summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaibo Chen <haibo.chen@nxp.com>2023-05-04 19:22:22 +0800
committerUlf Hansson <ulf.hansson@linaro.org>2023-05-09 11:55:02 +0200
commit81dce1490e28439c3cd8a8650b862a712f3061ba (patch)
tree94a4fb9e6faf1cd568d05de9c8fd1789f2223161
parente5bce3c15ad20d7dd4f8393513b1c41373d2dc37 (diff)
mmc: sdhci-esdhc-imx: make "no-mmc-hs400" works
After commit 1ed5c3b22fc7 ("mmc: sdhci-esdhc-imx: Propagate ESDHC_FLAG_HS400* only on 8bit bus"), the property "no-mmc-hs400" from device tree file do not work any more. This patch reorder the code, which can avoid the warning message "drop HS400 support since no 8-bit bus" and also make the property "no-mmc-hs400" from dts file works. Fixes: 1ed5c3b22fc7 ("mmc: sdhci-esdhc-imx: Propagate ESDHC_FLAG_HS400* only on 8bit bus") Signed-off-by: Haibo Chen <haibo.chen@nxp.com> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230504112222.3599602-1-haibo.chen@nxp.com Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c18
1 files changed, 10 insertions, 8 deletions
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index d7c0c0b9e26c..eebf94604a7f 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -1634,6 +1634,10 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
if (ret)
return ret;
+ /* HS400/HS400ES require 8 bit bus */
+ if (!(host->mmc->caps & MMC_CAP_8_BIT_DATA))
+ host->mmc->caps2 &= ~(MMC_CAP2_HS400 | MMC_CAP2_HS400_ES);
+
if (mmc_gpio_get_cd(host->mmc) >= 0)
host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
@@ -1724,10 +1728,6 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
host->mmc_host_ops.init_card = usdhc_init_card;
}
- err = sdhci_esdhc_imx_probe_dt(pdev, host, imx_data);
- if (err)
- goto disable_ahb_clk;
-
if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING)
sdhci_esdhc_ops.platform_execute_tuning =
esdhc_executing_tuning;
@@ -1735,15 +1735,13 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
if (imx_data->socdata->flags & ESDHC_FLAG_ERR004536)
host->quirks |= SDHCI_QUIRK_BROKEN_ADMA;
- if (host->mmc->caps & MMC_CAP_8_BIT_DATA &&
- imx_data->socdata->flags & ESDHC_FLAG_HS400)
+ if (imx_data->socdata->flags & ESDHC_FLAG_HS400)
host->mmc->caps2 |= MMC_CAP2_HS400;
if (imx_data->socdata->flags & ESDHC_FLAG_BROKEN_AUTO_CMD23)
host->quirks2 |= SDHCI_QUIRK2_ACMD23_BROKEN;
- if (host->mmc->caps & MMC_CAP_8_BIT_DATA &&
- imx_data->socdata->flags & ESDHC_FLAG_HS400_ES) {
+ if (imx_data->socdata->flags & ESDHC_FLAG_HS400_ES) {
host->mmc->caps2 |= MMC_CAP2_HS400_ES;
host->mmc_host_ops.hs400_enhanced_strobe =
esdhc_hs400_enhanced_strobe;
@@ -1765,6 +1763,10 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
goto disable_ahb_clk;
}
+ err = sdhci_esdhc_imx_probe_dt(pdev, host, imx_data);
+ if (err)
+ goto disable_ahb_clk;
+
sdhci_esdhc_imx_hwinit(host);
err = sdhci_add_host(host);
0%'> -rw-r--r--tools/testing/cxl/cxl_core_exports.c29
-rw-r--r--tools/testing/cxl/cxl_core_test.c6
-rw-r--r--tools/testing/cxl/cxl_mem_test.c6
-rw-r--r--tools/testing/cxl/cxl_pmem_test.c6
-rw-r--r--tools/testing/cxl/cxl_port_test.c6
-rw-r--r--tools/testing/cxl/exports.h13
-rw-r--r--tools/testing/cxl/mock_acpi.c35
-rw-r--r--tools/testing/cxl/test/Kbuild13
-rw-r--r--tools/testing/cxl/test/cxl.c1628
-rw-r--r--tools/testing/cxl/test/cxl_translate.c445
-rw-r--r--tools/testing/cxl/test/mem.c1895
-rw-r--r--tools/testing/cxl/test/mock.c279
-rw-r--r--tools/testing/cxl/test/mock.h35
-rw-r--r--tools/testing/cxl/watermark.h25
-rwxr-xr-x[-rw-r--r--]tools/testing/fault-injection/failcmd.sh12
-rwxr-xr-xtools/testing/ktest/config-bisect.pl4
-rw-r--r--tools/testing/ktest/examples/bootconfigs/boottrace.bconf59
-rw-r--r--tools/testing/ktest/examples/bootconfigs/config-bootconfig1
-rw-r--r--tools/testing/ktest/examples/bootconfigs/functiongraph.bconf15
-rw-r--r--tools/testing/ktest/examples/bootconfigs/tracing.bconf33
-rwxr-xr-xtools/testing/ktest/examples/bootconfigs/verify-boottrace.sh84
-rwxr-xr-xtools/testing/ktest/examples/bootconfigs/verify-functiongraph.sh61
-rwxr-xr-xtools/testing/ktest/examples/bootconfigs/verify-tracing.sh72
-rw-r--r--tools/testing/ktest/examples/include/bootconfig.conf69
-rw-r--r--tools/testing/ktest/examples/include/defaults.conf2
-rw-r--r--tools/testing/ktest/examples/kvm.conf1
-rw-r--r--tools/testing/ktest/examples/vmware.conf137
-rwxr-xr-xtools/testing/ktest/ktest.pl807
-rw-r--r--tools/testing/ktest/sample.conf15
-rw-r--r--tools/testing/kunit/configs/all_tests.config56
-rw-r--r--tools/testing/kunit/configs/arch_uml.config7
-rw-r--r--tools/testing/kunit/configs/broken_on_uml.config42
-rw-r--r--tools/testing/kunit/configs/coverage_uml.config11
-rw-r--r--tools/testing/kunit/configs/default.config3
-rwxr-xr-xtools/testing/kunit/kunit.py682
-rw-r--r--tools/testing/kunit/kunit_config.py130
-rw-r--r--tools/testing/kunit/kunit_json.py104
-rw-r--r--tools/testing/kunit/kunit_kernel.py390
-rw-r--r--tools/testing/kunit/kunit_parser.py1111
-rw-r--r--tools/testing/kunit/kunit_printer.py54
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py904
-rw-r--r--tools/testing/kunit/mypy.ini6
-rw-r--r--tools/testing/kunit/qemu_config.py20
-rw-r--r--tools/testing/kunit/qemu_configs/alpha.py10
-rw-r--r--tools/testing/kunit/qemu_configs/arm.py13
-rw-r--r--tools/testing/kunit/qemu_configs/arm64.py12
-rw-r--r--tools/testing/kunit/qemu_configs/i386.py10
-rw-r--r--tools/testing/kunit/qemu_configs/loongarch.py21
-rw-r--r--tools/testing/kunit/qemu_configs/m68k.py10
-rw-r--r--tools/testing/kunit/qemu_configs/mips.py18
-rw-r--r--tools/testing/kunit/qemu_configs/mips64.py19
-rw-r--r--tools/testing/kunit/qemu_configs/mips64el.py19
-rw-r--r--tools/testing/kunit/qemu_configs/mipsel.py18
-rw-r--r--tools/testing/kunit/qemu_configs/powerpc.py13
-rw-r--r--tools/testing/kunit/qemu_configs/powerpc32.py17
-rw-r--r--tools/testing/kunit/qemu_configs/powerpcle.py14
-rw-r--r--tools/testing/kunit/qemu_configs/riscv.py28
-rw-r--r--tools/testing/kunit/qemu_configs/riscv32.py17
-rw-r--r--tools/testing/kunit/qemu_configs/s390.py14
-rw-r--r--tools/testing/kunit/qemu_configs/sh.py19
-rw-r--r--tools/testing/kunit/qemu_configs/sparc.py13
-rw-r--r--tools/testing/kunit/qemu_configs/sparc64.py16
-rw-r--r--tools/testing/kunit/qemu_configs/x86_64.py12
-rwxr-xr-xtools/testing/kunit/run_checks.py81
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-all_passed_nested.log34
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-crash.log70
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-kselftest.log15
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-missing_plan.log31
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log7
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_no_header.log (renamed from tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log)0
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log2
-rw-r--r--tools/testing/kunit/test_data/test_parse_attributes.log9
-rw-r--r--tools/testing/kunit/test_data/test_parse_ktap_output.log8
-rw-r--r--tools/testing/kunit/test_data/test_parse_subtest_header.log7
-rw-r--r--tools/testing/kunit/test_data/test_skip_all_tests.log15
-rw-r--r--tools/testing/kunit/test_data/test_skip_tests.log15
-rw-r--r--tools/testing/kunit/test_data/test_strip_hyphen.log16
-rw-r--r--tools/testing/memblock/.gitignore5
-rw-r--r--tools/testing/memblock/Makefile55
-rw-r--r--tools/testing/memblock/README118
-rw-r--r--tools/testing/memblock/TODO5
-rw-r--r--tools/testing/memblock/asm/dma.h5
-rw-r--r--tools/testing/memblock/internal.h33
-rw-r--r--tools/testing/memblock/lib/slab.c9
-rw-r--r--tools/testing/memblock/linux/kernel.h14
-rw-r--r--tools/testing/memblock/linux/kmemleak.h18
-rw-r--r--tools/testing/memblock/linux/memory_hotplug.h17
-rw-r--r--tools/testing/memblock/linux/mmzone.h38
-rw-r--r--tools/testing/memblock/linux/mutex.h14
-rw-r--r--tools/testing/memblock/linux/printk.h25
-rw-r--r--tools/testing/memblock/main.c19
-rw-r--r--tools/testing/memblock/mmzone.c20
-rw-r--r--tools/testing/memblock/scripts/Makefile.include19
-rw-r--r--tools/testing/memblock/tests/alloc_api.c884
-rw-r--r--tools/testing/memblock/tests/alloc_api.h9
-rw-r--r--tools/testing/memblock/tests/alloc_exact_nid_api.c1113
-rw-r--r--tools/testing/memblock/tests/alloc_exact_nid_api.h25
-rw-r--r--tools/testing/memblock/tests/alloc_helpers_api.c414
-rw-r--r--tools/testing/memblock/tests/alloc_helpers_api.h9
-rw-r--r--tools/testing/memblock/tests/alloc_nid_api.c2733
-rw-r--r--tools/testing/memblock/tests/alloc_nid_api.h26
-rw-r--r--tools/testing/memblock/tests/basic_api.c2551
-rw-r--r--tools/testing/memblock/tests/basic_api.h9
-rw-r--r--tools/testing/memblock/tests/common.c209
-rw-r--r--tools/testing/memblock/tests/common.h177
-rw-r--r--tools/testing/nvdimm/Kbuild15
-rw-r--r--tools/testing/nvdimm/config_check.c1
-rw-r--r--tools/testing/nvdimm/dax_pmem_compat_test.c8
-rw-r--r--tools/testing/nvdimm/dax_pmem_core_test.c8
-rw-r--r--tools/testing/nvdimm/dimm_devs.c30
-rw-r--r--tools/testing/nvdimm/pmem-dax.c8
-rw-r--r--tools/testing/nvdimm/test/Kbuild1
-rw-r--r--tools/testing/nvdimm/test/iomap.c69
-rw-r--r--tools/testing/nvdimm/test/ndtest.c184
-rw-r--r--tools/testing/nvdimm/test/ndtest.h31
-rw-r--r--tools/testing/nvdimm/test/nfit.c110
-rw-r--r--tools/testing/nvdimm/test/nfit_test.h28
-rw-r--r--tools/testing/radix-tree/.gitignore4
-rw-r--r--tools/testing/radix-tree/Makefile61
-rw-r--r--tools/testing/radix-tree/bitmap.c23
-rw-r--r--tools/testing/radix-tree/idr-test.c38
-rw-r--r--tools/testing/radix-tree/linux.c120
-rw-r--r--tools/testing/radix-tree/linux/gfp.h33
-rw-r--r--tools/testing/radix-tree/linux/idr.h1
-rw-r--r--tools/testing/radix-tree/linux/init.h1
-rw-r--r--tools/testing/radix-tree/linux/slab.h27
-rw-r--r--tools/testing/radix-tree/maple.c36227
-rw-r--r--tools/testing/radix-tree/multiorder.c74
-rw-r--r--tools/testing/radix-tree/regression1.c2
-rw-r--r--tools/testing/radix-tree/xarray.c11
-rw-r--r--tools/testing/rbtree/Makefile33
-rw-r--r--tools/testing/rbtree/interval_tree_test.c58
-rw-r--r--tools/testing/rbtree/rbtree_test.c48
-rw-r--r--tools/testing/rbtree/test.h4
-rw-r--r--tools/testing/scatterlist/linux/mm.h5
-rw-r--r--tools/testing/scatterlist/main.c39
-rw-r--r--tools/testing/selftests/.gitignore2
-rw-r--r--tools/testing/selftests/Makefile183
-rw-r--r--tools/testing/selftests/acct/.gitignore3
-rw-r--r--tools/testing/selftests/acct/Makefile5
-rw-r--r--tools/testing/selftests/acct/acct_syscall.c78
-rw-r--r--tools/testing/selftests/alsa/.gitignore5
-rw-r--r--tools/testing/selftests/alsa/Makefile31
-rw-r--r--tools/testing/selftests/alsa/alsa-local.h37
-rw-r--r--tools/testing/selftests/alsa/conf.c475
-rw-r--r--tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf84
-rw-r--r--tools/testing/selftests/alsa/global-timer.c87
-rw-r--r--tools/testing/selftests/alsa/mixer-test.c1145
-rw-r--r--tools/testing/selftests/alsa/pcm-test.c671
-rw-r--r--tools/testing/selftests/alsa/pcm-test.conf63
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c330
-rw-r--r--tools/testing/selftests/alsa/utimer-test.c165
-rw-r--r--tools/testing/selftests/amd-pstate/Makefile18
-rwxr-xr-xtools/testing/selftests/amd-pstate/basic.sh38
-rw-r--r--tools/testing/selftests/amd-pstate/config1
-rwxr-xr-xtools/testing/selftests/amd-pstate/gitsource.sh359
-rwxr-xr-xtools/testing/selftests/amd-pstate/run.sh396
-rwxr-xr-xtools/testing/selftests/amd-pstate/tbench.sh339
-rw-r--r--tools/testing/selftests/arm64/Makefile17
-rw-r--r--tools/testing/selftests/arm64/abi/.gitignore4
-rw-r--r--tools/testing/selftests/arm64/abi/Makefile15
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c1295
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c271
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi-asm.S360
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.c565
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.h15
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c246
-rw-r--r--tools/testing/selftests/arm64/bti/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/bti/Makefile56
-rw-r--r--tools/testing/selftests/arm64/bti/assembler.h79
-rw-r--r--tools/testing/selftests/arm64/bti/btitest.h23
-rw-r--r--tools/testing/selftests/arm64/bti/signal.c37
-rw-r--r--tools/testing/selftests/arm64/bti/signal.h21
-rw-r--r--tools/testing/selftests/arm64/bti/start.S14
-rw-r--r--tools/testing/selftests/arm64/bti/syscall.S23
-rw-r--r--tools/testing/selftests/arm64/bti/system.c20
-rw-r--r--tools/testing/selftests/arm64/bti/system.h28
-rw-r--r--tools/testing/selftests/arm64/bti/test.c229
-rw-r--r--tools/testing/selftests/arm64/bti/teststubs.S39
-rw-r--r--tools/testing/selftests/arm64/bti/trampoline.S29
-rw-r--r--tools/testing/selftests/arm64/fp/.gitignore13
-rw-r--r--tools/testing/selftests/arm64/fp/Makefile55
-rw-r--r--tools/testing/selftests/arm64/fp/TODO7
-rw-r--r--tools/testing/selftests/arm64/fp/asm-offsets.h1
-rw-r--r--tools/testing/selftests/arm64/fp/asm-utils.S172
-rw-r--r--tools/testing/selftests/arm64/fp/assembler.h26
-rw-r--r--tools/testing/selftests/arm64/fp/fp-pidbench.S70
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace-asm.S292
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c1692
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.h25
-rw-r--r--tools/testing/selftests/arm64/fp/fp-stress.c660
-rw-r--r--tools/testing/selftests/arm64/fp/fpsimd-test.S222
-rw-r--r--tools/testing/selftests/arm64/fp/kernel-test.c326
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl-sme.c14
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl-sve.c14
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.S20
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.h9
-rw-r--r--tools/testing/selftests/arm64/fp/sme-inst.h73
-rw-r--r--tools/testing/selftests/arm64/fp/ssve-stress59
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c9
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace-asm.S33
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c891
-rw-r--r--tools/testing/selftests/arm64/fp/sve-test.S292
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c796
-rw-r--r--tools/testing/selftests/arm64/fp/vlset.c10
-rw-r--r--tools/testing/selftests/arm64/fp/za-fork-asm.S61
-rw-r--r--tools/testing/selftests/arm64/fp/za-fork.c100
-rw-r--r--tools/testing/selftests/arm64/fp/za-ptrace.c368
-rw-r--r--tools/testing/selftests/arm64/fp/za-stress59
-rw-r--r--tools/testing/selftests/arm64/fp/za-test.S400
-rw-r--r--tools/testing/selftests/arm64/fp/zt-ptrace.c366
-rw-r--r--tools/testing/selftests/arm64/fp/zt-test.S318
-rw-r--r--tools/testing/selftests/arm64/gcs/.gitignore7
-rw-r--r--tools/testing/selftests/arm64/gcs/Makefile30
-rw-r--r--tools/testing/selftests/arm64/gcs/asm-offsets.h (renamed from tools/testing/radix-tree/linux/compiler_types.h)0
-rw-r--r--tools/testing/selftests/arm64/gcs/basic-gcs.c420
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-locking.c199
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-stress-thread.S311
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-stress.c530
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-util.h100
-rw-r--r--tools/testing/selftests/arm64/gcs/gcspushm.S96
-rw-r--r--tools/testing/selftests/arm64/gcs/gcsstr.S99
-rw-r--r--tools/testing/selftests/arm64/gcs/libc-gcs.c728
-rw-r--r--tools/testing/selftests/arm64/mte/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/mte/Makefile28
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c28
-rw-r--r--tools/testing/selftests/arm64/mte/check_child_memory.c16
-rw-r--r--tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c11
-rw-r--r--tools/testing/selftests/arm64/mte/check_hugetlb_options.c296
-rw-r--r--tools/testing/selftests/arm64/mte/check_ksm_options.c11
-rw-r--r--tools/testing/selftests/arm64/mte/check_mmap_options.c905
-rw-r--r--tools/testing/selftests/arm64/mte/check_prctl.c132
-rw-r--r--tools/testing/selftests/arm64/mte/check_tags_inclusion.c62
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c200
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c180
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.h28
-rw-r--r--tools/testing/selftests/arm64/mte/mte_def.h8
-rw-r--r--tools/testing/selftests/arm64/mte/mte_helper.S2
-rw-r--r--tools/testing/selftests/arm64/pauth/Makefile6
-rw-r--r--tools/testing/selftests/arm64/pauth/exec_target.c7
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c17
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore9
-rw-r--r--tools/testing/selftests/arm64/signal/Makefile11
-rw-r--r--tools/testing/selftests/arm64/signal/sve_helpers.c56
-rw-r--r--tools/testing/selftests/arm64/signal/sve_helpers.h34
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.c19
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.h19
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c161
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.h73
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/TODO1
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c76
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c76
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c82
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c62
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/gcs_frame.c88
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c67
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c86
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c38
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c45
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c36
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_vl.c68
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/ssve_regs.c117
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c141
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_regs.c105
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_vl.c68
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.c209
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.h41
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c86
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c90
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/za_no_regs.c103
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/za_regs.c123
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c51
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/zt_regs.c86
-rw-r--r--tools/testing/selftests/arm64/tags/Makefile3
-rwxr-xr-xtools/testing/selftests/arm64/tags/run_tags_test.sh12
-rw-r--r--tools/testing/selftests/arm64/tags/tags_test.c12
-rw-r--r--tools/testing/selftests/bpf/.gitignore38
-rw-r--r--tools/testing/selftests/bpf/DENYLIST7
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.riscv643
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x4
-rw-r--r--tools/testing/selftests/bpf/Makefile768
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs77
-rw-r--r--tools/testing/selftests/bpf/README.rst234
-rw-r--r--tools/testing/selftests/bpf/autoconf_helper.h9
-rw-r--r--tools/testing/selftests/bpf/bench.c412
-rw-r--r--tools/testing/selftests/bpf/bench.h40
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c477
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c185
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c89
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c277
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_loop.c100
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_count.c16
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_htab_mem.c350
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage.c282
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_create.c258
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c263
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c555
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_rename.c33
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c99
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_sockmap.c599
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_strncmp.c156
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c611
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh45
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh11
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh15
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh40
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_local_storage.sh24
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh11
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_rename.sh2
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh58
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_strncmp.sh12
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh22
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh9
-rw-r--r--tools/testing/selftests/bpf/benchs/run_common.sh92
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_alloc.h67
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h75
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_htab.h100
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h90
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_strsearch.h128
-rw-r--r--tools/testing/selftests/bpf/bpf_atomic.h140
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h656
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h98
-rw-r--r--tools/testing/selftests/bpf/bpf_legacy.h28
-rw-r--r--tools/testing/selftests/bpf/bpf_rlimit.h28
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h234
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/Makefile20
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c74
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h20
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h34
-rw-r--r--tools/testing/selftests/bpf/btf_helpers.c53
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.c67
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.h20
-rw-r--r--tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h25
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c554
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h37
-rw-r--r--tools/testing/selftests/bpf/cgroup_tcp_skb.h35
-rw-r--r--tools/testing/selftests/bpf/config161
-rw-r--r--tools/testing/selftests/bpf/config.aarch64154
-rw-r--r--tools/testing/selftests/bpf/config.ppc64el92
-rw-r--r--tools/testing/selftests/bpf/config.riscv6483
-rw-r--r--tools/testing/selftests/bpf/config.s390x125
-rw-r--r--tools/testing/selftests/bpf/config.vm15
-rw-r--r--tools/testing/selftests/bpf/config.x86_64227
l---------tools/testing/selftests/bpf/disasm.c1
l---------tools/testing/selftests/bpf/disasm.h1
-rw-r--r--tools/testing/selftests/bpf/disasm_helpers.c69
-rw-r--r--tools/testing/selftests/bpf/disasm_helpers.h12
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.c18
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.h13
-rwxr-xr-xtools/testing/selftests/bpf/generate_udp_fragments.py90
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c139
-rw-r--r--tools/testing/selftests/bpf/gnu/stubs.h2
-rwxr-xr-xtools/testing/selftests/bpf/ima_setup.sh35
-rw-r--r--tools/testing/selftests/bpf/io_helpers.c21
-rw-r--r--tools/testing/selftests/bpf/io_helpers.h7
-rw-r--r--tools/testing/selftests/bpf/ip_check_defrag_frags.h57
-rw-r--r--tools/testing/selftests/bpf/jit_disasm_helpers.c245
-rw-r--r--tools/testing/selftests/bpf/jit_disasm_helpers.h10
l---------tools/testing/selftests/bpf/json_writer.c1
l---------tools/testing/selftests/bpf/json_writer.h1
-rw-r--r--tools/testing/selftests/bpf/liburandom_read.map15
-rw-r--r--tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c118
-rw-r--r--tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c17
-rw-r--r--tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c (renamed from tools/testing/selftests/bpf/test_lpm_map.c)490
-rw-r--r--tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c155
-rw-r--r--tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c109
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c278
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_percpu_stats.c488
-rw-r--r--tools/testing/selftests/bpf/map_tests/sk_storage_map.c90
-rw-r--r--tools/testing/selftests/bpf/map_tests/task_storage_map.c128
-rw-r--r--tools/testing/selftests/bpf/netcnt_common.h38
-rw-r--r--tools/testing/selftests/bpf/netlink_helpers.c358
-rw-r--r--tools/testing/selftests/bpf/netlink_helpers.h46
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c1106
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h251
-rw-r--r--tools/testing/selftests/bpf/prog_tests/access_variable_array.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c296
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_atomics.c268
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_htab.c90
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_list.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c123
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_strsearch.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c181
-rw-r--r--tools/testing/selftests/bpf/prog_tests/assign_reuse.c199
-rw-r--r--tools/testing/selftests/bpf/prog_tests/async_stack_depth.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c468
-rw-r--r--tools/testing/selftests/bpf/prog_tests/autoattach.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bind_perm.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c224
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c763
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_gotox.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c504
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c1038
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c226
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c100
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_loop.c207
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c206
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c198
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c230
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c269
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c231
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c694
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c278
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c1904
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c267
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_distill.c692
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c990
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_endian.c24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_field_iter.c161
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_module.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c299
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_split.c145
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_sysfs.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_tag.c249
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_write.c239
-rw-r--r--tools/testing/selftests/bpf/prog_tests/build_id.c118
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cb_refs.c48
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c96
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c163
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c141
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_dev.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_get_current_cgroup_id.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c549
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c339
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_iter.c333
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c617
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_storage.c96
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c344
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c180
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c361
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c73
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c191
-rw-r--r--tools/testing/selftests/bpf/prog_tests/compute_live_registers.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_force_port.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_ping.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_autosize.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_extern.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_kern.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c444
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc_raw.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_retro.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/crypto_sanity.c196
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c821
-rw-r--r--tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c176
-rw-r--r--tools/testing/selftests/bpf/prog_tests/d_path.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/decap_sanity.c76
-rw-r--r--tools/testing/selftests/bpf/prog_tests/deny_namespace.c102
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c285
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c191
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c193
-rw-r--r--tools/testing/selftests/bpf/prog_tests/empty_skb.c151
-rw-r--r--tools/testing/selftests/bpf/prog_tests/enable_stats.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/exceptions.c409
-rw-r--r--tools/testing/selftests/bpf/prog_tests/exhandler.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c441
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c192
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c57
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c103
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c407
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c88
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_stress.c84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c103
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c377
-rw-r--r--tools/testing/selftests/bpf/prog_tests/file_reader.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fill_link_info.c652
-rw-r--r--tools/testing/selftests/bpf/prog_tests/find_vma.c127
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c437
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c797
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c253
-rw-r--r--tools/testing/selftests/bpf/prog_tests/free_timer.c169
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c291
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_args_test.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c139
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c42
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data_init.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_func_args.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c60
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_map_resize.c235
-rw-r--r--tools/testing/selftests/bpf/prog_tests/hashmap.c199
-rw-r--r--tools/testing/selftests/bpf/prog_tests/helper_restricted.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_reuse.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_update.c145
-rw-r--r--tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c283
-rw-r--r--tools/testing/selftests/bpf/prog_tests/iters.c319
-rw-r--r--tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kernel_flag.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c93
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c324
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c119
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c127
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c609
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c95
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_module.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/legacy_printk.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c87
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_probes.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_str.c225
-rw-r--r--tools/testing/selftests/bpf/prog_tests/link_pinning.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_funcs.c48
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c813
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_maps.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_vars.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c147
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_buf.c278
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_fixup.c181
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c291
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lookup_key.c112
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lru_bug.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c319
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_helpers.h109
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c540
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_redirect.c331
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_reroute.c264
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c176
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_btf.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_excl.c54
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_in_map.c271
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_kptr.c163
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lock.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ops.c162
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ptr.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/metadata.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c559
-rw-r--r--tools/testing/selftests/bpf/prog_tests/missed.c139
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/modify_return.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c78
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c134
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c587
-rw-r--r--tools/testing/selftests/bpf/prog_tests/nested_trust.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/net_timestamping.c239
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c124
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c102
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c206
-rw-r--r--tools/testing/selftests/bpf/prog_tests/obj_name.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c94
-rw-r--r--tools/testing/selftests/bpf/prog_tests/percpu_alloc.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_link.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_skip.c137
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_htab.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_access.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_md_access.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/preempt_lock.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prepare.c99
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pro_epilogue.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_user.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_array_init.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_opts.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c181
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/queue_stack_map.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_null.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rbtree.c195
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c159
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rdonly_maps.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/read_vsyscall.c59
-rw-r--r--tools/testing/selftests/bpf/prog_tests/recursion.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/recursive_attach.c218
-rw-r--r--tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c102
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c2161
-rw-r--r--tools/testing/selftests/bpf/prog_tests/res_spin_lock.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c27
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c356
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c51
-rw-r--r--tools/testing/selftests/bpf/prog_tests/section_names.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c185
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c201
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/setget_sockopt.c245
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sha256.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/signal_pending.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c298
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_helpers.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c54
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c129
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf_btf.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_addr.c2660
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_create.c348
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_destroy.c221
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_fields.c70
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c994
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_post_bind.c426
-rw-r--r--tools/testing/selftests/bpf/prog_tests/socket_helpers.h473
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c921
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h83
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c452
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c609
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_redir.c465
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_strp.c454
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt.c291
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c148
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_multi.c120
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c97
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c174
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spinlock.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c63
-rw-r--r--tools/testing/selftests/bpf/prog_tests/static_linked.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stream.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/string_kfuncs.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c159
-rw-r--r--tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c106
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subprogs.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subprogs_extable.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subskeleton.c152
-rw-r--r--tools/testing/selftests/bpf/prog_tests/summarization.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/syscall.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c1311
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_kfunc.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_data.h386
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c518
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_pt_regs.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c60
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_work_stress.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_bpf.c429
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_change_tail.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_helpers.h50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c1962
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_netkit.c870
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c2814
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c1303
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_estats.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c119
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c390
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpffs.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_btf_ext.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_csum_diff.c408
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_global_funcs.c219
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_ima.c173
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c139
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_local_storage.c84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c106
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_mmap_inner_array.c57
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_overhead.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_profiler.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_strncmp.c148
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c317
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_sysctl.c (renamed from tools/testing/selftests/bpf/test_sysctl.c)68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_local_data.c297
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_work.c157
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_edt.c145
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c714
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c1074
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c261
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c599
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.c2596
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.h298
-rw-r--r--tools/testing/selftests/bpf/prog_tests/time_tai.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c135
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_crash.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c80
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c1197
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tp_attach_query.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_ext.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_printk.c75
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_vprintk.c54
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_failure.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c151
-rw-r--r--tools/testing/selftests/bpf/prog_tests/type_cast.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/udp_limit.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uninit_stack.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c317
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe.c249
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c1376
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c803
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c186
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c526
-rw-r--r--tools/testing/selftests/bpf/prog_tests/user_ringbuf.c701
-rw-r--r--tools/testing/selftests/bpf/prog_tests/varlen.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verif_stats.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c298
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier_log.c450
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c565
-rw-r--r--tools/testing/selftests/bpf/prog_tests/vmlinux.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c312
-rw-r--r--tools/testing/selftests/bpf/prog_tests/wq.c96
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c146
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c369
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_attach.c115
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c691
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c146
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c512
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c135
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c254
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c421
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c168
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_info.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c545
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_noinline.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_perf.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c179
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_vlan.c175
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdpwall.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xfrm_info.c347
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xsk.c151
-rw-r--r--tools/testing/selftests/bpf/progs/access_map_in_map.c93
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c397
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab.c59
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab_asm.c5
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c88
-rw-r--r--tools/testing/selftests/bpf/progs/arena_spin_lock.c54
-rw-r--r--tools/testing/selftests/bpf/progs/arena_strsearch.c146
-rw-r--r--tools/testing/selftests/bpf/progs/async_stack_depth.c61
-rw-r--r--tools/testing/selftests/bpf/progs/atomics.c44
-rw-r--r--tools/testing/selftests/bpf/progs/bad_struct_ops.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bad_struct_ops2.c14
-rw-r--r--tools/testing/selftests/bpf/progs/bench_local_storage_create.c83
-rw-r--r--tools/testing/selftests/bpf/progs/bench_sockmap_prog.c65
-rw-r--r--tools/testing/selftests/bpf/progs/bind4_prog.c51
-rw-r--r--tools/testing/selftests/bpf/progs/bind6_prog.c51
-rw-r--r--tools/testing/selftests/bpf/progs/bind_prog.h19
-rw-r--r--tools/testing/selftests/bpf/progs/bloom_filter_bench.c154
-rw-r--r--tools/testing/selftests/bpf/progs/bloom_filter_map.c83
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h542
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cc_cubic.c180
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_compiler.h33
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c106
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c140
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp_release.c22
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c20
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_gotox.c448
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c40
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c63
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h138
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c29
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c12
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c21
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c24
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ksym.c71
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c22
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c58
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c60
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c26
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c12
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c35
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c (renamed from tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c)10
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tasks.c198
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c13
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c11
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c7
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_unix.c80
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c37
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_loop.c225
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_loop_bench.c30
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h273
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_mod_race.c100
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_common.h27
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c41
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c126
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c756
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_smc.c117
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_syscall_macro.c108
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_test_utils.h18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h143
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c12
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c4
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c84
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c173
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c59
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag.c25
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c73
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_user.c40
-rw-r--r--tools/testing/selftests/bpf/progs/cb_refs.c114
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi.h2
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c4
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c4
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_ancestor.c40
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c58
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c16
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c69
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c156
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_iter.c38
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_mprog.c30
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_preorder.c41
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_read_xattr.c158
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c15
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_storage.c24
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c382
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h79
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c247
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c235
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c100
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_negative.c26
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c94
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c125
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c126
-rw-r--r--tools/testing/selftests/bpf/progs/compute_live_registers.c440
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_dropper.c28
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c36
-rw-r--r--tools/testing/selftests/bpf/progs/connect6_prog.c8
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port4.c1
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port6.c1
-rw-r--r--tools/testing/selftests/bpf/progs/connect_ping.c53
-rw-r--r--tools/testing/selftests/bpf/progs/connect_unix_prog.c45
-rw-r--r--tools/testing/selftests/bpf/progs/core_kern.c120
-rw-r--r--tools/testing/selftests/bpf/progs/core_kern_overflow.c22
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h243
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h129
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c262
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c890
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_basic.c68
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_bench.c107
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_common.h66
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c179
-rw-r--r--tools/testing/selftests/bpf/progs/csum_diff_test.c42
-rw-r--r--tools/testing/selftests/bpf/progs/decap_sanity.c68
-rw-r--r--tools/testing/selftests/bpf/progs/dev_cgroup.c5
-rw-r--r--tools/testing/selftests/bpf/progs/dmabuf_iter.c101
-rw-r--r--tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c27
-rw-r--r--tools/testing/selftests/bpf/progs/dummy_st_ops_success.c56
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c1995
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c1137
-rw-r--r--tools/testing/selftests/bpf/progs/empty_skb.c37
-rw-r--r--tools/testing/selftests/bpf/progs/epilogue_exit.c82
-rw-r--r--tools/testing/selftests/bpf/progs/epilogue_tailcall.c58
-rw-r--r--tools/testing/selftests/bpf/progs/err.h28
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions.c368
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c135
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_ext.c72
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_fail.c349
-rw-r--r--tools/testing/selftests/bpf/progs/exhandler_kern.c52
-rw-r--r--tools/testing/selftests/bpf/progs/fd_htab_lookup.c25
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_many_args.c39
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_recursive.c14
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_recursive_target.c25
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_test.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c10
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_many_args.c40
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_sleep.c32
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_test.c4
-rw-r--r--tools/testing/selftests/bpf/progs/fib_lookup.c22
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader.c145
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader_fail.c52
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma.c69
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma_fail1.c30
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma_fail2.c29
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_array_map_elem.c73
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c95
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_modify.c30
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c27
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_multi_maps.c49
-rw-r--r--tools/testing/selftests/bpf/progs/free_timer.c71
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_attach_probe.c2
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_cls_redirect.c12
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_dead_global_func.c11
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_global_func.c18
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_progmap.c24
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c20
-rw-r--r--tools/testing/selftests/bpf/progs/get_branch_snapshot.c40
-rw-r--r--tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c27
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_args_test.c123
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_test.c105
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c18
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername4_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername6_prog.c31
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername_unix_prog.c38
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname4_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname6_prog.c31
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname_unix_prog.c38
-rw-r--r--tools/testing/selftests/bpf/progs/htab_mem_bench.c105
-rw-r--r--tools/testing/selftests/bpf/progs/htab_reuse.c19
-rw-r--r--tools/testing/selftests/bpf/progs/htab_update.c36
-rw-r--r--tools/testing/selftests/bpf/progs/ima.c66
-rw-r--r--tools/testing/selftests/bpf/progs/inner_array_lookup.c45
-rw-r--r--tools/testing/selftests/bpf/progs/ip_check_defrag.c99
-rw-r--r--tools/testing/selftests/bpf/progs/irq.c566
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c1929
-rw-r--r--tools/testing/selftests/bpf/progs/iters_css.c72
-rw-r--r--tools/testing/selftests/bpf/progs/iters_css_task.c102
-rw-r--r--tools/testing/selftests/bpf/progs/iters_looping.c216
-rw-r--r--tools/testing/selftests/bpf/progs/iters_num.c242
-rw-r--r--tools/testing/selftests/bpf/progs/iters_state_safety.c426
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task.c51
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_failure.c105
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_vma.c43
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod.c171
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod_seq.c129
-rw-r--r--tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c46
-rw-r--r--tools/testing/selftests/bpf/progs/jit_probe_mem.c59
-rw-r--r--tools/testing/selftests/bpf/progs/kfree_skb.c8
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_destructive.c13
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_fail.c160
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_race.c13
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c217
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c37
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_module_order.c30
-rw-r--r--tools/testing/selftests/bpf/progs/kmem_cache_iter.c108
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi.c162
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_empty.c12
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_override.c13
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session.c78
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c58
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c31
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_write_ctx.c22
-rw-r--r--tools/testing/selftests/bpf/progs/kptr_xchg_inline.c48
-rw-r--r--tools/testing/selftests/bpf/progs/ksym_race.c13
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs1.c97
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs2.c97
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.c420
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.h56
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c611
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_peek.c113
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps1.c82
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps2.c76
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars1.c54
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars2.c55
-rw-r--r--tools/testing/selftests/bpf/progs/livepatch_trampoline.c30
-rw-r--r--tools/testing/selftests/bpf/progs/local_kptr_stash.c285
-rw-r--r--tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c85
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage.c125
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage_bench.c104
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c67
-rw-r--r--tools/testing/selftests/bpf/progs/loop1.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop2.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c11
-rw-r--r--tools/testing/selftests/bpf/progs/loop4.c4
-rw-r--r--tools/testing/selftests/bpf/progs/loop5.c1
-rw-r--r--tools/testing/selftests/bpf/progs/loop6.c95
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie.h30
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_bench.c230
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_map.c19
-rw-r--r--tools/testing/selftests/bpf/progs/lru_bug.c49
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c20
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_cgroup.c192
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c14
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_tailcall.c34
-rw-r--r--tools/testing/selftests/bpf/progs/map_excl.c34
-rw-r--r--tools/testing/selftests/bpf/progs/map_in_map_btf.c73
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c540
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_fail.c388
-rw-r--r--tools/testing/selftests/bpf/progs/map_percpu_stats.c24
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c28
-rw-r--r--tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c229
-rw-r--r--tools/testing/selftests/bpf/progs/missed_kprobe.c30
-rw-r--r--tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c54
-rw-r--r--tools/testing/selftests/bpf/progs/missed_tp_recursion.c41
-rw-r--r--tools/testing/selftests/bpf/progs/mmap_inner_array.c57
-rw-r--r--tools/testing/selftests/bpf/progs/modify_return.c40
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_bpf.h42
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sock.c88
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sockmap.c43
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_subflow.c128
-rw-r--r--tools/testing/selftests/bpf/progs/mptcpify.c24
-rw-r--r--tools/testing/selftests/bpf/progs/nested_acquire.c33
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_common.h12
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_failure.c41
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_success.c42
-rw-r--r--tools/testing/selftests/bpf/progs/net_timestamping.c248
-rw-r--r--tools/testing/selftests/bpf/progs/netcnt_prog.c10
-rw-r--r--tools/testing/selftests/bpf/progs/netif_receive_skb.c15
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c103
-rw-r--r--tools/testing/selftests/bpf/progs/normal_map_btf.c56
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_array.c190
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c109
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_fail.c182
-rw-r--r--tools/testing/selftests/bpf/progs/perf_event_stackmap.c8
-rw-r--r--tools/testing/selftests/bpf/progs/perfbuf_bench.c4
-rw-r--r--tools/testing/selftests/bpf/progs/preempt_lock.c212
-rw-r--r--tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c106
-rw-r--r--tools/testing/selftests/bpf/progs/prepare.c27
-rw-r--r--tools/testing/selftests/bpf/progs/priv_freplace_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_map.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/pro_epilogue.c154
-rw-r--r--tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c149
-rw-r--r--tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c88
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h162
-rw-r--r--tools/testing/selftests/bpf/progs/profiler1.c1
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h88
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf180.c22
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600.c11
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c6
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600_iter.c7
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600_nounroll.c3
-rw-r--r--tools/testing/selftests/bpf/progs/raw_tp_null.c31
-rw-r--r--tools/testing/selftests/bpf/progs/raw_tp_null_fail.c24
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree.c323
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c52
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c38
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c304
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_search.c206
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c541
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c36
-rw-r--r--tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c38
-rw-r--r--tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c60
-rw-r--r--tools/testing/selftests/bpf/progs/read_vsyscall.c59
-rw-r--r--tools/testing/selftests/bpf/progs/recursion.c9
-rw-r--r--tools/testing/selftests/bpf/progs/recvmsg4_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/recvmsg6_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c38
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c631
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c121
-rw-r--r--tools/testing/selftests/bpf/progs/res_spin_lock.c147
-rw-r--r--tools/testing/selftests/bpf/progs/res_spin_lock_fail.c244
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c14
-rw-r--r--tools/testing/selftests/bpf/progs/security_bpf_map.c69
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg4_prog.c10
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg6_prog.c59
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c45
-rw-r--r--tools/testing/selftests/bpf/progs/set_global_vars.c106
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c434
-rw-r--r--tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c104
-rw-r--r--tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c59
-rw-r--r--tools/testing/selftests/bpf/progs/skb_load_bytes.c19
-rw-r--r--tools/testing/selftests/bpf/progs/skb_pkt_end.c16
-rw-r--r--tools/testing/selftests/bpf/progs/sock_addr_kern.c65
-rw-r--r--tools/testing/selftests/bpf/progs/sock_destroy_prog.c145
-rw-r--r--tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c22
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c133
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_parse_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c14
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c18
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_inherit.c19
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_multi.c31
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c43
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c95
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_ips.c49
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_map.c (renamed from tools/testing/selftests/bpf/progs/test_stacktrace_map.c)14
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_map_skip.c68
-rw-r--r--tools/testing/selftests/bpf/progs/stream.c237
-rw-r--r--tools/testing/selftests/bpf/progs/stream_fail.c33
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c105
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c26
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_success.c56
-rw-r--r--tools/testing/selftests/bpf/progs/strncmp_bench.c53
-rw-r--r--tools/testing/selftests/bpf/progs/strncmp_test.c54
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h172
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c9
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_autocreate.c52
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c32
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_detach.c22
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c19
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c26
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c34
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c25
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c29
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c24
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_module.c90
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c102
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c22
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack.c62
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c62
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c50
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted.c31
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c22
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c36
-rw-r--r--tools/testing/selftests/bpf/progs/summarization.c78
-rw-r--r--tools/testing/selftests/bpf/progs/summarization_freplace.c33
-rw-r--r--tools/testing/selftests/bpf/progs/syscall.c208
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall1.c7
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall2.c23
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall3.c9
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall4.c9
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall5.c9
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall6.c34
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c7
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c9
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c22
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c35
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c45
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c18
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c18
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c37
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c73
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c65
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c38
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_fail.c64
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_freplace.c23
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_poke.c32
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_common.h77
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_failure.c340
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_success.c419
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_data.bpf.h237
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_storage.c64
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c35
-rw-r--r--tools/testing/selftests/bpf/progs/task_ls_recursion.c90
-rw-r--r--tools/testing/selftests/bpf/progs/task_ls_uptr.c63
-rw-r--r--tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c47
-rw-r--r--tools/testing/selftests/bpf/progs/task_work.c107
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_fail.c96
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_stress.c73
-rw-r--r--tools/testing/selftests/bpf/progs/tc_bpf2bpf.c25
-rw-r--r--tools/testing/selftests/bpf/progs/tc_dummy.c12
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c29
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c121
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c21
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_update.c74
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c63
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_rtt.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_access_variable_array.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_assign_reuse.c142
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c177
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe_manual.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_autoattach.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c137
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_ma.c285
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c346
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c149
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_decl_tag.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_ext.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_haskv.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_map_in_map.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_newkv.c17
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_nokv.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c100
-rw-r--r--tools/testing/selftests/bpf/progs/test_build_id.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c71
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup_link.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c116
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.h11
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c981
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_autosize.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_extern.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_read_macros.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_existence.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c21
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_mods.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_module.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_size.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c49
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c63
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path_check_types.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_deny_namespace.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_enable_stats.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_fill_link_info.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_fsverity.c48
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_xattr.c84
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_data.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func1.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func10.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func11.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func12.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func13.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func14.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func15.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func16.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func17.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func2.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func3.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func4.c55
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func5.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func6.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func7.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func8.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func9.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_args.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c172
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_map_resize.c94
-rw-r--r--tools/testing/selftests/bpf/progs/test_hash_large_key.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_helper_restricted.c123
-rw-r--r--tools/testing/selftests/bpf/progs/test_jhash.h31
-rw-r--r--tools/testing/selftests/bpf/progs/test_kernel_flag.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c86
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c43
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c44
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_module.c46
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_weak.c71
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c487
-rw-r--r--tools/testing/selftests/bpf/progs/test_ldsx_insn.c125
-rw-r--r--tools/testing/selftests/bpf/progs/test_legacy_printk.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_log_buf.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_log_fixup.c74
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_and_delete.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_key.c46
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_redirect.c90
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_reroute.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_seg6local.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map.c39
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_lock.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c76
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_ops.c138
-rw-r--r--tools/testing/selftests/bpf/progs/test_migrate_reuseport.c135
-rw-r--r--tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_mmap.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_obj_id.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c118
-rw-r--r--tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c114
-rw-r--r--tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_branches.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_buffer.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_link.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_skip.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_devmap.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_htab.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_invalid.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_pkt_access.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_pkt_md_access.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_probe_user.c43
-rw-r--r--tools/testing/selftests/bpf/progs/test_prog_array_init.c39
-rw-r--r--tools/testing/selftests/bpf/progs/test_ptr_untrusted.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_queue_stack_map.h4
-rw-r--r--tools/testing/selftests/bpf/progs/test_rdonly_maps.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c71
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_multi.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_n.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c98
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_write.c46
-rw-r--r--tools/testing/selftests/bpf/progs/test_seg6_loop.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_send_signal_kern.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_set_remove_xattr.c133
-rw-r--r--tools/testing/selftests/bpf/progs/test_sig_in_xattr.c87
-rw-r--r--tools/testing/selftests/bpf/progs/test_siphash.h64
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_assign.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup.c135
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_ctx.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c77
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf_single.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h67
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_ktls.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_redir.c68
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_strp.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_update.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock.c72
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock_fail.c317
-rw-r--r--tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c17
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked1.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked2.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs_extable.c51
-rw-r--r--tools/testing/selftests/bpf/progs/test_subskeleton.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_subskeleton_lib.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_local_data.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_pt_regs.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_under_cgroup.c77
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_bpf.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_change_tail.c106
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_dtime.c392
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_edt.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c129
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_peer.c56
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c265
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c167
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c591
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h138
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_estats.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_time_tai.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_tracepoint.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_trampoline_count.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c747
-rw-r--r--tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c83
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe.c99
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c117
-rw-r--r--tools/testing/selftests/bpf/progs/test_urandom_usdt.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c141
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt_multispec.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_user_ringbuf.h35
-rw-r--r--tools/testing/selftests/bpf/progs/test_varlen.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale1.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale2.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale3.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c92
-rw-r--r--tools/testing/selftests/bpf/progs/test_vmlinux.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c54
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c128
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_dynptr.c256
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_link.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_loop.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c658
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c111
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_pull_data.c48
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_redirect.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_update_frags.c42
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_vlan.c39
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c15
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c425
-rw-r--r--tools/testing/selftests/bpf/progs/timer_crash.c54
-rw-r--r--tools/testing/selftests/bpf/progs/timer_failure.c68
-rw-r--r--tools/testing/selftests/bpf/progs/timer_interrupt.c48
-rw-r--r--tools/testing/selftests/bpf/progs/timer_lockup.c87
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim.c88
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim_reject.c74
-rw-r--r--tools/testing/selftests/bpf/progs/token_lsm.c32
-rw-r--r--tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c21
-rw-r--r--tools/testing/selftests/bpf/progs/trace_printk.c7
-rw-r--r--tools/testing/selftests/bpf/progs/trace_vprintk.c34
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_failure.c32
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c166
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct_many_args.c95
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c136
-rw-r--r--tools/testing/selftests/bpf/progs/twfw.c58
-rw-r--r--tools/testing/selftests/bpf/progs/type_cast.c79
-rw-r--r--tools/testing/selftests/bpf/progs/udp_limit.c2
-rw-r--r--tools/testing/selftests/bpf/progs/uninit_stack.c88
-rw-r--r--tools/testing/selftests/bpf/progs/unsupported_ops.c22
-rw-r--r--tools/testing/selftests/bpf/progs/update_map_in_htab.c30
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi.c143
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_bench.c15
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c39
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_pid_filter.c40
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_session.c71
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c48
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c44
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c44
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c16
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c31
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall.c15
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c73
-rw-r--r--tools/testing/selftests/bpf/progs/uptr_failure.c105
-rw-r--r--tools/testing/selftests/bpf/progs/uptr_map_failure.c27
-rw-r--r--tools/testing/selftests/bpf/progs/uptr_update_failure.c42
-rw-r--r--tools/testing/selftests/bpf/progs/uretprobe_stack.c96
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_fail.c245
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_success.c212
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_and.c113
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c257
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c274
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_array_access.c731
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_async_cb_context.c181
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_basic_stack.c100
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bitfield_write.c100
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bits_iter.c232
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c1866
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c174
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c639
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c554
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c888
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_get_stack.c124
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_trap.c71
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bswap.c63
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c80
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c20
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cfg.c162
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c89
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c227
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c308
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_const.c98
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_const_or.c82
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c295
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c228
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_d_path.c48
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c862
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c56
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_div0.c213
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_div_overflow.c144
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c310
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_subprogs.c391
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotol.c63
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotox.c389
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c825
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c550
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_restricted.c279
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_value_access.c1282
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_int_ptr.c155
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c786
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c213
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_jit_convergence.c114
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c170
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ld_ind.c110
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c447
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_leak_ptr.c92
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_linked_scalars.c34
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_live_stack.c344
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_load_acquire.c234
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c307
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_lsm.c162
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_lwt.c234
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_in_map.c260
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ptr.c162
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ptr_mixing.c265
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ret_val.c110
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_masking.c410
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_1.c109
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_2.c28
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_meta_access.c284
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c354
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_mtu.c20
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_mul.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c116
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c49
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c41
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c304
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_prevent_map_lookup.c61
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_private_stack.c359
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_raw_stack.c372
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c50
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ref_tracking.c1495
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_reg_equal.c58
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_regalloc.c364
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ringbuf.c131
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_runtime_jit.c360
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c830
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sdiv.c1224
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_search_pruning.c362
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock.c1169
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock_addr.c331
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c187
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c1282
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spin_lock.c559
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_stack_ptr.c536
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_store_release.c301
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c849
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subreg.c673
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_tailcall.c31
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c105
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_typedef.c23
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_uninit.c61
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv.c953
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv_perf.c34
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value.c158
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c78
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c215
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_or_null.c288
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c1441
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_var_off.c418
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_accept.c103
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_reject.c176
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_xadd.c124
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_xdp.c24
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c1722
-rw-r--r--tools/testing/selftests/bpf/progs/vrf_socket_lookup.c89
-rw-r--r--tools/testing/selftests/bpf/progs/wq.c206
-rw-r--r--tools/testing/selftests/bpf/progs/wq_failures.c167
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_dummy.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_features.c268
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_flowtable.c148
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_hw_metadata.c117
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_metadata.c111
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_metadata2.c24
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_map.c94
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c111
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c865
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_tx.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdping_kern.c9
-rw-r--r--tools/testing/selftests/bpf/progs/xdpwall.c364
-rw-r--r--tools/testing/selftests/bpf/progs/xfrm_info.c41
-rw-r--r--tools/testing/selftests/bpf/progs/xsk_xdp_progs.c123
-rw-r--r--tools/testing/selftests/bpf/sdt-config.h6
-rw-r--r--tools/testing/selftests/bpf/sdt.h515
-rw-r--r--tools/testing/selftests/bpf/task_local_storage_helpers.h22
-rw-r--r--tools/testing/selftests/bpf/test_bpftool.py22
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh23
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_map.sh398
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_metadata.sh7
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py627
-rw-r--r--tools/testing/selftests/bpf/test_btf.h18
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c170
-rw-r--r--tools/testing/selftests/bpf/test_cpp.cpp113
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c82
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh20
-rw-r--r--tools/testing/selftests/bpf/test_flow_dissector.c780
-rwxr-xr-xtools/testing/selftests/bpf/test_flow_dissector.sh168
-rwxr-xr-xtools/testing/selftests/bpf/test_ftrace.sh7
-rwxr-xr-xtools/testing/selftests/bpf/test_kmod.sh20
-rw-r--r--tools/testing/selftests/bpf/test_kmods/.gitignore (renamed from tools/testing/selftests/bpf/bpf_testmod/.gitignore)0
-rw-r--r--tools/testing/selftests/bpf/test_kmods/Makefile21
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c39
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c39
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c84
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c393
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h (renamed from tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h)29
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c1787
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.h125
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h166
-rwxr-xr-xtools/testing/selftests/bpf/test_lirc_mode2.sh5
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c9
-rw-r--r--tools/testing/selftests/bpf/test_loader.c1406
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c226
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh464
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh149
-rw-r--r--tools/testing/selftests/bpf/test_maps.c502
-rw-r--r--tools/testing/selftests/bpf/test_maps.h7
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c148
-rw-r--r--tools/testing/selftests/bpf/test_progs.c1913
-rw-r--r--tools/testing/selftests/bpf/test_progs.h381
-rwxr-xr-xtools/testing/selftests/bpf/test_skb_cgroup_id.sh63
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_user.c181
-rw-r--r--tools/testing/selftests/bpf/test_sock.c481
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c1421
-rwxr-xr-xtools/testing/selftests/bpf/test_sock_addr.sh58
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c463
-rw-r--r--tools/testing/selftests/bpf/test_stub.c44
-rw-r--r--tools/testing/selftests/bpf/test_tag.c14
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh99
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_redirect.sh216
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh295
-rwxr-xr-xtools/testing/selftests/bpf/test_tcp_check_syncookie.sh84
-rw-r--r--tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c257
-rw-r--r--tools/testing/selftests/bpf/test_tcp_hdr_options.h1
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c35
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh798
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c854
-rw-r--r--tools/testing/selftests/bpf/test_verifier_log.c174
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_features.sh107
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_meta.sh52
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh77
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh118
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan.sh228
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_xdping.sh4
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh277
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c436
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h54
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c699
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h26
-rw-r--r--tools/testing/selftests/bpf/unpriv_helpers.c144
-rw-r--r--tools/testing/selftests/bpf/unpriv_helpers.h7
-rw-r--r--tools/testing/selftests/bpf/uprobe_multi.c136
-rw-r--r--tools/testing/selftests/bpf/uprobe_multi.ld11
-rw-r--r--tools/testing/selftests/bpf/uptr_test_common.h63
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c78
-rw-r--r--tools/testing/selftests/bpf/urandom_read_aux.c9
-rw-r--r--tools/testing/selftests/bpf/urandom_read_lib1.c35
-rw-r--r--tools/testing/selftests/bpf/urandom_read_lib2.c8
-rw-r--r--tools/testing/selftests/bpf/usdt.h545
-rw-r--r--tools/testing/selftests/bpf/verifier/and.c66
-rw-r--r--tools/testing/selftests/bpf/verifier/array_access.c378
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_and.c23
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c149
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_fetch.c151
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_invalid.c25
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_or.c25
-rw-r--r--tools/testing/selftests/bpf/verifier/basic_instr.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/basic_stack.c64
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds.c746
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds_deduction.c124
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c406
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_get_stack.c44
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_loop_inline.c270
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_st_mem.c99
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c445
-rw-r--r--tools/testing/selftests/bpf/verifier/cfg.c73
-rw-r--r--tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c72
-rw-r--r--tools/testing/selftests/bpf/verifier/cgroup_skb.c197
-rw-r--r--tools/testing/selftests/bpf/verifier/cgroup_storage.c220
-rw-r--r--tools/testing/selftests/bpf/verifier/const_or.c60
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx.c197
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c33
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_sk_msg.c181
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_skb.c76
-rw-r--r--tools/testing/selftests/bpf/verifier/d_path.c37
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c23
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_packet_access.c656
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c40
-rw-r--r--tools/testing/selftests/bpf/verifier/div0.c184
-rw-r--r--tools/testing/selftests/bpf/verifier/div_overflow.c110
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_access_var_len.c616
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_packet_access.c460
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_value_access.c953
-rw-r--r--tools/testing/selftests/bpf/verifier/int_ptr.c160
-rw-r--r--tools/testing/selftests/bpf/verifier/jit.c93
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c40
-rw-r--r--tools/testing/selftests/bpf/verifier/jset.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/jump.c22
-rw-r--r--tools/testing/selftests/bpf/verifier/ld_imm64.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/ld_ind.c72
-rw-r--r--tools/testing/selftests/bpf/verifier/leak_ptr.c67
-rw-r--r--tools/testing/selftests/bpf/verifier/loops1.c206
-rw-r--r--tools/testing/selftests/bpf/verifier/lwt.c189
-rw-r--r--tools/testing/selftests/bpf/verifier/map_in_map.c62
-rw-r--r--tools/testing/selftests/bpf/verifier/map_kptr.c444
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr.c95
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr_mixing.c100
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ret_val.c65
-rw-r--r--tools/testing/selftests/bpf/verifier/masking.c322
-rw-r--r--tools/testing/selftests/bpf/verifier/meta_access.c235
-rw-r--r--tools/testing/selftests/bpf/verifier/perf_event_sample_period.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c180
-rw-r--r--tools/testing/selftests/bpf/verifier/prevent_map_lookup.c29
-rw-r--r--tools/testing/selftests/bpf/verifier/raw_stack.c305
-rw-r--r--tools/testing/selftests/bpf/verifier/raw_tp_writable.c35
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c907
-rw-r--r--tools/testing/selftests/bpf/verifier/regalloc.c277
-rw-r--r--tools/testing/selftests/bpf/verifier/runtime_jit.c231
-rw-r--r--tools/testing/selftests/bpf/verifier/search_pruning.c156
-rw-r--r--tools/testing/selftests/bpf/verifier/sleepable.c91
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c658
-rw-r--r--tools/testing/selftests/bpf/verifier/spill_fill.c106
-rw-r--r--tools/testing/selftests/bpf/verifier/spin_lock.c333
-rw-r--r--tools/testing/selftests/bpf/verifier/stack_ptr.c361
-rw-r--r--tools/testing/selftests/bpf/verifier/subreg.c533
-rw-r--r--tools/testing/selftests/bpf/verifier/uninit.c39
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c524
-rw-r--r--tools/testing/selftests/bpf/verifier/value.c104
-rw-r--r--tools/testing/selftests/bpf/verifier/value_adj_spill.c43
-rw-r--r--tools/testing/selftests/bpf/verifier/value_illegal_alu.c95
-rw-r--r--tools/testing/selftests/bpf/verifier/value_or_null.c171
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c876
-rw-r--r--tools/testing/selftests/bpf/verifier/var_off.c343
-rw-r--r--tools/testing/selftests/bpf/verifier/xadd.c97
-rw-r--r--tools/testing/selftests/bpf/verifier/xdp.c14
-rw-r--r--tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c900
-rwxr-xr-xtools/testing/selftests/bpf/verify_sig_setup.sh136
-rw-r--r--tools/testing/selftests/bpf/veristat.c3384
-rw-r--r--tools/testing/selftests/bpf/veristat.cfg18
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh282
-rwxr-xr-xtools/testing/selftests/bpf/with_addr.sh54
-rwxr-xr-xtools/testing/selftests/bpf/with_tunnels.sh36
-rw-r--r--tools/testing/selftests/bpf/xdp_features.c718
-rw-r--r--tools/testing/selftests/bpf/xdp_features.h20
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c894
-rw-r--r--tools/testing/selftests/bpf/xdp_metadata.h52
-rw-r--r--tools/testing/selftests/bpf/xdp_synproxy.c471
-rw-r--r--tools/testing/selftests/bpf/xdping.c26
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c1065
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h158
-rw-r--r--tools/testing/selftests/bpf/xsk.c781
-rw-r--r--tools/testing/selftests/bpf/xsk.h249
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh108
-rw-r--r--tools/testing/selftests/bpf/xsk_xdp_common.h13
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c456
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h34
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c6
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c6
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c47
-rw-r--r--tools/testing/selftests/cachestat/.gitignore3
-rw-r--r--tools/testing/selftests/cachestat/Makefile8
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c365
-rw-r--r--tools/testing/selftests/capabilities/Makefile2
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c28
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c17
-rw-r--r--tools/testing/selftests/cgroup/.gitignore11
-rw-r--r--tools/testing/selftests/cgroup/Makefile35
-rw-r--r--tools/testing/selftests/cgroup/config6
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c (renamed from tools/testing/selftests/cgroup/cgroup_util.c)265
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h (renamed from tools/testing/selftests/cgroup/cgroup_util.h)55
-rw-r--r--tools/testing/selftests/cgroup/lib/libcgroup.mk19
-rw-r--r--tools/testing/selftests/cgroup/memcg_protection.m89
-rw-r--r--tools/testing/selftests/cgroup/test_core.c265
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c825
-rw-r--r--tools/testing/selftests/cgroup/test_cpuset.c276
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh1193
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_v1_base.sh77
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_v1_hp.sh46
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c733
-rw-r--r--tools/testing/selftests/cgroup/test_hugetlb_memcg.c234
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c298
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c57
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c872
-rw-r--r--tools/testing/selftests/cgroup/test_pids.c181
-rwxr-xr-xtools/testing/selftests/cgroup/test_stress.sh2
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c636
-rw-r--r--tools/testing/selftests/cgroup/wait_inotify.c87
-rw-r--r--tools/testing/selftests/clone3/Makefile2
-rw-r--r--tools/testing/selftests/clone3/clone3.c285
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c6
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c8
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h17
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c125
-rw-r--r--tools/testing/selftests/connector/.gitignore1
-rw-r--r--tools/testing/selftests/connector/Makefile6
-rw-r--r--tools/testing/selftests/connector/proc_filter.c310
-rw-r--r--tools/testing/selftests/core/.gitignore1
-rw-r--r--tools/testing/selftests/core/Makefile4
-rw-r--r--tools/testing/selftests/core/close_range_test.c154
-rw-r--r--tools/testing/selftests/core/unshare_test.c94
-rw-r--r--tools/testing/selftests/coredump/.gitignore4
-rw-r--r--tools/testing/selftests/coredump/Makefile13
-rw-r--r--tools/testing/selftests/coredump/README.rst50
-rw-r--r--tools/testing/selftests/coredump/config3
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_protocol_test.c1568
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_test.c742
-rw-r--r--tools/testing/selftests/coredump/coredump_test.h59
-rw-r--r--tools/testing/selftests/coredump/coredump_test_helpers.c383
-rwxr-xr-xtools/testing/selftests/coredump/stackdump14
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c169
-rw-r--r--tools/testing/selftests/cpu-hotplug/Makefile2
-rw-r--r--tools/testing/selftests/cpu-hotplug/config1
-rwxr-xr-xtools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh142
-rw-r--r--tools/testing/selftests/cpufreq/.gitignore2
-rw-r--r--tools/testing/selftests/cpufreq/Makefile1
-rw-r--r--tools/testing/selftests/cpufreq/config8
-rwxr-xr-xtools/testing/selftests/cpufreq/cpufreq.sh36
-rwxr-xr-xtools/testing/selftests/cpufreq/main.sh62
-rwxr-xr-xtools/testing/selftests/cpufreq/module.sh6
-rw-r--r--tools/testing/selftests/damon/.gitignore3
-rw-r--r--tools/testing/selftests/damon/Makefile26
-rw-r--r--tools/testing/selftests/damon/_common.sh11
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py837
-rw-r--r--tools/testing/selftests/damon/access_memory.c41
-rw-r--r--tools/testing/selftests/damon/access_memory_even.c39
-rw-r--r--tools/testing/selftests/damon/config6
-rwxr-xr-xtools/testing/selftests/damon/damon_nr_regions.py147
-rwxr-xr-xtools/testing/selftests/damon/damos_apply_interval.py67
-rwxr-xr-xtools/testing/selftests/damon/damos_quota.py70
-rwxr-xr-xtools/testing/selftests/damon/damos_quota_goal.py80
-rwxr-xr-xtools/testing/selftests/damon/damos_tried_regions.py65
-rwxr-xr-xtools/testing/selftests/damon/drgn_dump_damon_status.py223
-rwxr-xr-xtools/testing/selftests/damon/lru_sort.sh39
-rwxr-xr-xtools/testing/selftests/damon/reclaim.sh40
-rwxr-xr-xtools/testing/selftests/damon/sysfs.py303
-rwxr-xr-xtools/testing/selftests/damon/sysfs.sh370
-rwxr-xr-xtools/testing/selftests/damon/sysfs_memcg_path_leak.sh43
-rwxr-xr-xtools/testing/selftests/damon/sysfs_no_op_commit_break.py72
-rwxr-xr-xtools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh56
-rwxr-xr-xtools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py33
-rwxr-xr-xtools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py55
-rw-r--r--tools/testing/selftests/devices/error_logs/Makefile3
-rwxr-xr-xtools/testing/selftests/devices/error_logs/test_device_error_logs.py85
-rw-r--r--tools/testing/selftests/devices/probe/Makefile4
-rw-r--r--tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml40
-rw-r--r--tools/testing/selftests/devices/probe/boards/google,spherion.yaml54
-rwxr-xr-xtools/testing/selftests/devices/probe/test_discoverable_devices.py358
-rw-r--r--tools/testing/selftests/dma/Makefile6
-rw-r--r--tools/testing/selftests/dma/config1
-rw-r--r--tools/testing/selftests/dma/dma_map_benchmark.c125
-rw-r--r--tools/testing/selftests/dmabuf-heaps/.gitignore1
-rw-r--r--tools/testing/selftests/dmabuf-heaps/Makefile2
-rw-r--r--tools/testing/selftests/dmabuf-heaps/config3
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c277
-rw-r--r--tools/testing/selftests/drivers/.gitignore1
-rw-r--r--tools/testing/selftests/drivers/dma-buf/Makefile2
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c236
-rwxr-xr-xtools/testing/selftests/drivers/gpu/drm_mm.sh4
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore4
-rw-r--r--tools/testing/selftests/drivers/net/Makefile43
-rw-r--r--tools/testing/selftests/drivers/net/README.rst136
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile32
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh46
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh80
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh84
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh80
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh156
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh108
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh97
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh578
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh105
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh161
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh45
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config21
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh109
-rw-r--r--tools/testing/selftests/drivers/net/bonding/lag_lib.sh177
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh45
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh45
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh361
-rw-r--r--tools/testing/selftests/drivers/net/bonding/settings1
-rw-r--r--tools/testing/selftests/drivers/net/config10
-rw-r--r--tools/testing/selftests/drivers/net/dsa/Makefile36
l---------tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_mld.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh1
-rw-r--r--tools/testing/selftests/drivers/net/dsa/forwarding.config2
l---------tools/testing/selftests/drivers/net/dsa/local_termination.sh1
l---------tools/testing/selftests/drivers/net/dsa/no_forwarding.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh9
l---------tools/testing/selftests/drivers/net/dsa/tc_actions.sh1
l---------tools/testing/selftests/drivers/net/dsa/tc_taprio.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh47
-rw-r--r--tools/testing/selftests/drivers/net/gro.c1369
-rwxr-xr-xtools/testing/selftests/drivers/net/gro.py164
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py329
-rw-r--r--tools/testing/selftests/drivers/net/hw/.gitignore4
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile79
-rw-r--r--tools/testing/selftests/drivers/net/hw/config11
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py110
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_port_split.py (renamed from tools/testing/selftests/net/devlink_port_split.py)32
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py439
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py77
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool.sh (renamed from tools/testing/selftests/net/forwarding/ethtool.sh)18
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_extended_state.sh)46
-rw-r--r--tools/testing/selftests/drivers/net/hw/ethtool_lib.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_lib.sh)0
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_mm.sh341
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_rmon.sh145
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/hw_stats_l3.sh334
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh111
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c464
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py145
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/irq.py99
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py52
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/loopback.sh (renamed from tools/testing/selftests/net/forwarding/loopback.sh)5
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c1524
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/nic_timestamp.py113
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py144
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_api.py476
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py832
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_flow_label.py167
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py92
-rw-r--r--tools/testing/selftests/drivers/net/hw/settings1
-rw-r--r--tools/testing/selftests/drivers/net/hw/toeplitz.c655
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/toeplitz.py211
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py261
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/xsk_reconfig.py60
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py55
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py287
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py139
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/remote.py15
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/remote_netns.py21
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/remote_ssh.py39
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh419
-rwxr-xr-xtools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh668
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh334
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh29
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh105
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh7
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh46
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh120
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh54
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh250
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh11
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh342
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh272
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh17
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/extack.sh24
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh31
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh263
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh264
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh311
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh71
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh22
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh64
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/pci_reset.sh58
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh111
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh95
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/port_scale.sh10
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh68
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh26
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh27
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh182
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh74
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh42
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh243
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh151
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh46
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh184
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh107
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_lag.sh137
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh147
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh72
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh213
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh147
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh50
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/router_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh258
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_ets.sh40
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_offload.sh290
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh342
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh98
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh33
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh69
l---------tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh77
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh44
l---------tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh1
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh161
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh15
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh339
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh6
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh35
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh34
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh334
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh23
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh22
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_sample.sh658
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan.sh283
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh39
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh12
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh17
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh65
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_id.py23
-rw-r--r--tools/testing/selftests/drivers/net/napi_id_helper.c100
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_threaded.py143
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh74
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_cmdline.sh65
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_fragmented_msg.sh122
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_overflow.sh67
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_sysdata.sh272
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_torture.sh130
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile26
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/config11
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh381
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh27
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh8
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh31
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh114
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh85
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib.sh51
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh421
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh117
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh626
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh144
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/psample.sh183
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/settings1
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh80
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh87
-rwxr-xr-xtools/testing/selftests/drivers/net/netpoll_basic.py396
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/basic_qos.sh253
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/psfp.sh323
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh212
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py241
-rwxr-xr-xtools/testing/selftests/drivers/net/psp.py640
-rw-r--r--tools/testing/selftests/drivers/net/psp_responder.c483
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py125
-rwxr-xr-xtools/testing/selftests/drivers/net/ring_reconfig.py167
-rwxr-xr-xtools/testing/selftests/drivers/net/shaper.py461
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py321
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile18
-rw-r--r--tools/testing/selftests/drivers/net/team/config7
-rwxr-xr-xtools/testing/selftests/drivers/net/team/dev_addr_lists.sh51
-rwxr-xr-xtools/testing/selftests/drivers/net/team/options.sh188
-rwxr-xr-xtools/testing/selftests/drivers/net/team/propagation.sh80
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/Makefile12
-rwxr-xr-xtools/testing/selftests/drivers/net/virtio_net/basic_features.sh131
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/config8
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh99
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py779
-rw-r--r--tools/testing/selftests/drivers/ntsync/.gitignore1
-rw-r--r--tools/testing/selftests/drivers/ntsync/Makefile7
-rw-r--r--tools/testing/selftests/drivers/ntsync/config1
-rw-r--r--tools/testing/selftests/drivers/ntsync/ntsync.c1343
-rw-r--r--tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile6
-rwxr-xr-xtools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh494
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/Makefile20
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/config1
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c270
-rwxr-xr-xtools/testing/selftests/drivers/sdsi/sdsi.sh25
-rw-r--r--tools/testing/selftests/drivers/sdsi/sdsi_test.py226
-rw-r--r--tools/testing/selftests/dt/.gitignore1
-rw-r--r--tools/testing/selftests/dt/Makefile21
-rw-r--r--tools/testing/selftests/dt/compatible_ignore_list1
-rwxr-xr-xtools/testing/selftests/dt/test_unprobed_devices.sh93
-rw-r--r--tools/testing/selftests/efivarfs/create-read.c2
-rwxr-xr-xtools/testing/selftests/efivarfs/efivarfs.sh173
-rw-r--r--tools/testing/selftests/exec/.gitignore9
-rw-r--r--tools/testing/selftests/exec/Makefile46
-rwxr-xr-xtools/testing/selftests/exec/binfmt_script.py (renamed from tools/testing/selftests/exec/binfmt_script)10
-rwxr-xr-xtools/testing/selftests/exec/check-exec-tests.sh205
-rw-r--r--tools/testing/selftests/exec/check-exec.c463
-rw-r--r--tools/testing/selftests/exec/config2
-rw-r--r--tools/testing/selftests/exec/execveat.c172
-rw-r--r--tools/testing/selftests/exec/false.c5
-rw-r--r--tools/testing/selftests/exec/load_address.c83
-rw-r--r--tools/testing/selftests/exec/non-regular.c2
-rw-r--r--tools/testing/selftests/exec/null-argv.c78
-rw-r--r--tools/testing/selftests/exec/recursion-depth.c53
-rw-r--r--tools/testing/selftests/fchmodat2/.gitignore (renamed from tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore)2
-rw-r--r--tools/testing/selftests/fchmodat2/Makefile15
-rw-r--r--tools/testing/selftests/fchmodat2/fchmodat2_test.c142
-rw-r--r--tools/testing/selftests/filelock/Makefile5
-rw-r--r--tools/testing/selftests/filelock/ofdlocks.c132
-rw-r--r--tools/testing/selftests/filesystems/.gitignore4
-rw-r--r--tools/testing/selftests/filesystems/Makefile4
-rw-r--r--tools/testing/selftests/filesystems/anon_inode_test.c69
-rw-r--r--tools/testing/selftests/filesystems/binderfs/Makefile4
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c26
-rw-r--r--tools/testing/selftests/filesystems/binderfs/config1
-rw-r--r--tools/testing/selftests/filesystems/devpts_pts.c2
-rw-r--r--tools/testing/selftests/filesystems/epoll/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c46
-rw-r--r--tools/testing/selftests/filesystems/eventfd/.gitignore (renamed from tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore)2
-rw-r--r--tools/testing/selftests/filesystems/eventfd/Makefile7
-rw-r--r--tools/testing/selftests/filesystems/eventfd/eventfd_test.c311
-rw-r--r--tools/testing/selftests/filesystems/fat/.gitignore2
-rw-r--r--tools/testing/selftests/filesystems/fat/Makefile7
-rw-r--r--tools/testing/selftests/filesystems/fat/config2
-rw-r--r--tools/testing/selftests/filesystems/fat/rename_exchange.c37
-rwxr-xr-xtools/testing/selftests/filesystems/fat/run_fat_tests.sh82
-rw-r--r--tools/testing/selftests/filesystems/fclog.c130
-rw-r--r--tools/testing/selftests/filesystems/file_stressor.c194
-rw-r--r--tools/testing/selftests/filesystems/fuse/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/fuse/Makefile21
-rw-r--r--tools/testing/selftests/filesystems/fuse/fuse_mnt.c146
-rw-r--r--tools/testing/selftests/filesystems/fuse/fusectl_test.c140
-rw-r--r--tools/testing/selftests/filesystems/kernfs_test.c38
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/Makefile11
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c528
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c555
-rw-r--r--tools/testing/selftests/filesystems/nsfs/.gitignore (renamed from tools/testing/selftests/nsfs/.gitignore)1
-rw-r--r--tools/testing/selftests/filesystems/nsfs/Makefile (renamed from tools/testing/selftests/nsfs/Makefile)4
-rw-r--r--tools/testing/selftests/filesystems/nsfs/config (renamed from tools/testing/selftests/nsfs/config)0
-rw-r--r--tools/testing/selftests/filesystems/nsfs/iterate_mntns.c163
-rw-r--r--tools/testing/selftests/filesystems/nsfs/owner.c (renamed from tools/testing/selftests/nsfs/owner.c)0
-rw-r--r--tools/testing/selftests/filesystems/nsfs/pidns.c (renamed from tools/testing/selftests/nsfs/pidns.c)0
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/Makefile14
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c160
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/log.h26
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c720
-rw-r--r--tools/testing/selftests/filesystems/statmount/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/statmount/Makefile10
-rw-r--r--tools/testing/selftests/filesystems/statmount/listmount_test.c66
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h82
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c702
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c291
-rw-r--r--tools/testing/selftests/filesystems/utils.c589
-rw-r--r--tools/testing/selftests/filesystems/utils.h48
-rw-r--r--tools/testing/selftests/filesystems/wrappers.h108
-rw-r--r--tools/testing/selftests/firmware/Makefile2
-rw-r--r--tools/testing/selftests/firmware/config1
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh170
-rwxr-xr-xtools/testing/selftests/firmware/fw_lib.sh19
-rw-r--r--tools/testing/selftests/firmware/fw_namespace.c9
-rwxr-xr-xtools/testing/selftests/firmware/fw_run_tests.sh4
-rwxr-xr-xtools/testing/selftests/firmware/fw_upload.sh214
-rw-r--r--tools/testing/selftests/ftrace/.gitignore1
-rw-r--r--tools/testing/selftests/ftrace/Makefile5
-rw-r--r--tools/testing/selftests/ftrace/config27
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest115
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest-ktap8
-rw-r--r--tools/testing/selftests/ftrace/poll.c74
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc101
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc95
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc31
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc122
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc82
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc107
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc78
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc97
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc82
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc19
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc41
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc61
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc32
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc63
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc40
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc29
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc41
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc122
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc38
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc81
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-mod.tc191
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc30
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc109
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc177
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc103
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc31
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc44
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc29
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc17
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc42
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc9
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions79
-rw-r--r--tools/testing/selftests/ftrace/test.d/instances/instance-event.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc60
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc25
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc16
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc40
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc19
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc34
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc20
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/profile.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc8
-rw-r--r--tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc1
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc53
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc24
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc23
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc35
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc64
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc74
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc18
-rw-r--r--tools/testing/selftests/futex/Makefile6
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore6
-rw-r--r--tools/testing/selftests/futex/functional/Makefile28
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa.c263
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c219
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c270
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c106
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c259
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c86
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c131
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c144
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c83
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c209
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c81
-rw-r--r--tools/testing/selftests/futex/functional/futex_waitv.c226
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh60
-rw-r--r--tools/testing/selftests/futex/include/futex2test.h98
-rw-r--r--tools/testing/selftests/futex/include/futextest.h22
-rw-r--r--tools/testing/selftests/futex/include/logging.h148
-rw-r--r--tools/testing/selftests/gpio/.gitignore4
-rw-r--r--tools/testing/selftests/gpio/Makefile29
-rw-r--r--tools/testing/selftests/gpio/config3
-rwxr-xr-xtools/testing/selftests/gpio/gpio-aggregator.sh727
-rw-r--r--tools/testing/selftests/gpio/gpio-chip-info.c57
-rw-r--r--tools/testing/selftests/gpio/gpio-line-name.c55
-rw-r--r--tools/testing/selftests/gpio/gpio-mockup-cdev.c198
-rw-r--r--tools/testing/selftests/gpio/gpio-mockup-chardev.c323
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup-sysfs.sh168
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup.sh494
-rwxr-xr-xtools/testing/selftests/gpio/gpio-sim.sh418
-rw-r--r--tools/testing/selftests/hid/.gitignore7
-rw-r--r--tools/testing/selftests/hid/Makefile242
-rw-r--r--tools/testing/selftests/hid/config32
-rw-r--r--tools/testing/selftests/hid/config.common241
-rw-r--r--tools/testing/selftests/hid/config.x86_644
-rwxr-xr-xtools/testing/selftests/hid/hid-apple.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-core.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-gamepad.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-ite.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-keyboard.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-mouse.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-multitouch.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-sony.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-tablet.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-usb_crash.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-wacom.sh7
-rw-r--r--tools/testing/selftests/hid/hid_bpf.c911
-rw-r--r--tools/testing/selftests/hid/hid_common.h480
-rw-r--r--tools/testing/selftests/hid/hidraw.c694
-rw-r--r--tools/testing/selftests/hid/progs/hid.c600
-rw-r--r--tools/testing/selftests/hid/progs/hid_bpf_helpers.h125
-rwxr-xr-xtools/testing/selftests/hid/run-hid-tools-tests.sh30
-rw-r--r--tools/testing/selftests/hid/settings3
-rw-r--r--tools/testing/selftests/hid/tests/__init__.py2
-rw-r--r--tools/testing/selftests/hid/tests/base.py432
-rw-r--r--tools/testing/selftests/hid/tests/base_device.py448
-rw-r--r--tools/testing/selftests/hid/tests/base_gamepad.py238
-rw-r--r--tools/testing/selftests/hid/tests/conftest.py81
-rw-r--r--tools/testing/selftests/hid/tests/descriptors_wacom.py1360
-rw-r--r--tools/testing/selftests/hid/tests/test_apple_keyboard.py441
-rw-r--r--tools/testing/selftests/hid/tests/test_gamepad.py665
-rw-r--r--tools/testing/selftests/hid/tests/test_hid_core.py154
-rw-r--r--tools/testing/selftests/hid/tests/test_ite_keyboard.py167
-rw-r--r--tools/testing/selftests/hid/tests/test_keyboard.py485
-rw-r--r--tools/testing/selftests/hid/tests/test_mouse.py1047
-rw-r--r--tools/testing/selftests/hid/tests/test_multitouch.py2143
-rw-r--r--tools/testing/selftests/hid/tests/test_sony.py343
-rw-r--r--tools/testing/selftests/hid/tests/test_tablet.py1577
-rw-r--r--tools/testing/selftests/hid/tests/test_usb_crash.py103
-rw-r--r--tools/testing/selftests/hid/tests/test_wacom_generic.py1407
-rwxr-xr-xtools/testing/selftests/hid/vmtest.sh474
-rw-r--r--tools/testing/selftests/intel_pstate/Makefile8
-rw-r--r--tools/testing/selftests/intel_pstate/aperf.c2
-rwxr-xr-xtools/testing/selftests/intel_pstate/run.sh9
-rw-r--r--tools/testing/selftests/iommu/.gitignore3
-rw-r--r--tools/testing/selftests/iommu/Makefile10
-rw-r--r--tools/testing/selftests/iommu/config5
-rw-r--r--tools/testing/selftests/iommu/iommufd.c3544
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c748
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h1259
-rw-r--r--tools/testing/selftests/ipc/Makefile2
-rw-r--r--tools/testing/selftests/ipc/msgque.c68
-rw-r--r--tools/testing/selftests/ir/ir_loopback.c13
-rwxr-xr-xtools/testing/selftests/ir/ir_loopback.sh2
-rw-r--r--tools/testing/selftests/kcmp/Makefile2
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c10
-rw-r--r--tools/testing/selftests/kexec/.gitignore2
-rw-r--r--tools/testing/selftests/kexec/Makefile13
-rwxr-xr-xtools/testing/selftests/kexec/kexec_common_lib.sh67
-rwxr-xr-xtools/testing/selftests/kexec/test_kexec_file_load.sh13
-rw-r--r--tools/testing/selftests/kexec/test_kexec_jump.c72
-rwxr-xr-xtools/testing/selftests/kexec/test_kexec_jump.sh42
-rw-r--r--tools/testing/selftests/kho/arm64.conf9
-rw-r--r--tools/testing/selftests/kho/init.c95
-rwxr-xr-xtools/testing/selftests/kho/vmtest.sh186
-rw-r--r--tools/testing/selftests/kho/x86.conf7
-rw-r--r--tools/testing/selftests/kmod/config5
-rwxr-xr-xtools/testing/selftests/kmod/kmod.sh13
-rw-r--r--tools/testing/selftests/kselftest.h230
-rw-r--r--tools/testing/selftests/kselftest/ksft.py93
-rw-r--r--tools/testing/selftests/kselftest/ktap_helpers.sh126
-rwxr-xr-xtools/testing/selftests/kselftest/module.sh2
-rw-r--r--tools/testing/selftests/kselftest/runner.sh122
-rwxr-xr-xtools/testing/selftests/kselftest_deps.sh92
-rw-r--r--tools/testing/selftests/kselftest_harness.h633
-rw-r--r--tools/testing/selftests/kselftest_harness/.gitignore2
-rw-r--r--tools/testing/selftests/kselftest_harness/Makefile8
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.c136
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.expected64
-rwxr-xr-xtools/testing/selftests/kselftest_harness/harness-selftest.sh13
-rw-r--r--tools/testing/selftests/kselftest_module.h22
-rw-r--r--tools/testing/selftests/kvm/.gitignore49
-rw-r--r--tools/testing/selftests/kvm/Makefile155
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm356
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c3
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c609
-rw-r--r--tools/testing/selftests/kvm/arch_timer.c252
-rw-r--r--tools/testing/selftests/kvm/arm64/aarch32_id_regs.c167
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer.c215
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c1059
-rw-r--r--tools/testing/selftests/kvm/arm64/at.c166
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c607
-rw-r--r--tools/testing/selftests/kvm/arm64/external_aborts.c415
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c (renamed from tools/testing/selftests/kvm/aarch64/get-reg-list.c)829
-rw-r--r--tools/testing/selftests/kvm/arm64/hello_el2.c71
-rw-r--r--tools/testing/selftests/kvm/arm64/host_sve.c127
-rw-r--r--tools/testing/selftests/kvm/arm64/hypercalls.c334
-rw-r--r--tools/testing/selftests/kvm/arm64/kvm-uuid.c70
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic-v3.c177
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c1135
-rw-r--r--tools/testing/selftests/kvm/arm64/psci_test.c291
-rw-r--r--tools/testing/selftests/kvm/arm64/sea_to_user.c331
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c813
-rw-r--r--tools/testing/selftests/kvm/arm64/smccc_filter.c281
-rw-r--r--tools/testing/selftests/kvm/arm64/vcpu_width_config.c121
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_init.c1021
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_irq.c1084
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c413
-rw-r--r--tools/testing/selftests/kvm/arm64/vpmu_counter_access.c643
-rw-r--r--tools/testing/selftests/kvm/coalesced_io_test.c236
-rw-r--r--tools/testing/selftests/kvm/config3
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c477
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c259
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c743
-rw-r--r--tools/testing/selftests/kvm/get-reg-list.c405
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c492
-rw-r--r--tools/testing/selftests/kvm/guest_print_test.c235
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c180
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h59
-rw-r--r--tools/testing/selftests/kvm/include/arm64/arch_timer.h182
-rw-r--r--tools/testing/selftests/kvm/include/arm64/delay.h25
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic.h65
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v3.h604
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v3_its.h20
-rw-r--r--tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h10
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h387
-rw-r--r--tools/testing/selftests/kvm/include/arm64/spinlock.h13
-rw-r--r--tools/testing/selftests/kvm/include/arm64/ucall.h20
-rw-r--r--tools/testing/selftests/kvm/include/arm64/vgic.h40
-rw-r--r--tools/testing/selftests/kvm/include/guest_modes.h4
-rw-r--r--tools/testing/selftests/kvm/include/kvm_syscalls.h81
-rw-r--r--tools/testing/selftests/kvm/include/kvm_test_harness.h36
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h1359
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_types.h20
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/arch_timer.h85
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/processor.h220
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/ucall.h20
-rw-r--r--tools/testing/selftests/kvm/include/lru_gen_util.h51
-rw-r--r--tools/testing/selftests/kvm/include/memstress.h82
-rw-r--r--tools/testing/selftests/kvm/include/numaif.h110
-rw-r--r--tools/testing/selftests/kvm/include/perf_test_util.h54
-rw-r--r--tools/testing/selftests/kvm/include/riscv/arch_timer.h71
-rw-r--r--tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h195
-rw-r--r--tools/testing/selftests/kvm/include/riscv/sbi.h141
-rw-r--r--tools/testing/selftests/kvm/include/riscv/ucall.h21
-rw-r--r--tools/testing/selftests/kvm/include/s390/debug_print.h69
-rw-r--r--tools/testing/selftests/kvm/include/s390/diag318_test_handler.h (renamed from tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h)0
-rw-r--r--tools/testing/selftests/kvm/include/s390/facility.h50
-rw-r--r--tools/testing/selftests/kvm/include/s390/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/s390/processor.h (renamed from tools/testing/selftests/kvm/include/s390x/processor.h)19
-rw-r--r--tools/testing/selftests/kvm/include/s390/sie.h240
-rw-r--r--tools/testing/selftests/kvm/include/s390/ucall.h19
-rw-r--r--tools/testing/selftests/kvm/include/sparsebit.h56
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h191
-rw-r--r--tools/testing/selftests/kvm/include/timer_test.h45
-rw-r--r--tools/testing/selftests/kvm/include/ucall_common.h116
-rw-r--r--tools/testing/selftests/kvm/include/userfaultfd_util.h52
-rw-r--r--tools/testing/selftests/kvm/include/x86/apic.h118
-rw-r--r--tools/testing/selftests/kvm/include/x86/evmcs.h (renamed from tools/testing/selftests/kvm/include/evmcs.h)248
-rw-r--r--tools/testing/selftests/kvm/include/x86/hyperv.h361
-rw-r--r--tools/testing/selftests/kvm/include/x86/kvm_util_arch.h51
-rw-r--r--tools/testing/selftests/kvm/include/x86/mce.h23
-rw-r--r--tools/testing/selftests/kvm/include/x86/pmu.h123
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h1497
-rw-r--r--tools/testing/selftests/kvm/include/x86/sev.h147
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm.h (renamed from tools/testing/selftests/kvm/include/x86_64/svm.h)41
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm_util.h62
-rw-r--r--tools/testing/selftests/kvm/include/x86/ucall.h13
-rw-r--r--tools/testing/selftests/kvm/include/x86/vmx.h (renamed from tools/testing/selftests/kvm/include/x86_64/vmx.h)101
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h478
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm_util.h49
-rw-r--r--tools/testing/selftests/kvm/irqfd_test.c143
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c273
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c10
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c477
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c339
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c115
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic.c163
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_private.h33
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3.c449
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c265
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/handlers.S126
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c732
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/spinlock.c27
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/ucall.c34
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/vgic.c212
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c27
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c17
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c79
-rw-r--r--tools/testing/selftests/kvm/lib/guest_sprintf.c314
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c2341
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h117
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/exception.S65
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/processor.c389
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/ucall.c38
-rw-r--r--tools/testing/selftests/kvm/lib/lru_gen_util.c387
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c391
-rw-r--r--tools/testing/selftests/kvm/lib/perf_test_util.c145
-rw-r--r--tools/testing/selftests/kvm/lib/rbtree.c1
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/handlers.S104
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c517
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/ucall.c32
-rw-r--r--tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c (renamed from tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c)14
-rw-r--r--tools/testing/selftests/kvm/lib/s390/facility.c14
-rw-r--r--tools/testing/selftests/kvm/lib/s390/processor.c (renamed from tools/testing/selftests/kvm/lib/s390x/processor.c)85
-rw-r--r--tools/testing/selftests/kvm/lib/s390/ucall.c22
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/ucall.c59
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c56
-rw-r--r--tools/testing/selftests/kvm/lib/string_override.c48
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c332
-rw-r--r--tools/testing/selftests/kvm/lib/ucall_common.c163
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c206
-rw-r--r--tools/testing/selftests/kvm/lib/x86/apic.c43
-rw-r--r--tools/testing/selftests/kvm/lib/x86/handlers.S (renamed from tools/testing/selftests/kvm/lib/x86_64/handlers.S)4
-rw-r--r--tools/testing/selftests/kvm/lib/x86/hyperv.c113
-rw-r--r--tools/testing/selftests/kvm/lib/x86/memstress.c112
-rw-r--r--tools/testing/selftests/kvm/lib/x86/pmu.c80
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c1325
-rw-r--r--tools/testing/selftests/kvm/lib/x86/sev.c199
-rw-r--r--tools/testing/selftests/kvm/lib/x86/svm.c (renamed from tools/testing/selftests/kvm/lib/x86_64/svm.c)40
-rw-r--r--tools/testing/selftests/kvm/lib/x86/ucall.c56
-rw-r--r--tools/testing/selftests/kvm/lib/x86/vmx.c (renamed from tools/testing/selftests/kvm/lib/x86_64/vmx.c)276
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c1370
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/ucall.c59
-rw-r--r--tools/testing/selftests/kvm/loongarch/arch_timer.c200
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c127
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c1146
-rw-r--r--tools/testing/selftests/kvm/mmu_stress_test.c425
-rw-r--r--tools/testing/selftests/kvm/pre_fault_memory_test.c239
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c109
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c83
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c1302
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c731
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c325
-rw-r--r--tools/testing/selftests/kvm/s390/cmma_test.c695
-rw-r--r--tools/testing/selftests/kvm/s390/config2
-rw-r--r--tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c301
-rw-r--r--tools/testing/selftests/kvm/s390/debug_test.c160
-rw-r--r--tools/testing/selftests/kvm/s390/memop.c1187
-rw-r--r--tools/testing/selftests/kvm/s390/resets.c (renamed from tools/testing/selftests/kvm/s390x/resets.c)184
-rw-r--r--tools/testing/selftests/kvm/s390/shared_zeropage_test.c111
-rw-r--r--tools/testing/selftests/kvm/s390/sync_regs_test.c (renamed from tools/testing/selftests/kvm/s390x/sync_regs_test.c)155
-rw-r--r--tools/testing/selftests/kvm/s390/tprot.c244
-rw-r--r--tools/testing/selftests/kvm/s390/ucontrol_test.c798
-rw-r--r--tools/testing/selftests/kvm/s390/user_operexec.c140
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c166
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c392
-rw-r--r--tools/testing/selftests/kvm/steal_time.c318
-rw-r--r--tools/testing/selftests/kvm/system_counter_offset_test.c127
-rw-r--r--tools/testing/selftests/kvm/x86/amx_test.c315
-rw-r--r--tools/testing/selftests/kvm/x86/aperfmperf_test.c213
-rw-r--r--tools/testing/selftests/kvm/x86/apic_bus_clock_test.c194
-rw-r--r--tools/testing/selftests/kvm/x86/cpuid_test.c225
-rw-r--r--tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c100
-rw-r--r--tools/testing/selftests/kvm/x86/debug_regs.c (renamed from tools/testing/selftests/kvm/x86_64/debug_regs.c)112
-rw-r--r--tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c263
-rw-r--r--tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c39
-rw-r--r--tools/testing/selftests/kvm/x86/fastops_test.c209
-rw-r--r--tools/testing/selftests/kvm/x86/feature_msrs_test.c113
-rw-r--r--tools/testing/selftests/kvm/x86/fix_hypercall_test.c142
-rw-r--r--tools/testing/selftests/kvm/x86/flds_emulation.h52
-rw-r--r--tools/testing/selftests/kvm/x86/hwcr_msr_test.c45
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_clock.c263
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c170
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_evmcs.c307
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c98
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_features.c695
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_ipi.c310
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_svm_test.c199
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c680
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_buslock_test.c135
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_clock_test.c156
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_pv_test.c218
-rw-r--r--tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c62
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c136
-rw-r--r--tools/testing/selftests/kvm/x86/msrs_test.c489
-rw-r--r--tools/testing/selftests/kvm/x86/nested_close_kvm_test.c104
-rw-r--r--tools/testing/selftests/kvm/x86/nested_emulation_test.c146
-rw-r--r--tools/testing/selftests/kvm/x86/nested_exceptions_test.c290
-rw-r--r--tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c116
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c)102
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c244
-rw-r--r--tools/testing/selftests/kvm/x86/nx_huge_pages_test.c266
-rwxr-xr-xtools/testing/selftests/kvm/x86/nx_huge_pages_test.sh69
-rw-r--r--tools/testing/selftests/kvm/x86/platform_info_test.c78
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c697
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_event_filter_test.c878
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_conversions_test.c480
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c120
-rw-r--r--tools/testing/selftests/kvm/x86/recalc_apic_map_test.c74
-rw-r--r--tools/testing/selftests/kvm/x86/set_boot_cpu_id.c146
-rw-r--r--tools/testing/selftests/kvm/x86/set_sregs_test.c158
-rw-r--r--tools/testing/selftests/kvm/x86/sev_init2_tests.c165
-rw-r--r--tools/testing/selftests/kvm/x86/sev_migrate_tests.c397
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c229
-rw-r--r--tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c105
-rw-r--r--tools/testing/selftests/kvm/x86/smm_test.c (renamed from tools/testing/selftests/kvm/x86_64/smm_test.c)117
-rw-r--r--tools/testing/selftests/kvm/x86/state_test.c (renamed from tools/testing/selftests/kvm/x86_64/state_test.c)149
-rw-r--r--tools/testing/selftests/kvm/x86/svm_int_ctl_test.c115
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c59
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c210
-rw-r--r--tools/testing/selftests/kvm/x86/svm_vmcall_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c)24
-rw-r--r--tools/testing/selftests/kvm/x86/sync_regs_test.c411
-rw-r--r--tools/testing/selftests/kvm/x86/triple_fault_event_test.c124
-rw-r--r--tools/testing/selftests/kvm/x86/tsc_msrs_test.c (renamed from tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c)94
-rw-r--r--tools/testing/selftests/kvm/x86/tsc_scaling_sync.c110
-rw-r--r--tools/testing/selftests/kvm/x86/ucna_injection_test.c295
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_io_test.c103
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c (renamed from tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c)341
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_apic_access_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c)47
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c)107
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c142
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c103
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_msrs_test.c131
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c132
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c248
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c)45
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c)117
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_ipi_test.c (renamed from tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c)146
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_state_test.c262
-rw-r--r--tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c139
-rw-r--r--tools/testing/selftests/kvm/x86/xen_shinfo_test.c1145
-rw-r--r--tools/testing/selftests/kvm/x86/xen_vmcall_test.c (renamed from tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c)56
-rw-r--r--tools/testing/selftests/kvm/x86/xss_msr_test.c54
-rw-r--r--tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c116
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c165
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_cpuid_test.c175
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c176
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c234
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmio_warning_test.c126
-rw-r--r--tools/testing/selftests/kvm/x86_64/platform_info_test.c107
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_sregs_test.c142
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c243
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c86
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c131
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c167
-rw-r--r--tools/testing/selftests/kvm/x86_64/xss_msr_test.c76
-rw-r--r--tools/testing/selftests/landlock/.gitignore5
-rw-r--r--tools/testing/selftests/landlock/Makefile27
-rw-r--r--tools/testing/selftests/landlock/audit.h478
-rw-r--r--tools/testing/selftests/landlock/audit_test.c672
-rw-r--r--tools/testing/selftests/landlock/base_test.c529
-rw-r--r--tools/testing/selftests/landlock/common.h252
-rw-r--r--tools/testing/selftests/landlock/config19
-rw-r--r--tools/testing/selftests/landlock/config.um1
-rw-r--r--tools/testing/selftests/landlock/fs_test.c7650
-rw-r--r--tools/testing/selftests/landlock/net_test.c2003
-rw-r--r--tools/testing/selftests/landlock/ptrace_test.c577
-rw-r--r--tools/testing/selftests/landlock/sandbox-and-launch.c82
-rw-r--r--tools/testing/selftests/landlock/scoped_abstract_unix_test.c1152
-rw-r--r--tools/testing/selftests/landlock/scoped_base_variants.h156
-rw-r--r--tools/testing/selftests/landlock/scoped_common.h28
-rw-r--r--tools/testing/selftests/landlock/scoped_multiple_domain_variants.h152
-rw-r--r--tools/testing/selftests/landlock/scoped_signal_test.c562
-rw-r--r--tools/testing/selftests/landlock/scoped_test.c33
-rw-r--r--tools/testing/selftests/landlock/true.c5
-rw-r--r--tools/testing/selftests/landlock/wait-pipe-sandbox.c131
-rw-r--r--tools/testing/selftests/landlock/wait-pipe.c42
-rw-r--r--tools/testing/selftests/landlock/wrappers.h47
-rw-r--r--tools/testing/selftests/lib.mk184
-rw-r--r--tools/testing/selftests/lib/Makefile3
-rw-r--r--tools/testing/selftests/lib/config2
-rwxr-xr-xtools/testing/selftests/lib/prime_numbers.sh4
-rwxr-xr-xtools/testing/selftests/lib/printf.sh4
-rwxr-xr-xtools/testing/selftests/lib/strscpy.sh3
-rw-r--r--tools/testing/selftests/livepatch/.gitignore1
-rw-r--r--tools/testing/selftests/livepatch/Makefile7
-rw-r--r--tools/testing/selftests/livepatch/README25
-rw-r--r--tools/testing/selftests/livepatch/config1
-rw-r--r--tools/testing/selftests/livepatch/functions.sh217
-rwxr-xr-xtools/testing/selftests/livepatch/test-callbacks.sh76
-rwxr-xr-xtools/testing/selftests/livepatch/test-ftrace.sh45
-rwxr-xr-xtools/testing/selftests/livepatch/test-kprobe.sh64
-rwxr-xr-xtools/testing/selftests/livepatch/test-livepatch.sh143
-rwxr-xr-xtools/testing/selftests/livepatch/test-shadow-vars.sh2
-rwxr-xr-xtools/testing/selftests/livepatch/test-state.sh26
-rwxr-xr-xtools/testing/selftests/livepatch/test-syscall.sh56
-rwxr-xr-xtools/testing/selftests/livepatch/test-sysfs.sh205
-rw-r--r--tools/testing/selftests/livepatch/test_klp-call_getpid.c44
-rw-r--r--tools/testing/selftests/livepatch/test_modules/Makefile27
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c57
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c70
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c121
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c93
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c24
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c38
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c51
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c301
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_state.c162
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_state2.c191
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_state3.c5
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c116
-rw-r--r--tools/testing/selftests/liveupdate/.gitignore9
-rw-r--r--tools/testing/selftests/liveupdate/Makefile34
-rw-r--r--tools/testing/selftests/liveupdate/config11
-rwxr-xr-xtools/testing/selftests/liveupdate/do_kexec.sh16
-rw-r--r--tools/testing/selftests/liveupdate/liveupdate.c348
-rw-r--r--tools/testing/selftests/liveupdate/luo_kexec_simple.c89
-rw-r--r--tools/testing/selftests/liveupdate/luo_multi_session.c162
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.c266
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.h44
-rw-r--r--tools/testing/selftests/lkdtm/.gitignore1
-rw-r--r--tools/testing/selftests/lkdtm/Makefile1
-rw-r--r--tools/testing/selftests/lkdtm/config13
-rwxr-xr-xtools/testing/selftests/lkdtm/run.sh20
-rwxr-xr-xtools/testing/selftests/lkdtm/stack-entropy.sh51
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt31
-rw-r--r--tools/testing/selftests/lsm/.gitignore1
-rw-r--r--tools/testing/selftests/lsm/Makefile17
-rw-r--r--tools/testing/selftests/lsm/common.c89
-rw-r--r--tools/testing/selftests/lsm/common.h33
-rw-r--r--tools/testing/selftests/lsm/config3
-rw-r--r--tools/testing/selftests/lsm/lsm_get_self_attr_test.c275
-rw-r--r--tools/testing/selftests/lsm/lsm_list_modules_test.c146
-rw-r--r--tools/testing/selftests/lsm/lsm_set_self_attr_test.c73
-rw-r--r--tools/testing/selftests/media_tests/Makefile2
-rw-r--r--tools/testing/selftests/media_tests/media_device_open.c2
-rw-r--r--tools/testing/selftests/media_tests/media_device_test.c2
-rw-r--r--tools/testing/selftests/media_tests/regression_test.txt8
-rw-r--r--tools/testing/selftests/media_tests/video_device_test.c111
-rw-r--r--tools/testing/selftests/membarrier/Makefile2
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_impl.h35
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_multi_thread.c4
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_single_thread.c8
-rw-r--r--tools/testing/selftests/memfd/Makefile4
-rw-r--r--tools/testing/selftests/memfd/fuse_test.c3
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c606
-rw-r--r--tools/testing/selftests/memory-hotplug/config1
-rwxr-xr-xtools/testing/selftests/memory-hotplug/mem-on-off-test.sh35
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c48
-rw-r--r--tools/testing/selftests/mm/.gitignore62
-rw-r--r--tools/testing/selftests/mm/Makefile257
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/charge_reserved_hugetlb.sh (renamed from tools/testing/selftests/vm/charge_reserved_hugetlb.sh)47
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/check_config.sh (renamed from tools/testing/selftests/vm/check_config.sh)14
-rw-r--r--tools/testing/selftests/mm/compaction_test.c276
-rw-r--r--tools/testing/selftests/mm/config13
-rw-r--r--tools/testing/selftests/mm/cow.c1897
-rw-r--r--tools/testing/selftests/mm/droppable.c53
-rw-r--r--tools/testing/selftests/mm/guard-regions.c2326
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c524
-rw-r--r--tools/testing/selftests/mm/gup_test.c260
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c2855
-rw-r--r--tools/testing/selftests/mm/hugepage-mmap.c (renamed from tools/testing/selftests/vm/hugepage-mmap.c)49
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c178
-rw-r--r--tools/testing/selftests/mm/hugepage-shm.c (renamed from tools/testing/selftests/vm/hugepage-shm.c)22
-rw-r--r--tools/testing/selftests/mm/hugepage-vmemmap.c134
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c367
-rw-r--r--tools/testing/selftests/mm/hugetlb-read-hwpoison.c322
-rw-r--r--tools/testing/selftests/mm/hugetlb-soft-offline.c228
-rw-r--r--tools/testing/selftests/mm/hugetlb_dio.c125
-rw-r--r--tools/testing/selftests/mm/hugetlb_fault_after_madv.c109
-rw-r--r--tools/testing/selftests/mm/hugetlb_madv_vs_map.c126
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/hugetlb_reparenting_test.sh (renamed from tools/testing/selftests/vm/hugetlb_reparenting_test.sh)131
-rw-r--r--tools/testing/selftests/mm/khugepaged.c1290
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c759
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c926
-rw-r--r--tools/testing/selftests/mm/madv_populate.c294
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c201
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c88
-rw-r--r--tools/testing/selftests/mm/map_populate.c (renamed from tools/testing/selftests/vm/map_populate.c)48
-rw-r--r--tools/testing/selftests/mm/mdwe_test.c303
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c346
-rw-r--r--tools/testing/selftests/mm/merge.c1174
-rw-r--r--tools/testing/selftests/mm/migration.c313
-rw-r--r--tools/testing/selftests/mm/mkdirty.c380
-rw-r--r--tools/testing/selftests/mm/mlock-random-test.c (renamed from tools/testing/selftests/vm/mlock-random-test.c)137
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c (renamed from tools/testing/selftests/vm/mlock2-tests.c)296
-rw-r--r--tools/testing/selftests/mm/mlock2.h (renamed from tools/testing/selftests/vm/mlock2.h)27
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c184
-rw-r--r--tools/testing/selftests/mm/mremap_dontunmap.c (renamed from tools/testing/selftests/vm/mremap_dontunmap.c)91
-rw-r--r--tools/testing/selftests/mm/mremap_test.c1485
-rw-r--r--tools/testing/selftests/mm/mseal_helpers.h41
-rw-r--r--tools/testing/selftests/mm/mseal_test.c1989
-rw-r--r--tools/testing/selftests/mm/on-fault-limit.c42
-rw-r--r--tools/testing/selftests/mm/page_frag/Makefile18
-rw-r--r--tools/testing/selftests/mm/page_frag/page_frag_test.c198
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c1738
-rw-r--r--tools/testing/selftests/mm/pfnmap.c269
-rw-r--r--tools/testing/selftests/mm/pkey-arm64.h140
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h (renamed from tools/testing/selftests/vm/pkey-helpers.h)80
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h (renamed from tools/testing/selftests/vm/pkey-powerpc.h)24
-rw-r--r--tools/testing/selftests/mm/pkey-x86.h (renamed from tools/testing/selftests/vm/pkey-x86.h)66
-rw-r--r--tools/testing/selftests/mm/pkey_sighandler_tests.c546
-rw-r--r--tools/testing/selftests/mm/pkey_util.c41
-rw-r--r--tools/testing/selftests/mm/prctl_thp_disable.c291
-rw-r--r--tools/testing/selftests/mm/process_madv.c344
-rw-r--r--tools/testing/selftests/mm/protection_keys.c (renamed from tools/testing/selftests/vm/protection_keys.c)556
-rw-r--r--tools/testing/selftests/mm/rmap.c433
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh553
-rw-r--r--tools/testing/selftests/mm/settings1
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c348
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c837
-rwxr-xr-xtools/testing/selftests/mm/test_hmm.sh (renamed from tools/testing/selftests/vm/test_hmm.sh)24
-rwxr-xr-xtools/testing/selftests/mm/test_page_frag.sh175
-rwxr-xr-xtools/testing/selftests/mm/test_vmalloc.sh (renamed from tools/testing/selftests/vm/test_vmalloc.sh)27
-rw-r--r--tools/testing/selftests/mm/thp_settings.c401
-rw-r--r--tools/testing/selftests/mm/thp_settings.h90
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c (renamed from tools/testing/selftests/vm/thuge-gen.c)215
-rw-r--r--tools/testing/selftests/mm/transhuge-stress.c138
-rw-r--r--tools/testing/selftests/mm/uffd-common.c745
-rw-r--r--tools/testing/selftests/mm/uffd-common.h139
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c521
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c1813
-rw-r--r--tools/testing/selftests/mm/uffd-wp-mremap.c380
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c336
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh117
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c260
-rw-r--r--tools/testing/selftests/mm/vm_util.c725
-rw-r--r--tools/testing/selftests/mm/vm_util.h158
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/write_hugetlb_memory.sh (renamed from tools/testing/selftests/vm/write_hugetlb_memory.sh)4
-rw-r--r--tools/testing/selftests/mm/write_to_hugetlbfs.c (renamed from tools/testing/selftests/vm/write_to_hugetlbfs.c)23
-rw-r--r--tools/testing/selftests/module/Makefile (renamed from tools/testing/selftests/user/Makefile)7
-rw-r--r--tools/testing/selftests/module/config3
-rwxr-xr-xtools/testing/selftests/module/find_symbol.sh81
-rw-r--r--tools/testing/selftests/mount/unprivileged-remount-test.c4
-rw-r--r--tools/testing/selftests/mount_setattr/.gitignore1
-rw-r--r--tools/testing/selftests/mount_setattr/Makefile9
-rw-r--r--tools/testing/selftests/mount_setattr/config1
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c2140
-rw-r--r--tools/testing/selftests/move_mount_set_group/.gitignore1
-rw-r--r--tools/testing/selftests/move_mount_set_group/Makefile7
-rw-r--r--tools/testing/selftests/move_mount_set_group/config1
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c375
-rw-r--r--tools/testing/selftests/mqueue/mq_open_tests.c2
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c37
-rw-r--r--tools/testing/selftests/mqueue/setting1
-rw-r--r--tools/testing/selftests/mseal_system_mappings/.gitignore2
-rw-r--r--tools/testing/selftests/mseal_system_mappings/Makefile6
-rw-r--r--tools/testing/selftests/mseal_system_mappings/config1
-rw-r--r--tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c119
-rw-r--r--tools/testing/selftests/namespaces/.gitignore12
-rw-r--r--tools/testing/selftests/namespaces/Makefile29
-rw-r--r--tools/testing/selftests/namespaces/config7
-rw-r--r--tools/testing/selftests/namespaces/cred_change_test.c814
-rw-r--r--tools/testing/selftests/namespaces/file_handle_test.c1429
-rw-r--r--tools/testing/selftests/namespaces/init_ino_test.c61
-rw-r--r--tools/testing/selftests/namespaces/listns_efault_test.c530
-rw-r--r--tools/testing/selftests/namespaces/listns_pagination_bug.c138
-rw-r--r--tools/testing/selftests/namespaces/listns_permissions_test.c759
-rw-r--r--tools/testing/selftests/namespaces/listns_test.c679
-rw-r--r--tools/testing/selftests/namespaces/ns_active_ref_test.c2672
-rw-r--r--tools/testing/selftests/namespaces/nsid_test.c981
-rw-r--r--tools/testing/selftests/namespaces/regression_pidfd_setns_test.c113
-rw-r--r--tools/testing/selftests/namespaces/siocgskns_test.c1824
-rw-r--r--tools/testing/selftests/namespaces/stress_test.c626
-rw-r--r--tools/testing/selftests/namespaces/wrappers.h35
-rw-r--r--tools/testing/selftests/nci/.gitignore1
-rw-r--r--tools/testing/selftests/nci/nci_dev.c431
-rw-r--r--tools/testing/selftests/net/.gitignore61
-rw-r--r--tools/testing/selftests/net/Makefile247
-rw-r--r--tools/testing/selftests/net/af_unix/.gitignore8
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile14
-rw-r--r--tools/testing/selftests/net/af_unix/config3
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c177
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c891
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c123
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c556
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c381
-rw-r--r--tools/testing/selftests/net/af_unix/so_peek_off.c162
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connect.c148
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connreset.c180
-rwxr-xr-xtools/testing/selftests/net/altnames.sh2
-rwxr-xr-xtools/testing/selftests/net/amt.sh298
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh213
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_untracked_subnets.sh283
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh51
-rw-r--r--tools/testing/selftests/net/bench/Makefile7
-rw-r--r--tools/testing/selftests/net/bench/page_pool/Makefile17
-rw-r--r--tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c267
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.c394
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.h238
-rwxr-xr-xtools/testing/selftests/net/bench/test_bench_page_pool.sh32
-rwxr-xr-xtools/testing/selftests/net/big_tcp.sh182
-rw-r--r--tools/testing/selftests/net/bind_bhash.c144
-rwxr-xr-xtools/testing/selftests/net/bind_bhash.sh68
-rw-r--r--tools/testing/selftests/net/bind_timewait.c92
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c809
-rw-r--r--tools/testing/selftests/net/bpf.mk53
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py (renamed from tools/testing/selftests/bpf/test_offload.py)223
-rwxr-xr-xtools/testing/selftests/net/broadcast_ether_dst.sh83
-rwxr-xr-xtools/testing/selftests/net/broadcast_pmtu.sh47
-rwxr-xr-xtools/testing/selftests/net/busy_poll_test.sh187
-rw-r--r--tools/testing/selftests/net/busy_poller.c368
-rw-r--r--tools/testing/selftests/net/can/.gitignore2
-rw-r--r--tools/testing/selftests/net/can/Makefile11
-rw-r--r--tools/testing/selftests/net/can/config3
-rw-r--r--tools/testing/selftests/net/can/test_raw_filter.c405
-rwxr-xr-xtools/testing/selftests/net/can/test_raw_filter.sh45
-rwxr-xr-xtools/testing/selftests/net/cmsg_ip.sh187
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c583
-rwxr-xr-xtools/testing/selftests/net/cmsg_so_mark.sh78
-rwxr-xr-xtools/testing/selftests/net/cmsg_so_priority.sh151
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh102
-rw-r--r--tools/testing/selftests/net/config147
-rwxr-xr-xtools/testing/selftests/net/drop_monitor_tests.sh23
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c320
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv4.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv6.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-other.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh858
-rwxr-xr-xtools/testing/selftests/net/fdb_flush.sh813
-rwxr-xr-xtools/testing/selftests/net/fdb_notify.sh96
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh9
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_multiprefix.sh98
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_nongw.sh115
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh1070
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh644
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh1221
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile96
-rw-r--r--tools/testing/selftests/net/forwarding/README50
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_activity_notify.sh170
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh301
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh387
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh99
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_locked_port.sh365
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh1467
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_host.sh103
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_max.sh1347
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh118
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh97
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh153
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh546
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh30
-rw-r--r--tools/testing/selftests/net/forwarding/config53
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh372
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh367
-rw-r--r--tools/testing/selftests/net/forwarding/fib_offload_lib.sh14
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample41
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh464
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh.sh41
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh323
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh174
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh466
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_flat.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_flat_key.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_hier.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_hier_key.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh6
-rw-r--r--tools/testing/selftests/net/forwarding/ip6gre_lib.sh518
-rw-r--r--tools/testing/selftests/net/forwarding/ipip_lib.sh1
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh1235
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh215
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh597
-rwxr-xr-xtools/testing/selftests/net/forwarding/min_max_mtu.sh283
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre.sh45
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh24
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh28
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh28
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh42
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh76
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_flower.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh66
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh92
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_neigh.sh39
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_nh.sh35
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh82
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh101
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_topo_lib.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_vlan.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/no_forwarding.sh264
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_ip.sh201
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_l4port.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/q_in_vni.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh347
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh47
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_1d.sh185
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh409
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_lag.sh324
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh155
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan.sh102
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh169
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh171
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh123
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh132
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh490
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_seed.sh333
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multicast.sh127
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh45
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_nh.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_vid_1.sh27
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh8
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_core.sh84
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh41
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh118
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh95
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_tbf_root.sh7
-rw-r--r--tools/testing/selftests/net/forwarding/settings1
-rwxr-xr-xtools/testing/selftests/net/forwarding/skbedit_priority.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh143
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh12
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh60
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_cfm.sh206
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh357
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_port_range.sh274
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh130
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_taprio.sh421
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_tunnel_key.sh162
-rw-r--r--tools/testing/selftests/net/forwarding/tsn_lib.sh275
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_asymmetric.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh504
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh804
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh11
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh45
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh837
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh766
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh11
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_reserved.sh347
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh563
-rwxr-xr-xtools/testing/selftests/net/fq_band_pktlimit.sh59
-rwxr-xr-xtools/testing/selftests/net/gre_gso.sh235
-rwxr-xr-xtools/testing/selftests/net/gre_ipv6_lladdr.sh184
-rw-r--r--tools/testing/selftests/net/hsr/Makefile12
-rw-r--r--tools/testing/selftests/net/hsr/config6
-rw-r--r--tools/testing/selftests/net/hsr/hsr_common.sh84
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh289
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_redbox.sh136
-rw-r--r--tools/testing/selftests/net/hsr/settings1
-rw-r--r--tools/testing/selftests/net/hwtstamp_config.c6
-rwxr-xr-xtools/testing/selftests/net/icmp.sh72
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh195
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c320
-rwxr-xr-xtools/testing/selftests/net/io_uring_zerocopy_tx.sh126
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh1683
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c1101
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c463
-rwxr-xr-xtools/testing/selftests/net/ip_local_port_range.sh7
-rw-r--r--tools/testing/selftests/net/ipsec.c281
-rw-r--r--tools/testing/selftests/net/ipv6_flowlabel.c75
-rwxr-xr-xtools/testing/selftests/net/ipv6_flowlabel.sh16
-rwxr-xr-xtools/testing/selftests/net/ipv6_force_forwarding.sh105
-rw-r--r--tools/testing/selftests/net/ipv6_fragmentation.c114
-rwxr-xr-xtools/testing/selftests/net/ipv6_route_update_soft_lockup.sh261
-rwxr-xr-xtools/testing/selftests/net/l2_tos_ttl_inherit.sh446
-rwxr-xr-xtools/testing/selftests/net/l2tp.sh130
-rw-r--r--tools/testing/selftests/net/lib.sh671
-rw-r--r--tools/testing/selftests/net/lib/.gitignore3
-rw-r--r--tools/testing/selftests/net/lib/Makefile24
-rw-r--r--tools/testing/selftests/net/lib/csum.c1010
-rw-r--r--tools/testing/selftests/net/lib/ksft.h56
-rwxr-xr-xtools/testing/selftests/net/lib/ksft_setup_loopback.sh111
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py33
-rw-r--r--tools/testing/selftests/net/lib/py/consts.py9
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py370
-rw-r--r--tools/testing/selftests/net/lib/py/netns.py49
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py135
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py278
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py68
-rw-r--r--tools/testing/selftests/net/lib/sh/defer.sh131
-rw-r--r--tools/testing/selftests/net/lib/xdp_dummy.bpf.c19
-rw-r--r--tools/testing/selftests/net/lib/xdp_helper.c131
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c680
-rwxr-xr-xtools/testing/selftests/net/link_netns.py141
-rwxr-xr-xtools/testing/selftests/net/lwt_dst_cache_ref_loop.sh246
-rw-r--r--tools/testing/selftests/net/mptcp/.gitignore3
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile32
-rw-r--r--tools/testing/selftests/net/mptcp/config39
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh418
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c904
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh684
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh5
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c435
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c613
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh4829
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh760
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c884
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh371
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh349
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c949
-rw-r--r--tools/testing/selftests/net/mptcp/settings2
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh197
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh945
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c36
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh93
-rw-r--r--tools/testing/selftests/net/nat6to4.bpf.c285
-rwxr-xr-xtools/testing/selftests/net/nat6to4.sh15
-rwxr-xr-xtools/testing/selftests/net/ndisc_unsolicited_na_test.sh249
-rwxr-xr-xtools/testing/selftests/net/netdev-l2addr.sh59
-rwxr-xr-xtools/testing/selftests/net/netdevice.sh60
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore8
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile73
-rw-r--r--tools/testing/selftests/net/netfilter/audit_logread.c165
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh175
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter_queue.sh85
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh120
-rw-r--r--tools/testing/selftests/net/netfilter/config101
-rw-r--r--tools/testing/selftests/net/netfilter/connect_close.c136
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh174
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c476
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_dump_flush.sh3
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_icmp_related.sh278
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh (renamed from tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh)121
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh515
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c125
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh51
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh87
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh164
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh216
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh205
-rw-r--r--tools/testing/selftests/net/netfilter/lib.sh10
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh71
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh121
-rw-r--r--tools/testing/selftests/net/netfilter/nf_queue.c (renamed from tools/testing/selftests/netfilter/nf-queue.c)0
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_audit.sh269
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh (renamed from tools/testing/selftests/netfilter/nft_concat_range.sh)713
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range_perf.sh9
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_conntrack_helper.sh171
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh850
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh811
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh157
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_meta.sh (renamed from tools/testing/selftests/netfilter/nft_meta.sh)4
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh1227
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh265
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh672
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_synproxy.sh96
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh358
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_tproxy_udp.sh262
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_zones_many.sh164
-rwxr-xr-xtools/testing/selftests/net/netfilter/packetdrill/common.sh33
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt118
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt62
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt59
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt44
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt51
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt34
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh181
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c100
-rw-r--r--tools/testing/selftests/net/netfilter/settings1
-rw-r--r--tools/testing/selftests/net/netfilter/udpclash.c158
-rwxr-xr-xtools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh121
-rwxr-xr-xtools/testing/selftests/net/netfilter/xt_string.sh133
-rw-r--r--tools/testing/selftests/net/netlink-dumps.c263
-rwxr-xr-xtools/testing/selftests/net/netns-name.sh102
-rwxr-xr-xtools/testing/selftests/net/netns-sysctl.sh40
-rw-r--r--tools/testing/selftests/net/nettest.c238
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py254
-rw-r--r--tools/testing/selftests/net/openvswitch/Makefile13
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh948
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py2775
-rw-r--r--tools/testing/selftests/net/openvswitch/settings1
-rw-r--r--tools/testing/selftests/net/ovpn/.gitignore2
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile34
-rw-r--r--tools/testing/selftests/net/ovpn/common.sh108
-rw-r--r--tools/testing/selftests/net/ovpn/config10
-rw-r--r--tools/testing/selftests/net/ovpn/data64.key5
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c2387
-rw-r--r--tools/testing/selftests/net/ovpn/tcp_peers.txt5
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-chachapoly.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket.sh45
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-float.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-large-mtu.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test.sh117
-rw-r--r--tools/testing/selftests/net/ovpn/udp_peers.txt6
-rw-r--r--tools/testing/selftests/net/packetdrill/Makefile12
-rw-r--r--tools/testing/selftests/net/packetdrill/config11
-rwxr-xr-xtools/testing/selftests/net/packetdrill/defaults.sh64
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh62
-rwxr-xr-xtools/testing/selftests/net/packetdrill/set_sysctls.py38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt31
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt45
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt72
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt72
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt41
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt49
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt74
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt40
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt33
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt64
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt62
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt56
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt33
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt42
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt63
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt42
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt92
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt91
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt145
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt57
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt64
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt69
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt58
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt120
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt59
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh684
-rw-r--r--tools/testing/selftests/net/proc_net_pktgen.c690
-rw-r--r--tools/testing/selftests/net/psock_fanout.c93
-rw-r--r--tools/testing/selftests/net/psock_lib.h8
-rw-r--r--tools/testing/selftests/net/psock_snd.c2
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh3
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c8
-rw-r--r--tools/testing/selftests/net/rds/.gitignore1
-rw-r--r--tools/testing/selftests/net/rds/Makefile18
-rw-r--r--tools/testing/selftests/net/rds/README.txt41
-rwxr-xr-xtools/testing/selftests/net/rds/config.sh53
-rwxr-xr-xtools/testing/selftests/net/rds/run.sh224
-rwxr-xr-xtools/testing/selftests/net/rds/test.py265
-rw-r--r--tools/testing/selftests/net/reuseaddr_conflict.c2
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c36
-rw-r--r--tools/testing/selftests/net/reuseport_addr_any.c36
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c6
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c8
-rwxr-xr-xtools/testing/selftests/net/route_hint.sh79
-rwxr-xr-xtools/testing/selftests/net/route_localnet.sh6
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh79
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.py30
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh1378
-rwxr-xr-xtools/testing/selftests/net/rtnetlink_notification.sh112
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests5
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c20
-rw-r--r--tools/testing/selftests/net/sample_map_ret0.bpf.c (renamed from tools/testing/selftests/bpf/progs/sample_map_ret0.c)26
-rw-r--r--tools/testing/selftests/net/sample_ret0.bpf.c (renamed from tools/testing/selftests/bpf/progs/sample_ret0.c)3
-rw-r--r--tools/testing/selftests/net/sctp_hello.c124
-rwxr-xr-xtools/testing/selftests/net/sctp_vrf.sh189
-rw-r--r--tools/testing/selftests/net/settings1
-rw-r--r--tools/testing/selftests/net/sk_bind_sendto_listen.c80
-rw-r--r--tools/testing/selftests/net/sk_connect_zero_addr.c62
-rw-r--r--tools/testing/selftests/net/sk_so_peek_off.c202
-rw-r--r--tools/testing/selftests/net/skf_net_off.c244
-rwxr-xr-xtools/testing/selftests/net/skf_net_off.sh30
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c274
-rw-r--r--tools/testing/selftests/net/so_netns_cookie.c61
-rw-r--r--tools/testing/selftests/net/so_rcv_listener.c168
-rw-r--r--tools/testing/selftests/net/so_txtime.c256
-rwxr-xr-xtools/testing/selftests/net/so_txtime.sh120
-rw-r--r--tools/testing/selftests/net/socket.c14
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh568
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh59
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh51
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh335
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh340
-rwxr-xr-xtools/testing/selftests/net/srv6_end_flavors_test.sh869
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh1104
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh1220
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh837
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh780
-rw-r--r--tools/testing/selftests/net/stress_reuseport_listen.c105
-rwxr-xr-xtools/testing/selftests/net/stress_reuseport_listen.sh25
-rw-r--r--tools/testing/selftests/net/tap.c434
-rw-r--r--tools/testing/selftests/net/tcp_ao/.gitignore2
-rw-r--r--tools/testing/selftests/net/tcp_ao/Makefile57
-rw-r--r--tools/testing/selftests/net/tcp_ao/bench-lookups.c360
-rw-r--r--tools/testing/selftests/net/tcp_ao/config11
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect-deny.c291
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect.c90
l---------tools/testing/selftests/net/tcp_ao/icmps-accept.c1
-rw-r--r--tools/testing/selftests/net/tcp_ao/icmps-discard.c448
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c1198
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/aolib.h832
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c556
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace.c543
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/kconfig.c157
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/netlink.c413
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/proc.c273
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/repair.c254
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c368
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c730
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/utils.c56
-rw-r--r--tools/testing/selftests/net/tcp_ao/restore.c251
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c459
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c191
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c255
-rw-r--r--tools/testing/selftests/net/tcp_ao/setsockopt-closed.c1011
-rw-r--r--tools/testing/selftests/net/tcp_ao/settings1
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c772
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c6
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c123
-rw-r--r--tools/testing/selftests/net/tcp_port_share.c258
-rwxr-xr-xtools/testing/selftests/net/test_blackhole_dev.sh11
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh798
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh972
-rw-r--r--tools/testing/selftests/net/test_ingress_egress_chaining.sh79
-rwxr-xr-xtools/testing/selftests/net/test_neigh.sh366
-rwxr-xr-xtools/testing/selftests/net/test_so_rcv.sh73
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_fdb_changelink.sh111
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh2562
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nh.sh223
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nolocalbypass.sh238
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_under_vrf.sh72
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh606
-rw-r--r--tools/testing/selftests/net/tfo.c171
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh112
-rw-r--r--tools/testing/selftests/net/timestamping.c55
-rw-r--r--tools/testing/selftests/net/tls.c2279
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh645
-rw-r--r--tools/testing/selftests/net/tun.c162
-rw-r--r--tools/testing/selftests/net/txtimestamp.c61
-rwxr-xr-xtools/testing/selftests/net/txtimestamp.sh24
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh66
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh13
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh104
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh265
-rw-r--r--tools/testing/selftests/net/udpgso.c214
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh92
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh29
-rw-r--r--tools/testing/selftests/net/udpgso_bench_rx.c23
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c46
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh126
-rwxr-xr-xtools/testing/selftests/net/veth.sh390
-rwxr-xr-xtools/testing/selftests/net/vlan_bridge_binding.sh258
-rwxr-xr-xtools/testing/selftests/net/vlan_hw_filter.sh103
-rwxr-xr-xtools/testing/selftests/net/vrf-xfrm-tests.sh109
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh299
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh98
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy.sh142
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy_add_speed.sh83
-rw-r--r--tools/testing/selftests/net/ynl.mk40
-rw-r--r--tools/testing/selftests/netfilter/Makefile13
-rwxr-xr-xtools/testing/selftests/netfilter/bridge_brouter.sh146
-rw-r--r--tools/testing/selftests/netfilter/config8
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_icmp_related.sh283
-rwxr-xr-xtools/testing/selftests/netfilter/ipvs.sh228
-rwxr-xr-xtools/testing/selftests/netfilter/nft_conntrack_helper.sh181
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh421
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh869
-rwxr-xr-xtools/testing/selftests/netfilter/nft_queue.sh376
-rwxr-xr-xtools/testing/selftests/netfilter/nft_trans_stress.sh78
-rw-r--r--tools/testing/selftests/nolibc/.gitignore7
-rw-r--r--tools/testing/selftests/nolibc/Makefile26
-rw-r--r--tools/testing/selftests/nolibc/Makefile.include10
-rw-r--r--tools/testing/selftests/nolibc/Makefile.nolibc382
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.c24
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.h9
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c2027
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh212
-rw-r--r--tools/testing/selftests/openat2/Makefile10
-rw-r--r--tools/testing/selftests/openat2/helpers.h14
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c28
-rw-r--r--tools/testing/selftests/openat2/rename_attack_test.c2
-rw-r--r--tools/testing/selftests/openat2/resolve_test.c2
-rw-r--r--tools/testing/selftests/pci_endpoint/.gitignore2
-rw-r--r--tools/testing/selftests/pci_endpoint/Makefile7
-rw-r--r--tools/testing/selftests/pci_endpoint/config4
-rw-r--r--tools/testing/selftests/pci_endpoint/pci_endpoint_test.c264
-rw-r--r--tools/testing/selftests/pcie_bwctrl/Makefile3
-rwxr-xr-xtools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh122
-rwxr-xr-xtools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh67
-rw-r--r--tools/testing/selftests/perf_events/.gitignore5
-rw-r--r--tools/testing/selftests/perf_events/Makefile6
-rw-r--r--tools/testing/selftests/perf_events/config1
-rw-r--r--tools/testing/selftests/perf_events/mmap.c236
-rw-r--r--tools/testing/selftests/perf_events/remove_on_exec.c260
-rw-r--r--tools/testing/selftests/perf_events/settings1
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c240
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c144
-rw-r--r--tools/testing/selftests/pid_namespace/.gitignore1
-rw-r--r--tools/testing/selftests/pid_namespace/Makefile8
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c359
-rw-r--r--tools/testing/selftests/pid_namespace/regression_enomem.c2
-rw-r--r--tools/testing/selftests/pidfd/.gitignore6
-rw-r--r--tools/testing/selftests/pidfd/Makefile8
-rw-r--r--tools/testing/selftests/pidfd/config1
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h217
-rw-r--r--tools/testing/selftests/pidfd/pidfd_bind_mount.c116
-rw-r--r--tools/testing/selftests/pidfd/pidfd_exec_helper.c12
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c30
-rw-r--r--tools/testing/selftests/pidfd/pidfd_file_handle_test.c563
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c33
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c766
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c62
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c4
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setattr_test.c69
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c264
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c107
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c67
-rw-r--r--tools/testing/selftests/pidfd/pidfd_xattr_test.c132
-rw-r--r--tools/testing/selftests/power_supply/Makefile4
-rw-r--r--tools/testing/selftests/power_supply/helpers.sh178
-rwxr-xr-xtools/testing/selftests/power_supply/test_power_supply_properties.sh114
-rw-r--r--tools/testing/selftests/powerpc/Makefile31
-rw-r--r--tools/testing/selftests/powerpc/alignment/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c11
-rw-r--r--tools/testing/selftests/powerpc/alignment/settings1
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/exec_target.c16
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/gettimeofday.c6
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/null_syscall.c3
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/settings1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile30
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h14
-rw-r--r--tools/testing/selftests/powerpc/copyloops/linux/export.h (renamed from tools/testing/selftests/powerpc/copyloops/asm/export.h)0
l---------tools/testing/selftests/powerpc/copyloops/mem_64.S1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S8
-rw-r--r--tools/testing/selftests/powerpc/copyloops/memmove_validate.c58
-rw-r--r--tools/testing/selftests/powerpc/copyloops/settings1
-rw-r--r--tools/testing/selftests/powerpc/dexcr/.gitignore4
-rw-r--r--tools/testing/selftests/powerpc/dexcr/Makefile12
-rw-r--r--tools/testing/selftests/powerpc/dexcr/chdexcr.c112
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.c172
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.h106
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr_test.c215
-rw-r--r--tools/testing/selftests/powerpc/dexcr/hashchk_test.c233
-rw-r--r--tools/testing/selftests/powerpc/dexcr/lsdexcr.c172
-rw-r--r--tools/testing/selftests/powerpc/dexcr/settings1
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr.h55
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_default_test.c207
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c169
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c4
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c39
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_user_test.c4
-rw-r--r--tools/testing/selftests/powerpc/dscr/settings2
-rw-r--r--tools/testing/selftests/powerpc/eeh/Makefile1
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-basic.sh41
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/powerpc/eeh/eeh-functions.sh168
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh45
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh35
-rw-r--r--tools/testing/selftests/powerpc/flags.mk9
-rw-r--r--tools/testing/selftests/powerpc/harness.c4
-rw-r--r--tools/testing/selftests/powerpc/include/basic_asm.h63
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h2
-rw-r--r--tools/testing/selftests/powerpc/include/pkeys.h15
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h81
-rw-r--r--tools/testing/selftests/powerpc/include/subunit.h16
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h73
-rw-r--r--tools/testing/selftests/powerpc/lib/reg.S107
-rw-r--r--tools/testing/selftests/powerpc/lib/settings1
-rw-r--r--tools/testing/selftests/powerpc/math/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/math/fpu.h25
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_asm.S48
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_preempt.c30
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_signal.c16
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_syscall.c8
-rw-r--r--tools/testing/selftests/powerpc/math/mma.S36
-rw-r--r--tools/testing/selftests/powerpc/math/mma.c48
-rw-r--r--tools/testing/selftests/powerpc/math/settings1
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_preempt.c10
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_signal.c1
-rw-r--r--tools/testing/selftests/powerpc/mce/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/mce/Makefile8
-rw-r--r--tools/testing/selftests/powerpc/mce/inject-ra-err.c65
-rw-r--r--tools/testing/selftests/powerpc/mce/settings1
l---------tools/testing/selftests/powerpc/mce/vas-api.h1
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore16
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile11
-rw-r--r--tools/testing/selftests/powerpc/mm/exec_prot.c231
-rw-r--r--tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c158
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_exec_prot.c2
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_siginfo.c2
-rw-r--r--tools/testing/selftests/powerpc/mm/settings1
-rw-r--r--tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c2
-rwxr-xr-xtools/testing/selftests/powerpc/mm/stress_code_patching.sh49
-rw-r--r--tools/testing/selftests/powerpc/mm/subpage_prot.c4
-rw-r--r--tools/testing/selftests/powerpc/mm/tlbie_test.c11
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c71
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/settings1
-rw-r--r--tools/testing/selftests/powerpc/papr_attributes/.gitignore (renamed from tools/testing/selftests/netfilter/.gitignore)2
-rw-r--r--tools/testing/selftests/powerpc/papr_attributes/Makefile8
-rw-r--r--tools/testing/selftests/powerpc/papr_attributes/attr_test.c113
-rw-r--r--tools/testing/selftests/powerpc/papr_attributes/settings1
-rw-r--r--tools/testing/selftests/powerpc/papr_sysparm/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/papr_sysparm/Makefile13
-rw-r--r--tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c196
-rw-r--r--tools/testing/selftests/powerpc/papr_sysparm/settings1
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/Makefile13
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c352
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/settings1
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile46
-rw-r--r--tools/testing/selftests/powerpc/pmu/branch_loops.S28
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_stcx_fail.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile25
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.h2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S43
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c6
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c63
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.c19
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.h6
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/.gitignore20
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile16
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c132
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c110
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c116
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c131
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c60
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c64
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c54
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c63
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c70
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c56
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c56
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c96
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c64
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c63
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c74
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c64
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c88
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c67
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c78
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c44
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.c53
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.h1
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore21
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile17
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c117
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/check_extended_reg_test.c35
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c57
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c553
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h236
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c58
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c58
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c66
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c77
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c85
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c74
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c67
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c65
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c69
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c64
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c66
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c69
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c74
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c80
-rw-r--r--tools/testing/selftests/powerpc/pmu/settings1
-rw-r--r--tools/testing/selftests/powerpc/primitives/Makefile5
l---------tools/testing/selftests/powerpc/primitives/asm/extable.h1
-rw-r--r--tools/testing/selftests/powerpc/primitives/linux/bitops.h (renamed from tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h)0
l---------tools/testing/selftests/powerpc/primitives/linux/wordpart.h1
-rw-r--r--tools/testing/selftests/powerpc/primitives/settings1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/Makefile42
-rw-r--r--tools/testing/selftests/powerpc/ptrace/child.h4
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c69
-rw-r--r--tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c671
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S52
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c125
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h14
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c111
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S33
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c445
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c34
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tar.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c23
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c25
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace.h84
-rw-r--r--tools/testing/selftests/powerpc/ptrace/settings1
-rwxr-xr-xtools/testing/selftests/powerpc/scripts/hmi.sh2
-rw-r--r--tools/testing/selftests/powerpc/scripts/settings1
-rw-r--r--tools/testing/selftests/powerpc/security/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/security/Makefile10
-rw-r--r--tools/testing/selftests/powerpc/security/entry_flush.c14
-rw-r--r--tools/testing/selftests/powerpc/security/flush_utils.c16
-rw-r--r--tools/testing/selftests/powerpc/security/flush_utils.h7
-rwxr-xr-xtools/testing/selftests/powerpc/security/mitigation-patching.sh78
-rw-r--r--tools/testing/selftests/powerpc/security/rfi_flush.c14
-rw-r--r--tools/testing/selftests/powerpc/security/settings1
-rw-r--r--tools/testing/selftests/powerpc/security/spectre_v2.c34
-rw-r--r--tools/testing/selftests/powerpc/security/uaccess_flush.c158
-rw-r--r--tools/testing/selftests/powerpc/signal/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile6
-rw-r--r--tools/testing/selftests/powerpc/signal/sigfuz.c2
-rw-r--r--tools/testing/selftests/powerpc/signal/signal_tm.c1
-rw-r--r--tools/testing/selftests/powerpc/signal/sigreturn_kernel.c132
-rw-r--r--tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c43
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile11
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/linux/export.h (renamed from tools/testing/selftests/powerpc/stringloops/asm/export.h)0
-rw-r--r--tools/testing/selftests/powerpc/stringloops/settings1
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/settings1
-rw-r--r--tools/testing/selftests/powerpc/syscalls/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/syscalls/rtas_filter.c81
-rw-r--r--tools/testing/selftests/powerpc/syscalls/settings1
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-exec.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-fork.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-poison.c2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-resched-dscr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c4
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-stack.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-sigreturn.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall-asm.S37
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall.c38
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tar.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tmspr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c5
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmxcopy.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h36
-rw-r--r--tools/testing/selftests/powerpc/utils.c459
-rw-r--r--tools/testing/selftests/powerpc/vphn/Makefile5
l---------tools/testing/selftests/powerpc/vphn/asm/lppaca.h1
l---------tools/testing/selftests/powerpc/vphn/asm/vphn.h1
-rw-r--r--tools/testing/selftests/powerpc/vphn/settings1
-rw-r--r--tools/testing/selftests/powerpc/vphn/test-vphn.c2
-rw-r--r--tools/testing/selftests/prctl/.gitignore2
-rw-r--r--tools/testing/selftests/prctl/Makefile4
-rw-r--r--tools/testing/selftests/prctl/config1
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c2
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c2
-rw-r--r--tools/testing/selftests/prctl/set-anon-vma-name-test.c104
-rw-r--r--tools/testing/selftests/prctl/set-process-name.c94
-rw-r--r--tools/testing/selftests/proc/.gitignore8
-rw-r--r--tools/testing/selftests/proc/Makefile11
-rw-r--r--tools/testing/selftests/proc/proc-2-is-kthread.c53
-rw-r--r--tools/testing/selftests/proc/proc-empty-vm.c541
-rw-r--r--tools/testing/selftests/proc/proc-maps-race.c806
-rw-r--r--tools/testing/selftests/proc/proc-net-dev-lseek.c68
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c154
-rw-r--r--tools/testing/selftests/proc/proc-pidns.c211
-rw-r--r--tools/testing/selftests/proc/proc-self-isnt-kthread.c37
-rw-r--r--tools/testing/selftests/proc/proc-subset-pid.c121
-rw-r--r--tools/testing/selftests/proc/proc-tid0.c81
-rw-r--r--tools/testing/selftests/proc/proc-uptime-001.c25
-rw-r--r--tools/testing/selftests/proc/proc-uptime-002.c30
-rw-r--r--tools/testing/selftests/proc/proc-uptime.h28
-rw-r--r--tools/testing/selftests/proc/read.c4
-rw-r--r--tools/testing/selftests/ptp/Makefile9
-rw-r--r--tools/testing/selftests/ptp/ptpchmaskfmt.sh14
-rw-r--r--tools/testing/selftests/ptp/testptp.c255
-rw-r--r--tools/testing/selftests/ptrace/.gitignore2
-rw-r--r--tools/testing/selftests/ptrace/Makefile4
-rw-r--r--tools/testing/selftests/ptrace/get_set_sud.c72
-rw-r--r--tools/testing/selftests/ptrace/get_syscall_info.c2
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c16
-rw-r--r--tools/testing/selftests/ptrace/set_syscall_info.c519
-rw-r--r--tools/testing/selftests/ptrace/vmaccess.c2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config2csv.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config_override.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configcheck.sh65
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/console-badness.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh1
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/rcutorture/bin/functions.sh41
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh49
-rw-r--r--tools/testing/selftests/rcutorture/bin/jitterstart.sh37
-rw-r--r--tools/testing/selftests/rcutorture/bin/jitterstop.sh23
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kcsan-collapse.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh245
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh105
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-check-branches.sh15
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh39
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh12
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh88
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh8
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh30
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh297
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-series.sh116
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh93
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh184
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh235
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-transform.sh102
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh258
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/mkinitrd.sh20
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/mktestid.sh29
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh6
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh29
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/srcu_lockdep.sh118
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh519
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/CFLIST2
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK086
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK08.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK096
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK09.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST17
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot8
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST4
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon.i6862
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon.ppc64le1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon.x86_642
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE014
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-T (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-t)1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-U (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-u)1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS013
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS024
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS035
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY011
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY021
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE015
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE023
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE013
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE021
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE048
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE051
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot6
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE061
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE075
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot4
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE081
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE097
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE107
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh21
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon9
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TINY2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TRACE013
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE543
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh5
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/CFcommon4
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT6
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/PREEMPT3
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/TINY20
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh5
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT4
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/PREEMPT1
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh4
-rw-r--r--tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt5
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile17
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h152
-rwxr-xr-xtools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk376
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h17
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h41
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h14
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c14
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h28
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c32
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h34
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h221
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c12
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h58
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h93
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c79
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h59
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c51
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h103
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile12
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c73
-rwxr-xr-xtools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh103
-rw-r--r--tools/testing/selftests/resctrl/.gitignore2
-rw-r--r--tools/testing/selftests/resctrl/Makefile20
-rw-r--r--tools/testing/selftests/resctrl/README43
-rw-r--r--tools/testing/selftests/resctrl/cache.c265
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c466
-rw-r--r--tools/testing/selftests/resctrl/cmt_test.c190
-rw-r--r--tools/testing/selftests/resctrl/config2
-rw-r--r--tools/testing/selftests/resctrl/cqm_test.c176
-rw-r--r--tools/testing/selftests/resctrl/fill_buf.c189
-rw-r--r--tools/testing/selftests/resctrl/mba_test.c176
-rw-r--r--tools/testing/selftests/resctrl/mbm_test.c155
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h255
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c388
-rw-r--r--tools/testing/selftests/resctrl/resctrl_val.c765
-rw-r--r--tools/testing/selftests/resctrl/resctrlfs.c783
-rw-r--r--tools/testing/selftests/resctrl/settings3
-rw-r--r--tools/testing/selftests/ring-buffer/.gitignore1
-rw-r--r--tools/testing/selftests/ring-buffer/Makefile7
-rw-r--r--tools/testing/selftests/ring-buffer/config2
-rw-r--r--tools/testing/selftests/ring-buffer/map_test.c324
-rw-r--r--tools/testing/selftests/riscv/Makefile58
-rw-r--r--tools/testing/selftests/riscv/README24
-rw-r--r--tools/testing/selftests/riscv/abi/.gitignore1
-rw-r--r--tools/testing/selftests/riscv/abi/Makefile10
-rw-r--r--tools/testing/selftests/riscv/abi/pointer_masking.c348
-rw-r--r--tools/testing/selftests/riscv/hwprobe/.gitignore3
-rw-r--r--tools/testing/selftests/riscv/hwprobe/Makefile18
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c377
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.c66
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.h25
-rw-r--r--tools/testing/selftests/riscv/hwprobe/sys_hwprobe.S12
-rw-r--r--tools/testing/selftests/riscv/hwprobe/which-cpus.c154
-rw-r--r--tools/testing/selftests/riscv/mm/.gitignore2
-rw-r--r--tools/testing/selftests/riscv/mm/Makefile15
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_bottomup.c12
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_default.c12
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_test.h23
-rwxr-xr-xtools/testing/selftests/riscv/mm/run_mmap.sh12
-rw-r--r--tools/testing/selftests/riscv/sigreturn/.gitignore1
-rw-r--r--tools/testing/selftests/riscv/sigreturn/Makefile12
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c82
-rw-r--r--tools/testing/selftests/riscv/vector/.gitignore4
-rw-r--r--tools/testing/selftests/riscv/vector/Makefile31
-rw-r--r--tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c90
-rw-r--r--tools/testing/selftests/riscv/vector/v_helpers.c68
-rw-r--r--tools/testing/selftests/riscv/vector/v_helpers.h8
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval.c22
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c118
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c246
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_ptrace.c134
-rw-r--r--tools/testing/selftests/rlimits/.gitignore2
-rw-r--r--tools/testing/selftests/rlimits/Makefile6
-rw-r--r--tools/testing/selftests/rlimits/config1
-rw-r--r--tools/testing/selftests/rlimits/rlimits-per-userns.c161
-rw-r--r--tools/testing/selftests/rseq/.gitignore5
-rw-r--r--tools/testing/selftests/rseq/Makefile35
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c63
-rw-r--r--tools/testing/selftests/rseq/basic_test.c4
-rw-r--r--tools/testing/selftests/rseq/compiler.h62
-rw-r--r--tools/testing/selftests/rseq/param_test.c254
-rw-r--r--tools/testing/selftests/rseq/rseq-abi.h173
-rw-r--r--tools/testing/selftests/rseq/rseq-arm-bits.h505
-rw-r--r--tools/testing/selftests/rseq/rseq-arm.h703
-rw-r--r--tools/testing/selftests/rseq/rseq-arm64-bits.h392
-rw-r--r--tools/testing/selftests/rseq/rseq-arm64.h545
-rw-r--r--tools/testing/selftests/rseq/rseq-bits-reset.h11
-rw-r--r--tools/testing/selftests/rseq/rseq-bits-template.h41
-rw-r--r--tools/testing/selftests/rseq/rseq-generic-thread-pointer.h25
-rw-r--r--tools/testing/selftests/rseq/rseq-mips-bits.h462
-rw-r--r--tools/testing/selftests/rseq/rseq-mips.h687
-rw-r--r--tools/testing/selftests/rseq/rseq-or1k-bits.h412
-rw-r--r--tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h13
-rw-r--r--tools/testing/selftests/rseq/rseq-or1k.h181
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc-bits.h454
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h30
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc.h637
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv-bits.h410
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h197
-rw-r--r--tools/testing/selftests/rseq/rseq-s390-bits.h474
-rw-r--r--tools/testing/selftests/rseq/rseq-s390.h509
-rw-r--r--tools/testing/selftests/rseq/rseq-skip.h65
-rw-r--r--tools/testing/selftests/rseq/rseq-thread-pointer.h21
-rw-r--r--tools/testing/selftests/rseq/rseq-x86-bits.h993
-rw-r--r--tools/testing/selftests/rseq/rseq-x86-thread-pointer.h40
-rw-r--r--tools/testing/selftests/rseq/rseq-x86.h1133
-rw-r--r--tools/testing/selftests/rseq/rseq.c302
-rw-r--r--tools/testing/selftests/rseq/rseq.h247
-rwxr-xr-xtools/testing/selftests/rseq/run_param_test.sh5
-rwxr-xr-xtools/testing/selftests/rseq/run_syscall_errors_test.sh5
-rw-r--r--tools/testing/selftests/rseq/syscall_errors_test.c124
-rw-r--r--tools/testing/selftests/rtc/.gitignore1
-rw-r--r--tools/testing/selftests/rtc/Makefile4
-rw-r--r--tools/testing/selftests/rtc/rtctest.c192
-rw-r--r--tools/testing/selftests/rtc/setdate.c77
-rw-r--r--tools/testing/selftests/rtc/settings2
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh40
-rw-r--r--tools/testing/selftests/rust/Makefile4
-rw-r--r--tools/testing/selftests/rust/config6
-rwxr-xr-xtools/testing/selftests/rust/test_probe_samples.sh41
-rw-r--r--tools/testing/selftests/safesetid/Makefile2
-rw-r--r--tools/testing/selftests/safesetid/safesetid-test.c295
-rw-r--r--tools/testing/selftests/sched/.gitignore1
-rw-r--r--tools/testing/selftests/sched/Makefile14
-rw-r--r--tools/testing/selftests/sched/config1
-rw-r--r--tools/testing/selftests/sched/cs_prctl_test.c359
-rw-r--r--tools/testing/selftests/sched_ext/.gitignore6
-rw-r--r--tools/testing/selftests/sched_ext/Makefile214
-rw-r--r--tools/testing/selftests/sched_ext/allowed_cpus.bpf.c144
-rw-r--r--tools/testing/selftests/sched_ext/allowed_cpus.c84
-rw-r--r--tools/testing/selftests/sched_ext/config8
-rw-r--r--tools/testing/selftests/sched_ext/create_dsq.bpf.c58
-rw-r--r--tools/testing/selftests/sched_ext/create_dsq.c57
-rw-r--r--tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c42
-rw-r--r--tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c60
-rw-r--r--tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c39
-rw-r--r--tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c59
-rw-r--r--tools/testing/selftests/sched_ext/dsp_local_on.bpf.c68
-rw-r--r--tools/testing/selftests/sched_ext/dsp_local_on.c60
-rw-r--r--tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c29
-rw-r--r--tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c64
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c74
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu.c88
-rw-r--r--tools/testing/selftests/sched_ext/exit.bpf.c86
-rw-r--r--tools/testing/selftests/sched_ext/exit.c64
-rw-r--r--tools/testing/selftests/sched_ext/exit_test.h20
-rw-r--r--tools/testing/selftests/sched_ext/hotplug.bpf.c61
-rw-r--r--tools/testing/selftests/sched_ext/hotplug.c169
-rw-r--r--tools/testing/selftests/sched_ext/hotplug_test.h15
-rw-r--r--tools/testing/selftests/sched_ext/init_enable_count.bpf.c53
-rw-r--r--tools/testing/selftests/sched_ext/init_enable_count.c157
-rw-r--r--tools/testing/selftests/sched_ext/maximal.bpf.c171
-rw-r--r--tools/testing/selftests/sched_ext/maximal.c54
-rw-r--r--tools/testing/selftests/sched_ext/maybe_null.bpf.c36
-rw-r--r--tools/testing/selftests/sched_ext/maybe_null.c49
-rw-r--r--tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c25
-rw-r--r--tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c28
-rw-r--r--tools/testing/selftests/sched_ext/minimal.bpf.c21
-rw-r--r--tools/testing/selftests/sched_ext/minimal.c58
-rw-r--r--tools/testing/selftests/sched_ext/numa.bpf.c100
-rw-r--r--tools/testing/selftests/sched_ext/numa.c59
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.bpf.c251
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.c224
-rw-r--r--tools/testing/selftests/sched_ext/prog_run.bpf.c33
-rw-r--r--tools/testing/selftests/sched_ext/prog_run.c78
-rw-r--r--tools/testing/selftests/sched_ext/reload_loop.c74
-rw-r--r--tools/testing/selftests/sched_ext/runner.c212
-rw-r--r--tools/testing/selftests/sched_ext/scx_test.h131
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c40
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dfl.c75
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c89
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c75
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c41
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch.c73
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c37
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c59
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c38
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c59
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c92
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_vtime.c62
-rw-r--r--tools/testing/selftests/sched_ext/test_example.c49
-rw-r--r--tools/testing/selftests/sched_ext/util.c71
-rw-r--r--tools/testing/selftests/sched_ext/util.h13
-rw-r--r--tools/testing/selftests/seccomp/Makefile3
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c154
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c1257
-rw-r--r--tools/testing/selftests/seccomp/settings2
-rw-r--r--tools/testing/selftests/sgx/Makefile17
-rw-r--r--tools/testing/selftests/sgx/call.S6
-rw-r--r--tools/testing/selftests/sgx/defines.h62
-rw-r--r--tools/testing/selftests/sgx/load.c145
-rw-r--r--tools/testing/selftests/sgx/main.c1941
-rw-r--r--tools/testing/selftests/sgx/main.h11
-rw-r--r--tools/testing/selftests/sgx/sigstruct.c64
-rw-r--r--tools/testing/selftests/sgx/test_encl.c146
-rw-r--r--tools/testing/selftests/sgx/test_encl.lds13
-rw-r--r--tools/testing/selftests/sgx/test_encl_bootstrap.S53
-rw-r--r--tools/testing/selftests/signal/.gitignore3
-rw-r--r--tools/testing/selftests/signal/Makefile (renamed from tools/testing/selftests/sigaltstack/Makefile)3
-rw-r--r--tools/testing/selftests/signal/current_stack_pointer.h23
-rw-r--r--tools/testing/selftests/signal/mangle_uc_sigmask.c184
-rw-r--r--tools/testing/selftests/signal/sas.c (renamed from tools/testing/selftests/sigaltstack/sas.c)29
-rw-r--r--tools/testing/selftests/sparc64/drivers/adi-test.c6
-rwxr-xr-xtools/testing/selftests/splice/short_splice_read.sh119
-rwxr-xr-xtools/testing/selftests/static_keys/test_static_keys.sh8
-rw-r--r--tools/testing/selftests/sync/Makefile2
-rw-r--r--tools/testing/selftests/sync/config2
-rw-r--r--tools/testing/selftests/sync/sync_test.c5
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_test.c156
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh308
-rw-r--r--tools/testing/selftests/tc-testing/.gitignore1
-rw-r--r--tools/testing/selftests/tc-testing/Makefile30
-rw-r--r--tools/testing/selftests/tc-testing/README67
-rw-r--r--tools/testing/selftests/tc-testing/TdcPlugin.py4
-rw-r--r--tools/testing/selftests/tc-testing/TdcResults.py3
-rw-r--r--tools/testing/selftests/tc-testing/action-ebpfbin0 -> 856 bytes-rw-r--r--tools/testing/selftests/tc-testing/config54
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt2
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py67
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py242
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py4
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py42
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py5
-rwxr-xr-xtools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py21
-rwxr-xr-xtools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh16
-rw-r--r--tools/testing/selftests/tc-testing/settings1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json14
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json95
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json69
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ct.json99
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ctinfo.json352
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json152
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gate.json351
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ife.json194
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json475
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json159
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/nat.json145
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json198
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json184
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/sample.json137
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/simple.json110
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json90
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json78
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json192
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json136
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/basic.json53
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json175
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json1236
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/flow.json623
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/flower.json (renamed from tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json)98
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/fw.json315
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json222
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/route.json206
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json129
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/u32.json106
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/actions.json438
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/filter.json26
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json1037
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json445
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbs.json214
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/choke.json172
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json217
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json90
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json254
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/etf.json107
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json284
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json121
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json403
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json320
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json35
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/gred.json150
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json173
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json214
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/htb.json261
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json50
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json182
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/mqprio.json114
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/multiq.json114
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json421
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/pfifo_fast.json109
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/plug.json172
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json99
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json280
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json51
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json255
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json304
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/skbprio.json87
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json306
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/tbf.json193
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json85
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py397
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh69
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py3
-rw-r--r--tools/testing/selftests/tdx/.gitignore1
-rw-r--r--tools/testing/selftests/tdx/Makefile7
-rw-r--r--tools/testing/selftests/tdx/config1
-rw-r--r--tools/testing/selftests/tdx/tdx_guest_test.c163
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/.gitignore1
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/Makefile12
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c108
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/.gitignore1
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/Makefile12
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c157
-rw-r--r--tools/testing/selftests/timens/.gitignore2
-rw-r--r--tools/testing/selftests/timens/Makefile2
-rw-r--r--tools/testing/selftests/timens/clock_nanosleep.c4
-rw-r--r--tools/testing/selftests/timens/exec.c8
-rw-r--r--tools/testing/selftests/timens/futex.c2
-rw-r--r--tools/testing/selftests/timens/gettime_perf.c10
-rw-r--r--tools/testing/selftests/timens/procfs.c4
-rw-r--r--tools/testing/selftests/timens/timens.c4
-rw-r--r--tools/testing/selftests/timens/timens.h2
-rw-r--r--tools/testing/selftests/timens/timer.c6
-rw-r--r--tools/testing/selftests/timens/timerfd.c8
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c141
-rw-r--r--tools/testing/selftests/timers/Makefile2
-rw-r--r--tools/testing/selftests/timers/adjtick.c14
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c34
-rw-r--r--tools/testing/selftests/timers/change_skew.c11
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c79
-rw-r--r--tools/testing/selftests/timers/freq-step.c6
-rw-r--r--tools/testing/selftests/timers/inconsistency-check.c57
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c16
-rw-r--r--tools/testing/selftests/timers/leapcrash.c10
-rw-r--r--tools/testing/selftests/timers/mqueue-lat.c8
-rw-r--r--tools/testing/selftests/timers/nanosleep.c96
-rw-r--r--tools/testing/selftests/timers/nsleep-lat.c48
-rw-r--r--tools/testing/selftests/timers/posix_timers.c673
-rw-r--r--tools/testing/selftests/timers/raw_skew.c17
-rw-r--r--tools/testing/selftests/timers/rtcpie.c13
-rw-r--r--tools/testing/selftests/timers/set-2038.c9
-rw-r--r--tools/testing/selftests/timers/set-tai.c6
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c29
-rw-r--r--tools/testing/selftests/timers/set-tz.c6
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c12
-rw-r--r--tools/testing/selftests/timers/threadtest.c10
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c87
-rw-r--r--tools/testing/selftests/tmpfs/Makefile1
-rw-r--r--tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c41
-rw-r--r--tools/testing/selftests/tpm2/.gitignore3
-rw-r--r--tools/testing/selftests/tpm2/Makefile2
-rw-r--r--tools/testing/selftests/tpm2/settings1
-rwxr-xr-xtools/testing/selftests/tpm2/test_async.sh10
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh5
-rwxr-xr-xtools/testing/selftests/tpm2/test_space.sh2
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py41
-rw-r--r--tools/testing/selftests/tpm2/tpm2_tests.py45
-rw-r--r--tools/testing/selftests/tty/.gitignore3
-rw-r--r--tools/testing/selftests/tty/Makefile9
-rw-r--r--tools/testing/selftests/tty/config1
-rw-r--r--tools/testing/selftests/tty/tty_tiocsti_test.c650
-rw-r--r--tools/testing/selftests/tty/tty_tstamp_update.c106
-rwxr-xr-xtools/testing/selftests/turbostat/added_perf_counters.py178
-rwxr-xr-xtools/testing/selftests/turbostat/defcolumns.py60
-rwxr-xr-xtools/testing/selftests/turbostat/smi_aperf_mperf.py157
-rw-r--r--tools/testing/selftests/ublk/.gitignore3
-rw-r--r--tools/testing/selftests/ublk/Makefile51
-rw-r--r--tools/testing/selftests/ublk/common.c55
-rw-r--r--tools/testing/selftests/ublk/config1
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c106
-rw-r--r--tools/testing/selftests/ublk/file_backed.c182
-rw-r--r--tools/testing/selftests/ublk/kublk.c1729
-rw-r--r--tools/testing/selftests/ublk/kublk.h421
-rw-r--r--tools/testing/selftests/ublk/null.c152
-rw-r--r--tools/testing/selftests/ublk/stripe.c391
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh384
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_01.sh48
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_02.sh48
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_03.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_04.sh40
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_05.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_06.sh41
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_07.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_08.sh32
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_09.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_10.sh30
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_11.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_12.sh59
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_13.sh20
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_01.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_02.sh20
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_03.sh25
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_04.sh21
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_05.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_null_01.sh24
-rwxr-xr-xtools/testing/selftests/ublk/test_null_02.sh24
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_01.sh34
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_02.sh36
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh54
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh51
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh84
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_01.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_02.sh21
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_03.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_04.sh21
-rw-r--r--tools/testing/selftests/ublk/trace/count_ios_per_tid.bt11
-rw-r--r--tools/testing/selftests/ublk/trace/seq_io.bt25
-rw-r--r--tools/testing/selftests/ublk/ublk_dep.h18
-rw-r--r--tools/testing/selftests/ublk/utils.h68
-rw-r--r--tools/testing/selftests/uevent/.gitignore1
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c10
-rw-r--r--tools/testing/selftests/user/config1
-rwxr-xr-xtools/testing/selftests/user/test_user_copy.sh18
-rw-r--r--tools/testing/selftests/user_events/.gitignore4
-rw-r--r--tools/testing/selftests/user_events/Makefile9
-rw-r--r--tools/testing/selftests/user_events/abi_test.c423
-rw-r--r--tools/testing/selftests/user_events/config1
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c296
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c597
-rw-r--r--tools/testing/selftests/user_events/perf_test.c254
-rw-r--r--tools/testing/selftests/user_events/settings1
-rw-r--r--tools/testing/selftests/user_events/user_events_selftests.h114
-rw-r--r--tools/testing/selftests/vDSO/.gitignore3
-rw-r--r--tools/testing/selftests/vDSO/Makefile57
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c159
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.h1
-rw-r--r--tools/testing/selftests/vDSO/vdso_call.h69
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h32
l---------[-rw-r--r--]tools/testing/selftests/vDSO/vdso_standalone_test_x86.c127
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c254
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c133
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c124
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c32
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c17
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c322
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c32
-rw-r--r--tools/testing/selftests/vDSO/vgetrandom-chacha.S20
-rw-r--r--tools/testing/selftests/verification/.gitignore (renamed from tools/testing/selftests/sigaltstack/.gitignore)2
-rw-r--r--tools/testing/selftests/verification/Makefile8
-rw-r--r--tools/testing/selftests/verification/config1
-rw-r--r--tools/testing/selftests/verification/settings1
-rw-r--r--tools/testing/selftests/verification/test.d/functions39
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc75
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc68
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitors_available.tc18
-rw-r--r--tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc30
-rwxr-xr-xtools/testing/selftests/verification/verificationtest-ktap8
-rw-r--r--tools/testing/selftests/vfio/.gitignore10
-rw-r--r--tools/testing/selftests/vfio/Makefile29
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c416
l---------tools/testing/selftests/vfio/lib/drivers/dsa/registers.h1
l---------tools/testing/selftests/vfio/lib/drivers/ioat/hw.h1
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c235
l---------tools/testing/selftests/vfio/lib/drivers/ioat/registers.h1
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio.h26
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/assert.h54
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iommu.h76
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h23
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h125
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h97
-rw-r--r--tools/testing/selftests/vfio/lib/iommu.c465
-rw-r--r--tools/testing/selftests/vfio/lib/iova_allocator.c94
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.c78
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.mk29
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c378
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_driver.c112
-rwxr-xr-xtools/testing/selftests/vfio/scripts/cleanup.sh41
-rwxr-xr-xtools/testing/selftests/vfio/scripts/lib.sh42
-rwxr-xr-xtools/testing/selftests/vfio/scripts/run.sh16
-rwxr-xr-xtools/testing/selftests/vfio/scripts/setup.sh48
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c312
-rw-r--r--tools/testing/selftests/vfio/vfio_iommufd_setup_test.c127
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c168
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_test.c182
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_driver_test.c263
-rw-r--r--tools/testing/selftests/vm/.gitignore24
-rw-r--r--tools/testing/selftests/vm/Makefile157
-rw-r--r--tools/testing/selftests/vm/compaction_test.c231
-rw-r--r--tools/testing/selftests/vm/config6
-rw-r--r--tools/testing/selftests/vm/gup_test.c194
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c1488
-rw-r--r--tools/testing/selftests/vm/khugepaged.c1035
-rw-r--r--tools/testing/selftests/vm/map_fixed_noreplace.c206
-rw-r--r--tools/testing/selftests/vm/map_hugetlb.c109
-rw-r--r--tools/testing/selftests/vm/mremap_test.c344
-rw-r--r--tools/testing/selftests/vm/on-fault-limit.c48
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh349
-rw-r--r--tools/testing/selftests/vm/transhuge-stress.c144
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c1548
-rw-r--r--tools/testing/selftests/vm/va_128TBswitch.c289
-rw-r--r--tools/testing/selftests/vm/virtual_address_range.c139
-rw-r--r--tools/testing/selftests/vsock/.gitignore2
-rw-r--r--tools/testing/selftests/vsock/Makefile17
-rw-r--r--tools/testing/selftests/vsock/config111
-rw-r--r--tools/testing/selftests/vsock/settings1
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh607
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c118
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh118
-rw-r--r--tools/testing/selftests/wireguard/qemu/.gitignore1
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile255
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/arm.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/armeb.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/i686.config7
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/m68k.config10
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64el.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mipsel.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64.config13
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv32.config14
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv64.config13
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/s390x.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/um.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/x86_64.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config10
-rw-r--r--tools/testing/selftests/wireguard/qemu/init.c46
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config15
-rw-r--r--tools/testing/selftests/x86/Makefile61
-rw-r--r--tools/testing/selftests/x86/amx.c518
-rw-r--r--tools/testing/selftests/x86/apx.c10
-rw-r--r--tools/testing/selftests/x86/avx.c12
-rw-r--r--tools/testing/selftests/x86/bugs/Makefile3
-rwxr-xr-xtools/testing/selftests/x86/bugs/common.py164
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_indirect_alignment.py150
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_permutations.py109
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_ret_alignment.py139
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_sysfs.py65
-rwxr-xr-xtools/testing/selftests/x86/check_cc.sh2
-rw-r--r--tools/testing/selftests/x86/clang_helpers_32.S11
-rw-r--r--tools/testing/selftests/x86/clang_helpers_64.S28
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c90
-rw-r--r--tools/testing/selftests/x86/entry_from_vm86.c24
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c30
-rw-r--r--tools/testing/selftests/x86/fsgsbase_restore.c11
-rw-r--r--tools/testing/selftests/x86/helpers.h28
-rw-r--r--tools/testing/selftests/x86/ioperm.c25
-rw-r--r--tools/testing/selftests/x86/iopl.c103
-rw-r--r--tools/testing/selftests/x86/lam.c1371
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c20
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c18
-rw-r--r--tools/testing/selftests/x86/nx_stack.c212
-rw-r--r--tools/testing/selftests/x86/ptrace_syscall.c24
-rw-r--r--tools/testing/selftests/x86/sigaltstack.c106
-rw-r--r--tools/testing/selftests/x86/sigreturn.c33
-rw-r--r--tools/testing/selftests/x86/sigtrap_loop.c101
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c26
-rw-r--r--tools/testing/selftests/x86/srso.c70
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c20
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c12
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c492
-rw-r--r--tools/testing/selftests/x86/sysret_rip.c42
-rw-r--r--tools/testing/selftests/x86/test_FISTTP.c8
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c86
-rw-r--r--tools/testing/selftests/x86/test_shadow_stack.c1088
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c506
-rw-r--r--tools/testing/selftests/x86/thunks_32.S2
-rw-r--r--tools/testing/selftests/x86/unwind_vdso.c12
-rw-r--r--tools/testing/selftests/x86/vdso_restorer.c2
-rw-r--r--tools/testing/selftests/x86/xstate.c478
-rw-r--r--tools/testing/selftests/x86/xstate.h197
-rw-r--r--tools/testing/selftests/zram/.gitignore (renamed from tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore)2
-rw-r--r--tools/testing/selftests/zram/README1
-rwxr-xr-xtools/testing/selftests/zram/zram.sh15
-rwxr-xr-xtools/testing/selftests/zram/zram01.sh33
-rwxr-xr-xtools/testing/selftests/zram/zram02.sh1
-rwxr-xr-xtools/testing/selftests/zram/zram_lib.sh134
-rw-r--r--tools/testing/shared/autoconf.h (renamed from tools/testing/radix-tree/generated/autoconf.h)1
-rw-r--r--tools/testing/shared/interval_tree-shim.c5
-rw-r--r--tools/testing/shared/linux.c379
-rw-r--r--tools/testing/shared/linux/bug.h (renamed from tools/testing/radix-tree/linux/bug.h)0
-rw-r--r--tools/testing/shared/linux/cleanup.h2
-rw-r--r--tools/testing/shared/linux/cpu.h (renamed from tools/testing/radix-tree/linux/cpu.h)0
-rw-r--r--tools/testing/shared/linux/idr.h5
-rw-r--r--tools/testing/shared/linux/interval_tree.h7
-rw-r--r--tools/testing/shared/linux/interval_tree_generic.h2
-rw-r--r--tools/testing/shared/linux/kconfig.h (renamed from tools/testing/radix-tree/linux/kconfig.h)0
-rw-r--r--tools/testing/shared/linux/kernel.h (renamed from tools/testing/radix-tree/linux/kernel.h)3
-rw-r--r--tools/testing/shared/linux/kmemleak.h (renamed from tools/testing/radix-tree/linux/kmemleak.h)0
-rw-r--r--tools/testing/shared/linux/local_lock.h (renamed from tools/testing/radix-tree/linux/local_lock.h)0
-rw-r--r--tools/testing/shared/linux/lockdep.h (renamed from tools/testing/radix-tree/linux/lockdep.h)5
-rw-r--r--tools/testing/shared/linux/maple_tree.h5
-rw-r--r--tools/testing/shared/linux/percpu.h (renamed from tools/testing/radix-tree/linux/percpu.h)0
-rw-r--r--tools/testing/shared/linux/preempt.h (renamed from tools/testing/radix-tree/linux/preempt.h)0
-rw-r--r--tools/testing/shared/linux/radix-tree.h (renamed from tools/testing/radix-tree/linux/radix-tree.h)0
-rw-r--r--tools/testing/shared/linux/rbtree.h8
-rw-r--r--tools/testing/shared/linux/rbtree_augmented.h7
-rw-r--r--tools/testing/shared/linux/rbtree_types.h8
-rw-r--r--tools/testing/shared/linux/rcupdate.h (renamed from tools/testing/radix-tree/linux/rcupdate.h)0
-rw-r--r--tools/testing/shared/linux/xarray.h (renamed from tools/testing/radix-tree/linux/xarray.h)0
-rw-r--r--tools/testing/shared/maple-shared.h24
-rw-r--r--tools/testing/shared/maple-shim.c14
-rw-r--r--tools/testing/shared/rbtree-shim.c6
-rw-r--r--tools/testing/shared/shared.h37
-rw-r--r--tools/testing/shared/shared.mk79
-rw-r--r--tools/testing/shared/trace/events/maple_tree.h5
-rw-r--r--tools/testing/shared/xarray-shared.c5
-rw-r--r--tools/testing/shared/xarray-shared.h8
-rw-r--r--tools/testing/vma/.gitignore7
-rw-r--r--tools/testing/vma/Makefile18
-rw-r--r--tools/testing/vma/linux/mmzone.h38
-rw-r--r--tools/testing/vma/vma.c1785
-rw-r--r--tools/testing/vma/vma_internal.h1863
-rw-r--r--tools/testing/vsock/.gitignore2
-rw-r--r--tools/testing/vsock/Makefile26
-rw-r--r--tools/testing/vsock/README49
-rw-r--r--tools/testing/vsock/control.c37
-rw-r--r--tools/testing/vsock/control.h2
-rw-r--r--tools/testing/vsock/msg_zerocopy_common.c77
-rw-r--r--tools/testing/vsock/msg_zerocopy_common.h17
-rw-r--r--tools/testing/vsock/timeout.c18
-rw-r--r--tools/testing/vsock/timeout.h1
-rw-r--r--tools/testing/vsock/util.c732
-rw-r--r--tools/testing/vsock/util.h70
-rw-r--r--tools/testing/vsock/vsock_diag_test.c25
-rw-r--r--tools/testing/vsock/vsock_perf.c499
-rw-r--r--tools/testing/vsock/vsock_test.c2148
-rw-r--r--tools/testing/vsock/vsock_test_zerocopy.c358
-rw-r--r--tools/testing/vsock/vsock_test_zerocopy.h15
-rw-r--r--tools/testing/vsock/vsock_uring_test.c353
4452 files changed, 683021 insertions, 78368 deletions
diff --git a/tools/testing/crypto/chacha20-s390/Makefile b/tools/testing/crypto/chacha20-s390/Makefile
new file mode 100644
index 000000000000..db81cd2fb9c5
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# Author: Vladis Dronov <vdronoff@gmail.com>
+
+obj-m += test_cipher.o
+test_cipher-y := test-cipher.o
+
+all:
+ make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules
+clean:
+ make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean
diff --git a/tools/testing/crypto/chacha20-s390/run-tests.sh b/tools/testing/crypto/chacha20-s390/run-tests.sh
new file mode 100644
index 000000000000..43108794b996
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/run-tests.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# Author: Vladis Dronov <vdronoff@gmail.com>
+#
+# This script runs (via instmod) test-cipher.ko module which invokes
+# generic and s390-native ChaCha20 encryprion algorithms with different
+# size of data. Check 'dmesg' for results.
+#
+# The insmod error is expected:
+# insmod: ERROR: could not insert module test_cipher.ko: Operation not permitted
+
+lsmod | grep chacha | cut -f1 -d' ' | xargs rmmod
+modprobe chacha_generic
+modprobe chacha_s390
+
+# run encryption for different data size, including whole block(s) +/- 1
+insmod test_cipher.ko size=63
+insmod test_cipher.ko size=64
+insmod test_cipher.ko size=65
+insmod test_cipher.ko size=127
+insmod test_cipher.ko size=128
+insmod test_cipher.ko size=129
+insmod test_cipher.ko size=511
+insmod test_cipher.ko size=512
+insmod test_cipher.ko size=513
+insmod test_cipher.ko size=4096
+insmod test_cipher.ko size=65611
+insmod test_cipher.ko size=6291456
+insmod test_cipher.ko size=62914560
+
+# print test logs
+dmesg | tail -170
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c
new file mode 100644
index 000000000000..827507844e8f
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/test-cipher.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ * Author: Vladis Dronov <vdronoff@gmail.com>
+ */
+
+#include <asm/elf.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <crypto/skcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/acompress.h>
+#include <crypto/rng.h>
+#include <crypto/drbg.h>
+#include <crypto/kpp.h>
+#include <crypto/internal/simd.h>
+#include <crypto/chacha.h>
+#include <crypto/aead.h>
+#include <crypto/hash.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/fips.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/scatterlist.h>
+#include <linux/time.h>
+#include <linux/vmalloc.h>
+#include <linux/zlib.h>
+#include <linux/once.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+static unsigned int data_size __read_mostly = 256;
+static unsigned int debug __read_mostly = 0;
+
+/* tie all skcipher structures together */
+struct skcipher_def {
+ struct scatterlist sginp, sgout;
+ struct crypto_skcipher *tfm;
+ struct skcipher_request *req;
+ struct crypto_wait wait;
+};
+
+/* Perform cipher operations with the chacha lib */
+static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
+{
+ struct chacha_state chacha_state;
+ u8 iv[16], key[32];
+ u64 start, end;
+
+ memset(key, 'X', sizeof(key));
+ memset(iv, 'I', sizeof(iv));
+
+ if (debug) {
+ print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET,
+ 16, 1, key, 32, 1);
+
+ print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET,
+ 16, 1, iv, 16, 1);
+ }
+
+ /* Encrypt */
+ chacha_init(&chacha_state, (u32 *)key, iv);
+
+ start = ktime_get_ns();
+ chacha_crypt_arch(&chacha_state, cipher, plain, data_size, 20);
+ end = ktime_get_ns();
+
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET,
+ 16, 1, cipher,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ pr_info("lib encryption took: %lld nsec", end - start);
+
+ /* Decrypt */
+ chacha_init(&chacha_state, (u32 *)key, iv);
+
+ start = ktime_get_ns();
+ chacha_crypt_arch(&chacha_state, revert, cipher, data_size, 20);
+ end = ktime_get_ns();
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET,
+ 16, 1, revert,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ pr_info("lib decryption took: %lld nsec", end - start);
+
+ return 0;
+}
+
+/* Perform cipher operations with skcipher */
+static unsigned int test_skcipher_encdec(struct skcipher_def *sk,
+ int enc)
+{
+ int rc;
+
+ if (enc) {
+ rc = crypto_wait_req(crypto_skcipher_encrypt(sk->req),
+ &sk->wait);
+ if (rc)
+ pr_info("skcipher encrypt returned with result"
+ "%d\n", rc);
+ }
+ else
+ {
+ rc = crypto_wait_req(crypto_skcipher_decrypt(sk->req),
+ &sk->wait);
+ if (rc)
+ pr_info("skcipher decrypt returned with result"
+ "%d\n", rc);
+ }
+
+ return rc;
+}
+
+/* Initialize and trigger cipher operations */
+static int test_skcipher(char *name, u8 *revert, u8 *cipher, u8 *plain)
+{
+ struct skcipher_def sk;
+ struct crypto_skcipher *skcipher = NULL;
+ struct skcipher_request *req = NULL;
+ u8 iv[16], key[32];
+ u64 start, end;
+ int ret = -EFAULT;
+
+ skcipher = crypto_alloc_skcipher(name, 0, 0);
+ if (IS_ERR(skcipher)) {
+ pr_info("could not allocate skcipher %s handle\n", name);
+ return PTR_ERR(skcipher);
+ }
+
+ req = skcipher_request_alloc(skcipher, GFP_KERNEL);
+ if (!req) {
+ pr_info("could not allocate skcipher request\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done,
+ &sk.wait);
+
+ memset(key, 'X', sizeof(key));
+ memset(iv, 'I', sizeof(iv));
+
+ if (crypto_skcipher_setkey(skcipher, key, 32)) {
+ pr_info("key could not be set\n");
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (debug) {
+ print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET,
+ 16, 1, key, 32, 1);
+
+ print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET,
+ 16, 1, iv, 16, 1);
+ }
+
+ sk.tfm = skcipher;
+ sk.req = req;
+
+ /* Encrypt in one pass */
+ sg_init_one(&sk.sginp, plain, data_size);
+ sg_init_one(&sk.sgout, cipher, data_size);
+ skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout,
+ data_size, iv);
+ crypto_init_wait(&sk.wait);
+
+ /* Encrypt data */
+ start = ktime_get_ns();
+ ret = test_skcipher_encdec(&sk, 1);
+ end = ktime_get_ns();
+
+ if (ret)
+ goto out;
+
+ pr_info("%s tfm encryption successful, took %lld nsec\n", name, end - start);
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET,
+ 16, 1, cipher,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ /* Prepare for decryption */
+ memset(iv, 'I', sizeof(iv));
+
+ sg_init_one(&sk.sginp, cipher, data_size);
+ sg_init_one(&sk.sgout, revert, data_size);
+ skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout,
+ data_size, iv);
+ crypto_init_wait(&sk.wait);
+
+ /* Decrypt data */
+ start = ktime_get_ns();
+ ret = test_skcipher_encdec(&sk, 0);
+ end = ktime_get_ns();
+
+ if (ret)
+ goto out;
+
+ pr_info("%s tfm decryption successful, took %lld nsec\n", name, end - start);
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET,
+ 16, 1, revert,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ /* Dump some internal skcipher data */
+ if (debug)
+ pr_info("skcipher %s: cryptlen %d blksize %d stride %d "
+ "ivsize %d alignmask 0x%x\n",
+ name, sk.req->cryptlen,
+ crypto_skcipher_blocksize(sk.tfm),
+ crypto_skcipher_alg(sk.tfm)->walksize,
+ crypto_skcipher_ivsize(sk.tfm),
+ crypto_skcipher_alignmask(sk.tfm));
+
+out:
+ if (skcipher)
+ crypto_free_skcipher(skcipher);
+ if (req)
+ skcipher_request_free(req);
+ return ret;
+}
+
+static int __init chacha_s390_test_init(void)
+{
+ u8 *plain = NULL, *revert = NULL;
+ u8 *cipher_generic = NULL, *cipher_s390 = NULL;
+ int ret = -1;
+
+ pr_info("s390 ChaCha20 test module: size=%d debug=%d\n",
+ data_size, debug);
+
+ /* Allocate and fill buffers */
+ plain = vmalloc(data_size);
+ if (!plain) {
+ pr_info("could not allocate plain buffer\n");
+ ret = -2;
+ goto out;
+ }
+ memset(plain, 'a', data_size);
+ get_random_bytes(plain, (data_size > 256 ? 256 : data_size));
+
+ cipher_generic = vzalloc(data_size);
+ if (!cipher_generic) {
+ pr_info("could not allocate cipher_generic buffer\n");
+ ret = -2;
+ goto out;
+ }
+
+ cipher_s390 = vzalloc(data_size);
+ if (!cipher_s390) {
+ pr_info("could not allocate cipher_s390 buffer\n");
+ ret = -2;
+ goto out;
+ }
+
+ revert = vzalloc(data_size);
+ if (!revert) {
+ pr_info("could not allocate revert buffer\n");
+ ret = -2;
+ goto out;
+ }
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET,
+ 16, 1, plain,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ /* Use chacha20 generic */
+ ret = test_skcipher("chacha20-generic", revert, cipher_generic, plain);
+ if (ret)
+ goto out;
+
+ if (memcmp(plain, revert, data_size)) {
+ pr_info("generic en/decryption check FAILED\n");
+ ret = -2;
+ goto out;
+ }
+ else
+ pr_info("generic en/decryption check OK\n");
+
+ memset(revert, 0, data_size);
+
+ /* Use chacha20 s390 */
+ ret = test_skcipher("chacha20-s390", revert, cipher_s390, plain);
+ if (ret)
+ goto out;
+
+ if (memcmp(plain, revert, data_size)) {
+ pr_info("s390 en/decryption check FAILED\n");
+ ret = -2;
+ goto out;
+ }
+ else
+ pr_info("s390 en/decryption check OK\n");
+
+ if (memcmp(cipher_generic, cipher_s390, data_size)) {
+ pr_info("s390 vs generic check FAILED\n");
+ ret = -2;
+ goto out;
+ }
+ else
+ pr_info("s390 vs generic check OK\n");
+
+ memset(cipher_s390, 0, data_size);
+ memset(revert, 0, data_size);
+
+ /* Use chacha20 lib */
+ test_lib_chacha(revert, cipher_s390, plain);
+
+ if (memcmp(plain, revert, data_size)) {
+ pr_info("lib en/decryption check FAILED\n");
+ ret = -2;
+ goto out;
+ }
+ else
+ pr_info("lib en/decryption check OK\n");
+
+ if (memcmp(cipher_generic, cipher_s390, data_size)) {
+ pr_info("lib vs generic check FAILED\n");
+ ret = -2;
+ goto out;
+ }
+ else
+ pr_info("lib vs generic check OK\n");
+
+ pr_info("--- chacha20 s390 test end ---\n");
+
+out:
+ if (plain)
+ vfree(plain);
+ if (cipher_generic)
+ vfree(cipher_generic);
+ if (cipher_s390)
+ vfree(cipher_s390);
+ if (revert)
+ vfree(revert);
+
+ return -1;
+}
+
+static void __exit chacha_s390_test_exit(void)
+{
+ pr_info("s390 ChaCha20 test module exit\n");
+}
+
+module_param_named(size, data_size, uint, 0660);
+module_param(debug, int, 0660);
+MODULE_PARM_DESC(size, "Size of a plaintext");
+MODULE_PARM_DESC(debug, "Debug level (0=off,1=on)");
+
+module_init(chacha_s390_test_init);
+module_exit(chacha_s390_test_exit);
+
+MODULE_DESCRIPTION("s390 ChaCha20 self-test");
+MODULE_AUTHOR("Vladis Dronov <vdronoff@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
new file mode 100644
index 000000000000..0e151d0572d1
--- /dev/null
+++ b/tools/testing/cxl/Kbuild
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0
+ldflags-y += --wrap=acpi_table_parse_cedt
+ldflags-y += --wrap=is_acpi_device_node
+ldflags-y += --wrap=acpi_evaluate_integer
+ldflags-y += --wrap=acpi_pci_find_root
+ldflags-y += --wrap=nvdimm_bus_register
+ldflags-y += --wrap=cxl_await_media_ready
+ldflags-y += --wrap=devm_cxl_add_rch_dport
+ldflags-y += --wrap=cxl_endpoint_parse_cdat
+ldflags-y += --wrap=cxl_dport_init_ras_reporting
+ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
+ldflags-y += --wrap=hmat_get_extended_linear_cache_size
+
+DRIVERS := ../../../drivers
+CXL_SRC := $(DRIVERS)/cxl
+CXL_CORE_SRC := $(DRIVERS)/cxl/core
+ccflags-y := -I$(srctree)/drivers/cxl/
+ccflags-y += -D__mock=__weak
+ccflags-y += -DCXL_TEST_ENABLE=1
+ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/
+
+obj-m += cxl_acpi.o
+
+cxl_acpi-y := $(CXL_SRC)/acpi.o
+cxl_acpi-y += mock_acpi.o
+cxl_acpi-y += config_check.o
+cxl_acpi-y += cxl_acpi_test.o
+
+obj-m += cxl_pmem.o
+
+cxl_pmem-y := $(CXL_SRC)/pmem.o
+cxl_pmem-y += $(CXL_SRC)/security.o
+cxl_pmem-y += config_check.o
+cxl_pmem-y += cxl_pmem_test.o
+
+obj-m += cxl_port.o
+
+cxl_port-y := $(CXL_SRC)/port.o
+cxl_port-y += config_check.o
+cxl_port-y += cxl_port_test.o
+
+
+obj-m += cxl_mem.o
+
+cxl_mem-y := $(CXL_SRC)/mem.o
+cxl_mem-y += config_check.o
+cxl_mem-y += cxl_mem_test.o
+
+obj-m += cxl_core.o
+
+cxl_core-y := $(CXL_CORE_SRC)/port.o
+cxl_core-y += $(CXL_CORE_SRC)/pmem.o
+cxl_core-y += $(CXL_CORE_SRC)/regs.o
+cxl_core-y += $(CXL_CORE_SRC)/memdev.o
+cxl_core-y += $(CXL_CORE_SRC)/mbox.o
+cxl_core-y += $(CXL_CORE_SRC)/pci.o
+cxl_core-y += $(CXL_CORE_SRC)/hdm.o
+cxl_core-y += $(CXL_CORE_SRC)/pmu.o
+cxl_core-y += $(CXL_CORE_SRC)/cdat.o
+cxl_core-y += $(CXL_CORE_SRC)/ras.o
+cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
+cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
+cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
+cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
+cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
+cxl_core-y += config_check.o
+cxl_core-y += cxl_core_test.o
+cxl_core-y += cxl_core_exports.o
+
+KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS))
+
+obj-m += test/
diff --git a/tools/testing/cxl/config_check.c b/tools/testing/cxl/config_check.c
new file mode 100644
index 000000000000..a80bc2c062fe
--- /dev/null
+++ b/tools/testing/cxl/config_check.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
+
+void check(void)
+{
+ /*
+ * These kconfig symbols must be set to "m" for cxl_test to load
+ * and operate.
+ */
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_64BIT));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_BUS));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_ACPI));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_PMEM));
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST));
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST));
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_DEBUG_FS));
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_MEMORY_HOTPLUG));
+}
diff --git a/tools/testing/cxl/cxl_acpi_test.c b/tools/testing/cxl/cxl_acpi_test.c
new file mode 100644
index 000000000000..8602dc27c81c
--- /dev/null
+++ b/tools/testing/cxl/cxl_acpi_test.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#include "watermark.h"
+
+cxl_test_watermark(cxl_acpi);
diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c
new file mode 100644
index 000000000000..6754de35598d
--- /dev/null
+++ b/tools/testing/cxl/cxl_core_exports.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#include "cxl.h"
+#include "exports.h"
+
+/* Exporting of cxl_core symbols that are only used by cxl_test */
+EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL");
+
+cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev =
+ __devm_cxl_add_dport_by_dev;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL");
+
+struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ return _devm_cxl_add_dport_by_dev(port, dport_dev);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL");
+
+cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup =
+ __devm_cxl_switch_port_decoders_setup;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL");
+
+int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+ return _devm_cxl_switch_port_decoders_setup(port);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL");
diff --git a/tools/testing/cxl/cxl_core_test.c b/tools/testing/cxl/cxl_core_test.c
new file mode 100644
index 000000000000..464a9255e4d6
--- /dev/null
+++ b/tools/testing/cxl/cxl_core_test.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#include "watermark.h"
+
+cxl_test_watermark(cxl_core);
diff --git a/tools/testing/cxl/cxl_mem_test.c b/tools/testing/cxl/cxl_mem_test.c
new file mode 100644
index 000000000000..ba7fb8a44288
--- /dev/null
+++ b/tools/testing/cxl/cxl_mem_test.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#include "watermark.h"
+
+cxl_test_watermark(cxl_mem);
diff --git a/tools/testing/cxl/cxl_pmem_test.c b/tools/testing/cxl/cxl_pmem_test.c
new file mode 100644
index 000000000000..3fd884fae537
--- /dev/null
+++ b/tools/testing/cxl/cxl_pmem_test.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#include "watermark.h"
+
+cxl_test_watermark(cxl_pmem);
diff --git a/tools/testing/cxl/cxl_port_test.c b/tools/testing/cxl/cxl_port_test.c
new file mode 100644
index 000000000000..be183917a9f6
--- /dev/null
+++ b/tools/testing/cxl/cxl_port_test.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#include "watermark.h"
+
+cxl_test_watermark(cxl_port);
diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h
new file mode 100644
index 000000000000..7ebee7c0bd67
--- /dev/null
+++ b/tools/testing/cxl/exports.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef __MOCK_CXL_EXPORTS_H_
+#define __MOCK_CXL_EXPORTS_H_
+
+typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port,
+ struct device *dport_dev);
+extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev;
+
+typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port);
+extern cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup;
+
+#endif
diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c
new file mode 100644
index 000000000000..8da94378ccec
--- /dev/null
+++ b/tools/testing/cxl/mock_acpi.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
+
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/acpi.h>
+#include <cxl.h>
+#include "test/mock.h"
+
+struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev)
+{
+ int index;
+ struct acpi_device *adev, *found = NULL;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops && ops->is_mock_bridge(dev)) {
+ found = ACPI_COMPANION(dev);
+ goto out;
+ }
+
+ if (dev_is_platform(dev))
+ goto out;
+
+ adev = to_acpi_device(dev);
+ if (!acpi_pci_find_root(adev->handle))
+ goto out;
+
+ if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0) {
+ found = adev;
+ dev_dbg(host, "found host bridge %s\n", dev_name(&adev->dev));
+ }
+out:
+ put_cxl_mock_ops(index);
+ return found;
+}
diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild
new file mode 100644
index 000000000000..af50972c8b6d
--- /dev/null
+++ b/tools/testing/cxl/test/Kbuild
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-y := -I$(srctree)/drivers/cxl/ -I$(srctree)/drivers/cxl/core
+
+obj-m += cxl_test.o
+obj-m += cxl_mock.o
+obj-m += cxl_mock_mem.o
+obj-m += cxl_translate.o
+
+cxl_test-y := cxl.o
+cxl_mock-y := mock.o
+cxl_mock_mem-y := mem.o
+
+KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS))
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
new file mode 100644
index 000000000000..81e2aef3627a
--- /dev/null
+++ b/tools/testing/cxl/test/cxl.c
@@ -0,0 +1,1628 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2021 Intel Corporation. All rights reserved.
+
+#include <linux/platform_device.h>
+#include <linux/memory_hotplug.h>
+#include <linux/genalloc.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <cxlmem.h>
+
+#include "../watermark.h"
+#include "mock.h"
+
+static int interleave_arithmetic;
+static bool extended_linear_cache;
+
+#define FAKE_QTG_ID 42
+
+#define NR_CXL_HOST_BRIDGES 2
+#define NR_CXL_SINGLE_HOST 1
+#define NR_CXL_RCH 1
+#define NR_CXL_ROOT_PORTS 2
+#define NR_CXL_SWITCH_PORTS 2
+#define NR_CXL_PORT_DECODERS 8
+#define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH)
+
+#define MOCK_AUTO_REGION_SIZE_DEFAULT SZ_512M
+static int mock_auto_region_size = MOCK_AUTO_REGION_SIZE_DEFAULT;
+
+static struct platform_device *cxl_acpi;
+static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES];
+#define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS)
+static struct platform_device *cxl_root_port[NR_MULTI_ROOT];
+static struct platform_device *cxl_switch_uport[NR_MULTI_ROOT];
+#define NR_MEM_MULTI \
+ (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS)
+static struct platform_device *cxl_switch_dport[NR_MEM_MULTI];
+
+static struct platform_device *cxl_hb_single[NR_CXL_SINGLE_HOST];
+static struct platform_device *cxl_root_single[NR_CXL_SINGLE_HOST];
+static struct platform_device *cxl_swu_single[NR_CXL_SINGLE_HOST];
+#define NR_MEM_SINGLE (NR_CXL_SINGLE_HOST * NR_CXL_SWITCH_PORTS)
+static struct platform_device *cxl_swd_single[NR_MEM_SINGLE];
+
+struct platform_device *cxl_mem[NR_MEM_MULTI];
+struct platform_device *cxl_mem_single[NR_MEM_SINGLE];
+
+static struct platform_device *cxl_rch[NR_CXL_RCH];
+static struct platform_device *cxl_rcd[NR_CXL_RCH];
+
+static inline bool is_multi_bridge(struct device *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++)
+ if (&cxl_host_bridge[i]->dev == dev)
+ return true;
+ return false;
+}
+
+static inline bool is_single_bridge(struct device *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++)
+ if (&cxl_hb_single[i]->dev == dev)
+ return true;
+ return false;
+}
+
+static struct acpi_device acpi0017_mock;
+static struct acpi_device host_bridge[NR_BRIDGES] = {
+ [0] = {
+ .handle = &host_bridge[0],
+ .pnp.unique_id = "0",
+ },
+ [1] = {
+ .handle = &host_bridge[1],
+ .pnp.unique_id = "1",
+ },
+ [2] = {
+ .handle = &host_bridge[2],
+ .pnp.unique_id = "2",
+ },
+ [3] = {
+ .handle = &host_bridge[3],
+ .pnp.unique_id = "3",
+ },
+};
+
+static bool is_mock_dev(struct device *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_mem); i++)
+ if (dev == &cxl_mem[i]->dev)
+ return true;
+ for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++)
+ if (dev == &cxl_mem_single[i]->dev)
+ return true;
+ for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++)
+ if (dev == &cxl_rcd[i]->dev)
+ return true;
+ if (dev == &cxl_acpi->dev)
+ return true;
+ return false;
+}
+
+static bool is_mock_adev(struct acpi_device *adev)
+{
+ int i;
+
+ if (adev == &acpi0017_mock)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(host_bridge); i++)
+ if (adev == &host_bridge[i])
+ return true;
+
+ return false;
+}
+
+static struct {
+ struct acpi_table_cedt cedt;
+ struct acpi_cedt_chbs chbs[NR_BRIDGES];
+ struct {
+ struct acpi_cedt_cfmws cfmws;
+ u32 target[1];
+ } cfmws0;
+ struct {
+ struct acpi_cedt_cfmws cfmws;
+ u32 target[2];
+ } cfmws1;
+ struct {
+ struct acpi_cedt_cfmws cfmws;
+ u32 target[1];
+ } cfmws2;
+ struct {
+ struct acpi_cedt_cfmws cfmws;
+ u32 target[2];
+ } cfmws3;
+ struct {
+ struct acpi_cedt_cfmws cfmws;
+ u32 target[1];
+ } cfmws4;
+ struct {
+ struct acpi_cedt_cfmws cfmws;
+ u32 target[1];
+ } cfmws5;
+ struct {
+ struct acpi_cedt_cfmws cfmws;
+ u32 target[1];
+ } cfmws6;
+ struct {
+ struct acpi_cedt_cfmws cfmws;
+ u32 target[2];
+ } cfmws7;
+ struct {
+ struct acpi_cedt_cfmws cfmws;
+ u32 target[3];
+ } cfmws8;
+ struct {
+ struct acpi_cedt_cxims cxims;
+ u64 xormap_list[2];
+ } cxims0;
+} __packed mock_cedt = {
+ .cedt = {
+ .header = {
+ .signature = "CEDT",
+ .length = sizeof(mock_cedt),
+ .revision = 1,
+ },
+ },
+ .chbs[0] = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CHBS,
+ .length = sizeof(mock_cedt.chbs[0]),
+ },
+ .uid = 0,
+ .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
+ },
+ .chbs[1] = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CHBS,
+ .length = sizeof(mock_cedt.chbs[0]),
+ },
+ .uid = 1,
+ .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
+ },
+ .chbs[2] = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CHBS,
+ .length = sizeof(mock_cedt.chbs[0]),
+ },
+ .uid = 2,
+ .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
+ },
+ .chbs[3] = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CHBS,
+ .length = sizeof(mock_cedt.chbs[0]),
+ },
+ .uid = 3,
+ .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL11,
+ },
+ .cfmws0 = {
+ .cfmws = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CFMWS,
+ .length = sizeof(mock_cedt.cfmws0),
+ },
+ .interleave_ways = 0,
+ .granularity = 4,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
+ ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
+ .qtg_id = FAKE_QTG_ID,
+ .window_size = SZ_256M * 4UL,
+ },
+ .target = { 0 },
+ },
+ .cfmws1 = {
+ .cfmws = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CFMWS,
+ .length = sizeof(mock_cedt.cfmws1),
+ },
+ .interleave_ways = 1,
+ .granularity = 4,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
+ ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
+ .qtg_id = FAKE_QTG_ID,
+ .window_size = SZ_256M * 8UL,
+ },
+ .target = { 0, 1, },
+ },
+ .cfmws2 = {
+ .cfmws = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CFMWS,
+ .length = sizeof(mock_cedt.cfmws2),
+ },
+ .interleave_ways = 0,
+ .granularity = 4,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
+ ACPI_CEDT_CFMWS_RESTRICT_PMEM,
+ .qtg_id = FAKE_QTG_ID,
+ .window_size = SZ_256M * 4UL,
+ },
+ .target = { 0 },
+ },
+ .cfmws3 = {
+ .cfmws = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CFMWS,
+ .length = sizeof(mock_cedt.cfmws3),
+ },
+ .interleave_ways = 1,
+ .granularity = 4,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
+ ACPI_CEDT_CFMWS_RESTRICT_PMEM,
+ .qtg_id = FAKE_QTG_ID,
+ .window_size = SZ_256M * 8UL,
+ },
+ .target = { 0, 1, },
+ },
+ .cfmws4 = {
+ .cfmws = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CFMWS,
+ .length = sizeof(mock_cedt.cfmws4),
+ },
+ .interleave_ways = 0,
+ .granularity = 4,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
+ ACPI_CEDT_CFMWS_RESTRICT_PMEM,
+ .qtg_id = FAKE_QTG_ID,
+ .window_size = SZ_256M * 4UL,
+ },
+ .target = { 2 },
+ },
+ .cfmws5 = {
+ .cfmws = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CFMWS,
+ .length = sizeof(mock_cedt.cfmws5),
+ },
+ .interleave_ways = 0,
+ .granularity = 4,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
+ ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
+ .qtg_id = FAKE_QTG_ID,
+ .window_size = SZ_256M,
+ },
+ .target = { 3 },
+ },
+ /* .cfmws6,7,8 use ACPI_CEDT_CFMWS_ARITHMETIC_XOR */
+ .cfmws6 = {
+ .cfmws = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CFMWS,
+ .length = sizeof(mock_cedt.cfmws6),
+ },
+ .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
+ .interleave_ways = 0,
+ .granularity = 4,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
+ ACPI_CEDT_CFMWS_RESTRICT_PMEM,
+ .qtg_id = FAKE_QTG_ID,
+ .window_size = SZ_256M * 8UL,
+ },
+ .target = { 0, },
+ },
+ .cfmws7 = {
+ .cfmws = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CFMWS,
+ .length = sizeof(mock_cedt.cfmws7),
+ },
+ .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
+ .interleave_ways = 1,
+ .granularity = 0,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
+ ACPI_CEDT_CFMWS_RESTRICT_PMEM,
+ .qtg_id = FAKE_QTG_ID,
+ .window_size = SZ_256M * 8UL,
+ },
+ .target = { 0, 1, },
+ },
+ .cfmws8 = {
+ .cfmws = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CFMWS,
+ .length = sizeof(mock_cedt.cfmws8),
+ },
+ .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
+ .interleave_ways = 8,
+ .granularity = 1,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
+ ACPI_CEDT_CFMWS_RESTRICT_PMEM,
+ .qtg_id = FAKE_QTG_ID,
+ .window_size = SZ_512M * 6UL,
+ },
+ .target = { 0, 1, 2, },
+ },
+ .cxims0 = {
+ .cxims = {
+ .header = {
+ .type = ACPI_CEDT_TYPE_CXIMS,
+ .length = sizeof(mock_cedt.cxims0),
+ },
+ .hbig = 0,
+ .nr_xormaps = 2,
+ },
+ .xormap_list = { 0x404100, 0x808200, },
+ },
+};
+
+struct acpi_cedt_cfmws *mock_cfmws[] = {
+ [0] = &mock_cedt.cfmws0.cfmws,
+ [1] = &mock_cedt.cfmws1.cfmws,
+ [2] = &mock_cedt.cfmws2.cfmws,
+ [3] = &mock_cedt.cfmws3.cfmws,
+ [4] = &mock_cedt.cfmws4.cfmws,
+ [5] = &mock_cedt.cfmws5.cfmws,
+ /* Modulo Math above, XOR Math below */
+ [6] = &mock_cedt.cfmws6.cfmws,
+ [7] = &mock_cedt.cfmws7.cfmws,
+ [8] = &mock_cedt.cfmws8.cfmws,
+};
+
+static int cfmws_start;
+static int cfmws_end;
+#define CFMWS_MOD_ARRAY_START 0
+#define CFMWS_MOD_ARRAY_END 5
+#define CFMWS_XOR_ARRAY_START 6
+#define CFMWS_XOR_ARRAY_END 8
+
+struct acpi_cedt_cxims *mock_cxims[1] = {
+ [0] = &mock_cedt.cxims0.cxims,
+};
+
+struct cxl_mock_res {
+ struct list_head list;
+ struct range range;
+};
+
+static LIST_HEAD(mock_res);
+static DEFINE_MUTEX(mock_res_lock);
+static struct gen_pool *cxl_mock_pool;
+
+static void depopulate_all_mock_resources(void)
+{
+ struct cxl_mock_res *res, *_res;
+
+ mutex_lock(&mock_res_lock);
+ list_for_each_entry_safe(res, _res, &mock_res, list) {
+ gen_pool_free(cxl_mock_pool, res->range.start,
+ range_len(&res->range));
+ list_del(&res->list);
+ kfree(res);
+ }
+ mutex_unlock(&mock_res_lock);
+}
+
+static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align)
+{
+ struct cxl_mock_res *res = kzalloc(sizeof(*res), GFP_KERNEL);
+ struct genpool_data_align data = {
+ .align = align,
+ };
+ unsigned long phys;
+
+ INIT_LIST_HEAD(&res->list);
+ phys = gen_pool_alloc_algo(cxl_mock_pool, size,
+ gen_pool_first_fit_align, &data);
+ if (!phys)
+ return NULL;
+
+ res->range = (struct range) {
+ .start = phys,
+ .end = phys + size - 1,
+ };
+ mutex_lock(&mock_res_lock);
+ list_add(&res->list, &mock_res);
+ mutex_unlock(&mock_res_lock);
+
+ return res;
+}
+
+/* Only update CFMWS0 as this is used by the auto region. */
+static void cfmws_elc_update(struct acpi_cedt_cfmws *window, int index)
+{
+ if (!extended_linear_cache)
+ return;
+
+ if (index != 0)
+ return;
+
+ /*
+ * The window size should be 2x of the CXL region size where half is
+ * DRAM and half is CXL
+ */
+ window->window_size = mock_auto_region_size * 2;
+}
+
+static int populate_cedt(void)
+{
+ struct cxl_mock_res *res;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mock_cedt.chbs); i++) {
+ struct acpi_cedt_chbs *chbs = &mock_cedt.chbs[i];
+ resource_size_t size;
+
+ if (chbs->cxl_version == ACPI_CEDT_CHBS_VERSION_CXL20)
+ size = ACPI_CEDT_CHBS_LENGTH_CXL20;
+ else
+ size = ACPI_CEDT_CHBS_LENGTH_CXL11;
+
+ res = alloc_mock_res(size, size);
+ if (!res)
+ return -ENOMEM;
+ chbs->base = res->range.start;
+ chbs->length = size;
+ }
+
+ for (i = cfmws_start; i <= cfmws_end; i++) {
+ struct acpi_cedt_cfmws *window = mock_cfmws[i];
+
+ cfmws_elc_update(window, i);
+ res = alloc_mock_res(window->window_size, SZ_256M);
+ if (!res)
+ return -ENOMEM;
+ window->base_hpa = res->range.start;
+ }
+
+ return 0;
+}
+
+static bool is_mock_port(struct device *dev);
+
+/*
+ * WARNING, this hack assumes the format of 'struct cxl_cfmws_context'
+ * and 'struct cxl_chbs_context' share the property that the first
+ * struct member is a cxl_test device being probed by the cxl_acpi
+ * driver.
+ */
+struct cxl_cedt_context {
+ struct device *dev;
+};
+
+static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id,
+ acpi_tbl_entry_handler_arg handler_arg,
+ void *arg)
+{
+ struct cxl_cedt_context *ctx = arg;
+ struct device *dev = ctx->dev;
+ union acpi_subtable_headers *h;
+ unsigned long end;
+ int i;
+
+ if (!is_mock_port(dev) && !is_mock_dev(dev))
+ return acpi_table_parse_cedt(id, handler_arg, arg);
+
+ if (id == ACPI_CEDT_TYPE_CHBS)
+ for (i = 0; i < ARRAY_SIZE(mock_cedt.chbs); i++) {
+ h = (union acpi_subtable_headers *)&mock_cedt.chbs[i];
+ end = (unsigned long)&mock_cedt.chbs[i + 1];
+ handler_arg(h, arg, end);
+ }
+
+ if (id == ACPI_CEDT_TYPE_CFMWS)
+ for (i = cfmws_start; i <= cfmws_end; i++) {
+ h = (union acpi_subtable_headers *) mock_cfmws[i];
+ end = (unsigned long) h + mock_cfmws[i]->header.length;
+ handler_arg(h, arg, end);
+ }
+
+ if (id == ACPI_CEDT_TYPE_CXIMS)
+ for (i = 0; i < ARRAY_SIZE(mock_cxims); i++) {
+ h = (union acpi_subtable_headers *)mock_cxims[i];
+ end = (unsigned long)h + mock_cxims[i]->header.length;
+ handler_arg(h, arg, end);
+ }
+
+ return 0;
+}
+
+static bool is_mock_bridge(struct device *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++)
+ if (dev == &cxl_host_bridge[i]->dev)
+ return true;
+ for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++)
+ if (dev == &cxl_hb_single[i]->dev)
+ return true;
+ for (i = 0; i < ARRAY_SIZE(cxl_rch); i++)
+ if (dev == &cxl_rch[i]->dev)
+ return true;
+
+ return false;
+}
+
+static bool is_mock_port(struct device *dev)
+{
+ int i;
+
+ if (is_mock_bridge(dev))
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++)
+ if (dev == &cxl_root_port[i]->dev)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++)
+ if (dev == &cxl_switch_uport[i]->dev)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++)
+ if (dev == &cxl_switch_dport[i]->dev)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_root_single); i++)
+ if (dev == &cxl_root_single[i]->dev)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++)
+ if (dev == &cxl_swu_single[i]->dev)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++)
+ if (dev == &cxl_swd_single[i]->dev)
+ return true;
+
+ if (is_cxl_memdev(dev))
+ return is_mock_dev(dev->parent);
+
+ return false;
+}
+
+static int host_bridge_index(struct acpi_device *adev)
+{
+ return adev - host_bridge;
+}
+
+static struct acpi_device *find_host_bridge(acpi_handle handle)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(host_bridge); i++)
+ if (handle == host_bridge[i].handle)
+ return &host_bridge[i];
+ return NULL;
+}
+
+static acpi_status
+mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname,
+ struct acpi_object_list *arguments,
+ unsigned long long *data)
+{
+ struct acpi_device *adev = find_host_bridge(handle);
+
+ if (!adev || strcmp(pathname, METHOD_NAME__UID) != 0)
+ return acpi_evaluate_integer(handle, pathname, arguments, data);
+
+ *data = host_bridge_index(adev);
+ return AE_OK;
+}
+
+static int
+mock_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid, resource_size_t *cache_size)
+{
+ struct acpi_cedt_cfmws *window = mock_cfmws[0];
+ struct resource cfmws0_res =
+ DEFINE_RES_MEM(window->base_hpa, window->window_size);
+
+ if (!extended_linear_cache ||
+ !resource_contains(&cfmws0_res, backing_res)) {
+ return hmat_get_extended_linear_cache_size(backing_res,
+ nid, cache_size);
+ }
+
+ *cache_size = mock_auto_region_size;
+
+ return 0;
+}
+
+static struct pci_bus mock_pci_bus[NR_BRIDGES];
+static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = {
+ [0] = {
+ .bus = &mock_pci_bus[0],
+ },
+ [1] = {
+ .bus = &mock_pci_bus[1],
+ },
+ [2] = {
+ .bus = &mock_pci_bus[2],
+ },
+ [3] = {
+ .bus = &mock_pci_bus[3],
+ },
+
+};
+
+static bool is_mock_bus(struct pci_bus *bus)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mock_pci_bus); i++)
+ if (bus == &mock_pci_bus[i])
+ return true;
+ return false;
+}
+
+static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle)
+{
+ struct acpi_device *adev = find_host_bridge(handle);
+
+ if (!adev)
+ return acpi_pci_find_root(handle);
+ return &mock_pci_root[host_bridge_index(adev)];
+}
+
+static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port,
+ struct cxl_endpoint_dvsec_info *info)
+{
+ struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL);
+ struct device *dev = &port->dev;
+
+ if (!cxlhdm)
+ return ERR_PTR(-ENOMEM);
+
+ cxlhdm->port = port;
+ cxlhdm->interleave_mask = ~0U;
+ cxlhdm->iw_cap_mask = ~0UL;
+ dev_set_drvdata(dev, cxlhdm);
+ return cxlhdm;
+}
+
+struct target_map_ctx {
+ u32 *target_map;
+ int index;
+ int target_count;
+};
+
+static int map_targets(struct device *dev, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct target_map_ctx *ctx = data;
+
+ ctx->target_map[ctx->index++] = pdev->id;
+
+ if (ctx->index > ctx->target_count) {
+ dev_WARN_ONCE(dev, 1, "too many targets found?\n");
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int mock_decoder_commit(struct cxl_decoder *cxld)
+{
+ struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+ int id = cxld->id;
+
+ if (cxld->flags & CXL_DECODER_F_ENABLE)
+ return 0;
+
+ dev_dbg(&port->dev, "%s commit\n", dev_name(&cxld->dev));
+ if (cxl_num_decoders_committed(port) != id) {
+ dev_dbg(&port->dev,
+ "%s: out of order commit, expected decoder%d.%d\n",
+ dev_name(&cxld->dev), port->id,
+ cxl_num_decoders_committed(port));
+ return -EBUSY;
+ }
+
+ port->commit_end++;
+ cxld->flags |= CXL_DECODER_F_ENABLE;
+
+ return 0;
+}
+
+static void mock_decoder_reset(struct cxl_decoder *cxld)
+{
+ struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+ int id = cxld->id;
+
+ if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
+ return;
+
+ dev_dbg(&port->dev, "%s reset\n", dev_name(&cxld->dev));
+ if (port->commit_end == id)
+ cxl_port_commit_reap(cxld);
+ else
+ dev_dbg(&port->dev,
+ "%s: out of order reset, expected decoder%d.%d\n",
+ dev_name(&cxld->dev), port->id, port->commit_end);
+ cxld->flags &= ~CXL_DECODER_F_ENABLE;
+}
+
+static void default_mock_decoder(struct cxl_decoder *cxld)
+{
+ cxld->hpa_range = (struct range){
+ .start = 0,
+ .end = -1,
+ };
+
+ cxld->interleave_ways = 1;
+ cxld->interleave_granularity = 256;
+ cxld->target_type = CXL_DECODER_HOSTONLYMEM;
+ cxld->commit = mock_decoder_commit;
+ cxld->reset = mock_decoder_reset;
+}
+
+static int first_decoder(struct device *dev, const void *data)
+{
+ struct cxl_decoder *cxld;
+
+ if (!is_switch_decoder(dev))
+ return 0;
+ cxld = to_cxl_decoder(dev);
+ if (cxld->id == 0)
+ return 1;
+ return 0;
+}
+
+static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
+{
+ struct acpi_cedt_cfmws *window = mock_cfmws[0];
+ struct platform_device *pdev = NULL;
+ struct cxl_endpoint_decoder *cxled;
+ struct cxl_switch_decoder *cxlsd;
+ struct cxl_port *port, *iter;
+ struct cxl_memdev *cxlmd;
+ struct cxl_dport *dport;
+ struct device *dev;
+ bool hb0 = false;
+ u64 base;
+ int i;
+
+ if (is_endpoint_decoder(&cxld->dev)) {
+ cxled = to_cxl_endpoint_decoder(&cxld->dev);
+ cxlmd = cxled_to_memdev(cxled);
+ WARN_ON(!dev_is_platform(cxlmd->dev.parent));
+ pdev = to_platform_device(cxlmd->dev.parent);
+
+ /* check is endpoint is attach to host-bridge0 */
+ port = cxled_to_port(cxled);
+ do {
+ if (port->uport_dev == &cxl_host_bridge[0]->dev) {
+ hb0 = true;
+ break;
+ }
+ if (is_cxl_port(port->dev.parent))
+ port = to_cxl_port(port->dev.parent);
+ else
+ port = NULL;
+ } while (port);
+ port = cxled_to_port(cxled);
+ }
+
+ /*
+ * The first decoder on the first 2 devices on the first switch
+ * attached to host-bridge0 mock a fake / static RAM region. All
+ * other decoders are default disabled. Given the round robin
+ * assignment those devices are named cxl_mem.0, and cxl_mem.4.
+ *
+ * See 'cxl list -BMPu -m cxl_mem.0,cxl_mem.4'
+ */
+ if (!hb0 || pdev->id % 4 || pdev->id > 4 || cxld->id > 0) {
+ default_mock_decoder(cxld);
+ return;
+ }
+
+ base = window->base_hpa;
+ if (extended_linear_cache)
+ base += mock_auto_region_size;
+ cxld->hpa_range = (struct range) {
+ .start = base,
+ .end = base + mock_auto_region_size - 1,
+ };
+
+ cxld->interleave_ways = 2;
+ eig_to_granularity(window->granularity, &cxld->interleave_granularity);
+ cxld->target_type = CXL_DECODER_HOSTONLYMEM;
+ cxld->flags = CXL_DECODER_F_ENABLE;
+ cxled->state = CXL_DECODER_STATE_AUTO;
+ port->commit_end = cxld->id;
+ devm_cxl_dpa_reserve(cxled, 0,
+ mock_auto_region_size / cxld->interleave_ways, 0);
+ cxld->commit = mock_decoder_commit;
+ cxld->reset = mock_decoder_reset;
+
+ /*
+ * Now that endpoint decoder is set up, walk up the hierarchy
+ * and setup the switch and root port decoders targeting @cxlmd.
+ */
+ iter = port;
+ for (i = 0; i < 2; i++) {
+ dport = iter->parent_dport;
+ iter = dport->port;
+ dev = device_find_child(&iter->dev, NULL, first_decoder);
+ /*
+ * Ancestor ports are guaranteed to be enumerated before
+ * @port, and all ports have at least one decoder.
+ */
+ if (WARN_ON(!dev))
+ continue;
+
+ cxlsd = to_cxl_switch_decoder(dev);
+ if (i == 0) {
+ /* put cxl_mem.4 second in the decode order */
+ if (pdev->id == 4) {
+ cxlsd->target[1] = dport;
+ cxld->target_map[1] = dport->port_id;
+ } else {
+ cxlsd->target[0] = dport;
+ cxld->target_map[0] = dport->port_id;
+ }
+ } else {
+ cxlsd->target[0] = dport;
+ cxld->target_map[0] = dport->port_id;
+ }
+ cxld = &cxlsd->cxld;
+ cxld->target_type = CXL_DECODER_HOSTONLYMEM;
+ cxld->flags = CXL_DECODER_F_ENABLE;
+ iter->commit_end = 0;
+ /*
+ * Switch targets 2 endpoints, while host bridge targets
+ * one root port
+ */
+ if (i == 0)
+ cxld->interleave_ways = 2;
+ else
+ cxld->interleave_ways = 1;
+ cxld->interleave_granularity = 4096;
+ cxld->hpa_range = (struct range) {
+ .start = base,
+ .end = base + mock_auto_region_size - 1,
+ };
+ put_device(dev);
+ }
+}
+
+static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
+ struct cxl_endpoint_dvsec_info *info)
+{
+ struct cxl_port *port = cxlhdm->port;
+ struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
+ int target_count, i;
+
+ if (is_cxl_endpoint(port))
+ target_count = 0;
+ else if (is_cxl_root(parent_port))
+ target_count = NR_CXL_ROOT_PORTS;
+ else
+ target_count = NR_CXL_SWITCH_PORTS;
+
+ for (i = 0; i < NR_CXL_PORT_DECODERS; i++) {
+ struct target_map_ctx ctx = {
+ .target_count = target_count,
+ };
+ struct cxl_decoder *cxld;
+ int rc;
+
+ if (target_count) {
+ struct cxl_switch_decoder *cxlsd;
+
+ cxlsd = cxl_switch_decoder_alloc(port, target_count);
+ if (IS_ERR(cxlsd)) {
+ dev_warn(&port->dev,
+ "Failed to allocate the decoder\n");
+ return PTR_ERR(cxlsd);
+ }
+ cxld = &cxlsd->cxld;
+ } else {
+ struct cxl_endpoint_decoder *cxled;
+
+ cxled = cxl_endpoint_decoder_alloc(port);
+
+ if (IS_ERR(cxled)) {
+ dev_warn(&port->dev,
+ "Failed to allocate the decoder\n");
+ return PTR_ERR(cxled);
+ }
+ cxld = &cxled->cxld;
+ }
+
+ ctx.target_map = cxld->target_map;
+
+ mock_init_hdm_decoder(cxld);
+
+ if (target_count) {
+ rc = device_for_each_child(port->uport_dev, &ctx,
+ map_targets);
+ if (rc) {
+ put_device(&cxld->dev);
+ return rc;
+ }
+ }
+
+ rc = cxl_decoder_add_locked(cxld);
+ if (rc) {
+ put_device(&cxld->dev);
+ dev_err(&port->dev, "Failed to add decoder\n");
+ return rc;
+ }
+
+ rc = cxl_decoder_autoremove(&port->dev, cxld);
+ if (rc)
+ return rc;
+ dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev));
+ }
+
+ return 0;
+}
+
+static int __mock_cxl_decoders_setup(struct cxl_port *port)
+{
+ struct cxl_hdm *cxlhdm;
+
+ cxlhdm = mock_cxl_setup_hdm(port, NULL);
+ if (IS_ERR(cxlhdm)) {
+ if (PTR_ERR(cxlhdm) != -ENODEV)
+ dev_err(&port->dev, "Failed to map HDM decoder capability\n");
+ return PTR_ERR(cxlhdm);
+ }
+
+ return mock_cxl_enumerate_decoders(cxlhdm, NULL);
+}
+
+static int mock_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+ if (is_cxl_root(port) || is_cxl_endpoint(port))
+ return -EOPNOTSUPP;
+
+ return __mock_cxl_decoders_setup(port);
+}
+
+static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port)
+{
+ if (!is_cxl_endpoint(port))
+ return -EOPNOTSUPP;
+
+ return __mock_cxl_decoders_setup(port);
+}
+
+static int get_port_array(struct cxl_port *port,
+ struct platform_device ***port_array,
+ int *port_array_size)
+{
+ struct platform_device **array;
+ int array_size;
+
+ if (port->depth == 1) {
+ if (is_multi_bridge(port->uport_dev)) {
+ array_size = ARRAY_SIZE(cxl_root_port);
+ array = cxl_root_port;
+ } else if (is_single_bridge(port->uport_dev)) {
+ array_size = ARRAY_SIZE(cxl_root_single);
+ array = cxl_root_single;
+ } else {
+ dev_dbg(&port->dev, "%s: unknown bridge type\n",
+ dev_name(port->uport_dev));
+ return -ENXIO;
+ }
+ } else if (port->depth == 2) {
+ struct cxl_port *parent = to_cxl_port(port->dev.parent);
+
+ if (is_multi_bridge(parent->uport_dev)) {
+ array_size = ARRAY_SIZE(cxl_switch_dport);
+ array = cxl_switch_dport;
+ } else if (is_single_bridge(parent->uport_dev)) {
+ array_size = ARRAY_SIZE(cxl_swd_single);
+ array = cxl_swd_single;
+ } else {
+ dev_dbg(&port->dev, "%s: unknown bridge type\n",
+ dev_name(port->uport_dev));
+ return -ENXIO;
+ }
+ } else {
+ dev_WARN_ONCE(&port->dev, 1, "unexpected depth %d\n",
+ port->depth);
+ return -ENXIO;
+ }
+
+ *port_array = array;
+ *port_array_size = array_size;
+
+ return 0;
+}
+
+static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ struct platform_device **array;
+ int rc, i, array_size;
+
+ rc = get_port_array(port, &array, &array_size);
+ if (rc)
+ return ERR_PTR(rc);
+
+ for (i = 0; i < array_size; i++) {
+ struct platform_device *pdev = array[i];
+
+ if (pdev->dev.parent != port->uport_dev) {
+ dev_dbg(&port->dev, "%s: mismatch parent %s\n",
+ dev_name(port->uport_dev),
+ dev_name(pdev->dev.parent));
+ continue;
+ }
+
+ if (&pdev->dev != dport_dev)
+ continue;
+
+ return devm_cxl_add_dport(port, &pdev->dev, pdev->id,
+ CXL_RESOURCE_NONE);
+ }
+
+ return ERR_PTR(-ENODEV);
+}
+
+/*
+ * Faking the cxl_dpa_perf for the memdev when appropriate.
+ */
+static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
+ struct cxl_dpa_perf *dpa_perf)
+{
+ dpa_perf->qos_class = FAKE_QTG_ID;
+ dpa_perf->dpa_range = *range;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ dpa_perf->coord[i].read_latency = 500;
+ dpa_perf->coord[i].write_latency = 500;
+ dpa_perf->coord[i].read_bandwidth = 1000;
+ dpa_perf->coord[i].write_bandwidth = 1000;
+ }
+}
+
+static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
+{
+ struct cxl_root *cxl_root __free(put_cxl_root) =
+ find_cxl_root(port);
+ struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
+
+ if (!cxl_root)
+ return;
+
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct resource *res = &cxlds->part[i].res;
+ struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+ struct range range = {
+ .start = res->start,
+ .end = res->end,
+ };
+
+ dpa_perf_setup(port, &range, perf);
+ }
+
+ cxl_memdev_update_perf(cxlmd);
+
+ /*
+ * This function is here to only test the topology iterator. It serves
+ * no other purpose.
+ */
+ cxl_endpoint_get_perf_coordinates(port, ep_c);
+}
+
+static struct cxl_mock_ops cxl_mock_ops = {
+ .is_mock_adev = is_mock_adev,
+ .is_mock_bridge = is_mock_bridge,
+ .is_mock_bus = is_mock_bus,
+ .is_mock_port = is_mock_port,
+ .is_mock_dev = is_mock_dev,
+ .acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
+ .acpi_evaluate_integer = mock_acpi_evaluate_integer,
+ .acpi_pci_find_root = mock_acpi_pci_find_root,
+ .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
+ .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
+ .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
+ .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
+ .hmat_get_extended_linear_cache_size =
+ mock_hmat_get_extended_linear_cache_size,
+ .list = LIST_HEAD_INIT(cxl_mock_ops.list),
+};
+
+static void mock_companion(struct acpi_device *adev, struct device *dev)
+{
+ device_initialize(&adev->dev);
+ fwnode_init(&adev->fwnode, NULL);
+ dev->fwnode = &adev->fwnode;
+ adev->fwnode.dev = dev;
+}
+
+#ifndef SZ_64G
+#define SZ_64G (SZ_32G * 2)
+#endif
+
+static __init int cxl_rch_topo_init(void)
+{
+ int rc, i;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_rch); i++) {
+ int idx = NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + i;
+ struct acpi_device *adev = &host_bridge[idx];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_host_bridge", idx);
+ if (!pdev)
+ goto err_bridge;
+
+ mock_companion(adev, &pdev->dev);
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_bridge;
+ }
+
+ cxl_rch[i] = pdev;
+ mock_pci_bus[idx].bridge = &pdev->dev;
+ rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj,
+ "firmware_node");
+ if (rc)
+ goto err_bridge;
+ }
+
+ return 0;
+
+err_bridge:
+ for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) {
+ struct platform_device *pdev = cxl_rch[i];
+
+ if (!pdev)
+ continue;
+ sysfs_remove_link(&pdev->dev.kobj, "firmware_node");
+ platform_device_unregister(cxl_rch[i]);
+ }
+
+ return rc;
+}
+
+static void cxl_rch_topo_exit(void)
+{
+ int i;
+
+ for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) {
+ struct platform_device *pdev = cxl_rch[i];
+
+ if (!pdev)
+ continue;
+ sysfs_remove_link(&pdev->dev.kobj, "firmware_node");
+ platform_device_unregister(cxl_rch[i]);
+ }
+}
+
+static __init int cxl_single_topo_init(void)
+{
+ int i, rc;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) {
+ struct acpi_device *adev =
+ &host_bridge[NR_CXL_HOST_BRIDGES + i];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_host_bridge",
+ NR_CXL_HOST_BRIDGES + i);
+ if (!pdev)
+ goto err_bridge;
+
+ mock_companion(adev, &pdev->dev);
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_bridge;
+ }
+
+ cxl_hb_single[i] = pdev;
+ mock_pci_bus[i + NR_CXL_HOST_BRIDGES].bridge = &pdev->dev;
+ rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj,
+ "physical_node");
+ if (rc)
+ goto err_bridge;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cxl_root_single); i++) {
+ struct platform_device *bridge =
+ cxl_hb_single[i % ARRAY_SIZE(cxl_hb_single)];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_root_port",
+ NR_MULTI_ROOT + i);
+ if (!pdev)
+ goto err_port;
+ pdev->dev.parent = &bridge->dev;
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_port;
+ }
+ cxl_root_single[i] = pdev;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++) {
+ struct platform_device *root_port = cxl_root_single[i];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_switch_uport",
+ NR_MULTI_ROOT + i);
+ if (!pdev)
+ goto err_uport;
+ pdev->dev.parent = &root_port->dev;
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_uport;
+ }
+ cxl_swu_single[i] = pdev;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++) {
+ struct platform_device *uport =
+ cxl_swu_single[i % ARRAY_SIZE(cxl_swu_single)];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_switch_dport",
+ i + NR_MEM_MULTI);
+ if (!pdev)
+ goto err_dport;
+ pdev->dev.parent = &uport->dev;
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_dport;
+ }
+ cxl_swd_single[i] = pdev;
+ }
+
+ return 0;
+
+err_dport:
+ for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_swd_single[i]);
+err_uport:
+ for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_swu_single[i]);
+err_port:
+ for (i = ARRAY_SIZE(cxl_root_single) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_root_single[i]);
+err_bridge:
+ for (i = ARRAY_SIZE(cxl_hb_single) - 1; i >= 0; i--) {
+ struct platform_device *pdev = cxl_hb_single[i];
+
+ if (!pdev)
+ continue;
+ sysfs_remove_link(&pdev->dev.kobj, "physical_node");
+ platform_device_unregister(cxl_hb_single[i]);
+ }
+
+ return rc;
+}
+
+static void cxl_single_topo_exit(void)
+{
+ int i;
+
+ for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_swd_single[i]);
+ for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_swu_single[i]);
+ for (i = ARRAY_SIZE(cxl_root_single) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_root_single[i]);
+ for (i = ARRAY_SIZE(cxl_hb_single) - 1; i >= 0; i--) {
+ struct platform_device *pdev = cxl_hb_single[i];
+
+ if (!pdev)
+ continue;
+ sysfs_remove_link(&pdev->dev.kobj, "physical_node");
+ platform_device_unregister(cxl_hb_single[i]);
+ }
+}
+
+static void cxl_mem_exit(void)
+{
+ int i;
+
+ for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_rcd[i]);
+ for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_mem_single[i]);
+ for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_mem[i]);
+}
+
+static int cxl_mem_init(void)
+{
+ int i, rc;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) {
+ struct platform_device *dport = cxl_switch_dport[i];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_mem", i);
+ if (!pdev)
+ goto err_mem;
+ pdev->dev.parent = &dport->dev;
+ set_dev_node(&pdev->dev, i % 2);
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_mem;
+ }
+ cxl_mem[i] = pdev;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) {
+ struct platform_device *dport = cxl_swd_single[i];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_mem", NR_MEM_MULTI + i);
+ if (!pdev)
+ goto err_single;
+ pdev->dev.parent = &dport->dev;
+ set_dev_node(&pdev->dev, i % 2);
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_single;
+ }
+ cxl_mem_single[i] = pdev;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) {
+ int idx = NR_MEM_MULTI + NR_MEM_SINGLE + i;
+ struct platform_device *rch = cxl_rch[i];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_rcd", idx);
+ if (!pdev)
+ goto err_rcd;
+ pdev->dev.parent = &rch->dev;
+ set_dev_node(&pdev->dev, i % 2);
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_rcd;
+ }
+ cxl_rcd[i] = pdev;
+ }
+
+ return 0;
+
+err_rcd:
+ for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_rcd[i]);
+err_single:
+ for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_mem_single[i]);
+err_mem:
+ for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_mem[i]);
+ return rc;
+}
+
+static __init int cxl_test_init(void)
+{
+ int rc, i;
+ struct range mappable;
+
+ cxl_acpi_test();
+ cxl_core_test();
+ cxl_mem_test();
+ cxl_pmem_test();
+ cxl_port_test();
+
+ register_cxl_mock_ops(&cxl_mock_ops);
+
+ cxl_mock_pool = gen_pool_create(ilog2(SZ_2M), NUMA_NO_NODE);
+ if (!cxl_mock_pool) {
+ rc = -ENOMEM;
+ goto err_gen_pool_create;
+ }
+ mappable = mhp_get_pluggable_range(true);
+
+ rc = gen_pool_add(cxl_mock_pool,
+ min(iomem_resource.end + 1 - SZ_64G,
+ mappable.end + 1 - SZ_64G),
+ SZ_64G, NUMA_NO_NODE);
+ if (rc)
+ goto err_gen_pool_add;
+
+ if (interleave_arithmetic == 1) {
+ cfmws_start = CFMWS_XOR_ARRAY_START;
+ cfmws_end = CFMWS_XOR_ARRAY_END;
+ } else {
+ cfmws_start = CFMWS_MOD_ARRAY_START;
+ cfmws_end = CFMWS_MOD_ARRAY_END;
+ }
+
+ rc = populate_cedt();
+ if (rc)
+ goto err_populate;
+
+ for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) {
+ struct acpi_device *adev = &host_bridge[i];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_host_bridge", i);
+ if (!pdev)
+ goto err_bridge;
+
+ mock_companion(adev, &pdev->dev);
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_bridge;
+ }
+
+ cxl_host_bridge[i] = pdev;
+ mock_pci_bus[i].bridge = &pdev->dev;
+ rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj,
+ "physical_node");
+ if (rc)
+ goto err_bridge;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) {
+ struct platform_device *bridge =
+ cxl_host_bridge[i % ARRAY_SIZE(cxl_host_bridge)];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_root_port", i);
+ if (!pdev)
+ goto err_port;
+ pdev->dev.parent = &bridge->dev;
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_port;
+ }
+ cxl_root_port[i] = pdev;
+ }
+
+ BUILD_BUG_ON(ARRAY_SIZE(cxl_switch_uport) != ARRAY_SIZE(cxl_root_port));
+ for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++) {
+ struct platform_device *root_port = cxl_root_port[i];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_switch_uport", i);
+ if (!pdev)
+ goto err_uport;
+ pdev->dev.parent = &root_port->dev;
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_uport;
+ }
+ cxl_switch_uport[i] = pdev;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) {
+ struct platform_device *uport =
+ cxl_switch_uport[i % ARRAY_SIZE(cxl_switch_uport)];
+ struct platform_device *pdev;
+
+ pdev = platform_device_alloc("cxl_switch_dport", i);
+ if (!pdev)
+ goto err_dport;
+ pdev->dev.parent = &uport->dev;
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ platform_device_put(pdev);
+ goto err_dport;
+ }
+ cxl_switch_dport[i] = pdev;
+ }
+
+ rc = cxl_single_topo_init();
+ if (rc)
+ goto err_dport;
+
+ rc = cxl_rch_topo_init();
+ if (rc)
+ goto err_single;
+
+ cxl_acpi = platform_device_alloc("cxl_acpi", 0);
+ if (!cxl_acpi)
+ goto err_rch;
+
+ mock_companion(&acpi0017_mock, &cxl_acpi->dev);
+ acpi0017_mock.dev.bus = &platform_bus_type;
+
+ rc = platform_device_add(cxl_acpi);
+ if (rc)
+ goto err_root;
+
+ rc = cxl_mem_init();
+ if (rc)
+ goto err_root;
+
+ return 0;
+
+err_root:
+ platform_device_put(cxl_acpi);
+err_rch:
+ cxl_rch_topo_exit();
+err_single:
+ cxl_single_topo_exit();
+err_dport:
+ for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_switch_dport[i]);
+err_uport:
+ for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_switch_uport[i]);
+err_port:
+ for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_root_port[i]);
+err_bridge:
+ for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) {
+ struct platform_device *pdev = cxl_host_bridge[i];
+
+ if (!pdev)
+ continue;
+ sysfs_remove_link(&pdev->dev.kobj, "physical_node");
+ platform_device_unregister(cxl_host_bridge[i]);
+ }
+err_populate:
+ depopulate_all_mock_resources();
+err_gen_pool_add:
+ gen_pool_destroy(cxl_mock_pool);
+err_gen_pool_create:
+ unregister_cxl_mock_ops(&cxl_mock_ops);
+ return rc;
+}
+
+static __exit void cxl_test_exit(void)
+{
+ int i;
+
+ cxl_mem_exit();
+ platform_device_unregister(cxl_acpi);
+ cxl_rch_topo_exit();
+ cxl_single_topo_exit();
+ for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_switch_dport[i]);
+ for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_switch_uport[i]);
+ for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--)
+ platform_device_unregister(cxl_root_port[i]);
+ for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) {
+ struct platform_device *pdev = cxl_host_bridge[i];
+
+ if (!pdev)
+ continue;
+ sysfs_remove_link(&pdev->dev.kobj, "physical_node");
+ platform_device_unregister(cxl_host_bridge[i]);
+ }
+ depopulate_all_mock_resources();
+ gen_pool_destroy(cxl_mock_pool);
+ unregister_cxl_mock_ops(&cxl_mock_ops);
+}
+
+module_param(interleave_arithmetic, int, 0444);
+MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1");
+module_param(extended_linear_cache, bool, 0444);
+MODULE_PARM_DESC(extended_linear_cache, "Enable extended linear cache support");
+module_init(cxl_test_init);
+module_exit(cxl_test_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("cxl_test: setup module");
+MODULE_IMPORT_NS("ACPI");
+MODULE_IMPORT_NS("CXL");
diff --git a/tools/testing/cxl/test/cxl_translate.c b/tools/testing/cxl/test/cxl_translate.c
new file mode 100644
index 000000000000..2200ae21795c
--- /dev/null
+++ b/tools/testing/cxl/test/cxl_translate.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2025 Intel Corporation. All rights reserved.
+
+/* Preface all log entries with "cxl_translate" */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <cxlmem.h>
+#include <cxl.h>
+
+/* Maximum number of test vectors and entry length */
+#define MAX_TABLE_ENTRIES 128
+#define MAX_ENTRY_LEN 128
+
+/* Expected number of parameters in each test vector */
+#define EXPECTED_PARAMS 7
+
+/* Module parameters for test vectors */
+static char *table[MAX_TABLE_ENTRIES];
+static int table_num;
+
+/* Interleave Arithmetic */
+#define MODULO_MATH 0
+#define XOR_MATH 1
+
+/*
+ * XOR mapping configuration
+ * The test data sets all use the same set of xormaps. When additional
+ * data sets arrive for validation, this static setup will need to
+ * be changed to accept xormaps as additional parameters.
+ */
+struct cxl_cxims_data *cximsd;
+static u64 xormaps[] = {
+ 0x2020900,
+ 0x4041200,
+ 0x1010400,
+ 0x800,
+};
+
+static int nr_maps = ARRAY_SIZE(xormaps);
+
+#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1)
+static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = {
+ [1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4
+};
+
+/**
+ * to_hpa - calculate an HPA offset from a DPA offset and position
+ *
+ * dpa_offset: device physical address offset
+ * pos: devices position in interleave
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: host physical address offset
+ */
+static u64 to_hpa(u64 dpa_offset, int pos, u8 r_eiw, u16 r_eig, u8 hb_ways,
+ u8 math)
+{
+ u64 hpa_offset;
+
+ /* Calculate base HPA offset from DPA and position */
+ hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig);
+
+ if (math == XOR_MATH) {
+ cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+ if (cximsd->nr_maps)
+ return cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+ }
+ return hpa_offset;
+}
+
+/**
+ * to_dpa - translate an HPA offset to DPA offset
+ *
+ * hpa_offset: host physical address offset
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: device physical address offset
+ */
+static u64 to_dpa(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+ u64 offset = hpa_offset;
+
+ if (math == XOR_MATH) {
+ cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+ if (cximsd->nr_maps)
+ offset =
+ cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+ }
+ return cxl_calculate_dpa_offset(offset, r_eiw, r_eig);
+}
+
+/**
+ * to_pos - extract an interleave position from an HPA offset
+ *
+ * hpa_offset: host physical address offset
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: devices position in region interleave
+ */
+static u64 to_pos(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+ u64 offset = hpa_offset;
+
+ /* Reverse XOR mapping if specified */
+ if (math == XOR_MATH)
+ offset = cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+
+ return cxl_calculate_position(offset, r_eiw, r_eig);
+}
+
+/**
+ * run_translation_test - execute forward and reverse translations
+ *
+ * @dpa: device physical address
+ * @pos: expected position in region interleave
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * @expect_spa: expected system physical address
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int run_translation_test(u64 dpa, int pos, u8 r_eiw, u16 r_eig,
+ u8 hb_ways, int math, u64 expect_hpa)
+{
+ u64 translated_spa, reverse_dpa;
+ int reverse_pos;
+
+ /* Test Device to Host translation: DPA + POS -> SPA */
+ translated_spa = to_hpa(dpa, pos, r_eiw, r_eig, hb_ways, math);
+ if (translated_spa != expect_hpa) {
+ pr_err("Device to host failed: expected HPA %llu, got %llu\n",
+ expect_hpa, translated_spa);
+ return -1;
+ }
+
+ /* Test Host to Device DPA translation: SPA -> DPA */
+ reverse_dpa = to_dpa(translated_spa, r_eiw, r_eig, hb_ways, math);
+ if (reverse_dpa != dpa) {
+ pr_err("Host to Device DPA failed: expected %llu, got %llu\n",
+ dpa, reverse_dpa);
+ return -1;
+ }
+
+ /* Test Host to Device Position translation: SPA -> POS */
+ reverse_pos = to_pos(translated_spa, r_eiw, r_eig, hb_ways, math);
+ if (reverse_pos != pos) {
+ pr_err("Position lookup failed: expected %d, got %d\n", pos,
+ reverse_pos);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * parse_test_vector - parse a single test vector string
+ *
+ * entry: test vector string to parse
+ * dpa: device physical address
+ * pos: expected position in region interleave
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * expect_spa: expected system physical address
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int parse_test_vector(const char *entry, u64 *dpa, int *pos, u8 *r_eiw,
+ u16 *r_eig, u8 *hb_ways, int *math,
+ u64 *expect_hpa)
+{
+ unsigned int tmp_r_eiw, tmp_r_eig, tmp_hb_ways;
+ int parsed;
+
+ parsed = sscanf(entry, "%llu %d %u %u %u %d %llu", dpa, pos, &tmp_r_eiw,
+ &tmp_r_eig, &tmp_hb_ways, math, expect_hpa);
+
+ if (parsed != EXPECTED_PARAMS) {
+ pr_err("Parse error: expected %d parameters, got %d in '%s'\n",
+ EXPECTED_PARAMS, parsed, entry);
+ return -EINVAL;
+ }
+ if (tmp_r_eiw > U8_MAX || tmp_r_eig > U16_MAX || tmp_hb_ways > U8_MAX) {
+ pr_err("Parameter overflow in entry: '%s'\n", entry);
+ return -ERANGE;
+ }
+ if (*math != MODULO_MATH && *math != XOR_MATH) {
+ pr_err("Invalid math type %d in entry: '%s'\n", *math, entry);
+ return -EINVAL;
+ }
+ *r_eiw = tmp_r_eiw;
+ *r_eig = tmp_r_eig;
+ *hb_ways = tmp_hb_ways;
+
+ return 0;
+}
+
+/*
+ * setup_xor_mapping - Initialize XOR mapping data structure
+ *
+ * The test data sets all use the same HBIG so we can use one set
+ * of xormaps, and set the number to apply based on HBIW before
+ * calling cxl_do_xormap_calc().
+ *
+ * When additional data sets arrive for validation with different
+ * HBIG's this static setup will need to be updated.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int setup_xor_mapping(void)
+{
+ if (nr_maps <= 0)
+ return -EINVAL;
+
+ cximsd = kzalloc(struct_size(cximsd, xormaps, nr_maps), GFP_KERNEL);
+ if (!cximsd)
+ return -ENOMEM;
+
+ memcpy(cximsd->xormaps, xormaps, nr_maps * sizeof(*cximsd->xormaps));
+ cximsd->nr_maps = nr_maps;
+
+ return 0;
+}
+
+static int test_random_params(void)
+{
+ u8 valid_eiws[] = { 0, 1, 2, 3, 4, 8, 9, 10 };
+ u16 valid_eigs[] = { 0, 1, 2, 3, 4, 5, 6 };
+ int i, ways, pos, reverse_pos;
+ u64 dpa, hpa, reverse_dpa;
+ int iterations = 10000;
+ int failures = 0;
+
+ for (i = 0; i < iterations; i++) {
+ /* Generate valid random parameters for eiw, eig, pos, dpa */
+ u8 eiw = valid_eiws[get_random_u32() % ARRAY_SIZE(valid_eiws)];
+ u16 eig = valid_eigs[get_random_u32() % ARRAY_SIZE(valid_eigs)];
+
+ eiw_to_ways(eiw, &ways);
+ pos = get_random_u32() % ways;
+ dpa = get_random_u64() >> 12;
+
+ hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig);
+ reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig);
+ reverse_pos = cxl_calculate_position(hpa, eiw, eig);
+
+ if (reverse_dpa != dpa || reverse_pos != pos) {
+ pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n",
+ i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw,
+ eig);
+
+ if (failures++ > 10) {
+ pr_err("test random too many failures, stop\n");
+ break;
+ }
+ }
+ }
+ pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures);
+
+ if (failures)
+ return -EINVAL;
+
+ return 0;
+}
+
+struct param_test {
+ u8 eiw;
+ u16 eig;
+ int pos;
+ bool expect; /* true: expect pass, false: expect fail */
+ const char *desc;
+};
+
+static struct param_test param_tests[] = {
+ { 0x0, 0, 0, true, "1-way, min eig=0, pos=0" },
+ { 0x0, 3, 0, true, "1-way, mid eig=3, pos=0" },
+ { 0x0, 6, 0, true, "1-way, max eig=6, pos=0" },
+ { 0x1, 0, 0, true, "2-way, eig=0, pos=0" },
+ { 0x1, 3, 1, true, "2-way, eig=3, max pos=1" },
+ { 0x1, 6, 1, true, "2-way, eig=6, max pos=1" },
+ { 0x2, 0, 0, true, "4-way, eig=0, pos=0" },
+ { 0x2, 3, 3, true, "4-way, eig=3, max pos=3" },
+ { 0x2, 6, 3, true, "4-way, eig=6, max pos=3" },
+ { 0x3, 0, 0, true, "8-way, eig=0, pos=0" },
+ { 0x3, 3, 7, true, "8-way, eig=3, max pos=7" },
+ { 0x3, 6, 7, true, "8-way, eig=6, max pos=7" },
+ { 0x4, 0, 0, true, "16-way, eig=0, pos=0" },
+ { 0x4, 3, 15, true, "16-way, eig=3, max pos=15" },
+ { 0x4, 6, 15, true, "16-way, eig=6, max pos=15" },
+ { 0x8, 0, 0, true, "3-way, eig=0, pos=0" },
+ { 0x8, 3, 2, true, "3-way, eig=3, max pos=2" },
+ { 0x8, 6, 2, true, "3-way, eig=6, max pos=2" },
+ { 0x9, 0, 0, true, "6-way, eig=0, pos=0" },
+ { 0x9, 3, 5, true, "6-way, eig=3, max pos=5" },
+ { 0x9, 6, 5, true, "6-way, eig=6, max pos=5" },
+ { 0xA, 0, 0, true, "12-way, eig=0, pos=0" },
+ { 0xA, 3, 11, true, "12-way, eig=3, max pos=11" },
+ { 0xA, 6, 11, true, "12-way, eig=6, max pos=11" },
+ { 0x5, 0, 0, false, "invalid eiw=5" },
+ { 0x7, 0, 0, false, "invalid eiw=7" },
+ { 0xB, 0, 0, false, "invalid eiw=0xB" },
+ { 0xFF, 0, 0, false, "invalid eiw=0xFF" },
+ { 0x1, 7, 0, false, "invalid eig=7 (out of range)" },
+ { 0x2, 0x10, 0, false, "invalid eig=0x10" },
+ { 0x3, 0xFFFF, 0, false, "invalid eig=0xFFFF" },
+ { 0x1, 0, -1, false, "pos < 0" },
+ { 0x1, 0, 2, false, "2-way, pos=2 (>= ways)" },
+ { 0x2, 0, 4, false, "4-way, pos=4 (>= ways)" },
+ { 0x3, 0, 8, false, "8-way, pos=8 (>= ways)" },
+ { 0x4, 0, 16, false, "16-way, pos=16 (>= ways)" },
+ { 0x8, 0, 3, false, "3-way, pos=3 (>= ways)" },
+ { 0x9, 0, 6, false, "6-way, pos=6 (>= ways)" },
+ { 0xA, 0, 12, false, "12-way, pos=12 (>= ways)" },
+};
+
+static int test_cxl_validate_translation_params(void)
+{
+ int i, rc, failures = 0;
+ bool valid;
+
+ for (i = 0; i < ARRAY_SIZE(param_tests); i++) {
+ struct param_test *t = &param_tests[i];
+
+ rc = cxl_validate_translation_params(t->eiw, t->eig, t->pos);
+ valid = (rc == 0);
+
+ if (valid != t->expect) {
+ pr_err("test params failed: %s\n", t->desc);
+ failures++;
+ }
+ }
+ pr_info("..... test params: PASS %d FAIL %d\n", i - failures, failures);
+
+ if (failures)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * cxl_translate_init
+ *
+ * Run the internal validation tests when no params are passed.
+ * Otherwise, parse the parameters (test vectors), and kick off
+ * the translation test.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int __init cxl_translate_init(void)
+{
+ int rc, i;
+
+ /* If no tables are passed, validate module params only */
+ if (table_num == 0) {
+ pr_info("Internal validation test start...\n");
+ rc = test_cxl_validate_translation_params();
+ if (rc)
+ return rc;
+
+ rc = test_random_params();
+ if (rc)
+ return rc;
+
+ pr_info("Internal validation test completed successfully\n");
+
+ return 0;
+ }
+
+ pr_info("CXL translate test module loaded with %d test vectors\n",
+ table_num);
+
+ rc = setup_xor_mapping();
+ if (rc)
+ return rc;
+
+ /* Process each test vector */
+ for (i = 0; i < table_num; i++) {
+ u64 dpa, expect_spa;
+ int pos, math;
+ u8 r_eiw, hb_ways;
+ u16 r_eig;
+
+ pr_debug("Processing test vector %d: '%s'\n", i, table[i]);
+
+ /* Parse the test vector */
+ rc = parse_test_vector(table[i], &dpa, &pos, &r_eiw, &r_eig,
+ &hb_ways, &math, &expect_spa);
+ if (rc) {
+ pr_err("CXL Translate Test %d: FAIL\n"
+ " Failed to parse test vector '%s'\n",
+ i, table[i]);
+ continue;
+ }
+ /* Run the translation test */
+ rc = run_translation_test(dpa, pos, r_eiw, r_eig, hb_ways, math,
+ expect_spa);
+ if (rc) {
+ pr_err("CXL Translate Test %d: FAIL\n"
+ " dpa=%llu pos=%d r_eiw=%u r_eig=%u hb_ways=%u math=%s expect_spa=%llu\n",
+ i, dpa, pos, r_eiw, r_eig, hb_ways,
+ (math == XOR_MATH) ? "XOR" : "MODULO",
+ expect_spa);
+ } else {
+ pr_info("CXL Translate Test %d: PASS\n", i);
+ }
+ }
+
+ kfree(cximsd);
+ pr_info("CXL translate test completed\n");
+
+ return 0;
+}
+
+static void __exit cxl_translate_exit(void)
+{
+ pr_info("CXL translate test module unloaded\n");
+}
+
+module_param_array(table, charp, &table_num, 0444);
+MODULE_PARM_DESC(table, "Test vectors as space-separated decimal strings");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("cxl_test: cxl address translation test module");
+MODULE_IMPORT_NS("CXL");
+
+module_init(cxl_translate_init);
+module_exit(cxl_translate_exit);
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
new file mode 100644
index 000000000000..176dcde570cd
--- /dev/null
+++ b/tools/testing/cxl/test/mem.c
@@ -0,0 +1,1895 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2021 Intel Corporation. All rights reserved.
+
+#include <linux/platform_device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/bits.h>
+#include <cxl/mailbox.h>
+#include <linux/unaligned.h>
+#include <crypto/sha2.h>
+#include <cxlmem.h>
+
+#include "trace.h"
+
+#define LSA_SIZE SZ_128K
+#define FW_SIZE SZ_64M
+#define FW_SLOTS 3
+#define DEV_SIZE SZ_2G
+#define EFFECT(x) (1U << x)
+
+#define MOCK_INJECT_DEV_MAX 8
+#define MOCK_INJECT_TEST_MAX 128
+
+static unsigned int poison_inject_dev_max = MOCK_INJECT_DEV_MAX;
+
+enum cxl_command_effects {
+ CONF_CHANGE_COLD_RESET = 0,
+ CONF_CHANGE_IMMEDIATE,
+ DATA_CHANGE_IMMEDIATE,
+ POLICY_CHANGE_IMMEDIATE,
+ LOG_CHANGE_IMMEDIATE,
+ SECURITY_CHANGE_IMMEDIATE,
+ BACKGROUND_OP,
+ SECONDARY_MBOX_SUPPORTED,
+};
+
+#define CXL_CMD_EFFECT_NONE cpu_to_le16(0)
+
+static struct cxl_cel_entry mock_cel[] = {
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_LOGS),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_FEATURES),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FEATURE),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_SET_FEATURE),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE)),
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_LSA),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_PARTITION_INFO),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_SET_LSA),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE) |
+ EFFECT(DATA_CHANGE_IMMEDIATE)),
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_SET_SHUTDOWN_STATE),
+ .effect = POLICY_CHANGE_IMMEDIATE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_INJECT_POISON),
+ .effect = cpu_to_le16(EFFECT(DATA_CHANGE_IMMEDIATE)),
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_CLEAR_POISON),
+ .effect = cpu_to_le16(EFFECT(DATA_CHANGE_IMMEDIATE)),
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FW_INFO),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_TRANSFER_FW),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_COLD_RESET) |
+ EFFECT(BACKGROUND_OP)),
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_ACTIVATE_FW),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_COLD_RESET) |
+ EFFECT(CONF_CHANGE_IMMEDIATE)),
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_SANITIZE),
+ .effect = cpu_to_le16(EFFECT(DATA_CHANGE_IMMEDIATE) |
+ EFFECT(SECURITY_CHANGE_IMMEDIATE) |
+ EFFECT(BACKGROUND_OP)),
+ },
+};
+
+/* See CXL 2.0 Table 181 Get Health Info Output Payload */
+struct cxl_mbox_health_info {
+ u8 health_status;
+ u8 media_status;
+ u8 ext_status;
+ u8 life_used;
+ __le16 temperature;
+ __le32 dirty_shutdowns;
+ __le32 volatile_errors;
+ __le32 pmem_errors;
+} __packed;
+
+static struct {
+ struct cxl_mbox_get_supported_logs gsl;
+ struct cxl_gsl_entry entry;
+} mock_gsl_payload = {
+ .gsl = {
+ .entries = cpu_to_le16(1),
+ },
+ .entry = {
+ .uuid = DEFINE_CXL_CEL_UUID,
+ .size = cpu_to_le32(sizeof(mock_cel)),
+ },
+};
+
+#define PASS_TRY_LIMIT 3
+
+#define CXL_TEST_EVENT_CNT_MAX 15
+
+/* Set a number of events to return at a time for simulation. */
+#define CXL_TEST_EVENT_RET_MAX 4
+
+struct mock_event_log {
+ u16 clear_idx;
+ u16 cur_idx;
+ u16 nr_events;
+ u16 nr_overflow;
+ u16 overflow_reset;
+ struct cxl_event_record_raw *events[CXL_TEST_EVENT_CNT_MAX];
+};
+
+struct mock_event_store {
+ struct mock_event_log mock_logs[CXL_EVENT_TYPE_MAX];
+ u32 ev_status;
+};
+
+struct vendor_test_feat {
+ __le32 data;
+} __packed;
+
+struct cxl_mockmem_data {
+ void *lsa;
+ void *fw;
+ int fw_slot;
+ int fw_staged;
+ size_t fw_size;
+ u32 security_state;
+ u8 user_pass[NVDIMM_PASSPHRASE_LEN];
+ u8 master_pass[NVDIMM_PASSPHRASE_LEN];
+ int user_limit;
+ int master_limit;
+ struct mock_event_store mes;
+ struct cxl_memdev_state *mds;
+ u8 event_buf[SZ_4K];
+ u64 timestamp;
+ unsigned long sanitize_timeout;
+ struct vendor_test_feat test_feat;
+ u8 shutdown_state;
+};
+
+static struct mock_event_log *event_find_log(struct device *dev, int log_type)
+{
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+
+ if (log_type >= CXL_EVENT_TYPE_MAX)
+ return NULL;
+ return &mdata->mes.mock_logs[log_type];
+}
+
+static struct cxl_event_record_raw *event_get_current(struct mock_event_log *log)
+{
+ return log->events[log->cur_idx];
+}
+
+static void event_reset_log(struct mock_event_log *log)
+{
+ log->cur_idx = 0;
+ log->clear_idx = 0;
+ log->nr_overflow = log->overflow_reset;
+}
+
+/* Handle can never be 0 use 1 based indexing for handle */
+static u16 event_get_clear_handle(struct mock_event_log *log)
+{
+ return log->clear_idx + 1;
+}
+
+/* Handle can never be 0 use 1 based indexing for handle */
+static __le16 event_get_cur_event_handle(struct mock_event_log *log)
+{
+ u16 cur_handle = log->cur_idx + 1;
+
+ return cpu_to_le16(cur_handle);
+}
+
+static bool event_log_empty(struct mock_event_log *log)
+{
+ return log->cur_idx == log->nr_events;
+}
+
+static void mes_add_event(struct mock_event_store *mes,
+ enum cxl_event_log_type log_type,
+ struct cxl_event_record_raw *event)
+{
+ struct mock_event_log *log;
+
+ if (WARN_ON(log_type >= CXL_EVENT_TYPE_MAX))
+ return;
+
+ log = &mes->mock_logs[log_type];
+
+ if ((log->nr_events + 1) > CXL_TEST_EVENT_CNT_MAX) {
+ log->nr_overflow++;
+ log->overflow_reset = log->nr_overflow;
+ return;
+ }
+
+ log->events[log->nr_events] = event;
+ log->nr_events++;
+}
+
+/*
+ * Vary the number of events returned to simulate events occuring while the
+ * logs are being read.
+ */
+static atomic_t event_counter = ATOMIC_INIT(0);
+
+static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_get_event_payload *pl;
+ struct mock_event_log *log;
+ int ret_limit;
+ u8 log_type;
+ int i;
+
+ if (cmd->size_in != sizeof(log_type))
+ return -EINVAL;
+
+ /* Vary return limit from 1 to CXL_TEST_EVENT_RET_MAX */
+ ret_limit = (atomic_inc_return(&event_counter) % CXL_TEST_EVENT_RET_MAX) + 1;
+
+ if (cmd->size_out < struct_size(pl, records, ret_limit))
+ return -EINVAL;
+
+ log_type = *((u8 *)cmd->payload_in);
+ if (log_type >= CXL_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ memset(cmd->payload_out, 0, struct_size(pl, records, 0));
+
+ log = event_find_log(dev, log_type);
+ if (!log || event_log_empty(log))
+ return 0;
+
+ pl = cmd->payload_out;
+
+ for (i = 0; i < ret_limit && !event_log_empty(log); i++) {
+ memcpy(&pl->records[i], event_get_current(log),
+ sizeof(pl->records[i]));
+ pl->records[i].event.generic.hdr.handle =
+ event_get_cur_event_handle(log);
+ log->cur_idx++;
+ }
+
+ cmd->size_out = struct_size(pl, records, i);
+ pl->record_count = cpu_to_le16(i);
+ if (!event_log_empty(log))
+ pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
+
+ if (log->nr_overflow) {
+ u64 ns;
+
+ pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW;
+ pl->overflow_err_count = cpu_to_le16(log->nr_overflow);
+ ns = ktime_get_real_ns();
+ ns -= 5000000000; /* 5s ago */
+ pl->first_overflow_timestamp = cpu_to_le64(ns);
+ ns = ktime_get_real_ns();
+ ns -= 1000000000; /* 1s ago */
+ pl->last_overflow_timestamp = cpu_to_le64(ns);
+ }
+
+ return 0;
+}
+
+static int mock_clear_event(struct device *dev, struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_clear_event_payload *pl = cmd->payload_in;
+ struct mock_event_log *log;
+ u8 log_type = pl->event_log;
+ u16 handle;
+ int nr;
+
+ if (log_type >= CXL_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ log = event_find_log(dev, log_type);
+ if (!log)
+ return 0; /* No mock data in this log */
+
+ /*
+ * This check is technically not invalid per the specification AFAICS.
+ * (The host could 'guess' handles and clear them in order).
+ * However, this is not good behavior for the host so test it.
+ */
+ if (log->clear_idx + pl->nr_recs > log->cur_idx) {
+ dev_err(dev,
+ "Attempting to clear more events than returned!\n");
+ return -EINVAL;
+ }
+
+ /* Check handle order prior to clearing events */
+ for (nr = 0, handle = event_get_clear_handle(log);
+ nr < pl->nr_recs;
+ nr++, handle++) {
+ if (handle != le16_to_cpu(pl->handles[nr])) {
+ dev_err(dev, "Clearing events out of order\n");
+ return -EINVAL;
+ }
+ }
+
+ if (log->nr_overflow)
+ log->nr_overflow = 0;
+
+ /* Clear events */
+ log->clear_idx += pl->nr_recs;
+ return 0;
+}
+
+static void cxl_mock_event_trigger(struct device *dev)
+{
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+ struct mock_event_store *mes = &mdata->mes;
+ int i;
+
+ for (i = CXL_EVENT_TYPE_INFO; i < CXL_EVENT_TYPE_MAX; i++) {
+ struct mock_event_log *log;
+
+ log = event_find_log(dev, i);
+ if (log)
+ event_reset_log(log);
+ }
+
+ cxl_mem_get_event_records(mdata->mds, mes->ev_status);
+}
+
+struct cxl_event_record_raw maint_needed = {
+ .id = UUID_INIT(0xBA5EBA11, 0xABCD, 0xEFEB,
+ 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5),
+ .event.generic = {
+ .hdr = {
+ .length = sizeof(struct cxl_event_record_raw),
+ .flags[0] = CXL_EVENT_RECORD_FLAG_MAINT_NEEDED,
+ /* .handle = Set dynamically */
+ .related_handle = cpu_to_le16(0xa5b6),
+ },
+ .data = { 0xDE, 0xAD, 0xBE, 0xEF },
+ },
+};
+
+struct cxl_event_record_raw hardware_replace = {
+ .id = UUID_INIT(0xABCDEFEB, 0xBA11, 0xBA5E,
+ 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5),
+ .event.generic = {
+ .hdr = {
+ .length = sizeof(struct cxl_event_record_raw),
+ .flags[0] = CXL_EVENT_RECORD_FLAG_HW_REPLACE,
+ /* .handle = Set dynamically */
+ .related_handle = cpu_to_le16(0xb6a5),
+ },
+ .data = { 0xDE, 0xAD, 0xBE, 0xEF },
+ },
+};
+
+struct cxl_test_gen_media {
+ uuid_t id;
+ struct cxl_event_gen_media rec;
+} __packed;
+
+struct cxl_test_gen_media gen_media = {
+ .id = CXL_EVENT_GEN_MEDIA_UUID,
+ .rec = {
+ .media_hdr = {
+ .hdr = {
+ .length = sizeof(struct cxl_test_gen_media),
+ .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT,
+ /* .handle = Set dynamically */
+ .related_handle = cpu_to_le16(0),
+ },
+ .phys_addr = cpu_to_le64(0x2000),
+ .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT,
+ .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR,
+ .transaction_type = CXL_GMER_TRANS_HOST_WRITE,
+ /* .validity_flags = <set below> */
+ .channel = 1,
+ .rank = 30,
+ },
+ .component_id = { 0x3, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d },
+ .cme_threshold_ev_flags = 3,
+ .cme_count = { 33, 0, 0 },
+ .sub_type = 0x2,
+ },
+};
+
+struct cxl_test_dram {
+ uuid_t id;
+ struct cxl_event_dram rec;
+} __packed;
+
+struct cxl_test_dram dram = {
+ .id = CXL_EVENT_DRAM_UUID,
+ .rec = {
+ .media_hdr = {
+ .hdr = {
+ .length = sizeof(struct cxl_test_dram),
+ .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED,
+ /* .handle = Set dynamically */
+ .related_handle = cpu_to_le16(0),
+ },
+ .phys_addr = cpu_to_le64(0x8000),
+ .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT,
+ .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR,
+ .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB,
+ /* .validity_flags = <set below> */
+ .channel = 1,
+ },
+ .bank_group = 5,
+ .bank = 2,
+ .column = {0xDE, 0xAD},
+ .component_id = { 0x1, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d },
+ .sub_channel = 8,
+ .cme_threshold_ev_flags = 2,
+ .cvme_count = { 14, 0, 0 },
+ .sub_type = 0x5,
+ },
+};
+
+struct cxl_test_mem_module {
+ uuid_t id;
+ struct cxl_event_mem_module rec;
+} __packed;
+
+struct cxl_test_mem_module mem_module = {
+ .id = CXL_EVENT_MEM_MODULE_UUID,
+ .rec = {
+ .hdr = {
+ .length = sizeof(struct cxl_test_mem_module),
+ /* .handle = Set dynamically */
+ .related_handle = cpu_to_le16(0),
+ },
+ .event_type = CXL_MMER_TEMP_CHANGE,
+ .info = {
+ .health_status = CXL_DHI_HS_PERFORMANCE_DEGRADED,
+ .media_status = CXL_DHI_MS_ALL_DATA_LOST,
+ .add_status = (CXL_DHI_AS_CRITICAL << 2) |
+ (CXL_DHI_AS_WARNING << 4) |
+ (CXL_DHI_AS_WARNING << 5),
+ .device_temp = { 0xDE, 0xAD},
+ .dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef },
+ .cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef },
+ .cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef },
+ },
+ /* .validity_flags = <set below> */
+ .component_id = { 0x2, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d },
+ .event_sub_type = 0x3,
+ },
+};
+
+static int mock_set_timestamp(struct cxl_dev_state *cxlds,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev);
+ struct cxl_mbox_set_timestamp_in *ts = cmd->payload_in;
+
+ if (cmd->size_in != sizeof(*ts))
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ mdata->timestamp = le64_to_cpu(ts->timestamp);
+ return 0;
+}
+
+static void cxl_mock_add_event_logs(struct mock_event_store *mes)
+{
+ put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK |
+ CXL_GMER_VALID_COMPONENT | CXL_GMER_VALID_COMPONENT_ID_FORMAT,
+ &gen_media.rec.media_hdr.validity_flags);
+
+ put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP |
+ CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN | CXL_DER_VALID_SUB_CHANNEL |
+ CXL_DER_VALID_COMPONENT | CXL_DER_VALID_COMPONENT_ID_FORMAT,
+ &dram.rec.media_hdr.validity_flags);
+
+ put_unaligned_le16(CXL_MMER_VALID_COMPONENT | CXL_MMER_VALID_COMPONENT_ID_FORMAT,
+ &mem_module.rec.validity_flags);
+
+ mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed);
+ mes_add_event(mes, CXL_EVENT_TYPE_INFO,
+ (struct cxl_event_record_raw *)&gen_media);
+ mes_add_event(mes, CXL_EVENT_TYPE_INFO,
+ (struct cxl_event_record_raw *)&mem_module);
+ mes->ev_status |= CXLDEV_EVENT_STATUS_INFO;
+
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &maint_needed);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL,
+ (struct cxl_event_record_raw *)&dram);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL,
+ (struct cxl_event_record_raw *)&gen_media);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL,
+ (struct cxl_event_record_raw *)&mem_module);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL,
+ (struct cxl_event_record_raw *)&dram);
+ /* Overflow this log */
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
+ mes->ev_status |= CXLDEV_EVENT_STATUS_FAIL;
+
+ mes_add_event(mes, CXL_EVENT_TYPE_FATAL, &hardware_replace);
+ mes_add_event(mes, CXL_EVENT_TYPE_FATAL,
+ (struct cxl_event_record_raw *)&dram);
+ mes->ev_status |= CXLDEV_EVENT_STATUS_FATAL;
+}
+
+static int mock_gsl(struct cxl_mbox_cmd *cmd)
+{
+ if (cmd->size_out < sizeof(mock_gsl_payload))
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, &mock_gsl_payload, sizeof(mock_gsl_payload));
+ cmd->size_out = sizeof(mock_gsl_payload);
+
+ return 0;
+}
+
+static int mock_get_log(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+ struct cxl_mbox_get_log *gl = cmd->payload_in;
+ u32 offset = le32_to_cpu(gl->offset);
+ u32 length = le32_to_cpu(gl->length);
+ uuid_t uuid = DEFINE_CXL_CEL_UUID;
+ void *data = &mock_cel;
+
+ if (cmd->size_in < sizeof(*gl))
+ return -EINVAL;
+ if (length > cxl_mbox->payload_size)
+ return -EINVAL;
+ if (offset + length > sizeof(mock_cel))
+ return -EINVAL;
+ if (!uuid_equal(&gl->uuid, &uuid))
+ return -EINVAL;
+ if (length > cmd->size_out)
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, data + offset, length);
+
+ return 0;
+}
+
+static int mock_rcd_id(struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_identify id = {
+ .fw_revision = { "mock fw v1 " },
+ .total_capacity =
+ cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER),
+ .volatile_capacity =
+ cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER),
+ };
+
+ if (cmd->size_out < sizeof(id))
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, &id, sizeof(id));
+
+ return 0;
+}
+
+static int mock_id(struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_identify id = {
+ .fw_revision = { "mock fw v1 " },
+ .lsa_size = cpu_to_le32(LSA_SIZE),
+ .partition_align =
+ cpu_to_le64(SZ_256M / CXL_CAPACITY_MULTIPLIER),
+ .total_capacity =
+ cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER),
+ .inject_poison_limit = cpu_to_le16(MOCK_INJECT_TEST_MAX),
+ };
+
+ put_unaligned_le24(CXL_POISON_LIST_MAX, id.poison_list_max_mer);
+
+ if (cmd->size_out < sizeof(id))
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, &id, sizeof(id));
+
+ return 0;
+}
+
+static int mock_partition_info(struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_partition_info pi = {
+ .active_volatile_cap =
+ cpu_to_le64(DEV_SIZE / 2 / CXL_CAPACITY_MULTIPLIER),
+ .active_persistent_cap =
+ cpu_to_le64(DEV_SIZE / 2 / CXL_CAPACITY_MULTIPLIER),
+ };
+
+ if (cmd->size_out < sizeof(pi))
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, &pi, sizeof(pi));
+
+ return 0;
+}
+
+void cxl_mockmem_sanitize_work(struct work_struct *work)
+{
+ struct cxl_memdev_state *mds =
+ container_of(work, typeof(*mds), security.poll_dwork.work);
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
+ mutex_lock(&cxl_mbox->mbox_mutex);
+ if (mds->security.sanitize_node)
+ sysfs_notify_dirent(mds->security.sanitize_node);
+ mds->security.sanitize_active = false;
+ mutex_unlock(&cxl_mbox->mbox_mutex);
+
+ dev_dbg(mds->cxlds.dev, "sanitize complete\n");
+}
+
+static int mock_sanitize(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_memdev_state *mds = mdata->mds;
+ struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+ int rc = 0;
+
+ if (cmd->size_in != 0)
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ mutex_lock(&cxl_mbox->mbox_mutex);
+ if (schedule_delayed_work(&mds->security.poll_dwork,
+ msecs_to_jiffies(mdata->sanitize_timeout))) {
+ mds->security.sanitize_active = true;
+ dev_dbg(mds->cxlds.dev, "sanitize issued\n");
+ } else
+ rc = -EBUSY;
+ mutex_unlock(&cxl_mbox->mbox_mutex);
+
+ return rc;
+}
+
+static int mock_secure_erase(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ if (cmd->size_in != 0)
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int mock_get_security_state(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ if (cmd->size_in)
+ return -EINVAL;
+
+ if (cmd->size_out != sizeof(u32))
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, &mdata->security_state, sizeof(u32));
+
+ return 0;
+}
+
+static void master_plimit_check(struct cxl_mockmem_data *mdata)
+{
+ if (mdata->master_limit == PASS_TRY_LIMIT)
+ return;
+ mdata->master_limit++;
+ if (mdata->master_limit == PASS_TRY_LIMIT)
+ mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PLIMIT;
+}
+
+static void user_plimit_check(struct cxl_mockmem_data *mdata)
+{
+ if (mdata->user_limit == PASS_TRY_LIMIT)
+ return;
+ mdata->user_limit++;
+ if (mdata->user_limit == PASS_TRY_LIMIT)
+ mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT;
+}
+
+static int mock_set_passphrase(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_set_pass *set_pass;
+
+ if (cmd->size_in != sizeof(*set_pass))
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ set_pass = cmd->payload_in;
+ switch (set_pass->type) {
+ case CXL_PMEM_SEC_PASS_MASTER:
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+ /*
+ * CXL spec rev3.0 8.2.9.8.6.2, The master pasphrase shall only be set in
+ * the security disabled state when the user passphrase is not set.
+ */
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+ if (memcmp(mdata->master_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) {
+ master_plimit_check(mdata);
+ cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
+ return -ENXIO;
+ }
+ memcpy(mdata->master_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN);
+ mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PASS_SET;
+ return 0;
+
+ case CXL_PMEM_SEC_PASS_USER:
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+ if (memcmp(mdata->user_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) {
+ user_plimit_check(mdata);
+ cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
+ return -ENXIO;
+ }
+ memcpy(mdata->user_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN);
+ mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PASS_SET;
+ return 0;
+
+ default:
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ }
+ return -EINVAL;
+}
+
+static int mock_disable_passphrase(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_disable_pass *dis_pass;
+
+ if (cmd->size_in != sizeof(*dis_pass))
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ dis_pass = cmd->payload_in;
+ switch (dis_pass->type) {
+ case CXL_PMEM_SEC_PASS_MASTER:
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (!(mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (memcmp(dis_pass->pass, mdata->master_pass, NVDIMM_PASSPHRASE_LEN)) {
+ master_plimit_check(mdata);
+ cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
+ return -ENXIO;
+ }
+
+ mdata->master_limit = 0;
+ memset(mdata->master_pass, 0, NVDIMM_PASSPHRASE_LEN);
+ mdata->security_state &= ~CXL_PMEM_SEC_STATE_MASTER_PASS_SET;
+ return 0;
+
+ case CXL_PMEM_SEC_PASS_USER:
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (memcmp(dis_pass->pass, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) {
+ user_plimit_check(mdata);
+ cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
+ return -ENXIO;
+ }
+
+ mdata->user_limit = 0;
+ memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN);
+ mdata->security_state &= ~(CXL_PMEM_SEC_STATE_USER_PASS_SET |
+ CXL_PMEM_SEC_STATE_LOCKED);
+ return 0;
+
+ default:
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mock_freeze_security(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ if (cmd->size_in != 0)
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN)
+ return 0;
+
+ mdata->security_state |= CXL_PMEM_SEC_STATE_FROZEN;
+ return 0;
+}
+
+static int mock_unlock_security(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ if (cmd->size_in != NVDIMM_PASSPHRASE_LEN)
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (!(mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (memcmp(cmd->payload_in, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) {
+ if (++mdata->user_limit == PASS_TRY_LIMIT)
+ mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT;
+ cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
+ return -ENXIO;
+ }
+
+ mdata->user_limit = 0;
+ mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED;
+ return 0;
+}
+
+static int mock_passphrase_secure_erase(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_pass_erase *erase;
+
+ if (cmd->size_in != sizeof(*erase))
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ erase = cmd->payload_in;
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT &&
+ erase->type == CXL_PMEM_SEC_PASS_USER) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT &&
+ erase->type == CXL_PMEM_SEC_PASS_MASTER) {
+ cmd->return_code = CXL_MBOX_CMD_RC_SECURITY;
+ return -ENXIO;
+ }
+
+ switch (erase->type) {
+ case CXL_PMEM_SEC_PASS_MASTER:
+ /*
+ * The spec does not clearly define the behavior of the scenario
+ * where a master passphrase is passed in while the master
+ * passphrase is not set and user passphrase is not set. The
+ * code will take the assumption that it will behave the same
+ * as a CXL secure erase command without passphrase (0x4401).
+ */
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET) {
+ if (memcmp(mdata->master_pass, erase->pass,
+ NVDIMM_PASSPHRASE_LEN)) {
+ master_plimit_check(mdata);
+ cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
+ return -ENXIO;
+ }
+ mdata->master_limit = 0;
+ mdata->user_limit = 0;
+ mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET;
+ memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN);
+ mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED;
+ } else {
+ /*
+ * CXL rev3 8.2.9.8.6.3 Disable Passphrase
+ * When master passphrase is disabled, the device shall
+ * return Invalid Input for the Passphrase Secure Erase
+ * command with master passphrase.
+ */
+ return -EINVAL;
+ }
+ /* Scramble encryption keys so that data is effectively erased */
+ break;
+ case CXL_PMEM_SEC_PASS_USER:
+ /*
+ * The spec does not clearly define the behavior of the scenario
+ * where a user passphrase is passed in while the user
+ * passphrase is not set. The code will take the assumption that
+ * it will behave the same as a CXL secure erase command without
+ * passphrase (0x4401).
+ */
+ if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
+ if (memcmp(mdata->user_pass, erase->pass,
+ NVDIMM_PASSPHRASE_LEN)) {
+ user_plimit_check(mdata);
+ cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE;
+ return -ENXIO;
+ }
+ mdata->user_limit = 0;
+ mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET;
+ memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN);
+ }
+
+ /*
+ * CXL rev3 Table 8-118
+ * If user passphrase is not set or supported by device, current
+ * passphrase value is ignored. Will make the assumption that
+ * the operation will proceed as secure erase w/o passphrase
+ * since spec is not explicit.
+ */
+
+ /* Scramble encryption keys so that data is effectively erased */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mock_get_lsa(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in;
+ void *lsa = mdata->lsa;
+ u32 offset, length;
+
+ if (sizeof(*get_lsa) > cmd->size_in)
+ return -EINVAL;
+ offset = le32_to_cpu(get_lsa->offset);
+ length = le32_to_cpu(get_lsa->length);
+ if (offset + length > LSA_SIZE)
+ return -EINVAL;
+ if (length > cmd->size_out)
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, lsa + offset, length);
+ return 0;
+}
+
+static int mock_set_lsa(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in;
+ void *lsa = mdata->lsa;
+ u32 offset, length;
+
+ if (sizeof(*set_lsa) > cmd->size_in)
+ return -EINVAL;
+ offset = le32_to_cpu(set_lsa->offset);
+ length = cmd->size_in - sizeof(*set_lsa);
+ if (offset + length > LSA_SIZE)
+ return -EINVAL;
+
+ memcpy(lsa + offset, &set_lsa->data[0], length);
+ return 0;
+}
+
+static int mock_health_info(struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_health_info health_info = {
+ /* set flags for maint needed, perf degraded, hw replacement */
+ .health_status = 0x7,
+ /* set media status to "All Data Lost" */
+ .media_status = 0x3,
+ /*
+ * set ext_status flags for:
+ * ext_life_used: normal,
+ * ext_temperature: critical,
+ * ext_corrected_volatile: warning,
+ * ext_corrected_persistent: normal,
+ */
+ .ext_status = 0x18,
+ .life_used = 15,
+ .temperature = cpu_to_le16(25),
+ .dirty_shutdowns = cpu_to_le32(10),
+ .volatile_errors = cpu_to_le32(20),
+ .pmem_errors = cpu_to_le32(30),
+ };
+
+ if (cmd->size_out < sizeof(health_info))
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, &health_info, sizeof(health_info));
+ return 0;
+}
+
+static int mock_set_shutdown_state(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_shutdown_state_in *ss = cmd->payload_in;
+
+ if (cmd->size_in != sizeof(*ss))
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ mdata->shutdown_state = ss->state;
+ return 0;
+}
+
+static struct mock_poison {
+ struct cxl_dev_state *cxlds;
+ u64 dpa;
+} mock_poison_list[MOCK_INJECT_TEST_MAX];
+
+static struct cxl_mbox_poison_out *
+cxl_get_injected_po(struct cxl_dev_state *cxlds, u64 offset, u64 length)
+{
+ struct cxl_mbox_poison_out *po;
+ int nr_records = 0;
+ u64 dpa;
+
+ po = kzalloc(struct_size(po, record, poison_inject_dev_max), GFP_KERNEL);
+ if (!po)
+ return NULL;
+
+ for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+ if (mock_poison_list[i].cxlds != cxlds)
+ continue;
+ if (mock_poison_list[i].dpa < offset ||
+ mock_poison_list[i].dpa > offset + length - 1)
+ continue;
+
+ dpa = mock_poison_list[i].dpa + CXL_POISON_SOURCE_INJECTED;
+ po->record[nr_records].address = cpu_to_le64(dpa);
+ po->record[nr_records].length = cpu_to_le32(1);
+ nr_records++;
+ if (nr_records == poison_inject_dev_max)
+ break;
+ }
+
+ /* Always return count, even when zero */
+ po->count = cpu_to_le16(nr_records);
+
+ return po;
+}
+
+static int mock_get_poison(struct cxl_dev_state *cxlds,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_poison_in *pi = cmd->payload_in;
+ struct cxl_mbox_poison_out *po;
+ u64 offset = le64_to_cpu(pi->offset);
+ u64 length = le64_to_cpu(pi->length);
+ int nr_records;
+
+ po = cxl_get_injected_po(cxlds, offset, length);
+ if (!po)
+ return -ENOMEM;
+ nr_records = le16_to_cpu(po->count);
+ memcpy(cmd->payload_out, po, struct_size(po, record, nr_records));
+ cmd->size_out = struct_size(po, record, nr_records);
+ kfree(po);
+
+ return 0;
+}
+
+static bool mock_poison_dev_max_injected(struct cxl_dev_state *cxlds)
+{
+ int count = 0;
+
+ for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+ if (mock_poison_list[i].cxlds == cxlds)
+ count++;
+ }
+ return (count >= poison_inject_dev_max);
+}
+
+static int mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa)
+{
+ /* Return EBUSY to match the CXL driver handling */
+ if (mock_poison_dev_max_injected(cxlds)) {
+ dev_dbg(cxlds->dev,
+ "Device poison injection limit has been reached: %d\n",
+ poison_inject_dev_max);
+ return -EBUSY;
+ }
+
+ for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+ if (!mock_poison_list[i].cxlds) {
+ mock_poison_list[i].cxlds = cxlds;
+ mock_poison_list[i].dpa = dpa;
+ return 0;
+ }
+ }
+ dev_dbg(cxlds->dev,
+ "Mock test poison injection limit has been reached: %d\n",
+ MOCK_INJECT_TEST_MAX);
+
+ return -ENXIO;
+}
+
+static bool mock_poison_found(struct cxl_dev_state *cxlds, u64 dpa)
+{
+ for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+ if (mock_poison_list[i].cxlds == cxlds &&
+ mock_poison_list[i].dpa == dpa)
+ return true;
+ }
+ return false;
+}
+
+static int mock_inject_poison(struct cxl_dev_state *cxlds,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_inject_poison *pi = cmd->payload_in;
+ u64 dpa = le64_to_cpu(pi->address);
+
+ if (mock_poison_found(cxlds, dpa)) {
+ /* Not an error to inject poison if already poisoned */
+ dev_dbg(cxlds->dev, "DPA: 0x%llx already poisoned\n", dpa);
+ return 0;
+ }
+
+ return mock_poison_add(cxlds, dpa);
+}
+
+static bool mock_poison_del(struct cxl_dev_state *cxlds, u64 dpa)
+{
+ for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+ if (mock_poison_list[i].cxlds == cxlds &&
+ mock_poison_list[i].dpa == dpa) {
+ mock_poison_list[i].cxlds = NULL;
+ return true;
+ }
+ }
+ return false;
+}
+
+static int mock_clear_poison(struct cxl_dev_state *cxlds,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_clear_poison *pi = cmd->payload_in;
+ u64 dpa = le64_to_cpu(pi->address);
+
+ /*
+ * A real CXL device will write pi->write_data to the address
+ * being cleared. In this mock, just delete this address from
+ * the mock poison list.
+ */
+ if (!mock_poison_del(cxlds, dpa))
+ dev_dbg(cxlds->dev, "DPA: 0x%llx not in poison list\n", dpa);
+
+ return 0;
+}
+
+static bool mock_poison_list_empty(void)
+{
+ for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
+ if (mock_poison_list[i].cxlds)
+ return false;
+ }
+ return true;
+}
+
+static ssize_t poison_inject_max_show(struct device_driver *drv, char *buf)
+{
+ return sysfs_emit(buf, "%u\n", poison_inject_dev_max);
+}
+
+static ssize_t poison_inject_max_store(struct device_driver *drv,
+ const char *buf, size_t len)
+{
+ int val;
+
+ if (kstrtoint(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ if (!mock_poison_list_empty())
+ return -EBUSY;
+
+ if (val <= MOCK_INJECT_TEST_MAX)
+ poison_inject_dev_max = val;
+ else
+ return -EINVAL;
+
+ return len;
+}
+
+static DRIVER_ATTR_RW(poison_inject_max);
+
+static struct attribute *cxl_mock_mem_core_attrs[] = {
+ &driver_attr_poison_inject_max.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(cxl_mock_mem_core);
+
+static int mock_fw_info(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_fw_info fw_info = {
+ .num_slots = FW_SLOTS,
+ .slot_info = (mdata->fw_slot & 0x7) |
+ ((mdata->fw_staged & 0x7) << 3),
+ .activation_cap = 0,
+ };
+
+ strcpy(fw_info.slot_1_revision, "cxl_test_fw_001");
+ strcpy(fw_info.slot_2_revision, "cxl_test_fw_002");
+ strcpy(fw_info.slot_3_revision, "cxl_test_fw_003");
+ strcpy(fw_info.slot_4_revision, "");
+
+ if (cmd->size_out < sizeof(fw_info))
+ return -EINVAL;
+
+ memcpy(cmd->payload_out, &fw_info, sizeof(fw_info));
+ return 0;
+}
+
+static int mock_transfer_fw(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_transfer_fw *transfer = cmd->payload_in;
+ void *fw = mdata->fw;
+ size_t offset, length;
+
+ offset = le32_to_cpu(transfer->offset) * CXL_FW_TRANSFER_ALIGNMENT;
+ length = cmd->size_in - sizeof(*transfer);
+ if (offset + length > FW_SIZE)
+ return -EINVAL;
+
+ switch (transfer->action) {
+ case CXL_FW_TRANSFER_ACTION_FULL:
+ if (offset != 0)
+ return -EINVAL;
+ fallthrough;
+ case CXL_FW_TRANSFER_ACTION_END:
+ if (transfer->slot == 0 || transfer->slot > FW_SLOTS)
+ return -EINVAL;
+ mdata->fw_size = offset + length;
+ break;
+ case CXL_FW_TRANSFER_ACTION_INITIATE:
+ case CXL_FW_TRANSFER_ACTION_CONTINUE:
+ break;
+ case CXL_FW_TRANSFER_ACTION_ABORT:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ memcpy(fw + offset, transfer->data, length);
+ usleep_range(1500, 2000);
+ return 0;
+}
+
+static int mock_activate_fw(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_activate_fw *activate = cmd->payload_in;
+
+ if (activate->slot == 0 || activate->slot > FW_SLOTS)
+ return -EINVAL;
+
+ switch (activate->action) {
+ case CXL_FW_ACTIVATE_ONLINE:
+ mdata->fw_slot = activate->slot;
+ mdata->fw_staged = 0;
+ return 0;
+ case CXL_FW_ACTIVATE_OFFLINE:
+ mdata->fw_staged = activate->slot;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+#define CXL_VENDOR_FEATURE_TEST \
+ UUID_INIT(0xffffffff, 0xffff, 0xffff, 0xff, 0xff, 0xff, 0xff, 0xff, \
+ 0xff, 0xff, 0xff)
+
+static void fill_feature_vendor_test(struct cxl_feat_entry *feat)
+{
+ feat->uuid = CXL_VENDOR_FEATURE_TEST;
+ feat->id = 0;
+ feat->get_feat_size = cpu_to_le16(0x4);
+ feat->set_feat_size = cpu_to_le16(0x4);
+ feat->flags = cpu_to_le32(CXL_FEATURE_F_CHANGEABLE |
+ CXL_FEATURE_F_DEFAULT_SEL |
+ CXL_FEATURE_F_SAVED_SEL);
+ feat->get_feat_ver = 1;
+ feat->set_feat_ver = 1;
+ feat->effects = cpu_to_le16(CXL_CMD_CONFIG_CHANGE_COLD_RESET |
+ CXL_CMD_EFFECTS_VALID);
+}
+
+#define MAX_CXL_TEST_FEATS 1
+
+static int mock_get_test_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct vendor_test_feat *output = cmd->payload_out;
+ struct cxl_mbox_get_feat_in *input = cmd->payload_in;
+ u16 offset = le16_to_cpu(input->offset);
+ u16 count = le16_to_cpu(input->count);
+ u8 *ptr;
+
+ if (offset > sizeof(*output)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ if (offset + count > sizeof(*output)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ ptr = (u8 *)&mdata->test_feat + offset;
+ memcpy((u8 *)output + offset, ptr, count);
+
+ return 0;
+}
+
+static int mock_get_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_feat_in *input = cmd->payload_in;
+
+ if (uuid_equal(&input->uuid, &CXL_VENDOR_FEATURE_TEST))
+ return mock_get_test_feature(mdata, cmd);
+
+ cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
+
+ return -EOPNOTSUPP;
+}
+
+static int mock_set_test_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_feat_in *input = cmd->payload_in;
+ struct vendor_test_feat *test =
+ (struct vendor_test_feat *)input->feat_data;
+ u32 action;
+
+ action = FIELD_GET(CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK,
+ le32_to_cpu(input->hdr.flags));
+ /*
+ * While it is spec compliant to support other set actions, it is not
+ * necessary to add the complication in the emulation currently. Reject
+ * anything besides full xfer.
+ */
+ if (action != CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ /* Offset should be reserved when doing full transfer */
+ if (input->hdr.offset) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ memcpy(&mdata->test_feat.data, &test->data, sizeof(u32));
+
+ return 0;
+}
+
+static int mock_set_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_feat_in *input = cmd->payload_in;
+
+ if (uuid_equal(&input->hdr.uuid, &CXL_VENDOR_FEATURE_TEST))
+ return mock_set_test_feature(mdata, cmd);
+
+ cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
+
+ return -EOPNOTSUPP;
+}
+
+static int mock_get_supported_features(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_sup_feats_in *in = cmd->payload_in;
+ struct cxl_mbox_get_sup_feats_out *out = cmd->payload_out;
+ struct cxl_feat_entry *feat;
+ u16 start_idx, count;
+
+ if (cmd->size_out < sizeof(*out)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
+ return -EINVAL;
+ }
+
+ /*
+ * Current emulation only supports 1 feature
+ */
+ start_idx = le16_to_cpu(in->start_idx);
+ if (start_idx != 0) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ count = le16_to_cpu(in->count);
+ if (count < struct_size(out, ents, 0)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
+ return -EINVAL;
+ }
+
+ out->supported_feats = cpu_to_le16(MAX_CXL_TEST_FEATS);
+ cmd->return_code = 0;
+ if (count < struct_size(out, ents, MAX_CXL_TEST_FEATS)) {
+ out->num_entries = 0;
+ return 0;
+ }
+
+ out->num_entries = cpu_to_le16(MAX_CXL_TEST_FEATS);
+ feat = out->ents;
+ fill_feature_vendor_test(feat);
+
+ return 0;
+}
+
+static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct device *dev = cxl_mbox->host;
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+ struct cxl_memdev_state *mds = mdata->mds;
+ struct cxl_dev_state *cxlds = &mds->cxlds;
+ int rc = -EIO;
+
+ switch (cmd->opcode) {
+ case CXL_MBOX_OP_SET_TIMESTAMP:
+ rc = mock_set_timestamp(cxlds, cmd);
+ break;
+ case CXL_MBOX_OP_GET_SUPPORTED_LOGS:
+ rc = mock_gsl(cmd);
+ break;
+ case CXL_MBOX_OP_GET_LOG:
+ rc = mock_get_log(mds, cmd);
+ break;
+ case CXL_MBOX_OP_IDENTIFY:
+ if (cxlds->rcd)
+ rc = mock_rcd_id(cmd);
+ else
+ rc = mock_id(cmd);
+ break;
+ case CXL_MBOX_OP_GET_LSA:
+ rc = mock_get_lsa(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_GET_PARTITION_INFO:
+ rc = mock_partition_info(cmd);
+ break;
+ case CXL_MBOX_OP_GET_EVENT_RECORD:
+ rc = mock_get_event(dev, cmd);
+ break;
+ case CXL_MBOX_OP_CLEAR_EVENT_RECORD:
+ rc = mock_clear_event(dev, cmd);
+ break;
+ case CXL_MBOX_OP_SET_LSA:
+ rc = mock_set_lsa(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_GET_HEALTH_INFO:
+ rc = mock_health_info(cmd);
+ break;
+ case CXL_MBOX_OP_SANITIZE:
+ rc = mock_sanitize(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_SECURE_ERASE:
+ rc = mock_secure_erase(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_GET_SECURITY_STATE:
+ rc = mock_get_security_state(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_SET_PASSPHRASE:
+ rc = mock_set_passphrase(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_DISABLE_PASSPHRASE:
+ rc = mock_disable_passphrase(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_FREEZE_SECURITY:
+ rc = mock_freeze_security(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_UNLOCK:
+ rc = mock_unlock_security(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
+ rc = mock_passphrase_secure_erase(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_SET_SHUTDOWN_STATE:
+ rc = mock_set_shutdown_state(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_GET_POISON:
+ rc = mock_get_poison(cxlds, cmd);
+ break;
+ case CXL_MBOX_OP_INJECT_POISON:
+ rc = mock_inject_poison(cxlds, cmd);
+ break;
+ case CXL_MBOX_OP_CLEAR_POISON:
+ rc = mock_clear_poison(cxlds, cmd);
+ break;
+ case CXL_MBOX_OP_GET_FW_INFO:
+ rc = mock_fw_info(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_TRANSFER_FW:
+ rc = mock_transfer_fw(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_ACTIVATE_FW:
+ rc = mock_activate_fw(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ rc = mock_get_supported_features(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_GET_FEATURE:
+ rc = mock_get_feature(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_SET_FEATURE:
+ rc = mock_set_feature(mdata, cmd);
+ break;
+ default:
+ break;
+ }
+
+ dev_dbg(dev, "opcode: %#x sz_in: %zd sz_out: %zd rc: %d\n", cmd->opcode,
+ cmd->size_in, cmd->size_out, rc);
+
+ return rc;
+}
+
+static void label_area_release(void *lsa)
+{
+ vfree(lsa);
+}
+
+static void fw_buf_release(void *buf)
+{
+ vfree(buf);
+}
+
+static bool is_rcd(struct platform_device *pdev)
+{
+ const struct platform_device_id *id = platform_get_device_id(pdev);
+
+ return !!id->driver_data;
+}
+
+static ssize_t event_trigger_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ cxl_mock_event_trigger(dev);
+ return count;
+}
+static DEVICE_ATTR_WO(event_trigger);
+
+static int cxl_mock_mailbox_create(struct cxl_dev_state *cxlds)
+{
+ int rc;
+
+ rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+static void cxl_mock_test_feat_init(struct cxl_mockmem_data *mdata)
+{
+ mdata->test_feat.data = cpu_to_le32(0xdeadbeef);
+}
+
+static int cxl_mock_mem_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct cxl_memdev *cxlmd;
+ struct cxl_memdev_state *mds;
+ struct cxl_dev_state *cxlds;
+ struct cxl_mockmem_data *mdata;
+ struct cxl_mailbox *cxl_mbox;
+ struct cxl_dpa_info range_info = { 0 };
+ int rc;
+
+ mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL);
+ if (!mdata)
+ return -ENOMEM;
+ dev_set_drvdata(dev, mdata);
+
+ mdata->lsa = vmalloc(LSA_SIZE);
+ if (!mdata->lsa)
+ return -ENOMEM;
+ mdata->fw = vmalloc(FW_SIZE);
+ if (!mdata->fw)
+ return -ENOMEM;
+ mdata->fw_slot = 2;
+
+ rc = devm_add_action_or_reset(dev, label_area_release, mdata->lsa);
+ if (rc)
+ return rc;
+
+ rc = devm_add_action_or_reset(dev, fw_buf_release, mdata->fw);
+ if (rc)
+ return rc;
+
+ mds = cxl_memdev_state_create(dev);
+ if (IS_ERR(mds))
+ return PTR_ERR(mds);
+
+ cxlds = &mds->cxlds;
+ rc = cxl_mock_mailbox_create(cxlds);
+ if (rc)
+ return rc;
+
+ cxl_mbox = &mds->cxlds.cxl_mbox;
+ mdata->mds = mds;
+ cxl_mbox->mbox_send = cxl_mock_mbox_send;
+ cxl_mbox->payload_size = SZ_4K;
+ mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
+ INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work);
+
+ cxlds->serial = pdev->id + 1;
+ if (is_rcd(pdev))
+ cxlds->rcd = true;
+
+ rc = cxl_enumerate_cmds(mds);
+ if (rc)
+ return rc;
+
+ rc = cxl_poison_state_init(mds);
+ if (rc)
+ return rc;
+
+ rc = cxl_set_timestamp(mds);
+ if (rc)
+ return rc;
+
+ cxlds->media_ready = true;
+ rc = cxl_dev_state_identify(mds);
+ if (rc)
+ return rc;
+
+ rc = cxl_mem_dpa_fetch(mds, &range_info);
+ if (rc)
+ return rc;
+
+ rc = cxl_dpa_setup(cxlds, &range_info);
+ if (rc)
+ return rc;
+
+ rc = devm_cxl_setup_features(cxlds);
+ if (rc)
+ dev_dbg(dev, "No CXL Features discovered\n");
+
+ cxl_mock_add_event_logs(&mdata->mes);
+
+ cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
+ if (IS_ERR(cxlmd))
+ return PTR_ERR(cxlmd);
+
+ rc = devm_cxl_setup_fw_upload(&pdev->dev, mds);
+ if (rc)
+ return rc;
+
+ rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd);
+ if (rc)
+ return rc;
+
+ rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd);
+ if (rc)
+ dev_dbg(dev, "No CXL FWCTL setup\n");
+
+ cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
+ cxl_mock_test_feat_init(mdata);
+
+ return 0;
+}
+
+static ssize_t security_lock_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%u\n",
+ !!(mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED));
+}
+
+static ssize_t security_lock_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+ u32 mask = CXL_PMEM_SEC_STATE_FROZEN | CXL_PMEM_SEC_STATE_USER_PLIMIT |
+ CXL_PMEM_SEC_STATE_MASTER_PLIMIT;
+ int val;
+
+ if (kstrtoint(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ if (val == 1) {
+ if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET))
+ return -ENXIO;
+ mdata->security_state |= CXL_PMEM_SEC_STATE_LOCKED;
+ mdata->security_state &= ~mask;
+ } else {
+ return -EINVAL;
+ }
+ return count;
+}
+
+static DEVICE_ATTR_RW(security_lock);
+
+static ssize_t fw_buf_checksum_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+ u8 hash[SHA256_DIGEST_SIZE];
+
+ sha256(mdata->fw, mdata->fw_size, hash);
+
+ return sysfs_emit(buf, "%*phN\n", SHA256_DIGEST_SIZE, hash);
+}
+
+static DEVICE_ATTR_RO(fw_buf_checksum);
+
+static ssize_t sanitize_timeout_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%lu\n", mdata->sanitize_timeout);
+}
+
+static ssize_t sanitize_timeout_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
+ unsigned long val;
+ int rc;
+
+ rc = kstrtoul(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ mdata->sanitize_timeout = val;
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(sanitize_timeout);
+
+static struct attribute *cxl_mock_mem_attrs[] = {
+ &dev_attr_security_lock.attr,
+ &dev_attr_event_trigger.attr,
+ &dev_attr_fw_buf_checksum.attr,
+ &dev_attr_sanitize_timeout.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(cxl_mock_mem);
+
+static const struct platform_device_id cxl_mock_mem_ids[] = {
+ { .name = "cxl_mem", 0 },
+ { .name = "cxl_rcd", 1 },
+ { },
+};
+MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids);
+
+static struct platform_driver cxl_mock_mem_driver = {
+ .probe = cxl_mock_mem_probe,
+ .id_table = cxl_mock_mem_ids,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .dev_groups = cxl_mock_mem_groups,
+ .groups = cxl_mock_mem_core_groups,
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ },
+};
+
+module_platform_driver(cxl_mock_mem_driver);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("cxl_test: mem device mock module");
+MODULE_IMPORT_NS("CXL");
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
new file mode 100644
index 000000000000..44bce80ef3ff
--- /dev/null
+++ b/tools/testing/cxl/test/mock.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//Copyright(c) 2021 Intel Corporation. All rights reserved.
+
+#include <linux/libnvdimm.h>
+#include <linux/rculist.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <cxlmem.h>
+#include <cxlpci.h>
+#include "mock.h"
+#include "../exports.h"
+
+static LIST_HEAD(mock);
+
+static struct cxl_dport *
+redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev);
+static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port);
+
+void register_cxl_mock_ops(struct cxl_mock_ops *ops)
+{
+ list_add_rcu(&ops->list, &mock);
+ _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev;
+ _devm_cxl_switch_port_decoders_setup =
+ redirect_devm_cxl_switch_port_decoders_setup;
+}
+EXPORT_SYMBOL_GPL(register_cxl_mock_ops);
+
+DEFINE_STATIC_SRCU(cxl_mock_srcu);
+
+void unregister_cxl_mock_ops(struct cxl_mock_ops *ops)
+{
+ _devm_cxl_switch_port_decoders_setup =
+ __devm_cxl_switch_port_decoders_setup;
+ _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev;
+ list_del_rcu(&ops->list);
+ synchronize_srcu(&cxl_mock_srcu);
+}
+EXPORT_SYMBOL_GPL(unregister_cxl_mock_ops);
+
+struct cxl_mock_ops *get_cxl_mock_ops(int *index)
+{
+ *index = srcu_read_lock(&cxl_mock_srcu);
+ return list_first_or_null_rcu(&mock, struct cxl_mock_ops, list);
+}
+EXPORT_SYMBOL_GPL(get_cxl_mock_ops);
+
+void put_cxl_mock_ops(int index)
+{
+ srcu_read_unlock(&cxl_mock_srcu, index);
+}
+EXPORT_SYMBOL_GPL(put_cxl_mock_ops);
+
+bool __wrap_is_acpi_device_node(const struct fwnode_handle *fwnode)
+{
+ struct acpi_device *adev =
+ container_of(fwnode, struct acpi_device, fwnode);
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+ bool retval = false;
+
+ if (ops)
+ retval = ops->is_mock_adev(adev);
+
+ if (!retval)
+ retval = is_acpi_device_node(fwnode);
+
+ put_cxl_mock_ops(index);
+ return retval;
+}
+EXPORT_SYMBOL(__wrap_is_acpi_device_node);
+
+int __wrap_acpi_table_parse_cedt(enum acpi_cedt_type id,
+ acpi_tbl_entry_handler_arg handler_arg,
+ void *arg)
+{
+ int index, rc;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops)
+ rc = ops->acpi_table_parse_cedt(id, handler_arg, arg);
+ else
+ rc = acpi_table_parse_cedt(id, handler_arg, arg);
+
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_acpi_table_parse_cedt, "ACPI");
+
+acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle,
+ acpi_string pathname,
+ struct acpi_object_list *arguments,
+ unsigned long long *data)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+ acpi_status status;
+
+ if (ops)
+ status = ops->acpi_evaluate_integer(handle, pathname, arguments,
+ data);
+ else
+ status = acpi_evaluate_integer(handle, pathname, arguments,
+ data);
+ put_cxl_mock_ops(index);
+
+ return status;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_integer);
+
+int __wrap_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid,
+ resource_size_t *cache_size)
+{
+ int index, rc;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops)
+ rc = ops->hmat_get_extended_linear_cache_size(backing_res, nid,
+ cache_size);
+ else
+ rc = hmat_get_extended_linear_cache_size(backing_res, nid,
+ cache_size);
+
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(__wrap_hmat_get_extended_linear_cache_size);
+
+struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle)
+{
+ int index;
+ struct acpi_pci_root *root;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops)
+ root = ops->acpi_pci_find_root(handle);
+ else
+ root = acpi_pci_find_root(handle);
+
+ put_cxl_mock_ops(index);
+
+ return root;
+}
+EXPORT_SYMBOL_GPL(__wrap_acpi_pci_find_root);
+
+struct nvdimm_bus *
+__wrap_nvdimm_bus_register(struct device *dev,
+ struct nvdimm_bus_descriptor *nd_desc)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops && ops->is_mock_dev(dev->parent->parent))
+ nd_desc->provider_name = "cxl_test";
+ put_cxl_mock_ops(index);
+
+ return nvdimm_bus_register(dev, nd_desc);
+}
+EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
+
+int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+ int rc, index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops && ops->is_mock_port(port->uport_dev))
+ rc = ops->devm_cxl_switch_port_decoders_setup(port);
+ else
+ rc = __devm_cxl_switch_port_decoders_setup(port);
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+
+int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
+{
+ int rc, index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops && ops->is_mock_port(port->uport_dev))
+ rc = ops->devm_cxl_endpoint_decoders_setup(port);
+ else
+ rc = devm_cxl_endpoint_decoders_setup(port);
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
+
+int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
+{
+ int rc, index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops && ops->is_mock_dev(cxlds->dev))
+ rc = 0;
+ else
+ rc = cxl_await_media_ready(cxlds);
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL");
+
+struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
+ struct device *dport_dev,
+ int port_id,
+ resource_size_t rcrb)
+{
+ int index;
+ struct cxl_dport *dport;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops && ops->is_mock_port(dport_dev)) {
+ dport = devm_cxl_add_dport(port, dport_dev, port_id,
+ CXL_RESOURCE_NONE);
+ if (!IS_ERR(dport)) {
+ dport->rcrb.base = rcrb;
+ dport->rch = true;
+ }
+ } else
+ dport = devm_cxl_add_rch_dport(port, dport_dev, port_id, rcrb);
+ put_cxl_mock_ops(index);
+
+ return dport;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, "CXL");
+
+void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+ struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
+
+ if (ops && ops->is_mock_dev(cxlmd->dev.parent))
+ ops->cxl_endpoint_parse_cdat(port);
+ else
+ cxl_endpoint_parse_cdat(port);
+ put_cxl_mock_ops(index);
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, "CXL");
+
+void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (!ops || !ops->is_mock_port(dport->dport_dev))
+ cxl_dport_init_ras_reporting(dport, host);
+
+ put_cxl_mock_ops(index);
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL");
+
+struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+ struct cxl_dport *dport;
+
+ if (ops && ops->is_mock_port(port->uport_dev))
+ dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev);
+ else
+ dport = __devm_cxl_add_dport_by_dev(port, dport_dev);
+ put_cxl_mock_ops(index);
+
+ return dport;
+}
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("cxl_test: emulation module");
+MODULE_IMPORT_NS("ACPI");
+MODULE_IMPORT_NS("CXL");
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
new file mode 100644
index 000000000000..2684b89c8aa2
--- /dev/null
+++ b/tools/testing/cxl/test/mock.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/list.h>
+#include <linux/acpi.h>
+#include <cxl.h>
+
+struct cxl_mock_ops {
+ struct list_head list;
+ bool (*is_mock_adev)(struct acpi_device *dev);
+ int (*acpi_table_parse_cedt)(enum acpi_cedt_type id,
+ acpi_tbl_entry_handler_arg handler_arg,
+ void *arg);
+ bool (*is_mock_bridge)(struct device *dev);
+ acpi_status (*acpi_evaluate_integer)(acpi_handle handle,
+ acpi_string pathname,
+ struct acpi_object_list *arguments,
+ unsigned long long *data);
+ struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle);
+ bool (*is_mock_bus)(struct pci_bus *bus);
+ bool (*is_mock_port)(struct device *dev);
+ bool (*is_mock_dev)(struct device *dev);
+ int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
+ int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
+ void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
+ struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port,
+ struct device *dport_dev);
+ int (*hmat_get_extended_linear_cache_size)(struct resource *backing_res,
+ int nid,
+ resource_size_t *cache_size);
+};
+
+void register_cxl_mock_ops(struct cxl_mock_ops *ops);
+void unregister_cxl_mock_ops(struct cxl_mock_ops *ops);
+struct cxl_mock_ops *get_cxl_mock_ops(int *index);
+void put_cxl_mock_ops(int index);
diff --git a/tools/testing/cxl/watermark.h b/tools/testing/cxl/watermark.h
new file mode 100644
index 000000000000..9d81d4a5f6be
--- /dev/null
+++ b/tools/testing/cxl/watermark.h
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#ifndef _TEST_CXL_WATERMARK_H_
+#define _TEST_CXL_WATERMARK_H_
+#include <linux/module.h>
+#include <linux/printk.h>
+
+int cxl_acpi_test(void);
+int cxl_core_test(void);
+int cxl_mem_test(void);
+int cxl_pmem_test(void);
+int cxl_port_test(void);
+
+/*
+ * dummy routine for cxl_test to validate it is linking to the properly
+ * mocked module and not the standard one from the base tree.
+ */
+#define cxl_test_watermark(x) \
+int x##_test(void) \
+{ \
+ pr_debug("%s for cxl_test\n", KBUILD_MODNAME); \
+ return 0; \
+} \
+EXPORT_SYMBOL(x##_test)
+#endif /* _TEST_CXL_WATERMARK_H_ */
diff --git a/tools/testing/fault-injection/failcmd.sh b/tools/testing/fault-injection/failcmd.sh
index 78dac34264be..c4f2432750f4 100644..100755
--- a/tools/testing/fault-injection/failcmd.sh
+++ b/tools/testing/fault-injection/failcmd.sh
@@ -64,6 +64,14 @@ ENVIRONMENT
EOF
}
+exit_if_not_hex() {
+ local value="$1"
+ if ! [[ $value =~ ^0x[0-9a-fA-F]+$ ]]; then
+ echo "Error: The provided value '$value' is not a valid hexadecimal number." >&2
+ exit 1
+ fi
+}
+
if [ $UID != 0 ]; then
echo must be run as root >&2
exit 1
@@ -160,18 +168,22 @@ while true; do
shift 2
;;
--require-start)
+ exit_if_not_hex "$2"
echo $2 > $FAULTATTR/require-start
shift 2
;;
--require-end)
+ exit_if_not_hex "$2"
echo $2 > $FAULTATTR/require-end
shift 2
;;
--reject-start)
+ exit_if_not_hex "$2"
echo $2 > $FAULTATTR/reject-start
shift 2
;;
--reject-end)
+ exit_if_not_hex "$2"
echo $2 > $FAULTATTR/reject-end
shift 2
;;
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
index 6fd864935319..bee7cb34a289 100755
--- a/tools/testing/ktest/config-bisect.pl
+++ b/tools/testing/ktest/config-bisect.pl
@@ -741,9 +741,9 @@ if ($start) {
die "Can not find file $bad\n";
}
if ($val eq "good") {
- run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+ run_command "cp $output_config $good" or die "failed to copy $output_config to $good\n";
} elsif ($val eq "bad") {
- run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+ run_command "cp $output_config $bad" or die "failed to copy $output_config to $bad\n";
}
}
diff --git a/tools/testing/ktest/examples/bootconfigs/boottrace.bconf b/tools/testing/ktest/examples/bootconfigs/boottrace.bconf
new file mode 100644
index 000000000000..7aa706cccb3b
--- /dev/null
+++ b/tools/testing/ktest/examples/bootconfigs/boottrace.bconf
@@ -0,0 +1,59 @@
+ftrace.event {
+ task.task_newtask {
+ filter = "pid < 128"
+ enable
+ }
+ kprobes.vfs_read {
+ probes = "vfs_read $arg1 $arg2"
+ filter = "common_pid < 200"
+ enable
+ }
+ synthetic.initcall_latency {
+ fields = "unsigned long func", "u64 lat"
+ hist {
+ keys = func.sym,lat
+ values = lat
+ sort = lat
+ }
+ }
+ initcall.initcall_start.hist {
+ keys = func;
+ var.ts0 = common_timestamp.usecs
+ }
+ initcall.initcall_finish.hist {
+ keys = func
+ var.lat = common_timestamp.usecs - $ts0
+ onmatch {
+ event = initcall.initcall_start
+ trace = initcall_latency, func, $lat
+ }
+ }
+}
+
+ftrace.instance {
+ foo {
+ tracer = "function"
+ ftrace.filters = "user_*"
+ cpumask = 1
+ options = nosym-addr
+ buffer_size = 512KB
+ trace_clock = mono
+ event.signal.signal_deliver.actions=snapshot
+ }
+ bar {
+ tracer = "function"
+ ftrace.filters = "kernel_*"
+ cpumask = 2
+ trace_clock = x86-tsc
+ }
+}
+
+ftrace.alloc_snapshot
+
+kernel {
+ trace_options = sym-addr
+ trace_event = "initcall:*"
+ trace_buf_size = 1M
+ ftrace = function
+ ftrace_filter = "vfs*"
+}
diff --git a/tools/testing/ktest/examples/bootconfigs/config-bootconfig b/tools/testing/ktest/examples/bootconfigs/config-bootconfig
new file mode 100644
index 000000000000..0685b6811388
--- /dev/null
+++ b/tools/testing/ktest/examples/bootconfigs/config-bootconfig
@@ -0,0 +1 @@
+CONFIG_CMDLINE="bootconfig"
diff --git a/tools/testing/ktest/examples/bootconfigs/functiongraph.bconf b/tools/testing/ktest/examples/bootconfigs/functiongraph.bconf
new file mode 100644
index 000000000000..68debfcbda76
--- /dev/null
+++ b/tools/testing/ktest/examples/bootconfigs/functiongraph.bconf
@@ -0,0 +1,15 @@
+ftrace {
+ tracing_on = 0 # off by default
+ tracer = function_graph
+ event.kprobes {
+ start_event {
+ probes = "pci_proc_init"
+ actions = "traceon"
+ }
+ end_event {
+ probes = "pci_proc_init%return"
+ actions = "traceoff"
+ }
+ }
+}
+
diff --git a/tools/testing/ktest/examples/bootconfigs/tracing.bconf b/tools/testing/ktest/examples/bootconfigs/tracing.bconf
new file mode 100644
index 000000000000..bf117c78115a
--- /dev/null
+++ b/tools/testing/ktest/examples/bootconfigs/tracing.bconf
@@ -0,0 +1,33 @@
+ftrace {
+ tracer = function_graph;
+ options = event-fork, sym-addr, stacktrace;
+ buffer_size = 1M;
+ alloc_snapshot;
+ trace_clock = global;
+ events = "task:task_newtask", "initcall:*";
+ event.sched.sched_process_exec {
+ filter = "pid < 128";
+ }
+ instance.bar {
+ event.kprobes {
+ myevent {
+ probes = "vfs_read $arg2 $arg3";
+ }
+ myevent2 {
+ probes = "vfs_write $arg2 +0($arg2):ustring $arg3";
+ }
+ myevent3 {
+ probes = "initrd_load";
+ }
+ enable
+ }
+ }
+ instance.foo {
+ tracer = function;
+ tracing_on = false;
+ };
+}
+kernel {
+ ftrace_dump_on_oops = "orig_cpu"
+ traceoff_on_warning
+}
diff --git a/tools/testing/ktest/examples/bootconfigs/verify-boottrace.sh b/tools/testing/ktest/examples/bootconfigs/verify-boottrace.sh
new file mode 100755
index 000000000000..233e95cfcf20
--- /dev/null
+++ b/tools/testing/ktest/examples/bootconfigs/verify-boottrace.sh
@@ -0,0 +1,84 @@
+#!/bin/sh
+
+cd /sys/kernel/tracing
+
+compare_file() {
+ file="$1"
+ val="$2"
+ content=`cat $file`
+ if [ "$content" != "$val" ]; then
+ echo "FAILED: $file has '$content', expected '$val'"
+ exit 1
+ fi
+}
+
+compare_file_partial() {
+ file="$1"
+ val="$2"
+ content=`cat $file | sed -ne "/^$val/p"`
+ if [ -z "$content" ]; then
+ echo "FAILED: $file does not contain '$val'"
+ cat $file
+ exit 1
+ fi
+}
+
+file_contains() {
+ file=$1
+ val="$2"
+
+ if ! grep -q "$val" $file ; then
+ echo "FAILED: $file does not contain $val"
+ cat $file
+ exit 1
+ fi
+}
+
+compare_mask() {
+ file=$1
+ val="$2"
+
+ content=`cat $file | sed -ne "/^[0 ]*$val/p"`
+ if [ -z "$content" ]; then
+ echo "FAILED: $file does not have mask '$val'"
+ cat $file
+ exit 1
+ fi
+}
+
+compare_file "events/task/task_newtask/filter" "pid < 128"
+compare_file "events/task/task_newtask/enable" "1"
+
+compare_file "events/kprobes/vfs_read/filter" "common_pid < 200"
+compare_file "events/kprobes/vfs_read/enable" "1"
+
+compare_file_partial "events/synthetic/initcall_latency/trigger" "hist:keys=func.sym,lat:vals=hitcount,lat:sort=lat"
+compare_file_partial "events/synthetic/initcall_latency/enable" "0"
+
+compare_file_partial "events/initcall/initcall_start/trigger" "hist:keys=func:vals=hitcount:ts0=common_timestamp.usecs"
+compare_file_partial "events/initcall/initcall_start/enable" "1"
+
+compare_file_partial "events/initcall/initcall_finish/trigger" 'hist:keys=func:vals=hitcount:lat=common_timestamp.usecs-\$ts0:sort=hitcount:size=2048:clock=global:onmatch(initcall.initcall_start).trace(initcall_latency,func,\$lat)'
+compare_file_partial "events/initcall/initcall_finish/enable" "1"
+
+compare_file "instances/foo/current_tracer" "function"
+file_contains "instances/foo/set_ftrace_filter" "^user"
+compare_file "instances/foo/buffer_size_kb" "512"
+compare_mask "instances/foo/tracing_cpumask" "1"
+compare_file "instances/foo/options/sym-addr" "0"
+file_contains "instances/foo/trace_clock" '\[mono\]'
+compare_file_partial "instances/foo/events/signal/signal_deliver/trigger" "snapshot"
+
+compare_file "instances/bar/current_tracer" "function"
+file_contains "instances/bar/set_ftrace_filter" "^kernel"
+compare_mask "instances/bar/tracing_cpumask" "2"
+file_contains "instances/bar/trace_clock" '\[x86-tsc\]'
+
+file_contains "snapshot" "Snapshot is allocated"
+compare_file "options/sym-addr" "1"
+compare_file "events/initcall/enable" "1"
+compare_file "buffer_size_kb" "1027"
+compare_file "current_tracer" "function"
+file_contains "set_ftrace_filter" '^vfs'
+
+exit 0
diff --git a/tools/testing/ktest/examples/bootconfigs/verify-functiongraph.sh b/tools/testing/ktest/examples/bootconfigs/verify-functiongraph.sh
new file mode 100755
index 000000000000..b50baa10fe97
--- /dev/null
+++ b/tools/testing/ktest/examples/bootconfigs/verify-functiongraph.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+cd /sys/kernel/tracing
+
+compare_file() {
+ file="$1"
+ val="$2"
+ content=`cat $file`
+ if [ "$content" != "$val" ]; then
+ echo "FAILED: $file has '$content', expected '$val'"
+ exit 1
+ fi
+}
+
+compare_file_partial() {
+ file="$1"
+ val="$2"
+ content=`cat $file | sed -ne "/^$val/p"`
+ if [ -z "$content" ]; then
+ echo "FAILED: $file does not contain '$val'"
+ cat $file
+ exit 1
+ fi
+}
+
+file_contains() {
+ file=$1
+ val="$2"
+
+ if ! grep -q "$val" $file ; then
+ echo "FAILED: $file does not contain $val"
+ cat $file
+ exit 1
+ fi
+}
+
+compare_mask() {
+ file=$1
+ val="$2"
+
+ content=`cat $file | sed -ne "/^[0 ]*$val/p"`
+ if [ -z "$content" ]; then
+ echo "FAILED: $file does not have mask '$val'"
+ cat $file
+ exit 1
+ fi
+}
+
+
+compare_file "tracing_on" "0"
+compare_file "current_tracer" "function_graph"
+
+compare_file_partial "events/kprobes/start_event/enable" "1"
+compare_file_partial "events/kprobes/start_event/trigger" "traceon"
+file_contains "kprobe_events" 'start_event.*pci_proc_init'
+
+compare_file_partial "events/kprobes/end_event/enable" "1"
+compare_file_partial "events/kprobes/end_event/trigger" "traceoff"
+file_contains "kprobe_events" '^r.*end_event.*pci_proc_init'
+
+exit 0
diff --git a/tools/testing/ktest/examples/bootconfigs/verify-tracing.sh b/tools/testing/ktest/examples/bootconfigs/verify-tracing.sh
new file mode 100755
index 000000000000..01e111e36e63
--- /dev/null
+++ b/tools/testing/ktest/examples/bootconfigs/verify-tracing.sh
@@ -0,0 +1,72 @@
+#!/bin/sh
+
+cd /sys/kernel/tracing
+
+compare_file() {
+ file="$1"
+ val="$2"
+ content=`cat $file`
+ if [ "$content" != "$val" ]; then
+ echo "FAILED: $file has '$content', expected '$val'"
+ exit 1
+ fi
+}
+
+compare_file_partial() {
+ file="$1"
+ val="$2"
+ content=`cat $file | sed -ne "/^$val/p"`
+ if [ -z "$content" ]; then
+ echo "FAILED: $file does not contain '$val'"
+ cat $file
+ exit 1
+ fi
+}
+
+file_contains() {
+ file=$1
+ val="$2"
+
+ if ! grep -q "$val" $file ; then
+ echo "FAILED: $file does not contain $val"
+ cat $file
+ exit 1
+ fi
+}
+
+compare_mask() {
+ file=$1
+ val="$2"
+
+ content=`cat $file | sed -ne "/^[0 ]*$val/p"`
+ if [ -z "$content" ]; then
+ echo "FAILED: $file does not have mask '$val'"
+ cat $file
+ exit 1
+ fi
+}
+
+compare_file "current_tracer" "function_graph"
+compare_file "options/event-fork" "1"
+compare_file "options/sym-addr" "1"
+compare_file "options/stacktrace" "1"
+compare_file "buffer_size_kb" "1024"
+file_contains "snapshot" "Snapshot is allocated"
+file_contains "trace_clock" '\[global\]'
+
+compare_file "events/initcall/enable" "1"
+compare_file "events/task/task_newtask/enable" "1"
+compare_file "events/sched/sched_process_exec/filter" "pid < 128"
+compare_file "events/kprobes/enable" "1"
+
+compare_file "instances/bar/events/kprobes/myevent/enable" "1"
+compare_file "instances/bar/events/kprobes/myevent2/enable" "1"
+compare_file "instances/bar/events/kprobes/myevent3/enable" "1"
+
+compare_file "instances/foo/current_tracer" "function"
+compare_file "instances/foo/tracing_on" "0"
+
+compare_file "/proc/sys/kernel/ftrace_dump_on_oops" "2"
+compare_file "/proc/sys/kernel/traceoff_on_warning" "1"
+
+exit 0
diff --git a/tools/testing/ktest/examples/include/bootconfig.conf b/tools/testing/ktest/examples/include/bootconfig.conf
new file mode 100644
index 000000000000..3b885de085bd
--- /dev/null
+++ b/tools/testing/ktest/examples/include/bootconfig.conf
@@ -0,0 +1,69 @@
+# bootconfig.conf
+#
+# Tests to test some bootconfig scripts
+
+# List where on the target machine the initrd is used
+INITRD := /boot/initramfs-test.img
+
+# Install bootconfig on the target machine and define the path here.
+BOOTCONFIG := /usr/bin/bootconfig
+
+# Currenty we just build the .config in the BUILD_DIR
+BUILD_TYPE := oldconfig
+
+# Helper macro to run bootconfig on the target
+# SSH is defined in include/defaults.conf
+ADD_BOOTCONFIG := ${SSH} "${BOOTCONFIG} -d ${INITRD} && ${BOOTCONFIG} -a /tmp/${BOOTCONFIG_FILE} ${INITRD}"
+
+# This copies a bootconfig script to the target and then will
+# add it to the initrd. SSH_USER is defined in include/defaults.conf
+# and MACHINE is defined in the example configs.
+BOOTCONFIG_TEST_PREP = scp ${BOOTCONFIG_PATH}${BOOTCONFIG_FILE} ${SSH_USER}@${MACHINE}:/tmp && ${ADD_BOOTCONFIG}
+
+# When a test is complete, remove the bootconfig from the initrd.
+CLEAR_BOOTCONFIG := ${SSH} "${BOOTCONFIG} -d ${INITRD}"
+
+# Run a verifier on the target after it had booted, to make sure that the
+# bootconfig script did what it was expected to do
+DO_TEST = scp ${BOOTCONFIG_PATH}${BOOTCONFIG_VERIFY} ${SSH_USER}@${MACHINE}:/tmp && ${SSH} /tmp/${BOOTCONFIG_VERIFY}
+
+# Comment this out to not run the boot configs
+RUN_BOOTCONFIG := 1
+
+TEST_START IF DEFINED RUN_BOOTCONFIG
+TEST_TYPE = test
+TEST_NAME = bootconfig boottrace
+# Just testing the bootconfig on initrd, no need to build the kernel
+BUILD_TYPE = nobuild
+BOOTCONFIG_FILE = boottrace.bconf
+BOOTCONFIG_VERIFY = verify-boottrace.sh
+ADD_CONFIG = ${ADD_CONFIG} ${BOOTCONFIG_PATH}/config-bootconfig
+PRE_TEST = ${BOOTCONFIG_TEST_PREP}
+PRE_TEST_DIE = 1
+TEST = ${DO_TEST}
+POST_TEST = ${CLEAR_BOOTCONFIG}
+
+TEST_START IF DEFINED RUN_BOOTCONFIG
+TEST_TYPE = test
+TEST_NAME = bootconfig function graph
+BUILD_TYPE = nobuild
+BOOTCONFIG_FILE = functiongraph.bconf
+BOOTCONFIG_VERIFY = verify-functiongraph.sh
+ADD_CONFIG = ${ADD_CONFIG} ${BOOTCONFIG_PATH}/config-bootconfig
+PRE_TEST = ${BOOTCONFIG_TEST_PREP}
+PRE_TEST_DIE = 1
+TEST = ${DO_TEST}
+POST_TEST = ${CLEAR_BOOTCONFIG}
+
+TEST_START IF DEFINED RUN_BOOTCONFIG
+TEST_TYPE = test
+TEST_NAME = bootconfig tracing
+BUILD_TYPE = nobuild
+BOOTCONFIG_FILE = tracing.bconf
+BOOTCONFIG_VERIFY = verify-tracing.sh
+ADD_CONFIG = ${ADD_CONFIG} ${BOOTCONFIG_PATH}/config-bootconfig
+PRE_TEST = ${BOOTCONFIG_TEST_PREP}
+PRE_TEST_DIE = 1
+TEST = ${DO_TEST}
+POST_TEST = ${CLEAR_BOOTCONFIG}
+
diff --git a/tools/testing/ktest/examples/include/defaults.conf b/tools/testing/ktest/examples/include/defaults.conf
index 63a1a83f4f0b..f6d8517a471e 100644
--- a/tools/testing/ktest/examples/include/defaults.conf
+++ b/tools/testing/ktest/examples/include/defaults.conf
@@ -46,7 +46,7 @@ CLEAR_LOG = 1
SSH_USER = root
-# For accesing the machine, we will ssh to root@machine.
+# For accessing the machine, we will ssh to root@machine.
SSH := ssh ${SSH_USER}@${MACHINE}
# Update this. The default here is ktest will ssh to the target box
diff --git a/tools/testing/ktest/examples/kvm.conf b/tools/testing/ktest/examples/kvm.conf
index fbc134f9ac6e..c700e8bb7fde 100644
--- a/tools/testing/ktest/examples/kvm.conf
+++ b/tools/testing/ktest/examples/kvm.conf
@@ -90,3 +90,4 @@ INCLUDE include/patchcheck.conf
INCLUDE include/tests.conf
INCLUDE include/bisect.conf
INCLUDE include/min-config.conf
+INCLUDE include/bootconfig.conf \ No newline at end of file
diff --git a/tools/testing/ktest/examples/vmware.conf b/tools/testing/ktest/examples/vmware.conf
new file mode 100644
index 000000000000..61958163d242
--- /dev/null
+++ b/tools/testing/ktest/examples/vmware.conf
@@ -0,0 +1,137 @@
+#
+# This config is an example usage of ktest.pl with a vmware guest
+#
+# VMware Setup:
+# -------------
+# - Edit the Virtual Machine ("Edit virtual machine settings")
+# - Add a Serial Port
+# - You almost certainly want it set "Connect at power on"
+# - Select "Use socket (named pipe)"
+# - Select a name that you'll recognize, like 'ktestserialpipe'
+# - From: Server
+# - To: A Virtual Machine
+# - Save
+# - Make sure you note the name, it will be in the base directory of the
+# virtual machine (where the "disks" are stored. The default
+# is /var/lib/vmware/<virtual machine name>/<the name you entered above>
+#
+# - Make note of the path to the VM
+# </End VMware setup>
+#
+# The guest is called 'Guest' and this would be something that
+# could be run on the host to test a virtual machine target.
+
+MACHINE = Guest
+
+# Name of the serial pipe you set in the VMware settings
+VMWARE_SERIAL_NAME = <the name you entered above>
+
+# Define a variable of the name of the VM
+# Noting this needs to be the name of the kmx file, and usually, the
+# name of the directory that it's in. If the directory and name
+# differ change the VMWARE_VM_DIR accordingly.
+# Please ommit the .kmx extension
+VMWARE_VM_NAME = <virtual machine name>
+
+# VM dir name. This is usually the same as the virtual machine's name,
+# but not always the case. Change if they differ
+VMWARE_VM_DIR = ${VMWARE_VM_NAME}
+
+# Base directory that the Virtual machine is contained in
+# /var/lib/vmware is the default on Linux
+VMWARE_VM_BASE_DIR = /var/lib/vmware/${VMWARE_VM_DIR}
+
+# Use ncat to read the unix pipe. Anything that can read the Unix Pipe
+# and output it's contents to stdout will work
+CONSOLE = /usr/bin/ncat -U ${VMWARE_VM_BASE_DIR}/${VMWARE_SERIAL_NAME}
+
+# Define what version of Workstation you are using
+# This is used by vmrun to use the appropriate appripriate pieces to
+# test this. In all likelihood you want 'ws' or 'player'
+# Valid options:
+# ws - Workstation (Windows or Linux host)
+# fusion - Fusion (Mac host)
+# player - Using VMware Player (Windows or Linux host)
+# Note: vmrun has to run directly on the host machine
+VMWARE_HOST_TYPE = ws
+
+# VMware provides `vmrun` to allow you to do certain things to the virtual machine
+# This should hard reset the VM and force a boot
+VMWARE_POWER_CYCLE = /usr/bin/vmrun -T ${VMWARE_HOST_TYPE} reset ${VMWARE_VM_BASE_DIR}/${VMWARE_VM_NAME}.kmx nogui
+
+#*************************************#
+# This part is the same as test.conf #
+#*************************************#
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+# See include/defaults.conf
+REBOOT := empty
+
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+
+#*************************************#
+# Now we are different from test.conf #
+#*************************************#
+
+
+# The example here assumes that Guest is running a Fedora release
+# that uses dracut for its initfs. The POST_INSTALL will be executed
+# after the install of the kernel and modules are complete.
+#
+POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# Guests sometimes get stuck on reboot. We wait 3 seconds after running
+# the reboot command and then do a full power-cycle of the guest.
+# This forces the guest to restart.
+#
+POWERCYCLE_AFTER_REBOOT = 3
+
+# We do the same after the halt command, but this time we wait 20 seconds.
+POWEROFF_AFTER_HALT = 20
+
+
+# As the defaults.conf file has a POWER_CYCLE option already defined,
+# and options can not be defined in the same section more than once
+# (all DEFAULTS sections are considered the same). We use the
+# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previous defined
+# options, for the options set in the OVERRIDE section.
+#
+DEFAULTS OVERRIDE
+
+# Instead of using the default POWER_CYCLE option defined in
+# defaults.conf, we use virsh to cycle it. To do so, we destroy
+# the guest, wait 5 seconds, and then start it up again.
+# Crude, but effective.
+#
+POWER_CYCLE = ${VMWARE_POWER_CYCLE}
+
+
+DEFAULTS
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 4e2450964517..001c4df9f7df 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -21,10 +21,12 @@ my %opt;
my %repeat_tests;
my %repeats;
my %evals;
+my @command_vars;
+my %command_tmp_vars;
#default opts
my %default = (
- "MAILER" => "sendmail", # default mailer
+ "MAILER" => "sendmail", # default mailer
"EMAIL_ON_ERROR" => 1,
"EMAIL_WHEN_FINISHED" => 1,
"EMAIL_WHEN_CANCELED" => 0,
@@ -36,15 +38,15 @@ my %default = (
"CLOSE_CONSOLE_SIGNAL" => "INT",
"TIMEOUT" => 120,
"TMP_DIR" => "/tmp/ktest/\${MACHINE}",
- "SLEEP_TIME" => 60, # sleep time between tests
+ "SLEEP_TIME" => 60, # sleep time between tests
"BUILD_NOCLEAN" => 0,
"REBOOT_ON_ERROR" => 0,
"POWEROFF_ON_ERROR" => 0,
"REBOOT_ON_SUCCESS" => 1,
"POWEROFF_ON_SUCCESS" => 0,
"BUILD_OPTIONS" => "",
- "BISECT_SLEEP_TIME" => 60, # sleep time between bisects
- "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks
+ "BISECT_SLEEP_TIME" => 60, # sleep time between bisects
+ "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks
"CLEAR_LOG" => 0,
"BISECT_MANUAL" => 0,
"BISECT_SKIP" => 1,
@@ -178,6 +180,7 @@ my $store_failures;
my $store_successes;
my $test_name;
my $timeout;
+my $run_timeout;
my $connect_timeout;
my $config_bisect_exec;
my $booted_timeout;
@@ -215,12 +218,15 @@ my $patchcheck_type;
my $patchcheck_start;
my $patchcheck_cherry;
my $patchcheck_end;
+my $patchcheck_skip;
my $build_time;
my $install_time;
my $reboot_time;
my $test_time;
+my $warning_found = 0;
+
my $pwd;
my $dirname = $FindBin::Bin;
@@ -340,6 +346,7 @@ my %option_map = (
"STORE_SUCCESSES" => \$store_successes,
"TEST_NAME" => \$test_name,
"TIMEOUT" => \$timeout,
+ "RUN_TIMEOUT" => \$run_timeout,
"CONNECT_TIMEOUT" => \$connect_timeout,
"CONFIG_BISECT_EXEC" => \$config_bisect_exec,
"BOOTED_TIMEOUT" => \$booted_timeout,
@@ -376,6 +383,7 @@ my %option_map = (
"PATCHCHECK_START" => \$patchcheck_start,
"PATCHCHECK_CHERRY" => \$patchcheck_cherry,
"PATCHCHECK_END" => \$patchcheck_end,
+ "PATCHCHECK_SKIP" => \$patchcheck_skip,
);
# Options may be used by other options, record them.
@@ -512,6 +520,69 @@ $config_help{"REBOOT_SCRIPT"} = << "EOF"
EOF
;
+# used with process_expression()
+my $d = 0;
+
+# defined before get_test_name()
+my $in_die = 0;
+
+# defined before process_warning_line()
+my $check_build_re = ".*:.*(warning|error|Error):.*";
+my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})";
+
+# defined before child_finished()
+my $child_done;
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+# found above run_config_bisect()
+my $pass = 1;
+
+# found above add_dep()
+
+my %depends;
+my %depcount;
+my $iflevel = 0;
+my @ifdeps;
+
+# prevent recursion
+my %read_kconfigs;
+
+# found above test_this_config()
+my %min_configs;
+my %keep_configs;
+my %save_configs;
+my %processed_configs;
+my %nochange_config;
+
+#
+# These are first defined here, main function later on
+#
+sub run_command;
+sub start_monitor;
+sub end_monitor;
+sub wait_for_monitor;
+
sub _logit {
if (defined($opt{"LOG_FILE"})) {
print LOG @_;
@@ -537,7 +608,7 @@ sub read_prompt {
my $ans;
for (;;) {
- if ($cancel) {
+ if ($cancel) {
print "$prompt [y/n/C] ";
} else {
print "$prompt [Y/n] ";
@@ -664,11 +735,18 @@ sub print_times {
show_time($test_time);
doprint "\n";
}
+ if ($warning_found) {
+ doprint "\n*** WARNING";
+ doprint "S" if ($warning_found > 1);
+ doprint " found in build: $warning_found ***\n\n";
+ }
+
# reset for iterations like bisect
$build_time = 0;
$install_time = 0;
$reboot_time = 0;
$test_time = 0;
+ $warning_found = 0;
}
sub get_mandatory_configs {
@@ -727,40 +805,51 @@ sub process_variables {
my $retval = "";
# We want to check for '\', and it is just easier
- # to check the previous characet of '$' and not need
+ # to check the previous character of '$' and not need
# to worry if '$' is the first character. By adding
# a space to $value, we can just check [^\\]\$ and
# it will still work.
$value = " $value";
- while ($value =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
+ while ($value =~ /(.*?[^\\])\$\{([^\{]*?)\}(.*)/) {
my $begin = $1;
my $var = $2;
my $end = $3;
# append beginning of value to retval
$retval = "$retval$begin";
- if (defined($variable{$var})) {
+ if ($var =~ s/^shell\s+//) {
+ $retval = `$var`;
+ if ($?) {
+ doprint "WARNING: $var returned an error\n";
+ } else {
+ chomp $retval;
+ }
+ } elsif (defined($variable{$var})) {
$retval = "$retval$variable{$var}";
} elsif (defined($remove_undef) && $remove_undef) {
# for if statements, any variable that is not defined,
# we simple convert to 0
$retval = "${retval}0";
} else {
- # put back the origin piece.
- $retval = "$retval\$\{$var\}";
+ # put back the origin piece, but with $#### to not reprocess it
+ $retval = "$retval\$####\{$var\}";
# This could be an option that is used later, save
# it so we don't warn if this option is not one of
# ktests options.
$used_options{$var} = 1;
}
- $value = $end;
+ $value = "$retval$end";
+ $retval = "";
}
- $retval = "$retval$value";
+ $retval = $value;
+
+ # Convert the saved variables with $####{var} back to ${var}
+ $retval =~ s/\$####/\$/g;
# remove the space added in the beginning
$retval =~ s/ //;
- return "$retval"
+ return "$retval";
}
sub set_value {
@@ -771,6 +860,7 @@ sub set_value {
if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ &&
$prvalue !~ /^(config_|)bisect$/ &&
$prvalue !~ /^build$/ &&
+ $prvalue !~ /^make_warnings_file$/ &&
$buildonly) {
# Note if a test is something other than build, then we
@@ -814,14 +904,22 @@ sub set_eval {
}
sub set_variable {
- my ($lvalue, $rvalue) = @_;
+ my ($lvalue, $rvalue, $command) = @_;
+ # Command line variables override all others
+ if (defined($command_tmp_vars{$lvalue})) {
+ return;
+ }
if ($rvalue =~ /^\s*$/) {
delete $variable{$lvalue};
} else {
$rvalue = process_variables($rvalue);
$variable{$lvalue} = $rvalue;
}
+
+ if (defined($command)) {
+ $command_tmp_vars{$lvalue} = 1;
+ }
}
sub process_compare {
@@ -863,7 +961,6 @@ sub value_defined {
defined($opt{$2});
}
-my $d = 0;
sub process_expression {
my ($name, $val) = @_;
@@ -978,7 +1075,6 @@ sub __read_config {
$override = 0;
if ($type eq "TEST_START") {
-
if ($num_tests_set) {
die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
}
@@ -1048,7 +1144,6 @@ sub __read_config {
$test_num = $old_test_num;
$repeat = $old_repeat;
}
-
} elsif (/^\s*ELSE\b(.*)$/) {
if (!$if) {
die "$name: $.: ELSE found with out matching IF section\n$_";
@@ -1095,7 +1190,7 @@ sub __read_config {
}
}
}
-
+
if ( ! -r $file ) {
die "$name: $.: Can't read file $file\n$_";
}
@@ -1162,7 +1257,7 @@ sub __read_config {
# Config variables are only active while reading the
# config and can be defined anywhere. They also ignore
# TEST_START and DEFAULTS, but are skipped if they are in
- # on of these sections that have SKIP defined.
+ # one of these sections that have SKIP defined.
# The save variable can be
# defined multiple times and the new one simply overrides
# the previous one.
@@ -1186,13 +1281,13 @@ sub __read_config {
}
sub get_test_case {
- print "What test case would you like to run?\n";
- print " (build, install or boot)\n";
- print " Other tests are available but require editing ktest.conf\n";
- print " (see tools/testing/ktest/sample.conf)\n";
- my $ans = <STDIN>;
- chomp $ans;
- $default{"TEST_TYPE"} = $ans;
+ print "What test case would you like to run?\n";
+ print " (build, install or boot)\n";
+ print " Other tests are available but require editing ktest.conf\n";
+ print " (see tools/testing/ktest/sample.conf)\n";
+ my $ans = <STDIN>;
+ chomp $ans;
+ $default{"TEST_TYPE"} = $ans;
}
sub read_config {
@@ -1203,6 +1298,19 @@ sub read_config {
$test_case = __read_config $config, \$test_num;
+ foreach my $val (@command_vars) {
+ chomp $val;
+ my %command_overrides;
+ if ($val =~ m/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ set_value($lvalue, $rvalue, 1, \%command_overrides, "COMMAND LINE");
+ } else {
+ die "Invalid option definition '$val'\n";
+ }
+ }
+
# make sure we have all mandatory configs
get_mandatory_configs;
@@ -1288,7 +1396,10 @@ sub __eval_option {
# If a variable contains itself, use the default var
if (($var eq $name) && defined($opt{$var})) {
$o = $opt{$var};
- $retval = "$retval$o";
+ # Only append if the default doesn't contain itself
+ if ($o !~ m/\$\{$var\}/) {
+ $retval = "$retval$o";
+ }
} elsif (defined($opt{$o})) {
$o = $opt{$o};
$retval = "$retval$o";
@@ -1368,11 +1479,6 @@ sub eval_option {
return $option;
}
-sub run_command;
-sub start_monitor;
-sub end_monitor;
-sub wait_for_monitor;
-
sub reboot {
my ($time) = @_;
my $powercycle = 0;
@@ -1433,7 +1539,8 @@ sub reboot {
# Still need to wait for the reboot to finish
wait_for_monitor($time, $reboot_success_line);
-
+ }
+ if ($powercycle || $time) {
end_monitor;
}
}
@@ -1457,8 +1564,6 @@ sub do_not_reboot {
($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build");
}
-my $in_die = 0;
-
sub get_test_name() {
my $name;
@@ -1471,20 +1576,22 @@ sub get_test_name() {
}
sub dodie {
-
# avoid recursion
return if ($in_die);
$in_die = 1;
+ if ($monitor_cnt) {
+ # restore terminal settings
+ system("stty $stty_orig");
+ }
+
my $i = $iteration;
doprint "CRITICAL FAILURE... [TEST $i] ", @_, "\n";
if ($reboot_on_error && !do_not_reboot) {
-
doprint "REBOOTING\n";
reboot_to_good;
-
} elsif ($poweroff_on_error && defined($power_off)) {
doprint "POWERING OFF\n";
`$power_off`;
@@ -1519,13 +1626,9 @@ sub dodie {
close O;
close L;
}
- send_email("KTEST: critical failure for test $i [$name]",
- "Your test started at $script_start_time has failed with:\n@_\n", $log_file);
- }
- if ($monitor_cnt) {
- # restore terminal settings
- system("stty $stty_orig");
+ send_email("KTEST: critical failure for test $i [$name]",
+ "Your test started at $script_start_time has failed with:\n@_\n", $log_file);
}
if (defined($post_test)) {
@@ -1709,81 +1812,81 @@ sub wait_for_monitor {
}
sub save_logs {
- my ($result, $basedir) = @_;
- my @t = localtime;
- my $date = sprintf "%04d%02d%02d%02d%02d%02d",
- 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
+ my ($result, $basedir) = @_;
+ my @t = localtime;
+ my $date = sprintf "%04d%02d%02d%02d%02d%02d",
+ 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
- my $type = $build_type;
- if ($type =~ /useconfig/) {
- $type = "useconfig";
- }
+ my $type = $build_type;
+ if ($type =~ /useconfig/) {
+ $type = "useconfig";
+ }
- my $dir = "$machine-$test_type-$type-$result-$date";
+ my $dir = "$machine-$test_type-$type-$result-$date";
- $dir = "$basedir/$dir";
+ $dir = "$basedir/$dir";
- if (!-d $dir) {
- mkpath($dir) or
- dodie "can't create $dir";
- }
+ if (!-d $dir) {
+ mkpath($dir) or
+ dodie "can't create $dir";
+ }
- my %files = (
- "config" => $output_config,
- "buildlog" => $buildlog,
- "dmesg" => $dmesg,
- "testlog" => $testlog,
- );
+ my %files = (
+ "config" => $output_config,
+ "buildlog" => $buildlog,
+ "dmesg" => $dmesg,
+ "testlog" => $testlog,
+ );
- while (my ($name, $source) = each(%files)) {
- if (-f "$source") {
- cp "$source", "$dir/$name" or
- dodie "failed to copy $source";
- }
+ while (my ($name, $source) = each(%files)) {
+ if (-f "$source") {
+ cp "$source", "$dir/$name" or
+ dodie "failed to copy $source";
}
+ }
- doprint "*** Saved info to $dir ***\n";
+ doprint "*** Saved info to $dir ***\n";
}
sub fail {
- if ($die_on_failure) {
- dodie @_;
- }
+ if ($die_on_failure) {
+ dodie @_;
+ }
- doprint "FAILED\n";
+ doprint "FAILED\n";
- my $i = $iteration;
+ my $i = $iteration;
- # no need to reboot for just building.
- if (!do_not_reboot) {
- doprint "REBOOTING\n";
- reboot_to_good $sleep_time;
- }
+ # no need to reboot for just building.
+ if (!do_not_reboot) {
+ doprint "REBOOTING\n";
+ reboot_to_good $sleep_time;
+ }
- my $name = "";
+ my $name = "";
- if (defined($test_name)) {
- $name = " ($test_name)";
- }
+ if (defined($test_name)) {
+ $name = " ($test_name)";
+ }
- print_times;
+ print_times;
- doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
- doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
- doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n";
- doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
- doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
- if (defined($store_failures)) {
- save_logs "fail", $store_failures;
- }
+ if (defined($store_failures)) {
+ save_logs "fail", $store_failures;
+ }
- if (defined($post_test)) {
- run_command $post_test;
- }
+ if (defined($post_test)) {
+ run_command $post_test;
+ }
- return 1;
+ return 1;
}
sub run_command {
@@ -1799,6 +1902,14 @@ sub run_command {
$command =~ s/\$SSH_USER/$ssh_user/g;
$command =~ s/\$MACHINE/$machine/g;
+ if (!defined($timeout)) {
+ $timeout = $run_timeout;
+ }
+
+ if (!defined($timeout)) {
+ $timeout = -1; # tell wait_for_input to wait indefinitely
+ }
+
doprint("$command ... ");
$start_time = time;
@@ -1825,13 +1936,10 @@ sub run_command {
while (1) {
my $fp = \*CMD;
- if (defined($timeout)) {
- doprint "timeout = $timeout\n";
- }
my $line = wait_for_input($fp, $timeout);
if (!defined($line)) {
my $now = time;
- if (defined($timeout) && (($now - $start_time) >= $timeout)) {
+ if ($timeout >= 0 && (($now - $start_time) >= $timeout)) {
doprint "Hit timeout of $timeout, killing process\n";
$hit_timeout = 1;
kill 9, $pid;
@@ -1912,11 +2020,11 @@ sub run_scp_mod {
sub _get_grub_index {
- my ($command, $target, $skip) = @_;
+ my ($command, $target, $skip, $submenu) = @_;
return if (defined($grub_number) && defined($last_grub_menu) &&
- $last_grub_menu eq $grub_menu && defined($last_machine) &&
- $last_machine eq $machine);
+ $last_grub_menu eq $grub_menu && defined($last_machine) &&
+ $last_machine eq $machine);
doprint "Find $reboot_type menu ... ";
$grub_number = -1;
@@ -1924,16 +2032,21 @@ sub _get_grub_index {
my $ssh_grub = $ssh_exec;
$ssh_grub =~ s,\$SSH_COMMAND,$command,g;
- open(IN, "$ssh_grub |")
- or dodie "unable to execute $command";
+ open(IN, "$ssh_grub |") or
+ dodie "unable to execute $command";
my $found = 0;
+ my $submenu_number = 0;
+
while (<IN>) {
if (/$target/) {
$grub_number++;
$found = 1;
last;
+ } elsif (defined($submenu) && /$submenu/) {
+ $submenu_number++;
+ $grub_number = -1;
} elsif (/$skip/) {
$grub_number++;
}
@@ -1942,6 +2055,9 @@ sub _get_grub_index {
dodie "Could not find '$grub_menu' through $command on $machine"
if (!$found);
+ if ($submenu_number > 0) {
+ $grub_number = "$submenu_number>$grub_number";
+ }
doprint "$grub_number\n";
$last_grub_menu = $grub_menu;
$last_machine = $machine;
@@ -1952,6 +2068,7 @@ sub get_grub_index {
my $command;
my $target;
my $skip;
+ my $submenu;
my $grub_menu_qt;
if ($reboot_type !~ /^grub/) {
@@ -1966,21 +2083,21 @@ sub get_grub_index {
$skip = '^\s*title\s';
} elsif ($reboot_type eq "grub2") {
$command = "cat $grub_file";
- $target = '^menuentry.*' . $grub_menu_qt;
- $skip = '^menuentry\s|^submenu\s';
+ $target = '^\s*menuentry.*' . $grub_menu_qt;
+ $skip = '^\s*menuentry\s';
+ $submenu = '^\s*submenu\s';
} elsif ($reboot_type eq "grub2bls") {
- $command = $grub_bls_get;
- $target = '^title=.*' . $grub_menu_qt;
- $skip = '^title=';
+ $command = $grub_bls_get;
+ $target = '^title=.*' . $grub_menu_qt;
+ $skip = '^title=';
} else {
return;
}
- _get_grub_index($command, $target, $skip);
+ _get_grub_index($command, $target, $skip, $submenu);
}
-sub wait_for_input
-{
+sub wait_for_input {
my ($fp, $time) = @_;
my $start_time;
my $rin;
@@ -1994,6 +2111,11 @@ sub wait_for_input
$time = $timeout;
}
+ if ($time < 0) {
+ # Negative number means wait indefinitely
+ undef $time;
+ }
+
$rin = '';
vec($rin, fileno($fp), 1) = 1;
vec($rin, fileno(\*STDIN), 1) = 1;
@@ -2040,7 +2162,7 @@ sub reboot_to {
if ($reboot_type eq "grub") {
run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
} elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) {
- run_ssh "$grub_reboot $grub_number";
+ run_ssh "$grub_reboot \"'$grub_number'\"";
} elsif ($reboot_type eq "syslinux") {
run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path";
} elsif (defined $reboot_script) {
@@ -2096,7 +2218,6 @@ sub monitor {
my $version_found = 0;
while (!$done) {
-
if ($bug && defined($stop_after_failure) &&
$stop_after_failure >= 0) {
my $time = $stop_after_failure - (time - $failure_start);
@@ -2326,6 +2447,11 @@ sub get_version {
return if ($have_version);
doprint "$make kernelrelease ... ";
$version = `$make -s kernelrelease | tail -1`;
+ if (!length($version)) {
+ run_command "$make allnoconfig" or return 0;
+ doprint "$make kernelrelease ... ";
+ $version = `$make -s kernelrelease | tail -1`;
+ }
chomp($version);
doprint "$version\n";
$have_version = 1;
@@ -2349,9 +2475,6 @@ sub start_monitor_and_install {
return monitor;
}
-my $check_build_re = ".*:.*(warning|error|Error):.*";
-my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})";
-
sub process_warning_line {
my ($line) = @_;
@@ -2379,8 +2502,6 @@ sub process_warning_line {
# Returns 1 if OK
# 0 otherwise
sub check_buildlog {
- return 1 if (!defined $warnings_file);
-
my %warnings_list;
# Failed builds should not reboot the target
@@ -2394,25 +2515,28 @@ sub check_buildlog {
while (<IN>) {
if (/$check_build_re/) {
my $warning = process_warning_line $_;
-
+
$warnings_list{$warning} = 1;
}
}
close(IN);
}
- # If warnings file didn't exist, and WARNINGS_FILE exist,
- # then we fail on any warning!
-
open(IN, $buildlog) or dodie "Can't open $buildlog";
while (<IN>) {
if (/$check_build_re/) {
my $warning = process_warning_line $_;
if (!defined $warnings_list{$warning}) {
- fail "New warning found (not in $warnings_file)\n$_\n";
- $no_reboot = $save_no_reboot;
- return 0;
+ $warning_found++;
+
+ # If warnings file didn't exist, and WARNINGS_FILE exist,
+ # then we fail on any warning!
+ if (defined $warnings_file) {
+ fail "New warning found (not in $warnings_file)\n$_\n";
+ $no_reboot = $save_no_reboot;
+ return 0;
+ }
}
}
}
@@ -2571,7 +2695,6 @@ sub build {
run_command "mv $outputdir/config_temp $output_config" or
dodie "moving config_temp";
}
-
} elsif (!$noclean) {
unlink "$output_config";
run_command "$make mrproper" or
@@ -2594,6 +2717,9 @@ sub build {
# Run old config regardless, to enforce min configurations
make_oldconfig;
+ if (not defined($build_options)){
+ $build_options = "";
+ }
my $build_ret = run_command "$make $build_options", $buildlog;
if (defined($post_build)) {
@@ -2649,14 +2775,15 @@ sub success {
print_times;
- doprint "\n\n*******************************************\n";
- doprint "*******************************************\n";
- doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n";
- doprint "*******************************************\n";
- doprint "*******************************************\n";
+ doprint "\n\n";
+ doprint "*******************************************\n";
+ doprint "*******************************************\n";
+ doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n";
+ doprint "*******************************************\n";
+ doprint "*******************************************\n";
if (defined($store_successes)) {
- save_logs "success", $store_successes;
+ save_logs "success", $store_successes;
}
if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
@@ -2698,8 +2825,6 @@ sub child_run_test {
exit $run_command_status;
}
-my $child_done;
-
sub child_finished {
$child_done = 1;
}
@@ -2868,8 +2993,6 @@ sub run_bisect_test {
my $failed = 0;
my $result;
- my $output;
- my $ret;
$in_bisect = 1;
@@ -3031,7 +3154,6 @@ sub bisect {
}
if ($do_check) {
-
# get current HEAD
my $head = get_sha1("HEAD");
@@ -3071,13 +3193,11 @@ sub bisect {
run_command "git bisect replay $replay" or
dodie "failed to run replay";
} else {
-
run_command "git bisect good $good" or
dodie "could not set bisect good to $good";
run_git_bisect "git bisect bad $bad" or
dodie "could not set bisect bad to $bad";
-
}
if (defined($start)) {
@@ -3103,35 +3223,13 @@ sub bisect {
success $i;
}
-# config_ignore holds the configs that were set (or unset) for
-# a good config and we will ignore these configs for the rest
-# of a config bisect. These configs stay as they were.
-my %config_ignore;
-
-# config_set holds what all configs were set as.
-my %config_set;
-
-# config_off holds the set of configs that the bad config had disabled.
-# We need to record them and set them in the .config when running
-# olddefconfig, because olddefconfig keeps the defaults.
-my %config_off;
-
-# config_off_tmp holds a set of configs to turn off for now
-my @config_off_tmp;
-
-# config_list is the set of configs that are being tested
-my %config_list;
-my %null_config;
-
-my %dependency;
-
sub assign_configs {
my ($hash, $config) = @_;
doprint "Reading configs from $config\n";
- open (IN, $config)
- or dodie "Failed to read $config";
+ open (IN, $config) or
+ dodie "Failed to read $config";
while (<IN>) {
chomp;
@@ -3219,8 +3317,6 @@ sub config_bisect_end {
doprint "***************************************\n\n";
}
-my $pass = 1;
-
sub run_config_bisect {
my ($good, $bad, $last_result) = @_;
my $reset = "";
@@ -3243,13 +3339,13 @@ sub run_config_bisect {
$ret = run_config_bisect_test $config_bisect_type;
if ($ret) {
- doprint "NEW GOOD CONFIG ($pass)\n";
+ doprint "NEW GOOD CONFIG ($pass)\n";
system("cp $output_config $tmpdir/good_config.tmp.$pass");
$pass++;
# Return 3 for good config
return 3;
} else {
- doprint "NEW BAD CONFIG ($pass)\n";
+ doprint "NEW BAD CONFIG ($pass)\n";
system("cp $output_config $tmpdir/bad_config.tmp.$pass");
$pass++;
# Return 4 for bad config
@@ -3284,10 +3380,11 @@ sub config_bisect {
if (!defined($config_bisect_exec)) {
# First check the location that ktest.pl ran
- my @locations = ( "$pwd/config-bisect.pl",
- "$dirname/config-bisect.pl",
- "$builddir/tools/testing/ktest/config-bisect.pl",
- undef );
+ my @locations = (
+ "$pwd/config-bisect.pl",
+ "$dirname/config-bisect.pl",
+ "$builddir/tools/testing/ktest/config-bisect.pl",
+ undef );
foreach my $loc (@locations) {
doprint "loc = $loc\n";
$config_bisect_exec = $loc;
@@ -3368,7 +3465,7 @@ sub config_bisect {
} while ($ret == 3 || $ret == 4);
if ($ret == 2) {
- config_bisect_end "$good_config.tmp", "$bad_config.tmp";
+ config_bisect_end "$good_config.tmp", "$bad_config.tmp";
}
return $ret if ($ret < 0);
@@ -3442,11 +3539,37 @@ sub patchcheck {
@list = reverse @list;
}
+ my %skip_list;
+ my $will_skip = 0;
+
+ if (defined($patchcheck_skip)) {
+ foreach my $s (split /\s+/, $patchcheck_skip) {
+ $s = `git log --pretty=oneline $s~1..$s`;
+ $s =~ s/^(\S+).*/$1/;
+ chomp $s;
+ $skip_list{$s} = 1;
+ $will_skip++;
+ }
+ }
+
doprint("Going to test the following commits:\n");
foreach my $l (@list) {
+ my $sha1 = $l;
+ $sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+ next if (defined($skip_list{$sha1}));
doprint "$l\n";
}
+ if ($will_skip) {
+ doprint("\nSkipping the following commits:\n");
+ foreach my $l (@list) {
+ my $sha1 = $l;
+ $sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+ next if (!defined($skip_list{$sha1}));
+ doprint "$l\n";
+ }
+ }
+
my $save_clean = $noclean;
my %ignored_warnings;
@@ -3461,6 +3584,11 @@ sub patchcheck {
my $sha1 = $item;
$sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+ if (defined($skip_list{$sha1})) {
+ doprint "\nSkipping \"$item\"\n\n";
+ next;
+ }
+
doprint "\nProcessing commit \"$item\"\n\n";
run_command "git checkout $sha1" or
@@ -3511,14 +3639,6 @@ sub patchcheck {
return 1;
}
-my %depends;
-my %depcount;
-my $iflevel = 0;
-my @ifdeps;
-
-# prevent recursion
-my %read_kconfigs;
-
sub add_dep {
# $config depends on $dep
my ($config, $dep) = @_;
@@ -3548,7 +3668,6 @@ sub read_kconfig {
my $cont = 0;
my $line;
-
if (! -f $kconfig) {
doprint "file $kconfig does not exist, skipping\n";
return;
@@ -3630,8 +3749,8 @@ sub read_kconfig {
sub read_depends {
# find out which arch this is by the kconfig file
- open (IN, $output_config)
- or dodie "Failed to read $output_config";
+ open (IN, $output_config) or
+ dodie "Failed to read $output_config";
my $arch;
while (<IN>) {
if (m,Linux/(\S+)\s+\S+\s+Kernel Configuration,) {
@@ -3657,7 +3776,7 @@ sub read_depends {
if (! -f $kconfig && $arch =~ /\d$/) {
my $orig = $arch;
- # some subarchs have numbers, truncate them
+ # some subarchs have numbers, truncate them
$arch =~ s/\d*$//;
$kconfig = "$builddir/arch/$arch/Kconfig";
if (! -f $kconfig) {
@@ -3706,7 +3825,6 @@ sub get_depends {
my @configs;
while ($dep =~ /[$valid]/) {
-
if ($dep =~ /^[^$valid]*([$valid]+)/) {
my $conf = "CONFIG_" . $1;
@@ -3721,12 +3839,6 @@ sub get_depends {
return @configs;
}
-my %min_configs;
-my %keep_configs;
-my %save_configs;
-my %processed_configs;
-my %nochange_config;
-
sub test_this_config {
my ($config) = @_;
@@ -3763,9 +3875,10 @@ sub test_this_config {
# .config to make sure it is missing the config that
# we had before
my %configs = %min_configs;
- delete $configs{$config};
+ $configs{$config} = "# $config is not set";
make_new_config ((values %configs), (values %keep_configs));
make_oldconfig;
+ delete $configs{$config};
undef %configs;
assign_configs \%configs, $output_config;
@@ -3852,7 +3965,7 @@ sub make_min_config {
foreach my $config (@config_keys) {
my $kconfig = chomp_config $config;
if (!defined $depcount{$kconfig}) {
- $depcount{$kconfig} = 0;
+ $depcount{$kconfig} = 0;
}
}
@@ -3887,7 +4000,6 @@ sub make_min_config {
my $take_two = 0;
while (!$done) {
-
my $config;
my $found;
@@ -3898,7 +4010,7 @@ sub make_min_config {
# Sort keys by who is most dependent on
@test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} }
- @test_configs ;
+ @test_configs ;
# Put configs that did not modify the config at the end.
my $reset = 1;
@@ -3954,13 +4066,13 @@ sub make_min_config {
my $failed = 0;
build "oldconfig" or $failed = 1;
if (!$failed) {
- start_monitor_and_install or $failed = 1;
+ start_monitor_and_install or $failed = 1;
- if ($type eq "test" && !$failed) {
- do_run_test or $failed = 1;
- }
+ if ($type eq "test" && !$failed) {
+ do_run_test or $failed = 1;
+ }
- end_monitor;
+ end_monitor;
}
$in_bisect = 0;
@@ -3974,8 +4086,8 @@ sub make_min_config {
# update new ignore configs
if (defined($ignore_config)) {
- open (OUT, ">$temp_config")
- or dodie "Can't write to $temp_config";
+ open (OUT, ">$temp_config") or
+ dodie "Can't write to $temp_config";
foreach my $config (keys %save_configs) {
print OUT "$save_configs{$config}\n";
}
@@ -4002,8 +4114,8 @@ sub make_min_config {
}
# Save off all the current mandatory configs
- open (OUT, ">$temp_config")
- or dodie "Can't write to $temp_config";
+ open (OUT, ">$temp_config") or
+ dodie "Can't write to $temp_config";
foreach my $config (keys %keep_configs) {
print OUT "$keep_configs{$config}\n";
}
@@ -4041,7 +4153,6 @@ sub make_warnings_file {
open(IN, $buildlog) or dodie "Can't open $buildlog";
while (<IN>) {
-
# Some compilers use UTF-8 extended for quotes
# for distcc heterogeneous systems, this causes issues
s/$utf8_quote/'/g;
@@ -4057,98 +4168,6 @@ sub make_warnings_file {
success $i;
}
-$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n";
-
-if ($#ARGV == 0) {
- $ktest_config = $ARGV[0];
- if (! -f $ktest_config) {
- print "$ktest_config does not exist.\n";
- if (!read_yn "Create it?") {
- exit 0;
- }
- }
-}
-
-if (! -f $ktest_config) {
- $newconfig = 1;
- get_test_case;
- open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
- print OUT << "EOF"
-# Generated by ktest.pl
-#
-
-# PWD is a ktest.pl variable that will result in the process working
-# directory that ktest.pl is executed in.
-
-# THIS_DIR is automatically assigned the PWD of the path that generated
-# the config file. It is best to use this variable when assigning other
-# directory paths within this directory. This allows you to easily
-# move the test cases to other locations or to other machines.
-#
-THIS_DIR := $variable{"PWD"}
-
-# Define each test with TEST_START
-# The config options below it will override the defaults
-TEST_START
-TEST_TYPE = $default{"TEST_TYPE"}
-
-DEFAULTS
-EOF
-;
- close(OUT);
-}
-read_config $ktest_config;
-
-if (defined($opt{"LOG_FILE"})) {
- $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1);
-}
-
-# Append any configs entered in manually to the config file.
-my @new_configs = keys %entered_configs;
-if ($#new_configs >= 0) {
- print "\nAppending entered in configs to $ktest_config\n";
- open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
- foreach my $config (@new_configs) {
- print OUT "$config = $entered_configs{$config}\n";
- $opt{$config} = process_variables($entered_configs{$config});
- }
-}
-
-if (defined($opt{"LOG_FILE"})) {
- if ($opt{"CLEAR_LOG"}) {
- unlink $opt{"LOG_FILE"};
- }
- open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
- LOG->autoflush(1);
-}
-
-doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
-
-for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
-
- if (!$i) {
- doprint "DEFAULT OPTIONS:\n";
- } else {
- doprint "\nTEST $i OPTIONS";
- if (defined($repeat_tests{$i})) {
- $repeat = $repeat_tests{$i};
- doprint " ITERATE $repeat";
- }
- doprint "\n";
- }
-
- foreach my $option (sort keys %opt) {
-
- if ($option =~ /\[(\d+)\]$/) {
- next if ($i != $1);
- } else {
- next if ($i);
- }
-
- doprint "$option = $opt{$option}\n";
- }
-}
-
sub option_defined {
my ($option) = @_;
@@ -4261,7 +4280,6 @@ sub do_send_mail {
}
sub send_email {
-
if (defined($mailto)) {
if (!defined($mailer)) {
doprint "No email sent: email or mailer not specified in config.\n";
@@ -4272,14 +4290,163 @@ sub send_email {
}
sub cancel_test {
+ if ($monitor_cnt) {
+ end_monitor;
+ }
if ($email_when_canceled) {
my $name = get_test_name;
- send_email("KTEST: Your [$name] test was cancelled",
- "Your test started at $script_start_time was cancelled: sig int");
+ send_email("KTEST: Your [$name] test was cancelled",
+ "Your test started at $script_start_time was cancelled: sig int");
}
die "\nCaught Sig Int, test interrupted: $!\n"
}
+sub die_usage {
+ die << "EOF"
+ktest.pl version: $VERSION
+ usage: ktest.pl [options] [config-file]
+ [options]:
+ -D value: Where value can act as an option override.
+ -D BUILD_NOCLEAN=1
+ Sets global BUILD_NOCLEAN to 1
+ -D TEST_TYPE[2]=build
+ Sets TEST_TYPE of test 2 to "build"
+
+ It can also override all temp variables.
+ -D USE_TEMP_DIR:=1
+ Will override all variables that use
+ "USE_TEMP_DIR="
+
+EOF
+;
+}
+
+while ( $#ARGV >= 0 ) {
+ if ( $ARGV[0] eq "-D" ) {
+ shift;
+ die_usage if ($#ARGV < 1);
+ my $val = shift;
+
+ if ($val =~ m/(.*?):=(.*)$/) {
+ set_variable($1, $2, 1);
+ } else {
+ $command_vars[$#command_vars + 1] = $val;
+ }
+
+ } elsif ( $ARGV[0] =~ m/^-D(.*)/) {
+ my $val = $1;
+ shift;
+
+ if ($val =~ m/(.*?):=(.*)$/) {
+ set_variable($1, $2, 1);
+ } else {
+ $command_vars[$#command_vars + 1] = $val;
+ }
+ } elsif ( $ARGV[0] eq "-h" ) {
+ die_usage;
+ } else {
+ last;
+ }
+}
+
+$#ARGV < 1 or die_usage;
+if ($#ARGV == 0) {
+ $ktest_config = $ARGV[0];
+ if (! -f $ktest_config) {
+ print "$ktest_config does not exist.\n";
+ if (!read_yn "Create it?") {
+ exit 0;
+ }
+ }
+}
+
+if (! -f $ktest_config) {
+ $newconfig = 1;
+ get_test_case;
+ open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
+ print OUT << "EOF"
+# Generated by ktest.pl
+#
+
+# PWD is a ktest.pl variable that will result in the process working
+# directory that ktest.pl is executed in.
+
+# THIS_DIR is automatically assigned the PWD of the path that generated
+# the config file. It is best to use this variable when assigning other
+# directory paths within this directory. This allows you to easily
+# move the test cases to other locations or to other machines.
+#
+THIS_DIR := $variable{"PWD"}
+
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START
+TEST_TYPE = $default{"TEST_TYPE"}
+
+DEFAULTS
+EOF
+;
+ close(OUT);
+}
+read_config $ktest_config;
+
+if (defined($opt{"LOG_FILE"})) {
+ $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1);
+}
+
+# Append any configs entered in manually to the config file.
+my @new_configs = keys %entered_configs;
+if ($#new_configs >= 0) {
+ print "\nAppending entered in configs to $ktest_config\n";
+ open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
+ foreach my $config (@new_configs) {
+ print OUT "$config = $entered_configs{$config}\n";
+ $opt{$config} = process_variables($entered_configs{$config});
+ }
+}
+
+if (defined($opt{"LOG_FILE"})) {
+ if ($opt{"CLEAR_LOG"}) {
+ unlink $opt{"LOG_FILE"};
+ }
+
+ if (! -e $opt{"LOG_FILE"} && $opt{"LOG_FILE"} =~ m,^(.*/),) {
+ my $dir = $1;
+ if (! -d $dir) {
+ mkpath($dir) or die "Failed to create directories '$dir': $!";
+ print "\nThe log directory $dir did not exist, so it was created.\n";
+ }
+ }
+ open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+ LOG->autoflush(1);
+}
+
+doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
+
+for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
+
+ if (!$i) {
+ doprint "DEFAULT OPTIONS:\n";
+ } else {
+ doprint "\nTEST $i OPTIONS";
+ if (defined($repeat_tests{$i})) {
+ $repeat = $repeat_tests{$i};
+ doprint " ITERATE $repeat";
+ }
+ doprint "\n";
+ }
+
+ foreach my $option (sort keys %opt) {
+ if ($option =~ /\[(\d+)\]$/) {
+ next if ($i != $1);
+ } else {
+ next if ($i);
+ }
+
+ doprint "$option = $opt{$option}\n";
+ }
+}
+
$SIG{INT} = qw(cancel_test);
# First we need to do is the builds
@@ -4323,15 +4490,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
# The first test may override the PRE_KTEST option
if ($i == 1) {
- if (defined($pre_ktest)) {
- doprint "\n";
- run_command $pre_ktest;
- }
- if ($email_when_started) {
+ if (defined($pre_ktest)) {
+ doprint "\n";
+ run_command $pre_ktest;
+ }
+ if ($email_when_started) {
my $name = get_test_name;
- send_email("KTEST: Your [$name] test was started",
- "Your test was started on $script_start_time");
- }
+ send_email("KTEST: Your [$name] test was started",
+ "Your test was started on $script_start_time");
+ }
}
# Any test can override the POST_KTEST option
@@ -4405,11 +4572,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n";
+ # Always show which build directory and output directory is being used
+ doprint "BUILD_DIR=$builddir\n";
+ doprint "OUTPUT_DIR=$outputdir\n\n";
+
if (defined($pre_test)) {
my $ret = run_command $pre_test;
if (!$ret && defined($pre_test_die) &&
$pre_test_die) {
- dodie "failed to pre_test\n";
+ dodie "failed to pre_test\n";
}
}
@@ -4503,12 +4674,11 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) {
run_command $switch_to_good;
}
-
doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n";
if ($email_when_finished) {
send_email("KTEST: Your test has finished!",
- "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
+ "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
}
if (defined($opt{"LOG_FILE"})) {
@@ -4517,3 +4687,12 @@ if (defined($opt{"LOG_FILE"})) {
}
exit 0;
+
+##
+# The following are here to standardize tabs/spaces/etc across the most likely editors
+###
+
+# Local Variables:
+# mode: perl
+# End:
+# vim: softtabstop=4
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 5e7d1d729752..9c4c449a8f3e 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -259,6 +259,14 @@
# If PATH is not a config variable, then the ${PATH} in
# the MAKE_CMD option will be evaluated by the shell when
# the MAKE_CMD option is passed into shell processing.
+#
+# Shell commands can also be inserted with the ${shell <command>}
+# expression. Note, this is case sensitive, thus ${SHELL <command>}
+# will not work.
+#
+# HOSTNAME := ${shell hostname}
+# DEFAULTS IF "${HOSTNAME}" == "frodo"
+#
#### Using options in other options ####
#
@@ -809,6 +817,11 @@
# is issued instead of a reboot.
# CONNECT_TIMEOUT = 25
+# The timeout in seconds for how long to wait for any running command
+# to timeout. If not defined, it will let it go indefinitely.
+# (default undefined)
+#RUN_TIMEOUT = 600
+
# In between tests, a reboot of the box may occur, and this
# is the time to wait for the console after it stops producing
# output. Some machines may not produce a large lag on reboot
@@ -1004,6 +1017,8 @@
# Note, PATCHCHECK_CHERRY requires PATCHCHECK_END to be defined.
# (default 0)
#
+# PATCHCHECK_SKIP is an optional list of shas to skip testing
+#
# PATCHCHECK_TYPE is required and is the type of test to run:
# build, boot, test.
#
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index 9235b7d42d38..422e186cf3cf 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -1,3 +1,57 @@
+# This config enables as many tests as possible under UML.
+# It is intended for use in continuous integration systems and similar for
+# automated testing of as much as possible.
+# The config is manually maintained, though it uses KUNIT_ALL_TESTS=y to enable
+# any tests whose dependencies are already satisfied. Please feel free to add
+# more options if they any new tests.
+
CONFIG_KUNIT=y
-CONFIG_KUNIT_TEST=y
CONFIG_KUNIT_EXAMPLE_TEST=y
+CONFIG_KUNIT_ALL_TESTS=y
+
+CONFIG_FORTIFY_SOURCE=y
+
+CONFIG_IIO=y
+
+CONFIG_EXT4_FS=y
+
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+
+CONFIG_PCI=y
+CONFIG_USB4=y
+CONFIG_I2C=y
+
+CONFIG_NET=y
+CONFIG_MCTP=y
+CONFIG_MCTP_FLOWS=y
+
+CONFIG_INET=y
+CONFIG_MPTCP=y
+
+CONFIG_NETDEVICES=y
+CONFIG_WLAN=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+CONFIG_WLAN_VENDOR_INTEL=y
+CONFIG_IWLWIFI=y
+
+CONFIG_DAMON=y
+CONFIG_DAMON_VADDR=y
+CONFIG_DAMON_PADDR=y
+
+CONFIG_REGMAP_BUILD=y
+
+CONFIG_AUDIT=y
+
+CONFIG_PRIME_NUMBERS=y
+
+CONFIG_SECURITY=y
+CONFIG_SECURITY_APPARMOR=y
+CONFIG_SECURITY_LANDLOCK=y
+
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_SOC=y
+CONFIG_SND_SOC_TOPOLOGY_BUILD=y
+CONFIG_SND_SOC_CS35L56_I2C=y
diff --git a/tools/testing/kunit/configs/arch_uml.config b/tools/testing/kunit/configs/arch_uml.config
new file mode 100644
index 000000000000..28edf816aa70
--- /dev/null
+++ b/tools/testing/kunit/configs/arch_uml.config
@@ -0,0 +1,7 @@
+# Config options which are added to UML builds by default
+
+# Enable pci, as a lot of tests require it.
+CONFIG_KUNIT_UML_PCI=y
+
+# Enable FORTIFY_SOURCE for wider checking.
+CONFIG_FORTIFY_SOURCE=y
diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config
deleted file mode 100644
index a7f0603d33f6..000000000000
--- a/tools/testing/kunit/configs/broken_on_uml.config
+++ /dev/null
@@ -1,42 +0,0 @@
-# These are currently broken on UML and prevent allyesconfig from building
-# CONFIG_STATIC_LINK is not set
-# CONFIG_UML_NET_VECTOR is not set
-# CONFIG_UML_NET_VDE is not set
-# CONFIG_UML_NET_PCAP is not set
-# CONFIG_NET_PTP_CLASSIFY is not set
-# CONFIG_IP_VS is not set
-# CONFIG_BRIDGE_EBT_BROUTE is not set
-# CONFIG_BRIDGE_EBT_T_FILTER is not set
-# CONFIG_BRIDGE_EBT_T_NAT is not set
-# CONFIG_MTD_NAND_CADENCE is not set
-# CONFIG_MTD_NAND_NANDSIM is not set
-# CONFIG_BLK_DEV_NULL_BLK is not set
-# CONFIG_BLK_DEV_RAM is not set
-# CONFIG_SCSI_DEBUG is not set
-# CONFIG_NET_VENDOR_XILINX is not set
-# CONFIG_NULL_TTY is not set
-# CONFIG_PTP_1588_CLOCK is not set
-# CONFIG_PINCTRL_EQUILIBRIUM is not set
-# CONFIG_DMABUF_SELFTESTS is not set
-# CONFIG_COMEDI is not set
-# CONFIG_XIL_AXIS_FIFO is not set
-# CONFIG_EXFAT_FS is not set
-# CONFIG_STM_DUMMY is not set
-# CONFIG_FSI_MASTER_ASPEED is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_UBIFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_CRYPTO_DEV_SAFEXCEL is not set
-# CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set
-# CONFIG_KCOV is not set
-# CONFIG_LKDTM is not set
-# CONFIG_REED_SOLOMON_TEST is not set
-# CONFIG_TEST_RHASHTABLE is not set
-# CONFIG_TEST_MEMINIT is not set
-# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
-# CONFIG_DEBUG_INFO_BTF is not set
-# CONFIG_PTP_1588_CLOCK_INES is not set
-# CONFIG_QCOM_CPR is not set
-# CONFIG_RESET_BRCMSTB_RESCAL is not set
-# CONFIG_RESET_INTEL_GW is not set
-# CONFIG_ADI_AXI_ADC is not set
diff --git a/tools/testing/kunit/configs/coverage_uml.config b/tools/testing/kunit/configs/coverage_uml.config
new file mode 100644
index 000000000000..bacb77664fa8
--- /dev/null
+++ b/tools/testing/kunit/configs/coverage_uml.config
@@ -0,0 +1,11 @@
+# This config fragment enables coverage on UML, which is different from the
+# normal gcov used in other arches (no debugfs).
+# Example usage:
+# ./tools/testing/kunit/kunit.py run \
+# --kunitconfig=tools/testing/kunit/configs/all_tests_uml.config \
+# --kunitconfig=tools/testing/kunit/configs/coverage_uml.config
+
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_GCOV=y
diff --git a/tools/testing/kunit/configs/default.config b/tools/testing/kunit/configs/default.config
new file mode 100644
index 000000000000..e67af7b9f1bb
--- /dev/null
+++ b/tools/testing/kunit/configs/default.config
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_KUNIT_EXAMPLE_TEST=y
+CONFIG_KUNIT_ALL_TESTS=y
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index e808a47c839b..cd99c1956331 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -8,34 +8,22 @@
# Author: Brendan Higgins <brendanhiggins@google.com>
import argparse
-import sys
import os
+import re
+import shlex
+import sys
import time
-from collections import namedtuple
+assert sys.version_info >= (3, 7), "Python version is too old"
+
+from dataclasses import dataclass
from enum import Enum, auto
+from typing import Iterable, List, Optional, Sequence, Tuple
-import kunit_config
import kunit_json
import kunit_kernel
import kunit_parser
-
-KunitResult = namedtuple('KunitResult', ['status','result','elapsed_time'])
-
-KunitConfigRequest = namedtuple('KunitConfigRequest',
- ['build_dir', 'make_options'])
-KunitBuildRequest = namedtuple('KunitBuildRequest',
- ['jobs', 'build_dir', 'alltests',
- 'make_options'])
-KunitExecRequest = namedtuple('KunitExecRequest',
- ['timeout', 'build_dir', 'alltests'])
-KunitParseRequest = namedtuple('KunitParseRequest',
- ['raw_output', 'input_data', 'build_dir', 'json'])
-KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
- 'build_dir', 'alltests', 'json',
- 'make_options'])
-
-KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
+from kunit_printer import stdout, null_printer
class KunitStatus(Enum):
SUCCESS = auto()
@@ -43,6 +31,44 @@ class KunitStatus(Enum):
BUILD_FAILURE = auto()
TEST_FAILURE = auto()
+@dataclass
+class KunitResult:
+ status: KunitStatus
+ elapsed_time: float
+
+@dataclass
+class KunitConfigRequest:
+ build_dir: str
+ make_options: Optional[List[str]]
+
+@dataclass
+class KunitBuildRequest(KunitConfigRequest):
+ jobs: int
+
+@dataclass
+class KunitParseRequest:
+ raw_output: Optional[str]
+ json: Optional[str]
+ summary: bool
+ failed: bool
+
+@dataclass
+class KunitExecRequest(KunitParseRequest):
+ build_dir: str
+ timeout: int
+ filter_glob: str
+ filter: str
+ filter_action: Optional[str]
+ kernel_args: Optional[List[str]]
+ run_isolated: Optional[str]
+ list_tests: bool
+ list_tests_attr: bool
+
+@dataclass
+class KunitRequest(KunitExecRequest, KunitBuildRequest):
+ pass
+
+
def get_kernel_root_path() -> str:
path = sys.argv[0] if not __file__ else __file__
parts = os.path.realpath(path).split('tools/testing/kunit')
@@ -52,163 +78,495 @@ def get_kernel_root_path() -> str:
def config_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitConfigRequest) -> KunitResult:
- kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...')
+ stdout.print_with_timestamp('Configuring KUnit Kernel ...')
config_start = time.time()
success = linux.build_reconfig(request.build_dir, request.make_options)
config_end = time.time()
- if not success:
- return KunitResult(KunitStatus.CONFIG_FAILURE,
- 'could not configure kernel',
- config_end - config_start)
- return KunitResult(KunitStatus.SUCCESS,
- 'configured kernel successfully',
- config_end - config_start)
+ status = KunitStatus.SUCCESS if success else KunitStatus.CONFIG_FAILURE
+ return KunitResult(status, config_end - config_start)
def build_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitBuildRequest) -> KunitResult:
- kunit_parser.print_with_timestamp('Building KUnit Kernel ...')
+ stdout.print_with_timestamp('Building KUnit Kernel ...')
build_start = time.time()
- success = linux.build_um_kernel(request.alltests,
- request.jobs,
- request.build_dir,
- request.make_options)
+ success = linux.build_kernel(request.jobs,
+ request.build_dir,
+ request.make_options)
build_end = time.time()
- if not success:
- return KunitResult(KunitStatus.BUILD_FAILURE,
- 'could not build kernel',
- build_end - build_start)
- if not success:
- return KunitResult(KunitStatus.BUILD_FAILURE,
- 'could not build kernel',
- build_end - build_start)
- return KunitResult(KunitStatus.SUCCESS,
- 'built kernel successfully',
- build_end - build_start)
-
-def exec_tests(linux: kunit_kernel.LinuxSourceTree,
- request: KunitExecRequest) -> KunitResult:
- kunit_parser.print_with_timestamp('Starting KUnit Kernel ...')
- test_start = time.time()
- result = linux.run_kernel(
- timeout=None if request.alltests else request.timeout,
- build_dir=request.build_dir)
-
- test_end = time.time()
-
- return KunitResult(KunitStatus.SUCCESS,
- result,
- test_end - test_start)
-
-def parse_tests(request: KunitParseRequest) -> KunitResult:
- parse_start = time.time()
+ status = KunitStatus.SUCCESS if success else KunitStatus.BUILD_FAILURE
+ return KunitResult(status, build_end - build_start)
+
+def config_and_build_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitBuildRequest) -> KunitResult:
+ config_result = config_tests(linux, request)
+ if config_result.status != KunitStatus.SUCCESS:
+ return config_result
- test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS,
- [],
- 'Tests not Parsed.')
+ return build_tests(linux, request)
+
+def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> List[str]:
+ args = ['kunit.action=list']
+
+ if request.kernel_args:
+ args.extend(request.kernel_args)
+
+ output = linux.run_kernel(args=args,
+ timeout=request.timeout,
+ filter_glob=request.filter_glob,
+ filter=request.filter,
+ filter_action=request.filter_action,
+ build_dir=request.build_dir)
+ lines = kunit_parser.extract_tap_lines(output)
+ # Hack! Drop the dummy TAP version header that the executor prints out.
+ lines.pop()
+
+ # Filter out any extraneous non-test output that might have gotten mixed in.
+ return [l for l in output if re.match(r'^[^\s.]+\.[^\s.]+$', l)]
+
+def _list_tests_attr(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> Iterable[str]:
+ args = ['kunit.action=list_attr']
+
+ if request.kernel_args:
+ args.extend(request.kernel_args)
+
+ output = linux.run_kernel(args=args,
+ timeout=request.timeout,
+ filter_glob=request.filter_glob,
+ filter=request.filter,
+ filter_action=request.filter_action,
+ build_dir=request.build_dir)
+ lines = kunit_parser.extract_tap_lines(output)
+ # Hack! Drop the dummy TAP version header that the executor prints out.
+ lines.pop()
+
+ # Filter out any extraneous non-test output that might have gotten mixed in.
+ return lines
+
+def _suites_from_test_list(tests: List[str]) -> List[str]:
+ """Extracts all the suites from an ordered list of tests."""
+ suites = [] # type: List[str]
+ for t in tests:
+ parts = t.split('.', maxsplit=2)
+ if len(parts) != 2:
+ raise ValueError(f'internal KUnit error, test name should be of the form "<suite>.<test>", got "{t}"')
+ suite, _ = parts
+ if not suites or suites[-1] != suite:
+ suites.append(suite)
+ return suites
+
+def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> KunitResult:
+ filter_globs = [request.filter_glob]
+ if request.list_tests:
+ output = _list_tests(linux, request)
+ for line in output:
+ print(line.rstrip())
+ return KunitResult(status=KunitStatus.SUCCESS, elapsed_time=0.0)
+ if request.list_tests_attr:
+ attr_output = _list_tests_attr(linux, request)
+ for line in attr_output:
+ print(line.rstrip())
+ return KunitResult(status=KunitStatus.SUCCESS, elapsed_time=0.0)
+ if request.run_isolated:
+ tests = _list_tests(linux, request)
+ if request.run_isolated == 'test':
+ filter_globs = tests
+ elif request.run_isolated == 'suite':
+ filter_globs = _suites_from_test_list(tests)
+ # Apply the test-part of the user's glob, if present.
+ if '.' in request.filter_glob:
+ test_glob = request.filter_glob.split('.', maxsplit=2)[1]
+ filter_globs = [g + '.'+ test_glob for g in filter_globs]
+
+ metadata = kunit_json.Metadata(arch=linux.arch(), build_dir=request.build_dir, def_config='kunit_defconfig')
+
+ test_counts = kunit_parser.TestCounts()
+ exec_time = 0.0
+ for i, filter_glob in enumerate(filter_globs):
+ stdout.print_with_timestamp('Starting KUnit Kernel ({}/{})...'.format(i+1, len(filter_globs)))
+
+ test_start = time.time()
+ run_result = linux.run_kernel(
+ args=request.kernel_args,
+ timeout=request.timeout,
+ filter_glob=filter_glob,
+ filter=request.filter,
+ filter_action=request.filter_action,
+ build_dir=request.build_dir)
+
+ _, test_result = parse_tests(request, metadata, run_result)
+ # run_kernel() doesn't block on the kernel exiting.
+ # That only happens after we get the last line of output from `run_result`.
+ # So exec_time here actually contains parsing + execution time, which is fine.
+ test_end = time.time()
+ exec_time += test_end - test_start
+
+ test_counts.add_subtest_counts(test_result.counts)
+
+ if len(filter_globs) == 1 and test_counts.crashed > 0:
+ bd = request.build_dir
+ print('The kernel seems to have crashed; you can decode the stack traces with:')
+ print('$ scripts/decode_stacktrace.sh {}/vmlinux {} < {} | tee {}/decoded.log | {} parse'.format(
+ bd, bd, kunit_kernel.get_outfile_path(bd), bd, sys.argv[0]))
+
+ kunit_status = _map_to_overall_status(test_counts.get_status())
+ return KunitResult(status=kunit_status, elapsed_time=exec_time)
+
+def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus:
+ if test_status in (kunit_parser.TestStatus.SUCCESS, kunit_parser.TestStatus.SKIPPED):
+ return KunitStatus.SUCCESS
+ return KunitStatus.TEST_FAILURE
+
+def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input_data: Iterable[str]) -> Tuple[KunitResult, kunit_parser.Test]:
+ parse_start = time.time()
if request.raw_output:
- kunit_parser.raw_output(request.input_data)
- else:
- test_result = kunit_parser.parse_run_tests(request.input_data)
- parse_end = time.time()
+ # Treat unparsed results as one passing test.
+ fake_test = kunit_parser.Test()
+ fake_test.status = kunit_parser.TestStatus.SUCCESS
+ fake_test.counts.passed = 1
+
+ output: Iterable[str] = input_data
+ if request.raw_output == 'all' or request.raw_output == 'full':
+ pass
+ elif request.raw_output == 'kunit':
+ output = kunit_parser.extract_tap_lines(output)
+ for line in output:
+ print(line.rstrip())
+ parse_time = time.time() - parse_start
+ return KunitResult(KunitStatus.SUCCESS, parse_time), fake_test
+
+ default_printer = stdout
+ if request.summary or request.failed:
+ default_printer = null_printer
+
+ # Actually parse the test results.
+ test = kunit_parser.parse_run_tests(input_data, default_printer)
+ parse_time = time.time() - parse_start
+
+ if request.failed:
+ kunit_parser.print_test(test, request.failed, stdout)
+ kunit_parser.print_summary_line(test, stdout)
if request.json:
- json_obj = kunit_json.get_json_result(
- test_result=test_result,
- def_config='kunit_defconfig',
- build_dir=request.build_dir,
- json_path=request.json)
+ json_str = kunit_json.get_json_result(
+ test=test,
+ metadata=metadata)
if request.json == 'stdout':
- print(json_obj)
-
- if test_result.status != kunit_parser.TestStatus.SUCCESS:
- return KunitResult(KunitStatus.TEST_FAILURE, test_result,
- parse_end - parse_start)
+ print(json_str)
+ else:
+ with open(request.json, 'w') as f:
+ f.write(json_str)
+ stdout.print_with_timestamp("Test results stored in %s" %
+ os.path.abspath(request.json))
- return KunitResult(KunitStatus.SUCCESS, test_result,
- parse_end - parse_start)
+ if test.status != kunit_parser.TestStatus.SUCCESS:
+ return KunitResult(KunitStatus.TEST_FAILURE, parse_time), test
+ return KunitResult(KunitStatus.SUCCESS, parse_time), test
def run_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitRequest) -> KunitResult:
run_start = time.time()
- config_request = KunitConfigRequest(request.build_dir,
- request.make_options)
- config_result = config_tests(linux, config_request)
+ config_result = config_tests(linux, request)
if config_result.status != KunitStatus.SUCCESS:
return config_result
- build_request = KunitBuildRequest(request.jobs, request.build_dir,
- request.alltests,
- request.make_options)
- build_result = build_tests(linux, build_request)
+ build_result = build_tests(linux, request)
if build_result.status != KunitStatus.SUCCESS:
return build_result
- exec_request = KunitExecRequest(request.timeout, request.build_dir,
- request.alltests)
- exec_result = exec_tests(linux, exec_request)
- if exec_result.status != KunitStatus.SUCCESS:
- return exec_result
-
- parse_request = KunitParseRequest(request.raw_output,
- exec_result.result,
- request.build_dir,
- request.json)
- parse_result = parse_tests(parse_request)
+ exec_result = exec_tests(linux, request)
run_end = time.time()
- kunit_parser.print_with_timestamp((
+ stdout.print_with_timestamp((
'Elapsed time: %.3fs total, %.3fs configuring, %.3fs ' +
'building, %.3fs running\n') % (
run_end - run_start,
config_result.elapsed_time,
build_result.elapsed_time,
exec_result.elapsed_time))
- return parse_result
-
-def add_common_opts(parser) -> None:
+ return exec_result
+
+# Problem:
+# $ kunit.py run --json
+# works as one would expect and prints the parsed test results as JSON.
+# $ kunit.py run --json suite_name
+# would *not* pass suite_name as the filter_glob and print as json.
+# argparse will consider it to be another way of writing
+# $ kunit.py run --json=suite_name
+# i.e. it would run all tests, and dump the json to a `suite_name` file.
+# So we hackily automatically rewrite --json => --json=stdout
+pseudo_bool_flag_defaults = {
+ '--json': 'stdout',
+ '--raw_output': 'kunit',
+}
+def massage_argv(argv: Sequence[str]) -> Sequence[str]:
+ def massage_arg(arg: str) -> str:
+ if arg not in pseudo_bool_flag_defaults:
+ return arg
+ return f'{arg}={pseudo_bool_flag_defaults[arg]}'
+ return list(map(massage_arg, argv))
+
+def get_default_jobs() -> int:
+ if sys.version_info >= (3, 13):
+ if (ncpu := os.process_cpu_count()) is not None:
+ return ncpu
+ raise RuntimeError("os.process_cpu_count() returned None")
+ # See https://github.com/python/cpython/blob/b61fece/Lib/os.py#L1175-L1186.
+ if sys.platform != "darwin":
+ return len(os.sched_getaffinity(0))
+ if (ncpu := os.cpu_count()) is not None:
+ return ncpu
+ raise RuntimeError("os.cpu_count() returned None")
+
+def add_common_opts(parser: argparse.ArgumentParser) -> None:
parser.add_argument('--build_dir',
help='As in the make command, it specifies the build '
'directory.',
- type=str, default='.kunit', metavar='build_dir')
+ type=str, default='.kunit', metavar='DIR')
parser.add_argument('--make_options',
help='X=Y make option, can be repeated.',
- action='append')
+ action='append', metavar='X=Y')
parser.add_argument('--alltests',
- help='Run all KUnit tests through allyesconfig',
+ help='Run all KUnit tests via tools/testing/kunit/configs/all_tests.config',
action='store_true')
-
-def add_build_opts(parser) -> None:
+ parser.add_argument('--kunitconfig',
+ help='Path to Kconfig fragment that enables KUnit tests.'
+ ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" '
+ 'will get automatically appended. If repeated, the files '
+ 'blindly concatenated, which might not work in all cases.',
+ action='append', metavar='PATHS')
+ parser.add_argument('--kconfig_add',
+ help='Additional Kconfig options to append to the '
+ '.kunitconfig, e.g. CONFIG_KASAN=y. Can be repeated.',
+ action='append', metavar='CONFIG_X=Y')
+
+ parser.add_argument('--arch',
+ help=('Specifies the architecture to run tests under. '
+ 'The architecture specified here must match the '
+ 'string passed to the ARCH make param, '
+ 'e.g. i386, x86_64, arm, um, etc. Non-UML '
+ 'architectures run on QEMU.'),
+ type=str, default='um', metavar='ARCH')
+
+ parser.add_argument('--cross_compile',
+ help=('Sets make\'s CROSS_COMPILE variable; it should '
+ 'be set to a toolchain path prefix (the prefix '
+ 'of gcc and other tools in your toolchain, for '
+ 'example `sparc64-linux-gnu-` if you have the '
+ 'sparc toolchain installed on your system, or '
+ '`$HOME/toolchains/microblaze/gcc-9.2.0-nolibc/microblaze-linux/bin/microblaze-linux-` '
+ 'if you have downloaded the microblaze toolchain '
+ 'from the 0-day website to a directory in your '
+ 'home directory called `toolchains`).'),
+ metavar='PREFIX')
+
+ parser.add_argument('--qemu_config',
+ help=('Takes a path to a path to a file containing '
+ 'a QemuArchParams object.'),
+ type=str, metavar='FILE')
+
+ parser.add_argument('--qemu_args',
+ help='Additional QEMU arguments, e.g. "-smp 8"',
+ action='append', metavar='')
+
+def add_build_opts(parser: argparse.ArgumentParser) -> None:
parser.add_argument('--jobs',
help='As in the make command, "Specifies the number of '
'jobs (commands) to run simultaneously."',
- type=int, default=8, metavar='jobs')
+ type=int, default=get_default_jobs(), metavar='N')
-def add_exec_opts(parser) -> None:
+def add_exec_opts(parser: argparse.ArgumentParser) -> None:
parser.add_argument('--timeout',
help='maximum number of seconds to allow for all tests '
'to run. This does not include time taken to build the '
'tests.',
type=int,
default=300,
- metavar='timeout')
-
-def add_parse_opts(parser) -> None:
- parser.add_argument('--raw_output', help='don\'t format output from kernel',
+ metavar='SECONDS')
+ parser.add_argument('filter_glob',
+ help='Filter which KUnit test suites/tests run at '
+ 'boot-time, e.g. list* or list*.*del_test',
+ type=str,
+ nargs='?',
+ default='',
+ metavar='filter_glob')
+ parser.add_argument('--filter',
+ help='Filter KUnit tests with attributes, '
+ 'e.g. module=example or speed>slow',
+ type=str,
+ default='')
+ parser.add_argument('--filter_action',
+ help='If set to skip, filtered tests will be skipped, '
+ 'e.g. --filter_action=skip. Otherwise they will not run.',
+ type=str,
+ choices=['skip'])
+ parser.add_argument('--kernel_args',
+ help='Kernel command-line parameters. Maybe be repeated',
+ action='append', metavar='')
+ parser.add_argument('--run_isolated', help='If set, boot the kernel for each '
+ 'individual suite/test. This is can be useful for debugging '
+ 'a non-hermetic test, one that might pass/fail based on '
+ 'what ran before it.',
+ type=str,
+ choices=['suite', 'test'])
+ parser.add_argument('--list_tests', help='If set, list all tests that will be '
+ 'run.',
+ action='store_true')
+ parser.add_argument('--list_tests_attr', help='If set, list all tests and test '
+ 'attributes.',
action='store_true')
+
+def add_parse_opts(parser: argparse.ArgumentParser) -> None:
+ parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. '
+ 'By default, filters to just KUnit output. Use '
+ '--raw_output=all to show everything',
+ type=str, nargs='?', const='all', default=None, choices=['all', 'full', 'kunit'])
parser.add_argument('--json',
nargs='?',
- help='Stores test results in a JSON, and either '
- 'prints to stdout or saves to file if a '
- 'filename is specified',
- type=str, const='stdout', default=None)
+ help='Prints parsed test results as JSON to stdout or a file if '
+ 'a filename is specified. Does nothing if --raw_output is set.',
+ type=str, const='stdout', default=None, metavar='FILE')
+ parser.add_argument('--summary',
+ help='Prints only the summary line for parsed test results.'
+ 'Does nothing if --raw_output is set.',
+ action='store_true')
+ parser.add_argument('--failed',
+ help='Prints only the failed parsed test results and summary line.'
+ 'Does nothing if --raw_output is set.',
+ action='store_true')
+
-def main(argv, linux=None):
+def tree_from_args(cli_args: argparse.Namespace) -> kunit_kernel.LinuxSourceTree:
+ """Returns a LinuxSourceTree based on the user's arguments."""
+ # Allow users to specify multiple arguments in one string, e.g. '-smp 8'
+ qemu_args: List[str] = []
+ if cli_args.qemu_args:
+ for arg in cli_args.qemu_args:
+ qemu_args.extend(shlex.split(arg))
+
+ kunitconfigs = cli_args.kunitconfig if cli_args.kunitconfig else []
+ if cli_args.alltests:
+ # Prepend so user-specified options take prio if we ever allow
+ # --kunitconfig options to have differing options.
+ kunitconfigs = [kunit_kernel.ALL_TESTS_CONFIG_PATH] + kunitconfigs
+
+ return kunit_kernel.LinuxSourceTree(cli_args.build_dir,
+ kunitconfig_paths=kunitconfigs,
+ kconfig_add=cli_args.kconfig_add,
+ arch=cli_args.arch,
+ cross_compile=cli_args.cross_compile,
+ qemu_config_path=cli_args.qemu_config,
+ extra_qemu_args=qemu_args)
+
+
+def run_handler(cli_args: argparse.Namespace) -> None:
+ if not os.path.exists(cli_args.build_dir):
+ os.mkdir(cli_args.build_dir)
+
+ linux = tree_from_args(cli_args)
+ request = KunitRequest(build_dir=cli_args.build_dir,
+ make_options=cli_args.make_options,
+ jobs=cli_args.jobs,
+ raw_output=cli_args.raw_output,
+ json=cli_args.json,
+ summary=cli_args.summary,
+ failed=cli_args.failed,
+ timeout=cli_args.timeout,
+ filter_glob=cli_args.filter_glob,
+ filter=cli_args.filter,
+ filter_action=cli_args.filter_action,
+ kernel_args=cli_args.kernel_args,
+ run_isolated=cli_args.run_isolated,
+ list_tests=cli_args.list_tests,
+ list_tests_attr=cli_args.list_tests_attr)
+ result = run_tests(linux, request)
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+
+
+def config_handler(cli_args: argparse.Namespace) -> None:
+ if cli_args.build_dir and (
+ not os.path.exists(cli_args.build_dir)):
+ os.mkdir(cli_args.build_dir)
+
+ linux = tree_from_args(cli_args)
+ request = KunitConfigRequest(build_dir=cli_args.build_dir,
+ make_options=cli_args.make_options)
+ result = config_tests(linux, request)
+ stdout.print_with_timestamp((
+ 'Elapsed time: %.3fs\n') % (
+ result.elapsed_time))
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+
+
+def build_handler(cli_args: argparse.Namespace) -> None:
+ linux = tree_from_args(cli_args)
+ request = KunitBuildRequest(build_dir=cli_args.build_dir,
+ make_options=cli_args.make_options,
+ jobs=cli_args.jobs)
+ result = config_and_build_tests(linux, request)
+ stdout.print_with_timestamp((
+ 'Elapsed time: %.3fs\n') % (
+ result.elapsed_time))
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+
+
+def exec_handler(cli_args: argparse.Namespace) -> None:
+ linux = tree_from_args(cli_args)
+ exec_request = KunitExecRequest(raw_output=cli_args.raw_output,
+ build_dir=cli_args.build_dir,
+ json=cli_args.json,
+ summary=cli_args.summary,
+ failed=cli_args.failed,
+ timeout=cli_args.timeout,
+ filter_glob=cli_args.filter_glob,
+ filter=cli_args.filter,
+ filter_action=cli_args.filter_action,
+ kernel_args=cli_args.kernel_args,
+ run_isolated=cli_args.run_isolated,
+ list_tests=cli_args.list_tests,
+ list_tests_attr=cli_args.list_tests_attr)
+ result = exec_tests(linux, exec_request)
+ stdout.print_with_timestamp((
+ 'Elapsed time: %.3fs\n') % (result.elapsed_time))
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+
+
+def parse_handler(cli_args: argparse.Namespace) -> None:
+ if cli_args.file is None:
+ sys.stdin.reconfigure(errors='backslashreplace') # type: ignore
+ kunit_output = sys.stdin # type: Iterable[str]
+ else:
+ with open(cli_args.file, 'r', errors='backslashreplace') as f:
+ kunit_output = f.read().splitlines()
+ # We know nothing about how the result was created!
+ metadata = kunit_json.Metadata()
+ request = KunitParseRequest(raw_output=cli_args.raw_output,
+ json=cli_args.json, summary=cli_args.summary,
+ failed=cli_args.failed)
+ result, _ = parse_tests(request, metadata, kunit_output)
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+
+
+subcommand_handlers_map = {
+ 'run': run_handler,
+ 'config': config_handler,
+ 'build': build_handler,
+ 'exec': exec_handler,
+ 'parse': parse_handler
+}
+
+
+def main(argv: Sequence[str]) -> None:
parser = argparse.ArgumentParser(
description='Helps writing and running KUnit tests.')
subparser = parser.add_subparsers(dest='subcommand')
@@ -246,91 +604,19 @@ def main(argv, linux=None):
help='Specifies the file to read results from.',
type=str, nargs='?', metavar='input_file')
- cli_args = parser.parse_args(argv)
+ cli_args = parser.parse_args(massage_argv(argv))
if get_kernel_root_path():
os.chdir(get_kernel_root_path())
- if cli_args.subcommand == 'run':
- if not os.path.exists(cli_args.build_dir):
- os.mkdir(cli_args.build_dir)
-
- if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
-
- request = KunitRequest(cli_args.raw_output,
- cli_args.timeout,
- cli_args.jobs,
- cli_args.build_dir,
- cli_args.alltests,
- cli_args.json,
- cli_args.make_options)
- result = run_tests(linux, request)
- if result.status != KunitStatus.SUCCESS:
- sys.exit(1)
- elif cli_args.subcommand == 'config':
- if cli_args.build_dir and (
- not os.path.exists(cli_args.build_dir)):
- os.mkdir(cli_args.build_dir)
-
- if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
-
- request = KunitConfigRequest(cli_args.build_dir,
- cli_args.make_options)
- result = config_tests(linux, request)
- kunit_parser.print_with_timestamp((
- 'Elapsed time: %.3fs\n') % (
- result.elapsed_time))
- if result.status != KunitStatus.SUCCESS:
- sys.exit(1)
- elif cli_args.subcommand == 'build':
- if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
-
- request = KunitBuildRequest(cli_args.jobs,
- cli_args.build_dir,
- cli_args.alltests,
- cli_args.make_options)
- result = build_tests(linux, request)
- kunit_parser.print_with_timestamp((
- 'Elapsed time: %.3fs\n') % (
- result.elapsed_time))
- if result.status != KunitStatus.SUCCESS:
- sys.exit(1)
- elif cli_args.subcommand == 'exec':
- if not linux:
- linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir)
-
- exec_request = KunitExecRequest(cli_args.timeout,
- cli_args.build_dir,
- cli_args.alltests)
- exec_result = exec_tests(linux, exec_request)
- parse_request = KunitParseRequest(cli_args.raw_output,
- exec_result.result,
- cli_args.build_dir,
- cli_args.json)
- result = parse_tests(parse_request)
- kunit_parser.print_with_timestamp((
- 'Elapsed time: %.3fs\n') % (
- exec_result.elapsed_time))
- if result.status != KunitStatus.SUCCESS:
- sys.exit(1)
- elif cli_args.subcommand == 'parse':
- if cli_args.file == None:
- kunit_output = sys.stdin
- else:
- with open(cli_args.file, 'r') as f:
- kunit_output = f.read().splitlines()
- request = KunitParseRequest(cli_args.raw_output,
- kunit_output,
- None,
- cli_args.json)
- result = parse_tests(request)
- if result.status != KunitStatus.SUCCESS:
- sys.exit(1)
- else:
+ subcomand_handler = subcommand_handlers_map.get(cli_args.subcommand, None)
+
+ if subcomand_handler is None:
parser.print_help()
+ return
+
+ subcomand_handler(cli_args)
+
if __name__ == '__main__':
main(sys.argv[1:])
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index bdd60230764b..eb5dd01210b1 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -6,85 +6,103 @@
# Author: Felix Guo <felixguoxiuping@gmail.com>
# Author: Brendan Higgins <brendanhiggins@google.com>
-import collections
+from dataclasses import dataclass
import re
-from typing import List, Set
+from typing import Any, Dict, Iterable, List, Tuple
CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$'
CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$'
-KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value'])
-
-class KconfigEntry(KconfigEntryBase):
+@dataclass(frozen=True)
+class KconfigEntry:
+ name: str
+ value: str
def __str__(self) -> str:
if self.value == 'n':
- return r'# CONFIG_%s is not set' % (self.name)
- else:
- return r'CONFIG_%s=%s' % (self.name, self.value)
+ return f'# CONFIG_{self.name} is not set'
+ return f'CONFIG_{self.name}={self.value}'
class KconfigParseError(Exception):
"""Error parsing Kconfig defconfig or .config."""
-class Kconfig(object):
+class Kconfig:
"""Represents defconfig or .config specified using the Kconfig language."""
def __init__(self) -> None:
- self._entries = [] # type: List[KconfigEntry]
+ self._entries = {} # type: Dict[str, str]
+
+ def __eq__(self, other: Any) -> bool:
+ if not isinstance(other, self.__class__):
+ return False
+ return self._entries == other._entries
+
+ def __repr__(self) -> str:
+ return ','.join(str(e) for e in self.as_entries())
- def entries(self) -> Set[KconfigEntry]:
- return set(self._entries)
+ def as_entries(self) -> Iterable[KconfigEntry]:
+ for name, value in self._entries.items():
+ yield KconfigEntry(name, value)
- def add_entry(self, entry: KconfigEntry) -> None:
- self._entries.append(entry)
+ def add_entry(self, name: str, value: str) -> None:
+ self._entries[name] = value
def is_subset_of(self, other: 'Kconfig') -> bool:
- for a in self.entries():
- found = False
- for b in other.entries():
- if a.name != b.name:
+ for name, value in self._entries.items():
+ b = other._entries.get(name)
+ if b is None:
+ if value == 'n':
continue
- if a.value != b.value:
- return False
- found = True
- if a.value != 'n' and found == False:
+ return False
+ if value != b:
return False
return True
+ def conflicting_options(self, other: 'Kconfig') -> List[Tuple[KconfigEntry, KconfigEntry]]:
+ diff = [] # type: List[Tuple[KconfigEntry, KconfigEntry]]
+ for name, value in self._entries.items():
+ b = other._entries.get(name)
+ if b and value != b:
+ pair = (KconfigEntry(name, value), KconfigEntry(name, b))
+ diff.append(pair)
+ return diff
+
+ def merge_in_entries(self, other: 'Kconfig') -> None:
+ for name, value in other._entries.items():
+ self._entries[name] = value
+
def write_to_file(self, path: str) -> None:
- with open(path, 'w') as f:
- for entry in self.entries():
- f.write(str(entry) + '\n')
-
- def parse_from_string(self, blob: str) -> None:
- """Parses a string containing KconfigEntrys and populates this Kconfig."""
- self._entries = []
- is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN)
- config_matcher = re.compile(CONFIG_PATTERN)
- for line in blob.split('\n'):
- line = line.strip()
- if not line:
- continue
-
- match = config_matcher.match(line)
- if match:
- entry = KconfigEntry(match.group(1), match.group(2))
- self.add_entry(entry)
- continue
-
- empty_match = is_not_set_matcher.match(line)
- if empty_match:
- entry = KconfigEntry(empty_match.group(1), 'n')
- self.add_entry(entry)
- continue
-
- if line[0] == '#':
- continue
- else:
- raise KconfigParseError('Failed to parse: ' + line)
-
- def read_from_file(self, path: str) -> None:
- with open(path, 'r') as f:
- self.parse_from_string(f.read())
+ with open(path, 'a+') as f:
+ for e in self.as_entries():
+ f.write(str(e) + '\n')
+
+def parse_file(path: str) -> Kconfig:
+ with open(path, 'r') as f:
+ return parse_from_string(f.read())
+
+def parse_from_string(blob: str) -> Kconfig:
+ """Parses a string containing Kconfig entries."""
+ kconfig = Kconfig()
+ is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN)
+ config_matcher = re.compile(CONFIG_PATTERN)
+ for line in blob.split('\n'):
+ line = line.strip()
+ if not line:
+ continue
+
+ match = config_matcher.match(line)
+ if match:
+ kconfig.add_entry(match.group(1), match.group(2))
+ continue
+
+ empty_match = is_not_set_matcher.match(line)
+ if empty_match:
+ kconfig.add_entry(empty_match.group(1), 'n')
+ continue
+
+ if line[0] == '#':
+ continue
+ raise KconfigParseError('Failed to parse: ' + line)
+ return kconfig
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index f5cca5c38cac..80fa4e354a17 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -6,58 +6,68 @@
# Copyright (C) 2020, Google LLC.
# Author: Heidi Fahim <heidifahim@google.com>
+from dataclasses import dataclass
import json
-import os
-
-import kunit_parser
-
-from kunit_parser import TestStatus
-
-def get_json_result(test_result, def_config, build_dir, json_path) -> str:
- sub_groups = []
-
- # Each test suite is mapped to a KernelCI sub_group
- for test_suite in test_result.suites:
- sub_group = {
- "name": test_suite.name,
- "arch": "UM",
- "defconfig": def_config,
- "build_environment": build_dir,
- "test_cases": [],
- "lab_name": None,
- "kernel": None,
- "job": None,
- "git_branch": "kselftest",
- }
- test_cases = []
- # TODO: Add attachments attribute in test_case with detailed
- # failure message, see https://api.kernelci.org/schema-test-case.html#get
- for case in test_suite.cases:
- test_case = {"name": case.name, "status": "FAIL"}
- if case.status == TestStatus.SUCCESS:
- test_case["status"] = "PASS"
- elif case.status == TestStatus.TEST_CRASHED:
- test_case["status"] = "ERROR"
- test_cases.append(test_case)
- sub_group["test_cases"] = test_cases
- sub_groups.append(sub_group)
+from typing import Any, Dict
+
+from kunit_parser import Test, TestStatus
+
+@dataclass
+class Metadata:
+ """Stores metadata about this run to include in get_json_result()."""
+ arch: str = ''
+ def_config: str = ''
+ build_dir: str = ''
+
+JsonObj = Dict[str, Any]
+
+_status_map: Dict[TestStatus, str] = {
+ TestStatus.SUCCESS: "PASS",
+ TestStatus.SKIPPED: "SKIP",
+ TestStatus.TEST_CRASHED: "ERROR",
+}
+
+def _get_group_json(test: Test, common_fields: JsonObj) -> JsonObj:
+ sub_groups = [] # List[JsonObj]
+ test_cases = [] # List[JsonObj]
+
+ for subtest in test.subtests:
+ if subtest.subtests:
+ sub_group = _get_group_json(subtest, common_fields)
+ sub_groups.append(sub_group)
+ continue
+ status = _status_map.get(subtest.status, "FAIL")
+ test_cases.append({"name": subtest.name, "status": status})
+
+ test_counts = test.counts
+ counts_json = {
+ "tests": test_counts.total(),
+ "passed": test_counts.passed,
+ "failed": test_counts.failed,
+ "crashed": test_counts.crashed,
+ "skipped": test_counts.skipped,
+ "errors": test_counts.errors,
+ }
test_group = {
- "name": "KUnit Test Group",
- "arch": "UM",
- "defconfig": def_config,
- "build_environment": build_dir,
+ "name": test.name,
"sub_groups": sub_groups,
+ "test_cases": test_cases,
+ "misc": counts_json
+ }
+ test_group.update(common_fields)
+ return test_group
+
+def get_json_result(test: Test, metadata: Metadata) -> str:
+ common_fields = {
+ "arch": metadata.arch,
+ "defconfig": metadata.def_config,
+ "build_environment": metadata.build_dir,
"lab_name": None,
"kernel": None,
"job": None,
"git_branch": "kselftest",
}
- json_obj = json.dumps(test_group, indent=4)
- if json_path != 'stdout':
- with open(json_path, 'w') as result_path:
- result_path.write(json_obj)
- root = __file__.split('tools/testing/kunit/')[0]
- kunit_parser.print_with_timestamp(
- "Test results stored in %s" %
- os.path.join(root, result_path.name))
- return json_obj
+
+ test_group = _get_group_json(test, common_fields)
+ test_group["name"] = "KUnit Test Group"
+ return json.dumps(test_group, indent=4)
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 2076a5a2d060..260d8d9aa1db 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -6,28 +6,31 @@
# Author: Felix Guo <felixguoxiuping@gmail.com>
# Author: Brendan Higgins <brendanhiggins@google.com>
+import importlib.abc
+import importlib.util
import logging
import subprocess
import os
+import shlex
import shutil
import signal
-from typing import Iterator
-
-from contextlib import ExitStack
+import sys
+import threading
+from typing import Iterator, List, Optional, Tuple
+from types import FrameType
import kunit_config
-import kunit_parser
+import qemu_config
KCONFIG_PATH = '.config'
KUNITCONFIG_PATH = '.kunitconfig'
-DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig'
-BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
+OLD_KUNITCONFIG_PATH = 'last_used_kunitconfig'
+DEFAULT_KUNITCONFIG_PATH = 'tools/testing/kunit/configs/default.config'
+ALL_TESTS_CONFIG_PATH = 'tools/testing/kunit/configs/all_tests.config'
+UML_KCONFIG_PATH = 'tools/testing/kunit/configs/arch_uml.config'
OUTFILE_PATH = 'test.log'
-
-def get_file_path(build_dir, default):
- if build_dir:
- default = os.path.join(build_dir, default)
- return default
+ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__))
+QEMU_CONFIGS_DIR = os.path.join(ABS_TOOL_PATH, 'qemu_configs')
class ConfigError(Exception):
"""Represents an error trying to configure the Linux kernel."""
@@ -37,9 +40,13 @@ class BuildError(Exception):
"""Represents an error trying to build the Linux kernel."""
-class LinuxSourceTreeOperations(object):
+class LinuxSourceTreeOperations:
"""An abstraction over command line operations performed on a source tree."""
+ def __init__(self, linux_arch: str, cross_compile: Optional[str]):
+ self._linux_arch = linux_arch
+ self._cross_compile = cross_compile
+
def make_mrproper(self) -> None:
try:
subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT)
@@ -48,12 +55,16 @@ class LinuxSourceTreeOperations(object):
except subprocess.CalledProcessError as e:
raise ConfigError(e.output.decode())
- def make_olddefconfig(self, build_dir, make_options) -> None:
- command = ['make', 'ARCH=um', 'olddefconfig']
+ def make_arch_config(self, base_kunitconfig: kunit_config.Kconfig) -> kunit_config.Kconfig:
+ return base_kunitconfig
+
+ def make_olddefconfig(self, build_dir: str, make_options: Optional[List[str]]) -> None:
+ command = ['make', 'ARCH=' + self._linux_arch, 'O=' + build_dir, 'olddefconfig']
+ if self._cross_compile:
+ command += ['CROSS_COMPILE=' + self._cross_compile]
if make_options:
command.extend(make_options)
- if build_dir:
- command += ['O=' + build_dir]
+ print('Populating config with:\n$', ' '.join(command))
try:
subprocess.check_output(command, stderr=subprocess.STDOUT)
except OSError as e:
@@ -61,82 +72,202 @@ class LinuxSourceTreeOperations(object):
except subprocess.CalledProcessError as e:
raise ConfigError(e.output.decode())
- def make_allyesconfig(self, build_dir, make_options) -> None:
- kunit_parser.print_with_timestamp(
- 'Enabling all CONFIGs for UML...')
- command = ['make', 'ARCH=um', 'allyesconfig']
+ def make(self, jobs: int, build_dir: str, make_options: Optional[List[str]]) -> None:
+ command = ['make', 'all', 'compile_commands.json', 'scripts_gdb',
+ 'ARCH=' + self._linux_arch, 'O=' + build_dir, '--jobs=' + str(jobs)]
if make_options:
command.extend(make_options)
- if build_dir:
- command += ['O=' + build_dir]
- process = subprocess.Popen(
- command,
- stdout=subprocess.DEVNULL,
- stderr=subprocess.STDOUT)
- process.wait()
- kunit_parser.print_with_timestamp(
- 'Disabling broken configs to run KUnit tests...')
- with ExitStack() as es:
- config = open(get_kconfig_path(build_dir), 'a')
- disable = open(BROKEN_ALLCONFIG_PATH, 'r').read()
- config.write(disable)
- kunit_parser.print_with_timestamp(
- 'Starting Kernel with all configs takes a few minutes...')
-
- def make(self, jobs, build_dir, make_options) -> None:
- command = ['make', 'ARCH=um', '--jobs=' + str(jobs)]
- if make_options:
- command.extend(make_options)
- if build_dir:
- command += ['O=' + build_dir]
+ if self._cross_compile:
+ command += ['CROSS_COMPILE=' + self._cross_compile]
+ print('Building with:\n$', ' '.join(command))
try:
proc = subprocess.Popen(command,
stderr=subprocess.PIPE,
stdout=subprocess.DEVNULL)
except OSError as e:
- raise BuildError('Could not call make command: ' + str(e))
+ raise BuildError('Could not call execute make: ' + str(e))
+ except subprocess.CalledProcessError as e:
+ raise BuildError(e.output)
_, stderr = proc.communicate()
if proc.returncode != 0:
raise BuildError(stderr.decode())
if stderr: # likely only due to build warnings
print(stderr.decode())
- def linux_bin(self, params, timeout, build_dir) -> None:
+ def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
+ raise RuntimeError('not implemented!')
+
+
+class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations):
+
+ def __init__(self, qemu_arch_params: qemu_config.QemuArchParams, cross_compile: Optional[str]):
+ super().__init__(linux_arch=qemu_arch_params.linux_arch,
+ cross_compile=cross_compile)
+ self._kconfig = qemu_arch_params.kconfig
+ self._qemu_arch = qemu_arch_params.qemu_arch
+ self._kernel_path = qemu_arch_params.kernel_path
+ self._kernel_command_line = qemu_arch_params.kernel_command_line
+ if 'kunit_shutdown=' not in self._kernel_command_line:
+ self._kernel_command_line += ' kunit_shutdown=reboot'
+ self._extra_qemu_params = qemu_arch_params.extra_qemu_params
+ self._serial = qemu_arch_params.serial
+
+ def make_arch_config(self, base_kunitconfig: kunit_config.Kconfig) -> kunit_config.Kconfig:
+ kconfig = kunit_config.parse_from_string(self._kconfig)
+ kconfig.merge_in_entries(base_kunitconfig)
+ return kconfig
+
+ def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
+ kernel_path = os.path.join(build_dir, self._kernel_path)
+ qemu_command = ['qemu-system-' + self._qemu_arch,
+ '-nodefaults',
+ '-m', '1024',
+ '-kernel', kernel_path,
+ '-append', ' '.join(params + [self._kernel_command_line]),
+ '-no-reboot',
+ '-nographic',
+ '-accel', 'kvm',
+ '-accel', 'hvf',
+ '-accel', 'tcg',
+ '-serial', self._serial] + self._extra_qemu_params
+ # Note: shlex.join() does what we want, but requires python 3.8+.
+ print('Running tests with:\n$', ' '.join(shlex.quote(arg) for arg in qemu_command))
+ return subprocess.Popen(qemu_command,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ text=True, errors='backslashreplace')
+
+class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations):
+ """An abstraction over command line operations performed on a source tree."""
+
+ def __init__(self, cross_compile: Optional[str]=None):
+ super().__init__(linux_arch='um', cross_compile=cross_compile)
+
+ def make_arch_config(self, base_kunitconfig: kunit_config.Kconfig) -> kunit_config.Kconfig:
+ kconfig = kunit_config.parse_file(UML_KCONFIG_PATH)
+ kconfig.merge_in_entries(base_kunitconfig)
+ return kconfig
+
+ def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
"""Runs the Linux UML binary. Must be named 'linux'."""
- linux_bin = get_file_path(build_dir, 'linux')
- outfile = get_outfile_path(build_dir)
- with open(outfile, 'w') as output:
- process = subprocess.Popen([linux_bin] + params,
- stdout=output,
- stderr=subprocess.STDOUT)
- process.wait(timeout)
+ linux_bin = os.path.join(build_dir, 'linux')
+ params.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt'])
+ print('Running tests with:\n$', linux_bin, ' '.join(shlex.quote(arg) for arg in params))
+ return subprocess.Popen([linux_bin] + params,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ text=True, errors='backslashreplace')
-def get_kconfig_path(build_dir) -> str:
- return get_file_path(build_dir, KCONFIG_PATH)
+def get_kconfig_path(build_dir: str) -> str:
+ return os.path.join(build_dir, KCONFIG_PATH)
-def get_kunitconfig_path(build_dir) -> str:
- return get_file_path(build_dir, KUNITCONFIG_PATH)
+def get_kunitconfig_path(build_dir: str) -> str:
+ return os.path.join(build_dir, KUNITCONFIG_PATH)
-def get_outfile_path(build_dir) -> str:
- return get_file_path(build_dir, OUTFILE_PATH)
+def get_old_kunitconfig_path(build_dir: str) -> str:
+ return os.path.join(build_dir, OLD_KUNITCONFIG_PATH)
-class LinuxSourceTree(object):
- """Represents a Linux kernel source tree with KUnit tests."""
+def get_parsed_kunitconfig(build_dir: str,
+ kunitconfig_paths: Optional[List[str]]=None) -> kunit_config.Kconfig:
+ if not kunitconfig_paths:
+ path = get_kunitconfig_path(build_dir)
+ if not os.path.exists(path):
+ shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, path)
+ return kunit_config.parse_file(path)
- def __init__(self, build_dir: str, load_config=True, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None:
- signal.signal(signal.SIGINT, self.signal_handler)
+ merged = kunit_config.Kconfig()
+
+ for path in kunitconfig_paths:
+ if os.path.isdir(path):
+ path = os.path.join(path, KUNITCONFIG_PATH)
+ if not os.path.exists(path):
+ raise ConfigError(f'Specified kunitconfig ({path}) does not exist')
+
+ partial = kunit_config.parse_file(path)
+ diff = merged.conflicting_options(partial)
+ if diff:
+ diff_str = '\n\n'.join(f'{a}\n vs from {path}\n{b}' for a, b in diff)
+ raise ConfigError(f'Multiple values specified for {len(diff)} options in kunitconfig:\n{diff_str}')
+ merged.merge_in_entries(partial)
+ return merged
+
+def get_outfile_path(build_dir: str) -> str:
+ return os.path.join(build_dir, OUTFILE_PATH)
+
+def _default_qemu_config_path(arch: str) -> str:
+ config_path = os.path.join(QEMU_CONFIGS_DIR, arch + '.py')
+ if os.path.isfile(config_path):
+ return config_path
+
+ options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')]
+
+ if arch == 'help':
+ print('um')
+ for option in options:
+ print(option)
+ sys.exit()
- self._ops = LinuxSourceTreeOperations()
+ raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options)))
- if not load_config:
- return
+def _get_qemu_ops(config_path: str,
+ extra_qemu_args: Optional[List[str]],
+ cross_compile: Optional[str]) -> Tuple[str, LinuxSourceTreeOperations]:
+ # The module name/path has very little to do with where the actual file
+ # exists (I learned this through experimentation and could not find it
+ # anywhere in the Python documentation).
+ #
+ # Bascially, we completely ignore the actual file location of the config
+ # we are loading and just tell Python that the module lives in the
+ # QEMU_CONFIGS_DIR for import purposes regardless of where it actually
+ # exists as a file.
+ module_path = '.' + os.path.join(os.path.basename(QEMU_CONFIGS_DIR), os.path.basename(config_path))
+ spec = importlib.util.spec_from_file_location(module_path, config_path)
+ assert spec is not None
+ config = importlib.util.module_from_spec(spec)
+ # See https://github.com/python/typeshed/pull/2626 for context.
+ assert isinstance(spec.loader, importlib.abc.Loader)
+ spec.loader.exec_module(config)
- kunitconfig_path = get_kunitconfig_path(build_dir)
- if not os.path.exists(kunitconfig_path):
- shutil.copyfile(defconfig, kunitconfig_path)
+ if not hasattr(config, 'QEMU_ARCH'):
+ raise ValueError('qemu_config module missing "QEMU_ARCH": ' + config_path)
+ params: qemu_config.QemuArchParams = config.QEMU_ARCH
+ if extra_qemu_args:
+ params.extra_qemu_params.extend(extra_qemu_args)
+ return params.linux_arch, LinuxSourceTreeOperationsQemu(
+ params, cross_compile=cross_compile)
- self._kconfig = kunit_config.Kconfig()
- self._kconfig.read_from_file(kunitconfig_path)
+class LinuxSourceTree:
+ """Represents a Linux kernel source tree with KUnit tests."""
+
+ def __init__(
+ self,
+ build_dir: str,
+ kunitconfig_paths: Optional[List[str]]=None,
+ kconfig_add: Optional[List[str]]=None,
+ arch: Optional[str]=None,
+ cross_compile: Optional[str]=None,
+ qemu_config_path: Optional[str]=None,
+ extra_qemu_args: Optional[List[str]]=None) -> None:
+ signal.signal(signal.SIGINT, self.signal_handler)
+ if qemu_config_path:
+ self._arch, self._ops = _get_qemu_ops(qemu_config_path, extra_qemu_args, cross_compile)
+ else:
+ self._arch = 'um' if arch is None else arch
+ if self._arch == 'um':
+ self._ops = LinuxSourceTreeOperationsUml(cross_compile=cross_compile)
+ else:
+ qemu_config_path = _default_qemu_config_path(self._arch)
+ _, self._ops = _get_qemu_ops(qemu_config_path, extra_qemu_args, cross_compile)
+
+ self._kconfig = get_parsed_kunitconfig(build_dir, kunitconfig_paths)
+ if kconfig_add:
+ kconfig = kunit_config.parse_from_string('\n'.join(kconfig_add))
+ self._kconfig.merge_in_entries(kconfig)
+
+ def arch(self) -> str:
+ return self._arch
def clean(self) -> bool:
try:
@@ -146,52 +277,67 @@ class LinuxSourceTree(object):
return False
return True
- def validate_config(self, build_dir) -> bool:
+ def validate_config(self, build_dir: str) -> bool:
kconfig_path = get_kconfig_path(build_dir)
- validated_kconfig = kunit_config.Kconfig()
- validated_kconfig.read_from_file(kconfig_path)
- if not self._kconfig.is_subset_of(validated_kconfig):
- invalid = self._kconfig.entries() - validated_kconfig.entries()
- message = 'Provided Kconfig is not contained in validated .config. Following fields found in kunitconfig, ' \
- 'but not in .config: %s' % (
- ', '.join([str(e) for e in invalid])
- )
- logging.error(message)
- return False
- return True
+ validated_kconfig = kunit_config.parse_file(kconfig_path)
+ if self._kconfig.is_subset_of(validated_kconfig):
+ return True
+ missing = set(self._kconfig.as_entries()) - set(validated_kconfig.as_entries())
+ message = 'Not all Kconfig options selected in kunitconfig were in the generated .config.\n' \
+ 'This is probably due to unsatisfied dependencies.\n' \
+ 'Missing: ' + ', '.join(str(e) for e in missing)
+ if self._arch == 'um':
+ message += '\nNote: many Kconfig options aren\'t available on UML. You can try running ' \
+ 'on a different architecture with something like "--arch=x86_64".'
+ logging.error(message)
+ return False
- def build_config(self, build_dir, make_options) -> bool:
+ def build_config(self, build_dir: str, make_options: Optional[List[str]]) -> bool:
kconfig_path = get_kconfig_path(build_dir)
if build_dir and not os.path.exists(build_dir):
os.mkdir(build_dir)
- self._kconfig.write_to_file(kconfig_path)
try:
+ self._kconfig = self._ops.make_arch_config(self._kconfig)
+ self._kconfig.write_to_file(kconfig_path)
self._ops.make_olddefconfig(build_dir, make_options)
except ConfigError as e:
logging.error(e)
return False
- return self.validate_config(build_dir)
+ if not self.validate_config(build_dir):
+ return False
+
+ old_path = get_old_kunitconfig_path(build_dir)
+ if os.path.exists(old_path):
+ os.remove(old_path) # write_to_file appends to the file
+ self._kconfig.write_to_file(old_path)
+ return True
- def build_reconfig(self, build_dir, make_options) -> bool:
+ def _kunitconfig_changed(self, build_dir: str) -> bool:
+ old_path = get_old_kunitconfig_path(build_dir)
+ if not os.path.exists(old_path):
+ return True
+
+ old_kconfig = kunit_config.parse_file(old_path)
+ return old_kconfig != self._kconfig
+
+ def build_reconfig(self, build_dir: str, make_options: Optional[List[str]]) -> bool:
"""Creates a new .config if it is not a subset of the .kunitconfig."""
kconfig_path = get_kconfig_path(build_dir)
- if os.path.exists(kconfig_path):
- existing_kconfig = kunit_config.Kconfig()
- existing_kconfig.read_from_file(kconfig_path)
- if not self._kconfig.is_subset_of(existing_kconfig):
- print('Regenerating .config ...')
- os.remove(kconfig_path)
- return self.build_config(build_dir, make_options)
- else:
- return True
- else:
+ if not os.path.exists(kconfig_path):
print('Generating .config ...')
return self.build_config(build_dir, make_options)
- def build_um_kernel(self, alltests, jobs, build_dir, make_options) -> bool:
+ existing_kconfig = kunit_config.parse_file(kconfig_path)
+ self._kconfig = self._ops.make_arch_config(self._kconfig)
+
+ if self._kconfig.is_subset_of(existing_kconfig) and not self._kunitconfig_changed(build_dir):
+ return True
+ print('Regenerating .config ...')
+ os.remove(kconfig_path)
+ return self.build_config(build_dir, make_options)
+
+ def build_kernel(self, jobs: int, build_dir: str, make_options: Optional[List[str]]) -> bool:
try:
- if alltests:
- self._ops.make_allyesconfig(build_dir, make_options)
self._ops.make_olddefconfig(build_dir, make_options)
self._ops.make(jobs, build_dir, make_options)
except (ConfigError, BuildError) as e:
@@ -199,15 +345,47 @@ class LinuxSourceTree(object):
return False
return self.validate_config(build_dir)
- def run_kernel(self, args=[], build_dir='', timeout=None) -> Iterator[str]:
- args.extend(['mem=1G', 'console=tty'])
- self._ops.linux_bin(args, timeout, build_dir)
- outfile = get_outfile_path(build_dir)
- subprocess.call(['stty', 'sane'])
- with open(outfile, 'r') as file:
- for line in file:
+ def run_kernel(self, args: Optional[List[str]]=None, build_dir: str='', filter_glob: str='', filter: str='', filter_action: Optional[str]=None, timeout: Optional[int]=None) -> Iterator[str]:
+ if not args:
+ args = []
+ if filter_glob:
+ args.append('kunit.filter_glob=' + filter_glob)
+ if filter:
+ args.append('kunit.filter="' + filter + '"')
+ if filter_action:
+ args.append('kunit.filter_action=' + filter_action)
+ args.append('kunit.enable=1')
+
+ process = self._ops.start(args, build_dir)
+ assert process.stdout is not None # tell mypy it's set
+
+ # Enforce the timeout in a background thread.
+ def _wait_proc() -> None:
+ try:
+ process.wait(timeout=timeout)
+ except Exception as e:
+ print(e)
+ process.terminate()
+ process.wait()
+ waiter = threading.Thread(target=_wait_proc)
+ waiter.start()
+
+ output = open(get_outfile_path(build_dir), 'w')
+ try:
+ # Tee the output to the file and to our caller in real time.
+ for line in process.stdout:
+ output.write(line)
yield line
+ # This runs even if our caller doesn't consume every line.
+ finally:
+ # Flush any leftover output to the file
+ output.write(process.stdout.read())
+ output.close()
+ process.stdout.close()
+
+ waiter.join()
+ subprocess.call(['stty', 'sane'])
- def signal_handler(self, sig, frame) -> None:
+ def signal_handler(self, unused_sig: int, unused_frame: Optional[FrameType]) -> None:
logging.error('Build interruption occurred. Cleaning console.')
subprocess.call(['stty', 'sane'])
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index e8bcc139702e..333cd3a4a56b 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -1,359 +1,864 @@
# SPDX-License-Identifier: GPL-2.0
#
-# Parses test results from a kernel dmesg log.
+# Parses KTAP test results from a kernel dmesg log and incrementally prints
+# results with reader-friendly format. Stores and returns test results in a
+# Test object.
#
# Copyright (C) 2019, Google LLC.
# Author: Felix Guo <felixguoxiuping@gmail.com>
# Author: Brendan Higgins <brendanhiggins@google.com>
+# Author: Rae Moar <rmoar@google.com>
+from __future__ import annotations
+from dataclasses import dataclass
import re
+import textwrap
-from collections import namedtuple
-from datetime import datetime
from enum import Enum, auto
-from functools import reduce
from typing import Iterable, Iterator, List, Optional, Tuple
-TestResult = namedtuple('TestResult', ['status','suites','log'])
-
-class TestSuite(object):
+from kunit_printer import Printer, stdout
+
+class Test:
+ """
+ A class to represent a test parsed from KTAP results. All KTAP
+ results within a test log are stored in a main Test object as
+ subtests.
+
+ Attributes:
+ status : TestStatus - status of the test
+ name : str - name of the test
+ expected_count : int - expected number of subtests (0 if single
+ test case and None if unknown expected number of subtests)
+ subtests : List[Test] - list of subtests
+ log : List[str] - log of KTAP lines that correspond to the test
+ counts : TestCounts - counts of the test statuses and errors of
+ subtests or of the test itself if the test is a single
+ test case.
+ """
def __init__(self) -> None:
- self.status = TestStatus.SUCCESS
+ """Creates Test object with default attributes."""
+ self.status = TestStatus.TEST_CRASHED
self.name = ''
- self.cases = [] # type: List[TestCase]
+ self.expected_count = 0 # type: Optional[int]
+ self.subtests = [] # type: List[Test]
+ self.log = [] # type: List[str]
+ self.counts = TestCounts()
def __str__(self) -> str:
- return 'TestSuite(' + str(self.status) + ',' + self.name + ',' + str(self.cases) + ')'
+ """Returns string representation of a Test class object."""
+ return (f'Test({self.status}, {self.name}, {self.expected_count}, '
+ f'{self.subtests}, {self.log}, {self.counts})')
def __repr__(self) -> str:
+ """Returns string representation of a Test class object."""
return str(self)
-class TestCase(object):
- def __init__(self) -> None:
- self.status = TestStatus.SUCCESS
- self.name = ''
- self.log = [] # type: List[str]
-
- def __str__(self) -> str:
- return 'TestCase(' + str(self.status) + ',' + self.name + ',' + str(self.log) + ')'
+ def add_error(self, printer: Printer, error_message: str) -> None:
+ """Records an error that occurred while parsing this test."""
+ self.counts.errors += 1
+ printer.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}')
- def __repr__(self) -> str:
- return str(self)
+ def ok_status(self) -> bool:
+ """Returns true if the status was ok, i.e. passed or skipped."""
+ return self.status in (TestStatus.SUCCESS, TestStatus.SKIPPED)
class TestStatus(Enum):
+ """An enumeration class to represent the status of a test."""
SUCCESS = auto()
FAILURE = auto()
+ SKIPPED = auto()
TEST_CRASHED = auto()
NO_TESTS = auto()
FAILURE_TO_PARSE_TESTS = auto()
-kunit_start_re = re.compile(r'TAP version [0-9]+$')
-kunit_end_re = re.compile('(List of all partitions:|'
- 'Kernel panic - not syncing: VFS:)')
-
-def isolate_kunit_output(kernel_output) -> Iterator[str]:
- started = False
- for line in kernel_output:
- line = line.rstrip() # line always has a trailing \n
- if kunit_start_re.search(line):
- prefix_len = len(line.split('TAP version')[0])
- started = True
- yield line[prefix_len:] if prefix_len > 0 else line
- elif kunit_end_re.search(line):
- break
- elif started:
- yield line[prefix_len:] if prefix_len > 0 else line
-
-def raw_output(kernel_output) -> None:
- for line in kernel_output:
- print(line.rstrip())
-
-DIVIDER = '=' * 60
-
-RESET = '\033[0;0m'
-
-def red(text) -> str:
- return '\033[1;31m' + text + RESET
-
-def yellow(text) -> str:
- return '\033[1;33m' + text + RESET
+@dataclass
+class TestCounts:
+ """
+ Tracks the counts of statuses of all test cases and any errors within
+ a Test.
+ """
+ passed: int = 0
+ failed: int = 0
+ crashed: int = 0
+ skipped: int = 0
+ errors: int = 0
-def green(text) -> str:
- return '\033[1;32m' + text + RESET
-
-def print_with_timestamp(message) -> None:
- print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message))
-
-def format_suite_divider(message) -> str:
- return '======== ' + message + ' ========'
-
-def print_suite_divider(message) -> None:
- print_with_timestamp(DIVIDER)
- print_with_timestamp(format_suite_divider(message))
-
-def print_log(log) -> None:
- for m in log:
- print_with_timestamp(m)
-
-TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*#).*$')
-
-def consume_non_diagnostic(lines: List[str]) -> None:
- while lines and not TAP_ENTRIES.match(lines[0]):
- lines.pop(0)
+ def __str__(self) -> str:
+ """Returns the string representation of a TestCounts object."""
+ statuses = [('passed', self.passed), ('failed', self.failed),
+ ('crashed', self.crashed), ('skipped', self.skipped),
+ ('errors', self.errors)]
+ return f'Ran {self.total()} tests: ' + \
+ ', '.join(f'{s}: {n}' for s, n in statuses if n > 0)
+
+ def total(self) -> int:
+ """Returns the total number of test cases within a test
+ object, where a test case is a test with no subtests.
+ """
+ return (self.passed + self.failed + self.crashed +
+ self.skipped)
+
+ def add_subtest_counts(self, counts: TestCounts) -> None:
+ """
+ Adds the counts of another TestCounts object to the current
+ TestCounts object. Used to add the counts of a subtest to the
+ parent test.
+
+ Parameters:
+ counts - a different TestCounts object whose counts
+ will be added to the counts of the TestCounts object
+ """
+ self.passed += counts.passed
+ self.failed += counts.failed
+ self.crashed += counts.crashed
+ self.skipped += counts.skipped
+ self.errors += counts.errors
+
+ def get_status(self) -> TestStatus:
+ """Returns the aggregated status of a Test using test
+ counts.
+ """
+ if self.total() == 0:
+ return TestStatus.NO_TESTS
+ if self.crashed:
+ # Crashes should take priority.
+ return TestStatus.TEST_CRASHED
+ if self.failed:
+ return TestStatus.FAILURE
+ if self.passed:
+ # No failures or crashes, looks good!
+ return TestStatus.SUCCESS
+ # We have only skipped tests.
+ return TestStatus.SKIPPED
+
+ def add_status(self, status: TestStatus) -> None:
+ """Increments the count for `status`."""
+ if status == TestStatus.SUCCESS:
+ self.passed += 1
+ elif status == TestStatus.FAILURE:
+ self.failed += 1
+ elif status == TestStatus.SKIPPED:
+ self.skipped += 1
+ elif status != TestStatus.NO_TESTS:
+ self.crashed += 1
+
+class LineStream:
+ """
+ A class to represent the lines of kernel output.
+ Provides a lazy peek()/pop() interface over an iterator of
+ (line#, text).
+ """
+ _lines: Iterator[Tuple[int, str]]
+ _next: Tuple[int, str]
+ _need_next: bool
+ _done: bool
+
+ def __init__(self, lines: Iterator[Tuple[int, str]]):
+ """Creates a new LineStream that wraps the given iterator."""
+ self._lines = lines
+ self._done = False
+ self._need_next = True
+ self._next = (0, '')
+
+ def _get_next(self) -> None:
+ """Advances the LineSteam to the next line, if necessary."""
+ if not self._need_next:
+ return
+ try:
+ self._next = next(self._lines)
+ except StopIteration:
+ self._done = True
+ finally:
+ self._need_next = False
+
+ def peek(self) -> str:
+ """Returns the current line, without advancing the LineStream.
+ """
+ self._get_next()
+ return self._next[1]
+
+ def pop(self) -> str:
+ """Returns the current line and advances the LineStream to
+ the next line.
+ """
+ s = self.peek()
+ if self._done:
+ raise ValueError(f'LineStream: going past EOF, last line was {s}')
+ self._need_next = True
+ return s
+
+ def __bool__(self) -> bool:
+ """Returns True if stream has more lines."""
+ self._get_next()
+ return not self._done
+
+ # Only used by kunit_tool_test.py.
+ def __iter__(self) -> Iterator[str]:
+ """Empties all lines stored in LineStream object into
+ Iterator object and returns the Iterator object.
+ """
+ while bool(self):
+ yield self.pop()
+
+ def line_number(self) -> int:
+ """Returns the line number of the current line."""
+ self._get_next()
+ return self._next[0]
+
+# Parsing helper methods:
+
+KTAP_START = re.compile(r'\s*KTAP version ([0-9]+)$')
+TAP_START = re.compile(r'\s*TAP version ([0-9]+)$')
+KTAP_END = re.compile(r'\s*(List of all partitions:|'
+ 'Kernel panic - not syncing: VFS:|reboot: System halted)')
+EXECUTOR_ERROR = re.compile(r'\s*kunit executor: (.*)$')
+
+def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream:
+ """Extracts KTAP lines from the kernel output."""
+ def isolate_ktap_output(kernel_output: Iterable[str]) \
+ -> Iterator[Tuple[int, str]]:
+ line_num = 0
+ started = False
+ for line in kernel_output:
+ line_num += 1
+ line = line.rstrip() # remove trailing \n
+ if not started and KTAP_START.search(line):
+ # start extracting KTAP lines and set prefix
+ # to number of characters before version line
+ prefix_len = len(
+ line.split('KTAP version')[0])
+ started = True
+ yield line_num, line[prefix_len:]
+ elif not started and TAP_START.search(line):
+ # start extracting KTAP lines and set prefix
+ # to number of characters before version line
+ prefix_len = len(line.split('TAP version')[0])
+ started = True
+ yield line_num, line[prefix_len:]
+ elif started and KTAP_END.search(line):
+ # stop extracting KTAP lines
+ break
+ elif started:
+ # remove the prefix, if any.
+ line = line[prefix_len:]
+ yield line_num, line
+ elif EXECUTOR_ERROR.search(line):
+ yield line_num, line
+ return LineStream(lines=isolate_ktap_output(kernel_output))
+
+KTAP_VERSIONS = [1]
+TAP_VERSIONS = [13, 14]
+
+def check_version(version_num: int, accepted_versions: List[int],
+ version_type: str, test: Test, printer: Printer) -> None:
+ """
+ Adds error to test object if version number is too high or too
+ low.
+
+ Parameters:
+ version_num - The inputted version number from the parsed KTAP or TAP
+ header line
+ accepted_version - List of accepted KTAP or TAP versions
+ version_type - 'KTAP' or 'TAP' depending on the type of
+ version line.
+ test - Test object for current test being parsed
+ printer - Printer object to output error
+ """
+ if version_num < min(accepted_versions):
+ test.add_error(printer, f'{version_type} version lower than expected!')
+ elif version_num > max(accepted_versions):
+ test.add_error(printer, f'{version_type} version higer than expected!')
+
+def parse_ktap_header(lines: LineStream, test: Test, printer: Printer) -> bool:
+ """
+ Parses KTAP/TAP header line and checks version number.
+ Returns False if fails to parse KTAP/TAP header line.
+
+ Accepted formats:
+ - 'KTAP version [version number]'
+ - 'TAP version [version number]'
+
+ Parameters:
+ lines - LineStream of KTAP output to parse
+ test - Test object for current test being parsed
+ printer - Printer object to output results
+
+ Return:
+ True if successfully parsed KTAP/TAP header line
+ """
+ ktap_match = KTAP_START.match(lines.peek())
+ tap_match = TAP_START.match(lines.peek())
+ if ktap_match:
+ version_num = int(ktap_match.group(1))
+ check_version(version_num, KTAP_VERSIONS, 'KTAP', test, printer)
+ elif tap_match:
+ version_num = int(tap_match.group(1))
+ check_version(version_num, TAP_VERSIONS, 'TAP', test, printer)
+ else:
+ return False
+ lines.pop()
+ return True
-def save_non_diagnostic(lines: List[str], test_case: TestCase) -> None:
- while lines and not TAP_ENTRIES.match(lines[0]):
- test_case.log.append(lines[0])
- lines.pop(0)
+TEST_HEADER = re.compile(r'^\s*# Subtest: (.*)$')
-OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text'])
+def parse_test_header(lines: LineStream, test: Test) -> bool:
+ """
+ Parses test header and stores test name in test object.
+ Returns False if fails to parse test header line.
-OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$')
+ Accepted format:
+ - '# Subtest: [test name]'
-OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$')
+ Parameters:
+ lines - LineStream of KTAP output to parse
+ test - Test object for current test being parsed
-def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool:
- save_non_diagnostic(lines, test_case)
- if not lines:
- test_case.status = TestStatus.TEST_CRASHED
- return True
- line = lines[0]
- match = OK_NOT_OK_SUBTEST.match(line)
- while not match and lines:
- line = lines.pop(0)
- match = OK_NOT_OK_SUBTEST.match(line)
- if match:
- test_case.log.append(lines.pop(0))
- test_case.name = match.group(2)
- if test_case.status == TestStatus.TEST_CRASHED:
- return True
- if match.group(1) == 'ok':
- test_case.status = TestStatus.SUCCESS
- else:
- test_case.status = TestStatus.FAILURE
- return True
- else:
+ Return:
+ True if successfully parsed test header line
+ """
+ match = TEST_HEADER.match(lines.peek())
+ if not match:
return False
-
-SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# (.*)$')
-DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^[\s]+# .*?: kunit test case crashed!$')
-
-def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
- save_non_diagnostic(lines, test_case)
- if not lines:
+ test.name = match.group(1)
+ lines.pop()
+ return True
+
+TEST_PLAN = re.compile(r'^\s*1\.\.([0-9]+)')
+
+def parse_test_plan(lines: LineStream, test: Test) -> bool:
+ """
+ Parses test plan line and stores the expected number of subtests in
+ test object. Reports an error if expected count is 0.
+ Returns False and sets expected_count to None if there is no valid test
+ plan.
+
+ Accepted format:
+ - '1..[number of subtests]'
+
+ Parameters:
+ lines - LineStream of KTAP output to parse
+ test - Test object for current test being parsed
+
+ Return:
+ True if successfully parsed test plan line
+ """
+ match = TEST_PLAN.match(lines.peek())
+ if not match:
+ test.expected_count = None
return False
- line = lines[0]
- match = SUBTEST_DIAGNOSTIC.match(line)
- if match:
- test_case.log.append(lines.pop(0))
- crash_match = DIAGNOSTIC_CRASH_MESSAGE.match(line)
- if crash_match:
- test_case.status = TestStatus.TEST_CRASHED
- return True
- else:
+ expected_count = int(match.group(1))
+ test.expected_count = expected_count
+ lines.pop()
+ return True
+
+TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?([^#]*)( # .*)?$')
+
+TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?(.*) # SKIP ?(.*)$')
+
+def peek_test_name_match(lines: LineStream, test: Test) -> bool:
+ """
+ Matches current line with the format of a test result line and checks
+ if the name matches the name of the current test.
+ Returns False if fails to match format or name.
+
+ Accepted format:
+ - '[ok|not ok] [test number] [-] [test name] [optional skip
+ directive]'
+
+ Parameters:
+ lines - LineStream of KTAP output to parse
+ test - Test object for current test being parsed
+
+ Return:
+ True if matched a test result line and the name matching the
+ expected test name
+ """
+ line = lines.peek()
+ match = TEST_RESULT.match(line)
+ if not match:
+ return False
+ name = match.group(4)
+ if not name:
return False
+ return name == test.name
+
+def parse_test_result(lines: LineStream, test: Test,
+ expected_num: int, printer: Printer) -> bool:
+ """
+ Parses test result line and stores the status and name in the test
+ object. Reports an error if the test number does not match expected
+ test number.
+ Returns False if fails to parse test result line.
+
+ Note that the SKIP directive is the only direction that causes a
+ change in status.
+
+ Accepted format:
+ - '[ok|not ok] [test number] [-] [test name] [optional skip
+ directive]'
+
+ Parameters:
+ lines - LineStream of KTAP output to parse
+ test - Test object for current test being parsed
+ expected_num - expected test number for current test
+ printer - Printer object to output results
+
+ Return:
+ True if successfully parsed a test result line.
+ """
+ line = lines.peek()
+ match = TEST_RESULT.match(line)
+ skip_match = TEST_RESULT_SKIP.match(line)
+
+ # Check if line matches test result line format
+ if not match:
+ return False
+ lines.pop()
-def parse_test_case(lines: List[str]) -> Optional[TestCase]:
- test_case = TestCase()
- save_non_diagnostic(lines, test_case)
- while parse_diagnostic(lines, test_case):
- pass
- if parse_ok_not_ok_test_case(lines, test_case):
- return test_case
+ # Set name of test object
+ if skip_match:
+ test.name = skip_match.group(4) or skip_match.group(5)
else:
- return None
-
-SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$')
-
-def parse_subtest_header(lines: List[str]) -> Optional[str]:
- consume_non_diagnostic(lines)
- if not lines:
- return None
- match = SUBTEST_HEADER.match(lines[0])
- if match:
- lines.pop(0)
- return match.group(1)
+ test.name = match.group(4)
+
+ # Check test num
+ num = int(match.group(2))
+ if num != expected_num:
+ test.add_error(printer, f'Expected test number {expected_num} but found {num}')
+
+ # Set status of test object
+ status = match.group(1)
+ if skip_match:
+ test.status = TestStatus.SKIPPED
+ elif status == 'ok':
+ test.status = TestStatus.SUCCESS
else:
- return None
+ test.status = TestStatus.FAILURE
+ return True
-SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)')
+def parse_diagnostic(lines: LineStream) -> List[str]:
+ """
+ Parse lines that do not match the format of a test result line or
+ test header line and returns them in list.
-def parse_subtest_plan(lines: List[str]) -> Optional[int]:
- consume_non_diagnostic(lines)
- match = SUBTEST_PLAN.match(lines[0])
- if match:
- lines.pop(0)
- return int(match.group(1))
- else:
- return None
-
-def max_status(left: TestStatus, right: TestStatus) -> TestStatus:
- if left == TestStatus.TEST_CRASHED or right == TestStatus.TEST_CRASHED:
- return TestStatus.TEST_CRASHED
- elif left == TestStatus.FAILURE or right == TestStatus.FAILURE:
- return TestStatus.FAILURE
- elif left != TestStatus.SUCCESS:
- return left
- elif right != TestStatus.SUCCESS:
- return right
- else:
- return TestStatus.SUCCESS
+ Line formats that are not parsed:
+ - '# Subtest: [test name]'
+ - '[ok|not ok] [test number] [-] [test name] [optional skip
+ directive]'
+ - 'KTAP version [version number]'
-def parse_ok_not_ok_test_suite(lines: List[str],
- test_suite: TestSuite,
- expected_suite_index: int) -> bool:
- consume_non_diagnostic(lines)
- if not lines:
- test_suite.status = TestStatus.TEST_CRASHED
- return False
- line = lines[0]
- match = OK_NOT_OK_MODULE.match(line)
- if match:
- lines.pop(0)
- if match.group(1) == 'ok':
- test_suite.status = TestStatus.SUCCESS
- else:
- test_suite.status = TestStatus.FAILURE
- suite_index = int(match.group(2))
- if suite_index != expected_suite_index:
- print_with_timestamp(
- red('[ERROR] ') + 'expected_suite_index ' +
- str(expected_suite_index) + ', but got ' +
- str(suite_index))
- return True
- else:
- return False
+ Parameters:
+ lines - LineStream of KTAP output to parse
-def bubble_up_errors(statuses: Iterable[TestStatus]) -> TestStatus:
- return reduce(max_status, statuses, TestStatus.SUCCESS)
+ Return:
+ Log of diagnostic lines
+ """
+ log = [] # type: List[str]
+ non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START, TAP_START, TEST_PLAN]
+ while lines and not any(re.match(lines.peek())
+ for re in non_diagnostic_lines):
+ log.append(lines.pop())
+ return log
-def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus:
- max_test_case_status = bubble_up_errors(x.status for x in test_suite.cases)
- return max_status(max_test_case_status, test_suite.status)
-def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]:
- if not lines:
- return None
- consume_non_diagnostic(lines)
- test_suite = TestSuite()
- test_suite.status = TestStatus.SUCCESS
- name = parse_subtest_header(lines)
- if not name:
- return None
- test_suite.name = name
- expected_test_case_num = parse_subtest_plan(lines)
- if expected_test_case_num is None:
- return None
- while expected_test_case_num > 0:
- test_case = parse_test_case(lines)
- if not test_case:
- break
- test_suite.cases.append(test_case)
- expected_test_case_num -= 1
- if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index):
- test_suite.status = bubble_up_test_case_errors(test_suite)
- return test_suite
- elif not lines:
- print_with_timestamp(red('[ERROR] ') + 'ran out of lines before end token')
- return test_suite
- else:
- print('failed to parse end of suite' + lines[0])
- return None
+# Printing helper methods:
-TAP_HEADER = re.compile(r'^TAP version 14$')
+DIVIDER = '=' * 60
-def parse_tap_header(lines: List[str]) -> bool:
- consume_non_diagnostic(lines)
- if TAP_HEADER.match(lines[0]):
- lines.pop(0)
- return True
+def format_test_divider(message: str, len_message: int) -> str:
+ """
+ Returns string with message centered in fixed width divider.
+
+ Example:
+ '===================== message example ====================='
+
+ Parameters:
+ message - message to be centered in divider line
+ len_message - length of the message to be printed such that
+ any characters of the color codes are not counted
+
+ Return:
+ String containing message centered in fixed width divider
+ """
+ default_count = 3 # default number of dashes
+ len_1 = default_count
+ len_2 = default_count
+ difference = len(DIVIDER) - len_message - 2 # 2 spaces added
+ if difference > 0:
+ # calculate number of dashes for each side of the divider
+ len_1 = int(difference / 2)
+ len_2 = difference - len_1
+ return ('=' * len_1) + f' {message} ' + ('=' * len_2)
+
+def print_test_header(test: Test, printer: Printer) -> None:
+ """
+ Prints test header with test name and optionally the expected number
+ of subtests.
+
+ Example:
+ '=================== example (2 subtests) ==================='
+
+ Parameters:
+ test - Test object representing current test being printed
+ printer - Printer object to output results
+ """
+ message = test.name
+ if message != "":
+ # Add a leading space before the subtest counts only if a test name
+ # is provided using a "# Subtest" header line.
+ message += " "
+ if test.expected_count:
+ if test.expected_count == 1:
+ message += '(1 subtest)'
+ else:
+ message += f'({test.expected_count} subtests)'
+ printer.print_with_timestamp(format_test_divider(message, len(message)))
+
+def print_log(log: Iterable[str], printer: Printer) -> None:
+ """Prints all strings in saved log for test in yellow."""
+ formatted = textwrap.dedent('\n'.join(log))
+ for line in formatted.splitlines():
+ printer.print_with_timestamp(printer.yellow(line))
+
+def format_test_result(test: Test, printer: Printer) -> str:
+ """
+ Returns string with formatted test result with colored status and test
+ name.
+
+ Example:
+ '[PASSED] example'
+
+ Parameters:
+ test - Test object representing current test being printed
+ printer - Printer object to output results
+
+ Return:
+ String containing formatted test result
+ """
+ if test.status == TestStatus.SUCCESS:
+ return printer.green('[PASSED] ') + test.name
+ if test.status == TestStatus.SKIPPED:
+ return printer.yellow('[SKIPPED] ') + test.name
+ if test.status == TestStatus.NO_TESTS:
+ return printer.yellow('[NO TESTS RUN] ') + test.name
+ if test.status == TestStatus.TEST_CRASHED:
+ print_log(test.log, printer)
+ return stdout.red('[CRASHED] ') + test.name
+ print_log(test.log, printer)
+ return printer.red('[FAILED] ') + test.name
+
+def print_test_result(test: Test, printer: Printer) -> None:
+ """
+ Prints result line with status of test.
+
+ Example:
+ '[PASSED] example'
+
+ Parameters:
+ test - Test object representing current test being printed
+ printer - Printer object
+ """
+ printer.print_with_timestamp(format_test_result(test, printer))
+
+def print_test_footer(test: Test, printer: Printer) -> None:
+ """
+ Prints test footer with status of test.
+
+ Example:
+ '===================== [PASSED] example ====================='
+
+ Parameters:
+ test - Test object representing current test being printed
+ printer - Printer object to output results
+ """
+ message = format_test_result(test, printer)
+ printer.print_with_timestamp(format_test_divider(message,
+ len(message) - printer.color_len()))
+
+def print_test(test: Test, failed_only: bool, printer: Printer) -> None:
+ """
+ Prints Test object to given printer. For a child test, the result line is
+ printed. For a parent test, the test header, all child test results, and
+ the test footer are all printed. If failed_only is true, only failed/crashed
+ tests will be printed.
+
+ Parameters:
+ test - Test object to print
+ failed_only - True if only failed/crashed tests should be printed.
+ printer - Printer object to output results
+ """
+ if test.name == "main":
+ printer.print_with_timestamp(DIVIDER)
+ for subtest in test.subtests:
+ print_test(subtest, failed_only, printer)
+ printer.print_with_timestamp(DIVIDER)
+ elif test.subtests != []:
+ if not failed_only or not test.ok_status():
+ print_test_header(test, printer)
+ for subtest in test.subtests:
+ print_test(subtest, failed_only, printer)
+ print_test_footer(test, printer)
else:
- return False
+ if not failed_only or not test.ok_status():
+ print_test_result(test, printer)
-TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)')
+def _summarize_failed_tests(test: Test) -> str:
+ """Tries to summarize all the failing subtests in `test`."""
-def parse_test_plan(lines: List[str]) -> Optional[int]:
- consume_non_diagnostic(lines)
- match = TEST_PLAN.match(lines[0])
- if match:
- lines.pop(0)
- return int(match.group(1))
- else:
- return None
-
-def bubble_up_suite_errors(test_suites: Iterable[TestSuite]) -> TestStatus:
- return bubble_up_errors(x.status for x in test_suites)
-
-def parse_test_result(lines: List[str]) -> TestResult:
- consume_non_diagnostic(lines)
- if not lines or not parse_tap_header(lines):
- return TestResult(TestStatus.NO_TESTS, [], lines)
- expected_test_suite_num = parse_test_plan(lines)
- if not expected_test_suite_num:
- return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines)
- test_suites = []
- for i in range(1, expected_test_suite_num + 1):
- test_suite = parse_test_suite(lines, i)
- if test_suite:
- test_suites.append(test_suite)
+ def failed_names(test: Test, parent_name: str) -> List[str]:
+ # Note: we use 'main' internally for the top-level test.
+ if not parent_name or parent_name == 'main':
+ full_name = test.name
else:
- print_with_timestamp(
- red('[ERROR] ') + ' expected ' +
- str(expected_test_suite_num) +
- ' test suites, but got ' + str(i - 2))
- break
- test_suite = parse_test_suite(lines, -1)
- if test_suite:
- print_with_timestamp(red('[ERROR] ') +
- 'got unexpected test suite: ' + test_suite.name)
- if test_suites:
- return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+ full_name = parent_name + '.' + test.name
+
+ if not test.subtests: # this is a leaf node
+ return [full_name]
+
+ # If all the children failed, just say this subtest failed.
+ # Don't summarize it down "the top-level test failed", though.
+ failed_subtests = [sub for sub in test.subtests if not sub.ok_status()]
+ if parent_name and len(failed_subtests) == len(test.subtests):
+ return [full_name]
+
+ all_failures = [] # type: List[str]
+ for t in failed_subtests:
+ all_failures.extend(failed_names(t, full_name))
+ return all_failures
+
+ failures = failed_names(test, '')
+ # If there are too many failures, printing them out will just be noisy.
+ if len(failures) > 10: # this is an arbitrary limit
+ return ''
+
+ return 'Failures: ' + ', '.join(failures)
+
+
+def print_summary_line(test: Test, printer: Printer) -> None:
+ """
+ Prints summary line of test object. Color of line is dependent on
+ status of test. Color is green if test passes, yellow if test is
+ skipped, and red if the test fails or crashes. Summary line contains
+ counts of the statuses of the tests subtests or the test itself if it
+ has no subtests.
+
+ Example:
+ "Testing complete. Passed: 2, Failed: 0, Crashed: 0, Skipped: 0,
+ Errors: 0"
+
+ test - Test object representing current test being printed
+ printer - Printer object to output results
+ """
+ if test.status == TestStatus.SUCCESS:
+ color = stdout.green
+ elif test.status in (TestStatus.SKIPPED, TestStatus.NO_TESTS):
+ color = stdout.yellow
else:
- return TestResult(TestStatus.NO_TESTS, [], lines)
-
-def print_and_count_results(test_result: TestResult) -> Tuple[int, int, int]:
- total_tests = 0
- failed_tests = 0
- crashed_tests = 0
- for test_suite in test_result.suites:
- if test_suite.status == TestStatus.SUCCESS:
- print_suite_divider(green('[PASSED] ') + test_suite.name)
- elif test_suite.status == TestStatus.TEST_CRASHED:
- print_suite_divider(red('[CRASHED] ' + test_suite.name))
- else:
- print_suite_divider(red('[FAILED] ') + test_suite.name)
- for test_case in test_suite.cases:
- total_tests += 1
- if test_case.status == TestStatus.SUCCESS:
- print_with_timestamp(green('[PASSED] ') + test_case.name)
- elif test_case.status == TestStatus.TEST_CRASHED:
- crashed_tests += 1
- print_with_timestamp(red('[CRASHED] ' + test_case.name))
- print_log(map(yellow, test_case.log))
- print_with_timestamp('')
+ color = stdout.red
+ printer.print_with_timestamp(color(f'Testing complete. {test.counts}'))
+
+ # Summarize failures that might have gone off-screen since we had a lot
+ # of tests (arbitrarily defined as >=100 for now).
+ if test.ok_status() or test.counts.total() < 100:
+ return
+ summarized = _summarize_failed_tests(test)
+ if not summarized:
+ return
+ printer.print_with_timestamp(color(summarized))
+
+# Other methods:
+
+def bubble_up_test_results(test: Test) -> None:
+ """
+ If the test has subtests, add the test counts of the subtests to the
+ test and check if any of the tests crashed and if so set the test
+ status to crashed. Otherwise if the test has no subtests add the
+ status of the test to the test counts.
+
+ Parameters:
+ test - Test object for current test being parsed
+ """
+ subtests = test.subtests
+ counts = test.counts
+ status = test.status
+ for t in subtests:
+ counts.add_subtest_counts(t.counts)
+ if counts.total() == 0:
+ counts.add_status(status)
+ elif test.counts.get_status() == TestStatus.TEST_CRASHED:
+ test.status = TestStatus.TEST_CRASHED
+
+def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool, printer: Printer) -> Test:
+ """
+ Finds next test to parse in LineStream, creates new Test object,
+ parses any subtests of the test, populates Test object with all
+ information (status, name) about the test and the Test objects for
+ any subtests, and then returns the Test object. The method accepts
+ three formats of tests:
+
+ Accepted test formats:
+
+ - Main KTAP/TAP header
+
+ Example:
+
+ KTAP version 1
+ 1..4
+ [subtests]
+
+ - Subtest header (must include either the KTAP version line or
+ "# Subtest" header line)
+
+ Example (preferred format with both KTAP version line and
+ "# Subtest" line):
+
+ KTAP version 1
+ # Subtest: name
+ 1..3
+ [subtests]
+ ok 1 name
+
+ Example (only "# Subtest" line):
+
+ # Subtest: name
+ 1..3
+ [subtests]
+ ok 1 name
+
+ Example (only KTAP version line, compliant with KTAP v1 spec):
+
+ KTAP version 1
+ 1..3
+ [subtests]
+ ok 1 name
+
+ - Test result line
+
+ Example:
+
+ ok 1 - test
+
+ Parameters:
+ lines - LineStream of KTAP output to parse
+ expected_num - expected test number for test to be parsed
+ log - list of strings containing any preceding diagnostic lines
+ corresponding to the current test
+ is_subtest - boolean indicating whether test is a subtest
+ printer - Printer object to output results
+
+ Return:
+ Test object populated with characteristics and any subtests
+ """
+ test = Test()
+ test.log.extend(log)
+
+ # Parse any errors prior to parsing tests
+ err_log = parse_diagnostic(lines)
+ test.log.extend(err_log)
+
+ if not is_subtest:
+ # If parsing the main/top-level test, parse KTAP version line and
+ # test plan
+ test.name = "main"
+ parse_ktap_header(lines, test, printer)
+ test.log.extend(parse_diagnostic(lines))
+ parse_test_plan(lines, test)
+ parent_test = True
+ else:
+ # If not the main test, attempt to parse a test header containing
+ # the KTAP version line and/or subtest header line
+ ktap_line = parse_ktap_header(lines, test, printer)
+ subtest_line = parse_test_header(lines, test)
+ test.log.extend(parse_diagnostic(lines))
+ parse_test_plan(lines, test)
+ parent_test = (ktap_line or subtest_line)
+ if parent_test:
+ print_test_header(test, printer)
+
+ expected_count = test.expected_count
+ subtests = []
+ test_num = 1
+ while parent_test and (expected_count is None or test_num <= expected_count):
+ # Loop to parse any subtests.
+ # Break after parsing expected number of tests or
+ # if expected number of tests is unknown break when test
+ # result line with matching name to subtest header is found
+ # or no more lines in stream.
+ sub_log = parse_diagnostic(lines)
+ sub_test = Test()
+ if not lines or (peek_test_name_match(lines, test) and
+ is_subtest):
+ if expected_count and test_num <= expected_count:
+ # If parser reaches end of test before
+ # parsing expected number of subtests, print
+ # crashed subtest and record error
+ test.add_error(printer, 'missing expected subtest!')
+ sub_test.log.extend(sub_log)
+ test.counts.add_status(
+ TestStatus.TEST_CRASHED)
+ print_test_result(sub_test, printer)
else:
- failed_tests += 1
- print_with_timestamp(red('[FAILED] ') + test_case.name)
- print_log(map(yellow, test_case.log))
- print_with_timestamp('')
- return total_tests, failed_tests, crashed_tests
-
-def parse_run_tests(kernel_output) -> TestResult:
- total_tests = 0
- failed_tests = 0
- crashed_tests = 0
- test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
- if test_result.status == TestStatus.NO_TESTS:
- print(red('[ERROR] ') + yellow('no tests run!'))
- elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS:
- print(red('[ERROR] ') + yellow('could not parse test results!'))
+ test.log.extend(sub_log)
+ break
+ else:
+ sub_test = parse_test(lines, test_num, sub_log, True, printer)
+ subtests.append(sub_test)
+ test_num += 1
+ test.subtests = subtests
+ if is_subtest:
+ # If not main test, look for test result line
+ test.log.extend(parse_diagnostic(lines))
+ if test.name != "" and not peek_test_name_match(lines, test):
+ test.add_error(printer, 'missing subtest result line!')
+ elif not lines:
+ print_log(test.log, printer)
+ test.status = TestStatus.NO_TESTS
+ test.add_error(printer, 'No more test results!')
+ else:
+ parse_test_result(lines, test, expected_num, printer)
+
+ # Check for there being no subtests within parent test
+ if parent_test and len(subtests) == 0:
+ # Don't override a bad status if this test had one reported.
+ # Assumption: no subtests means CRASHED is from Test.__init__()
+ if test.status in (TestStatus.TEST_CRASHED, TestStatus.SUCCESS):
+ print_log(test.log, printer)
+ test.status = TestStatus.NO_TESTS
+ test.add_error(printer, '0 tests run!')
+
+ # Add statuses to TestCounts attribute in Test object
+ bubble_up_test_results(test)
+ if parent_test and is_subtest:
+ # If test has subtests and is not the main test object, print
+ # footer.
+ print_test_footer(test, printer)
+ elif is_subtest:
+ print_test_result(test, printer)
+ return test
+
+def parse_run_tests(kernel_output: Iterable[str], printer: Printer) -> Test:
+ """
+ Using kernel output, extract KTAP lines, parse the lines for test
+ results and print condensed test results and summary line.
+
+ Parameters:
+ kernel_output - Iterable object contains lines of kernel output
+ printer - Printer object to output results
+
+ Return:
+ Test - the main test object with all subtests.
+ """
+ printer.print_with_timestamp(DIVIDER)
+ lines = extract_tap_lines(kernel_output)
+ test = Test()
+ if not lines:
+ test.name = '<missing>'
+ test.add_error(printer, 'Could not find any KTAP output. Did any KUnit tests run?')
+ test.status = TestStatus.FAILURE_TO_PARSE_TESTS
else:
- (total_tests,
- failed_tests,
- crashed_tests) = print_and_count_results(test_result)
- print_with_timestamp(DIVIDER)
- fmt = green if test_result.status == TestStatus.SUCCESS else red
- print_with_timestamp(
- fmt('Testing complete. %d tests run. %d failed. %d crashed.' %
- (total_tests, failed_tests, crashed_tests)))
- return test_result
+ test = parse_test(lines, 0, [], False, printer)
+ if test.status != TestStatus.NO_TESTS:
+ test.status = test.counts.get_status()
+ printer.print_with_timestamp(DIVIDER)
+ return test
diff --git a/tools/testing/kunit/kunit_printer.py b/tools/testing/kunit/kunit_printer.py
new file mode 100644
index 000000000000..ca119f61fe79
--- /dev/null
+++ b/tools/testing/kunit/kunit_printer.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Utilities for printing and coloring output.
+#
+# Copyright (C) 2022, Google LLC.
+# Author: Daniel Latypov <dlatypov@google.com>
+
+import datetime
+import sys
+import typing
+
+_RESET = '\033[0;0m'
+
+class Printer:
+ """Wraps a file object, providing utilities for coloring output, etc."""
+
+ def __init__(self, print: bool=True, output: typing.IO[str]=sys.stdout):
+ self._output = output
+ self._print = print
+ if print:
+ self._use_color = output.isatty()
+ else:
+ self._use_color = False
+
+ def print(self, message: str) -> None:
+ if self._print:
+ print(message, file=self._output)
+
+ def print_with_timestamp(self, message: str) -> None:
+ ts = datetime.datetime.now().strftime('%H:%M:%S')
+ self.print(f'[{ts}] {message}')
+
+ def _color(self, code: str, text: str) -> str:
+ if not self._use_color:
+ return text
+ return code + text + _RESET
+
+ def red(self, text: str) -> str:
+ return self._color('\033[1;31m', text)
+
+ def yellow(self, text: str) -> str:
+ return self._color('\033[1;33m', text)
+
+ def green(self, text: str) -> str:
+ return self._color('\033[1;32m', text)
+
+ def color_len(self) -> int:
+ """Returns the length of the color escape codes."""
+ return len(self.red(''))
+
+# Provides a default instance that prints to stdout
+stdout = Printer()
+null_printer = Printer(print=False)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index b593f4448e83..bbba921e0eac 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -11,26 +11,33 @@ from unittest import mock
import tempfile, shutil # Handling test_tmpdir
+import itertools
import json
import os
+import signal
+import subprocess
+from typing import Iterable
import kunit_config
import kunit_parser
import kunit_kernel
import kunit_json
import kunit
+from kunit_printer import stdout
test_tmpdir = ''
+abs_test_data_dir = ''
def setUpModule():
- global test_tmpdir
+ global test_tmpdir, abs_test_data_dir
test_tmpdir = tempfile.mkdtemp()
+ abs_test_data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'test_data'))
def tearDownModule():
shutil.rmtree(test_tmpdir)
-def get_absolute_path(path):
- return os.path.join(os.path.dirname(__file__), path)
+def test_data_path(path):
+ return os.path.join(abs_test_data_dir, path)
class KconfigTest(unittest.TestCase):
@@ -39,365 +46,814 @@ class KconfigTest(unittest.TestCase):
self.assertTrue(kconfig0.is_subset_of(kconfig0))
kconfig1 = kunit_config.Kconfig()
- kconfig1.add_entry(kunit_config.KconfigEntry('TEST', 'y'))
+ kconfig1.add_entry('TEST', 'y')
self.assertTrue(kconfig1.is_subset_of(kconfig1))
self.assertTrue(kconfig0.is_subset_of(kconfig1))
self.assertFalse(kconfig1.is_subset_of(kconfig0))
def test_read_from_file(self):
- kconfig = kunit_config.Kconfig()
- kconfig_path = get_absolute_path(
- 'test_data/test_read_from_file.kconfig')
+ kconfig_path = test_data_path('test_read_from_file.kconfig')
- kconfig.read_from_file(kconfig_path)
+ kconfig = kunit_config.parse_file(kconfig_path)
expected_kconfig = kunit_config.Kconfig()
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('UML', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('MMU', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('TEST', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('EXAMPLE_TEST', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('MK8', 'n'))
-
- self.assertEqual(kconfig.entries(), expected_kconfig.entries())
+ expected_kconfig.add_entry('UML', 'y')
+ expected_kconfig.add_entry('MMU', 'y')
+ expected_kconfig.add_entry('TEST', 'y')
+ expected_kconfig.add_entry('EXAMPLE_TEST', 'y')
+ expected_kconfig.add_entry('MK8', 'n')
+
+ self.assertEqual(kconfig, expected_kconfig)
def test_write_to_file(self):
kconfig_path = os.path.join(test_tmpdir, '.config')
expected_kconfig = kunit_config.Kconfig()
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('UML', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('MMU', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('TEST', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('EXAMPLE_TEST', 'y'))
- expected_kconfig.add_entry(
- kunit_config.KconfigEntry('MK8', 'n'))
+ expected_kconfig.add_entry('UML', 'y')
+ expected_kconfig.add_entry('MMU', 'y')
+ expected_kconfig.add_entry('TEST', 'y')
+ expected_kconfig.add_entry('EXAMPLE_TEST', 'y')
+ expected_kconfig.add_entry('MK8', 'n')
expected_kconfig.write_to_file(kconfig_path)
- actual_kconfig = kunit_config.Kconfig()
- actual_kconfig.read_from_file(kconfig_path)
-
- self.assertEqual(actual_kconfig.entries(),
- expected_kconfig.entries())
+ actual_kconfig = kunit_config.parse_file(kconfig_path)
+ self.assertEqual(actual_kconfig, expected_kconfig)
class KUnitParserTest(unittest.TestCase):
+ def setUp(self):
+ self.print_mock = mock.patch('kunit_printer.Printer.print').start()
+ self.addCleanup(mock.patch.stopall)
- def assertContains(self, needle, haystack):
- for line in haystack:
+ def noPrintCallContains(self, substr: str):
+ for call in self.print_mock.mock_calls:
+ self.assertNotIn(substr, call.args[0])
+
+ def assertContains(self, needle: str, haystack: kunit_parser.LineStream):
+ # Clone the iterator so we can print the contents on failure.
+ copy, backup = itertools.tee(haystack)
+ for line in copy:
if needle in line:
return
- raise AssertionError('"' +
- str(needle) + '" not found in "' + str(haystack) + '"!')
+ raise AssertionError(f'"{needle}" not found in {list(backup)}!')
def test_output_isolated_correctly(self):
- log_path = get_absolute_path(
- 'test_data/test_output_isolated_correctly.log')
- file = open(log_path)
- result = kunit_parser.isolate_kunit_output(file.readlines())
+ log_path = test_data_path('test_output_isolated_correctly.log')
+ with open(log_path) as file:
+ result = kunit_parser.extract_tap_lines(file.readlines())
self.assertContains('TAP version 14', result)
- self.assertContains(' # Subtest: example', result)
- self.assertContains(' 1..2', result)
- self.assertContains(' ok 1 - example_simple_test', result)
- self.assertContains(' ok 2 - example_mock_test', result)
+ self.assertContains('# Subtest: example', result)
+ self.assertContains('1..2', result)
+ self.assertContains('ok 1 - example_simple_test', result)
+ self.assertContains('ok 2 - example_mock_test', result)
self.assertContains('ok 1 - example', result)
- file.close()
def test_output_with_prefix_isolated_correctly(self):
- log_path = get_absolute_path(
- 'test_data/test_pound_sign.log')
+ log_path = test_data_path('test_pound_sign.log')
with open(log_path) as file:
- result = kunit_parser.isolate_kunit_output(file.readlines())
+ result = kunit_parser.extract_tap_lines(file.readlines())
self.assertContains('TAP version 14', result)
- self.assertContains(' # Subtest: kunit-resource-test', result)
- self.assertContains(' 1..5', result)
- self.assertContains(' ok 1 - kunit_resource_test_init_resources', result)
- self.assertContains(' ok 2 - kunit_resource_test_alloc_resource', result)
- self.assertContains(' ok 3 - kunit_resource_test_destroy_resource', result)
- self.assertContains(' foo bar #', result)
- self.assertContains(' ok 4 - kunit_resource_test_cleanup_resources', result)
- self.assertContains(' ok 5 - kunit_resource_test_proper_free_ordering', result)
+ self.assertContains('# Subtest: kunit-resource-test', result)
+ self.assertContains('1..5', result)
+ self.assertContains('ok 1 - kunit_resource_test_init_resources', result)
+ self.assertContains('ok 2 - kunit_resource_test_alloc_resource', result)
+ self.assertContains('ok 3 - kunit_resource_test_destroy_resource', result)
+ self.assertContains('foo bar #', result)
+ self.assertContains('ok 4 - kunit_resource_test_cleanup_resources', result)
+ self.assertContains('ok 5 - kunit_resource_test_proper_free_ordering', result)
self.assertContains('ok 1 - kunit-resource-test', result)
- self.assertContains(' foo bar # non-kunit output', result)
- self.assertContains(' # Subtest: kunit-try-catch-test', result)
- self.assertContains(' 1..2', result)
- self.assertContains(' ok 1 - kunit_test_try_catch_successful_try_no_catch',
+ self.assertContains('foo bar # non-kunit output', result)
+ self.assertContains('# Subtest: kunit-try-catch-test', result)
+ self.assertContains('1..2', result)
+ self.assertContains('ok 1 - kunit_test_try_catch_successful_try_no_catch',
result)
- self.assertContains(' ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch',
+ self.assertContains('ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch',
result)
self.assertContains('ok 2 - kunit-try-catch-test', result)
- self.assertContains(' # Subtest: string-stream-test', result)
- self.assertContains(' 1..3', result)
- self.assertContains(' ok 1 - string_stream_test_empty_on_creation', result)
- self.assertContains(' ok 2 - string_stream_test_not_empty_after_add', result)
- self.assertContains(' ok 3 - string_stream_test_get_string', result)
+ self.assertContains('# Subtest: string-stream-test', result)
+ self.assertContains('1..3', result)
+ self.assertContains('ok 1 - string_stream_test_empty_on_creation', result)
+ self.assertContains('ok 2 - string_stream_test_not_empty_after_add', result)
+ self.assertContains('ok 3 - string_stream_test_get_string', result)
self.assertContains('ok 3 - string-stream-test', result)
def test_parse_successful_test_log(self):
- all_passed_log = get_absolute_path(
- 'test_data/test_is_test_passed-all_passed.log')
- file = open(all_passed_log)
- result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual(
- kunit_parser.TestStatus.SUCCESS,
- result.status)
- file.close()
+ all_passed_log = test_data_path('test_is_test_passed-all_passed.log')
+ with open(all_passed_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual(result.counts.errors, 0)
+
+ def test_parse_successful_nested_tests_log(self):
+ all_passed_log = test_data_path('test_is_test_passed-all_passed_nested.log')
+ with open(all_passed_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual(result.counts.errors, 0)
+
+ def test_kselftest_nested(self):
+ kselftest_log = test_data_path('test_is_test_passed-kselftest.log')
+ with open(kselftest_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual(result.counts.errors, 0)
def test_parse_failed_test_log(self):
- failed_log = get_absolute_path(
- 'test_data/test_is_test_passed-failure.log')
- file = open(failed_log)
- result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual(
- kunit_parser.TestStatus.FAILURE,
- result.status)
- file.close()
+ failed_log = test_data_path('test_is_test_passed-failure.log')
+ with open(failed_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status)
+ self.assertEqual(result.counts.errors, 0)
+
+ def test_no_header(self):
+ empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log')
+ with open(empty_log) as file:
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.extract_tap_lines(file.readlines()), stdout)
+ self.assertEqual(0, len(result.subtests))
+ self.assertEqual(kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status)
+ self.assertEqual(result.counts.errors, 1)
+
+ def test_missing_test_plan(self):
+ missing_plan_log = test_data_path('test_is_test_passed-'
+ 'missing_plan.log')
+ with open(missing_plan_log) as file:
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.extract_tap_lines(
+ file.readlines()), stdout)
+ # A missing test plan is not an error.
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=10, errors=0))
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
def test_no_tests(self):
- empty_log = get_absolute_path(
- 'test_data/test_is_test_passed-no_tests_run.log')
- file = open(empty_log)
- result = kunit_parser.parse_run_tests(
- kunit_parser.isolate_kunit_output(file.readlines()))
- self.assertEqual(0, len(result.suites))
+ header_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log')
+ with open(header_log) as file:
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.extract_tap_lines(file.readlines()), stdout)
+ self.assertEqual(0, len(result.subtests))
+ self.assertEqual(kunit_parser.TestStatus.NO_TESTS, result.status)
+ self.assertEqual(result.counts.errors, 1)
+
+ def test_no_tests_no_plan(self):
+ no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log')
+ with open(no_plan_log) as file:
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.extract_tap_lines(file.readlines()), stdout)
+ self.assertEqual(0, len(result.subtests[0].subtests[0].subtests))
self.assertEqual(
kunit_parser.TestStatus.NO_TESTS,
- result.status)
- file.close()
+ result.subtests[0].subtests[0].status)
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=1))
+
def test_no_kunit_output(self):
- crash_log = get_absolute_path(
- 'test_data/test_insufficient_memory.log')
- file = open(crash_log)
- print_mock = mock.patch('builtins.print').start()
- result = kunit_parser.parse_run_tests(
- kunit_parser.isolate_kunit_output(file.readlines()))
- print_mock.assert_any_call(StrContains('no tests run!'))
+ crash_log = test_data_path('test_insufficient_memory.log')
+ print_mock = mock.patch('kunit_printer.Printer.print').start()
+ with open(crash_log) as file:
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.extract_tap_lines(file.readlines()), stdout)
+ print_mock.assert_any_call(StrContains('Could not find any KTAP output.'))
print_mock.stop()
- file.close()
+ self.assertEqual(0, len(result.subtests))
+ self.assertEqual(result.counts.errors, 1)
+
+ def test_skipped_test(self):
+ skipped_log = test_data_path('test_skip_tests.log')
+ with open(skipped_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+
+ # A skipped test does not fail the whole suite.
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=4, skipped=1))
+
+ def test_skipped_all_tests(self):
+ skipped_log = test_data_path('test_skip_all_tests.log')
+ with open(skipped_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
- def test_crashed_test(self):
- crashed_log = get_absolute_path(
- 'test_data/test_is_test_passed-crash.log')
- file = open(crashed_log)
- result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual(kunit_parser.TestStatus.SKIPPED, result.status)
+ self.assertEqual(result.counts, kunit_parser.TestCounts(skipped=5))
+
+ def test_ignores_hyphen(self):
+ hyphen_log = test_data_path('test_strip_hyphen.log')
+ with open(hyphen_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+
+ # A skipped test does not fail the whole suite.
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual(
+ "sysctl_test",
+ result.subtests[0].name)
self.assertEqual(
- kunit_parser.TestStatus.TEST_CRASHED,
- result.status)
- file.close()
+ "example",
+ result.subtests[1].name)
def test_ignores_prefix_printk_time(self):
- prefix_log = get_absolute_path(
- 'test_data/test_config_printk_time.log')
+ prefix_log = test_data_path('test_config_printk_time.log')
with open(prefix_log) as file:
- result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual(
- kunit_parser.TestStatus.SUCCESS,
- result.status)
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
+ self.assertEqual(result.counts.errors, 0)
def test_ignores_multiple_prefixes(self):
- prefix_log = get_absolute_path(
- 'test_data/test_multiple_prefixes.log')
+ prefix_log = test_data_path('test_multiple_prefixes.log')
with open(prefix_log) as file:
- result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual(
- kunit_parser.TestStatus.SUCCESS,
- result.status)
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
+ self.assertEqual(result.counts.errors, 0)
def test_prefix_mixed_kernel_output(self):
- mixed_prefix_log = get_absolute_path(
- 'test_data/test_interrupted_tap_output.log')
+ mixed_prefix_log = test_data_path('test_interrupted_tap_output.log')
with open(mixed_prefix_log) as file:
- result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual(
- kunit_parser.TestStatus.SUCCESS,
- result.status)
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
+ self.assertEqual(result.counts.errors, 0)
def test_prefix_poundsign(self):
- pound_log = get_absolute_path('test_data/test_pound_sign.log')
+ pound_log = test_data_path('test_pound_sign.log')
with open(pound_log) as file:
- result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual(
- kunit_parser.TestStatus.SUCCESS,
- result.status)
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
+ self.assertEqual(result.counts.errors, 0)
def test_kernel_panic_end(self):
- panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log')
+ panic_log = test_data_path('test_kernel_panic_interrupt.log')
with open(panic_log) as file:
- result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual(
- kunit_parser.TestStatus.TEST_CRASHED,
- result.status)
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
+ self.assertGreaterEqual(result.counts.errors, 1)
def test_pound_no_prefix(self):
- pound_log = get_absolute_path('test_data/test_pound_no_prefix.log')
+ pound_log = test_data_path('test_pound_no_prefix.log')
with open(pound_log) as file:
- result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual(
- kunit_parser.TestStatus.SUCCESS,
- result.status)
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual('kunit-resource-test', result.subtests[0].name)
+ self.assertEqual(result.counts.errors, 0)
+
+ def test_summarize_failures(self):
+ output = """
+ KTAP version 1
+ 1..2
+ # Subtest: all_failed_suite
+ 1..2
+ not ok 1 - test1
+ not ok 2 - test2
+ not ok 1 - all_failed_suite
+ # Subtest: some_failed_suite
+ 1..2
+ ok 1 - test1
+ not ok 2 - test2
+ not ok 1 - some_failed_suite
+ """
+ result = kunit_parser.parse_run_tests(output.splitlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status)
+
+ self.assertEqual(kunit_parser._summarize_failed_tests(result),
+ 'Failures: all_failed_suite, some_failed_suite.test2')
+
+ def test_ktap_format(self):
+ ktap_log = test_data_path('test_parse_ktap_output.log')
+ with open(ktap_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=3))
+ self.assertEqual('suite', result.subtests[0].name)
+ self.assertEqual('case_1', result.subtests[0].subtests[0].name)
+ self.assertEqual('case_2', result.subtests[0].subtests[1].name)
+
+ def test_parse_subtest_header(self):
+ ktap_log = test_data_path('test_parse_subtest_header.log')
+ with open(ktap_log) as file:
+ kunit_parser.parse_run_tests(file.readlines(), stdout)
+ self.print_mock.assert_any_call(StrContains('suite (1 subtest)'))
+
+ def test_parse_attributes(self):
+ ktap_log = test_data_path('test_parse_attributes.log')
+ with open(ktap_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines(), stdout)
+
+ # Test should pass with no errors
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=0))
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+
+ # Ensure suite header is parsed correctly
+ self.print_mock.assert_any_call(StrContains('suite (1 subtest)'))
+
+ # Ensure attributes in correct test log
+ self.assertContains('# module: example', result.subtests[0].log)
+ self.assertContains('# test.speed: slow', result.subtests[0].subtests[0].log)
+
+ def test_show_test_output_on_failure(self):
+ output = """
+ KTAP version 1
+ 1..1
+ Test output.
+ Indented more.
+ not ok 1 test1
+ """
+ result = kunit_parser.parse_run_tests(output.splitlines(), stdout)
+ self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status)
+
+ self.print_mock.assert_any_call(StrContains('Test output.'))
+ self.print_mock.assert_any_call(StrContains(' Indented more.'))
+ self.noPrintCallContains('not ok 1 test1')
+
+ def test_parse_late_test_plan(self):
+ output = """
+ TAP version 13
+ ok 4 test4
+ 1..4
+ """
+ result = kunit_parser.parse_run_tests(output.splitlines(), stdout)
+ # Missing test results after test plan should alert a suspected test crash.
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2))
+
+def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream:
+ return kunit_parser.LineStream(enumerate(strs, start=1))
+
+class LineStreamTest(unittest.TestCase):
+
+ def test_basic(self):
+ stream = line_stream_from_strs(['hello', 'world'])
+
+ self.assertTrue(stream, msg='Should be more input')
+ self.assertEqual(stream.line_number(), 1)
+ self.assertEqual(stream.peek(), 'hello')
+ self.assertEqual(stream.pop(), 'hello')
+
+ self.assertTrue(stream, msg='Should be more input')
+ self.assertEqual(stream.line_number(), 2)
+ self.assertEqual(stream.peek(), 'world')
+ self.assertEqual(stream.pop(), 'world')
+
+ self.assertFalse(stream, msg='Should be no more input')
+ with self.assertRaisesRegex(ValueError, 'LineStream: going past EOF'):
+ stream.pop()
+
+ def test_is_lazy(self):
+ called_times = 0
+ def generator():
+ nonlocal called_times
+ for _ in range(1,5):
+ called_times += 1
+ yield called_times, str(called_times)
+
+ stream = kunit_parser.LineStream(generator())
+ self.assertEqual(called_times, 0)
+
+ self.assertEqual(stream.pop(), '1')
+ self.assertEqual(called_times, 1)
+
+ self.assertEqual(stream.pop(), '2')
+ self.assertEqual(called_times, 2)
+
+class LinuxSourceTreeTest(unittest.TestCase):
+
+ def setUp(self):
+ mock.patch.object(signal, 'signal').start()
+ self.addCleanup(mock.patch.stopall)
+
+ def test_invalid_kunitconfig(self):
+ with self.assertRaisesRegex(kunit_kernel.ConfigError, 'nonexistent.* does not exist'):
+ kunit_kernel.LinuxSourceTree('', kunitconfig_paths=['/nonexistent_file'])
+
+ def test_valid_kunitconfig(self):
+ with tempfile.NamedTemporaryFile('wt') as kunitconfig:
+ kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[kunitconfig.name])
+
+ def test_dir_kunitconfig(self):
+ with tempfile.TemporaryDirectory('') as dir:
+ with open(os.path.join(dir, '.kunitconfig'), 'w'):
+ pass
+ kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[dir])
+
+ def test_multiple_kunitconfig(self):
+ want_kconfig = kunit_config.Kconfig()
+ want_kconfig.add_entry('KUNIT', 'y')
+ want_kconfig.add_entry('KUNIT_TEST', 'm')
+
+ with tempfile.TemporaryDirectory('') as dir:
+ other = os.path.join(dir, 'otherkunitconfig')
+ with open(os.path.join(dir, '.kunitconfig'), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(other, 'w') as f:
+ f.write('CONFIG_KUNIT_TEST=m')
+ pass
+
+ tree = kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[dir, other])
+ self.assertTrue(want_kconfig.is_subset_of(tree._kconfig), msg=tree._kconfig)
+
+
+ def test_multiple_kunitconfig_invalid(self):
+ with tempfile.TemporaryDirectory('') as dir:
+ other = os.path.join(dir, 'otherkunitconfig')
+ with open(os.path.join(dir, '.kunitconfig'), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(other, 'w') as f:
+ f.write('CONFIG_KUNIT=m')
+
+ with self.assertRaisesRegex(kunit_kernel.ConfigError, '(?s)Multiple values.*CONFIG_KUNIT'):
+ kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[dir, other])
+
+
+ def test_kconfig_add(self):
+ want_kconfig = kunit_config.Kconfig()
+ want_kconfig.add_entry('NOT_REAL', 'y')
+
+ tree = kunit_kernel.LinuxSourceTree('', kconfig_add=['CONFIG_NOT_REAL=y'])
+ self.assertTrue(want_kconfig.is_subset_of(tree._kconfig), msg=tree._kconfig)
+
+ def test_invalid_arch(self):
+ with self.assertRaisesRegex(kunit_kernel.ConfigError, 'not a valid arch, options are.*x86_64'):
+ kunit_kernel.LinuxSourceTree('', arch='invalid')
+
+ def test_run_kernel_hits_exception(self):
+ def fake_start(unused_args, unused_build_dir):
+ return subprocess.Popen(['echo "hi\nbye"'], shell=True, text=True, stdout=subprocess.PIPE)
+
+ with tempfile.TemporaryDirectory('') as build_dir:
+ tree = kunit_kernel.LinuxSourceTree(build_dir)
+ mock.patch.object(tree._ops, 'start', side_effect=fake_start).start()
+
+ with self.assertRaises(ValueError):
+ for line in tree.run_kernel(build_dir=build_dir):
+ self.assertEqual(line, 'hi\n')
+ raise ValueError('uh oh, did not read all output')
+
+ with open(kunit_kernel.get_outfile_path(build_dir), 'rt') as outfile:
+ self.assertEqual(outfile.read(), 'hi\nbye\n', msg='Missing some output')
+
+ def test_build_reconfig_no_config(self):
+ with tempfile.TemporaryDirectory('') as build_dir:
+ with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+
+ tree = kunit_kernel.LinuxSourceTree(build_dir)
+ # Stub out the source tree operations, so we don't have
+ # the defaults for any given architecture get in the
+ # way.
+ tree._ops = kunit_kernel.LinuxSourceTreeOperations('none', None)
+ mock_build_config = mock.patch.object(tree, 'build_config').start()
+
+ # Should generate the .config
+ self.assertTrue(tree.build_reconfig(build_dir, make_options=[]))
+ mock_build_config.assert_called_once_with(build_dir, [])
+
+ def test_build_reconfig_existing_config(self):
+ with tempfile.TemporaryDirectory('') as build_dir:
+ # Existing .config is a superset, should not touch it
+ with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(kunit_kernel.get_old_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(kunit_kernel.get_kconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y')
+
+ tree = kunit_kernel.LinuxSourceTree(build_dir)
+ # Stub out the source tree operations, so we don't have
+ # the defaults for any given architecture get in the
+ # way.
+ tree._ops = kunit_kernel.LinuxSourceTreeOperations('none', None)
+ mock_build_config = mock.patch.object(tree, 'build_config').start()
+
+ self.assertTrue(tree.build_reconfig(build_dir, make_options=[]))
+ self.assertEqual(mock_build_config.call_count, 0)
+
+ def test_build_reconfig_remove_option(self):
+ with tempfile.TemporaryDirectory('') as build_dir:
+ # We removed CONFIG_KUNIT_TEST=y from our .kunitconfig...
+ with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y')
+ with open(kunit_kernel.get_old_kunitconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y')
+ with open(kunit_kernel.get_kconfig_path(build_dir), 'w') as f:
+ f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y')
+
+ tree = kunit_kernel.LinuxSourceTree(build_dir)
+ # Stub out the source tree operations, so we don't have
+ # the defaults for any given architecture get in the
+ # way.
+ tree._ops = kunit_kernel.LinuxSourceTreeOperations('none', None)
+ mock_build_config = mock.patch.object(tree, 'build_config').start()
+
+ # ... so we should trigger a call to build_config()
+ self.assertTrue(tree.build_reconfig(build_dir, make_options=[]))
+ mock_build_config.assert_called_once_with(build_dir, [])
+
+ # TODO: add more test cases.
+
class KUnitJsonTest(unittest.TestCase):
+ def setUp(self):
+ self.print_mock = mock.patch('kunit_printer.Printer.print').start()
+ self.addCleanup(mock.patch.stopall)
def _json_for(self, log_file):
- with(open(get_absolute_path(log_file))) as file:
- test_result = kunit_parser.parse_run_tests(file)
+ with open(test_data_path(log_file)) as file:
+ test_result = kunit_parser.parse_run_tests(file, stdout)
json_obj = kunit_json.get_json_result(
- test_result=test_result,
- def_config='kunit_defconfig',
- build_dir=None,
- json_path='stdout')
+ test=test_result,
+ metadata=kunit_json.Metadata())
return json.loads(json_obj)
def test_failed_test_json(self):
- result = self._json_for(
- 'test_data/test_is_test_passed-failure.log')
+ result = self._json_for('test_is_test_passed-failure.log')
self.assertEqual(
{'name': 'example_simple_test', 'status': 'FAIL'},
result["sub_groups"][1]["test_cases"][0])
def test_crashed_test_json(self):
- result = self._json_for(
- 'test_data/test_is_test_passed-crash.log')
+ result = self._json_for('test_kernel_panic_interrupt.log')
self.assertEqual(
- {'name': 'example_simple_test', 'status': 'ERROR'},
- result["sub_groups"][1]["test_cases"][0])
+ {'name': '', 'status': 'ERROR'},
+ result["sub_groups"][2]["test_cases"][1])
+
+ def test_skipped_test_json(self):
+ result = self._json_for('test_skip_tests.log')
+ self.assertEqual(
+ {'name': 'example_skip_test', 'status': 'SKIP'},
+ result["sub_groups"][1]["test_cases"][1])
def test_no_tests_json(self):
- result = self._json_for(
- 'test_data/test_is_test_passed-no_tests_run.log')
+ result = self._json_for('test_is_test_passed-no_tests_run_with_header.log')
self.assertEqual(0, len(result['sub_groups']))
+ def test_nested_json(self):
+ result = self._json_for('test_is_test_passed-all_passed_nested.log')
+ self.assertEqual(
+ {'name': 'example_simple_test', 'status': 'PASS'},
+ result["sub_groups"][0]["sub_groups"][0]["test_cases"][0])
+
class StrContains(str):
def __eq__(self, other):
return self in other
class KUnitMainTest(unittest.TestCase):
def setUp(self):
- path = get_absolute_path('test_data/test_is_test_passed-all_passed.log')
- file = open(path)
- all_passed_log = file.readlines()
- self.print_patch = mock.patch('builtins.print')
- self.print_mock = self.print_patch.start()
- self.linux_source_mock = mock.Mock()
- self.linux_source_mock.build_reconfig = mock.Mock(return_value=True)
- self.linux_source_mock.build_um_kernel = mock.Mock(return_value=True)
- self.linux_source_mock.run_kernel = mock.Mock(return_value=all_passed_log)
-
- def tearDown(self):
- self.print_patch.stop()
- pass
+ path = test_data_path('test_is_test_passed-all_passed.log')
+ with open(path) as file:
+ all_passed_log = file.readlines()
+
+ self.print_mock = mock.patch('kunit_printer.Printer.print').start()
+ self.addCleanup(mock.patch.stopall)
+
+ self.mock_linux_init = mock.patch.object(kunit_kernel, 'LinuxSourceTree').start()
+ self.linux_source_mock = self.mock_linux_init.return_value
+ self.linux_source_mock.build_reconfig.return_value = True
+ self.linux_source_mock.build_kernel.return_value = True
+ self.linux_source_mock.run_kernel.return_value = all_passed_log
def test_config_passes_args_pass(self):
- kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
- assert self.linux_source_mock.run_kernel.call_count == 0
+ kunit.main(['config', '--build_dir=.kunit'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0)
def test_build_passes_args_pass(self):
- kunit.main(['build'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 0
- self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, '.kunit', None)
- assert self.linux_source_mock.run_kernel.call_count == 0
+ kunit.main(['build'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.linux_source_mock.build_kernel.assert_called_once_with(kunit.get_default_jobs(), '.kunit', None)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0)
def test_exec_passes_args_pass(self):
- kunit.main(['exec'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 0
- assert self.linux_source_mock.run_kernel.call_count == 1
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=300)
+ kunit.main(['exec'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_passes_args_pass(self):
- kunit.main(['run'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
- assert self.linux_source_mock.run_kernel.call_count == 1
+ kunit.main(['run'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='.kunit', timeout=300)
+ args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_exec_passes_args_fail(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
with self.assertRaises(SystemExit) as e:
- kunit.main(['exec'], self.linux_source_mock)
- assert type(e.exception) == SystemExit
- assert e.exception.code == 1
+ kunit.main(['exec'])
+ self.assertEqual(e.exception.code, 1)
def test_run_passes_args_fail(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
with self.assertRaises(SystemExit) as e:
- kunit.main(['run'], self.linux_source_mock)
- assert type(e.exception) == SystemExit
- assert e.exception.code == 1
- assert self.linux_source_mock.build_reconfig.call_count == 1
- assert self.linux_source_mock.run_kernel.call_count == 1
- self.print_mock.assert_any_call(StrContains(' 0 tests run'))
+ kunit.main(['run'])
+ self.assertEqual(e.exception.code, 1)
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+ self.print_mock.assert_any_call(StrContains('Could not find any KTAP output.'))
+
+ def test_exec_no_tests(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0'])
+ with self.assertRaises(SystemExit) as e:
+ kunit.main(['run'])
+ self.assertEqual(e.exception.code, 1)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300)
+ self.print_mock.assert_any_call(StrContains(' 0 tests run!'))
def test_exec_raw_output(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
- kunit.main(['exec', '--raw_output'], self.linux_source_mock)
- assert self.linux_source_mock.run_kernel.call_count == 1
- for kall in self.print_mock.call_args_list:
- assert kall != mock.call(StrContains('Testing complete.'))
- assert kall != mock.call(StrContains(' 0 tests run'))
+ kunit.main(['exec', '--raw_output'])
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+ for call in self.print_mock.call_args_list:
+ self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
+ self.assertNotEqual(call, mock.call(StrContains(' 0 tests run!')))
def test_run_raw_output(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
- kunit.main(['run', '--raw_output'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
- assert self.linux_source_mock.run_kernel.call_count == 1
- for kall in self.print_mock.call_args_list:
- assert kall != mock.call(StrContains('Testing complete.'))
- assert kall != mock.call(StrContains(' 0 tests run'))
+ kunit.main(['run', '--raw_output'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+ for call in self.print_mock.call_args_list:
+ self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
+ self.assertNotEqual(call, mock.call(StrContains(' 0 tests run!')))
+
+ def test_run_raw_output_kunit(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ kunit.main(['run', '--raw_output=kunit'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+ for call in self.print_mock.call_args_list:
+ self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
+ self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
+
+ def test_run_raw_output_invalid(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ with self.assertRaises(SystemExit) as e:
+ kunit.main(['run', '--raw_output=invalid'])
+ self.assertNotEqual(e.exception.code, 0)
+
+ def test_run_raw_output_does_not_take_positional_args(self):
+ # --raw_output is a string flag, but we don't want it to consume
+ # any positional arguments, only ones after an '='
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ kunit.main(['run', '--raw_output', 'filter_glob'])
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir='.kunit', filter_glob='filter_glob', filter='', filter_action=None, timeout=300)
def test_exec_timeout(self):
timeout = 3453
- kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=timeout)
+ kunit.main(['exec', '--timeout', str(timeout)])
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_timeout(self):
timeout = 3453
- kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
+ kunit.main(['run', '--timeout', str(timeout)])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='.kunit', timeout=timeout)
+ args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_builddir(self):
build_dir = '.kunit'
- kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
+ kunit.main(['run', '--build_dir=.kunit'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir=build_dir, timeout=300)
+ args=None, build_dir=build_dir, filter_glob='', filter='', filter_action=None, timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_config_builddir(self):
build_dir = '.kunit'
- kunit.main(['config', '--build_dir', build_dir], self.linux_source_mock)
- assert self.linux_source_mock.build_reconfig.call_count == 1
+ kunit.main(['config', '--build_dir', build_dir])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
def test_build_builddir(self):
build_dir = '.kunit'
- kunit.main(['build', '--build_dir', build_dir], self.linux_source_mock)
- self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, build_dir, None)
+ jobs = kunit.get_default_jobs()
+ kunit.main(['build', '--build_dir', build_dir])
+ self.linux_source_mock.build_kernel.assert_called_once_with(jobs, build_dir, None)
def test_exec_builddir(self):
build_dir = '.kunit'
- kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock)
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300)
+ kunit.main(['exec', '--build_dir', build_dir])
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir=build_dir, filter_glob='', filter='', filter_action=None, timeout=300)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
+ def test_run_kunitconfig(self):
+ kunit.main(['run', '--kunitconfig=mykunitconfig'])
+ # Just verify that we parsed and initialized it correctly here.
+ self.mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=['mykunitconfig'],
+ kconfig_add=None,
+ arch='um',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=[])
+
+ def test_config_kunitconfig(self):
+ kunit.main(['config', '--kunitconfig=mykunitconfig'])
+ # Just verify that we parsed and initialized it correctly here.
+ self.mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=['mykunitconfig'],
+ kconfig_add=None,
+ arch='um',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=[])
+
+ def test_config_alltests(self):
+ kunit.main(['config', '--kunitconfig=mykunitconfig', '--alltests'])
+ # Just verify that we parsed and initialized it correctly here.
+ self.mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=[kunit_kernel.ALL_TESTS_CONFIG_PATH, 'mykunitconfig'],
+ kconfig_add=None,
+ arch='um',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=[])
+
+
+ @mock.patch.object(kunit_kernel, 'LinuxSourceTree')
+ def test_run_multiple_kunitconfig(self, mock_linux_init):
+ mock_linux_init.return_value = self.linux_source_mock
+ kunit.main(['run', '--kunitconfig=mykunitconfig', '--kunitconfig=other'])
+ # Just verify that we parsed and initialized it correctly here.
+ mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=['mykunitconfig', 'other'],
+ kconfig_add=None,
+ arch='um',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=[])
+
+ def test_run_kconfig_add(self):
+ kunit.main(['run', '--kconfig_add=CONFIG_KASAN=y', '--kconfig_add=CONFIG_KCSAN=y'])
+ # Just verify that we parsed and initialized it correctly here.
+ self.mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=[],
+ kconfig_add=['CONFIG_KASAN=y', 'CONFIG_KCSAN=y'],
+ arch='um',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=[])
+
+ def test_run_qemu_args(self):
+ kunit.main(['run', '--arch=x86_64', '--qemu_args', '-m 2048'])
+ # Just verify that we parsed and initialized it correctly here.
+ self.mock_linux_init.assert_called_once_with('.kunit',
+ kunitconfig_paths=[],
+ kconfig_add=None,
+ arch='x86_64',
+ cross_compile=None,
+ qemu_config_path=None,
+ extra_qemu_args=['-m', '2048'])
+
+ def test_run_kernel_args(self):
+ kunit.main(['run', '--kernel_args=a=1', '--kernel_args=b=2'])
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=['a=1','b=2'], build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
+ def test_list_tests(self):
+ want = ['suite.test1', 'suite.test2', 'suite2.test1']
+ self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want
+
+ got = kunit._list_tests(self.linux_source_mock,
+ kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*', '', None, None, 'suite', False, False))
+ self.assertEqual(got, want)
+ # Should respect the user's filter glob when listing tests.
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=['kunit.action=list'], build_dir='.kunit', filter_glob='suite*', filter='', filter_action=None, timeout=300)
+
+ @mock.patch.object(kunit, '_list_tests')
+ def test_run_isolated_by_suite(self, mock_tests):
+ mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1']
+ kunit.main(['exec', '--run_isolated=suite', 'suite*.test*'])
+
+ # Should respect the user's filter glob when listing tests.
+ mock_tests.assert_called_once_with(mock.ANY,
+ kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*.test*', '', None, None, 'suite', False, False))
+ self.linux_source_mock.run_kernel.assert_has_calls([
+ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', filter='', filter_action=None, timeout=300),
+ mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', filter='', filter_action=None, timeout=300),
+ ])
+
+ @mock.patch.object(kunit, '_list_tests')
+ def test_run_isolated_by_test(self, mock_tests):
+ mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1']
+ kunit.main(['exec', '--run_isolated=test', 'suite*'])
+
+ # Should respect the user's filter glob when listing tests.
+ mock_tests.assert_called_once_with(mock.ANY,
+ kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*', '', None, None, 'test', False, False))
+ self.linux_source_mock.run_kernel.assert_has_calls([
+ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', filter='', filter_action=None, timeout=300),
+ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', filter='', filter_action=None, timeout=300),
+ mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test1', filter='', filter_action=None, timeout=300),
+ ])
+
if __name__ == '__main__':
unittest.main()
diff --git a/tools/testing/kunit/mypy.ini b/tools/testing/kunit/mypy.ini
new file mode 100644
index 000000000000..ddd288309efa
--- /dev/null
+++ b/tools/testing/kunit/mypy.ini
@@ -0,0 +1,6 @@
+[mypy]
+strict = True
+
+# E.g. we can't write subprocess.Popen[str] until Python 3.9+.
+# But kunit.py tries to support Python 3.7+, so let's disable it.
+disable_error_code = type-arg
diff --git a/tools/testing/kunit/qemu_config.py b/tools/testing/kunit/qemu_config.py
new file mode 100644
index 000000000000..b1fba9016eed
--- /dev/null
+++ b/tools/testing/kunit/qemu_config.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Collection of configs for building non-UML kernels and running them on QEMU.
+#
+# Copyright (C) 2021, Google LLC.
+# Author: Brendan Higgins <brendanhiggins@google.com>
+
+from dataclasses import dataclass
+from typing import List
+
+
+@dataclass(frozen=True)
+class QemuArchParams:
+ linux_arch: str
+ kconfig: str
+ qemu_arch: str
+ kernel_path: str
+ kernel_command_line: str
+ extra_qemu_params: List[str]
+ serial: str = 'stdio'
diff --git a/tools/testing/kunit/qemu_configs/alpha.py b/tools/testing/kunit/qemu_configs/alpha.py
new file mode 100644
index 000000000000..3ac846e03a6b
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/alpha.py
@@ -0,0 +1,10 @@
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='alpha',
+ kconfig='''
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y''',
+ qemu_arch='alpha',
+ kernel_path='arch/alpha/boot/vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=[])
diff --git a/tools/testing/kunit/qemu_configs/arm.py b/tools/testing/kunit/qemu_configs/arm.py
new file mode 100644
index 000000000000..db2160200566
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/arm.py
@@ -0,0 +1,13 @@
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='arm',
+ kconfig='''
+CONFIG_ARCH_VIRT=y
+CONFIG_SERIAL_AMBA_PL010=y
+CONFIG_SERIAL_AMBA_PL010_CONSOLE=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''',
+ qemu_arch='arm',
+ kernel_path='arch/arm/boot/zImage',
+ kernel_command_line='console=ttyAMA0',
+ extra_qemu_params=['-machine', 'virt'])
diff --git a/tools/testing/kunit/qemu_configs/arm64.py b/tools/testing/kunit/qemu_configs/arm64.py
new file mode 100644
index 000000000000..5c44d3a87e6d
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/arm64.py
@@ -0,0 +1,12 @@
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='arm64',
+ kconfig='''
+CONFIG_SERIAL_AMBA_PL010=y
+CONFIG_SERIAL_AMBA_PL010_CONSOLE=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''',
+ qemu_arch='aarch64',
+ kernel_path='arch/arm64/boot/Image.gz',
+ kernel_command_line='console=ttyAMA0',
+ extra_qemu_params=['-machine', 'virt', '-cpu', 'max'])
diff --git a/tools/testing/kunit/qemu_configs/i386.py b/tools/testing/kunit/qemu_configs/i386.py
new file mode 100644
index 000000000000..4463ebefd567
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/i386.py
@@ -0,0 +1,10 @@
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='i386',
+ kconfig='''
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y''',
+ qemu_arch='i386',
+ kernel_path='arch/x86/boot/bzImage',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=[])
diff --git a/tools/testing/kunit/qemu_configs/loongarch.py b/tools/testing/kunit/qemu_configs/loongarch.py
new file mode 100644
index 000000000000..a92422967d1d
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/loongarch.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='loongarch',
+ kconfig='''
+CONFIG_EFI_STUB=n
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PVPANIC=y
+CONFIG_PVPANIC_PCI=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+''',
+ qemu_arch='loongarch64',
+ kernel_path='arch/loongarch/boot/vmlinux.elf',
+ kernel_command_line='console=ttyS0 kunit_shutdown=poweroff',
+ extra_qemu_params=[
+ '-machine', 'virt',
+ '-device', 'pvpanic-pci',
+ '-cpu', 'max',])
diff --git a/tools/testing/kunit/qemu_configs/m68k.py b/tools/testing/kunit/qemu_configs/m68k.py
new file mode 100644
index 000000000000..287fc386f8a7
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/m68k.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='m68k',
+ kconfig='''
+CONFIG_VIRT=y''',
+ qemu_arch='m68k',
+ kernel_path='vmlinux',
+ kernel_command_line='console=hvc0',
+ extra_qemu_params=['-machine', 'virt'])
diff --git a/tools/testing/kunit/qemu_configs/mips.py b/tools/testing/kunit/qemu_configs/mips.py
new file mode 100644
index 000000000000..8899ac157b30
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mips.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_32BIT=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mips',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta'])
diff --git a/tools/testing/kunit/qemu_configs/mips64.py b/tools/testing/kunit/qemu_configs/mips64.py
new file mode 100644
index 000000000000..1478aed05b94
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mips64.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_64BIT=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mips64',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta', '-cpu', '5KEc'])
diff --git a/tools/testing/kunit/qemu_configs/mips64el.py b/tools/testing/kunit/qemu_configs/mips64el.py
new file mode 100644
index 000000000000..300c711d7a82
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mips64el.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_64BIT=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mips64el',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta', '-cpu', '5KEc'])
diff --git a/tools/testing/kunit/qemu_configs/mipsel.py b/tools/testing/kunit/qemu_configs/mipsel.py
new file mode 100644
index 000000000000..3d3543315b45
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mipsel.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_32BIT=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mipsel',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta'])
diff --git a/tools/testing/kunit/qemu_configs/powerpc.py b/tools/testing/kunit/qemu_configs/powerpc.py
new file mode 100644
index 000000000000..5b4c895d5d5a
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/powerpc.py
@@ -0,0 +1,13 @@
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
+ kconfig='''
+CONFIG_PPC64=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_HVC_CONSOLE=y''',
+ qemu_arch='ppc64',
+ kernel_path='vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'pseries', '-cpu', 'power8'])
diff --git a/tools/testing/kunit/qemu_configs/powerpc32.py b/tools/testing/kunit/qemu_configs/powerpc32.py
new file mode 100644
index 000000000000..88bd60dbb948
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/powerpc32.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
+ kconfig='''
+CONFIG_PPC32=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_ADB_CUDA=y
+CONFIG_SERIAL_PMACZILOG=y
+CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
+''',
+ qemu_arch='ppc',
+ kernel_path='vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'g3beige', '-cpu', 'max'])
diff --git a/tools/testing/kunit/qemu_configs/powerpcle.py b/tools/testing/kunit/qemu_configs/powerpcle.py
new file mode 100644
index 000000000000..7ddee8af4bd7
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/powerpcle.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
+ kconfig='''
+CONFIG_PPC64=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_HVC_CONSOLE=y
+''',
+ qemu_arch='ppc64',
+ kernel_path='vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'pseries', '-cpu', 'power8'])
diff --git a/tools/testing/kunit/qemu_configs/riscv.py b/tools/testing/kunit/qemu_configs/riscv.py
new file mode 100644
index 000000000000..c87758030ff7
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/riscv.py
@@ -0,0 +1,28 @@
+from ..qemu_config import QemuArchParams
+import os
+import os.path
+import sys
+
+OPENSBI_FILE = 'opensbi-riscv64-generic-fw_dynamic.bin'
+OPENSBI_PATH = '/usr/share/qemu/' + OPENSBI_FILE
+
+if not os.path.isfile(OPENSBI_PATH):
+ print('\n\nOpenSBI bios was not found in "' + OPENSBI_PATH + '".\n'
+ 'Please ensure that qemu-system-riscv is installed, or edit the path in "qemu_configs/riscv.py"\n')
+ sys.exit()
+
+QEMU_ARCH = QemuArchParams(linux_arch='riscv',
+ kconfig='''
+CONFIG_ARCH_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_RISCV_SBI_V01=y
+CONFIG_SERIAL_EARLYCON_RISCV_SBI=y''',
+ qemu_arch='riscv64',
+ kernel_path='arch/riscv/boot/Image',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=[
+ '-machine', 'virt',
+ '-cpu', 'rv64',
+ '-bios', OPENSBI_PATH])
diff --git a/tools/testing/kunit/qemu_configs/riscv32.py b/tools/testing/kunit/qemu_configs/riscv32.py
new file mode 100644
index 000000000000..b79ba0ae30f8
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/riscv32.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='riscv',
+ kconfig='''
+CONFIG_NONPORTABLE=y
+CONFIG_ARCH_RV32I=y
+CONFIG_ARCH_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+''',
+ qemu_arch='riscv32',
+ kernel_path='arch/riscv/boot/Image',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-machine', 'virt'])
diff --git a/tools/testing/kunit/qemu_configs/s390.py b/tools/testing/kunit/qemu_configs/s390.py
new file mode 100644
index 000000000000..98fa4fb60c0a
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/s390.py
@@ -0,0 +1,14 @@
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='s390',
+ kconfig='''
+CONFIG_EXPERT=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NUMA=y
+CONFIG_MODULES=y''',
+ qemu_arch='s390x',
+ kernel_path='arch/s390/boot/bzImage',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=[
+ '-machine', 's390-ccw-virtio',
+ '-cpu', 'qemu',])
diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py
new file mode 100644
index 000000000000..f00cb89fdef6
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/sh.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0-only
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='sh',
+ kconfig='''
+CONFIG_CPU_SUBTYPE_SH7751R=y
+CONFIG_MEMORY_START=0x0c000000
+CONFIG_SH_RTS7751R2D=y
+CONFIG_RTS7751R2D_PLUS=y
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_CMDLINE_EXTEND=y
+''',
+ qemu_arch='sh4',
+ kernel_path='arch/sh/boot/zImage',
+ kernel_command_line='console=ttySC1',
+ serial='null',
+ extra_qemu_params=[
+ '-machine', 'r2d',
+ '-serial', 'mon:stdio'])
diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py
new file mode 100644
index 000000000000..2019550a1b69
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/sparc.py
@@ -0,0 +1,13 @@
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='sparc',
+ kconfig='''
+CONFIG_KUNIT_FAULT_TEST=n
+CONFIG_SPARC32=y
+CONFIG_SERIAL_SUNZILOG=y
+CONFIG_SERIAL_SUNZILOG_CONSOLE=y
+''',
+ qemu_arch='sparc',
+ kernel_path='arch/sparc/boot/zImage',
+ kernel_command_line='console=ttyS0 mem=256M',
+ extra_qemu_params=['-m', '256'])
diff --git a/tools/testing/kunit/qemu_configs/sparc64.py b/tools/testing/kunit/qemu_configs/sparc64.py
new file mode 100644
index 000000000000..53d4e5a8c972
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/sparc64.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='sparc',
+ kconfig='''
+CONFIG_64BIT=y
+CONFIG_SPARC64=y
+CONFIG_PCI=y
+CONFIG_SERIAL_SUNSU=y
+CONFIG_SERIAL_SUNSU_CONSOLE=y
+''',
+ qemu_arch='sparc64',
+ kernel_path='arch/sparc/boot/image',
+ kernel_command_line='console=ttyS0 kunit_shutdown=poweroff',
+ extra_qemu_params=[])
diff --git a/tools/testing/kunit/qemu_configs/x86_64.py b/tools/testing/kunit/qemu_configs/x86_64.py
new file mode 100644
index 000000000000..4a6bf4e048f5
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/x86_64.py
@@ -0,0 +1,12 @@
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='x86_64',
+ kconfig='''
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y''',
+ qemu_arch='x86_64',
+ kernel_path='arch/x86/boot/bzImage',
+ kernel_command_line='console=ttyS0',
+ # qboot is faster than SeaBIOS and doesn't mess up
+ # the terminal.
+ extra_qemu_params=['-bios', 'qboot.rom'])
diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py
new file mode 100755
index 000000000000..c6d494ea3373
--- /dev/null
+++ b/tools/testing/kunit/run_checks.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# This file runs some basic checks to verify kunit works.
+# It is only of interest if you're making changes to KUnit itself.
+#
+# Copyright (C) 2021, Google LLC.
+# Author: Daniel Latypov <dlatypov@google.com.com>
+
+from concurrent import futures
+import datetime
+import os
+import shutil
+import subprocess
+import sys
+import textwrap
+from typing import Dict, List, Sequence
+
+ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__))
+TIMEOUT = datetime.timedelta(minutes=5).total_seconds()
+
+commands: Dict[str, Sequence[str]] = {
+ 'kunit_tool_test.py': ['./kunit_tool_test.py'],
+ 'kunit smoke test': ['./kunit.py', 'run', '--kunitconfig=lib/kunit', '--build_dir=kunit_run_checks'],
+ 'pytype': ['/bin/sh', '-c', 'pytype *.py'],
+ 'mypy': ['mypy', '--config-file', 'mypy.ini', '--exclude', '_test.py$', '--exclude', 'qemu_configs/', '.'],
+}
+
+# The user might not have mypy or pytype installed, skip them if so.
+# Note: you can install both via `$ pip install mypy pytype`
+necessary_deps : Dict[str, str] = {
+ 'pytype': 'pytype',
+ 'mypy': 'mypy',
+}
+
+def main(argv: Sequence[str]) -> None:
+ if argv:
+ raise RuntimeError('This script takes no arguments')
+
+ future_to_name: Dict[futures.Future[None], str] = {}
+ executor = futures.ThreadPoolExecutor(max_workers=len(commands))
+ for name, argv in commands.items():
+ if name in necessary_deps and shutil.which(necessary_deps[name]) is None:
+ print(f'{name}: SKIPPED, {necessary_deps[name]} not in $PATH')
+ continue
+ f = executor.submit(run_cmd, argv)
+ future_to_name[f] = name
+
+ has_failures = False
+ print(f'Waiting on {len(future_to_name)} checks ({", ".join(future_to_name.values())})...')
+ for f in futures.as_completed(future_to_name.keys()):
+ name = future_to_name[f]
+ ex = f.exception()
+ if not ex:
+ print(f'{name}: PASSED')
+ continue
+
+ has_failures = True
+ if isinstance(ex, subprocess.TimeoutExpired):
+ print(f'{name}: TIMED OUT')
+ elif isinstance(ex, subprocess.CalledProcessError):
+ print(f'{name}: FAILED')
+ else:
+ print(f'{name}: unexpected exception: {ex}')
+ continue
+
+ output = ex.output
+ if output:
+ print(textwrap.indent(output.decode(), '> '))
+ executor.shutdown()
+
+ if has_failures:
+ sys.exit(1)
+
+
+def run_cmd(argv: Sequence[str]) -> None:
+ subprocess.check_output(argv, stderr=subprocess.STDOUT, cwd=ABS_TOOL_PATH, timeout=TIMEOUT)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed_nested.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed_nested.log
new file mode 100644
index 000000000000..9d5b04fe43a6
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed_nested.log
@@ -0,0 +1,34 @@
+TAP version 14
+1..2
+ # Subtest: sysctl_test
+ 1..4
+ # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
+ ok 1 - sysctl_test_dointvec_null_tbl_data
+ # Subtest: example
+ 1..2
+ init_suite
+ # example_simple_test: initializing
+ # example_simple_test: example_simple_test passed
+ ok 1 - example_simple_test
+ # example_mock_test: initializing
+ # example_mock_test: example_mock_test passed
+ ok 2 - example_mock_test
+ kunit example: all tests passed
+ ok 2 - example
+ # sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed
+ ok 3 - sysctl_test_dointvec_table_len_is_zero
+ # sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed
+ ok 4 - sysctl_test_dointvec_table_read_but_position_set
+kunit sysctl_test: all tests passed
+ok 1 - sysctl_test
+ # Subtest: example
+ 1..2
+init_suite
+ # example_simple_test: initializing
+ # example_simple_test: example_simple_test passed
+ ok 1 - example_simple_test
+ # example_mock_test: initializing
+ # example_mock_test: example_mock_test passed
+ ok 2 - example_mock_test
+kunit example: all tests passed
+ok 2 - example
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
deleted file mode 100644
index 4d97f6708c4a..000000000000
--- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log
+++ /dev/null
@@ -1,70 +0,0 @@
-printk: console [tty0] enabled
-printk: console [mc-1] enabled
-TAP version 14
-1..2
- # Subtest: sysctl_test
- 1..8
- # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
- ok 1 - sysctl_test_dointvec_null_tbl_data
- # sysctl_test_dointvec_table_maxlen_unset: sysctl_test_dointvec_table_maxlen_unset passed
- ok 2 - sysctl_test_dointvec_table_maxlen_unset
- # sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed
- ok 3 - sysctl_test_dointvec_table_len_is_zero
- # sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed
- ok 4 - sysctl_test_dointvec_table_read_but_position_set
- # sysctl_test_dointvec_happy_single_positive: sysctl_test_dointvec_happy_single_positive passed
- ok 5 - sysctl_test_dointvec_happy_single_positive
- # sysctl_test_dointvec_happy_single_negative: sysctl_test_dointvec_happy_single_negative passed
- ok 6 - sysctl_test_dointvec_happy_single_negative
- # sysctl_test_dointvec_single_less_int_min: sysctl_test_dointvec_single_less_int_min passed
- ok 7 - sysctl_test_dointvec_single_less_int_min
- # sysctl_test_dointvec_single_greater_int_max: sysctl_test_dointvec_single_greater_int_max passed
- ok 8 - sysctl_test_dointvec_single_greater_int_max
-kunit sysctl_test: all tests passed
-ok 1 - sysctl_test
- # Subtest: example
- 1..2
-init_suite
- # example_simple_test: initializing
-Stack:
- 6016f7db 6f81bd30 6f81bdd0 60021450
- 6024b0e8 60021440 60018bbe 16f81bdc0
- 00000001 6f81bd30 6f81bd20 6f81bdd0
-Call Trace:
- [<6016f7db>] ? kunit_try_run_case+0xab/0xf0
- [<60021450>] ? set_signals+0x0/0x60
- [<60021440>] ? get_signals+0x0/0x10
- [<60018bbe>] ? kunit_um_run_try_catch+0x5e/0xc0
- [<60021450>] ? set_signals+0x0/0x60
- [<60021440>] ? get_signals+0x0/0x10
- [<60018bb3>] ? kunit_um_run_try_catch+0x53/0xc0
- [<6016f321>] ? kunit_run_case_catch_errors+0x121/0x1a0
- [<60018b60>] ? kunit_um_run_try_catch+0x0/0xc0
- [<600189e0>] ? kunit_um_throw+0x0/0x180
- [<6016f730>] ? kunit_try_run_case+0x0/0xf0
- [<6016f600>] ? kunit_catch_run_case+0x0/0x130
- [<6016edd0>] ? kunit_vprintk+0x0/0x30
- [<6016ece0>] ? kunit_fail+0x0/0x40
- [<6016eca0>] ? kunit_abort+0x0/0x40
- [<6016ed20>] ? kunit_printk_emit+0x0/0xb0
- [<6016f200>] ? kunit_run_case_catch_errors+0x0/0x1a0
- [<6016f46e>] ? kunit_run_tests+0xce/0x260
- [<6005b390>] ? unregister_console+0x0/0x190
- [<60175b70>] ? suite_kunit_initexample_test_suite+0x0/0x20
- [<60001cbb>] ? do_one_initcall+0x0/0x197
- [<60001d47>] ? do_one_initcall+0x8c/0x197
- [<6005cd20>] ? irq_to_desc+0x0/0x30
- [<60002005>] ? kernel_init_freeable+0x1b3/0x272
- [<6005c5ec>] ? printk+0x0/0x9b
- [<601c0086>] ? kernel_init+0x26/0x160
- [<60014442>] ? new_thread_handler+0x82/0xc0
-
- # example_simple_test: kunit test case crashed!
- # example_simple_test: example_simple_test failed
- not ok 1 - example_simple_test
- # example_mock_test: initializing
- # example_mock_test: example_mock_test passed
- ok 2 - example_mock_test
-kunit example: one or more tests failed
-not ok 2 - example
-List of all partitions:
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log
new file mode 100644
index 000000000000..30d9ef18bcec
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log
@@ -0,0 +1,15 @@
+TAP version 13
+1..3
+# selftests: membarrier: membarrier_test_single_thread
+# TAP version 13
+# 1..2
+# ok 1 sys_membarrier available
+# ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected
+ok 1 selftests: membarrier: membarrier_test_single_thread
+# selftests: membarrier: membarrier_test_multi_thread
+# TAP version 13
+# 1..2
+# ok 1 sys_membarrier available
+# ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected
+ok 2 selftests: membarrier: membarrier_test_multi_thread
+ok 3 # SKIP selftests: membarrier: membarrier_test_multi_thread
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-missing_plan.log b/tools/testing/kunit/test_data/test_is_test_passed-missing_plan.log
new file mode 100644
index 000000000000..5cd17b7f818a
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-missing_plan.log
@@ -0,0 +1,31 @@
+KTAP version 1
+ # Subtest: sysctl_test
+ # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
+ ok 1 - sysctl_test_dointvec_null_tbl_data
+ # sysctl_test_dointvec_table_maxlen_unset: sysctl_test_dointvec_table_maxlen_unset passed
+ ok 2 - sysctl_test_dointvec_table_maxlen_unset
+ # sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed
+ ok 3 - sysctl_test_dointvec_table_len_is_zero
+ # sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed
+ ok 4 - sysctl_test_dointvec_table_read_but_position_set
+ # sysctl_test_dointvec_happy_single_positive: sysctl_test_dointvec_happy_single_positive passed
+ ok 5 - sysctl_test_dointvec_happy_single_positive
+ # sysctl_test_dointvec_happy_single_negative: sysctl_test_dointvec_happy_single_negative passed
+ ok 6 - sysctl_test_dointvec_happy_single_negative
+ # sysctl_test_dointvec_single_less_int_min: sysctl_test_dointvec_single_less_int_min passed
+ ok 7 - sysctl_test_dointvec_single_less_int_min
+ # sysctl_test_dointvec_single_greater_int_max: sysctl_test_dointvec_single_greater_int_max passed
+ ok 8 - sysctl_test_dointvec_single_greater_int_max
+kunit sysctl_test: all tests passed
+ok 1 - sysctl_test
+ # Subtest: example
+ 1..2
+init_suite
+ # example_simple_test: initializing
+ # example_simple_test: example_simple_test passed
+ ok 1 - example_simple_test
+ # example_mock_test: initializing
+ # example_mock_test: example_mock_test passed
+ ok 2 - example_mock_test
+kunit example: all tests passed
+ok 2 - example
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log
new file mode 100644
index 000000000000..4f81876ee6f1
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log
@@ -0,0 +1,7 @@
+TAP version 14
+1..1
+ # Subtest: suite
+ 1..1
+ # Subtest: case
+ ok 1 - case
+ok 1 - suite
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_no_header.log
index ba69f5c94b75..ba69f5c94b75 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_no_header.log
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log
new file mode 100644
index 000000000000..5f48ee659d40
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log
@@ -0,0 +1,2 @@
+TAP version 14
+1..0
diff --git a/tools/testing/kunit/test_data/test_parse_attributes.log b/tools/testing/kunit/test_data/test_parse_attributes.log
new file mode 100644
index 000000000000..1a13c371fe9d
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_parse_attributes.log
@@ -0,0 +1,9 @@
+KTAP version 1
+1..1
+ KTAP version 1
+ # Subtest: suite
+ # module: example
+ 1..1
+ # test.speed: slow
+ ok 1 test
+ok 1 suite \ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_parse_ktap_output.log b/tools/testing/kunit/test_data/test_parse_ktap_output.log
new file mode 100644
index 000000000000..ccdf244e5303
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_parse_ktap_output.log
@@ -0,0 +1,8 @@
+KTAP version 1
+1..1
+ KTAP version 1
+ 1..3
+ ok 1 case_1
+ ok 2 case_2
+ ok 3 case_3
+ok 1 suite
diff --git a/tools/testing/kunit/test_data/test_parse_subtest_header.log b/tools/testing/kunit/test_data/test_parse_subtest_header.log
new file mode 100644
index 000000000000..216631092e7b
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_parse_subtest_header.log
@@ -0,0 +1,7 @@
+KTAP version 1
+1..1
+ KTAP version 1
+ # Subtest: suite
+ 1..1
+ ok 1 test
+ok 1 suite \ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_skip_all_tests.log b/tools/testing/kunit/test_data/test_skip_all_tests.log
new file mode 100644
index 000000000000..2ea6e6d14fff
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_skip_all_tests.log
@@ -0,0 +1,15 @@
+TAP version 14
+1..2
+ # Subtest: string-stream-test
+ 1..3
+ ok 1 - string_stream_test_empty_on_creation # SKIP all tests skipped
+ ok 2 - string_stream_test_not_empty_after_add # SKIP all tests skipped
+ ok 3 - string_stream_test_get_string # SKIP all tests skipped
+ok 1 - string-stream-test # SKIP
+ # Subtest: example
+ 1..2
+ # example_simple_test: initializing
+ ok 1 - example_simple_test # SKIP all tests skipped
+ # example_skip_test: initializing
+ ok 2 - example_skip_test # SKIP this test should be skipped
+ok 2 - example # SKIP
diff --git a/tools/testing/kunit/test_data/test_skip_tests.log b/tools/testing/kunit/test_data/test_skip_tests.log
new file mode 100644
index 000000000000..79b326e31274
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_skip_tests.log
@@ -0,0 +1,15 @@
+TAP version 14
+1..2
+ # Subtest: string-stream-test
+ 1..3
+ ok 1 - string_stream_test_empty_on_creation
+ ok 2 - string_stream_test_not_empty_after_add
+ ok 3 - string_stream_test_get_string
+ok 1 - string-stream-test
+ # Subtest: example
+ 1..2
+ # example_simple_test: initializing
+ ok 1 - example_simple_test
+ # example_skip_test: initializing
+ ok 2 - example_skip_test # SKIP this test should be skipped
+ok 2 - example
diff --git a/tools/testing/kunit/test_data/test_strip_hyphen.log b/tools/testing/kunit/test_data/test_strip_hyphen.log
new file mode 100644
index 000000000000..92ac7c24b374
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_strip_hyphen.log
@@ -0,0 +1,16 @@
+KTAP version 1
+1..2
+ # Subtest: sysctl_test
+ 1..1
+ # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
+ ok 1 - sysctl_test_dointvec_null_tbl_data
+kunit sysctl_test: all tests passed
+ok 1 - sysctl_test
+ # Subtest: example
+ 1..1
+init_suite
+ # example_simple_test: initializing
+ # example_simple_test: example_simple_test passed
+ ok 1 example_simple_test
+kunit example: all tests passed
+ok 2 example
diff --git a/tools/testing/memblock/.gitignore b/tools/testing/memblock/.gitignore
new file mode 100644
index 000000000000..4cc7cd5aac2b
--- /dev/null
+++ b/tools/testing/memblock/.gitignore
@@ -0,0 +1,5 @@
+main
+memblock.c
+linux/memblock.h
+asm/asm.h
+asm/cmpxchg.h
diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile
new file mode 100644
index 000000000000..d80982ccdc20
--- /dev/null
+++ b/tools/testing/memblock/Makefile
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Memblock simulator requires AddressSanitizer (libasan) and liburcu development
+# packages installed
+CFLAGS += -I. -I../../include -Wall -O2 -fsanitize=address \
+ -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT
+LDFLAGS += -fsanitize=address -fsanitize=undefined
+TARGETS = main
+TEST_OFILES = tests/alloc_nid_api.o tests/alloc_helpers_api.o tests/alloc_api.o \
+ tests/basic_api.o tests/common.o tests/alloc_exact_nid_api.o
+DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o cmdline.o
+OFILES = main.o $(DEP_OFILES) $(TEST_OFILES)
+EXTR_SRC = ../../../mm/memblock.c
+
+ifeq ($(BUILD), 32)
+ CFLAGS += -m32
+ LDFLAGS += -m32
+endif
+
+# Process user parameters
+include scripts/Makefile.include
+
+main: $(OFILES)
+
+$(OFILES): include
+
+include: ../../../include/linux/memblock.h ../../include/linux/*.h \
+ ../../include/asm/*.h
+
+ @mkdir -p linux
+ test -L linux/memblock.h || ln -s ../../../../include/linux/memblock.h linux/memblock.h
+ test -L asm/asm.h || ln -s ../../../arch/x86/include/asm/asm.h asm/asm.h
+ test -L asm/cmpxchg.h || ln -s ../../../arch/x86/include/asm/cmpxchg.h asm/cmpxchg.h
+
+memblock.c: $(EXTR_SRC)
+ test -L memblock.c || ln -s $(EXTR_SRC) memblock.c
+
+clean:
+ $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/asm.h asm/cmpxchg.h
+
+help:
+ @echo 'Memblock simulator'
+ @echo ''
+ @echo 'Available targets:'
+ @echo ' main - Build the memblock simulator'
+ @echo ' clean - Remove generated files and symlinks in the directory'
+ @echo ''
+ @echo 'Configuration:'
+ @echo ' make MEMBLOCK_DEBUG=1 - enable memblock_dbg() messages'
+ @echo ' make NUMA=1 - simulate enabled NUMA'
+ @echo ' make 32BIT_PHYS_ADDR_T=1 - Use 32 bit physical addresses'
+
+vpath %.c ../../lib
+
+.PHONY: clean include help
diff --git a/tools/testing/memblock/README b/tools/testing/memblock/README
new file mode 100644
index 000000000000..7ca437d81806
--- /dev/null
+++ b/tools/testing/memblock/README
@@ -0,0 +1,118 @@
+==================
+Memblock simulator
+==================
+
+Introduction
+============
+
+Memblock is a boot time memory allocator[1] that manages memory regions before
+the actual memory management is initialized. Its APIs allow to register physical
+memory regions, mark them as available or reserved, allocate a block of memory
+within the requested range and/or in specific NUMA node, and many more.
+
+Because it is used so early in the booting process, testing and debugging it is
+difficult. This test suite, usually referred as memblock simulator, is
+an attempt at testing the memblock mechanism. It runs one monolithic test that
+consist of a series of checks that exercise both the basic operations and
+allocation functionalities of memblock. The main data structure of the boot time
+memory allocator is initialized at the build time, so the checks here reuse its
+instance throughout the duration of the test. To ensure that tests don't affect
+each other, region arrays are reset in between.
+
+As this project uses the actual memblock code and has to run in user space,
+some of the kernel definitions were stubbed by the initial commit that
+introduced memblock simulator (commit 16802e55dea9 ("memblock tests: Add
+skeleton of the memblock simulator")) and a few preparation commits just
+before it. Most of them don't match the kernel implementation, so one should
+consult them first before making any significant changes to the project.
+
+Usage
+=====
+
+To run the tests, build the main target and run it:
+
+$ make && ./main
+
+A successful run produces no output. It is possible to control the behavior
+by passing options from command line. For example, to include verbose output,
+append the `-v` options when you run the tests:
+
+$ ./main -v
+
+This will print information about which functions are being tested and the
+number of test cases that passed.
+
+For the full list of options from command line, see `./main --help`.
+
+It is also possible to override different configuration parameters to change
+the test functions. For example, to simulate enabled NUMA, use:
+
+$ make NUMA=1
+
+For the full list of build options, see `make help`.
+
+Project structure
+=================
+
+The project has one target, main, which calls a group of checks for basic and
+allocation functions. Tests for each group are defined in dedicated files, as it
+can be seen here:
+
+memblock
+|-- asm ------------------,
+|-- lib |-- implement function and struct stubs
+|-- linux ------------------'
+|-- scripts
+| |-- Makefile.include -- handles `make` parameters
+|-- tests
+| |-- alloc_api.(c|h) -- memblock_alloc tests
+| |-- alloc_helpers_api.(c|h) -- memblock_alloc_from tests
+| |-- alloc_nid_api.(c|h) -- memblock_alloc_try_nid tests
+| |-- basic_api.(c|h) -- memblock_add/memblock_reserve/... tests
+| |-- common.(c|h) -- helper functions for resetting memblock;
+|-- main.c --------------. dummy physical memory definition
+|-- Makefile `- test runner
+|-- README
+|-- TODO
+|-- .gitignore
+
+Simulating physical memory
+==========================
+
+Some allocation functions clear the memory in the process, so it is required for
+memblock to track valid memory ranges. To achieve this, the test suite registers
+with memblock memory stored by test_memory struct. It is a small wrapper that
+points to a block of memory allocated via malloc. For each group of allocation
+tests, dummy physical memory is allocated, added to memblock, and then released
+at the end of the test run. The structure of a test runner checking allocation
+functions is as follows:
+
+int memblock_alloc_foo_checks(void)
+{
+ reset_memblock_attributes(); /* data structure reset */
+ dummy_physical_memory_init(); /* allocate and register memory */
+
+ (...allocation checks...)
+
+ dummy_physical_memory_cleanup(); /* free the memory */
+}
+
+There's no need to explicitly free the dummy memory from memblock via
+memblock_free() call. The entry will be erased by reset_memblock_regions(),
+called at the beginning of each test.
+
+Known issues
+============
+
+1. Requesting a specific NUMA node via memblock_alloc_node() does not work as
+ intended. Once the fix is in place, tests for this function can be added.
+
+2. Tests for memblock_alloc_low() can't be easily implemented. The function uses
+ ARCH_LOW_ADDRESS_LIMIT marco, which can't be changed to point at the low
+ memory of the memory_block.
+
+References
+==========
+
+1. Boot time memory management documentation page:
+ https://www.kernel.org/doc/html/latest/core-api/boot-time-mm.html
diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO
new file mode 100644
index 000000000000..e306c90c535f
--- /dev/null
+++ b/tools/testing/memblock/TODO
@@ -0,0 +1,5 @@
+TODO
+=====
+
+1. Add tests for memblock_alloc_node() to check if the correct NUMA node is set
+ for the new region
diff --git a/tools/testing/memblock/asm/dma.h b/tools/testing/memblock/asm/dma.h
new file mode 100644
index 000000000000..13ff8e5d22ef
--- /dev/null
+++ b/tools/testing/memblock/asm/dma.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_DMA_H
+#define _TOOLS_DMA_H
+
+#endif
diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h
new file mode 100644
index 000000000000..0ab4b53bb4f3
--- /dev/null
+++ b/tools/testing/memblock/internal.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MM_INTERNAL_H
+#define _MM_INTERNAL_H
+
+/*
+ * Enable memblock_dbg() messages
+ */
+#ifdef MEMBLOCK_DEBUG
+static int memblock_debug = 1;
+#endif
+
+#define pr_warn_ratelimited(fmt, ...) printf(fmt, ##__VA_ARGS__)
+
+bool mirrored_kernelcore = false;
+
+struct page {};
+
+void memblock_free_pages(struct page *page, unsigned long pfn,
+ unsigned int order)
+{
+}
+
+static inline void accept_memory(phys_addr_t start, unsigned long size)
+{
+}
+
+static inline unsigned long free_reserved_area(void *start, void *end,
+ int poison, const char *s)
+{
+ return 0;
+}
+
+#endif
diff --git a/tools/testing/memblock/lib/slab.c b/tools/testing/memblock/lib/slab.c
new file mode 100644
index 000000000000..6be6020328fb
--- /dev/null
+++ b/tools/testing/memblock/lib/slab.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/slab.h>
+
+enum slab_state slab_state;
+
+bool slab_is_available(void)
+{
+ return slab_state >= UP;
+}
diff --git a/tools/testing/memblock/linux/kernel.h b/tools/testing/memblock/linux/kernel.h
new file mode 100644
index 000000000000..4d1012d5be6e
--- /dev/null
+++ b/tools/testing/memblock/linux/kernel.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _MEMBLOCK_LINUX_KERNEL_H
+#define _MEMBLOCK_LINUX_KERNEL_H
+
+#include <../../include/linux/kernel.h>
+#include <linux/errno.h>
+#include <string.h>
+#include <linux/printk.h>
+#include <linux/linkage.h>
+#include <linux/kconfig.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+
+#endif
diff --git a/tools/testing/memblock/linux/kmemleak.h b/tools/testing/memblock/linux/kmemleak.h
new file mode 100644
index 000000000000..5fed13bb9ec4
--- /dev/null
+++ b/tools/testing/memblock/linux/kmemleak.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _KMEMLEAK_H
+#define _KMEMLEAK_H
+
+static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size)
+{
+}
+
+static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
+ gfp_t gfp)
+{
+}
+
+static inline void dump_stack(void)
+{
+}
+
+#endif
diff --git a/tools/testing/memblock/linux/memory_hotplug.h b/tools/testing/memblock/linux/memory_hotplug.h
new file mode 100644
index 000000000000..dabe2c556858
--- /dev/null
+++ b/tools/testing/memblock/linux/memory_hotplug.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MEMORY_HOTPLUG_H
+#define _LINUX_MEMORY_HOTPLUG_H
+
+#include <linux/numa.h>
+#include <linux/pfn.h>
+#include <linux/cache.h>
+#include <linux/types.h>
+
+extern bool movable_node_enabled;
+
+static inline bool movable_node_is_enabled(void)
+{
+ return movable_node_enabled;
+}
+
+#endif
diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h
new file mode 100644
index 000000000000..bb682659a12d
--- /dev/null
+++ b/tools/testing/memblock/linux/mmzone.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_MMZONE_H
+#define _TOOLS_MMZONE_H
+
+#include <linux/atomic.h>
+#include <linux/memory_hotplug.h>
+
+struct pglist_data *first_online_pgdat(void);
+struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
+
+#define for_each_online_pgdat(pgdat) \
+ for (pgdat = first_online_pgdat(); \
+ pgdat; \
+ pgdat = next_online_pgdat(pgdat))
+
+enum zone_type {
+ __MAX_NR_ZONES
+};
+
+#define MAX_NR_ZONES __MAX_NR_ZONES
+#define MAX_PAGE_ORDER 10
+#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER)
+
+#define pageblock_order MAX_PAGE_ORDER
+#define pageblock_nr_pages BIT(pageblock_order)
+#define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages)
+#define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages)
+
+struct zone {
+ atomic_long_t managed_pages;
+};
+
+typedef struct pglist_data {
+ struct zone node_zones[MAX_NR_ZONES];
+
+} pg_data_t;
+
+#endif
diff --git a/tools/testing/memblock/linux/mutex.h b/tools/testing/memblock/linux/mutex.h
new file mode 100644
index 000000000000..ae3f497165d6
--- /dev/null
+++ b/tools/testing/memblock/linux/mutex.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MUTEX_H
+#define _MUTEX_H
+
+#define DEFINE_MUTEX(name) int name
+
+static inline void dummy_mutex_guard(int *name)
+{
+}
+
+#define guard(mutex) \
+ dummy_##mutex##_guard
+
+#endif /* _MUTEX_H */ \ No newline at end of file
diff --git a/tools/testing/memblock/linux/printk.h b/tools/testing/memblock/linux/printk.h
new file mode 100644
index 000000000000..61af424d8c6c
--- /dev/null
+++ b/tools/testing/memblock/linux/printk.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PRINTK_H
+#define _PRINTK_H
+
+#include <stdio.h>
+#include <asm/bug.h>
+
+/*
+ * memblock_dbg is called with u64 arguments that don't match the "%llu"
+ * specifier in printf. This results in warnings that cannot be fixed without
+ * modifying memblock.c, which we wish to avoid. As these messaged are not used
+ * in testing anyway, the mismatch can be ignored.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat"
+#define printk printf
+#pragma GCC diagnostic push
+
+#define pr_info printk
+#define pr_debug printk
+#define pr_cont printk
+#define pr_err printk
+#define pr_warn printk
+
+#endif
diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c
new file mode 100644
index 000000000000..278f9dec5008
--- /dev/null
+++ b/tools/testing/memblock/main.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "tests/basic_api.h"
+#include "tests/alloc_api.h"
+#include "tests/alloc_helpers_api.h"
+#include "tests/alloc_nid_api.h"
+#include "tests/alloc_exact_nid_api.h"
+#include "tests/common.h"
+
+int main(int argc, char **argv)
+{
+ parse_args(argc, argv);
+ memblock_basic_checks();
+ memblock_alloc_checks();
+ memblock_alloc_helpers_checks();
+ memblock_alloc_nid_checks();
+ memblock_alloc_exact_nid_checks();
+
+ return 0;
+}
diff --git a/tools/testing/memblock/mmzone.c b/tools/testing/memblock/mmzone.c
new file mode 100644
index 000000000000..d3d58851864e
--- /dev/null
+++ b/tools/testing/memblock/mmzone.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/mmzone.h>
+
+struct pglist_data *first_online_pgdat(void)
+{
+ return NULL;
+}
+
+struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
+{
+ return NULL;
+}
+
+void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid)
+{
+}
+
+void atomic_long_set(atomic_long_t *v, long i)
+{
+}
diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include
new file mode 100644
index 000000000000..998281723590
--- /dev/null
+++ b/tools/testing/memblock/scripts/Makefile.include
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+# Definitions for user-provided arguments
+
+# Simulate CONFIG_NUMA=y
+ifeq ($(NUMA), 1)
+ CFLAGS += -D CONFIG_NUMA -D CONFIG_NODES_SHIFT=4
+endif
+
+# Use 32 bit physical addresses.
+# Remember to install 32-bit version of dependencies.
+ifeq ($(32BIT_PHYS_ADDR_T), 1)
+ CFLAGS += -m32 -U CONFIG_PHYS_ADDR_T_64BIT
+ LDFLAGS += -m32
+endif
+
+# Enable memblock_dbg() messages
+ifeq ($(MEMBLOCK_DEBUG), 1)
+ CFLAGS += -D MEMBLOCK_DEBUG
+endif
diff --git a/tools/testing/memblock/tests/alloc_api.c b/tools/testing/memblock/tests/alloc_api.c
new file mode 100644
index 000000000000..c55f67dd367d
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_api.c
@@ -0,0 +1,884 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "alloc_api.h"
+
+static int alloc_test_flags = TEST_F_NONE;
+
+static inline const char * const get_memblock_alloc_name(int flags)
+{
+ if (flags & TEST_F_RAW)
+ return "memblock_alloc_raw";
+ return "memblock_alloc";
+}
+
+static inline void *run_memblock_alloc(phys_addr_t size, phys_addr_t align)
+{
+ if (alloc_test_flags & TEST_F_RAW)
+ return memblock_alloc_raw(size, align);
+ return memblock_alloc(size, align);
+}
+
+/*
+ * A simple test that tries to allocate a small memory region.
+ * Expect to allocate an aligned region near the end of the available memory.
+ */
+static int alloc_top_down_simple_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_2;
+ phys_addr_t expected_start;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ expected_start = memblock_end_of_DRAM() - SMP_CACHE_BYTES;
+
+ allocated_ptr = run_memblock_alloc(size, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, expected_start);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory next to a reserved region that starts at
+ * the misaligned address. Expect to create two separate entries, with the new
+ * entry aligned to the provided alignment:
+ *
+ * +
+ * | +--------+ +--------|
+ * | | rgn2 | | rgn1 |
+ * +------------+--------+---------+--------+
+ * ^
+ * |
+ * Aligned address boundary
+ *
+ * The allocation direction is top-down and region arrays are sorted from lower
+ * to higher addresses, so the new region will be the first entry in
+ * memory.reserved array. The previously reserved region does not get modified.
+ * Region counter and total size get updated.
+ */
+static int alloc_top_down_disjoint_check(void)
+{
+ /* After allocation, this will point to the "old" region */
+ struct memblock_region *rgn1 = &memblock.reserved.regions[1];
+ struct memblock_region *rgn2 = &memblock.reserved.regions[0];
+ struct region r1;
+ void *allocated_ptr = NULL;
+ phys_addr_t r2_size = SZ_16;
+ /* Use custom alignment */
+ phys_addr_t alignment = SMP_CACHE_BYTES * 2;
+ phys_addr_t total_size;
+ phys_addr_t expected_start;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_end_of_DRAM() - SZ_2;
+ r1.size = SZ_2;
+
+ total_size = r1.size + r2_size;
+ expected_start = memblock_end_of_DRAM() - alignment;
+
+ memblock_reserve(r1.base, r1.size);
+
+ allocated_ptr = run_memblock_alloc(r2_size, alignment);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r2_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn1->size, r1.size);
+ ASSERT_EQ(rgn1->base, r1.base);
+
+ ASSERT_EQ(rgn2->size, r2_size);
+ ASSERT_EQ(rgn2->base, expected_start);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is enough space at the end
+ * of the previously reserved block (i.e. first fit):
+ *
+ * | +--------+--------------|
+ * | | r1 | r2 |
+ * +--------------+--------+--------------+
+ *
+ * Expect a merge of both regions. Only the region size gets updated.
+ */
+static int alloc_top_down_before_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ /*
+ * The first region ends at the aligned address to test region merging
+ */
+ phys_addr_t r1_size = SMP_CACHE_BYTES;
+ phys_addr_t r2_size = SZ_512;
+ phys_addr_t total_size = r1_size + r2_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ memblock_reserve_kern(memblock_end_of_DRAM() - total_size, r1_size);
+
+ allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r2_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, total_size);
+ ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - total_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is not enough space at the
+ * end of the previously reserved block (i.e. second fit):
+ *
+ * | +-----------+------+ |
+ * | | r2 | r1 | |
+ * +------------+-----------+------+-----+
+ *
+ * Expect a merge of both regions. Both the base address and size of the region
+ * get updated.
+ */
+static int alloc_top_down_after_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ struct region r1;
+ void *allocated_ptr = NULL;
+ phys_addr_t r2_size = SZ_512;
+ phys_addr_t total_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ /*
+ * The first region starts at the aligned address to test region merging
+ */
+ r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES;
+ r1.size = SZ_8;
+
+ total_size = r1.size + r2_size;
+
+ memblock_reserve_kern(r1.base, r1.size);
+
+ allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r2_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, total_size);
+ ASSERT_EQ(rgn->base, r1.base - r2_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there are two reserved regions with
+ * a gap too small to fit the new region:
+ *
+ * | +--------+----------+ +------|
+ * | | r3 | r2 | | r1 |
+ * +-------+--------+----------+---+------+
+ *
+ * Expect to allocate a region before the one that starts at the lower address,
+ * and merge them into one. The region counter and total size fields get
+ * updated.
+ */
+static int alloc_top_down_second_fit_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ struct region r1, r2;
+ void *allocated_ptr = NULL;
+ phys_addr_t r3_size = SZ_1K;
+ phys_addr_t total_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_end_of_DRAM() - SZ_512;
+ r1.size = SZ_512;
+
+ r2.base = r1.base - SZ_512;
+ r2.size = SZ_256;
+
+ total_size = r1.size + r2.size + r3_size;
+
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r3_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, r2.size + r3_size);
+ ASSERT_EQ(rgn->base, r2.base - r3_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there are two reserved regions with
+ * a gap big enough to accommodate the new region:
+ *
+ * | +--------+--------+--------+ |
+ * | | r2 | r3 | r1 | |
+ * +-----+--------+--------+--------+-----+
+ *
+ * Expect to merge all of them, creating one big entry in memblock.reserved
+ * array. The region counter and total size fields get updated.
+ */
+static int alloc_in_between_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ struct region r1, r2;
+ void *allocated_ptr = NULL;
+ phys_addr_t gap_size = SMP_CACHE_BYTES;
+ phys_addr_t r3_size = SZ_64;
+ /*
+ * Calculate regions size so there's just enough space for the new entry
+ */
+ phys_addr_t rgn_size = (MEM_SIZE - (2 * gap_size + r3_size)) / 2;
+ phys_addr_t total_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.size = rgn_size;
+ r1.base = memblock_end_of_DRAM() - (gap_size + rgn_size);
+
+ r2.size = rgn_size;
+ r2.base = memblock_start_of_DRAM() + gap_size;
+
+ total_size = r1.size + r2.size + r3_size;
+
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r3_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, total_size);
+ ASSERT_EQ(rgn->base, r1.base - r2.size - r3_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when the memory is filled with reserved
+ * regions with memory gaps too small to fit the new region:
+ *
+ * +-------+
+ * | new |
+ * +--+----+
+ * | +-----+ +-----+ +-----+ |
+ * | | res | | res | | res | |
+ * +----+-----+----+-----+----+-----+----+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_small_gaps_generic_check(void)
+{
+ void *allocated_ptr = NULL;
+ phys_addr_t region_size = SZ_1K;
+ phys_addr_t gap_size = SZ_256;
+ phys_addr_t region_end;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ region_end = memblock_start_of_DRAM();
+
+ while (region_end < memblock_end_of_DRAM()) {
+ memblock_reserve(region_end + gap_size, region_size);
+ region_end += gap_size + region_size;
+ }
+
+ allocated_ptr = run_memblock_alloc(region_size, SMP_CACHE_BYTES);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when all memory is reserved.
+ * Expect no allocation to happen.
+ */
+static int alloc_all_reserved_generic_check(void)
+{
+ void *allocated_ptr = NULL;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ /* Simulate full memory */
+ memblock_reserve(memblock_start_of_DRAM(), MEM_SIZE);
+
+ allocated_ptr = run_memblock_alloc(SZ_256, SMP_CACHE_BYTES);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when the memory is almost full,
+ * with not enough space left for the new region:
+ *
+ * +-------+
+ * | new |
+ * +-------+
+ * |-----------------------------+ |
+ * | reserved | |
+ * +-----------------------------+---+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_no_space_generic_check(void)
+{
+ void *allocated_ptr = NULL;
+ phys_addr_t available_size = SZ_256;
+ phys_addr_t reserved_size = MEM_SIZE - available_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ /* Simulate almost-full memory */
+ memblock_reserve(memblock_start_of_DRAM(), reserved_size);
+
+ allocated_ptr = run_memblock_alloc(SZ_1K, SMP_CACHE_BYTES);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when the memory is almost full,
+ * but there is just enough space left:
+ *
+ * |---------------------------+---------|
+ * | reserved | new |
+ * +---------------------------+---------+
+ *
+ * Expect to allocate memory and merge all the regions. The total size field
+ * gets updated.
+ */
+static int alloc_limited_space_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t available_size = SZ_256;
+ phys_addr_t reserved_size = MEM_SIZE - available_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ /* Simulate almost-full memory */
+ memblock_reserve_kern(memblock_start_of_DRAM(), reserved_size);
+
+ allocated_ptr = run_memblock_alloc(available_size, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, available_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, MEM_SIZE);
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, MEM_SIZE);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is no available memory
+ * registered (i.e. memblock.memory has only a dummy entry).
+ * Expect no allocation to happen.
+ */
+static int alloc_no_memory_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+
+ allocated_ptr = run_memblock_alloc(SZ_1K, SMP_CACHE_BYTES);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+ ASSERT_EQ(rgn->size, 0);
+ ASSERT_EQ(rgn->base, 0);
+ ASSERT_EQ(memblock.reserved.total_size, 0);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a region that is larger than the total size of
+ * available memory (memblock.memory):
+ *
+ * +-----------------------------------+
+ * | new |
+ * +-----------------------------------+
+ * | |
+ * | |
+ * +---------------------------------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_too_large_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ allocated_ptr = run_memblock_alloc(MEM_SIZE + SZ_2, SMP_CACHE_BYTES);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+ ASSERT_EQ(rgn->size, 0);
+ ASSERT_EQ(rgn->base, 0);
+ ASSERT_EQ(memblock.reserved.total_size, 0);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to allocate a small memory region.
+ * Expect to allocate an aligned region at the beginning of the available
+ * memory.
+ */
+static int alloc_bottom_up_simple_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ allocated_ptr = run_memblock_alloc(SZ_2, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, SZ_2, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, SZ_2);
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, SZ_2);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory next to a reserved region that starts at
+ * the misaligned address. Expect to create two separate entries, with the new
+ * entry aligned to the provided alignment:
+ *
+ * +
+ * | +----------+ +----------+ |
+ * | | rgn1 | | rgn2 | |
+ * +----+----------+---+----------+-----+
+ * ^
+ * |
+ * Aligned address boundary
+ *
+ * The allocation direction is bottom-up, so the new region will be the second
+ * entry in memory.reserved array. The previously reserved region does not get
+ * modified. Region counter and total size get updated.
+ */
+static int alloc_bottom_up_disjoint_check(void)
+{
+ struct memblock_region *rgn1 = &memblock.reserved.regions[0];
+ struct memblock_region *rgn2 = &memblock.reserved.regions[1];
+ struct region r1;
+ void *allocated_ptr = NULL;
+ phys_addr_t r2_size = SZ_16;
+ /* Use custom alignment */
+ phys_addr_t alignment = SMP_CACHE_BYTES * 2;
+ phys_addr_t total_size;
+ phys_addr_t expected_start;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_start_of_DRAM() + SZ_2;
+ r1.size = SZ_2;
+
+ total_size = r1.size + r2_size;
+ expected_start = memblock_start_of_DRAM() + alignment;
+
+ memblock_reserve(r1.base, r1.size);
+
+ allocated_ptr = run_memblock_alloc(r2_size, alignment);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r2_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn1->size, r1.size);
+ ASSERT_EQ(rgn1->base, r1.base);
+
+ ASSERT_EQ(rgn2->size, r2_size);
+ ASSERT_EQ(rgn2->base, expected_start);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is enough space at
+ * the beginning of the previously reserved block (i.e. first fit):
+ *
+ * |------------------+--------+ |
+ * | r1 | r2 | |
+ * +------------------+--------+---------+
+ *
+ * Expect a merge of both regions. Only the region size gets updated.
+ */
+static int alloc_bottom_up_before_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t r1_size = SZ_512;
+ phys_addr_t r2_size = SZ_128;
+ phys_addr_t total_size = r1_size + r2_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ memblock_reserve_kern(memblock_start_of_DRAM() + r1_size, r2_size);
+
+ allocated_ptr = run_memblock_alloc(r1_size, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r1_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, total_size);
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is not enough space at
+ * the beginning of the previously reserved block (i.e. second fit):
+ *
+ * | +--------+--------------+ |
+ * | | r1 | r2 | |
+ * +----+--------+--------------+---------+
+ *
+ * Expect a merge of both regions. Only the region size gets updated.
+ */
+static int alloc_bottom_up_after_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ struct region r1;
+ void *allocated_ptr = NULL;
+ phys_addr_t r2_size = SZ_512;
+ phys_addr_t total_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ /*
+ * The first region starts at the aligned address to test region merging
+ */
+ r1.base = memblock_start_of_DRAM() + SMP_CACHE_BYTES;
+ r1.size = SZ_64;
+
+ total_size = r1.size + r2_size;
+
+ memblock_reserve_kern(r1.base, r1.size);
+
+ allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r2_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, total_size);
+ ASSERT_EQ(rgn->base, r1.base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there are two reserved regions, the
+ * first one starting at the beginning of the available memory, with a gap too
+ * small to fit the new region:
+ *
+ * |------------+ +--------+--------+ |
+ * | r1 | | r2 | r3 | |
+ * +------------+-----+--------+--------+--+
+ *
+ * Expect to allocate after the second region, which starts at the higher
+ * address, and merge them into one. The region counter and total size fields
+ * get updated.
+ */
+static int alloc_bottom_up_second_fit_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[1];
+ struct region r1, r2;
+ void *allocated_ptr = NULL;
+ phys_addr_t r3_size = SZ_1K;
+ phys_addr_t total_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_start_of_DRAM();
+ r1.size = SZ_512;
+
+ r2.base = r1.base + r1.size + SZ_512;
+ r2.size = SZ_256;
+
+ total_size = r1.size + r2.size + r3_size;
+
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r3_size, alloc_test_flags);
+
+ ASSERT_EQ(rgn->size, r2.size + r3_size);
+ ASSERT_EQ(rgn->base, r2.base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/* Test case wrappers */
+static int alloc_simple_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_top_down_simple_check();
+ memblock_set_bottom_up(true);
+ alloc_bottom_up_simple_check();
+
+ return 0;
+}
+
+static int alloc_disjoint_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_top_down_disjoint_check();
+ memblock_set_bottom_up(true);
+ alloc_bottom_up_disjoint_check();
+
+ return 0;
+}
+
+static int alloc_before_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_top_down_before_check();
+ memblock_set_bottom_up(true);
+ alloc_bottom_up_before_check();
+
+ return 0;
+}
+
+static int alloc_after_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_top_down_after_check();
+ memblock_set_bottom_up(true);
+ alloc_bottom_up_after_check();
+
+ return 0;
+}
+
+static int alloc_in_between_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_in_between_generic_check);
+ run_bottom_up(alloc_in_between_generic_check);
+
+ return 0;
+}
+
+static int alloc_second_fit_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_top_down_second_fit_check();
+ memblock_set_bottom_up(true);
+ alloc_bottom_up_second_fit_check();
+
+ return 0;
+}
+
+static int alloc_small_gaps_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_small_gaps_generic_check);
+ run_bottom_up(alloc_small_gaps_generic_check);
+
+ return 0;
+}
+
+static int alloc_all_reserved_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_all_reserved_generic_check);
+ run_bottom_up(alloc_all_reserved_generic_check);
+
+ return 0;
+}
+
+static int alloc_no_space_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_no_space_generic_check);
+ run_bottom_up(alloc_no_space_generic_check);
+
+ return 0;
+}
+
+static int alloc_limited_space_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_limited_space_generic_check);
+ run_bottom_up(alloc_limited_space_generic_check);
+
+ return 0;
+}
+
+static int alloc_no_memory_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_no_memory_generic_check);
+ run_bottom_up(alloc_no_memory_generic_check);
+
+ return 0;
+}
+
+static int alloc_too_large_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_too_large_generic_check);
+ run_bottom_up(alloc_too_large_generic_check);
+
+ return 0;
+}
+
+static int memblock_alloc_checks_internal(int flags)
+{
+ const char *func = get_memblock_alloc_name(flags);
+
+ alloc_test_flags = flags;
+ prefix_reset();
+ prefix_push(func);
+ test_print("Running %s tests...\n", func);
+
+ reset_memblock_attributes();
+ dummy_physical_memory_init();
+
+ alloc_simple_check();
+ alloc_disjoint_check();
+ alloc_before_check();
+ alloc_after_check();
+ alloc_second_fit_check();
+ alloc_small_gaps_check();
+ alloc_in_between_check();
+ alloc_all_reserved_check();
+ alloc_no_space_check();
+ alloc_limited_space_check();
+ alloc_no_memory_check();
+ alloc_too_large_check();
+
+ dummy_physical_memory_cleanup();
+
+ prefix_pop();
+
+ return 0;
+}
+
+int memblock_alloc_checks(void)
+{
+ memblock_alloc_checks_internal(TEST_F_NONE);
+ memblock_alloc_checks_internal(TEST_F_RAW);
+
+ return 0;
+}
diff --git a/tools/testing/memblock/tests/alloc_api.h b/tools/testing/memblock/tests/alloc_api.h
new file mode 100644
index 000000000000..585b085baf21
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_api.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_ALLOCS_H
+#define _MEMBLOCK_ALLOCS_H
+
+#include "common.h"
+
+int memblock_alloc_checks(void);
+
+#endif
diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c
new file mode 100644
index 000000000000..6e14447da6e1
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c
@@ -0,0 +1,1113 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "alloc_exact_nid_api.h"
+#include "alloc_nid_api.h"
+
+#define FUNC_NAME "memblock_alloc_exact_nid_raw"
+
+/*
+ * contains the fraction of MEM_SIZE contained in each node in basis point
+ * units (one hundredth of 1% or 1/10000)
+ */
+static const unsigned int node_fractions[] = {
+ 2500, /* 1/4 */
+ 625, /* 1/16 */
+ 1250, /* 1/8 */
+ 1250, /* 1/8 */
+ 625, /* 1/16 */
+ 625, /* 1/16 */
+ 2500, /* 1/4 */
+ 625, /* 1/16 */
+};
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * has enough memory to allocate a region of the requested size.
+ * Expect to allocate an aligned region at the end of the requested node.
+ */
+static int alloc_exact_nid_top_down_numa_simple_check(void)
+{
+ int nid_req = 3;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_4, req_node->size);
+ size = req_node->size / SZ_4;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+ ASSERT_LE(req_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved but has enough memory for the allocated region:
+ *
+ * | +---------------------------------------+ |
+ * | | requested | |
+ * +-----------+---------------------------------------+----------+
+ *
+ * | +------------------+ +-----+ |
+ * | | reserved | | new | |
+ * +-----------+------------------+--------------+-----+----------+
+ *
+ * Expect to allocate an aligned region at the end of the requested node. The
+ * region count and total size get updated.
+ */
+static int alloc_exact_nid_top_down_numa_part_reserved_check(void)
+{
+ int nid_req = 4;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[1];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ struct region r1;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_8, req_node->size);
+ r1.base = req_node->base;
+ r1.size = req_node->size / SZ_2;
+ size = r1.size / SZ_4;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ memblock_reserve(r1.base, r1.size);
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+ ASSERT_LE(req_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, size + r1.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the first
+ * node is the requested node:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +-----------------------+-----------+ |
+ * | | requested | node3 | |
+ * +-----------+-----------------------+-----------+--------------+
+ * + +
+ * | +-----------+ |
+ * | | rgn | |
+ * +-----------------------+-----------+--------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that ends at
+ * the end of the requested node.
+ */
+static int alloc_exact_nid_top_down_numa_split_range_low_check(void)
+{
+ int nid_req = 2;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_512;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t req_node_end;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ req_node_end = region_end(req_node);
+ min_addr = req_node_end - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, req_node_end - size);
+ ASSERT_LE(req_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the requested
+ * node ends before min_addr:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +---------------+ +-------------+---------+ |
+ * | | requested | | node1 | node2 | |
+ * +----+---------------+--------+-------------+---------+----------+
+ * + +
+ * | +---------+ |
+ * | | rgn | |
+ * +----------+---------+-------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that ends at
+ * the end of the requested node.
+ */
+static int alloc_exact_nid_top_down_numa_no_overlap_split_check(void)
+{
+ int nid_req = 2;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *node2 = &memblock.memory.regions[6];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = SZ_512;
+ min_addr = node2->base - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+ ASSERT_LE(req_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node ends
+ * before min_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * |-----------+ +----------+----...----+----------+ |
+ * | requested | | min node | ... | max node | |
+ * +-----------+-----------+----------+----...----+----------+------+
+ * + +
+ * | +-----+ |
+ * | | rgn | |
+ * +-----+-----+----------------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that ends at
+ * the end of the requested node.
+ */
+static int alloc_exact_nid_top_down_numa_no_overlap_low_check(void)
+{
+ int nid_req = 0;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *min_node = &memblock.memory.regions[2];
+ struct memblock_region *max_node = &memblock.memory.regions[5];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_64;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = min_node->base;
+ max_addr = region_end(max_node);
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * has enough memory to allocate a region of the requested size.
+ * Expect to allocate an aligned region at the beginning of the requested node.
+ */
+static int alloc_exact_nid_bottom_up_numa_simple_check(void)
+{
+ int nid_req = 3;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_4, req_node->size);
+ size = req_node->size / SZ_4;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved but has enough memory for the allocated region:
+ *
+ * | +---------------------------------------+ |
+ * | | requested | |
+ * +-----------+---------------------------------------+---------+
+ *
+ * | +------------------+-----+ |
+ * | | reserved | new | |
+ * +-----------+------------------+-----+------------------------+
+ *
+ * Expect to allocate an aligned region in the requested node that merges with
+ * the existing reserved region. The total size gets updated.
+ */
+static int alloc_exact_nid_bottom_up_numa_part_reserved_check(void)
+{
+ int nid_req = 4;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ struct region r1;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t total_size;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_8, req_node->size);
+ r1.base = req_node->base;
+ r1.size = req_node->size / SZ_2;
+ size = r1.size / SZ_4;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+ total_size = size + r1.size;
+
+ memblock_reserve(r1.base, r1.size);
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, total_size);
+ ASSERT_EQ(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the first
+ * node is the requested node:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +-----------------------+-----------+ |
+ * | | requested | node3 | |
+ * +-----------+-----------------------+-----------+--------------+
+ * + +
+ * | +-----------+ |
+ * | | rgn | |
+ * +-----------+-----------+--------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region at the beginning
+ * of the requested node.
+ */
+static int alloc_exact_nid_bottom_up_numa_split_range_low_check(void)
+{
+ int nid_req = 2;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_512;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t req_node_end;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ req_node_end = region_end(req_node);
+ min_addr = req_node_end - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(new_rgn), req_node_end);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the requested
+ * node ends before min_addr:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +---------------+ +-------------+---------+ |
+ * | | requested | | node1 | node2 | |
+ * +----+---------------+--------+-------------+---------+---------+
+ * + +
+ * | +---------+ |
+ * | | rgn | |
+ * +----+---------+------------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that starts at
+ * the beginning of the requested node.
+ */
+static int alloc_exact_nid_bottom_up_numa_no_overlap_split_check(void)
+{
+ int nid_req = 2;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *node2 = &memblock.memory.regions[6];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = SZ_512;
+ min_addr = node2->base - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node ends
+ * before min_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * |-----------+ +----------+----...----+----------+ |
+ * | requested | | min node | ... | max node | |
+ * +-----------+-----------+----------+----...----+----------+------+
+ * + +
+ * |-----+ |
+ * | rgn | |
+ * +-----+----------------------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that starts at
+ * the beginning of the requested node.
+ */
+static int alloc_exact_nid_bottom_up_numa_no_overlap_low_check(void)
+{
+ int nid_req = 0;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *min_node = &memblock.memory.regions[2];
+ struct memblock_region *max_node = &memblock.memory.regions[5];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_64;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = min_node->base;
+ max_addr = region_end(max_node);
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * does not have enough memory to allocate a region of the requested size:
+ *
+ * | +-----+ |
+ * | | req | |
+ * +---+-----+----------------------------+
+ *
+ * +---------+
+ * | rgn |
+ * +---------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_small_node_generic_check(void)
+{
+ int nid_req = 1;
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = SZ_2 * req_node->size;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is fully reserved:
+ *
+ * | +---------+ |
+ * | |requested| |
+ * +--------------+---------+-------------+
+ *
+ * | +---------+ |
+ * | | reserved| |
+ * +--------------+---------+-------------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_node_reserved_generic_check(void)
+{
+ int nid_req = 2;
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = req_node->size;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ memblock_reserve(req_node->base, req_node->size);
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved and does not have enough contiguous memory for the
+ * allocated region:
+ *
+ * | +-----------------------+ |
+ * | | requested | |
+ * +-----------+-----------------------+----+
+ *
+ * | +----------+ |
+ * | | reserved | |
+ * +-----------------+----------+-----------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_part_reserved_fail_generic_check(void)
+{
+ int nid_req = 4;
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ struct region r1;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_4, req_node->size);
+ size = req_node->size / SZ_2;
+ r1.base = req_node->base + (size / SZ_2);
+ r1.size = size;
+
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ memblock_reserve(r1.base, r1.size);
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the second
+ * node is the requested node:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +--------------------------+---------+ |
+ * | | first node |requested| |
+ * +------+--------------------------+---------+----------------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_split_range_high_generic_check(void)
+{
+ int nid_req = 3;
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_512;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = req_node->base - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node starts
+ * after max_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +----------+----...----+----------+ +-----------+ |
+ * | | min node | ... | max node | | requested | |
+ * +-----+----------+----...----+----------+--------+-----------+---+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_no_overlap_high_generic_check(void)
+{
+ int nid_req = 7;
+ struct memblock_region *min_node = &memblock.memory.regions[2];
+ struct memblock_region *max_node = &memblock.memory.regions[5];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_64;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = min_node->base;
+ max_addr = region_end(max_node);
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * does not have enough memory to allocate a region of the requested size.
+ * Additionally, none of the nodes have enough memory to allocate the region:
+ *
+ * +-----------------------------------+
+ * | new |
+ * +-----------------------------------+
+ * |-------+-------+-------+-------+-------+-------+-------+-------|
+ * | node0 | node1 | node2 | node3 | node4 | node5 | node6 | node7 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_large_region_generic_check(void)
+{
+ int nid_req = 3;
+ void *allocated_ptr = NULL;
+ phys_addr_t size = MEM_SIZE / SZ_2;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_addr range when
+ * there are two reserved regions at the borders. The requested node starts at
+ * min_addr and ends at max_addr and is the same size as the region to be
+ * allocated:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +-----------+-----------------------+-----------------------|
+ * | | node5 | requested | node7 |
+ * +------+-----------+-----------------------+-----------------------+
+ * + +
+ * | +----+-----------------------+----+ |
+ * | | r2 | new | r1 | |
+ * +-------------+----+-----------------------+----+------------------+
+ *
+ * Expect to merge all of the regions into one. The region counter and total
+ * size fields get updated.
+ */
+static int alloc_exact_nid_numa_reserved_full_merge_generic_check(void)
+{
+ int nid_req = 6;
+ int nid_next = nid_req + 1;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *next_node = &memblock.memory.regions[nid_next];
+ void *allocated_ptr = NULL;
+ struct region r1, r2;
+ phys_addr_t size = req_node->size;
+ phys_addr_t total_size;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ r1.base = next_node->base;
+ r1.size = SZ_128;
+
+ r2.size = SZ_128;
+ r2.base = r1.base - (size + r2.size);
+
+ total_size = r1.size + r2.size + size;
+ min_addr = r2.base + r2.size;
+ max_addr = r1.base;
+
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+ ASSERT_EQ(new_rgn->size, total_size);
+ ASSERT_EQ(new_rgn->base, r2.base);
+
+ ASSERT_LE(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(req_node), region_end(new_rgn));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range,
+ * where the total range can fit the region, but it is split between two nodes
+ * and everything else is reserved. Additionally, nid is set to NUMA_NO_NODE
+ * instead of requesting a specific node:
+ *
+ * +-----------+
+ * | new |
+ * +-----------+
+ * | +---------------------+-----------|
+ * | | prev node | next node |
+ * +------+---------------------+-----------+
+ * + +
+ * |----------------------+ +-----|
+ * | r1 | | r2 |
+ * +----------------------+-----------+-----+
+ * ^ ^
+ * | |
+ * | max_addr
+ * |
+ * min_addr
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_split_all_reserved_generic_check(void)
+{
+ void *allocated_ptr = NULL;
+ struct memblock_region *next_node = &memblock.memory.regions[7];
+ struct region r1, r2;
+ phys_addr_t size = SZ_256;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ r2.base = next_node->base + SZ_128;
+ r2.size = memblock_end_of_DRAM() - r2.base;
+
+ r1.size = MEM_SIZE - (r2.size + size);
+ r1.base = memblock_start_of_DRAM();
+
+ min_addr = r1.base + r1.size;
+ max_addr = r2.base;
+
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/* Test case wrappers for NUMA tests */
+static int alloc_exact_nid_numa_simple_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_exact_nid_top_down_numa_simple_check();
+ memblock_set_bottom_up(true);
+ alloc_exact_nid_bottom_up_numa_simple_check();
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_part_reserved_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_exact_nid_top_down_numa_part_reserved_check();
+ memblock_set_bottom_up(true);
+ alloc_exact_nid_bottom_up_numa_part_reserved_check();
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_split_range_low_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_exact_nid_top_down_numa_split_range_low_check();
+ memblock_set_bottom_up(true);
+ alloc_exact_nid_bottom_up_numa_split_range_low_check();
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_no_overlap_split_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_exact_nid_top_down_numa_no_overlap_split_check();
+ memblock_set_bottom_up(true);
+ alloc_exact_nid_bottom_up_numa_no_overlap_split_check();
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_no_overlap_low_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_exact_nid_top_down_numa_no_overlap_low_check();
+ memblock_set_bottom_up(true);
+ alloc_exact_nid_bottom_up_numa_no_overlap_low_check();
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_small_node_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_exact_nid_numa_small_node_generic_check);
+ run_bottom_up(alloc_exact_nid_numa_small_node_generic_check);
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_node_reserved_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_exact_nid_numa_node_reserved_generic_check);
+ run_bottom_up(alloc_exact_nid_numa_node_reserved_generic_check);
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_part_reserved_fail_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_exact_nid_numa_part_reserved_fail_generic_check);
+ run_bottom_up(alloc_exact_nid_numa_part_reserved_fail_generic_check);
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_split_range_high_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_exact_nid_numa_split_range_high_generic_check);
+ run_bottom_up(alloc_exact_nid_numa_split_range_high_generic_check);
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_no_overlap_high_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_exact_nid_numa_no_overlap_high_generic_check);
+ run_bottom_up(alloc_exact_nid_numa_no_overlap_high_generic_check);
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_large_region_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_exact_nid_numa_large_region_generic_check);
+ run_bottom_up(alloc_exact_nid_numa_large_region_generic_check);
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_reserved_full_merge_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_exact_nid_numa_reserved_full_merge_generic_check);
+ run_bottom_up(alloc_exact_nid_numa_reserved_full_merge_generic_check);
+
+ return 0;
+}
+
+static int alloc_exact_nid_numa_split_all_reserved_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_exact_nid_numa_split_all_reserved_generic_check);
+ run_bottom_up(alloc_exact_nid_numa_split_all_reserved_generic_check);
+
+ return 0;
+}
+
+int __memblock_alloc_exact_nid_numa_checks(void)
+{
+ test_print("Running %s NUMA tests...\n", FUNC_NAME);
+
+ alloc_exact_nid_numa_simple_check();
+ alloc_exact_nid_numa_part_reserved_check();
+ alloc_exact_nid_numa_split_range_low_check();
+ alloc_exact_nid_numa_no_overlap_split_check();
+ alloc_exact_nid_numa_no_overlap_low_check();
+
+ alloc_exact_nid_numa_small_node_check();
+ alloc_exact_nid_numa_node_reserved_check();
+ alloc_exact_nid_numa_part_reserved_fail_check();
+ alloc_exact_nid_numa_split_range_high_check();
+ alloc_exact_nid_numa_no_overlap_high_check();
+ alloc_exact_nid_numa_large_region_check();
+ alloc_exact_nid_numa_reserved_full_merge_check();
+ alloc_exact_nid_numa_split_all_reserved_check();
+
+ return 0;
+}
+
+int memblock_alloc_exact_nid_checks(void)
+{
+ prefix_reset();
+ prefix_push(FUNC_NAME);
+
+ reset_memblock_attributes();
+ dummy_physical_memory_init();
+
+ memblock_alloc_exact_nid_range_checks();
+ memblock_alloc_exact_nid_numa_checks();
+
+ dummy_physical_memory_cleanup();
+
+ prefix_pop();
+
+ return 0;
+}
diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.h b/tools/testing/memblock/tests/alloc_exact_nid_api.h
new file mode 100644
index 000000000000..cef419d55d2a
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_exact_nid_api.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_ALLOC_EXACT_NID_H
+#define _MEMBLOCK_ALLOC_EXACT_NID_H
+
+#include "common.h"
+
+int memblock_alloc_exact_nid_checks(void);
+int __memblock_alloc_exact_nid_numa_checks(void);
+
+#ifdef CONFIG_NUMA
+static inline int memblock_alloc_exact_nid_numa_checks(void)
+{
+ __memblock_alloc_exact_nid_numa_checks();
+ return 0;
+}
+
+#else
+static inline int memblock_alloc_exact_nid_numa_checks(void)
+{
+ return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
+#endif
diff --git a/tools/testing/memblock/tests/alloc_helpers_api.c b/tools/testing/memblock/tests/alloc_helpers_api.c
new file mode 100644
index 000000000000..e5362cfd2ff3
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_helpers_api.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "alloc_helpers_api.h"
+
+/*
+ * A simple test that tries to allocate a memory region above a specified,
+ * aligned address:
+ *
+ * +
+ * | +-----------+ |
+ * | | rgn | |
+ * +----------+-----------+---------+
+ * ^
+ * |
+ * Aligned min_addr
+ *
+ * Expect to allocate a cleared region at the minimal memory address.
+ */
+static int alloc_from_simple_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_16;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES;
+
+ allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_EQ(allocated_ptr, 0, size);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, min_addr);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region above a certain address.
+ * The minimal address here is not aligned:
+ *
+ * + +
+ * | + +---------+ |
+ * | | | rgn | |
+ * +------+------+---------+------------+
+ * ^ ^------.
+ * | |
+ * min_addr Aligned address
+ * boundary
+ *
+ * Expect to allocate a cleared region at the closest aligned memory address.
+ */
+static int alloc_from_misaligned_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_32;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ /* A misaligned address */
+ min_addr = memblock_end_of_DRAM() - (SMP_CACHE_BYTES * 2 - 1);
+
+ allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_MEM_EQ(allocated_ptr, 0, size);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - SMP_CACHE_BYTES);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region above an address that is too
+ * close to the end of the memory:
+ *
+ * + +
+ * | +--------+---+ |
+ * | | rgn + | |
+ * +-----------+--------+---+------+
+ * ^ ^
+ * | |
+ * | min_addr
+ * |
+ * Aligned address
+ * boundary
+ *
+ * Expect to prioritize granting memory over satisfying the minimal address
+ * requirement.
+ */
+static int alloc_from_top_down_high_addr_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_32;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ /* The address is too close to the end of the memory */
+ min_addr = memblock_end_of_DRAM() - SZ_16;
+
+ allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - SMP_CACHE_BYTES);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region when there is no space
+ * available above the minimal address above a certain address:
+ *
+ * +
+ * | +---------+-------------|
+ * | | rgn | |
+ * +--------+---------+-------------+
+ * ^
+ * |
+ * min_addr
+ *
+ * Expect to prioritize granting memory over satisfying the minimal address
+ * requirement and to allocate next to the previously reserved region. The
+ * regions get merged into one.
+ */
+static int alloc_from_top_down_no_space_above_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t r1_size = SZ_64;
+ phys_addr_t r2_size = SZ_2;
+ phys_addr_t total_size = r1_size + r2_size;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+
+ /* No space above this address */
+ memblock_reserve_kern(min_addr, r2_size);
+
+ allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_EQ(rgn->base, min_addr - r1_size);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region with a minimal address below
+ * the start address of the available memory. As the allocation is top-down,
+ * first reserve a region that will force allocation near the start.
+ * Expect successful allocation and merge of both regions.
+ */
+static int alloc_from_top_down_min_addr_cap_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t r1_size = SZ_64;
+ phys_addr_t min_addr;
+ phys_addr_t start_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ start_addr = (phys_addr_t)memblock_start_of_DRAM();
+ min_addr = start_addr - SMP_CACHE_BYTES * 3;
+
+ memblock_reserve_kern(start_addr + r1_size, MEM_SIZE - r1_size);
+
+ allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_EQ(rgn->base, start_addr);
+ ASSERT_EQ(rgn->size, MEM_SIZE);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, MEM_SIZE);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region above an address that is too
+ * close to the end of the memory:
+ *
+ * +
+ * |-----------+ + |
+ * | rgn | | |
+ * +-----------+--------------+-----+
+ * ^ ^
+ * | |
+ * Aligned address min_addr
+ * boundary
+ *
+ * Expect to prioritize granting memory over satisfying the minimal address
+ * requirement. Allocation happens at beginning of the available memory.
+ */
+static int alloc_from_bottom_up_high_addr_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_32;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ /* The address is too close to the end of the memory */
+ min_addr = memblock_end_of_DRAM() - SZ_8;
+
+ allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region when there is no space
+ * available above the minimal address above a certain address:
+ *
+ * +
+ * |-----------+ +-------------------|
+ * | rgn | | |
+ * +-----------+----+-------------------+
+ * ^
+ * |
+ * min_addr
+ *
+ * Expect to prioritize granting memory over satisfying the minimal address
+ * requirement and to allocate at the beginning of the available memory.
+ */
+static int alloc_from_bottom_up_no_space_above_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t r1_size = SZ_64;
+ phys_addr_t min_addr;
+ phys_addr_t r2_size;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() + SZ_128;
+ r2_size = memblock_end_of_DRAM() - min_addr;
+
+ /* No space above this address */
+ memblock_reserve(min_addr - SMP_CACHE_BYTES, r2_size);
+
+ allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+ ASSERT_EQ(rgn->size, r1_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, r1_size + r2_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region with a minimal address below
+ * the start address of the available memory. Expect to allocate a region
+ * at the beginning of the available memory.
+ */
+static int alloc_from_bottom_up_min_addr_cap_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t r1_size = SZ_64;
+ phys_addr_t min_addr;
+ phys_addr_t start_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ start_addr = (phys_addr_t)memblock_start_of_DRAM();
+ min_addr = start_addr - SMP_CACHE_BYTES * 3;
+
+ allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ ASSERT_EQ(rgn->base, start_addr);
+ ASSERT_EQ(rgn->size, r1_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, r1_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/* Test case wrappers */
+static int alloc_from_simple_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_from_simple_generic_check);
+ run_bottom_up(alloc_from_simple_generic_check);
+
+ return 0;
+}
+
+static int alloc_from_misaligned_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_from_misaligned_generic_check);
+ run_bottom_up(alloc_from_misaligned_generic_check);
+
+ return 0;
+}
+
+static int alloc_from_high_addr_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_from_top_down_high_addr_check();
+ memblock_set_bottom_up(true);
+ alloc_from_bottom_up_high_addr_check();
+
+ return 0;
+}
+
+static int alloc_from_no_space_above_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_from_top_down_no_space_above_check();
+ memblock_set_bottom_up(true);
+ alloc_from_bottom_up_no_space_above_check();
+
+ return 0;
+}
+
+static int alloc_from_min_addr_cap_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_from_top_down_min_addr_cap_check();
+ memblock_set_bottom_up(true);
+ alloc_from_bottom_up_min_addr_cap_check();
+
+ return 0;
+}
+
+int memblock_alloc_helpers_checks(void)
+{
+ const char *func_testing = "memblock_alloc_from";
+
+ prefix_reset();
+ prefix_push(func_testing);
+ test_print("Running %s tests...\n", func_testing);
+
+ reset_memblock_attributes();
+ dummy_physical_memory_init();
+
+ alloc_from_simple_check();
+ alloc_from_misaligned_check();
+ alloc_from_high_addr_check();
+ alloc_from_no_space_above_check();
+ alloc_from_min_addr_cap_check();
+
+ dummy_physical_memory_cleanup();
+
+ prefix_pop();
+
+ return 0;
+}
diff --git a/tools/testing/memblock/tests/alloc_helpers_api.h b/tools/testing/memblock/tests/alloc_helpers_api.h
new file mode 100644
index 000000000000..c9e4827b1623
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_helpers_api.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_ALLOC_HELPERS_H
+#define _MEMBLOCK_ALLOC_HELPERS_H
+
+#include "common.h"
+
+int memblock_alloc_helpers_checks(void);
+
+#endif
diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c
new file mode 100644
index 000000000000..562e4701b0e0
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_nid_api.c
@@ -0,0 +1,2733 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "alloc_nid_api.h"
+
+static int alloc_nid_test_flags = TEST_F_NONE;
+
+/*
+ * contains the fraction of MEM_SIZE contained in each node in basis point
+ * units (one hundredth of 1% or 1/10000)
+ */
+static const unsigned int node_fractions[] = {
+ 2500, /* 1/4 */
+ 625, /* 1/16 */
+ 1250, /* 1/8 */
+ 1250, /* 1/8 */
+ 625, /* 1/16 */
+ 625, /* 1/16 */
+ 2500, /* 1/4 */
+ 625, /* 1/16 */
+};
+
+static inline const char * const get_memblock_alloc_nid_name(int flags)
+{
+ if (flags & TEST_F_EXACT)
+ return "memblock_alloc_exact_nid_raw";
+ if (flags & TEST_F_RAW)
+ return "memblock_alloc_try_nid_raw";
+ return "memblock_alloc_try_nid";
+}
+
+static inline void *run_memblock_alloc_nid(phys_addr_t size,
+ phys_addr_t align,
+ phys_addr_t min_addr,
+ phys_addr_t max_addr, int nid)
+{
+ assert(!(alloc_nid_test_flags & TEST_F_EXACT) ||
+ (alloc_nid_test_flags & TEST_F_RAW));
+ /*
+ * TEST_F_EXACT should be checked before TEST_F_RAW since
+ * memblock_alloc_exact_nid_raw() performs raw allocations.
+ */
+ if (alloc_nid_test_flags & TEST_F_EXACT)
+ return memblock_alloc_exact_nid_raw(size, align, min_addr,
+ max_addr, nid);
+ if (alloc_nid_test_flags & TEST_F_RAW)
+ return memblock_alloc_try_nid_raw(size, align, min_addr,
+ max_addr, nid);
+ return memblock_alloc_try_nid(size, align, min_addr, max_addr, nid);
+}
+
+/*
+ * A simple test that tries to allocate a memory region within min_addr and
+ * max_addr range:
+ *
+ * + +
+ * | + +-----------+ |
+ * | | | rgn | |
+ * +----+-------+-----------+------+
+ * ^ ^
+ * | |
+ * min_addr max_addr
+ *
+ * Expect to allocate a region that ends at max_addr.
+ */
+static int alloc_nid_top_down_simple_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_128;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t rgn_end;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2;
+ max_addr = min_addr + SZ_512;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+ rgn_end = rgn->base + rgn->size;
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, max_addr - size);
+ ASSERT_EQ(rgn_end, max_addr);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to allocate a memory region within min_addr and
+ * max_addr range, where the end address is misaligned:
+ *
+ * + + +
+ * | + +---------+ + |
+ * | | | rgn | | |
+ * +------+-------+---------+--+----+
+ * ^ ^ ^
+ * | | |
+ * min_add | max_addr
+ * |
+ * Aligned address
+ * boundary
+ *
+ * Expect to allocate an aligned region that ends before max_addr.
+ */
+static int alloc_nid_top_down_end_misaligned_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_128;
+ phys_addr_t misalign = SZ_2;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t rgn_end;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2;
+ max_addr = min_addr + SZ_512 + misalign;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+ rgn_end = rgn->base + rgn->size;
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, max_addr - size - misalign);
+ ASSERT_LT(rgn_end, max_addr);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to allocate a memory region, which spans over the
+ * min_addr and max_addr range:
+ *
+ * + +
+ * | +---------------+ |
+ * | | rgn | |
+ * +------+---------------+-------+
+ * ^ ^
+ * | |
+ * min_addr max_addr
+ *
+ * Expect to allocate a region that starts at min_addr and ends at
+ * max_addr, given that min_addr is aligned.
+ */
+static int alloc_nid_exact_address_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_1K;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t rgn_end;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES;
+ max_addr = min_addr + size;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+ rgn_end = rgn->base + rgn->size;
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, min_addr);
+ ASSERT_EQ(rgn_end, max_addr);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, which can't fit into
+ * min_addr and max_addr range:
+ *
+ * + + +
+ * | +----------+-----+ |
+ * | | rgn + | |
+ * +--------+----------+-----+----+
+ * ^ ^ ^
+ * | | |
+ * Aligned | max_addr
+ * address |
+ * boundary min_add
+ *
+ * Expect to drop the lower limit and allocate a memory region which
+ * ends at max_addr (if the address is aligned).
+ */
+static int alloc_nid_top_down_narrow_range_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_256;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() + SZ_512;
+ max_addr = min_addr + SMP_CACHE_BYTES;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, max_addr - size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, which can't fit into
+ * min_addr and max_addr range, with the latter being too close to the beginning
+ * of the available memory:
+ *
+ * +-------------+
+ * | new |
+ * +-------------+
+ * + +
+ * | + |
+ * | | |
+ * +-------+--------------+
+ * ^ ^
+ * | |
+ * | max_addr
+ * |
+ * min_addr
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_nid_low_max_generic_check(void)
+{
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_1K;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM();
+ max_addr = min_addr + SMP_CACHE_BYTES;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region within min_addr min_addr range,
+ * with min_addr being so close that it's next to an allocated region:
+ *
+ * + +
+ * | +--------+---------------|
+ * | | r1 | rgn |
+ * +-------+--------+---------------+
+ * ^ ^
+ * | |
+ * min_addr max_addr
+ *
+ * Expect a merge of both regions. Only the region size gets updated.
+ */
+static int alloc_nid_min_reserved_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t r1_size = SZ_128;
+ phys_addr_t r2_size = SZ_64;
+ phys_addr_t total_size = r1_size + r2_size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t reserved_base;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ max_addr = memblock_end_of_DRAM();
+ min_addr = max_addr - r2_size;
+ reserved_base = min_addr - r1_size;
+
+ memblock_reserve_kern(reserved_base, r1_size);
+
+ allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, total_size);
+ ASSERT_EQ(rgn->base, reserved_base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region within min_addr and max_addr,
+ * with max_addr being so close that it's next to an allocated region:
+ *
+ * + +
+ * | +-------------+--------|
+ * | | rgn | r1 |
+ * +----------+-------------+--------+
+ * ^ ^
+ * | |
+ * min_addr max_addr
+ *
+ * Expect a merge of regions. Only the region size gets updated.
+ */
+static int alloc_nid_max_reserved_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t r1_size = SZ_64;
+ phys_addr_t r2_size = SZ_128;
+ phys_addr_t total_size = r1_size + r2_size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ max_addr = memblock_end_of_DRAM() - r1_size;
+ min_addr = max_addr - r2_size;
+
+ memblock_reserve_kern(max_addr, r1_size);
+
+ allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, total_size);
+ ASSERT_EQ(rgn->base, min_addr);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap big enough to fit
+ * a new region:
+ *
+ * + +
+ * | +--------+ +-------+------+ |
+ * | | r2 | | rgn | r1 | |
+ * +----+--------+---+-------+------+--+
+ * ^ ^
+ * | |
+ * min_addr max_addr
+ *
+ * Expect to merge the new region with r1. The second region does not get
+ * updated. The total size field gets updated.
+ */
+
+static int alloc_nid_top_down_reserved_with_space_check(void)
+{
+ struct memblock_region *rgn1 = &memblock.reserved.regions[1];
+ struct memblock_region *rgn2 = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ struct region r1, r2;
+ phys_addr_t r3_size = SZ_64;
+ phys_addr_t gap_size = SMP_CACHE_BYTES;
+ phys_addr_t total_size;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+ r1.size = SMP_CACHE_BYTES;
+
+ r2.size = SZ_128;
+ r2.base = r1.base - (r3_size + gap_size + r2.size);
+
+ total_size = r1.size + r2.size + r3_size;
+ min_addr = r2.base + r2.size;
+ max_addr = r1.base;
+
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn1->size, r1.size + r3_size);
+ ASSERT_EQ(rgn1->base, max_addr - r3_size);
+
+ ASSERT_EQ(rgn2->size, r2.size);
+ ASSERT_EQ(rgn2->base, r2.base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap of a size equal to
+ * the size of the new region:
+ *
+ * + +
+ * | +--------+--------+--------+ |
+ * | | r2 | r3 | r1 | |
+ * +-----+--------+--------+--------+-----+
+ * ^ ^
+ * | |
+ * min_addr max_addr
+ *
+ * Expect to merge all of the regions into one. The region counter and total
+ * size fields get updated.
+ */
+static int alloc_nid_reserved_full_merge_generic_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ struct region r1, r2;
+ phys_addr_t r3_size = SZ_64;
+ phys_addr_t total_size;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+ r1.size = SMP_CACHE_BYTES;
+
+ r2.size = SZ_128;
+ r2.base = r1.base - (r3_size + r2.size);
+
+ total_size = r1.size + r2.size + r3_size;
+ min_addr = r2.base + r2.size;
+ max_addr = r1.base;
+
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, total_size);
+ ASSERT_EQ(rgn->base, r2.base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap that can't fit
+ * a new region:
+ *
+ * + +
+ * | +----------+------+ +------+ |
+ * | | r3 | r2 | | r1 | |
+ * +--+----------+------+----+------+---+
+ * ^ ^
+ * | |
+ * | max_addr
+ * |
+ * min_addr
+ *
+ * Expect to merge the new region with r2. The second region does not get
+ * updated. The total size counter gets updated.
+ */
+static int alloc_nid_top_down_reserved_no_space_check(void)
+{
+ struct memblock_region *rgn1 = &memblock.reserved.regions[1];
+ struct memblock_region *rgn2 = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ struct region r1, r2;
+ phys_addr_t r3_size = SZ_256;
+ phys_addr_t gap_size = SMP_CACHE_BYTES;
+ phys_addr_t total_size;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+ r1.size = SMP_CACHE_BYTES;
+
+ r2.size = SZ_128;
+ r2.base = r1.base - (r2.size + gap_size);
+
+ total_size = r1.size + r2.size + r3_size;
+ min_addr = r2.base + r2.size;
+ max_addr = r1.base;
+
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn1->size, r1.size);
+ ASSERT_EQ(rgn1->base, r1.base);
+
+ ASSERT_EQ(rgn2->size, r2.size + r3_size);
+ ASSERT_EQ(rgn2->base, r2.base - r3_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, but
+ * it's too narrow and everything else is reserved:
+ *
+ * +-----------+
+ * | new |
+ * +-----------+
+ * + +
+ * |--------------+ +----------|
+ * | r2 | | r1 |
+ * +--------------+------+----------+
+ * ^ ^
+ * | |
+ * | max_addr
+ * |
+ * min_addr
+ *
+ * Expect no allocation to happen.
+ */
+
+static int alloc_nid_reserved_all_generic_check(void)
+{
+ void *allocated_ptr = NULL;
+ struct region r1, r2;
+ phys_addr_t r3_size = SZ_256;
+ phys_addr_t gap_size = SMP_CACHE_BYTES;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES;
+ r1.size = SMP_CACHE_BYTES;
+
+ r2.size = MEM_SIZE - (r1.size + gap_size);
+ r2.base = memblock_start_of_DRAM();
+
+ min_addr = r2.base + r2.size;
+ max_addr = r1.base;
+
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, where max_addr is
+ * bigger than the end address of the available memory. Expect to allocate
+ * a region that ends before the end of the memory.
+ */
+static int alloc_nid_top_down_cap_max_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_256;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_end_of_DRAM() - SZ_1K;
+ max_addr = memblock_end_of_DRAM() + SZ_256;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, where min_addr is
+ * smaller than the start address of the available memory. Expect to allocate
+ * a region that ends before the end of the memory.
+ */
+static int alloc_nid_top_down_cap_min_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_1K;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() - SZ_256;
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to allocate a memory region within min_addr and
+ * max_addr range:
+ *
+ * + +
+ * | +-----------+ | |
+ * | | rgn | | |
+ * +----+-----------+-----------+------+
+ * ^ ^
+ * | |
+ * min_addr max_addr
+ *
+ * Expect to allocate a region that ends before max_addr.
+ */
+static int alloc_nid_bottom_up_simple_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_128;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t rgn_end;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2;
+ max_addr = min_addr + SZ_512;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+ rgn_end = rgn->base + rgn->size;
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, min_addr);
+ ASSERT_LT(rgn_end, max_addr);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to allocate a memory region within min_addr and
+ * max_addr range, where the start address is misaligned:
+ *
+ * + +
+ * | + +-----------+ + |
+ * | | | rgn | | |
+ * +-----+---+-----------+-----+-----+
+ * ^ ^----. ^
+ * | | |
+ * min_add | max_addr
+ * |
+ * Aligned address
+ * boundary
+ *
+ * Expect to allocate an aligned region that ends before max_addr.
+ */
+static int alloc_nid_bottom_up_start_misaligned_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_128;
+ phys_addr_t misalign = SZ_2;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t rgn_end;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() + misalign;
+ max_addr = min_addr + SZ_512;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+ rgn_end = rgn->base + rgn->size;
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, min_addr + (SMP_CACHE_BYTES - misalign));
+ ASSERT_LT(rgn_end, max_addr);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, which can't fit into min_addr
+ * and max_addr range:
+ *
+ * + +
+ * |---------+ + + |
+ * | rgn | | | |
+ * +---------+---------+----+------+
+ * ^ ^
+ * | |
+ * | max_addr
+ * |
+ * min_add
+ *
+ * Expect to drop the lower limit and allocate a memory region which
+ * starts at the beginning of the available memory.
+ */
+static int alloc_nid_bottom_up_narrow_range_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_256;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() + SZ_512;
+ max_addr = min_addr + SMP_CACHE_BYTES;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap big enough to fit
+ * a new region:
+ *
+ * + +
+ * | +--------+-------+ +------+ |
+ * | | r2 | rgn | | r1 | |
+ * +----+--------+-------+---+------+--+
+ * ^ ^
+ * | |
+ * min_addr max_addr
+ *
+ * Expect to merge the new region with r2. The second region does not get
+ * updated. The total size field gets updated.
+ */
+
+static int alloc_nid_bottom_up_reserved_with_space_check(void)
+{
+ struct memblock_region *rgn1 = &memblock.reserved.regions[1];
+ struct memblock_region *rgn2 = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ struct region r1, r2;
+ phys_addr_t r3_size = SZ_64;
+ phys_addr_t gap_size = SMP_CACHE_BYTES;
+ phys_addr_t total_size;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+ r1.size = SMP_CACHE_BYTES;
+
+ r2.size = SZ_128;
+ r2.base = r1.base - (r3_size + gap_size + r2.size);
+
+ total_size = r1.size + r2.size + r3_size;
+ min_addr = r2.base + r2.size;
+ max_addr = r1.base;
+
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn1->size, r1.size);
+ ASSERT_EQ(rgn1->base, max_addr);
+
+ ASSERT_EQ(rgn2->size, r2.size + r3_size);
+ ASSERT_EQ(rgn2->base, r2.base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap of a size equal to
+ * the size of the new region:
+ *
+ * + +
+ * |----------+ +------+ +----+ |
+ * | r3 | | r2 | | r1 | |
+ * +----------+----+------+---+----+--+
+ * ^ ^
+ * | |
+ * | max_addr
+ * |
+ * min_addr
+ *
+ * Expect to drop the lower limit and allocate memory at the beginning of the
+ * available memory. The region counter and total size fields get updated.
+ * Other regions are not modified.
+ */
+
+static int alloc_nid_bottom_up_reserved_no_space_check(void)
+{
+ struct memblock_region *rgn1 = &memblock.reserved.regions[2];
+ struct memblock_region *rgn2 = &memblock.reserved.regions[1];
+ struct memblock_region *rgn3 = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ struct region r1, r2;
+ phys_addr_t r3_size = SZ_256;
+ phys_addr_t gap_size = SMP_CACHE_BYTES;
+ phys_addr_t total_size;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+ r1.size = SMP_CACHE_BYTES;
+
+ r2.size = SZ_128;
+ r2.base = r1.base - (r2.size + gap_size);
+
+ total_size = r1.size + r2.size + r3_size;
+ min_addr = r2.base + r2.size;
+ max_addr = r1.base;
+
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn3->size, r3_size);
+ ASSERT_EQ(rgn3->base, memblock_start_of_DRAM());
+
+ ASSERT_EQ(rgn2->size, r2.size);
+ ASSERT_EQ(rgn2->base, r2.base);
+
+ ASSERT_EQ(rgn1->size, r1.size);
+ ASSERT_EQ(rgn1->base, r1.base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 3);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, where max_addr is
+ * bigger than the end address of the available memory. Expect to allocate
+ * a region that starts at the min_addr.
+ */
+static int alloc_nid_bottom_up_cap_max_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_256;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM() + SZ_1K;
+ max_addr = memblock_end_of_DRAM() + SZ_256;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, min_addr);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, where min_addr is
+ * smaller than the start address of the available memory. Expect to allocate
+ * a region at the beginning of the available memory.
+ */
+static int alloc_nid_bottom_up_cap_min_check(void)
+{
+ struct memblock_region *rgn = &memblock.reserved.regions[0];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_1K;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_memblock();
+
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM() - SZ_256;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(rgn->size, size);
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/* Test case wrappers for range tests */
+static int alloc_nid_simple_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_simple_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_simple_check();
+
+ return 0;
+}
+
+static int alloc_nid_misaligned_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_end_misaligned_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_start_misaligned_check();
+
+ return 0;
+}
+
+static int alloc_nid_narrow_range_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_narrow_range_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_narrow_range_check();
+
+ return 0;
+}
+
+static int alloc_nid_reserved_with_space_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_reserved_with_space_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_reserved_with_space_check();
+
+ return 0;
+}
+
+static int alloc_nid_reserved_no_space_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_reserved_no_space_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_reserved_no_space_check();
+
+ return 0;
+}
+
+static int alloc_nid_cap_max_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_cap_max_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_cap_max_check();
+
+ return 0;
+}
+
+static int alloc_nid_cap_min_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_cap_min_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_cap_min_check();
+
+ return 0;
+}
+
+static int alloc_nid_min_reserved_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_nid_min_reserved_generic_check);
+ run_bottom_up(alloc_nid_min_reserved_generic_check);
+
+ return 0;
+}
+
+static int alloc_nid_max_reserved_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_nid_max_reserved_generic_check);
+ run_bottom_up(alloc_nid_max_reserved_generic_check);
+
+ return 0;
+}
+
+static int alloc_nid_exact_address_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_nid_exact_address_generic_check);
+ run_bottom_up(alloc_nid_exact_address_generic_check);
+
+ return 0;
+}
+
+static int alloc_nid_reserved_full_merge_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_nid_reserved_full_merge_generic_check);
+ run_bottom_up(alloc_nid_reserved_full_merge_generic_check);
+
+ return 0;
+}
+
+static int alloc_nid_reserved_all_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_nid_reserved_all_generic_check);
+ run_bottom_up(alloc_nid_reserved_all_generic_check);
+
+ return 0;
+}
+
+static int alloc_nid_low_max_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_nid_low_max_generic_check);
+ run_bottom_up(alloc_nid_low_max_generic_check);
+
+ return 0;
+}
+
+static int memblock_alloc_nid_range_checks(void)
+{
+ test_print("Running %s range tests...\n",
+ get_memblock_alloc_nid_name(alloc_nid_test_flags));
+
+ alloc_nid_simple_check();
+ alloc_nid_misaligned_check();
+ alloc_nid_narrow_range_check();
+ alloc_nid_reserved_with_space_check();
+ alloc_nid_reserved_no_space_check();
+ alloc_nid_cap_max_check();
+ alloc_nid_cap_min_check();
+
+ alloc_nid_min_reserved_check();
+ alloc_nid_max_reserved_check();
+ alloc_nid_exact_address_check();
+ alloc_nid_reserved_full_merge_check();
+ alloc_nid_reserved_all_check();
+ alloc_nid_low_max_check();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * has enough memory to allocate a region of the requested size.
+ * Expect to allocate an aligned region at the end of the requested node.
+ */
+static int alloc_nid_top_down_numa_simple_check(void)
+{
+ int nid_req = 3;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_4, req_node->size);
+ size = req_node->size / SZ_4;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+ ASSERT_LE(req_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * does not have enough memory to allocate a region of the requested size:
+ *
+ * | +-----+ +------------------+ |
+ * | | req | | expected | |
+ * +---+-----+----------+------------------+-----+
+ *
+ * | +---------+ |
+ * | | rgn | |
+ * +-----------------------------+---------+-----+
+ *
+ * Expect to allocate an aligned region at the end of the last node that has
+ * enough memory (in this case, nid = 6) after falling back to NUMA_NO_NODE.
+ */
+static int alloc_nid_top_down_numa_small_node_check(void)
+{
+ int nid_req = 1;
+ int nid_exp = 6;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *exp_node = &memblock.memory.regions[nid_exp];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = SZ_2 * req_node->size;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(exp_node) - size);
+ ASSERT_LE(exp_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is fully reserved:
+ *
+ * | +---------+ +------------------+ |
+ * | |requested| | expected | |
+ * +--------------+---------+------------+------------------+-----+
+ *
+ * | +---------+ +---------+ |
+ * | | reserved| | new | |
+ * +--------------+---------+---------------------+---------+-----+
+ *
+ * Expect to allocate an aligned region at the end of the last node that is
+ * large enough and has enough unreserved memory (in this case, nid = 6) after
+ * falling back to NUMA_NO_NODE. The region count and total size get updated.
+ */
+static int alloc_nid_top_down_numa_node_reserved_check(void)
+{
+ int nid_req = 2;
+ int nid_exp = 6;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[1];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *exp_node = &memblock.memory.regions[nid_exp];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = req_node->size;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ memblock_reserve(req_node->base, req_node->size);
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(exp_node) - size);
+ ASSERT_LE(exp_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, size + req_node->size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved but has enough memory for the allocated region:
+ *
+ * | +---------------------------------------+ |
+ * | | requested | |
+ * +-----------+---------------------------------------+----------+
+ *
+ * | +------------------+ +-----+ |
+ * | | reserved | | new | |
+ * +-----------+------------------+--------------+-----+----------+
+ *
+ * Expect to allocate an aligned region at the end of the requested node. The
+ * region count and total size get updated.
+ */
+static int alloc_nid_top_down_numa_part_reserved_check(void)
+{
+ int nid_req = 4;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[1];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ struct region r1;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_8, req_node->size);
+ r1.base = req_node->base;
+ r1.size = req_node->size / SZ_2;
+ size = r1.size / SZ_4;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ memblock_reserve(r1.base, r1.size);
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+ ASSERT_LE(req_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, size + r1.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved and does not have enough contiguous memory for the
+ * allocated region:
+ *
+ * | +-----------------------+ +----------------------|
+ * | | requested | | expected |
+ * +-----------+-----------------------+---------+----------------------+
+ *
+ * | +----------+ +-----------|
+ * | | reserved | | new |
+ * +-----------------+----------+---------------------------+-----------+
+ *
+ * Expect to allocate an aligned region at the end of the last node that is
+ * large enough and has enough unreserved memory (in this case,
+ * nid = NUMA_NODES - 1) after falling back to NUMA_NO_NODE. The region count
+ * and total size get updated.
+ */
+static int alloc_nid_top_down_numa_part_reserved_fallback_check(void)
+{
+ int nid_req = 4;
+ int nid_exp = NUMA_NODES - 1;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[1];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *exp_node = &memblock.memory.regions[nid_exp];
+ void *allocated_ptr = NULL;
+ struct region r1;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_4, req_node->size);
+ size = req_node->size / SZ_2;
+ r1.base = req_node->base + (size / SZ_2);
+ r1.size = size;
+
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ memblock_reserve(r1.base, r1.size);
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(exp_node) - size);
+ ASSERT_LE(exp_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, size + r1.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the first
+ * node is the requested node:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +-----------------------+-----------+ |
+ * | | requested | node3 | |
+ * +-----------+-----------------------+-----------+--------------+
+ * + +
+ * | +-----------+ |
+ * | | rgn | |
+ * +-----------------------+-----------+--------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that ends at
+ * the end of the requested node.
+ */
+static int alloc_nid_top_down_numa_split_range_low_check(void)
+{
+ int nid_req = 2;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_512;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t req_node_end;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ req_node_end = region_end(req_node);
+ min_addr = req_node_end - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, req_node_end - size);
+ ASSERT_LE(req_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the second
+ * node is the requested node:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +--------------------------+---------+ |
+ * | | expected |requested| |
+ * +------+--------------------------+---------+----------------+
+ * + +
+ * | +---------+ |
+ * | | rgn | |
+ * +-----------------------+---------+--------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that
+ * ends at the end of the first node that overlaps with the range.
+ */
+static int alloc_nid_top_down_numa_split_range_high_check(void)
+{
+ int nid_req = 3;
+ int nid_exp = nid_req - 1;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *exp_node = &memblock.memory.regions[nid_exp];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_512;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t exp_node_end;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ exp_node_end = region_end(exp_node);
+ min_addr = exp_node_end - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, exp_node_end - size);
+ ASSERT_LE(exp_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the requested
+ * node ends before min_addr:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +---------------+ +-------------+---------+ |
+ * | | requested | | node1 | node2 | |
+ * +----+---------------+--------+-------------+---------+----------+
+ * + +
+ * | +---------+ |
+ * | | rgn | |
+ * +----------+---------+-------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that ends at
+ * the end of the requested node.
+ */
+static int alloc_nid_top_down_numa_no_overlap_split_check(void)
+{
+ int nid_req = 2;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *node2 = &memblock.memory.regions[6];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = SZ_512;
+ min_addr = node2->base - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+ ASSERT_LE(req_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node ends
+ * before min_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * |-----------+ +----------+----...----+----------+ |
+ * | requested | | min node | ... | max node | |
+ * +-----------+-----------+----------+----...----+----------+------+
+ * + +
+ * | +-----+ |
+ * | | rgn | |
+ * +---------------------------------------------------+-----+------+
+ *
+ * Expect to allocate a memory region at the end of the final node in
+ * the range after falling back to NUMA_NO_NODE.
+ */
+static int alloc_nid_top_down_numa_no_overlap_low_check(void)
+{
+ int nid_req = 0;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *min_node = &memblock.memory.regions[2];
+ struct memblock_region *max_node = &memblock.memory.regions[5];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_64;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = min_node->base;
+ max_addr = region_end(max_node);
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, max_addr - size);
+ ASSERT_LE(max_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node starts
+ * after max_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +----------+----...----+----------+ +-----------+ |
+ * | | min node | ... | max node | | requested | |
+ * +-----+----------+----...----+----------+--------+-----------+---+
+ * + +
+ * | +-----+ |
+ * | | rgn | |
+ * +---------------------------------+-----+------------------------+
+ *
+ * Expect to allocate a memory region at the end of the final node in
+ * the range after falling back to NUMA_NO_NODE.
+ */
+static int alloc_nid_top_down_numa_no_overlap_high_check(void)
+{
+ int nid_req = 7;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *min_node = &memblock.memory.regions[2];
+ struct memblock_region *max_node = &memblock.memory.regions[5];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_64;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = min_node->base;
+ max_addr = region_end(max_node);
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, max_addr - size);
+ ASSERT_LE(max_node->base, new_rgn->base);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * has enough memory to allocate a region of the requested size.
+ * Expect to allocate an aligned region at the beginning of the requested node.
+ */
+static int alloc_nid_bottom_up_numa_simple_check(void)
+{
+ int nid_req = 3;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_4, req_node->size);
+ size = req_node->size / SZ_4;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * does not have enough memory to allocate a region of the requested size:
+ *
+ * |----------------------+-----+ |
+ * | expected | req | |
+ * +----------------------+-----+----------------+
+ *
+ * |---------+ |
+ * | rgn | |
+ * +---------+-----------------------------------+
+ *
+ * Expect to allocate an aligned region at the beginning of the first node that
+ * has enough memory (in this case, nid = 0) after falling back to NUMA_NO_NODE.
+ */
+static int alloc_nid_bottom_up_numa_small_node_check(void)
+{
+ int nid_req = 1;
+ int nid_exp = 0;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *exp_node = &memblock.memory.regions[nid_exp];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = SZ_2 * req_node->size;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, exp_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(exp_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is fully reserved:
+ *
+ * |----------------------+ +-----------+ |
+ * | expected | | requested | |
+ * +----------------------+-----+-----------+--------------------+
+ *
+ * |-----------+ +-----------+ |
+ * | new | | reserved | |
+ * +-----------+----------------+-----------+--------------------+
+ *
+ * Expect to allocate an aligned region at the beginning of the first node that
+ * is large enough and has enough unreserved memory (in this case, nid = 0)
+ * after falling back to NUMA_NO_NODE. The region count and total size get
+ * updated.
+ */
+static int alloc_nid_bottom_up_numa_node_reserved_check(void)
+{
+ int nid_req = 2;
+ int nid_exp = 0;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *exp_node = &memblock.memory.regions[nid_exp];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = req_node->size;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ memblock_reserve(req_node->base, req_node->size);
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, exp_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(exp_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, size + req_node->size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved but has enough memory for the allocated region:
+ *
+ * | +---------------------------------------+ |
+ * | | requested | |
+ * +-----------+---------------------------------------+---------+
+ *
+ * | +------------------+-----+ |
+ * | | reserved | new | |
+ * +-----------+------------------+-----+------------------------+
+ *
+ * Expect to allocate an aligned region in the requested node that merges with
+ * the existing reserved region. The total size gets updated.
+ */
+static int alloc_nid_bottom_up_numa_part_reserved_check(void)
+{
+ int nid_req = 4;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ struct region r1;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t total_size;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_8, req_node->size);
+ r1.base = req_node->base;
+ r1.size = req_node->size / SZ_2;
+ size = r1.size / SZ_4;
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+ total_size = size + r1.size;
+
+ memblock_reserve(r1.base, r1.size);
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, total_size);
+ ASSERT_EQ(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved and does not have enough contiguous memory for the
+ * allocated region:
+ *
+ * |----------------------+ +-----------------------+ |
+ * | expected | | requested | |
+ * +----------------------+-------+-----------------------+---------+
+ *
+ * |-----------+ +----------+ |
+ * | new | | reserved | |
+ * +-----------+------------------------+----------+----------------+
+ *
+ * Expect to allocate an aligned region at the beginning of the first
+ * node that is large enough and has enough unreserved memory (in this case,
+ * nid = 0) after falling back to NUMA_NO_NODE. The region count and total size
+ * get updated.
+ */
+static int alloc_nid_bottom_up_numa_part_reserved_fallback_check(void)
+{
+ int nid_req = 4;
+ int nid_exp = 0;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *exp_node = &memblock.memory.regions[nid_exp];
+ void *allocated_ptr = NULL;
+ struct region r1;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ ASSERT_LE(SZ_4, req_node->size);
+ size = req_node->size / SZ_2;
+ r1.base = req_node->base + (size / SZ_2);
+ r1.size = size;
+
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ memblock_reserve(r1.base, r1.size);
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, exp_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(exp_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, size + r1.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the first
+ * node is the requested node:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +-----------------------+-----------+ |
+ * | | requested | node3 | |
+ * +-----------+-----------------------+-----------+--------------+
+ * + +
+ * | +-----------+ |
+ * | | rgn | |
+ * +-----------+-----------+--------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region at the beginning
+ * of the requested node.
+ */
+static int alloc_nid_bottom_up_numa_split_range_low_check(void)
+{
+ int nid_req = 2;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_512;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t req_node_end;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ req_node_end = region_end(req_node);
+ min_addr = req_node_end - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(new_rgn), req_node_end);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the second
+ * node is the requested node:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * |------------------+ +----------------------+---------+ |
+ * | expected | | previous |requested| |
+ * +------------------+--------+----------------------+---------+------+
+ * + +
+ * |---------+ |
+ * | rgn | |
+ * +---------+---------------------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region at the beginning
+ * of the first node that has enough memory.
+ */
+static int alloc_nid_bottom_up_numa_split_range_high_check(void)
+{
+ int nid_req = 3;
+ int nid_exp = 0;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *exp_node = &memblock.memory.regions[nid_exp];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_512;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+ phys_addr_t exp_node_end;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ exp_node_end = region_end(req_node);
+ min_addr = req_node->base - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, exp_node->base);
+ ASSERT_LE(region_end(new_rgn), exp_node_end);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the requested
+ * node ends before min_addr:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +---------------+ +-------------+---------+ |
+ * | | requested | | node1 | node2 | |
+ * +----+---------------+--------+-------------+---------+---------+
+ * + +
+ * | +---------+ |
+ * | | rgn | |
+ * +----+---------+------------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that starts at
+ * the beginning of the requested node.
+ */
+static int alloc_nid_bottom_up_numa_no_overlap_split_check(void)
+{
+ int nid_req = 2;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *node2 = &memblock.memory.regions[6];
+ void *allocated_ptr = NULL;
+ phys_addr_t size;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ size = SZ_512;
+ min_addr = node2->base - SZ_256;
+ max_addr = min_addr + size;
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node ends
+ * before min_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * |-----------+ +----------+----...----+----------+ |
+ * | requested | | min node | ... | max node | |
+ * +-----------+-----------+----------+----...----+----------+------+
+ * + +
+ * | +-----+ |
+ * | | rgn | |
+ * +-----------------------+-----+----------------------------------+
+ *
+ * Expect to allocate a memory region at the beginning of the first node
+ * in the range after falling back to NUMA_NO_NODE.
+ */
+static int alloc_nid_bottom_up_numa_no_overlap_low_check(void)
+{
+ int nid_req = 0;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *min_node = &memblock.memory.regions[2];
+ struct memblock_region *max_node = &memblock.memory.regions[5];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_64;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = min_node->base;
+ max_addr = region_end(max_node);
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, min_addr);
+ ASSERT_LE(region_end(new_rgn), region_end(min_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node starts
+ * after max_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +----------+----...----+----------+ +---------+ |
+ * | | min node | ... | max node | |requested| |
+ * +-----+----------+----...----+----------+---------+---------+---+
+ * + +
+ * | +-----+ |
+ * | | rgn | |
+ * +-----+-----+---------------------------------------------------+
+ *
+ * Expect to allocate a memory region at the beginning of the first node
+ * in the range after falling back to NUMA_NO_NODE.
+ */
+static int alloc_nid_bottom_up_numa_no_overlap_high_check(void)
+{
+ int nid_req = 7;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *min_node = &memblock.memory.regions[2];
+ struct memblock_region *max_node = &memblock.memory.regions[5];
+ void *allocated_ptr = NULL;
+ phys_addr_t size = SZ_64;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = min_node->base;
+ max_addr = region_end(max_node);
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, size);
+ ASSERT_EQ(new_rgn->base, min_addr);
+ ASSERT_LE(region_end(new_rgn), region_end(min_node));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * does not have enough memory to allocate a region of the requested size.
+ * Additionally, none of the nodes have enough memory to allocate the region:
+ *
+ * +-----------------------------------+
+ * | new |
+ * +-----------------------------------+
+ * |-------+-------+-------+-------+-------+-------+-------+-------|
+ * | node0 | node1 | node2 | node3 | node4 | node5 | node6 | node7 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_nid_numa_large_region_generic_check(void)
+{
+ int nid_req = 3;
+ void *allocated_ptr = NULL;
+ phys_addr_t size = MEM_SIZE / SZ_2;
+ phys_addr_t min_addr;
+ phys_addr_t max_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ min_addr = memblock_start_of_DRAM();
+ max_addr = memblock_end_of_DRAM();
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_addr range when
+ * there are two reserved regions at the borders. The requested node starts at
+ * min_addr and ends at max_addr and is the same size as the region to be
+ * allocated:
+ *
+ * min_addr
+ * | max_addr
+ * | |
+ * v v
+ * | +-----------+-----------------------+-----------------------|
+ * | | node5 | requested | node7 |
+ * +------+-----------+-----------------------+-----------------------+
+ * + +
+ * | +----+-----------------------+----+ |
+ * | | r2 | new | r1 | |
+ * +-------------+----+-----------------------+----+------------------+
+ *
+ * Expect to merge all of the regions into one. The region counter and total
+ * size fields get updated.
+ */
+static int alloc_nid_numa_reserved_full_merge_generic_check(void)
+{
+ int nid_req = 6;
+ int nid_next = nid_req + 1;
+ struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+ struct memblock_region *next_node = &memblock.memory.regions[nid_next];
+ void *allocated_ptr = NULL;
+ struct region r1, r2;
+ phys_addr_t size = req_node->size;
+ phys_addr_t total_size;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ r1.base = next_node->base;
+ r1.size = SZ_128;
+
+ r2.size = SZ_128;
+ r2.base = r1.base - (size + r2.size);
+
+ total_size = r1.size + r2.size + size;
+ min_addr = r2.base + r2.size;
+ max_addr = r1.base;
+
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+ assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
+
+ ASSERT_EQ(new_rgn->size, total_size);
+ ASSERT_EQ(new_rgn->base, r2.base);
+
+ ASSERT_LE(new_rgn->base, req_node->base);
+ ASSERT_LE(region_end(req_node), region_end(new_rgn));
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range,
+ * where the total range can fit the region, but it is split between two nodes
+ * and everything else is reserved. Additionally, nid is set to NUMA_NO_NODE
+ * instead of requesting a specific node:
+ *
+ * +-----------+
+ * | new |
+ * +-----------+
+ * | +---------------------+-----------|
+ * | | prev node | next node |
+ * +------+---------------------+-----------+
+ * + +
+ * |----------------------+ +-----|
+ * | r1 | | r2 |
+ * +----------------------+-----------+-----+
+ * ^ ^
+ * | |
+ * | max_addr
+ * |
+ * min_addr
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_nid_numa_split_all_reserved_generic_check(void)
+{
+ void *allocated_ptr = NULL;
+ struct memblock_region *next_node = &memblock.memory.regions[7];
+ struct region r1, r2;
+ phys_addr_t size = SZ_256;
+ phys_addr_t max_addr;
+ phys_addr_t min_addr;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ r2.base = next_node->base + SZ_128;
+ r2.size = memblock_end_of_DRAM() - r2.base;
+
+ r1.size = MEM_SIZE - (r2.size + size);
+ r1.base = memblock_start_of_DRAM();
+
+ min_addr = r1.base + r1.size;
+ max_addr = r2.base;
+
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+ min_addr, max_addr,
+ NUMA_NO_NODE);
+
+ ASSERT_EQ(allocated_ptr, NULL);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to allocate a memory region through the
+ * memblock_alloc_node() on a NUMA node with id `nid`. Expected to have the
+ * correct NUMA node set for the new region.
+ */
+static int alloc_node_on_correct_nid(void)
+{
+ int nid_req = 2;
+ void *allocated_ptr = NULL;
+#ifdef CONFIG_NUMA
+ struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+#endif
+ phys_addr_t size = SZ_512;
+
+ PREFIX_PUSH();
+ setup_numa_memblock(node_fractions);
+
+ allocated_ptr = memblock_alloc_node(size, SMP_CACHE_BYTES, nid_req);
+
+ ASSERT_NE(allocated_ptr, NULL);
+#ifdef CONFIG_NUMA
+ ASSERT_EQ(nid_req, req_node->nid);
+#endif
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/* Test case wrappers for NUMA tests */
+static int alloc_nid_numa_simple_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_simple_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_simple_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_small_node_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_small_node_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_small_node_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_node_reserved_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_node_reserved_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_node_reserved_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_part_reserved_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_part_reserved_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_part_reserved_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_part_reserved_fallback_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_part_reserved_fallback_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_part_reserved_fallback_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_split_range_low_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_split_range_low_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_split_range_low_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_split_range_high_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_split_range_high_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_split_range_high_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_no_overlap_split_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_no_overlap_split_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_no_overlap_split_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_no_overlap_low_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_no_overlap_low_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_no_overlap_low_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_no_overlap_high_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ memblock_set_bottom_up(false);
+ alloc_nid_top_down_numa_no_overlap_high_check();
+ memblock_set_bottom_up(true);
+ alloc_nid_bottom_up_numa_no_overlap_high_check();
+
+ return 0;
+}
+
+static int alloc_nid_numa_large_region_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_nid_numa_large_region_generic_check);
+ run_bottom_up(alloc_nid_numa_large_region_generic_check);
+
+ return 0;
+}
+
+static int alloc_nid_numa_reserved_full_merge_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_nid_numa_reserved_full_merge_generic_check);
+ run_bottom_up(alloc_nid_numa_reserved_full_merge_generic_check);
+
+ return 0;
+}
+
+static int alloc_nid_numa_split_all_reserved_check(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_nid_numa_split_all_reserved_generic_check);
+ run_bottom_up(alloc_nid_numa_split_all_reserved_generic_check);
+
+ return 0;
+}
+
+static int alloc_node_numa_on_correct_nid(void)
+{
+ test_print("\tRunning %s...\n", __func__);
+ run_top_down(alloc_node_on_correct_nid);
+ run_bottom_up(alloc_node_on_correct_nid);
+
+ return 0;
+}
+
+int __memblock_alloc_nid_numa_checks(void)
+{
+ test_print("Running %s NUMA tests...\n",
+ get_memblock_alloc_nid_name(alloc_nid_test_flags));
+
+ alloc_nid_numa_simple_check();
+ alloc_nid_numa_small_node_check();
+ alloc_nid_numa_node_reserved_check();
+ alloc_nid_numa_part_reserved_check();
+ alloc_nid_numa_part_reserved_fallback_check();
+ alloc_nid_numa_split_range_low_check();
+ alloc_nid_numa_split_range_high_check();
+
+ alloc_nid_numa_no_overlap_split_check();
+ alloc_nid_numa_no_overlap_low_check();
+ alloc_nid_numa_no_overlap_high_check();
+ alloc_nid_numa_large_region_check();
+ alloc_nid_numa_reserved_full_merge_check();
+ alloc_nid_numa_split_all_reserved_check();
+
+ alloc_node_numa_on_correct_nid();
+
+ return 0;
+}
+
+static int memblock_alloc_nid_checks_internal(int flags)
+{
+ alloc_nid_test_flags = flags;
+
+ prefix_reset();
+ prefix_push(get_memblock_alloc_nid_name(flags));
+
+ reset_memblock_attributes();
+ dummy_physical_memory_init();
+
+ memblock_alloc_nid_range_checks();
+ memblock_alloc_nid_numa_checks();
+
+ dummy_physical_memory_cleanup();
+
+ prefix_pop();
+
+ return 0;
+}
+
+int memblock_alloc_nid_checks(void)
+{
+ memblock_alloc_nid_checks_internal(TEST_F_NONE);
+ memblock_alloc_nid_checks_internal(TEST_F_RAW);
+
+ return 0;
+}
+
+int memblock_alloc_exact_nid_range_checks(void)
+{
+ alloc_nid_test_flags = (TEST_F_RAW | TEST_F_EXACT);
+
+ memblock_alloc_nid_range_checks();
+
+ return 0;
+}
diff --git a/tools/testing/memblock/tests/alloc_nid_api.h b/tools/testing/memblock/tests/alloc_nid_api.h
new file mode 100644
index 000000000000..2b8cabacacb8
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_nid_api.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_ALLOC_NID_H
+#define _MEMBLOCK_ALLOC_NID_H
+
+#include "common.h"
+
+int memblock_alloc_nid_checks(void);
+int memblock_alloc_exact_nid_range_checks(void);
+int __memblock_alloc_nid_numa_checks(void);
+
+#ifdef CONFIG_NUMA
+static inline int memblock_alloc_nid_numa_checks(void)
+{
+ __memblock_alloc_nid_numa_checks();
+ return 0;
+}
+
+#else
+static inline int memblock_alloc_nid_numa_checks(void)
+{
+ return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
+#endif
diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
new file mode 100644
index 000000000000..01e836fba488
--- /dev/null
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -0,0 +1,2551 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "basic_api.h"
+#include <string.h>
+#include <linux/memblock.h>
+
+#define EXPECTED_MEMBLOCK_REGIONS 128
+#define FUNC_ADD "memblock_add"
+#define FUNC_RESERVE "memblock_reserve"
+#define FUNC_REMOVE "memblock_remove"
+#define FUNC_FREE "memblock_free"
+#define FUNC_TRIM "memblock_trim_memory"
+
+static int memblock_initialization_check(void)
+{
+ PREFIX_PUSH();
+
+ ASSERT_NE(memblock.memory.regions, NULL);
+ ASSERT_EQ(memblock.memory.cnt, 0);
+ ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS);
+ ASSERT_EQ(strcmp(memblock.memory.name, "memory"), 0);
+
+ ASSERT_NE(memblock.reserved.regions, NULL);
+ ASSERT_EQ(memblock.reserved.cnt, 0);
+ ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS);
+ ASSERT_EQ(strcmp(memblock.reserved.name, "reserved"), 0);
+
+ ASSERT_EQ(memblock.bottom_up, false);
+ ASSERT_EQ(memblock.current_limit, MEMBLOCK_ALLOC_ANYWHERE);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that adds a memory block of a specified base address
+ * and size to the collection of available memory regions (memblock.memory).
+ * Expect to create a new entry. The region counter and total memory get
+ * updated.
+ */
+static int memblock_add_simple_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r = {
+ .base = SZ_1G,
+ .size = SZ_4M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r.base, r.size);
+
+ ASSERT_EQ(rgn->base, r.base);
+ ASSERT_EQ(rgn->size, r.size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, r.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that adds a memory block of a specified base address, size,
+ * NUMA node and memory flags to the collection of available memory regions.
+ * Expect to create a new entry. The region counter and total memory get
+ * updated.
+ */
+static int memblock_add_node_simple_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r = {
+ .base = SZ_1M,
+ .size = SZ_16M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add_node(r.base, r.size, 1, MEMBLOCK_HOTPLUG);
+
+ ASSERT_EQ(rgn->base, r.base);
+ ASSERT_EQ(rgn->size, r.size);
+#ifdef CONFIG_NUMA
+ ASSERT_EQ(rgn->nid, 1);
+#endif
+ ASSERT_EQ(rgn->flags, MEMBLOCK_HOTPLUG);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, r.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to add two memory blocks that don't overlap with one
+ * another:
+ *
+ * | +--------+ +--------+ |
+ * | | r1 | | r2 | |
+ * +--------+--------+--------+--------+--+
+ *
+ * Expect to add two correctly initialized entries to the collection of
+ * available memory regions (memblock.memory). The total size and
+ * region counter fields get updated.
+ */
+static int memblock_add_disjoint_check(void)
+{
+ struct memblock_region *rgn1, *rgn2;
+
+ rgn1 = &memblock.memory.regions[0];
+ rgn2 = &memblock.memory.regions[1];
+
+ struct region r1 = {
+ .base = SZ_1G,
+ .size = SZ_8K
+ };
+ struct region r2 = {
+ .base = SZ_1G + SZ_16K,
+ .size = SZ_8K
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+
+ ASSERT_EQ(rgn1->base, r1.base);
+ ASSERT_EQ(rgn1->size, r1.size);
+
+ ASSERT_EQ(rgn2->base, r2.base);
+ ASSERT_EQ(rgn2->size, r2.size);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+ ASSERT_EQ(memblock.memory.total_size, r1.size + r2.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to add two memory blocks r1 and r2, where r2 overlaps
+ * with the beginning of r1 (that is r1.base < r2.base + r2.size):
+ *
+ * | +----+----+------------+ |
+ * | | |r2 | r1 | |
+ * +----+----+----+------------+----------+
+ * ^ ^
+ * | |
+ * | r1.base
+ * |
+ * r2.base
+ *
+ * Expect to merge the two entries into one region that starts at r2.base
+ * and has size of two regions minus their intersection. The total size of
+ * the available memory is updated, and the region counter stays the same.
+ */
+static int memblock_add_overlap_top_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = SZ_512M,
+ .size = SZ_1G
+ };
+ struct region r2 = {
+ .base = SZ_256M,
+ .size = SZ_512M
+ };
+
+ PREFIX_PUSH();
+
+ total_size = (r1.base - r2.base) + r1.size;
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r2.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to add two memory blocks r1 and r2, where r2 overlaps
+ * with the end of r1 (that is r2.base < r1.base + r1.size):
+ *
+ * | +--+------+----------+ |
+ * | | | r1 | r2 | |
+ * +--+--+------+----------+--------------+
+ * ^ ^
+ * | |
+ * | r2.base
+ * |
+ * r1.base
+ *
+ * Expect to merge the two entries into one region that starts at r1.base
+ * and has size of two regions minus their intersection. The total size of
+ * the available memory is updated, and the region counter stays the same.
+ */
+static int memblock_add_overlap_bottom_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = SZ_128M,
+ .size = SZ_512M
+ };
+ struct region r2 = {
+ .base = SZ_256M,
+ .size = SZ_1G
+ };
+
+ PREFIX_PUSH();
+
+ total_size = (r2.base - r1.base) + r2.size;
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to add two memory blocks r1 and r2, where r2 is
+ * within the range of r1 (that is r1.base < r2.base &&
+ * r2.base + r2.size < r1.base + r1.size):
+ *
+ * | +-------+--+-----------------------+
+ * | | |r2| r1 |
+ * +---+-------+--+-----------------------+
+ * ^
+ * |
+ * r1.base
+ *
+ * Expect to merge two entries into one region that stays the same.
+ * The counter and total size of available memory are not updated.
+ */
+static int memblock_add_within_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = SZ_8M,
+ .size = SZ_32M
+ };
+ struct region r2 = {
+ .base = SZ_16M,
+ .size = SZ_1M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, r1.size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, r1.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to add the same memory block twice. Expect
+ * the counter and total size of available memory to not be updated.
+ */
+static int memblock_add_twice_check(void)
+{
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_2M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+
+ memblock_add(r.base, r.size);
+ memblock_add(r.base, r.size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, r.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to add two memory blocks that don't overlap with one
+ * another and then add a third memory block in the space between the first two:
+ *
+ * | +--------+--------+--------+ |
+ * | | r1 | r3 | r2 | |
+ * +--------+--------+--------+--------+--+
+ *
+ * Expect to merge the three entries into one region that starts at r1.base
+ * and has size of r1.size + r2.size + r3.size. The region counter and total
+ * size of the available memory are updated.
+ */
+static int memblock_add_between_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = SZ_1G,
+ .size = SZ_8K
+ };
+ struct region r2 = {
+ .base = SZ_1G + SZ_16K,
+ .size = SZ_8K
+ };
+ struct region r3 = {
+ .base = SZ_1G + SZ_8K,
+ .size = SZ_8K
+ };
+
+ PREFIX_PUSH();
+
+ total_size = r1.size + r2.size + r3.size;
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+ memblock_add(r3.base, r3.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to add a memory block r when r extends past
+ * PHYS_ADDR_MAX:
+ *
+ * +--------+
+ * | r |
+ * +--------+
+ * | +----+
+ * | | rgn|
+ * +----------------------------+----+
+ *
+ * Expect to add a memory block of size PHYS_ADDR_MAX - r.base. Expect the
+ * total size of available memory and the counter to be updated.
+ */
+static int memblock_add_near_max_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r = {
+ .base = PHYS_ADDR_MAX - SZ_1M,
+ .size = SZ_2M
+ };
+
+ PREFIX_PUSH();
+
+ total_size = PHYS_ADDR_MAX - r.base;
+
+ reset_memblock_regions();
+ memblock_add(r.base, r.size);
+
+ ASSERT_EQ(rgn->base, r.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that trying to add the 129th memory block.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as
+ * memory.regions.
+ */
+static int memblock_add_many_check(void)
+{
+ int i;
+ void *orig_region;
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_16K,
+ };
+ phys_addr_t new_memory_regions_size;
+ phys_addr_t base, size = SZ_64;
+ phys_addr_t gap_size = SZ_64;
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ dummy_physical_memory_init();
+ /*
+ * We allocated enough memory by using dummy_physical_memory_init(), and
+ * split it into small block. First we split a large enough memory block
+ * as the memory region which will be choosed by memblock_double_array().
+ */
+ base = PAGE_ALIGN(dummy_physical_memory_base());
+ new_memory_regions_size = PAGE_ALIGN(INIT_MEMBLOCK_REGIONS * 2 *
+ sizeof(struct memblock_region));
+ memblock_add(base, new_memory_regions_size);
+
+ /* This is the base of small memory block. */
+ base += new_memory_regions_size + gap_size;
+
+ orig_region = memblock.memory.regions;
+
+ for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) {
+ /*
+ * Add these small block to fulfill the memblock. We keep a
+ * gap between the nearby memory to avoid being merged.
+ */
+ memblock_add(base, size);
+ base += size + gap_size;
+
+ ASSERT_EQ(memblock.memory.cnt, i + 2);
+ ASSERT_EQ(memblock.memory.total_size, new_memory_regions_size +
+ (i + 1) * size);
+ }
+
+ /*
+ * At there, memblock_double_array() has been succeed, check if it
+ * update the memory.max.
+ */
+ ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /* memblock_double_array() will reserve the memory it used. Check it. */
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, new_memory_regions_size);
+
+ /*
+ * Now memblock_double_array() works fine. Let's check after the
+ * double_array(), the memblock_add() still works as normal.
+ */
+ memblock_add(r.base, r.size);
+ ASSERT_EQ(memblock.memory.regions[0].base, r.base);
+ ASSERT_EQ(memblock.memory.regions[0].size, r.size);
+
+ ASSERT_EQ(memblock.memory.cnt, INIT_MEMBLOCK_REGIONS + 2);
+ ASSERT_EQ(memblock.memory.total_size, INIT_MEMBLOCK_REGIONS * size +
+ new_memory_regions_size +
+ r.size);
+ ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current memory.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.memory.regions = orig_region;
+ memblock.memory.cnt = INIT_MEMBLOCK_REGIONS;
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_add_checks(void)
+{
+ prefix_reset();
+ prefix_push(FUNC_ADD);
+ test_print("Running %s tests...\n", FUNC_ADD);
+
+ memblock_add_simple_check();
+ memblock_add_node_simple_check();
+ memblock_add_disjoint_check();
+ memblock_add_overlap_top_check();
+ memblock_add_overlap_bottom_check();
+ memblock_add_within_check();
+ memblock_add_twice_check();
+ memblock_add_between_check();
+ memblock_add_near_max_check();
+ memblock_add_many_check();
+
+ prefix_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that marks a memory block of a specified base address
+ * and size as reserved and to the collection of reserved memory regions
+ * (memblock.reserved). Expect to create a new entry. The region counter
+ * and total memory size are updated.
+ */
+static int memblock_reserve_simple_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r = {
+ .base = SZ_2G,
+ .size = SZ_128M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_reserve(r.base, r.size);
+
+ ASSERT_EQ(rgn->base, r.base);
+ ASSERT_EQ(rgn->size, r.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to mark two memory blocks that don't overlap as reserved:
+ *
+ * | +--+ +----------------+ |
+ * | |r1| | r2 | |
+ * +--------+--+------+----------------+--+
+ *
+ * Expect to add two entries to the collection of reserved memory regions
+ * (memblock.reserved). The total size and region counter for
+ * memblock.reserved are updated.
+ */
+static int memblock_reserve_disjoint_check(void)
+{
+ struct memblock_region *rgn1, *rgn2;
+
+ rgn1 = &memblock.reserved.regions[0];
+ rgn2 = &memblock.reserved.regions[1];
+
+ struct region r1 = {
+ .base = SZ_256M,
+ .size = SZ_16M
+ };
+ struct region r2 = {
+ .base = SZ_512M,
+ .size = SZ_512M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ ASSERT_EQ(rgn1->base, r1.base);
+ ASSERT_EQ(rgn1->size, r1.size);
+
+ ASSERT_EQ(rgn2->base, r2.base);
+ ASSERT_EQ(rgn2->size, r2.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, r1.size + r2.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to mark two memory blocks r1 and r2 as reserved,
+ * where r2 overlaps with the beginning of r1 (that is
+ * r1.base < r2.base + r2.size):
+ *
+ * | +--------------+--+--------------+ |
+ * | | r2 | | r1 | |
+ * +--+--------------+--+--------------+--+
+ * ^ ^
+ * | |
+ * | r1.base
+ * |
+ * r2.base
+ *
+ * Expect to merge two entries into one region that starts at r2.base and
+ * has size of two regions minus their intersection. The total size of the
+ * reserved memory is updated, and the region counter is not updated.
+ */
+static int memblock_reserve_overlap_top_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = SZ_1G,
+ .size = SZ_1G
+ };
+ struct region r2 = {
+ .base = SZ_128M,
+ .size = SZ_1G
+ };
+
+ PREFIX_PUSH();
+
+ total_size = (r1.base - r2.base) + r1.size;
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r2.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to mark two memory blocks r1 and r2 as reserved,
+ * where r2 overlaps with the end of r1 (that is
+ * r2.base < r1.base + r1.size):
+ *
+ * | +--------------+--+--------------+ |
+ * | | r1 | | r2 | |
+ * +--+--------------+--+--------------+--+
+ * ^ ^
+ * | |
+ * | r2.base
+ * |
+ * r1.base
+ *
+ * Expect to merge two entries into one region that starts at r1.base and
+ * has size of two regions minus their intersection. The total size of the
+ * reserved memory is updated, and the region counter is not updated.
+ */
+static int memblock_reserve_overlap_bottom_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = SZ_2K,
+ .size = SZ_128K
+ };
+ struct region r2 = {
+ .base = SZ_128K,
+ .size = SZ_128K
+ };
+
+ PREFIX_PUSH();
+
+ total_size = (r2.base - r1.base) + r2.size;
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to mark two memory blocks r1 and r2 as reserved,
+ * where r2 is within the range of r1 (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)):
+ *
+ * | +-----+--+---------------------------|
+ * | | |r2| r1 |
+ * +-+-----+--+---------------------------+
+ * ^ ^
+ * | |
+ * | r2.base
+ * |
+ * r1.base
+ *
+ * Expect to merge two entries into one region that stays the same. The
+ * counter and total size of available memory are not updated.
+ */
+static int memblock_reserve_within_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = SZ_1M,
+ .size = SZ_8M
+ };
+ struct region r2 = {
+ .base = SZ_2M,
+ .size = SZ_64K
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, r1.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, r1.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to reserve the same memory block twice.
+ * Expect the region counter and total size of reserved memory to not
+ * be updated.
+ */
+static int memblock_reserve_twice_check(void)
+{
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_2M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+
+ memblock_reserve(r.base, r.size);
+ memblock_reserve(r.base, r.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, r.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to mark two memory blocks that don't overlap as reserved
+ * and then reserve a third memory block in the space between the first two:
+ *
+ * | +--------+--------+--------+ |
+ * | | r1 | r3 | r2 | |
+ * +--------+--------+--------+--------+--+
+ *
+ * Expect to merge the three entries into one reserved region that starts at
+ * r1.base and has size of r1.size + r2.size + r3.size. The region counter and
+ * total for memblock.reserved are updated.
+ */
+static int memblock_reserve_between_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = SZ_1G,
+ .size = SZ_8K
+ };
+ struct region r2 = {
+ .base = SZ_1G + SZ_16K,
+ .size = SZ_8K
+ };
+ struct region r3 = {
+ .base = SZ_1G + SZ_8K,
+ .size = SZ_8K
+ };
+
+ PREFIX_PUSH();
+
+ total_size = r1.size + r2.size + r3.size;
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+ memblock_reserve(r3.base, r3.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to reserve a memory block r when r extends past
+ * PHYS_ADDR_MAX:
+ *
+ * +--------+
+ * | r |
+ * +--------+
+ * | +----+
+ * | | rgn|
+ * +----------------------------+----+
+ *
+ * Expect to reserve a memory block of size PHYS_ADDR_MAX - r.base. Expect the
+ * total size of reserved memory and the counter to be updated.
+ */
+static int memblock_reserve_near_max_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r = {
+ .base = PHYS_ADDR_MAX - SZ_1M,
+ .size = SZ_2M
+ };
+
+ PREFIX_PUSH();
+
+ total_size = PHYS_ADDR_MAX - r.base;
+
+ reset_memblock_regions();
+ memblock_reserve(r.base, r.size);
+
+ ASSERT_EQ(rgn->base, r.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that trying to reserve the 129th memory block.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as
+ * reserved.regions.
+ */
+static int memblock_reserve_many_check(void)
+{
+ int i;
+ void *orig_region;
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_16K,
+ };
+ phys_addr_t memory_base = SZ_128K;
+ phys_addr_t new_reserved_regions_size;
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ /* Add a valid memory region used by double_array(). */
+ dummy_physical_memory_init();
+ memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+
+ for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) {
+ /* Reserve some fakes memory region to fulfill the memblock. */
+ memblock_reserve(memory_base, MEM_SIZE);
+
+ ASSERT_EQ(memblock.reserved.cnt, i + 1);
+ ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE);
+
+ /* Keep the gap so these memory region will not be merged. */
+ memory_base += MEM_SIZE * 2;
+ }
+
+ orig_region = memblock.reserved.regions;
+
+ /* This reserve the 129 memory_region, and makes it double array. */
+ memblock_reserve(memory_base, MEM_SIZE);
+
+ /*
+ * This is the memory region size used by the doubled reserved.regions,
+ * and it has been reserved due to it has been used. The size is used to
+ * calculate the total_size that the memblock.reserved have now.
+ */
+ new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
+ sizeof(struct memblock_region));
+ /*
+ * The double_array() will find a free memory region as the new
+ * reserved.regions, and the used memory region will be reserved, so
+ * there will be one more region exist in the reserved memblock. And the
+ * one more reserved region's size is new_reserved_regions_size.
+ */
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * Now memblock_double_array() works fine. Let's check after the
+ * double_array(), the memblock_reserve() still works as normal.
+ */
+ memblock_reserve(r.base, r.size);
+ ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
+ ASSERT_EQ(memblock.reserved.regions[0].size, r.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size +
+ r.size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+
+ test_pass_pop();
+
+ return 0;
+}
+
+
+/*
+ * A test that trying to reserve the 129th memory block at all locations.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as reserved.regions.
+ *
+ * 0 1 2 128
+ * +-------+ +-------+ +-------+ +-------+
+ * | 32K | | 32K | | 32K | ... | 32K |
+ * +-------+-------+-------+-------+-------+ +-------+
+ * |<-32K->| |<-32K->|
+ *
+ */
+/* Keep the gap so these memory region will not be merged. */
+#define MEMORY_BASE(idx) (SZ_128K + (MEM_SIZE * 2) * (idx))
+static int memblock_reserve_all_locations_check(void)
+{
+ int i, skip;
+ void *orig_region;
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_16K,
+ };
+ phys_addr_t new_reserved_regions_size;
+
+ PREFIX_PUSH();
+
+ /* Reserve the 129th memory block for all possible positions*/
+ for (skip = 0; skip < INIT_MEMBLOCK_REGIONS + 1; skip++) {
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ /* Add a valid memory region used by double_array(). */
+ dummy_physical_memory_init();
+ memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+
+ for (i = 0; i < INIT_MEMBLOCK_REGIONS + 1; i++) {
+ if (i == skip)
+ continue;
+
+ /* Reserve some fakes memory region to fulfill the memblock. */
+ memblock_reserve(MEMORY_BASE(i), MEM_SIZE);
+
+ if (i < skip) {
+ ASSERT_EQ(memblock.reserved.cnt, i + 1);
+ ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE);
+ } else {
+ ASSERT_EQ(memblock.reserved.cnt, i);
+ ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE);
+ }
+ }
+
+ orig_region = memblock.reserved.regions;
+
+ /* This reserve the 129 memory_region, and makes it double array. */
+ memblock_reserve(MEMORY_BASE(skip), MEM_SIZE);
+
+ /*
+ * This is the memory region size used by the doubled reserved.regions,
+ * and it has been reserved due to it has been used. The size is used to
+ * calculate the total_size that the memblock.reserved have now.
+ */
+ new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
+ sizeof(struct memblock_region));
+ /*
+ * The double_array() will find a free memory region as the new
+ * reserved.regions, and the used memory region will be reserved, so
+ * there will be one more region exist in the reserved memblock. And the
+ * one more reserved region's size is new_reserved_regions_size.
+ */
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * Now memblock_double_array() works fine. Let's check after the
+ * double_array(), the memblock_reserve() still works as normal.
+ */
+ memblock_reserve(r.base, r.size);
+ ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
+ ASSERT_EQ(memblock.reserved.regions[0].size, r.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size +
+ r.size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+ }
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that trying to reserve the 129th memory block at all locations.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as reserved.regions. And make
+ * sure it doesn't conflict with the range we want to reserve.
+ *
+ * For example, we have 128 regions in reserved and now want to reserve
+ * the skipped one. Since reserved is full, memblock_double_array() would find
+ * an available range in memory for the new array. We intended to put two
+ * ranges in memory with one is the exact range of the skipped one. Before
+ * commit 48c3b583bbdd ("mm/memblock: fix overlapping allocation when doubling
+ * reserved array"), the new array would sits in the skipped range which is a
+ * conflict. The expected new array should be allocated from memory.regions[0].
+ *
+ * 0 1
+ * memory +-------+ +-------+
+ * | 32K | | 32K |
+ * +-------+ ------+-------+-------+-------+
+ * |<-32K->|<-32K->|<-32K->|
+ *
+ * 0 skipped 127
+ * reserved +-------+ ......... +-------+
+ * | 32K | . 32K . ... | 32K |
+ * +-------+-------+-------+ +-------+
+ * |<-32K->|
+ * ^
+ * |
+ * |
+ * skipped one
+ */
+/* Keep the gap so these memory region will not be merged. */
+#define MEMORY_BASE_OFFSET(idx, offset) ((offset) + (MEM_SIZE * 2) * (idx))
+static int memblock_reserve_many_may_conflict_check(void)
+{
+ int i, skip;
+ void *orig_region;
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_16K,
+ };
+ phys_addr_t new_reserved_regions_size;
+
+ /*
+ * 0 1 129
+ * +---+ +---+ +---+
+ * |32K| |32K| .. |32K|
+ * +---+ +---+ +---+
+ *
+ * Pre-allocate the range for 129 memory block + one range for double
+ * memblock.reserved.regions at idx 0.
+ */
+ dummy_physical_memory_init();
+ phys_addr_t memory_base = dummy_physical_memory_base();
+ phys_addr_t offset = PAGE_ALIGN(memory_base);
+
+ PREFIX_PUSH();
+
+ /* Reserve the 129th memory block for all possible positions*/
+ for (skip = 1; skip <= INIT_MEMBLOCK_REGIONS + 1; skip++) {
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ reset_memblock_attributes();
+ /* Add a valid memory region used by double_array(). */
+ memblock_add(MEMORY_BASE_OFFSET(0, offset), MEM_SIZE);
+ /*
+ * Add a memory region which will be reserved as 129th memory
+ * region. This is not expected to be used by double_array().
+ */
+ memblock_add(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE);
+
+ for (i = 1; i <= INIT_MEMBLOCK_REGIONS + 1; i++) {
+ if (i == skip)
+ continue;
+
+ /* Reserve some fakes memory region to fulfill the memblock. */
+ memblock_reserve(MEMORY_BASE_OFFSET(i, offset), MEM_SIZE);
+
+ if (i < skip) {
+ ASSERT_EQ(memblock.reserved.cnt, i);
+ ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE);
+ } else {
+ ASSERT_EQ(memblock.reserved.cnt, i - 1);
+ ASSERT_EQ(memblock.reserved.total_size, (i - 1) * MEM_SIZE);
+ }
+ }
+
+ orig_region = memblock.reserved.regions;
+
+ /* This reserve the 129 memory_region, and makes it double array. */
+ memblock_reserve(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE);
+
+ /*
+ * This is the memory region size used by the doubled reserved.regions,
+ * and it has been reserved due to it has been used. The size is used to
+ * calculate the total_size that the memblock.reserved have now.
+ */
+ new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
+ sizeof(struct memblock_region));
+ /*
+ * The double_array() will find a free memory region as the new
+ * reserved.regions, and the used memory region will be reserved, so
+ * there will be one more region exist in the reserved memblock. And the
+ * one more reserved region's size is new_reserved_regions_size.
+ */
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * The first reserved region is allocated for double array
+ * with the size of new_reserved_regions_size and the base to be
+ * MEMORY_BASE_OFFSET(0, offset) + SZ_32K - new_reserved_regions_size
+ */
+ ASSERT_EQ(memblock.reserved.regions[0].base + memblock.reserved.regions[0].size,
+ MEMORY_BASE_OFFSET(0, offset) + SZ_32K);
+ ASSERT_EQ(memblock.reserved.regions[0].size, new_reserved_regions_size);
+
+ /*
+ * Now memblock_double_array() works fine. Let's check after the
+ * double_array(), the memblock_reserve() still works as normal.
+ */
+ memblock_reserve(r.base, r.size);
+ ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
+ ASSERT_EQ(memblock.reserved.regions[0].size, r.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size +
+ r.size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+ }
+
+ dummy_physical_memory_cleanup();
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_reserve_checks(void)
+{
+ prefix_reset();
+ prefix_push(FUNC_RESERVE);
+ test_print("Running %s tests...\n", FUNC_RESERVE);
+
+ memblock_reserve_simple_check();
+ memblock_reserve_disjoint_check();
+ memblock_reserve_overlap_top_check();
+ memblock_reserve_overlap_bottom_check();
+ memblock_reserve_within_check();
+ memblock_reserve_twice_check();
+ memblock_reserve_between_check();
+ memblock_reserve_near_max_check();
+ memblock_reserve_many_check();
+ memblock_reserve_all_locations_check();
+ memblock_reserve_many_may_conflict_check();
+
+ prefix_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to remove a region r1 from the array of
+ * available memory regions. By "removing" a region we mean overwriting it
+ * with the next region r2 in memblock.memory:
+ *
+ * | ...... +----------------+ |
+ * | : r1 : | r2 | |
+ * +--+----+----------+----------------+--+
+ * ^
+ * |
+ * rgn.base
+ *
+ * Expect to add two memory blocks r1 and r2 and then remove r1 so that
+ * r2 is the first available region. The region counter and total size
+ * are updated.
+ */
+static int memblock_remove_simple_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = SZ_2K,
+ .size = SZ_4K
+ };
+ struct region r2 = {
+ .base = SZ_128K,
+ .size = SZ_4M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+ memblock_remove(r1.base, r1.size);
+
+ ASSERT_EQ(rgn->base, r2.base);
+ ASSERT_EQ(rgn->size, r2.size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, r2.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to remove a region r2 that was not registered as
+ * available memory (i.e. has no corresponding entry in memblock.memory):
+ *
+ * +----------------+
+ * | r2 |
+ * +----------------+
+ * | +----+ |
+ * | | r1 | |
+ * +--+----+------------------------------+
+ * ^
+ * |
+ * rgn.base
+ *
+ * Expect the array, regions counter and total size to not be modified.
+ */
+static int memblock_remove_absent_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = SZ_512K,
+ .size = SZ_4M
+ };
+ struct region r2 = {
+ .base = SZ_64M,
+ .size = SZ_1G
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_remove(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, r1.size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, r1.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to remove a region r2 that overlaps with the
+ * beginning of the already existing entry r1
+ * (that is r1.base < r2.base + r2.size):
+ *
+ * +-----------------+
+ * | r2 |
+ * +-----------------+
+ * | .........+--------+ |
+ * | : r1 | rgn | |
+ * +-----------------+--------+--------+--+
+ * ^ ^
+ * | |
+ * | rgn.base
+ * r1.base
+ *
+ * Expect that only the intersection of both regions is removed from the
+ * available memory pool. The regions counter and total size are updated.
+ */
+static int memblock_remove_overlap_top_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t r1_end, r2_end, total_size;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = SZ_32M,
+ .size = SZ_32M
+ };
+ struct region r2 = {
+ .base = SZ_16M,
+ .size = SZ_32M
+ };
+
+ PREFIX_PUSH();
+
+ r1_end = r1.base + r1.size;
+ r2_end = r2.base + r2.size;
+ total_size = r1_end - r2_end;
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_remove(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base + r2.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to remove a region r2 that overlaps with the end of
+ * the already existing region r1 (that is r2.base < r1.base + r1.size):
+ *
+ * +--------------------------------+
+ * | r2 |
+ * +--------------------------------+
+ * | +---+..... |
+ * | |rgn| r1 : |
+ * +-+---+----+---------------------------+
+ * ^
+ * |
+ * r1.base
+ *
+ * Expect that only the intersection of both regions is removed from the
+ * available memory pool. The regions counter and total size are updated.
+ */
+static int memblock_remove_overlap_bottom_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = SZ_2M,
+ .size = SZ_64M
+ };
+ struct region r2 = {
+ .base = SZ_32M,
+ .size = SZ_256M
+ };
+
+ PREFIX_PUSH();
+
+ total_size = r2.base - r1.base;
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_remove(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to remove a region r2 that is within the range of
+ * the already existing entry r1 (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)):
+ *
+ * +----+
+ * | r2 |
+ * +----+
+ * | +-------------+....+---------------+ |
+ * | | rgn1 | r1 | rgn2 | |
+ * +-+-------------+----+---------------+-+
+ * ^
+ * |
+ * r1.base
+ *
+ * Expect that the region is split into two - one that ends at r2.base and
+ * another that starts at r2.base + r2.size, with appropriate sizes. The
+ * region counter and total size are updated.
+ */
+static int memblock_remove_within_check(void)
+{
+ struct memblock_region *rgn1, *rgn2;
+ phys_addr_t r1_size, r2_size, total_size;
+
+ rgn1 = &memblock.memory.regions[0];
+ rgn2 = &memblock.memory.regions[1];
+
+ struct region r1 = {
+ .base = SZ_1M,
+ .size = SZ_32M
+ };
+ struct region r2 = {
+ .base = SZ_16M,
+ .size = SZ_1M
+ };
+
+ PREFIX_PUSH();
+
+ r1_size = r2.base - r1.base;
+ r2_size = (r1.base + r1.size) - (r2.base + r2.size);
+ total_size = r1_size + r2_size;
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_remove(r2.base, r2.size);
+
+ ASSERT_EQ(rgn1->base, r1.base);
+ ASSERT_EQ(rgn1->size, r1_size);
+
+ ASSERT_EQ(rgn2->base, r2.base + r2.size);
+ ASSERT_EQ(rgn2->size, r2_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+ ASSERT_EQ(memblock.memory.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to remove a region r1 from the array of
+ * available memory regions when r1 is the only available region.
+ * Expect to add a memory block r1 and then remove r1 so that a dummy
+ * region is added. The region counter stays the same, and the total size
+ * is updated.
+ */
+static int memblock_remove_only_region_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = SZ_2K,
+ .size = SZ_4K
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_remove(r1.base, r1.size);
+
+ ASSERT_EQ(rgn->base, 0);
+ ASSERT_EQ(rgn->size, 0);
+
+ ASSERT_EQ(memblock.memory.cnt, 0);
+ ASSERT_EQ(memblock.memory.total_size, 0);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries remove a region r2 from the array of available
+ * memory regions when r2 extends past PHYS_ADDR_MAX:
+ *
+ * +--------+
+ * | r2 |
+ * +--------+
+ * | +---+....+
+ * | |rgn| |
+ * +------------------------+---+----+
+ *
+ * Expect that only the portion between PHYS_ADDR_MAX and r2.base is removed.
+ * Expect the total size of available memory to be updated and the counter to
+ * not be updated.
+ */
+static int memblock_remove_near_max_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = PHYS_ADDR_MAX - SZ_2M,
+ .size = SZ_2M
+ };
+
+ struct region r2 = {
+ .base = PHYS_ADDR_MAX - SZ_1M,
+ .size = SZ_2M
+ };
+
+ PREFIX_PUSH();
+
+ total_size = r1.size - (PHYS_ADDR_MAX - r2.base);
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_remove(r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to remove a region r3 that overlaps with two existing
+ * regions r1 and r2:
+ *
+ * +----------------+
+ * | r3 |
+ * +----------------+
+ * | +----+..... ........+--------+
+ * | | |r1 : : |r2 | |
+ * +----+----+----+---+-------+--------+-----+
+ *
+ * Expect that only the intersections of r1 with r3 and r2 with r3 are removed
+ * from the available memory pool. Expect the total size of available memory to
+ * be updated and the counter to not be updated.
+ */
+static int memblock_remove_overlap_two_check(void)
+{
+ struct memblock_region *rgn1, *rgn2;
+ phys_addr_t new_r1_size, new_r2_size, r2_end, r3_end, total_size;
+
+ rgn1 = &memblock.memory.regions[0];
+ rgn2 = &memblock.memory.regions[1];
+
+ struct region r1 = {
+ .base = SZ_16M,
+ .size = SZ_32M
+ };
+ struct region r2 = {
+ .base = SZ_64M,
+ .size = SZ_64M
+ };
+ struct region r3 = {
+ .base = SZ_32M,
+ .size = SZ_64M
+ };
+
+ PREFIX_PUSH();
+
+ r2_end = r2.base + r2.size;
+ r3_end = r3.base + r3.size;
+ new_r1_size = r3.base - r1.base;
+ new_r2_size = r2_end - r3_end;
+ total_size = new_r1_size + new_r2_size;
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+ memblock_remove(r3.base, r3.size);
+
+ ASSERT_EQ(rgn1->base, r1.base);
+ ASSERT_EQ(rgn1->size, new_r1_size);
+
+ ASSERT_EQ(rgn2->base, r3_end);
+ ASSERT_EQ(rgn2->size, new_r2_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+ ASSERT_EQ(memblock.memory.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_remove_checks(void)
+{
+ prefix_reset();
+ prefix_push(FUNC_REMOVE);
+ test_print("Running %s tests...\n", FUNC_REMOVE);
+
+ memblock_remove_simple_check();
+ memblock_remove_absent_check();
+ memblock_remove_overlap_top_check();
+ memblock_remove_overlap_bottom_check();
+ memblock_remove_within_check();
+ memblock_remove_only_region_check();
+ memblock_remove_near_max_check();
+ memblock_remove_overlap_two_check();
+
+ prefix_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to free a memory block r1 that was marked
+ * earlier as reserved. By "freeing" a region we mean overwriting it with
+ * the next entry r2 in memblock.reserved:
+ *
+ * | ...... +----+ |
+ * | : r1 : | r2 | |
+ * +--------------+----+-----------+----+-+
+ * ^
+ * |
+ * rgn.base
+ *
+ * Expect to reserve two memory regions and then erase r1 region with the
+ * value of r2. The region counter and total size are updated.
+ */
+static int memblock_free_simple_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = SZ_4M,
+ .size = SZ_1M
+ };
+ struct region r2 = {
+ .base = SZ_8M,
+ .size = SZ_1M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+ memblock_free((void *)r1.base, r1.size);
+
+ ASSERT_EQ(rgn->base, r2.base);
+ ASSERT_EQ(rgn->size, r2.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, r2.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to free a region r2 that was not marked as reserved
+ * (i.e. has no corresponding entry in memblock.reserved):
+ *
+ * +----------------+
+ * | r2 |
+ * +----------------+
+ * | +----+ |
+ * | | r1 | |
+ * +--+----+------------------------------+
+ * ^
+ * |
+ * rgn.base
+ *
+ * The array, regions counter and total size are not modified.
+ */
+static int memblock_free_absent_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = SZ_2M,
+ .size = SZ_8K
+ };
+ struct region r2 = {
+ .base = SZ_16M,
+ .size = SZ_128M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_free((void *)r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, r1.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, r1.size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to free a region r2 that overlaps with the beginning
+ * of the already existing entry r1 (that is r1.base < r2.base + r2.size):
+ *
+ * +----+
+ * | r2 |
+ * +----+
+ * | ...+--------------+ |
+ * | : | r1 | |
+ * +----+--+--------------+---------------+
+ * ^ ^
+ * | |
+ * | rgn.base
+ * |
+ * r1.base
+ *
+ * Expect that only the intersection of both regions is freed. The
+ * regions counter and total size are updated.
+ */
+static int memblock_free_overlap_top_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = SZ_8M,
+ .size = SZ_32M
+ };
+ struct region r2 = {
+ .base = SZ_1M,
+ .size = SZ_8M
+ };
+
+ PREFIX_PUSH();
+
+ total_size = (r1.size + r1.base) - (r2.base + r2.size);
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_free((void *)r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r2.base + r2.size);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to free a region r2 that overlaps with the end of
+ * the already existing entry r1 (that is r2.base < r1.base + r1.size):
+ *
+ * +----------------+
+ * | r2 |
+ * +----------------+
+ * | +-----------+..... |
+ * | | r1 | : |
+ * +----+-----------+----+----------------+
+ *
+ * Expect that only the intersection of both regions is freed. The
+ * regions counter and total size are updated.
+ */
+static int memblock_free_overlap_bottom_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = SZ_8M,
+ .size = SZ_32M
+ };
+ struct region r2 = {
+ .base = SZ_32M,
+ .size = SZ_32M
+ };
+
+ PREFIX_PUSH();
+
+ total_size = r2.base - r1.base;
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_free((void *)r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to free a region r2 that is within the range of the
+ * already existing entry r1 (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)):
+ *
+ * +----+
+ * | r2 |
+ * +----+
+ * | +------------+....+---------------+
+ * | | rgn1 | r1 | rgn2 |
+ * +----+------------+----+---------------+
+ * ^
+ * |
+ * r1.base
+ *
+ * Expect that the region is split into two - one that ends at r2.base and
+ * another that starts at r2.base + r2.size, with appropriate sizes. The
+ * region counter and total size fields are updated.
+ */
+static int memblock_free_within_check(void)
+{
+ struct memblock_region *rgn1, *rgn2;
+ phys_addr_t r1_size, r2_size, total_size;
+
+ rgn1 = &memblock.reserved.regions[0];
+ rgn2 = &memblock.reserved.regions[1];
+
+ struct region r1 = {
+ .base = SZ_1M,
+ .size = SZ_8M
+ };
+ struct region r2 = {
+ .base = SZ_4M,
+ .size = SZ_1M
+ };
+
+ PREFIX_PUSH();
+
+ r1_size = r2.base - r1.base;
+ r2_size = (r1.base + r1.size) - (r2.base + r2.size);
+ total_size = r1_size + r2_size;
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_free((void *)r2.base, r2.size);
+
+ ASSERT_EQ(rgn1->base, r1.base);
+ ASSERT_EQ(rgn1->size, r1_size);
+
+ ASSERT_EQ(rgn2->base, r2.base + r2.size);
+ ASSERT_EQ(rgn2->size, r2_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries to free a memory block r1 that was marked
+ * earlier as reserved when r1 is the only available region.
+ * Expect to reserve a memory block r1 and then free r1 so that r1 is
+ * overwritten with a dummy region. The region counter stays the same,
+ * and the total size is updated.
+ */
+static int memblock_free_only_region_check(void)
+{
+ struct memblock_region *rgn;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = SZ_2K,
+ .size = SZ_4K
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_free((void *)r1.base, r1.size);
+
+ ASSERT_EQ(rgn->base, 0);
+ ASSERT_EQ(rgn->size, 0);
+
+ ASSERT_EQ(memblock.reserved.cnt, 0);
+ ASSERT_EQ(memblock.reserved.total_size, 0);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A simple test that tries free a region r2 when r2 extends past PHYS_ADDR_MAX:
+ *
+ * +--------+
+ * | r2 |
+ * +--------+
+ * | +---+....+
+ * | |rgn| |
+ * +------------------------+---+----+
+ *
+ * Expect that only the portion between PHYS_ADDR_MAX and r2.base is freed.
+ * Expect the total size of reserved memory to be updated and the counter to
+ * not be updated.
+ */
+static int memblock_free_near_max_check(void)
+{
+ struct memblock_region *rgn;
+ phys_addr_t total_size;
+
+ rgn = &memblock.reserved.regions[0];
+
+ struct region r1 = {
+ .base = PHYS_ADDR_MAX - SZ_2M,
+ .size = SZ_2M
+ };
+
+ struct region r2 = {
+ .base = PHYS_ADDR_MAX - SZ_1M,
+ .size = SZ_2M
+ };
+
+ PREFIX_PUSH();
+
+ total_size = r1.size - (PHYS_ADDR_MAX - r2.base);
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_free((void *)r2.base, r2.size);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, total_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to free a reserved region r3 that overlaps with two
+ * existing reserved regions r1 and r2:
+ *
+ * +----------------+
+ * | r3 |
+ * +----------------+
+ * | +----+..... ........+--------+
+ * | | |r1 : : |r2 | |
+ * +----+----+----+---+-------+--------+-----+
+ *
+ * Expect that only the intersections of r1 with r3 and r2 with r3 are freed
+ * from the collection of reserved memory. Expect the total size of reserved
+ * memory to be updated and the counter to not be updated.
+ */
+static int memblock_free_overlap_two_check(void)
+{
+ struct memblock_region *rgn1, *rgn2;
+ phys_addr_t new_r1_size, new_r2_size, r2_end, r3_end, total_size;
+
+ rgn1 = &memblock.reserved.regions[0];
+ rgn2 = &memblock.reserved.regions[1];
+
+ struct region r1 = {
+ .base = SZ_16M,
+ .size = SZ_32M
+ };
+ struct region r2 = {
+ .base = SZ_64M,
+ .size = SZ_64M
+ };
+ struct region r3 = {
+ .base = SZ_32M,
+ .size = SZ_64M
+ };
+
+ PREFIX_PUSH();
+
+ r2_end = r2.base + r2.size;
+ r3_end = r3.base + r3.size;
+ new_r1_size = r3.base - r1.base;
+ new_r2_size = r2_end - r3_end;
+ total_size = new_r1_size + new_r2_size;
+
+ reset_memblock_regions();
+ memblock_reserve(r1.base, r1.size);
+ memblock_reserve(r2.base, r2.size);
+ memblock_free((void *)r3.base, r3.size);
+
+ ASSERT_EQ(rgn1->base, r1.base);
+ ASSERT_EQ(rgn1->size, new_r1_size);
+
+ ASSERT_EQ(rgn2->base, r3_end);
+ ASSERT_EQ(rgn2->size, new_r2_size);
+
+ ASSERT_EQ(memblock.reserved.cnt, 2);
+ ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_free_checks(void)
+{
+ prefix_reset();
+ prefix_push(FUNC_FREE);
+ test_print("Running %s tests...\n", FUNC_FREE);
+
+ memblock_free_simple_check();
+ memblock_free_absent_check();
+ memblock_free_overlap_top_check();
+ memblock_free_overlap_bottom_check();
+ memblock_free_within_check();
+ memblock_free_only_region_check();
+ memblock_free_near_max_check();
+ memblock_free_overlap_two_check();
+
+ prefix_pop();
+
+ return 0;
+}
+
+static int memblock_set_bottom_up_check(void)
+{
+ prefix_push("memblock_set_bottom_up");
+
+ memblock_set_bottom_up(false);
+ ASSERT_EQ(memblock.bottom_up, false);
+ memblock_set_bottom_up(true);
+ ASSERT_EQ(memblock.bottom_up, true);
+
+ reset_memblock_attributes();
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_bottom_up_check(void)
+{
+ prefix_push("memblock_bottom_up");
+
+ memblock_set_bottom_up(false);
+ ASSERT_EQ(memblock_bottom_up(), memblock.bottom_up);
+ ASSERT_EQ(memblock_bottom_up(), false);
+ memblock_set_bottom_up(true);
+ ASSERT_EQ(memblock_bottom_up(), memblock.bottom_up);
+ ASSERT_EQ(memblock_bottom_up(), true);
+
+ reset_memblock_attributes();
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_bottom_up_checks(void)
+{
+ test_print("Running memblock_*bottom_up tests...\n");
+
+ prefix_reset();
+ memblock_set_bottom_up_check();
+ prefix_reset();
+ memblock_bottom_up_check();
+
+ return 0;
+}
+
+/*
+ * A test that tries to trim memory when both ends of the memory region are
+ * aligned. Expect that the memory will not be trimmed. Expect the counter to
+ * not be updated.
+ */
+static int memblock_trim_memory_aligned_check(void)
+{
+ struct memblock_region *rgn;
+ const phys_addr_t alignment = SMP_CACHE_BYTES;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r = {
+ .base = alignment,
+ .size = alignment * 4
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r.base, r.size);
+ memblock_trim_memory(alignment);
+
+ ASSERT_EQ(rgn->base, r.base);
+ ASSERT_EQ(rgn->size, r.size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to trim memory when there are two available regions, r1 and
+ * r2. Region r1 is aligned on both ends and region r2 is unaligned on one end
+ * and smaller than the alignment:
+ *
+ * alignment
+ * |--------|
+ * | +-----------------+ +------+ |
+ * | | r1 | | r2 | |
+ * +--------+-----------------+--------+------+---+
+ * ^ ^ ^ ^ ^
+ * |________|________|________| |
+ * | Unaligned address
+ * Aligned addresses
+ *
+ * Expect that r1 will not be trimmed and r2 will be removed. Expect the
+ * counter to be updated.
+ */
+static int memblock_trim_memory_too_small_check(void)
+{
+ struct memblock_region *rgn;
+ const phys_addr_t alignment = SMP_CACHE_BYTES;
+
+ rgn = &memblock.memory.regions[0];
+
+ struct region r1 = {
+ .base = alignment,
+ .size = alignment * 2
+ };
+ struct region r2 = {
+ .base = alignment * 4,
+ .size = alignment - SZ_2
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+ memblock_trim_memory(alignment);
+
+ ASSERT_EQ(rgn->base, r1.base);
+ ASSERT_EQ(rgn->size, r1.size);
+
+ ASSERT_EQ(memblock.memory.cnt, 1);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to trim memory when there are two available regions, r1 and
+ * r2. Region r1 is aligned on both ends and region r2 is unaligned at the base
+ * and aligned at the end:
+ *
+ * Unaligned address
+ * |
+ * v
+ * | +-----------------+ +---------------+ |
+ * | | r1 | | r2 | |
+ * +--------+-----------------+----------+---------------+---+
+ * ^ ^ ^ ^ ^ ^
+ * |________|________|________|________|________|
+ * |
+ * Aligned addresses
+ *
+ * Expect that r1 will not be trimmed and r2 will be trimmed at the base.
+ * Expect the counter to not be updated.
+ */
+static int memblock_trim_memory_unaligned_base_check(void)
+{
+ struct memblock_region *rgn1, *rgn2;
+ const phys_addr_t alignment = SMP_CACHE_BYTES;
+ phys_addr_t offset = SZ_2;
+ phys_addr_t new_r2_base, new_r2_size;
+
+ rgn1 = &memblock.memory.regions[0];
+ rgn2 = &memblock.memory.regions[1];
+
+ struct region r1 = {
+ .base = alignment,
+ .size = alignment * 2
+ };
+ struct region r2 = {
+ .base = alignment * 4 + offset,
+ .size = alignment * 2 - offset
+ };
+
+ PREFIX_PUSH();
+
+ new_r2_base = r2.base + (alignment - offset);
+ new_r2_size = r2.size - (alignment - offset);
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+ memblock_trim_memory(alignment);
+
+ ASSERT_EQ(rgn1->base, r1.base);
+ ASSERT_EQ(rgn1->size, r1.size);
+
+ ASSERT_EQ(rgn2->base, new_r2_base);
+ ASSERT_EQ(rgn2->size, new_r2_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that tries to trim memory when there are two available regions, r1 and
+ * r2. Region r1 is aligned on both ends and region r2 is aligned at the base
+ * and unaligned at the end:
+ *
+ * Unaligned address
+ * |
+ * v
+ * | +-----------------+ +---------------+ |
+ * | | r1 | | r2 | |
+ * +--------+-----------------+--------+---------------+---+
+ * ^ ^ ^ ^ ^ ^
+ * |________|________|________|________|________|
+ * |
+ * Aligned addresses
+ *
+ * Expect that r1 will not be trimmed and r2 will be trimmed at the end.
+ * Expect the counter to not be updated.
+ */
+static int memblock_trim_memory_unaligned_end_check(void)
+{
+ struct memblock_region *rgn1, *rgn2;
+ const phys_addr_t alignment = SMP_CACHE_BYTES;
+ phys_addr_t offset = SZ_2;
+ phys_addr_t new_r2_size;
+
+ rgn1 = &memblock.memory.regions[0];
+ rgn2 = &memblock.memory.regions[1];
+
+ struct region r1 = {
+ .base = alignment,
+ .size = alignment * 2
+ };
+ struct region r2 = {
+ .base = alignment * 4,
+ .size = alignment * 2 - offset
+ };
+
+ PREFIX_PUSH();
+
+ new_r2_size = r2.size - (alignment - offset);
+
+ reset_memblock_regions();
+ memblock_add(r1.base, r1.size);
+ memblock_add(r2.base, r2.size);
+ memblock_trim_memory(alignment);
+
+ ASSERT_EQ(rgn1->base, r1.base);
+ ASSERT_EQ(rgn1->size, r1.size);
+
+ ASSERT_EQ(rgn2->base, r2.base);
+ ASSERT_EQ(rgn2->size, new_r2_size);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_trim_memory_checks(void)
+{
+ prefix_reset();
+ prefix_push(FUNC_TRIM);
+ test_print("Running %s tests...\n", FUNC_TRIM);
+
+ memblock_trim_memory_aligned_check();
+ memblock_trim_memory_too_small_check();
+ memblock_trim_memory_unaligned_base_check();
+ memblock_trim_memory_unaligned_end_check();
+
+ prefix_pop();
+
+ return 0;
+}
+
+static int memblock_overlaps_region_check(void)
+{
+ struct region r = {
+ .base = SZ_1G,
+ .size = SZ_4M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r.base, r.size);
+
+ /* Far Away */
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1M, SZ_1M));
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_2G, SZ_1M));
+
+ /* Neighbor */
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_1M));
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_4M, SZ_1M));
+
+ /* Partial Overlap */
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_2M));
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_2M, SZ_2M));
+
+ /* Totally Overlap */
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G, SZ_4M));
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_2M, SZ_8M));
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_1M, SZ_1M));
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_overlaps_region_checks(void)
+{
+ prefix_reset();
+ prefix_push("memblock_overlaps_region");
+ test_print("Running memblock_overlaps_region tests...\n");
+
+ memblock_overlaps_region_check();
+
+ prefix_pop();
+
+ return 0;
+}
+
+#ifdef CONFIG_NUMA
+static int memblock_set_node_check(void)
+{
+ unsigned long i, max_reserved;
+ struct memblock_region *rgn;
+ void *orig_region;
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ dummy_physical_memory_init();
+ memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+ orig_region = memblock.reserved.regions;
+
+ /* Equally Split range to node 0 and 1*/
+ memblock_set_node(memblock_start_of_DRAM(),
+ memblock_phys_mem_size() / 2, &memblock.memory, 0);
+ memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2,
+ memblock_phys_mem_size() / 2, &memblock.memory, 1);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+ rgn = &memblock.memory.regions[0];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 0);
+ rgn = &memblock.memory.regions[1];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2);
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 1);
+
+ /* Reserve 126 regions with the last one across node boundary */
+ for (i = 0; i < 125; i++)
+ memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8);
+
+ memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8,
+ SZ_16);
+
+ /*
+ * Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()")
+ * do following process to set nid to each memblock.reserved region.
+ * But it may miss some region if memblock_set_node() double the
+ * array.
+ *
+ * By checking 'max', we make sure all region nid is set properly.
+ */
+repeat:
+ max_reserved = memblock.reserved.max;
+ for_each_mem_region(rgn) {
+ int nid = memblock_get_region_node(rgn);
+
+ memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid);
+ }
+ if (max_reserved != memblock.reserved.max)
+ goto repeat;
+
+ /* Confirm each region has valid node set */
+ for_each_reserved_mem_region(rgn) {
+ ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn)));
+ if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1))
+ ASSERT_EQ(1, memblock_get_region_node(rgn));
+ else
+ ASSERT_EQ(0, memblock_get_region_node(rgn));
+ }
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_set_node_checks(void)
+{
+ prefix_reset();
+ prefix_push("memblock_set_node");
+ test_print("Running memblock_set_node tests...\n");
+
+ memblock_set_node_check();
+
+ prefix_pop();
+
+ return 0;
+}
+#else
+static int memblock_set_node_checks(void)
+{
+ return 0;
+}
+#endif
+
+int memblock_basic_checks(void)
+{
+ memblock_initialization_check();
+ memblock_add_checks();
+ memblock_reserve_checks();
+ memblock_remove_checks();
+ memblock_free_checks();
+ memblock_bottom_up_checks();
+ memblock_trim_memory_checks();
+ memblock_overlaps_region_checks();
+ memblock_set_node_checks();
+
+ return 0;
+}
diff --git a/tools/testing/memblock/tests/basic_api.h b/tools/testing/memblock/tests/basic_api.h
new file mode 100644
index 000000000000..1873faa54754
--- /dev/null
+++ b/tools/testing/memblock/tests/basic_api.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_BASIC_H
+#define _MEMBLOCK_BASIC_H
+
+#include "common.h"
+
+int memblock_basic_checks(void);
+
+#endif
diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
new file mode 100644
index 000000000000..3250c8e5124b
--- /dev/null
+++ b/tools/testing/memblock/tests/common.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "tests/common.h"
+#include <string.h>
+#include <getopt.h>
+#include <linux/memory_hotplug.h>
+#include <linux/build_bug.h>
+
+#define PREFIXES_MAX 15
+#define DELIM ": "
+#define BASIS 10000
+
+static struct test_memory memory_block;
+static const char __maybe_unused *prefixes[PREFIXES_MAX];
+static int __maybe_unused nr_prefixes;
+
+static const char *short_opts = "hmv";
+static const struct option long_opts[] = {
+ {"help", 0, NULL, 'h'},
+ {"movable-node", 0, NULL, 'm'},
+ {"verbose", 0, NULL, 'v'},
+ {NULL, 0, NULL, 0}
+};
+
+static const char * const help_opts[] = {
+ "display this help message and exit",
+ "disallow allocations from regions marked as hotplugged\n\t\t\t"
+ "by simulating enabling the \"movable_node\" kernel\n\t\t\t"
+ "parameter",
+ "enable verbose output, which includes the name of the\n\t\t\t"
+ "memblock function being tested, the name of the test,\n\t\t\t"
+ "and whether the test passed or failed."
+};
+
+static int verbose;
+
+/* sets global variable returned by movable_node_is_enabled() stub */
+bool movable_node_enabled;
+
+void reset_memblock_regions(void)
+{
+ memset(memblock.memory.regions, 0,
+ memblock.memory.cnt * sizeof(struct memblock_region));
+ memblock.memory.cnt = 0;
+ memblock.memory.max = INIT_MEMBLOCK_REGIONS;
+ memblock.memory.total_size = 0;
+
+ memset(memblock.reserved.regions, 0,
+ memblock.reserved.cnt * sizeof(struct memblock_region));
+ memblock.reserved.cnt = 0;
+ memblock.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS;
+ memblock.reserved.total_size = 0;
+}
+
+void reset_memblock_attributes(void)
+{
+ memblock.memory.name = "memory";
+ memblock.reserved.name = "reserved";
+ memblock.bottom_up = false;
+ memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
+}
+
+static inline void fill_memblock(void)
+{
+ memset(memory_block.base, 1, PHYS_MEM_SIZE);
+}
+
+void setup_memblock(void)
+{
+ reset_memblock_regions();
+ memblock_add((phys_addr_t)memory_block.base, MEM_SIZE);
+ fill_memblock();
+}
+
+/**
+ * setup_numa_memblock:
+ * Set up a memory layout with multiple NUMA nodes in a previously allocated
+ * dummy physical memory.
+ * @node_fracs: an array representing the fraction of MEM_SIZE contained in
+ * each node in basis point units (one hundredth of 1% or 1/10000).
+ * For example, if node 0 should contain 1/8 of MEM_SIZE,
+ * node_fracs[0] = 1250.
+ *
+ * The nids will be set to 0 through NUMA_NODES - 1.
+ */
+void setup_numa_memblock(const unsigned int node_fracs[])
+{
+ phys_addr_t base;
+ int flags;
+
+ reset_memblock_regions();
+ base = (phys_addr_t)memory_block.base;
+ flags = (movable_node_is_enabled()) ? MEMBLOCK_NONE : MEMBLOCK_HOTPLUG;
+
+ for (int i = 0; i < NUMA_NODES; i++) {
+ assert(node_fracs[i] <= BASIS);
+ phys_addr_t size = MEM_SIZE * node_fracs[i] / BASIS;
+
+ memblock_add_node(base, size, i, flags);
+ base += size;
+ }
+ fill_memblock();
+}
+
+void dummy_physical_memory_init(void)
+{
+ memory_block.base = malloc(PHYS_MEM_SIZE);
+ assert(memory_block.base);
+ fill_memblock();
+}
+
+void dummy_physical_memory_cleanup(void)
+{
+ free(memory_block.base);
+}
+
+phys_addr_t dummy_physical_memory_base(void)
+{
+ return (phys_addr_t)memory_block.base;
+}
+
+static void usage(const char *prog)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(help_opts) != ARRAY_SIZE(long_opts) - 1);
+
+ printf("Usage: %s [-%s]\n", prog, short_opts);
+
+ for (int i = 0; long_opts[i].name; i++) {
+ printf(" -%c, --%-12s\t%s\n", long_opts[i].val,
+ long_opts[i].name, help_opts[i]);
+ }
+
+ exit(1);
+}
+
+void parse_args(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt_long_only(argc, argv, short_opts, long_opts,
+ NULL)) != -1) {
+ switch (c) {
+ case 'm':
+ movable_node_enabled = true;
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ default:
+ usage(argv[0]);
+ }
+ }
+}
+
+void print_prefixes(const char *postfix)
+{
+ for (int i = 0; i < nr_prefixes; i++)
+ test_print("%s%s", prefixes[i], DELIM);
+ test_print(postfix);
+}
+
+void test_fail(void)
+{
+ if (verbose) {
+ ksft_test_result_fail(": ");
+ print_prefixes("failed\n");
+ }
+}
+
+void test_pass(void)
+{
+ if (verbose) {
+ ksft_test_result_pass(": ");
+ print_prefixes("passed\n");
+ }
+}
+
+void test_print(const char *fmt, ...)
+{
+ if (verbose) {
+ int saved_errno = errno;
+ va_list args;
+
+ va_start(args, fmt);
+ errno = saved_errno;
+ vprintf(fmt, args);
+ va_end(args);
+ }
+}
+
+void prefix_reset(void)
+{
+ memset(prefixes, 0, PREFIXES_MAX * sizeof(char *));
+ nr_prefixes = 0;
+}
+
+void prefix_push(const char *prefix)
+{
+ assert(nr_prefixes < PREFIXES_MAX);
+ prefixes[nr_prefixes] = prefix;
+ nr_prefixes++;
+}
+
+void prefix_pop(void)
+{
+ if (nr_prefixes > 0) {
+ prefixes[nr_prefixes - 1] = 0;
+ nr_prefixes--;
+ }
+}
diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h
new file mode 100644
index 000000000000..e1138e06c903
--- /dev/null
+++ b/tools/testing/memblock/tests/common.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_TEST_H
+#define _MEMBLOCK_TEST_H
+
+#include <stdlib.h>
+#include <assert.h>
+#include <linux/types.h>
+#include <linux/seq_file.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
+#include <linux/printk.h>
+#include <../selftests/kselftest.h>
+
+#define MEM_SIZE SZ_32K
+#define PHYS_MEM_SIZE SZ_16M
+#define NUMA_NODES 8
+
+#define INIT_MEMBLOCK_REGIONS 128
+#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS
+
+enum test_flags {
+ /* No special request. */
+ TEST_F_NONE = 0x0,
+ /* Perform raw allocations (no zeroing of memory). */
+ TEST_F_RAW = 0x1,
+ /* Perform allocations on the exact node specified. */
+ TEST_F_EXACT = 0x2
+};
+
+/**
+ * ASSERT_EQ():
+ * Check the condition
+ * @_expected == @_seen
+ * If false, print failed test message (if running with --verbose) and then
+ * assert.
+ */
+#define ASSERT_EQ(_expected, _seen) do { \
+ if ((_expected) != (_seen)) \
+ test_fail(); \
+ assert((_expected) == (_seen)); \
+} while (0)
+
+#define ASSERT_TRUE(_seen) ASSERT_EQ(true, _seen)
+#define ASSERT_FALSE(_seen) ASSERT_EQ(false, _seen)
+
+/**
+ * ASSERT_NE():
+ * Check the condition
+ * @_expected != @_seen
+ * If false, print failed test message (if running with --verbose) and then
+ * assert.
+ */
+#define ASSERT_NE(_expected, _seen) do { \
+ if ((_expected) == (_seen)) \
+ test_fail(); \
+ assert((_expected) != (_seen)); \
+} while (0)
+
+/**
+ * ASSERT_LT():
+ * Check the condition
+ * @_expected < @_seen
+ * If false, print failed test message (if running with --verbose) and then
+ * assert.
+ */
+#define ASSERT_LT(_expected, _seen) do { \
+ if ((_expected) >= (_seen)) \
+ test_fail(); \
+ assert((_expected) < (_seen)); \
+} while (0)
+
+/**
+ * ASSERT_LE():
+ * Check the condition
+ * @_expected <= @_seen
+ * If false, print failed test message (if running with --verbose) and then
+ * assert.
+ */
+#define ASSERT_LE(_expected, _seen) do { \
+ if ((_expected) > (_seen)) \
+ test_fail(); \
+ assert((_expected) <= (_seen)); \
+} while (0)
+
+/**
+ * ASSERT_MEM_EQ():
+ * Check that the first @_size bytes of @_seen are all equal to @_expected.
+ * If false, print failed test message (if running with --verbose) and then
+ * assert.
+ */
+#define ASSERT_MEM_EQ(_seen, _expected, _size) do { \
+ for (int _i = 0; _i < (_size); _i++) { \
+ ASSERT_EQ(((char *)_seen)[_i], (_expected)); \
+ } \
+} while (0)
+
+/**
+ * ASSERT_MEM_NE():
+ * Check that none of the first @_size bytes of @_seen are equal to @_expected.
+ * If false, print failed test message (if running with --verbose) and then
+ * assert.
+ */
+#define ASSERT_MEM_NE(_seen, _expected, _size) do { \
+ for (int _i = 0; _i < (_size); _i++) { \
+ ASSERT_NE(((char *)_seen)[_i], (_expected)); \
+ } \
+} while (0)
+
+#define PREFIX_PUSH() prefix_push(__func__)
+
+/*
+ * Available memory registered with memblock needs to be valid for allocs
+ * test to run. This is a convenience wrapper for memory allocated in
+ * dummy_physical_memory_init() that is later registered with memblock
+ * in setup_memblock().
+ */
+struct test_memory {
+ void *base;
+};
+
+struct region {
+ phys_addr_t base;
+ phys_addr_t size;
+};
+
+static inline phys_addr_t __maybe_unused region_end(struct memblock_region *rgn)
+{
+ return rgn->base + rgn->size;
+}
+
+void reset_memblock_regions(void);
+void reset_memblock_attributes(void);
+void setup_memblock(void);
+void setup_numa_memblock(const unsigned int node_fracs[]);
+void dummy_physical_memory_init(void);
+void dummy_physical_memory_cleanup(void);
+phys_addr_t dummy_physical_memory_base(void);
+void parse_args(int argc, char **argv);
+
+void test_fail(void);
+void test_pass(void);
+void test_print(const char *fmt, ...);
+void prefix_reset(void);
+void prefix_push(const char *prefix);
+void prefix_pop(void);
+
+static inline void test_pass_pop(void)
+{
+ test_pass();
+ prefix_pop();
+}
+
+static inline void run_top_down(int (*func)())
+{
+ memblock_set_bottom_up(false);
+ prefix_push("top-down");
+ func();
+ prefix_pop();
+}
+
+static inline void run_bottom_up(int (*func)())
+{
+ memblock_set_bottom_up(true);
+ prefix_push("bottom-up");
+ func();
+ prefix_pop();
+}
+
+static inline void assert_mem_content(void *mem, int size, int flags)
+{
+ if (flags & TEST_F_RAW)
+ ASSERT_MEM_NE(mem, 0, size);
+ else
+ ASSERT_MEM_EQ(mem, 0, size);
+}
+
+#endif
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 47f9cc9dcd94..91a3627f301a 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -27,7 +27,6 @@ ccflags-y += -I$(srctree)/drivers/acpi/nfit/
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
obj-$(CONFIG_ND_BTT) += nd_btt.o
-obj-$(CONFIG_ND_BLK) += nd_blk.o
obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
obj-$(CONFIG_ACPI_NFIT) += nfit.o
ifeq ($(CONFIG_DAX),m)
@@ -35,8 +34,6 @@ obj-$(CONFIG_DAX) += dax.o
endif
obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
-obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
-obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
nfit-y := $(ACPI_SRC)/core.o
nfit-y += $(ACPI_SRC)/intel.o
@@ -52,9 +49,6 @@ nd_pmem-y += config_check.o
nd_btt-y := $(NVDIMM_SRC)/btt.o
nd_btt-y += config_check.o
-nd_blk-y := $(NVDIMM_SRC)/blk.o
-nd_blk-y += config_check.o
-
nd_e820-y := $(NVDIMM_SRC)/e820.o
nd_e820-y += config_check.o
@@ -67,12 +61,8 @@ device_dax-y += dax-dev.o
device_dax-y += device_dax_test.o
device_dax-y += config_check.o
-dax_pmem-y := $(DAX_SRC)/pmem/pmem.o
+dax_pmem-y := $(DAX_SRC)/pmem.o
dax_pmem-y += dax_pmem_test.o
-dax_pmem_core-y := $(DAX_SRC)/pmem/core.o
-dax_pmem_core-y += dax_pmem_core_test.o
-dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o
-dax_pmem_compat-y += dax_pmem_compat_test.o
dax_pmem-y += config_check.o
libnvdimm-y := $(NVDIMM_SRC)/core.o
@@ -89,8 +79,9 @@ libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o
-libnvdimm-y += dimm_devs.o
libnvdimm-y += libnvdimm_test.o
libnvdimm-y += config_check.o
+KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS))
+
obj-m += test/
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
index 3e3a5f518864..baed75e2ccbc 100644
--- a/tools/testing/nvdimm/config_check.c
+++ b/tools/testing/nvdimm/config_check.c
@@ -11,7 +11,6 @@ void check(void)
BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
- BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
if (IS_ENABLED(CONFIG_ACPI_NFIT))
BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
diff --git a/tools/testing/nvdimm/dax_pmem_compat_test.c b/tools/testing/nvdimm/dax_pmem_compat_test.c
deleted file mode 100644
index 7cd1877f3765..000000000000
--- a/tools/testing/nvdimm/dax_pmem_compat_test.c
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright(c) 2019 Intel Corporation. All rights reserved.
-
-#include <linux/module.h>
-#include <linux/printk.h>
-#include "watermark.h"
-
-nfit_test_watermark(dax_pmem_compat);
diff --git a/tools/testing/nvdimm/dax_pmem_core_test.c b/tools/testing/nvdimm/dax_pmem_core_test.c
deleted file mode 100644
index a4249cdbeec1..000000000000
--- a/tools/testing/nvdimm/dax_pmem_core_test.c
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright(c) 2019 Intel Corporation. All rights reserved.
-
-#include <linux/module.h>
-#include <linux/printk.h>
-#include "watermark.h"
-
-nfit_test_watermark(dax_pmem_core);
diff --git a/tools/testing/nvdimm/dimm_devs.c b/tools/testing/nvdimm/dimm_devs.c
deleted file mode 100644
index 57bd27dedf1f..000000000000
--- a/tools/testing/nvdimm/dimm_devs.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright Intel Corp. 2018 */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/nd.h>
-#include "pmem.h"
-#include "pfn.h"
-#include "nd.h"
-#include "nd-core.h"
-
-ssize_t security_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvdimm *nvdimm = to_nvdimm(dev);
-
- /*
- * For the test version we need to poll the "hardware" in order
- * to get the updated status for unlock testing.
- */
- nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
-
- if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
- return sprintf(buf, "disabled\n");
- if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags))
- return sprintf(buf, "unlocked\n");
- if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags))
- return sprintf(buf, "locked\n");
- return -ENOTTY;
-}
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
index af19c85558e7..05e763aab104 100644
--- a/tools/testing/nvdimm/pmem-dax.c
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -4,11 +4,13 @@
*/
#include "test/nfit_test.h"
#include <linux/blkdev.h>
+#include <linux/dax.h>
#include <pmem.h>
#include <nd.h>
long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
- long nr_pages, void **kaddr, pfn_t *pfn)
+ long nr_pages, enum dax_access_mode mode, void **kaddr,
+ unsigned long *pfn)
{
resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
@@ -27,7 +29,7 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
*kaddr = pmem->virt_addr + offset;
page = vmalloc_to_page(pmem->virt_addr + offset);
if (pfn)
- *pfn = page_to_pfn_t(page);
+ *pfn = page_to_pfn(page);
pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
__func__, pmem, pgoff, page_to_pfn(page));
@@ -37,7 +39,7 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
if (kaddr)
*kaddr = pmem->virt_addr + offset;
if (pfn)
- *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+ *pfn = PHYS_PFN(pmem->phys_addr + offset);
/*
* If badblocks are present, limit known good range to the
diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild
index 197bcb2b7f35..003d48f5f24f 100644
--- a/tools/testing/nvdimm/test/Kbuild
+++ b/tools/testing/nvdimm/test/Kbuild
@@ -7,6 +7,7 @@ obj-m += nfit_test_iomap.o
ifeq ($(CONFIG_ACPI_NFIT),m)
nfit_test-y := nfit.o
+ obj-m += ndtest.o
else
nfit_test-y := ndtest.o
endif
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index c62d372d426f..f7e7bfe9bb85 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -8,7 +8,6 @@
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/types.h>
-#include <linux/pfn_t.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/mm.h>
@@ -62,16 +61,14 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource)
}
EXPORT_SYMBOL(get_nfit_res);
-void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
- void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
-{
- struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-
- if (nfit_res)
- return (void __iomem *) nfit_res->buf + offset
- - nfit_res->res.start;
- return fallback_fn(offset, size);
-}
+#define __nfit_test_ioremap(offset, size, fallback_fn) ({ \
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset); \
+ nfit_res ? \
+ (void __iomem *) nfit_res->buf + (offset) \
+ - nfit_res->res.start \
+ : \
+ fallback_fn((offset), (size)) ; \
+})
void __iomem *__wrap_devm_ioremap(struct device *dev,
resource_size_t offset, unsigned long size)
@@ -100,25 +97,17 @@ static void nfit_test_kill(void *_pgmap)
{
struct dev_pagemap *pgmap = _pgmap;
- WARN_ON(!pgmap || !pgmap->ref);
+ WARN_ON(!pgmap);
- if (pgmap->ops && pgmap->ops->kill)
- pgmap->ops->kill(pgmap);
- else
- percpu_ref_kill(pgmap->ref);
+ percpu_ref_kill(&pgmap->ref);
- if (pgmap->ops && pgmap->ops->cleanup) {
- pgmap->ops->cleanup(pgmap);
- } else {
- wait_for_completion(&pgmap->done);
- percpu_ref_exit(pgmap->ref);
- }
+ wait_for_completion(&pgmap->done);
+ percpu_ref_exit(&pgmap->ref);
}
static void dev_pagemap_percpu_release(struct percpu_ref *ref)
{
- struct dev_pagemap *pgmap =
- container_of(ref, struct dev_pagemap, internal_ref);
+ struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
complete(&pgmap->done);
}
@@ -132,22 +121,11 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
if (!nfit_res)
return devm_memremap_pages(dev, pgmap);
- if (!pgmap->ref) {
- if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
- return ERR_PTR(-EINVAL);
-
- init_completion(&pgmap->done);
- error = percpu_ref_init(&pgmap->internal_ref,
- dev_pagemap_percpu_release, 0, GFP_KERNEL);
- if (error)
- return ERR_PTR(error);
- pgmap->ref = &pgmap->internal_ref;
- } else {
- if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
- WARN(1, "Missing reference count teardown definition\n");
- return ERR_PTR(-EINVAL);
- }
- }
+ init_completion(&pgmap->done);
+ error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
+ GFP_KERNEL);
+ if (error)
+ return ERR_PTR(error);
error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
if (error)
@@ -156,16 +134,6 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
}
EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
-pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
-{
- struct nfit_test_resource *nfit_res = get_nfit_res(addr);
-
- if (nfit_res)
- flags &= ~PFN_MAP;
- return phys_to_pfn_t(addr, flags);
-}
-EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
-
void *__wrap_memremap(resource_size_t offset, size_t size,
unsigned long flags)
{
@@ -428,4 +396,5 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *g
}
EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
+MODULE_DESCRIPTION("NVDIMM unit test");
MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 6862915f1fb0..8e3b6be53839 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -13,6 +13,8 @@
#include <nd-core.h>
#include <linux/printk.h>
#include <linux/seq_buf.h>
+#include <linux/papr_scm.h>
+#include <uapi/linux/papr_pdsm.h>
#include "../watermark.h"
#include "nfit_test.h"
@@ -38,7 +40,11 @@ enum {
static DEFINE_SPINLOCK(ndtest_lock);
static struct ndtest_priv *instances[NUM_INSTANCES];
-static struct class *ndtest_dimm_class;
+
+static const struct class ndtest_dimm_class = {
+ .name = "nfit_test_dimm",
+};
+
static struct gen_pool *ndtest_pool;
static struct ndtest_dimm dimm_group1[] = {
@@ -134,39 +140,6 @@ static struct ndtest_mapping region1_mapping[] = {
},
};
-static struct ndtest_mapping region2_mapping[] = {
- {
- .dimm = 0,
- .position = 0,
- .start = 0,
- .size = DIMM_SIZE,
- },
-};
-
-static struct ndtest_mapping region3_mapping[] = {
- {
- .dimm = 1,
- .start = 0,
- .size = DIMM_SIZE,
- }
-};
-
-static struct ndtest_mapping region4_mapping[] = {
- {
- .dimm = 2,
- .start = 0,
- .size = DIMM_SIZE,
- }
-};
-
-static struct ndtest_mapping region5_mapping[] = {
- {
- .dimm = 3,
- .start = 0,
- .size = DIMM_SIZE,
- }
-};
-
static struct ndtest_region bus0_regions[] = {
{
.type = ND_DEVICE_NAMESPACE_PMEM,
@@ -182,34 +155,6 @@ static struct ndtest_region bus0_regions[] = {
.size = DIMM_SIZE * 2,
.range_index = 2,
},
- {
- .type = ND_DEVICE_NAMESPACE_BLK,
- .num_mappings = ARRAY_SIZE(region2_mapping),
- .mapping = region2_mapping,
- .size = DIMM_SIZE,
- .range_index = 3,
- },
- {
- .type = ND_DEVICE_NAMESPACE_BLK,
- .num_mappings = ARRAY_SIZE(region3_mapping),
- .mapping = region3_mapping,
- .size = DIMM_SIZE,
- .range_index = 4,
- },
- {
- .type = ND_DEVICE_NAMESPACE_BLK,
- .num_mappings = ARRAY_SIZE(region4_mapping),
- .mapping = region4_mapping,
- .size = DIMM_SIZE,
- .range_index = 5,
- },
- {
- .type = ND_DEVICE_NAMESPACE_BLK,
- .num_mappings = ARRAY_SIZE(region5_mapping),
- .mapping = region5_mapping,
- .size = DIMM_SIZE,
- .range_index = 6,
- },
};
static struct ndtest_mapping region6_mapping[] = {
@@ -338,62 +283,6 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
return 0;
}
-static int ndtest_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
- void *iobuf, u64 len, int rw)
-{
- struct ndtest_dimm *dimm = ndbr->blk_provider_data;
- struct ndtest_blk_mmio *mmio = dimm->mmio;
- struct nd_region *nd_region = &ndbr->nd_region;
- unsigned int lane;
-
- if (!mmio)
- return -ENOMEM;
-
- lane = nd_region_acquire_lane(nd_region);
- if (rw)
- memcpy(mmio->base + dpa, iobuf, len);
- else {
- memcpy(iobuf, mmio->base + dpa, len);
- arch_invalidate_pmem(mmio->base + dpa, len);
- }
-
- nd_region_release_lane(nd_region, lane);
-
- return 0;
-}
-
-static int ndtest_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
- struct device *dev)
-{
- struct nd_blk_region *ndbr = to_nd_blk_region(dev);
- struct nvdimm *nvdimm;
- struct ndtest_dimm *dimm;
- struct ndtest_blk_mmio *mmio;
-
- nvdimm = nd_blk_region_to_dimm(ndbr);
- dimm = nvdimm_provider_data(nvdimm);
-
- nd_blk_region_set_provider_data(ndbr, dimm);
- dimm->blk_region = to_nd_region(dev);
-
- mmio = devm_kzalloc(dev, sizeof(struct ndtest_blk_mmio), GFP_KERNEL);
- if (!mmio)
- return -ENOMEM;
-
- mmio->base = (void __iomem *) devm_nvdimm_memremap(
- dev, dimm->address, 12, nd_blk_memremap_flags(ndbr));
- if (!mmio->base) {
- dev_err(dev, "%s failed to map blk dimm\n", nvdimm_name(nvdimm));
- return -ENOMEM;
- }
- mmio->size = dimm->size;
- mmio->base_offset = 0;
-
- dimm->mmio = mmio;
-
- return 0;
-}
-
static struct nfit_test_resource *ndtest_resource_lookup(resource_size_t addr)
{
int i;
@@ -523,17 +412,16 @@ static int ndtest_create_region(struct ndtest_priv *p,
struct ndtest_region *region)
{
struct nd_mapping_desc mappings[NDTEST_MAX_MAPPING];
- struct nd_blk_region_desc ndbr_desc;
+ struct nd_region_desc *ndr_desc, _ndr_desc;
struct nd_interleave_set *nd_set;
- struct nd_region_desc *ndr_desc;
struct resource res;
int i, ndimm = region->mapping[0].dimm;
u64 uuid[2];
memset(&res, 0, sizeof(res));
memset(&mappings, 0, sizeof(mappings));
- memset(&ndbr_desc, 0, sizeof(ndbr_desc));
- ndr_desc = &ndbr_desc.ndr_desc;
+ memset(&_ndr_desc, 0, sizeof(_ndr_desc));
+ ndr_desc = &_ndr_desc;
if (!ndtest_alloc_resource(p, region->size, &res.start))
return -ENOMEM;
@@ -558,21 +446,6 @@ static int ndtest_create_region(struct ndtest_priv *p,
nd_set->altcookie = nd_set->cookie1;
ndr_desc->nd_set = nd_set;
- if (region->type == ND_DEVICE_NAMESPACE_BLK) {
- mappings[0].start = 0;
- mappings[0].size = DIMM_SIZE;
- mappings[0].nvdimm = p->config->dimms[ndimm].nvdimm;
-
- ndr_desc->mapping = &mappings[0];
- ndr_desc->num_mappings = 1;
- ndr_desc->num_lanes = 1;
- ndbr_desc.enable = ndtest_blk_region_enable;
- ndbr_desc.do_io = ndtest_blk_do_io;
- region->region = nvdimm_blk_region_create(p->bus, ndr_desc);
-
- goto done;
- }
-
for (i = 0; i < region->num_mappings; i++) {
ndimm = region->mapping[i].dimm;
mappings[i].start = region->mapping[i].start;
@@ -584,7 +457,6 @@ static int ndtest_create_region(struct ndtest_priv *p,
ndr_desc->num_mappings = region->num_mappings;
region->region = nvdimm_pmem_region_create(p->bus, ndr_desc);
-done:
if (!region->region) {
dev_err(&p->pdev.dev, "Error registering region %pR\n",
ndr_desc->res);
@@ -857,10 +729,8 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
struct device *dev = &priv->pdev.dev;
unsigned long dimm_flags = dimm->flags;
- if (dimm->num_formats > 1) {
- set_bit(NDD_ALIASING, &dimm_flags);
+ if (dimm->num_formats > 1)
set_bit(NDD_LABELING, &dimm_flags);
- }
if (dimm->flags & PAPR_PMEM_UNARMED_MASK)
set_bit(NDD_UNARMED, &dimm_flags);
@@ -873,7 +743,7 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
return -ENXIO;
}
- dimm->dev = device_create_with_groups(ndtest_dimm_class,
+ dimm->dev = device_create_with_groups(&ndtest_dimm_class,
&priv->pdev.dev,
0, dimm, dimm_attribute_groups,
"test_dimm%d", id);
@@ -962,12 +832,11 @@ static int ndtest_bus_register(struct ndtest_priv *p)
return 0;
}
-static int ndtest_remove(struct platform_device *pdev)
+static void ndtest_remove(struct platform_device *pdev)
{
struct ndtest_priv *p = to_ndtest_priv(&pdev->dev);
nvdimm_bus_unregister(p->bus);
- return 0;
}
static int ndtest_probe(struct platform_device *pdev)
@@ -981,11 +850,22 @@ static int ndtest_probe(struct platform_device *pdev)
p->dcr_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->dcr_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->label_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->label_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->dimm_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
-
+ if (!p->dimm_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
rc = ndtest_nvdimm_init(p);
if (rc)
goto err;
@@ -1042,8 +922,7 @@ static void cleanup_devices(void)
gen_pool_destroy(ndtest_pool);
- if (ndtest_dimm_class)
- class_destroy(ndtest_dimm_class);
+ class_unregister(&ndtest_dimm_class);
}
static __init int ndtest_init(void)
@@ -1054,18 +933,12 @@ static __init int ndtest_init(void)
libnvdimm_test();
device_dax_test();
dax_pmem_test();
- dax_pmem_core_test();
-#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
- dax_pmem_compat_test();
-#endif
nfit_test_setup(ndtest_resource_lookup, NULL);
- ndtest_dimm_class = class_create(THIS_MODULE, "nfit_test_dimm");
- if (IS_ERR(ndtest_dimm_class)) {
- rc = PTR_ERR(ndtest_dimm_class);
+ rc = class_register(&ndtest_dimm_class);
+ if (rc)
goto err_register;
- }
ndtest_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE);
if (!ndtest_pool) {
@@ -1125,5 +998,6 @@ static __exit void ndtest_exit(void)
module_init(ndtest_init);
module_exit(ndtest_exit);
+MODULE_DESCRIPTION("Test non-NFIT devices");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("IBM Corporation");
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index 2c54c9cbb90c..8f27ad6f7319 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -5,37 +5,6 @@
#include <linux/platform_device.h>
#include <linux/libnvdimm.h>
-/* SCM device is unable to persist memory contents */
-#define PAPR_PMEM_UNARMED (1ULL << (63 - 0))
-/* SCM device failed to persist memory contents */
-#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1))
-/* SCM device contents are not persisted from previous IPL */
-#define PAPR_PMEM_EMPTY (1ULL << (63 - 3))
-#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4))
-/* SCM device will be garded off next IPL due to failure */
-#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
-/* SCM contents cannot persist due to current platform health status */
-#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
-
-/* Bits status indicators for health bitmap indicating unarmed dimm */
-#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10))
-
-/* Bits status indicators for health bitmap indicating unflushed dimm */
-#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
-
-/* Bits status indicators for health bitmap indicating unrestored dimm */
-#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY)
-
-/* Bit status indicators for smart event notification */
-#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
- PAPR_PMEM_HEALTH_FATAL | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED)
-
struct ndtest_config;
struct ndtest_priv {
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 9b185bf82da8..f87e9f251d13 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,8 +23,6 @@
#include "nfit_test.h"
#include "../watermark.h"
-#include <asm/mce.h>
-
/*
* Generate an NFIT table to describe the following topology:
*
@@ -434,7 +432,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t,
dev_dbg(dev, "%s: transition out verify\n", __func__);
fw->state = FW_STATE_UPDATED;
fw->missed_activate = false;
- /* fall through */
+ fallthrough;
case FW_STATE_UPDATED:
nd_cmd->status = 0;
/* bogus test version */
@@ -672,6 +670,7 @@ static int nfit_test_search_spa(struct nvdimm_bus *bus,
.addr = spa->spa,
.region = NULL,
};
+ struct nfit_mem *nfit_mem;
u64 dpa;
ret = device_for_each_child(&bus->dev, &ctx,
@@ -689,8 +688,12 @@ static int nfit_test_search_spa(struct nvdimm_bus *bus,
*/
nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
nvdimm = nd_mapping->nvdimm;
+ nfit_mem = nvdimm_provider_data(nvdimm);
+ if (!nfit_mem)
+ return -EINVAL;
- spa->devices[0].nfit_device_handle = handle[nvdimm->id];
+ spa->devices[0].nfit_device_handle =
+ __to_nfit_memdev(nfit_mem)->device_handle;
spa->num_nvdimms = 1;
spa->devices[0].dpa = dpa;
@@ -1714,7 +1717,9 @@ static void put_dimms(void *data)
device_unregister(t->dimm_dev[i]);
}
-static struct class *nfit_test_dimm;
+static const struct class nfit_test_dimm = {
+ .name = "nfit_test_dimm",
+};
static int dimm_name_to_id(struct device *dev)
{
@@ -1832,7 +1837,7 @@ static int nfit_test_dimm_init(struct nfit_test *t)
if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t))
return -ENOMEM;
for (i = 0; i < t->num_dcr; i++) {
- t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
+ t->dimm_dev[i] = device_create_with_groups(&nfit_test_dimm,
&t->pdev.dev, 0, NULL,
nfit_test_dimm_attribute_groups,
"test_dimm%d", i + t->dcr_idx);
@@ -1842,7 +1847,7 @@ static int nfit_test_dimm_init(struct nfit_test *t)
return 0;
}
-static void security_init(struct nfit_test *t)
+static void nfit_security_init(struct nfit_test *t)
{
int i;
@@ -1871,16 +1876,23 @@ static void smart_init(struct nfit_test *t)
}
}
+static size_t sizeof_spa(struct acpi_nfit_system_address *spa)
+{
+ /* until spa location cookie support is added... */
+ return sizeof(*spa) - 8;
+}
+
static int nfit_test0_alloc(struct nfit_test *t)
{
- size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
+ struct acpi_nfit_system_address *spa = NULL;
+ struct acpi_nfit_flush_address *flush;
+ size_t nfit_size = sizeof_spa(spa) * NUM_SPA
+ sizeof(struct acpi_nfit_memory_map) * NUM_MEM
+ sizeof(struct acpi_nfit_control_region) * NUM_DCR
+ offsetof(struct acpi_nfit_control_region,
window_size) * NUM_DCR
+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
- + (sizeof(struct acpi_nfit_flush_address)
- + sizeof(u64) * NUM_HINTS) * NUM_DCR
+ + struct_size(flush, hint_address, NUM_HINTS) * NUM_DCR
+ sizeof(struct acpi_nfit_capabilities);
int i;
@@ -1931,13 +1943,14 @@ static int nfit_test0_alloc(struct nfit_test *t)
if (nfit_test_dimm_init(t))
return -ENOMEM;
smart_init(t);
- security_init(t);
+ nfit_security_init(t);
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
static int nfit_test1_alloc(struct nfit_test *t)
{
- size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+ struct acpi_nfit_system_address *spa = NULL;
+ size_t nfit_size = sizeof_spa(spa) * 2
+ sizeof(struct acpi_nfit_memory_map) * 2
+ offsetof(struct acpi_nfit_control_region, window_size) * 2;
int i;
@@ -2000,7 +2013,7 @@ static void nfit_test0_setup(struct nfit_test *t)
*/
spa = nfit_buf;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
spa->range_index = 0+1;
spa->address = t->spa_set_dma[0];
@@ -2014,7 +2027,7 @@ static void nfit_test0_setup(struct nfit_test *t)
*/
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
spa->range_index = 1+1;
spa->address = t->spa_set_dma[1];
@@ -2024,7 +2037,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa2 (dcr0) dimm0 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 2+1;
spa->address = t->dcr_dma[0];
@@ -2034,7 +2047,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa3 (dcr1) dimm1 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 3+1;
spa->address = t->dcr_dma[1];
@@ -2044,7 +2057,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa4 (dcr2) dimm2 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 4+1;
spa->address = t->dcr_dma[2];
@@ -2054,7 +2067,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa5 (dcr3) dimm3 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 5+1;
spa->address = t->dcr_dma[3];
@@ -2064,7 +2077,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa6 (bdw for dcr0) dimm0 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 6+1;
spa->address = t->dimm_dma[0];
@@ -2074,7 +2087,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa7 (bdw for dcr1) dimm1 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 7+1;
spa->address = t->dimm_dma[1];
@@ -2084,7 +2097,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa8 (bdw for dcr2) dimm2 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 8+1;
spa->address = t->dimm_dma[2];
@@ -2094,7 +2107,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa9 (bdw for dcr3) dimm3 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 9+1;
spa->address = t->dimm_dma[3];
@@ -2581,7 +2594,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa10 (dcr4) dimm4 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 10+1;
spa->address = t->dcr_dma[4];
@@ -2595,7 +2608,7 @@ static void nfit_test0_setup(struct nfit_test *t)
*/
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
spa->range_index = 11+1;
spa->address = t->spa_set_dma[2];
@@ -2605,7 +2618,7 @@ static void nfit_test0_setup(struct nfit_test *t)
/* spa12 (bdw for dcr4) dimm4 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 12+1;
spa->address = t->dimm_dma[4];
@@ -2739,7 +2752,7 @@ static void nfit_test1_setup(struct nfit_test *t)
/* spa0 (flat range with no bdw aliasing) */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
spa->range_index = 0+1;
spa->address = t->spa_set_dma[0];
@@ -2749,7 +2762,7 @@ static void nfit_test1_setup(struct nfit_test *t)
/* virtual cd region */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
- spa->header.length = sizeof(*spa);
+ spa->header.length = sizeof_spa(spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
spa->range_index = 0;
spa->address = t->spa_set_dma[1];
@@ -2834,28 +2847,6 @@ static void nfit_test1_setup(struct nfit_test *t)
set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
}
-static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
- void *iobuf, u64 len, int rw)
-{
- struct nfit_blk *nfit_blk = ndbr->blk_provider_data;
- struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
- struct nd_region *nd_region = &ndbr->nd_region;
- unsigned int lane;
-
- lane = nd_region_acquire_lane(nd_region);
- if (rw)
- memcpy(mmio->addr.base + dpa, iobuf, len);
- else {
- memcpy(iobuf, mmio->addr.base + dpa, len);
-
- /* give us some some coverage of the arch_invalidate_pmem() API */
- arch_invalidate_pmem(mmio->addr.base + dpa, len);
- }
- nd_region_release_lane(nd_region, lane);
-
- return 0;
-}
-
static unsigned long nfit_ctl_handle;
union acpi_object *result;
@@ -3211,7 +3202,6 @@ static int nfit_test_probe(struct platform_device *pdev)
nfit_test->setup(nfit_test);
acpi_desc = &nfit_test->acpi_desc;
acpi_nfit_desc_init(acpi_desc, &pdev->dev);
- acpi_desc->blk_do_io = nfit_test_blk_do_io;
nd_desc = &acpi_desc->nd_desc;
nd_desc->provider_name = NULL;
nd_desc->module = THIS_MODULE;
@@ -3257,11 +3247,6 @@ static int nfit_test_probe(struct platform_device *pdev)
return 0;
}
-static int nfit_test_remove(struct platform_device *pdev)
-{
- return 0;
-}
-
static void nfit_test_release(struct device *dev)
{
struct nfit_test *nfit_test = to_nfit_test(dev);
@@ -3276,7 +3261,6 @@ static const struct platform_device_id nfit_test_id[] = {
static struct platform_driver nfit_test_driver = {
.probe = nfit_test_probe,
- .remove = nfit_test_remove,
.driver = {
.name = KBUILD_MODNAME,
},
@@ -3292,10 +3276,6 @@ static __init int nfit_test_init(void)
acpi_nfit_test();
device_dax_test();
dax_pmem_test();
- dax_pmem_core_test();
-#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
- dax_pmem_compat_test();
-#endif
nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
@@ -3303,11 +3283,9 @@ static __init int nfit_test_init(void)
if (!nfit_wq)
return -ENOMEM;
- nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
- if (IS_ERR(nfit_test_dimm)) {
- rc = PTR_ERR(nfit_test_dimm);
+ rc = class_register(&nfit_test_dimm);
+ if (rc)
goto err_register;
- }
nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE);
if (!nfit_pool) {
@@ -3394,7 +3372,6 @@ static __exit void nfit_test_exit(void)
{
int i;
- flush_workqueue(nfit_wq);
destroy_workqueue(nfit_wq);
for (i = 0; i < NUM_NFITS; i++)
platform_device_unregister(&instances[i]->pdev);
@@ -3405,10 +3382,11 @@ static __exit void nfit_test_exit(void)
for (i = 0; i < NUM_NFITS; i++)
put_device(&instances[i]->pdev.dev);
- class_destroy(nfit_test_dimm);
+ class_unregister(&nfit_test_dimm);
}
module_init(nfit_test_init);
module_exit(nfit_test_exit);
+MODULE_DESCRIPTION("Test ACPI NFIT devices");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index b5f7a996c4d0..b9047fb8ea4a 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -207,7 +207,35 @@ typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
const guid_t *guid, u64 rev, u64 func,
union acpi_object *argv4);
+void __iomem *__wrap_devm_ioremap(struct device *dev,
+ resource_size_t offset, unsigned long size);
+void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags);
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+void *__wrap_memremap(resource_size_t offset, size_t size,
+ unsigned long flags);
+void __wrap_devm_memunmap(struct device *dev, void *addr);
+void __iomem *__wrap_ioremap(resource_size_t offset, unsigned long size);
+void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size);
void __wrap_iounmap(volatile void __iomem *addr);
+void __wrap_memunmap(void *addr);
+struct resource *__wrap___request_region(struct resource *parent,
+ resource_size_t start, resource_size_t n, const char *name,
+ int flags);
+int __wrap_insert_resource(struct resource *parent, struct resource *res);
+int __wrap_remove_resource(struct resource *res);
+struct resource *__wrap___devm_request_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n, const char *name);
+void __wrap___release_region(struct resource *parent, resource_size_t start,
+ resource_size_t n);
+void __wrap___devm_release_region(struct device *dev, struct resource *parent,
+ resource_size_t start, resource_size_t n);
+acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
+ struct acpi_object_list *p, struct acpi_buffer *buf);
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
+ u64 rev, u64 func, union acpi_object *argv4);
+
void nfit_test_setup(nfit_test_lookup_fn lookup,
nfit_test_evaluate_dsm_fn evaluate);
void nfit_test_teardown(void);
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index d971516401e6..ce167a761981 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+generated/autoconf.h
+generated/bit-length.h
generated/map-shift.h
idr.c
idr-test
@@ -6,3 +8,5 @@ main
multiorder
radix-tree.c
xarray
+maple
+ma_xa_benchmark
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index aa6abfe0749c..b2a6660bbd92 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,57 +1,30 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
- -fsanitize=undefined
-LDFLAGS += -fsanitize=address -fsanitize=undefined
-LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder xarray
-CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
-OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
- regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \
- iteration_check_2.o benchmark.o
-
-ifndef SHIFT
- SHIFT=3
-endif
-
-ifeq ($(BUILD), 32)
- CFLAGS += -m32
- LDFLAGS += -m32
-endif
-
-targets: generated/map-shift.h $(TARGETS)
+.PHONY: clean
+
+TARGETS = main idr-test multiorder xarray maple
+CORE_OFILES = $(SHARED_OFILES) xarray.o maple.o test.o
+OFILES = main.o $(CORE_OFILES) regression1.o regression2.o \
+ regression3.o regression4.o tag_check.o multiorder.o idr-test.o \
+ iteration_check.o iteration_check_2.o benchmark.o
+
+targets: generated/map-shift.h generated/bit-length.h $(TARGETS)
+
+include ../shared/shared.mk
main: $(OFILES)
+xarray.o: ../../../lib/test_xarray.c
idr-test.o: ../../../lib/test_ida.c
idr-test: idr-test.o $(CORE_OFILES)
-xarray: $(CORE_OFILES)
+xarray: $(CORE_OFILES) xarray.o
+
+maple: $(CORE_OFILES) maple.o
multiorder: multiorder.o $(CORE_OFILES)
clean:
- $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
-
-vpath %.c ../../lib
-
-$(OFILES): Makefile *.h */*.h generated/map-shift.h \
- ../../include/linux/*.h \
- ../../include/asm/*.h \
- ../../../include/linux/xarray.h \
- ../../../include/linux/radix-tree.h \
- ../../../include/linux/idr.h
-
-radix-tree.c: ../../../lib/radix-tree.c
- sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
-
-idr.c: ../../../lib/idr.c
- sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
-
-xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c
+ $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/*
-generated/map-shift.h:
- @if ! grep -qws $(SHIFT) generated/map-shift.h; then \
- echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \
- generated/map-shift.h; \
- fi
+$(OFILES): $(SHARED_DEPS) *.h
diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c
deleted file mode 100644
index 66ec4a24a203..000000000000
--- a/tools/testing/radix-tree/bitmap.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* lib/bitmap.c pulls in at least two other files. */
-
-#include <linux/bitmap.h>
-
-void bitmap_clear(unsigned long *map, unsigned int start, int len)
-{
- unsigned long *p = map + BIT_WORD(start);
- const unsigned int size = start + len;
- int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
- unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
-
- while (len - bits_to_clear >= 0) {
- *p &= ~mask_to_clear;
- len -= bits_to_clear;
- bits_to_clear = BITS_PER_LONG;
- mask_to_clear = ~0UL;
- p++;
- }
- if (len) {
- mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
- *p &= ~mask_to_clear;
- }
-}
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 3b796dd5e577..2f830ff8396c 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -296,21 +296,34 @@ static void *idr_throbber(void *arg)
return NULL;
}
+/*
+ * There are always either 1 or 2 objects in the IDR. If we find nothing,
+ * or we find something at an ID we didn't expect, that's a bug.
+ */
void idr_find_test_1(int anchor_id, int throbber_id)
{
pthread_t throbber;
time_t start = time(NULL);
- pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
-
BUG_ON(idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id,
anchor_id + 1, GFP_KERNEL) != anchor_id);
+ pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
+
+ rcu_read_lock();
do {
int id = 0;
void *entry = idr_get_next(&find_idr, &id);
- BUG_ON(entry != xa_mk_value(id));
+ rcu_read_unlock();
+ if ((id != anchor_id && id != throbber_id) ||
+ entry != xa_mk_value(id)) {
+ printf("%s(%d, %d): %p at %d\n", __func__, anchor_id,
+ throbber_id, entry, id);
+ abort();
+ }
+ rcu_read_lock();
} while (time(NULL) < start + 11);
+ rcu_read_unlock();
pthread_join(throbber, NULL);
@@ -411,6 +424,7 @@ void idr_checks(void)
#define module_init(x)
#define module_exit(x)
#define MODULE_AUTHOR(x)
+#define MODULE_DESCRIPTION(X)
#define MODULE_LICENSE(x)
#define dump_stack() assert(0)
void ida_dump(struct ida *);
@@ -485,19 +499,17 @@ void ida_check_random(void)
goto repeat;
}
-void ida_simple_get_remove_test(void)
+void ida_alloc_free_test(void)
{
DEFINE_IDA(ida);
unsigned long i;
- for (i = 0; i < 10000; i++) {
- assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
- }
- assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+ for (i = 0; i < 10000; i++)
+ assert(ida_alloc_max(&ida, 20000, GFP_KERNEL) == i);
+ assert(ida_alloc_range(&ida, 5, 30, GFP_KERNEL) < 0);
- for (i = 0; i < 10000; i++) {
- ida_simple_remove(&ida, i);
- }
+ for (i = 0; i < 10000; i++)
+ ida_free(&ida, i);
assert(ida_is_empty(&ida));
ida_destroy(&ida);
@@ -510,7 +522,7 @@ void user_ida_checks(void)
ida_check_nomem();
ida_check_conv_user();
ida_check_random();
- ida_simple_get_remove_test();
+ ida_alloc_free_test();
radix_tree_cpu_dead(1);
}
@@ -577,6 +589,7 @@ void ida_tests(void)
int __weak main(void)
{
+ rcu_register_thread();
radix_tree_init();
idr_checks();
ida_tests();
@@ -584,5 +597,6 @@ int __weak main(void)
rcu_barrier();
if (nr_allocated)
printf("nr_allocated = %d\n", nr_allocated);
+ rcu_unregister_thread();
return 0;
}
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
deleted file mode 100644
index 2d9c59df60de..000000000000
--- a/tools/testing/radix-tree/linux.c
+++ /dev/null
@@ -1,120 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdlib.h>
-#include <string.h>
-#include <malloc.h>
-#include <pthread.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include <linux/gfp.h>
-#include <linux/poison.h>
-#include <linux/slab.h>
-#include <linux/radix-tree.h>
-#include <urcu/uatomic.h>
-
-int nr_allocated;
-int preempt_count;
-int kmalloc_verbose;
-int test_verbose;
-
-struct kmem_cache {
- pthread_mutex_t lock;
- unsigned int size;
- unsigned int align;
- int nr_objs;
- void *objs;
- void (*ctor)(void *);
-};
-
-void *kmem_cache_alloc(struct kmem_cache *cachep, int gfp)
-{
- void *p;
-
- if (!(gfp & __GFP_DIRECT_RECLAIM))
- return NULL;
-
- pthread_mutex_lock(&cachep->lock);
- if (cachep->nr_objs) {
- struct radix_tree_node *node = cachep->objs;
- cachep->nr_objs--;
- cachep->objs = node->parent;
- pthread_mutex_unlock(&cachep->lock);
- node->parent = NULL;
- p = node;
- } else {
- pthread_mutex_unlock(&cachep->lock);
- if (cachep->align)
- posix_memalign(&p, cachep->align, cachep->size);
- else
- p = malloc(cachep->size);
- if (cachep->ctor)
- cachep->ctor(p);
- else if (gfp & __GFP_ZERO)
- memset(p, 0, cachep->size);
- }
-
- uatomic_inc(&nr_allocated);
- if (kmalloc_verbose)
- printf("Allocating %p from slab\n", p);
- return p;
-}
-
-void kmem_cache_free(struct kmem_cache *cachep, void *objp)
-{
- assert(objp);
- uatomic_dec(&nr_allocated);
- if (kmalloc_verbose)
- printf("Freeing %p to slab\n", objp);
- pthread_mutex_lock(&cachep->lock);
- if (cachep->nr_objs > 10 || cachep->align) {
- memset(objp, POISON_FREE, cachep->size);
- free(objp);
- } else {
- struct radix_tree_node *node = objp;
- cachep->nr_objs++;
- node->parent = cachep->objs;
- cachep->objs = node;
- }
- pthread_mutex_unlock(&cachep->lock);
-}
-
-void *kmalloc(size_t size, gfp_t gfp)
-{
- void *ret;
-
- if (!(gfp & __GFP_DIRECT_RECLAIM))
- return NULL;
-
- ret = malloc(size);
- uatomic_inc(&nr_allocated);
- if (kmalloc_verbose)
- printf("Allocating %p from malloc\n", ret);
- if (gfp & __GFP_ZERO)
- memset(ret, 0, size);
- return ret;
-}
-
-void kfree(void *p)
-{
- if (!p)
- return;
- uatomic_dec(&nr_allocated);
- if (kmalloc_verbose)
- printf("Freeing %p to malloc\n", p);
- free(p);
-}
-
-struct kmem_cache *
-kmem_cache_create(const char *name, unsigned int size, unsigned int align,
- unsigned int flags, void (*ctor)(void *))
-{
- struct kmem_cache *ret = malloc(sizeof(*ret));
-
- pthread_mutex_init(&ret->lock, NULL);
- ret->size = size;
- ret->align = align;
- ret->nr_objs = 0;
- ret->objs = NULL;
- ret->ctor = ctor;
- return ret;
-}
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
deleted file mode 100644
index 32159c08a52e..000000000000
--- a/tools/testing/radix-tree/linux/gfp.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _GFP_H
-#define _GFP_H
-
-#include <linux/types.h>
-
-#define __GFP_BITS_SHIFT 26
-#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
-
-#define __GFP_HIGH 0x20u
-#define __GFP_IO 0x40u
-#define __GFP_FS 0x80u
-#define __GFP_NOWARN 0x200u
-#define __GFP_ZERO 0x8000u
-#define __GFP_ATOMIC 0x80000u
-#define __GFP_ACCOUNT 0x100000u
-#define __GFP_DIRECT_RECLAIM 0x400000u
-#define __GFP_KSWAPD_RECLAIM 0x2000000u
-
-#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
-
-#define GFP_ZONEMASK 0x0fu
-#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
-#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
-#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
-
-
-static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
-{
- return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
-}
-
-#endif
diff --git a/tools/testing/radix-tree/linux/idr.h b/tools/testing/radix-tree/linux/idr.h
deleted file mode 100644
index 4e342f2e37cf..000000000000
--- a/tools/testing/radix-tree/linux/idr.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../../include/linux/idr.h"
diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h
deleted file mode 100644
index 1bb0afc21309..000000000000
--- a/tools/testing/radix-tree/linux/init.h
+++ /dev/null
@@ -1 +0,0 @@
-#define __init
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
deleted file mode 100644
index 2958830ce4d7..000000000000
--- a/tools/testing/radix-tree/linux/slab.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef SLAB_H
-#define SLAB_H
-
-#include <linux/types.h>
-#include <linux/gfp.h>
-
-#define SLAB_HWCACHE_ALIGN 1
-#define SLAB_PANIC 2
-#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
-
-void *kmalloc(size_t size, gfp_t);
-void kfree(void *);
-
-static inline void *kzalloc(size_t size, gfp_t gfp)
-{
- return kmalloc(size, gfp | __GFP_ZERO);
-}
-
-void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
-void kmem_cache_free(struct kmem_cache *cachep, void *objp);
-
-struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
- unsigned int align, unsigned int flags,
- void (*ctor)(void *));
-
-#endif /* SLAB_H */
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
new file mode 100644
index 000000000000..5c1b18e3ed21
--- /dev/null
+++ b/tools/testing/radix-tree/maple.c
@@ -0,0 +1,36227 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * maple_tree.c: Userspace testing for maple tree test-suite
+ * Copyright (c) 2018-2022 Oracle Corporation
+ * Author: Liam R. Howlett <Liam.Howlett@Oracle.com>
+ *
+ * Any tests that require internal knowledge of the tree or threads and other
+ * difficult to handle in kernel tests.
+ */
+
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_DESCRIPTION(X)
+#define MODULE_LICENSE(x)
+#define dump_stack() assert(0)
+
+#include "test.h"
+
+#include "../shared/maple-shim.c"
+#include "../../../lib/test_maple_tree.c"
+
+#define RCU_RANGE_COUNT 1000
+#define RCU_MT_BUG_ON(test, y) {if (y) { test->stop = true; } MT_BUG_ON(test->mt, y); }
+
+struct rcu_test_struct2 {
+ struct maple_tree *mt;
+
+ bool start;
+ bool stop;
+ unsigned int thread_count;
+
+ unsigned int seen_toggle;
+ unsigned int seen_added;
+ unsigned int seen_modified;
+ unsigned int seen_deleted;
+ int pause;
+
+ unsigned long index[RCU_RANGE_COUNT];
+ unsigned long last[RCU_RANGE_COUNT];
+};
+
+struct rcu_test_struct3 {
+ struct maple_tree *mt;
+ unsigned long index;
+ unsigned long last;
+ bool stop;
+};
+
+struct rcu_reader_struct {
+ unsigned int id;
+ int mod;
+ int del;
+ int flip;
+ int add;
+ int next;
+ struct rcu_test_struct2 *test;
+};
+
+/*
+ * Check erasing including RCU.
+ */
+static noinline void __init check_erase(struct maple_tree *mt, unsigned long index,
+ void *ptr)
+{
+ MT_BUG_ON(mt, mtree_test_erase(mt, index) != ptr);
+}
+
+#define erase_check_load(mt, i) check_load(mt, set[i], entry[i%2])
+#define erase_check_insert(mt, i) check_insert(mt, set[i], entry[i%2])
+#define erase_check_erase(mt, i) check_erase(mt, set[i], entry[i%2])
+
+static noinline void __init check_erase_testset(struct maple_tree *mt)
+{
+ static const unsigned long set[] = { 5015, 5014, 5017, 25, 1000,
+ 1001, 1002, 1003, 1005, 0,
+ 6003, 6002, 6008, 6012, 6015,
+ 7003, 7002, 7008, 7012, 7015,
+ 8003, 8002, 8008, 8012, 8015,
+ 9003, 9002, 9008, 9012, 9015,
+ 10003, 10002, 10008, 10012, 10015,
+ 11003, 11002, 11008, 11012, 11015,
+ 12003, 12002, 12008, 12012, 12015,
+ 13003, 13002, 13008, 13012, 13015,
+ 14003, 14002, 14008, 14012, 14015,
+ 15003, 15002, 15008, 15012, 15015,
+ };
+
+
+ void *ptr = &check_erase_testset;
+ void *entry[2] = { ptr, mt };
+ void *root_node;
+
+
+ rcu_register_thread();
+ mt_set_in_rcu(mt);
+ for (int i = 0; i < 4; i++)
+ erase_check_insert(mt, i);
+ for (int i = 0; i < 4; i++)
+ erase_check_load(mt, i);
+
+ mt_set_non_kernel(2);
+ erase_check_erase(mt, 1);
+ erase_check_load(mt, 0);
+ check_load(mt, set[1], NULL);
+ for (int i = 2; i < 4; i++)
+ erase_check_load(mt, i);
+
+
+ erase_check_erase(mt, 2);
+ erase_check_load(mt, 0);
+ check_load(mt, set[1], NULL);
+ check_load(mt, set[2], NULL);
+
+ erase_check_insert(mt, 1);
+ erase_check_insert(mt, 2);
+
+ for (int i = 0; i < 4; i++)
+ erase_check_load(mt, i);
+
+ /* Check erase and load without an allocation. */
+ erase_check_load(mt, 3);
+ erase_check_erase(mt, 1);
+ erase_check_load(mt, 0);
+ check_load(mt, set[1], NULL);
+ for (int i = 2; i < 4; i++)
+ erase_check_load(mt, i);
+
+ /*
+ * Set the newly erased node. This will produce a different allocated
+ * node to avoid busy slots.
+ */
+ root_node = mt->ma_root;
+ erase_check_insert(mt, 1);
+
+ erase_check_load(mt, 0);
+ check_load(mt, 5016, NULL);
+ erase_check_load(mt, 1);
+ check_load(mt, 5013, NULL);
+ erase_check_load(mt, 2);
+ check_load(mt, 5018, NULL);
+ erase_check_load(mt, 3);
+
+ erase_check_erase(mt, 2); /* erase 5017 to check append */
+ erase_check_load(mt, 0);
+ check_load(mt, 5016, NULL);
+ erase_check_load(mt, 1);
+ check_load(mt, 5013, NULL);
+ check_load(mt, set[2], NULL);
+ check_load(mt, 5018, NULL);
+
+ erase_check_load(mt, 3);
+
+ root_node = mt->ma_root;
+ erase_check_insert(mt, 2);
+
+ erase_check_load(mt, 0);
+ check_load(mt, 5016, NULL);
+ erase_check_load(mt, 1);
+ check_load(mt, 5013, NULL);
+ erase_check_load(mt, 2);
+ check_load(mt, 5018, NULL);
+ erase_check_load(mt, 3);
+
+ mt_set_non_kernel(1);
+ erase_check_erase(mt, 2); /* erase 5017 to check append */
+ erase_check_load(mt, 0);
+ check_load(mt, 5016, NULL);
+ check_load(mt, set[2], NULL);
+ erase_check_erase(mt, 0); /* erase 5015 to check append */
+ check_load(mt, set[0], NULL);
+ check_load(mt, 5016, NULL);
+ erase_check_insert(mt, 4); /* 1000 < Should not split. */
+ check_load(mt, set[0], NULL);
+ check_load(mt, 5016, NULL);
+ erase_check_load(mt, 1);
+ check_load(mt, 5013, NULL);
+ check_load(mt, set[2], NULL);
+ check_load(mt, 5018, NULL);
+ erase_check_load(mt, 4);
+ check_load(mt, 999, NULL);
+ check_load(mt, 1001, NULL);
+ erase_check_load(mt, 4);
+ if (mt_in_rcu(mt))
+ MT_BUG_ON(mt, root_node == mt->ma_root);
+ else
+ MT_BUG_ON(mt, root_node != mt->ma_root);
+
+ /* Should not have split. */
+ MT_BUG_ON(mt, !mte_is_leaf(mt->ma_root));
+
+
+ /* Coalesce testing */
+ erase_check_insert(mt, 0);
+ erase_check_insert(mt, 2);
+
+ for (int i = 5; i < 25; i++) {
+ erase_check_insert(mt, i);
+ for (int j = i; j >= 0; j--)
+ erase_check_load(mt, j);
+ }
+
+ erase_check_erase(mt, 14); /*6015 */
+ for (int i = 0; i < 25; i++) {
+ if (i == 14)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+ erase_check_erase(mt, 16); /*7002 */
+ for (int i = 0; i < 25; i++) {
+ if (i == 16 || i == 14)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+
+
+ mt_set_non_kernel(1);
+ erase_check_erase(mt, 13); /*6012 */
+ for (int i = 0; i < 25; i++) {
+ if (i == 16 || i == 14 || i == 13)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+
+ erase_check_erase(mt, 15); /*7003 */
+ for (int i = 0; i < 25; i++) {
+ if (i <= 16 && i >= 13)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+
+ mt_set_non_kernel(2);
+ erase_check_erase(mt, 17); /*7008 *should* cause coalesce. */
+ for (int i = 0; i < 25; i++) {
+ if (i <= 17 && i >= 13)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+
+ erase_check_erase(mt, 18); /*7012 */
+ for (int i = 0; i < 25; i++) {
+ if (i <= 18 && i >= 13)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+
+ mt_set_non_kernel(2);
+ erase_check_erase(mt, 19); /*7015 */
+ for (int i = 0; i < 25; i++) {
+ if (i <= 19 && i >= 13)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+
+ erase_check_erase(mt, 20); /*8003 */
+ for (int i = 0; i < 25; i++) {
+ if (i <= 20 && i >= 13)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+
+ erase_check_erase(mt, 21); /*8002 */
+ for (int i = 0; i < 25; i++) {
+ if (i <= 21 && i >= 13)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+
+ mt_set_non_kernel(2);
+ erase_check_erase(mt, 22); /*8008 */
+ for (int i = 0; i < 25; i++) {
+ if (i <= 22 && i >= 13)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+ for (int i = 23; i < 25; i++)
+ erase_check_erase(mt, i);
+
+ for (int i = 0; i < 25; i++) {
+ if (i <= 25 && i >= 13)
+ check_load(mt, set[i], NULL);
+ else
+ erase_check_load(mt, i);
+ }
+
+ /* Shrinking tree test. */
+
+ for (int i = 13; i < ARRAY_SIZE(set); i++)
+ erase_check_insert(mt, i);
+
+ mt_set_non_kernel(99);
+ for (int i = 18; i < ARRAY_SIZE(set); i++) {
+ erase_check_erase(mt, i);
+ for (int j = 0; j < ARRAY_SIZE(set); j++) {
+ if (j < 18 || j > i)
+ erase_check_load(mt, j);
+ else
+ check_load(mt, set[j], NULL);
+ }
+ }
+ mt_set_non_kernel(35);
+ for (int i = 0; i < 18; i++) {
+ erase_check_erase(mt, i);
+ for (int j = 0; j < ARRAY_SIZE(set); j++) {
+ if (j < 18 && j > i)
+ erase_check_load(mt, j);
+ else
+ check_load(mt, set[j], NULL);
+ }
+ }
+ erase_check_insert(mt, 8);
+ erase_check_insert(mt, 9);
+ erase_check_erase(mt, 8);
+ rcu_unregister_thread();
+}
+
+/* End of erase testing */
+
+/* VM Generated Crashes - uses its own tree walk for verification */
+#define erase_check_store_range(mt, a, i, ptr) mtree_test_store_range(mt, \
+ a[(i)], a[(i + 1)], ptr)
+#define STORE 1
+#define SNULL 2
+#define ERASE 3
+#define ec_type_str(x) \
+ (((x) == STORE) ? \
+ "STORE" : \
+ (((x) == SNULL) ? \
+ "SNULL" : "ERASE") \
+ )
+#define check_erase2_debug 0
+
+/* Calculate the overwritten entries. */
+int mas_ce2_over_count(struct ma_state *mas_start, struct ma_state *mas_end,
+ void *s_entry, unsigned long s_min,
+ void *e_entry, unsigned long e_max,
+ const unsigned long *set, int i, bool null_entry)
+{
+ int count = 0, span = 0;
+ unsigned long retry = 0;
+ void *entry;
+ struct ma_state tmp;
+
+
+ /* count slots */
+ memcpy(&tmp, mas_start, sizeof(tmp));
+ entry = mas_next(&tmp, mas_end->last);
+ while (entry) {
+ BUG_ON(retry > 50); /* stop infinite retry on testing. */
+ if (xa_is_zero(s_entry)) {
+ retry++;
+ continue;
+ }
+ count++;
+ span++;
+ entry = mas_next(&tmp, mas_end->last);
+ }
+
+ if (null_entry) {
+ /* Check splitting end. */
+ if (e_entry && (e_max > mas_end->last))
+ count--;
+
+ /* check overwrite of entire start */
+ if (s_entry && (s_min == mas_start->index))
+ count++;
+ } else { /* !null_entry (store) */
+ bool esplit = e_max > mas_end->last;
+ bool ssplit = s_min != mas_start->index;
+
+ if (s_entry && e_entry) {
+ if (esplit && ssplit)
+ count--;
+ else if (ssplit)
+ count--;
+ else if (esplit) {
+ if (span)
+ count--;
+ }
+ } else if (s_entry && !e_entry) {
+ if (ssplit)
+ count--;
+ } else if (!s_entry && e_entry) {
+ if (esplit)
+ count--;
+ count--;
+ } else {
+ count--;
+ }
+ }
+ return count;
+}
+
+/*
+ * mas_node_walk() - Walk a maple node to offset of the index.
+ * @mas: The maple state
+ * @type: The maple node type
+ * @*range_min: Pointer to store the minimum range of the offset
+ * @*range_max: Pointer to store the maximum range of the offset
+ *
+ * The offset will be stored in the maple state.
+ *
+ */
+static inline void mas_node_walk(struct ma_state *mas, struct maple_node *node,
+ enum maple_type type, unsigned long *range_min,
+ unsigned long *range_max)
+
+{
+ unsigned long *pivots;
+ unsigned char count;
+ unsigned long prev, max;
+ unsigned char offset;
+ unsigned long index;
+
+ if (unlikely(ma_is_dense(type))) {
+ (*range_max) = (*range_min) = mas->index;
+ if (unlikely(ma_dead_node(node)))
+ return;
+
+ mas->offset = mas->index = mas->min;
+ return;
+ }
+
+ pivots = ma_pivots(node, type);
+ max = pivots[0];
+ if (unlikely(ma_dead_node(node)))
+ return;
+
+ offset = 0;
+ prev = mas->min;
+ index = mas->index;
+ if (unlikely(index <= max))
+ goto offset_zero;
+
+ count = mt_pivots[type];
+ while (++offset < count) {
+ prev = max;
+ max = pivots[offset];
+ if (unlikely(ma_dead_node(node)))
+ return;
+
+ if (index <= max)
+ goto offset_found;
+ else if (unlikely(!max))
+ goto mas_max;
+ }
+
+ prev = max;
+mas_max:
+ max = mas->max;
+offset_found:
+ prev++;
+offset_zero:
+ mas->offset = offset;
+ if (ma_is_leaf(type)) {
+ *range_max = max;
+ *range_min = prev;
+ } else {
+ mas->max = max;
+ mas->min = prev;
+ }
+}
+
+/*
+ * mas_descend_walk(): Locates a value and sets the mas->node and slot
+ * accordingly. range_min and range_max are set to the range which the entry is
+ * valid.
+ * @mas: The maple state
+ * @*range_min: A pointer to store the minimum of the range
+ * @*range_max: A pointer to store the maximum of the range
+ *
+ * Check mas->node is still valid on return of any value.
+ *
+ * Return: true if pointing to a valid node and offset. False otherwise.
+ */
+static inline bool mas_descend_walk(struct ma_state *mas,
+ unsigned long *range_min, unsigned long *range_max)
+{
+ struct maple_enode *next;
+ struct maple_node *node;
+ enum maple_type type;
+
+ next = mas->node;
+ while (true) {
+ node = mte_to_node(next);
+ type = mte_node_type(next);
+ mas_node_walk(mas, node, type, range_min, range_max);
+ next = mas_slot(mas, ma_slots(node, type), mas->offset);
+ if (unlikely(ma_dead_node(node)))
+ return false;
+
+ if (unlikely(ma_is_leaf(type)))
+ return true;
+
+ /* Descend. */
+ mas->node = next;
+ }
+ return false;
+}
+
+/*
+ * mas_tree_walk() - Walk to @mas->index and set the range values.
+ * @mas: The maple state.
+ * @*range_min: The minimum range to be set.
+ * @*range_max: The maximum range to be set.
+ *
+ * Ranges are only valid if there is a valid entry at @mas->index.
+ *
+ * Return: True if a value exists, false otherwise.
+ */
+static inline bool mas_tree_walk(struct ma_state *mas, unsigned long *range_min,
+ unsigned long *range_max)
+{
+ bool ret;
+
+retry:
+ ret = false;
+ mas_start(mas);
+ if (mas_is_none(mas))
+ goto not_found;
+
+ if (mas_is_ptr(mas)) {
+ *range_min = *range_max = 0;
+ if (!mas->index)
+ return true;
+
+ goto not_found;
+ }
+
+ ret = mas_descend_walk(mas, range_min, range_max);
+ if (unlikely(mte_dead_node(mas->node))) {
+ mas->status = ma_start;
+ goto retry;
+ }
+
+ mas->end = mas_data_end(mas);
+ return ret;
+
+not_found:
+ mas->offset = MAPLE_NODE_SLOTS;
+ return false;
+}
+
+static inline void *mas_range_load(struct ma_state *mas,
+ unsigned long *range_min, unsigned long *range_max)
+
+{
+ void *entry = NULL;
+ unsigned long index = mas->index;
+
+ if (mas_is_none(mas) || mas_is_paused(mas))
+ mas->status = ma_start;
+retry:
+ if (mas_tree_walk(mas, range_min, range_max))
+ if (unlikely(mas->status == ma_root))
+ return mas_root(mas);
+
+ if (likely(mas->offset != MAPLE_NODE_SLOTS))
+ entry = mas_get_slot(mas, mas->offset);
+
+ if (mas_is_active(mas) && mte_dead_node(mas->node)) {
+ mas_set(mas, index);
+ goto retry;
+ }
+
+ return entry;
+}
+
+#if defined(CONFIG_64BIT)
+static noinline void __init check_erase2_testset(struct maple_tree *mt,
+ const unsigned long *set, unsigned long size)
+{
+ int entry_count = 0;
+ int check = 0;
+ void *foo;
+ unsigned long addr = 0;
+ void *s_entry = NULL, *e_entry = NULL;
+
+ MA_STATE(mas, mt, 0, 0);
+
+ for (int i = 0; i < size; i += 3) {
+ unsigned long s_min, s_max;
+ unsigned long e_min, e_max;
+ void *value = NULL;
+
+ MA_STATE(mas_start, mt, set[i+1], set[i+1]);
+ MA_STATE(mas_end, mt, set[i+2], set[i+2]);
+ mt_set_non_kernel(127);
+#if check_erase2_debug
+ pr_err("%s: %d %s %lu - %lu\n", __func__, i,
+ ec_type_str(set[i]),
+ set[i+1], set[i+2]);
+#endif
+ s_entry = mas_range_load(&mas_start, &s_min, &s_max);
+ e_entry = mas_range_load(&mas_end, &e_min, &e_max);
+
+ switch (set[i]) {
+ case SNULL:
+ if ((s_min == set[i+1]) && (s_max == set[i+2])) {
+ if (s_entry)
+ entry_count--;
+ } else if ((s_min != set[i+1]) && (s_max != set[i+2])) {
+ entry_count++;
+ } else if ((mas_start.node != mas_end.node) ||
+ (mas_start.offset != mas_end.offset)) {
+ entry_count -=
+ mas_ce2_over_count(&mas_start, &mas_end,
+ s_entry, s_min,
+ e_entry, e_max, set, i,
+ true);
+ }
+
+
+ erase_check_store_range(mt, set, i + 1, value);
+ break;
+ case STORE:
+ value = xa_mk_value(set[i + 1]);
+ if (mas_start.offset > mt_slot_count(mas_start.node)) {
+ entry_count++; /* appending an entry. */
+ } else if ((s_min == e_min) && (s_max == e_max)) {
+ if (!entry_count)
+ entry_count++;
+
+ else if (s_entry) {
+ if (e_max > mas_end.last)
+ entry_count++;
+
+ if (s_min < mas_start.index)
+ entry_count++;
+
+ } else {
+ entry_count++;
+ }
+ } else {
+ entry_count -=
+ mas_ce2_over_count(&mas_start, &mas_end,
+ s_entry, s_min,
+ e_entry, e_max, set, i,
+ false);
+ }
+
+ erase_check_store_range(mt, set, i + 1, value);
+ break;
+ case ERASE:
+ if (!s_entry)
+ break;
+ check_erase(mt, set[i+1], xa_mk_value(set[i+1]));
+ entry_count--;
+ break;
+ }
+ mt_validate(mt);
+ if (entry_count)
+ MT_BUG_ON(mt, !mt_height(mt));
+#if check_erase2_debug > 1
+ mt_dump(mt, mt_dump_hex);
+#endif
+#if check_erase2_debug
+ pr_err("Done\n");
+#endif
+
+ check = 0;
+ addr = 0;
+ mt_for_each(mt, foo, addr, ULONG_MAX) {
+ check++;
+#if check_erase2_debug > 2
+ pr_err("mt: %lu -> %p (%d)\n", addr+1, foo, check);
+#endif
+ if (check > entry_count)
+ break;
+ }
+
+#if check_erase2_debug > 2
+ pr_err("mt_for_each %d and count %d\n", check, entry_count);
+#endif
+
+ MT_BUG_ON(mt, check != entry_count);
+
+ check = 0;
+ addr = 0;
+ mas_reset(&mas);
+ mas.index = 0;
+ rcu_read_lock();
+ mas_for_each(&mas, foo, ULONG_MAX) {
+ if (xa_is_zero(foo)) {
+ if (addr == mas.index) {
+ mt_dump(mas.tree, mt_dump_hex);
+ pr_err("retry failed %lu - %lu\n",
+ mas.index, mas.last);
+ MT_BUG_ON(mt, 1);
+ }
+ addr = mas.index;
+ continue;
+ }
+#if check_erase2_debug > 2
+ pr_err("mas: %lu -> %p\n", mas.index, foo);
+#endif
+ check++;
+ if (check > entry_count)
+ break;
+ }
+ rcu_read_unlock();
+#if check_erase2_debug > 2
+ pr_err("mas_for_each %d and count %d\n", check, entry_count);
+ mt_validate(mt);
+#endif
+
+ MT_BUG_ON(mt, check != entry_count);
+
+ MT_BUG_ON(mt, mtree_load(mas.tree, 0) != NULL);
+ }
+}
+
+
+/* These tests were pulled from KVM tree modifications which failed. */
+static noinline void __init check_erase2_sets(struct maple_tree *mt)
+{
+ void *entry;
+ unsigned long start = 0;
+ static const unsigned long set[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140721266458624, 140737488351231,
+ERASE, 140721266458624, 140737488351231,
+STORE, 140721266458624, 140721266462719,
+STORE, 94735788949504, 94735789121535,
+ERASE, 94735788949504, 94735789121535,
+STORE, 94735788949504, 94735788965887,
+STORE, 94735788965888, 94735789121535,
+ERASE, 94735788965888, 94735789121535,
+STORE, 94735788965888, 94735789068287,
+STORE, 94735789068288, 94735789109247,
+STORE, 94735789109248, 94735789121535,
+STORE, 140253902692352, 140253902864383,
+ERASE, 140253902692352, 140253902864383,
+STORE, 140253902692352, 140253902696447,
+STORE, 140253902696448, 140253902864383,
+ };
+ static const unsigned long set2[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140735933583360, 140737488351231,
+ERASE, 140735933583360, 140737488351231,
+STORE, 140735933583360, 140735933587455,
+STORE, 94811003260928, 94811003432959,
+ERASE, 94811003260928, 94811003432959,
+STORE, 94811003260928, 94811003277311,
+STORE, 94811003277312, 94811003432959,
+ERASE, 94811003277312, 94811003432959,
+STORE, 94811003277312, 94811003379711,
+STORE, 94811003379712, 94811003420671,
+STORE, 94811003420672, 94811003432959,
+STORE, 140277094653952, 140277094825983,
+ERASE, 140277094653952, 140277094825983,
+STORE, 140277094653952, 140277094658047,
+STORE, 140277094658048, 140277094825983,
+ERASE, 140277094658048, 140277094825983,
+STORE, 140277094658048, 140277094780927,
+STORE, 140277094780928, 140277094813695,
+STORE, 140277094813696, 140277094821887,
+STORE, 140277094821888, 140277094825983,
+STORE, 140735933906944, 140735933911039,
+ };
+ static const unsigned long set3[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140735790264320, 140737488351231,
+ERASE, 140735790264320, 140737488351231,
+STORE, 140735790264320, 140735790268415,
+STORE, 94016597282816, 94016597454847,
+ERASE, 94016597282816, 94016597454847,
+STORE, 94016597282816, 94016597299199,
+STORE, 94016597299200, 94016597454847,
+ERASE, 94016597299200, 94016597454847,
+STORE, 94016597299200, 94016597401599,
+STORE, 94016597401600, 94016597442559,
+STORE, 94016597442560, 94016597454847,
+STORE, 140496959283200, 140496959455231,
+ERASE, 140496959283200, 140496959455231,
+STORE, 140496959283200, 140496959287295,
+STORE, 140496959287296, 140496959455231,
+ERASE, 140496959287296, 140496959455231,
+STORE, 140496959287296, 140496959410175,
+STORE, 140496959410176, 140496959442943,
+STORE, 140496959442944, 140496959451135,
+STORE, 140496959451136, 140496959455231,
+STORE, 140735791718400, 140735791722495,
+STORE, 140735791706112, 140735791718399,
+STORE, 47135835713536, 47135835721727,
+STORE, 47135835721728, 47135835729919,
+STORE, 47135835729920, 47135835893759,
+ERASE, 47135835729920, 47135835893759,
+STORE, 47135835729920, 47135835742207,
+STORE, 47135835742208, 47135835893759,
+STORE, 47135835840512, 47135835893759,
+STORE, 47135835742208, 47135835840511,
+ERASE, 47135835742208, 47135835840511,
+STORE, 47135835742208, 47135835840511,
+STORE, 47135835885568, 47135835893759,
+STORE, 47135835840512, 47135835885567,
+ERASE, 47135835840512, 47135835885567,
+STORE, 47135835840512, 47135835893759,
+ERASE, 47135835840512, 47135835893759,
+STORE, 47135835840512, 47135835885567,
+STORE, 47135835885568, 47135835893759,
+ };
+
+ static const unsigned long set4[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140728251703296, 140737488351231,
+ERASE, 140728251703296, 140737488351231,
+STORE, 140728251703296, 140728251707391,
+STORE, 94668429205504, 94668429377535,
+ERASE, 94668429205504, 94668429377535,
+STORE, 94668429205504, 94668429221887,
+STORE, 94668429221888, 94668429377535,
+ERASE, 94668429221888, 94668429377535,
+STORE, 94668429221888, 94668429324287,
+STORE, 94668429324288, 94668429365247,
+STORE, 94668429365248, 94668429377535,
+STORE, 47646523273216, 47646523445247,
+ERASE, 47646523273216, 47646523445247,
+STORE, 47646523273216, 47646523277311,
+STORE, 47646523277312, 47646523445247,
+ERASE, 47646523277312, 47646523445247,
+STORE, 47646523277312, 47646523400191,
+ };
+
+ static const unsigned long set5[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140726874062848, 140737488351231,
+ERASE, 140726874062848, 140737488351231,
+STORE, 140726874062848, 140726874066943,
+STORE, 94248892870656, 94248893042687,
+ERASE, 94248892870656, 94248893042687,
+STORE, 94248892870656, 94248892887039,
+STORE, 94248892887040, 94248893042687,
+ERASE, 94248892887040, 94248893042687,
+STORE, 94248892887040, 94248892989439,
+STORE, 94248892989440, 94248893030399,
+STORE, 94248893030400, 94248893042687,
+STORE, 47884786266112, 47884786438143,
+ERASE, 47884786266112, 47884786438143,
+STORE, 47884786266112, 47884786270207,
+STORE, 47884786270208, 47884786438143,
+ERASE, 47884786270208, 47884786438143,
+STORE, 47884786270208, 47884786393087,
+STORE, 47884786393088, 47884786425855,
+STORE, 47884786425856, 47884786434047,
+STORE, 47884786434048, 47884786438143,
+STORE, 140726874513408, 140726874517503,
+STORE, 140726874501120, 140726874513407,
+STORE, 47884786438144, 47884786446335,
+STORE, 47884786446336, 47884786454527,
+STORE, 47884786454528, 47884786618367,
+ERASE, 47884786454528, 47884786618367,
+STORE, 47884786454528, 47884786466815,
+STORE, 47884786466816, 47884786618367,
+STORE, 47884786565120, 47884786618367,
+STORE, 47884786466816, 47884786565119,
+ERASE, 47884786466816, 47884786565119,
+STORE, 47884786466816, 47884786565119,
+STORE, 47884786610176, 47884786618367,
+STORE, 47884786565120, 47884786610175,
+ERASE, 47884786565120, 47884786610175,
+STORE, 47884786565120, 47884786618367,
+ERASE, 47884786565120, 47884786618367,
+STORE, 47884786565120, 47884786610175,
+STORE, 47884786610176, 47884786618367,
+ERASE, 47884786610176, 47884786618367,
+STORE, 47884786610176, 47884786618367,
+STORE, 47884786618368, 47884789669887,
+STORE, 47884787163136, 47884789669887,
+STORE, 47884786618368, 47884787163135,
+ERASE, 47884787163136, 47884789669887,
+STORE, 47884787163136, 47884789448703,
+STORE, 47884789448704, 47884789669887,
+STORE, 47884788858880, 47884789448703,
+STORE, 47884787163136, 47884788858879,
+ERASE, 47884787163136, 47884788858879,
+STORE, 47884787163136, 47884788858879,
+STORE, 47884789444608, 47884789448703,
+STORE, 47884788858880, 47884789444607,
+ERASE, 47884788858880, 47884789444607,
+STORE, 47884788858880, 47884789444607,
+STORE, 47884789653504, 47884789669887,
+STORE, 47884789448704, 47884789653503,
+ERASE, 47884789448704, 47884789653503,
+STORE, 47884789448704, 47884789653503,
+ERASE, 47884789653504, 47884789669887,
+STORE, 47884789653504, 47884789669887,
+STORE, 47884789669888, 47884791508991,
+STORE, 47884789809152, 47884791508991,
+STORE, 47884789669888, 47884789809151,
+ERASE, 47884789809152, 47884791508991,
+STORE, 47884789809152, 47884791468031,
+STORE, 47884791468032, 47884791508991,
+STORE, 47884791152640, 47884791468031,
+STORE, 47884789809152, 47884791152639,
+ERASE, 47884789809152, 47884791152639,
+STORE, 47884789809152, 47884791152639,
+STORE, 47884791463936, 47884791468031,
+STORE, 47884791152640, 47884791463935,
+ERASE, 47884791152640, 47884791463935,
+STORE, 47884791152640, 47884791463935,
+STORE, 47884791492608, 47884791508991,
+STORE, 47884791468032, 47884791492607,
+ERASE, 47884791468032, 47884791492607,
+STORE, 47884791468032, 47884791492607,
+ERASE, 47884791492608, 47884791508991,
+STORE, 47884791492608, 47884791508991,
+STORE, 47884791508992, 47884791644159,
+ERASE, 47884791508992, 47884791644159,
+STORE, 47884791508992, 47884791533567,
+STORE, 47884791533568, 47884791644159,
+STORE, 47884791595008, 47884791644159,
+STORE, 47884791533568, 47884791595007,
+ERASE, 47884791533568, 47884791595007,
+STORE, 47884791533568, 47884791595007,
+STORE, 47884791619584, 47884791644159,
+STORE, 47884791595008, 47884791619583,
+ERASE, 47884791595008, 47884791619583,
+STORE, 47884791595008, 47884791644159,
+ERASE, 47884791595008, 47884791644159,
+STORE, 47884791595008, 47884791619583,
+STORE, 47884791619584, 47884791644159,
+STORE, 47884791627776, 47884791644159,
+STORE, 47884791619584, 47884791627775,
+ERASE, 47884791619584, 47884791627775,
+STORE, 47884791619584, 47884791627775,
+ERASE, 47884791627776, 47884791644159,
+STORE, 47884791627776, 47884791644159,
+STORE, 47884791644160, 47884791664639,
+ERASE, 47884791644160, 47884791664639,
+STORE, 47884791644160, 47884791648255,
+STORE, 47884791648256, 47884791664639,
+STORE, 47884791652352, 47884791664639,
+STORE, 47884791648256, 47884791652351,
+ERASE, 47884791648256, 47884791652351,
+STORE, 47884791648256, 47884791652351,
+STORE, 47884791656448, 47884791664639,
+STORE, 47884791652352, 47884791656447,
+ERASE, 47884791652352, 47884791656447,
+STORE, 47884791652352, 47884791664639,
+ERASE, 47884791652352, 47884791664639,
+STORE, 47884791652352, 47884791656447,
+STORE, 47884791656448, 47884791664639,
+ERASE, 47884791656448, 47884791664639,
+STORE, 47884791656448, 47884791664639,
+STORE, 47884791664640, 47884791672831,
+ERASE, 47884791468032, 47884791492607,
+STORE, 47884791468032, 47884791484415,
+STORE, 47884791484416, 47884791492607,
+ERASE, 47884791656448, 47884791664639,
+STORE, 47884791656448, 47884791660543,
+STORE, 47884791660544, 47884791664639,
+ERASE, 47884791619584, 47884791627775,
+STORE, 47884791619584, 47884791623679,
+STORE, 47884791623680, 47884791627775,
+ };
+
+ static const unsigned long set6[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140722999021568, 140737488351231,
+ERASE, 140722999021568, 140737488351231,
+STORE, 140722999021568, 140722999025663,
+STORE, 94901500268544, 94901500440575,
+ERASE, 94901500268544, 94901500440575,
+STORE, 94901500268544, 94901500284927,
+STORE, 94901500284928, 94901500440575,
+ERASE, 94901500284928, 94901500440575,
+STORE, 94901500284928, 94901500387327,
+STORE, 94901500387328, 94901500428287,
+STORE, 94901500428288, 94901500440575,
+STORE, 47430426660864, 47430426832895,
+ERASE, 47430426660864, 47430426832895,
+STORE, 47430426660864, 47430426664959,
+STORE, 47430426664960, 47430426832895,
+ERASE, 47430426664960, 47430426832895,
+STORE, 47430426664960, 47430426787839,
+STORE, 47430426787840, 47430426820607,
+STORE, 47430426820608, 47430426828799,
+STORE, 47430426828800, 47430426832895,
+STORE, 140722999115776, 140722999119871,
+STORE, 140722999103488, 140722999115775,
+STORE, 47430426832896, 47430426841087,
+STORE, 47430426841088, 47430426849279,
+STORE, 47430426849280, 47430427013119,
+ERASE, 47430426849280, 47430427013119,
+STORE, 47430426849280, 47430426861567,
+STORE, 47430426861568, 47430427013119,
+STORE, 47430426959872, 47430427013119,
+STORE, 47430426861568, 47430426959871,
+ERASE, 47430426861568, 47430426959871,
+STORE, 47430426861568, 47430426959871,
+STORE, 47430427004928, 47430427013119,
+STORE, 47430426959872, 47430427004927,
+ERASE, 47430426959872, 47430427004927,
+STORE, 47430426959872, 47430427013119,
+ERASE, 47430426959872, 47430427013119,
+STORE, 47430426959872, 47430427004927,
+STORE, 47430427004928, 47430427013119,
+ERASE, 47430427004928, 47430427013119,
+STORE, 47430427004928, 47430427013119,
+STORE, 47430427013120, 47430430064639,
+STORE, 47430427557888, 47430430064639,
+STORE, 47430427013120, 47430427557887,
+ERASE, 47430427557888, 47430430064639,
+STORE, 47430427557888, 47430429843455,
+STORE, 47430429843456, 47430430064639,
+STORE, 47430429253632, 47430429843455,
+STORE, 47430427557888, 47430429253631,
+ERASE, 47430427557888, 47430429253631,
+STORE, 47430427557888, 47430429253631,
+STORE, 47430429839360, 47430429843455,
+STORE, 47430429253632, 47430429839359,
+ERASE, 47430429253632, 47430429839359,
+STORE, 47430429253632, 47430429839359,
+STORE, 47430430048256, 47430430064639,
+STORE, 47430429843456, 47430430048255,
+ERASE, 47430429843456, 47430430048255,
+STORE, 47430429843456, 47430430048255,
+ERASE, 47430430048256, 47430430064639,
+STORE, 47430430048256, 47430430064639,
+STORE, 47430430064640, 47430431903743,
+STORE, 47430430203904, 47430431903743,
+STORE, 47430430064640, 47430430203903,
+ERASE, 47430430203904, 47430431903743,
+STORE, 47430430203904, 47430431862783,
+STORE, 47430431862784, 47430431903743,
+STORE, 47430431547392, 47430431862783,
+STORE, 47430430203904, 47430431547391,
+ERASE, 47430430203904, 47430431547391,
+STORE, 47430430203904, 47430431547391,
+STORE, 47430431858688, 47430431862783,
+STORE, 47430431547392, 47430431858687,
+ERASE, 47430431547392, 47430431858687,
+STORE, 47430431547392, 47430431858687,
+STORE, 47430431887360, 47430431903743,
+STORE, 47430431862784, 47430431887359,
+ERASE, 47430431862784, 47430431887359,
+STORE, 47430431862784, 47430431887359,
+ERASE, 47430431887360, 47430431903743,
+STORE, 47430431887360, 47430431903743,
+STORE, 47430431903744, 47430432038911,
+ERASE, 47430431903744, 47430432038911,
+STORE, 47430431903744, 47430431928319,
+STORE, 47430431928320, 47430432038911,
+STORE, 47430431989760, 47430432038911,
+STORE, 47430431928320, 47430431989759,
+ERASE, 47430431928320, 47430431989759,
+STORE, 47430431928320, 47430431989759,
+STORE, 47430432014336, 47430432038911,
+STORE, 47430431989760, 47430432014335,
+ERASE, 47430431989760, 47430432014335,
+STORE, 47430431989760, 47430432038911,
+ERASE, 47430431989760, 47430432038911,
+STORE, 47430431989760, 47430432014335,
+STORE, 47430432014336, 47430432038911,
+STORE, 47430432022528, 47430432038911,
+STORE, 47430432014336, 47430432022527,
+ERASE, 47430432014336, 47430432022527,
+STORE, 47430432014336, 47430432022527,
+ERASE, 47430432022528, 47430432038911,
+STORE, 47430432022528, 47430432038911,
+STORE, 47430432038912, 47430432059391,
+ERASE, 47430432038912, 47430432059391,
+STORE, 47430432038912, 47430432043007,
+STORE, 47430432043008, 47430432059391,
+STORE, 47430432047104, 47430432059391,
+STORE, 47430432043008, 47430432047103,
+ERASE, 47430432043008, 47430432047103,
+STORE, 47430432043008, 47430432047103,
+STORE, 47430432051200, 47430432059391,
+STORE, 47430432047104, 47430432051199,
+ERASE, 47430432047104, 47430432051199,
+STORE, 47430432047104, 47430432059391,
+ERASE, 47430432047104, 47430432059391,
+STORE, 47430432047104, 47430432051199,
+STORE, 47430432051200, 47430432059391,
+ERASE, 47430432051200, 47430432059391,
+STORE, 47430432051200, 47430432059391,
+STORE, 47430432059392, 47430432067583,
+ERASE, 47430431862784, 47430431887359,
+STORE, 47430431862784, 47430431879167,
+STORE, 47430431879168, 47430431887359,
+ERASE, 47430432051200, 47430432059391,
+STORE, 47430432051200, 47430432055295,
+STORE, 47430432055296, 47430432059391,
+ERASE, 47430432014336, 47430432022527,
+STORE, 47430432014336, 47430432018431,
+STORE, 47430432018432, 47430432022527,
+ };
+ static const unsigned long set7[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140729808330752, 140737488351231,
+ERASE, 140729808330752, 140737488351231,
+STORE, 140729808330752, 140729808334847,
+STORE, 94629632020480, 94629632192511,
+ERASE, 94629632020480, 94629632192511,
+STORE, 94629632020480, 94629632036863,
+STORE, 94629632036864, 94629632192511,
+ERASE, 94629632036864, 94629632192511,
+STORE, 94629632036864, 94629632139263,
+STORE, 94629632139264, 94629632180223,
+STORE, 94629632180224, 94629632192511,
+STORE, 47439981776896, 47439981948927,
+ERASE, 47439981776896, 47439981948927,
+STORE, 47439981776896, 47439981780991,
+STORE, 47439981780992, 47439981948927,
+ERASE, 47439981780992, 47439981948927,
+STORE, 47439981780992, 47439981903871,
+STORE, 47439981903872, 47439981936639,
+STORE, 47439981936640, 47439981944831,
+STORE, 47439981944832, 47439981948927,
+STORE, 140729808474112, 140729808478207,
+STORE, 140729808461824, 140729808474111,
+STORE, 47439981948928, 47439981957119,
+STORE, 47439981957120, 47439981965311,
+STORE, 47439981965312, 47439982129151,
+ERASE, 47439981965312, 47439982129151,
+STORE, 47439981965312, 47439981977599,
+STORE, 47439981977600, 47439982129151,
+STORE, 47439982075904, 47439982129151,
+STORE, 47439981977600, 47439982075903,
+ERASE, 47439981977600, 47439982075903,
+STORE, 47439981977600, 47439982075903,
+STORE, 47439982120960, 47439982129151,
+STORE, 47439982075904, 47439982120959,
+ERASE, 47439982075904, 47439982120959,
+STORE, 47439982075904, 47439982129151,
+ERASE, 47439982075904, 47439982129151,
+STORE, 47439982075904, 47439982120959,
+STORE, 47439982120960, 47439982129151,
+ERASE, 47439982120960, 47439982129151,
+STORE, 47439982120960, 47439982129151,
+STORE, 47439982129152, 47439985180671,
+STORE, 47439982673920, 47439985180671,
+STORE, 47439982129152, 47439982673919,
+ERASE, 47439982673920, 47439985180671,
+STORE, 47439982673920, 47439984959487,
+STORE, 47439984959488, 47439985180671,
+STORE, 47439984369664, 47439984959487,
+STORE, 47439982673920, 47439984369663,
+ERASE, 47439982673920, 47439984369663,
+STORE, 47439982673920, 47439984369663,
+STORE, 47439984955392, 47439984959487,
+STORE, 47439984369664, 47439984955391,
+ERASE, 47439984369664, 47439984955391,
+STORE, 47439984369664, 47439984955391,
+STORE, 47439985164288, 47439985180671,
+STORE, 47439984959488, 47439985164287,
+ERASE, 47439984959488, 47439985164287,
+STORE, 47439984959488, 47439985164287,
+ERASE, 47439985164288, 47439985180671,
+STORE, 47439985164288, 47439985180671,
+STORE, 47439985180672, 47439987019775,
+STORE, 47439985319936, 47439987019775,
+STORE, 47439985180672, 47439985319935,
+ERASE, 47439985319936, 47439987019775,
+STORE, 47439985319936, 47439986978815,
+STORE, 47439986978816, 47439987019775,
+STORE, 47439986663424, 47439986978815,
+STORE, 47439985319936, 47439986663423,
+ERASE, 47439985319936, 47439986663423,
+STORE, 47439985319936, 47439986663423,
+STORE, 47439986974720, 47439986978815,
+STORE, 47439986663424, 47439986974719,
+ERASE, 47439986663424, 47439986974719,
+STORE, 47439986663424, 47439986974719,
+STORE, 47439987003392, 47439987019775,
+STORE, 47439986978816, 47439987003391,
+ERASE, 47439986978816, 47439987003391,
+STORE, 47439986978816, 47439987003391,
+ERASE, 47439987003392, 47439987019775,
+STORE, 47439987003392, 47439987019775,
+STORE, 47439987019776, 47439987154943,
+ERASE, 47439987019776, 47439987154943,
+STORE, 47439987019776, 47439987044351,
+STORE, 47439987044352, 47439987154943,
+STORE, 47439987105792, 47439987154943,
+STORE, 47439987044352, 47439987105791,
+ERASE, 47439987044352, 47439987105791,
+STORE, 47439987044352, 47439987105791,
+STORE, 47439987130368, 47439987154943,
+STORE, 47439987105792, 47439987130367,
+ERASE, 47439987105792, 47439987130367,
+STORE, 47439987105792, 47439987154943,
+ERASE, 47439987105792, 47439987154943,
+STORE, 47439987105792, 47439987130367,
+STORE, 47439987130368, 47439987154943,
+STORE, 47439987138560, 47439987154943,
+STORE, 47439987130368, 47439987138559,
+ERASE, 47439987130368, 47439987138559,
+STORE, 47439987130368, 47439987138559,
+ERASE, 47439987138560, 47439987154943,
+STORE, 47439987138560, 47439987154943,
+STORE, 47439987154944, 47439987175423,
+ERASE, 47439987154944, 47439987175423,
+STORE, 47439987154944, 47439987159039,
+STORE, 47439987159040, 47439987175423,
+STORE, 47439987163136, 47439987175423,
+STORE, 47439987159040, 47439987163135,
+ERASE, 47439987159040, 47439987163135,
+STORE, 47439987159040, 47439987163135,
+STORE, 47439987167232, 47439987175423,
+STORE, 47439987163136, 47439987167231,
+ERASE, 47439987163136, 47439987167231,
+STORE, 47439987163136, 47439987175423,
+ERASE, 47439987163136, 47439987175423,
+STORE, 47439987163136, 47439987167231,
+STORE, 47439987167232, 47439987175423,
+ERASE, 47439987167232, 47439987175423,
+STORE, 47439987167232, 47439987175423,
+STORE, 47439987175424, 47439987183615,
+ERASE, 47439986978816, 47439987003391,
+STORE, 47439986978816, 47439986995199,
+STORE, 47439986995200, 47439987003391,
+ERASE, 47439987167232, 47439987175423,
+STORE, 47439987167232, 47439987171327,
+STORE, 47439987171328, 47439987175423,
+ERASE, 47439987130368, 47439987138559,
+STORE, 47439987130368, 47439987134463,
+STORE, 47439987134464, 47439987138559,
+ };
+ static const unsigned long set8[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140722482974720, 140737488351231,
+ERASE, 140722482974720, 140737488351231,
+STORE, 140722482974720, 140722482978815,
+STORE, 94121505034240, 94121505206271,
+ERASE, 94121505034240, 94121505206271,
+STORE, 94121505034240, 94121505050623,
+STORE, 94121505050624, 94121505206271,
+ERASE, 94121505050624, 94121505206271,
+STORE, 94121505050624, 94121505153023,
+STORE, 94121505153024, 94121505193983,
+STORE, 94121505193984, 94121505206271,
+STORE, 47708483284992, 47708483457023,
+ERASE, 47708483284992, 47708483457023,
+STORE, 47708483284992, 47708483289087,
+STORE, 47708483289088, 47708483457023,
+ERASE, 47708483289088, 47708483457023,
+STORE, 47708483289088, 47708483411967,
+STORE, 47708483411968, 47708483444735,
+STORE, 47708483444736, 47708483452927,
+STORE, 47708483452928, 47708483457023,
+STORE, 140722483142656, 140722483146751,
+STORE, 140722483130368, 140722483142655,
+STORE, 47708483457024, 47708483465215,
+STORE, 47708483465216, 47708483473407,
+STORE, 47708483473408, 47708483637247,
+ERASE, 47708483473408, 47708483637247,
+STORE, 47708483473408, 47708483485695,
+STORE, 47708483485696, 47708483637247,
+STORE, 47708483584000, 47708483637247,
+STORE, 47708483485696, 47708483583999,
+ERASE, 47708483485696, 47708483583999,
+STORE, 47708483485696, 47708483583999,
+STORE, 47708483629056, 47708483637247,
+STORE, 47708483584000, 47708483629055,
+ERASE, 47708483584000, 47708483629055,
+STORE, 47708483584000, 47708483637247,
+ERASE, 47708483584000, 47708483637247,
+STORE, 47708483584000, 47708483629055,
+STORE, 47708483629056, 47708483637247,
+ERASE, 47708483629056, 47708483637247,
+STORE, 47708483629056, 47708483637247,
+STORE, 47708483637248, 47708486688767,
+STORE, 47708484182016, 47708486688767,
+STORE, 47708483637248, 47708484182015,
+ERASE, 47708484182016, 47708486688767,
+STORE, 47708484182016, 47708486467583,
+STORE, 47708486467584, 47708486688767,
+STORE, 47708485877760, 47708486467583,
+STORE, 47708484182016, 47708485877759,
+ERASE, 47708484182016, 47708485877759,
+STORE, 47708484182016, 47708485877759,
+STORE, 47708486463488, 47708486467583,
+STORE, 47708485877760, 47708486463487,
+ERASE, 47708485877760, 47708486463487,
+STORE, 47708485877760, 47708486463487,
+STORE, 47708486672384, 47708486688767,
+STORE, 47708486467584, 47708486672383,
+ERASE, 47708486467584, 47708486672383,
+STORE, 47708486467584, 47708486672383,
+ERASE, 47708486672384, 47708486688767,
+STORE, 47708486672384, 47708486688767,
+STORE, 47708486688768, 47708488527871,
+STORE, 47708486828032, 47708488527871,
+STORE, 47708486688768, 47708486828031,
+ERASE, 47708486828032, 47708488527871,
+STORE, 47708486828032, 47708488486911,
+STORE, 47708488486912, 47708488527871,
+STORE, 47708488171520, 47708488486911,
+STORE, 47708486828032, 47708488171519,
+ERASE, 47708486828032, 47708488171519,
+STORE, 47708486828032, 47708488171519,
+STORE, 47708488482816, 47708488486911,
+STORE, 47708488171520, 47708488482815,
+ERASE, 47708488171520, 47708488482815,
+STORE, 47708488171520, 47708488482815,
+STORE, 47708488511488, 47708488527871,
+STORE, 47708488486912, 47708488511487,
+ERASE, 47708488486912, 47708488511487,
+STORE, 47708488486912, 47708488511487,
+ERASE, 47708488511488, 47708488527871,
+STORE, 47708488511488, 47708488527871,
+STORE, 47708488527872, 47708488663039,
+ERASE, 47708488527872, 47708488663039,
+STORE, 47708488527872, 47708488552447,
+STORE, 47708488552448, 47708488663039,
+STORE, 47708488613888, 47708488663039,
+STORE, 47708488552448, 47708488613887,
+ERASE, 47708488552448, 47708488613887,
+STORE, 47708488552448, 47708488613887,
+STORE, 47708488638464, 47708488663039,
+STORE, 47708488613888, 47708488638463,
+ERASE, 47708488613888, 47708488638463,
+STORE, 47708488613888, 47708488663039,
+ERASE, 47708488613888, 47708488663039,
+STORE, 47708488613888, 47708488638463,
+STORE, 47708488638464, 47708488663039,
+STORE, 47708488646656, 47708488663039,
+STORE, 47708488638464, 47708488646655,
+ERASE, 47708488638464, 47708488646655,
+STORE, 47708488638464, 47708488646655,
+ERASE, 47708488646656, 47708488663039,
+STORE, 47708488646656, 47708488663039,
+STORE, 47708488663040, 47708488683519,
+ERASE, 47708488663040, 47708488683519,
+STORE, 47708488663040, 47708488667135,
+STORE, 47708488667136, 47708488683519,
+STORE, 47708488671232, 47708488683519,
+STORE, 47708488667136, 47708488671231,
+ERASE, 47708488667136, 47708488671231,
+STORE, 47708488667136, 47708488671231,
+STORE, 47708488675328, 47708488683519,
+STORE, 47708488671232, 47708488675327,
+ERASE, 47708488671232, 47708488675327,
+STORE, 47708488671232, 47708488683519,
+ERASE, 47708488671232, 47708488683519,
+STORE, 47708488671232, 47708488675327,
+STORE, 47708488675328, 47708488683519,
+ERASE, 47708488675328, 47708488683519,
+STORE, 47708488675328, 47708488683519,
+STORE, 47708488683520, 47708488691711,
+ERASE, 47708488486912, 47708488511487,
+STORE, 47708488486912, 47708488503295,
+STORE, 47708488503296, 47708488511487,
+ERASE, 47708488675328, 47708488683519,
+STORE, 47708488675328, 47708488679423,
+STORE, 47708488679424, 47708488683519,
+ERASE, 47708488638464, 47708488646655,
+STORE, 47708488638464, 47708488642559,
+STORE, 47708488642560, 47708488646655,
+ };
+
+ static const unsigned long set9[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140736427839488, 140737488351231,
+ERASE, 140736427839488, 140736427839488,
+STORE, 140736427839488, 140736427843583,
+STORE, 94071213395968, 94071213567999,
+ERASE, 94071213395968, 94071213395968,
+STORE, 94071213395968, 94071213412351,
+STORE, 94071213412352, 94071213567999,
+ERASE, 94071213412352, 94071213412352,
+STORE, 94071213412352, 94071213514751,
+STORE, 94071213514752, 94071213555711,
+STORE, 94071213555712, 94071213567999,
+STORE, 139968410644480, 139968410816511,
+ERASE, 139968410644480, 139968410644480,
+STORE, 139968410644480, 139968410648575,
+STORE, 139968410648576, 139968410816511,
+ERASE, 139968410648576, 139968410648576,
+STORE, 139968410648576, 139968410771455,
+STORE, 139968410771456, 139968410804223,
+STORE, 139968410804224, 139968410812415,
+STORE, 139968410812416, 139968410816511,
+STORE, 140736429277184, 140736429281279,
+STORE, 140736429264896, 140736429277183,
+STORE, 47664384352256, 47664384360447,
+STORE, 47664384360448, 47664384368639,
+STORE, 47664384368640, 47664384532479,
+ERASE, 47664384368640, 47664384368640,
+STORE, 47664384368640, 47664384380927,
+STORE, 47664384380928, 47664384532479,
+STORE, 47664384479232, 47664384532479,
+STORE, 47664384380928, 47664384479231,
+ERASE, 47664384380928, 47664384380928,
+STORE, 47664384380928, 47664384479231,
+STORE, 47664384524288, 47664384532479,
+STORE, 47664384479232, 47664384524287,
+ERASE, 47664384479232, 47664384479232,
+STORE, 47664384479232, 47664384532479,
+ERASE, 47664384479232, 47664384479232,
+STORE, 47664384479232, 47664384524287,
+STORE, 47664384524288, 47664384532479,
+ERASE, 47664384524288, 47664384524288,
+STORE, 47664384524288, 47664384532479,
+STORE, 47664384532480, 47664387583999,
+STORE, 47664385077248, 47664387583999,
+STORE, 47664384532480, 47664385077247,
+ERASE, 47664385077248, 47664385077248,
+STORE, 47664385077248, 47664387362815,
+STORE, 47664387362816, 47664387583999,
+STORE, 47664386772992, 47664387362815,
+STORE, 47664385077248, 47664386772991,
+ERASE, 47664385077248, 47664385077248,
+STORE, 47664385077248, 47664386772991,
+STORE, 47664387358720, 47664387362815,
+STORE, 47664386772992, 47664387358719,
+ERASE, 47664386772992, 47664386772992,
+STORE, 47664386772992, 47664387358719,
+STORE, 47664387567616, 47664387583999,
+STORE, 47664387362816, 47664387567615,
+ERASE, 47664387362816, 47664387362816,
+STORE, 47664387362816, 47664387567615,
+ERASE, 47664387567616, 47664387567616,
+STORE, 47664387567616, 47664387583999,
+STORE, 47664387584000, 47664389423103,
+STORE, 47664387723264, 47664389423103,
+STORE, 47664387584000, 47664387723263,
+ERASE, 47664387723264, 47664387723264,
+STORE, 47664387723264, 47664389382143,
+STORE, 47664389382144, 47664389423103,
+STORE, 47664389066752, 47664389382143,
+STORE, 47664387723264, 47664389066751,
+ERASE, 47664387723264, 47664387723264,
+STORE, 47664387723264, 47664389066751,
+STORE, 47664389378048, 47664389382143,
+STORE, 47664389066752, 47664389378047,
+ERASE, 47664389066752, 47664389066752,
+STORE, 47664389066752, 47664389378047,
+STORE, 47664389406720, 47664389423103,
+STORE, 47664389382144, 47664389406719,
+ERASE, 47664389382144, 47664389382144,
+STORE, 47664389382144, 47664389406719,
+ERASE, 47664389406720, 47664389406720,
+STORE, 47664389406720, 47664389423103,
+STORE, 47664389423104, 47664389558271,
+ERASE, 47664389423104, 47664389423104,
+STORE, 47664389423104, 47664389447679,
+STORE, 47664389447680, 47664389558271,
+STORE, 47664389509120, 47664389558271,
+STORE, 47664389447680, 47664389509119,
+ERASE, 47664389447680, 47664389447680,
+STORE, 47664389447680, 47664389509119,
+STORE, 47664389533696, 47664389558271,
+STORE, 47664389509120, 47664389533695,
+ERASE, 47664389509120, 47664389509120,
+STORE, 47664389509120, 47664389558271,
+ERASE, 47664389509120, 47664389509120,
+STORE, 47664389509120, 47664389533695,
+STORE, 47664389533696, 47664389558271,
+STORE, 47664389541888, 47664389558271,
+STORE, 47664389533696, 47664389541887,
+ERASE, 47664389533696, 47664389533696,
+STORE, 47664389533696, 47664389541887,
+ERASE, 47664389541888, 47664389541888,
+STORE, 47664389541888, 47664389558271,
+STORE, 47664389558272, 47664389578751,
+ERASE, 47664389558272, 47664389558272,
+STORE, 47664389558272, 47664389562367,
+STORE, 47664389562368, 47664389578751,
+STORE, 47664389566464, 47664389578751,
+STORE, 47664389562368, 47664389566463,
+ERASE, 47664389562368, 47664389562368,
+STORE, 47664389562368, 47664389566463,
+STORE, 47664389570560, 47664389578751,
+STORE, 47664389566464, 47664389570559,
+ERASE, 47664389566464, 47664389566464,
+STORE, 47664389566464, 47664389578751,
+ERASE, 47664389566464, 47664389566464,
+STORE, 47664389566464, 47664389570559,
+STORE, 47664389570560, 47664389578751,
+ERASE, 47664389570560, 47664389570560,
+STORE, 47664389570560, 47664389578751,
+STORE, 47664389578752, 47664389586943,
+ERASE, 47664389382144, 47664389382144,
+STORE, 47664389382144, 47664389398527,
+STORE, 47664389398528, 47664389406719,
+ERASE, 47664389570560, 47664389570560,
+STORE, 47664389570560, 47664389574655,
+STORE, 47664389574656, 47664389578751,
+ERASE, 47664389533696, 47664389533696,
+STORE, 47664389533696, 47664389537791,
+STORE, 47664389537792, 47664389541887,
+ERASE, 47664387362816, 47664387362816,
+STORE, 47664387362816, 47664387559423,
+STORE, 47664387559424, 47664387567615,
+ERASE, 47664384524288, 47664384524288,
+STORE, 47664384524288, 47664384528383,
+STORE, 47664384528384, 47664384532479,
+ERASE, 94071213555712, 94071213555712,
+STORE, 94071213555712, 94071213563903,
+STORE, 94071213563904, 94071213567999,
+ERASE, 139968410804224, 139968410804224,
+STORE, 139968410804224, 139968410808319,
+STORE, 139968410808320, 139968410812415,
+ERASE, 47664384352256, 47664384352256,
+STORE, 94071244402688, 94071244537855,
+STORE, 140737488347136, 140737488351231,
+STORE, 140728271503360, 140737488351231,
+ERASE, 140728271503360, 140728271503360,
+STORE, 140728271503360, 140728271507455,
+STORE, 94410361982976, 94410362155007,
+ERASE, 94410361982976, 94410361982976,
+STORE, 94410361982976, 94410361999359,
+STORE, 94410361999360, 94410362155007,
+ERASE, 94410361999360, 94410361999360,
+STORE, 94410361999360, 94410362101759,
+STORE, 94410362101760, 94410362142719,
+STORE, 94410362142720, 94410362155007,
+STORE, 140351953997824, 140351954169855,
+ERASE, 140351953997824, 140351953997824,
+STORE, 140351953997824, 140351954001919,
+STORE, 140351954001920, 140351954169855,
+ERASE, 140351954001920, 140351954001920,
+STORE, 140351954001920, 140351954124799,
+STORE, 140351954124800, 140351954157567,
+STORE, 140351954157568, 140351954165759,
+STORE, 140351954165760, 140351954169855,
+STORE, 140728272429056, 140728272433151,
+STORE, 140728272416768, 140728272429055,
+STORE, 47280840998912, 47280841007103,
+STORE, 47280841007104, 47280841015295,
+STORE, 47280841015296, 47280841179135,
+ERASE, 47280841015296, 47280841015296,
+STORE, 47280841015296, 47280841027583,
+STORE, 47280841027584, 47280841179135,
+STORE, 47280841125888, 47280841179135,
+STORE, 47280841027584, 47280841125887,
+ERASE, 47280841027584, 47280841027584,
+STORE, 47280841027584, 47280841125887,
+STORE, 47280841170944, 47280841179135,
+STORE, 47280841125888, 47280841170943,
+ERASE, 47280841125888, 47280841125888,
+STORE, 47280841125888, 47280841179135,
+ERASE, 47280841125888, 47280841125888,
+STORE, 47280841125888, 47280841170943,
+STORE, 47280841170944, 47280841179135,
+ERASE, 47280841170944, 47280841170944,
+STORE, 47280841170944, 47280841179135,
+STORE, 47280841179136, 47280844230655,
+STORE, 47280841723904, 47280844230655,
+STORE, 47280841179136, 47280841723903,
+ERASE, 47280841723904, 47280841723904,
+STORE, 47280841723904, 47280844009471,
+STORE, 47280844009472, 47280844230655,
+STORE, 47280843419648, 47280844009471,
+STORE, 47280841723904, 47280843419647,
+ERASE, 47280841723904, 47280841723904,
+STORE, 47280841723904, 47280843419647,
+STORE, 47280844005376, 47280844009471,
+STORE, 47280843419648, 47280844005375,
+ERASE, 47280843419648, 47280843419648,
+STORE, 47280843419648, 47280844005375,
+STORE, 47280844214272, 47280844230655,
+STORE, 47280844009472, 47280844214271,
+ERASE, 47280844009472, 47280844009472,
+STORE, 47280844009472, 47280844214271,
+ERASE, 47280844214272, 47280844214272,
+STORE, 47280844214272, 47280844230655,
+STORE, 47280844230656, 47280846069759,
+STORE, 47280844369920, 47280846069759,
+STORE, 47280844230656, 47280844369919,
+ERASE, 47280844369920, 47280844369920,
+STORE, 47280844369920, 47280846028799,
+STORE, 47280846028800, 47280846069759,
+STORE, 47280845713408, 47280846028799,
+STORE, 47280844369920, 47280845713407,
+ERASE, 47280844369920, 47280844369920,
+STORE, 47280844369920, 47280845713407,
+STORE, 47280846024704, 47280846028799,
+STORE, 47280845713408, 47280846024703,
+ERASE, 47280845713408, 47280845713408,
+STORE, 47280845713408, 47280846024703,
+STORE, 47280846053376, 47280846069759,
+STORE, 47280846028800, 47280846053375,
+ERASE, 47280846028800, 47280846028800,
+STORE, 47280846028800, 47280846053375,
+ERASE, 47280846053376, 47280846053376,
+STORE, 47280846053376, 47280846069759,
+STORE, 47280846069760, 47280846204927,
+ERASE, 47280846069760, 47280846069760,
+STORE, 47280846069760, 47280846094335,
+STORE, 47280846094336, 47280846204927,
+STORE, 47280846155776, 47280846204927,
+STORE, 47280846094336, 47280846155775,
+ERASE, 47280846094336, 47280846094336,
+STORE, 47280846094336, 47280846155775,
+STORE, 47280846180352, 47280846204927,
+STORE, 47280846155776, 47280846180351,
+ERASE, 47280846155776, 47280846155776,
+STORE, 47280846155776, 47280846204927,
+ERASE, 47280846155776, 47280846155776,
+STORE, 47280846155776, 47280846180351,
+STORE, 47280846180352, 47280846204927,
+STORE, 47280846188544, 47280846204927,
+STORE, 47280846180352, 47280846188543,
+ERASE, 47280846180352, 47280846180352,
+STORE, 47280846180352, 47280846188543,
+ERASE, 47280846188544, 47280846188544,
+STORE, 47280846188544, 47280846204927,
+STORE, 47280846204928, 47280846225407,
+ERASE, 47280846204928, 47280846204928,
+STORE, 47280846204928, 47280846209023,
+STORE, 47280846209024, 47280846225407,
+STORE, 47280846213120, 47280846225407,
+STORE, 47280846209024, 47280846213119,
+ERASE, 47280846209024, 47280846209024,
+STORE, 47280846209024, 47280846213119,
+STORE, 47280846217216, 47280846225407,
+STORE, 47280846213120, 47280846217215,
+ERASE, 47280846213120, 47280846213120,
+STORE, 47280846213120, 47280846225407,
+ERASE, 47280846213120, 47280846213120,
+STORE, 47280846213120, 47280846217215,
+STORE, 47280846217216, 47280846225407,
+ERASE, 47280846217216, 47280846217216,
+STORE, 47280846217216, 47280846225407,
+STORE, 47280846225408, 47280846233599,
+ERASE, 47280846028800, 47280846028800,
+STORE, 47280846028800, 47280846045183,
+STORE, 47280846045184, 47280846053375,
+ERASE, 47280846217216, 47280846217216,
+STORE, 47280846217216, 47280846221311,
+STORE, 47280846221312, 47280846225407,
+ERASE, 47280846180352, 47280846180352,
+STORE, 47280846180352, 47280846184447,
+STORE, 47280846184448, 47280846188543,
+ERASE, 47280844009472, 47280844009472,
+STORE, 47280844009472, 47280844206079,
+STORE, 47280844206080, 47280844214271,
+ERASE, 47280841170944, 47280841170944,
+STORE, 47280841170944, 47280841175039,
+STORE, 47280841175040, 47280841179135,
+ERASE, 94410362142720, 94410362142720,
+STORE, 94410362142720, 94410362150911,
+STORE, 94410362150912, 94410362155007,
+ERASE, 140351954157568, 140351954157568,
+STORE, 140351954157568, 140351954161663,
+STORE, 140351954161664, 140351954165759,
+ERASE, 47280840998912, 47280840998912,
+STORE, 94410379456512, 94410379591679,
+STORE, 140737488347136, 140737488351231,
+STORE, 140732946362368, 140737488351231,
+ERASE, 140732946362368, 140732946362368,
+STORE, 140732946362368, 140732946366463,
+STORE, 94352937934848, 94352938106879,
+ERASE, 94352937934848, 94352937934848,
+STORE, 94352937934848, 94352937951231,
+STORE, 94352937951232, 94352938106879,
+ERASE, 94352937951232, 94352937951232,
+STORE, 94352937951232, 94352938053631,
+STORE, 94352938053632, 94352938094591,
+STORE, 94352938094592, 94352938106879,
+STORE, 140595518742528, 140595518914559,
+ERASE, 140595518742528, 140595518742528,
+STORE, 140595518742528, 140595518746623,
+STORE, 140595518746624, 140595518914559,
+ERASE, 140595518746624, 140595518746624,
+STORE, 140595518746624, 140595518869503,
+STORE, 140595518869504, 140595518902271,
+STORE, 140595518902272, 140595518910463,
+STORE, 140595518910464, 140595518914559,
+STORE, 140732947468288, 140732947472383,
+STORE, 140732947456000, 140732947468287,
+STORE, 47037276254208, 47037276262399,
+STORE, 47037276262400, 47037276270591,
+STORE, 47037276270592, 47037276434431,
+ERASE, 47037276270592, 47037276270592,
+STORE, 47037276270592, 47037276282879,
+STORE, 47037276282880, 47037276434431,
+STORE, 47037276381184, 47037276434431,
+STORE, 47037276282880, 47037276381183,
+ERASE, 47037276282880, 47037276282880,
+STORE, 47037276282880, 47037276381183,
+STORE, 47037276426240, 47037276434431,
+STORE, 47037276381184, 47037276426239,
+ERASE, 47037276381184, 47037276381184,
+STORE, 47037276381184, 47037276434431,
+ERASE, 47037276381184, 47037276381184,
+STORE, 47037276381184, 47037276426239,
+STORE, 47037276426240, 47037276434431,
+ERASE, 47037276426240, 47037276426240,
+STORE, 47037276426240, 47037276434431,
+STORE, 47037276434432, 47037279485951,
+STORE, 47037276979200, 47037279485951,
+STORE, 47037276434432, 47037276979199,
+ERASE, 47037276979200, 47037276979200,
+STORE, 47037276979200, 47037279264767,
+STORE, 47037279264768, 47037279485951,
+STORE, 47037278674944, 47037279264767,
+STORE, 47037276979200, 47037278674943,
+ERASE, 47037276979200, 47037276979200,
+STORE, 47037276979200, 47037278674943,
+STORE, 47037279260672, 47037279264767,
+STORE, 47037278674944, 47037279260671,
+ERASE, 47037278674944, 47037278674944,
+STORE, 47037278674944, 47037279260671,
+STORE, 47037279469568, 47037279485951,
+STORE, 47037279264768, 47037279469567,
+ERASE, 47037279264768, 47037279264768,
+STORE, 47037279264768, 47037279469567,
+ERASE, 47037279469568, 47037279469568,
+STORE, 47037279469568, 47037279485951,
+STORE, 47037279485952, 47037281325055,
+STORE, 47037279625216, 47037281325055,
+STORE, 47037279485952, 47037279625215,
+ERASE, 47037279625216, 47037279625216,
+STORE, 47037279625216, 47037281284095,
+STORE, 47037281284096, 47037281325055,
+STORE, 47037280968704, 47037281284095,
+STORE, 47037279625216, 47037280968703,
+ERASE, 47037279625216, 47037279625216,
+STORE, 47037279625216, 47037280968703,
+STORE, 47037281280000, 47037281284095,
+STORE, 47037280968704, 47037281279999,
+ERASE, 47037280968704, 47037280968704,
+STORE, 47037280968704, 47037281279999,
+STORE, 47037281308672, 47037281325055,
+STORE, 47037281284096, 47037281308671,
+ERASE, 47037281284096, 47037281284096,
+STORE, 47037281284096, 47037281308671,
+ERASE, 47037281308672, 47037281308672,
+STORE, 47037281308672, 47037281325055,
+STORE, 47037281325056, 47037281460223,
+ERASE, 47037281325056, 47037281325056,
+STORE, 47037281325056, 47037281349631,
+STORE, 47037281349632, 47037281460223,
+STORE, 47037281411072, 47037281460223,
+STORE, 47037281349632, 47037281411071,
+ERASE, 47037281349632, 47037281349632,
+STORE, 47037281349632, 47037281411071,
+STORE, 47037281435648, 47037281460223,
+STORE, 47037281411072, 47037281435647,
+ERASE, 47037281411072, 47037281411072,
+STORE, 47037281411072, 47037281460223,
+ERASE, 47037281411072, 47037281411072,
+STORE, 47037281411072, 47037281435647,
+STORE, 47037281435648, 47037281460223,
+STORE, 47037281443840, 47037281460223,
+STORE, 47037281435648, 47037281443839,
+ERASE, 47037281435648, 47037281435648,
+STORE, 47037281435648, 47037281443839,
+ERASE, 47037281443840, 47037281443840,
+STORE, 47037281443840, 47037281460223,
+STORE, 47037281460224, 47037281480703,
+ERASE, 47037281460224, 47037281460224,
+STORE, 47037281460224, 47037281464319,
+STORE, 47037281464320, 47037281480703,
+STORE, 47037281468416, 47037281480703,
+STORE, 47037281464320, 47037281468415,
+ERASE, 47037281464320, 47037281464320,
+STORE, 47037281464320, 47037281468415,
+STORE, 47037281472512, 47037281480703,
+STORE, 47037281468416, 47037281472511,
+ERASE, 47037281468416, 47037281468416,
+STORE, 47037281468416, 47037281480703,
+ERASE, 47037281468416, 47037281468416,
+STORE, 47037281468416, 47037281472511,
+STORE, 47037281472512, 47037281480703,
+ERASE, 47037281472512, 47037281472512,
+STORE, 47037281472512, 47037281480703,
+STORE, 47037281480704, 47037281488895,
+ERASE, 47037281284096, 47037281284096,
+STORE, 47037281284096, 47037281300479,
+STORE, 47037281300480, 47037281308671,
+ERASE, 47037281472512, 47037281472512,
+STORE, 47037281472512, 47037281476607,
+STORE, 47037281476608, 47037281480703,
+ERASE, 47037281435648, 47037281435648,
+STORE, 47037281435648, 47037281439743,
+STORE, 47037281439744, 47037281443839,
+ERASE, 47037279264768, 47037279264768,
+STORE, 47037279264768, 47037279461375,
+STORE, 47037279461376, 47037279469567,
+ERASE, 47037276426240, 47037276426240,
+STORE, 47037276426240, 47037276430335,
+STORE, 47037276430336, 47037276434431,
+ERASE, 94352938094592, 94352938094592,
+STORE, 94352938094592, 94352938102783,
+STORE, 94352938102784, 94352938106879,
+ERASE, 140595518902272, 140595518902272,
+STORE, 140595518902272, 140595518906367,
+STORE, 140595518906368, 140595518910463,
+ERASE, 47037276254208, 47037276254208,
+STORE, 94352938438656, 94352938573823,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733506027520, 140737488351231,
+ERASE, 140733506027520, 140733506027520,
+STORE, 140733506027520, 140733506031615,
+STORE, 94150123073536, 94150123245567,
+ERASE, 94150123073536, 94150123073536,
+STORE, 94150123073536, 94150123089919,
+STORE, 94150123089920, 94150123245567,
+ERASE, 94150123089920, 94150123089920,
+STORE, 94150123089920, 94150123192319,
+STORE, 94150123192320, 94150123233279,
+STORE, 94150123233280, 94150123245567,
+STORE, 140081290375168, 140081290547199,
+ERASE, 140081290375168, 140081290375168,
+STORE, 140081290375168, 140081290379263,
+STORE, 140081290379264, 140081290547199,
+ERASE, 140081290379264, 140081290379264,
+STORE, 140081290379264, 140081290502143,
+STORE, 140081290502144, 140081290534911,
+STORE, 140081290534912, 140081290543103,
+STORE, 140081290543104, 140081290547199,
+STORE, 140733506707456, 140733506711551,
+STORE, 140733506695168, 140733506707455,
+STORE, 47551504621568, 47551504629759,
+STORE, 47551504629760, 47551504637951,
+STORE, 47551504637952, 47551504801791,
+ERASE, 47551504637952, 47551504637952,
+STORE, 47551504637952, 47551504650239,
+STORE, 47551504650240, 47551504801791,
+STORE, 47551504748544, 47551504801791,
+STORE, 47551504650240, 47551504748543,
+ERASE, 47551504650240, 47551504650240,
+STORE, 47551504650240, 47551504748543,
+STORE, 47551504793600, 47551504801791,
+STORE, 47551504748544, 47551504793599,
+ERASE, 47551504748544, 47551504748544,
+STORE, 47551504748544, 47551504801791,
+ERASE, 47551504748544, 47551504748544,
+STORE, 47551504748544, 47551504793599,
+STORE, 47551504793600, 47551504801791,
+ERASE, 47551504793600, 47551504793600,
+STORE, 47551504793600, 47551504801791,
+STORE, 47551504801792, 47551507853311,
+STORE, 47551505346560, 47551507853311,
+STORE, 47551504801792, 47551505346559,
+ERASE, 47551505346560, 47551505346560,
+STORE, 47551505346560, 47551507632127,
+STORE, 47551507632128, 47551507853311,
+STORE, 47551507042304, 47551507632127,
+STORE, 47551505346560, 47551507042303,
+ERASE, 47551505346560, 47551505346560,
+STORE, 47551505346560, 47551507042303,
+STORE, 47551507628032, 47551507632127,
+STORE, 47551507042304, 47551507628031,
+ERASE, 47551507042304, 47551507042304,
+STORE, 47551507042304, 47551507628031,
+STORE, 47551507836928, 47551507853311,
+STORE, 47551507632128, 47551507836927,
+ERASE, 47551507632128, 47551507632128,
+STORE, 47551507632128, 47551507836927,
+ERASE, 47551507836928, 47551507836928,
+STORE, 47551507836928, 47551507853311,
+STORE, 47551507853312, 47551509692415,
+STORE, 47551507992576, 47551509692415,
+STORE, 47551507853312, 47551507992575,
+ERASE, 47551507992576, 47551507992576,
+STORE, 47551507992576, 47551509651455,
+STORE, 47551509651456, 47551509692415,
+STORE, 47551509336064, 47551509651455,
+STORE, 47551507992576, 47551509336063,
+ERASE, 47551507992576, 47551507992576,
+STORE, 47551507992576, 47551509336063,
+STORE, 47551509647360, 47551509651455,
+STORE, 47551509336064, 47551509647359,
+ERASE, 47551509336064, 47551509336064,
+STORE, 47551509336064, 47551509647359,
+STORE, 47551509676032, 47551509692415,
+STORE, 47551509651456, 47551509676031,
+ERASE, 47551509651456, 47551509651456,
+STORE, 47551509651456, 47551509676031,
+ERASE, 47551509676032, 47551509676032,
+STORE, 47551509676032, 47551509692415,
+STORE, 47551509692416, 47551509827583,
+ERASE, 47551509692416, 47551509692416,
+STORE, 47551509692416, 47551509716991,
+STORE, 47551509716992, 47551509827583,
+STORE, 47551509778432, 47551509827583,
+STORE, 47551509716992, 47551509778431,
+ERASE, 47551509716992, 47551509716992,
+STORE, 47551509716992, 47551509778431,
+STORE, 47551509803008, 47551509827583,
+STORE, 47551509778432, 47551509803007,
+ERASE, 47551509778432, 47551509778432,
+STORE, 47551509778432, 47551509827583,
+ERASE, 47551509778432, 47551509778432,
+STORE, 47551509778432, 47551509803007,
+STORE, 47551509803008, 47551509827583,
+STORE, 47551509811200, 47551509827583,
+STORE, 47551509803008, 47551509811199,
+ERASE, 47551509803008, 47551509803008,
+STORE, 47551509803008, 47551509811199,
+ERASE, 47551509811200, 47551509811200,
+STORE, 47551509811200, 47551509827583,
+STORE, 47551509827584, 47551509848063,
+ERASE, 47551509827584, 47551509827584,
+STORE, 47551509827584, 47551509831679,
+STORE, 47551509831680, 47551509848063,
+STORE, 47551509835776, 47551509848063,
+STORE, 47551509831680, 47551509835775,
+ERASE, 47551509831680, 47551509831680,
+STORE, 47551509831680, 47551509835775,
+STORE, 47551509839872, 47551509848063,
+STORE, 47551509835776, 47551509839871,
+ERASE, 47551509835776, 47551509835776,
+STORE, 47551509835776, 47551509848063,
+ERASE, 47551509835776, 47551509835776,
+STORE, 47551509835776, 47551509839871,
+STORE, 47551509839872, 47551509848063,
+ERASE, 47551509839872, 47551509839872,
+STORE, 47551509839872, 47551509848063,
+STORE, 47551509848064, 47551509856255,
+ERASE, 47551509651456, 47551509651456,
+STORE, 47551509651456, 47551509667839,
+STORE, 47551509667840, 47551509676031,
+ERASE, 47551509839872, 47551509839872,
+STORE, 47551509839872, 47551509843967,
+STORE, 47551509843968, 47551509848063,
+ERASE, 47551509803008, 47551509803008,
+STORE, 47551509803008, 47551509807103,
+STORE, 47551509807104, 47551509811199,
+ERASE, 47551507632128, 47551507632128,
+STORE, 47551507632128, 47551507828735,
+STORE, 47551507828736, 47551507836927,
+ERASE, 47551504793600, 47551504793600,
+STORE, 47551504793600, 47551504797695,
+STORE, 47551504797696, 47551504801791,
+ERASE, 94150123233280, 94150123233280,
+STORE, 94150123233280, 94150123241471,
+STORE, 94150123241472, 94150123245567,
+ERASE, 140081290534912, 140081290534912,
+STORE, 140081290534912, 140081290539007,
+STORE, 140081290539008, 140081290543103,
+ERASE, 47551504621568, 47551504621568,
+STORE, 94150148112384, 94150148247551,
+STORE, 140737488347136, 140737488351231,
+STORE, 140734389334016, 140737488351231,
+ERASE, 140734389334016, 140734389334016,
+STORE, 140734389334016, 140734389338111,
+STORE, 94844636606464, 94844636778495,
+ERASE, 94844636606464, 94844636606464,
+STORE, 94844636606464, 94844636622847,
+STORE, 94844636622848, 94844636778495,
+ERASE, 94844636622848, 94844636622848,
+STORE, 94844636622848, 94844636725247,
+STORE, 94844636725248, 94844636766207,
+STORE, 94844636766208, 94844636778495,
+STORE, 139922765217792, 139922765389823,
+ERASE, 139922765217792, 139922765217792,
+STORE, 139922765217792, 139922765221887,
+STORE, 139922765221888, 139922765389823,
+ERASE, 139922765221888, 139922765221888,
+STORE, 139922765221888, 139922765344767,
+STORE, 139922765344768, 139922765377535,
+STORE, 139922765377536, 139922765385727,
+STORE, 139922765385728, 139922765389823,
+STORE, 140734389678080, 140734389682175,
+STORE, 140734389665792, 140734389678079,
+STORE, 47710029778944, 47710029787135,
+STORE, 47710029787136, 47710029795327,
+STORE, 47710029795328, 47710029959167,
+ERASE, 47710029795328, 47710029795328,
+STORE, 47710029795328, 47710029807615,
+STORE, 47710029807616, 47710029959167,
+STORE, 47710029905920, 47710029959167,
+STORE, 47710029807616, 47710029905919,
+ERASE, 47710029807616, 47710029807616,
+STORE, 47710029807616, 47710029905919,
+STORE, 47710029950976, 47710029959167,
+STORE, 47710029905920, 47710029950975,
+ERASE, 47710029905920, 47710029905920,
+STORE, 47710029905920, 47710029959167,
+ERASE, 47710029905920, 47710029905920,
+STORE, 47710029905920, 47710029950975,
+STORE, 47710029950976, 47710029959167,
+ERASE, 47710029950976, 47710029950976,
+STORE, 47710029950976, 47710029959167,
+STORE, 47710029959168, 47710033010687,
+STORE, 47710030503936, 47710033010687,
+STORE, 47710029959168, 47710030503935,
+ERASE, 47710030503936, 47710030503936,
+STORE, 47710030503936, 47710032789503,
+STORE, 47710032789504, 47710033010687,
+STORE, 47710032199680, 47710032789503,
+STORE, 47710030503936, 47710032199679,
+ERASE, 47710030503936, 47710030503936,
+STORE, 47710030503936, 47710032199679,
+STORE, 47710032785408, 47710032789503,
+STORE, 47710032199680, 47710032785407,
+ERASE, 47710032199680, 47710032199680,
+STORE, 47710032199680, 47710032785407,
+STORE, 47710032994304, 47710033010687,
+STORE, 47710032789504, 47710032994303,
+ERASE, 47710032789504, 47710032789504,
+STORE, 47710032789504, 47710032994303,
+ERASE, 47710032994304, 47710032994304,
+STORE, 47710032994304, 47710033010687,
+STORE, 47710033010688, 47710034849791,
+STORE, 47710033149952, 47710034849791,
+STORE, 47710033010688, 47710033149951,
+ERASE, 47710033149952, 47710033149952,
+STORE, 47710033149952, 47710034808831,
+STORE, 47710034808832, 47710034849791,
+STORE, 47710034493440, 47710034808831,
+STORE, 47710033149952, 47710034493439,
+ERASE, 47710033149952, 47710033149952,
+STORE, 47710033149952, 47710034493439,
+STORE, 47710034804736, 47710034808831,
+STORE, 47710034493440, 47710034804735,
+ERASE, 47710034493440, 47710034493440,
+STORE, 47710034493440, 47710034804735,
+STORE, 47710034833408, 47710034849791,
+STORE, 47710034808832, 47710034833407,
+ERASE, 47710034808832, 47710034808832,
+STORE, 47710034808832, 47710034833407,
+ERASE, 47710034833408, 47710034833408,
+STORE, 47710034833408, 47710034849791,
+STORE, 47710034849792, 47710034984959,
+ERASE, 47710034849792, 47710034849792,
+STORE, 47710034849792, 47710034874367,
+STORE, 47710034874368, 47710034984959,
+STORE, 47710034935808, 47710034984959,
+STORE, 47710034874368, 47710034935807,
+ERASE, 47710034874368, 47710034874368,
+STORE, 47710034874368, 47710034935807,
+STORE, 47710034960384, 47710034984959,
+STORE, 47710034935808, 47710034960383,
+ERASE, 47710034935808, 47710034935808,
+STORE, 47710034935808, 47710034984959,
+ERASE, 47710034935808, 47710034935808,
+STORE, 47710034935808, 47710034960383,
+STORE, 47710034960384, 47710034984959,
+STORE, 47710034968576, 47710034984959,
+STORE, 47710034960384, 47710034968575,
+ERASE, 47710034960384, 47710034960384,
+STORE, 47710034960384, 47710034968575,
+ERASE, 47710034968576, 47710034968576,
+STORE, 47710034968576, 47710034984959,
+STORE, 47710034984960, 47710035005439,
+ERASE, 47710034984960, 47710034984960,
+STORE, 47710034984960, 47710034989055,
+STORE, 47710034989056, 47710035005439,
+STORE, 47710034993152, 47710035005439,
+STORE, 47710034989056, 47710034993151,
+ERASE, 47710034989056, 47710034989056,
+STORE, 47710034989056, 47710034993151,
+STORE, 47710034997248, 47710035005439,
+STORE, 47710034993152, 47710034997247,
+ERASE, 47710034993152, 47710034993152,
+STORE, 47710034993152, 47710035005439,
+ERASE, 47710034993152, 47710034993152,
+STORE, 47710034993152, 47710034997247,
+STORE, 47710034997248, 47710035005439,
+ERASE, 47710034997248, 47710034997248,
+STORE, 47710034997248, 47710035005439,
+STORE, 47710035005440, 47710035013631,
+ERASE, 47710034808832, 47710034808832,
+STORE, 47710034808832, 47710034825215,
+STORE, 47710034825216, 47710034833407,
+ERASE, 47710034997248, 47710034997248,
+STORE, 47710034997248, 47710035001343,
+STORE, 47710035001344, 47710035005439,
+ERASE, 47710034960384, 47710034960384,
+STORE, 47710034960384, 47710034964479,
+STORE, 47710034964480, 47710034968575,
+ERASE, 47710032789504, 47710032789504,
+STORE, 47710032789504, 47710032986111,
+STORE, 47710032986112, 47710032994303,
+ERASE, 47710029950976, 47710029950976,
+STORE, 47710029950976, 47710029955071,
+STORE, 47710029955072, 47710029959167,
+ERASE, 94844636766208, 94844636766208,
+STORE, 94844636766208, 94844636774399,
+STORE, 94844636774400, 94844636778495,
+ERASE, 139922765377536, 139922765377536,
+STORE, 139922765377536, 139922765381631,
+STORE, 139922765381632, 139922765385727,
+ERASE, 47710029778944, 47710029778944,
+STORE, 94844641775616, 94844641910783,
+STORE, 140737488347136, 140737488351231,
+STORE, 140732213886976, 140737488351231,
+ERASE, 140732213886976, 140732213886976,
+STORE, 140732213886976, 140732213891071,
+STORE, 94240508887040, 94240509059071,
+ERASE, 94240508887040, 94240508887040,
+STORE, 94240508887040, 94240508903423,
+STORE, 94240508903424, 94240509059071,
+ERASE, 94240508903424, 94240508903424,
+STORE, 94240508903424, 94240509005823,
+STORE, 94240509005824, 94240509046783,
+STORE, 94240509046784, 94240509059071,
+STORE, 140275106516992, 140275106689023,
+ERASE, 140275106516992, 140275106516992,
+STORE, 140275106516992, 140275106521087,
+STORE, 140275106521088, 140275106689023,
+ERASE, 140275106521088, 140275106521088,
+STORE, 140275106521088, 140275106643967,
+STORE, 140275106643968, 140275106676735,
+STORE, 140275106676736, 140275106684927,
+STORE, 140275106684928, 140275106689023,
+STORE, 140732213977088, 140732213981183,
+STORE, 140732213964800, 140732213977087,
+STORE, 47357688479744, 47357688487935,
+STORE, 47357688487936, 47357688496127,
+STORE, 47357688496128, 47357688659967,
+ERASE, 47357688496128, 47357688496128,
+STORE, 47357688496128, 47357688508415,
+STORE, 47357688508416, 47357688659967,
+STORE, 47357688606720, 47357688659967,
+STORE, 47357688508416, 47357688606719,
+ERASE, 47357688508416, 47357688508416,
+STORE, 47357688508416, 47357688606719,
+STORE, 47357688651776, 47357688659967,
+STORE, 47357688606720, 47357688651775,
+ERASE, 47357688606720, 47357688606720,
+STORE, 47357688606720, 47357688659967,
+ERASE, 47357688606720, 47357688606720,
+STORE, 47357688606720, 47357688651775,
+STORE, 47357688651776, 47357688659967,
+ERASE, 47357688651776, 47357688651776,
+STORE, 47357688651776, 47357688659967,
+STORE, 47357688659968, 47357691711487,
+STORE, 47357689204736, 47357691711487,
+STORE, 47357688659968, 47357689204735,
+ERASE, 47357689204736, 47357689204736,
+STORE, 47357689204736, 47357691490303,
+STORE, 47357691490304, 47357691711487,
+STORE, 47357690900480, 47357691490303,
+STORE, 47357689204736, 47357690900479,
+ERASE, 47357689204736, 47357689204736,
+STORE, 47357689204736, 47357690900479,
+STORE, 47357691486208, 47357691490303,
+STORE, 47357690900480, 47357691486207,
+ERASE, 47357690900480, 47357690900480,
+STORE, 47357690900480, 47357691486207,
+STORE, 47357691695104, 47357691711487,
+STORE, 47357691490304, 47357691695103,
+ERASE, 47357691490304, 47357691490304,
+STORE, 47357691490304, 47357691695103,
+ERASE, 47357691695104, 47357691695104,
+STORE, 47357691695104, 47357691711487,
+STORE, 47357691711488, 47357693550591,
+STORE, 47357691850752, 47357693550591,
+STORE, 47357691711488, 47357691850751,
+ERASE, 47357691850752, 47357691850752,
+STORE, 47357691850752, 47357693509631,
+STORE, 47357693509632, 47357693550591,
+STORE, 47357693194240, 47357693509631,
+STORE, 47357691850752, 47357693194239,
+ERASE, 47357691850752, 47357691850752,
+STORE, 47357691850752, 47357693194239,
+STORE, 47357693505536, 47357693509631,
+STORE, 47357693194240, 47357693505535,
+ERASE, 47357693194240, 47357693194240,
+STORE, 47357693194240, 47357693505535,
+STORE, 47357693534208, 47357693550591,
+STORE, 47357693509632, 47357693534207,
+ERASE, 47357693509632, 47357693509632,
+STORE, 47357693509632, 47357693534207,
+ERASE, 47357693534208, 47357693534208,
+STORE, 47357693534208, 47357693550591,
+STORE, 47357693550592, 47357693685759,
+ERASE, 47357693550592, 47357693550592,
+STORE, 47357693550592, 47357693575167,
+STORE, 47357693575168, 47357693685759,
+STORE, 47357693636608, 47357693685759,
+STORE, 47357693575168, 47357693636607,
+ERASE, 47357693575168, 47357693575168,
+STORE, 47357693575168, 47357693636607,
+STORE, 47357693661184, 47357693685759,
+STORE, 47357693636608, 47357693661183,
+ERASE, 47357693636608, 47357693636608,
+STORE, 47357693636608, 47357693685759,
+ERASE, 47357693636608, 47357693636608,
+STORE, 47357693636608, 47357693661183,
+STORE, 47357693661184, 47357693685759,
+STORE, 47357693669376, 47357693685759,
+STORE, 47357693661184, 47357693669375,
+ERASE, 47357693661184, 47357693661184,
+STORE, 47357693661184, 47357693669375,
+ERASE, 47357693669376, 47357693669376,
+STORE, 47357693669376, 47357693685759,
+STORE, 47357693685760, 47357693706239,
+ERASE, 47357693685760, 47357693685760,
+STORE, 47357693685760, 47357693689855,
+STORE, 47357693689856, 47357693706239,
+STORE, 47357693693952, 47357693706239,
+STORE, 47357693689856, 47357693693951,
+ERASE, 47357693689856, 47357693689856,
+STORE, 47357693689856, 47357693693951,
+STORE, 47357693698048, 47357693706239,
+STORE, 47357693693952, 47357693698047,
+ERASE, 47357693693952, 47357693693952,
+STORE, 47357693693952, 47357693706239,
+ERASE, 47357693693952, 47357693693952,
+STORE, 47357693693952, 47357693698047,
+STORE, 47357693698048, 47357693706239,
+ERASE, 47357693698048, 47357693698048,
+STORE, 47357693698048, 47357693706239,
+STORE, 47357693706240, 47357693714431,
+ERASE, 47357693509632, 47357693509632,
+STORE, 47357693509632, 47357693526015,
+STORE, 47357693526016, 47357693534207,
+ERASE, 47357693698048, 47357693698048,
+STORE, 47357693698048, 47357693702143,
+STORE, 47357693702144, 47357693706239,
+ERASE, 47357693661184, 47357693661184,
+STORE, 47357693661184, 47357693665279,
+STORE, 47357693665280, 47357693669375,
+ERASE, 47357691490304, 47357691490304,
+STORE, 47357691490304, 47357691686911,
+STORE, 47357691686912, 47357691695103,
+ERASE, 47357688651776, 47357688651776,
+STORE, 47357688651776, 47357688655871,
+STORE, 47357688655872, 47357688659967,
+ERASE, 94240509046784, 94240509046784,
+STORE, 94240509046784, 94240509054975,
+STORE, 94240509054976, 94240509059071,
+ERASE, 140275106676736, 140275106676736,
+STORE, 140275106676736, 140275106680831,
+STORE, 140275106680832, 140275106684927,
+ERASE, 47357688479744, 47357688479744,
+STORE, 94240518361088, 94240518496255,
+STORE, 140737488347136, 140737488351231,
+STORE, 140732688277504, 140737488351231,
+ERASE, 140732688277504, 140732688277504,
+STORE, 140732688277504, 140732688281599,
+STORE, 94629171351552, 94629172064255,
+ERASE, 94629171351552, 94629171351552,
+STORE, 94629171351552, 94629171400703,
+STORE, 94629171400704, 94629172064255,
+ERASE, 94629171400704, 94629171400704,
+STORE, 94629171400704, 94629171945471,
+STORE, 94629171945472, 94629172043775,
+STORE, 94629172043776, 94629172064255,
+STORE, 139770707644416, 139770707816447,
+ERASE, 139770707644416, 139770707644416,
+STORE, 139770707644416, 139770707648511,
+STORE, 139770707648512, 139770707816447,
+ERASE, 139770707648512, 139770707648512,
+STORE, 139770707648512, 139770707771391,
+STORE, 139770707771392, 139770707804159,
+STORE, 139770707804160, 139770707812351,
+STORE, 139770707812352, 139770707816447,
+STORE, 140732689121280, 140732689125375,
+STORE, 140732689108992, 140732689121279,
+STORE, 47862087352320, 47862087360511,
+STORE, 47862087360512, 47862087368703,
+STORE, 47862087368704, 47862087475199,
+STORE, 47862087385088, 47862087475199,
+STORE, 47862087368704, 47862087385087,
+ERASE, 47862087385088, 47862087385088,
+STORE, 47862087385088, 47862087458815,
+STORE, 47862087458816, 47862087475199,
+STORE, 47862087438336, 47862087458815,
+STORE, 47862087385088, 47862087438335,
+ERASE, 47862087385088, 47862087385088,
+STORE, 47862087385088, 47862087438335,
+STORE, 47862087454720, 47862087458815,
+STORE, 47862087438336, 47862087454719,
+ERASE, 47862087438336, 47862087438336,
+STORE, 47862087438336, 47862087454719,
+STORE, 47862087467008, 47862087475199,
+STORE, 47862087458816, 47862087467007,
+ERASE, 47862087458816, 47862087458816,
+STORE, 47862087458816, 47862087467007,
+ERASE, 47862087467008, 47862087467008,
+STORE, 47862087467008, 47862087475199,
+STORE, 47862087475200, 47862089314303,
+STORE, 47862087614464, 47862089314303,
+STORE, 47862087475200, 47862087614463,
+ERASE, 47862087614464, 47862087614464,
+STORE, 47862087614464, 47862089273343,
+STORE, 47862089273344, 47862089314303,
+STORE, 47862088957952, 47862089273343,
+STORE, 47862087614464, 47862088957951,
+ERASE, 47862087614464, 47862087614464,
+STORE, 47862087614464, 47862088957951,
+STORE, 47862089269248, 47862089273343,
+STORE, 47862088957952, 47862089269247,
+ERASE, 47862088957952, 47862088957952,
+STORE, 47862088957952, 47862089269247,
+STORE, 47862089297920, 47862089314303,
+STORE, 47862089273344, 47862089297919,
+ERASE, 47862089273344, 47862089273344,
+STORE, 47862089273344, 47862089297919,
+ERASE, 47862089297920, 47862089297920,
+STORE, 47862089297920, 47862089314303,
+STORE, 47862089297920, 47862089326591,
+ERASE, 47862089273344, 47862089273344,
+STORE, 47862089273344, 47862089289727,
+STORE, 47862089289728, 47862089297919,
+ERASE, 47862087458816, 47862087458816,
+STORE, 47862087458816, 47862087462911,
+STORE, 47862087462912, 47862087467007,
+ERASE, 94629172043776, 94629172043776,
+STORE, 94629172043776, 94629172060159,
+STORE, 94629172060160, 94629172064255,
+ERASE, 139770707804160, 139770707804160,
+STORE, 139770707804160, 139770707808255,
+STORE, 139770707808256, 139770707812351,
+ERASE, 47862087352320, 47862087352320,
+STORE, 94629197533184, 94629197668351,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727540711424, 140737488351231,
+ERASE, 140727540711424, 140727540711424,
+STORE, 140727540711424, 140727540715519,
+STORE, 94299865313280, 94299866025983,
+ERASE, 94299865313280, 94299865313280,
+STORE, 94299865313280, 94299865362431,
+STORE, 94299865362432, 94299866025983,
+ERASE, 94299865362432, 94299865362432,
+STORE, 94299865362432, 94299865907199,
+STORE, 94299865907200, 94299866005503,
+STORE, 94299866005504, 94299866025983,
+STORE, 140680268763136, 140680268935167,
+ERASE, 140680268763136, 140680268763136,
+STORE, 140680268763136, 140680268767231,
+STORE, 140680268767232, 140680268935167,
+ERASE, 140680268767232, 140680268767232,
+STORE, 140680268767232, 140680268890111,
+STORE, 140680268890112, 140680268922879,
+STORE, 140680268922880, 140680268931071,
+STORE, 140680268931072, 140680268935167,
+STORE, 140727541424128, 140727541428223,
+STORE, 140727541411840, 140727541424127,
+STORE, 46952526233600, 46952526241791,
+STORE, 46952526241792, 46952526249983,
+STORE, 46952526249984, 46952526356479,
+STORE, 46952526266368, 46952526356479,
+STORE, 46952526249984, 46952526266367,
+ERASE, 46952526266368, 46952526266368,
+STORE, 46952526266368, 46952526340095,
+STORE, 46952526340096, 46952526356479,
+STORE, 46952526319616, 46952526340095,
+STORE, 46952526266368, 46952526319615,
+ERASE, 46952526266368, 46952526266368,
+STORE, 46952526266368, 46952526319615,
+STORE, 46952526336000, 46952526340095,
+STORE, 46952526319616, 46952526335999,
+ERASE, 46952526319616, 46952526319616,
+STORE, 46952526319616, 46952526335999,
+STORE, 46952526348288, 46952526356479,
+STORE, 46952526340096, 46952526348287,
+ERASE, 46952526340096, 46952526340096,
+STORE, 46952526340096, 46952526348287,
+ERASE, 46952526348288, 46952526348288,
+STORE, 46952526348288, 46952526356479,
+STORE, 46952526356480, 46952528195583,
+STORE, 46952526495744, 46952528195583,
+STORE, 46952526356480, 46952526495743,
+ERASE, 46952526495744, 46952526495744,
+STORE, 46952526495744, 46952528154623,
+STORE, 46952528154624, 46952528195583,
+STORE, 46952527839232, 46952528154623,
+STORE, 46952526495744, 46952527839231,
+ERASE, 46952526495744, 46952526495744,
+STORE, 46952526495744, 46952527839231,
+STORE, 46952528150528, 46952528154623,
+STORE, 46952527839232, 46952528150527,
+ERASE, 46952527839232, 46952527839232,
+STORE, 46952527839232, 46952528150527,
+STORE, 46952528179200, 46952528195583,
+STORE, 46952528154624, 46952528179199,
+ERASE, 46952528154624, 46952528154624,
+STORE, 46952528154624, 46952528179199,
+ERASE, 46952528179200, 46952528179200,
+STORE, 46952528179200, 46952528195583,
+STORE, 46952528179200, 46952528207871,
+ERASE, 46952528154624, 46952528154624,
+STORE, 46952528154624, 46952528171007,
+STORE, 46952528171008, 46952528179199,
+ERASE, 46952526340096, 46952526340096,
+STORE, 46952526340096, 46952526344191,
+STORE, 46952526344192, 46952526348287,
+ERASE, 94299866005504, 94299866005504,
+STORE, 94299866005504, 94299866021887,
+STORE, 94299866021888, 94299866025983,
+ERASE, 140680268922880, 140680268922880,
+STORE, 140680268922880, 140680268926975,
+STORE, 140680268926976, 140680268931071,
+ERASE, 46952526233600, 46952526233600,
+STORE, 140737488347136, 140737488351231,
+STORE, 140722874793984, 140737488351231,
+ERASE, 140722874793984, 140722874793984,
+STORE, 140722874793984, 140722874798079,
+STORE, 94448916213760, 94448916926463,
+ERASE, 94448916213760, 94448916213760,
+STORE, 94448916213760, 94448916262911,
+STORE, 94448916262912, 94448916926463,
+ERASE, 94448916262912, 94448916262912,
+STORE, 94448916262912, 94448916807679,
+STORE, 94448916807680, 94448916905983,
+STORE, 94448916905984, 94448916926463,
+STORE, 140389117046784, 140389117218815,
+ERASE, 140389117046784, 140389117046784,
+STORE, 140389117046784, 140389117050879,
+STORE, 140389117050880, 140389117218815,
+ERASE, 140389117050880, 140389117050880,
+STORE, 140389117050880, 140389117173759,
+STORE, 140389117173760, 140389117206527,
+STORE, 140389117206528, 140389117214719,
+STORE, 140389117214720, 140389117218815,
+STORE, 140722875297792, 140722875301887,
+STORE, 140722875285504, 140722875297791,
+STORE, 47243677949952, 47243677958143,
+STORE, 47243677958144, 47243677966335,
+STORE, 47243677966336, 47243678072831,
+STORE, 47243677982720, 47243678072831,
+STORE, 47243677966336, 47243677982719,
+ERASE, 47243677982720, 47243677982720,
+STORE, 47243677982720, 47243678056447,
+STORE, 47243678056448, 47243678072831,
+STORE, 47243678035968, 47243678056447,
+STORE, 47243677982720, 47243678035967,
+ERASE, 47243677982720, 47243677982720,
+STORE, 47243677982720, 47243678035967,
+STORE, 47243678052352, 47243678056447,
+STORE, 47243678035968, 47243678052351,
+ERASE, 47243678035968, 47243678035968,
+STORE, 47243678035968, 47243678052351,
+STORE, 47243678064640, 47243678072831,
+STORE, 47243678056448, 47243678064639,
+ERASE, 47243678056448, 47243678056448,
+STORE, 47243678056448, 47243678064639,
+ERASE, 47243678064640, 47243678064640,
+STORE, 47243678064640, 47243678072831,
+STORE, 47243678072832, 47243679911935,
+STORE, 47243678212096, 47243679911935,
+STORE, 47243678072832, 47243678212095,
+ERASE, 47243678212096, 47243678212096,
+STORE, 47243678212096, 47243679870975,
+STORE, 47243679870976, 47243679911935,
+STORE, 47243679555584, 47243679870975,
+STORE, 47243678212096, 47243679555583,
+ERASE, 47243678212096, 47243678212096,
+STORE, 47243678212096, 47243679555583,
+STORE, 47243679866880, 47243679870975,
+STORE, 47243679555584, 47243679866879,
+ERASE, 47243679555584, 47243679555584,
+STORE, 47243679555584, 47243679866879,
+STORE, 47243679895552, 47243679911935,
+STORE, 47243679870976, 47243679895551,
+ERASE, 47243679870976, 47243679870976,
+STORE, 47243679870976, 47243679895551,
+ERASE, 47243679895552, 47243679895552,
+STORE, 47243679895552, 47243679911935,
+STORE, 47243679895552, 47243679924223,
+ERASE, 47243679870976, 47243679870976,
+STORE, 47243679870976, 47243679887359,
+STORE, 47243679887360, 47243679895551,
+ERASE, 47243678056448, 47243678056448,
+STORE, 47243678056448, 47243678060543,
+STORE, 47243678060544, 47243678064639,
+ERASE, 94448916905984, 94448916905984,
+STORE, 94448916905984, 94448916922367,
+STORE, 94448916922368, 94448916926463,
+ERASE, 140389117206528, 140389117206528,
+STORE, 140389117206528, 140389117210623,
+STORE, 140389117210624, 140389117214719,
+ERASE, 47243677949952, 47243677949952,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733068505088, 140737488351231,
+ERASE, 140733068505088, 140733068505088,
+STORE, 140733068505088, 140733068509183,
+STORE, 94207145750528, 94207146463231,
+ERASE, 94207145750528, 94207145750528,
+STORE, 94207145750528, 94207145799679,
+STORE, 94207145799680, 94207146463231,
+ERASE, 94207145799680, 94207145799680,
+STORE, 94207145799680, 94207146344447,
+STORE, 94207146344448, 94207146442751,
+STORE, 94207146442752, 94207146463231,
+STORE, 140684504911872, 140684505083903,
+ERASE, 140684504911872, 140684504911872,
+STORE, 140684504911872, 140684504915967,
+STORE, 140684504915968, 140684505083903,
+ERASE, 140684504915968, 140684504915968,
+STORE, 140684504915968, 140684505038847,
+STORE, 140684505038848, 140684505071615,
+STORE, 140684505071616, 140684505079807,
+STORE, 140684505079808, 140684505083903,
+STORE, 140733068607488, 140733068611583,
+STORE, 140733068595200, 140733068607487,
+STORE, 46948290084864, 46948290093055,
+STORE, 46948290093056, 46948290101247,
+STORE, 46948290101248, 46948290207743,
+STORE, 46948290117632, 46948290207743,
+STORE, 46948290101248, 46948290117631,
+ERASE, 46948290117632, 46948290117632,
+STORE, 46948290117632, 46948290191359,
+STORE, 46948290191360, 46948290207743,
+STORE, 46948290170880, 46948290191359,
+STORE, 46948290117632, 46948290170879,
+ERASE, 46948290117632, 46948290117632,
+STORE, 46948290117632, 46948290170879,
+STORE, 46948290187264, 46948290191359,
+STORE, 46948290170880, 46948290187263,
+ERASE, 46948290170880, 46948290170880,
+STORE, 46948290170880, 46948290187263,
+STORE, 46948290199552, 46948290207743,
+STORE, 46948290191360, 46948290199551,
+ERASE, 46948290191360, 46948290191360,
+STORE, 46948290191360, 46948290199551,
+ERASE, 46948290199552, 46948290199552,
+STORE, 46948290199552, 46948290207743,
+STORE, 46948290207744, 46948292046847,
+STORE, 46948290347008, 46948292046847,
+STORE, 46948290207744, 46948290347007,
+ERASE, 46948290347008, 46948290347008,
+STORE, 46948290347008, 46948292005887,
+STORE, 46948292005888, 46948292046847,
+STORE, 46948291690496, 46948292005887,
+STORE, 46948290347008, 46948291690495,
+ERASE, 46948290347008, 46948290347008,
+STORE, 46948290347008, 46948291690495,
+STORE, 46948292001792, 46948292005887,
+STORE, 46948291690496, 46948292001791,
+ERASE, 46948291690496, 46948291690496,
+STORE, 46948291690496, 46948292001791,
+STORE, 46948292030464, 46948292046847,
+STORE, 46948292005888, 46948292030463,
+ERASE, 46948292005888, 46948292005888,
+STORE, 46948292005888, 46948292030463,
+ERASE, 46948292030464, 46948292030464,
+STORE, 46948292030464, 46948292046847,
+STORE, 46948292030464, 46948292059135,
+ERASE, 46948292005888, 46948292005888,
+STORE, 46948292005888, 46948292022271,
+STORE, 46948292022272, 46948292030463,
+ERASE, 46948290191360, 46948290191360,
+STORE, 46948290191360, 46948290195455,
+STORE, 46948290195456, 46948290199551,
+ERASE, 94207146442752, 94207146442752,
+STORE, 94207146442752, 94207146459135,
+STORE, 94207146459136, 94207146463231,
+ERASE, 140684505071616, 140684505071616,
+STORE, 140684505071616, 140684505075711,
+STORE, 140684505075712, 140684505079807,
+ERASE, 46948290084864, 46948290084864,
+STORE, 140737488347136, 140737488351231,
+STORE, 140726367158272, 140737488351231,
+ERASE, 140726367158272, 140726367158272,
+STORE, 140726367158272, 140726367162367,
+STORE, 94436124106752, 94436124819455,
+ERASE, 94436124106752, 94436124106752,
+STORE, 94436124106752, 94436124155903,
+STORE, 94436124155904, 94436124819455,
+ERASE, 94436124155904, 94436124155904,
+STORE, 94436124155904, 94436124700671,
+STORE, 94436124700672, 94436124798975,
+STORE, 94436124798976, 94436124819455,
+STORE, 140049025044480, 140049025216511,
+ERASE, 140049025044480, 140049025044480,
+STORE, 140049025044480, 140049025048575,
+STORE, 140049025048576, 140049025216511,
+ERASE, 140049025048576, 140049025048576,
+STORE, 140049025048576, 140049025171455,
+STORE, 140049025171456, 140049025204223,
+STORE, 140049025204224, 140049025212415,
+STORE, 140049025212416, 140049025216511,
+STORE, 140726367256576, 140726367260671,
+STORE, 140726367244288, 140726367256575,
+STORE, 47583769952256, 47583769960447,
+STORE, 47583769960448, 47583769968639,
+STORE, 47583769968640, 47583770075135,
+STORE, 47583769985024, 47583770075135,
+STORE, 47583769968640, 47583769985023,
+ERASE, 47583769985024, 47583769985024,
+STORE, 47583769985024, 47583770058751,
+STORE, 47583770058752, 47583770075135,
+STORE, 47583770038272, 47583770058751,
+STORE, 47583769985024, 47583770038271,
+ERASE, 47583769985024, 47583769985024,
+STORE, 47583769985024, 47583770038271,
+STORE, 47583770054656, 47583770058751,
+STORE, 47583770038272, 47583770054655,
+ERASE, 47583770038272, 47583770038272,
+STORE, 47583770038272, 47583770054655,
+STORE, 47583770066944, 47583770075135,
+STORE, 47583770058752, 47583770066943,
+ERASE, 47583770058752, 47583770058752,
+STORE, 47583770058752, 47583770066943,
+ERASE, 47583770066944, 47583770066944,
+STORE, 47583770066944, 47583770075135,
+STORE, 47583770075136, 47583771914239,
+STORE, 47583770214400, 47583771914239,
+STORE, 47583770075136, 47583770214399,
+ERASE, 47583770214400, 47583770214400,
+STORE, 47583770214400, 47583771873279,
+STORE, 47583771873280, 47583771914239,
+STORE, 47583771557888, 47583771873279,
+STORE, 47583770214400, 47583771557887,
+ERASE, 47583770214400, 47583770214400,
+STORE, 47583770214400, 47583771557887,
+STORE, 47583771869184, 47583771873279,
+STORE, 47583771557888, 47583771869183,
+ERASE, 47583771557888, 47583771557888,
+STORE, 47583771557888, 47583771869183,
+STORE, 47583771897856, 47583771914239,
+STORE, 47583771873280, 47583771897855,
+ERASE, 47583771873280, 47583771873280,
+STORE, 47583771873280, 47583771897855,
+ERASE, 47583771897856, 47583771897856,
+STORE, 47583771897856, 47583771914239,
+STORE, 47583771897856, 47583771926527,
+ERASE, 47583771873280, 47583771873280,
+STORE, 47583771873280, 47583771889663,
+STORE, 47583771889664, 47583771897855,
+ERASE, 47583770058752, 47583770058752,
+STORE, 47583770058752, 47583770062847,
+STORE, 47583770062848, 47583770066943,
+ERASE, 94436124798976, 94436124798976,
+STORE, 94436124798976, 94436124815359,
+STORE, 94436124815360, 94436124819455,
+ERASE, 140049025204224, 140049025204224,
+STORE, 140049025204224, 140049025208319,
+STORE, 140049025208320, 140049025212415,
+ERASE, 47583769952256, 47583769952256,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727116099584, 140737488351231,
+ERASE, 140727116099584, 140727116099584,
+STORE, 140727116099584, 140727116103679,
+STORE, 94166319734784, 94166320447487,
+ERASE, 94166319734784, 94166319734784,
+STORE, 94166319734784, 94166319783935,
+STORE, 94166319783936, 94166320447487,
+ERASE, 94166319783936, 94166319783936,
+STORE, 94166319783936, 94166320328703,
+STORE, 94166320328704, 94166320427007,
+STORE, 94166320427008, 94166320447487,
+STORE, 139976559542272, 139976559714303,
+ERASE, 139976559542272, 139976559542272,
+STORE, 139976559542272, 139976559546367,
+STORE, 139976559546368, 139976559714303,
+ERASE, 139976559546368, 139976559546368,
+STORE, 139976559546368, 139976559669247,
+STORE, 139976559669248, 139976559702015,
+STORE, 139976559702016, 139976559710207,
+STORE, 139976559710208, 139976559714303,
+STORE, 140727116222464, 140727116226559,
+STORE, 140727116210176, 140727116222463,
+STORE, 47656235454464, 47656235462655,
+STORE, 47656235462656, 47656235470847,
+STORE, 47656235470848, 47656235577343,
+STORE, 47656235487232, 47656235577343,
+STORE, 47656235470848, 47656235487231,
+ERASE, 47656235487232, 47656235487232,
+STORE, 47656235487232, 47656235560959,
+STORE, 47656235560960, 47656235577343,
+STORE, 47656235540480, 47656235560959,
+STORE, 47656235487232, 47656235540479,
+ERASE, 47656235487232, 47656235487232,
+STORE, 47656235487232, 47656235540479,
+STORE, 47656235556864, 47656235560959,
+STORE, 47656235540480, 47656235556863,
+ERASE, 47656235540480, 47656235540480,
+STORE, 47656235540480, 47656235556863,
+STORE, 47656235569152, 47656235577343,
+STORE, 47656235560960, 47656235569151,
+ERASE, 47656235560960, 47656235560960,
+STORE, 47656235560960, 47656235569151,
+ERASE, 47656235569152, 47656235569152,
+STORE, 47656235569152, 47656235577343,
+STORE, 47656235577344, 47656237416447,
+STORE, 47656235716608, 47656237416447,
+STORE, 47656235577344, 47656235716607,
+ERASE, 47656235716608, 47656235716608,
+STORE, 47656235716608, 47656237375487,
+STORE, 47656237375488, 47656237416447,
+STORE, 47656237060096, 47656237375487,
+STORE, 47656235716608, 47656237060095,
+ERASE, 47656235716608, 47656235716608,
+STORE, 47656235716608, 47656237060095,
+STORE, 47656237371392, 47656237375487,
+STORE, 47656237060096, 47656237371391,
+ERASE, 47656237060096, 47656237060096,
+STORE, 47656237060096, 47656237371391,
+STORE, 47656237400064, 47656237416447,
+STORE, 47656237375488, 47656237400063,
+ERASE, 47656237375488, 47656237375488,
+STORE, 47656237375488, 47656237400063,
+ERASE, 47656237400064, 47656237400064,
+STORE, 47656237400064, 47656237416447,
+STORE, 47656237400064, 47656237428735,
+ERASE, 47656237375488, 47656237375488,
+STORE, 47656237375488, 47656237391871,
+STORE, 47656237391872, 47656237400063,
+ERASE, 47656235560960, 47656235560960,
+STORE, 47656235560960, 47656235565055,
+STORE, 47656235565056, 47656235569151,
+ERASE, 94166320427008, 94166320427008,
+STORE, 94166320427008, 94166320443391,
+STORE, 94166320443392, 94166320447487,
+ERASE, 139976559702016, 139976559702016,
+STORE, 139976559702016, 139976559706111,
+STORE, 139976559706112, 139976559710207,
+ERASE, 47656235454464, 47656235454464,
+STORE, 94166332153856, 94166332289023,
+STORE, 140737488347136, 140737488351231,
+STORE, 140726412816384, 140737488351231,
+ERASE, 140726412816384, 140726412816384,
+STORE, 140726412816384, 140726412820479,
+STORE, 94094884507648, 94094885220351,
+ERASE, 94094884507648, 94094884507648,
+STORE, 94094884507648, 94094884556799,
+STORE, 94094884556800, 94094885220351,
+ERASE, 94094884556800, 94094884556800,
+STORE, 94094884556800, 94094885101567,
+STORE, 94094885101568, 94094885199871,
+STORE, 94094885199872, 94094885220351,
+STORE, 139773773938688, 139773774110719,
+ERASE, 139773773938688, 139773773938688,
+STORE, 139773773938688, 139773773942783,
+STORE, 139773773942784, 139773774110719,
+ERASE, 139773773942784, 139773773942784,
+STORE, 139773773942784, 139773774065663,
+STORE, 139773774065664, 139773774098431,
+STORE, 139773774098432, 139773774106623,
+STORE, 139773774106624, 139773774110719,
+STORE, 140726412963840, 140726412967935,
+STORE, 140726412951552, 140726412963839,
+STORE, 47859021058048, 47859021066239,
+STORE, 47859021066240, 47859021074431,
+STORE, 47859021074432, 47859021180927,
+STORE, 47859021090816, 47859021180927,
+STORE, 47859021074432, 47859021090815,
+ERASE, 47859021090816, 47859021090816,
+STORE, 47859021090816, 47859021164543,
+STORE, 47859021164544, 47859021180927,
+STORE, 47859021144064, 47859021164543,
+STORE, 47859021090816, 47859021144063,
+ERASE, 47859021090816, 47859021090816,
+STORE, 47859021090816, 47859021144063,
+STORE, 47859021160448, 47859021164543,
+STORE, 47859021144064, 47859021160447,
+ERASE, 47859021144064, 47859021144064,
+STORE, 47859021144064, 47859021160447,
+STORE, 47859021172736, 47859021180927,
+STORE, 47859021164544, 47859021172735,
+ERASE, 47859021164544, 47859021164544,
+STORE, 47859021164544, 47859021172735,
+ERASE, 47859021172736, 47859021172736,
+STORE, 47859021172736, 47859021180927,
+STORE, 47859021180928, 47859023020031,
+STORE, 47859021320192, 47859023020031,
+STORE, 47859021180928, 47859021320191,
+ERASE, 47859021320192, 47859021320192,
+STORE, 47859021320192, 47859022979071,
+STORE, 47859022979072, 47859023020031,
+STORE, 47859022663680, 47859022979071,
+STORE, 47859021320192, 47859022663679,
+ERASE, 47859021320192, 47859021320192,
+STORE, 47859021320192, 47859022663679,
+STORE, 47859022974976, 47859022979071,
+STORE, 47859022663680, 47859022974975,
+ERASE, 47859022663680, 47859022663680,
+STORE, 47859022663680, 47859022974975,
+STORE, 47859023003648, 47859023020031,
+STORE, 47859022979072, 47859023003647,
+ERASE, 47859022979072, 47859022979072,
+STORE, 47859022979072, 47859023003647,
+ERASE, 47859023003648, 47859023003648,
+STORE, 47859023003648, 47859023020031,
+STORE, 47859023003648, 47859023032319,
+ERASE, 47859022979072, 47859022979072,
+STORE, 47859022979072, 47859022995455,
+STORE, 47859022995456, 47859023003647,
+ERASE, 47859021164544, 47859021164544,
+STORE, 47859021164544, 47859021168639,
+STORE, 47859021168640, 47859021172735,
+ERASE, 94094885199872, 94094885199872,
+STORE, 94094885199872, 94094885216255,
+STORE, 94094885216256, 94094885220351,
+ERASE, 139773774098432, 139773774098432,
+STORE, 139773774098432, 139773774102527,
+STORE, 139773774102528, 139773774106623,
+ERASE, 47859021058048, 47859021058048,
+STORE, 94094901108736, 94094901243903,
+STORE, 140737488347136, 140737488351231,
+STORE, 140736567963648, 140737488351231,
+ERASE, 140736567963648, 140736567963648,
+STORE, 140736567963648, 140736567967743,
+STORE, 94924425748480, 94924426461183,
+ERASE, 94924425748480, 94924425748480,
+STORE, 94924425748480, 94924425797631,
+STORE, 94924425797632, 94924426461183,
+ERASE, 94924425797632, 94924425797632,
+STORE, 94924425797632, 94924426342399,
+STORE, 94924426342400, 94924426440703,
+STORE, 94924426440704, 94924426461183,
+STORE, 140042126319616, 140042126491647,
+ERASE, 140042126319616, 140042126319616,
+STORE, 140042126319616, 140042126323711,
+STORE, 140042126323712, 140042126491647,
+ERASE, 140042126323712, 140042126323712,
+STORE, 140042126323712, 140042126446591,
+STORE, 140042126446592, 140042126479359,
+STORE, 140042126479360, 140042126487551,
+STORE, 140042126487552, 140042126491647,
+STORE, 140736568672256, 140736568676351,
+STORE, 140736568659968, 140736568672255,
+STORE, 47590668677120, 47590668685311,
+STORE, 47590668685312, 47590668693503,
+STORE, 47590668693504, 47590668799999,
+STORE, 47590668709888, 47590668799999,
+STORE, 47590668693504, 47590668709887,
+ERASE, 47590668709888, 47590668709888,
+STORE, 47590668709888, 47590668783615,
+STORE, 47590668783616, 47590668799999,
+STORE, 47590668763136, 47590668783615,
+STORE, 47590668709888, 47590668763135,
+ERASE, 47590668709888, 47590668709888,
+STORE, 47590668709888, 47590668763135,
+STORE, 47590668779520, 47590668783615,
+STORE, 47590668763136, 47590668779519,
+ERASE, 47590668763136, 47590668763136,
+STORE, 47590668763136, 47590668779519,
+STORE, 47590668791808, 47590668799999,
+STORE, 47590668783616, 47590668791807,
+ERASE, 47590668783616, 47590668783616,
+STORE, 47590668783616, 47590668791807,
+ERASE, 47590668791808, 47590668791808,
+STORE, 47590668791808, 47590668799999,
+STORE, 47590668800000, 47590670639103,
+STORE, 47590668939264, 47590670639103,
+STORE, 47590668800000, 47590668939263,
+ERASE, 47590668939264, 47590668939264,
+STORE, 47590668939264, 47590670598143,
+STORE, 47590670598144, 47590670639103,
+STORE, 47590670282752, 47590670598143,
+STORE, 47590668939264, 47590670282751,
+ERASE, 47590668939264, 47590668939264,
+STORE, 47590668939264, 47590670282751,
+STORE, 47590670594048, 47590670598143,
+STORE, 47590670282752, 47590670594047,
+ERASE, 47590670282752, 47590670282752,
+STORE, 47590670282752, 47590670594047,
+STORE, 47590670622720, 47590670639103,
+STORE, 47590670598144, 47590670622719,
+ERASE, 47590670598144, 47590670598144,
+STORE, 47590670598144, 47590670622719,
+ERASE, 47590670622720, 47590670622720,
+STORE, 47590670622720, 47590670639103,
+STORE, 47590670622720, 47590670651391,
+ERASE, 47590670598144, 47590670598144,
+STORE, 47590670598144, 47590670614527,
+STORE, 47590670614528, 47590670622719,
+ERASE, 47590668783616, 47590668783616,
+STORE, 47590668783616, 47590668787711,
+STORE, 47590668787712, 47590668791807,
+ERASE, 94924426440704, 94924426440704,
+STORE, 94924426440704, 94924426457087,
+STORE, 94924426457088, 94924426461183,
+ERASE, 140042126479360, 140042126479360,
+STORE, 140042126479360, 140042126483455,
+STORE, 140042126483456, 140042126487551,
+ERASE, 47590668677120, 47590668677120,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733281439744, 140737488351231,
+ERASE, 140733281439744, 140733281439744,
+STORE, 140733281439744, 140733281443839,
+STORE, 94490667069440, 94490667782143,
+ERASE, 94490667069440, 94490667069440,
+STORE, 94490667069440, 94490667118591,
+STORE, 94490667118592, 94490667782143,
+ERASE, 94490667118592, 94490667118592,
+STORE, 94490667118592, 94490667663359,
+STORE, 94490667663360, 94490667761663,
+STORE, 94490667761664, 94490667782143,
+STORE, 139878215118848, 139878215290879,
+ERASE, 139878215118848, 139878215118848,
+STORE, 139878215118848, 139878215122943,
+STORE, 139878215122944, 139878215290879,
+ERASE, 139878215122944, 139878215122944,
+STORE, 139878215122944, 139878215245823,
+STORE, 139878215245824, 139878215278591,
+STORE, 139878215278592, 139878215286783,
+STORE, 139878215286784, 139878215290879,
+STORE, 140733281464320, 140733281468415,
+STORE, 140733281452032, 140733281464319,
+STORE, 47754579877888, 47754579886079,
+STORE, 47754579886080, 47754579894271,
+STORE, 47754579894272, 47754580000767,
+STORE, 47754579910656, 47754580000767,
+STORE, 47754579894272, 47754579910655,
+ERASE, 47754579910656, 47754579910656,
+STORE, 47754579910656, 47754579984383,
+STORE, 47754579984384, 47754580000767,
+STORE, 47754579963904, 47754579984383,
+STORE, 47754579910656, 47754579963903,
+ERASE, 47754579910656, 47754579910656,
+STORE, 47754579910656, 47754579963903,
+STORE, 47754579980288, 47754579984383,
+STORE, 47754579963904, 47754579980287,
+ERASE, 47754579963904, 47754579963904,
+STORE, 47754579963904, 47754579980287,
+STORE, 47754579992576, 47754580000767,
+STORE, 47754579984384, 47754579992575,
+ERASE, 47754579984384, 47754579984384,
+STORE, 47754579984384, 47754579992575,
+ERASE, 47754579992576, 47754579992576,
+STORE, 47754579992576, 47754580000767,
+STORE, 47754580000768, 47754581839871,
+STORE, 47754580140032, 47754581839871,
+STORE, 47754580000768, 47754580140031,
+ERASE, 47754580140032, 47754580140032,
+STORE, 47754580140032, 47754581798911,
+STORE, 47754581798912, 47754581839871,
+STORE, 47754581483520, 47754581798911,
+STORE, 47754580140032, 47754581483519,
+ERASE, 47754580140032, 47754580140032,
+STORE, 47754580140032, 47754581483519,
+STORE, 47754581794816, 47754581798911,
+STORE, 47754581483520, 47754581794815,
+ERASE, 47754581483520, 47754581483520,
+STORE, 47754581483520, 47754581794815,
+STORE, 47754581823488, 47754581839871,
+STORE, 47754581798912, 47754581823487,
+ERASE, 47754581798912, 47754581798912,
+STORE, 47754581798912, 47754581823487,
+ERASE, 47754581823488, 47754581823488,
+STORE, 47754581823488, 47754581839871,
+STORE, 47754581823488, 47754581852159,
+ERASE, 47754581798912, 47754581798912,
+STORE, 47754581798912, 47754581815295,
+STORE, 47754581815296, 47754581823487,
+ERASE, 47754579984384, 47754579984384,
+STORE, 47754579984384, 47754579988479,
+STORE, 47754579988480, 47754579992575,
+ERASE, 94490667761664, 94490667761664,
+STORE, 94490667761664, 94490667778047,
+STORE, 94490667778048, 94490667782143,
+ERASE, 139878215278592, 139878215278592,
+STORE, 139878215278592, 139878215282687,
+STORE, 139878215282688, 139878215286783,
+ERASE, 47754579877888, 47754579877888,
+STORE, 94490669649920, 94490669785087,
+STORE, 140737488347136, 140737488351231,
+STORE, 140735382188032, 140737488351231,
+ERASE, 140735382188032, 140735382188032,
+STORE, 140735382188032, 140735382192127,
+STORE, 94150181302272, 94150182014975,
+ERASE, 94150181302272, 94150181302272,
+STORE, 94150181302272, 94150181351423,
+STORE, 94150181351424, 94150182014975,
+ERASE, 94150181351424, 94150181351424,
+STORE, 94150181351424, 94150181896191,
+STORE, 94150181896192, 94150181994495,
+STORE, 94150181994496, 94150182014975,
+STORE, 139679752458240, 139679752630271,
+ERASE, 139679752458240, 139679752458240,
+STORE, 139679752458240, 139679752462335,
+STORE, 139679752462336, 139679752630271,
+ERASE, 139679752462336, 139679752462336,
+STORE, 139679752462336, 139679752585215,
+STORE, 139679752585216, 139679752617983,
+STORE, 139679752617984, 139679752626175,
+STORE, 139679752626176, 139679752630271,
+STORE, 140735382536192, 140735382540287,
+STORE, 140735382523904, 140735382536191,
+STORE, 47953042538496, 47953042546687,
+STORE, 47953042546688, 47953042554879,
+STORE, 47953042554880, 47953042661375,
+STORE, 47953042571264, 47953042661375,
+STORE, 47953042554880, 47953042571263,
+ERASE, 47953042571264, 47953042571264,
+STORE, 47953042571264, 47953042644991,
+STORE, 47953042644992, 47953042661375,
+STORE, 47953042624512, 47953042644991,
+STORE, 47953042571264, 47953042624511,
+ERASE, 47953042571264, 47953042571264,
+STORE, 47953042571264, 47953042624511,
+STORE, 47953042640896, 47953042644991,
+STORE, 47953042624512, 47953042640895,
+ERASE, 47953042624512, 47953042624512,
+STORE, 47953042624512, 47953042640895,
+STORE, 47953042653184, 47953042661375,
+STORE, 47953042644992, 47953042653183,
+ERASE, 47953042644992, 47953042644992,
+STORE, 47953042644992, 47953042653183,
+ERASE, 47953042653184, 47953042653184,
+STORE, 47953042653184, 47953042661375,
+STORE, 47953042661376, 47953044500479,
+STORE, 47953042800640, 47953044500479,
+STORE, 47953042661376, 47953042800639,
+ERASE, 47953042800640, 47953042800640,
+STORE, 47953042800640, 47953044459519,
+STORE, 47953044459520, 47953044500479,
+STORE, 47953044144128, 47953044459519,
+STORE, 47953042800640, 47953044144127,
+ERASE, 47953042800640, 47953042800640,
+STORE, 47953042800640, 47953044144127,
+STORE, 47953044455424, 47953044459519,
+STORE, 47953044144128, 47953044455423,
+ERASE, 47953044144128, 47953044144128,
+STORE, 47953044144128, 47953044455423,
+STORE, 47953044484096, 47953044500479,
+STORE, 47953044459520, 47953044484095,
+ERASE, 47953044459520, 47953044459520,
+STORE, 47953044459520, 47953044484095,
+ERASE, 47953044484096, 47953044484096,
+STORE, 47953044484096, 47953044500479,
+STORE, 47953044484096, 47953044512767,
+ERASE, 47953044459520, 47953044459520,
+STORE, 47953044459520, 47953044475903,
+STORE, 47953044475904, 47953044484095,
+ERASE, 47953042644992, 47953042644992,
+STORE, 47953042644992, 47953042649087,
+STORE, 47953042649088, 47953042653183,
+ERASE, 94150181994496, 94150181994496,
+STORE, 94150181994496, 94150182010879,
+STORE, 94150182010880, 94150182014975,
+ERASE, 139679752617984, 139679752617984,
+STORE, 139679752617984, 139679752622079,
+STORE, 139679752622080, 139679752626175,
+ERASE, 47953042538496, 47953042538496,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737044123648, 140737488351231,
+ERASE, 140737044123648, 140737044123648,
+STORE, 140737044123648, 140737044127743,
+STORE, 94425324294144, 94425325006847,
+ERASE, 94425324294144, 94425324294144,
+STORE, 94425324294144, 94425324343295,
+STORE, 94425324343296, 94425325006847,
+ERASE, 94425324343296, 94425324343296,
+STORE, 94425324343296, 94425324888063,
+STORE, 94425324888064, 94425324986367,
+STORE, 94425324986368, 94425325006847,
+STORE, 140382015016960, 140382015188991,
+ERASE, 140382015016960, 140382015016960,
+STORE, 140382015016960, 140382015021055,
+STORE, 140382015021056, 140382015188991,
+ERASE, 140382015021056, 140382015021056,
+STORE, 140382015021056, 140382015143935,
+STORE, 140382015143936, 140382015176703,
+STORE, 140382015176704, 140382015184895,
+STORE, 140382015184896, 140382015188991,
+STORE, 140737045585920, 140737045590015,
+STORE, 140737045573632, 140737045585919,
+STORE, 47250779979776, 47250779987967,
+STORE, 47250779987968, 47250779996159,
+STORE, 47250779996160, 47250780102655,
+STORE, 47250780012544, 47250780102655,
+STORE, 47250779996160, 47250780012543,
+ERASE, 47250780012544, 47250780012544,
+STORE, 47250780012544, 47250780086271,
+STORE, 47250780086272, 47250780102655,
+STORE, 47250780065792, 47250780086271,
+STORE, 47250780012544, 47250780065791,
+ERASE, 47250780012544, 47250780012544,
+STORE, 47250780012544, 47250780065791,
+STORE, 47250780082176, 47250780086271,
+STORE, 47250780065792, 47250780082175,
+ERASE, 47250780065792, 47250780065792,
+STORE, 47250780065792, 47250780082175,
+STORE, 47250780094464, 47250780102655,
+STORE, 47250780086272, 47250780094463,
+ERASE, 47250780086272, 47250780086272,
+STORE, 47250780086272, 47250780094463,
+ERASE, 47250780094464, 47250780094464,
+STORE, 47250780094464, 47250780102655,
+STORE, 47250780102656, 47250781941759,
+STORE, 47250780241920, 47250781941759,
+STORE, 47250780102656, 47250780241919,
+ERASE, 47250780241920, 47250780241920,
+STORE, 47250780241920, 47250781900799,
+STORE, 47250781900800, 47250781941759,
+STORE, 47250781585408, 47250781900799,
+STORE, 47250780241920, 47250781585407,
+ERASE, 47250780241920, 47250780241920,
+STORE, 47250780241920, 47250781585407,
+STORE, 47250781896704, 47250781900799,
+STORE, 47250781585408, 47250781896703,
+ERASE, 47250781585408, 47250781585408,
+STORE, 47250781585408, 47250781896703,
+STORE, 47250781925376, 47250781941759,
+STORE, 47250781900800, 47250781925375,
+ERASE, 47250781900800, 47250781900800,
+STORE, 47250781900800, 47250781925375,
+ERASE, 47250781925376, 47250781925376,
+STORE, 47250781925376, 47250781941759,
+STORE, 47250781925376, 47250781954047,
+ERASE, 47250781900800, 47250781900800,
+STORE, 47250781900800, 47250781917183,
+STORE, 47250781917184, 47250781925375,
+ERASE, 47250780086272, 47250780086272,
+STORE, 47250780086272, 47250780090367,
+STORE, 47250780090368, 47250780094463,
+ERASE, 94425324986368, 94425324986368,
+STORE, 94425324986368, 94425325002751,
+STORE, 94425325002752, 94425325006847,
+ERASE, 140382015176704, 140382015176704,
+STORE, 140382015176704, 140382015180799,
+STORE, 140382015180800, 140382015184895,
+ERASE, 47250779979776, 47250779979776,
+STORE, 94425351438336, 94425351573503,
+STORE, 140737488347136, 140737488351231,
+STORE, 140736801144832, 140737488351231,
+ERASE, 140736801144832, 140736801144832,
+STORE, 140736801144832, 140736801148927,
+STORE, 94629429358592, 94629430071295,
+ERASE, 94629429358592, 94629429358592,
+STORE, 94629429358592, 94629429407743,
+STORE, 94629429407744, 94629430071295,
+ERASE, 94629429407744, 94629429407744,
+STORE, 94629429407744, 94629429952511,
+STORE, 94629429952512, 94629430050815,
+STORE, 94629430050816, 94629430071295,
+STORE, 139801685483520, 139801685655551,
+ERASE, 139801685483520, 139801685483520,
+STORE, 139801685483520, 139801685487615,
+STORE, 139801685487616, 139801685655551,
+ERASE, 139801685487616, 139801685487616,
+STORE, 139801685487616, 139801685610495,
+STORE, 139801685610496, 139801685643263,
+STORE, 139801685643264, 139801685651455,
+STORE, 139801685651456, 139801685655551,
+STORE, 140736801198080, 140736801202175,
+STORE, 140736801185792, 140736801198079,
+STORE, 47831109513216, 47831109521407,
+STORE, 47831109521408, 47831109529599,
+STORE, 47831109529600, 47831109636095,
+STORE, 47831109545984, 47831109636095,
+STORE, 47831109529600, 47831109545983,
+ERASE, 47831109545984, 47831109545984,
+STORE, 47831109545984, 47831109619711,
+STORE, 47831109619712, 47831109636095,
+STORE, 47831109599232, 47831109619711,
+STORE, 47831109545984, 47831109599231,
+ERASE, 47831109545984, 47831109545984,
+STORE, 47831109545984, 47831109599231,
+STORE, 47831109615616, 47831109619711,
+STORE, 47831109599232, 47831109615615,
+ERASE, 47831109599232, 47831109599232,
+STORE, 47831109599232, 47831109615615,
+STORE, 47831109627904, 47831109636095,
+STORE, 47831109619712, 47831109627903,
+ERASE, 47831109619712, 47831109619712,
+STORE, 47831109619712, 47831109627903,
+ERASE, 47831109627904, 47831109627904,
+STORE, 47831109627904, 47831109636095,
+STORE, 47831109636096, 47831111475199,
+STORE, 47831109775360, 47831111475199,
+STORE, 47831109636096, 47831109775359,
+ERASE, 47831109775360, 47831109775360,
+STORE, 47831109775360, 47831111434239,
+STORE, 47831111434240, 47831111475199,
+STORE, 47831111118848, 47831111434239,
+STORE, 47831109775360, 47831111118847,
+ERASE, 47831109775360, 47831109775360,
+STORE, 47831109775360, 47831111118847,
+STORE, 47831111430144, 47831111434239,
+STORE, 47831111118848, 47831111430143,
+ERASE, 47831111118848, 47831111118848,
+STORE, 47831111118848, 47831111430143,
+STORE, 47831111458816, 47831111475199,
+STORE, 47831111434240, 47831111458815,
+ERASE, 47831111434240, 47831111434240,
+STORE, 47831111434240, 47831111458815,
+ERASE, 47831111458816, 47831111458816,
+STORE, 47831111458816, 47831111475199,
+STORE, 47831111458816, 47831111487487,
+ERASE, 47831111434240, 47831111434240,
+STORE, 47831111434240, 47831111450623,
+STORE, 47831111450624, 47831111458815,
+ERASE, 47831109619712, 47831109619712,
+STORE, 47831109619712, 47831109623807,
+STORE, 47831109623808, 47831109627903,
+ERASE, 94629430050816, 94629430050816,
+STORE, 94629430050816, 94629430067199,
+STORE, 94629430067200, 94629430071295,
+ERASE, 139801685643264, 139801685643264,
+STORE, 139801685643264, 139801685647359,
+STORE, 139801685647360, 139801685651455,
+ERASE, 47831109513216, 47831109513216,
+STORE, 140737488347136, 140737488351231,
+STORE, 140729419612160, 140737488351231,
+ERASE, 140729419612160, 140729419612160,
+STORE, 140729419612160, 140729419616255,
+STORE, 94443354148864, 94443354861567,
+ERASE, 94443354148864, 94443354148864,
+STORE, 94443354148864, 94443354198015,
+STORE, 94443354198016, 94443354861567,
+ERASE, 94443354198016, 94443354198016,
+STORE, 94443354198016, 94443354742783,
+STORE, 94443354742784, 94443354841087,
+STORE, 94443354841088, 94443354861567,
+STORE, 139741700038656, 139741700210687,
+ERASE, 139741700038656, 139741700038656,
+STORE, 139741700038656, 139741700042751,
+STORE, 139741700042752, 139741700210687,
+ERASE, 139741700042752, 139741700042752,
+STORE, 139741700042752, 139741700165631,
+STORE, 139741700165632, 139741700198399,
+STORE, 139741700198400, 139741700206591,
+STORE, 139741700206592, 139741700210687,
+STORE, 140729420574720, 140729420578815,
+STORE, 140729420562432, 140729420574719,
+STORE, 47891094958080, 47891094966271,
+STORE, 47891094966272, 47891094974463,
+STORE, 47891094974464, 47891095080959,
+STORE, 47891094990848, 47891095080959,
+STORE, 47891094974464, 47891094990847,
+ERASE, 47891094990848, 47891094990848,
+STORE, 47891094990848, 47891095064575,
+STORE, 47891095064576, 47891095080959,
+STORE, 47891095044096, 47891095064575,
+STORE, 47891094990848, 47891095044095,
+ERASE, 47891094990848, 47891094990848,
+STORE, 47891094990848, 47891095044095,
+STORE, 47891095060480, 47891095064575,
+STORE, 47891095044096, 47891095060479,
+ERASE, 47891095044096, 47891095044096,
+STORE, 47891095044096, 47891095060479,
+STORE, 47891095072768, 47891095080959,
+STORE, 47891095064576, 47891095072767,
+ERASE, 47891095064576, 47891095064576,
+STORE, 47891095064576, 47891095072767,
+ERASE, 47891095072768, 47891095072768,
+STORE, 47891095072768, 47891095080959,
+STORE, 47891095080960, 47891096920063,
+STORE, 47891095220224, 47891096920063,
+STORE, 47891095080960, 47891095220223,
+ERASE, 47891095220224, 47891095220224,
+STORE, 47891095220224, 47891096879103,
+STORE, 47891096879104, 47891096920063,
+STORE, 47891096563712, 47891096879103,
+STORE, 47891095220224, 47891096563711,
+ERASE, 47891095220224, 47891095220224,
+STORE, 47891095220224, 47891096563711,
+STORE, 47891096875008, 47891096879103,
+STORE, 47891096563712, 47891096875007,
+ERASE, 47891096563712, 47891096563712,
+STORE, 47891096563712, 47891096875007,
+STORE, 47891096903680, 47891096920063,
+STORE, 47891096879104, 47891096903679,
+ERASE, 47891096879104, 47891096879104,
+STORE, 47891096879104, 47891096903679,
+ERASE, 47891096903680, 47891096903680,
+STORE, 47891096903680, 47891096920063,
+STORE, 47891096903680, 47891096932351,
+ERASE, 47891096879104, 47891096879104,
+STORE, 47891096879104, 47891096895487,
+STORE, 47891096895488, 47891096903679,
+ERASE, 47891095064576, 47891095064576,
+STORE, 47891095064576, 47891095068671,
+STORE, 47891095068672, 47891095072767,
+ERASE, 94443354841088, 94443354841088,
+STORE, 94443354841088, 94443354857471,
+STORE, 94443354857472, 94443354861567,
+ERASE, 139741700198400, 139741700198400,
+STORE, 139741700198400, 139741700202495,
+STORE, 139741700202496, 139741700206591,
+ERASE, 47891094958080, 47891094958080,
+STORE, 94443360825344, 94443360960511,
+STORE, 140737488347136, 140737488351231,
+STORE, 140722961661952, 140737488351231,
+ERASE, 140722961661952, 140722961661952,
+STORE, 140722961661952, 140722961666047,
+STORE, 94878388944896, 94878389657599,
+ERASE, 94878388944896, 94878388944896,
+STORE, 94878388944896, 94878388994047,
+STORE, 94878388994048, 94878389657599,
+ERASE, 94878388994048, 94878388994048,
+STORE, 94878388994048, 94878389538815,
+STORE, 94878389538816, 94878389637119,
+STORE, 94878389637120, 94878389657599,
+STORE, 140210690056192, 140210690228223,
+ERASE, 140210690056192, 140210690056192,
+STORE, 140210690056192, 140210690060287,
+STORE, 140210690060288, 140210690228223,
+ERASE, 140210690060288, 140210690060288,
+STORE, 140210690060288, 140210690183167,
+STORE, 140210690183168, 140210690215935,
+STORE, 140210690215936, 140210690224127,
+STORE, 140210690224128, 140210690228223,
+STORE, 140722963148800, 140722963152895,
+STORE, 140722963136512, 140722963148799,
+STORE, 47422104940544, 47422104948735,
+STORE, 47422104948736, 47422104956927,
+STORE, 47422104956928, 47422105063423,
+STORE, 47422104973312, 47422105063423,
+STORE, 47422104956928, 47422104973311,
+ERASE, 47422104973312, 47422104973312,
+STORE, 47422104973312, 47422105047039,
+STORE, 47422105047040, 47422105063423,
+STORE, 47422105026560, 47422105047039,
+STORE, 47422104973312, 47422105026559,
+ERASE, 47422104973312, 47422104973312,
+STORE, 47422104973312, 47422105026559,
+STORE, 47422105042944, 47422105047039,
+STORE, 47422105026560, 47422105042943,
+ERASE, 47422105026560, 47422105026560,
+STORE, 47422105026560, 47422105042943,
+STORE, 47422105055232, 47422105063423,
+STORE, 47422105047040, 47422105055231,
+ERASE, 47422105047040, 47422105047040,
+STORE, 47422105047040, 47422105055231,
+ERASE, 47422105055232, 47422105055232,
+STORE, 47422105055232, 47422105063423,
+STORE, 47422105063424, 47422106902527,
+STORE, 47422105202688, 47422106902527,
+STORE, 47422105063424, 47422105202687,
+ERASE, 47422105202688, 47422105202688,
+STORE, 47422105202688, 47422106861567,
+STORE, 47422106861568, 47422106902527,
+STORE, 47422106546176, 47422106861567,
+STORE, 47422105202688, 47422106546175,
+ERASE, 47422105202688, 47422105202688,
+STORE, 47422105202688, 47422106546175,
+STORE, 47422106857472, 47422106861567,
+STORE, 47422106546176, 47422106857471,
+ERASE, 47422106546176, 47422106546176,
+STORE, 47422106546176, 47422106857471,
+STORE, 47422106886144, 47422106902527,
+STORE, 47422106861568, 47422106886143,
+ERASE, 47422106861568, 47422106861568,
+STORE, 47422106861568, 47422106886143,
+ERASE, 47422106886144, 47422106886144,
+STORE, 47422106886144, 47422106902527,
+STORE, 47422106886144, 47422106914815,
+ERASE, 47422106861568, 47422106861568,
+STORE, 47422106861568, 47422106877951,
+STORE, 47422106877952, 47422106886143,
+ERASE, 47422105047040, 47422105047040,
+STORE, 47422105047040, 47422105051135,
+STORE, 47422105051136, 47422105055231,
+ERASE, 94878389637120, 94878389637120,
+STORE, 94878389637120, 94878389653503,
+STORE, 94878389653504, 94878389657599,
+ERASE, 140210690215936, 140210690215936,
+STORE, 140210690215936, 140210690220031,
+STORE, 140210690220032, 140210690224127,
+ERASE, 47422104940544, 47422104940544,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727690309632, 140737488351231,
+ERASE, 140727690309632, 140727690309632,
+STORE, 140727690309632, 140727690313727,
+STORE, 94121892208640, 94121892921343,
+ERASE, 94121892208640, 94121892208640,
+STORE, 94121892208640, 94121892257791,
+STORE, 94121892257792, 94121892921343,
+ERASE, 94121892257792, 94121892257792,
+STORE, 94121892257792, 94121892802559,
+STORE, 94121892802560, 94121892900863,
+STORE, 94121892900864, 94121892921343,
+STORE, 140662438326272, 140662438498303,
+ERASE, 140662438326272, 140662438326272,
+STORE, 140662438326272, 140662438330367,
+STORE, 140662438330368, 140662438498303,
+ERASE, 140662438330368, 140662438330368,
+STORE, 140662438330368, 140662438453247,
+STORE, 140662438453248, 140662438486015,
+STORE, 140662438486016, 140662438494207,
+STORE, 140662438494208, 140662438498303,
+STORE, 140727690379264, 140727690383359,
+STORE, 140727690366976, 140727690379263,
+STORE, 46970356670464, 46970356678655,
+STORE, 46970356678656, 46970356686847,
+STORE, 46970356686848, 46970356793343,
+STORE, 46970356703232, 46970356793343,
+STORE, 46970356686848, 46970356703231,
+ERASE, 46970356703232, 46970356703232,
+STORE, 46970356703232, 46970356776959,
+STORE, 46970356776960, 46970356793343,
+STORE, 46970356756480, 46970356776959,
+STORE, 46970356703232, 46970356756479,
+ERASE, 46970356703232, 46970356703232,
+STORE, 46970356703232, 46970356756479,
+STORE, 46970356772864, 46970356776959,
+STORE, 46970356756480, 46970356772863,
+ERASE, 46970356756480, 46970356756480,
+STORE, 46970356756480, 46970356772863,
+STORE, 46970356785152, 46970356793343,
+STORE, 46970356776960, 46970356785151,
+ERASE, 46970356776960, 46970356776960,
+STORE, 46970356776960, 46970356785151,
+ERASE, 46970356785152, 46970356785152,
+STORE, 46970356785152, 46970356793343,
+STORE, 46970356793344, 46970358632447,
+STORE, 46970356932608, 46970358632447,
+STORE, 46970356793344, 46970356932607,
+ERASE, 46970356932608, 46970356932608,
+STORE, 46970356932608, 46970358591487,
+STORE, 46970358591488, 46970358632447,
+STORE, 46970358276096, 46970358591487,
+STORE, 46970356932608, 46970358276095,
+ERASE, 46970356932608, 46970356932608,
+STORE, 46970356932608, 46970358276095,
+STORE, 46970358587392, 46970358591487,
+STORE, 46970358276096, 46970358587391,
+ERASE, 46970358276096, 46970358276096,
+STORE, 46970358276096, 46970358587391,
+STORE, 46970358616064, 46970358632447,
+STORE, 46970358591488, 46970358616063,
+ERASE, 46970358591488, 46970358591488,
+STORE, 46970358591488, 46970358616063,
+ERASE, 46970358616064, 46970358616064,
+STORE, 46970358616064, 46970358632447,
+STORE, 46970358616064, 46970358644735,
+ERASE, 46970358591488, 46970358591488,
+STORE, 46970358591488, 46970358607871,
+STORE, 46970358607872, 46970358616063,
+ERASE, 46970356776960, 46970356776960,
+STORE, 46970356776960, 46970356781055,
+STORE, 46970356781056, 46970356785151,
+ERASE, 94121892900864, 94121892900864,
+STORE, 94121892900864, 94121892917247,
+STORE, 94121892917248, 94121892921343,
+ERASE, 140662438486016, 140662438486016,
+STORE, 140662438486016, 140662438490111,
+STORE, 140662438490112, 140662438494207,
+ERASE, 46970356670464, 46970356670464,
+STORE, 94121898610688, 94121898745855,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737189351424, 140737488351231,
+ERASE, 140737189351424, 140737189351424,
+STORE, 140737189351424, 140737189355519,
+STORE, 93847948832768, 93847949545471,
+ERASE, 93847948832768, 93847948832768,
+STORE, 93847948832768, 93847948881919,
+STORE, 93847948881920, 93847949545471,
+ERASE, 93847948881920, 93847948881920,
+STORE, 93847948881920, 93847949426687,
+STORE, 93847949426688, 93847949524991,
+STORE, 93847949524992, 93847949545471,
+STORE, 139698989985792, 139698990157823,
+ERASE, 139698989985792, 139698989985792,
+STORE, 139698989985792, 139698989989887,
+STORE, 139698989989888, 139698990157823,
+ERASE, 139698989989888, 139698989989888,
+STORE, 139698989989888, 139698990112767,
+STORE, 139698990112768, 139698990145535,
+STORE, 139698990145536, 139698990153727,
+STORE, 139698990153728, 139698990157823,
+STORE, 140737189744640, 140737189748735,
+STORE, 140737189732352, 140737189744639,
+STORE, 47933805010944, 47933805019135,
+STORE, 47933805019136, 47933805027327,
+STORE, 47933805027328, 47933805133823,
+STORE, 47933805043712, 47933805133823,
+STORE, 47933805027328, 47933805043711,
+ERASE, 47933805043712, 47933805043712,
+STORE, 47933805043712, 47933805117439,
+STORE, 47933805117440, 47933805133823,
+STORE, 47933805096960, 47933805117439,
+STORE, 47933805043712, 47933805096959,
+ERASE, 47933805043712, 47933805043712,
+STORE, 47933805043712, 47933805096959,
+STORE, 47933805113344, 47933805117439,
+STORE, 47933805096960, 47933805113343,
+ERASE, 47933805096960, 47933805096960,
+STORE, 47933805096960, 47933805113343,
+STORE, 47933805125632, 47933805133823,
+STORE, 47933805117440, 47933805125631,
+ERASE, 47933805117440, 47933805117440,
+STORE, 47933805117440, 47933805125631,
+ERASE, 47933805125632, 47933805125632,
+STORE, 47933805125632, 47933805133823,
+STORE, 47933805133824, 47933806972927,
+STORE, 47933805273088, 47933806972927,
+STORE, 47933805133824, 47933805273087,
+ERASE, 47933805273088, 47933805273088,
+STORE, 47933805273088, 47933806931967,
+STORE, 47933806931968, 47933806972927,
+STORE, 47933806616576, 47933806931967,
+STORE, 47933805273088, 47933806616575,
+ERASE, 47933805273088, 47933805273088,
+STORE, 47933805273088, 47933806616575,
+STORE, 47933806927872, 47933806931967,
+STORE, 47933806616576, 47933806927871,
+ERASE, 47933806616576, 47933806616576,
+STORE, 47933806616576, 47933806927871,
+STORE, 47933806956544, 47933806972927,
+STORE, 47933806931968, 47933806956543,
+ERASE, 47933806931968, 47933806931968,
+STORE, 47933806931968, 47933806956543,
+ERASE, 47933806956544, 47933806956544,
+STORE, 47933806956544, 47933806972927,
+STORE, 47933806956544, 47933806985215,
+ERASE, 47933806931968, 47933806931968,
+STORE, 47933806931968, 47933806948351,
+STORE, 47933806948352, 47933806956543,
+ERASE, 47933805117440, 47933805117440,
+STORE, 47933805117440, 47933805121535,
+STORE, 47933805121536, 47933805125631,
+ERASE, 93847949524992, 93847949524992,
+STORE, 93847949524992, 93847949541375,
+STORE, 93847949541376, 93847949545471,
+ERASE, 139698990145536, 139698990145536,
+STORE, 139698990145536, 139698990149631,
+STORE, 139698990149632, 139698990153727,
+ERASE, 47933805010944, 47933805010944,
+STORE, 140737488347136, 140737488351231,
+STORE, 140725553991680, 140737488351231,
+ERASE, 140725553991680, 140725553991680,
+STORE, 140725553991680, 140725553995775,
+STORE, 93980056248320, 93980056961023,
+ERASE, 93980056248320, 93980056248320,
+STORE, 93980056248320, 93980056297471,
+STORE, 93980056297472, 93980056961023,
+ERASE, 93980056297472, 93980056297472,
+STORE, 93980056297472, 93980056842239,
+STORE, 93980056842240, 93980056940543,
+STORE, 93980056940544, 93980056961023,
+STORE, 140146588971008, 140146589143039,
+ERASE, 140146588971008, 140146588971008,
+STORE, 140146588971008, 140146588975103,
+STORE, 140146588975104, 140146589143039,
+ERASE, 140146588975104, 140146588975104,
+STORE, 140146588975104, 140146589097983,
+STORE, 140146589097984, 140146589130751,
+STORE, 140146589130752, 140146589138943,
+STORE, 140146589138944, 140146589143039,
+STORE, 140725554860032, 140725554864127,
+STORE, 140725554847744, 140725554860031,
+STORE, 47486206025728, 47486206033919,
+STORE, 47486206033920, 47486206042111,
+STORE, 47486206042112, 47486206148607,
+STORE, 47486206058496, 47486206148607,
+STORE, 47486206042112, 47486206058495,
+ERASE, 47486206058496, 47486206058496,
+STORE, 47486206058496, 47486206132223,
+STORE, 47486206132224, 47486206148607,
+STORE, 47486206111744, 47486206132223,
+STORE, 47486206058496, 47486206111743,
+ERASE, 47486206058496, 47486206058496,
+STORE, 47486206058496, 47486206111743,
+STORE, 47486206128128, 47486206132223,
+STORE, 47486206111744, 47486206128127,
+ERASE, 47486206111744, 47486206111744,
+STORE, 47486206111744, 47486206128127,
+STORE, 47486206140416, 47486206148607,
+STORE, 47486206132224, 47486206140415,
+ERASE, 47486206132224, 47486206132224,
+STORE, 47486206132224, 47486206140415,
+ERASE, 47486206140416, 47486206140416,
+STORE, 47486206140416, 47486206148607,
+STORE, 47486206148608, 47486207987711,
+STORE, 47486206287872, 47486207987711,
+STORE, 47486206148608, 47486206287871,
+ERASE, 47486206287872, 47486206287872,
+STORE, 47486206287872, 47486207946751,
+STORE, 47486207946752, 47486207987711,
+STORE, 47486207631360, 47486207946751,
+STORE, 47486206287872, 47486207631359,
+ERASE, 47486206287872, 47486206287872,
+STORE, 47486206287872, 47486207631359,
+STORE, 47486207942656, 47486207946751,
+STORE, 47486207631360, 47486207942655,
+ERASE, 47486207631360, 47486207631360,
+STORE, 47486207631360, 47486207942655,
+STORE, 47486207971328, 47486207987711,
+STORE, 47486207946752, 47486207971327,
+ERASE, 47486207946752, 47486207946752,
+STORE, 47486207946752, 47486207971327,
+ERASE, 47486207971328, 47486207971328,
+STORE, 47486207971328, 47486207987711,
+STORE, 47486207971328, 47486207999999,
+ERASE, 47486207946752, 47486207946752,
+STORE, 47486207946752, 47486207963135,
+STORE, 47486207963136, 47486207971327,
+ERASE, 47486206132224, 47486206132224,
+STORE, 47486206132224, 47486206136319,
+STORE, 47486206136320, 47486206140415,
+ERASE, 93980056940544, 93980056940544,
+STORE, 93980056940544, 93980056956927,
+STORE, 93980056956928, 93980056961023,
+ERASE, 140146589130752, 140146589130752,
+STORE, 140146589130752, 140146589134847,
+STORE, 140146589134848, 140146589138943,
+ERASE, 47486206025728, 47486206025728,
+STORE, 93980070006784, 93980070141951,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727334776832, 140737488351231,
+ERASE, 140727334776832, 140727334776832,
+STORE, 140727334776832, 140727334780927,
+STORE, 94049747247104, 94049747959807,
+ERASE, 94049747247104, 94049747247104,
+STORE, 94049747247104, 94049747296255,
+STORE, 94049747296256, 94049747959807,
+ERASE, 94049747296256, 94049747296256,
+STORE, 94049747296256, 94049747841023,
+STORE, 94049747841024, 94049747939327,
+STORE, 94049747939328, 94049747959807,
+STORE, 140227307216896, 140227307388927,
+ERASE, 140227307216896, 140227307216896,
+STORE, 140227307216896, 140227307220991,
+STORE, 140227307220992, 140227307388927,
+ERASE, 140227307220992, 140227307220992,
+STORE, 140227307220992, 140227307343871,
+STORE, 140227307343872, 140227307376639,
+STORE, 140227307376640, 140227307384831,
+STORE, 140227307384832, 140227307388927,
+STORE, 140727335337984, 140727335342079,
+STORE, 140727335325696, 140727335337983,
+STORE, 47405487779840, 47405487788031,
+STORE, 47405487788032, 47405487796223,
+STORE, 47405487796224, 47405487902719,
+STORE, 47405487812608, 47405487902719,
+STORE, 47405487796224, 47405487812607,
+ERASE, 47405487812608, 47405487812608,
+STORE, 47405487812608, 47405487886335,
+STORE, 47405487886336, 47405487902719,
+STORE, 47405487865856, 47405487886335,
+STORE, 47405487812608, 47405487865855,
+ERASE, 47405487812608, 47405487812608,
+STORE, 47405487812608, 47405487865855,
+STORE, 47405487882240, 47405487886335,
+STORE, 47405487865856, 47405487882239,
+ERASE, 47405487865856, 47405487865856,
+STORE, 47405487865856, 47405487882239,
+STORE, 47405487894528, 47405487902719,
+STORE, 47405487886336, 47405487894527,
+ERASE, 47405487886336, 47405487886336,
+STORE, 47405487886336, 47405487894527,
+ERASE, 47405487894528, 47405487894528,
+STORE, 47405487894528, 47405487902719,
+STORE, 47405487902720, 47405489741823,
+STORE, 47405488041984, 47405489741823,
+STORE, 47405487902720, 47405488041983,
+ERASE, 47405488041984, 47405488041984,
+STORE, 47405488041984, 47405489700863,
+STORE, 47405489700864, 47405489741823,
+STORE, 47405489385472, 47405489700863,
+STORE, 47405488041984, 47405489385471,
+ERASE, 47405488041984, 47405488041984,
+STORE, 47405488041984, 47405489385471,
+STORE, 47405489696768, 47405489700863,
+STORE, 47405489385472, 47405489696767,
+ERASE, 47405489385472, 47405489385472,
+STORE, 47405489385472, 47405489696767,
+STORE, 47405489725440, 47405489741823,
+STORE, 47405489700864, 47405489725439,
+ERASE, 47405489700864, 47405489700864,
+STORE, 47405489700864, 47405489725439,
+ERASE, 47405489725440, 47405489725440,
+STORE, 47405489725440, 47405489741823,
+STORE, 47405489725440, 47405489754111,
+ERASE, 47405489700864, 47405489700864,
+STORE, 47405489700864, 47405489717247,
+STORE, 47405489717248, 47405489725439,
+ERASE, 47405487886336, 47405487886336,
+STORE, 47405487886336, 47405487890431,
+STORE, 47405487890432, 47405487894527,
+ERASE, 94049747939328, 94049747939328,
+STORE, 94049747939328, 94049747955711,
+STORE, 94049747955712, 94049747959807,
+ERASE, 140227307376640, 140227307376640,
+STORE, 140227307376640, 140227307380735,
+STORE, 140227307380736, 140227307384831,
+ERASE, 47405487779840, 47405487779840,
+STORE, 94049758810112, 94049758945279,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727079718912, 140737488351231,
+ERASE, 140727079718912, 140727079718912,
+STORE, 140727079718912, 140727079723007,
+STORE, 94250996527104, 94250997239807,
+ERASE, 94250996527104, 94250996527104,
+STORE, 94250996527104, 94250996576255,
+STORE, 94250996576256, 94250997239807,
+ERASE, 94250996576256, 94250996576256,
+STORE, 94250996576256, 94250997121023,
+STORE, 94250997121024, 94250997219327,
+STORE, 94250997219328, 94250997239807,
+STORE, 140060022587392, 140060022759423,
+ERASE, 140060022587392, 140060022587392,
+STORE, 140060022587392, 140060022591487,
+STORE, 140060022591488, 140060022759423,
+ERASE, 140060022591488, 140060022591488,
+STORE, 140060022591488, 140060022714367,
+STORE, 140060022714368, 140060022747135,
+STORE, 140060022747136, 140060022755327,
+STORE, 140060022755328, 140060022759423,
+STORE, 140727079788544, 140727079792639,
+STORE, 140727079776256, 140727079788543,
+/* this next one caused issues when lowering the efficiency */
+STORE, 47572772409344, 47572772417535,
+STORE, 47572772417536, 47572772425727,
+STORE, 47572772425728, 47572772532223,
+STORE, 47572772442112, 47572772532223,
+STORE, 47572772425728, 47572772442111,
+ERASE, 47572772442112, 47572772442112,
+STORE, 47572772442112, 47572772515839,
+STORE, 47572772515840, 47572772532223,
+STORE, 47572772495360, 47572772515839,
+STORE, 47572772442112, 47572772495359,
+ERASE, 47572772442112, 47572772442112,
+STORE, 47572772442112, 47572772495359,
+STORE, 47572772511744, 47572772515839,
+STORE, 47572772495360, 47572772511743,
+ERASE, 47572772495360, 47572772495360,
+STORE, 47572772495360, 47572772511743,
+STORE, 47572772524032, 47572772532223,
+STORE, 47572772515840, 47572772524031,
+ERASE, 47572772515840, 47572772515840,
+STORE, 47572772515840, 47572772524031,
+ERASE, 47572772524032, 47572772524032,
+STORE, 47572772524032, 47572772532223,
+STORE, 47572772532224, 47572774371327,
+STORE, 47572772671488, 47572774371327,
+STORE, 47572772532224, 47572772671487,
+ERASE, 47572772671488, 47572772671488,
+STORE, 47572772671488, 47572774330367,
+STORE, 47572774330368, 47572774371327,
+STORE, 47572774014976, 47572774330367,
+STORE, 47572772671488, 47572774014975,
+ERASE, 47572772671488, 47572772671488,
+STORE, 47572772671488, 47572774014975,
+STORE, 47572774326272, 47572774330367,
+STORE, 47572774014976, 47572774326271,
+ERASE, 47572774014976, 47572774014976,
+STORE, 47572774014976, 47572774326271,
+STORE, 47572774354944, 47572774371327,
+STORE, 47572774330368, 47572774354943,
+ERASE, 47572774330368, 47572774330368,
+STORE, 47572774330368, 47572774354943,
+ERASE, 47572774354944, 47572774354944,
+STORE, 47572774354944, 47572774371327,
+STORE, 47572774354944, 47572774383615,
+ERASE, 47572774330368, 47572774330368,
+STORE, 47572774330368, 47572774346751,
+STORE, 47572774346752, 47572774354943,
+ERASE, 47572772515840, 47572772515840,
+STORE, 47572772515840, 47572772519935,
+STORE, 47572772519936, 47572772524031,
+ERASE, 94250997219328, 94250997219328,
+STORE, 94250997219328, 94250997235711,
+STORE, 94250997235712, 94250997239807,
+ERASE, 140060022747136, 140060022747136,
+STORE, 140060022747136, 140060022751231,
+STORE, 140060022751232, 140060022755327,
+ERASE, 47572772409344, 47572772409344,
+STORE, 94251018305536, 94251018440703,
+STORE, 140737488347136, 140737488351231,
+STORE, 140730012389376, 140737488351231,
+ERASE, 140730012389376, 140730012389376,
+STORE, 140730012389376, 140730012393471,
+STORE, 94382607675392, 94382607695871,
+ERASE, 94382607675392, 94382607675392,
+STORE, 94382607675392, 94382607679487,
+STORE, 94382607679488, 94382607695871,
+ERASE, 94382607679488, 94382607679488,
+STORE, 94382607679488, 94382607683583,
+STORE, 94382607683584, 94382607687679,
+STORE, 94382607687680, 94382607695871,
+STORE, 140252451454976, 140252451627007,
+ERASE, 140252451454976, 140252451454976,
+STORE, 140252451454976, 140252451459071,
+STORE, 140252451459072, 140252451627007,
+ERASE, 140252451459072, 140252451459072,
+STORE, 140252451459072, 140252451581951,
+STORE, 140252451581952, 140252451614719,
+STORE, 140252451614720, 140252451622911,
+STORE, 140252451622912, 140252451627007,
+STORE, 140730013548544, 140730013552639,
+STORE, 140730013536256, 140730013548543,
+STORE, 47380343541760, 47380343549951,
+STORE, 47380343549952, 47380343558143,
+STORE, 47380343558144, 47380345397247,
+STORE, 47380343697408, 47380345397247,
+STORE, 47380343558144, 47380343697407,
+ERASE, 47380343697408, 47380343697408,
+STORE, 47380343697408, 47380345356287,
+STORE, 47380345356288, 47380345397247,
+STORE, 47380345040896, 47380345356287,
+STORE, 47380343697408, 47380345040895,
+ERASE, 47380343697408, 47380343697408,
+STORE, 47380343697408, 47380345040895,
+STORE, 47380345352192, 47380345356287,
+STORE, 47380345040896, 47380345352191,
+ERASE, 47380345040896, 47380345040896,
+STORE, 47380345040896, 47380345352191,
+STORE, 47380345380864, 47380345397247,
+STORE, 47380345356288, 47380345380863,
+ERASE, 47380345356288, 47380345356288,
+STORE, 47380345356288, 47380345380863,
+ERASE, 47380345380864, 47380345380864,
+STORE, 47380345380864, 47380345397247,
+ERASE, 47380345356288, 47380345356288,
+STORE, 47380345356288, 47380345372671,
+STORE, 47380345372672, 47380345380863,
+ERASE, 94382607687680, 94382607687680,
+STORE, 94382607687680, 94382607691775,
+STORE, 94382607691776, 94382607695871,
+ERASE, 140252451614720, 140252451614720,
+STORE, 140252451614720, 140252451618815,
+STORE, 140252451618816, 140252451622911,
+ERASE, 47380343541760, 47380343541760,
+STORE, 94382626803712, 94382626938879,
+STORE, 140737488347136, 140737488351231,
+STORE, 140730900271104, 140737488351231,
+ERASE, 140730900271104, 140730900271104,
+STORE, 140730900271104, 140730900275199,
+STORE, 93855478120448, 93855478337535,
+ERASE, 93855478120448, 93855478120448,
+STORE, 93855478120448, 93855478198271,
+STORE, 93855478198272, 93855478337535,
+ERASE, 93855478198272, 93855478198272,
+STORE, 93855478198272, 93855478243327,
+STORE, 93855478243328, 93855478288383,
+STORE, 93855478288384, 93855478337535,
+STORE, 140092686573568, 140092686745599,
+ERASE, 140092686573568, 140092686573568,
+STORE, 140092686573568, 140092686577663,
+STORE, 140092686577664, 140092686745599,
+ERASE, 140092686577664, 140092686577664,
+STORE, 140092686577664, 140092686700543,
+STORE, 140092686700544, 140092686733311,
+STORE, 140092686733312, 140092686741503,
+STORE, 140092686741504, 140092686745599,
+STORE, 140730900537344, 140730900541439,
+STORE, 140730900525056, 140730900537343,
+STORE, 47540108423168, 47540108431359,
+STORE, 47540108431360, 47540108439551,
+STORE, 47540108439552, 47540110278655,
+STORE, 47540108578816, 47540110278655,
+STORE, 47540108439552, 47540108578815,
+ERASE, 47540108578816, 47540108578816,
+STORE, 47540108578816, 47540110237695,
+STORE, 47540110237696, 47540110278655,
+STORE, 47540109922304, 47540110237695,
+STORE, 47540108578816, 47540109922303,
+ERASE, 47540108578816, 47540108578816,
+STORE, 47540108578816, 47540109922303,
+STORE, 47540110233600, 47540110237695,
+STORE, 47540109922304, 47540110233599,
+ERASE, 47540109922304, 47540109922304,
+STORE, 47540109922304, 47540110233599,
+STORE, 47540110262272, 47540110278655,
+STORE, 47540110237696, 47540110262271,
+ERASE, 47540110237696, 47540110237696,
+STORE, 47540110237696, 47540110262271,
+ERASE, 47540110262272, 47540110262272,
+STORE, 47540110262272, 47540110278655,
+ERASE, 47540110237696, 47540110237696,
+STORE, 47540110237696, 47540110254079,
+STORE, 47540110254080, 47540110262271,
+ERASE, 93855478288384, 93855478288384,
+STORE, 93855478288384, 93855478333439,
+STORE, 93855478333440, 93855478337535,
+ERASE, 140092686733312, 140092686733312,
+STORE, 140092686733312, 140092686737407,
+STORE, 140092686737408, 140092686741503,
+ERASE, 47540108423168, 47540108423168,
+STORE, 93855492222976, 93855492358143,
+STORE, 93855492222976, 93855492493311,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733498146816, 140737488351231,
+ERASE, 140733498146816, 140733498146816,
+STORE, 140733498146816, 140733498150911,
+STORE, 94170739654656, 94170740367359,
+ERASE, 94170739654656, 94170739654656,
+STORE, 94170739654656, 94170739703807,
+STORE, 94170739703808, 94170740367359,
+ERASE, 94170739703808, 94170739703808,
+STORE, 94170739703808, 94170740248575,
+STORE, 94170740248576, 94170740346879,
+STORE, 94170740346880, 94170740367359,
+STORE, 140024788877312, 140024789049343,
+ERASE, 140024788877312, 140024788877312,
+STORE, 140024788877312, 140024788881407,
+STORE, 140024788881408, 140024789049343,
+ERASE, 140024788881408, 140024788881408,
+STORE, 140024788881408, 140024789004287,
+STORE, 140024789004288, 140024789037055,
+STORE, 140024789037056, 140024789045247,
+STORE, 140024789045248, 140024789049343,
+STORE, 140733499023360, 140733499027455,
+STORE, 140733499011072, 140733499023359,
+STORE, 47608006119424, 47608006127615,
+STORE, 47608006127616, 47608006135807,
+STORE, 47608006135808, 47608006242303,
+STORE, 47608006152192, 47608006242303,
+STORE, 47608006135808, 47608006152191,
+ERASE, 47608006152192, 47608006152192,
+STORE, 47608006152192, 47608006225919,
+STORE, 47608006225920, 47608006242303,
+STORE, 47608006205440, 47608006225919,
+STORE, 47608006152192, 47608006205439,
+ERASE, 47608006152192, 47608006152192,
+STORE, 47608006152192, 47608006205439,
+STORE, 47608006221824, 47608006225919,
+STORE, 47608006205440, 47608006221823,
+ERASE, 47608006205440, 47608006205440,
+STORE, 47608006205440, 47608006221823,
+STORE, 47608006234112, 47608006242303,
+STORE, 47608006225920, 47608006234111,
+ERASE, 47608006225920, 47608006225920,
+STORE, 47608006225920, 47608006234111,
+ERASE, 47608006234112, 47608006234112,
+STORE, 47608006234112, 47608006242303,
+STORE, 47608006242304, 47608008081407,
+STORE, 47608006381568, 47608008081407,
+STORE, 47608006242304, 47608006381567,
+ERASE, 47608006381568, 47608006381568,
+STORE, 47608006381568, 47608008040447,
+STORE, 47608008040448, 47608008081407,
+STORE, 47608007725056, 47608008040447,
+STORE, 47608006381568, 47608007725055,
+ERASE, 47608006381568, 47608006381568,
+STORE, 47608006381568, 47608007725055,
+STORE, 47608008036352, 47608008040447,
+STORE, 47608007725056, 47608008036351,
+ERASE, 47608007725056, 47608007725056,
+STORE, 47608007725056, 47608008036351,
+STORE, 47608008065024, 47608008081407,
+STORE, 47608008040448, 47608008065023,
+ERASE, 47608008040448, 47608008040448,
+STORE, 47608008040448, 47608008065023,
+ERASE, 47608008065024, 47608008065024,
+STORE, 47608008065024, 47608008081407,
+STORE, 47608008065024, 47608008093695,
+ERASE, 47608008040448, 47608008040448,
+STORE, 47608008040448, 47608008056831,
+STORE, 47608008056832, 47608008065023,
+ERASE, 47608006225920, 47608006225920,
+STORE, 47608006225920, 47608006230015,
+STORE, 47608006230016, 47608006234111,
+ERASE, 94170740346880, 94170740346880,
+STORE, 94170740346880, 94170740363263,
+STORE, 94170740363264, 94170740367359,
+ERASE, 140024789037056, 140024789037056,
+STORE, 140024789037056, 140024789041151,
+STORE, 140024789041152, 140024789045247,
+ERASE, 47608006119424, 47608006119424,
+STORE, 140737488347136, 140737488351231,
+STORE, 140730264326144, 140737488351231,
+ERASE, 140730264326144, 140730264326144,
+STORE, 140730264326144, 140730264330239,
+STORE, 94653216407552, 94653217120255,
+ERASE, 94653216407552, 94653216407552,
+STORE, 94653216407552, 94653216456703,
+STORE, 94653216456704, 94653217120255,
+ERASE, 94653216456704, 94653216456704,
+STORE, 94653216456704, 94653217001471,
+STORE, 94653217001472, 94653217099775,
+STORE, 94653217099776, 94653217120255,
+STORE, 140103617011712, 140103617183743,
+ERASE, 140103617011712, 140103617011712,
+STORE, 140103617011712, 140103617015807,
+STORE, 140103617015808, 140103617183743,
+ERASE, 140103617015808, 140103617015808,
+STORE, 140103617015808, 140103617138687,
+STORE, 140103617138688, 140103617171455,
+STORE, 140103617171456, 140103617179647,
+STORE, 140103617179648, 140103617183743,
+STORE, 140730265427968, 140730265432063,
+STORE, 140730265415680, 140730265427967,
+STORE, 47529177985024, 47529177993215,
+STORE, 47529177993216, 47529178001407,
+STORE, 47529178001408, 47529178107903,
+STORE, 47529178017792, 47529178107903,
+STORE, 47529178001408, 47529178017791,
+ERASE, 47529178017792, 47529178017792,
+STORE, 47529178017792, 47529178091519,
+STORE, 47529178091520, 47529178107903,
+STORE, 47529178071040, 47529178091519,
+STORE, 47529178017792, 47529178071039,
+ERASE, 47529178017792, 47529178017792,
+STORE, 47529178017792, 47529178071039,
+STORE, 47529178087424, 47529178091519,
+STORE, 47529178071040, 47529178087423,
+ERASE, 47529178071040, 47529178071040,
+STORE, 47529178071040, 47529178087423,
+STORE, 47529178099712, 47529178107903,
+STORE, 47529178091520, 47529178099711,
+ERASE, 47529178091520, 47529178091520,
+STORE, 47529178091520, 47529178099711,
+ERASE, 47529178099712, 47529178099712,
+STORE, 47529178099712, 47529178107903,
+STORE, 47529178107904, 47529179947007,
+STORE, 47529178247168, 47529179947007,
+STORE, 47529178107904, 47529178247167,
+ERASE, 47529178247168, 47529178247168,
+STORE, 47529178247168, 47529179906047,
+STORE, 47529179906048, 47529179947007,
+STORE, 47529179590656, 47529179906047,
+STORE, 47529178247168, 47529179590655,
+ERASE, 47529178247168, 47529178247168,
+STORE, 47529178247168, 47529179590655,
+STORE, 47529179901952, 47529179906047,
+STORE, 47529179590656, 47529179901951,
+ERASE, 47529179590656, 47529179590656,
+STORE, 47529179590656, 47529179901951,
+STORE, 47529179930624, 47529179947007,
+STORE, 47529179906048, 47529179930623,
+ERASE, 47529179906048, 47529179906048,
+STORE, 47529179906048, 47529179930623,
+ERASE, 47529179930624, 47529179930624,
+STORE, 47529179930624, 47529179947007,
+STORE, 47529179930624, 47529179959295,
+ERASE, 47529179906048, 47529179906048,
+STORE, 47529179906048, 47529179922431,
+STORE, 47529179922432, 47529179930623,
+ERASE, 47529178091520, 47529178091520,
+STORE, 47529178091520, 47529178095615,
+STORE, 47529178095616, 47529178099711,
+ERASE, 94653217099776, 94653217099776,
+STORE, 94653217099776, 94653217116159,
+STORE, 94653217116160, 94653217120255,
+ERASE, 140103617171456, 140103617171456,
+STORE, 140103617171456, 140103617175551,
+STORE, 140103617175552, 140103617179647,
+ERASE, 47529177985024, 47529177985024,
+STORE, 94653241135104, 94653241270271,
+STORE, 140737488347136, 140737488351231,
+STORE, 140736284549120, 140737488351231,
+ERASE, 140736284549120, 140736284549120,
+STORE, 140736284549120, 140736284553215,
+STORE, 93963663822848, 93963664506879,
+ERASE, 93963663822848, 93963663822848,
+STORE, 93963663822848, 93963663884287,
+STORE, 93963663884288, 93963664506879,
+ERASE, 93963663884288, 93963663884288,
+STORE, 93963663884288, 93963664240639,
+STORE, 93963664240640, 93963664379903,
+STORE, 93963664379904, 93963664506879,
+STORE, 140450188439552, 140450188611583,
+ERASE, 140450188439552, 140450188439552,
+STORE, 140450188439552, 140450188443647,
+STORE, 140450188443648, 140450188611583,
+ERASE, 140450188443648, 140450188443648,
+STORE, 140450188443648, 140450188566527,
+STORE, 140450188566528, 140450188599295,
+STORE, 140450188599296, 140450188607487,
+STORE, 140450188607488, 140450188611583,
+STORE, 140736284577792, 140736284581887,
+STORE, 140736284565504, 140736284577791,
+STORE, 47182606557184, 47182606565375,
+STORE, 47182606565376, 47182606573567,
+STORE, 47182606573568, 47182608412671,
+STORE, 47182606712832, 47182608412671,
+STORE, 47182606573568, 47182606712831,
+ERASE, 47182606712832, 47182606712832,
+STORE, 47182606712832, 47182608371711,
+STORE, 47182608371712, 47182608412671,
+STORE, 47182608056320, 47182608371711,
+STORE, 47182606712832, 47182608056319,
+ERASE, 47182606712832, 47182606712832,
+STORE, 47182606712832, 47182608056319,
+STORE, 47182608367616, 47182608371711,
+STORE, 47182608056320, 47182608367615,
+ERASE, 47182608056320, 47182608056320,
+STORE, 47182608056320, 47182608367615,
+STORE, 47182608396288, 47182608412671,
+STORE, 47182608371712, 47182608396287,
+ERASE, 47182608371712, 47182608371712,
+STORE, 47182608371712, 47182608396287,
+ERASE, 47182608396288, 47182608396288,
+STORE, 47182608396288, 47182608412671,
+STORE, 47182608412672, 47182608523263,
+STORE, 47182608429056, 47182608523263,
+STORE, 47182608412672, 47182608429055,
+ERASE, 47182608429056, 47182608429056,
+STORE, 47182608429056, 47182608515071,
+STORE, 47182608515072, 47182608523263,
+STORE, 47182608490496, 47182608515071,
+STORE, 47182608429056, 47182608490495,
+ERASE, 47182608429056, 47182608429056,
+STORE, 47182608429056, 47182608490495,
+STORE, 47182608510976, 47182608515071,
+STORE, 47182608490496, 47182608510975,
+ERASE, 47182608490496, 47182608490496,
+STORE, 47182608490496, 47182608510975,
+ERASE, 47182608515072, 47182608515072,
+STORE, 47182608515072, 47182608523263,
+STORE, 47182608523264, 47182608568319,
+ERASE, 47182608523264, 47182608523264,
+STORE, 47182608523264, 47182608531455,
+STORE, 47182608531456, 47182608568319,
+STORE, 47182608551936, 47182608568319,
+STORE, 47182608531456, 47182608551935,
+ERASE, 47182608531456, 47182608531456,
+STORE, 47182608531456, 47182608551935,
+STORE, 47182608560128, 47182608568319,
+STORE, 47182608551936, 47182608560127,
+ERASE, 47182608551936, 47182608551936,
+STORE, 47182608551936, 47182608568319,
+ERASE, 47182608551936, 47182608551936,
+STORE, 47182608551936, 47182608560127,
+STORE, 47182608560128, 47182608568319,
+ERASE, 47182608560128, 47182608560128,
+STORE, 47182608560128, 47182608568319,
+STORE, 47182608568320, 47182608916479,
+STORE, 47182608609280, 47182608916479,
+STORE, 47182608568320, 47182608609279,
+ERASE, 47182608609280, 47182608609280,
+STORE, 47182608609280, 47182608891903,
+STORE, 47182608891904, 47182608916479,
+STORE, 47182608822272, 47182608891903,
+STORE, 47182608609280, 47182608822271,
+ERASE, 47182608609280, 47182608609280,
+STORE, 47182608609280, 47182608822271,
+STORE, 47182608887808, 47182608891903,
+STORE, 47182608822272, 47182608887807,
+ERASE, 47182608822272, 47182608822272,
+STORE, 47182608822272, 47182608887807,
+ERASE, 47182608891904, 47182608891904,
+STORE, 47182608891904, 47182608916479,
+STORE, 47182608916480, 47182611177471,
+STORE, 47182609068032, 47182611177471,
+STORE, 47182608916480, 47182609068031,
+ERASE, 47182609068032, 47182609068032,
+STORE, 47182609068032, 47182611161087,
+STORE, 47182611161088, 47182611177471,
+STORE, 47182611169280, 47182611177471,
+STORE, 47182611161088, 47182611169279,
+ERASE, 47182611161088, 47182611161088,
+STORE, 47182611161088, 47182611169279,
+ERASE, 47182611169280, 47182611169280,
+STORE, 47182611169280, 47182611177471,
+STORE, 47182611177472, 47182611312639,
+ERASE, 47182611177472, 47182611177472,
+STORE, 47182611177472, 47182611202047,
+STORE, 47182611202048, 47182611312639,
+STORE, 47182611263488, 47182611312639,
+STORE, 47182611202048, 47182611263487,
+ERASE, 47182611202048, 47182611202048,
+STORE, 47182611202048, 47182611263487,
+STORE, 47182611288064, 47182611312639,
+STORE, 47182611263488, 47182611288063,
+ERASE, 47182611263488, 47182611263488,
+STORE, 47182611263488, 47182611312639,
+ERASE, 47182611263488, 47182611263488,
+STORE, 47182611263488, 47182611288063,
+STORE, 47182611288064, 47182611312639,
+STORE, 47182611296256, 47182611312639,
+STORE, 47182611288064, 47182611296255,
+ERASE, 47182611288064, 47182611288064,
+STORE, 47182611288064, 47182611296255,
+ERASE, 47182611296256, 47182611296256,
+STORE, 47182611296256, 47182611312639,
+STORE, 47182611296256, 47182611320831,
+STORE, 47182611320832, 47182611484671,
+ERASE, 47182611320832, 47182611320832,
+STORE, 47182611320832, 47182611333119,
+STORE, 47182611333120, 47182611484671,
+STORE, 47182611431424, 47182611484671,
+STORE, 47182611333120, 47182611431423,
+ERASE, 47182611333120, 47182611333120,
+STORE, 47182611333120, 47182611431423,
+STORE, 47182611476480, 47182611484671,
+STORE, 47182611431424, 47182611476479,
+ERASE, 47182611431424, 47182611431424,
+STORE, 47182611431424, 47182611484671,
+ERASE, 47182611431424, 47182611431424,
+STORE, 47182611431424, 47182611476479,
+STORE, 47182611476480, 47182611484671,
+ERASE, 47182611476480, 47182611476480,
+STORE, 47182611476480, 47182611484671,
+STORE, 47182611484672, 47182612082687,
+STORE, 47182611603456, 47182612082687,
+STORE, 47182611484672, 47182611603455,
+ERASE, 47182611603456, 47182611603456,
+STORE, 47182611603456, 47182612029439,
+STORE, 47182612029440, 47182612082687,
+STORE, 47182611918848, 47182612029439,
+STORE, 47182611603456, 47182611918847,
+ERASE, 47182611603456, 47182611603456,
+STORE, 47182611603456, 47182611918847,
+STORE, 47182612025344, 47182612029439,
+STORE, 47182611918848, 47182612025343,
+ERASE, 47182611918848, 47182611918848,
+STORE, 47182611918848, 47182612025343,
+ERASE, 47182612029440, 47182612029440,
+STORE, 47182612029440, 47182612082687,
+STORE, 47182612082688, 47182615134207,
+STORE, 47182612627456, 47182615134207,
+STORE, 47182612082688, 47182612627455,
+ERASE, 47182612627456, 47182612627456,
+STORE, 47182612627456, 47182614913023,
+STORE, 47182614913024, 47182615134207,
+STORE, 47182614323200, 47182614913023,
+STORE, 47182612627456, 47182614323199,
+ERASE, 47182612627456, 47182612627456,
+STORE, 47182612627456, 47182614323199,
+STORE, 47182614908928, 47182614913023,
+STORE, 47182614323200, 47182614908927,
+ERASE, 47182614323200, 47182614323200,
+STORE, 47182614323200, 47182614908927,
+STORE, 47182615117824, 47182615134207,
+STORE, 47182614913024, 47182615117823,
+ERASE, 47182614913024, 47182614913024,
+STORE, 47182614913024, 47182615117823,
+ERASE, 47182615117824, 47182615117824,
+STORE, 47182615117824, 47182615134207,
+STORE, 47182615134208, 47182615166975,
+ERASE, 47182615134208, 47182615134208,
+STORE, 47182615134208, 47182615142399,
+STORE, 47182615142400, 47182615166975,
+STORE, 47182615154688, 47182615166975,
+STORE, 47182615142400, 47182615154687,
+ERASE, 47182615142400, 47182615142400,
+STORE, 47182615142400, 47182615154687,
+STORE, 47182615158784, 47182615166975,
+STORE, 47182615154688, 47182615158783,
+ERASE, 47182615154688, 47182615154688,
+STORE, 47182615154688, 47182615166975,
+ERASE, 47182615154688, 47182615154688,
+STORE, 47182615154688, 47182615158783,
+STORE, 47182615158784, 47182615166975,
+ERASE, 47182615158784, 47182615158784,
+STORE, 47182615158784, 47182615166975,
+STORE, 47182615166976, 47182615203839,
+ERASE, 47182615166976, 47182615166976,
+STORE, 47182615166976, 47182615175167,
+STORE, 47182615175168, 47182615203839,
+STORE, 47182615191552, 47182615203839,
+STORE, 47182615175168, 47182615191551,
+ERASE, 47182615175168, 47182615175168,
+STORE, 47182615175168, 47182615191551,
+STORE, 47182615195648, 47182615203839,
+STORE, 47182615191552, 47182615195647,
+ERASE, 47182615191552, 47182615191552,
+STORE, 47182615191552, 47182615203839,
+ERASE, 47182615191552, 47182615191552,
+STORE, 47182615191552, 47182615195647,
+STORE, 47182615195648, 47182615203839,
+ERASE, 47182615195648, 47182615195648,
+STORE, 47182615195648, 47182615203839,
+STORE, 47182615203840, 47182615678975,
+ERASE, 47182615203840, 47182615203840,
+STORE, 47182615203840, 47182615212031,
+STORE, 47182615212032, 47182615678975,
+STORE, 47182615547904, 47182615678975,
+STORE, 47182615212032, 47182615547903,
+ERASE, 47182615212032, 47182615212032,
+STORE, 47182615212032, 47182615547903,
+STORE, 47182615670784, 47182615678975,
+STORE, 47182615547904, 47182615670783,
+ERASE, 47182615547904, 47182615547904,
+STORE, 47182615547904, 47182615678975,
+ERASE, 47182615547904, 47182615547904,
+STORE, 47182615547904, 47182615670783,
+STORE, 47182615670784, 47182615678975,
+ERASE, 47182615670784, 47182615670784,
+STORE, 47182615670784, 47182615678975,
+STORE, 47182615678976, 47182615687167,
+STORE, 47182615687168, 47182615707647,
+ERASE, 47182615687168, 47182615687168,
+STORE, 47182615687168, 47182615691263,
+STORE, 47182615691264, 47182615707647,
+STORE, 47182615695360, 47182615707647,
+STORE, 47182615691264, 47182615695359,
+ERASE, 47182615691264, 47182615691264,
+STORE, 47182615691264, 47182615695359,
+STORE, 47182615699456, 47182615707647,
+STORE, 47182615695360, 47182615699455,
+ERASE, 47182615695360, 47182615695360,
+STORE, 47182615695360, 47182615707647,
+ERASE, 47182615695360, 47182615695360,
+STORE, 47182615695360, 47182615699455,
+STORE, 47182615699456, 47182615707647,
+ERASE, 47182615699456, 47182615699456,
+STORE, 47182615699456, 47182615707647,
+STORE, 47182615707648, 47182615715839,
+ERASE, 47182608371712, 47182608371712,
+STORE, 47182608371712, 47182608388095,
+STORE, 47182608388096, 47182608396287,
+ERASE, 47182615699456, 47182615699456,
+STORE, 47182615699456, 47182615703551,
+STORE, 47182615703552, 47182615707647,
+ERASE, 47182611288064, 47182611288064,
+STORE, 47182611288064, 47182611292159,
+STORE, 47182611292160, 47182611296255,
+ERASE, 47182615670784, 47182615670784,
+STORE, 47182615670784, 47182615674879,
+STORE, 47182615674880, 47182615678975,
+ERASE, 47182615195648, 47182615195648,
+STORE, 47182615195648, 47182615199743,
+STORE, 47182615199744, 47182615203839,
+ERASE, 47182615158784, 47182615158784,
+STORE, 47182615158784, 47182615162879,
+STORE, 47182615162880, 47182615166975,
+ERASE, 47182614913024, 47182614913024,
+STORE, 47182614913024, 47182615109631,
+STORE, 47182615109632, 47182615117823,
+ERASE, 47182612029440, 47182612029440,
+STORE, 47182612029440, 47182612066303,
+STORE, 47182612066304, 47182612082687,
+ERASE, 47182611476480, 47182611476480,
+STORE, 47182611476480, 47182611480575,
+STORE, 47182611480576, 47182611484671,
+ERASE, 47182611161088, 47182611161088,
+STORE, 47182611161088, 47182611165183,
+STORE, 47182611165184, 47182611169279,
+ERASE, 47182608891904, 47182608891904,
+STORE, 47182608891904, 47182608912383,
+STORE, 47182608912384, 47182608916479,
+ERASE, 47182608560128, 47182608560128,
+STORE, 47182608560128, 47182608564223,
+STORE, 47182608564224, 47182608568319,
+ERASE, 47182608515072, 47182608515072,
+STORE, 47182608515072, 47182608519167,
+STORE, 47182608519168, 47182608523263,
+ERASE, 93963664379904, 93963664379904,
+STORE, 93963664379904, 93963664502783,
+STORE, 93963664502784, 93963664506879,
+ERASE, 140450188599296, 140450188599296,
+STORE, 140450188599296, 140450188603391,
+STORE, 140450188603392, 140450188607487,
+ERASE, 47182606557184, 47182606557184,
+STORE, 93963694723072, 93963694858239,
+STORE, 140737488347136, 140737488351231,
+STORE, 140730313261056, 140737488351231,
+ERASE, 140730313261056, 140730313261056,
+STORE, 140730313261056, 140730313265151,
+STORE, 94386579017728, 94386579697663,
+ERASE, 94386579017728, 94386579017728,
+STORE, 94386579017728, 94386579083263,
+STORE, 94386579083264, 94386579697663,
+ERASE, 94386579083264, 94386579083264,
+STORE, 94386579083264, 94386579431423,
+STORE, 94386579431424, 94386579570687,
+STORE, 94386579570688, 94386579697663,
+STORE, 140124810838016, 140124811010047,
+ERASE, 140124810838016, 140124810838016,
+STORE, 140124810838016, 140124810842111,
+STORE, 140124810842112, 140124811010047,
+ERASE, 140124810842112, 140124810842112,
+STORE, 140124810842112, 140124810964991,
+STORE, 140124810964992, 140124810997759,
+STORE, 140124810997760, 140124811005951,
+STORE, 140124811005952, 140124811010047,
+STORE, 140730313601024, 140730313605119,
+STORE, 140730313588736, 140730313601023,
+STORE, 47507984158720, 47507984166911,
+STORE, 47507984166912, 47507984175103,
+STORE, 47507984175104, 47507986014207,
+STORE, 47507984314368, 47507986014207,
+STORE, 47507984175104, 47507984314367,
+ERASE, 47507984314368, 47507984314368,
+STORE, 47507984314368, 47507985973247,
+STORE, 47507985973248, 47507986014207,
+STORE, 47507985657856, 47507985973247,
+STORE, 47507984314368, 47507985657855,
+ERASE, 47507984314368, 47507984314368,
+STORE, 47507984314368, 47507985657855,
+STORE, 47507985969152, 47507985973247,
+STORE, 47507985657856, 47507985969151,
+ERASE, 47507985657856, 47507985657856,
+STORE, 47507985657856, 47507985969151,
+STORE, 47507985997824, 47507986014207,
+STORE, 47507985973248, 47507985997823,
+ERASE, 47507985973248, 47507985973248,
+STORE, 47507985973248, 47507985997823,
+ERASE, 47507985997824, 47507985997824,
+STORE, 47507985997824, 47507986014207,
+STORE, 47507986014208, 47507986124799,
+STORE, 47507986030592, 47507986124799,
+STORE, 47507986014208, 47507986030591,
+ERASE, 47507986030592, 47507986030592,
+STORE, 47507986030592, 47507986116607,
+STORE, 47507986116608, 47507986124799,
+STORE, 47507986092032, 47507986116607,
+STORE, 47507986030592, 47507986092031,
+ERASE, 47507986030592, 47507986030592,
+STORE, 47507986030592, 47507986092031,
+STORE, 47507986112512, 47507986116607,
+STORE, 47507986092032, 47507986112511,
+ERASE, 47507986092032, 47507986092032,
+STORE, 47507986092032, 47507986112511,
+ERASE, 47507986116608, 47507986116608,
+STORE, 47507986116608, 47507986124799,
+STORE, 47507986124800, 47507986169855,
+ERASE, 47507986124800, 47507986124800,
+STORE, 47507986124800, 47507986132991,
+STORE, 47507986132992, 47507986169855,
+STORE, 47507986153472, 47507986169855,
+STORE, 47507986132992, 47507986153471,
+ERASE, 47507986132992, 47507986132992,
+STORE, 47507986132992, 47507986153471,
+STORE, 47507986161664, 47507986169855,
+STORE, 47507986153472, 47507986161663,
+ERASE, 47507986153472, 47507986153472,
+STORE, 47507986153472, 47507986169855,
+ERASE, 47507986153472, 47507986153472,
+STORE, 47507986153472, 47507986161663,
+STORE, 47507986161664, 47507986169855,
+ERASE, 47507986161664, 47507986161664,
+STORE, 47507986161664, 47507986169855,
+STORE, 47507986169856, 47507986518015,
+STORE, 47507986210816, 47507986518015,
+STORE, 47507986169856, 47507986210815,
+ERASE, 47507986210816, 47507986210816,
+STORE, 47507986210816, 47507986493439,
+STORE, 47507986493440, 47507986518015,
+STORE, 47507986423808, 47507986493439,
+STORE, 47507986210816, 47507986423807,
+ERASE, 47507986210816, 47507986210816,
+STORE, 47507986210816, 47507986423807,
+STORE, 47507986489344, 47507986493439,
+STORE, 47507986423808, 47507986489343,
+ERASE, 47507986423808, 47507986423808,
+STORE, 47507986423808, 47507986489343,
+ERASE, 47507986493440, 47507986493440,
+STORE, 47507986493440, 47507986518015,
+STORE, 47507986518016, 47507988779007,
+STORE, 47507986669568, 47507988779007,
+STORE, 47507986518016, 47507986669567,
+ERASE, 47507986669568, 47507986669568,
+STORE, 47507986669568, 47507988762623,
+STORE, 47507988762624, 47507988779007,
+STORE, 47507988770816, 47507988779007,
+STORE, 47507988762624, 47507988770815,
+ERASE, 47507988762624, 47507988762624,
+STORE, 47507988762624, 47507988770815,
+ERASE, 47507988770816, 47507988770816,
+STORE, 47507988770816, 47507988779007,
+STORE, 47507988779008, 47507988914175,
+ERASE, 47507988779008, 47507988779008,
+STORE, 47507988779008, 47507988803583,
+STORE, 47507988803584, 47507988914175,
+STORE, 47507988865024, 47507988914175,
+STORE, 47507988803584, 47507988865023,
+ERASE, 47507988803584, 47507988803584,
+STORE, 47507988803584, 47507988865023,
+STORE, 47507988889600, 47507988914175,
+STORE, 47507988865024, 47507988889599,
+ERASE, 47507988865024, 47507988865024,
+STORE, 47507988865024, 47507988914175,
+ERASE, 47507988865024, 47507988865024,
+STORE, 47507988865024, 47507988889599,
+STORE, 47507988889600, 47507988914175,
+STORE, 47507988897792, 47507988914175,
+STORE, 47507988889600, 47507988897791,
+ERASE, 47507988889600, 47507988889600,
+STORE, 47507988889600, 47507988897791,
+ERASE, 47507988897792, 47507988897792,
+STORE, 47507988897792, 47507988914175,
+STORE, 47507988897792, 47507988922367,
+STORE, 47507988922368, 47507989086207,
+ERASE, 47507988922368, 47507988922368,
+STORE, 47507988922368, 47507988934655,
+STORE, 47507988934656, 47507989086207,
+STORE, 47507989032960, 47507989086207,
+STORE, 47507988934656, 47507989032959,
+ERASE, 47507988934656, 47507988934656,
+STORE, 47507988934656, 47507989032959,
+STORE, 47507989078016, 47507989086207,
+STORE, 47507989032960, 47507989078015,
+ERASE, 47507989032960, 47507989032960,
+STORE, 47507989032960, 47507989086207,
+ERASE, 47507989032960, 47507989032960,
+STORE, 47507989032960, 47507989078015,
+STORE, 47507989078016, 47507989086207,
+ERASE, 47507989078016, 47507989078016,
+STORE, 47507989078016, 47507989086207,
+STORE, 47507989086208, 47507989684223,
+STORE, 47507989204992, 47507989684223,
+STORE, 47507989086208, 47507989204991,
+ERASE, 47507989204992, 47507989204992,
+STORE, 47507989204992, 47507989630975,
+STORE, 47507989630976, 47507989684223,
+STORE, 47507989520384, 47507989630975,
+STORE, 47507989204992, 47507989520383,
+ERASE, 47507989204992, 47507989204992,
+STORE, 47507989204992, 47507989520383,
+STORE, 47507989626880, 47507989630975,
+STORE, 47507989520384, 47507989626879,
+ERASE, 47507989520384, 47507989520384,
+STORE, 47507989520384, 47507989626879,
+ERASE, 47507989630976, 47507989630976,
+STORE, 47507989630976, 47507989684223,
+STORE, 47507989684224, 47507992735743,
+STORE, 47507990228992, 47507992735743,
+STORE, 47507989684224, 47507990228991,
+ERASE, 47507990228992, 47507990228992,
+STORE, 47507990228992, 47507992514559,
+STORE, 47507992514560, 47507992735743,
+STORE, 47507991924736, 47507992514559,
+STORE, 47507990228992, 47507991924735,
+ERASE, 47507990228992, 47507990228992,
+STORE, 47507990228992, 47507991924735,
+STORE, 47507992510464, 47507992514559,
+STORE, 47507991924736, 47507992510463,
+ERASE, 47507991924736, 47507991924736,
+STORE, 47507991924736, 47507992510463,
+STORE, 47507992719360, 47507992735743,
+STORE, 47507992514560, 47507992719359,
+ERASE, 47507992514560, 47507992514560,
+STORE, 47507992514560, 47507992719359,
+ERASE, 47507992719360, 47507992719360,
+STORE, 47507992719360, 47507992735743,
+STORE, 47507992735744, 47507992768511,
+ERASE, 47507992735744, 47507992735744,
+STORE, 47507992735744, 47507992743935,
+STORE, 47507992743936, 47507992768511,
+STORE, 47507992756224, 47507992768511,
+STORE, 47507992743936, 47507992756223,
+ERASE, 47507992743936, 47507992743936,
+STORE, 47507992743936, 47507992756223,
+STORE, 47507992760320, 47507992768511,
+STORE, 47507992756224, 47507992760319,
+ERASE, 47507992756224, 47507992756224,
+STORE, 47507992756224, 47507992768511,
+ERASE, 47507992756224, 47507992756224,
+STORE, 47507992756224, 47507992760319,
+STORE, 47507992760320, 47507992768511,
+ERASE, 47507992760320, 47507992760320,
+STORE, 47507992760320, 47507992768511,
+STORE, 47507992768512, 47507992805375,
+ERASE, 47507992768512, 47507992768512,
+STORE, 47507992768512, 47507992776703,
+STORE, 47507992776704, 47507992805375,
+STORE, 47507992793088, 47507992805375,
+STORE, 47507992776704, 47507992793087,
+ERASE, 47507992776704, 47507992776704,
+STORE, 47507992776704, 47507992793087,
+STORE, 47507992797184, 47507992805375,
+STORE, 47507992793088, 47507992797183,
+ERASE, 47507992793088, 47507992793088,
+STORE, 47507992793088, 47507992805375,
+ERASE, 47507992793088, 47507992793088,
+STORE, 47507992793088, 47507992797183,
+STORE, 47507992797184, 47507992805375,
+ERASE, 47507992797184, 47507992797184,
+STORE, 47507992797184, 47507992805375,
+STORE, 47507992805376, 47507993280511,
+ERASE, 47507992805376, 47507992805376,
+STORE, 47507992805376, 47507992813567,
+STORE, 47507992813568, 47507993280511,
+STORE, 47507993149440, 47507993280511,
+STORE, 47507992813568, 47507993149439,
+ERASE, 47507992813568, 47507992813568,
+STORE, 47507992813568, 47507993149439,
+STORE, 47507993272320, 47507993280511,
+STORE, 47507993149440, 47507993272319,
+ERASE, 47507993149440, 47507993149440,
+STORE, 47507993149440, 47507993280511,
+ERASE, 47507993149440, 47507993149440,
+STORE, 47507993149440, 47507993272319,
+STORE, 47507993272320, 47507993280511,
+ERASE, 47507993272320, 47507993272320,
+STORE, 47507993272320, 47507993280511,
+STORE, 47507993280512, 47507993288703,
+STORE, 47507993288704, 47507993309183,
+ERASE, 47507993288704, 47507993288704,
+STORE, 47507993288704, 47507993292799,
+STORE, 47507993292800, 47507993309183,
+STORE, 47507993296896, 47507993309183,
+STORE, 47507993292800, 47507993296895,
+ERASE, 47507993292800, 47507993292800,
+STORE, 47507993292800, 47507993296895,
+STORE, 47507993300992, 47507993309183,
+STORE, 47507993296896, 47507993300991,
+ERASE, 47507993296896, 47507993296896,
+STORE, 47507993296896, 47507993309183,
+ERASE, 47507993296896, 47507993296896,
+STORE, 47507993296896, 47507993300991,
+STORE, 47507993300992, 47507993309183,
+ERASE, 47507993300992, 47507993300992,
+STORE, 47507993300992, 47507993309183,
+STORE, 47507993309184, 47507993317375,
+ERASE, 47507985973248, 47507985973248,
+STORE, 47507985973248, 47507985989631,
+STORE, 47507985989632, 47507985997823,
+ERASE, 47507993300992, 47507993300992,
+STORE, 47507993300992, 47507993305087,
+STORE, 47507993305088, 47507993309183,
+ERASE, 47507988889600, 47507988889600,
+STORE, 47507988889600, 47507988893695,
+STORE, 47507988893696, 47507988897791,
+ERASE, 47507993272320, 47507993272320,
+STORE, 47507993272320, 47507993276415,
+STORE, 47507993276416, 47507993280511,
+ERASE, 47507992797184, 47507992797184,
+STORE, 47507992797184, 47507992801279,
+STORE, 47507992801280, 47507992805375,
+ERASE, 47507992760320, 47507992760320,
+STORE, 47507992760320, 47507992764415,
+STORE, 47507992764416, 47507992768511,
+ERASE, 47507992514560, 47507992514560,
+STORE, 47507992514560, 47507992711167,
+STORE, 47507992711168, 47507992719359,
+ERASE, 47507989630976, 47507989630976,
+STORE, 47507989630976, 47507989667839,
+STORE, 47507989667840, 47507989684223,
+ERASE, 47507989078016, 47507989078016,
+STORE, 47507989078016, 47507989082111,
+STORE, 47507989082112, 47507989086207,
+ERASE, 47507988762624, 47507988762624,
+STORE, 47507988762624, 47507988766719,
+STORE, 47507988766720, 47507988770815,
+ERASE, 47507986493440, 47507986493440,
+STORE, 47507986493440, 47507986513919,
+STORE, 47507986513920, 47507986518015,
+ERASE, 47507986161664, 47507986161664,
+STORE, 47507986161664, 47507986165759,
+STORE, 47507986165760, 47507986169855,
+ERASE, 47507986116608, 47507986116608,
+STORE, 47507986116608, 47507986120703,
+STORE, 47507986120704, 47507986124799,
+ERASE, 94386579570688, 94386579570688,
+STORE, 94386579570688, 94386579693567,
+STORE, 94386579693568, 94386579697663,
+ERASE, 140124810997760, 140124810997760,
+STORE, 140124810997760, 140124811001855,
+STORE, 140124811001856, 140124811005951,
+ERASE, 47507984158720, 47507984158720,
+STORE, 94386583982080, 94386584117247,
+STORE, 94386583982080, 94386584256511,
+ERASE, 94386583982080, 94386583982080,
+STORE, 94386583982080, 94386584223743,
+STORE, 94386584223744, 94386584256511,
+ERASE, 94386584223744, 94386584223744,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733763395584, 140737488351231,
+ERASE, 140733763395584, 140733763395584,
+STORE, 140733763395584, 140733763399679,
+STORE, 94011546472448, 94011547152383,
+ERASE, 94011546472448, 94011546472448,
+STORE, 94011546472448, 94011546537983,
+STORE, 94011546537984, 94011547152383,
+ERASE, 94011546537984, 94011546537984,
+STORE, 94011546537984, 94011546886143,
+STORE, 94011546886144, 94011547025407,
+STORE, 94011547025408, 94011547152383,
+STORE, 139757597949952, 139757598121983,
+ERASE, 139757597949952, 139757597949952,
+STORE, 139757597949952, 139757597954047,
+STORE, 139757597954048, 139757598121983,
+ERASE, 139757597954048, 139757597954048,
+STORE, 139757597954048, 139757598076927,
+STORE, 139757598076928, 139757598109695,
+STORE, 139757598109696, 139757598117887,
+STORE, 139757598117888, 139757598121983,
+STORE, 140733763596288, 140733763600383,
+STORE, 140733763584000, 140733763596287,
+STORE, 47875197046784, 47875197054975,
+STORE, 47875197054976, 47875197063167,
+STORE, 47875197063168, 47875198902271,
+STORE, 47875197202432, 47875198902271,
+STORE, 47875197063168, 47875197202431,
+ERASE, 47875197202432, 47875197202432,
+STORE, 47875197202432, 47875198861311,
+STORE, 47875198861312, 47875198902271,
+STORE, 47875198545920, 47875198861311,
+STORE, 47875197202432, 47875198545919,
+ERASE, 47875197202432, 47875197202432,
+STORE, 47875197202432, 47875198545919,
+STORE, 47875198857216, 47875198861311,
+STORE, 47875198545920, 47875198857215,
+ERASE, 47875198545920, 47875198545920,
+STORE, 47875198545920, 47875198857215,
+STORE, 47875198885888, 47875198902271,
+STORE, 47875198861312, 47875198885887,
+ERASE, 47875198861312, 47875198861312,
+STORE, 47875198861312, 47875198885887,
+ERASE, 47875198885888, 47875198885888,
+STORE, 47875198885888, 47875198902271,
+STORE, 47875198902272, 47875199012863,
+STORE, 47875198918656, 47875199012863,
+STORE, 47875198902272, 47875198918655,
+ERASE, 47875198918656, 47875198918656,
+STORE, 47875198918656, 47875199004671,
+STORE, 47875199004672, 47875199012863,
+STORE, 47875198980096, 47875199004671,
+STORE, 47875198918656, 47875198980095,
+ERASE, 47875198918656, 47875198918656,
+STORE, 47875198918656, 47875198980095,
+STORE, 47875199000576, 47875199004671,
+STORE, 47875198980096, 47875199000575,
+ERASE, 47875198980096, 47875198980096,
+STORE, 47875198980096, 47875199000575,
+ERASE, 47875199004672, 47875199004672,
+STORE, 47875199004672, 47875199012863,
+STORE, 47875199012864, 47875199057919,
+ERASE, 47875199012864, 47875199012864,
+STORE, 47875199012864, 47875199021055,
+STORE, 47875199021056, 47875199057919,
+STORE, 47875199041536, 47875199057919,
+STORE, 47875199021056, 47875199041535,
+ERASE, 47875199021056, 47875199021056,
+STORE, 47875199021056, 47875199041535,
+STORE, 47875199049728, 47875199057919,
+STORE, 47875199041536, 47875199049727,
+ERASE, 47875199041536, 47875199041536,
+STORE, 47875199041536, 47875199057919,
+ERASE, 47875199041536, 47875199041536,
+STORE, 47875199041536, 47875199049727,
+STORE, 47875199049728, 47875199057919,
+ERASE, 47875199049728, 47875199049728,
+STORE, 47875199049728, 47875199057919,
+STORE, 47875199057920, 47875199406079,
+STORE, 47875199098880, 47875199406079,
+STORE, 47875199057920, 47875199098879,
+ERASE, 47875199098880, 47875199098880,
+STORE, 47875199098880, 47875199381503,
+STORE, 47875199381504, 47875199406079,
+STORE, 47875199311872, 47875199381503,
+STORE, 47875199098880, 47875199311871,
+ERASE, 47875199098880, 47875199098880,
+STORE, 47875199098880, 47875199311871,
+STORE, 47875199377408, 47875199381503,
+STORE, 47875199311872, 47875199377407,
+ERASE, 47875199311872, 47875199311872,
+STORE, 47875199311872, 47875199377407,
+ERASE, 47875199381504, 47875199381504,
+STORE, 47875199381504, 47875199406079,
+STORE, 47875199406080, 47875201667071,
+STORE, 47875199557632, 47875201667071,
+STORE, 47875199406080, 47875199557631,
+ERASE, 47875199557632, 47875199557632,
+STORE, 47875199557632, 47875201650687,
+STORE, 47875201650688, 47875201667071,
+STORE, 47875201658880, 47875201667071,
+STORE, 47875201650688, 47875201658879,
+ERASE, 47875201650688, 47875201650688,
+STORE, 47875201650688, 47875201658879,
+ERASE, 47875201658880, 47875201658880,
+STORE, 47875201658880, 47875201667071,
+STORE, 47875201667072, 47875201802239,
+ERASE, 47875201667072, 47875201667072,
+STORE, 47875201667072, 47875201691647,
+STORE, 47875201691648, 47875201802239,
+STORE, 47875201753088, 47875201802239,
+STORE, 47875201691648, 47875201753087,
+ERASE, 47875201691648, 47875201691648,
+STORE, 47875201691648, 47875201753087,
+STORE, 47875201777664, 47875201802239,
+STORE, 47875201753088, 47875201777663,
+ERASE, 47875201753088, 47875201753088,
+STORE, 47875201753088, 47875201802239,
+ERASE, 47875201753088, 47875201753088,
+STORE, 47875201753088, 47875201777663,
+STORE, 47875201777664, 47875201802239,
+STORE, 47875201785856, 47875201802239,
+STORE, 47875201777664, 47875201785855,
+ERASE, 47875201777664, 47875201777664,
+STORE, 47875201777664, 47875201785855,
+ERASE, 47875201785856, 47875201785856,
+STORE, 47875201785856, 47875201802239,
+STORE, 47875201785856, 47875201810431,
+STORE, 47875201810432, 47875201974271,
+ERASE, 47875201810432, 47875201810432,
+STORE, 47875201810432, 47875201822719,
+STORE, 47875201822720, 47875201974271,
+STORE, 47875201921024, 47875201974271,
+STORE, 47875201822720, 47875201921023,
+ERASE, 47875201822720, 47875201822720,
+STORE, 47875201822720, 47875201921023,
+STORE, 47875201966080, 47875201974271,
+STORE, 47875201921024, 47875201966079,
+ERASE, 47875201921024, 47875201921024,
+STORE, 47875201921024, 47875201974271,
+ERASE, 47875201921024, 47875201921024,
+STORE, 47875201921024, 47875201966079,
+STORE, 47875201966080, 47875201974271,
+ERASE, 47875201966080, 47875201966080,
+STORE, 47875201966080, 47875201974271,
+STORE, 47875201974272, 47875202572287,
+STORE, 47875202093056, 47875202572287,
+STORE, 47875201974272, 47875202093055,
+ERASE, 47875202093056, 47875202093056,
+STORE, 47875202093056, 47875202519039,
+STORE, 47875202519040, 47875202572287,
+STORE, 47875202408448, 47875202519039,
+STORE, 47875202093056, 47875202408447,
+ERASE, 47875202093056, 47875202093056,
+STORE, 47875202093056, 47875202408447,
+STORE, 47875202514944, 47875202519039,
+STORE, 47875202408448, 47875202514943,
+ERASE, 47875202408448, 47875202408448,
+STORE, 47875202408448, 47875202514943,
+ERASE, 47875202519040, 47875202519040,
+STORE, 47875202519040, 47875202572287,
+STORE, 47875202572288, 47875205623807,
+STORE, 47875203117056, 47875205623807,
+STORE, 47875202572288, 47875203117055,
+ERASE, 47875203117056, 47875203117056,
+STORE, 47875203117056, 47875205402623,
+STORE, 47875205402624, 47875205623807,
+STORE, 47875204812800, 47875205402623,
+STORE, 47875203117056, 47875204812799,
+ERASE, 47875203117056, 47875203117056,
+STORE, 47875203117056, 47875204812799,
+STORE, 47875205398528, 47875205402623,
+STORE, 47875204812800, 47875205398527,
+ERASE, 47875204812800, 47875204812800,
+STORE, 47875204812800, 47875205398527,
+STORE, 47875205607424, 47875205623807,
+STORE, 47875205402624, 47875205607423,
+ERASE, 47875205402624, 47875205402624,
+STORE, 47875205402624, 47875205607423,
+ERASE, 47875205607424, 47875205607424,
+STORE, 47875205607424, 47875205623807,
+STORE, 47875205623808, 47875205656575,
+ERASE, 47875205623808, 47875205623808,
+STORE, 47875205623808, 47875205631999,
+STORE, 47875205632000, 47875205656575,
+STORE, 47875205644288, 47875205656575,
+STORE, 47875205632000, 47875205644287,
+ERASE, 47875205632000, 47875205632000,
+STORE, 47875205632000, 47875205644287,
+STORE, 47875205648384, 47875205656575,
+STORE, 47875205644288, 47875205648383,
+ERASE, 47875205644288, 47875205644288,
+STORE, 47875205644288, 47875205656575,
+ERASE, 47875205644288, 47875205644288,
+STORE, 47875205644288, 47875205648383,
+STORE, 47875205648384, 47875205656575,
+ERASE, 47875205648384, 47875205648384,
+STORE, 47875205648384, 47875205656575,
+STORE, 47875205656576, 47875205693439,
+ERASE, 47875205656576, 47875205656576,
+STORE, 47875205656576, 47875205664767,
+STORE, 47875205664768, 47875205693439,
+STORE, 47875205681152, 47875205693439,
+STORE, 47875205664768, 47875205681151,
+ERASE, 47875205664768, 47875205664768,
+STORE, 47875205664768, 47875205681151,
+STORE, 47875205685248, 47875205693439,
+STORE, 47875205681152, 47875205685247,
+ERASE, 47875205681152, 47875205681152,
+STORE, 47875205681152, 47875205693439,
+ERASE, 47875205681152, 47875205681152,
+STORE, 47875205681152, 47875205685247,
+STORE, 47875205685248, 47875205693439,
+ERASE, 47875205685248, 47875205685248,
+STORE, 47875205685248, 47875205693439,
+STORE, 47875205693440, 47875206168575,
+ERASE, 47875205693440, 47875205693440,
+STORE, 47875205693440, 47875205701631,
+STORE, 47875205701632, 47875206168575,
+STORE, 47875206037504, 47875206168575,
+STORE, 47875205701632, 47875206037503,
+ERASE, 47875205701632, 47875205701632,
+STORE, 47875205701632, 47875206037503,
+STORE, 47875206160384, 47875206168575,
+STORE, 47875206037504, 47875206160383,
+ERASE, 47875206037504, 47875206037504,
+STORE, 47875206037504, 47875206168575,
+ERASE, 47875206037504, 47875206037504,
+STORE, 47875206037504, 47875206160383,
+STORE, 47875206160384, 47875206168575,
+ERASE, 47875206160384, 47875206160384,
+STORE, 47875206160384, 47875206168575,
+STORE, 47875206168576, 47875206176767,
+STORE, 47875206176768, 47875206197247,
+ERASE, 47875206176768, 47875206176768,
+STORE, 47875206176768, 47875206180863,
+STORE, 47875206180864, 47875206197247,
+STORE, 47875206184960, 47875206197247,
+STORE, 47875206180864, 47875206184959,
+ERASE, 47875206180864, 47875206180864,
+STORE, 47875206180864, 47875206184959,
+STORE, 47875206189056, 47875206197247,
+STORE, 47875206184960, 47875206189055,
+ERASE, 47875206184960, 47875206184960,
+STORE, 47875206184960, 47875206197247,
+ERASE, 47875206184960, 47875206184960,
+STORE, 47875206184960, 47875206189055,
+STORE, 47875206189056, 47875206197247,
+ERASE, 47875206189056, 47875206189056,
+STORE, 47875206189056, 47875206197247,
+STORE, 47875206197248, 47875206205439,
+ERASE, 47875198861312, 47875198861312,
+STORE, 47875198861312, 47875198877695,
+STORE, 47875198877696, 47875198885887,
+ERASE, 47875206189056, 47875206189056,
+STORE, 47875206189056, 47875206193151,
+STORE, 47875206193152, 47875206197247,
+ERASE, 47875201777664, 47875201777664,
+STORE, 47875201777664, 47875201781759,
+STORE, 47875201781760, 47875201785855,
+ERASE, 47875206160384, 47875206160384,
+STORE, 47875206160384, 47875206164479,
+STORE, 47875206164480, 47875206168575,
+ERASE, 47875205685248, 47875205685248,
+STORE, 47875205685248, 47875205689343,
+STORE, 47875205689344, 47875205693439,
+ERASE, 47875205648384, 47875205648384,
+STORE, 47875205648384, 47875205652479,
+STORE, 47875205652480, 47875205656575,
+ERASE, 47875205402624, 47875205402624,
+STORE, 47875205402624, 47875205599231,
+STORE, 47875205599232, 47875205607423,
+ERASE, 47875202519040, 47875202519040,
+STORE, 47875202519040, 47875202555903,
+STORE, 47875202555904, 47875202572287,
+ERASE, 47875201966080, 47875201966080,
+STORE, 47875201966080, 47875201970175,
+STORE, 47875201970176, 47875201974271,
+ERASE, 47875201650688, 47875201650688,
+STORE, 47875201650688, 47875201654783,
+STORE, 47875201654784, 47875201658879,
+ERASE, 47875199381504, 47875199381504,
+STORE, 47875199381504, 47875199401983,
+STORE, 47875199401984, 47875199406079,
+ERASE, 47875199049728, 47875199049728,
+STORE, 47875199049728, 47875199053823,
+STORE, 47875199053824, 47875199057919,
+ERASE, 47875199004672, 47875199004672,
+STORE, 47875199004672, 47875199008767,
+STORE, 47875199008768, 47875199012863,
+ERASE, 94011547025408, 94011547025408,
+STORE, 94011547025408, 94011547148287,
+STORE, 94011547148288, 94011547152383,
+ERASE, 139757598109696, 139757598109696,
+STORE, 139757598109696, 139757598113791,
+STORE, 139757598113792, 139757598117887,
+ERASE, 47875197046784, 47875197046784,
+STORE, 94011557584896, 94011557720063,
+STORE, 94011557584896, 94011557855231,
+ERASE, 94011557584896, 94011557584896,
+STORE, 94011557584896, 94011557851135,
+STORE, 94011557851136, 94011557855231,
+ERASE, 94011557851136, 94011557851136,
+ERASE, 94011557584896, 94011557584896,
+STORE, 94011557584896, 94011557847039,
+STORE, 94011557847040, 94011557851135,
+ERASE, 94011557847040, 94011557847040,
+STORE, 94011557584896, 94011557982207,
+ERASE, 94011557584896, 94011557584896,
+STORE, 94011557584896, 94011557978111,
+STORE, 94011557978112, 94011557982207,
+ERASE, 94011557978112, 94011557978112,
+ERASE, 94011557584896, 94011557584896,
+STORE, 94011557584896, 94011557974015,
+STORE, 94011557974016, 94011557978111,
+ERASE, 94011557974016, 94011557974016,
+STORE, 140737488347136, 140737488351231,
+STORE, 140734130360320, 140737488351231,
+ERASE, 140734130360320, 140734130360320,
+STORE, 140734130360320, 140734130364415,
+STORE, 94641232105472, 94641232785407,
+ERASE, 94641232105472, 94641232105472,
+STORE, 94641232105472, 94641232171007,
+STORE, 94641232171008, 94641232785407,
+ERASE, 94641232171008, 94641232171008,
+STORE, 94641232171008, 94641232519167,
+STORE, 94641232519168, 94641232658431,
+STORE, 94641232658432, 94641232785407,
+STORE, 139726599516160, 139726599688191,
+ERASE, 139726599516160, 139726599516160,
+STORE, 139726599516160, 139726599520255,
+STORE, 139726599520256, 139726599688191,
+ERASE, 139726599520256, 139726599520256,
+STORE, 139726599520256, 139726599643135,
+STORE, 139726599643136, 139726599675903,
+STORE, 139726599675904, 139726599684095,
+STORE, 139726599684096, 139726599688191,
+STORE, 140734130446336, 140734130450431,
+STORE, 140734130434048, 140734130446335,
+STORE, 47906195480576, 47906195488767,
+STORE, 47906195488768, 47906195496959,
+STORE, 47906195496960, 47906197336063,
+STORE, 47906195636224, 47906197336063,
+STORE, 47906195496960, 47906195636223,
+ERASE, 47906195636224, 47906195636224,
+STORE, 47906195636224, 47906197295103,
+STORE, 47906197295104, 47906197336063,
+STORE, 47906196979712, 47906197295103,
+STORE, 47906195636224, 47906196979711,
+ERASE, 47906195636224, 47906195636224,
+STORE, 47906195636224, 47906196979711,
+STORE, 47906197291008, 47906197295103,
+STORE, 47906196979712, 47906197291007,
+ERASE, 47906196979712, 47906196979712,
+STORE, 47906196979712, 47906197291007,
+STORE, 47906197319680, 47906197336063,
+STORE, 47906197295104, 47906197319679,
+ERASE, 47906197295104, 47906197295104,
+STORE, 47906197295104, 47906197319679,
+ERASE, 47906197319680, 47906197319680,
+STORE, 47906197319680, 47906197336063,
+STORE, 47906197336064, 47906197446655,
+STORE, 47906197352448, 47906197446655,
+STORE, 47906197336064, 47906197352447,
+ERASE, 47906197352448, 47906197352448,
+STORE, 47906197352448, 47906197438463,
+STORE, 47906197438464, 47906197446655,
+STORE, 47906197413888, 47906197438463,
+STORE, 47906197352448, 47906197413887,
+ERASE, 47906197352448, 47906197352448,
+STORE, 47906197352448, 47906197413887,
+STORE, 47906197434368, 47906197438463,
+STORE, 47906197413888, 47906197434367,
+ERASE, 47906197413888, 47906197413888,
+STORE, 47906197413888, 47906197434367,
+ERASE, 47906197438464, 47906197438464,
+STORE, 47906197438464, 47906197446655,
+STORE, 47906197446656, 47906197491711,
+ERASE, 47906197446656, 47906197446656,
+STORE, 47906197446656, 47906197454847,
+STORE, 47906197454848, 47906197491711,
+STORE, 47906197475328, 47906197491711,
+STORE, 47906197454848, 47906197475327,
+ERASE, 47906197454848, 47906197454848,
+STORE, 47906197454848, 47906197475327,
+STORE, 47906197483520, 47906197491711,
+STORE, 47906197475328, 47906197483519,
+ERASE, 47906197475328, 47906197475328,
+STORE, 47906197475328, 47906197491711,
+ERASE, 47906197475328, 47906197475328,
+STORE, 47906197475328, 47906197483519,
+STORE, 47906197483520, 47906197491711,
+ERASE, 47906197483520, 47906197483520,
+STORE, 47906197483520, 47906197491711,
+STORE, 47906197491712, 47906197839871,
+STORE, 47906197532672, 47906197839871,
+STORE, 47906197491712, 47906197532671,
+ERASE, 47906197532672, 47906197532672,
+STORE, 47906197532672, 47906197815295,
+STORE, 47906197815296, 47906197839871,
+STORE, 47906197745664, 47906197815295,
+STORE, 47906197532672, 47906197745663,
+ERASE, 47906197532672, 47906197532672,
+STORE, 47906197532672, 47906197745663,
+STORE, 47906197811200, 47906197815295,
+STORE, 47906197745664, 47906197811199,
+ERASE, 47906197745664, 47906197745664,
+STORE, 47906197745664, 47906197811199,
+ERASE, 47906197815296, 47906197815296,
+STORE, 47906197815296, 47906197839871,
+STORE, 47906197839872, 47906200100863,
+STORE, 47906197991424, 47906200100863,
+STORE, 47906197839872, 47906197991423,
+ERASE, 47906197991424, 47906197991424,
+STORE, 47906197991424, 47906200084479,
+STORE, 47906200084480, 47906200100863,
+STORE, 47906200092672, 47906200100863,
+STORE, 47906200084480, 47906200092671,
+ERASE, 47906200084480, 47906200084480,
+STORE, 47906200084480, 47906200092671,
+ERASE, 47906200092672, 47906200092672,
+STORE, 47906200092672, 47906200100863,
+STORE, 47906200100864, 47906200236031,
+ERASE, 47906200100864, 47906200100864,
+STORE, 47906200100864, 47906200125439,
+STORE, 47906200125440, 47906200236031,
+STORE, 47906200186880, 47906200236031,
+STORE, 47906200125440, 47906200186879,
+ERASE, 47906200125440, 47906200125440,
+STORE, 47906200125440, 47906200186879,
+STORE, 47906200211456, 47906200236031,
+STORE, 47906200186880, 47906200211455,
+ERASE, 47906200186880, 47906200186880,
+STORE, 47906200186880, 47906200236031,
+ERASE, 47906200186880, 47906200186880,
+STORE, 47906200186880, 47906200211455,
+STORE, 47906200211456, 47906200236031,
+STORE, 47906200219648, 47906200236031,
+STORE, 47906200211456, 47906200219647,
+ERASE, 47906200211456, 47906200211456,
+STORE, 47906200211456, 47906200219647,
+ERASE, 47906200219648, 47906200219648,
+STORE, 47906200219648, 47906200236031,
+STORE, 47906200219648, 47906200244223,
+STORE, 47906200244224, 47906200408063,
+ERASE, 47906200244224, 47906200244224,
+STORE, 47906200244224, 47906200256511,
+STORE, 47906200256512, 47906200408063,
+STORE, 47906200354816, 47906200408063,
+STORE, 47906200256512, 47906200354815,
+ERASE, 47906200256512, 47906200256512,
+STORE, 47906200256512, 47906200354815,
+STORE, 47906200399872, 47906200408063,
+STORE, 47906200354816, 47906200399871,
+ERASE, 47906200354816, 47906200354816,
+STORE, 47906200354816, 47906200408063,
+ERASE, 47906200354816, 47906200354816,
+STORE, 47906200354816, 47906200399871,
+STORE, 47906200399872, 47906200408063,
+ERASE, 47906200399872, 47906200399872,
+STORE, 47906200399872, 47906200408063,
+STORE, 47906200408064, 47906201006079,
+STORE, 47906200526848, 47906201006079,
+STORE, 47906200408064, 47906200526847,
+ERASE, 47906200526848, 47906200526848,
+STORE, 47906200526848, 47906200952831,
+STORE, 47906200952832, 47906201006079,
+STORE, 47906200842240, 47906200952831,
+STORE, 47906200526848, 47906200842239,
+ERASE, 47906200526848, 47906200526848,
+STORE, 47906200526848, 47906200842239,
+STORE, 47906200948736, 47906200952831,
+STORE, 47906200842240, 47906200948735,
+ERASE, 47906200842240, 47906200842240,
+STORE, 47906200842240, 47906200948735,
+ERASE, 47906200952832, 47906200952832,
+STORE, 47906200952832, 47906201006079,
+STORE, 47906201006080, 47906204057599,
+STORE, 47906201550848, 47906204057599,
+STORE, 47906201006080, 47906201550847,
+ERASE, 47906201550848, 47906201550848,
+STORE, 47906201550848, 47906203836415,
+STORE, 47906203836416, 47906204057599,
+STORE, 47906203246592, 47906203836415,
+STORE, 47906201550848, 47906203246591,
+ERASE, 47906201550848, 47906201550848,
+STORE, 47906201550848, 47906203246591,
+STORE, 47906203832320, 47906203836415,
+STORE, 47906203246592, 47906203832319,
+ERASE, 47906203246592, 47906203246592,
+STORE, 47906203246592, 47906203832319,
+STORE, 47906204041216, 47906204057599,
+STORE, 47906203836416, 47906204041215,
+ERASE, 47906203836416, 47906203836416,
+STORE, 47906203836416, 47906204041215,
+ERASE, 47906204041216, 47906204041216,
+STORE, 47906204041216, 47906204057599,
+STORE, 47906204057600, 47906204090367,
+ERASE, 47906204057600, 47906204057600,
+STORE, 47906204057600, 47906204065791,
+STORE, 47906204065792, 47906204090367,
+STORE, 47906204078080, 47906204090367,
+STORE, 47906204065792, 47906204078079,
+ERASE, 47906204065792, 47906204065792,
+STORE, 47906204065792, 47906204078079,
+STORE, 47906204082176, 47906204090367,
+STORE, 47906204078080, 47906204082175,
+ERASE, 47906204078080, 47906204078080,
+STORE, 47906204078080, 47906204090367,
+ERASE, 47906204078080, 47906204078080,
+STORE, 47906204078080, 47906204082175,
+STORE, 47906204082176, 47906204090367,
+ERASE, 47906204082176, 47906204082176,
+STORE, 47906204082176, 47906204090367,
+STORE, 47906204090368, 47906204127231,
+ERASE, 47906204090368, 47906204090368,
+STORE, 47906204090368, 47906204098559,
+STORE, 47906204098560, 47906204127231,
+STORE, 47906204114944, 47906204127231,
+STORE, 47906204098560, 47906204114943,
+ERASE, 47906204098560, 47906204098560,
+STORE, 47906204098560, 47906204114943,
+STORE, 47906204119040, 47906204127231,
+STORE, 47906204114944, 47906204119039,
+ERASE, 47906204114944, 47906204114944,
+STORE, 47906204114944, 47906204127231,
+ERASE, 47906204114944, 47906204114944,
+STORE, 47906204114944, 47906204119039,
+STORE, 47906204119040, 47906204127231,
+ERASE, 47906204119040, 47906204119040,
+STORE, 47906204119040, 47906204127231,
+STORE, 47906204127232, 47906204602367,
+ERASE, 47906204127232, 47906204127232,
+STORE, 47906204127232, 47906204135423,
+STORE, 47906204135424, 47906204602367,
+STORE, 47906204471296, 47906204602367,
+STORE, 47906204135424, 47906204471295,
+ERASE, 47906204135424, 47906204135424,
+STORE, 47906204135424, 47906204471295,
+STORE, 47906204594176, 47906204602367,
+STORE, 47906204471296, 47906204594175,
+ERASE, 47906204471296, 47906204471296,
+STORE, 47906204471296, 47906204602367,
+ERASE, 47906204471296, 47906204471296,
+STORE, 47906204471296, 47906204594175,
+STORE, 47906204594176, 47906204602367,
+ERASE, 47906204594176, 47906204594176,
+STORE, 47906204594176, 47906204602367,
+STORE, 47906204602368, 47906204610559,
+STORE, 47906204610560, 47906204631039,
+ERASE, 47906204610560, 47906204610560,
+STORE, 47906204610560, 47906204614655,
+STORE, 47906204614656, 47906204631039,
+STORE, 47906204618752, 47906204631039,
+STORE, 47906204614656, 47906204618751,
+ERASE, 47906204614656, 47906204614656,
+STORE, 47906204614656, 47906204618751,
+STORE, 47906204622848, 47906204631039,
+STORE, 47906204618752, 47906204622847,
+ERASE, 47906204618752, 47906204618752,
+STORE, 47906204618752, 47906204631039,
+ERASE, 47906204618752, 47906204618752,
+STORE, 47906204618752, 47906204622847,
+STORE, 47906204622848, 47906204631039,
+ERASE, 47906204622848, 47906204622848,
+STORE, 47906204622848, 47906204631039,
+STORE, 47906204631040, 47906204639231,
+ERASE, 47906197295104, 47906197295104,
+STORE, 47906197295104, 47906197311487,
+STORE, 47906197311488, 47906197319679,
+ERASE, 47906204622848, 47906204622848,
+STORE, 47906204622848, 47906204626943,
+STORE, 47906204626944, 47906204631039,
+ERASE, 47906200211456, 47906200211456,
+STORE, 47906200211456, 47906200215551,
+STORE, 47906200215552, 47906200219647,
+ERASE, 47906204594176, 47906204594176,
+STORE, 47906204594176, 47906204598271,
+STORE, 47906204598272, 47906204602367,
+ERASE, 47906204119040, 47906204119040,
+STORE, 47906204119040, 47906204123135,
+STORE, 47906204123136, 47906204127231,
+ERASE, 47906204082176, 47906204082176,
+STORE, 47906204082176, 47906204086271,
+STORE, 47906204086272, 47906204090367,
+ERASE, 47906203836416, 47906203836416,
+STORE, 47906203836416, 47906204033023,
+STORE, 47906204033024, 47906204041215,
+ERASE, 47906200952832, 47906200952832,
+STORE, 47906200952832, 47906200989695,
+STORE, 47906200989696, 47906201006079,
+ERASE, 47906200399872, 47906200399872,
+STORE, 47906200399872, 47906200403967,
+STORE, 47906200403968, 47906200408063,
+ERASE, 47906200084480, 47906200084480,
+STORE, 47906200084480, 47906200088575,
+STORE, 47906200088576, 47906200092671,
+ERASE, 47906197815296, 47906197815296,
+STORE, 47906197815296, 47906197835775,
+STORE, 47906197835776, 47906197839871,
+ERASE, 47906197483520, 47906197483520,
+STORE, 47906197483520, 47906197487615,
+STORE, 47906197487616, 47906197491711,
+ERASE, 47906197438464, 47906197438464,
+STORE, 47906197438464, 47906197442559,
+STORE, 47906197442560, 47906197446655,
+ERASE, 94641232658432, 94641232658432,
+STORE, 94641232658432, 94641232781311,
+STORE, 94641232781312, 94641232785407,
+ERASE, 139726599675904, 139726599675904,
+STORE, 139726599675904, 139726599679999,
+STORE, 139726599680000, 139726599684095,
+ERASE, 47906195480576, 47906195480576,
+STORE, 94641242615808, 94641242750975,
+ };
+
+ static const unsigned long set10[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140736427839488, 140737488351231,
+ERASE, 140736427839488, 140736427839488,
+STORE, 140736427839488, 140736427843583,
+STORE, 94071213395968, 94071213567999,
+ERASE, 94071213395968, 94071213395968,
+STORE, 94071213395968, 94071213412351,
+STORE, 94071213412352, 94071213567999,
+ERASE, 94071213412352, 94071213412352,
+STORE, 94071213412352, 94071213514751,
+STORE, 94071213514752, 94071213555711,
+STORE, 94071213555712, 94071213567999,
+STORE, 139968410644480, 139968410816511,
+ERASE, 139968410644480, 139968410644480,
+STORE, 139968410644480, 139968410648575,
+STORE, 139968410648576, 139968410816511,
+ERASE, 139968410648576, 139968410648576,
+STORE, 139968410648576, 139968410771455,
+STORE, 139968410771456, 139968410804223,
+STORE, 139968410804224, 139968410812415,
+STORE, 139968410812416, 139968410816511,
+STORE, 140736429277184, 140736429281279,
+STORE, 140736429264896, 140736429277183,
+STORE, 47664384352256, 47664384360447,
+STORE, 47664384360448, 47664384368639,
+STORE, 47664384368640, 47664384532479,
+ERASE, 47664384368640, 47664384368640,
+STORE, 47664384368640, 47664384380927,
+STORE, 47664384380928, 47664384532479,
+STORE, 47664384479232, 47664384532479,
+STORE, 47664384380928, 47664384479231,
+ERASE, 47664384380928, 47664384380928,
+STORE, 47664384380928, 47664384479231,
+STORE, 47664384524288, 47664384532479,
+STORE, 47664384479232, 47664384524287,
+ERASE, 47664384479232, 47664384479232,
+STORE, 47664384479232, 47664384532479,
+ERASE, 47664384479232, 47664384479232,
+STORE, 47664384479232, 47664384524287,
+STORE, 47664384524288, 47664384532479,
+ERASE, 47664384524288, 47664384524288,
+STORE, 47664384524288, 47664384532479,
+STORE, 47664384532480, 47664387583999,
+STORE, 47664385077248, 47664387583999,
+STORE, 47664384532480, 47664385077247,
+ERASE, 47664385077248, 47664385077248,
+STORE, 47664385077248, 47664387362815,
+STORE, 47664387362816, 47664387583999,
+STORE, 47664386772992, 47664387362815,
+STORE, 47664385077248, 47664386772991,
+ERASE, 47664385077248, 47664385077248,
+STORE, 47664385077248, 47664386772991,
+STORE, 47664387358720, 47664387362815,
+STORE, 47664386772992, 47664387358719,
+ERASE, 47664386772992, 47664386772992,
+STORE, 47664386772992, 47664387358719,
+STORE, 47664387567616, 47664387583999,
+STORE, 47664387362816, 47664387567615,
+ERASE, 47664387362816, 47664387362816,
+STORE, 47664387362816, 47664387567615,
+ERASE, 47664387567616, 47664387567616,
+STORE, 47664387567616, 47664387583999,
+STORE, 47664387584000, 47664389423103,
+STORE, 47664387723264, 47664389423103,
+STORE, 47664387584000, 47664387723263,
+ERASE, 47664387723264, 47664387723264,
+STORE, 47664387723264, 47664389382143,
+STORE, 47664389382144, 47664389423103,
+STORE, 47664389066752, 47664389382143,
+STORE, 47664387723264, 47664389066751,
+ERASE, 47664387723264, 47664387723264,
+STORE, 47664387723264, 47664389066751,
+STORE, 47664389378048, 47664389382143,
+STORE, 47664389066752, 47664389378047,
+ERASE, 47664389066752, 47664389066752,
+STORE, 47664389066752, 47664389378047,
+STORE, 47664389406720, 47664389423103,
+STORE, 47664389382144, 47664389406719,
+ERASE, 47664389382144, 47664389382144,
+STORE, 47664389382144, 47664389406719,
+ERASE, 47664389406720, 47664389406720,
+STORE, 47664389406720, 47664389423103,
+STORE, 47664389423104, 47664389558271,
+ERASE, 47664389423104, 47664389423104,
+STORE, 47664389423104, 47664389447679,
+STORE, 47664389447680, 47664389558271,
+STORE, 47664389509120, 47664389558271,
+STORE, 47664389447680, 47664389509119,
+ERASE, 47664389447680, 47664389447680,
+STORE, 47664389447680, 47664389509119,
+STORE, 47664389533696, 47664389558271,
+STORE, 47664389509120, 47664389533695,
+ERASE, 47664389509120, 47664389509120,
+STORE, 47664389509120, 47664389558271,
+ERASE, 47664389509120, 47664389509120,
+STORE, 47664389509120, 47664389533695,
+STORE, 47664389533696, 47664389558271,
+STORE, 47664389541888, 47664389558271,
+STORE, 47664389533696, 47664389541887,
+ERASE, 47664389533696, 47664389533696,
+STORE, 47664389533696, 47664389541887,
+ERASE, 47664389541888, 47664389541888,
+STORE, 47664389541888, 47664389558271,
+STORE, 47664389558272, 47664389578751,
+ERASE, 47664389558272, 47664389558272,
+STORE, 47664389558272, 47664389562367,
+STORE, 47664389562368, 47664389578751,
+STORE, 47664389566464, 47664389578751,
+STORE, 47664389562368, 47664389566463,
+ERASE, 47664389562368, 47664389562368,
+STORE, 47664389562368, 47664389566463,
+STORE, 47664389570560, 47664389578751,
+STORE, 47664389566464, 47664389570559,
+ERASE, 47664389566464, 47664389566464,
+STORE, 47664389566464, 47664389578751,
+ERASE, 47664389566464, 47664389566464,
+STORE, 47664389566464, 47664389570559,
+STORE, 47664389570560, 47664389578751,
+ERASE, 47664389570560, 47664389570560,
+STORE, 47664389570560, 47664389578751,
+STORE, 47664389578752, 47664389586943,
+ERASE, 47664389382144, 47664389382144,
+STORE, 47664389382144, 47664389398527,
+STORE, 47664389398528, 47664389406719,
+ERASE, 47664389570560, 47664389570560,
+STORE, 47664389570560, 47664389574655,
+STORE, 47664389574656, 47664389578751,
+ERASE, 47664389533696, 47664389533696,
+STORE, 47664389533696, 47664389537791,
+STORE, 47664389537792, 47664389541887,
+ERASE, 47664387362816, 47664387362816,
+STORE, 47664387362816, 47664387559423,
+STORE, 47664387559424, 47664387567615,
+ERASE, 47664384524288, 47664384524288,
+STORE, 47664384524288, 47664384528383,
+STORE, 47664384528384, 47664384532479,
+ERASE, 94071213555712, 94071213555712,
+STORE, 94071213555712, 94071213563903,
+STORE, 94071213563904, 94071213567999,
+ERASE, 139968410804224, 139968410804224,
+STORE, 139968410804224, 139968410808319,
+STORE, 139968410808320, 139968410812415,
+ERASE, 47664384352256, 47664384352256,
+STORE, 94071244402688, 94071244537855,
+STORE, 140737488347136, 140737488351231,
+STORE, 140728271503360, 140737488351231,
+ERASE, 140728271503360, 140728271503360,
+STORE, 140728271503360, 140728271507455,
+STORE, 94410361982976, 94410362155007,
+ERASE, 94410361982976, 94410361982976,
+STORE, 94410361982976, 94410361999359,
+STORE, 94410361999360, 94410362155007,
+ERASE, 94410361999360, 94410361999360,
+STORE, 94410361999360, 94410362101759,
+STORE, 94410362101760, 94410362142719,
+STORE, 94410362142720, 94410362155007,
+STORE, 140351953997824, 140351954169855,
+ERASE, 140351953997824, 140351953997824,
+STORE, 140351953997824, 140351954001919,
+STORE, 140351954001920, 140351954169855,
+ERASE, 140351954001920, 140351954001920,
+STORE, 140351954001920, 140351954124799,
+STORE, 140351954124800, 140351954157567,
+STORE, 140351954157568, 140351954165759,
+STORE, 140351954165760, 140351954169855,
+STORE, 140728272429056, 140728272433151,
+STORE, 140728272416768, 140728272429055,
+STORE, 47280840998912, 47280841007103,
+STORE, 47280841007104, 47280841015295,
+STORE, 47280841015296, 47280841179135,
+ERASE, 47280841015296, 47280841015296,
+STORE, 47280841015296, 47280841027583,
+STORE, 47280841027584, 47280841179135,
+STORE, 47280841125888, 47280841179135,
+STORE, 47280841027584, 47280841125887,
+ERASE, 47280841027584, 47280841027584,
+STORE, 47280841027584, 47280841125887,
+STORE, 47280841170944, 47280841179135,
+STORE, 47280841125888, 47280841170943,
+ERASE, 47280841125888, 47280841125888,
+STORE, 47280841125888, 47280841179135,
+ERASE, 47280841125888, 47280841125888,
+STORE, 47280841125888, 47280841170943,
+STORE, 47280841170944, 47280841179135,
+ERASE, 47280841170944, 47280841170944,
+STORE, 47280841170944, 47280841179135,
+STORE, 47280841179136, 47280844230655,
+STORE, 47280841723904, 47280844230655,
+STORE, 47280841179136, 47280841723903,
+ERASE, 47280841723904, 47280841723904,
+STORE, 47280841723904, 47280844009471,
+STORE, 47280844009472, 47280844230655,
+STORE, 47280843419648, 47280844009471,
+STORE, 47280841723904, 47280843419647,
+ERASE, 47280841723904, 47280841723904,
+STORE, 47280841723904, 47280843419647,
+STORE, 47280844005376, 47280844009471,
+STORE, 47280843419648, 47280844005375,
+ERASE, 47280843419648, 47280843419648,
+STORE, 47280843419648, 47280844005375,
+STORE, 47280844214272, 47280844230655,
+STORE, 47280844009472, 47280844214271,
+ERASE, 47280844009472, 47280844009472,
+STORE, 47280844009472, 47280844214271,
+ERASE, 47280844214272, 47280844214272,
+STORE, 47280844214272, 47280844230655,
+STORE, 47280844230656, 47280846069759,
+STORE, 47280844369920, 47280846069759,
+STORE, 47280844230656, 47280844369919,
+ERASE, 47280844369920, 47280844369920,
+STORE, 47280844369920, 47280846028799,
+STORE, 47280846028800, 47280846069759,
+STORE, 47280845713408, 47280846028799,
+STORE, 47280844369920, 47280845713407,
+ERASE, 47280844369920, 47280844369920,
+STORE, 47280844369920, 47280845713407,
+STORE, 47280846024704, 47280846028799,
+STORE, 47280845713408, 47280846024703,
+ERASE, 47280845713408, 47280845713408,
+STORE, 47280845713408, 47280846024703,
+STORE, 47280846053376, 47280846069759,
+STORE, 47280846028800, 47280846053375,
+ERASE, 47280846028800, 47280846028800,
+STORE, 47280846028800, 47280846053375,
+ERASE, 47280846053376, 47280846053376,
+STORE, 47280846053376, 47280846069759,
+STORE, 47280846069760, 47280846204927,
+ERASE, 47280846069760, 47280846069760,
+STORE, 47280846069760, 47280846094335,
+STORE, 47280846094336, 47280846204927,
+STORE, 47280846155776, 47280846204927,
+STORE, 47280846094336, 47280846155775,
+ERASE, 47280846094336, 47280846094336,
+STORE, 47280846094336, 47280846155775,
+STORE, 47280846180352, 47280846204927,
+STORE, 47280846155776, 47280846180351,
+ERASE, 47280846155776, 47280846155776,
+STORE, 47280846155776, 47280846204927,
+ERASE, 47280846155776, 47280846155776,
+STORE, 47280846155776, 47280846180351,
+STORE, 47280846180352, 47280846204927,
+STORE, 47280846188544, 47280846204927,
+STORE, 47280846180352, 47280846188543,
+ERASE, 47280846180352, 47280846180352,
+STORE, 47280846180352, 47280846188543,
+ERASE, 47280846188544, 47280846188544,
+STORE, 47280846188544, 47280846204927,
+STORE, 47280846204928, 47280846225407,
+ERASE, 47280846204928, 47280846204928,
+STORE, 47280846204928, 47280846209023,
+STORE, 47280846209024, 47280846225407,
+STORE, 47280846213120, 47280846225407,
+STORE, 47280846209024, 47280846213119,
+ERASE, 47280846209024, 47280846209024,
+STORE, 47280846209024, 47280846213119,
+STORE, 47280846217216, 47280846225407,
+STORE, 47280846213120, 47280846217215,
+ERASE, 47280846213120, 47280846213120,
+STORE, 47280846213120, 47280846225407,
+ERASE, 47280846213120, 47280846213120,
+STORE, 47280846213120, 47280846217215,
+STORE, 47280846217216, 47280846225407,
+ERASE, 47280846217216, 47280846217216,
+STORE, 47280846217216, 47280846225407,
+STORE, 47280846225408, 47280846233599,
+ERASE, 47280846028800, 47280846028800,
+STORE, 47280846028800, 47280846045183,
+STORE, 47280846045184, 47280846053375,
+ERASE, 47280846217216, 47280846217216,
+STORE, 47280846217216, 47280846221311,
+STORE, 47280846221312, 47280846225407,
+ERASE, 47280846180352, 47280846180352,
+STORE, 47280846180352, 47280846184447,
+STORE, 47280846184448, 47280846188543,
+ERASE, 47280844009472, 47280844009472,
+STORE, 47280844009472, 47280844206079,
+STORE, 47280844206080, 47280844214271,
+ERASE, 47280841170944, 47280841170944,
+STORE, 47280841170944, 47280841175039,
+STORE, 47280841175040, 47280841179135,
+ERASE, 94410362142720, 94410362142720,
+STORE, 94410362142720, 94410362150911,
+STORE, 94410362150912, 94410362155007,
+ERASE, 140351954157568, 140351954157568,
+STORE, 140351954157568, 140351954161663,
+STORE, 140351954161664, 140351954165759,
+ERASE, 47280840998912, 47280840998912,
+STORE, 94410379456512, 94410379591679,
+STORE, 140737488347136, 140737488351231,
+STORE, 140732946362368, 140737488351231,
+ERASE, 140732946362368, 140732946362368,
+STORE, 140732946362368, 140732946366463,
+STORE, 94352937934848, 94352938106879,
+ERASE, 94352937934848, 94352937934848,
+STORE, 94352937934848, 94352937951231,
+STORE, 94352937951232, 94352938106879,
+ERASE, 94352937951232, 94352937951232,
+STORE, 94352937951232, 94352938053631,
+STORE, 94352938053632, 94352938094591,
+STORE, 94352938094592, 94352938106879,
+STORE, 140595518742528, 140595518914559,
+ERASE, 140595518742528, 140595518742528,
+STORE, 140595518742528, 140595518746623,
+STORE, 140595518746624, 140595518914559,
+ERASE, 140595518746624, 140595518746624,
+STORE, 140595518746624, 140595518869503,
+STORE, 140595518869504, 140595518902271,
+STORE, 140595518902272, 140595518910463,
+STORE, 140595518910464, 140595518914559,
+STORE, 140732947468288, 140732947472383,
+STORE, 140732947456000, 140732947468287,
+STORE, 47037276254208, 47037276262399,
+STORE, 47037276262400, 47037276270591,
+STORE, 47037276270592, 47037276434431,
+ERASE, 47037276270592, 47037276270592,
+STORE, 47037276270592, 47037276282879,
+STORE, 47037276282880, 47037276434431,
+STORE, 47037276381184, 47037276434431,
+STORE, 47037276282880, 47037276381183,
+ERASE, 47037276282880, 47037276282880,
+STORE, 47037276282880, 47037276381183,
+STORE, 47037276426240, 47037276434431,
+STORE, 47037276381184, 47037276426239,
+ERASE, 47037276381184, 47037276381184,
+STORE, 47037276381184, 47037276434431,
+ERASE, 47037276381184, 47037276381184,
+STORE, 47037276381184, 47037276426239,
+STORE, 47037276426240, 47037276434431,
+ERASE, 47037276426240, 47037276426240,
+STORE, 47037276426240, 47037276434431,
+STORE, 47037276434432, 47037279485951,
+STORE, 47037276979200, 47037279485951,
+STORE, 47037276434432, 47037276979199,
+ERASE, 47037276979200, 47037276979200,
+STORE, 47037276979200, 47037279264767,
+STORE, 47037279264768, 47037279485951,
+STORE, 47037278674944, 47037279264767,
+STORE, 47037276979200, 47037278674943,
+ERASE, 47037276979200, 47037276979200,
+STORE, 47037276979200, 47037278674943,
+STORE, 47037279260672, 47037279264767,
+STORE, 47037278674944, 47037279260671,
+ERASE, 47037278674944, 47037278674944,
+STORE, 47037278674944, 47037279260671,
+STORE, 47037279469568, 47037279485951,
+STORE, 47037279264768, 47037279469567,
+ERASE, 47037279264768, 47037279264768,
+STORE, 47037279264768, 47037279469567,
+ERASE, 47037279469568, 47037279469568,
+STORE, 47037279469568, 47037279485951,
+STORE, 47037279485952, 47037281325055,
+STORE, 47037279625216, 47037281325055,
+STORE, 47037279485952, 47037279625215,
+ERASE, 47037279625216, 47037279625216,
+STORE, 47037279625216, 47037281284095,
+STORE, 47037281284096, 47037281325055,
+STORE, 47037280968704, 47037281284095,
+STORE, 47037279625216, 47037280968703,
+ERASE, 47037279625216, 47037279625216,
+STORE, 47037279625216, 47037280968703,
+STORE, 47037281280000, 47037281284095,
+STORE, 47037280968704, 47037281279999,
+ERASE, 47037280968704, 47037280968704,
+STORE, 47037280968704, 47037281279999,
+STORE, 47037281308672, 47037281325055,
+STORE, 47037281284096, 47037281308671,
+ERASE, 47037281284096, 47037281284096,
+STORE, 47037281284096, 47037281308671,
+ERASE, 47037281308672, 47037281308672,
+STORE, 47037281308672, 47037281325055,
+STORE, 47037281325056, 47037281460223,
+ERASE, 47037281325056, 47037281325056,
+STORE, 47037281325056, 47037281349631,
+STORE, 47037281349632, 47037281460223,
+STORE, 47037281411072, 47037281460223,
+STORE, 47037281349632, 47037281411071,
+ERASE, 47037281349632, 47037281349632,
+STORE, 47037281349632, 47037281411071,
+STORE, 47037281435648, 47037281460223,
+STORE, 47037281411072, 47037281435647,
+ERASE, 47037281411072, 47037281411072,
+STORE, 47037281411072, 47037281460223,
+ERASE, 47037281411072, 47037281411072,
+STORE, 47037281411072, 47037281435647,
+STORE, 47037281435648, 47037281460223,
+STORE, 47037281443840, 47037281460223,
+STORE, 47037281435648, 47037281443839,
+ERASE, 47037281435648, 47037281435648,
+STORE, 47037281435648, 47037281443839,
+ERASE, 47037281443840, 47037281443840,
+STORE, 47037281443840, 47037281460223,
+STORE, 47037281460224, 47037281480703,
+ERASE, 47037281460224, 47037281460224,
+STORE, 47037281460224, 47037281464319,
+STORE, 47037281464320, 47037281480703,
+STORE, 47037281468416, 47037281480703,
+STORE, 47037281464320, 47037281468415,
+ERASE, 47037281464320, 47037281464320,
+STORE, 47037281464320, 47037281468415,
+STORE, 47037281472512, 47037281480703,
+STORE, 47037281468416, 47037281472511,
+ERASE, 47037281468416, 47037281468416,
+STORE, 47037281468416, 47037281480703,
+ERASE, 47037281468416, 47037281468416,
+STORE, 47037281468416, 47037281472511,
+STORE, 47037281472512, 47037281480703,
+ERASE, 47037281472512, 47037281472512,
+STORE, 47037281472512, 47037281480703,
+STORE, 47037281480704, 47037281488895,
+ERASE, 47037281284096, 47037281284096,
+STORE, 47037281284096, 47037281300479,
+STORE, 47037281300480, 47037281308671,
+ERASE, 47037281472512, 47037281472512,
+STORE, 47037281472512, 47037281476607,
+STORE, 47037281476608, 47037281480703,
+ERASE, 47037281435648, 47037281435648,
+STORE, 47037281435648, 47037281439743,
+STORE, 47037281439744, 47037281443839,
+ERASE, 47037279264768, 47037279264768,
+STORE, 47037279264768, 47037279461375,
+STORE, 47037279461376, 47037279469567,
+ERASE, 47037276426240, 47037276426240,
+STORE, 47037276426240, 47037276430335,
+STORE, 47037276430336, 47037276434431,
+ERASE, 94352938094592, 94352938094592,
+STORE, 94352938094592, 94352938102783,
+STORE, 94352938102784, 94352938106879,
+ERASE, 140595518902272, 140595518902272,
+STORE, 140595518902272, 140595518906367,
+STORE, 140595518906368, 140595518910463,
+ERASE, 47037276254208, 47037276254208,
+STORE, 94352938438656, 94352938573823,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733506027520, 140737488351231,
+ERASE, 140733506027520, 140733506027520,
+STORE, 140733506027520, 140733506031615,
+STORE, 94150123073536, 94150123245567,
+ERASE, 94150123073536, 94150123073536,
+STORE, 94150123073536, 94150123089919,
+STORE, 94150123089920, 94150123245567,
+ERASE, 94150123089920, 94150123089920,
+STORE, 94150123089920, 94150123192319,
+STORE, 94150123192320, 94150123233279,
+STORE, 94150123233280, 94150123245567,
+STORE, 140081290375168, 140081290547199,
+ERASE, 140081290375168, 140081290375168,
+STORE, 140081290375168, 140081290379263,
+STORE, 140081290379264, 140081290547199,
+ERASE, 140081290379264, 140081290379264,
+STORE, 140081290379264, 140081290502143,
+STORE, 140081290502144, 140081290534911,
+STORE, 140081290534912, 140081290543103,
+STORE, 140081290543104, 140081290547199,
+STORE, 140733506707456, 140733506711551,
+STORE, 140733506695168, 140733506707455,
+STORE, 47551504621568, 47551504629759,
+STORE, 47551504629760, 47551504637951,
+STORE, 47551504637952, 47551504801791,
+ERASE, 47551504637952, 47551504637952,
+STORE, 47551504637952, 47551504650239,
+STORE, 47551504650240, 47551504801791,
+STORE, 47551504748544, 47551504801791,
+STORE, 47551504650240, 47551504748543,
+ERASE, 47551504650240, 47551504650240,
+STORE, 47551504650240, 47551504748543,
+STORE, 47551504793600, 47551504801791,
+STORE, 47551504748544, 47551504793599,
+ERASE, 47551504748544, 47551504748544,
+STORE, 47551504748544, 47551504801791,
+ERASE, 47551504748544, 47551504748544,
+STORE, 47551504748544, 47551504793599,
+STORE, 47551504793600, 47551504801791,
+ERASE, 47551504793600, 47551504793600,
+STORE, 47551504793600, 47551504801791,
+STORE, 47551504801792, 47551507853311,
+STORE, 47551505346560, 47551507853311,
+STORE, 47551504801792, 47551505346559,
+ERASE, 47551505346560, 47551505346560,
+STORE, 47551505346560, 47551507632127,
+STORE, 47551507632128, 47551507853311,
+STORE, 47551507042304, 47551507632127,
+STORE, 47551505346560, 47551507042303,
+ERASE, 47551505346560, 47551505346560,
+STORE, 47551505346560, 47551507042303,
+STORE, 47551507628032, 47551507632127,
+STORE, 47551507042304, 47551507628031,
+ERASE, 47551507042304, 47551507042304,
+STORE, 47551507042304, 47551507628031,
+STORE, 47551507836928, 47551507853311,
+STORE, 47551507632128, 47551507836927,
+ERASE, 47551507632128, 47551507632128,
+STORE, 47551507632128, 47551507836927,
+ERASE, 47551507836928, 47551507836928,
+STORE, 47551507836928, 47551507853311,
+STORE, 47551507853312, 47551509692415,
+STORE, 47551507992576, 47551509692415,
+STORE, 47551507853312, 47551507992575,
+ERASE, 47551507992576, 47551507992576,
+STORE, 47551507992576, 47551509651455,
+STORE, 47551509651456, 47551509692415,
+STORE, 47551509336064, 47551509651455,
+STORE, 47551507992576, 47551509336063,
+ERASE, 47551507992576, 47551507992576,
+STORE, 47551507992576, 47551509336063,
+STORE, 47551509647360, 47551509651455,
+STORE, 47551509336064, 47551509647359,
+ERASE, 47551509336064, 47551509336064,
+STORE, 47551509336064, 47551509647359,
+STORE, 47551509676032, 47551509692415,
+STORE, 47551509651456, 47551509676031,
+ERASE, 47551509651456, 47551509651456,
+STORE, 47551509651456, 47551509676031,
+ERASE, 47551509676032, 47551509676032,
+STORE, 47551509676032, 47551509692415,
+STORE, 47551509692416, 47551509827583,
+ERASE, 47551509692416, 47551509692416,
+STORE, 47551509692416, 47551509716991,
+STORE, 47551509716992, 47551509827583,
+STORE, 47551509778432, 47551509827583,
+STORE, 47551509716992, 47551509778431,
+ERASE, 47551509716992, 47551509716992,
+STORE, 47551509716992, 47551509778431,
+STORE, 47551509803008, 47551509827583,
+STORE, 47551509778432, 47551509803007,
+ERASE, 47551509778432, 47551509778432,
+STORE, 47551509778432, 47551509827583,
+ERASE, 47551509778432, 47551509778432,
+STORE, 47551509778432, 47551509803007,
+STORE, 47551509803008, 47551509827583,
+STORE, 47551509811200, 47551509827583,
+STORE, 47551509803008, 47551509811199,
+ERASE, 47551509803008, 47551509803008,
+STORE, 47551509803008, 47551509811199,
+ERASE, 47551509811200, 47551509811200,
+STORE, 47551509811200, 47551509827583,
+STORE, 47551509827584, 47551509848063,
+ERASE, 47551509827584, 47551509827584,
+STORE, 47551509827584, 47551509831679,
+STORE, 47551509831680, 47551509848063,
+STORE, 47551509835776, 47551509848063,
+STORE, 47551509831680, 47551509835775,
+ERASE, 47551509831680, 47551509831680,
+STORE, 47551509831680, 47551509835775,
+STORE, 47551509839872, 47551509848063,
+STORE, 47551509835776, 47551509839871,
+ERASE, 47551509835776, 47551509835776,
+STORE, 47551509835776, 47551509848063,
+ERASE, 47551509835776, 47551509835776,
+STORE, 47551509835776, 47551509839871,
+STORE, 47551509839872, 47551509848063,
+ERASE, 47551509839872, 47551509839872,
+STORE, 47551509839872, 47551509848063,
+STORE, 47551509848064, 47551509856255,
+ERASE, 47551509651456, 47551509651456,
+STORE, 47551509651456, 47551509667839,
+STORE, 47551509667840, 47551509676031,
+ERASE, 47551509839872, 47551509839872,
+STORE, 47551509839872, 47551509843967,
+STORE, 47551509843968, 47551509848063,
+ERASE, 47551509803008, 47551509803008,
+STORE, 47551509803008, 47551509807103,
+STORE, 47551509807104, 47551509811199,
+ERASE, 47551507632128, 47551507632128,
+STORE, 47551507632128, 47551507828735,
+STORE, 47551507828736, 47551507836927,
+ERASE, 47551504793600, 47551504793600,
+STORE, 47551504793600, 47551504797695,
+STORE, 47551504797696, 47551504801791,
+ERASE, 94150123233280, 94150123233280,
+STORE, 94150123233280, 94150123241471,
+STORE, 94150123241472, 94150123245567,
+ERASE, 140081290534912, 140081290534912,
+STORE, 140081290534912, 140081290539007,
+STORE, 140081290539008, 140081290543103,
+ERASE, 47551504621568, 47551504621568,
+STORE, 94150148112384, 94150148247551,
+STORE, 140737488347136, 140737488351231,
+STORE, 140734389334016, 140737488351231,
+ERASE, 140734389334016, 140734389334016,
+STORE, 140734389334016, 140734389338111,
+STORE, 94844636606464, 94844636778495,
+ERASE, 94844636606464, 94844636606464,
+STORE, 94844636606464, 94844636622847,
+STORE, 94844636622848, 94844636778495,
+ERASE, 94844636622848, 94844636622848,
+STORE, 94844636622848, 94844636725247,
+STORE, 94844636725248, 94844636766207,
+STORE, 94844636766208, 94844636778495,
+STORE, 139922765217792, 139922765389823,
+ERASE, 139922765217792, 139922765217792,
+STORE, 139922765217792, 139922765221887,
+STORE, 139922765221888, 139922765389823,
+ERASE, 139922765221888, 139922765221888,
+STORE, 139922765221888, 139922765344767,
+STORE, 139922765344768, 139922765377535,
+STORE, 139922765377536, 139922765385727,
+STORE, 139922765385728, 139922765389823,
+STORE, 140734389678080, 140734389682175,
+STORE, 140734389665792, 140734389678079,
+STORE, 47710029778944, 47710029787135,
+STORE, 47710029787136, 47710029795327,
+STORE, 47710029795328, 47710029959167,
+ERASE, 47710029795328, 47710029795328,
+STORE, 47710029795328, 47710029807615,
+STORE, 47710029807616, 47710029959167,
+STORE, 47710029905920, 47710029959167,
+STORE, 47710029807616, 47710029905919,
+ERASE, 47710029807616, 47710029807616,
+STORE, 47710029807616, 47710029905919,
+STORE, 47710029950976, 47710029959167,
+STORE, 47710029905920, 47710029950975,
+ERASE, 47710029905920, 47710029905920,
+STORE, 47710029905920, 47710029959167,
+ERASE, 47710029905920, 47710029905920,
+STORE, 47710029905920, 47710029950975,
+STORE, 47710029950976, 47710029959167,
+ERASE, 47710029950976, 47710029950976,
+STORE, 47710029950976, 47710029959167,
+STORE, 47710029959168, 47710033010687,
+STORE, 47710030503936, 47710033010687,
+STORE, 47710029959168, 47710030503935,
+ERASE, 47710030503936, 47710030503936,
+STORE, 47710030503936, 47710032789503,
+STORE, 47710032789504, 47710033010687,
+STORE, 47710032199680, 47710032789503,
+STORE, 47710030503936, 47710032199679,
+ERASE, 47710030503936, 47710030503936,
+STORE, 47710030503936, 47710032199679,
+STORE, 47710032785408, 47710032789503,
+STORE, 47710032199680, 47710032785407,
+ERASE, 47710032199680, 47710032199680,
+STORE, 47710032199680, 47710032785407,
+STORE, 47710032994304, 47710033010687,
+STORE, 47710032789504, 47710032994303,
+ERASE, 47710032789504, 47710032789504,
+STORE, 47710032789504, 47710032994303,
+ERASE, 47710032994304, 47710032994304,
+STORE, 47710032994304, 47710033010687,
+STORE, 47710033010688, 47710034849791,
+STORE, 47710033149952, 47710034849791,
+STORE, 47710033010688, 47710033149951,
+ERASE, 47710033149952, 47710033149952,
+STORE, 47710033149952, 47710034808831,
+STORE, 47710034808832, 47710034849791,
+STORE, 47710034493440, 47710034808831,
+STORE, 47710033149952, 47710034493439,
+ERASE, 47710033149952, 47710033149952,
+STORE, 47710033149952, 47710034493439,
+STORE, 47710034804736, 47710034808831,
+STORE, 47710034493440, 47710034804735,
+ERASE, 47710034493440, 47710034493440,
+STORE, 47710034493440, 47710034804735,
+STORE, 47710034833408, 47710034849791,
+STORE, 47710034808832, 47710034833407,
+ERASE, 47710034808832, 47710034808832,
+STORE, 47710034808832, 47710034833407,
+ERASE, 47710034833408, 47710034833408,
+STORE, 47710034833408, 47710034849791,
+STORE, 47710034849792, 47710034984959,
+ERASE, 47710034849792, 47710034849792,
+STORE, 47710034849792, 47710034874367,
+STORE, 47710034874368, 47710034984959,
+STORE, 47710034935808, 47710034984959,
+STORE, 47710034874368, 47710034935807,
+ERASE, 47710034874368, 47710034874368,
+STORE, 47710034874368, 47710034935807,
+STORE, 47710034960384, 47710034984959,
+STORE, 47710034935808, 47710034960383,
+ERASE, 47710034935808, 47710034935808,
+STORE, 47710034935808, 47710034984959,
+ERASE, 47710034935808, 47710034935808,
+STORE, 47710034935808, 47710034960383,
+STORE, 47710034960384, 47710034984959,
+STORE, 47710034968576, 47710034984959,
+STORE, 47710034960384, 47710034968575,
+ERASE, 47710034960384, 47710034960384,
+STORE, 47710034960384, 47710034968575,
+ERASE, 47710034968576, 47710034968576,
+STORE, 47710034968576, 47710034984959,
+STORE, 47710034984960, 47710035005439,
+ERASE, 47710034984960, 47710034984960,
+STORE, 47710034984960, 47710034989055,
+STORE, 47710034989056, 47710035005439,
+STORE, 47710034993152, 47710035005439,
+STORE, 47710034989056, 47710034993151,
+ERASE, 47710034989056, 47710034989056,
+STORE, 47710034989056, 47710034993151,
+STORE, 47710034997248, 47710035005439,
+STORE, 47710034993152, 47710034997247,
+ERASE, 47710034993152, 47710034993152,
+STORE, 47710034993152, 47710035005439,
+ERASE, 47710034993152, 47710034993152,
+STORE, 47710034993152, 47710034997247,
+STORE, 47710034997248, 47710035005439,
+ERASE, 47710034997248, 47710034997248,
+STORE, 47710034997248, 47710035005439,
+STORE, 47710035005440, 47710035013631,
+ERASE, 47710034808832, 47710034808832,
+STORE, 47710034808832, 47710034825215,
+STORE, 47710034825216, 47710034833407,
+ERASE, 47710034997248, 47710034997248,
+STORE, 47710034997248, 47710035001343,
+STORE, 47710035001344, 47710035005439,
+ERASE, 47710034960384, 47710034960384,
+STORE, 47710034960384, 47710034964479,
+STORE, 47710034964480, 47710034968575,
+ERASE, 47710032789504, 47710032789504,
+STORE, 47710032789504, 47710032986111,
+STORE, 47710032986112, 47710032994303,
+ERASE, 47710029950976, 47710029950976,
+STORE, 47710029950976, 47710029955071,
+STORE, 47710029955072, 47710029959167,
+ERASE, 94844636766208, 94844636766208,
+STORE, 94844636766208, 94844636774399,
+STORE, 94844636774400, 94844636778495,
+ERASE, 139922765377536, 139922765377536,
+STORE, 139922765377536, 139922765381631,
+STORE, 139922765381632, 139922765385727,
+ERASE, 47710029778944, 47710029778944,
+STORE, 94844641775616, 94844641910783,
+STORE, 140737488347136, 140737488351231,
+STORE, 140732213886976, 140737488351231,
+ERASE, 140732213886976, 140732213886976,
+STORE, 140732213886976, 140732213891071,
+STORE, 94240508887040, 94240509059071,
+ERASE, 94240508887040, 94240508887040,
+STORE, 94240508887040, 94240508903423,
+STORE, 94240508903424, 94240509059071,
+ERASE, 94240508903424, 94240508903424,
+STORE, 94240508903424, 94240509005823,
+STORE, 94240509005824, 94240509046783,
+STORE, 94240509046784, 94240509059071,
+STORE, 140275106516992, 140275106689023,
+ERASE, 140275106516992, 140275106516992,
+STORE, 140275106516992, 140275106521087,
+STORE, 140275106521088, 140275106689023,
+ERASE, 140275106521088, 140275106521088,
+STORE, 140275106521088, 140275106643967,
+STORE, 140275106643968, 140275106676735,
+STORE, 140275106676736, 140275106684927,
+STORE, 140275106684928, 140275106689023,
+STORE, 140732213977088, 140732213981183,
+STORE, 140732213964800, 140732213977087,
+STORE, 47357688479744, 47357688487935,
+STORE, 47357688487936, 47357688496127,
+STORE, 47357688496128, 47357688659967,
+ERASE, 47357688496128, 47357688496128,
+STORE, 47357688496128, 47357688508415,
+STORE, 47357688508416, 47357688659967,
+STORE, 47357688606720, 47357688659967,
+STORE, 47357688508416, 47357688606719,
+ERASE, 47357688508416, 47357688508416,
+STORE, 47357688508416, 47357688606719,
+STORE, 47357688651776, 47357688659967,
+STORE, 47357688606720, 47357688651775,
+ERASE, 47357688606720, 47357688606720,
+STORE, 47357688606720, 47357688659967,
+ERASE, 47357688606720, 47357688606720,
+STORE, 47357688606720, 47357688651775,
+STORE, 47357688651776, 47357688659967,
+ERASE, 47357688651776, 47357688651776,
+STORE, 47357688651776, 47357688659967,
+STORE, 47357688659968, 47357691711487,
+STORE, 47357689204736, 47357691711487,
+STORE, 47357688659968, 47357689204735,
+ERASE, 47357689204736, 47357689204736,
+STORE, 47357689204736, 47357691490303,
+STORE, 47357691490304, 47357691711487,
+STORE, 47357690900480, 47357691490303,
+STORE, 47357689204736, 47357690900479,
+ERASE, 47357689204736, 47357689204736,
+STORE, 47357689204736, 47357690900479,
+STORE, 47357691486208, 47357691490303,
+STORE, 47357690900480, 47357691486207,
+ERASE, 47357690900480, 47357690900480,
+STORE, 47357690900480, 47357691486207,
+STORE, 47357691695104, 47357691711487,
+STORE, 47357691490304, 47357691695103,
+ERASE, 47357691490304, 47357691490304,
+STORE, 47357691490304, 47357691695103,
+ERASE, 47357691695104, 47357691695104,
+STORE, 47357691695104, 47357691711487,
+STORE, 47357691711488, 47357693550591,
+STORE, 47357691850752, 47357693550591,
+STORE, 47357691711488, 47357691850751,
+ERASE, 47357691850752, 47357691850752,
+STORE, 47357691850752, 47357693509631,
+STORE, 47357693509632, 47357693550591,
+STORE, 47357693194240, 47357693509631,
+STORE, 47357691850752, 47357693194239,
+ERASE, 47357691850752, 47357691850752,
+STORE, 47357691850752, 47357693194239,
+STORE, 47357693505536, 47357693509631,
+STORE, 47357693194240, 47357693505535,
+ERASE, 47357693194240, 47357693194240,
+STORE, 47357693194240, 47357693505535,
+STORE, 47357693534208, 47357693550591,
+STORE, 47357693509632, 47357693534207,
+ERASE, 47357693509632, 47357693509632,
+STORE, 47357693509632, 47357693534207,
+ERASE, 47357693534208, 47357693534208,
+STORE, 47357693534208, 47357693550591,
+STORE, 47357693550592, 47357693685759,
+ERASE, 47357693550592, 47357693550592,
+STORE, 47357693550592, 47357693575167,
+STORE, 47357693575168, 47357693685759,
+STORE, 47357693636608, 47357693685759,
+STORE, 47357693575168, 47357693636607,
+ERASE, 47357693575168, 47357693575168,
+STORE, 47357693575168, 47357693636607,
+STORE, 47357693661184, 47357693685759,
+STORE, 47357693636608, 47357693661183,
+ERASE, 47357693636608, 47357693636608,
+STORE, 47357693636608, 47357693685759,
+ERASE, 47357693636608, 47357693636608,
+STORE, 47357693636608, 47357693661183,
+STORE, 47357693661184, 47357693685759,
+STORE, 47357693669376, 47357693685759,
+STORE, 47357693661184, 47357693669375,
+ERASE, 47357693661184, 47357693661184,
+STORE, 47357693661184, 47357693669375,
+ERASE, 47357693669376, 47357693669376,
+STORE, 47357693669376, 47357693685759,
+STORE, 47357693685760, 47357693706239,
+ERASE, 47357693685760, 47357693685760,
+STORE, 47357693685760, 47357693689855,
+STORE, 47357693689856, 47357693706239,
+STORE, 47357693693952, 47357693706239,
+STORE, 47357693689856, 47357693693951,
+ERASE, 47357693689856, 47357693689856,
+STORE, 47357693689856, 47357693693951,
+STORE, 47357693698048, 47357693706239,
+STORE, 47357693693952, 47357693698047,
+ERASE, 47357693693952, 47357693693952,
+STORE, 47357693693952, 47357693706239,
+ERASE, 47357693693952, 47357693693952,
+STORE, 47357693693952, 47357693698047,
+STORE, 47357693698048, 47357693706239,
+ERASE, 47357693698048, 47357693698048,
+STORE, 47357693698048, 47357693706239,
+STORE, 47357693706240, 47357693714431,
+ERASE, 47357693509632, 47357693509632,
+STORE, 47357693509632, 47357693526015,
+STORE, 47357693526016, 47357693534207,
+ERASE, 47357693698048, 47357693698048,
+STORE, 47357693698048, 47357693702143,
+STORE, 47357693702144, 47357693706239,
+ERASE, 47357693661184, 47357693661184,
+STORE, 47357693661184, 47357693665279,
+STORE, 47357693665280, 47357693669375,
+ERASE, 47357691490304, 47357691490304,
+STORE, 47357691490304, 47357691686911,
+STORE, 47357691686912, 47357691695103,
+ERASE, 47357688651776, 47357688651776,
+STORE, 47357688651776, 47357688655871,
+STORE, 47357688655872, 47357688659967,
+ERASE, 94240509046784, 94240509046784,
+STORE, 94240509046784, 94240509054975,
+STORE, 94240509054976, 94240509059071,
+ERASE, 140275106676736, 140275106676736,
+STORE, 140275106676736, 140275106680831,
+STORE, 140275106680832, 140275106684927,
+ERASE, 47357688479744, 47357688479744,
+STORE, 94240518361088, 94240518496255,
+STORE, 140737488347136, 140737488351231,
+STORE, 140732688277504, 140737488351231,
+ERASE, 140732688277504, 140732688277504,
+STORE, 140732688277504, 140732688281599,
+STORE, 94629171351552, 94629172064255,
+ERASE, 94629171351552, 94629171351552,
+STORE, 94629171351552, 94629171400703,
+STORE, 94629171400704, 94629172064255,
+ERASE, 94629171400704, 94629171400704,
+STORE, 94629171400704, 94629171945471,
+STORE, 94629171945472, 94629172043775,
+STORE, 94629172043776, 94629172064255,
+STORE, 139770707644416, 139770707816447,
+ERASE, 139770707644416, 139770707644416,
+STORE, 139770707644416, 139770707648511,
+STORE, 139770707648512, 139770707816447,
+ERASE, 139770707648512, 139770707648512,
+STORE, 139770707648512, 139770707771391,
+STORE, 139770707771392, 139770707804159,
+STORE, 139770707804160, 139770707812351,
+STORE, 139770707812352, 139770707816447,
+STORE, 140732689121280, 140732689125375,
+STORE, 140732689108992, 140732689121279,
+STORE, 47862087352320, 47862087360511,
+STORE, 47862087360512, 47862087368703,
+STORE, 47862087368704, 47862087475199,
+STORE, 47862087385088, 47862087475199,
+STORE, 47862087368704, 47862087385087,
+ERASE, 47862087385088, 47862087385088,
+STORE, 47862087385088, 47862087458815,
+STORE, 47862087458816, 47862087475199,
+STORE, 47862087438336, 47862087458815,
+STORE, 47862087385088, 47862087438335,
+ERASE, 47862087385088, 47862087385088,
+STORE, 47862087385088, 47862087438335,
+STORE, 47862087454720, 47862087458815,
+STORE, 47862087438336, 47862087454719,
+ERASE, 47862087438336, 47862087438336,
+STORE, 47862087438336, 47862087454719,
+STORE, 47862087467008, 47862087475199,
+STORE, 47862087458816, 47862087467007,
+ERASE, 47862087458816, 47862087458816,
+STORE, 47862087458816, 47862087467007,
+ERASE, 47862087467008, 47862087467008,
+STORE, 47862087467008, 47862087475199,
+STORE, 47862087475200, 47862089314303,
+STORE, 47862087614464, 47862089314303,
+STORE, 47862087475200, 47862087614463,
+ERASE, 47862087614464, 47862087614464,
+STORE, 47862087614464, 47862089273343,
+STORE, 47862089273344, 47862089314303,
+STORE, 47862088957952, 47862089273343,
+STORE, 47862087614464, 47862088957951,
+ERASE, 47862087614464, 47862087614464,
+STORE, 47862087614464, 47862088957951,
+STORE, 47862089269248, 47862089273343,
+STORE, 47862088957952, 47862089269247,
+ERASE, 47862088957952, 47862088957952,
+STORE, 47862088957952, 47862089269247,
+STORE, 47862089297920, 47862089314303,
+STORE, 47862089273344, 47862089297919,
+ERASE, 47862089273344, 47862089273344,
+STORE, 47862089273344, 47862089297919,
+ERASE, 47862089297920, 47862089297920,
+STORE, 47862089297920, 47862089314303,
+STORE, 47862089297920, 47862089326591,
+ERASE, 47862089273344, 47862089273344,
+STORE, 47862089273344, 47862089289727,
+STORE, 47862089289728, 47862089297919,
+ERASE, 47862087458816, 47862087458816,
+STORE, 47862087458816, 47862087462911,
+STORE, 47862087462912, 47862087467007,
+ERASE, 94629172043776, 94629172043776,
+STORE, 94629172043776, 94629172060159,
+STORE, 94629172060160, 94629172064255,
+ERASE, 139770707804160, 139770707804160,
+STORE, 139770707804160, 139770707808255,
+STORE, 139770707808256, 139770707812351,
+ERASE, 47862087352320, 47862087352320,
+STORE, 94629197533184, 94629197668351,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727540711424, 140737488351231,
+ERASE, 140727540711424, 140727540711424,
+STORE, 140727540711424, 140727540715519,
+STORE, 94299865313280, 94299866025983,
+ERASE, 94299865313280, 94299865313280,
+STORE, 94299865313280, 94299865362431,
+STORE, 94299865362432, 94299866025983,
+ERASE, 94299865362432, 94299865362432,
+STORE, 94299865362432, 94299865907199,
+STORE, 94299865907200, 94299866005503,
+STORE, 94299866005504, 94299866025983,
+STORE, 140680268763136, 140680268935167,
+ERASE, 140680268763136, 140680268763136,
+STORE, 140680268763136, 140680268767231,
+STORE, 140680268767232, 140680268935167,
+ERASE, 140680268767232, 140680268767232,
+STORE, 140680268767232, 140680268890111,
+STORE, 140680268890112, 140680268922879,
+STORE, 140680268922880, 140680268931071,
+STORE, 140680268931072, 140680268935167,
+STORE, 140727541424128, 140727541428223,
+STORE, 140727541411840, 140727541424127,
+STORE, 46952526233600, 46952526241791,
+STORE, 46952526241792, 46952526249983,
+STORE, 46952526249984, 46952526356479,
+STORE, 46952526266368, 46952526356479,
+STORE, 46952526249984, 46952526266367,
+ERASE, 46952526266368, 46952526266368,
+STORE, 46952526266368, 46952526340095,
+STORE, 46952526340096, 46952526356479,
+STORE, 46952526319616, 46952526340095,
+STORE, 46952526266368, 46952526319615,
+ERASE, 46952526266368, 46952526266368,
+STORE, 46952526266368, 46952526319615,
+STORE, 46952526336000, 46952526340095,
+STORE, 46952526319616, 46952526335999,
+ERASE, 46952526319616, 46952526319616,
+STORE, 46952526319616, 46952526335999,
+STORE, 46952526348288, 46952526356479,
+STORE, 46952526340096, 46952526348287,
+ERASE, 46952526340096, 46952526340096,
+STORE, 46952526340096, 46952526348287,
+ERASE, 46952526348288, 46952526348288,
+STORE, 46952526348288, 46952526356479,
+STORE, 46952526356480, 46952528195583,
+STORE, 46952526495744, 46952528195583,
+STORE, 46952526356480, 46952526495743,
+ERASE, 46952526495744, 46952526495744,
+STORE, 46952526495744, 46952528154623,
+STORE, 46952528154624, 46952528195583,
+STORE, 46952527839232, 46952528154623,
+STORE, 46952526495744, 46952527839231,
+ERASE, 46952526495744, 46952526495744,
+STORE, 46952526495744, 46952527839231,
+STORE, 46952528150528, 46952528154623,
+STORE, 46952527839232, 46952528150527,
+ERASE, 46952527839232, 46952527839232,
+STORE, 46952527839232, 46952528150527,
+STORE, 46952528179200, 46952528195583,
+STORE, 46952528154624, 46952528179199,
+ERASE, 46952528154624, 46952528154624,
+STORE, 46952528154624, 46952528179199,
+ERASE, 46952528179200, 46952528179200,
+STORE, 46952528179200, 46952528195583,
+STORE, 46952528179200, 46952528207871,
+ERASE, 46952528154624, 46952528154624,
+STORE, 46952528154624, 46952528171007,
+STORE, 46952528171008, 46952528179199,
+ERASE, 46952526340096, 46952526340096,
+STORE, 46952526340096, 46952526344191,
+STORE, 46952526344192, 46952526348287,
+ERASE, 94299866005504, 94299866005504,
+STORE, 94299866005504, 94299866021887,
+STORE, 94299866021888, 94299866025983,
+ERASE, 140680268922880, 140680268922880,
+STORE, 140680268922880, 140680268926975,
+STORE, 140680268926976, 140680268931071,
+ERASE, 46952526233600, 46952526233600,
+STORE, 140737488347136, 140737488351231,
+STORE, 140722874793984, 140737488351231,
+ERASE, 140722874793984, 140722874793984,
+STORE, 140722874793984, 140722874798079,
+STORE, 94448916213760, 94448916926463,
+ERASE, 94448916213760, 94448916213760,
+STORE, 94448916213760, 94448916262911,
+STORE, 94448916262912, 94448916926463,
+ERASE, 94448916262912, 94448916262912,
+STORE, 94448916262912, 94448916807679,
+STORE, 94448916807680, 94448916905983,
+STORE, 94448916905984, 94448916926463,
+STORE, 140389117046784, 140389117218815,
+ERASE, 140389117046784, 140389117046784,
+STORE, 140389117046784, 140389117050879,
+STORE, 140389117050880, 140389117218815,
+ERASE, 140389117050880, 140389117050880,
+STORE, 140389117050880, 140389117173759,
+STORE, 140389117173760, 140389117206527,
+STORE, 140389117206528, 140389117214719,
+STORE, 140389117214720, 140389117218815,
+STORE, 140722875297792, 140722875301887,
+STORE, 140722875285504, 140722875297791,
+STORE, 47243677949952, 47243677958143,
+STORE, 47243677958144, 47243677966335,
+STORE, 47243677966336, 47243678072831,
+STORE, 47243677982720, 47243678072831,
+STORE, 47243677966336, 47243677982719,
+ERASE, 47243677982720, 47243677982720,
+STORE, 47243677982720, 47243678056447,
+STORE, 47243678056448, 47243678072831,
+STORE, 47243678035968, 47243678056447,
+STORE, 47243677982720, 47243678035967,
+ERASE, 47243677982720, 47243677982720,
+STORE, 47243677982720, 47243678035967,
+STORE, 47243678052352, 47243678056447,
+STORE, 47243678035968, 47243678052351,
+ERASE, 47243678035968, 47243678035968,
+STORE, 47243678035968, 47243678052351,
+STORE, 47243678064640, 47243678072831,
+STORE, 47243678056448, 47243678064639,
+ERASE, 47243678056448, 47243678056448,
+STORE, 47243678056448, 47243678064639,
+ERASE, 47243678064640, 47243678064640,
+STORE, 47243678064640, 47243678072831,
+STORE, 47243678072832, 47243679911935,
+STORE, 47243678212096, 47243679911935,
+STORE, 47243678072832, 47243678212095,
+ERASE, 47243678212096, 47243678212096,
+STORE, 47243678212096, 47243679870975,
+STORE, 47243679870976, 47243679911935,
+STORE, 47243679555584, 47243679870975,
+STORE, 47243678212096, 47243679555583,
+ERASE, 47243678212096, 47243678212096,
+STORE, 47243678212096, 47243679555583,
+STORE, 47243679866880, 47243679870975,
+STORE, 47243679555584, 47243679866879,
+ERASE, 47243679555584, 47243679555584,
+STORE, 47243679555584, 47243679866879,
+STORE, 47243679895552, 47243679911935,
+STORE, 47243679870976, 47243679895551,
+ERASE, 47243679870976, 47243679870976,
+STORE, 47243679870976, 47243679895551,
+ERASE, 47243679895552, 47243679895552,
+STORE, 47243679895552, 47243679911935,
+STORE, 47243679895552, 47243679924223,
+ERASE, 47243679870976, 47243679870976,
+STORE, 47243679870976, 47243679887359,
+STORE, 47243679887360, 47243679895551,
+ERASE, 47243678056448, 47243678056448,
+STORE, 47243678056448, 47243678060543,
+STORE, 47243678060544, 47243678064639,
+ERASE, 94448916905984, 94448916905984,
+STORE, 94448916905984, 94448916922367,
+STORE, 94448916922368, 94448916926463,
+ERASE, 140389117206528, 140389117206528,
+STORE, 140389117206528, 140389117210623,
+STORE, 140389117210624, 140389117214719,
+ERASE, 47243677949952, 47243677949952,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733068505088, 140737488351231,
+ERASE, 140733068505088, 140733068505088,
+STORE, 140733068505088, 140733068509183,
+STORE, 94207145750528, 94207146463231,
+ERASE, 94207145750528, 94207145750528,
+STORE, 94207145750528, 94207145799679,
+STORE, 94207145799680, 94207146463231,
+ERASE, 94207145799680, 94207145799680,
+STORE, 94207145799680, 94207146344447,
+STORE, 94207146344448, 94207146442751,
+STORE, 94207146442752, 94207146463231,
+STORE, 140684504911872, 140684505083903,
+ERASE, 140684504911872, 140684504911872,
+STORE, 140684504911872, 140684504915967,
+STORE, 140684504915968, 140684505083903,
+ERASE, 140684504915968, 140684504915968,
+STORE, 140684504915968, 140684505038847,
+STORE, 140684505038848, 140684505071615,
+STORE, 140684505071616, 140684505079807,
+STORE, 140684505079808, 140684505083903,
+STORE, 140733068607488, 140733068611583,
+STORE, 140733068595200, 140733068607487,
+STORE, 46948290084864, 46948290093055,
+STORE, 46948290093056, 46948290101247,
+STORE, 46948290101248, 46948290207743,
+STORE, 46948290117632, 46948290207743,
+STORE, 46948290101248, 46948290117631,
+ERASE, 46948290117632, 46948290117632,
+STORE, 46948290117632, 46948290191359,
+STORE, 46948290191360, 46948290207743,
+STORE, 46948290170880, 46948290191359,
+STORE, 46948290117632, 46948290170879,
+ERASE, 46948290117632, 46948290117632,
+STORE, 46948290117632, 46948290170879,
+STORE, 46948290187264, 46948290191359,
+STORE, 46948290170880, 46948290187263,
+ERASE, 46948290170880, 46948290170880,
+STORE, 46948290170880, 46948290187263,
+STORE, 46948290199552, 46948290207743,
+STORE, 46948290191360, 46948290199551,
+ERASE, 46948290191360, 46948290191360,
+STORE, 46948290191360, 46948290199551,
+ERASE, 46948290199552, 46948290199552,
+STORE, 46948290199552, 46948290207743,
+STORE, 46948290207744, 46948292046847,
+STORE, 46948290347008, 46948292046847,
+STORE, 46948290207744, 46948290347007,
+ERASE, 46948290347008, 46948290347008,
+STORE, 46948290347008, 46948292005887,
+STORE, 46948292005888, 46948292046847,
+STORE, 46948291690496, 46948292005887,
+STORE, 46948290347008, 46948291690495,
+ERASE, 46948290347008, 46948290347008,
+STORE, 46948290347008, 46948291690495,
+STORE, 46948292001792, 46948292005887,
+STORE, 46948291690496, 46948292001791,
+ERASE, 46948291690496, 46948291690496,
+STORE, 46948291690496, 46948292001791,
+STORE, 46948292030464, 46948292046847,
+STORE, 46948292005888, 46948292030463,
+ERASE, 46948292005888, 46948292005888,
+STORE, 46948292005888, 46948292030463,
+ERASE, 46948292030464, 46948292030464,
+STORE, 46948292030464, 46948292046847,
+STORE, 46948292030464, 46948292059135,
+ERASE, 46948292005888, 46948292005888,
+STORE, 46948292005888, 46948292022271,
+STORE, 46948292022272, 46948292030463,
+ERASE, 46948290191360, 46948290191360,
+STORE, 46948290191360, 46948290195455,
+STORE, 46948290195456, 46948290199551,
+ERASE, 94207146442752, 94207146442752,
+STORE, 94207146442752, 94207146459135,
+STORE, 94207146459136, 94207146463231,
+ERASE, 140684505071616, 140684505071616,
+STORE, 140684505071616, 140684505075711,
+STORE, 140684505075712, 140684505079807,
+ERASE, 46948290084864, 46948290084864,
+STORE, 140737488347136, 140737488351231,
+STORE, 140726367158272, 140737488351231,
+ERASE, 140726367158272, 140726367158272,
+STORE, 140726367158272, 140726367162367,
+STORE, 94436124106752, 94436124819455,
+ERASE, 94436124106752, 94436124106752,
+STORE, 94436124106752, 94436124155903,
+STORE, 94436124155904, 94436124819455,
+ERASE, 94436124155904, 94436124155904,
+STORE, 94436124155904, 94436124700671,
+STORE, 94436124700672, 94436124798975,
+STORE, 94436124798976, 94436124819455,
+STORE, 140049025044480, 140049025216511,
+ERASE, 140049025044480, 140049025044480,
+STORE, 140049025044480, 140049025048575,
+STORE, 140049025048576, 140049025216511,
+ERASE, 140049025048576, 140049025048576,
+STORE, 140049025048576, 140049025171455,
+STORE, 140049025171456, 140049025204223,
+STORE, 140049025204224, 140049025212415,
+STORE, 140049025212416, 140049025216511,
+STORE, 140726367256576, 140726367260671,
+STORE, 140726367244288, 140726367256575,
+STORE, 47583769952256, 47583769960447,
+STORE, 47583769960448, 47583769968639,
+STORE, 47583769968640, 47583770075135,
+STORE, 47583769985024, 47583770075135,
+STORE, 47583769968640, 47583769985023,
+ERASE, 47583769985024, 47583769985024,
+STORE, 47583769985024, 47583770058751,
+STORE, 47583770058752, 47583770075135,
+STORE, 47583770038272, 47583770058751,
+STORE, 47583769985024, 47583770038271,
+ERASE, 47583769985024, 47583769985024,
+STORE, 47583769985024, 47583770038271,
+STORE, 47583770054656, 47583770058751,
+STORE, 47583770038272, 47583770054655,
+ERASE, 47583770038272, 47583770038272,
+STORE, 47583770038272, 47583770054655,
+STORE, 47583770066944, 47583770075135,
+STORE, 47583770058752, 47583770066943,
+ERASE, 47583770058752, 47583770058752,
+STORE, 47583770058752, 47583770066943,
+ERASE, 47583770066944, 47583770066944,
+STORE, 47583770066944, 47583770075135,
+STORE, 47583770075136, 47583771914239,
+STORE, 47583770214400, 47583771914239,
+STORE, 47583770075136, 47583770214399,
+ERASE, 47583770214400, 47583770214400,
+STORE, 47583770214400, 47583771873279,
+STORE, 47583771873280, 47583771914239,
+STORE, 47583771557888, 47583771873279,
+STORE, 47583770214400, 47583771557887,
+ERASE, 47583770214400, 47583770214400,
+STORE, 47583770214400, 47583771557887,
+STORE, 47583771869184, 47583771873279,
+STORE, 47583771557888, 47583771869183,
+ERASE, 47583771557888, 47583771557888,
+STORE, 47583771557888, 47583771869183,
+STORE, 47583771897856, 47583771914239,
+STORE, 47583771873280, 47583771897855,
+ERASE, 47583771873280, 47583771873280,
+STORE, 47583771873280, 47583771897855,
+ERASE, 47583771897856, 47583771897856,
+STORE, 47583771897856, 47583771914239,
+STORE, 47583771897856, 47583771926527,
+ERASE, 47583771873280, 47583771873280,
+STORE, 47583771873280, 47583771889663,
+STORE, 47583771889664, 47583771897855,
+ERASE, 47583770058752, 47583770058752,
+STORE, 47583770058752, 47583770062847,
+STORE, 47583770062848, 47583770066943,
+ERASE, 94436124798976, 94436124798976,
+STORE, 94436124798976, 94436124815359,
+STORE, 94436124815360, 94436124819455,
+ERASE, 140049025204224, 140049025204224,
+STORE, 140049025204224, 140049025208319,
+STORE, 140049025208320, 140049025212415,
+ERASE, 47583769952256, 47583769952256,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727116099584, 140737488351231,
+ERASE, 140727116099584, 140727116099584,
+STORE, 140727116099584, 140727116103679,
+STORE, 94166319734784, 94166320447487,
+ERASE, 94166319734784, 94166319734784,
+STORE, 94166319734784, 94166319783935,
+STORE, 94166319783936, 94166320447487,
+ERASE, 94166319783936, 94166319783936,
+STORE, 94166319783936, 94166320328703,
+STORE, 94166320328704, 94166320427007,
+STORE, 94166320427008, 94166320447487,
+STORE, 139976559542272, 139976559714303,
+ERASE, 139976559542272, 139976559542272,
+STORE, 139976559542272, 139976559546367,
+STORE, 139976559546368, 139976559714303,
+ERASE, 139976559546368, 139976559546368,
+STORE, 139976559546368, 139976559669247,
+STORE, 139976559669248, 139976559702015,
+STORE, 139976559702016, 139976559710207,
+STORE, 139976559710208, 139976559714303,
+STORE, 140727116222464, 140727116226559,
+STORE, 140727116210176, 140727116222463,
+STORE, 47656235454464, 47656235462655,
+STORE, 47656235462656, 47656235470847,
+STORE, 47656235470848, 47656235577343,
+STORE, 47656235487232, 47656235577343,
+STORE, 47656235470848, 47656235487231,
+ERASE, 47656235487232, 47656235487232,
+STORE, 47656235487232, 47656235560959,
+STORE, 47656235560960, 47656235577343,
+STORE, 47656235540480, 47656235560959,
+STORE, 47656235487232, 47656235540479,
+ERASE, 47656235487232, 47656235487232,
+STORE, 47656235487232, 47656235540479,
+STORE, 47656235556864, 47656235560959,
+STORE, 47656235540480, 47656235556863,
+ERASE, 47656235540480, 47656235540480,
+STORE, 47656235540480, 47656235556863,
+STORE, 47656235569152, 47656235577343,
+STORE, 47656235560960, 47656235569151,
+ERASE, 47656235560960, 47656235560960,
+STORE, 47656235560960, 47656235569151,
+ERASE, 47656235569152, 47656235569152,
+STORE, 47656235569152, 47656235577343,
+STORE, 47656235577344, 47656237416447,
+STORE, 47656235716608, 47656237416447,
+STORE, 47656235577344, 47656235716607,
+ERASE, 47656235716608, 47656235716608,
+STORE, 47656235716608, 47656237375487,
+STORE, 47656237375488, 47656237416447,
+STORE, 47656237060096, 47656237375487,
+STORE, 47656235716608, 47656237060095,
+ERASE, 47656235716608, 47656235716608,
+STORE, 47656235716608, 47656237060095,
+STORE, 47656237371392, 47656237375487,
+STORE, 47656237060096, 47656237371391,
+ERASE, 47656237060096, 47656237060096,
+STORE, 47656237060096, 47656237371391,
+STORE, 47656237400064, 47656237416447,
+STORE, 47656237375488, 47656237400063,
+ERASE, 47656237375488, 47656237375488,
+STORE, 47656237375488, 47656237400063,
+ERASE, 47656237400064, 47656237400064,
+STORE, 47656237400064, 47656237416447,
+STORE, 47656237400064, 47656237428735,
+ERASE, 47656237375488, 47656237375488,
+STORE, 47656237375488, 47656237391871,
+STORE, 47656237391872, 47656237400063,
+ERASE, 47656235560960, 47656235560960,
+STORE, 47656235560960, 47656235565055,
+STORE, 47656235565056, 47656235569151,
+ERASE, 94166320427008, 94166320427008,
+STORE, 94166320427008, 94166320443391,
+STORE, 94166320443392, 94166320447487,
+ERASE, 139976559702016, 139976559702016,
+STORE, 139976559702016, 139976559706111,
+STORE, 139976559706112, 139976559710207,
+ERASE, 47656235454464, 47656235454464,
+STORE, 94166332153856, 94166332289023,
+STORE, 140737488347136, 140737488351231,
+STORE, 140726412816384, 140737488351231,
+ERASE, 140726412816384, 140726412816384,
+STORE, 140726412816384, 140726412820479,
+STORE, 94094884507648, 94094885220351,
+ERASE, 94094884507648, 94094884507648,
+STORE, 94094884507648, 94094884556799,
+STORE, 94094884556800, 94094885220351,
+ERASE, 94094884556800, 94094884556800,
+STORE, 94094884556800, 94094885101567,
+STORE, 94094885101568, 94094885199871,
+STORE, 94094885199872, 94094885220351,
+STORE, 139773773938688, 139773774110719,
+ERASE, 139773773938688, 139773773938688,
+STORE, 139773773938688, 139773773942783,
+STORE, 139773773942784, 139773774110719,
+ERASE, 139773773942784, 139773773942784,
+STORE, 139773773942784, 139773774065663,
+STORE, 139773774065664, 139773774098431,
+STORE, 139773774098432, 139773774106623,
+STORE, 139773774106624, 139773774110719,
+STORE, 140726412963840, 140726412967935,
+STORE, 140726412951552, 140726412963839,
+STORE, 47859021058048, 47859021066239,
+STORE, 47859021066240, 47859021074431,
+STORE, 47859021074432, 47859021180927,
+STORE, 47859021090816, 47859021180927,
+STORE, 47859021074432, 47859021090815,
+ERASE, 47859021090816, 47859021090816,
+STORE, 47859021090816, 47859021164543,
+STORE, 47859021164544, 47859021180927,
+STORE, 47859021144064, 47859021164543,
+STORE, 47859021090816, 47859021144063,
+ERASE, 47859021090816, 47859021090816,
+STORE, 47859021090816, 47859021144063,
+STORE, 47859021160448, 47859021164543,
+STORE, 47859021144064, 47859021160447,
+ERASE, 47859021144064, 47859021144064,
+STORE, 47859021144064, 47859021160447,
+STORE, 47859021172736, 47859021180927,
+STORE, 47859021164544, 47859021172735,
+ERASE, 47859021164544, 47859021164544,
+STORE, 47859021164544, 47859021172735,
+ERASE, 47859021172736, 47859021172736,
+STORE, 47859021172736, 47859021180927,
+STORE, 47859021180928, 47859023020031,
+STORE, 47859021320192, 47859023020031,
+STORE, 47859021180928, 47859021320191,
+ERASE, 47859021320192, 47859021320192,
+STORE, 47859021320192, 47859022979071,
+STORE, 47859022979072, 47859023020031,
+STORE, 47859022663680, 47859022979071,
+STORE, 47859021320192, 47859022663679,
+ERASE, 47859021320192, 47859021320192,
+STORE, 47859021320192, 47859022663679,
+STORE, 47859022974976, 47859022979071,
+STORE, 47859022663680, 47859022974975,
+ERASE, 47859022663680, 47859022663680,
+STORE, 47859022663680, 47859022974975,
+STORE, 47859023003648, 47859023020031,
+STORE, 47859022979072, 47859023003647,
+ERASE, 47859022979072, 47859022979072,
+STORE, 47859022979072, 47859023003647,
+ERASE, 47859023003648, 47859023003648,
+STORE, 47859023003648, 47859023020031,
+STORE, 47859023003648, 47859023032319,
+ERASE, 47859022979072, 47859022979072,
+STORE, 47859022979072, 47859022995455,
+STORE, 47859022995456, 47859023003647,
+ERASE, 47859021164544, 47859021164544,
+STORE, 47859021164544, 47859021168639,
+STORE, 47859021168640, 47859021172735,
+ERASE, 94094885199872, 94094885199872,
+STORE, 94094885199872, 94094885216255,
+STORE, 94094885216256, 94094885220351,
+ERASE, 139773774098432, 139773774098432,
+STORE, 139773774098432, 139773774102527,
+STORE, 139773774102528, 139773774106623,
+ERASE, 47859021058048, 47859021058048,
+STORE, 94094901108736, 94094901243903,
+STORE, 140737488347136, 140737488351231,
+STORE, 140736567963648, 140737488351231,
+ERASE, 140736567963648, 140736567963648,
+STORE, 140736567963648, 140736567967743,
+STORE, 94924425748480, 94924426461183,
+ERASE, 94924425748480, 94924425748480,
+STORE, 94924425748480, 94924425797631,
+STORE, 94924425797632, 94924426461183,
+ERASE, 94924425797632, 94924425797632,
+STORE, 94924425797632, 94924426342399,
+STORE, 94924426342400, 94924426440703,
+STORE, 94924426440704, 94924426461183,
+STORE, 140042126319616, 140042126491647,
+ERASE, 140042126319616, 140042126319616,
+STORE, 140042126319616, 140042126323711,
+STORE, 140042126323712, 140042126491647,
+ERASE, 140042126323712, 140042126323712,
+STORE, 140042126323712, 140042126446591,
+STORE, 140042126446592, 140042126479359,
+STORE, 140042126479360, 140042126487551,
+STORE, 140042126487552, 140042126491647,
+STORE, 140736568672256, 140736568676351,
+STORE, 140736568659968, 140736568672255,
+STORE, 47590668677120, 47590668685311,
+STORE, 47590668685312, 47590668693503,
+STORE, 47590668693504, 47590668799999,
+STORE, 47590668709888, 47590668799999,
+STORE, 47590668693504, 47590668709887,
+ERASE, 47590668709888, 47590668709888,
+STORE, 47590668709888, 47590668783615,
+STORE, 47590668783616, 47590668799999,
+STORE, 47590668763136, 47590668783615,
+STORE, 47590668709888, 47590668763135,
+ERASE, 47590668709888, 47590668709888,
+STORE, 47590668709888, 47590668763135,
+STORE, 47590668779520, 47590668783615,
+STORE, 47590668763136, 47590668779519,
+ERASE, 47590668763136, 47590668763136,
+STORE, 47590668763136, 47590668779519,
+STORE, 47590668791808, 47590668799999,
+STORE, 47590668783616, 47590668791807,
+ERASE, 47590668783616, 47590668783616,
+STORE, 47590668783616, 47590668791807,
+ERASE, 47590668791808, 47590668791808,
+STORE, 47590668791808, 47590668799999,
+STORE, 47590668800000, 47590670639103,
+STORE, 47590668939264, 47590670639103,
+STORE, 47590668800000, 47590668939263,
+ERASE, 47590668939264, 47590668939264,
+STORE, 47590668939264, 47590670598143,
+STORE, 47590670598144, 47590670639103,
+STORE, 47590670282752, 47590670598143,
+STORE, 47590668939264, 47590670282751,
+ERASE, 47590668939264, 47590668939264,
+STORE, 47590668939264, 47590670282751,
+STORE, 47590670594048, 47590670598143,
+STORE, 47590670282752, 47590670594047,
+ERASE, 47590670282752, 47590670282752,
+STORE, 47590670282752, 47590670594047,
+STORE, 47590670622720, 47590670639103,
+STORE, 47590670598144, 47590670622719,
+ERASE, 47590670598144, 47590670598144,
+STORE, 47590670598144, 47590670622719,
+ERASE, 47590670622720, 47590670622720,
+STORE, 47590670622720, 47590670639103,
+STORE, 47590670622720, 47590670651391,
+ERASE, 47590670598144, 47590670598144,
+STORE, 47590670598144, 47590670614527,
+STORE, 47590670614528, 47590670622719,
+ERASE, 47590668783616, 47590668783616,
+STORE, 47590668783616, 47590668787711,
+STORE, 47590668787712, 47590668791807,
+ERASE, 94924426440704, 94924426440704,
+STORE, 94924426440704, 94924426457087,
+STORE, 94924426457088, 94924426461183,
+ERASE, 140042126479360, 140042126479360,
+STORE, 140042126479360, 140042126483455,
+STORE, 140042126483456, 140042126487551,
+ERASE, 47590668677120, 47590668677120,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733281439744, 140737488351231,
+ERASE, 140733281439744, 140733281439744,
+STORE, 140733281439744, 140733281443839,
+STORE, 94490667069440, 94490667782143,
+ERASE, 94490667069440, 94490667069440,
+STORE, 94490667069440, 94490667118591,
+STORE, 94490667118592, 94490667782143,
+ERASE, 94490667118592, 94490667118592,
+STORE, 94490667118592, 94490667663359,
+STORE, 94490667663360, 94490667761663,
+STORE, 94490667761664, 94490667782143,
+STORE, 139878215118848, 139878215290879,
+ERASE, 139878215118848, 139878215118848,
+STORE, 139878215118848, 139878215122943,
+STORE, 139878215122944, 139878215290879,
+ERASE, 139878215122944, 139878215122944,
+STORE, 139878215122944, 139878215245823,
+STORE, 139878215245824, 139878215278591,
+STORE, 139878215278592, 139878215286783,
+STORE, 139878215286784, 139878215290879,
+STORE, 140733281464320, 140733281468415,
+STORE, 140733281452032, 140733281464319,
+STORE, 47754579877888, 47754579886079,
+STORE, 47754579886080, 47754579894271,
+STORE, 47754579894272, 47754580000767,
+STORE, 47754579910656, 47754580000767,
+STORE, 47754579894272, 47754579910655,
+ERASE, 47754579910656, 47754579910656,
+STORE, 47754579910656, 47754579984383,
+STORE, 47754579984384, 47754580000767,
+STORE, 47754579963904, 47754579984383,
+STORE, 47754579910656, 47754579963903,
+ERASE, 47754579910656, 47754579910656,
+STORE, 47754579910656, 47754579963903,
+STORE, 47754579980288, 47754579984383,
+STORE, 47754579963904, 47754579980287,
+ERASE, 47754579963904, 47754579963904,
+STORE, 47754579963904, 47754579980287,
+STORE, 47754579992576, 47754580000767,
+STORE, 47754579984384, 47754579992575,
+ERASE, 47754579984384, 47754579984384,
+STORE, 47754579984384, 47754579992575,
+ERASE, 47754579992576, 47754579992576,
+STORE, 47754579992576, 47754580000767,
+STORE, 47754580000768, 47754581839871,
+STORE, 47754580140032, 47754581839871,
+STORE, 47754580000768, 47754580140031,
+ERASE, 47754580140032, 47754580140032,
+STORE, 47754580140032, 47754581798911,
+STORE, 47754581798912, 47754581839871,
+STORE, 47754581483520, 47754581798911,
+STORE, 47754580140032, 47754581483519,
+ERASE, 47754580140032, 47754580140032,
+STORE, 47754580140032, 47754581483519,
+STORE, 47754581794816, 47754581798911,
+STORE, 47754581483520, 47754581794815,
+ERASE, 47754581483520, 47754581483520,
+STORE, 47754581483520, 47754581794815,
+STORE, 47754581823488, 47754581839871,
+STORE, 47754581798912, 47754581823487,
+ERASE, 47754581798912, 47754581798912,
+STORE, 47754581798912, 47754581823487,
+ERASE, 47754581823488, 47754581823488,
+STORE, 47754581823488, 47754581839871,
+STORE, 47754581823488, 47754581852159,
+ERASE, 47754581798912, 47754581798912,
+STORE, 47754581798912, 47754581815295,
+STORE, 47754581815296, 47754581823487,
+ERASE, 47754579984384, 47754579984384,
+STORE, 47754579984384, 47754579988479,
+STORE, 47754579988480, 47754579992575,
+ERASE, 94490667761664, 94490667761664,
+STORE, 94490667761664, 94490667778047,
+STORE, 94490667778048, 94490667782143,
+ERASE, 139878215278592, 139878215278592,
+STORE, 139878215278592, 139878215282687,
+STORE, 139878215282688, 139878215286783,
+ERASE, 47754579877888, 47754579877888,
+STORE, 94490669649920, 94490669785087,
+STORE, 140737488347136, 140737488351231,
+STORE, 140735382188032, 140737488351231,
+ERASE, 140735382188032, 140735382188032,
+STORE, 140735382188032, 140735382192127,
+STORE, 94150181302272, 94150182014975,
+ERASE, 94150181302272, 94150181302272,
+STORE, 94150181302272, 94150181351423,
+STORE, 94150181351424, 94150182014975,
+ERASE, 94150181351424, 94150181351424,
+STORE, 94150181351424, 94150181896191,
+STORE, 94150181896192, 94150181994495,
+STORE, 94150181994496, 94150182014975,
+STORE, 139679752458240, 139679752630271,
+ERASE, 139679752458240, 139679752458240,
+STORE, 139679752458240, 139679752462335,
+STORE, 139679752462336, 139679752630271,
+ERASE, 139679752462336, 139679752462336,
+STORE, 139679752462336, 139679752585215,
+STORE, 139679752585216, 139679752617983,
+STORE, 139679752617984, 139679752626175,
+STORE, 139679752626176, 139679752630271,
+STORE, 140735382536192, 140735382540287,
+STORE, 140735382523904, 140735382536191,
+STORE, 47953042538496, 47953042546687,
+STORE, 47953042546688, 47953042554879,
+STORE, 47953042554880, 47953042661375,
+STORE, 47953042571264, 47953042661375,
+STORE, 47953042554880, 47953042571263,
+ERASE, 47953042571264, 47953042571264,
+STORE, 47953042571264, 47953042644991,
+STORE, 47953042644992, 47953042661375,
+STORE, 47953042624512, 47953042644991,
+STORE, 47953042571264, 47953042624511,
+ERASE, 47953042571264, 47953042571264,
+STORE, 47953042571264, 47953042624511,
+STORE, 47953042640896, 47953042644991,
+STORE, 47953042624512, 47953042640895,
+ERASE, 47953042624512, 47953042624512,
+STORE, 47953042624512, 47953042640895,
+STORE, 47953042653184, 47953042661375,
+STORE, 47953042644992, 47953042653183,
+ERASE, 47953042644992, 47953042644992,
+STORE, 47953042644992, 47953042653183,
+ERASE, 47953042653184, 47953042653184,
+STORE, 47953042653184, 47953042661375,
+STORE, 47953042661376, 47953044500479,
+STORE, 47953042800640, 47953044500479,
+STORE, 47953042661376, 47953042800639,
+ERASE, 47953042800640, 47953042800640,
+STORE, 47953042800640, 47953044459519,
+STORE, 47953044459520, 47953044500479,
+STORE, 47953044144128, 47953044459519,
+STORE, 47953042800640, 47953044144127,
+ERASE, 47953042800640, 47953042800640,
+STORE, 47953042800640, 47953044144127,
+STORE, 47953044455424, 47953044459519,
+STORE, 47953044144128, 47953044455423,
+ERASE, 47953044144128, 47953044144128,
+STORE, 47953044144128, 47953044455423,
+STORE, 47953044484096, 47953044500479,
+STORE, 47953044459520, 47953044484095,
+ERASE, 47953044459520, 47953044459520,
+STORE, 47953044459520, 47953044484095,
+ERASE, 47953044484096, 47953044484096,
+STORE, 47953044484096, 47953044500479,
+STORE, 47953044484096, 47953044512767,
+ERASE, 47953044459520, 47953044459520,
+STORE, 47953044459520, 47953044475903,
+STORE, 47953044475904, 47953044484095,
+ERASE, 47953042644992, 47953042644992,
+STORE, 47953042644992, 47953042649087,
+STORE, 47953042649088, 47953042653183,
+ERASE, 94150181994496, 94150181994496,
+STORE, 94150181994496, 94150182010879,
+STORE, 94150182010880, 94150182014975,
+ERASE, 139679752617984, 139679752617984,
+STORE, 139679752617984, 139679752622079,
+STORE, 139679752622080, 139679752626175,
+ERASE, 47953042538496, 47953042538496,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737044123648, 140737488351231,
+ERASE, 140737044123648, 140737044123648,
+STORE, 140737044123648, 140737044127743,
+STORE, 94425324294144, 94425325006847,
+ERASE, 94425324294144, 94425324294144,
+STORE, 94425324294144, 94425324343295,
+STORE, 94425324343296, 94425325006847,
+ERASE, 94425324343296, 94425324343296,
+STORE, 94425324343296, 94425324888063,
+STORE, 94425324888064, 94425324986367,
+STORE, 94425324986368, 94425325006847,
+STORE, 140382015016960, 140382015188991,
+ERASE, 140382015016960, 140382015016960,
+STORE, 140382015016960, 140382015021055,
+STORE, 140382015021056, 140382015188991,
+ERASE, 140382015021056, 140382015021056,
+STORE, 140382015021056, 140382015143935,
+STORE, 140382015143936, 140382015176703,
+STORE, 140382015176704, 140382015184895,
+STORE, 140382015184896, 140382015188991,
+STORE, 140737045585920, 140737045590015,
+STORE, 140737045573632, 140737045585919,
+STORE, 47250779979776, 47250779987967,
+STORE, 47250779987968, 47250779996159,
+STORE, 47250779996160, 47250780102655,
+STORE, 47250780012544, 47250780102655,
+STORE, 47250779996160, 47250780012543,
+ERASE, 47250780012544, 47250780012544,
+STORE, 47250780012544, 47250780086271,
+STORE, 47250780086272, 47250780102655,
+STORE, 47250780065792, 47250780086271,
+STORE, 47250780012544, 47250780065791,
+ERASE, 47250780012544, 47250780012544,
+STORE, 47250780012544, 47250780065791,
+STORE, 47250780082176, 47250780086271,
+STORE, 47250780065792, 47250780082175,
+ERASE, 47250780065792, 47250780065792,
+STORE, 47250780065792, 47250780082175,
+STORE, 47250780094464, 47250780102655,
+STORE, 47250780086272, 47250780094463,
+ERASE, 47250780086272, 47250780086272,
+STORE, 47250780086272, 47250780094463,
+ERASE, 47250780094464, 47250780094464,
+STORE, 47250780094464, 47250780102655,
+STORE, 47250780102656, 47250781941759,
+STORE, 47250780241920, 47250781941759,
+STORE, 47250780102656, 47250780241919,
+ERASE, 47250780241920, 47250780241920,
+STORE, 47250780241920, 47250781900799,
+STORE, 47250781900800, 47250781941759,
+STORE, 47250781585408, 47250781900799,
+STORE, 47250780241920, 47250781585407,
+ERASE, 47250780241920, 47250780241920,
+STORE, 47250780241920, 47250781585407,
+STORE, 47250781896704, 47250781900799,
+STORE, 47250781585408, 47250781896703,
+ERASE, 47250781585408, 47250781585408,
+STORE, 47250781585408, 47250781896703,
+STORE, 47250781925376, 47250781941759,
+STORE, 47250781900800, 47250781925375,
+ERASE, 47250781900800, 47250781900800,
+STORE, 47250781900800, 47250781925375,
+ERASE, 47250781925376, 47250781925376,
+STORE, 47250781925376, 47250781941759,
+STORE, 47250781925376, 47250781954047,
+ERASE, 47250781900800, 47250781900800,
+STORE, 47250781900800, 47250781917183,
+STORE, 47250781917184, 47250781925375,
+ERASE, 47250780086272, 47250780086272,
+STORE, 47250780086272, 47250780090367,
+STORE, 47250780090368, 47250780094463,
+ERASE, 94425324986368, 94425324986368,
+STORE, 94425324986368, 94425325002751,
+STORE, 94425325002752, 94425325006847,
+ERASE, 140382015176704, 140382015176704,
+STORE, 140382015176704, 140382015180799,
+STORE, 140382015180800, 140382015184895,
+ERASE, 47250779979776, 47250779979776,
+STORE, 94425351438336, 94425351573503,
+STORE, 140737488347136, 140737488351231,
+STORE, 140736801144832, 140737488351231,
+ERASE, 140736801144832, 140736801144832,
+STORE, 140736801144832, 140736801148927,
+STORE, 94629429358592, 94629430071295,
+ERASE, 94629429358592, 94629429358592,
+STORE, 94629429358592, 94629429407743,
+STORE, 94629429407744, 94629430071295,
+ERASE, 94629429407744, 94629429407744,
+STORE, 94629429407744, 94629429952511,
+STORE, 94629429952512, 94629430050815,
+STORE, 94629430050816, 94629430071295,
+STORE, 139801685483520, 139801685655551,
+ERASE, 139801685483520, 139801685483520,
+STORE, 139801685483520, 139801685487615,
+STORE, 139801685487616, 139801685655551,
+ERASE, 139801685487616, 139801685487616,
+STORE, 139801685487616, 139801685610495,
+STORE, 139801685610496, 139801685643263,
+STORE, 139801685643264, 139801685651455,
+STORE, 139801685651456, 139801685655551,
+STORE, 140736801198080, 140736801202175,
+STORE, 140736801185792, 140736801198079,
+STORE, 47831109513216, 47831109521407,
+STORE, 47831109521408, 47831109529599,
+STORE, 47831109529600, 47831109636095,
+STORE, 47831109545984, 47831109636095,
+STORE, 47831109529600, 47831109545983,
+ERASE, 47831109545984, 47831109545984,
+STORE, 47831109545984, 47831109619711,
+STORE, 47831109619712, 47831109636095,
+STORE, 47831109599232, 47831109619711,
+STORE, 47831109545984, 47831109599231,
+ERASE, 47831109545984, 47831109545984,
+STORE, 47831109545984, 47831109599231,
+STORE, 47831109615616, 47831109619711,
+STORE, 47831109599232, 47831109615615,
+ERASE, 47831109599232, 47831109599232,
+STORE, 47831109599232, 47831109615615,
+STORE, 47831109627904, 47831109636095,
+STORE, 47831109619712, 47831109627903,
+ERASE, 47831109619712, 47831109619712,
+STORE, 47831109619712, 47831109627903,
+ERASE, 47831109627904, 47831109627904,
+STORE, 47831109627904, 47831109636095,
+STORE, 47831109636096, 47831111475199,
+STORE, 47831109775360, 47831111475199,
+STORE, 47831109636096, 47831109775359,
+ERASE, 47831109775360, 47831109775360,
+STORE, 47831109775360, 47831111434239,
+STORE, 47831111434240, 47831111475199,
+STORE, 47831111118848, 47831111434239,
+STORE, 47831109775360, 47831111118847,
+ERASE, 47831109775360, 47831109775360,
+STORE, 47831109775360, 47831111118847,
+STORE, 47831111430144, 47831111434239,
+STORE, 47831111118848, 47831111430143,
+ERASE, 47831111118848, 47831111118848,
+STORE, 47831111118848, 47831111430143,
+STORE, 47831111458816, 47831111475199,
+STORE, 47831111434240, 47831111458815,
+ERASE, 47831111434240, 47831111434240,
+STORE, 47831111434240, 47831111458815,
+ERASE, 47831111458816, 47831111458816,
+STORE, 47831111458816, 47831111475199,
+STORE, 47831111458816, 47831111487487,
+ERASE, 47831111434240, 47831111434240,
+STORE, 47831111434240, 47831111450623,
+STORE, 47831111450624, 47831111458815,
+ERASE, 47831109619712, 47831109619712,
+STORE, 47831109619712, 47831109623807,
+STORE, 47831109623808, 47831109627903,
+ERASE, 94629430050816, 94629430050816,
+STORE, 94629430050816, 94629430067199,
+STORE, 94629430067200, 94629430071295,
+ERASE, 139801685643264, 139801685643264,
+STORE, 139801685643264, 139801685647359,
+STORE, 139801685647360, 139801685651455,
+ERASE, 47831109513216, 47831109513216,
+STORE, 140737488347136, 140737488351231,
+STORE, 140729419612160, 140737488351231,
+ERASE, 140729419612160, 140729419612160,
+STORE, 140729419612160, 140729419616255,
+STORE, 94443354148864, 94443354861567,
+ERASE, 94443354148864, 94443354148864,
+STORE, 94443354148864, 94443354198015,
+STORE, 94443354198016, 94443354861567,
+ERASE, 94443354198016, 94443354198016,
+STORE, 94443354198016, 94443354742783,
+STORE, 94443354742784, 94443354841087,
+STORE, 94443354841088, 94443354861567,
+STORE, 139741700038656, 139741700210687,
+ERASE, 139741700038656, 139741700038656,
+STORE, 139741700038656, 139741700042751,
+STORE, 139741700042752, 139741700210687,
+ERASE, 139741700042752, 139741700042752,
+STORE, 139741700042752, 139741700165631,
+STORE, 139741700165632, 139741700198399,
+STORE, 139741700198400, 139741700206591,
+STORE, 139741700206592, 139741700210687,
+STORE, 140729420574720, 140729420578815,
+STORE, 140729420562432, 140729420574719,
+STORE, 47891094958080, 47891094966271,
+STORE, 47891094966272, 47891094974463,
+STORE, 47891094974464, 47891095080959,
+STORE, 47891094990848, 47891095080959,
+STORE, 47891094974464, 47891094990847,
+ERASE, 47891094990848, 47891094990848,
+STORE, 47891094990848, 47891095064575,
+STORE, 47891095064576, 47891095080959,
+STORE, 47891095044096, 47891095064575,
+STORE, 47891094990848, 47891095044095,
+ERASE, 47891094990848, 47891094990848,
+STORE, 47891094990848, 47891095044095,
+STORE, 47891095060480, 47891095064575,
+STORE, 47891095044096, 47891095060479,
+ERASE, 47891095044096, 47891095044096,
+STORE, 47891095044096, 47891095060479,
+STORE, 47891095072768, 47891095080959,
+STORE, 47891095064576, 47891095072767,
+ERASE, 47891095064576, 47891095064576,
+STORE, 47891095064576, 47891095072767,
+ERASE, 47891095072768, 47891095072768,
+STORE, 47891095072768, 47891095080959,
+STORE, 47891095080960, 47891096920063,
+STORE, 47891095220224, 47891096920063,
+STORE, 47891095080960, 47891095220223,
+ERASE, 47891095220224, 47891095220224,
+STORE, 47891095220224, 47891096879103,
+STORE, 47891096879104, 47891096920063,
+STORE, 47891096563712, 47891096879103,
+STORE, 47891095220224, 47891096563711,
+ERASE, 47891095220224, 47891095220224,
+STORE, 47891095220224, 47891096563711,
+STORE, 47891096875008, 47891096879103,
+STORE, 47891096563712, 47891096875007,
+ERASE, 47891096563712, 47891096563712,
+STORE, 47891096563712, 47891096875007,
+STORE, 47891096903680, 47891096920063,
+STORE, 47891096879104, 47891096903679,
+ERASE, 47891096879104, 47891096879104,
+STORE, 47891096879104, 47891096903679,
+ERASE, 47891096903680, 47891096903680,
+STORE, 47891096903680, 47891096920063,
+STORE, 47891096903680, 47891096932351,
+ERASE, 47891096879104, 47891096879104,
+STORE, 47891096879104, 47891096895487,
+STORE, 47891096895488, 47891096903679,
+ERASE, 47891095064576, 47891095064576,
+STORE, 47891095064576, 47891095068671,
+STORE, 47891095068672, 47891095072767,
+ERASE, 94443354841088, 94443354841088,
+STORE, 94443354841088, 94443354857471,
+STORE, 94443354857472, 94443354861567,
+ERASE, 139741700198400, 139741700198400,
+STORE, 139741700198400, 139741700202495,
+STORE, 139741700202496, 139741700206591,
+ERASE, 47891094958080, 47891094958080,
+STORE, 94443360825344, 94443360960511,
+STORE, 140737488347136, 140737488351231,
+STORE, 140722961661952, 140737488351231,
+ERASE, 140722961661952, 140722961661952,
+STORE, 140722961661952, 140722961666047,
+STORE, 94878388944896, 94878389657599,
+ERASE, 94878388944896, 94878388944896,
+STORE, 94878388944896, 94878388994047,
+STORE, 94878388994048, 94878389657599,
+ERASE, 94878388994048, 94878388994048,
+STORE, 94878388994048, 94878389538815,
+STORE, 94878389538816, 94878389637119,
+STORE, 94878389637120, 94878389657599,
+STORE, 140210690056192, 140210690228223,
+ERASE, 140210690056192, 140210690056192,
+STORE, 140210690056192, 140210690060287,
+STORE, 140210690060288, 140210690228223,
+ERASE, 140210690060288, 140210690060288,
+STORE, 140210690060288, 140210690183167,
+STORE, 140210690183168, 140210690215935,
+STORE, 140210690215936, 140210690224127,
+STORE, 140210690224128, 140210690228223,
+STORE, 140722963148800, 140722963152895,
+STORE, 140722963136512, 140722963148799,
+STORE, 47422104940544, 47422104948735,
+STORE, 47422104948736, 47422104956927,
+STORE, 47422104956928, 47422105063423,
+STORE, 47422104973312, 47422105063423,
+STORE, 47422104956928, 47422104973311,
+ERASE, 47422104973312, 47422104973312,
+STORE, 47422104973312, 47422105047039,
+STORE, 47422105047040, 47422105063423,
+STORE, 47422105026560, 47422105047039,
+STORE, 47422104973312, 47422105026559,
+ERASE, 47422104973312, 47422104973312,
+STORE, 47422104973312, 47422105026559,
+STORE, 47422105042944, 47422105047039,
+STORE, 47422105026560, 47422105042943,
+ERASE, 47422105026560, 47422105026560,
+STORE, 47422105026560, 47422105042943,
+STORE, 47422105055232, 47422105063423,
+STORE, 47422105047040, 47422105055231,
+ERASE, 47422105047040, 47422105047040,
+STORE, 47422105047040, 47422105055231,
+ERASE, 47422105055232, 47422105055232,
+STORE, 47422105055232, 47422105063423,
+STORE, 47422105063424, 47422106902527,
+STORE, 47422105202688, 47422106902527,
+STORE, 47422105063424, 47422105202687,
+ERASE, 47422105202688, 47422105202688,
+STORE, 47422105202688, 47422106861567,
+STORE, 47422106861568, 47422106902527,
+STORE, 47422106546176, 47422106861567,
+STORE, 47422105202688, 47422106546175,
+ERASE, 47422105202688, 47422105202688,
+STORE, 47422105202688, 47422106546175,
+STORE, 47422106857472, 47422106861567,
+STORE, 47422106546176, 47422106857471,
+ERASE, 47422106546176, 47422106546176,
+STORE, 47422106546176, 47422106857471,
+STORE, 47422106886144, 47422106902527,
+STORE, 47422106861568, 47422106886143,
+ERASE, 47422106861568, 47422106861568,
+STORE, 47422106861568, 47422106886143,
+ERASE, 47422106886144, 47422106886144,
+STORE, 47422106886144, 47422106902527,
+STORE, 47422106886144, 47422106914815,
+ERASE, 47422106861568, 47422106861568,
+STORE, 47422106861568, 47422106877951,
+STORE, 47422106877952, 47422106886143,
+ERASE, 47422105047040, 47422105047040,
+STORE, 47422105047040, 47422105051135,
+STORE, 47422105051136, 47422105055231,
+ERASE, 94878389637120, 94878389637120,
+STORE, 94878389637120, 94878389653503,
+STORE, 94878389653504, 94878389657599,
+ERASE, 140210690215936, 140210690215936,
+STORE, 140210690215936, 140210690220031,
+STORE, 140210690220032, 140210690224127,
+ERASE, 47422104940544, 47422104940544,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727690309632, 140737488351231,
+ERASE, 140727690309632, 140727690309632,
+STORE, 140727690309632, 140727690313727,
+STORE, 94121892208640, 94121892921343,
+ERASE, 94121892208640, 94121892208640,
+STORE, 94121892208640, 94121892257791,
+STORE, 94121892257792, 94121892921343,
+ERASE, 94121892257792, 94121892257792,
+STORE, 94121892257792, 94121892802559,
+STORE, 94121892802560, 94121892900863,
+STORE, 94121892900864, 94121892921343,
+STORE, 140662438326272, 140662438498303,
+ERASE, 140662438326272, 140662438326272,
+STORE, 140662438326272, 140662438330367,
+STORE, 140662438330368, 140662438498303,
+ERASE, 140662438330368, 140662438330368,
+STORE, 140662438330368, 140662438453247,
+STORE, 140662438453248, 140662438486015,
+STORE, 140662438486016, 140662438494207,
+STORE, 140662438494208, 140662438498303,
+STORE, 140727690379264, 140727690383359,
+STORE, 140727690366976, 140727690379263,
+STORE, 46970356670464, 46970356678655,
+STORE, 46970356678656, 46970356686847,
+STORE, 46970356686848, 46970356793343,
+STORE, 46970356703232, 46970356793343,
+STORE, 46970356686848, 46970356703231,
+ERASE, 46970356703232, 46970356703232,
+STORE, 46970356703232, 46970356776959,
+STORE, 46970356776960, 46970356793343,
+STORE, 46970356756480, 46970356776959,
+STORE, 46970356703232, 46970356756479,
+ERASE, 46970356703232, 46970356703232,
+STORE, 46970356703232, 46970356756479,
+STORE, 46970356772864, 46970356776959,
+STORE, 46970356756480, 46970356772863,
+ERASE, 46970356756480, 46970356756480,
+STORE, 46970356756480, 46970356772863,
+STORE, 46970356785152, 46970356793343,
+STORE, 46970356776960, 46970356785151,
+ERASE, 46970356776960, 46970356776960,
+STORE, 46970356776960, 46970356785151,
+ERASE, 46970356785152, 46970356785152,
+STORE, 46970356785152, 46970356793343,
+STORE, 46970356793344, 46970358632447,
+STORE, 46970356932608, 46970358632447,
+STORE, 46970356793344, 46970356932607,
+ERASE, 46970356932608, 46970356932608,
+STORE, 46970356932608, 46970358591487,
+STORE, 46970358591488, 46970358632447,
+STORE, 46970358276096, 46970358591487,
+STORE, 46970356932608, 46970358276095,
+ERASE, 46970356932608, 46970356932608,
+STORE, 46970356932608, 46970358276095,
+STORE, 46970358587392, 46970358591487,
+STORE, 46970358276096, 46970358587391,
+ERASE, 46970358276096, 46970358276096,
+STORE, 46970358276096, 46970358587391,
+STORE, 46970358616064, 46970358632447,
+STORE, 46970358591488, 46970358616063,
+ERASE, 46970358591488, 46970358591488,
+STORE, 46970358591488, 46970358616063,
+ERASE, 46970358616064, 46970358616064,
+STORE, 46970358616064, 46970358632447,
+STORE, 46970358616064, 46970358644735,
+ERASE, 46970358591488, 46970358591488,
+STORE, 46970358591488, 46970358607871,
+STORE, 46970358607872, 46970358616063,
+ERASE, 46970356776960, 46970356776960,
+STORE, 46970356776960, 46970356781055,
+STORE, 46970356781056, 46970356785151,
+ERASE, 94121892900864, 94121892900864,
+STORE, 94121892900864, 94121892917247,
+STORE, 94121892917248, 94121892921343,
+ERASE, 140662438486016, 140662438486016,
+STORE, 140662438486016, 140662438490111,
+STORE, 140662438490112, 140662438494207,
+ERASE, 46970356670464, 46970356670464,
+STORE, 94121898610688, 94121898745855,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737189351424, 140737488351231,
+ERASE, 140737189351424, 140737189351424,
+STORE, 140737189351424, 140737189355519,
+STORE, 93847948832768, 93847949545471,
+ERASE, 93847948832768, 93847948832768,
+STORE, 93847948832768, 93847948881919,
+STORE, 93847948881920, 93847949545471,
+ERASE, 93847948881920, 93847948881920,
+STORE, 93847948881920, 93847949426687,
+STORE, 93847949426688, 93847949524991,
+STORE, 93847949524992, 93847949545471,
+STORE, 139698989985792, 139698990157823,
+ERASE, 139698989985792, 139698989985792,
+STORE, 139698989985792, 139698989989887,
+STORE, 139698989989888, 139698990157823,
+ERASE, 139698989989888, 139698989989888,
+STORE, 139698989989888, 139698990112767,
+STORE, 139698990112768, 139698990145535,
+STORE, 139698990145536, 139698990153727,
+STORE, 139698990153728, 139698990157823,
+STORE, 140737189744640, 140737189748735,
+STORE, 140737189732352, 140737189744639,
+STORE, 47933805010944, 47933805019135,
+STORE, 47933805019136, 47933805027327,
+STORE, 47933805027328, 47933805133823,
+STORE, 47933805043712, 47933805133823,
+STORE, 47933805027328, 47933805043711,
+ERASE, 47933805043712, 47933805043712,
+STORE, 47933805043712, 47933805117439,
+STORE, 47933805117440, 47933805133823,
+STORE, 47933805096960, 47933805117439,
+STORE, 47933805043712, 47933805096959,
+ERASE, 47933805043712, 47933805043712,
+STORE, 47933805043712, 47933805096959,
+STORE, 47933805113344, 47933805117439,
+STORE, 47933805096960, 47933805113343,
+ERASE, 47933805096960, 47933805096960,
+STORE, 47933805096960, 47933805113343,
+STORE, 47933805125632, 47933805133823,
+STORE, 47933805117440, 47933805125631,
+ERASE, 47933805117440, 47933805117440,
+STORE, 47933805117440, 47933805125631,
+ERASE, 47933805125632, 47933805125632,
+STORE, 47933805125632, 47933805133823,
+STORE, 47933805133824, 47933806972927,
+STORE, 47933805273088, 47933806972927,
+STORE, 47933805133824, 47933805273087,
+ERASE, 47933805273088, 47933805273088,
+STORE, 47933805273088, 47933806931967,
+STORE, 47933806931968, 47933806972927,
+STORE, 47933806616576, 47933806931967,
+STORE, 47933805273088, 47933806616575,
+ERASE, 47933805273088, 47933805273088,
+STORE, 47933805273088, 47933806616575,
+STORE, 47933806927872, 47933806931967,
+STORE, 47933806616576, 47933806927871,
+ERASE, 47933806616576, 47933806616576,
+STORE, 47933806616576, 47933806927871,
+STORE, 47933806956544, 47933806972927,
+STORE, 47933806931968, 47933806956543,
+ERASE, 47933806931968, 47933806931968,
+STORE, 47933806931968, 47933806956543,
+ERASE, 47933806956544, 47933806956544,
+STORE, 47933806956544, 47933806972927,
+STORE, 47933806956544, 47933806985215,
+ERASE, 47933806931968, 47933806931968,
+STORE, 47933806931968, 47933806948351,
+STORE, 47933806948352, 47933806956543,
+ERASE, 47933805117440, 47933805117440,
+STORE, 47933805117440, 47933805121535,
+STORE, 47933805121536, 47933805125631,
+ERASE, 93847949524992, 93847949524992,
+STORE, 93847949524992, 93847949541375,
+STORE, 93847949541376, 93847949545471,
+ERASE, 139698990145536, 139698990145536,
+STORE, 139698990145536, 139698990149631,
+STORE, 139698990149632, 139698990153727,
+ERASE, 47933805010944, 47933805010944,
+STORE, 140737488347136, 140737488351231,
+STORE, 140725553991680, 140737488351231,
+ERASE, 140725553991680, 140725553991680,
+STORE, 140725553991680, 140725553995775,
+STORE, 93980056248320, 93980056961023,
+ERASE, 93980056248320, 93980056248320,
+STORE, 93980056248320, 93980056297471,
+STORE, 93980056297472, 93980056961023,
+ERASE, 93980056297472, 93980056297472,
+STORE, 93980056297472, 93980056842239,
+STORE, 93980056842240, 93980056940543,
+STORE, 93980056940544, 93980056961023,
+STORE, 140146588971008, 140146589143039,
+ERASE, 140146588971008, 140146588971008,
+STORE, 140146588971008, 140146588975103,
+STORE, 140146588975104, 140146589143039,
+ERASE, 140146588975104, 140146588975104,
+STORE, 140146588975104, 140146589097983,
+STORE, 140146589097984, 140146589130751,
+STORE, 140146589130752, 140146589138943,
+STORE, 140146589138944, 140146589143039,
+STORE, 140725554860032, 140725554864127,
+STORE, 140725554847744, 140725554860031,
+STORE, 47486206025728, 47486206033919,
+STORE, 47486206033920, 47486206042111,
+STORE, 47486206042112, 47486206148607,
+STORE, 47486206058496, 47486206148607,
+STORE, 47486206042112, 47486206058495,
+ERASE, 47486206058496, 47486206058496,
+STORE, 47486206058496, 47486206132223,
+STORE, 47486206132224, 47486206148607,
+STORE, 47486206111744, 47486206132223,
+STORE, 47486206058496, 47486206111743,
+ERASE, 47486206058496, 47486206058496,
+STORE, 47486206058496, 47486206111743,
+STORE, 47486206128128, 47486206132223,
+STORE, 47486206111744, 47486206128127,
+ERASE, 47486206111744, 47486206111744,
+STORE, 47486206111744, 47486206128127,
+STORE, 47486206140416, 47486206148607,
+STORE, 47486206132224, 47486206140415,
+ERASE, 47486206132224, 47486206132224,
+STORE, 47486206132224, 47486206140415,
+ERASE, 47486206140416, 47486206140416,
+STORE, 47486206140416, 47486206148607,
+STORE, 47486206148608, 47486207987711,
+STORE, 47486206287872, 47486207987711,
+STORE, 47486206148608, 47486206287871,
+ERASE, 47486206287872, 47486206287872,
+STORE, 47486206287872, 47486207946751,
+STORE, 47486207946752, 47486207987711,
+STORE, 47486207631360, 47486207946751,
+STORE, 47486206287872, 47486207631359,
+ERASE, 47486206287872, 47486206287872,
+STORE, 47486206287872, 47486207631359,
+STORE, 47486207942656, 47486207946751,
+STORE, 47486207631360, 47486207942655,
+ERASE, 47486207631360, 47486207631360,
+STORE, 47486207631360, 47486207942655,
+STORE, 47486207971328, 47486207987711,
+STORE, 47486207946752, 47486207971327,
+ERASE, 47486207946752, 47486207946752,
+STORE, 47486207946752, 47486207971327,
+ERASE, 47486207971328, 47486207971328,
+STORE, 47486207971328, 47486207987711,
+STORE, 47486207971328, 47486207999999,
+ERASE, 47486207946752, 47486207946752,
+STORE, 47486207946752, 47486207963135,
+STORE, 47486207963136, 47486207971327,
+ERASE, 47486206132224, 47486206132224,
+STORE, 47486206132224, 47486206136319,
+STORE, 47486206136320, 47486206140415,
+ERASE, 93980056940544, 93980056940544,
+STORE, 93980056940544, 93980056956927,
+STORE, 93980056956928, 93980056961023,
+ERASE, 140146589130752, 140146589130752,
+STORE, 140146589130752, 140146589134847,
+STORE, 140146589134848, 140146589138943,
+ERASE, 47486206025728, 47486206025728,
+STORE, 93980070006784, 93980070141951,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727334776832, 140737488351231,
+ERASE, 140727334776832, 140727334776832,
+STORE, 140727334776832, 140727334780927,
+STORE, 94049747247104, 94049747959807,
+ERASE, 94049747247104, 94049747247104,
+STORE, 94049747247104, 94049747296255,
+STORE, 94049747296256, 94049747959807,
+ERASE, 94049747296256, 94049747296256,
+STORE, 94049747296256, 94049747841023,
+STORE, 94049747841024, 94049747939327,
+STORE, 94049747939328, 94049747959807,
+STORE, 140227307216896, 140227307388927,
+ERASE, 140227307216896, 140227307216896,
+STORE, 140227307216896, 140227307220991,
+STORE, 140227307220992, 140227307388927,
+ERASE, 140227307220992, 140227307220992,
+STORE, 140227307220992, 140227307343871,
+STORE, 140227307343872, 140227307376639,
+STORE, 140227307376640, 140227307384831,
+STORE, 140227307384832, 140227307388927,
+STORE, 140727335337984, 140727335342079,
+STORE, 140727335325696, 140727335337983,
+STORE, 47405487779840, 47405487788031,
+STORE, 47405487788032, 47405487796223,
+STORE, 47405487796224, 47405487902719,
+STORE, 47405487812608, 47405487902719,
+STORE, 47405487796224, 47405487812607,
+ERASE, 47405487812608, 47405487812608,
+STORE, 47405487812608, 47405487886335,
+STORE, 47405487886336, 47405487902719,
+STORE, 47405487865856, 47405487886335,
+STORE, 47405487812608, 47405487865855,
+ERASE, 47405487812608, 47405487812608,
+STORE, 47405487812608, 47405487865855,
+STORE, 47405487882240, 47405487886335,
+STORE, 47405487865856, 47405487882239,
+ERASE, 47405487865856, 47405487865856,
+STORE, 47405487865856, 47405487882239,
+STORE, 47405487894528, 47405487902719,
+STORE, 47405487886336, 47405487894527,
+ERASE, 47405487886336, 47405487886336,
+STORE, 47405487886336, 47405487894527,
+ERASE, 47405487894528, 47405487894528,
+STORE, 47405487894528, 47405487902719,
+STORE, 47405487902720, 47405489741823,
+STORE, 47405488041984, 47405489741823,
+STORE, 47405487902720, 47405488041983,
+ERASE, 47405488041984, 47405488041984,
+STORE, 47405488041984, 47405489700863,
+STORE, 47405489700864, 47405489741823,
+STORE, 47405489385472, 47405489700863,
+STORE, 47405488041984, 47405489385471,
+ERASE, 47405488041984, 47405488041984,
+STORE, 47405488041984, 47405489385471,
+STORE, 47405489696768, 47405489700863,
+STORE, 47405489385472, 47405489696767,
+ERASE, 47405489385472, 47405489385472,
+STORE, 47405489385472, 47405489696767,
+STORE, 47405489725440, 47405489741823,
+STORE, 47405489700864, 47405489725439,
+ERASE, 47405489700864, 47405489700864,
+STORE, 47405489700864, 47405489725439,
+ERASE, 47405489725440, 47405489725440,
+STORE, 47405489725440, 47405489741823,
+STORE, 47405489725440, 47405489754111,
+ERASE, 47405489700864, 47405489700864,
+STORE, 47405489700864, 47405489717247,
+STORE, 47405489717248, 47405489725439,
+ERASE, 47405487886336, 47405487886336,
+STORE, 47405487886336, 47405487890431,
+STORE, 47405487890432, 47405487894527,
+ERASE, 94049747939328, 94049747939328,
+STORE, 94049747939328, 94049747955711,
+STORE, 94049747955712, 94049747959807,
+ERASE, 140227307376640, 140227307376640,
+STORE, 140227307376640, 140227307380735,
+STORE, 140227307380736, 140227307384831,
+ERASE, 47405487779840, 47405487779840,
+STORE, 94049758810112, 94049758945279,
+STORE, 140737488347136, 140737488351231,
+STORE, 140727079718912, 140737488351231,
+ERASE, 140727079718912, 140727079718912,
+STORE, 140727079718912, 140727079723007,
+STORE, 94250996527104, 94250997239807,
+ERASE, 94250996527104, 94250996527104,
+STORE, 94250996527104, 94250996576255,
+STORE, 94250996576256, 94250997239807,
+ERASE, 94250996576256, 94250996576256,
+STORE, 94250996576256, 94250997121023,
+STORE, 94250997121024, 94250997219327,
+STORE, 94250997219328, 94250997239807,
+STORE, 140060022587392, 140060022759423,
+ERASE, 140060022587392, 140060022587392,
+STORE, 140060022587392, 140060022591487,
+STORE, 140060022591488, 140060022759423,
+ERASE, 140060022591488, 140060022591488,
+STORE, 140060022591488, 140060022714367,
+STORE, 140060022714368, 140060022747135,
+STORE, 140060022747136, 140060022755327,
+STORE, 140060022755328, 140060022759423,
+STORE, 140727079788544, 140727079792639,
+STORE, 140727079776256, 140727079788543,
+STORE, 47572772409344, 47572772417535,
+STORE, 47572772417536, 47572772425727,
+STORE, 47572772425728, 47572772532223,
+STORE, 47572772442112, 47572772532223,
+STORE, 47572772425728, 47572772442111,
+ERASE, 47572772442112, 47572772442112,
+STORE, 47572772442112, 47572772515839,
+STORE, 47572772515840, 47572772532223,
+STORE, 47572772495360, 47572772515839,
+STORE, 47572772442112, 47572772495359,
+ERASE, 47572772442112, 47572772442112,
+STORE, 47572772442112, 47572772495359,
+STORE, 47572772511744, 47572772515839,
+STORE, 47572772495360, 47572772511743,
+ERASE, 47572772495360, 47572772495360,
+STORE, 47572772495360, 47572772511743,
+STORE, 47572772524032, 47572772532223,
+STORE, 47572772515840, 47572772524031,
+ERASE, 47572772515840, 47572772515840,
+STORE, 47572772515840, 47572772524031,
+ERASE, 47572772524032, 47572772524032,
+STORE, 47572772524032, 47572772532223,
+STORE, 47572772532224, 47572774371327,
+STORE, 47572772671488, 47572774371327,
+STORE, 47572772532224, 47572772671487,
+ERASE, 47572772671488, 47572772671488,
+STORE, 47572772671488, 47572774330367,
+STORE, 47572774330368, 47572774371327,
+STORE, 47572774014976, 47572774330367,
+STORE, 47572772671488, 47572774014975,
+ERASE, 47572772671488, 47572772671488,
+STORE, 47572772671488, 47572774014975,
+STORE, 47572774326272, 47572774330367,
+STORE, 47572774014976, 47572774326271,
+ERASE, 47572774014976, 47572774014976,
+STORE, 47572774014976, 47572774326271,
+STORE, 47572774354944, 47572774371327,
+STORE, 47572774330368, 47572774354943,
+ERASE, 47572774330368, 47572774330368,
+STORE, 47572774330368, 47572774354943,
+ERASE, 47572774354944, 47572774354944,
+STORE, 47572774354944, 47572774371327,
+STORE, 47572774354944, 47572774383615,
+ERASE, 47572774330368, 47572774330368,
+STORE, 47572774330368, 47572774346751,
+STORE, 47572774346752, 47572774354943,
+ERASE, 47572772515840, 47572772515840,
+STORE, 47572772515840, 47572772519935,
+STORE, 47572772519936, 47572772524031,
+ERASE, 94250997219328, 94250997219328,
+STORE, 94250997219328, 94250997235711,
+STORE, 94250997235712, 94250997239807,
+ERASE, 140060022747136, 140060022747136,
+STORE, 140060022747136, 140060022751231,
+STORE, 140060022751232, 140060022755327,
+ERASE, 47572772409344, 47572772409344,
+STORE, 94251018305536, 94251018440703,
+STORE, 140737488347136, 140737488351231,
+STORE, 140730012389376, 140737488351231,
+ERASE, 140730012389376, 140730012389376,
+STORE, 140730012389376, 140730012393471,
+STORE, 94382607675392, 94382607695871,
+ERASE, 94382607675392, 94382607675392,
+STORE, 94382607675392, 94382607679487,
+STORE, 94382607679488, 94382607695871,
+ERASE, 94382607679488, 94382607679488,
+STORE, 94382607679488, 94382607683583,
+STORE, 94382607683584, 94382607687679,
+STORE, 94382607687680, 94382607695871,
+STORE, 140252451454976, 140252451627007,
+ERASE, 140252451454976, 140252451454976,
+STORE, 140252451454976, 140252451459071,
+STORE, 140252451459072, 140252451627007,
+ERASE, 140252451459072, 140252451459072,
+STORE, 140252451459072, 140252451581951,
+STORE, 140252451581952, 140252451614719,
+STORE, 140252451614720, 140252451622911,
+STORE, 140252451622912, 140252451627007,
+STORE, 140730013548544, 140730013552639,
+STORE, 140730013536256, 140730013548543,
+STORE, 47380343541760, 47380343549951,
+STORE, 47380343549952, 47380343558143,
+STORE, 47380343558144, 47380345397247,
+STORE, 47380343697408, 47380345397247,
+STORE, 47380343558144, 47380343697407,
+ERASE, 47380343697408, 47380343697408,
+STORE, 47380343697408, 47380345356287,
+STORE, 47380345356288, 47380345397247,
+STORE, 47380345040896, 47380345356287,
+STORE, 47380343697408, 47380345040895,
+ERASE, 47380343697408, 47380343697408,
+STORE, 47380343697408, 47380345040895,
+STORE, 47380345352192, 47380345356287,
+STORE, 47380345040896, 47380345352191,
+ERASE, 47380345040896, 47380345040896,
+STORE, 47380345040896, 47380345352191,
+STORE, 47380345380864, 47380345397247,
+STORE, 47380345356288, 47380345380863,
+ERASE, 47380345356288, 47380345356288,
+STORE, 47380345356288, 47380345380863,
+ERASE, 47380345380864, 47380345380864,
+STORE, 47380345380864, 47380345397247,
+ERASE, 47380345356288, 47380345356288,
+STORE, 47380345356288, 47380345372671,
+STORE, 47380345372672, 47380345380863,
+ERASE, 94382607687680, 94382607687680,
+STORE, 94382607687680, 94382607691775,
+STORE, 94382607691776, 94382607695871,
+ERASE, 140252451614720, 140252451614720,
+STORE, 140252451614720, 140252451618815,
+STORE, 140252451618816, 140252451622911,
+ERASE, 47380343541760, 47380343541760,
+STORE, 94382626803712, 94382626938879,
+STORE, 140737488347136, 140737488351231,
+STORE, 140730900271104, 140737488351231,
+ERASE, 140730900271104, 140730900271104,
+STORE, 140730900271104, 140730900275199,
+STORE, 93855478120448, 93855478337535,
+ERASE, 93855478120448, 93855478120448,
+STORE, 93855478120448, 93855478198271,
+STORE, 93855478198272, 93855478337535,
+ERASE, 93855478198272, 93855478198272,
+STORE, 93855478198272, 93855478243327,
+STORE, 93855478243328, 93855478288383,
+STORE, 93855478288384, 93855478337535,
+STORE, 140092686573568, 140092686745599,
+ERASE, 140092686573568, 140092686573568,
+STORE, 140092686573568, 140092686577663,
+STORE, 140092686577664, 140092686745599,
+ERASE, 140092686577664, 140092686577664,
+STORE, 140092686577664, 140092686700543,
+STORE, 140092686700544, 140092686733311,
+STORE, 140092686733312, 140092686741503,
+STORE, 140092686741504, 140092686745599,
+STORE, 140730900537344, 140730900541439,
+STORE, 140730900525056, 140730900537343,
+STORE, 47540108423168, 47540108431359,
+STORE, 47540108431360, 47540108439551,
+STORE, 47540108439552, 47540110278655,
+STORE, 47540108578816, 47540110278655,
+STORE, 47540108439552, 47540108578815,
+ERASE, 47540108578816, 47540108578816,
+STORE, 47540108578816, 47540110237695,
+STORE, 47540110237696, 47540110278655,
+STORE, 47540109922304, 47540110237695,
+STORE, 47540108578816, 47540109922303,
+ERASE, 47540108578816, 47540108578816,
+STORE, 47540108578816, 47540109922303,
+STORE, 47540110233600, 47540110237695,
+STORE, 47540109922304, 47540110233599,
+ERASE, 47540109922304, 47540109922304,
+STORE, 47540109922304, 47540110233599,
+STORE, 47540110262272, 47540110278655,
+STORE, 47540110237696, 47540110262271,
+ERASE, 47540110237696, 47540110237696,
+STORE, 47540110237696, 47540110262271,
+ERASE, 47540110262272, 47540110262272,
+STORE, 47540110262272, 47540110278655,
+ERASE, 47540110237696, 47540110237696,
+STORE, 47540110237696, 47540110254079,
+STORE, 47540110254080, 47540110262271,
+ERASE, 93855478288384, 93855478288384,
+STORE, 93855478288384, 93855478333439,
+STORE, 93855478333440, 93855478337535,
+ERASE, 140092686733312, 140092686733312,
+STORE, 140092686733312, 140092686737407,
+STORE, 140092686737408, 140092686741503,
+ERASE, 47540108423168, 47540108423168,
+STORE, 93855492222976, 93855492358143,
+STORE, 93855492222976, 93855492493311,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733498146816, 140737488351231,
+ERASE, 140733498146816, 140733498146816,
+STORE, 140733498146816, 140733498150911,
+STORE, 94170739654656, 94170740367359,
+ERASE, 94170739654656, 94170739654656,
+STORE, 94170739654656, 94170739703807,
+STORE, 94170739703808, 94170740367359,
+ERASE, 94170739703808, 94170739703808,
+STORE, 94170739703808, 94170740248575,
+STORE, 94170740248576, 94170740346879,
+STORE, 94170740346880, 94170740367359,
+STORE, 140024788877312, 140024789049343,
+ERASE, 140024788877312, 140024788877312,
+STORE, 140024788877312, 140024788881407,
+STORE, 140024788881408, 140024789049343,
+ERASE, 140024788881408, 140024788881408,
+STORE, 140024788881408, 140024789004287,
+STORE, 140024789004288, 140024789037055,
+STORE, 140024789037056, 140024789045247,
+STORE, 140024789045248, 140024789049343,
+STORE, 140733499023360, 140733499027455,
+STORE, 140733499011072, 140733499023359,
+STORE, 47608006119424, 47608006127615,
+STORE, 47608006127616, 47608006135807,
+STORE, 47608006135808, 47608006242303,
+STORE, 47608006152192, 47608006242303,
+STORE, 47608006135808, 47608006152191,
+ERASE, 47608006152192, 47608006152192,
+STORE, 47608006152192, 47608006225919,
+STORE, 47608006225920, 47608006242303,
+STORE, 47608006205440, 47608006225919,
+STORE, 47608006152192, 47608006205439,
+ERASE, 47608006152192, 47608006152192,
+STORE, 47608006152192, 47608006205439,
+STORE, 47608006221824, 47608006225919,
+STORE, 47608006205440, 47608006221823,
+ERASE, 47608006205440, 47608006205440,
+STORE, 47608006205440, 47608006221823,
+STORE, 47608006234112, 47608006242303,
+STORE, 47608006225920, 47608006234111,
+ERASE, 47608006225920, 47608006225920,
+STORE, 47608006225920, 47608006234111,
+ERASE, 47608006234112, 47608006234112,
+STORE, 47608006234112, 47608006242303,
+STORE, 47608006242304, 47608008081407,
+STORE, 47608006381568, 47608008081407,
+STORE, 47608006242304, 47608006381567,
+ERASE, 47608006381568, 47608006381568,
+STORE, 47608006381568, 47608008040447,
+STORE, 47608008040448, 47608008081407,
+STORE, 47608007725056, 47608008040447,
+STORE, 47608006381568, 47608007725055,
+ERASE, 47608006381568, 47608006381568,
+STORE, 47608006381568, 47608007725055,
+STORE, 47608008036352, 47608008040447,
+STORE, 47608007725056, 47608008036351,
+ERASE, 47608007725056, 47608007725056,
+STORE, 47608007725056, 47608008036351,
+STORE, 47608008065024, 47608008081407,
+STORE, 47608008040448, 47608008065023,
+ERASE, 47608008040448, 47608008040448,
+STORE, 47608008040448, 47608008065023,
+ERASE, 47608008065024, 47608008065024,
+STORE, 47608008065024, 47608008081407,
+STORE, 47608008065024, 47608008093695,
+ERASE, 47608008040448, 47608008040448,
+STORE, 47608008040448, 47608008056831,
+STORE, 47608008056832, 47608008065023,
+ERASE, 47608006225920, 47608006225920,
+STORE, 47608006225920, 47608006230015,
+STORE, 47608006230016, 47608006234111,
+ERASE, 94170740346880, 94170740346880,
+STORE, 94170740346880, 94170740363263,
+STORE, 94170740363264, 94170740367359,
+ERASE, 140024789037056, 140024789037056,
+STORE, 140024789037056, 140024789041151,
+STORE, 140024789041152, 140024789045247,
+ERASE, 47608006119424, 47608006119424,
+STORE, 140737488347136, 140737488351231,
+STORE, 140730264326144, 140737488351231,
+ERASE, 140730264326144, 140730264326144,
+STORE, 140730264326144, 140730264330239,
+STORE, 94653216407552, 94653217120255,
+ERASE, 94653216407552, 94653216407552,
+STORE, 94653216407552, 94653216456703,
+STORE, 94653216456704, 94653217120255,
+ERASE, 94653216456704, 94653216456704,
+STORE, 94653216456704, 94653217001471,
+STORE, 94653217001472, 94653217099775,
+STORE, 94653217099776, 94653217120255,
+STORE, 140103617011712, 140103617183743,
+ERASE, 140103617011712, 140103617011712,
+STORE, 140103617011712, 140103617015807,
+STORE, 140103617015808, 140103617183743,
+ERASE, 140103617015808, 140103617015808,
+STORE, 140103617015808, 140103617138687,
+STORE, 140103617138688, 140103617171455,
+STORE, 140103617171456, 140103617179647,
+STORE, 140103617179648, 140103617183743,
+STORE, 140730265427968, 140730265432063,
+STORE, 140730265415680, 140730265427967,
+STORE, 47529177985024, 47529177993215,
+STORE, 47529177993216, 47529178001407,
+STORE, 47529178001408, 47529178107903,
+STORE, 47529178017792, 47529178107903,
+STORE, 47529178001408, 47529178017791,
+ERASE, 47529178017792, 47529178017792,
+STORE, 47529178017792, 47529178091519,
+STORE, 47529178091520, 47529178107903,
+STORE, 47529178071040, 47529178091519,
+STORE, 47529178017792, 47529178071039,
+ERASE, 47529178017792, 47529178017792,
+STORE, 47529178017792, 47529178071039,
+STORE, 47529178087424, 47529178091519,
+STORE, 47529178071040, 47529178087423,
+ERASE, 47529178071040, 47529178071040,
+STORE, 47529178071040, 47529178087423,
+STORE, 47529178099712, 47529178107903,
+STORE, 47529178091520, 47529178099711,
+ERASE, 47529178091520, 47529178091520,
+STORE, 47529178091520, 47529178099711,
+ERASE, 47529178099712, 47529178099712,
+STORE, 47529178099712, 47529178107903,
+STORE, 47529178107904, 47529179947007,
+STORE, 47529178247168, 47529179947007,
+STORE, 47529178107904, 47529178247167,
+ERASE, 47529178247168, 47529178247168,
+STORE, 47529178247168, 47529179906047,
+STORE, 47529179906048, 47529179947007,
+STORE, 47529179590656, 47529179906047,
+STORE, 47529178247168, 47529179590655,
+ERASE, 47529178247168, 47529178247168,
+STORE, 47529178247168, 47529179590655,
+STORE, 47529179901952, 47529179906047,
+STORE, 47529179590656, 47529179901951,
+ERASE, 47529179590656, 47529179590656,
+STORE, 47529179590656, 47529179901951,
+STORE, 47529179930624, 47529179947007,
+STORE, 47529179906048, 47529179930623,
+ERASE, 47529179906048, 47529179906048,
+STORE, 47529179906048, 47529179930623,
+ERASE, 47529179930624, 47529179930624,
+STORE, 47529179930624, 47529179947007,
+STORE, 47529179930624, 47529179959295,
+ERASE, 47529179906048, 47529179906048,
+STORE, 47529179906048, 47529179922431,
+STORE, 47529179922432, 47529179930623,
+ERASE, 47529178091520, 47529178091520,
+STORE, 47529178091520, 47529178095615,
+STORE, 47529178095616, 47529178099711,
+ERASE, 94653217099776, 94653217099776,
+STORE, 94653217099776, 94653217116159,
+STORE, 94653217116160, 94653217120255,
+ERASE, 140103617171456, 140103617171456,
+STORE, 140103617171456, 140103617175551,
+STORE, 140103617175552, 140103617179647,
+ERASE, 47529177985024, 47529177985024,
+STORE, 94653241135104, 94653241270271,
+STORE, 140737488347136, 140737488351231,
+STORE, 140736284549120, 140737488351231,
+ERASE, 140736284549120, 140736284549120,
+STORE, 140736284549120, 140736284553215,
+STORE, 93963663822848, 93963664506879,
+ERASE, 93963663822848, 93963663822848,
+STORE, 93963663822848, 93963663884287,
+STORE, 93963663884288, 93963664506879,
+ERASE, 93963663884288, 93963663884288,
+STORE, 93963663884288, 93963664240639,
+STORE, 93963664240640, 93963664379903,
+STORE, 93963664379904, 93963664506879,
+STORE, 140450188439552, 140450188611583,
+ERASE, 140450188439552, 140450188439552,
+STORE, 140450188439552, 140450188443647,
+STORE, 140450188443648, 140450188611583,
+ERASE, 140450188443648, 140450188443648,
+STORE, 140450188443648, 140450188566527,
+STORE, 140450188566528, 140450188599295,
+STORE, 140450188599296, 140450188607487,
+STORE, 140450188607488, 140450188611583,
+STORE, 140736284577792, 140736284581887,
+STORE, 140736284565504, 140736284577791,
+STORE, 47182606557184, 47182606565375,
+STORE, 47182606565376, 47182606573567,
+STORE, 47182606573568, 47182608412671,
+STORE, 47182606712832, 47182608412671,
+STORE, 47182606573568, 47182606712831,
+ERASE, 47182606712832, 47182606712832,
+STORE, 47182606712832, 47182608371711,
+STORE, 47182608371712, 47182608412671,
+STORE, 47182608056320, 47182608371711,
+STORE, 47182606712832, 47182608056319,
+ERASE, 47182606712832, 47182606712832,
+STORE, 47182606712832, 47182608056319,
+STORE, 47182608367616, 47182608371711,
+STORE, 47182608056320, 47182608367615,
+ERASE, 47182608056320, 47182608056320,
+STORE, 47182608056320, 47182608367615,
+STORE, 47182608396288, 47182608412671,
+STORE, 47182608371712, 47182608396287,
+ERASE, 47182608371712, 47182608371712,
+STORE, 47182608371712, 47182608396287,
+ERASE, 47182608396288, 47182608396288,
+STORE, 47182608396288, 47182608412671,
+STORE, 47182608412672, 47182608523263,
+STORE, 47182608429056, 47182608523263,
+STORE, 47182608412672, 47182608429055,
+ERASE, 47182608429056, 47182608429056,
+STORE, 47182608429056, 47182608515071,
+STORE, 47182608515072, 47182608523263,
+STORE, 47182608490496, 47182608515071,
+STORE, 47182608429056, 47182608490495,
+ERASE, 47182608429056, 47182608429056,
+STORE, 47182608429056, 47182608490495,
+STORE, 47182608510976, 47182608515071,
+STORE, 47182608490496, 47182608510975,
+ERASE, 47182608490496, 47182608490496,
+STORE, 47182608490496, 47182608510975,
+ERASE, 47182608515072, 47182608515072,
+STORE, 47182608515072, 47182608523263,
+STORE, 47182608523264, 47182608568319,
+ERASE, 47182608523264, 47182608523264,
+STORE, 47182608523264, 47182608531455,
+STORE, 47182608531456, 47182608568319,
+STORE, 47182608551936, 47182608568319,
+STORE, 47182608531456, 47182608551935,
+ERASE, 47182608531456, 47182608531456,
+STORE, 47182608531456, 47182608551935,
+STORE, 47182608560128, 47182608568319,
+STORE, 47182608551936, 47182608560127,
+ERASE, 47182608551936, 47182608551936,
+STORE, 47182608551936, 47182608568319,
+ERASE, 47182608551936, 47182608551936,
+STORE, 47182608551936, 47182608560127,
+STORE, 47182608560128, 47182608568319,
+ERASE, 47182608560128, 47182608560128,
+STORE, 47182608560128, 47182608568319,
+STORE, 47182608568320, 47182608916479,
+STORE, 47182608609280, 47182608916479,
+STORE, 47182608568320, 47182608609279,
+ERASE, 47182608609280, 47182608609280,
+STORE, 47182608609280, 47182608891903,
+STORE, 47182608891904, 47182608916479,
+STORE, 47182608822272, 47182608891903,
+STORE, 47182608609280, 47182608822271,
+ERASE, 47182608609280, 47182608609280,
+STORE, 47182608609280, 47182608822271,
+STORE, 47182608887808, 47182608891903,
+STORE, 47182608822272, 47182608887807,
+ERASE, 47182608822272, 47182608822272,
+STORE, 47182608822272, 47182608887807,
+ERASE, 47182608891904, 47182608891904,
+STORE, 47182608891904, 47182608916479,
+STORE, 47182608916480, 47182611177471,
+STORE, 47182609068032, 47182611177471,
+STORE, 47182608916480, 47182609068031,
+ERASE, 47182609068032, 47182609068032,
+STORE, 47182609068032, 47182611161087,
+STORE, 47182611161088, 47182611177471,
+STORE, 47182611169280, 47182611177471,
+STORE, 47182611161088, 47182611169279,
+ERASE, 47182611161088, 47182611161088,
+STORE, 47182611161088, 47182611169279,
+ERASE, 47182611169280, 47182611169280,
+STORE, 47182611169280, 47182611177471,
+STORE, 47182611177472, 47182611312639,
+ERASE, 47182611177472, 47182611177472,
+STORE, 47182611177472, 47182611202047,
+STORE, 47182611202048, 47182611312639,
+STORE, 47182611263488, 47182611312639,
+STORE, 47182611202048, 47182611263487,
+ERASE, 47182611202048, 47182611202048,
+STORE, 47182611202048, 47182611263487,
+STORE, 47182611288064, 47182611312639,
+STORE, 47182611263488, 47182611288063,
+ERASE, 47182611263488, 47182611263488,
+STORE, 47182611263488, 47182611312639,
+ERASE, 47182611263488, 47182611263488,
+STORE, 47182611263488, 47182611288063,
+STORE, 47182611288064, 47182611312639,
+STORE, 47182611296256, 47182611312639,
+STORE, 47182611288064, 47182611296255,
+ERASE, 47182611288064, 47182611288064,
+STORE, 47182611288064, 47182611296255,
+ERASE, 47182611296256, 47182611296256,
+STORE, 47182611296256, 47182611312639,
+STORE, 47182611296256, 47182611320831,
+STORE, 47182611320832, 47182611484671,
+ERASE, 47182611320832, 47182611320832,
+STORE, 47182611320832, 47182611333119,
+STORE, 47182611333120, 47182611484671,
+STORE, 47182611431424, 47182611484671,
+STORE, 47182611333120, 47182611431423,
+ERASE, 47182611333120, 47182611333120,
+STORE, 47182611333120, 47182611431423,
+STORE, 47182611476480, 47182611484671,
+STORE, 47182611431424, 47182611476479,
+ERASE, 47182611431424, 47182611431424,
+STORE, 47182611431424, 47182611484671,
+ERASE, 47182611431424, 47182611431424,
+STORE, 47182611431424, 47182611476479,
+STORE, 47182611476480, 47182611484671,
+ERASE, 47182611476480, 47182611476480,
+STORE, 47182611476480, 47182611484671,
+STORE, 47182611484672, 47182612082687,
+STORE, 47182611603456, 47182612082687,
+STORE, 47182611484672, 47182611603455,
+ERASE, 47182611603456, 47182611603456,
+STORE, 47182611603456, 47182612029439,
+STORE, 47182612029440, 47182612082687,
+STORE, 47182611918848, 47182612029439,
+STORE, 47182611603456, 47182611918847,
+ERASE, 47182611603456, 47182611603456,
+STORE, 47182611603456, 47182611918847,
+STORE, 47182612025344, 47182612029439,
+STORE, 47182611918848, 47182612025343,
+ERASE, 47182611918848, 47182611918848,
+STORE, 47182611918848, 47182612025343,
+ERASE, 47182612029440, 47182612029440,
+STORE, 47182612029440, 47182612082687,
+STORE, 47182612082688, 47182615134207,
+STORE, 47182612627456, 47182615134207,
+STORE, 47182612082688, 47182612627455,
+ERASE, 47182612627456, 47182612627456,
+STORE, 47182612627456, 47182614913023,
+STORE, 47182614913024, 47182615134207,
+STORE, 47182614323200, 47182614913023,
+STORE, 47182612627456, 47182614323199,
+ERASE, 47182612627456, 47182612627456,
+STORE, 47182612627456, 47182614323199,
+STORE, 47182614908928, 47182614913023,
+STORE, 47182614323200, 47182614908927,
+ERASE, 47182614323200, 47182614323200,
+STORE, 47182614323200, 47182614908927,
+STORE, 47182615117824, 47182615134207,
+STORE, 47182614913024, 47182615117823,
+ERASE, 47182614913024, 47182614913024,
+STORE, 47182614913024, 47182615117823,
+ERASE, 47182615117824, 47182615117824,
+STORE, 47182615117824, 47182615134207,
+STORE, 47182615134208, 47182615166975,
+ERASE, 47182615134208, 47182615134208,
+STORE, 47182615134208, 47182615142399,
+STORE, 47182615142400, 47182615166975,
+STORE, 47182615154688, 47182615166975,
+STORE, 47182615142400, 47182615154687,
+ERASE, 47182615142400, 47182615142400,
+STORE, 47182615142400, 47182615154687,
+STORE, 47182615158784, 47182615166975,
+STORE, 47182615154688, 47182615158783,
+ERASE, 47182615154688, 47182615154688,
+STORE, 47182615154688, 47182615166975,
+ERASE, 47182615154688, 47182615154688,
+STORE, 47182615154688, 47182615158783,
+STORE, 47182615158784, 47182615166975,
+ERASE, 47182615158784, 47182615158784,
+STORE, 47182615158784, 47182615166975,
+STORE, 47182615166976, 47182615203839,
+ERASE, 47182615166976, 47182615166976,
+STORE, 47182615166976, 47182615175167,
+STORE, 47182615175168, 47182615203839,
+STORE, 47182615191552, 47182615203839,
+STORE, 47182615175168, 47182615191551,
+ERASE, 47182615175168, 47182615175168,
+STORE, 47182615175168, 47182615191551,
+STORE, 47182615195648, 47182615203839,
+STORE, 47182615191552, 47182615195647,
+ERASE, 47182615191552, 47182615191552,
+STORE, 47182615191552, 47182615203839,
+ERASE, 47182615191552, 47182615191552,
+STORE, 47182615191552, 47182615195647,
+STORE, 47182615195648, 47182615203839,
+ERASE, 47182615195648, 47182615195648,
+STORE, 47182615195648, 47182615203839,
+STORE, 47182615203840, 47182615678975,
+ERASE, 47182615203840, 47182615203840,
+STORE, 47182615203840, 47182615212031,
+STORE, 47182615212032, 47182615678975,
+STORE, 47182615547904, 47182615678975,
+STORE, 47182615212032, 47182615547903,
+ERASE, 47182615212032, 47182615212032,
+STORE, 47182615212032, 47182615547903,
+STORE, 47182615670784, 47182615678975,
+STORE, 47182615547904, 47182615670783,
+ERASE, 47182615547904, 47182615547904,
+STORE, 47182615547904, 47182615678975,
+ERASE, 47182615547904, 47182615547904,
+STORE, 47182615547904, 47182615670783,
+STORE, 47182615670784, 47182615678975,
+ERASE, 47182615670784, 47182615670784,
+STORE, 47182615670784, 47182615678975,
+STORE, 47182615678976, 47182615687167,
+STORE, 47182615687168, 47182615707647,
+ERASE, 47182615687168, 47182615687168,
+STORE, 47182615687168, 47182615691263,
+STORE, 47182615691264, 47182615707647,
+STORE, 47182615695360, 47182615707647,
+STORE, 47182615691264, 47182615695359,
+ERASE, 47182615691264, 47182615691264,
+STORE, 47182615691264, 47182615695359,
+STORE, 47182615699456, 47182615707647,
+STORE, 47182615695360, 47182615699455,
+ERASE, 47182615695360, 47182615695360,
+STORE, 47182615695360, 47182615707647,
+ERASE, 47182615695360, 47182615695360,
+STORE, 47182615695360, 47182615699455,
+STORE, 47182615699456, 47182615707647,
+ERASE, 47182615699456, 47182615699456,
+STORE, 47182615699456, 47182615707647,
+STORE, 47182615707648, 47182615715839,
+ERASE, 47182608371712, 47182608371712,
+STORE, 47182608371712, 47182608388095,
+STORE, 47182608388096, 47182608396287,
+ERASE, 47182615699456, 47182615699456,
+STORE, 47182615699456, 47182615703551,
+STORE, 47182615703552, 47182615707647,
+ERASE, 47182611288064, 47182611288064,
+STORE, 47182611288064, 47182611292159,
+STORE, 47182611292160, 47182611296255,
+ERASE, 47182615670784, 47182615670784,
+STORE, 47182615670784, 47182615674879,
+STORE, 47182615674880, 47182615678975,
+ERASE, 47182615195648, 47182615195648,
+STORE, 47182615195648, 47182615199743,
+STORE, 47182615199744, 47182615203839,
+ERASE, 47182615158784, 47182615158784,
+STORE, 47182615158784, 47182615162879,
+STORE, 47182615162880, 47182615166975,
+ERASE, 47182614913024, 47182614913024,
+STORE, 47182614913024, 47182615109631,
+STORE, 47182615109632, 47182615117823,
+ERASE, 47182612029440, 47182612029440,
+STORE, 47182612029440, 47182612066303,
+STORE, 47182612066304, 47182612082687,
+ERASE, 47182611476480, 47182611476480,
+STORE, 47182611476480, 47182611480575,
+STORE, 47182611480576, 47182611484671,
+ERASE, 47182611161088, 47182611161088,
+STORE, 47182611161088, 47182611165183,
+STORE, 47182611165184, 47182611169279,
+ERASE, 47182608891904, 47182608891904,
+STORE, 47182608891904, 47182608912383,
+STORE, 47182608912384, 47182608916479,
+ERASE, 47182608560128, 47182608560128,
+STORE, 47182608560128, 47182608564223,
+STORE, 47182608564224, 47182608568319,
+ERASE, 47182608515072, 47182608515072,
+STORE, 47182608515072, 47182608519167,
+STORE, 47182608519168, 47182608523263,
+ERASE, 93963664379904, 93963664379904,
+STORE, 93963664379904, 93963664502783,
+STORE, 93963664502784, 93963664506879,
+ERASE, 140450188599296, 140450188599296,
+STORE, 140450188599296, 140450188603391,
+STORE, 140450188603392, 140450188607487,
+ERASE, 47182606557184, 47182606557184,
+STORE, 93963694723072, 93963694858239,
+STORE, 140737488347136, 140737488351231,
+STORE, 140730313261056, 140737488351231,
+ERASE, 140730313261056, 140730313261056,
+STORE, 140730313261056, 140730313265151,
+STORE, 94386579017728, 94386579697663,
+ERASE, 94386579017728, 94386579017728,
+STORE, 94386579017728, 94386579083263,
+STORE, 94386579083264, 94386579697663,
+ERASE, 94386579083264, 94386579083264,
+STORE, 94386579083264, 94386579431423,
+STORE, 94386579431424, 94386579570687,
+STORE, 94386579570688, 94386579697663,
+STORE, 140124810838016, 140124811010047,
+ERASE, 140124810838016, 140124810838016,
+STORE, 140124810838016, 140124810842111,
+STORE, 140124810842112, 140124811010047,
+ERASE, 140124810842112, 140124810842112,
+STORE, 140124810842112, 140124810964991,
+STORE, 140124810964992, 140124810997759,
+STORE, 140124810997760, 140124811005951,
+STORE, 140124811005952, 140124811010047,
+STORE, 140730313601024, 140730313605119,
+STORE, 140730313588736, 140730313601023,
+STORE, 47507984158720, 47507984166911,
+STORE, 47507984166912, 47507984175103,
+STORE, 47507984175104, 47507986014207,
+STORE, 47507984314368, 47507986014207,
+STORE, 47507984175104, 47507984314367,
+ERASE, 47507984314368, 47507984314368,
+STORE, 47507984314368, 47507985973247,
+STORE, 47507985973248, 47507986014207,
+STORE, 47507985657856, 47507985973247,
+STORE, 47507984314368, 47507985657855,
+ERASE, 47507984314368, 47507984314368,
+STORE, 47507984314368, 47507985657855,
+STORE, 47507985969152, 47507985973247,
+STORE, 47507985657856, 47507985969151,
+ERASE, 47507985657856, 47507985657856,
+STORE, 47507985657856, 47507985969151,
+STORE, 47507985997824, 47507986014207,
+STORE, 47507985973248, 47507985997823,
+ERASE, 47507985973248, 47507985973248,
+STORE, 47507985973248, 47507985997823,
+ERASE, 47507985997824, 47507985997824,
+STORE, 47507985997824, 47507986014207,
+STORE, 47507986014208, 47507986124799,
+STORE, 47507986030592, 47507986124799,
+STORE, 47507986014208, 47507986030591,
+ERASE, 47507986030592, 47507986030592,
+STORE, 47507986030592, 47507986116607,
+STORE, 47507986116608, 47507986124799,
+STORE, 47507986092032, 47507986116607,
+STORE, 47507986030592, 47507986092031,
+ERASE, 47507986030592, 47507986030592,
+STORE, 47507986030592, 47507986092031,
+STORE, 47507986112512, 47507986116607,
+STORE, 47507986092032, 47507986112511,
+ERASE, 47507986092032, 47507986092032,
+STORE, 47507986092032, 47507986112511,
+ERASE, 47507986116608, 47507986116608,
+STORE, 47507986116608, 47507986124799,
+STORE, 47507986124800, 47507986169855,
+ERASE, 47507986124800, 47507986124800,
+STORE, 47507986124800, 47507986132991,
+STORE, 47507986132992, 47507986169855,
+STORE, 47507986153472, 47507986169855,
+STORE, 47507986132992, 47507986153471,
+ERASE, 47507986132992, 47507986132992,
+STORE, 47507986132992, 47507986153471,
+STORE, 47507986161664, 47507986169855,
+STORE, 47507986153472, 47507986161663,
+ERASE, 47507986153472, 47507986153472,
+STORE, 47507986153472, 47507986169855,
+ERASE, 47507986153472, 47507986153472,
+STORE, 47507986153472, 47507986161663,
+STORE, 47507986161664, 47507986169855,
+ERASE, 47507986161664, 47507986161664,
+STORE, 47507986161664, 47507986169855,
+STORE, 47507986169856, 47507986518015,
+STORE, 47507986210816, 47507986518015,
+STORE, 47507986169856, 47507986210815,
+ERASE, 47507986210816, 47507986210816,
+STORE, 47507986210816, 47507986493439,
+STORE, 47507986493440, 47507986518015,
+STORE, 47507986423808, 47507986493439,
+STORE, 47507986210816, 47507986423807,
+ERASE, 47507986210816, 47507986210816,
+STORE, 47507986210816, 47507986423807,
+STORE, 47507986489344, 47507986493439,
+STORE, 47507986423808, 47507986489343,
+ERASE, 47507986423808, 47507986423808,
+STORE, 47507986423808, 47507986489343,
+ERASE, 47507986493440, 47507986493440,
+STORE, 47507986493440, 47507986518015,
+STORE, 47507986518016, 47507988779007,
+STORE, 47507986669568, 47507988779007,
+STORE, 47507986518016, 47507986669567,
+ERASE, 47507986669568, 47507986669568,
+STORE, 47507986669568, 47507988762623,
+STORE, 47507988762624, 47507988779007,
+STORE, 47507988770816, 47507988779007,
+STORE, 47507988762624, 47507988770815,
+ERASE, 47507988762624, 47507988762624,
+STORE, 47507988762624, 47507988770815,
+ERASE, 47507988770816, 47507988770816,
+STORE, 47507988770816, 47507988779007,
+STORE, 47507988779008, 47507988914175,
+ERASE, 47507988779008, 47507988779008,
+STORE, 47507988779008, 47507988803583,
+STORE, 47507988803584, 47507988914175,
+STORE, 47507988865024, 47507988914175,
+STORE, 47507988803584, 47507988865023,
+ERASE, 47507988803584, 47507988803584,
+STORE, 47507988803584, 47507988865023,
+STORE, 47507988889600, 47507988914175,
+STORE, 47507988865024, 47507988889599,
+ERASE, 47507988865024, 47507988865024,
+STORE, 47507988865024, 47507988914175,
+ERASE, 47507988865024, 47507988865024,
+STORE, 47507988865024, 47507988889599,
+STORE, 47507988889600, 47507988914175,
+STORE, 47507988897792, 47507988914175,
+STORE, 47507988889600, 47507988897791,
+ERASE, 47507988889600, 47507988889600,
+STORE, 47507988889600, 47507988897791,
+ERASE, 47507988897792, 47507988897792,
+STORE, 47507988897792, 47507988914175,
+STORE, 47507988897792, 47507988922367,
+STORE, 47507988922368, 47507989086207,
+ERASE, 47507988922368, 47507988922368,
+STORE, 47507988922368, 47507988934655,
+STORE, 47507988934656, 47507989086207,
+STORE, 47507989032960, 47507989086207,
+STORE, 47507988934656, 47507989032959,
+ERASE, 47507988934656, 47507988934656,
+STORE, 47507988934656, 47507989032959,
+STORE, 47507989078016, 47507989086207,
+STORE, 47507989032960, 47507989078015,
+ERASE, 47507989032960, 47507989032960,
+STORE, 47507989032960, 47507989086207,
+ERASE, 47507989032960, 47507989032960,
+STORE, 47507989032960, 47507989078015,
+STORE, 47507989078016, 47507989086207,
+ERASE, 47507989078016, 47507989078016,
+STORE, 47507989078016, 47507989086207,
+STORE, 47507989086208, 47507989684223,
+STORE, 47507989204992, 47507989684223,
+STORE, 47507989086208, 47507989204991,
+ERASE, 47507989204992, 47507989204992,
+STORE, 47507989204992, 47507989630975,
+STORE, 47507989630976, 47507989684223,
+STORE, 47507989520384, 47507989630975,
+STORE, 47507989204992, 47507989520383,
+ERASE, 47507989204992, 47507989204992,
+STORE, 47507989204992, 47507989520383,
+STORE, 47507989626880, 47507989630975,
+STORE, 47507989520384, 47507989626879,
+ERASE, 47507989520384, 47507989520384,
+STORE, 47507989520384, 47507989626879,
+ERASE, 47507989630976, 47507989630976,
+STORE, 47507989630976, 47507989684223,
+STORE, 47507989684224, 47507992735743,
+STORE, 47507990228992, 47507992735743,
+STORE, 47507989684224, 47507990228991,
+ERASE, 47507990228992, 47507990228992,
+STORE, 47507990228992, 47507992514559,
+STORE, 47507992514560, 47507992735743,
+STORE, 47507991924736, 47507992514559,
+STORE, 47507990228992, 47507991924735,
+ERASE, 47507990228992, 47507990228992,
+STORE, 47507990228992, 47507991924735,
+STORE, 47507992510464, 47507992514559,
+STORE, 47507991924736, 47507992510463,
+ERASE, 47507991924736, 47507991924736,
+STORE, 47507991924736, 47507992510463,
+STORE, 47507992719360, 47507992735743,
+STORE, 47507992514560, 47507992719359,
+ERASE, 47507992514560, 47507992514560,
+STORE, 47507992514560, 47507992719359,
+ERASE, 47507992719360, 47507992719360,
+STORE, 47507992719360, 47507992735743,
+STORE, 47507992735744, 47507992768511,
+ERASE, 47507992735744, 47507992735744,
+STORE, 47507992735744, 47507992743935,
+STORE, 47507992743936, 47507992768511,
+STORE, 47507992756224, 47507992768511,
+STORE, 47507992743936, 47507992756223,
+ERASE, 47507992743936, 47507992743936,
+STORE, 47507992743936, 47507992756223,
+STORE, 47507992760320, 47507992768511,
+STORE, 47507992756224, 47507992760319,
+ERASE, 47507992756224, 47507992756224,
+STORE, 47507992756224, 47507992768511,
+ERASE, 47507992756224, 47507992756224,
+STORE, 47507992756224, 47507992760319,
+STORE, 47507992760320, 47507992768511,
+ERASE, 47507992760320, 47507992760320,
+STORE, 47507992760320, 47507992768511,
+STORE, 47507992768512, 47507992805375,
+ERASE, 47507992768512, 47507992768512,
+STORE, 47507992768512, 47507992776703,
+STORE, 47507992776704, 47507992805375,
+STORE, 47507992793088, 47507992805375,
+STORE, 47507992776704, 47507992793087,
+ERASE, 47507992776704, 47507992776704,
+STORE, 47507992776704, 47507992793087,
+STORE, 47507992797184, 47507992805375,
+STORE, 47507992793088, 47507992797183,
+ERASE, 47507992793088, 47507992793088,
+STORE, 47507992793088, 47507992805375,
+ERASE, 47507992793088, 47507992793088,
+STORE, 47507992793088, 47507992797183,
+STORE, 47507992797184, 47507992805375,
+ERASE, 47507992797184, 47507992797184,
+STORE, 47507992797184, 47507992805375,
+STORE, 47507992805376, 47507993280511,
+ERASE, 47507992805376, 47507992805376,
+STORE, 47507992805376, 47507992813567,
+STORE, 47507992813568, 47507993280511,
+STORE, 47507993149440, 47507993280511,
+STORE, 47507992813568, 47507993149439,
+ERASE, 47507992813568, 47507992813568,
+STORE, 47507992813568, 47507993149439,
+STORE, 47507993272320, 47507993280511,
+STORE, 47507993149440, 47507993272319,
+ERASE, 47507993149440, 47507993149440,
+STORE, 47507993149440, 47507993280511,
+ERASE, 47507993149440, 47507993149440,
+STORE, 47507993149440, 47507993272319,
+STORE, 47507993272320, 47507993280511,
+ERASE, 47507993272320, 47507993272320,
+STORE, 47507993272320, 47507993280511,
+STORE, 47507993280512, 47507993288703,
+STORE, 47507993288704, 47507993309183,
+ERASE, 47507993288704, 47507993288704,
+STORE, 47507993288704, 47507993292799,
+STORE, 47507993292800, 47507993309183,
+STORE, 47507993296896, 47507993309183,
+STORE, 47507993292800, 47507993296895,
+ERASE, 47507993292800, 47507993292800,
+STORE, 47507993292800, 47507993296895,
+STORE, 47507993300992, 47507993309183,
+STORE, 47507993296896, 47507993300991,
+ERASE, 47507993296896, 47507993296896,
+STORE, 47507993296896, 47507993309183,
+ERASE, 47507993296896, 47507993296896,
+STORE, 47507993296896, 47507993300991,
+STORE, 47507993300992, 47507993309183,
+ERASE, 47507993300992, 47507993300992,
+STORE, 47507993300992, 47507993309183,
+STORE, 47507993309184, 47507993317375,
+ERASE, 47507985973248, 47507985973248,
+STORE, 47507985973248, 47507985989631,
+STORE, 47507985989632, 47507985997823,
+ERASE, 47507993300992, 47507993300992,
+STORE, 47507993300992, 47507993305087,
+STORE, 47507993305088, 47507993309183,
+ERASE, 47507988889600, 47507988889600,
+STORE, 47507988889600, 47507988893695,
+STORE, 47507988893696, 47507988897791,
+ERASE, 47507993272320, 47507993272320,
+STORE, 47507993272320, 47507993276415,
+STORE, 47507993276416, 47507993280511,
+ERASE, 47507992797184, 47507992797184,
+STORE, 47507992797184, 47507992801279,
+STORE, 47507992801280, 47507992805375,
+ERASE, 47507992760320, 47507992760320,
+STORE, 47507992760320, 47507992764415,
+STORE, 47507992764416, 47507992768511,
+ERASE, 47507992514560, 47507992514560,
+STORE, 47507992514560, 47507992711167,
+STORE, 47507992711168, 47507992719359,
+ERASE, 47507989630976, 47507989630976,
+STORE, 47507989630976, 47507989667839,
+STORE, 47507989667840, 47507989684223,
+ERASE, 47507989078016, 47507989078016,
+STORE, 47507989078016, 47507989082111,
+STORE, 47507989082112, 47507989086207,
+ERASE, 47507988762624, 47507988762624,
+STORE, 47507988762624, 47507988766719,
+STORE, 47507988766720, 47507988770815,
+ERASE, 47507986493440, 47507986493440,
+STORE, 47507986493440, 47507986513919,
+STORE, 47507986513920, 47507986518015,
+ERASE, 47507986161664, 47507986161664,
+STORE, 47507986161664, 47507986165759,
+STORE, 47507986165760, 47507986169855,
+ERASE, 47507986116608, 47507986116608,
+STORE, 47507986116608, 47507986120703,
+STORE, 47507986120704, 47507986124799,
+ERASE, 94386579570688, 94386579570688,
+STORE, 94386579570688, 94386579693567,
+STORE, 94386579693568, 94386579697663,
+ERASE, 140124810997760, 140124810997760,
+STORE, 140124810997760, 140124811001855,
+STORE, 140124811001856, 140124811005951,
+ERASE, 47507984158720, 47507984158720,
+STORE, 94386583982080, 94386584117247,
+STORE, 94386583982080, 94386584256511,
+ERASE, 94386583982080, 94386583982080,
+STORE, 94386583982080, 94386584223743,
+STORE, 94386584223744, 94386584256511,
+ERASE, 94386584223744, 94386584223744,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733763395584, 140737488351231,
+ERASE, 140733763395584, 140733763395584,
+STORE, 140733763395584, 140733763399679,
+STORE, 94011546472448, 94011547152383,
+ERASE, 94011546472448, 94011546472448,
+STORE, 94011546472448, 94011546537983,
+STORE, 94011546537984, 94011547152383,
+ERASE, 94011546537984, 94011546537984,
+STORE, 94011546537984, 94011546886143,
+STORE, 94011546886144, 94011547025407,
+STORE, 94011547025408, 94011547152383,
+STORE, 139757597949952, 139757598121983,
+ERASE, 139757597949952, 139757597949952,
+STORE, 139757597949952, 139757597954047,
+STORE, 139757597954048, 139757598121983,
+ERASE, 139757597954048, 139757597954048,
+STORE, 139757597954048, 139757598076927,
+STORE, 139757598076928, 139757598109695,
+STORE, 139757598109696, 139757598117887,
+STORE, 139757598117888, 139757598121983,
+STORE, 140733763596288, 140733763600383,
+STORE, 140733763584000, 140733763596287,
+STORE, 47875197046784, 47875197054975,
+STORE, 47875197054976, 47875197063167,
+STORE, 47875197063168, 47875198902271,
+STORE, 47875197202432, 47875198902271,
+STORE, 47875197063168, 47875197202431,
+ERASE, 47875197202432, 47875197202432,
+STORE, 47875197202432, 47875198861311,
+STORE, 47875198861312, 47875198902271,
+STORE, 47875198545920, 47875198861311,
+STORE, 47875197202432, 47875198545919,
+ERASE, 47875197202432, 47875197202432,
+STORE, 47875197202432, 47875198545919,
+STORE, 47875198857216, 47875198861311,
+STORE, 47875198545920, 47875198857215,
+ERASE, 47875198545920, 47875198545920,
+STORE, 47875198545920, 47875198857215,
+STORE, 47875198885888, 47875198902271,
+STORE, 47875198861312, 47875198885887,
+ERASE, 47875198861312, 47875198861312,
+STORE, 47875198861312, 47875198885887,
+ERASE, 47875198885888, 47875198885888,
+STORE, 47875198885888, 47875198902271,
+STORE, 47875198902272, 47875199012863,
+STORE, 47875198918656, 47875199012863,
+STORE, 47875198902272, 47875198918655,
+ERASE, 47875198918656, 47875198918656,
+STORE, 47875198918656, 47875199004671,
+STORE, 47875199004672, 47875199012863,
+STORE, 47875198980096, 47875199004671,
+STORE, 47875198918656, 47875198980095,
+ERASE, 47875198918656, 47875198918656,
+STORE, 47875198918656, 47875198980095,
+STORE, 47875199000576, 47875199004671,
+STORE, 47875198980096, 47875199000575,
+ERASE, 47875198980096, 47875198980096,
+STORE, 47875198980096, 47875199000575,
+ERASE, 47875199004672, 47875199004672,
+STORE, 47875199004672, 47875199012863,
+STORE, 47875199012864, 47875199057919,
+ERASE, 47875199012864, 47875199012864,
+STORE, 47875199012864, 47875199021055,
+STORE, 47875199021056, 47875199057919,
+STORE, 47875199041536, 47875199057919,
+STORE, 47875199021056, 47875199041535,
+ERASE, 47875199021056, 47875199021056,
+STORE, 47875199021056, 47875199041535,
+STORE, 47875199049728, 47875199057919,
+STORE, 47875199041536, 47875199049727,
+ERASE, 47875199041536, 47875199041536,
+STORE, 47875199041536, 47875199057919,
+ERASE, 47875199041536, 47875199041536,
+STORE, 47875199041536, 47875199049727,
+STORE, 47875199049728, 47875199057919,
+ERASE, 47875199049728, 47875199049728,
+STORE, 47875199049728, 47875199057919,
+STORE, 47875199057920, 47875199406079,
+STORE, 47875199098880, 47875199406079,
+STORE, 47875199057920, 47875199098879,
+ERASE, 47875199098880, 47875199098880,
+STORE, 47875199098880, 47875199381503,
+STORE, 47875199381504, 47875199406079,
+STORE, 47875199311872, 47875199381503,
+STORE, 47875199098880, 47875199311871,
+ERASE, 47875199098880, 47875199098880,
+STORE, 47875199098880, 47875199311871,
+STORE, 47875199377408, 47875199381503,
+STORE, 47875199311872, 47875199377407,
+ERASE, 47875199311872, 47875199311872,
+STORE, 47875199311872, 47875199377407,
+ERASE, 47875199381504, 47875199381504,
+STORE, 47875199381504, 47875199406079,
+STORE, 47875199406080, 47875201667071,
+STORE, 47875199557632, 47875201667071,
+STORE, 47875199406080, 47875199557631,
+ERASE, 47875199557632, 47875199557632,
+STORE, 47875199557632, 47875201650687,
+STORE, 47875201650688, 47875201667071,
+STORE, 47875201658880, 47875201667071,
+STORE, 47875201650688, 47875201658879,
+ERASE, 47875201650688, 47875201650688,
+STORE, 47875201650688, 47875201658879,
+ERASE, 47875201658880, 47875201658880,
+STORE, 47875201658880, 47875201667071,
+STORE, 47875201667072, 47875201802239,
+ERASE, 47875201667072, 47875201667072,
+STORE, 47875201667072, 47875201691647,
+STORE, 47875201691648, 47875201802239,
+STORE, 47875201753088, 47875201802239,
+STORE, 47875201691648, 47875201753087,
+ERASE, 47875201691648, 47875201691648,
+STORE, 47875201691648, 47875201753087,
+STORE, 47875201777664, 47875201802239,
+STORE, 47875201753088, 47875201777663,
+ERASE, 47875201753088, 47875201753088,
+STORE, 47875201753088, 47875201802239,
+ERASE, 47875201753088, 47875201753088,
+STORE, 47875201753088, 47875201777663,
+STORE, 47875201777664, 47875201802239,
+STORE, 47875201785856, 47875201802239,
+STORE, 47875201777664, 47875201785855,
+ERASE, 47875201777664, 47875201777664,
+STORE, 47875201777664, 47875201785855,
+ERASE, 47875201785856, 47875201785856,
+STORE, 47875201785856, 47875201802239,
+STORE, 47875201785856, 47875201810431,
+STORE, 47875201810432, 47875201974271,
+ERASE, 47875201810432, 47875201810432,
+STORE, 47875201810432, 47875201822719,
+STORE, 47875201822720, 47875201974271,
+STORE, 47875201921024, 47875201974271,
+STORE, 47875201822720, 47875201921023,
+ERASE, 47875201822720, 47875201822720,
+STORE, 47875201822720, 47875201921023,
+STORE, 47875201966080, 47875201974271,
+STORE, 47875201921024, 47875201966079,
+ERASE, 47875201921024, 47875201921024,
+STORE, 47875201921024, 47875201974271,
+ERASE, 47875201921024, 47875201921024,
+STORE, 47875201921024, 47875201966079,
+STORE, 47875201966080, 47875201974271,
+ERASE, 47875201966080, 47875201966080,
+STORE, 47875201966080, 47875201974271,
+STORE, 47875201974272, 47875202572287,
+STORE, 47875202093056, 47875202572287,
+STORE, 47875201974272, 47875202093055,
+ERASE, 47875202093056, 47875202093056,
+STORE, 47875202093056, 47875202519039,
+STORE, 47875202519040, 47875202572287,
+STORE, 47875202408448, 47875202519039,
+STORE, 47875202093056, 47875202408447,
+ERASE, 47875202093056, 47875202093056,
+STORE, 47875202093056, 47875202408447,
+STORE, 47875202514944, 47875202519039,
+STORE, 47875202408448, 47875202514943,
+ERASE, 47875202408448, 47875202408448,
+STORE, 47875202408448, 47875202514943,
+ERASE, 47875202519040, 47875202519040,
+STORE, 47875202519040, 47875202572287,
+STORE, 47875202572288, 47875205623807,
+STORE, 47875203117056, 47875205623807,
+STORE, 47875202572288, 47875203117055,
+ERASE, 47875203117056, 47875203117056,
+STORE, 47875203117056, 47875205402623,
+STORE, 47875205402624, 47875205623807,
+STORE, 47875204812800, 47875205402623,
+STORE, 47875203117056, 47875204812799,
+ERASE, 47875203117056, 47875203117056,
+STORE, 47875203117056, 47875204812799,
+STORE, 47875205398528, 47875205402623,
+STORE, 47875204812800, 47875205398527,
+ERASE, 47875204812800, 47875204812800,
+STORE, 47875204812800, 47875205398527,
+STORE, 47875205607424, 47875205623807,
+STORE, 47875205402624, 47875205607423,
+ERASE, 47875205402624, 47875205402624,
+STORE, 47875205402624, 47875205607423,
+ERASE, 47875205607424, 47875205607424,
+STORE, 47875205607424, 47875205623807,
+STORE, 47875205623808, 47875205656575,
+ERASE, 47875205623808, 47875205623808,
+STORE, 47875205623808, 47875205631999,
+STORE, 47875205632000, 47875205656575,
+STORE, 47875205644288, 47875205656575,
+STORE, 47875205632000, 47875205644287,
+ERASE, 47875205632000, 47875205632000,
+STORE, 47875205632000, 47875205644287,
+STORE, 47875205648384, 47875205656575,
+STORE, 47875205644288, 47875205648383,
+ERASE, 47875205644288, 47875205644288,
+STORE, 47875205644288, 47875205656575,
+ERASE, 47875205644288, 47875205644288,
+STORE, 47875205644288, 47875205648383,
+STORE, 47875205648384, 47875205656575,
+ERASE, 47875205648384, 47875205648384,
+STORE, 47875205648384, 47875205656575,
+STORE, 47875205656576, 47875205693439,
+ERASE, 47875205656576, 47875205656576,
+STORE, 47875205656576, 47875205664767,
+STORE, 47875205664768, 47875205693439,
+STORE, 47875205681152, 47875205693439,
+STORE, 47875205664768, 47875205681151,
+ERASE, 47875205664768, 47875205664768,
+STORE, 47875205664768, 47875205681151,
+STORE, 47875205685248, 47875205693439,
+STORE, 47875205681152, 47875205685247,
+ERASE, 47875205681152, 47875205681152,
+STORE, 47875205681152, 47875205693439,
+ERASE, 47875205681152, 47875205681152,
+STORE, 47875205681152, 47875205685247,
+STORE, 47875205685248, 47875205693439,
+ERASE, 47875205685248, 47875205685248,
+STORE, 47875205685248, 47875205693439,
+STORE, 47875205693440, 47875206168575,
+ERASE, 47875205693440, 47875205693440,
+STORE, 47875205693440, 47875205701631,
+STORE, 47875205701632, 47875206168575,
+STORE, 47875206037504, 47875206168575,
+STORE, 47875205701632, 47875206037503,
+ERASE, 47875205701632, 47875205701632,
+STORE, 47875205701632, 47875206037503,
+STORE, 47875206160384, 47875206168575,
+STORE, 47875206037504, 47875206160383,
+ERASE, 47875206037504, 47875206037504,
+STORE, 47875206037504, 47875206168575,
+ERASE, 47875206037504, 47875206037504,
+STORE, 47875206037504, 47875206160383,
+STORE, 47875206160384, 47875206168575,
+ERASE, 47875206160384, 47875206160384,
+STORE, 47875206160384, 47875206168575,
+STORE, 47875206168576, 47875206176767,
+STORE, 47875206176768, 47875206197247,
+ERASE, 47875206176768, 47875206176768,
+STORE, 47875206176768, 47875206180863,
+STORE, 47875206180864, 47875206197247,
+STORE, 47875206184960, 47875206197247,
+STORE, 47875206180864, 47875206184959,
+ERASE, 47875206180864, 47875206180864,
+STORE, 47875206180864, 47875206184959,
+STORE, 47875206189056, 47875206197247,
+STORE, 47875206184960, 47875206189055,
+ERASE, 47875206184960, 47875206184960,
+STORE, 47875206184960, 47875206197247,
+ERASE, 47875206184960, 47875206184960,
+STORE, 47875206184960, 47875206189055,
+STORE, 47875206189056, 47875206197247,
+ERASE, 47875206189056, 47875206189056,
+STORE, 47875206189056, 47875206197247,
+STORE, 47875206197248, 47875206205439,
+ERASE, 47875198861312, 47875198861312,
+STORE, 47875198861312, 47875198877695,
+STORE, 47875198877696, 47875198885887,
+ERASE, 47875206189056, 47875206189056,
+STORE, 47875206189056, 47875206193151,
+STORE, 47875206193152, 47875206197247,
+ERASE, 47875201777664, 47875201777664,
+STORE, 47875201777664, 47875201781759,
+STORE, 47875201781760, 47875201785855,
+ERASE, 47875206160384, 47875206160384,
+STORE, 47875206160384, 47875206164479,
+STORE, 47875206164480, 47875206168575,
+ERASE, 47875205685248, 47875205685248,
+STORE, 47875205685248, 47875205689343,
+STORE, 47875205689344, 47875205693439,
+ERASE, 47875205648384, 47875205648384,
+STORE, 47875205648384, 47875205652479,
+STORE, 47875205652480, 47875205656575,
+ERASE, 47875205402624, 47875205402624,
+STORE, 47875205402624, 47875205599231,
+STORE, 47875205599232, 47875205607423,
+ERASE, 47875202519040, 47875202519040,
+STORE, 47875202519040, 47875202555903,
+STORE, 47875202555904, 47875202572287,
+ERASE, 47875201966080, 47875201966080,
+STORE, 47875201966080, 47875201970175,
+STORE, 47875201970176, 47875201974271,
+ERASE, 47875201650688, 47875201650688,
+STORE, 47875201650688, 47875201654783,
+STORE, 47875201654784, 47875201658879,
+ERASE, 47875199381504, 47875199381504,
+STORE, 47875199381504, 47875199401983,
+STORE, 47875199401984, 47875199406079,
+ERASE, 47875199049728, 47875199049728,
+STORE, 47875199049728, 47875199053823,
+STORE, 47875199053824, 47875199057919,
+ERASE, 47875199004672, 47875199004672,
+STORE, 47875199004672, 47875199008767,
+STORE, 47875199008768, 47875199012863,
+ERASE, 94011547025408, 94011547025408,
+STORE, 94011547025408, 94011547148287,
+STORE, 94011547148288, 94011547152383,
+ERASE, 139757598109696, 139757598109696,
+STORE, 139757598109696, 139757598113791,
+STORE, 139757598113792, 139757598117887,
+ERASE, 47875197046784, 47875197046784,
+STORE, 94011557584896, 94011557720063,
+STORE, 94011557584896, 94011557855231,
+ERASE, 94011557584896, 94011557584896,
+STORE, 94011557584896, 94011557851135,
+STORE, 94011557851136, 94011557855231,
+ERASE, 94011557851136, 94011557851136,
+ERASE, 94011557584896, 94011557584896,
+STORE, 94011557584896, 94011557847039,
+STORE, 94011557847040, 94011557851135,
+ERASE, 94011557847040, 94011557847040,
+STORE, 94011557584896, 94011557982207,
+ERASE, 94011557584896, 94011557584896,
+STORE, 94011557584896, 94011557978111,
+STORE, 94011557978112, 94011557982207,
+ERASE, 94011557978112, 94011557978112,
+ERASE, 94011557584896, 94011557584896,
+STORE, 94011557584896, 94011557974015,
+STORE, 94011557974016, 94011557978111,
+ERASE, 94011557974016, 94011557974016,
+STORE, 140737488347136, 140737488351231,
+STORE, 140734130360320, 140737488351231,
+ERASE, 140734130360320, 140734130360320,
+STORE, 140734130360320, 140734130364415,
+STORE, 94641232105472, 94641232785407,
+ERASE, 94641232105472, 94641232105472,
+STORE, 94641232105472, 94641232171007,
+STORE, 94641232171008, 94641232785407,
+ERASE, 94641232171008, 94641232171008,
+STORE, 94641232171008, 94641232519167,
+STORE, 94641232519168, 94641232658431,
+STORE, 94641232658432, 94641232785407,
+STORE, 139726599516160, 139726599688191,
+ERASE, 139726599516160, 139726599516160,
+STORE, 139726599516160, 139726599520255,
+STORE, 139726599520256, 139726599688191,
+ERASE, 139726599520256, 139726599520256,
+STORE, 139726599520256, 139726599643135,
+STORE, 139726599643136, 139726599675903,
+STORE, 139726599675904, 139726599684095,
+STORE, 139726599684096, 139726599688191,
+STORE, 140734130446336, 140734130450431,
+STORE, 140734130434048, 140734130446335,
+STORE, 47906195480576, 47906195488767,
+STORE, 47906195488768, 47906195496959,
+STORE, 47906195496960, 47906197336063,
+STORE, 47906195636224, 47906197336063,
+STORE, 47906195496960, 47906195636223,
+ERASE, 47906195636224, 47906195636224,
+STORE, 47906195636224, 47906197295103,
+STORE, 47906197295104, 47906197336063,
+STORE, 47906196979712, 47906197295103,
+STORE, 47906195636224, 47906196979711,
+ERASE, 47906195636224, 47906195636224,
+STORE, 47906195636224, 47906196979711,
+STORE, 47906197291008, 47906197295103,
+STORE, 47906196979712, 47906197291007,
+ERASE, 47906196979712, 47906196979712,
+STORE, 47906196979712, 47906197291007,
+STORE, 47906197319680, 47906197336063,
+STORE, 47906197295104, 47906197319679,
+ERASE, 47906197295104, 47906197295104,
+STORE, 47906197295104, 47906197319679,
+ERASE, 47906197319680, 47906197319680,
+STORE, 47906197319680, 47906197336063,
+STORE, 47906197336064, 47906197446655,
+STORE, 47906197352448, 47906197446655,
+STORE, 47906197336064, 47906197352447,
+ERASE, 47906197352448, 47906197352448,
+STORE, 47906197352448, 47906197438463,
+STORE, 47906197438464, 47906197446655,
+STORE, 47906197413888, 47906197438463,
+STORE, 47906197352448, 47906197413887,
+ERASE, 47906197352448, 47906197352448,
+STORE, 47906197352448, 47906197413887,
+STORE, 47906197434368, 47906197438463,
+STORE, 47906197413888, 47906197434367,
+ERASE, 47906197413888, 47906197413888,
+STORE, 47906197413888, 47906197434367,
+ERASE, 47906197438464, 47906197438464,
+STORE, 47906197438464, 47906197446655,
+STORE, 47906197446656, 47906197491711,
+ERASE, 47906197446656, 47906197446656,
+STORE, 47906197446656, 47906197454847,
+STORE, 47906197454848, 47906197491711,
+STORE, 47906197475328, 47906197491711,
+STORE, 47906197454848, 47906197475327,
+ERASE, 47906197454848, 47906197454848,
+STORE, 47906197454848, 47906197475327,
+STORE, 47906197483520, 47906197491711,
+STORE, 47906197475328, 47906197483519,
+ERASE, 47906197475328, 47906197475328,
+STORE, 47906197475328, 47906197491711,
+ERASE, 47906197475328, 47906197475328,
+STORE, 47906197475328, 47906197483519,
+STORE, 47906197483520, 47906197491711,
+ERASE, 47906197483520, 47906197483520,
+STORE, 47906197483520, 47906197491711,
+STORE, 47906197491712, 47906197839871,
+STORE, 47906197532672, 47906197839871,
+STORE, 47906197491712, 47906197532671,
+ERASE, 47906197532672, 47906197532672,
+STORE, 47906197532672, 47906197815295,
+STORE, 47906197815296, 47906197839871,
+STORE, 47906197745664, 47906197815295,
+STORE, 47906197532672, 47906197745663,
+ERASE, 47906197532672, 47906197532672,
+STORE, 47906197532672, 47906197745663,
+STORE, 47906197811200, 47906197815295,
+STORE, 47906197745664, 47906197811199,
+ERASE, 47906197745664, 47906197745664,
+STORE, 47906197745664, 47906197811199,
+ERASE, 47906197815296, 47906197815296,
+STORE, 47906197815296, 47906197839871,
+STORE, 47906197839872, 47906200100863,
+STORE, 47906197991424, 47906200100863,
+STORE, 47906197839872, 47906197991423,
+ERASE, 47906197991424, 47906197991424,
+STORE, 47906197991424, 47906200084479,
+STORE, 47906200084480, 47906200100863,
+STORE, 47906200092672, 47906200100863,
+STORE, 47906200084480, 47906200092671,
+ERASE, 47906200084480, 47906200084480,
+STORE, 47906200084480, 47906200092671,
+ERASE, 47906200092672, 47906200092672,
+STORE, 47906200092672, 47906200100863,
+STORE, 47906200100864, 47906200236031,
+ERASE, 47906200100864, 47906200100864,
+STORE, 47906200100864, 47906200125439,
+STORE, 47906200125440, 47906200236031,
+STORE, 47906200186880, 47906200236031,
+STORE, 47906200125440, 47906200186879,
+ERASE, 47906200125440, 47906200125440,
+STORE, 47906200125440, 47906200186879,
+STORE, 47906200211456, 47906200236031,
+STORE, 47906200186880, 47906200211455,
+ERASE, 47906200186880, 47906200186880,
+STORE, 47906200186880, 47906200236031,
+ERASE, 47906200186880, 47906200186880,
+STORE, 47906200186880, 47906200211455,
+STORE, 47906200211456, 47906200236031,
+STORE, 47906200219648, 47906200236031,
+STORE, 47906200211456, 47906200219647,
+ERASE, 47906200211456, 47906200211456,
+STORE, 47906200211456, 47906200219647,
+ERASE, 47906200219648, 47906200219648,
+STORE, 47906200219648, 47906200236031,
+STORE, 47906200219648, 47906200244223,
+STORE, 47906200244224, 47906200408063,
+ERASE, 47906200244224, 47906200244224,
+STORE, 47906200244224, 47906200256511,
+STORE, 47906200256512, 47906200408063,
+STORE, 47906200354816, 47906200408063,
+STORE, 47906200256512, 47906200354815,
+ERASE, 47906200256512, 47906200256512,
+STORE, 47906200256512, 47906200354815,
+STORE, 47906200399872, 47906200408063,
+STORE, 47906200354816, 47906200399871,
+ERASE, 47906200354816, 47906200354816,
+STORE, 47906200354816, 47906200408063,
+ERASE, 47906200354816, 47906200354816,
+STORE, 47906200354816, 47906200399871,
+STORE, 47906200399872, 47906200408063,
+ERASE, 47906200399872, 47906200399872,
+STORE, 47906200399872, 47906200408063,
+STORE, 47906200408064, 47906201006079,
+STORE, 47906200526848, 47906201006079,
+STORE, 47906200408064, 47906200526847,
+ERASE, 47906200526848, 47906200526848,
+STORE, 47906200526848, 47906200952831,
+STORE, 47906200952832, 47906201006079,
+STORE, 47906200842240, 47906200952831,
+STORE, 47906200526848, 47906200842239,
+ERASE, 47906200526848, 47906200526848,
+STORE, 47906200526848, 47906200842239,
+STORE, 47906200948736, 47906200952831,
+STORE, 47906200842240, 47906200948735,
+ERASE, 47906200842240, 47906200842240,
+STORE, 47906200842240, 47906200948735,
+ERASE, 47906200952832, 47906200952832,
+STORE, 47906200952832, 47906201006079,
+STORE, 47906201006080, 47906204057599,
+STORE, 47906201550848, 47906204057599,
+STORE, 47906201006080, 47906201550847,
+ERASE, 47906201550848, 47906201550848,
+STORE, 47906201550848, 47906203836415,
+STORE, 47906203836416, 47906204057599,
+STORE, 47906203246592, 47906203836415,
+STORE, 47906201550848, 47906203246591,
+ERASE, 47906201550848, 47906201550848,
+STORE, 47906201550848, 47906203246591,
+STORE, 47906203832320, 47906203836415,
+STORE, 47906203246592, 47906203832319,
+ERASE, 47906203246592, 47906203246592,
+STORE, 47906203246592, 47906203832319,
+STORE, 47906204041216, 47906204057599,
+STORE, 47906203836416, 47906204041215,
+ERASE, 47906203836416, 47906203836416,
+STORE, 47906203836416, 47906204041215,
+ERASE, 47906204041216, 47906204041216,
+STORE, 47906204041216, 47906204057599,
+STORE, 47906204057600, 47906204090367,
+ERASE, 47906204057600, 47906204057600,
+STORE, 47906204057600, 47906204065791,
+STORE, 47906204065792, 47906204090367,
+STORE, 47906204078080, 47906204090367,
+STORE, 47906204065792, 47906204078079,
+ERASE, 47906204065792, 47906204065792,
+STORE, 47906204065792, 47906204078079,
+STORE, 47906204082176, 47906204090367,
+STORE, 47906204078080, 47906204082175,
+ERASE, 47906204078080, 47906204078080,
+STORE, 47906204078080, 47906204090367,
+ERASE, 47906204078080, 47906204078080,
+STORE, 47906204078080, 47906204082175,
+STORE, 47906204082176, 47906204090367,
+ERASE, 47906204082176, 47906204082176,
+STORE, 47906204082176, 47906204090367,
+STORE, 47906204090368, 47906204127231,
+ERASE, 47906204090368, 47906204090368,
+STORE, 47906204090368, 47906204098559,
+STORE, 47906204098560, 47906204127231,
+STORE, 47906204114944, 47906204127231,
+STORE, 47906204098560, 47906204114943,
+ERASE, 47906204098560, 47906204098560,
+STORE, 47906204098560, 47906204114943,
+STORE, 47906204119040, 47906204127231,
+STORE, 47906204114944, 47906204119039,
+ERASE, 47906204114944, 47906204114944,
+STORE, 47906204114944, 47906204127231,
+ERASE, 47906204114944, 47906204114944,
+STORE, 47906204114944, 47906204119039,
+STORE, 47906204119040, 47906204127231,
+ERASE, 47906204119040, 47906204119040,
+STORE, 47906204119040, 47906204127231,
+STORE, 47906204127232, 47906204602367,
+ERASE, 47906204127232, 47906204127232,
+STORE, 47906204127232, 47906204135423,
+STORE, 47906204135424, 47906204602367,
+STORE, 47906204471296, 47906204602367,
+STORE, 47906204135424, 47906204471295,
+ERASE, 47906204135424, 47906204135424,
+STORE, 47906204135424, 47906204471295,
+STORE, 47906204594176, 47906204602367,
+STORE, 47906204471296, 47906204594175,
+ERASE, 47906204471296, 47906204471296,
+STORE, 47906204471296, 47906204602367,
+ERASE, 47906204471296, 47906204471296,
+STORE, 47906204471296, 47906204594175,
+STORE, 47906204594176, 47906204602367,
+ERASE, 47906204594176, 47906204594176,
+STORE, 47906204594176, 47906204602367,
+STORE, 47906204602368, 47906204610559,
+STORE, 47906204610560, 47906204631039,
+ERASE, 47906204610560, 47906204610560,
+STORE, 47906204610560, 47906204614655,
+STORE, 47906204614656, 47906204631039,
+STORE, 47906204618752, 47906204631039,
+STORE, 47906204614656, 47906204618751,
+ERASE, 47906204614656, 47906204614656,
+STORE, 47906204614656, 47906204618751,
+STORE, 47906204622848, 47906204631039,
+STORE, 47906204618752, 47906204622847,
+ERASE, 47906204618752, 47906204618752,
+STORE, 47906204618752, 47906204631039,
+ERASE, 47906204618752, 47906204618752,
+STORE, 47906204618752, 47906204622847,
+STORE, 47906204622848, 47906204631039,
+ERASE, 47906204622848, 47906204622848,
+STORE, 47906204622848, 47906204631039,
+STORE, 47906204631040, 47906204639231,
+ERASE, 47906197295104, 47906197295104,
+STORE, 47906197295104, 47906197311487,
+STORE, 47906197311488, 47906197319679,
+ERASE, 47906204622848, 47906204622848,
+STORE, 47906204622848, 47906204626943,
+STORE, 47906204626944, 47906204631039,
+ERASE, 47906200211456, 47906200211456,
+STORE, 47906200211456, 47906200215551,
+STORE, 47906200215552, 47906200219647,
+ERASE, 47906204594176, 47906204594176,
+STORE, 47906204594176, 47906204598271,
+STORE, 47906204598272, 47906204602367,
+ERASE, 47906204119040, 47906204119040,
+STORE, 47906204119040, 47906204123135,
+STORE, 47906204123136, 47906204127231,
+ERASE, 47906204082176, 47906204082176,
+STORE, 47906204082176, 47906204086271,
+STORE, 47906204086272, 47906204090367,
+ERASE, 47906203836416, 47906203836416,
+STORE, 47906203836416, 47906204033023,
+STORE, 47906204033024, 47906204041215,
+ERASE, 47906200952832, 47906200952832,
+STORE, 47906200952832, 47906200989695,
+STORE, 47906200989696, 47906201006079,
+ERASE, 47906200399872, 47906200399872,
+STORE, 47906200399872, 47906200403967,
+STORE, 47906200403968, 47906200408063,
+ERASE, 47906200084480, 47906200084480,
+STORE, 47906200084480, 47906200088575,
+STORE, 47906200088576, 47906200092671,
+ERASE, 47906197815296, 47906197815296,
+STORE, 47906197815296, 47906197835775,
+STORE, 47906197835776, 47906197839871,
+ERASE, 47906197483520, 47906197483520,
+STORE, 47906197483520, 47906197487615,
+STORE, 47906197487616, 47906197491711,
+ERASE, 47906197438464, 47906197438464,
+STORE, 47906197438464, 47906197442559,
+STORE, 47906197442560, 47906197446655,
+ERASE, 94641232658432, 94641232658432,
+STORE, 94641232658432, 94641232781311,
+STORE, 94641232781312, 94641232785407,
+ERASE, 139726599675904, 139726599675904,
+STORE, 139726599675904, 139726599679999,
+STORE, 139726599680000, 139726599684095,
+ERASE, 47906195480576, 47906195480576,
+STORE, 94641242615808, 94641242750975,
+ };
+ static const unsigned long set11[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140732658499584, 140737488351231,
+ERASE, 140732658499584, 140732658499584,
+STORE, 140732658499584, 140732658503679,
+STORE, 94029856579584, 94029856751615,
+ERASE, 94029856579584, 94029856579584,
+STORE, 94029856579584, 94029856595967,
+STORE, 94029856595968, 94029856751615,
+ERASE, 94029856595968, 94029856595968,
+STORE, 94029856595968, 94029856698367,
+STORE, 94029856698368, 94029856739327,
+STORE, 94029856739328, 94029856751615,
+STORE, 140014592573440, 140014592745471,
+ERASE, 140014592573440, 140014592573440,
+STORE, 140014592573440, 140014592577535,
+STORE, 140014592577536, 140014592745471,
+ERASE, 140014592577536, 140014592577536,
+STORE, 140014592577536, 140014592700415,
+STORE, 140014592700416, 140014592733183,
+STORE, 140014592733184, 140014592741375,
+STORE, 140014592741376, 140014592745471,
+STORE, 140732658565120, 140732658569215,
+STORE, 140732658552832, 140732658565119,
+ };
+
+ static const unsigned long set12[] = { /* contains 12 values. */
+STORE, 140737488347136, 140737488351231,
+STORE, 140732658499584, 140737488351231,
+ERASE, 140732658499584, 140732658499584,
+STORE, 140732658499584, 140732658503679,
+STORE, 94029856579584, 94029856751615,
+ERASE, 94029856579584, 94029856579584,
+STORE, 94029856579584, 94029856595967,
+STORE, 94029856595968, 94029856751615,
+ERASE, 94029856595968, 94029856595968,
+STORE, 94029856595968, 94029856698367,
+STORE, 94029856698368, 94029856739327,
+STORE, 94029856739328, 94029856751615,
+STORE, 140014592573440, 140014592745471,
+ERASE, 140014592573440, 140014592573440,
+STORE, 140014592573440, 140014592577535,
+STORE, 140014592577536, 140014592745471,
+ERASE, 140014592577536, 140014592577536,
+STORE, 140014592577536, 140014592700415,
+STORE, 140014592700416, 140014592733183,
+STORE, 140014592733184, 140014592741375,
+STORE, 140014592741376, 140014592745471,
+STORE, 140732658565120, 140732658569215,
+STORE, 140732658552832, 140732658565119,
+STORE, 140014592741375, 140014592741375, /* contrived */
+STORE, 140014592733184, 140014592741376, /* creates first entry retry. */
+ };
+ static const unsigned long set13[] = {
+STORE, 140373516247040, 140373516251135,/*: ffffa2e7b0e10d80 */
+STORE, 140373516251136, 140373516255231,/*: ffffa2e7b1195d80 */
+STORE, 140373516255232, 140373516443647,/*: ffffa2e7b0e109c0 */
+STORE, 140373516443648, 140373516587007,/*: ffffa2e7b05fecc0 */
+STORE, 140373516963840, 140373518647295,/*: ffffa2e7bfbdcc00 */
+STORE, 140373518647296, 140373518663679,/*: ffffa2e7bf5d59c0 */
+STORE, 140373518663680, 140373518684159,/*: deleted (257) */
+STORE, 140373518680064, 140373518684159,/*: ffffa2e7b0e1cb40 */
+STORE, 140373518684160, 140373518688254,/*: ffffa2e7b05fec00 */
+STORE, 140373518688256, 140373518692351,/*: ffffa2e7bfbdcd80 */
+STORE, 140373518692352, 140373518696447,/*: ffffa2e7b0749e40 */
+ };
+ static const unsigned long set14[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140731667996672, 140737488351231,
+SNULL, 140731668000767, 140737488351231,
+STORE, 140731667996672, 140731668000767,
+STORE, 140731667865600, 140731668000767,
+STORE, 94077521272832, 94077521313791,
+SNULL, 94077521301503, 94077521313791,
+STORE, 94077521272832, 94077521301503,
+STORE, 94077521301504, 94077521313791,
+ERASE, 94077521301504, 94077521313791,
+STORE, 94077521305600, 94077521313791,
+STORE, 139826134630400, 139826136883199,
+SNULL, 139826134773759, 139826136883199,
+STORE, 139826134630400, 139826134773759,
+STORE, 139826134773760, 139826136883199,
+ERASE, 139826134773760, 139826136883199,
+STORE, 139826136870912, 139826136879103,
+STORE, 139826136879104, 139826136883199,
+STORE, 140731668013056, 140731668017151,
+STORE, 140731668000768, 140731668013055,
+STORE, 139826136862720, 139826136870911,
+STORE, 139826132406272, 139826134630399,
+SNULL, 139826134056959, 139826134630399,
+STORE, 139826132406272, 139826134056959,
+STORE, 139826134056960, 139826134630399,
+SNULL, 139826134056960, 139826134626303,
+STORE, 139826134626304, 139826134630399,
+STORE, 139826134056960, 139826134626303,
+ERASE, 139826134056960, 139826134626303,
+STORE, 139826134056960, 139826134626303,
+ERASE, 139826134626304, 139826134630399,
+STORE, 139826134626304, 139826134630399,
+STORE, 139826136842240, 139826136862719,
+STORE, 139826130022400, 139826132406271,
+SNULL, 139826130022400, 139826130288639,
+STORE, 139826130288640, 139826132406271,
+STORE, 139826130022400, 139826130288639,
+SNULL, 139826132381695, 139826132406271,
+STORE, 139826130288640, 139826132381695,
+STORE, 139826132381696, 139826132406271,
+SNULL, 139826132381696, 139826132402175,
+STORE, 139826132402176, 139826132406271,
+STORE, 139826132381696, 139826132402175,
+ERASE, 139826132381696, 139826132402175,
+STORE, 139826132381696, 139826132402175,
+ERASE, 139826132402176, 139826132406271,
+STORE, 139826132402176, 139826132406271,
+STORE, 139826127806464, 139826130022399,
+SNULL, 139826127806464, 139826127904767,
+STORE, 139826127904768, 139826130022399,
+STORE, 139826127806464, 139826127904767,
+SNULL, 139826129997823, 139826130022399,
+STORE, 139826127904768, 139826129997823,
+STORE, 139826129997824, 139826130022399,
+SNULL, 139826129997824, 139826130006015,
+STORE, 139826130006016, 139826130022399,
+STORE, 139826129997824, 139826130006015,
+ERASE, 139826129997824, 139826130006015,
+STORE, 139826129997824, 139826130006015,
+ERASE, 139826130006016, 139826130022399,
+STORE, 139826130006016, 139826130022399,
+STORE, 139826124009472, 139826127806463,
+SNULL, 139826124009472, 139826125668351,
+STORE, 139826125668352, 139826127806463,
+STORE, 139826124009472, 139826125668351,
+SNULL, 139826127765503, 139826127806463,
+STORE, 139826125668352, 139826127765503,
+STORE, 139826127765504, 139826127806463,
+SNULL, 139826127765504, 139826127790079,
+STORE, 139826127790080, 139826127806463,
+STORE, 139826127765504, 139826127790079,
+ERASE, 139826127765504, 139826127790079,
+STORE, 139826127765504, 139826127790079,
+ERASE, 139826127790080, 139826127806463,
+STORE, 139826127790080, 139826127806463,
+STORE, 139826121748480, 139826124009471,
+SNULL, 139826121748480, 139826121900031,
+STORE, 139826121900032, 139826124009471,
+STORE, 139826121748480, 139826121900031,
+SNULL, 139826123993087, 139826124009471,
+STORE, 139826121900032, 139826123993087,
+STORE, 139826123993088, 139826124009471,
+SNULL, 139826123993088, 139826124001279,
+STORE, 139826124001280, 139826124009471,
+STORE, 139826123993088, 139826124001279,
+ERASE, 139826123993088, 139826124001279,
+STORE, 139826123993088, 139826124001279,
+ERASE, 139826124001280, 139826124009471,
+STORE, 139826124001280, 139826124009471,
+STORE, 139826119626752, 139826121748479,
+SNULL, 139826119626752, 139826119643135,
+STORE, 139826119643136, 139826121748479,
+STORE, 139826119626752, 139826119643135,
+SNULL, 139826121740287, 139826121748479,
+STORE, 139826119643136, 139826121740287,
+STORE, 139826121740288, 139826121748479,
+ERASE, 139826121740288, 139826121748479,
+STORE, 139826121740288, 139826121748479,
+STORE, 139826136834048, 139826136842239,
+STORE, 139826117496832, 139826119626751,
+SNULL, 139826117496832, 139826117525503,
+STORE, 139826117525504, 139826119626751,
+STORE, 139826117496832, 139826117525503,
+SNULL, 139826119618559, 139826119626751,
+STORE, 139826117525504, 139826119618559,
+STORE, 139826119618560, 139826119626751,
+ERASE, 139826119618560, 139826119626751,
+STORE, 139826119618560, 139826119626751,
+STORE, 139826115244032, 139826117496831,
+SNULL, 139826115244032, 139826115395583,
+STORE, 139826115395584, 139826117496831,
+STORE, 139826115244032, 139826115395583,
+SNULL, 139826117488639, 139826117496831,
+STORE, 139826115395584, 139826117488639,
+STORE, 139826117488640, 139826117496831,
+ERASE, 139826117488640, 139826117496831,
+STORE, 139826117488640, 139826117496831,
+STORE, 139826113073152, 139826115244031,
+SNULL, 139826113073152, 139826113142783,
+STORE, 139826113142784, 139826115244031,
+STORE, 139826113073152, 139826113142783,
+SNULL, 139826115235839, 139826115244031,
+STORE, 139826113142784, 139826115235839,
+STORE, 139826115235840, 139826115244031,
+ERASE, 139826115235840, 139826115244031,
+STORE, 139826115235840, 139826115244031,
+STORE, 139826109861888, 139826113073151,
+SNULL, 139826109861888, 139826110939135,
+STORE, 139826110939136, 139826113073151,
+STORE, 139826109861888, 139826110939135,
+SNULL, 139826113036287, 139826113073151,
+STORE, 139826110939136, 139826113036287,
+STORE, 139826113036288, 139826113073151,
+ERASE, 139826113036288, 139826113073151,
+STORE, 139826113036288, 139826113073151,
+STORE, 139826107727872, 139826109861887,
+SNULL, 139826107727872, 139826107756543,
+STORE, 139826107756544, 139826109861887,
+STORE, 139826107727872, 139826107756543,
+SNULL, 139826109853695, 139826109861887,
+STORE, 139826107756544, 139826109853695,
+STORE, 139826109853696, 139826109861887,
+ERASE, 139826109853696, 139826109861887,
+STORE, 139826109853696, 139826109861887,
+STORE, 139826105417728, 139826107727871,
+SNULL, 139826105417728, 139826105622527,
+STORE, 139826105622528, 139826107727871,
+STORE, 139826105417728, 139826105622527,
+SNULL, 139826107719679, 139826107727871,
+STORE, 139826105622528, 139826107719679,
+STORE, 139826107719680, 139826107727871,
+ERASE, 139826107719680, 139826107727871,
+STORE, 139826107719680, 139826107727871,
+STORE, 139826136825856, 139826136842239,
+STORE, 139826103033856, 139826105417727,
+SNULL, 139826103033856, 139826103226367,
+STORE, 139826103226368, 139826105417727,
+STORE, 139826103033856, 139826103226367,
+SNULL, 139826105319423, 139826105417727,
+STORE, 139826103226368, 139826105319423,
+STORE, 139826105319424, 139826105417727,
+ERASE, 139826105319424, 139826105417727,
+STORE, 139826105319424, 139826105417727,
+STORE, 139826100916224, 139826103033855,
+SNULL, 139826100916224, 139826100932607,
+STORE, 139826100932608, 139826103033855,
+STORE, 139826100916224, 139826100932607,
+SNULL, 139826103025663, 139826103033855,
+STORE, 139826100932608, 139826103025663,
+STORE, 139826103025664, 139826103033855,
+ERASE, 139826103025664, 139826103033855,
+STORE, 139826103025664, 139826103033855,
+STORE, 139826098348032, 139826100916223,
+SNULL, 139826098348032, 139826098814975,
+STORE, 139826098814976, 139826100916223,
+STORE, 139826098348032, 139826098814975,
+SNULL, 139826100908031, 139826100916223,
+STORE, 139826098814976, 139826100908031,
+STORE, 139826100908032, 139826100916223,
+ERASE, 139826100908032, 139826100916223,
+STORE, 139826100908032, 139826100916223,
+STORE, 139826096234496, 139826098348031,
+SNULL, 139826096234496, 139826096246783,
+STORE, 139826096246784, 139826098348031,
+STORE, 139826096234496, 139826096246783,
+SNULL, 139826098339839, 139826098348031,
+STORE, 139826096246784, 139826098339839,
+STORE, 139826098339840, 139826098348031,
+ERASE, 139826098339840, 139826098348031,
+STORE, 139826098339840, 139826098348031,
+STORE, 139826094055424, 139826096234495,
+SNULL, 139826094055424, 139826094133247,
+STORE, 139826094133248, 139826096234495,
+STORE, 139826094055424, 139826094133247,
+SNULL, 139826096226303, 139826096234495,
+STORE, 139826094133248, 139826096226303,
+STORE, 139826096226304, 139826096234495,
+ERASE, 139826096226304, 139826096234495,
+STORE, 139826096226304, 139826096234495,
+STORE, 139826136817664, 139826136842239,
+STORE, 139826091937792, 139826094055423,
+SNULL, 139826091937792, 139826091954175,
+STORE, 139826091954176, 139826094055423,
+STORE, 139826091937792, 139826091954175,
+SNULL, 139826094047231, 139826094055423,
+STORE, 139826091954176, 139826094047231,
+STORE, 139826094047232, 139826094055423,
+ERASE, 139826094047232, 139826094055423,
+STORE, 139826094047232, 139826094055423,
+STORE, 139826136809472, 139826136842239,
+SNULL, 139826127781887, 139826127790079,
+STORE, 139826127765504, 139826127781887,
+STORE, 139826127781888, 139826127790079,
+SNULL, 139826094051327, 139826094055423,
+STORE, 139826094047232, 139826094051327,
+STORE, 139826094051328, 139826094055423,
+SNULL, 139826096230399, 139826096234495,
+STORE, 139826096226304, 139826096230399,
+STORE, 139826096230400, 139826096234495,
+SNULL, 139826098343935, 139826098348031,
+STORE, 139826098339840, 139826098343935,
+STORE, 139826098343936, 139826098348031,
+SNULL, 139826130001919, 139826130006015,
+STORE, 139826129997824, 139826130001919,
+STORE, 139826130001920, 139826130006015,
+SNULL, 139826100912127, 139826100916223,
+STORE, 139826100908032, 139826100912127,
+STORE, 139826100912128, 139826100916223,
+SNULL, 139826103029759, 139826103033855,
+STORE, 139826103025664, 139826103029759,
+STORE, 139826103029760, 139826103033855,
+SNULL, 139826105413631, 139826105417727,
+STORE, 139826105319424, 139826105413631,
+STORE, 139826105413632, 139826105417727,
+SNULL, 139826107723775, 139826107727871,
+STORE, 139826107719680, 139826107723775,
+STORE, 139826107723776, 139826107727871,
+SNULL, 139826109857791, 139826109861887,
+STORE, 139826109853696, 139826109857791,
+STORE, 139826109857792, 139826109861887,
+SNULL, 139826113044479, 139826113073151,
+STORE, 139826113036288, 139826113044479,
+STORE, 139826113044480, 139826113073151,
+SNULL, 139826115239935, 139826115244031,
+STORE, 139826115235840, 139826115239935,
+STORE, 139826115239936, 139826115244031,
+SNULL, 139826117492735, 139826117496831,
+STORE, 139826117488640, 139826117492735,
+STORE, 139826117492736, 139826117496831,
+SNULL, 139826119622655, 139826119626751,
+STORE, 139826119618560, 139826119622655,
+STORE, 139826119622656, 139826119626751,
+SNULL, 139826121744383, 139826121748479,
+STORE, 139826121740288, 139826121744383,
+STORE, 139826121744384, 139826121748479,
+SNULL, 139826123997183, 139826124001279,
+STORE, 139826123993088, 139826123997183,
+STORE, 139826123997184, 139826124001279,
+SNULL, 139826132398079, 139826132402175,
+STORE, 139826132381696, 139826132398079,
+STORE, 139826132398080, 139826132402175,
+SNULL, 139826134622207, 139826134626303,
+STORE, 139826134056960, 139826134622207,
+STORE, 139826134622208, 139826134626303,
+SNULL, 94077521309695, 94077521313791,
+STORE, 94077521305600, 94077521309695,
+STORE, 94077521309696, 94077521313791,
+SNULL, 139826136875007, 139826136879103,
+STORE, 139826136870912, 139826136875007,
+STORE, 139826136875008, 139826136879103,
+ERASE, 139826136842240, 139826136862719,
+STORE, 94077554049024, 94077554184191,
+STORE, 139826136543232, 139826136842239,
+STORE, 139826136276992, 139826136842239,
+STORE, 139826136010752, 139826136842239,
+STORE, 139826135744512, 139826136842239,
+SNULL, 139826136543231, 139826136842239,
+STORE, 139826135744512, 139826136543231,
+STORE, 139826136543232, 139826136842239,
+SNULL, 139826136543232, 139826136809471,
+STORE, 139826136809472, 139826136842239,
+STORE, 139826136543232, 139826136809471,
+ };
+ static const unsigned long set15[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140722061451264, 140737488351231,
+SNULL, 140722061455359, 140737488351231,
+STORE, 140722061451264, 140722061455359,
+STORE, 140722061320192, 140722061455359,
+STORE, 94728600248320, 94728600289279,
+SNULL, 94728600276991, 94728600289279,
+STORE, 94728600248320, 94728600276991,
+STORE, 94728600276992, 94728600289279,
+ERASE, 94728600276992, 94728600289279,
+STORE, 94728600281088, 94728600289279,
+STORE, 139906806779904, 139906809032703,
+SNULL, 139906806923263, 139906809032703,
+STORE, 139906806779904, 139906806923263,
+STORE, 139906806923264, 139906809032703,
+ERASE, 139906806923264, 139906809032703,
+STORE, 139906809020416, 139906809028607,
+STORE, 139906809028608, 139906809032703,
+STORE, 140722061692928, 140722061697023,
+STORE, 140722061680640, 140722061692927,
+STORE, 139906809012224, 139906809020415,
+STORE, 139906804555776, 139906806779903,
+SNULL, 139906806206463, 139906806779903,
+STORE, 139906804555776, 139906806206463,
+STORE, 139906806206464, 139906806779903,
+SNULL, 139906806206464, 139906806775807,
+STORE, 139906806775808, 139906806779903,
+STORE, 139906806206464, 139906806775807,
+ERASE, 139906806206464, 139906806775807,
+STORE, 139906806206464, 139906806775807,
+ERASE, 139906806775808, 139906806779903,
+STORE, 139906806775808, 139906806779903,
+STORE, 139906808991744, 139906809012223,
+STORE, 139906802171904, 139906804555775,
+SNULL, 139906802171904, 139906802438143,
+STORE, 139906802438144, 139906804555775,
+STORE, 139906802171904, 139906802438143,
+SNULL, 139906804531199, 139906804555775,
+STORE, 139906802438144, 139906804531199,
+STORE, 139906804531200, 139906804555775,
+SNULL, 139906804531200, 139906804551679,
+STORE, 139906804551680, 139906804555775,
+STORE, 139906804531200, 139906804551679,
+ERASE, 139906804531200, 139906804551679,
+STORE, 139906804531200, 139906804551679,
+ERASE, 139906804551680, 139906804555775,
+STORE, 139906804551680, 139906804555775,
+STORE, 139906799955968, 139906802171903,
+SNULL, 139906799955968, 139906800054271,
+STORE, 139906800054272, 139906802171903,
+STORE, 139906799955968, 139906800054271,
+SNULL, 139906802147327, 139906802171903,
+STORE, 139906800054272, 139906802147327,
+STORE, 139906802147328, 139906802171903,
+SNULL, 139906802147328, 139906802155519,
+STORE, 139906802155520, 139906802171903,
+STORE, 139906802147328, 139906802155519,
+ERASE, 139906802147328, 139906802155519,
+STORE, 139906802147328, 139906802155519,
+ERASE, 139906802155520, 139906802171903,
+STORE, 139906802155520, 139906802171903,
+STORE, 139906796158976, 139906799955967,
+SNULL, 139906796158976, 139906797817855,
+STORE, 139906797817856, 139906799955967,
+STORE, 139906796158976, 139906797817855,
+SNULL, 139906799915007, 139906799955967,
+STORE, 139906797817856, 139906799915007,
+STORE, 139906799915008, 139906799955967,
+SNULL, 139906799915008, 139906799939583,
+STORE, 139906799939584, 139906799955967,
+STORE, 139906799915008, 139906799939583,
+ERASE, 139906799915008, 139906799939583,
+STORE, 139906799915008, 139906799939583,
+ERASE, 139906799939584, 139906799955967,
+STORE, 139906799939584, 139906799955967,
+STORE, 139906793897984, 139906796158975,
+SNULL, 139906793897984, 139906794049535,
+STORE, 139906794049536, 139906796158975,
+STORE, 139906793897984, 139906794049535,
+SNULL, 139906796142591, 139906796158975,
+STORE, 139906794049536, 139906796142591,
+STORE, 139906796142592, 139906796158975,
+SNULL, 139906796142592, 139906796150783,
+STORE, 139906796150784, 139906796158975,
+STORE, 139906796142592, 139906796150783,
+ERASE, 139906796142592, 139906796150783,
+STORE, 139906796142592, 139906796150783,
+ERASE, 139906796150784, 139906796158975,
+STORE, 139906796150784, 139906796158975,
+STORE, 139906791776256, 139906793897983,
+SNULL, 139906791776256, 139906791792639,
+STORE, 139906791792640, 139906793897983,
+STORE, 139906791776256, 139906791792639,
+SNULL, 139906793889791, 139906793897983,
+STORE, 139906791792640, 139906793889791,
+STORE, 139906793889792, 139906793897983,
+ERASE, 139906793889792, 139906793897983,
+STORE, 139906793889792, 139906793897983,
+STORE, 139906808983552, 139906808991743,
+STORE, 139906789646336, 139906791776255,
+SNULL, 139906789646336, 139906789675007,
+STORE, 139906789675008, 139906791776255,
+STORE, 139906789646336, 139906789675007,
+SNULL, 139906791768063, 139906791776255,
+STORE, 139906789675008, 139906791768063,
+STORE, 139906791768064, 139906791776255,
+ERASE, 139906791768064, 139906791776255,
+STORE, 139906791768064, 139906791776255,
+STORE, 139906787393536, 139906789646335,
+SNULL, 139906787393536, 139906787545087,
+STORE, 139906787545088, 139906789646335,
+STORE, 139906787393536, 139906787545087,
+SNULL, 139906789638143, 139906789646335,
+STORE, 139906787545088, 139906789638143,
+STORE, 139906789638144, 139906789646335,
+ERASE, 139906789638144, 139906789646335,
+STORE, 139906789638144, 139906789646335,
+STORE, 139906785222656, 139906787393535,
+SNULL, 139906785222656, 139906785292287,
+STORE, 139906785292288, 139906787393535,
+STORE, 139906785222656, 139906785292287,
+SNULL, 139906787385343, 139906787393535,
+STORE, 139906785292288, 139906787385343,
+STORE, 139906787385344, 139906787393535,
+ERASE, 139906787385344, 139906787393535,
+STORE, 139906787385344, 139906787393535,
+STORE, 139906782011392, 139906785222655,
+SNULL, 139906782011392, 139906783088639,
+STORE, 139906783088640, 139906785222655,
+STORE, 139906782011392, 139906783088639,
+SNULL, 139906785185791, 139906785222655,
+STORE, 139906783088640, 139906785185791,
+STORE, 139906785185792, 139906785222655,
+ERASE, 139906785185792, 139906785222655,
+STORE, 139906785185792, 139906785222655,
+STORE, 139906779877376, 139906782011391,
+SNULL, 139906779877376, 139906779906047,
+STORE, 139906779906048, 139906782011391,
+STORE, 139906779877376, 139906779906047,
+SNULL, 139906782003199, 139906782011391,
+STORE, 139906779906048, 139906782003199,
+STORE, 139906782003200, 139906782011391,
+ERASE, 139906782003200, 139906782011391,
+STORE, 139906782003200, 139906782011391,
+STORE, 139906777567232, 139906779877375,
+SNULL, 139906777567232, 139906777772031,
+STORE, 139906777772032, 139906779877375,
+STORE, 139906777567232, 139906777772031,
+SNULL, 139906779869183, 139906779877375,
+STORE, 139906777772032, 139906779869183,
+STORE, 139906779869184, 139906779877375,
+ERASE, 139906779869184, 139906779877375,
+STORE, 139906779869184, 139906779877375,
+STORE, 139906808975360, 139906808991743,
+STORE, 139906775183360, 139906777567231,
+SNULL, 139906775183360, 139906775375871,
+STORE, 139906775375872, 139906777567231,
+STORE, 139906775183360, 139906775375871,
+SNULL, 139906777468927, 139906777567231,
+STORE, 139906775375872, 139906777468927,
+STORE, 139906777468928, 139906777567231,
+ERASE, 139906777468928, 139906777567231,
+STORE, 139906777468928, 139906777567231,
+STORE, 139906773065728, 139906775183359,
+SNULL, 139906773065728, 139906773082111,
+STORE, 139906773082112, 139906775183359,
+STORE, 139906773065728, 139906773082111,
+SNULL, 139906775175167, 139906775183359,
+STORE, 139906773082112, 139906775175167,
+STORE, 139906775175168, 139906775183359,
+ERASE, 139906775175168, 139906775183359,
+STORE, 139906775175168, 139906775183359,
+STORE, 139906770497536, 139906773065727,
+SNULL, 139906770497536, 139906770964479,
+STORE, 139906770964480, 139906773065727,
+STORE, 139906770497536, 139906770964479,
+SNULL, 139906773057535, 139906773065727,
+STORE, 139906770964480, 139906773057535,
+STORE, 139906773057536, 139906773065727,
+ERASE, 139906773057536, 139906773065727,
+STORE, 139906773057536, 139906773065727,
+STORE, 139906768384000, 139906770497535,
+SNULL, 139906768384000, 139906768396287,
+STORE, 139906768396288, 139906770497535,
+STORE, 139906768384000, 139906768396287,
+SNULL, 139906770489343, 139906770497535,
+STORE, 139906768396288, 139906770489343,
+STORE, 139906770489344, 139906770497535,
+ERASE, 139906770489344, 139906770497535,
+STORE, 139906770489344, 139906770497535,
+STORE, 139906766204928, 139906768383999,
+SNULL, 139906766204928, 139906766282751,
+STORE, 139906766282752, 139906768383999,
+STORE, 139906766204928, 139906766282751,
+SNULL, 139906768375807, 139906768383999,
+STORE, 139906766282752, 139906768375807,
+STORE, 139906768375808, 139906768383999,
+ERASE, 139906768375808, 139906768383999,
+STORE, 139906768375808, 139906768383999,
+STORE, 139906808967168, 139906808991743,
+STORE, 139906764087296, 139906766204927,
+SNULL, 139906764087296, 139906764103679,
+STORE, 139906764103680, 139906766204927,
+STORE, 139906764087296, 139906764103679,
+SNULL, 139906766196735, 139906766204927,
+STORE, 139906764103680, 139906766196735,
+STORE, 139906766196736, 139906766204927,
+ERASE, 139906766196736, 139906766204927,
+STORE, 139906766196736, 139906766204927,
+STORE, 139906808958976, 139906808991743,
+SNULL, 139906799931391, 139906799939583,
+STORE, 139906799915008, 139906799931391,
+STORE, 139906799931392, 139906799939583,
+SNULL, 139906766200831, 139906766204927,
+STORE, 139906766196736, 139906766200831,
+STORE, 139906766200832, 139906766204927,
+SNULL, 139906768379903, 139906768383999,
+STORE, 139906768375808, 139906768379903,
+STORE, 139906768379904, 139906768383999,
+SNULL, 139906770493439, 139906770497535,
+STORE, 139906770489344, 139906770493439,
+STORE, 139906770493440, 139906770497535,
+SNULL, 139906802151423, 139906802155519,
+STORE, 139906802147328, 139906802151423,
+STORE, 139906802151424, 139906802155519,
+SNULL, 139906773061631, 139906773065727,
+STORE, 139906773057536, 139906773061631,
+STORE, 139906773061632, 139906773065727,
+SNULL, 139906775179263, 139906775183359,
+STORE, 139906775175168, 139906775179263,
+STORE, 139906775179264, 139906775183359,
+SNULL, 139906777563135, 139906777567231,
+STORE, 139906777468928, 139906777563135,
+STORE, 139906777563136, 139906777567231,
+SNULL, 139906779873279, 139906779877375,
+STORE, 139906779869184, 139906779873279,
+STORE, 139906779873280, 139906779877375,
+SNULL, 139906782007295, 139906782011391,
+STORE, 139906782003200, 139906782007295,
+STORE, 139906782007296, 139906782011391,
+SNULL, 139906785193983, 139906785222655,
+STORE, 139906785185792, 139906785193983,
+STORE, 139906785193984, 139906785222655,
+SNULL, 139906787389439, 139906787393535,
+STORE, 139906787385344, 139906787389439,
+STORE, 139906787389440, 139906787393535,
+SNULL, 139906789642239, 139906789646335,
+STORE, 139906789638144, 139906789642239,
+STORE, 139906789642240, 139906789646335,
+SNULL, 139906791772159, 139906791776255,
+STORE, 139906791768064, 139906791772159,
+STORE, 139906791772160, 139906791776255,
+SNULL, 139906793893887, 139906793897983,
+STORE, 139906793889792, 139906793893887,
+STORE, 139906793893888, 139906793897983,
+SNULL, 139906796146687, 139906796150783,
+STORE, 139906796142592, 139906796146687,
+STORE, 139906796146688, 139906796150783,
+SNULL, 139906804547583, 139906804551679,
+STORE, 139906804531200, 139906804547583,
+STORE, 139906804547584, 139906804551679,
+SNULL, 139906806771711, 139906806775807,
+STORE, 139906806206464, 139906806771711,
+STORE, 139906806771712, 139906806775807,
+SNULL, 94728600285183, 94728600289279,
+STORE, 94728600281088, 94728600285183,
+STORE, 94728600285184, 94728600289279,
+SNULL, 139906809024511, 139906809028607,
+STORE, 139906809020416, 139906809024511,
+STORE, 139906809024512, 139906809028607,
+ERASE, 139906808991744, 139906809012223,
+STORE, 94728620138496, 94728620273663,
+STORE, 139906808692736, 139906808991743,
+STORE, 139906808426496, 139906808991743,
+STORE, 139906808160256, 139906808991743,
+STORE, 139906807894016, 139906808991743,
+SNULL, 139906808692735, 139906808991743,
+STORE, 139906807894016, 139906808692735,
+STORE, 139906808692736, 139906808991743,
+SNULL, 139906808692736, 139906808958975,
+STORE, 139906808958976, 139906808991743,
+STORE, 139906808692736, 139906808958975,
+ };
+
+ static const unsigned long set16[] = {
+STORE, 94174808662016, 94174809321471,
+STORE, 94174811414528, 94174811426815,
+STORE, 94174811426816, 94174811430911,
+STORE, 94174811430912, 94174811443199,
+STORE, 94174841700352, 94174841835519,
+STORE, 140173257838592, 140173259497471,
+STORE, 140173259497472, 140173261594623,
+STORE, 140173261594624, 140173261611007,
+STORE, 140173261611008, 140173261619199,
+STORE, 140173261619200, 140173261635583,
+STORE, 140173261635584, 140173261778943,
+STORE, 140173263863808, 140173263871999,
+STORE, 140173263876096, 140173263880191,
+STORE, 140173263880192, 140173263884287,
+STORE, 140173263884288, 140173263888383,
+STORE, 140729801007104, 140729801142271,
+STORE, 140729801617408, 140729801629695,
+STORE, 140729801629696, 140729801633791,
+STORE, 140737488347136, 140737488351231,
+STORE, 140728166858752, 140737488351231,
+SNULL, 140728166862847, 140737488351231,
+STORE, 140728166858752, 140728166862847,
+STORE, 140728166727680, 140728166862847,
+STORE, 93912949866496, 93912950337535,
+SNULL, 93912950288383, 93912950337535,
+STORE, 93912949866496, 93912950288383,
+STORE, 93912950288384, 93912950337535,
+ERASE, 93912950288384, 93912950337535,
+STORE, 93912950292480, 93912950337535,
+STORE, 139921863385088, 139921865637887,
+SNULL, 139921863528447, 139921865637887,
+STORE, 139921863385088, 139921863528447,
+STORE, 139921863528448, 139921865637887,
+ERASE, 139921863528448, 139921865637887,
+STORE, 139921865625600, 139921865633791,
+STORE, 139921865633792, 139921865637887,
+STORE, 140728167899136, 140728167903231,
+STORE, 140728167886848, 140728167899135,
+STORE, 139921865601024, 139921865625599,
+STORE, 139921865592832, 139921865601023,
+STORE, 139921861251072, 139921863385087,
+SNULL, 139921861251072, 139921861279743,
+STORE, 139921861279744, 139921863385087,
+STORE, 139921861251072, 139921861279743,
+SNULL, 139921863376895, 139921863385087,
+STORE, 139921861279744, 139921863376895,
+STORE, 139921863376896, 139921863385087,
+ERASE, 139921863376896, 139921863385087,
+STORE, 139921863376896, 139921863385087,
+STORE, 139921858867200, 139921861251071,
+SNULL, 139921858867200, 139921859133439,
+STORE, 139921859133440, 139921861251071,
+STORE, 139921858867200, 139921859133439,
+SNULL, 139921861226495, 139921861251071,
+STORE, 139921859133440, 139921861226495,
+STORE, 139921861226496, 139921861251071,
+SNULL, 139921861226496, 139921861246975,
+STORE, 139921861246976, 139921861251071,
+STORE, 139921861226496, 139921861246975,
+ERASE, 139921861226496, 139921861246975,
+STORE, 139921861226496, 139921861246975,
+ERASE, 139921861246976, 139921861251071,
+STORE, 139921861246976, 139921861251071,
+STORE, 139921856675840, 139921858867199,
+SNULL, 139921856675840, 139921856765951,
+STORE, 139921856765952, 139921858867199,
+STORE, 139921856675840, 139921856765951,
+SNULL, 139921858859007, 139921858867199,
+STORE, 139921856765952, 139921858859007,
+STORE, 139921858859008, 139921858867199,
+ERASE, 139921858859008, 139921858867199,
+STORE, 139921858859008, 139921858867199,
+STORE, 139921854414848, 139921856675839,
+SNULL, 139921854414848, 139921854566399,
+STORE, 139921854566400, 139921856675839,
+STORE, 139921854414848, 139921854566399,
+SNULL, 139921856659455, 139921856675839,
+STORE, 139921854566400, 139921856659455,
+STORE, 139921856659456, 139921856675839,
+SNULL, 139921856659456, 139921856667647,
+STORE, 139921856667648, 139921856675839,
+STORE, 139921856659456, 139921856667647,
+ERASE, 139921856659456, 139921856667647,
+STORE, 139921856659456, 139921856667647,
+ERASE, 139921856667648, 139921856675839,
+STORE, 139921856667648, 139921856675839,
+STORE, 139921852284928, 139921854414847,
+SNULL, 139921852284928, 139921852313599,
+STORE, 139921852313600, 139921854414847,
+STORE, 139921852284928, 139921852313599,
+SNULL, 139921854406655, 139921854414847,
+STORE, 139921852313600, 139921854406655,
+STORE, 139921854406656, 139921854414847,
+ERASE, 139921854406656, 139921854414847,
+STORE, 139921854406656, 139921854414847,
+STORE, 139921850068992, 139921852284927,
+SNULL, 139921850068992, 139921850167295,
+STORE, 139921850167296, 139921852284927,
+STORE, 139921850068992, 139921850167295,
+SNULL, 139921852260351, 139921852284927,
+STORE, 139921850167296, 139921852260351,
+STORE, 139921852260352, 139921852284927,
+SNULL, 139921852260352, 139921852268543,
+STORE, 139921852268544, 139921852284927,
+STORE, 139921852260352, 139921852268543,
+ERASE, 139921852260352, 139921852268543,
+STORE, 139921852260352, 139921852268543,
+ERASE, 139921852268544, 139921852284927,
+STORE, 139921852268544, 139921852284927,
+STORE, 139921865584640, 139921865601023,
+STORE, 139921846272000, 139921850068991,
+SNULL, 139921846272000, 139921847930879,
+STORE, 139921847930880, 139921850068991,
+STORE, 139921846272000, 139921847930879,
+SNULL, 139921850028031, 139921850068991,
+STORE, 139921847930880, 139921850028031,
+STORE, 139921850028032, 139921850068991,
+SNULL, 139921850028032, 139921850052607,
+STORE, 139921850052608, 139921850068991,
+STORE, 139921850028032, 139921850052607,
+ERASE, 139921850028032, 139921850052607,
+STORE, 139921850028032, 139921850052607,
+ERASE, 139921850052608, 139921850068991,
+STORE, 139921850052608, 139921850068991,
+STORE, 139921844154368, 139921846271999,
+SNULL, 139921844154368, 139921844170751,
+STORE, 139921844170752, 139921846271999,
+STORE, 139921844154368, 139921844170751,
+SNULL, 139921846263807, 139921846271999,
+STORE, 139921844170752, 139921846263807,
+STORE, 139921846263808, 139921846271999,
+ERASE, 139921846263808, 139921846271999,
+STORE, 139921846263808, 139921846271999,
+STORE, 139921842036736, 139921844154367,
+SNULL, 139921842036736, 139921842053119,
+STORE, 139921842053120, 139921844154367,
+STORE, 139921842036736, 139921842053119,
+SNULL, 139921844146175, 139921844154367,
+STORE, 139921842053120, 139921844146175,
+STORE, 139921844146176, 139921844154367,
+ERASE, 139921844146176, 139921844154367,
+STORE, 139921844146176, 139921844154367,
+STORE, 139921839468544, 139921842036735,
+SNULL, 139921839468544, 139921839935487,
+STORE, 139921839935488, 139921842036735,
+STORE, 139921839468544, 139921839935487,
+SNULL, 139921842028543, 139921842036735,
+STORE, 139921839935488, 139921842028543,
+STORE, 139921842028544, 139921842036735,
+ERASE, 139921842028544, 139921842036735,
+STORE, 139921842028544, 139921842036735,
+STORE, 139921837355008, 139921839468543,
+SNULL, 139921837355008, 139921837367295,
+STORE, 139921837367296, 139921839468543,
+STORE, 139921837355008, 139921837367295,
+SNULL, 139921839460351, 139921839468543,
+STORE, 139921837367296, 139921839460351,
+STORE, 139921839460352, 139921839468543,
+ERASE, 139921839460352, 139921839468543,
+STORE, 139921839460352, 139921839468543,
+STORE, 139921865576448, 139921865601023,
+STORE, 139921865564160, 139921865601023,
+SNULL, 139921850044415, 139921850052607,
+STORE, 139921850028032, 139921850044415,
+STORE, 139921850044416, 139921850052607,
+SNULL, 139921839464447, 139921839468543,
+STORE, 139921839460352, 139921839464447,
+STORE, 139921839464448, 139921839468543,
+SNULL, 139921852264447, 139921852268543,
+STORE, 139921852260352, 139921852264447,
+STORE, 139921852264448, 139921852268543,
+SNULL, 139921842032639, 139921842036735,
+STORE, 139921842028544, 139921842032639,
+STORE, 139921842032640, 139921842036735,
+SNULL, 139921844150271, 139921844154367,
+STORE, 139921844146176, 139921844150271,
+STORE, 139921844150272, 139921844154367,
+SNULL, 139921846267903, 139921846271999,
+STORE, 139921846263808, 139921846267903,
+STORE, 139921846267904, 139921846271999,
+SNULL, 139921854410751, 139921854414847,
+STORE, 139921854406656, 139921854410751,
+STORE, 139921854410752, 139921854414847,
+SNULL, 139921856663551, 139921856667647,
+STORE, 139921856659456, 139921856663551,
+STORE, 139921856663552, 139921856667647,
+SNULL, 139921858863103, 139921858867199,
+STORE, 139921858859008, 139921858863103,
+STORE, 139921858863104, 139921858867199,
+SNULL, 139921861242879, 139921861246975,
+STORE, 139921861226496, 139921861242879,
+STORE, 139921861242880, 139921861246975,
+SNULL, 139921863380991, 139921863385087,
+STORE, 139921863376896, 139921863380991,
+STORE, 139921863380992, 139921863385087,
+SNULL, 93912950333439, 93912950337535,
+STORE, 93912950292480, 93912950333439,
+STORE, 93912950333440, 93912950337535,
+SNULL, 139921865629695, 139921865633791,
+STORE, 139921865625600, 139921865629695,
+STORE, 139921865629696, 139921865633791,
+ERASE, 139921865601024, 139921865625599,
+STORE, 93912968110080, 93912968245247,
+STORE, 139921828913152, 139921837355007,
+STORE, 139921865621504, 139921865625599,
+STORE, 139921865617408, 139921865621503,
+STORE, 139921865613312, 139921865617407,
+STORE, 139921865547776, 139921865564159,
+ };
+
+ static const unsigned long set17[] = {
+STORE, 94397057224704, 94397057646591,
+STORE, 94397057650688, 94397057691647,
+STORE, 94397057691648, 94397057695743,
+STORE, 94397075271680, 94397075406847,
+STORE, 139953169051648, 139953169063935,
+STORE, 139953169063936, 139953171156991,
+STORE, 139953171156992, 139953171161087,
+STORE, 139953171161088, 139953171165183,
+STORE, 139953171165184, 139953171632127,
+STORE, 139953171632128, 139953173725183,
+STORE, 139953173725184, 139953173729279,
+STORE, 139953173729280, 139953173733375,
+STORE, 139953173733376, 139953173749759,
+STORE, 139953173749760, 139953175842815,
+STORE, 139953175842816, 139953175846911,
+STORE, 139953175846912, 139953175851007,
+STORE, 139953175851008, 139953175867391,
+STORE, 139953175867392, 139953177960447,
+STORE, 139953177960448, 139953177964543,
+STORE, 139953177964544, 139953177968639,
+STORE, 139953177968640, 139953179627519,
+STORE, 139953179627520, 139953181724671,
+STORE, 139953181724672, 139953181741055,
+STORE, 139953181741056, 139953181749247,
+STORE, 139953181749248, 139953181765631,
+STORE, 139953181765632, 139953181863935,
+STORE, 139953181863936, 139953183956991,
+STORE, 139953183956992, 139953183961087,
+STORE, 139953183961088, 139953183965183,
+STORE, 139953183965184, 139953183981567,
+STORE, 139953183981568, 139953184010239,
+STORE, 139953184010240, 139953186103295,
+STORE, 139953186103296, 139953186107391,
+STORE, 139953186107392, 139953186111487,
+STORE, 139953186111488, 139953186263039,
+STORE, 139953186263040, 139953188356095,
+STORE, 139953188356096, 139953188360191,
+STORE, 139953188360192, 139953188364287,
+STORE, 139953188364288, 139953188372479,
+STORE, 139953188372480, 139953188462591,
+STORE, 139953188462592, 139953190555647,
+STORE, 139953190555648, 139953190559743,
+STORE, 139953190559744, 139953190563839,
+STORE, 139953190563840, 139953190830079,
+STORE, 139953190830080, 139953192923135,
+STORE, 139953192923136, 139953192939519,
+STORE, 139953192939520, 139953192943615,
+STORE, 139953192943616, 139953192947711,
+STORE, 139953192947712, 139953192976383,
+STORE, 139953192976384, 139953195073535,
+STORE, 139953195073536, 139953195077631,
+STORE, 139953195077632, 139953195081727,
+STORE, 139953195081728, 139953195225087,
+STORE, 139953197281280, 139953197318143,
+STORE, 139953197322240, 139953197326335,
+STORE, 139953197326336, 139953197330431,
+STORE, 139953197330432, 139953197334527,
+STORE, 140720477511680, 140720477646847,
+STORE, 140720478302208, 140720478314495,
+STORE, 140720478314496, 140720478318591,
+ };
+ static const unsigned long set18[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140724953673728, 140737488351231,
+SNULL, 140724953677823, 140737488351231,
+STORE, 140724953673728, 140724953677823,
+STORE, 140724953542656, 140724953677823,
+STORE, 94675199266816, 94675199311871,
+SNULL, 94675199303679, 94675199311871,
+STORE, 94675199266816, 94675199303679,
+STORE, 94675199303680, 94675199311871,
+ERASE, 94675199303680, 94675199311871,
+STORE, 94675199303680, 94675199311871,
+STORE, 140222970605568, 140222972858367,
+SNULL, 140222970748927, 140222972858367,
+STORE, 140222970605568, 140222970748927,
+STORE, 140222970748928, 140222972858367,
+ERASE, 140222970748928, 140222972858367,
+STORE, 140222972846080, 140222972854271,
+STORE, 140222972854272, 140222972858367,
+STORE, 140724954365952, 140724954370047,
+STORE, 140724954353664, 140724954365951,
+STORE, 140222972841984, 140222972846079,
+STORE, 140222972833792, 140222972841983,
+STORE, 140222968475648, 140222970605567,
+SNULL, 140222968475648, 140222968504319,
+STORE, 140222968504320, 140222970605567,
+STORE, 140222968475648, 140222968504319,
+SNULL, 140222970597375, 140222970605567,
+STORE, 140222968504320, 140222970597375,
+STORE, 140222970597376, 140222970605567,
+ERASE, 140222970597376, 140222970605567,
+STORE, 140222970597376, 140222970605567,
+ };
+ static const unsigned long set19[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140725182459904, 140737488351231,
+SNULL, 140725182463999, 140737488351231,
+STORE, 140725182459904, 140725182463999,
+STORE, 140725182328832, 140725182463999,
+STORE, 94730166636544, 94730166763519,
+SNULL, 94730166747135, 94730166763519,
+STORE, 94730166636544, 94730166747135,
+STORE, 94730166747136, 94730166763519,
+ERASE, 94730166747136, 94730166763519,
+STORE, 94730166751232, 94730166763519,
+STORE, 140656834555904, 140656836808703,
+SNULL, 140656834699263, 140656836808703,
+STORE, 140656834555904, 140656834699263,
+STORE, 140656834699264, 140656836808703,
+ERASE, 140656834699264, 140656836808703,
+STORE, 140656836796416, 140656836804607,
+STORE, 140656836804608, 140656836808703,
+STORE, 140725183389696, 140725183393791,
+STORE, 140725183377408, 140725183389695,
+STORE, 140656836788224, 140656836796415,
+STORE, 140656832331776, 140656834555903,
+SNULL, 140656833982463, 140656834555903,
+STORE, 140656832331776, 140656833982463,
+STORE, 140656833982464, 140656834555903,
+SNULL, 140656833982464, 140656834551807,
+STORE, 140656834551808, 140656834555903,
+STORE, 140656833982464, 140656834551807,
+ERASE, 140656833982464, 140656834551807,
+STORE, 140656833982464, 140656834551807,
+ERASE, 140656834551808, 140656834555903,
+STORE, 140656834551808, 140656834555903,
+STORE, 140656836763648, 140656836788223,
+STORE, 140656830070784, 140656832331775,
+SNULL, 140656830070784, 140656830222335,
+STORE, 140656830222336, 140656832331775,
+STORE, 140656830070784, 140656830222335,
+SNULL, 140656832315391, 140656832331775,
+STORE, 140656830222336, 140656832315391,
+STORE, 140656832315392, 140656832331775,
+SNULL, 140656832315392, 140656832323583,
+STORE, 140656832323584, 140656832331775,
+STORE, 140656832315392, 140656832323583,
+ERASE, 140656832315392, 140656832323583,
+STORE, 140656832315392, 140656832323583,
+ERASE, 140656832323584, 140656832331775,
+STORE, 140656832323584, 140656832331775,
+STORE, 140656827940864, 140656830070783,
+SNULL, 140656827940864, 140656827969535,
+STORE, 140656827969536, 140656830070783,
+STORE, 140656827940864, 140656827969535,
+SNULL, 140656830062591, 140656830070783,
+STORE, 140656827969536, 140656830062591,
+STORE, 140656830062592, 140656830070783,
+ERASE, 140656830062592, 140656830070783,
+STORE, 140656830062592, 140656830070783,
+STORE, 140656825724928, 140656827940863,
+SNULL, 140656825724928, 140656825823231,
+STORE, 140656825823232, 140656827940863,
+STORE, 140656825724928, 140656825823231,
+SNULL, 140656827916287, 140656827940863,
+STORE, 140656825823232, 140656827916287,
+STORE, 140656827916288, 140656827940863,
+SNULL, 140656827916288, 140656827924479,
+STORE, 140656827924480, 140656827940863,
+STORE, 140656827916288, 140656827924479,
+ERASE, 140656827916288, 140656827924479,
+STORE, 140656827916288, 140656827924479,
+ERASE, 140656827924480, 140656827940863,
+STORE, 140656827924480, 140656827940863,
+STORE, 140656821927936, 140656825724927,
+SNULL, 140656821927936, 140656823586815,
+STORE, 140656823586816, 140656825724927,
+STORE, 140656821927936, 140656823586815,
+SNULL, 140656825683967, 140656825724927,
+STORE, 140656823586816, 140656825683967,
+STORE, 140656825683968, 140656825724927,
+SNULL, 140656825683968, 140656825708543,
+STORE, 140656825708544, 140656825724927,
+STORE, 140656825683968, 140656825708543,
+ERASE, 140656825683968, 140656825708543,
+STORE, 140656825683968, 140656825708543,
+ERASE, 140656825708544, 140656825724927,
+STORE, 140656825708544, 140656825724927,
+STORE, 140656819806208, 140656821927935,
+SNULL, 140656819806208, 140656819822591,
+STORE, 140656819822592, 140656821927935,
+STORE, 140656819806208, 140656819822591,
+SNULL, 140656821919743, 140656821927935,
+STORE, 140656819822592, 140656821919743,
+STORE, 140656821919744, 140656821927935,
+ERASE, 140656821919744, 140656821927935,
+STORE, 140656821919744, 140656821927935,
+STORE, 140656836755456, 140656836763647,
+STORE, 140656817553408, 140656819806207,
+SNULL, 140656817553408, 140656817704959,
+STORE, 140656817704960, 140656819806207,
+STORE, 140656817553408, 140656817704959,
+SNULL, 140656819798015, 140656819806207,
+STORE, 140656817704960, 140656819798015,
+STORE, 140656819798016, 140656819806207,
+ERASE, 140656819798016, 140656819806207,
+STORE, 140656819798016, 140656819806207,
+STORE, 140656815382528, 140656817553407,
+SNULL, 140656815382528, 140656815452159,
+STORE, 140656815452160, 140656817553407,
+STORE, 140656815382528, 140656815452159,
+SNULL, 140656817545215, 140656817553407,
+STORE, 140656815452160, 140656817545215,
+STORE, 140656817545216, 140656817553407,
+ERASE, 140656817545216, 140656817553407,
+STORE, 140656817545216, 140656817553407,
+STORE, 140656812171264, 140656815382527,
+SNULL, 140656812171264, 140656813248511,
+STORE, 140656813248512, 140656815382527,
+STORE, 140656812171264, 140656813248511,
+SNULL, 140656815345663, 140656815382527,
+STORE, 140656813248512, 140656815345663,
+STORE, 140656815345664, 140656815382527,
+ERASE, 140656815345664, 140656815382527,
+STORE, 140656815345664, 140656815382527,
+STORE, 140656810037248, 140656812171263,
+SNULL, 140656810037248, 140656810065919,
+STORE, 140656810065920, 140656812171263,
+STORE, 140656810037248, 140656810065919,
+SNULL, 140656812163071, 140656812171263,
+STORE, 140656810065920, 140656812163071,
+STORE, 140656812163072, 140656812171263,
+ERASE, 140656812163072, 140656812171263,
+STORE, 140656812163072, 140656812171263,
+STORE, 140656807727104, 140656810037247,
+SNULL, 140656807727104, 140656807931903,
+STORE, 140656807931904, 140656810037247,
+STORE, 140656807727104, 140656807931903,
+SNULL, 140656810029055, 140656810037247,
+STORE, 140656807931904, 140656810029055,
+STORE, 140656810029056, 140656810037247,
+ERASE, 140656810029056, 140656810037247,
+STORE, 140656810029056, 140656810037247,
+STORE, 140656805343232, 140656807727103,
+SNULL, 140656805343232, 140656805535743,
+STORE, 140656805535744, 140656807727103,
+STORE, 140656805343232, 140656805535743,
+SNULL, 140656807628799, 140656807727103,
+STORE, 140656805535744, 140656807628799,
+STORE, 140656807628800, 140656807727103,
+ERASE, 140656807628800, 140656807727103,
+STORE, 140656807628800, 140656807727103,
+STORE, 140656836747264, 140656836763647,
+STORE, 140656802775040, 140656805343231,
+SNULL, 140656802775040, 140656803241983,
+STORE, 140656803241984, 140656805343231,
+STORE, 140656802775040, 140656803241983,
+SNULL, 140656805335039, 140656805343231,
+STORE, 140656803241984, 140656805335039,
+STORE, 140656805335040, 140656805343231,
+ERASE, 140656805335040, 140656805343231,
+STORE, 140656805335040, 140656805343231,
+STORE, 140656800661504, 140656802775039,
+SNULL, 140656800661504, 140656800673791,
+STORE, 140656800673792, 140656802775039,
+STORE, 140656800661504, 140656800673791,
+SNULL, 140656802766847, 140656802775039,
+STORE, 140656800673792, 140656802766847,
+STORE, 140656802766848, 140656802775039,
+ERASE, 140656802766848, 140656802775039,
+STORE, 140656802766848, 140656802775039,
+STORE, 140656798482432, 140656800661503,
+SNULL, 140656798482432, 140656798560255,
+STORE, 140656798560256, 140656800661503,
+STORE, 140656798482432, 140656798560255,
+SNULL, 140656800653311, 140656800661503,
+STORE, 140656798560256, 140656800653311,
+STORE, 140656800653312, 140656800661503,
+ERASE, 140656800653312, 140656800661503,
+STORE, 140656800653312, 140656800661503,
+STORE, 140656796364800, 140656798482431,
+SNULL, 140656796364800, 140656796381183,
+STORE, 140656796381184, 140656798482431,
+STORE, 140656796364800, 140656796381183,
+SNULL, 140656798474239, 140656798482431,
+STORE, 140656796381184, 140656798474239,
+STORE, 140656798474240, 140656798482431,
+ERASE, 140656798474240, 140656798482431,
+STORE, 140656798474240, 140656798482431,
+STORE, 140656836739072, 140656836763647,
+STORE, 140656836726784, 140656836763647,
+SNULL, 140656825700351, 140656825708543,
+STORE, 140656825683968, 140656825700351,
+STORE, 140656825700352, 140656825708543,
+SNULL, 140656798478335, 140656798482431,
+STORE, 140656798474240, 140656798478335,
+STORE, 140656798478336, 140656798482431,
+SNULL, 140656800657407, 140656800661503,
+STORE, 140656800653312, 140656800657407,
+STORE, 140656800657408, 140656800661503,
+SNULL, 140656802770943, 140656802775039,
+STORE, 140656802766848, 140656802770943,
+STORE, 140656802770944, 140656802775039,
+SNULL, 140656827920383, 140656827924479,
+STORE, 140656827916288, 140656827920383,
+STORE, 140656827920384, 140656827924479,
+SNULL, 140656805339135, 140656805343231,
+STORE, 140656805335040, 140656805339135,
+STORE, 140656805339136, 140656805343231,
+SNULL, 140656807723007, 140656807727103,
+STORE, 140656807628800, 140656807723007,
+STORE, 140656807723008, 140656807727103,
+SNULL, 140656810033151, 140656810037247,
+STORE, 140656810029056, 140656810033151,
+STORE, 140656810033152, 140656810037247,
+SNULL, 140656812167167, 140656812171263,
+STORE, 140656812163072, 140656812167167,
+STORE, 140656812167168, 140656812171263,
+SNULL, 140656815353855, 140656815382527,
+STORE, 140656815345664, 140656815353855,
+STORE, 140656815353856, 140656815382527,
+SNULL, 140656817549311, 140656817553407,
+STORE, 140656817545216, 140656817549311,
+STORE, 140656817549312, 140656817553407,
+SNULL, 140656819802111, 140656819806207,
+STORE, 140656819798016, 140656819802111,
+STORE, 140656819802112, 140656819806207,
+SNULL, 140656821923839, 140656821927935,
+STORE, 140656821919744, 140656821923839,
+STORE, 140656821923840, 140656821927935,
+SNULL, 140656830066687, 140656830070783,
+STORE, 140656830062592, 140656830066687,
+STORE, 140656830066688, 140656830070783,
+SNULL, 140656832319487, 140656832323583,
+STORE, 140656832315392, 140656832319487,
+STORE, 140656832319488, 140656832323583,
+SNULL, 140656834547711, 140656834551807,
+STORE, 140656833982464, 140656834547711,
+STORE, 140656834547712, 140656834551807,
+SNULL, 94730166759423, 94730166763519,
+STORE, 94730166751232, 94730166759423,
+STORE, 94730166759424, 94730166763519,
+SNULL, 140656836800511, 140656836804607,
+STORE, 140656836796416, 140656836800511,
+STORE, 140656836800512, 140656836804607,
+ERASE, 140656836763648, 140656836788223,
+STORE, 94730171318272, 94730171453439,
+STORE, 140656836784128, 140656836788223,
+STORE, 140656836780032, 140656836784127,
+STORE, 140656791920640, 140656796364799,
+STORE, 140656836775936, 140656836780031,
+STORE, 140656787476480, 140656791920639,
+STORE, 140656779083776, 140656787476479,
+SNULL, 140656779087871, 140656787476479,
+STORE, 140656779083776, 140656779087871,
+STORE, 140656779087872, 140656787476479,
+STORE, 140656836771840, 140656836775935,
+STORE, 140656774639616, 140656779083775,
+STORE, 140656766246912, 140656774639615,
+SNULL, 140656766251007, 140656774639615,
+STORE, 140656766246912, 140656766251007,
+STORE, 140656766251008, 140656774639615,
+ERASE, 140656791920640, 140656796364799,
+ERASE, 140656836780032, 140656836784127,
+ERASE, 140656787476480, 140656791920639,
+ERASE, 140656836775936, 140656836780031,
+STORE, 140656836780032, 140656836784127,
+STORE, 140656791920640, 140656796364799,
+STORE, 140656836775936, 140656836780031,
+STORE, 140656787476480, 140656791920639,
+ERASE, 140656774639616, 140656779083775,
+ };
+ static const unsigned long set20[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140735952392192, 140737488351231,
+SNULL, 140735952396287, 140737488351231,
+STORE, 140735952392192, 140735952396287,
+STORE, 140735952261120, 140735952396287,
+STORE, 94849008947200, 94849009414143,
+SNULL, 94849009364991, 94849009414143,
+STORE, 94849008947200, 94849009364991,
+STORE, 94849009364992, 94849009414143,
+ERASE, 94849009364992, 94849009414143,
+STORE, 94849009364992, 94849009414143,
+STORE, 140590397943808, 140590400196607,
+SNULL, 140590398087167, 140590400196607,
+STORE, 140590397943808, 140590398087167,
+STORE, 140590398087168, 140590400196607,
+ERASE, 140590398087168, 140590400196607,
+STORE, 140590400184320, 140590400192511,
+STORE, 140590400192512, 140590400196607,
+STORE, 140735952850944, 140735952855039,
+STORE, 140735952838656, 140735952850943,
+STORE, 140590400180224, 140590400184319,
+STORE, 140590400172032, 140590400180223,
+STORE, 140590395809792, 140590397943807,
+SNULL, 140590395809792, 140590395838463,
+STORE, 140590395838464, 140590397943807,
+STORE, 140590395809792, 140590395838463,
+SNULL, 140590397935615, 140590397943807,
+STORE, 140590395838464, 140590397935615,
+STORE, 140590397935616, 140590397943807,
+ERASE, 140590397935616, 140590397943807,
+STORE, 140590397935616, 140590397943807,
+STORE, 140590393425920, 140590395809791,
+SNULL, 140590393425920, 140590393692159,
+STORE, 140590393692160, 140590395809791,
+STORE, 140590393425920, 140590393692159,
+SNULL, 140590395785215, 140590395809791,
+STORE, 140590393692160, 140590395785215,
+STORE, 140590395785216, 140590395809791,
+SNULL, 140590395785216, 140590395805695,
+STORE, 140590395805696, 140590395809791,
+STORE, 140590395785216, 140590395805695,
+ERASE, 140590395785216, 140590395805695,
+STORE, 140590395785216, 140590395805695,
+ERASE, 140590395805696, 140590395809791,
+STORE, 140590395805696, 140590395809791,
+STORE, 140590391234560, 140590393425919,
+SNULL, 140590391234560, 140590391324671,
+STORE, 140590391324672, 140590393425919,
+STORE, 140590391234560, 140590391324671,
+SNULL, 140590393417727, 140590393425919,
+STORE, 140590391324672, 140590393417727,
+STORE, 140590393417728, 140590393425919,
+ERASE, 140590393417728, 140590393425919,
+STORE, 140590393417728, 140590393425919,
+STORE, 140590388973568, 140590391234559,
+SNULL, 140590388973568, 140590389125119,
+STORE, 140590389125120, 140590391234559,
+STORE, 140590388973568, 140590389125119,
+SNULL, 140590391218175, 140590391234559,
+STORE, 140590389125120, 140590391218175,
+STORE, 140590391218176, 140590391234559,
+SNULL, 140590391218176, 140590391226367,
+STORE, 140590391226368, 140590391234559,
+STORE, 140590391218176, 140590391226367,
+ERASE, 140590391218176, 140590391226367,
+STORE, 140590391218176, 140590391226367,
+ERASE, 140590391226368, 140590391234559,
+STORE, 140590391226368, 140590391234559,
+STORE, 140590386843648, 140590388973567,
+SNULL, 140590386843648, 140590386872319,
+STORE, 140590386872320, 140590388973567,
+STORE, 140590386843648, 140590386872319,
+SNULL, 140590388965375, 140590388973567,
+STORE, 140590386872320, 140590388965375,
+STORE, 140590388965376, 140590388973567,
+ERASE, 140590388965376, 140590388973567,
+STORE, 140590388965376, 140590388973567,
+STORE, 140590384627712, 140590386843647,
+SNULL, 140590384627712, 140590384726015,
+STORE, 140590384726016, 140590386843647,
+STORE, 140590384627712, 140590384726015,
+SNULL, 140590386819071, 140590386843647,
+STORE, 140590384726016, 140590386819071,
+STORE, 140590386819072, 140590386843647,
+SNULL, 140590386819072, 140590386827263,
+STORE, 140590386827264, 140590386843647,
+STORE, 140590386819072, 140590386827263,
+ERASE, 140590386819072, 140590386827263,
+STORE, 140590386819072, 140590386827263,
+ERASE, 140590386827264, 140590386843647,
+STORE, 140590386827264, 140590386843647,
+STORE, 140590400163840, 140590400180223,
+STORE, 140590380830720, 140590384627711,
+SNULL, 140590380830720, 140590382489599,
+STORE, 140590382489600, 140590384627711,
+STORE, 140590380830720, 140590382489599,
+SNULL, 140590384586751, 140590384627711,
+STORE, 140590382489600, 140590384586751,
+STORE, 140590384586752, 140590384627711,
+SNULL, 140590384586752, 140590384611327,
+STORE, 140590384611328, 140590384627711,
+STORE, 140590384586752, 140590384611327,
+ERASE, 140590384586752, 140590384611327,
+STORE, 140590384586752, 140590384611327,
+ERASE, 140590384611328, 140590384627711,
+STORE, 140590384611328, 140590384627711,
+STORE, 140590378713088, 140590380830719,
+SNULL, 140590378713088, 140590378729471,
+STORE, 140590378729472, 140590380830719,
+STORE, 140590378713088, 140590378729471,
+SNULL, 140590380822527, 140590380830719,
+STORE, 140590378729472, 140590380822527,
+STORE, 140590380822528, 140590380830719,
+ERASE, 140590380822528, 140590380830719,
+STORE, 140590380822528, 140590380830719,
+STORE, 140590376595456, 140590378713087,
+SNULL, 140590376595456, 140590376611839,
+STORE, 140590376611840, 140590378713087,
+STORE, 140590376595456, 140590376611839,
+SNULL, 140590378704895, 140590378713087,
+STORE, 140590376611840, 140590378704895,
+STORE, 140590378704896, 140590378713087,
+ERASE, 140590378704896, 140590378713087,
+STORE, 140590378704896, 140590378713087,
+STORE, 140590374027264, 140590376595455,
+SNULL, 140590374027264, 140590374494207,
+STORE, 140590374494208, 140590376595455,
+STORE, 140590374027264, 140590374494207,
+SNULL, 140590376587263, 140590376595455,
+STORE, 140590374494208, 140590376587263,
+STORE, 140590376587264, 140590376595455,
+ERASE, 140590376587264, 140590376595455,
+STORE, 140590376587264, 140590376595455,
+STORE, 140590371913728, 140590374027263,
+SNULL, 140590371913728, 140590371926015,
+STORE, 140590371926016, 140590374027263,
+STORE, 140590371913728, 140590371926015,
+SNULL, 140590374019071, 140590374027263,
+STORE, 140590371926016, 140590374019071,
+STORE, 140590374019072, 140590374027263,
+ERASE, 140590374019072, 140590374027263,
+STORE, 140590374019072, 140590374027263,
+STORE, 140590400155648, 140590400180223,
+STORE, 140590400143360, 140590400180223,
+SNULL, 140590384603135, 140590384611327,
+STORE, 140590384586752, 140590384603135,
+STORE, 140590384603136, 140590384611327,
+SNULL, 140590374023167, 140590374027263,
+STORE, 140590374019072, 140590374023167,
+STORE, 140590374023168, 140590374027263,
+SNULL, 140590386823167, 140590386827263,
+STORE, 140590386819072, 140590386823167,
+STORE, 140590386823168, 140590386827263,
+SNULL, 140590376591359, 140590376595455,
+ };
+ static const unsigned long set21[] = {
+STORE, 93874710941696, 93874711363583,
+STORE, 93874711367680, 93874711408639,
+STORE, 93874711408640, 93874711412735,
+STORE, 93874720989184, 93874721124351,
+STORE, 140708365086720, 140708365099007,
+STORE, 140708365099008, 140708367192063,
+STORE, 140708367192064, 140708367196159,
+STORE, 140708367196160, 140708367200255,
+STORE, 140708367200256, 140708367667199,
+STORE, 140708367667200, 140708369760255,
+STORE, 140708369760256, 140708369764351,
+STORE, 140708369764352, 140708369768447,
+STORE, 140708369768448, 140708369784831,
+STORE, 140708369784832, 140708371877887,
+STORE, 140708371877888, 140708371881983,
+STORE, 140708371881984, 140708371886079,
+STORE, 140708371886080, 140708371902463,
+STORE, 140708371902464, 140708373995519,
+STORE, 140708373995520, 140708373999615,
+STORE, 140708373999616, 140708374003711,
+STORE, 140708374003712, 140708375662591,
+STORE, 140708375662592, 140708377759743,
+STORE, 140708377759744, 140708377776127,
+STORE, 140708377776128, 140708377784319,
+STORE, 140708377784320, 140708377800703,
+STORE, 140708377800704, 140708377899007,
+STORE, 140708377899008, 140708379992063,
+STORE, 140708379992064, 140708379996159,
+STORE, 140708379996160, 140708380000255,
+STORE, 140708380000256, 140708380016639,
+STORE, 140708380016640, 140708380045311,
+STORE, 140708380045312, 140708382138367,
+STORE, 140708382138368, 140708382142463,
+STORE, 140708382142464, 140708382146559,
+STORE, 140708382146560, 140708382298111,
+STORE, 140708382298112, 140708384391167,
+STORE, 140708384391168, 140708384395263,
+STORE, 140708384395264, 140708384399359,
+STORE, 140708384399360, 140708384407551,
+STORE, 140708384407552, 140708384497663,
+STORE, 140708384497664, 140708386590719,
+STORE, 140708386590720, 140708386594815,
+STORE, 140708386594816, 140708386598911,
+STORE, 140708386598912, 140708386865151,
+STORE, 140708386865152, 140708388958207,
+STORE, 140708388958208, 140708388974591,
+STORE, 140708388974592, 140708388978687,
+STORE, 140708388978688, 140708388982783,
+STORE, 140708388982784, 140708389011455,
+STORE, 140708389011456, 140708391108607,
+STORE, 140708391108608, 140708391112703,
+STORE, 140708391112704, 140708391116799,
+STORE, 140708391116800, 140708391260159,
+STORE, 140708393291776, 140708393308159,
+STORE, 140708393308160, 140708393312255,
+STORE, 140708393312256, 140708393316351,
+STORE, 140708393316352, 140708393353215,
+STORE, 140708393353216, 140708393357311,
+STORE, 140708393357312, 140708393361407,
+STORE, 140708393361408, 140708393365503,
+STORE, 140708393365504, 140708393369599,
+STORE, 140730557042688, 140730557177855,
+STORE, 140730557235200, 140730557247487,
+STORE, 140730557247488, 140730557251583,
+ERASE, 140708393353216, 140708393357311,
+ERASE, 140708393312256, 140708393316351,
+ERASE, 140708393308160, 140708393312255,
+ERASE, 140708393291776, 140708393308159,
+ };
+ static const unsigned long set22[] = {
+STORE, 93951397134336, 93951397183487,
+STORE, 93951397183488, 93951397728255,
+STORE, 93951397728256, 93951397826559,
+STORE, 93951397826560, 93951397842943,
+STORE, 93951397842944, 93951397847039,
+STORE, 93951425974272, 93951426109439,
+STORE, 140685152665600, 140685152677887,
+STORE, 140685152677888, 140685152829439,
+STORE, 140685152829440, 140685154181119,
+STORE, 140685154181120, 140685154484223,
+STORE, 140685154484224, 140685154496511,
+STORE, 140685154496512, 140685154508799,
+STORE, 140685154508800, 140685154525183,
+STORE, 140685154525184, 140685154541567,
+STORE, 140685154541568, 140685154590719,
+STORE, 140685154590720, 140685154603007,
+STORE, 140685154603008, 140685154607103,
+STORE, 140685154607104, 140685154611199,
+STORE, 140685154611200, 140685154615295,
+STORE, 140685154615296, 140685154631679,
+STORE, 140685154639872, 140685154643967,
+STORE, 140685154643968, 140685154766847,
+STORE, 140685154766848, 140685154799615,
+STORE, 140685154803712, 140685154807807,
+STORE, 140685154807808, 140685154811903,
+STORE, 140685154811904, 140685154815999,
+STORE, 140722188902400, 140722189037567,
+STORE, 140722189512704, 140722189524991,
+STORE, 140722189524992, 140722189529087,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733429354496, 140737488351231,
+SNULL, 140733429358591, 140737488351231,
+STORE, 140733429354496, 140733429358591,
+STORE, 140733429223424, 140733429358591,
+STORE, 94526683537408, 94526683660287,
+SNULL, 94526683553791, 94526683660287,
+STORE, 94526683537408, 94526683553791,
+STORE, 94526683553792, 94526683660287,
+ERASE, 94526683553792, 94526683660287,
+STORE, 94526683553792, 94526683623423,
+STORE, 94526683623424, 94526683647999,
+STORE, 94526683652096, 94526683660287,
+STORE, 140551363747840, 140551363923967,
+SNULL, 140551363751935, 140551363923967,
+STORE, 140551363747840, 140551363751935,
+STORE, 140551363751936, 140551363923967,
+ERASE, 140551363751936, 140551363923967,
+STORE, 140551363751936, 140551363874815,
+STORE, 140551363874816, 140551363907583,
+STORE, 140551363911680, 140551363919871,
+STORE, 140551363919872, 140551363923967,
+STORE, 140733429690368, 140733429694463,
+STORE, 140733429678080, 140733429690367,
+STORE, 140551363739648, 140551363747839,
+STORE, 140551363731456, 140551363739647,
+STORE, 140551363379200, 140551363731455,
+SNULL, 140551363379200, 140551363420159,
+STORE, 140551363420160, 140551363731455,
+STORE, 140551363379200, 140551363420159,
+SNULL, 140551363706879, 140551363731455,
+STORE, 140551363420160, 140551363706879,
+STORE, 140551363706880, 140551363731455,
+SNULL, 140551363420160, 140551363637247,
+STORE, 140551363637248, 140551363706879,
+STORE, 140551363420160, 140551363637247,
+ERASE, 140551363420160, 140551363637247,
+STORE, 140551363420160, 140551363637247,
+SNULL, 140551363637248, 140551363702783,
+STORE, 140551363702784, 140551363706879,
+STORE, 140551363637248, 140551363702783,
+ERASE, 140551363637248, 140551363702783,
+STORE, 140551363637248, 140551363702783,
+ERASE, 140551363706880, 140551363731455,
+STORE, 140551363706880, 140551363731455,
+STORE, 140551361531904, 140551363379199,
+SNULL, 140551361683455, 140551363379199,
+STORE, 140551361531904, 140551361683455,
+STORE, 140551361683456, 140551363379199,
+SNULL, 140551361683456, 140551363035135,
+STORE, 140551363035136, 140551363379199,
+STORE, 140551361683456, 140551363035135,
+ERASE, 140551361683456, 140551363035135,
+STORE, 140551361683456, 140551363035135,
+SNULL, 140551363035136, 140551363338239,
+STORE, 140551363338240, 140551363379199,
+STORE, 140551363035136, 140551363338239,
+ERASE, 140551363035136, 140551363338239,
+STORE, 140551363035136, 140551363379199,
+SNULL, 140551363338239, 140551363379199,
+STORE, 140551363035136, 140551363338239,
+STORE, 140551363338240, 140551363379199,
+SNULL, 140551363338240, 140551363362815,
+STORE, 140551363362816, 140551363379199,
+STORE, 140551363338240, 140551363362815,
+ERASE, 140551363338240, 140551363362815,
+STORE, 140551363338240, 140551363362815,
+ERASE, 140551363362816, 140551363379199,
+STORE, 140551363362816, 140551363379199,
+STORE, 140551361519616, 140551361531903,
+SNULL, 140551363350527, 140551363362815,
+STORE, 140551363338240, 140551363350527,
+STORE, 140551363350528, 140551363362815,
+SNULL, 140551363727359, 140551363731455,
+STORE, 140551363706880, 140551363727359,
+STORE, 140551363727360, 140551363731455,
+SNULL, 94526683656191, 94526683660287,
+STORE, 94526683652096, 94526683656191,
+STORE, 94526683656192, 94526683660287,
+SNULL, 140551363915775, 140551363919871,
+STORE, 140551363911680, 140551363915775,
+STORE, 140551363915776, 140551363919871,
+ERASE, 140551363739648, 140551363747839,
+STORE, 94526715490304, 94526715625471,
+STORE, 140551361253376, 140551361531903,
+STORE, 140551360987136, 140551361531903,
+STORE, 140551360720896, 140551361531903,
+STORE, 140551360454656, 140551361531903,
+SNULL, 140551361253375, 140551361531903,
+STORE, 140551360454656, 140551361253375,
+STORE, 140551361253376, 140551361531903,
+SNULL, 140551361253376, 140551361519615,
+STORE, 140551361519616, 140551361531903,
+STORE, 140551361253376, 140551361519615,
+ERASE, 140551361253376, 140551361519615,
+ };
+
+ static const unsigned long set23[] = {
+STORE, 94014447943680, 94014448156671,
+STORE, 94014450253824, 94014450257919,
+STORE, 94014450257920, 94014450266111,
+STORE, 94014450266112, 94014450278399,
+STORE, 94014464225280, 94014464630783,
+STORE, 139761764306944, 139761765965823,
+STORE, 139761765965824, 139761768062975,
+STORE, 139761768062976, 139761768079359,
+STORE, 139761768079360, 139761768087551,
+STORE, 139761768087552, 139761768103935,
+STORE, 139761768103936, 139761768116223,
+STORE, 139761768116224, 139761770209279,
+STORE, 139761770209280, 139761770213375,
+STORE, 139761770213376, 139761770217471,
+STORE, 139761770217472, 139761770360831,
+STORE, 139761770729472, 139761772412927,
+STORE, 139761772412928, 139761772429311,
+STORE, 139761772457984, 139761772462079,
+STORE, 139761772462080, 139761772466175,
+STORE, 139761772466176, 139761772470271,
+STORE, 140724336517120, 140724336652287,
+STORE, 140724336955392, 140724336967679,
+STORE, 140724336967680, 140724336971775,
+STORE, 140737488347136, 140737488351231,
+STORE, 140721840295936, 140737488351231,
+SNULL, 140721840300031, 140737488351231,
+STORE, 140721840295936, 140721840300031,
+STORE, 140721840164864, 140721840300031,
+STORE, 93937913667584, 93937915830271,
+SNULL, 93937913729023, 93937915830271,
+STORE, 93937913667584, 93937913729023,
+STORE, 93937913729024, 93937915830271,
+ERASE, 93937913729024, 93937915830271,
+STORE, 93937915822080, 93937915830271,
+STORE, 140598835335168, 140598837587967,
+SNULL, 140598835478527, 140598837587967,
+STORE, 140598835335168, 140598835478527,
+STORE, 140598835478528, 140598837587967,
+ERASE, 140598835478528, 140598837587967,
+STORE, 140598837575680, 140598837583871,
+STORE, 140598837583872, 140598837587967,
+STORE, 140721841086464, 140721841090559,
+STORE, 140721841074176, 140721841086463,
+STORE, 140598837547008, 140598837575679,
+STORE, 140598837538816, 140598837547007,
+STORE, 140598831538176, 140598835335167,
+SNULL, 140598831538176, 140598833197055,
+STORE, 140598833197056, 140598835335167,
+STORE, 140598831538176, 140598833197055,
+SNULL, 140598835294207, 140598835335167,
+STORE, 140598833197056, 140598835294207,
+STORE, 140598835294208, 140598835335167,
+SNULL, 140598835294208, 140598835318783,
+STORE, 140598835318784, 140598835335167,
+STORE, 140598835294208, 140598835318783,
+ERASE, 140598835294208, 140598835318783,
+STORE, 140598835294208, 140598835318783,
+ERASE, 140598835318784, 140598835335167,
+STORE, 140598835318784, 140598835335167,
+SNULL, 140598835310591, 140598835318783,
+STORE, 140598835294208, 140598835310591,
+STORE, 140598835310592, 140598835318783,
+SNULL, 93937915826175, 93937915830271,
+STORE, 93937915822080, 93937915826175,
+STORE, 93937915826176, 93937915830271,
+SNULL, 140598837579775, 140598837583871,
+STORE, 140598837575680, 140598837579775,
+STORE, 140598837579776, 140598837583871,
+ERASE, 140598837547008, 140598837575679,
+STORE, 93937929179136, 93937929314303,
+STORE, 140598835855360, 140598837538815,
+STORE, 140737488347136, 140737488351231,
+STORE, 140728187723776, 140737488351231,
+SNULL, 140728187727871, 140737488351231,
+STORE, 140728187723776, 140728187727871,
+STORE, 140728187592704, 140728187727871,
+STORE, 4194304, 5128191,
+STORE, 7221248, 7241727,
+STORE, 7241728, 7249919,
+STORE, 140583951437824, 140583953690623,
+SNULL, 140583951581183, 140583953690623,
+STORE, 140583951437824, 140583951581183,
+STORE, 140583951581184, 140583953690623,
+ERASE, 140583951581184, 140583953690623,
+STORE, 140583953678336, 140583953686527,
+STORE, 140583953686528, 140583953690623,
+STORE, 140728189116416, 140728189120511,
+STORE, 140728189104128, 140728189116415,
+STORE, 140583953649664, 140583953678335,
+STORE, 140583953641472, 140583953649663,
+STORE, 140583948275712, 140583951437823,
+SNULL, 140583948275712, 140583949336575,
+STORE, 140583949336576, 140583951437823,
+STORE, 140583948275712, 140583949336575,
+SNULL, 140583951429631, 140583951437823,
+STORE, 140583949336576, 140583951429631,
+STORE, 140583951429632, 140583951437823,
+ERASE, 140583951429632, 140583951437823,
+STORE, 140583951429632, 140583951437823,
+STORE, 140583944478720, 140583948275711,
+SNULL, 140583944478720, 140583946137599,
+STORE, 140583946137600, 140583948275711,
+STORE, 140583944478720, 140583946137599,
+SNULL, 140583948234751, 140583948275711,
+STORE, 140583946137600, 140583948234751,
+STORE, 140583948234752, 140583948275711,
+SNULL, 140583948234752, 140583948259327,
+STORE, 140583948259328, 140583948275711,
+STORE, 140583948234752, 140583948259327,
+ERASE, 140583948234752, 140583948259327,
+STORE, 140583948234752, 140583948259327,
+ERASE, 140583948259328, 140583948275711,
+STORE, 140583948259328, 140583948275711,
+STORE, 140583953629184, 140583953649663,
+SNULL, 140583948251135, 140583948259327,
+STORE, 140583948234752, 140583948251135,
+STORE, 140583948251136, 140583948259327,
+SNULL, 140583951433727, 140583951437823,
+STORE, 140583951429632, 140583951433727,
+STORE, 140583951433728, 140583951437823,
+SNULL, 7233535, 7241727,
+STORE, 7221248, 7233535,
+STORE, 7233536, 7241727,
+SNULL, 140583953682431, 140583953686527,
+STORE, 140583953678336, 140583953682431,
+STORE, 140583953682432, 140583953686527,
+ERASE, 140583953649664, 140583953678335,
+STORE, 17821696, 17956863,
+STORE, 17821696, 18104319,
+STORE, 140583951945728, 140583953629183,
+STORE, 94014447943680, 94014448156671,
+STORE, 94014450253824, 94014450257919,
+STORE, 94014450257920, 94014450266111,
+STORE, 94014450266112, 94014450278399,
+STORE, 94014464225280, 94014465196031,
+STORE, 139761764306944, 139761765965823,
+STORE, 139761765965824, 139761768062975,
+STORE, 139761768062976, 139761768079359,
+STORE, 139761768079360, 139761768087551,
+STORE, 139761768087552, 139761768103935,
+STORE, 139761768103936, 139761768116223,
+STORE, 139761768116224, 139761770209279,
+STORE, 139761770209280, 139761770213375,
+STORE, 139761770213376, 139761770217471,
+STORE, 139761770217472, 139761770360831,
+STORE, 139761770729472, 139761772412927,
+STORE, 139761772412928, 139761772429311,
+STORE, 139761772457984, 139761772462079,
+STORE, 139761772462080, 139761772466175,
+STORE, 139761772466176, 139761772470271,
+STORE, 140724336517120, 140724336652287,
+STORE, 140724336955392, 140724336967679,
+STORE, 140724336967680, 140724336971775,
+STORE, 140737488347136, 140737488351231,
+STORE, 140726063296512, 140737488351231,
+SNULL, 140726063300607, 140737488351231,
+STORE, 140726063296512, 140726063300607,
+STORE, 140726063165440, 140726063300607,
+STORE, 94016795934720, 94016798158847,
+SNULL, 94016796045311, 94016798158847,
+STORE, 94016795934720, 94016796045311,
+STORE, 94016796045312, 94016798158847,
+ERASE, 94016796045312, 94016798158847,
+STORE, 94016798138368, 94016798150655,
+STORE, 94016798150656, 94016798158847,
+STORE, 139975915966464, 139975918219263,
+SNULL, 139975916109823, 139975918219263,
+STORE, 139975915966464, 139975916109823,
+STORE, 139975916109824, 139975918219263,
+ERASE, 139975916109824, 139975918219263,
+STORE, 139975918206976, 139975918215167,
+STORE, 139975918215168, 139975918219263,
+STORE, 140726064541696, 140726064545791,
+STORE, 140726064529408, 140726064541695,
+STORE, 139975918178304, 139975918206975,
+STORE, 139975918170112, 139975918178303,
+STORE, 139975912169472, 139975915966463,
+SNULL, 139975912169472, 139975913828351,
+STORE, 139975913828352, 139975915966463,
+STORE, 139975912169472, 139975913828351,
+SNULL, 139975915925503, 139975915966463,
+STORE, 139975913828352, 139975915925503,
+STORE, 139975915925504, 139975915966463,
+SNULL, 139975915925504, 139975915950079,
+STORE, 139975915950080, 139975915966463,
+STORE, 139975915925504, 139975915950079,
+ERASE, 139975915925504, 139975915950079,
+STORE, 139975915925504, 139975915950079,
+ERASE, 139975915950080, 139975915966463,
+STORE, 139975915950080, 139975915966463,
+SNULL, 139975915941887, 139975915950079,
+STORE, 139975915925504, 139975915941887,
+STORE, 139975915941888, 139975915950079,
+SNULL, 94016798146559, 94016798150655,
+STORE, 94016798138368, 94016798146559,
+STORE, 94016798146560, 94016798150655,
+SNULL, 139975918211071, 139975918215167,
+STORE, 139975918206976, 139975918211071,
+STORE, 139975918211072, 139975918215167,
+ERASE, 139975918178304, 139975918206975,
+STORE, 94016804925440, 94016805060607,
+STORE, 94596177661952, 94596177772543,
+STORE, 94596179865600, 94596179873791,
+STORE, 94596179873792, 94596179877887,
+STORE, 94596179877888, 94596179886079,
+STORE, 94596211597312, 94596211863551,
+STORE, 140127351840768, 140127353499647,
+STORE, 140127353499648, 140127355596799,
+STORE, 140127355596800, 140127355613183,
+STORE, 140127355613184, 140127355621375,
+STORE, 140127355621376, 140127355637759,
+STORE, 140127355637760, 140127355781119,
+STORE, 140127357841408, 140127357849599,
+STORE, 140127357878272, 140127357882367,
+STORE, 140127357882368, 140127357886463,
+STORE, 140127357886464, 140127357890559,
+STORE, 140726167252992, 140726167392255,
+STORE, 140726167838720, 140726167851007,
+STORE, 140726167851008, 140726167855103,
+STORE, 140737488347136, 140737488351231,
+STORE, 140731874017280, 140737488351231,
+SNULL, 140731874021375, 140737488351231,
+STORE, 140731874017280, 140731874021375,
+STORE, 140731873886208, 140731874021375,
+STORE, 94178682265600, 94178684489727,
+SNULL, 94178682376191, 94178684489727,
+STORE, 94178682265600, 94178682376191,
+STORE, 94178682376192, 94178684489727,
+ERASE, 94178682376192, 94178684489727,
+STORE, 94178684469248, 94178684481535,
+STORE, 94178684481536, 94178684489727,
+STORE, 140460853403648, 140460855656447,
+SNULL, 140460853547007, 140460855656447,
+STORE, 140460853403648, 140460853547007,
+STORE, 140460853547008, 140460855656447,
+ERASE, 140460853547008, 140460855656447,
+STORE, 140460855644160, 140460855652351,
+STORE, 140460855652352, 140460855656447,
+STORE, 140731874103296, 140731874107391,
+STORE, 140731874091008, 140731874103295,
+STORE, 140460855615488, 140460855644159,
+STORE, 140460855607296, 140460855615487,
+STORE, 140460849606656, 140460853403647,
+SNULL, 140460849606656, 140460851265535,
+STORE, 140460851265536, 140460853403647,
+STORE, 140460849606656, 140460851265535,
+SNULL, 140460853362687, 140460853403647,
+STORE, 140460851265536, 140460853362687,
+STORE, 140460853362688, 140460853403647,
+SNULL, 140460853362688, 140460853387263,
+STORE, 140460853387264, 140460853403647,
+STORE, 140460853362688, 140460853387263,
+ERASE, 140460853362688, 140460853387263,
+STORE, 140460853362688, 140460853387263,
+ERASE, 140460853387264, 140460853403647,
+STORE, 140460853387264, 140460853403647,
+SNULL, 140460853379071, 140460853387263,
+STORE, 140460853362688, 140460853379071,
+STORE, 140460853379072, 140460853387263,
+SNULL, 94178684477439, 94178684481535,
+STORE, 94178684469248, 94178684477439,
+STORE, 94178684477440, 94178684481535,
+SNULL, 140460855648255, 140460855652351,
+STORE, 140460855644160, 140460855648255,
+STORE, 140460855648256, 140460855652351,
+ERASE, 140460855615488, 140460855644159,
+STORE, 94178692063232, 94178692198399,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140733096603648, 140737488351231,
+SNULL, 140733096611839, 140737488351231,
+STORE, 140733096603648, 140733096611839,
+STORE, 140733096472576, 140733096611839,
+STORE, 94796716122112, 94796718325759,
+SNULL, 94796716224511, 94796718325759,
+STORE, 94796716122112, 94796716224511,
+STORE, 94796716224512, 94796718325759,
+ERASE, 94796716224512, 94796718325759,
+STORE, 94796718317568, 94796718325759,
+STORE, 139667892793344, 139667895046143,
+SNULL, 139667892936703, 139667895046143,
+STORE, 139667892793344, 139667892936703,
+STORE, 139667892936704, 139667895046143,
+ERASE, 139667892936704, 139667895046143,
+STORE, 139667895033856, 139667895042047,
+STORE, 139667895042048, 139667895046143,
+STORE, 140733096857600, 140733096861695,
+STORE, 140733096845312, 140733096857599,
+STORE, 139667895005184, 139667895033855,
+STORE, 139667894996992, 139667895005183,
+STORE, 139667890532352, 139667892793343,
+SNULL, 139667890532352, 139667890683903,
+STORE, 139667890683904, 139667892793343,
+STORE, 139667890532352, 139667890683903,
+SNULL, 139667892776959, 139667892793343,
+STORE, 139667890683904, 139667892776959,
+STORE, 139667892776960, 139667892793343,
+SNULL, 139667892776960, 139667892785151,
+STORE, 139667892785152, 139667892793343,
+STORE, 139667892776960, 139667892785151,
+ERASE, 139667892776960, 139667892785151,
+STORE, 139667892776960, 139667892785151,
+ERASE, 139667892785152, 139667892793343,
+STORE, 139667892785152, 139667892793343,
+STORE, 139667886735360, 139667890532351,
+SNULL, 139667886735360, 139667888394239,
+STORE, 139667888394240, 139667890532351,
+STORE, 139667886735360, 139667888394239,
+SNULL, 139667890491391, 139667890532351,
+STORE, 139667888394240, 139667890491391,
+STORE, 139667890491392, 139667890532351,
+SNULL, 139667890491392, 139667890515967,
+STORE, 139667890515968, 139667890532351,
+STORE, 139667890491392, 139667890515967,
+ERASE, 139667890491392, 139667890515967,
+STORE, 139667890491392, 139667890515967,
+ERASE, 139667890515968, 139667890532351,
+STORE, 139667890515968, 139667890532351,
+STORE, 139667884167168, 139667886735359,
+SNULL, 139667884167168, 139667884634111,
+STORE, 139667884634112, 139667886735359,
+STORE, 139667884167168, 139667884634111,
+SNULL, 139667886727167, 139667886735359,
+STORE, 139667884634112, 139667886727167,
+STORE, 139667886727168, 139667886735359,
+ERASE, 139667886727168, 139667886735359,
+STORE, 139667886727168, 139667886735359,
+STORE, 139667882053632, 139667884167167,
+SNULL, 139667882053632, 139667882065919,
+STORE, 139667882065920, 139667884167167,
+STORE, 139667882053632, 139667882065919,
+SNULL, 139667884158975, 139667884167167,
+STORE, 139667882065920, 139667884158975,
+STORE, 139667884158976, 139667884167167,
+ERASE, 139667884158976, 139667884167167,
+STORE, 139667884158976, 139667884167167,
+STORE, 139667879837696, 139667882053631,
+SNULL, 139667879837696, 139667879935999,
+STORE, 139667879936000, 139667882053631,
+STORE, 139667879837696, 139667879935999,
+SNULL, 139667882029055, 139667882053631,
+STORE, 139667879936000, 139667882029055,
+STORE, 139667882029056, 139667882053631,
+SNULL, 139667882029056, 139667882037247,
+STORE, 139667882037248, 139667882053631,
+STORE, 139667882029056, 139667882037247,
+ERASE, 139667882029056, 139667882037247,
+STORE, 139667882029056, 139667882037247,
+ERASE, 139667882037248, 139667882053631,
+STORE, 139667882037248, 139667882053631,
+STORE, 139667894988800, 139667895005183,
+SNULL, 139667890507775, 139667890515967,
+STORE, 139667890491392, 139667890507775,
+STORE, 139667890507776, 139667890515967,
+SNULL, 139667882033151, 139667882037247,
+STORE, 139667882029056, 139667882033151,
+STORE, 139667882033152, 139667882037247,
+SNULL, 139667884163071, 139667884167167,
+STORE, 139667884158976, 139667884163071,
+STORE, 139667884163072, 139667884167167,
+SNULL, 139667886731263, 139667886735359,
+STORE, 139667886727168, 139667886731263,
+STORE, 139667886731264, 139667886735359,
+SNULL, 139667892781055, 139667892785151,
+STORE, 139667892776960, 139667892781055,
+STORE, 139667892781056, 139667892785151,
+SNULL, 94796718321663, 94796718325759,
+STORE, 94796718317568, 94796718321663,
+STORE, 94796718321664, 94796718325759,
+SNULL, 139667895037951, 139667895042047,
+STORE, 139667895033856, 139667895037951,
+STORE, 139667895037952, 139667895042047,
+ERASE, 139667895005184, 139667895033855,
+STORE, 94796726063104, 94796726198271,
+STORE, 139667893305344, 139667894988799,
+STORE, 139667895005184, 139667895033855,
+STORE, 94796726063104, 94796726333439,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722489507840, 140737488351231,
+SNULL, 140722489516031, 140737488351231,
+STORE, 140722489507840, 140722489516031,
+STORE, 140722489376768, 140722489516031,
+STORE, 93980993265664, 93980995489791,
+SNULL, 93980993376255, 93980995489791,
+STORE, 93980993265664, 93980993376255,
+STORE, 93980993376256, 93980995489791,
+ERASE, 93980993376256, 93980995489791,
+STORE, 93980995469312, 93980995481599,
+STORE, 93980995481600, 93980995489791,
+STORE, 140261313593344, 140261315846143,
+SNULL, 140261313736703, 140261315846143,
+STORE, 140261313593344, 140261313736703,
+STORE, 140261313736704, 140261315846143,
+ERASE, 140261313736704, 140261315846143,
+STORE, 140261315833856, 140261315842047,
+STORE, 140261315842048, 140261315846143,
+STORE, 140722489675776, 140722489679871,
+STORE, 140722489663488, 140722489675775,
+STORE, 140261315805184, 140261315833855,
+STORE, 140261315796992, 140261315805183,
+STORE, 140261309796352, 140261313593343,
+SNULL, 140261309796352, 140261311455231,
+STORE, 140261311455232, 140261313593343,
+STORE, 140261309796352, 140261311455231,
+SNULL, 140261313552383, 140261313593343,
+STORE, 140261311455232, 140261313552383,
+STORE, 140261313552384, 140261313593343,
+SNULL, 140261313552384, 140261313576959,
+STORE, 140261313576960, 140261313593343,
+STORE, 140261313552384, 140261313576959,
+ERASE, 140261313552384, 140261313576959,
+STORE, 140261313552384, 140261313576959,
+ERASE, 140261313576960, 140261313593343,
+STORE, 140261313576960, 140261313593343,
+SNULL, 140261313568767, 140261313576959,
+STORE, 140261313552384, 140261313568767,
+STORE, 140261313568768, 140261313576959,
+SNULL, 93980995477503, 93980995481599,
+STORE, 93980995469312, 93980995477503,
+STORE, 93980995477504, 93980995481599,
+SNULL, 140261315837951, 140261315842047,
+STORE, 140261315833856, 140261315837951,
+STORE, 140261315837952, 140261315842047,
+ERASE, 140261315805184, 140261315833855,
+STORE, 93980997443584, 93980997578751,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140737488338944, 140737488351231,
+STORE, 140734059450368, 140737488351231,
+SNULL, 140734059462655, 140737488351231,
+STORE, 140734059450368, 140734059462655,
+STORE, 140734059319296, 140734059462655,
+STORE, 4194304, 5128191,
+STORE, 7221248, 7241727,
+STORE, 7241728, 7249919,
+STORE, 140307554983936, 140307557236735,
+SNULL, 140307555127295, 140307557236735,
+STORE, 140307554983936, 140307555127295,
+STORE, 140307555127296, 140307557236735,
+ERASE, 140307555127296, 140307557236735,
+STORE, 140307557224448, 140307557232639,
+STORE, 140307557232640, 140307557236735,
+STORE, 140734059483136, 140734059487231,
+STORE, 140734059470848, 140734059483135,
+STORE, 140307557195776, 140307557224447,
+STORE, 140307557187584, 140307557195775,
+STORE, 140307551821824, 140307554983935,
+SNULL, 140307551821824, 140307552882687,
+STORE, 140307552882688, 140307554983935,
+STORE, 140307551821824, 140307552882687,
+SNULL, 140307554975743, 140307554983935,
+STORE, 140307552882688, 140307554975743,
+STORE, 140307554975744, 140307554983935,
+ERASE, 140307554975744, 140307554983935,
+STORE, 140307554975744, 140307554983935,
+STORE, 140307548024832, 140307551821823,
+SNULL, 140307548024832, 140307549683711,
+STORE, 140307549683712, 140307551821823,
+STORE, 140307548024832, 140307549683711,
+SNULL, 140307551780863, 140307551821823,
+STORE, 140307549683712, 140307551780863,
+STORE, 140307551780864, 140307551821823,
+SNULL, 140307551780864, 140307551805439,
+STORE, 140307551805440, 140307551821823,
+STORE, 140307551780864, 140307551805439,
+ERASE, 140307551780864, 140307551805439,
+STORE, 140307551780864, 140307551805439,
+ERASE, 140307551805440, 140307551821823,
+STORE, 140307551805440, 140307551821823,
+STORE, 140307557175296, 140307557195775,
+SNULL, 140307551797247, 140307551805439,
+STORE, 140307551780864, 140307551797247,
+STORE, 140307551797248, 140307551805439,
+SNULL, 140307554979839, 140307554983935,
+STORE, 140307554975744, 140307554979839,
+STORE, 140307554979840, 140307554983935,
+SNULL, 7233535, 7241727,
+STORE, 7221248, 7233535,
+STORE, 7233536, 7241727,
+SNULL, 140307557228543, 140307557232639,
+STORE, 140307557224448, 140307557228543,
+STORE, 140307557228544, 140307557232639,
+ERASE, 140307557195776, 140307557224447,
+STORE, 39698432, 39833599,
+STORE, 39698432, 39981055,
+STORE, 94306485321728, 94306485432319,
+STORE, 94306487525376, 94306487533567,
+STORE, 94306487533568, 94306487537663,
+STORE, 94306487537664, 94306487545855,
+STORE, 94306488868864, 94306489004031,
+STORE, 140497673998336, 140497675657215,
+STORE, 140497675657216, 140497677754367,
+STORE, 140497677754368, 140497677770751,
+STORE, 140497677770752, 140497677778943,
+STORE, 140497677778944, 140497677795327,
+STORE, 140497677795328, 140497677938687,
+STORE, 140497679998976, 140497680007167,
+STORE, 140497680035840, 140497680039935,
+STORE, 140497680039936, 140497680044031,
+STORE, 140497680044032, 140497680048127,
+STORE, 140732780462080, 140732780601343,
+STORE, 140732782239744, 140732782252031,
+STORE, 140732782252032, 140732782256127,
+STORE, 94236915900416, 94236916011007,
+STORE, 94236918104064, 94236918112255,
+STORE, 94236918112256, 94236918116351,
+STORE, 94236918116352, 94236918124543,
+STORE, 94236939489280, 94236939624447,
+STORE, 140046091743232, 140046093402111,
+STORE, 140046093402112, 140046095499263,
+STORE, 140046095499264, 140046095515647,
+STORE, 140046095515648, 140046095523839,
+STORE, 140046095523840, 140046095540223,
+STORE, 140046095540224, 140046095683583,
+STORE, 140046097743872, 140046097752063,
+STORE, 140046097780736, 140046097784831,
+STORE, 140046097784832, 140046097788927,
+STORE, 140046097788928, 140046097793023,
+STORE, 140726694449152, 140726694588415,
+STORE, 140726695313408, 140726695325695,
+STORE, 140726695325696, 140726695329791,
+STORE, 94894582779904, 94894582992895,
+STORE, 94894585090048, 94894585094143,
+STORE, 94894585094144, 94894585102335,
+STORE, 94894585102336, 94894585114623,
+STORE, 94894592868352, 94894594293759,
+STORE, 139733563842560, 139733565501439,
+STORE, 139733565501440, 139733567598591,
+STORE, 139733567598592, 139733567614975,
+STORE, 139733567614976, 139733567623167,
+STORE, 139733567623168, 139733567639551,
+STORE, 139733567639552, 139733567651839,
+STORE, 139733567651840, 139733569744895,
+STORE, 139733569744896, 139733569748991,
+STORE, 139733569748992, 139733569753087,
+STORE, 139733569753088, 139733569896447,
+STORE, 139733570265088, 139733571948543,
+STORE, 139733571948544, 139733571964927,
+STORE, 139733571993600, 139733571997695,
+STORE, 139733571997696, 139733572001791,
+STORE, 139733572001792, 139733572005887,
+STORE, 140726369255424, 140726369394687,
+STORE, 140726370402304, 140726370414591,
+STORE, 140726370414592, 140726370418687,
+STORE, 94899236483072, 94899236696063,
+STORE, 94899238793216, 94899238797311,
+STORE, 94899238797312, 94899238805503,
+STORE, 94899238805504, 94899238817791,
+STORE, 94899263045632, 94899263979519,
+STORE, 140040959893504, 140040961552383,
+STORE, 140040961552384, 140040963649535,
+STORE, 140040963649536, 140040963665919,
+STORE, 140040963665920, 140040963674111,
+STORE, 140040963674112, 140040963690495,
+STORE, 140040963690496, 140040963702783,
+STORE, 140040963702784, 140040965795839,
+STORE, 140040965795840, 140040965799935,
+STORE, 140040965799936, 140040965804031,
+STORE, 140040965804032, 140040965947391,
+STORE, 140040966316032, 140040967999487,
+STORE, 140040967999488, 140040968015871,
+STORE, 140040968044544, 140040968048639,
+STORE, 140040968048640, 140040968052735,
+STORE, 140040968052736, 140040968056831,
+STORE, 140729921359872, 140729921499135,
+STORE, 140729921613824, 140729921626111,
+STORE, 140729921626112, 140729921630207,
+STORE, 94818265190400, 94818265403391,
+STORE, 94818267500544, 94818267504639,
+STORE, 94818267504640, 94818267512831,
+STORE, 94818267512832, 94818267525119,
+STORE, 94818283372544, 94818285858815,
+STORE, 139818425675776, 139818427334655,
+STORE, 139818427334656, 139818429431807,
+STORE, 139818429431808, 139818429448191,
+STORE, 139818429448192, 139818429456383,
+STORE, 139818429456384, 139818429472767,
+STORE, 139818429472768, 139818429485055,
+STORE, 139818429485056, 139818431578111,
+STORE, 139818431578112, 139818431582207,
+STORE, 139818431582208, 139818431586303,
+STORE, 139818431586304, 139818431729663,
+STORE, 139818432098304, 139818433781759,
+STORE, 139818433781760, 139818433798143,
+STORE, 139818433826816, 139818433830911,
+STORE, 139818433830912, 139818433835007,
+STORE, 139818433835008, 139818433839103,
+STORE, 140726170509312, 140726170648575,
+STORE, 140726171824128, 140726171836415,
+STORE, 140726171836416, 140726171840511,
+STORE, 94611513188352, 94611513401343,
+STORE, 94611515498496, 94611515502591,
+STORE, 94611515502592, 94611515510783,
+STORE, 94611515510784, 94611515523071,
+STORE, 94611516502016, 94611516907519,
+STORE, 140596246388736, 140596248047615,
+STORE, 140596248047616, 140596250144767,
+STORE, 140596250144768, 140596250161151,
+STORE, 140596250161152, 140596250169343,
+STORE, 140596250169344, 140596250185727,
+STORE, 140596250185728, 140596250198015,
+STORE, 140596250198016, 140596252291071,
+STORE, 140596252291072, 140596252295167,
+STORE, 140596252295168, 140596252299263,
+STORE, 140596252299264, 140596252442623,
+STORE, 140596252811264, 140596254494719,
+STORE, 140596254494720, 140596254511103,
+STORE, 140596254539776, 140596254543871,
+STORE, 140596254543872, 140596254547967,
+STORE, 140596254547968, 140596254552063,
+STORE, 140731551338496, 140731551477759,
+STORE, 140731551780864, 140731551793151,
+STORE, 140731551793152, 140731551797247,
+STORE, 94313835851776, 94313836064767,
+STORE, 94313838161920, 94313838166015,
+STORE, 94313838166016, 94313838174207,
+STORE, 94313838174208, 94313838186495,
+STORE, 94313858416640, 94313861906431,
+STORE, 140693503918080, 140693505576959,
+STORE, 140693505576960, 140693507674111,
+STORE, 140693507674112, 140693507690495,
+STORE, 140693507690496, 140693507698687,
+STORE, 140693507698688, 140693507715071,
+STORE, 140693507715072, 140693507727359,
+STORE, 140693507727360, 140693509820415,
+STORE, 140693509820416, 140693509824511,
+STORE, 140693509824512, 140693509828607,
+STORE, 140693509828608, 140693509971967,
+STORE, 140693510340608, 140693512024063,
+STORE, 140693512024064, 140693512040447,
+STORE, 140693512069120, 140693512073215,
+STORE, 140693512073216, 140693512077311,
+STORE, 140693512077312, 140693512081407,
+STORE, 140721116065792, 140721116205055,
+STORE, 140721117831168, 140721117843455,
+STORE, 140721117843456, 140721117847551,
+STORE, 94843650150400, 94843650363391,
+STORE, 94843652460544, 94843652464639,
+STORE, 94843652464640, 94843652472831,
+STORE, 94843652472832, 94843652485119,
+STORE, 94843685388288, 94843686281215,
+STORE, 140484193681408, 140484195340287,
+STORE, 140484195340288, 140484197437439,
+STORE, 140484197437440, 140484197453823,
+STORE, 140484197453824, 140484197462015,
+STORE, 140484197462016, 140484197478399,
+STORE, 140484197478400, 140484197490687,
+STORE, 140484197490688, 140484199583743,
+STORE, 140484199583744, 140484199587839,
+STORE, 140484199587840, 140484199591935,
+STORE, 140484199591936, 140484199735295,
+STORE, 140484200103936, 140484201787391,
+STORE, 140484201787392, 140484201803775,
+STORE, 140484201832448, 140484201836543,
+STORE, 140484201836544, 140484201840639,
+STORE, 140484201840640, 140484201844735,
+STORE, 140726294315008, 140726294454271,
+STORE, 140726295646208, 140726295658495,
+STORE, 140726295658496, 140726295662591,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140720422371328, 140737488351231,
+SNULL, 140720422379519, 140737488351231,
+STORE, 140720422371328, 140720422379519,
+STORE, 140720422240256, 140720422379519,
+STORE, 94417967845376, 94417970180095,
+SNULL, 94417968058367, 94417970180095,
+STORE, 94417967845376, 94417968058367,
+STORE, 94417968058368, 94417970180095,
+ERASE, 94417968058368, 94417970180095,
+STORE, 94417970155520, 94417970167807,
+STORE, 94417970167808, 94417970180095,
+STORE, 140252450045952, 140252452298751,
+SNULL, 140252450189311, 140252452298751,
+STORE, 140252450045952, 140252450189311,
+STORE, 140252450189312, 140252452298751,
+ERASE, 140252450189312, 140252452298751,
+STORE, 140252452286464, 140252452294655,
+STORE, 140252452294656, 140252452298751,
+STORE, 140720422416384, 140720422420479,
+STORE, 140720422404096, 140720422416383,
+STORE, 140252452257792, 140252452286463,
+STORE, 140252452249600, 140252452257791,
+STORE, 140252447932416, 140252450045951,
+SNULL, 140252447932416, 140252447944703,
+STORE, 140252447944704, 140252450045951,
+STORE, 140252447932416, 140252447944703,
+SNULL, 140252450037759, 140252450045951,
+STORE, 140252447944704, 140252450037759,
+STORE, 140252450037760, 140252450045951,
+ERASE, 140252450037760, 140252450045951,
+STORE, 140252450037760, 140252450045951,
+STORE, 140252444135424, 140252447932415,
+SNULL, 140252444135424, 140252445794303,
+STORE, 140252445794304, 140252447932415,
+STORE, 140252444135424, 140252445794303,
+SNULL, 140252447891455, 140252447932415,
+STORE, 140252445794304, 140252447891455,
+STORE, 140252447891456, 140252447932415,
+SNULL, 140252447891456, 140252447916031,
+STORE, 140252447916032, 140252447932415,
+STORE, 140252447891456, 140252447916031,
+ERASE, 140252447891456, 140252447916031,
+STORE, 140252447891456, 140252447916031,
+ERASE, 140252447916032, 140252447932415,
+STORE, 140252447916032, 140252447932415,
+STORE, 140252452241408, 140252452257791,
+SNULL, 140252447907839, 140252447916031,
+STORE, 140252447891456, 140252447907839,
+STORE, 140252447907840, 140252447916031,
+SNULL, 140252450041855, 140252450045951,
+STORE, 140252450037760, 140252450041855,
+STORE, 140252450041856, 140252450045951,
+SNULL, 94417970159615, 94417970167807,
+STORE, 94417970155520, 94417970159615,
+STORE, 94417970159616, 94417970167807,
+SNULL, 140252452290559, 140252452294655,
+STORE, 140252452286464, 140252452290559,
+STORE, 140252452290560, 140252452294655,
+ERASE, 140252452257792, 140252452286463,
+STORE, 94417996333056, 94417996468223,
+STORE, 140252450557952, 140252452241407,
+STORE, 94417996333056, 94417996603391,
+STORE, 94417996333056, 94417996738559,
+STORE, 94417996333056, 94417996910591,
+SNULL, 94417996881919, 94417996910591,
+STORE, 94417996333056, 94417996881919,
+STORE, 94417996881920, 94417996910591,
+ERASE, 94417996881920, 94417996910591,
+STORE, 94417996333056, 94417997017087,
+STORE, 94417996333056, 94417997152255,
+SNULL, 94417997135871, 94417997152255,
+STORE, 94417996333056, 94417997135871,
+STORE, 94417997135872, 94417997152255,
+ERASE, 94417997135872, 94417997152255,
+STORE, 94417996333056, 94417997291519,
+SNULL, 94417997271039, 94417997291519,
+STORE, 94417996333056, 94417997271039,
+STORE, 94417997271040, 94417997291519,
+ERASE, 94417997271040, 94417997291519,
+STORE, 94417996333056, 94417997406207,
+SNULL, 94417997381631, 94417997406207,
+STORE, 94417996333056, 94417997381631,
+STORE, 94417997381632, 94417997406207,
+ERASE, 94417997381632, 94417997406207,
+STORE, 94417996333056, 94417997516799,
+SNULL, 94417997488127, 94417997516799,
+STORE, 94417996333056, 94417997488127,
+STORE, 94417997488128, 94417997516799,
+ERASE, 94417997488128, 94417997516799,
+STORE, 94417996333056, 94417997643775,
+SNULL, 94417997631487, 94417997643775,
+STORE, 94417996333056, 94417997631487,
+STORE, 94417997631488, 94417997643775,
+ERASE, 94417997631488, 94417997643775,
+SNULL, 94417997590527, 94417997631487,
+STORE, 94417996333056, 94417997590527,
+STORE, 94417997590528, 94417997631487,
+ERASE, 94417997590528, 94417997631487,
+STORE, 94417996333056, 94417997733887,
+STORE, 94417996333056, 94417997869055,
+STORE, 94417996333056, 94417998004223,
+SNULL, 94417998000127, 94417998004223,
+STORE, 94417996333056, 94417998000127,
+STORE, 94417998000128, 94417998004223,
+ERASE, 94417998000128, 94417998004223,
+STORE, 94049170993152, 94049171206143,
+STORE, 94049173303296, 94049173307391,
+STORE, 94049173307392, 94049173315583,
+STORE, 94049173315584, 94049173327871,
+STORE, 94049176236032, 94049183645695,
+STORE, 139807795544064, 139807797202943,
+STORE, 139807797202944, 139807799300095,
+STORE, 139807799300096, 139807799316479,
+STORE, 139807799316480, 139807799324671,
+STORE, 139807799324672, 139807799341055,
+STORE, 139807799341056, 139807799353343,
+STORE, 139807799353344, 139807801446399,
+STORE, 139807801446400, 139807801450495,
+STORE, 139807801450496, 139807801454591,
+STORE, 139807801454592, 139807801597951,
+STORE, 139807801966592, 139807803650047,
+STORE, 139807803650048, 139807803666431,
+STORE, 139807803695104, 139807803699199,
+STORE, 139807803699200, 139807803703295,
+STORE, 139807803703296, 139807803707391,
+STORE, 140727555538944, 140727555678207,
+STORE, 140727555940352, 140727555952639,
+STORE, 140727555952640, 140727555956735,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722483441664, 140737488351231,
+SNULL, 140722483449855, 140737488351231,
+STORE, 140722483441664, 140722483449855,
+STORE, 140722483310592, 140722483449855,
+STORE, 94416704921600, 94416707145727,
+SNULL, 94416705032191, 94416707145727,
+STORE, 94416704921600, 94416705032191,
+STORE, 94416705032192, 94416707145727,
+ERASE, 94416705032192, 94416707145727,
+STORE, 94416707125248, 94416707137535,
+STORE, 94416707137536, 94416707145727,
+STORE, 140555439296512, 140555441549311,
+SNULL, 140555439439871, 140555441549311,
+STORE, 140555439296512, 140555439439871,
+STORE, 140555439439872, 140555441549311,
+ERASE, 140555439439872, 140555441549311,
+STORE, 140555441537024, 140555441545215,
+STORE, 140555441545216, 140555441549311,
+STORE, 140722484781056, 140722484785151,
+STORE, 140722484768768, 140722484781055,
+STORE, 140555441508352, 140555441537023,
+STORE, 140555441500160, 140555441508351,
+STORE, 140555435499520, 140555439296511,
+SNULL, 140555435499520, 140555437158399,
+STORE, 140555437158400, 140555439296511,
+STORE, 140555435499520, 140555437158399,
+SNULL, 140555439255551, 140555439296511,
+STORE, 140555437158400, 140555439255551,
+STORE, 140555439255552, 140555439296511,
+SNULL, 140555439255552, 140555439280127,
+STORE, 140555439280128, 140555439296511,
+STORE, 140555439255552, 140555439280127,
+ERASE, 140555439255552, 140555439280127,
+STORE, 140555439255552, 140555439280127,
+ERASE, 140555439280128, 140555439296511,
+STORE, 140555439280128, 140555439296511,
+SNULL, 140555439271935, 140555439280127,
+STORE, 140555439255552, 140555439271935,
+STORE, 140555439271936, 140555439280127,
+SNULL, 94416707133439, 94416707137535,
+STORE, 94416707125248, 94416707133439,
+STORE, 94416707133440, 94416707137535,
+SNULL, 140555441541119, 140555441545215,
+STORE, 140555441537024, 140555441541119,
+STORE, 140555441541120, 140555441545215,
+ERASE, 140555441508352, 140555441537023,
+STORE, 94416724672512, 94416724807679,
+STORE, 94686636953600, 94686637166591,
+STORE, 94686639263744, 94686639267839,
+STORE, 94686639267840, 94686639276031,
+STORE, 94686639276032, 94686639288319,
+STORE, 94686662193152, 94686663163903,
+STORE, 140312944431104, 140312946089983,
+STORE, 140312946089984, 140312948187135,
+STORE, 140312948187136, 140312948203519,
+STORE, 140312948203520, 140312948211711,
+STORE, 140312948211712, 140312948228095,
+STORE, 140312948228096, 140312948240383,
+STORE, 140312948240384, 140312950333439,
+STORE, 140312950333440, 140312950337535,
+STORE, 140312950337536, 140312950341631,
+STORE, 140312950341632, 140312950484991,
+STORE, 140312950853632, 140312952537087,
+STORE, 140312952537088, 140312952553471,
+STORE, 140312952582144, 140312952586239,
+STORE, 140312952586240, 140312952590335,
+STORE, 140312952590336, 140312952594431,
+STORE, 140730598920192, 140730599059455,
+STORE, 140730599108608, 140730599120895,
+STORE, 140730599120896, 140730599124991,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140726234079232, 140737488351231,
+SNULL, 140726234087423, 140737488351231,
+STORE, 140726234079232, 140726234087423,
+STORE, 140726233948160, 140726234087423,
+STORE, 94589467578368, 94589469802495,
+SNULL, 94589467688959, 94589469802495,
+STORE, 94589467578368, 94589467688959,
+STORE, 94589467688960, 94589469802495,
+ERASE, 94589467688960, 94589469802495,
+STORE, 94589469782016, 94589469794303,
+STORE, 94589469794304, 94589469802495,
+STORE, 140587082842112, 140587085094911,
+SNULL, 140587082985471, 140587085094911,
+STORE, 140587082842112, 140587082985471,
+STORE, 140587082985472, 140587085094911,
+ERASE, 140587082985472, 140587085094911,
+STORE, 140587085082624, 140587085090815,
+STORE, 140587085090816, 140587085094911,
+STORE, 140726234103808, 140726234107903,
+STORE, 140726234091520, 140726234103807,
+STORE, 140587085053952, 140587085082623,
+STORE, 140587085045760, 140587085053951,
+STORE, 140587079045120, 140587082842111,
+SNULL, 140587079045120, 140587080703999,
+STORE, 140587080704000, 140587082842111,
+STORE, 140587079045120, 140587080703999,
+SNULL, 140587082801151, 140587082842111,
+STORE, 140587080704000, 140587082801151,
+STORE, 140587082801152, 140587082842111,
+SNULL, 140587082801152, 140587082825727,
+STORE, 140587082825728, 140587082842111,
+STORE, 140587082801152, 140587082825727,
+ERASE, 140587082801152, 140587082825727,
+STORE, 140587082801152, 140587082825727,
+ERASE, 140587082825728, 140587082842111,
+STORE, 140587082825728, 140587082842111,
+SNULL, 140587082817535, 140587082825727,
+STORE, 140587082801152, 140587082817535,
+STORE, 140587082817536, 140587082825727,
+SNULL, 94589469790207, 94589469794303,
+STORE, 94589469782016, 94589469790207,
+STORE, 94589469790208, 94589469794303,
+SNULL, 140587085086719, 140587085090815,
+STORE, 140587085082624, 140587085086719,
+STORE, 140587085086720, 140587085090815,
+ERASE, 140587085053952, 140587085082623,
+STORE, 94589477507072, 94589477642239,
+STORE, 94225448325120, 94225448538111,
+STORE, 94225450635264, 94225450639359,
+STORE, 94225450639360, 94225450647551,
+STORE, 94225450647552, 94225450659839,
+STORE, 94225470246912, 94225473548287,
+STORE, 140199245496320, 140199247155199,
+STORE, 140199247155200, 140199249252351,
+STORE, 140199249252352, 140199249268735,
+STORE, 140199249268736, 140199249276927,
+STORE, 140199249276928, 140199249293311,
+STORE, 140199249293312, 140199249305599,
+STORE, 140199249305600, 140199251398655,
+STORE, 140199251398656, 140199251402751,
+STORE, 140199251402752, 140199251406847,
+STORE, 140199251406848, 140199251550207,
+STORE, 140199251918848, 140199253602303,
+STORE, 140199253602304, 140199253618687,
+STORE, 140199253647360, 140199253651455,
+STORE, 140199253651456, 140199253655551,
+STORE, 140199253655552, 140199253659647,
+STORE, 140726264414208, 140726264553471,
+STORE, 140726265843712, 140726265855999,
+STORE, 140726265856000, 140726265860095,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140733508358144, 140737488351231,
+SNULL, 140733508366335, 140737488351231,
+STORE, 140733508358144, 140733508366335,
+STORE, 140733508227072, 140733508366335,
+STORE, 94766263947264, 94766266171391,
+SNULL, 94766264057855, 94766266171391,
+STORE, 94766263947264, 94766264057855,
+STORE, 94766264057856, 94766266171391,
+ERASE, 94766264057856, 94766266171391,
+STORE, 94766266150912, 94766266163199,
+STORE, 94766266163200, 94766266171391,
+STORE, 140693985132544, 140693987385343,
+SNULL, 140693985275903, 140693987385343,
+STORE, 140693985132544, 140693985275903,
+STORE, 140693985275904, 140693987385343,
+ERASE, 140693985275904, 140693987385343,
+STORE, 140693987373056, 140693987381247,
+STORE, 140693987381248, 140693987385343,
+STORE, 140733509939200, 140733509943295,
+STORE, 140733509926912, 140733509939199,
+STORE, 140693987344384, 140693987373055,
+STORE, 140693987336192, 140693987344383,
+STORE, 140693981335552, 140693985132543,
+SNULL, 140693981335552, 140693982994431,
+STORE, 140693982994432, 140693985132543,
+STORE, 140693981335552, 140693982994431,
+SNULL, 140693985091583, 140693985132543,
+STORE, 140693982994432, 140693985091583,
+STORE, 140693985091584, 140693985132543,
+SNULL, 140693985091584, 140693985116159,
+STORE, 140693985116160, 140693985132543,
+STORE, 140693985091584, 140693985116159,
+ERASE, 140693985091584, 140693985116159,
+STORE, 140693985091584, 140693985116159,
+ERASE, 140693985116160, 140693985132543,
+STORE, 140693985116160, 140693985132543,
+SNULL, 140693985107967, 140693985116159,
+STORE, 140693985091584, 140693985107967,
+STORE, 140693985107968, 140693985116159,
+SNULL, 94766266159103, 94766266163199,
+STORE, 94766266150912, 94766266159103,
+STORE, 94766266159104, 94766266163199,
+SNULL, 140693987377151, 140693987381247,
+STORE, 140693987373056, 140693987377151,
+STORE, 140693987377152, 140693987381247,
+ERASE, 140693987344384, 140693987373055,
+STORE, 94766282035200, 94766282170367,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140724769353728, 140737488351231,
+SNULL, 140724769361919, 140737488351231,
+STORE, 140724769353728, 140724769361919,
+STORE, 140724769222656, 140724769361919,
+STORE, 94710460526592, 94710462750719,
+SNULL, 94710460637183, 94710462750719,
+STORE, 94710460526592, 94710460637183,
+STORE, 94710460637184, 94710462750719,
+ERASE, 94710460637184, 94710462750719,
+STORE, 94710462730240, 94710462742527,
+STORE, 94710462742528, 94710462750719,
+STORE, 140469764395008, 140469766647807,
+SNULL, 140469764538367, 140469766647807,
+STORE, 140469764395008, 140469764538367,
+STORE, 140469764538368, 140469766647807,
+ERASE, 140469764538368, 140469766647807,
+STORE, 140469766635520, 140469766643711,
+STORE, 140469766643712, 140469766647807,
+STORE, 140724770877440, 140724770881535,
+STORE, 140724770865152, 140724770877439,
+STORE, 140469766606848, 140469766635519,
+STORE, 140469766598656, 140469766606847,
+STORE, 140469760598016, 140469764395007,
+SNULL, 140469760598016, 140469762256895,
+STORE, 140469762256896, 140469764395007,
+STORE, 140469760598016, 140469762256895,
+SNULL, 140469764354047, 140469764395007,
+STORE, 140469762256896, 140469764354047,
+STORE, 140469764354048, 140469764395007,
+SNULL, 140469764354048, 140469764378623,
+STORE, 140469764378624, 140469764395007,
+STORE, 140469764354048, 140469764378623,
+ERASE, 140469764354048, 140469764378623,
+STORE, 140469764354048, 140469764378623,
+ERASE, 140469764378624, 140469764395007,
+STORE, 140469764378624, 140469764395007,
+SNULL, 140469764370431, 140469764378623,
+STORE, 140469764354048, 140469764370431,
+STORE, 140469764370432, 140469764378623,
+SNULL, 94710462738431, 94710462742527,
+STORE, 94710462730240, 94710462738431,
+STORE, 94710462738432, 94710462742527,
+SNULL, 140469766639615, 140469766643711,
+STORE, 140469766635520, 140469766639615,
+STORE, 140469766639616, 140469766643711,
+ERASE, 140469766606848, 140469766635519,
+STORE, 94710485581824, 94710485716991,
+STORE, 94105755795456, 94105756008447,
+STORE, 94105758105600, 94105758109695,
+STORE, 94105758109696, 94105758117887,
+STORE, 94105758117888, 94105758130175,
+STORE, 94105788981248, 94105794871295,
+STORE, 140641190031360, 140641191690239,
+STORE, 140641191690240, 140641193787391,
+STORE, 140641193787392, 140641193803775,
+STORE, 140641193803776, 140641193811967,
+STORE, 140641193811968, 140641193828351,
+STORE, 140641193828352, 140641193840639,
+STORE, 140641193840640, 140641195933695,
+STORE, 140641195933696, 140641195937791,
+STORE, 140641195937792, 140641195941887,
+STORE, 140641195941888, 140641196085247,
+STORE, 140641196453888, 140641198137343,
+STORE, 140641198137344, 140641198153727,
+STORE, 140641198182400, 140641198186495,
+STORE, 140641198186496, 140641198190591,
+STORE, 140641198190592, 140641198194687,
+STORE, 140731980034048, 140731980173311,
+STORE, 140731981078528, 140731981090815,
+STORE, 140731981090816, 140731981094911,
+STORE, 93828086431744, 93828086644735,
+STORE, 93828088741888, 93828088745983,
+STORE, 93828088745984, 93828088754175,
+STORE, 93828088754176, 93828088766463,
+STORE, 93828094193664, 93828096831487,
+STORE, 139844717334528, 139844718993407,
+STORE, 139844718993408, 139844721090559,
+STORE, 139844721090560, 139844721106943,
+STORE, 139844721106944, 139844721115135,
+STORE, 139844721115136, 139844721131519,
+STORE, 139844721131520, 139844721143807,
+STORE, 139844721143808, 139844723236863,
+STORE, 139844723236864, 139844723240959,
+STORE, 139844723240960, 139844723245055,
+STORE, 139844723245056, 139844723388415,
+STORE, 139844723757056, 139844725440511,
+STORE, 139844725440512, 139844725456895,
+STORE, 139844725485568, 139844725489663,
+STORE, 139844725489664, 139844725493759,
+STORE, 139844725493760, 139844725497855,
+STORE, 140729996185600, 140729996324863,
+STORE, 140729996828672, 140729996840959,
+STORE, 140729996840960, 140729996845055,
+STORE, 140737488347136, 140737488351231,
+STORE, 140722494771200, 140737488351231,
+SNULL, 140722494775295, 140737488351231,
+STORE, 140722494771200, 140722494775295,
+STORE, 140722494640128, 140722494775295,
+STORE, 94324011311104, 94324013535231,
+SNULL, 94324011421695, 94324013535231,
+STORE, 94324011311104, 94324011421695,
+STORE, 94324011421696, 94324013535231,
+ERASE, 94324011421696, 94324013535231,
+STORE, 94324013514752, 94324013527039,
+STORE, 94324013527040, 94324013535231,
+STORE, 140151462309888, 140151464562687,
+SNULL, 140151462453247, 140151464562687,
+STORE, 140151462309888, 140151462453247,
+STORE, 140151462453248, 140151464562687,
+ERASE, 140151462453248, 140151464562687,
+STORE, 140151464550400, 140151464558591,
+STORE, 140151464558592, 140151464562687,
+STORE, 140722495467520, 140722495471615,
+STORE, 140722495455232, 140722495467519,
+STORE, 140151464521728, 140151464550399,
+STORE, 140151464513536, 140151464521727,
+STORE, 140151458512896, 140151462309887,
+SNULL, 140151458512896, 140151460171775,
+STORE, 140151460171776, 140151462309887,
+STORE, 140151458512896, 140151460171775,
+SNULL, 140151462268927, 140151462309887,
+STORE, 140151460171776, 140151462268927,
+STORE, 140151462268928, 140151462309887,
+SNULL, 140151462268928, 140151462293503,
+STORE, 140151462293504, 140151462309887,
+STORE, 140151462268928, 140151462293503,
+ERASE, 140151462268928, 140151462293503,
+STORE, 140151462268928, 140151462293503,
+ERASE, 140151462293504, 140151462309887,
+STORE, 140151462293504, 140151462309887,
+SNULL, 140151462285311, 140151462293503,
+STORE, 140151462268928, 140151462285311,
+STORE, 140151462285312, 140151462293503,
+SNULL, 94324013522943, 94324013527039,
+STORE, 94324013514752, 94324013522943,
+STORE, 94324013522944, 94324013527039,
+SNULL, 140151464554495, 140151464558591,
+STORE, 140151464550400, 140151464554495,
+STORE, 140151464554496, 140151464558591,
+ERASE, 140151464521728, 140151464550399,
+STORE, 94324024778752, 94324024913919,
+STORE, 94899262967808, 94899263180799,
+STORE, 94899265277952, 94899265282047,
+STORE, 94899265282048, 94899265290239,
+STORE, 94899265290240, 94899265302527,
+STORE, 94899295469568, 94899298689023,
+STORE, 140434388418560, 140434390077439,
+STORE, 140434390077440, 140434392174591,
+STORE, 140434392174592, 140434392190975,
+STORE, 140434392190976, 140434392199167,
+STORE, 140434392199168, 140434392215551,
+STORE, 140434392215552, 140434392227839,
+STORE, 140434392227840, 140434394320895,
+STORE, 140434394320896, 140434394324991,
+STORE, 140434394324992, 140434394329087,
+STORE, 140434394329088, 140434394472447,
+STORE, 140434394841088, 140434396524543,
+STORE, 140434396524544, 140434396540927,
+STORE, 140434396569600, 140434396573695,
+STORE, 140434396573696, 140434396577791,
+STORE, 140434396577792, 140434396581887,
+STORE, 140720618135552, 140720618274815,
+STORE, 140720618418176, 140720618430463,
+STORE, 140720618430464, 140720618434559,
+STORE, 94425529798656, 94425530011647,
+STORE, 94425532108800, 94425532112895,
+STORE, 94425532112896, 94425532121087,
+STORE, 94425532121088, 94425532133375,
+STORE, 94425557753856, 94425566576639,
+STORE, 140600528470016, 140600530128895,
+STORE, 140600530128896, 140600532226047,
+STORE, 140600532226048, 140600532242431,
+STORE, 140600532242432, 140600532250623,
+STORE, 140600532250624, 140600532267007,
+STORE, 140600532267008, 140600532279295,
+STORE, 140600532279296, 140600534372351,
+STORE, 140600534372352, 140600534376447,
+STORE, 140600534376448, 140600534380543,
+STORE, 140600534380544, 140600534523903,
+STORE, 140600534892544, 140600536575999,
+STORE, 140600536576000, 140600536592383,
+STORE, 140600536621056, 140600536625151,
+STORE, 140600536625152, 140600536629247,
+STORE, 140600536629248, 140600536633343,
+STORE, 140721857785856, 140721857925119,
+STORE, 140721858068480, 140721858080767,
+STORE, 140721858080768, 140721858084863,
+STORE, 94425529798656, 94425530011647,
+STORE, 94425532108800, 94425532112895,
+STORE, 94425532112896, 94425532121087,
+STORE, 94425532121088, 94425532133375,
+STORE, 94425557753856, 94425568772095,
+STORE, 140600528470016, 140600530128895,
+STORE, 140600530128896, 140600532226047,
+STORE, 140600532226048, 140600532242431,
+STORE, 140600532242432, 140600532250623,
+STORE, 140600532250624, 140600532267007,
+STORE, 140600532267008, 140600532279295,
+STORE, 140600532279296, 140600534372351,
+STORE, 140600534372352, 140600534376447,
+STORE, 140600534376448, 140600534380543,
+STORE, 140600534380544, 140600534523903,
+STORE, 140600534892544, 140600536575999,
+STORE, 140600536576000, 140600536592383,
+STORE, 140600536621056, 140600536625151,
+STORE, 140600536625152, 140600536629247,
+STORE, 140600536629248, 140600536633343,
+STORE, 140721857785856, 140721857925119,
+STORE, 140721858068480, 140721858080767,
+STORE, 140721858080768, 140721858084863,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140735611645952, 140737488351231,
+SNULL, 140735611654143, 140737488351231,
+STORE, 140735611645952, 140735611654143,
+STORE, 140735611514880, 140735611654143,
+STORE, 94592137641984, 94592139866111,
+SNULL, 94592137752575, 94592139866111,
+STORE, 94592137641984, 94592137752575,
+STORE, 94592137752576, 94592139866111,
+ERASE, 94592137752576, 94592139866111,
+STORE, 94592139845632, 94592139857919,
+STORE, 94592139857920, 94592139866111,
+STORE, 140350425030656, 140350427283455,
+SNULL, 140350425174015, 140350427283455,
+STORE, 140350425030656, 140350425174015,
+STORE, 140350425174016, 140350427283455,
+ERASE, 140350425174016, 140350427283455,
+STORE, 140350427271168, 140350427279359,
+STORE, 140350427279360, 140350427283455,
+STORE, 140735612043264, 140735612047359,
+STORE, 140735612030976, 140735612043263,
+STORE, 140350427242496, 140350427271167,
+STORE, 140350427234304, 140350427242495,
+STORE, 140350421233664, 140350425030655,
+SNULL, 140350421233664, 140350422892543,
+STORE, 140350422892544, 140350425030655,
+STORE, 140350421233664, 140350422892543,
+SNULL, 140350424989695, 140350425030655,
+STORE, 140350422892544, 140350424989695,
+STORE, 140350424989696, 140350425030655,
+SNULL, 140350424989696, 140350425014271,
+STORE, 140350425014272, 140350425030655,
+STORE, 140350424989696, 140350425014271,
+ERASE, 140350424989696, 140350425014271,
+STORE, 140350424989696, 140350425014271,
+ERASE, 140350425014272, 140350425030655,
+STORE, 140350425014272, 140350425030655,
+SNULL, 140350425006079, 140350425014271,
+STORE, 140350424989696, 140350425006079,
+STORE, 140350425006080, 140350425014271,
+SNULL, 94592139853823, 94592139857919,
+STORE, 94592139845632, 94592139853823,
+STORE, 94592139853824, 94592139857919,
+SNULL, 140350427275263, 140350427279359,
+STORE, 140350427271168, 140350427275263,
+STORE, 140350427275264, 140350427279359,
+ERASE, 140350427242496, 140350427271167,
+STORE, 94592164823040, 94592164958207,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140723500535808, 140737488351231,
+SNULL, 140723500543999, 140737488351231,
+STORE, 140723500535808, 140723500543999,
+STORE, 140723500404736, 140723500543999,
+STORE, 94458379010048, 94458381234175,
+SNULL, 94458379120639, 94458381234175,
+STORE, 94458379010048, 94458379120639,
+STORE, 94458379120640, 94458381234175,
+ERASE, 94458379120640, 94458381234175,
+STORE, 94458381213696, 94458381225983,
+STORE, 94458381225984, 94458381234175,
+STORE, 139771674230784, 139771676483583,
+SNULL, 139771674374143, 139771676483583,
+STORE, 139771674230784, 139771674374143,
+STORE, 139771674374144, 139771676483583,
+ERASE, 139771674374144, 139771676483583,
+STORE, 139771676471296, 139771676479487,
+STORE, 139771676479488, 139771676483583,
+STORE, 140723500769280, 140723500773375,
+STORE, 140723500756992, 140723500769279,
+STORE, 139771676442624, 139771676471295,
+STORE, 139771676434432, 139771676442623,
+STORE, 139771670433792, 139771674230783,
+SNULL, 139771670433792, 139771672092671,
+STORE, 139771672092672, 139771674230783,
+STORE, 139771670433792, 139771672092671,
+SNULL, 139771674189823, 139771674230783,
+STORE, 139771672092672, 139771674189823,
+STORE, 139771674189824, 139771674230783,
+SNULL, 139771674189824, 139771674214399,
+STORE, 139771674214400, 139771674230783,
+STORE, 139771674189824, 139771674214399,
+ERASE, 139771674189824, 139771674214399,
+STORE, 139771674189824, 139771674214399,
+ERASE, 139771674214400, 139771674230783,
+STORE, 139771674214400, 139771674230783,
+SNULL, 139771674206207, 139771674214399,
+STORE, 139771674189824, 139771674206207,
+STORE, 139771674206208, 139771674214399,
+SNULL, 94458381221887, 94458381225983,
+STORE, 94458381213696, 94458381221887,
+STORE, 94458381221888, 94458381225983,
+SNULL, 139771676475391, 139771676479487,
+STORE, 139771676471296, 139771676475391,
+STORE, 139771676475392, 139771676479487,
+ERASE, 139771676442624, 139771676471295,
+STORE, 94458401873920, 94458402009087,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140731316264960, 140737488351231,
+SNULL, 140731316273151, 140737488351231,
+STORE, 140731316264960, 140731316273151,
+STORE, 140731316133888, 140731316273151,
+STORE, 94437830881280, 94437833215999,
+SNULL, 94437831094271, 94437833215999,
+STORE, 94437830881280, 94437831094271,
+STORE, 94437831094272, 94437833215999,
+ERASE, 94437831094272, 94437833215999,
+STORE, 94437833191424, 94437833203711,
+STORE, 94437833203712, 94437833215999,
+STORE, 140265986031616, 140265988284415,
+SNULL, 140265986174975, 140265988284415,
+STORE, 140265986031616, 140265986174975,
+STORE, 140265986174976, 140265988284415,
+ERASE, 140265986174976, 140265988284415,
+STORE, 140265988272128, 140265988280319,
+STORE, 140265988280320, 140265988284415,
+STORE, 140731316318208, 140731316322303,
+STORE, 140731316305920, 140731316318207,
+STORE, 140265988243456, 140265988272127,
+STORE, 140265988235264, 140265988243455,
+STORE, 140265983918080, 140265986031615,
+SNULL, 140265983918080, 140265983930367,
+STORE, 140265983930368, 140265986031615,
+STORE, 140265983918080, 140265983930367,
+SNULL, 140265986023423, 140265986031615,
+STORE, 140265983930368, 140265986023423,
+STORE, 140265986023424, 140265986031615,
+ERASE, 140265986023424, 140265986031615,
+STORE, 140265986023424, 140265986031615,
+STORE, 140265980121088, 140265983918079,
+SNULL, 140265980121088, 140265981779967,
+STORE, 140265981779968, 140265983918079,
+STORE, 140265980121088, 140265981779967,
+SNULL, 140265983877119, 140265983918079,
+STORE, 140265981779968, 140265983877119,
+STORE, 140265983877120, 140265983918079,
+SNULL, 140265983877120, 140265983901695,
+STORE, 140265983901696, 140265983918079,
+STORE, 140265983877120, 140265983901695,
+ERASE, 140265983877120, 140265983901695,
+STORE, 140265983877120, 140265983901695,
+ERASE, 140265983901696, 140265983918079,
+STORE, 140265983901696, 140265983918079,
+STORE, 140265988227072, 140265988243455,
+SNULL, 140265983893503, 140265983901695,
+STORE, 140265983877120, 140265983893503,
+STORE, 140265983893504, 140265983901695,
+SNULL, 140265986027519, 140265986031615,
+STORE, 140265986023424, 140265986027519,
+STORE, 140265986027520, 140265986031615,
+SNULL, 94437833195519, 94437833203711,
+STORE, 94437833191424, 94437833195519,
+STORE, 94437833195520, 94437833203711,
+SNULL, 140265988276223, 140265988280319,
+STORE, 140265988272128, 140265988276223,
+STORE, 140265988276224, 140265988280319,
+ERASE, 140265988243456, 140265988272127,
+STORE, 94437847638016, 94437847773183,
+STORE, 140265986543616, 140265988227071,
+STORE, 94437847638016, 94437847908351,
+STORE, 94437847638016, 94437848043519,
+STORE, 94437847638016, 94437848190975,
+SNULL, 94437848178687, 94437848190975,
+STORE, 94437847638016, 94437848178687,
+STORE, 94437848178688, 94437848190975,
+ERASE, 94437848178688, 94437848190975,
+STORE, 94437847638016, 94437848330239,
+STORE, 94437847638016, 94437848465407,
+SNULL, 94437848444927, 94437848465407,
+STORE, 94437847638016, 94437848444927,
+STORE, 94437848444928, 94437848465407,
+ERASE, 94437848444928, 94437848465407,
+STORE, 94437847638016, 94437848584191,
+STORE, 94437847638016, 94437848719359,
+SNULL, 94437848678399, 94437848719359,
+STORE, 94437847638016, 94437848678399,
+STORE, 94437848678400, 94437848719359,
+ERASE, 94437848678400, 94437848719359,
+STORE, 94437847638016, 94437848842239,
+SNULL, 94437848825855, 94437848842239,
+STORE, 94437847638016, 94437848825855,
+STORE, 94437848825856, 94437848842239,
+ERASE, 94437848825856, 94437848842239,
+STORE, 94437847638016, 94437848961023,
+STORE, 94437847638016, 94437849096191,
+STORE, 94661814710272, 94661814923263,
+STORE, 94661817020416, 94661817024511,
+STORE, 94661817024512, 94661817032703,
+STORE, 94661817032704, 94661817044991,
+STORE, 94661840424960, 94661841240063,
+STORE, 140582259814400, 140582261473279,
+STORE, 140582261473280, 140582263570431,
+STORE, 140582263570432, 140582263586815,
+STORE, 140582263586816, 140582263595007,
+STORE, 140582263595008, 140582263611391,
+STORE, 140582263611392, 140582263623679,
+STORE, 140582263623680, 140582265716735,
+STORE, 140582265716736, 140582265720831,
+STORE, 140582265720832, 140582265724927,
+STORE, 140582265724928, 140582265868287,
+STORE, 140582266236928, 140582267920383,
+STORE, 140582267920384, 140582267936767,
+STORE, 140582267965440, 140582267969535,
+STORE, 140582267969536, 140582267973631,
+STORE, 140582267973632, 140582267977727,
+STORE, 140735472508928, 140735472648191,
+STORE, 140735472672768, 140735472685055,
+STORE, 140735472685056, 140735472689151,
+STORE, 94440069140480, 94440069353471,
+STORE, 94440071450624, 94440071454719,
+STORE, 94440071454720, 94440071462911,
+STORE, 94440071462912, 94440071475199,
+STORE, 94440072122368, 94440079048703,
+STORE, 140112218095616, 140112219754495,
+STORE, 140112219754496, 140112221851647,
+STORE, 140112221851648, 140112221868031,
+STORE, 140112221868032, 140112221876223,
+STORE, 140112221876224, 140112221892607,
+STORE, 140112221892608, 140112221904895,
+STORE, 140112221904896, 140112223997951,
+STORE, 140112223997952, 140112224002047,
+STORE, 140112224002048, 140112224006143,
+STORE, 140112224006144, 140112224149503,
+STORE, 140112224518144, 140112226201599,
+STORE, 140112226201600, 140112226217983,
+STORE, 140112226246656, 140112226250751,
+STORE, 140112226250752, 140112226254847,
+STORE, 140112226254848, 140112226258943,
+STORE, 140737460969472, 140737461108735,
+STORE, 140737462083584, 140737462095871,
+STORE, 140737462095872, 140737462099967,
+STORE, 94257654345728, 94257654390783,
+STORE, 94257656483840, 94257656487935,
+STORE, 94257656487936, 94257656492031,
+STORE, 94257656492032, 94257656496127,
+STORE, 94257665859584, 94257665994751,
+STORE, 140507070345216, 140507070386175,
+STORE, 140507070386176, 140507072483327,
+STORE, 140507072483328, 140507072487423,
+STORE, 140507072487424, 140507072491519,
+STORE, 140507072491520, 140507072516095,
+STORE, 140507072516096, 140507072561151,
+STORE, 140507072561152, 140507074654207,
+STORE, 140507074654208, 140507074658303,
+STORE, 140507074658304, 140507074662399,
+STORE, 140507074662400, 140507074744319,
+STORE, 140507074744320, 140507076841471,
+STORE, 140507076841472, 140507076845567,
+STORE, 140507076845568, 140507076849663,
+STORE, 140507076849664, 140507076857855,
+STORE, 140507076857856, 140507076886527,
+STORE, 140507076886528, 140507078979583,
+STORE, 140507078979584, 140507078983679,
+STORE, 140507078983680, 140507078987775,
+STORE, 140507078987776, 140507079086079,
+STORE, 140507079086080, 140507081179135,
+STORE, 140507081179136, 140507081183231,
+STORE, 140507081183232, 140507081187327,
+STORE, 140507081187328, 140507081203711,
+STORE, 140507081203712, 140507081220095,
+STORE, 140507081220096, 140507083317247,
+STORE, 140507083317248, 140507083321343,
+STORE, 140507083321344, 140507083325439,
+STORE, 140507083325440, 140507083792383,
+STORE, 140507083792384, 140507085885439,
+STORE, 140507085885440, 140507085889535,
+STORE, 140507085889536, 140507085893631,
+STORE, 140507085893632, 140507085905919,
+STORE, 140507085905920, 140507087998975,
+STORE, 140507087998976, 140507088003071,
+STORE, 140507088003072, 140507088007167,
+STORE, 140507088007168, 140507088125951,
+STORE, 140507088125952, 140507090219007,
+STORE, 140507090219008, 140507090223103,
+STORE, 140507090223104, 140507090227199,
+STORE, 140507090227200, 140507090268159,
+STORE, 140507090268160, 140507091927039,
+STORE, 140507091927040, 140507094024191,
+STORE, 140507094024192, 140507094040575,
+STORE, 140507094040576, 140507094048767,
+STORE, 140507094048768, 140507094065151,
+STORE, 140507094065152, 140507094216703,
+STORE, 140507094216704, 140507096309759,
+STORE, 140507096309760, 140507096313855,
+STORE, 140507096313856, 140507096317951,
+STORE, 140507096317952, 140507096326143,
+STORE, 140507096326144, 140507096379391,
+STORE, 140507096379392, 140507098472447,
+STORE, 140507098472448, 140507098476543,
+STORE, 140507098476544, 140507098480639,
+STORE, 140507098480640, 140507098623999,
+STORE, 140507098980352, 140507100663807,
+STORE, 140507100663808, 140507100692479,
+STORE, 140507100721152, 140507100725247,
+STORE, 140507100725248, 140507100729343,
+STORE, 140507100729344, 140507100733439,
+STORE, 140728152780800, 140728152915967,
+STORE, 140728153698304, 140728153710591,
+STORE, 140728153710592, 140728153714687,
+STORE, 140507068137472, 140507070345215,
+SNULL, 140507068137472, 140507068190719,
+STORE, 140507068190720, 140507070345215,
+STORE, 140507068137472, 140507068190719,
+SNULL, 140507070287871, 140507070345215,
+STORE, 140507068190720, 140507070287871,
+STORE, 140507070287872, 140507070345215,
+SNULL, 140507070287872, 140507070296063,
+STORE, 140507070296064, 140507070345215,
+STORE, 140507070287872, 140507070296063,
+ERASE, 140507070287872, 140507070296063,
+STORE, 140507070287872, 140507070296063,
+ERASE, 140507070296064, 140507070345215,
+STORE, 140507070296064, 140507070345215,
+STORE, 140507100692480, 140507100721151,
+STORE, 140507065810944, 140507068137471,
+SNULL, 140507065810944, 140507065843711,
+STORE, 140507065843712, 140507068137471,
+STORE, 140507065810944, 140507065843711,
+SNULL, 140507067940863, 140507068137471,
+STORE, 140507065843712, 140507067940863,
+STORE, 140507067940864, 140507068137471,
+SNULL, 140507067940864, 140507067949055,
+STORE, 140507067949056, 140507068137471,
+STORE, 140507067940864, 140507067949055,
+ERASE, 140507067940864, 140507067949055,
+STORE, 140507067940864, 140507067949055,
+ERASE, 140507067949056, 140507068137471,
+STORE, 140507067949056, 140507068137471,
+SNULL, 140507067944959, 140507067949055,
+STORE, 140507067940864, 140507067944959,
+STORE, 140507067944960, 140507067949055,
+SNULL, 140507070291967, 140507070296063,
+STORE, 140507070287872, 140507070291967,
+STORE, 140507070291968, 140507070296063,
+ERASE, 140507100692480, 140507100721151,
+STORE, 140507063705600, 140507065810943,
+SNULL, 140507063705600, 140507063709695,
+STORE, 140507063709696, 140507065810943,
+STORE, 140507063705600, 140507063709695,
+SNULL, 140507065802751, 140507065810943,
+STORE, 140507063709696, 140507065802751,
+STORE, 140507065802752, 140507065810943,
+ERASE, 140507065802752, 140507065810943,
+STORE, 140507065802752, 140507065810943,
+SNULL, 140507065806847, 140507065810943,
+STORE, 140507065802752, 140507065806847,
+STORE, 140507065806848, 140507065810943,
+STORE, 140507061600256, 140507063705599,
+SNULL, 140507061600256, 140507061604351,
+STORE, 140507061604352, 140507063705599,
+STORE, 140507061600256, 140507061604351,
+SNULL, 140507063697407, 140507063705599,
+STORE, 140507061604352, 140507063697407,
+STORE, 140507063697408, 140507063705599,
+ERASE, 140507063697408, 140507063705599,
+STORE, 140507063697408, 140507063705599,
+SNULL, 140507063701503, 140507063705599,
+STORE, 140507063697408, 140507063701503,
+STORE, 140507063701504, 140507063705599,
+STORE, 140507059490816, 140507061600255,
+SNULL, 140507059490816, 140507059499007,
+STORE, 140507059499008, 140507061600255,
+STORE, 140507059490816, 140507059499007,
+SNULL, 140507061592063, 140507061600255,
+STORE, 140507059499008, 140507061592063,
+STORE, 140507061592064, 140507061600255,
+ERASE, 140507061592064, 140507061600255,
+STORE, 140507061592064, 140507061600255,
+SNULL, 140507061596159, 140507061600255,
+STORE, 140507061592064, 140507061596159,
+STORE, 140507061596160, 140507061600255,
+STORE, 140507057377280, 140507059490815,
+SNULL, 140507057377280, 140507057389567,
+STORE, 140507057389568, 140507059490815,
+STORE, 140507057377280, 140507057389567,
+SNULL, 140507059482623, 140507059490815,
+STORE, 140507057389568, 140507059482623,
+STORE, 140507059482624, 140507059490815,
+ERASE, 140507059482624, 140507059490815,
+STORE, 140507059482624, 140507059490815,
+SNULL, 140507059486719, 140507059490815,
+STORE, 140507059482624, 140507059486719,
+STORE, 140507059486720, 140507059490815,
+STORE, 140507055255552, 140507057377279,
+SNULL, 140507055255552, 140507055276031,
+STORE, 140507055276032, 140507057377279,
+STORE, 140507055255552, 140507055276031,
+SNULL, 140507057369087, 140507057377279,
+STORE, 140507055276032, 140507057369087,
+STORE, 140507057369088, 140507057377279,
+ERASE, 140507057369088, 140507057377279,
+STORE, 140507057369088, 140507057377279,
+SNULL, 140507057373183, 140507057377279,
+STORE, 140507057369088, 140507057373183,
+STORE, 140507057373184, 140507057377279,
+STORE, 140507098693632, 140507098980351,
+SNULL, 140507098959871, 140507098980351,
+STORE, 140507098693632, 140507098959871,
+STORE, 140507098959872, 140507098980351,
+SNULL, 140507098959872, 140507098976255,
+STORE, 140507098976256, 140507098980351,
+STORE, 140507098959872, 140507098976255,
+ERASE, 140507098959872, 140507098976255,
+STORE, 140507098959872, 140507098976255,
+ERASE, 140507098976256, 140507098980351,
+STORE, 140507098976256, 140507098980351,
+STORE, 140507100692480, 140507100721151,
+STORE, 140507053125632, 140507055255551,
+SNULL, 140507053125632, 140507053154303,
+STORE, 140507053154304, 140507055255551,
+STORE, 140507053125632, 140507053154303,
+SNULL, 140507055247359, 140507055255551,
+STORE, 140507053154304, 140507055247359,
+STORE, 140507055247360, 140507055255551,
+ERASE, 140507055247360, 140507055255551,
+STORE, 140507055247360, 140507055255551,
+STORE, 140507051012096, 140507053125631,
+SNULL, 140507051012096, 140507051024383,
+STORE, 140507051024384, 140507053125631,
+STORE, 140507051012096, 140507051024383,
+SNULL, 140507053117439, 140507053125631,
+STORE, 140507051024384, 140507053117439,
+STORE, 140507053117440, 140507053125631,
+ERASE, 140507053117440, 140507053125631,
+STORE, 140507053117440, 140507053125631,
+SNULL, 140507053121535, 140507053125631,
+STORE, 140507053117440, 140507053121535,
+STORE, 140507053121536, 140507053125631,
+SNULL, 140507055251455, 140507055255551,
+STORE, 140507055247360, 140507055251455,
+STORE, 140507055251456, 140507055255551,
+SNULL, 140507098972159, 140507098976255,
+STORE, 140507098959872, 140507098972159,
+STORE, 140507098972160, 140507098976255,
+ERASE, 140507100692480, 140507100721151,
+STORE, 140507100717056, 140507100721151,
+ERASE, 140507100717056, 140507100721151,
+STORE, 140507100717056, 140507100721151,
+ERASE, 140507100717056, 140507100721151,
+STORE, 140507100717056, 140507100721151,
+ERASE, 140507100717056, 140507100721151,
+STORE, 140507100717056, 140507100721151,
+ERASE, 140507100717056, 140507100721151,
+STORE, 140507100692480, 140507100721151,
+ERASE, 140507068137472, 140507068190719,
+ERASE, 140507068190720, 140507070287871,
+ERASE, 140507070287872, 140507070291967,
+ERASE, 140507070291968, 140507070296063,
+ERASE, 140507070296064, 140507070345215,
+ERASE, 140507065810944, 140507065843711,
+ERASE, 140507065843712, 140507067940863,
+ERASE, 140507067940864, 140507067944959,
+ERASE, 140507067944960, 140507067949055,
+ERASE, 140507067949056, 140507068137471,
+ERASE, 140507063705600, 140507063709695,
+ERASE, 140507063709696, 140507065802751,
+ERASE, 140507065802752, 140507065806847,
+ERASE, 140507065806848, 140507065810943,
+ERASE, 140507061600256, 140507061604351,
+ERASE, 140507061604352, 140507063697407,
+ERASE, 140507063697408, 140507063701503,
+ERASE, 140507063701504, 140507063705599,
+ERASE, 140507059490816, 140507059499007,
+ERASE, 140507059499008, 140507061592063,
+ERASE, 140507061592064, 140507061596159,
+ERASE, 140507061596160, 140507061600255,
+ERASE, 140507057377280, 140507057389567,
+ERASE, 140507057389568, 140507059482623,
+ERASE, 140507059482624, 140507059486719,
+ERASE, 140507059486720, 140507059490815,
+ERASE, 140507055255552, 140507055276031,
+ERASE, 140507055276032, 140507057369087,
+ERASE, 140507057369088, 140507057373183,
+ERASE, 140507057373184, 140507057377279,
+ERASE, 140507098693632, 140507098959871,
+ERASE, 140507098959872, 140507098972159,
+ERASE, 140507098972160, 140507098976255,
+ERASE, 140507098976256, 140507098980351,
+ERASE, 140507051012096, 140507051024383,
+ERASE, 140507051024384, 140507053117439,
+ERASE, 140507053117440, 140507053121535,
+ERASE, 140507053121536, 140507053125631,
+STORE, 94036448296960, 94036448509951,
+STORE, 94036450607104, 94036450611199,
+STORE, 94036450611200, 94036450619391,
+STORE, 94036450619392, 94036450631679,
+STORE, 94036482445312, 94036502376447,
+STORE, 140469487013888, 140469488672767,
+STORE, 140469488672768, 140469490769919,
+STORE, 140469490769920, 140469490786303,
+STORE, 140469490786304, 140469490794495,
+STORE, 140469490794496, 140469490810879,
+STORE, 140469490810880, 140469490823167,
+STORE, 140469490823168, 140469492916223,
+STORE, 140469492916224, 140469492920319,
+STORE, 140469492920320, 140469492924415,
+STORE, 140469492924416, 140469493067775,
+STORE, 140469493436416, 140469495119871,
+STORE, 140469495119872, 140469495136255,
+STORE, 140469495164928, 140469495169023,
+STORE, 140469495169024, 140469495173119,
+STORE, 140469495173120, 140469495177215,
+STORE, 140732281446400, 140732281585663,
+STORE, 140732282736640, 140732282748927,
+STORE, 140732282748928, 140732282753023,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140723411931136, 140737488351231,
+SNULL, 140723411939327, 140737488351231,
+STORE, 140723411931136, 140723411939327,
+STORE, 140723411800064, 140723411939327,
+STORE, 93993768685568, 93993770909695,
+SNULL, 93993768796159, 93993770909695,
+STORE, 93993768685568, 93993768796159,
+STORE, 93993768796160, 93993770909695,
+ERASE, 93993768796160, 93993770909695,
+STORE, 93993770889216, 93993770901503,
+STORE, 93993770901504, 93993770909695,
+STORE, 140508681740288, 140508683993087,
+SNULL, 140508681883647, 140508683993087,
+STORE, 140508681740288, 140508681883647,
+STORE, 140508681883648, 140508683993087,
+ERASE, 140508681883648, 140508683993087,
+STORE, 140508683980800, 140508683988991,
+STORE, 140508683988992, 140508683993087,
+STORE, 140723412070400, 140723412074495,
+STORE, 140723412058112, 140723412070399,
+STORE, 140508683952128, 140508683980799,
+STORE, 140508683943936, 140508683952127,
+STORE, 140508677943296, 140508681740287,
+SNULL, 140508677943296, 140508679602175,
+STORE, 140508679602176, 140508681740287,
+STORE, 140508677943296, 140508679602175,
+SNULL, 140508681699327, 140508681740287,
+STORE, 140508679602176, 140508681699327,
+STORE, 140508681699328, 140508681740287,
+SNULL, 140508681699328, 140508681723903,
+STORE, 140508681723904, 140508681740287,
+STORE, 140508681699328, 140508681723903,
+ERASE, 140508681699328, 140508681723903,
+STORE, 140508681699328, 140508681723903,
+ERASE, 140508681723904, 140508681740287,
+STORE, 140508681723904, 140508681740287,
+SNULL, 140508681715711, 140508681723903,
+STORE, 140508681699328, 140508681715711,
+STORE, 140508681715712, 140508681723903,
+SNULL, 93993770897407, 93993770901503,
+STORE, 93993770889216, 93993770897407,
+STORE, 93993770897408, 93993770901503,
+SNULL, 140508683984895, 140508683988991,
+STORE, 140508683980800, 140508683984895,
+STORE, 140508683984896, 140508683988991,
+ERASE, 140508683952128, 140508683980799,
+STORE, 93993791582208, 93993791717375,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140734685458432, 140737488351231,
+SNULL, 140734685466623, 140737488351231,
+STORE, 140734685458432, 140734685466623,
+STORE, 140734685327360, 140734685466623,
+STORE, 93832321548288, 93832323772415,
+SNULL, 93832321658879, 93832323772415,
+STORE, 93832321548288, 93832321658879,
+STORE, 93832321658880, 93832323772415,
+ERASE, 93832321658880, 93832323772415,
+STORE, 93832323751936, 93832323764223,
+STORE, 93832323764224, 93832323772415,
+STORE, 140650945118208, 140650947371007,
+SNULL, 140650945261567, 140650947371007,
+STORE, 140650945118208, 140650945261567,
+STORE, 140650945261568, 140650947371007,
+ERASE, 140650945261568, 140650947371007,
+STORE, 140650947358720, 140650947366911,
+STORE, 140650947366912, 140650947371007,
+STORE, 140734686081024, 140734686085119,
+STORE, 140734686068736, 140734686081023,
+STORE, 140650947330048, 140650947358719,
+STORE, 140650947321856, 140650947330047,
+STORE, 140650941321216, 140650945118207,
+SNULL, 140650941321216, 140650942980095,
+STORE, 140650942980096, 140650945118207,
+STORE, 140650941321216, 140650942980095,
+SNULL, 140650945077247, 140650945118207,
+STORE, 140650942980096, 140650945077247,
+STORE, 140650945077248, 140650945118207,
+SNULL, 140650945077248, 140650945101823,
+STORE, 140650945101824, 140650945118207,
+STORE, 140650945077248, 140650945101823,
+ERASE, 140650945077248, 140650945101823,
+STORE, 140650945077248, 140650945101823,
+ERASE, 140650945101824, 140650945118207,
+STORE, 140650945101824, 140650945118207,
+SNULL, 140650945093631, 140650945101823,
+STORE, 140650945077248, 140650945093631,
+STORE, 140650945093632, 140650945101823,
+SNULL, 93832323760127, 93832323764223,
+STORE, 93832323751936, 93832323760127,
+STORE, 93832323760128, 93832323764223,
+SNULL, 140650947362815, 140650947366911,
+STORE, 140650947358720, 140650947362815,
+STORE, 140650947362816, 140650947366911,
+ERASE, 140650947330048, 140650947358719,
+STORE, 93832331890688, 93832332025855,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140728333520896, 140737488351231,
+SNULL, 140728333529087, 140737488351231,
+STORE, 140728333520896, 140728333529087,
+STORE, 140728333389824, 140728333529087,
+STORE, 94872734732288, 94872736956415,
+SNULL, 94872734842879, 94872736956415,
+STORE, 94872734732288, 94872734842879,
+STORE, 94872734842880, 94872736956415,
+ERASE, 94872734842880, 94872736956415,
+STORE, 94872736935936, 94872736948223,
+STORE, 94872736948224, 94872736956415,
+STORE, 139755193257984, 139755195510783,
+SNULL, 139755193401343, 139755195510783,
+STORE, 139755193257984, 139755193401343,
+STORE, 139755193401344, 139755195510783,
+ERASE, 139755193401344, 139755195510783,
+STORE, 139755195498496, 139755195506687,
+STORE, 139755195506688, 139755195510783,
+STORE, 140728333926400, 140728333930495,
+STORE, 140728333914112, 140728333926399,
+STORE, 139755195469824, 139755195498495,
+STORE, 139755195461632, 139755195469823,
+STORE, 139755189460992, 139755193257983,
+SNULL, 139755189460992, 139755191119871,
+STORE, 139755191119872, 139755193257983,
+STORE, 139755189460992, 139755191119871,
+SNULL, 139755193217023, 139755193257983,
+STORE, 139755191119872, 139755193217023,
+STORE, 139755193217024, 139755193257983,
+SNULL, 139755193217024, 139755193241599,
+STORE, 139755193241600, 139755193257983,
+STORE, 139755193217024, 139755193241599,
+ERASE, 139755193217024, 139755193241599,
+STORE, 139755193217024, 139755193241599,
+ERASE, 139755193241600, 139755193257983,
+STORE, 139755193241600, 139755193257983,
+SNULL, 139755193233407, 139755193241599,
+STORE, 139755193217024, 139755193233407,
+STORE, 139755193233408, 139755193241599,
+SNULL, 94872736944127, 94872736948223,
+STORE, 94872736935936, 94872736944127,
+STORE, 94872736944128, 94872736948223,
+SNULL, 139755195502591, 139755195506687,
+STORE, 139755195498496, 139755195502591,
+STORE, 139755195502592, 139755195506687,
+ERASE, 139755195469824, 139755195498495,
+STORE, 94872749744128, 94872749879295,
+STORE, 94720243642368, 94720243855359,
+STORE, 94720245952512, 94720245956607,
+STORE, 94720245956608, 94720245964799,
+STORE, 94720245964800, 94720245977087,
+STORE, 94720277745664, 94720278151167,
+STORE, 140453174497280, 140453176156159,
+STORE, 140453176156160, 140453178253311,
+STORE, 140453178253312, 140453178269695,
+STORE, 140453178269696, 140453178277887,
+STORE, 140453178277888, 140453178294271,
+STORE, 140453178294272, 140453178306559,
+STORE, 140453178306560, 140453180399615,
+STORE, 140453180399616, 140453180403711,
+STORE, 140453180403712, 140453180407807,
+STORE, 140453180407808, 140453180551167,
+STORE, 140453180919808, 140453182603263,
+STORE, 140453182603264, 140453182619647,
+STORE, 140453182648320, 140453182652415,
+STORE, 140453182652416, 140453182656511,
+STORE, 140453182656512, 140453182660607,
+STORE, 140733223923712, 140733224062975,
+STORE, 140733224808448, 140733224820735,
+STORE, 140733224820736, 140733224824831,
+STORE, 94321091141632, 94321091354623,
+STORE, 94321093451776, 94321093455871,
+STORE, 94321093455872, 94321093464063,
+STORE, 94321093464064, 94321093476351,
+STORE, 94321115873280, 94321117229055,
+STORE, 139695978840064, 139695980498943,
+STORE, 139695980498944, 139695982596095,
+STORE, 139695982596096, 139695982612479,
+STORE, 139695982612480, 139695982620671,
+STORE, 139695982620672, 139695982637055,
+STORE, 139695982637056, 139695982649343,
+STORE, 139695982649344, 139695984742399,
+STORE, 139695984742400, 139695984746495,
+STORE, 139695984746496, 139695984750591,
+STORE, 139695984750592, 139695984893951,
+STORE, 139695985262592, 139695986946047,
+STORE, 139695986946048, 139695986962431,
+STORE, 139695986991104, 139695986995199,
+STORE, 139695986995200, 139695986999295,
+STORE, 139695986999296, 139695987003391,
+STORE, 140734650564608, 140734650703871,
+STORE, 140734650785792, 140734650798079,
+STORE, 140734650798080, 140734650802175,
+STORE, 94523438456832, 94523438669823,
+STORE, 94523440766976, 94523440771071,
+STORE, 94523440771072, 94523440779263,
+STORE, 94523440779264, 94523440791551,
+STORE, 94523464544256, 94523465842687,
+STORE, 140453231493120, 140453233151999,
+STORE, 140453233152000, 140453235249151,
+STORE, 140453235249152, 140453235265535,
+STORE, 140453235265536, 140453235273727,
+STORE, 140453235273728, 140453235290111,
+STORE, 140453235290112, 140453235302399,
+STORE, 140453235302400, 140453237395455,
+STORE, 140453237395456, 140453237399551,
+STORE, 140453237399552, 140453237403647,
+STORE, 140453237403648, 140453237547007,
+STORE, 140453237915648, 140453239599103,
+STORE, 140453239599104, 140453239615487,
+STORE, 140453239644160, 140453239648255,
+STORE, 140453239648256, 140453239652351,
+STORE, 140453239652352, 140453239656447,
+STORE, 140734679445504, 140734679584767,
+STORE, 140734680018944, 140734680031231,
+STORE, 140734680031232, 140734680035327,
+STORE, 94614776987648, 94614777200639,
+STORE, 94614779297792, 94614779301887,
+STORE, 94614779301888, 94614779310079,
+STORE, 94614779310080, 94614779322367,
+STORE, 94614798467072, 94614800699391,
+STORE, 139677037182976, 139677038841855,
+STORE, 139677038841856, 139677040939007,
+STORE, 139677040939008, 139677040955391,
+STORE, 139677040955392, 139677040963583,
+STORE, 139677040963584, 139677040979967,
+STORE, 139677040979968, 139677040992255,
+STORE, 139677040992256, 139677043085311,
+STORE, 139677043085312, 139677043089407,
+STORE, 139677043089408, 139677043093503,
+STORE, 139677043093504, 139677043236863,
+STORE, 139677043605504, 139677045288959,
+STORE, 139677045288960, 139677045305343,
+STORE, 139677045334016, 139677045338111,
+STORE, 139677045338112, 139677045342207,
+STORE, 139677045342208, 139677045346303,
+STORE, 140721604411392, 140721604550655,
+STORE, 140721606135808, 140721606148095,
+STORE, 140721606148096, 140721606152191,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140729280544768, 140737488351231,
+SNULL, 140729280552959, 140737488351231,
+STORE, 140729280544768, 140729280552959,
+STORE, 140729280413696, 140729280552959,
+STORE, 94863939334144, 94863941558271,
+SNULL, 94863939444735, 94863941558271,
+STORE, 94863939334144, 94863939444735,
+STORE, 94863939444736, 94863941558271,
+ERASE, 94863939444736, 94863941558271,
+STORE, 94863941537792, 94863941550079,
+STORE, 94863941550080, 94863941558271,
+STORE, 139691047276544, 139691049529343,
+SNULL, 139691047419903, 139691049529343,
+STORE, 139691047276544, 139691047419903,
+STORE, 139691047419904, 139691049529343,
+ERASE, 139691047419904, 139691049529343,
+STORE, 139691049517056, 139691049525247,
+STORE, 139691049525248, 139691049529343,
+STORE, 140729281679360, 140729281683455,
+STORE, 140729281667072, 140729281679359,
+STORE, 139691049488384, 139691049517055,
+STORE, 139691049480192, 139691049488383,
+STORE, 139691043479552, 139691047276543,
+SNULL, 139691043479552, 139691045138431,
+STORE, 139691045138432, 139691047276543,
+STORE, 139691043479552, 139691045138431,
+SNULL, 139691047235583, 139691047276543,
+STORE, 139691045138432, 139691047235583,
+STORE, 139691047235584, 139691047276543,
+SNULL, 139691047235584, 139691047260159,
+STORE, 139691047260160, 139691047276543,
+STORE, 139691047235584, 139691047260159,
+ERASE, 139691047235584, 139691047260159,
+STORE, 139691047235584, 139691047260159,
+ERASE, 139691047260160, 139691047276543,
+STORE, 139691047260160, 139691047276543,
+SNULL, 139691047251967, 139691047260159,
+STORE, 139691047235584, 139691047251967,
+STORE, 139691047251968, 139691047260159,
+SNULL, 94863941545983, 94863941550079,
+STORE, 94863941537792, 94863941545983,
+STORE, 94863941545984, 94863941550079,
+SNULL, 139691049521151, 139691049525247,
+STORE, 139691049517056, 139691049521151,
+STORE, 139691049521152, 139691049525247,
+ERASE, 139691049488384, 139691049517055,
+STORE, 94863951294464, 94863951429631,
+STORE, 93998209294336, 93998209507327,
+STORE, 93998211604480, 93998211608575,
+STORE, 93998211608576, 93998211616767,
+STORE, 93998211616768, 93998211629055,
+STORE, 93998227210240, 93998227615743,
+STORE, 140243029913600, 140243031572479,
+STORE, 140243031572480, 140243033669631,
+STORE, 140243033669632, 140243033686015,
+STORE, 140243033686016, 140243033694207,
+STORE, 140243033694208, 140243033710591,
+STORE, 140243033710592, 140243033722879,
+STORE, 140243033722880, 140243035815935,
+STORE, 140243035815936, 140243035820031,
+STORE, 140243035820032, 140243035824127,
+STORE, 140243035824128, 140243035967487,
+STORE, 140243036336128, 140243038019583,
+STORE, 140243038019584, 140243038035967,
+STORE, 140243038064640, 140243038068735,
+STORE, 140243038068736, 140243038072831,
+STORE, 140243038072832, 140243038076927,
+STORE, 140734976479232, 140734976618495,
+STORE, 140734977978368, 140734977990655,
+STORE, 140734977990656, 140734977994751,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722742775808, 140737488351231,
+SNULL, 140722742783999, 140737488351231,
+STORE, 140722742775808, 140722742783999,
+STORE, 140722742644736, 140722742783999,
+STORE, 93857673662464, 93857675997183,
+SNULL, 93857673875455, 93857675997183,
+STORE, 93857673662464, 93857673875455,
+STORE, 93857673875456, 93857675997183,
+ERASE, 93857673875456, 93857675997183,
+STORE, 93857675972608, 93857675984895,
+STORE, 93857675984896, 93857675997183,
+STORE, 140629677498368, 140629679751167,
+SNULL, 140629677641727, 140629679751167,
+STORE, 140629677498368, 140629677641727,
+STORE, 140629677641728, 140629679751167,
+ERASE, 140629677641728, 140629679751167,
+STORE, 140629679738880, 140629679747071,
+STORE, 140629679747072, 140629679751167,
+STORE, 140722743222272, 140722743226367,
+STORE, 140722743209984, 140722743222271,
+STORE, 140629679710208, 140629679738879,
+STORE, 140629679702016, 140629679710207,
+STORE, 140629675384832, 140629677498367,
+SNULL, 140629675384832, 140629675397119,
+STORE, 140629675397120, 140629677498367,
+STORE, 140629675384832, 140629675397119,
+SNULL, 140629677490175, 140629677498367,
+STORE, 140629675397120, 140629677490175,
+STORE, 140629677490176, 140629677498367,
+ERASE, 140629677490176, 140629677498367,
+STORE, 140629677490176, 140629677498367,
+STORE, 140629671587840, 140629675384831,
+SNULL, 140629671587840, 140629673246719,
+STORE, 140629673246720, 140629675384831,
+STORE, 140629671587840, 140629673246719,
+SNULL, 140629675343871, 140629675384831,
+STORE, 140629673246720, 140629675343871,
+STORE, 140629675343872, 140629675384831,
+SNULL, 140629675343872, 140629675368447,
+STORE, 140629675368448, 140629675384831,
+STORE, 140629675343872, 140629675368447,
+ERASE, 140629675343872, 140629675368447,
+STORE, 140629675343872, 140629675368447,
+ERASE, 140629675368448, 140629675384831,
+STORE, 140629675368448, 140629675384831,
+STORE, 140629679693824, 140629679710207,
+SNULL, 140629675360255, 140629675368447,
+STORE, 140629675343872, 140629675360255,
+STORE, 140629675360256, 140629675368447,
+SNULL, 140629677494271, 140629677498367,
+STORE, 140629677490176, 140629677494271,
+STORE, 140629677494272, 140629677498367,
+SNULL, 93857675976703, 93857675984895,
+STORE, 93857675972608, 93857675976703,
+STORE, 93857675976704, 93857675984895,
+SNULL, 140629679742975, 140629679747071,
+STORE, 140629679738880, 140629679742975,
+STORE, 140629679742976, 140629679747071,
+ERASE, 140629679710208, 140629679738879,
+STORE, 93857705832448, 93857705967615,
+STORE, 140629678010368, 140629679693823,
+STORE, 93857705832448, 93857706102783,
+STORE, 93857705832448, 93857706237951,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140735922421760, 140737488351231,
+SNULL, 140735922429951, 140737488351231,
+STORE, 140735922421760, 140735922429951,
+STORE, 140735922290688, 140735922429951,
+STORE, 94651136139264, 94651138363391,
+SNULL, 94651136249855, 94651138363391,
+STORE, 94651136139264, 94651136249855,
+STORE, 94651136249856, 94651138363391,
+ERASE, 94651136249856, 94651138363391,
+STORE, 94651138342912, 94651138355199,
+STORE, 94651138355200, 94651138363391,
+STORE, 140325788266496, 140325790519295,
+SNULL, 140325788409855, 140325790519295,
+STORE, 140325788266496, 140325788409855,
+STORE, 140325788409856, 140325790519295,
+ERASE, 140325788409856, 140325790519295,
+STORE, 140325790507008, 140325790515199,
+STORE, 140325790515200, 140325790519295,
+STORE, 140735923572736, 140735923576831,
+STORE, 140735923560448, 140735923572735,
+STORE, 140325790478336, 140325790507007,
+STORE, 140325790470144, 140325790478335,
+STORE, 140325784469504, 140325788266495,
+SNULL, 140325784469504, 140325786128383,
+STORE, 140325786128384, 140325788266495,
+STORE, 140325784469504, 140325786128383,
+SNULL, 140325788225535, 140325788266495,
+STORE, 140325786128384, 140325788225535,
+STORE, 140325788225536, 140325788266495,
+SNULL, 140325788225536, 140325788250111,
+STORE, 140325788250112, 140325788266495,
+STORE, 140325788225536, 140325788250111,
+ERASE, 140325788225536, 140325788250111,
+STORE, 140325788225536, 140325788250111,
+ERASE, 140325788250112, 140325788266495,
+STORE, 140325788250112, 140325788266495,
+SNULL, 140325788241919, 140325788250111,
+STORE, 140325788225536, 140325788241919,
+STORE, 140325788241920, 140325788250111,
+SNULL, 94651138351103, 94651138355199,
+STORE, 94651138342912, 94651138351103,
+STORE, 94651138351104, 94651138355199,
+SNULL, 140325790511103, 140325790515199,
+STORE, 140325790507008, 140325790511103,
+STORE, 140325790511104, 140325790515199,
+ERASE, 140325790478336, 140325790507007,
+STORE, 94651146297344, 94651146432511,
+STORE, 94212330168320, 94212330381311,
+STORE, 94212332478464, 94212332482559,
+STORE, 94212332482560, 94212332490751,
+STORE, 94212332490752, 94212332503039,
+STORE, 94212348891136, 94212349825023,
+STORE, 140611630604288, 140611632263167,
+STORE, 140611632263168, 140611634360319,
+STORE, 140611634360320, 140611634376703,
+STORE, 140611634376704, 140611634384895,
+STORE, 140611634384896, 140611634401279,
+STORE, 140611634401280, 140611634413567,
+STORE, 140611634413568, 140611636506623,
+STORE, 140611636506624, 140611636510719,
+STORE, 140611636510720, 140611636514815,
+STORE, 140611636514816, 140611636658175,
+STORE, 140611637026816, 140611638710271,
+STORE, 140611638710272, 140611638726655,
+STORE, 140611638755328, 140611638759423,
+STORE, 140611638759424, 140611638763519,
+STORE, 140611638763520, 140611638767615,
+STORE, 140726974533632, 140726974672895,
+STORE, 140726974943232, 140726974955519,
+STORE, 140726974955520, 140726974959615,
+STORE, 94572463521792, 94572463734783,
+STORE, 94572465831936, 94572465836031,
+STORE, 94572465836032, 94572465844223,
+STORE, 94572465844224, 94572465856511,
+STORE, 94572491534336, 94572492865535,
+STORE, 140644351492096, 140644353150975,
+STORE, 140644353150976, 140644355248127,
+STORE, 140644355248128, 140644355264511,
+STORE, 140644355264512, 140644355272703,
+STORE, 140644355272704, 140644355289087,
+STORE, 140644355289088, 140644355301375,
+STORE, 140644355301376, 140644357394431,
+STORE, 140644357394432, 140644357398527,
+STORE, 140644357398528, 140644357402623,
+STORE, 140644357402624, 140644357545983,
+STORE, 140644357914624, 140644359598079,
+STORE, 140644359598080, 140644359614463,
+STORE, 140644359643136, 140644359647231,
+STORE, 140644359647232, 140644359651327,
+STORE, 140644359651328, 140644359655423,
+STORE, 140727841824768, 140727841964031,
+STORE, 140727843188736, 140727843201023,
+STORE, 140727843201024, 140727843205119,
+STORE, 94144315457536, 94144315670527,
+STORE, 94144317767680, 94144317771775,
+STORE, 94144317771776, 94144317779967,
+STORE, 94144317779968, 94144317792255,
+STORE, 94144318369792, 94144320815103,
+STORE, 140316717645824, 140316719304703,
+STORE, 140316719304704, 140316721401855,
+STORE, 140316721401856, 140316721418239,
+STORE, 140316721418240, 140316721426431,
+STORE, 140316721426432, 140316721442815,
+STORE, 140316721442816, 140316721455103,
+STORE, 140316721455104, 140316723548159,
+STORE, 140316723548160, 140316723552255,
+STORE, 140316723552256, 140316723556351,
+STORE, 140316723556352, 140316723699711,
+STORE, 140316724068352, 140316725751807,
+STORE, 140316725751808, 140316725768191,
+STORE, 140316725796864, 140316725800959,
+STORE, 140316725800960, 140316725805055,
+STORE, 140316725805056, 140316725809151,
+STORE, 140725744283648, 140725744422911,
+STORE, 140725745852416, 140725745864703,
+STORE, 140725745864704, 140725745868799,
+STORE, 94646858846208, 94646859059199,
+STORE, 94646861156352, 94646861160447,
+STORE, 94646861160448, 94646861168639,
+STORE, 94646861168640, 94646861180927,
+STORE, 94646879805440, 94646881894399,
+STORE, 140435449745408, 140435451404287,
+STORE, 140435451404288, 140435453501439,
+STORE, 140435453501440, 140435453517823,
+STORE, 140435453517824, 140435453526015,
+STORE, 140435453526016, 140435453542399,
+STORE, 140435453542400, 140435453554687,
+STORE, 140435453554688, 140435455647743,
+STORE, 140435455647744, 140435455651839,
+STORE, 140435455651840, 140435455655935,
+STORE, 140435455655936, 140435455799295,
+STORE, 140435456167936, 140435457851391,
+STORE, 140435457851392, 140435457867775,
+STORE, 140435457896448, 140435457900543,
+STORE, 140435457900544, 140435457904639,
+STORE, 140435457904640, 140435457908735,
+STORE, 140721033818112, 140721033957375,
+STORE, 140721034018816, 140721034031103,
+STORE, 140721034031104, 140721034035199,
+STORE, 94872903438336, 94872903651327,
+STORE, 94872905748480, 94872905752575,
+STORE, 94872905752576, 94872905760767,
+STORE, 94872905760768, 94872905773055,
+STORE, 94872931246080, 94872931651583,
+STORE, 139771607810048, 139771609468927,
+STORE, 139771609468928, 139771611566079,
+STORE, 139771611566080, 139771611582463,
+STORE, 139771611582464, 139771611590655,
+STORE, 139771611590656, 139771611607039,
+STORE, 139771611607040, 139771611619327,
+STORE, 139771611619328, 139771613712383,
+STORE, 139771613712384, 139771613716479,
+STORE, 139771613716480, 139771613720575,
+STORE, 139771613720576, 139771613863935,
+STORE, 139771614232576, 139771615916031,
+STORE, 139771615916032, 139771615932415,
+STORE, 139771615961088, 139771615965183,
+STORE, 139771615965184, 139771615969279,
+STORE, 139771615969280, 139771615973375,
+STORE, 140725402931200, 140725403070463,
+STORE, 140725403852800, 140725403865087,
+STORE, 140725403865088, 140725403869183,
+STORE, 94740737736704, 94740737949695,
+STORE, 94740740046848, 94740740050943,
+STORE, 94740740050944, 94740740059135,
+STORE, 94740740059136, 94740740071423,
+STORE, 94740743249920, 94740744724479,
+STORE, 140640287010816, 140640288669695,
+STORE, 140640288669696, 140640290766847,
+STORE, 140640290766848, 140640290783231,
+STORE, 140640290783232, 140640290791423,
+STORE, 140640290791424, 140640290807807,
+STORE, 140640290807808, 140640290820095,
+STORE, 140640290820096, 140640292913151,
+STORE, 140640292913152, 140640292917247,
+STORE, 140640292917248, 140640292921343,
+STORE, 140640292921344, 140640293064703,
+STORE, 140640293433344, 140640295116799,
+STORE, 140640295116800, 140640295133183,
+STORE, 140640295161856, 140640295165951,
+STORE, 140640295165952, 140640295170047,
+STORE, 140640295170048, 140640295174143,
+STORE, 140725133303808, 140725133443071,
+STORE, 140725133684736, 140725133697023,
+STORE, 140725133697024, 140725133701119,
+STORE, 140737488347136, 140737488351231,
+STORE, 140722826371072, 140737488351231,
+SNULL, 140722826375167, 140737488351231,
+STORE, 140722826371072, 140722826375167,
+STORE, 140722826240000, 140722826375167,
+STORE, 94113818611712, 94113820835839,
+SNULL, 94113818722303, 94113820835839,
+STORE, 94113818611712, 94113818722303,
+STORE, 94113818722304, 94113820835839,
+ERASE, 94113818722304, 94113820835839,
+STORE, 94113820815360, 94113820827647,
+STORE, 94113820827648, 94113820835839,
+STORE, 139628194508800, 139628196761599,
+SNULL, 139628194652159, 139628196761599,
+STORE, 139628194508800, 139628194652159,
+STORE, 139628194652160, 139628196761599,
+ERASE, 139628194652160, 139628196761599,
+STORE, 139628196749312, 139628196757503,
+STORE, 139628196757504, 139628196761599,
+STORE, 140722826727424, 140722826731519,
+STORE, 140722826715136, 140722826727423,
+STORE, 139628196720640, 139628196749311,
+STORE, 139628196712448, 139628196720639,
+STORE, 139628190711808, 139628194508799,
+SNULL, 139628190711808, 139628192370687,
+STORE, 139628192370688, 139628194508799,
+STORE, 139628190711808, 139628192370687,
+SNULL, 139628194467839, 139628194508799,
+STORE, 139628192370688, 139628194467839,
+STORE, 139628194467840, 139628194508799,
+SNULL, 139628194467840, 139628194492415,
+STORE, 139628194492416, 139628194508799,
+STORE, 139628194467840, 139628194492415,
+ERASE, 139628194467840, 139628194492415,
+STORE, 139628194467840, 139628194492415,
+ERASE, 139628194492416, 139628194508799,
+STORE, 139628194492416, 139628194508799,
+SNULL, 139628194484223, 139628194492415,
+STORE, 139628194467840, 139628194484223,
+STORE, 139628194484224, 139628194492415,
+SNULL, 94113820823551, 94113820827647,
+STORE, 94113820815360, 94113820823551,
+STORE, 94113820823552, 94113820827647,
+SNULL, 139628196753407, 139628196757503,
+STORE, 139628196749312, 139628196753407,
+STORE, 139628196753408, 139628196757503,
+ERASE, 139628196720640, 139628196749311,
+STORE, 94113830850560, 94113830985727,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140731865833472, 140737488351231,
+SNULL, 140731865841663, 140737488351231,
+STORE, 140731865833472, 140731865841663,
+STORE, 140731865702400, 140731865841663,
+STORE, 94763339386880, 94763341611007,
+SNULL, 94763339497471, 94763341611007,
+STORE, 94763339386880, 94763339497471,
+STORE, 94763339497472, 94763341611007,
+ERASE, 94763339497472, 94763341611007,
+STORE, 94763341590528, 94763341602815,
+STORE, 94763341602816, 94763341611007,
+STORE, 139778398486528, 139778400739327,
+SNULL, 139778398629887, 139778400739327,
+STORE, 139778398486528, 139778398629887,
+STORE, 139778398629888, 139778400739327,
+ERASE, 139778398629888, 139778400739327,
+STORE, 139778400727040, 139778400735231,
+STORE, 139778400735232, 139778400739327,
+STORE, 140731865858048, 140731865862143,
+STORE, 140731865845760, 140731865858047,
+STORE, 139778400698368, 139778400727039,
+STORE, 139778400690176, 139778400698367,
+STORE, 139778394689536, 139778398486527,
+SNULL, 139778394689536, 139778396348415,
+STORE, 139778396348416, 139778398486527,
+STORE, 139778394689536, 139778396348415,
+SNULL, 139778398445567, 139778398486527,
+STORE, 139778396348416, 139778398445567,
+STORE, 139778398445568, 139778398486527,
+SNULL, 139778398445568, 139778398470143,
+STORE, 139778398470144, 139778398486527,
+STORE, 139778398445568, 139778398470143,
+ERASE, 139778398445568, 139778398470143,
+STORE, 139778398445568, 139778398470143,
+ERASE, 139778398470144, 139778398486527,
+STORE, 139778398470144, 139778398486527,
+SNULL, 139778398461951, 139778398470143,
+STORE, 139778398445568, 139778398461951,
+STORE, 139778398461952, 139778398470143,
+SNULL, 94763341598719, 94763341602815,
+STORE, 94763341590528, 94763341598719,
+STORE, 94763341598720, 94763341602815,
+SNULL, 139778400731135, 139778400735231,
+STORE, 139778400727040, 139778400731135,
+STORE, 139778400731136, 139778400735231,
+ERASE, 139778400698368, 139778400727039,
+STORE, 94763362197504, 94763362332671,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140737488338944, 140737488351231,
+STORE, 140732053192704, 140737488351231,
+SNULL, 140732053204991, 140737488351231,
+STORE, 140732053192704, 140732053204991,
+STORE, 140732053061632, 140732053204991,
+STORE, 4194304, 26279935,
+STORE, 28372992, 28454911,
+STORE, 28454912, 29806591,
+STORE, 140176018599936, 140176020852735,
+SNULL, 140176018743295, 140176020852735,
+STORE, 140176018599936, 140176018743295,
+STORE, 140176018743296, 140176020852735,
+ERASE, 140176018743296, 140176020852735,
+STORE, 140176020840448, 140176020848639,
+STORE, 140176020848640, 140176020852735,
+STORE, 140732053381120, 140732053385215,
+STORE, 140732053368832, 140732053381119,
+STORE, 140176020811776, 140176020840447,
+STORE, 140176020803584, 140176020811775,
+STORE, 140176014766080, 140176018599935,
+SNULL, 140176014766080, 140176016474111,
+STORE, 140176016474112, 140176018599935,
+STORE, 140176014766080, 140176016474111,
+SNULL, 140176018567167, 140176018599935,
+STORE, 140176016474112, 140176018567167,
+STORE, 140176018567168, 140176018599935,
+ERASE, 140176018567168, 140176018599935,
+STORE, 140176018567168, 140176018599935,
+STORE, 140176012570624, 140176014766079,
+SNULL, 140176012570624, 140176012664831,
+STORE, 140176012664832, 140176014766079,
+STORE, 140176012570624, 140176012664831,
+SNULL, 140176014757887, 140176014766079,
+STORE, 140176012664832, 140176014757887,
+STORE, 140176014757888, 140176014766079,
+ERASE, 140176014757888, 140176014766079,
+STORE, 140176014757888, 140176014766079,
+STORE, 140176010051584, 140176012570623,
+SNULL, 140176010051584, 140176010465279,
+STORE, 140176010465280, 140176012570623,
+STORE, 140176010051584, 140176010465279,
+SNULL, 140176012558335, 140176012570623,
+STORE, 140176010465280, 140176012558335,
+STORE, 140176012558336, 140176012570623,
+ERASE, 140176012558336, 140176012570623,
+STORE, 140176012558336, 140176012570623,
+STORE, 140176007417856, 140176010051583,
+SNULL, 140176007417856, 140176007946239,
+STORE, 140176007946240, 140176010051583,
+STORE, 140176007417856, 140176007946239,
+SNULL, 140176010043391, 140176010051583,
+STORE, 140176007946240, 140176010043391,
+STORE, 140176010043392, 140176010051583,
+ERASE, 140176010043392, 140176010051583,
+STORE, 140176010043392, 140176010051583,
+STORE, 140176005304320, 140176007417855,
+SNULL, 140176005304320, 140176005316607,
+STORE, 140176005316608, 140176007417855,
+STORE, 140176005304320, 140176005316607,
+SNULL, 140176007409663, 140176007417855,
+STORE, 140176005316608, 140176007409663,
+STORE, 140176007409664, 140176007417855,
+ERASE, 140176007409664, 140176007417855,
+STORE, 140176007409664, 140176007417855,
+STORE, 140176003100672, 140176005304319,
+SNULL, 140176003100672, 140176003203071,
+STORE, 140176003203072, 140176005304319,
+STORE, 140176003100672, 140176003203071,
+SNULL, 140176005296127, 140176005304319,
+STORE, 140176003203072, 140176005296127,
+STORE, 140176005296128, 140176005304319,
+ERASE, 140176005296128, 140176005304319,
+STORE, 140176005296128, 140176005304319,
+STORE, 140176020795392, 140176020811775,
+STORE, 140175999938560, 140176003100671,
+SNULL, 140175999938560, 140176000999423,
+STORE, 140176000999424, 140176003100671,
+STORE, 140175999938560, 140176000999423,
+SNULL, 140176003092479, 140176003100671,
+STORE, 140176000999424, 140176003092479,
+STORE, 140176003092480, 140176003100671,
+ERASE, 140176003092480, 140176003100671,
+STORE, 140176003092480, 140176003100671,
+STORE, 140175996141568, 140175999938559,
+SNULL, 140175996141568, 140175997800447,
+STORE, 140175997800448, 140175999938559,
+STORE, 140175996141568, 140175997800447,
+SNULL, 140175999897599, 140175999938559,
+STORE, 140175997800448, 140175999897599,
+STORE, 140175999897600, 140175999938559,
+SNULL, 140175999897600, 140175999922175,
+STORE, 140175999922176, 140175999938559,
+STORE, 140175999897600, 140175999922175,
+ERASE, 140175999897600, 140175999922175,
+STORE, 140175999897600, 140175999922175,
+ERASE, 140175999922176, 140175999938559,
+STORE, 140175999922176, 140175999938559,
+STORE, 140176020783104, 140176020811775,
+SNULL, 140175999913983, 140175999922175,
+STORE, 140175999897600, 140175999913983,
+STORE, 140175999913984, 140175999922175,
+SNULL, 140176003096575, 140176003100671,
+STORE, 140176003092480, 140176003096575,
+STORE, 140176003096576, 140176003100671,
+SNULL, 140176005300223, 140176005304319,
+STORE, 140176005296128, 140176005300223,
+STORE, 140176005300224, 140176005304319,
+SNULL, 140176007413759, 140176007417855,
+STORE, 140176007409664, 140176007413759,
+STORE, 140176007413760, 140176007417855,
+SNULL, 140176010047487, 140176010051583,
+STORE, 140176010043392, 140176010047487,
+STORE, 140176010047488, 140176010051583,
+SNULL, 140176012566527, 140176012570623,
+STORE, 140176012558336, 140176012566527,
+STORE, 140176012566528, 140176012570623,
+SNULL, 140176014761983, 140176014766079,
+STORE, 140176014757888, 140176014761983,
+STORE, 140176014761984, 140176014766079,
+SNULL, 140176018571263, 140176018599935,
+STORE, 140176018567168, 140176018571263,
+STORE, 140176018571264, 140176018599935,
+SNULL, 28405759, 28454911,
+STORE, 28372992, 28405759,
+STORE, 28405760, 28454911,
+SNULL, 140176020844543, 140176020848639,
+STORE, 140176020840448, 140176020844543,
+STORE, 140176020844544, 140176020848639,
+ERASE, 140176020811776, 140176020840447,
+STORE, 53080064, 53215231,
+STORE, 140176019099648, 140176020783103,
+STORE, 140176020836352, 140176020840447,
+STORE, 140176018964480, 140176019099647,
+STORE, 53080064, 53358591,
+STORE, 140175994044416, 140175996141567,
+STORE, 140176020828160, 140176020840447,
+STORE, 140176020819968, 140176020840447,
+STORE, 140176020783104, 140176020819967,
+STORE, 140176018948096, 140176019099647,
+STORE, 53080064, 53493759,
+STORE, 53080064, 53649407,
+STORE, 140176018939904, 140176019099647,
+STORE, 140176018931712, 140176019099647,
+STORE, 53080064, 53784575,
+STORE, 53080064, 53919743,
+STORE, 140176018915328, 140176019099647,
+STORE, 140176018907136, 140176019099647,
+STORE, 53080064, 54059007,
+STORE, 140175993769984, 140175996141567,
+STORE, 140176018747392, 140176019099647,
+STORE, 53080064, 54198271,
+SNULL, 54190079, 54198271,
+STORE, 53080064, 54190079,
+STORE, 54190080, 54198271,
+ERASE, 54190080, 54198271,
+SNULL, 54181887, 54190079,
+STORE, 53080064, 54181887,
+STORE, 54181888, 54190079,
+ERASE, 54181888, 54190079,
+SNULL, 54173695, 54181887,
+STORE, 53080064, 54173695,
+STORE, 54173696, 54181887,
+ERASE, 54173696, 54181887,
+SNULL, 54165503, 54173695,
+STORE, 53080064, 54165503,
+STORE, 54165504, 54173695,
+ERASE, 54165504, 54173695,
+STORE, 140175993753600, 140175996141567,
+STORE, 140175993688064, 140175996141567,
+STORE, 140175993655296, 140175996141567,
+STORE, 140175991558144, 140175996141567,
+STORE, 140175991492608, 140175996141567,
+STORE, 53080064, 54312959,
+STORE, 140175991361536, 140175996141567,
+STORE, 140175991099392, 140175996141567,
+STORE, 140175991091200, 140175996141567,
+STORE, 140175991074816, 140175996141567,
+STORE, 140175991066624, 140175996141567,
+STORE, 140175991058432, 140175996141567,
+STORE, 53080064, 54448127,
+SNULL, 54439935, 54448127,
+STORE, 53080064, 54439935,
+STORE, 54439936, 54448127,
+ERASE, 54439936, 54448127,
+SNULL, 54431743, 54439935,
+STORE, 53080064, 54431743,
+STORE, 54431744, 54439935,
+ERASE, 54431744, 54439935,
+SNULL, 54419455, 54431743,
+STORE, 53080064, 54419455,
+STORE, 54419456, 54431743,
+ERASE, 54419456, 54431743,
+SNULL, 54403071, 54419455,
+STORE, 53080064, 54403071,
+STORE, 54403072, 54419455,
+ERASE, 54403072, 54419455,
+STORE, 140175991042048, 140175996141567,
+STORE, 53080064, 54538239,
+SNULL, 54534143, 54538239,
+STORE, 53080064, 54534143,
+STORE, 54534144, 54538239,
+ERASE, 54534144, 54538239,
+SNULL, 54530047, 54534143,
+STORE, 53080064, 54530047,
+STORE, 54530048, 54534143,
+ERASE, 54530048, 54534143,
+SNULL, 54525951, 54530047,
+STORE, 53080064, 54525951,
+STORE, 54525952, 54530047,
+ERASE, 54525952, 54530047,
+SNULL, 54521855, 54525951,
+STORE, 53080064, 54521855,
+STORE, 54521856, 54525951,
+ERASE, 54521856, 54525951,
+SNULL, 54517759, 54521855,
+STORE, 53080064, 54517759,
+STORE, 54517760, 54521855,
+ERASE, 54517760, 54521855,
+SNULL, 54513663, 54517759,
+STORE, 53080064, 54513663,
+STORE, 54513664, 54517759,
+ERASE, 54513664, 54517759,
+SNULL, 54509567, 54513663,
+STORE, 53080064, 54509567,
+STORE, 54509568, 54513663,
+ERASE, 54509568, 54513663,
+STORE, 140175991025664, 140175996141567,
+STORE, 140175990992896, 140175996141567,
+STORE, 53080064, 54644735,
+SNULL, 54628351, 54644735,
+STORE, 53080064, 54628351,
+STORE, 54628352, 54644735,
+ERASE, 54628352, 54644735,
+SNULL, 54616063, 54628351,
+STORE, 53080064, 54616063,
+STORE, 54616064, 54628351,
+ERASE, 54616064, 54628351,
+STORE, 140175988895744, 140175996141567,
+STORE, 53080064, 54767615,
+STORE, 140175988879360, 140175996141567,
+STORE, 140175988617216, 140175996141567,
+STORE, 140175988609024, 140175996141567,
+STORE, 140175988600832, 140175996141567,
+STORE, 53080064, 54906879,
+SNULL, 54898687, 54906879,
+STORE, 53080064, 54898687,
+STORE, 54898688, 54906879,
+ERASE, 54898688, 54906879,
+SNULL, 54853631, 54898687,
+STORE, 53080064, 54853631,
+STORE, 54853632, 54898687,
+ERASE, 54853632, 54898687,
+STORE, 140175986503680, 140175996141567,
+STORE, 53080064, 54996991,
+STORE, 140175986495488, 140175996141567,
+STORE, 140175986487296, 140175996141567,
+STORE, 140175985438720, 140175996141567,
+STORE, 53080064, 55136255,
+STORE, 140175985405952, 140175996141567,
+STORE, 140175985139712, 140175996141567,
+SNULL, 140176018964479, 140176019099647,
+STORE, 140176018747392, 140176018964479,
+STORE, 140176018964480, 140176019099647,
+ERASE, 140176018964480, 140176019099647,
+STORE, 140175983042560, 140175996141567,
+STORE, 140175982518272, 140175996141567,
+STORE, 140175980421120, 140175996141567,
+STORE, 53080064, 55287807,
+STORE, 53080064, 55427071,
+STORE, 140176019091456, 140176019099647,
+STORE, 140176019083264, 140176019099647,
+STORE, 140176019075072, 140176019099647,
+STORE, 140176019066880, 140176019099647,
+STORE, 140176019058688, 140176019099647,
+STORE, 140175980158976, 140175996141567,
+STORE, 140176019050496, 140176019099647,
+STORE, 140176019042304, 140176019099647,
+STORE, 140176019034112, 140176019099647,
+STORE, 140176019025920, 140176019099647,
+STORE, 140176019017728, 140176019099647,
+STORE, 140176019009536, 140176019099647,
+STORE, 140176019001344, 140176019099647,
+STORE, 140176018993152, 140176019099647,
+STORE, 140176018984960, 140176019099647,
+STORE, 140176018976768, 140176019099647,
+STORE, 140176018968576, 140176019099647,
+STORE, 140175978061824, 140175996141567,
+STORE, 53080064, 55603199,
+STORE, 140175978029056, 140175996141567,
+STORE, 140175977996288, 140175996141567,
+STORE, 53080064, 55738367,
+STORE, 53080064, 55881727,
+STORE, 140175977963520, 140175996141567,
+STORE, 140175977930752, 140175996141567,
+STORE, 53080064, 56041471,
+STORE, 140175977897984, 140175996141567,
+STORE, 140175977865216, 140175996141567,
+SNULL, 55881727, 56041471,
+STORE, 53080064, 55881727,
+STORE, 55881728, 56041471,
+ERASE, 55881728, 56041471,
+SNULL, 55721983, 55881727,
+STORE, 53080064, 55721983,
+STORE, 55721984, 55881727,
+ERASE, 55721984, 55881727,
+SNULL, 55570431, 55721983,
+STORE, 53080064, 55570431,
+STORE, 55570432, 55721983,
+ERASE, 55570432, 55721983,
+STORE, 140175977857024, 140175996141567,
+STORE, 140175975759872, 140175996141567,
+STORE, 53080064, 55754751,
+STORE, 53080064, 55943167,
+STORE, 140175975751680, 140175996141567,
+STORE, 140175975743488, 140175996141567,
+STORE, 140175975735296, 140175996141567,
+STORE, 140175975727104, 140175996141567,
+STORE, 140175975718912, 140175996141567,
+STORE, 140175975710720, 140175996141567,
+STORE, 140175975702528, 140175996141567,
+STORE, 140175975694336, 140175996141567,
+STORE, 140175975686144, 140175996141567,
+STORE, 140175975677952, 140175996141567,
+STORE, 140175975669760, 140175996141567,
+STORE, 140175974621184, 140175996141567,
+STORE, 140175974612992, 140175996141567,
+STORE, 53080064, 56139775,
+STORE, 140175972515840, 140175996141567,
+STORE, 53080064, 56401919,
+STORE, 140175970418688, 140175996141567,
+STORE, 140175970410496, 140175996141567,
+STORE, 140175970402304, 140175996141567,
+STORE, 140175970394112, 140175996141567,
+STORE, 53080064, 56569855,
+STORE, 140175969865728, 140175996141567,
+SNULL, 140175985139711, 140175996141567,
+STORE, 140175969865728, 140175985139711,
+STORE, 140175985139712, 140175996141567,
+SNULL, 140175985139712, 140175985405951,
+STORE, 140175985405952, 140175996141567,
+STORE, 140175985139712, 140175985405951,
+ERASE, 140175985139712, 140175985405951,
+STORE, 140175965671424, 140175985139711,
+STORE, 140175985397760, 140175996141567,
+STORE, 140175985389568, 140175996141567,
+STORE, 140175985381376, 140175996141567,
+STORE, 140175985373184, 140175996141567,
+STORE, 140175985364992, 140175996141567,
+STORE, 140175985356800, 140175996141567,
+STORE, 140175985348608, 140175996141567,
+STORE, 140175985340416, 140175996141567,
+STORE, 140175985332224, 140175996141567,
+STORE, 140175985324032, 140175996141567,
+STORE, 140175985315840, 140175996141567,
+STORE, 140175985307648, 140175996141567,
+STORE, 140175985299456, 140175996141567,
+STORE, 140175985291264, 140175996141567,
+STORE, 140175985283072, 140175996141567,
+STORE, 140175985274880, 140175996141567,
+STORE, 140175963574272, 140175985139711,
+STORE, 140175985266688, 140175996141567,
+STORE, 140175961477120, 140175985139711,
+STORE, 53080064, 56831999,
+STORE, 140175959379968, 140175985139711,
+STORE, 140175985258496, 140175996141567,
+STORE, 140175957282816, 140175985139711,
+STORE, 140175985250304, 140175996141567,
+STORE, 140175985242112, 140175996141567,
+STORE, 140175985233920, 140175996141567,
+STORE, 140175985225728, 140175996141567,
+STORE, 140175985217536, 140175996141567,
+STORE, 140175957151744, 140175985139711,
+STORE, 140175956627456, 140175985139711,
+SNULL, 140175980158975, 140175985139711,
+STORE, 140175956627456, 140175980158975,
+STORE, 140175980158976, 140175985139711,
+SNULL, 140175980158976, 140175980421119,
+STORE, 140175980421120, 140175985139711,
+STORE, 140175980158976, 140175980421119,
+ERASE, 140175980158976, 140175980421119,
+STORE, 140175954530304, 140175980158975,
+STORE, 140175985209344, 140175996141567,
+STORE, 53080064, 57094143,
+STORE, 140175952433152, 140175980158975,
+STORE, 140175985192960, 140175996141567,
+STORE, 140175985184768, 140175996141567,
+STORE, 140175985176576, 140175996141567,
+STORE, 140175985168384, 140175996141567,
+STORE, 140175985160192, 140175996141567,
+STORE, 140175985152000, 140175996141567,
+STORE, 140175985143808, 140175996141567,
+STORE, 140175980412928, 140175985139711,
+STORE, 140175980404736, 140175985139711,
+STORE, 140175980396544, 140175985139711,
+STORE, 140175980388352, 140175985139711,
+STORE, 140175980380160, 140175985139711,
+STORE, 140175980371968, 140175985139711,
+STORE, 140175980363776, 140175985139711,
+STORE, 140175980355584, 140175985139711,
+STORE, 140175980347392, 140175985139711,
+STORE, 140175980339200, 140175985139711,
+STORE, 53080064, 57356287,
+SNULL, 140176018747392, 140176018907135,
+STORE, 140176018907136, 140176018964479,
+STORE, 140176018747392, 140176018907135,
+ERASE, 140176018747392, 140176018907135,
+STORE, 140175952146432, 140175980158975,
+STORE, 140175950049280, 140175980158975,
+SNULL, 140175952146431, 140175980158975,
+STORE, 140175950049280, 140175952146431,
+STORE, 140175952146432, 140175980158975,
+SNULL, 140175952146432, 140175952433151,
+STORE, 140175952433152, 140175980158975,
+STORE, 140175952146432, 140175952433151,
+ERASE, 140175952146432, 140175952433151,
+STORE, 140176018898944, 140176018964479,
+STORE, 53080064, 57749503,
+STORE, 140175949520896, 140175952146431,
+STORE, 140175947423744, 140175952146431,
+SNULL, 140175993769983, 140175996141567,
+STORE, 140175985143808, 140175993769983,
+STORE, 140175993769984, 140175996141567,
+SNULL, 140175993769984, 140175994044415,
+STORE, 140175994044416, 140175996141567,
+STORE, 140175993769984, 140175994044415,
+ERASE, 140175993769984, 140175994044415,
+STORE, 140176018890752, 140176018964479,
+STORE, 140176018882560, 140176018964479,
+STORE, 140176018874368, 140176018964479,
+STORE, 140176018866176, 140176018964479,
+STORE, 140176018849792, 140176018964479,
+STORE, 140176018841600, 140176018964479,
+STORE, 140176018825216, 140176018964479,
+STORE, 140176018817024, 140176018964479,
+STORE, 140176018800640, 140176018964479,
+STORE, 140176018792448, 140176018964479,
+STORE, 140176018759680, 140176018964479,
+STORE, 140176018751488, 140176018964479,
+STORE, 140175994028032, 140175996141567,
+STORE, 140176018743296, 140176018964479,
+STORE, 140175994011648, 140175996141567,
+STORE, 140175994003456, 140175996141567,
+STORE, 140175993987072, 140175996141567,
+STORE, 140175993978880, 140175996141567,
+STORE, 140175993946112, 140175996141567,
+STORE, 140175993937920, 140175996141567,
+STORE, 140175993921536, 140175996141567,
+STORE, 140175993913344, 140175996141567,
+STORE, 140175993896960, 140175996141567,
+STORE, 140175993888768, 140175996141567,
+STORE, 140175993872384, 140175996141567,
+STORE, 140175993864192, 140175996141567,
+STORE, 140175993831424, 140175996141567,
+STORE, 140175993823232, 140175996141567,
+STORE, 140175993806848, 140175996141567,
+STORE, 140175993798656, 140175996141567,
+STORE, 140175993782272, 140175996141567,
+STORE, 140175993774080, 140175996141567,
+STORE, 140175980322816, 140175985139711,
+STORE, 140175980314624, 140175985139711,
+STORE, 140175980281856, 140175985139711,
+STORE, 140175980273664, 140175985139711,
+STORE, 140175980257280, 140175985139711,
+STORE, 140175945326592, 140175952146431,
+STORE, 140175980249088, 140175985139711,
+STORE, 140175980232704, 140175985139711,
+STORE, 140175980224512, 140175985139711,
+STORE, 140175980208128, 140175985139711,
+STORE, 140175980199936, 140175985139711,
+STORE, 140175980167168, 140175985139711,
+STORE, 140175952433152, 140175985139711,
+STORE, 140175952416768, 140175985139711,
+STORE, 140175952408576, 140175985139711,
+STORE, 140175952392192, 140175985139711,
+STORE, 140175952384000, 140175985139711,
+STORE, 140175952367616, 140175985139711,
+STORE, 140175943229440, 140175952146431,
+STORE, 140175952359424, 140175985139711,
+STORE, 140175952326656, 140175985139711,
+STORE, 140175952318464, 140175985139711,
+STORE, 140175952302080, 140175985139711,
+STORE, 140175952293888, 140175985139711,
+STORE, 140175952277504, 140175985139711,
+STORE, 140175952269312, 140175985139711,
+STORE, 140175952252928, 140175985139711,
+STORE, 140175952244736, 140175985139711,
+STORE, 140175952211968, 140175985139711,
+STORE, 140175952203776, 140175985139711,
+STORE, 140175952187392, 140175985139711,
+STORE, 140175952179200, 140175985139711,
+STORE, 140175952162816, 140175985139711,
+STORE, 140175952154624, 140175985139711,
+STORE, 140175943213056, 140175952146431,
+STORE, 140175943213056, 140175985139711,
+STORE, 140175943180288, 140175985139711,
+STORE, 140175943172096, 140175985139711,
+STORE, 140175943155712, 140175985139711,
+STORE, 140175943147520, 140175985139711,
+STORE, 140175943131136, 140175985139711,
+STORE, 140175943122944, 140175985139711,
+STORE, 140175943106560, 140175985139711,
+STORE, 140175943098368, 140175985139711,
+STORE, 140175943065600, 140175985139711,
+STORE, 140175943057408, 140175985139711,
+STORE, 140175943041024, 140175985139711,
+STORE, 140175943032832, 140175985139711,
+STORE, 140175943016448, 140175985139711,
+STORE, 140175943008256, 140175985139711,
+STORE, 140175942991872, 140175985139711,
+STORE, 140175942983680, 140175985139711,
+STORE, 140175942950912, 140175985139711,
+STORE, 140175942942720, 140175985139711,
+STORE, 140175942926336, 140175985139711,
+STORE, 140175942918144, 140175985139711,
+STORE, 140175942901760, 140175985139711,
+STORE, 140175942893568, 140175985139711,
+STORE, 140175942877184, 140175985139711,
+STORE, 140175942868992, 140175985139711,
+STORE, 140175942836224, 140175985139711,
+STORE, 140175942828032, 140175985139711,
+STORE, 140175942811648, 140175985139711,
+STORE, 140175942803456, 140175985139711,
+STORE, 140175942787072, 140175985139711,
+STORE, 140175942778880, 140175985139711,
+STORE, 140175942762496, 140175985139711,
+STORE, 140175942754304, 140175985139711,
+STORE, 140175942721536, 140175985139711,
+STORE, 140175942713344, 140175985139711,
+STORE, 140175942696960, 140175985139711,
+STORE, 140175942688768, 140175985139711,
+STORE, 140175942672384, 140175985139711,
+STORE, 140175942664192, 140175985139711,
+STORE, 140175942647808, 140175985139711,
+STORE, 140175942639616, 140175985139711,
+STORE, 140175942606848, 140175985139711,
+STORE, 140175942598656, 140175985139711,
+STORE, 140175942582272, 140175985139711,
+STORE, 140175942574080, 140175985139711,
+STORE, 140175942557696, 140175985139711,
+STORE, 140175942549504, 140175985139711,
+STORE, 140175942533120, 140175985139711,
+STORE, 140175942524928, 140175985139711,
+STORE, 140175942492160, 140175985139711,
+STORE, 140175942483968, 140175985139711,
+STORE, 140175942467584, 140175985139711,
+STORE, 140175942459392, 140175985139711,
+STORE, 140175942443008, 140175985139711,
+STORE, 140175942434816, 140175985139711,
+STORE, 140175942418432, 140175985139711,
+STORE, 140175942410240, 140175985139711,
+STORE, 140175942377472, 140175985139711,
+STORE, 140175942369280, 140175985139711,
+STORE, 140175942352896, 140175985139711,
+STORE, 140175942344704, 140175985139711,
+STORE, 140175942328320, 140175985139711,
+STORE, 140175942320128, 140175985139711,
+STORE, 140175942303744, 140175985139711,
+STORE, 140175942295552, 140175985139711,
+STORE, 140175942262784, 140175985139711,
+STORE, 140175942254592, 140175985139711,
+STORE, 140175942238208, 140175985139711,
+STORE, 140175942230016, 140175985139711,
+STORE, 140175942213632, 140175985139711,
+STORE, 140175942205440, 140175985139711,
+STORE, 140175942189056, 140175985139711,
+STORE, 140175942180864, 140175985139711,
+STORE, 140175942148096, 140175985139711,
+STORE, 140175942139904, 140175985139711,
+STORE, 140175942123520, 140175985139711,
+STORE, 140175942115328, 140175985139711,
+STORE, 140175942098944, 140175985139711,
+STORE, 140175942090752, 140175985139711,
+STORE, 140175942074368, 140175985139711,
+STORE, 140175942066176, 140175985139711,
+STORE, 140175942033408, 140175985139711,
+STORE, 140175942025216, 140175985139711,
+STORE, 140175942008832, 140175985139711,
+STORE, 140175942000640, 140175985139711,
+STORE, 140175941984256, 140175985139711,
+STORE, 140175941976064, 140175985139711,
+STORE, 140175941959680, 140175985139711,
+STORE, 140175939862528, 140175985139711,
+STORE, 140175939854336, 140175985139711,
+STORE, 140175939821568, 140175985139711,
+STORE, 140175939813376, 140175985139711,
+STORE, 140175939796992, 140175985139711,
+STORE, 140175939788800, 140175985139711,
+STORE, 140175939772416, 140175985139711,
+STORE, 140175939764224, 140175985139711,
+STORE, 140175939747840, 140175985139711,
+STORE, 140175939739648, 140175985139711,
+STORE, 140175939706880, 140175985139711,
+STORE, 140175939698688, 140175985139711,
+STORE, 140175939682304, 140175985139711,
+STORE, 140175939674112, 140175985139711,
+STORE, 140175939657728, 140175985139711,
+STORE, 140175939649536, 140175985139711,
+STORE, 140175939633152, 140175985139711,
+STORE, 140175939624960, 140175985139711,
+STORE, 140175939592192, 140175985139711,
+STORE, 140175939584000, 140175985139711,
+STORE, 140175939567616, 140175985139711,
+STORE, 140175939559424, 140175985139711,
+STORE, 140175939543040, 140175985139711,
+STORE, 140175939534848, 140175985139711,
+STORE, 140175939518464, 140175985139711,
+STORE, 140175939510272, 140175985139711,
+STORE, 140175939477504, 140175985139711,
+STORE, 140175939469312, 140175985139711,
+STORE, 140175939452928, 140175985139711,
+STORE, 140175939444736, 140175985139711,
+STORE, 140175939428352, 140175985139711,
+STORE, 140175939420160, 140175985139711,
+STORE, 140175939403776, 140175985139711,
+STORE, 140175939395584, 140175985139711,
+STORE, 140175939362816, 140175985139711,
+STORE, 140175939354624, 140175985139711,
+STORE, 140175939338240, 140175985139711,
+STORE, 140175939330048, 140175985139711,
+STORE, 140175939313664, 140175985139711,
+STORE, 140175939305472, 140175985139711,
+STORE, 140175939289088, 140175985139711,
+STORE, 140175939280896, 140175985139711,
+STORE, 140175939248128, 140175985139711,
+STORE, 140175939239936, 140175985139711,
+STORE, 140175939223552, 140175985139711,
+STORE, 140175939215360, 140175985139711,
+STORE, 140175939198976, 140175985139711,
+STORE, 140175939190784, 140175985139711,
+STORE, 140175939174400, 140175985139711,
+STORE, 140175939166208, 140175985139711,
+STORE, 140175939133440, 140175985139711,
+STORE, 140175939125248, 140175985139711,
+STORE, 140175939108864, 140175985139711,
+STORE, 140175939100672, 140175985139711,
+STORE, 140175939084288, 140175985139711,
+STORE, 140175939076096, 140175985139711,
+STORE, 140175939059712, 140175985139711,
+STORE, 140175939051520, 140175985139711,
+STORE, 140175939018752, 140175985139711,
+STORE, 140175939010560, 140175985139711,
+STORE, 140175938994176, 140175985139711,
+STORE, 140175938985984, 140175985139711,
+STORE, 140175938969600, 140175985139711,
+STORE, 140175938961408, 140175985139711,
+STORE, 140175938945024, 140175985139711,
+STORE, 140175938936832, 140175985139711,
+STORE, 140175938904064, 140175985139711,
+STORE, 140175938895872, 140175985139711,
+STORE, 140175938879488, 140175985139711,
+STORE, 140175938871296, 140175985139711,
+STORE, 140175938854912, 140175985139711,
+STORE, 140175938846720, 140175985139711,
+STORE, 140175938830336, 140175985139711,
+STORE, 140175938822144, 140175985139711,
+STORE, 140175938789376, 140175985139711,
+STORE, 140175938781184, 140175985139711,
+STORE, 140175938764800, 140175985139711,
+STORE, 140175938756608, 140175985139711,
+STORE, 140175938740224, 140175985139711,
+STORE, 140175938732032, 140175985139711,
+STORE, 140175938715648, 140175985139711,
+STORE, 140175938707456, 140175985139711,
+STORE, 140175938674688, 140175985139711,
+STORE, 140175938666496, 140175985139711,
+STORE, 140175938650112, 140175985139711,
+STORE, 140175938641920, 140175985139711,
+STORE, 140175938625536, 140175985139711,
+STORE, 140175938617344, 140175985139711,
+STORE, 140175938600960, 140175985139711,
+STORE, 140175938592768, 140175985139711,
+STORE, 140175938560000, 140175985139711,
+STORE, 140175938551808, 140175985139711,
+STORE, 140175938535424, 140175985139711,
+STORE, 140175938527232, 140175985139711,
+STORE, 140175938510848, 140175985139711,
+STORE, 140175938502656, 140175985139711,
+STORE, 140175938486272, 140175985139711,
+STORE, 140175938478080, 140175985139711,
+STORE, 140175938445312, 140175985139711,
+STORE, 140175938437120, 140175985139711,
+STORE, 140175938420736, 140175985139711,
+STORE, 140175938412544, 140175985139711,
+STORE, 140175938396160, 140175985139711,
+STORE, 140175938387968, 140175985139711,
+STORE, 140175938371584, 140175985139711,
+STORE, 140175938363392, 140175985139711,
+STORE, 140175938330624, 140175985139711,
+STORE, 140175938322432, 140175985139711,
+STORE, 140175938306048, 140175985139711,
+STORE, 140175938297856, 140175985139711,
+STORE, 140175938281472, 140175985139711,
+STORE, 140175938273280, 140175985139711,
+STORE, 140175938256896, 140175985139711,
+STORE, 140175938248704, 140175985139711,
+STORE, 140175938215936, 140175985139711,
+STORE, 140175938207744, 140175985139711,
+STORE, 140175938191360, 140175985139711,
+STORE, 140175938183168, 140175985139711,
+STORE, 140175938166784, 140175985139711,
+STORE, 140175938158592, 140175985139711,
+STORE, 140175938142208, 140175985139711,
+STORE, 140175936045056, 140175985139711,
+STORE, 140175936036864, 140175985139711,
+STORE, 140175936004096, 140175985139711,
+STORE, 140175935995904, 140175985139711,
+STORE, 140175935979520, 140175985139711,
+STORE, 140175935971328, 140175985139711,
+STORE, 140175935954944, 140175985139711,
+STORE, 140175935946752, 140175985139711,
+STORE, 140175935930368, 140175985139711,
+STORE, 140175935922176, 140175985139711,
+STORE, 140175935889408, 140175985139711,
+STORE, 140175935881216, 140175985139711,
+STORE, 140175935864832, 140175985139711,
+STORE, 140175935856640, 140175985139711,
+STORE, 140175935840256, 140175985139711,
+STORE, 140175935832064, 140175985139711,
+STORE, 140175935815680, 140175985139711,
+STORE, 140175935807488, 140175985139711,
+STORE, 140175935774720, 140175985139711,
+STORE, 140175935766528, 140175985139711,
+STORE, 140175935750144, 140175985139711,
+STORE, 140175935741952, 140175985139711,
+STORE, 140175935725568, 140175985139711,
+STORE, 140175935717376, 140175985139711,
+STORE, 140175935700992, 140175985139711,
+STORE, 140175935692800, 140175985139711,
+STORE, 140175935660032, 140175985139711,
+STORE, 140175935651840, 140175985139711,
+STORE, 140175935635456, 140175985139711,
+STORE, 140175935627264, 140175985139711,
+STORE, 140175935610880, 140175985139711,
+STORE, 140175935602688, 140175985139711,
+STORE, 140175935586304, 140175985139711,
+STORE, 140175935578112, 140175985139711,
+STORE, 140175935545344, 140175985139711,
+STORE, 140175935537152, 140175985139711,
+STORE, 140175935520768, 140175985139711,
+STORE, 140175935512576, 140175985139711,
+STORE, 140175935496192, 140175985139711,
+STORE, 140175935488000, 140175985139711,
+STORE, 140175935471616, 140175985139711,
+STORE, 140175935463424, 140175985139711,
+STORE, 140175935430656, 140175985139711,
+STORE, 140175935422464, 140175985139711,
+STORE, 140175935406080, 140175985139711,
+STORE, 140175935397888, 140175985139711,
+STORE, 140175935381504, 140175985139711,
+STORE, 140175935373312, 140175985139711,
+STORE, 140175935356928, 140175985139711,
+STORE, 140175935348736, 140175985139711,
+STORE, 140175935315968, 140175985139711,
+STORE, 140175935307776, 140175985139711,
+STORE, 140175935291392, 140175985139711,
+STORE, 140175935283200, 140175985139711,
+STORE, 140175935266816, 140175985139711,
+STORE, 140175935258624, 140175985139711,
+STORE, 140175935242240, 140175985139711,
+STORE, 140175935234048, 140175985139711,
+STORE, 140175935201280, 140175985139711,
+STORE, 140175935193088, 140175985139711,
+STORE, 140175935176704, 140175985139711,
+STORE, 140175935168512, 140175985139711,
+STORE, 140175935152128, 140175985139711,
+STORE, 140175935143936, 140175985139711,
+STORE, 140175935127552, 140175985139711,
+STORE, 140175935119360, 140175985139711,
+STORE, 140175935086592, 140175985139711,
+STORE, 140175935078400, 140175985139711,
+STORE, 140175935062016, 140175985139711,
+STORE, 140175935053824, 140175985139711,
+STORE, 140175935037440, 140175985139711,
+STORE, 140175935029248, 140175985139711,
+STORE, 140175935012864, 140175985139711,
+STORE, 140175935004672, 140175985139711,
+STORE, 140175934971904, 140175985139711,
+STORE, 140175934963712, 140175985139711,
+STORE, 140175934947328, 140175985139711,
+STORE, 140175934939136, 140175985139711,
+STORE, 140175934922752, 140175985139711,
+STORE, 140175934914560, 140175985139711,
+STORE, 140175934898176, 140175985139711,
+STORE, 140175934889984, 140175985139711,
+STORE, 140175934857216, 140175985139711,
+STORE, 140175934849024, 140175985139711,
+STORE, 140175934832640, 140175985139711,
+STORE, 140175934824448, 140175985139711,
+STORE, 140175934808064, 140175985139711,
+STORE, 140175934799872, 140175985139711,
+STORE, 140175934783488, 140175985139711,
+STORE, 140175934775296, 140175985139711,
+STORE, 140175934742528, 140175985139711,
+STORE, 140175934734336, 140175985139711,
+STORE, 140175934717952, 140175985139711,
+STORE, 140175934709760, 140175985139711,
+STORE, 140175934693376, 140175985139711,
+STORE, 140175934685184, 140175985139711,
+STORE, 140175934668800, 140175985139711,
+STORE, 140175934660608, 140175985139711,
+STORE, 140175934627840, 140175985139711,
+STORE, 140175934619648, 140175985139711,
+STORE, 140175934603264, 140175985139711,
+STORE, 140175934595072, 140175985139711,
+STORE, 140175934578688, 140175985139711,
+STORE, 140175934570496, 140175985139711,
+STORE, 140175934554112, 140175985139711,
+STORE, 140175934545920, 140175985139711,
+STORE, 140175934513152, 140175985139711,
+STORE, 140175934504960, 140175985139711,
+STORE, 140175934488576, 140175985139711,
+STORE, 140175934480384, 140175985139711,
+STORE, 140175934464000, 140175985139711,
+STORE, 140175934455808, 140175985139711,
+STORE, 140175934439424, 140175985139711,
+STORE, 140175934431232, 140175985139711,
+STORE, 140175934398464, 140175985139711,
+STORE, 140175934390272, 140175985139711,
+STORE, 140175934373888, 140175985139711,
+STORE, 140175934365696, 140175985139711,
+STORE, 140175934349312, 140175985139711,
+STORE, 140175934341120, 140175985139711,
+STORE, 140175934324736, 140175985139711,
+STORE, 140175932227584, 140175985139711,
+STORE, 140175932219392, 140175985139711,
+STORE, 140175932186624, 140175985139711,
+STORE, 140175932178432, 140175985139711,
+STORE, 140175932162048, 140175985139711,
+STORE, 140175932153856, 140175985139711,
+STORE, 140175932137472, 140175985139711,
+STORE, 53080064, 57884671,
+STORE, 140175932129280, 140175985139711,
+STORE, 140175932112896, 140175985139711,
+STORE, 140175932104704, 140175985139711,
+STORE, 140175932071936, 140175985139711,
+STORE, 140175932063744, 140175985139711,
+STORE, 140175932047360, 140175985139711,
+STORE, 140175932039168, 140175985139711,
+STORE, 140175932022784, 140175985139711,
+STORE, 140175932014592, 140175985139711,
+STORE, 140175931998208, 140175985139711,
+STORE, 140175931990016, 140175985139711,
+STORE, 140175931957248, 140175985139711,
+STORE, 140175931949056, 140175985139711,
+STORE, 140175931932672, 140175985139711,
+STORE, 140175931924480, 140175985139711,
+STORE, 140175931908096, 140175985139711,
+STORE, 140175931899904, 140175985139711,
+STORE, 140175931883520, 140175985139711,
+STORE, 140175931875328, 140175985139711,
+STORE, 140175931842560, 140175985139711,
+STORE, 140175931834368, 140175985139711,
+STORE, 140175931817984, 140175985139711,
+STORE, 140175931809792, 140175985139711,
+STORE, 140175931793408, 140175985139711,
+STORE, 140175931785216, 140175985139711,
+STORE, 140175931768832, 140175985139711,
+STORE, 140175931760640, 140175985139711,
+STORE, 140175931727872, 140175985139711,
+STORE, 140175931719680, 140175985139711,
+STORE, 140175931703296, 140175985139711,
+STORE, 140175931695104, 140175985139711,
+STORE, 140175931678720, 140175985139711,
+STORE, 140175931670528, 140175985139711,
+STORE, 140175931654144, 140175985139711,
+STORE, 140175931645952, 140175985139711,
+STORE, 140175931613184, 140175985139711,
+STORE, 140175931604992, 140175985139711,
+STORE, 140175931588608, 140175985139711,
+STORE, 140175931580416, 140175985139711,
+STORE, 140175931564032, 140175985139711,
+STORE, 140175931555840, 140175985139711,
+STORE, 140175931539456, 140175985139711,
+STORE, 140175931531264, 140175985139711,
+STORE, 140175931498496, 140175985139711,
+STORE, 140175931490304, 140175985139711,
+STORE, 140175931473920, 140175985139711,
+STORE, 140175931465728, 140175985139711,
+STORE, 140175931449344, 140175985139711,
+STORE, 140175931441152, 140175985139711,
+STORE, 140175931424768, 140175985139711,
+STORE, 140175931416576, 140175985139711,
+STORE, 140175931383808, 140175985139711,
+STORE, 140175931375616, 140175985139711,
+STORE, 140175931359232, 140175985139711,
+STORE, 140175931351040, 140175985139711,
+STORE, 140175931334656, 140175985139711,
+STORE, 140175931326464, 140175985139711,
+STORE, 140175931310080, 140175985139711,
+STORE, 140175931301888, 140175985139711,
+STORE, 140175931269120, 140175985139711,
+STORE, 140175931260928, 140175985139711,
+STORE, 140175931244544, 140175985139711,
+STORE, 140175931236352, 140175985139711,
+STORE, 140175931219968, 140175985139711,
+STORE, 140175931211776, 140175985139711,
+STORE, 140175931195392, 140175985139711,
+STORE, 140175931187200, 140175985139711,
+STORE, 140175931154432, 140175985139711,
+STORE, 140175931146240, 140175985139711,
+STORE, 140175931129856, 140175985139711,
+STORE, 140175931121664, 140175985139711,
+STORE, 140175931105280, 140175985139711,
+STORE, 140175931097088, 140175985139711,
+STORE, 140175931080704, 140175985139711,
+STORE, 140175931072512, 140175985139711,
+STORE, 140175931039744, 140175985139711,
+STORE, 140175931031552, 140175985139711,
+STORE, 140175931015168, 140175985139711,
+STORE, 140175931006976, 140175985139711,
+STORE, 140175930990592, 140175985139711,
+STORE, 140175930982400, 140175985139711,
+STORE, 140175930966016, 140175985139711,
+STORE, 140175930957824, 140175985139711,
+STORE, 140175930925056, 140175985139711,
+STORE, 140175930916864, 140175985139711,
+STORE, 140175930900480, 140175985139711,
+STORE, 140175930892288, 140175985139711,
+STORE, 140175930875904, 140175985139711,
+STORE, 140175930867712, 140175985139711,
+STORE, 140175930851328, 140175985139711,
+STORE, 140175930843136, 140175985139711,
+STORE, 140175930810368, 140175985139711,
+STORE, 140175930802176, 140175985139711,
+STORE, 140175930785792, 140175985139711,
+STORE, 140175930777600, 140175985139711,
+STORE, 140175930761216, 140175985139711,
+STORE, 140175930753024, 140175985139711,
+STORE, 140175930736640, 140175985139711,
+STORE, 140175930728448, 140175985139711,
+STORE, 140175930695680, 140175985139711,
+STORE, 140175930687488, 140175985139711,
+STORE, 140175930671104, 140175985139711,
+STORE, 140175930662912, 140175985139711,
+STORE, 140175930646528, 140175985139711,
+STORE, 140175930638336, 140175985139711,
+STORE, 140175930621952, 140175985139711,
+STORE, 140175930613760, 140175985139711,
+STORE, 140175930580992, 140175985139711,
+STORE, 140175930572800, 140175985139711,
+STORE, 140175930556416, 140175985139711,
+STORE, 140175930548224, 140175985139711,
+STORE, 140175930531840, 140175985139711,
+STORE, 140175930523648, 140175985139711,
+STORE, 140175930507264, 140175985139711,
+STORE, 140175928410112, 140175985139711,
+STORE, 140175928401920, 140175985139711,
+STORE, 140175928369152, 140175985139711,
+STORE, 140175928360960, 140175985139711,
+STORE, 140175928344576, 140175985139711,
+STORE, 140175928336384, 140175985139711,
+STORE, 140175928320000, 140175985139711,
+STORE, 140175928311808, 140175985139711,
+STORE, 140175928295424, 140175985139711,
+STORE, 140175927242752, 140175985139711,
+SNULL, 140175956627455, 140175985139711,
+STORE, 140175927242752, 140175956627455,
+STORE, 140175956627456, 140175985139711,
+ };
+ static const unsigned long set24[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140735281639424, 140737488351231,
+SNULL, 140735281643519, 140737488351231,
+STORE, 140735281639424, 140735281643519,
+STORE, 140735281508352, 140735281643519,
+STORE, 94717834911744, 94717834928127,
+SNULL, 94717834915839, 94717834928127,
+STORE, 94717834911744, 94717834915839,
+STORE, 94717834915840, 94717834928127,
+ERASE, 94717834915840, 94717834928127,
+STORE, 94717834919936, 94717834928127,
+STORE, 140428246065152, 140428248317951,
+SNULL, 140428246208511, 140428248317951,
+STORE, 140428246065152, 140428246208511,
+STORE, 140428246208512, 140428248317951,
+ERASE, 140428246208512, 140428248317951,
+STORE, 140428248305664, 140428248313855,
+STORE, 140428248313856, 140428248317951,
+STORE, 140735281811456, 140735281815551,
+STORE, 140735281799168, 140735281811455,
+STORE, 140428248297472, 140428248305663,
+STORE, 140428243841024, 140428246065151,
+SNULL, 140428245491711, 140428246065151,
+STORE, 140428243841024, 140428245491711,
+STORE, 140428245491712, 140428246065151,
+SNULL, 140428245491712, 140428246061055,
+STORE, 140428246061056, 140428246065151,
+STORE, 140428245491712, 140428246061055,
+ERASE, 140428245491712, 140428246061055,
+STORE, 140428245491712, 140428246061055,
+ERASE, 140428246061056, 140428246065151,
+STORE, 140428246061056, 140428246065151,
+STORE, 140428248268800, 140428248297471,
+STORE, 140428241625088, 140428243841023,
+SNULL, 140428241625088, 140428241723391,
+STORE, 140428241723392, 140428243841023,
+STORE, 140428241625088, 140428241723391,
+SNULL, 140428243816447, 140428243841023,
+STORE, 140428241723392, 140428243816447,
+STORE, 140428243816448, 140428243841023,
+SNULL, 140428243816448, 140428243824639,
+STORE, 140428243824640, 140428243841023,
+STORE, 140428243816448, 140428243824639,
+ERASE, 140428243816448, 140428243824639,
+STORE, 140428243816448, 140428243824639,
+ERASE, 140428243824640, 140428243841023,
+STORE, 140428243824640, 140428243841023,
+STORE, 140428237828096, 140428241625087,
+SNULL, 140428237828096, 140428239486975,
+STORE, 140428239486976, 140428241625087,
+STORE, 140428237828096, 140428239486975,
+SNULL, 140428241584127, 140428241625087,
+STORE, 140428239486976, 140428241584127,
+STORE, 140428241584128, 140428241625087,
+SNULL, 140428241584128, 140428241608703,
+STORE, 140428241608704, 140428241625087,
+STORE, 140428241584128, 140428241608703,
+ERASE, 140428241584128, 140428241608703,
+STORE, 140428241584128, 140428241608703,
+ERASE, 140428241608704, 140428241625087,
+STORE, 140428241608704, 140428241625087,
+STORE, 140428235567104, 140428237828095,
+SNULL, 140428235567104, 140428235718655,
+STORE, 140428235718656, 140428237828095,
+STORE, 140428235567104, 140428235718655,
+SNULL, 140428237811711, 140428237828095,
+STORE, 140428235718656, 140428237811711,
+STORE, 140428237811712, 140428237828095,
+SNULL, 140428237811712, 140428237819903,
+STORE, 140428237819904, 140428237828095,
+STORE, 140428237811712, 140428237819903,
+ERASE, 140428237811712, 140428237819903,
+STORE, 140428237811712, 140428237819903,
+ERASE, 140428237819904, 140428237828095,
+STORE, 140428237819904, 140428237828095,
+STORE, 140428233445376, 140428235567103,
+SNULL, 140428233445376, 140428233461759,
+STORE, 140428233461760, 140428235567103,
+STORE, 140428233445376, 140428233461759,
+SNULL, 140428235558911, 140428235567103,
+STORE, 140428233461760, 140428235558911,
+STORE, 140428235558912, 140428235567103,
+ERASE, 140428235558912, 140428235567103,
+STORE, 140428235558912, 140428235567103,
+STORE, 140428231315456, 140428233445375,
+SNULL, 140428231315456, 140428231344127,
+STORE, 140428231344128, 140428233445375,
+STORE, 140428231315456, 140428231344127,
+SNULL, 140428233437183, 140428233445375,
+STORE, 140428231344128, 140428233437183,
+STORE, 140428233437184, 140428233445375,
+ERASE, 140428233437184, 140428233445375,
+STORE, 140428233437184, 140428233445375,
+STORE, 140428248260608, 140428248268799,
+STORE, 140428229062656, 140428231315455,
+SNULL, 140428229062656, 140428229214207,
+STORE, 140428229214208, 140428231315455,
+STORE, 140428229062656, 140428229214207,
+SNULL, 140428231307263, 140428231315455,
+STORE, 140428229214208, 140428231307263,
+STORE, 140428231307264, 140428231315455,
+ERASE, 140428231307264, 140428231315455,
+STORE, 140428231307264, 140428231315455,
+STORE, 140428226891776, 140428229062655,
+SNULL, 140428226891776, 140428226961407,
+STORE, 140428226961408, 140428229062655,
+STORE, 140428226891776, 140428226961407,
+SNULL, 140428229054463, 140428229062655,
+STORE, 140428226961408, 140428229054463,
+STORE, 140428229054464, 140428229062655,
+ERASE, 140428229054464, 140428229062655,
+STORE, 140428229054464, 140428229062655,
+STORE, 140428223680512, 140428226891775,
+SNULL, 140428223680512, 140428224757759,
+STORE, 140428224757760, 140428226891775,
+STORE, 140428223680512, 140428224757759,
+SNULL, 140428226854911, 140428226891775,
+STORE, 140428224757760, 140428226854911,
+STORE, 140428226854912, 140428226891775,
+ERASE, 140428226854912, 140428226891775,
+STORE, 140428226854912, 140428226891775,
+STORE, 140428221546496, 140428223680511,
+SNULL, 140428221546496, 140428221575167,
+STORE, 140428221575168, 140428223680511,
+STORE, 140428221546496, 140428221575167,
+SNULL, 140428223672319, 140428223680511,
+STORE, 140428221575168, 140428223672319,
+STORE, 140428223672320, 140428223680511,
+ERASE, 140428223672320, 140428223680511,
+STORE, 140428223672320, 140428223680511,
+STORE, 140428219236352, 140428221546495,
+SNULL, 140428219236352, 140428219441151,
+STORE, 140428219441152, 140428221546495,
+STORE, 140428219236352, 140428219441151,
+SNULL, 140428221538303, 140428221546495,
+STORE, 140428219441152, 140428221538303,
+STORE, 140428221538304, 140428221546495,
+ERASE, 140428221538304, 140428221546495,
+STORE, 140428221538304, 140428221546495,
+STORE, 140428216852480, 140428219236351,
+SNULL, 140428216852480, 140428217044991,
+STORE, 140428217044992, 140428219236351,
+STORE, 140428216852480, 140428217044991,
+SNULL, 140428219138047, 140428219236351,
+STORE, 140428217044992, 140428219138047,
+STORE, 140428219138048, 140428219236351,
+ERASE, 140428219138048, 140428219236351,
+STORE, 140428219138048, 140428219236351,
+STORE, 140428248252416, 140428248268799,
+STORE, 140428214284288, 140428216852479,
+SNULL, 140428214284288, 140428214751231,
+STORE, 140428214751232, 140428216852479,
+STORE, 140428214284288, 140428214751231,
+SNULL, 140428216844287, 140428216852479,
+STORE, 140428214751232, 140428216844287,
+STORE, 140428216844288, 140428216852479,
+ERASE, 140428216844288, 140428216852479,
+STORE, 140428216844288, 140428216852479,
+STORE, 140428212170752, 140428214284287,
+SNULL, 140428212170752, 140428212183039,
+STORE, 140428212183040, 140428214284287,
+STORE, 140428212170752, 140428212183039,
+SNULL, 140428214276095, 140428214284287,
+STORE, 140428212183040, 140428214276095,
+STORE, 140428214276096, 140428214284287,
+ERASE, 140428214276096, 140428214284287,
+STORE, 140428214276096, 140428214284287,
+STORE, 140428209991680, 140428212170751,
+SNULL, 140428209991680, 140428210069503,
+STORE, 140428210069504, 140428212170751,
+STORE, 140428209991680, 140428210069503,
+SNULL, 140428212162559, 140428212170751,
+STORE, 140428210069504, 140428212162559,
+STORE, 140428212162560, 140428212170751,
+ERASE, 140428212162560, 140428212170751,
+STORE, 140428212162560, 140428212170751,
+STORE, 140428207874048, 140428209991679,
+SNULL, 140428207874048, 140428207890431,
+STORE, 140428207890432, 140428209991679,
+STORE, 140428207874048, 140428207890431,
+SNULL, 140428209983487, 140428209991679,
+STORE, 140428207890432, 140428209983487,
+STORE, 140428209983488, 140428209991679,
+ERASE, 140428209983488, 140428209991679,
+STORE, 140428209983488, 140428209991679,
+STORE, 140428248244224, 140428248268799,
+STORE, 140428248231936, 140428248268799,
+SNULL, 140428241600511, 140428241608703,
+STORE, 140428241584128, 140428241600511,
+STORE, 140428241600512, 140428241608703,
+SNULL, 140428209987583, 140428209991679,
+STORE, 140428209983488, 140428209987583,
+STORE, 140428209987584, 140428209991679,
+SNULL, 140428212166655, 140428212170751,
+STORE, 140428212162560, 140428212166655,
+STORE, 140428212166656, 140428212170751,
+SNULL, 140428214280191, 140428214284287,
+STORE, 140428214276096, 140428214280191,
+STORE, 140428214280192, 140428214284287,
+SNULL, 140428243820543, 140428243824639,
+STORE, 140428243816448, 140428243820543,
+STORE, 140428243820544, 140428243824639,
+SNULL, 140428216848383, 140428216852479,
+STORE, 140428216844288, 140428216848383,
+STORE, 140428216848384, 140428216852479,
+SNULL, 140428219232255, 140428219236351,
+STORE, 140428219138048, 140428219232255,
+STORE, 140428219232256, 140428219236351,
+SNULL, 140428221542399, 140428221546495,
+STORE, 140428221538304, 140428221542399,
+STORE, 140428221542400, 140428221546495,
+SNULL, 140428223676415, 140428223680511,
+STORE, 140428223672320, 140428223676415,
+STORE, 140428223676416, 140428223680511,
+SNULL, 140428226863103, 140428226891775,
+STORE, 140428226854912, 140428226863103,
+STORE, 140428226863104, 140428226891775,
+SNULL, 140428229058559, 140428229062655,
+STORE, 140428229054464, 140428229058559,
+STORE, 140428229058560, 140428229062655,
+SNULL, 140428231311359, 140428231315455,
+STORE, 140428231307264, 140428231311359,
+STORE, 140428231311360, 140428231315455,
+SNULL, 140428233441279, 140428233445375,
+STORE, 140428233437184, 140428233441279,
+STORE, 140428233441280, 140428233445375,
+SNULL, 140428235563007, 140428235567103,
+STORE, 140428235558912, 140428235563007,
+STORE, 140428235563008, 140428235567103,
+SNULL, 140428237815807, 140428237819903,
+STORE, 140428237811712, 140428237815807,
+STORE, 140428237815808, 140428237819903,
+SNULL, 140428246056959, 140428246061055,
+STORE, 140428245491712, 140428246056959,
+STORE, 140428246056960, 140428246061055,
+SNULL, 94717834924031, 94717834928127,
+STORE, 94717834919936, 94717834924031,
+STORE, 94717834924032, 94717834928127,
+SNULL, 140428248309759, 140428248313855,
+STORE, 140428248305664, 140428248309759,
+STORE, 140428248309760, 140428248313855,
+ERASE, 140428248268800, 140428248297471,
+STORE, 94717843058688, 94717843193855,
+STORE, 94749677137920, 94749677559807,
+STORE, 94749677563904, 94749677604863,
+STORE, 94749677604864, 94749677608959,
+STORE, 94749710970880, 94749711241215,
+STORE, 140490884894720, 140490884935679,
+STORE, 140490884935680, 140490887032831,
+STORE, 140490887032832, 140490887036927,
+STORE, 140490887036928, 140490887041023,
+STORE, 140490887041024, 140490887065599,
+STORE, 140490887065600, 140490887110655,
+STORE, 140490887110656, 140490889203711,
+STORE, 140490889203712, 140490889207807,
+STORE, 140490889207808, 140490889211903,
+STORE, 140490889211904, 140490889293823,
+STORE, 140490889293824, 140490891390975,
+STORE, 140490891390976, 140490891395071,
+STORE, 140490891395072, 140490891399167,
+STORE, 140490891399168, 140490891407359,
+STORE, 140490891407360, 140490891436031,
+STORE, 140490891436032, 140490893529087,
+STORE, 140490893529088, 140490893533183,
+STORE, 140490893533184, 140490893537279,
+STORE, 140490893537280, 140490901979135,
+STORE, 140490901979136, 140490901991423,
+STORE, 140490901991424, 140490904084479,
+STORE, 140490904084480, 140490904088575,
+STORE, 140490904088576, 140490904092671,
+STORE, 140490904092672, 140490904559615,
+STORE, 140490904559616, 140490906652671,
+STORE, 140490906652672, 140490906656767,
+STORE, 140490906656768, 140490906660863,
+STORE, 140490906660864, 140490906677247,
+STORE, 140490906677248, 140490908770303,
+STORE, 140490908770304, 140490908774399,
+STORE, 140490908774400, 140490908778495,
+STORE, 140490908778496, 140490908794879,
+STORE, 140490908794880, 140490910887935,
+STORE, 140490910887936, 140490910892031,
+STORE, 140490910892032, 140490910896127,
+STORE, 140490910896128, 140490912555007,
+STORE, 140490912555008, 140490914652159,
+STORE, 140490914652160, 140490914668543,
+STORE, 140490914668544, 140490914676735,
+STORE, 140490914676736, 140490914693119,
+STORE, 140490914693120, 140490914791423,
+STORE, 140490914791424, 140490916884479,
+STORE, 140490916884480, 140490916888575,
+STORE, 140490916888576, 140490916892671,
+STORE, 140490916892672, 140490916909055,
+STORE, 140490916909056, 140490916937727,
+STORE, 140490916937728, 140490919030783,
+STORE, 140490919030784, 140490919034879,
+STORE, 140490919034880, 140490919038975,
+STORE, 140490919038976, 140490919190527,
+STORE, 140490919190528, 140490921283583,
+STORE, 140490921283584, 140490921287679,
+STORE, 140490921287680, 140490921291775,
+STORE, 140490921291776, 140490921299967,
+STORE, 140490921299968, 140490921390079,
+STORE, 140490921390080, 140490923483135,
+STORE, 140490923483136, 140490923487231,
+STORE, 140490923487232, 140490923491327,
+STORE, 140490923491328, 140490923757567,
+STORE, 140490923757568, 140490925850623,
+STORE, 140490925850624, 140490925867007,
+STORE, 140490925867008, 140490925871103,
+STORE, 140490925871104, 140490925875199,
+STORE, 140490925875200, 140490925903871,
+STORE, 140490925903872, 140490928001023,
+STORE, 140490928001024, 140490928005119,
+STORE, 140490928005120, 140490928009215,
+STORE, 140490928009216, 140490928152575,
+STORE, 140490930184192, 140490930221055,
+STORE, 140490930221056, 140490930237439,
+STORE, 140490930237440, 140490930241535,
+STORE, 140490930241536, 140490930245631,
+STORE, 140490930245632, 140490930249727,
+STORE, 140490930249728, 140490930253823,
+STORE, 140490930253824, 140490930257919,
+STORE, 140490930257920, 140490930262015,
+STORE, 140724611694592, 140724611829759,
+STORE, 140724612427776, 140724612440063,
+STORE, 140724612440064, 140724612444159,
+STORE, 94103163662336, 94103163772927,
+STORE, 94103165865984, 94103165874175,
+STORE, 94103165874176, 94103165878271,
+STORE, 94103165878272, 94103165886463,
+STORE, 94103182548992, 94103182684159,
+STORE, 140092694708224, 140092696367103,
+STORE, 140092696367104, 140092698464255,
+STORE, 140092698464256, 140092698480639,
+STORE, 140092698480640, 140092698488831,
+STORE, 140092698488832, 140092698505215,
+STORE, 140092698505216, 140092698648575,
+STORE, 140092700708864, 140092700717055,
+STORE, 140092700745728, 140092700749823,
+STORE, 140092700749824, 140092700753919,
+STORE, 140092700753920, 140092700758015,
+STORE, 140736800911360, 140736801046527,
+STORE, 140736802308096, 140736802320383,
+STORE, 140736802320384, 140736802324479,
+STORE, 93948802064384, 93948802174975,
+STORE, 93948804268032, 93948804276223,
+STORE, 93948804276224, 93948804280319,
+STORE, 93948804280320, 93948804288511,
+STORE, 93948806266880, 93948806402047,
+STORE, 140222999113728, 140223000772607,
+STORE, 140223000772608, 140223002869759,
+STORE, 140223002869760, 140223002886143,
+STORE, 140223002886144, 140223002894335,
+STORE, 140223002894336, 140223002910719,
+STORE, 140223002910720, 140223003054079,
+STORE, 140223005114368, 140223005122559,
+STORE, 140223005151232, 140223005155327,
+STORE, 140223005155328, 140223005159423,
+STORE, 140223005159424, 140223005163519,
+STORE, 140720877506560, 140720877641727,
+STORE, 140720878231552, 140720878243839,
+STORE, 140720878243840, 140720878247935,
+STORE, 140737488347136, 140737488351231,
+STORE, 140733232087040, 140737488351231,
+SNULL, 140733232091135, 140737488351231,
+STORE, 140733232087040, 140733232091135,
+STORE, 140733231955968, 140733232091135,
+STORE, 4194304, 5128191,
+STORE, 7221248, 7241727,
+STORE, 7241728, 7249919,
+STORE, 140161681321984, 140161683574783,
+SNULL, 140161681465343, 140161683574783,
+STORE, 140161681321984, 140161681465343,
+STORE, 140161681465344, 140161683574783,
+ERASE, 140161681465344, 140161683574783,
+STORE, 140161683562496, 140161683570687,
+STORE, 140161683570688, 140161683574783,
+STORE, 140733232214016, 140733232218111,
+STORE, 140733232201728, 140733232214015,
+STORE, 140161683533824, 140161683562495,
+STORE, 140161683525632, 140161683533823,
+STORE, 140161678159872, 140161681321983,
+SNULL, 140161678159872, 140161679220735,
+STORE, 140161679220736, 140161681321983,
+STORE, 140161678159872, 140161679220735,
+SNULL, 140161681313791, 140161681321983,
+STORE, 140161679220736, 140161681313791,
+STORE, 140161681313792, 140161681321983,
+ERASE, 140161681313792, 140161681321983,
+STORE, 140161681313792, 140161681321983,
+STORE, 140161674362880, 140161678159871,
+SNULL, 140161674362880, 140161676021759,
+STORE, 140161676021760, 140161678159871,
+STORE, 140161674362880, 140161676021759,
+SNULL, 140161678118911, 140161678159871,
+STORE, 140161676021760, 140161678118911,
+STORE, 140161678118912, 140161678159871,
+SNULL, 140161678118912, 140161678143487,
+STORE, 140161678143488, 140161678159871,
+STORE, 140161678118912, 140161678143487,
+ERASE, 140161678118912, 140161678143487,
+STORE, 140161678118912, 140161678143487,
+ERASE, 140161678143488, 140161678159871,
+STORE, 140161678143488, 140161678159871,
+STORE, 140161683513344, 140161683533823,
+SNULL, 140161678135295, 140161678143487,
+STORE, 140161678118912, 140161678135295,
+STORE, 140161678135296, 140161678143487,
+SNULL, 140161681317887, 140161681321983,
+STORE, 140161681313792, 140161681317887,
+STORE, 140161681317888, 140161681321983,
+SNULL, 7233535, 7241727,
+STORE, 7221248, 7233535,
+STORE, 7233536, 7241727,
+SNULL, 140161683566591, 140161683570687,
+STORE, 140161683562496, 140161683566591,
+STORE, 140161683566592, 140161683570687,
+ERASE, 140161683533824, 140161683562495,
+STORE, 25477120, 25612287,
+STORE, 25477120, 25759743,
+STORE, 140161681829888, 140161683513343,
+STORE, 25477120, 25915391,
+STORE, 25477120, 26054655,
+SNULL, 25800703, 26054655,
+STORE, 25477120, 25800703,
+STORE, 25800704, 26054655,
+ERASE, 25800704, 26054655,
+STORE, 140737488347136, 140737488351231,
+STORE, 140723218452480, 140737488351231,
+SNULL, 140723218456575, 140737488351231,
+STORE, 140723218452480, 140723218456575,
+STORE, 140723218321408, 140723218456575,
+STORE, 4194304, 26279935,
+STORE, 28372992, 28454911,
+STORE, 28454912, 29806591,
+STORE, 140398872264704, 140398874517503,
+SNULL, 140398872408063, 140398874517503,
+STORE, 140398872264704, 140398872408063,
+STORE, 140398872408064, 140398874517503,
+ERASE, 140398872408064, 140398874517503,
+STORE, 140398874505216, 140398874513407,
+STORE, 140398874513408, 140398874517503,
+STORE, 140723219247104, 140723219251199,
+STORE, 140723219234816, 140723219247103,
+STORE, 140398874476544, 140398874505215,
+STORE, 140398874468352, 140398874476543,
+STORE, 140398868430848, 140398872264703,
+SNULL, 140398868430848, 140398870138879,
+STORE, 140398870138880, 140398872264703,
+STORE, 140398868430848, 140398870138879,
+SNULL, 140398872231935, 140398872264703,
+STORE, 140398870138880, 140398872231935,
+STORE, 140398872231936, 140398872264703,
+ERASE, 140398872231936, 140398872264703,
+STORE, 140398872231936, 140398872264703,
+STORE, 140398866235392, 140398868430847,
+SNULL, 140398866235392, 140398866329599,
+STORE, 140398866329600, 140398868430847,
+STORE, 140398866235392, 140398866329599,
+SNULL, 140398868422655, 140398868430847,
+STORE, 140398866329600, 140398868422655,
+STORE, 140398868422656, 140398868430847,
+ERASE, 140398868422656, 140398868430847,
+STORE, 140398868422656, 140398868430847,
+STORE, 140398863716352, 140398866235391,
+SNULL, 140398863716352, 140398864130047,
+STORE, 140398864130048, 140398866235391,
+STORE, 140398863716352, 140398864130047,
+SNULL, 140398866223103, 140398866235391,
+STORE, 140398864130048, 140398866223103,
+STORE, 140398866223104, 140398866235391,
+ERASE, 140398866223104, 140398866235391,
+STORE, 140398866223104, 140398866235391,
+STORE, 140398861082624, 140398863716351,
+SNULL, 140398861082624, 140398861611007,
+STORE, 140398861611008, 140398863716351,
+STORE, 140398861082624, 140398861611007,
+SNULL, 140398863708159, 140398863716351,
+STORE, 140398861611008, 140398863708159,
+STORE, 140398863708160, 140398863716351,
+ERASE, 140398863708160, 140398863716351,
+STORE, 140398863708160, 140398863716351,
+STORE, 140398858969088, 140398861082623,
+SNULL, 140398858969088, 140398858981375,
+STORE, 140398858981376, 140398861082623,
+STORE, 140398858969088, 140398858981375,
+SNULL, 140398861074431, 140398861082623,
+STORE, 140398858981376, 140398861074431,
+STORE, 140398861074432, 140398861082623,
+ERASE, 140398861074432, 140398861082623,
+STORE, 140398861074432, 140398861082623,
+STORE, 140398856765440, 140398858969087,
+SNULL, 140398856765440, 140398856867839,
+STORE, 140398856867840, 140398858969087,
+STORE, 140398856765440, 140398856867839,
+SNULL, 140398858960895, 140398858969087,
+STORE, 140398856867840, 140398858960895,
+STORE, 140398858960896, 140398858969087,
+ERASE, 140398858960896, 140398858969087,
+STORE, 140398858960896, 140398858969087,
+STORE, 140398874460160, 140398874476543,
+STORE, 140398853603328, 140398856765439,
+SNULL, 140398853603328, 140398854664191,
+STORE, 140398854664192, 140398856765439,
+STORE, 140398853603328, 140398854664191,
+SNULL, 140398856757247, 140398856765439,
+STORE, 140398854664192, 140398856757247,
+STORE, 140398856757248, 140398856765439,
+ERASE, 140398856757248, 140398856765439,
+STORE, 140398856757248, 140398856765439,
+STORE, 140398849806336, 140398853603327,
+SNULL, 140398849806336, 140398851465215,
+STORE, 140398851465216, 140398853603327,
+STORE, 140398849806336, 140398851465215,
+SNULL, 140398853562367, 140398853603327,
+STORE, 140398851465216, 140398853562367,
+STORE, 140398853562368, 140398853603327,
+SNULL, 140398853562368, 140398853586943,
+STORE, 140398853586944, 140398853603327,
+STORE, 140398853562368, 140398853586943,
+ERASE, 140398853562368, 140398853586943,
+STORE, 140398853562368, 140398853586943,
+ERASE, 140398853586944, 140398853603327,
+STORE, 140398853586944, 140398853603327,
+STORE, 140398874447872, 140398874476543,
+SNULL, 140398853578751, 140398853586943,
+STORE, 140398853562368, 140398853578751,
+STORE, 140398853578752, 140398853586943,
+SNULL, 140398856761343, 140398856765439,
+STORE, 140398856757248, 140398856761343,
+STORE, 140398856761344, 140398856765439,
+SNULL, 140398858964991, 140398858969087,
+STORE, 140398858960896, 140398858964991,
+STORE, 140398858964992, 140398858969087,
+SNULL, 140398861078527, 140398861082623,
+STORE, 140398861074432, 140398861078527,
+STORE, 140398861078528, 140398861082623,
+SNULL, 140398863712255, 140398863716351,
+STORE, 140398863708160, 140398863712255,
+STORE, 140398863712256, 140398863716351,
+SNULL, 140398866231295, 140398866235391,
+STORE, 140398866223104, 140398866231295,
+STORE, 140398866231296, 140398866235391,
+SNULL, 140398868426751, 140398868430847,
+STORE, 140398868422656, 140398868426751,
+STORE, 140398868426752, 140398868430847,
+SNULL, 140398872236031, 140398872264703,
+STORE, 140398872231936, 140398872236031,
+STORE, 140398872236032, 140398872264703,
+SNULL, 28405759, 28454911,
+STORE, 28372992, 28405759,
+STORE, 28405760, 28454911,
+SNULL, 140398874509311, 140398874513407,
+STORE, 140398874505216, 140398874509311,
+STORE, 140398874509312, 140398874513407,
+ERASE, 140398874476544, 140398874505215,
+STORE, 43278336, 43413503,
+STORE, 140398872764416, 140398874447871,
+STORE, 140398874501120, 140398874505215,
+STORE, 140398872629248, 140398872764415,
+STORE, 43278336, 43556863,
+STORE, 140398847709184, 140398849806335,
+STORE, 140398874492928, 140398874505215,
+STORE, 140398874484736, 140398874505215,
+STORE, 140398874447872, 140398874484735,
+STORE, 140398872612864, 140398872764415,
+STORE, 43278336, 43692031,
+STORE, 43278336, 43880447,
+STORE, 140398872604672, 140398872764415,
+STORE, 140398872596480, 140398872764415,
+STORE, 43278336, 44044287,
+STORE, 140398872580096, 140398872764415,
+STORE, 140737488347136, 140737488351231,
+STORE, 140734403092480, 140737488351231,
+SNULL, 140734403096575, 140737488351231,
+STORE, 140734403092480, 140734403096575,
+STORE, 140734402961408, 140734403096575,
+STORE, 4194304, 5128191,
+STORE, 7221248, 7241727,
+STORE, 7241728, 7249919,
+STORE, 140240662380544, 140240664633343,
+SNULL, 140240662523903, 140240664633343,
+STORE, 140240662380544, 140240662523903,
+STORE, 140240662523904, 140240664633343,
+ERASE, 140240662523904, 140240664633343,
+STORE, 140240664621056, 140240664629247,
+STORE, 140240664629248, 140240664633343,
+STORE, 140734403145728, 140734403149823,
+STORE, 140734403133440, 140734403145727,
+STORE, 140240664592384, 140240664621055,
+STORE, 140240664584192, 140240664592383,
+STORE, 140240659218432, 140240662380543,
+SNULL, 140240659218432, 140240660279295,
+STORE, 140240660279296, 140240662380543,
+STORE, 140240659218432, 140240660279295,
+SNULL, 140240662372351, 140240662380543,
+STORE, 140240660279296, 140240662372351,
+STORE, 140240662372352, 140240662380543,
+ERASE, 140240662372352, 140240662380543,
+STORE, 140240662372352, 140240662380543,
+STORE, 140240655421440, 140240659218431,
+SNULL, 140240655421440, 140240657080319,
+STORE, 140240657080320, 140240659218431,
+STORE, 140240655421440, 140240657080319,
+SNULL, 140240659177471, 140240659218431,
+STORE, 140240657080320, 140240659177471,
+STORE, 140240659177472, 140240659218431,
+SNULL, 140240659177472, 140240659202047,
+STORE, 140240659202048, 140240659218431,
+STORE, 140240659177472, 140240659202047,
+ERASE, 140240659177472, 140240659202047,
+STORE, 140240659177472, 140240659202047,
+ERASE, 140240659202048, 140240659218431,
+STORE, 140240659202048, 140240659218431,
+STORE, 140240664571904, 140240664592383,
+SNULL, 140240659193855, 140240659202047,
+STORE, 140240659177472, 140240659193855,
+STORE, 140240659193856, 140240659202047,
+SNULL, 140240662376447, 140240662380543,
+STORE, 140240662372352, 140240662376447,
+STORE, 140240662376448, 140240662380543,
+SNULL, 7233535, 7241727,
+STORE, 7221248, 7233535,
+STORE, 7233536, 7241727,
+SNULL, 140240664625151, 140240664629247,
+STORE, 140240664621056, 140240664625151,
+STORE, 140240664625152, 140240664629247,
+ERASE, 140240664592384, 140240664621055,
+STORE, 30646272, 30781439,
+STORE, 30646272, 30928895,
+STORE, 140240662888448, 140240664571903,
+STORE, 94256659468288, 94256659578879,
+STORE, 94256661671936, 94256661680127,
+STORE, 94256661680128, 94256661684223,
+STORE, 94256661684224, 94256661692415,
+STORE, 94256687980544, 94256688115711,
+STORE, 139801712504832, 139801714163711,
+STORE, 139801714163712, 139801716260863,
+STORE, 139801716260864, 139801716277247,
+STORE, 139801716277248, 139801716285439,
+STORE, 139801716285440, 139801716301823,
+STORE, 139801716301824, 139801716445183,
+STORE, 139801718505472, 139801718513663,
+STORE, 139801718542336, 139801718546431,
+STORE, 139801718546432, 139801718550527,
+STORE, 139801718550528, 139801718554623,
+STORE, 140721575538688, 140721575673855,
+STORE, 140721577013248, 140721577025535,
+STORE, 140721577025536, 140721577029631,
+STORE, 140737488347136, 140737488351231,
+STORE, 140729259393024, 140737488351231,
+SNULL, 140729259397119, 140737488351231,
+STORE, 140729259393024, 140729259397119,
+STORE, 140729259261952, 140729259397119,
+STORE, 4194304, 5128191,
+STORE, 7221248, 7241727,
+STORE, 7241728, 7249919,
+STORE, 139682376638464, 139682378891263,
+SNULL, 139682376781823, 139682378891263,
+STORE, 139682376638464, 139682376781823,
+STORE, 139682376781824, 139682378891263,
+ERASE, 139682376781824, 139682378891263,
+STORE, 139682378878976, 139682378887167,
+STORE, 139682378887168, 139682378891263,
+STORE, 140729260462080, 140729260466175,
+STORE, 140729260449792, 140729260462079,
+STORE, 139682378850304, 139682378878975,
+STORE, 139682378842112, 139682378850303,
+STORE, 139682373476352, 139682376638463,
+SNULL, 139682373476352, 139682374537215,
+STORE, 139682374537216, 139682376638463,
+STORE, 139682373476352, 139682374537215,
+SNULL, 139682376630271, 139682376638463,
+STORE, 139682374537216, 139682376630271,
+STORE, 139682376630272, 139682376638463,
+ERASE, 139682376630272, 139682376638463,
+STORE, 139682376630272, 139682376638463,
+STORE, 139682369679360, 139682373476351,
+SNULL, 139682369679360, 139682371338239,
+STORE, 139682371338240, 139682373476351,
+STORE, 139682369679360, 139682371338239,
+SNULL, 139682373435391, 139682373476351,
+STORE, 139682371338240, 139682373435391,
+STORE, 139682373435392, 139682373476351,
+SNULL, 139682373435392, 139682373459967,
+STORE, 139682373459968, 139682373476351,
+STORE, 139682373435392, 139682373459967,
+ERASE, 139682373435392, 139682373459967,
+STORE, 139682373435392, 139682373459967,
+ERASE, 139682373459968, 139682373476351,
+STORE, 139682373459968, 139682373476351,
+STORE, 139682378829824, 139682378850303,
+SNULL, 139682373451775, 139682373459967,
+STORE, 139682373435392, 139682373451775,
+STORE, 139682373451776, 139682373459967,
+SNULL, 139682376634367, 139682376638463,
+STORE, 139682376630272, 139682376634367,
+STORE, 139682376634368, 139682376638463,
+SNULL, 7233535, 7241727,
+STORE, 7221248, 7233535,
+STORE, 7233536, 7241727,
+SNULL, 139682378883071, 139682378887167,
+STORE, 139682378878976, 139682378883071,
+STORE, 139682378883072, 139682378887167,
+ERASE, 139682378850304, 139682378878975,
+STORE, 10022912, 10158079,
+STORE, 10022912, 10305535,
+STORE, 139682377146368, 139682378829823,
+STORE, 140737488347136, 140737488351231,
+STORE, 140731831926784, 140737488351231,
+SNULL, 140731831930879, 140737488351231,
+STORE, 140731831926784, 140731831930879,
+STORE, 140731831795712, 140731831930879,
+STORE, 94615305261056, 94615307485183,
+SNULL, 94615305371647, 94615307485183,
+STORE, 94615305261056, 94615305371647,
+STORE, 94615305371648, 94615307485183,
+ERASE, 94615305371648, 94615307485183,
+STORE, 94615307464704, 94615307476991,
+STORE, 94615307476992, 94615307485183,
+STORE, 140163912994816, 140163915247615,
+SNULL, 140163913138175, 140163915247615,
+STORE, 140163912994816, 140163913138175,
+STORE, 140163913138176, 140163915247615,
+ERASE, 140163913138176, 140163915247615,
+STORE, 140163915235328, 140163915243519,
+STORE, 140163915243520, 140163915247615,
+STORE, 140731832217600, 140731832221695,
+STORE, 140731832205312, 140731832217599,
+STORE, 140163915206656, 140163915235327,
+STORE, 140163915198464, 140163915206655,
+STORE, 140163909197824, 140163912994815,
+SNULL, 140163909197824, 140163910856703,
+STORE, 140163910856704, 140163912994815,
+STORE, 140163909197824, 140163910856703,
+SNULL, 140163912953855, 140163912994815,
+STORE, 140163910856704, 140163912953855,
+STORE, 140163912953856, 140163912994815,
+SNULL, 140163912953856, 140163912978431,
+STORE, 140163912978432, 140163912994815,
+STORE, 140163912953856, 140163912978431,
+ERASE, 140163912953856, 140163912978431,
+STORE, 140163912953856, 140163912978431,
+ERASE, 140163912978432, 140163912994815,
+STORE, 140163912978432, 140163912994815,
+SNULL, 140163912970239, 140163912978431,
+STORE, 140163912953856, 140163912970239,
+STORE, 140163912970240, 140163912978431,
+SNULL, 94615307472895, 94615307476991,
+STORE, 94615307464704, 94615307472895,
+STORE, 94615307472896, 94615307476991,
+SNULL, 140163915239423, 140163915243519,
+STORE, 140163915235328, 140163915239423,
+STORE, 140163915239424, 140163915243519,
+ERASE, 140163915206656, 140163915235327,
+STORE, 94615330672640, 94615330807807,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140725254479872, 140737488351231,
+SNULL, 140725254488063, 140737488351231,
+STORE, 140725254479872, 140725254488063,
+STORE, 140725254348800, 140725254488063,
+STORE, 94572781277184, 94572785741823,
+SNULL, 94572783312895, 94572785741823,
+STORE, 94572781277184, 94572783312895,
+STORE, 94572783312896, 94572785741823,
+ERASE, 94572783312896, 94572785741823,
+STORE, 94572785405952, 94572785455103,
+STORE, 94572785455104, 94572785741823,
+STORE, 139636001341440, 139636003594239,
+SNULL, 139636001484799, 139636003594239,
+STORE, 139636001341440, 139636001484799,
+STORE, 139636001484800, 139636003594239,
+ERASE, 139636001484800, 139636003594239,
+STORE, 139636003581952, 139636003590143,
+STORE, 139636003590144, 139636003594239,
+STORE, 140725255557120, 140725255561215,
+STORE, 140725255544832, 140725255557119,
+STORE, 139636003553280, 139636003581951,
+STORE, 139636003545088, 139636003553279,
+STORE, 139635998773248, 139636001341439,
+SNULL, 139635998773248, 139635999240191,
+STORE, 139635999240192, 139636001341439,
+STORE, 139635998773248, 139635999240191,
+SNULL, 139636001333247, 139636001341439,
+STORE, 139635999240192, 139636001333247,
+STORE, 139636001333248, 139636001341439,
+ERASE, 139636001333248, 139636001341439,
+STORE, 139636001333248, 139636001341439,
+STORE, 139635996569600, 139635998773247,
+SNULL, 139635996569600, 139635996671999,
+STORE, 139635996672000, 139635998773247,
+STORE, 139635996569600, 139635996671999,
+SNULL, 139635998765055, 139635998773247,
+STORE, 139635996672000, 139635998765055,
+STORE, 139635998765056, 139635998773247,
+ERASE, 139635998765056, 139635998773247,
+STORE, 139635998765056, 139635998773247,
+STORE, 139635994353664, 139635996569599,
+SNULL, 139635994353664, 139635994451967,
+STORE, 139635994451968, 139635996569599,
+STORE, 139635994353664, 139635994451967,
+SNULL, 139635996545023, 139635996569599,
+STORE, 139635994451968, 139635996545023,
+STORE, 139635996545024, 139635996569599,
+SNULL, 139635996545024, 139635996553215,
+STORE, 139635996553216, 139635996569599,
+STORE, 139635996545024, 139635996553215,
+ERASE, 139635996545024, 139635996553215,
+STORE, 139635996545024, 139635996553215,
+ERASE, 139635996553216, 139635996569599,
+STORE, 139635996553216, 139635996569599,
+STORE, 139635992223744, 139635994353663,
+SNULL, 139635992223744, 139635992252415,
+STORE, 139635992252416, 139635994353663,
+STORE, 139635992223744, 139635992252415,
+SNULL, 139635994345471, 139635994353663,
+STORE, 139635992252416, 139635994345471,
+STORE, 139635994345472, 139635994353663,
+ERASE, 139635994345472, 139635994353663,
+STORE, 139635994345472, 139635994353663,
+STORE, 139635988426752, 139635992223743,
+SNULL, 139635988426752, 139635990085631,
+STORE, 139635990085632, 139635992223743,
+STORE, 139635988426752, 139635990085631,
+SNULL, 139635992182783, 139635992223743,
+STORE, 139635990085632, 139635992182783,
+STORE, 139635992182784, 139635992223743,
+SNULL, 139635992182784, 139635992207359,
+STORE, 139635992207360, 139635992223743,
+STORE, 139635992182784, 139635992207359,
+ERASE, 139635992182784, 139635992207359,
+STORE, 139635992182784, 139635992207359,
+ERASE, 139635992207360, 139635992223743,
+STORE, 139635992207360, 139635992223743,
+STORE, 139636003536896, 139636003553279,
+SNULL, 139635992199167, 139635992207359,
+STORE, 139635992182784, 139635992199167,
+STORE, 139635992199168, 139635992207359,
+SNULL, 139635996549119, 139635996553215,
+STORE, 139635996545024, 139635996549119,
+STORE, 139635996549120, 139635996553215,
+SNULL, 139635994349567, 139635994353663,
+STORE, 139635994345472, 139635994349567,
+STORE, 139635994349568, 139635994353663,
+SNULL, 139635998769151, 139635998773247,
+STORE, 139635998765056, 139635998769151,
+STORE, 139635998769152, 139635998773247,
+SNULL, 139636001337343, 139636001341439,
+STORE, 139636001333248, 139636001337343,
+STORE, 139636001337344, 139636001341439,
+SNULL, 94572785418239, 94572785455103,
+STORE, 94572785405952, 94572785418239,
+STORE, 94572785418240, 94572785455103,
+SNULL, 139636003586047, 139636003590143,
+STORE, 139636003581952, 139636003586047,
+STORE, 139636003586048, 139636003590143,
+ERASE, 139636003553280, 139636003581951,
+STORE, 94572798435328, 94572798570495,
+STORE, 139636001853440, 139636003536895,
+STORE, 139635981426688, 139635988426751,
+STORE, 139635980615680, 139635981426687,
+STORE, 94572798435328, 94572798705663,
+STORE, 94572798435328, 94572798840831,
+STORE, 94572798435328, 94572798975999,
+STORE, 94572798435328, 94572799111167,
+STORE, 94572798435328, 94572799246335,
+STORE, 94572798435328, 94572799381503,
+STORE, 94572798435328, 94572799516671,
+STORE, 94572798435328, 94572799651839,
+STORE, 94572798435328, 94572799787007,
+STORE, 94572798435328, 94572799922175,
+STORE, 94572798435328, 94572800057343,
+STORE, 94572798435328, 94572800192511,
+STORE, 94572798435328, 94572800327679,
+STORE, 94572798435328, 94572800462847,
+STORE, 94572798435328, 94572800598015,
+STORE, 94572798435328, 94572800733183,
+STORE, 94572798435328, 94572800868351,
+STORE, 94572798435328, 94572801003519,
+STORE, 94572798435328, 94572801138687,
+STORE, 94572798435328, 94572801273855,
+STORE, 94572798435328, 94572801409023,
+STORE, 94572798435328, 94572801544191,
+STORE, 94572798435328, 94572801679359,
+STORE, 94572798435328, 94572801814527,
+STORE, 94572798435328, 94572801949695,
+STORE, 94572798435328, 94572802084863,
+STORE, 94572798435328, 94572802220031,
+STORE, 94572798435328, 94572802355199,
+STORE, 94572798435328, 94572802490367,
+STORE, 94572798435328, 94572802625535,
+STORE, 94572798435328, 94572802760703,
+STORE, 94572798435328, 94572802895871,
+STORE, 94572798435328, 94572803031039,
+STORE, 94572798435328, 94572803166207,
+STORE, 94572798435328, 94572803301375,
+STORE, 94572798435328, 94572803436543,
+STORE, 94572798435328, 94572803571711,
+STORE, 94572798435328, 94572803706879,
+STORE, 94572798435328, 94572803842047,
+STORE, 94572798435328, 94572803977215,
+STORE, 94572798435328, 94572804112383,
+STORE, 94572798435328, 94572804247551,
+STORE, 94572798435328, 94572804382719,
+STORE, 94572798435328, 94572804517887,
+STORE, 94572798435328, 94572804653055,
+STORE, 94572798435328, 94572804788223,
+STORE, 94572798435328, 94572804923391,
+STORE, 94572798435328, 94572805058559,
+STORE, 94572798435328, 94572805193727,
+STORE, 94572798435328, 94572805328895,
+STORE, 94572798435328, 94572805464063,
+STORE, 94572798435328, 94572805599231,
+STORE, 94572798435328, 94572805734399,
+STORE, 94572798435328, 94572805869567,
+STORE, 94572798435328, 94572806004735,
+STORE, 94572798435328, 94572806139903,
+STORE, 94572798435328, 94572806275071,
+STORE, 94572798435328, 94572806410239,
+STORE, 94572798435328, 94572806545407,
+STORE, 94572798435328, 94572806680575,
+STORE, 94572798435328, 94572806815743,
+STORE, 94572798435328, 94572806950911,
+STORE, 94572798435328, 94572807086079,
+STORE, 94572798435328, 94572807221247,
+STORE, 94572798435328, 94572807356415,
+STORE, 94572798435328, 94572807491583,
+STORE, 94572798435328, 94572807626751,
+STORE, 94572798435328, 94572807761919,
+STORE, 94572798435328, 94572807897087,
+STORE, 94572798435328, 94572808032255,
+STORE, 94572798435328, 94572808167423,
+STORE, 94572798435328, 94572808302591,
+STORE, 94572798435328, 94572808437759,
+STORE, 94572798435328, 94572808572927,
+ERASE, 139635981426688, 139635988426751,
+STORE, 139635985088512, 139635988426751,
+STORE, 139635778273280, 139635980615679,
+STORE, 139635567632384, 139635778273279,
+STORE, 94572798435328, 94572808716287,
+STORE, 139635984564224, 139635985088511,
+STORE, 139635559239680, 139635567632383,
+SNULL, 139635559243775, 139635567632383,
+STORE, 139635559239680, 139635559243775,
+STORE, 139635559243776, 139635567632383,
+STORE, 139635550846976, 139635559239679,
+SNULL, 139635550851071, 139635559239679,
+STORE, 139635550846976, 139635550851071,
+STORE, 139635550851072, 139635559239679,
+STORE, 139635542454272, 139635550846975,
+STORE, 139635408236544, 139635542454271,
+SNULL, 139635408236544, 139635426590719,
+STORE, 139635426590720, 139635542454271,
+STORE, 139635408236544, 139635426590719,
+ERASE, 139635408236544, 139635426590719,
+STORE, 139635292372992, 139635542454271,
+SNULL, 139635359481855, 139635542454271,
+STORE, 139635292372992, 139635359481855,
+STORE, 139635359481856, 139635542454271,
+SNULL, 139635359481856, 139635426590719,
+STORE, 139635426590720, 139635542454271,
+STORE, 139635359481856, 139635426590719,
+ERASE, 139635359481856, 139635426590719,
+SNULL, 139635542458367, 139635550846975,
+STORE, 139635542454272, 139635542458367,
+STORE, 139635542458368, 139635550846975,
+STORE, 139635418198016, 139635426590719,
+SNULL, 139635493699583, 139635542454271,
+STORE, 139635426590720, 139635493699583,
+STORE, 139635493699584, 139635542454271,
+ERASE, 139635493699584, 139635542454271,
+SNULL, 139635426725887, 139635493699583,
+STORE, 139635426590720, 139635426725887,
+STORE, 139635426725888, 139635493699583,
+SNULL, 139635292508159, 139635359481855,
+STORE, 139635292372992, 139635292508159,
+STORE, 139635292508160, 139635359481855,
+SNULL, 139635418202111, 139635426590719,
+STORE, 139635418198016, 139635418202111,
+STORE, 139635418202112, 139635426590719,
+STORE, 139635225264128, 139635292372991,
+STORE, 139635534061568, 139635542454271,
+SNULL, 139635534065663, 139635542454271,
+STORE, 139635534061568, 139635534065663,
+STORE, 139635534065664, 139635542454271,
+STORE, 139635525668864, 139635534061567,
+SNULL, 139635525672959, 139635534061567,
+STORE, 139635525668864, 139635525672959,
+STORE, 139635525672960, 139635534061567,
+SNULL, 139635225399295, 139635292372991,
+STORE, 139635225264128, 139635225399295,
+STORE, 139635225399296, 139635292372991,
+STORE, 139635091046400, 139635225264127,
+SNULL, 139635158155263, 139635225264127,
+STORE, 139635091046400, 139635158155263,
+STORE, 139635158155264, 139635225264127,
+ERASE, 139635158155264, 139635225264127,
+STORE, 139634956828672, 139635158155263,
+STORE, 139635517276160, 139635525668863,
+SNULL, 139635517280255, 139635525668863,
+STORE, 139635517276160, 139635517280255,
+STORE, 139635517280256, 139635525668863,
+SNULL, 139634956828672, 139635091046399,
+STORE, 139635091046400, 139635158155263,
+STORE, 139634956828672, 139635091046399,
+SNULL, 139635091181567, 139635158155263,
+STORE, 139635091046400, 139635091181567,
+STORE, 139635091181568, 139635158155263,
+SNULL, 139635023937535, 139635091046399,
+STORE, 139634956828672, 139635023937535,
+STORE, 139635023937536, 139635091046399,
+ERASE, 139635023937536, 139635091046399,
+STORE, 139634956828672, 139635091046399,
+SNULL, 139634956828672, 139635023937535,
+STORE, 139635023937536, 139635091046399,
+STORE, 139634956828672, 139635023937535,
+SNULL, 139635024072703, 139635091046399,
+STORE, 139635023937536, 139635024072703,
+STORE, 139635024072704, 139635091046399,
+STORE, 139635508883456, 139635517276159,
+SNULL, 139635508887551, 139635517276159,
+STORE, 139635508883456, 139635508887551,
+STORE, 139635508887552, 139635517276159,
+STORE, 139634822610944, 139635023937535,
+SNULL, 139634822610944, 139634956828671,
+STORE, 139634956828672, 139635023937535,
+STORE, 139634822610944, 139634956828671,
+SNULL, 139634956963839, 139635023937535,
+STORE, 139634956828672, 139634956963839,
+STORE, 139634956963840, 139635023937535,
+STORE, 139635500490752, 139635508883455,
+SNULL, 139634889719807, 139634956828671,
+STORE, 139634822610944, 139634889719807,
+STORE, 139634889719808, 139634956828671,
+ERASE, 139634889719808, 139634956828671,
+SNULL, 139635500494847, 139635508883455,
+STORE, 139635500490752, 139635500494847,
+STORE, 139635500494848, 139635508883455,
+SNULL, 139634822746111, 139634889719807,
+STORE, 139634822610944, 139634822746111,
+STORE, 139634822746112, 139634889719807,
+STORE, 139635409805312, 139635418198015,
+STORE, 139634822746112, 139634956828671,
+SNULL, 139634822746112, 139634889719807,
+STORE, 139634889719808, 139634956828671,
+STORE, 139634822746112, 139634889719807,
+SNULL, 139634889854975, 139634956828671,
+STORE, 139634889719808, 139634889854975,
+STORE, 139634889854976, 139634956828671,
+SNULL, 139635409809407, 139635418198015,
+STORE, 139635409805312, 139635409809407,
+STORE, 139635409809408, 139635418198015,
+STORE, 139635401412608, 139635409805311,
+STORE, 139634688393216, 139634822610943,
+SNULL, 139634755502079, 139634822610943,
+STORE, 139634688393216, 139634755502079,
+STORE, 139634755502080, 139634822610943,
+ERASE, 139634755502080, 139634822610943,
+SNULL, 139635401416703, 139635409805311,
+STORE, 139635401412608, 139635401416703,
+STORE, 139635401416704, 139635409805311,
+STORE, 139634554175488, 139634755502079,
+SNULL, 139634554175488, 139634688393215,
+STORE, 139634688393216, 139634755502079,
+STORE, 139634554175488, 139634688393215,
+SNULL, 139634688528383, 139634755502079,
+STORE, 139634688393216, 139634688528383,
+STORE, 139634688528384, 139634755502079,
+STORE, 139635393019904, 139635401412607,
+SNULL, 139634621284351, 139634688393215,
+STORE, 139634554175488, 139634621284351,
+STORE, 139634621284352, 139634688393215,
+ERASE, 139634621284352, 139634688393215,
+SNULL, 139634554310655, 139634621284351,
+STORE, 139634554175488, 139634554310655,
+STORE, 139634554310656, 139634621284351,
+STORE, 139634554310656, 139634688393215,
+SNULL, 139635393023999, 139635401412607,
+STORE, 139635393019904, 139635393023999,
+STORE, 139635393024000, 139635401412607,
+SNULL, 139634554310656, 139634621284351,
+STORE, 139634621284352, 139634688393215,
+STORE, 139634554310656, 139634621284351,
+SNULL, 139634621419519, 139634688393215,
+STORE, 139634621284352, 139634621419519,
+STORE, 139634621419520, 139634688393215,
+STORE, 139635384627200, 139635393019903,
+SNULL, 139635384631295, 139635393019903,
+STORE, 139635384627200, 139635384631295,
+STORE, 139635384631296, 139635393019903,
+STORE, 139635376234496, 139635384627199,
+SNULL, 139635376238591, 139635384627199,
+STORE, 139635376234496, 139635376238591,
+STORE, 139635376238592, 139635384627199,
+STORE, 139635367841792, 139635376234495,
+SNULL, 139635367845887, 139635376234495,
+STORE, 139635367841792, 139635367845887,
+STORE, 139635367845888, 139635376234495,
+STORE, 139634419957760, 139634554175487,
+SNULL, 139634487066623, 139634554175487,
+STORE, 139634419957760, 139634487066623,
+STORE, 139634487066624, 139634554175487,
+ERASE, 139634487066624, 139634554175487,
+STORE, 139635216871424, 139635225264127,
+SNULL, 139635216875519, 139635225264127,
+STORE, 139635216871424, 139635216875519,
+STORE, 139635216875520, 139635225264127,
+SNULL, 139634420092927, 139634487066623,
+STORE, 139634419957760, 139634420092927,
+STORE, 139634420092928, 139634487066623,
+STORE, 139635208478720, 139635216871423,
+SNULL, 139635208482815, 139635216871423,
+STORE, 139635208478720, 139635208482815,
+STORE, 139635208482816, 139635216871423,
+STORE, 139635200086016, 139635208478719,
+SNULL, 139635200090111, 139635208478719,
+STORE, 139635200086016, 139635200090111,
+STORE, 139635200090112, 139635208478719,
+STORE, 139635191693312, 139635200086015,
+SNULL, 139635191697407, 139635200086015,
+STORE, 139635191693312, 139635191697407,
+STORE, 139635191697408, 139635200086015,
+STORE, 139635183300608, 139635191693311,
+SNULL, 139635183304703, 139635191693311,
+STORE, 139635183300608, 139635183304703,
+STORE, 139635183304704, 139635191693311,
+STORE, 139634420092928, 139634554175487,
+SNULL, 139634420092928, 139634487066623,
+STORE, 139634487066624, 139634554175487,
+STORE, 139634420092928, 139634487066623,
+SNULL, 139634487201791, 139634554175487,
+STORE, 139634487066624, 139634487201791,
+STORE, 139634487201792, 139634554175487,
+ERASE, 139635559239680, 139635559243775,
+ERASE, 139635559243776, 139635567632383,
+ERASE, 139635550846976, 139635550851071,
+ERASE, 139635550851072, 139635559239679,
+ERASE, 139635542454272, 139635542458367,
+ERASE, 139635542458368, 139635550846975,
+ERASE, 139635418198016, 139635418202111,
+ERASE, 139635418202112, 139635426590719,
+ERASE, 139635534061568, 139635534065663,
+ERASE, 139635534065664, 139635542454271,
+ERASE, 139635525668864, 139635525672959,
+ERASE, 139635525672960, 139635534061567,
+ERASE, 139635517276160, 139635517280255,
+ERASE, 139635517280256, 139635525668863,
+ERASE, 139635508883456, 139635508887551,
+ERASE, 139635508887552, 139635517276159,
+ERASE, 139635500490752, 139635500494847,
+ERASE, 139635500494848, 139635508883455,
+ERASE, 139635409805312, 139635409809407,
+ERASE, 139635409809408, 139635418198015,
+ERASE, 139635401412608, 139635401416703,
+ERASE, 139635401416704, 139635409805311,
+ERASE, 139635393019904, 139635393023999,
+ERASE, 139635393024000, 139635401412607,
+ERASE, 139635384627200, 139635384631295,
+ERASE, 139635384631296, 139635393019903,
+ };
+ static const unsigned long set25[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722547441664, 140737488351231,
+SNULL, 140722547449855, 140737488351231,
+STORE, 140722547441664, 140722547449855,
+STORE, 140722547310592, 140722547449855,
+STORE, 94827521732608, 94827523956735,
+SNULL, 94827521843199, 94827523956735,
+STORE, 94827521732608, 94827521843199,
+STORE, 94827521843200, 94827523956735,
+ERASE, 94827521843200, 94827523956735,
+STORE, 94827523936256, 94827523948543,
+STORE, 94827523948544, 94827523956735,
+STORE, 139816136847360, 139816139100159,
+SNULL, 139816136990719, 139816139100159,
+STORE, 139816136847360, 139816136990719,
+STORE, 139816136990720, 139816139100159,
+ERASE, 139816136990720, 139816139100159,
+STORE, 139816139087872, 139816139096063,
+STORE, 139816139096064, 139816139100159,
+STORE, 140722548142080, 140722548146175,
+STORE, 140722548129792, 140722548142079,
+STORE, 139816139059200, 139816139087871,
+STORE, 139816139051008, 139816139059199,
+STORE, 139816133050368, 139816136847359,
+SNULL, 139816133050368, 139816134709247,
+STORE, 139816134709248, 139816136847359,
+STORE, 139816133050368, 139816134709247,
+SNULL, 139816136806399, 139816136847359,
+STORE, 139816134709248, 139816136806399,
+STORE, 139816136806400, 139816136847359,
+SNULL, 139816136806400, 139816136830975,
+STORE, 139816136830976, 139816136847359,
+STORE, 139816136806400, 139816136830975,
+ERASE, 139816136806400, 139816136830975,
+STORE, 139816136806400, 139816136830975,
+ERASE, 139816136830976, 139816136847359,
+STORE, 139816136830976, 139816136847359,
+SNULL, 139816136822783, 139816136830975,
+STORE, 139816136806400, 139816136822783,
+STORE, 139816136822784, 139816136830975,
+SNULL, 94827523944447, 94827523948543,
+STORE, 94827523936256, 94827523944447,
+STORE, 94827523944448, 94827523948543,
+SNULL, 139816139091967, 139816139096063,
+STORE, 139816139087872, 139816139091967,
+STORE, 139816139091968, 139816139096063,
+ERASE, 139816139059200, 139816139087871,
+STORE, 94827534970880, 94827535106047,
+STORE, 94114394132480, 94114394345471,
+STORE, 94114396442624, 94114396446719,
+STORE, 94114396446720, 94114396454911,
+STORE, 94114396454912, 94114396467199,
+STORE, 94114421575680, 94114427715583,
+STORE, 139934313955328, 139934315614207,
+STORE, 139934315614208, 139934317711359,
+STORE, 139934317711360, 139934317727743,
+STORE, 139934317727744, 139934317735935,
+STORE, 139934317735936, 139934317752319,
+STORE, 139934317752320, 139934317764607,
+STORE, 139934317764608, 139934319857663,
+STORE, 139934319857664, 139934319861759,
+STORE, 139934319861760, 139934319865855,
+STORE, 139934319865856, 139934320009215,
+STORE, 139934320377856, 139934322061311,
+STORE, 139934322061312, 139934322077695,
+STORE, 139934322106368, 139934322110463,
+STORE, 139934322110464, 139934322114559,
+STORE, 139934322114560, 139934322118655,
+STORE, 140731200376832, 140731200516095,
+STORE, 140731200929792, 140731200942079,
+STORE, 140731200942080, 140731200946175,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140734133174272, 140737488351231,
+SNULL, 140734133182463, 140737488351231,
+STORE, 140734133174272, 140734133182463,
+STORE, 140734133043200, 140734133182463,
+STORE, 94412675600384, 94412677824511,
+SNULL, 94412675710975, 94412677824511,
+STORE, 94412675600384, 94412675710975,
+STORE, 94412675710976, 94412677824511,
+ERASE, 94412675710976, 94412677824511,
+STORE, 94412677804032, 94412677816319,
+STORE, 94412677816320, 94412677824511,
+STORE, 140320087945216, 140320090198015,
+SNULL, 140320088088575, 140320090198015,
+STORE, 140320087945216, 140320088088575,
+STORE, 140320088088576, 140320090198015,
+ERASE, 140320088088576, 140320090198015,
+STORE, 140320090185728, 140320090193919,
+STORE, 140320090193920, 140320090198015,
+STORE, 140734134591488, 140734134595583,
+STORE, 140734134579200, 140734134591487,
+STORE, 140320090157056, 140320090185727,
+STORE, 140320090148864, 140320090157055,
+STORE, 140320084148224, 140320087945215,
+SNULL, 140320084148224, 140320085807103,
+STORE, 140320085807104, 140320087945215,
+STORE, 140320084148224, 140320085807103,
+SNULL, 140320087904255, 140320087945215,
+STORE, 140320085807104, 140320087904255,
+STORE, 140320087904256, 140320087945215,
+SNULL, 140320087904256, 140320087928831,
+STORE, 140320087928832, 140320087945215,
+STORE, 140320087904256, 140320087928831,
+ERASE, 140320087904256, 140320087928831,
+STORE, 140320087904256, 140320087928831,
+ERASE, 140320087928832, 140320087945215,
+STORE, 140320087928832, 140320087945215,
+SNULL, 140320087920639, 140320087928831,
+STORE, 140320087904256, 140320087920639,
+STORE, 140320087920640, 140320087928831,
+SNULL, 94412677812223, 94412677816319,
+STORE, 94412677804032, 94412677812223,
+STORE, 94412677812224, 94412677816319,
+SNULL, 140320090189823, 140320090193919,
+STORE, 140320090185728, 140320090189823,
+STORE, 140320090189824, 140320090193919,
+ERASE, 140320090157056, 140320090185727,
+STORE, 94412684546048, 94412684681215,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140723005485056, 140737488351231,
+SNULL, 140723005493247, 140737488351231,
+STORE, 140723005485056, 140723005493247,
+STORE, 140723005353984, 140723005493247,
+STORE, 94387431936000, 94387434160127,
+SNULL, 94387432046591, 94387434160127,
+STORE, 94387431936000, 94387432046591,
+STORE, 94387432046592, 94387434160127,
+ERASE, 94387432046592, 94387434160127,
+STORE, 94387434139648, 94387434151935,
+STORE, 94387434151936, 94387434160127,
+STORE, 140151675392000, 140151677644799,
+SNULL, 140151675535359, 140151677644799,
+STORE, 140151675392000, 140151675535359,
+STORE, 140151675535360, 140151677644799,
+ERASE, 140151675535360, 140151677644799,
+STORE, 140151677632512, 140151677640703,
+STORE, 140151677640704, 140151677644799,
+STORE, 140723005784064, 140723005788159,
+STORE, 140723005771776, 140723005784063,
+STORE, 140151677603840, 140151677632511,
+STORE, 140151677595648, 140151677603839,
+STORE, 140151671595008, 140151675391999,
+SNULL, 140151671595008, 140151673253887,
+STORE, 140151673253888, 140151675391999,
+STORE, 140151671595008, 140151673253887,
+SNULL, 140151675351039, 140151675391999,
+STORE, 140151673253888, 140151675351039,
+STORE, 140151675351040, 140151675391999,
+SNULL, 140151675351040, 140151675375615,
+STORE, 140151675375616, 140151675391999,
+STORE, 140151675351040, 140151675375615,
+ERASE, 140151675351040, 140151675375615,
+STORE, 140151675351040, 140151675375615,
+ERASE, 140151675375616, 140151675391999,
+STORE, 140151675375616, 140151675391999,
+SNULL, 140151675367423, 140151675375615,
+STORE, 140151675351040, 140151675367423,
+STORE, 140151675367424, 140151675375615,
+SNULL, 94387434147839, 94387434151935,
+STORE, 94387434139648, 94387434147839,
+STORE, 94387434147840, 94387434151935,
+SNULL, 140151677636607, 140151677640703,
+STORE, 140151677632512, 140151677636607,
+STORE, 140151677636608, 140151677640703,
+ERASE, 140151677603840, 140151677632511,
+STORE, 94387458818048, 94387458953215,
+STORE, 94909010997248, 94909011210239,
+STORE, 94909013307392, 94909013311487,
+STORE, 94909013311488, 94909013319679,
+STORE, 94909013319680, 94909013331967,
+STORE, 94909014827008, 94909023371263,
+STORE, 140712411975680, 140712413634559,
+STORE, 140712413634560, 140712415731711,
+STORE, 140712415731712, 140712415748095,
+STORE, 140712415748096, 140712415756287,
+STORE, 140712415756288, 140712415772671,
+STORE, 140712415772672, 140712415784959,
+STORE, 140712415784960, 140712417878015,
+STORE, 140712417878016, 140712417882111,
+STORE, 140712417882112, 140712417886207,
+STORE, 140712417886208, 140712418029567,
+STORE, 140712418398208, 140712420081663,
+STORE, 140712420081664, 140712420098047,
+STORE, 140712420126720, 140712420130815,
+STORE, 140712420130816, 140712420134911,
+STORE, 140712420134912, 140712420139007,
+STORE, 140729293111296, 140729293250559,
+STORE, 140729293307904, 140729293320191,
+STORE, 140729293320192, 140729293324287,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140720541691904, 140737488351231,
+SNULL, 140720541700095, 140737488351231,
+STORE, 140720541691904, 140720541700095,
+STORE, 140720541560832, 140720541700095,
+STORE, 94203603419136, 94203605643263,
+SNULL, 94203603529727, 94203605643263,
+STORE, 94203603419136, 94203603529727,
+STORE, 94203603529728, 94203605643263,
+ERASE, 94203603529728, 94203605643263,
+STORE, 94203605622784, 94203605635071,
+STORE, 94203605635072, 94203605643263,
+STORE, 139847623081984, 139847625334783,
+SNULL, 139847623225343, 139847625334783,
+STORE, 139847623081984, 139847623225343,
+STORE, 139847623225344, 139847625334783,
+ERASE, 139847623225344, 139847625334783,
+STORE, 139847625322496, 139847625330687,
+STORE, 139847625330688, 139847625334783,
+STORE, 140720542547968, 140720542552063,
+STORE, 140720542535680, 140720542547967,
+STORE, 139847625293824, 139847625322495,
+STORE, 139847625285632, 139847625293823,
+STORE, 139847619284992, 139847623081983,
+SNULL, 139847619284992, 139847620943871,
+STORE, 139847620943872, 139847623081983,
+STORE, 139847619284992, 139847620943871,
+SNULL, 139847623041023, 139847623081983,
+STORE, 139847620943872, 139847623041023,
+STORE, 139847623041024, 139847623081983,
+SNULL, 139847623041024, 139847623065599,
+STORE, 139847623065600, 139847623081983,
+STORE, 139847623041024, 139847623065599,
+ERASE, 139847623041024, 139847623065599,
+STORE, 139847623041024, 139847623065599,
+ERASE, 139847623065600, 139847623081983,
+STORE, 139847623065600, 139847623081983,
+SNULL, 139847623057407, 139847623065599,
+STORE, 139847623041024, 139847623057407,
+STORE, 139847623057408, 139847623065599,
+SNULL, 94203605630975, 94203605635071,
+STORE, 94203605622784, 94203605630975,
+STORE, 94203605630976, 94203605635071,
+SNULL, 139847625326591, 139847625330687,
+STORE, 139847625322496, 139847625326591,
+STORE, 139847625326592, 139847625330687,
+ERASE, 139847625293824, 139847625322495,
+STORE, 94203634880512, 94203635015679,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140721428738048, 140737488351231,
+SNULL, 140721428746239, 140737488351231,
+STORE, 140721428738048, 140721428746239,
+STORE, 140721428606976, 140721428746239,
+STORE, 93968808378368, 93968810602495,
+SNULL, 93968808488959, 93968810602495,
+STORE, 93968808378368, 93968808488959,
+STORE, 93968808488960, 93968810602495,
+ERASE, 93968808488960, 93968810602495,
+STORE, 93968810582016, 93968810594303,
+STORE, 93968810594304, 93968810602495,
+STORE, 140397757026304, 140397759279103,
+SNULL, 140397757169663, 140397759279103,
+STORE, 140397757026304, 140397757169663,
+STORE, 140397757169664, 140397759279103,
+ERASE, 140397757169664, 140397759279103,
+STORE, 140397759266816, 140397759275007,
+STORE, 140397759275008, 140397759279103,
+STORE, 140721430368256, 140721430372351,
+STORE, 140721430355968, 140721430368255,
+STORE, 140397759238144, 140397759266815,
+STORE, 140397759229952, 140397759238143,
+STORE, 140397753229312, 140397757026303,
+SNULL, 140397753229312, 140397754888191,
+STORE, 140397754888192, 140397757026303,
+STORE, 140397753229312, 140397754888191,
+SNULL, 140397756985343, 140397757026303,
+STORE, 140397754888192, 140397756985343,
+STORE, 140397756985344, 140397757026303,
+SNULL, 140397756985344, 140397757009919,
+STORE, 140397757009920, 140397757026303,
+STORE, 140397756985344, 140397757009919,
+ERASE, 140397756985344, 140397757009919,
+STORE, 140397756985344, 140397757009919,
+ERASE, 140397757009920, 140397757026303,
+STORE, 140397757009920, 140397757026303,
+SNULL, 140397757001727, 140397757009919,
+STORE, 140397756985344, 140397757001727,
+STORE, 140397757001728, 140397757009919,
+SNULL, 93968810590207, 93968810594303,
+STORE, 93968810582016, 93968810590207,
+STORE, 93968810590208, 93968810594303,
+SNULL, 140397759270911, 140397759275007,
+STORE, 140397759266816, 140397759270911,
+STORE, 140397759270912, 140397759275007,
+ERASE, 140397759238144, 140397759266815,
+STORE, 93968837025792, 93968837160959,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140721751044096, 140737488351231,
+SNULL, 140721751052287, 140737488351231,
+STORE, 140721751044096, 140721751052287,
+STORE, 140721750913024, 140721751052287,
+STORE, 94426051657728, 94426053881855,
+SNULL, 94426051768319, 94426053881855,
+STORE, 94426051657728, 94426051768319,
+STORE, 94426051768320, 94426053881855,
+ERASE, 94426051768320, 94426053881855,
+STORE, 94426053861376, 94426053873663,
+STORE, 94426053873664, 94426053881855,
+STORE, 140228456181760, 140228458434559,
+SNULL, 140228456325119, 140228458434559,
+STORE, 140228456181760, 140228456325119,
+STORE, 140228456325120, 140228458434559,
+ERASE, 140228456325120, 140228458434559,
+STORE, 140228458422272, 140228458430463,
+STORE, 140228458430464, 140228458434559,
+STORE, 140721751117824, 140721751121919,
+STORE, 140721751105536, 140721751117823,
+STORE, 140228458393600, 140228458422271,
+STORE, 140228458385408, 140228458393599,
+STORE, 140228452384768, 140228456181759,
+SNULL, 140228452384768, 140228454043647,
+STORE, 140228454043648, 140228456181759,
+STORE, 140228452384768, 140228454043647,
+SNULL, 140228456140799, 140228456181759,
+STORE, 140228454043648, 140228456140799,
+STORE, 140228456140800, 140228456181759,
+SNULL, 140228456140800, 140228456165375,
+STORE, 140228456165376, 140228456181759,
+STORE, 140228456140800, 140228456165375,
+ERASE, 140228456140800, 140228456165375,
+STORE, 140228456140800, 140228456165375,
+ERASE, 140228456165376, 140228456181759,
+STORE, 140228456165376, 140228456181759,
+SNULL, 140228456157183, 140228456165375,
+STORE, 140228456140800, 140228456157183,
+STORE, 140228456157184, 140228456165375,
+SNULL, 94426053869567, 94426053873663,
+STORE, 94426053861376, 94426053869567,
+STORE, 94426053869568, 94426053873663,
+SNULL, 140228458426367, 140228458430463,
+STORE, 140228458422272, 140228458426367,
+STORE, 140228458426368, 140228458430463,
+ERASE, 140228458393600, 140228458422271,
+STORE, 94426073681920, 94426073817087,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140732727623680, 140737488351231,
+SNULL, 140732727631871, 140737488351231,
+STORE, 140732727623680, 140732727631871,
+STORE, 140732727492608, 140732727631871,
+STORE, 94537485996032, 94537488220159,
+SNULL, 94537486106623, 94537488220159,
+STORE, 94537485996032, 94537486106623,
+STORE, 94537486106624, 94537488220159,
+ERASE, 94537486106624, 94537488220159,
+STORE, 94537488199680, 94537488211967,
+STORE, 94537488211968, 94537488220159,
+STORE, 140446578036736, 140446580289535,
+SNULL, 140446578180095, 140446580289535,
+STORE, 140446578036736, 140446578180095,
+STORE, 140446578180096, 140446580289535,
+ERASE, 140446578180096, 140446580289535,
+STORE, 140446580277248, 140446580285439,
+STORE, 140446580285440, 140446580289535,
+STORE, 140732727758848, 140732727762943,
+STORE, 140732727746560, 140732727758847,
+STORE, 140446580248576, 140446580277247,
+STORE, 140446580240384, 140446580248575,
+STORE, 140446574239744, 140446578036735,
+SNULL, 140446574239744, 140446575898623,
+STORE, 140446575898624, 140446578036735,
+STORE, 140446574239744, 140446575898623,
+SNULL, 140446577995775, 140446578036735,
+STORE, 140446575898624, 140446577995775,
+STORE, 140446577995776, 140446578036735,
+SNULL, 140446577995776, 140446578020351,
+STORE, 140446578020352, 140446578036735,
+STORE, 140446577995776, 140446578020351,
+ERASE, 140446577995776, 140446578020351,
+STORE, 140446577995776, 140446578020351,
+ERASE, 140446578020352, 140446578036735,
+STORE, 140446578020352, 140446578036735,
+SNULL, 140446578012159, 140446578020351,
+STORE, 140446577995776, 140446578012159,
+STORE, 140446578012160, 140446578020351,
+SNULL, 94537488207871, 94537488211967,
+STORE, 94537488199680, 94537488207871,
+STORE, 94537488207872, 94537488211967,
+SNULL, 140446580281343, 140446580285439,
+STORE, 140446580277248, 140446580281343,
+STORE, 140446580281344, 140446580285439,
+ERASE, 140446580248576, 140446580277247,
+STORE, 94537489014784, 94537489149951,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140728766808064, 140737488351231,
+SNULL, 140728766816255, 140737488351231,
+STORE, 140728766808064, 140728766816255,
+STORE, 140728766676992, 140728766816255,
+STORE, 94418513866752, 94418516090879,
+SNULL, 94418513977343, 94418516090879,
+STORE, 94418513866752, 94418513977343,
+STORE, 94418513977344, 94418516090879,
+ERASE, 94418513977344, 94418516090879,
+STORE, 94418516070400, 94418516082687,
+STORE, 94418516082688, 94418516090879,
+STORE, 140556479520768, 140556481773567,
+SNULL, 140556479664127, 140556481773567,
+STORE, 140556479520768, 140556479664127,
+STORE, 140556479664128, 140556481773567,
+ERASE, 140556479664128, 140556481773567,
+STORE, 140556481761280, 140556481769471,
+STORE, 140556481769472, 140556481773567,
+STORE, 140728767148032, 140728767152127,
+STORE, 140728767135744, 140728767148031,
+STORE, 140556481732608, 140556481761279,
+STORE, 140556481724416, 140556481732607,
+STORE, 140556475723776, 140556479520767,
+SNULL, 140556475723776, 140556477382655,
+STORE, 140556477382656, 140556479520767,
+STORE, 140556475723776, 140556477382655,
+SNULL, 140556479479807, 140556479520767,
+STORE, 140556477382656, 140556479479807,
+STORE, 140556479479808, 140556479520767,
+SNULL, 140556479479808, 140556479504383,
+STORE, 140556479504384, 140556479520767,
+STORE, 140556479479808, 140556479504383,
+ERASE, 140556479479808, 140556479504383,
+STORE, 140556479479808, 140556479504383,
+ERASE, 140556479504384, 140556479520767,
+STORE, 140556479504384, 140556479520767,
+SNULL, 140556479496191, 140556479504383,
+STORE, 140556479479808, 140556479496191,
+STORE, 140556479496192, 140556479504383,
+SNULL, 94418516078591, 94418516082687,
+STORE, 94418516070400, 94418516078591,
+STORE, 94418516078592, 94418516082687,
+SNULL, 140556481765375, 140556481769471,
+STORE, 140556481761280, 140556481765375,
+STORE, 140556481765376, 140556481769471,
+ERASE, 140556481732608, 140556481761279,
+STORE, 94418541113344, 94418541248511,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140723945873408, 140737488351231,
+SNULL, 140723945881599, 140737488351231,
+STORE, 140723945873408, 140723945881599,
+STORE, 140723945742336, 140723945881599,
+STORE, 94543169773568, 94543171997695,
+SNULL, 94543169884159, 94543171997695,
+STORE, 94543169773568, 94543169884159,
+STORE, 94543169884160, 94543171997695,
+ERASE, 94543169884160, 94543171997695,
+STORE, 94543171977216, 94543171989503,
+STORE, 94543171989504, 94543171997695,
+STORE, 139890420883456, 139890423136255,
+SNULL, 139890421026815, 139890423136255,
+STORE, 139890420883456, 139890421026815,
+STORE, 139890421026816, 139890423136255,
+ERASE, 139890421026816, 139890423136255,
+STORE, 139890423123968, 139890423132159,
+STORE, 139890423132160, 139890423136255,
+STORE, 140723946102784, 140723946106879,
+STORE, 140723946090496, 140723946102783,
+STORE, 139890423095296, 139890423123967,
+STORE, 139890423087104, 139890423095295,
+STORE, 139890417086464, 139890420883455,
+SNULL, 139890417086464, 139890418745343,
+STORE, 139890418745344, 139890420883455,
+STORE, 139890417086464, 139890418745343,
+SNULL, 139890420842495, 139890420883455,
+STORE, 139890418745344, 139890420842495,
+STORE, 139890420842496, 139890420883455,
+SNULL, 139890420842496, 139890420867071,
+STORE, 139890420867072, 139890420883455,
+STORE, 139890420842496, 139890420867071,
+ERASE, 139890420842496, 139890420867071,
+STORE, 139890420842496, 139890420867071,
+ERASE, 139890420867072, 139890420883455,
+STORE, 139890420867072, 139890420883455,
+SNULL, 139890420858879, 139890420867071,
+STORE, 139890420842496, 139890420858879,
+STORE, 139890420858880, 139890420867071,
+SNULL, 94543171985407, 94543171989503,
+STORE, 94543171977216, 94543171985407,
+STORE, 94543171985408, 94543171989503,
+SNULL, 139890423128063, 139890423132159,
+STORE, 139890423123968, 139890423128063,
+STORE, 139890423128064, 139890423132159,
+ERASE, 139890423095296, 139890423123967,
+STORE, 94543197097984, 94543197233151,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140736205979648, 140737488351231,
+SNULL, 140736205987839, 140737488351231,
+STORE, 140736205979648, 140736205987839,
+STORE, 140736205848576, 140736205987839,
+STORE, 94913209913344, 94913212137471,
+SNULL, 94913210023935, 94913212137471,
+STORE, 94913209913344, 94913210023935,
+STORE, 94913210023936, 94913212137471,
+ERASE, 94913210023936, 94913212137471,
+STORE, 94913212116992, 94913212129279,
+STORE, 94913212129280, 94913212137471,
+STORE, 140006323052544, 140006325305343,
+SNULL, 140006323195903, 140006325305343,
+STORE, 140006323052544, 140006323195903,
+STORE, 140006323195904, 140006325305343,
+ERASE, 140006323195904, 140006325305343,
+STORE, 140006325293056, 140006325301247,
+STORE, 140006325301248, 140006325305343,
+STORE, 140736206716928, 140736206721023,
+STORE, 140736206704640, 140736206716927,
+STORE, 140006325264384, 140006325293055,
+STORE, 140006325256192, 140006325264383,
+STORE, 140006319255552, 140006323052543,
+SNULL, 140006319255552, 140006320914431,
+STORE, 140006320914432, 140006323052543,
+STORE, 140006319255552, 140006320914431,
+SNULL, 140006323011583, 140006323052543,
+STORE, 140006320914432, 140006323011583,
+STORE, 140006323011584, 140006323052543,
+SNULL, 140006323011584, 140006323036159,
+STORE, 140006323036160, 140006323052543,
+STORE, 140006323011584, 140006323036159,
+ERASE, 140006323011584, 140006323036159,
+STORE, 140006323011584, 140006323036159,
+ERASE, 140006323036160, 140006323052543,
+STORE, 140006323036160, 140006323052543,
+SNULL, 140006323027967, 140006323036159,
+STORE, 140006323011584, 140006323027967,
+STORE, 140006323027968, 140006323036159,
+SNULL, 94913212125183, 94913212129279,
+STORE, 94913212116992, 94913212125183,
+STORE, 94913212125184, 94913212129279,
+SNULL, 140006325297151, 140006325301247,
+STORE, 140006325293056, 140006325297151,
+STORE, 140006325297152, 140006325301247,
+ERASE, 140006325264384, 140006325293055,
+STORE, 94913239932928, 94913240068095,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140726926897152, 140737488351231,
+SNULL, 140726926905343, 140737488351231,
+STORE, 140726926897152, 140726926905343,
+STORE, 140726926766080, 140726926905343,
+STORE, 94213246820352, 94213249044479,
+SNULL, 94213246930943, 94213249044479,
+STORE, 94213246820352, 94213246930943,
+STORE, 94213246930944, 94213249044479,
+ERASE, 94213246930944, 94213249044479,
+STORE, 94213249024000, 94213249036287,
+STORE, 94213249036288, 94213249044479,
+STORE, 140368830242816, 140368832495615,
+SNULL, 140368830386175, 140368832495615,
+STORE, 140368830242816, 140368830386175,
+STORE, 140368830386176, 140368832495615,
+ERASE, 140368830386176, 140368832495615,
+STORE, 140368832483328, 140368832491519,
+STORE, 140368832491520, 140368832495615,
+STORE, 140726926999552, 140726927003647,
+STORE, 140726926987264, 140726926999551,
+STORE, 140368832454656, 140368832483327,
+STORE, 140368832446464, 140368832454655,
+STORE, 140368826445824, 140368830242815,
+SNULL, 140368826445824, 140368828104703,
+STORE, 140368828104704, 140368830242815,
+STORE, 140368826445824, 140368828104703,
+SNULL, 140368830201855, 140368830242815,
+STORE, 140368828104704, 140368830201855,
+STORE, 140368830201856, 140368830242815,
+SNULL, 140368830201856, 140368830226431,
+STORE, 140368830226432, 140368830242815,
+STORE, 140368830201856, 140368830226431,
+ERASE, 140368830201856, 140368830226431,
+STORE, 140368830201856, 140368830226431,
+ERASE, 140368830226432, 140368830242815,
+STORE, 140368830226432, 140368830242815,
+SNULL, 140368830218239, 140368830226431,
+STORE, 140368830201856, 140368830218239,
+STORE, 140368830218240, 140368830226431,
+SNULL, 94213249032191, 94213249036287,
+STORE, 94213249024000, 94213249032191,
+STORE, 94213249032192, 94213249036287,
+SNULL, 140368832487423, 140368832491519,
+STORE, 140368832483328, 140368832487423,
+STORE, 140368832487424, 140368832491519,
+ERASE, 140368832454656, 140368832483327,
+STORE, 94213267435520, 94213267570687,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140728954130432, 140737488351231,
+SNULL, 140728954138623, 140737488351231,
+STORE, 140728954130432, 140728954138623,
+STORE, 140728953999360, 140728954138623,
+STORE, 94672570966016, 94672573190143,
+SNULL, 94672571076607, 94672573190143,
+STORE, 94672570966016, 94672571076607,
+STORE, 94672571076608, 94672573190143,
+ERASE, 94672571076608, 94672573190143,
+STORE, 94672573169664, 94672573181951,
+STORE, 94672573181952, 94672573190143,
+STORE, 140201696735232, 140201698988031,
+SNULL, 140201696878591, 140201698988031,
+STORE, 140201696735232, 140201696878591,
+STORE, 140201696878592, 140201698988031,
+ERASE, 140201696878592, 140201698988031,
+STORE, 140201698975744, 140201698983935,
+STORE, 140201698983936, 140201698988031,
+STORE, 140728954163200, 140728954167295,
+STORE, 140728954150912, 140728954163199,
+STORE, 140201698947072, 140201698975743,
+STORE, 140201698938880, 140201698947071,
+STORE, 140201692938240, 140201696735231,
+SNULL, 140201692938240, 140201694597119,
+STORE, 140201694597120, 140201696735231,
+STORE, 140201692938240, 140201694597119,
+SNULL, 140201696694271, 140201696735231,
+STORE, 140201694597120, 140201696694271,
+STORE, 140201696694272, 140201696735231,
+SNULL, 140201696694272, 140201696718847,
+STORE, 140201696718848, 140201696735231,
+STORE, 140201696694272, 140201696718847,
+ERASE, 140201696694272, 140201696718847,
+STORE, 140201696694272, 140201696718847,
+ERASE, 140201696718848, 140201696735231,
+STORE, 140201696718848, 140201696735231,
+SNULL, 140201696710655, 140201696718847,
+STORE, 140201696694272, 140201696710655,
+STORE, 140201696710656, 140201696718847,
+SNULL, 94672573177855, 94672573181951,
+STORE, 94672573169664, 94672573177855,
+STORE, 94672573177856, 94672573181951,
+SNULL, 140201698979839, 140201698983935,
+STORE, 140201698975744, 140201698979839,
+STORE, 140201698979840, 140201698983935,
+ERASE, 140201698947072, 140201698975743,
+STORE, 94672595689472, 94672595824639,
+STORE, 94114394132480, 94114394345471,
+STORE, 94114396442624, 94114396446719,
+STORE, 94114396446720, 94114396454911,
+STORE, 94114396454912, 94114396467199,
+STORE, 94114421575680, 94114428256255,
+STORE, 139934313955328, 139934315614207,
+STORE, 139934315614208, 139934317711359,
+STORE, 139934317711360, 139934317727743,
+STORE, 139934317727744, 139934317735935,
+STORE, 139934317735936, 139934317752319,
+STORE, 139934317752320, 139934317764607,
+STORE, 139934317764608, 139934319857663,
+STORE, 139934319857664, 139934319861759,
+STORE, 139934319861760, 139934319865855,
+STORE, 139934319865856, 139934320009215,
+STORE, 139934320377856, 139934322061311,
+STORE, 139934322061312, 139934322077695,
+STORE, 139934322106368, 139934322110463,
+STORE, 139934322110464, 139934322114559,
+STORE, 139934322114560, 139934322118655,
+STORE, 140731200376832, 140731200516095,
+STORE, 140731200929792, 140731200942079,
+STORE, 140731200942080, 140731200946175,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140721532362752, 140737488351231,
+SNULL, 140721532370943, 140737488351231,
+STORE, 140721532362752, 140721532370943,
+STORE, 140721532231680, 140721532370943,
+STORE, 94467222597632, 94467224821759,
+SNULL, 94467222708223, 94467224821759,
+STORE, 94467222597632, 94467222708223,
+STORE, 94467222708224, 94467224821759,
+ERASE, 94467222708224, 94467224821759,
+STORE, 94467224801280, 94467224813567,
+STORE, 94467224813568, 94467224821759,
+STORE, 140191433543680, 140191435796479,
+SNULL, 140191433687039, 140191435796479,
+STORE, 140191433543680, 140191433687039,
+STORE, 140191433687040, 140191435796479,
+ERASE, 140191433687040, 140191435796479,
+STORE, 140191435784192, 140191435792383,
+STORE, 140191435792384, 140191435796479,
+STORE, 140721533034496, 140721533038591,
+STORE, 140721533022208, 140721533034495,
+STORE, 140191435755520, 140191435784191,
+STORE, 140191435747328, 140191435755519,
+STORE, 140191429746688, 140191433543679,
+SNULL, 140191429746688, 140191431405567,
+STORE, 140191431405568, 140191433543679,
+STORE, 140191429746688, 140191431405567,
+SNULL, 140191433502719, 140191433543679,
+STORE, 140191431405568, 140191433502719,
+STORE, 140191433502720, 140191433543679,
+SNULL, 140191433502720, 140191433527295,
+STORE, 140191433527296, 140191433543679,
+STORE, 140191433502720, 140191433527295,
+ERASE, 140191433502720, 140191433527295,
+STORE, 140191433502720, 140191433527295,
+ERASE, 140191433527296, 140191433543679,
+STORE, 140191433527296, 140191433543679,
+SNULL, 140191433519103, 140191433527295,
+STORE, 140191433502720, 140191433519103,
+STORE, 140191433519104, 140191433527295,
+SNULL, 94467224809471, 94467224813567,
+STORE, 94467224801280, 94467224809471,
+STORE, 94467224809472, 94467224813567,
+SNULL, 140191435788287, 140191435792383,
+STORE, 140191435784192, 140191435788287,
+STORE, 140191435788288, 140191435792383,
+ERASE, 140191435755520, 140191435784191,
+STORE, 94467251847168, 94467251982335,
+STORE, 94367895400448, 94367895613439,
+STORE, 94367897710592, 94367897714687,
+STORE, 94367897714688, 94367897722879,
+STORE, 94367897722880, 94367897735167,
+STORE, 94367925264384, 94367926861823,
+STORE, 139801317548032, 139801319206911,
+STORE, 139801319206912, 139801321304063,
+STORE, 139801321304064, 139801321320447,
+STORE, 139801321320448, 139801321328639,
+STORE, 139801321328640, 139801321345023,
+STORE, 139801321345024, 139801321357311,
+STORE, 139801321357312, 139801323450367,
+STORE, 139801323450368, 139801323454463,
+STORE, 139801323454464, 139801323458559,
+STORE, 139801323458560, 139801323601919,
+STORE, 139801323970560, 139801325654015,
+STORE, 139801325654016, 139801325670399,
+STORE, 139801325699072, 139801325703167,
+STORE, 139801325703168, 139801325707263,
+STORE, 139801325707264, 139801325711359,
+STORE, 140724442861568, 140724443000831,
+STORE, 140724443611136, 140724443623423,
+STORE, 140724443623424, 140724443627519,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140731353149440, 140737488351231,
+SNULL, 140731353157631, 140737488351231,
+STORE, 140731353149440, 140731353157631,
+STORE, 140731353018368, 140731353157631,
+STORE, 94310379503616, 94310381838335,
+SNULL, 94310379716607, 94310381838335,
+STORE, 94310379503616, 94310379716607,
+STORE, 94310379716608, 94310381838335,
+ERASE, 94310379716608, 94310381838335,
+STORE, 94310381813760, 94310381826047,
+STORE, 94310381826048, 94310381838335,
+STORE, 140515434659840, 140515436912639,
+SNULL, 140515434803199, 140515436912639,
+STORE, 140515434659840, 140515434803199,
+STORE, 140515434803200, 140515436912639,
+ERASE, 140515434803200, 140515436912639,
+STORE, 140515436900352, 140515436908543,
+STORE, 140515436908544, 140515436912639,
+STORE, 140731353886720, 140731353890815,
+STORE, 140731353874432, 140731353886719,
+STORE, 140515436871680, 140515436900351,
+STORE, 140515436863488, 140515436871679,
+STORE, 140515432546304, 140515434659839,
+SNULL, 140515432546304, 140515432558591,
+STORE, 140515432558592, 140515434659839,
+STORE, 140515432546304, 140515432558591,
+SNULL, 140515434651647, 140515434659839,
+STORE, 140515432558592, 140515434651647,
+STORE, 140515434651648, 140515434659839,
+ERASE, 140515434651648, 140515434659839,
+STORE, 140515434651648, 140515434659839,
+STORE, 140515428749312, 140515432546303,
+SNULL, 140515428749312, 140515430408191,
+STORE, 140515430408192, 140515432546303,
+STORE, 140515428749312, 140515430408191,
+SNULL, 140515432505343, 140515432546303,
+STORE, 140515430408192, 140515432505343,
+STORE, 140515432505344, 140515432546303,
+SNULL, 140515432505344, 140515432529919,
+STORE, 140515432529920, 140515432546303,
+STORE, 140515432505344, 140515432529919,
+ERASE, 140515432505344, 140515432529919,
+STORE, 140515432505344, 140515432529919,
+ERASE, 140515432529920, 140515432546303,
+STORE, 140515432529920, 140515432546303,
+STORE, 140515436855296, 140515436871679,
+SNULL, 140515432521727, 140515432529919,
+STORE, 140515432505344, 140515432521727,
+STORE, 140515432521728, 140515432529919,
+SNULL, 140515434655743, 140515434659839,
+STORE, 140515434651648, 140515434655743,
+STORE, 140515434655744, 140515434659839,
+SNULL, 94310381817855, 94310381826047,
+STORE, 94310381813760, 94310381817855,
+STORE, 94310381817856, 94310381826047,
+SNULL, 140515436904447, 140515436908543,
+STORE, 140515436900352, 140515436904447,
+STORE, 140515436904448, 140515436908543,
+ERASE, 140515436871680, 140515436900351,
+STORE, 94310395457536, 94310395592703,
+STORE, 140515435171840, 140515436855295,
+STORE, 94310395457536, 94310395727871,
+STORE, 94310395457536, 94310395863039,
+STORE, 94310395457536, 94310396047359,
+SNULL, 94310396022783, 94310396047359,
+STORE, 94310395457536, 94310396022783,
+STORE, 94310396022784, 94310396047359,
+ERASE, 94310396022784, 94310396047359,
+STORE, 94310395457536, 94310396157951,
+STORE, 94310395457536, 94310396293119,
+SNULL, 94310396276735, 94310396293119,
+STORE, 94310395457536, 94310396276735,
+STORE, 94310396276736, 94310396293119,
+ERASE, 94310396276736, 94310396293119,
+STORE, 94310395457536, 94310396411903,
+SNULL, 94310396383231, 94310396411903,
+STORE, 94310395457536, 94310396383231,
+STORE, 94310396383232, 94310396411903,
+ERASE, 94310396383232, 94310396411903,
+STORE, 94310395457536, 94310396522495,
+STORE, 94310395457536, 94310396674047,
+SNULL, 94310396657663, 94310396674047,
+STORE, 94310395457536, 94310396657663,
+STORE, 94310396657664, 94310396674047,
+ERASE, 94310396657664, 94310396674047,
+SNULL, 94310396624895, 94310396657663,
+STORE, 94310395457536, 94310396624895,
+STORE, 94310396624896, 94310396657663,
+ERASE, 94310396624896, 94310396657663,
+STORE, 94310395457536, 94310396776447,
+SNULL, 94310396764159, 94310396776447,
+STORE, 94310395457536, 94310396764159,
+STORE, 94310396764160, 94310396776447,
+ERASE, 94310396764160, 94310396776447,
+SNULL, 94310396739583, 94310396764159,
+STORE, 94310395457536, 94310396739583,
+STORE, 94310396739584, 94310396764159,
+ERASE, 94310396739584, 94310396764159,
+STORE, 94310395457536, 94310396882943,
+STORE, 94310395457536, 94310397018111,
+STORE, 94310395457536, 94310397161471,
+STORE, 94310395457536, 94310397300735,
+SNULL, 94310397292543, 94310397300735,
+STORE, 94310395457536, 94310397292543,
+STORE, 94310397292544, 94310397300735,
+ERASE, 94310397292544, 94310397300735,
+STORE, 94359222210560, 94359222423551,
+STORE, 94359224520704, 94359224524799,
+STORE, 94359224524800, 94359224532991,
+STORE, 94359224532992, 94359224545279,
+STORE, 94359238348800, 94359239385087,
+STORE, 140675699838976, 140675701497855,
+STORE, 140675701497856, 140675703595007,
+STORE, 140675703595008, 140675703611391,
+STORE, 140675703611392, 140675703619583,
+STORE, 140675703619584, 140675703635967,
+STORE, 140675703635968, 140675703648255,
+STORE, 140675703648256, 140675705741311,
+STORE, 140675705741312, 140675705745407,
+STORE, 140675705745408, 140675705749503,
+STORE, 140675705749504, 140675705892863,
+STORE, 140675706261504, 140675707944959,
+STORE, 140675707944960, 140675707961343,
+STORE, 140675707990016, 140675707994111,
+STORE, 140675707994112, 140675707998207,
+STORE, 140675707998208, 140675708002303,
+STORE, 140721324634112, 140721324773375,
+STORE, 140721324810240, 140721324822527,
+STORE, 140721324822528, 140721324826623,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140724099678208, 140737488351231,
+SNULL, 140724099686399, 140737488351231,
+STORE, 140724099678208, 140724099686399,
+STORE, 140724099547136, 140724099686399,
+STORE, 94586638516224, 94586640850943,
+SNULL, 94586638729215, 94586640850943,
+STORE, 94586638516224, 94586638729215,
+STORE, 94586638729216, 94586640850943,
+ERASE, 94586638729216, 94586640850943,
+STORE, 94586640826368, 94586640838655,
+STORE, 94586640838656, 94586640850943,
+STORE, 140371033796608, 140371036049407,
+SNULL, 140371033939967, 140371036049407,
+STORE, 140371033796608, 140371033939967,
+STORE, 140371033939968, 140371036049407,
+ERASE, 140371033939968, 140371036049407,
+STORE, 140371036037120, 140371036045311,
+STORE, 140371036045312, 140371036049407,
+STORE, 140724100001792, 140724100005887,
+STORE, 140724099989504, 140724100001791,
+STORE, 140371036008448, 140371036037119,
+STORE, 140371036000256, 140371036008447,
+STORE, 140371031683072, 140371033796607,
+SNULL, 140371031683072, 140371031695359,
+STORE, 140371031695360, 140371033796607,
+STORE, 140371031683072, 140371031695359,
+SNULL, 140371033788415, 140371033796607,
+STORE, 140371031695360, 140371033788415,
+STORE, 140371033788416, 140371033796607,
+ERASE, 140371033788416, 140371033796607,
+STORE, 140371033788416, 140371033796607,
+STORE, 140371027886080, 140371031683071,
+SNULL, 140371027886080, 140371029544959,
+STORE, 140371029544960, 140371031683071,
+STORE, 140371027886080, 140371029544959,
+SNULL, 140371031642111, 140371031683071,
+STORE, 140371029544960, 140371031642111,
+STORE, 140371031642112, 140371031683071,
+SNULL, 140371031642112, 140371031666687,
+STORE, 140371031666688, 140371031683071,
+STORE, 140371031642112, 140371031666687,
+ERASE, 140371031642112, 140371031666687,
+STORE, 140371031642112, 140371031666687,
+ERASE, 140371031666688, 140371031683071,
+STORE, 140371031666688, 140371031683071,
+STORE, 140371035992064, 140371036008447,
+SNULL, 140371031658495, 140371031666687,
+STORE, 140371031642112, 140371031658495,
+STORE, 140371031658496, 140371031666687,
+SNULL, 140371033792511, 140371033796607,
+STORE, 140371033788416, 140371033792511,
+STORE, 140371033792512, 140371033796607,
+SNULL, 94586640830463, 94586640838655,
+STORE, 94586640826368, 94586640830463,
+STORE, 94586640830464, 94586640838655,
+SNULL, 140371036041215, 140371036045311,
+STORE, 140371036037120, 140371036041215,
+STORE, 140371036041216, 140371036045311,
+ERASE, 140371036008448, 140371036037119,
+STORE, 94586663849984, 94586663985151,
+STORE, 140371034308608, 140371035992063,
+STORE, 94586663849984, 94586664120319,
+STORE, 94586663849984, 94586664255487,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140727532937216, 140737488351231,
+SNULL, 140727532945407, 140737488351231,
+STORE, 140727532937216, 140727532945407,
+STORE, 140727532806144, 140727532945407,
+STORE, 94849780191232, 94849782525951,
+SNULL, 94849780404223, 94849782525951,
+STORE, 94849780191232, 94849780404223,
+STORE, 94849780404224, 94849782525951,
+ERASE, 94849780404224, 94849782525951,
+STORE, 94849782501376, 94849782513663,
+STORE, 94849782513664, 94849782525951,
+STORE, 140382070218752, 140382072471551,
+SNULL, 140382070362111, 140382072471551,
+STORE, 140382070218752, 140382070362111,
+STORE, 140382070362112, 140382072471551,
+ERASE, 140382070362112, 140382072471551,
+STORE, 140382072459264, 140382072467455,
+STORE, 140382072467456, 140382072471551,
+STORE, 140727533092864, 140727533096959,
+STORE, 140727533080576, 140727533092863,
+STORE, 140382072430592, 140382072459263,
+STORE, 140382072422400, 140382072430591,
+STORE, 140382068105216, 140382070218751,
+SNULL, 140382068105216, 140382068117503,
+STORE, 140382068117504, 140382070218751,
+STORE, 140382068105216, 140382068117503,
+SNULL, 140382070210559, 140382070218751,
+STORE, 140382068117504, 140382070210559,
+STORE, 140382070210560, 140382070218751,
+ERASE, 140382070210560, 140382070218751,
+STORE, 140382070210560, 140382070218751,
+STORE, 140382064308224, 140382068105215,
+SNULL, 140382064308224, 140382065967103,
+STORE, 140382065967104, 140382068105215,
+STORE, 140382064308224, 140382065967103,
+SNULL, 140382068064255, 140382068105215,
+STORE, 140382065967104, 140382068064255,
+STORE, 140382068064256, 140382068105215,
+SNULL, 140382068064256, 140382068088831,
+STORE, 140382068088832, 140382068105215,
+STORE, 140382068064256, 140382068088831,
+ERASE, 140382068064256, 140382068088831,
+STORE, 140382068064256, 140382068088831,
+ERASE, 140382068088832, 140382068105215,
+STORE, 140382068088832, 140382068105215,
+STORE, 140382072414208, 140382072430591,
+SNULL, 140382068080639, 140382068088831,
+STORE, 140382068064256, 140382068080639,
+STORE, 140382068080640, 140382068088831,
+SNULL, 140382070214655, 140382070218751,
+STORE, 140382070210560, 140382070214655,
+STORE, 140382070214656, 140382070218751,
+SNULL, 94849782505471, 94849782513663,
+STORE, 94849782501376, 94849782505471,
+STORE, 94849782505472, 94849782513663,
+SNULL, 140382072463359, 140382072467455,
+STORE, 140382072459264, 140382072463359,
+STORE, 140382072463360, 140382072467455,
+ERASE, 140382072430592, 140382072459263,
+STORE, 94849782845440, 94849782980607,
+STORE, 140382070730752, 140382072414207,
+STORE, 94849782845440, 94849783115775,
+STORE, 94849782845440, 94849783250943,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722594377728, 140737488351231,
+SNULL, 140722594385919, 140737488351231,
+STORE, 140722594377728, 140722594385919,
+STORE, 140722594246656, 140722594385919,
+STORE, 94421466353664, 94421468577791,
+SNULL, 94421466464255, 94421468577791,
+STORE, 94421466353664, 94421466464255,
+STORE, 94421466464256, 94421468577791,
+ERASE, 94421466464256, 94421468577791,
+STORE, 94421468557312, 94421468569599,
+STORE, 94421468569600, 94421468577791,
+STORE, 140345458057216, 140345460310015,
+SNULL, 140345458200575, 140345460310015,
+STORE, 140345458057216, 140345458200575,
+STORE, 140345458200576, 140345460310015,
+ERASE, 140345458200576, 140345460310015,
+STORE, 140345460297728, 140345460305919,
+STORE, 140345460305920, 140345460310015,
+STORE, 140722595557376, 140722595561471,
+STORE, 140722595545088, 140722595557375,
+STORE, 140345460269056, 140345460297727,
+STORE, 140345460260864, 140345460269055,
+STORE, 140345454260224, 140345458057215,
+SNULL, 140345454260224, 140345455919103,
+STORE, 140345455919104, 140345458057215,
+STORE, 140345454260224, 140345455919103,
+SNULL, 140345458016255, 140345458057215,
+STORE, 140345455919104, 140345458016255,
+STORE, 140345458016256, 140345458057215,
+SNULL, 140345458016256, 140345458040831,
+STORE, 140345458040832, 140345458057215,
+STORE, 140345458016256, 140345458040831,
+ERASE, 140345458016256, 140345458040831,
+STORE, 140345458016256, 140345458040831,
+ERASE, 140345458040832, 140345458057215,
+STORE, 140345458040832, 140345458057215,
+SNULL, 140345458032639, 140345458040831,
+STORE, 140345458016256, 140345458032639,
+STORE, 140345458032640, 140345458040831,
+SNULL, 94421468565503, 94421468569599,
+STORE, 94421468557312, 94421468565503,
+STORE, 94421468565504, 94421468569599,
+SNULL, 140345460301823, 140345460305919,
+STORE, 140345460297728, 140345460301823,
+STORE, 140345460301824, 140345460305919,
+ERASE, 140345460269056, 140345460297727,
+STORE, 94421496004608, 94421496139775,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140726096302080, 140737488351231,
+SNULL, 140726096310271, 140737488351231,
+STORE, 140726096302080, 140726096310271,
+STORE, 140726096171008, 140726096310271,
+STORE, 94101992124416, 94101994459135,
+SNULL, 94101992337407, 94101994459135,
+STORE, 94101992124416, 94101992337407,
+STORE, 94101992337408, 94101994459135,
+ERASE, 94101992337408, 94101994459135,
+STORE, 94101994434560, 94101994446847,
+STORE, 94101994446848, 94101994459135,
+STORE, 140192085594112, 140192087846911,
+SNULL, 140192085737471, 140192087846911,
+STORE, 140192085594112, 140192085737471,
+STORE, 140192085737472, 140192087846911,
+ERASE, 140192085737472, 140192087846911,
+STORE, 140192087834624, 140192087842815,
+STORE, 140192087842816, 140192087846911,
+STORE, 140726096375808, 140726096379903,
+STORE, 140726096363520, 140726096375807,
+STORE, 140192087805952, 140192087834623,
+STORE, 140192087797760, 140192087805951,
+STORE, 140192083480576, 140192085594111,
+SNULL, 140192083480576, 140192083492863,
+STORE, 140192083492864, 140192085594111,
+STORE, 140192083480576, 140192083492863,
+SNULL, 140192085585919, 140192085594111,
+STORE, 140192083492864, 140192085585919,
+STORE, 140192085585920, 140192085594111,
+ERASE, 140192085585920, 140192085594111,
+STORE, 140192085585920, 140192085594111,
+STORE, 140192079683584, 140192083480575,
+SNULL, 140192079683584, 140192081342463,
+STORE, 140192081342464, 140192083480575,
+STORE, 140192079683584, 140192081342463,
+SNULL, 140192083439615, 140192083480575,
+STORE, 140192081342464, 140192083439615,
+STORE, 140192083439616, 140192083480575,
+SNULL, 140192083439616, 140192083464191,
+STORE, 140192083464192, 140192083480575,
+STORE, 140192083439616, 140192083464191,
+ERASE, 140192083439616, 140192083464191,
+STORE, 140192083439616, 140192083464191,
+ERASE, 140192083464192, 140192083480575,
+STORE, 140192083464192, 140192083480575,
+STORE, 140192087789568, 140192087805951,
+SNULL, 140192083455999, 140192083464191,
+STORE, 140192083439616, 140192083455999,
+STORE, 140192083456000, 140192083464191,
+SNULL, 140192085590015, 140192085594111,
+STORE, 140192085585920, 140192085590015,
+STORE, 140192085590016, 140192085594111,
+SNULL, 94101994438655, 94101994446847,
+STORE, 94101994434560, 94101994438655,
+STORE, 94101994438656, 94101994446847,
+SNULL, 140192087838719, 140192087842815,
+STORE, 140192087834624, 140192087838719,
+STORE, 140192087838720, 140192087842815,
+ERASE, 140192087805952, 140192087834623,
+STORE, 94102011887616, 94102012022783,
+STORE, 140192086106112, 140192087789567,
+STORE, 94102011887616, 94102012157951,
+STORE, 94102011887616, 94102012293119,
+STORE, 94102011887616, 94102012440575,
+SNULL, 94102012428287, 94102012440575,
+STORE, 94102011887616, 94102012428287,
+STORE, 94102012428288, 94102012440575,
+ERASE, 94102012428288, 94102012440575,
+STORE, 94102011887616, 94102012579839,
+STORE, 94102011887616, 94102012715007,
+SNULL, 94102012694527, 94102012715007,
+STORE, 94102011887616, 94102012694527,
+STORE, 94102012694528, 94102012715007,
+ERASE, 94102012694528, 94102012715007,
+STORE, 94102011887616, 94102012833791,
+STORE, 94102011887616, 94102012968959,
+SNULL, 94102012927999, 94102012968959,
+STORE, 94102011887616, 94102012927999,
+STORE, 94102012928000, 94102012968959,
+ERASE, 94102012928000, 94102012968959,
+STORE, 94102011887616, 94102013091839,
+SNULL, 94102013075455, 94102013091839,
+STORE, 94102011887616, 94102013075455,
+STORE, 94102013075456, 94102013091839,
+ERASE, 94102013075456, 94102013091839,
+STORE, 94102011887616, 94102013210623,
+STORE, 94102011887616, 94102013345791,
+STORE, 93968727965696, 93968728178687,
+STORE, 93968730275840, 93968730279935,
+STORE, 93968730279936, 93968730288127,
+STORE, 93968730288128, 93968730300415,
+STORE, 93968731140096, 93968732704767,
+STORE, 140588443168768, 140588444827647,
+STORE, 140588444827648, 140588446924799,
+STORE, 140588446924800, 140588446941183,
+STORE, 140588446941184, 140588446949375,
+STORE, 140588446949376, 140588446965759,
+STORE, 140588446965760, 140588446978047,
+STORE, 140588446978048, 140588449071103,
+STORE, 140588449071104, 140588449075199,
+STORE, 140588449075200, 140588449079295,
+STORE, 140588449079296, 140588449222655,
+STORE, 140588449591296, 140588451274751,
+STORE, 140588451274752, 140588451291135,
+STORE, 140588451319808, 140588451323903,
+STORE, 140588451323904, 140588451327999,
+STORE, 140588451328000, 140588451332095,
+STORE, 140733877239808, 140733877379071,
+STORE, 140733878702080, 140733878714367,
+STORE, 140733878714368, 140733878718463,
+STORE, 93968727965696, 93968728178687,
+STORE, 93968730275840, 93968730279935,
+STORE, 93968730279936, 93968730288127,
+STORE, 93968730288128, 93968730300415,
+STORE, 93968731140096, 93968732991487,
+STORE, 140588443168768, 140588444827647,
+STORE, 140588444827648, 140588446924799,
+STORE, 140588446924800, 140588446941183,
+STORE, 140588446941184, 140588446949375,
+STORE, 140588446949376, 140588446965759,
+STORE, 140588446965760, 140588446978047,
+STORE, 140588446978048, 140588449071103,
+STORE, 140588449071104, 140588449075199,
+STORE, 140588449075200, 140588449079295,
+STORE, 140588449079296, 140588449222655,
+STORE, 140588449591296, 140588451274751,
+STORE, 140588451274752, 140588451291135,
+STORE, 140588451319808, 140588451323903,
+STORE, 140588451323904, 140588451327999,
+STORE, 140588451328000, 140588451332095,
+STORE, 140733877239808, 140733877379071,
+STORE, 140733878702080, 140733878714367,
+STORE, 140733878714368, 140733878718463,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140733054472192, 140737488351231,
+SNULL, 140733054480383, 140737488351231,
+STORE, 140733054472192, 140733054480383,
+STORE, 140733054341120, 140733054480383,
+STORE, 93992873623552, 93992875847679,
+SNULL, 93992873734143, 93992875847679,
+STORE, 93992873623552, 93992873734143,
+STORE, 93992873734144, 93992875847679,
+ERASE, 93992873734144, 93992875847679,
+STORE, 93992875827200, 93992875839487,
+STORE, 93992875839488, 93992875847679,
+STORE, 139790881488896, 139790883741695,
+SNULL, 139790881632255, 139790883741695,
+STORE, 139790881488896, 139790881632255,
+STORE, 139790881632256, 139790883741695,
+ERASE, 139790881632256, 139790883741695,
+STORE, 139790883729408, 139790883737599,
+STORE, 139790883737600, 139790883741695,
+STORE, 140733054754816, 140733054758911,
+STORE, 140733054742528, 140733054754815,
+STORE, 139790883700736, 139790883729407,
+STORE, 139790883692544, 139790883700735,
+STORE, 139790877691904, 139790881488895,
+SNULL, 139790877691904, 139790879350783,
+STORE, 139790879350784, 139790881488895,
+STORE, 139790877691904, 139790879350783,
+SNULL, 139790881447935, 139790881488895,
+STORE, 139790879350784, 139790881447935,
+STORE, 139790881447936, 139790881488895,
+SNULL, 139790881447936, 139790881472511,
+STORE, 139790881472512, 139790881488895,
+STORE, 139790881447936, 139790881472511,
+ERASE, 139790881447936, 139790881472511,
+STORE, 139790881447936, 139790881472511,
+ERASE, 139790881472512, 139790881488895,
+STORE, 139790881472512, 139790881488895,
+SNULL, 139790881464319, 139790881472511,
+STORE, 139790881447936, 139790881464319,
+STORE, 139790881464320, 139790881472511,
+SNULL, 93992875835391, 93992875839487,
+STORE, 93992875827200, 93992875835391,
+STORE, 93992875835392, 93992875839487,
+SNULL, 139790883733503, 139790883737599,
+STORE, 139790883729408, 139790883733503,
+STORE, 139790883733504, 139790883737599,
+ERASE, 139790883700736, 139790883729407,
+STORE, 93992877031424, 93992877166591,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140728550887424, 140737488351231,
+SNULL, 140728550895615, 140737488351231,
+STORE, 140728550887424, 140728550895615,
+STORE, 140728550756352, 140728550895615,
+STORE, 94707634077696, 94707636301823,
+SNULL, 94707634188287, 94707636301823,
+STORE, 94707634077696, 94707634188287,
+STORE, 94707634188288, 94707636301823,
+ERASE, 94707634188288, 94707636301823,
+STORE, 94707636281344, 94707636293631,
+STORE, 94707636293632, 94707636301823,
+STORE, 140553545666560, 140553547919359,
+SNULL, 140553545809919, 140553547919359,
+STORE, 140553545666560, 140553545809919,
+STORE, 140553545809920, 140553547919359,
+ERASE, 140553545809920, 140553547919359,
+STORE, 140553547907072, 140553547915263,
+STORE, 140553547915264, 140553547919359,
+STORE, 140728552374272, 140728552378367,
+STORE, 140728552361984, 140728552374271,
+STORE, 140553547878400, 140553547907071,
+STORE, 140553547870208, 140553547878399,
+STORE, 140553541869568, 140553545666559,
+SNULL, 140553541869568, 140553543528447,
+STORE, 140553543528448, 140553545666559,
+STORE, 140553541869568, 140553543528447,
+SNULL, 140553545625599, 140553545666559,
+STORE, 140553543528448, 140553545625599,
+STORE, 140553545625600, 140553545666559,
+SNULL, 140553545625600, 140553545650175,
+STORE, 140553545650176, 140553545666559,
+STORE, 140553545625600, 140553545650175,
+ERASE, 140553545625600, 140553545650175,
+STORE, 140553545625600, 140553545650175,
+ERASE, 140553545650176, 140553545666559,
+STORE, 140553545650176, 140553545666559,
+SNULL, 140553545641983, 140553545650175,
+STORE, 140553545625600, 140553545641983,
+STORE, 140553545641984, 140553545650175,
+SNULL, 94707636289535, 94707636293631,
+STORE, 94707636281344, 94707636289535,
+STORE, 94707636289536, 94707636293631,
+SNULL, 140553547911167, 140553547915263,
+STORE, 140553547907072, 140553547911167,
+STORE, 140553547911168, 140553547915263,
+ERASE, 140553547878400, 140553547907071,
+STORE, 94707651411968, 94707651547135,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140732168695808, 140737488351231,
+SNULL, 140732168703999, 140737488351231,
+STORE, 140732168695808, 140732168703999,
+STORE, 140732168564736, 140732168703999,
+STORE, 94454287859712, 94454290083839,
+SNULL, 94454287970303, 94454290083839,
+STORE, 94454287859712, 94454287970303,
+STORE, 94454287970304, 94454290083839,
+ERASE, 94454287970304, 94454290083839,
+STORE, 94454290063360, 94454290075647,
+STORE, 94454290075648, 94454290083839,
+STORE, 140564947107840, 140564949360639,
+SNULL, 140564947251199, 140564949360639,
+STORE, 140564947107840, 140564947251199,
+STORE, 140564947251200, 140564949360639,
+ERASE, 140564947251200, 140564949360639,
+STORE, 140564949348352, 140564949356543,
+STORE, 140564949356544, 140564949360639,
+STORE, 140732168843264, 140732168847359,
+STORE, 140732168830976, 140732168843263,
+STORE, 140564949319680, 140564949348351,
+STORE, 140564949311488, 140564949319679,
+STORE, 140564943310848, 140564947107839,
+SNULL, 140564943310848, 140564944969727,
+STORE, 140564944969728, 140564947107839,
+STORE, 140564943310848, 140564944969727,
+SNULL, 140564947066879, 140564947107839,
+STORE, 140564944969728, 140564947066879,
+STORE, 140564947066880, 140564947107839,
+SNULL, 140564947066880, 140564947091455,
+STORE, 140564947091456, 140564947107839,
+STORE, 140564947066880, 140564947091455,
+ERASE, 140564947066880, 140564947091455,
+STORE, 140564947066880, 140564947091455,
+ERASE, 140564947091456, 140564947107839,
+STORE, 140564947091456, 140564947107839,
+SNULL, 140564947083263, 140564947091455,
+STORE, 140564947066880, 140564947083263,
+STORE, 140564947083264, 140564947091455,
+SNULL, 94454290071551, 94454290075647,
+STORE, 94454290063360, 94454290071551,
+STORE, 94454290071552, 94454290075647,
+SNULL, 140564949352447, 140564949356543,
+STORE, 140564949348352, 140564949352447,
+STORE, 140564949352448, 140564949356543,
+ERASE, 140564949319680, 140564949348351,
+STORE, 94454316236800, 94454316371967,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140735155617792, 140737488351231,
+SNULL, 140735155625983, 140737488351231,
+STORE, 140735155617792, 140735155625983,
+STORE, 140735155486720, 140735155625983,
+STORE, 93915969556480, 93915971780607,
+SNULL, 93915969667071, 93915971780607,
+STORE, 93915969556480, 93915969667071,
+STORE, 93915969667072, 93915971780607,
+ERASE, 93915969667072, 93915971780607,
+STORE, 93915971760128, 93915971772415,
+STORE, 93915971772416, 93915971780607,
+STORE, 140141164605440, 140141166858239,
+SNULL, 140141164748799, 140141166858239,
+STORE, 140141164605440, 140141164748799,
+STORE, 140141164748800, 140141166858239,
+ERASE, 140141164748800, 140141166858239,
+STORE, 140141166845952, 140141166854143,
+STORE, 140141166854144, 140141166858239,
+STORE, 140735155691520, 140735155695615,
+STORE, 140735155679232, 140735155691519,
+STORE, 140141166817280, 140141166845951,
+STORE, 140141166809088, 140141166817279,
+STORE, 140141160808448, 140141164605439,
+SNULL, 140141160808448, 140141162467327,
+STORE, 140141162467328, 140141164605439,
+STORE, 140141160808448, 140141162467327,
+SNULL, 140141164564479, 140141164605439,
+STORE, 140141162467328, 140141164564479,
+STORE, 140141164564480, 140141164605439,
+SNULL, 140141164564480, 140141164589055,
+STORE, 140141164589056, 140141164605439,
+STORE, 140141164564480, 140141164589055,
+ERASE, 140141164564480, 140141164589055,
+STORE, 140141164564480, 140141164589055,
+ERASE, 140141164589056, 140141164605439,
+STORE, 140141164589056, 140141164605439,
+SNULL, 140141164580863, 140141164589055,
+STORE, 140141164564480, 140141164580863,
+STORE, 140141164580864, 140141164589055,
+SNULL, 93915971768319, 93915971772415,
+STORE, 93915971760128, 93915971768319,
+STORE, 93915971768320, 93915971772415,
+SNULL, 140141166850047, 140141166854143,
+STORE, 140141166845952, 140141166850047,
+STORE, 140141166850048, 140141166854143,
+ERASE, 140141166817280, 140141166845951,
+STORE, 93916002775040, 93916002910207,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140728988409856, 140737488351231,
+SNULL, 140728988418047, 140737488351231,
+STORE, 140728988409856, 140728988418047,
+STORE, 140728988278784, 140728988418047,
+STORE, 94021634813952, 94021637038079,
+SNULL, 94021634924543, 94021637038079,
+STORE, 94021634813952, 94021634924543,
+STORE, 94021634924544, 94021637038079,
+ERASE, 94021634924544, 94021637038079,
+STORE, 94021637017600, 94021637029887,
+STORE, 94021637029888, 94021637038079,
+STORE, 140638014038016, 140638016290815,
+SNULL, 140638014181375, 140638016290815,
+STORE, 140638014038016, 140638014181375,
+STORE, 140638014181376, 140638016290815,
+ERASE, 140638014181376, 140638016290815,
+STORE, 140638016278528, 140638016286719,
+STORE, 140638016286720, 140638016290815,
+STORE, 140728988536832, 140728988540927,
+STORE, 140728988524544, 140728988536831,
+STORE, 140638016249856, 140638016278527,
+STORE, 140638016241664, 140638016249855,
+STORE, 140638010241024, 140638014038015,
+SNULL, 140638010241024, 140638011899903,
+STORE, 140638011899904, 140638014038015,
+STORE, 140638010241024, 140638011899903,
+SNULL, 140638013997055, 140638014038015,
+STORE, 140638011899904, 140638013997055,
+STORE, 140638013997056, 140638014038015,
+SNULL, 140638013997056, 140638014021631,
+STORE, 140638014021632, 140638014038015,
+STORE, 140638013997056, 140638014021631,
+ERASE, 140638013997056, 140638014021631,
+STORE, 140638013997056, 140638014021631,
+ERASE, 140638014021632, 140638014038015,
+STORE, 140638014021632, 140638014038015,
+SNULL, 140638014013439, 140638014021631,
+STORE, 140638013997056, 140638014013439,
+STORE, 140638014013440, 140638014021631,
+SNULL, 94021637025791, 94021637029887,
+STORE, 94021637017600, 94021637025791,
+STORE, 94021637025792, 94021637029887,
+SNULL, 140638016282623, 140638016286719,
+STORE, 140638016278528, 140638016282623,
+STORE, 140638016282624, 140638016286719,
+ERASE, 140638016249856, 140638016278527,
+STORE, 94021643124736, 94021643259903,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140731219275776, 140737488351231,
+SNULL, 140731219283967, 140737488351231,
+STORE, 140731219275776, 140731219283967,
+STORE, 140731219144704, 140731219283967,
+STORE, 93888803647488, 93888805871615,
+SNULL, 93888803758079, 93888805871615,
+STORE, 93888803647488, 93888803758079,
+STORE, 93888803758080, 93888805871615,
+ERASE, 93888803758080, 93888805871615,
+STORE, 93888805851136, 93888805863423,
+STORE, 93888805863424, 93888805871615,
+STORE, 139630576934912, 139630579187711,
+SNULL, 139630577078271, 139630579187711,
+STORE, 139630576934912, 139630577078271,
+STORE, 139630577078272, 139630579187711,
+ERASE, 139630577078272, 139630579187711,
+STORE, 139630579175424, 139630579183615,
+STORE, 139630579183616, 139630579187711,
+STORE, 140731219718144, 140731219722239,
+STORE, 140731219705856, 140731219718143,
+STORE, 139630579146752, 139630579175423,
+STORE, 139630579138560, 139630579146751,
+STORE, 139630573137920, 139630576934911,
+SNULL, 139630573137920, 139630574796799,
+STORE, 139630574796800, 139630576934911,
+STORE, 139630573137920, 139630574796799,
+SNULL, 139630576893951, 139630576934911,
+STORE, 139630574796800, 139630576893951,
+STORE, 139630576893952, 139630576934911,
+SNULL, 139630576893952, 139630576918527,
+STORE, 139630576918528, 139630576934911,
+STORE, 139630576893952, 139630576918527,
+ERASE, 139630576893952, 139630576918527,
+STORE, 139630576893952, 139630576918527,
+ERASE, 139630576918528, 139630576934911,
+STORE, 139630576918528, 139630576934911,
+SNULL, 139630576910335, 139630576918527,
+STORE, 139630576893952, 139630576910335,
+STORE, 139630576910336, 139630576918527,
+SNULL, 93888805859327, 93888805863423,
+STORE, 93888805851136, 93888805859327,
+STORE, 93888805859328, 93888805863423,
+SNULL, 139630579179519, 139630579183615,
+STORE, 139630579175424, 139630579179519,
+STORE, 139630579179520, 139630579183615,
+ERASE, 139630579146752, 139630579175423,
+STORE, 93888822235136, 93888822370303,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140733391151104, 140737488351231,
+SNULL, 140733391159295, 140737488351231,
+STORE, 140733391151104, 140733391159295,
+STORE, 140733391020032, 140733391159295,
+STORE, 94393875324928, 94393877549055,
+SNULL, 94393875435519, 94393877549055,
+STORE, 94393875324928, 94393875435519,
+STORE, 94393875435520, 94393877549055,
+ERASE, 94393875435520, 94393877549055,
+STORE, 94393877528576, 94393877540863,
+STORE, 94393877540864, 94393877549055,
+STORE, 140292111740928, 140292113993727,
+SNULL, 140292111884287, 140292113993727,
+STORE, 140292111740928, 140292111884287,
+STORE, 140292111884288, 140292113993727,
+ERASE, 140292111884288, 140292113993727,
+STORE, 140292113981440, 140292113989631,
+STORE, 140292113989632, 140292113993727,
+STORE, 140733391532032, 140733391536127,
+STORE, 140733391519744, 140733391532031,
+STORE, 140292113952768, 140292113981439,
+STORE, 140292113944576, 140292113952767,
+STORE, 140292107943936, 140292111740927,
+SNULL, 140292107943936, 140292109602815,
+STORE, 140292109602816, 140292111740927,
+STORE, 140292107943936, 140292109602815,
+SNULL, 140292111699967, 140292111740927,
+STORE, 140292109602816, 140292111699967,
+STORE, 140292111699968, 140292111740927,
+SNULL, 140292111699968, 140292111724543,
+STORE, 140292111724544, 140292111740927,
+STORE, 140292111699968, 140292111724543,
+ERASE, 140292111699968, 140292111724543,
+STORE, 140292111699968, 140292111724543,
+ERASE, 140292111724544, 140292111740927,
+STORE, 140292111724544, 140292111740927,
+SNULL, 140292111716351, 140292111724543,
+STORE, 140292111699968, 140292111716351,
+STORE, 140292111716352, 140292111724543,
+SNULL, 94393877536767, 94393877540863,
+STORE, 94393877528576, 94393877536767,
+STORE, 94393877536768, 94393877540863,
+SNULL, 140292113985535, 140292113989631,
+STORE, 140292113981440, 140292113985535,
+STORE, 140292113985536, 140292113989631,
+ERASE, 140292113952768, 140292113981439,
+STORE, 94393909342208, 94393909477375,
+STORE, 94458367512576, 94458367725567,
+STORE, 94458369822720, 94458369826815,
+STORE, 94458369826816, 94458369835007,
+STORE, 94458369835008, 94458369847295,
+STORE, 94458393292800, 94458399666175,
+STORE, 140619773841408, 140619775500287,
+STORE, 140619775500288, 140619777597439,
+STORE, 140619777597440, 140619777613823,
+STORE, 140619777613824, 140619777622015,
+STORE, 140619777622016, 140619777638399,
+STORE, 140619777638400, 140619777650687,
+STORE, 140619777650688, 140619779743743,
+STORE, 140619779743744, 140619779747839,
+STORE, 140619779747840, 140619779751935,
+STORE, 140619779751936, 140619779895295,
+STORE, 140619780263936, 140619781947391,
+STORE, 140619781947392, 140619781963775,
+STORE, 140619781992448, 140619781996543,
+STORE, 140619781996544, 140619782000639,
+STORE, 140619782000640, 140619782004735,
+STORE, 140725811675136, 140725811814399,
+STORE, 140725812813824, 140725812826111,
+STORE, 140725812826112, 140725812830207,
+STORE, 94458367512576, 94458367725567,
+STORE, 94458369822720, 94458369826815,
+STORE, 94458369826816, 94458369835007,
+STORE, 94458369835008, 94458369847295,
+STORE, 94458393292800, 94458400366591,
+STORE, 140619773841408, 140619775500287,
+STORE, 140619775500288, 140619777597439,
+STORE, 140619777597440, 140619777613823,
+STORE, 140619777613824, 140619777622015,
+STORE, 140619777622016, 140619777638399,
+STORE, 140619777638400, 140619777650687,
+STORE, 140619777650688, 140619779743743,
+STORE, 140619779743744, 140619779747839,
+STORE, 140619779747840, 140619779751935,
+STORE, 140619779751936, 140619779895295,
+STORE, 140619780263936, 140619781947391,
+STORE, 140619781947392, 140619781963775,
+STORE, 140619781992448, 140619781996543,
+STORE, 140619781996544, 140619782000639,
+STORE, 140619782000640, 140619782004735,
+STORE, 140725811675136, 140725811814399,
+STORE, 140725812813824, 140725812826111,
+STORE, 140725812826112, 140725812830207,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140728740679680, 140737488351231,
+SNULL, 140728740687871, 140737488351231,
+STORE, 140728740679680, 140728740687871,
+STORE, 140728740548608, 140728740687871,
+STORE, 94764075249664, 94764077473791,
+SNULL, 94764075360255, 94764077473791,
+STORE, 94764075249664, 94764075360255,
+STORE, 94764075360256, 94764077473791,
+ERASE, 94764075360256, 94764077473791,
+STORE, 94764077453312, 94764077465599,
+STORE, 94764077465600, 94764077473791,
+STORE, 139766406791168, 139766409043967,
+SNULL, 139766406934527, 139766409043967,
+STORE, 139766406791168, 139766406934527,
+STORE, 139766406934528, 139766409043967,
+ERASE, 139766406934528, 139766409043967,
+STORE, 139766409031680, 139766409039871,
+STORE, 139766409039872, 139766409043967,
+STORE, 140728740913152, 140728740917247,
+STORE, 140728740900864, 140728740913151,
+STORE, 139766409003008, 139766409031679,
+STORE, 139766408994816, 139766409003007,
+STORE, 139766402994176, 139766406791167,
+SNULL, 139766402994176, 139766404653055,
+STORE, 139766404653056, 139766406791167,
+STORE, 139766402994176, 139766404653055,
+SNULL, 139766406750207, 139766406791167,
+STORE, 139766404653056, 139766406750207,
+STORE, 139766406750208, 139766406791167,
+SNULL, 139766406750208, 139766406774783,
+STORE, 139766406774784, 139766406791167,
+STORE, 139766406750208, 139766406774783,
+ERASE, 139766406750208, 139766406774783,
+STORE, 139766406750208, 139766406774783,
+ERASE, 139766406774784, 139766406791167,
+STORE, 139766406774784, 139766406791167,
+SNULL, 139766406766591, 139766406774783,
+STORE, 139766406750208, 139766406766591,
+STORE, 139766406766592, 139766406774783,
+SNULL, 94764077461503, 94764077465599,
+STORE, 94764077453312, 94764077461503,
+STORE, 94764077461504, 94764077465599,
+SNULL, 139766409035775, 139766409039871,
+STORE, 139766409031680, 139766409035775,
+STORE, 139766409035776, 139766409039871,
+ERASE, 139766409003008, 139766409031679,
+STORE, 94764090458112, 94764090593279,
+STORE, 94758057480192, 94758057590783,
+STORE, 94758059683840, 94758059692031,
+STORE, 94758059692032, 94758059696127,
+STORE, 94758059696128, 94758059704319,
+STORE, 94758083215360, 94758083350527,
+STORE, 139951456772096, 139951458430975,
+STORE, 139951458430976, 139951460528127,
+STORE, 139951460528128, 139951460544511,
+STORE, 139951460544512, 139951460552703,
+STORE, 139951460552704, 139951460569087,
+STORE, 139951460569088, 139951460712447,
+STORE, 139951462772736, 139951462780927,
+STORE, 139951462809600, 139951462813695,
+STORE, 139951462813696, 139951462817791,
+STORE, 139951462817792, 139951462821887,
+STORE, 140734098313216, 140734098452479,
+STORE, 140734098911232, 140734098923519,
+STORE, 140734098923520, 140734098927615,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140724904095744, 140737488351231,
+SNULL, 140724904103935, 140737488351231,
+STORE, 140724904095744, 140724904103935,
+STORE, 140724903964672, 140724904103935,
+STORE, 4194304, 5128191,
+STORE, 7221248, 7241727,
+STORE, 7241728, 7249919,
+STORE, 140408497864704, 140408500117503,
+SNULL, 140408498008063, 140408500117503,
+STORE, 140408497864704, 140408498008063,
+STORE, 140408498008064, 140408500117503,
+ERASE, 140408498008064, 140408500117503,
+STORE, 140408500105216, 140408500113407,
+STORE, 140408500113408, 140408500117503,
+STORE, 140724905369600, 140724905373695,
+STORE, 140724905357312, 140724905369599,
+STORE, 140408500076544, 140408500105215,
+STORE, 140408500068352, 140408500076543,
+STORE, 140408494702592, 140408497864703,
+SNULL, 140408494702592, 140408495763455,
+STORE, 140408495763456, 140408497864703,
+STORE, 140408494702592, 140408495763455,
+SNULL, 140408497856511, 140408497864703,
+STORE, 140408495763456, 140408497856511,
+STORE, 140408497856512, 140408497864703,
+ERASE, 140408497856512, 140408497864703,
+STORE, 140408497856512, 140408497864703,
+STORE, 140408490905600, 140408494702591,
+SNULL, 140408490905600, 140408492564479,
+STORE, 140408492564480, 140408494702591,
+STORE, 140408490905600, 140408492564479,
+SNULL, 140408494661631, 140408494702591,
+STORE, 140408492564480, 140408494661631,
+STORE, 140408494661632, 140408494702591,
+SNULL, 140408494661632, 140408494686207,
+STORE, 140408494686208, 140408494702591,
+STORE, 140408494661632, 140408494686207,
+ERASE, 140408494661632, 140408494686207,
+STORE, 140408494661632, 140408494686207,
+ERASE, 140408494686208, 140408494702591,
+STORE, 140408494686208, 140408494702591,
+STORE, 140408500056064, 140408500076543,
+SNULL, 140408494678015, 140408494686207,
+STORE, 140408494661632, 140408494678015,
+STORE, 140408494678016, 140408494686207,
+SNULL, 140408497860607, 140408497864703,
+STORE, 140408497856512, 140408497860607,
+STORE, 140408497860608, 140408497864703,
+SNULL, 7233535, 7241727,
+STORE, 7221248, 7233535,
+STORE, 7233536, 7241727,
+SNULL, 140408500109311, 140408500113407,
+STORE, 140408500105216, 140408500109311,
+STORE, 140408500109312, 140408500113407,
+ERASE, 140408500076544, 140408500105215,
+STORE, 25235456, 25370623,
+STORE, 25235456, 25518079,
+STORE, 140408498372608, 140408500056063,
+STORE, 94543937388544, 94543937499135,
+STORE, 94543939592192, 94543939600383,
+STORE, 94543939600384, 94543939604479,
+STORE, 94543939604480, 94543939612671,
+STORE, 94543941447680, 94543941582847,
+STORE, 140282621947904, 140282623606783,
+STORE, 140282623606784, 140282625703935,
+STORE, 140282625703936, 140282625720319,
+STORE, 140282625720320, 140282625728511,
+STORE, 140282625728512, 140282625744895,
+STORE, 140282625744896, 140282625888255,
+STORE, 140282627948544, 140282627956735,
+STORE, 140282627985408, 140282627989503,
+STORE, 140282627989504, 140282627993599,
+STORE, 140282627993600, 140282627997695,
+STORE, 140728295723008, 140728295862271,
+STORE, 140728296476672, 140728296488959,
+STORE, 140728296488960, 140728296493055,
+STORE, 94431504838656, 94431505051647,
+STORE, 94431507148800, 94431507152895,
+STORE, 94431507152896, 94431507161087,
+STORE, 94431507161088, 94431507173375,
+STORE, 94431510286336, 94431510691839,
+STORE, 139818797948928, 139818799607807,
+STORE, 139818799607808, 139818801704959,
+STORE, 139818801704960, 139818801721343,
+STORE, 139818801721344, 139818801729535,
+STORE, 139818801729536, 139818801745919,
+STORE, 139818801745920, 139818801758207,
+STORE, 139818801758208, 139818803851263,
+STORE, 139818803851264, 139818803855359,
+STORE, 139818803855360, 139818803859455,
+STORE, 139818803859456, 139818804002815,
+STORE, 139818804371456, 139818806054911,
+STORE, 139818806054912, 139818806071295,
+STORE, 139818806099968, 139818806104063,
+STORE, 139818806104064, 139818806108159,
+STORE, 139818806108160, 139818806112255,
+STORE, 140731430457344, 140731430596607,
+STORE, 140731431227392, 140731431239679,
+STORE, 140731431239680, 140731431243775,
+STORE, 94431504838656, 94431505051647,
+STORE, 94431507148800, 94431507152895,
+STORE, 94431507152896, 94431507161087,
+STORE, 94431507161088, 94431507173375,
+STORE, 94431510286336, 94431510691839,
+STORE, 139818797948928, 139818799607807,
+STORE, 139818799607808, 139818801704959,
+STORE, 139818801704960, 139818801721343,
+STORE, 139818801721344, 139818801729535,
+STORE, 139818801729536, 139818801745919,
+STORE, 139818801745920, 139818801758207,
+STORE, 139818801758208, 139818803851263,
+STORE, 139818803851264, 139818803855359,
+STORE, 139818803855360, 139818803859455,
+STORE, 139818803859456, 139818804002815,
+STORE, 139818804371456, 139818806054911,
+STORE, 139818806054912, 139818806071295,
+STORE, 139818806099968, 139818806104063,
+STORE, 139818806104064, 139818806108159,
+STORE, 139818806108160, 139818806112255,
+STORE, 140731430457344, 140731430596607,
+STORE, 140731431227392, 140731431239679,
+STORE, 140731431239680, 140731431243775,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140737488338944, 140737488351231,
+STORE, 140736944451584, 140737488351231,
+SNULL, 140736944463871, 140737488351231,
+STORE, 140736944451584, 140736944463871,
+STORE, 140736944320512, 140736944463871,
+STORE, 4194304, 26279935,
+STORE, 28372992, 28454911,
+STORE, 28454912, 29806591,
+STORE, 139693609893888, 139693612146687,
+SNULL, 139693610037247, 139693612146687,
+STORE, 139693609893888, 139693610037247,
+STORE, 139693610037248, 139693612146687,
+ERASE, 139693610037248, 139693612146687,
+STORE, 139693612134400, 139693612142591,
+STORE, 139693612142592, 139693612146687,
+STORE, 140736945152000, 140736945156095,
+STORE, 140736945139712, 140736945151999,
+STORE, 139693612105728, 139693612134399,
+STORE, 139693612097536, 139693612105727,
+STORE, 139693606060032, 139693609893887,
+SNULL, 139693606060032, 139693607768063,
+STORE, 139693607768064, 139693609893887,
+STORE, 139693606060032, 139693607768063,
+SNULL, 139693609861119, 139693609893887,
+STORE, 139693607768064, 139693609861119,
+STORE, 139693609861120, 139693609893887,
+ERASE, 139693609861120, 139693609893887,
+STORE, 139693609861120, 139693609893887,
+STORE, 139693603864576, 139693606060031,
+SNULL, 139693603864576, 139693603958783,
+STORE, 139693603958784, 139693606060031,
+STORE, 139693603864576, 139693603958783,
+SNULL, 139693606051839, 139693606060031,
+STORE, 139693603958784, 139693606051839,
+STORE, 139693606051840, 139693606060031,
+ERASE, 139693606051840, 139693606060031,
+STORE, 139693606051840, 139693606060031,
+STORE, 139693601345536, 139693603864575,
+SNULL, 139693601345536, 139693601759231,
+STORE, 139693601759232, 139693603864575,
+STORE, 139693601345536, 139693601759231,
+SNULL, 139693603852287, 139693603864575,
+STORE, 139693601759232, 139693603852287,
+STORE, 139693603852288, 139693603864575,
+ERASE, 139693603852288, 139693603864575,
+STORE, 139693603852288, 139693603864575,
+STORE, 139693598711808, 139693601345535,
+SNULL, 139693598711808, 139693599240191,
+STORE, 139693599240192, 139693601345535,
+STORE, 139693598711808, 139693599240191,
+SNULL, 139693601337343, 139693601345535,
+STORE, 139693599240192, 139693601337343,
+STORE, 139693601337344, 139693601345535,
+ERASE, 139693601337344, 139693601345535,
+STORE, 139693601337344, 139693601345535,
+STORE, 139693596598272, 139693598711807,
+SNULL, 139693596598272, 139693596610559,
+STORE, 139693596610560, 139693598711807,
+STORE, 139693596598272, 139693596610559,
+SNULL, 139693598703615, 139693598711807,
+STORE, 139693596610560, 139693598703615,
+STORE, 139693598703616, 139693598711807,
+ERASE, 139693598703616, 139693598711807,
+STORE, 139693598703616, 139693598711807,
+STORE, 139693594394624, 139693596598271,
+SNULL, 139693594394624, 139693594497023,
+STORE, 139693594497024, 139693596598271,
+STORE, 139693594394624, 139693594497023,
+SNULL, 139693596590079, 139693596598271,
+STORE, 139693594497024, 139693596590079,
+STORE, 139693596590080, 139693596598271,
+ERASE, 139693596590080, 139693596598271,
+STORE, 139693596590080, 139693596598271,
+STORE, 139693612089344, 139693612105727,
+STORE, 139693591232512, 139693594394623,
+SNULL, 139693591232512, 139693592293375,
+STORE, 139693592293376, 139693594394623,
+STORE, 139693591232512, 139693592293375,
+SNULL, 139693594386431, 139693594394623,
+STORE, 139693592293376, 139693594386431,
+STORE, 139693594386432, 139693594394623,
+ERASE, 139693594386432, 139693594394623,
+STORE, 139693594386432, 139693594394623,
+STORE, 139693587435520, 139693591232511,
+SNULL, 139693587435520, 139693589094399,
+STORE, 139693589094400, 139693591232511,
+STORE, 139693587435520, 139693589094399,
+SNULL, 139693591191551, 139693591232511,
+STORE, 139693589094400, 139693591191551,
+STORE, 139693591191552, 139693591232511,
+SNULL, 139693591191552, 139693591216127,
+STORE, 139693591216128, 139693591232511,
+STORE, 139693591191552, 139693591216127,
+ERASE, 139693591191552, 139693591216127,
+STORE, 139693591191552, 139693591216127,
+ERASE, 139693591216128, 139693591232511,
+STORE, 139693591216128, 139693591232511,
+STORE, 139693612077056, 139693612105727,
+SNULL, 139693591207935, 139693591216127,
+STORE, 139693591191552, 139693591207935,
+STORE, 139693591207936, 139693591216127,
+SNULL, 139693594390527, 139693594394623,
+STORE, 139693594386432, 139693594390527,
+STORE, 139693594390528, 139693594394623,
+SNULL, 139693596594175, 139693596598271,
+STORE, 139693596590080, 139693596594175,
+STORE, 139693596594176, 139693596598271,
+SNULL, 139693598707711, 139693598711807,
+STORE, 139693598703616, 139693598707711,
+STORE, 139693598707712, 139693598711807,
+SNULL, 139693601341439, 139693601345535,
+STORE, 139693601337344, 139693601341439,
+STORE, 139693601341440, 139693601345535,
+SNULL, 139693603860479, 139693603864575,
+STORE, 139693603852288, 139693603860479,
+STORE, 139693603860480, 139693603864575,
+SNULL, 139693606055935, 139693606060031,
+STORE, 139693606051840, 139693606055935,
+STORE, 139693606055936, 139693606060031,
+SNULL, 139693609865215, 139693609893887,
+STORE, 139693609861120, 139693609865215,
+STORE, 139693609865216, 139693609893887,
+SNULL, 28405759, 28454911,
+STORE, 28372992, 28405759,
+STORE, 28405760, 28454911,
+SNULL, 139693612138495, 139693612142591,
+STORE, 139693612134400, 139693612138495,
+STORE, 139693612138496, 139693612142591,
+ERASE, 139693612105728, 139693612134399,
+STORE, 39976960, 40112127,
+STORE, 139693610393600, 139693612077055,
+STORE, 139693612130304, 139693612134399,
+STORE, 139693610258432, 139693610393599,
+STORE, 39976960, 40255487,
+STORE, 139693585338368, 139693587435519,
+STORE, 139693612122112, 139693612134399,
+STORE, 139693612113920, 139693612134399,
+STORE, 139693612077056, 139693612113919,
+STORE, 139693610242048, 139693610393599,
+STORE, 39976960, 40390655,
+STORE, 39976960, 40546303,
+STORE, 139693610233856, 139693610393599,
+STORE, 139693610225664, 139693610393599,
+STORE, 39976960, 40714239,
+STORE, 139693610209280, 139693610393599,
+STORE, 39976960, 40861695,
+STORE, 94431504838656, 94431505051647,
+STORE, 94431507148800, 94431507152895,
+STORE, 94431507152896, 94431507161087,
+STORE, 94431507161088, 94431507173375,
+STORE, 94431510286336, 94431528759295,
+STORE, 139818797948928, 139818799607807,
+STORE, 139818799607808, 139818801704959,
+STORE, 139818801704960, 139818801721343,
+STORE, 139818801721344, 139818801729535,
+STORE, 139818801729536, 139818801745919,
+STORE, 139818801745920, 139818801758207,
+STORE, 139818801758208, 139818803851263,
+STORE, 139818803851264, 139818803855359,
+STORE, 139818803855360, 139818803859455,
+STORE, 139818803859456, 139818804002815,
+STORE, 139818804371456, 139818806054911,
+STORE, 139818806054912, 139818806071295,
+STORE, 139818806099968, 139818806104063,
+STORE, 139818806104064, 139818806108159,
+STORE, 139818806108160, 139818806112255,
+STORE, 140731430457344, 140731430596607,
+STORE, 140731431227392, 140731431239679,
+STORE, 140731431239680, 140731431243775,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140729993904128, 140737488351231,
+SNULL, 140729993912319, 140737488351231,
+STORE, 140729993904128, 140729993912319,
+STORE, 140729993773056, 140729993912319,
+STORE, 93926271991808, 93926274215935,
+SNULL, 93926272102399, 93926274215935,
+STORE, 93926271991808, 93926272102399,
+STORE, 93926272102400, 93926274215935,
+ERASE, 93926272102400, 93926274215935,
+STORE, 93926274195456, 93926274207743,
+STORE, 93926274207744, 93926274215935,
+STORE, 139962167296000, 139962169548799,
+SNULL, 139962167439359, 139962169548799,
+STORE, 139962167296000, 139962167439359,
+STORE, 139962167439360, 139962169548799,
+ERASE, 139962167439360, 139962169548799,
+STORE, 139962169536512, 139962169544703,
+STORE, 139962169544704, 139962169548799,
+STORE, 140729995096064, 140729995100159,
+STORE, 140729995083776, 140729995096063,
+STORE, 139962169507840, 139962169536511,
+STORE, 139962169499648, 139962169507839,
+STORE, 139962163499008, 139962167295999,
+SNULL, 139962163499008, 139962165157887,
+STORE, 139962165157888, 139962167295999,
+STORE, 139962163499008, 139962165157887,
+SNULL, 139962167255039, 139962167295999,
+STORE, 139962165157888, 139962167255039,
+STORE, 139962167255040, 139962167295999,
+SNULL, 139962167255040, 139962167279615,
+STORE, 139962167279616, 139962167295999,
+STORE, 139962167255040, 139962167279615,
+ERASE, 139962167255040, 139962167279615,
+STORE, 139962167255040, 139962167279615,
+ERASE, 139962167279616, 139962167295999,
+STORE, 139962167279616, 139962167295999,
+SNULL, 139962167271423, 139962167279615,
+STORE, 139962167255040, 139962167271423,
+STORE, 139962167271424, 139962167279615,
+SNULL, 93926274203647, 93926274207743,
+STORE, 93926274195456, 93926274203647,
+STORE, 93926274203648, 93926274207743,
+SNULL, 139962169540607, 139962169544703,
+STORE, 139962169536512, 139962169540607,
+STORE, 139962169540608, 139962169544703,
+ERASE, 139962169507840, 139962169536511,
+STORE, 93926291120128, 93926291255295,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140724960579584, 140737488351231,
+SNULL, 140724960587775, 140737488351231,
+STORE, 140724960579584, 140724960587775,
+STORE, 140724960448512, 140724960587775,
+STORE, 94246489489408, 94246491713535,
+SNULL, 94246489599999, 94246491713535,
+STORE, 94246489489408, 94246489599999,
+STORE, 94246489600000, 94246491713535,
+ERASE, 94246489600000, 94246491713535,
+STORE, 94246491693056, 94246491705343,
+STORE, 94246491705344, 94246491713535,
+STORE, 140098174926848, 140098177179647,
+SNULL, 140098175070207, 140098177179647,
+STORE, 140098174926848, 140098175070207,
+STORE, 140098175070208, 140098177179647,
+ERASE, 140098175070208, 140098177179647,
+STORE, 140098177167360, 140098177175551,
+STORE, 140098177175552, 140098177179647,
+STORE, 140724961439744, 140724961443839,
+STORE, 140724961427456, 140724961439743,
+STORE, 140098177138688, 140098177167359,
+STORE, 140098177130496, 140098177138687,
+STORE, 140098171129856, 140098174926847,
+SNULL, 140098171129856, 140098172788735,
+STORE, 140098172788736, 140098174926847,
+STORE, 140098171129856, 140098172788735,
+SNULL, 140098174885887, 140098174926847,
+STORE, 140098172788736, 140098174885887,
+STORE, 140098174885888, 140098174926847,
+SNULL, 140098174885888, 140098174910463,
+STORE, 140098174910464, 140098174926847,
+STORE, 140098174885888, 140098174910463,
+ERASE, 140098174885888, 140098174910463,
+STORE, 140098174885888, 140098174910463,
+ERASE, 140098174910464, 140098174926847,
+STORE, 140098174910464, 140098174926847,
+SNULL, 140098174902271, 140098174910463,
+STORE, 140098174885888, 140098174902271,
+STORE, 140098174902272, 140098174910463,
+SNULL, 94246491701247, 94246491705343,
+STORE, 94246491693056, 94246491701247,
+STORE, 94246491701248, 94246491705343,
+SNULL, 140098177171455, 140098177175551,
+STORE, 140098177167360, 140098177171455,
+STORE, 140098177171456, 140098177175551,
+ERASE, 140098177138688, 140098177167359,
+STORE, 94246516998144, 94246517133311,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140730522918912, 140737488351231,
+SNULL, 140730522927103, 140737488351231,
+STORE, 140730522918912, 140730522927103,
+STORE, 140730522787840, 140730522927103,
+STORE, 94196043120640, 94196045344767,
+SNULL, 94196043231231, 94196045344767,
+STORE, 94196043120640, 94196043231231,
+STORE, 94196043231232, 94196045344767,
+ERASE, 94196043231232, 94196045344767,
+STORE, 94196045324288, 94196045336575,
+STORE, 94196045336576, 94196045344767,
+STORE, 139815918940160, 139815921192959,
+SNULL, 139815919083519, 139815921192959,
+STORE, 139815918940160, 139815919083519,
+STORE, 139815919083520, 139815921192959,
+ERASE, 139815919083520, 139815921192959,
+STORE, 139815921180672, 139815921188863,
+STORE, 139815921188864, 139815921192959,
+STORE, 140730523344896, 140730523348991,
+STORE, 140730523332608, 140730523344895,
+STORE, 139815921152000, 139815921180671,
+STORE, 139815921143808, 139815921151999,
+STORE, 139815915143168, 139815918940159,
+SNULL, 139815915143168, 139815916802047,
+STORE, 139815916802048, 139815918940159,
+STORE, 139815915143168, 139815916802047,
+SNULL, 139815918899199, 139815918940159,
+STORE, 139815916802048, 139815918899199,
+STORE, 139815918899200, 139815918940159,
+SNULL, 139815918899200, 139815918923775,
+STORE, 139815918923776, 139815918940159,
+STORE, 139815918899200, 139815918923775,
+ERASE, 139815918899200, 139815918923775,
+STORE, 139815918899200, 139815918923775,
+ERASE, 139815918923776, 139815918940159,
+STORE, 139815918923776, 139815918940159,
+SNULL, 139815918915583, 139815918923775,
+STORE, 139815918899200, 139815918915583,
+STORE, 139815918915584, 139815918923775,
+SNULL, 94196045332479, 94196045336575,
+STORE, 94196045324288, 94196045332479,
+STORE, 94196045332480, 94196045336575,
+SNULL, 139815921184767, 139815921188863,
+STORE, 139815921180672, 139815921184767,
+STORE, 139815921184768, 139815921188863,
+ERASE, 139815921152000, 139815921180671,
+STORE, 94196076183552, 94196076318719,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722460393472, 140737488351231,
+SNULL, 140722460401663, 140737488351231,
+STORE, 140722460393472, 140722460401663,
+STORE, 140722460262400, 140722460401663,
+STORE, 94569810399232, 94569812623359,
+SNULL, 94569810509823, 94569812623359,
+STORE, 94569810399232, 94569810509823,
+STORE, 94569810509824, 94569812623359,
+ERASE, 94569810509824, 94569812623359,
+STORE, 94569812602880, 94569812615167,
+STORE, 94569812615168, 94569812623359,
+STORE, 139681565450240, 139681567703039,
+SNULL, 139681565593599, 139681567703039,
+STORE, 139681565450240, 139681565593599,
+STORE, 139681565593600, 139681567703039,
+ERASE, 139681565593600, 139681567703039,
+STORE, 139681567690752, 139681567698943,
+STORE, 139681567698944, 139681567703039,
+STORE, 140722460569600, 140722460573695,
+STORE, 140722460557312, 140722460569599,
+STORE, 139681567662080, 139681567690751,
+STORE, 139681567653888, 139681567662079,
+STORE, 139681561653248, 139681565450239,
+SNULL, 139681561653248, 139681563312127,
+STORE, 139681563312128, 139681565450239,
+STORE, 139681561653248, 139681563312127,
+SNULL, 139681565409279, 139681565450239,
+STORE, 139681563312128, 139681565409279,
+STORE, 139681565409280, 139681565450239,
+SNULL, 139681565409280, 139681565433855,
+STORE, 139681565433856, 139681565450239,
+STORE, 139681565409280, 139681565433855,
+ERASE, 139681565409280, 139681565433855,
+STORE, 139681565409280, 139681565433855,
+ERASE, 139681565433856, 139681565450239,
+STORE, 139681565433856, 139681565450239,
+SNULL, 139681565425663, 139681565433855,
+STORE, 139681565409280, 139681565425663,
+STORE, 139681565425664, 139681565433855,
+SNULL, 94569812611071, 94569812615167,
+STORE, 94569812602880, 94569812611071,
+STORE, 94569812611072, 94569812615167,
+SNULL, 139681567694847, 139681567698943,
+STORE, 139681567690752, 139681567694847,
+STORE, 139681567694848, 139681567698943,
+ERASE, 139681567662080, 139681567690751,
+STORE, 94569818066944, 94569818202111,
+STORE, 94431504838656, 94431505051647,
+STORE, 94431507148800, 94431507152895,
+STORE, 94431507152896, 94431507161087,
+STORE, 94431507161088, 94431507173375,
+STORE, 94431510286336, 94431534280703,
+STORE, 139818797948928, 139818799607807,
+STORE, 139818799607808, 139818801704959,
+STORE, 139818801704960, 139818801721343,
+STORE, 139818801721344, 139818801729535,
+STORE, 139818801729536, 139818801745919,
+STORE, 139818801745920, 139818801758207,
+STORE, 139818801758208, 139818803851263,
+STORE, 139818803851264, 139818803855359,
+STORE, 139818803855360, 139818803859455,
+STORE, 139818803859456, 139818804002815,
+STORE, 139818804371456, 139818806054911,
+STORE, 139818806054912, 139818806071295,
+STORE, 139818806099968, 139818806104063,
+STORE, 139818806104064, 139818806108159,
+STORE, 139818806108160, 139818806112255,
+STORE, 140731430457344, 140731430596607,
+STORE, 140731431227392, 140731431239679,
+STORE, 140731431239680, 140731431243775,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140725452365824, 140737488351231,
+SNULL, 140725452374015, 140737488351231,
+STORE, 140725452365824, 140725452374015,
+STORE, 140725452234752, 140725452374015,
+STORE, 94395067465728, 94395069689855,
+SNULL, 94395067576319, 94395069689855,
+STORE, 94395067465728, 94395067576319,
+STORE, 94395067576320, 94395069689855,
+ERASE, 94395067576320, 94395069689855,
+STORE, 94395069669376, 94395069681663,
+STORE, 94395069681664, 94395069689855,
+STORE, 140269941211136, 140269943463935,
+SNULL, 140269941354495, 140269943463935,
+STORE, 140269941211136, 140269941354495,
+STORE, 140269941354496, 140269943463935,
+ERASE, 140269941354496, 140269943463935,
+STORE, 140269943451648, 140269943459839,
+STORE, 140269943459840, 140269943463935,
+STORE, 140725452558336, 140725452562431,
+STORE, 140725452546048, 140725452558335,
+STORE, 140269943422976, 140269943451647,
+STORE, 140269943414784, 140269943422975,
+STORE, 140269937414144, 140269941211135,
+SNULL, 140269937414144, 140269939073023,
+STORE, 140269939073024, 140269941211135,
+STORE, 140269937414144, 140269939073023,
+SNULL, 140269941170175, 140269941211135,
+STORE, 140269939073024, 140269941170175,
+STORE, 140269941170176, 140269941211135,
+SNULL, 140269941170176, 140269941194751,
+STORE, 140269941194752, 140269941211135,
+STORE, 140269941170176, 140269941194751,
+ERASE, 140269941170176, 140269941194751,
+STORE, 140269941170176, 140269941194751,
+ERASE, 140269941194752, 140269941211135,
+STORE, 140269941194752, 140269941211135,
+SNULL, 140269941186559, 140269941194751,
+STORE, 140269941170176, 140269941186559,
+STORE, 140269941186560, 140269941194751,
+SNULL, 94395069677567, 94395069681663,
+STORE, 94395069669376, 94395069677567,
+STORE, 94395069677568, 94395069681663,
+SNULL, 140269943455743, 140269943459839,
+STORE, 140269943451648, 140269943455743,
+STORE, 140269943455744, 140269943459839,
+ERASE, 140269943422976, 140269943451647,
+STORE, 94395101691904, 94395101827071,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140733860118528, 140737488351231,
+SNULL, 140733860126719, 140737488351231,
+STORE, 140733860118528, 140733860126719,
+STORE, 140733859987456, 140733860126719,
+STORE, 94484752990208, 94484755214335,
+SNULL, 94484753100799, 94484755214335,
+STORE, 94484752990208, 94484753100799,
+STORE, 94484753100800, 94484755214335,
+ERASE, 94484753100800, 94484755214335,
+STORE, 94484755193856, 94484755206143,
+STORE, 94484755206144, 94484755214335,
+STORE, 139958922309632, 139958924562431,
+SNULL, 139958922452991, 139958924562431,
+STORE, 139958922309632, 139958922452991,
+STORE, 139958922452992, 139958924562431,
+ERASE, 139958922452992, 139958924562431,
+STORE, 139958924550144, 139958924558335,
+STORE, 139958924558336, 139958924562431,
+STORE, 140733860253696, 140733860257791,
+STORE, 140733860241408, 140733860253695,
+STORE, 139958924521472, 139958924550143,
+STORE, 139958924513280, 139958924521471,
+STORE, 139958918512640, 139958922309631,
+SNULL, 139958918512640, 139958920171519,
+STORE, 139958920171520, 139958922309631,
+STORE, 139958918512640, 139958920171519,
+SNULL, 139958922268671, 139958922309631,
+STORE, 139958920171520, 139958922268671,
+STORE, 139958922268672, 139958922309631,
+SNULL, 139958922268672, 139958922293247,
+STORE, 139958922293248, 139958922309631,
+STORE, 139958922268672, 139958922293247,
+ERASE, 139958922268672, 139958922293247,
+STORE, 139958922268672, 139958922293247,
+ERASE, 139958922293248, 139958922309631,
+STORE, 139958922293248, 139958922309631,
+SNULL, 139958922285055, 139958922293247,
+STORE, 139958922268672, 139958922285055,
+STORE, 139958922285056, 139958922293247,
+SNULL, 94484755202047, 94484755206143,
+STORE, 94484755193856, 94484755202047,
+STORE, 94484755202048, 94484755206143,
+SNULL, 139958924554239, 139958924558335,
+STORE, 139958924550144, 139958924554239,
+STORE, 139958924554240, 139958924558335,
+ERASE, 139958924521472, 139958924550143,
+STORE, 94484777615360, 94484777750527,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140731051036672, 140737488351231,
+SNULL, 140731051044863, 140737488351231,
+STORE, 140731051036672, 140731051044863,
+STORE, 140731050905600, 140731051044863,
+STORE, 93945822998528, 93945825222655,
+SNULL, 93945823109119, 93945825222655,
+STORE, 93945822998528, 93945823109119,
+STORE, 93945823109120, 93945825222655,
+ERASE, 93945823109120, 93945825222655,
+STORE, 93945825202176, 93945825214463,
+STORE, 93945825214464, 93945825222655,
+STORE, 140153503997952, 140153506250751,
+SNULL, 140153504141311, 140153506250751,
+STORE, 140153503997952, 140153504141311,
+STORE, 140153504141312, 140153506250751,
+ERASE, 140153504141312, 140153506250751,
+STORE, 140153506238464, 140153506246655,
+STORE, 140153506246656, 140153506250751,
+STORE, 140731051331584, 140731051335679,
+STORE, 140731051319296, 140731051331583,
+STORE, 140153506209792, 140153506238463,
+STORE, 140153506201600, 140153506209791,
+STORE, 140153500200960, 140153503997951,
+SNULL, 140153500200960, 140153501859839,
+STORE, 140153501859840, 140153503997951,
+STORE, 140153500200960, 140153501859839,
+SNULL, 140153503956991, 140153503997951,
+STORE, 140153501859840, 140153503956991,
+STORE, 140153503956992, 140153503997951,
+SNULL, 140153503956992, 140153503981567,
+STORE, 140153503981568, 140153503997951,
+STORE, 140153503956992, 140153503981567,
+ERASE, 140153503956992, 140153503981567,
+STORE, 140153503956992, 140153503981567,
+ERASE, 140153503981568, 140153503997951,
+STORE, 140153503981568, 140153503997951,
+SNULL, 140153503973375, 140153503981567,
+STORE, 140153503956992, 140153503973375,
+STORE, 140153503973376, 140153503981567,
+SNULL, 93945825210367, 93945825214463,
+STORE, 93945825202176, 93945825210367,
+STORE, 93945825210368, 93945825214463,
+SNULL, 140153506242559, 140153506246655,
+STORE, 140153506238464, 140153506242559,
+STORE, 140153506242560, 140153506246655,
+ERASE, 140153506209792, 140153506238463,
+STORE, 93945854537728, 93945854672895,
+STORE, 94431504838656, 94431505051647,
+STORE, 94431507148800, 94431507152895,
+STORE, 94431507152896, 94431507161087,
+STORE, 94431507161088, 94431507173375,
+STORE, 94431510286336, 94431537885183,
+STORE, 139818797948928, 139818799607807,
+STORE, 139818799607808, 139818801704959,
+STORE, 139818801704960, 139818801721343,
+STORE, 139818801721344, 139818801729535,
+STORE, 139818801729536, 139818801745919,
+STORE, 139818801745920, 139818801758207,
+STORE, 139818801758208, 139818803851263,
+STORE, 139818803851264, 139818803855359,
+STORE, 139818803855360, 139818803859455,
+STORE, 139818803859456, 139818804002815,
+STORE, 139818804371456, 139818806054911,
+STORE, 139818806054912, 139818806071295,
+STORE, 139818806099968, 139818806104063,
+STORE, 139818806104064, 139818806108159,
+STORE, 139818806108160, 139818806112255,
+STORE, 140731430457344, 140731430596607,
+STORE, 140731431227392, 140731431239679,
+STORE, 140731431239680, 140731431243775,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140736025325568, 140737488351231,
+SNULL, 140736025333759, 140737488351231,
+STORE, 140736025325568, 140736025333759,
+STORE, 140736025194496, 140736025333759,
+STORE, 94809095172096, 94809097396223,
+SNULL, 94809095282687, 94809097396223,
+STORE, 94809095172096, 94809095282687,
+STORE, 94809095282688, 94809097396223,
+ERASE, 94809095282688, 94809097396223,
+STORE, 94809097375744, 94809097388031,
+STORE, 94809097388032, 94809097396223,
+STORE, 140194992517120, 140194994769919,
+SNULL, 140194992660479, 140194994769919,
+STORE, 140194992517120, 140194992660479,
+STORE, 140194992660480, 140194994769919,
+ERASE, 140194992660480, 140194994769919,
+STORE, 140194994757632, 140194994765823,
+STORE, 140194994765824, 140194994769919,
+STORE, 140736026173440, 140736026177535,
+STORE, 140736026161152, 140736026173439,
+STORE, 140194994728960, 140194994757631,
+STORE, 140194994720768, 140194994728959,
+STORE, 140194988720128, 140194992517119,
+SNULL, 140194988720128, 140194990379007,
+STORE, 140194990379008, 140194992517119,
+STORE, 140194988720128, 140194990379007,
+SNULL, 140194992476159, 140194992517119,
+STORE, 140194990379008, 140194992476159,
+STORE, 140194992476160, 140194992517119,
+SNULL, 140194992476160, 140194992500735,
+STORE, 140194992500736, 140194992517119,
+STORE, 140194992476160, 140194992500735,
+ERASE, 140194992476160, 140194992500735,
+STORE, 140194992476160, 140194992500735,
+ERASE, 140194992500736, 140194992517119,
+STORE, 140194992500736, 140194992517119,
+SNULL, 140194992492543, 140194992500735,
+STORE, 140194992476160, 140194992492543,
+STORE, 140194992492544, 140194992500735,
+SNULL, 94809097383935, 94809097388031,
+STORE, 94809097375744, 94809097383935,
+STORE, 94809097383936, 94809097388031,
+SNULL, 140194994761727, 140194994765823,
+STORE, 140194994757632, 140194994761727,
+STORE, 140194994761728, 140194994765823,
+ERASE, 140194994728960, 140194994757631,
+STORE, 94809124286464, 94809124421631,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140726342660096, 140737488351231,
+SNULL, 140726342668287, 140737488351231,
+STORE, 140726342660096, 140726342668287,
+STORE, 140726342529024, 140726342668287,
+STORE, 94140331462656, 94140333686783,
+SNULL, 94140331573247, 94140333686783,
+STORE, 94140331462656, 94140331573247,
+STORE, 94140331573248, 94140333686783,
+ERASE, 94140331573248, 94140333686783,
+STORE, 94140333666304, 94140333678591,
+STORE, 94140333678592, 94140333686783,
+STORE, 140714077208576, 140714079461375,
+SNULL, 140714077351935, 140714079461375,
+STORE, 140714077208576, 140714077351935,
+STORE, 140714077351936, 140714079461375,
+ERASE, 140714077351936, 140714079461375,
+STORE, 140714079449088, 140714079457279,
+STORE, 140714079457280, 140714079461375,
+STORE, 140726343933952, 140726343938047,
+STORE, 140726343921664, 140726343933951,
+STORE, 140714079420416, 140714079449087,
+STORE, 140714079412224, 140714079420415,
+STORE, 140714073411584, 140714077208575,
+SNULL, 140714073411584, 140714075070463,
+STORE, 140714075070464, 140714077208575,
+STORE, 140714073411584, 140714075070463,
+SNULL, 140714077167615, 140714077208575,
+STORE, 140714075070464, 140714077167615,
+STORE, 140714077167616, 140714077208575,
+SNULL, 140714077167616, 140714077192191,
+STORE, 140714077192192, 140714077208575,
+STORE, 140714077167616, 140714077192191,
+ERASE, 140714077167616, 140714077192191,
+STORE, 140714077167616, 140714077192191,
+ERASE, 140714077192192, 140714077208575,
+STORE, 140714077192192, 140714077208575,
+SNULL, 140714077183999, 140714077192191,
+STORE, 140714077167616, 140714077183999,
+STORE, 140714077184000, 140714077192191,
+SNULL, 94140333674495, 94140333678591,
+STORE, 94140333666304, 94140333674495,
+STORE, 94140333674496, 94140333678591,
+SNULL, 140714079453183, 140714079457279,
+STORE, 140714079449088, 140714079453183,
+STORE, 140714079453184, 140714079457279,
+ERASE, 140714079420416, 140714079449087,
+STORE, 94140341432320, 94140341567487,
+STORE, 94431504838656, 94431505051647,
+STORE, 94431507148800, 94431507152895,
+STORE, 94431507152896, 94431507161087,
+STORE, 94431507161088, 94431507173375,
+STORE, 94431510286336, 94431539601407,
+STORE, 139818797948928, 139818799607807,
+STORE, 139818799607808, 139818801704959,
+STORE, 139818801704960, 139818801721343,
+STORE, 139818801721344, 139818801729535,
+STORE, 139818801729536, 139818801745919,
+STORE, 139818801745920, 139818801758207,
+STORE, 139818801758208, 139818803851263,
+STORE, 139818803851264, 139818803855359,
+STORE, 139818803855360, 139818803859455,
+STORE, 139818803859456, 139818804002815,
+STORE, 139818804371456, 139818806054911,
+STORE, 139818806054912, 139818806071295,
+STORE, 139818806099968, 139818806104063,
+STORE, 139818806104064, 139818806108159,
+STORE, 139818806108160, 139818806112255,
+STORE, 140731430457344, 140731430596607,
+STORE, 140731431227392, 140731431239679,
+STORE, 140731431239680, 140731431243775,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140725843607552, 140737488351231,
+SNULL, 140725843615743, 140737488351231,
+STORE, 140725843607552, 140725843615743,
+STORE, 140725843476480, 140725843615743,
+STORE, 94889043505152, 94889045839871,
+SNULL, 94889043718143, 94889045839871,
+STORE, 94889043505152, 94889043718143,
+STORE, 94889043718144, 94889045839871,
+ERASE, 94889043718144, 94889045839871,
+STORE, 94889045815296, 94889045827583,
+STORE, 94889045827584, 94889045839871,
+STORE, 140250965946368, 140250968199167,
+SNULL, 140250966089727, 140250968199167,
+STORE, 140250965946368, 140250966089727,
+STORE, 140250966089728, 140250968199167,
+ERASE, 140250966089728, 140250968199167,
+STORE, 140250968186880, 140250968195071,
+STORE, 140250968195072, 140250968199167,
+STORE, 140725844500480, 140725844504575,
+STORE, 140725844488192, 140725844500479,
+STORE, 140250968158208, 140250968186879,
+STORE, 140250968150016, 140250968158207,
+STORE, 140250963832832, 140250965946367,
+SNULL, 140250963832832, 140250963845119,
+STORE, 140250963845120, 140250965946367,
+STORE, 140250963832832, 140250963845119,
+SNULL, 140250965938175, 140250965946367,
+STORE, 140250963845120, 140250965938175,
+STORE, 140250965938176, 140250965946367,
+ERASE, 140250965938176, 140250965946367,
+STORE, 140250965938176, 140250965946367,
+STORE, 140250960035840, 140250963832831,
+SNULL, 140250960035840, 140250961694719,
+STORE, 140250961694720, 140250963832831,
+STORE, 140250960035840, 140250961694719,
+SNULL, 140250963791871, 140250963832831,
+STORE, 140250961694720, 140250963791871,
+STORE, 140250963791872, 140250963832831,
+SNULL, 140250963791872, 140250963816447,
+STORE, 140250963816448, 140250963832831,
+STORE, 140250963791872, 140250963816447,
+ERASE, 140250963791872, 140250963816447,
+STORE, 140250963791872, 140250963816447,
+ERASE, 140250963816448, 140250963832831,
+STORE, 140250963816448, 140250963832831,
+STORE, 140250968141824, 140250968158207,
+SNULL, 140250963808255, 140250963816447,
+STORE, 140250963791872, 140250963808255,
+STORE, 140250963808256, 140250963816447,
+SNULL, 140250965942271, 140250965946367,
+STORE, 140250965938176, 140250965942271,
+STORE, 140250965942272, 140250965946367,
+SNULL, 94889045819391, 94889045827583,
+STORE, 94889045815296, 94889045819391,
+STORE, 94889045819392, 94889045827583,
+SNULL, 140250968190975, 140250968195071,
+STORE, 140250968186880, 140250968190975,
+STORE, 140250968190976, 140250968195071,
+ERASE, 140250968158208, 140250968186879,
+STORE, 94889052213248, 94889052348415,
+STORE, 140250966458368, 140250968141823,
+STORE, 94889052213248, 94889052483583,
+STORE, 94889052213248, 94889052618751,
+STORE, 94170851819520, 94170852032511,
+STORE, 94170854129664, 94170854133759,
+STORE, 94170854133760, 94170854141951,
+STORE, 94170854141952, 94170854154239,
+STORE, 94170866515968, 94170867740671,
+STORE, 140062030422016, 140062032080895,
+STORE, 140062032080896, 140062034178047,
+STORE, 140062034178048, 140062034194431,
+STORE, 140062034194432, 140062034202623,
+STORE, 140062034202624, 140062034219007,
+STORE, 140062034219008, 140062034231295,
+STORE, 140062034231296, 140062036324351,
+STORE, 140062036324352, 140062036328447,
+STORE, 140062036328448, 140062036332543,
+STORE, 140062036332544, 140062036475903,
+STORE, 140062036844544, 140062038527999,
+STORE, 140062038528000, 140062038544383,
+STORE, 140062038573056, 140062038577151,
+STORE, 140062038577152, 140062038581247,
+STORE, 140062038581248, 140062038585343,
+STORE, 140736210550784, 140736210690047,
+STORE, 140736210759680, 140736210771967,
+STORE, 140736210771968, 140736210776063,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140724272365568, 140737488351231,
+SNULL, 140724272373759, 140737488351231,
+STORE, 140724272365568, 140724272373759,
+STORE, 140724272234496, 140724272373759,
+STORE, 94607711965184, 94607714189311,
+SNULL, 94607712075775, 94607714189311,
+STORE, 94607711965184, 94607712075775,
+STORE, 94607712075776, 94607714189311,
+ERASE, 94607712075776, 94607714189311,
+STORE, 94607714168832, 94607714181119,
+STORE, 94607714181120, 94607714189311,
+STORE, 140054949253120, 140054951505919,
+SNULL, 140054949396479, 140054951505919,
+STORE, 140054949253120, 140054949396479,
+STORE, 140054949396480, 140054951505919,
+ERASE, 140054949396480, 140054951505919,
+STORE, 140054951493632, 140054951501823,
+STORE, 140054951501824, 140054951505919,
+STORE, 140724272992256, 140724272996351,
+STORE, 140724272979968, 140724272992255,
+STORE, 140054951464960, 140054951493631,
+STORE, 140054951456768, 140054951464959,
+STORE, 140054945456128, 140054949253119,
+SNULL, 140054945456128, 140054947115007,
+STORE, 140054947115008, 140054949253119,
+STORE, 140054945456128, 140054947115007,
+SNULL, 140054949212159, 140054949253119,
+STORE, 140054947115008, 140054949212159,
+STORE, 140054949212160, 140054949253119,
+SNULL, 140054949212160, 140054949236735,
+STORE, 140054949236736, 140054949253119,
+STORE, 140054949212160, 140054949236735,
+ERASE, 140054949212160, 140054949236735,
+STORE, 140054949212160, 140054949236735,
+ERASE, 140054949236736, 140054949253119,
+STORE, 140054949236736, 140054949253119,
+SNULL, 140054949228543, 140054949236735,
+STORE, 140054949212160, 140054949228543,
+STORE, 140054949228544, 140054949236735,
+SNULL, 94607714177023, 94607714181119,
+STORE, 94607714168832, 94607714177023,
+STORE, 94607714177024, 94607714181119,
+SNULL, 140054951497727, 140054951501823,
+STORE, 140054951493632, 140054951497727,
+STORE, 140054951497728, 140054951501823,
+ERASE, 140054951464960, 140054951493631,
+STORE, 94607733374976, 94607733510143,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140733586923520, 140737488351231,
+SNULL, 140733586931711, 140737488351231,
+STORE, 140733586923520, 140733586931711,
+STORE, 140733586792448, 140733586931711,
+STORE, 93901634904064, 93901637128191,
+SNULL, 93901635014655, 93901637128191,
+STORE, 93901634904064, 93901635014655,
+STORE, 93901635014656, 93901637128191,
+ERASE, 93901635014656, 93901637128191,
+STORE, 93901637107712, 93901637119999,
+STORE, 93901637120000, 93901637128191,
+STORE, 140086104784896, 140086107037695,
+SNULL, 140086104928255, 140086107037695,
+STORE, 140086104784896, 140086104928255,
+STORE, 140086104928256, 140086107037695,
+ERASE, 140086104928256, 140086107037695,
+STORE, 140086107025408, 140086107033599,
+STORE, 140086107033600, 140086107037695,
+STORE, 140733587263488, 140733587267583,
+STORE, 140733587251200, 140733587263487,
+STORE, 140086106996736, 140086107025407,
+STORE, 140086106988544, 140086106996735,
+STORE, 140086100987904, 140086104784895,
+SNULL, 140086100987904, 140086102646783,
+STORE, 140086102646784, 140086104784895,
+STORE, 140086100987904, 140086102646783,
+SNULL, 140086104743935, 140086104784895,
+STORE, 140086102646784, 140086104743935,
+STORE, 140086104743936, 140086104784895,
+SNULL, 140086104743936, 140086104768511,
+STORE, 140086104768512, 140086104784895,
+STORE, 140086104743936, 140086104768511,
+ERASE, 140086104743936, 140086104768511,
+STORE, 140086104743936, 140086104768511,
+ERASE, 140086104768512, 140086104784895,
+STORE, 140086104768512, 140086104784895,
+SNULL, 140086104760319, 140086104768511,
+STORE, 140086104743936, 140086104760319,
+STORE, 140086104760320, 140086104768511,
+SNULL, 93901637115903, 93901637119999,
+STORE, 93901637107712, 93901637115903,
+STORE, 93901637115904, 93901637119999,
+SNULL, 140086107029503, 140086107033599,
+STORE, 140086107025408, 140086107029503,
+STORE, 140086107029504, 140086107033599,
+ERASE, 140086106996736, 140086107025407,
+STORE, 93901662715904, 93901662851071,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140723365613568, 140737488351231,
+SNULL, 140723365621759, 140737488351231,
+STORE, 140723365613568, 140723365621759,
+STORE, 140723365482496, 140723365621759,
+STORE, 94759193546752, 94759195770879,
+SNULL, 94759193657343, 94759195770879,
+STORE, 94759193546752, 94759193657343,
+STORE, 94759193657344, 94759195770879,
+ERASE, 94759193657344, 94759195770879,
+STORE, 94759195750400, 94759195762687,
+STORE, 94759195762688, 94759195770879,
+STORE, 140607636246528, 140607638499327,
+SNULL, 140607636389887, 140607638499327,
+STORE, 140607636246528, 140607636389887,
+STORE, 140607636389888, 140607638499327,
+ERASE, 140607636389888, 140607638499327,
+STORE, 140607638487040, 140607638495231,
+STORE, 140607638495232, 140607638499327,
+STORE, 140723365900288, 140723365904383,
+STORE, 140723365888000, 140723365900287,
+STORE, 140607638458368, 140607638487039,
+STORE, 140607638450176, 140607638458367,
+STORE, 140607632449536, 140607636246527,
+SNULL, 140607632449536, 140607634108415,
+STORE, 140607634108416, 140607636246527,
+STORE, 140607632449536, 140607634108415,
+SNULL, 140607636205567, 140607636246527,
+STORE, 140607634108416, 140607636205567,
+STORE, 140607636205568, 140607636246527,
+SNULL, 140607636205568, 140607636230143,
+STORE, 140607636230144, 140607636246527,
+STORE, 140607636205568, 140607636230143,
+ERASE, 140607636205568, 140607636230143,
+STORE, 140607636205568, 140607636230143,
+ERASE, 140607636230144, 140607636246527,
+STORE, 140607636230144, 140607636246527,
+SNULL, 140607636221951, 140607636230143,
+STORE, 140607636205568, 140607636221951,
+STORE, 140607636221952, 140607636230143,
+SNULL, 94759195758591, 94759195762687,
+STORE, 94759195750400, 94759195758591,
+STORE, 94759195758592, 94759195762687,
+SNULL, 140607638491135, 140607638495231,
+STORE, 140607638487040, 140607638491135,
+STORE, 140607638491136, 140607638495231,
+ERASE, 140607638458368, 140607638487039,
+STORE, 94759204995072, 94759205130239,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140732503789568, 140737488351231,
+SNULL, 140732503797759, 140737488351231,
+STORE, 140732503789568, 140732503797759,
+STORE, 140732503658496, 140732503797759,
+STORE, 94077792956416, 94077795180543,
+SNULL, 94077793067007, 94077795180543,
+STORE, 94077792956416, 94077793067007,
+STORE, 94077793067008, 94077795180543,
+ERASE, 94077793067008, 94077795180543,
+STORE, 94077795160064, 94077795172351,
+STORE, 94077795172352, 94077795180543,
+STORE, 140359874252800, 140359876505599,
+SNULL, 140359874396159, 140359876505599,
+STORE, 140359874252800, 140359874396159,
+STORE, 140359874396160, 140359876505599,
+ERASE, 140359874396160, 140359876505599,
+STORE, 140359876493312, 140359876501503,
+STORE, 140359876501504, 140359876505599,
+STORE, 140732504465408, 140732504469503,
+STORE, 140732504453120, 140732504465407,
+STORE, 140359876464640, 140359876493311,
+STORE, 140359876456448, 140359876464639,
+STORE, 140359870455808, 140359874252799,
+SNULL, 140359870455808, 140359872114687,
+STORE, 140359872114688, 140359874252799,
+STORE, 140359870455808, 140359872114687,
+SNULL, 140359874211839, 140359874252799,
+STORE, 140359872114688, 140359874211839,
+STORE, 140359874211840, 140359874252799,
+SNULL, 140359874211840, 140359874236415,
+STORE, 140359874236416, 140359874252799,
+STORE, 140359874211840, 140359874236415,
+ERASE, 140359874211840, 140359874236415,
+STORE, 140359874211840, 140359874236415,
+ERASE, 140359874236416, 140359874252799,
+STORE, 140359874236416, 140359874252799,
+SNULL, 140359874228223, 140359874236415,
+STORE, 140359874211840, 140359874228223,
+STORE, 140359874228224, 140359874236415,
+SNULL, 94077795168255, 94077795172351,
+STORE, 94077795160064, 94077795168255,
+STORE, 94077795168256, 94077795172351,
+SNULL, 140359876497407, 140359876501503,
+STORE, 140359876493312, 140359876497407,
+STORE, 140359876497408, 140359876501503,
+ERASE, 140359876464640, 140359876493311,
+STORE, 94077808717824, 94077808852991,
+STORE, 94549486252032, 94549486465023,
+STORE, 94549488562176, 94549488566271,
+STORE, 94549488566272, 94549488574463,
+STORE, 94549488574464, 94549488586751,
+STORE, 94549503492096, 94549506121727,
+STORE, 140085800894464, 140085802553343,
+STORE, 140085802553344, 140085804650495,
+STORE, 140085804650496, 140085804666879,
+STORE, 140085804666880, 140085804675071,
+STORE, 140085804675072, 140085804691455,
+STORE, 140085804691456, 140085804703743,
+STORE, 140085804703744, 140085806796799,
+STORE, 140085806796800, 140085806800895,
+STORE, 140085806800896, 140085806804991,
+STORE, 140085806804992, 140085806948351,
+STORE, 140085807316992, 140085809000447,
+STORE, 140085809000448, 140085809016831,
+STORE, 140085809045504, 140085809049599,
+STORE, 140085809049600, 140085809053695,
+STORE, 140085809053696, 140085809057791,
+STORE, 140731810545664, 140731810684927,
+STORE, 140731810967552, 140731810979839,
+STORE, 140731810979840, 140731810983935,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140724752330752, 140737488351231,
+SNULL, 140724752338943, 140737488351231,
+STORE, 140724752330752, 140724752338943,
+STORE, 140724752199680, 140724752338943,
+STORE, 94656357539840, 94656359874559,
+SNULL, 94656357752831, 94656359874559,
+STORE, 94656357539840, 94656357752831,
+STORE, 94656357752832, 94656359874559,
+ERASE, 94656357752832, 94656359874559,
+STORE, 94656359849984, 94656359862271,
+STORE, 94656359862272, 94656359874559,
+STORE, 139632585203712, 139632587456511,
+SNULL, 139632585347071, 139632587456511,
+STORE, 139632585203712, 139632585347071,
+STORE, 139632585347072, 139632587456511,
+ERASE, 139632585347072, 139632587456511,
+STORE, 139632587444224, 139632587452415,
+STORE, 139632587452416, 139632587456511,
+STORE, 139632587440128, 139632587444223,
+STORE, 139632587427840, 139632587440127,
+STORE, 139632587399168, 139632587427839,
+STORE, 139632587390976, 139632587399167,
+STORE, 139632583090176, 139632585203711,
+SNULL, 139632583090176, 139632583102463,
+STORE, 139632583102464, 139632585203711,
+STORE, 139632583090176, 139632583102463,
+SNULL, 139632585195519, 139632585203711,
+STORE, 139632583102464, 139632585195519,
+STORE, 139632585195520, 139632585203711,
+ERASE, 139632585195520, 139632585203711,
+STORE, 139632585195520, 139632585203711,
+STORE, 139632579293184, 139632583090175,
+SNULL, 139632579293184, 139632580952063,
+STORE, 139632580952064, 139632583090175,
+STORE, 139632579293184, 139632580952063,
+SNULL, 139632583049215, 139632583090175,
+STORE, 139632580952064, 139632583049215,
+STORE, 139632583049216, 139632583090175,
+SNULL, 139632583049216, 139632583073791,
+STORE, 139632583073792, 139632583090175,
+STORE, 139632583049216, 139632583073791,
+ERASE, 139632583049216, 139632583073791,
+STORE, 139632583049216, 139632583073791,
+ERASE, 139632583073792, 139632583090175,
+STORE, 139632583073792, 139632583090175,
+STORE, 139632587382784, 139632587399167,
+SNULL, 139632583065599, 139632583073791,
+STORE, 139632583049216, 139632583065599,
+STORE, 139632583065600, 139632583073791,
+SNULL, 139632585199615, 139632585203711,
+STORE, 139632585195520, 139632585199615,
+STORE, 139632585199616, 139632585203711,
+SNULL, 94656359854079, 94656359862271,
+STORE, 94656359849984, 94656359854079,
+STORE, 94656359854080, 94656359862271,
+SNULL, 139632587448319, 139632587452415,
+STORE, 139632587444224, 139632587448319,
+STORE, 139632587448320, 139632587452415,
+ERASE, 139632587399168, 139632587427839,
+STORE, 94656378912768, 94656379047935,
+STORE, 139632585699328, 139632587382783,
+STORE, 94656378912768, 94656379183103,
+STORE, 94656378912768, 94656379318271,
+STORE, 94656378912768, 94656379494399,
+SNULL, 94656379469823, 94656379494399,
+STORE, 94656378912768, 94656379469823,
+STORE, 94656379469824, 94656379494399,
+ERASE, 94656379469824, 94656379494399,
+STORE, 94656378912768, 94656379621375,
+STORE, 94656378912768, 94656379756543,
+STORE, 94656378912768, 94656379912191,
+STORE, 94656378912768, 94656380055551,
+STORE, 94656378912768, 94656380190719,
+STORE, 94656378912768, 94656380338175,
+SNULL, 94656380313599, 94656380338175,
+STORE, 94656378912768, 94656380313599,
+STORE, 94656380313600, 94656380338175,
+ERASE, 94656380313600, 94656380338175,
+STORE, 94656378912768, 94656380448767,
+SNULL, 94656380432383, 94656380448767,
+STORE, 94656378912768, 94656380432383,
+STORE, 94656380432384, 94656380448767,
+ERASE, 94656380432384, 94656380448767,
+STORE, 94656378912768, 94656380567551,
+STORE, 94656378912768, 94656380719103,
+STORE, 94656378912768, 94656380858367,
+STORE, 94656378912768, 94656380997631,
+STORE, 94656378912768, 94656381132799,
+SNULL, 94656381124607, 94656381132799,
+STORE, 94656378912768, 94656381124607,
+STORE, 94656381124608, 94656381132799,
+ERASE, 94656381124608, 94656381132799,
+STORE, 94656378912768, 94656381276159,
+STORE, 94656378912768, 94656381427711,
+STORE, 94604087611392, 94604087824383,
+STORE, 94604089921536, 94604089925631,
+STORE, 94604089925632, 94604089933823,
+STORE, 94604089933824, 94604089946111,
+STORE, 94604105125888, 94604106424319,
+STORE, 140454937694208, 140454939353087,
+STORE, 140454939353088, 140454941450239,
+STORE, 140454941450240, 140454941466623,
+STORE, 140454941466624, 140454941474815,
+STORE, 140454941474816, 140454941491199,
+STORE, 140454941491200, 140454941503487,
+STORE, 140454941503488, 140454943596543,
+STORE, 140454943596544, 140454943600639,
+STORE, 140454943600640, 140454943604735,
+STORE, 140454943604736, 140454943748095,
+STORE, 140454944116736, 140454945800191,
+STORE, 140454945800192, 140454945816575,
+STORE, 140454945845248, 140454945849343,
+STORE, 140454945849344, 140454945853439,
+STORE, 140454945853440, 140454945857535,
+STORE, 140728438214656, 140728438353919,
+STORE, 140728439095296, 140728439107583,
+STORE, 140728439107584, 140728439111679,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140727821099008, 140737488351231,
+SNULL, 140727821107199, 140737488351231,
+STORE, 140727821099008, 140727821107199,
+STORE, 140727820967936, 140727821107199,
+STORE, 94088457240576, 94088459575295,
+SNULL, 94088457453567, 94088459575295,
+STORE, 94088457240576, 94088457453567,
+STORE, 94088457453568, 94088459575295,
+ERASE, 94088457453568, 94088459575295,
+STORE, 94088459550720, 94088459563007,
+STORE, 94088459563008, 94088459575295,
+STORE, 140234378989568, 140234381242367,
+SNULL, 140234379132927, 140234381242367,
+STORE, 140234378989568, 140234379132927,
+STORE, 140234379132928, 140234381242367,
+ERASE, 140234379132928, 140234381242367,
+STORE, 140234381230080, 140234381238271,
+STORE, 140234381238272, 140234381242367,
+STORE, 140727822077952, 140727822082047,
+STORE, 140727822065664, 140727822077951,
+STORE, 140234381201408, 140234381230079,
+STORE, 140234381193216, 140234381201407,
+STORE, 140234376876032, 140234378989567,
+SNULL, 140234376876032, 140234376888319,
+STORE, 140234376888320, 140234378989567,
+STORE, 140234376876032, 140234376888319,
+SNULL, 140234378981375, 140234378989567,
+STORE, 140234376888320, 140234378981375,
+STORE, 140234378981376, 140234378989567,
+ERASE, 140234378981376, 140234378989567,
+STORE, 140234378981376, 140234378989567,
+STORE, 140234373079040, 140234376876031,
+SNULL, 140234373079040, 140234374737919,
+STORE, 140234374737920, 140234376876031,
+STORE, 140234373079040, 140234374737919,
+SNULL, 140234376835071, 140234376876031,
+STORE, 140234374737920, 140234376835071,
+STORE, 140234376835072, 140234376876031,
+SNULL, 140234376835072, 140234376859647,
+STORE, 140234376859648, 140234376876031,
+STORE, 140234376835072, 140234376859647,
+ERASE, 140234376835072, 140234376859647,
+STORE, 140234376835072, 140234376859647,
+ERASE, 140234376859648, 140234376876031,
+STORE, 140234376859648, 140234376876031,
+STORE, 140234381185024, 140234381201407,
+SNULL, 140234376851455, 140234376859647,
+STORE, 140234376835072, 140234376851455,
+STORE, 140234376851456, 140234376859647,
+SNULL, 140234378985471, 140234378989567,
+STORE, 140234378981376, 140234378985471,
+STORE, 140234378985472, 140234378989567,
+SNULL, 94088459554815, 94088459563007,
+STORE, 94088459550720, 94088459554815,
+STORE, 94088459554816, 94088459563007,
+SNULL, 140234381234175, 140234381238271,
+STORE, 140234381230080, 140234381234175,
+STORE, 140234381234176, 140234381238271,
+ERASE, 140234381201408, 140234381230079,
+STORE, 94088468852736, 94088468987903,
+STORE, 140234379501568, 140234381185023,
+STORE, 94088468852736, 94088469123071,
+STORE, 94088468852736, 94088469258239,
+STORE, 94110050402304, 94110050615295,
+STORE, 94110052712448, 94110052716543,
+STORE, 94110052716544, 94110052724735,
+STORE, 94110052724736, 94110052737023,
+STORE, 94110061875200, 94110062415871,
+STORE, 140139439357952, 140139441016831,
+STORE, 140139441016832, 140139443113983,
+STORE, 140139443113984, 140139443130367,
+STORE, 140139443130368, 140139443138559,
+STORE, 140139443138560, 140139443154943,
+STORE, 140139443154944, 140139443167231,
+STORE, 140139443167232, 140139445260287,
+STORE, 140139445260288, 140139445264383,
+STORE, 140139445264384, 140139445268479,
+STORE, 140139445268480, 140139445411839,
+STORE, 140139445780480, 140139447463935,
+STORE, 140139447463936, 140139447480319,
+STORE, 140139447508992, 140139447513087,
+STORE, 140139447513088, 140139447517183,
+STORE, 140139447517184, 140139447521279,
+STORE, 140731901427712, 140731901566975,
+STORE, 140731902259200, 140731902271487,
+STORE, 140731902271488, 140731902275583,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140727282622464, 140737488351231,
+SNULL, 140727282630655, 140737488351231,
+STORE, 140727282622464, 140727282630655,
+STORE, 140727282491392, 140727282630655,
+STORE, 94266649866240, 94266652200959,
+SNULL, 94266650079231, 94266652200959,
+STORE, 94266649866240, 94266650079231,
+STORE, 94266650079232, 94266652200959,
+ERASE, 94266650079232, 94266652200959,
+STORE, 94266652176384, 94266652188671,
+STORE, 94266652188672, 94266652200959,
+STORE, 139888497991680, 139888500244479,
+SNULL, 139888498135039, 139888500244479,
+STORE, 139888497991680, 139888498135039,
+STORE, 139888498135040, 139888500244479,
+ERASE, 139888498135040, 139888500244479,
+STORE, 139888500232192, 139888500240383,
+STORE, 139888500240384, 139888500244479,
+STORE, 140727283113984, 140727283118079,
+STORE, 140727283101696, 140727283113983,
+STORE, 139888500203520, 139888500232191,
+STORE, 139888500195328, 139888500203519,
+STORE, 139888495878144, 139888497991679,
+SNULL, 139888495878144, 139888495890431,
+STORE, 139888495890432, 139888497991679,
+STORE, 139888495878144, 139888495890431,
+SNULL, 139888497983487, 139888497991679,
+STORE, 139888495890432, 139888497983487,
+STORE, 139888497983488, 139888497991679,
+ERASE, 139888497983488, 139888497991679,
+STORE, 139888497983488, 139888497991679,
+STORE, 139888492081152, 139888495878143,
+SNULL, 139888492081152, 139888493740031,
+STORE, 139888493740032, 139888495878143,
+STORE, 139888492081152, 139888493740031,
+SNULL, 139888495837183, 139888495878143,
+STORE, 139888493740032, 139888495837183,
+STORE, 139888495837184, 139888495878143,
+SNULL, 139888495837184, 139888495861759,
+STORE, 139888495861760, 139888495878143,
+STORE, 139888495837184, 139888495861759,
+ERASE, 139888495837184, 139888495861759,
+STORE, 139888495837184, 139888495861759,
+ERASE, 139888495861760, 139888495878143,
+STORE, 139888495861760, 139888495878143,
+STORE, 139888500187136, 139888500203519,
+SNULL, 139888495853567, 139888495861759,
+STORE, 139888495837184, 139888495853567,
+STORE, 139888495853568, 139888495861759,
+SNULL, 139888497987583, 139888497991679,
+STORE, 139888497983488, 139888497987583,
+STORE, 139888497987584, 139888497991679,
+SNULL, 94266652180479, 94266652188671,
+STORE, 94266652176384, 94266652180479,
+STORE, 94266652180480, 94266652188671,
+SNULL, 139888500236287, 139888500240383,
+STORE, 139888500232192, 139888500236287,
+STORE, 139888500236288, 139888500240383,
+ERASE, 139888500203520, 139888500232191,
+STORE, 94266678542336, 94266678677503,
+STORE, 139888498503680, 139888500187135,
+STORE, 94266678542336, 94266678812671,
+STORE, 94266678542336, 94266678947839,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722507702272, 140737488351231,
+SNULL, 140722507710463, 140737488351231,
+STORE, 140722507702272, 140722507710463,
+STORE, 140722507571200, 140722507710463,
+STORE, 94313981394944, 94313983729663,
+SNULL, 94313981607935, 94313983729663,
+STORE, 94313981394944, 94313981607935,
+STORE, 94313981607936, 94313983729663,
+ERASE, 94313981607936, 94313983729663,
+STORE, 94313983705088, 94313983717375,
+STORE, 94313983717376, 94313983729663,
+STORE, 140456286076928, 140456288329727,
+SNULL, 140456286220287, 140456288329727,
+STORE, 140456286076928, 140456286220287,
+STORE, 140456286220288, 140456288329727,
+ERASE, 140456286220288, 140456288329727,
+STORE, 140456288317440, 140456288325631,
+STORE, 140456288325632, 140456288329727,
+STORE, 140722507997184, 140722508001279,
+STORE, 140722507984896, 140722507997183,
+STORE, 140456288288768, 140456288317439,
+STORE, 140456288280576, 140456288288767,
+STORE, 140456283963392, 140456286076927,
+SNULL, 140456283963392, 140456283975679,
+STORE, 140456283975680, 140456286076927,
+STORE, 140456283963392, 140456283975679,
+SNULL, 140456286068735, 140456286076927,
+STORE, 140456283975680, 140456286068735,
+STORE, 140456286068736, 140456286076927,
+ERASE, 140456286068736, 140456286076927,
+STORE, 140456286068736, 140456286076927,
+STORE, 140456280166400, 140456283963391,
+SNULL, 140456280166400, 140456281825279,
+STORE, 140456281825280, 140456283963391,
+STORE, 140456280166400, 140456281825279,
+SNULL, 140456283922431, 140456283963391,
+STORE, 140456281825280, 140456283922431,
+STORE, 140456283922432, 140456283963391,
+SNULL, 140456283922432, 140456283947007,
+STORE, 140456283947008, 140456283963391,
+STORE, 140456283922432, 140456283947007,
+ERASE, 140456283922432, 140456283947007,
+STORE, 140456283922432, 140456283947007,
+ERASE, 140456283947008, 140456283963391,
+STORE, 140456283947008, 140456283963391,
+STORE, 140456288272384, 140456288288767,
+SNULL, 140456283938815, 140456283947007,
+STORE, 140456283922432, 140456283938815,
+STORE, 140456283938816, 140456283947007,
+SNULL, 140456286072831, 140456286076927,
+STORE, 140456286068736, 140456286072831,
+STORE, 140456286072832, 140456286076927,
+SNULL, 94313983709183, 94313983717375,
+STORE, 94313983705088, 94313983709183,
+STORE, 94313983709184, 94313983717375,
+SNULL, 140456288321535, 140456288325631,
+STORE, 140456288317440, 140456288321535,
+STORE, 140456288321536, 140456288325631,
+ERASE, 140456288288768, 140456288317439,
+STORE, 94314006716416, 94314006851583,
+STORE, 140456286588928, 140456288272383,
+STORE, 94314006716416, 94314006986751,
+STORE, 94314006716416, 94314007121919,
+STORE, 93948644454400, 93948644667391,
+STORE, 93948646764544, 93948646768639,
+STORE, 93948646768640, 93948646776831,
+STORE, 93948646776832, 93948646789119,
+STORE, 93948664999936, 93948667142143,
+STORE, 140187350659072, 140187352317951,
+STORE, 140187352317952, 140187354415103,
+STORE, 140187354415104, 140187354431487,
+STORE, 140187354431488, 140187354439679,
+STORE, 140187354439680, 140187354456063,
+STORE, 140187354456064, 140187354468351,
+STORE, 140187354468352, 140187356561407,
+STORE, 140187356561408, 140187356565503,
+STORE, 140187356565504, 140187356569599,
+STORE, 140187356569600, 140187356712959,
+STORE, 140187357081600, 140187358765055,
+STORE, 140187358765056, 140187358781439,
+STORE, 140187358810112, 140187358814207,
+STORE, 140187358814208, 140187358818303,
+STORE, 140187358818304, 140187358822399,
+STORE, 140730484518912, 140730484658175,
+STORE, 140730485690368, 140730485702655,
+STORE, 140730485702656, 140730485706751,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140721211551744, 140737488351231,
+SNULL, 140721211559935, 140737488351231,
+STORE, 140721211551744, 140721211559935,
+STORE, 140721211420672, 140721211559935,
+STORE, 94105221423104, 94105223757823,
+SNULL, 94105221636095, 94105223757823,
+STORE, 94105221423104, 94105221636095,
+STORE, 94105221636096, 94105223757823,
+ERASE, 94105221636096, 94105223757823,
+STORE, 94105223733248, 94105223745535,
+STORE, 94105223745536, 94105223757823,
+STORE, 140474453676032, 140474455928831,
+SNULL, 140474453819391, 140474455928831,
+STORE, 140474453676032, 140474453819391,
+STORE, 140474453819392, 140474455928831,
+ERASE, 140474453819392, 140474455928831,
+STORE, 140474455916544, 140474455924735,
+STORE, 140474455924736, 140474455928831,
+STORE, 140721211703296, 140721211707391,
+STORE, 140721211691008, 140721211703295,
+STORE, 140474455887872, 140474455916543,
+STORE, 140474455879680, 140474455887871,
+STORE, 140474451562496, 140474453676031,
+SNULL, 140474451562496, 140474451574783,
+STORE, 140474451574784, 140474453676031,
+STORE, 140474451562496, 140474451574783,
+SNULL, 140474453667839, 140474453676031,
+STORE, 140474451574784, 140474453667839,
+STORE, 140474453667840, 140474453676031,
+ERASE, 140474453667840, 140474453676031,
+STORE, 140474453667840, 140474453676031,
+STORE, 140474447765504, 140474451562495,
+SNULL, 140474447765504, 140474449424383,
+STORE, 140474449424384, 140474451562495,
+STORE, 140474447765504, 140474449424383,
+SNULL, 140474451521535, 140474451562495,
+STORE, 140474449424384, 140474451521535,
+STORE, 140474451521536, 140474451562495,
+SNULL, 140474451521536, 140474451546111,
+STORE, 140474451546112, 140474451562495,
+STORE, 140474451521536, 140474451546111,
+ERASE, 140474451521536, 140474451546111,
+STORE, 140474451521536, 140474451546111,
+ERASE, 140474451546112, 140474451562495,
+STORE, 140474451546112, 140474451562495,
+STORE, 140474455871488, 140474455887871,
+SNULL, 140474451537919, 140474451546111,
+STORE, 140474451521536, 140474451537919,
+STORE, 140474451537920, 140474451546111,
+SNULL, 140474453671935, 140474453676031,
+STORE, 140474453667840, 140474453671935,
+STORE, 140474453671936, 140474453676031,
+SNULL, 94105223737343, 94105223745535,
+STORE, 94105223733248, 94105223737343,
+STORE, 94105223737344, 94105223745535,
+SNULL, 140474455920639, 140474455924735,
+STORE, 140474455916544, 140474455920639,
+STORE, 140474455920640, 140474455924735,
+ERASE, 140474455887872, 140474455916543,
+STORE, 94105238712320, 94105238847487,
+STORE, 140474454188032, 140474455871487,
+STORE, 94105238712320, 94105238982655,
+STORE, 94105238712320, 94105239117823,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140732356354048, 140737488351231,
+SNULL, 140732356362239, 140737488351231,
+STORE, 140732356354048, 140732356362239,
+STORE, 140732356222976, 140732356362239,
+STORE, 94461165989888, 94461168324607,
+SNULL, 94461166202879, 94461168324607,
+STORE, 94461165989888, 94461166202879,
+STORE, 94461166202880, 94461168324607,
+ERASE, 94461166202880, 94461168324607,
+STORE, 94461168300032, 94461168312319,
+STORE, 94461168312320, 94461168324607,
+STORE, 140317255110656, 140317257363455,
+SNULL, 140317255254015, 140317257363455,
+STORE, 140317255110656, 140317255254015,
+STORE, 140317255254016, 140317257363455,
+ERASE, 140317255254016, 140317257363455,
+STORE, 140317257351168, 140317257359359,
+STORE, 140317257359360, 140317257363455,
+STORE, 140732356583424, 140732356587519,
+STORE, 140732356571136, 140732356583423,
+STORE, 140317257322496, 140317257351167,
+STORE, 140317257314304, 140317257322495,
+STORE, 140317252997120, 140317255110655,
+SNULL, 140317252997120, 140317253009407,
+STORE, 140317253009408, 140317255110655,
+STORE, 140317252997120, 140317253009407,
+SNULL, 140317255102463, 140317255110655,
+STORE, 140317253009408, 140317255102463,
+STORE, 140317255102464, 140317255110655,
+ERASE, 140317255102464, 140317255110655,
+STORE, 140317255102464, 140317255110655,
+STORE, 140317249200128, 140317252997119,
+SNULL, 140317249200128, 140317250859007,
+STORE, 140317250859008, 140317252997119,
+STORE, 140317249200128, 140317250859007,
+SNULL, 140317252956159, 140317252997119,
+STORE, 140317250859008, 140317252956159,
+STORE, 140317252956160, 140317252997119,
+SNULL, 140317252956160, 140317252980735,
+STORE, 140317252980736, 140317252997119,
+STORE, 140317252956160, 140317252980735,
+ERASE, 140317252956160, 140317252980735,
+STORE, 140317252956160, 140317252980735,
+ERASE, 140317252980736, 140317252997119,
+STORE, 140317252980736, 140317252997119,
+STORE, 140317257306112, 140317257322495,
+SNULL, 140317252972543, 140317252980735,
+STORE, 140317252956160, 140317252972543,
+STORE, 140317252972544, 140317252980735,
+SNULL, 140317255106559, 140317255110655,
+STORE, 140317255102464, 140317255106559,
+STORE, 140317255106560, 140317255110655,
+SNULL, 94461168304127, 94461168312319,
+STORE, 94461168300032, 94461168304127,
+STORE, 94461168304128, 94461168312319,
+SNULL, 140317257355263, 140317257359359,
+STORE, 140317257351168, 140317257355263,
+STORE, 140317257355264, 140317257359359,
+ERASE, 140317257322496, 140317257351167,
+STORE, 94461195268096, 94461195403263,
+STORE, 140317255622656, 140317257306111,
+STORE, 94461195268096, 94461195538431,
+STORE, 94461195268096, 94461195673599,
+STORE, 94110050402304, 94110050615295,
+STORE, 94110052712448, 94110052716543,
+STORE, 94110052716544, 94110052724735,
+STORE, 94110052724736, 94110052737023,
+STORE, 94110061875200, 94110062415871,
+STORE, 140139439357952, 140139441016831,
+STORE, 140139441016832, 140139443113983,
+STORE, 140139443113984, 140139443130367,
+STORE, 140139443130368, 140139443138559,
+STORE, 140139443138560, 140139443154943,
+STORE, 140139443154944, 140139443167231,
+STORE, 140139443167232, 140139445260287,
+STORE, 140139445260288, 140139445264383,
+STORE, 140139445264384, 140139445268479,
+STORE, 140139445268480, 140139445411839,
+STORE, 140139445780480, 140139447463935,
+STORE, 140139447463936, 140139447480319,
+STORE, 140139447508992, 140139447513087,
+STORE, 140139447513088, 140139447517183,
+STORE, 140139447517184, 140139447521279,
+STORE, 140731901427712, 140731901566975,
+STORE, 140731902259200, 140731902271487,
+STORE, 140731902271488, 140731902275583,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140720941613056, 140737488351231,
+SNULL, 140720941621247, 140737488351231,
+STORE, 140720941613056, 140720941621247,
+STORE, 140720941481984, 140720941621247,
+STORE, 93902377721856, 93902379945983,
+SNULL, 93902377832447, 93902379945983,
+STORE, 93902377721856, 93902377832447,
+STORE, 93902377832448, 93902379945983,
+ERASE, 93902377832448, 93902379945983,
+STORE, 93902379925504, 93902379937791,
+STORE, 93902379937792, 93902379945983,
+STORE, 139836543635456, 139836545888255,
+SNULL, 139836543778815, 139836545888255,
+STORE, 139836543635456, 139836543778815,
+STORE, 139836543778816, 139836545888255,
+ERASE, 139836543778816, 139836545888255,
+STORE, 139836545875968, 139836545884159,
+STORE, 139836545884160, 139836545888255,
+STORE, 140720941711360, 140720941715455,
+STORE, 140720941699072, 140720941711359,
+STORE, 139836545847296, 139836545875967,
+STORE, 139836545839104, 139836545847295,
+STORE, 139836539838464, 139836543635455,
+SNULL, 139836539838464, 139836541497343,
+STORE, 139836541497344, 139836543635455,
+STORE, 139836539838464, 139836541497343,
+SNULL, 139836543594495, 139836543635455,
+STORE, 139836541497344, 139836543594495,
+STORE, 139836543594496, 139836543635455,
+SNULL, 139836543594496, 139836543619071,
+STORE, 139836543619072, 139836543635455,
+STORE, 139836543594496, 139836543619071,
+ERASE, 139836543594496, 139836543619071,
+STORE, 139836543594496, 139836543619071,
+ERASE, 139836543619072, 139836543635455,
+STORE, 139836543619072, 139836543635455,
+SNULL, 139836543610879, 139836543619071,
+STORE, 139836543594496, 139836543610879,
+STORE, 139836543610880, 139836543619071,
+SNULL, 93902379933695, 93902379937791,
+STORE, 93902379925504, 93902379933695,
+STORE, 93902379933696, 93902379937791,
+SNULL, 139836545880063, 139836545884159,
+STORE, 139836545875968, 139836545880063,
+STORE, 139836545880064, 139836545884159,
+ERASE, 139836545847296, 139836545875967,
+STORE, 93902396891136, 93902397026303,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140736538206208, 140737488351231,
+SNULL, 140736538214399, 140737488351231,
+STORE, 140736538206208, 140736538214399,
+STORE, 140736538075136, 140736538214399,
+STORE, 94173471399936, 94173473734655,
+SNULL, 94173471612927, 94173473734655,
+STORE, 94173471399936, 94173471612927,
+STORE, 94173471612928, 94173473734655,
+ERASE, 94173471612928, 94173473734655,
+STORE, 94173473710080, 94173473722367,
+STORE, 94173473722368, 94173473734655,
+STORE, 140035513556992, 140035515809791,
+SNULL, 140035513700351, 140035515809791,
+STORE, 140035513556992, 140035513700351,
+STORE, 140035513700352, 140035515809791,
+ERASE, 140035513700352, 140035515809791,
+STORE, 140035515797504, 140035515805695,
+STORE, 140035515805696, 140035515809791,
+STORE, 140736538329088, 140736538333183,
+STORE, 140736538316800, 140736538329087,
+STORE, 140035515768832, 140035515797503,
+STORE, 140035515760640, 140035515768831,
+STORE, 140035511443456, 140035513556991,
+SNULL, 140035511443456, 140035511455743,
+STORE, 140035511455744, 140035513556991,
+STORE, 140035511443456, 140035511455743,
+SNULL, 140035513548799, 140035513556991,
+STORE, 140035511455744, 140035513548799,
+STORE, 140035513548800, 140035513556991,
+ERASE, 140035513548800, 140035513556991,
+STORE, 140035513548800, 140035513556991,
+STORE, 140035507646464, 140035511443455,
+SNULL, 140035507646464, 140035509305343,
+STORE, 140035509305344, 140035511443455,
+STORE, 140035507646464, 140035509305343,
+SNULL, 140035511402495, 140035511443455,
+STORE, 140035509305344, 140035511402495,
+STORE, 140035511402496, 140035511443455,
+SNULL, 140035511402496, 140035511427071,
+STORE, 140035511427072, 140035511443455,
+STORE, 140035511402496, 140035511427071,
+ERASE, 140035511402496, 140035511427071,
+STORE, 140035511402496, 140035511427071,
+ERASE, 140035511427072, 140035511443455,
+STORE, 140035511427072, 140035511443455,
+STORE, 140035515752448, 140035515768831,
+SNULL, 140035511418879, 140035511427071,
+STORE, 140035511402496, 140035511418879,
+STORE, 140035511418880, 140035511427071,
+SNULL, 140035513552895, 140035513556991,
+STORE, 140035513548800, 140035513552895,
+STORE, 140035513552896, 140035513556991,
+SNULL, 94173473714175, 94173473722367,
+STORE, 94173473710080, 94173473714175,
+STORE, 94173473714176, 94173473722367,
+SNULL, 140035515801599, 140035515805695,
+STORE, 140035515797504, 140035515801599,
+STORE, 140035515801600, 140035515805695,
+ERASE, 140035515768832, 140035515797503,
+STORE, 94173478645760, 94173478780927,
+STORE, 140035514068992, 140035515752447,
+STORE, 94173478645760, 94173478916095,
+STORE, 94173478645760, 94173479051263,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140724216176640, 140737488351231,
+SNULL, 140724216184831, 140737488351231,
+STORE, 140724216176640, 140724216184831,
+STORE, 140724216045568, 140724216184831,
+STORE, 94870930628608, 94870932963327,
+SNULL, 94870930841599, 94870932963327,
+STORE, 94870930628608, 94870930841599,
+STORE, 94870930841600, 94870932963327,
+ERASE, 94870930841600, 94870932963327,
+STORE, 94870932938752, 94870932951039,
+STORE, 94870932951040, 94870932963327,
+STORE, 140453683736576, 140453685989375,
+SNULL, 140453683879935, 140453685989375,
+STORE, 140453683736576, 140453683879935,
+STORE, 140453683879936, 140453685989375,
+ERASE, 140453683879936, 140453685989375,
+STORE, 140453685977088, 140453685985279,
+STORE, 140453685985280, 140453685989375,
+STORE, 140724216832000, 140724216836095,
+STORE, 140724216819712, 140724216831999,
+STORE, 140453685948416, 140453685977087,
+STORE, 140453685940224, 140453685948415,
+STORE, 140453681623040, 140453683736575,
+SNULL, 140453681623040, 140453681635327,
+STORE, 140453681635328, 140453683736575,
+STORE, 140453681623040, 140453681635327,
+SNULL, 140453683728383, 140453683736575,
+STORE, 140453681635328, 140453683728383,
+STORE, 140453683728384, 140453683736575,
+ERASE, 140453683728384, 140453683736575,
+STORE, 140453683728384, 140453683736575,
+STORE, 140453677826048, 140453681623039,
+SNULL, 140453677826048, 140453679484927,
+STORE, 140453679484928, 140453681623039,
+STORE, 140453677826048, 140453679484927,
+SNULL, 140453681582079, 140453681623039,
+STORE, 140453679484928, 140453681582079,
+STORE, 140453681582080, 140453681623039,
+SNULL, 140453681582080, 140453681606655,
+STORE, 140453681606656, 140453681623039,
+STORE, 140453681582080, 140453681606655,
+ERASE, 140453681582080, 140453681606655,
+STORE, 140453681582080, 140453681606655,
+ERASE, 140453681606656, 140453681623039,
+STORE, 140453681606656, 140453681623039,
+STORE, 140453685932032, 140453685948415,
+SNULL, 140453681598463, 140453681606655,
+STORE, 140453681582080, 140453681598463,
+STORE, 140453681598464, 140453681606655,
+SNULL, 140453683732479, 140453683736575,
+STORE, 140453683728384, 140453683732479,
+STORE, 140453683732480, 140453683736575,
+SNULL, 94870932942847, 94870932951039,
+STORE, 94870932938752, 94870932942847,
+STORE, 94870932942848, 94870932951039,
+SNULL, 140453685981183, 140453685985279,
+STORE, 140453685977088, 140453685981183,
+STORE, 140453685981184, 140453685985279,
+ERASE, 140453685948416, 140453685977087,
+STORE, 94870940565504, 94870940700671,
+STORE, 140453684248576, 140453685932031,
+STORE, 94870940565504, 94870940835839,
+STORE, 94870940565504, 94870940971007,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140731275661312, 140737488351231,
+SNULL, 140731275669503, 140737488351231,
+STORE, 140731275661312, 140731275669503,
+STORE, 140731275530240, 140731275669503,
+STORE, 94642788548608, 94642790883327,
+SNULL, 94642788761599, 94642790883327,
+STORE, 94642788548608, 94642788761599,
+STORE, 94642788761600, 94642790883327,
+ERASE, 94642788761600, 94642790883327,
+STORE, 94642790858752, 94642790871039,
+STORE, 94642790871040, 94642790883327,
+STORE, 140228458749952, 140228461002751,
+SNULL, 140228458893311, 140228461002751,
+STORE, 140228458749952, 140228458893311,
+STORE, 140228458893312, 140228461002751,
+ERASE, 140228458893312, 140228461002751,
+STORE, 140228460990464, 140228460998655,
+STORE, 140228460998656, 140228461002751,
+STORE, 140731276349440, 140731276353535,
+STORE, 140731276337152, 140731276349439,
+STORE, 140228460961792, 140228460990463,
+STORE, 140228460953600, 140228460961791,
+STORE, 140228456636416, 140228458749951,
+SNULL, 140228456636416, 140228456648703,
+STORE, 140228456648704, 140228458749951,
+STORE, 140228456636416, 140228456648703,
+SNULL, 140228458741759, 140228458749951,
+STORE, 140228456648704, 140228458741759,
+STORE, 140228458741760, 140228458749951,
+ERASE, 140228458741760, 140228458749951,
+STORE, 140228458741760, 140228458749951,
+STORE, 140228452839424, 140228456636415,
+SNULL, 140228452839424, 140228454498303,
+STORE, 140228454498304, 140228456636415,
+STORE, 140228452839424, 140228454498303,
+SNULL, 140228456595455, 140228456636415,
+STORE, 140228454498304, 140228456595455,
+STORE, 140228456595456, 140228456636415,
+SNULL, 140228456595456, 140228456620031,
+STORE, 140228456620032, 140228456636415,
+STORE, 140228456595456, 140228456620031,
+ERASE, 140228456595456, 140228456620031,
+STORE, 140228456595456, 140228456620031,
+ERASE, 140228456620032, 140228456636415,
+STORE, 140228456620032, 140228456636415,
+STORE, 140228460945408, 140228460961791,
+SNULL, 140228456611839, 140228456620031,
+STORE, 140228456595456, 140228456611839,
+STORE, 140228456611840, 140228456620031,
+SNULL, 140228458745855, 140228458749951,
+STORE, 140228458741760, 140228458745855,
+STORE, 140228458745856, 140228458749951,
+SNULL, 94642790862847, 94642790871039,
+STORE, 94642790858752, 94642790862847,
+STORE, 94642790862848, 94642790871039,
+SNULL, 140228460994559, 140228460998655,
+STORE, 140228460990464, 140228460994559,
+STORE, 140228460994560, 140228460998655,
+ERASE, 140228460961792, 140228460990463,
+STORE, 94642801549312, 94642801684479,
+STORE, 140228459261952, 140228460945407,
+STORE, 94642801549312, 94642801819647,
+STORE, 94642801549312, 94642801954815,
+STORE, 94604087611392, 94604087824383,
+STORE, 94604089921536, 94604089925631,
+STORE, 94604089925632, 94604089933823,
+STORE, 94604089933824, 94604089946111,
+STORE, 94604105125888, 94604106424319,
+STORE, 140454937694208, 140454939353087,
+STORE, 140454939353088, 140454941450239,
+STORE, 140454941450240, 140454941466623,
+STORE, 140454941466624, 140454941474815,
+STORE, 140454941474816, 140454941491199,
+STORE, 140454941491200, 140454941503487,
+STORE, 140454941503488, 140454943596543,
+STORE, 140454943596544, 140454943600639,
+STORE, 140454943600640, 140454943604735,
+STORE, 140454943604736, 140454943748095,
+STORE, 140454944116736, 140454945800191,
+STORE, 140454945800192, 140454945816575,
+STORE, 140454945845248, 140454945849343,
+STORE, 140454945849344, 140454945853439,
+STORE, 140454945853440, 140454945857535,
+STORE, 140728438214656, 140728438353919,
+STORE, 140728439095296, 140728439107583,
+STORE, 140728439107584, 140728439111679,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140721843453952, 140737488351231,
+SNULL, 140721843462143, 140737488351231,
+STORE, 140721843453952, 140721843462143,
+STORE, 140721843322880, 140721843462143,
+STORE, 94465962455040, 94465964789759,
+SNULL, 94465962668031, 94465964789759,
+STORE, 94465962455040, 94465962668031,
+STORE, 94465962668032, 94465964789759,
+ERASE, 94465962668032, 94465964789759,
+STORE, 94465964765184, 94465964777471,
+STORE, 94465964777472, 94465964789759,
+STORE, 139913488314368, 139913490567167,
+SNULL, 139913488457727, 139913490567167,
+STORE, 139913488314368, 139913488457727,
+STORE, 139913488457728, 139913490567167,
+ERASE, 139913488457728, 139913490567167,
+STORE, 139913490554880, 139913490563071,
+STORE, 139913490563072, 139913490567167,
+STORE, 140721843503104, 140721843507199,
+STORE, 140721843490816, 140721843503103,
+STORE, 139913490526208, 139913490554879,
+STORE, 139913490518016, 139913490526207,
+STORE, 139913486200832, 139913488314367,
+SNULL, 139913486200832, 139913486213119,
+STORE, 139913486213120, 139913488314367,
+STORE, 139913486200832, 139913486213119,
+SNULL, 139913488306175, 139913488314367,
+STORE, 139913486213120, 139913488306175,
+STORE, 139913488306176, 139913488314367,
+ERASE, 139913488306176, 139913488314367,
+STORE, 139913488306176, 139913488314367,
+STORE, 139913482403840, 139913486200831,
+SNULL, 139913482403840, 139913484062719,
+STORE, 139913484062720, 139913486200831,
+STORE, 139913482403840, 139913484062719,
+SNULL, 139913486159871, 139913486200831,
+STORE, 139913484062720, 139913486159871,
+STORE, 139913486159872, 139913486200831,
+SNULL, 139913486159872, 139913486184447,
+STORE, 139913486184448, 139913486200831,
+STORE, 139913486159872, 139913486184447,
+ERASE, 139913486159872, 139913486184447,
+STORE, 139913486159872, 139913486184447,
+ERASE, 139913486184448, 139913486200831,
+STORE, 139913486184448, 139913486200831,
+STORE, 139913490509824, 139913490526207,
+SNULL, 139913486176255, 139913486184447,
+STORE, 139913486159872, 139913486176255,
+STORE, 139913486176256, 139913486184447,
+SNULL, 139913488310271, 139913488314367,
+STORE, 139913488306176, 139913488310271,
+STORE, 139913488310272, 139913488314367,
+SNULL, 94465964769279, 94465964777471,
+STORE, 94465964765184, 94465964769279,
+STORE, 94465964769280, 94465964777471,
+SNULL, 139913490558975, 139913490563071,
+STORE, 139913490554880, 139913490558975,
+STORE, 139913490558976, 139913490563071,
+ERASE, 139913490526208, 139913490554879,
+STORE, 94465970024448, 94465970159615,
+STORE, 139913488826368, 139913490509823,
+STORE, 94465970024448, 94465970294783,
+STORE, 94465970024448, 94465970429951,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140720583307264, 140737488351231,
+SNULL, 140720583315455, 140737488351231,
+STORE, 140720583307264, 140720583315455,
+STORE, 140720583176192, 140720583315455,
+STORE, 94212322082816, 94212324417535,
+SNULL, 94212322295807, 94212324417535,
+STORE, 94212322082816, 94212322295807,
+STORE, 94212322295808, 94212324417535,
+ERASE, 94212322295808, 94212324417535,
+STORE, 94212324392960, 94212324405247,
+STORE, 94212324405248, 94212324417535,
+STORE, 139659688538112, 139659690790911,
+SNULL, 139659688681471, 139659690790911,
+STORE, 139659688538112, 139659688681471,
+STORE, 139659688681472, 139659690790911,
+ERASE, 139659688681472, 139659690790911,
+STORE, 139659690778624, 139659690786815,
+STORE, 139659690786816, 139659690790911,
+STORE, 140720584781824, 140720584785919,
+STORE, 140720584769536, 140720584781823,
+STORE, 139659690749952, 139659690778623,
+STORE, 139659690741760, 139659690749951,
+STORE, 139659686424576, 139659688538111,
+SNULL, 139659686424576, 139659686436863,
+STORE, 139659686436864, 139659688538111,
+STORE, 139659686424576, 139659686436863,
+SNULL, 139659688529919, 139659688538111,
+STORE, 139659686436864, 139659688529919,
+STORE, 139659688529920, 139659688538111,
+ERASE, 139659688529920, 139659688538111,
+STORE, 139659688529920, 139659688538111,
+STORE, 139659682627584, 139659686424575,
+SNULL, 139659682627584, 139659684286463,
+STORE, 139659684286464, 139659686424575,
+STORE, 139659682627584, 139659684286463,
+SNULL, 139659686383615, 139659686424575,
+STORE, 139659684286464, 139659686383615,
+STORE, 139659686383616, 139659686424575,
+SNULL, 139659686383616, 139659686408191,
+STORE, 139659686408192, 139659686424575,
+STORE, 139659686383616, 139659686408191,
+ERASE, 139659686383616, 139659686408191,
+STORE, 139659686383616, 139659686408191,
+ERASE, 139659686408192, 139659686424575,
+STORE, 139659686408192, 139659686424575,
+STORE, 139659690733568, 139659690749951,
+SNULL, 139659686399999, 139659686408191,
+STORE, 139659686383616, 139659686399999,
+STORE, 139659686400000, 139659686408191,
+SNULL, 139659688534015, 139659688538111,
+STORE, 139659688529920, 139659688534015,
+STORE, 139659688534016, 139659688538111,
+SNULL, 94212324397055, 94212324405247,
+STORE, 94212324392960, 94212324397055,
+STORE, 94212324397056, 94212324405247,
+SNULL, 139659690782719, 139659690786815,
+STORE, 139659690778624, 139659690782719,
+STORE, 139659690782720, 139659690786815,
+ERASE, 139659690749952, 139659690778623,
+STORE, 94212355014656, 94212355149823,
+STORE, 139659689050112, 139659690733567,
+STORE, 94212355014656, 94212355284991,
+STORE, 94212355014656, 94212355420159,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140727689830400, 140737488351231,
+SNULL, 140727689838591, 140737488351231,
+STORE, 140727689830400, 140727689838591,
+STORE, 140727689699328, 140727689838591,
+STORE, 94572390281216, 94572392615935,
+SNULL, 94572390494207, 94572392615935,
+STORE, 94572390281216, 94572390494207,
+STORE, 94572390494208, 94572392615935,
+ERASE, 94572390494208, 94572392615935,
+STORE, 94572392591360, 94572392603647,
+STORE, 94572392603648, 94572392615935,
+STORE, 140575923769344, 140575926022143,
+SNULL, 140575923912703, 140575926022143,
+STORE, 140575923769344, 140575923912703,
+STORE, 140575923912704, 140575926022143,
+ERASE, 140575923912704, 140575926022143,
+STORE, 140575926009856, 140575926018047,
+STORE, 140575926018048, 140575926022143,
+STORE, 140727689871360, 140727689875455,
+STORE, 140727689859072, 140727689871359,
+STORE, 140575925981184, 140575926009855,
+STORE, 140575925972992, 140575925981183,
+STORE, 140575921655808, 140575923769343,
+SNULL, 140575921655808, 140575921668095,
+STORE, 140575921668096, 140575923769343,
+STORE, 140575921655808, 140575921668095,
+SNULL, 140575923761151, 140575923769343,
+STORE, 140575921668096, 140575923761151,
+STORE, 140575923761152, 140575923769343,
+ERASE, 140575923761152, 140575923769343,
+STORE, 140575923761152, 140575923769343,
+STORE, 140575917858816, 140575921655807,
+SNULL, 140575917858816, 140575919517695,
+STORE, 140575919517696, 140575921655807,
+STORE, 140575917858816, 140575919517695,
+SNULL, 140575921614847, 140575921655807,
+STORE, 140575919517696, 140575921614847,
+STORE, 140575921614848, 140575921655807,
+SNULL, 140575921614848, 140575921639423,
+STORE, 140575921639424, 140575921655807,
+STORE, 140575921614848, 140575921639423,
+ERASE, 140575921614848, 140575921639423,
+STORE, 140575921614848, 140575921639423,
+ERASE, 140575921639424, 140575921655807,
+STORE, 140575921639424, 140575921655807,
+STORE, 140575925964800, 140575925981183,
+SNULL, 140575921631231, 140575921639423,
+STORE, 140575921614848, 140575921631231,
+STORE, 140575921631232, 140575921639423,
+SNULL, 140575923765247, 140575923769343,
+STORE, 140575923761152, 140575923765247,
+STORE, 140575923765248, 140575923769343,
+SNULL, 94572392595455, 94572392603647,
+STORE, 94572392591360, 94572392595455,
+STORE, 94572392595456, 94572392603647,
+SNULL, 140575926013951, 140575926018047,
+STORE, 140575926009856, 140575926013951,
+STORE, 140575926013952, 140575926018047,
+ERASE, 140575925981184, 140575926009855,
+STORE, 94572402278400, 94572402413567,
+STORE, 140575924281344, 140575925964799,
+STORE, 94572402278400, 94572402548735,
+STORE, 94572402278400, 94572402683903,
+STORE, 94572402278400, 94572402851839,
+SNULL, 94572402827263, 94572402851839,
+STORE, 94572402278400, 94572402827263,
+STORE, 94572402827264, 94572402851839,
+ERASE, 94572402827264, 94572402851839,
+STORE, 94572402278400, 94572402966527,
+STORE, 94572402278400, 94572403109887,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140725520506880, 140737488351231,
+SNULL, 140725520515071, 140737488351231,
+STORE, 140725520506880, 140725520515071,
+STORE, 140725520375808, 140725520515071,
+STORE, 93829948788736, 93829951012863,
+SNULL, 93829948899327, 93829951012863,
+STORE, 93829948788736, 93829948899327,
+STORE, 93829948899328, 93829951012863,
+ERASE, 93829948899328, 93829951012863,
+STORE, 93829950992384, 93829951004671,
+STORE, 93829951004672, 93829951012863,
+STORE, 140133696794624, 140133699047423,
+SNULL, 140133696937983, 140133699047423,
+STORE, 140133696794624, 140133696937983,
+STORE, 140133696937984, 140133699047423,
+ERASE, 140133696937984, 140133699047423,
+STORE, 140133699035136, 140133699043327,
+STORE, 140133699043328, 140133699047423,
+STORE, 140725520875520, 140725520879615,
+STORE, 140725520863232, 140725520875519,
+STORE, 140133699006464, 140133699035135,
+STORE, 140133698998272, 140133699006463,
+STORE, 140133692997632, 140133696794623,
+SNULL, 140133692997632, 140133694656511,
+STORE, 140133694656512, 140133696794623,
+STORE, 140133692997632, 140133694656511,
+SNULL, 140133696753663, 140133696794623,
+STORE, 140133694656512, 140133696753663,
+STORE, 140133696753664, 140133696794623,
+SNULL, 140133696753664, 140133696778239,
+STORE, 140133696778240, 140133696794623,
+STORE, 140133696753664, 140133696778239,
+ERASE, 140133696753664, 140133696778239,
+STORE, 140133696753664, 140133696778239,
+ERASE, 140133696778240, 140133696794623,
+STORE, 140133696778240, 140133696794623,
+SNULL, 140133696770047, 140133696778239,
+STORE, 140133696753664, 140133696770047,
+STORE, 140133696770048, 140133696778239,
+SNULL, 93829951000575, 93829951004671,
+STORE, 93829950992384, 93829951000575,
+STORE, 93829951000576, 93829951004671,
+SNULL, 140133699039231, 140133699043327,
+STORE, 140133699035136, 140133699039231,
+STORE, 140133699039232, 140133699043327,
+ERASE, 140133699006464, 140133699035135,
+STORE, 93829978693632, 93829978828799,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140736118022144, 140737488351231,
+SNULL, 140736118030335, 140737488351231,
+STORE, 140736118022144, 140736118030335,
+STORE, 140736117891072, 140736118030335,
+STORE, 94467663982592, 94467666206719,
+SNULL, 94467664093183, 94467666206719,
+STORE, 94467663982592, 94467664093183,
+STORE, 94467664093184, 94467666206719,
+ERASE, 94467664093184, 94467666206719,
+STORE, 94467666186240, 94467666198527,
+STORE, 94467666198528, 94467666206719,
+STORE, 140525377327104, 140525379579903,
+SNULL, 140525377470463, 140525379579903,
+STORE, 140525377327104, 140525377470463,
+STORE, 140525377470464, 140525379579903,
+ERASE, 140525377470464, 140525379579903,
+STORE, 140525379567616, 140525379575807,
+STORE, 140525379575808, 140525379579903,
+STORE, 140736118771712, 140736118775807,
+STORE, 140736118759424, 140736118771711,
+STORE, 140525379538944, 140525379567615,
+STORE, 140525379530752, 140525379538943,
+STORE, 140525373530112, 140525377327103,
+SNULL, 140525373530112, 140525375188991,
+STORE, 140525375188992, 140525377327103,
+STORE, 140525373530112, 140525375188991,
+SNULL, 140525377286143, 140525377327103,
+STORE, 140525375188992, 140525377286143,
+STORE, 140525377286144, 140525377327103,
+SNULL, 140525377286144, 140525377310719,
+STORE, 140525377310720, 140525377327103,
+STORE, 140525377286144, 140525377310719,
+ERASE, 140525377286144, 140525377310719,
+STORE, 140525377286144, 140525377310719,
+ERASE, 140525377310720, 140525377327103,
+STORE, 140525377310720, 140525377327103,
+SNULL, 140525377302527, 140525377310719,
+STORE, 140525377286144, 140525377302527,
+STORE, 140525377302528, 140525377310719,
+SNULL, 94467666194431, 94467666198527,
+STORE, 94467666186240, 94467666194431,
+STORE, 94467666194432, 94467666198527,
+SNULL, 140525379571711, 140525379575807,
+STORE, 140525379567616, 140525379571711,
+STORE, 140525379571712, 140525379575807,
+ERASE, 140525379538944, 140525379567615,
+STORE, 94467693379584, 94467693514751,
+STORE, 94200172744704, 94200172957695,
+STORE, 94200175054848, 94200175058943,
+STORE, 94200175058944, 94200175067135,
+STORE, 94200175067136, 94200175079423,
+STORE, 94200196673536, 94200198905855,
+STORE, 140053867720704, 140053869379583,
+STORE, 140053869379584, 140053871476735,
+STORE, 140053871476736, 140053871493119,
+STORE, 140053871493120, 140053871501311,
+STORE, 140053871501312, 140053871517695,
+STORE, 140053871517696, 140053871529983,
+STORE, 140053871529984, 140053873623039,
+STORE, 140053873623040, 140053873627135,
+STORE, 140053873627136, 140053873631231,
+STORE, 140053873631232, 140053873774591,
+STORE, 140053874143232, 140053875826687,
+STORE, 140053875826688, 140053875843071,
+STORE, 140053875871744, 140053875875839,
+STORE, 140053875875840, 140053875879935,
+STORE, 140053875879936, 140053875884031,
+STORE, 140728538484736, 140728538623999,
+STORE, 140728538652672, 140728538664959,
+STORE, 140728538664960, 140728538669055,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140732307775488, 140737488351231,
+SNULL, 140732307783679, 140737488351231,
+STORE, 140732307775488, 140732307783679,
+STORE, 140732307644416, 140732307783679,
+STORE, 93831417630720, 93831419965439,
+SNULL, 93831417843711, 93831419965439,
+STORE, 93831417630720, 93831417843711,
+STORE, 93831417843712, 93831419965439,
+ERASE, 93831417843712, 93831419965439,
+STORE, 93831419940864, 93831419953151,
+STORE, 93831419953152, 93831419965439,
+STORE, 140241062088704, 140241064341503,
+SNULL, 140241062232063, 140241064341503,
+STORE, 140241062088704, 140241062232063,
+STORE, 140241062232064, 140241064341503,
+ERASE, 140241062232064, 140241064341503,
+STORE, 140241064329216, 140241064337407,
+STORE, 140241064337408, 140241064341503,
+STORE, 140732308140032, 140732308144127,
+STORE, 140732308127744, 140732308140031,
+STORE, 140241064300544, 140241064329215,
+STORE, 140241064292352, 140241064300543,
+STORE, 140241059975168, 140241062088703,
+SNULL, 140241059975168, 140241059987455,
+STORE, 140241059987456, 140241062088703,
+STORE, 140241059975168, 140241059987455,
+SNULL, 140241062080511, 140241062088703,
+STORE, 140241059987456, 140241062080511,
+STORE, 140241062080512, 140241062088703,
+ERASE, 140241062080512, 140241062088703,
+STORE, 140241062080512, 140241062088703,
+STORE, 140241056178176, 140241059975167,
+SNULL, 140241056178176, 140241057837055,
+STORE, 140241057837056, 140241059975167,
+STORE, 140241056178176, 140241057837055,
+SNULL, 140241059934207, 140241059975167,
+STORE, 140241057837056, 140241059934207,
+STORE, 140241059934208, 140241059975167,
+SNULL, 140241059934208, 140241059958783,
+STORE, 140241059958784, 140241059975167,
+STORE, 140241059934208, 140241059958783,
+ERASE, 140241059934208, 140241059958783,
+STORE, 140241059934208, 140241059958783,
+ERASE, 140241059958784, 140241059975167,
+STORE, 140241059958784, 140241059975167,
+STORE, 140241064284160, 140241064300543,
+SNULL, 140241059950591, 140241059958783,
+STORE, 140241059934208, 140241059950591,
+STORE, 140241059950592, 140241059958783,
+SNULL, 140241062084607, 140241062088703,
+STORE, 140241062080512, 140241062084607,
+STORE, 140241062084608, 140241062088703,
+SNULL, 93831419944959, 93831419953151,
+STORE, 93831419940864, 93831419944959,
+STORE, 93831419944960, 93831419953151,
+SNULL, 140241064333311, 140241064337407,
+STORE, 140241064329216, 140241064333311,
+STORE, 140241064333312, 140241064337407,
+ERASE, 140241064300544, 140241064329215,
+STORE, 93831435284480, 93831435419647,
+STORE, 140241062600704, 140241064284159,
+STORE, 93831435284480, 93831435554815,
+STORE, 93831435284480, 93831435689983,
+STORE, 93831435284480, 93831435862015,
+SNULL, 93831435837439, 93831435862015,
+STORE, 93831435284480, 93831435837439,
+STORE, 93831435837440, 93831435862015,
+ERASE, 93831435837440, 93831435862015,
+STORE, 93831435284480, 93831435972607,
+STORE, 93831435284480, 93831436107775,
+SNULL, 93831436091391, 93831436107775,
+STORE, 93831435284480, 93831436091391,
+STORE, 93831436091392, 93831436107775,
+ERASE, 93831436091392, 93831436107775,
+STORE, 93831435284480, 93831436226559,
+STORE, 93831435284480, 93831436361727,
+STORE, 93831435284480, 93831436505087,
+STORE, 93831435284480, 93831436652543,
+STORE, 93831435284480, 93831436787711,
+STORE, 93831435284480, 93831436926975,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140728546775040, 140737488351231,
+SNULL, 140728546783231, 140737488351231,
+STORE, 140728546775040, 140728546783231,
+STORE, 140728546643968, 140728546783231,
+STORE, 94456178786304, 94456181010431,
+SNULL, 94456178896895, 94456181010431,
+STORE, 94456178786304, 94456178896895,
+STORE, 94456178896896, 94456181010431,
+ERASE, 94456178896896, 94456181010431,
+STORE, 94456180989952, 94456181002239,
+STORE, 94456181002240, 94456181010431,
+STORE, 140221893091328, 140221895344127,
+SNULL, 140221893234687, 140221895344127,
+STORE, 140221893091328, 140221893234687,
+STORE, 140221893234688, 140221895344127,
+ERASE, 140221893234688, 140221895344127,
+STORE, 140221895331840, 140221895340031,
+STORE, 140221895340032, 140221895344127,
+STORE, 140728547803136, 140728547807231,
+STORE, 140728547790848, 140728547803135,
+STORE, 140221895303168, 140221895331839,
+STORE, 140221895294976, 140221895303167,
+STORE, 140221889294336, 140221893091327,
+SNULL, 140221889294336, 140221890953215,
+STORE, 140221890953216, 140221893091327,
+STORE, 140221889294336, 140221890953215,
+SNULL, 140221893050367, 140221893091327,
+STORE, 140221890953216, 140221893050367,
+STORE, 140221893050368, 140221893091327,
+SNULL, 140221893050368, 140221893074943,
+STORE, 140221893074944, 140221893091327,
+STORE, 140221893050368, 140221893074943,
+ERASE, 140221893050368, 140221893074943,
+STORE, 140221893050368, 140221893074943,
+ERASE, 140221893074944, 140221893091327,
+STORE, 140221893074944, 140221893091327,
+SNULL, 140221893066751, 140221893074943,
+STORE, 140221893050368, 140221893066751,
+STORE, 140221893066752, 140221893074943,
+SNULL, 94456180998143, 94456181002239,
+STORE, 94456180989952, 94456180998143,
+STORE, 94456180998144, 94456181002239,
+SNULL, 140221895335935, 140221895340031,
+STORE, 140221895331840, 140221895335935,
+STORE, 140221895335936, 140221895340031,
+ERASE, 140221895303168, 140221895331839,
+STORE, 94456203730944, 94456203866111,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140734438637568, 140737488351231,
+SNULL, 140734438645759, 140737488351231,
+STORE, 140734438637568, 140734438645759,
+STORE, 140734438506496, 140734438645759,
+STORE, 94652233351168, 94652235575295,
+SNULL, 94652233461759, 94652235575295,
+STORE, 94652233351168, 94652233461759,
+STORE, 94652233461760, 94652235575295,
+ERASE, 94652233461760, 94652235575295,
+STORE, 94652235554816, 94652235567103,
+STORE, 94652235567104, 94652235575295,
+STORE, 140536493195264, 140536495448063,
+SNULL, 140536493338623, 140536495448063,
+STORE, 140536493195264, 140536493338623,
+STORE, 140536493338624, 140536495448063,
+ERASE, 140536493338624, 140536495448063,
+STORE, 140536495435776, 140536495443967,
+STORE, 140536495443968, 140536495448063,
+STORE, 140734439002112, 140734439006207,
+STORE, 140734438989824, 140734439002111,
+STORE, 140536495407104, 140536495435775,
+STORE, 140536495398912, 140536495407103,
+STORE, 140536489398272, 140536493195263,
+SNULL, 140536489398272, 140536491057151,
+STORE, 140536491057152, 140536493195263,
+STORE, 140536489398272, 140536491057151,
+SNULL, 140536493154303, 140536493195263,
+STORE, 140536491057152, 140536493154303,
+STORE, 140536493154304, 140536493195263,
+SNULL, 140536493154304, 140536493178879,
+STORE, 140536493178880, 140536493195263,
+STORE, 140536493154304, 140536493178879,
+ERASE, 140536493154304, 140536493178879,
+STORE, 140536493154304, 140536493178879,
+ERASE, 140536493178880, 140536493195263,
+STORE, 140536493178880, 140536493195263,
+SNULL, 140536493170687, 140536493178879,
+STORE, 140536493154304, 140536493170687,
+STORE, 140536493170688, 140536493178879,
+SNULL, 94652235563007, 94652235567103,
+STORE, 94652235554816, 94652235563007,
+STORE, 94652235563008, 94652235567103,
+SNULL, 140536495439871, 140536495443967,
+STORE, 140536495435776, 140536495439871,
+STORE, 140536495439872, 140536495443967,
+ERASE, 140536495407104, 140536495435775,
+STORE, 94652265619456, 94652265754623,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140721814200320, 140737488351231,
+SNULL, 140721814208511, 140737488351231,
+STORE, 140721814200320, 140721814208511,
+STORE, 140721814069248, 140721814208511,
+STORE, 94062800691200, 94062802915327,
+SNULL, 94062800801791, 94062802915327,
+STORE, 94062800691200, 94062800801791,
+STORE, 94062800801792, 94062802915327,
+ERASE, 94062800801792, 94062802915327,
+STORE, 94062802894848, 94062802907135,
+STORE, 94062802907136, 94062802915327,
+STORE, 139717739700224, 139717741953023,
+SNULL, 139717739843583, 139717741953023,
+STORE, 139717739700224, 139717739843583,
+STORE, 139717739843584, 139717741953023,
+ERASE, 139717739843584, 139717741953023,
+STORE, 139717741940736, 139717741948927,
+STORE, 139717741948928, 139717741953023,
+STORE, 140721814224896, 140721814228991,
+STORE, 140721814212608, 140721814224895,
+STORE, 139717741912064, 139717741940735,
+STORE, 139717741903872, 139717741912063,
+STORE, 139717735903232, 139717739700223,
+SNULL, 139717735903232, 139717737562111,
+STORE, 139717737562112, 139717739700223,
+STORE, 139717735903232, 139717737562111,
+SNULL, 139717739659263, 139717739700223,
+STORE, 139717737562112, 139717739659263,
+STORE, 139717739659264, 139717739700223,
+SNULL, 139717739659264, 139717739683839,
+STORE, 139717739683840, 139717739700223,
+STORE, 139717739659264, 139717739683839,
+ERASE, 139717739659264, 139717739683839,
+STORE, 139717739659264, 139717739683839,
+ERASE, 139717739683840, 139717739700223,
+STORE, 139717739683840, 139717739700223,
+SNULL, 139717739675647, 139717739683839,
+STORE, 139717739659264, 139717739675647,
+STORE, 139717739675648, 139717739683839,
+SNULL, 94062802903039, 94062802907135,
+STORE, 94062802894848, 94062802903039,
+STORE, 94062802903040, 94062802907135,
+SNULL, 139717741944831, 139717741948927,
+STORE, 139717741940736, 139717741944831,
+STORE, 139717741944832, 139717741948927,
+ERASE, 139717741912064, 139717741940735,
+STORE, 94062814060544, 94062814195711,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140723945754624, 140737488351231,
+SNULL, 140723945762815, 140737488351231,
+STORE, 140723945754624, 140723945762815,
+STORE, 140723945623552, 140723945762815,
+STORE, 94886119305216, 94886121639935,
+SNULL, 94886119518207, 94886121639935,
+STORE, 94886119305216, 94886119518207,
+STORE, 94886119518208, 94886121639935,
+ERASE, 94886119518208, 94886121639935,
+STORE, 94886121615360, 94886121627647,
+STORE, 94886121627648, 94886121639935,
+STORE, 140152532131840, 140152534384639,
+SNULL, 140152532275199, 140152534384639,
+STORE, 140152532131840, 140152532275199,
+STORE, 140152532275200, 140152534384639,
+ERASE, 140152532275200, 140152534384639,
+STORE, 140152534372352, 140152534380543,
+STORE, 140152534380544, 140152534384639,
+STORE, 140723946213376, 140723946217471,
+STORE, 140723946201088, 140723946213375,
+STORE, 140152534343680, 140152534372351,
+STORE, 140152534335488, 140152534343679,
+STORE, 140152530018304, 140152532131839,
+SNULL, 140152530018304, 140152530030591,
+STORE, 140152530030592, 140152532131839,
+STORE, 140152530018304, 140152530030591,
+SNULL, 140152532123647, 140152532131839,
+STORE, 140152530030592, 140152532123647,
+STORE, 140152532123648, 140152532131839,
+ERASE, 140152532123648, 140152532131839,
+STORE, 140152532123648, 140152532131839,
+STORE, 140152526221312, 140152530018303,
+SNULL, 140152526221312, 140152527880191,
+STORE, 140152527880192, 140152530018303,
+STORE, 140152526221312, 140152527880191,
+SNULL, 140152529977343, 140152530018303,
+STORE, 140152527880192, 140152529977343,
+STORE, 140152529977344, 140152530018303,
+SNULL, 140152529977344, 140152530001919,
+STORE, 140152530001920, 140152530018303,
+STORE, 140152529977344, 140152530001919,
+ERASE, 140152529977344, 140152530001919,
+STORE, 140152529977344, 140152530001919,
+ERASE, 140152530001920, 140152530018303,
+STORE, 140152530001920, 140152530018303,
+STORE, 140152534327296, 140152534343679,
+SNULL, 140152529993727, 140152530001919,
+STORE, 140152529977344, 140152529993727,
+STORE, 140152529993728, 140152530001919,
+SNULL, 140152532127743, 140152532131839,
+STORE, 140152532123648, 140152532127743,
+STORE, 140152532127744, 140152532131839,
+SNULL, 94886121619455, 94886121627647,
+STORE, 94886121615360, 94886121619455,
+STORE, 94886121619456, 94886121627647,
+SNULL, 140152534376447, 140152534380543,
+STORE, 140152534372352, 140152534376447,
+STORE, 140152534376448, 140152534380543,
+ERASE, 140152534343680, 140152534372351,
+STORE, 94886129770496, 94886129905663,
+STORE, 140152532643840, 140152534327295,
+STORE, 94886129770496, 94886130040831,
+STORE, 94886129770496, 94886130175999,
+STORE, 94886129770496, 94886130348031,
+SNULL, 94886130323455, 94886130348031,
+STORE, 94886129770496, 94886130323455,
+STORE, 94886130323456, 94886130348031,
+ERASE, 94886130323456, 94886130348031,
+STORE, 94886129770496, 94886130458623,
+STORE, 94886129770496, 94886130606079,
+SNULL, 94886130573311, 94886130606079,
+STORE, 94886129770496, 94886130573311,
+STORE, 94886130573312, 94886130606079,
+ERASE, 94886130573312, 94886130606079,
+STORE, 94886129770496, 94886130724863,
+STORE, 94886129770496, 94886130876415,
+STORE, 94886129770496, 94886131023871,
+STORE, 94886129770496, 94886131175423,
+STORE, 94886129770496, 94886131318783,
+STORE, 94886129770496, 94886131453951,
+SNULL, 94886131449855, 94886131453951,
+STORE, 94886129770496, 94886131449855,
+STORE, 94886131449856, 94886131453951,
+ERASE, 94886131449856, 94886131453951,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140735450779648, 140737488351231,
+SNULL, 140735450787839, 140737488351231,
+STORE, 140735450779648, 140735450787839,
+STORE, 140735450648576, 140735450787839,
+STORE, 93947794079744, 93947796414463,
+SNULL, 93947794292735, 93947796414463,
+STORE, 93947794079744, 93947794292735,
+STORE, 93947794292736, 93947796414463,
+ERASE, 93947794292736, 93947796414463,
+STORE, 93947796389888, 93947796402175,
+STORE, 93947796402176, 93947796414463,
+STORE, 139841993433088, 139841995685887,
+SNULL, 139841993576447, 139841995685887,
+STORE, 139841993433088, 139841993576447,
+STORE, 139841993576448, 139841995685887,
+ERASE, 139841993576448, 139841995685887,
+STORE, 139841995673600, 139841995681791,
+STORE, 139841995681792, 139841995685887,
+STORE, 140735451308032, 140735451312127,
+STORE, 140735451295744, 140735451308031,
+STORE, 139841995644928, 139841995673599,
+STORE, 139841995636736, 139841995644927,
+STORE, 139841991319552, 139841993433087,
+SNULL, 139841991319552, 139841991331839,
+STORE, 139841991331840, 139841993433087,
+STORE, 139841991319552, 139841991331839,
+SNULL, 139841993424895, 139841993433087,
+STORE, 139841991331840, 139841993424895,
+STORE, 139841993424896, 139841993433087,
+ERASE, 139841993424896, 139841993433087,
+STORE, 139841993424896, 139841993433087,
+STORE, 139841987522560, 139841991319551,
+SNULL, 139841987522560, 139841989181439,
+STORE, 139841989181440, 139841991319551,
+STORE, 139841987522560, 139841989181439,
+SNULL, 139841991278591, 139841991319551,
+STORE, 139841989181440, 139841991278591,
+STORE, 139841991278592, 139841991319551,
+SNULL, 139841991278592, 139841991303167,
+STORE, 139841991303168, 139841991319551,
+STORE, 139841991278592, 139841991303167,
+ERASE, 139841991278592, 139841991303167,
+STORE, 139841991278592, 139841991303167,
+ERASE, 139841991303168, 139841991319551,
+STORE, 139841991303168, 139841991319551,
+STORE, 139841995628544, 139841995644927,
+SNULL, 139841991294975, 139841991303167,
+STORE, 139841991278592, 139841991294975,
+STORE, 139841991294976, 139841991303167,
+SNULL, 139841993428991, 139841993433087,
+STORE, 139841993424896, 139841993428991,
+STORE, 139841993428992, 139841993433087,
+SNULL, 93947796393983, 93947796402175,
+STORE, 93947796389888, 93947796393983,
+STORE, 93947796393984, 93947796402175,
+SNULL, 139841995677695, 139841995681791,
+STORE, 139841995673600, 139841995677695,
+STORE, 139841995677696, 139841995681791,
+ERASE, 139841995644928, 139841995673599,
+STORE, 93947829739520, 93947829874687,
+STORE, 139841993945088, 139841995628543,
+STORE, 93947829739520, 93947830009855,
+STORE, 93947829739520, 93947830145023,
+STORE, 94659351814144, 94659352027135,
+STORE, 94659354124288, 94659354128383,
+STORE, 94659354128384, 94659354136575,
+STORE, 94659354136576, 94659354148863,
+STORE, 94659383476224, 94659385057279,
+STORE, 139959054557184, 139959056216063,
+STORE, 139959056216064, 139959058313215,
+STORE, 139959058313216, 139959058329599,
+STORE, 139959058329600, 139959058337791,
+STORE, 139959058337792, 139959058354175,
+STORE, 139959058354176, 139959058366463,
+STORE, 139959058366464, 139959060459519,
+STORE, 139959060459520, 139959060463615,
+STORE, 139959060463616, 139959060467711,
+STORE, 139959060467712, 139959060611071,
+STORE, 139959060979712, 139959062663167,
+STORE, 139959062663168, 139959062679551,
+STORE, 139959062708224, 139959062712319,
+STORE, 139959062712320, 139959062716415,
+STORE, 139959062716416, 139959062720511,
+STORE, 140735532539904, 140735532679167,
+STORE, 140735532830720, 140735532843007,
+STORE, 140735532843008, 140735532847103,
+STORE, 93894361829376, 93894362042367,
+STORE, 93894364139520, 93894364143615,
+STORE, 93894364143616, 93894364151807,
+STORE, 93894364151808, 93894364164095,
+STORE, 93894396944384, 93894397624319,
+STORE, 140075612573696, 140075614232575,
+STORE, 140075614232576, 140075616329727,
+STORE, 140075616329728, 140075616346111,
+STORE, 140075616346112, 140075616354303,
+STORE, 140075616354304, 140075616370687,
+STORE, 140075616370688, 140075616382975,
+STORE, 140075616382976, 140075618476031,
+STORE, 140075618476032, 140075618480127,
+STORE, 140075618480128, 140075618484223,
+STORE, 140075618484224, 140075618627583,
+STORE, 140075618996224, 140075620679679,
+STORE, 140075620679680, 140075620696063,
+STORE, 140075620724736, 140075620728831,
+STORE, 140075620728832, 140075620732927,
+STORE, 140075620732928, 140075620737023,
+STORE, 140720830312448, 140720830451711,
+STORE, 140720830631936, 140720830644223,
+STORE, 140720830644224, 140720830648319,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140735116226560, 140737488351231,
+SNULL, 140735116234751, 140737488351231,
+STORE, 140735116226560, 140735116234751,
+STORE, 140735116095488, 140735116234751,
+STORE, 94873398054912, 94873400279039,
+SNULL, 94873398165503, 94873400279039,
+STORE, 94873398054912, 94873398165503,
+STORE, 94873398165504, 94873400279039,
+ERASE, 94873398165504, 94873400279039,
+STORE, 94873400258560, 94873400270847,
+STORE, 94873400270848, 94873400279039,
+STORE, 140303828606976, 140303830859775,
+SNULL, 140303828750335, 140303830859775,
+STORE, 140303828606976, 140303828750335,
+STORE, 140303828750336, 140303830859775,
+ERASE, 140303828750336, 140303830859775,
+STORE, 140303830847488, 140303830855679,
+STORE, 140303830855680, 140303830859775,
+STORE, 140735116251136, 140735116255231,
+STORE, 140735116238848, 140735116251135,
+STORE, 140303830818816, 140303830847487,
+STORE, 140303830810624, 140303830818815,
+STORE, 140303824809984, 140303828606975,
+SNULL, 140303824809984, 140303826468863,
+STORE, 140303826468864, 140303828606975,
+STORE, 140303824809984, 140303826468863,
+SNULL, 140303828566015, 140303828606975,
+STORE, 140303826468864, 140303828566015,
+STORE, 140303828566016, 140303828606975,
+SNULL, 140303828566016, 140303828590591,
+STORE, 140303828590592, 140303828606975,
+STORE, 140303828566016, 140303828590591,
+ERASE, 140303828566016, 140303828590591,
+STORE, 140303828566016, 140303828590591,
+ERASE, 140303828590592, 140303828606975,
+STORE, 140303828590592, 140303828606975,
+SNULL, 140303828582399, 140303828590591,
+STORE, 140303828566016, 140303828582399,
+STORE, 140303828582400, 140303828590591,
+SNULL, 94873400266751, 94873400270847,
+STORE, 94873400258560, 94873400266751,
+STORE, 94873400266752, 94873400270847,
+SNULL, 140303830851583, 140303830855679,
+STORE, 140303830847488, 140303830851583,
+STORE, 140303830851584, 140303830855679,
+ERASE, 140303830818816, 140303830847487,
+STORE, 94873413713920, 94873413849087,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140732349956096, 140737488351231,
+SNULL, 140732349964287, 140737488351231,
+STORE, 140732349956096, 140732349964287,
+STORE, 140732349825024, 140732349964287,
+STORE, 94009652736000, 94009655070719,
+SNULL, 94009652948991, 94009655070719,
+STORE, 94009652736000, 94009652948991,
+STORE, 94009652948992, 94009655070719,
+ERASE, 94009652948992, 94009655070719,
+STORE, 94009655046144, 94009655058431,
+STORE, 94009655058432, 94009655070719,
+STORE, 140295688531968, 140295690784767,
+SNULL, 140295688675327, 140295690784767,
+STORE, 140295688531968, 140295688675327,
+STORE, 140295688675328, 140295690784767,
+ERASE, 140295688675328, 140295690784767,
+STORE, 140295690772480, 140295690780671,
+STORE, 140295690780672, 140295690784767,
+STORE, 140732350005248, 140732350009343,
+STORE, 140732349992960, 140732350005247,
+STORE, 140295690743808, 140295690772479,
+STORE, 140295690735616, 140295690743807,
+STORE, 140295686418432, 140295688531967,
+SNULL, 140295686418432, 140295686430719,
+STORE, 140295686430720, 140295688531967,
+STORE, 140295686418432, 140295686430719,
+SNULL, 140295688523775, 140295688531967,
+STORE, 140295686430720, 140295688523775,
+STORE, 140295688523776, 140295688531967,
+ERASE, 140295688523776, 140295688531967,
+STORE, 140295688523776, 140295688531967,
+STORE, 140295682621440, 140295686418431,
+SNULL, 140295682621440, 140295684280319,
+STORE, 140295684280320, 140295686418431,
+STORE, 140295682621440, 140295684280319,
+SNULL, 140295686377471, 140295686418431,
+STORE, 140295684280320, 140295686377471,
+STORE, 140295686377472, 140295686418431,
+SNULL, 140295686377472, 140295686402047,
+STORE, 140295686402048, 140295686418431,
+STORE, 140295686377472, 140295686402047,
+ERASE, 140295686377472, 140295686402047,
+STORE, 140295686377472, 140295686402047,
+ERASE, 140295686402048, 140295686418431,
+STORE, 140295686402048, 140295686418431,
+STORE, 140295690727424, 140295690743807,
+SNULL, 140295686393855, 140295686402047,
+STORE, 140295686377472, 140295686393855,
+STORE, 140295686393856, 140295686402047,
+SNULL, 140295688527871, 140295688531967,
+STORE, 140295688523776, 140295688527871,
+STORE, 140295688527872, 140295688531967,
+SNULL, 94009655050239, 94009655058431,
+STORE, 94009655046144, 94009655050239,
+STORE, 94009655050240, 94009655058431,
+SNULL, 140295690776575, 140295690780671,
+STORE, 140295690772480, 140295690776575,
+STORE, 140295690776576, 140295690780671,
+ERASE, 140295690743808, 140295690772479,
+STORE, 94009672114176, 94009672249343,
+STORE, 140295689043968, 140295690727423,
+STORE, 94009672114176, 94009672384511,
+STORE, 94009672114176, 94009672519679,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722376515584, 140737488351231,
+SNULL, 140722376523775, 140737488351231,
+STORE, 140722376515584, 140722376523775,
+STORE, 140722376384512, 140722376523775,
+STORE, 94089815773184, 94089818107903,
+SNULL, 94089815986175, 94089818107903,
+STORE, 94089815773184, 94089815986175,
+STORE, 94089815986176, 94089818107903,
+ERASE, 94089815986176, 94089818107903,
+STORE, 94089818083328, 94089818095615,
+STORE, 94089818095616, 94089818107903,
+STORE, 140265595711488, 140265597964287,
+SNULL, 140265595854847, 140265597964287,
+STORE, 140265595711488, 140265595854847,
+STORE, 140265595854848, 140265597964287,
+ERASE, 140265595854848, 140265597964287,
+STORE, 140265597952000, 140265597960191,
+STORE, 140265597960192, 140265597964287,
+STORE, 140722378297344, 140722378301439,
+STORE, 140722378285056, 140722378297343,
+STORE, 140265597923328, 140265597951999,
+STORE, 140265597915136, 140265597923327,
+STORE, 140265593597952, 140265595711487,
+SNULL, 140265593597952, 140265593610239,
+STORE, 140265593610240, 140265595711487,
+STORE, 140265593597952, 140265593610239,
+SNULL, 140265595703295, 140265595711487,
+STORE, 140265593610240, 140265595703295,
+STORE, 140265595703296, 140265595711487,
+ERASE, 140265595703296, 140265595711487,
+STORE, 140265595703296, 140265595711487,
+STORE, 140265589800960, 140265593597951,
+SNULL, 140265589800960, 140265591459839,
+STORE, 140265591459840, 140265593597951,
+STORE, 140265589800960, 140265591459839,
+SNULL, 140265593556991, 140265593597951,
+STORE, 140265591459840, 140265593556991,
+STORE, 140265593556992, 140265593597951,
+SNULL, 140265593556992, 140265593581567,
+STORE, 140265593581568, 140265593597951,
+STORE, 140265593556992, 140265593581567,
+ERASE, 140265593556992, 140265593581567,
+STORE, 140265593556992, 140265593581567,
+ERASE, 140265593581568, 140265593597951,
+STORE, 140265593581568, 140265593597951,
+STORE, 140265597906944, 140265597923327,
+SNULL, 140265593573375, 140265593581567,
+STORE, 140265593556992, 140265593573375,
+STORE, 140265593573376, 140265593581567,
+SNULL, 140265595707391, 140265595711487,
+STORE, 140265595703296, 140265595707391,
+STORE, 140265595707392, 140265595711487,
+SNULL, 94089818087423, 94089818095615,
+STORE, 94089818083328, 94089818087423,
+STORE, 94089818087424, 94089818095615,
+SNULL, 140265597956095, 140265597960191,
+STORE, 140265597952000, 140265597956095,
+STORE, 140265597956096, 140265597960191,
+ERASE, 140265597923328, 140265597951999,
+STORE, 94089837146112, 94089837281279,
+STORE, 140265596223488, 140265597906943,
+STORE, 94089837146112, 94089837416447,
+STORE, 94089837146112, 94089837551615,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140735265218560, 140737488351231,
+SNULL, 140735265226751, 140737488351231,
+STORE, 140735265218560, 140735265226751,
+STORE, 140735265087488, 140735265226751,
+STORE, 94250422370304, 94250424705023,
+SNULL, 94250422583295, 94250424705023,
+STORE, 94250422370304, 94250422583295,
+STORE, 94250422583296, 94250424705023,
+ERASE, 94250422583296, 94250424705023,
+STORE, 94250424680448, 94250424692735,
+STORE, 94250424692736, 94250424705023,
+STORE, 140344442474496, 140344444727295,
+SNULL, 140344442617855, 140344444727295,
+STORE, 140344442474496, 140344442617855,
+STORE, 140344442617856, 140344444727295,
+ERASE, 140344442617856, 140344444727295,
+STORE, 140344444715008, 140344444723199,
+STORE, 140344444723200, 140344444727295,
+STORE, 140735265341440, 140735265345535,
+STORE, 140735265329152, 140735265341439,
+STORE, 140344444686336, 140344444715007,
+STORE, 140344444678144, 140344444686335,
+STORE, 140344440360960, 140344442474495,
+SNULL, 140344440360960, 140344440373247,
+STORE, 140344440373248, 140344442474495,
+STORE, 140344440360960, 140344440373247,
+SNULL, 140344442466303, 140344442474495,
+STORE, 140344440373248, 140344442466303,
+STORE, 140344442466304, 140344442474495,
+ERASE, 140344442466304, 140344442474495,
+STORE, 140344442466304, 140344442474495,
+STORE, 140344436563968, 140344440360959,
+SNULL, 140344436563968, 140344438222847,
+STORE, 140344438222848, 140344440360959,
+STORE, 140344436563968, 140344438222847,
+SNULL, 140344440319999, 140344440360959,
+STORE, 140344438222848, 140344440319999,
+STORE, 140344440320000, 140344440360959,
+SNULL, 140344440320000, 140344440344575,
+STORE, 140344440344576, 140344440360959,
+STORE, 140344440320000, 140344440344575,
+ERASE, 140344440320000, 140344440344575,
+STORE, 140344440320000, 140344440344575,
+ERASE, 140344440344576, 140344440360959,
+STORE, 140344440344576, 140344440360959,
+STORE, 140344444669952, 140344444686335,
+SNULL, 140344440336383, 140344440344575,
+STORE, 140344440320000, 140344440336383,
+STORE, 140344440336384, 140344440344575,
+SNULL, 140344442470399, 140344442474495,
+STORE, 140344442466304, 140344442470399,
+STORE, 140344442470400, 140344442474495,
+SNULL, 94250424684543, 94250424692735,
+STORE, 94250424680448, 94250424684543,
+STORE, 94250424684544, 94250424692735,
+SNULL, 140344444719103, 140344444723199,
+STORE, 140344444715008, 140344444719103,
+STORE, 140344444719104, 140344444723199,
+ERASE, 140344444686336, 140344444715007,
+STORE, 94250445512704, 94250445647871,
+STORE, 140344442986496, 140344444669951,
+STORE, 94250445512704, 94250445783039,
+STORE, 94250445512704, 94250445918207,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140725762719744, 140737488351231,
+SNULL, 140725762727935, 140737488351231,
+STORE, 140725762719744, 140725762727935,
+STORE, 140725762588672, 140725762727935,
+STORE, 94819009097728, 94819011432447,
+SNULL, 94819009310719, 94819011432447,
+STORE, 94819009097728, 94819009310719,
+STORE, 94819009310720, 94819011432447,
+ERASE, 94819009310720, 94819011432447,
+STORE, 94819011407872, 94819011420159,
+STORE, 94819011420160, 94819011432447,
+STORE, 139987985596416, 139987987849215,
+SNULL, 139987985739775, 139987987849215,
+STORE, 139987985596416, 139987985739775,
+STORE, 139987985739776, 139987987849215,
+ERASE, 139987985739776, 139987987849215,
+STORE, 139987987836928, 139987987845119,
+STORE, 139987987845120, 139987987849215,
+STORE, 140725763072000, 140725763076095,
+STORE, 140725763059712, 140725763071999,
+STORE, 139987987808256, 139987987836927,
+STORE, 139987987800064, 139987987808255,
+STORE, 139987983482880, 139987985596415,
+SNULL, 139987983482880, 139987983495167,
+STORE, 139987983495168, 139987985596415,
+STORE, 139987983482880, 139987983495167,
+SNULL, 139987985588223, 139987985596415,
+STORE, 139987983495168, 139987985588223,
+STORE, 139987985588224, 139987985596415,
+ERASE, 139987985588224, 139987985596415,
+STORE, 139987985588224, 139987985596415,
+STORE, 139987979685888, 139987983482879,
+SNULL, 139987979685888, 139987981344767,
+STORE, 139987981344768, 139987983482879,
+STORE, 139987979685888, 139987981344767,
+SNULL, 139987983441919, 139987983482879,
+STORE, 139987981344768, 139987983441919,
+STORE, 139987983441920, 139987983482879,
+SNULL, 139987983441920, 139987983466495,
+STORE, 139987983466496, 139987983482879,
+STORE, 139987983441920, 139987983466495,
+ERASE, 139987983441920, 139987983466495,
+STORE, 139987983441920, 139987983466495,
+ERASE, 139987983466496, 139987983482879,
+STORE, 139987983466496, 139987983482879,
+STORE, 139987987791872, 139987987808255,
+SNULL, 139987983458303, 139987983466495,
+STORE, 139987983441920, 139987983458303,
+STORE, 139987983458304, 139987983466495,
+SNULL, 139987985592319, 139987985596415,
+STORE, 139987985588224, 139987985592319,
+STORE, 139987985592320, 139987985596415,
+SNULL, 94819011411967, 94819011420159,
+STORE, 94819011407872, 94819011411967,
+STORE, 94819011411968, 94819011420159,
+SNULL, 139987987841023, 139987987845119,
+STORE, 139987987836928, 139987987841023,
+STORE, 139987987841024, 139987987845119,
+ERASE, 139987987808256, 139987987836927,
+STORE, 94819028176896, 94819028312063,
+STORE, 139987986108416, 139987987791871,
+STORE, 94819028176896, 94819028447231,
+STORE, 94819028176896, 94819028582399,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722475413504, 140737488351231,
+SNULL, 140722475421695, 140737488351231,
+STORE, 140722475413504, 140722475421695,
+STORE, 140722475282432, 140722475421695,
+STORE, 94620599119872, 94620601343999,
+SNULL, 94620599230463, 94620601343999,
+STORE, 94620599119872, 94620599230463,
+STORE, 94620599230464, 94620601343999,
+ERASE, 94620599230464, 94620601343999,
+STORE, 94620601323520, 94620601335807,
+STORE, 94620601335808, 94620601343999,
+STORE, 139891763060736, 139891765313535,
+SNULL, 139891763204095, 139891765313535,
+STORE, 139891763060736, 139891763204095,
+STORE, 139891763204096, 139891765313535,
+ERASE, 139891763204096, 139891765313535,
+STORE, 139891765301248, 139891765309439,
+STORE, 139891765309440, 139891765313535,
+STORE, 140722475700224, 140722475704319,
+STORE, 140722475687936, 140722475700223,
+STORE, 139891765272576, 139891765301247,
+STORE, 139891765264384, 139891765272575,
+STORE, 139891759263744, 139891763060735,
+SNULL, 139891759263744, 139891760922623,
+STORE, 139891760922624, 139891763060735,
+STORE, 139891759263744, 139891760922623,
+SNULL, 139891763019775, 139891763060735,
+STORE, 139891760922624, 139891763019775,
+STORE, 139891763019776, 139891763060735,
+SNULL, 139891763019776, 139891763044351,
+STORE, 139891763044352, 139891763060735,
+STORE, 139891763019776, 139891763044351,
+ERASE, 139891763019776, 139891763044351,
+STORE, 139891763019776, 139891763044351,
+ERASE, 139891763044352, 139891763060735,
+STORE, 139891763044352, 139891763060735,
+SNULL, 139891763036159, 139891763044351,
+STORE, 139891763019776, 139891763036159,
+STORE, 139891763036160, 139891763044351,
+SNULL, 94620601331711, 94620601335807,
+STORE, 94620601323520, 94620601331711,
+STORE, 94620601331712, 94620601335807,
+SNULL, 139891765305343, 139891765309439,
+STORE, 139891765301248, 139891765305343,
+STORE, 139891765305344, 139891765309439,
+ERASE, 139891765272576, 139891765301247,
+STORE, 94620610027520, 94620610162687,
+STORE, 94031976210432, 94031976423423,
+STORE, 94031978520576, 94031978524671,
+STORE, 94031978524672, 94031978532863,
+STORE, 94031978532864, 94031978545151,
+STORE, 94031990398976, 94031992565759,
+STORE, 140336240640000, 140336242298879,
+STORE, 140336242298880, 140336244396031,
+STORE, 140336244396032, 140336244412415,
+STORE, 140336244412416, 140336244420607,
+STORE, 140336244420608, 140336244436991,
+STORE, 140336244436992, 140336244449279,
+STORE, 140336244449280, 140336246542335,
+STORE, 140336246542336, 140336246546431,
+STORE, 140336246546432, 140336246550527,
+STORE, 140336246550528, 140336246693887,
+STORE, 140336247062528, 140336248745983,
+STORE, 140336248745984, 140336248762367,
+STORE, 140336248791040, 140336248795135,
+STORE, 140336248795136, 140336248799231,
+STORE, 140336248799232, 140336248803327,
+STORE, 140728500064256, 140728500203519,
+STORE, 140728501501952, 140728501514239,
+STORE, 140728501514240, 140728501518335,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140730503987200, 140737488351231,
+SNULL, 140730503995391, 140737488351231,
+STORE, 140730503987200, 140730503995391,
+STORE, 140730503856128, 140730503995391,
+STORE, 93866544205824, 93866546429951,
+SNULL, 93866544316415, 93866546429951,
+STORE, 93866544205824, 93866544316415,
+STORE, 93866544316416, 93866546429951,
+ERASE, 93866544316416, 93866546429951,
+STORE, 93866546409472, 93866546421759,
+STORE, 93866546421760, 93866546429951,
+STORE, 140216311959552, 140216314212351,
+SNULL, 140216312102911, 140216314212351,
+STORE, 140216311959552, 140216312102911,
+STORE, 140216312102912, 140216314212351,
+ERASE, 140216312102912, 140216314212351,
+STORE, 140216314200064, 140216314208255,
+STORE, 140216314208256, 140216314212351,
+STORE, 140730504626176, 140730504630271,
+STORE, 140730504613888, 140730504626175,
+STORE, 140216314171392, 140216314200063,
+STORE, 140216314163200, 140216314171391,
+STORE, 140216308162560, 140216311959551,
+SNULL, 140216308162560, 140216309821439,
+STORE, 140216309821440, 140216311959551,
+STORE, 140216308162560, 140216309821439,
+SNULL, 140216311918591, 140216311959551,
+STORE, 140216309821440, 140216311918591,
+STORE, 140216311918592, 140216311959551,
+SNULL, 140216311918592, 140216311943167,
+STORE, 140216311943168, 140216311959551,
+STORE, 140216311918592, 140216311943167,
+ERASE, 140216311918592, 140216311943167,
+STORE, 140216311918592, 140216311943167,
+ERASE, 140216311943168, 140216311959551,
+STORE, 140216311943168, 140216311959551,
+SNULL, 140216311934975, 140216311943167,
+STORE, 140216311918592, 140216311934975,
+STORE, 140216311934976, 140216311943167,
+SNULL, 93866546417663, 93866546421759,
+STORE, 93866546409472, 93866546417663,
+STORE, 93866546417664, 93866546421759,
+SNULL, 140216314204159, 140216314208255,
+STORE, 140216314200064, 140216314204159,
+STORE, 140216314204160, 140216314208255,
+ERASE, 140216314171392, 140216314200063,
+STORE, 93866550386688, 93866550521855,
+STORE, 94074292674560, 94074292887551,
+STORE, 94074294984704, 94074294988799,
+STORE, 94074294988800, 94074294996991,
+STORE, 94074294996992, 94074295009279,
+STORE, 94074300219392, 94074301378559,
+STORE, 139781563256832, 139781564915711,
+STORE, 139781564915712, 139781567012863,
+STORE, 139781567012864, 139781567029247,
+STORE, 139781567029248, 139781567037439,
+STORE, 139781567037440, 139781567053823,
+STORE, 139781567053824, 139781567066111,
+STORE, 139781567066112, 139781569159167,
+STORE, 139781569159168, 139781569163263,
+STORE, 139781569163264, 139781569167359,
+STORE, 139781569167360, 139781569310719,
+STORE, 139781569679360, 139781571362815,
+STORE, 139781571362816, 139781571379199,
+STORE, 139781571407872, 139781571411967,
+STORE, 139781571411968, 139781571416063,
+STORE, 139781571416064, 139781571420159,
+STORE, 140723688488960, 140723688628223,
+STORE, 140723689005056, 140723689017343,
+STORE, 140723689017344, 140723689021439,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140735189745664, 140737488351231,
+SNULL, 140735189753855, 140737488351231,
+STORE, 140735189745664, 140735189753855,
+STORE, 140735189614592, 140735189753855,
+STORE, 94172072177664, 94172074512383,
+SNULL, 94172072390655, 94172074512383,
+STORE, 94172072177664, 94172072390655,
+STORE, 94172072390656, 94172074512383,
+ERASE, 94172072390656, 94172074512383,
+STORE, 94172074487808, 94172074500095,
+STORE, 94172074500096, 94172074512383,
+STORE, 140687827263488, 140687829516287,
+SNULL, 140687827406847, 140687829516287,
+STORE, 140687827263488, 140687827406847,
+STORE, 140687827406848, 140687829516287,
+ERASE, 140687827406848, 140687829516287,
+STORE, 140687829504000, 140687829512191,
+STORE, 140687829512192, 140687829516287,
+STORE, 140735189766144, 140735189770239,
+STORE, 140735189753856, 140735189766143,
+STORE, 140687829475328, 140687829503999,
+STORE, 140687829467136, 140687829475327,
+STORE, 140687825149952, 140687827263487,
+SNULL, 140687825149952, 140687825162239,
+STORE, 140687825162240, 140687827263487,
+STORE, 140687825149952, 140687825162239,
+SNULL, 140687827255295, 140687827263487,
+STORE, 140687825162240, 140687827255295,
+STORE, 140687827255296, 140687827263487,
+ERASE, 140687827255296, 140687827263487,
+STORE, 140687827255296, 140687827263487,
+STORE, 140687821352960, 140687825149951,
+SNULL, 140687821352960, 140687823011839,
+STORE, 140687823011840, 140687825149951,
+STORE, 140687821352960, 140687823011839,
+SNULL, 140687825108991, 140687825149951,
+STORE, 140687823011840, 140687825108991,
+STORE, 140687825108992, 140687825149951,
+SNULL, 140687825108992, 140687825133567,
+STORE, 140687825133568, 140687825149951,
+STORE, 140687825108992, 140687825133567,
+ERASE, 140687825108992, 140687825133567,
+STORE, 140687825108992, 140687825133567,
+ERASE, 140687825133568, 140687825149951,
+STORE, 140687825133568, 140687825149951,
+STORE, 140687829458944, 140687829475327,
+SNULL, 140687825125375, 140687825133567,
+STORE, 140687825108992, 140687825125375,
+STORE, 140687825125376, 140687825133567,
+SNULL, 140687827259391, 140687827263487,
+STORE, 140687827255296, 140687827259391,
+STORE, 140687827259392, 140687827263487,
+SNULL, 94172074491903, 94172074500095,
+STORE, 94172074487808, 94172074491903,
+STORE, 94172074491904, 94172074500095,
+SNULL, 140687829508095, 140687829512191,
+STORE, 140687829504000, 140687829508095,
+STORE, 140687829508096, 140687829512191,
+ERASE, 140687829475328, 140687829503999,
+STORE, 94172092432384, 94172092567551,
+STORE, 140687827775488, 140687829458943,
+STORE, 94172092432384, 94172092702719,
+STORE, 94172092432384, 94172092837887,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140737229504512, 140737488351231,
+SNULL, 140737229512703, 140737488351231,
+STORE, 140737229504512, 140737229512703,
+STORE, 140737229373440, 140737229512703,
+STORE, 94155246866432, 94155249090559,
+SNULL, 94155246977023, 94155249090559,
+STORE, 94155246866432, 94155246977023,
+STORE, 94155246977024, 94155249090559,
+ERASE, 94155246977024, 94155249090559,
+STORE, 94155249070080, 94155249082367,
+STORE, 94155249082368, 94155249090559,
+STORE, 140640993693696, 140640995946495,
+SNULL, 140640993837055, 140640995946495,
+STORE, 140640993693696, 140640993837055,
+STORE, 140640993837056, 140640995946495,
+ERASE, 140640993837056, 140640995946495,
+STORE, 140640995934208, 140640995942399,
+STORE, 140640995942400, 140640995946495,
+STORE, 140737230004224, 140737230008319,
+STORE, 140737229991936, 140737230004223,
+STORE, 140640995905536, 140640995934207,
+STORE, 140640995897344, 140640995905535,
+STORE, 140640989896704, 140640993693695,
+SNULL, 140640989896704, 140640991555583,
+STORE, 140640991555584, 140640993693695,
+STORE, 140640989896704, 140640991555583,
+SNULL, 140640993652735, 140640993693695,
+STORE, 140640991555584, 140640993652735,
+STORE, 140640993652736, 140640993693695,
+SNULL, 140640993652736, 140640993677311,
+STORE, 140640993677312, 140640993693695,
+STORE, 140640993652736, 140640993677311,
+ERASE, 140640993652736, 140640993677311,
+STORE, 140640993652736, 140640993677311,
+ERASE, 140640993677312, 140640993693695,
+STORE, 140640993677312, 140640993693695,
+SNULL, 140640993669119, 140640993677311,
+STORE, 140640993652736, 140640993669119,
+STORE, 140640993669120, 140640993677311,
+SNULL, 94155249078271, 94155249082367,
+STORE, 94155249070080, 94155249078271,
+STORE, 94155249078272, 94155249082367,
+SNULL, 140640995938303, 140640995942399,
+STORE, 140640995934208, 140640995938303,
+STORE, 140640995938304, 140640995942399,
+ERASE, 140640995905536, 140640995934207,
+STORE, 94155281035264, 94155281170431,
+STORE, 94088066453504, 94088066564095,
+STORE, 94088068657152, 94088068665343,
+STORE, 94088068665344, 94088068669439,
+STORE, 94088068669440, 94088068677631,
+STORE, 94088090214400, 94088090349567,
+STORE, 140503024627712, 140503026286591,
+STORE, 140503026286592, 140503028383743,
+STORE, 140503028383744, 140503028400127,
+STORE, 140503028400128, 140503028408319,
+STORE, 140503028408320, 140503028424703,
+STORE, 140503028424704, 140503028568063,
+STORE, 140503030628352, 140503030636543,
+STORE, 140503030665216, 140503030669311,
+STORE, 140503030669312, 140503030673407,
+STORE, 140503030673408, 140503030677503,
+STORE, 140730894725120, 140730894864383,
+STORE, 140730894880768, 140730894893055,
+STORE, 140730894893056, 140730894897151,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140730434342912, 140737488351231,
+SNULL, 140730434351103, 140737488351231,
+STORE, 140730434342912, 140730434351103,
+STORE, 140730434211840, 140730434351103,
+STORE, 4194304, 5128191,
+STORE, 7221248, 7241727,
+STORE, 7241728, 7249919,
+STORE, 140109041938432, 140109044191231,
+SNULL, 140109042081791, 140109044191231,
+STORE, 140109041938432, 140109042081791,
+STORE, 140109042081792, 140109044191231,
+ERASE, 140109042081792, 140109044191231,
+STORE, 140109044178944, 140109044187135,
+STORE, 140109044187136, 140109044191231,
+STORE, 140730434850816, 140730434854911,
+STORE, 140730434838528, 140730434850815,
+STORE, 140109044150272, 140109044178943,
+STORE, 140109044142080, 140109044150271,
+STORE, 140109038776320, 140109041938431,
+SNULL, 140109038776320, 140109039837183,
+STORE, 140109039837184, 140109041938431,
+STORE, 140109038776320, 140109039837183,
+SNULL, 140109041930239, 140109041938431,
+STORE, 140109039837184, 140109041930239,
+STORE, 140109041930240, 140109041938431,
+ERASE, 140109041930240, 140109041938431,
+STORE, 140109041930240, 140109041938431,
+STORE, 140109034979328, 140109038776319,
+SNULL, 140109034979328, 140109036638207,
+STORE, 140109036638208, 140109038776319,
+STORE, 140109034979328, 140109036638207,
+SNULL, 140109038735359, 140109038776319,
+STORE, 140109036638208, 140109038735359,
+STORE, 140109038735360, 140109038776319,
+SNULL, 140109038735360, 140109038759935,
+STORE, 140109038759936, 140109038776319,
+STORE, 140109038735360, 140109038759935,
+ERASE, 140109038735360, 140109038759935,
+STORE, 140109038735360, 140109038759935,
+ERASE, 140109038759936, 140109038776319,
+STORE, 140109038759936, 140109038776319,
+STORE, 140109044129792, 140109044150271,
+SNULL, 140109038751743, 140109038759935,
+STORE, 140109038735360, 140109038751743,
+STORE, 140109038751744, 140109038759935,
+SNULL, 140109041934335, 140109041938431,
+STORE, 140109041930240, 140109041934335,
+STORE, 140109041934336, 140109041938431,
+SNULL, 7233535, 7241727,
+STORE, 7221248, 7233535,
+STORE, 7233536, 7241727,
+SNULL, 140109044183039, 140109044187135,
+STORE, 140109044178944, 140109044183039,
+STORE, 140109044183040, 140109044187135,
+ERASE, 140109044150272, 140109044178943,
+STORE, 20000768, 20135935,
+STORE, 20000768, 20283391,
+STORE, 140109042446336, 140109044129791,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140730853408768, 140737488351231,
+SNULL, 140730853416959, 140737488351231,
+STORE, 140730853408768, 140730853416959,
+STORE, 140730853277696, 140730853416959,
+STORE, 94865902977024, 94865905311743,
+SNULL, 94865903190015, 94865905311743,
+STORE, 94865902977024, 94865903190015,
+STORE, 94865903190016, 94865905311743,
+ERASE, 94865903190016, 94865905311743,
+STORE, 94865905287168, 94865905299455,
+STORE, 94865905299456, 94865905311743,
+STORE, 139768865738752, 139768867991551,
+SNULL, 139768865882111, 139768867991551,
+STORE, 139768865738752, 139768865882111,
+STORE, 139768865882112, 139768867991551,
+ERASE, 139768865882112, 139768867991551,
+STORE, 139768867979264, 139768867987455,
+STORE, 139768867987456, 139768867991551,
+STORE, 140730853957632, 140730853961727,
+STORE, 140730853945344, 140730853957631,
+STORE, 139768867950592, 139768867979263,
+STORE, 139768867942400, 139768867950591,
+STORE, 139768863625216, 139768865738751,
+SNULL, 139768863625216, 139768863637503,
+STORE, 139768863637504, 139768865738751,
+STORE, 139768863625216, 139768863637503,
+SNULL, 139768865730559, 139768865738751,
+STORE, 139768863637504, 139768865730559,
+STORE, 139768865730560, 139768865738751,
+ERASE, 139768865730560, 139768865738751,
+STORE, 139768865730560, 139768865738751,
+STORE, 139768859828224, 139768863625215,
+SNULL, 139768859828224, 139768861487103,
+STORE, 139768861487104, 139768863625215,
+STORE, 139768859828224, 139768861487103,
+SNULL, 139768863584255, 139768863625215,
+STORE, 139768861487104, 139768863584255,
+STORE, 139768863584256, 139768863625215,
+SNULL, 139768863584256, 139768863608831,
+STORE, 139768863608832, 139768863625215,
+STORE, 139768863584256, 139768863608831,
+ERASE, 139768863584256, 139768863608831,
+STORE, 139768863584256, 139768863608831,
+ERASE, 139768863608832, 139768863625215,
+STORE, 139768863608832, 139768863625215,
+STORE, 139768867934208, 139768867950591,
+SNULL, 139768863600639, 139768863608831,
+STORE, 139768863584256, 139768863600639,
+STORE, 139768863600640, 139768863608831,
+SNULL, 139768865734655, 139768865738751,
+STORE, 139768865730560, 139768865734655,
+STORE, 139768865734656, 139768865738751,
+SNULL, 94865905291263, 94865905299455,
+STORE, 94865905287168, 94865905291263,
+STORE, 94865905291264, 94865905299455,
+SNULL, 139768867983359, 139768867987455,
+STORE, 139768867979264, 139768867983359,
+STORE, 139768867983360, 139768867987455,
+ERASE, 139768867950592, 139768867979263,
+STORE, 94865923670016, 94865923805183,
+STORE, 139768866250752, 139768867934207,
+STORE, 94865923670016, 94865923940351,
+STORE, 94865923670016, 94865924075519,
+STORE, 94865923670016, 94865924222975,
+SNULL, 94865924210687, 94865924222975,
+STORE, 94865923670016, 94865924210687,
+STORE, 94865924210688, 94865924222975,
+ERASE, 94865924210688, 94865924222975,
+STORE, 94865923670016, 94865924349951,
+STORE, 94865923670016, 94865924493311,
+STORE, 94865923670016, 94865924640767,
+SNULL, 94865924603903, 94865924640767,
+STORE, 94865923670016, 94865924603903,
+STORE, 94865924603904, 94865924640767,
+ERASE, 94865924603904, 94865924640767,
+STORE, 94865923670016, 94865924747263,
+STORE, 94865923670016, 94865924898815,
+SNULL, 94865924874239, 94865924898815,
+STORE, 94865923670016, 94865924874239,
+STORE, 94865924874240, 94865924898815,
+ERASE, 94865924874240, 94865924898815,
+STORE, 94865923670016, 94865925025791,
+SNULL, 94865925013503, 94865925025791,
+STORE, 94865923670016, 94865925013503,
+STORE, 94865925013504, 94865925025791,
+ERASE, 94865925013504, 94865925025791,
+SNULL, 94865924988927, 94865925013503,
+STORE, 94865923670016, 94865924988927,
+STORE, 94865924988928, 94865925013503,
+ERASE, 94865924988928, 94865925013503,
+STORE, 94865923670016, 94865925152767,
+SNULL, 94865925136383, 94865925152767,
+STORE, 94865923670016, 94865925136383,
+STORE, 94865925136384, 94865925152767,
+ERASE, 94865925136384, 94865925152767,
+STORE, 94865923670016, 94865925292031,
+SNULL, 94865925279743, 94865925292031,
+STORE, 94865923670016, 94865925279743,
+STORE, 94865925279744, 94865925292031,
+ERASE, 94865925279744, 94865925292031,
+SNULL, 94865925255167, 94865925279743,
+STORE, 94865923670016, 94865925255167,
+STORE, 94865925255168, 94865925279743,
+ERASE, 94865925255168, 94865925279743,
+STORE, 94865923670016, 94865925406719,
+SNULL, 94865925394431, 94865925406719,
+STORE, 94865923670016, 94865925394431,
+STORE, 94865925394432, 94865925406719,
+ERASE, 94865925394432, 94865925406719,
+STORE, 94865923670016, 94865925545983,
+SNULL, 94865925533695, 94865925545983,
+STORE, 94865923670016, 94865925533695,
+STORE, 94865925533696, 94865925545983,
+ERASE, 94865925533696, 94865925545983,
+SNULL, 94865925492735, 94865925533695,
+STORE, 94865923670016, 94865925492735,
+STORE, 94865925492736, 94865925533695,
+ERASE, 94865925492736, 94865925533695,
+STORE, 94865923670016, 94865925627903,
+SNULL, 94865925599231, 94865925627903,
+STORE, 94865923670016, 94865925599231,
+STORE, 94865925599232, 94865925627903,
+ERASE, 94865925599232, 94865925627903,
+STORE, 94865923670016, 94865925738495,
+SNULL, 94865925726207, 94865925738495,
+STORE, 94865923670016, 94865925726207,
+STORE, 94865925726208, 94865925738495,
+ERASE, 94865925726208, 94865925738495,
+STORE, 94865923670016, 94865925877759,
+SNULL, 94865925865471, 94865925877759,
+STORE, 94865923670016, 94865925865471,
+STORE, 94865925865472, 94865925877759,
+ERASE, 94865925865472, 94865925877759,
+STORE, 94865923670016, 94865926021119,
+SNULL, 94865926008831, 94865926021119,
+STORE, 94865923670016, 94865926008831,
+STORE, 94865926008832, 94865926021119,
+ERASE, 94865926008832, 94865926021119,
+SNULL, 94865925971967, 94865926008831,
+STORE, 94865923670016, 94865925971967,
+STORE, 94865925971968, 94865926008831,
+ERASE, 94865925971968, 94865926008831,
+STORE, 94865923670016, 94865926115327,
+STORE, 94865923670016, 94865926254591,
+SNULL, 94865926246399, 94865926254591,
+STORE, 94865923670016, 94865926246399,
+STORE, 94865926246400, 94865926254591,
+ERASE, 94865926246400, 94865926254591,
+STORE, 94865923670016, 94865926385663,
+STORE, 94865923670016, 94865926537215,
+STORE, 94865923670016, 94865926672383,
+STORE, 94865923670016, 94865926815743,
+STORE, 94865923670016, 94865926955007,
+STORE, 94865923670016, 94865927094271,
+STORE, 94865923670016, 94865927233535,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140731148435456, 140737488351231,
+SNULL, 140731148443647, 140737488351231,
+STORE, 140731148435456, 140731148443647,
+STORE, 140731148304384, 140731148443647,
+STORE, 94090775400448, 94090777735167,
+SNULL, 94090775613439, 94090777735167,
+STORE, 94090775400448, 94090775613439,
+STORE, 94090775613440, 94090777735167,
+ERASE, 94090775613440, 94090777735167,
+STORE, 94090777710592, 94090777722879,
+STORE, 94090777722880, 94090777735167,
+STORE, 140301090283520, 140301092536319,
+SNULL, 140301090426879, 140301092536319,
+STORE, 140301090283520, 140301090426879,
+STORE, 140301090426880, 140301092536319,
+ERASE, 140301090426880, 140301092536319,
+STORE, 140301092524032, 140301092532223,
+STORE, 140301092532224, 140301092536319,
+STORE, 140731148570624, 140731148574719,
+STORE, 140731148558336, 140731148570623,
+STORE, 140301092495360, 140301092524031,
+STORE, 140301092487168, 140301092495359,
+STORE, 140301088169984, 140301090283519,
+SNULL, 140301088169984, 140301088182271,
+STORE, 140301088182272, 140301090283519,
+STORE, 140301088169984, 140301088182271,
+SNULL, 140301090275327, 140301090283519,
+STORE, 140301088182272, 140301090275327,
+STORE, 140301090275328, 140301090283519,
+ERASE, 140301090275328, 140301090283519,
+STORE, 140301090275328, 140301090283519,
+STORE, 140301084372992, 140301088169983,
+SNULL, 140301084372992, 140301086031871,
+STORE, 140301086031872, 140301088169983,
+STORE, 140301084372992, 140301086031871,
+SNULL, 140301088129023, 140301088169983,
+STORE, 140301086031872, 140301088129023,
+STORE, 140301088129024, 140301088169983,
+SNULL, 140301088129024, 140301088153599,
+STORE, 140301088153600, 140301088169983,
+STORE, 140301088129024, 140301088153599,
+ERASE, 140301088129024, 140301088153599,
+STORE, 140301088129024, 140301088153599,
+ERASE, 140301088153600, 140301088169983,
+STORE, 140301088153600, 140301088169983,
+STORE, 140301092478976, 140301092495359,
+SNULL, 140301088145407, 140301088153599,
+STORE, 140301088129024, 140301088145407,
+STORE, 140301088145408, 140301088153599,
+SNULL, 140301090279423, 140301090283519,
+STORE, 140301090275328, 140301090279423,
+STORE, 140301090279424, 140301090283519,
+SNULL, 94090777714687, 94090777722879,
+STORE, 94090777710592, 94090777714687,
+STORE, 94090777714688, 94090777722879,
+SNULL, 140301092528127, 140301092532223,
+STORE, 140301092524032, 140301092528127,
+STORE, 140301092528128, 140301092532223,
+ERASE, 140301092495360, 140301092524031,
+STORE, 94090794590208, 94090794725375,
+STORE, 140301090795520, 140301092478975,
+STORE, 94090794590208, 94090794860543,
+STORE, 94090794590208, 94090794995711,
+STORE, 94090794590208, 94090795163647,
+SNULL, 94090795139071, 94090795163647,
+STORE, 94090794590208, 94090795139071,
+STORE, 94090795139072, 94090795163647,
+ERASE, 94090795139072, 94090795163647,
+STORE, 94090794590208, 94090795278335,
+STORE, 94090794590208, 94090795425791,
+SNULL, 94090795388927, 94090795425791,
+STORE, 94090794590208, 94090795388927,
+STORE, 94090795388928, 94090795425791,
+ERASE, 94090795388928, 94090795425791,
+STORE, 94090794590208, 94090795528191,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140733084430336, 140737488351231,
+SNULL, 140733084438527, 140737488351231,
+STORE, 140733084430336, 140733084438527,
+STORE, 140733084299264, 140733084438527,
+STORE, 94116169183232, 94116171517951,
+SNULL, 94116169396223, 94116171517951,
+STORE, 94116169183232, 94116169396223,
+STORE, 94116169396224, 94116171517951,
+ERASE, 94116169396224, 94116171517951,
+STORE, 94116171493376, 94116171505663,
+STORE, 94116171505664, 94116171517951,
+STORE, 139772214128640, 139772216381439,
+SNULL, 139772214271999, 139772216381439,
+STORE, 139772214128640, 139772214271999,
+STORE, 139772214272000, 139772216381439,
+ERASE, 139772214272000, 139772216381439,
+STORE, 139772216369152, 139772216377343,
+STORE, 139772216377344, 139772216381439,
+STORE, 140733085270016, 140733085274111,
+STORE, 140733085257728, 140733085270015,
+STORE, 139772216340480, 139772216369151,
+STORE, 139772216332288, 139772216340479,
+STORE, 139772212015104, 139772214128639,
+SNULL, 139772212015104, 139772212027391,
+STORE, 139772212027392, 139772214128639,
+STORE, 139772212015104, 139772212027391,
+SNULL, 139772214120447, 139772214128639,
+STORE, 139772212027392, 139772214120447,
+STORE, 139772214120448, 139772214128639,
+ERASE, 139772214120448, 139772214128639,
+STORE, 139772214120448, 139772214128639,
+STORE, 139772208218112, 139772212015103,
+SNULL, 139772208218112, 139772209876991,
+STORE, 139772209876992, 139772212015103,
+STORE, 139772208218112, 139772209876991,
+SNULL, 139772211974143, 139772212015103,
+STORE, 139772209876992, 139772211974143,
+STORE, 139772211974144, 139772212015103,
+SNULL, 139772211974144, 139772211998719,
+STORE, 139772211998720, 139772212015103,
+STORE, 139772211974144, 139772211998719,
+ERASE, 139772211974144, 139772211998719,
+STORE, 139772211974144, 139772211998719,
+ERASE, 139772211998720, 139772212015103,
+STORE, 139772211998720, 139772212015103,
+STORE, 139772216324096, 139772216340479,
+SNULL, 139772211990527, 139772211998719,
+STORE, 139772211974144, 139772211990527,
+STORE, 139772211990528, 139772211998719,
+SNULL, 139772214124543, 139772214128639,
+STORE, 139772214120448, 139772214124543,
+STORE, 139772214124544, 139772214128639,
+SNULL, 94116171497471, 94116171505663,
+STORE, 94116171493376, 94116171497471,
+STORE, 94116171497472, 94116171505663,
+SNULL, 139772216373247, 139772216377343,
+STORE, 139772216369152, 139772216373247,
+STORE, 139772216373248, 139772216377343,
+ERASE, 139772216340480, 139772216369151,
+STORE, 94116199383040, 94116199518207,
+STORE, 139772214640640, 139772216324095,
+STORE, 94116199383040, 94116199653375,
+STORE, 94116199383040, 94116199788543,
+STORE, 140737488347136, 140737488351231,
+STORE, 140726067826688, 140737488351231,
+SNULL, 140726067830783, 140737488351231,
+STORE, 140726067826688, 140726067830783,
+STORE, 140726067695616, 140726067830783,
+STORE, 94535150673920, 94535152898047,
+SNULL, 94535150784511, 94535152898047,
+STORE, 94535150673920, 94535150784511,
+STORE, 94535150784512, 94535152898047,
+ERASE, 94535150784512, 94535152898047,
+STORE, 94535152877568, 94535152889855,
+STORE, 94535152889856, 94535152898047,
+STORE, 140381257314304, 140381259567103,
+SNULL, 140381257457663, 140381259567103,
+STORE, 140381257314304, 140381257457663,
+STORE, 140381257457664, 140381259567103,
+ERASE, 140381257457664, 140381259567103,
+STORE, 140381259554816, 140381259563007,
+STORE, 140381259563008, 140381259567103,
+STORE, 140726068060160, 140726068064255,
+STORE, 140726068047872, 140726068060159,
+STORE, 140381259526144, 140381259554815,
+STORE, 140381259517952, 140381259526143,
+STORE, 140381253517312, 140381257314303,
+SNULL, 140381253517312, 140381255176191,
+STORE, 140381255176192, 140381257314303,
+STORE, 140381253517312, 140381255176191,
+SNULL, 140381257273343, 140381257314303,
+STORE, 140381255176192, 140381257273343,
+STORE, 140381257273344, 140381257314303,
+SNULL, 140381257273344, 140381257297919,
+STORE, 140381257297920, 140381257314303,
+STORE, 140381257273344, 140381257297919,
+ERASE, 140381257273344, 140381257297919,
+STORE, 140381257273344, 140381257297919,
+ERASE, 140381257297920, 140381257314303,
+STORE, 140381257297920, 140381257314303,
+SNULL, 140381257289727, 140381257297919,
+STORE, 140381257273344, 140381257289727,
+STORE, 140381257289728, 140381257297919,
+SNULL, 94535152885759, 94535152889855,
+STORE, 94535152877568, 94535152885759,
+STORE, 94535152885760, 94535152889855,
+SNULL, 140381259558911, 140381259563007,
+STORE, 140381259554816, 140381259558911,
+STORE, 140381259558912, 140381259563007,
+ERASE, 140381259526144, 140381259554815,
+STORE, 94535186296832, 94535186431999,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140729189425152, 140737488351231,
+SNULL, 140729189433343, 140737488351231,
+STORE, 140729189425152, 140729189433343,
+STORE, 140729189294080, 140729189433343,
+STORE, 94428200128512, 94428202352639,
+SNULL, 94428200239103, 94428202352639,
+STORE, 94428200128512, 94428200239103,
+STORE, 94428200239104, 94428202352639,
+ERASE, 94428200239104, 94428202352639,
+STORE, 94428202332160, 94428202344447,
+STORE, 94428202344448, 94428202352639,
+STORE, 139707216986112, 139707219238911,
+SNULL, 139707217129471, 139707219238911,
+STORE, 139707216986112, 139707217129471,
+STORE, 139707217129472, 139707219238911,
+ERASE, 139707217129472, 139707219238911,
+STORE, 139707219226624, 139707219234815,
+STORE, 139707219234816, 139707219238911,
+STORE, 140729189785600, 140729189789695,
+STORE, 140729189773312, 140729189785599,
+STORE, 139707219197952, 139707219226623,
+STORE, 139707219189760, 139707219197951,
+STORE, 139707213189120, 139707216986111,
+SNULL, 139707213189120, 139707214847999,
+STORE, 139707214848000, 139707216986111,
+STORE, 139707213189120, 139707214847999,
+SNULL, 139707216945151, 139707216986111,
+STORE, 139707214848000, 139707216945151,
+STORE, 139707216945152, 139707216986111,
+SNULL, 139707216945152, 139707216969727,
+STORE, 139707216969728, 139707216986111,
+STORE, 139707216945152, 139707216969727,
+ERASE, 139707216945152, 139707216969727,
+STORE, 139707216945152, 139707216969727,
+ERASE, 139707216969728, 139707216986111,
+STORE, 139707216969728, 139707216986111,
+SNULL, 139707216961535, 139707216969727,
+STORE, 139707216945152, 139707216961535,
+STORE, 139707216961536, 139707216969727,
+SNULL, 94428202340351, 94428202344447,
+STORE, 94428202332160, 94428202340351,
+STORE, 94428202340352, 94428202344447,
+SNULL, 139707219230719, 139707219234815,
+STORE, 139707219226624, 139707219230719,
+STORE, 139707219230720, 139707219234815,
+ERASE, 139707219197952, 139707219226623,
+STORE, 94428208599040, 94428208734207,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722000953344, 140737488351231,
+SNULL, 140722000961535, 140737488351231,
+STORE, 140722000953344, 140722000961535,
+STORE, 140722000822272, 140722000961535,
+STORE, 94636494757888, 94636496982015,
+SNULL, 94636494868479, 94636496982015,
+STORE, 94636494757888, 94636494868479,
+STORE, 94636494868480, 94636496982015,
+ERASE, 94636494868480, 94636496982015,
+STORE, 94636496961536, 94636496973823,
+STORE, 94636496973824, 94636496982015,
+STORE, 140142275100672, 140142277353471,
+SNULL, 140142275244031, 140142277353471,
+STORE, 140142275100672, 140142275244031,
+STORE, 140142275244032, 140142277353471,
+ERASE, 140142275244032, 140142277353471,
+STORE, 140142277341184, 140142277349375,
+STORE, 140142277349376, 140142277353471,
+STORE, 140722002747392, 140722002751487,
+STORE, 140722002735104, 140722002747391,
+STORE, 140142277312512, 140142277341183,
+STORE, 140142277304320, 140142277312511,
+STORE, 140142271303680, 140142275100671,
+SNULL, 140142271303680, 140142272962559,
+STORE, 140142272962560, 140142275100671,
+STORE, 140142271303680, 140142272962559,
+SNULL, 140142275059711, 140142275100671,
+STORE, 140142272962560, 140142275059711,
+STORE, 140142275059712, 140142275100671,
+SNULL, 140142275059712, 140142275084287,
+STORE, 140142275084288, 140142275100671,
+STORE, 140142275059712, 140142275084287,
+ERASE, 140142275059712, 140142275084287,
+STORE, 140142275059712, 140142275084287,
+ERASE, 140142275084288, 140142275100671,
+STORE, 140142275084288, 140142275100671,
+SNULL, 140142275076095, 140142275084287,
+STORE, 140142275059712, 140142275076095,
+STORE, 140142275076096, 140142275084287,
+SNULL, 94636496969727, 94636496973823,
+STORE, 94636496961536, 94636496969727,
+STORE, 94636496969728, 94636496973823,
+SNULL, 140142277345279, 140142277349375,
+STORE, 140142277341184, 140142277345279,
+STORE, 140142277345280, 140142277349375,
+ERASE, 140142277312512, 140142277341183,
+STORE, 94636516286464, 94636516421631,
+STORE, 94071103692800, 94071103905791,
+STORE, 94071106002944, 94071106007039,
+STORE, 94071106007040, 94071106015231,
+STORE, 94071106015232, 94071106027519,
+STORE, 94071138521088, 94071140368383,
+STORE, 140145668190208, 140145669849087,
+STORE, 140145669849088, 140145671946239,
+STORE, 140145671946240, 140145671962623,
+STORE, 140145671962624, 140145671970815,
+STORE, 140145671970816, 140145671987199,
+STORE, 140145671987200, 140145671999487,
+STORE, 140145671999488, 140145674092543,
+STORE, 140145674092544, 140145674096639,
+STORE, 140145674096640, 140145674100735,
+STORE, 140145674100736, 140145674244095,
+STORE, 140145674612736, 140145676296191,
+STORE, 140145676296192, 140145676312575,
+STORE, 140145676341248, 140145676345343,
+STORE, 140145676345344, 140145676349439,
+STORE, 140145676349440, 140145676353535,
+STORE, 140734927740928, 140734927880191,
+STORE, 140734928842752, 140734928855039,
+STORE, 140734928855040, 140734928859135,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722342535168, 140737488351231,
+SNULL, 140722342543359, 140737488351231,
+STORE, 140722342535168, 140722342543359,
+STORE, 140722342404096, 140722342543359,
+STORE, 94399699714048, 94399702048767,
+SNULL, 94399699927039, 94399702048767,
+STORE, 94399699714048, 94399699927039,
+STORE, 94399699927040, 94399702048767,
+ERASE, 94399699927040, 94399702048767,
+STORE, 94399702024192, 94399702036479,
+STORE, 94399702036480, 94399702048767,
+STORE, 139811024748544, 139811027001343,
+SNULL, 139811024891903, 139811027001343,
+STORE, 139811024748544, 139811024891903,
+STORE, 139811024891904, 139811027001343,
+ERASE, 139811024891904, 139811027001343,
+STORE, 139811026989056, 139811026997247,
+STORE, 139811026997248, 139811027001343,
+STORE, 140722342707200, 140722342711295,
+STORE, 140722342694912, 140722342707199,
+STORE, 139811026960384, 139811026989055,
+STORE, 139811026952192, 139811026960383,
+STORE, 139811022635008, 139811024748543,
+SNULL, 139811022635008, 139811022647295,
+STORE, 139811022647296, 139811024748543,
+STORE, 139811022635008, 139811022647295,
+SNULL, 139811024740351, 139811024748543,
+STORE, 139811022647296, 139811024740351,
+STORE, 139811024740352, 139811024748543,
+ERASE, 139811024740352, 139811024748543,
+STORE, 139811024740352, 139811024748543,
+STORE, 139811018838016, 139811022635007,
+SNULL, 139811018838016, 139811020496895,
+STORE, 139811020496896, 139811022635007,
+STORE, 139811018838016, 139811020496895,
+SNULL, 139811022594047, 139811022635007,
+STORE, 139811020496896, 139811022594047,
+STORE, 139811022594048, 139811022635007,
+SNULL, 139811022594048, 139811022618623,
+STORE, 139811022618624, 139811022635007,
+STORE, 139811022594048, 139811022618623,
+ERASE, 139811022594048, 139811022618623,
+STORE, 139811022594048, 139811022618623,
+ERASE, 139811022618624, 139811022635007,
+STORE, 139811022618624, 139811022635007,
+STORE, 139811026944000, 139811026960383,
+SNULL, 139811022610431, 139811022618623,
+STORE, 139811022594048, 139811022610431,
+STORE, 139811022610432, 139811022618623,
+SNULL, 139811024744447, 139811024748543,
+STORE, 139811024740352, 139811024744447,
+STORE, 139811024744448, 139811024748543,
+SNULL, 94399702028287, 94399702036479,
+STORE, 94399702024192, 94399702028287,
+STORE, 94399702028288, 94399702036479,
+SNULL, 139811026993151, 139811026997247,
+STORE, 139811026989056, 139811026993151,
+STORE, 139811026993152, 139811026997247,
+ERASE, 139811026960384, 139811026989055,
+STORE, 94399723880448, 94399724015615,
+STORE, 139811025260544, 139811026943999,
+STORE, 94399723880448, 94399724150783,
+STORE, 94399723880448, 94399724285951,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140735364939776, 140737488351231,
+SNULL, 140735364947967, 140737488351231,
+STORE, 140735364939776, 140735364947967,
+STORE, 140735364808704, 140735364947967,
+STORE, 94421528674304, 94421531009023,
+SNULL, 94421528887295, 94421531009023,
+STORE, 94421528674304, 94421528887295,
+STORE, 94421528887296, 94421531009023,
+ERASE, 94421528887296, 94421531009023,
+STORE, 94421530984448, 94421530996735,
+STORE, 94421530996736, 94421531009023,
+STORE, 140162004742144, 140162006994943,
+SNULL, 140162004885503, 140162006994943,
+STORE, 140162004742144, 140162004885503,
+STORE, 140162004885504, 140162006994943,
+ERASE, 140162004885504, 140162006994943,
+STORE, 140162006982656, 140162006990847,
+STORE, 140162006990848, 140162006994943,
+STORE, 140735365402624, 140735365406719,
+STORE, 140735365390336, 140735365402623,
+STORE, 140162006953984, 140162006982655,
+STORE, 140162006945792, 140162006953983,
+STORE, 140162002628608, 140162004742143,
+SNULL, 140162002628608, 140162002640895,
+STORE, 140162002640896, 140162004742143,
+STORE, 140162002628608, 140162002640895,
+SNULL, 140162004733951, 140162004742143,
+STORE, 140162002640896, 140162004733951,
+STORE, 140162004733952, 140162004742143,
+ERASE, 140162004733952, 140162004742143,
+STORE, 140162004733952, 140162004742143,
+STORE, 140161998831616, 140162002628607,
+SNULL, 140161998831616, 140162000490495,
+STORE, 140162000490496, 140162002628607,
+STORE, 140161998831616, 140162000490495,
+SNULL, 140162002587647, 140162002628607,
+STORE, 140162000490496, 140162002587647,
+STORE, 140162002587648, 140162002628607,
+SNULL, 140162002587648, 140162002612223,
+STORE, 140162002612224, 140162002628607,
+STORE, 140162002587648, 140162002612223,
+ERASE, 140162002587648, 140162002612223,
+STORE, 140162002587648, 140162002612223,
+ERASE, 140162002612224, 140162002628607,
+STORE, 140162002612224, 140162002628607,
+STORE, 140162006937600, 140162006953983,
+SNULL, 140162002604031, 140162002612223,
+STORE, 140162002587648, 140162002604031,
+STORE, 140162002604032, 140162002612223,
+SNULL, 140162004738047, 140162004742143,
+STORE, 140162004733952, 140162004738047,
+STORE, 140162004738048, 140162004742143,
+SNULL, 94421530988543, 94421530996735,
+STORE, 94421530984448, 94421530988543,
+STORE, 94421530988544, 94421530996735,
+SNULL, 140162006986751, 140162006990847,
+STORE, 140162006982656, 140162006986751,
+STORE, 140162006986752, 140162006990847,
+ERASE, 140162006953984, 140162006982655,
+STORE, 94421551697920, 94421551833087,
+STORE, 140162005254144, 140162006937599,
+STORE, 94421551697920, 94421551968255,
+STORE, 94421551697920, 94421552103423,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140733498486784, 140737488351231,
+SNULL, 140733498494975, 140737488351231,
+STORE, 140733498486784, 140733498494975,
+STORE, 140733498355712, 140733498494975,
+STORE, 94567985836032, 94567988170751,
+SNULL, 94567986049023, 94567988170751,
+STORE, 94567985836032, 94567986049023,
+STORE, 94567986049024, 94567988170751,
+ERASE, 94567986049024, 94567988170751,
+STORE, 94567988146176, 94567988158463,
+STORE, 94567988158464, 94567988170751,
+STORE, 139634278572032, 139634280824831,
+SNULL, 139634278715391, 139634280824831,
+STORE, 139634278572032, 139634278715391,
+STORE, 139634278715392, 139634280824831,
+ERASE, 139634278715392, 139634280824831,
+STORE, 139634280812544, 139634280820735,
+STORE, 139634280820736, 139634280824831,
+STORE, 140733498544128, 140733498548223,
+STORE, 140733498531840, 140733498544127,
+STORE, 139634280783872, 139634280812543,
+STORE, 139634280775680, 139634280783871,
+STORE, 139634276458496, 139634278572031,
+SNULL, 139634276458496, 139634276470783,
+STORE, 139634276470784, 139634278572031,
+STORE, 139634276458496, 139634276470783,
+SNULL, 139634278563839, 139634278572031,
+STORE, 139634276470784, 139634278563839,
+STORE, 139634278563840, 139634278572031,
+ERASE, 139634278563840, 139634278572031,
+STORE, 139634278563840, 139634278572031,
+STORE, 139634272661504, 139634276458495,
+SNULL, 139634272661504, 139634274320383,
+STORE, 139634274320384, 139634276458495,
+STORE, 139634272661504, 139634274320383,
+SNULL, 139634276417535, 139634276458495,
+STORE, 139634274320384, 139634276417535,
+STORE, 139634276417536, 139634276458495,
+SNULL, 139634276417536, 139634276442111,
+STORE, 139634276442112, 139634276458495,
+STORE, 139634276417536, 139634276442111,
+ERASE, 139634276417536, 139634276442111,
+STORE, 139634276417536, 139634276442111,
+ERASE, 139634276442112, 139634276458495,
+STORE, 139634276442112, 139634276458495,
+STORE, 139634280767488, 139634280783871,
+SNULL, 139634276433919, 139634276442111,
+STORE, 139634276417536, 139634276433919,
+STORE, 139634276433920, 139634276442111,
+SNULL, 139634278567935, 139634278572031,
+STORE, 139634278563840, 139634278567935,
+STORE, 139634278567936, 139634278572031,
+SNULL, 94567988150271, 94567988158463,
+STORE, 94567988146176, 94567988150271,
+STORE, 94567988150272, 94567988158463,
+SNULL, 139634280816639, 139634280820735,
+STORE, 139634280812544, 139634280816639,
+STORE, 139634280816640, 139634280820735,
+ERASE, 139634280783872, 139634280812543,
+STORE, 94567996379136, 94567996514303,
+STORE, 139634279084032, 139634280767487,
+STORE, 94567996379136, 94567996649471,
+STORE, 94567996379136, 94567996784639,
+STORE, 94567996379136, 94567996960767,
+SNULL, 94567996932095, 94567996960767,
+STORE, 94567996379136, 94567996932095,
+STORE, 94567996932096, 94567996960767,
+ERASE, 94567996932096, 94567996960767,
+STORE, 94567996379136, 94567997071359,
+STORE, 94567996379136, 94567997206527,
+SNULL, 94567997186047, 94567997206527,
+STORE, 94567996379136, 94567997186047,
+STORE, 94567997186048, 94567997206527,
+ERASE, 94567997186048, 94567997206527,
+STORE, 94567996379136, 94567997358079,
+STORE, 94567996379136, 94567997493247,
+SNULL, 94567997476863, 94567997493247,
+STORE, 94567996379136, 94567997476863,
+STORE, 94567997476864, 94567997493247,
+ERASE, 94567997476864, 94567997493247,
+STORE, 94567996379136, 94567997612031,
+STORE, 94567996379136, 94567997767679,
+SNULL, 94567997739007, 94567997767679,
+STORE, 94567996379136, 94567997739007,
+STORE, 94567997739008, 94567997767679,
+ERASE, 94567997739008, 94567997767679,
+SNULL, 94567997698047, 94567997739007,
+STORE, 94567996379136, 94567997698047,
+STORE, 94567997698048, 94567997739007,
+ERASE, 94567997698048, 94567997739007,
+STORE, 94567996379136, 94567997853695,
+STORE, 94567996379136, 94567997988863,
+STORE, 94567996379136, 94567998132223,
+STORE, 94567996379136, 94567998275583,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140723667759104, 140737488351231,
+SNULL, 140723667767295, 140737488351231,
+STORE, 140723667759104, 140723667767295,
+STORE, 140723667628032, 140723667767295,
+STORE, 94231598800896, 94231601135615,
+SNULL, 94231599013887, 94231601135615,
+STORE, 94231598800896, 94231599013887,
+STORE, 94231599013888, 94231601135615,
+ERASE, 94231599013888, 94231601135615,
+STORE, 94231601111040, 94231601123327,
+STORE, 94231601123328, 94231601135615,
+STORE, 140269472649216, 140269474902015,
+SNULL, 140269472792575, 140269474902015,
+STORE, 140269472649216, 140269472792575,
+STORE, 140269472792576, 140269474902015,
+ERASE, 140269472792576, 140269474902015,
+STORE, 140269474889728, 140269474897919,
+STORE, 140269474897920, 140269474902015,
+STORE, 140723667836928, 140723667841023,
+STORE, 140723667824640, 140723667836927,
+STORE, 140269474861056, 140269474889727,
+STORE, 140269474852864, 140269474861055,
+STORE, 140269470535680, 140269472649215,
+SNULL, 140269470535680, 140269470547967,
+STORE, 140269470547968, 140269472649215,
+STORE, 140269470535680, 140269470547967,
+SNULL, 140269472641023, 140269472649215,
+STORE, 140269470547968, 140269472641023,
+STORE, 140269472641024, 140269472649215,
+ERASE, 140269472641024, 140269472649215,
+STORE, 140269472641024, 140269472649215,
+STORE, 140269466738688, 140269470535679,
+SNULL, 140269466738688, 140269468397567,
+STORE, 140269468397568, 140269470535679,
+STORE, 140269466738688, 140269468397567,
+SNULL, 140269470494719, 140269470535679,
+STORE, 140269468397568, 140269470494719,
+STORE, 140269470494720, 140269470535679,
+SNULL, 140269470494720, 140269470519295,
+STORE, 140269470519296, 140269470535679,
+STORE, 140269470494720, 140269470519295,
+ERASE, 140269470494720, 140269470519295,
+STORE, 140269470494720, 140269470519295,
+ERASE, 140269470519296, 140269470535679,
+STORE, 140269470519296, 140269470535679,
+STORE, 140269474844672, 140269474861055,
+SNULL, 140269470511103, 140269470519295,
+STORE, 140269470494720, 140269470511103,
+STORE, 140269470511104, 140269470519295,
+SNULL, 140269472645119, 140269472649215,
+STORE, 140269472641024, 140269472645119,
+STORE, 140269472645120, 140269472649215,
+SNULL, 94231601115135, 94231601123327,
+STORE, 94231601111040, 94231601115135,
+STORE, 94231601115136, 94231601123327,
+SNULL, 140269474893823, 140269474897919,
+STORE, 140269474889728, 140269474893823,
+STORE, 140269474893824, 140269474897919,
+ERASE, 140269474861056, 140269474889727,
+STORE, 94231626592256, 94231626727423,
+STORE, 140269473161216, 140269474844671,
+STORE, 94231626592256, 94231626862591,
+STORE, 94231626592256, 94231626997759,
+STORE, 94327178862592, 94327179075583,
+STORE, 94327181172736, 94327181176831,
+STORE, 94327181176832, 94327181185023,
+STORE, 94327181185024, 94327181197311,
+STORE, 94327185715200, 94327186685951,
+STORE, 140172071755776, 140172073414655,
+STORE, 140172073414656, 140172075511807,
+STORE, 140172075511808, 140172075528191,
+STORE, 140172075528192, 140172075536383,
+STORE, 140172075536384, 140172075552767,
+STORE, 140172075552768, 140172075565055,
+STORE, 140172075565056, 140172077658111,
+STORE, 140172077658112, 140172077662207,
+STORE, 140172077662208, 140172077666303,
+STORE, 140172077666304, 140172077809663,
+STORE, 140172078178304, 140172079861759,
+STORE, 140172079861760, 140172079878143,
+STORE, 140172079878144, 140172079906815,
+STORE, 140172079906816, 140172079910911,
+STORE, 140172079910912, 140172079915007,
+STORE, 140172079915008, 140172079919103,
+STORE, 140720358359040, 140720358494207,
+STORE, 140720358498304, 140720358510591,
+STORE, 140720358510592, 140720358514687,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140722548621312, 140737488351231,
+SNULL, 140722548629503, 140737488351231,
+STORE, 140722548621312, 140722548629503,
+STORE, 140722548490240, 140722548629503,
+STORE, 93949289504768, 93949291728895,
+SNULL, 93949289615359, 93949291728895,
+STORE, 93949289504768, 93949289615359,
+STORE, 93949289615360, 93949291728895,
+ERASE, 93949289615360, 93949291728895,
+STORE, 93949291708416, 93949291720703,
+STORE, 93949291720704, 93949291728895,
+STORE, 140305861902336, 140305864155135,
+SNULL, 140305862045695, 140305864155135,
+STORE, 140305861902336, 140305862045695,
+STORE, 140305862045696, 140305864155135,
+ERASE, 140305862045696, 140305864155135,
+STORE, 140305864142848, 140305864151039,
+STORE, 140305864151040, 140305864155135,
+STORE, 140722549821440, 140722549825535,
+STORE, 140722549809152, 140722549821439,
+STORE, 140305864114176, 140305864142847,
+STORE, 140305864105984, 140305864114175,
+STORE, 140305858105344, 140305861902335,
+SNULL, 140305858105344, 140305859764223,
+STORE, 140305859764224, 140305861902335,
+STORE, 140305858105344, 140305859764223,
+SNULL, 140305861861375, 140305861902335,
+STORE, 140305859764224, 140305861861375,
+STORE, 140305861861376, 140305861902335,
+SNULL, 140305861861376, 140305861885951,
+STORE, 140305861885952, 140305861902335,
+STORE, 140305861861376, 140305861885951,
+ERASE, 140305861861376, 140305861885951,
+STORE, 140305861861376, 140305861885951,
+ERASE, 140305861885952, 140305861902335,
+STORE, 140305861885952, 140305861902335,
+SNULL, 140305861877759, 140305861885951,
+STORE, 140305861861376, 140305861877759,
+STORE, 140305861877760, 140305861885951,
+SNULL, 93949291716607, 93949291720703,
+STORE, 93949291708416, 93949291716607,
+STORE, 93949291716608, 93949291720703,
+SNULL, 140305864146943, 140305864151039,
+STORE, 140305864142848, 140305864146943,
+STORE, 140305864146944, 140305864151039,
+ERASE, 140305864114176, 140305864142847,
+STORE, 93949324136448, 93949324271615,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140725754908672, 140737488351231,
+SNULL, 140725754916863, 140737488351231,
+STORE, 140725754908672, 140725754916863,
+STORE, 140725754777600, 140725754916863,
+STORE, 94831184375808, 94831186599935,
+SNULL, 94831184486399, 94831186599935,
+STORE, 94831184375808, 94831184486399,
+STORE, 94831184486400, 94831186599935,
+ERASE, 94831184486400, 94831186599935,
+STORE, 94831186579456, 94831186591743,
+STORE, 94831186591744, 94831186599935,
+STORE, 140605482479616, 140605484732415,
+SNULL, 140605482622975, 140605484732415,
+STORE, 140605482479616, 140605482622975,
+STORE, 140605482622976, 140605484732415,
+ERASE, 140605482622976, 140605484732415,
+STORE, 140605484720128, 140605484728319,
+STORE, 140605484728320, 140605484732415,
+STORE, 140725755670528, 140725755674623,
+STORE, 140725755658240, 140725755670527,
+STORE, 140605484691456, 140605484720127,
+STORE, 140605484683264, 140605484691455,
+STORE, 140605478682624, 140605482479615,
+SNULL, 140605478682624, 140605480341503,
+STORE, 140605480341504, 140605482479615,
+STORE, 140605478682624, 140605480341503,
+SNULL, 140605482438655, 140605482479615,
+STORE, 140605480341504, 140605482438655,
+STORE, 140605482438656, 140605482479615,
+SNULL, 140605482438656, 140605482463231,
+STORE, 140605482463232, 140605482479615,
+STORE, 140605482438656, 140605482463231,
+ERASE, 140605482438656, 140605482463231,
+STORE, 140605482438656, 140605482463231,
+ERASE, 140605482463232, 140605482479615,
+STORE, 140605482463232, 140605482479615,
+SNULL, 140605482455039, 140605482463231,
+STORE, 140605482438656, 140605482455039,
+STORE, 140605482455040, 140605482463231,
+SNULL, 94831186587647, 94831186591743,
+STORE, 94831186579456, 94831186587647,
+STORE, 94831186587648, 94831186591743,
+SNULL, 140605484724223, 140605484728319,
+STORE, 140605484720128, 140605484724223,
+STORE, 140605484724224, 140605484728319,
+ERASE, 140605484691456, 140605484720127,
+STORE, 94831217156096, 94831217291263,
+STORE, 94327178862592, 94327179075583,
+STORE, 94327181172736, 94327181176831,
+STORE, 94327181176832, 94327181185023,
+STORE, 94327181185024, 94327181197311,
+STORE, 94327185715200, 94327186685951,
+STORE, 140172071755776, 140172073414655,
+STORE, 140172073414656, 140172075511807,
+STORE, 140172075511808, 140172075528191,
+STORE, 140172075528192, 140172075536383,
+STORE, 140172075536384, 140172075552767,
+STORE, 140172075552768, 140172075565055,
+STORE, 140172075565056, 140172077658111,
+STORE, 140172077658112, 140172077662207,
+STORE, 140172077662208, 140172077666303,
+STORE, 140172077666304, 140172077809663,
+STORE, 140172078178304, 140172079861759,
+STORE, 140172079861760, 140172079878143,
+STORE, 140172079878144, 140172079906815,
+STORE, 140172079906816, 140172079910911,
+STORE, 140172079910912, 140172079915007,
+STORE, 140172079915008, 140172079919103,
+STORE, 140720358359040, 140720358494207,
+STORE, 140720358498304, 140720358510591,
+STORE, 140720358510592, 140720358514687,
+STORE, 140737488347136, 140737488351231,
+STORE, 140737488343040, 140737488351231,
+STORE, 140737488338944, 140737488351231,
+STORE, 140734529933312, 140737488351231,
+SNULL, 140734529945599, 140737488351231,
+STORE, 140734529933312, 140734529945599,
+STORE, 140734529802240, 140734529945599,
+STORE, 4194304, 26279935,
+STORE, 28372992, 28454911,
+STORE, 28454912, 29806591,
+STORE, 140249744060416, 140249746313215,
+SNULL, 140249744203775, 140249746313215,
+STORE, 140249744060416, 140249744203775,
+STORE, 140249744203776, 140249746313215,
+ERASE, 140249744203776, 140249746313215,
+STORE, 140249746300928, 140249746309119,
+STORE, 140249746309120, 140249746313215,
+STORE, 140734530174976, 140734530179071,
+STORE, 140734530162688, 140734530174975,
+STORE, 140249746272256, 140249746300927,
+STORE, 140249746264064, 140249746272255,
+STORE, 140249740226560, 140249744060415,
+SNULL, 140249740226560, 140249741934591,
+STORE, 140249741934592, 140249744060415,
+STORE, 140249740226560, 140249741934591,
+SNULL, 140249744027647, 140249744060415,
+STORE, 140249741934592, 140249744027647,
+STORE, 140249744027648, 140249744060415,
+ERASE, 140249744027648, 140249744060415,
+STORE, 140249744027648, 140249744060415,
+STORE, 140249738031104, 140249740226559,
+SNULL, 140249738031104, 140249738125311,
+STORE, 140249738125312, 140249740226559,
+STORE, 140249738031104, 140249738125311,
+SNULL, 140249740218367, 140249740226559,
+STORE, 140249738125312, 140249740218367,
+STORE, 140249740218368, 140249740226559,
+ERASE, 140249740218368, 140249740226559,
+STORE, 140249740218368, 140249740226559,
+STORE, 140249735512064, 140249738031103,
+SNULL, 140249735512064, 140249735925759,
+STORE, 140249735925760, 140249738031103,
+STORE, 140249735512064, 140249735925759,
+SNULL, 140249738018815, 140249738031103,
+STORE, 140249735925760, 140249738018815,
+STORE, 140249738018816, 140249738031103,
+ERASE, 140249738018816, 140249738031103,
+STORE, 140249738018816, 140249738031103,
+STORE, 140249732878336, 140249735512063,
+SNULL, 140249732878336, 140249733406719,
+STORE, 140249733406720, 140249735512063,
+STORE, 140249732878336, 140249733406719,
+SNULL, 140249735503871, 140249735512063,
+STORE, 140249733406720, 140249735503871,
+STORE, 140249735503872, 140249735512063,
+ERASE, 140249735503872, 140249735512063,
+STORE, 140249735503872, 140249735512063,
+STORE, 140249730764800, 140249732878335,
+SNULL, 140249730764800, 140249730777087,
+STORE, 140249730777088, 140249732878335,
+STORE, 140249730764800, 140249730777087,
+SNULL, 140249732870143, 140249732878335,
+STORE, 140249730777088, 140249732870143,
+STORE, 140249732870144, 140249732878335,
+ERASE, 140249732870144, 140249732878335,
+STORE, 140249732870144, 140249732878335,
+STORE, 140249728561152, 140249730764799,
+SNULL, 140249728561152, 140249728663551,
+STORE, 140249728663552, 140249730764799,
+STORE, 140249728561152, 140249728663551,
+SNULL, 140249730756607, 140249730764799,
+STORE, 140249728663552, 140249730756607,
+STORE, 140249730756608, 140249730764799,
+ERASE, 140249730756608, 140249730764799,
+STORE, 140249730756608, 140249730764799,
+STORE, 140249746255872, 140249746272255,
+STORE, 140249725399040, 140249728561151,
+SNULL, 140249725399040, 140249726459903,
+STORE, 140249726459904, 140249728561151,
+STORE, 140249725399040, 140249726459903,
+SNULL, 140249728552959, 140249728561151,
+STORE, 140249726459904, 140249728552959,
+STORE, 140249728552960, 140249728561151,
+ERASE, 140249728552960, 140249728561151,
+STORE, 140249728552960, 140249728561151,
+STORE, 140249721602048, 140249725399039,
+SNULL, 140249721602048, 140249723260927,
+STORE, 140249723260928, 140249725399039,
+STORE, 140249721602048, 140249723260927,
+SNULL, 140249725358079, 140249725399039,
+STORE, 140249723260928, 140249725358079,
+STORE, 140249725358080, 140249725399039,
+SNULL, 140249725358080, 140249725382655,
+STORE, 140249725382656, 140249725399039,
+STORE, 140249725358080, 140249725382655,
+ERASE, 140249725358080, 140249725382655,
+STORE, 140249725358080, 140249725382655,
+ERASE, 140249725382656, 140249725399039,
+STORE, 140249725382656, 140249725399039,
+STORE, 140249746243584, 140249746272255,
+SNULL, 140249725374463, 140249725382655,
+STORE, 140249725358080, 140249725374463,
+STORE, 140249725374464, 140249725382655,
+SNULL, 140249728557055, 140249728561151,
+STORE, 140249728552960, 140249728557055,
+STORE, 140249728557056, 140249728561151,
+SNULL, 140249730760703, 140249730764799,
+STORE, 140249730756608, 140249730760703,
+STORE, 140249730760704, 140249730764799,
+SNULL, 140249732874239, 140249732878335,
+STORE, 140249732870144, 140249732874239,
+STORE, 140249732874240, 140249732878335,
+SNULL, 140249735507967, 140249735512063,
+STORE, 140249735503872, 140249735507967,
+STORE, 140249735507968, 140249735512063,
+SNULL, 140249738027007, 140249738031103,
+STORE, 140249738018816, 140249738027007,
+STORE, 140249738027008, 140249738031103,
+SNULL, 140249740222463, 140249740226559,
+STORE, 140249740218368, 140249740222463,
+STORE, 140249740222464, 140249740226559,
+SNULL, 140249744031743, 140249744060415,
+STORE, 140249744027648, 140249744031743,
+STORE, 140249744031744, 140249744060415,
+SNULL, 28405759, 28454911,
+STORE, 28372992, 28405759,
+STORE, 28405760, 28454911,
+SNULL, 140249746305023, 140249746309119,
+STORE, 140249746300928, 140249746305023,
+STORE, 140249746305024, 140249746309119,
+ERASE, 140249746272256, 140249746300927,
+STORE, 33853440, 33988607,
+STORE, 140249744560128, 140249746243583,
+STORE, 140249746296832, 140249746300927,
+STORE, 140249744424960, 140249744560127,
+STORE, 33853440, 34131967,
+STORE, 140249719504896, 140249721602047,
+STORE, 140249746288640, 140249746300927,
+STORE, 140249746280448, 140249746300927,
+STORE, 140249746243584, 140249746280447,
+STORE, 140249744408576, 140249744560127,
+STORE, 33853440, 34267135,
+STORE, 33853440, 34422783,
+STORE, 140249744400384, 140249744560127,
+STORE, 140249744392192, 140249744560127,
+STORE, 33853440, 34557951,
+STORE, 33853440, 34693119,
+STORE, 140249744375808, 140249744560127,
+STORE, 140249744367616, 140249744560127,
+STORE, 33853440, 34832383,
+STORE, 140249719230464, 140249721602047,
+STORE, 140249744207872, 140249744560127,
+STORE, 33853440, 34971647,
+SNULL, 34963455, 34971647,
+STORE, 33853440, 34963455,
+STORE, 34963456, 34971647,
+ERASE, 34963456, 34971647,
+SNULL, 34955263, 34963455,
+STORE, 33853440, 34955263,
+STORE, 34955264, 34963455,
+ERASE, 34955264, 34963455,
+SNULL, 34947071, 34955263,
+STORE, 33853440, 34947071,
+STORE, 34947072, 34955263,
+ERASE, 34947072, 34955263,
+SNULL, 34938879, 34947071,
+STORE, 33853440, 34938879,
+STORE, 34938880, 34947071,
+ERASE, 34938880, 34947071,
+STORE, 140249719214080, 140249721602047,
+STORE, 140249719148544, 140249721602047,
+STORE, 140249719115776, 140249721602047,
+STORE, 140249717018624, 140249721602047,
+STORE, 140249716953088, 140249721602047,
+STORE, 33853440, 35086335,
+STORE, 140249716822016, 140249721602047,
+STORE, 140249716559872, 140249721602047,
+STORE, 140249716551680, 140249721602047,
+STORE, 140249716535296, 140249721602047,
+STORE, 140249716527104, 140249721602047,
+STORE, 140249716518912, 140249721602047,
+STORE, 33853440, 35221503,
+SNULL, 35213311, 35221503,
+STORE, 33853440, 35213311,
+STORE, 35213312, 35221503,
+ERASE, 35213312, 35221503,
+SNULL, 35205119, 35213311,
+STORE, 33853440, 35205119,
+STORE, 35205120, 35213311,
+ERASE, 35205120, 35213311,
+SNULL, 35192831, 35205119,
+STORE, 33853440, 35192831,
+STORE, 35192832, 35205119,
+ERASE, 35192832, 35205119,
+SNULL, 35176447, 35192831,
+STORE, 33853440, 35176447,
+STORE, 35176448, 35192831,
+ERASE, 35176448, 35192831,
+STORE, 140249716502528, 140249721602047,
+STORE, 33853440, 35311615,
+SNULL, 35307519, 35311615,
+STORE, 33853440, 35307519,
+STORE, 35307520, 35311615,
+ERASE, 35307520, 35311615,
+SNULL, 35303423, 35307519,
+STORE, 33853440, 35303423,
+STORE, 35303424, 35307519,
+ERASE, 35303424, 35307519,
+SNULL, 35299327, 35303423,
+STORE, 33853440, 35299327,
+STORE, 35299328, 35303423,
+ERASE, 35299328, 35303423,
+SNULL, 35295231, 35299327,
+STORE, 33853440, 35295231,
+STORE, 35295232, 35299327,
+ERASE, 35295232, 35299327,
+SNULL, 35291135, 35295231,
+STORE, 33853440, 35291135,
+STORE, 35291136, 35295231,
+ERASE, 35291136, 35295231,
+SNULL, 35287039, 35291135,
+STORE, 33853440, 35287039,
+STORE, 35287040, 35291135,
+ERASE, 35287040, 35291135,
+SNULL, 35282943, 35287039,
+STORE, 33853440, 35282943,
+STORE, 35282944, 35287039,
+ERASE, 35282944, 35287039,
+STORE, 140249716486144, 140249721602047,
+STORE, 140249716453376, 140249721602047,
+STORE, 33853440, 35418111,
+SNULL, 35401727, 35418111,
+STORE, 33853440, 35401727,
+STORE, 35401728, 35418111,
+ERASE, 35401728, 35418111,
+SNULL, 35389439, 35401727,
+STORE, 33853440, 35389439,
+STORE, 35389440, 35401727,
+ERASE, 35389440, 35401727,
+STORE, 140249714356224, 140249721602047,
+STORE, 33853440, 35540991,
+STORE, 140249714339840, 140249721602047,
+STORE, 140249714077696, 140249721602047,
+STORE, 140249714069504, 140249721602047,
+STORE, 140249714061312, 140249721602047,
+STORE, 33853440, 35680255,
+SNULL, 35672063, 35680255,
+STORE, 33853440, 35672063,
+STORE, 35672064, 35680255,
+ERASE, 35672064, 35680255,
+SNULL, 35627007, 35672063,
+STORE, 33853440, 35627007,
+STORE, 35627008, 35672063,
+ERASE, 35627008, 35672063,
+STORE, 140249711964160, 140249721602047,
+STORE, 33853440, 35762175,
+SNULL, 35753983, 35762175,
+STORE, 33853440, 35753983,
+STORE, 35753984, 35762175,
+ERASE, 35753984, 35762175,
+SNULL, 35745791, 35753983,
+STORE, 33853440, 35745791,
+STORE, 35745792, 35753983,
+ERASE, 35745792, 35753983,
+STORE, 140249711955968, 140249721602047,
+STORE, 140249711947776, 140249721602047,
+STORE, 140249710899200, 140249721602047,
+STORE, 140249710866432, 140249721602047,
+STORE, 140249710600192, 140249721602047,
+SNULL, 140249744424959, 140249744560127,
+STORE, 140249744207872, 140249744424959,
+STORE, 140249744424960, 140249744560127,
+ERASE, 140249744424960, 140249744560127,
+STORE, 140249708503040, 140249721602047,
+STORE, 33853440, 35885055,
+STORE, 140249707978752, 140249721602047,
+STORE, 140249705881600, 140249721602047,
+STORE, 33853440, 36036607,
+STORE, 33853440, 36175871,
+STORE, 140249744551936, 140249744560127,
+STORE, 140249744543744, 140249744560127,
+STORE, 140249744535552, 140249744560127,
+STORE, 140249744527360, 140249744560127,
+STORE, 140249744519168, 140249744560127,
+STORE, 140249705619456, 140249721602047,
+STORE, 140249744510976, 140249744560127,
+STORE, 140249744502784, 140249744560127,
+STORE, 140249744494592, 140249744560127,
+STORE, 140249744486400, 140249744560127,
+STORE, 140249744478208, 140249744560127,
+STORE, 140249744470016, 140249744560127,
+STORE, 140249744461824, 140249744560127,
+STORE, 140249744453632, 140249744560127,
+STORE, 140249744445440, 140249744560127,
+STORE, 140249744437248, 140249744560127,
+STORE, 140249744429056, 140249744560127,
+STORE, 140249703522304, 140249721602047,
+STORE, 33853440, 36311039,
+STORE, 140249703489536, 140249721602047,
+STORE, 33853440, 36474879,
+STORE, 140249703456768, 140249721602047,
+STORE, 33853440, 36622335,
+STORE, 140249703424000, 140249721602047,
+STORE, 140249703391232, 140249721602047,
+STORE, 33853440, 36810751,
+STORE, 140249703358464, 140249721602047,
+STORE, 140249703325696, 140249721602047,
+SNULL, 36655103, 36810751,
+STORE, 33853440, 36655103,
+STORE, 36655104, 36810751,
+ERASE, 36655104, 36810751,
+SNULL, 36438015, 36655103,
+STORE, 33853440, 36438015,
+STORE, 36438016, 36655103,
+ERASE, 36438016, 36655103,
+STORE, 140249703317504, 140249721602047,
+STORE, 140249701220352, 140249721602047,
+STORE, 33853440, 36585471,
+STORE, 33853440, 36782079,
+STORE, 140249701212160, 140249721602047,
+STORE, 140249701203968, 140249721602047,
+STORE, 140249701195776, 140249721602047,
+STORE, 140249701187584, 140249721602047,
+STORE, 140249701179392, 140249721602047,
+STORE, 140249701171200, 140249721602047,
+STORE, 140249701163008, 140249721602047,
+STORE, 140249701154816, 140249721602047,
+STORE, 140249701146624, 140249721602047,
+STORE, 140249701138432, 140249721602047,
+STORE, 140249701130240, 140249721602047,
+STORE, 140249700081664, 140249721602047,
+STORE, 140249700073472, 140249721602047,
+STORE, 33853440, 36978687,
+STORE, 140249697976320, 140249721602047,
+STORE, 33853440, 37240831,
+STORE, 140249695879168, 140249721602047,
+STORE, 140249695870976, 140249721602047,
+STORE, 140249695862784, 140249721602047,
+STORE, 140249695854592, 140249721602047,
+STORE, 140249695326208, 140249721602047,
+SNULL, 140249710600191, 140249721602047,
+STORE, 140249695326208, 140249710600191,
+STORE, 140249710600192, 140249721602047,
+SNULL, 140249710600192, 140249710866431,
+STORE, 140249710866432, 140249721602047,
+STORE, 140249710600192, 140249710866431,
+ERASE, 140249710600192, 140249710866431,
+STORE, 140249691131904, 140249710600191,
+STORE, 33853440, 37474303,
+STORE, 140249710858240, 140249721602047,
+STORE, 140249710850048, 140249721602047,
+STORE, 140249710841856, 140249721602047,
+STORE, 140249710833664, 140249721602047,
+STORE, 140249710825472, 140249721602047,
+STORE, 140249710817280, 140249721602047,
+STORE, 140249710809088, 140249721602047,
+STORE, 140249710800896, 140249721602047,
+STORE, 140249710792704, 140249721602047,
+STORE, 140249710784512, 140249721602047,
+STORE, 140249710776320, 140249721602047,
+STORE, 140249710768128, 140249721602047,
+STORE, 140249710759936, 140249721602047,
+STORE, 140249710751744, 140249721602047,
+STORE, 140249710743552, 140249721602047,
+STORE, 140249710735360, 140249721602047,
+STORE, 140249689034752, 140249710600191,
+STORE, 140249710727168, 140249721602047,
+STORE, 140249686937600, 140249710600191,
+STORE, 33853440, 37867519,
+STORE, 140249684840448, 140249710600191,
+STORE, 140249710718976, 140249721602047,
+STORE, 140249682743296, 140249710600191,
+STORE, 140249710710784, 140249721602047,
+STORE, 140249710702592, 140249721602047,
+STORE, 140249710694400, 140249721602047,
+STORE, 140249710686208, 140249721602047,
+STORE, 140249710678016, 140249721602047,
+STORE, 140249682612224, 140249710600191,
+STORE, 140249682087936, 140249710600191,
+SNULL, 140249705619455, 140249710600191,
+STORE, 140249682087936, 140249705619455,
+STORE, 140249705619456, 140249710600191,
+SNULL, 140249705619456, 140249705881599,
+STORE, 140249705881600, 140249710600191,
+STORE, 140249705619456, 140249705881599,
+ERASE, 140249705619456, 140249705881599,
+STORE, 140249679990784, 140249705619455,
+STORE, 140249710669824, 140249721602047,
+STORE, 140249677893632, 140249705619455,
+STORE, 140249710653440, 140249721602047,
+STORE, 140249710645248, 140249721602047,
+STORE, 140249710637056, 140249721602047,
+STORE, 140249710628864, 140249721602047,
+STORE, 140249710620672, 140249721602047,
+STORE, 140249710612480, 140249721602047,
+STORE, 140249710604288, 140249721602047,
+STORE, 140249705873408, 140249710600191,
+STORE, 140249705865216, 140249710600191,
+STORE, 140249705857024, 140249710600191,
+STORE, 140249705848832, 140249710600191,
+STORE, 140249705840640, 140249710600191,
+STORE, 140249705832448, 140249710600191,
+STORE, 140249705824256, 140249710600191,
+STORE, 140249705816064, 140249710600191,
+STORE, 140249705807872, 140249710600191,
+STORE, 140249705799680, 140249710600191,
+STORE, 33853440, 38129663,
+SNULL, 140249744207872, 140249744367615,
+STORE, 140249744367616, 140249744424959,
+STORE, 140249744207872, 140249744367615,
+ERASE, 140249744207872, 140249744367615,
+STORE, 140249677606912, 140249705619455,
+STORE, 140249675509760, 140249705619455,
+SNULL, 140249677606911, 140249705619455,
+STORE, 140249675509760, 140249677606911,
+STORE, 140249677606912, 140249705619455,
+SNULL, 140249677606912, 140249677893631,
+STORE, 140249677893632, 140249705619455,
+STORE, 140249677606912, 140249677893631,
+ERASE, 140249677606912, 140249677893631,
+STORE, 140249744359424, 140249744424959,
+STORE, 33853440, 38391807,
+STORE, 140249674981376, 140249677606911,
+STORE, 140249672884224, 140249677606911,
+SNULL, 140249719230463, 140249721602047,
+STORE, 140249710604288, 140249719230463,
+STORE, 140249719230464, 140249721602047,
+SNULL, 140249719230464, 140249719504895,
+STORE, 140249719504896, 140249721602047,
+STORE, 140249719230464, 140249719504895,
+ERASE, 140249719230464, 140249719504895,
+STORE, 140249744351232, 140249744424959,
+STORE, 140249744343040, 140249744424959,
+STORE, 140249744334848, 140249744424959,
+STORE, 140249744326656, 140249744424959,
+STORE, 140249744310272, 140249744424959,
+STORE, 140249744302080, 140249744424959,
+STORE, 140249744285696, 140249744424959,
+STORE, 140249744277504, 140249744424959,
+STORE, 140249744261120, 140249744424959,
+STORE, 140249744252928, 140249744424959,
+STORE, 140249744220160, 140249744424959,
+STORE, 140249744211968, 140249744424959,
+STORE, 140249719488512, 140249721602047,
+STORE, 140249744203776, 140249744424959,
+STORE, 140249719472128, 140249721602047,
+STORE, 140249719463936, 140249721602047,
+STORE, 140249719447552, 140249721602047,
+STORE, 140249719439360, 140249721602047,
+STORE, 140249719406592, 140249721602047,
+STORE, 140249719398400, 140249721602047,
+STORE, 140249719382016, 140249721602047,
+STORE, 140249719373824, 140249721602047,
+STORE, 140249719357440, 140249721602047,
+STORE, 140249719349248, 140249721602047,
+STORE, 140249719332864, 140249721602047,
+STORE, 140249719324672, 140249721602047,
+STORE, 140249719291904, 140249721602047,
+STORE, 140249719283712, 140249721602047,
+STORE, 140249719267328, 140249721602047,
+STORE, 140249719259136, 140249721602047,
+STORE, 140249719242752, 140249721602047,
+STORE, 140249719234560, 140249721602047,
+STORE, 140249705783296, 140249710600191,
+STORE, 140249705775104, 140249710600191,
+STORE, 140249705742336, 140249710600191,
+STORE, 140249705734144, 140249710600191,
+STORE, 140249705717760, 140249710600191,
+STORE, 140249670787072, 140249677606911,
+STORE, 140249705709568, 140249710600191,
+STORE, 140249705693184, 140249710600191,
+STORE, 140249705684992, 140249710600191,
+STORE, 140249705668608, 140249710600191,
+STORE, 140249705660416, 140249710600191,
+STORE, 140249705627648, 140249710600191,
+STORE, 140249677893632, 140249710600191,
+STORE, 140249677877248, 140249710600191,
+STORE, 140249677869056, 140249710600191,
+STORE, 140249677852672, 140249710600191,
+STORE, 140249677844480, 140249710600191,
+STORE, 140249677828096, 140249710600191,
+STORE, 140249668689920, 140249677606911,
+STORE, 140249677819904, 140249710600191,
+STORE, 140249677787136, 140249710600191,
+STORE, 140249677778944, 140249710600191,
+STORE, 140249677762560, 140249710600191,
+STORE, 140249677754368, 140249710600191,
+STORE, 140249677737984, 140249710600191,
+STORE, 140249677729792, 140249710600191,
+STORE, 140249677713408, 140249710600191,
+STORE, 140249677705216, 140249710600191,
+STORE, 140249677672448, 140249710600191,
+STORE, 140249677664256, 140249710600191,
+STORE, 140249677647872, 140249710600191,
+STORE, 140249677639680, 140249710600191,
+STORE, 140249677623296, 140249710600191,
+STORE, 140249677615104, 140249710600191,
+STORE, 140249668673536, 140249677606911,
+STORE, 140249668673536, 140249710600191,
+STORE, 140249668640768, 140249710600191,
+STORE, 140249668632576, 140249710600191,
+STORE, 140249668616192, 140249710600191,
+STORE, 140249668608000, 140249710600191,
+STORE, 140249668591616, 140249710600191,
+STORE, 140249668583424, 140249710600191,
+STORE, 140249668567040, 140249710600191,
+STORE, 140249668558848, 140249710600191,
+STORE, 140249668526080, 140249710600191,
+STORE, 140249668517888, 140249710600191,
+STORE, 140249668501504, 140249710600191,
+STORE, 140249668493312, 140249710600191,
+STORE, 140249668476928, 140249710600191,
+STORE, 140249668468736, 140249710600191,
+STORE, 140249668452352, 140249710600191,
+STORE, 140249668444160, 140249710600191,
+STORE, 140249668411392, 140249710600191,
+STORE, 140249668403200, 140249710600191,
+STORE, 140249668386816, 140249710600191,
+STORE, 140249668378624, 140249710600191,
+STORE, 140249668362240, 140249710600191,
+STORE, 140249668354048, 140249710600191,
+STORE, 140249668337664, 140249710600191,
+STORE, 140249668329472, 140249710600191,
+STORE, 140249668296704, 140249710600191,
+STORE, 140249668288512, 140249710600191,
+STORE, 140249668272128, 140249710600191,
+STORE, 140249668263936, 140249710600191,
+STORE, 140249668247552, 140249710600191,
+STORE, 140249668239360, 140249710600191,
+STORE, 140249668222976, 140249710600191,
+STORE, 140249668214784, 140249710600191,
+STORE, 140249668182016, 140249710600191,
+STORE, 140249668173824, 140249710600191,
+STORE, 140249668157440, 140249710600191,
+STORE, 140249668149248, 140249710600191,
+STORE, 140249668132864, 140249710600191,
+STORE, 140249668124672, 140249710600191,
+STORE, 140249668108288, 140249710600191,
+STORE, 140249668100096, 140249710600191,
+STORE, 140249668067328, 140249710600191,
+STORE, 140249668059136, 140249710600191,
+STORE, 140249668042752, 140249710600191,
+STORE, 140249668034560, 140249710600191,
+STORE, 140249668018176, 140249710600191,
+STORE, 140249668009984, 140249710600191,
+STORE, 140249667993600, 140249710600191,
+STORE, 140249667985408, 140249710600191,
+STORE, 140249667952640, 140249710600191,
+STORE, 140249667944448, 140249710600191,
+STORE, 140249667928064, 140249710600191,
+STORE, 140249667919872, 140249710600191,
+STORE, 140249667903488, 140249710600191,
+STORE, 140249667895296, 140249710600191,
+STORE, 140249667878912, 140249710600191,
+STORE, 140249667870720, 140249710600191,
+STORE, 140249667837952, 140249710600191,
+STORE, 140249667829760, 140249710600191,
+STORE, 140249667813376, 140249710600191,
+STORE, 140249667805184, 140249710600191,
+STORE, 140249667788800, 140249710600191,
+STORE, 140249667780608, 140249710600191,
+STORE, 140249667764224, 140249710600191,
+STORE, 140249667756032, 140249710600191,
+STORE, 140249667723264, 140249710600191,
+STORE, 140249667715072, 140249710600191,
+STORE, 140249667698688, 140249710600191,
+STORE, 140249667690496, 140249710600191,
+STORE, 140249667674112, 140249710600191,
+STORE, 140249667665920, 140249710600191,
+STORE, 140249667649536, 140249710600191,
+STORE, 140249667641344, 140249710600191,
+STORE, 140249667608576, 140249710600191,
+STORE, 140249667600384, 140249710600191,
+STORE, 140249667584000, 140249710600191,
+STORE, 140249667575808, 140249710600191,
+STORE, 140249667559424, 140249710600191,
+STORE, 140249667551232, 140249710600191,
+STORE, 140249667534848, 140249710600191,
+STORE, 140249667526656, 140249710600191,
+STORE, 140249667493888, 140249710600191,
+STORE, 140249667485696, 140249710600191,
+STORE, 140249667469312, 140249710600191,
+STORE, 140249667461120, 140249710600191,
+STORE, 140249667444736, 140249710600191,
+STORE, 140249667436544, 140249710600191,
+STORE, 140249667420160, 140249710600191,
+STORE, 140249665323008, 140249710600191,
+STORE, 140249665314816, 140249710600191,
+STORE, 140249665282048, 140249710600191,
+STORE, 140249665273856, 140249710600191,
+STORE, 140249665257472, 140249710600191,
+STORE, 140249665249280, 140249710600191,
+STORE, 140249665232896, 140249710600191,
+STORE, 140249665224704, 140249710600191,
+STORE, 140249665208320, 140249710600191,
+STORE, 140249665200128, 140249710600191,
+STORE, 140249665167360, 140249710600191,
+STORE, 140249665159168, 140249710600191,
+STORE, 140249665142784, 140249710600191,
+STORE, 140249665134592, 140249710600191,
+STORE, 140249665118208, 140249710600191,
+STORE, 140249665110016, 140249710600191,
+STORE, 140249665093632, 140249710600191,
+STORE, 140249665085440, 140249710600191,
+STORE, 140249665052672, 140249710600191,
+STORE, 140249665044480, 140249710600191,
+STORE, 140249665028096, 140249710600191,
+STORE, 140249665019904, 140249710600191,
+STORE, 140249665003520, 140249710600191,
+STORE, 140249664995328, 140249710600191,
+STORE, 140249664978944, 140249710600191,
+STORE, 140249664970752, 140249710600191,
+STORE, 140249664937984, 140249710600191,
+STORE, 140249664929792, 140249710600191,
+STORE, 140249664913408, 140249710600191,
+STORE, 140249664905216, 140249710600191,
+STORE, 140249664888832, 140249710600191,
+STORE, 140249664880640, 140249710600191,
+STORE, 140249664864256, 140249710600191,
+STORE, 140249664856064, 140249710600191,
+STORE, 140249664823296, 140249710600191,
+STORE, 140249664815104, 140249710600191,
+STORE, 140249664798720, 140249710600191,
+STORE, 140249664790528, 140249710600191,
+STORE, 140249664774144, 140249710600191,
+STORE, 140249664765952, 140249710600191,
+STORE, 140249664749568, 140249710600191,
+STORE, 140249664741376, 140249710600191,
+STORE, 140249664708608, 140249710600191,
+STORE, 140249664700416, 140249710600191,
+STORE, 140249664684032, 140249710600191,
+STORE, 140249664675840, 140249710600191,
+STORE, 140249664659456, 140249710600191,
+STORE, 140249664651264, 140249710600191,
+STORE, 140249664634880, 140249710600191,
+STORE, 140249664626688, 140249710600191,
+STORE, 140249664593920, 140249710600191,
+STORE, 140249664585728, 140249710600191,
+STORE, 140249664569344, 140249710600191,
+STORE, 140249664561152, 140249710600191,
+STORE, 140249664544768, 140249710600191,
+STORE, 140249664536576, 140249710600191,
+STORE, 140249664520192, 140249710600191,
+STORE, 140249664512000, 140249710600191,
+STORE, 140249664479232, 140249710600191,
+STORE, 140249664471040, 140249710600191,
+STORE, 140249664454656, 140249710600191,
+STORE, 140249664446464, 140249710600191,
+STORE, 140249664430080, 140249710600191,
+STORE, 140249664421888, 140249710600191,
+STORE, 140249664405504, 140249710600191,
+STORE, 140249664397312, 140249710600191,
+STORE, 140249664364544, 140249710600191,
+STORE, 140249664356352, 140249710600191,
+STORE, 140249664339968, 140249710600191,
+STORE, 140249664331776, 140249710600191,
+STORE, 140249664315392, 140249710600191,
+STORE, 140249664307200, 140249710600191,
+STORE, 140249664290816, 140249710600191,
+STORE, 140249664282624, 140249710600191,
+STORE, 140249664249856, 140249710600191,
+STORE, 140249664241664, 140249710600191,
+STORE, 140249664225280, 140249710600191,
+STORE, 140249664217088, 140249710600191,
+STORE, 140249664200704, 140249710600191,
+STORE, 140249664192512, 140249710600191,
+STORE, 140249664176128, 140249710600191,
+STORE, 140249664167936, 140249710600191,
+STORE, 140249664135168, 140249710600191,
+STORE, 140249664126976, 140249710600191,
+STORE, 140249664110592, 140249710600191,
+STORE, 140249664102400, 140249710600191,
+STORE, 140249664086016, 140249710600191,
+STORE, 140249664077824, 140249710600191,
+STORE, 140249664061440, 140249710600191,
+STORE, 140249664053248, 140249710600191,
+STORE, 140249664020480, 140249710600191,
+STORE, 140249664012288, 140249710600191,
+STORE, 140249663995904, 140249710600191,
+STORE, 140249663987712, 140249710600191,
+STORE, 140249663971328, 140249710600191,
+STORE, 140249663963136, 140249710600191,
+STORE, 140249663946752, 140249710600191,
+STORE, 140249663938560, 140249710600191,
+STORE, 140249663905792, 140249710600191,
+STORE, 140249663897600, 140249710600191,
+STORE, 140249663881216, 140249710600191,
+STORE, 140249663873024, 140249710600191,
+STORE, 140249663856640, 140249710600191,
+STORE, 140249663848448, 140249710600191,
+STORE, 140249663832064, 140249710600191,
+STORE, 140249663823872, 140249710600191,
+STORE, 140249663791104, 140249710600191,
+STORE, 140249663782912, 140249710600191,
+STORE, 140249663766528, 140249710600191,
+STORE, 140249663758336, 140249710600191,
+STORE, 140249663741952, 140249710600191,
+STORE, 140249663733760, 140249710600191,
+STORE, 140249663717376, 140249710600191,
+STORE, 140249663709184, 140249710600191,
+STORE, 140249663676416, 140249710600191,
+STORE, 140249663668224, 140249710600191,
+STORE, 140249663651840, 140249710600191,
+STORE, 140249663643648, 140249710600191,
+STORE, 140249663627264, 140249710600191,
+STORE, 33853440, 38526975,
+STORE, 140249663619072, 140249710600191,
+STORE, 140249663602688, 140249710600191,
+STORE, 140249661505536, 140249710600191,
+STORE, 140249661497344, 140249710600191,
+STORE, 140249661464576, 140249710600191,
+STORE, 140249661456384, 140249710600191,
+STORE, 140249661440000, 140249710600191,
+STORE, 140249661431808, 140249710600191,
+STORE, 140249661415424, 140249710600191,
+STORE, 140249661407232, 140249710600191,
+STORE, 140249661390848, 140249710600191,
+STORE, 140249661382656, 140249710600191,
+STORE, 140249661349888, 140249710600191,
+STORE, 140249661341696, 140249710600191,
+STORE, 140249661325312, 140249710600191,
+STORE, 140249661317120, 140249710600191,
+STORE, 140249661300736, 140249710600191,
+STORE, 140249661292544, 140249710600191,
+STORE, 140249661276160, 140249710600191,
+STORE, 140249661267968, 140249710600191,
+STORE, 140249661235200, 140249710600191,
+STORE, 140249661227008, 140249710600191,
+STORE, 140249661210624, 140249710600191,
+STORE, 140249661202432, 140249710600191,
+STORE, 140249661186048, 140249710600191,
+STORE, 140249661177856, 140249710600191,
+STORE, 140249661161472, 140249710600191,
+STORE, 140249661153280, 140249710600191,
+STORE, 140249661120512, 140249710600191,
+STORE, 140249661112320, 140249710600191,
+STORE, 140249661095936, 140249710600191,
+STORE, 140249661087744, 140249710600191,
+STORE, 140249661071360, 140249710600191,
+STORE, 140249661063168, 140249710600191,
+STORE, 140249661046784, 140249710600191,
+STORE, 140249661038592, 140249710600191,
+STORE, 140249661005824, 140249710600191,
+STORE, 140249660997632, 140249710600191,
+STORE, 140249660981248, 140249710600191,
+STORE, 140249660973056, 140249710600191,
+STORE, 140249660956672, 140249710600191,
+STORE, 140249660948480, 140249710600191,
+STORE, 140249660932096, 140249710600191,
+STORE, 140249660923904, 140249710600191,
+STORE, 140249660891136, 140249710600191,
+STORE, 140249660882944, 140249710600191,
+STORE, 140249660866560, 140249710600191,
+STORE, 140249660858368, 140249710600191,
+STORE, 140249660841984, 140249710600191,
+STORE, 140249660833792, 140249710600191,
+STORE, 140249660817408, 140249710600191,
+STORE, 140249660809216, 140249710600191,
+STORE, 140249660776448, 140249710600191,
+STORE, 140249660768256, 140249710600191,
+STORE, 140249660751872, 140249710600191,
+STORE, 140249660743680, 140249710600191,
+STORE, 140249660727296, 140249710600191,
+STORE, 140249660719104, 140249710600191,
+STORE, 140249660702720, 140249710600191,
+STORE, 140249660694528, 140249710600191,
+STORE, 140249660661760, 140249710600191,
+STORE, 140249660653568, 140249710600191,
+STORE, 140249660637184, 140249710600191,
+STORE, 140249660628992, 140249710600191,
+STORE, 140249660612608, 140249710600191,
+STORE, 140249660604416, 140249710600191,
+STORE, 140249660588032, 140249710600191,
+STORE, 140249660579840, 140249710600191,
+STORE, 140249660547072, 140249710600191,
+STORE, 140249660538880, 140249710600191,
+STORE, 140249660522496, 140249710600191,
+STORE, 140249660514304, 140249710600191,
+STORE, 140249660497920, 140249710600191,
+STORE, 140249660489728, 140249710600191,
+STORE, 140249660473344, 140249710600191,
+STORE, 140249660465152, 140249710600191,
+STORE, 140249660432384, 140249710600191,
+STORE, 140249660424192, 140249710600191,
+STORE, 140249660407808, 140249710600191,
+STORE, 140249660399616, 140249710600191,
+STORE, 140249660383232, 140249710600191,
+STORE, 140249660375040, 140249710600191,
+STORE, 140249660358656, 140249710600191,
+STORE, 140249660350464, 140249710600191,
+STORE, 140249660317696, 140249710600191,
+STORE, 140249660309504, 140249710600191,
+STORE, 140249660293120, 140249710600191,
+STORE, 140249660284928, 140249710600191,
+STORE, 140249660268544, 140249710600191,
+STORE, 140249660260352, 140249710600191,
+STORE, 140249660243968, 140249710600191,
+STORE, 140249660235776, 140249710600191,
+STORE, 140249660203008, 140249710600191,
+STORE, 140249660194816, 140249710600191,
+STORE, 140249660178432, 140249710600191,
+STORE, 140249660170240, 140249710600191,
+STORE, 140249660153856, 140249710600191,
+STORE, 140249660145664, 140249710600191,
+STORE, 140249660129280, 140249710600191,
+STORE, 140249660121088, 140249710600191,
+STORE, 140249660088320, 140249710600191,
+STORE, 140249660080128, 140249710600191,
+STORE, 140249660063744, 140249710600191,
+STORE, 140249660055552, 140249710600191,
+STORE, 140249660039168, 140249710600191,
+STORE, 140249660030976, 140249710600191,
+STORE, 140249660014592, 140249710600191,
+STORE, 140249660006400, 140249710600191,
+STORE, 140249659973632, 140249710600191,
+STORE, 140249659965440, 140249710600191,
+STORE, 140249659949056, 140249710600191,
+STORE, 140249659940864, 140249710600191,
+STORE, 140249659924480, 140249710600191,
+STORE, 140249659916288, 140249710600191,
+STORE, 140249659899904, 140249710600191,
+STORE, 140249659891712, 140249710600191,
+STORE, 140249659858944, 140249710600191,
+STORE, 140249659850752, 140249710600191,
+STORE, 140249659834368, 140249710600191,
+STORE, 140249659826176, 140249710600191,
+STORE, 140249659809792, 140249710600191,
+STORE, 140249659801600, 140249710600191,
+STORE, 140249659785216, 140249710600191,
+STORE, 140249657688064, 140249710600191,
+STORE, 140249657679872, 140249710600191,
+STORE, 140249657647104, 140249710600191,
+STORE, 140249657638912, 140249710600191,
+STORE, 140249657622528, 140249710600191,
+STORE, 140249657614336, 140249710600191,
+STORE, 140249657597952, 140249710600191,
+STORE, 140249657589760, 140249710600191,
+STORE, 140249657573376, 140249710600191,
+STORE, 140249657565184, 140249710600191,
+STORE, 140249657532416, 140249710600191,
+STORE, 140249657524224, 140249710600191,
+STORE, 140249657507840, 140249710600191,
+STORE, 140249657499648, 140249710600191,
+STORE, 140249657483264, 140249710600191,
+STORE, 140249657475072, 140249710600191,
+STORE, 140249657458688, 140249710600191,
+STORE, 140249657450496, 140249710600191,
+STORE, 140249657417728, 140249710600191,
+STORE, 140249657409536, 140249710600191,
+STORE, 140249657393152, 140249710600191,
+STORE, 140249657384960, 140249710600191,
+STORE, 140249657368576, 140249710600191,
+STORE, 140249657360384, 140249710600191,
+STORE, 140249657344000, 140249710600191,
+STORE, 140249657335808, 140249710600191,
+STORE, 140249657303040, 140249710600191,
+STORE, 140249657294848, 140249710600191,
+STORE, 140249657278464, 140249710600191,
+STORE, 140249657270272, 140249710600191,
+STORE, 140249657253888, 140249710600191,
+STORE, 140249657245696, 140249710600191,
+STORE, 140249657229312, 140249710600191,
+STORE, 140249657221120, 140249710600191,
+STORE, 140249657188352, 140249710600191,
+STORE, 140249657180160, 140249710600191,
+STORE, 140249657163776, 140249710600191,
+STORE, 140249657155584, 140249710600191,
+STORE, 140249657139200, 140249710600191,
+STORE, 140249657131008, 140249710600191,
+STORE, 140249657114624, 140249710600191,
+STORE, 140249657106432, 140249710600191,
+STORE, 140249657073664, 140249710600191,
+STORE, 140249657065472, 140249710600191,
+STORE, 140249657049088, 140249710600191,
+STORE, 140249657040896, 140249710600191,
+STORE, 140249657024512, 140249710600191,
+STORE, 140249657016320, 140249710600191,
+STORE, 140249656999936, 140249710600191,
+STORE, 140249656991744, 140249710600191,
+STORE, 140249656958976, 140249710600191,
+STORE, 140249656950784, 140249710600191,
+STORE, 140249656934400, 140249710600191,
+STORE, 140249656926208, 140249710600191,
+STORE, 140249656909824, 140249710600191,
+STORE, 140249656901632, 140249710600191,
+STORE, 140249656885248, 140249710600191,
+STORE, 140249656877056, 140249710600191,
+STORE, 140249656844288, 140249710600191,
+STORE, 140249656836096, 140249710600191,
+STORE, 140249656819712, 140249710600191,
+STORE, 140249656811520, 140249710600191,
+STORE, 140249656795136, 140249710600191,
+STORE, 33853440, 38662143,
+STORE, 140249656786944, 140249710600191,
+STORE, 140249656770560, 140249710600191,
+STORE, 140249656762368, 140249710600191,
+STORE, 140249656729600, 140249710600191,
+STORE, 140249656721408, 140249710600191,
+STORE, 140249656705024, 140249710600191,
+STORE, 140249656696832, 140249710600191,
+STORE, 140249656680448, 140249710600191,
+STORE, 140249656672256, 140249710600191,
+STORE, 140249656655872, 140249710600191,
+STORE, 140249656647680, 140249710600191,
+STORE, 140249656614912, 140249710600191,
+STORE, 140249656606720, 140249710600191,
+STORE, 140249656590336, 140249710600191,
+STORE, 140249656582144, 140249710600191,
+STORE, 140249656565760, 140249710600191,
+STORE, 140249656557568, 140249710600191,
+STORE, 140249656541184, 140249710600191,
+STORE, 140249656532992, 140249710600191,
+STORE, 140249656500224, 140249710600191,
+STORE, 140249656492032, 140249710600191,
+STORE, 140249656475648, 140249710600191,
+STORE, 140249656467456, 140249710600191,
+STORE, 140249656451072, 140249710600191,
+STORE, 140249656442880, 140249710600191,
+STORE, 140249656426496, 140249710600191,
+STORE, 140249656418304, 140249710600191,
+STORE, 140249656385536, 140249710600191,
+STORE, 140249656377344, 140249710600191,
+STORE, 140249656360960, 140249710600191,
+STORE, 140249656352768, 140249710600191,
+STORE, 140249656336384, 140249710600191,
+STORE, 140249656328192, 140249710600191,
+STORE, 140249656311808, 140249710600191,
+STORE, 140249656303616, 140249710600191,
+STORE, 140249656270848, 140249710600191,
+STORE, 140249656262656, 140249710600191,
+STORE, 140249656246272, 140249710600191,
+STORE, 140249656238080, 140249710600191,
+STORE, 140249656221696, 140249710600191,
+STORE, 140249656213504, 140249710600191,
+STORE, 140249656197120, 140249710600191,
+STORE, 140249656188928, 140249710600191,
+STORE, 140249656156160, 140249710600191,
+STORE, 140249656147968, 140249710600191,
+STORE, 140249656131584, 140249710600191,
+STORE, 140249656123392, 140249710600191,
+STORE, 140249656107008, 140249710600191,
+STORE, 140249656098816, 140249710600191,
+STORE, 140249656082432, 140249710600191,
+STORE, 140249656074240, 140249710600191,
+STORE, 140249656041472, 140249710600191,
+STORE, 140249656033280, 140249710600191,
+STORE, 140249656016896, 140249710600191,
+STORE, 140249656008704, 140249710600191,
+STORE, 140249655992320, 140249710600191,
+STORE, 140249655984128, 140249710600191,
+STORE, 140249655967744, 140249710600191,
+STORE, 140249653870592, 140249710600191,
+STORE, 140249653862400, 140249710600191,
+STORE, 140249653829632, 140249710600191,
+STORE, 140249653821440, 140249710600191,
+STORE, 140249653805056, 140249710600191,
+STORE, 140249653796864, 140249710600191,
+STORE, 140249653780480, 140249710600191,
+STORE, 140249653772288, 140249710600191,
+STORE, 140249653755904, 140249710600191,
+STORE, 140249652703232, 140249710600191,
+SNULL, 140249682087935, 140249710600191,
+STORE, 140249652703232, 140249682087935,
+STORE, 140249682087936, 140249710600191,
+ };
+
+ static const unsigned long set26[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140729464770560, 140737488351231,
+SNULL, 140729464774655, 140737488351231,
+STORE, 140729464770560, 140729464774655,
+STORE, 140729464639488, 140729464774655,
+STORE, 4194304, 5066751,
+STORE, 7159808, 7172095,
+STORE, 7172096, 7180287,
+STORE, 140729465114624, 140729465118719,
+STORE, 140729465102336, 140729465114623,
+STORE, 30867456, 30875647,
+STORE, 30867456, 31010815,
+STORE, 140109040988160, 140109042671615,
+STORE, 140109040959488, 140109040988159,
+STORE, 140109040943104, 140109040959487,
+ERASE, 140109040943104, 140109040959487,
+STORE, 140109040840704, 140109040959487,
+ERASE, 140109040840704, 140109040959487,
+STORE, 140109040951296, 140109040959487,
+ERASE, 140109040951296, 140109040959487,
+STORE, 140109040955392, 140109040959487,
+ERASE, 140109040955392, 140109040959487,
+ };
+ static const unsigned long set27[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140726128070656, 140737488351231,
+SNULL, 140726128074751, 140737488351231,
+STORE, 140726128070656, 140726128074751,
+STORE, 140726127939584, 140726128074751,
+STORE, 94478497189888, 94478499303423,
+SNULL, 94478497202175, 94478499303423,
+STORE, 94478497189888, 94478497202175,
+STORE, 94478497202176, 94478499303423,
+ERASE, 94478497202176, 94478499303423,
+STORE, 94478499295232, 94478499303423,
+STORE, 140415605723136, 140415607975935,
+SNULL, 140415605866495, 140415607975935,
+STORE, 140415605723136, 140415605866495,
+STORE, 140415605866496, 140415607975935,
+ERASE, 140415605866496, 140415607975935,
+STORE, 140415607963648, 140415607971839,
+STORE, 140415607971840, 140415607975935,
+STORE, 140726130024448, 140726130028543,
+STORE, 140726130012160, 140726130024447,
+STORE, 140415607934976, 140415607963647,
+STORE, 140415607926784, 140415607934975,
+STORE, 140415603245056, 140415605723135,
+SNULL, 140415603245056, 140415603613695,
+STORE, 140415603613696, 140415605723135,
+STORE, 140415603245056, 140415603613695,
+SNULL, 140415605710847, 140415605723135,
+STORE, 140415603613696, 140415605710847,
+STORE, 140415605710848, 140415605723135,
+ERASE, 140415605710848, 140415605723135,
+STORE, 140415605710848, 140415605723135,
+STORE, 140415599370240, 140415603245055,
+SNULL, 140415599370240, 140415601111039,
+STORE, 140415601111040, 140415603245055,
+STORE, 140415599370240, 140415601111039,
+SNULL, 140415603208191, 140415603245055,
+STORE, 140415601111040, 140415603208191,
+STORE, 140415603208192, 140415603245055,
+ERASE, 140415603208192, 140415603245055,
+STORE, 140415603208192, 140415603245055,
+STORE, 140415595692032, 140415599370239,
+SNULL, 140415595692032, 140415597207551,
+STORE, 140415597207552, 140415599370239,
+STORE, 140415595692032, 140415597207551,
+SNULL, 140415599304703, 140415599370239,
+STORE, 140415597207552, 140415599304703,
+STORE, 140415599304704, 140415599370239,
+SNULL, 140415599304704, 140415599353855,
+STORE, 140415599353856, 140415599370239,
+STORE, 140415599304704, 140415599353855,
+ERASE, 140415599304704, 140415599353855,
+STORE, 140415599304704, 140415599353855,
+ERASE, 140415599353856, 140415599370239,
+STORE, 140415599353856, 140415599370239,
+STORE, 140415593500672, 140415595692031,
+SNULL, 140415593500672, 140415593590783,
+STORE, 140415593590784, 140415595692031,
+STORE, 140415593500672, 140415593590783,
+SNULL, 140415595683839, 140415595692031,
+STORE, 140415593590784, 140415595683839,
+STORE, 140415595683840, 140415595692031,
+ERASE, 140415595683840, 140415595692031,
+STORE, 140415595683840, 140415595692031,
+STORE, 140415589703680, 140415593500671,
+SNULL, 140415589703680, 140415591362559,
+STORE, 140415591362560, 140415593500671,
+STORE, 140415589703680, 140415591362559,
+SNULL, 140415593459711, 140415593500671,
+STORE, 140415591362560, 140415593459711,
+STORE, 140415593459712, 140415593500671,
+SNULL, 140415593459712, 140415593484287,
+STORE, 140415593484288, 140415593500671,
+STORE, 140415593459712, 140415593484287,
+ERASE, 140415593459712, 140415593484287,
+STORE, 140415593459712, 140415593484287,
+ERASE, 140415593484288, 140415593500671,
+STORE, 140415593484288, 140415593500671,
+STORE, 140415587590144, 140415589703679,
+SNULL, 140415587590144, 140415587602431,
+STORE, 140415587602432, 140415589703679,
+STORE, 140415587590144, 140415587602431,
+SNULL, 140415589695487, 140415589703679,
+STORE, 140415587602432, 140415589695487,
+STORE, 140415589695488, 140415589703679,
+ERASE, 140415589695488, 140415589703679,
+STORE, 140415589695488, 140415589703679,
+STORE, 140415607918592, 140415607934975,
+STORE, 140415585398784, 140415587590143,
+SNULL, 140415585398784, 140415585480703,
+STORE, 140415585480704, 140415587590143,
+STORE, 140415585398784, 140415585480703,
+SNULL, 140415587573759, 140415587590143,
+STORE, 140415585480704, 140415587573759,
+STORE, 140415587573760, 140415587590143,
+SNULL, 140415587573760, 140415587581951,
+STORE, 140415587581952, 140415587590143,
+STORE, 140415587573760, 140415587581951,
+ERASE, 140415587573760, 140415587581951,
+STORE, 140415587573760, 140415587581951,
+ERASE, 140415587581952, 140415587590143,
+STORE, 140415587581952, 140415587590143,
+STORE, 140415583182848, 140415585398783,
+SNULL, 140415583182848, 140415583281151,
+STORE, 140415583281152, 140415585398783,
+STORE, 140415583182848, 140415583281151,
+SNULL, 140415585374207, 140415585398783,
+STORE, 140415583281152, 140415585374207,
+STORE, 140415585374208, 140415585398783,
+SNULL, 140415585374208, 140415585382399,
+STORE, 140415585382400, 140415585398783,
+STORE, 140415585374208, 140415585382399,
+ERASE, 140415585374208, 140415585382399,
+STORE, 140415585374208, 140415585382399,
+ERASE, 140415585382400, 140415585398783,
+STORE, 140415585382400, 140415585398783,
+STORE, 140415580979200, 140415583182847,
+SNULL, 140415580979200, 140415581081599,
+STORE, 140415581081600, 140415583182847,
+STORE, 140415580979200, 140415581081599,
+SNULL, 140415583174655, 140415583182847,
+STORE, 140415581081600, 140415583174655,
+STORE, 140415583174656, 140415583182847,
+ERASE, 140415583174656, 140415583182847,
+STORE, 140415583174656, 140415583182847,
+STORE, 140415578816512, 140415580979199,
+SNULL, 140415578816512, 140415578877951,
+STORE, 140415578877952, 140415580979199,
+STORE, 140415578816512, 140415578877951,
+SNULL, 140415580971007, 140415580979199,
+STORE, 140415578877952, 140415580971007,
+STORE, 140415580971008, 140415580979199,
+ERASE, 140415580971008, 140415580979199,
+STORE, 140415580971008, 140415580979199,
+STORE, 140415576563712, 140415578816511,
+SNULL, 140415576563712, 140415576715263,
+STORE, 140415576715264, 140415578816511,
+STORE, 140415576563712, 140415576715263,
+SNULL, 140415578808319, 140415578816511,
+STORE, 140415576715264, 140415578808319,
+STORE, 140415578808320, 140415578816511,
+ERASE, 140415578808320, 140415578816511,
+STORE, 140415578808320, 140415578816511,
+STORE, 140415574392832, 140415576563711,
+SNULL, 140415574392832, 140415574462463,
+STORE, 140415574462464, 140415576563711,
+STORE, 140415574392832, 140415574462463,
+SNULL, 140415576555519, 140415576563711,
+STORE, 140415574462464, 140415576555519,
+STORE, 140415576555520, 140415576563711,
+ERASE, 140415576555520, 140415576563711,
+STORE, 140415576555520, 140415576563711,
+STORE, 140415607910400, 140415607934975,
+STORE, 140415571230720, 140415574392831,
+SNULL, 140415571230720, 140415572291583,
+STORE, 140415572291584, 140415574392831,
+STORE, 140415571230720, 140415572291583,
+SNULL, 140415574384639, 140415574392831,
+STORE, 140415572291584, 140415574384639,
+STORE, 140415574384640, 140415574392831,
+ERASE, 140415574384640, 140415574392831,
+STORE, 140415574384640, 140415574392831,
+STORE, 140415607902208, 140415607934975,
+SNULL, 140415593476095, 140415593484287,
+STORE, 140415593459712, 140415593476095,
+STORE, 140415593476096, 140415593484287,
+SNULL, 140415574388735, 140415574392831,
+STORE, 140415574384640, 140415574388735,
+STORE, 140415574388736, 140415574392831,
+SNULL, 140415576559615, 140415576563711,
+STORE, 140415576555520, 140415576559615,
+STORE, 140415576559616, 140415576563711,
+SNULL, 140415589699583, 140415589703679,
+STORE, 140415589695488, 140415589699583,
+STORE, 140415589699584, 140415589703679,
+SNULL, 140415585378303, 140415585382399,
+STORE, 140415585374208, 140415585378303,
+STORE, 140415585378304, 140415585382399,
+SNULL, 140415578812415, 140415578816511,
+STORE, 140415578808320, 140415578812415,
+STORE, 140415578812416, 140415578816511,
+SNULL, 140415580975103, 140415580979199,
+STORE, 140415580971008, 140415580975103,
+STORE, 140415580975104, 140415580979199,
+SNULL, 140415583178751, 140415583182847,
+STORE, 140415583174656, 140415583178751,
+STORE, 140415583178752, 140415583182847,
+SNULL, 140415587577855, 140415587581951,
+STORE, 140415587573760, 140415587577855,
+STORE, 140415587577856, 140415587581951,
+SNULL, 140415595687935, 140415595692031,
+STORE, 140415595683840, 140415595687935,
+STORE, 140415595687936, 140415595692031,
+STORE, 140415607894016, 140415607934975,
+SNULL, 140415599345663, 140415599353855,
+STORE, 140415599304704, 140415599345663,
+STORE, 140415599345664, 140415599353855,
+SNULL, 140415603240959, 140415603245055,
+STORE, 140415603208192, 140415603240959,
+STORE, 140415603240960, 140415603245055,
+SNULL, 140415605719039, 140415605723135,
+STORE, 140415605710848, 140415605719039,
+STORE, 140415605719040, 140415605723135,
+SNULL, 94478499299327, 94478499303423,
+STORE, 94478499295232, 94478499299327,
+STORE, 94478499299328, 94478499303423,
+SNULL, 140415607967743, 140415607971839,
+STORE, 140415607963648, 140415607967743,
+STORE, 140415607967744, 140415607971839,
+ERASE, 140415607934976, 140415607963647,
+STORE, 94478511173632, 94478511378431,
+STORE, 140415606210560, 140415607894015,
+STORE, 140415607934976, 140415607963647,
+STORE, 94478511173632, 94478511513599,
+STORE, 94478511173632, 94478511648767,
+SNULL, 94478511615999, 94478511648767,
+STORE, 94478511173632, 94478511615999,
+STORE, 94478511616000, 94478511648767,
+ERASE, 94478511616000, 94478511648767,
+STORE, 94478511173632, 94478511751167,
+SNULL, 94478511747071, 94478511751167,
+STORE, 94478511173632, 94478511747071,
+STORE, 94478511747072, 94478511751167,
+ERASE, 94478511747072, 94478511751167,
+STORE, 94478511173632, 94478511882239,
+SNULL, 94478511878143, 94478511882239,
+STORE, 94478511173632, 94478511878143,
+STORE, 94478511878144, 94478511882239,
+ERASE, 94478511878144, 94478511882239,
+STORE, 94478511173632, 94478512013311,
+SNULL, 94478512009215, 94478512013311,
+STORE, 94478511173632, 94478512009215,
+STORE, 94478512009216, 94478512013311,
+ERASE, 94478512009216, 94478512013311,
+STORE, 94478511173632, 94478512144383,
+STORE, 94478511173632, 94478512279551,
+STORE, 140415606181888, 140415606210559,
+STORE, 140415569100800, 140415571230719,
+SNULL, 140415569100800, 140415569129471,
+STORE, 140415569129472, 140415571230719,
+STORE, 140415569100800, 140415569129471,
+SNULL, 140415571222527, 140415571230719,
+STORE, 140415569129472, 140415571222527,
+STORE, 140415571222528, 140415571230719,
+ERASE, 140415571222528, 140415571230719,
+STORE, 140415571222528, 140415571230719,
+STORE, 140415566905344, 140415569100799,
+SNULL, 140415566905344, 140415566987263,
+STORE, 140415566987264, 140415569100799,
+STORE, 140415566905344, 140415566987263,
+SNULL, 140415569084415, 140415569100799,
+STORE, 140415566987264, 140415569084415,
+STORE, 140415569084416, 140415569100799,
+SNULL, 140415569084416, 140415569092607,
+STORE, 140415569092608, 140415569100799,
+STORE, 140415569084416, 140415569092607,
+ERASE, 140415569084416, 140415569092607,
+STORE, 140415569084416, 140415569092607,
+ERASE, 140415569092608, 140415569100799,
+STORE, 140415569092608, 140415569100799,
+SNULL, 140415569088511, 140415569092607,
+STORE, 140415569084416, 140415569088511,
+STORE, 140415569088512, 140415569092607,
+SNULL, 140415571226623, 140415571230719,
+STORE, 140415571222528, 140415571226623,
+STORE, 140415571226624, 140415571230719,
+ERASE, 140415606181888, 140415606210559,
+STORE, 140415606181888, 140415606210559,
+STORE, 140415564759040, 140415566905343,
+SNULL, 140415564759040, 140415564804095,
+STORE, 140415564804096, 140415566905343,
+STORE, 140415564759040, 140415564804095,
+SNULL, 140415566897151, 140415566905343,
+STORE, 140415564804096, 140415566897151,
+STORE, 140415566897152, 140415566905343,
+ERASE, 140415566897152, 140415566905343,
+STORE, 140415566897152, 140415566905343,
+STORE, 140415562588160, 140415564759039,
+SNULL, 140415562588160, 140415562629119,
+STORE, 140415562629120, 140415564759039,
+STORE, 140415562588160, 140415562629119,
+SNULL, 140415564726271, 140415564759039,
+STORE, 140415562629120, 140415564726271,
+STORE, 140415564726272, 140415564759039,
+SNULL, 140415564726272, 140415564734463,
+STORE, 140415564734464, 140415564759039,
+STORE, 140415564726272, 140415564734463,
+ERASE, 140415564726272, 140415564734463,
+STORE, 140415564726272, 140415564734463,
+ERASE, 140415564734464, 140415564759039,
+STORE, 140415564734464, 140415564759039,
+SNULL, 140415564730367, 140415564734463,
+STORE, 140415564726272, 140415564730367,
+STORE, 140415564730368, 140415564734463,
+SNULL, 140415566901247, 140415566905343,
+STORE, 140415566897152, 140415566901247,
+STORE, 140415566901248, 140415566905343,
+ERASE, 140415606181888, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415605944320, 140415606210559,
+ERASE, 140415605944320, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 94478511173632, 94478512414719,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 140415606206464, 140415606210559,
+ERASE, 140415606206464, 140415606210559,
+STORE, 94478511173632, 94478512652287,
+STORE, 94478511173632, 94478512787455,
+STORE, 94478511173632, 94478512922623,
+STORE, 94478511173632, 94478513057791,
+STORE, 140415537422336, 140415562588159,
+STORE, 94478511173632, 94478513192959,
+STORE, 94478511173632, 94478513356799,
+STORE, 94478511173632, 94478513491967,
+STORE, 94478511173632, 94478513627135,
+STORE, 94478511173632, 94478513790975,
+STORE, 94478511173632, 94478513926143,
+STORE, 94478511173632, 94478514061311,
+STORE, 94478511173632, 94478514196479,
+STORE, 94478511173632, 94478514331647,
+STORE, 94478511173632, 94478514606079,
+STORE, 94478511173632, 94478514741247,
+STORE, 94478511173632, 94478514876415,
+STORE, 94478511173632, 94478515011583,
+STORE, 94478511173632, 94478515146751,
+STORE, 94478511173632, 94478515281919,
+STORE, 94478511173632, 94478515474431,
+STORE, 94478511173632, 94478515609599,
+STORE, 94478511173632, 94478515744767,
+STORE, 140415536922624, 140415562588159,
+STORE, 94478511173632, 94478515879935,
+STORE, 94478511173632, 94478516015103,
+STORE, 94478511173632, 94478516150271,
+STORE, 94478511173632, 94478516285439,
+STORE, 94478511173632, 94478516420607,
+STORE, 94478511173632, 94478516555775,
+STORE, 94478511173632, 94478516690943,
+STORE, 94478511173632, 94478516826111,
+STORE, 94478511173632, 94478516961279,
+STORE, 94478511173632, 94478517231615,
+STORE, 94478511173632, 94478517366783,
+STORE, 94478511173632, 94478517501951,
+STORE, 94478511173632, 94478517637119,
+STORE, 94478511173632, 94478517772287,
+STORE, 94478511173632, 94478517907455,
+STORE, 94478511173632, 94478518042623,
+STORE, 94478511173632, 94478518177791,
+STORE, 94478511173632, 94478518312959,
+STORE, 94478511173632, 94478518448127,
+STORE, 140415535910912, 140415562588159,
+SNULL, 140415536922623, 140415562588159,
+STORE, 140415535910912, 140415536922623,
+STORE, 140415536922624, 140415562588159,
+SNULL, 140415536922624, 140415537422335,
+STORE, 140415537422336, 140415562588159,
+STORE, 140415536922624, 140415537422335,
+ERASE, 140415536922624, 140415537422335,
+STORE, 94478511173632, 94478518583295,
+STORE, 94478511173632, 94478518718463,
+STORE, 94478511173632, 94478518853631,
+STORE, 94478511173632, 94478518988799,
+STORE, 94478511173632, 94478519123967,
+STORE, 94478511173632, 94478519259135,
+STORE, 140415509696512, 140415535910911,
+ERASE, 140415537422336, 140415562588159,
+STORE, 140415482433536, 140415509696511,
+ };
+ static const unsigned long set28[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140722475622400, 140737488351231,
+SNULL, 140722475626495, 140737488351231,
+STORE, 140722475622400, 140722475626495,
+STORE, 140722475491328, 140722475626495,
+STORE, 93865834291200, 93865836548095,
+SNULL, 93865834422271, 93865836548095,
+STORE, 93865834291200, 93865834422271,
+STORE, 93865834422272, 93865836548095,
+ERASE, 93865834422272, 93865836548095,
+STORE, 93865836519424, 93865836527615,
+STORE, 93865836527616, 93865836548095,
+STORE, 139918411104256, 139918413357055,
+SNULL, 139918411247615, 139918413357055,
+STORE, 139918411104256, 139918411247615,
+STORE, 139918411247616, 139918413357055,
+ERASE, 139918411247616, 139918413357055,
+STORE, 139918413344768, 139918413352959,
+STORE, 139918413352960, 139918413357055,
+STORE, 140722476642304, 140722476646399,
+STORE, 140722476630016, 140722476642303,
+STORE, 139918413316096, 139918413344767,
+STORE, 139918413307904, 139918413316095,
+STORE, 139918408888320, 139918411104255,
+SNULL, 139918408888320, 139918408986623,
+STORE, 139918408986624, 139918411104255,
+STORE, 139918408888320, 139918408986623,
+SNULL, 139918411079679, 139918411104255,
+STORE, 139918408986624, 139918411079679,
+STORE, 139918411079680, 139918411104255,
+SNULL, 139918411079680, 139918411087871,
+STORE, 139918411087872, 139918411104255,
+STORE, 139918411079680, 139918411087871,
+ERASE, 139918411079680, 139918411087871,
+STORE, 139918411079680, 139918411087871,
+ERASE, 139918411087872, 139918411104255,
+STORE, 139918411087872, 139918411104255,
+STORE, 139918405091328, 139918408888319,
+SNULL, 139918405091328, 139918406750207,
+STORE, 139918406750208, 139918408888319,
+STORE, 139918405091328, 139918406750207,
+SNULL, 139918408847359, 139918408888319,
+STORE, 139918406750208, 139918408847359,
+STORE, 139918408847360, 139918408888319,
+SNULL, 139918408847360, 139918408871935,
+STORE, 139918408871936, 139918408888319,
+STORE, 139918408847360, 139918408871935,
+ERASE, 139918408847360, 139918408871935,
+STORE, 139918408847360, 139918408871935,
+ERASE, 139918408871936, 139918408888319,
+STORE, 139918408871936, 139918408888319,
+STORE, 139918413299712, 139918413316095,
+SNULL, 139918408863743, 139918408871935,
+STORE, 139918408847360, 139918408863743,
+STORE, 139918408863744, 139918408871935,
+SNULL, 139918411083775, 139918411087871,
+STORE, 139918411079680, 139918411083775,
+STORE, 139918411083776, 139918411087871,
+SNULL, 93865836523519, 93865836527615,
+STORE, 93865836519424, 93865836523519,
+STORE, 93865836523520, 93865836527615,
+SNULL, 139918413348863, 139918413352959,
+STORE, 139918413344768, 139918413348863,
+STORE, 139918413348864, 139918413352959,
+ERASE, 139918413316096, 139918413344767,
+STORE, 93865848528896, 93865848664063,
+ };
+ static const unsigned long set29[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140734467944448, 140737488351231,
+SNULL, 140734467948543, 140737488351231,
+STORE, 140734467944448, 140734467948543,
+STORE, 140734467813376, 140734467948543,
+STORE, 94880407924736, 94880410177535,
+SNULL, 94880408055807, 94880410177535,
+STORE, 94880407924736, 94880408055807,
+STORE, 94880408055808, 94880410177535,
+ERASE, 94880408055808, 94880410177535,
+STORE, 94880410148864, 94880410157055,
+STORE, 94880410157056, 94880410177535,
+STORE, 140143367815168, 140143370067967,
+SNULL, 140143367958527, 140143370067967,
+STORE, 140143367815168, 140143367958527,
+STORE, 140143367958528, 140143370067967,
+ERASE, 140143367958528, 140143370067967,
+STORE, 140143370055680, 140143370063871,
+STORE, 140143370063872, 140143370067967,
+STORE, 140734468329472, 140734468333567,
+STORE, 140734468317184, 140734468329471,
+STORE, 140143370027008, 140143370055679,
+STORE, 140143370018816, 140143370027007,
+STORE, 140143365599232, 140143367815167,
+SNULL, 140143365599232, 140143365697535,
+STORE, 140143365697536, 140143367815167,
+STORE, 140143365599232, 140143365697535,
+SNULL, 140143367790591, 140143367815167,
+STORE, 140143365697536, 140143367790591,
+STORE, 140143367790592, 140143367815167,
+SNULL, 140143367790592, 140143367798783,
+STORE, 140143367798784, 140143367815167,
+STORE, 140143367790592, 140143367798783,
+ERASE, 140143367790592, 140143367798783,
+STORE, 140143367790592, 140143367798783,
+ERASE, 140143367798784, 140143367815167,
+STORE, 140143367798784, 140143367815167,
+STORE, 140143361802240, 140143365599231,
+SNULL, 140143361802240, 140143363461119,
+STORE, 140143363461120, 140143365599231,
+STORE, 140143361802240, 140143363461119,
+SNULL, 140143365558271, 140143365599231,
+STORE, 140143363461120, 140143365558271,
+STORE, 140143365558272, 140143365599231,
+SNULL, 140143365558272, 140143365582847,
+STORE, 140143365582848, 140143365599231,
+STORE, 140143365558272, 140143365582847,
+ERASE, 140143365558272, 140143365582847,
+STORE, 140143365558272, 140143365582847,
+ERASE, 140143365582848, 140143365599231,
+STORE, 140143365582848, 140143365599231,
+STORE, 140143370010624, 140143370027007,
+SNULL, 140143365574655, 140143365582847,
+STORE, 140143365558272, 140143365574655,
+STORE, 140143365574656, 140143365582847,
+SNULL, 140143367794687, 140143367798783,
+STORE, 140143367790592, 140143367794687,
+STORE, 140143367794688, 140143367798783,
+SNULL, 94880410152959, 94880410157055,
+STORE, 94880410148864, 94880410152959,
+STORE, 94880410152960, 94880410157055,
+SNULL, 140143370059775, 140143370063871,
+STORE, 140143370055680, 140143370059775,
+STORE, 140143370059776, 140143370063871,
+ERASE, 140143370027008, 140143370055679,
+STORE, 94880442400768, 94880442535935,
+STORE, 140143353409536, 140143361802239,
+SNULL, 140143353413631, 140143361802239,
+STORE, 140143353409536, 140143353413631,
+STORE, 140143353413632, 140143361802239,
+STORE, 140143345016832, 140143353409535,
+STORE, 140143210799104, 140143345016831,
+SNULL, 140143210799104, 140143239364607,
+STORE, 140143239364608, 140143345016831,
+STORE, 140143210799104, 140143239364607,
+ERASE, 140143210799104, 140143239364607,
+SNULL, 140143306473471, 140143345016831,
+STORE, 140143239364608, 140143306473471,
+STORE, 140143306473472, 140143345016831,
+ERASE, 140143306473472, 140143345016831,
+SNULL, 140143239499775, 140143306473471,
+STORE, 140143239364608, 140143239499775,
+STORE, 140143239499776, 140143306473471,
+SNULL, 140143345020927, 140143353409535,
+STORE, 140143345016832, 140143345020927,
+STORE, 140143345020928, 140143353409535,
+STORE, 140143336624128, 140143345016831,
+SNULL, 140143336628223, 140143345016831,
+STORE, 140143336624128, 140143336628223,
+STORE, 140143336628224, 140143345016831,
+STORE, 140143328231424, 140143336624127,
+SNULL, 140143328235519, 140143336624127,
+STORE, 140143328231424, 140143328235519,
+STORE, 140143328235520, 140143336624127,
+STORE, 140143319838720, 140143328231423,
+SNULL, 140143319842815, 140143328231423,
+STORE, 140143319838720, 140143319842815,
+STORE, 140143319842816, 140143328231423,
+STORE, 140143311446016, 140143319838719,
+STORE, 140143105146880, 140143239364607,
+STORE, 140143096754176, 140143105146879,
+STORE, 140143029645312, 140143096754175,
+ERASE, 140143029645312, 140143096754175,
+STORE, 140142962536448, 140143096754175,
+SNULL, 140142962536448, 140142970929151,
+STORE, 140142970929152, 140143096754175,
+STORE, 140142962536448, 140142970929151,
+ERASE, 140142962536448, 140142970929151,
+STORE, 140142962536448, 140142970929151,
+STORE, 140142828318720, 140142962536447,
+STORE, 140142819926016, 140142828318719,
+SNULL, 140142828318720, 140142836711423,
+STORE, 140142836711424, 140142962536447,
+STORE, 140142828318720, 140142836711423,
+ERASE, 140142828318720, 140142836711423,
+SNULL, 140143172255743, 140143239364607,
+STORE, 140143105146880, 140143172255743,
+STORE, 140143172255744, 140143239364607,
+ERASE, 140143172255744, 140143239364607,
+SNULL, 140143105282047, 140143172255743,
+STORE, 140143105146880, 140143105282047,
+STORE, 140143105282048, 140143172255743,
+SNULL, 140143038038015, 140143096754175,
+STORE, 140142970929152, 140143038038015,
+STORE, 140143038038016, 140143096754175,
+ERASE, 140143038038016, 140143096754175,
+SNULL, 140142971064319, 140143038038015,
+STORE, 140142970929152, 140142971064319,
+STORE, 140142971064320, 140143038038015,
+SNULL, 140142903820287, 140142962536447,
+STORE, 140142836711424, 140142903820287,
+STORE, 140142903820288, 140142962536447,
+ERASE, 140142903820288, 140142962536447,
+SNULL, 140142836846591, 140142903820287,
+STORE, 140142836711424, 140142836846591,
+STORE, 140142836846592, 140142903820287,
+STORE, 140142685708288, 140142819926015,
+SNULL, 140143311450111, 140143319838719,
+STORE, 140143311446016, 140143311450111,
+STORE, 140143311450112, 140143319838719,
+SNULL, 140142962540543, 140142970929151,
+STORE, 140142962536448, 140142962540543,
+STORE, 140142962540544, 140142970929151,
+SNULL, 140142685708288, 140142702493695,
+STORE, 140142702493696, 140142819926015,
+STORE, 140142685708288, 140142702493695,
+ERASE, 140142685708288, 140142702493695,
+SNULL, 140142769602559, 140142819926015,
+STORE, 140142702493696, 140142769602559,
+STORE, 140142769602560, 140142819926015,
+ERASE, 140142769602560, 140142819926015,
+SNULL, 140142702628863, 140142769602559,
+STORE, 140142702493696, 140142702628863,
+STORE, 140142702628864, 140142769602559,
+STORE, 140143230971904, 140143239364607,
+SNULL, 140143230975999, 140143239364607,
+STORE, 140143230971904, 140143230975999,
+STORE, 140143230976000, 140143239364607,
+SNULL, 140143096758271, 140143105146879,
+STORE, 140143096754176, 140143096758271,
+STORE, 140143096758272, 140143105146879,
+STORE, 140143222579200, 140143230971903,
+SNULL, 140143222583295, 140143230971903,
+STORE, 140143222579200, 140143222583295,
+STORE, 140143222583296, 140143230971903,
+STORE, 140143214186496, 140143222579199,
+SNULL, 140142819930111, 140142828318719,
+STORE, 140142819926016, 140142819930111,
+STORE, 140142819930112, 140142828318719,
+STORE, 140143205793792, 140143222579199,
+SNULL, 140143205793792, 140143214186495,
+STORE, 140143214186496, 140143222579199,
+STORE, 140143205793792, 140143214186495,
+SNULL, 140143214190591, 140143222579199,
+STORE, 140143214186496, 140143214190591,
+STORE, 140143214190592, 140143222579199,
+SNULL, 140143205797887, 140143214186495,
+STORE, 140143205793792, 140143205797887,
+STORE, 140143205797888, 140143214186495,
+STORE, 140143197401088, 140143205793791,
+SNULL, 140143197405183, 140143205793791,
+STORE, 140143197401088, 140143197405183,
+STORE, 140143197405184, 140143205793791,
+STORE, 140143189008384, 140143197401087,
+STORE, 140143180615680, 140143197401087,
+STORE, 140143088361472, 140143096754175,
+SNULL, 140143180619775, 140143197401087,
+STORE, 140143180615680, 140143180619775,
+STORE, 140143180619776, 140143197401087,
+SNULL, 140143180619776, 140143189008383,
+STORE, 140143189008384, 140143197401087,
+STORE, 140143180619776, 140143189008383,
+SNULL, 140143189012479, 140143197401087,
+STORE, 140143189008384, 140143189012479,
+STORE, 140143189012480, 140143197401087,
+SNULL, 140143088365567, 140143096754175,
+STORE, 140143088361472, 140143088365567,
+STORE, 140143088365568, 140143096754175,
+STORE, 140143079968768, 140143088361471,
+SNULL, 140143079972863, 140143088361471,
+STORE, 140143079968768, 140143079972863,
+STORE, 140143079972864, 140143088361471,
+STORE, 140143071576064, 140143079968767,
+SNULL, 140143071580159, 140143079968767,
+STORE, 140143071576064, 140143071580159,
+STORE, 140143071580160, 140143079968767,
+STORE, 140143063183360, 140143071576063,
+STORE, 140143054790656, 140143071576063,
+SNULL, 140143054794751, 140143071576063,
+STORE, 140143054790656, 140143054794751,
+STORE, 140143054794752, 140143071576063,
+SNULL, 140143054794752, 140143063183359,
+STORE, 140143063183360, 140143071576063,
+STORE, 140143054794752, 140143063183359,
+SNULL, 140143063187455, 140143071576063,
+STORE, 140143063183360, 140143063187455,
+STORE, 140143063187456, 140143071576063,
+STORE, 140143046397952, 140143054790655,
+STORE, 140142954143744, 140142962536447,
+STORE, 140142945751040, 140142962536447,
+STORE, 140142937358336, 140142962536447,
+STORE, 140142928965632, 140142962536447,
+STORE, 140142568275968, 140142702493695,
+SNULL, 140142635384831, 140142702493695,
+STORE, 140142568275968, 140142635384831,
+STORE, 140142635384832, 140142702493695,
+ERASE, 140142635384832, 140142702493695,
+STORE, 140142920572928, 140142962536447,
+STORE, 140142912180224, 140142962536447,
+STORE, 140142568275968, 140142702493695,
+SNULL, 140142568275968, 140142635384831,
+STORE, 140142635384832, 140142702493695,
+STORE, 140142568275968, 140142635384831,
+SNULL, 140142635519999, 140142702493695,
+STORE, 140142635384832, 140142635519999,
+STORE, 140142635520000, 140142702493695,
+STORE, 140142819930112, 140142836711423,
+STORE, 140142811533312, 140142819926015,
+STORE, 140142434058240, 140142635384831,
+SNULL, 140142501167103, 140142635384831,
+STORE, 140142434058240, 140142501167103,
+STORE, 140142501167104, 140142635384831,
+SNULL, 140142501167104, 140142568275967,
+STORE, 140142568275968, 140142635384831,
+STORE, 140142501167104, 140142568275967,
+ERASE, 140142501167104, 140142568275967,
+STORE, 140142299840512, 140142501167103,
+STORE, 140142803140608, 140142819926015,
+SNULL, 140142366949375, 140142501167103,
+STORE, 140142299840512, 140142366949375,
+STORE, 140142366949376, 140142501167103,
+SNULL, 140142366949376, 140142434058239,
+STORE, 140142434058240, 140142501167103,
+STORE, 140142366949376, 140142434058239,
+ERASE, 140142366949376, 140142434058239,
+STORE, 140142794747904, 140142819926015,
+STORE, 140142786355200, 140142819926015,
+STORE, 140142299840512, 140142501167103,
+STORE, 140142777962496, 140142819926015,
+STORE, 140142559883264, 140142568275967,
+STORE, 140142232731648, 140142501167103,
+STORE, 140142551490560, 140142568275967,
+SNULL, 140142777962496, 140142803140607,
+STORE, 140142803140608, 140142819926015,
+STORE, 140142777962496, 140142803140607,
+SNULL, 140142803144703, 140142819926015,
+STORE, 140142803140608, 140142803144703,
+STORE, 140142803144704, 140142819926015,
+STORE, 140142543097856, 140142568275967,
+STORE, 140142098513920, 140142501167103,
+SNULL, 140142165622783, 140142501167103,
+STORE, 140142098513920, 140142165622783,
+STORE, 140142165622784, 140142501167103,
+SNULL, 140142165622784, 140142232731647,
+STORE, 140142232731648, 140142501167103,
+STORE, 140142165622784, 140142232731647,
+ERASE, 140142165622784, 140142232731647,
+SNULL, 140142568411135, 140142635384831,
+STORE, 140142568275968, 140142568411135,
+STORE, 140142568411136, 140142635384831,
+STORE, 140141964296192, 140142165622783,
+SNULL, 140142912180224, 140142928965631,
+STORE, 140142928965632, 140142962536447,
+STORE, 140142912180224, 140142928965631,
+SNULL, 140142928969727, 140142962536447,
+STORE, 140142928965632, 140142928969727,
+STORE, 140142928969728, 140142962536447,
+STORE, 140141830078464, 140142165622783,
+SNULL, 140142912184319, 140142928965631,
+STORE, 140142912180224, 140142912184319,
+STORE, 140142912184320, 140142928965631,
+SNULL, 140142232731648, 140142434058239,
+STORE, 140142434058240, 140142501167103,
+STORE, 140142232731648, 140142434058239,
+SNULL, 140142434193407, 140142501167103,
+STORE, 140142434058240, 140142434193407,
+STORE, 140142434193408, 140142501167103,
+SNULL, 140142232731648, 140142299840511,
+STORE, 140142299840512, 140142434058239,
+STORE, 140142232731648, 140142299840511,
+SNULL, 140142299975679, 140142434058239,
+STORE, 140142299840512, 140142299975679,
+STORE, 140142299975680, 140142434058239,
+SNULL, 140142928969728, 140142954143743,
+STORE, 140142954143744, 140142962536447,
+STORE, 140142928969728, 140142954143743,
+SNULL, 140142954147839, 140142962536447,
+STORE, 140142954143744, 140142954147839,
+STORE, 140142954147840, 140142962536447,
+STORE, 140141830078464, 140142299840511,
+SNULL, 140142543097856, 140142559883263,
+STORE, 140142559883264, 140142568275967,
+STORE, 140142543097856, 140142559883263,
+SNULL, 140142559887359, 140142568275967,
+STORE, 140142559883264, 140142559887359,
+STORE, 140142559887360, 140142568275967,
+STORE, 140142534705152, 140142559883263,
+SNULL, 140142928969728, 140142945751039,
+STORE, 140142945751040, 140142954143743,
+STORE, 140142928969728, 140142945751039,
+SNULL, 140142945755135, 140142954143743,
+STORE, 140142945751040, 140142945755135,
+STORE, 140142945755136, 140142954143743,
+SNULL, 140142299975680, 140142366949375,
+STORE, 140142366949376, 140142434058239,
+STORE, 140142299975680, 140142366949375,
+SNULL, 140142367084543, 140142434058239,
+STORE, 140142366949376, 140142367084543,
+STORE, 140142367084544, 140142434058239,
+SNULL, 140142928969728, 140142937358335,
+STORE, 140142937358336, 140142945751039,
+STORE, 140142928969728, 140142937358335,
+SNULL, 140142937362431, 140142945751039,
+STORE, 140142937358336, 140142937362431,
+STORE, 140142937362432, 140142945751039,
+SNULL, 140141830078464, 140142232731647,
+STORE, 140142232731648, 140142299840511,
+STORE, 140141830078464, 140142232731647,
+SNULL, 140142232866815, 140142299840511,
+STORE, 140142232731648, 140142232866815,
+STORE, 140142232866816, 140142299840511,
+SNULL, 140142534705152, 140142543097855,
+STORE, 140142543097856, 140142559883263,
+STORE, 140142534705152, 140142543097855,
+SNULL, 140142543101951, 140142559883263,
+STORE, 140142543097856, 140142543101951,
+STORE, 140142543101952, 140142559883263,
+STORE, 140142526312448, 140142543097855,
+STORE, 140142517919744, 140142543097855,
+SNULL, 140141830078464, 140142098513919,
+STORE, 140142098513920, 140142232731647,
+STORE, 140141830078464, 140142098513919,
+SNULL, 140142098649087, 140142232731647,
+STORE, 140142098513920, 140142098649087,
+STORE, 140142098649088, 140142232731647,
+SNULL, 140142031405055, 140142098513919,
+STORE, 140141830078464, 140142031405055,
+STORE, 140142031405056, 140142098513919,
+ERASE, 140142031405056, 140142098513919,
+SNULL, 140141830078464, 140141964296191,
+STORE, 140141964296192, 140142031405055,
+STORE, 140141830078464, 140141964296191,
+SNULL, 140141964431359, 140142031405055,
+STORE, 140141964296192, 140141964431359,
+STORE, 140141964431360, 140142031405055,
+STORE, 140142509527040, 140142543097855,
+SNULL, 140141897187327, 140141964296191,
+STORE, 140141830078464, 140141897187327,
+STORE, 140141897187328, 140141964296191,
+ERASE, 140141897187328, 140141964296191,
+SNULL, 140141830213631, 140141897187327,
+STORE, 140141830078464, 140141830213631,
+STORE, 140141830213632, 140141897187327,
+SNULL, 140142803144704, 140142811533311,
+STORE, 140142811533312, 140142819926015,
+STORE, 140142803144704, 140142811533311,
+SNULL, 140142811537407, 140142819926015,
+STORE, 140142811533312, 140142811537407,
+STORE, 140142811537408, 140142819926015,
+SNULL, 140142098649088, 140142165622783,
+STORE, 140142165622784, 140142232731647,
+STORE, 140142098649088, 140142165622783,
+SNULL, 140142165757951, 140142232731647,
+STORE, 140142165622784, 140142165757951,
+STORE, 140142165757952, 140142232731647,
+STORE, 140142090121216, 140142098513919,
+SNULL, 140142777962496, 140142786355199,
+STORE, 140142786355200, 140142803140607,
+STORE, 140142777962496, 140142786355199,
+SNULL, 140142786359295, 140142803140607,
+STORE, 140142786355200, 140142786359295,
+STORE, 140142786359296, 140142803140607,
+SNULL, 140142509527040, 140142534705151,
+STORE, 140142534705152, 140142543097855,
+STORE, 140142509527040, 140142534705151,
+SNULL, 140142534709247, 140142543097855,
+STORE, 140142534705152, 140142534709247,
+STORE, 140142534709248, 140142543097855,
+STORE, 140142081728512, 140142098513919,
+SNULL, 140142786359296, 140142794747903,
+STORE, 140142794747904, 140142803140607,
+STORE, 140142786359296, 140142794747903,
+SNULL, 140142794751999, 140142803140607,
+STORE, 140142794747904, 140142794751999,
+STORE, 140142794752000, 140142803140607,
+STORE, 140142073335808, 140142098513919,
+SNULL, 140142073339903, 140142098513919,
+STORE, 140142073335808, 140142073339903,
+STORE, 140142073339904, 140142098513919,
+SNULL, 140142543101952, 140142551490559,
+STORE, 140142551490560, 140142559883263,
+STORE, 140142543101952, 140142551490559,
+SNULL, 140142551494655, 140142559883263,
+STORE, 140142551490560, 140142551494655,
+STORE, 140142551494656, 140142559883263,
+SNULL, 140142509527040, 140142517919743,
+STORE, 140142517919744, 140142534705151,
+STORE, 140142509527040, 140142517919743,
+SNULL, 140142517923839, 140142534705151,
+STORE, 140142517919744, 140142517923839,
+STORE, 140142517923840, 140142534705151,
+STORE, 140142064943104, 140142073335807,
+SNULL, 140142073339904, 140142090121215,
+STORE, 140142090121216, 140142098513919,
+STORE, 140142073339904, 140142090121215,
+SNULL, 140142090125311, 140142098513919,
+STORE, 140142090121216, 140142090125311,
+STORE, 140142090125312, 140142098513919,
+STORE, 140142056550400, 140142073335807,
+SNULL, 140142056554495, 140142073335807,
+STORE, 140142056550400, 140142056554495,
+STORE, 140142056554496, 140142073335807,
+STORE, 140142048157696, 140142056550399,
+SNULL, 140142509531135, 140142517919743,
+STORE, 140142509527040, 140142509531135,
+STORE, 140142509531136, 140142517919743,
+SNULL, 140142777966591, 140142786355199,
+STORE, 140142777962496, 140142777966591,
+STORE, 140142777966592, 140142786355199,
+SNULL, 140143046402047, 140143054790655,
+STORE, 140143046397952, 140143046402047,
+STORE, 140143046402048, 140143054790655,
+SNULL, 140142912184320, 140142920572927,
+STORE, 140142920572928, 140142928965631,
+STORE, 140142912184320, 140142920572927,
+SNULL, 140142920577023, 140142928965631,
+STORE, 140142920572928, 140142920577023,
+STORE, 140142920577024, 140142928965631,
+STORE, 140142039764992, 140142056550399,
+STORE, 140141955903488, 140141964296191,
+SNULL, 140142819930112, 140142828318719,
+STORE, 140142828318720, 140142836711423,
+STORE, 140142819930112, 140142828318719,
+SNULL, 140142828322815, 140142836711423,
+STORE, 140142828318720, 140142828322815,
+STORE, 140142828322816, 140142836711423,
+SNULL, 140142517923840, 140142526312447,
+STORE, 140142526312448, 140142534705151,
+STORE, 140142517923840, 140142526312447,
+SNULL, 140142526316543, 140142534705151,
+STORE, 140142526312448, 140142526316543,
+STORE, 140142526316544, 140142534705151,
+STORE, 140141947510784, 140141964296191,
+SNULL, 140142056554496, 140142064943103,
+STORE, 140142064943104, 140142073335807,
+STORE, 140142056554496, 140142064943103,
+SNULL, 140142064947199, 140142073335807,
+STORE, 140142064943104, 140142064947199,
+STORE, 140142064947200, 140142073335807,
+SNULL, 140142073339904, 140142081728511,
+STORE, 140142081728512, 140142090121215,
+STORE, 140142073339904, 140142081728511,
+SNULL, 140142081732607, 140142090121215,
+STORE, 140142081728512, 140142081732607,
+STORE, 140142081732608, 140142090121215,
+STORE, 140141939118080, 140141964296191,
+STORE, 140141930725376, 140141964296191,
+STORE, 140141922332672, 140141964296191,
+STORE, 140141913939968, 140141964296191,
+SNULL, 140141913939968, 140141922332671,
+STORE, 140141922332672, 140141964296191,
+STORE, 140141913939968, 140141922332671,
+SNULL, 140141922336767, 140141964296191,
+STORE, 140141922332672, 140141922336767,
+STORE, 140141922336768, 140141964296191,
+STORE, 140141905547264, 140141922332671,
+SNULL, 140141905551359, 140141922332671,
+STORE, 140141905547264, 140141905551359,
+STORE, 140141905551360, 140141922332671,
+STORE, 140141821685760, 140141830078463,
+STORE, 140141813293056, 140141830078463,
+STORE, 140141804900352, 140141830078463,
+STORE, 140141796507648, 140141830078463,
+SNULL, 140141796511743, 140141830078463,
+STORE, 140141796507648, 140141796511743,
+STORE, 140141796511744, 140141830078463,
+SNULL, 140141922336768, 140141955903487,
+STORE, 140141955903488, 140141964296191,
+STORE, 140141922336768, 140141955903487,
+SNULL, 140141955907583, 140141964296191,
+STORE, 140141955903488, 140141955907583,
+STORE, 140141955907584, 140141964296191,
+STORE, 140141788114944, 140141796507647,
+STORE, 140141779722240, 140141796507647,
+SNULL, 140141779722240, 140141788114943,
+STORE, 140141788114944, 140141796507647,
+STORE, 140141779722240, 140141788114943,
+SNULL, 140141788119039, 140141796507647,
+STORE, 140141788114944, 140141788119039,
+STORE, 140141788119040, 140141796507647,
+SNULL, 140141922336768, 140141947510783,
+STORE, 140141947510784, 140141955903487,
+STORE, 140141922336768, 140141947510783,
+SNULL, 140141947514879, 140141955903487,
+STORE, 140141947510784, 140141947514879,
+STORE, 140141947514880, 140141955903487,
+SNULL, 140142039764992, 140142048157695,
+STORE, 140142048157696, 140142056550399,
+STORE, 140142039764992, 140142048157695,
+SNULL, 140142048161791, 140142056550399,
+STORE, 140142048157696, 140142048161791,
+STORE, 140142048161792, 140142056550399,
+SNULL, 140142039769087, 140142048157695,
+STORE, 140142039764992, 140142039769087,
+STORE, 140142039769088, 140142048157695,
+SNULL, 140141796511744, 140141804900351,
+STORE, 140141804900352, 140141830078463,
+STORE, 140141796511744, 140141804900351,
+SNULL, 140141804904447, 140141830078463,
+STORE, 140141804900352, 140141804904447,
+STORE, 140141804904448, 140141830078463,
+STORE, 140141771329536, 140141788114943,
+STORE, 140141762936832, 140141788114943,
+STORE, 140141754544128, 140141788114943,
+SNULL, 140141804904448, 140141821685759,
+STORE, 140141821685760, 140141830078463,
+STORE, 140141804904448, 140141821685759,
+SNULL, 140141821689855, 140141830078463,
+STORE, 140141821685760, 140141821689855,
+STORE, 140141821689856, 140141830078463,
+SNULL, 140141922336768, 140141939118079,
+STORE, 140141939118080, 140141947510783,
+STORE, 140141922336768, 140141939118079,
+SNULL, 140141939122175, 140141947510783,
+STORE, 140141939118080, 140141939122175,
+STORE, 140141939122176, 140141947510783,
+SNULL, 140141905551360, 140141913939967,
+STORE, 140141913939968, 140141922332671,
+STORE, 140141905551360, 140141913939967,
+SNULL, 140141913944063, 140141922332671,
+STORE, 140141913939968, 140141913944063,
+STORE, 140141913944064, 140141922332671,
+STORE, 140141746151424, 140141788114943,
+STORE, 140141737758720, 140141788114943,
+SNULL, 140141804904448, 140141813293055,
+STORE, 140141813293056, 140141821685759,
+STORE, 140141804904448, 140141813293055,
+SNULL, 140141813297151, 140141821685759,
+STORE, 140141813293056, 140141813297151,
+STORE, 140141813297152, 140141821685759,
+STORE, 140141729366016, 140141788114943,
+STORE, 140141720973312, 140141788114943,
+STORE, 140141712580608, 140141788114943,
+SNULL, 140141712584703, 140141788114943,
+STORE, 140141712580608, 140141712584703,
+STORE, 140141712584704, 140141788114943,
+SNULL, 140141922336768, 140141930725375,
+STORE, 140141930725376, 140141939118079,
+STORE, 140141922336768, 140141930725375,
+SNULL, 140141930729471, 140141939118079,
+STORE, 140141930725376, 140141930729471,
+STORE, 140141930729472, 140141939118079,
+STORE, 140141704187904, 140141712580607,
+SNULL, 140141704191999, 140141712580607,
+STORE, 140141704187904, 140141704191999,
+STORE, 140141704192000, 140141712580607,
+STORE, 140141695795200, 140141704187903,
+STORE, 140141687402496, 140141704187903,
+SNULL, 140141712584704, 140141771329535,
+STORE, 140141771329536, 140141788114943,
+STORE, 140141712584704, 140141771329535,
+SNULL, 140141771333631, 140141788114943,
+STORE, 140141771329536, 140141771333631,
+STORE, 140141771333632, 140141788114943,
+SNULL, 140141771333632, 140141779722239,
+STORE, 140141779722240, 140141788114943,
+STORE, 140141771333632, 140141779722239,
+SNULL, 140141779726335, 140141788114943,
+STORE, 140141779722240, 140141779726335,
+STORE, 140141779726336, 140141788114943,
+STORE, 140141679009792, 140141704187903,
+SNULL, 140141679013887, 140141704187903,
+STORE, 140141679009792, 140141679013887,
+STORE, 140141679013888, 140141704187903,
+STORE, 140141670617088, 140141679009791,
+SNULL, 140141670621183, 140141679009791,
+STORE, 140141670617088, 140141670621183,
+STORE, 140141670621184, 140141679009791,
+STORE, 140141662224384, 140141670617087,
+SNULL, 140141712584704, 140141737758719,
+STORE, 140141737758720, 140141771329535,
+STORE, 140141712584704, 140141737758719,
+SNULL, 140141737762815, 140141771329535,
+STORE, 140141737758720, 140141737762815,
+STORE, 140141737762816, 140141771329535,
+SNULL, 140141712584704, 140141729366015,
+STORE, 140141729366016, 140141737758719,
+STORE, 140141712584704, 140141729366015,
+SNULL, 140141729370111, 140141737758719,
+STORE, 140141729366016, 140141729370111,
+STORE, 140141729370112, 140141737758719,
+SNULL, 140141737762816, 140141746151423,
+STORE, 140141746151424, 140141771329535,
+STORE, 140141737762816, 140141746151423,
+SNULL, 140141746155519, 140141771329535,
+STORE, 140141746151424, 140141746155519,
+STORE, 140141746155520, 140141771329535,
+STORE, 140141653831680, 140141670617087,
+SNULL, 140141746155520, 140141762936831,
+STORE, 140141762936832, 140141771329535,
+STORE, 140141746155520, 140141762936831,
+SNULL, 140141762940927, 140141771329535,
+STORE, 140141762936832, 140141762940927,
+STORE, 140141762940928, 140141771329535,
+STORE, 140141645438976, 140141670617087,
+SNULL, 140141645443071, 140141670617087,
+STORE, 140141645438976, 140141645443071,
+STORE, 140141645443072, 140141670617087,
+SNULL, 140141712584704, 140141720973311,
+STORE, 140141720973312, 140141729366015,
+STORE, 140141712584704, 140141720973311,
+SNULL, 140141720977407, 140141729366015,
+STORE, 140141720973312, 140141720977407,
+STORE, 140141720977408, 140141729366015,
+STORE, 140141637046272, 140141645438975,
+SNULL, 140141637050367, 140141645438975,
+STORE, 140141637046272, 140141637050367,
+STORE, 140141637050368, 140141645438975,
+STORE, 140141628653568, 140141637046271,
+SNULL, 140141628657663, 140141637046271,
+STORE, 140141628653568, 140141628657663,
+STORE, 140141628657664, 140141637046271,
+STORE, 140141620260864, 140141628653567,
+SNULL, 140141679013888, 140141687402495,
+STORE, 140141687402496, 140141704187903,
+STORE, 140141679013888, 140141687402495,
+SNULL, 140141687406591, 140141704187903,
+STORE, 140141687402496, 140141687406591,
+STORE, 140141687406592, 140141704187903,
+SNULL, 140141746155520, 140141754544127,
+STORE, 140141754544128, 140141762936831,
+STORE, 140141746155520, 140141754544127,
+SNULL, 140141754548223, 140141762936831,
+STORE, 140141754544128, 140141754548223,
+STORE, 140141754548224, 140141762936831,
+SNULL, 140141687406592, 140141695795199,
+STORE, 140141695795200, 140141704187903,
+STORE, 140141687406592, 140141695795199,
+SNULL, 140141695799295, 140141704187903,
+STORE, 140141695795200, 140141695799295,
+STORE, 140141695799296, 140141704187903,
+STORE, 140141611868160, 140141628653567,
+SNULL, 140141611872255, 140141628653567,
+STORE, 140141611868160, 140141611872255,
+STORE, 140141611872256, 140141628653567,
+SNULL, 140141645443072, 140141662224383,
+STORE, 140141662224384, 140141670617087,
+STORE, 140141645443072, 140141662224383,
+SNULL, 140141662228479, 140141670617087,
+STORE, 140141662224384, 140141662228479,
+STORE, 140141662228480, 140141670617087,
+STORE, 140141603475456, 140141611868159,
+SNULL, 140141603479551, 140141611868159,
+STORE, 140141603475456, 140141603479551,
+STORE, 140141603479552, 140141611868159,
+STORE, 140141595082752, 140141603475455,
+SNULL, 140141645443072, 140141653831679,
+STORE, 140141653831680, 140141662224383,
+STORE, 140141645443072, 140141653831679,
+SNULL, 140141653835775, 140141662224383,
+STORE, 140141653831680, 140141653835775,
+STORE, 140141653835776, 140141662224383,
+STORE, 140141586690048, 140141603475455,
+SNULL, 140141611872256, 140141620260863,
+STORE, 140141620260864, 140141628653567,
+STORE, 140141611872256, 140141620260863,
+SNULL, 140141620264959, 140141628653567,
+STORE, 140141620260864, 140141620264959,
+STORE, 140141620264960, 140141628653567,
+SNULL, 140141586690048, 140141595082751,
+STORE, 140141595082752, 140141603475455,
+STORE, 140141586690048, 140141595082751,
+SNULL, 140141595086847, 140141603475455,
+STORE, 140141595082752, 140141595086847,
+STORE, 140141595086848, 140141603475455,
+STORE, 140141578297344, 140141595082751,
+SNULL, 140141578301439, 140141595082751,
+STORE, 140141578297344, 140141578301439,
+STORE, 140141578301440, 140141595082751,
+SNULL, 140141578301440, 140141586690047,
+STORE, 140141586690048, 140141595082751,
+STORE, 140141578301440, 140141586690047,
+SNULL, 140141586694143, 140141595082751,
+STORE, 140141586690048, 140141586694143,
+STORE, 140141586694144, 140141595082751,
+STORE, 140143370027008, 140143370055679,
+STORE, 140143309254656, 140143311446015,
+SNULL, 140143309254656, 140143309344767,
+STORE, 140143309344768, 140143311446015,
+STORE, 140143309254656, 140143309344767,
+SNULL, 140143311437823, 140143311446015,
+STORE, 140143309344768, 140143311437823,
+STORE, 140143311437824, 140143311446015,
+ERASE, 140143311437824, 140143311446015,
+STORE, 140143311437824, 140143311446015,
+SNULL, 140143311441919, 140143311446015,
+STORE, 140143311437824, 140143311441919,
+STORE, 140143311441920, 140143311446015,
+ERASE, 140143370027008, 140143370055679,
+ERASE, 140142912180224, 140142912184319,
+ERASE, 140142912184320, 140142920572927,
+ERASE, 140142945751040, 140142945755135,
+ERASE, 140142945755136, 140142954143743,
+ERASE, 140142090121216, 140142090125311,
+ERASE, 140142090125312, 140142098513919,
+ERASE, 140142794747904, 140142794751999,
+ERASE, 140142794752000, 140142803140607,
+ERASE, 140141913939968, 140141913944063,
+ERASE, 140141913944064, 140141922332671,
+ERASE, 140141746151424, 140141746155519,
+ERASE, 140141746155520, 140141754544127,
+ERASE, 140142954143744, 140142954147839,
+ERASE, 140142954147840, 140142962536447,
+ERASE, 140142081728512, 140142081732607,
+ERASE, 140142081732608, 140142090121215,
+ERASE, 140141905547264, 140141905551359,
+ERASE, 140141905551360, 140141913939967,
+ERASE, 140141729366016, 140141729370111,
+ERASE, 140141729370112, 140141737758719,
+ERASE, 140142920572928, 140142920577023,
+ERASE, 140142920577024, 140142928965631,
+ERASE, 140142039764992, 140142039769087,
+ERASE, 140142039769088, 140142048157695,
+ERASE, 140141679009792, 140141679013887,
+ERASE, 140141679013888, 140141687402495,
+ERASE, 140142551490560, 140142551494655,
+ERASE, 140142551494656, 140142559883263,
+ERASE, 140141947510784, 140141947514879,
+ERASE, 140141947514880, 140141955903487,
+ERASE, 140141771329536, 140141771333631,
+ERASE, 140141771333632, 140141779722239,
+ERASE, 140142928965632, 140142928969727,
+ERASE, 140142928969728, 140142937358335,
+ERASE, 140142073335808, 140142073339903,
+ERASE, 140142073339904, 140142081728511,
+ERASE, 140142543097856, 140142543101951,
+ERASE, 140142543101952, 140142551490559,
+ERASE, 140141955903488, 140141955907583,
+ERASE, 140141955907584, 140141964296191,
+ERASE, 140141704187904, 140141704191999,
+ERASE, 140141704192000, 140141712580607,
+ERASE, 140142786355200, 140142786359295,
+ERASE, 140142786359296, 140142794747903,
+ERASE, 140142056550400, 140142056554495,
+ERASE, 140142056554496, 140142064943103,
+ERASE, 140142828318720, 140142828322815,
+ERASE, 140142828322816, 140142836711423,
+ERASE, 140141788114944, 140141788119039,
+ERASE, 140141788119040, 140141796507647,
+ERASE, 140141695795200, 140141695799295,
+ERASE, 140141695799296, 140141704187903,
+ERASE, 140141578297344, 140141578301439,
+ERASE, 140141578301440, 140141586690047,
+ERASE, 140141611868160, 140141611872255,
+ERASE, 140141611872256, 140141620260863,
+ERASE, 140142811533312, 140142811537407,
+ERASE, 140142811537408, 140142819926015,
+ERASE, 140142064943104, 140142064947199,
+ERASE, 140142064947200, 140142073335807,
+ERASE, 140141628653568, 140141628657663,
+ERASE, 140141628657664, 140141637046271,
+ERASE, 140143046397952, 140143046402047,
+ERASE, 140143046402048, 140143054790655,
+ERASE, 140141796507648, 140141796511743,
+ERASE, 140141796511744, 140141804900351,
+ERASE, 140142803140608, 140142803144703,
+ERASE, 140142803144704, 140142811533311,
+ERASE, 140142509527040, 140142509531135,
+ERASE, 140142509531136, 140142517919743,
+ERASE, 140141821685760, 140141821689855,
+ERASE, 140141821689856, 140141830078463,
+ERASE, 140142777962496, 140142777966591,
+ERASE, 140142777966592, 140142786355199,
+ERASE, 140141804900352, 140141804904447,
+ERASE, 140141804904448, 140141813293055,
+ERASE, 140141930725376, 140141930729471,
+ERASE, 140141930729472, 140141939118079,
+ERASE, 140142937358336, 140142937362431,
+ERASE, 140142937362432, 140142945751039,
+ERASE, 140142559883264, 140142559887359,
+ERASE, 140142559887360, 140142568275967,
+ERASE, 140142534705152, 140142534709247,
+ERASE, 140142534709248, 140142543097855,
+ERASE, 140142048157696, 140142048161791,
+ERASE, 140142048161792, 140142056550399,
+ERASE, 140141754544128, 140141754548223,
+ERASE, 140141754548224, 140141762936831,
+ERASE, 140141939118080, 140141939122175,
+ERASE, 140141939122176, 140141947510783,
+ERASE, 140141653831680, 140141653835775,
+ERASE, 140141653835776, 140141662224383,
+ERASE, 140141712580608, 140141712584703,
+ERASE, 140141712584704, 140141720973311,
+ERASE, 140141645438976, 140141645443071,
+ERASE, 140141645443072, 140141653831679,
+ERASE, 140141687402496, 140141687406591,
+ERASE, 140141687406592, 140141695795199,
+ERASE, 140141662224384, 140141662228479,
+ERASE, 140141662228480, 140141670617087,
+ERASE, 140141922332672, 140141922336767,
+ERASE, 140141922336768, 140141930725375,
+ERASE, 140141737758720, 140141737762815,
+ERASE, 140141737762816, 140141746151423,
+ERASE, 140141637046272, 140141637050367,
+ERASE, 140141637050368, 140141645438975,
+ERASE, 140142517919744, 140142517923839,
+ERASE, 140142517923840, 140142526312447,
+ERASE, 140143096754176, 140143096758271,
+ERASE, 140143096758272, 140143105146879,
+ERASE, 140141595082752, 140141595086847,
+ERASE, 140141595086848, 140141603475455,
+ERASE, 140141762936832, 140141762940927,
+ERASE, 140141762940928, 140141771329535,
+ERASE, 140143311446016, 140143311450111,
+ERASE, 140143311450112, 140143319838719,
+ERASE, 140142526312448, 140142526316543,
+ERASE, 140142526316544, 140142534705151,
+ERASE, 140142819926016, 140142819930111,
+ERASE, 140142819930112, 140142828318719,
+ERASE, 140143180615680, 140143180619775,
+ERASE, 140143180619776, 140143189008383,
+ERASE, 140142962536448, 140142962540543,
+ERASE, 140142962540544, 140142970929151,
+ERASE, 140143214186496, 140143214190591,
+ERASE, 140143214190592, 140143222579199,
+ERASE, 140143088361472, 140143088365567,
+ERASE, 140143088365568, 140143096754175,
+ERASE, 140141586690048, 140141586694143,
+ERASE, 140141586694144, 140141595082751,
+ERASE, 140143230971904, 140143230975999,
+ERASE, 140143230976000, 140143239364607,
+ERASE, 140141779722240, 140141779726335,
+ERASE, 140141779726336, 140141788114943,
+ERASE, 140141670617088, 140141670621183,
+ERASE, 140141670621184, 140141679009791,
+ERASE, 140141813293056, 140141813297151,
+ERASE, 140141813297152, 140141821685759,
+ERASE, 140143222579200, 140143222583295,
+ERASE, 140143222583296, 140143230971903,
+ERASE, 140143189008384, 140143189012479,
+ERASE, 140143189012480, 140143197401087,
+ERASE, 140143071576064, 140143071580159,
+ERASE, 140143071580160, 140143079968767,
+ERASE, 140141620260864, 140141620264959,
+ERASE, 140141620264960, 140141628653567,
+ERASE, 140141603475456, 140141603479551,
+ERASE, 140141603479552, 140141611868159,
+ERASE, 140141720973312, 140141720977407,
+ERASE, 140141720977408, 140141729366015,
+ERASE, 140143079968768, 140143079972863,
+ERASE, 140143079972864, 140143088361471,
+ERASE, 140143205793792, 140143205797887,
+ERASE, 140143205797888, 140143214186495,
+ };
+ static const unsigned long set30[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140733436743680, 140737488351231,
+SNULL, 140733436747775, 140737488351231,
+STORE, 140733436743680, 140733436747775,
+STORE, 140733436612608, 140733436747775,
+STORE, 94630728904704, 94630731157503,
+SNULL, 94630729035775, 94630731157503,
+STORE, 94630728904704, 94630729035775,
+STORE, 94630729035776, 94630731157503,
+ERASE, 94630729035776, 94630731157503,
+STORE, 94630731128832, 94630731137023,
+STORE, 94630731137024, 94630731157503,
+STORE, 140165750841344, 140165753094143,
+SNULL, 140165750984703, 140165753094143,
+STORE, 140165750841344, 140165750984703,
+STORE, 140165750984704, 140165753094143,
+ERASE, 140165750984704, 140165753094143,
+STORE, 140165753081856, 140165753090047,
+STORE, 140165753090048, 140165753094143,
+STORE, 140733436887040, 140733436891135,
+STORE, 140733436874752, 140733436887039,
+STORE, 140165753053184, 140165753081855,
+STORE, 140165753044992, 140165753053183,
+STORE, 140165748625408, 140165750841343,
+SNULL, 140165748625408, 140165748723711,
+STORE, 140165748723712, 140165750841343,
+STORE, 140165748625408, 140165748723711,
+SNULL, 140165750816767, 140165750841343,
+STORE, 140165748723712, 140165750816767,
+STORE, 140165750816768, 140165750841343,
+SNULL, 140165750816768, 140165750824959,
+STORE, 140165750824960, 140165750841343,
+STORE, 140165750816768, 140165750824959,
+ERASE, 140165750816768, 140165750824959,
+STORE, 140165750816768, 140165750824959,
+ERASE, 140165750824960, 140165750841343,
+STORE, 140165750824960, 140165750841343,
+STORE, 140165744828416, 140165748625407,
+SNULL, 140165744828416, 140165746487295,
+STORE, 140165746487296, 140165748625407,
+STORE, 140165744828416, 140165746487295,
+SNULL, 140165748584447, 140165748625407,
+STORE, 140165746487296, 140165748584447,
+STORE, 140165748584448, 140165748625407,
+SNULL, 140165748584448, 140165748609023,
+STORE, 140165748609024, 140165748625407,
+STORE, 140165748584448, 140165748609023,
+ERASE, 140165748584448, 140165748609023,
+STORE, 140165748584448, 140165748609023,
+ERASE, 140165748609024, 140165748625407,
+STORE, 140165748609024, 140165748625407,
+STORE, 140165753036800, 140165753053183,
+SNULL, 140165748600831, 140165748609023,
+STORE, 140165748584448, 140165748600831,
+STORE, 140165748600832, 140165748609023,
+SNULL, 140165750820863, 140165750824959,
+STORE, 140165750816768, 140165750820863,
+STORE, 140165750820864, 140165750824959,
+SNULL, 94630731132927, 94630731137023,
+STORE, 94630731128832, 94630731132927,
+STORE, 94630731132928, 94630731137023,
+SNULL, 140165753085951, 140165753090047,
+STORE, 140165753081856, 140165753085951,
+STORE, 140165753085952, 140165753090047,
+ERASE, 140165753053184, 140165753081855,
+STORE, 94630743547904, 94630743683071,
+STORE, 140165736435712, 140165744828415,
+SNULL, 140165736439807, 140165744828415,
+STORE, 140165736435712, 140165736439807,
+STORE, 140165736439808, 140165744828415,
+STORE, 140165728043008, 140165736435711,
+STORE, 140165593825280, 140165728043007,
+SNULL, 140165593825280, 140165653725183,
+STORE, 140165653725184, 140165728043007,
+STORE, 140165593825280, 140165653725183,
+ERASE, 140165593825280, 140165653725183,
+SNULL, 140165720834047, 140165728043007,
+STORE, 140165653725184, 140165720834047,
+STORE, 140165720834048, 140165728043007,
+ERASE, 140165720834048, 140165728043007,
+SNULL, 140165653860351, 140165720834047,
+STORE, 140165653725184, 140165653860351,
+STORE, 140165653860352, 140165720834047,
+SNULL, 140165728047103, 140165736435711,
+STORE, 140165728043008, 140165728047103,
+STORE, 140165728047104, 140165736435711,
+STORE, 140165645332480, 140165653725183,
+SNULL, 140165645336575, 140165653725183,
+STORE, 140165645332480, 140165645336575,
+STORE, 140165645336576, 140165653725183,
+STORE, 140165636939776, 140165645332479,
+SNULL, 140165636943871, 140165645332479,
+STORE, 140165636939776, 140165636943871,
+STORE, 140165636943872, 140165645332479,
+STORE, 140165628547072, 140165636939775,
+SNULL, 140165628551167, 140165636939775,
+STORE, 140165628547072, 140165628551167,
+STORE, 140165628551168, 140165636939775,
+STORE, 140165620154368, 140165628547071,
+STORE, 140165611761664, 140165628547071,
+STORE, 140165603368960, 140165628547071,
+STORE, 140165469151232, 140165603368959,
+SNULL, 140165469151232, 140165519507455,
+STORE, 140165519507456, 140165603368959,
+STORE, 140165469151232, 140165519507455,
+ERASE, 140165469151232, 140165519507455,
+SNULL, 140165586616319, 140165603368959,
+STORE, 140165519507456, 140165586616319,
+STORE, 140165586616320, 140165603368959,
+ERASE, 140165586616320, 140165603368959,
+STORE, 140165594976256, 140165628547071,
+STORE, 140165385289728, 140165586616319,
+SNULL, 140165452398591, 140165586616319,
+STORE, 140165385289728, 140165452398591,
+STORE, 140165452398592, 140165586616319,
+SNULL, 140165452398592, 140165519507455,
+STORE, 140165519507456, 140165586616319,
+STORE, 140165452398592, 140165519507455,
+ERASE, 140165452398592, 140165519507455,
+STORE, 140165251072000, 140165452398591,
+SNULL, 140165318180863, 140165452398591,
+STORE, 140165251072000, 140165318180863,
+STORE, 140165318180864, 140165452398591,
+SNULL, 140165318180864, 140165385289727,
+STORE, 140165385289728, 140165452398591,
+STORE, 140165318180864, 140165385289727,
+ERASE, 140165318180864, 140165385289727,
+SNULL, 140165519642623, 140165586616319,
+STORE, 140165519507456, 140165519642623,
+STORE, 140165519642624, 140165586616319,
+SNULL, 140165594976256, 140165611761663,
+STORE, 140165611761664, 140165628547071,
+STORE, 140165594976256, 140165611761663,
+SNULL, 140165611765759, 140165628547071,
+STORE, 140165611761664, 140165611765759,
+STORE, 140165611765760, 140165628547071,
+STORE, 140165385289728, 140165519507455,
+SNULL, 140165385424895, 140165519507455,
+STORE, 140165385289728, 140165385424895,
+STORE, 140165385424896, 140165519507455,
+SNULL, 140165594976256, 140165603368959,
+STORE, 140165603368960, 140165611761663,
+STORE, 140165594976256, 140165603368959,
+SNULL, 140165603373055, 140165611761663,
+STORE, 140165603368960, 140165603373055,
+STORE, 140165603373056, 140165611761663,
+SNULL, 140165251207167, 140165318180863,
+STORE, 140165251072000, 140165251207167,
+STORE, 140165251207168, 140165318180863,
+STORE, 140165376897024, 140165385289727,
+SNULL, 140165376901119, 140165385289727,
+STORE, 140165376897024, 140165376901119,
+STORE, 140165376901120, 140165385289727,
+SNULL, 140165385424896, 140165452398591,
+STORE, 140165452398592, 140165519507455,
+STORE, 140165385424896, 140165452398591,
+SNULL, 140165452533759, 140165519507455,
+STORE, 140165452398592, 140165452533759,
+STORE, 140165452533760, 140165519507455,
+STORE, 140165368504320, 140165376897023,
+SNULL, 140165594980351, 140165603368959,
+STORE, 140165594976256, 140165594980351,
+STORE, 140165594980352, 140165603368959,
+SNULL, 140165368508415, 140165376897023,
+STORE, 140165368504320, 140165368508415,
+STORE, 140165368508416, 140165376897023,
+SNULL, 140165611765760, 140165620154367,
+STORE, 140165620154368, 140165628547071,
+STORE, 140165611765760, 140165620154367,
+SNULL, 140165620158463, 140165628547071,
+STORE, 140165620154368, 140165620158463,
+STORE, 140165620158464, 140165628547071,
+STORE, 140165360111616, 140165368504319,
+STORE, 140165351718912, 140165368504319,
+STORE, 140165343326208, 140165368504319,
+SNULL, 140165343326208, 140165351718911,
+STORE, 140165351718912, 140165368504319,
+STORE, 140165343326208, 140165351718911,
+SNULL, 140165351723007, 140165368504319,
+STORE, 140165351718912, 140165351723007,
+STORE, 140165351723008, 140165368504319,
+SNULL, 140165343330303, 140165351718911,
+STORE, 140165343326208, 140165343330303,
+STORE, 140165343330304, 140165351718911,
+SNULL, 140165351723008, 140165360111615,
+STORE, 140165360111616, 140165368504319,
+STORE, 140165351723008, 140165360111615,
+SNULL, 140165360115711, 140165368504319,
+STORE, 140165360111616, 140165360115711,
+STORE, 140165360115712, 140165368504319,
+STORE, 140165334933504, 140165343326207,
+SNULL, 140165334937599, 140165343326207,
+STORE, 140165334933504, 140165334937599,
+STORE, 140165334937600, 140165343326207,
+STORE, 140165326540800, 140165334933503,
+STORE, 140165242679296, 140165251071999,
+SNULL, 140165242683391, 140165251071999,
+STORE, 140165242679296, 140165242683391,
+STORE, 140165242683392, 140165251071999,
+STORE, 140165234286592, 140165242679295,
+STORE, 140165225893888, 140165242679295,
+SNULL, 140165225897983, 140165242679295,
+STORE, 140165225893888, 140165225897983,
+STORE, 140165225897984, 140165242679295,
+SNULL, 140165225897984, 140165234286591,
+STORE, 140165234286592, 140165242679295,
+STORE, 140165225897984, 140165234286591,
+SNULL, 140165234290687, 140165242679295,
+STORE, 140165234286592, 140165234290687,
+STORE, 140165234290688, 140165242679295,
+SNULL, 140165326544895, 140165334933503,
+STORE, 140165326540800, 140165326544895,
+STORE, 140165326544896, 140165334933503,
+STORE, 140165217501184, 140165225893887,
+STORE, 140165209108480, 140165225893887,
+SNULL, 140165209108480, 140165217501183,
+STORE, 140165217501184, 140165225893887,
+STORE, 140165209108480, 140165217501183,
+SNULL, 140165217505279, 140165225893887,
+STORE, 140165217501184, 140165217505279,
+STORE, 140165217505280, 140165225893887,
+SNULL, 140165209112575, 140165217501183,
+STORE, 140165209108480, 140165209112575,
+STORE, 140165209112576, 140165217501183,
+STORE, 140165200715776, 140165209108479,
+STORE, 140165066498048, 140165200715775,
+SNULL, 140165066498048, 140165116854271,
+STORE, 140165116854272, 140165200715775,
+STORE, 140165066498048, 140165116854271,
+ERASE, 140165066498048, 140165116854271,
+SNULL, 140165183963135, 140165200715775,
+STORE, 140165116854272, 140165183963135,
+STORE, 140165183963136, 140165200715775,
+ERASE, 140165183963136, 140165200715775,
+SNULL, 140165116989439, 140165183963135,
+STORE, 140165116854272, 140165116989439,
+STORE, 140165116989440, 140165183963135,
+STORE, 140165192323072, 140165209108479,
+STORE, 140165108461568, 140165116854271,
+STORE, 140164974243840, 140165108461567,
+STORE, 140164965851136, 140164974243839,
+SNULL, 140164974243840, 140164982636543,
+STORE, 140164982636544, 140165108461567,
+STORE, 140164974243840, 140164982636543,
+ERASE, 140164974243840, 140164982636543,
+STORE, 140164965851136, 140164982636543,
+STORE, 140164957458432, 140164982636543,
+STORE, 140164949065728, 140164982636543,
+STORE, 140164940673024, 140164982636543,
+STORE, 140164806455296, 140164940673023,
+STORE, 140164798062592, 140164806455295,
+STORE, 140164789669888, 140164806455295,
+STORE, 140164655452160, 140164789669887,
+STORE, 140164647059456, 140164655452159,
+STORE, 140164638666752, 140164655452159,
+SNULL, 140164655452160, 140164714201087,
+STORE, 140164714201088, 140164789669887,
+STORE, 140164655452160, 140164714201087,
+ERASE, 140164655452160, 140164714201087,
+STORE, 140164705808384, 140164714201087,
+STORE, 140164697415680, 140164714201087,
+STORE, 140164504449024, 140164638666751,
+SNULL, 140164504449024, 140164512874495,
+STORE, 140164512874496, 140164638666751,
+STORE, 140164504449024, 140164512874495,
+ERASE, 140164504449024, 140164512874495,
+STORE, 140164689022976, 140164714201087,
+STORE, 140164680630272, 140164714201087,
+SNULL, 140164680634367, 140164714201087,
+STORE, 140164680630272, 140164680634367,
+STORE, 140164680634368, 140164714201087,
+STORE, 140164378656768, 140164638666751,
+SNULL, 140165192323072, 140165200715775,
+STORE, 140165200715776, 140165209108479,
+STORE, 140165192323072, 140165200715775,
+SNULL, 140165200719871, 140165209108479,
+STORE, 140165200715776, 140165200719871,
+STORE, 140165200719872, 140165209108479,
+SNULL, 140165049745407, 140165108461567,
+STORE, 140164982636544, 140165049745407,
+STORE, 140165049745408, 140165108461567,
+ERASE, 140165049745408, 140165108461567,
+SNULL, 140164982771711, 140165049745407,
+STORE, 140164982636544, 140164982771711,
+STORE, 140164982771712, 140165049745407,
+STORE, 140164244439040, 140164638666751,
+SNULL, 140164311547903, 140164638666751,
+STORE, 140164244439040, 140164311547903,
+STORE, 140164311547904, 140164638666751,
+SNULL, 140164311547904, 140164378656767,
+STORE, 140164378656768, 140164638666751,
+STORE, 140164311547904, 140164378656767,
+ERASE, 140164311547904, 140164378656767,
+SNULL, 140164806455296, 140164848418815,
+STORE, 140164848418816, 140164940673023,
+STORE, 140164806455296, 140164848418815,
+ERASE, 140164806455296, 140164848418815,
+SNULL, 140164915527679, 140164940673023,
+STORE, 140164848418816, 140164915527679,
+STORE, 140164915527680, 140164940673023,
+ERASE, 140164915527680, 140164940673023,
+STORE, 140164110221312, 140164311547903,
+SNULL, 140164177330175, 140164311547903,
+STORE, 140164110221312, 140164177330175,
+STORE, 140164177330176, 140164311547903,
+SNULL, 140164177330176, 140164244439039,
+STORE, 140164244439040, 140164311547903,
+STORE, 140164177330176, 140164244439039,
+ERASE, 140164177330176, 140164244439039,
+SNULL, 140164781309951, 140164789669887,
+STORE, 140164714201088, 140164781309951,
+STORE, 140164781309952, 140164789669887,
+ERASE, 140164781309952, 140164789669887,
+STORE, 140163976003584, 140164177330175,
+SNULL, 140164043112447, 140164177330175,
+STORE, 140163976003584, 140164043112447,
+STORE, 140164043112448, 140164177330175,
+SNULL, 140164043112448, 140164110221311,
+STORE, 140164110221312, 140164177330175,
+STORE, 140164043112448, 140164110221311,
+ERASE, 140164043112448, 140164110221311,
+SNULL, 140164579983359, 140164638666751,
+STORE, 140164378656768, 140164579983359,
+STORE, 140164579983360, 140164638666751,
+ERASE, 140164579983360, 140164638666751,
+STORE, 140163841785856, 140164043112447,
+SNULL, 140163908894719, 140164043112447,
+STORE, 140163841785856, 140163908894719,
+STORE, 140163908894720, 140164043112447,
+SNULL, 140163908894720, 140163976003583,
+STORE, 140163976003584, 140164043112447,
+STORE, 140163908894720, 140163976003583,
+ERASE, 140163908894720, 140163976003583,
+SNULL, 140164940673024, 140164965851135,
+STORE, 140164965851136, 140164982636543,
+STORE, 140164940673024, 140164965851135,
+SNULL, 140164965855231, 140164982636543,
+STORE, 140164965851136, 140164965855231,
+STORE, 140164965855232, 140164982636543,
+SNULL, 140164965855232, 140164974243839,
+STORE, 140164974243840, 140164982636543,
+STORE, 140164965855232, 140164974243839,
+SNULL, 140164974247935, 140164982636543,
+STORE, 140164974243840, 140164974247935,
+STORE, 140164974247936, 140164982636543,
+SNULL, 140164445765631, 140164579983359,
+STORE, 140164378656768, 140164445765631,
+STORE, 140164445765632, 140164579983359,
+SNULL, 140164445765632, 140164512874495,
+STORE, 140164512874496, 140164579983359,
+STORE, 140164445765632, 140164512874495,
+ERASE, 140164445765632, 140164512874495,
+SNULL, 140164378791935, 140164445765631,
+STORE, 140164378656768, 140164378791935,
+STORE, 140164378791936, 140164445765631,
+SNULL, 140164789673983, 140164806455295,
+STORE, 140164789669888, 140164789673983,
+STORE, 140164789673984, 140164806455295,
+SNULL, 140164789673984, 140164798062591,
+STORE, 140164798062592, 140164806455295,
+STORE, 140164789673984, 140164798062591,
+SNULL, 140164798066687, 140164806455295,
+STORE, 140164798062592, 140164798066687,
+STORE, 140164798066688, 140164806455295,
+SNULL, 140164638670847, 140164655452159,
+STORE, 140164638666752, 140164638670847,
+STORE, 140164638670848, 140164655452159,
+STORE, 140165100068864, 140165116854271,
+STORE, 140165091676160, 140165116854271,
+STORE, 140165083283456, 140165116854271,
+SNULL, 140164244574207, 140164311547903,
+STORE, 140164244439040, 140164244574207,
+STORE, 140164244574208, 140164311547903,
+SNULL, 140164848553983, 140164915527679,
+STORE, 140164848418816, 140164848553983,
+STORE, 140164848553984, 140164915527679,
+SNULL, 140164110356479, 140164177330175,
+STORE, 140164110221312, 140164110356479,
+STORE, 140164110356480, 140164177330175,
+SNULL, 140164714336255, 140164781309951,
+STORE, 140164714201088, 140164714336255,
+STORE, 140164714336256, 140164781309951,
+SNULL, 140163976138751, 140164043112447,
+STORE, 140163976003584, 140163976138751,
+STORE, 140163976138752, 140164043112447,
+SNULL, 140164513009663, 140164579983359,
+STORE, 140164512874496, 140164513009663,
+STORE, 140164513009664, 140164579983359,
+SNULL, 140163841921023, 140163908894719,
+STORE, 140163841785856, 140163841921023,
+STORE, 140163841921024, 140163908894719,
+SNULL, 140165083283456, 140165100068863,
+STORE, 140165100068864, 140165116854271,
+STORE, 140165083283456, 140165100068863,
+SNULL, 140165100072959, 140165116854271,
+STORE, 140165100068864, 140165100072959,
+STORE, 140165100072960, 140165116854271,
+SNULL, 140165100072960, 140165108461567,
+STORE, 140165108461568, 140165116854271,
+STORE, 140165100072960, 140165108461567,
+SNULL, 140165108465663, 140165116854271,
+STORE, 140165108461568, 140165108465663,
+STORE, 140165108465664, 140165116854271,
+STORE, 140165074890752, 140165100068863,
+SNULL, 140165074894847, 140165100068863,
+STORE, 140165074890752, 140165074894847,
+STORE, 140165074894848, 140165100068863,
+STORE, 140165066498048, 140165074890751,
+STORE, 140165058105344, 140165074890751,
+STORE, 140164932280320, 140164965851135,
+SNULL, 140165192327167, 140165200715775,
+STORE, 140165192323072, 140165192327167,
+STORE, 140165192327168, 140165200715775,
+STORE, 140164923887616, 140164965851135,
+SNULL, 140164923891711, 140164965851135,
+STORE, 140164923887616, 140164923891711,
+STORE, 140164923891712, 140164965851135,
+SNULL, 140164680634368, 140164705808383,
+STORE, 140164705808384, 140164714201087,
+STORE, 140164680634368, 140164705808383,
+SNULL, 140164705812479, 140164714201087,
+STORE, 140164705808384, 140164705812479,
+STORE, 140164705812480, 140164714201087,
+SNULL, 140164680634368, 140164697415679,
+STORE, 140164697415680, 140164705808383,
+STORE, 140164680634368, 140164697415679,
+SNULL, 140164697419775, 140164705808383,
+STORE, 140164697415680, 140164697419775,
+STORE, 140164697419776, 140164705808383,
+STORE, 140164840026112, 140164848418815,
+STORE, 140164831633408, 140164848418815,
+STORE, 140164823240704, 140164848418815,
+SNULL, 140165074894848, 140165083283455,
+STORE, 140165083283456, 140165100068863,
+STORE, 140165074894848, 140165083283455,
+SNULL, 140165083287551, 140165100068863,
+STORE, 140165083283456, 140165083287551,
+STORE, 140165083287552, 140165100068863,
+SNULL, 140165083287552, 140165091676159,
+STORE, 140165091676160, 140165100068863,
+STORE, 140165083287552, 140165091676159,
+SNULL, 140165091680255, 140165100068863,
+STORE, 140165091676160, 140165091680255,
+STORE, 140165091680256, 140165100068863,
+SNULL, 140164638670848, 140164647059455,
+STORE, 140164647059456, 140164655452159,
+STORE, 140164638670848, 140164647059455,
+SNULL, 140164647063551, 140164655452159,
+STORE, 140164647059456, 140164647063551,
+STORE, 140164647063552, 140164655452159,
+SNULL, 140164923891712, 140164940673023,
+STORE, 140164940673024, 140164965851135,
+STORE, 140164923891712, 140164940673023,
+SNULL, 140164940677119, 140164965851135,
+STORE, 140164940673024, 140164940677119,
+STORE, 140164940677120, 140164965851135,
+SNULL, 140164940677120, 140164949065727,
+STORE, 140164949065728, 140164965851135,
+STORE, 140164940677120, 140164949065727,
+SNULL, 140164949069823, 140164965851135,
+STORE, 140164949065728, 140164949069823,
+STORE, 140164949069824, 140164965851135,
+SNULL, 140164949069824, 140164957458431,
+STORE, 140164957458432, 140164965851135,
+STORE, 140164949069824, 140164957458431,
+SNULL, 140164957462527, 140164965851135,
+STORE, 140164957458432, 140164957462527,
+STORE, 140164957462528, 140164965851135,
+SNULL, 140164680634368, 140164689022975,
+STORE, 140164689022976, 140164697415679,
+STORE, 140164680634368, 140164689022975,
+SNULL, 140164689027071, 140164697415679,
+STORE, 140164689022976, 140164689027071,
+STORE, 140164689027072, 140164697415679,
+STORE, 140164814848000, 140164848418815,
+SNULL, 140165058105344, 140165066498047,
+STORE, 140165066498048, 140165074890751,
+STORE, 140165058105344, 140165066498047,
+SNULL, 140165066502143, 140165074890751,
+STORE, 140165066498048, 140165066502143,
+STORE, 140165066502144, 140165074890751,
+SNULL, 140165058109439, 140165066498047,
+STORE, 140165058105344, 140165058109439,
+STORE, 140165058109440, 140165066498047,
+STORE, 140164798066688, 140164814847999,
+SNULL, 140164798066688, 140164806455295,
+STORE, 140164806455296, 140164814847999,
+STORE, 140164798066688, 140164806455295,
+SNULL, 140164806459391, 140164814847999,
+STORE, 140164806455296, 140164806459391,
+STORE, 140164806459392, 140164814847999,
+SNULL, 140164923891712, 140164932280319,
+STORE, 140164932280320, 140164940673023,
+STORE, 140164923891712, 140164932280319,
+SNULL, 140164932284415, 140164940673023,
+STORE, 140164932280320, 140164932284415,
+STORE, 140164932284416, 140164940673023,
+STORE, 140164672237568, 140164680630271,
+STORE, 140164663844864, 140164680630271,
+STORE, 140164647063552, 140164680630271,
+SNULL, 140164647063552, 140164655452159,
+STORE, 140164655452160, 140164680630271,
+STORE, 140164647063552, 140164655452159,
+SNULL, 140164655456255, 140164680630271,
+STORE, 140164655452160, 140164655456255,
+STORE, 140164655456256, 140164680630271,
+STORE, 140164630274048, 140164638666751,
+SNULL, 140164814852095, 140164848418815,
+STORE, 140164814848000, 140164814852095,
+STORE, 140164814852096, 140164848418815,
+SNULL, 140164814852096, 140164831633407,
+STORE, 140164831633408, 140164848418815,
+STORE, 140164814852096, 140164831633407,
+SNULL, 140164831637503, 140164848418815,
+STORE, 140164831633408, 140164831637503,
+STORE, 140164831637504, 140164848418815,
+STORE, 140164621881344, 140164638666751,
+SNULL, 140164831637504, 140164840026111,
+STORE, 140164840026112, 140164848418815,
+STORE, 140164831637504, 140164840026111,
+SNULL, 140164840030207, 140164848418815,
+STORE, 140164840026112, 140164840030207,
+STORE, 140164840030208, 140164848418815,
+STORE, 140164613488640, 140164638666751,
+SNULL, 140164613492735, 140164638666751,
+STORE, 140164613488640, 140164613492735,
+STORE, 140164613492736, 140164638666751,
+STORE, 140164605095936, 140164613488639,
+SNULL, 140164605100031, 140164613488639,
+STORE, 140164605095936, 140164605100031,
+STORE, 140164605100032, 140164613488639,
+STORE, 140164596703232, 140164605095935,
+STORE, 140164588310528, 140164605095935,
+SNULL, 140164588314623, 140164605095935,
+STORE, 140164588310528, 140164588314623,
+STORE, 140164588314624, 140164605095935,
+STORE, 140164504481792, 140164512874495,
+STORE, 140164496089088, 140164512874495,
+SNULL, 140164496089088, 140164504481791,
+STORE, 140164504481792, 140164512874495,
+STORE, 140164496089088, 140164504481791,
+SNULL, 140164504485887, 140164512874495,
+STORE, 140164504481792, 140164504485887,
+STORE, 140164504485888, 140164512874495,
+SNULL, 140164613492736, 140164630274047,
+STORE, 140164630274048, 140164638666751,
+STORE, 140164613492736, 140164630274047,
+SNULL, 140164630278143, 140164638666751,
+STORE, 140164630274048, 140164630278143,
+STORE, 140164630278144, 140164638666751,
+STORE, 140164487696384, 140164504481791,
+STORE, 140164479303680, 140164504481791,
+SNULL, 140164814852096, 140164823240703,
+STORE, 140164823240704, 140164831633407,
+STORE, 140164814852096, 140164823240703,
+SNULL, 140164823244799, 140164831633407,
+STORE, 140164823240704, 140164823244799,
+STORE, 140164823244800, 140164831633407,
+STORE, 140164470910976, 140164504481791,
+SNULL, 140164470910976, 140164496089087,
+STORE, 140164496089088, 140164504481791,
+STORE, 140164470910976, 140164496089087,
+SNULL, 140164496093183, 140164504481791,
+STORE, 140164496089088, 140164496093183,
+STORE, 140164496093184, 140164504481791,
+SNULL, 140164655456256, 140164672237567,
+STORE, 140164672237568, 140164680630271,
+STORE, 140164655456256, 140164672237567,
+SNULL, 140164672241663, 140164680630271,
+STORE, 140164672237568, 140164672241663,
+STORE, 140164672241664, 140164680630271,
+STORE, 140164462518272, 140164496089087,
+STORE, 140164454125568, 140164496089087,
+SNULL, 140164655456256, 140164663844863,
+STORE, 140164663844864, 140164672237567,
+STORE, 140164655456256, 140164663844863,
+SNULL, 140164663848959, 140164672237567,
+STORE, 140164663844864, 140164663848959,
+STORE, 140164663848960, 140164672237567,
+STORE, 140164370264064, 140164378656767,
+STORE, 140164361871360, 140164378656767,
+STORE, 140164353478656, 140164378656767,
+STORE, 140164345085952, 140164378656767,
+SNULL, 140164345085952, 140164353478655,
+STORE, 140164353478656, 140164378656767,
+STORE, 140164345085952, 140164353478655,
+SNULL, 140164353482751, 140164378656767,
+STORE, 140164353478656, 140164353482751,
+STORE, 140164353482752, 140164378656767,
+SNULL, 140164454125568, 140164487696383,
+STORE, 140164487696384, 140164496089087,
+STORE, 140164454125568, 140164487696383,
+SNULL, 140164487700479, 140164496089087,
+STORE, 140164487696384, 140164487700479,
+STORE, 140164487700480, 140164496089087,
+STORE, 140164336693248, 140164353478655,
+SNULL, 140164336697343, 140164353478655,
+STORE, 140164336693248, 140164336697343,
+STORE, 140164336697344, 140164353478655,
+STORE, 140164328300544, 140164336693247,
+SNULL, 140164454125568, 140164479303679,
+STORE, 140164479303680, 140164487696383,
+STORE, 140164454125568, 140164479303679,
+SNULL, 140164479307775, 140164487696383,
+STORE, 140164479303680, 140164479307775,
+STORE, 140164479307776, 140164487696383,
+STORE, 140164319907840, 140164336693247,
+STORE, 140164236046336, 140164244439039,
+SNULL, 140164588314624, 140164596703231,
+STORE, 140164596703232, 140164605095935,
+STORE, 140164588314624, 140164596703231,
+SNULL, 140164596707327, 140164605095935,
+STORE, 140164596703232, 140164596707327,
+STORE, 140164596707328, 140164605095935,
+SNULL, 140164454125568, 140164462518271,
+STORE, 140164462518272, 140164479303679,
+STORE, 140164454125568, 140164462518271,
+SNULL, 140164462522367, 140164479303679,
+STORE, 140164462518272, 140164462522367,
+STORE, 140164462522368, 140164479303679,
+STORE, 140164227653632, 140164244439039,
+SNULL, 140164227657727, 140164244439039,
+STORE, 140164227653632, 140164227657727,
+STORE, 140164227657728, 140164244439039,
+SNULL, 140164462522368, 140164470910975,
+STORE, 140164470910976, 140164479303679,
+STORE, 140164462522368, 140164470910975,
+SNULL, 140164470915071, 140164479303679,
+STORE, 140164470910976, 140164470915071,
+STORE, 140164470915072, 140164479303679,
+SNULL, 140164613492736, 140164621881343,
+STORE, 140164621881344, 140164630274047,
+STORE, 140164613492736, 140164621881343,
+SNULL, 140164621885439, 140164630274047,
+STORE, 140164621881344, 140164621885439,
+STORE, 140164621885440, 140164630274047,
+SNULL, 140164353482752, 140164370264063,
+STORE, 140164370264064, 140164378656767,
+STORE, 140164353482752, 140164370264063,
+SNULL, 140164370268159, 140164378656767,
+STORE, 140164370264064, 140164370268159,
+STORE, 140164370268160, 140164378656767,
+STORE, 140164219260928, 140164227653631,
+SNULL, 140164319911935, 140164336693247,
+STORE, 140164319907840, 140164319911935,
+STORE, 140164319911936, 140164336693247,
+SNULL, 140164336697344, 140164345085951,
+STORE, 140164345085952, 140164353478655,
+STORE, 140164336697344, 140164345085951,
+SNULL, 140164345090047, 140164353478655,
+STORE, 140164345085952, 140164345090047,
+STORE, 140164345090048, 140164353478655,
+SNULL, 140164319911936, 140164328300543,
+STORE, 140164328300544, 140164336693247,
+STORE, 140164319911936, 140164328300543,
+SNULL, 140164328304639, 140164336693247,
+STORE, 140164328300544, 140164328304639,
+STORE, 140164328304640, 140164336693247,
+SNULL, 140164454129663, 140164462518271,
+STORE, 140164454125568, 140164454129663,
+STORE, 140164454129664, 140164462518271,
+STORE, 140164210868224, 140164227653631,
+STORE, 140164202475520, 140164227653631,
+STORE, 140164194082816, 140164227653631,
+SNULL, 140164194086911, 140164227653631,
+STORE, 140164194082816, 140164194086911,
+STORE, 140164194086912, 140164227653631,
+SNULL, 140164353482752, 140164361871359,
+STORE, 140164361871360, 140164370264063,
+STORE, 140164353482752, 140164361871359,
+SNULL, 140164361875455, 140164370264063,
+STORE, 140164361871360, 140164361875455,
+STORE, 140164361875456, 140164370264063,
+SNULL, 140164227657728, 140164236046335,
+STORE, 140164236046336, 140164244439039,
+STORE, 140164227657728, 140164236046335,
+SNULL, 140164236050431, 140164244439039,
+STORE, 140164236046336, 140164236050431,
+STORE, 140164236050432, 140164244439039,
+STORE, 140164185690112, 140164194082815,
+SNULL, 140164194086912, 140164219260927,
+STORE, 140164219260928, 140164227653631,
+STORE, 140164194086912, 140164219260927,
+SNULL, 140164219265023, 140164227653631,
+STORE, 140164219260928, 140164219265023,
+STORE, 140164219265024, 140164227653631,
+STORE, 140164101828608, 140164110221311,
+STORE, 140164093435904, 140164110221311,
+STORE, 140164085043200, 140164110221311,
+SNULL, 140164085047295, 140164110221311,
+STORE, 140164085043200, 140164085047295,
+STORE, 140164085047296, 140164110221311,
+STORE, 140164076650496, 140164085043199,
+SNULL, 140164185694207, 140164194082815,
+STORE, 140164185690112, 140164185694207,
+STORE, 140164185694208, 140164194082815,
+SNULL, 140164085047296, 140164101828607,
+STORE, 140164101828608, 140164110221311,
+STORE, 140164085047296, 140164101828607,
+SNULL, 140164101832703, 140164110221311,
+STORE, 140164101828608, 140164101832703,
+STORE, 140164101832704, 140164110221311,
+SNULL, 140164085047296, 140164093435903,
+STORE, 140164093435904, 140164101828607,
+STORE, 140164085047296, 140164093435903,
+SNULL, 140164093439999, 140164101828607,
+STORE, 140164093435904, 140164093439999,
+STORE, 140164093440000, 140164101828607,
+SNULL, 140164194086912, 140164202475519,
+STORE, 140164202475520, 140164219260927,
+STORE, 140164194086912, 140164202475519,
+SNULL, 140164202479615, 140164219260927,
+STORE, 140164202475520, 140164202479615,
+STORE, 140164202479616, 140164219260927,
+SNULL, 140164202479616, 140164210868223,
+STORE, 140164210868224, 140164219260927,
+STORE, 140164202479616, 140164210868223,
+SNULL, 140164210872319, 140164219260927,
+STORE, 140164210868224, 140164210872319,
+STORE, 140164210872320, 140164219260927,
+SNULL, 140164076654591, 140164085043199,
+STORE, 140164076650496, 140164076654591,
+STORE, 140164076654592, 140164085043199,
+STORE, 140164068257792, 140164076650495,
+SNULL, 140164068261887, 140164076650495,
+STORE, 140164068257792, 140164068261887,
+STORE, 140164068261888, 140164076650495,
+STORE, 140165753053184, 140165753081855,
+STORE, 140165725851648, 140165728043007,
+SNULL, 140165725851648, 140165725941759,
+STORE, 140165725941760, 140165728043007,
+STORE, 140165725851648, 140165725941759,
+SNULL, 140165728034815, 140165728043007,
+STORE, 140165725941760, 140165728034815,
+STORE, 140165728034816, 140165728043007,
+ERASE, 140165728034816, 140165728043007,
+STORE, 140165728034816, 140165728043007,
+SNULL, 140165728038911, 140165728043007,
+STORE, 140165728034816, 140165728038911,
+STORE, 140165728038912, 140165728043007,
+ERASE, 140165753053184, 140165753081855,
+ERASE, 140164638666752, 140164638670847,
+ERASE, 140164638670848, 140164647059455,
+ERASE, 140165091676160, 140165091680255,
+ERASE, 140165091680256, 140165100068863,
+ERASE, 140164613488640, 140164613492735,
+ERASE, 140164613492736, 140164621881343,
+ERASE, 140164319907840, 140164319911935,
+ERASE, 140164319911936, 140164328300543,
+ERASE, 140165620154368, 140165620158463,
+ERASE, 140165620158464, 140165628547071,
+ERASE, 140164798062592, 140164798066687,
+ERASE, 140164798066688, 140164806455295,
+ERASE, 140164789669888, 140164789673983,
+ERASE, 140164789673984, 140164798062591,
+ERASE, 140164965851136, 140164965855231,
+ERASE, 140164965855232, 140164974243839,
+ERASE, 140165074890752, 140165074894847,
+ERASE, 140165074894848, 140165083283455,
+ERASE, 140164672237568, 140164672241663,
+ERASE, 140164672241664, 140164680630271,
+ERASE, 140164454125568, 140164454129663,
+ERASE, 140164454129664, 140164462518271,
+ERASE, 140165200715776, 140165200719871,
+ERASE, 140165200719872, 140165209108479,
+ERASE, 140164932280320, 140164932284415,
+ERASE, 140164932284416, 140164940673023,
+ERASE, 140164663844864, 140164663848959,
+ERASE, 140164663848960, 140164672237567,
+ERASE, 140164697415680, 140164697419775,
+ERASE, 140164697419776, 140164705808383,
+ERASE, 140164831633408, 140164831637503,
+ERASE, 140164831637504, 140164840026111,
+ERASE, 140165192323072, 140165192327167,
+ERASE, 140165192327168, 140165200715775,
+ERASE, 140165108461568, 140165108465663,
+ERASE, 140165108465664, 140165116854271,
+ERASE, 140164840026112, 140164840030207,
+ERASE, 140164840030208, 140164848418815,
+ERASE, 140164647059456, 140164647063551,
+ERASE, 140164647063552, 140164655452159,
+ERASE, 140165083283456, 140165083287551,
+ERASE, 140165083287552, 140165091676159,
+ERASE, 140164923887616, 140164923891711,
+ERASE, 140164923891712, 140164932280319,
+ERASE, 140164823240704, 140164823244799,
+ERASE, 140164823244800, 140164831633407,
+ERASE, 140164227653632, 140164227657727,
+ERASE, 140164227657728, 140164236046335,
+ERASE, 140164957458432, 140164957462527,
+ERASE, 140164957462528, 140164965851135,
+ERASE, 140164680630272, 140164680634367,
+ERASE, 140164680634368, 140164689022975,
+ERASE, 140164974243840, 140164974247935,
+ERASE, 140164974247936, 140164982636543,
+ERASE, 140165066498048, 140165066502143,
+ERASE, 140165066502144, 140165074890751,
+ERASE, 140164621881344, 140164621885439,
+ERASE, 140164621885440, 140164630274047,
+ERASE, 140164949065728, 140164949069823,
+ERASE, 140164949069824, 140164957458431,
+ERASE, 140164588310528, 140164588314623,
+ERASE, 140164588314624, 140164596703231,
+ERASE, 140164806455296, 140164806459391,
+ERASE, 140164806459392, 140164814847999,
+ERASE, 140164940673024, 140164940677119,
+ERASE, 140164940677120, 140164949065727,
+ERASE, 140164596703232, 140164596707327,
+ERASE, 140164596707328, 140164605095935,
+ERASE, 140164605095936, 140164605100031,
+ERASE, 140164605100032, 140164613488639,
+ERASE, 140164655452160, 140164655456255,
+ERASE, 140164655456256, 140164663844863,
+ERASE, 140164705808384, 140164705812479,
+ERASE, 140164705812480, 140164714201087,
+ERASE, 140164689022976, 140164689027071,
+ERASE, 140164689027072, 140164697415679,
+ERASE, 140164630274048, 140164630278143,
+ERASE, 140164630278144, 140164638666751,
+ERASE, 140164479303680, 140164479307775,
+ERASE, 140164479307776, 140164487696383,
+ERASE, 140164236046336, 140164236050431,
+ERASE, 140164236050432, 140164244439039,
+ERASE, 140164085043200, 140164085047295,
+ERASE, 140164085047296, 140164093435903,
+ERASE, 140164345085952, 140164345090047,
+ERASE, 140164345090048, 140164353478655,
+ERASE, 140164101828608, 140164101832703,
+ERASE, 140164101832704, 140164110221311,
+ERASE, 140164370264064, 140164370268159,
+ERASE, 140164370268160, 140164378656767,
+ERASE, 140164336693248, 140164336697343,
+ERASE, 140164336697344, 140164345085951,
+ERASE, 140164194082816, 140164194086911,
+ERASE, 140164194086912, 140164202475519,
+ERASE, 140164353478656, 140164353482751,
+ERASE, 140164353482752, 140164361871359,
+ERASE, 140164210868224, 140164210872319,
+ERASE, 140164210872320, 140164219260927,
+ERASE, 140164814848000, 140164814852095,
+ERASE, 140164814852096, 140164823240703,
+ERASE, 140164504481792, 140164504485887,
+ERASE, 140164504485888, 140164512874495,
+ERASE, 140165100068864, 140165100072959,
+ERASE, 140165100072960, 140165108461567,
+ERASE, 140164361871360, 140164361875455,
+ERASE, 140164361875456, 140164370264063,
+ERASE, 140164470910976, 140164470915071,
+ERASE, 140164470915072, 140164479303679,
+ERASE, 140164076650496, 140164076654591,
+ERASE, 140164076654592, 140164085043199,
+ERASE, 140164202475520, 140164202479615,
+ERASE, 140164202479616, 140164210868223,
+ERASE, 140164462518272, 140164462522367,
+ERASE, 140164462522368, 140164470910975,
+ERASE, 140165351718912, 140165351723007,
+ERASE, 140165351723008, 140165360111615,
+ERASE, 140164328300544, 140164328304639,
+ERASE, 140164328304640, 140164336693247,
+ERASE, 140164093435904, 140164093439999,
+ERASE, 140164093440000, 140164101828607,
+ERASE, 140165603368960, 140165603373055,
+ERASE, 140165603373056, 140165611761663,
+ERASE, 140165368504320, 140165368508415,
+ERASE, 140165368508416, 140165376897023,
+ERASE, 140165334933504, 140165334937599,
+ERASE, 140165334937600, 140165343326207,
+ERASE, 140165594976256, 140165594980351,
+ERASE, 140165594980352, 140165603368959,
+ERASE, 140164487696384, 140164487700479,
+ERASE, 140164487700480, 140164496089087,
+ERASE, 140164219260928, 140164219265023,
+ERASE, 140164219265024, 140164227653631,
+ERASE, 140164185690112, 140164185694207,
+ERASE, 140164185694208, 140164194082815,
+ERASE, 140164068257792, 140164068261887,
+ERASE, 140164068261888, 140164076650495,
+ERASE, 140165225893888, 140165225897983,
+ERASE, 140165225897984, 140165234286591,
+ERASE, 140165058105344, 140165058109439,
+ };
+ static const unsigned long set31[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140730890784768, 140737488351231,
+SNULL, 140730890788863, 140737488351231,
+STORE, 140730890784768, 140730890788863,
+STORE, 140730890653696, 140730890788863,
+STORE, 94577123659776, 94577125912575,
+SNULL, 94577123790847, 94577125912575,
+STORE, 94577123659776, 94577123790847,
+STORE, 94577123790848, 94577125912575,
+ERASE, 94577123790848, 94577125912575,
+STORE, 94577125883904, 94577125892095,
+STORE, 94577125892096, 94577125912575,
+STORE, 140624060407808, 140624062660607,
+SNULL, 140624060551167, 140624062660607,
+STORE, 140624060407808, 140624060551167,
+STORE, 140624060551168, 140624062660607,
+ERASE, 140624060551168, 140624062660607,
+STORE, 140624062648320, 140624062656511,
+STORE, 140624062656512, 140624062660607,
+STORE, 140730892140544, 140730892144639,
+STORE, 140730892128256, 140730892140543,
+STORE, 140624062619648, 140624062648319,
+STORE, 140624062611456, 140624062619647,
+STORE, 140624058191872, 140624060407807,
+SNULL, 140624058191872, 140624058290175,
+STORE, 140624058290176, 140624060407807,
+STORE, 140624058191872, 140624058290175,
+SNULL, 140624060383231, 140624060407807,
+STORE, 140624058290176, 140624060383231,
+STORE, 140624060383232, 140624060407807,
+SNULL, 140624060383232, 140624060391423,
+STORE, 140624060391424, 140624060407807,
+STORE, 140624060383232, 140624060391423,
+ERASE, 140624060383232, 140624060391423,
+STORE, 140624060383232, 140624060391423,
+ERASE, 140624060391424, 140624060407807,
+STORE, 140624060391424, 140624060407807,
+STORE, 140624054394880, 140624058191871,
+SNULL, 140624054394880, 140624056053759,
+STORE, 140624056053760, 140624058191871,
+STORE, 140624054394880, 140624056053759,
+SNULL, 140624058150911, 140624058191871,
+STORE, 140624056053760, 140624058150911,
+STORE, 140624058150912, 140624058191871,
+SNULL, 140624058150912, 140624058175487,
+STORE, 140624058175488, 140624058191871,
+STORE, 140624058150912, 140624058175487,
+ERASE, 140624058150912, 140624058175487,
+STORE, 140624058150912, 140624058175487,
+ERASE, 140624058175488, 140624058191871,
+STORE, 140624058175488, 140624058191871,
+STORE, 140624062603264, 140624062619647,
+SNULL, 140624058167295, 140624058175487,
+STORE, 140624058150912, 140624058167295,
+STORE, 140624058167296, 140624058175487,
+SNULL, 140624060387327, 140624060391423,
+STORE, 140624060383232, 140624060387327,
+STORE, 140624060387328, 140624060391423,
+SNULL, 94577125887999, 94577125892095,
+STORE, 94577125883904, 94577125887999,
+STORE, 94577125888000, 94577125892095,
+SNULL, 140624062652415, 140624062656511,
+STORE, 140624062648320, 140624062652415,
+STORE, 140624062652416, 140624062656511,
+ERASE, 140624062619648, 140624062648319,
+STORE, 94577157709824, 94577157844991,
+STORE, 140624046002176, 140624054394879,
+SNULL, 140624046006271, 140624054394879,
+STORE, 140624046002176, 140624046006271,
+STORE, 140624046006272, 140624054394879,
+STORE, 140624037609472, 140624046002175,
+STORE, 140623903391744, 140624037609471,
+SNULL, 140623903391744, 140623940157439,
+STORE, 140623940157440, 140624037609471,
+STORE, 140623903391744, 140623940157439,
+ERASE, 140623903391744, 140623940157439,
+SNULL, 140624007266303, 140624037609471,
+STORE, 140623940157440, 140624007266303,
+STORE, 140624007266304, 140624037609471,
+ERASE, 140624007266304, 140624037609471,
+SNULL, 140623940292607, 140624007266303,
+STORE, 140623940157440, 140623940292607,
+STORE, 140623940292608, 140624007266303,
+SNULL, 140624037613567, 140624046002175,
+STORE, 140624037609472, 140624037613567,
+STORE, 140624037613568, 140624046002175,
+STORE, 140624029216768, 140624037609471,
+SNULL, 140624029220863, 140624037609471,
+STORE, 140624029216768, 140624029220863,
+STORE, 140624029220864, 140624037609471,
+STORE, 140624020824064, 140624029216767,
+SNULL, 140624020828159, 140624029216767,
+STORE, 140624020824064, 140624020828159,
+STORE, 140624020828160, 140624029216767,
+STORE, 140624012431360, 140624020824063,
+SNULL, 140624012435455, 140624020824063,
+STORE, 140624012431360, 140624012435455,
+STORE, 140624012435456, 140624020824063,
+STORE, 140623931764736, 140623940157439,
+STORE, 140623797547008, 140623931764735,
+SNULL, 140623797547008, 140623805939711,
+STORE, 140623805939712, 140623931764735,
+STORE, 140623797547008, 140623805939711,
+ERASE, 140623797547008, 140623805939711,
+SNULL, 140623873048575, 140623931764735,
+STORE, 140623805939712, 140623873048575,
+STORE, 140623873048576, 140623931764735,
+ERASE, 140623873048576, 140623931764735,
+STORE, 140623923372032, 140623940157439,
+STORE, 140623914979328, 140623940157439,
+STORE, 140623906586624, 140623940157439,
+STORE, 140623671721984, 140623873048575,
+SNULL, 140623738830847, 140623873048575,
+STORE, 140623671721984, 140623738830847,
+STORE, 140623738830848, 140623873048575,
+SNULL, 140623738830848, 140623805939711,
+STORE, 140623805939712, 140623873048575,
+STORE, 140623738830848, 140623805939711,
+ERASE, 140623738830848, 140623805939711,
+SNULL, 140623806074879, 140623873048575,
+STORE, 140623805939712, 140623806074879,
+STORE, 140623806074880, 140623873048575,
+SNULL, 140623906586624, 140623931764735,
+STORE, 140623931764736, 140623940157439,
+STORE, 140623906586624, 140623931764735,
+SNULL, 140623931768831, 140623940157439,
+STORE, 140623931764736, 140623931768831,
+STORE, 140623931768832, 140623940157439,
+STORE, 140623537504256, 140623738830847,
+SNULL, 140623537504256, 140623671721983,
+STORE, 140623671721984, 140623738830847,
+STORE, 140623537504256, 140623671721983,
+SNULL, 140623671857151, 140623738830847,
+STORE, 140623671721984, 140623671857151,
+STORE, 140623671857152, 140623738830847,
+SNULL, 140623604613119, 140623671721983,
+STORE, 140623537504256, 140623604613119,
+STORE, 140623604613120, 140623671721983,
+ERASE, 140623604613120, 140623671721983,
+SNULL, 140623537639423, 140623604613119,
+STORE, 140623537504256, 140623537639423,
+STORE, 140623537639424, 140623604613119,
+STORE, 140623537639424, 140623671721983,
+SNULL, 140623537639424, 140623604613119,
+STORE, 140623604613120, 140623671721983,
+STORE, 140623537639424, 140623604613119,
+SNULL, 140623604748287, 140623671721983,
+STORE, 140623604613120, 140623604748287,
+STORE, 140623604748288, 140623671721983,
+STORE, 140623898193920, 140623931764735,
+SNULL, 140623898193920, 140623923372031,
+STORE, 140623923372032, 140623931764735,
+STORE, 140623898193920, 140623923372031,
+SNULL, 140623923376127, 140623931764735,
+STORE, 140623923372032, 140623923376127,
+STORE, 140623923376128, 140623931764735,
+STORE, 140623889801216, 140623923372031,
+SNULL, 140623889801216, 140623898193919,
+STORE, 140623898193920, 140623923372031,
+STORE, 140623889801216, 140623898193919,
+SNULL, 140623898198015, 140623923372031,
+STORE, 140623898193920, 140623898198015,
+STORE, 140623898198016, 140623923372031,
+SNULL, 140623889805311, 140623898193919,
+STORE, 140623889801216, 140623889805311,
+STORE, 140623889805312, 140623898193919,
+SNULL, 140623898198016, 140623906586623,
+STORE, 140623906586624, 140623923372031,
+STORE, 140623898198016, 140623906586623,
+SNULL, 140623906590719, 140623923372031,
+STORE, 140623906586624, 140623906590719,
+STORE, 140623906590720, 140623923372031,
+STORE, 140623881408512, 140623889801215,
+SNULL, 140623906590720, 140623914979327,
+STORE, 140623914979328, 140623923372031,
+STORE, 140623906590720, 140623914979327,
+SNULL, 140623914983423, 140623923372031,
+STORE, 140623914979328, 140623914983423,
+STORE, 140623914983424, 140623923372031,
+SNULL, 140623881412607, 140623889801215,
+STORE, 140623881408512, 140623881412607,
+STORE, 140623881412608, 140623889801215,
+STORE, 140623797547008, 140623805939711,
+STORE, 140623789154304, 140623805939711,
+STORE, 140623780761600, 140623805939711,
+SNULL, 140623780761600, 140623789154303,
+STORE, 140623789154304, 140623805939711,
+STORE, 140623780761600, 140623789154303,
+SNULL, 140623789158399, 140623805939711,
+STORE, 140623789154304, 140623789158399,
+STORE, 140623789158400, 140623805939711,
+STORE, 140623772368896, 140623789154303,
+STORE, 140623763976192, 140623789154303,
+SNULL, 140623763976192, 140623780761599,
+STORE, 140623780761600, 140623789154303,
+STORE, 140623763976192, 140623780761599,
+SNULL, 140623780765695, 140623789154303,
+STORE, 140623780761600, 140623780765695,
+STORE, 140623780765696, 140623789154303,
+SNULL, 140623789158400, 140623797547007,
+STORE, 140623797547008, 140623805939711,
+STORE, 140623789158400, 140623797547007,
+SNULL, 140623797551103, 140623805939711,
+STORE, 140623797547008, 140623797551103,
+STORE, 140623797551104, 140623805939711,
+SNULL, 140623763976192, 140623772368895,
+STORE, 140623772368896, 140623780761599,
+STORE, 140623763976192, 140623772368895,
+SNULL, 140623772372991, 140623780761599,
+STORE, 140623772368896, 140623772372991,
+STORE, 140623772372992, 140623780761599,
+SNULL, 140623763980287, 140623772368895,
+STORE, 140623763976192, 140623763980287,
+STORE, 140623763980288, 140623772368895,
+STORE, 140623755583488, 140623763976191,
+STORE, 140623747190784, 140623763976191,
+SNULL, 140623747190784, 140623755583487,
+STORE, 140623755583488, 140623763976191,
+STORE, 140623747190784, 140623755583487,
+SNULL, 140623755587583, 140623763976191,
+STORE, 140623755583488, 140623755587583,
+STORE, 140623755587584, 140623763976191,
+STORE, 140623529111552, 140623537504255,
+SNULL, 140623747194879, 140623755583487,
+STORE, 140623747190784, 140623747194879,
+STORE, 140623747194880, 140623755583487,
+SNULL, 140623529115647, 140623537504255,
+STORE, 140623529111552, 140623529115647,
+STORE, 140623529115648, 140623537504255,
+STORE, 140623520718848, 140623529111551,
+SNULL, 140623520722943, 140623529111551,
+STORE, 140623520718848, 140623520722943,
+STORE, 140623520722944, 140623529111551,
+STORE, 140623512326144, 140623520718847,
+STORE, 140623503933440, 140623520718847,
+STORE, 140623495540736, 140623520718847,
+STORE, 140623361323008, 140623495540735,
+STORE, 140623227105280, 140623495540735,
+STORE, 140623218712576, 140623227105279,
+STORE, 140623084494848, 140623218712575,
+STORE, 140623076102144, 140623084494847,
+STORE, 140622941884416, 140623076102143,
+SNULL, 140622941884416, 140623000633343,
+STORE, 140623000633344, 140623076102143,
+STORE, 140622941884416, 140623000633343,
+ERASE, 140622941884416, 140623000633343,
+STORE, 140622992240640, 140623000633343,
+STORE, 140622983847936, 140623000633343,
+STORE, 140622849630208, 140622983847935,
+STORE, 140622841237504, 140622849630207,
+SNULL, 140622849630208, 140622866415615,
+STORE, 140622866415616, 140622983847935,
+STORE, 140622849630208, 140622866415615,
+ERASE, 140622849630208, 140622866415615,
+STORE, 140622858022912, 140622866415615,
+SNULL, 140622933524479, 140622983847935,
+STORE, 140622866415616, 140622933524479,
+STORE, 140622933524480, 140622983847935,
+ERASE, 140622933524480, 140622983847935,
+STORE, 140622975455232, 140623000633343,
+STORE, 140622707019776, 140622841237503,
+STORE, 140622967062528, 140623000633343,
+STORE, 140622572802048, 140622841237503,
+STORE, 140622958669824, 140623000633343,
+STORE, 140622438584320, 140622841237503,
+STORE, 140622950277120, 140623000633343,
+SNULL, 140622858027007, 140622866415615,
+STORE, 140622858022912, 140622858027007,
+STORE, 140622858027008, 140622866415615,
+STORE, 140622941884416, 140623000633343,
+STORE, 140622841237504, 140622858022911,
+SNULL, 140622841237504, 140622849630207,
+STORE, 140622849630208, 140622858022911,
+STORE, 140622841237504, 140622849630207,
+SNULL, 140622849634303, 140622858022911,
+STORE, 140622849630208, 140622849634303,
+STORE, 140622849634304, 140622858022911,
+STORE, 140622430191616, 140622438584319,
+SNULL, 140622430195711, 140622438584319,
+STORE, 140622430191616, 140622430195711,
+STORE, 140622430195712, 140622438584319,
+SNULL, 140623361323007, 140623495540735,
+STORE, 140623227105280, 140623361323007,
+STORE, 140623361323008, 140623495540735,
+SNULL, 140623361323008, 140623403286527,
+STORE, 140623403286528, 140623495540735,
+STORE, 140623361323008, 140623403286527,
+ERASE, 140623361323008, 140623403286527,
+SNULL, 140623470395391, 140623495540735,
+STORE, 140623403286528, 140623470395391,
+STORE, 140623470395392, 140623495540735,
+ERASE, 140623470395392, 140623495540735,
+SNULL, 140623227105280, 140623269068799,
+STORE, 140623269068800, 140623361323007,
+STORE, 140623227105280, 140623269068799,
+ERASE, 140623227105280, 140623269068799,
+SNULL, 140623084494848, 140623134851071,
+STORE, 140623134851072, 140623218712575,
+STORE, 140623084494848, 140623134851071,
+ERASE, 140623084494848, 140623134851071,
+SNULL, 140623201959935, 140623218712575,
+STORE, 140623134851072, 140623201959935,
+STORE, 140623201959936, 140623218712575,
+ERASE, 140623201959936, 140623218712575,
+SNULL, 140623067742207, 140623076102143,
+STORE, 140623000633344, 140623067742207,
+STORE, 140623067742208, 140623076102143,
+ERASE, 140623067742208, 140623076102143,
+STORE, 140622295973888, 140622430191615,
+SNULL, 140622295973888, 140622329544703,
+STORE, 140622329544704, 140622430191615,
+STORE, 140622295973888, 140622329544703,
+ERASE, 140622295973888, 140622329544703,
+SNULL, 140622866550783, 140622933524479,
+STORE, 140622866415616, 140622866550783,
+STORE, 140622866550784, 140622933524479,
+SNULL, 140622707019775, 140622841237503,
+STORE, 140622438584320, 140622707019775,
+STORE, 140622707019776, 140622841237503,
+SNULL, 140622707019776, 140622732197887,
+STORE, 140622732197888, 140622841237503,
+STORE, 140622707019776, 140622732197887,
+ERASE, 140622707019776, 140622732197887,
+SNULL, 140622799306751, 140622841237503,
+STORE, 140622732197888, 140622799306751,
+STORE, 140622799306752, 140622841237503,
+ERASE, 140622799306752, 140622841237503,
+SNULL, 140622572802047, 140622707019775,
+STORE, 140622438584320, 140622572802047,
+STORE, 140622572802048, 140622707019775,
+SNULL, 140622572802048, 140622597980159,
+STORE, 140622597980160, 140622707019775,
+STORE, 140622572802048, 140622597980159,
+ERASE, 140622572802048, 140622597980159,
+SNULL, 140622438584320, 140622463762431,
+STORE, 140622463762432, 140622572802047,
+STORE, 140622438584320, 140622463762431,
+ERASE, 140622438584320, 140622463762431,
+SNULL, 140622530871295, 140622572802047,
+STORE, 140622463762432, 140622530871295,
+STORE, 140622530871296, 140622572802047,
+ERASE, 140622530871296, 140622572802047,
+STORE, 140622195326976, 140622430191615,
+SNULL, 140622262435839, 140622430191615,
+STORE, 140622195326976, 140622262435839,
+STORE, 140622262435840, 140622430191615,
+SNULL, 140622262435840, 140622329544703,
+STORE, 140622329544704, 140622430191615,
+STORE, 140622262435840, 140622329544703,
+ERASE, 140622262435840, 140622329544703,
+SNULL, 140622841241599, 140622849630207,
+STORE, 140622841237504, 140622841241599,
+STORE, 140622841241600, 140622849630207,
+STORE, 140623487148032, 140623520718847,
+STORE, 140623478755328, 140623520718847,
+SNULL, 140622941884416, 140622983847935,
+STORE, 140622983847936, 140623000633343,
+STORE, 140622941884416, 140622983847935,
+SNULL, 140622983852031, 140623000633343,
+STORE, 140622983847936, 140622983852031,
+STORE, 140622983852032, 140623000633343,
+STORE, 140623394893824, 140623403286527,
+SNULL, 140623394897919, 140623403286527,
+STORE, 140623394893824, 140623394897919,
+STORE, 140623394897920, 140623403286527,
+SNULL, 140623403421695, 140623470395391,
+STORE, 140623403286528, 140623403421695,
+STORE, 140623403421696, 140623470395391,
+SNULL, 140623478755328, 140623503933439,
+STORE, 140623503933440, 140623520718847,
+STORE, 140623478755328, 140623503933439,
+SNULL, 140623503937535, 140623520718847,
+STORE, 140623503933440, 140623503937535,
+STORE, 140623503937536, 140623520718847,
+SNULL, 140623336177663, 140623361323007,
+STORE, 140623269068800, 140623336177663,
+STORE, 140623336177664, 140623361323007,
+ERASE, 140623336177664, 140623361323007,
+SNULL, 140623269203967, 140623336177663,
+STORE, 140623269068800, 140623269203967,
+STORE, 140623269203968, 140623336177663,
+SNULL, 140623134986239, 140623201959935,
+STORE, 140623134851072, 140623134986239,
+STORE, 140623134986240, 140623201959935,
+SNULL, 140623000768511, 140623067742207,
+STORE, 140623000633344, 140623000768511,
+STORE, 140623000768512, 140623067742207,
+SNULL, 140622396653567, 140622430191615,
+STORE, 140622329544704, 140622396653567,
+STORE, 140622396653568, 140622430191615,
+ERASE, 140622396653568, 140622430191615,
+SNULL, 140622732333055, 140622799306751,
+STORE, 140622732197888, 140622732333055,
+STORE, 140622732333056, 140622799306751,
+SNULL, 140622941884416, 140622975455231,
+STORE, 140622975455232, 140622983847935,
+STORE, 140622941884416, 140622975455231,
+SNULL, 140622975459327, 140622983847935,
+STORE, 140622975455232, 140622975459327,
+STORE, 140622975459328, 140622983847935,
+SNULL, 140622665089023, 140622707019775,
+STORE, 140622597980160, 140622665089023,
+STORE, 140622665089024, 140622707019775,
+ERASE, 140622665089024, 140622707019775,
+SNULL, 140622598115327, 140622665089023,
+STORE, 140622597980160, 140622598115327,
+STORE, 140622598115328, 140622665089023,
+SNULL, 140622463897599, 140622530871295,
+STORE, 140622463762432, 140622463897599,
+STORE, 140622463897600, 140622530871295,
+SNULL, 140622195462143, 140622262435839,
+STORE, 140622195326976, 140622195462143,
+STORE, 140622195462144, 140622262435839,
+STORE, 140623386501120, 140623394893823,
+SNULL, 140622941884416, 140622950277119,
+STORE, 140622950277120, 140622975455231,
+STORE, 140622941884416, 140622950277119,
+SNULL, 140622950281215, 140622975455231,
+STORE, 140622950277120, 140622950281215,
+STORE, 140622950281216, 140622975455231,
+SNULL, 140622941888511, 140622950277119,
+STORE, 140622941884416, 140622941888511,
+STORE, 140622941888512, 140622950277119,
+STORE, 140623378108416, 140623394893823,
+SNULL, 140623478755328, 140623495540735,
+STORE, 140623495540736, 140623503933439,
+STORE, 140623478755328, 140623495540735,
+SNULL, 140623495544831, 140623503933439,
+STORE, 140623495540736, 140623495544831,
+STORE, 140623495544832, 140623503933439,
+SNULL, 140623478755328, 140623487148031,
+STORE, 140623487148032, 140623495540735,
+STORE, 140623478755328, 140623487148031,
+SNULL, 140623487152127, 140623495540735,
+STORE, 140623487148032, 140623487152127,
+STORE, 140623487152128, 140623495540735,
+SNULL, 140623218716671, 140623227105279,
+STORE, 140623218712576, 140623218716671,
+STORE, 140623218716672, 140623227105279,
+SNULL, 140623076106239, 140623084494847,
+STORE, 140623076102144, 140623076106239,
+STORE, 140623076106240, 140623084494847,
+SNULL, 140622329679871, 140622396653567,
+STORE, 140622329544704, 140622329679871,
+STORE, 140622329679872, 140622396653567,
+SNULL, 140622950281216, 140622958669823,
+STORE, 140622958669824, 140622975455231,
+STORE, 140622950281216, 140622958669823,
+SNULL, 140622958673919, 140622975455231,
+STORE, 140622958669824, 140622958673919,
+STORE, 140622958673920, 140622975455231,
+SNULL, 140623503937536, 140623512326143,
+STORE, 140623512326144, 140623520718847,
+STORE, 140623503937536, 140623512326143,
+SNULL, 140623512330239, 140623520718847,
+STORE, 140623512326144, 140623512330239,
+STORE, 140623512330240, 140623520718847,
+SNULL, 140623378108416, 140623386501119,
+STORE, 140623386501120, 140623394893823,
+STORE, 140623378108416, 140623386501119,
+SNULL, 140623386505215, 140623394893823,
+STORE, 140623386501120, 140623386505215,
+STORE, 140623386505216, 140623394893823,
+STORE, 140623369715712, 140623386501119,
+STORE, 140623361323008, 140623386501119,
+STORE, 140623352930304, 140623386501119,
+SNULL, 140623352930304, 140623361323007,
+STORE, 140623361323008, 140623386501119,
+STORE, 140623352930304, 140623361323007,
+SNULL, 140623361327103, 140623386501119,
+STORE, 140623361323008, 140623361327103,
+STORE, 140623361327104, 140623386501119,
+SNULL, 140623478759423, 140623487148031,
+STORE, 140623478755328, 140623478759423,
+STORE, 140623478759424, 140623487148031,
+STORE, 140623344537600, 140623361323007,
+STORE, 140623260676096, 140623269068799,
+SNULL, 140622958673920, 140622967062527,
+STORE, 140622967062528, 140622975455231,
+STORE, 140622958673920, 140622967062527,
+SNULL, 140622967066623, 140622975455231,
+STORE, 140622967062528, 140622967066623,
+STORE, 140622967066624, 140622975455231,
+STORE, 140623252283392, 140623269068799,
+STORE, 140623243890688, 140623269068799,
+SNULL, 140622983852032, 140622992240639,
+STORE, 140622992240640, 140623000633343,
+STORE, 140622983852032, 140622992240639,
+SNULL, 140622992244735, 140623000633343,
+STORE, 140622992240640, 140622992244735,
+STORE, 140622992244736, 140623000633343,
+STORE, 140623235497984, 140623269068799,
+STORE, 140623218716672, 140623235497983,
+STORE, 140623210319872, 140623218712575,
+STORE, 140623126458368, 140623134851071,
+SNULL, 140623210323967, 140623218712575,
+STORE, 140623210319872, 140623210323967,
+STORE, 140623210323968, 140623218712575,
+SNULL, 140623218716672, 140623227105279,
+STORE, 140623227105280, 140623235497983,
+STORE, 140623218716672, 140623227105279,
+SNULL, 140623227109375, 140623235497983,
+STORE, 140623227105280, 140623227109375,
+STORE, 140623227109376, 140623235497983,
+STORE, 140623118065664, 140623134851071,
+STORE, 140623109672960, 140623134851071,
+SNULL, 140623109677055, 140623134851071,
+STORE, 140623109672960, 140623109677055,
+STORE, 140623109677056, 140623134851071,
+STORE, 140623101280256, 140623109672959,
+STORE, 140623092887552, 140623109672959,
+SNULL, 140623092887552, 140623101280255,
+STORE, 140623101280256, 140623109672959,
+STORE, 140623092887552, 140623101280255,
+SNULL, 140623101284351, 140623109672959,
+STORE, 140623101280256, 140623101284351,
+STORE, 140623101284352, 140623109672959,
+SNULL, 140623361327104, 140623378108415,
+STORE, 140623378108416, 140623386501119,
+STORE, 140623361327104, 140623378108415,
+SNULL, 140623378112511, 140623386501119,
+STORE, 140623378108416, 140623378112511,
+STORE, 140623378112512, 140623386501119,
+SNULL, 140623235497984, 140623243890687,
+STORE, 140623243890688, 140623269068799,
+STORE, 140623235497984, 140623243890687,
+SNULL, 140623243894783, 140623269068799,
+STORE, 140623243890688, 140623243894783,
+STORE, 140623243894784, 140623269068799,
+SNULL, 140623361327104, 140623369715711,
+STORE, 140623369715712, 140623378108415,
+STORE, 140623361327104, 140623369715711,
+SNULL, 140623369719807, 140623378108415,
+STORE, 140623369715712, 140623369719807,
+STORE, 140623369719808, 140623378108415,
+SNULL, 140623243894784, 140623252283391,
+STORE, 140623252283392, 140623269068799,
+STORE, 140623243894784, 140623252283391,
+SNULL, 140623252287487, 140623269068799,
+STORE, 140623252283392, 140623252287487,
+STORE, 140623252287488, 140623269068799,
+SNULL, 140623235502079, 140623243890687,
+STORE, 140623235497984, 140623235502079,
+STORE, 140623235502080, 140623243890687,
+SNULL, 140623344541695, 140623361323007,
+STORE, 140623344537600, 140623344541695,
+STORE, 140623344541696, 140623361323007,
+STORE, 140623076106240, 140623092887551,
+SNULL, 140623076106240, 140623084494847,
+STORE, 140623084494848, 140623092887551,
+STORE, 140623076106240, 140623084494847,
+SNULL, 140623084498943, 140623092887551,
+STORE, 140623084494848, 140623084498943,
+STORE, 140623084498944, 140623092887551,
+SNULL, 140623344541696, 140623352930303,
+STORE, 140623352930304, 140623361323007,
+STORE, 140623344541696, 140623352930303,
+SNULL, 140623352934399, 140623361323007,
+STORE, 140623352930304, 140623352934399,
+STORE, 140623352934400, 140623361323007,
+SNULL, 140623109677056, 140623118065663,
+STORE, 140623118065664, 140623134851071,
+STORE, 140623109677056, 140623118065663,
+SNULL, 140623118069759, 140623134851071,
+STORE, 140623118065664, 140623118069759,
+STORE, 140623118069760, 140623134851071,
+STORE, 140622832844800, 140622841237503,
+STORE, 140622824452096, 140622841237503,
+SNULL, 140622824452096, 140622832844799,
+STORE, 140622832844800, 140622841237503,
+STORE, 140622824452096, 140622832844799,
+SNULL, 140622832848895, 140622841237503,
+STORE, 140622832844800, 140622832848895,
+STORE, 140622832848896, 140622841237503,
+STORE, 140622816059392, 140622832844799,
+SNULL, 140623092891647, 140623101280255,
+STORE, 140623092887552, 140623092891647,
+STORE, 140623092891648, 140623101280255,
+SNULL, 140623118069760, 140623126458367,
+STORE, 140623126458368, 140623134851071,
+STORE, 140623118069760, 140623126458367,
+SNULL, 140623126462463, 140623134851071,
+STORE, 140623126458368, 140623126462463,
+STORE, 140623126462464, 140623134851071,
+SNULL, 140623252287488, 140623260676095,
+STORE, 140623260676096, 140623269068799,
+STORE, 140623252287488, 140623260676095,
+SNULL, 140623260680191, 140623269068799,
+STORE, 140623260676096, 140623260680191,
+STORE, 140623260680192, 140623269068799,
+STORE, 140622807666688, 140622832844799,
+STORE, 140622723805184, 140622732197887,
+STORE, 140622715412480, 140622732197887,
+STORE, 140622707019776, 140622732197887,
+SNULL, 140622707023871, 140622732197887,
+STORE, 140622707019776, 140622707023871,
+STORE, 140622707023872, 140622732197887,
+STORE, 140622698627072, 140622707019775,
+STORE, 140622690234368, 140622707019775,
+SNULL, 140622690238463, 140622707019775,
+STORE, 140622690234368, 140622690238463,
+STORE, 140622690238464, 140622707019775,
+SNULL, 140622807666688, 140622816059391,
+STORE, 140622816059392, 140622832844799,
+STORE, 140622807666688, 140622816059391,
+SNULL, 140622816063487, 140622832844799,
+STORE, 140622816059392, 140622816063487,
+STORE, 140622816063488, 140622832844799,
+STORE, 140622681841664, 140622690234367,
+STORE, 140622673448960, 140622690234367,
+SNULL, 140622673453055, 140622690234367,
+STORE, 140622673448960, 140622673453055,
+STORE, 140622673453056, 140622690234367,
+STORE, 140622589587456, 140622597980159,
+SNULL, 140622807670783, 140622816059391,
+STORE, 140622807666688, 140622807670783,
+STORE, 140622807670784, 140622816059391,
+STORE, 140622581194752, 140622597980159,
+SNULL, 140622581198847, 140622597980159,
+STORE, 140622581194752, 140622581198847,
+STORE, 140622581198848, 140622597980159,
+SNULL, 140622816063488, 140622824452095,
+STORE, 140622824452096, 140622832844799,
+STORE, 140622816063488, 140622824452095,
+SNULL, 140622824456191, 140622832844799,
+STORE, 140622824452096, 140622824456191,
+STORE, 140622824456192, 140622832844799,
+STORE, 140622572802048, 140622581194751,
+SNULL, 140622572806143, 140622581194751,
+STORE, 140622572802048, 140622572806143,
+STORE, 140622572806144, 140622581194751,
+STORE, 140622564409344, 140622572802047,
+STORE, 140622556016640, 140622572802047,
+SNULL, 140622556016640, 140622564409343,
+STORE, 140622564409344, 140622572802047,
+STORE, 140622556016640, 140622564409343,
+SNULL, 140622564413439, 140622572802047,
+STORE, 140622564409344, 140622564413439,
+STORE, 140622564413440, 140622572802047,
+SNULL, 140622690238464, 140622698627071,
+STORE, 140622698627072, 140622707019775,
+STORE, 140622690238464, 140622698627071,
+SNULL, 140622698631167, 140622707019775,
+STORE, 140622698627072, 140622698631167,
+STORE, 140622698631168, 140622707019775,
+SNULL, 140622707023872, 140622723805183,
+STORE, 140622723805184, 140622732197887,
+STORE, 140622707023872, 140622723805183,
+SNULL, 140622723809279, 140622732197887,
+STORE, 140622723805184, 140622723809279,
+STORE, 140622723809280, 140622732197887,
+SNULL, 140622707023872, 140622715412479,
+STORE, 140622715412480, 140622723805183,
+STORE, 140622707023872, 140622715412479,
+SNULL, 140622715416575, 140622723805183,
+STORE, 140622715412480, 140622715416575,
+STORE, 140622715416576, 140622723805183,
+STORE, 140622547623936, 140622564409343,
+SNULL, 140622547628031, 140622564409343,
+STORE, 140622547623936, 140622547628031,
+STORE, 140622547628032, 140622564409343,
+STORE, 140622539231232, 140622547623935,
+SNULL, 140622539235327, 140622547623935,
+STORE, 140622539231232, 140622539235327,
+STORE, 140622539235328, 140622547623935,
+SNULL, 140622581198848, 140622589587455,
+STORE, 140622589587456, 140622597980159,
+STORE, 140622581198848, 140622589587455,
+SNULL, 140622589591551, 140622597980159,
+STORE, 140622589587456, 140622589591551,
+STORE, 140622589591552, 140622597980159,
+STORE, 140622455369728, 140622463762431,
+SNULL, 140622455373823, 140622463762431,
+STORE, 140622455369728, 140622455373823,
+STORE, 140622455373824, 140622463762431,
+STORE, 140622446977024, 140622455369727,
+SNULL, 140622446981119, 140622455369727,
+STORE, 140622446977024, 140622446981119,
+STORE, 140622446981120, 140622455369727,
+SNULL, 140622547628032, 140622556016639,
+STORE, 140622556016640, 140622564409343,
+STORE, 140622547628032, 140622556016639,
+SNULL, 140622556020735, 140622564409343,
+STORE, 140622556016640, 140622556020735,
+STORE, 140622556020736, 140622564409343,
+STORE, 140622430195712, 140622446977023,
+STORE, 140622421798912, 140622430191615,
+SNULL, 140622430195712, 140622438584319,
+STORE, 140622438584320, 140622446977023,
+STORE, 140622430195712, 140622438584319,
+SNULL, 140622438588415, 140622446977023,
+STORE, 140622438584320, 140622438588415,
+STORE, 140622438588416, 140622446977023,
+STORE, 140622413406208, 140622430191615,
+STORE, 140622405013504, 140622430191615,
+SNULL, 140622405013504, 140622413406207,
+STORE, 140622413406208, 140622430191615,
+STORE, 140622405013504, 140622413406207,
+SNULL, 140622413410303, 140622430191615,
+STORE, 140622413406208, 140622413410303,
+STORE, 140622413410304, 140622430191615,
+SNULL, 140622673453056, 140622681841663,
+STORE, 140622681841664, 140622690234367,
+STORE, 140622673453056, 140622681841663,
+SNULL, 140622681845759, 140622690234367,
+STORE, 140622681841664, 140622681845759,
+STORE, 140622681845760, 140622690234367,
+STORE, 140622321152000, 140622329544703,
+SNULL, 140622413410304, 140622421798911,
+STORE, 140622421798912, 140622430191615,
+STORE, 140622413410304, 140622421798911,
+SNULL, 140622421803007, 140622430191615,
+STORE, 140622421798912, 140622421803007,
+STORE, 140622421803008, 140622430191615,
+STORE, 140622312759296, 140622329544703,
+SNULL, 140622312763391, 140622329544703,
+STORE, 140622312759296, 140622312763391,
+STORE, 140622312763392, 140622329544703,
+SNULL, 140622405017599, 140622413406207,
+STORE, 140622405013504, 140622405017599,
+STORE, 140622405017600, 140622413406207,
+STORE, 140622304366592, 140622312759295,
+SNULL, 140622304370687, 140622312759295,
+STORE, 140622304366592, 140622304370687,
+STORE, 140622304370688, 140622312759295,
+SNULL, 140622312763392, 140622321151999,
+STORE, 140622321152000, 140622329544703,
+STORE, 140622312763392, 140622321151999,
+SNULL, 140622321156095, 140622329544703,
+STORE, 140622321152000, 140622321156095,
+STORE, 140622321156096, 140622329544703,
+STORE, 140624062619648, 140624062648319,
+STORE, 140624010240000, 140624012431359,
+SNULL, 140624010240000, 140624010330111,
+STORE, 140624010330112, 140624012431359,
+STORE, 140624010240000, 140624010330111,
+SNULL, 140624012423167, 140624012431359,
+STORE, 140624010330112, 140624012423167,
+STORE, 140624012423168, 140624012431359,
+ERASE, 140624012423168, 140624012431359,
+STORE, 140624012423168, 140624012431359,
+SNULL, 140624012427263, 140624012431359,
+STORE, 140624012423168, 140624012427263,
+STORE, 140624012427264, 140624012431359,
+ERASE, 140624062619648, 140624062648319,
+ERASE, 140622849630208, 140622849634303,
+ERASE, 140622849634304, 140622858022911,
+ERASE, 140623394893824, 140623394897919,
+ERASE, 140623394897920, 140623403286527,
+ERASE, 140623361323008, 140623361327103,
+ERASE, 140623361327104, 140623369715711,
+ERASE, 140623084494848, 140623084498943,
+ERASE, 140623084498944, 140623092887551,
+ERASE, 140623931764736, 140623931768831,
+ERASE, 140623931768832, 140623940157439,
+ERASE, 140622841237504, 140622841241599,
+ERASE, 140622841241600, 140622849630207,
+ERASE, 140623487148032, 140623487152127,
+ERASE, 140623487152128, 140623495540735,
+ERASE, 140623109672960, 140623109677055,
+ERASE, 140623109677056, 140623118065663,
+ERASE, 140622983847936, 140622983852031,
+ERASE, 140622983852032, 140622992240639,
+ERASE, 140623352930304, 140623352934399,
+ERASE, 140623352934400, 140623361323007,
+ERASE, 140622564409344, 140622564413439,
+ERASE, 140622564413440, 140622572802047,
+ERASE, 140622430191616, 140622430195711,
+ERASE, 140622430195712, 140622438584319,
+ERASE, 140622958669824, 140622958673919,
+ERASE, 140622958673920, 140622967062527,
+ERASE, 140622992240640, 140622992244735,
+ERASE, 140622992244736, 140623000633343,
+ERASE, 140623227105280, 140623227109375,
+ERASE, 140623227109376, 140623235497983,
+ERASE, 140622321152000, 140622321156095,
+ERASE, 140622321156096, 140622329544703,
+ERASE, 140622858022912, 140622858027007,
+ERASE, 140622858027008, 140622866415615,
+ERASE, 140622975455232, 140622975459327,
+ERASE, 140622975459328, 140622983847935,
+ERASE, 140623378108416, 140623378112511,
+ERASE, 140623378112512, 140623386501119,
+ERASE, 140623495540736, 140623495544831,
+ERASE, 140623495544832, 140623503933439,
+ERASE, 140623118065664, 140623118069759,
+ERASE, 140623118069760, 140623126458367,
+ERASE, 140622572802048, 140622572806143,
+ERASE, 140622572806144, 140622581194751,
+ERASE, 140622421798912, 140622421803007,
+ERASE, 140622421803008, 140622430191615,
+ERASE, 140622967062528, 140622967066623,
+ERASE, 140622967066624, 140622975455231,
+ERASE, 140623252283392, 140623252287487,
+ERASE, 140623252287488, 140623260676095,
+ERASE, 140622673448960, 140622673453055,
+ERASE, 140622673453056, 140622681841663,
+ERASE, 140623076102144, 140623076106239,
+ERASE, 140623076106240, 140623084494847,
+ERASE, 140623101280256, 140623101284351,
+ERASE, 140623101284352, 140623109672959,
+ERASE, 140622715412480, 140622715416575,
+ERASE, 140622715416576, 140622723805183,
+ERASE, 140622405013504, 140622405017599,
+ERASE, 140622405017600, 140622413406207,
+ERASE, 140623478755328, 140623478759423,
+ERASE, 140623478759424, 140623487148031,
+ERASE, 140623906586624, 140623906590719,
+ERASE, 140623906590720, 140623914979327,
+ERASE, 140622950277120, 140622950281215,
+ERASE, 140622950281216, 140622958669823,
+ };
+ static const unsigned long set32[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140731244212224, 140737488351231,
+SNULL, 140731244216319, 140737488351231,
+STORE, 140731244212224, 140731244216319,
+STORE, 140731244081152, 140731244216319,
+STORE, 94427773984768, 94427776237567,
+SNULL, 94427774115839, 94427776237567,
+STORE, 94427773984768, 94427774115839,
+STORE, 94427774115840, 94427776237567,
+ERASE, 94427774115840, 94427776237567,
+STORE, 94427776208896, 94427776217087,
+STORE, 94427776217088, 94427776237567,
+STORE, 140401464893440, 140401467146239,
+SNULL, 140401465036799, 140401467146239,
+STORE, 140401464893440, 140401465036799,
+STORE, 140401465036800, 140401467146239,
+ERASE, 140401465036800, 140401467146239,
+STORE, 140401467133952, 140401467142143,
+STORE, 140401467142144, 140401467146239,
+STORE, 140731244507136, 140731244511231,
+STORE, 140731244494848, 140731244507135,
+STORE, 140401467105280, 140401467133951,
+STORE, 140401467097088, 140401467105279,
+STORE, 140401462677504, 140401464893439,
+SNULL, 140401462677504, 140401462775807,
+STORE, 140401462775808, 140401464893439,
+STORE, 140401462677504, 140401462775807,
+SNULL, 140401464868863, 140401464893439,
+STORE, 140401462775808, 140401464868863,
+STORE, 140401464868864, 140401464893439,
+SNULL, 140401464868864, 140401464877055,
+STORE, 140401464877056, 140401464893439,
+STORE, 140401464868864, 140401464877055,
+ERASE, 140401464868864, 140401464877055,
+STORE, 140401464868864, 140401464877055,
+ERASE, 140401464877056, 140401464893439,
+STORE, 140401464877056, 140401464893439,
+STORE, 140401458880512, 140401462677503,
+SNULL, 140401458880512, 140401460539391,
+STORE, 140401460539392, 140401462677503,
+STORE, 140401458880512, 140401460539391,
+SNULL, 140401462636543, 140401462677503,
+STORE, 140401460539392, 140401462636543,
+STORE, 140401462636544, 140401462677503,
+SNULL, 140401462636544, 140401462661119,
+STORE, 140401462661120, 140401462677503,
+STORE, 140401462636544, 140401462661119,
+ERASE, 140401462636544, 140401462661119,
+STORE, 140401462636544, 140401462661119,
+ERASE, 140401462661120, 140401462677503,
+STORE, 140401462661120, 140401462677503,
+STORE, 140401467088896, 140401467105279,
+SNULL, 140401462652927, 140401462661119,
+STORE, 140401462636544, 140401462652927,
+STORE, 140401462652928, 140401462661119,
+SNULL, 140401464872959, 140401464877055,
+STORE, 140401464868864, 140401464872959,
+STORE, 140401464872960, 140401464877055,
+SNULL, 94427776212991, 94427776217087,
+STORE, 94427776208896, 94427776212991,
+STORE, 94427776212992, 94427776217087,
+SNULL, 140401467138047, 140401467142143,
+STORE, 140401467133952, 140401467138047,
+STORE, 140401467138048, 140401467142143,
+ERASE, 140401467105280, 140401467133951,
+STORE, 94427784683520, 94427784818687,
+STORE, 140401450487808, 140401458880511,
+SNULL, 140401450491903, 140401458880511,
+STORE, 140401450487808, 140401450491903,
+STORE, 140401450491904, 140401458880511,
+STORE, 140401442095104, 140401450487807,
+STORE, 140401307877376, 140401442095103,
+SNULL, 140401307877376, 140401340055551,
+STORE, 140401340055552, 140401442095103,
+STORE, 140401307877376, 140401340055551,
+ERASE, 140401307877376, 140401340055551,
+SNULL, 140401407164415, 140401442095103,
+STORE, 140401340055552, 140401407164415,
+STORE, 140401407164416, 140401442095103,
+ERASE, 140401407164416, 140401442095103,
+SNULL, 140401340190719, 140401407164415,
+STORE, 140401340055552, 140401340190719,
+STORE, 140401340190720, 140401407164415,
+SNULL, 140401442099199, 140401450487807,
+STORE, 140401442095104, 140401442099199,
+STORE, 140401442099200, 140401450487807,
+STORE, 140401433702400, 140401442095103,
+SNULL, 140401433706495, 140401442095103,
+STORE, 140401433702400, 140401433706495,
+STORE, 140401433706496, 140401442095103,
+STORE, 140401425309696, 140401433702399,
+SNULL, 140401425313791, 140401433702399,
+STORE, 140401425309696, 140401425313791,
+STORE, 140401425313792, 140401433702399,
+STORE, 140401416916992, 140401425309695,
+SNULL, 140401416921087, 140401425309695,
+STORE, 140401416916992, 140401416921087,
+STORE, 140401416921088, 140401425309695,
+STORE, 140401408524288, 140401416916991,
+STORE, 140401205837824, 140401340055551,
+SNULL, 140401272946687, 140401340055551,
+STORE, 140401205837824, 140401272946687,
+STORE, 140401272946688, 140401340055551,
+ERASE, 140401272946688, 140401340055551,
+SNULL, 140401205972991, 140401272946687,
+STORE, 140401205837824, 140401205972991,
+STORE, 140401205972992, 140401272946687,
+STORE, 140401331662848, 140401340055551,
+STORE, 140401323270144, 140401340055551,
+STORE, 140401138728960, 140401205837823,
+STORE, 140401314877440, 140401340055551,
+SNULL, 140401408528383, 140401416916991,
+STORE, 140401408524288, 140401408528383,
+STORE, 140401408528384, 140401416916991,
+SNULL, 140401138864127, 140401205837823,
+STORE, 140401138728960, 140401138864127,
+STORE, 140401138864128, 140401205837823,
+STORE, 140401004511232, 140401138728959,
+SNULL, 140401071620095, 140401138728959,
+STORE, 140401004511232, 140401071620095,
+STORE, 140401071620096, 140401138728959,
+ERASE, 140401071620096, 140401138728959,
+STORE, 140400870293504, 140401071620095,
+SNULL, 140400937402367, 140401071620095,
+STORE, 140400870293504, 140400937402367,
+STORE, 140400937402368, 140401071620095,
+SNULL, 140400937402368, 140401004511231,
+STORE, 140401004511232, 140401071620095,
+STORE, 140400937402368, 140401004511231,
+ERASE, 140400937402368, 140401004511231,
+STORE, 140401306484736, 140401340055551,
+SNULL, 140401306484736, 140401323270143,
+STORE, 140401323270144, 140401340055551,
+STORE, 140401306484736, 140401323270143,
+SNULL, 140401323274239, 140401340055551,
+STORE, 140401323270144, 140401323274239,
+STORE, 140401323274240, 140401340055551,
+SNULL, 140401004646399, 140401071620095,
+STORE, 140401004511232, 140401004646399,
+STORE, 140401004646400, 140401071620095,
+SNULL, 140400870428671, 140400937402367,
+STORE, 140400870293504, 140400870428671,
+STORE, 140400870428672, 140400937402367,
+SNULL, 140401306488831, 140401323270143,
+STORE, 140401306484736, 140401306488831,
+STORE, 140401306488832, 140401323270143,
+STORE, 140401298092032, 140401306484735,
+SNULL, 140401306488832, 140401314877439,
+STORE, 140401314877440, 140401323270143,
+STORE, 140401306488832, 140401314877439,
+SNULL, 140401314881535, 140401323270143,
+STORE, 140401314877440, 140401314881535,
+STORE, 140401314881536, 140401323270143,
+SNULL, 140401323274240, 140401331662847,
+STORE, 140401331662848, 140401340055551,
+STORE, 140401323274240, 140401331662847,
+SNULL, 140401331666943, 140401340055551,
+STORE, 140401331662848, 140401331666943,
+STORE, 140401331666944, 140401340055551,
+SNULL, 140401298096127, 140401306484735,
+STORE, 140401298092032, 140401298096127,
+STORE, 140401298096128, 140401306484735,
+STORE, 140401289699328, 140401298092031,
+STORE, 140401281306624, 140401298092031,
+STORE, 140401130336256, 140401138728959,
+SNULL, 140401281306624, 140401289699327,
+STORE, 140401289699328, 140401298092031,
+STORE, 140401281306624, 140401289699327,
+SNULL, 140401289703423, 140401298092031,
+STORE, 140401289699328, 140401289703423,
+STORE, 140401289703424, 140401298092031,
+STORE, 140401121943552, 140401138728959,
+STORE, 140401113550848, 140401138728959,
+SNULL, 140401281310719, 140401289699327,
+STORE, 140401281306624, 140401281310719,
+STORE, 140401281310720, 140401289699327,
+SNULL, 140401113550848, 140401121943551,
+STORE, 140401121943552, 140401138728959,
+STORE, 140401113550848, 140401121943551,
+SNULL, 140401121947647, 140401138728959,
+STORE, 140401121943552, 140401121947647,
+STORE, 140401121947648, 140401138728959,
+STORE, 140401105158144, 140401121943551,
+SNULL, 140401121947648, 140401130336255,
+STORE, 140401130336256, 140401138728959,
+STORE, 140401121947648, 140401130336255,
+SNULL, 140401130340351, 140401138728959,
+STORE, 140401130336256, 140401130340351,
+STORE, 140401130340352, 140401138728959,
+STORE, 140401096765440, 140401121943551,
+SNULL, 140401096765440, 140401113550847,
+STORE, 140401113550848, 140401121943551,
+STORE, 140401096765440, 140401113550847,
+SNULL, 140401113554943, 140401121943551,
+STORE, 140401113550848, 140401113554943,
+STORE, 140401113554944, 140401121943551,
+STORE, 140401088372736, 140401113550847,
+SNULL, 140401088372736, 140401096765439,
+STORE, 140401096765440, 140401113550847,
+STORE, 140401088372736, 140401096765439,
+SNULL, 140401096769535, 140401113550847,
+STORE, 140401096765440, 140401096769535,
+STORE, 140401096769536, 140401113550847,
+SNULL, 140401096769536, 140401105158143,
+STORE, 140401105158144, 140401113550847,
+STORE, 140401096769536, 140401105158143,
+SNULL, 140401105162239, 140401113550847,
+STORE, 140401105158144, 140401105162239,
+STORE, 140401105162240, 140401113550847,
+SNULL, 140401088376831, 140401096765439,
+STORE, 140401088372736, 140401088376831,
+STORE, 140401088376832, 140401096765439,
+STORE, 140401079980032, 140401088372735,
+STORE, 140400996118528, 140401004511231,
+SNULL, 140401079984127, 140401088372735,
+STORE, 140401079980032, 140401079984127,
+STORE, 140401079984128, 140401088372735,
+SNULL, 140400996122623, 140401004511231,
+STORE, 140400996118528, 140400996122623,
+STORE, 140400996122624, 140401004511231,
+STORE, 140400987725824, 140400996118527,
+STORE, 140400979333120, 140400996118527,
+STORE, 140400803184640, 140400870293503,
+SNULL, 140400803319807, 140400870293503,
+STORE, 140400803184640, 140400803319807,
+STORE, 140400803319808, 140400870293503,
+SNULL, 140400979333120, 140400987725823,
+STORE, 140400987725824, 140400996118527,
+STORE, 140400979333120, 140400987725823,
+SNULL, 140400987729919, 140400996118527,
+STORE, 140400987725824, 140400987729919,
+STORE, 140400987729920, 140400996118527,
+STORE, 140400970940416, 140400987725823,
+STORE, 140400962547712, 140400987725823,
+STORE, 140400668966912, 140400803184639,
+STORE, 140400954155008, 140400987725823,
+STORE, 140400945762304, 140400987725823,
+STORE, 140400660574208, 140400668966911,
+STORE, 140400593465344, 140400660574207,
+STORE, 140400585072640, 140400593465343,
+STORE, 140400450854912, 140400585072639,
+STORE, 140400442462208, 140400450854911,
+STORE, 140400434069504, 140400450854911,
+STORE, 140400299851776, 140400434069503,
+STORE, 140400291459072, 140400299851775,
+SNULL, 140400299851776, 140400333422591,
+STORE, 140400333422592, 140400434069503,
+STORE, 140400299851776, 140400333422591,
+ERASE, 140400299851776, 140400333422591,
+STORE, 140400325029888, 140400333422591,
+STORE, 140400157241344, 140400291459071,
+STORE, 140400316637184, 140400333422591,
+STORE, 140400308244480, 140400333422591,
+STORE, 140400023023616, 140400291459071,
+STORE, 140400291459072, 140400333422591,
+SNULL, 140400023023616, 140400064987135,
+STORE, 140400064987136, 140400291459071,
+STORE, 140400023023616, 140400064987135,
+ERASE, 140400023023616, 140400064987135,
+STORE, 140400056594432, 140400064987135,
+SNULL, 140400056598527, 140400064987135,
+STORE, 140400056594432, 140400056598527,
+STORE, 140400056598528, 140400064987135,
+STORE, 140399989485568, 140400056594431,
+SNULL, 140400291459072, 140400316637183,
+STORE, 140400316637184, 140400333422591,
+STORE, 140400291459072, 140400316637183,
+SNULL, 140400316641279, 140400333422591,
+STORE, 140400316637184, 140400316641279,
+STORE, 140400316641280, 140400333422591,
+STORE, 140399855267840, 140400056594431,
+SNULL, 140399855267840, 140399863660543,
+STORE, 140399863660544, 140400056594431,
+STORE, 140399855267840, 140399863660543,
+ERASE, 140399855267840, 140399863660543,
+SNULL, 140400736075775, 140400803184639,
+STORE, 140400668966912, 140400736075775,
+STORE, 140400736075776, 140400803184639,
+ERASE, 140400736075776, 140400803184639,
+SNULL, 140400669102079, 140400736075775,
+STORE, 140400668966912, 140400669102079,
+STORE, 140400669102080, 140400736075775,
+STORE, 140400669102080, 140400803184639,
+SNULL, 140400669102080, 140400736075775,
+STORE, 140400736075776, 140400803184639,
+STORE, 140400669102080, 140400736075775,
+SNULL, 140400736210943, 140400803184639,
+STORE, 140400736075776, 140400736210943,
+STORE, 140400736210944, 140400803184639,
+ERASE, 140400593465344, 140400660574207,
+SNULL, 140400450854912, 140400467640319,
+STORE, 140400467640320, 140400585072639,
+STORE, 140400450854912, 140400467640319,
+ERASE, 140400450854912, 140400467640319,
+STORE, 140399729442816, 140400056594431,
+SNULL, 140400400531455, 140400434069503,
+STORE, 140400333422592, 140400400531455,
+STORE, 140400400531456, 140400434069503,
+ERASE, 140400400531456, 140400434069503,
+SNULL, 140400333557759, 140400400531455,
+STORE, 140400333422592, 140400333557759,
+STORE, 140400333557760, 140400400531455,
+SNULL, 140400157241343, 140400291459071,
+STORE, 140400064987136, 140400157241343,
+STORE, 140400157241344, 140400291459071,
+SNULL, 140400157241344, 140400199204863,
+STORE, 140400199204864, 140400291459071,
+STORE, 140400157241344, 140400199204863,
+ERASE, 140400157241344, 140400199204863,
+SNULL, 140400266313727, 140400291459071,
+STORE, 140400199204864, 140400266313727,
+STORE, 140400266313728, 140400291459071,
+ERASE, 140400266313728, 140400291459071,
+SNULL, 140400132095999, 140400157241343,
+STORE, 140400064987136, 140400132095999,
+STORE, 140400132096000, 140400157241343,
+ERASE, 140400132096000, 140400157241343,
+SNULL, 140400065122303, 140400132095999,
+STORE, 140400064987136, 140400065122303,
+STORE, 140400065122304, 140400132095999,
+SNULL, 140400945762304, 140400954155007,
+STORE, 140400954155008, 140400987725823,
+STORE, 140400945762304, 140400954155007,
+SNULL, 140400954159103, 140400987725823,
+STORE, 140400954155008, 140400954159103,
+STORE, 140400954159104, 140400987725823,
+SNULL, 140400434069504, 140400442462207,
+STORE, 140400442462208, 140400450854911,
+STORE, 140400434069504, 140400442462207,
+SNULL, 140400442466303, 140400450854911,
+STORE, 140400442462208, 140400442466303,
+STORE, 140400442466304, 140400450854911,
+SNULL, 140400291463167, 140400316637183,
+STORE, 140400291459072, 140400291463167,
+STORE, 140400291463168, 140400316637183,
+STORE, 140400652181504, 140400668966911,
+STORE, 140400643788800, 140400668966911,
+SNULL, 140400291463168, 140400299851775,
+STORE, 140400299851776, 140400316637183,
+STORE, 140400291463168, 140400299851775,
+SNULL, 140400299855871, 140400316637183,
+STORE, 140400299851776, 140400299855871,
+STORE, 140400299855872, 140400316637183,
+STORE, 140400635396096, 140400668966911,
+SNULL, 140400635396096, 140400643788799,
+STORE, 140400643788800, 140400668966911,
+STORE, 140400635396096, 140400643788799,
+SNULL, 140400643792895, 140400668966911,
+STORE, 140400643788800, 140400643792895,
+STORE, 140400643792896, 140400668966911,
+SNULL, 140399989485567, 140400056594431,
+STORE, 140399729442816, 140399989485567,
+STORE, 140399989485568, 140400056594431,
+ERASE, 140399989485568, 140400056594431,
+SNULL, 140399930769407, 140399989485567,
+STORE, 140399729442816, 140399930769407,
+STORE, 140399930769408, 140399989485567,
+ERASE, 140399930769408, 140399989485567,
+SNULL, 140400945766399, 140400954155007,
+STORE, 140400945762304, 140400945766399,
+STORE, 140400945766400, 140400954155007,
+SNULL, 140400534749183, 140400585072639,
+STORE, 140400467640320, 140400534749183,
+STORE, 140400534749184, 140400585072639,
+ERASE, 140400534749184, 140400585072639,
+SNULL, 140399796551679, 140399930769407,
+STORE, 140399729442816, 140399796551679,
+STORE, 140399796551680, 140399930769407,
+SNULL, 140399796551680, 140399863660543,
+STORE, 140399863660544, 140399930769407,
+STORE, 140399796551680, 140399863660543,
+ERASE, 140399796551680, 140399863660543,
+SNULL, 140400199340031, 140400266313727,
+STORE, 140400199204864, 140400199340031,
+STORE, 140400199340032, 140400266313727,
+STORE, 140400627003392, 140400643788799,
+SNULL, 140400316641280, 140400325029887,
+STORE, 140400325029888, 140400333422591,
+STORE, 140400316641280, 140400325029887,
+SNULL, 140400325033983, 140400333422591,
+STORE, 140400325029888, 140400325033983,
+STORE, 140400325033984, 140400333422591,
+SNULL, 140400627003392, 140400635396095,
+STORE, 140400635396096, 140400643788799,
+STORE, 140400627003392, 140400635396095,
+SNULL, 140400635400191, 140400643788799,
+STORE, 140400635396096, 140400635400191,
+STORE, 140400635400192, 140400643788799,
+SNULL, 140400434073599, 140400442462207,
+STORE, 140400434069504, 140400434073599,
+STORE, 140400434073600, 140400442462207,
+STORE, 140400618610688, 140400635396095,
+STORE, 140400610217984, 140400635396095,
+SNULL, 140400954159104, 140400962547711,
+STORE, 140400962547712, 140400987725823,
+STORE, 140400954159104, 140400962547711,
+SNULL, 140400962551807, 140400987725823,
+STORE, 140400962547712, 140400962551807,
+STORE, 140400962551808, 140400987725823,
+SNULL, 140400299855872, 140400308244479,
+STORE, 140400308244480, 140400316637183,
+STORE, 140400299855872, 140400308244479,
+SNULL, 140400308248575, 140400316637183,
+STORE, 140400308244480, 140400308248575,
+STORE, 140400308248576, 140400316637183,
+STORE, 140400601825280, 140400635396095,
+SNULL, 140400601829375, 140400635396095,
+STORE, 140400601825280, 140400601829375,
+STORE, 140400601829376, 140400635396095,
+STORE, 140400576679936, 140400593465343,
+SNULL, 140400576684031, 140400593465343,
+STORE, 140400576679936, 140400576684031,
+STORE, 140400576684032, 140400593465343,
+SNULL, 140400643792896, 140400652181503,
+STORE, 140400652181504, 140400668966911,
+STORE, 140400643792896, 140400652181503,
+SNULL, 140400652185599, 140400668966911,
+STORE, 140400652181504, 140400652185599,
+STORE, 140400652185600, 140400668966911,
+STORE, 140399595225088, 140399796551679,
+SNULL, 140399662333951, 140399796551679,
+STORE, 140399595225088, 140399662333951,
+STORE, 140399662333952, 140399796551679,
+SNULL, 140399662333952, 140399729442815,
+STORE, 140399729442816, 140399796551679,
+STORE, 140399662333952, 140399729442815,
+ERASE, 140399662333952, 140399729442815,
+SNULL, 140399863795711, 140399930769407,
+STORE, 140399863660544, 140399863795711,
+STORE, 140399863795712, 140399930769407,
+STORE, 140400568287232, 140400576679935,
+SNULL, 140400568291327, 140400576679935,
+STORE, 140400568287232, 140400568291327,
+STORE, 140400568291328, 140400576679935,
+SNULL, 140400467775487, 140400534749183,
+STORE, 140400467640320, 140400467775487,
+STORE, 140400467775488, 140400534749183,
+SNULL, 140399729577983, 140399796551679,
+STORE, 140399729442816, 140399729577983,
+STORE, 140399729577984, 140399796551679,
+SNULL, 140400601829376, 140400627003391,
+STORE, 140400627003392, 140400635396095,
+STORE, 140400601829376, 140400627003391,
+SNULL, 140400627007487, 140400635396095,
+STORE, 140400627003392, 140400627007487,
+STORE, 140400627007488, 140400635396095,
+STORE, 140400559894528, 140400568287231,
+STORE, 140400551501824, 140400568287231,
+STORE, 140400543109120, 140400568287231,
+STORE, 140400459247616, 140400467640319,
+STORE, 140400442466304, 140400467640319,
+SNULL, 140399595360255, 140399662333951,
+STORE, 140399595225088, 140399595360255,
+STORE, 140399595360256, 140399662333951,
+SNULL, 140400962551808, 140400970940415,
+STORE, 140400970940416, 140400987725823,
+STORE, 140400962551808, 140400970940415,
+SNULL, 140400970944511, 140400987725823,
+STORE, 140400970940416, 140400970944511,
+STORE, 140400970944512, 140400987725823,
+SNULL, 140400652185600, 140400660574207,
+STORE, 140400660574208, 140400668966911,
+STORE, 140400652185600, 140400660574207,
+SNULL, 140400660578303, 140400668966911,
+STORE, 140400660574208, 140400660578303,
+STORE, 140400660578304, 140400668966911,
+SNULL, 140400576684032, 140400585072639,
+STORE, 140400585072640, 140400593465343,
+STORE, 140400576684032, 140400585072639,
+SNULL, 140400585076735, 140400593465343,
+STORE, 140400585072640, 140400585076735,
+STORE, 140400585076736, 140400593465343,
+STORE, 140400425676800, 140400434069503,
+STORE, 140400417284096, 140400434069503,
+STORE, 140400408891392, 140400434069503,
+SNULL, 140400408891392, 140400417284095,
+STORE, 140400417284096, 140400434069503,
+STORE, 140400408891392, 140400417284095,
+SNULL, 140400417288191, 140400434069503,
+STORE, 140400417284096, 140400417288191,
+STORE, 140400417288192, 140400434069503,
+STORE, 140400283066368, 140400291459071,
+SNULL, 140400601829376, 140400618610687,
+STORE, 140400618610688, 140400627003391,
+STORE, 140400601829376, 140400618610687,
+SNULL, 140400618614783, 140400627003391,
+STORE, 140400618610688, 140400618614783,
+STORE, 140400618614784, 140400627003391,
+SNULL, 140400601829376, 140400610217983,
+STORE, 140400610217984, 140400618610687,
+STORE, 140400601829376, 140400610217983,
+SNULL, 140400610222079, 140400618610687,
+STORE, 140400610217984, 140400610222079,
+STORE, 140400610222080, 140400618610687,
+STORE, 140400274673664, 140400291459071,
+STORE, 140400190812160, 140400199204863,
+STORE, 140400182419456, 140400199204863,
+SNULL, 140400442466304, 140400450854911,
+STORE, 140400450854912, 140400467640319,
+STORE, 140400442466304, 140400450854911,
+SNULL, 140400450859007, 140400467640319,
+STORE, 140400450854912, 140400450859007,
+STORE, 140400450859008, 140400467640319,
+SNULL, 140400543109120, 140400559894527,
+STORE, 140400559894528, 140400568287231,
+STORE, 140400543109120, 140400559894527,
+SNULL, 140400559898623, 140400568287231,
+STORE, 140400559894528, 140400559898623,
+STORE, 140400559898624, 140400568287231,
+SNULL, 140400450859008, 140400459247615,
+STORE, 140400459247616, 140400467640319,
+STORE, 140400450859008, 140400459247615,
+SNULL, 140400459251711, 140400467640319,
+STORE, 140400459247616, 140400459251711,
+STORE, 140400459251712, 140400467640319,
+SNULL, 140400543113215, 140400559894527,
+STORE, 140400543109120, 140400543113215,
+STORE, 140400543113216, 140400559894527,
+SNULL, 140400970944512, 140400979333119,
+STORE, 140400979333120, 140400987725823,
+STORE, 140400970944512, 140400979333119,
+SNULL, 140400979337215, 140400987725823,
+STORE, 140400979333120, 140400979337215,
+STORE, 140400979337216, 140400987725823,
+STORE, 140400174026752, 140400199204863,
+SNULL, 140400174030847, 140400199204863,
+STORE, 140400174026752, 140400174030847,
+STORE, 140400174030848, 140400199204863,
+SNULL, 140400274673664, 140400283066367,
+STORE, 140400283066368, 140400291459071,
+STORE, 140400274673664, 140400283066367,
+SNULL, 140400283070463, 140400291459071,
+STORE, 140400283066368, 140400283070463,
+STORE, 140400283070464, 140400291459071,
+STORE, 140400165634048, 140400174026751,
+SNULL, 140400165638143, 140400174026751,
+STORE, 140400165634048, 140400165638143,
+STORE, 140400165638144, 140400174026751,
+SNULL, 140400174030848, 140400182419455,
+STORE, 140400182419456, 140400199204863,
+STORE, 140400174030848, 140400182419455,
+SNULL, 140400182423551, 140400199204863,
+STORE, 140400182419456, 140400182423551,
+STORE, 140400182423552, 140400199204863,
+SNULL, 140400182423552, 140400190812159,
+STORE, 140400190812160, 140400199204863,
+STORE, 140400182423552, 140400190812159,
+SNULL, 140400190816255, 140400199204863,
+STORE, 140400190812160, 140400190816255,
+STORE, 140400190816256, 140400199204863,
+STORE, 140400157241344, 140400165634047,
+SNULL, 140400157245439, 140400165634047,
+STORE, 140400157241344, 140400157245439,
+STORE, 140400157245440, 140400165634047,
+SNULL, 140400408895487, 140400417284095,
+STORE, 140400408891392, 140400408895487,
+STORE, 140400408895488, 140400417284095,
+SNULL, 140400417288192, 140400425676799,
+STORE, 140400425676800, 140400434069503,
+STORE, 140400417288192, 140400425676799,
+SNULL, 140400425680895, 140400434069503,
+STORE, 140400425676800, 140400425680895,
+STORE, 140400425680896, 140400434069503,
+STORE, 140400148848640, 140400157241343,
+SNULL, 140400148852735, 140400157241343,
+STORE, 140400148848640, 140400148852735,
+STORE, 140400148852736, 140400157241343,
+SNULL, 140400543113216, 140400551501823,
+STORE, 140400551501824, 140400559894527,
+STORE, 140400543113216, 140400551501823,
+SNULL, 140400551505919, 140400559894527,
+STORE, 140400551501824, 140400551505919,
+STORE, 140400551505920, 140400559894527,
+STORE, 140400140455936, 140400148848639,
+STORE, 140400048201728, 140400056594431,
+SNULL, 140400140460031, 140400148848639,
+STORE, 140400140455936, 140400140460031,
+STORE, 140400140460032, 140400148848639,
+STORE, 140400039809024, 140400056594431,
+SNULL, 140400039813119, 140400056594431,
+STORE, 140400039809024, 140400039813119,
+STORE, 140400039813120, 140400056594431,
+STORE, 140400031416320, 140400039809023,
+STORE, 140400023023616, 140400039809023,
+SNULL, 140400274677759, 140400283066367,
+STORE, 140400274673664, 140400274677759,
+STORE, 140400274677760, 140400283066367,
+STORE, 140400014630912, 140400039809023,
+STORE, 140400006238208, 140400039809023,
+STORE, 140399997845504, 140400039809023,
+SNULL, 140399997849599, 140400039809023,
+STORE, 140399997845504, 140399997849599,
+STORE, 140399997849600, 140400039809023,
+STORE, 140399989452800, 140399997845503,
+SNULL, 140399989456895, 140399997845503,
+STORE, 140399989452800, 140399989456895,
+STORE, 140399989456896, 140399997845503,
+STORE, 140399981060096, 140399989452799,
+SNULL, 140399981064191, 140399989452799,
+STORE, 140399981060096, 140399981064191,
+STORE, 140399981064192, 140399989452799,
+STORE, 140399972667392, 140399981060095,
+STORE, 140399964274688, 140399981060095,
+SNULL, 140399964278783, 140399981060095,
+STORE, 140399964274688, 140399964278783,
+STORE, 140399964278784, 140399981060095,
+SNULL, 140400039813120, 140400048201727,
+STORE, 140400048201728, 140400056594431,
+STORE, 140400039813120, 140400048201727,
+SNULL, 140400048205823, 140400056594431,
+STORE, 140400048201728, 140400048205823,
+STORE, 140400048205824, 140400056594431,
+SNULL, 140399997849600, 140400031416319,
+STORE, 140400031416320, 140400039809023,
+STORE, 140399997849600, 140400031416319,
+SNULL, 140400031420415, 140400039809023,
+STORE, 140400031416320, 140400031420415,
+STORE, 140400031420416, 140400039809023,
+STORE, 140399955881984, 140399964274687,
+SNULL, 140399955886079, 140399964274687,
+STORE, 140399955881984, 140399955886079,
+STORE, 140399955886080, 140399964274687,
+STORE, 140399947489280, 140399955881983,
+STORE, 140399939096576, 140399955881983,
+STORE, 140399855267840, 140399863660543,
+SNULL, 140399939100671, 140399955881983,
+STORE, 140399939096576, 140399939100671,
+STORE, 140399939100672, 140399955881983,
+SNULL, 140399997849600, 140400014630911,
+STORE, 140400014630912, 140400031416319,
+STORE, 140399997849600, 140400014630911,
+SNULL, 140400014635007, 140400031416319,
+STORE, 140400014630912, 140400014635007,
+STORE, 140400014635008, 140400031416319,
+SNULL, 140400014635008, 140400023023615,
+STORE, 140400023023616, 140400031416319,
+STORE, 140400014635008, 140400023023615,
+SNULL, 140400023027711, 140400031416319,
+STORE, 140400023023616, 140400023027711,
+STORE, 140400023027712, 140400031416319,
+SNULL, 140399997849600, 140400006238207,
+STORE, 140400006238208, 140400014630911,
+STORE, 140399997849600, 140400006238207,
+SNULL, 140400006242303, 140400014630911,
+STORE, 140400006238208, 140400006242303,
+STORE, 140400006242304, 140400014630911,
+STORE, 140399846875136, 140399863660543,
+STORE, 140399838482432, 140399863660543,
+SNULL, 140399838486527, 140399863660543,
+STORE, 140399838482432, 140399838486527,
+STORE, 140399838486528, 140399863660543,
+SNULL, 140399939100672, 140399947489279,
+STORE, 140399947489280, 140399955881983,
+STORE, 140399939100672, 140399947489279,
+SNULL, 140399947493375, 140399955881983,
+STORE, 140399947489280, 140399947493375,
+STORE, 140399947493376, 140399955881983,
+SNULL, 140399964278784, 140399972667391,
+STORE, 140399972667392, 140399981060095,
+STORE, 140399964278784, 140399972667391,
+SNULL, 140399972671487, 140399981060095,
+STORE, 140399972667392, 140399972671487,
+STORE, 140399972671488, 140399981060095,
+SNULL, 140399838486528, 140399855267839,
+STORE, 140399855267840, 140399863660543,
+STORE, 140399838486528, 140399855267839,
+SNULL, 140399855271935, 140399863660543,
+STORE, 140399855267840, 140399855271935,
+STORE, 140399855271936, 140399863660543,
+STORE, 140399830089728, 140399838482431,
+SNULL, 140399830093823, 140399838482431,
+STORE, 140399830089728, 140399830093823,
+STORE, 140399830093824, 140399838482431,
+STORE, 140399821697024, 140399830089727,
+SNULL, 140399821701119, 140399830089727,
+STORE, 140399821697024, 140399821701119,
+STORE, 140399821701120, 140399830089727,
+SNULL, 140399838486528, 140399846875135,
+STORE, 140399846875136, 140399855267839,
+STORE, 140399838486528, 140399846875135,
+SNULL, 140399846879231, 140399855267839,
+STORE, 140399846875136, 140399846879231,
+STORE, 140399846879232, 140399855267839,
+STORE, 140399813304320, 140399821697023,
+STORE, 140399804911616, 140399821697023,
+SNULL, 140399804915711, 140399821697023,
+STORE, 140399804911616, 140399804915711,
+STORE, 140399804915712, 140399821697023,
+STORE, 140399721050112, 140399729442815,
+SNULL, 140399804915712, 140399813304319,
+STORE, 140399813304320, 140399821697023,
+STORE, 140399804915712, 140399813304319,
+SNULL, 140399813308415, 140399821697023,
+STORE, 140399813304320, 140399813308415,
+STORE, 140399813308416, 140399821697023,
+SNULL, 140399721054207, 140399729442815,
+STORE, 140399721050112, 140399721054207,
+STORE, 140399721054208, 140399729442815,
+STORE, 140401467105280, 140401467133951,
+STORE, 140401279115264, 140401281306623,
+SNULL, 140401279115264, 140401279205375,
+STORE, 140401279205376, 140401281306623,
+STORE, 140401279115264, 140401279205375,
+SNULL, 140401281298431, 140401281306623,
+STORE, 140401279205376, 140401281298431,
+STORE, 140401281298432, 140401281306623,
+ERASE, 140401281298432, 140401281306623,
+STORE, 140401281298432, 140401281306623,
+SNULL, 140401281302527, 140401281306623,
+STORE, 140401281298432, 140401281302527,
+STORE, 140401281302528, 140401281306623,
+ERASE, 140401467105280, 140401467133951,
+ERASE, 140400056594432, 140400056598527,
+ERASE, 140400056598528, 140400064987135,
+ERASE, 140400635396096, 140400635400191,
+ERASE, 140400635400192, 140400643788799,
+ERASE, 140400408891392, 140400408895487,
+ERASE, 140400408895488, 140400417284095,
+ERASE, 140400299851776, 140400299855871,
+ERASE, 140400299855872, 140400308244479,
+ERASE, 140400627003392, 140400627007487,
+ERASE, 140400627007488, 140400635396095,
+ERASE, 140400954155008, 140400954159103,
+ERASE, 140400954159104, 140400962547711,
+ERASE, 140400291459072, 140400291463167,
+ERASE, 140400291463168, 140400299851775,
+ERASE, 140400643788800, 140400643792895,
+ERASE, 140400643792896, 140400652181503,
+ERASE, 140400325029888, 140400325033983,
+ERASE, 140400325033984, 140400333422591,
+ERASE, 140400610217984, 140400610222079,
+ERASE, 140400610222080, 140400618610687,
+ERASE, 140400190812160, 140400190816255,
+ERASE, 140400190816256, 140400199204863,
+ERASE, 140399964274688, 140399964278783,
+ERASE, 140399964278784, 140399972667391,
+ERASE, 140400945762304, 140400945766399,
+ERASE, 140400945766400, 140400954155007,
+ERASE, 140400568287232, 140400568291327,
+ERASE, 140400568291328, 140400576679935,
+ERASE, 140399972667392, 140399972671487,
+ERASE, 140399972671488, 140399981060095,
+ERASE, 140400962547712, 140400962551807,
+ERASE, 140400962551808, 140400970940415,
+ERASE, 140400987725824, 140400987729919,
+ERASE, 140400987729920, 140400996118527,
+ERASE, 140400652181504, 140400652185599,
+ERASE, 140400652185600, 140400660574207,
+ERASE, 140400450854912, 140400450859007,
+ERASE, 140400450859008, 140400459247615,
+ERASE, 140400031416320, 140400031420415,
+ERASE, 140400031420416, 140400039809023,
+ERASE, 140400308244480, 140400308248575,
+ERASE, 140400308248576, 140400316637183,
+ERASE, 140400434069504, 140400434073599,
+ERASE, 140400434073600, 140400442462207,
+ERASE, 140400543109120, 140400543113215,
+ERASE, 140400543113216, 140400551501823,
+ERASE, 140400023023616, 140400023027711,
+ERASE, 140400023027712, 140400031416319,
+ERASE, 140399813304320, 140399813308415,
+ERASE, 140399813308416, 140399821697023,
+ERASE, 140400316637184, 140400316641279,
+ERASE, 140400316641280, 140400325029887,
+ERASE, 140400585072640, 140400585076735,
+ERASE, 140400585076736, 140400593465343,
+ERASE, 140400148848640, 140400148852735,
+ERASE, 140400148852736, 140400157241343,
+ERASE, 140399955881984, 140399955886079,
+ERASE, 140399955886080, 140399964274687,
+ERASE, 140399821697024, 140399821701119,
+ERASE, 140399821701120, 140399830089727,
+ERASE, 140400601825280, 140400601829375,
+ERASE, 140400601829376, 140400610217983,
+ERASE, 140400979333120, 140400979337215,
+ERASE, 140400979337216, 140400987725823,
+ERASE, 140399997845504, 140399997849599,
+ERASE, 140399997849600, 140400006238207,
+ERASE, 140400459247616, 140400459251711,
+ERASE, 140400459251712, 140400467640319,
+ERASE, 140400551501824, 140400551505919,
+ERASE, 140400551505920, 140400559894527,
+ERASE, 140399939096576, 140399939100671,
+ERASE, 140399939100672, 140399947489279,
+ERASE, 140400442462208, 140400442466303,
+ERASE, 140400442466304, 140400450854911,
+ERASE, 140400576679936, 140400576684031,
+ERASE, 140400576684032, 140400585072639,
+ERASE, 140400559894528, 140400559898623,
+ERASE, 140400559898624, 140400568287231,
+ERASE, 140400417284096, 140400417288191,
+ERASE, 140400417288192, 140400425676799,
+ERASE, 140400283066368, 140400283070463,
+ERASE, 140400283070464, 140400291459071,
+ };
+ static const unsigned long set33[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140734562918400, 140737488351231,
+SNULL, 140734562922495, 140737488351231,
+STORE, 140734562918400, 140734562922495,
+STORE, 140734562787328, 140734562922495,
+STORE, 94133878984704, 94133881237503,
+SNULL, 94133879115775, 94133881237503,
+STORE, 94133878984704, 94133879115775,
+STORE, 94133879115776, 94133881237503,
+ERASE, 94133879115776, 94133881237503,
+STORE, 94133881208832, 94133881217023,
+STORE, 94133881217024, 94133881237503,
+STORE, 140583654043648, 140583656296447,
+SNULL, 140583654187007, 140583656296447,
+STORE, 140583654043648, 140583654187007,
+STORE, 140583654187008, 140583656296447,
+ERASE, 140583654187008, 140583656296447,
+STORE, 140583656284160, 140583656292351,
+STORE, 140583656292352, 140583656296447,
+STORE, 140734564319232, 140734564323327,
+STORE, 140734564306944, 140734564319231,
+STORE, 140583656255488, 140583656284159,
+STORE, 140583656247296, 140583656255487,
+STORE, 140583651827712, 140583654043647,
+SNULL, 140583651827712, 140583651926015,
+STORE, 140583651926016, 140583654043647,
+STORE, 140583651827712, 140583651926015,
+SNULL, 140583654019071, 140583654043647,
+STORE, 140583651926016, 140583654019071,
+STORE, 140583654019072, 140583654043647,
+SNULL, 140583654019072, 140583654027263,
+STORE, 140583654027264, 140583654043647,
+STORE, 140583654019072, 140583654027263,
+ERASE, 140583654019072, 140583654027263,
+STORE, 140583654019072, 140583654027263,
+ERASE, 140583654027264, 140583654043647,
+STORE, 140583654027264, 140583654043647,
+STORE, 140583648030720, 140583651827711,
+SNULL, 140583648030720, 140583649689599,
+STORE, 140583649689600, 140583651827711,
+STORE, 140583648030720, 140583649689599,
+SNULL, 140583651786751, 140583651827711,
+STORE, 140583649689600, 140583651786751,
+STORE, 140583651786752, 140583651827711,
+SNULL, 140583651786752, 140583651811327,
+STORE, 140583651811328, 140583651827711,
+STORE, 140583651786752, 140583651811327,
+ERASE, 140583651786752, 140583651811327,
+STORE, 140583651786752, 140583651811327,
+ERASE, 140583651811328, 140583651827711,
+STORE, 140583651811328, 140583651827711,
+STORE, 140583656239104, 140583656255487,
+SNULL, 140583651803135, 140583651811327,
+STORE, 140583651786752, 140583651803135,
+STORE, 140583651803136, 140583651811327,
+SNULL, 140583654023167, 140583654027263,
+STORE, 140583654019072, 140583654023167,
+STORE, 140583654023168, 140583654027263,
+SNULL, 94133881212927, 94133881217023,
+STORE, 94133881208832, 94133881212927,
+STORE, 94133881212928, 94133881217023,
+SNULL, 140583656288255, 140583656292351,
+STORE, 140583656284160, 140583656288255,
+STORE, 140583656288256, 140583656292351,
+ERASE, 140583656255488, 140583656284159,
+STORE, 94133881733120, 94133881868287,
+STORE, 140583639638016, 140583648030719,
+SNULL, 140583639642111, 140583648030719,
+STORE, 140583639638016, 140583639642111,
+STORE, 140583639642112, 140583648030719,
+STORE, 140583631245312, 140583639638015,
+STORE, 140583497027584, 140583631245311,
+SNULL, 140583497027584, 140583540621311,
+STORE, 140583540621312, 140583631245311,
+STORE, 140583497027584, 140583540621311,
+ERASE, 140583497027584, 140583540621311,
+SNULL, 140583607730175, 140583631245311,
+STORE, 140583540621312, 140583607730175,
+STORE, 140583607730176, 140583631245311,
+ERASE, 140583607730176, 140583631245311,
+SNULL, 140583540756479, 140583607730175,
+STORE, 140583540621312, 140583540756479,
+STORE, 140583540756480, 140583607730175,
+SNULL, 140583631249407, 140583639638015,
+STORE, 140583631245312, 140583631249407,
+STORE, 140583631249408, 140583639638015,
+STORE, 140583622852608, 140583631245311,
+SNULL, 140583622856703, 140583631245311,
+STORE, 140583622852608, 140583622856703,
+STORE, 140583622856704, 140583631245311,
+STORE, 140583614459904, 140583622852607,
+SNULL, 140583614463999, 140583622852607,
+STORE, 140583614459904, 140583614463999,
+STORE, 140583614464000, 140583622852607,
+STORE, 140583532228608, 140583540621311,
+SNULL, 140583532232703, 140583540621311,
+STORE, 140583532228608, 140583532232703,
+STORE, 140583532232704, 140583540621311,
+STORE, 140583523835904, 140583532228607,
+STORE, 140583515443200, 140583532228607,
+STORE, 140583507050496, 140583532228607,
+STORE, 140583372832768, 140583507050495,
+STORE, 140583364440064, 140583372832767,
+STORE, 140583230222336, 140583364440063,
+STORE, 140583096004608, 140583364440063,
+SNULL, 140583230222335, 140583364440063,
+STORE, 140583096004608, 140583230222335,
+STORE, 140583230222336, 140583364440063,
+SNULL, 140583230222336, 140583272185855,
+STORE, 140583272185856, 140583364440063,
+STORE, 140583230222336, 140583272185855,
+ERASE, 140583230222336, 140583272185855,
+STORE, 140582961786880, 140583230222335,
+SNULL, 140583372832768, 140583406403583,
+STORE, 140583406403584, 140583507050495,
+STORE, 140583372832768, 140583406403583,
+ERASE, 140583372832768, 140583406403583,
+SNULL, 140583473512447, 140583507050495,
+STORE, 140583406403584, 140583473512447,
+STORE, 140583473512448, 140583507050495,
+ERASE, 140583473512448, 140583507050495,
+SNULL, 140583096004607, 140583230222335,
+STORE, 140582961786880, 140583096004607,
+STORE, 140583096004608, 140583230222335,
+SNULL, 140583096004608, 140583137968127,
+STORE, 140583137968128, 140583230222335,
+STORE, 140583096004608, 140583137968127,
+ERASE, 140583096004608, 140583137968127,
+SNULL, 140583339294719, 140583364440063,
+STORE, 140583272185856, 140583339294719,
+STORE, 140583339294720, 140583364440063,
+ERASE, 140583339294720, 140583364440063,
+SNULL, 140583272321023, 140583339294719,
+STORE, 140583272185856, 140583272321023,
+STORE, 140583272321024, 140583339294719,
+SNULL, 140582961786880, 140583003750399,
+STORE, 140583003750400, 140583096004607,
+STORE, 140582961786880, 140583003750399,
+ERASE, 140582961786880, 140583003750399,
+ };
+
+ static const unsigned long set34[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140731327180800, 140737488351231,
+SNULL, 140731327184895, 140737488351231,
+STORE, 140731327180800, 140731327184895,
+STORE, 140731327049728, 140731327184895,
+STORE, 94632924487680, 94632926740479,
+SNULL, 94632924618751, 94632926740479,
+STORE, 94632924487680, 94632924618751,
+STORE, 94632924618752, 94632926740479,
+ERASE, 94632924618752, 94632926740479,
+STORE, 94632926711808, 94632926719999,
+STORE, 94632926720000, 94632926740479,
+STORE, 140012544888832, 140012547141631,
+SNULL, 140012545032191, 140012547141631,
+STORE, 140012544888832, 140012545032191,
+STORE, 140012545032192, 140012547141631,
+ERASE, 140012545032192, 140012547141631,
+STORE, 140012547129344, 140012547137535,
+STORE, 140012547137536, 140012547141631,
+STORE, 140731327725568, 140731327729663,
+STORE, 140731327713280, 140731327725567,
+STORE, 140012547100672, 140012547129343,
+STORE, 140012547092480, 140012547100671,
+STORE, 140012542672896, 140012544888831,
+SNULL, 140012542672896, 140012542771199,
+STORE, 140012542771200, 140012544888831,
+STORE, 140012542672896, 140012542771199,
+SNULL, 140012544864255, 140012544888831,
+STORE, 140012542771200, 140012544864255,
+STORE, 140012544864256, 140012544888831,
+SNULL, 140012544864256, 140012544872447,
+STORE, 140012544872448, 140012544888831,
+STORE, 140012544864256, 140012544872447,
+ERASE, 140012544864256, 140012544872447,
+STORE, 140012544864256, 140012544872447,
+ERASE, 140012544872448, 140012544888831,
+STORE, 140012544872448, 140012544888831,
+STORE, 140012538875904, 140012542672895,
+SNULL, 140012538875904, 140012540534783,
+STORE, 140012540534784, 140012542672895,
+STORE, 140012538875904, 140012540534783,
+SNULL, 140012542631935, 140012542672895,
+STORE, 140012540534784, 140012542631935,
+STORE, 140012542631936, 140012542672895,
+SNULL, 140012542631936, 140012542656511,
+STORE, 140012542656512, 140012542672895,
+STORE, 140012542631936, 140012542656511,
+ERASE, 140012542631936, 140012542656511,
+STORE, 140012542631936, 140012542656511,
+ERASE, 140012542656512, 140012542672895,
+STORE, 140012542656512, 140012542672895,
+STORE, 140012547084288, 140012547100671,
+SNULL, 140012542648319, 140012542656511,
+STORE, 140012542631936, 140012542648319,
+STORE, 140012542648320, 140012542656511,
+SNULL, 140012544868351, 140012544872447,
+STORE, 140012544864256, 140012544868351,
+STORE, 140012544868352, 140012544872447,
+SNULL, 94632926715903, 94632926719999,
+STORE, 94632926711808, 94632926715903,
+STORE, 94632926715904, 94632926719999,
+SNULL, 140012547133439, 140012547137535,
+STORE, 140012547129344, 140012547133439,
+STORE, 140012547133440, 140012547137535,
+ERASE, 140012547100672, 140012547129343,
+STORE, 94632939606016, 94632939741183,
+STORE, 140012530483200, 140012538875903,
+SNULL, 140012530487295, 140012538875903,
+STORE, 140012530483200, 140012530487295,
+STORE, 140012530487296, 140012538875903,
+STORE, 140012522090496, 140012530483199,
+STORE, 140012387872768, 140012522090495,
+SNULL, 140012387872768, 140012444188671,
+STORE, 140012444188672, 140012522090495,
+STORE, 140012387872768, 140012444188671,
+ERASE, 140012387872768, 140012444188671,
+SNULL, 140012511297535, 140012522090495,
+STORE, 140012444188672, 140012511297535,
+STORE, 140012511297536, 140012522090495,
+ERASE, 140012511297536, 140012522090495,
+SNULL, 140012444323839, 140012511297535,
+STORE, 140012444188672, 140012444323839,
+STORE, 140012444323840, 140012511297535,
+SNULL, 140012522094591, 140012530483199,
+STORE, 140012522090496, 140012522094591,
+STORE, 140012522094592, 140012530483199,
+STORE, 140012513697792, 140012522090495,
+SNULL, 140012513701887, 140012522090495,
+STORE, 140012513697792, 140012513701887,
+STORE, 140012513701888, 140012522090495,
+STORE, 140012435795968, 140012444188671,
+SNULL, 140012435800063, 140012444188671,
+STORE, 140012435795968, 140012435800063,
+STORE, 140012435800064, 140012444188671,
+STORE, 140012427403264, 140012435795967,
+SNULL, 140012427407359, 140012435795967,
+STORE, 140012427403264, 140012427407359,
+STORE, 140012427407360, 140012435795967,
+STORE, 140012419010560, 140012427403263,
+STORE, 140012410617856, 140012427403263,
+STORE, 140012276400128, 140012410617855,
+STORE, 140012268007424, 140012276400127,
+STORE, 140012133789696, 140012268007423,
+SNULL, 140012133789696, 140012175753215,
+STORE, 140012175753216, 140012268007423,
+STORE, 140012133789696, 140012175753215,
+ERASE, 140012133789696, 140012175753215,
+STORE, 140012041535488, 140012268007423,
+SNULL, 140012108644351, 140012268007423,
+STORE, 140012041535488, 140012108644351,
+STORE, 140012108644352, 140012268007423,
+SNULL, 140012108644352, 140012175753215,
+STORE, 140012175753216, 140012268007423,
+STORE, 140012108644352, 140012175753215,
+ERASE, 140012108644352, 140012175753215,
+SNULL, 140012276400128, 140012309970943,
+STORE, 140012309970944, 140012410617855,
+STORE, 140012276400128, 140012309970943,
+ERASE, 140012276400128, 140012309970943,
+STORE, 140012301578240, 140012309970943,
+STORE, 140012041535488, 140012268007423,
+SNULL, 140012242862079, 140012268007423,
+STORE, 140012041535488, 140012242862079,
+STORE, 140012242862080, 140012268007423,
+ERASE, 140012242862080, 140012268007423,
+SNULL, 140012041670655, 140012242862079,
+STORE, 140012041535488, 140012041670655,
+STORE, 140012041670656, 140012242862079,
+SNULL, 140012041670656, 140012108644351,
+STORE, 140012108644352, 140012242862079,
+STORE, 140012041670656, 140012108644351,
+SNULL, 140012108779519, 140012242862079,
+STORE, 140012108644352, 140012108779519,
+STORE, 140012108779520, 140012242862079,
+SNULL, 140012377079807, 140012410617855,
+STORE, 140012309970944, 140012377079807,
+STORE, 140012377079808, 140012410617855,
+ERASE, 140012377079808, 140012410617855,
+SNULL, 140012310106111, 140012377079807,
+STORE, 140012309970944, 140012310106111,
+STORE, 140012310106112, 140012377079807,
+SNULL, 140012410621951, 140012427403263,
+STORE, 140012410617856, 140012410621951,
+STORE, 140012410621952, 140012427403263,
+SNULL, 140012108779520, 140012175753215,
+STORE, 140012175753216, 140012242862079,
+STORE, 140012108779520, 140012175753215,
+SNULL, 140012175888383, 140012242862079,
+STORE, 140012175753216, 140012175888383,
+STORE, 140012175888384, 140012242862079,
+SNULL, 140012301582335, 140012309970943,
+STORE, 140012301578240, 140012301582335,
+STORE, 140012301582336, 140012309970943,
+SNULL, 140012410621952, 140012419010559,
+STORE, 140012419010560, 140012427403263,
+STORE, 140012410621952, 140012419010559,
+SNULL, 140012419014655, 140012427403263,
+STORE, 140012419010560, 140012419014655,
+STORE, 140012419014656, 140012427403263,
+SNULL, 140012268011519, 140012276400127,
+STORE, 140012268007424, 140012268011519,
+STORE, 140012268011520, 140012276400127,
+STORE, 140012402225152, 140012410617855,
+STORE, 140012393832448, 140012410617855,
+SNULL, 140012393832448, 140012402225151,
+STORE, 140012402225152, 140012410617855,
+STORE, 140012393832448, 140012402225151,
+SNULL, 140012402229247, 140012410617855,
+STORE, 140012402225152, 140012402229247,
+STORE, 140012402229248, 140012410617855,
+STORE, 140012385439744, 140012402225151,
+SNULL, 140012385439744, 140012393832447,
+STORE, 140012393832448, 140012402225151,
+STORE, 140012385439744, 140012393832447,
+SNULL, 140012393836543, 140012402225151,
+STORE, 140012393832448, 140012393836543,
+STORE, 140012393836544, 140012402225151,
+STORE, 140012293185536, 140012301578239,
+STORE, 140012284792832, 140012301578239,
+SNULL, 140012284792832, 140012293185535,
+STORE, 140012293185536, 140012301578239,
+STORE, 140012284792832, 140012293185535,
+SNULL, 140012293189631, 140012301578239,
+STORE, 140012293185536, 140012293189631,
+STORE, 140012293189632, 140012301578239,
+STORE, 140012268011520, 140012284792831,
+SNULL, 140012385443839, 140012393832447,
+STORE, 140012385439744, 140012385443839,
+STORE, 140012385443840, 140012393832447,
+STORE, 140012259614720, 140012268007423,
+SNULL, 140012259618815, 140012268007423,
+STORE, 140012259614720, 140012259618815,
+STORE, 140012259618816, 140012268007423,
+STORE, 140012251222016, 140012259614719,
+SNULL, 140012251226111, 140012259614719,
+STORE, 140012251222016, 140012251226111,
+STORE, 140012251226112, 140012259614719,
+SNULL, 140012284796927, 140012293185535,
+STORE, 140012284792832, 140012284796927,
+STORE, 140012284796928, 140012293185535,
+SNULL, 140012268011520, 140012276400127,
+STORE, 140012276400128, 140012284792831,
+STORE, 140012268011520, 140012276400127,
+SNULL, 140012276404223, 140012284792831,
+STORE, 140012276400128, 140012276404223,
+STORE, 140012276404224, 140012284792831,
+STORE, 140012033142784, 140012041535487,
+SNULL, 140012033146879, 140012041535487,
+STORE, 140012033142784, 140012033146879,
+STORE, 140012033146880, 140012041535487,
+STORE, 140012024750080, 140012033142783,
+STORE, 140012016357376, 140012033142783,
+SNULL, 140012016357376, 140012024750079,
+STORE, 140012024750080, 140012033142783,
+STORE, 140012016357376, 140012024750079,
+SNULL, 140012024754175, 140012033142783,
+STORE, 140012024750080, 140012024754175,
+STORE, 140012024754176, 140012033142783,
+SNULL, 140012016361471, 140012024750079,
+STORE, 140012016357376, 140012016361471,
+STORE, 140012016361472, 140012024750079,
+STORE, 140012007964672, 140012016357375,
+SNULL, 140012007968767, 140012016357375,
+STORE, 140012007964672, 140012007968767,
+STORE, 140012007968768, 140012016357375,
+STORE, 140011999571968, 140012007964671,
+STORE, 140011991179264, 140012007964671,
+STORE, 140011856961536, 140011991179263,
+STORE, 140011848568832, 140011856961535,
+STORE, 140011714351104, 140011848568831,
+SNULL, 140011714351104, 140011773100031,
+STORE, 140011773100032, 140011848568831,
+STORE, 140011714351104, 140011773100031,
+ERASE, 140011714351104, 140011773100031,
+STORE, 140011764707328, 140011773100031,
+STORE, 140011756314624, 140011773100031,
+STORE, 140011622096896, 140011756314623,
+STORE, 140011613704192, 140011622096895,
+STORE, 140011479486464, 140011613704191,
+STORE, 140011471093760, 140011479486463,
+SNULL, 140011479486464, 140011504664575,
+STORE, 140011504664576, 140011613704191,
+STORE, 140011479486464, 140011504664575,
+ERASE, 140011479486464, 140011504664575,
+STORE, 140011496271872, 140011504664575,
+STORE, 140011487879168, 140011504664575,
+STORE, 140011336876032, 140011471093759,
+SNULL, 140011336876032, 140011370446847,
+STORE, 140011370446848, 140011471093759,
+STORE, 140011336876032, 140011370446847,
+ERASE, 140011336876032, 140011370446847,
+STORE, 140011471093760, 140011487879167,
+STORE, 140011362054144, 140011370446847,
+SNULL, 140011362058239, 140011370446847,
+STORE, 140011362054144, 140011362058239,
+STORE, 140011362058240, 140011370446847,
+STORE, 140011353661440, 140011362054143,
+STORE, 140011345268736, 140011362054143,
+SNULL, 140011345272831, 140011362054143,
+STORE, 140011345268736, 140011345272831,
+STORE, 140011345272832, 140011362054143,
+STORE, 140011336876032, 140011345268735,
+STORE, 140011328483328, 140011345268735,
+SNULL, 140011328487423, 140011345268735,
+STORE, 140011328483328, 140011328487423,
+STORE, 140011328487424, 140011345268735,
+STORE, 140011320090624, 140011328483327,
+STORE, 140011185872896, 140011320090623,
+SNULL, 140011185872896, 140011236229119,
+STORE, 140011236229120, 140011320090623,
+STORE, 140011185872896, 140011236229119,
+ERASE, 140011185872896, 140011236229119,
+SNULL, 140011856961536, 140011907317759,
+STORE, 140011907317760, 140011991179263,
+STORE, 140011856961536, 140011907317759,
+ERASE, 140011856961536, 140011907317759,
+SNULL, 140011974426623, 140011991179263,
+STORE, 140011907317760, 140011974426623,
+STORE, 140011974426624, 140011991179263,
+ERASE, 140011974426624, 140011991179263,
+SNULL, 140011840208895, 140011848568831,
+STORE, 140011773100032, 140011840208895,
+STORE, 140011840208896, 140011848568831,
+ERASE, 140011840208896, 140011848568831,
+SNULL, 140011773235199, 140011840208895,
+STORE, 140011773100032, 140011773235199,
+STORE, 140011773235200, 140011840208895,
+STORE, 140011102011392, 140011320090623,
+SNULL, 140011169120255, 140011320090623,
+STORE, 140011102011392, 140011169120255,
+STORE, 140011169120256, 140011320090623,
+SNULL, 140011169120256, 140011236229119,
+STORE, 140011236229120, 140011320090623,
+STORE, 140011169120256, 140011236229119,
+ERASE, 140011169120256, 140011236229119,
+SNULL, 140011622096896, 140011638882303,
+STORE, 140011638882304, 140011756314623,
+STORE, 140011622096896, 140011638882303,
+ERASE, 140011622096896, 140011638882303,
+SNULL, 140011705991167, 140011756314623,
+STORE, 140011638882304, 140011705991167,
+STORE, 140011705991168, 140011756314623,
+ERASE, 140011705991168, 140011756314623,
+SNULL, 140011571773439, 140011613704191,
+STORE, 140011504664576, 140011571773439,
+STORE, 140011571773440, 140011613704191,
+ERASE, 140011571773440, 140011613704191,
+STORE, 140010967793664, 140011169120255,
+SNULL, 140011034902527, 140011169120255,
+STORE, 140010967793664, 140011034902527,
+STORE, 140011034902528, 140011169120255,
+SNULL, 140011034902528, 140011102011391,
+STORE, 140011102011392, 140011169120255,
+STORE, 140011034902528, 140011102011391,
+ERASE, 140011034902528, 140011102011391,
+STORE, 140010833575936, 140011034902527,
+SNULL, 140011437555711, 140011471093759,
+STORE, 140011370446848, 140011437555711,
+STORE, 140011437555712, 140011471093759,
+ERASE, 140011437555712, 140011471093759,
+SNULL, 140011370582015, 140011437555711,
+STORE, 140011370446848, 140011370582015,
+STORE, 140011370582016, 140011437555711,
+STORE, 140010699358208, 140011034902527,
+SNULL, 140011487883263, 140011504664575,
+STORE, 140011487879168, 140011487883263,
+STORE, 140011487883264, 140011504664575,
+SNULL, 140011345272832, 140011353661439,
+STORE, 140011353661440, 140011362054143,
+STORE, 140011345272832, 140011353661439,
+SNULL, 140011353665535, 140011362054143,
+STORE, 140011353661440, 140011353665535,
+STORE, 140011353665536, 140011362054143,
+SNULL, 140011328487424, 140011336876031,
+STORE, 140011336876032, 140011345268735,
+STORE, 140011328487424, 140011336876031,
+SNULL, 140011336880127, 140011345268735,
+STORE, 140011336876032, 140011336880127,
+STORE, 140011336880128, 140011345268735,
+SNULL, 140011303337983, 140011320090623,
+STORE, 140011236229120, 140011303337983,
+STORE, 140011303337984, 140011320090623,
+ERASE, 140011303337984, 140011320090623,
+SNULL, 140011907452927, 140011974426623,
+STORE, 140011907317760, 140011907452927,
+STORE, 140011907452928, 140011974426623,
+SNULL, 140011102146559, 140011169120255,
+STORE, 140011102011392, 140011102146559,
+STORE, 140011102146560, 140011169120255,
+SNULL, 140011639017471, 140011705991167,
+STORE, 140011638882304, 140011639017471,
+STORE, 140011639017472, 140011705991167,
+SNULL, 140011504799743, 140011571773439,
+STORE, 140011504664576, 140011504799743,
+STORE, 140011504799744, 140011571773439,
+SNULL, 140011613708287, 140011622096895,
+STORE, 140011613704192, 140011613708287,
+STORE, 140011613708288, 140011622096895,
+SNULL, 140010699358208, 140010967793663,
+STORE, 140010967793664, 140011034902527,
+STORE, 140010699358208, 140010967793663,
+SNULL, 140010967928831, 140011034902527,
+STORE, 140010967793664, 140010967928831,
+STORE, 140010967928832, 140011034902527,
+SNULL, 140010900684799, 140010967793663,
+STORE, 140010699358208, 140010900684799,
+STORE, 140010900684800, 140010967793663,
+ERASE, 140010900684800, 140010967793663,
+SNULL, 140010766467071, 140010900684799,
+STORE, 140010699358208, 140010766467071,
+STORE, 140010766467072, 140010900684799,
+SNULL, 140010766467072, 140010833575935,
+STORE, 140010833575936, 140010900684799,
+STORE, 140010766467072, 140010833575935,
+ERASE, 140010766467072, 140010833575935,
+SNULL, 140010699493375, 140010766467071,
+STORE, 140010699358208, 140010699493375,
+STORE, 140010699493376, 140010766467071,
+SNULL, 140011848572927, 140011856961535,
+STORE, 140011848568832, 140011848572927,
+STORE, 140011848572928, 140011856961535,
+STORE, 140011982786560, 140012007964671,
+STORE, 140011898925056, 140011907317759,
+SNULL, 140011898929151, 140011907317759,
+STORE, 140011898925056, 140011898929151,
+STORE, 140011898929152, 140011907317759,
+SNULL, 140011320094719, 140011328483327,
+STORE, 140011320090624, 140011320094719,
+STORE, 140011320094720, 140011328483327,
+STORE, 140011890532352, 140011898925055,
+STORE, 140011882139648, 140011898925055,
+SNULL, 140011882143743, 140011898925055,
+STORE, 140011882139648, 140011882143743,
+STORE, 140011882143744, 140011898925055,
+STORE, 140011873746944, 140011882139647,
+SNULL, 140011873751039, 140011882139647,
+STORE, 140011873746944, 140011873751039,
+STORE, 140011873751040, 140011882139647,
+SNULL, 140011236364287, 140011303337983,
+STORE, 140011236229120, 140011236364287,
+STORE, 140011236364288, 140011303337983,
+SNULL, 140011756318719, 140011773100031,
+STORE, 140011756314624, 140011756318719,
+STORE, 140011756318720, 140011773100031,
+SNULL, 140011756318720, 140011764707327,
+STORE, 140011764707328, 140011773100031,
+STORE, 140011756318720, 140011764707327,
+SNULL, 140011764711423, 140011773100031,
+STORE, 140011764707328, 140011764711423,
+STORE, 140011764711424, 140011773100031,
+SNULL, 140011471097855, 140011487879167,
+STORE, 140011471093760, 140011471097855,
+STORE, 140011471097856, 140011487879167,
+SNULL, 140010833711103, 140010900684799,
+STORE, 140010833575936, 140010833711103,
+STORE, 140010833711104, 140010900684799,
+SNULL, 140011982790655, 140012007964671,
+STORE, 140011982786560, 140011982790655,
+STORE, 140011982790656, 140012007964671,
+STORE, 140011865354240, 140011873746943,
+STORE, 140011848572928, 140011865354239,
+SNULL, 140011848572928, 140011856961535,
+STORE, 140011856961536, 140011865354239,
+STORE, 140011848572928, 140011856961535,
+SNULL, 140011856965631, 140011865354239,
+STORE, 140011856961536, 140011856965631,
+STORE, 140011856965632, 140011865354239,
+STORE, 140011747921920, 140011756314623,
+STORE, 140011739529216, 140011756314623,
+SNULL, 140011471097856, 140011479486463,
+STORE, 140011479486464, 140011487879167,
+STORE, 140011471097856, 140011479486463,
+SNULL, 140011479490559, 140011487879167,
+STORE, 140011479486464, 140011479490559,
+STORE, 140011479490560, 140011487879167,
+STORE, 140011731136512, 140011756314623,
+STORE, 140011722743808, 140011756314623,
+SNULL, 140011982790656, 140011999571967,
+STORE, 140011999571968, 140012007964671,
+STORE, 140011982790656, 140011999571967,
+SNULL, 140011999576063, 140012007964671,
+STORE, 140011999571968, 140011999576063,
+STORE, 140011999576064, 140012007964671,
+STORE, 140011714351104, 140011756314623,
+SNULL, 140011882143744, 140011890532351,
+STORE, 140011890532352, 140011898925055,
+STORE, 140011882143744, 140011890532351,
+SNULL, 140011890536447, 140011898925055,
+STORE, 140011890532352, 140011890536447,
+STORE, 140011890536448, 140011898925055,
+STORE, 140011630489600, 140011638882303,
+STORE, 140011613708288, 140011638882303,
+STORE, 140011605311488, 140011613704191,
+STORE, 140011596918784, 140011613704191,
+STORE, 140011588526080, 140011613704191,
+SNULL, 140011487883264, 140011496271871,
+STORE, 140011496271872, 140011504664575,
+STORE, 140011487883264, 140011496271871,
+SNULL, 140011496275967, 140011504664575,
+STORE, 140011496271872, 140011496275967,
+STORE, 140011496275968, 140011504664575,
+STORE, 140011580133376, 140011613704191,
+SNULL, 140011580137471, 140011613704191,
+STORE, 140011580133376, 140011580137471,
+STORE, 140011580137472, 140011613704191,
+SNULL, 140011982790656, 140011991179263,
+STORE, 140011991179264, 140011999571967,
+STORE, 140011982790656, 140011991179263,
+SNULL, 140011991183359, 140011999571967,
+STORE, 140011991179264, 140011991183359,
+STORE, 140011991183360, 140011999571967,
+SNULL, 140011865358335, 140011873746943,
+STORE, 140011865354240, 140011865358335,
+STORE, 140011865358336, 140011873746943,
+STORE, 140011462701056, 140011471093759,
+SNULL, 140011714351104, 140011739529215,
+STORE, 140011739529216, 140011756314623,
+STORE, 140011714351104, 140011739529215,
+SNULL, 140011739533311, 140011756314623,
+STORE, 140011739529216, 140011739533311,
+STORE, 140011739533312, 140011756314623,
+SNULL, 140011739533312, 140011747921919,
+STORE, 140011747921920, 140011756314623,
+STORE, 140011739533312, 140011747921919,
+SNULL, 140011747926015, 140011756314623,
+STORE, 140011747921920, 140011747926015,
+STORE, 140011747926016, 140011756314623,
+SNULL, 140011613708288, 140011630489599,
+STORE, 140011630489600, 140011638882303,
+STORE, 140011613708288, 140011630489599,
+SNULL, 140011630493695, 140011638882303,
+STORE, 140011630489600, 140011630493695,
+STORE, 140011630493696, 140011638882303,
+SNULL, 140011714351104, 140011722743807,
+STORE, 140011722743808, 140011739529215,
+STORE, 140011714351104, 140011722743807,
+SNULL, 140011722747903, 140011739529215,
+STORE, 140011722743808, 140011722747903,
+STORE, 140011722747904, 140011739529215,
+SNULL, 140011714355199, 140011722743807,
+STORE, 140011714351104, 140011714355199,
+STORE, 140011714355200, 140011722743807,
+SNULL, 140011722747904, 140011731136511,
+STORE, 140011731136512, 140011739529215,
+STORE, 140011722747904, 140011731136511,
+SNULL, 140011731140607, 140011739529215,
+STORE, 140011731136512, 140011731140607,
+STORE, 140011731140608, 140011739529215,
+STORE, 140011454308352, 140011471093759,
+STORE, 140011445915648, 140011471093759,
+SNULL, 140011580137472, 140011588526079,
+STORE, 140011588526080, 140011613704191,
+STORE, 140011580137472, 140011588526079,
+SNULL, 140011588530175, 140011613704191,
+STORE, 140011588526080, 140011588530175,
+STORE, 140011588530176, 140011613704191,
+SNULL, 140011445915648, 140011462701055,
+STORE, 140011462701056, 140011471093759,
+STORE, 140011445915648, 140011462701055,
+SNULL, 140011462705151, 140011471093759,
+STORE, 140011462701056, 140011462705151,
+STORE, 140011462705152, 140011471093759,
+SNULL, 140011588530176, 140011596918783,
+STORE, 140011596918784, 140011613704191,
+STORE, 140011588530176, 140011596918783,
+SNULL, 140011596922879, 140011613704191,
+STORE, 140011596918784, 140011596922879,
+STORE, 140011596922880, 140011613704191,
+SNULL, 140011596922880, 140011605311487,
+STORE, 140011605311488, 140011613704191,
+STORE, 140011596922880, 140011605311487,
+SNULL, 140011605315583, 140011613704191,
+STORE, 140011605311488, 140011605315583,
+STORE, 140011605315584, 140011613704191,
+SNULL, 140011613708288, 140011622096895,
+STORE, 140011622096896, 140011630489599,
+STORE, 140011613708288, 140011622096895,
+SNULL, 140011622100991, 140011630489599,
+STORE, 140011622096896, 140011622100991,
+STORE, 140011622100992, 140011630489599,
+STORE, 140011311697920, 140011320090623,
+STORE, 140011227836416, 140011236229119,
+STORE, 140011219443712, 140011236229119,
+SNULL, 140011219447807, 140011236229119,
+STORE, 140011219443712, 140011219447807,
+STORE, 140011219447808, 140011236229119,
+STORE, 140011211051008, 140011219443711,
+STORE, 140011202658304, 140011219443711,
+SNULL, 140011202662399, 140011219443711,
+STORE, 140011202658304, 140011202662399,
+STORE, 140011202662400, 140011219443711,
+STORE, 140011194265600, 140011202658303,
+STORE, 140011185872896, 140011202658303,
+STORE, 140011177480192, 140011202658303,
+STORE, 140011093618688, 140011102011391,
+SNULL, 140011445915648, 140011454308351,
+STORE, 140011454308352, 140011462701055,
+STORE, 140011445915648, 140011454308351,
+SNULL, 140011454312447, 140011462701055,
+STORE, 140011454308352, 140011454312447,
+STORE, 140011454312448, 140011462701055,
+STORE, 140011085225984, 140011102011391,
+SNULL, 140011085230079, 140011102011391,
+STORE, 140011085225984, 140011085230079,
+STORE, 140011085230080, 140011102011391,
+SNULL, 140011177484287, 140011202658303,
+STORE, 140011177480192, 140011177484287,
+STORE, 140011177484288, 140011202658303,
+SNULL, 140011445919743, 140011454308351,
+STORE, 140011445915648, 140011445919743,
+STORE, 140011445919744, 140011454308351,
+SNULL, 140011177484288, 140011185872895,
+STORE, 140011185872896, 140011202658303,
+STORE, 140011177484288, 140011185872895,
+SNULL, 140011185876991, 140011202658303,
+STORE, 140011185872896, 140011185876991,
+STORE, 140011185876992, 140011202658303,
+STORE, 140011076833280, 140011085225983,
+SNULL, 140011202662400, 140011211051007,
+STORE, 140011211051008, 140011219443711,
+STORE, 140011202662400, 140011211051007,
+SNULL, 140011211055103, 140011219443711,
+STORE, 140011211051008, 140011211055103,
+STORE, 140011211055104, 140011219443711,
+SNULL, 140011185876992, 140011194265599,
+STORE, 140011194265600, 140011202658303,
+STORE, 140011185876992, 140011194265599,
+SNULL, 140011194269695, 140011202658303,
+STORE, 140011194265600, 140011194269695,
+STORE, 140011194269696, 140011202658303,
+STORE, 140011068440576, 140011085225983,
+SNULL, 140011311702015, 140011320090623,
+STORE, 140011311697920, 140011311702015,
+STORE, 140011311702016, 140011320090623,
+STORE, 140011060047872, 140011085225983,
+SNULL, 140011060051967, 140011085225983,
+STORE, 140011060047872, 140011060051967,
+STORE, 140011060051968, 140011085225983,
+STORE, 140011051655168, 140011060047871,
+STORE, 140011043262464, 140011060047871,
+SNULL, 140011043266559, 140011060047871,
+STORE, 140011043262464, 140011043266559,
+STORE, 140011043266560, 140011060047871,
+SNULL, 140011219447808, 140011227836415,
+STORE, 140011227836416, 140011236229119,
+STORE, 140011219447808, 140011227836415,
+SNULL, 140011227840511, 140011236229119,
+STORE, 140011227836416, 140011227840511,
+STORE, 140011227840512, 140011236229119,
+SNULL, 140011085230080, 140011093618687,
+STORE, 140011093618688, 140011102011391,
+STORE, 140011085230080, 140011093618687,
+SNULL, 140011093622783, 140011102011391,
+STORE, 140011093618688, 140011093622783,
+STORE, 140011093622784, 140011102011391,
+STORE, 140010959400960, 140010967793663,
+STORE, 140010951008256, 140010967793663,
+SNULL, 140010951008256, 140010959400959,
+STORE, 140010959400960, 140010967793663,
+STORE, 140010951008256, 140010959400959,
+SNULL, 140010959405055, 140010967793663,
+STORE, 140010959400960, 140010959405055,
+STORE, 140010959405056, 140010967793663,
+STORE, 140010942615552, 140010959400959,
+STORE, 140010934222848, 140010959400959,
+SNULL, 140011060051968, 140011076833279,
+STORE, 140011076833280, 140011085225983,
+STORE, 140011060051968, 140011076833279,
+SNULL, 140011076837375, 140011085225983,
+STORE, 140011076833280, 140011076837375,
+STORE, 140011076837376, 140011085225983,
+SNULL, 140011043266560, 140011051655167,
+STORE, 140011051655168, 140011060047871,
+STORE, 140011043266560, 140011051655167,
+SNULL, 140011051659263, 140011060047871,
+STORE, 140011051655168, 140011051659263,
+STORE, 140011051659264, 140011060047871,
+STORE, 140010925830144, 140010959400959,
+SNULL, 140011060051968, 140011068440575,
+STORE, 140011068440576, 140011076833279,
+STORE, 140011060051968, 140011068440575,
+SNULL, 140011068444671, 140011076833279,
+STORE, 140011068440576, 140011068444671,
+STORE, 140011068444672, 140011076833279,
+STORE, 140010917437440, 140010959400959,
+STORE, 140010909044736, 140010959400959,
+STORE, 140010825183232, 140010833575935,
+SNULL, 140010909044736, 140010942615551,
+STORE, 140010942615552, 140010959400959,
+STORE, 140010909044736, 140010942615551,
+SNULL, 140010942619647, 140010959400959,
+STORE, 140010942615552, 140010942619647,
+STORE, 140010942619648, 140010959400959,
+SNULL, 140010909044736, 140010934222847,
+STORE, 140010934222848, 140010942615551,
+STORE, 140010909044736, 140010934222847,
+SNULL, 140010934226943, 140010942615551,
+STORE, 140010934222848, 140010934226943,
+STORE, 140010934226944, 140010942615551,
+SNULL, 140010909048831, 140010934222847,
+STORE, 140010909044736, 140010909048831,
+STORE, 140010909048832, 140010934222847,
+STORE, 140010816790528, 140010833575935,
+SNULL, 140010816794623, 140010833575935,
+STORE, 140010816790528, 140010816794623,
+STORE, 140010816794624, 140010833575935,
+STORE, 140010808397824, 140010816790527,
+SNULL, 140010942619648, 140010951008255,
+STORE, 140010951008256, 140010959400959,
+STORE, 140010942619648, 140010951008255,
+SNULL, 140010951012351, 140010959400959,
+STORE, 140010951008256, 140010951012351,
+STORE, 140010951012352, 140010959400959,
+STORE, 140010800005120, 140010816790527,
+SNULL, 140010800009215, 140010816790527,
+STORE, 140010800005120, 140010800009215,
+STORE, 140010800009216, 140010816790527,
+SNULL, 140010909048832, 140010925830143,
+STORE, 140010925830144, 140010934222847,
+STORE, 140010909048832, 140010925830143,
+SNULL, 140010925834239, 140010934222847,
+STORE, 140010925830144, 140010925834239,
+STORE, 140010925834240, 140010934222847,
+SNULL, 140010816794624, 140010825183231,
+STORE, 140010825183232, 140010833575935,
+STORE, 140010816794624, 140010825183231,
+SNULL, 140010825187327, 140010833575935,
+STORE, 140010825183232, 140010825187327,
+STORE, 140010825187328, 140010833575935,
+SNULL, 140010909048832, 140010917437439,
+STORE, 140010917437440, 140010925830143,
+STORE, 140010909048832, 140010917437439,
+SNULL, 140010917441535, 140010925830143,
+STORE, 140010917437440, 140010917441535,
+STORE, 140010917441536, 140010925830143,
+SNULL, 140010800009216, 140010808397823,
+STORE, 140010808397824, 140010816790527,
+STORE, 140010800009216, 140010808397823,
+SNULL, 140010808401919, 140010816790527,
+STORE, 140010808397824, 140010808401919,
+STORE, 140010808401920, 140010816790527,
+STORE, 140010791612416, 140010800005119,
+SNULL, 140010791616511, 140010800005119,
+STORE, 140010791612416, 140010791616511,
+STORE, 140010791616512, 140010800005119,
+STORE, 140012547100672, 140012547129343,
+STORE, 140012511506432, 140012513697791,
+SNULL, 140012511506432, 140012511596543,
+STORE, 140012511596544, 140012513697791,
+STORE, 140012511506432, 140012511596543,
+SNULL, 140012513689599, 140012513697791,
+STORE, 140012511596544, 140012513689599,
+STORE, 140012513689600, 140012513697791,
+ERASE, 140012513689600, 140012513697791,
+STORE, 140012513689600, 140012513697791,
+SNULL, 140012513693695, 140012513697791,
+STORE, 140012513689600, 140012513693695,
+STORE, 140012513693696, 140012513697791,
+ERASE, 140012547100672, 140012547129343,
+ERASE, 140011362054144, 140011362058239,
+ERASE, 140011362058240, 140011370446847,
+ERASE, 140011882139648, 140011882143743,
+ERASE, 140011882143744, 140011890532351,
+ERASE, 140011873746944, 140011873751039,
+ERASE, 140011873751040, 140011882139647,
+ERASE, 140011588526080, 140011588530175,
+ERASE, 140011588530176, 140011596918783,
+ERASE, 140011328483328, 140011328487423,
+ERASE, 140011328487424, 140011336876031,
+ERASE, 140011898925056, 140011898929151,
+ERASE, 140011898929152, 140011907317759,
+ERASE, 140011353661440, 140011353665535,
+ERASE, 140011353665536, 140011362054143,
+ERASE, 140011336876032, 140011336880127,
+ERASE, 140011336880128, 140011345268735,
+ERASE, 140011731136512, 140011731140607,
+ERASE, 140011731140608, 140011739529215,
+ERASE, 140011479486464, 140011479490559,
+ERASE, 140011479490560, 140011487879167,
+ERASE, 140011756314624, 140011756318719,
+ERASE, 140011756318720, 140011764707327,
+ERASE, 140011580133376, 140011580137471,
+ERASE, 140011580137472, 140011588526079,
+ERASE, 140011219443712, 140011219447807,
+ERASE, 140011219447808, 140011227836415,
+ERASE, 140011051655168, 140011051659263,
+ERASE, 140011051659264, 140011060047871,
+ERASE, 140011999571968, 140011999576063,
+ERASE, 140011999576064, 140012007964671,
+ERASE, 140011714351104, 140011714355199,
+ERASE, 140011714355200, 140011722743807,
+ERASE, 140011739529216, 140011739533311,
+ERASE, 140011739533312, 140011747921919,
+ERASE, 140011320090624, 140011320094719,
+ERASE, 140011320094720, 140011328483327,
+ERASE, 140011630489600, 140011630493695,
+ERASE, 140011630493696, 140011638882303,
+ERASE, 140011345268736, 140011345272831,
+ERASE, 140011345272832, 140011353661439,
+ERASE, 140011496271872, 140011496275967,
+ERASE, 140011496275968, 140011504664575,
+ERASE, 140011194265600, 140011194269695,
+ERASE, 140011194269696, 140011202658303,
+ERASE, 140011068440576, 140011068444671,
+ERASE, 140011068444672, 140011076833279,
+ERASE, 140010909044736, 140010909048831,
+ERASE, 140010909048832, 140010917437439,
+ERASE, 140011764707328, 140011764711423,
+ERASE, 140011764711424, 140011773100031,
+ERASE, 140011462701056, 140011462705151,
+ERASE, 140011462705152, 140011471093759,
+ERASE, 140011076833280, 140011076837375,
+ERASE, 140011076837376, 140011085225983,
+ERASE, 140011991179264, 140011991183359,
+ERASE, 140011991183360, 140011999571967,
+ERASE, 140011211051008, 140011211055103,
+ERASE, 140011211055104, 140011219443711,
+ERASE, 140010917437440, 140010917441535,
+ERASE, 140010917441536, 140010925830143,
+ERASE, 140011085225984, 140011085230079,
+ERASE, 140011085230080, 140011093618687,
+ERASE, 140011487879168, 140011487883263,
+ERASE, 140011487883264, 140011496271871,
+ERASE, 140011856961536, 140011856965631,
+ERASE, 140011856965632, 140011865354239,
+ERASE, 140011982786560, 140011982790655,
+ERASE, 140011982790656, 140011991179263,
+ERASE, 140011722743808, 140011722747903,
+ERASE, 140011722747904, 140011731136511,
+ERASE, 140011177480192, 140011177484287,
+ERASE, 140011177484288, 140011185872895,
+ERASE, 140011848568832, 140011848572927,
+ERASE, 140011848572928, 140011856961535,
+ERASE, 140011890532352, 140011890536447,
+ERASE, 140011890536448, 140011898925055,
+ERASE, 140011622096896, 140011622100991,
+ERASE, 140011622100992, 140011630489599,
+ERASE, 140011311697920, 140011311702015,
+ERASE, 140011311702016, 140011320090623,
+ERASE, 140011471093760, 140011471097855,
+ERASE, 140011471097856, 140011479486463,
+ERASE, 140011605311488, 140011605315583,
+ERASE, 140011605315584, 140011613704191,
+ERASE, 140010791612416, 140010791616511,
+ERASE, 140010791616512, 140010800005119,
+ERASE, 140010959400960, 140010959405055,
+ERASE, 140010959405056, 140010967793663,
+ERASE, 140011185872896, 140011185876991,
+ERASE, 140011185876992, 140011194265599,
+ERASE, 140011454308352, 140011454312447,
+ERASE, 140011454312448, 140011462701055,
+ERASE, 140011596918784, 140011596922879,
+ERASE, 140011596922880, 140011605311487,
+ERASE, 140011060047872, 140011060051967,
+ERASE, 140011060051968, 140011068440575,
+ERASE, 140010925830144, 140010925834239,
+ERASE, 140010925834240, 140010934222847,
+ERASE, 140011747921920, 140011747926015,
+ERASE, 140011747926016, 140011756314623,
+ERASE, 140011202658304, 140011202662399,
+ERASE, 140011202662400, 140011211051007,
+ERASE, 140010800005120, 140010800009215,
+ERASE, 140010800009216, 140010808397823,
+ERASE, 140011093618688, 140011093622783,
+ERASE, 140011093622784, 140011102011391,
+ERASE, 140010808397824, 140010808401919,
+ERASE, 140010808401920, 140010816790527,
+ERASE, 140012419010560, 140012419014655,
+ERASE, 140012419014656, 140012427403263,
+ERASE, 140010934222848, 140010934226943,
+ERASE, 140010934226944, 140010942615551,
+ERASE, 140010942615552, 140010942619647,
+ERASE, 140010942619648, 140010951008255,
+ERASE, 140011613704192, 140011613708287,
+ERASE, 140011613708288, 140011622096895,
+ERASE, 140011865354240, 140011865358335,
+ERASE, 140011865358336, 140011873746943,
+ERASE, 140012301578240, 140012301582335,
+ERASE, 140012301582336, 140012309970943,
+ERASE, 140012393832448, 140012393836543,
+ERASE, 140012393836544, 140012402225151,
+ERASE, 140012410617856, 140012410621951,
+ERASE, 140012410621952, 140012419010559,
+ERASE, 140012402225152, 140012402229247,
+ERASE, 140012402229248, 140012410617855,
+ERASE, 140012259614720, 140012259618815,
+ERASE, 140012259618816, 140012268007423,
+ERASE, 140012251222016, 140012251226111,
+ERASE, 140012251226112, 140012259614719,
+ERASE, 140012284792832, 140012284796927,
+ERASE, 140012284796928, 140012293185535,
+ERASE, 140011445915648, 140011445919743,
+ERASE, 140011445919744, 140011454308351,
+ERASE, 140010951008256, 140010951012351,
+ERASE, 140010951012352, 140010959400959,
+ERASE, 140011043262464, 140011043266559,
+ERASE, 140011043266560, 140011051655167,
+ERASE, 140010825183232, 140010825187327,
+ERASE, 140010825187328, 140010833575935,
+ERASE, 140012293185536, 140012293189631,
+ERASE, 140012293189632, 140012301578239,
+ERASE, 140012276400128, 140012276404223,
+ERASE, 140012276404224, 140012284792831,
+ERASE, 140012016357376, 140012016361471,
+ERASE, 140012016361472, 140012024750079,
+ERASE, 140012024750080, 140012024754175,
+ERASE, 140012024754176, 140012033142783,
+ERASE, 140011227836416, 140011227840511,
+ERASE, 140011227840512, 140011236229119,
+ERASE, 140010816790528, 140010816794623,
+ERASE, 140010816794624, 140010825183231,
+ERASE, 140012268007424, 140012268011519,
+ERASE, 140012268011520, 140012276400127,
+ERASE, 140012385439744, 140012385443839,
+ERASE, 140012385443840, 140012393832447,
+ERASE, 140012522090496, 140012522094591,
+ERASE, 140012522094592, 140012530483199,
+ERASE, 140012033142784, 140012033146879,
+ERASE, 140012033146880, 140012041535487,
+ };
+ static const unsigned long set35[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140730536939520, 140737488351231,
+SNULL, 140730536943615, 140737488351231,
+STORE, 140730536939520, 140730536943615,
+STORE, 140730536808448, 140730536943615,
+STORE, 94245239877632, 94245242130431,
+SNULL, 94245240008703, 94245242130431,
+STORE, 94245239877632, 94245240008703,
+STORE, 94245240008704, 94245242130431,
+ERASE, 94245240008704, 94245242130431,
+STORE, 94245242101760, 94245242109951,
+STORE, 94245242109952, 94245242130431,
+STORE, 140475575263232, 140475577516031,
+SNULL, 140475575406591, 140475577516031,
+STORE, 140475575263232, 140475575406591,
+STORE, 140475575406592, 140475577516031,
+ERASE, 140475575406592, 140475577516031,
+STORE, 140475577503744, 140475577511935,
+STORE, 140475577511936, 140475577516031,
+STORE, 140730538164224, 140730538168319,
+STORE, 140730538151936, 140730538164223,
+STORE, 140475577475072, 140475577503743,
+STORE, 140475577466880, 140475577475071,
+STORE, 140475573047296, 140475575263231,
+SNULL, 140475573047296, 140475573145599,
+STORE, 140475573145600, 140475575263231,
+STORE, 140475573047296, 140475573145599,
+SNULL, 140475575238655, 140475575263231,
+STORE, 140475573145600, 140475575238655,
+STORE, 140475575238656, 140475575263231,
+SNULL, 140475575238656, 140475575246847,
+STORE, 140475575246848, 140475575263231,
+STORE, 140475575238656, 140475575246847,
+ERASE, 140475575238656, 140475575246847,
+STORE, 140475575238656, 140475575246847,
+ERASE, 140475575246848, 140475575263231,
+STORE, 140475575246848, 140475575263231,
+STORE, 140475569250304, 140475573047295,
+SNULL, 140475569250304, 140475570909183,
+STORE, 140475570909184, 140475573047295,
+STORE, 140475569250304, 140475570909183,
+SNULL, 140475573006335, 140475573047295,
+STORE, 140475570909184, 140475573006335,
+STORE, 140475573006336, 140475573047295,
+SNULL, 140475573006336, 140475573030911,
+STORE, 140475573030912, 140475573047295,
+STORE, 140475573006336, 140475573030911,
+ERASE, 140475573006336, 140475573030911,
+STORE, 140475573006336, 140475573030911,
+ERASE, 140475573030912, 140475573047295,
+STORE, 140475573030912, 140475573047295,
+STORE, 140475577458688, 140475577475071,
+SNULL, 140475573022719, 140475573030911,
+STORE, 140475573006336, 140475573022719,
+STORE, 140475573022720, 140475573030911,
+SNULL, 140475575242751, 140475575246847,
+STORE, 140475575238656, 140475575242751,
+STORE, 140475575242752, 140475575246847,
+SNULL, 94245242105855, 94245242109951,
+STORE, 94245242101760, 94245242105855,
+STORE, 94245242105856, 94245242109951,
+SNULL, 140475577507839, 140475577511935,
+STORE, 140475577503744, 140475577507839,
+STORE, 140475577507840, 140475577511935,
+ERASE, 140475577475072, 140475577503743,
+STORE, 94245271216128, 94245271351295,
+STORE, 140475560857600, 140475569250303,
+SNULL, 140475560861695, 140475569250303,
+STORE, 140475560857600, 140475560861695,
+STORE, 140475560861696, 140475569250303,
+STORE, 140475552464896, 140475560857599,
+STORE, 140475418247168, 140475552464895,
+SNULL, 140475418247168, 140475428241407,
+STORE, 140475428241408, 140475552464895,
+STORE, 140475418247168, 140475428241407,
+ERASE, 140475418247168, 140475428241407,
+SNULL, 140475495350271, 140475552464895,
+STORE, 140475428241408, 140475495350271,
+STORE, 140475495350272, 140475552464895,
+ERASE, 140475495350272, 140475552464895,
+SNULL, 140475428376575, 140475495350271,
+STORE, 140475428241408, 140475428376575,
+STORE, 140475428376576, 140475495350271,
+SNULL, 140475552468991, 140475560857599,
+STORE, 140475552464896, 140475552468991,
+STORE, 140475552468992, 140475560857599,
+STORE, 140475544072192, 140475552464895,
+SNULL, 140475544076287, 140475552464895,
+STORE, 140475544072192, 140475544076287,
+STORE, 140475544076288, 140475552464895,
+STORE, 140475535679488, 140475544072191,
+SNULL, 140475535683583, 140475544072191,
+STORE, 140475535679488, 140475535683583,
+STORE, 140475535683584, 140475544072191,
+STORE, 140475527286784, 140475535679487,
+SNULL, 140475527290879, 140475535679487,
+STORE, 140475527286784, 140475527290879,
+STORE, 140475527290880, 140475535679487,
+STORE, 140475518894080, 140475527286783,
+STORE, 140475510501376, 140475527286783,
+STORE, 140475502108672, 140475527286783,
+STORE, 140475419848704, 140475428241407,
+STORE, 140475285630976, 140475419848703,
+SNULL, 140475285630976, 140475294023679,
+STORE, 140475294023680, 140475419848703,
+STORE, 140475285630976, 140475294023679,
+ERASE, 140475285630976, 140475294023679,
+STORE, 140475159805952, 140475419848703,
+STORE, 140475025588224, 140475419848703,
+SNULL, 140475092697087, 140475419848703,
+STORE, 140475025588224, 140475092697087,
+STORE, 140475092697088, 140475419848703,
+SNULL, 140475092697088, 140475159805951,
+STORE, 140475159805952, 140475419848703,
+STORE, 140475092697088, 140475159805951,
+ERASE, 140475092697088, 140475159805951,
+STORE, 140474891370496, 140475092697087,
+SNULL, 140474958479359, 140475092697087,
+STORE, 140474891370496, 140474958479359,
+STORE, 140474958479360, 140475092697087,
+SNULL, 140474958479360, 140475025588223,
+STORE, 140475025588224, 140475092697087,
+STORE, 140474958479360, 140475025588223,
+ERASE, 140474958479360, 140475025588223,
+SNULL, 140475361132543, 140475419848703,
+STORE, 140475159805952, 140475361132543,
+STORE, 140475361132544, 140475419848703,
+ERASE, 140475361132544, 140475419848703,
+SNULL, 140475159805952, 140475294023679,
+STORE, 140475294023680, 140475361132543,
+STORE, 140475159805952, 140475294023679,
+SNULL, 140475294158847, 140475361132543,
+STORE, 140475294023680, 140475294158847,
+STORE, 140475294158848, 140475361132543,
+SNULL, 140475226914815, 140475294023679,
+STORE, 140475159805952, 140475226914815,
+STORE, 140475226914816, 140475294023679,
+ERASE, 140475226914816, 140475294023679,
+SNULL, 140475025723391, 140475092697087,
+STORE, 140475025588224, 140475025723391,
+STORE, 140475025723392, 140475092697087,
+SNULL, 140475159941119, 140475226914815,
+STORE, 140475159805952, 140475159941119,
+STORE, 140475159941120, 140475226914815,
+SNULL, 140474891505663, 140474958479359,
+STORE, 140474891370496, 140474891505663,
+STORE, 140474891505664, 140474958479359,
+SNULL, 140475502108672, 140475518894079,
+STORE, 140475518894080, 140475527286783,
+STORE, 140475502108672, 140475518894079,
+SNULL, 140475518898175, 140475527286783,
+STORE, 140475518894080, 140475518898175,
+STORE, 140475518898176, 140475527286783,
+STORE, 140475411456000, 140475428241407,
+SNULL, 140475502112767, 140475518894079,
+STORE, 140475502108672, 140475502112767,
+STORE, 140475502112768, 140475518894079,
+SNULL, 140475411460095, 140475428241407,
+STORE, 140475411456000, 140475411460095,
+STORE, 140475411460096, 140475428241407,
+SNULL, 140475411460096, 140475419848703,
+STORE, 140475419848704, 140475428241407,
+STORE, 140475411460096, 140475419848703,
+SNULL, 140475419852799, 140475428241407,
+STORE, 140475419848704, 140475419852799,
+STORE, 140475419852800, 140475428241407,
+STORE, 140475403063296, 140475411455999,
+SNULL, 140475502112768, 140475510501375,
+STORE, 140475510501376, 140475518894079,
+STORE, 140475502112768, 140475510501375,
+SNULL, 140475510505471, 140475518894079,
+STORE, 140475510501376, 140475510505471,
+STORE, 140475510505472, 140475518894079,
+SNULL, 140475403067391, 140475411455999,
+STORE, 140475403063296, 140475403067391,
+STORE, 140475403067392, 140475411455999,
+STORE, 140475394670592, 140475403063295,
+SNULL, 140475394674687, 140475403063295,
+STORE, 140475394670592, 140475394674687,
+STORE, 140475394674688, 140475403063295,
+STORE, 140475386277888, 140475394670591,
+STORE, 140475377885184, 140475394670591,
+STORE, 140475369492480, 140475394670591,
+SNULL, 140475369496575, 140475394670591,
+STORE, 140475369492480, 140475369496575,
+STORE, 140475369496576, 140475394670591,
+SNULL, 140475369496576, 140475377885183,
+STORE, 140475377885184, 140475394670591,
+STORE, 140475369496576, 140475377885183,
+SNULL, 140475377889279, 140475394670591,
+STORE, 140475377885184, 140475377889279,
+STORE, 140475377889280, 140475394670591,
+STORE, 140475285630976, 140475294023679,
+SNULL, 140475377889280, 140475386277887,
+STORE, 140475386277888, 140475394670591,
+STORE, 140475377889280, 140475386277887,
+SNULL, 140475386281983, 140475394670591,
+STORE, 140475386277888, 140475386281983,
+STORE, 140475386281984, 140475394670591,
+SNULL, 140475285635071, 140475294023679,
+STORE, 140475285630976, 140475285635071,
+STORE, 140475285635072, 140475294023679,
+STORE, 140475277238272, 140475285630975,
+STORE, 140475268845568, 140475285630975,
+SNULL, 140475268845568, 140475277238271,
+STORE, 140475277238272, 140475285630975,
+STORE, 140475268845568, 140475277238271,
+SNULL, 140475277242367, 140475285630975,
+STORE, 140475277238272, 140475277242367,
+STORE, 140475277242368, 140475285630975,
+STORE, 140475260452864, 140475277238271,
+SNULL, 140475260452864, 140475268845567,
+STORE, 140475268845568, 140475277238271,
+STORE, 140475260452864, 140475268845567,
+SNULL, 140475268849663, 140475277238271,
+STORE, 140475268845568, 140475268849663,
+STORE, 140475268849664, 140475277238271,
+SNULL, 140475260456959, 140475268845567,
+STORE, 140475260452864, 140475260456959,
+STORE, 140475260456960, 140475268845567,
+STORE, 140475252060160, 140475260452863,
+SNULL, 140475252064255, 140475260452863,
+STORE, 140475252060160, 140475252064255,
+STORE, 140475252064256, 140475260452863,
+STORE, 140475243667456, 140475252060159,
+SNULL, 140475243671551, 140475252060159,
+STORE, 140475243667456, 140475243671551,
+STORE, 140475243671552, 140475252060159,
+STORE, 140475235274752, 140475243667455,
+STORE, 140475151413248, 140475159805951,
+STORE, 140474891505664, 140475025588223,
+STORE, 140475143020544, 140475159805951,
+SNULL, 140474891505664, 140474958479359,
+STORE, 140474958479360, 140475025588223,
+STORE, 140474891505664, 140474958479359,
+SNULL, 140474958614527, 140475025588223,
+STORE, 140474958479360, 140474958614527,
+STORE, 140474958614528, 140475025588223,
+STORE, 140474824261632, 140474891370495,
+SNULL, 140474824396799, 140474891370495,
+STORE, 140474824261632, 140474824396799,
+STORE, 140474824396800, 140474891370495,
+STORE, 140475134627840, 140475159805951,
+STORE, 140474690043904, 140474824261631,
+STORE, 140475126235136, 140475159805951,
+STORE, 140475117842432, 140475159805951,
+STORE, 140474622935040, 140474824261631,
+STORE, 140475109449728, 140475159805951,
+STORE, 140474488717312, 140474824261631,
+STORE, 140475101057024, 140475159805951,
+STORE, 140474480324608, 140474488717311,
+STORE, 140474413215744, 140474480324607,
+STORE, 140474404823040, 140474413215743,
+ERASE, 140474413215744, 140474480324607,
+STORE, 140474471931904, 140474488717311,
+STORE, 140474270605312, 140474404823039,
+SNULL, 140475101057024, 140475126235135,
+STORE, 140475126235136, 140475159805951,
+STORE, 140475101057024, 140475126235135,
+SNULL, 140475126239231, 140475159805951,
+STORE, 140475126235136, 140475126239231,
+STORE, 140475126239232, 140475159805951,
+STORE, 140474463539200, 140474488717311,
+STORE, 140474455146496, 140474488717311,
+SNULL, 140474455150591, 140474488717311,
+STORE, 140474455146496, 140474455150591,
+STORE, 140474455150592, 140474488717311,
+STORE, 140474446753792, 140474455146495,
+SNULL, 140474446757887, 140474455146495,
+STORE, 140474446753792, 140474446757887,
+STORE, 140474446757888, 140474455146495,
+STORE, 140474438361088, 140474446753791,
+STORE, 140474429968384, 140474446753791,
+SNULL, 140474429972479, 140474446753791,
+STORE, 140474429968384, 140474429972479,
+STORE, 140474429972480, 140474446753791,
+SNULL, 140475235278847, 140475243667455,
+STORE, 140475235274752, 140475235278847,
+STORE, 140475235278848, 140475243667455,
+SNULL, 140474757152767, 140474824261631,
+STORE, 140474488717312, 140474757152767,
+STORE, 140474757152768, 140474824261631,
+ERASE, 140474757152768, 140474824261631,
+SNULL, 140474488717312, 140474690043903,
+STORE, 140474690043904, 140474757152767,
+STORE, 140474488717312, 140474690043903,
+SNULL, 140474690179071, 140474757152767,
+STORE, 140474690043904, 140474690179071,
+STORE, 140474690179072, 140474757152767,
+SNULL, 140474488717312, 140474622935039,
+STORE, 140474622935040, 140474690043903,
+STORE, 140474488717312, 140474622935039,
+SNULL, 140474623070207, 140474690043903,
+STORE, 140474622935040, 140474623070207,
+STORE, 140474623070208, 140474690043903,
+SNULL, 140475101057024, 140475117842431,
+STORE, 140475117842432, 140475126235135,
+STORE, 140475101057024, 140475117842431,
+SNULL, 140475117846527, 140475126235135,
+STORE, 140475117842432, 140475117846527,
+STORE, 140475117846528, 140475126235135,
+SNULL, 140474555826175, 140474622935039,
+STORE, 140474488717312, 140474555826175,
+STORE, 140474555826176, 140474622935039,
+ERASE, 140474555826176, 140474622935039,
+STORE, 140474136387584, 140474404823039,
+SNULL, 140474136387584, 140474153172991,
+STORE, 140474153172992, 140474404823039,
+STORE, 140474136387584, 140474153172991,
+ERASE, 140474136387584, 140474153172991,
+STORE, 140474018955264, 140474404823039,
+STORE, 140473884737536, 140474404823039,
+SNULL, 140474086064127, 140474404823039,
+STORE, 140473884737536, 140474086064127,
+STORE, 140474086064128, 140474404823039,
+SNULL, 140474086064128, 140474153172991,
+STORE, 140474153172992, 140474404823039,
+STORE, 140474086064128, 140474153172991,
+ERASE, 140474086064128, 140474153172991,
+STORE, 140473750519808, 140474086064127,
+SNULL, 140473817628671, 140474086064127,
+STORE, 140473750519808, 140473817628671,
+STORE, 140473817628672, 140474086064127,
+SNULL, 140473817628672, 140473884737535,
+STORE, 140473884737536, 140474086064127,
+STORE, 140473817628672, 140473884737535,
+ERASE, 140473817628672, 140473884737535,
+SNULL, 140475126239232, 140475151413247,
+STORE, 140475151413248, 140475159805951,
+STORE, 140475126239232, 140475151413247,
+SNULL, 140475151417343, 140475159805951,
+STORE, 140475151413248, 140475151417343,
+STORE, 140475151417344, 140475159805951,
+SNULL, 140474270605311, 140474404823039,
+STORE, 140474153172992, 140474270605311,
+STORE, 140474270605312, 140474404823039,
+SNULL, 140474270605312, 140474287390719,
+STORE, 140474287390720, 140474404823039,
+STORE, 140474270605312, 140474287390719,
+ERASE, 140474270605312, 140474287390719,
+SNULL, 140474429972480, 140474438361087,
+STORE, 140474438361088, 140474446753791,
+STORE, 140474429972480, 140474438361087,
+SNULL, 140474438365183, 140474446753791,
+STORE, 140474438361088, 140474438365183,
+STORE, 140474438365184, 140474446753791,
+STORE, 140474815868928, 140474824261631,
+SNULL, 140474815873023, 140474824261631,
+STORE, 140474815868928, 140474815873023,
+STORE, 140474815873024, 140474824261631,
+SNULL, 140474220281855, 140474270605311,
+STORE, 140474153172992, 140474220281855,
+STORE, 140474220281856, 140474270605311,
+ERASE, 140474220281856, 140474270605311,
+SNULL, 140474488852479, 140474555826175,
+STORE, 140474488717312, 140474488852479,
+STORE, 140474488852480, 140474555826175,
+SNULL, 140475101057024, 140475109449727,
+STORE, 140475109449728, 140475117842431,
+STORE, 140475101057024, 140475109449727,
+SNULL, 140475109453823, 140475117842431,
+STORE, 140475109449728, 140475109453823,
+STORE, 140475109453824, 140475117842431,
+SNULL, 140473951846399, 140474086064127,
+STORE, 140473884737536, 140473951846399,
+STORE, 140473951846400, 140474086064127,
+SNULL, 140473951846400, 140474018955263,
+STORE, 140474018955264, 140474086064127,
+STORE, 140473951846400, 140474018955263,
+ERASE, 140473951846400, 140474018955263,
+SNULL, 140473884872703, 140473951846399,
+STORE, 140473884737536, 140473884872703,
+STORE, 140473884872704, 140473951846399,
+SNULL, 140474019090431, 140474086064127,
+STORE, 140474018955264, 140474019090431,
+STORE, 140474019090432, 140474086064127,
+SNULL, 140473750654975, 140473817628671,
+STORE, 140473750519808, 140473750654975,
+STORE, 140473750654976, 140473817628671,
+SNULL, 140474455150592, 140474463539199,
+STORE, 140474463539200, 140474488717311,
+STORE, 140474455150592, 140474463539199,
+SNULL, 140474463543295, 140474488717311,
+STORE, 140474463539200, 140474463543295,
+STORE, 140474463543296, 140474488717311,
+STORE, 140474807476224, 140474815868927,
+SNULL, 140474463543296, 140474471931903,
+STORE, 140474471931904, 140474488717311,
+STORE, 140474463543296, 140474471931903,
+SNULL, 140474471935999, 140474488717311,
+STORE, 140474471931904, 140474471935999,
+STORE, 140474471936000, 140474488717311,
+STORE, 140474799083520, 140474815868927,
+STORE, 140474790690816, 140474815868927,
+SNULL, 140474790690816, 140474799083519,
+STORE, 140474799083520, 140474815868927,
+STORE, 140474790690816, 140474799083519,
+SNULL, 140474799087615, 140474815868927,
+STORE, 140474799083520, 140474799087615,
+STORE, 140474799087616, 140474815868927,
+SNULL, 140474354499583, 140474404823039,
+STORE, 140474287390720, 140474354499583,
+STORE, 140474354499584, 140474404823039,
+ERASE, 140474354499584, 140474404823039,
+SNULL, 140474287525887, 140474354499583,
+STORE, 140474287390720, 140474287525887,
+STORE, 140474287525888, 140474354499583,
+STORE, 140474782298112, 140474799083519,
+STORE, 140474773905408, 140474799083519,
+SNULL, 140474773909503, 140474799083519,
+STORE, 140474773905408, 140474773909503,
+STORE, 140474773909504, 140474799083519,
+SNULL, 140475126239232, 140475134627839,
+STORE, 140475134627840, 140475151413247,
+STORE, 140475126239232, 140475134627839,
+SNULL, 140475134631935, 140475151413247,
+STORE, 140475134627840, 140475134631935,
+STORE, 140475134631936, 140475151413247,
+STORE, 140474765512704, 140474773905407,
+STORE, 140474614542336, 140474622935039,
+SNULL, 140474153308159, 140474220281855,
+STORE, 140474153172992, 140474153308159,
+STORE, 140474153308160, 140474220281855,
+SNULL, 140474404827135, 140474413215743,
+STORE, 140474404823040, 140474404827135,
+STORE, 140474404827136, 140474413215743,
+STORE, 140474606149632, 140474622935039,
+SNULL, 140474606153727, 140474622935039,
+STORE, 140474606149632, 140474606153727,
+STORE, 140474606153728, 140474622935039,
+STORE, 140474597756928, 140474606149631,
+SNULL, 140474597761023, 140474606149631,
+STORE, 140474597756928, 140474597761023,
+STORE, 140474597761024, 140474606149631,
+SNULL, 140475134631936, 140475143020543,
+STORE, 140475143020544, 140475151413247,
+STORE, 140475134631936, 140475143020543,
+SNULL, 140475143024639, 140475151413247,
+STORE, 140475143020544, 140475143024639,
+STORE, 140475143024640, 140475151413247,
+STORE, 140474589364224, 140474597756927,
+SNULL, 140474606153728, 140474614542335,
+STORE, 140474614542336, 140474622935039,
+STORE, 140474606153728, 140474614542335,
+SNULL, 140474614546431, 140474622935039,
+STORE, 140474614542336, 140474614546431,
+STORE, 140474614546432, 140474622935039,
+SNULL, 140474765516799, 140474773905407,
+STORE, 140474765512704, 140474765516799,
+STORE, 140474765516800, 140474773905407,
+STORE, 140474580971520, 140474597756927,
+SNULL, 140474773909504, 140474782298111,
+STORE, 140474782298112, 140474799083519,
+STORE, 140474773909504, 140474782298111,
+SNULL, 140474782302207, 140474799083519,
+STORE, 140474782298112, 140474782302207,
+STORE, 140474782302208, 140474799083519,
+SNULL, 140474471936000, 140474480324607,
+STORE, 140474480324608, 140474488717311,
+STORE, 140474471936000, 140474480324607,
+SNULL, 140474480328703, 140474488717311,
+STORE, 140474480324608, 140474480328703,
+STORE, 140474480328704, 140474488717311,
+STORE, 140474572578816, 140474597756927,
+SNULL, 140474572582911, 140474597756927,
+STORE, 140474572578816, 140474572582911,
+STORE, 140474572582912, 140474597756927,
+SNULL, 140474782302208, 140474790690815,
+STORE, 140474790690816, 140474799083519,
+STORE, 140474782302208, 140474790690815,
+SNULL, 140474790694911, 140474799083519,
+STORE, 140474790690816, 140474790694911,
+STORE, 140474790694912, 140474799083519,
+STORE, 140474564186112, 140474572578815,
+STORE, 140474421575680, 140474429968383,
+STORE, 140474396430336, 140474404823039,
+SNULL, 140474396434431, 140474404823039,
+STORE, 140474396430336, 140474396434431,
+STORE, 140474396434432, 140474404823039,
+STORE, 140474388037632, 140474396430335,
+SNULL, 140474799087616, 140474807476223,
+STORE, 140474807476224, 140474815868927,
+STORE, 140474799087616, 140474807476223,
+SNULL, 140474807480319, 140474815868927,
+STORE, 140474807476224, 140474807480319,
+STORE, 140474807480320, 140474815868927,
+SNULL, 140475101061119, 140475109449727,
+STORE, 140475101057024, 140475101061119,
+STORE, 140475101061120, 140475109449727,
+STORE, 140474379644928, 140474396430335,
+SNULL, 140474572582912, 140474589364223,
+STORE, 140474589364224, 140474597756927,
+STORE, 140474572582912, 140474589364223,
+SNULL, 140474589368319, 140474597756927,
+STORE, 140474589364224, 140474589368319,
+STORE, 140474589368320, 140474597756927,
+STORE, 140474371252224, 140474396430335,
+STORE, 140474362859520, 140474396430335,
+STORE, 140474278998016, 140474287390719,
+STORE, 140474270605312, 140474287390719,
+STORE, 140474262212608, 140474287390719,
+SNULL, 140474262216703, 140474287390719,
+STORE, 140474262212608, 140474262216703,
+STORE, 140474262216704, 140474287390719,
+STORE, 140474253819904, 140474262212607,
+SNULL, 140474253823999, 140474262212607,
+STORE, 140474253819904, 140474253823999,
+STORE, 140474253824000, 140474262212607,
+SNULL, 140474362859520, 140474388037631,
+STORE, 140474388037632, 140474396430335,
+STORE, 140474362859520, 140474388037631,
+SNULL, 140474388041727, 140474396430335,
+STORE, 140474388037632, 140474388041727,
+STORE, 140474388041728, 140474396430335,
+SNULL, 140474362859520, 140474379644927,
+STORE, 140474379644928, 140474388037631,
+STORE, 140474362859520, 140474379644927,
+SNULL, 140474379649023, 140474388037631,
+STORE, 140474379644928, 140474379649023,
+STORE, 140474379649024, 140474388037631,
+STORE, 140474245427200, 140474253819903,
+STORE, 140474237034496, 140474253819903,
+STORE, 140474228641792, 140474253819903,
+STORE, 140474144780288, 140474153172991,
+SNULL, 140474228645887, 140474253819903,
+STORE, 140474228641792, 140474228645887,
+STORE, 140474228645888, 140474253819903,
+SNULL, 140474564190207, 140474572578815,
+STORE, 140474564186112, 140474564190207,
+STORE, 140474564190208, 140474572578815,
+STORE, 140474136387584, 140474153172991,
+SNULL, 140474362859520, 140474371252223,
+STORE, 140474371252224, 140474379644927,
+STORE, 140474362859520, 140474371252223,
+SNULL, 140474371256319, 140474379644927,
+STORE, 140474371252224, 140474371256319,
+STORE, 140474371256320, 140474379644927,
+STORE, 140474127994880, 140474153172991,
+STORE, 140474119602176, 140474153172991,
+SNULL, 140474421579775, 140474429968383,
+STORE, 140474421575680, 140474421579775,
+STORE, 140474421579776, 140474429968383,
+STORE, 140474111209472, 140474153172991,
+SNULL, 140474111213567, 140474153172991,
+STORE, 140474111209472, 140474111213567,
+STORE, 140474111213568, 140474153172991,
+SNULL, 140474262216704, 140474270605311,
+STORE, 140474270605312, 140474287390719,
+STORE, 140474262216704, 140474270605311,
+SNULL, 140474270609407, 140474287390719,
+STORE, 140474270605312, 140474270609407,
+STORE, 140474270609408, 140474287390719,
+STORE, 140474102816768, 140474111209471,
+SNULL, 140474102820863, 140474111209471,
+STORE, 140474102816768, 140474102820863,
+STORE, 140474102820864, 140474111209471,
+SNULL, 140474270609408, 140474278998015,
+STORE, 140474278998016, 140474287390719,
+STORE, 140474270609408, 140474278998015,
+SNULL, 140474279002111, 140474287390719,
+STORE, 140474278998016, 140474279002111,
+STORE, 140474279002112, 140474287390719,
+STORE, 140474094424064, 140474102816767,
+SNULL, 140474572582912, 140474580971519,
+STORE, 140474580971520, 140474589364223,
+STORE, 140474572582912, 140474580971519,
+SNULL, 140474580975615, 140474589364223,
+STORE, 140474580971520, 140474580975615,
+STORE, 140474580975616, 140474589364223,
+SNULL, 140474362863615, 140474371252223,
+STORE, 140474362859520, 140474362863615,
+STORE, 140474362863616, 140474371252223,
+STORE, 140474010562560, 140474018955263,
+SNULL, 140474228645888, 140474245427199,
+STORE, 140474245427200, 140474253819903,
+STORE, 140474228645888, 140474245427199,
+SNULL, 140474245431295, 140474253819903,
+STORE, 140474245427200, 140474245431295,
+STORE, 140474245431296, 140474253819903,
+SNULL, 140474111213568, 140474136387583,
+STORE, 140474136387584, 140474153172991,
+STORE, 140474111213568, 140474136387583,
+SNULL, 140474136391679, 140474153172991,
+STORE, 140474136387584, 140474136391679,
+STORE, 140474136391680, 140474153172991,
+STORE, 140474002169856, 140474018955263,
+STORE, 140473993777152, 140474018955263,
+SNULL, 140474111213568, 140474127994879,
+STORE, 140474127994880, 140474136387583,
+STORE, 140474111213568, 140474127994879,
+SNULL, 140474127998975, 140474136387583,
+STORE, 140474127994880, 140474127998975,
+STORE, 140474127998976, 140474136387583,
+SNULL, 140474228645888, 140474237034495,
+STORE, 140474237034496, 140474245427199,
+STORE, 140474228645888, 140474237034495,
+SNULL, 140474237038591, 140474245427199,
+STORE, 140474237034496, 140474237038591,
+STORE, 140474237038592, 140474245427199,
+SNULL, 140474136391680, 140474144780287,
+STORE, 140474144780288, 140474153172991,
+STORE, 140474136391680, 140474144780287,
+SNULL, 140474144784383, 140474153172991,
+STORE, 140474144780288, 140474144784383,
+STORE, 140474144784384, 140474153172991,
+STORE, 140473985384448, 140474018955263,
+STORE, 140473976991744, 140474018955263,
+STORE, 140473968599040, 140474018955263,
+SNULL, 140473968603135, 140474018955263,
+STORE, 140473968599040, 140473968603135,
+STORE, 140473968603136, 140474018955263,
+SNULL, 140474111213568, 140474119602175,
+STORE, 140474119602176, 140474127994879,
+STORE, 140474111213568, 140474119602175,
+SNULL, 140474119606271, 140474127994879,
+STORE, 140474119602176, 140474119606271,
+STORE, 140474119606272, 140474127994879,
+STORE, 140473960206336, 140473968599039,
+SNULL, 140474094428159, 140474102816767,
+STORE, 140474094424064, 140474094428159,
+STORE, 140474094428160, 140474102816767,
+STORE, 140473876344832, 140473884737535,
+STORE, 140473867952128, 140473884737535,
+STORE, 140473859559424, 140473884737535,
+SNULL, 140473859563519, 140473884737535,
+STORE, 140473859559424, 140473859563519,
+STORE, 140473859563520, 140473884737535,
+SNULL, 140473968603136, 140473993777151,
+STORE, 140473993777152, 140474018955263,
+STORE, 140473968603136, 140473993777151,
+SNULL, 140473993781247, 140474018955263,
+STORE, 140473993777152, 140473993781247,
+STORE, 140473993781248, 140474018955263,
+SNULL, 140473960210431, 140473968599039,
+STORE, 140473960206336, 140473960210431,
+STORE, 140473960210432, 140473968599039,
+SNULL, 140473993781248, 140474010562559,
+STORE, 140474010562560, 140474018955263,
+STORE, 140473993781248, 140474010562559,
+SNULL, 140474010566655, 140474018955263,
+STORE, 140474010562560, 140474010566655,
+STORE, 140474010566656, 140474018955263,
+SNULL, 140473968603136, 140473985384447,
+STORE, 140473985384448, 140473993777151,
+STORE, 140473968603136, 140473985384447,
+SNULL, 140473985388543, 140473993777151,
+STORE, 140473985384448, 140473985388543,
+STORE, 140473985388544, 140473993777151,
+SNULL, 140473993781248, 140474002169855,
+STORE, 140474002169856, 140474010562559,
+STORE, 140473993781248, 140474002169855,
+SNULL, 140474002173951, 140474010562559,
+STORE, 140474002169856, 140474002173951,
+STORE, 140474002173952, 140474010562559,
+STORE, 140473851166720, 140473859559423,
+SNULL, 140473851170815, 140473859559423,
+STORE, 140473851166720, 140473851170815,
+STORE, 140473851170816, 140473859559423,
+SNULL, 140473968603136, 140473976991743,
+STORE, 140473976991744, 140473985384447,
+STORE, 140473968603136, 140473976991743,
+SNULL, 140473976995839, 140473985384447,
+STORE, 140473976991744, 140473976995839,
+STORE, 140473976995840, 140473985384447,
+STORE, 140473842774016, 140473851166719,
+SNULL, 140473859563520, 140473867952127,
+STORE, 140473867952128, 140473884737535,
+STORE, 140473859563520, 140473867952127,
+SNULL, 140473867956223, 140473884737535,
+STORE, 140473867952128, 140473867956223,
+STORE, 140473867956224, 140473884737535,
+SNULL, 140473867956224, 140473876344831,
+STORE, 140473876344832, 140473884737535,
+STORE, 140473867956224, 140473876344831,
+SNULL, 140473876348927, 140473884737535,
+STORE, 140473876344832, 140473876348927,
+STORE, 140473876348928, 140473884737535,
+STORE, 140473834381312, 140473851166719,
+SNULL, 140473834385407, 140473851166719,
+STORE, 140473834381312, 140473834385407,
+STORE, 140473834385408, 140473851166719,
+SNULL, 140473834385408, 140473842774015,
+STORE, 140473842774016, 140473851166719,
+STORE, 140473834385408, 140473842774015,
+SNULL, 140473842778111, 140473851166719,
+STORE, 140473842774016, 140473842778111,
+STORE, 140473842778112, 140473851166719,
+STORE, 140473825988608, 140473834381311,
+SNULL, 140473825992703, 140473834381311,
+STORE, 140473825988608, 140473825992703,
+STORE, 140473825992704, 140473834381311,
+STORE, 140475577475072, 140475577503743,
+STORE, 140475499917312, 140475502108671,
+SNULL, 140475499917312, 140475500007423,
+STORE, 140475500007424, 140475502108671,
+STORE, 140475499917312, 140475500007423,
+SNULL, 140475502100479, 140475502108671,
+STORE, 140475500007424, 140475502100479,
+STORE, 140475502100480, 140475502108671,
+ERASE, 140475502100480, 140475502108671,
+STORE, 140475502100480, 140475502108671,
+SNULL, 140475502104575, 140475502108671,
+STORE, 140475502100480, 140475502104575,
+STORE, 140475502104576, 140475502108671,
+ERASE, 140475577475072, 140475577503743,
+ERASE, 140475235274752, 140475235278847,
+ERASE, 140475235278848, 140475243667455,
+ERASE, 140474815868928, 140474815873023,
+ERASE, 140474815873024, 140474824261631,
+ERASE, 140474606149632, 140474606153727,
+ERASE, 140474606153728, 140474614542335,
+ERASE, 140474270605312, 140474270609407,
+ERASE, 140474270609408, 140474278998015,
+ERASE, 140474438361088, 140474438365183,
+ERASE, 140474438365184, 140474446753791,
+ERASE, 140474597756928, 140474597761023,
+ERASE, 140474597761024, 140474606149631,
+ERASE, 140475126235136, 140475126239231,
+ERASE, 140475126239232, 140475134627839,
+ERASE, 140474463539200, 140474463543295,
+ERASE, 140474463543296, 140474471931903,
+ERASE, 140474388037632, 140474388041727,
+ERASE, 140474388041728, 140474396430335,
+ERASE, 140474404823040, 140474404827135,
+ERASE, 140474404827136, 140474413215743,
+ERASE, 140474278998016, 140474279002111,
+ERASE, 140474279002112, 140474287390719,
+ERASE, 140474094424064, 140474094428159,
+ERASE, 140474094428160, 140474102816767,
+ERASE, 140473867952128, 140473867956223,
+ERASE, 140473867956224, 140473876344831,
+ERASE, 140475151413248, 140475151417343,
+ERASE, 140475151417344, 140475159805951,
+ERASE, 140474455146496, 140474455150591,
+ERASE, 140474455150592, 140474463539199,
+ERASE, 140474807476224, 140474807480319,
+ERASE, 140474807480320, 140474815868927,
+ERASE, 140475117842432, 140475117846527,
+ERASE, 140475117846528, 140475126235135,
+ERASE, 140474446753792, 140474446757887,
+ERASE, 140474446757888, 140474455146495,
+ERASE, 140474429968384, 140474429972479,
+ERASE, 140474429972480, 140474438361087,
+ERASE, 140474782298112, 140474782302207,
+ERASE, 140474782302208, 140474790690815,
+ERASE, 140474136387584, 140474136391679,
+ERASE, 140474136391680, 140474144780287,
+ERASE, 140474002169856, 140474002173951,
+ERASE, 140474002173952, 140474010562559,
+ERASE, 140475134627840, 140475134631935,
+ERASE, 140475134631936, 140475143020543,
+ERASE, 140474471931904, 140474471935999,
+ERASE, 140474471936000, 140474480324607,
+ERASE, 140474396430336, 140474396434431,
+ERASE, 140474396434432, 140474404823039,
+ };
+ static const unsigned long set36[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140723893125120, 140737488351231,
+SNULL, 140723893129215, 140737488351231,
+STORE, 140723893125120, 140723893129215,
+STORE, 140723892994048, 140723893129215,
+STORE, 94076829786112, 94076832038911,
+SNULL, 94076829917183, 94076832038911,
+STORE, 94076829786112, 94076829917183,
+STORE, 94076829917184, 94076832038911,
+ERASE, 94076829917184, 94076832038911,
+STORE, 94076832010240, 94076832018431,
+STORE, 94076832018432, 94076832038911,
+STORE, 140122444345344, 140122446598143,
+SNULL, 140122444488703, 140122446598143,
+STORE, 140122444345344, 140122444488703,
+STORE, 140122444488704, 140122446598143,
+ERASE, 140122444488704, 140122446598143,
+STORE, 140122446585856, 140122446594047,
+STORE, 140122446594048, 140122446598143,
+STORE, 140723893538816, 140723893542911,
+STORE, 140723893526528, 140723893538815,
+STORE, 140122446557184, 140122446585855,
+STORE, 140122446548992, 140122446557183,
+STORE, 140122442129408, 140122444345343,
+SNULL, 140122442129408, 140122442227711,
+STORE, 140122442227712, 140122444345343,
+STORE, 140122442129408, 140122442227711,
+SNULL, 140122444320767, 140122444345343,
+STORE, 140122442227712, 140122444320767,
+STORE, 140122444320768, 140122444345343,
+SNULL, 140122444320768, 140122444328959,
+STORE, 140122444328960, 140122444345343,
+STORE, 140122444320768, 140122444328959,
+ERASE, 140122444320768, 140122444328959,
+STORE, 140122444320768, 140122444328959,
+ERASE, 140122444328960, 140122444345343,
+STORE, 140122444328960, 140122444345343,
+STORE, 140122438332416, 140122442129407,
+SNULL, 140122438332416, 140122439991295,
+STORE, 140122439991296, 140122442129407,
+STORE, 140122438332416, 140122439991295,
+SNULL, 140122442088447, 140122442129407,
+STORE, 140122439991296, 140122442088447,
+STORE, 140122442088448, 140122442129407,
+SNULL, 140122442088448, 140122442113023,
+STORE, 140122442113024, 140122442129407,
+STORE, 140122442088448, 140122442113023,
+ERASE, 140122442088448, 140122442113023,
+STORE, 140122442088448, 140122442113023,
+ERASE, 140122442113024, 140122442129407,
+STORE, 140122442113024, 140122442129407,
+STORE, 140122446540800, 140122446557183,
+SNULL, 140122442104831, 140122442113023,
+STORE, 140122442088448, 140122442104831,
+STORE, 140122442104832, 140122442113023,
+SNULL, 140122444324863, 140122444328959,
+STORE, 140122444320768, 140122444324863,
+STORE, 140122444324864, 140122444328959,
+SNULL, 94076832014335, 94076832018431,
+STORE, 94076832010240, 94076832014335,
+STORE, 94076832014336, 94076832018431,
+SNULL, 140122446589951, 140122446594047,
+STORE, 140122446585856, 140122446589951,
+STORE, 140122446589952, 140122446594047,
+ERASE, 140122446557184, 140122446585855,
+STORE, 94076845723648, 94076845858815,
+STORE, 140122429939712, 140122438332415,
+SNULL, 140122429943807, 140122438332415,
+STORE, 140122429939712, 140122429943807,
+STORE, 140122429943808, 140122438332415,
+STORE, 140122421547008, 140122429939711,
+STORE, 140122287329280, 140122421547007,
+SNULL, 140122287329280, 140122301399039,
+STORE, 140122301399040, 140122421547007,
+STORE, 140122287329280, 140122301399039,
+ERASE, 140122287329280, 140122301399039,
+SNULL, 140122368507903, 140122421547007,
+STORE, 140122301399040, 140122368507903,
+STORE, 140122368507904, 140122421547007,
+ERASE, 140122368507904, 140122421547007,
+SNULL, 140122301534207, 140122368507903,
+STORE, 140122301399040, 140122301534207,
+STORE, 140122301534208, 140122368507903,
+SNULL, 140122421551103, 140122429939711,
+STORE, 140122421547008, 140122421551103,
+STORE, 140122421551104, 140122429939711,
+STORE, 140122413154304, 140122421547007,
+SNULL, 140122413158399, 140122421547007,
+STORE, 140122413154304, 140122413158399,
+STORE, 140122413158400, 140122421547007,
+STORE, 140122404761600, 140122413154303,
+SNULL, 140122404765695, 140122413154303,
+STORE, 140122404761600, 140122404765695,
+STORE, 140122404765696, 140122413154303,
+STORE, 140122396368896, 140122404761599,
+SNULL, 140122396372991, 140122404761599,
+STORE, 140122396368896, 140122396372991,
+STORE, 140122396372992, 140122404761599,
+STORE, 140122387976192, 140122396368895,
+STORE, 140122167181312, 140122301399039,
+SNULL, 140122234290175, 140122301399039,
+STORE, 140122167181312, 140122234290175,
+STORE, 140122234290176, 140122301399039,
+ERASE, 140122234290176, 140122301399039,
+SNULL, 140122167316479, 140122234290175,
+STORE, 140122167181312, 140122167316479,
+STORE, 140122167316480, 140122234290175,
+STORE, 140122379583488, 140122396368895,
+STORE, 140122371190784, 140122396368895,
+STORE, 140122167316480, 140122301399039,
+STORE, 140122158788608, 140122167181311,
+SNULL, 140122371190784, 140122387976191,
+STORE, 140122387976192, 140122396368895,
+STORE, 140122371190784, 140122387976191,
+SNULL, 140122387980287, 140122396368895,
+STORE, 140122387976192, 140122387980287,
+STORE, 140122387980288, 140122396368895,
+SNULL, 140122167316480, 140122234290175,
+STORE, 140122234290176, 140122301399039,
+STORE, 140122167316480, 140122234290175,
+SNULL, 140122234425343, 140122301399039,
+STORE, 140122234290176, 140122234425343,
+STORE, 140122234425344, 140122301399039,
+STORE, 140122024570880, 140122158788607,
+SNULL, 140122024570880, 140122032963583,
+STORE, 140122032963584, 140122158788607,
+STORE, 140122024570880, 140122032963583,
+ERASE, 140122024570880, 140122032963583,
+STORE, 140121898745856, 140122158788607,
+STORE, 140121890353152, 140121898745855,
+SNULL, 140122100072447, 140122158788607,
+STORE, 140121898745856, 140122100072447,
+STORE, 140122100072448, 140122158788607,
+ERASE, 140122100072448, 140122158788607,
+SNULL, 140121965854719, 140122100072447,
+STORE, 140121898745856, 140121965854719,
+STORE, 140121965854720, 140122100072447,
+SNULL, 140121965854720, 140122032963583,
+STORE, 140122032963584, 140122100072447,
+STORE, 140121965854720, 140122032963583,
+ERASE, 140121965854720, 140122032963583,
+SNULL, 140121898881023, 140121965854719,
+STORE, 140121898745856, 140121898881023,
+STORE, 140121898881024, 140121965854719,
+SNULL, 140121890357247, 140121898745855,
+STORE, 140121890353152, 140121890357247,
+STORE, 140121890357248, 140121898745855,
+SNULL, 140122371190784, 140122379583487,
+STORE, 140122379583488, 140122387976191,
+STORE, 140122371190784, 140122379583487,
+SNULL, 140122379587583, 140122387976191,
+STORE, 140122379583488, 140122379587583,
+STORE, 140122379587584, 140122387976191,
+SNULL, 140122033098751, 140122100072447,
+STORE, 140122032963584, 140122033098751,
+STORE, 140122033098752, 140122100072447,
+SNULL, 140122158792703, 140122167181311,
+STORE, 140122158788608, 140122158792703,
+STORE, 140122158792704, 140122167181311,
+STORE, 140122150395904, 140122158788607,
+STORE, 140122142003200, 140122158788607,
+SNULL, 140122142007295, 140122158788607,
+STORE, 140122142003200, 140122142007295,
+STORE, 140122142007296, 140122158788607,
+SNULL, 140122371194879, 140122379583487,
+STORE, 140122371190784, 140122371194879,
+STORE, 140122371194880, 140122379583487,
+SNULL, 140122142007296, 140122150395903,
+STORE, 140122150395904, 140122158788607,
+STORE, 140122142007296, 140122150395903,
+SNULL, 140122150399999, 140122158788607,
+STORE, 140122150395904, 140122150399999,
+STORE, 140122150400000, 140122158788607,
+STORE, 140122133610496, 140122142003199,
+STORE, 140122125217792, 140122142003199,
+STORE, 140122116825088, 140122142003199,
+SNULL, 140122116829183, 140122142003199,
+STORE, 140122116825088, 140122116829183,
+STORE, 140122116829184, 140122142003199,
+SNULL, 140122116829184, 140122133610495,
+STORE, 140122133610496, 140122142003199,
+STORE, 140122116829184, 140122133610495,
+SNULL, 140122133614591, 140122142003199,
+STORE, 140122133610496, 140122133614591,
+STORE, 140122133614592, 140122142003199,
+SNULL, 140122116829184, 140122125217791,
+STORE, 140122125217792, 140122133610495,
+STORE, 140122116829184, 140122125217791,
+SNULL, 140122125221887, 140122133610495,
+STORE, 140122125217792, 140122125221887,
+STORE, 140122125221888, 140122133610495,
+STORE, 140122108432384, 140122116825087,
+SNULL, 140122108436479, 140122116825087,
+STORE, 140122108432384, 140122108436479,
+STORE, 140122108436480, 140122116825087,
+STORE, 140122024570880, 140122032963583,
+STORE, 140122016178176, 140122032963583,
+SNULL, 140122016182271, 140122032963583,
+STORE, 140122016178176, 140122016182271,
+STORE, 140122016182272, 140122032963583,
+SNULL, 140122016182272, 140122024570879,
+STORE, 140122024570880, 140122032963583,
+STORE, 140122016182272, 140122024570879,
+SNULL, 140122024574975, 140122032963583,
+STORE, 140122024570880, 140122024574975,
+STORE, 140122024574976, 140122032963583,
+STORE, 140122007785472, 140122016178175,
+SNULL, 140122007789567, 140122016178175,
+STORE, 140122007785472, 140122007789567,
+STORE, 140122007789568, 140122016178175,
+STORE, 140121999392768, 140122007785471,
+STORE, 140121991000064, 140122007785471,
+SNULL, 140121991004159, 140122007785471,
+STORE, 140121991000064, 140121991004159,
+STORE, 140121991004160, 140122007785471,
+SNULL, 140121991004160, 140121999392767,
+STORE, 140121999392768, 140122007785471,
+STORE, 140121991004160, 140121999392767,
+SNULL, 140121999396863, 140122007785471,
+STORE, 140121999392768, 140121999396863,
+STORE, 140121999396864, 140122007785471,
+STORE, 140121982607360, 140121991000063,
+STORE, 140121823244288, 140121890353151,
+ERASE, 140121823244288, 140121890353151,
+STORE, 140121756135424, 140121890353151,
+SNULL, 140121756135424, 140121764528127,
+STORE, 140121764528128, 140121890353151,
+STORE, 140121756135424, 140121764528127,
+ERASE, 140121756135424, 140121764528127,
+SNULL, 140121831636991, 140121890353151,
+STORE, 140121764528128, 140121831636991,
+STORE, 140121831636992, 140121890353151,
+ERASE, 140121831636992, 140121890353151,
+STORE, 140121974214656, 140121991000063,
+STORE, 140121630310400, 140121831636991,
+SNULL, 140121697419263, 140121831636991,
+STORE, 140121630310400, 140121697419263,
+STORE, 140121697419264, 140121831636991,
+SNULL, 140121697419264, 140121764528127,
+STORE, 140121764528128, 140121831636991,
+STORE, 140121697419264, 140121764528127,
+ERASE, 140121697419264, 140121764528127,
+STORE, 140121881960448, 140121890353151,
+STORE, 140121630310400, 140121831636991,
+STORE, 140121873567744, 140121890353151,
+SNULL, 140121630310400, 140121697419263,
+STORE, 140121697419264, 140121831636991,
+STORE, 140121630310400, 140121697419263,
+SNULL, 140121697554431, 140121831636991,
+STORE, 140121697419264, 140121697554431,
+STORE, 140121697554432, 140121831636991,
+STORE, 140121865175040, 140121890353151,
+STORE, 140121856782336, 140121890353151,
+STORE, 140121848389632, 140121890353151,
+STORE, 140121839996928, 140121890353151,
+STORE, 140121496092672, 140121697419263,
+STORE, 140121487699968, 140121496092671,
+STORE, 140121420591104, 140121487699967,
+STORE, 140121412198400, 140121420591103,
+ERASE, 140121420591104, 140121487699967,
+STORE, 140121479307264, 140121496092671,
+STORE, 140121277980672, 140121412198399,
+SNULL, 140121277980672, 140121294766079,
+STORE, 140121294766080, 140121412198399,
+STORE, 140121277980672, 140121294766079,
+ERASE, 140121277980672, 140121294766079,
+STORE, 140121470914560, 140121496092671,
+STORE, 140121462521856, 140121496092671,
+STORE, 140121160548352, 140121412198399,
+STORE, 140121454129152, 140121496092671,
+SNULL, 140121227657215, 140121412198399,
+STORE, 140121160548352, 140121227657215,
+STORE, 140121227657216, 140121412198399,
+SNULL, 140121227657216, 140121294766079,
+STORE, 140121294766080, 140121412198399,
+STORE, 140121227657216, 140121294766079,
+ERASE, 140121227657216, 140121294766079,
+STORE, 140121445736448, 140121496092671,
+STORE, 140121437343744, 140121496092671,
+SNULL, 140121437343744, 140121445736447,
+STORE, 140121445736448, 140121496092671,
+STORE, 140121437343744, 140121445736447,
+SNULL, 140121445740543, 140121496092671,
+STORE, 140121445736448, 140121445740543,
+STORE, 140121445740544, 140121496092671,
+SNULL, 140121697554432, 140121764528127,
+STORE, 140121764528128, 140121831636991,
+STORE, 140121697554432, 140121764528127,
+SNULL, 140121764663295, 140121831636991,
+STORE, 140121764528128, 140121764663295,
+STORE, 140121764663296, 140121831636991,
+SNULL, 140121496092672, 140121630310399,
+STORE, 140121630310400, 140121697419263,
+STORE, 140121496092672, 140121630310399,
+SNULL, 140121630445567, 140121697419263,
+STORE, 140121630310400, 140121630445567,
+STORE, 140121630445568, 140121697419263,
+SNULL, 140121445740544, 140121454129151,
+STORE, 140121454129152, 140121496092671,
+STORE, 140121445740544, 140121454129151,
+SNULL, 140121454133247, 140121496092671,
+STORE, 140121454129152, 140121454133247,
+STORE, 140121454133248, 140121496092671,
+STORE, 140121026330624, 140121227657215,
+SNULL, 140121093439487, 140121227657215,
+STORE, 140121026330624, 140121093439487,
+STORE, 140121093439488, 140121227657215,
+SNULL, 140121093439488, 140121160548351,
+STORE, 140121160548352, 140121227657215,
+STORE, 140121093439488, 140121160548351,
+ERASE, 140121093439488, 140121160548351,
+SNULL, 140121563201535, 140121630310399,
+STORE, 140121496092672, 140121563201535,
+STORE, 140121563201536, 140121630310399,
+ERASE, 140121563201536, 140121630310399,
+STORE, 140120892112896, 140121093439487,
+SNULL, 140120959221759, 140121093439487,
+STORE, 140120892112896, 140120959221759,
+STORE, 140120959221760, 140121093439487,
+SNULL, 140120959221760, 140121026330623,
+STORE, 140121026330624, 140121093439487,
+STORE, 140120959221760, 140121026330623,
+ERASE, 140120959221760, 140121026330623,
+STORE, 140120757895168, 140120959221759,
+SNULL, 140121361874943, 140121412198399,
+STORE, 140121294766080, 140121361874943,
+STORE, 140121361874944, 140121412198399,
+ERASE, 140121361874944, 140121412198399,
+SNULL, 140121294901247, 140121361874943,
+STORE, 140121294766080, 140121294901247,
+STORE, 140121294901248, 140121361874943,
+STORE, 140120623677440, 140120959221759,
+SNULL, 140120690786303, 140120959221759,
+STORE, 140120623677440, 140120690786303,
+STORE, 140120690786304, 140120959221759,
+SNULL, 140120690786304, 140120757895167,
+STORE, 140120757895168, 140120959221759,
+STORE, 140120690786304, 140120757895167,
+ERASE, 140120690786304, 140120757895167,
+SNULL, 140121160683519, 140121227657215,
+STORE, 140121160548352, 140121160683519,
+STORE, 140121160683520, 140121227657215,
+SNULL, 140121974214656, 140121982607359,
+STORE, 140121982607360, 140121991000063,
+STORE, 140121974214656, 140121982607359,
+SNULL, 140121982611455, 140121991000063,
+STORE, 140121982607360, 140121982611455,
+STORE, 140121982611456, 140121991000063,
+SNULL, 140121839996928, 140121873567743,
+STORE, 140121873567744, 140121890353151,
+STORE, 140121839996928, 140121873567743,
+SNULL, 140121873571839, 140121890353151,
+STORE, 140121873567744, 140121873571839,
+STORE, 140121873571840, 140121890353151,
+SNULL, 140121873571840, 140121881960447,
+STORE, 140121881960448, 140121890353151,
+STORE, 140121873571840, 140121881960447,
+SNULL, 140121881964543, 140121890353151,
+STORE, 140121881960448, 140121881964543,
+STORE, 140121881964544, 140121890353151,
+SNULL, 140121840001023, 140121873567743,
+STORE, 140121839996928, 140121840001023,
+STORE, 140121840001024, 140121873567743,
+SNULL, 140121840001024, 140121865175039,
+STORE, 140121865175040, 140121873567743,
+STORE, 140121840001024, 140121865175039,
+SNULL, 140121865179135, 140121873567743,
+STORE, 140121865175040, 140121865179135,
+STORE, 140121865179136, 140121873567743,
+SNULL, 140121437347839, 140121445736447,
+STORE, 140121437343744, 140121437347839,
+STORE, 140121437347840, 140121445736447,
+STORE, 140121621917696, 140121630310399,
+STORE, 140121613524992, 140121630310399,
+SNULL, 140121026465791, 140121093439487,
+STORE, 140121026330624, 140121026465791,
+STORE, 140121026465792, 140121093439487,
+SNULL, 140121496227839, 140121563201535,
+STORE, 140121496092672, 140121496227839,
+STORE, 140121496227840, 140121563201535,
+SNULL, 140120757895168, 140120892112895,
+STORE, 140120892112896, 140120959221759,
+STORE, 140120757895168, 140120892112895,
+SNULL, 140120892248063, 140120959221759,
+STORE, 140120892112896, 140120892248063,
+STORE, 140120892248064, 140120959221759,
+SNULL, 140120825004031, 140120892112895,
+STORE, 140120757895168, 140120825004031,
+STORE, 140120825004032, 140120892112895,
+ERASE, 140120825004032, 140120892112895,
+SNULL, 140120623812607, 140120690786303,
+STORE, 140120623677440, 140120623812607,
+STORE, 140120623812608, 140120690786303,
+SNULL, 140120758030335, 140120825004031,
+STORE, 140120757895168, 140120758030335,
+STORE, 140120758030336, 140120825004031,
+SNULL, 140121454133248, 140121462521855,
+STORE, 140121462521856, 140121496092671,
+STORE, 140121454133248, 140121462521855,
+SNULL, 140121462525951, 140121496092671,
+STORE, 140121462521856, 140121462525951,
+STORE, 140121462525952, 140121496092671,
+STORE, 140121605132288, 140121630310399,
+SNULL, 140121605136383, 140121630310399,
+STORE, 140121605132288, 140121605136383,
+STORE, 140121605136384, 140121630310399,
+STORE, 140121596739584, 140121605132287,
+SNULL, 140121605136384, 140121621917695,
+STORE, 140121621917696, 140121630310399,
+STORE, 140121605136384, 140121621917695,
+SNULL, 140121621921791, 140121630310399,
+STORE, 140121621917696, 140121621921791,
+STORE, 140121621921792, 140121630310399,
+STORE, 140121588346880, 140121605132287,
+STORE, 140121579954176, 140121605132287,
+SNULL, 140121412202495, 140121420591103,
+STORE, 140121412198400, 140121412202495,
+STORE, 140121412202496, 140121420591103,
+SNULL, 140121974218751, 140121982607359,
+STORE, 140121974214656, 140121974218751,
+STORE, 140121974218752, 140121982607359,
+SNULL, 140121462525952, 140121479307263,
+STORE, 140121479307264, 140121496092671,
+STORE, 140121462525952, 140121479307263,
+SNULL, 140121479311359, 140121496092671,
+STORE, 140121479307264, 140121479311359,
+STORE, 140121479311360, 140121496092671,
+STORE, 140121571561472, 140121605132287,
+SNULL, 140121571565567, 140121605132287,
+STORE, 140121571561472, 140121571565567,
+STORE, 140121571565568, 140121605132287,
+STORE, 140121428951040, 140121437343743,
+SNULL, 140121428955135, 140121437343743,
+STORE, 140121428951040, 140121428955135,
+STORE, 140121428955136, 140121437343743,
+SNULL, 140121840001024, 140121856782335,
+STORE, 140121856782336, 140121865175039,
+STORE, 140121840001024, 140121856782335,
+SNULL, 140121856786431, 140121865175039,
+STORE, 140121856782336, 140121856786431,
+STORE, 140121856786432, 140121865175039,
+STORE, 140121403805696, 140121412198399,
+SNULL, 140121840001024, 140121848389631,
+STORE, 140121848389632, 140121856782335,
+STORE, 140121840001024, 140121848389631,
+SNULL, 140121848393727, 140121856782335,
+STORE, 140121848389632, 140121848393727,
+STORE, 140121848393728, 140121856782335,
+SNULL, 140121479311360, 140121487699967,
+STORE, 140121487699968, 140121496092671,
+STORE, 140121479311360, 140121487699967,
+SNULL, 140121487704063, 140121496092671,
+STORE, 140121487699968, 140121487704063,
+STORE, 140121487704064, 140121496092671,
+STORE, 140121395412992, 140121412198399,
+STORE, 140121387020288, 140121412198399,
+SNULL, 140121387024383, 140121412198399,
+STORE, 140121387020288, 140121387024383,
+STORE, 140121387024384, 140121412198399,
+SNULL, 140121605136384, 140121613524991,
+STORE, 140121613524992, 140121621917695,
+STORE, 140121605136384, 140121613524991,
+SNULL, 140121613529087, 140121621917695,
+STORE, 140121613524992, 140121613529087,
+STORE, 140121613529088, 140121621917695,
+SNULL, 140121462525952, 140121470914559,
+STORE, 140121470914560, 140121479307263,
+STORE, 140121462525952, 140121470914559,
+SNULL, 140121470918655, 140121479307263,
+STORE, 140121470914560, 140121470918655,
+STORE, 140121470918656, 140121479307263,
+STORE, 140121378627584, 140121387020287,
+SNULL, 140121378631679, 140121387020287,
+STORE, 140121378627584, 140121378631679,
+STORE, 140121378631680, 140121387020287,
+SNULL, 140121571565568, 140121596739583,
+STORE, 140121596739584, 140121605132287,
+STORE, 140121571565568, 140121596739583,
+SNULL, 140121596743679, 140121605132287,
+STORE, 140121596739584, 140121596743679,
+STORE, 140121596743680, 140121605132287,
+SNULL, 140121387024384, 140121403805695,
+STORE, 140121403805696, 140121412198399,
+STORE, 140121387024384, 140121403805695,
+SNULL, 140121403809791, 140121412198399,
+STORE, 140121403805696, 140121403809791,
+STORE, 140121403809792, 140121412198399,
+STORE, 140121370234880, 140121378627583,
+SNULL, 140121387024384, 140121395412991,
+STORE, 140121395412992, 140121403805695,
+STORE, 140121387024384, 140121395412991,
+SNULL, 140121395417087, 140121403805695,
+STORE, 140121395412992, 140121395417087,
+STORE, 140121395417088, 140121403805695,
+SNULL, 140121571565568, 140121588346879,
+STORE, 140121588346880, 140121596739583,
+STORE, 140121571565568, 140121588346879,
+SNULL, 140121588350975, 140121596739583,
+STORE, 140121588346880, 140121588350975,
+STORE, 140121588350976, 140121596739583,
+SNULL, 140121571565568, 140121579954175,
+STORE, 140121579954176, 140121588346879,
+STORE, 140121571565568, 140121579954175,
+SNULL, 140121579958271, 140121588346879,
+STORE, 140121579954176, 140121579958271,
+STORE, 140121579958272, 140121588346879,
+STORE, 140121286373376, 140121294766079,
+STORE, 140121277980672, 140121294766079,
+SNULL, 140121277980672, 140121286373375,
+STORE, 140121286373376, 140121294766079,
+STORE, 140121277980672, 140121286373375,
+SNULL, 140121286377471, 140121294766079,
+STORE, 140121286373376, 140121286377471,
+STORE, 140121286377472, 140121294766079,
+STORE, 140121269587968, 140121286373375,
+STORE, 140121261195264, 140121286373375,
+SNULL, 140121261195264, 140121269587967,
+STORE, 140121269587968, 140121286373375,
+STORE, 140121261195264, 140121269587967,
+SNULL, 140121269592063, 140121286373375,
+STORE, 140121269587968, 140121269592063,
+STORE, 140121269592064, 140121286373375,
+STORE, 140121252802560, 140121269587967,
+SNULL, 140121252806655, 140121269587967,
+STORE, 140121252802560, 140121252806655,
+STORE, 140121252806656, 140121269587967,
+STORE, 140121244409856, 140121252802559,
+STORE, 140121236017152, 140121252802559,
+SNULL, 140121236017152, 140121244409855,
+STORE, 140121244409856, 140121252802559,
+STORE, 140121236017152, 140121244409855,
+SNULL, 140121244413951, 140121252802559,
+STORE, 140121244409856, 140121244413951,
+STORE, 140121244413952, 140121252802559,
+SNULL, 140121370238975, 140121378627583,
+STORE, 140121370234880, 140121370238975,
+STORE, 140121370238976, 140121378627583,
+STORE, 140121152155648, 140121160548351,
+STORE, 140121143762944, 140121160548351,
+STORE, 140121135370240, 140121160548351,
+SNULL, 140121135374335, 140121160548351,
+STORE, 140121135370240, 140121135374335,
+STORE, 140121135374336, 140121160548351,
+STORE, 140121126977536, 140121135370239,
+STORE, 140121118584832, 140121135370239,
+STORE, 140121110192128, 140121135370239,
+SNULL, 140121110192128, 140121118584831,
+STORE, 140121118584832, 140121135370239,
+STORE, 140121110192128, 140121118584831,
+SNULL, 140121118588927, 140121135370239,
+STORE, 140121118584832, 140121118588927,
+STORE, 140121118588928, 140121135370239,
+STORE, 140121101799424, 140121118584831,
+STORE, 140121017937920, 140121026330623,
+STORE, 140121009545216, 140121026330623,
+SNULL, 140121009545216, 140121017937919,
+STORE, 140121017937920, 140121026330623,
+STORE, 140121009545216, 140121017937919,
+SNULL, 140121017942015, 140121026330623,
+STORE, 140121017937920, 140121017942015,
+STORE, 140121017942016, 140121026330623,
+SNULL, 140121269592064, 140121277980671,
+STORE, 140121277980672, 140121286373375,
+STORE, 140121269592064, 140121277980671,
+SNULL, 140121277984767, 140121286373375,
+STORE, 140121277980672, 140121277984767,
+STORE, 140121277984768, 140121286373375,
+STORE, 140121001152512, 140121017937919,
+SNULL, 140121252806656, 140121261195263,
+STORE, 140121261195264, 140121269587967,
+STORE, 140121252806656, 140121261195263,
+SNULL, 140121261199359, 140121269587967,
+STORE, 140121261195264, 140121261199359,
+STORE, 140121261199360, 140121269587967,
+SNULL, 140121135374336, 140121152155647,
+STORE, 140121152155648, 140121160548351,
+STORE, 140121135374336, 140121152155647,
+SNULL, 140121152159743, 140121160548351,
+STORE, 140121152155648, 140121152159743,
+STORE, 140121152159744, 140121160548351,
+STORE, 140120992759808, 140121017937919,
+STORE, 140120984367104, 140121017937919,
+STORE, 140120975974400, 140121017937919,
+SNULL, 140121101799424, 140121110192127,
+STORE, 140121110192128, 140121118584831,
+STORE, 140121101799424, 140121110192127,
+SNULL, 140121110196223, 140121118584831,
+STORE, 140121110192128, 140121110196223,
+STORE, 140121110196224, 140121118584831,
+SNULL, 140121118588928, 140121126977535,
+STORE, 140121126977536, 140121135370239,
+STORE, 140121118588928, 140121126977535,
+SNULL, 140121126981631, 140121135370239,
+STORE, 140121126977536, 140121126981631,
+STORE, 140121126981632, 140121135370239,
+STORE, 140120967581696, 140121017937919,
+STORE, 140120883720192, 140120892112895,
+SNULL, 140120883724287, 140120892112895,
+STORE, 140120883720192, 140120883724287,
+STORE, 140120883724288, 140120892112895,
+STORE, 140120875327488, 140120883720191,
+SNULL, 140121101803519, 140121110192127,
+STORE, 140121101799424, 140121101803519,
+STORE, 140121101803520, 140121110192127,
+SNULL, 140121135374336, 140121143762943,
+STORE, 140121143762944, 140121152155647,
+STORE, 140121135374336, 140121143762943,
+SNULL, 140121143767039, 140121152155647,
+STORE, 140121143762944, 140121143767039,
+STORE, 140121143767040, 140121152155647,
+STORE, 140120866934784, 140120883720191,
+SNULL, 140120967581696, 140120984367103,
+STORE, 140120984367104, 140121017937919,
+STORE, 140120967581696, 140120984367103,
+SNULL, 140120984371199, 140121017937919,
+STORE, 140120984367104, 140120984371199,
+STORE, 140120984371200, 140121017937919,
+STORE, 140120858542080, 140120883720191,
+SNULL, 140121236021247, 140121244409855,
+STORE, 140121236017152, 140121236021247,
+STORE, 140121236021248, 140121244409855,
+SNULL, 140120984371200, 140121009545215,
+STORE, 140121009545216, 140121017937919,
+STORE, 140120984371200, 140121009545215,
+SNULL, 140121009549311, 140121017937919,
+STORE, 140121009545216, 140121009549311,
+STORE, 140121009549312, 140121017937919,
+SNULL, 140120984371200, 140120992759807,
+STORE, 140120992759808, 140121009545215,
+STORE, 140120984371200, 140120992759807,
+SNULL, 140120992763903, 140121009545215,
+STORE, 140120992759808, 140120992763903,
+STORE, 140120992763904, 140121009545215,
+SNULL, 140120992763904, 140121001152511,
+STORE, 140121001152512, 140121009545215,
+STORE, 140120992763904, 140121001152511,
+SNULL, 140121001156607, 140121009545215,
+STORE, 140121001152512, 140121001156607,
+STORE, 140121001156608, 140121009545215,
+STORE, 140120850149376, 140120883720191,
+SNULL, 140120850153471, 140120883720191,
+STORE, 140120850149376, 140120850153471,
+STORE, 140120850153472, 140120883720191,
+SNULL, 140120967585791, 140120984367103,
+STORE, 140120967581696, 140120967585791,
+STORE, 140120967585792, 140120984367103,
+SNULL, 140120850153472, 140120866934783,
+STORE, 140120866934784, 140120883720191,
+STORE, 140120850153472, 140120866934783,
+SNULL, 140120866938879, 140120883720191,
+STORE, 140120866934784, 140120866938879,
+STORE, 140120866938880, 140120883720191,
+STORE, 140120841756672, 140120850149375,
+SNULL, 140120967585792, 140120975974399,
+STORE, 140120975974400, 140120984367103,
+STORE, 140120967585792, 140120975974399,
+SNULL, 140120975978495, 140120984367103,
+STORE, 140120975974400, 140120975978495,
+STORE, 140120975978496, 140120984367103,
+SNULL, 140120866938880, 140120875327487,
+STORE, 140120875327488, 140120883720191,
+STORE, 140120866938880, 140120875327487,
+SNULL, 140120875331583, 140120883720191,
+STORE, 140120875327488, 140120875331583,
+STORE, 140120875331584, 140120883720191,
+STORE, 140120833363968, 140120850149375,
+STORE, 140120749502464, 140120757895167,
+STORE, 140120741109760, 140120757895167,
+STORE, 140120732717056, 140120757895167,
+STORE, 140120724324352, 140120757895167,
+SNULL, 140120724324352, 140120732717055,
+STORE, 140120732717056, 140120757895167,
+STORE, 140120724324352, 140120732717055,
+SNULL, 140120732721151, 140120757895167,
+STORE, 140120732717056, 140120732721151,
+STORE, 140120732721152, 140120757895167,
+STORE, 140120715931648, 140120732717055,
+SNULL, 140120715935743, 140120732717055,
+STORE, 140120715931648, 140120715935743,
+STORE, 140120715935744, 140120732717055,
+SNULL, 140120850153472, 140120858542079,
+STORE, 140120858542080, 140120866934783,
+STORE, 140120850153472, 140120858542079,
+SNULL, 140120858546175, 140120866934783,
+STORE, 140120858542080, 140120858546175,
+STORE, 140120858546176, 140120866934783,
+STORE, 140120707538944, 140120715931647,
+SNULL, 140120707543039, 140120715931647,
+STORE, 140120707538944, 140120707543039,
+STORE, 140120707543040, 140120715931647,
+SNULL, 140120833368063, 140120850149375,
+STORE, 140120833363968, 140120833368063,
+STORE, 140120833368064, 140120850149375,
+SNULL, 140120833368064, 140120841756671,
+STORE, 140120841756672, 140120850149375,
+STORE, 140120833368064, 140120841756671,
+SNULL, 140120841760767, 140120850149375,
+STORE, 140120841756672, 140120841760767,
+STORE, 140120841760768, 140120850149375,
+STORE, 140120699146240, 140120707538943,
+SNULL, 140120715935744, 140120724324351,
+STORE, 140120724324352, 140120732717055,
+STORE, 140120715935744, 140120724324351,
+SNULL, 140120724328447, 140120732717055,
+STORE, 140120724324352, 140120724328447,
+STORE, 140120724328448, 140120732717055,
+SNULL, 140120732721152, 140120741109759,
+STORE, 140120741109760, 140120757895167,
+STORE, 140120732721152, 140120741109759,
+SNULL, 140120741113855, 140120757895167,
+STORE, 140120741109760, 140120741113855,
+STORE, 140120741113856, 140120757895167,
+SNULL, 140120741113856, 140120749502463,
+STORE, 140120749502464, 140120757895167,
+STORE, 140120741113856, 140120749502463,
+SNULL, 140120749506559, 140120757895167,
+STORE, 140120749502464, 140120749506559,
+STORE, 140120749506560, 140120757895167,
+SNULL, 140120699150335, 140120707538943,
+STORE, 140120699146240, 140120699150335,
+STORE, 140120699150336, 140120707538943,
+STORE, 140122446557184, 140122446585855,
+STORE, 140122368999424, 140122371190783,
+SNULL, 140122368999424, 140122369089535,
+STORE, 140122369089536, 140122371190783,
+STORE, 140122368999424, 140122369089535,
+SNULL, 140122371182591, 140122371190783,
+STORE, 140122369089536, 140122371182591,
+STORE, 140122371182592, 140122371190783,
+ERASE, 140122371182592, 140122371190783,
+STORE, 140122371182592, 140122371190783,
+SNULL, 140122371186687, 140122371190783,
+STORE, 140122371182592, 140122371186687,
+STORE, 140122371186688, 140122371190783,
+ERASE, 140122446557184, 140122446585855,
+ERASE, 140121445736448, 140121445740543,
+ERASE, 140121445740544, 140121454129151,
+ERASE, 140121621917696, 140121621921791,
+ERASE, 140121621921792, 140121630310399,
+ERASE, 140121579954176, 140121579958271,
+ERASE, 140121579958272, 140121588346879,
+ERASE, 140121261195264, 140121261199359,
+ERASE, 140121261199360, 140121269587967,
+ERASE, 140121454129152, 140121454133247,
+ERASE, 140121454133248, 140121462521855,
+ERASE, 140121588346880, 140121588350975,
+ERASE, 140121588350976, 140121596739583,
+ERASE, 140121135370240, 140121135374335,
+ERASE, 140121135374336, 140121143762943,
+ERASE, 140121881960448, 140121881964543,
+ERASE, 140121881964544, 140121890353151,
+ERASE, 140121428951040, 140121428955135,
+ERASE, 140121428955136, 140121437343743,
+ERASE, 140121387020288, 140121387024383,
+ERASE, 140121387024384, 140121395412991,
+ERASE, 140121487699968, 140121487704063,
+ERASE, 140121487704064, 140121496092671,
+ERASE, 140121437343744, 140121437347839,
+ERASE, 140121437347840, 140121445736447,
+ERASE, 140121613524992, 140121613529087,
+ERASE, 140121613529088, 140121621917695,
+ERASE, 140121856782336, 140121856786431,
+ERASE, 140121856786432, 140121865175039,
+ERASE, 140121252802560, 140121252806655,
+ERASE, 140121252806656, 140121261195263,
+ERASE, 140121839996928, 140121840001023,
+ERASE, 140121840001024, 140121848389631,
+ERASE, 140121596739584, 140121596743679,
+ERASE, 140121596743680, 140121605132287,
+ERASE, 140121009545216, 140121009549311,
+ERASE, 140121009549312, 140121017937919,
+ERASE, 140120724324352, 140120724328447,
+ERASE, 140120724328448, 140120732717055,
+ERASE, 140120883720192, 140120883724287,
+ERASE, 140120883724288, 140120892112895,
+ERASE, 140121982607360, 140121982611455,
+ERASE, 140121982611456, 140121991000063,
+ERASE, 140121571561472, 140121571565567,
+ERASE, 140121571565568, 140121579954175,
+ERASE, 140121286373376, 140121286377471,
+ERASE, 140121286377472, 140121294766079,
+ERASE, 140120875327488, 140120875331583,
+ERASE, 140120875331584, 140120883720191,
+ERASE, 140121848389632, 140121848393727,
+ERASE, 140121848393728, 140121856782335,
+ERASE, 140121370234880, 140121370238975,
+ERASE, 140121370238976, 140121378627583,
+ERASE, 140121143762944, 140121143767039,
+ERASE, 140121143767040, 140121152155647,
+ERASE, 140121118584832, 140121118588927,
+ERASE, 140121118588928, 140121126977535,
+ERASE, 140120866934784, 140120866938879,
+ERASE, 140120866938880, 140120875327487,
+ERASE, 140120741109760, 140120741113855,
+ERASE, 140120741113856, 140120749502463,
+ERASE, 140121865175040, 140121865179135,
+ERASE, 140121865179136, 140121873567743,
+ERASE, 140121403805696, 140121403809791,
+ERASE, 140121403809792, 140121412198399,
+ERASE, 140121236017152, 140121236021247,
+ERASE, 140121236021248, 140121244409855,
+ERASE, 140120732717056, 140120732721151,
+ERASE, 140120732721152, 140120741109759,
+ERASE, 140121017937920, 140121017942015,
+ERASE, 140121017942016, 140121026330623,
+ERASE, 140121873567744, 140121873571839,
+ERASE, 140121873571840, 140121881960447,
+ERASE, 140121470914560, 140121470918655,
+ERASE, 140121470918656, 140121479307263,
+ERASE, 140121126977536, 140121126981631,
+ERASE, 140121126981632, 140121135370239,
+ERASE, 140120850149376, 140120850153471,
+ERASE, 140120850153472, 140120858542079,
+ERASE, 140120707538944, 140120707543039,
+ERASE, 140120707543040, 140120715931647,
+ERASE, 140121479307264, 140121479311359,
+ERASE, 140121479311360, 140121487699967,
+ERASE, 140120967581696, 140120967585791,
+ERASE, 140120967585792, 140120975974399,
+ERASE, 140120841756672, 140120841760767,
+ERASE, 140120841760768, 140120850149375,
+ERASE, 140121412198400, 140121412202495,
+ERASE, 140121412202496, 140121420591103,
+ERASE, 140122158788608, 140122158792703,
+ERASE, 140122158792704, 140122167181311,
+ERASE, 140122142003200, 140122142007295,
+ERASE, 140122142007296, 140122150395903,
+ERASE, 140121101799424, 140121101803519,
+ERASE, 140121101803520, 140121110192127,
+ERASE, 140120858542080, 140120858546175,
+ERASE, 140120858546176, 140120866934783,
+ERASE, 140120833363968, 140120833368063,
+ERASE, 140120833368064, 140120841756671,
+ERASE, 140121277980672, 140121277984767,
+ERASE, 140121277984768, 140121286373375,
+ERASE, 140121001152512, 140121001156607,
+ERASE, 140121001156608, 140121009545215,
+ERASE, 140120749502464, 140120749506559,
+ERASE, 140120749506560, 140120757895167,
+ERASE, 140121605132288, 140121605136383,
+ERASE, 140121605136384, 140121613524991,
+ERASE, 140121378627584, 140121378631679,
+ERASE, 140121378631680, 140121387020287,
+ERASE, 140121110192128, 140121110196223,
+ERASE, 140121110196224, 140121118584831,
+ERASE, 140121462521856, 140121462525951,
+ERASE, 140121462525952, 140121470914559,
+ERASE, 140121395412992, 140121395417087,
+ERASE, 140121395417088, 140121403805695,
+ERASE, 140121152155648, 140121152159743,
+ERASE, 140121152159744, 140121160548351,
+ERASE, 140120992759808, 140120992763903,
+ERASE, 140120992763904, 140121001152511,
+ERASE, 140122387976192, 140122387980287,
+ERASE, 140122387980288, 140122396368895,
+ERASE, 140121890353152, 140121890357247,
+ERASE, 140121890357248, 140121898745855,
+ERASE, 140121269587968, 140121269592063,
+ERASE, 140121269592064, 140121277980671,
+ };
+ static const unsigned long set37[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140722404016128, 140737488351231,
+SNULL, 140722404020223, 140737488351231,
+STORE, 140722404016128, 140722404020223,
+STORE, 140722403885056, 140722404020223,
+STORE, 94637010001920, 94637012254719,
+SNULL, 94637010132991, 94637012254719,
+STORE, 94637010001920, 94637010132991,
+STORE, 94637010132992, 94637012254719,
+ERASE, 94637010132992, 94637012254719,
+STORE, 94637012226048, 94637012234239,
+STORE, 94637012234240, 94637012254719,
+STORE, 139760240594944, 139760242847743,
+SNULL, 139760240738303, 139760242847743,
+STORE, 139760240594944, 139760240738303,
+STORE, 139760240738304, 139760242847743,
+ERASE, 139760240738304, 139760242847743,
+STORE, 139760242835456, 139760242843647,
+STORE, 139760242843648, 139760242847743,
+STORE, 140722405232640, 140722405236735,
+STORE, 140722405220352, 140722405232639,
+STORE, 139760242806784, 139760242835455,
+STORE, 139760242798592, 139760242806783,
+STORE, 139760238379008, 139760240594943,
+SNULL, 139760238379008, 139760238477311,
+STORE, 139760238477312, 139760240594943,
+STORE, 139760238379008, 139760238477311,
+SNULL, 139760240570367, 139760240594943,
+STORE, 139760238477312, 139760240570367,
+STORE, 139760240570368, 139760240594943,
+SNULL, 139760240570368, 139760240578559,
+STORE, 139760240578560, 139760240594943,
+STORE, 139760240570368, 139760240578559,
+ERASE, 139760240570368, 139760240578559,
+STORE, 139760240570368, 139760240578559,
+ERASE, 139760240578560, 139760240594943,
+STORE, 139760240578560, 139760240594943,
+STORE, 139760234582016, 139760238379007,
+SNULL, 139760234582016, 139760236240895,
+STORE, 139760236240896, 139760238379007,
+STORE, 139760234582016, 139760236240895,
+SNULL, 139760238338047, 139760238379007,
+STORE, 139760236240896, 139760238338047,
+STORE, 139760238338048, 139760238379007,
+SNULL, 139760238338048, 139760238362623,
+STORE, 139760238362624, 139760238379007,
+STORE, 139760238338048, 139760238362623,
+ERASE, 139760238338048, 139760238362623,
+STORE, 139760238338048, 139760238362623,
+ERASE, 139760238362624, 139760238379007,
+STORE, 139760238362624, 139760238379007,
+STORE, 139760242790400, 139760242806783,
+SNULL, 139760238354431, 139760238362623,
+STORE, 139760238338048, 139760238354431,
+STORE, 139760238354432, 139760238362623,
+SNULL, 139760240574463, 139760240578559,
+STORE, 139760240570368, 139760240574463,
+STORE, 139760240574464, 139760240578559,
+SNULL, 94637012230143, 94637012234239,
+STORE, 94637012226048, 94637012230143,
+STORE, 94637012230144, 94637012234239,
+SNULL, 139760242839551, 139760242843647,
+STORE, 139760242835456, 139760242839551,
+STORE, 139760242839552, 139760242843647,
+ERASE, 139760242806784, 139760242835455,
+STORE, 94637033324544, 94637033459711,
+STORE, 139760226189312, 139760234582015,
+SNULL, 139760226193407, 139760234582015,
+STORE, 139760226189312, 139760226193407,
+STORE, 139760226193408, 139760234582015,
+STORE, 139760217796608, 139760226189311,
+STORE, 139760083578880, 139760217796607,
+SNULL, 139760083578880, 139760114860031,
+STORE, 139760114860032, 139760217796607,
+STORE, 139760083578880, 139760114860031,
+ERASE, 139760083578880, 139760114860031,
+SNULL, 139760181968895, 139760217796607,
+STORE, 139760114860032, 139760181968895,
+STORE, 139760181968896, 139760217796607,
+ERASE, 139760181968896, 139760217796607,
+SNULL, 139760114995199, 139760181968895,
+STORE, 139760114860032, 139760114995199,
+STORE, 139760114995200, 139760181968895,
+SNULL, 139760217800703, 139760226189311,
+STORE, 139760217796608, 139760217800703,
+STORE, 139760217800704, 139760226189311,
+STORE, 139760209403904, 139760217796607,
+SNULL, 139760209407999, 139760217796607,
+STORE, 139760209403904, 139760209407999,
+STORE, 139760209408000, 139760217796607,
+STORE, 139760201011200, 139760209403903,
+SNULL, 139760201015295, 139760209403903,
+STORE, 139760201011200, 139760201015295,
+STORE, 139760201015296, 139760209403903,
+STORE, 139760192618496, 139760201011199,
+SNULL, 139760192622591, 139760201011199,
+STORE, 139760192618496, 139760192622591,
+STORE, 139760192622592, 139760201011199,
+STORE, 139760184225792, 139760192618495,
+STORE, 139759980642304, 139760114860031,
+STORE, 139759972249600, 139759980642303,
+STORE, 139759963856896, 139759980642303,
+STORE, 139759955464192, 139759980642303,
+STORE, 139759888355328, 139759955464191,
+SNULL, 139760047751167, 139760114860031,
+STORE, 139759980642304, 139760047751167,
+STORE, 139760047751168, 139760114860031,
+ERASE, 139760047751168, 139760114860031,
+SNULL, 139759980777471, 139760047751167,
+STORE, 139759980642304, 139759980777471,
+STORE, 139759980777472, 139760047751167,
+STORE, 139759980777472, 139760114860031,
+SNULL, 139759980777472, 139760047751167,
+STORE, 139760047751168, 139760114860031,
+STORE, 139759980777472, 139760047751167,
+SNULL, 139760047886335, 139760114860031,
+STORE, 139760047751168, 139760047886335,
+STORE, 139760047886336, 139760114860031,
+STORE, 139759821246464, 139759955464191,
+SNULL, 139759821246464, 139759888355327,
+STORE, 139759888355328, 139759955464191,
+STORE, 139759821246464, 139759888355327,
+ERASE, 139759821246464, 139759888355327,
+ERASE, 139759888355328, 139759955464191,
+ };
+ static const unsigned long set38[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140730666221568, 140737488351231,
+SNULL, 140730666225663, 140737488351231,
+STORE, 140730666221568, 140730666225663,
+STORE, 140730666090496, 140730666225663,
+STORE, 94177584803840, 94177587056639,
+SNULL, 94177584934911, 94177587056639,
+STORE, 94177584803840, 94177584934911,
+STORE, 94177584934912, 94177587056639,
+ERASE, 94177584934912, 94177587056639,
+STORE, 94177587027968, 94177587036159,
+STORE, 94177587036160, 94177587056639,
+STORE, 140614382714880, 140614384967679,
+SNULL, 140614382858239, 140614384967679,
+STORE, 140614382714880, 140614382858239,
+STORE, 140614382858240, 140614384967679,
+ERASE, 140614382858240, 140614384967679,
+STORE, 140614384955392, 140614384963583,
+STORE, 140614384963584, 140614384967679,
+STORE, 140730666315776, 140730666319871,
+STORE, 140730666303488, 140730666315775,
+STORE, 140614384926720, 140614384955391,
+STORE, 140614384918528, 140614384926719,
+STORE, 140614380498944, 140614382714879,
+SNULL, 140614380498944, 140614380597247,
+STORE, 140614380597248, 140614382714879,
+STORE, 140614380498944, 140614380597247,
+SNULL, 140614382690303, 140614382714879,
+STORE, 140614380597248, 140614382690303,
+STORE, 140614382690304, 140614382714879,
+SNULL, 140614382690304, 140614382698495,
+STORE, 140614382698496, 140614382714879,
+STORE, 140614382690304, 140614382698495,
+ERASE, 140614382690304, 140614382698495,
+STORE, 140614382690304, 140614382698495,
+ERASE, 140614382698496, 140614382714879,
+STORE, 140614382698496, 140614382714879,
+STORE, 140614376701952, 140614380498943,
+SNULL, 140614376701952, 140614378360831,
+STORE, 140614378360832, 140614380498943,
+STORE, 140614376701952, 140614378360831,
+SNULL, 140614380457983, 140614380498943,
+STORE, 140614378360832, 140614380457983,
+STORE, 140614380457984, 140614380498943,
+SNULL, 140614380457984, 140614380482559,
+STORE, 140614380482560, 140614380498943,
+STORE, 140614380457984, 140614380482559,
+ERASE, 140614380457984, 140614380482559,
+STORE, 140614380457984, 140614380482559,
+ERASE, 140614380482560, 140614380498943,
+STORE, 140614380482560, 140614380498943,
+STORE, 140614384910336, 140614384926719,
+SNULL, 140614380474367, 140614380482559,
+STORE, 140614380457984, 140614380474367,
+STORE, 140614380474368, 140614380482559,
+SNULL, 140614382694399, 140614382698495,
+STORE, 140614382690304, 140614382694399,
+STORE, 140614382694400, 140614382698495,
+SNULL, 94177587032063, 94177587036159,
+STORE, 94177587027968, 94177587032063,
+STORE, 94177587032064, 94177587036159,
+SNULL, 140614384959487, 140614384963583,
+STORE, 140614384955392, 140614384959487,
+STORE, 140614384959488, 140614384963583,
+ERASE, 140614384926720, 140614384955391,
+STORE, 94177619791872, 94177619927039,
+STORE, 140614368309248, 140614376701951,
+SNULL, 140614368313343, 140614376701951,
+STORE, 140614368309248, 140614368313343,
+STORE, 140614368313344, 140614376701951,
+STORE, 140614359916544, 140614368309247,
+STORE, 140614225698816, 140614359916543,
+SNULL, 140614225698816, 140614276481023,
+STORE, 140614276481024, 140614359916543,
+STORE, 140614225698816, 140614276481023,
+ERASE, 140614225698816, 140614276481023,
+SNULL, 140614343589887, 140614359916543,
+STORE, 140614276481024, 140614343589887,
+STORE, 140614343589888, 140614359916543,
+ERASE, 140614343589888, 140614359916543,
+SNULL, 140614276616191, 140614343589887,
+STORE, 140614276481024, 140614276616191,
+STORE, 140614276616192, 140614343589887,
+SNULL, 140614359920639, 140614368309247,
+STORE, 140614359916544, 140614359920639,
+STORE, 140614359920640, 140614368309247,
+STORE, 140614351523840, 140614359916543,
+SNULL, 140614351527935, 140614359916543,
+STORE, 140614351523840, 140614351527935,
+STORE, 140614351527936, 140614359916543,
+STORE, 140614268088320, 140614276481023,
+SNULL, 140614268092415, 140614276481023,
+STORE, 140614268088320, 140614268092415,
+STORE, 140614268092416, 140614276481023,
+STORE, 140614259695616, 140614268088319,
+SNULL, 140614259699711, 140614268088319,
+STORE, 140614259695616, 140614259699711,
+STORE, 140614259699712, 140614268088319,
+STORE, 140614251302912, 140614259695615,
+STORE, 140614242910208, 140614259695615,
+STORE, 140614108692480, 140614242910207,
+SNULL, 140614108692480, 140614142263295,
+STORE, 140614142263296, 140614242910207,
+STORE, 140614108692480, 140614142263295,
+ERASE, 140614108692480, 140614142263295,
+STORE, 140614133870592, 140614142263295,
+STORE, 140613999652864, 140614133870591,
+SNULL, 140613999652864, 140614008045567,
+STORE, 140614008045568, 140614133870591,
+STORE, 140613999652864, 140614008045567,
+ERASE, 140613999652864, 140614008045567,
+STORE, 140613999652864, 140614008045567,
+STORE, 140613865435136, 140613999652863,
+SNULL, 140613865435136, 140613873827839,
+STORE, 140613873827840, 140613999652863,
+STORE, 140613865435136, 140613873827839,
+ERASE, 140613865435136, 140613873827839,
+SNULL, 140614209372159, 140614242910207,
+STORE, 140614142263296, 140614209372159,
+STORE, 140614209372160, 140614242910207,
+ERASE, 140614209372160, 140614242910207,
+SNULL, 140614142398463, 140614209372159,
+STORE, 140614142263296, 140614142398463,
+STORE, 140614142398464, 140614209372159,
+SNULL, 140614075154431, 140614133870591,
+STORE, 140614008045568, 140614075154431,
+STORE, 140614075154432, 140614133870591,
+ERASE, 140614075154432, 140614133870591,
+SNULL, 140614008180735, 140614075154431,
+STORE, 140614008045568, 140614008180735,
+STORE, 140614008180736, 140614075154431,
+SNULL, 140613940936703, 140613999652863,
+STORE, 140613873827840, 140613940936703,
+STORE, 140613940936704, 140613999652863,
+ERASE, 140613940936704, 140613999652863,
+SNULL, 140614242914303, 140614259695615,
+STORE, 140614242910208, 140614242914303,
+STORE, 140614242914304, 140614259695615,
+STORE, 140613739610112, 140613940936703,
+STORE, 140614234517504, 140614242910207,
+SNULL, 140614242914304, 140614251302911,
+STORE, 140614251302912, 140614259695615,
+STORE, 140614242914304, 140614251302911,
+SNULL, 140614251307007, 140614259695615,
+STORE, 140614251302912, 140614251307007,
+STORE, 140614251307008, 140614259695615,
+SNULL, 140613739610112, 140613873827839,
+STORE, 140613873827840, 140613940936703,
+STORE, 140613739610112, 140613873827839,
+SNULL, 140613873963007, 140613940936703,
+STORE, 140613873827840, 140613873963007,
+STORE, 140613873963008, 140613940936703,
+SNULL, 140614133874687, 140614142263295,
+STORE, 140614133870592, 140614133874687,
+STORE, 140614133874688, 140614142263295,
+SNULL, 140613806718975, 140613873827839,
+STORE, 140613739610112, 140613806718975,
+STORE, 140613806718976, 140613873827839,
+ERASE, 140613806718976, 140613873827839,
+STORE, 140614226124800, 140614242910207,
+SNULL, 140613739745279, 140613806718975,
+STORE, 140613739610112, 140613739745279,
+STORE, 140613739745280, 140613806718975,
+SNULL, 140613999656959, 140614008045567,
+STORE, 140613999652864, 140613999656959,
+STORE, 140613999656960, 140614008045567,
+SNULL, 140614226124800, 140614234517503,
+STORE, 140614234517504, 140614242910207,
+STORE, 140614226124800, 140614234517503,
+SNULL, 140614234521599, 140614242910207,
+STORE, 140614234517504, 140614234521599,
+STORE, 140614234521600, 140614242910207,
+STORE, 140614217732096, 140614234517503,
+STORE, 140614125477888, 140614133870591,
+SNULL, 140614125481983, 140614133870591,
+STORE, 140614125477888, 140614125481983,
+STORE, 140614125481984, 140614133870591,
+STORE, 140614117085184, 140614125477887,
+SNULL, 140614217736191, 140614234517503,
+STORE, 140614217732096, 140614217736191,
+STORE, 140614217736192, 140614234517503,
+SNULL, 140614117089279, 140614125477887,
+STORE, 140614117085184, 140614117089279,
+STORE, 140614117089280, 140614125477887,
+SNULL, 140614217736192, 140614226124799,
+STORE, 140614226124800, 140614234517503,
+STORE, 140614217736192, 140614226124799,
+SNULL, 140614226128895, 140614234517503,
+STORE, 140614226124800, 140614226128895,
+STORE, 140614226128896, 140614234517503,
+STORE, 140614108692480, 140614117085183,
+STORE, 140614100299776, 140614117085183,
+STORE, 140614091907072, 140614117085183,
+SNULL, 140614091907072, 140614108692479,
+STORE, 140614108692480, 140614117085183,
+STORE, 140614091907072, 140614108692479,
+SNULL, 140614108696575, 140614117085183,
+STORE, 140614108692480, 140614108696575,
+STORE, 140614108696576, 140614117085183,
+SNULL, 140614091907072, 140614100299775,
+STORE, 140614100299776, 140614108692479,
+STORE, 140614091907072, 140614100299775,
+SNULL, 140614100303871, 140614108692479,
+STORE, 140614100299776, 140614100303871,
+STORE, 140614100303872, 140614108692479,
+STORE, 140614083514368, 140614100299775,
+SNULL, 140614083518463, 140614100299775,
+STORE, 140614083514368, 140614083518463,
+STORE, 140614083518464, 140614100299775,
+STORE, 140613991260160, 140613999652863,
+SNULL, 140614083518464, 140614091907071,
+STORE, 140614091907072, 140614100299775,
+STORE, 140614083518464, 140614091907071,
+SNULL, 140614091911167, 140614100299775,
+STORE, 140614091907072, 140614091911167,
+STORE, 140614091911168, 140614100299775,
+SNULL, 140613991264255, 140613999652863,
+STORE, 140613991260160, 140613991264255,
+STORE, 140613991264256, 140613999652863,
+STORE, 140613982867456, 140613991260159,
+SNULL, 140613982871551, 140613991260159,
+STORE, 140613982867456, 140613982871551,
+STORE, 140613982871552, 140613991260159,
+STORE, 140613974474752, 140613982867455,
+SNULL, 140613974478847, 140613982867455,
+STORE, 140613974474752, 140613974478847,
+STORE, 140613974478848, 140613982867455,
+STORE, 140613966082048, 140613974474751,
+STORE, 140613739745280, 140613873827839,
+SNULL, 140613739745280, 140613806718975,
+STORE, 140613806718976, 140613873827839,
+STORE, 140613739745280, 140613806718975,
+SNULL, 140613806854143, 140613873827839,
+STORE, 140613806718976, 140613806854143,
+STORE, 140613806854144, 140613873827839,
+SNULL, 140613966086143, 140613974474751,
+STORE, 140613966082048, 140613966086143,
+STORE, 140613966086144, 140613974474751,
+STORE, 140613957689344, 140613966082047,
+STORE, 140613605392384, 140613739610111,
+STORE, 140613949296640, 140613966082047,
+STORE, 140613596999680, 140613605392383,
+STORE, 140613529890816, 140613596999679,
+STORE, 140613521498112, 140613529890815,
+STORE, 140613513105408, 140613529890815,
+STORE, 140613378887680, 140613513105407,
+SNULL, 140613378887680, 140613404065791,
+STORE, 140613404065792, 140613513105407,
+STORE, 140613378887680, 140613404065791,
+ERASE, 140613378887680, 140613404065791,
+STORE, 140613395673088, 140613404065791,
+STORE, 140613261455360, 140613395673087,
+SNULL, 140613261455360, 140613269848063,
+STORE, 140613269848064, 140613395673087,
+STORE, 140613261455360, 140613269848063,
+ERASE, 140613261455360, 140613269848063,
+STORE, 140613261455360, 140613269848063,
+STORE, 140613253062656, 140613269848063,
+STORE, 140613118844928, 140613253062655,
+STORE, 140613110452224, 140613118844927,
+SNULL, 140613118844928, 140613135630335,
+STORE, 140613135630336, 140613253062655,
+STORE, 140613118844928, 140613135630335,
+ERASE, 140613118844928, 140613135630335,
+STORE, 140613127237632, 140613135630335,
+STORE, 140613110452224, 140613135630335,
+STORE, 140612976234496, 140613110452223,
+STORE, 140612967841792, 140612976234495,
+STORE, 140612833624064, 140612967841791,
+STORE, 140612825231360, 140612833624063,
+STORE, 140612816838656, 140612833624063,
+STORE, 140612682620928, 140612816838655,
+STORE, 140612674228224, 140612682620927,
+SNULL, 140612682620928, 140612732977151,
+STORE, 140612732977152, 140612816838655,
+STORE, 140612682620928, 140612732977151,
+ERASE, 140612682620928, 140612732977151,
+SNULL, 140613672501247, 140613739610111,
+STORE, 140613605392384, 140613672501247,
+STORE, 140613672501248, 140613739610111,
+ERASE, 140613672501248, 140613739610111,
+SNULL, 140613605527551, 140613672501247,
+STORE, 140613605392384, 140613605527551,
+STORE, 140613605527552, 140613672501247,
+ERASE, 140613529890816, 140613596999679,
+STORE, 140612540010496, 140612674228223,
+SNULL, 140612540010496, 140612598759423,
+STORE, 140612598759424, 140612674228223,
+STORE, 140612540010496, 140612598759423,
+ERASE, 140612540010496, 140612598759423,
+SNULL, 140613471174655, 140613513105407,
+STORE, 140613404065792, 140613471174655,
+STORE, 140613471174656, 140613513105407,
+ERASE, 140613471174656, 140613513105407,
+SNULL, 140613404200959, 140613471174655,
+STORE, 140613404065792, 140613404200959,
+STORE, 140613404200960, 140613471174655,
+SNULL, 140613336956927, 140613395673087,
+STORE, 140613269848064, 140613336956927,
+STORE, 140613336956928, 140613395673087,
+ERASE, 140613336956928, 140613395673087,
+SNULL, 140612833624064, 140612867194879,
+STORE, 140612867194880, 140612967841791,
+STORE, 140612833624064, 140612867194879,
+ERASE, 140612833624064, 140612867194879,
+SNULL, 140612976234496, 140613001412607,
+STORE, 140613001412608, 140613110452223,
+STORE, 140612976234496, 140613001412607,
+ERASE, 140612976234496, 140613001412607,
+SNULL, 140613202739199, 140613253062655,
+STORE, 140613135630336, 140613202739199,
+STORE, 140613202739200, 140613253062655,
+ERASE, 140613202739200, 140613253062655,
+SNULL, 140613135765503, 140613202739199,
+STORE, 140613135630336, 140613135765503,
+STORE, 140613135765504, 140613202739199,
+SNULL, 140612816842751, 140612833624063,
+STORE, 140612816838656, 140612816842751,
+STORE, 140612816842752, 140612833624063,
+SNULL, 140613110456319, 140613135630335,
+STORE, 140613110452224, 140613110456319,
+STORE, 140613110456320, 140613135630335,
+SNULL, 140613949300735, 140613966082047,
+STORE, 140613949296640, 140613949300735,
+STORE, 140613949300736, 140613966082047,
+SNULL, 140613110456320, 140613118844927,
+STORE, 140613118844928, 140613135630335,
+STORE, 140613110456320, 140613118844927,
+SNULL, 140613118849023, 140613135630335,
+STORE, 140613118844928, 140613118849023,
+STORE, 140613118849024, 140613135630335,
+SNULL, 140612800086015, 140612816838655,
+STORE, 140612732977152, 140612800086015,
+STORE, 140612800086016, 140612816838655,
+ERASE, 140612800086016, 140612816838655,
+SNULL, 140613253062656, 140613261455359,
+STORE, 140613261455360, 140613269848063,
+STORE, 140613253062656, 140613261455359,
+SNULL, 140613261459455, 140613269848063,
+STORE, 140613261455360, 140613261459455,
+STORE, 140613261459456, 140613269848063,
+SNULL, 140612674232319, 140612682620927,
+STORE, 140612674228224, 140612674232319,
+STORE, 140612674232320, 140612682620927,
+STORE, 140613731217408, 140613739610111,
+STORE, 140613722824704, 140613739610111,
+SNULL, 140613949300736, 140613957689343,
+STORE, 140613957689344, 140613966082047,
+STORE, 140613949300736, 140613957689343,
+SNULL, 140613957693439, 140613966082047,
+STORE, 140613957689344, 140613957693439,
+STORE, 140613957693440, 140613966082047,
+STORE, 140612464541696, 140612674228223,
+SNULL, 140612531650559, 140612674228223,
+STORE, 140612464541696, 140612531650559,
+STORE, 140612531650560, 140612674228223,
+SNULL, 140612531650560, 140612598759423,
+STORE, 140612598759424, 140612674228223,
+STORE, 140612531650560, 140612598759423,
+ERASE, 140612531650560, 140612598759423,
+SNULL, 140612665868287, 140612674228223,
+STORE, 140612598759424, 140612665868287,
+STORE, 140612665868288, 140612674228223,
+ERASE, 140612665868288, 140612674228223,
+SNULL, 140613269983231, 140613336956927,
+STORE, 140613269848064, 140613269983231,
+STORE, 140613269983232, 140613336956927,
+SNULL, 140612934303743, 140612967841791,
+STORE, 140612867194880, 140612934303743,
+STORE, 140612934303744, 140612967841791,
+ERASE, 140612934303744, 140612967841791,
+SNULL, 140613068521471, 140613110452223,
+STORE, 140613001412608, 140613068521471,
+STORE, 140613068521472, 140613110452223,
+ERASE, 140613068521472, 140613110452223,
+STORE, 140613714432000, 140613739610111,
+SNULL, 140613001547775, 140613068521471,
+STORE, 140613001412608, 140613001547775,
+STORE, 140613001547776, 140613068521471,
+SNULL, 140612733112319, 140612800086015,
+STORE, 140612732977152, 140612733112319,
+STORE, 140612733112320, 140612800086015,
+SNULL, 140613513109503, 140613529890815,
+STORE, 140613513105408, 140613513109503,
+STORE, 140613513109504, 140613529890815,
+STORE, 140613706039296, 140613739610111,
+STORE, 140613697646592, 140613739610111,
+STORE, 140613689253888, 140613739610111,
+SNULL, 140613689257983, 140613739610111,
+STORE, 140613689253888, 140613689257983,
+STORE, 140613689257984, 140613739610111,
+SNULL, 140613253066751, 140613261455359,
+STORE, 140613253062656, 140613253066751,
+STORE, 140613253066752, 140613261455359,
+STORE, 140613680861184, 140613689253887,
+STORE, 140613588606976, 140613605392383,
+SNULL, 140613689257984, 140613731217407,
+STORE, 140613731217408, 140613739610111,
+STORE, 140613689257984, 140613731217407,
+SNULL, 140613731221503, 140613739610111,
+STORE, 140613731217408, 140613731221503,
+STORE, 140613731221504, 140613739610111,
+STORE, 140613580214272, 140613605392383,
+SNULL, 140612464676863, 140612531650559,
+STORE, 140612464541696, 140612464676863,
+STORE, 140612464676864, 140612531650559,
+SNULL, 140612598894591, 140612665868287,
+STORE, 140612598759424, 140612598894591,
+STORE, 140612598894592, 140612665868287,
+SNULL, 140612867330047, 140612934303743,
+STORE, 140612867194880, 140612867330047,
+STORE, 140612867330048, 140612934303743,
+STORE, 140613571821568, 140613605392383,
+SNULL, 140613571825663, 140613605392383,
+STORE, 140613571821568, 140613571825663,
+STORE, 140613571825664, 140613605392383,
+SNULL, 140613689257984, 140613722824703,
+STORE, 140613722824704, 140613731217407,
+STORE, 140613689257984, 140613722824703,
+SNULL, 140613722828799, 140613731217407,
+STORE, 140613722824704, 140613722828799,
+STORE, 140613722828800, 140613731217407,
+SNULL, 140613689257984, 140613714431999,
+STORE, 140613714432000, 140613722824703,
+STORE, 140613689257984, 140613714431999,
+SNULL, 140613714436095, 140613722824703,
+STORE, 140613714432000, 140613714436095,
+STORE, 140613714436096, 140613722824703,
+SNULL, 140612816842752, 140612825231359,
+STORE, 140612825231360, 140612833624063,
+STORE, 140612816842752, 140612825231359,
+SNULL, 140612825235455, 140612833624063,
+STORE, 140612825231360, 140612825235455,
+STORE, 140612825235456, 140612833624063,
+SNULL, 140613395677183, 140613404065791,
+STORE, 140613395673088, 140613395677183,
+STORE, 140613395677184, 140613404065791,
+SNULL, 140613689257984, 140613706039295,
+STORE, 140613706039296, 140613714431999,
+STORE, 140613689257984, 140613706039295,
+SNULL, 140613706043391, 140613714431999,
+STORE, 140613706039296, 140613706043391,
+STORE, 140613706043392, 140613714431999,
+SNULL, 140613118849024, 140613127237631,
+STORE, 140613127237632, 140613135630335,
+STORE, 140613118849024, 140613127237631,
+SNULL, 140613127241727, 140613135630335,
+STORE, 140613127237632, 140613127241727,
+STORE, 140613127241728, 140613135630335,
+SNULL, 140613571825664, 140613580214271,
+STORE, 140613580214272, 140613605392383,
+STORE, 140613571825664, 140613580214271,
+SNULL, 140613580218367, 140613605392383,
+STORE, 140613580214272, 140613580218367,
+STORE, 140613580218368, 140613605392383,
+SNULL, 140613689257984, 140613697646591,
+STORE, 140613697646592, 140613706039295,
+STORE, 140613689257984, 140613697646591,
+SNULL, 140613697650687, 140613706039295,
+STORE, 140613697646592, 140613697650687,
+STORE, 140613697650688, 140613706039295,
+SNULL, 140613680865279, 140613689253887,
+STORE, 140613680861184, 140613680865279,
+STORE, 140613680865280, 140613689253887,
+STORE, 140613563428864, 140613571821567,
+SNULL, 140613563432959, 140613571821567,
+STORE, 140613563428864, 140613563432959,
+STORE, 140613563432960, 140613571821567,
+SNULL, 140613580218368, 140613588606975,
+STORE, 140613588606976, 140613605392383,
+STORE, 140613580218368, 140613588606975,
+SNULL, 140613588611071, 140613605392383,
+STORE, 140613588606976, 140613588611071,
+STORE, 140613588611072, 140613605392383,
+SNULL, 140613513109504, 140613521498111,
+STORE, 140613521498112, 140613529890815,
+STORE, 140613513109504, 140613521498111,
+SNULL, 140613521502207, 140613529890815,
+STORE, 140613521498112, 140613521502207,
+STORE, 140613521502208, 140613529890815,
+SNULL, 140613588611072, 140613596999679,
+STORE, 140613596999680, 140613605392383,
+STORE, 140613588611072, 140613596999679,
+SNULL, 140613597003775, 140613605392383,
+STORE, 140613596999680, 140613597003775,
+STORE, 140613597003776, 140613605392383,
+STORE, 140613555036160, 140613563428863,
+SNULL, 140613555040255, 140613563428863,
+STORE, 140613555036160, 140613555040255,
+STORE, 140613555040256, 140613563428863,
+STORE, 140613546643456, 140613555036159,
+STORE, 140613538250752, 140613555036159,
+SNULL, 140613538250752, 140613546643455,
+STORE, 140613546643456, 140613555036159,
+STORE, 140613538250752, 140613546643455,
+SNULL, 140613546647551, 140613555036159,
+STORE, 140613546643456, 140613546647551,
+STORE, 140613546647552, 140613555036159,
+STORE, 140613504712704, 140613513105407,
+STORE, 140613496320000, 140613513105407,
+SNULL, 140613496324095, 140613513105407,
+STORE, 140613496320000, 140613496324095,
+STORE, 140613496324096, 140613513105407,
+STORE, 140613487927296, 140613496319999,
+SNULL, 140613487931391, 140613496319999,
+STORE, 140613487927296, 140613487931391,
+STORE, 140613487931392, 140613496319999,
+STORE, 140613479534592, 140613487927295,
+SNULL, 140612967845887, 140612976234495,
+STORE, 140612967841792, 140612967845887,
+STORE, 140612967845888, 140612976234495,
+STORE, 140613387280384, 140613395673087,
+STORE, 140613378887680, 140613395673087,
+SNULL, 140613378887680, 140613387280383,
+STORE, 140613387280384, 140613395673087,
+STORE, 140613378887680, 140613387280383,
+SNULL, 140613387284479, 140613395673087,
+STORE, 140613387280384, 140613387284479,
+STORE, 140613387284480, 140613395673087,
+STORE, 140613370494976, 140613387280383,
+STORE, 140613362102272, 140613387280383,
+SNULL, 140613479538687, 140613487927295,
+STORE, 140613479534592, 140613479538687,
+STORE, 140613479538688, 140613487927295,
+STORE, 140613353709568, 140613387280383,
+STORE, 140613345316864, 140613387280383,
+STORE, 140613244669952, 140613253062655,
+SNULL, 140613345320959, 140613387280383,
+STORE, 140613345316864, 140613345320959,
+STORE, 140613345320960, 140613387280383,
+SNULL, 140613538254847, 140613546643455,
+STORE, 140613538250752, 140613538254847,
+STORE, 140613538254848, 140613546643455,
+STORE, 140613236277248, 140613253062655,
+STORE, 140613227884544, 140613253062655,
+STORE, 140613219491840, 140613253062655,
+STORE, 140613211099136, 140613253062655,
+SNULL, 140613211103231, 140613253062655,
+STORE, 140613211099136, 140613211103231,
+STORE, 140613211103232, 140613253062655,
+STORE, 140613102059520, 140613110452223,
+STORE, 140613093666816, 140613110452223,
+SNULL, 140613093670911, 140613110452223,
+STORE, 140613093666816, 140613093670911,
+STORE, 140613093670912, 140613110452223,
+STORE, 140613085274112, 140613093666815,
+SNULL, 140613496324096, 140613504712703,
+STORE, 140613504712704, 140613513105407,
+STORE, 140613496324096, 140613504712703,
+SNULL, 140613504716799, 140613513105407,
+STORE, 140613504712704, 140613504716799,
+STORE, 140613504716800, 140613513105407,
+SNULL, 140613345320960, 140613378887679,
+STORE, 140613378887680, 140613387280383,
+STORE, 140613345320960, 140613378887679,
+SNULL, 140613378891775, 140613387280383,
+STORE, 140613378887680, 140613378891775,
+STORE, 140613378891776, 140613387280383,
+SNULL, 140613345320960, 140613362102271,
+STORE, 140613362102272, 140613378887679,
+STORE, 140613345320960, 140613362102271,
+SNULL, 140613362106367, 140613378887679,
+STORE, 140613362102272, 140613362106367,
+STORE, 140613362106368, 140613378887679,
+SNULL, 140613362106368, 140613370494975,
+STORE, 140613370494976, 140613378887679,
+STORE, 140613362106368, 140613370494975,
+SNULL, 140613370499071, 140613378887679,
+STORE, 140613370494976, 140613370499071,
+STORE, 140613370499072, 140613378887679,
+STORE, 140613076881408, 140613093666815,
+STORE, 140612993019904, 140613001412607,
+SNULL, 140613076885503, 140613093666815,
+STORE, 140613076881408, 140613076885503,
+STORE, 140613076885504, 140613093666815,
+SNULL, 140613093670912, 140613102059519,
+STORE, 140613102059520, 140613110452223,
+STORE, 140613093670912, 140613102059519,
+SNULL, 140613102063615, 140613110452223,
+STORE, 140613102059520, 140613102063615,
+STORE, 140613102063616, 140613110452223,
+SNULL, 140613076885504, 140613085274111,
+STORE, 140613085274112, 140613093666815,
+STORE, 140613076885504, 140613085274111,
+SNULL, 140613085278207, 140613093666815,
+STORE, 140613085274112, 140613085278207,
+STORE, 140613085278208, 140613093666815,
+STORE, 140612984627200, 140613001412607,
+STORE, 140612967845888, 140612984627199,
+SNULL, 140613211103232, 140613219491839,
+STORE, 140613219491840, 140613253062655,
+STORE, 140613211103232, 140613219491839,
+SNULL, 140613219495935, 140613253062655,
+STORE, 140613219491840, 140613219495935,
+STORE, 140613219495936, 140613253062655,
+STORE, 140612959449088, 140612967841791,
+STORE, 140612951056384, 140612967841791,
+SNULL, 140612951060479, 140612967841791,
+STORE, 140612951056384, 140612951060479,
+STORE, 140612951060480, 140612967841791,
+SNULL, 140613345320960, 140613353709567,
+STORE, 140613353709568, 140613362102271,
+STORE, 140613345320960, 140613353709567,
+SNULL, 140613353713663, 140613362102271,
+STORE, 140613353709568, 140613353713663,
+STORE, 140613353713664, 140613362102271,
+SNULL, 140613219495936, 140613244669951,
+STORE, 140613244669952, 140613253062655,
+STORE, 140613219495936, 140613244669951,
+SNULL, 140613244674047, 140613253062655,
+STORE, 140613244669952, 140613244674047,
+STORE, 140613244674048, 140613253062655,
+STORE, 140612942663680, 140612951056383,
+SNULL, 140613219495936, 140613236277247,
+STORE, 140613236277248, 140613244669951,
+STORE, 140613219495936, 140613236277247,
+SNULL, 140613236281343, 140613244669951,
+STORE, 140613236277248, 140613236281343,
+STORE, 140613236281344, 140613244669951,
+SNULL, 140613219495936, 140613227884543,
+STORE, 140613227884544, 140613236277247,
+STORE, 140613219495936, 140613227884543,
+SNULL, 140613227888639, 140613236277247,
+STORE, 140613227884544, 140613227888639,
+STORE, 140613227888640, 140613236277247,
+SNULL, 140612984627200, 140612993019903,
+STORE, 140612993019904, 140613001412607,
+STORE, 140612984627200, 140612993019903,
+SNULL, 140612993023999, 140613001412607,
+STORE, 140612993019904, 140612993023999,
+STORE, 140612993024000, 140613001412607,
+STORE, 140612858802176, 140612867194879,
+STORE, 140612850409472, 140612867194879,
+SNULL, 140612951060480, 140612959449087,
+STORE, 140612959449088, 140612967841791,
+STORE, 140612951060480, 140612959449087,
+SNULL, 140612959453183, 140612967841791,
+STORE, 140612959449088, 140612959453183,
+STORE, 140612959453184, 140612967841791,
+SNULL, 140612967845888, 140612976234495,
+STORE, 140612976234496, 140612984627199,
+STORE, 140612967845888, 140612976234495,
+SNULL, 140612976238591, 140612984627199,
+STORE, 140612976234496, 140612976238591,
+STORE, 140612976238592, 140612984627199,
+STORE, 140612842016768, 140612867194879,
+SNULL, 140612842020863, 140612867194879,
+STORE, 140612842016768, 140612842020863,
+STORE, 140612842020864, 140612867194879,
+SNULL, 140612984631295, 140612993019903,
+STORE, 140612984627200, 140612984631295,
+STORE, 140612984631296, 140612993019903,
+STORE, 140612825235456, 140612842016767,
+STORE, 140612808445952, 140612816838655,
+SNULL, 140612942667775, 140612951056383,
+STORE, 140612942663680, 140612942667775,
+STORE, 140612942667776, 140612951056383,
+STORE, 140612724584448, 140612732977151,
+SNULL, 140612724588543, 140612732977151,
+STORE, 140612724584448, 140612724588543,
+STORE, 140612724588544, 140612732977151,
+STORE, 140612716191744, 140612724584447,
+SNULL, 140612842020864, 140612850409471,
+STORE, 140612850409472, 140612867194879,
+STORE, 140612842020864, 140612850409471,
+SNULL, 140612850413567, 140612867194879,
+STORE, 140612850409472, 140612850413567,
+STORE, 140612850413568, 140612867194879,
+SNULL, 140612850413568, 140612858802175,
+STORE, 140612858802176, 140612867194879,
+STORE, 140612850413568, 140612858802175,
+SNULL, 140612858806271, 140612867194879,
+STORE, 140612858802176, 140612858806271,
+STORE, 140612858806272, 140612867194879,
+STORE, 140612707799040, 140612724584447,
+SNULL, 140612707803135, 140612724584447,
+STORE, 140612707799040, 140612707803135,
+STORE, 140612707803136, 140612724584447,
+SNULL, 140612707803136, 140612716191743,
+STORE, 140612716191744, 140612724584447,
+STORE, 140612707803136, 140612716191743,
+SNULL, 140612716195839, 140612724584447,
+STORE, 140612716191744, 140612716195839,
+STORE, 140612716195840, 140612724584447,
+SNULL, 140612808450047, 140612816838655,
+STORE, 140612808445952, 140612808450047,
+STORE, 140612808450048, 140612816838655,
+SNULL, 140612825235456, 140612833624063,
+STORE, 140612833624064, 140612842016767,
+STORE, 140612825235456, 140612833624063,
+SNULL, 140612833628159, 140612842016767,
+STORE, 140612833624064, 140612833628159,
+STORE, 140612833628160, 140612842016767,
+STORE, 140612699406336, 140612707799039,
+SNULL, 140612699410431, 140612707799039,
+STORE, 140612699406336, 140612699410431,
+STORE, 140612699410432, 140612707799039,
+STORE, 140614384926720, 140614384955391,
+STORE, 140614349332480, 140614351523839,
+SNULL, 140614349332480, 140614349422591,
+STORE, 140614349422592, 140614351523839,
+STORE, 140614349332480, 140614349422591,
+SNULL, 140614351515647, 140614351523839,
+STORE, 140614349422592, 140614351515647,
+STORE, 140614351515648, 140614351523839,
+ERASE, 140614351515648, 140614351523839,
+STORE, 140614351515648, 140614351523839,
+SNULL, 140614351519743, 140614351523839,
+STORE, 140614351515648, 140614351519743,
+STORE, 140614351519744, 140614351523839,
+ERASE, 140614384926720, 140614384955391,
+ERASE, 140613949296640, 140613949300735,
+ERASE, 140613949300736, 140613957689343,
+ERASE, 140613689253888, 140613689257983,
+ERASE, 140613689257984, 140613697646591,
+ERASE, 140613563428864, 140613563432959,
+ERASE, 140613563432960, 140613571821567,
+ERASE, 140613211099136, 140613211103231,
+ERASE, 140613211103232, 140613219491839,
+ERASE, 140614133870592, 140614133874687,
+ERASE, 140614133874688, 140614142263295,
+ERASE, 140612967841792, 140612967845887,
+ERASE, 140612967845888, 140612976234495,
+ERASE, 140613076881408, 140613076885503,
+ERASE, 140613076885504, 140613085274111,
+ERASE, 140612850409472, 140612850413567,
+ERASE, 140612850413568, 140612858802175,
+ERASE, 140613110452224, 140613110456319,
+ERASE, 140613110456320, 140613118844927,
+ERASE, 140613706039296, 140613706043391,
+ERASE, 140613706043392, 140613714431999,
+ERASE, 140613521498112, 140613521502207,
+ERASE, 140613521502208, 140613529890815,
+ERASE, 140613362102272, 140613362106367,
+ERASE, 140613362106368, 140613370494975,
+ERASE, 140613253062656, 140613253066751,
+ERASE, 140613253066752, 140613261455359,
+ERASE, 140612816838656, 140612816842751,
+ERASE, 140612816842752, 140612825231359,
+ERASE, 140613261455360, 140613261459455,
+ERASE, 140613261459456, 140613269848063,
+ERASE, 140613118844928, 140613118849023,
+ERASE, 140613118849024, 140613127237631,
+ERASE, 140613714432000, 140613714436095,
+ERASE, 140613714436096, 140613722824703,
+ERASE, 140613496320000, 140613496324095,
+ERASE, 140613496324096, 140613504712703,
+ERASE, 140613513105408, 140613513109503,
+ERASE, 140613513109504, 140613521498111,
+ERASE, 140613697646592, 140613697650687,
+ERASE, 140613697650688, 140613706039295,
+ERASE, 140613093666816, 140613093670911,
+ERASE, 140613093670912, 140613102059519,
+ERASE, 140612993019904, 140612993023999,
+ERASE, 140612993024000, 140613001412607,
+ERASE, 140613127237632, 140613127241727,
+ERASE, 140613127241728, 140613135630335,
+ERASE, 140613957689344, 140613957693439,
+ERASE, 140613957693440, 140613966082047,
+ERASE, 140613571821568, 140613571825663,
+ERASE, 140613571825664, 140613580214271,
+ERASE, 140613479534592, 140613479538687,
+ERASE, 140613479538688, 140613487927295,
+ERASE, 140612984627200, 140612984631295,
+ERASE, 140612984631296, 140612993019903,
+ERASE, 140613588606976, 140613588611071,
+ERASE, 140613588611072, 140613596999679,
+ERASE, 140613680861184, 140613680865279,
+ERASE, 140613680865280, 140613689253887,
+ERASE, 140613345316864, 140613345320959,
+ERASE, 140613345320960, 140613353709567,
+ERASE, 140613596999680, 140613597003775,
+ERASE, 140613597003776, 140613605392383,
+ERASE, 140613966082048, 140613966086143,
+ERASE, 140613966086144, 140613974474751,
+ERASE, 140613731217408, 140613731221503,
+ERASE, 140613731221504, 140613739610111,
+ERASE, 140613395673088, 140613395677183,
+ERASE, 140613395677184, 140613404065791,
+ERASE, 140612825231360, 140612825235455,
+ERASE, 140612825235456, 140612833624063,
+ERASE, 140612674228224, 140612674232319,
+ERASE, 140612674232320, 140612682620927,
+ERASE, 140613722824704, 140613722828799,
+ERASE, 140613722828800, 140613731217407,
+ERASE, 140613487927296, 140613487931391,
+ERASE, 140613487931392, 140613496319999,
+ERASE, 140613102059520, 140613102063615,
+ERASE, 140613102063616, 140613110452223,
+ERASE, 140614242910208, 140614242914303,
+ERASE, 140614242914304, 140614251302911,
+ERASE, 140612808445952, 140612808450047,
+ERASE, 140612808450048, 140612816838655,
+ERASE, 140613236277248, 140613236281343,
+ERASE, 140613236281344, 140613244669951,
+ERASE, 140613580214272, 140613580218367,
+ERASE, 140613580218368, 140613588606975,
+ERASE, 140613370494976, 140613370499071,
+ERASE, 140613370499072, 140613378887679,
+ERASE, 140613244669952, 140613244674047,
+ERASE, 140613244674048, 140613253062655,
+ERASE, 140612724584448, 140612724588543,
+ERASE, 140612724588544, 140612732977151,
+ERASE, 140612707799040, 140612707803135,
+ERASE, 140612707803136, 140612716191743,
+ERASE, 140613504712704, 140613504716799,
+ERASE, 140613504716800, 140613513105407,
+ };
+
+ static const unsigned long set39[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140736271417344, 140737488351231,
+SNULL, 140736271421439, 140737488351231,
+STORE, 140736271417344, 140736271421439,
+STORE, 140736271286272, 140736271421439,
+STORE, 94412930822144, 94412933074943,
+SNULL, 94412930953215, 94412933074943,
+STORE, 94412930822144, 94412930953215,
+STORE, 94412930953216, 94412933074943,
+ERASE, 94412930953216, 94412933074943,
+STORE, 94412933046272, 94412933054463,
+STORE, 94412933054464, 94412933074943,
+STORE, 140326136901632, 140326139154431,
+SNULL, 140326137044991, 140326139154431,
+STORE, 140326136901632, 140326137044991,
+STORE, 140326137044992, 140326139154431,
+ERASE, 140326137044992, 140326139154431,
+STORE, 140326139142144, 140326139150335,
+STORE, 140326139150336, 140326139154431,
+STORE, 140736271585280, 140736271589375,
+STORE, 140736271572992, 140736271585279,
+STORE, 140326139113472, 140326139142143,
+STORE, 140326139105280, 140326139113471,
+STORE, 140326134685696, 140326136901631,
+SNULL, 140326134685696, 140326134783999,
+STORE, 140326134784000, 140326136901631,
+STORE, 140326134685696, 140326134783999,
+SNULL, 140326136877055, 140326136901631,
+STORE, 140326134784000, 140326136877055,
+STORE, 140326136877056, 140326136901631,
+SNULL, 140326136877056, 140326136885247,
+STORE, 140326136885248, 140326136901631,
+STORE, 140326136877056, 140326136885247,
+ERASE, 140326136877056, 140326136885247,
+STORE, 140326136877056, 140326136885247,
+ERASE, 140326136885248, 140326136901631,
+STORE, 140326136885248, 140326136901631,
+STORE, 140326130888704, 140326134685695,
+SNULL, 140326130888704, 140326132547583,
+STORE, 140326132547584, 140326134685695,
+STORE, 140326130888704, 140326132547583,
+SNULL, 140326134644735, 140326134685695,
+STORE, 140326132547584, 140326134644735,
+STORE, 140326134644736, 140326134685695,
+SNULL, 140326134644736, 140326134669311,
+STORE, 140326134669312, 140326134685695,
+STORE, 140326134644736, 140326134669311,
+ERASE, 140326134644736, 140326134669311,
+STORE, 140326134644736, 140326134669311,
+ERASE, 140326134669312, 140326134685695,
+STORE, 140326134669312, 140326134685695,
+STORE, 140326139097088, 140326139113471,
+SNULL, 140326134661119, 140326134669311,
+STORE, 140326134644736, 140326134661119,
+STORE, 140326134661120, 140326134669311,
+SNULL, 140326136881151, 140326136885247,
+STORE, 140326136877056, 140326136881151,
+STORE, 140326136881152, 140326136885247,
+SNULL, 94412933050367, 94412933054463,
+STORE, 94412933046272, 94412933050367,
+STORE, 94412933050368, 94412933054463,
+SNULL, 140326139146239, 140326139150335,
+STORE, 140326139142144, 140326139146239,
+STORE, 140326139146240, 140326139150335,
+ERASE, 140326139113472, 140326139142143,
+STORE, 94412939493376, 94412939628543,
+STORE, 140326122496000, 140326130888703,
+SNULL, 140326122500095, 140326130888703,
+STORE, 140326122496000, 140326122500095,
+STORE, 140326122500096, 140326130888703,
+STORE, 140326114103296, 140326122495999,
+STORE, 140325979885568, 140326114103295,
+SNULL, 140325979885568, 140326043910143,
+STORE, 140326043910144, 140326114103295,
+STORE, 140325979885568, 140326043910143,
+ERASE, 140325979885568, 140326043910143,
+SNULL, 140326111019007, 140326114103295,
+STORE, 140326043910144, 140326111019007,
+STORE, 140326111019008, 140326114103295,
+ERASE, 140326111019008, 140326114103295,
+SNULL, 140326044045311, 140326111019007,
+STORE, 140326043910144, 140326044045311,
+STORE, 140326044045312, 140326111019007,
+SNULL, 140326114107391, 140326122495999,
+STORE, 140326114103296, 140326114107391,
+STORE, 140326114107392, 140326122495999,
+STORE, 140326035517440, 140326043910143,
+SNULL, 140326035521535, 140326043910143,
+STORE, 140326035517440, 140326035521535,
+STORE, 140326035521536, 140326043910143,
+STORE, 140326027124736, 140326035517439,
+SNULL, 140326027128831, 140326035517439,
+STORE, 140326027124736, 140326027128831,
+STORE, 140326027128832, 140326035517439,
+STORE, 140326018732032, 140326027124735,
+SNULL, 140326018736127, 140326027124735,
+STORE, 140326018732032, 140326018736127,
+STORE, 140326018736128, 140326027124735,
+STORE, 140326010339328, 140326018732031,
+STORE, 140326001946624, 140326018732031,
+STORE, 140325993553920, 140326018732031,
+STORE, 140325859336192, 140325993553919,
+SNULL, 140325859336192, 140325909692415,
+STORE, 140325909692416, 140325993553919,
+STORE, 140325859336192, 140325909692415,
+ERASE, 140325859336192, 140325909692415,
+SNULL, 140325976801279, 140325993553919,
+STORE, 140325909692416, 140325976801279,
+STORE, 140325976801280, 140325993553919,
+ERASE, 140325976801280, 140325993553919,
+STORE, 140325985161216, 140326018732031,
+STORE, 140325775474688, 140325976801279,
+STORE, 140325708365824, 140325976801279,
+SNULL, 140325708500991, 140325976801279,
+STORE, 140325708365824, 140325708500991,
+STORE, 140325708500992, 140325976801279,
+SNULL, 140325708500992, 140325909692415,
+STORE, 140325909692416, 140325976801279,
+STORE, 140325708500992, 140325909692415,
+SNULL, 140325909827583, 140325976801279,
+STORE, 140325909692416, 140325909827583,
+STORE, 140325909827584, 140325976801279,
+SNULL, 140325842583551, 140325909692415,
+STORE, 140325708500992, 140325842583551,
+STORE, 140325842583552, 140325909692415,
+ERASE, 140325842583552, 140325909692415,
+SNULL, 140325708500992, 140325775474687,
+STORE, 140325775474688, 140325842583551,
+STORE, 140325708500992, 140325775474687,
+SNULL, 140325775609855, 140325842583551,
+STORE, 140325775474688, 140325775609855,
+STORE, 140325775609856, 140325842583551,
+STORE, 140325775609856, 140325909692415,
+SNULL, 140325775609856, 140325842583551,
+STORE, 140325842583552, 140325909692415,
+STORE, 140325775609856, 140325842583551,
+SNULL, 140325842718719, 140325909692415,
+STORE, 140325842583552, 140325842718719,
+STORE, 140325842718720, 140325909692415,
+SNULL, 140325985161216, 140325993553919,
+STORE, 140325993553920, 140326018732031,
+STORE, 140325985161216, 140325993553919,
+SNULL, 140325993558015, 140326018732031,
+STORE, 140325993553920, 140325993558015,
+STORE, 140325993558016, 140326018732031,
+SNULL, 140325985165311, 140325993553919,
+STORE, 140325985161216, 140325985165311,
+STORE, 140325985165312, 140325993553919,
+SNULL, 140325993558016, 140326001946623,
+STORE, 140326001946624, 140326018732031,
+STORE, 140325993558016, 140326001946623,
+SNULL, 140326001950719, 140326018732031,
+STORE, 140326001946624, 140326001950719,
+STORE, 140326001950720, 140326018732031,
+SNULL, 140326001950720, 140326010339327,
+STORE, 140326010339328, 140326018732031,
+STORE, 140326001950720, 140326010339327,
+SNULL, 140326010343423, 140326018732031,
+STORE, 140326010339328, 140326010343423,
+STORE, 140326010343424, 140326018732031,
+STORE, 140325699973120, 140325708365823,
+STORE, 140325691580416, 140325708365823,
+STORE, 140325683187712, 140325708365823,
+SNULL, 140325683191807, 140325708365823,
+STORE, 140325683187712, 140325683191807,
+STORE, 140325683191808, 140325708365823,
+SNULL, 140325683191808, 140325699973119,
+STORE, 140325699973120, 140325708365823,
+STORE, 140325683191808, 140325699973119,
+SNULL, 140325699977215, 140325708365823,
+STORE, 140325699973120, 140325699977215,
+STORE, 140325699977216, 140325708365823,
+STORE, 140325674795008, 140325683187711,
+STORE, 140325666402304, 140325683187711,
+STORE, 140325658009600, 140325683187711,
+SNULL, 140325658009600, 140325666402303,
+STORE, 140325666402304, 140325683187711,
+STORE, 140325658009600, 140325666402303,
+SNULL, 140325666406399, 140325683187711,
+STORE, 140325666402304, 140325666406399,
+STORE, 140325666406400, 140325683187711,
+SNULL, 140325683191808, 140325691580415,
+STORE, 140325691580416, 140325699973119,
+STORE, 140325683191808, 140325691580415,
+SNULL, 140325691584511, 140325699973119,
+STORE, 140325691580416, 140325691584511,
+STORE, 140325691584512, 140325699973119,
+SNULL, 140325666406400, 140325674795007,
+STORE, 140325674795008, 140325683187711,
+STORE, 140325666406400, 140325674795007,
+SNULL, 140325674799103, 140325683187711,
+STORE, 140325674795008, 140325674799103,
+STORE, 140325674799104, 140325683187711,
+STORE, 140325649616896, 140325666402303,
+SNULL, 140325649616896, 140325658009599,
+STORE, 140325658009600, 140325666402303,
+STORE, 140325649616896, 140325658009599,
+SNULL, 140325658013695, 140325666402303,
+STORE, 140325658009600, 140325658013695,
+STORE, 140325658013696, 140325666402303,
+SNULL, 140325649620991, 140325658009599,
+STORE, 140325649616896, 140325649620991,
+STORE, 140325649620992, 140325658009599,
+STORE, 140325641224192, 140325649616895,
+STORE, 140325632831488, 140325649616895,
+SNULL, 140325632835583, 140325649616895,
+STORE, 140325632831488, 140325632835583,
+STORE, 140325632835584, 140325649616895,
+STORE, 140325624438784, 140325632831487,
+SNULL, 140325624442879, 140325632831487,
+STORE, 140325624438784, 140325624442879,
+STORE, 140325624442880, 140325632831487,
+SNULL, 140325632835584, 140325641224191,
+STORE, 140325641224192, 140325649616895,
+STORE, 140325632835584, 140325641224191,
+SNULL, 140325641228287, 140325649616895,
+STORE, 140325641224192, 140325641228287,
+STORE, 140325641228288, 140325649616895,
+STORE, 140325616046080, 140325624438783,
+SNULL, 140325616050175, 140325624438783,
+STORE, 140325616046080, 140325616050175,
+STORE, 140325616050176, 140325624438783,
+STORE, 140325607653376, 140325616046079,
+SNULL, 140325607657471, 140325616046079,
+STORE, 140325607653376, 140325607657471,
+STORE, 140325607657472, 140325616046079,
+STORE, 140325599260672, 140325607653375,
+STORE, 140325590867968, 140325607653375,
+STORE, 140325456650240, 140325590867967,
+SNULL, 140325456650240, 140325507039231,
+STORE, 140325507039232, 140325590867967,
+STORE, 140325456650240, 140325507039231,
+ERASE, 140325456650240, 140325507039231,
+STORE, 140325498646528, 140325507039231,
+STORE, 140325364428800, 140325498646527,
+SNULL, 140325364428800, 140325372821503,
+STORE, 140325372821504, 140325498646527,
+STORE, 140325364428800, 140325372821503,
+ERASE, 140325364428800, 140325372821503,
+STORE, 140325364428800, 140325372821503,
+STORE, 140325356036096, 140325372821503,
+STORE, 140325221818368, 140325356036095,
+SNULL, 140325221818368, 140325238603775,
+STORE, 140325238603776, 140325356036095,
+STORE, 140325221818368, 140325238603775,
+ERASE, 140325221818368, 140325238603775,
+STORE, 140325230211072, 140325238603775,
+STORE, 140325221818368, 140325238603775,
+STORE, 140325087600640, 140325221818367,
+STORE, 140325079207936, 140325087600639,
+SNULL, 140325087600640, 140325104386047,
+STORE, 140325104386048, 140325221818367,
+STORE, 140325087600640, 140325104386047,
+ERASE, 140325087600640, 140325104386047,
+STORE, 140325095993344, 140325104386047,
+STORE, 140325079207936, 140325104386047,
+STORE, 140324944990208, 140325079207935,
+SNULL, 140324944990208, 140324970168319,
+STORE, 140324970168320, 140325079207935,
+STORE, 140324944990208, 140324970168319,
+ERASE, 140324944990208, 140324970168319,
+STORE, 140324961775616, 140324970168319,
+STORE, 140324953382912, 140324970168319,
+STORE, 140324819165184, 140324953382911,
+STORE, 140324684947456, 140324953382911,
+STORE, 140324676554752, 140324684947455,
+STORE, 140324668162048, 140324684947455,
+STORE, 140324533944320, 140324668162047,
+STORE, 140324525551616, 140324533944319,
+SNULL, 140324533944320, 140324567515135,
+STORE, 140324567515136, 140324668162047,
+STORE, 140324533944320, 140324567515135,
+ERASE, 140324533944320, 140324567515135,
+STORE, 140324559122432, 140324567515135,
+STORE, 140324391333888, 140324525551615,
+SNULL, 140325574148095, 140325590867967,
+STORE, 140325507039232, 140325574148095,
+STORE, 140325574148096, 140325590867967,
+ERASE, 140325574148096, 140325590867967,
+SNULL, 140325439930367, 140325498646527,
+STORE, 140325372821504, 140325439930367,
+STORE, 140325439930368, 140325498646527,
+ERASE, 140325439930368, 140325498646527,
+SNULL, 140325305712639, 140325356036095,
+STORE, 140325238603776, 140325305712639,
+STORE, 140325305712640, 140325356036095,
+ERASE, 140325305712640, 140325356036095,
+SNULL, 140325171494911, 140325221818367,
+STORE, 140325104386048, 140325171494911,
+STORE, 140325171494912, 140325221818367,
+ERASE, 140325171494912, 140325221818367,
+SNULL, 140325104521215, 140325171494911,
+STORE, 140325104386048, 140325104521215,
+STORE, 140325104521216, 140325171494911,
+STORE, 140324257116160, 140324525551615,
+SNULL, 140324257116160, 140324299079679,
+STORE, 140324299079680, 140324525551615,
+STORE, 140324257116160, 140324299079679,
+ERASE, 140324257116160, 140324299079679,
+SNULL, 140325037277183, 140325079207935,
+STORE, 140324970168320, 140325037277183,
+STORE, 140325037277184, 140325079207935,
+ERASE, 140325037277184, 140325079207935,
+SNULL, 140324819165183, 140324953382911,
+STORE, 140324684947456, 140324819165183,
+STORE, 140324819165184, 140324953382911,
+SNULL, 140324819165184, 140324835950591,
+STORE, 140324835950592, 140324953382911,
+STORE, 140324819165184, 140324835950591,
+ERASE, 140324819165184, 140324835950591,
+SNULL, 140324903059455, 140324953382911,
+STORE, 140324835950592, 140324903059455,
+STORE, 140324903059456, 140324953382911,
+ERASE, 140324903059456, 140324953382911,
+SNULL, 140324684947456, 140324701732863,
+STORE, 140324701732864, 140324819165183,
+STORE, 140324684947456, 140324701732863,
+ERASE, 140324684947456, 140324701732863,
+SNULL, 140324768841727, 140324819165183,
+STORE, 140324701732864, 140324768841727,
+STORE, 140324768841728, 140324819165183,
+ERASE, 140324768841728, 140324819165183,
+SNULL, 140324634623999, 140324668162047,
+STORE, 140324567515136, 140324634623999,
+STORE, 140324634624000, 140324668162047,
+ERASE, 140324634624000, 140324668162047,
+SNULL, 140324391333887, 140324525551615,
+STORE, 140324299079680, 140324391333887,
+STORE, 140324391333888, 140324525551615,
+SNULL, 140324391333888, 140324433297407,
+STORE, 140324433297408, 140324525551615,
+STORE, 140324391333888, 140324433297407,
+ERASE, 140324391333888, 140324433297407,
+SNULL, 140325507174399, 140325574148095,
+STORE, 140325507039232, 140325507174399,
+STORE, 140325507174400, 140325574148095,
+SNULL, 140325590867968, 140325599260671,
+STORE, 140325599260672, 140325607653375,
+STORE, 140325590867968, 140325599260671,
+SNULL, 140325599264767, 140325607653375,
+STORE, 140325599260672, 140325599264767,
+STORE, 140325599264768, 140325607653375,
+SNULL, 140325372956671, 140325439930367,
+STORE, 140325372821504, 140325372956671,
+STORE, 140325372956672, 140325439930367,
+SNULL, 140324668166143, 140324684947455,
+STORE, 140324668162048, 140324668166143,
+STORE, 140324668166144, 140324684947455,
+SNULL, 140324525555711, 140324533944319,
+STORE, 140324525551616, 140324525555711,
+STORE, 140324525555712, 140324533944319,
+SNULL, 140324953382912, 140324961775615,
+STORE, 140324961775616, 140324970168319,
+STORE, 140324953382912, 140324961775615,
+SNULL, 140324961779711, 140324970168319,
+STORE, 140324961775616, 140324961779711,
+STORE, 140324961779712, 140324970168319,
+SNULL, 140325079212031, 140325104386047,
+STORE, 140325079207936, 140325079212031,
+STORE, 140325079212032, 140325104386047,
+SNULL, 140325221818368, 140325230211071,
+STORE, 140325230211072, 140325238603775,
+STORE, 140325221818368, 140325230211071,
+SNULL, 140325230215167, 140325238603775,
+STORE, 140325230211072, 140325230215167,
+STORE, 140325230215168, 140325238603775,
+SNULL, 140325356036096, 140325364428799,
+STORE, 140325364428800, 140325372821503,
+STORE, 140325356036096, 140325364428799,
+SNULL, 140325364432895, 140325372821503,
+ };
+ static const unsigned long set40[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140734309167104, 140737488351231,
+SNULL, 140734309171199, 140737488351231,
+STORE, 140734309167104, 140734309171199,
+STORE, 140734309036032, 140734309171199,
+STORE, 94270500081664, 94270502334463,
+SNULL, 94270500212735, 94270502334463,
+STORE, 94270500081664, 94270500212735,
+STORE, 94270500212736, 94270502334463,
+ERASE, 94270500212736, 94270502334463,
+STORE, 94270502305792, 94270502313983,
+STORE, 94270502313984, 94270502334463,
+STORE, 140321935110144, 140321937362943,
+SNULL, 140321935253503, 140321937362943,
+STORE, 140321935110144, 140321935253503,
+STORE, 140321935253504, 140321937362943,
+ERASE, 140321935253504, 140321937362943,
+STORE, 140321937350656, 140321937358847,
+STORE, 140321937358848, 140321937362943,
+STORE, 140734309625856, 140734309629951,
+STORE, 140734309613568, 140734309625855,
+STORE, 140321937321984, 140321937350655,
+STORE, 140321937313792, 140321937321983,
+STORE, 140321932894208, 140321935110143,
+SNULL, 140321932894208, 140321932992511,
+STORE, 140321932992512, 140321935110143,
+STORE, 140321932894208, 140321932992511,
+SNULL, 140321935085567, 140321935110143,
+STORE, 140321932992512, 140321935085567,
+STORE, 140321935085568, 140321935110143,
+SNULL, 140321935085568, 140321935093759,
+STORE, 140321935093760, 140321935110143,
+STORE, 140321935085568, 140321935093759,
+ERASE, 140321935085568, 140321935093759,
+STORE, 140321935085568, 140321935093759,
+ERASE, 140321935093760, 140321935110143,
+STORE, 140321935093760, 140321935110143,
+STORE, 140321929097216, 140321932894207,
+SNULL, 140321929097216, 140321930756095,
+STORE, 140321930756096, 140321932894207,
+STORE, 140321929097216, 140321930756095,
+SNULL, 140321932853247, 140321932894207,
+STORE, 140321930756096, 140321932853247,
+STORE, 140321932853248, 140321932894207,
+SNULL, 140321932853248, 140321932877823,
+STORE, 140321932877824, 140321932894207,
+STORE, 140321932853248, 140321932877823,
+ERASE, 140321932853248, 140321932877823,
+STORE, 140321932853248, 140321932877823,
+ERASE, 140321932877824, 140321932894207,
+STORE, 140321932877824, 140321932894207,
+STORE, 140321937305600, 140321937321983,
+SNULL, 140321932869631, 140321932877823,
+STORE, 140321932853248, 140321932869631,
+STORE, 140321932869632, 140321932877823,
+SNULL, 140321935089663, 140321935093759,
+STORE, 140321935085568, 140321935089663,
+STORE, 140321935089664, 140321935093759,
+SNULL, 94270502309887, 94270502313983,
+STORE, 94270502305792, 94270502309887,
+STORE, 94270502309888, 94270502313983,
+SNULL, 140321937354751, 140321937358847,
+STORE, 140321937350656, 140321937354751,
+STORE, 140321937354752, 140321937358847,
+ERASE, 140321937321984, 140321937350655,
+STORE, 94270507364352, 94270507499519,
+STORE, 140321920704512, 140321929097215,
+SNULL, 140321920708607, 140321929097215,
+STORE, 140321920704512, 140321920708607,
+STORE, 140321920708608, 140321929097215,
+STORE, 140321912311808, 140321920704511,
+STORE, 140321778094080, 140321912311807,
+SNULL, 140321778094080, 140321816051711,
+STORE, 140321816051712, 140321912311807,
+STORE, 140321778094080, 140321816051711,
+ERASE, 140321778094080, 140321816051711,
+SNULL, 140321883160575, 140321912311807,
+STORE, 140321816051712, 140321883160575,
+STORE, 140321883160576, 140321912311807,
+ERASE, 140321883160576, 140321912311807,
+SNULL, 140321816186879, 140321883160575,
+STORE, 140321816051712, 140321816186879,
+STORE, 140321816186880, 140321883160575,
+SNULL, 140321912315903, 140321920704511,
+STORE, 140321912311808, 140321912315903,
+STORE, 140321912315904, 140321920704511,
+STORE, 140321903919104, 140321912311807,
+SNULL, 140321903923199, 140321912311807,
+STORE, 140321903919104, 140321903923199,
+STORE, 140321903923200, 140321912311807,
+STORE, 140321895526400, 140321903919103,
+SNULL, 140321895530495, 140321903919103,
+STORE, 140321895526400, 140321895530495,
+STORE, 140321895530496, 140321903919103,
+STORE, 140321887133696, 140321895526399,
+SNULL, 140321887137791, 140321895526399,
+STORE, 140321887133696, 140321887137791,
+STORE, 140321887137792, 140321895526399,
+STORE, 140321807659008, 140321816051711,
+STORE, 140321673441280, 140321807659007,
+SNULL, 140321673441280, 140321681833983,
+STORE, 140321681833984, 140321807659007,
+STORE, 140321673441280, 140321681833983,
+ERASE, 140321673441280, 140321681833983,
+SNULL, 140321748942847, 140321807659007,
+STORE, 140321681833984, 140321748942847,
+STORE, 140321748942848, 140321807659007,
+ERASE, 140321748942848, 140321807659007,
+STORE, 140321799266304, 140321816051711,
+STORE, 140321790873600, 140321816051711,
+STORE, 140321782480896, 140321816051711,
+STORE, 140321547616256, 140321748942847,
+SNULL, 140321614725119, 140321748942847,
+STORE, 140321547616256, 140321614725119,
+STORE, 140321614725120, 140321748942847,
+SNULL, 140321614725120, 140321681833983,
+STORE, 140321681833984, 140321748942847,
+STORE, 140321614725120, 140321681833983,
+ERASE, 140321614725120, 140321681833983,
+SNULL, 140321681969151, 140321748942847,
+STORE, 140321681833984, 140321681969151,
+STORE, 140321681969152, 140321748942847,
+STORE, 140321547616256, 140321681833983,
+SNULL, 140321547616256, 140321614725119,
+STORE, 140321614725120, 140321681833983,
+STORE, 140321547616256, 140321614725119,
+SNULL, 140321614860287, 140321681833983,
+STORE, 140321614725120, 140321614860287,
+STORE, 140321614860288, 140321681833983,
+SNULL, 140321547751423, 140321614725119,
+STORE, 140321547616256, 140321547751423,
+STORE, 140321547751424, 140321614725119,
+STORE, 140321480507392, 140321547616255,
+SNULL, 140321782480896, 140321799266303,
+STORE, 140321799266304, 140321816051711,
+STORE, 140321782480896, 140321799266303,
+SNULL, 140321799270399, 140321816051711,
+STORE, 140321799266304, 140321799270399,
+STORE, 140321799270400, 140321816051711,
+STORE, 140321774088192, 140321799266303,
+SNULL, 140321774088192, 140321790873599,
+STORE, 140321790873600, 140321799266303,
+STORE, 140321774088192, 140321790873599,
+SNULL, 140321790877695, 140321799266303,
+STORE, 140321790873600, 140321790877695,
+STORE, 140321790877696, 140321799266303,
+SNULL, 140321480642559, 140321547616255,
+STORE, 140321480507392, 140321480642559,
+STORE, 140321480642560, 140321547616255,
+SNULL, 140321774088192, 140321782480895,
+STORE, 140321782480896, 140321790873599,
+STORE, 140321774088192, 140321782480895,
+SNULL, 140321782484991, 140321790873599,
+STORE, 140321782480896, 140321782484991,
+STORE, 140321782484992, 140321790873599,
+SNULL, 140321799270400, 140321807659007,
+STORE, 140321807659008, 140321816051711,
+STORE, 140321799270400, 140321807659007,
+SNULL, 140321807663103, 140321816051711,
+STORE, 140321807659008, 140321807663103,
+STORE, 140321807663104, 140321816051711,
+STORE, 140321765695488, 140321782480895,
+STORE, 140321757302784, 140321782480895,
+SNULL, 140321757306879, 140321782480895,
+STORE, 140321757302784, 140321757306879,
+STORE, 140321757306880, 140321782480895,
+STORE, 140321472114688, 140321480507391,
+STORE, 140321463721984, 140321480507391,
+SNULL, 140321463726079, 140321480507391,
+STORE, 140321463721984, 140321463726079,
+STORE, 140321463726080, 140321480507391,
+SNULL, 140321757306880, 140321774088191,
+STORE, 140321774088192, 140321782480895,
+STORE, 140321757306880, 140321774088191,
+SNULL, 140321774092287, 140321782480895,
+STORE, 140321774088192, 140321774092287,
+STORE, 140321774092288, 140321782480895,
+SNULL, 140321463726080, 140321472114687,
+STORE, 140321472114688, 140321480507391,
+STORE, 140321463726080, 140321472114687,
+SNULL, 140321472118783, 140321480507391,
+STORE, 140321472114688, 140321472118783,
+STORE, 140321472118784, 140321480507391,
+SNULL, 140321757306880, 140321765695487,
+STORE, 140321765695488, 140321774088191,
+STORE, 140321757306880, 140321765695487,
+SNULL, 140321765699583, 140321774088191,
+STORE, 140321765695488, 140321765699583,
+STORE, 140321765699584, 140321774088191,
+STORE, 140321455329280, 140321463721983,
+SNULL, 140321455333375, 140321463721983,
+STORE, 140321455329280, 140321455333375,
+STORE, 140321455333376, 140321463721983,
+STORE, 140321446936576, 140321455329279,
+STORE, 140321438543872, 140321455329279,
+STORE, 140321430151168, 140321455329279,
+SNULL, 140321430155263, 140321455329279,
+STORE, 140321430151168, 140321430155263,
+STORE, 140321430155264, 140321455329279,
+SNULL, 140321430155264, 140321446936575,
+STORE, 140321446936576, 140321455329279,
+STORE, 140321430155264, 140321446936575,
+SNULL, 140321446940671, 140321455329279,
+STORE, 140321446936576, 140321446940671,
+STORE, 140321446940672, 140321455329279,
+SNULL, 140321430155264, 140321438543871,
+STORE, 140321438543872, 140321446936575,
+STORE, 140321430155264, 140321438543871,
+SNULL, 140321438547967, 140321446936575,
+STORE, 140321438543872, 140321438547967,
+STORE, 140321438547968, 140321446936575,
+STORE, 140321421758464, 140321430151167,
+SNULL, 140321421762559, 140321430151167,
+STORE, 140321421758464, 140321421762559,
+STORE, 140321421762560, 140321430151167,
+STORE, 140321413365760, 140321421758463,
+SNULL, 140321413369855, 140321421758463,
+STORE, 140321413365760, 140321413369855,
+STORE, 140321413369856, 140321421758463,
+STORE, 140321404973056, 140321413365759,
+SNULL, 140321404977151, 140321413365759,
+STORE, 140321404973056, 140321404977151,
+STORE, 140321404977152, 140321413365759,
+STORE, 140321396580352, 140321404973055,
+STORE, 140321388187648, 140321404973055,
+STORE, 140321253969920, 140321388187647,
+SNULL, 140321253969920, 140321279180799,
+STORE, 140321279180800, 140321388187647,
+STORE, 140321253969920, 140321279180799,
+ERASE, 140321253969920, 140321279180799,
+SNULL, 140321346289663, 140321388187647,
+STORE, 140321279180800, 140321346289663,
+STORE, 140321346289664, 140321388187647,
+ERASE, 140321346289664, 140321388187647,
+STORE, 140321144963072, 140321346289663,
+STORE, 140321379794944, 140321404973055,
+STORE, 140321371402240, 140321404973055,
+STORE, 140321010745344, 140321346289663,
+STORE, 140321363009536, 140321404973055,
+SNULL, 140321077854207, 140321346289663,
+STORE, 140321010745344, 140321077854207,
+STORE, 140321077854208, 140321346289663,
+SNULL, 140321077854208, 140321144963071,
+STORE, 140321144963072, 140321346289663,
+STORE, 140321077854208, 140321144963071,
+ERASE, 140321077854208, 140321144963071,
+STORE, 140321354616832, 140321404973055,
+STORE, 140321136570368, 140321144963071,
+STORE, 140320943636480, 140321077854207,
+STORE, 140320876527616, 140321077854207,
+STORE, 140321128177664, 140321144963071,
+SNULL, 140320876662783, 140321077854207,
+STORE, 140320876527616, 140320876662783,
+STORE, 140320876662784, 140321077854207,
+STORE, 140321119784960, 140321144963071,
+STORE, 140321111392256, 140321144963071,
+STORE, 140320742309888, 140320876527615,
+STORE, 140321102999552, 140321144963071,
+STORE, 140320608092160, 140320876527615,
+SNULL, 140320675201023, 140320876527615,
+STORE, 140320608092160, 140320675201023,
+STORE, 140320675201024, 140320876527615,
+SNULL, 140320675201024, 140320742309887,
+STORE, 140320742309888, 140320876527615,
+STORE, 140320675201024, 140320742309887,
+ERASE, 140320675201024, 140320742309887,
+STORE, 140321094606848, 140321144963071,
+STORE, 140321086214144, 140321144963071,
+STORE, 140320608092160, 140320876527615,
+SNULL, 140320608092160, 140320675201023,
+STORE, 140320675201024, 140320876527615,
+STORE, 140320608092160, 140320675201023,
+SNULL, 140320675336191, 140320876527615,
+STORE, 140320675201024, 140320675336191,
+STORE, 140320675336192, 140320876527615,
+STORE, 140320599699456, 140320608092159,
+STORE, 140320591306752, 140320608092159,
+STORE, 140320457089024, 140320591306751,
+STORE, 140320448696320, 140320457089023,
+STORE, 140320314478592, 140320448696319,
+SNULL, 140321144963072, 140321279180799,
+STORE, 140321279180800, 140321346289663,
+STORE, 140321144963072, 140321279180799,
+SNULL, 140321279315967, 140321346289663,
+STORE, 140321279180800, 140321279315967,
+STORE, 140321279315968, 140321346289663,
+SNULL, 140321086214144, 140321136570367,
+STORE, 140321136570368, 140321144963071,
+STORE, 140321086214144, 140321136570367,
+SNULL, 140321136574463, 140321144963071,
+STORE, 140321136570368, 140321136574463,
+STORE, 140321136574464, 140321144963071,
+SNULL, 140321212071935, 140321279180799,
+STORE, 140321144963072, 140321212071935,
+STORE, 140321212071936, 140321279180799,
+ERASE, 140321212071936, 140321279180799,
+SNULL, 140321145098239, 140321212071935,
+STORE, 140321144963072, 140321145098239,
+STORE, 140321145098240, 140321212071935,
+SNULL, 140320876662784, 140321010745343,
+STORE, 140321010745344, 140321077854207,
+STORE, 140320876662784, 140321010745343,
+SNULL, 140321010880511, 140321077854207,
+STORE, 140321010745344, 140321010880511,
+STORE, 140321010880512, 140321077854207,
+SNULL, 140321354616832, 140321379794943,
+STORE, 140321379794944, 140321404973055,
+STORE, 140321354616832, 140321379794943,
+SNULL, 140321379799039, 140321404973055,
+STORE, 140321379794944, 140321379799039,
+STORE, 140321379799040, 140321404973055,
+SNULL, 140320876662784, 140320943636479,
+STORE, 140320943636480, 140321010745343,
+STORE, 140320876662784, 140320943636479,
+SNULL, 140320943771647, 140321010745343,
+STORE, 140320943636480, 140320943771647,
+STORE, 140320943771648, 140321010745343,
+SNULL, 140320809418751, 140320876527615,
+STORE, 140320675336192, 140320809418751,
+STORE, 140320809418752, 140320876527615,
+ERASE, 140320809418752, 140320876527615,
+SNULL, 140320675336192, 140320742309887,
+STORE, 140320742309888, 140320809418751,
+STORE, 140320675336192, 140320742309887,
+SNULL, 140320742445055, 140320809418751,
+STORE, 140320742309888, 140320742445055,
+STORE, 140320742445056, 140320809418751,
+SNULL, 140320608227327, 140320675201023,
+STORE, 140320608092160, 140320608227327,
+STORE, 140320608227328, 140320675201023,
+SNULL, 140320457089024, 140320473874431,
+STORE, 140320473874432, 140320591306751,
+STORE, 140320457089024, 140320473874431,
+ERASE, 140320457089024, 140320473874431,
+SNULL, 140320540983295, 140320591306751,
+STORE, 140320473874432, 140320540983295,
+STORE, 140320540983296, 140320591306751,
+ERASE, 140320540983296, 140320591306751,
+SNULL, 140320314478592, 140320339656703,
+STORE, 140320339656704, 140320448696319,
+STORE, 140320314478592, 140320339656703,
+ERASE, 140320314478592, 140320339656703,
+SNULL, 140321086214144, 140321128177663,
+STORE, 140321128177664, 140321136570367,
+STORE, 140321086214144, 140321128177663,
+SNULL, 140321128181759, 140321136570367,
+STORE, 140321128177664, 140321128181759,
+STORE, 140321128181760, 140321136570367,
+SNULL, 140321354616832, 140321371402239,
+STORE, 140321371402240, 140321379794943,
+STORE, 140321354616832, 140321371402239,
+SNULL, 140321371406335, 140321379794943,
+STORE, 140321371402240, 140321371406335,
+STORE, 140321371406336, 140321379794943,
+SNULL, 140320591310847, 140320608092159,
+STORE, 140320591306752, 140320591310847,
+STORE, 140320591310848, 140320608092159,
+SNULL, 140321354616832, 140321363009535,
+STORE, 140321363009536, 140321371402239,
+STORE, 140321354616832, 140321363009535,
+SNULL, 140321363013631, 140321371402239,
+STORE, 140321363009536, 140321363013631,
+STORE, 140321363013632, 140321371402239,
+SNULL, 140321086214144, 140321119784959,
+STORE, 140321119784960, 140321128177663,
+STORE, 140321086214144, 140321119784959,
+SNULL, 140321119789055, 140321128177663,
+STORE, 140321119784960, 140321119789055,
+STORE, 140321119789056, 140321128177663,
+SNULL, 140321086218239, 140321119784959,
+STORE, 140321086214144, 140321086218239,
+STORE, 140321086218240, 140321119784959,
+SNULL, 140321086218240, 140321094606847,
+STORE, 140321094606848, 140321119784959,
+STORE, 140321086218240, 140321094606847,
+SNULL, 140321094610943, 140321119784959,
+STORE, 140321094606848, 140321094610943,
+STORE, 140321094610944, 140321119784959,
+SNULL, 140320474009599, 140320540983295,
+STORE, 140320473874432, 140320474009599,
+STORE, 140320474009600, 140320540983295,
+SNULL, 140320406765567, 140320448696319,
+STORE, 140320339656704, 140320406765567,
+STORE, 140320406765568, 140320448696319,
+ERASE, 140320406765568, 140320448696319,
+SNULL, 140320339791871, 140320406765567,
+STORE, 140320339656704, 140320339791871,
+STORE, 140320339791872, 140320406765567,
+STORE, 140321270788096, 140321279180799,
+STORE, 140321262395392, 140321279180799,
+STORE, 140321254002688, 140321279180799,
+SNULL, 140321254002688, 140321262395391,
+STORE, 140321262395392, 140321279180799,
+STORE, 140321254002688, 140321262395391,
+SNULL, 140321262399487, 140321279180799,
+STORE, 140321262395392, 140321262399487,
+STORE, 140321262399488, 140321279180799,
+STORE, 140321245609984, 140321262395391,
+STORE, 140321237217280, 140321262395391,
+SNULL, 140321237217280, 140321245609983,
+STORE, 140321245609984, 140321262395391,
+STORE, 140321237217280, 140321245609983,
+SNULL, 140321245614079, 140321262395391,
+STORE, 140321245609984, 140321245614079,
+STORE, 140321245614080, 140321262395391,
+SNULL, 140321379799040, 140321388187647,
+STORE, 140321388187648, 140321404973055,
+STORE, 140321379799040, 140321388187647,
+SNULL, 140321388191743, 140321404973055,
+STORE, 140321388187648, 140321388191743,
+STORE, 140321388191744, 140321404973055,
+SNULL, 140321354620927, 140321363009535,
+STORE, 140321354616832, 140321354620927,
+STORE, 140321354620928, 140321363009535,
+SNULL, 140321388191744, 140321396580351,
+STORE, 140321396580352, 140321404973055,
+STORE, 140321388191744, 140321396580351,
+SNULL, 140321396584447, 140321404973055,
+STORE, 140321396580352, 140321396584447,
+STORE, 140321396584448, 140321404973055,
+SNULL, 140321094610944, 140321111392255,
+STORE, 140321111392256, 140321119784959,
+STORE, 140321094610944, 140321111392255,
+SNULL, 140321111396351, 140321119784959,
+STORE, 140321111392256, 140321111396351,
+STORE, 140321111396352, 140321119784959,
+STORE, 140321228824576, 140321245609983,
+SNULL, 140321094610944, 140321102999551,
+STORE, 140321102999552, 140321111392255,
+STORE, 140321094610944, 140321102999551,
+SNULL, 140321103003647, 140321111392255,
+STORE, 140321102999552, 140321103003647,
+STORE, 140321103003648, 140321111392255,
+STORE, 140321220431872, 140321245609983,
+SNULL, 140321220435967, 140321245609983,
+STORE, 140321220431872, 140321220435967,
+STORE, 140321220435968, 140321245609983,
+STORE, 140320868134912, 140320876527615,
+SNULL, 140320868139007, 140320876527615,
+STORE, 140320868134912, 140320868139007,
+STORE, 140320868139008, 140320876527615,
+SNULL, 140320591310848, 140320599699455,
+STORE, 140320599699456, 140320608092159,
+STORE, 140320591310848, 140320599699455,
+SNULL, 140320599703551, 140320608092159,
+STORE, 140320599699456, 140320599703551,
+STORE, 140320599703552, 140320608092159,
+STORE, 140320859742208, 140320868134911,
+SNULL, 140321262399488, 140321270788095,
+STORE, 140321270788096, 140321279180799,
+STORE, 140321262399488, 140321270788095,
+SNULL, 140321270792191, 140321279180799,
+STORE, 140321270788096, 140321270792191,
+STORE, 140321270792192, 140321279180799,
+STORE, 140320851349504, 140320868134911,
+STORE, 140320842956800, 140320868134911,
+STORE, 140320834564096, 140320868134911,
+STORE, 140320826171392, 140320868134911,
+SNULL, 140320826171392, 140320834564095,
+STORE, 140320834564096, 140320868134911,
+STORE, 140320826171392, 140320834564095,
+SNULL, 140320834568191, 140320868134911,
+STORE, 140320834564096, 140320834568191,
+STORE, 140320834568192, 140320868134911,
+SNULL, 140321220435968, 140321228824575,
+STORE, 140321228824576, 140321245609983,
+STORE, 140321220435968, 140321228824575,
+SNULL, 140321228828671, 140321245609983,
+STORE, 140321228824576, 140321228828671,
+STORE, 140321228828672, 140321245609983,
+STORE, 140320817778688, 140320834564095,
+SNULL, 140320817782783, 140320834564095,
+STORE, 140320817778688, 140320817782783,
+STORE, 140320817782784, 140320834564095,
+STORE, 140320582914048, 140320591306751,
+SNULL, 140321228828672, 140321237217279,
+STORE, 140321237217280, 140321245609983,
+STORE, 140321228828672, 140321237217279,
+SNULL, 140321237221375, 140321245609983,
+STORE, 140321237217280, 140321237221375,
+STORE, 140321237221376, 140321245609983,
+SNULL, 140320448700415, 140320457089023,
+STORE, 140320448696320, 140320448700415,
+STORE, 140320448700416, 140320457089023,
+SNULL, 140321245614080, 140321254002687,
+STORE, 140321254002688, 140321262395391,
+STORE, 140321245614080, 140321254002687,
+SNULL, 140321254006783, 140321262395391,
+STORE, 140321254002688, 140321254006783,
+STORE, 140321254006784, 140321262395391,
+STORE, 140320574521344, 140320591306751,
+SNULL, 140320574525439, 140320591306751,
+STORE, 140320574521344, 140320574525439,
+STORE, 140320574525440, 140320591306751,
+STORE, 140320566128640, 140320574521343,
+SNULL, 140320566132735, 140320574521343,
+STORE, 140320566128640, 140320566132735,
+STORE, 140320566132736, 140320574521343,
+SNULL, 140320574525440, 140320582914047,
+STORE, 140320582914048, 140320591306751,
+STORE, 140320574525440, 140320582914047,
+SNULL, 140320582918143, 140320591306751,
+STORE, 140320582914048, 140320582918143,
+STORE, 140320582918144, 140320591306751,
+STORE, 140320557735936, 140320566128639,
+SNULL, 140320557740031, 140320566128639,
+STORE, 140320557735936, 140320557740031,
+STORE, 140320557740032, 140320566128639,
+STORE, 140320549343232, 140320557735935,
+STORE, 140320465481728, 140320473874431,
+STORE, 140320448700416, 140320473874431,
+SNULL, 140320834568192, 140320859742207,
+STORE, 140320859742208, 140320868134911,
+STORE, 140320834568192, 140320859742207,
+SNULL, 140320859746303, 140320868134911,
+STORE, 140320859742208, 140320859746303,
+STORE, 140320859746304, 140320868134911,
+STORE, 140320440303616, 140320448696319,
+STORE, 140320431910912, 140320448696319,
+SNULL, 140320834568192, 140320851349503,
+STORE, 140320851349504, 140320859742207,
+STORE, 140320834568192, 140320851349503,
+SNULL, 140320851353599, 140320859742207,
+STORE, 140320851349504, 140320851353599,
+STORE, 140320851353600, 140320859742207,
+SNULL, 140320817782784, 140320826171391,
+STORE, 140320826171392, 140320834564095,
+STORE, 140320817782784, 140320826171391,
+SNULL, 140320826175487, 140320834564095,
+STORE, 140320826171392, 140320826175487,
+STORE, 140320826175488, 140320834564095,
+SNULL, 140320834568192, 140320842956799,
+STORE, 140320842956800, 140320851349503,
+STORE, 140320834568192, 140320842956799,
+SNULL, 140320842960895, 140320851349503,
+STORE, 140320842956800, 140320842960895,
+STORE, 140320842960896, 140320851349503,
+STORE, 140320423518208, 140320448696319,
+SNULL, 140320423522303, 140320448696319,
+STORE, 140320423518208, 140320423522303,
+STORE, 140320423522304, 140320448696319,
+STORE, 140320415125504, 140320423518207,
+STORE, 140320331264000, 140320339656703,
+STORE, 140320322871296, 140320339656703,
+STORE, 140320314478592, 140320339656703,
+SNULL, 140320314482687, 140320339656703,
+STORE, 140320314478592, 140320314482687,
+STORE, 140320314482688, 140320339656703,
+STORE, 140320306085888, 140320314478591,
+SNULL, 140320306089983, 140320314478591,
+STORE, 140320306085888, 140320306089983,
+STORE, 140320306089984, 140320314478591,
+STORE, 140320297693184, 140320306085887,
+SNULL, 140320297697279, 140320306085887,
+STORE, 140320297693184, 140320297697279,
+STORE, 140320297697280, 140320306085887,
+STORE, 140320289300480, 140320297693183,
+STORE, 140320280907776, 140320297693183,
+SNULL, 140320280911871, 140320297693183,
+STORE, 140320280907776, 140320280911871,
+STORE, 140320280911872, 140320297693183,
+SNULL, 140320423522304, 140320431910911,
+STORE, 140320431910912, 140320448696319,
+STORE, 140320423522304, 140320431910911,
+SNULL, 140320431915007, 140320448696319,
+STORE, 140320431910912, 140320431915007,
+STORE, 140320431915008, 140320448696319,
+SNULL, 140320549347327, 140320557735935,
+STORE, 140320549343232, 140320549347327,
+STORE, 140320549347328, 140320557735935,
+STORE, 140320272515072, 140320280907775,
+SNULL, 140320448700416, 140320457089023,
+STORE, 140320457089024, 140320473874431,
+STORE, 140320448700416, 140320457089023,
+SNULL, 140320457093119, 140320473874431,
+STORE, 140320457089024, 140320457093119,
+STORE, 140320457093120, 140320473874431,
+STORE, 140320264122368, 140320280907775,
+SNULL, 140320457093120, 140320465481727,
+STORE, 140320465481728, 140320473874431,
+STORE, 140320457093120, 140320465481727,
+SNULL, 140320465485823, 140320473874431,
+STORE, 140320465481728, 140320465485823,
+STORE, 140320465485824, 140320473874431,
+SNULL, 140320431915008, 140320440303615,
+STORE, 140320440303616, 140320448696319,
+STORE, 140320431915008, 140320440303615,
+SNULL, 140320440307711, 140320448696319,
+STORE, 140320440303616, 140320440307711,
+STORE, 140320440307712, 140320448696319,
+STORE, 140320255729664, 140320280907775,
+STORE, 140320247336960, 140320280907775,
+SNULL, 140320247341055, 140320280907775,
+STORE, 140320247336960, 140320247341055,
+STORE, 140320247341056, 140320280907775,
+STORE, 140320238944256, 140320247336959,
+STORE, 140320230551552, 140320247336959,
+SNULL, 140320230551552, 140320238944255,
+STORE, 140320238944256, 140320247336959,
+STORE, 140320230551552, 140320238944255,
+SNULL, 140320238948351, 140320247336959,
+STORE, 140320238944256, 140320238948351,
+STORE, 140320238948352, 140320247336959,
+SNULL, 140320314482688, 140320331263999,
+STORE, 140320331264000, 140320339656703,
+STORE, 140320314482688, 140320331263999,
+SNULL, 140320331268095, 140320339656703,
+STORE, 140320331264000, 140320331268095,
+STORE, 140320331268096, 140320339656703,
+SNULL, 140320280911872, 140320289300479,
+STORE, 140320289300480, 140320297693183,
+STORE, 140320280911872, 140320289300479,
+SNULL, 140320289304575, 140320297693183,
+STORE, 140320289300480, 140320289304575,
+STORE, 140320289304576, 140320297693183,
+SNULL, 140320415129599, 140320423518207,
+STORE, 140320415125504, 140320415129599,
+STORE, 140320415129600, 140320423518207,
+STORE, 140320222158848, 140320238944255,
+STORE, 140320213766144, 140320238944255,
+STORE, 140320205373440, 140320238944255,
+SNULL, 140320205377535, 140320238944255,
+STORE, 140320205373440, 140320205377535,
+STORE, 140320205377536, 140320238944255,
+SNULL, 140320314482688, 140320322871295,
+STORE, 140320322871296, 140320331263999,
+STORE, 140320314482688, 140320322871295,
+SNULL, 140320322875391, 140320331263999,
+STORE, 140320322871296, 140320322875391,
+STORE, 140320322875392, 140320331263999,
+SNULL, 140320247341056, 140320272515071,
+STORE, 140320272515072, 140320280907775,
+STORE, 140320247341056, 140320272515071,
+SNULL, 140320272519167, 140320280907775,
+STORE, 140320272515072, 140320272519167,
+STORE, 140320272519168, 140320280907775,
+SNULL, 140320247341056, 140320264122367,
+STORE, 140320264122368, 140320272515071,
+STORE, 140320247341056, 140320264122367,
+SNULL, 140320264126463, 140320272515071,
+STORE, 140320264122368, 140320264126463,
+STORE, 140320264126464, 140320272515071,
+SNULL, 140320205377536, 140320230551551,
+STORE, 140320230551552, 140320238944255,
+STORE, 140320205377536, 140320230551551,
+SNULL, 140320230555647, 140320238944255,
+STORE, 140320230551552, 140320230555647,
+STORE, 140320230555648, 140320238944255,
+STORE, 140320196980736, 140320205373439,
+SNULL, 140320196984831, 140320205373439,
+STORE, 140320196980736, 140320196984831,
+STORE, 140320196984832, 140320205373439,
+STORE, 140320188588032, 140320196980735,
+SNULL, 140320247341056, 140320255729663,
+STORE, 140320255729664, 140320264122367,
+STORE, 140320247341056, 140320255729663,
+SNULL, 140320255733759, 140320264122367,
+STORE, 140320255729664, 140320255733759,
+STORE, 140320255733760, 140320264122367,
+STORE, 140320180195328, 140320196980735,
+SNULL, 140320180199423, 140320196980735,
+STORE, 140320180195328, 140320180199423,
+STORE, 140320180199424, 140320196980735,
+STORE, 140320171802624, 140320180195327,
+STORE, 140320163409920, 140320180195327,
+SNULL, 140320163414015, 140320180195327,
+STORE, 140320163409920, 140320163414015,
+STORE, 140320163414016, 140320180195327,
+SNULL, 140320205377536, 140320222158847,
+STORE, 140320222158848, 140320230551551,
+STORE, 140320205377536, 140320222158847,
+SNULL, 140320222162943, 140320230551551,
+STORE, 140320222158848, 140320222162943,
+STORE, 140320222162944, 140320230551551,
+SNULL, 140320205377536, 140320213766143,
+STORE, 140320213766144, 140320222158847,
+STORE, 140320205377536, 140320213766143,
+SNULL, 140320213770239, 140320222158847,
+STORE, 140320213766144, 140320213770239,
+STORE, 140320213770240, 140320222158847,
+STORE, 140320155017216, 140320163409919,
+SNULL, 140320180199424, 140320188588031,
+STORE, 140320188588032, 140320196980735,
+STORE, 140320180199424, 140320188588031,
+SNULL, 140320188592127, 140320196980735,
+STORE, 140320188588032, 140320188592127,
+STORE, 140320188592128, 140320196980735,
+SNULL, 140320155021311, 140320163409919,
+STORE, 140320155017216, 140320155021311,
+STORE, 140320155021312, 140320163409919,
+SNULL, 140320163414016, 140320171802623,
+STORE, 140320171802624, 140320180195327,
+STORE, 140320163414016, 140320171802623,
+SNULL, 140320171806719, 140320180195327,
+STORE, 140320171802624, 140320171806719,
+STORE, 140320171806720, 140320180195327,
+STORE, 140320146624512, 140320155017215,
+SNULL, 140320146628607, 140320155017215,
+STORE, 140320146624512, 140320146628607,
+STORE, 140320146628608, 140320155017215,
+STORE, 140321937321984, 140321937350655,
+STORE, 140321884942336, 140321887133695,
+SNULL, 140321884942336, 140321885032447,
+STORE, 140321885032448, 140321887133695,
+STORE, 140321884942336, 140321885032447,
+SNULL, 140321887125503, 140321887133695,
+STORE, 140321885032448, 140321887125503,
+STORE, 140321887125504, 140321887133695,
+ERASE, 140321887125504, 140321887133695,
+STORE, 140321887125504, 140321887133695,
+SNULL, 140321887129599, 140321887133695,
+STORE, 140321887125504, 140321887129599,
+STORE, 140321887129600, 140321887133695,
+ERASE, 140321937321984, 140321937350655,
+ERASE, 140321086214144, 140321086218239,
+ERASE, 140321086218240, 140321094606847,
+ERASE, 140321119784960, 140321119789055,
+ERASE, 140321119789056, 140321128177663,
+ERASE, 140321245609984, 140321245614079,
+ERASE, 140321245614080, 140321254002687,
+ERASE, 140320574521344, 140320574525439,
+ERASE, 140320574525440, 140320582914047,
+ERASE, 140320297693184, 140320297697279,
+ERASE, 140320297697280, 140320306085887,
+ERASE, 140321354616832, 140321354620927,
+ERASE, 140321354620928, 140321363009535,
+ERASE, 140320834564096, 140320834568191,
+ERASE, 140320834568192, 140320842956799,
+ERASE, 140320591306752, 140320591310847,
+ERASE, 140320591310848, 140320599699455,
+ERASE, 140321136570368, 140321136574463,
+ERASE, 140321136574464, 140321144963071,
+ERASE, 140321237217280, 140321237221375,
+ERASE, 140321237221376, 140321245609983,
+ERASE, 140321363009536, 140321363013631,
+ERASE, 140321363013632, 140321371402239,
+ERASE, 140320599699456, 140320599703551,
+ERASE, 140320599703552, 140320608092159,
+ERASE, 140321396580352, 140321396584447,
+ERASE, 140321396584448, 140321404973055,
+ERASE, 140320566128640, 140320566132735,
+ERASE, 140320566132736, 140320574521343,
+ERASE, 140321094606848, 140321094610943,
+ERASE, 140321094610944, 140321102999551,
+ERASE, 140320582914048, 140320582918143,
+ERASE, 140320582918144, 140320591306751,
+ERASE, 140320289300480, 140320289304575,
+ERASE, 140320289304576, 140320297693183,
+ERASE, 140320163409920, 140320163414015,
+ };
+ static const unsigned long set41[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140728157171712, 140737488351231,
+SNULL, 140728157175807, 140737488351231,
+STORE, 140728157171712, 140728157175807,
+STORE, 140728157040640, 140728157175807,
+STORE, 94376106364928, 94376108613631,
+SNULL, 94376106487807, 94376108613631,
+STORE, 94376106364928, 94376106487807,
+STORE, 94376106487808, 94376108613631,
+SNULL, 94376106487808, 94376108613631,
+STORE, 94376108584960, 94376108593151,
+STORE, 94376108593152, 94376108613631,
+STORE, 140113496432640, 140113498685439,
+SNULL, 140113496575999, 140113498685439,
+STORE, 140113496432640, 140113496575999,
+STORE, 140113496576000, 140113498685439,
+SNULL, 140113496576000, 140113498685439,
+STORE, 140113498673152, 140113498681343,
+STORE, 140113498681344, 140113498685439,
+STORE, 140728157609984, 140728157618175,
+STORE, 140728157593600, 140728157609983,
+STORE, 140113498636288, 140113498673151,
+STORE, 140113498628096, 140113498636287,
+STORE, 140113492635648, 140113496432639,
+SNULL, 140113492635648, 140113494294527,
+STORE, 140113494294528, 140113496432639,
+STORE, 140113492635648, 140113494294527,
+SNULL, 140113496391679, 140113496432639,
+STORE, 140113494294528, 140113496391679,
+STORE, 140113496391680, 140113496432639,
+SNULL, 140113496391680, 140113496416255,
+STORE, 140113496416256, 140113496432639,
+STORE, 140113496391680, 140113496416255,
+SNULL, 140113496391680, 140113496416255,
+STORE, 140113496391680, 140113496416255,
+SNULL, 140113496416256, 140113496432639,
+STORE, 140113496416256, 140113496432639,
+SNULL, 140113496408063, 140113496416255,
+STORE, 140113496391680, 140113496408063,
+STORE, 140113496408064, 140113496416255,
+SNULL, 94376108589055, 94376108593151,
+STORE, 94376108584960, 94376108589055,
+STORE, 94376108589056, 94376108593151,
+SNULL, 140113498677247, 140113498681343,
+STORE, 140113498673152, 140113498677247,
+STORE, 140113498677248, 140113498681343,
+SNULL, 140113498636288, 140113498673151,
+STORE, 94376135090176, 94376135094271,
+STORE, 94376135090176, 94376135098367,
+STORE, 94376139288576, 94376139292671,
+STORE, 94376143482880, 94376143486975,
+STORE, 94376147677184, 94376147681279,
+STORE, 94376151871488, 94376151875583,
+STORE, 94376156065792, 94376156069887,
+STORE, 94376160260096, 94376160264191,
+STORE, 94376164454400, 94376164458495,
+STORE, 94376168648704, 94376168652799,
+STORE, 94376172843008, 94376172847103,
+STORE, 94376177037312, 94376177041407,
+STORE, 94376181231616, 94376181235711,
+STORE, 94376185425920, 94376185430015,
+STORE, 94376189620224, 94376189624319,
+STORE, 94376193814528, 94376193818623,
+STORE, 94376198008832, 94376198012927,
+STORE, 94376202203136, 94376202207231,
+STORE, 94376206397440, 94376206401535,
+STORE, 94376210591744, 94376210595839,
+STORE, 94376214786048, 94376214790143,
+STORE, 94376218980352, 94376218984447,
+STORE, 94376223174656, 94376223178751,
+STORE, 94376227368960, 94376227373055,
+STORE, 94376231563264, 94376231567359,
+STORE, 94376235757568, 94376235761663,
+STORE, 94376239951872, 94376239955967,
+STORE, 94376244146176, 94376244150271,
+STORE, 94376248340480, 94376248344575,
+STORE, 94376252534784, 94376252538879,
+STORE, 94376256729088, 94376256733183,
+STORE, 94376260923392, 94376260927487,
+STORE, 94376265117696, 94376265121791,
+STORE, 94376269312000, 94376269316095,
+STORE, 94376273506304, 94376273510399,
+STORE, 94376277700608, 94376277704703,
+STORE, 94376281894912, 94376281899007,
+STORE, 94376286089216, 94376286093311,
+STORE, 94376290283520, 94376290287615,
+STORE, 94376294477824, 94376294481919,
+STORE, 94376298672128, 94376298676223,
+STORE, 94376302866432, 94376302870527,
+STORE, 94376307060736, 94376307064831,
+STORE, 94376311255040, 94376311259135,
+STORE, 94376315449344, 94376315453439,
+STORE, 94376319643648, 94376319647743,
+STORE, 94376323837952, 94376323842047,
+STORE, 94376328032256, 94376328036351,
+STORE, 94376332226560, 94376332230655,
+STORE, 94376336420864, 94376336424959,
+STORE, 94376340615168, 94376340619263,
+STORE, 94376344809472, 94376344813567,
+STORE, 94376349003776, 94376349007871,
+STORE, 94376353198080, 94376353202175,
+STORE, 94376357392384, 94376357396479,
+STORE, 94376361586688, 94376361590783,
+STORE, 94376365780992, 94376365785087,
+STORE, 94376369975296, 94376369979391,
+STORE, 94376374169600, 94376374173695,
+STORE, 94376378363904, 94376378367999,
+STORE, 94376382558208, 94376382562303,
+STORE, 94376386752512, 94376386756607,
+STORE, 94376390946816, 94376390950911,
+STORE, 94376395141120, 94376395145215,
+STORE, 94376399335424, 94376399339519,
+STORE, 94376403529728, 94376403533823,
+STORE, 94376407724032, 94376407728127,
+STORE, 94376411918336, 94376411922431,
+STORE, 94376416112640, 94376416116735,
+STORE, 94376420306944, 94376420311039,
+STORE, 94376424501248, 94376424505343,
+STORE, 94376428695552, 94376428699647,
+STORE, 94376432889856, 94376432893951,
+STORE, 94376437084160, 94376437088255,
+STORE, 94376441278464, 94376441282559,
+STORE, 94376445472768, 94376445476863,
+STORE, 94376449667072, 94376449671167,
+STORE, 94376453861376, 94376453865471,
+STORE, 94376458055680, 94376458059775,
+STORE, 94376462249984, 94376462254079,
+STORE, 94376466444288, 94376466448383,
+STORE, 94376470638592, 94376470642687,
+STORE, 94376474832896, 94376474836991,
+STORE, 94376479027200, 94376479031295,
+STORE, 94376483221504, 94376483225599,
+STORE, 94376487415808, 94376487419903,
+STORE, 94376491610112, 94376491614207,
+STORE, 94376495804416, 94376495808511,
+STORE, 94376499998720, 94376500002815,
+STORE, 94376504193024, 94376504197119,
+STORE, 94376508387328, 94376508391423,
+STORE, 94376512581632, 94376512585727,
+STORE, 94376516775936, 94376516780031,
+STORE, 94376520970240, 94376520974335,
+STORE, 94376525164544, 94376525168639,
+STORE, 94376529358848, 94376529362943,
+STORE, 94376533553152, 94376533557247,
+STORE, 94376537747456, 94376537751551,
+STORE, 94376541941760, 94376541945855,
+STORE, 94376546136064, 94376546140159,
+STORE, 94376550330368, 94376550334463,
+STORE, 94376554524672, 94376554528767,
+STORE, 94376558718976, 94376558723071,
+STORE, 94376562913280, 94376562917375,
+STORE, 94376567107584, 94376567111679,
+STORE, 94376571301888, 94376571305983,
+STORE, 94376575496192, 94376575500287,
+STORE, 94376579690496, 94376579694591,
+STORE, 94376583884800, 94376583888895,
+STORE, 94376588079104, 94376588083199,
+STORE, 94376592273408, 94376592277503,
+STORE, 94376596467712, 94376596471807,
+STORE, 94376600662016, 94376600666111,
+STORE, 94376604856320, 94376604860415,
+STORE, 94376609050624, 94376609054719,
+STORE, 94376613244928, 94376613249023,
+STORE, 94376617439232, 94376617443327,
+STORE, 94376621633536, 94376621637631,
+STORE, 94376625827840, 94376625831935,
+STORE, 94376630022144, 94376630026239,
+STORE, 94376634216448, 94376634220543,
+STORE, 94376638410752, 94376638414847,
+STORE, 94376642605056, 94376642609151,
+STORE, 94376646799360, 94376646803455,
+STORE, 94376650993664, 94376650997759,
+STORE, 94376655187968, 94376655192063,
+STORE, 94376659382272, 94376659386367,
+STORE, 94376663576576, 94376663580671,
+STORE, 94376667770880, 94376667774975,
+STORE, 94376671965184, 94376671969279,
+STORE, 94376676159488, 94376676163583,
+STORE, 94376680353792, 94376680357887,
+STORE, 94376684548096, 94376684552191,
+STORE, 94376688742400, 94376688746495,
+STORE, 94376692936704, 94376692940799,
+STORE, 94376697131008, 94376697135103,
+STORE, 94376701325312, 94376701329407,
+STORE, 94376705519616, 94376705523711,
+STORE, 94376709713920, 94376709718015,
+STORE, 94376713908224, 94376713912319,
+STORE, 94376718102528, 94376718106623,
+STORE, 94376722296832, 94376722300927,
+STORE, 94376726491136, 94376726495231,
+STORE, 94376730685440, 94376730689535,
+STORE, 94376734879744, 94376734883839,
+STORE, 94376739074048, 94376739078143,
+STORE, 94376743268352, 94376743272447,
+STORE, 94376747462656, 94376747466751,
+STORE, 94376751656960, 94376751661055,
+STORE, 94376755851264, 94376755855359,
+STORE, 94376760045568, 94376760049663,
+STORE, 94376764239872, 94376764243967,
+STORE, 94376768434176, 94376768438271,
+STORE, 94376772628480, 94376772632575,
+STORE, 94376776822784, 94376776826879,
+STORE, 94376781017088, 94376781021183,
+STORE, 94376785211392, 94376785215487,
+STORE, 94376789405696, 94376789409791,
+STORE, 94376793600000, 94376793604095,
+STORE, 94376797794304, 94376797798399,
+STORE, 94376801988608, 94376801992703,
+STORE, 94376806182912, 94376806187007,
+STORE, 94376810377216, 94376810381311,
+STORE, 94376814571520, 94376814575615,
+STORE, 94376818765824, 94376818769919,
+STORE, 94376822960128, 94376822964223,
+STORE, 94376827154432, 94376827158527,
+STORE, 94376831348736, 94376831352831,
+STORE, 94376835543040, 94376835547135,
+STORE, 94376839737344, 94376839741439,
+STORE, 94376843931648, 94376843935743,
+STORE, 94376848125952, 94376848130047,
+STORE, 94376852320256, 94376852324351,
+STORE, 94376856514560, 94376856518655,
+STORE, 94376860708864, 94376860712959,
+STORE, 94376864903168, 94376864907263,
+STORE, 94376869097472, 94376869101567,
+STORE, 94376873291776, 94376873295871,
+STORE, 94376877486080, 94376877490175,
+STORE, 94376881680384, 94376881684479,
+STORE, 94376885874688, 94376885878783,
+STORE, 94376890068992, 94376890073087,
+STORE, 94376894263296, 94376894267391,
+STORE, 94376898457600, 94376898461695,
+STORE, 94376902651904, 94376902655999,
+STORE, 94376906846208, 94376906850303,
+STORE, 94376911040512, 94376911044607,
+STORE, 94376915234816, 94376915238911,
+STORE, 94376919429120, 94376919433215,
+STORE, 94376923623424, 94376923627519,
+STORE, 94376927817728, 94376927821823,
+STORE, 94376932012032, 94376932016127,
+STORE, 94376936206336, 94376936210431,
+STORE, 94376940400640, 94376940404735,
+STORE, 94376944594944, 94376944599039,
+STORE, 94376948789248, 94376948793343,
+STORE, 94376952983552, 94376952987647,
+STORE, 94376957177856, 94376957181951,
+STORE, 94376961372160, 94376961376255,
+STORE, 94376965566464, 94376965570559,
+STORE, 94376969760768, 94376969764863,
+STORE, 94376973955072, 94376973959167,
+STORE, 94376978149376, 94376978153471,
+STORE, 94376982343680, 94376982347775,
+STORE, 94376986537984, 94376986542079,
+STORE, 94376990732288, 94376990736383,
+STORE, 94376994926592, 94376994930687,
+STORE, 94376999120896, 94376999124991,
+STORE, 94377003315200, 94377003319295,
+STORE, 94377007509504, 94377007513599,
+STORE, 94377011703808, 94377011707903,
+STORE, 94377015898112, 94377015902207,
+STORE, 94377020092416, 94377020096511,
+STORE, 94377024286720, 94377024290815,
+STORE, 94377028481024, 94377028485119,
+STORE, 94377032675328, 94377032679423,
+STORE, 94377036869632, 94377036873727,
+STORE, 94377041063936, 94377041068031,
+STORE, 94377045258240, 94377045262335,
+STORE, 94377049452544, 94377049456639,
+STORE, 94377053646848, 94377053650943,
+STORE, 94377057841152, 94377057845247,
+STORE, 94377062035456, 94377062039551,
+STORE, 94377066229760, 94377066233855,
+STORE, 94377070424064, 94377070428159,
+STORE, 94377074618368, 94377074622463,
+STORE, 94377078812672, 94377078816767,
+STORE, 94377083006976, 94377083011071,
+STORE, 94377087201280, 94377087205375,
+STORE, 94377091395584, 94377091399679,
+STORE, 94377095589888, 94377095593983,
+STORE, 94377099784192, 94377099788287,
+STORE, 94377103978496, 94377103982591,
+STORE, 94377108172800, 94377108176895,
+STORE, 94377112367104, 94377112371199,
+STORE, 94377116561408, 94377116565503,
+STORE, 94377120755712, 94377120759807,
+STORE, 94377124950016, 94377124954111,
+STORE, 94377129144320, 94377129148415,
+STORE, 94377133338624, 94377133342719,
+STORE, 94377137532928, 94377137537023,
+STORE, 94377141727232, 94377141731327,
+STORE, 94377145921536, 94377145925631,
+STORE, 94377150115840, 94377150119935,
+STORE, 94377154310144, 94377154314239,
+STORE, 94377158504448, 94377158508543,
+STORE, 94377162698752, 94377162702847,
+STORE, 94377166893056, 94377166897151,
+STORE, 94377171087360, 94377171091455,
+STORE, 94377175281664, 94377175285759,
+STORE, 94377179475968, 94377179480063,
+STORE, 94377183670272, 94377183674367,
+STORE, 94377187864576, 94377187868671,
+STORE, 94377192058880, 94377192062975,
+STORE, 94377196253184, 94377196257279,
+STORE, 94377200447488, 94377200451583,
+STORE, 94377204641792, 94377204645887,
+SNULL, 94376135094271, 94376135098367,
+STORE, 94376135090176, 94376135094271,
+STORE, 94376135094272, 94376135098367,
+SNULL, 94376135094272, 94377208836095,
+ };
+ static const unsigned long set42[] = {
+STORE, 314572800, 1388314623,
+STORE, 1462157312, 1462169599,
+STORE, 1462169600, 1462185983,
+STORE, 1462185984, 1462190079,
+STORE, 1462190080, 1462194175,
+STORE, 1462194176, 1462198271,
+STORE, 1879986176, 1881800703,
+STORE, 1881800704, 1882034175,
+STORE, 1882034176, 1882193919,
+STORE, 1882193920, 1882406911,
+STORE, 1882406912, 1882451967,
+STORE, 1882451968, 1882996735,
+STORE, 1882996736, 1885892607,
+STORE, 1885892608, 1885896703,
+STORE, 1885896704, 1885904895,
+STORE, 1885904896, 1885908991,
+STORE, 1885908992, 1885913087,
+STORE, 1885913088, 1885966335,
+STORE, 1885966336, 1886232575,
+STORE, 1886232576, 1886236671,
+STORE, 1886236672, 1886240767,
+STORE, 1886240768, 1886244863,
+STORE, 1886244864, 1886248959,
+STORE, 1886248960, 1886294015,
+STORE, 1886294016, 1886494719,
+STORE, 1886494720, 1886498815,
+STORE, 1886498816, 1886502911,
+STORE, 1886502912, 1886507007,
+STORE, 1886507008, 1886511103,
+STORE, 1886511104, 1886556159,
+STORE, 1886556160, 1886629887,
+STORE, 1886629888, 1886633983,
+STORE, 1886633984, 1886638079,
+STORE, 1886638080, 1886642175,
+STORE, 1886642176, 1886646271,
+STORE, 1886646272, 1886666751,
+STORE, 1886666752, 1886670847,
+STORE, 1886670848, 1886674943,
+STORE, 1886674944, 1886679039,
+STORE, 1886679040, 1895419903,
+STORE, 1895419904, 1895550975,
+STORE, 1895550976, 1896148991,
+STORE, 1896148992, 1897189375,
+STORE, 1897189376, 1897701375,
+STORE, 1897701376, 1897803775,
+STORE, 1897803776, 1897816063,
+STORE, 1897816064, 1899913215,
+STORE, 1899913216, 1909379071,
+STORE, 1909379072, 1909387263,
+STORE, 1909387264, 1909391359,
+STORE, 1909391360, 1909432319,
+STORE, 1909432320, 1909436415,
+STORE, 1909436416, 1909440511,
+STORE, 1909440512, 1909460991,
+STORE, 1909460992, 1909547007,
+STORE, 1909547008, 1909551103,
+STORE, 1909551104, 1909555199,
+STORE, 1909555200, 1909559295,
+STORE, 1909559296, 1909563391,
+STORE, 1909563392, 1909739519,
+STORE, 1909739520, 1910566911,
+STORE, 1910566912, 1910571007,
+STORE, 1910571008, 1910575103,
+STORE, 1910575104, 1910579199,
+STORE, 1910579200, 1910583295,
+STORE, 1910583296, 1910587391,
+STORE, 1910587392, 1910620159,
+STORE, 1910620160, 1910624255,
+STORE, 1910624256, 1910628351,
+STORE, 1910628352, 1910632447,
+STORE, 1910632448, 1910652927,
+STORE, 1910652928, 1910657023,
+STORE, 1910657024, 1910661119,
+STORE, 1910661120, 1910665215,
+STORE, 1910665216, 1910669311,
+STORE, 1910669312, 1910677503,
+STORE, 1910677504, 1910681599,
+STORE, 1910681600, 1910685695,
+STORE, 1910685696, 1910689791,
+STORE, 1910689792, 1910697983,
+STORE, 1910697984, 1910702079,
+STORE, 1910702080, 1910706175,
+STORE, 1910706176, 1910710271,
+STORE, 1910710272, 1914093567,
+STORE, 1914093568, 1914097663,
+STORE, 1914097664, 1969434623,
+STORE, 1969434624, 1977819135,
+STORE, 3290435584, 3426750463,
+STORE, 3426750464, 3426754559,
+STORE, 3426754560, 3426762751,
+STORE, 3426762752, 3426766847,
+STORE, 3426766848, 3426770943,
+STORE, 3427037184, 3427061759,
+STORE, 3427061760, 3427135487,
+STORE, 3427135488, 3427143679,
+STORE, 3427143680, 3427147775,
+STORE, 3427147776, 3427209215,
+STORE, 3427319808, 3432116223,
+STORE, 3432116224, 3450130431,
+STORE, 3450130432, 3451027455,
+STORE, 3451027456, 3451031551,
+STORE, 3451031552, 3451461631,
+STORE, 3451736064, 3456688127,
+STORE, 3456688128, 3475222527,
+STORE, 3475222528, 3476119551,
+STORE, 3476119552, 3476127743,
+STORE, 3476127744, 3476553727,
+STORE, 3476631552, 3477315583,
+STORE, 3477315584, 3479949311,
+STORE, 3479949312, 3480002559,
+STORE, 3480002560, 3480006655,
+STORE, 3480006656, 3480432639,
+STORE, 3480539136, 3480543231,
+STORE, 3480543232, 3480547327,
+STORE, 3480547328, 3480555519,
+STORE, 3480854528, 3480903679,
+STORE, 3480903680, 3480969215,
+STORE, 3480969216, 3480977407,
+STORE, 3480977408, 3480981503,
+STORE, 3481030656, 3481092095,
+STORE, 3481092096, 3481235455,
+STORE, 3481235456, 3481243647,
+STORE, 3481243648, 3481247743,
+STORE, 3481436160, 3481444351,
+STORE, 3481444352, 3481456639,
+STORE, 3481456640, 3481460735,
+STORE, 3481460736, 3481464831,
+STORE, 3481587712, 3481645055,
+STORE, 3481645056, 3481772031,
+STORE, 3481772032, 3481776127,
+STORE, 3481776128, 3481780223,
+STORE, 3481874432, 3481935871,
+STORE, 3481935872, 3482030079,
+STORE, 3482030080, 3482038271,
+STORE, 3482038272, 3482042367,
+STORE, 3482198016, 3482230783,
+STORE, 3482230784, 3482271743,
+STORE, 3482271744, 3482279935,
+STORE, 3482279936, 3482284031,
+STORE, 3482562560, 3482566655,
+STORE, 3482566656, 3482570751,
+STORE, 3482570752, 3482574847,
+STORE, 3482636288, 3482689535,
+STORE, 3482689536, 3482746879,
+STORE, 3482746880, 3482755071,
+STORE, 3482755072, 3482759167,
+STORE, 3482972160, 3483062271,
+STORE, 3483062272, 3483242495,
+STORE, 3483242496, 3483246591,
+STORE, 3483246592, 3483250687,
+STORE, 3483398144, 3483688959,
+STORE, 3483688960, 3484114943,
+STORE, 3484114944, 3484131327,
+STORE, 3484131328, 3484135423,
+STORE, 3484135424, 3484143615,
+STORE, 3484184576, 3484475391,
+STORE, 3484475392, 3485028351,
+STORE, 3485028352, 3485057023,
+STORE, 3485057024, 3485061119,
+STORE, 3485360128, 3485364223,
+STORE, 3485364224, 3485368319,
+STORE, 3485368320, 3485372415,
+STORE, 3485589504, 3485593599,
+STORE, 3485593600, 3485597695,
+STORE, 3485597696, 3485601791,
+STORE, 3485913088, 3485937663,
+STORE, 3485937664, 3485974527,
+STORE, 3485974528, 3485982719,
+STORE, 3485982720, 3485986815,
+STORE, 3486052352, 3486056447,
+STORE, 3486056448, 3486064639,
+STORE, 3486064640, 3486068735,
+STORE, 3486068736, 3486072831,
+STORE, 3486294016, 3486302207,
+STORE, 3486302208, 3486306303,
+STORE, 3486306304, 3486310399,
+STORE, 3486310400, 3486314495,
+STORE, 3486670848, 3486679039,
+STORE, 3486679040, 3486683135,
+STORE, 3486683136, 3486687231,
+STORE, 3486687232, 3486691327,
+STORE, 3486863360, 3486871551,
+STORE, 3486871552, 3486875647,
+STORE, 3486875648, 3486879743,
+STORE, 3486879744, 3486883839,
+STORE, 3487584256, 3522543615,
+STORE, 3522543616, 3523321855,
+STORE, 3523321856, 3523342335,
+STORE, 3523342336, 3523387391,
+STORE, 3523387392, 3523391487,
+STORE, 3523391488, 3523395583,
+STORE, 3523477504, 3523686399,
+STORE, 3523686400, 3523981311,
+STORE, 3523981312, 3523997695,
+STORE, 3523997696, 3524001791,
+STORE, 3524177920, 3525013503,
+STORE, 3525013504, 3526582271,
+STORE, 3526582272, 3526606847,
+STORE, 3526606848, 3526610943,
+STORE, 3526610944, 3526615039,
+STORE, 3526672384, 3526746111,
+STORE, 3526746112, 3526860799,
+STORE, 3526860800, 3526868991,
+STORE, 3526868992, 3526873087,
+STORE, 3527000064, 3527475199,
+STORE, 3527475200, 3527479295,
+STORE, 3527479296, 3527573503,
+STORE, 3527573504, 3527581695,
+STORE, 3527581696, 3527585791,
+STORE, 3527585792, 3527606271,
+STORE, 3527909376, 3527913471,
+STORE, 3527913472, 3527917567,
+STORE, 3527917568, 3527921663,
+STORE, 3527950336, 3528011775,
+STORE, 3528011776, 3528093695,
+STORE, 3528093696, 3528101887,
+STORE, 3528101888, 3528105983,
+STORE, 3528228864, 3528241151,
+STORE, 3528241152, 3528261631,
+STORE, 3528261632, 3528265727,
+STORE, 3528273920, 3528593407,
+STORE, 3528593408, 3528609791,
+STORE, 3528609792, 3528638463,
+STORE, 3528638464, 3528642559,
+STORE, 3528642560, 3528646655,
+STORE, 3528880128, 3528912895,
+STORE, 3528912896, 3528962047,
+STORE, 3528962048, 3528966143,
+STORE, 3528966144, 3528970239,
+STORE, 3528982528, 3530293247,
+STORE, 3530366976, 3530825727,
+STORE, 3530825728, 3531317247,
+STORE, 3531317248, 3541041151,
+STORE, 3541041152, 3541303295,
+STORE, 3541430272, 3566206975,
+STORE, 3566206976, 3566993407,
+STORE, 3567239168, 3587571711,
+STORE, 3587571712, 3588284415,
+STORE, 3588284416, 3588661247,
+STORE, 3588661248, 3589066751,
+STORE, 3589066752, 3589574655,
+STORE, 3589574656, 3590078463,
+STORE, 3590078464, 3590373375,
+STORE, 3590373376, 3590668287,
+STORE, 3590668288, 3590963199,
+STORE, 3590963200, 3591294975,
+STORE, 3591294976, 3591602175,
+STORE, 3591602176, 3591933951,
+STORE, 3591933952, 3592241151,
+STORE, 3592241152, 3592572927,
+STORE, 3592572928, 3592876031,
+STORE, 3592876032, 3593211903,
+STORE, 3593211904, 3593547775,
+STORE, 3593547776, 3593650175,
+STORE, 3593650176, 3593928703,
+STORE, 3593928704, 3593936895,
+STORE, 3593936896, 3593940991,
+STORE, 3594006528, 3594301439,
+STORE, 3594301440, 3594739711,
+STORE, 3594739712, 3594756095,
+STORE, 3594756096, 3594760191,
+STORE, 3594760192, 3594768383,
+STORE, 3594952704, 3595051007,
+STORE, 3595051008, 3595223039,
+STORE, 3595223040, 3595227135,
+STORE, 3595227136, 3595235327,
+STORE, 3595431936, 3595775999,
+STORE, 3595776000, 3596701695,
+STORE, 3596701696, 3596742655,
+STORE, 3596742656, 3596746751,
+STORE, 3596746752, 3596750847,
+STORE, 3596767232, 3597070335,
+STORE, 3597070336, 3597402111,
+STORE, 3597402112, 3598188543,
+STORE, 3598262272, 3623428095,
+STORE, 3623428096, 3623432191,
+STORE, 3623432192, 3623436287,
+STORE, 3623436288, 3623440383,
+STORE, 3623616512, 3623878655,
+STORE, 3624169472, 3624300543,
+STORE, 3627524096, 3628523519,
+STORE, 3628523520, 3629522943,
+STORE, 3696631808, 3730186239,
+STORE, 3730186240, 3763740671,
+STORE, 3763740672, 3764027391,
+STORE, 3764027392, 3765133311,
+STORE, 3765133312, 3765145599,
+STORE, 3765145600, 3765149695,
+STORE, 3765178368, 3766022143,
+STORE, 3766022144, 3768791039,
+STORE, 3768791040, 3768840191,
+STORE, 3768840192, 3768844287,
+STORE, 3768897536, 3768913919,
+STORE, 3768913920, 3768934399,
+STORE, 3768934400, 3768938495,
+STORE, 3769016320, 3769147391,
+STORE, 3769147392, 3769233407,
+STORE, 3769233408, 3769356287,
+STORE, 3769356288, 3769360383,
+STORE, 3769360384, 3769368575,
+STORE, 3769376768, 3794542591,
+STORE, 3794542592, 3794599935,
+STORE, 3794599936, 3794731007,
+STORE, 3794731008, 3794735103,
+STORE, 3794735104, 3794743295,
+STORE, 3794849792, 3794980863,
+STORE, 3794980864, 3794984959,
+STORE, 3794984960, 3794989055,
+STORE, 3794989056, 3794993151,
+STORE, 3794993152, 3794997247,
+STORE, 3795103744, 3795128319,
+STORE, 3795128320, 3795165183,
+STORE, 3795165184, 3795169279,
+STORE, 3795169280, 3795173375,
+STORE, 3795210240, 3795357695,
+STORE, 3795357696, 3795365887,
+STORE, 3795365888, 3795374079,
+STORE, 3795374080, 3795378175,
+STORE, 3795378176, 3795382271,
+STORE, 3795406848, 3795738623,
+STORE, 3795738624, 3795742719,
+STORE, 3795742720, 3795755007,
+STORE, 3795755008, 3795759103,
+STORE, 3795763200, 3795894271,
+STORE, 3795894272, 3796041727,
+STORE, 3796041728, 3796054015,
+STORE, 3796054016, 3796066303,
+STORE, 3796066304, 3796070399,
+STORE, 3796176896, 3796205567,
+STORE, 3796205568, 3796250623,
+STORE, 3796250624, 3796254719,
+STORE, 3796254720, 3796258815,
+STORE, 3796262912, 3796393983,
+STORE, 3796393984, 3796516863,
+STORE, 3796516864, 3796873215,
+STORE, 3796873216, 3796885503,
+STORE, 3796885504, 3796889599,
+STORE, 3796963328, 3796967423,
+STORE, 3796967424, 3796975615,
+STORE, 3796975616, 3796979711,
+STORE, 3797000192, 3797307391,
+STORE, 3797307392, 3797311487,
+STORE, 3797311488, 3797315583,
+STORE, 3797315584, 3797323775,
+STORE, 3797327872, 3797450751,
+STORE, 3797450752, 3797458943,
+STORE, 3797458944, 3797471231,
+STORE, 3797471232, 3797475327,
+STORE, 3797577728, 3797700607,
+STORE, 3797700608, 3797721087,
+STORE, 3797721088, 3797733375,
+STORE, 3797733376, 3797741567,
+STORE, 3797741568, 3797864447,
+STORE, 3797864448, 3797995519,
+STORE, 3797995520, 3798048767,
+STORE, 3798048768, 3798179839,
+STORE, 3798179840, 3798188031,
+STORE, 3798188032, 3798192127,
+STORE, 3798290432, 3798302719,
+STORE, 3798302720, 3798323199,
+STORE, 3798323200, 3798327295,
+STORE, 3798327296, 3798331391,
+STORE, 3798429696, 3798433791,
+STORE, 3798433792, 3798552575,
+STORE, 3798552576, 3798556671,
+STORE, 3798556672, 3798568959,
+STORE, 3798568960, 3798573055,
+STORE, 3798573056, 3798581247,
+STORE, 3798618112, 3798749183,
+STORE, 3798749184, 3798855679,
+STORE, 3798855680, 3798966271,
+STORE, 3798966272, 3798982655,
+STORE, 3798982656, 3798986751,
+STORE, 3799101440, 3799171071,
+STORE, 3799171072, 3799240703,
+STORE, 3799240704, 3799248895,
+STORE, 3799248896, 3799252991,
+STORE, 3799326720, 3799650303,
+STORE, 3799650304, 3800629247,
+STORE, 3800629248, 3800641535,
+STORE, 3800641536, 3800645631,
+STORE, 3800645632, 3800649727,
+STORE, 3800649728, 3800903679,
+STORE, 3800903680, 3800936447,
+STORE, 3800936448, 3800969215,
+STORE, 3800969216, 3800981503,
+STORE, 3800981504, 3800985599,
+STORE, 3801001984, 3801133055,
+STORE, 3801133056, 3801202687,
+STORE, 3801202688, 3801591807,
+STORE, 3801591808, 3801599999,
+STORE, 3801600000, 3801604095,
+STORE, 3801604096, 3801608191,
+STORE, 3801608192, 3801739263,
+STORE, 3801739264, 3801755647,
+STORE, 3801755648, 3801796607,
+STORE, 3801796608, 3801804799,
+STORE, 3801804800, 3801808895,
+STORE, 3801878528, 3801944063,
+STORE, 3801944064, 3802116095,
+STORE, 3802116096, 3802124287,
+STORE, 3802124288, 3802128383,
+STORE, 3802136576, 3803447295,
+STORE, 3803492352, 3803553791,
+STORE, 3803553792, 3804233727,
+STORE, 3804233728, 3806068735,
+STORE, 3806121984, 3806253055,
+STORE, 3806253056, 3806674943,
+STORE, 3806674944, 3807117311,
+STORE, 3807117312, 3807379455,
+STORE, 3807379456, 3807432703,
+STORE, 3807432704, 3807563775,
+STORE, 3807563776, 3809202175,
+STORE, 3809202176, 3810250751,
+STORE, 3810250752, 3827027967,
+STORE, 3827027968, 3829125119,
+STORE, 3829125120, 3837513727,
+STORE, 3837513728, 3839610879,
+STORE, 3839610880, 3847999487,
+STORE, 3847999488, 3856392191,
+STORE, 3856392192, 3864784895,
+STORE, 3864784896, 3868983295,
+STORE, 3868983296, 3885760511,
+STORE, 3885760512, 3886809087,
+STORE, 3886809088, 3887857663,
+STORE, 3887857664, 3888119807,
+STORE, 3888144384, 3888148479,
+STORE, 3888148480, 3888218111,
+STORE, 3888218112, 3888222207,
+STORE, 3888222208, 3888353279,
+STORE, 3888353280, 3889172479,
+STORE, 3889172480, 3892314111,
+STORE, 3892314112, 3892576255,
+STORE, 3892588544, 3892637695,
+STORE, 3892637696, 3892686847,
+STORE, 3892686848, 3892744191,
+STORE, 3892748288, 3892785151,
+STORE, 3892785152, 3895459839,
+STORE, 3895459840, 3895721983,
+STORE, 3895738368, 3895885823,
+STORE, 3895885824, 3897081855,
+STORE, 3897081856, 3906482175,
+STORE, 3906482176, 3916144639,
+STORE, 3916144640, 3925766143,
+STORE, 3925766144, 3926974463,
+STORE, 3926974464, 3928367103,
+STORE, 3928367104, 3928911871,
+STORE, 3928911872, 3933995007,
+STORE, 3933995008, 3935830015,
+STORE, 3935830016, 3935846399,
+STORE, 3935879168, 3936010239,
+STORE, 3936010240, 3936026623,
+STORE, 3936026624, 3936034815,
+STORE, 3936034816, 3936051199,
+STORE, 3936051200, 3936055295,
+STORE, 3936071680, 3936137215,
+STORE, 3936137216, 3936202751,
+STORE, 3936202752, 3936219135,
+STORE, 3936235520, 3936251903,
+STORE, 3936268288, 3936276479,
+STORE, 3936276480, 3936284671,
+STORE, 3936284672, 3936288767,
+STORE, 3936288768, 3936292863,
+STORE, 3936296960, 3936354303,
+STORE, 3936354304, 3936616447,
+STORE, 3936628736, 3936669695,
+STORE, 3936669696, 3936747519,
+STORE, 3936747520, 3936870399,
+STORE, 3936870400, 3936874495,
+STORE, 3936874496, 3936878591,
+STORE, 3936882688, 3936903167,
+STORE, 3936911360, 3936948223,
+STORE, 3936948224, 3936964607,
+STORE, 3936964608, 3937103871,
+STORE, 3937103872, 3937107967,
+STORE, 3937132544, 3937161215,
+STORE, 3937189888, 3937255423,
+STORE, 3937255424, 3938512895,
+STORE, 3938512896, 3945435135,
+STORE, 3945435136, 3945476095,
+STORE, 3945476096, 3945484287,
+STORE, 3945484288, 3945496575,
+STORE, 3945500672, 3945541631,
+STORE, 3945558016, 3945566207,
+STORE, 3945566208, 3945594879,
+STORE, 3945594880, 3945598975,
+STORE, 3945598976, 3945603071,
+STORE, 3945611264, 3945742335,
+STORE, 3945742336, 3945844735,
+STORE, 3945844736, 3945848831,
+STORE, 3945848832, 3945861119,
+STORE, 3945861120, 3945865215,
+STORE, 3945869312, 3945897983,
+STORE, 3945897984, 3946303487,
+STORE, 3946303488, 3946397695,
+STORE, 3946397696, 3946569727,
+STORE, 3946569728, 3946573823,
+STORE, 3946573824, 3946594303,
+STORE, 3946594304, 3946663935,
+STORE, 3946663936, 3946708991,
+STORE, 3946708992, 3946823679,
+STORE, 3946823680, 3946827775,
+STORE, 3946827776, 3946831871,
+STORE, 3946831872, 3946860543,
+STORE, 3946893312, 3946897407,
+STORE, 3946897408, 3946905599,
+STORE, 3946905600, 3946909695,
+STORE, 3946909696, 3946913791,
+STORE, 3946913792, 3946930175,
+STORE, 3946930176, 3946967039,
+STORE, 3946967040, 3947102207,
+STORE, 3947102208, 3948412927,
+STORE, 3948441600, 3948556287,
+STORE, 3948556288, 3948576767,
+STORE, 3948576768, 3948597247,
+STORE, 3948597248, 3948605439,
+STORE, 3948605440, 3948609535,
+STORE, 3948609536, 3948654591,
+STORE, 3948654592, 3948781567,
+STORE, 3948781568, 3948822527,
+STORE, 3948822528, 3948904447,
+STORE, 3948904448, 3948908543,
+STORE, 3948908544, 3948912639,
+STORE, 3948945408, 3949043711,
+STORE, 3949043712, 3949174783,
+STORE, 3949174784, 3949191167,
+STORE, 3949191168, 3949195263,
+STORE, 3949207552, 3949252607,
+STORE, 3949252608, 3949256703,
+STORE, 3949256704, 3949363199,
+STORE, 3949363200, 3949367295,
+STORE, 3949367296, 3949379583,
+STORE, 3949379584, 3949383679,
+STORE, 3949383680, 3949400063,
+STORE, 3949400064, 3949404159,
+STORE, 3949416448, 3949481983,
+STORE, 3949481984, 3949486079,
+STORE, 3949486080, 3949592575,
+STORE, 3949592576, 3949596671,
+STORE, 3949596672, 3949621247,
+STORE, 3949621248, 3949662207,
+STORE, 3949662208, 3949666303,
+STORE, 3949694976, 3949727743,
+STORE, 3949727744, 3949731839,
+STORE, 3949731840, 3949838335,
+STORE, 3949838336, 3949842431,
+STORE, 3949842432, 3949846527,
+STORE, 3949846528, 3949854719,
+STORE, 3949854720, 3949858815,
+STORE, 3949858816, 3949862911,
+STORE, 3949867008, 3949891583,
+STORE, 3949891584, 3949928447,
+STORE, 3949928448, 3949993983,
+STORE, 3949993984, 3950043135,
+STORE, 3950043136, 3950059519,
+STORE, 3950059520, 3950096383,
+STORE, 3950096384, 3950100479,
+STORE, 3950100480, 3950104575,
+STORE, 3950104576, 3950157823,
+STORE, 3950157824, 3950292991,
+STORE, 3950292992, 3950346239,
+STORE, 3950346240, 3950477311,
+STORE, 3950477312, 3950485503,
+STORE, 3950485504, 3950489599,
+STORE, 3950493696, 3950510079,
+STORE, 3950510080, 3950661631,
+STORE, 3950661632, 3951005695,
+STORE, 3951005696, 3951026175,
+STORE, 3951026176, 3951030271,
+STORE, 3951030272, 3951054847,
+STORE, 3951054848, 3951116287,
+STORE, 3951116288, 3951144959,
+STORE, 3951144960, 3951149055,
+STORE, 3951149056, 3951194111,
+STORE, 3951194112, 3951202303,
+STORE, 3951202304, 3951206399,
+STORE, 3951210496, 3951226879,
+STORE, 3951226880, 3951329279,
+STORE, 3951329280, 3951366143,
+STORE, 3951366144, 3951411199,
+STORE, 3951411200, 3951415295,
+STORE, 3951415296, 3951419391,
+STORE, 3951419392, 3951452159,
+STORE, 3951452160, 3951566847,
+STORE, 3951566848, 3951812607,
+STORE, 3951812608, 3952173055,
+STORE, 3952173056, 3952214015,
+STORE, 3952214016, 3952218111,
+STORE, 3952222208, 3952250879,
+STORE, 3952250880, 3952369663,
+STORE, 3952369664, 3952488447,
+STORE, 3952488448, 3952627711,
+STORE, 3952627712, 3952635903,
+STORE, 3952635904, 3952639999,
+STORE, 3952652288, 3952668671,
+STORE, 3952668672, 3953000447,
+STORE, 3953000448, 3953004543,
+STORE, 3953004544, 3953008639,
+STORE, 3953008640, 3953012735,
+STORE, 3953012736, 3953037311,
+STORE, 3953037312, 3953151999,
+STORE, 3953152000, 3953291263,
+STORE, 3953291264, 3953324031,
+STORE, 3953324032, 3953364991,
+STORE, 3953364992, 3953373183,
+STORE, 3953373184, 3953377279,
+STORE, 3953381376, 3953410047,
+STORE, 3953410048, 3953491967,
+STORE, 3953491968, 3953643519,
+STORE, 3953643520, 3953651711,
+STORE, 3953651712, 3953655807,
+STORE, 3953659904, 3953766399,
+STORE, 3953766400, 3953774591,
+STORE, 3953774592, 3953786879,
+STORE, 3953786880, 3953790975,
+STORE, 3953790976, 3953823743,
+STORE, 3953823744, 3953963007,
+STORE, 3953963008, 3954024447,
+STORE, 3954024448, 3954118655,
+STORE, 3954118656, 3954122751,
+STORE, 3954122752, 3954126847,
+STORE, 3954130944, 3954184191,
+STORE, 3954184192, 3954294783,
+STORE, 3954294784, 3954323455,
+STORE, 3954323456, 3954393087,
+STORE, 3954393088, 3954397183,
+STORE, 3954397184, 3954401279,
+STORE, 3954401280, 3954405375,
+STORE, 3954409472, 3954528255,
+STORE, 3954528256, 3954737151,
+STORE, 3954737152, 3955052543,
+STORE, 3955052544, 3955060735,
+STORE, 3955060736, 3955064831,
+STORE, 3955068928, 3955105791,
+STORE, 3955105792, 3955167231,
+STORE, 3955167232, 3955277823,
+STORE, 3955277824, 3955310591,
+STORE, 3955310592, 3955351551,
+STORE, 3955351552, 3955359743,
+STORE, 3955359744, 3955363839,
+STORE, 3955363840, 3955392511,
+STORE, 3955392512, 3955453951,
+STORE, 3955453952, 3955601407,
+STORE, 3955601408, 3955777535,
+STORE, 3955777536, 3955982335,
+STORE, 3955982336, 3956011007,
+STORE, 3956011008, 3956015103,
+STORE, 3956023296, 3956039679,
+STORE, 3956039680, 3956125695,
+STORE, 3956125696, 3956129791,
+STORE, 3956129792, 3956133887,
+STORE, 3956133888, 3956137983,
+STORE, 3956142080, 3956449279,
+STORE, 3956449280, 3956543487,
+STORE, 3956543488, 3956719615,
+STORE, 3956719616, 3956731903,
+STORE, 3956731904, 3956735999,
+STORE, 3956744192, 3956793343,
+STORE, 3956793344, 3956887551,
+STORE, 3956887552, 3956953087,
+STORE, 3956953088, 3957035007,
+STORE, 3957035008, 3957039103,
+STORE, 3957039104, 3957047295,
+STORE, 3957047296, 3957071871,
+STORE, 3957071872, 3957231615,
+STORE, 3957231616, 3957563391,
+STORE, 3957563392, 3957579775,
+STORE, 3957579776, 3957583871,
+STORE, 3957592064, 3957608447,
+STORE, 3957608448, 3957878783,
+STORE, 3957878784, 3958591487,
+STORE, 3958591488, 3958599679,
+STORE, 3958599680, 3958607871,
+STORE, 3958607872, 3958620159,
+STORE, 3958620160, 3958624255,
+STORE, 3958624256, 3963199487,
+STORE, 3963199488, 3963285503,
+STORE, 3963285504, 3963371519,
+STORE, 3963371520, 3963428863,
+STORE, 3963428864, 3963555839,
+STORE, 3963555840, 3963559935,
+STORE, 3963559936, 3963564031,
+STORE, 3963568128, 3963596799,
+STORE, 3963596800, 3963682815,
+STORE, 3963682816, 3963695103,
+STORE, 3963695104, 3963711487,
+STORE, 3963711488, 3963715583,
+STORE, 3963719680, 3963752447,
+STORE, 3963752448, 3963846655,
+STORE, 3963846656, 3963932671,
+STORE, 3963932672, 3964444671,
+STORE, 3964444672, 3964448767,
+STORE, 3964448768, 3965808639,
+STORE, 3965808640, 3965845503,
+STORE, 3965845504, 3965849599,
+STORE, 3965853696, 3965935615,
+STORE, 3965935616, 3966017535,
+STORE, 3966017536, 3966103551,
+STORE, 3966103552, 3966685183,
+STORE, 3966685184, 3967705087,
+STORE, 3967705088, 3967758335,
+STORE, 3967758336, 3967762431,
+STORE, 3967762432, 3967770623,
+STORE, 3967770624, 3967799295,
+STORE, 3967799296, 3967848447,
+STORE, 3967848448, 3967868927,
+STORE, 3967868928, 3967901695,
+STORE, 3967901696, 3967905791,
+STORE, 3967905792, 3967909887,
+STORE, 3967909888, 3967995903,
+STORE, 3967995904, 3968077823,
+STORE, 3968077824, 3968159743,
+STORE, 3968159744, 3968167935,
+STORE, 3968167936, 3968172031,
+STORE, 3968172032, 3968192511,
+STORE, 3968192512, 3968196607,
+STORE, 3968196608, 3968200703,
+STORE, 3968208896, 3968516095,
+STORE, 3968516096, 3968528383,
+STORE, 3968528384, 3968552959,
+STORE, 3968552960, 3968557055,
+STORE, 3968561152, 3968593919,
+STORE, 3968593920, 3968626687,
+STORE, 3968626688, 3971153919,
+STORE, 3971153920, 3973754879,
+STORE, 3973754880, 3973804031,
+STORE, 3973804032, 3973820415,
+STORE, 3973820416, 3973832703,
+STORE, 3973840896, 3973873663,
+STORE, 3973873664, 3973967871,
+STORE, 3973967872, 3973976063,
+STORE, 3973976064, 3973984255,
+STORE, 3973984256, 3973988351,
+STORE, 3973988352, 3973992447,
+STORE, 3973996544, 3974008831,
+STORE, 3974008832, 3974045695,
+STORE, 3974045696, 3974139903,
+STORE, 3974139904, 3974254591,
+STORE, 3974254592, 3974275071,
+STORE, 3974275072, 3974291455,
+STORE, 3974291456, 3974295551,
+STORE, 3974295552, 3974373375,
+STORE, 3974373376, 3974524927,
+STORE, 3974524928, 3974529023,
+STORE, 3974529024, 3974537215,
+STORE, 3974537216, 3974541311,
+STORE, 3974541312, 3974545407,
+STORE, 3974545408, 3974627327,
+STORE, 3974627328, 3974680575,
+STORE, 3974680576, 3974811647,
+STORE, 3974811648, 3974819839,
+STORE, 3974819840, 3974823935,
+STORE, 3974832128, 3974918143,
+STORE, 3974918144, 3974963199,
+STORE, 3974963200, 3975077887,
+STORE, 3975077888, 3975090175,
+STORE, 3975090176, 3975094271,
+STORE, 3975094272, 3975102463,
+STORE, 3975102464, 3975114751,
+STORE, 3975114752, 3975266303,
+STORE, 3975266304, 3975274495,
+STORE, 3975274496, 3975286783,
+STORE, 3975286784, 3975290879,
+STORE, 3975290880, 3975299071,
+STORE, 3975299072, 3975315455,
+STORE, 3975315456, 3975430143,
+STORE, 3975430144, 3975536639,
+STORE, 3975536640, 3975651327,
+STORE, 3975651328, 3975655423,
+STORE, 3975655424, 3975659519,
+STORE, 3975659520, 3975770111,
+STORE, 3975770112, 3975778303,
+STORE, 3975778304, 3975790591,
+STORE, 3975790592, 3975794687,
+STORE, 3975794688, 3975798783,
+STORE, 3975798784, 3975831551,
+STORE, 3975831552, 3975872511,
+STORE, 3975872512, 3975987199,
+STORE, 3975987200, 3976134655,
+STORE, 3976134656, 3977175039,
+STORE, 3977175040, 3977183231,
+STORE, 3977183232, 3977191423,
+STORE, 3977191424, 3977195519,
+STORE, 3977199616, 3977248767,
+STORE, 3977248768, 3977539583,
+STORE, 3977539584, 3977965567,
+STORE, 3977965568, 3977981951,
+STORE, 3977981952, 3977986047,
+STORE, 3977986048, 3977994239,
+STORE, 3977994240, 3978002431,
+STORE, 3978002432, 3978084351,
+STORE, 3978084352, 3978125311,
+STORE, 3978125312, 3978174463,
+STORE, 3978174464, 3978178559,
+STORE, 3978178560, 3978182655,
+STORE, 3978182656, 3978207231,
+STORE, 3978207232, 3978297343,
+STORE, 3978297344, 3978301439,
+STORE, 3978301440, 3978305535,
+STORE, 3978305536, 3978309631,
+STORE, 3978309632, 3978317823,
+STORE, 3978317824, 3978625023,
+STORE, 3978625024, 3978657791,
+STORE, 3978657792, 3978727423,
+STORE, 3978727424, 3978735615,
+STORE, 3978735616, 3978739711,
+STORE, 3978739712, 3978760191,
+STORE, 3978760192, 3978842111,
+STORE, 3978842112, 3978850303,
+STORE, 3978850304, 3978858495,
+STORE, 3978858496, 3978862591,
+STORE, 3978862592, 3978895359,
+STORE, 3978895360, 3979014143,
+STORE, 3979014144, 3979132927,
+STORE, 3979132928, 3979288575,
+STORE, 3979288576, 3979481087,
+STORE, 3979481088, 3979489279,
+STORE, 3979489280, 3979493375,
+STORE, 3979497472, 3979583487,
+STORE, 3979583488, 3979673599,
+STORE, 3979673600, 3979718655,
+STORE, 3979718656, 3979829247,
+STORE, 3979829248, 3979841535,
+STORE, 3979841536, 3979882495,
+STORE, 3979882496, 3979964415,
+STORE, 3979964416, 3980013567,
+STORE, 3980013568, 3980148735,
+STORE, 3980148736, 3980152831,
+STORE, 3980152832, 3980320767,
+STORE, 3980320768, 3980337151,
+STORE, 3980337152, 3980341247,
+STORE, 3980345344, 3980365823,
+STORE, 3980365824, 3980423167,
+STORE, 3980423168, 3980460031,
+STORE, 3980460032, 3980500991,
+STORE, 3980500992, 3980509183,
+STORE, 3980509184, 3980513279,
+STORE, 3980513280, 3980546047,
+STORE, 3980546048, 3980660735,
+STORE, 3980660736, 3980951551,
+STORE, 3980951552, 3981500415,
+STORE, 3981500416, 3981529087,
+STORE, 3981529088, 3981533183,
+STORE, 3981537280, 3981549567,
+STORE, 3981549568, 3981598719,
+STORE, 3981598720, 3981717503,
+STORE, 3981717504, 3982127103,
+STORE, 3982127104, 3982675967,
+STORE, 3982675968, 3982733311,
+STORE, 3982733312, 3982737407,
+STORE, 3982741504, 3982860287,
+STORE, 3982860288, 3982905343,
+STORE, 3982905344, 3982966783,
+STORE, 3982966784, 3982974975,
+STORE, 3982974976, 3982979071,
+STORE, 3982979072, 3983032319,
+STORE, 3983032320, 3983085567,
+STORE, 3983085568, 3983208447,
+STORE, 3983208448, 3983212543,
+STORE, 3983212544, 3983220735,
+STORE, 3983220736, 3983224831,
+STORE, 3983224832, 3983237119,
+STORE, 3983237120, 3983351807,
+STORE, 3983351808, 3983376383,
+STORE, 3983376384, 3983392767,
+STORE, 3983392768, 3983396863,
+STORE, 3983396864, 3983400959,
+STORE, 3983400960, 3983417343,
+STORE, 3983417344, 3983753215,
+STORE, 3983753216, 3983757311,
+STORE, 3983757312, 3983761407,
+STORE, 3983761408, 3983765503,
+STORE, 3983765504, 3983769599,
+STORE, 3983769600, 3983880191,
+STORE, 3983880192, 3983892479,
+STORE, 3983892480, 3983900671,
+STORE, 3983900672, 3983904767,
+STORE, 3983904768, 3983908863,
+STORE, 3983908864, 3983941631,
+STORE, 3983941632, 3983990783,
+STORE, 3983990784, 3984097279,
+STORE, 3984097280, 3984105471,
+STORE, 3984105472, 3984117759,
+STORE, 3984117760, 3984121855,
+STORE, 3984121856, 3984125951,
+STORE, 3984125952, 3984134143,
+STORE, 3984134144, 3984150527,
+STORE, 3984150528, 3984416767,
+STORE, 3984416768, 3984470015,
+STORE, 3984470016, 3984564223,
+STORE, 3984564224, 3984568319,
+STORE, 3984572416, 3984629759,
+STORE, 3984629760, 3984805887,
+STORE, 3984805888, 3985096703,
+STORE, 3985096704, 3985104895,
+STORE, 3985104896, 3985108991,
+STORE, 3985113088, 3986862079,
+STORE, 3986862080, 3993640959,
+STORE, 3993640960, 3993739263,
+STORE, 3993739264, 3993743359,
+STORE, 3993743360, 3993759743,
+STORE, 3993759744, 3993780223,
+STORE, 3993780224, 3993784319,
+STORE, 3993784320, 3993792511,
+STORE, 3993792512, 3993796607,
+STORE, 3993796608, 3993800703,
+STORE, 3993804800, 3994214399,
+STORE, 3994214400, 3994218495,
+STORE, 3994218496, 3994222591,
+STORE, 3994222592, 3994226687,
+STORE, 3994230784, 3994243071,
+STORE, 3994243072, 3994255359,
+STORE, 3994255360, 3994304511,
+STORE, 3994304512, 3994386431,
+STORE, 3994386432, 3994509311,
+STORE, 3994509312, 3994521599,
+STORE, 3994521600, 3994525695,
+STORE, 3994529792, 3994542079,
+STORE, 3994542080, 3994660863,
+STORE, 3994660864, 3994705919,
+STORE, 3994705920, 3994796031,
+STORE, 3994796032, 3994800127,
+STORE, 3994800128, 3994804223,
+STORE, 3994804224, 3994812415,
+STORE, 3994812416, 3994845183,
+STORE, 3994845184, 3994898431,
+STORE, 3994898432, 3994902527,
+STORE, 3994902528, 3994906623,
+STORE, 3994910720, 3994931199,
+STORE, 3994931200, 3995181055,
+STORE, 3995181056, 3995222015,
+STORE, 3995222016, 3995275263,
+STORE, 3995275264, 3995279359,
+STORE, 3995279360, 3995283455,
+STORE, 3995283456, 3995291647,
+STORE, 3995291648, 3995324415,
+STORE, 3995324416, 3995451391,
+STORE, 3995451392, 3995697151,
+STORE, 3995697152, 3996078079,
+STORE, 3996078080, 3996086271,
+STORE, 3996086272, 3996090367,
+STORE, 3996094464, 3996119039,
+STORE, 3996119040, 3996200959,
+STORE, 3996200960, 3996229631,
+STORE, 3996229632, 3996233727,
+STORE, 3996233728, 3996282879,
+STORE, 3996282880, 3996291071,
+STORE, 3996291072, 3996295167,
+STORE, 3996299264, 3996311551,
+STORE, 3996311552, 3996430335,
+STORE, 3996430336, 3996467199,
+STORE, 3996467200, 3996504063,
+STORE, 3996504064, 3996512255,
+STORE, 3996512256, 3996516351,
+STORE, 3996516352, 3996540927,
+STORE, 3996540928, 3996671999,
+STORE, 3996672000, 3996676095,
+STORE, 3996676096, 3996684287,
+STORE, 3996684288, 3996688383,
+STORE, 3996688384, 3996692479,
+STORE, 3996692480, 3996717055,
+STORE, 3996717056, 3997048831,
+STORE, 3997048832, 3997057023,
+STORE, 3997057024, 3997073407,
+STORE, 3997073408, 3997077503,
+STORE, 3997077504, 3997081599,
+STORE, 3997081600, 3997097983,
+STORE, 3997097984, 3997179903,
+STORE, 3997179904, 3997356031,
+STORE, 3997356032, 3997650943,
+STORE, 3997650944, 3997675519,
+STORE, 3997675520, 3997679615,
+STORE, 3997683712, 3997700095,
+STORE, 3997700096, 3997745151,
+STORE, 3997745152, 3997802495,
+STORE, 3997802496, 3997810687,
+STORE, 3997810688, 3997814783,
+STORE, 3997814784, 3998064639,
+STORE, 3998064640, 3998081023,
+STORE, 3998081024, 3998085119,
+STORE, 3998085120, 3998130175,
+STORE, 3998130176, 3998134271,
+STORE, 3998134272, 3998142463,
+STORE, 3998142464, 3998179327,
+STORE, 3998179328, 3998212095,
+STORE, 3998212096, 3998326783,
+STORE, 3998326784, 3998351359,
+STORE, 3998351360, 3998392319,
+STORE, 3998392320, 3998396415,
+STORE, 3998396416, 3998400511,
+STORE, 3998400512, 3998433279,
+STORE, 3998433280, 3998466047,
+STORE, 3998466048, 3998613503,
+STORE, 3998613504, 3998666751,
+STORE, 3998666752, 3998724095,
+STORE, 3998724096, 3998732287,
+STORE, 3998732288, 3998736383,
+STORE, 3998736384, 3998760959,
+STORE, 3998760960, 3998777343,
+STORE, 3998777344, 3998822399,
+STORE, 3998822400, 3998826495,
+STORE, 3998826496, 3998830591,
+STORE, 3998830592, 3998863359,
+STORE, 3998863360, 3998900223,
+STORE, 3998900224, 3999043583,
+STORE, 3999043584, 3999121407,
+STORE, 3999121408, 3999215615,
+STORE, 3999215616, 3999223807,
+STORE, 3999223808, 3999227903,
+STORE, 3999227904, 3999236095,
+STORE, 3999236096, 3999268863,
+STORE, 3999268864, 3999301631,
+STORE, 3999301632, 3999354879,
+STORE, 3999354880, 3999428607,
+STORE, 3999428608, 3999436799,
+STORE, 3999436800, 3999440895,
+STORE, 3999444992, 3999461375,
+STORE, 3999461376, 3999584255,
+STORE, 3999584256, 3999760383,
+STORE, 3999760384, 4000219135,
+STORE, 4000219136, 4000235519,
+STORE, 4000235520, 4000251903,
+STORE, 4000251904, 4000501759,
+STORE, 4000501760, 4000505855,
+STORE, 4000505856, 4000509951,
+STORE, 4000509952, 4000518143,
+STORE, 4000518144, 4000522239,
+STORE, 4000522240, 4000587775,
+STORE, 4000587776, 4000645119,
+STORE, 4000645120, 4000813055,
+STORE, 4000813056, 4000817151,
+STORE, 4000821248, 4000837631,
+STORE, 4000837632, 4000870399,
+STORE, 4000870400, 4000874495,
+STORE, 4000874496, 4000878591,
+STORE, 4000878592, 4000882687,
+STORE, 4000882688, 4000886783,
+STORE, 4000886784, 4000890879,
+STORE, 4000890880, 4000907263,
+STORE, 4000907264, 4001214463,
+STORE, 4001214464, 4001558527,
+STORE, 4001558528, 4002484223,
+STORE, 4002484224, 4002525183,
+STORE, 4002525184, 4002529279,
+STORE, 4002529280, 4002533375,
+STORE, 4002533376, 4002537471,
+STORE, 4002537472, 4002660351,
+STORE, 4002660352, 4002779135,
+STORE, 4002779136, 4002791423,
+STORE, 4002791424, 4002799615,
+STORE, 4002799616, 4002807807,
+STORE, 4002807808, 4002811903,
+STORE, 4002811904, 4002828287,
+STORE, 4002828288, 4002910207,
+STORE, 4002910208, 4003028991,
+STORE, 4003028992, 4003037183,
+STORE, 4003037184, 4003045375,
+STORE, 4003045376, 4003049471,
+STORE, 4003049472, 4003053567,
+STORE, 4003053568, 4003057663,
+STORE, 4003057664, 4003065855,
+STORE, 4003065856, 4003135487,
+STORE, 4003135488, 4003446783,
+STORE, 4003446784, 4003450879,
+STORE, 4003450880, 4003454975,
+STORE, 4003454976, 4003459071,
+STORE, 4003459072, 4003463167,
+STORE, 4003463168, 4003495935,
+STORE, 4003495936, 4003569663,
+STORE, 4003569664, 4003573759,
+STORE, 4003573760, 4003704831,
+STORE, 4003704832, 4003708927,
+STORE, 4003708928, 4003713023,
+STORE, 4003713024, 4003737599,
+STORE, 4003737600, 4003770367,
+STORE, 4003770368, 4003876863,
+STORE, 4003876864, 4003880959,
+STORE, 4003880960, 4003885055,
+STORE, 4003885056, 4003889151,
+STORE, 4003889152, 4003893247,
+STORE, 4003893248, 4003897343,
+STORE, 4003897344, 4003962879,
+STORE, 4003962880, 4004069375,
+STORE, 4004069376, 4004093951,
+STORE, 4004093952, 4004118527,
+STORE, 4004118528, 4004122623,
+STORE, 4004122624, 4004126719,
+STORE, 4004126720, 4004155391,
+STORE, 4004155392, 4004286463,
+STORE, 4004286464, 4004384767,
+STORE, 4004384768, 4004388863,
+STORE, 4004388864, 4004646911,
+STORE, 4004646912, 4004655103,
+STORE, 4004655104, 4004659199,
+STORE, 4004659200, 4004667391,
+STORE, 4004667392, 4004683775,
+STORE, 4004683776, 4004814847,
+STORE, 4004814848, 4004818943,
+STORE, 4004818944, 4004823039,
+STORE, 4004823040, 4004827135,
+STORE, 4004827136, 4004835327,
+STORE, 4004835328, 4004954111,
+STORE, 4004954112, 4005085183,
+STORE, 4005085184, 4005306367,
+STORE, 4005306368, 4005765119,
+STORE, 4005765120, 4005789695,
+STORE, 4005789696, 4005793791,
+STORE, 4005793792, 4005801983,
+STORE, 4005801984, 4005920767,
+STORE, 4005920768, 4005945343,
+STORE, 4005945344, 4005949439,
+STORE, 4005949440, 4005986303,
+STORE, 4005986304, 4005990399,
+STORE, 4005990400, 4005994495,
+STORE, 4005994496, 4006002687,
+STORE, 4006002688, 4006109183,
+STORE, 4006109184, 4006117375,
+STORE, 4006117376, 4006121471,
+STORE, 4006121472, 4006133759,
+STORE, 4006133760, 4006137855,
+STORE, 4006137856, 4006141951,
+STORE, 4006141952, 4006150143,
+STORE, 4006150144, 4006391807,
+STORE, 4006391808, 4006445055,
+STORE, 4006445056, 4006563839,
+STORE, 4006563840, 4006572031,
+STORE, 4006572032, 4006576127,
+STORE, 4006576128, 4006584319,
+STORE, 4006584320, 4006694911,
+STORE, 4006694912, 4006739967,
+STORE, 4006739968, 4006776831,
+STORE, 4006776832, 4006785023,
+STORE, 4006785024, 4006789119,
+STORE, 4006789120, 4006797311,
+STORE, 4006797312, 4006813695,
+STORE, 4006813696, 4006846463,
+STORE, 4006846464, 4006977535,
+STORE, 4006977536, 4007006207,
+STORE, 4007006208, 4007010303,
+STORE, 4007010304, 4007067647,
+STORE, 4007067648, 4007075839,
+STORE, 4007075840, 4007084031,
+STORE, 4007084032, 4007100415,
+STORE, 4007100416, 4007116799,
+STORE, 4007116800, 4007133183,
+STORE, 4007133184, 4007153663,
+STORE, 4007153664, 4007178239,
+STORE, 4007178240, 4007202815,
+STORE, 4007202816, 4007206911,
+STORE, 4007206912, 4007272447,
+STORE, 4007272448, 4007276543,
+STORE, 4007276544, 4007280639,
+STORE, 4007280640, 4007284735,
+STORE, 4007284736, 4007292927,
+STORE, 4007292928, 4007423999,
+STORE, 4007424000, 4007448575,
+STORE, 4007448576, 4007452671,
+STORE, 4007452672, 4007505919,
+STORE, 4007505920, 4007510015,
+STORE, 4007510016, 4007514111,
+STORE, 4007514112, 4007645183,
+STORE, 4007645184, 4007776255,
+STORE, 4007776256, 4007780351,
+STORE, 4007780352, 4007784447,
+STORE, 4007784448, 4007788543,
+STORE, 4007788544, 4007809023,
+STORE, 4007809024, 4007829503,
+STORE, 4007829504, 4007960575,
+STORE, 4007960576, 4008091647,
+STORE, 4008091648, 4008296447,
+STORE, 4008296448, 4008890367,
+STORE, 4008890368, 4008898559,
+STORE, 4008898560, 4008902655,
+STORE, 4008902656, 4008996863,
+STORE, 4008996864, 4009041919,
+STORE, 4009041920, 4009082879,
+STORE, 4009082880, 4009091071,
+STORE, 4009091072, 4009107455,
+STORE, 4009107456, 4009349119,
+STORE, 4009349120, 4009373695,
+STORE, 4009373696, 4009414655,
+STORE, 4009414656, 4009422847,
+STORE, 4009422848, 4009426943,
+STORE, 4009426944, 4009447423,
+STORE, 4009447424, 4009471999,
+STORE, 4009472000, 4009512959,
+STORE, 4009512960, 4009594879,
+STORE, 4009594880, 4009598975,
+STORE, 4009598976, 4009697279,
+STORE, 4009697280, 4009713663,
+STORE, 4009713664, 4009717759,
+STORE, 4009717760, 4009721855,
+STORE, 4009721856, 4009730047,
+STORE, 4009730048, 4009861119,
+STORE, 4009861120, 4009951231,
+STORE, 4009951232, 4010131455,
+STORE, 4010131456, 4010135551,
+STORE, 4010135552, 4010139647,
+STORE, 4010139648, 4010143743,
+STORE, 4010143744, 4010164223,
+STORE, 4010164224, 4010295295,
+STORE, 4010295296, 4010299391,
+STORE, 4010299392, 4010491903,
+STORE, 4010491904, 4010495999,
+STORE, 4010496000, 4010668031,
+STORE, 4010668032, 4011028479,
+STORE, 4011028480, 4011053055,
+STORE, 4011053056, 4011057151,
+STORE, 4011057152, 4011118591,
+STORE, 4011118592, 4011126783,
+STORE, 4011126784, 4011130879,
+STORE, 4011130880, 4011143167,
+STORE, 4011143168, 4011147263,
+STORE, 4011147264, 4011167743,
+STORE, 4011167744, 4011171839,
+STORE, 4011171840, 4011360255,
+STORE, 4011360256, 4011364351,
+STORE, 4011364352, 4011626495,
+STORE, 4011626496, 4012216319,
+STORE, 4012216320, 4012228607,
+STORE, 4012228608, 4012232703,
+STORE, 4012232704, 4012236799,
+STORE, 4012236800, 4012240895,
+STORE, 4012240896, 4012261375,
+STORE, 4012261376, 4012392447,
+STORE, 4012392448, 4012466175,
+STORE, 4012466176, 4012597247,
+STORE, 4012597248, 4012601343,
+STORE, 4012601344, 4012605439,
+STORE, 4012605440, 4012609535,
+STORE, 4012609536, 4012679167,
+STORE, 4012679168, 4013563903,
+STORE, 4013563904, 4015366143,
+STORE, 4015366144, 4015411199,
+STORE, 4015411200, 4015415295,
+STORE, 4015415296, 4015419391,
+STORE, 4015419392, 4015542271,
+STORE, 4015542272, 4015550463,
+STORE, 4015550464, 4015558655,
+STORE, 4015558656, 4015562751,
+STORE, 4015562752, 4015583231,
+STORE, 4015583232, 4015587327,
+STORE, 4015587328, 4015603711,
+STORE, 4015665152, 4015669247,
+STORE, 4015669248, 4015812607,
+STORE, 4015812608, 4015816703,
+STORE, 4015816704, 4016111615,
+STORE, 4016111616, 4016467967,
+STORE, 4016467968, 4016508927,
+STORE, 4016508928, 4016517119,
+STORE, 4016517120, 4016525311,
+STORE, 4016525312, 4016586751,
+STORE, 4016586752, 4016664575,
+STORE, 4016664576, 4016697343,
+STORE, 4016697344, 4016742399,
+STORE, 4016742400, 4016746495,
+STORE, 4016746496, 4016750591,
+STORE, 4016750592, 4016758783,
+STORE, 4016799744, 4016844799,
+STORE, 4016844800, 4016902143,
+STORE, 4016902144, 4016992255,
+STORE, 4016992256, 4017000447,
+STORE, 4017000448, 4017004543,
+STORE, 4017004544, 4017008639,
+STORE, 4017008640, 4017016831,
+STORE, 4017016832, 4017020927,
+STORE, 4017020928, 4017127423,
+STORE, 4017127424, 4017131519,
+STORE, 4017131520, 4017229823,
+STORE, 4017229824, 4017422335,
+STORE, 4017422336, 4017438719,
+STORE, 4017438720, 4017442815,
+STORE, 4017442816, 4017446911,
+STORE, 4017446912, 4017455103,
+STORE, 4017455104, 4017766399,
+STORE, 4017766400, 4017909759,
+STORE, 4017909760, 4018081791,
+STORE, 4018081792, 4018089983,
+STORE, 4018089984, 4018094079,
+STORE, 4018094080, 4018098175,
+STORE, 4018098176, 4018327551,
+STORE, 4018327552, 4018331647,
+STORE, 4018331648, 4018339839,
+STORE, 4018339840, 4018348031,
+STORE, 4018348032, 4018610175,
+STORE, 4018610176, 4018626559,
+STORE, 4018626560, 4018647039,
+STORE, 4018647040, 4018651135,
+STORE, 4018651136, 4018749439,
+STORE, 4018749440, 4018761727,
+STORE, 4018761728, 4018802687,
+STORE, 4018802688, 4018806783,
+STORE, 4018806784, 4018810879,
+STORE, 4018810880, 4018814975,
+STORE, 4018814976, 4018823167,
+STORE, 4018823168, 4018954239,
+STORE, 4018954240, 4019007487,
+STORE, 4019007488, 4019068927,
+STORE, 4019068928, 4019077119,
+STORE, 4019077120, 4019081215,
+STORE, 4019081216, 4019093503,
+STORE, 4019093504, 4019208191,
+STORE, 4019208192, 4019232767,
+STORE, 4019232768, 4019265535,
+STORE, 4019265536, 4019269631,
+STORE, 4019269632, 4019277823,
+STORE, 4019277824, 4019458047,
+STORE, 4019458048, 4019519487,
+STORE, 4019519488, 4019613695,
+STORE, 4019613696, 4019621887,
+STORE, 4019621888, 4019625983,
+STORE, 4019625984, 4019630079,
+STORE, 4019630080, 4019744767,
+STORE, 4019744768, 4019822591,
+STORE, 4019822592, 4019929087,
+STORE, 4019929088, 4019941375,
+STORE, 4019941376, 4019945471,
+STORE, 4019945472, 4019961855,
+STORE, 4019961856, 4019994623,
+STORE, 4019994624, 4019998719,
+STORE, 4019998720, 4020002815,
+STORE, 4020002816, 4020006911,
+STORE, 4020006912, 4020011007,
+STORE, 4020011008, 4020256767,
+STORE, 4020256768, 4020326399,
+STORE, 4020326400, 4020457471,
+STORE, 4020457472, 4020469759,
+STORE, 4020469760, 4020473855,
+STORE, 4020473856, 4020482047,
+STORE, 4020482048, 4020711423,
+STORE, 4020711424, 4020715519,
+STORE, 4020715520, 4020719615,
+STORE, 4020719616, 4020723711,
+STORE, 4020723712, 4020805631,
+STORE, 4020805632, 4021051391,
+STORE, 4021051392, 4021460991,
+STORE, 4021460992, 4021469183,
+STORE, 4021469184, 4021473279,
+STORE, 4021473280, 4021571583,
+STORE, 4021571584, 4021633023,
+STORE, 4021633024, 4021727231,
+STORE, 4021727232, 4021735423,
+STORE, 4021735424, 4021739519,
+STORE, 4021739520, 4021747711,
+STORE, 4021747712, 4021829631,
+STORE, 4021829632, 4021866495,
+STORE, 4021866496, 4021919743,
+STORE, 4021919744, 4021927935,
+STORE, 4021927936, 4021932031,
+STORE, 4021932032, 4021944319,
+STORE, 4021944320, 4022157311,
+STORE, 4022157312, 4022161407,
+STORE, 4022161408, 4022173695,
+STORE, 4022173696, 4022177791,
+STORE, 4022177792, 4022472703,
+STORE, 4022472704, 4022509567,
+STORE, 4022509568, 4022583295,
+STORE, 4022583296, 4022587391,
+STORE, 4022587392, 4022591487,
+STORE, 4022591488, 4022607871,
+STORE, 4022607872, 4022657023,
+STORE, 4022657024, 4022722559,
+STORE, 4022722560, 4022730751,
+STORE, 4022730752, 4022734847,
+STORE, 4022734848, 4022865919,
+STORE, 4022865920, 4022943743,
+STORE, 4022943744, 4023062527,
+STORE, 4023062528, 4023074815,
+STORE, 4023074816, 4023078911,
+STORE, 4023078912, 4023128063,
+STORE, 4023128064, 4023218175,
+STORE, 4023218176, 4023361535,
+STORE, 4023361536, 4023373823,
+STORE, 4023373824, 4023377919,
+STORE, 4023377920, 4023558143,
+STORE, 4023558144, 4023631871,
+STORE, 4023631872, 4023816191,
+STORE, 4023816192, 4023820287,
+STORE, 4023820288, 4023824383,
+STORE, 4023824384, 4023832575,
+STORE, 4023832576, 4024078335,
+STORE, 4024078336, 4024197119,
+STORE, 4024197120, 4024389631,
+STORE, 4024389632, 4024406015,
+STORE, 4024406016, 4024410111,
+STORE, 4024410112, 4024422399,
+STORE, 4024422400, 4024619007,
+STORE, 4024619008, 4024639487,
+STORE, 4024639488, 4024655871,
+STORE, 4024655872, 4024664063,
+STORE, 4024664064, 4024668159,
+STORE, 4024668160, 4024676351,
+STORE, 4024676352, 4024905727,
+STORE, 4024905728, 4024909823,
+STORE, 4024909824, 4024918015,
+STORE, 4024918016, 4024922111,
+STORE, 4024922112, 4024930303,
+STORE, 4024930304, 4025110527,
+STORE, 4025110528, 4025176063,
+STORE, 4025176064, 4025208831,
+STORE, 4025208832, 4025212927,
+STORE, 4025212928, 4025217023,
+STORE, 4025217024, 4025348095,
+STORE, 4025348096, 4025372671,
+STORE, 4025372672, 4025458687,
+STORE, 4025458688, 4025466879,
+STORE, 4025466880, 4025565183,
+STORE, 4025565184, 4025757695,
+STORE, 4025757696, 4026249215,
+STORE, 4026249216, 4026261503,
+STORE, 4026261504, 4026265599,
+STORE, 4026265600, 4026269695,
+STORE, 4026269696, 4026302463,
+STORE, 4026302464, 4026306559,
+STORE, 4026306560, 4026314751,
+STORE, 4026314752, 4026318847,
+STORE, 4026318848, 4026322943,
+STORE, 4026322944, 4026327039,
+STORE, 4026327040, 4026654719,
+STORE, 4026654720, 4026671103,
+STORE, 4026671104, 4026720255,
+STORE, 4026720256, 4026724351,
+STORE, 4026724352, 4026728447,
+STORE, 4026728448, 4026732543,
+STORE, 4026732544, 4026863615,
+STORE, 4026863616, 4027027455,
+STORE, 4027027456, 4027031551,
+STORE, 4027031552, 4027514879,
+STORE, 4027514880, 4027531263,
+STORE, 4027531264, 4027535359,
+STORE, 4027535360, 4027539455,
+STORE, 4027539456, 4027785215,
+STORE, 4027785216, 4027789311,
+STORE, 4027789312, 4027793407,
+STORE, 4027793408, 4027797503,
+STORE, 4027797504, 4027863039,
+STORE, 4027863040, 4027899903,
+STORE, 4027899904, 4027949055,
+STORE, 4027949056, 4027957247,
+STORE, 4027957248, 4027961343,
+STORE, 4027961344, 4027965439,
+STORE, 4027965440, 4028194815,
+STORE, 4028194816, 4028252159,
+STORE, 4028252160, 4028338175,
+STORE, 4028338176, 4028350463,
+STORE, 4028350464, 4028354559,
+STORE, 4028354560, 4028452863,
+STORE, 4028452864, 4028489727,
+STORE, 4028489728, 4028530687,
+STORE, 4028530688, 4028538879,
+STORE, 4028538880, 4028542975,
+STORE, 4028542976, 4028551167,
+STORE, 4028551168, 4028665855,
+STORE, 4028665856, 4029349887,
+STORE, 4029349888, 4030468095,
+STORE, 4030468096, 4030513151,
+STORE, 4030513152, 4030517247,
+STORE, 4030517248, 4030525439,
+STORE, 4030525440, 4030529535,
+STORE, 4030529536, 4030758911,
+STORE, 4030758912, 4030828543,
+STORE, 4030828544, 4030943231,
+STORE, 4030943232, 4030951423,
+STORE, 4030951424, 4030955519,
+STORE, 4030955520, 4030967807,
+STORE, 4030967808, 4031131647,
+STORE, 4031131648, 4031135743,
+STORE, 4031135744, 4031139839,
+STORE, 4031139840, 4031148031,
+STORE, 4031148032, 4031152127,
+STORE, 4031152128, 4031160319,
+STORE, 4031160320, 4031504383,
+STORE, 4031504384, 4031598591,
+STORE, 4031598592, 4031754239,
+STORE, 4031754240, 4031766527,
+STORE, 4031766528, 4031770623,
+STORE, 4031770624, 4031774719,
+STORE, 4031774720, 4031782911,
+STORE, 4031782912, 4031799295,
+STORE, 4031799296, 4031856639,
+STORE, 4031856640, 4031983615,
+STORE, 4031983616, 4031987711,
+STORE, 4031987712, 4031991807,
+STORE, 4031991808, 4032270335,
+STORE, 4032270336, 4032274431,
+STORE, 4032274432, 4032282623,
+STORE, 4032282624, 4032286719,
+STORE, 4032286720, 4032290815,
+STORE, 4032290816, 4032389119,
+STORE, 4032389120, 4032397311,
+STORE, 4032397312, 4032405503,
+STORE, 4032405504, 4032413695,
+STORE, 4032413696, 4032417791,
+STORE, 4032417792, 4032565247,
+STORE, 4032565248, 4032593919,
+STORE, 4032593920, 4032737279,
+STORE, 4032737280, 4032741375,
+STORE, 4032741376, 4032745471,
+STORE, 4032745472, 4032770047,
+STORE, 4032770048, 4032933887,
+STORE, 4032933888, 4032999423,
+STORE, 4032999424, 4033032191,
+STORE, 4033032192, 4033036287,
+STORE, 4033036288, 4033040383,
+STORE, 4033040384, 4033105919,
+STORE, 4033105920, 4033396735,
+STORE, 4033396736, 4033822719,
+STORE, 4033822720, 4033839103,
+STORE, 4033839104, 4033843199,
+STORE, 4033843200, 4033851391,
+STORE, 4033851392, 4033863679,
+STORE, 4033863680, 4033880063,
+STORE, 4033880064, 4033933311,
+STORE, 4033933312, 4034023423,
+STORE, 4034023424, 4034031615,
+STORE, 4034031616, 4034035711,
+STORE, 4034035712, 4034043903,
+STORE, 4034043904, 4034142207,
+STORE, 4034142208, 4034191359,
+STORE, 4034191360, 4034260991,
+STORE, 4034260992, 4034269183,
+STORE, 4034269184, 4034273279,
+STORE, 4034273280, 4034281471,
+STORE, 4034281472, 4034412543,
+STORE, 4034412544, 4034445311,
+STORE, 4034445312, 4034490367,
+STORE, 4034490368, 4034494463,
+STORE, 4034494464, 4034498559,
+STORE, 4034498560, 4034662399,
+STORE, 4034662400, 4034666495,
+STORE, 4034666496, 4034670591,
+STORE, 4034670592, 4034674687,
+STORE, 4034674688, 4034678783,
+STORE, 4034678784, 4034682879,
+STORE, 4034682880, 4034781183,
+STORE, 4034781184, 4035043327,
+STORE, 4035043328, 4035047423,
+STORE, 4035047424, 4035055615,
+STORE, 4035055616, 4035059711,
+STORE, 4035059712, 4035063807,
+STORE, 4035063808, 4035067903,
+STORE, 4035067904, 4035100671,
+STORE, 4035100672, 4035375103,
+STORE, 4035375104, 4035383295,
+STORE, 4035383296, 4035395583,
+STORE, 4035395584, 4035399679,
+STORE, 4035399680, 4035403775,
+STORE, 4035403776, 4035407871,
+STORE, 4035407872, 4035411967,
+STORE, 4035411968, 4035477503,
+STORE, 4035477504, 4035608575,
+STORE, 4035608576, 4035641343,
+STORE, 4035641344, 4035682303,
+STORE, 4035682304, 4035686399,
+STORE, 4035686400, 4035690495,
+STORE, 4035690496, 4035694591,
+STORE, 4035694592, 4035743743,
+STORE, 4035743744, 4035784703,
+STORE, 4035784704, 4035829759,
+STORE, 4035829760, 4035837951,
+STORE, 4035837952, 4035842047,
+STORE, 4035842048, 4035846143,
+STORE, 4035846144, 4035850239,
+STORE, 4035850240, 4036001791,
+STORE, 4036001792, 4036005887,
+STORE, 4036005888, 4036214783,
+STORE, 4036214784, 4036218879,
+STORE, 4036218880, 4036603903,
+STORE, 4036603904, 4036648959,
+STORE, 4036648960, 4036653055,
+STORE, 4036653056, 4036657151,
+STORE, 4036657152, 4036665343,
+STORE, 4036665344, 4036780031,
+STORE, 4036780032, 4036829183,
+STORE, 4036829184, 4036984831,
+STORE, 4036984832, 4036993023,
+STORE, 4036993024, 4036997119,
+STORE, 4036997120, 4037001215,
+STORE, 4037001216, 4037009407,
+STORE, 4037009408, 4037025791,
+STORE, 4037025792, 4037095423,
+STORE, 4037095424, 4037181439,
+STORE, 4037181440, 4037193727,
+STORE, 4037193728, 4037197823,
+STORE, 4037197824, 4037206015,
+STORE, 4037206016, 4037320703,
+STORE, 4037320704, 4037337087,
+STORE, 4037337088, 4037349375,
+STORE, 4037349376, 4037357567,
+STORE, 4037357568, 4037361663,
+STORE, 4037369856, 4037386239,
+STORE, 4037386240, 4037672959,
+STORE, 4037672960, 4037689343,
+STORE, 4037689344, 4037730303,
+STORE, 4037730304, 4037734399,
+STORE, 4037734400, 4037738495,
+STORE, 4037738496, 4037742591,
+STORE, 4037742592, 4037758975,
+STORE, 4037758976, 4037890047,
+STORE, 4037890048, 4037931007,
+STORE, 4037931008, 4037976063,
+STORE, 4037976064, 4037984255,
+STORE, 4037984256, 4037988351,
+STORE, 4037988352, 4038053887,
+STORE, 4038053888, 4038184959,
+STORE, 4038184960, 4038189055,
+STORE, 4038189056, 4038197247,
+STORE, 4038197248, 4038201343,
+STORE, 4038201344, 4038205439,
+STORE, 4038205440, 4038209535,
+STORE, 4038217728, 4038250495,
+STORE, 4038250496, 4038512639,
+STORE, 4038512640, 4038516735,
+STORE, 4038516736, 4038520831,
+STORE, 4038520832, 4038524927,
+STORE, 4038524928, 4038529023,
+STORE, 4038529024, 4038533119,
+STORE, 4038541312, 4038623231,
+STORE, 4038623232, 4038754303,
+STORE, 4038754304, 4038885375,
+STORE, 4038885376, 4038889471,
+STORE, 4038897664, 4038963199,
+STORE, 4038963200, 4038967295,
+STORE, 4038967296, 4038983679,
+STORE, 4038983680, 4039114751,
+STORE, 4039114752, 4039245823,
+STORE, 4039245824, 4039376895,
+STORE, 4039376896, 4040687615,
+STORE, 4040687616, 4040691711,
+STORE, 4040691712, 4040806399,
+STORE, 4040806400, 4040937471,
+STORE, 4040937472, 4040941567,
+STORE, 4040945664, 4040949759,
+STORE, 4040949760, 4041080831,
+STORE, 4041080832, 4041211903,
+STORE, 4041211904, 4043046911,
+STORE, 4043046912, 4043051007,
+STORE, 4043051008, 4043055103,
+STORE, 4043055104, 4043137023,
+STORE, 4043137024, 4043141119,
+STORE, 4043141120, 4043145215,
+STORE, 4043145216, 4043153407,
+STORE, 4043153408, 4043186175,
+STORE, 4043186176, 4043317247,
+STORE, 4043317248, 4043448319,
+STORE, 4043448320, 4043579391,
+STORE, 4043579392, 4043583487,
+STORE, 4043583488, 4043599871,
+STORE, 4043599872, 4043661311,
+STORE, 4043661312, 4043792383,
+STORE, 4043792384, 4043796479,
+STORE, 4043796480, 4043800575,
+STORE, 4043800576, 4043816959,
+STORE, 4043816960, 4043821055,
+STORE, 4043821056, 4043825151,
+STORE, 4043825152, 4043829247,
+STORE, 4043829248, 4043833343,
+STORE, 4043833344, 4047241215,
+STORE, 4047241216, 4047249407,
+STORE, 4047249408, 4047253503,
+STORE, 4047253504, 4047323135,
+STORE, 4047323136, 4047327231,
+STORE, 4047327232, 4047458303,
+STORE, 4047458304, 4047589375,
+STORE, 4047589376, 4047720447,
+STORE, 4047720448, 4047773695,
+STORE, 4047773696, 4047790079,
+STORE, 4047790080, 4047921151,
+STORE, 4047921152, 4048052223,
+STORE, 4048052224, 4048183295,
+STORE, 4048183296, 4049002495,
+STORE, 4049002496, 4049133567,
+STORE, 4049133568, 4049154047,
+STORE, 4049154048, 4049158143,
+STORE, 4049158144, 4049162239,
+STORE, 4049162240, 4049166335,
+STORE, 4049166336, 4049174527,
+STORE, 4049174528, 4049182719,
+STORE, 4049182720, 4049186815,
+STORE, 4049186816, 4049190911,
+STORE, 4049190912, 4049195007,
+STORE, 4049195008, 4049203199,
+STORE, 4049203200, 4049207295,
+STORE, 4049207296, 4049211391,
+STORE, 4049211392, 4049215487,
+STORE, 4049215488, 4049219583,
+STORE, 4049219584, 4049227775,
+STORE, 4049227776, 4049231871,
+STORE, 4049231872, 4049235967,
+STORE, 4049235968, 4049244159,
+STORE, 4049244160, 4049248255,
+STORE, 4049248256, 4049252351,
+STORE, 4049252352, 4049256447,
+STORE, 4049256448, 4049268735,
+STORE, 4049268736, 4049272831,
+STORE, 4049272832, 4049313791,
+STORE, 4049313792, 4049723391,
+STORE, 4049723392, 4049727487,
+STORE, 4049727488, 4049858559,
+STORE, 4049858560, 4049989631,
+STORE, 4049989632, 4049993727,
+STORE, 4049993728, 4050026495,
+STORE, 4050026496, 4050030591,
+STORE, 4050030592, 4050161663,
+STORE, 4050161664, 4050169855,
+STORE, 4050169856, 4050223103,
+STORE, 4050223104, 4050632703,
+STORE, 4050632704, 4050636799,
+STORE, 4050636800, 4050640895,
+STORE, 4050640896, 4050644991,
+STORE, 4050644992, 4050661375,
+STORE, 4050661376, 4050665471,
+STORE, 4050665472, 4050673663,
+STORE, 4050673664, 4050677759,
+STORE, 4050677760, 4050694143,
+STORE, 4050694144, 4050702335,
+STORE, 4050702336, 4050956287,
+STORE, 4050956288, 4051963903,
+STORE, 4051963904, 4051980287,
+STORE, 4051980288, 4051988479,
+STORE, 4051988480, 4052000767,
+STORE, 4052000768, 4052004863,
+STORE, 4052004864, 4052029439,
+STORE, 4284014592, 4284018687,
+STORE, 4284018688, 4292403199,
+SNULL, 4041080832, 4041211903,
+SNULL, 3795763200, 3795894271,
+STORE, 3629522944, 3696631807,
+SNULL, 3663077375, 3696631807,
+STORE, 3629522944, 3663077375,
+STORE, 3663077376, 3696631807,
+SNULL, 3663077376, 3696631807,
+STORE, 3663077376, 3696631807,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3626471424, 3627524095,
+SNULL, 3626471424, 3626475519,
+STORE, 3626475520, 3627524095,
+STORE, 3626471424, 3626475519,
+SNULL, 3627519999, 3627524095,
+STORE, 3626475520, 3627519999,
+STORE, 3627520000, 3627524095,
+STORE, 3625418752, 3626475519,
+SNULL, 3625418752, 3625422847,
+STORE, 3625422848, 3626475519,
+STORE, 3625418752, 3625422847,
+SNULL, 3626467327, 3626475519,
+STORE, 3625422848, 3626467327,
+STORE, 3626467328, 3626475519,
+STORE, 3624366080, 3625422847,
+SNULL, 3624366080, 3624370175,
+STORE, 3624370176, 3625422847,
+STORE, 3624366080, 3624370175,
+SNULL, 3625414655, 3625422847,
+STORE, 3624370176, 3625414655,
+STORE, 3625414656, 3625422847,
+STORE, 4041191424, 4041211903,
+SNULL, 4041195519, 4041211903,
+STORE, 4041191424, 4041195519,
+STORE, 4041195520, 4041211903,
+STORE, 4041170944, 4041191423,
+SNULL, 4041175039, 4041191423,
+STORE, 4041170944, 4041175039,
+STORE, 4041175040, 4041191423,
+SNULL, 3625426943, 3626467327,
+STORE, 3625422848, 3625426943,
+STORE, 3625426944, 3626467327,
+STORE, 4041162752, 4041170943,
+SNULL, 3626479615, 3627519999,
+STORE, 3626475520, 3626479615,
+STORE, 3626479616, 3627519999,
+STORE, 4041154560, 4041162751,
+STORE, 4041154560, 4041170943,
+STORE, 4041134080, 4041154559,
+SNULL, 4041138175, 4041154559,
+STORE, 4041134080, 4041138175,
+STORE, 4041138176, 4041154559,
+SNULL, 3624374271, 3625414655,
+STORE, 3624370176, 3624374271,
+STORE, 3624374272, 3625414655,
+STORE, 4041125888, 4041134079,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+STORE, 3487174656, 3487584255,
+STORE, 4041121792, 4041125887,
+SNULL, 4041121792, 4041125887,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 3487174656, 3487584255,
+STORE, 3222274048, 3223326719,
+SNULL, 3222274048, 3222278143,
+STORE, 3222278144, 3223326719,
+STORE, 3222274048, 3222278143,
+SNULL, 3223322623, 3223326719,
+STORE, 3222278144, 3223322623,
+STORE, 3223322624, 3223326719,
+STORE, 3221221376, 3222278143,
+SNULL, 3221221376, 3221225471,
+STORE, 3221225472, 3222278143,
+STORE, 3221221376, 3221225471,
+SNULL, 3222269951, 3222278143,
+STORE, 3221225472, 3222269951,
+STORE, 3222269952, 3222278143,
+STORE, 3220168704, 3221225471,
+SNULL, 3220168704, 3220172799,
+STORE, 3220172800, 3221225471,
+STORE, 3220168704, 3220172799,
+SNULL, 3221217279, 3221225471,
+STORE, 3220172800, 3221217279,
+STORE, 3221217280, 3221225471,
+STORE, 4041117696, 4041125887,
+STORE, 4041117696, 4041134079,
+STORE, 3219083264, 3220172799,
+SNULL, 3219083264, 3219087359,
+STORE, 3219087360, 3220172799,
+STORE, 3219083264, 3219087359,
+SNULL, 3220164607, 3220172799,
+STORE, 3219087360, 3220164607,
+STORE, 3220164608, 3220172799,
+STORE, 4041109504, 4041117695,
+STORE, 4041109504, 4041134079,
+STORE, 3217997824, 3219087359,
+SNULL, 3217997824, 3218001919,
+STORE, 3218001920, 3219087359,
+STORE, 3217997824, 3218001919,
+SNULL, 3219079167, 3219087359,
+STORE, 3218001920, 3219079167,
+STORE, 3219079168, 3219087359,
+STORE, 4041101312, 4041109503,
+STORE, 4041101312, 4041134079,
+STORE, 3216912384, 3218001919,
+SNULL, 3216912384, 3216916479,
+STORE, 3216916480, 3218001919,
+STORE, 3216912384, 3216916479,
+SNULL, 3217993727, 3218001919,
+STORE, 3216916480, 3217993727,
+STORE, 3217993728, 3218001919,
+STORE, 4041093120, 4041101311,
+STORE, 4041093120, 4041134079,
+STORE, 3215826944, 3216916479,
+SNULL, 3215826944, 3215831039,
+STORE, 3215831040, 3216916479,
+STORE, 3215826944, 3215831039,
+SNULL, 3216908287, 3216916479,
+STORE, 3215831040, 3216908287,
+STORE, 3216908288, 3216916479,
+STORE, 4016779264, 4016799743,
+SNULL, 4016783359, 4016799743,
+STORE, 4016779264, 4016783359,
+STORE, 4016783360, 4016799743,
+STORE, 4016758784, 4016779263,
+SNULL, 4016762879, 4016779263,
+STORE, 4016758784, 4016762879,
+STORE, 4016762880, 4016779263,
+SNULL, 3222282239, 3223322623,
+STORE, 3222278144, 3222282239,
+STORE, 3222282240, 3223322623,
+STORE, 4041084928, 4041093119,
+STORE, 4041084928, 4041134079,
+SNULL, 3221229567, 3222269951,
+STORE, 3221225472, 3221229567,
+STORE, 3221229568, 3222269951,
+STORE, 4015644672, 4015665151,
+STORE, 4038889472, 4038897663,
+SNULL, 4015648767, 4015665151,
+STORE, 4015644672, 4015648767,
+STORE, 4015648768, 4015665151,
+STORE, 4015624192, 4015644671,
+SNULL, 4015628287, 4015644671,
+STORE, 4015624192, 4015628287,
+STORE, 4015628288, 4015644671,
+SNULL, 3219091455, 3220164607,
+STORE, 3219087360, 3219091455,
+STORE, 3219091456, 3220164607,
+STORE, 4015603712, 4015624191,
+SNULL, 4015607807, 4015624191,
+STORE, 4015603712, 4015607807,
+STORE, 4015607808, 4015624191,
+SNULL, 3218006015, 3219079167,
+STORE, 3218001920, 3218006015,
+STORE, 3218006016, 3219079167,
+STORE, 3949674496, 3949694975,
+SNULL, 3949678591, 3949694975,
+STORE, 3949674496, 3949678591,
+STORE, 3949678592, 3949694975,
+SNULL, 3216920575, 3217993727,
+STORE, 3216916480, 3216920575,
+STORE, 3216920576, 3217993727,
+STORE, 3948924928, 3948945407,
+SNULL, 3948929023, 3948945407,
+STORE, 3948924928, 3948929023,
+STORE, 3948929024, 3948945407,
+SNULL, 3215835135, 3216908287,
+STORE, 3215831040, 3215835135,
+STORE, 3215835136, 3216908287,
+SNULL, 3220176895, 3221217279,
+STORE, 3220172800, 3220176895,
+STORE, 3220176896, 3221217279,
+STORE, 3214786560, 3215826943,
+STORE, 3213733888, 3214786559,
+SNULL, 3213733888, 3213737983,
+STORE, 3213737984, 3214786559,
+STORE, 3213733888, 3213737983,
+SNULL, 3214782463, 3214786559,
+STORE, 3213737984, 3214782463,
+STORE, 3214782464, 3214786559,
+STORE, 4038533120, 4038541311,
+STORE, 3948421120, 3948441599,
+SNULL, 3948425215, 3948441599,
+STORE, 3948421120, 3948425215,
+STORE, 3948425216, 3948441599,
+SNULL, 3213742079, 3214782463,
+STORE, 3213737984, 3213742079,
+STORE, 3213742080, 3214782463,
+STORE, 4038209536, 4038217727,
+STORE, 3212681216, 3213737983,
+SNULL, 3212681216, 3212685311,
+STORE, 3212685312, 3213737983,
+STORE, 3212681216, 3212685311,
+SNULL, 3213729791, 3213737983,
+STORE, 3212685312, 3213729791,
+STORE, 3213729792, 3213737983,
+STORE, 3795763200, 3795894271,
+STORE, 3946872832, 3946893311,
+SNULL, 3946876927, 3946893311,
+STORE, 3946872832, 3946876927,
+STORE, 3946876928, 3946893311,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+STORE, 3487174656, 3487584255,
+SNULL, 3212689407, 3213729791,
+STORE, 3212685312, 3212689407,
+STORE, 3212689408, 3213729791,
+STORE, 4041080832, 4041084927,
+STORE, 4040941568, 4040945663,
+STORE, 4037361664, 4037369855,
+STORE, 4000817152, 4000821247,
+STORE, 3999440896, 3999444991,
+STORE, 3212161024, 3212681215,
+SNULL, 3212161024, 3212439551,
+STORE, 3212439552, 3212681215,
+STORE, 3212161024, 3212439551,
+SNULL, 3212161024, 3212439551,
+SNULL, 3212464127, 3212681215,
+STORE, 3212439552, 3212464127,
+STORE, 3212464128, 3212681215,
+SNULL, 3212464128, 3212681215,
+SNULL, 3212439552, 3212451839,
+STORE, 3212451840, 3212464127,
+STORE, 3212439552, 3212451839,
+SNULL, 3212439552, 3212451839,
+STORE, 3212439552, 3212451839,
+SNULL, 3212451840, 3212455935,
+STORE, 3212455936, 3212464127,
+STORE, 3212451840, 3212455935,
+SNULL, 3212451840, 3212455935,
+STORE, 3212451840, 3212455935,
+SNULL, 3212455936, 3212460031,
+STORE, 3212460032, 3212464127,
+STORE, 3212455936, 3212460031,
+SNULL, 3212455936, 3212460031,
+STORE, 3212455936, 3212460031,
+SNULL, 3212460032, 3212464127,
+STORE, 3212460032, 3212464127,
+STORE, 3997679616, 3997683711,
+SNULL, 4049235968, 4049240063,
+STORE, 4049240064, 4049244159,
+STORE, 4049235968, 4049240063,
+SNULL, 4049240064, 4049244159,
+STORE, 4049240064, 4049244159,
+SNULL, 3997679616, 3997683711,
+SNULL, 3999440896, 3999444991,
+SNULL, 4000817152, 4000821247,
+SNULL, 4040941568, 4040945663,
+SNULL, 4041080832, 4041084927,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 3487174656, 3487584255,
+SNULL, 3212451840, 3212455935,
+STORE, 3212451840, 3212455935,
+STORE, 4041080832, 4041084927,
+STORE, 3623890944, 3624169471,
+SNULL, 4041080832, 4041084927,
+STORE, 4041080832, 4041084927,
+SNULL, 4041080832, 4041084927,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+STORE, 4041080832, 4041084927,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+STORE, 3211386880, 3212439551,
+SNULL, 3211386880, 3211390975,
+STORE, 3211390976, 3212439551,
+STORE, 3211386880, 3211390975,
+SNULL, 3212435455, 3212439551,
+STORE, 3211390976, 3212435455,
+STORE, 3212435456, 3212439551,
+STORE, 4040941568, 4040945663,
+STORE, 3937169408, 3937189887,
+STORE, 3623485440, 3623616511,
+SNULL, 717225983, 1388314623,
+STORE, 314572800, 717225983,
+STORE, 717225984, 1388314623,
+SNULL, 717225984, 1388314623,
+STORE, 3937112064, 3937132543,
+SNULL, 3937116159, 3937132543,
+STORE, 3937112064, 3937116159,
+STORE, 3937116160, 3937132543,
+SNULL, 3211395071, 3212435455,
+STORE, 3211390976, 3211395071,
+STORE, 3211395072, 3212435455,
+STORE, 4000817152, 4000821247,
+STORE, 3974823936, 3974832127,
+STORE, 3595284480, 3595431935,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+STORE, 3487174656, 3487584255,
+STORE, 3999440896, 3999444991,
+STORE, 3997679616, 3997683711,
+STORE, 3996295168, 3996299263,
+STORE, 3996090368, 3996094463,
+STORE, 3210866688, 3211386879,
+SNULL, 3210866688, 3211001855,
+STORE, 3211001856, 3211386879,
+STORE, 3210866688, 3211001855,
+SNULL, 3210866688, 3211001855,
+SNULL, 3211038719, 3211386879,
+STORE, 3211001856, 3211038719,
+STORE, 3211038720, 3211386879,
+SNULL, 3211038720, 3211386879,
+SNULL, 3211001856, 3211022335,
+STORE, 3211022336, 3211038719,
+STORE, 3211001856, 3211022335,
+SNULL, 3211001856, 3211022335,
+STORE, 3211001856, 3211022335,
+SNULL, 3211022336, 3211030527,
+STORE, 3211030528, 3211038719,
+STORE, 3211022336, 3211030527,
+SNULL, 3211022336, 3211030527,
+STORE, 3211022336, 3211030527,
+SNULL, 3211030528, 3211034623,
+STORE, 3211034624, 3211038719,
+STORE, 3211030528, 3211034623,
+SNULL, 3211030528, 3211034623,
+STORE, 3211030528, 3211034623,
+SNULL, 3211034624, 3211038719,
+STORE, 3211034624, 3211038719,
+STORE, 3994906624, 3994910719,
+SNULL, 4049240064, 4049244159,
+STORE, 4049240064, 4049244159,
+SNULL, 3994906624, 3994910719,
+SNULL, 3996090368, 3996094463,
+SNULL, 3996295168, 3996299263,
+SNULL, 3997679616, 3997683711,
+SNULL, 3999440896, 3999444991,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 3487174656, 3487584255,
+SNULL, 3211022336, 3211030527,
+STORE, 3211022336, 3211030527,
+STORE, 3999440896, 3999444991,
+STORE, 3210199040, 3211001855,
+SNULL, 3999440896, 3999444991,
+STORE, 3999440896, 3999444991,
+SNULL, 3999440896, 3999444991,
+STORE, 3594821632, 3594952703,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 4048183296, 4048592895,
+STORE, 4048592896, 4049002495,
+STORE, 4048183296, 4048592895,
+STORE, 4048183296, 4049002495,
+SNULL, 1914101759, 1969434623,
+STORE, 1914097664, 1914101759,
+STORE, 1914101760, 1969434623,
+STORE, 3567108096, 3567239167,
+STORE, 3973832704, 3973840895,
+STORE, 3209113600, 3210199039,
+SNULL, 3209113600, 3209117695,
+STORE, 3209117696, 3210199039,
+STORE, 3209113600, 3209117695,
+SNULL, 3210194943, 3210199039,
+STORE, 3209117696, 3210194943,
+STORE, 3210194944, 3210199039,
+STORE, 3935858688, 3935879167,
+SNULL, 3935862783, 3935879167,
+STORE, 3935858688, 3935862783,
+STORE, 3935862784, 3935879167,
+SNULL, 3209121791, 3210194943,
+STORE, 3209117696, 3209121791,
+STORE, 3209121792, 3210194943,
+STORE, 3528749056, 3528880127,
+STORE, 3968200704, 3968208895,
+STORE, 3208028160, 3209117695,
+SNULL, 3208028160, 3208032255,
+STORE, 3208032256, 3209117695,
+STORE, 3208028160, 3208032255,
+SNULL, 3209109503, 3209117695,
+STORE, 3208032256, 3209109503,
+STORE, 3209109504, 3209117695,
+STORE, 3888123904, 3888144383,
+SNULL, 3888127999, 3888144383,
+STORE, 3888123904, 3888127999,
+STORE, 3888128000, 3888144383,
+SNULL, 3208036351, 3209109503,
+STORE, 3208032256, 3208036351,
+STORE, 3208036352, 3209109503,
+SNULL, 3968200704, 3968208895,
+SNULL, 3888123904, 3888144383,
+SNULL, 3209109504, 3209113599,
+STORE, 3209113600, 3209117695,
+STORE, 3209109504, 3209113599,
+SNULL, 3208028160, 3209113599,
+STORE, 3208060928, 3209117695,
+SNULL, 3208060928, 3208065023,
+STORE, 3208065024, 3209117695,
+STORE, 3208060928, 3208065023,
+SNULL, 3209109503, 3209117695,
+STORE, 3208065024, 3209109503,
+STORE, 3209109504, 3209117695,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3888123904, 3888144383,
+SNULL, 3888127999, 3888144383,
+STORE, 3888123904, 3888127999,
+STORE, 3888128000, 3888144383,
+SNULL, 3208069119, 3209109503,
+STORE, 3208065024, 3208069119,
+STORE, 3208069120, 3209109503,
+STORE, 3968200704, 3968208895,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3527778304, 3527909375,
+STORE, 3999440896, 3999444991,
+STORE, 3997679616, 3997683711,
+STORE, 1914097664, 1914105855,
+STORE, 1914105856, 1969434623,
+STORE, 3957583872, 3957592063,
+STORE, 3206975488, 3208065023,
+SNULL, 3206975488, 3206979583,
+STORE, 3206979584, 3208065023,
+STORE, 3206975488, 3206979583,
+SNULL, 3208056831, 3208065023,
+STORE, 3206979584, 3208056831,
+STORE, 3208056832, 3208065023,
+STORE, 3956736000, 3956744191,
+STORE, 3205890048, 3206979583,
+SNULL, 3205890048, 3205894143,
+STORE, 3205894144, 3206979583,
+STORE, 3205890048, 3205894143,
+SNULL, 3206971391, 3206979583,
+STORE, 3205894144, 3206971391,
+STORE, 3206971392, 3206979583,
+STORE, 3806101504, 3806121983,
+SNULL, 3806105599, 3806121983,
+STORE, 3806101504, 3806105599,
+STORE, 3806105600, 3806121983,
+SNULL, 3206983679, 3208056831,
+STORE, 3206979584, 3206983679,
+STORE, 3206983680, 3208056831,
+STORE, 3806081024, 3806101503,
+SNULL, 3806085119, 3806101503,
+STORE, 3806081024, 3806085119,
+STORE, 3806085120, 3806101503,
+SNULL, 3205898239, 3206971391,
+STORE, 3205894144, 3205898239,
+STORE, 3205898240, 3206971391,
+STORE, 3956015104, 3956023295,
+STORE, 3204804608, 3205894143,
+SNULL, 3204804608, 3204808703,
+STORE, 3204808704, 3205894143,
+STORE, 3204804608, 3204808703,
+SNULL, 3205885951, 3205894143,
+STORE, 3204808704, 3205885951,
+STORE, 3205885952, 3205894143,
+STORE, 3803471872, 3803492351,
+STORE, 3803451392, 3803471871,
+STORE, 3803451392, 3803492351,
+SNULL, 3957583872, 3957592063,
+SNULL, 3806101504, 3806121983,
+SNULL, 3206975487, 3206979583,
+STORE, 3206971392, 3206975487,
+STORE, 3206975488, 3206979583,
+SNULL, 3208056832, 3208060927,
+STORE, 3208060928, 3208065023,
+STORE, 3208056832, 3208060927,
+SNULL, 3206975488, 3208060927,
+STORE, 3801845760, 3801878527,
+STORE, 3806101504, 3806121983,
+SNULL, 3806105599, 3806121983,
+STORE, 3806101504, 3806105599,
+STORE, 3806105600, 3806121983,
+SNULL, 3204812799, 3205885951,
+STORE, 3204808704, 3204812799,
+STORE, 3204812800, 3205885951,
+STORE, 1914097664, 1914109951,
+STORE, 1914109952, 1969434623,
+STORE, 3957583872, 3957592063,
+STORE, 3206971392, 3208065023,
+SNULL, 3206971392, 3206979583,
+STORE, 3206979584, 3208065023,
+STORE, 3206971392, 3206979583,
+SNULL, 3208056831, 3208065023,
+STORE, 3206979584, 3208056831,
+STORE, 3208056832, 3208065023,
+STORE, 3801825280, 3801845759,
+SNULL, 3801829375, 3801845759,
+STORE, 3801825280, 3801829375,
+STORE, 3801829376, 3801845759,
+SNULL, 3206983679, 3208056831,
+STORE, 3206979584, 3206983679,
+STORE, 3206983680, 3208056831,
+STORE, 3202707456, 3204804607,
+SNULL, 3202707456, 3204804607,
+STORE, 3202707456, 3204804607,
+STORE, 3200610304, 3202707455,
+SNULL, 3202707456, 3204804607,
+SNULL, 3200610304, 3202707455,
+STORE, 3202707456, 3204804607,
+SNULL, 3202707456, 3204804607,
+STORE, 3202707456, 3204804607,
+SNULL, 3202707456, 3204804607,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3527647232, 3527778303,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+STORE, 3487059968, 3487584255,
+SNULL, 3487059968, 3487301631,
+STORE, 3487301632, 3487584255,
+STORE, 3487059968, 3487301631,
+SNULL, 3487059968, 3487301631,
+SNULL, 3487563775, 3487584255,
+STORE, 3487301632, 3487563775,
+STORE, 3487563776, 3487584255,
+SNULL, 3487563776, 3487584255,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3524046848, 3524177919,
+STORE, 3487170560, 3487301631,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3487039488, 3487170559,
+STORE, 3487039488, 3487301631,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3204280320, 3204804607,
+SNULL, 3204280320, 3204448255,
+STORE, 3204448256, 3204804607,
+STORE, 3204280320, 3204448255,
+SNULL, 3204280320, 3204448255,
+SNULL, 3204710399, 3204804607,
+STORE, 3204448256, 3204710399,
+STORE, 3204710400, 3204804607,
+SNULL, 3204710400, 3204804607,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3996295168, 3996299263,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+SNULL, 3996295168, 3996299263,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3486908416, 3487039487,
+STORE, 3486908416, 3487301631,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3223326720, 3290435583,
+SNULL, 3223326720, 3256881151,
+STORE, 3256881152, 3290435583,
+STORE, 3223326720, 3256881151,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+STORE, 3201826816, 3202351103,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+STORE, 3202351104, 3204448255,
+SNULL, 3202351104, 3204448255,
+SNULL, 3803471871, 3803492351,
+STORE, 3803451392, 3803471871,
+STORE, 3803471872, 3803492351,
+SNULL, 3803471872, 3803492351,
+SNULL, 3803451392, 3803471871,
+STORE, 3798999040, 3799101439,
+SNULL, 3798999040, 3799101439,
+STORE, 3952644096, 3952652287,
+STORE, 3203362816, 3204448255,
+SNULL, 3203362816, 3203366911,
+STORE, 3203366912, 3204448255,
+STORE, 3203362816, 3203366911,
+SNULL, 3204444159, 3204448255,
+STORE, 3203366912, 3204444159,
+STORE, 3204444160, 3204448255,
+STORE, 3803471872, 3803492351,
+SNULL, 3803475967, 3803492351,
+STORE, 3803471872, 3803475967,
+STORE, 3803475968, 3803492351,
+SNULL, 3203371007, 3204444159,
+STORE, 3203366912, 3203371007,
+STORE, 3203371008, 3204444159,
+STORE, 3199729664, 3201826815,
+SNULL, 3199729664, 3201826815,
+STORE, 3199729664, 3201826815,
+SNULL, 3199729664, 3201826815,
+STORE, 3199729664, 3201826815,
+SNULL, 3199729664, 3201826815,
+STORE, 3199729664, 3201826815,
+SNULL, 3199729664, 3201826815,
+STORE, 3199729664, 3201826815,
+SNULL, 3199729664, 3201826815,
+STORE, 3200774144, 3201826815,
+SNULL, 3200774144, 3200778239,
+STORE, 3200778240, 3201826815,
+STORE, 3200774144, 3200778239,
+SNULL, 3201822719, 3201826815,
+STORE, 3200778240, 3201822719,
+STORE, 3201822720, 3201826815,
+STORE, 3803451392, 3803471871,
+SNULL, 3803455487, 3803471871,
+STORE, 3803451392, 3803455487,
+STORE, 3803455488, 3803471871,
+SNULL, 3200782335, 3201822719,
+STORE, 3200778240, 3200782335,
+STORE, 3200782336, 3201822719,
+STORE, 3949666304, 3949674495,
+STORE, 3949408256, 3949416447,
+STORE, 3199688704, 3200778239,
+SNULL, 3199688704, 3199692799,
+STORE, 3199692800, 3200778239,
+STORE, 3199688704, 3199692799,
+SNULL, 3200770047, 3200778239,
+STORE, 3199692800, 3200770047,
+STORE, 3200770048, 3200778239,
+STORE, 3799306240, 3799326719,
+SNULL, 3799310335, 3799326719,
+STORE, 3799306240, 3799310335,
+STORE, 3799310336, 3799326719,
+SNULL, 3199696895, 3200770047,
+STORE, 3199692800, 3199696895,
+STORE, 3199696896, 3200770047,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+STORE, 3799277568, 3799306239,
+SNULL, 3799277568, 3799306239,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+SNULL, 4041162751, 4041170943,
+STORE, 4041154560, 4041162751,
+STORE, 4041162752, 4041170943,
+SNULL, 4041162752, 4041170943,
+SNULL, 4041154560, 4041162751,
+SNULL, 4041191424, 4041211903,
+SNULL, 4041170944, 4041191423,
+SNULL, 3626471423, 3626475519,
+STORE, 3626467328, 3626471423,
+STORE, 3626471424, 3626475519,
+SNULL, 3626471424, 3627524095,
+SNULL, 3625418751, 3625422847,
+STORE, 3625414656, 3625418751,
+STORE, 3625418752, 3625422847,
+SNULL, 3625418752, 3626471423,
+STORE, 3627393024, 3627524095,
+STORE, 3627261952, 3627393023,
+STORE, 3627261952, 3627524095,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+STORE, 3195494400, 3197591551,
+SNULL, 3197591552, 3199688703,
+SNULL, 3195494400, 3197591551,
+STORE, 3197591552, 3199688703,
+SNULL, 3197591552, 3199688703,
+STORE, 3197591552, 3199688703,
+STORE, 3195494400, 3197591551,
+SNULL, 3197591552, 3199688703,
+SNULL, 3195494400, 3197591551,
+STORE, 3798999040, 3799101439,
+SNULL, 3798999040, 3799101439,
+/*
+ * mmap: unmapped_area_topdown: ffff9a9f14ddaa80
+ * Gap was found: mt 4041162752 gap_end 4041183232
+ * mmap: window was 4052029440 - 4096 size 28672
+ * mmap: mas.min 4041154560 max 4041191423 mas.last 4041191423
+ * mmap: mas.index 4041162752 align mask 0 offset 0
+ * mmap: rb_find_vma find on 4041162752 => ffff9a9f03d19678 (ffff9a9f03d19678)
+ */
+ };
+
+ static const unsigned long set43[] = {
+STORE, 140737488347136, 140737488351231,
+STORE, 140734187720704, 140737488351231,
+SNULL, 140734187724800, 140737488351231,
+STORE, 140734187589632, 140734187724799,
+STORE, 4194304, 6443007,
+STORE, 4337664, 6443007,
+STORE, 4194304, 4337663,
+SNULL, 4337664, 6443007,
+STORE, 6430720, 6443007,
+STORE, 206158430208, 206160674815,
+STORE, 206158569472, 206160674815,
+STORE, 206158430208, 206158569471,
+SNULL, 206158569472, 206160674815,
+STORE, 206160662528, 206160670719,
+STORE, 206160670720, 206160674815,
+STORE, 140734188756992, 140734188765183,
+STORE, 140734188740608, 140734188756991,
+STORE, 140501948112896, 140501948116991,
+ };
+
+ int count = 0;
+ void *ptr = NULL;
+
+ MA_STATE(mas, mt, 0, 0);
+
+ mt_set_non_kernel(3);
+ check_erase2_testset(mt, set, ARRAY_SIZE(set));
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set2, ARRAY_SIZE(set2));
+ start = 140735933894656;
+ MT_BUG_ON(mt, !!mt_find(mt, &start, 140735933906943UL));
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(2);
+ mt_init_flags(mt, 0);
+ check_erase2_testset(mt, set3, ARRAY_SIZE(set3));
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, 0);
+ check_erase2_testset(mt, set4, ARRAY_SIZE(set4));
+ rcu_read_lock();
+ mas_for_each(&mas, entry, ULONG_MAX) {
+ if (xa_is_zero(entry))
+ continue;
+ }
+ rcu_read_unlock();
+ rcu_barrier();
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ mt_set_non_kernel(100);
+ check_erase2_testset(mt, set5, ARRAY_SIZE(set5));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set6, ARRAY_SIZE(set6));
+ rcu_barrier();
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set7, ARRAY_SIZE(set7));
+ rcu_barrier();
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set8, ARRAY_SIZE(set8));
+ rcu_barrier();
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set9, ARRAY_SIZE(set9));
+ rcu_barrier();
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set10, ARRAY_SIZE(set10));
+ rcu_barrier();
+ mtree_destroy(mt);
+
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set11, ARRAY_SIZE(set11));
+ rcu_barrier();
+ mas_empty_area_rev(&mas, 12288, 140014592737280, 0x2000);
+ MT_BUG_ON(mt, mas.last != 140014592573439);
+ mtree_destroy(mt);
+
+ mas_reset(&mas);
+ mas.tree = mt;
+ count = 0;
+ mas.index = 0;
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set12, ARRAY_SIZE(set12));
+ rcu_barrier();
+ mas_for_each(&mas, entry, ULONG_MAX) {
+ if (xa_is_zero(entry))
+ continue;
+ BUG_ON(count > 12);
+ count++;
+ }
+ mtree_destroy(mt);
+
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set13, ARRAY_SIZE(set13));
+ mtree_erase(mt, 140373516443648);
+ rcu_read_lock();
+ mas_empty_area_rev(&mas, 0, 140373518663680, 4096);
+ rcu_read_unlock();
+ mtree_destroy(mt);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set14, ARRAY_SIZE(set14));
+ rcu_barrier();
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set15, ARRAY_SIZE(set15));
+ rcu_barrier();
+ mtree_destroy(mt);
+
+ /* set16 was to find a bug on limit updating at slot 0. */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set16, ARRAY_SIZE(set16));
+ rcu_barrier();
+ mas_empty_area_rev(&mas, 4096, 139921865637888, 0x6000);
+ MT_BUG_ON(mt, mas.last != 139921865547775);
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ /*
+ * set17 found a bug in walking backwards and not counting nulls at
+ * the end. This could cause a gap to be missed if the null had any
+ * size.
+ */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set17, ARRAY_SIZE(set17));
+ rcu_barrier();
+ mas_empty_area_rev(&mas, 4096, 139953197334528, 0x1000);
+ MT_BUG_ON(mt, mas.last != 139953197322239);
+/* MT_BUG_ON(mt, mas.index != 139953197318144); */
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ /*
+ * set18 found a bug in walking backwards and not setting the max from
+ * the node, but using the parent node. This was only an issue if the
+ * next slot in the parent had what we needed.
+ */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set18, ARRAY_SIZE(set18));
+ rcu_barrier();
+ mas_empty_area_rev(&mas, 4096, 140222972858368, 2215936);
+ MT_BUG_ON(mt, mas.last != 140222968475647);
+ /*MT_BUG_ON(mt, mas.index != 140222966259712); */
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ /*
+ * set19 found 2 bugs in prev.
+ * 1. If we hit root without finding anything, then there was an
+ * infinite loop.
+ * 2. The first ascending wasn't using the correct slot which may have
+ * caused missed entries.
+ */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set19, ARRAY_SIZE(set19));
+ rcu_barrier();
+ mas.index = 140656779083776;
+ entry = mas_find(&mas, ULONG_MAX);
+ MT_BUG_ON(mt, entry != xa_mk_value(140656779083776));
+ entry = mas_prev(&mas, 0);
+ MT_BUG_ON(mt, entry != xa_mk_value(140656766251008));
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ /*
+ * set20 found a bug in mas_may_move_gap due to the slot being
+ * overwritten during the __mas_add operation and setting it to zero.
+ */
+ mt_set_non_kernel(99);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set20, ARRAY_SIZE(set20));
+ rcu_barrier();
+ check_load(mt, 94849009414144, NULL);
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(99);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set21, ARRAY_SIZE(set21));
+ rcu_barrier();
+ mt_validate(mt);
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(999);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set22, ARRAY_SIZE(set22));
+ rcu_barrier();
+ mt_validate(mt);
+ ptr = mtree_load(mt, 140551363362816);
+ MT_BUG_ON(mt, ptr == mtree_load(mt, 140551363420159));
+ mt_set_non_kernel(0);
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(99);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set23, ARRAY_SIZE(set23));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+
+ mt_set_non_kernel(99);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set24, ARRAY_SIZE(set24));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(99);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set25, ARRAY_SIZE(set25));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* Split on NULL followed by delete - causes gap issues. */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set26, ARRAY_SIZE(set26));
+ rcu_barrier();
+ mas_empty_area_rev(&mas, 4096, 140109042671616, 409600);
+ MT_BUG_ON(mt, mas.last != 140109040959487);
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* Split on NULL followed by delete - causes gap issues. */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set27, ARRAY_SIZE(set27));
+ rcu_barrier();
+ MT_BUG_ON(mt, NULL != mtree_load(mt, 140415537422336));
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set28, ARRAY_SIZE(set28));
+ rcu_barrier();
+ mas_empty_area_rev(&mas, 4096, 139918413357056, 2097152);
+ /* Search for the size of gap then align it (offset 0) */
+ mas.index = (mas.last + 1 - 2097152 - 0) & (~2093056);
+ MT_BUG_ON(mt, mas.index != 139918401601536);
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* This test found issues with retry moving rebalanced nodes so the
+ * incorrect parent pivot was updated.
+ */
+ mt_set_non_kernel(999);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set29, ARRAY_SIZE(set29));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* This test found issues with deleting all entries in a node when
+ * surrounded by entries in the next nodes, then deleting the entries
+ * surrounding the node filled with deleted entries.
+ */
+ mt_set_non_kernel(999);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set30, ARRAY_SIZE(set30));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* This test found an issue with deleting all entries in a node that was
+ * the end node and mas_gap incorrectly set next = curr, and curr = prev
+ * then moved next to the left, losing data.
+ */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set31, ARRAY_SIZE(set31));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set32, ARRAY_SIZE(set32));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+/*
+ * mmap: empty_area_topdown: ffff88821c9cb600 Gap was found:
+ * mt 140582827569152 gap_end 140582869532672
+ * mmap: window was 140583656296448 - 4096 size 134217728
+ * mmap: mas.min 94133881868288 max 140582961786879 mas.last 140582961786879
+ * mmap: mas.index 140582827569152 align mask 0 offset 0
+ * mmap: rb_find_vma find on
+ * 140582827569152 => ffff88821c5bad00 (ffff88821c5bad00)
+ */
+
+ /* move gap failed due to an entirely empty node */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set33, ARRAY_SIZE(set33));
+ rcu_barrier();
+ mas_empty_area_rev(&mas, 4096, 140583656296448, 134217728);
+ MT_BUG_ON(mt, mas.last != 140583003750399);
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /*
+ * Incorrect gap in tree caused by mas_prev not setting the limits
+ * correctly while walking down.
+ */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set34, ARRAY_SIZE(set34));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* Empty leaf at the end of a parent caused incorrect gap. */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set35, ARRAY_SIZE(set35));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ mt_set_non_kernel(99);
+ /* Empty leaf at the end of a parent caused incorrect gap. */
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set36, ARRAY_SIZE(set36));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set37, ARRAY_SIZE(set37));
+ rcu_barrier();
+ MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712));
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set38, ARRAY_SIZE(set38));
+ rcu_barrier();
+ MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712));
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set39, ARRAY_SIZE(set39));
+ rcu_barrier();
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set40, ARRAY_SIZE(set40));
+ rcu_barrier();
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set41, ARRAY_SIZE(set41));
+ rcu_barrier();
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* move gap failed due to an entirely empty node. */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set42, ARRAY_SIZE(set42));
+ rcu_barrier();
+ mas_empty_area_rev(&mas, 4096, 4052029440, 28672);
+ MT_BUG_ON(mt, mas.last != 4041211903);
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* gap calc off by one */
+ mt_set_non_kernel(99);
+ mas_reset(&mas);
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ check_erase2_testset(mt, set43, ARRAY_SIZE(set43));
+ rcu_barrier();
+ mt_set_non_kernel(0);
+ mt_validate(mt);
+ mtree_destroy(mt);
+}
+#endif
+
+/* End of VM testcases */
+
+/* RCU stress testing */
+
+/* RCU reader helper function */
+static void rcu_reader_register(struct rcu_test_struct2 *test)
+{
+ rcu_register_thread();
+ uatomic_inc(&test->thread_count);
+
+ while (!test->start)
+ usleep(test->pause * 100);
+}
+
+static void rcu_reader_setup(struct rcu_reader_struct *reader,
+ unsigned int id, struct rcu_test_struct2 *test)
+{
+ reader->id = id;
+ reader->test = test;
+ reader->mod = reader->id % 10;
+ reader->del = (reader->mod + 1) % 10;
+ reader->flip = (reader->mod + 2) % 10;
+ reader->add = (reader->mod + 3) % 10;
+ reader->next = (reader->mod + 4) % 10;
+}
+/* RCU reader in increasing index */
+static void *rcu_reader_fwd(void *ptr)
+
+{
+ struct rcu_reader_struct *reader = (struct rcu_reader_struct *)ptr;
+ struct rcu_test_struct2 *test = reader->test;
+ unsigned long index = reader->id;
+ bool toggled, modified, deleted, added;
+ int i;
+ void *entry, *prev = NULL;
+ MA_STATE(mas, test->mt, 0, 0);
+
+ rcu_reader_register(test);
+ toggled = modified = deleted = added = false;
+
+ while (!test->stop) {
+ i = 0;
+ /* mas_for_each ?*/
+ rcu_read_lock();
+ mas_set(&mas, test->index[index]);
+ mas_for_each(&mas, entry, test->last[index + 9]) {
+ unsigned long r_start, r_end, alt_start;
+ void *expected, *alt;
+
+ r_start = test->index[index + i];
+ r_end = test->last[index + i];
+ expected = xa_mk_value(r_start);
+
+ if (i == reader->del) {
+ if (!deleted) {
+ alt_start = test->index[index + reader->flip];
+ /* delete occurred. */
+ if (mas.index == alt_start) {
+ uatomic_inc(&test->seen_deleted);
+ deleted = true;
+ }
+ }
+ if (deleted) {
+ i = reader->flip;
+ r_start = test->index[index + i];
+ r_end = test->last[index + i];
+ expected = xa_mk_value(r_start);
+ }
+ }
+
+ if (!added && (i == reader->add)) {
+ alt_start = test->index[index + reader->next];
+ if (mas.index == r_start) {
+ uatomic_inc(&test->seen_added);
+ added = true;
+ } else if (mas.index == alt_start) {
+ i = reader->next;
+ r_start = test->index[index + i];
+ r_end = test->last[index + i];
+ expected = xa_mk_value(r_start);
+ }
+ }
+
+ RCU_MT_BUG_ON(test, mas.index != r_start);
+ RCU_MT_BUG_ON(test, mas.last != r_end);
+
+ if (i == reader->flip) {
+ alt = xa_mk_value(index + i + RCU_RANGE_COUNT);
+ if (prev) {
+ if (toggled && entry == expected)
+ uatomic_inc(&test->seen_toggle);
+ else if (!toggled && entry == alt)
+ uatomic_inc(&test->seen_toggle);
+ }
+
+ if (entry == expected)
+ toggled = false;
+ else if (entry == alt)
+ toggled = true;
+ else {
+ printk("!!%lu-%lu -> %p not %p or %p\n", mas.index, mas.last, entry, expected, alt);
+ RCU_MT_BUG_ON(test, 1);
+ }
+
+ prev = entry;
+ } else if (i == reader->mod) {
+ alt = xa_mk_value(index + i * 2 + 1 +
+ RCU_RANGE_COUNT);
+ if (entry != expected) {
+ if (!modified)
+ uatomic_inc(&test->seen_modified);
+ modified = true;
+ } else {
+ if (modified)
+ uatomic_inc(&test->seen_modified);
+ modified = false;
+ }
+
+ if (modified)
+ RCU_MT_BUG_ON(test, entry != alt);
+
+ } else {
+ if (entry != expected)
+ printk("!!%lu-%lu -> %p not %p\n", mas.index, mas.last, entry, expected);
+ RCU_MT_BUG_ON(test, entry != expected);
+ }
+
+ i++;
+ }
+ rcu_read_unlock();
+ usleep(test->pause);
+ }
+
+ rcu_unregister_thread();
+ return NULL;
+}
+/* RCU reader in decreasing index */
+static void *rcu_reader_rev(void *ptr)
+{
+ struct rcu_reader_struct *reader = (struct rcu_reader_struct *)ptr;
+ struct rcu_test_struct2 *test = reader->test;
+ unsigned long index = reader->id;
+ bool toggled, modified, deleted, added;
+ int i;
+ void *prev = NULL;
+ MA_STATE(mas, test->mt, 0, 0);
+
+ rcu_reader_register(test);
+ toggled = modified = deleted = added = false;
+
+
+ while (!test->stop) {
+ void *entry;
+
+ i = 9;
+ mas_set(&mas, test->index[index + i]);
+
+ rcu_read_lock();
+ while (i--) {
+ unsigned long r_start, r_end, alt_start;
+ void *expected, *alt;
+ int line = __LINE__;
+
+ entry = mas_prev(&mas, test->index[index]);
+ r_start = test->index[index + i];
+ r_end = test->last[index + i];
+ expected = xa_mk_value(r_start);
+
+ if (i == reader->del) {
+ alt_start = test->index[index + reader->mod];
+ if (mas.index == alt_start) {
+ line = __LINE__;
+ if (!deleted)
+ uatomic_inc(&test->seen_deleted);
+ deleted = true;
+ }
+ if (deleted) {
+ line = __LINE__;
+ i = reader->mod;
+ r_start = test->index[index + i];
+ r_end = test->last[index + i];
+ expected = xa_mk_value(r_start);
+ }
+ }
+ if (!added && (i == reader->add)) {
+ alt_start = test->index[index + reader->flip];
+ if (mas.index == r_start) {
+ line = __LINE__;
+ uatomic_inc(&test->seen_added);
+ added = true;
+ } else if (mas.index == alt_start) {
+ line = __LINE__;
+ i = reader->flip;
+ r_start = test->index[index + i];
+ r_end = test->last[index + i];
+ expected = xa_mk_value(r_start);
+ }
+ }
+
+ if (i == reader->mod)
+ line = __LINE__;
+ else if (i == reader->flip)
+ line = __LINE__;
+
+ if (mas.index != r_start) {
+ alt = xa_mk_value(index + i * 2 + 1 +
+ RCU_RANGE_COUNT);
+ mt_dump(test->mt, mt_dump_dec);
+ printk("Error: %lu-%lu %p != %lu-%lu %p %p line %d i %d\n",
+ mas.index, mas.last, entry,
+ r_start, r_end, expected, alt,
+ line, i);
+ }
+ RCU_MT_BUG_ON(test, mas.index != r_start);
+ RCU_MT_BUG_ON(test, mas.last != r_end);
+
+ if (i == reader->mod) {
+ alt = xa_mk_value(index + i * 2 + 1 +
+ RCU_RANGE_COUNT);
+
+ if (entry != expected) {
+ if (!modified)
+ uatomic_inc(&test->seen_modified);
+ modified = true;
+ } else {
+ if (modified)
+ uatomic_inc(&test->seen_modified);
+ modified = false;
+ }
+ if (modified)
+ RCU_MT_BUG_ON(test, entry != alt);
+
+
+ } else if (i == reader->flip) {
+ alt = xa_mk_value(index + i +
+ RCU_RANGE_COUNT);
+ if (prev) {
+ if (toggled && entry == expected)
+ uatomic_inc(&test->seen_toggle);
+ else if (!toggled && entry == alt)
+ uatomic_inc(&test->seen_toggle);
+ }
+
+ if (entry == expected)
+ toggled = false;
+ else if (entry == alt)
+ toggled = true;
+ else {
+ printk("%lu-%lu %p != %p or %p\n",
+ mas.index, mas.last, entry,
+ expected, alt);
+ RCU_MT_BUG_ON(test, 1);
+ }
+
+ prev = entry;
+ } else {
+ if (entry != expected)
+ printk("%lu-%lu %p != %p\n", mas.index,
+ mas.last, entry, expected);
+ RCU_MT_BUG_ON(test, entry != expected);
+ }
+ }
+ rcu_read_unlock();
+ usleep(test->pause);
+ }
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
+static void rcu_stress_rev(struct maple_tree *mt, struct rcu_test_struct2 *test,
+ int count, struct rcu_reader_struct *test_reader)
+{
+ int i, j = 10000;
+ bool toggle = true;
+
+ test->start = true; /* Release the hounds! */
+ usleep(5);
+
+ while (j--) {
+ toggle = !toggle;
+ i = count;
+ while (i--) {
+ unsigned long start, end;
+ struct rcu_reader_struct *this = &test_reader[i];
+
+ /* Mod offset */
+ if (j == 600) {
+ start = test->index[this->id + this->mod];
+ end = test->last[this->id + this->mod];
+ mtree_store_range(mt, start, end,
+ xa_mk_value(this->id + this->mod * 2 +
+ 1 + RCU_RANGE_COUNT),
+ GFP_KERNEL);
+ }
+
+ /* Toggle */
+ if (!(j % 5)) {
+ start = test->index[this->id + this->flip];
+ end = test->last[this->id + this->flip];
+ mtree_store_range(mt, start, end,
+ xa_mk_value((toggle ? start :
+ this->id + this->flip +
+ RCU_RANGE_COUNT)),
+ GFP_KERNEL);
+ }
+
+ /* delete */
+ if (j == 400) {
+ start = test->index[this->id + this->del];
+ end = test->last[this->id + this->del];
+ mtree_store_range(mt, start, end, NULL, GFP_KERNEL);
+ }
+
+ /* add */
+ if (j == 500) {
+ start = test->index[this->id + this->add];
+ end = test->last[this->id + this->add];
+ mtree_store_range(mt, start, end,
+ xa_mk_value(start), GFP_KERNEL);
+ }
+ }
+ usleep(test->pause);
+ /* If a test fails, don't flood the console */
+ if (test->stop)
+ break;
+ }
+}
+
+static void rcu_stress_fwd(struct maple_tree *mt, struct rcu_test_struct2 *test,
+ int count, struct rcu_reader_struct *test_reader)
+{
+ int j, i;
+ bool toggle = true;
+
+ test->start = true; /* Release the hounds! */
+ usleep(5);
+ for (j = 0; j < 10000; j++) {
+ toggle = !toggle;
+ for (i = 0; i < count; i++) {
+ unsigned long start, end;
+ struct rcu_reader_struct *this = &test_reader[i];
+
+ /* Mod offset */
+ if (j == 600) {
+ start = test->index[this->id + this->mod];
+ end = test->last[this->id + this->mod];
+ mtree_store_range(mt, start, end,
+ xa_mk_value(this->id + this->mod * 2 +
+ 1 + RCU_RANGE_COUNT),
+ GFP_KERNEL);
+ }
+
+ /* Toggle */
+ if (!(j % 5)) {
+ start = test->index[this->id + this->flip];
+ end = test->last[this->id + this->flip];
+ mtree_store_range(mt, start, end,
+ xa_mk_value((toggle ? start :
+ this->id + this->flip +
+ RCU_RANGE_COUNT)),
+ GFP_KERNEL);
+ }
+
+ /* delete */
+ if (j == 400) {
+ start = test->index[this->id + this->del];
+ end = test->last[this->id + this->del];
+ mtree_store_range(mt, start, end, NULL, GFP_KERNEL);
+ }
+
+ /* add */
+ if (j == 500) {
+ start = test->index[this->id + this->add];
+ end = test->last[this->id + this->add];
+ mtree_store_range(mt, start, end,
+ xa_mk_value(start), GFP_KERNEL);
+ }
+ }
+ usleep(test->pause);
+ /* If a test fails, don't flood the console */
+ if (test->stop)
+ break;
+ }
+}
+
+/*
+ * This is to check:
+ * 1. Range that is not ever present
+ * 2. Range that is always present
+ * 3. Things being added but not removed.
+ * 4. Things being removed but not added.
+ * 5. Things are being added and removed, searches my succeed or fail
+ *
+ * This sets up two readers for every 10 entries; one forward and one reverse
+ * reading.
+ */
+static void rcu_stress(struct maple_tree *mt, bool forward)
+{
+ unsigned int count, i;
+ unsigned long r, seed;
+ pthread_t readers[RCU_RANGE_COUNT / 5];
+ struct rcu_test_struct2 test;
+ struct rcu_reader_struct test_reader[RCU_RANGE_COUNT / 5];
+ void *(*function)(void *);
+
+ /* Test setup */
+ test.mt = mt;
+ test.pause = 5;
+ test.seen_toggle = 0;
+ test.seen_deleted = 0;
+ test.seen_added = 0;
+ test.seen_modified = 0;
+ test.thread_count = 0;
+ test.start = test.stop = false;
+ seed = time(NULL);
+ srand(seed);
+ for (i = 0; i < RCU_RANGE_COUNT; i++) {
+ r = seed + rand();
+ mtree_store_range(mt, seed, r,
+ xa_mk_value(seed), GFP_KERNEL);
+
+ /* Record start and end of entry */
+ test.index[i] = seed;
+ test.last[i] = r;
+ seed = 1 + r + rand() % 10;
+ }
+
+ i = count = ARRAY_SIZE(readers);
+ while (i--) {
+ unsigned long id;
+
+ id = i / 2 * 10;
+ if (i % 2)
+ function = rcu_reader_fwd;
+ else
+ function = rcu_reader_rev;
+
+ rcu_reader_setup(&test_reader[i], id, &test);
+ if (pthread_create(&readers[i], NULL, *function,
+ &test_reader[i])) {
+ perror("creating reader thread");
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(readers); i++) {
+ struct rcu_reader_struct *this = &test_reader[i];
+ int add = this->id + this->add;
+
+ /* Remove add entries from the tree for later addition */
+ mtree_store_range(mt, test.index[add], test.last[add],
+ NULL, GFP_KERNEL);
+ }
+
+ mt_set_in_rcu(mt);
+ do {
+ usleep(5);
+ } while (test.thread_count > ARRAY_SIZE(readers));
+
+ if (forward)
+ rcu_stress_fwd(mt, &test, count, test_reader);
+ else
+ rcu_stress_rev(mt, &test, count, test_reader);
+
+ test.stop = true;
+ while (count--)
+ pthread_join(readers[count], NULL);
+
+ mt_validate(mt);
+}
+
+
+struct rcu_test_struct {
+ struct maple_tree *mt; /* the maple tree */
+ int count; /* Number of times to check value(s) */
+ unsigned long index; /* The first index to check */
+ void *entry1; /* The first entry value */
+ void *entry2; /* The second entry value */
+ void *entry3; /* The third entry value */
+
+ bool update_2;
+ bool update_3;
+ unsigned long range_start;
+ unsigned long range_end;
+ unsigned int loop_sleep;
+ unsigned int val_sleep;
+
+ unsigned int failed; /* failed detection for other threads */
+ unsigned int seen_entry2; /* Number of threads that have seen the new value */
+ unsigned int seen_entry3; /* Number of threads that have seen the new value */
+ unsigned int seen_both; /* Number of threads that have seen both new values */
+ unsigned int seen_toggle;
+ unsigned int seen_added;
+ unsigned int seen_removed;
+ unsigned long last; /* The end of the range to write. */
+
+ unsigned long removed; /* The index of the removed entry */
+ unsigned long added; /* The index of the removed entry */
+ unsigned long toggle; /* The index of the removed entry */
+};
+
+static inline
+int eval_rcu_entry(struct rcu_test_struct *test, void *entry, bool *update_2,
+ bool *update_3)
+{
+ if (entry == test->entry1)
+ return 0;
+
+ if (entry == test->entry2) {
+ if (!(*update_2)) {
+ uatomic_inc(&test->seen_entry2);
+ *update_2 = true;
+ if (update_3)
+ uatomic_inc(&test->seen_both);
+ }
+ return 0;
+ }
+
+ if (entry == test->entry3) {
+ if (!(*update_3)) {
+ uatomic_inc(&test->seen_entry3);
+ *update_3 = true;
+ if (update_2)
+ uatomic_inc(&test->seen_both);
+ }
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * rcu_val() - Read a given value in the tree test->count times using the
+ * regular API
+ *
+ * @ptr: The pointer to the rcu_test_struct
+ */
+static void *rcu_val(void *ptr)
+{
+ struct rcu_test_struct *test = (struct rcu_test_struct *)ptr;
+ unsigned long count = test->count;
+ bool update_2 = false;
+ bool update_3 = false;
+ void *entry;
+
+ rcu_register_thread();
+ while (count--) {
+ usleep(test->val_sleep);
+ /*
+ * No locking required, regular API locking is handled in the
+ * maple tree code
+ */
+ entry = mtree_load(test->mt, test->index);
+ MT_BUG_ON(test->mt, eval_rcu_entry(test, entry, &update_2,
+ &update_3));
+ }
+ rcu_unregister_thread();
+ return NULL;
+}
+
+/*
+ * rcu_loop() - Loop over a section of the maple tree, checking for an expected
+ * value using the advanced API
+ *
+ * @ptr - The pointer to the rcu_test_struct
+ */
+static void *rcu_loop(void *ptr)
+{
+ struct rcu_test_struct *test = (struct rcu_test_struct *)ptr;
+ unsigned long count = test->count;
+ void *entry, *expected;
+ bool update_2 = false;
+ bool update_3 = false;
+ MA_STATE(mas, test->mt, test->range_start, test->range_start);
+
+ rcu_register_thread();
+
+ /*
+ * Loop through the test->range_start - test->range_end test->count
+ * times
+ */
+ while (count--) {
+ usleep(test->loop_sleep);
+ rcu_read_lock();
+ mas_for_each(&mas, entry, test->range_end) {
+ /* The expected value is based on the start range. */
+ expected = xa_mk_value(mas.index ? mas.index / 10 : 0);
+
+ /* Out of the interesting range */
+ if (mas.index < test->index || mas.index > test->last) {
+ if (entry != expected) {
+ printk("%lx - %lx = %p not %p\n",
+ mas.index, mas.last, entry, expected);
+ }
+ MT_BUG_ON(test->mt, entry != expected);
+ continue;
+ }
+
+ if (entry == expected)
+ continue; /* Not seen. */
+
+ /* In the interesting range */
+ MT_BUG_ON(test->mt, eval_rcu_entry(test, entry,
+ &update_2,
+ &update_3));
+ }
+ rcu_read_unlock();
+ mas_set(&mas, test->range_start);
+ }
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
+static noinline
+void run_check_rcu(struct maple_tree *mt, struct rcu_test_struct *vals)
+{
+
+ int i;
+ void *(*function)(void *);
+ pthread_t readers[20];
+
+ mt_set_in_rcu(mt);
+ MT_BUG_ON(mt, !mt_in_rcu(mt));
+
+ for (i = 0; i < ARRAY_SIZE(readers); i++) {
+ if (i % 2)
+ function = rcu_loop;
+ else
+ function = rcu_val;
+
+ if (pthread_create(&readers[i], NULL, *function, vals)) {
+ perror("creating reader thread");
+ exit(1);
+ }
+ }
+
+ usleep(5); /* small yield to ensure all threads are at least started. */
+ mtree_store_range(mt, vals->index, vals->last, vals->entry2,
+ GFP_KERNEL);
+ while (i--)
+ pthread_join(readers[i], NULL);
+
+ /* Make sure the test caught at least one update. */
+ MT_BUG_ON(mt, !vals->seen_entry2);
+}
+
+static void *rcu_slot_store_reader(void *ptr)
+{
+ struct rcu_test_struct3 *test = ptr;
+ MA_STATE(mas, test->mt, test->index, test->index);
+
+ rcu_register_thread();
+
+ rcu_read_lock();
+ while (!test->stop) {
+ mas_walk(&mas);
+ /* The length of growth to both sides must be equal. */
+ RCU_MT_BUG_ON(test, (test->index - mas.index) !=
+ (mas.last - test->last));
+ }
+ rcu_read_unlock();
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
+static noinline void run_check_rcu_slot_store(struct maple_tree *mt)
+{
+ pthread_t readers[20];
+ int range_cnt = 200, i, limit = 10000;
+ unsigned long len = ULONG_MAX / range_cnt, start, end;
+ struct rcu_test_struct3 test = {.stop = false, .mt = mt};
+
+ start = range_cnt / 2 * len;
+ end = start + len - 1;
+ test.index = start;
+ test.last = end;
+
+ for (i = 0; i < range_cnt; i++) {
+ mtree_store_range(mt, i * len, i * len + len - 1,
+ xa_mk_value(i * 100), GFP_KERNEL);
+ }
+
+ mt_set_in_rcu(mt);
+ MT_BUG_ON(mt, !mt_in_rcu(mt));
+
+ for (i = 0; i < ARRAY_SIZE(readers); i++) {
+ if (pthread_create(&readers[i], NULL, rcu_slot_store_reader,
+ &test)) {
+ perror("creating reader thread");
+ exit(1);
+ }
+ }
+
+ usleep(5);
+
+ while (limit--) {
+ /* Step by step, expand the most middle range to both sides. */
+ mtree_store_range(mt, --start, ++end, xa_mk_value(100),
+ GFP_KERNEL);
+ }
+
+ test.stop = true;
+
+ while (i--)
+ pthread_join(readers[i], NULL);
+
+ mt_validate(mt);
+}
+
+static noinline
+void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals)
+{
+
+ int i;
+ void *(*function)(void *);
+ pthread_t readers[30];
+ unsigned int index = vals->index;
+
+ mt_set_in_rcu(mt);
+ MT_BUG_ON(mt, !mt_in_rcu(mt));
+
+ for (i = 0; i < ARRAY_SIZE(readers); i++) {
+ if (i % 2)
+ function = rcu_loop;
+ else
+ function = rcu_val;
+
+ if (pthread_create(&readers[i], NULL, *function, vals)) {
+ perror("creating reader thread");
+ exit(1);
+ }
+ }
+
+ usleep(3); /* small yield to ensure all threads are at least started. */
+
+ while (index <= vals->last) {
+ mtree_store(mt, index,
+ (index % 2 ? vals->entry2 : vals->entry3),
+ GFP_KERNEL);
+ index++;
+ usleep(2);
+ }
+
+ while (i--)
+ pthread_join(readers[i], NULL);
+
+ /* Make sure the test caught at least one update. */
+ MT_BUG_ON(mt, !vals->seen_entry2);
+ MT_BUG_ON(mt, !vals->seen_entry3);
+ MT_BUG_ON(mt, !vals->seen_both);
+}
+
+static noinline void __init check_rcu_simulated(struct maple_tree *mt)
+{
+ unsigned long i, nr_entries = 1000;
+ unsigned long target = 4320;
+ unsigned long val = 0xDEAD;
+
+ MA_STATE(mas_writer, mt, 0, 0);
+ MA_STATE(mas_reader, mt, target, target);
+
+ rcu_register_thread();
+
+ mt_set_in_rcu(mt);
+ mas_lock(&mas_writer);
+ for (i = 0; i <= nr_entries; i++) {
+ mas_writer.index = i * 10;
+ mas_writer.last = i * 10 + 5;
+ mas_store_gfp(&mas_writer, xa_mk_value(i), GFP_KERNEL);
+ }
+ mas_unlock(&mas_writer);
+
+ /* Overwrite one entry with a new value. */
+ mas_set_range(&mas_writer, target, target + 5);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(target/10));
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(val), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(val));
+ rcu_read_unlock();
+
+ /* Restore value. */
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(target/10), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ mas_reset(&mas_reader);
+
+
+ /* Overwrite 1/2 the entry */
+ mas_set_range(&mas_writer, target, target + 2);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(target/10));
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(val), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(val));
+ rcu_read_unlock();
+
+
+ /* Restore value. */
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(target/10), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ mas_reset(&mas_reader);
+
+ /* Overwrite last 1/2 the entry */
+ mas_set_range(&mas_writer, target + 2, target + 5);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(target/10));
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(val), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(target/10));
+ rcu_read_unlock();
+
+
+ /* Restore value. */
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(target/10), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ mas_reset(&mas_reader);
+
+ /* Overwrite more than the entry */
+ mas_set_range(&mas_writer, target - 5, target + 15);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(target/10));
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(val), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(val));
+ rcu_read_unlock();
+
+ /* Restore value. */
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(target/10), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ mas_reset(&mas_reader);
+
+ /* Overwrite more than the node. */
+ mas_set_range(&mas_writer, target - 400, target + 400);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(target/10));
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(val), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(val));
+ rcu_read_unlock();
+
+ /* Restore value. */
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(target/10), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ mas_reset(&mas_reader);
+
+ /* Overwrite the tree */
+ mas_set_range(&mas_writer, 0, ULONG_MAX);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(target/10));
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(val), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(val));
+ rcu_read_unlock();
+
+ /* Clear out tree & recreate it */
+ mas_lock(&mas_writer);
+ mas_set_range(&mas_writer, 0, ULONG_MAX);
+ mas_store_gfp(&mas_writer, NULL, GFP_KERNEL);
+ mas_set_range(&mas_writer, 0, 0);
+ for (i = 0; i <= nr_entries; i++) {
+ mas_writer.index = i * 10;
+ mas_writer.last = i * 10 + 5;
+ mas_store_gfp(&mas_writer, xa_mk_value(i), GFP_KERNEL);
+ }
+ mas_unlock(&mas_writer);
+
+ /* next check */
+ /* Overwrite one entry with a new value. */
+ mas_reset(&mas_reader);
+ mas_set_range(&mas_writer, target, target + 5);
+ mas_set_range(&mas_reader, target, target);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(target/10));
+ mas_prev(&mas_reader, 0);
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(val), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ MT_BUG_ON(mt, mas_next(&mas_reader, ULONG_MAX) != xa_mk_value(val));
+ rcu_read_unlock();
+
+ /* Restore value. */
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(target/10), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+
+ /* prev check */
+ /* Overwrite one entry with a new value. */
+ mas_reset(&mas_reader);
+ mas_set_range(&mas_writer, target, target + 5);
+ mas_set_range(&mas_reader, target, target);
+ rcu_read_lock();
+ MT_BUG_ON(mt, mas_walk(&mas_reader) != xa_mk_value(target/10));
+ mas_next(&mas_reader, ULONG_MAX);
+ mas_lock(&mas_writer);
+ mas_store_gfp(&mas_writer, xa_mk_value(val), GFP_KERNEL);
+ mas_unlock(&mas_writer);
+ MT_BUG_ON(mt, mas_prev(&mas_reader, 0) != xa_mk_value(val));
+ rcu_read_unlock();
+
+ rcu_unregister_thread();
+}
+
+static noinline void __init check_rcu_threaded(struct maple_tree *mt)
+{
+ unsigned long i, nr_entries = 1000;
+ struct rcu_test_struct vals;
+
+ vals.val_sleep = 200;
+ vals.loop_sleep = 110;
+
+ rcu_register_thread();
+ for (i = 0; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 5,
+ xa_mk_value(i), GFP_KERNEL);
+ /* Store across several slots. */
+ vals.count = 1000;
+ vals.mt = mt;
+ vals.index = 8650;
+ vals.last = 8666;
+ vals.entry1 = xa_mk_value(865);
+ vals.entry2 = xa_mk_value(8650);
+ vals.entry3 = xa_mk_value(8650);
+ vals.range_start = 0;
+ vals.range_end = ULONG_MAX;
+ vals.seen_entry2 = 0;
+ vals.seen_entry3 = 0;
+
+ run_check_rcu(mt, &vals);
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 5,
+ xa_mk_value(i), GFP_KERNEL);
+
+ /* 4390-4395: value 439 (0x1b7) [0x36f] */
+ /* Store across several slots. */
+ /* Spanning store. */
+ vals.count = 10000;
+ vals.mt = mt;
+ vals.index = 4390;
+ vals.last = 4398;
+ vals.entry1 = xa_mk_value(4390);
+ vals.entry2 = xa_mk_value(439);
+ vals.entry3 = xa_mk_value(439);
+ vals.seen_entry2 = 0;
+ vals.range_start = 4316;
+ vals.range_end = 5035;
+ run_check_rcu(mt, &vals);
+ mtree_destroy(mt);
+
+ /* Check expanding range in RCU mode */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ run_check_rcu_slot_store(mt);
+ mtree_destroy(mt);
+
+ /* Forward writer for rcu stress */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ rcu_stress(mt, true);
+ mtree_destroy(mt);
+
+ /* Reverse writer for rcu stress */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ rcu_stress(mt, false);
+ mtree_destroy(mt);
+
+ /* Slow reader test with spanning store. */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= nr_entries; i++)
+ mtree_store_range(mt, i*10, i*10 + 5,
+ xa_mk_value(i), GFP_KERNEL);
+
+ /* 4390-4395: value 439 (0x1b7) [0x36f] */
+ /* Store across several slots. */
+ /* Spanning store. */
+ vals.count = 15000;
+ vals.mt = mt;
+ vals.index = 4390;
+ vals.last = 4398;
+ vals.entry1 = xa_mk_value(4390);
+ vals.entry2 = xa_mk_value(439);
+ vals.entry3 = xa_mk_value(4391);
+ vals.seen_toggle = 0;
+ vals.seen_added = 0;
+ vals.seen_removed = 0;
+ vals.range_start = 4316;
+ vals.range_end = 5035;
+ vals.removed = 4360;
+ vals.added = 4396;
+ vals.toggle = 4347;
+ vals.val_sleep = 400;
+ vals.loop_sleep = 200;
+ vals.seen_entry2 = 0;
+ vals.seen_entry3 = 0;
+ vals.seen_both = 0;
+ vals.entry3 = xa_mk_value(438);
+
+ run_check_rcu_slowread(mt, &vals);
+ rcu_unregister_thread();
+}
+/* End of RCU stress testing */
+
+/* Check tree structure by depth first searching */
+static void mas_dfs_preorder(struct ma_state *mas)
+{
+
+ struct maple_enode *prev;
+ unsigned char end, slot = 0;
+ unsigned long *pivots;
+
+ if (mas->status == ma_start) {
+ mas_start(mas);
+ return;
+ }
+
+ if (mte_is_leaf(mas->node) && mte_is_root(mas->node))
+ goto done;
+
+walk_up:
+ end = mas_data_end(mas);
+ if (mte_is_leaf(mas->node) ||
+ (slot > end)) {
+ if (mte_is_root(mas->node))
+ goto done;
+
+ slot = mte_parent_slot(mas->node) + 1;
+ mas_ascend(mas);
+ goto walk_up;
+ }
+
+ prev = mas->node;
+ mas->node = mas_get_slot(mas, slot);
+ if (!mas->node || slot > end) {
+ if (mte_is_root(prev))
+ goto done;
+
+ mas->node = prev;
+ slot = mte_parent_slot(mas->node) + 1;
+ mas_ascend(mas);
+ goto walk_up;
+ }
+ pivots = ma_pivots(mte_to_node(prev), mte_node_type(prev));
+ mas->max = mas_safe_pivot(mas, pivots, slot, mte_node_type(prev));
+ mas->min = mas_safe_min(mas, pivots, slot);
+
+ return;
+done:
+ mas->status = ma_none;
+}
+
+
+static void check_dfs_preorder(struct maple_tree *mt)
+{
+ unsigned long e, count = 0, max = 1000;
+
+ MA_STATE(mas, mt, 0, 0);
+
+ if (MAPLE_32BIT)
+ e = 37;
+ else
+ e = 74;
+
+ check_seq(mt, max, false);
+ do {
+ count++;
+ mas_dfs_preorder(&mas);
+ } while (!mas_is_none(&mas));
+ MT_BUG_ON(mt, count != e);
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ mas_reset(&mas);
+ count = 0;
+ if (!MAPLE_32BIT)
+ e = 77;
+
+ check_seq(mt, max, false);
+ do {
+ count++;
+ mas_dfs_preorder(&mas);
+ } while (!mas_is_none(&mas));
+ /*printk("count %lu\n", count); */
+ MT_BUG_ON(mt, count != e);
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ mas_reset(&mas);
+ count = 0;
+ check_rev_seq(mt, max, false);
+ do {
+ count++;
+ mas_dfs_preorder(&mas);
+ } while (!mas_is_none(&mas));
+ /*printk("count %lu\n", count); */
+ MT_BUG_ON(mt, count != e);
+ mtree_destroy(mt);
+
+ rcu_barrier();
+ /*
+ * pr_info(" ->seq test of 0-%lu %luK in %d active (%d total)\n",
+ * max, mt_get_alloc_size()/1024, mt_nr_allocated(),
+ * mt_nr_tallocated());
+ */
+
+}
+/* End of depth first search tests */
+
+/* get height of the lowest non-leaf node with free space */
+static unsigned char get_vacant_height(struct ma_wr_state *wr_mas, void *entry)
+{
+ struct ma_state *mas = wr_mas->mas;
+ char vacant_height = 0;
+ enum maple_type type;
+ unsigned long *pivots;
+ unsigned long min = 0;
+ unsigned long max = ULONG_MAX;
+ unsigned char offset;
+
+ /* start traversal */
+ mas_reset(mas);
+ mas_start(mas);
+ if (!xa_is_node(mas_root(mas)))
+ return 0;
+
+ type = mte_node_type(mas->node);
+ wr_mas->type = type;
+ while (!ma_is_leaf(type)) {
+ mas_node_walk(mas, mte_to_node(mas->node), type, &min, &max);
+ offset = mas->offset;
+ mas->end = mas_data_end(mas);
+ pivots = ma_pivots(mte_to_node(mas->node), type);
+
+ if (pivots) {
+ if (offset)
+ min = pivots[mas->offset - 1];
+ if (offset < mas->end)
+ max = pivots[mas->offset];
+ }
+ wr_mas->r_max = offset < mas->end ? pivots[offset] : mas->max;
+
+ /* detect spanning write */
+ if (mas_is_span_wr(wr_mas))
+ break;
+
+ if (mas->end < mt_slot_count(mas->node) - 1)
+ vacant_height = mas->depth + 1;
+
+ mas_descend(mas);
+ type = mte_node_type(mas->node);
+ mas->depth++;
+ }
+
+ return vacant_height;
+}
+
+static int mas_allocated(struct ma_state *mas)
+{
+ int total = 0;
+
+ if (mas->alloc)
+ total++;
+
+ if (mas->sheaf)
+ total += kmem_cache_sheaf_size(mas->sheaf);
+
+ return total;
+}
+/* Preallocation testing */
+static noinline void __init check_prealloc(struct maple_tree *mt)
+{
+ unsigned long i, max = 100;
+ unsigned long allocated;
+ unsigned char height;
+ unsigned char vacant_height;
+ struct maple_node *mn;
+ void *ptr = check_prealloc;
+ MA_STATE(mas, mt, 10, 20);
+ MA_WR_STATE(wr_mas, &mas, ptr);
+
+ mt_set_non_kernel(1000);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ /* Spanning store */
+ mas_set_range(&mas, 470, 500);
+
+ mas_wr_preallocate(&wr_mas, ptr);
+ MT_BUG_ON(mt, mas.store_type != wr_spanning_store);
+ MT_BUG_ON(mt, mas_is_err(&mas));
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated == 0);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
+ mas_destroy(&mas);
+ allocated = mas_allocated(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+
+ mas_wr_preallocate(&wr_mas, ptr);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated == 0);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ mas_destroy(&mas);
+ allocated = mas_allocated(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+
+
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
+ mn = mas_pop_node(&mas);
+ MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
+ mn->parent = ma_parent_ptr(mn);
+ ma_free_rcu(mn);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ mas_destroy(&mas);
+ allocated = mas_allocated(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
+ mn = mas_pop_node(&mas);
+ MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ mas_destroy(&mas);
+ allocated = mas_allocated(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+ mn->parent = ma_parent_ptr(mn);
+ ma_free_rcu(mn);
+
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
+ mas_store_prealloc(&mas, ptr);
+ MT_BUG_ON(mt, mas_allocated(&mas) != 0);
+
+ /* Slot store does not need allocations */
+ mas_set_range(&mas, 6, 9);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+ mas_store_prealloc(&mas, ptr);
+ MT_BUG_ON(mt, mas_allocated(&mas) != 0);
+
+ mas_set_range(&mas, 6, 10);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+ mas_store_prealloc(&mas, ptr);
+ MT_BUG_ON(mt, mas_allocated(&mas) != 0);
+
+ /* Split */
+ mas_set_range(&mas, 54, 54);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 2);
+ mas_store_prealloc(&mas, ptr);
+ MT_BUG_ON(mt, mas_allocated(&mas) != 0);
+ mt_set_non_kernel(1);
+ /* Spanning store */
+ mas_set_range(&mas, 1, 100);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0);
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+ mas_destroy(&mas);
+
+
+ /* Spanning store */
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated == 0);
+ /*
+ * vacant height cannot be used to compute the number of nodes needed
+ * as the root contains two entries which means it is on the verge of
+ * insufficiency. The worst case full height of the tree is needed.
+ */
+ MT_BUG_ON(mt, allocated != height * 3 + 1);
+ mas_store_prealloc(&mas, ptr);
+ MT_BUG_ON(mt, mas_allocated(&mas) != 0);
+ mas_set_range(&mas, 0, 200);
+ mt_set_non_kernel(1);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0);
+ allocated = mas_allocated(&mas);
+ height = mas_mt_height(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+
+ /* Chaining multiple preallocations */
+ mt_set_in_rcu(mt);
+ mas_set_range(&mas, 800, 805); /* Slot store, should be 0 allocations */
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+ mas.last = 809; /* Node store */
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ MT_BUG_ON(mt, allocated != 1);
+ mas_store_prealloc(&mas, ptr);
+}
+/* End of preallocation testing */
+
+/* Spanning writes, writes that span nodes and layers of the tree */
+static noinline void __init check_spanning_write(struct maple_tree *mt)
+{
+ unsigned long i, max = 5000;
+ MA_STATE(mas, mt, 1200, 2380);
+
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 1205);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ for (i = 1; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ mtree_lock(mt);
+ mas_set_range(&mas, 9, 50006); /* Will expand to 0 - ULONG_MAX */
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 1205);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mt_validate(mt);
+ mtree_destroy(mt);
+
+ /* Test spanning store that requires a right cousin rebalance */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ mas_set_range(&mas, 0, 12900); /* Spans more than 2 levels */
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 1205);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ /* Test non-alloc tree spanning store */
+ mt_init_flags(mt, 0);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ mas_set_range(&mas, 0, 300);
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 15);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ /* Test spanning store that requires a right sibling rebalance */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ mas_set_range(&mas, 0, 12865);
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 15);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ /* Test spanning store that requires a left sibling rebalance */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ mas_set_range(&mas, 90, 13665);
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 95);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ /* Test spanning store that requires a left cousin rebalance */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ mas_set_range(&mas, 46805, 49995);
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 46815);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ /*
+ * Test spanning store that requires a left cousin rebalance all the way
+ * to root
+ */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ mas_set_range(&mas, 32395, 49995);
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 46815);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ /*
+ * Test spanning store that requires a right cousin rebalance all the
+ * way to root
+ */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+ mas_set_range(&mas, 38875, 43190);
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 38900);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ /* Test spanning store ending at full node (depth 2)*/
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+ mtree_lock(mt);
+ mas_set(&mas, 47606);
+ mas_store_gfp(&mas, check_spanning_write, GFP_KERNEL);
+ mas_set(&mas, 47607);
+ mas_store_gfp(&mas, check_spanning_write, GFP_KERNEL);
+ mas_set(&mas, 47608);
+ mas_store_gfp(&mas, check_spanning_write, GFP_KERNEL);
+ mas_set(&mas, 47609);
+ mas_store_gfp(&mas, check_spanning_write, GFP_KERNEL);
+ /* Ensure the parent node is full */
+ mas_ascend(&mas);
+ MT_BUG_ON(mt, (mas_data_end(&mas)) != mt_slot_count(mas.node) - 1);
+ mas_set_range(&mas, 11516, 48940);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ /* Test spanning write with many levels of no siblings */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+ mas_set_range(&mas, 43200, 49999);
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mas_set(&mas, 43200);
+ MT_BUG_ON(mt, mas_walk(&mas) != NULL);
+ mtree_unlock(mt);
+ mtree_destroy(mt);
+
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i <= 100; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+
+ mtree_lock(mt);
+ mas_set_range(&mas, 76, 875);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ mtree_unlock(mt);
+}
+/* End of spanning write testing */
+
+/* Writes to a NULL area that are adjacent to other NULLs */
+static noinline void __init check_null_expand(struct maple_tree *mt)
+{
+ unsigned long i, max = 100;
+ unsigned char data_end;
+ MA_STATE(mas, mt, 959, 959);
+
+ for (i = 0; i <= max; i++)
+ mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
+ /* Test expanding null at start. */
+ mas_lock(&mas);
+ mas_walk(&mas);
+ data_end = mas_data_end(&mas);
+ mas_set_range(&mas, 959, 963);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ MT_BUG_ON(mt, mtree_load(mt, 963) != NULL);
+ MT_BUG_ON(mt, data_end != mas_data_end(&mas));
+
+ /* Test expanding null at end. */
+ mas_set(&mas, 880);
+ mas_walk(&mas);
+ data_end = mas_data_end(&mas);
+ mas_set_range(&mas, 884, 887);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ MT_BUG_ON(mt, mtree_load(mt, 884) != NULL);
+ MT_BUG_ON(mt, mtree_load(mt, 889) != NULL);
+#if CONFIG_64BIT
+ MT_BUG_ON(mt, data_end != mas_data_end(&mas));
+#endif
+
+ /* Test expanding null at start and end. */
+ mas_set(&mas, 890);
+ mas_walk(&mas);
+ data_end = mas_data_end(&mas);
+ mas_set_range(&mas, 900, 905);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ MT_BUG_ON(mt, mtree_load(mt, 899) != NULL);
+ MT_BUG_ON(mt, mtree_load(mt, 900) != NULL);
+ MT_BUG_ON(mt, mtree_load(mt, 905) != NULL);
+ MT_BUG_ON(mt, mtree_load(mt, 906) != NULL);
+#if CONFIG_64BIT
+ MT_BUG_ON(mt, data_end - 2 != mas_data_end(&mas));
+#endif
+
+ /* Test expanding null across multiple slots. */
+ mas_set(&mas, 800);
+ mas_walk(&mas);
+ data_end = mas_data_end(&mas);
+ mas_set_range(&mas, 810, 825);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+ MT_BUG_ON(mt, mtree_load(mt, 809) != NULL);
+ MT_BUG_ON(mt, mtree_load(mt, 810) != NULL);
+ MT_BUG_ON(mt, mtree_load(mt, 825) != NULL);
+ MT_BUG_ON(mt, mtree_load(mt, 826) != NULL);
+#if CONFIG_64BIT
+ MT_BUG_ON(mt, data_end - 4 != mas_data_end(&mas));
+#endif
+ mas_unlock(&mas);
+}
+/* End of NULL area expansions */
+
+/* Checking for no memory is best done outside the kernel */
+static noinline void __init check_nomem(struct maple_tree *mt)
+{
+ MA_STATE(ms, mt, 1, 1);
+
+ MT_BUG_ON(mt, !mtree_empty(mt));
+ /* Ensure no bypassing of allocation failures */
+ mt_set_non_kernel(0);
+
+ /* Storing something at 1 requires memory allocation */
+ MT_BUG_ON(mt, mtree_insert(mt, 1, &ms, GFP_ATOMIC) != -ENOMEM);
+ /* Storing something at 0 does not */
+ MT_BUG_ON(mt, mtree_insert(mt, 0, &ms, GFP_ATOMIC) != 0);
+
+ /*
+ * Simulate two threads racing; the first one fails to allocate
+ * memory to insert an entry at 1, then the second one succeeds
+ * in allocating memory to insert an entry at 2. The first one
+ * then needs to free the node it allocated. LeakSanitizer will
+ * notice this, as will the 'nr_allocated' debugging aid in the
+ * userspace test suite.
+ */
+ mtree_lock(mt);
+ mas_store(&ms, &ms); /* insert 1 -> &ms, fails. */
+ MT_BUG_ON(mt, ms.node != MA_ERROR(-ENOMEM));
+ mas_nomem(&ms, GFP_KERNEL); /* Node allocated in here. */
+ MT_BUG_ON(mt, ms.status != ma_start);
+ mtree_unlock(mt);
+ MT_BUG_ON(mt, mtree_insert(mt, 2, mt, GFP_KERNEL) != 0);
+ mtree_lock(mt);
+ mas_store(&ms, &ms); /* insert 1 -> &ms */
+ mas_nomem(&ms, GFP_KERNEL); /* Node allocated in here. */
+ mtree_unlock(mt);
+ mas_destroy(&ms);
+ mtree_destroy(mt);
+}
+
+static noinline void __init check_locky(struct maple_tree *mt)
+{
+ MA_STATE(ms, mt, 2, 2);
+ MA_STATE(reader, mt, 2, 2);
+
+ mt_set_non_kernel(2);
+ mt_set_in_rcu(mt);
+ mas_lock(&ms);
+ mas_store(&ms, &ms);
+ mas_set_range(&ms, 1, 3);
+ mas_store(&ms, &reader);
+ mas_unlock(&ms);
+ mt_clear_in_rcu(mt);
+}
+
+/*
+ * Compares two nodes except for the addresses stored in the nodes.
+ * Returns zero if they are the same, otherwise returns non-zero.
+ */
+static int __init compare_node(struct maple_enode *enode_a,
+ struct maple_enode *enode_b)
+{
+ struct maple_node *node_a, *node_b;
+ struct maple_node a, b;
+ void **slots_a, **slots_b; /* Do not use the rcu tag. */
+ enum maple_type type;
+ int i;
+
+ if (((unsigned long)enode_a & MAPLE_NODE_MASK) !=
+ ((unsigned long)enode_b & MAPLE_NODE_MASK)) {
+ pr_err("The lower 8 bits of enode are different.\n");
+ return -1;
+ }
+
+ type = mte_node_type(enode_a);
+ node_a = mte_to_node(enode_a);
+ node_b = mte_to_node(enode_b);
+ a = *node_a;
+ b = *node_b;
+
+ /* Do not compare addresses. */
+ if (ma_is_root(node_a) || ma_is_root(node_b)) {
+ a.parent = (struct maple_pnode *)((unsigned long)a.parent &
+ MA_ROOT_PARENT);
+ b.parent = (struct maple_pnode *)((unsigned long)b.parent &
+ MA_ROOT_PARENT);
+ } else {
+ a.parent = (struct maple_pnode *)((unsigned long)a.parent &
+ MAPLE_NODE_MASK);
+ b.parent = (struct maple_pnode *)((unsigned long)b.parent &
+ MAPLE_NODE_MASK);
+ }
+
+ if (a.parent != b.parent) {
+ pr_err("The lower 8 bits of parents are different. %p %p\n",
+ a.parent, b.parent);
+ return -1;
+ }
+
+ /*
+ * If it is a leaf node, the slots do not contain the node address, and
+ * no special processing of slots is required.
+ */
+ if (ma_is_leaf(type))
+ goto cmp;
+
+ slots_a = ma_slots(&a, type);
+ slots_b = ma_slots(&b, type);
+
+ for (i = 0; i < mt_slots[type]; i++) {
+ if (!slots_a[i] && !slots_b[i])
+ break;
+
+ if (!slots_a[i] || !slots_b[i]) {
+ pr_err("The number of slots is different.\n");
+ return -1;
+ }
+
+ /* Do not compare addresses in slots. */
+ ((unsigned long *)slots_a)[i] &= MAPLE_NODE_MASK;
+ ((unsigned long *)slots_b)[i] &= MAPLE_NODE_MASK;
+ }
+
+cmp:
+ /*
+ * Compare all contents of two nodes, including parent (except address),
+ * slots (except address), pivots, gaps and metadata.
+ */
+ return memcmp(&a, &b, sizeof(struct maple_node));
+}
+
+/*
+ * Compare two trees and return 0 if they are the same, non-zero otherwise.
+ */
+static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b)
+{
+ MA_STATE(mas_a, mt_a, 0, 0);
+ MA_STATE(mas_b, mt_b, 0, 0);
+
+ if (mt_a->ma_flags != mt_b->ma_flags) {
+ pr_err("The flags of the two trees are different.\n");
+ return -1;
+ }
+
+ mas_dfs_preorder(&mas_a);
+ mas_dfs_preorder(&mas_b);
+
+ if (mas_is_ptr(&mas_a) || mas_is_ptr(&mas_b)) {
+ if (!(mas_is_ptr(&mas_a) && mas_is_ptr(&mas_b))) {
+ pr_err("One is ma_root and the other is not.\n");
+ return -1;
+ }
+ return 0;
+ }
+
+ while (!mas_is_none(&mas_a) || !mas_is_none(&mas_b)) {
+
+ if (mas_is_none(&mas_a) || mas_is_none(&mas_b)) {
+ pr_err("One is ma_none and the other is not.\n");
+ return -1;
+ }
+
+ if (mas_a.min != mas_b.min ||
+ mas_a.max != mas_b.max) {
+ pr_err("mas->min, mas->max do not match.\n");
+ return -1;
+ }
+
+ if (compare_node(mas_a.node, mas_b.node)) {
+ pr_err("The contents of nodes %p and %p are different.\n",
+ mas_a.node, mas_b.node);
+ mt_dump(mt_a, mt_dump_dec);
+ mt_dump(mt_b, mt_dump_dec);
+ return -1;
+ }
+
+ mas_dfs_preorder(&mas_a);
+ mas_dfs_preorder(&mas_b);
+ }
+
+ return 0;
+}
+
+static __init void mas_subtree_max_range(struct ma_state *mas)
+{
+ unsigned long limit = mas->max;
+ MA_STATE(newmas, mas->tree, 0, 0);
+ void *entry;
+
+ mas_for_each(mas, entry, limit) {
+ if (mas->last - mas->index >=
+ newmas.last - newmas.index) {
+ newmas = *mas;
+ }
+ }
+
+ *mas = newmas;
+}
+
+/*
+ * build_full_tree() - Build a full tree.
+ * @mt: The tree to build.
+ * @flags: Use @flags to build the tree.
+ * @height: The height of the tree to build.
+ *
+ * Build a tree with full leaf nodes and internal nodes. Note that the height
+ * should not exceed 3, otherwise it will take a long time to build.
+ * Return: zero if the build is successful, non-zero if it fails.
+ */
+static __init int build_full_tree(struct maple_tree *mt, unsigned int flags,
+ int height)
+{
+ MA_STATE(mas, mt, 0, 0);
+ unsigned long step;
+ int ret = 0, cnt = 1;
+ enum maple_type type;
+
+ mt_init_flags(mt, flags);
+ mtree_insert_range(mt, 0, ULONG_MAX, xa_mk_value(5), GFP_KERNEL);
+
+ mtree_lock(mt);
+
+ while (1) {
+ mas_set(&mas, 0);
+ if (mt_height(mt) < height) {
+ mas.max = ULONG_MAX;
+ goto store;
+ }
+
+ while (1) {
+ mas_dfs_preorder(&mas);
+ if (mas_is_none(&mas))
+ goto unlock;
+
+ type = mte_node_type(mas.node);
+ if (mas_data_end(&mas) + 1 < mt_slots[type]) {
+ mas_set(&mas, mas.min);
+ goto store;
+ }
+ }
+store:
+ mas_subtree_max_range(&mas);
+ step = mas.last - mas.index;
+ if (step < 1) {
+ ret = -1;
+ goto unlock;
+ }
+
+ step /= 2;
+ mas.last = mas.index + step;
+ mas_store_gfp(&mas, xa_mk_value(5),
+ GFP_KERNEL);
+ ++cnt;
+ }
+unlock:
+ mtree_unlock(mt);
+
+ MT_BUG_ON(mt, mt_height(mt) != height);
+ /* pr_info("height:%u number of elements:%d\n", mt_height(mt), cnt); */
+ return ret;
+}
+
+static noinline void __init check_mtree_dup(struct maple_tree *mt)
+{
+ DEFINE_MTREE(new);
+ int i, j, ret, count = 0;
+ unsigned int rand_seed = 17, rand;
+
+ /* store a value at [0, 0] */
+ mt_init_flags(mt, 0);
+ mtree_store_range(mt, 0, 0, xa_mk_value(0), GFP_KERNEL);
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret);
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+
+ /* The two trees have different attributes. */
+ mt_init_flags(mt, 0);
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret != -EINVAL);
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+
+ /* The new tree is not empty */
+ mt_init_flags(mt, 0);
+ mt_init_flags(&new, 0);
+ mtree_store(&new, 5, xa_mk_value(5), GFP_KERNEL);
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret != -EINVAL);
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+
+ /* Test for duplicating full trees. */
+ for (i = 1; i <= 3; i++) {
+ ret = build_full_tree(mt, 0, i);
+ MT_BUG_ON(mt, ret);
+ mt_init_flags(&new, 0);
+
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret);
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+ }
+
+ for (i = 1; i <= 3; i++) {
+ ret = build_full_tree(mt, MT_FLAGS_ALLOC_RANGE, i);
+ MT_BUG_ON(mt, ret);
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret);
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+ }
+
+ /* Test for normal duplicating. */
+ for (i = 0; i < 1000; i += 3) {
+ if (i & 1) {
+ mt_init_flags(mt, 0);
+ mt_init_flags(&new, 0);
+ } else {
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+ }
+
+ for (j = 0; j < i; j++) {
+ mtree_store_range(mt, j * 10, j * 10 + 5,
+ xa_mk_value(j), GFP_KERNEL);
+ }
+
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret);
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+ }
+
+ /* Test memory allocation failed. */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i < 30; i += 3) {
+ mtree_store_range(mt, j * 10, j * 10 + 5,
+ xa_mk_value(j), GFP_KERNEL);
+ }
+
+ /* Failed at the first node. */
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+ mt_set_non_kernel(0);
+ ret = mtree_dup(mt, &new, GFP_NOWAIT);
+ mt_set_non_kernel(0);
+ MT_BUG_ON(&new, ret != -ENOMEM);
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+
+ /* Random maple tree fails at a random node. */
+ for (i = 0; i < 1000; i += 3) {
+ if (i & 1) {
+ mt_init_flags(mt, 0);
+ mt_init_flags(&new, 0);
+ } else {
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+ }
+
+ for (j = 0; j < i; j++) {
+ mtree_store_range(mt, j * 10, j * 10 + 5,
+ xa_mk_value(j), GFP_KERNEL);
+ }
+ /*
+ * The rand() library function is not used, so we can generate
+ * the same random numbers on any platform.
+ */
+ rand_seed = rand_seed * 1103515245 + 12345;
+ rand = rand_seed / 65536 % 128;
+ mt_set_non_kernel(rand);
+
+ ret = mtree_dup(mt, &new, GFP_NOWAIT);
+ mt_set_non_kernel(0);
+ if (ret != 0) {
+ MT_BUG_ON(&new, ret != -ENOMEM);
+ count++;
+ mtree_destroy(mt);
+ continue;
+ }
+
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+ }
+
+ /* pr_info("mtree_dup() fail %d times\n", count); */
+ BUG_ON(!count);
+}
+
+extern void test_kmem_cache_bulk(void);
+
+static inline void check_spanning_store_height(struct maple_tree *mt)
+{
+ int index = 0;
+ int last = 140;
+ MA_STATE(mas, mt, 0, 0);
+ mas_lock(&mas);
+ while (mt_height(mt) != 3) {
+ mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
+ mas_set(&mas, ++index);
+ }
+
+ if (MAPLE_32BIT)
+ last = 155; /* 32 bit higher branching factor. */
+
+ mas_set_range(&mas, 90, last);
+ mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
+ MT_BUG_ON(mt, mas_mt_height(&mas) != 2);
+ mas_unlock(&mas);
+}
+
+/*
+ * Test to check the path of a spanning rebalance which results in
+ * a collapse where the rebalancing of the child node leads to
+ * insufficieny in the parent node.
+ */
+static void check_collapsing_rebalance(struct maple_tree *mt)
+{
+ int i = 0;
+ MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
+
+ /* create a height 6 tree */
+ while (mt_height(mt) < 6) {
+ mtree_store_range(mt, i, i + 10, xa_mk_value(i), GFP_KERNEL);
+ i += 9;
+ }
+
+ /* delete all entries one at a time, starting from the right */
+ do {
+ mas_erase(&mas);
+ } while (mas_prev(&mas, 0) != NULL);
+
+ mtree_unlock(mt);
+}
+
+/* callback function used for check_nomem_writer_race() */
+static void writer2(void *maple_tree)
+{
+ struct maple_tree *mt = (struct maple_tree *)maple_tree;
+ MA_STATE(mas, mt, 6, 10);
+
+ mtree_lock(mas.tree);
+ mas_store(&mas, xa_mk_value(0xC));
+ mas_destroy(&mas);
+ mtree_unlock(mas.tree);
+}
+
+/*
+ * check_nomem_writer_race() - test a possible race in the mas_nomem() path
+ * @mt: The tree to build.
+ *
+ * There is a possible race condition in low memory conditions when mas_nomem()
+ * gives up its lock. A second writer can chagne the entry that the primary
+ * writer executing the mas_nomem() path is modifying. This test recreates this
+ * scenario to ensure we are handling it correctly.
+ */
+static void check_nomem_writer_race(struct maple_tree *mt)
+{
+ MA_STATE(mas, mt, 0, 5);
+
+ mt_set_non_kernel(0);
+ /* setup root with 2 values with NULL in between */
+ mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL);
+ mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL);
+ mtree_store_range(mt, 11, 15, xa_mk_value(0xB), GFP_KERNEL);
+
+ /* setup writer 2 that will trigger the race condition */
+ mt_set_private(mt);
+ mt_set_callback(writer2);
+
+ mtree_lock(mt);
+ /* erase 0-5 */
+ mas_erase(&mas);
+
+ /* index 6-10 should retain the value from writer 2 */
+ check_load(mt, 6, xa_mk_value(0xC));
+ mtree_unlock(mt);
+
+ mt_set_non_kernel(0);
+ /* test for the same race but with mas_store_gfp() */
+ mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL);
+ mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL);
+
+ mas_set_range(&mas, 0, 5);
+
+ /* setup writer 2 that will trigger the race condition */
+ mt_set_private(mt);
+ mt_set_callback(writer2);
+
+ mtree_lock(mt);
+ mas_store_gfp(&mas, NULL, GFP_KERNEL);
+
+ /* ensure write made by writer 2 is retained */
+ check_load(mt, 6, xa_mk_value(0xC));
+
+ mt_set_private(NULL);
+ mt_set_callback(NULL);
+ mtree_unlock(mt);
+}
+
+ /* test to simulate expanding a vma from [0x7fffffffe000, 0x7ffffffff000)
+ * to [0x7ffde4ca1000, 0x7ffffffff000) and then shrinking the vma to
+ * [0x7ffde4ca1000, 0x7ffde4ca2000)
+ */
+static inline int check_vma_modification(struct maple_tree *mt)
+{
+#if defined(CONFIG_64BIT)
+ MA_STATE(mas, mt, 0, 0);
+
+ mtree_lock(mt);
+ /* vma with old start and old end */
+ __mas_set_range(&mas, 0x7fffffffe000, 0x7ffffffff000 - 1);
+ mas_preallocate(&mas, xa_mk_value(1), GFP_KERNEL);
+ mas_store_prealloc(&mas, xa_mk_value(1));
+
+ /* next write occurs partly in previous range [0, 0x7fffffffe000)*/
+ mas_prev_range(&mas, 0);
+ /* expand vma to {0x7ffde4ca1000, 0x7ffffffff000) */
+ __mas_set_range(&mas, 0x7ffde4ca1000, 0x7ffffffff000 - 1);
+ mas_preallocate(&mas, xa_mk_value(1), GFP_KERNEL);
+ mas_store_prealloc(&mas, xa_mk_value(1));
+
+ /* shrink vma to [0x7ffde4ca1000, 7ffde4ca2000) */
+ __mas_set_range(&mas, 0x7ffde4ca2000, 0x7ffffffff000 - 1);
+ mas_preallocate(&mas, NULL, GFP_KERNEL);
+ mas_store_prealloc(&mas, NULL);
+ mt_dump(mt, mt_dump_hex);
+
+ mas_destroy(&mas);
+ mtree_unlock(mt);
+#endif
+
+ return 0;
+}
+
+
+void farmer_tests(void)
+{
+ struct maple_node *node;
+ DEFINE_MTREE(tree);
+
+ mt_dump(&tree, mt_dump_dec);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | MT_FLAGS_USE_RCU);
+ check_vma_modification(&tree);
+ mtree_destroy(&tree);
+
+ tree.ma_root = xa_mk_value(0);
+ mt_dump(&tree, mt_dump_dec);
+
+ node = mt_alloc_one(GFP_KERNEL);
+ node->parent = (void *)((unsigned long)(&tree) | 1);
+ node->slot[0] = xa_mk_value(0);
+ node->slot[1] = xa_mk_value(1);
+ node->mr64.pivot[0] = 0;
+ node->mr64.pivot[1] = 1;
+ node->mr64.pivot[2] = 0;
+ tree.ma_root = mt_mk_node(node, maple_leaf_64);
+ mt_dump(&tree, mt_dump_dec);
+
+ node->parent = ma_parent_ptr(node);
+ ma_free_rcu(node);
+
+ /* Check things that will make lockdep angry */
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_locky(&tree);
+ mtree_destroy(&tree);
+ test_kmem_cache_bulk();
+
+ mt_init_flags(&tree, 0);
+ check_dfs_preorder(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_USE_RCU);
+ check_nomem_writer_race(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_prealloc(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_spanning_write(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_spanning_store_height(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_collapsing_rebalance(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_null_expand(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, 0);
+ check_mtree_dup(&tree);
+ mtree_destroy(&tree);
+
+ /* RCU testing */
+ mt_init_flags(&tree, 0);
+ check_erase_testset(&tree);
+ mtree_destroy(&tree);
+
+ if (!MAPLE_32BIT) {
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_rcu_simulated(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_rcu_threaded(&tree);
+ mtree_destroy(&tree);
+ }
+
+
+#if defined(CONFIG_64BIT)
+ /* Captures from VMs that found previous errors */
+ mt_init_flags(&tree, 0);
+ check_erase2_sets(&tree);
+ mtree_destroy(&tree);
+#endif
+
+
+ /* No memory handling */
+ check_nomem(&tree);
+}
+
+static unsigned long get_last_index(struct ma_state *mas)
+{
+ struct maple_node *node = mas_mn(mas);
+ enum maple_type mt = mte_node_type(mas->node);
+ unsigned long *pivots = ma_pivots(node, mt);
+ unsigned long last_index = mas_data_end(mas);
+
+ BUG_ON(last_index == 0);
+
+ return pivots[last_index - 1] + 1;
+}
+
+/*
+ * Assert that we handle spanning stores that consume the entirety of the right
+ * leaf node correctly.
+ */
+static void test_spanning_store_regression(void)
+{
+ unsigned long from = 0, to = 0;
+ DEFINE_MTREE(tree);
+ MA_STATE(mas, &tree, 0, 0);
+
+ /*
+ * Build a 3-level tree. We require a parent node below the root node
+ * and 2 leaf nodes under it, so we can span the entirety of the right
+ * hand node.
+ */
+ build_full_tree(&tree, 0, 3);
+
+ /* Descend into position at depth 2. */
+ mas_reset(&mas);
+ mas_start(&mas);
+ mas_descend(&mas);
+ mas_descend(&mas);
+
+ /*
+ * We need to establish a tree like the below.
+ *
+ * Then we can try a store in [from, to] which results in a spanned
+ * store across nodes B and C, with the maple state at the time of the
+ * write being such that only the subtree at A and below is considered.
+ *
+ * Height
+ * 0 Root Node
+ * / \
+ * pivot = to / \ pivot = ULONG_MAX
+ * / \
+ * 1 A [-----] ...
+ * / \
+ * pivot = from / \ pivot = to
+ * / \
+ * 2 (LEAVES) B [-----] [-----] C
+ * ^--- Last pivot to.
+ */
+ while (true) {
+ unsigned long tmp = get_last_index(&mas);
+
+ if (mas_next_sibling(&mas)) {
+ from = tmp;
+ to = mas.max;
+ } else {
+ break;
+ }
+ }
+
+ BUG_ON(from == 0 && to == 0);
+
+ /* Perform the store. */
+ mas_set_range(&mas, from, to);
+ mas_store_gfp(&mas, xa_mk_value(0xdead), GFP_KERNEL);
+
+ /* If the regression occurs, the validation will fail. */
+ mt_validate(&tree);
+
+ /* Cleanup. */
+ __mt_destroy(&tree);
+}
+
+static void regression_tests(void)
+{
+ test_spanning_store_regression();
+}
+
+void maple_tree_tests(void)
+{
+#if !defined(BENCH)
+ regression_tests();
+ farmer_tests();
+#endif
+ maple_tree_seed();
+ maple_tree_harvest();
+}
+
+int __weak main(void)
+{
+ maple_tree_init();
+ maple_tree_tests();
+ rcu_barrier();
+ if (nr_allocated)
+ printf("nr_allocated = %d\n", nr_allocated);
+ return 0;
+}
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 9eae0fb5a67d..eaff1b036989 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -159,7 +159,7 @@ void multiorder_tagged_iteration(struct xarray *xa)
item_kill_tree(xa);
}
-bool stop_iteration = false;
+bool stop_iteration;
static void *creator_func(void *ptr)
{
@@ -201,6 +201,7 @@ static void multiorder_iteration_race(struct xarray *xa)
pthread_t worker_thread[num_threads];
int i;
+ stop_iteration = false;
pthread_create(&worker_thread[0], NULL, &creator_func, xa);
for (i = 1; i < num_threads; i++)
pthread_create(&worker_thread[i], NULL, &iterator_func, xa);
@@ -211,6 +212,65 @@ static void multiorder_iteration_race(struct xarray *xa)
item_kill_tree(xa);
}
+static void *load_creator(void *ptr)
+{
+ /* 'order' is set up to ensure we have sibling entries */
+ unsigned int order;
+ struct radix_tree_root *tree = ptr;
+ int i;
+
+ rcu_register_thread();
+ item_insert_order(tree, 3 << RADIX_TREE_MAP_SHIFT, 0);
+ item_insert_order(tree, 2 << RADIX_TREE_MAP_SHIFT, 0);
+ for (i = 0; i < 10000; i++) {
+ for (order = 1; order < RADIX_TREE_MAP_SHIFT; order++) {
+ unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) -
+ (1 << order);
+ item_insert_order(tree, index, order);
+ xa_set_mark(tree, index, XA_MARK_1);
+ item_delete_rcu(tree, index);
+ }
+ }
+ rcu_unregister_thread();
+
+ stop_iteration = true;
+ return NULL;
+}
+
+static void *load_worker(void *ptr)
+{
+ unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - 1;
+
+ rcu_register_thread();
+ while (!stop_iteration) {
+ unsigned long find_index = (2 << RADIX_TREE_MAP_SHIFT) + 1;
+ struct item *item = xa_load(ptr, index);
+ assert(!xa_is_internal(item));
+ item = xa_find(ptr, &find_index, index, XA_MARK_1);
+ assert(!xa_is_internal(item));
+ }
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+static void load_race(struct xarray *xa)
+{
+ const int num_threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
+ pthread_t worker_thread[num_threads];
+ int i;
+
+ stop_iteration = false;
+ pthread_create(&worker_thread[0], NULL, &load_creator, xa);
+ for (i = 1; i < num_threads; i++)
+ pthread_create(&worker_thread[i], NULL, &load_worker, xa);
+
+ for (i = 0; i < num_threads; i++)
+ pthread_join(worker_thread[i], NULL);
+
+ item_kill_tree(xa);
+}
+
static DEFINE_XARRAY(array);
void multiorder_checks(void)
@@ -218,13 +278,23 @@ void multiorder_checks(void)
multiorder_iteration(&array);
multiorder_tagged_iteration(&array);
multiorder_iteration_race(&array);
+ load_race(&array);
radix_tree_cpu_dead(0);
}
-int __weak main(void)
+int __weak main(int argc, char **argv)
{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "ls:v")) != -1) {
+ if (opt == 'v')
+ test_verbose++;
+ }
+
+ rcu_register_thread();
radix_tree_init();
multiorder_checks();
+ rcu_unregister_thread();
return 0;
}
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
index a61c7bcbc72d..63f468bf8245 100644
--- a/tools/testing/radix-tree/regression1.c
+++ b/tools/testing/radix-tree/regression1.c
@@ -177,7 +177,7 @@ void regression1_test(void)
nr_threads = 2;
pthread_barrier_init(&worker_barrier, NULL, nr_threads);
- threads = malloc(nr_threads * sizeof(pthread_t *));
+ threads = malloc(nr_threads * sizeof(*threads));
for (i = 0; i < nr_threads; i++) {
arg = i;
diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c
index e61e43efe463..253208a8541b 100644
--- a/tools/testing/radix-tree/xarray.c
+++ b/tools/testing/radix-tree/xarray.c
@@ -4,16 +4,9 @@
* Copyright (c) 2018 Matthew Wilcox <willy@infradead.org>
*/
-#define XA_DEBUG
+#include "xarray-shared.h"
#include "test.h"
-#define module_init(x)
-#define module_exit(x)
-#define MODULE_AUTHOR(x)
-#define MODULE_LICENSE(x)
-#define dump_stack() assert(0)
-
-#include "../../../lib/xarray.c"
#undef XA_DEBUG
#include "../../../lib/test_xarray.c"
@@ -25,11 +18,13 @@ void xarray_tests(void)
int __weak main(void)
{
+ rcu_register_thread();
radix_tree_init();
xarray_tests();
radix_tree_cpu_dead(1);
rcu_barrier();
if (nr_allocated)
printf("nr_allocated = %d\n", nr_allocated);
+ rcu_unregister_thread();
return 0;
}
diff --git a/tools/testing/rbtree/Makefile b/tools/testing/rbtree/Makefile
new file mode 100644
index 000000000000..d7bbae2af4c7
--- /dev/null
+++ b/tools/testing/rbtree/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+
+.PHONY: clean
+
+TARGETS = rbtree_test interval_tree_test
+OFILES = $(SHARED_OFILES) rbtree-shim.o interval_tree-shim.o maple-shim.o
+DEPS = ../../../include/linux/rbtree.h \
+ ../../../include/linux/rbtree_types.h \
+ ../../../include/linux/rbtree_augmented.h \
+ ../../../include/linux/interval_tree.h \
+ ../../../include/linux/interval_tree_generic.h \
+ ../../../lib/rbtree.c \
+ ../../../lib/interval_tree.c
+
+targets: $(TARGETS)
+
+include ../shared/shared.mk
+
+ifeq ($(DEBUG), 1)
+ CFLAGS += -g
+endif
+
+$(TARGETS): $(OFILES)
+
+rbtree-shim.o: $(DEPS)
+rbtree_test.o: ../../../lib/rbtree_test.c
+interval_tree-shim.o: $(DEPS)
+interval_tree-shim.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER
+interval_tree_test.o: ../../../lib/interval_tree_test.c
+interval_tree_test.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER
+
+clean:
+ $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/*
diff --git a/tools/testing/rbtree/interval_tree_test.c b/tools/testing/rbtree/interval_tree_test.c
new file mode 100644
index 000000000000..49bc5b534330
--- /dev/null
+++ b/tools/testing/rbtree/interval_tree_test.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * interval_tree.c: Userspace Interval Tree test-suite
+ * Copyright (c) 2025 Wei Yang <richard.weiyang@gmail.com>
+ */
+#include <linux/math64.h>
+#include <linux/kern_levels.h>
+#include "shared.h"
+#include "maple-shared.h"
+
+#include "../../../lib/interval_tree_test.c"
+
+int usage(void)
+{
+ fprintf(stderr, "Userland interval tree test cases\n");
+ fprintf(stderr, " -n: Number of nodes in the interval tree\n");
+ fprintf(stderr, " -p: Number of iterations modifying the tree\n");
+ fprintf(stderr, " -q: Number of searches to the interval tree\n");
+ fprintf(stderr, " -s: Number of iterations searching the tree\n");
+ fprintf(stderr, " -a: Searches will iterate all nodes in the tree\n");
+ fprintf(stderr, " -m: Largest value for the interval's endpoint\n");
+ fprintf(stderr, " -r: Random seed\n");
+ exit(-1);
+}
+
+void interval_tree_tests(void)
+{
+ interval_tree_test_init();
+ interval_tree_test_exit();
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "n:p:q:s:am:r:")) != -1) {
+ if (opt == 'n')
+ nnodes = strtoul(optarg, NULL, 0);
+ else if (opt == 'p')
+ perf_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'q')
+ nsearches = strtoul(optarg, NULL, 0);
+ else if (opt == 's')
+ search_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'a')
+ search_all = true;
+ else if (opt == 'm')
+ max_endpoint = strtoul(optarg, NULL, 0);
+ else if (opt == 'r')
+ seed = strtoul(optarg, NULL, 0);
+ else
+ usage();
+ }
+
+ maple_tree_init();
+ interval_tree_tests();
+ return 0;
+}
diff --git a/tools/testing/rbtree/rbtree_test.c b/tools/testing/rbtree/rbtree_test.c
new file mode 100644
index 000000000000..585c970f679e
--- /dev/null
+++ b/tools/testing/rbtree/rbtree_test.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rbtree_test.c: Userspace Red Black Tree test-suite
+ * Copyright (c) 2025 Wei Yang <richard.weiyang@gmail.com>
+ */
+#include <linux/init.h>
+#include <linux/math64.h>
+#include <linux/kern_levels.h>
+#include "shared.h"
+
+#include "../../../lib/rbtree_test.c"
+
+int usage(void)
+{
+ fprintf(stderr, "Userland rbtree test cases\n");
+ fprintf(stderr, " -n: Number of nodes in the rb-tree\n");
+ fprintf(stderr, " -p: Number of iterations modifying the rb-tree\n");
+ fprintf(stderr, " -c: Number of iterations modifying and verifying the rb-tree\n");
+ fprintf(stderr, " -r: Random seed\n");
+ exit(-1);
+}
+
+void rbtree_tests(void)
+{
+ rbtree_test_init();
+ rbtree_test_exit();
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "n:p:c:r:")) != -1) {
+ if (opt == 'n')
+ nnodes = strtoul(optarg, NULL, 0);
+ else if (opt == 'p')
+ perf_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'c')
+ check_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'r')
+ seed = strtoul(optarg, NULL, 0);
+ else
+ usage();
+ }
+
+ rbtree_tests();
+ return 0;
+}
diff --git a/tools/testing/rbtree/test.h b/tools/testing/rbtree/test.h
new file mode 100644
index 000000000000..f1f1b545b55a
--- /dev/null
+++ b/tools/testing/rbtree/test.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+void rbtree_tests(void);
+void interval_tree_tests(void);
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index f9a12005fcea..121ae78d6e88 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -51,7 +51,6 @@ static inline unsigned long page_to_phys(struct page *page)
#define page_to_pfn(page) ((unsigned long)(page) / PAGE_SIZE)
#define pfn_to_page(pfn) (void *)((pfn) * PAGE_SIZE)
-#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
#define __min(t1, t2, min1, min2, x, y) ({ \
t1 min1 = (x); \
@@ -74,7 +73,7 @@ static inline unsigned long page_to_phys(struct page *page)
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
x, y)
-#define preemptible() (1)
+#define pagefault_disabled() (0)
static inline void *kmap(struct page *page)
{
@@ -127,7 +126,7 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
#define kmemleak_free(a)
#define PageSlab(p) (0)
-#define flush_kernel_dcache_page(p)
+#define flush_dcache_page(p)
#define MAX_ERRNO 4095
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 71c960dcd8a4..08465a701cd5 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -55,7 +55,6 @@ int main(void)
struct test *test, tests[] = {
{ -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 },
{ 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 },
- { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 },
{ 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 },
{ 0, 1, pfn(0), NULL, 1, sgmax, 1 },
{ 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 },
@@ -86,32 +85,46 @@ int main(void)
for (i = 0, test = tests; test->expected_segments; test++, i++) {
int left_pages = test->pfn_app ? test->num_pages : 0;
+ struct sg_append_table append = {};
struct page *pages[MAX_PAGES];
- struct sg_table st;
- struct scatterlist *sg;
+ int ret;
set_pages(pages, test->pfn, test->num_pages);
- sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
- test->size, test->max_seg, NULL, left_pages, GFP_KERNEL);
- assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
+ if (test->pfn_app)
+ ret = sg_alloc_append_table_from_pages(
+ &append, pages, test->num_pages, 0, test->size,
+ test->max_seg, left_pages, GFP_KERNEL);
+ else
+ ret = sg_alloc_table_from_pages_segment(
+ &append.sgt, pages, test->num_pages, 0,
+ test->size, test->max_seg, GFP_KERNEL);
+
+ assert(ret == test->alloc_ret);
if (test->alloc_ret)
continue;
if (test->pfn_app) {
set_pages(pages, test->pfn_app, test->num_pages);
- sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
- test->size, test->max_seg, sg, 0, GFP_KERNEL);
+ ret = sg_alloc_append_table_from_pages(
+ &append, pages, test->num_pages, 0, test->size,
+ test->max_seg, 0, GFP_KERNEL);
- assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
+ assert(ret == test->alloc_ret);
}
- VALIDATE(st.nents == test->expected_segments, &st, test);
+ VALIDATE(append.sgt.nents == test->expected_segments,
+ &append.sgt, test);
if (!test->pfn_app)
- VALIDATE(st.orig_nents == test->expected_segments, &st, test);
-
- sg_free_table(&st);
+ VALIDATE(append.sgt.orig_nents ==
+ test->expected_segments,
+ &append.sgt, test);
+
+ if (test->pfn_app)
+ sg_free_append_table(&append);
+ else
+ sg_free_table(&append.sgt);
}
assert(i == (sizeof(tests) / sizeof(tests[0])) - 1);
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index 055a5019b13c..674aaa02e396 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -3,7 +3,7 @@ gpiogpio-event-mon
gpiogpio-hammer
gpioinclude/
gpiolsgpio
-tpm2/SpaceTest.log
+kselftest_install/
# Python bytecode and cache
__pycache__/
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c42aacec5038..56e44a98d6a5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,57 +1,108 @@
# SPDX-License-Identifier: GPL-2.0
-TARGETS = arm64
+TARGETS += acct
+TARGETS += alsa
+TARGETS += amd-pstate
+TARGETS += arm64
TARGETS += bpf
TARGETS += breakpoints
+TARGETS += cachestat
TARGETS += capabilities
TARGETS += cgroup
TARGETS += clone3
+TARGETS += connector
TARGETS += core
TARGETS += cpufreq
TARGETS += cpu-hotplug
+TARGETS += damon
+TARGETS += devices/error_logs
+TARGETS += devices/probe
+TARGETS += dmabuf-heaps
TARGETS += drivers/dma-buf
+TARGETS += drivers/ntsync
+TARGETS += drivers/s390x/uvdevice
+TARGETS += drivers/net
+TARGETS += drivers/net/bonding
+TARGETS += drivers/net/team
+TARGETS += drivers/net/virtio_net
+TARGETS += drivers/platform/x86/intel/ifs
+TARGETS += dt
TARGETS += efivarfs
TARGETS += exec
+TARGETS += fchmodat2
TARGETS += filesystems
TARGETS += filesystems/binderfs
TARGETS += filesystems/epoll
+TARGETS += filesystems/fat
+TARGETS += filesystems/overlayfs
+TARGETS += filesystems/statmount
+TARGETS += filesystems/mount-notify
+TARGETS += filesystems/fuse
TARGETS += firmware
TARGETS += fpu
TARGETS += ftrace
TARGETS += futex
TARGETS += gpio
+TARGETS += hid
TARGETS += intel_pstate
+TARGETS += iommu
TARGETS += ipc
TARGETS += ir
TARGETS += kcmp
TARGETS += kexec
+TARGETS += kselftest_harness
TARGETS += kvm
+TARGETS += landlock
TARGETS += lib
TARGETS += livepatch
+TARGETS += liveupdate
TARGETS += lkdtm
+TARGETS += lsm
TARGETS += membarrier
TARGETS += memfd
TARGETS += memory-hotplug
TARGETS += mincore
TARGETS += mount
+TARGETS += mount_setattr
+TARGETS += move_mount_set_group
TARGETS += mqueue
+TARGETS += mseal_system_mappings
TARGETS += nci
TARGETS += net
+TARGETS += net/af_unix
+TARGETS += net/can
TARGETS += net/forwarding
+TARGETS += net/hsr
TARGETS += net/mptcp
-TARGETS += netfilter
+TARGETS += net/netfilter
+TARGETS += net/openvswitch
+TARGETS += net/ovpn
+TARGETS += net/packetdrill
+TARGETS += net/rds
+TARGETS += net/tcp_ao
+TARGETS += nolibc
TARGETS += nsfs
+TARGETS += pci_endpoint
+TARGETS += pcie_bwctrl
+TARGETS += perf_events
TARGETS += pidfd
TARGETS += pid_namespace
+TARGETS += power_supply
TARGETS += powerpc
+TARGETS += prctl
TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
TARGETS += openat2
+TARGETS += resctrl
+TARGETS += riscv
+TARGETS += rlimits
TARGETS += rseq
TARGETS += rtc
+TARGETS += rust
+TARGETS += sched_ext
TARGETS += seccomp
TARGETS += sgx
-TARGETS += sigaltstack
+TARGETS += signal
TARGETS += size
TARGETS += sparc64
TARGETS += splice
@@ -60,16 +111,24 @@ TARGETS += sync
TARGETS += syscall_user_dispatch
TARGETS += sysctl
TARGETS += tc-testing
+TARGETS += tdx
+TARGETS += thermal/intel/power_floor
+TARGETS += thermal/intel/workload_hint
TARGETS += timens
ifneq (1, $(quicktest))
TARGETS += timers
endif
TARGETS += tmpfs
TARGETS += tpm2
-TARGETS += user
+TARGETS += tty
+TARGETS += ublk
+TARGETS += uevent
+TARGETS += user_events
TARGETS += vDSO
-TARGETS += vm
+TARGETS += mm
+TARGETS += vfio
TARGETS += x86
+TARGETS += x86/bugs
TARGETS += zram
#Please keep the TARGETS list alphabetically sorted
# Run "make quicktest=1 run_tests" or
@@ -78,10 +137,17 @@ TARGETS += zram
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
-# User can optionally provide a TARGETS skiplist. By default we skip
-# BPF since it has cutting edge build time dependencies which require
-# more effort to install.
-SKIP_TARGETS ?= bpf
+# Networking tests want the net/lib target, include it automatically
+ifneq ($(filter net drivers/net drivers/net/hw,$(TARGETS)),)
+ifeq ($(filter net/lib,$(TARGETS)),)
+ INSTALL_DEP_TARGETS := net/lib
+endif
+endif
+
+# User can optionally provide a TARGETS skiplist. By default we skip
+# targets using BPF since it has cutting edge build time dependencies
+# which require more effort to install.
+SKIP_TARGETS ?= bpf sched_ext
ifneq ($(SKIP_TARGETS),)
TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS))
override TARGETS := $(TMP)
@@ -108,66 +174,46 @@ ifdef building_out_of_srctree
override LDFLAGS =
endif
-ifneq ($(O),)
- BUILD := $(O)/kselftest
+top_srcdir ?= ../../..
+
+ifeq ("$(origin O)", "command line")
+ KBUILD_OUTPUT := $(O)
+endif
+
+ifneq ($(KBUILD_OUTPUT),)
+ # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot
+ # expand a shell special character '~'. We use a somewhat tedious way here.
+ abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd)
+ $(if $(abs_objtree),, \
+ $(error failed to create output directory "$(KBUILD_OUTPUT)"))
+ # $(realpath ...) resolves symlinks
+ abs_objtree := $(realpath $(abs_objtree))
+ BUILD := $(abs_objtree)/kselftest
+ KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include
else
- ifneq ($(KBUILD_OUTPUT),)
- BUILD := $(KBUILD_OUTPUT)/kselftest
- else
- BUILD := $(shell pwd)
- DEFAULT_INSTALL_HDR_PATH := 1
- endif
+ BUILD := $(CURDIR)
+ abs_srctree := $(shell cd $(top_srcdir) && pwd)
+ KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include
+ DEFAULT_INSTALL_HDR_PATH := 1
endif
# Prepare for headers install
-top_srcdir ?= ../../..
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-export KSFT_KHDR_INSTALL_DONE := 1
export BUILD
-
-# build and run gpio when output directory is the src dir.
-# gpio has dependency on tools/gpio and builds tools/gpio
-# objects in the src directory in all cases making the src
-# repo dirty even when objects are relocated.
-ifneq (1,$(DEFAULT_INSTALL_HDR_PATH))
- TMP := $(filter-out gpio, $(TARGETS))
- TARGETS := $(TMP)
-endif
+export KHDR_INCLUDES
# set default goal to all, so make without a target runs all, even when
# all isn't the first target in the file.
.DEFAULT_GOAL := all
-# Install headers here once for all tests. KSFT_KHDR_INSTALL_DONE
-# is used to avoid running headers_install from lib.mk.
-# Invoke headers install with --no-builtin-rules to avoid circular
-# dependency in "make kselftest" case. In this case, second level
-# make inherits builtin-rules which will use the rule generate
-# Makefile.o and runs into
-# "Circular Makefile.o <- prepare dependency dropped."
-# and headers_install fails and test compile fails.
-#
-# O= KBUILD_OUTPUT cases don't run into this error, since main Makefile
-# invokes them as sub-makes and --no-builtin-rules is not necessary,
-# but doesn't cause any failures. Keep it simple and use the same
-# flags in both cases.
-# Local build cases: "make kselftest", "make -C" - headers are installed
-# in the default INSTALL_HDR_PATH usr/include.
-khdr:
-ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
- $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
-else
- $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \
- ARCH=$(ARCH) -C $(top_srcdir) headers_install
-endif
-
-all: khdr
+all:
@ret=1; \
- for TARGET in $(TARGETS); do \
+ for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
mkdir $$BUILD_TARGET -p; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \
+ O=$(abs_objtree) \
$(if $(FORCE_TARGETS),|| exit); \
ret=$$((ret * $$?)); \
done; exit $$ret;
@@ -175,7 +221,10 @@ all: khdr
run_tests: all
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests \
+ SRC_PATH=$(shell readlink -e $$(pwd)) \
+ OBJ_PATH=$(BUILD) \
+ O=$(abs_objtree); \
done;
hotplug:
@@ -220,12 +269,18 @@ ifdef INSTALL_PATH
install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
+ install -m 744 kselftest/ktap_helpers.sh $(INSTALL_PATH)/kselftest/
+ install -m 744 kselftest/ksft.py $(INSTALL_PATH)/kselftest/
install -m 744 run_kselftest.sh $(INSTALL_PATH)/
rm -f $(TEST_LIST)
@ret=1; \
- for TARGET in $(TARGETS); do \
+ for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install \
+ INSTALL_PATH=$(INSTALL_PATH)/$$TARGET \
+ SRC_PATH=$(shell readlink -e $$(pwd)) \
+ OBJ_PATH=$(INSTALL_PATH) \
+ O=$(abs_objtree) \
$(if $(FORCE_TARGETS),|| exit); \
ret=$$((ret * $$?)); \
done; exit $$ret;
@@ -237,11 +292,19 @@ ifdef INSTALL_PATH
@# included in the generated runlist.
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
- echo -n "Emit Tests for $$TARGET\n"; \
+ [ ! -d $(INSTALL_PATH)/$$TARGET ] && printf "Skipping non-existent dir: $$TARGET\n" && continue; \
+ printf "Emit Tests for $$TARGET\n"; \
$(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \
-C $$TARGET emit_tests >> $(TEST_LIST); \
done;
+ @VERSION=$$(git describe HEAD 2>/dev/null); \
+ if [ -n "$$VERSION" ]; then \
+ echo "$$VERSION" > $(INSTALL_PATH)/VERSION; \
+ printf "Version saved to $(INSTALL_PATH)/VERSION\n"; \
+ else \
+ printf "Unable to get version from git describe\n"; \
+ fi
+ @echo "**Kselftest Installation is complete: $(INSTALL_PATH)**"
else
$(error Error: set INSTALL_PATH to use install)
endif
@@ -254,9 +317,9 @@ gen_tar: install
@echo "Created ${TAR_PATH}"
clean:
- @for TARGET in $(TARGETS); do \
+ @for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
-.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar
+.PHONY: all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar
diff --git a/tools/testing/selftests/acct/.gitignore b/tools/testing/selftests/acct/.gitignore
new file mode 100644
index 000000000000..7e78aac19038
--- /dev/null
+++ b/tools/testing/selftests/acct/.gitignore
@@ -0,0 +1,3 @@
+acct_syscall
+config
+process_log \ No newline at end of file
diff --git a/tools/testing/selftests/acct/Makefile b/tools/testing/selftests/acct/Makefile
new file mode 100644
index 000000000000..7e025099cf65
--- /dev/null
+++ b/tools/testing/selftests/acct/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := acct_syscall
+CFLAGS += -Wall
+
+include ../lib.mk \ No newline at end of file
diff --git a/tools/testing/selftests/acct/acct_syscall.c b/tools/testing/selftests/acct/acct_syscall.c
new file mode 100644
index 000000000000..421adbdc299d
--- /dev/null
+++ b/tools/testing/selftests/acct/acct_syscall.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* kselftest for acct() system call
+ * The acct() system call enables or disables process accounting.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+
+int main(void)
+{
+ char filename[] = "process_log";
+ FILE *fp;
+ pid_t child_pid;
+ int sz;
+
+ // Setting up kselftest framework
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ // Check if test is run a root
+ if (geteuid()) {
+ ksft_exit_skip("This test needs root to run!\n");
+ return 1;
+ }
+
+ // Create file to log closed processes
+ fp = fopen(filename, "w");
+
+ if (!fp) {
+ ksft_test_result_error("%s.\n", strerror(errno));
+ ksft_finished();
+ return 1;
+ }
+
+ acct(filename);
+
+ // Handle error conditions
+ if (errno) {
+ ksft_test_result_error("%s.\n", strerror(errno));
+ fclose(fp);
+ ksft_finished();
+ return 1;
+ }
+
+ // Create child process and wait for it to terminate.
+
+ child_pid = fork();
+
+ if (child_pid < 0) {
+ ksft_test_result_error("Creating a child process to log failed\n");
+ acct(NULL);
+ return 1;
+ } else if (child_pid > 0) {
+ wait(NULL);
+ fseek(fp, 0L, SEEK_END);
+ sz = ftell(fp);
+
+ acct(NULL);
+
+ if (sz <= 0) {
+ ksft_test_result_fail("Terminated child process not logged\n");
+ ksft_exit_fail();
+ return 1;
+ }
+
+ ksft_test_result_pass("Successfully logged terminated process.\n");
+ fclose(fp);
+ ksft_exit_pass();
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore
new file mode 100644
index 000000000000..3dd8e1176b89
--- /dev/null
+++ b/tools/testing/selftests/alsa/.gitignore
@@ -0,0 +1,5 @@
+global-timer
+mixer-test
+pcm-test
+test-pcmtest-driver
+utimer-test
diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
new file mode 100644
index 000000000000..8dab90ad22bb
--- /dev/null
+++ b/tools/testing/selftests/alsa/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+ifneq ($(shell pkg-config --exists alsa && echo 0 || echo 1),0)
+$(error Package alsa not found, please install alsa development package or \
+ add directory containing `alsa.pc` in PKG_CONFIG_PATH)
+endif
+
+CFLAGS += $(shell pkg-config --cflags alsa) $(KHDR_INCLUDES)
+LDLIBS += $(shell pkg-config --libs alsa)
+ifeq ($(LDLIBS),)
+LDLIBS += -lasound
+endif
+CFLAGS += -L$(OUTPUT) -Wl,-rpath=./
+
+LDLIBS+=-lpthread
+
+OVERRIDE_TARGETS = 1
+
+TEST_GEN_PROGS := mixer-test pcm-test test-pcmtest-driver utimer-test
+
+TEST_GEN_PROGS_EXTENDED := libatest.so global-timer
+
+TEST_FILES := conf.d pcm-test.conf
+
+include ../lib.mk
+
+$(OUTPUT)/libatest.so: conf.c alsa-local.h
+ $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@
+
+$(OUTPUT)/%: %.c $(OUTPUT)/libatest.so alsa-local.h
+ $(CC) $(CFLAGS) $< $(LDLIBS) -latest -o $@
diff --git a/tools/testing/selftests/alsa/alsa-local.h b/tools/testing/selftests/alsa/alsa-local.h
new file mode 100644
index 000000000000..29143ef52101
--- /dev/null
+++ b/tools/testing/selftests/alsa/alsa-local.h
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest configuration helpers for the hw specific configuration
+//
+// Original author: Jaroslav Kysela <perex@perex.cz>
+// Copyright (c) 2022 Red Hat Inc.
+
+#ifndef __ALSA_LOCAL_H
+#define __ALSA_LOCAL_H
+
+#include <alsa/asoundlib.h>
+
+snd_config_t *get_alsalib_config(void);
+
+snd_config_t *conf_load_from_file(const char *filename);
+void conf_load(void);
+void conf_free(void);
+snd_config_t *conf_by_card(int card);
+snd_config_t *conf_get_subtree(snd_config_t *root, const char *key1, const char *key2);
+int conf_get_count(snd_config_t *root, const char *key1, const char *key2);
+const char *conf_get_string(snd_config_t *root, const char *key1, const char *key2, const char *def);
+long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long def);
+int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def);
+void conf_get_string_array(snd_config_t *root, const char *key1, const char *key2,
+ const char **array, int array_size, const char *def);
+
+struct card_cfg_data {
+ int card;
+ snd_config_t *config;
+ const char *filename;
+ const char *config_id;
+ struct card_cfg_data *next;
+};
+
+extern struct card_cfg_data *conf_cards;
+
+#endif /* __ALSA_LOCAL_H */
diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c
new file mode 100644
index 000000000000..317212078e36
--- /dev/null
+++ b/tools/testing/selftests/alsa/conf.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest configuration helpers for the hw specific configuration
+//
+// Original author: Jaroslav Kysela <perex@perex.cz>
+// Copyright (c) 2022 Red Hat Inc.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <assert.h>
+#include <dirent.h>
+#include <regex.h>
+#include <sys/stat.h>
+
+#include "kselftest.h"
+#include "alsa-local.h"
+
+#define SYSFS_ROOT "/sys"
+
+struct card_cfg_data *conf_cards;
+
+static const char *alsa_config =
+"ctl.hw {\n"
+" @args [ CARD ]\n"
+" @args.CARD.type string\n"
+" type hw\n"
+" card $CARD\n"
+"}\n"
+"pcm.hw {\n"
+" @args [ CARD DEV SUBDEV ]\n"
+" @args.CARD.type string\n"
+" @args.DEV.type integer\n"
+" @args.SUBDEV.type integer\n"
+" type hw\n"
+" card $CARD\n"
+" device $DEV\n"
+" subdevice $SUBDEV\n"
+"}\n"
+;
+
+#ifdef SND_LIB_VER
+#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6)
+#define LIB_HAS_LOAD_STRING
+#endif
+#endif
+
+#ifndef LIB_HAS_LOAD_STRING
+static int snd_config_load_string(snd_config_t **config, const char *s,
+ size_t size)
+{
+ snd_input_t *input;
+ snd_config_t *dst;
+ int err;
+
+ assert(config && s);
+ if (size == 0)
+ size = strlen(s);
+ err = snd_input_buffer_open(&input, s, size);
+ if (err < 0)
+ return err;
+ err = snd_config_top(&dst);
+ if (err < 0) {
+ snd_input_close(input);
+ return err;
+ }
+ err = snd_config_load(dst, input);
+ snd_input_close(input);
+ if (err < 0) {
+ snd_config_delete(dst);
+ return err;
+ }
+ *config = dst;
+ return 0;
+}
+#endif
+
+snd_config_t *get_alsalib_config(void)
+{
+ snd_config_t *config;
+ int err;
+
+ err = snd_config_load_string(&config, alsa_config, strlen(alsa_config));
+ if (err < 0) {
+ ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n",
+ snd_strerror(err));
+ ksft_exit_fail();
+ }
+ return config;
+}
+
+static struct card_cfg_data *conf_data_by_card(int card, bool msg)
+{
+ struct card_cfg_data *conf;
+
+ for (conf = conf_cards; conf; conf = conf->next) {
+ if (conf->card == card) {
+ if (msg)
+ ksft_print_msg("using hw card config %s for card %d\n",
+ conf->filename, card);
+ return conf;
+ }
+ }
+ return NULL;
+}
+
+static void dump_config_tree(snd_config_t *top)
+{
+ snd_output_t *out;
+ int err;
+
+ err = snd_output_stdio_attach(&out, stdout, 0);
+ if (err < 0)
+ ksft_exit_fail_msg("stdout attach\n");
+ if (snd_config_save(top, out))
+ ksft_exit_fail_msg("config save\n");
+ snd_output_close(out);
+}
+
+snd_config_t *conf_load_from_file(const char *filename)
+{
+ snd_config_t *dst;
+ snd_input_t *input;
+ int err;
+
+ err = snd_input_stdio_open(&input, filename, "r");
+ if (err < 0)
+ ksft_exit_fail_msg("Unable to parse filename %s\n", filename);
+ err = snd_config_top(&dst);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+ err = snd_config_load(dst, input);
+ snd_input_close(input);
+ if (err < 0)
+ ksft_exit_fail_msg("Unable to parse filename %s\n", filename);
+ return dst;
+}
+
+static char *sysfs_get(const char *sysfs_root, const char *id)
+{
+ char path[PATH_MAX], link[PATH_MAX + 1];
+ struct stat sb;
+ ssize_t len;
+ char *e;
+ int fd;
+
+ if (id[0] == '/')
+ id++;
+ snprintf(path, sizeof(path), "%s/%s", sysfs_root, id);
+ if (lstat(path, &sb) != 0)
+ return NULL;
+ if (S_ISLNK(sb.st_mode)) {
+ len = readlink(path, link, sizeof(link) - 1);
+ if (len <= 0) {
+ ksft_exit_fail_msg("sysfs: cannot read link '%s': %s\n",
+ path, strerror(errno));
+ return NULL;
+ }
+ link[len] = '\0';
+ e = strrchr(link, '/');
+ if (e)
+ return strdup(e + 1);
+ return NULL;
+ }
+ if (S_ISDIR(sb.st_mode))
+ return NULL;
+ if ((sb.st_mode & S_IRUSR) == 0)
+ return NULL;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ return NULL;
+ ksft_exit_fail_msg("sysfs: open failed for '%s': %s\n",
+ path, strerror(errno));
+ }
+ len = read(fd, path, sizeof(path)-1);
+ close(fd);
+ if (len < 0)
+ ksft_exit_fail_msg("sysfs: unable to read value '%s': %s\n",
+ path, strerror(errno));
+ while (len > 0 && path[len-1] == '\n')
+ len--;
+ path[len] = '\0';
+ e = strdup(path);
+ if (e == NULL)
+ ksft_exit_fail_msg("Out of memory\n");
+ return e;
+}
+
+static bool sysfs_match(const char *sysfs_root, snd_config_t *config)
+{
+ snd_config_t *node, *path_config, *regex_config;
+ snd_config_iterator_t i, next;
+ const char *path_string, *regex_string, *v;
+ regex_t re;
+ regmatch_t match[1];
+ int iter = 0, ret;
+
+ snd_config_for_each(i, next, config) {
+ node = snd_config_iterator_entry(i);
+ if (snd_config_search(node, "path", &path_config))
+ ksft_exit_fail_msg("Missing path field in the sysfs block\n");
+ if (snd_config_search(node, "regex", &regex_config))
+ ksft_exit_fail_msg("Missing regex field in the sysfs block\n");
+ if (snd_config_get_string(path_config, &path_string))
+ ksft_exit_fail_msg("Path field in the sysfs block is not a string\n");
+ if (snd_config_get_string(regex_config, &regex_string))
+ ksft_exit_fail_msg("Regex field in the sysfs block is not a string\n");
+ iter++;
+ v = sysfs_get(sysfs_root, path_string);
+ if (!v)
+ return false;
+ if (regcomp(&re, regex_string, REG_EXTENDED))
+ ksft_exit_fail_msg("Wrong regex '%s'\n", regex_string);
+ ret = regexec(&re, v, 1, match, 0);
+ regfree(&re);
+ if (ret)
+ return false;
+ }
+ return iter > 0;
+}
+
+static void assign_card_config(int card, const char *sysfs_card_root)
+{
+ struct card_cfg_data *data;
+ snd_config_t *sysfs_card_config;
+
+ for (data = conf_cards; data; data = data->next) {
+ snd_config_search(data->config, "sysfs", &sysfs_card_config);
+ if (!sysfs_match(sysfs_card_root, sysfs_card_config))
+ continue;
+
+ data->card = card;
+ break;
+ }
+}
+
+static void assign_card_configs(void)
+{
+ char fn[128];
+ int card;
+
+ for (card = 0; card < 32; card++) {
+ snprintf(fn, sizeof(fn), "%s/class/sound/card%d", SYSFS_ROOT, card);
+ if (access(fn, R_OK) == 0)
+ assign_card_config(card, fn);
+ }
+}
+
+static int filename_filter(const struct dirent *dirent)
+{
+ size_t flen;
+
+ if (dirent == NULL)
+ return 0;
+ if (dirent->d_type == DT_DIR)
+ return 0;
+ flen = strlen(dirent->d_name);
+ if (flen <= 5)
+ return 0;
+ if (strncmp(&dirent->d_name[flen-5], ".conf", 5) == 0)
+ return 1;
+ return 0;
+}
+
+static bool match_config(const char *filename)
+{
+ struct card_cfg_data *data;
+ snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node;
+ snd_config_iterator_t i, next;
+
+ config = conf_load_from_file(filename);
+ if (snd_config_search(config, "sysfs", &sysfs_config) ||
+ snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename);
+ if (snd_config_search(config, "card", &card_config) ||
+ snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("Missing global card block in filename %s\n", filename);
+ if (!sysfs_match(SYSFS_ROOT, sysfs_config))
+ return false;
+ snd_config_for_each(i, next, card_config) {
+ node = snd_config_iterator_entry(i);
+ if (snd_config_search(node, "sysfs", &sysfs_card_config) ||
+ snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename);
+
+ data = malloc(sizeof(*data));
+ if (!data)
+ ksft_exit_fail_msg("Out of memory\n");
+ data->filename = filename;
+ data->config = node;
+ data->card = -1;
+ if (snd_config_get_id(node, &data->config_id))
+ ksft_exit_fail_msg("snd_config_get_id failed for card\n");
+ data->next = conf_cards;
+ conf_cards = data;
+ }
+ return true;
+}
+
+void conf_load(void)
+{
+ const char *fn = "conf.d";
+ struct dirent **namelist;
+ int n, j;
+
+ n = scandir(fn, &namelist, filename_filter, alphasort);
+ if (n < 0)
+ ksft_exit_fail_msg("scandir: %s\n", strerror(errno));
+ for (j = 0; j < n; j++) {
+ size_t sl = strlen(fn) + strlen(namelist[j]->d_name) + 2;
+ char *filename = malloc(sl);
+ if (filename == NULL)
+ ksft_exit_fail_msg("Out of memory\n");
+ sprintf(filename, "%s/%s", fn, namelist[j]->d_name);
+ if (match_config(filename))
+ filename = NULL;
+ free(filename);
+ free(namelist[j]);
+ }
+ free(namelist);
+
+ assign_card_configs();
+}
+
+void conf_free(void)
+{
+ struct card_cfg_data *conf;
+
+ while (conf_cards) {
+ conf = conf_cards;
+ conf_cards = conf->next;
+ snd_config_delete(conf->config);
+ }
+}
+
+snd_config_t *conf_by_card(int card)
+{
+ struct card_cfg_data *conf;
+
+ conf = conf_data_by_card(card, true);
+ if (conf)
+ return conf->config;
+ return NULL;
+}
+
+static int conf_get_by_keys(snd_config_t *root, const char *key1,
+ const char *key2, snd_config_t **result)
+{
+ int ret;
+
+ if (key1) {
+ ret = snd_config_search(root, key1, &root);
+ if (ret != -ENOENT && ret < 0)
+ return ret;
+ }
+ if (key2)
+ ret = snd_config_search(root, key2, &root);
+ if (ret >= 0)
+ *result = root;
+ return ret;
+}
+
+snd_config_t *conf_get_subtree(snd_config_t *root, const char *key1, const char *key2)
+{
+ int ret;
+
+ if (!root)
+ return NULL;
+ ret = conf_get_by_keys(root, key1, key2, &root);
+ if (ret == -ENOENT)
+ return NULL;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ return root;
+}
+
+int conf_get_count(snd_config_t *root, const char *key1, const char *key2)
+{
+ snd_config_t *cfg;
+ snd_config_iterator_t i, next;
+ int count, ret;
+
+ if (!root)
+ return -1;
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ return -1;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ if (snd_config_get_type(cfg) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("key '%s'.'%s' is not a compound\n", key1, key2);
+ count = 0;
+ snd_config_for_each(i, next, cfg)
+ count++;
+ return count;
+}
+
+const char *conf_get_string(snd_config_t *root, const char *key1, const char *key2, const char *def)
+{
+ snd_config_t *cfg;
+ const char *s;
+ int ret;
+
+ if (!root)
+ return def;
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ return def;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ if (snd_config_get_string(cfg, &s))
+ ksft_exit_fail_msg("key '%s'.'%s' is not a string\n", key1, key2);
+ return s;
+}
+
+long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long def)
+{
+ snd_config_t *cfg;
+ long l;
+ int ret;
+
+ if (!root)
+ return def;
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ return def;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ if (snd_config_get_integer(cfg, &l))
+ ksft_exit_fail_msg("key '%s'.'%s' is not an integer\n", key1, key2);
+ return l;
+}
+
+int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def)
+{
+ snd_config_t *cfg;
+ int ret;
+
+ if (!root)
+ return def;
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ return def;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ ret = snd_config_get_bool(cfg);
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' is not a bool\n", key1, key2);
+ return !!ret;
+}
+
+void conf_get_string_array(snd_config_t *root, const char *key1, const char *key2,
+ const char **array, int array_size, const char *def)
+{
+ snd_config_t *cfg;
+ char buf[16];
+ int ret, index;
+
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ cfg = NULL;
+ else if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ for (index = 0; index < array_size; index++) {
+ if (cfg == NULL) {
+ array[index] = def;
+ } else {
+ sprintf(buf, "%i", index);
+ array[index] = conf_get_string(cfg, buf, NULL, def);
+ }
+ }
+}
diff --git a/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf b/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf
new file mode 100644
index 000000000000..5b40a916295d
--- /dev/null
+++ b/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf
@@ -0,0 +1,84 @@
+#
+# Example configuration for Lenovo ThinkPad P1 Gen2
+#
+
+#
+# Use regex match for the string read from the given sysfs path
+#
+# The sysfs root directory (/sys) is hardwired in the test code
+# (may be changed on demand).
+#
+# All strings must match.
+#
+sysfs [
+ {
+ path "class/dmi/id/product_sku"
+ regex "LENOVO_MT_20QU_BU_Think_FM_ThinkPad P1 Gen 2"
+ }
+]
+
+card.hda {
+ #
+ # Use regex match for the /sys/class/sound/card*/ tree (relative)
+ #
+ sysfs [
+ {
+ path "device/subsystem_device"
+ regex "0x229e"
+ }
+ {
+ path "device/subsystem_vendor"
+ regex "0x17aa"
+ }
+ ]
+
+ #
+ # PCM configuration
+ #
+ # pcm.0.0 - device 0 subdevice 0
+ #
+ pcm.0.0 {
+ PLAYBACK {
+ test.time1 {
+ access RW_INTERLEAVED # can be omitted - default
+ format S16_LE # can be omitted - default
+ rate 48000 # can be omitted - default
+ channels 2 # can be omitted - default
+ period_size 512
+ buffer_size 4096
+ }
+ test.time2 {
+ access RW_INTERLEAVED
+ format S16_LE
+ rate 48000
+ channels 2
+ period_size 24000
+ buffer_size 192000
+ }
+ test.time3 {
+ access RW_INTERLEAVED
+ format S16_LE
+ rate 44100
+ channels 2
+ period_size 24000
+ buffer_size 192000
+ }
+ }
+ CAPTURE {
+ # use default tests, check for the presence
+ }
+ }
+ #
+ # uncomment to force the missing device checks
+ #
+ #pcm.0.2 {
+ # PLAYBACK {
+ # # check for the presence
+ # }
+ #}
+ #pcm.0.3 {
+ # CAPTURE {
+ # # check for the presence
+ # }
+ #}
+}
diff --git a/tools/testing/selftests/alsa/global-timer.c b/tools/testing/selftests/alsa/global-timer.c
new file mode 100644
index 000000000000..c15ec0ba851a
--- /dev/null
+++ b/tools/testing/selftests/alsa/global-timer.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This tool is used by the utimer test, and it allows us to
+ * count the ticks of a global timer in a certain time frame
+ * (which is set by `timeout` parameter).
+ *
+ * Author: Ivan Orlov <ivan.orlov0322@gmail.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <alsa/asoundlib.h>
+#include <time.h>
+
+static int ticked;
+static void async_callback(snd_async_handler_t *ahandler)
+{
+ ticked++;
+}
+
+static char timer_name[64];
+static void bind_to_timer(int device, int subdevice, int timeout)
+{
+ snd_timer_t *handle;
+ snd_timer_params_t *params;
+ snd_async_handler_t *ahandler;
+
+ time_t end;
+
+ sprintf(timer_name, "hw:CLASS=%d,SCLASS=%d,DEV=%d,SUBDEV=%d",
+ SND_TIMER_CLASS_GLOBAL, SND_TIMER_SCLASS_NONE,
+ device, subdevice);
+
+ snd_timer_params_alloca(&params);
+
+ if (snd_timer_open(&handle, timer_name, SND_TIMER_OPEN_NONBLOCK) < 0) {
+ perror("Can't open the timer");
+ exit(EXIT_FAILURE);
+ }
+
+ snd_timer_params_set_auto_start(params, 1);
+ snd_timer_params_set_ticks(params, 1);
+ if (snd_timer_params(handle, params) < 0) {
+ perror("Can't set timer params");
+ exit(EXIT_FAILURE);
+ }
+
+ if (snd_async_add_timer_handler(&ahandler, handle, async_callback, NULL) < 0) {
+ perror("Can't create a handler");
+ exit(EXIT_FAILURE);
+ }
+ end = time(NULL) + timeout;
+ if (snd_timer_start(handle) < 0) {
+ perror("Failed to start the timer");
+ exit(EXIT_FAILURE);
+ }
+ printf("Timer has started\n");
+ while (time(NULL) <= end) {
+ /*
+ * Waiting for the timeout to elapse. Can't use sleep here, as it gets
+ * constantly interrupted by the signal from the timer (SIGIO)
+ */
+ }
+ snd_timer_stop(handle);
+ snd_timer_close(handle);
+}
+
+int main(int argc, char *argv[])
+{
+ int device, subdevice, timeout;
+
+ if (argc < 4) {
+ perror("Usage: %s <device> <subdevice> <timeout>");
+ return EXIT_FAILURE;
+ }
+
+ setlinebuf(stdout);
+
+ device = atoi(argv[1]);
+ subdevice = atoi(argv[2]);
+ timeout = atoi(argv[3]);
+
+ bind_to_timer(device, subdevice, timeout);
+
+ printf("Total ticks count: %d\n", ticked);
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
new file mode 100644
index 000000000000..d4f845c32804
--- /dev/null
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -0,0 +1,1145 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest for the ALSA mixer API
+//
+// Original author: Mark Brown <broonie@kernel.org>
+// Copyright (c) 2021-2 Arm Limited
+
+// This test will iterate over all cards detected in the system, exercising
+// every mixer control it can find. This may conflict with other system
+// software if there is audio activity so is best run on a system with a
+// minimal active userspace.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <limits.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <math.h>
+#include <errno.h>
+#include <assert.h>
+#include <alsa/asoundlib.h>
+#include <poll.h>
+#include <stdint.h>
+
+#include "kselftest.h"
+#include "alsa-local.h"
+
+#define TESTS_PER_CONTROL 7
+
+struct card_data {
+ snd_ctl_t *handle;
+ int card;
+ snd_ctl_card_info_t *info;
+ const char *card_name;
+ struct pollfd pollfd;
+ int num_ctls;
+ snd_ctl_elem_list_t *ctls;
+ struct card_data *next;
+};
+
+struct ctl_data {
+ const char *name;
+ snd_ctl_elem_id_t *id;
+ snd_ctl_elem_info_t *info;
+ snd_ctl_elem_value_t *def_val;
+ int elem;
+ int event_missing;
+ int event_spurious;
+ struct card_data *card;
+ struct ctl_data *next;
+};
+
+int num_cards;
+int num_controls;
+struct card_data *card_list;
+struct ctl_data *ctl_list;
+
+static void find_controls(void)
+{
+ char name[32];
+ int card, ctl, err;
+ struct card_data *card_data;
+ struct ctl_data *ctl_data;
+ snd_config_t *config;
+ char *card_name, *card_longname;
+
+ card = -1;
+ if (snd_card_next(&card) < 0 || card < 0)
+ return;
+
+ config = get_alsalib_config();
+
+ while (card >= 0) {
+ sprintf(name, "hw:%d", card);
+
+ card_data = malloc(sizeof(*card_data));
+ if (!card_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_open_lconf(&card_data->handle, name, 0, config);
+ if (err < 0) {
+ ksft_print_msg("Failed to get hctl for card %d: %s\n",
+ card, snd_strerror(err));
+ goto next_card;
+ }
+
+ err = snd_card_get_name(card, &card_name);
+ if (err != 0)
+ card_name = "Unknown";
+ err = snd_card_get_longname(card, &card_longname);
+ if (err != 0)
+ card_longname = "Unknown";
+
+ err = snd_ctl_card_info_malloc(&card_data->info);
+ if (err != 0)
+ ksft_exit_fail_msg("Failed to allocate card info: %d\n",
+ err);
+
+ err = snd_ctl_card_info(card_data->handle, card_data->info);
+ if (err == 0) {
+ card_data->card_name = snd_ctl_card_info_get_id(card_data->info);
+ if (!card_data->card_name)
+ ksft_print_msg("Failed to get card ID\n");
+ } else {
+ ksft_print_msg("Failed to get card info: %d\n", err);
+ }
+
+ if (!card_data->card_name)
+ card_data->card_name = "Unknown";
+
+ ksft_print_msg("Card %d/%s - %s (%s)\n", card,
+ card_data->card_name, card_name, card_longname);
+
+ /* Count controls */
+ snd_ctl_elem_list_malloc(&card_data->ctls);
+ snd_ctl_elem_list(card_data->handle, card_data->ctls);
+ card_data->num_ctls = snd_ctl_elem_list_get_count(card_data->ctls);
+
+ /* Enumerate control information */
+ snd_ctl_elem_list_alloc_space(card_data->ctls, card_data->num_ctls);
+ snd_ctl_elem_list(card_data->handle, card_data->ctls);
+
+ card_data->card = num_cards++;
+ card_data->next = card_list;
+ card_list = card_data;
+
+ num_controls += card_data->num_ctls;
+
+ for (ctl = 0; ctl < card_data->num_ctls; ctl++) {
+ ctl_data = malloc(sizeof(*ctl_data));
+ if (!ctl_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ memset(ctl_data, 0, sizeof(*ctl_data));
+ ctl_data->card = card_data;
+ ctl_data->elem = ctl;
+ ctl_data->name = snd_ctl_elem_list_get_name(card_data->ctls,
+ ctl);
+
+ err = snd_ctl_elem_id_malloc(&ctl_data->id);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_elem_info_malloc(&ctl_data->info);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_elem_value_malloc(&ctl_data->def_val);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ snd_ctl_elem_list_get_id(card_data->ctls, ctl,
+ ctl_data->id);
+ snd_ctl_elem_info_set_id(ctl_data->info, ctl_data->id);
+ err = snd_ctl_elem_info(card_data->handle,
+ ctl_data->info);
+ if (err < 0) {
+ ksft_print_msg("%s getting info for %s\n",
+ snd_strerror(err),
+ ctl_data->name);
+ }
+
+ snd_ctl_elem_value_set_id(ctl_data->def_val,
+ ctl_data->id);
+
+ ctl_data->next = ctl_list;
+ ctl_list = ctl_data;
+ }
+
+ /* Set up for events */
+ err = snd_ctl_subscribe_events(card_data->handle, true);
+ if (err < 0) {
+ ksft_exit_fail_msg("snd_ctl_subscribe_events() failed for card %d: %d\n",
+ card, err);
+ }
+
+ err = snd_ctl_poll_descriptors_count(card_data->handle);
+ if (err != 1) {
+ ksft_exit_fail_msg("Unexpected descriptor count %d for card %d\n",
+ err, card);
+ }
+
+ err = snd_ctl_poll_descriptors(card_data->handle,
+ &card_data->pollfd, 1);
+ if (err != 1) {
+ ksft_exit_fail_msg("snd_ctl_poll_descriptors() failed for card %d: %d\n",
+ card, err);
+ }
+
+ next_card:
+ if (snd_card_next(&card) < 0) {
+ ksft_print_msg("snd_card_next");
+ break;
+ }
+ }
+
+ snd_config_delete(config);
+}
+
+/*
+ * Block for up to timeout ms for an event, returns a negative value
+ * on error, 0 for no event and 1 for an event.
+ */
+static int wait_for_event(struct ctl_data *ctl, int timeout)
+{
+ unsigned short revents;
+ snd_ctl_event_t *event;
+ int err;
+ unsigned int mask = 0;
+ unsigned int ev_id;
+
+ snd_ctl_event_alloca(&event);
+
+ do {
+ err = poll(&(ctl->card->pollfd), 1, timeout);
+ if (err < 0) {
+ ksft_print_msg("poll() failed for %s: %s (%d)\n",
+ ctl->name, strerror(errno), errno);
+ return -1;
+ }
+ /* Timeout */
+ if (err == 0)
+ return 0;
+
+ err = snd_ctl_poll_descriptors_revents(ctl->card->handle,
+ &(ctl->card->pollfd),
+ 1, &revents);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_poll_descriptors_revents() failed for %s: %d\n",
+ ctl->name, err);
+ return err;
+ }
+ if (revents & POLLERR) {
+ ksft_print_msg("snd_ctl_poll_descriptors_revents() reported POLLERR for %s\n",
+ ctl->name);
+ return -1;
+ }
+ /* No read events */
+ if (!(revents & POLLIN)) {
+ ksft_print_msg("No POLLIN\n");
+ continue;
+ }
+
+ err = snd_ctl_read(ctl->card->handle, event);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_read() failed for %s: %d\n",
+ ctl->name, err);
+ return err;
+ }
+
+ if (snd_ctl_event_get_type(event) != SND_CTL_EVENT_ELEM)
+ continue;
+
+ /* The ID returned from the event is 1 less than numid */
+ mask = snd_ctl_event_elem_get_mask(event);
+ ev_id = snd_ctl_event_elem_get_numid(event);
+ if (ev_id != snd_ctl_elem_info_get_numid(ctl->info)) {
+ ksft_print_msg("Event for unexpected ctl %s\n",
+ snd_ctl_event_elem_get_name(event));
+ continue;
+ }
+
+ if ((mask & SND_CTL_EVENT_MASK_REMOVE) == SND_CTL_EVENT_MASK_REMOVE) {
+ ksft_print_msg("Removal event for %s\n",
+ ctl->name);
+ return -1;
+ }
+ } while ((mask & SND_CTL_EVENT_MASK_VALUE) != SND_CTL_EVENT_MASK_VALUE);
+
+ return 1;
+}
+
+static bool ctl_value_index_valid(struct ctl_data *ctl,
+ snd_ctl_elem_value_t *val,
+ int index)
+{
+ long int_val;
+ long long int64_val;
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_NONE:
+ ksft_print_msg("%s.%d Invalid control type NONE\n",
+ ctl->name, index);
+ return false;
+
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ int_val = snd_ctl_elem_value_get_boolean(val, index);
+ switch (int_val) {
+ case 0:
+ case 1:
+ break;
+ default:
+ ksft_print_msg("%s.%d Invalid boolean value %ld\n",
+ ctl->name, index, int_val);
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ int_val = snd_ctl_elem_value_get_integer(val, index);
+
+ if (int_val < snd_ctl_elem_info_get_min(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld less than minimum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_min(ctl->info));
+ return false;
+ }
+
+ if (int_val > snd_ctl_elem_info_get_max(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld more than maximum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_max(ctl->info));
+ return false;
+ }
+
+ /* Only check step size if there is one and we're in bounds */
+ if (snd_ctl_elem_info_get_step(ctl->info) &&
+ (int_val - snd_ctl_elem_info_get_min(ctl->info) %
+ snd_ctl_elem_info_get_step(ctl->info))) {
+ ksft_print_msg("%s.%d value %ld invalid for step %ld minimum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_step(ctl->info),
+ snd_ctl_elem_info_get_min(ctl->info));
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ int64_val = snd_ctl_elem_value_get_integer64(val, index);
+
+ if (int64_val < snd_ctl_elem_info_get_min64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld less than minimum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_min64(ctl->info));
+ return false;
+ }
+
+ if (int64_val > snd_ctl_elem_info_get_max64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld more than maximum %ld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_max(ctl->info));
+ return false;
+ }
+
+ /* Only check step size if there is one and we're in bounds */
+ if (snd_ctl_elem_info_get_step64(ctl->info) &&
+ (int64_val - snd_ctl_elem_info_get_min64(ctl->info)) %
+ snd_ctl_elem_info_get_step64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld invalid for step %lld minimum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_step64(ctl->info),
+ snd_ctl_elem_info_get_min64(ctl->info));
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ int_val = snd_ctl_elem_value_get_enumerated(val, index);
+
+ if (int_val < 0) {
+ ksft_print_msg("%s.%d negative value %ld for enumeration\n",
+ ctl->name, index, int_val);
+ return false;
+ }
+
+ if (int_val >= snd_ctl_elem_info_get_items(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld more than item count %u\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_items(ctl->info));
+ return false;
+ }
+ break;
+
+ default:
+ /* No tests for other types */
+ break;
+ }
+
+ return true;
+}
+
+/*
+ * Check that the provided value meets the constraints for the
+ * provided control.
+ */
+static bool ctl_value_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val)
+{
+ int i;
+ bool valid = true;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++)
+ if (!ctl_value_index_valid(ctl, val, i))
+ valid = false;
+
+ return valid;
+}
+
+/*
+ * Check that we can read the default value and it is valid. Write
+ * tests use the read value to restore the default.
+ */
+static void test_ctl_get_value(struct ctl_data *ctl)
+{
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("get_value.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ /* Can't test reading on an unreadable control */
+ if (!snd_ctl_elem_info_is_readable(ctl->info)) {
+ ksft_print_msg("%s is not readable\n", ctl->name);
+ ksft_test_result_skip("get_value.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ err = snd_ctl_elem_read(ctl->card->handle, ctl->def_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ goto out;
+ }
+
+ if (!ctl_value_valid(ctl, ctl->def_val))
+ err = -EINVAL;
+
+out:
+ ksft_test_result(err >= 0, "get_value.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+}
+
+static bool strend(const char *haystack, const char *needle)
+{
+ size_t haystack_len = strlen(haystack);
+ size_t needle_len = strlen(needle);
+
+ if (needle_len > haystack_len)
+ return false;
+ return strcmp(haystack + haystack_len - needle_len, needle) == 0;
+}
+
+static void test_ctl_name(struct ctl_data *ctl)
+{
+ bool name_ok = true;
+
+ ksft_print_msg("%s.%d %s\n", ctl->card->card_name, ctl->elem,
+ ctl->name);
+
+ /* Only boolean controls should end in Switch */
+ if (strend(ctl->name, " Switch")) {
+ if (snd_ctl_elem_info_get_type(ctl->info) != SND_CTL_ELEM_TYPE_BOOLEAN) {
+ ksft_print_msg("%d.%d %s ends in Switch but is not boolean\n",
+ ctl->card->card, ctl->elem, ctl->name);
+ name_ok = false;
+ }
+ }
+
+ /* Writeable boolean controls should end in Switch */
+ if (snd_ctl_elem_info_get_type(ctl->info) == SND_CTL_ELEM_TYPE_BOOLEAN &&
+ snd_ctl_elem_info_is_writable(ctl->info)) {
+ if (!strend(ctl->name, " Switch")) {
+ ksft_print_msg("%d.%d %s is a writeable boolean but not a Switch\n",
+ ctl->card->card, ctl->elem, ctl->name);
+ name_ok = false;
+ }
+ }
+
+ ksft_test_result(name_ok, "name.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+}
+
+static void show_values(struct ctl_data *ctl, snd_ctl_elem_value_t *orig_val,
+ snd_ctl_elem_value_t *read_val)
+{
+ long long orig_int, read_int;
+ int i;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ orig_int = snd_ctl_elem_value_get_boolean(orig_val, i);
+ read_int = snd_ctl_elem_value_get_boolean(read_val, i);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ orig_int = snd_ctl_elem_value_get_integer(orig_val, i);
+ read_int = snd_ctl_elem_value_get_integer(read_val, i);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ orig_int = snd_ctl_elem_value_get_integer64(orig_val,
+ i);
+ read_int = snd_ctl_elem_value_get_integer64(read_val,
+ i);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ orig_int = snd_ctl_elem_value_get_enumerated(orig_val,
+ i);
+ read_int = snd_ctl_elem_value_get_enumerated(read_val,
+ i);
+ break;
+
+ default:
+ return;
+ }
+
+ ksft_print_msg("%s.%d orig %lld read %lld, is_volatile %d\n",
+ ctl->name, i, orig_int, read_int,
+ snd_ctl_elem_info_is_volatile(ctl->info));
+ }
+}
+
+static bool show_mismatch(struct ctl_data *ctl, int index,
+ snd_ctl_elem_value_t *read_val,
+ snd_ctl_elem_value_t *expected_val)
+{
+ long long expected_int, read_int;
+
+ /*
+ * We factor out the code to compare values representable as
+ * integers, ensure that check doesn't log otherwise.
+ */
+ expected_int = 0;
+ read_int = 0;
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ expected_int = snd_ctl_elem_value_get_boolean(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_boolean(read_val, index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ expected_int = snd_ctl_elem_value_get_integer(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_integer(read_val, index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ expected_int = snd_ctl_elem_value_get_integer64(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_integer64(read_val,
+ index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ expected_int = snd_ctl_elem_value_get_enumerated(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_enumerated(read_val,
+ index);
+ break;
+
+ default:
+ break;
+ }
+
+ if (expected_int != read_int) {
+ /*
+ * NOTE: The volatile attribute means that the hardware
+ * can voluntarily change the state of control element
+ * independent of any operation by software.
+ */
+ bool is_volatile = snd_ctl_elem_info_is_volatile(ctl->info);
+ ksft_print_msg("%s.%d expected %lld but read %lld, is_volatile %d\n",
+ ctl->name, index, expected_int, read_int, is_volatile);
+ return !is_volatile;
+ } else {
+ return false;
+ }
+}
+
+/*
+ * Write a value then if possible verify that we get the expected
+ * result. An optional expected value can be provided if we expect
+ * the write to fail, for verifying that invalid writes don't corrupt
+ * anything.
+ */
+static int write_and_verify(struct ctl_data *ctl,
+ snd_ctl_elem_value_t *write_val,
+ snd_ctl_elem_value_t *expected_val)
+{
+ int err, i;
+ bool error_expected, mismatch_shown;
+ snd_ctl_elem_value_t *initial_val, *read_val, *w_val;
+ snd_ctl_elem_value_alloca(&initial_val);
+ snd_ctl_elem_value_alloca(&read_val);
+ snd_ctl_elem_value_alloca(&w_val);
+
+ /*
+ * We need to copy the write value since writing can modify
+ * the value which causes surprises, and allocate an expected
+ * value if we expect to read back what we wrote.
+ */
+ snd_ctl_elem_value_copy(w_val, write_val);
+ if (expected_val) {
+ error_expected = true;
+ } else {
+ error_expected = false;
+ snd_ctl_elem_value_alloca(&expected_val);
+ snd_ctl_elem_value_copy(expected_val, write_val);
+ }
+
+ /* Store the value before we write */
+ if (snd_ctl_elem_info_is_readable(ctl->info)) {
+ snd_ctl_elem_value_set_id(initial_val, ctl->id);
+
+ err = snd_ctl_elem_read(ctl->card->handle, initial_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+ }
+
+ /*
+ * Do the write, if we have an expected value ignore the error
+ * and carry on to validate the expected value.
+ */
+ err = snd_ctl_elem_write(ctl->card->handle, w_val);
+ if (err < 0 && !error_expected) {
+ ksft_print_msg("snd_ctl_elem_write() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+
+ /* Can we do the verification part? */
+ if (!snd_ctl_elem_info_is_readable(ctl->info))
+ return err;
+
+ snd_ctl_elem_value_set_id(read_val, ctl->id);
+
+ err = snd_ctl_elem_read(ctl->card->handle, read_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+
+ /*
+ * We can't verify any specific value for volatile controls
+ * but we should still check that whatever we read is a valid
+ * vale for the control.
+ */
+ if (snd_ctl_elem_info_is_volatile(ctl->info)) {
+ if (!ctl_value_valid(ctl, read_val)) {
+ ksft_print_msg("Volatile control %s has invalid value\n",
+ ctl->name);
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ /*
+ * Check for an event if the value changed, or confirm that
+ * there was none if it didn't. We rely on the kernel
+ * generating the notification before it returns from the
+ * write, this is currently true, should that ever change this
+ * will most likely break and need updating.
+ */
+ err = wait_for_event(ctl, 0);
+ if (snd_ctl_elem_value_compare(initial_val, read_val)) {
+ if (err < 1) {
+ ksft_print_msg("No event generated for %s\n",
+ ctl->name);
+ show_values(ctl, initial_val, read_val);
+ ctl->event_missing++;
+ }
+ } else {
+ if (err != 0) {
+ ksft_print_msg("Spurious event generated for %s\n",
+ ctl->name);
+ show_values(ctl, initial_val, read_val);
+ ctl->event_spurious++;
+ }
+ }
+
+ /*
+ * Use the libray to compare values, if there's a mismatch
+ * carry on and try to provide a more useful diagnostic than
+ * just "mismatch".
+ */
+ if (!snd_ctl_elem_value_compare(expected_val, read_val))
+ return 0;
+
+ mismatch_shown = false;
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++)
+ if (show_mismatch(ctl, i, read_val, expected_val))
+ mismatch_shown = true;
+
+ if (!mismatch_shown)
+ ksft_print_msg("%s read and written values differ\n",
+ ctl->name);
+
+ return -1;
+}
+
+/*
+ * Make sure we can write the default value back to the control, this
+ * should validate that at least some write works.
+ */
+static void test_ctl_write_default(struct ctl_data *ctl)
+{
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_default.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_default.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ /* No idea what the default was for unreadable controls */
+ if (!snd_ctl_elem_info_is_readable(ctl->info)) {
+ ksft_print_msg("%s couldn't read default\n", ctl->name);
+ ksft_test_result_skip("write_default.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ err = write_and_verify(ctl, ctl->def_val, NULL);
+
+ ksft_test_result(err >= 0, "write_default.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+}
+
+static bool test_ctl_write_valid_boolean(struct ctl_data *ctl)
+{
+ int err, i, j;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = 0; j < 2; j++) {
+ snd_ctl_elem_value_set_boolean(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_valid_integer(struct ctl_data *ctl)
+{
+ int err;
+ int i;
+ long j, step;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ step = snd_ctl_elem_info_get_step(ctl->info);
+ if (!step)
+ step = 1;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = snd_ctl_elem_info_get_min(ctl->info);
+ j <= snd_ctl_elem_info_get_max(ctl->info); j += step) {
+
+ snd_ctl_elem_value_set_integer(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+
+ return !fail;
+}
+
+static bool test_ctl_write_valid_integer64(struct ctl_data *ctl)
+{
+ int err, i;
+ long long j, step;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ step = snd_ctl_elem_info_get_step64(ctl->info);
+ if (!step)
+ step = 1;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = snd_ctl_elem_info_get_min64(ctl->info);
+ j <= snd_ctl_elem_info_get_max64(ctl->info); j += step) {
+
+ snd_ctl_elem_value_set_integer64(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_valid_enumerated(struct ctl_data *ctl)
+{
+ int err, i, j;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = 0; j < snd_ctl_elem_info_get_items(ctl->info); j++) {
+ snd_ctl_elem_value_set_enumerated(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static void test_ctl_write_valid(struct ctl_data *ctl)
+{
+ bool pass;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_valid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_valid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ pass = test_ctl_write_valid_boolean(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ pass = test_ctl_write_valid_integer(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ pass = test_ctl_write_valid_integer64(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ pass = test_ctl_write_valid_enumerated(ctl);
+ break;
+
+ default:
+ /* No tests for this yet */
+ ksft_test_result_skip("write_valid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ /* Restore the default value to minimise disruption */
+ write_and_verify(ctl, ctl->def_val, NULL);
+
+ ksft_test_result(pass, "write_valid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+}
+
+static bool test_ctl_write_invalid_value(struct ctl_data *ctl,
+ snd_ctl_elem_value_t *val)
+{
+ int err;
+
+ /* Ideally this will fail... */
+ err = snd_ctl_elem_write(ctl->card->handle, val);
+ if (err < 0)
+ return false;
+
+ /* ...but some devices will clamp to an in range value */
+ err = snd_ctl_elem_read(ctl->card->handle, val);
+ if (err < 0) {
+ ksft_print_msg("%s failed to read: %s\n",
+ ctl->name, snd_strerror(err));
+ return true;
+ }
+
+ return !ctl_value_valid(ctl, val);
+}
+
+static bool test_ctl_write_invalid_boolean(struct ctl_data *ctl)
+{
+ int i;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_boolean(val, i, 2);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_invalid_integer(struct ctl_data *ctl)
+{
+ int i;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ if (snd_ctl_elem_info_get_min(ctl->info) != LONG_MIN) {
+ /* Just under range */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer(val, i,
+ snd_ctl_elem_info_get_min(ctl->info) - 1);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Minimum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer(val, i, LONG_MIN);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+
+ if (snd_ctl_elem_info_get_max(ctl->info) != LONG_MAX) {
+ /* Just over range */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer(val, i,
+ snd_ctl_elem_info_get_max(ctl->info) + 1);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Maximum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer(val, i, LONG_MAX);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_invalid_integer64(struct ctl_data *ctl)
+{
+ int i;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ if (snd_ctl_elem_info_get_min64(ctl->info) != LLONG_MIN) {
+ /* Just under range */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer64(val, i,
+ snd_ctl_elem_info_get_min64(ctl->info) - 1);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Minimum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer64(val, i, LLONG_MIN);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+
+ if (snd_ctl_elem_info_get_max64(ctl->info) != LLONG_MAX) {
+ /* Just over range */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer64(val, i,
+ snd_ctl_elem_info_get_max64(ctl->info) + 1);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Maximum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer64(val, i, LLONG_MAX);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl)
+{
+ int i;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ /* One beyond maximum */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_enumerated(val, i,
+ snd_ctl_elem_info_get_items(ctl->info));
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Maximum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_enumerated(val, i, UINT_MAX);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ }
+
+ return !fail;
+}
+
+
+static void test_ctl_write_invalid(struct ctl_data *ctl)
+{
+ bool pass;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_invalid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_invalid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ pass = test_ctl_write_invalid_boolean(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ pass = test_ctl_write_invalid_integer(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ pass = test_ctl_write_invalid_integer64(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ pass = test_ctl_write_invalid_enumerated(ctl);
+ break;
+
+ default:
+ /* No tests for this yet */
+ ksft_test_result_skip("write_invalid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+ return;
+ }
+
+ /* Restore the default value to minimise disruption */
+ write_and_verify(ctl, ctl->def_val, NULL);
+
+ ksft_test_result(pass, "write_invalid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+}
+
+static void test_ctl_event_missing(struct ctl_data *ctl)
+{
+ ksft_test_result(!ctl->event_missing, "event_missing.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+}
+
+static void test_ctl_event_spurious(struct ctl_data *ctl)
+{
+ ksft_test_result(!ctl->event_spurious, "event_spurious.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
+}
+
+int main(void)
+{
+ struct ctl_data *ctl;
+
+ ksft_print_header();
+
+ find_controls();
+
+ ksft_set_plan(num_controls * TESTS_PER_CONTROL);
+
+ for (ctl = ctl_list; ctl != NULL; ctl = ctl->next) {
+ /*
+ * Must test get_value() before we write anything, the
+ * test stores the default value for later cleanup.
+ */
+ test_ctl_get_value(ctl);
+ test_ctl_name(ctl);
+ test_ctl_write_default(ctl);
+ test_ctl_write_valid(ctl);
+ test_ctl_write_invalid(ctl);
+ test_ctl_event_missing(ctl);
+ test_ctl_event_spurious(ctl);
+ }
+
+ ksft_exit_pass();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c
new file mode 100644
index 000000000000..ee04ccef7d7c
--- /dev/null
+++ b/tools/testing/selftests/alsa/pcm-test.c
@@ -0,0 +1,671 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest for the ALSA PCM API
+//
+// Original author: Jaroslav Kysela <perex@perex.cz>
+// Copyright (c) 2022 Red Hat Inc.
+
+// This test will iterate over all cards detected in the system, exercising
+// every PCM device it can find. This may conflict with other system
+// software if there is audio activity so is best run on a system with a
+// minimal active userspace.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "kselftest.h"
+#include "alsa-local.h"
+
+typedef struct timespec timestamp_t;
+
+struct card_data {
+ int card;
+ snd_ctl_card_info_t *info;
+ const char *name;
+ pthread_t thread;
+ struct card_data *next;
+};
+
+struct card_data *card_list;
+
+struct pcm_data {
+ snd_pcm_t *handle;
+ int card;
+ int device;
+ int subdevice;
+ const char *card_name;
+ snd_pcm_stream_t stream;
+ snd_config_t *pcm_config;
+ struct pcm_data *next;
+};
+
+struct pcm_data *pcm_list;
+
+int num_missing;
+struct pcm_data *pcm_missing;
+
+snd_config_t *default_pcm_config;
+
+/* Lock while reporting results since kselftest doesn't */
+pthread_mutex_t results_lock = PTHREAD_MUTEX_INITIALIZER;
+
+enum test_class {
+ TEST_CLASS_DEFAULT,
+ TEST_CLASS_SYSTEM,
+};
+
+void timestamp_now(timestamp_t *tstamp)
+{
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, tstamp))
+ ksft_exit_fail_msg("clock_get_time\n");
+}
+
+long long timestamp_diff_ms(timestamp_t *tstamp)
+{
+ timestamp_t now, diff;
+ timestamp_now(&now);
+ if (tstamp->tv_nsec > now.tv_nsec) {
+ diff.tv_sec = now.tv_sec - tstamp->tv_sec - 1;
+ diff.tv_nsec = (now.tv_nsec + 1000000000L) - tstamp->tv_nsec;
+ } else {
+ diff.tv_sec = now.tv_sec - tstamp->tv_sec;
+ diff.tv_nsec = now.tv_nsec - tstamp->tv_nsec;
+ }
+ return (diff.tv_sec * 1000) + ((diff.tv_nsec + 500000L) / 1000000L);
+}
+
+static long device_from_id(snd_config_t *node)
+{
+ const char *id;
+ char *end;
+ long v;
+
+ if (snd_config_get_id(node, &id))
+ ksft_exit_fail_msg("snd_config_get_id\n");
+ errno = 0;
+ v = strtol(id, &end, 10);
+ if (errno || *end)
+ return -1;
+ return v;
+}
+
+static void missing_device(int card, int device, int subdevice, snd_pcm_stream_t stream)
+{
+ struct pcm_data *pcm_data;
+
+ for (pcm_data = pcm_list; pcm_data != NULL; pcm_data = pcm_data->next) {
+ if (pcm_data->card != card)
+ continue;
+ if (pcm_data->device != device)
+ continue;
+ if (pcm_data->subdevice != subdevice)
+ continue;
+ if (pcm_data->stream != stream)
+ continue;
+ return;
+ }
+ pcm_data = calloc(1, sizeof(*pcm_data));
+ if (!pcm_data)
+ ksft_exit_fail_msg("Out of memory\n");
+ pcm_data->card = card;
+ pcm_data->device = device;
+ pcm_data->subdevice = subdevice;
+ pcm_data->stream = stream;
+ pcm_data->next = pcm_missing;
+ pcm_missing = pcm_data;
+ num_missing++;
+}
+
+static void missing_devices(int card, snd_config_t *card_config)
+{
+ snd_config_t *pcm_config, *node1, *node2;
+ snd_config_iterator_t i1, i2, next1, next2;
+ int device, subdevice;
+
+ pcm_config = conf_get_subtree(card_config, "pcm", NULL);
+ if (!pcm_config)
+ return;
+ snd_config_for_each(i1, next1, pcm_config) {
+ node1 = snd_config_iterator_entry(i1);
+ device = device_from_id(node1);
+ if (device < 0)
+ continue;
+ if (snd_config_get_type(node1) != SND_CONFIG_TYPE_COMPOUND)
+ continue;
+ snd_config_for_each(i2, next2, node1) {
+ node2 = snd_config_iterator_entry(i2);
+ subdevice = device_from_id(node2);
+ if (subdevice < 0)
+ continue;
+ if (conf_get_subtree(node2, "PLAYBACK", NULL))
+ missing_device(card, device, subdevice, SND_PCM_STREAM_PLAYBACK);
+ if (conf_get_subtree(node2, "CAPTURE", NULL))
+ missing_device(card, device, subdevice, SND_PCM_STREAM_CAPTURE);
+ }
+ }
+}
+
+static void find_pcms(void)
+{
+ char name[32], key[64];
+ char *card_name, *card_longname;
+ int card, dev, subdev, count, direction, err;
+ snd_pcm_stream_t stream;
+ struct pcm_data *pcm_data;
+ snd_ctl_t *handle;
+ snd_pcm_info_t *pcm_info;
+ snd_config_t *config, *card_config, *pcm_config;
+ struct card_data *card_data;
+
+ snd_pcm_info_alloca(&pcm_info);
+
+ card = -1;
+ if (snd_card_next(&card) < 0 || card < 0)
+ return;
+
+ config = get_alsalib_config();
+
+ while (card >= 0) {
+ card_data = calloc(1, sizeof(*card_data));
+ if (!card_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ sprintf(name, "hw:%d", card);
+
+ err = snd_ctl_open_lconf(&handle, name, 0, config);
+ if (err < 0) {
+ ksft_print_msg("Failed to get hctl for card %d: %s\n",
+ card, snd_strerror(err));
+ goto next_card;
+ }
+
+ err = snd_card_get_name(card, &card_name);
+ if (err != 0)
+ card_name = "Unknown";
+ err = snd_card_get_longname(card, &card_longname);
+ if (err != 0)
+ card_longname = "Unknown";
+
+ err = snd_ctl_card_info_malloc(&card_data->info);
+ if (err != 0)
+ ksft_exit_fail_msg("Failed to allocate card info: %d\n",
+ err);
+
+ err = snd_ctl_card_info(handle, card_data->info);
+ if (err == 0) {
+ card_data->name = snd_ctl_card_info_get_id(card_data->info);
+ if (!card_data->name)
+ ksft_print_msg("Failed to get card ID\n");
+ } else {
+ ksft_print_msg("Failed to get card info: %d\n", err);
+ }
+
+ if (!card_data->name)
+ card_data->name = "Unknown";
+
+ ksft_print_msg("Card %d/%s - %s (%s)\n", card,
+ card_data->name, card_name, card_longname);
+
+ card_config = conf_by_card(card);
+
+ card_data->card = card;
+ card_data->next = card_list;
+ card_list = card_data;
+
+ dev = -1;
+ while (1) {
+ if (snd_ctl_pcm_next_device(handle, &dev) < 0)
+ ksft_exit_fail_msg("snd_ctl_pcm_next_device\n");
+ if (dev < 0)
+ break;
+
+ for (direction = 0; direction < 2; direction++) {
+ stream = direction ? SND_PCM_STREAM_CAPTURE : SND_PCM_STREAM_PLAYBACK;
+ sprintf(key, "pcm.%d.%s", dev, snd_pcm_stream_name(stream));
+ pcm_config = conf_get_subtree(card_config, key, NULL);
+ if (conf_get_bool(card_config, key, "skip", false)) {
+ ksft_print_msg("skipping pcm %d.%d.%s\n", card, dev, snd_pcm_stream_name(stream));
+ continue;
+ }
+ snd_pcm_info_set_device(pcm_info, dev);
+ snd_pcm_info_set_subdevice(pcm_info, 0);
+ snd_pcm_info_set_stream(pcm_info, stream);
+ err = snd_ctl_pcm_info(handle, pcm_info);
+ if (err == -ENOENT)
+ continue;
+ if (err < 0)
+ ksft_exit_fail_msg("snd_ctl_pcm_info: %d:%d:%d\n",
+ dev, 0, stream);
+
+ ksft_print_msg("%s.0 - %s\n", card_data->name,
+ snd_pcm_info_get_id(pcm_info));
+
+ count = snd_pcm_info_get_subdevices_count(pcm_info);
+ for (subdev = 0; subdev < count; subdev++) {
+ sprintf(key, "pcm.%d.%d.%s", dev, subdev, snd_pcm_stream_name(stream));
+ if (conf_get_bool(card_config, key, "skip", false)) {
+ ksft_print_msg("skipping pcm %d.%d.%d.%s\n", card, dev,
+ subdev, snd_pcm_stream_name(stream));
+ continue;
+ }
+ pcm_data = calloc(1, sizeof(*pcm_data));
+ if (!pcm_data)
+ ksft_exit_fail_msg("Out of memory\n");
+ pcm_data->card = card;
+ pcm_data->device = dev;
+ pcm_data->subdevice = subdev;
+ pcm_data->card_name = card_data->name;
+ pcm_data->stream = stream;
+ pcm_data->pcm_config = conf_get_subtree(card_config, key, NULL);
+ pcm_data->next = pcm_list;
+ pcm_list = pcm_data;
+ }
+ }
+ }
+
+ /* check for missing devices */
+ missing_devices(card, card_config);
+
+ next_card:
+ snd_ctl_close(handle);
+ if (snd_card_next(&card) < 0) {
+ ksft_print_msg("snd_card_next");
+ break;
+ }
+ }
+
+ snd_config_delete(config);
+}
+
+static void test_pcm_time(struct pcm_data *data, enum test_class class,
+ const char *test_name, snd_config_t *pcm_cfg)
+{
+ char name[64], msg[256];
+ const int duration_s = 2, margin_ms = 100;
+ const int duration_ms = duration_s * 1000;
+ const char *cs;
+ int i, err;
+ snd_pcm_t *handle = NULL;
+ snd_pcm_access_t access = SND_PCM_ACCESS_RW_INTERLEAVED;
+ snd_pcm_format_t format, old_format;
+ const char *alt_formats[8];
+ unsigned char *samples = NULL;
+ snd_pcm_sframes_t frames;
+ long long ms;
+ long rate, channels, period_size, buffer_size;
+ unsigned int rrate;
+ snd_pcm_uframes_t rperiod_size, rbuffer_size, start_threshold;
+ timestamp_t tstamp;
+ bool pass = false;
+ snd_pcm_hw_params_t *hw_params;
+ snd_pcm_sw_params_t *sw_params;
+ const char *test_class_name;
+ bool skip = true;
+ const char *desc;
+
+ switch (class) {
+ case TEST_CLASS_DEFAULT:
+ test_class_name = "default";
+ break;
+ case TEST_CLASS_SYSTEM:
+ test_class_name = "system";
+ break;
+ default:
+ ksft_exit_fail_msg("Unknown test class %d\n", class);
+ break;
+ }
+
+ desc = conf_get_string(pcm_cfg, "description", NULL, NULL);
+ if (desc)
+ ksft_print_msg("%s.%s.%s.%d.%d.%s - %s\n",
+ test_class_name, test_name,
+ data->card_name, data->device, data->subdevice,
+ snd_pcm_stream_name(data->stream),
+ desc);
+
+
+ snd_pcm_hw_params_alloca(&hw_params);
+ snd_pcm_sw_params_alloca(&sw_params);
+
+ cs = conf_get_string(pcm_cfg, "format", NULL, "S16_LE");
+ format = snd_pcm_format_value(cs);
+ if (format == SND_PCM_FORMAT_UNKNOWN)
+ ksft_exit_fail_msg("Wrong format '%s'\n", cs);
+ conf_get_string_array(pcm_cfg, "alt_formats", NULL,
+ alt_formats, ARRAY_SIZE(alt_formats), NULL);
+ rate = conf_get_long(pcm_cfg, "rate", NULL, 48000);
+ channels = conf_get_long(pcm_cfg, "channels", NULL, 2);
+ period_size = conf_get_long(pcm_cfg, "period_size", NULL, 4096);
+ buffer_size = conf_get_long(pcm_cfg, "buffer_size", NULL, 16384);
+
+ samples = malloc((rate * channels * snd_pcm_format_physical_width(format)) / 8);
+ if (!samples)
+ ksft_exit_fail_msg("Out of memory\n");
+ snd_pcm_format_set_silence(format, samples, rate * channels);
+
+ sprintf(name, "hw:%d,%d,%d", data->card, data->device, data->subdevice);
+ err = snd_pcm_open(&handle, name, data->stream, 0);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "Failed to get pcm handle: %s", snd_strerror(err));
+ goto __close;
+ }
+
+ err = snd_pcm_hw_params_any(handle, hw_params);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_any: %s", snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_hw_params_set_rate_resample(handle, hw_params, 0);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_rate_resample: %s", snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_hw_params_set_access(handle, hw_params, access);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_access %s: %s",
+ snd_pcm_access_name(access), snd_strerror(err));
+ goto __close;
+ }
+ i = -1;
+__format:
+ err = snd_pcm_hw_params_set_format(handle, hw_params, format);
+ if (err < 0) {
+ i++;
+ if (i < ARRAY_SIZE(alt_formats) && alt_formats[i]) {
+ old_format = format;
+ format = snd_pcm_format_value(alt_formats[i]);
+ if (format != SND_PCM_FORMAT_UNKNOWN) {
+ ksft_print_msg("%s.%s.%d.%d.%s.%s format %s -> %s\n",
+ test_name,
+ data->card_name, data->device, data->subdevice,
+ snd_pcm_stream_name(data->stream),
+ snd_pcm_access_name(access),
+ snd_pcm_format_name(old_format),
+ snd_pcm_format_name(format));
+ samples = realloc(samples, (rate * channels *
+ snd_pcm_format_physical_width(format)) / 8);
+ if (!samples)
+ ksft_exit_fail_msg("Out of memory\n");
+ snd_pcm_format_set_silence(format, samples, rate * channels);
+ goto __format;
+ }
+ }
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_format %s: %s",
+ snd_pcm_format_name(format), snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_hw_params_set_channels(handle, hw_params, channels);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_channels %ld: %s", channels, snd_strerror(err));
+ goto __close;
+ }
+ rrate = rate;
+ err = snd_pcm_hw_params_set_rate_near(handle, hw_params, &rrate, 0);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_rate %ld: %s", rate, snd_strerror(err));
+ goto __close;
+ }
+ if (rrate != rate) {
+ snprintf(msg, sizeof(msg), "rate mismatch %ld != %u", rate, rrate);
+ goto __close;
+ }
+ rperiod_size = period_size;
+ err = snd_pcm_hw_params_set_period_size_near(handle, hw_params, &rperiod_size, 0);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_period_size %ld: %s", period_size, snd_strerror(err));
+ goto __close;
+ }
+ rbuffer_size = buffer_size;
+ err = snd_pcm_hw_params_set_buffer_size_near(handle, hw_params, &rbuffer_size);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_buffer_size %ld: %s", buffer_size, snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_hw_params(handle, hw_params);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params: %s", snd_strerror(err));
+ goto __close;
+ }
+
+ err = snd_pcm_sw_params_current(handle, sw_params);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_sw_params_current: %s", snd_strerror(err));
+ goto __close;
+ }
+ if (data->stream == SND_PCM_STREAM_PLAYBACK) {
+ start_threshold = (rbuffer_size / rperiod_size) * rperiod_size;
+ } else {
+ start_threshold = rperiod_size;
+ }
+ err = snd_pcm_sw_params_set_start_threshold(handle, sw_params, start_threshold);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_sw_params_set_start_threshold %ld: %s", (long)start_threshold, snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_sw_params_set_avail_min(handle, sw_params, rperiod_size);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_sw_params_set_avail_min %ld: %s", (long)rperiod_size, snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_sw_params(handle, sw_params);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_sw_params: %s", snd_strerror(err));
+ goto __close;
+ }
+
+ ksft_print_msg("%s.%s.%s.%d.%d.%s hw_params.%s.%s.%ld.%ld.%ld.%ld sw_params.%ld\n",
+ test_class_name, test_name,
+ data->card_name, data->device, data->subdevice,
+ snd_pcm_stream_name(data->stream),
+ snd_pcm_access_name(access),
+ snd_pcm_format_name(format),
+ (long)rate, (long)channels,
+ (long)rperiod_size, (long)rbuffer_size,
+ (long)start_threshold);
+
+ /* Set all the params, actually run the test */
+ skip = false;
+
+ timestamp_now(&tstamp);
+ for (i = 0; i < duration_s; i++) {
+ if (data->stream == SND_PCM_STREAM_PLAYBACK) {
+ frames = snd_pcm_writei(handle, samples, rate);
+ if (frames < 0) {
+ snprintf(msg, sizeof(msg),
+ "Write failed: expected %ld, wrote %li", rate, frames);
+ goto __close;
+ }
+ if (frames < rate) {
+ snprintf(msg, sizeof(msg),
+ "expected %ld, wrote %li", rate, frames);
+ goto __close;
+ }
+ } else {
+ frames = snd_pcm_readi(handle, samples, rate);
+ if (frames < 0) {
+ snprintf(msg, sizeof(msg),
+ "expected %ld, wrote %li", rate, frames);
+ goto __close;
+ }
+ if (frames < rate) {
+ snprintf(msg, sizeof(msg),
+ "expected %ld, wrote %li", rate, frames);
+ goto __close;
+ }
+ }
+ }
+
+ snd_pcm_drain(handle);
+ ms = timestamp_diff_ms(&tstamp);
+ if (ms < duration_ms - margin_ms || ms > duration_ms + margin_ms) {
+ snprintf(msg, sizeof(msg), "time mismatch: expected %dms got %lld", duration_ms, ms);
+ goto __close;
+ }
+
+ msg[0] = '\0';
+ pass = true;
+__close:
+ pthread_mutex_lock(&results_lock);
+
+ switch (class) {
+ case TEST_CLASS_SYSTEM:
+ test_class_name = "system";
+ /*
+ * Anything specified as specific to this system
+ * should always be supported.
+ */
+ ksft_test_result(!skip, "%s.%s.%s.%d.%d.%s.params\n",
+ test_class_name, test_name,
+ data->card_name, data->device,
+ data->subdevice,
+ snd_pcm_stream_name(data->stream));
+ break;
+ default:
+ break;
+ }
+
+ if (!skip)
+ ksft_test_result(pass, "%s.%s.%s.%d.%d.%s\n",
+ test_class_name, test_name,
+ data->card_name, data->device,
+ data->subdevice,
+ snd_pcm_stream_name(data->stream));
+ else
+ ksft_test_result_skip("%s.%s.%s.%d.%d.%s\n",
+ test_class_name, test_name,
+ data->card_name, data->device,
+ data->subdevice,
+ snd_pcm_stream_name(data->stream));
+
+ if (msg[0])
+ ksft_print_msg("%s\n", msg);
+
+ pthread_mutex_unlock(&results_lock);
+
+ free(samples);
+ if (handle)
+ snd_pcm_close(handle);
+}
+
+void run_time_tests(struct pcm_data *pcm, enum test_class class,
+ snd_config_t *cfg)
+{
+ const char *test_name, *test_type;
+ snd_config_t *pcm_cfg;
+ snd_config_iterator_t i, next;
+
+ if (!cfg)
+ return;
+
+ cfg = conf_get_subtree(cfg, "test", NULL);
+ if (cfg == NULL)
+ return;
+
+ snd_config_for_each(i, next, cfg) {
+ pcm_cfg = snd_config_iterator_entry(i);
+ if (snd_config_get_id(pcm_cfg, &test_name) < 0)
+ ksft_exit_fail_msg("snd_config_get_id\n");
+ test_type = conf_get_string(pcm_cfg, "type", NULL, "time");
+ if (strcmp(test_type, "time") == 0)
+ test_pcm_time(pcm, class, test_name, pcm_cfg);
+ else
+ ksft_exit_fail_msg("unknown test type '%s'\n", test_type);
+ }
+}
+
+void *card_thread(void *data)
+{
+ struct card_data *card = data;
+ struct pcm_data *pcm;
+
+ for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) {
+ if (pcm->card != card->card)
+ continue;
+
+ run_time_tests(pcm, TEST_CLASS_DEFAULT, default_pcm_config);
+ run_time_tests(pcm, TEST_CLASS_SYSTEM, pcm->pcm_config);
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ struct card_data *card;
+ struct card_cfg_data *conf;
+ struct pcm_data *pcm;
+ snd_config_t *global_config, *cfg;
+ int num_pcm_tests = 0, num_tests, num_std_pcm_tests;
+ int ret;
+ void *thread_ret;
+
+ ksft_print_header();
+
+ global_config = conf_load_from_file("pcm-test.conf");
+ default_pcm_config = conf_get_subtree(global_config, "pcm", NULL);
+ if (default_pcm_config == NULL)
+ ksft_exit_fail_msg("default pcm test configuration (pcm compound) is missing\n");
+
+ conf_load();
+
+ find_pcms();
+
+ for (conf = conf_cards; conf; conf = conf->next)
+ if (conf->card < 0)
+ num_missing++;
+
+ num_std_pcm_tests = conf_get_count(default_pcm_config, "test", NULL);
+
+ for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) {
+ num_pcm_tests += num_std_pcm_tests;
+ cfg = pcm->pcm_config;
+ if (cfg == NULL)
+ continue;
+ /* Setting params is reported as a separate test */
+ num_tests = conf_get_count(cfg, "test", NULL) * 2;
+ if (num_tests > 0)
+ num_pcm_tests += num_tests;
+ }
+
+ ksft_set_plan(num_missing + num_pcm_tests);
+
+ for (conf = conf_cards; conf; conf = conf->next)
+ if (conf->card < 0)
+ ksft_test_result_fail("test.missing.%s.%s\n",
+ conf->filename, conf->config_id);
+
+ for (pcm = pcm_missing; pcm != NULL; pcm = pcm->next) {
+ ksft_test_result(false, "test.missing.%s.%d.%d.%s\n",
+ pcm->card_name, pcm->device, pcm->subdevice,
+ snd_pcm_stream_name(pcm->stream));
+ }
+
+ for (card = card_list; card != NULL; card = card->next) {
+ ret = pthread_create(&card->thread, NULL, card_thread, card);
+ if (ret != 0) {
+ ksft_exit_fail_msg("Failed to create card %d thread: %d (%s)\n",
+ card->card, ret,
+ strerror(errno));
+ }
+ }
+
+ for (card = card_list; card != NULL; card = card->next) {
+ ret = pthread_join(card->thread, &thread_ret);
+ if (ret != 0) {
+ ksft_exit_fail_msg("Failed to join card %d thread: %d (%s)\n",
+ card->card, ret,
+ strerror(errno));
+ }
+ }
+
+ snd_config_delete(global_config);
+ conf_free();
+
+ ksft_exit_pass();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/alsa/pcm-test.conf b/tools/testing/selftests/alsa/pcm-test.conf
new file mode 100644
index 000000000000..71bd3f78a6f2
--- /dev/null
+++ b/tools/testing/selftests/alsa/pcm-test.conf
@@ -0,0 +1,63 @@
+pcm.test.time1 {
+ description "8kHz mono large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 8000
+ channels 1
+ period_size 8000
+ buffer_size 32000
+}
+pcm.test.time2 {
+ description "8kHz stereo large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 8000
+ channels 2
+ period_size 8000
+ buffer_size 32000
+}
+pcm.test.time3 {
+ description "44.1kHz stereo large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 44100
+ channels 2
+ period_size 22500
+ buffer_size 192000
+}
+pcm.test.time4 {
+ description "48kHz stereo small periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 48000
+ channels 2
+ period_size 512
+ buffer_size 4096
+}
+pcm.test.time5 {
+ description "48kHz stereo large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 48000
+ channels 2
+ period_size 24000
+ buffer_size 192000
+}
+pcm.test.time6 {
+ description "48kHz 6 channel large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 48000
+ channels 2
+ period_size 48000
+ buffer_size 576000
+}
+pcm.test.time7 {
+ description "96kHz stereo large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 96000
+ channels 2
+ period_size 48000
+ buffer_size 192000
+}
diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c
new file mode 100644
index 000000000000..95065ef3b441
--- /dev/null
+++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is the test which covers PCM middle layer data transferring using
+ * the virtual pcm test driver (snd-pcmtest).
+ *
+ * Copyright 2023 Ivan Orlov <ivan.orlov0322@gmail.com>
+ */
+#include <string.h>
+#include <alsa/asoundlib.h>
+#include "kselftest_harness.h"
+
+#define CH_NUM 4
+
+struct pattern_buf {
+ char buf[1024];
+ int len;
+};
+
+struct pattern_buf patterns[CH_NUM];
+
+struct pcmtest_test_params {
+ unsigned long buffer_size;
+ unsigned long period_size;
+ unsigned long channels;
+ unsigned int rate;
+ snd_pcm_access_t access;
+ size_t sec_buf_len;
+ size_t sample_size;
+ int time;
+ snd_pcm_format_t format;
+};
+
+static int read_patterns(void)
+{
+ FILE *fp, *fpl;
+ int i;
+ char pf[64];
+ char plf[64];
+
+ for (i = 0; i < CH_NUM; i++) {
+ sprintf(plf, "/sys/kernel/debug/pcmtest/fill_pattern%d_len", i);
+ fpl = fopen(plf, "r");
+ if (!fpl)
+ return -1;
+ fscanf(fpl, "%u", &patterns[i].len);
+ fclose(fpl);
+
+ sprintf(pf, "/sys/kernel/debug/pcmtest/fill_pattern%d", i);
+ fp = fopen(pf, "r");
+ if (!fp)
+ return -1;
+ fread(patterns[i].buf, 1, patterns[i].len, fp);
+ fclose(fp);
+ }
+
+ return 0;
+}
+
+static int get_test_results(char *debug_name)
+{
+ int result;
+ FILE *f;
+ char fname[128];
+
+ sprintf(fname, "/sys/kernel/debug/pcmtest/%s", debug_name);
+
+ f = fopen(fname, "r");
+ if (!f) {
+ printf("Failed to open file\n");
+ return -1;
+ }
+ fscanf(f, "%d", &result);
+ fclose(f);
+
+ return result;
+}
+
+static size_t get_sec_buf_len(unsigned int rate, unsigned long channels, snd_pcm_format_t format)
+{
+ return rate * channels * snd_pcm_format_physical_width(format) / 8;
+}
+
+static int setup_handle(snd_pcm_t **handle, snd_pcm_sw_params_t *swparams,
+ snd_pcm_hw_params_t *hwparams, struct pcmtest_test_params *params,
+ int card, snd_pcm_stream_t stream)
+{
+ char pcm_name[32];
+ int err;
+
+ sprintf(pcm_name, "hw:%d,0,0", card);
+ err = snd_pcm_open(handle, pcm_name, stream, 0);
+ if (err < 0)
+ return err;
+ snd_pcm_hw_params_any(*handle, hwparams);
+ snd_pcm_hw_params_set_rate_resample(*handle, hwparams, 0);
+ snd_pcm_hw_params_set_access(*handle, hwparams, params->access);
+ snd_pcm_hw_params_set_format(*handle, hwparams, params->format);
+ snd_pcm_hw_params_set_channels(*handle, hwparams, params->channels);
+ snd_pcm_hw_params_set_rate_near(*handle, hwparams, &params->rate, 0);
+ snd_pcm_hw_params_set_period_size_near(*handle, hwparams, &params->period_size, 0);
+ snd_pcm_hw_params_set_buffer_size_near(*handle, hwparams, &params->buffer_size);
+ snd_pcm_hw_params(*handle, hwparams);
+ snd_pcm_sw_params_current(*handle, swparams);
+
+ snd_pcm_hw_params_set_rate_resample(*handle, hwparams, 0);
+ snd_pcm_sw_params_set_avail_min(*handle, swparams, params->period_size);
+ snd_pcm_hw_params_set_buffer_size_near(*handle, hwparams, &params->buffer_size);
+ snd_pcm_hw_params_set_period_size_near(*handle, hwparams, &params->period_size, 0);
+ snd_pcm_sw_params(*handle, swparams);
+ snd_pcm_hw_params(*handle, hwparams);
+
+ return 0;
+}
+
+FIXTURE(pcmtest) {
+ int card;
+ snd_pcm_sw_params_t *swparams;
+ snd_pcm_hw_params_t *hwparams;
+ struct pcmtest_test_params params;
+};
+
+FIXTURE_TEARDOWN(pcmtest) {
+}
+
+FIXTURE_SETUP(pcmtest) {
+ char *card_name;
+ int err;
+
+ if (geteuid())
+ SKIP(return, "This test needs root to run!");
+
+ err = read_patterns();
+ if (err)
+ SKIP(return, "Can't read patterns. Probably, module isn't loaded");
+
+ card_name = malloc(127);
+ ASSERT_NE(card_name, NULL);
+ self->params.buffer_size = 16384;
+ self->params.period_size = 4096;
+ self->params.channels = CH_NUM;
+ self->params.rate = 8000;
+ self->params.access = SND_PCM_ACCESS_RW_INTERLEAVED;
+ self->params.format = SND_PCM_FORMAT_S16_LE;
+ self->card = -1;
+ self->params.sample_size = snd_pcm_format_physical_width(self->params.format) / 8;
+
+ self->params.sec_buf_len = get_sec_buf_len(self->params.rate, self->params.channels,
+ self->params.format);
+ self->params.time = 4;
+
+ while (snd_card_next(&self->card) >= 0) {
+ if (self->card == -1)
+ break;
+ snd_card_get_name(self->card, &card_name);
+ if (!strcmp(card_name, "PCM-Test"))
+ break;
+ }
+ free(card_name);
+ ASSERT_NE(self->card, -1);
+}
+
+/*
+ * Here we are trying to send the looped monotonically increasing sequence of bytes to the driver.
+ * If our data isn't corrupted, the driver will set the content of 'pc_test' debugfs file to '1'
+ */
+TEST_F(pcmtest, playback) {
+ snd_pcm_t *handle;
+ unsigned char *it;
+ size_t write_res;
+ int test_results;
+ int i, cur_ch, pos_in_ch;
+ void *samples;
+ struct pcmtest_test_params *params = &self->params;
+
+ samples = calloc(self->params.sec_buf_len * self->params.time, 1);
+ ASSERT_NE(samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, params,
+ self->card, SND_PCM_STREAM_PLAYBACK), 0);
+ snd_pcm_format_set_silence(params->format, samples,
+ params->rate * params->channels * params->time);
+ it = samples;
+ for (i = 0; i < self->params.sec_buf_len * params->time; i++) {
+ cur_ch = (i / params->sample_size) % CH_NUM;
+ pos_in_ch = i / params->sample_size / CH_NUM * params->sample_size
+ + (i % params->sample_size);
+ it[i] = patterns[cur_ch].buf[pos_in_ch % patterns[cur_ch].len];
+ }
+ write_res = snd_pcm_writei(handle, samples, params->rate * params->time);
+ ASSERT_GE(write_res, 0);
+
+ snd_pcm_close(handle);
+ free(samples);
+ test_results = get_test_results("pc_test");
+ ASSERT_EQ(test_results, 1);
+}
+
+/*
+ * Here we test that the virtual alsa driver returns looped and monotonically increasing sequence
+ * of bytes. In the interleaved mode the buffer will contain samples in the following order:
+ * C0, C1, C2, C3, C0, C1, ...
+ */
+TEST_F(pcmtest, capture) {
+ snd_pcm_t *handle;
+ unsigned char *it;
+ size_t read_res;
+ int i, cur_ch, pos_in_ch;
+ void *samples;
+ struct pcmtest_test_params *params = &self->params;
+
+ samples = calloc(self->params.sec_buf_len * self->params.time, 1);
+ ASSERT_NE(samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams,
+ params, self->card, SND_PCM_STREAM_CAPTURE), 0);
+ snd_pcm_format_set_silence(params->format, samples,
+ params->rate * params->channels * params->time);
+ read_res = snd_pcm_readi(handle, samples, params->rate * params->time);
+ ASSERT_GE(read_res, 0);
+ snd_pcm_close(handle);
+ it = (unsigned char *)samples;
+ for (i = 0; i < self->params.sec_buf_len * self->params.time; i++) {
+ cur_ch = (i / params->sample_size) % CH_NUM;
+ pos_in_ch = i / params->sample_size / CH_NUM * params->sample_size
+ + (i % params->sample_size);
+ ASSERT_EQ(it[i], patterns[cur_ch].buf[pos_in_ch % patterns[cur_ch].len]);
+ }
+ free(samples);
+}
+
+// Test capture in the non-interleaved access mode. The are buffers for each recorded channel
+TEST_F(pcmtest, ni_capture) {
+ snd_pcm_t *handle;
+ struct pcmtest_test_params params = self->params;
+ char **chan_samples;
+ size_t i, j, read_res;
+
+ chan_samples = calloc(CH_NUM, sizeof(*chan_samples));
+ ASSERT_NE(chan_samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ params.access = SND_PCM_ACCESS_RW_NONINTERLEAVED;
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams,
+ &params, self->card, SND_PCM_STREAM_CAPTURE), 0);
+
+ for (i = 0; i < CH_NUM; i++)
+ chan_samples[i] = calloc(params.sec_buf_len * params.time, 1);
+
+ for (i = 0; i < 1; i++) {
+ read_res = snd_pcm_readn(handle, (void **)chan_samples, params.rate * params.time);
+ ASSERT_GE(read_res, 0);
+ }
+ snd_pcm_close(handle);
+
+ for (i = 0; i < CH_NUM; i++) {
+ for (j = 0; j < params.rate * params.time; j++)
+ ASSERT_EQ(chan_samples[i][j], patterns[i].buf[j % patterns[i].len]);
+ free(chan_samples[i]);
+ }
+ free(chan_samples);
+}
+
+TEST_F(pcmtest, ni_playback) {
+ snd_pcm_t *handle;
+ struct pcmtest_test_params params = self->params;
+ char **chan_samples;
+ size_t i, j, read_res;
+ int test_res;
+
+ chan_samples = calloc(CH_NUM, sizeof(*chan_samples));
+ ASSERT_NE(chan_samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ params.access = SND_PCM_ACCESS_RW_NONINTERLEAVED;
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams,
+ &params, self->card, SND_PCM_STREAM_PLAYBACK), 0);
+
+ for (i = 0; i < CH_NUM; i++) {
+ chan_samples[i] = calloc(params.sec_buf_len * params.time, 1);
+ for (j = 0; j < params.sec_buf_len * params.time; j++)
+ chan_samples[i][j] = patterns[i].buf[j % patterns[i].len];
+ }
+
+ for (i = 0; i < 1; i++) {
+ read_res = snd_pcm_writen(handle, (void **)chan_samples, params.rate * params.time);
+ ASSERT_GE(read_res, 0);
+ }
+
+ snd_pcm_close(handle);
+ test_res = get_test_results("pc_test");
+ ASSERT_EQ(test_res, 1);
+
+ for (i = 0; i < CH_NUM; i++)
+ free(chan_samples[i]);
+ free(chan_samples);
+}
+
+/*
+ * Here we are testing the custom ioctl definition inside the virtual driver. If it triggers
+ * successfully, the driver sets the content of 'ioctl_test' debugfs file to '1'.
+ */
+TEST_F(pcmtest, reset_ioctl) {
+ snd_pcm_t *handle;
+ int test_res;
+ struct pcmtest_test_params *params = &self->params;
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, params,
+ self->card, SND_PCM_STREAM_CAPTURE), 0);
+ snd_pcm_reset(handle);
+ test_res = get_test_results("ioctl_test");
+ ASSERT_EQ(test_res, 1);
+ snd_pcm_close(handle);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c
new file mode 100644
index 000000000000..c45cb226bd8f
--- /dev/null
+++ b/tools/testing/selftests/alsa/utimer-test.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This test covers the functionality of userspace-driven ALSA timers. Such timers
+ * are purely virtual (so they don't directly depend on the hardware), and they could be
+ * created and triggered by userspace applications.
+ *
+ * Author: Ivan Orlov <ivan.orlov0322@gmail.com>
+ */
+#include "kselftest_harness.h"
+#include <sound/asound.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+
+#define FRAME_RATE 8000
+#define PERIOD_SIZE 4410
+#define UTIMER_DEFAULT_ID -1
+#define UTIMER_DEFAULT_FD -1
+#define NANO 1000000000ULL
+#define TICKS_COUNT 10
+#define TICKS_RECORDING_DELTA 5
+#define TIMER_OUTPUT_BUF_LEN 1024
+#define TIMER_FREQ_SEC 1
+#define RESULT_PREFIX_LEN strlen("Total ticks count: ")
+
+enum timer_app_event {
+ TIMER_APP_STARTED,
+ TIMER_APP_RESULT,
+ TIMER_NO_EVENT,
+};
+
+FIXTURE(timer_f) {
+ struct snd_timer_uinfo *utimer_info;
+};
+
+FIXTURE_SETUP(timer_f) {
+ int timer_dev_fd;
+
+ if (geteuid())
+ SKIP(return, "This test needs root to run!");
+
+ self->utimer_info = calloc(1, sizeof(*self->utimer_info));
+ ASSERT_NE(NULL, self->utimer_info);
+
+ /* Resolution is the time the period of frames takes in nanoseconds */
+ self->utimer_info->resolution = (NANO / FRAME_RATE * PERIOD_SIZE);
+
+ timer_dev_fd = open("/dev/snd/timer", O_RDONLY);
+ ASSERT_GE(timer_dev_fd, 0);
+
+ ASSERT_EQ(ioctl(timer_dev_fd, SNDRV_TIMER_IOCTL_CREATE, self->utimer_info), 0);
+ ASSERT_GE(self->utimer_info->fd, 0);
+
+ close(timer_dev_fd);
+}
+
+FIXTURE_TEARDOWN(timer_f) {
+ close(self->utimer_info->fd);
+ free(self->utimer_info);
+}
+
+static void *ticking_func(void *data)
+{
+ int i;
+ int *fd = (int *)data;
+
+ for (i = 0; i < TICKS_COUNT; i++) {
+ /* Well, trigger the timer! */
+ ioctl(*fd, SNDRV_TIMER_IOCTL_TRIGGER, NULL);
+ sleep(TIMER_FREQ_SEC);
+ }
+
+ return NULL;
+}
+
+static enum timer_app_event parse_timer_output(const char *s)
+{
+ if (strstr(s, "Timer has started"))
+ return TIMER_APP_STARTED;
+ if (strstr(s, "Total ticks count"))
+ return TIMER_APP_RESULT;
+
+ return TIMER_NO_EVENT;
+}
+
+static int parse_timer_result(const char *s)
+{
+ char *end;
+ long d;
+
+ d = strtol(s + RESULT_PREFIX_LEN, &end, 10);
+ if (end == s + RESULT_PREFIX_LEN)
+ return -1;
+
+ return d;
+}
+
+/*
+ * This test triggers the timer and counts ticks at the same time. The amount
+ * of the timer trigger calls should be equal to the amount of ticks received.
+ */
+TEST_F(timer_f, utimer) {
+ char command[64];
+ pthread_t ticking_thread;
+ int total_ticks = 0;
+ FILE *rfp;
+ char *buf = malloc(TIMER_OUTPUT_BUF_LEN);
+
+ ASSERT_NE(buf, NULL);
+
+ /* The timeout should be the ticks interval * count of ticks + some delta */
+ sprintf(command, "./global-timer %d %d %d", SNDRV_TIMER_GLOBAL_UDRIVEN,
+ self->utimer_info->id, TICKS_COUNT * TIMER_FREQ_SEC + TICKS_RECORDING_DELTA);
+
+ rfp = popen(command, "r");
+ while (fgets(buf, TIMER_OUTPUT_BUF_LEN, rfp)) {
+ buf[TIMER_OUTPUT_BUF_LEN - 1] = 0;
+ switch (parse_timer_output(buf)) {
+ case TIMER_APP_STARTED:
+ /* global-timer waits for timer to trigger, so start the ticking thread */
+ pthread_create(&ticking_thread, NULL, ticking_func,
+ &self->utimer_info->fd);
+ break;
+ case TIMER_APP_RESULT:
+ total_ticks = parse_timer_result(buf);
+ break;
+ case TIMER_NO_EVENT:
+ break;
+ }
+ }
+ pthread_join(ticking_thread, NULL);
+ ASSERT_EQ(total_ticks, TICKS_COUNT);
+ pclose(rfp);
+ free(buf);
+}
+
+TEST(wrong_timers_test) {
+ int timer_dev_fd;
+ int utimer_fd;
+ size_t i;
+ struct snd_timer_uinfo wrong_timer = {
+ .resolution = 0,
+ .id = UTIMER_DEFAULT_ID,
+ .fd = UTIMER_DEFAULT_FD,
+ };
+
+ timer_dev_fd = open("/dev/snd/timer", O_RDONLY);
+ ASSERT_GE(timer_dev_fd, 0);
+
+ utimer_fd = ioctl(timer_dev_fd, SNDRV_TIMER_IOCTL_CREATE, &wrong_timer);
+ ASSERT_LT(utimer_fd, 0);
+ /* Check that id was not updated */
+ ASSERT_EQ(wrong_timer.id, UTIMER_DEFAULT_ID);
+
+ /* Test the NULL as an argument is processed correctly */
+ ASSERT_LT(ioctl(timer_dev_fd, SNDRV_TIMER_IOCTL_CREATE, NULL), 0);
+
+ close(timer_dev_fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/amd-pstate/Makefile b/tools/testing/selftests/amd-pstate/Makefile
new file mode 100644
index 000000000000..c382f579fe94
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Makefile for amd-pstate/ function selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq (x86,$(ARCH))
+TEST_FILES += ../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py
+TEST_FILES += ../../../power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+endif
+
+TEST_PROGS += run.sh
+TEST_FILES += basic.sh tbench.sh gitsource.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/amd-pstate/basic.sh b/tools/testing/selftests/amd-pstate/basic.sh
new file mode 100755
index 000000000000..e4c43193e4a3
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/basic.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# amd-pstate-ut is a test module for testing the amd-pstate driver.
+# It can only run on x86 architectures and current cpufreq driver
+# must be amd-pstate.
+# (1) It can help all users to verify their processor support
+# (SBIOS/Firmware or Hardware).
+# (2) Kernel can have a basic function test to avoid the kernel
+# regression during the update.
+# (3) We can introduce more functional or performance tests to align
+# the result together, it will benefit power and performance scale optimization.
+
+# protect against multiple inclusion
+if [ $FILE_BASIC ]; then
+ return 0
+else
+ FILE_BASIC=DONE
+fi
+
+amd_pstate_basic()
+{
+ printf "\n---------------------------------------------\n"
+ printf "*** Running AMD P-state ut ***"
+ printf "\n---------------------------------------------\n"
+
+ if ! /sbin/modprobe -q -n amd-pstate-ut; then
+ echo "amd-pstate-ut: module amd-pstate-ut is not found [SKIP]"
+ exit $ksft_skip
+ fi
+ if /sbin/modprobe -q amd-pstate-ut; then
+ /sbin/modprobe -q -r amd-pstate-ut
+ echo "amd-pstate-basic: ok"
+ else
+ echo "amd-pstate-basic: [FAIL]"
+ exit 1
+ fi
+}
diff --git a/tools/testing/selftests/amd-pstate/config b/tools/testing/selftests/amd-pstate/config
new file mode 100644
index 000000000000..f43103c9adc4
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/config
@@ -0,0 +1 @@
+CONFIG_X86_AMD_PSTATE_UT=m
diff --git a/tools/testing/selftests/amd-pstate/gitsource.sh b/tools/testing/selftests/amd-pstate/gitsource.sh
new file mode 100755
index 000000000000..4cde62f90468
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/gitsource.sh
@@ -0,0 +1,359 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Testing and monitor the cpu desire performance, frequency, load,
+# power consumption and throughput etc. when this script trigger
+# gitsource test.
+# 1) Download and tar gitsource codes.
+# 2) Run gitsource benchmark on specific governors, ondemand or schedutil.
+# 3) Run tbench benchmark comparative test on acpi-cpufreq kernel driver.
+# 4) Get desire performance, frequency, load by perf.
+# 5) Get power consumption and throughput by amd_pstate_trace.py.
+# 6) Get run time by /usr/bin/time.
+# 7) Analyse test results and save it in file selftest.gitsource.csv.
+#8) Plot png images about time, energy and performance per watt for each test.
+
+# protect against multiple inclusion
+if [ $FILE_GITSOURCE ]; then
+ return 0
+else
+ FILE_GITSOURCE=DONE
+fi
+
+git_name="git-2.15.1"
+git_tar="$git_name.tar.gz"
+gitsource_url="https://github.com/git/git/archive/refs/tags/v2.15.1.tar.gz"
+gitsource_governors=("ondemand" "schedutil")
+
+# $1: governor, $2: round, $3: des-perf, $4: freq, $5: load, $6: time $7: energy, $8: PPW
+store_csv_gitsource()
+{
+ echo "$1, $2, $3, $4, $5, $6, $7, $8" | tee -a $OUTFILE_GIT.csv > /dev/null 2>&1
+}
+
+# clear some special lines
+clear_csv_gitsource()
+{
+ if [ -f $OUTFILE_GIT.csv ]; then
+ sed -i '/Comprison(%)/d' $OUTFILE_GIT.csv
+ sed -i "/$(scaling_name)/d" $OUTFILE_GIT.csv
+ fi
+}
+
+# find string $1 in file csv and get the number of lines
+get_lines_csv_gitsource()
+{
+ if [ -f $OUTFILE_GIT.csv ]; then
+ return `grep -c "$1" $OUTFILE_GIT.csv`
+ else
+ return 0
+ fi
+}
+
+pre_clear_gitsource()
+{
+ post_clear_gitsource
+ rm -rf gitsource_*.png
+ clear_csv_gitsource
+}
+
+post_clear_gitsource()
+{
+ rm -rf results/tracer-gitsource*
+ rm -rf $OUTFILE_GIT*.log
+ rm -rf $OUTFILE_GIT*.result
+}
+
+install_gitsource()
+{
+ if [ ! -d $SCRIPTDIR/$git_name ]; then
+ pushd $(pwd) > /dev/null 2>&1
+ cd $SCRIPTDIR
+ printf "Download gitsource, please wait a moment ...\n\n"
+ wget -O $git_tar $gitsource_url > /dev/null 2>&1
+
+ printf "Tar gitsource ...\n\n"
+ tar -xzf $git_tar
+ popd > /dev/null 2>&1
+ fi
+}
+
+# $1: governor, $2: loop
+run_gitsource()
+{
+ echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL"
+ $TRACER -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
+
+ printf "Make and test gitsource for $1 #$2 make_cpus: $MAKE_CPUS\n"
+ BACKUP_DIR=$(pwd)
+ pushd $BACKUP_DIR > /dev/null 2>&1
+ cd $SCRIPTDIR/$git_name
+ $PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o $BACKUP_DIR/$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > $BACKUP_DIR/$OUTFILE_GIT-perf-$1-$2.log 2>&1
+ popd > /dev/null 2>&1
+
+ for job in `jobs -p`
+ do
+ echo "Waiting for job id $job"
+ wait $job
+ done
+}
+
+# $1: governor, $2: loop
+parse_gitsource()
+{
+ awk '{print $5}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-des-perf-$1-$2.log
+ avg_des_perf=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-des-perf-$1-$2.log)
+ printf "Gitsource-$1-#$2 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_GIT.result
+
+ awk '{print $7}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-freq-$1-$2.log
+ avg_freq=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-freq-$1-$2.log)
+ printf "Gitsource-$1-#$2 avg freq: $avg_freq\n" | tee -a $OUTFILE_GIT.result
+
+ awk '{print $11}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-load-$1-$2.log
+ avg_load=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-load-$1-$2.log)
+ printf "Gitsource-$1-#$2 avg load: $avg_load\n" | tee -a $OUTFILE_GIT.result
+
+ grep user $OUTFILE_GIT.time-gitsource-$1-$2.log | awk '{print $1}' | sed -e 's/user//' > $OUTFILE_GIT-time-$1-$2.log
+ time_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-time-$1-$2.log)
+ printf "Gitsource-$1-#$2 user time(s): $time_sum\n" | tee -a $OUTFILE_GIT.result
+
+ grep Joules $OUTFILE_GIT-perf-$1-$2.log | awk '{print $4}' > $OUTFILE_GIT-energy-$1-$2.log
+ en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-energy-$1-$2.log)
+ printf "Gitsource-$1-#$2 power consumption(J): $en_sum\n" | tee -a $OUTFILE_GIT.result
+
+ # Permance is the number of run gitsource per second, denoted 1/t, where 1 is the number of run gitsource in t
+ # seconds. It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J),
+ # and t is time measured in seconds(s). This means that performance per watt becomes
+ # 1/t 1/t 1
+ # ----- = ----- = ---
+ # P E/t E
+ # with unit given by 1 per joule.
+ ppw=`echo "scale=9;1/$en_sum" | bc | awk '{printf "%.9f", $0}'`
+ printf "Gitsource-$1-#$2 performance per watt(1/J): $ppw\n" | tee -a $OUTFILE_GIT.result
+ printf "\n" | tee -a $OUTFILE_GIT.result
+
+ driver_name=`echo $(scaling_name)`
+ store_csv_gitsource "$driver_name-$1" $2 $avg_des_perf $avg_freq $avg_load $time_sum $en_sum $ppw
+}
+
+# $1: governor
+loop_gitsource()
+{
+ printf "\nGitsource total test times is $LOOP_TIMES for $1\n\n"
+ for i in `seq 1 $LOOP_TIMES`
+ do
+ run_gitsource $1 $i
+ parse_gitsource $1 $i
+ done
+}
+
+# $1: governor
+gather_gitsource()
+{
+ printf "Gitsource test result for $1 (loops:$LOOP_TIMES)" | tee -a $OUTFILE_GIT.result
+ printf "\n--------------------------------------------------\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg des perf:" | awk '{print $NF}' > $OUTFILE_GIT-des-perf-$1.log
+ avg_des_perf=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-des-perf-$1.log)
+ printf "Gitsource-$1 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg freq:" | awk '{print $NF}' > $OUTFILE_GIT-freq-$1.log
+ avg_freq=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-freq-$1.log)
+ printf "Gitsource-$1 avg freq: $avg_freq\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg load:" | awk '{print $NF}' > $OUTFILE_GIT-load-$1.log
+ avg_load=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-load-$1.log)
+ printf "Gitsource-$1 avg load: $avg_load\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "user time(s):" | awk '{print $NF}' > $OUTFILE_GIT-time-$1.log
+ time_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-time-$1.log)
+ printf "Gitsource-$1 total user time(s): $time_sum\n" | tee -a $OUTFILE_GIT.result
+
+ avg_time=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-time-$1.log)
+ printf "Gitsource-$1 avg user times(s): $avg_time\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "power consumption(J):" | awk '{print $NF}' > $OUTFILE_GIT-energy-$1.log
+ en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-energy-$1.log)
+ printf "Gitsource-$1 total power consumption(J): $en_sum\n" | tee -a $OUTFILE_GIT.result
+
+ avg_en=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-energy-$1.log)
+ printf "Gitsource-$1 avg power consumption(J): $avg_en\n" | tee -a $OUTFILE_GIT.result
+
+ # Permance is the number of run gitsource per second, denoted 1/t, where 1 is the number of run gitsource in t
+ # seconds. It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J),
+ # and t is time measured in seconds(s). This means that performance per watt becomes
+ # 1/t 1/t 1
+ # ----- = ----- = ---
+ # P E/t E
+ # with unit given by 1 per joule.
+ ppw=`echo "scale=9;1/$avg_en" | bc | awk '{printf "%.9f", $0}'`
+ printf "Gitsource-$1 performance per watt(1/J): $ppw\n" | tee -a $OUTFILE_GIT.result
+ printf "\n" | tee -a $OUTFILE_GIT.result
+
+ driver_name=`echo $(scaling_name)`
+ store_csv_gitsource "$driver_name-$1" "Average" $avg_des_perf $avg_freq $avg_load $avg_time $avg_en $ppw
+}
+
+# $1: base scaling_driver $2: base governor $3: comparison scaling_driver $4: comparison governor
+__calc_comp_gitsource()
+{
+ base=`grep "$1-$2" $OUTFILE_GIT.csv | grep "Average"`
+ comp=`grep "$3-$4" $OUTFILE_GIT.csv | grep "Average"`
+
+ if [ -n "$base" -a -n "$comp" ]; then
+ printf "\n==================================================\n" | tee -a $OUTFILE_GIT.result
+ printf "Gitsource comparison $1-$2 VS $3-$4" | tee -a $OUTFILE_GIT.result
+ printf "\n==================================================\n" | tee -a $OUTFILE_GIT.result
+
+ # get the base values
+ des_perf_base=`echo "$base" | awk '{print $3}' | sed s/,//`
+ freq_base=`echo "$base" | awk '{print $4}' | sed s/,//`
+ load_base=`echo "$base" | awk '{print $5}' | sed s/,//`
+ time_base=`echo "$base" | awk '{print $6}' | sed s/,//`
+ energy_base=`echo "$base" | awk '{print $7}' | sed s/,//`
+ ppw_base=`echo "$base" | awk '{print $8}' | sed s/,//`
+
+ # get the comparison values
+ des_perf_comp=`echo "$comp" | awk '{print $3}' | sed s/,//`
+ freq_comp=`echo "$comp" | awk '{print $4}' | sed s/,//`
+ load_comp=`echo "$comp" | awk '{print $5}' | sed s/,//`
+ time_comp=`echo "$comp" | awk '{print $6}' | sed s/,//`
+ energy_comp=`echo "$comp" | awk '{print $7}' | sed s/,//`
+ ppw_comp=`echo "$comp" | awk '{print $8}' | sed s/,//`
+
+ # compare the base and comp values
+ des_perf_drop=`echo "scale=4;($des_perf_comp-$des_perf_base)*100/$des_perf_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 des perf base: $des_perf_base comprison: $des_perf_comp percent: $des_perf_drop\n" | tee -a $OUTFILE_GIT.result
+
+ freq_drop=`echo "scale=4;($freq_comp-$freq_base)*100/$freq_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 freq base: $freq_base comprison: $freq_comp percent: $freq_drop\n" | tee -a $OUTFILE_GIT.result
+
+ load_drop=`echo "scale=4;($load_comp-$load_base)*100/$load_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 load base: $load_base comprison: $load_comp percent: $load_drop\n" | tee -a $OUTFILE_GIT.result
+
+ time_drop=`echo "scale=4;($time_comp-$time_base)*100/$time_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 time base: $time_base comprison: $time_comp percent: $time_drop\n" | tee -a $OUTFILE_GIT.result
+
+ energy_drop=`echo "scale=4;($energy_comp-$energy_base)*100/$energy_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 energy base: $energy_base comprison: $energy_comp percent: $energy_drop\n" | tee -a $OUTFILE_GIT.result
+
+ ppw_drop=`echo "scale=4;($ppw_comp-$ppw_base)*100/$ppw_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 performance per watt base: $ppw_base comprison: $ppw_comp percent: $ppw_drop\n" | tee -a $OUTFILE_GIT.result
+ printf "\n" | tee -a $OUTFILE_GIT.result
+
+ store_csv_gitsource "$1-$2 VS $3-$4" "Comprison(%)" "$des_perf_drop" "$freq_drop" "$load_drop" "$time_drop" "$energy_drop" "$ppw_drop"
+ fi
+}
+
+# calculate the comparison(%)
+calc_comp_gitsource()
+{
+ # acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil
+ __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[0]} ${all_scaling_names[0]} ${gitsource_governors[1]}
+
+ # amd-pstate-ondemand VS amd-pstate-schedutil
+ __calc_comp_gitsource ${all_scaling_names[1]} ${gitsource_governors[0]} ${all_scaling_names[1]} ${gitsource_governors[1]}
+
+ # acpi-cpufreq-ondemand VS amd-pstate-ondemand
+ __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[0]} ${all_scaling_names[1]} ${gitsource_governors[0]}
+
+ # acpi-cpufreq-schedutil VS amd-pstate-schedutil
+ __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[1]} ${all_scaling_names[1]} ${gitsource_governors[1]}
+}
+
+# $1: file_name, $2: title, $3: ylable, $4: column
+plot_png_gitsource()
+{
+ # all_scaling_names[1] all_scaling_names[0] flag
+ # amd-pstate acpi-cpufreq
+ # N N 0
+ # N Y 1
+ # Y N 2
+ # Y Y 3
+ ret=`grep -c "${all_scaling_names[1]}" $OUTFILE_GIT.csv`
+ if [ $ret -eq 0 ]; then
+ ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_GIT.csv`
+ if [ $ret -eq 0 ]; then
+ flag=0
+ else
+ flag=1
+ fi
+ else
+ ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_GIT.csv`
+ if [ $ret -eq 0 ]; then
+ flag=2
+ else
+ flag=3
+ fi
+ fi
+
+ gnuplot << EOF
+ set term png
+ set output "$1"
+
+ set title "$2"
+ set xlabel "Test Cycles (round)"
+ set ylabel "$3"
+
+ set grid
+ set style data histogram
+ set style fill solid 0.5 border
+ set boxwidth 0.8
+
+ if ($flag == 1) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[1]}"
+ } else {
+ if ($flag == 2) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[1]}"
+ } else {
+ if ($flag == 3 ) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[1]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[1]}"
+ }
+ }
+ }
+ quit
+EOF
+}
+
+amd_pstate_gitsource()
+{
+ printf "\n---------------------------------------------\n"
+ printf "*** Running gitsource ***"
+ printf "\n---------------------------------------------\n"
+
+ pre_clear_gitsource
+
+ install_gitsource
+
+ get_lines_csv_gitsource "Governor"
+ if [ $? -eq 0 ]; then
+ # add titles and unit for csv file
+ store_csv_gitsource "Governor" "Round" "Des-perf" "Freq" "Load" "Time" "Energy" "Performance Per Watt"
+ store_csv_gitsource "Unit" "" "" "GHz" "" "s" "J" "1/J"
+ fi
+
+ backup_governor
+ for governor in ${gitsource_governors[*]} ; do
+ printf "\nSpecified governor is $governor\n\n"
+ switch_governor $governor
+ loop_gitsource $governor
+ gather_gitsource $governor
+ done
+ restore_governor
+
+ plot_png_gitsource "gitsource_time.png" "Gitsource Benchmark Time" "Time (s)" 6
+ plot_png_gitsource "gitsource_energy.png" "Gitsource Benchmark Energy" "Energy (J)" 7
+ plot_png_gitsource "gitsource_ppw.png" "Gitsource Benchmark Performance Per Watt" "Performance Per Watt (1/J)" 8
+
+ calc_comp_gitsource
+
+ post_clear_gitsource
+}
diff --git a/tools/testing/selftests/amd-pstate/run.sh b/tools/testing/selftests/amd-pstate/run.sh
new file mode 100755
index 000000000000..b053eea8bb19
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/run.sh
@@ -0,0 +1,396 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# protect against multiple inclusion
+if [ $FILE_MAIN ]; then
+ return 0
+else
+ FILE_MAIN=DONE
+fi
+
+SCRIPTDIR=`dirname "$0"`
+TRACER=$SCRIPTDIR/../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py
+
+source $SCRIPTDIR/basic.sh
+source $SCRIPTDIR/tbench.sh
+source $SCRIPTDIR/gitsource.sh
+
+# amd-pstate-ut only run on x86/x86_64 AMD systems.
+ARCH=$(uname -m 2>/dev/null | sed -e 's/i.86/x86/' -e 's/x86_64/x86/')
+VENDOR=$(cat /proc/cpuinfo | grep -m 1 'vendor_id' | awk '{print $NF}')
+
+msg="Skip all tests:"
+FUNC=all
+OUTFILE=selftest
+OUTFILE_TBENCH="$OUTFILE.tbench"
+OUTFILE_GIT="$OUTFILE.gitsource"
+
+PERF=/usr/bin/perf
+SYSFS=
+CPUROOT=
+CPUFREQROOT=
+MAKE_CPUS=
+
+TIME_LIMIT=100
+PROCESS_NUM=128
+LOOP_TIMES=3
+TRACER_INTERVAL=10
+CURRENT_TEST=amd-pstate
+COMPARATIVE_TEST=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+all_scaling_names=("acpi-cpufreq" "amd-pstate")
+
+# Get current cpufreq scaling driver name
+scaling_name()
+{
+ if [ "$COMPARATIVE_TEST" = "" ]; then
+ echo "$CURRENT_TEST"
+ else
+ echo "$COMPARATIVE_TEST"
+ fi
+}
+
+# Counts CPUs with cpufreq directories
+count_cpus()
+{
+ count=0;
+
+ for cpu in `ls $CPUROOT | grep "cpu[0-9].*"`; do
+ if [ -d $CPUROOT/$cpu/cpufreq ]; then
+ let count=count+1;
+ fi
+ done
+
+ echo $count;
+}
+
+# $1: policy
+find_current_governor()
+{
+ cat $CPUFREQROOT/$1/scaling_governor
+}
+
+backup_governor()
+{
+ policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+ for policy in $policies; do
+ cur_gov=$(find_current_governor $policy)
+ echo "$policy $cur_gov" >> $OUTFILE.backup_governor.log
+ done
+
+ printf "Governor $cur_gov backup done.\n"
+}
+
+restore_governor()
+{
+ i=0;
+
+ policies=$(awk '{print $1}' $OUTFILE.backup_governor.log)
+ for policy in $policies; do
+ let i++;
+ governor=$(sed -n ''$i'p' $OUTFILE.backup_governor.log | awk '{print $2}')
+
+ # switch governor
+ echo $governor > $CPUFREQROOT/$policy/scaling_governor
+ done
+
+ printf "Governor restored to $governor.\n"
+}
+
+# $1: governor
+switch_governor()
+{
+ policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+ for policy in $policies; do
+ filepath=$CPUFREQROOT/$policy/scaling_available_governors
+
+ # Exit if cpu isn't managed by cpufreq core
+ if [ ! -f $filepath ]; then
+ return;
+ fi
+
+ echo $1 > $CPUFREQROOT/$policy/scaling_governor
+ done
+
+ printf "Switched governor to $1.\n"
+}
+
+# All amd-pstate tests
+amd_pstate_all()
+{
+ printf "\n=============================================\n"
+ printf "***** Running AMD P-state Sanity Tests *****\n"
+ printf "=============================================\n\n"
+
+ count=$(count_cpus)
+ if [ $count = 0 ]; then
+ printf "No cpu is managed by cpufreq core, exiting\n"
+ exit;
+ else
+ printf "AMD P-state manages: $count CPUs\n"
+ fi
+
+ # unit test for amd-pstate kernel driver
+ amd_pstate_basic
+
+ # tbench
+ amd_pstate_tbench
+
+ # gitsource
+ amd_pstate_gitsource
+}
+
+help()
+{
+ printf "Usage: $0 [OPTION...]
+ [-h <help>]
+ [-o <output-file-for-dump>]
+ [-c <all: All testing,
+ basic: Basic testing,
+ tbench: Tbench testing,
+ gitsource: Gitsource testing.>]
+ [-t <tbench time limit>]
+ [-p <tbench process number>]
+ [-l <loop times for tbench>]
+ [-i <amd tracer interval>]
+ [-b <perf binary>]
+ [-m <comparative test: acpi-cpufreq>]
+ \n"
+ exit 2
+}
+
+parse_arguments()
+{
+ while getopts ho:c:t:p:l:i:b:m: arg
+ do
+ case $arg in
+ h) # --help
+ help
+ ;;
+
+ c) # --func_type (Function to perform: basic, tbench, gitsource (default: all))
+ FUNC=$OPTARG
+ ;;
+
+ o) # --output-file (Output file to store dumps)
+ OUTFILE=$OPTARG
+ ;;
+
+ t) # --tbench-time-limit
+ TIME_LIMIT=$OPTARG
+ ;;
+
+ p) # --tbench-process-number
+ PROCESS_NUM=$OPTARG
+ ;;
+
+ l) # --tbench/gitsource-loop-times
+ LOOP_TIMES=$OPTARG
+ ;;
+
+ i) # --amd-tracer-interval
+ TRACER_INTERVAL=$OPTARG
+ ;;
+
+ b) # --perf-binary
+ PERF=`realpath $OPTARG`
+ ;;
+
+ m) # --comparative-test
+ COMPARATIVE_TEST=$OPTARG
+ ;;
+
+ *)
+ help
+ ;;
+ esac
+ done
+}
+
+command_perf()
+{
+ if ! $PERF -v; then
+ echo $msg please install perf or provide perf binary path as argument >&2
+ exit $ksft_skip
+ fi
+}
+
+command_tbench()
+{
+ if ! command -v tbench > /dev/null; then
+ if apt policy dbench > /dev/null 2>&1; then
+ echo $msg apt install dbench >&2
+ exit $ksft_skip
+ elif yum list available | grep dbench > /dev/null 2>&1; then
+ echo $msg yum install dbench >&2
+ exit $ksft_skip
+ fi
+ fi
+
+ if ! command -v tbench > /dev/null; then
+ echo $msg please install tbench. >&2
+ exit $ksft_skip
+ fi
+}
+
+prerequisite()
+{
+ if ! echo "$ARCH" | grep -q x86; then
+ echo "$0 # Skipped: Test can only run on x86 architectures."
+ exit $ksft_skip
+ fi
+
+ if ! echo "$VENDOR" | grep -iq amd; then
+ echo "$0 # Skipped: Test can only run on AMD CPU."
+ echo "$0 # Current cpu vendor is $VENDOR."
+ exit $ksft_skip
+ fi
+
+ scaling_driver=$(cat /sys/devices/system/cpu/cpufreq/policy0/scaling_driver)
+ if [ "$COMPARATIVE_TEST" = "" ]; then
+ if [ "$scaling_driver" != "$CURRENT_TEST" ]; then
+ echo "$0 # Skipped: Test can only run on $CURRENT_TEST driver or run comparative test."
+ echo "$0 # Please set X86_AMD_PSTATE enabled or run comparative test."
+ echo "$0 # Current cpufreq scaling driver is $scaling_driver."
+ exit $ksft_skip
+ fi
+ else
+ case "$FUNC" in
+ "tbench" | "gitsource")
+ if [ "$scaling_driver" != "$COMPARATIVE_TEST" ]; then
+ echo "$0 # Skipped: Comparison test can only run on $COMPARISON_TEST driver."
+ echo "$0 # Current cpufreq scaling driver is $scaling_driver."
+ exit $ksft_skip
+ fi
+ ;;
+
+ *)
+ echo "$0 # Skipped: Comparison test are only for tbench or gitsource."
+ echo "$0 # Current comparative test is for $FUNC."
+ exit $ksft_skip
+ ;;
+ esac
+ fi
+
+ if [ ! -w /dev ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+ fi
+
+ case "$FUNC" in
+ "all")
+ command_perf
+ command_tbench
+ ;;
+
+ "tbench")
+ command_perf
+ command_tbench
+ ;;
+
+ "gitsource")
+ command_perf
+ ;;
+ esac
+
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit 2
+ fi
+
+ CPUROOT=$SYSFS/devices/system/cpu
+ CPUFREQROOT="$CPUROOT/cpufreq"
+
+ if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
+ echo $msg cpus not available in sysfs >&2
+ exit 2
+ fi
+
+ if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
+ echo $msg cpufreq directory not available in sysfs >&2
+ exit 2
+ fi
+}
+
+do_test()
+{
+ # Check if CPUs are managed by cpufreq or not
+ count=$(count_cpus)
+ MAKE_CPUS=$((count*2))
+
+ if [ $count = 0 ]; then
+ echo "No cpu is managed by cpufreq core, exiting"
+ exit 2;
+ fi
+
+ case "$FUNC" in
+ "all")
+ amd_pstate_all
+ ;;
+
+ "basic")
+ amd_pstate_basic
+ ;;
+
+ "tbench")
+ amd_pstate_tbench
+ ;;
+
+ "gitsource")
+ amd_pstate_gitsource
+ ;;
+
+ *)
+ echo "Invalid [-f] function type"
+ help
+ ;;
+ esac
+}
+
+# clear dumps
+pre_clear_dumps()
+{
+ case "$FUNC" in
+ "all")
+ rm -rf $OUTFILE.log
+ rm -rf $OUTFILE.backup_governor.log
+ rm -rf *.png
+ ;;
+
+ "tbench")
+ rm -rf $OUTFILE.log
+ rm -rf $OUTFILE.backup_governor.log
+ rm -rf tbench_*.png
+ ;;
+
+ "gitsource")
+ rm -rf $OUTFILE.log
+ rm -rf $OUTFILE.backup_governor.log
+ rm -rf gitsource_*.png
+ ;;
+
+ *)
+ ;;
+ esac
+}
+
+post_clear_dumps()
+{
+ rm -rf $OUTFILE.log
+ rm -rf $OUTFILE.backup_governor.log
+}
+
+# Parse arguments
+parse_arguments $@
+
+# Make sure all requirements are met
+prerequisite
+
+# Run requested functions
+pre_clear_dumps
+do_test | tee -a $OUTFILE.log
+post_clear_dumps
diff --git a/tools/testing/selftests/amd-pstate/tbench.sh b/tools/testing/selftests/amd-pstate/tbench.sh
new file mode 100755
index 000000000000..2a98d9c9202e
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/tbench.sh
@@ -0,0 +1,339 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Testing and monitor the cpu desire performance, frequency, load,
+# power consumption and throughput etc.when this script trigger tbench
+# test cases.
+# 1) Run tbench benchmark on specific governors, ondemand or schedutil.
+# 2) Run tbench benchmark comparative test on acpi-cpufreq kernel driver.
+# 3) Get desire performance, frequency, load by perf.
+# 4) Get power consumption and throughput by amd_pstate_trace.py.
+# 5) Analyse test results and save it in file selftest.tbench.csv.
+# 6) Plot png images about performance, energy and performance per watt for each test.
+
+# protect against multiple inclusion
+if [ $FILE_TBENCH ]; then
+ return 0
+else
+ FILE_TBENCH=DONE
+fi
+
+tbench_governors=("ondemand" "schedutil")
+
+# $1: governor, $2: round, $3: des-perf, $4: freq, $5: load, $6: performance, $7: energy, $8: performance per watt
+store_csv_tbench()
+{
+ echo "$1, $2, $3, $4, $5, $6, $7, $8" | tee -a $OUTFILE_TBENCH.csv > /dev/null 2>&1
+}
+
+# clear some special lines
+clear_csv_tbench()
+{
+ if [ -f $OUTFILE_TBENCH.csv ]; then
+ sed -i '/Comprison(%)/d' $OUTFILE_TBENCH.csv
+ sed -i "/$(scaling_name)/d" $OUTFILE_TBENCH.csv
+ fi
+}
+
+# find string $1 in file csv and get the number of lines
+get_lines_csv_tbench()
+{
+ if [ -f $OUTFILE_TBENCH.csv ]; then
+ return `grep -c "$1" $OUTFILE_TBENCH.csv`
+ else
+ return 0
+ fi
+}
+
+pre_clear_tbench()
+{
+ post_clear_tbench
+ rm -rf tbench_*.png
+ clear_csv_tbench
+}
+
+post_clear_tbench()
+{
+ rm -rf results/tracer-tbench*
+ rm -rf $OUTFILE_TBENCH*.log
+ rm -rf $OUTFILE_TBENCH*.result
+
+}
+
+# $1: governor, $2: loop
+run_tbench()
+{
+ echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL"
+ $TRACER -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
+
+ printf "Test tbench for $1 #$2 time_limit: $TIME_LIMIT procs_num: $PROCESS_NUM\n"
+ tbench_srv > /dev/null 2>&1 &
+ $PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1
+
+ pid=`pidof tbench_srv`
+ kill $pid
+
+ for job in `jobs -p`
+ do
+ echo "Waiting for job id $job"
+ wait $job
+ done
+}
+
+# $1: governor, $2: loop
+parse_tbench()
+{
+ awk '{print $5}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-des-perf-$1-$2.log
+ avg_des_perf=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-des-perf-$1-$2.log)
+ printf "Tbench-$1-#$2 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_TBENCH.result
+
+ awk '{print $7}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-freq-$1-$2.log
+ avg_freq=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-freq-$1-$2.log)
+ printf "Tbench-$1-#$2 avg freq: $avg_freq\n" | tee -a $OUTFILE_TBENCH.result
+
+ awk '{print $11}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-load-$1-$2.log
+ avg_load=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-load-$1-$2.log)
+ printf "Tbench-$1-#$2 avg load: $avg_load\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep Throughput $OUTFILE_TBENCH-perf-$1-$2.log | awk '{print $2}' > $OUTFILE_TBENCH-throughput-$1-$2.log
+ tp_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-throughput-$1-$2.log)
+ printf "Tbench-$1-#$2 throughput(MB/s): $tp_sum\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep Joules $OUTFILE_TBENCH-perf-$1-$2.log | awk '{print $4}' > $OUTFILE_TBENCH-energy-$1-$2.log
+ en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-energy-$1-$2.log)
+ printf "Tbench-$1-#$2 power consumption(J): $en_sum\n" | tee -a $OUTFILE_TBENCH.result
+
+ # Permance is throughput per second, denoted T/t, where T is throught rendered in t seconds.
+ # It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J),
+ # and t is time measured in seconds(s). This means that performance per watt becomes
+ # T/t T/t T
+ # --- = --- = ---
+ # P E/t E
+ # with unit given by MB per joule.
+ ppw=`echo "scale=4;($TIME_LIMIT-1)*$tp_sum/$en_sum" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1-#$2 performance per watt(MB/J): $ppw\n" | tee -a $OUTFILE_TBENCH.result
+ printf "\n" | tee -a $OUTFILE_TBENCH.result
+
+ driver_name=`echo $(scaling_name)`
+ store_csv_tbench "$driver_name-$1" $2 $avg_des_perf $avg_freq $avg_load $tp_sum $en_sum $ppw
+}
+
+# $1: governor
+loop_tbench()
+{
+ printf "\nTbench total test times is $LOOP_TIMES for $1\n\n"
+ for i in `seq 1 $LOOP_TIMES`
+ do
+ run_tbench $1 $i
+ parse_tbench $1 $i
+ done
+}
+
+# $1: governor
+gather_tbench()
+{
+ printf "Tbench test result for $1 (loops:$LOOP_TIMES)" | tee -a $OUTFILE_TBENCH.result
+ printf "\n--------------------------------------------------\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg des perf:" | awk '{print $NF}' > $OUTFILE_TBENCH-des-perf-$1.log
+ avg_des_perf=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-des-perf-$1.log)
+ printf "Tbench-$1 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg freq:" | awk '{print $NF}' > $OUTFILE_TBENCH-freq-$1.log
+ avg_freq=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-freq-$1.log)
+ printf "Tbench-$1 avg freq: $avg_freq\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg load:" | awk '{print $NF}' > $OUTFILE_TBENCH-load-$1.log
+ avg_load=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-load-$1.log)
+ printf "Tbench-$1 avg load: $avg_load\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "throughput(MB/s):" | awk '{print $NF}' > $OUTFILE_TBENCH-throughput-$1.log
+ tp_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-throughput-$1.log)
+ printf "Tbench-$1 total throughput(MB/s): $tp_sum\n" | tee -a $OUTFILE_TBENCH.result
+
+ avg_tp=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-throughput-$1.log)
+ printf "Tbench-$1 avg throughput(MB/s): $avg_tp\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "power consumption(J):" | awk '{print $NF}' > $OUTFILE_TBENCH-energy-$1.log
+ en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-energy-$1.log)
+ printf "Tbench-$1 total power consumption(J): $en_sum\n" | tee -a $OUTFILE_TBENCH.result
+
+ avg_en=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-energy-$1.log)
+ printf "Tbench-$1 avg power consumption(J): $avg_en\n" | tee -a $OUTFILE_TBENCH.result
+
+ # Permance is throughput per second, denoted T/t, where T is throught rendered in t seconds.
+ # It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J),
+ # and t is time measured in seconds(s). This means that performance per watt becomes
+ # T/t T/t T
+ # --- = --- = ---
+ # P E/t E
+ # with unit given by MB per joule.
+ ppw=`echo "scale=4;($TIME_LIMIT-1)*$avg_tp/$avg_en" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 performance per watt(MB/J): $ppw\n" | tee -a $OUTFILE_TBENCH.result
+ printf "\n" | tee -a $OUTFILE_TBENCH.result
+
+ driver_name=`echo $(scaling_name)`
+ store_csv_tbench "$driver_name-$1" "Average" $avg_des_perf $avg_freq $avg_load $avg_tp $avg_en $ppw
+}
+
+# $1: base scaling_driver $2: base governor $3: comparative scaling_driver $4: comparative governor
+__calc_comp_tbench()
+{
+ base=`grep "$1-$2" $OUTFILE_TBENCH.csv | grep "Average"`
+ comp=`grep "$3-$4" $OUTFILE_TBENCH.csv | grep "Average"`
+
+ if [ -n "$base" -a -n "$comp" ]; then
+ printf "\n==================================================\n" | tee -a $OUTFILE_TBENCH.result
+ printf "Tbench comparison $1-$2 VS $3-$4" | tee -a $OUTFILE_TBENCH.result
+ printf "\n==================================================\n" | tee -a $OUTFILE_TBENCH.result
+
+ # get the base values
+ des_perf_base=`echo "$base" | awk '{print $3}' | sed s/,//`
+ freq_base=`echo "$base" | awk '{print $4}' | sed s/,//`
+ load_base=`echo "$base" | awk '{print $5}' | sed s/,//`
+ perf_base=`echo "$base" | awk '{print $6}' | sed s/,//`
+ energy_base=`echo "$base" | awk '{print $7}' | sed s/,//`
+ ppw_base=`echo "$base" | awk '{print $8}' | sed s/,//`
+
+ # get the comparative values
+ des_perf_comp=`echo "$comp" | awk '{print $3}' | sed s/,//`
+ freq_comp=`echo "$comp" | awk '{print $4}' | sed s/,//`
+ load_comp=`echo "$comp" | awk '{print $5}' | sed s/,//`
+ perf_comp=`echo "$comp" | awk '{print $6}' | sed s/,//`
+ energy_comp=`echo "$comp" | awk '{print $7}' | sed s/,//`
+ ppw_comp=`echo "$comp" | awk '{print $8}' | sed s/,//`
+
+ # compare the base and comp values
+ des_perf_drop=`echo "scale=4;($des_perf_comp-$des_perf_base)*100/$des_perf_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 des perf base: $des_perf_base comprison: $des_perf_comp percent: $des_perf_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ freq_drop=`echo "scale=4;($freq_comp-$freq_base)*100/$freq_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 freq base: $freq_base comprison: $freq_comp percent: $freq_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ load_drop=`echo "scale=4;($load_comp-$load_base)*100/$load_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 load base: $load_base comprison: $load_comp percent: $load_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ perf_drop=`echo "scale=4;($perf_comp-$perf_base)*100/$perf_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 perf base: $perf_base comprison: $perf_comp percent: $perf_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ energy_drop=`echo "scale=4;($energy_comp-$energy_base)*100/$energy_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 energy base: $energy_base comprison: $energy_comp percent: $energy_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ ppw_drop=`echo "scale=4;($ppw_comp-$ppw_base)*100/$ppw_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 performance per watt base: $ppw_base comprison: $ppw_comp percent: $ppw_drop\n" | tee -a $OUTFILE_TBENCH.result
+ printf "\n" | tee -a $OUTFILE_TBENCH.result
+
+ store_csv_tbench "$1-$2 VS $3-$4" "Comprison(%)" "$des_perf_drop" "$freq_drop" "$load_drop" "$perf_drop" "$energy_drop" "$ppw_drop"
+ fi
+}
+
+# calculate the comparison(%)
+calc_comp_tbench()
+{
+ # acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil
+ __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[0]} ${all_scaling_names[0]} ${tbench_governors[1]}
+
+ # amd-pstate-ondemand VS amd-pstate-schedutil
+ __calc_comp_tbench ${all_scaling_names[1]} ${tbench_governors[0]} ${all_scaling_names[1]} ${tbench_governors[1]}
+
+ # acpi-cpufreq-ondemand VS amd-pstate-ondemand
+ __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[0]} ${all_scaling_names[1]} ${tbench_governors[0]}
+
+ # acpi-cpufreq-schedutil VS amd-pstate-schedutil
+ __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[1]} ${all_scaling_names[1]} ${tbench_governors[1]}
+}
+
+# $1: file_name, $2: title, $3: ylable, $4: column
+plot_png_tbench()
+{
+ # all_scaling_names[1] all_scaling_names[0] flag
+ # amd-pstate acpi-cpufreq
+ # N N 0
+ # N Y 1
+ # Y N 2
+ # Y Y 3
+ ret=`grep -c "${all_scaling_names[1]}" $OUTFILE_TBENCH.csv`
+ if [ $ret -eq 0 ]; then
+ ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_TBENCH.csv`
+ if [ $ret -eq 0 ]; then
+ flag=0
+ else
+ flag=1
+ fi
+ else
+ ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_TBENCH.csv`
+ if [ $ret -eq 0 ]; then
+ flag=2
+ else
+ flag=3
+ fi
+ fi
+
+ gnuplot << EOF
+ set term png
+ set output "$1"
+
+ set title "$2"
+ set xlabel "Test Cycles (round)"
+ set ylabel "$3"
+
+ set grid
+ set style data histogram
+ set style fill solid 0.5 border
+ set boxwidth 0.8
+
+ if ($flag == 1) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[1]}"
+ } else {
+ if ($flag == 2) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[1]}"
+ } else {
+ if ($flag == 3 ) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[1]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[1]}"
+ }
+ }
+ }
+ quit
+EOF
+}
+
+amd_pstate_tbench()
+{
+ printf "\n---------------------------------------------\n"
+ printf "*** Running tbench ***"
+ printf "\n---------------------------------------------\n"
+
+ pre_clear_tbench
+
+ get_lines_csv_tbench "Governor"
+ if [ $? -eq 0 ]; then
+ # add titles and unit for csv file
+ store_csv_tbench "Governor" "Round" "Des-perf" "Freq" "Load" "Performance" "Energy" "Performance Per Watt"
+ store_csv_tbench "Unit" "" "" "GHz" "" "MB/s" "J" "MB/J"
+ fi
+
+ backup_governor
+ for governor in ${tbench_governors[*]} ; do
+ printf "\nSpecified governor is $governor\n\n"
+ switch_governor $governor
+ loop_tbench $governor
+ gather_tbench $governor
+ done
+ restore_governor
+
+ plot_png_tbench "tbench_perfromance.png" "Tbench Benchmark Performance" "Performance" 6
+ plot_png_tbench "tbench_energy.png" "Tbench Benchmark Energy" "Energy (J)" 7
+ plot_png_tbench "tbench_ppw.png" "Tbench Benchmark Performance Per Watt" "Performance Per Watt (MB/J)" 8
+
+ calc_comp_tbench
+
+ post_clear_tbench
+}
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 2c9d012797a7..c4c72ee2ef55 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal pauth fp mte
+ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi gcs
else
ARM64_SUBTARGETS :=
endif
@@ -17,16 +17,11 @@ top_srcdir = $(realpath ../../../../)
# Additional include paths needed by kselftest.h and local headers
CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
-# Guessing where the Kernel headers could have been installed
-# depending on ENV config
-ifeq ($(KBUILD_OUTPUT),)
-khdr_dir = $(top_srcdir)/usr/include
-else
-# the KSFT preferred location when KBUILD_OUTPUT is set
-khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include
-endif
+CFLAGS += $(KHDR_INCLUDES)
+
+CFLAGS += -I$(top_srcdir)/tools/include
-CFLAGS += -I$(khdr_dir)
+OUTPUT ?= $(CURDIR)
export CFLAGS
export top_srcdir
@@ -51,7 +46,7 @@ run_tests: all
done
# Avoid any output on non arm64 on emit_tests
-emit_tests: all
+emit_tests:
@for DIR in $(ARM64_SUBTARGETS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore
new file mode 100644
index 000000000000..44f8b80f37e3
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/.gitignore
@@ -0,0 +1,4 @@
+hwcap
+ptrace
+syscall-abi
+tpidr2
diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile
new file mode 100644
index 000000000000..483488f8c2ad
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+
+TEST_GEN_PROGS := hwcap ptrace syscall-abi tpidr2
+
+include ../../lib.mk
+
+$(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S
+
+# Build with nolibc since TPIDR2 is intended to be actively managed by
+# libc and we're trying to test the functionality that it depends on here.
+$(OUTPUT)/tpidr2: tpidr2.c
+ $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ -static -include ../../../../include/nolibc/nolibc.h \
+ -I../.. -ffreestanding -Wall $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
new file mode 100644
index 000000000000..c41640f18e4e
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -0,0 +1,1295 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+#include <asm/unistd.h>
+
+#include <linux/auxvec.h>
+
+#include "kselftest.h"
+
+#define TESTS_PER_HWCAP 3
+
+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
+/*
+ * Function expected to generate exception when the feature is not
+ * supported and return when it is supported. If the specific exception
+ * is generated then the handler must be able to skip over the
+ * instruction safely.
+ *
+ * Note that it is expected that for many architecture extensions
+ * there are no specific traps due to no architecture state being
+ * added so we may not fault if running on a kernel which doesn't know
+ * to add the hwcap.
+ */
+typedef void (*sig_fn)(void);
+
+static void aes_sigill(void)
+{
+ /* AESE V0.16B, V0.16B */
+ asm volatile(".inst 0x4e284800" : : : );
+}
+
+static void atomics_sigill(void)
+{
+ /* STADD W0, [SP] */
+ asm volatile(".inst 0xb82003ff" : : : );
+}
+
+static void cmpbr_sigill(void)
+{
+ /* Not implemented, too complicated and unreliable anyway */
+}
+
+static void crc32_sigill(void)
+{
+ /* CRC32W W0, W0, W1 */
+ asm volatile(".inst 0x1ac14800" : : : );
+}
+
+static void cssc_sigill(void)
+{
+ /* CNT x0, x0 */
+ asm volatile(".inst 0xdac01c00" : : : "x0");
+}
+
+static void f8cvt_sigill(void)
+{
+ /* FSCALE V0.4H, V0.4H, V0.4H */
+ asm volatile(".inst 0x2ec03c00");
+}
+
+static void f8dp2_sigill(void)
+{
+ /* FDOT V0.4H, V0.4H, V0.5H */
+ asm volatile(".inst 0xe40fc00");
+}
+
+static void f8dp4_sigill(void)
+{
+ /* FDOT V0.2S, V0.2S, V0.2S */
+ asm volatile(".inst 0xe00fc00");
+}
+
+static void f8fma_sigill(void)
+{
+ /* FMLALB V0.8H, V0.16B, V0.16B */
+ asm volatile(".inst 0xec0fc00");
+}
+
+static void f8mm4_sigill(void)
+{
+ /* FMMLA V0.4SH, V0.16B, V0.16B */
+ asm volatile(".inst 0x6e00ec00");
+}
+
+static void f8mm8_sigill(void)
+{
+ /* FMMLA V0.4S, V0.16B, V0.16B */
+ asm volatile(".inst 0x6e80ec00");
+}
+
+static void faminmax_sigill(void)
+{
+ /* FAMIN V0.4H, V0.4H, V0.4H */
+ asm volatile(".inst 0x2ec01c00");
+}
+
+static void fp_sigill(void)
+{
+ asm volatile("fmov s0, #1");
+}
+
+static void fpmr_sigill(void)
+{
+ asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0");
+}
+
+static void fprcvt_sigill(void)
+{
+ /* FCVTAS S0, H0 */
+ asm volatile(".inst 0x1efa0000");
+}
+
+static void gcs_sigill(void)
+{
+ unsigned long *gcspr;
+
+ asm volatile(
+ "mrs %0, S3_3_C2_C5_1"
+ : "=r" (gcspr)
+ :
+ : "cc");
+}
+
+static void ilrcpc_sigill(void)
+{
+ /* LDAPUR W0, [SP, #8] */
+ asm volatile(".inst 0x994083e0" : : : );
+}
+
+static void jscvt_sigill(void)
+{
+ /* FJCVTZS W0, D0 */
+ asm volatile(".inst 0x1e7e0000" : : : );
+}
+
+static void lrcpc_sigill(void)
+{
+ /* LDAPR W0, [SP, #0] */
+ asm volatile(".inst 0xb8bfc3e0" : : : );
+}
+
+static void lse128_sigill(void)
+{
+ u64 __attribute__ ((aligned (16))) mem[2] = { 10, 20 };
+ register u64 *memp asm ("x0") = mem;
+ register u64 val0 asm ("x1") = 5;
+ register u64 val1 asm ("x2") = 4;
+
+ /* SWPP X1, X2, [X0] */
+ asm volatile(".inst 0x19228001"
+ : "+r" (memp), "+r" (val0), "+r" (val1)
+ :
+ : "cc", "memory");
+}
+
+static void lsfe_sigill(void)
+{
+ float __attribute__ ((aligned (16))) mem;
+ register float *memp asm ("x0") = &mem;
+
+ /* STFADD H0, [X0] */
+ asm volatile(".inst 0x7c20801f"
+ : "+r" (memp)
+ :
+ : "memory");
+}
+
+static void lut_sigill(void)
+{
+ /* LUTI2 V0.16B, { V0.16B }, V[0] */
+ asm volatile(".inst 0x4e801000");
+}
+
+static void mops_sigill(void)
+{
+ char dst[1], src[1];
+ register char *dstp asm ("x0") = dst;
+ register char *srcp asm ("x1") = src;
+ register long size asm ("x2") = 1;
+
+ /* CPYP [x0]!, [x1]!, x2! */
+ asm volatile(".inst 0x1d010440"
+ : "+r" (dstp), "+r" (srcp), "+r" (size)
+ :
+ : "cc", "memory");
+}
+
+static void pmull_sigill(void)
+{
+ /* PMULL V0.1Q, V0.1D, V0.1D */
+ asm volatile(".inst 0x0ee0e000" : : : );
+}
+
+static void poe_sigill(void)
+{
+ /* mrs x0, POR_EL0 */
+ asm volatile("mrs x0, S3_3_C10_C2_4" : : : "x0");
+}
+
+static void rng_sigill(void)
+{
+ asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
+}
+
+static void sha1_sigill(void)
+{
+ /* SHA1H S0, S0 */
+ asm volatile(".inst 0x5e280800" : : : );
+}
+
+static void sha2_sigill(void)
+{
+ /* SHA256H Q0, Q0, V0.4S */
+ asm volatile(".inst 0x5e004000" : : : );
+}
+
+static void sha512_sigill(void)
+{
+ /* SHA512H Q0, Q0, V0.2D */
+ asm volatile(".inst 0xce608000" : : : );
+}
+
+static void sme_sigill(void)
+{
+ /* RDSVL x0, #0 */
+ asm volatile(".inst 0x04bf5800" : : : "x0");
+}
+
+static void sme2_sigill(void)
+{
+ /* SMSTART ZA */
+ asm volatile("msr S0_3_C4_C5_3, xzr" : : : );
+
+ /* ZERO ZT0 */
+ asm volatile(".inst 0xc0480001" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme2p1_sigill(void)
+{
+ /* SMSTART SM */
+ asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+ /* BFCLAMP { Z0.H - Z1.H }, Z0.H, Z0.H */
+ asm volatile(".inst 0xc120C000" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme2p2_sigill(void)
+{
+ /* SMSTART SM */
+ asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+ /* UXTB Z0.D, P0/Z, Z0.D */
+ asm volatile(".inst 0x4c1a000" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme_aes_sigill(void)
+{
+ /* SMSTART SM */
+ asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+ /* AESD z0.b, z0.b, z0.b */
+ asm volatile(".inst 0x4522e400" : : : "z0");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme_sbitperm_sigill(void)
+{
+ /* SMSTART SM */
+ asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+ /* BDEP Z0.B, Z0.B, Z0.B */
+ asm volatile(".inst 0x4500b400" : : : "z0");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smei16i32_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* SMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */
+ asm volatile(".inst 0xa0800000" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smebi32i32_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* BMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */
+ asm volatile(".inst 0x80800008" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smeb16b16_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */
+ asm volatile(".inst 0xC1E41C00" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smef16f16_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FADD ZA.H[W0, 0], { Z0.H-Z1.H } */
+ asm volatile(".inst 0xc1a41C00" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smef8f16_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FDOT ZA.H[W0, 0], Z0.B-Z1.B, Z0.B-Z1.B */
+ asm volatile(".inst 0xc1a01020" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smef8f32_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FDOT ZA.S[W0, 0], { Z0.B-Z1.B }, Z0.B[0] */
+ asm volatile(".inst 0xc1500038" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smelutv2_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* LUTI4 { Z0.B-Z3.B }, ZT0, { Z0-Z1 } */
+ asm volatile(".inst 0xc08b0000" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesf8dp2_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FDOT Z0.H, Z0.B, Z0.B[0] */
+ asm volatile(".inst 0x64204400" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesf8dp4_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FDOT Z0.S, Z0.B, Z0.B[0] */
+ asm volatile(".inst 0xc1a41C00" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesf8fma_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FMLALB Z0.8H, Z0.B, Z0.B */
+ asm volatile(".inst 0x64205000");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesfexpa_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FEXPA Z0.D, Z0.D */
+ asm volatile(".inst 0x04e0b800");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesmop4_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* SMOP4A ZA0.S, Z0.B, { Z0.B - Z1.B } */
+ asm volatile(".inst 0x80108000");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smestmop_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* STMOPA ZA0.S, { Z0.H - Z1.H }, Z0.H, Z20[0] */
+ asm volatile(".inst 0x80408008");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sve_sigill(void)
+{
+ /* RDVL x0, #0 */
+ asm volatile(".inst 0x04bf5000" : : : "x0");
+}
+
+static void sve2_sigill(void)
+{
+ /* SQABS Z0.b, P0/M, Z0.B */
+ asm volatile(".inst 0x4408A000" : : : "z0");
+}
+
+static void sve2p1_sigill(void)
+{
+ /* BFADD Z0.H, Z0.H, Z0.H */
+ asm volatile(".inst 0x65000000" : : : "z0");
+}
+
+static void sve2p2_sigill(void)
+{
+ /* NOT Z0.D, P0/Z, Z0.D */
+ asm volatile(".inst 0x4cea000" : : : "z0");
+}
+
+static void sveaes_sigill(void)
+{
+ /* AESD z0.b, z0.b, z0.b */
+ asm volatile(".inst 0x4522e400" : : : "z0");
+}
+
+static void sveaes2_sigill(void)
+{
+ /* AESD {Z0.B - Z1.B }, { Z0.B - Z1.B }, Z0.Q */
+ asm volatile(".inst 0x4522ec00" : : : "z0");
+}
+
+static void sveb16b16_sigill(void)
+{
+ /* BFADD Z0.H, Z0.H, Z0.H */
+ asm volatile(".inst 0x65000000" : : : );
+}
+
+static void svebfscale_sigill(void)
+{
+ /* BFSCALE Z0.H, P0/M, Z0.H, Z0.H */
+ asm volatile(".inst 0x65098000" : : : "z0");
+}
+
+static void svef16mm_sigill(void)
+{
+ /* FMMLA Z0.S, Z0.H, Z0.H */
+ asm volatile(".inst 0x6420e400");
+}
+
+static void svepmull_sigill(void)
+{
+ /* PMULLB Z0.Q, Z0.D, Z0.D */
+ asm volatile(".inst 0x45006800" : : : "z0");
+}
+
+static void svebitperm_sigill(void)
+{
+ /* BDEP Z0.B, Z0.B, Z0.B */
+ asm volatile(".inst 0x4500b400" : : : "z0");
+}
+
+static void svesha3_sigill(void)
+{
+ /* EOR3 Z0.D, Z0.D, Z0.D, Z0.D */
+ asm volatile(".inst 0x4203800" : : : "z0");
+}
+
+static void sveeltperm_sigill(void)
+{
+ /* COMPACT Z0.B, P0, Z0.B */
+ asm volatile(".inst 0x5218000" : : : "x0");
+}
+
+static void svesm4_sigill(void)
+{
+ /* SM4E Z0.S, Z0.S, Z0.S */
+ asm volatile(".inst 0x4523e000" : : : "z0");
+}
+
+static void svei8mm_sigill(void)
+{
+ /* USDOT Z0.S, Z0.B, Z0.B[0] */
+ asm volatile(".inst 0x44a01800" : : : "z0");
+}
+
+static void svef32mm_sigill(void)
+{
+ /* FMMLA Z0.S, Z0.S, Z0.S */
+ asm volatile(".inst 0x64a0e400" : : : "z0");
+}
+
+static void svef64mm_sigill(void)
+{
+ /* FMMLA Z0.D, Z0.D, Z0.D */
+ asm volatile(".inst 0x64e0e400" : : : "z0");
+}
+
+static void svebf16_sigill(void)
+{
+ /* BFCVT Z0.H, P0/M, Z0.S */
+ asm volatile(".inst 0x658aa000" : : : "z0");
+}
+
+static void hbc_sigill(void)
+{
+ /* BC.EQ +4 */
+ asm volatile("cmp xzr, xzr\n"
+ ".inst 0x54000030" : : : "cc");
+}
+
+static void uscat_sigbus(void)
+{
+ /* unaligned atomic access */
+ asm volatile("ADD x1, sp, #2" : : : );
+ /* STADD W0, [X1] */
+ asm volatile(".inst 0xb820003f" : : : );
+}
+
+static void lrcpc3_sigill(void)
+{
+ int data[2] = { 1, 2 };
+
+ register int *src asm ("x0") = data;
+ register int data0 asm ("w2") = 0;
+ register int data1 asm ("w3") = 0;
+
+ /* LDIAPP w2, w3, [x0] */
+ asm volatile(".inst 0x99431802"
+ : "=r" (data0), "=r" (data1) : "r" (src) :);
+}
+
+static const struct hwcap_data {
+ const char *name;
+ unsigned long at_hwcap;
+ unsigned long hwcap_bit;
+ const char *cpuinfo;
+ sig_fn sigill_fn;
+ bool sigill_reliable;
+ sig_fn sigbus_fn;
+ bool sigbus_reliable;
+} hwcaps[] = {
+ {
+ .name = "AES",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_AES,
+ .cpuinfo = "aes",
+ .sigill_fn = aes_sigill,
+ },
+ {
+ .name = "CMPBR",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_CMPBR,
+ .cpuinfo = "cmpbr",
+ .sigill_fn = cmpbr_sigill,
+ },
+ {
+ .name = "CRC32",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_CRC32,
+ .cpuinfo = "crc32",
+ .sigill_fn = crc32_sigill,
+ },
+ {
+ .name = "CSSC",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_CSSC,
+ .cpuinfo = "cssc",
+ .sigill_fn = cssc_sigill,
+ },
+ {
+ .name = "F8CVT",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8CVT,
+ .cpuinfo = "f8cvt",
+ .sigill_fn = f8cvt_sigill,
+ },
+ {
+ .name = "F8DP4",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8DP4,
+ .cpuinfo = "f8dp4",
+ .sigill_fn = f8dp4_sigill,
+ },
+ {
+ .name = "F8DP2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8DP2,
+ .cpuinfo = "f8dp2",
+ .sigill_fn = f8dp2_sigill,
+ },
+ {
+ .name = "F8E5M2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8E5M2,
+ .cpuinfo = "f8e5m2",
+ },
+ {
+ .name = "F8E4M3",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8E4M3,
+ .cpuinfo = "f8e4m3",
+ },
+ {
+ .name = "F8FMA",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8FMA,
+ .cpuinfo = "f8fma",
+ .sigill_fn = f8fma_sigill,
+ },
+ {
+ .name = "F8MM8",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_F8MM8,
+ .cpuinfo = "f8mm8",
+ .sigill_fn = f8mm8_sigill,
+ },
+ {
+ .name = "F8MM4",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_F8MM4,
+ .cpuinfo = "f8mm4",
+ .sigill_fn = f8mm4_sigill,
+ },
+ {
+ .name = "FAMINMAX",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_FAMINMAX,
+ .cpuinfo = "faminmax",
+ .sigill_fn = faminmax_sigill,
+ },
+ {
+ .name = "FP",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_FP,
+ .cpuinfo = "fp",
+ .sigill_fn = fp_sigill,
+ },
+ {
+ .name = "FPMR",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_FPMR,
+ .cpuinfo = "fpmr",
+ .sigill_fn = fpmr_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "FPRCVT",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_FPRCVT,
+ .cpuinfo = "fprcvt",
+ .sigill_fn = fprcvt_sigill,
+ },
+ {
+ .name = "GCS",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_GCS,
+ .cpuinfo = "gcs",
+ .sigill_fn = gcs_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "JSCVT",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_JSCVT,
+ .cpuinfo = "jscvt",
+ .sigill_fn = jscvt_sigill,
+ },
+ {
+ .name = "LRCPC",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_LRCPC,
+ .cpuinfo = "lrcpc",
+ .sigill_fn = lrcpc_sigill,
+ },
+ {
+ .name = "LRCPC2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_ILRCPC,
+ .cpuinfo = "ilrcpc",
+ .sigill_fn = ilrcpc_sigill,
+ },
+ {
+ .name = "LRCPC3",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_LRCPC3,
+ .cpuinfo = "lrcpc3",
+ .sigill_fn = lrcpc3_sigill,
+ },
+ {
+ .name = "LSE",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_ATOMICS,
+ .cpuinfo = "atomics",
+ .sigill_fn = atomics_sigill,
+ },
+ {
+ .name = "LSE2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_USCAT,
+ .cpuinfo = "uscat",
+ .sigill_fn = atomics_sigill,
+ .sigbus_fn = uscat_sigbus,
+ .sigbus_reliable = true,
+ },
+ {
+ .name = "LSE128",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_LSE128,
+ .cpuinfo = "lse128",
+ .sigill_fn = lse128_sigill,
+ },
+ {
+ .name = "LSFE",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_LSFE,
+ .cpuinfo = "lsfe",
+ .sigill_fn = lsfe_sigill,
+ },
+ {
+ .name = "LUT",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_LUT,
+ .cpuinfo = "lut",
+ .sigill_fn = lut_sigill,
+ },
+ {
+ .name = "MOPS",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_MOPS,
+ .cpuinfo = "mops",
+ .sigill_fn = mops_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "PMULL",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_PMULL,
+ .cpuinfo = "pmull",
+ .sigill_fn = pmull_sigill,
+ },
+ {
+ .name = "POE",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_POE,
+ .cpuinfo = "poe",
+ .sigill_fn = poe_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "RNG",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_RNG,
+ .cpuinfo = "rng",
+ .sigill_fn = rng_sigill,
+ },
+ {
+ .name = "RPRFM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_RPRFM,
+ .cpuinfo = "rprfm",
+ },
+ {
+ .name = "SHA1",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SHA1,
+ .cpuinfo = "sha1",
+ .sigill_fn = sha1_sigill,
+ },
+ {
+ .name = "SHA2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SHA2,
+ .cpuinfo = "sha2",
+ .sigill_fn = sha2_sigill,
+ },
+ {
+ .name = "SHA512",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SHA512,
+ .cpuinfo = "sha512",
+ .sigill_fn = sha512_sigill,
+ },
+ {
+ .name = "SME",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME,
+ .cpuinfo = "sme",
+ .sigill_fn = sme_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "SME2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME2,
+ .cpuinfo = "sme2",
+ .sigill_fn = sme2_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "SME 2.1",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME2P1,
+ .cpuinfo = "sme2p1",
+ .sigill_fn = sme2p1_sigill,
+ },
+ {
+ .name = "SME 2.2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME2P2,
+ .cpuinfo = "sme2p2",
+ .sigill_fn = sme2p2_sigill,
+ },
+ {
+ .name = "SME AES",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_AES,
+ .cpuinfo = "smeaes",
+ .sigill_fn = sme_aes_sigill,
+ },
+ {
+ .name = "SME I16I32",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_I16I32,
+ .cpuinfo = "smei16i32",
+ .sigill_fn = smei16i32_sigill,
+ },
+ {
+ .name = "SME BI32I32",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_BI32I32,
+ .cpuinfo = "smebi32i32",
+ .sigill_fn = smebi32i32_sigill,
+ },
+ {
+ .name = "SME B16B16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_B16B16,
+ .cpuinfo = "smeb16b16",
+ .sigill_fn = smeb16b16_sigill,
+ },
+ {
+ .name = "SME F16F16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_F16F16,
+ .cpuinfo = "smef16f16",
+ .sigill_fn = smef16f16_sigill,
+ },
+ {
+ .name = "SME F8F16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_F8F16,
+ .cpuinfo = "smef8f16",
+ .sigill_fn = smef8f16_sigill,
+ },
+ {
+ .name = "SME F8F32",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_F8F32,
+ .cpuinfo = "smef8f32",
+ .sigill_fn = smef8f32_sigill,
+ },
+ {
+ .name = "SME LUTV2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_LUTV2,
+ .cpuinfo = "smelutv2",
+ .sigill_fn = smelutv2_sigill,
+ },
+ {
+ .name = "SME SBITPERM",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_SBITPERM,
+ .cpuinfo = "smesbitperm",
+ .sigill_fn = sme_sbitperm_sigill,
+ },
+ {
+ .name = "SME SF8FMA",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_SF8FMA,
+ .cpuinfo = "smesf8fma",
+ .sigill_fn = smesf8fma_sigill,
+ },
+ {
+ .name = "SME SF8DP2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_SF8DP2,
+ .cpuinfo = "smesf8dp2",
+ .sigill_fn = smesf8dp2_sigill,
+ },
+ {
+ .name = "SME SF8DP4",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_SF8DP4,
+ .cpuinfo = "smesf8dp4",
+ .sigill_fn = smesf8dp4_sigill,
+ },
+ {
+ .name = "SME SFEXPA",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_SFEXPA,
+ .cpuinfo = "smesfexpa",
+ .sigill_fn = smesfexpa_sigill,
+ },
+ {
+ .name = "SME SMOP4",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_SMOP4,
+ .cpuinfo = "smesmop4",
+ .sigill_fn = smesmop4_sigill,
+ },
+ {
+ .name = "SME STMOP",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_STMOP,
+ .cpuinfo = "smestmop",
+ .sigill_fn = smestmop_sigill,
+ },
+ {
+ .name = "SVE",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE,
+ .cpuinfo = "sve",
+ .sigill_fn = sve_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "SVE 2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVE2,
+ .cpuinfo = "sve2",
+ .sigill_fn = sve2_sigill,
+ },
+ {
+ .name = "SVE 2.1",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVE2P1,
+ .cpuinfo = "sve2p1",
+ .sigill_fn = sve2p1_sigill,
+ },
+ {
+ .name = "SVE 2.2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE2P2,
+ .cpuinfo = "sve2p2",
+ .sigill_fn = sve2p2_sigill,
+ },
+ {
+ .name = "SVE AES",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEAES,
+ .cpuinfo = "sveaes",
+ .sigill_fn = sveaes_sigill,
+ },
+ {
+ .name = "SVE AES2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE_AES2,
+ .cpuinfo = "sveaes2",
+ .sigill_fn = sveaes2_sigill,
+ },
+ {
+ .name = "SVE BFSCALE",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE_BFSCALE,
+ .cpuinfo = "svebfscale",
+ .sigill_fn = svebfscale_sigill,
+ },
+ {
+ .name = "SVE ELTPERM",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE_ELTPERM,
+ .cpuinfo = "sveeltperm",
+ .sigill_fn = sveeltperm_sigill,
+ },
+ {
+ .name = "SVE F16MM",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE_F16MM,
+ .cpuinfo = "svef16mm",
+ .sigill_fn = svef16mm_sigill,
+ },
+ {
+ .name = "SVE2 B16B16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVE_B16B16,
+ .cpuinfo = "sveb16b16",
+ .sigill_fn = sveb16b16_sigill,
+ },
+ {
+ .name = "SVE2 PMULL",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEPMULL,
+ .cpuinfo = "svepmull",
+ .sigill_fn = svepmull_sigill,
+ },
+ {
+ .name = "SVE2 BITPERM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEBITPERM,
+ .cpuinfo = "svebitperm",
+ .sigill_fn = svebitperm_sigill,
+ },
+ {
+ .name = "SVE2 SHA3",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVESHA3,
+ .cpuinfo = "svesha3",
+ .sigill_fn = svesha3_sigill,
+ },
+ {
+ .name = "SVE2 SM4",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVESM4,
+ .cpuinfo = "svesm4",
+ .sigill_fn = svesm4_sigill,
+ },
+ {
+ .name = "SVE2 I8MM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEI8MM,
+ .cpuinfo = "svei8mm",
+ .sigill_fn = svei8mm_sigill,
+ },
+ {
+ .name = "SVE2 F32MM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEF32MM,
+ .cpuinfo = "svef32mm",
+ .sigill_fn = svef32mm_sigill,
+ },
+ {
+ .name = "SVE2 F64MM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEF64MM,
+ .cpuinfo = "svef64mm",
+ .sigill_fn = svef64mm_sigill,
+ },
+ {
+ .name = "SVE2 BF16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEBF16,
+ .cpuinfo = "svebf16",
+ .sigill_fn = svebf16_sigill,
+ },
+ {
+ .name = "SVE2 EBF16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVE_EBF16,
+ .cpuinfo = "sveebf16",
+ },
+ {
+ .name = "HBC",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_HBC,
+ .cpuinfo = "hbc",
+ .sigill_fn = hbc_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "MTE_FAR",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_MTE_FAR,
+ .cpuinfo = "mtefar",
+ },
+ {
+ .name = "MTE_STOREONLY",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_MTE_STORE_ONLY,
+ .cpuinfo = "mtestoreonly",
+ },
+};
+
+typedef void (*sighandler_fn)(int, siginfo_t *, void *);
+
+#define DEF_SIGHANDLER_FUNC(SIG, NUM) \
+static bool seen_##SIG; \
+static void handle_##SIG(int sig, siginfo_t *info, void *context) \
+{ \
+ ucontext_t *uc = context; \
+ \
+ seen_##SIG = true; \
+ /* Skip over the offending instruction */ \
+ uc->uc_mcontext.pc += 4; \
+}
+
+DEF_SIGHANDLER_FUNC(sigill, SIGILL);
+DEF_SIGHANDLER_FUNC(sigbus, SIGBUS);
+
+bool cpuinfo_present(const char *name)
+{
+ FILE *f;
+ char buf[2048], name_space[30], name_newline[30];
+ char *s;
+
+ /*
+ * The feature should appear with a leading space and either a
+ * trailing space or a newline.
+ */
+ snprintf(name_space, sizeof(name_space), " %s ", name);
+ snprintf(name_newline, sizeof(name_newline), " %s\n", name);
+
+ f = fopen("/proc/cpuinfo", "r");
+ if (!f) {
+ ksft_print_msg("Failed to open /proc/cpuinfo\n");
+ return false;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ /* Features: line? */
+ if (strncmp(buf, "Features\t:", strlen("Features\t:")) != 0)
+ continue;
+
+ /* All CPUs should be symmetric, don't read any more */
+ fclose(f);
+
+ s = strstr(buf, name_space);
+ if (s)
+ return true;
+ s = strstr(buf, name_newline);
+ if (s)
+ return true;
+
+ return false;
+ }
+
+ ksft_print_msg("Failed to find Features in /proc/cpuinfo\n");
+ fclose(f);
+ return false;
+}
+
+static int install_sigaction(int signum, sighandler_fn handler)
+{
+ int ret;
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ ret = sigaction(signum, &sa, NULL);
+ if (ret < 0)
+ ksft_exit_fail_msg("Failed to install SIGNAL handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ return ret;
+}
+
+static void uninstall_sigaction(int signum)
+{
+ if (sigaction(signum, NULL, NULL) < 0)
+ ksft_exit_fail_msg("Failed to uninstall SIGNAL handler: %s (%d)\n",
+ strerror(errno), errno);
+}
+
+#define DEF_INST_RAISE_SIG(SIG, NUM) \
+static bool inst_raise_##SIG(const struct hwcap_data *hwcap, \
+ bool have_hwcap) \
+{ \
+ if (!hwcap->SIG##_fn) { \
+ ksft_test_result_skip(#SIG"_%s\n", hwcap->name); \
+ /* assume that it would raise exception in default */ \
+ return true; \
+ } \
+ \
+ install_sigaction(NUM, handle_##SIG); \
+ \
+ seen_##SIG = false; \
+ hwcap->SIG##_fn(); \
+ \
+ if (have_hwcap) { \
+ /* Should be able to use the extension */ \
+ ksft_test_result(!seen_##SIG, \
+ #SIG"_%s\n", hwcap->name); \
+ } else if (hwcap->SIG##_reliable) { \
+ /* Guaranteed a SIGNAL */ \
+ ksft_test_result(seen_##SIG, \
+ #SIG"_%s\n", hwcap->name); \
+ } else { \
+ /* Missing SIGNAL might be fine */ \
+ ksft_print_msg(#SIG"_%sreported for %s\n", \
+ seen_##SIG ? "" : "not ", \
+ hwcap->name); \
+ ksft_test_result_skip(#SIG"_%s\n", \
+ hwcap->name); \
+ } \
+ \
+ uninstall_sigaction(NUM); \
+ return seen_##SIG; \
+}
+
+DEF_INST_RAISE_SIG(sigill, SIGILL);
+DEF_INST_RAISE_SIG(sigbus, SIGBUS);
+
+int main(void)
+{
+ int i;
+ const struct hwcap_data *hwcap;
+ bool have_cpuinfo, have_hwcap, raise_sigill;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP);
+
+ for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+ hwcap = &hwcaps[i];
+
+ have_hwcap = getauxval(hwcap->at_hwcap) & hwcap->hwcap_bit;
+ have_cpuinfo = cpuinfo_present(hwcap->cpuinfo);
+
+ if (have_hwcap)
+ ksft_print_msg("%s present\n", hwcap->name);
+
+ ksft_test_result(have_hwcap == have_cpuinfo,
+ "cpuinfo_match_%s\n", hwcap->name);
+
+ /*
+ * Testing for SIGBUS only makes sense after make sure
+ * that the instruction does not cause a SIGILL signal.
+ */
+ raise_sigill = inst_raise_sigill(hwcap, have_hwcap);
+ if (!raise_sigill)
+ inst_raise_sigbus(hwcap, have_hwcap);
+ else
+ ksft_test_result_skip("sigbus_%s\n", hwcap->name);
+ }
+
+ ksft_print_cnts();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
new file mode 100644
index 000000000000..0e46ac21c81d
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "kselftest.h"
+
+#define EXPECTED_TESTS 11
+
+#define MAX_TPIDRS 2
+
+static bool have_sme(void)
+{
+ return getauxval(AT_HWCAP2) & HWCAP2_SME;
+}
+
+static void test_tpidr(pid_t child)
+{
+ uint64_t read_val[MAX_TPIDRS];
+ uint64_t write_val[MAX_TPIDRS];
+ struct iovec read_iov, write_iov;
+ bool test_tpidr2 = false;
+ int ret, i;
+
+ read_iov.iov_base = read_val;
+ write_iov.iov_base = write_val;
+
+ /* Should be able to read a single TPIDR... */
+ read_iov.iov_len = sizeof(uint64_t);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+ ksft_test_result(ret == 0, "read_tpidr_one\n");
+
+ /* ...write a new value.. */
+ write_iov.iov_len = sizeof(uint64_t);
+ write_val[0] = read_val[0] + 1;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
+ ksft_test_result(ret == 0, "write_tpidr_one\n");
+
+ /* ...then read it back */
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+ ksft_test_result(ret == 0 && write_val[0] == read_val[0],
+ "verify_tpidr_one\n");
+
+ /* If we have TPIDR2 we should be able to read it */
+ read_iov.iov_len = sizeof(read_val);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+ if (ret == 0) {
+ /* If we have SME there should be two TPIDRs */
+ if (read_iov.iov_len >= sizeof(read_val))
+ test_tpidr2 = true;
+
+ if (have_sme() && test_tpidr2) {
+ ksft_test_result(test_tpidr2, "count_tpidrs\n");
+ } else {
+ ksft_test_result(read_iov.iov_len % sizeof(uint64_t) == 0,
+ "count_tpidrs\n");
+ }
+ } else {
+ ksft_test_result_fail("count_tpidrs\n");
+ }
+
+ if (test_tpidr2) {
+ /* Try to write new values to all known TPIDRs... */
+ write_iov.iov_len = sizeof(write_val);
+ for (i = 0; i < MAX_TPIDRS; i++)
+ write_val[i] = read_val[i] + 1;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
+
+ ksft_test_result(ret == 0 &&
+ write_iov.iov_len == sizeof(write_val),
+ "tpidr2_write\n");
+
+ /* ...then read them back */
+ read_iov.iov_len = sizeof(read_val);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+
+ if (have_sme()) {
+ /* Should read back the written value */
+ ksft_test_result(ret == 0 &&
+ read_iov.iov_len >= sizeof(read_val) &&
+ memcmp(read_val, write_val,
+ sizeof(read_val)) == 0,
+ "tpidr2_read\n");
+ } else {
+ /* TPIDR2 should read as zero */
+ ksft_test_result(ret == 0 &&
+ read_iov.iov_len >= sizeof(read_val) &&
+ read_val[0] == write_val[0] &&
+ read_val[1] == 0,
+ "tpidr2_read\n");
+ }
+
+ /* Writing only TPIDR... */
+ write_iov.iov_len = sizeof(uint64_t);
+ memcpy(write_val, read_val, sizeof(read_val));
+ write_val[0] += 1;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
+
+ if (ret == 0) {
+ /* ...should leave TPIDR2 untouched */
+ read_iov.iov_len = sizeof(read_val);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS,
+ &read_iov);
+
+ ksft_test_result(ret == 0 &&
+ read_iov.iov_len >= sizeof(read_val) &&
+ memcmp(read_val, write_val,
+ sizeof(read_val)) == 0,
+ "write_tpidr_only\n");
+ } else {
+ ksft_test_result_fail("write_tpidr_only\n");
+ }
+ } else {
+ ksft_test_result_skip("tpidr2_write\n");
+ ksft_test_result_skip("tpidr2_read\n");
+ ksft_test_result_skip("write_tpidr_only\n");
+ }
+}
+
+static void test_hw_debug(pid_t child, int type, const char *type_name)
+{
+ struct user_hwdebug_state state;
+ struct iovec iov;
+ int slots, arch, ret;
+
+ iov.iov_len = sizeof(state);
+ iov.iov_base = &state;
+
+ /* Should be able to read the values */
+ ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+ ksft_test_result(ret == 0, "read_%s\n", type_name);
+
+ if (ret == 0) {
+ /* Low 8 bits is the number of slots, next 4 bits the arch */
+ slots = state.dbg_info & 0xff;
+ arch = (state.dbg_info >> 8) & 0xf;
+
+ ksft_print_msg("%s version %d with %d slots\n", type_name,
+ arch, slots);
+
+ /* Zero is not currently architecturally valid */
+ ksft_test_result(arch, "%s_arch_set\n", type_name);
+ } else {
+ ksft_test_result_skip("%s_arch_set\n", type_name);
+ }
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_perror("PTRACE_TRACEME");
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_perror("raise(SIGSTOP)");
+
+ return EXIT_SUCCESS;
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+ test_tpidr(child);
+ test_hw_debug(child, NT_ARM_HW_WATCH, "NT_ARM_HW_WATCH");
+ test_hw_debug(child, NT_ARM_HW_BREAK, "NT_ARM_HW_BREAK");
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ pid_t child;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ ksft_set_plan(EXPECTED_TESTS);
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
new file mode 100644
index 000000000000..66ab2e0bae5f
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+//
+// Assembly portion of the syscall ABI test
+
+//
+// Load values from memory into registers, invoke a syscall and save the
+// register values back to memory for later checking. The syscall to be
+// invoked is configured in x8 of the input GPR data.
+//
+// x0: SVE VL, 0 for FP only
+// x1: SME VL
+//
+// GPRs: gpr_in, gpr_out
+// FPRs: fpr_in, fpr_out
+// Zn: z_in, z_out
+// Pn: p_in, p_out
+// FFR: ffr_in, ffr_out
+// ZA: za_in, za_out
+// SVCR: svcr_in, svcr_out
+
+#include "syscall-abi.h"
+
+.arch_extension sve
+
+#define ID_AA64SMFR0_EL1_SMEver_SHIFT 56
+#define ID_AA64SMFR0_EL1_SMEver_WIDTH 4
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ .inst 0xe11f8000 \
+ | (((\nx) & 0x1f) << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ .inst 0xe13f8000 \
+ | (((\nx) & 0x1f) << 5)
+.endm
+
+.globl do_syscall
+do_syscall:
+ // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1
+ stp x29, x30, [sp, #-112]!
+ mov x29, sp
+ stp x0, x1, [sp, #16]
+ stp x19, x20, [sp, #32]
+ stp x21, x22, [sp, #48]
+ stp x23, x24, [sp, #64]
+ stp x25, x26, [sp, #80]
+ stp x27, x28, [sp, #96]
+
+ // Set SVCR if we're doing SME
+ cbz x1, load_gpr
+ adrp x2, svcr_in
+ ldr x2, [x2, :lo12:svcr_in]
+ msr S3_3_C4_C2_2, x2
+
+ // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR
+ tbz x2, #SVCR_ZA_SHIFT, load_gpr
+ mov w12, #0
+ ldr x2, =za_in
+1: _ldr_za 12, 2
+ add x2, x2, x1
+ add x12, x12, #1
+ cmp x1, x12
+ bne 1b
+
+ // ZT0
+ mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1
+ ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
+ #ID_AA64SMFR0_EL1_SMEver_WIDTH
+ cbz x2, load_gpr
+ adrp x2, zt_in
+ add x2, x2, :lo12:zt_in
+ _ldr_zt 2
+
+load_gpr:
+ // Load GPRs x8-x28, and save our SP/FP for later comparison
+ ldr x2, =gpr_in
+ add x2, x2, #64
+ ldp x8, x9, [x2], #16
+ ldp x10, x11, [x2], #16
+ ldp x12, x13, [x2], #16
+ ldp x14, x15, [x2], #16
+ ldp x16, x17, [x2], #16
+ ldp x18, x19, [x2], #16
+ ldp x20, x21, [x2], #16
+ ldp x22, x23, [x2], #16
+ ldp x24, x25, [x2], #16
+ ldp x26, x27, [x2], #16
+ ldr x28, [x2], #8
+ str x29, [x2], #8 // FP
+ str x30, [x2], #8 // LR
+
+ // Load FPRs if we're not doing neither SVE nor streaming SVE
+ cbnz x0, check_sve_in
+ ldr x2, =svcr_in
+ tbnz x2, #SVCR_SM_SHIFT, check_sve_in
+
+ ldr x2, =fpr_in
+ ldp q0, q1, [x2]
+ ldp q2, q3, [x2, #16 * 2]
+ ldp q4, q5, [x2, #16 * 4]
+ ldp q6, q7, [x2, #16 * 6]
+ ldp q8, q9, [x2, #16 * 8]
+ ldp q10, q11, [x2, #16 * 10]
+ ldp q12, q13, [x2, #16 * 12]
+ ldp q14, q15, [x2, #16 * 14]
+ ldp q16, q17, [x2, #16 * 16]
+ ldp q18, q19, [x2, #16 * 18]
+ ldp q20, q21, [x2, #16 * 20]
+ ldp q22, q23, [x2, #16 * 22]
+ ldp q24, q25, [x2, #16 * 24]
+ ldp q26, q27, [x2, #16 * 26]
+ ldp q28, q29, [x2, #16 * 28]
+ ldp q30, q31, [x2, #16 * 30]
+
+ b 2f
+
+check_sve_in:
+ // Load the SVE registers if we're doing SVE/SME
+
+ ldr x2, =z_in
+ ldr z0, [x2, #0, MUL VL]
+ ldr z1, [x2, #1, MUL VL]
+ ldr z2, [x2, #2, MUL VL]
+ ldr z3, [x2, #3, MUL VL]
+ ldr z4, [x2, #4, MUL VL]
+ ldr z5, [x2, #5, MUL VL]
+ ldr z6, [x2, #6, MUL VL]
+ ldr z7, [x2, #7, MUL VL]
+ ldr z8, [x2, #8, MUL VL]
+ ldr z9, [x2, #9, MUL VL]
+ ldr z10, [x2, #10, MUL VL]
+ ldr z11, [x2, #11, MUL VL]
+ ldr z12, [x2, #12, MUL VL]
+ ldr z13, [x2, #13, MUL VL]
+ ldr z14, [x2, #14, MUL VL]
+ ldr z15, [x2, #15, MUL VL]
+ ldr z16, [x2, #16, MUL VL]
+ ldr z17, [x2, #17, MUL VL]
+ ldr z18, [x2, #18, MUL VL]
+ ldr z19, [x2, #19, MUL VL]
+ ldr z20, [x2, #20, MUL VL]
+ ldr z21, [x2, #21, MUL VL]
+ ldr z22, [x2, #22, MUL VL]
+ ldr z23, [x2, #23, MUL VL]
+ ldr z24, [x2, #24, MUL VL]
+ ldr z25, [x2, #25, MUL VL]
+ ldr z26, [x2, #26, MUL VL]
+ ldr z27, [x2, #27, MUL VL]
+ ldr z28, [x2, #28, MUL VL]
+ ldr z29, [x2, #29, MUL VL]
+ ldr z30, [x2, #30, MUL VL]
+ ldr z31, [x2, #31, MUL VL]
+
+ // Only set a non-zero FFR, test patterns must be zero since the
+ // syscall should clear it - this lets us handle FA64.
+ ldr x2, =ffr_in
+ ldr p0, [x2]
+ ldr x2, [x2, #0]
+ cbz x2, 1f
+ wrffr p0.b
+1:
+
+ ldr x2, =p_in
+ ldr p0, [x2, #0, MUL VL]
+ ldr p1, [x2, #1, MUL VL]
+ ldr p2, [x2, #2, MUL VL]
+ ldr p3, [x2, #3, MUL VL]
+ ldr p4, [x2, #4, MUL VL]
+ ldr p5, [x2, #5, MUL VL]
+ ldr p6, [x2, #6, MUL VL]
+ ldr p7, [x2, #7, MUL VL]
+ ldr p8, [x2, #8, MUL VL]
+ ldr p9, [x2, #9, MUL VL]
+ ldr p10, [x2, #10, MUL VL]
+ ldr p11, [x2, #11, MUL VL]
+ ldr p12, [x2, #12, MUL VL]
+ ldr p13, [x2, #13, MUL VL]
+ ldr p14, [x2, #14, MUL VL]
+ ldr p15, [x2, #15, MUL VL]
+2:
+
+ // Do the syscall
+ svc #0
+
+ // Save GPRs x8-x30
+ ldr x2, =gpr_out
+ add x2, x2, #64
+ stp x8, x9, [x2], #16
+ stp x10, x11, [x2], #16
+ stp x12, x13, [x2], #16
+ stp x14, x15, [x2], #16
+ stp x16, x17, [x2], #16
+ stp x18, x19, [x2], #16
+ stp x20, x21, [x2], #16
+ stp x22, x23, [x2], #16
+ stp x24, x25, [x2], #16
+ stp x26, x27, [x2], #16
+ stp x28, x29, [x2], #16
+ str x30, [x2]
+
+ // Restore x0 and x1 for feature checks
+ ldp x0, x1, [sp, #16]
+
+ // Save FPSIMD state
+ ldr x2, =fpr_out
+ stp q0, q1, [x2]
+ stp q2, q3, [x2, #16 * 2]
+ stp q4, q5, [x2, #16 * 4]
+ stp q6, q7, [x2, #16 * 6]
+ stp q8, q9, [x2, #16 * 8]
+ stp q10, q11, [x2, #16 * 10]
+ stp q12, q13, [x2, #16 * 12]
+ stp q14, q15, [x2, #16 * 14]
+ stp q16, q17, [x2, #16 * 16]
+ stp q18, q19, [x2, #16 * 18]
+ stp q20, q21, [x2, #16 * 20]
+ stp q22, q23, [x2, #16 * 22]
+ stp q24, q25, [x2, #16 * 24]
+ stp q26, q27, [x2, #16 * 26]
+ stp q28, q29, [x2, #16 * 28]
+ stp q30, q31, [x2, #16 * 30]
+
+ // Save SVCR if we're doing SME
+ cbz x1, check_sve_out
+ mrs x2, S3_3_C4_C2_2
+ adrp x3, svcr_out
+ str x2, [x3, :lo12:svcr_out]
+
+ // Save ZA if it's enabled - uses x12 as scratch due to SME STR
+ tbz x2, #SVCR_ZA_SHIFT, check_sve_out
+ mov w12, #0
+ ldr x2, =za_out
+1: _str_za 12, 2
+ add x2, x2, x1
+ add x12, x12, #1
+ cmp x1, x12
+ bne 1b
+
+ // ZT0
+ mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1
+ ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
+ #ID_AA64SMFR0_EL1_SMEver_WIDTH
+ cbz x2, check_sve_out
+ adrp x2, zt_out
+ add x2, x2, :lo12:zt_out
+ _str_zt 2
+
+check_sve_out:
+ // Save the SVE state if we have some
+ cbz x0, 1f
+
+ ldr x2, =z_out
+ str z0, [x2, #0, MUL VL]
+ str z1, [x2, #1, MUL VL]
+ str z2, [x2, #2, MUL VL]
+ str z3, [x2, #3, MUL VL]
+ str z4, [x2, #4, MUL VL]
+ str z5, [x2, #5, MUL VL]
+ str z6, [x2, #6, MUL VL]
+ str z7, [x2, #7, MUL VL]
+ str z8, [x2, #8, MUL VL]
+ str z9, [x2, #9, MUL VL]
+ str z10, [x2, #10, MUL VL]
+ str z11, [x2, #11, MUL VL]
+ str z12, [x2, #12, MUL VL]
+ str z13, [x2, #13, MUL VL]
+ str z14, [x2, #14, MUL VL]
+ str z15, [x2, #15, MUL VL]
+ str z16, [x2, #16, MUL VL]
+ str z17, [x2, #17, MUL VL]
+ str z18, [x2, #18, MUL VL]
+ str z19, [x2, #19, MUL VL]
+ str z20, [x2, #20, MUL VL]
+ str z21, [x2, #21, MUL VL]
+ str z22, [x2, #22, MUL VL]
+ str z23, [x2, #23, MUL VL]
+ str z24, [x2, #24, MUL VL]
+ str z25, [x2, #25, MUL VL]
+ str z26, [x2, #26, MUL VL]
+ str z27, [x2, #27, MUL VL]
+ str z28, [x2, #28, MUL VL]
+ str z29, [x2, #29, MUL VL]
+ str z30, [x2, #30, MUL VL]
+ str z31, [x2, #31, MUL VL]
+
+ ldr x2, =p_out
+ str p0, [x2, #0, MUL VL]
+ str p1, [x2, #1, MUL VL]
+ str p2, [x2, #2, MUL VL]
+ str p3, [x2, #3, MUL VL]
+ str p4, [x2, #4, MUL VL]
+ str p5, [x2, #5, MUL VL]
+ str p6, [x2, #6, MUL VL]
+ str p7, [x2, #7, MUL VL]
+ str p8, [x2, #8, MUL VL]
+ str p9, [x2, #9, MUL VL]
+ str p10, [x2, #10, MUL VL]
+ str p11, [x2, #11, MUL VL]
+ str p12, [x2, #12, MUL VL]
+ str p13, [x2, #13, MUL VL]
+ str p14, [x2, #14, MUL VL]
+ str p15, [x2, #15, MUL VL]
+
+ // Only save FFR if we wrote a value for SME
+ ldr x2, =ffr_in
+ ldr x2, [x2, #0]
+ cbz x2, 1f
+ ldr x2, =ffr_out
+ rdffr p0.b
+ str p0, [x2]
+1:
+
+ // Restore callee saved registers x19-x30
+ ldp x19, x20, [sp, #32]
+ ldp x21, x22, [sp, #48]
+ ldp x23, x24, [sp, #64]
+ ldp x25, x26, [sp, #80]
+ ldp x27, x28, [sp, #96]
+ ldp x29, x30, [sp], #112
+
+ // Clear SVCR if we were doing SME so future tests don't have ZA
+ cbz x1, 1f
+ msr S3_3_C4_C2_2, xzr
+1:
+
+ ret
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
new file mode 100644
index 000000000000..b67e3e26fa6d
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -0,0 +1,565 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+#include <asm/unistd.h>
+
+#include "kselftest.h"
+
+#include "syscall-abi.h"
+
+/*
+ * The kernel defines a much larger SVE_VQ_MAX than is expressable in
+ * the architecture, this creates a *lot* of overhead filling the
+ * buffers (especially ZA) on emulated platforms so use the actual
+ * architectural maximum instead.
+ */
+#define ARCH_SVE_VQ_MAX 16
+
+static int default_sme_vl;
+
+static int sve_vl_count;
+static unsigned int sve_vls[ARCH_SVE_VQ_MAX];
+static int sme_vl_count;
+static unsigned int sme_vls[ARCH_SVE_VQ_MAX];
+
+extern void do_syscall(int sve_vl, int sme_vl);
+
+static void fill_random(void *buf, size_t size)
+{
+ int i;
+ uint32_t *lbuf = buf;
+
+ /* random() returns a 32 bit number regardless of the size of long */
+ for (i = 0; i < size / sizeof(uint32_t); i++)
+ lbuf[i] = random();
+}
+
+/*
+ * We also repeat the test for several syscalls to try to expose different
+ * behaviour.
+ */
+static struct syscall_cfg {
+ int syscall_nr;
+ const char *name;
+} syscalls[] = {
+ { __NR_getpid, "getpid()" },
+ { __NR_sched_yield, "sched_yield()" },
+};
+
+#define NUM_GPR 31
+uint64_t gpr_in[NUM_GPR];
+uint64_t gpr_out[NUM_GPR];
+
+static void setup_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(gpr_in, sizeof(gpr_in));
+ gpr_in[8] = cfg->syscall_nr;
+ memset(gpr_out, 0, sizeof(gpr_out));
+}
+
+static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr)
+{
+ int errors = 0;
+ int i;
+
+ /*
+ * GPR x0-x7 may be clobbered, and all others should be preserved.
+ */
+ for (i = 9; i < ARRAY_SIZE(gpr_in); i++) {
+ if (gpr_in[i] != gpr_out[i]) {
+ ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %lx != %lx\n",
+ cfg->name, sve_vl, i,
+ gpr_in[i], gpr_out[i]);
+ errors++;
+ }
+ }
+
+ return errors;
+}
+
+#define NUM_FPR 32
+uint64_t fpr_in[NUM_FPR * 2];
+uint64_t fpr_out[NUM_FPR * 2];
+uint64_t fpr_zero[NUM_FPR * 2];
+
+static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(fpr_in, sizeof(fpr_in));
+ memset(fpr_out, 0, sizeof(fpr_out));
+}
+
+static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ int errors = 0;
+ int i;
+
+ if (!sve_vl && !(svcr & SVCR_SM_MASK)) {
+ for (i = 0; i < ARRAY_SIZE(fpr_in); i++) {
+ if (fpr_in[i] != fpr_out[i]) {
+ ksft_print_msg("%s Q%d/%d mismatch %lx != %lx\n",
+ cfg->name,
+ i / 2, i % 2,
+ fpr_in[i], fpr_out[i]);
+ errors++;
+ }
+ }
+ }
+
+ /*
+ * In streaming mode the whole register set should be cleared
+ * by the transition out of streaming mode.
+ */
+ if (svcr & SVCR_SM_MASK) {
+ if (memcmp(fpr_zero, fpr_out, sizeof(fpr_out)) != 0) {
+ ksft_print_msg("%s FPSIMD registers non-zero exiting SM\n",
+ cfg->name);
+ errors++;
+ }
+ }
+
+ return errors;
+}
+
+#define SVE_Z_SHARED_BYTES (128 / 8)
+
+static uint8_t z_zero[__SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+
+static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(z_in, sizeof(z_in));
+ fill_random(z_out, sizeof(z_out));
+}
+
+static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ size_t reg_size = sve_vl;
+ int errors = 0;
+ int i;
+
+ if (!sve_vl)
+ return 0;
+
+ for (i = 0; i < SVE_NUM_ZREGS; i++) {
+ uint8_t *in = &z_in[reg_size * i];
+ uint8_t *out = &z_out[reg_size * i];
+
+ if (svcr & SVCR_SM_MASK) {
+ /*
+ * In streaming mode the whole register should
+ * be cleared by the transition out of
+ * streaming mode.
+ */
+ if (memcmp(z_zero, out, reg_size) != 0) {
+ ksft_print_msg("%s SVE VL %d Z%d non-zero\n",
+ cfg->name, sve_vl, i);
+ errors++;
+ }
+ } else {
+ /*
+ * For standard SVE the low 128 bits should be
+ * preserved and any additional bits cleared.
+ */
+ if (memcmp(in, out, SVE_Z_SHARED_BYTES) != 0) {
+ ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n",
+ cfg->name, sve_vl, i);
+ errors++;
+ }
+
+ if (reg_size > SVE_Z_SHARED_BYTES &&
+ (memcmp(z_zero, out + SVE_Z_SHARED_BYTES,
+ reg_size - SVE_Z_SHARED_BYTES) != 0)) {
+ ksft_print_msg("%s SVE VL %d Z%d high bits non-zero\n",
+ cfg->name, sve_vl, i);
+ errors++;
+ }
+ }
+ }
+
+ return errors;
+}
+
+uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+
+static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(p_in, sizeof(p_in));
+ fill_random(p_out, sizeof(p_out));
+}
+
+static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
+
+ int errors = 0;
+ int i;
+
+ if (!sve_vl)
+ return 0;
+
+ /* After a syscall the P registers should be zeroed */
+ for (i = 0; i < SVE_NUM_PREGS * reg_size; i++)
+ if (p_out[i])
+ errors++;
+ if (errors)
+ ksft_print_msg("%s SVE VL %d predicate registers non-zero\n",
+ cfg->name, sve_vl);
+
+ return errors;
+}
+
+uint8_t ffr_in[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t ffr_out[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+
+static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ /*
+ * If we are in streaming mode and do not have FA64 then FFR
+ * is unavailable.
+ */
+ if ((svcr & SVCR_SM_MASK) &&
+ !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) {
+ memset(&ffr_in, 0, sizeof(ffr_in));
+ return;
+ }
+
+ /*
+ * It is only valid to set a contiguous set of bits starting
+ * at 0. For now since we're expecting this to be cleared by
+ * a syscall just set all bits.
+ */
+ memset(ffr_in, 0xff, sizeof(ffr_in));
+ fill_random(ffr_out, sizeof(ffr_out));
+}
+
+static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
+ int errors = 0;
+ int i;
+
+ if (!sve_vl)
+ return 0;
+
+ if ((svcr & SVCR_SM_MASK) &&
+ !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64))
+ return 0;
+
+ /* After a syscall FFR should be zeroed */
+ for (i = 0; i < reg_size; i++)
+ if (ffr_out[i])
+ errors++;
+ if (errors)
+ ksft_print_msg("%s SVE VL %d FFR non-zero\n",
+ cfg->name, sve_vl);
+
+ return errors;
+}
+
+uint64_t svcr_in, svcr_out;
+
+static void setup_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ svcr_in = svcr;
+}
+
+static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ int errors = 0;
+
+ if (svcr_out & SVCR_SM_MASK) {
+ ksft_print_msg("%s Still in SM, SVCR %lx\n",
+ cfg->name, svcr_out);
+ errors++;
+ }
+
+ if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) {
+ ksft_print_msg("%s PSTATE.ZA changed, SVCR %lx != %lx\n",
+ cfg->name, svcr_in, svcr_out);
+ errors++;
+ }
+
+ return errors;
+}
+
+uint8_t za_in[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t za_out[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)];
+
+static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(za_in, sizeof(za_in));
+ memset(za_out, 0, sizeof(za_out));
+}
+
+static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ size_t reg_size = sme_vl * sme_vl;
+ int errors = 0;
+
+ if (!(svcr & SVCR_ZA_MASK))
+ return 0;
+
+ if (memcmp(za_in, za_out, reg_size) != 0) {
+ ksft_print_msg("SME VL %d ZA does not match\n", sme_vl);
+ errors++;
+ }
+
+ return errors;
+}
+
+uint8_t zt_in[ZT_SIG_REG_BYTES] __attribute__((aligned(16)));
+uint8_t zt_out[ZT_SIG_REG_BYTES] __attribute__((aligned(16)));
+
+static void setup_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(zt_in, sizeof(zt_in));
+ memset(zt_out, 0, sizeof(zt_out));
+}
+
+static int check_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ int errors = 0;
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2))
+ return 0;
+
+ if (!(svcr & SVCR_ZA_MASK))
+ return 0;
+
+ if (memcmp(zt_in, zt_out, sizeof(zt_in)) != 0) {
+ ksft_print_msg("SME VL %d ZT does not match\n", sme_vl);
+ errors++;
+ }
+
+ return errors;
+}
+
+typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr);
+typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr);
+
+/*
+ * Each set of registers has a setup function which is called before
+ * the syscall to fill values in a global variable for loading by the
+ * test code and a check function which validates that the results are
+ * as expected. Vector lengths are passed everywhere, a vector length
+ * of 0 should be treated as do not test.
+ */
+static struct {
+ setup_fn setup;
+ check_fn check;
+} regset[] = {
+ { setup_gpr, check_gpr },
+ { setup_fpr, check_fpr },
+ { setup_z, check_z },
+ { setup_p, check_p },
+ { setup_ffr, check_ffr },
+ { setup_svcr, check_svcr },
+ { setup_za, check_za },
+ { setup_zt, check_zt },
+};
+
+static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ int errors = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regset); i++)
+ regset[i].setup(cfg, sve_vl, sme_vl, svcr);
+
+ do_syscall(sve_vl, sme_vl);
+
+ for (i = 0; i < ARRAY_SIZE(regset); i++)
+ errors += regset[i].check(cfg, sve_vl, sme_vl, svcr);
+
+ return errors == 0;
+}
+
+static void test_one_syscall(struct syscall_cfg *cfg)
+{
+ int sve, sme;
+ int ret;
+
+ /* FPSIMD only case */
+ ksft_test_result(do_test(cfg, 0, default_sme_vl, 0),
+ "%s FPSIMD\n", cfg->name);
+
+ for (sve = 0; sve < sve_vl_count; sve++) {
+ ret = prctl(PR_SVE_SET_VL, sve_vls[sve]);
+ if (ret == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ ksft_test_result(do_test(cfg, sve_vls[sve], default_sme_vl, 0),
+ "%s SVE VL %d\n", cfg->name, sve_vls[sve]);
+
+ for (sme = 0; sme < sme_vl_count; sme++) {
+ ret = prctl(PR_SME_SET_VL, sme_vls[sme]);
+ if (ret == -1)
+ ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ ksft_test_result(do_test(cfg, sve_vls[sve],
+ sme_vls[sme],
+ SVCR_ZA_MASK | SVCR_SM_MASK),
+ "%s SVE VL %d/SME VL %d SM+ZA\n",
+ cfg->name, sve_vls[sve],
+ sme_vls[sme]);
+ ksft_test_result(do_test(cfg, sve_vls[sve],
+ sme_vls[sme], SVCR_SM_MASK),
+ "%s SVE VL %d/SME VL %d SM\n",
+ cfg->name, sve_vls[sve],
+ sme_vls[sme]);
+ ksft_test_result(do_test(cfg, sve_vls[sve],
+ sme_vls[sme], SVCR_ZA_MASK),
+ "%s SVE VL %d/SME VL %d ZA\n",
+ cfg->name, sve_vls[sve],
+ sme_vls[sme]);
+ }
+ }
+
+ for (sme = 0; sme < sme_vl_count; sme++) {
+ ret = prctl(PR_SME_SET_VL, sme_vls[sme]);
+ if (ret == -1)
+ ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ ksft_test_result(do_test(cfg, 0, sme_vls[sme],
+ SVCR_ZA_MASK | SVCR_SM_MASK),
+ "%s SME VL %d SM+ZA\n",
+ cfg->name, sme_vls[sme]);
+ ksft_test_result(do_test(cfg, 0, sme_vls[sme], SVCR_SM_MASK),
+ "%s SME VL %d SM\n",
+ cfg->name, sme_vls[sme]);
+ ksft_test_result(do_test(cfg, 0, sme_vls[sme], SVCR_ZA_MASK),
+ "%s SME VL %d ZA\n",
+ cfg->name, sme_vls[sme]);
+ }
+}
+
+void sve_count_vls(void)
+{
+ unsigned int vq;
+ int vl;
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ return;
+
+ /*
+ * Enumerate up to ARCH_SVE_VQ_MAX vector lengths
+ */
+ for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (vq != sve_vq_from_vl(vl))
+ vq = sve_vq_from_vl(vl);
+
+ sve_vls[sve_vl_count++] = vl;
+ }
+}
+
+void sme_count_vls(void)
+{
+ unsigned int vq;
+ int vl;
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
+ return;
+
+ /*
+ * Enumerate up to ARCH_SVE_VQ_MAX vector lengths
+ */
+ for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) {
+ vl = prctl(PR_SME_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Found lowest VL */
+ if (sve_vq_from_vl(vl) > vq)
+ break;
+
+ if (vq != sve_vq_from_vl(vl))
+ vq = sve_vq_from_vl(vl);
+
+ sme_vls[sme_vl_count++] = vl;
+ }
+
+ /* Ensure we configure a SME VL, used to flag if SVCR is set */
+ default_sme_vl = sme_vls[0];
+}
+
+int main(void)
+{
+ int i;
+ int tests = 1; /* FPSIMD */
+ int sme_ver;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ sve_count_vls();
+ sme_count_vls();
+
+ tests += sve_vl_count;
+ tests += sme_vl_count * 3;
+ tests += (sve_vl_count * sme_vl_count) * 3;
+ ksft_set_plan(ARRAY_SIZE(syscalls) * tests);
+
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME2)
+ sme_ver = 2;
+ else
+ sme_ver = 1;
+
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+ ksft_print_msg("SME%d with FA64\n", sme_ver);
+ else if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+ ksft_print_msg("SME%d without FA64\n", sme_ver);
+
+ for (i = 0; i < ARRAY_SIZE(syscalls); i++)
+ test_one_syscall(&syscalls[i]);
+
+ ksft_print_cnts();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.h b/tools/testing/selftests/arm64/abi/syscall-abi.h
new file mode 100644
index 000000000000..bda5a87ad381
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+
+#ifndef SYSCALL_ABI_H
+#define SYSCALL_ABI_H
+
+#define SVCR_ZA_MASK 2
+#define SVCR_SM_MASK 1
+
+#define SVCR_ZA_SHIFT 1
+#define SVCR_SM_SHIFT 0
+
+#endif
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
new file mode 100644
index 000000000000..1703543fb7c7
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#include "kselftest.h"
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+#define EXPECTED_TESTS 5
+
+static void set_tpidr2(uint64_t val)
+{
+ asm volatile (
+ "msr " SYS_TPIDR2 ", %0\n"
+ :
+ : "r"(val)
+ : "cc");
+}
+
+static uint64_t get_tpidr2(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_TPIDR2 "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+/* Processes should start with TPIDR2 == 0 */
+static int default_value(void)
+{
+ return get_tpidr2() == 0;
+}
+
+/* If we set TPIDR2 we should read that value */
+static int write_read(void)
+{
+ set_tpidr2(getpid());
+
+ return getpid() == get_tpidr2();
+}
+
+/* If we set a value we should read the same value after scheduling out */
+static int write_sleep_read(void)
+{
+ set_tpidr2(getpid());
+
+ msleep(100);
+
+ return getpid() == get_tpidr2();
+}
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value and be able to set its own
+ * value.
+ */
+static int write_fork_read(void)
+{
+ pid_t newpid, waiting, oldpid;
+ int status;
+
+ set_tpidr2(getpid());
+
+ oldpid = getpid();
+ newpid = fork();
+ if (newpid == 0) {
+ /* In child */
+ if (get_tpidr2() != oldpid) {
+ ksft_print_msg("TPIDR2 changed in child: %llx\n",
+ get_tpidr2());
+ exit(0);
+ }
+
+ set_tpidr2(getpid());
+ if (get_tpidr2() == getpid()) {
+ exit(1);
+ } else {
+ ksft_print_msg("Failed to set TPIDR2 in child\n");
+ exit(0);
+ }
+ }
+ if (newpid < 0) {
+ ksft_print_msg("fork() failed: %d\n", newpid);
+ return 0;
+ }
+
+ for (;;) {
+ waiting = waitpid(newpid, &status, 0);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_print_msg("waitpid() failed: %d\n", errno);
+ return 0;
+ }
+ if (waiting != newpid) {
+ ksft_print_msg("waitpid() returned wrong PID: %d != %d\n",
+ waiting, newpid);
+ return 0;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("child did not exit\n");
+ return 0;
+ }
+
+ if (getpid() != get_tpidr2()) {
+ ksft_print_msg("TPIDR2 corrupted in parent\n");
+ return 0;
+ }
+
+ return WEXITSTATUS(status);
+ }
+}
+
+/*
+ * sys_clone() has a lot of per architecture variation so just define
+ * it here rather than adding it to nolibc, plus the raw API is a
+ * little more convenient for this test.
+ */
+static int sys_clone(unsigned long clone_flags, unsigned long newsp,
+ int *parent_tidptr, unsigned long tls,
+ int *child_tidptr)
+{
+ return my_syscall5(__NR_clone, clone_flags, newsp, parent_tidptr, tls,
+ child_tidptr);
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+
+/*
+ * If we clone with CLONE_VM then the value in the parent should
+ * be unchanged and the child should start with zero and be able to
+ * set its own value.
+ */
+static int write_clone_read(void)
+{
+ int parent_tid, child_tid;
+ pid_t parent, waiting;
+ int ret, status;
+ void *stack;
+
+ parent = getpid();
+ set_tpidr2(parent);
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack) {
+ ksft_print_msg("malloc() failed\n");
+ return 0;
+ }
+
+ ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE,
+ &parent_tid, 0, &child_tid);
+ if (ret == -1) {
+ ksft_print_msg("clone() failed: %d\n", errno);
+ return 0;
+ }
+
+ if (ret == 0) {
+ /* In child */
+ if (get_tpidr2() != 0) {
+ ksft_print_msg("TPIDR2 non-zero in child: %llx\n",
+ get_tpidr2());
+ exit(0);
+ }
+
+ if (gettid() == 0)
+ ksft_print_msg("Child TID==0\n");
+ set_tpidr2(gettid());
+ if (get_tpidr2() == gettid()) {
+ exit(1);
+ } else {
+ ksft_print_msg("Failed to set TPIDR2 in child\n");
+ exit(0);
+ }
+ }
+
+ for (;;) {
+ waiting = waitpid(ret, &status, __WCLONE);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_print_msg("waitpid() failed: %d\n", errno);
+ return 0;
+ }
+ if (waiting != ret) {
+ ksft_print_msg("waitpid() returned wrong PID %d\n",
+ waiting);
+ return 0;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("child did not exit\n");
+ return 0;
+ }
+
+ if (parent != get_tpidr2()) {
+ ksft_print_msg("TPIDR2 corrupted in parent\n");
+ return 0;
+ }
+
+ return WEXITSTATUS(status);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ ksft_print_msg("PID: %d\n", getpid());
+
+ /*
+ * This test is run with nolibc which doesn't support hwcap and
+ * it's probably disproportionate to implement so instead check
+ * for the default vector length configuration in /proc.
+ */
+ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+ if (ret >= 0) {
+ ksft_test_result(default_value(), "default_value\n");
+ ksft_test_result(write_read(), "write_read\n");
+ ksft_test_result(write_sleep_read(), "write_sleep_read\n");
+ ksft_test_result(write_fork_read(), "write_fork_read\n");
+ ksft_test_result(write_clone_read(), "write_clone_read\n");
+
+ } else {
+ ksft_print_msg("SME support not present\n");
+
+ ksft_test_result_skip("default_value\n");
+ ksft_test_result_skip("write_read\n");
+ ksft_test_result_skip("write_sleep_read\n");
+ ksft_test_result_skip("write_fork_read\n");
+ ksft_test_result_skip("write_clone_read\n");
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/arm64/bti/.gitignore b/tools/testing/selftests/arm64/bti/.gitignore
new file mode 100644
index 000000000000..73869fabada4
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/.gitignore
@@ -0,0 +1,2 @@
+btitest
+nobtitest
diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
new file mode 100644
index 000000000000..05e4ee523a53
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/Makefile
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := btitest nobtitest
+
+# These tests are built as freestanding binaries since otherwise BTI
+# support in ld.so is required which is not currently widespread; when
+# it is available it will still be useful to test this separately as the
+# cases for statically linked and dynamically lined binaries are
+# slightly different.
+
+CFLAGS_NOBTI = -mbranch-protection=none -DBTI=0
+CFLAGS_BTI = -mbranch-protection=standard -DBTI=1
+
+CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
+
+BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $<
+NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $<
+
+$(OUTPUT)/%-bti.o: %.c
+ $(BTI_CC_COMMAND)
+
+$(OUTPUT)/%-bti.o: %.S
+ $(BTI_CC_COMMAND)
+
+$(OUTPUT)/%-nobti.o: %.c
+ $(NOBTI_CC_COMMAND)
+
+$(OUTPUT)/%-nobti.o: %.S
+ $(NOBTI_CC_COMMAND)
+
+BTI_OBJS = \
+ $(OUTPUT)/test-bti.o \
+ $(OUTPUT)/signal-bti.o \
+ $(OUTPUT)/start-bti.o \
+ $(OUTPUT)/syscall-bti.o \
+ $(OUTPUT)/system-bti.o \
+ $(OUTPUT)/teststubs-bti.o \
+ $(OUTPUT)/trampoline-bti.o
+$(OUTPUT)/btitest: $(BTI_OBJS)
+ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
+
+NOBTI_OBJS = \
+ $(OUTPUT)/test-nobti.o \
+ $(OUTPUT)/signal-nobti.o \
+ $(OUTPUT)/start-nobti.o \
+ $(OUTPUT)/syscall-nobti.o \
+ $(OUTPUT)/system-nobti.o \
+ $(OUTPUT)/teststubs-nobti.o \
+ $(OUTPUT)/trampoline-nobti.o
+$(OUTPUT)/nobtitest: $(NOBTI_OBJS)
+ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
+
+# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
+# to account for any OUTPUT target-dirs optionally provided by
+# the toplevel makefile
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h
new file mode 100644
index 000000000000..141cdcbf0b8f
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/assembler.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
+
+.macro startfn name:req
+ .globl \name
+\name:
+ .macro endfn
+ .size \name, . - \name
+ .type \name, @function
+ .purgem endfn
+ .endm
+.endm
+
+.macro emit_aarch64_feature_1_and
+ .pushsection .note.gnu.property, "a"
+ .align 3
+ .long 2f - 1f
+ .long 6f - 3f
+ .long NT_GNU_PROPERTY_TYPE_0
+1: .string "GNU"
+2:
+ .align 3
+3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND
+ .long 5f - 4f
+4:
+#if BTI
+ .long GNU_PROPERTY_AARCH64_FEATURE_1_PAC | \
+ GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+ .long 0
+#endif
+5:
+ .align 3
+6:
+ .popsection
+.endm
+
+.macro paciasp
+ hint 0x19
+.endm
+
+.macro autiasp
+ hint 0x1d
+.endm
+
+.macro __bti_
+ hint 0x20
+.endm
+
+.macro __bti_c
+ hint 0x22
+.endm
+
+.macro __bti_j
+ hint 0x24
+.endm
+
+.macro __bti_jc
+ hint 0x26
+.endm
+
+.macro bti what=
+ __bti_\what
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/bti/btitest.h b/tools/testing/selftests/arm64/bti/btitest.h
new file mode 100644
index 000000000000..2aff9b10336e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/btitest.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef BTITEST_H
+#define BTITEST_H
+
+/* Trampolines for calling the test stubs: */
+void call_using_br_x0(void (*)(void));
+void call_using_br_x16(void (*)(void));
+void call_using_blr(void (*)(void));
+
+/* Test stubs: */
+void nohint_func(void);
+void bti_none_func(void);
+void bti_c_func(void);
+void bti_j_func(void);
+void bti_jc_func(void);
+void paciasp_func(void);
+
+#endif /* !BTITEST_H */
diff --git a/tools/testing/selftests/arm64/bti/signal.c b/tools/testing/selftests/arm64/bti/signal.c
new file mode 100644
index 000000000000..f3fd29b91141
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+#include "signal.h"
+
+int sigemptyset(sigset_t *s)
+{
+ unsigned int i;
+
+ for (i = 0; i < _NSIG_WORDS; ++i)
+ s->sig[i] = 0;
+
+ return 0;
+}
+
+int sigaddset(sigset_t *s, int n)
+{
+ if (n < 1 || n > _NSIG)
+ return -EINVAL;
+
+ s->sig[(n - 1) / _NSIG_BPW] |= 1UL << (n - 1) % _NSIG_BPW;
+ return 0;
+}
+
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old)
+{
+ return syscall(__NR_rt_sigaction, n, sa, old, sizeof(sa->sa_mask));
+}
+
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old)
+{
+ return syscall(__NR_rt_sigprocmask, how, mask, old, sizeof(*mask));
+}
diff --git a/tools/testing/selftests/arm64/bti/signal.h b/tools/testing/selftests/arm64/bti/signal.h
new file mode 100644
index 000000000000..103457dc880e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SIGNAL_H
+#define SIGNAL_H
+
+#include <linux/signal.h>
+
+#include "system.h"
+
+typedef __sighandler_t sighandler_t;
+
+int sigemptyset(sigset_t *s);
+int sigaddset(sigset_t *s, int n);
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old);
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old);
+
+#endif /* ! SIGNAL_H */
diff --git a/tools/testing/selftests/arm64/bti/start.S b/tools/testing/selftests/arm64/bti/start.S
new file mode 100644
index 000000000000..831f952e0572
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/start.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn _start
+ mov x0, sp
+ b start
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/syscall.S b/tools/testing/selftests/arm64/bti/syscall.S
new file mode 100644
index 000000000000..8dde8b6f3db1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/syscall.S
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn syscall
+ bti c
+ mov w8, w0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ mov x3, x4
+ mov x4, x5
+ mov x5, x6
+ mov x6, x7
+ svc #0
+ ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c
new file mode 100644
index 000000000000..93d772b00bfe
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <asm/unistd.h>
+
+void __noreturn exit(int n)
+{
+ syscall(__NR_exit, n);
+ unreachable();
+}
+
+ssize_t write(int fd, const void *buf, size_t size)
+{
+ return syscall(__NR_write, fd, buf, size);
+}
diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h
new file mode 100644
index 000000000000..2e9ee1284a0c
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SYSTEM_H
+#define SYSTEM_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+
+typedef __kernel_size_t size_t;
+typedef __kernel_ssize_t ssize_t;
+
+#include <linux/errno.h>
+#include <linux/compiler.h>
+
+#include <asm/hwcap.h>
+#include <asm/ptrace.h>
+#include <asm/unistd.h>
+
+long syscall(int nr, ...);
+
+void __noreturn exit(int n);
+ssize_t write(int fd, const void *buf, size_t size);
+
+#endif /* ! SYSTEM_H */
diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c
new file mode 100644
index 000000000000..28a8e8a28a84
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/test.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019,2021 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/errno.h>
+#include <linux/auxvec.h>
+#include <linux/signal.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+
+typedef struct ucontext ucontext_t;
+
+#include "btitest.h"
+#include "signal.h"
+
+#define EXPECTED_TESTS 18
+
+static volatile unsigned int test_num = 1;
+static unsigned int test_passed;
+static unsigned int test_failed;
+static unsigned int test_skipped;
+
+static void fdputs(int fd, const char *str)
+{
+ size_t len = 0;
+ const char *p = str;
+
+ while (*p++)
+ ++len;
+
+ write(fd, str, len);
+}
+
+static void putstr(const char *str)
+{
+ fdputs(1, str);
+}
+
+static void putnum(unsigned int num)
+{
+ char c;
+
+ if (num / 10)
+ putnum(num / 10);
+
+ c = '0' + (num % 10);
+ write(1, &c, 1);
+}
+
+#define puttestname(test_name, trampoline_name) do { \
+ putstr(test_name); \
+ putstr("/"); \
+ putstr(trampoline_name); \
+} while (0)
+
+void print_summary(void)
+{
+ putstr("# Totals: pass:");
+ putnum(test_passed);
+ putstr(" fail:");
+ putnum(test_failed);
+ putstr(" xfail:0 xpass:0 skip:");
+ putnum(test_skipped);
+ putstr(" error:0\n");
+}
+
+static const char *volatile current_test_name;
+static const char *volatile current_trampoline_name;
+static volatile int sigill_expected, sigill_received;
+
+static void handler(int n, siginfo_t *si __always_unused,
+ void *uc_ __always_unused)
+{
+ ucontext_t *uc = uc_;
+
+ putstr("# \t[SIGILL in ");
+ puttestname(current_test_name, current_trampoline_name);
+ putstr(", BTYPE=");
+ write(1, &"00011011"[((uc->uc_mcontext.pstate & PSR_BTYPE_MASK)
+ >> PSR_BTYPE_SHIFT) * 2], 2);
+ if (!sigill_expected) {
+ putstr("]\n");
+ putstr("not ok ");
+ putnum(test_num);
+ putstr(" ");
+ puttestname(current_test_name, current_trampoline_name);
+ putstr("(unexpected SIGILL)\n");
+ print_summary();
+ exit(128 + n);
+ }
+
+ putstr(" (expected)]\n");
+ sigill_received = 1;
+ /* zap BTYPE so that resuming the faulting code will work */
+ uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK;
+}
+
+/* Does the system have BTI? */
+static bool have_bti;
+
+static void __do_test(void (*trampoline)(void (*)(void)),
+ void (*fn)(void),
+ const char *trampoline_name,
+ const char *name,
+ int expect_sigill)
+{
+ /*
+ * Branch Target exceptions should only happen for BTI
+ * binaries running on a system with BTI:
+ */
+ if (!BTI || !have_bti)
+ expect_sigill = 0;
+
+ sigill_expected = expect_sigill;
+ sigill_received = 0;
+ current_test_name = name;
+ current_trampoline_name = trampoline_name;
+
+ trampoline(fn);
+
+ if (expect_sigill && !sigill_received) {
+ putstr("not ok ");
+ test_failed++;
+ } else {
+ putstr("ok ");
+ test_passed++;
+ }
+ putnum(test_num++);
+ putstr(" ");
+ puttestname(name, trampoline_name);
+ putstr("\n");
+}
+
+#define do_test(expect_sigill_br_x0, \
+ expect_sigill_br_x16, \
+ expect_sigill_blr, \
+ name) \
+do { \
+ __do_test(call_using_br_x0, name, "call_using_br_x0", #name, \
+ expect_sigill_br_x0); \
+ __do_test(call_using_br_x16, name, "call_using_br_x16", #name, \
+ expect_sigill_br_x16); \
+ __do_test(call_using_blr, name, "call_using_blr", #name, \
+ expect_sigill_blr); \
+} while (0)
+
+void start(int *argcp)
+{
+ struct sigaction sa;
+ void *const *p;
+ const struct auxv_entry {
+ unsigned long type;
+ unsigned long val;
+ } *auxv;
+ unsigned long hwcap = 0, hwcap2 = 0;
+
+ putstr("TAP version 13\n");
+ putstr("1..");
+ putnum(EXPECTED_TESTS);
+ putstr("\n");
+
+ /* Gross hack for finding AT_HWCAP2 from the initial process stack: */
+ p = (void *const *)argcp + 1 + *argcp + 1; /* start of environment */
+ /* step over environment */
+ while (*p++)
+ ;
+ for (auxv = (const struct auxv_entry *)p; auxv->type != AT_NULL; ++auxv) {
+ switch (auxv->type) {
+ case AT_HWCAP:
+ hwcap = auxv->val;
+ break;
+ case AT_HWCAP2:
+ hwcap2 = auxv->val;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (hwcap & HWCAP_PACA)
+ putstr("# HWCAP_PACA present\n");
+ else
+ putstr("# HWCAP_PACA not present\n");
+
+ if (hwcap2 & HWCAP2_BTI) {
+ putstr("# HWCAP2_BTI present\n");
+ if (!(hwcap & HWCAP_PACA))
+ putstr("# Bad hardware? Expect problems.\n");
+ have_bti = true;
+ } else {
+ putstr("# HWCAP2_BTI not present\n");
+ have_bti = false;
+ }
+
+ putstr("# Test binary");
+ if (!BTI)
+ putstr(" not");
+ putstr(" built for BTI\n");
+
+ sa.sa_handler = (sighandler_t)(void *)handler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGILL, &sa, NULL);
+ sigaddset(&sa.sa_mask, SIGILL);
+ sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL);
+
+ do_test(1, 1, 1, nohint_func);
+ do_test(1, 1, 1, bti_none_func);
+ do_test(1, 0, 0, bti_c_func);
+ do_test(0, 0, 1, bti_j_func);
+ do_test(0, 0, 0, bti_jc_func);
+ do_test(1, 0, 0, paciasp_func);
+
+ print_summary();
+
+ if (test_num - 1 != EXPECTED_TESTS)
+ putstr("# WARNING - EXPECTED TEST COUNT WRONG\n");
+
+ if (test_failed)
+ exit(1);
+ else
+ exit(0);
+}
diff --git a/tools/testing/selftests/arm64/bti/teststubs.S b/tools/testing/selftests/arm64/bti/teststubs.S
new file mode 100644
index 000000000000..b62c8c35f67e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/teststubs.S
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn bti_none_func
+ bti
+ ret
+endfn
+
+startfn bti_c_func
+ bti c
+ ret
+endfn
+
+startfn bti_j_func
+ bti j
+ ret
+endfn
+
+startfn bti_jc_func
+ bti jc
+ ret
+endfn
+
+startfn paciasp_func
+ paciasp
+ autiasp
+ ret
+endfn
+
+startfn nohint_func
+ ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/trampoline.S b/tools/testing/selftests/arm64/bti/trampoline.S
new file mode 100644
index 000000000000..09beb3f361f1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/trampoline.S
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn call_using_br_x0
+ bti c
+ br x0
+endfn
+
+startfn call_using_br_x16
+ bti c
+ mov x16, x0
+ br x16
+endfn
+
+startfn call_using_blr
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ blr x0
+ ldp x29, x30, [sp], #16
+ autiasp
+ ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index d66f76d2a650..8362e7ec35ad 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -1,5 +1,18 @@
+fp-pidbench
+fp-ptrace
+fp-stress
fpsimd-test
+kernel-test
+rdvl-sme
+rdvl-sve
sve-probe-vls
sve-ptrace
sve-test
+ssve-test
+vec-syscfg
vlset
+za-fork
+za-ptrace
+za-test
+zt-ptrace
+zt-test
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index a57009d3a0dc..d171021e4cdd 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -1,17 +1,54 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include/
-TEST_GEN_PROGS := sve-ptrace sve-probe-vls
-TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+# A proper top_srcdir is needed by KSFT(lib.mk)
+top_srcdir = $(realpath ../../../../../)
-all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+CFLAGS += $(KHDR_INCLUDES)
-fpsimd-test: fpsimd-test.o
+TEST_GEN_PROGS := \
+ fp-ptrace \
+ fp-stress \
+ sve-ptrace sve-probe-vls \
+ vec-syscfg \
+ za-fork za-ptrace
+TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
+ kernel-test \
+ rdvl-sme rdvl-sve \
+ sve-test \
+ ssve-test \
+ za-test \
+ zt-ptrace \
+ zt-test \
+ vlset
+TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress
+
+EXTRA_CLEAN += $(OUTPUT)/asm-utils.o $(OUTPUT)/rdvl.o $(OUTPUT)/za-fork-asm.o
+
+# Build with nolibc to avoid effects due to libc's clone() support
+$(OUTPUT)/fp-pidbench: fp-pidbench.S $(OUTPUT)/asm-utils.o
+ $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/fp-ptrace: fp-ptrace.c fp-ptrace-asm.S
+$(OUTPUT)/fpsimd-test: fpsimd-test.S $(OUTPUT)/asm-utils.o
+ $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/rdvl-sve: rdvl-sve.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/rdvl-sme: rdvl-sme.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-ptrace: sve-ptrace.c
+$(OUTPUT)/sve-probe-vls: sve-probe-vls.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-test: sve-test.S $(OUTPUT)/asm-utils.o
+ $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/ssve-test: sve-test.S $(OUTPUT)/asm-utils.o
+ $(CC) -DSSVE -nostdlib $^ -o $@
+$(OUTPUT)/vec-syscfg: vec-syscfg.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/vlset: vlset.c
+$(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o
+ $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ -include ../../../../include/nolibc/nolibc.h -I../..\
+ -static -ffreestanding -Wall $^ -o $@
+$(OUTPUT)/za-ptrace: za-ptrace.c
+$(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o
$(CC) -nostdlib $^ -o $@
-sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
-sve-probe-vls: sve-probe-vls.o
-sve-test: sve-test.o
+$(OUTPUT)/zt-ptrace: zt-ptrace.c
+$(OUTPUT)/zt-test: zt-test.S $(OUTPUT)/asm-utils.o
$(CC) -nostdlib $^ -o $@
-vlset: vlset.o
include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO
new file mode 100644
index 000000000000..44004e53da33
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/TODO
@@ -0,0 +1,7 @@
+- Test unsupported values in the ABIs.
+- More coverage for ptrace:
+ - Get/set of FFR.
+ - Ensure ptraced processes actually see the register state visible through
+ the ptrace interface.
+ - Big endian.
+- Test PR_SVE_VL_INHERIT after a double fork.
diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h
index a180851496ec..757b2fd75dd7 100644
--- a/tools/testing/selftests/arm64/fp/asm-offsets.h
+++ b/tools/testing/selftests/arm64/fp/asm-offsets.h
@@ -3,6 +3,7 @@
#define sa_handler 0
#define sa_mask_sz 8
#define SIGUSR1 10
+#define SIGUSR2 12
#define SIGTERM 15
#define SIGINT 2
#define SIGABRT 6
diff --git a/tools/testing/selftests/arm64/fp/asm-utils.S b/tools/testing/selftests/arm64/fp/asm-utils.S
new file mode 100644
index 000000000000..4b9728efc18d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-utils.S
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2021 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Utility functions for assembly code.
+
+#include <asm/unistd.h>
+#include "assembler.h"
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+.globl putc
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+.globl puts
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+ mov x1, sp
+ str x30, [sp, #-32]! // Result can't be > 20 digits
+
+ mov x2, #0
+ strb w2, [x1, #-1]! // Write the NUL terminator
+
+ mov x2, #10
+0: udiv x3, x0, x2 // div-mod loop to generate the digits
+ msub x0, x3, x2, x0
+ add w0, w0, #'0'
+ strb w0, [x1, #-1]!
+ mov x0, x3
+ cbnz x3, 0b
+
+ ldrb w0, [x1]
+ cbnz w0, 1f
+ mov w0, #'0' // Print "0" for 0, not ""
+ strb w0, [x1, #-1]!
+
+1: mov x0, x1
+ bl puts
+
+ ldr x30, [sp], #32
+ ret
+endfunction
+.globl putdec
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+ mov x5, x30
+
+ bl putdec
+ mov x0, #'\n'
+ bl putc
+
+ ret x5
+endfunction
+.globl putdecn
+
+// Clobbers x0-x3,x8
+function puthexb
+ str x30, [sp, #-0x10]!
+
+ mov w3, w0
+ lsr w0, w0, #4
+ bl puthexnibble
+ mov w0, w3
+
+ ldr x30, [sp], #0x10
+ // fall through to puthexnibble
+endfunction
+.globl puthexb
+
+// Clobbers x0-x2,x8
+function puthexnibble
+ and w0, w0, #0xf
+ cmp w0, #10
+ blo 1f
+ add w0, w0, #'a' - ('9' + 1)
+1: add w0, w0, #'0'
+ b putc
+endfunction
+.globl puthexnibble
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+ str x30, [sp, #-0x10]!
+
+ mov x4, x0
+ mov x5, x1
+
+0: subs x5, x5, #1
+ b.lo 1f
+ ldrb w0, [x4], #1
+ bl puthexb
+ b 0b
+
+1: ldr x30, [sp], #0x10
+ ret
+endfunction
+.globl dumphex
+
+ // Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+.globl memcpy
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+ mov w2, #0xae
+ b memfill
+endfunction
+.globl memfill_ae
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+ mov w2, #0
+endfunction
+.globl memclr
+ // fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+ cmp x1, #0
+ b.eq 1f
+
+0: strb w2, [x0], #1
+ subs x1, x1, #1
+ b.ne 0b
+
+1: ret
+endfunction
+.globl memfill
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
index 8944f2189206..1fc46a5642c2 100644
--- a/tools/testing/selftests/arm64/fp/assembler.h
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -54,4 +54,30 @@ endfunction
.purgem \name\()_entry
.endm
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", @progbits, 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+#define PR_SET_SHADOW_STACK_STATUS 75
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+
+.macro enable_gcs
+ // Run with GCS
+ mov x0, PR_SET_SHADOW_STACK_STATUS
+ mov x1, PR_SHADOW_STACK_ENABLE
+ mov x2, xzr
+ mov x3, xzr
+ mov x4, xzr
+ mov x5, xzr
+ mov x8, #__NR_prctl
+ svc #0
+.endm
+
#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/fp/fp-pidbench.S b/tools/testing/selftests/arm64/fp/fp-pidbench.S
new file mode 100644
index 000000000000..73830f6bc99b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-pidbench.S
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Trivial syscall overhead benchmark.
+//
+// This is implemented in asm to ensure that we don't have any issues with
+// system libraries using instructions that disrupt the test.
+
+#include <asm/unistd.h>
+#include "assembler.h"
+
+.arch_extension sve
+
+.macro test_loop per_loop
+ mov x10, x20
+ mov x8, #__NR_getpid
+ mrs x11, CNTVCT_EL0
+1:
+ \per_loop
+ svc #0
+ sub x10, x10, #1
+ cbnz x10, 1b
+
+ mrs x12, CNTVCT_EL0
+ sub x0, x12, x11
+ bl putdec
+ puts "\n"
+.endm
+
+// Main program entry point
+.globl _start
+function _start
+ puts "Iterations per test: "
+ mov x20, #10000
+ lsl x20, x20, #8
+ mov x0, x20
+ bl putdec
+ puts "\n"
+
+ // Test having never used SVE
+ puts "No SVE: "
+ test_loop
+
+ // Check for SVE support - should use hwcap but that's hard in asm
+ mrs x0, ID_AA64PFR0_EL1
+ ubfx x0, x0, #32, #4
+ cbnz x0, 1f
+ puts "System does not support SVE\n"
+ b out
+1:
+
+ // Execute a SVE instruction
+ puts "SVE VL: "
+ rdvl x0, #8
+ bl putdec
+ puts "\n"
+
+ puts "SVE used once: "
+ test_loop
+
+ // Use SVE per syscall
+ puts "SVE used per syscall: "
+ test_loop "rdvl x0, #8"
+
+ // And we're done
+out:
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S
new file mode 100644
index 000000000000..82c3ab70e1cf
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-3 ARM Limited.
+//
+// Assembly portion of the FP ptrace test
+
+//
+// Load values from memory into registers, break on a breakpoint, then
+// break on a further breakpoint
+//
+
+#include "fp-ptrace.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+// Load and save register values with pauses for ptrace
+//
+// x0 - HAVE_ flags indicating which features are in use
+
+.globl load_and_save
+load_and_save:
+ stp x11, x12, [sp, #-0x10]!
+
+ // This should be redundant in the SVE case
+ ldr x7, =v_in
+ ldp q0, q1, [x7]
+ ldp q2, q3, [x7, #16 * 2]
+ ldp q4, q5, [x7, #16 * 4]
+ ldp q6, q7, [x7, #16 * 6]
+ ldp q8, q9, [x7, #16 * 8]
+ ldp q10, q11, [x7, #16 * 10]
+ ldp q12, q13, [x7, #16 * 12]
+ ldp q14, q15, [x7, #16 * 14]
+ ldp q16, q17, [x7, #16 * 16]
+ ldp q18, q19, [x7, #16 * 18]
+ ldp q20, q21, [x7, #16 * 20]
+ ldp q22, q23, [x7, #16 * 22]
+ ldp q24, q25, [x7, #16 * 24]
+ ldp q26, q27, [x7, #16 * 26]
+ ldp q28, q29, [x7, #16 * 28]
+ ldp q30, q31, [x7, #16 * 30]
+
+ // SME?
+ tbz x0, #HAVE_SME_SHIFT, check_sve_in
+
+ adrp x7, svcr_in
+ ldr x7, [x7, :lo12:svcr_in]
+ // SVCR is 0 by default, avoid triggering SME if not in use
+ cbz x7, check_sve_in
+ msr S3_3_C4_C2_2, x7
+
+ // ZA?
+ tbz x7, #SVCR_ZA_SHIFT, check_sm_in
+ rdsvl 11, 1
+ mov w12, #0
+ ldr x6, =za_in
+1: _ldr_za 12, 6
+ add x6, x6, x11
+ add x12, x12, #1
+ cmp x11, x12
+ bne 1b
+
+ // ZT?
+ tbz x0, #HAVE_SME2_SHIFT, check_sm_in
+ adrp x6, zt_in
+ add x6, x6, :lo12:zt_in
+ _ldr_zt 6
+
+ // In streaming mode?
+check_sm_in:
+ tbz x7, #SVCR_SM_SHIFT, check_sve_in
+
+ // Load FFR if we have FA64
+ ubfx x4, x0, #HAVE_FA64_SHIFT, #1
+ b load_sve
+
+ // SVE?
+check_sve_in:
+ tbz x0, #HAVE_SVE_SHIFT, check_fpmr_in
+ mov x4, #1
+
+load_sve:
+ ldr x7, =z_in
+ ldr z0, [x7, #0, MUL VL]
+ ldr z1, [x7, #1, MUL VL]
+ ldr z2, [x7, #2, MUL VL]
+ ldr z3, [x7, #3, MUL VL]
+ ldr z4, [x7, #4, MUL VL]
+ ldr z5, [x7, #5, MUL VL]
+ ldr z6, [x7, #6, MUL VL]
+ ldr z7, [x7, #7, MUL VL]
+ ldr z8, [x7, #8, MUL VL]
+ ldr z9, [x7, #9, MUL VL]
+ ldr z10, [x7, #10, MUL VL]
+ ldr z11, [x7, #11, MUL VL]
+ ldr z12, [x7, #12, MUL VL]
+ ldr z13, [x7, #13, MUL VL]
+ ldr z14, [x7, #14, MUL VL]
+ ldr z15, [x7, #15, MUL VL]
+ ldr z16, [x7, #16, MUL VL]
+ ldr z17, [x7, #17, MUL VL]
+ ldr z18, [x7, #18, MUL VL]
+ ldr z19, [x7, #19, MUL VL]
+ ldr z20, [x7, #20, MUL VL]
+ ldr z21, [x7, #21, MUL VL]
+ ldr z22, [x7, #22, MUL VL]
+ ldr z23, [x7, #23, MUL VL]
+ ldr z24, [x7, #24, MUL VL]
+ ldr z25, [x7, #25, MUL VL]
+ ldr z26, [x7, #26, MUL VL]
+ ldr z27, [x7, #27, MUL VL]
+ ldr z28, [x7, #28, MUL VL]
+ ldr z29, [x7, #29, MUL VL]
+ ldr z30, [x7, #30, MUL VL]
+ ldr z31, [x7, #31, MUL VL]
+
+ // FFR is not present in base SME
+ cbz x4, 1f
+ ldr x7, =ffr_in
+ ldr p0, [x7]
+ ldr x7, [x7, #0]
+ cbz x7, 1f
+ wrffr p0.b
+1:
+
+ ldr x7, =p_in
+ ldr p0, [x7, #0, MUL VL]
+ ldr p1, [x7, #1, MUL VL]
+ ldr p2, [x7, #2, MUL VL]
+ ldr p3, [x7, #3, MUL VL]
+ ldr p4, [x7, #4, MUL VL]
+ ldr p5, [x7, #5, MUL VL]
+ ldr p6, [x7, #6, MUL VL]
+ ldr p7, [x7, #7, MUL VL]
+ ldr p8, [x7, #8, MUL VL]
+ ldr p9, [x7, #9, MUL VL]
+ ldr p10, [x7, #10, MUL VL]
+ ldr p11, [x7, #11, MUL VL]
+ ldr p12, [x7, #12, MUL VL]
+ ldr p13, [x7, #13, MUL VL]
+ ldr p14, [x7, #14, MUL VL]
+ ldr p15, [x7, #15, MUL VL]
+
+ // This has to come after we set PSTATE.SM
+check_fpmr_in:
+ tbz x0, #HAVE_FPMR_SHIFT, wait_for_writes
+ adrp x7, fpmr_in
+ ldr x7, [x7, :lo12:fpmr_in]
+ msr REG_FPMR, x7
+
+wait_for_writes:
+ // Wait for the parent
+ brk #0
+
+ // Save values
+ ldr x7, =v_out
+ stp q0, q1, [x7]
+ stp q2, q3, [x7, #16 * 2]
+ stp q4, q5, [x7, #16 * 4]
+ stp q6, q7, [x7, #16 * 6]
+ stp q8, q9, [x7, #16 * 8]
+ stp q10, q11, [x7, #16 * 10]
+ stp q12, q13, [x7, #16 * 12]
+ stp q14, q15, [x7, #16 * 14]
+ stp q16, q17, [x7, #16 * 16]
+ stp q18, q19, [x7, #16 * 18]
+ stp q20, q21, [x7, #16 * 20]
+ stp q22, q23, [x7, #16 * 22]
+ stp q24, q25, [x7, #16 * 24]
+ stp q26, q27, [x7, #16 * 26]
+ stp q28, q29, [x7, #16 * 28]
+ stp q30, q31, [x7, #16 * 30]
+
+ tbz x0, #HAVE_FPMR_SHIFT, check_sme_out
+ mrs x7, REG_FPMR
+ adrp x6, fpmr_out
+ str x7, [x6, :lo12:fpmr_out]
+
+check_sme_out:
+ tbz x0, #HAVE_SME_SHIFT, check_sve_out
+
+ rdsvl 11, 1
+ adrp x6, sme_vl_out
+ str x11, [x6, :lo12:sme_vl_out]
+
+ mrs x7, S3_3_C4_C2_2
+ adrp x6, svcr_out
+ str x7, [x6, :lo12:svcr_out]
+
+ // ZA?
+ tbz x7, #SVCR_ZA_SHIFT, check_sm_out
+ mov w12, #0
+ ldr x6, =za_out
+1: _str_za 12, 6
+ add x6, x6, x11
+ add x12, x12, #1
+ cmp x11, x12
+ bne 1b
+
+ // ZT?
+ tbz x0, #HAVE_SME2_SHIFT, check_sm_out
+ adrp x6, zt_out
+ add x6, x6, :lo12:zt_out
+ _str_zt 6
+
+ // In streaming mode?
+check_sm_out:
+ tbz x7, #SVCR_SM_SHIFT, check_sve_out
+
+ // Do we have FA64 and FFR?
+ ubfx x4, x0, #HAVE_FA64_SHIFT, #1
+ b read_sve
+
+ // SVE?
+check_sve_out:
+ tbz x0, #HAVE_SVE_SHIFT, wait_for_reads
+ mov x4, #1
+
+ rdvl x7, #1
+ adrp x6, sve_vl_out
+ str x7, [x6, :lo12:sve_vl_out]
+
+read_sve:
+ ldr x7, =z_out
+ str z0, [x7, #0, MUL VL]
+ str z1, [x7, #1, MUL VL]
+ str z2, [x7, #2, MUL VL]
+ str z3, [x7, #3, MUL VL]
+ str z4, [x7, #4, MUL VL]
+ str z5, [x7, #5, MUL VL]
+ str z6, [x7, #6, MUL VL]
+ str z7, [x7, #7, MUL VL]
+ str z8, [x7, #8, MUL VL]
+ str z9, [x7, #9, MUL VL]
+ str z10, [x7, #10, MUL VL]
+ str z11, [x7, #11, MUL VL]
+ str z12, [x7, #12, MUL VL]
+ str z13, [x7, #13, MUL VL]
+ str z14, [x7, #14, MUL VL]
+ str z15, [x7, #15, MUL VL]
+ str z16, [x7, #16, MUL VL]
+ str z17, [x7, #17, MUL VL]
+ str z18, [x7, #18, MUL VL]
+ str z19, [x7, #19, MUL VL]
+ str z20, [x7, #20, MUL VL]
+ str z21, [x7, #21, MUL VL]
+ str z22, [x7, #22, MUL VL]
+ str z23, [x7, #23, MUL VL]
+ str z24, [x7, #24, MUL VL]
+ str z25, [x7, #25, MUL VL]
+ str z26, [x7, #26, MUL VL]
+ str z27, [x7, #27, MUL VL]
+ str z28, [x7, #28, MUL VL]
+ str z29, [x7, #29, MUL VL]
+ str z30, [x7, #30, MUL VL]
+ str z31, [x7, #31, MUL VL]
+
+ ldr x7, =p_out
+ str p0, [x7, #0, MUL VL]
+ str p1, [x7, #1, MUL VL]
+ str p2, [x7, #2, MUL VL]
+ str p3, [x7, #3, MUL VL]
+ str p4, [x7, #4, MUL VL]
+ str p5, [x7, #5, MUL VL]
+ str p6, [x7, #6, MUL VL]
+ str p7, [x7, #7, MUL VL]
+ str p8, [x7, #8, MUL VL]
+ str p9, [x7, #9, MUL VL]
+ str p10, [x7, #10, MUL VL]
+ str p11, [x7, #11, MUL VL]
+ str p12, [x7, #12, MUL VL]
+ str p13, [x7, #13, MUL VL]
+ str p14, [x7, #14, MUL VL]
+ str p15, [x7, #15, MUL VL]
+
+ // Only save FFR if it exists
+ cbz x4, wait_for_reads
+ ldr x7, =ffr_out
+ rdffr p0.b
+ str p0, [x7]
+
+wait_for_reads:
+ // Wait for the parent
+ brk #0
+
+ // Ensure we don't leave ourselves in streaming mode
+ tbz x0, #HAVE_SME_SHIFT, out
+ msr S3_3_C4_C2_2, xzr
+
+out:
+ ldp x11, x12, [sp, #-0x10]
+ ret
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
new file mode 100644
index 000000000000..22c584b78be5
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -0,0 +1,1692 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+
+#include <linux/kernel.h>
+
+#include <asm/sigcontext.h>
+#include <asm/sve_context.h>
+#include <asm/ptrace.h>
+
+#include "kselftest.h"
+
+#include "fp-ptrace.h"
+
+#include <linux/bits.h>
+
+#define FPMR_LSCALE2_MASK GENMASK(37, 32)
+#define FPMR_NSCALE_MASK GENMASK(31, 24)
+#define FPMR_LSCALE_MASK GENMASK(22, 16)
+#define FPMR_OSC_MASK GENMASK(15, 15)
+#define FPMR_OSM_MASK GENMASK(14, 14)
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_SVE
+#define NT_ARM_SVE 0x405
+#endif
+
+#ifndef NT_ARM_SSVE
+#define NT_ARM_SSVE 0x40b
+#endif
+
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+
+#ifndef NT_ARM_ZT
+#define NT_ARM_ZT 0x40d
+#endif
+
+#ifndef NT_ARM_FPMR
+#define NT_ARM_FPMR 0x40e
+#endif
+
+#define ARCH_VQ_MAX 256
+
+/* VL 128..2048 in powers of 2 */
+#define MAX_NUM_VLS 5
+
+/*
+ * FPMR bits we can set without doing feature checks to see if values
+ * are valid.
+ */
+#define FPMR_SAFE_BITS (FPMR_LSCALE2_MASK | FPMR_NSCALE_MASK | \
+ FPMR_LSCALE_MASK | FPMR_OSC_MASK | FPMR_OSM_MASK)
+
+#define NUM_FPR 32
+__uint128_t v_in[NUM_FPR];
+__uint128_t v_expected[NUM_FPR];
+__uint128_t v_out[NUM_FPR];
+
+char z_in[__SVE_ZREGS_SIZE(ARCH_VQ_MAX)];
+char z_expected[__SVE_ZREGS_SIZE(ARCH_VQ_MAX)];
+char z_out[__SVE_ZREGS_SIZE(ARCH_VQ_MAX)];
+
+char p_in[__SVE_PREGS_SIZE(ARCH_VQ_MAX)];
+char p_expected[__SVE_PREGS_SIZE(ARCH_VQ_MAX)];
+char p_out[__SVE_PREGS_SIZE(ARCH_VQ_MAX)];
+
+char ffr_in[__SVE_PREG_SIZE(ARCH_VQ_MAX)];
+char ffr_expected[__SVE_PREG_SIZE(ARCH_VQ_MAX)];
+char ffr_out[__SVE_PREG_SIZE(ARCH_VQ_MAX)];
+
+char za_in[ZA_SIG_REGS_SIZE(ARCH_VQ_MAX)];
+char za_expected[ZA_SIG_REGS_SIZE(ARCH_VQ_MAX)];
+char za_out[ZA_SIG_REGS_SIZE(ARCH_VQ_MAX)];
+
+char zt_in[ZT_SIG_REG_BYTES];
+char zt_expected[ZT_SIG_REG_BYTES];
+char zt_out[ZT_SIG_REG_BYTES];
+
+uint64_t fpmr_in, fpmr_expected, fpmr_out;
+
+uint64_t sve_vl_out;
+uint64_t sme_vl_out;
+uint64_t svcr_in, svcr_expected, svcr_out;
+
+void load_and_save(int flags);
+
+static bool got_alarm;
+
+static void handle_alarm(int sig, siginfo_t *info, void *context)
+{
+ got_alarm = true;
+}
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+ u64 a = swab64(x);
+ u64 b = swab64(x >> 64);
+
+ return ((__uint128_t)a << 64) | b;
+}
+#else
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+ return x;
+}
+#endif
+
+#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
+
+static bool sve_supported(void)
+{
+ return getauxval(AT_HWCAP) & HWCAP_SVE;
+}
+
+static bool sme_supported(void)
+{
+ return getauxval(AT_HWCAP2) & HWCAP2_SME;
+}
+
+static bool sme2_supported(void)
+{
+ return getauxval(AT_HWCAP2) & HWCAP2_SME2;
+}
+
+static bool fa64_supported(void)
+{
+ return getauxval(AT_HWCAP2) & HWCAP2_SME_FA64;
+}
+
+static bool fpmr_supported(void)
+{
+ return getauxval(AT_HWCAP2) & HWCAP2_FPMR;
+}
+
+static bool compare_buffer(const char *name, void *out,
+ void *expected, size_t size)
+{
+ void *tmp;
+
+ if (memcmp(out, expected, size) == 0)
+ return true;
+
+ ksft_print_msg("Mismatch in %s\n", name);
+
+ /* Did we just get zeros back? */
+ tmp = malloc(size);
+ if (!tmp) {
+ ksft_print_msg("OOM allocating %lu bytes for %s\n",
+ size, name);
+ ksft_exit_fail();
+ }
+ memset(tmp, 0, size);
+
+ if (memcmp(out, tmp, size) == 0)
+ ksft_print_msg("%s is zero\n", name);
+
+ free(tmp);
+
+ return false;
+}
+
+struct test_config {
+ int sve_vl_in;
+ int sve_vl_expected;
+ int sme_vl_in;
+ int sme_vl_expected;
+ int svcr_in;
+ int svcr_expected;
+};
+
+struct test_definition {
+ const char *name;
+ bool sve_vl_change;
+ bool (*supported)(struct test_config *config);
+ void (*set_expected_values)(struct test_config *config);
+ void (*modify_values)(pid_t child, struct test_config *test_config);
+};
+
+static int vl_in(struct test_config *config)
+{
+ int vl;
+
+ if (config->svcr_in & SVCR_SM)
+ vl = config->sme_vl_in;
+ else
+ vl = config->sve_vl_in;
+
+ return vl;
+}
+
+static int vl_expected(struct test_config *config)
+{
+ int vl;
+
+ if (config->svcr_expected & SVCR_SM)
+ vl = config->sme_vl_expected;
+ else
+ vl = config->sve_vl_expected;
+
+ return vl;
+}
+
+static void run_child(struct test_config *config)
+{
+ int ret, flags;
+
+ /* Let the parent attach to us */
+ ret = ptrace(PTRACE_TRACEME, 0, 0, 0);
+ if (ret < 0)
+ ksft_exit_fail_msg("PTRACE_TRACEME failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ /* VL setup */
+ if (sve_supported()) {
+ ret = prctl(PR_SVE_SET_VL, config->sve_vl_in);
+ if (ret != config->sve_vl_in) {
+ ksft_print_msg("Failed to set SVE VL %d: %d\n",
+ config->sve_vl_in, ret);
+ }
+ }
+
+ if (sme_supported()) {
+ ret = prctl(PR_SME_SET_VL, config->sme_vl_in);
+ if (ret != config->sme_vl_in) {
+ ksft_print_msg("Failed to set SME VL %d: %d\n",
+ config->sme_vl_in, ret);
+ }
+ }
+
+ /* Load values and wait for the parent */
+ flags = 0;
+ if (sve_supported())
+ flags |= HAVE_SVE;
+ if (sme_supported())
+ flags |= HAVE_SME;
+ if (sme2_supported())
+ flags |= HAVE_SME2;
+ if (fa64_supported())
+ flags |= HAVE_FA64;
+ if (fpmr_supported())
+ flags |= HAVE_FPMR;
+
+ load_and_save(flags);
+
+ exit(0);
+}
+
+static void read_one_child_regs(pid_t child, char *name,
+ struct iovec *iov_parent,
+ struct iovec *iov_child)
+{
+ int len = iov_parent->iov_len;
+ int ret;
+
+ ret = process_vm_readv(child, iov_parent, 1, iov_child, 1, 0);
+ if (ret == -1)
+ ksft_print_msg("%s read failed: %s (%d)\n",
+ name, strerror(errno), errno);
+ else if (ret != len)
+ ksft_print_msg("Short read of %s: %d\n", name, ret);
+}
+
+static void read_child_regs(pid_t child)
+{
+ struct iovec iov_parent, iov_child;
+
+ /*
+ * Since the child fork()ed from us the buffer addresses are
+ * the same in parent and child.
+ */
+ iov_parent.iov_base = &v_out;
+ iov_parent.iov_len = sizeof(v_out);
+ iov_child.iov_base = &v_out;
+ iov_child.iov_len = sizeof(v_out);
+ read_one_child_regs(child, "FPSIMD", &iov_parent, &iov_child);
+
+ if (sve_supported() || sme_supported()) {
+ iov_parent.iov_base = &sve_vl_out;
+ iov_parent.iov_len = sizeof(sve_vl_out);
+ iov_child.iov_base = &sve_vl_out;
+ iov_child.iov_len = sizeof(sve_vl_out);
+ read_one_child_regs(child, "SVE VL", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &z_out;
+ iov_parent.iov_len = sizeof(z_out);
+ iov_child.iov_base = &z_out;
+ iov_child.iov_len = sizeof(z_out);
+ read_one_child_regs(child, "Z", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &p_out;
+ iov_parent.iov_len = sizeof(p_out);
+ iov_child.iov_base = &p_out;
+ iov_child.iov_len = sizeof(p_out);
+ read_one_child_regs(child, "P", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &ffr_out;
+ iov_parent.iov_len = sizeof(ffr_out);
+ iov_child.iov_base = &ffr_out;
+ iov_child.iov_len = sizeof(ffr_out);
+ read_one_child_regs(child, "FFR", &iov_parent, &iov_child);
+ }
+
+ if (sme_supported()) {
+ iov_parent.iov_base = &sme_vl_out;
+ iov_parent.iov_len = sizeof(sme_vl_out);
+ iov_child.iov_base = &sme_vl_out;
+ iov_child.iov_len = sizeof(sme_vl_out);
+ read_one_child_regs(child, "SME VL", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &svcr_out;
+ iov_parent.iov_len = sizeof(svcr_out);
+ iov_child.iov_base = &svcr_out;
+ iov_child.iov_len = sizeof(svcr_out);
+ read_one_child_regs(child, "SVCR", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &za_out;
+ iov_parent.iov_len = sizeof(za_out);
+ iov_child.iov_base = &za_out;
+ iov_child.iov_len = sizeof(za_out);
+ read_one_child_regs(child, "ZA", &iov_parent, &iov_child);
+ }
+
+ if (sme2_supported()) {
+ iov_parent.iov_base = &zt_out;
+ iov_parent.iov_len = sizeof(zt_out);
+ iov_child.iov_base = &zt_out;
+ iov_child.iov_len = sizeof(zt_out);
+ read_one_child_regs(child, "ZT", &iov_parent, &iov_child);
+ }
+
+ if (fpmr_supported()) {
+ iov_parent.iov_base = &fpmr_out;
+ iov_parent.iov_len = sizeof(fpmr_out);
+ iov_child.iov_base = &fpmr_out;
+ iov_child.iov_len = sizeof(fpmr_out);
+ read_one_child_regs(child, "FPMR", &iov_parent, &iov_child);
+ }
+}
+
+static bool continue_breakpoint(pid_t child,
+ enum __ptrace_request restart_type)
+{
+ struct user_pt_regs pt_regs;
+ struct iovec iov;
+ int ret;
+
+ /* Get PC */
+ iov.iov_base = &pt_regs;
+ iov.iov_len = sizeof(pt_regs);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov);
+ if (ret < 0) {
+ ksft_print_msg("Failed to get PC: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ /* Skip over the BRK */
+ pt_regs.pc += 4;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PRSTATUS, &iov);
+ if (ret < 0) {
+ ksft_print_msg("Failed to skip BRK: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ /* Restart */
+ ret = ptrace(restart_type, child, 0, 0);
+ if (ret < 0) {
+ ksft_print_msg("Failed to restart child: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ return true;
+}
+
+static bool check_ptrace_values_sve(pid_t child, struct test_config *config)
+{
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd;
+ struct iovec iov;
+ int ret, vq;
+ bool pass = true;
+
+ if (!sve_supported())
+ return true;
+
+ vq = __sve_vq_from_vl(config->sve_vl_in);
+
+ iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("OOM allocating %lu byte SVE buffer\n",
+ iov.iov_len);
+ return false;
+ }
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_SVE, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read initial SVE: %s (%d)\n",
+ strerror(errno), errno);
+ pass = false;
+ goto out;
+ }
+
+ sve = iov.iov_base;
+
+ if (sve->vl != config->sve_vl_in) {
+ ksft_print_msg("Mismatch in initial SVE VL: %d != %d\n",
+ sve->vl, config->sve_vl_in);
+ pass = false;
+ }
+
+ /* If we are in streaming mode we should just read FPSIMD */
+ if ((config->svcr_in & SVCR_SM) && (sve->flags & SVE_PT_REGS_SVE)) {
+ ksft_print_msg("NT_ARM_SVE reports SVE with PSTATE.SM\n");
+ pass = false;
+ }
+
+ if (svcr_in & SVCR_SM) {
+ if (sve->size != sizeof(sve)) {
+ ksft_print_msg("NT_ARM_SVE reports data with PSTATE.SM\n");
+ pass = false;
+ }
+ } else {
+ if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
+ ksft_print_msg("Mismatch in SVE header size: %d != %lu\n",
+ sve->size, SVE_PT_SIZE(vq, sve->flags));
+ pass = false;
+ }
+ }
+
+ /* The registers might be in completely different formats! */
+ if (sve->flags & SVE_PT_REGS_SVE) {
+ if (!compare_buffer("initial SVE Z",
+ iov.iov_base + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+ z_in, SVE_PT_SVE_ZREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("initial SVE P",
+ iov.iov_base + SVE_PT_SVE_PREG_OFFSET(vq, 0),
+ p_in, SVE_PT_SVE_PREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("initial SVE FFR",
+ iov.iov_base + SVE_PT_SVE_FFR_OFFSET(vq),
+ ffr_in, SVE_PT_SVE_PREG_SIZE(vq)))
+ pass = false;
+ } else {
+ fpsimd = iov.iov_base + SVE_PT_FPSIMD_OFFSET;
+ if (!compare_buffer("initial V via SVE", &fpsimd->vregs[0],
+ v_in, sizeof(v_in)))
+ pass = false;
+ }
+
+out:
+ free(iov.iov_base);
+ return pass;
+}
+
+static bool check_ptrace_values_ssve(pid_t child, struct test_config *config)
+{
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd;
+ struct iovec iov;
+ int ret, vq;
+ bool pass = true;
+
+ if (!sme_supported())
+ return true;
+
+ vq = __sve_vq_from_vl(config->sme_vl_in);
+
+ iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("OOM allocating %lu byte SSVE buffer\n",
+ iov.iov_len);
+ return false;
+ }
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_SSVE, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read initial SSVE: %s (%d)\n",
+ strerror(errno), errno);
+ pass = false;
+ goto out;
+ }
+
+ sve = iov.iov_base;
+
+ if (sve->vl != config->sme_vl_in) {
+ ksft_print_msg("Mismatch in initial SSVE VL: %d != %d\n",
+ sve->vl, config->sme_vl_in);
+ pass = false;
+ }
+
+ if ((config->svcr_in & SVCR_SM) && !(sve->flags & SVE_PT_REGS_SVE)) {
+ ksft_print_msg("NT_ARM_SSVE reports FPSIMD with PSTATE.SM\n");
+ pass = false;
+ }
+
+ if (!(svcr_in & SVCR_SM)) {
+ if (sve->size != sizeof(sve)) {
+ ksft_print_msg("NT_ARM_SSVE reports data without PSTATE.SM\n");
+ pass = false;
+ }
+ } else {
+ if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
+ ksft_print_msg("Mismatch in SSVE header size: %d != %lu\n",
+ sve->size, SVE_PT_SIZE(vq, sve->flags));
+ pass = false;
+ }
+ }
+
+ /* The registers might be in completely different formats! */
+ if (sve->flags & SVE_PT_REGS_SVE) {
+ if (!compare_buffer("initial SSVE Z",
+ iov.iov_base + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+ z_in, SVE_PT_SVE_ZREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("initial SSVE P",
+ iov.iov_base + SVE_PT_SVE_PREG_OFFSET(vq, 0),
+ p_in, SVE_PT_SVE_PREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("initial SSVE FFR",
+ iov.iov_base + SVE_PT_SVE_FFR_OFFSET(vq),
+ ffr_in, SVE_PT_SVE_PREG_SIZE(vq)))
+ pass = false;
+ } else {
+ fpsimd = iov.iov_base + SVE_PT_FPSIMD_OFFSET;
+ if (!compare_buffer("initial V via SSVE",
+ &fpsimd->vregs[0], v_in, sizeof(v_in)))
+ pass = false;
+ }
+
+out:
+ free(iov.iov_base);
+ return pass;
+}
+
+static bool check_ptrace_values_za(pid_t child, struct test_config *config)
+{
+ struct user_za_header *za;
+ struct iovec iov;
+ int ret, vq;
+ bool pass = true;
+
+ if (!sme_supported())
+ return true;
+
+ vq = __sve_vq_from_vl(config->sme_vl_in);
+
+ iov.iov_len = ZA_SIG_CONTEXT_SIZE(vq);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("OOM allocating %lu byte ZA buffer\n",
+ iov.iov_len);
+ return false;
+ }
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_ZA, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read initial ZA: %s (%d)\n",
+ strerror(errno), errno);
+ pass = false;
+ goto out;
+ }
+
+ za = iov.iov_base;
+
+ if (za->vl != config->sme_vl_in) {
+ ksft_print_msg("Mismatch in initial SME VL: %d != %d\n",
+ za->vl, config->sme_vl_in);
+ pass = false;
+ }
+
+ /* If PSTATE.ZA is not set we should just read the header */
+ if (config->svcr_in & SVCR_ZA) {
+ if (za->size != ZA_PT_SIZE(vq)) {
+ ksft_print_msg("Unexpected ZA ptrace read size: %d != %lu\n",
+ za->size, ZA_PT_SIZE(vq));
+ pass = false;
+ }
+
+ if (!compare_buffer("initial ZA",
+ iov.iov_base + ZA_PT_ZA_OFFSET,
+ za_in, ZA_PT_ZA_SIZE(vq)))
+ pass = false;
+ } else {
+ if (za->size != sizeof(*za)) {
+ ksft_print_msg("Unexpected ZA ptrace read size: %d != %lu\n",
+ za->size, sizeof(*za));
+ pass = false;
+ }
+ }
+
+out:
+ free(iov.iov_base);
+ return pass;
+}
+
+static bool check_ptrace_values_zt(pid_t child, struct test_config *config)
+{
+ uint8_t buf[512];
+ struct iovec iov;
+ int ret;
+
+ if (!sme2_supported())
+ return true;
+
+ iov.iov_base = &buf;
+ iov.iov_len = ZT_SIG_REG_BYTES;
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_ZT, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read initial ZT: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ return compare_buffer("initial ZT", buf, zt_in, ZT_SIG_REG_BYTES);
+}
+
+static bool check_ptrace_values_fpmr(pid_t child, struct test_config *config)
+{
+ uint64_t val;
+ struct iovec iov;
+ int ret;
+
+ if (!fpmr_supported())
+ return true;
+
+ iov.iov_base = &val;
+ iov.iov_len = sizeof(val);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_FPMR, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read initial FPMR: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ return compare_buffer("initial FPMR", &val, &fpmr_in, sizeof(val));
+}
+
+static bool check_ptrace_values(pid_t child, struct test_config *config)
+{
+ bool pass = true;
+ struct user_fpsimd_state fpsimd;
+ struct iovec iov;
+ int ret;
+
+ iov.iov_base = &fpsimd;
+ iov.iov_len = sizeof(fpsimd);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PRFPREG, &iov);
+ if (ret == 0) {
+ if (!compare_buffer("initial V", &fpsimd.vregs, v_in,
+ sizeof(v_in))) {
+ pass = false;
+ }
+ } else {
+ ksft_print_msg("Failed to read initial V: %s (%d)\n",
+ strerror(errno), errno);
+ pass = false;
+ }
+
+ if (!check_ptrace_values_sve(child, config))
+ pass = false;
+
+ if (!check_ptrace_values_ssve(child, config))
+ pass = false;
+
+ if (!check_ptrace_values_za(child, config))
+ pass = false;
+
+ if (!check_ptrace_values_zt(child, config))
+ pass = false;
+
+ if (!check_ptrace_values_fpmr(child, config))
+ pass = false;
+
+ return pass;
+}
+
+static bool run_parent(pid_t child, struct test_definition *test,
+ struct test_config *config)
+{
+ int wait_status, ret;
+ pid_t pid;
+ bool pass;
+
+ /* Initial attach */
+ while (1) {
+ pid = waitpid(child, &wait_status, 0);
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("waitpid() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ if (pid == child)
+ break;
+ }
+
+ if (WIFEXITED(wait_status)) {
+ ksft_print_msg("Child exited loading values with status %d\n",
+ WEXITSTATUS(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ if (WIFSIGNALED(wait_status)) {
+ ksft_print_msg("Child died from signal %d loading values\n",
+ WTERMSIG(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ /* Read initial values via ptrace */
+ pass = check_ptrace_values(child, config);
+
+ /* Do whatever writes we want to do */
+ if (test->modify_values)
+ test->modify_values(child, config);
+
+ if (!continue_breakpoint(child, PTRACE_CONT))
+ goto cleanup;
+
+ while (1) {
+ pid = waitpid(child, &wait_status, 0);
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("waitpid() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ if (pid == child)
+ break;
+ }
+
+ if (WIFEXITED(wait_status)) {
+ ksft_print_msg("Child exited saving values with status %d\n",
+ WEXITSTATUS(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ if (WIFSIGNALED(wait_status)) {
+ ksft_print_msg("Child died from signal %d saving values\n",
+ WTERMSIG(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ /* See what happened as a result */
+ read_child_regs(child);
+
+ if (!continue_breakpoint(child, PTRACE_DETACH))
+ goto cleanup;
+
+ /* The child should exit cleanly */
+ got_alarm = false;
+ alarm(1);
+ while (1) {
+ if (got_alarm) {
+ ksft_print_msg("Wait for child timed out\n");
+ goto cleanup;
+ }
+
+ pid = waitpid(child, &wait_status, 0);
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("waitpid() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ if (pid == child)
+ break;
+ }
+ alarm(0);
+
+ if (got_alarm) {
+ ksft_print_msg("Timed out waiting for child\n");
+ pass = false;
+ goto cleanup;
+ }
+
+ if (pid == child && WIFSIGNALED(wait_status)) {
+ ksft_print_msg("Child died from signal %d cleaning up\n",
+ WTERMSIG(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ if (pid == child && WIFEXITED(wait_status)) {
+ if (WEXITSTATUS(wait_status) != 0) {
+ ksft_print_msg("Child exited with error %d\n",
+ WEXITSTATUS(wait_status));
+ pass = false;
+ }
+ } else {
+ ksft_print_msg("Child did not exit cleanly\n");
+ pass = false;
+ goto cleanup;
+ }
+
+ goto out;
+
+cleanup:
+ ret = kill(child, SIGKILL);
+ if (ret != 0) {
+ ksft_print_msg("kill() failed: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ while (1) {
+ pid = waitpid(child, &wait_status, 0);
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("waitpid() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ if (pid == child)
+ break;
+ }
+
+out:
+ return pass;
+}
+
+static void fill_random(void *buf, size_t size)
+{
+ int i;
+ uint32_t *lbuf = buf;
+
+ /* random() returns a 32 bit number regardless of the size of long */
+ for (i = 0; i < size / sizeof(uint32_t); i++)
+ lbuf[i] = random();
+}
+
+static void fill_random_ffr(void *buf, size_t vq)
+{
+ uint8_t *lbuf = buf;
+ int bits, i;
+
+ /*
+ * Only values with a continuous set of 0..n bits set are
+ * valid for FFR, set all bits then clear a random number of
+ * high bits.
+ */
+ memset(buf, 0, __SVE_FFR_SIZE(vq));
+
+ bits = random() % (__SVE_FFR_SIZE(vq) * 8);
+ for (i = 0; i < bits / 8; i++)
+ lbuf[i] = 0xff;
+ if (bits / 8 != __SVE_FFR_SIZE(vq))
+ lbuf[i] = (1 << (bits % 8)) - 1;
+}
+
+static void fpsimd_to_sve(__uint128_t *v, char *z, int vl)
+{
+ int vq = __sve_vq_from_vl(vl);
+ int i;
+ __uint128_t *p;
+
+ if (!vl)
+ return;
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+ p = (__uint128_t *)&z[__SVE_ZREG_OFFSET(vq, i)];
+ *p = arm64_cpu_to_le128(v[i]);
+ }
+}
+
+static void set_initial_values(struct test_config *config)
+{
+ int vq = __sve_vq_from_vl(vl_in(config));
+ int sme_vq = __sve_vq_from_vl(config->sme_vl_in);
+
+ svcr_in = config->svcr_in;
+ svcr_expected = config->svcr_expected;
+ svcr_out = 0;
+
+ fill_random(&v_in, sizeof(v_in));
+ memcpy(v_expected, v_in, sizeof(v_in));
+ memset(v_out, 0, sizeof(v_out));
+
+ /* Changes will be handled in the test case */
+ if (sve_supported() || (config->svcr_in & SVCR_SM)) {
+ /* The low 128 bits of Z are shared with the V registers */
+ fill_random(&z_in, __SVE_ZREGS_SIZE(vq));
+ fpsimd_to_sve(v_in, z_in, vl_in(config));
+ memcpy(z_expected, z_in, __SVE_ZREGS_SIZE(vq));
+ memset(z_out, 0, sizeof(z_out));
+
+ fill_random(&p_in, __SVE_PREGS_SIZE(vq));
+ memcpy(p_expected, p_in, __SVE_PREGS_SIZE(vq));
+ memset(p_out, 0, sizeof(p_out));
+
+ if ((config->svcr_in & SVCR_SM) && !fa64_supported())
+ memset(ffr_in, 0, __SVE_PREG_SIZE(vq));
+ else
+ fill_random_ffr(&ffr_in, vq);
+ memcpy(ffr_expected, ffr_in, __SVE_PREG_SIZE(vq));
+ memset(ffr_out, 0, __SVE_PREG_SIZE(vq));
+ }
+
+ if (config->svcr_in & SVCR_ZA)
+ fill_random(za_in, ZA_SIG_REGS_SIZE(sme_vq));
+ else
+ memset(za_in, 0, ZA_SIG_REGS_SIZE(sme_vq));
+ if (config->svcr_expected & SVCR_ZA)
+ memcpy(za_expected, za_in, ZA_SIG_REGS_SIZE(sme_vq));
+ else
+ memset(za_expected, 0, ZA_SIG_REGS_SIZE(sme_vq));
+ if (sme_supported())
+ memset(za_out, 0, sizeof(za_out));
+
+ if (sme2_supported()) {
+ if (config->svcr_in & SVCR_ZA)
+ fill_random(zt_in, ZT_SIG_REG_BYTES);
+ else
+ memset(zt_in, 0, ZT_SIG_REG_BYTES);
+ if (config->svcr_expected & SVCR_ZA)
+ memcpy(zt_expected, zt_in, ZT_SIG_REG_BYTES);
+ else
+ memset(zt_expected, 0, ZT_SIG_REG_BYTES);
+ memset(zt_out, 0, sizeof(zt_out));
+ }
+
+ if (fpmr_supported()) {
+ fill_random(&fpmr_in, sizeof(fpmr_in));
+ fpmr_in &= FPMR_SAFE_BITS;
+ fpmr_expected = fpmr_in;
+ } else {
+ fpmr_in = 0;
+ fpmr_expected = 0;
+ fpmr_out = 0;
+ }
+}
+
+static bool check_memory_values(struct test_config *config)
+{
+ bool pass = true;
+ int vq, sme_vq;
+
+ if (!compare_buffer("saved V", v_out, v_expected, sizeof(v_out)))
+ pass = false;
+
+ vq = __sve_vq_from_vl(vl_expected(config));
+ sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ if (svcr_out != svcr_expected) {
+ ksft_print_msg("Mismatch in saved SVCR %lx != %lx\n",
+ svcr_out, svcr_expected);
+ pass = false;
+ }
+
+ if (sve_vl_out != config->sve_vl_expected) {
+ ksft_print_msg("Mismatch in SVE VL: %ld != %d\n",
+ sve_vl_out, config->sve_vl_expected);
+ pass = false;
+ }
+
+ if (sme_vl_out != config->sme_vl_expected) {
+ ksft_print_msg("Mismatch in SME VL: %ld != %d\n",
+ sme_vl_out, config->sme_vl_expected);
+ pass = false;
+ }
+
+ if (!compare_buffer("saved Z", z_out, z_expected,
+ __SVE_ZREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("saved P", p_out, p_expected,
+ __SVE_PREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("saved FFR", ffr_out, ffr_expected,
+ __SVE_PREG_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("saved ZA", za_out, za_expected,
+ ZA_PT_ZA_SIZE(sme_vq)))
+ pass = false;
+
+ if (!compare_buffer("saved ZT", zt_out, zt_expected, ZT_SIG_REG_BYTES))
+ pass = false;
+
+ if (fpmr_out != fpmr_expected) {
+ ksft_print_msg("Mismatch in saved FPMR: %lx != %lx\n",
+ fpmr_out, fpmr_expected);
+ pass = false;
+ }
+
+ return pass;
+}
+
+static bool sve_sme_same(struct test_config *config)
+{
+ if (config->sve_vl_in != config->sve_vl_expected)
+ return false;
+
+ if (config->sme_vl_in != config->sme_vl_expected)
+ return false;
+
+ if (config->svcr_in != config->svcr_expected)
+ return false;
+
+ return true;
+}
+
+static bool sve_write_supported(struct test_config *config)
+{
+ if (!sve_supported() && !sme_supported())
+ return false;
+
+ if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
+ return false;
+
+ if (config->svcr_expected & SVCR_SM) {
+ if (config->sve_vl_in != config->sve_vl_expected) {
+ return false;
+ }
+
+ /* Changing the SME VL disables ZA */
+ if ((config->svcr_expected & SVCR_ZA) &&
+ (config->sme_vl_in != config->sme_vl_expected)) {
+ return false;
+ }
+ } else {
+ if (config->sme_vl_in != config->sme_vl_expected) {
+ return false;
+ }
+
+ if (!sve_supported())
+ return false;
+ }
+
+ return true;
+}
+
+static bool sve_write_fpsimd_supported(struct test_config *config)
+{
+ if (!sve_supported() && !sme_supported())
+ return false;
+
+ if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
+ return false;
+
+ if (config->svcr_expected & SVCR_SM)
+ return false;
+
+ if (config->sme_vl_in != config->sme_vl_expected)
+ return false;
+
+ return true;
+}
+
+static void fpsimd_write_expected(struct test_config *config)
+{
+ int vl;
+
+ fill_random(&v_expected, sizeof(v_expected));
+
+ /* The SVE registers are flushed by a FPSIMD write */
+ vl = vl_expected(config);
+
+ memset(z_expected, 0, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl)));
+ memset(p_expected, 0, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl)));
+ memset(ffr_expected, 0, __SVE_PREG_SIZE(__sve_vq_from_vl(vl)));
+
+ fpsimd_to_sve(v_expected, z_expected, vl);
+}
+
+static void fpsimd_write(pid_t child, struct test_config *test_config)
+{
+ struct user_fpsimd_state fpsimd;
+ struct iovec iov;
+ int ret;
+
+ memset(&fpsimd, 0, sizeof(fpsimd));
+ memcpy(&fpsimd.vregs, v_expected, sizeof(v_expected));
+
+ iov.iov_base = &fpsimd;
+ iov.iov_len = sizeof(fpsimd);
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_print_msg("FPSIMD set failed: (%s) %d\n",
+ strerror(errno), errno);
+}
+
+static bool fpmr_write_supported(struct test_config *config)
+{
+ if (!fpmr_supported())
+ return false;
+
+ if (!sve_sme_same(config))
+ return false;
+
+ return true;
+}
+
+static void fpmr_write_expected(struct test_config *config)
+{
+ fill_random(&fpmr_expected, sizeof(fpmr_expected));
+ fpmr_expected &= FPMR_SAFE_BITS;
+}
+
+static void fpmr_write(pid_t child, struct test_config *config)
+{
+ struct iovec iov;
+ int ret;
+
+ iov.iov_len = sizeof(fpmr_expected);
+ iov.iov_base = &fpmr_expected;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_FPMR, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write FPMR: %s (%d)\n",
+ strerror(errno), errno);
+}
+
+static void sve_write_expected(struct test_config *config)
+{
+ int vl = vl_expected(config);
+ int sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ if (!vl)
+ return;
+
+ fill_random(z_expected, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl)));
+ fill_random(p_expected, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl)));
+
+ if ((svcr_expected & SVCR_SM) && !fa64_supported())
+ memset(ffr_expected, 0, __SVE_PREG_SIZE(sme_vq));
+ else
+ fill_random_ffr(ffr_expected, __sve_vq_from_vl(vl));
+
+ /* Share the low bits of Z with V */
+ fill_random(&v_expected, sizeof(v_expected));
+ fpsimd_to_sve(v_expected, z_expected, vl);
+
+ if (config->sme_vl_in != config->sme_vl_expected) {
+ memset(za_expected, 0, ZA_PT_ZA_SIZE(sme_vq));
+ memset(zt_expected, 0, sizeof(zt_expected));
+ }
+}
+
+static void sve_write_sve(pid_t child, struct test_config *config)
+{
+ struct user_sve_header *sve;
+ struct iovec iov;
+ int ret, vl, vq, regset;
+
+ vl = vl_expected(config);
+ vq = __sve_vq_from_vl(vl);
+
+ if (!vl)
+ return;
+
+ iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_SVE);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
+ iov.iov_len);
+ return;
+ }
+ memset(iov.iov_base, 0, iov.iov_len);
+
+ sve = iov.iov_base;
+ sve->size = iov.iov_len;
+ sve->flags = SVE_PT_REGS_SVE;
+ sve->vl = vl;
+
+ memcpy(iov.iov_base + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+ z_expected, SVE_PT_SVE_ZREGS_SIZE(vq));
+ memcpy(iov.iov_base + SVE_PT_SVE_PREG_OFFSET(vq, 0),
+ p_expected, SVE_PT_SVE_PREGS_SIZE(vq));
+ memcpy(iov.iov_base + SVE_PT_SVE_FFR_OFFSET(vq),
+ ffr_expected, SVE_PT_SVE_PREG_SIZE(vq));
+
+ if (svcr_expected & SVCR_SM)
+ regset = NT_ARM_SSVE;
+ else
+ regset = NT_ARM_SVE;
+
+ ret = ptrace(PTRACE_SETREGSET, child, regset, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write SVE: %s (%d)\n",
+ strerror(errno), errno);
+
+ free(iov.iov_base);
+}
+
+static void sve_write_fpsimd(pid_t child, struct test_config *config)
+{
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd;
+ struct iovec iov;
+ int ret, vl, vq;
+
+ vl = vl_expected(config);
+ vq = __sve_vq_from_vl(vl);
+
+ iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
+ iov.iov_len);
+ return;
+ }
+ memset(iov.iov_base, 0, iov.iov_len);
+
+ sve = iov.iov_base;
+ sve->size = iov.iov_len;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->vl = vl;
+
+ fpsimd = iov.iov_base + SVE_PT_REGS_OFFSET;
+ memcpy(&fpsimd->vregs, v_expected, sizeof(v_expected));
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write SVE: %s (%d)\n",
+ strerror(errno), errno);
+
+ free(iov.iov_base);
+}
+
+static bool za_write_supported(struct test_config *config)
+{
+ if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM))
+ return false;
+
+ return true;
+}
+
+static void za_write_expected(struct test_config *config)
+{
+ int sme_vq, sve_vq;
+
+ sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ if (config->svcr_expected & SVCR_ZA) {
+ fill_random(za_expected, ZA_PT_ZA_SIZE(sme_vq));
+ } else {
+ memset(za_expected, 0, ZA_PT_ZA_SIZE(sme_vq));
+ memset(zt_expected, 0, sizeof(zt_expected));
+ }
+
+ /* Changing the SME VL flushes ZT, SVE state */
+ if (config->sme_vl_in != config->sme_vl_expected) {
+ sve_vq = __sve_vq_from_vl(vl_expected(config));
+ memset(z_expected, 0, __SVE_ZREGS_SIZE(sve_vq));
+ memset(p_expected, 0, __SVE_PREGS_SIZE(sve_vq));
+ memset(ffr_expected, 0, __SVE_PREG_SIZE(sve_vq));
+ memset(zt_expected, 0, sizeof(zt_expected));
+
+ fpsimd_to_sve(v_expected, z_expected, vl_expected(config));
+ }
+}
+
+static void za_write(pid_t child, struct test_config *config)
+{
+ struct user_za_header *za;
+ struct iovec iov;
+ int ret, vq;
+
+ vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ if (config->svcr_expected & SVCR_ZA)
+ iov.iov_len = ZA_PT_SIZE(vq);
+ else
+ iov.iov_len = sizeof(*za);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("Failed allocating %lu byte ZA write buffer\n",
+ iov.iov_len);
+ return;
+ }
+ memset(iov.iov_base, 0, iov.iov_len);
+
+ za = iov.iov_base;
+ za->size = iov.iov_len;
+ za->vl = config->sme_vl_expected;
+ if (config->svcr_expected & SVCR_ZA)
+ memcpy(iov.iov_base + ZA_PT_ZA_OFFSET, za_expected,
+ ZA_PT_ZA_SIZE(vq));
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_ZA, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write ZA: %s (%d)\n",
+ strerror(errno), errno);
+
+ free(iov.iov_base);
+}
+
+static bool zt_write_supported(struct test_config *config)
+{
+ if (!sme2_supported())
+ return false;
+ if (config->sme_vl_in != config->sme_vl_expected)
+ return false;
+ if (!(config->svcr_expected & SVCR_ZA))
+ return false;
+ if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM))
+ return false;
+
+ return true;
+}
+
+static void zt_write_expected(struct test_config *config)
+{
+ int sme_vq;
+
+ sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ if (config->svcr_expected & SVCR_ZA) {
+ fill_random(zt_expected, sizeof(zt_expected));
+ } else {
+ memset(za_expected, 0, ZA_PT_ZA_SIZE(sme_vq));
+ memset(zt_expected, 0, sizeof(zt_expected));
+ }
+}
+
+static void zt_write(pid_t child, struct test_config *config)
+{
+ struct iovec iov;
+ int ret;
+
+ iov.iov_len = ZT_SIG_REG_BYTES;
+ iov.iov_base = zt_expected;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_ZT, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write ZT: %s (%d)\n",
+ strerror(errno), errno);
+}
+
+/* Actually run a test */
+static void run_test(struct test_definition *test, struct test_config *config)
+{
+ pid_t child;
+ char name[1024];
+ bool pass;
+
+ if (sve_supported() && sme_supported())
+ snprintf(name, sizeof(name), "%s, SVE %d->%d, SME %d/%x->%d/%x",
+ test->name,
+ config->sve_vl_in, config->sve_vl_expected,
+ config->sme_vl_in, config->svcr_in,
+ config->sme_vl_expected, config->svcr_expected);
+ else if (sve_supported())
+ snprintf(name, sizeof(name), "%s, SVE %d->%d", test->name,
+ config->sve_vl_in, config->sve_vl_expected);
+ else if (sme_supported())
+ snprintf(name, sizeof(name), "%s, SME %d/%x->%d/%x",
+ test->name,
+ config->sme_vl_in, config->svcr_in,
+ config->sme_vl_expected, config->svcr_expected);
+ else
+ snprintf(name, sizeof(name), "%s", test->name);
+
+ if (test->supported && !test->supported(config)) {
+ ksft_test_result_skip("%s\n", name);
+ return;
+ }
+
+ set_initial_values(config);
+
+ if (test->set_expected_values)
+ test->set_expected_values(config);
+
+ child = fork();
+ if (child < 0)
+ ksft_exit_fail_msg("fork() failed: %s (%d)\n",
+ strerror(errno), errno);
+ /* run_child() never returns */
+ if (child == 0)
+ run_child(config);
+
+ pass = run_parent(child, test, config);
+ if (!check_memory_values(config))
+ pass = false;
+
+ ksft_test_result(pass, "%s\n", name);
+}
+
+static void run_tests(struct test_definition defs[], int count,
+ struct test_config *config)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ run_test(&defs[i], config);
+}
+
+static struct test_definition base_test_defs[] = {
+ {
+ .name = "No writes",
+ .supported = sve_sme_same,
+ },
+ {
+ .name = "FPSIMD write",
+ .supported = sve_sme_same,
+ .set_expected_values = fpsimd_write_expected,
+ .modify_values = fpsimd_write,
+ },
+ {
+ .name = "FPMR write",
+ .supported = fpmr_write_supported,
+ .set_expected_values = fpmr_write_expected,
+ .modify_values = fpmr_write,
+ },
+};
+
+static struct test_definition sve_test_defs[] = {
+ {
+ .name = "SVE write",
+ .supported = sve_write_supported,
+ .set_expected_values = sve_write_expected,
+ .modify_values = sve_write_sve,
+ },
+ {
+ .name = "SVE write FPSIMD format",
+ .supported = sve_write_fpsimd_supported,
+ .set_expected_values = fpsimd_write_expected,
+ .modify_values = sve_write_fpsimd,
+ },
+};
+
+static struct test_definition za_test_defs[] = {
+ {
+ .name = "ZA write",
+ .supported = za_write_supported,
+ .set_expected_values = za_write_expected,
+ .modify_values = za_write,
+ },
+};
+
+static struct test_definition zt_test_defs[] = {
+ {
+ .name = "ZT write",
+ .supported = zt_write_supported,
+ .set_expected_values = zt_write_expected,
+ .modify_values = zt_write,
+ },
+};
+
+static int sve_vls[MAX_NUM_VLS], sme_vls[MAX_NUM_VLS];
+static int sve_vl_count, sme_vl_count;
+
+static void probe_vls(const char *name, int vls[], int *vl_count, int set_vl)
+{
+ unsigned int vq;
+ int vl;
+
+ *vl_count = 0;
+
+ for (vq = ARCH_VQ_MAX; vq > 0; vq /= 2) {
+ vl = prctl(set_vl, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (*vl_count && (vl == vls[*vl_count - 1]))
+ break;
+
+ vq = sve_vq_from_vl(vl);
+
+ vls[*vl_count] = vl;
+ *vl_count += 1;
+ }
+
+ if (*vl_count > 2) {
+ /* Just use the minimum and maximum */
+ vls[1] = vls[*vl_count - 1];
+ ksft_print_msg("%d %s VLs, using %d and %d\n",
+ *vl_count, name, vls[0], vls[1]);
+ *vl_count = 2;
+ } else {
+ ksft_print_msg("%d %s VLs\n", *vl_count, name);
+ }
+}
+
+static struct {
+ int svcr_in, svcr_expected;
+} svcr_combinations[] = {
+ { .svcr_in = 0, .svcr_expected = 0, },
+ { .svcr_in = 0, .svcr_expected = SVCR_SM, },
+ { .svcr_in = 0, .svcr_expected = SVCR_ZA, },
+ /* Can't enable both SM and ZA with a single ptrace write */
+
+ { .svcr_in = SVCR_SM, .svcr_expected = 0, },
+ { .svcr_in = SVCR_SM, .svcr_expected = SVCR_SM, },
+ { .svcr_in = SVCR_SM, .svcr_expected = SVCR_ZA, },
+ { .svcr_in = SVCR_SM, .svcr_expected = SVCR_SM | SVCR_ZA, },
+
+ { .svcr_in = SVCR_ZA, .svcr_expected = 0, },
+ { .svcr_in = SVCR_ZA, .svcr_expected = SVCR_SM, },
+ { .svcr_in = SVCR_ZA, .svcr_expected = SVCR_ZA, },
+ { .svcr_in = SVCR_ZA, .svcr_expected = SVCR_SM | SVCR_ZA, },
+
+ { .svcr_in = SVCR_SM | SVCR_ZA, .svcr_expected = 0, },
+ { .svcr_in = SVCR_SM | SVCR_ZA, .svcr_expected = SVCR_SM, },
+ { .svcr_in = SVCR_SM | SVCR_ZA, .svcr_expected = SVCR_ZA, },
+ { .svcr_in = SVCR_SM | SVCR_ZA, .svcr_expected = SVCR_SM | SVCR_ZA, },
+};
+
+static void run_sve_tests(void)
+{
+ struct test_config test_config;
+ int i, j;
+
+ if (!sve_supported())
+ return;
+
+ test_config.sme_vl_in = sme_vls[0];
+ test_config.sme_vl_expected = sme_vls[0];
+ test_config.svcr_in = 0;
+ test_config.svcr_expected = 0;
+
+ for (i = 0; i < sve_vl_count; i++) {
+ test_config.sve_vl_in = sve_vls[i];
+
+ for (j = 0; j < sve_vl_count; j++) {
+ test_config.sve_vl_expected = sve_vls[j];
+
+ run_tests(base_test_defs,
+ ARRAY_SIZE(base_test_defs),
+ &test_config);
+ if (sve_supported())
+ run_tests(sve_test_defs,
+ ARRAY_SIZE(sve_test_defs),
+ &test_config);
+ }
+ }
+}
+
+static void run_sme_tests(void)
+{
+ struct test_config test_config;
+ int i, j, k;
+
+ if (!sme_supported())
+ return;
+
+ test_config.sve_vl_in = sve_vls[0];
+ test_config.sve_vl_expected = sve_vls[0];
+
+ /*
+ * Every SME VL/SVCR combination
+ */
+ for (i = 0; i < sme_vl_count; i++) {
+ test_config.sme_vl_in = sme_vls[i];
+
+ for (j = 0; j < sme_vl_count; j++) {
+ test_config.sme_vl_expected = sme_vls[j];
+
+ for (k = 0; k < ARRAY_SIZE(svcr_combinations); k++) {
+ test_config.svcr_in = svcr_combinations[k].svcr_in;
+ test_config.svcr_expected = svcr_combinations[k].svcr_expected;
+
+ run_tests(base_test_defs,
+ ARRAY_SIZE(base_test_defs),
+ &test_config);
+ run_tests(sve_test_defs,
+ ARRAY_SIZE(sve_test_defs),
+ &test_config);
+ run_tests(za_test_defs,
+ ARRAY_SIZE(za_test_defs),
+ &test_config);
+
+ if (sme2_supported())
+ run_tests(zt_test_defs,
+ ARRAY_SIZE(zt_test_defs),
+ &test_config);
+ }
+ }
+ }
+}
+
+int main(void)
+{
+ struct test_config test_config;
+ struct sigaction sa;
+ int tests, ret, tmp;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ if (sve_supported()) {
+ probe_vls("SVE", sve_vls, &sve_vl_count, PR_SVE_SET_VL);
+
+ tests = ARRAY_SIZE(base_test_defs) +
+ ARRAY_SIZE(sve_test_defs);
+ tests *= sve_vl_count * sve_vl_count;
+ } else {
+ /* Only run the FPSIMD tests */
+ sve_vl_count = 1;
+ tests = ARRAY_SIZE(base_test_defs);
+ }
+
+ if (sme_supported()) {
+ probe_vls("SME", sme_vls, &sme_vl_count, PR_SME_SET_VL);
+
+ tmp = ARRAY_SIZE(base_test_defs) + ARRAY_SIZE(sve_test_defs)
+ + ARRAY_SIZE(za_test_defs);
+
+ if (sme2_supported())
+ tmp += ARRAY_SIZE(zt_test_defs);
+
+ tmp *= sme_vl_count * sme_vl_count;
+ tmp *= ARRAY_SIZE(svcr_combinations);
+ tests += tmp;
+ } else {
+ sme_vl_count = 1;
+ }
+
+ if (sme2_supported())
+ ksft_print_msg("SME2 supported\n");
+
+ if (fa64_supported())
+ ksft_print_msg("FA64 supported\n");
+
+ if (fpmr_supported())
+ ksft_print_msg("FPMR supported\n");
+
+ ksft_set_plan(tests);
+
+ /* Get signal handers ready before we start any children */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handle_alarm;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ ret = sigaction(SIGALRM, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGALRM handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ /*
+ * Run the test set if there is no SVE or SME, with those we
+ * have to pick a VL for each run.
+ */
+ if (!sve_supported() && !sme_supported()) {
+ test_config.sve_vl_in = 0;
+ test_config.sve_vl_expected = 0;
+ test_config.sme_vl_in = 0;
+ test_config.sme_vl_expected = 0;
+ test_config.svcr_in = 0;
+ test_config.svcr_expected = 0;
+
+ run_tests(base_test_defs, ARRAY_SIZE(base_test_defs),
+ &test_config);
+ }
+
+ run_sve_tests();
+ run_sme_tests();
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.h b/tools/testing/selftests/arm64/fp/fp-ptrace.h
new file mode 100644
index 000000000000..c06919aaf1f7
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.h
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-3 ARM Limited.
+
+#ifndef FP_PTRACE_H
+#define FP_PTRACE_H
+
+#define SVCR_SM_SHIFT 0
+#define SVCR_ZA_SHIFT 1
+
+#define SVCR_SM (1 << SVCR_SM_SHIFT)
+#define SVCR_ZA (1 << SVCR_ZA_SHIFT)
+
+#define HAVE_SVE_SHIFT 0
+#define HAVE_SME_SHIFT 1
+#define HAVE_SME2_SHIFT 2
+#define HAVE_FA64_SHIFT 3
+#define HAVE_FPMR_SHIFT 4
+
+#define HAVE_SVE (1 << HAVE_SVE_SHIFT)
+#define HAVE_SME (1 << HAVE_SME_SHIFT)
+#define HAVE_SME2 (1 << HAVE_SME2_SHIFT)
+#define HAVE_FA64 (1 << HAVE_FA64_SHIFT)
+#define HAVE_FPMR (1 << HAVE_FPMR_SHIFT)
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c
new file mode 100644
index 000000000000..65e01aba96ff
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-stress.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ */
+
+#define _GNU_SOURCE
+#define _POSIX_C_SOURCE 199309L
+
+#include <errno.h>
+#include <getopt.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/epoll.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+
+#define MAX_VLS 16
+
+#define SIGNAL_INTERVAL_MS 25
+#define LOG_INTERVALS (1000 / SIGNAL_INTERVAL_MS)
+
+struct child_data {
+ char *name, *output;
+ pid_t pid;
+ int stdout;
+ bool output_seen;
+ bool exited;
+ int exit_status;
+};
+
+static int epoll_fd;
+static struct child_data *children;
+static struct epoll_event *evs;
+static int tests;
+static int num_children;
+static bool terminate;
+
+static int startup_pipe[2];
+
+static int num_processors(void)
+{
+ long nproc = sysconf(_SC_NPROCESSORS_CONF);
+ if (nproc < 0) {
+ perror("Unable to read number of processors\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return nproc;
+}
+
+static void child_start(struct child_data *child, const char *program)
+{
+ int ret, pipefd[2], i;
+ struct epoll_event ev;
+
+ ret = pipe(pipefd);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n",
+ strerror(errno), errno);
+
+ child->pid = fork();
+ if (child->pid == -1)
+ ksft_exit_fail_msg("fork() failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ if (!child->pid) {
+ /*
+ * In child, replace stdout with the pipe, errors to
+ * stderr from here as kselftest prints to stdout.
+ */
+ ret = dup2(pipefd[1], 1);
+ if (ret == -1) {
+ printf("dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Duplicate the read side of the startup pipe to
+ * FD 3 so we can close everything else.
+ */
+ ret = dup2(startup_pipe[0], 3);
+ if (ret == -1) {
+ printf("dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Very dumb mechanism to clean open FDs other than
+ * stdio. We don't want O_CLOEXEC for the pipes...
+ */
+ for (i = 4; i < 8192; i++)
+ close(i);
+
+ /*
+ * Read from the startup pipe, there should be no data
+ * and we should block until it is closed. We just
+ * carry-on on error since this isn't super critical.
+ */
+ ret = read(3, &i, sizeof(i));
+ if (ret < 0)
+ printf("read(startp pipe) failed: %s (%d)\n",
+ strerror(errno), errno);
+ if (ret > 0)
+ printf("%d bytes of data on startup pipe\n", ret);
+ close(3);
+
+ ret = execl(program, program, NULL);
+ printf("execl(%s) failed: %d (%s)\n",
+ program, errno, strerror(errno));
+
+ exit(EXIT_FAILURE);
+ } else {
+ /*
+ * In parent, remember the child and close our copy of the
+ * write side of stdout.
+ */
+ close(pipefd[1]);
+ child->stdout = pipefd[0];
+ child->output = NULL;
+ child->exited = false;
+ child->output_seen = false;
+
+ ev.events = EPOLLIN | EPOLLHUP;
+ ev.data.ptr = child;
+
+ ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev);
+ if (ret < 0) {
+ ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n",
+ child->name, strerror(errno), errno);
+ }
+ }
+}
+
+static bool child_output_read(struct child_data *child)
+{
+ char read_data[1024];
+ char work[1024];
+ int ret, len, cur_work, cur_read;
+
+ ret = read(child->stdout, read_data, sizeof(read_data));
+ if (ret < 0) {
+ if (errno == EINTR)
+ return true;
+
+ ksft_print_msg("%s: read() failed: %s (%d)\n",
+ child->name, strerror(errno),
+ errno);
+ return false;
+ }
+ len = ret;
+
+ child->output_seen = true;
+
+ /* Pick up any partial read */
+ if (child->output) {
+ strncpy(work, child->output, sizeof(work) - 1);
+ cur_work = strnlen(work, sizeof(work));
+ free(child->output);
+ child->output = NULL;
+ } else {
+ cur_work = 0;
+ }
+
+ cur_read = 0;
+ while (cur_read < len) {
+ work[cur_work] = read_data[cur_read++];
+
+ if (work[cur_work] == '\n') {
+ work[cur_work] = '\0';
+ ksft_print_msg("%s: %s\n", child->name, work);
+ cur_work = 0;
+ } else {
+ cur_work++;
+ }
+ }
+
+ if (cur_work) {
+ work[cur_work] = '\0';
+ ret = asprintf(&child->output, "%s", work);
+ if (ret == -1)
+ ksft_exit_fail_msg("Out of memory\n");
+ }
+
+ return false;
+}
+
+static void child_output(struct child_data *child, uint32_t events,
+ bool flush)
+{
+ bool read_more;
+
+ if (events & EPOLLIN) {
+ do {
+ read_more = child_output_read(child);
+ } while (read_more);
+ }
+
+ if (events & EPOLLHUP) {
+ close(child->stdout);
+ child->stdout = -1;
+ flush = true;
+ }
+
+ if (flush && child->output) {
+ ksft_print_msg("%s: %s<EOF>\n", child->name, child->output);
+ free(child->output);
+ child->output = NULL;
+ }
+}
+
+static void child_tickle(struct child_data *child)
+{
+ if (child->output_seen && !child->exited)
+ kill(child->pid, SIGUSR1);
+}
+
+static void child_stop(struct child_data *child)
+{
+ if (!child->exited)
+ kill(child->pid, SIGTERM);
+}
+
+static void child_cleanup(struct child_data *child)
+{
+ pid_t ret;
+ int status;
+ bool fail = false;
+
+ if (!child->exited) {
+ do {
+ ret = waitpid(child->pid, &status, 0);
+ if (ret == -1 && errno == EINTR)
+ continue;
+
+ if (ret == -1) {
+ ksft_print_msg("waitpid(%d) failed: %s (%d)\n",
+ child->pid, strerror(errno),
+ errno);
+ fail = true;
+ break;
+ }
+ } while (!WIFEXITED(status));
+ child->exit_status = WEXITSTATUS(status);
+ }
+
+ if (!child->output_seen) {
+ ksft_print_msg("%s no output seen\n", child->name);
+ fail = true;
+ }
+
+ if (child->exit_status != 0) {
+ ksft_print_msg("%s exited with error code %d\n",
+ child->name, child->exit_status);
+ fail = true;
+ }
+
+ ksft_test_result(!fail, "%s\n", child->name);
+}
+
+static void handle_child_signal(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ bool found = false;
+
+ for (i = 0; i < num_children; i++) {
+ if (children[i].pid == info->si_pid) {
+ children[i].exited = true;
+ children[i].exit_status = info->si_status;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n",
+ info->si_pid, info->si_status);
+}
+
+static void handle_exit_signal(int sig, siginfo_t *info, void *context)
+{
+ int i;
+
+ /* If we're already exiting then don't signal again */
+ if (terminate)
+ return;
+
+ ksft_print_msg("Got signal, exiting...\n");
+
+ terminate = true;
+
+ /*
+ * This should be redundant, the main loop should clean up
+ * after us, but for safety stop everything we can here.
+ */
+ for (i = 0; i < num_children; i++)
+ child_stop(&children[i]);
+}
+
+static void start_fpsimd(struct child_data *child, int cpu, int copy)
+{
+ int ret;
+
+ ret = asprintf(&child->name, "FPSIMD-%d-%d", cpu, copy);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./fpsimd-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_kernel(struct child_data *child, int cpu, int copy)
+{
+ int ret;
+
+ ret = asprintf(&child->name, "KERNEL-%d-%d", cpu, copy);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./kernel-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_sve(struct child_data *child, int vl, int cpu)
+{
+ int ret;
+
+ ret = prctl(PR_SVE_SET_VL, vl | PR_SVE_VL_INHERIT);
+ if (ret < 0)
+ ksft_exit_fail_msg("Failed to set SVE VL %d\n", vl);
+
+ ret = asprintf(&child->name, "SVE-VL-%d-%d", vl, cpu);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./sve-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_ssve(struct child_data *child, int vl, int cpu)
+{
+ int ret;
+
+ ret = asprintf(&child->name, "SSVE-VL-%d-%d", vl, cpu);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ ret = prctl(PR_SME_SET_VL, vl | PR_SME_VL_INHERIT);
+ if (ret < 0)
+ ksft_exit_fail_msg("Failed to set SME VL %d\n", ret);
+
+ child_start(child, "./ssve-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_za(struct child_data *child, int vl, int cpu)
+{
+ int ret;
+
+ ret = prctl(PR_SME_SET_VL, vl | PR_SVE_VL_INHERIT);
+ if (ret < 0)
+ ksft_exit_fail_msg("Failed to set SME VL %d\n", ret);
+
+ ret = asprintf(&child->name, "ZA-VL-%d-%d", vl, cpu);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./za-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_zt(struct child_data *child, int cpu)
+{
+ int ret;
+
+ ret = asprintf(&child->name, "ZT-%d", cpu);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./zt-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void probe_vls(int vls[], int *vl_count, int set_vl)
+{
+ unsigned int vq;
+ int vl;
+
+ *vl_count = 0;
+
+ for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) {
+ vl = prctl(set_vl, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (*vl_count && (vl == vls[*vl_count - 1]))
+ break;
+
+ vq = sve_vq_from_vl(vl);
+
+ vls[*vl_count] = vl;
+ *vl_count += 1;
+ }
+}
+
+/* Handle any pending output without blocking */
+static void drain_output(bool flush)
+{
+ int ret = 1;
+ int i;
+
+ while (ret > 0) {
+ ret = epoll_wait(epoll_fd, evs, tests, 0);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_print_msg("epoll_wait() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ for (i = 0; i < ret; i++)
+ child_output(evs[i].data.ptr, evs[i].events, flush);
+ }
+}
+
+static const struct option options[] = {
+ { "timeout", required_argument, NULL, 't' },
+ { }
+};
+
+int main(int argc, char **argv)
+{
+ int ret;
+ int timeout = 10 * (1000 / SIGNAL_INTERVAL_MS);
+ int poll_interval = 5000;
+ int cpus, i, j, c;
+ int sve_vl_count, sme_vl_count;
+ bool all_children_started = false;
+ int seen_children;
+ int sve_vls[MAX_VLS], sme_vls[MAX_VLS];
+ bool have_sme2;
+ struct sigaction sa;
+
+ while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) {
+ switch (c) {
+ case 't':
+ ret = sscanf(optarg, "%d", &timeout);
+ if (ret != 1)
+ ksft_exit_fail_msg("Failed to parse timeout %s\n",
+ optarg);
+ break;
+ default:
+ ksft_exit_fail_msg("Unknown argument\n");
+ }
+ }
+
+ cpus = num_processors();
+ tests = 0;
+
+ if (getauxval(AT_HWCAP) & HWCAP_SVE) {
+ probe_vls(sve_vls, &sve_vl_count, PR_SVE_SET_VL);
+ tests += sve_vl_count * cpus;
+ } else {
+ sve_vl_count = 0;
+ }
+
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME) {
+ probe_vls(sme_vls, &sme_vl_count, PR_SME_SET_VL);
+ tests += sme_vl_count * cpus * 2;
+ } else {
+ sme_vl_count = 0;
+ }
+
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME2) {
+ tests += cpus;
+ have_sme2 = true;
+ } else {
+ have_sme2 = false;
+ }
+
+ tests += cpus * 2;
+
+ ksft_print_header();
+ ksft_set_plan(tests);
+
+ ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs, SME2 %s\n",
+ cpus, sve_vl_count, sme_vl_count,
+ have_sme2 ? "present" : "absent");
+
+ if (timeout > 0)
+ ksft_print_msg("Will run for %d\n", timeout);
+ else
+ ksft_print_msg("Will run until terminated\n");
+
+ children = calloc(sizeof(*children), tests);
+ if (!children)
+ ksft_exit_fail_msg("Unable to allocate child data\n");
+
+ ret = epoll_create1(EPOLL_CLOEXEC);
+ if (ret < 0)
+ ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n",
+ strerror(errno), ret);
+ epoll_fd = ret;
+
+ /* Create a pipe which children will block on before execing */
+ ret = pipe(startup_pipe);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n",
+ strerror(errno), errno);
+
+ /* Get signal handers ready before we start any children */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handle_exit_signal;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ ret = sigaction(SIGINT, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n",
+ strerror(errno), errno);
+ ret = sigaction(SIGTERM, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n",
+ strerror(errno), errno);
+ sa.sa_sigaction = handle_child_signal;
+ ret = sigaction(SIGCHLD, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ evs = calloc(tests, sizeof(*evs));
+ if (!evs)
+ ksft_exit_fail_msg("Failed to allocate %d epoll events\n",
+ tests);
+
+ for (i = 0; i < cpus; i++) {
+ start_fpsimd(&children[num_children++], i, 0);
+ start_kernel(&children[num_children++], i, 0);
+
+ for (j = 0; j < sve_vl_count; j++)
+ start_sve(&children[num_children++], sve_vls[j], i);
+
+ for (j = 0; j < sme_vl_count; j++) {
+ start_ssve(&children[num_children++], sme_vls[j], i);
+ start_za(&children[num_children++], sme_vls[j], i);
+ }
+
+ if (have_sme2)
+ start_zt(&children[num_children++], i);
+ }
+
+ /*
+ * All children started, close the startup pipe and let them
+ * run.
+ */
+ close(startup_pipe[0]);
+ close(startup_pipe[1]);
+
+ for (;;) {
+ /* Did we get a signal asking us to exit? */
+ if (terminate)
+ break;
+
+ /*
+ * Timeout is counted in poll intervals with no
+ * output, the tests print during startup then are
+ * silent when running so this should ensure they all
+ * ran enough to install the signal handler, this is
+ * especially useful in emulation where we will both
+ * be slow and likely to have a large set of VLs.
+ */
+ ret = epoll_wait(epoll_fd, evs, tests, poll_interval);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ /* Output? */
+ if (ret > 0) {
+ for (i = 0; i < ret; i++) {
+ child_output(evs[i].data.ptr, evs[i].events,
+ false);
+ }
+ continue;
+ }
+
+ /* Otherwise epoll_wait() timed out */
+
+ /*
+ * If the child processes have not produced output they
+ * aren't actually running the tests yet .
+ */
+ if (!all_children_started) {
+ seen_children = 0;
+
+ for (i = 0; i < num_children; i++)
+ if (children[i].output_seen ||
+ children[i].exited)
+ seen_children++;
+
+ if (seen_children != num_children) {
+ ksft_print_msg("Waiting for %d children\n",
+ num_children - seen_children);
+ continue;
+ }
+
+ all_children_started = true;
+ poll_interval = SIGNAL_INTERVAL_MS;
+ }
+
+ if ((timeout % LOG_INTERVALS) == 0)
+ ksft_print_msg("Sending signals, timeout remaining: %d\n",
+ timeout);
+
+ for (i = 0; i < num_children; i++)
+ child_tickle(&children[i]);
+
+ /* Negative timeout means run indefinitely */
+ if (timeout < 0)
+ continue;
+ if (--timeout == 0)
+ break;
+ }
+
+ ksft_print_msg("Finishing up...\n");
+ terminate = true;
+
+ for (i = 0; i < tests; i++)
+ child_stop(&children[i]);
+
+ drain_output(false);
+
+ for (i = 0; i < tests; i++)
+ child_cleanup(&children[i]);
+
+ drain_output(true);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
index 0dbd594c2747..f89d67894c2e 100644
--- a/tools/testing/selftests/arm64/fp/fpsimd-test.S
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -33,131 +33,6 @@
define_accessor setv, NVR, _vldr
define_accessor getv, NVR, _vstr
-// Print a single character x0 to stdout
-// Clobbers x0-x2,x8
-function putc
- str x0, [sp, #-16]!
-
- mov x0, #1 // STDOUT_FILENO
- mov x1, sp
- mov x2, #1
- mov x8, #__NR_write
- svc #0
-
- add sp, sp, #16
- ret
-endfunction
-
-// Print a NUL-terminated string starting at address x0 to stdout
-// Clobbers x0-x3,x8
-function puts
- mov x1, x0
-
- mov x2, #0
-0: ldrb w3, [x0], #1
- cbz w3, 1f
- add x2, x2, #1
- b 0b
-
-1: mov w0, #1 // STDOUT_FILENO
- mov x8, #__NR_write
- svc #0
-
- ret
-endfunction
-
-// Utility macro to print a literal string
-// Clobbers x0-x4,x8
-.macro puts string
- .pushsection .rodata.str1.1, "aMS", 1
-.L__puts_literal\@: .string "\string"
- .popsection
-
- ldr x0, =.L__puts_literal\@
- bl puts
-.endm
-
-// Print an unsigned decimal number x0 to stdout
-// Clobbers x0-x4,x8
-function putdec
- mov x1, sp
- str x30, [sp, #-32]! // Result can't be > 20 digits
-
- mov x2, #0
- strb w2, [x1, #-1]! // Write the NUL terminator
-
- mov x2, #10
-0: udiv x3, x0, x2 // div-mod loop to generate the digits
- msub x0, x3, x2, x0
- add w0, w0, #'0'
- strb w0, [x1, #-1]!
- mov x0, x3
- cbnz x3, 0b
-
- ldrb w0, [x1]
- cbnz w0, 1f
- mov w0, #'0' // Print "0" for 0, not ""
- strb w0, [x1, #-1]!
-
-1: mov x0, x1
- bl puts
-
- ldr x30, [sp], #32
- ret
-endfunction
-
-// Print an unsigned decimal number x0 to stdout, followed by a newline
-// Clobbers x0-x5,x8
-function putdecn
- mov x5, x30
-
- bl putdec
- mov x0, #'\n'
- bl putc
-
- ret x5
-endfunction
-
-
-// Clobbers x0-x3,x8
-function puthexb
- str x30, [sp, #-0x10]!
-
- mov w3, w0
- lsr w0, w0, #4
- bl puthexnibble
- mov w0, w3
-
- ldr x30, [sp], #0x10
- // fall through to puthexnibble
-endfunction
-// Clobbers x0-x2,x8
-function puthexnibble
- and w0, w0, #0xf
- cmp w0, #10
- blo 1f
- add w0, w0, #'a' - ('9' + 1)
-1: add w0, w0, #'0'
- b putc
-endfunction
-
-// x0=data in, x1=size in, clobbers x0-x5,x8
-function dumphex
- str x30, [sp, #-0x10]!
-
- mov x4, x0
- mov x5, x1
-
-0: subs x5, x5, #1
- b.lo 1f
- ldrb w0, [x4], #1
- bl puthexb
- b 0b
-
-1: ldr x30, [sp], #0x10
- ret
-endfunction
-
// Declare some storate space to shadow the SVE register contents:
.pushsection .text
.data
@@ -168,18 +43,6 @@ scratch:
.space MAXVL_B
.popsection
-// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
-// Clobbers x0-x3
-function memcpy
- cmp x2, #0
- b.eq 1f
-0: ldrb w3, [x1], #1
- strb w3, [x0], #1
- subs x2, x2, #1
- b.ne 0b
-1: ret
-endfunction
-
// Generate a test pattern for storage in SVE registers
// x0: pid (16 bits)
// x1: register number (6 bits)
@@ -227,33 +90,6 @@ function setup_vreg
ret x4
endfunction
-// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
-// Clobbers x1, x2.
-function memfill_ae
- mov w2, #0xae
- b memfill
-endfunction
-
-// Fill x1 bytes starting at x0 with 0.
-// Clobbers x1, x2.
-function memclr
- mov w2, #0
-endfunction
- // fall through to memfill
-
-// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
-// Clobbers x1
-function memfill
- cmp x1, #0
- b.eq 1f
-
-0: strb w2, [x0], #1
- subs x1, x1, #1
- b.ne 0b
-
-1: ret
-endfunction
-
// Trivial memory compare: compare x2 bytes starting at address x0 with
// bytes starting at address x1.
// Returns only if all bytes match; otherwise, the program is aborted.
@@ -298,8 +134,7 @@ function check_vreg
b memcmp
endfunction
-// Any SVE register modified here can cause corruption in the main
-// thread -- but *only* the registers modified here.
+// Modify live register state, the signal return will undo our changes
function irritator_handler
// Increment the irritation signal count (x23):
ldr x0, [x2, #ucontext_regs + 8 * 23]
@@ -307,7 +142,6 @@ function irritator_handler
str x0, [x2, #ucontext_regs + 8 * 23]
// Corrupt some random V-regs
- adr x0, .text + (irritator_handler - .text) / 16 * 16
movi v0.8b, #7
movi v9.16b, #9
movi v31.8b, #31
@@ -315,6 +149,15 @@ function irritator_handler
ret
endfunction
+function tickle_handler
+ // Increment the signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ ret
+endfunction
+
function terminate_handler
mov w21, w0
mov x20, x2
@@ -370,7 +213,32 @@ endfunction
// Main program entry point
.globl _start
function _start
-_start:
+ enable_gcs
+
+ mov x23, #0 // signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov w0, #SIGUSR2
+ adr x1, tickle_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
// Sanity-check and report the vector length
mov x19, #128
@@ -401,24 +269,6 @@ _start:
mov x0, x20
bl putdecn
- mov x23, #0 // Irritation signal count
-
- mov w0, #SIGINT
- adr x1, terminate_handler
- mov w2, #SA_SIGINFO
- bl setsignal
-
- mov w0, #SIGTERM
- adr x1, terminate_handler
- mov w2, #SA_SIGINFO
- bl setsignal
-
- mov w0, #SIGUSR1
- adr x1, irritator_handler
- mov w2, #SA_SIGINFO
- orr w2, w2, #SA_NODEFER
- bl setsignal
-
mov x22, #0 // generation number, increments per iteration
.Ltest_loop:
diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c
new file mode 100644
index 000000000000..0c40007d1282
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/kernel-test.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 ARM Limited.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+
+#include <linux/kernel.h>
+#include <linux/if_alg.h>
+
+#define DATA_SIZE (16 * 4096)
+
+static int base, sock;
+
+static int digest_len;
+static char *ref;
+static char *digest;
+static char *alg_name;
+
+static struct iovec data_iov;
+static int zerocopy[2];
+static int sigs;
+static int iter;
+
+static void handle_exit_signal(int sig, siginfo_t *info, void *context)
+{
+ printf("Terminated by signal %d, iterations=%d, signals=%d\n",
+ sig, iter, sigs);
+ exit(0);
+}
+
+static void handle_kick_signal(int sig, siginfo_t *info, void *context)
+{
+ sigs++;
+}
+
+static char *drivers[] = {
+ "sha1-ce",
+ "sha224-arm64",
+ "sha224-arm64-neon",
+ "sha224-ce",
+ "sha256-arm64",
+ "sha256-arm64-neon",
+ "sha256-ce",
+ "sha384-ce",
+ "sha512-ce",
+ "sha3-224-ce",
+ "sha3-256-ce",
+ "sha3-384-ce",
+ "sha3-512-ce",
+ "sm3-ce",
+ "sm3-neon",
+};
+
+static bool create_socket(void)
+{
+ FILE *proc;
+ struct sockaddr_alg addr;
+ char buf[1024];
+ char *c, *driver_name;
+ bool is_shash, match;
+ int ret, i;
+
+ ret = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (ret < 0) {
+ if (errno == EAFNOSUPPORT) {
+ printf("AF_ALG not supported\n");
+ return false;
+ }
+
+ printf("Failed to create AF_ALG socket: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+ base = ret;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.salg_family = AF_ALG;
+ strncpy((char *)addr.salg_type, "hash", sizeof(addr.salg_type));
+
+ proc = fopen("/proc/crypto", "r");
+ if (!proc) {
+ printf("Unable to open /proc/crypto\n");
+ return false;
+ }
+
+ driver_name = NULL;
+ is_shash = false;
+ match = false;
+
+ /* Look through /proc/crypto for a driver with kernel mode FP usage */
+ while (!match) {
+ c = fgets(buf, sizeof(buf), proc);
+ if (!c) {
+ if (feof(proc)) {
+ printf("Nothing found in /proc/crypto\n");
+ return false;
+ }
+ continue;
+ }
+
+ /* Algorithm descriptions are separated by a blank line */
+ if (*c == '\n') {
+ if (is_shash && driver_name) {
+ for (i = 0; i < ARRAY_SIZE(drivers); i++) {
+ if (strcmp(drivers[i],
+ driver_name) == 0) {
+ match = true;
+ }
+ }
+ }
+
+ if (!match) {
+ digest_len = 0;
+
+ free(driver_name);
+ driver_name = NULL;
+
+ free(alg_name);
+ alg_name = NULL;
+
+ is_shash = false;
+ }
+ continue;
+ }
+
+ /* Remove trailing newline */
+ c = strchr(buf, '\n');
+ if (c)
+ *c = '\0';
+
+ /* Find the field/value separator and start of the value */
+ c = strchr(buf, ':');
+ if (!c)
+ continue;
+ c += 2;
+
+ if (strncmp(buf, "digestsize", strlen("digestsize")) == 0)
+ sscanf(c, "%d", &digest_len);
+
+ if (strncmp(buf, "name", strlen("name")) == 0)
+ alg_name = strdup(c);
+
+ if (strncmp(buf, "driver", strlen("driver")) == 0)
+ driver_name = strdup(c);
+
+ if (strncmp(buf, "type", strlen("type")) == 0)
+ if (strncmp(c, "shash", strlen("shash")) == 0)
+ is_shash = true;
+ }
+
+ strncpy((char *)addr.salg_name, alg_name,
+ sizeof(addr.salg_name) - 1);
+
+ ret = bind(base, (struct sockaddr *)&addr, sizeof(addr));
+ if (ret < 0) {
+ printf("Failed to bind %s: %s (%d)\n",
+ addr.salg_name, strerror(errno), errno);
+ return false;
+ }
+
+ ret = accept(base, NULL, 0);
+ if (ret < 0) {
+ printf("Failed to accept %s: %s (%d)\n",
+ addr.salg_name, strerror(errno), errno);
+ return false;
+ }
+
+ sock = ret;
+
+ ret = pipe(zerocopy);
+ if (ret != 0) {
+ printf("Failed to create zerocopy pipe: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ ref = malloc(digest_len);
+ if (!ref) {
+ printf("Failed to allocate %d byte reference\n", digest_len);
+ return false;
+ }
+
+ digest = malloc(digest_len);
+ if (!digest) {
+ printf("Failed to allocate %d byte digest\n", digest_len);
+ return false;
+ }
+
+ return true;
+}
+
+static bool compute_digest(void *buf)
+{
+ struct iovec iov;
+ int ret, wrote;
+
+ iov = data_iov;
+ while (iov.iov_len) {
+ ret = vmsplice(zerocopy[1], &iov, 1, SPLICE_F_GIFT);
+ if (ret < 0) {
+ printf("Failed to send buffer: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ wrote = ret;
+ ret = splice(zerocopy[0], NULL, sock, NULL, wrote, 0);
+ if (ret < 0) {
+ printf("Failed to splice buffer: %s (%d)\n",
+ strerror(errno), errno);
+ } else if (ret != wrote) {
+ printf("Short splice: %d < %d\n", ret, wrote);
+ }
+
+ iov.iov_len -= wrote;
+ iov.iov_base += wrote;
+ }
+
+reread:
+ ret = recv(sock, buf, digest_len, 0);
+ if (ret == 0) {
+ printf("No digest returned\n");
+ return false;
+ }
+ if (ret != digest_len) {
+ if (errno == -EAGAIN)
+ goto reread;
+ printf("Failed to get digest: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ return true;
+}
+
+int main(void)
+{
+ char *data;
+ struct sigaction sa;
+ int ret;
+
+ /* Ensure we have unbuffered output */
+ setvbuf(stdout, NULL, _IOLBF, 0);
+
+ /* The parent will communicate with us via signals */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handle_exit_signal;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ ret = sigaction(SIGTERM, &sa, NULL);
+ if (ret < 0)
+ printf("Failed to install SIGTERM handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ sa.sa_sigaction = handle_kick_signal;
+ ret = sigaction(SIGUSR1, &sa, NULL);
+ if (ret < 0)
+ printf("Failed to install SIGUSR1 handler: %s (%d)\n",
+ strerror(errno), errno);
+ ret = sigaction(SIGUSR2, &sa, NULL);
+ if (ret < 0)
+ printf("Failed to install SIGUSR2 handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ data = malloc(DATA_SIZE);
+ if (!data) {
+ printf("Failed to allocate data buffer\n");
+ return EXIT_FAILURE;
+ }
+ memset(data, 0, DATA_SIZE);
+
+ data_iov.iov_base = data;
+ data_iov.iov_len = DATA_SIZE;
+
+ /*
+ * If we can't create a socket assume it's a lack of system
+ * support and fall back to a basic FPSIMD test for the
+ * benefit of fp-stress.
+ */
+ if (!create_socket()) {
+ execl("./fpsimd-test", "./fpsimd-test", NULL);
+ printf("Failed to fall back to fspimd-test: %d (%s)\n",
+ errno, strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ /*
+ * Compute a reference digest we hope is repeatable, we do
+ * this at runtime partly to make it easier to play with
+ * parameters.
+ */
+ if (!compute_digest(ref)) {
+ printf("Failed to compute reference digest\n");
+ return EXIT_FAILURE;
+ }
+
+ printf("AF_ALG using %s\n", alg_name);
+
+ while (true) {
+ if (!compute_digest(digest)) {
+ printf("Failed to compute digest, iter=%d\n", iter);
+ return EXIT_FAILURE;
+ }
+
+ if (memcmp(ref, digest, digest_len) != 0) {
+ printf("Digest mismatch, iter=%d\n", iter);
+ return EXIT_FAILURE;
+ }
+
+ iter++;
+ }
+
+ return EXIT_FAILURE;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sme.c b/tools/testing/selftests/arm64/fp/rdvl-sme.c
new file mode 100644
index 000000000000..49b0b2e08bac
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl-sme.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+ int vl = rdvl_sme();
+
+ printf("%d\n", vl);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sve.c b/tools/testing/selftests/arm64/fp/rdvl-sve.c
new file mode 100644
index 000000000000..7f8a13a18f5d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl-sve.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+ int vl = rdvl_sve();
+
+ printf("%d\n", vl);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl.S b/tools/testing/selftests/arm64/fp/rdvl.S
new file mode 100644
index 000000000000..20dc29996dc6
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.S
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+#include "sme-inst.h"
+
+.arch_extension sve
+
+.globl rdvl_sve
+rdvl_sve:
+ hint 34 // BTI C
+ rdvl x0, #1
+ ret
+
+.globl rdvl_sme
+rdvl_sme:
+ hint 34 // BTI C
+
+ rdsvl 0, 1
+
+ ret
diff --git a/tools/testing/selftests/arm64/fp/rdvl.h b/tools/testing/selftests/arm64/fp/rdvl.h
new file mode 100644
index 000000000000..5d323679fbc9
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef RDVL_H
+#define RDVL_H
+
+int rdvl_sme(void);
+int rdvl_sve(void);
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h
new file mode 100644
index 000000000000..85b9184e0835
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sme-inst.h
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-2 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+
+#ifndef SME_INST_H
+#define SME_INST_H
+
+#define REG_FPMR S3_3_C4_C4_2
+
+/*
+ * RDSVL X\nx, #\imm
+ */
+.macro rdsvl nx, imm
+ .inst 0x4bf5800 \
+ | (\imm << 5) \
+ | (\nx)
+.endm
+
+.macro smstop
+ msr S0_3_C4_C6_3, xzr
+.endm
+
+.macro smstart_za
+ msr S0_3_C4_C5_3, xzr
+.endm
+
+.macro smstart_sm
+ msr S0_3_C4_C3_3, xzr
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ .inst 0xe11f8000 \
+ | (((\nx) & 0x1f) << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ .inst 0xe13f8000 \
+ | (((\nx) & 0x1f) << 5)
+.endm
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/ssve-stress b/tools/testing/selftests/arm64/fp/ssve-stress
new file mode 100644
index 000000000000..e2bd2cc184ad
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/ssve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./ssve-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index b29cbc642c57..df0c1b6eb114 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -12,7 +12,8 @@
#include <sys/prctl.h>
#include <asm/sigcontext.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
+#include "rdvl.h"
int main(int argc, char **argv)
{
@@ -25,7 +26,7 @@ int main(int argc, char **argv)
ksft_set_plan(2);
if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
- ksft_exit_skip("SVE not available");
+ ksft_exit_skip("SVE not available\n");
/*
* Enumerate up to SVE_VQ_MAX vector lengths
@@ -38,6 +39,10 @@ int main(int argc, char **argv)
vl &= PR_SVE_VL_LEN_MASK;
+ if (rdvl_sve() != vl)
+ ksft_exit_fail_msg("PR_SVE_SET_VL reports %d, RDVL %d\n",
+ vl, rdvl_sve());
+
if (!sve_vl_valid(vl))
ksft_exit_fail_msg("VL %d invalid\n", vl);
vq = sve_vq_from_vl(vl);
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
deleted file mode 100644
index 3e81f9fab574..000000000000
--- a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-// Copyright (C) 2015-2019 ARM Limited.
-// Original author: Dave Martin <Dave.Martin@arm.com>
-#include <asm/unistd.h>
-
-.arch_extension sve
-
-.globl sve_store_patterns
-
-sve_store_patterns:
- mov x1, x0
-
- index z0.b, #0, #1
- str q0, [x1]
-
- mov w8, #__NR_getpid
- svc #0
- str q0, [x1, #0x10]
-
- mov z1.d, z0.d
- str q0, [x1, #0x20]
-
- mov w8, #__NR_getpid
- svc #0
- str q0, [x1, #0x30]
-
- mov z1.d, z0.d
- str q0, [x1, #0x40]
-
- ret
-
-.size sve_store_patterns, . - sve_store_patterns
-.type sve_store_patterns, @function
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index b2282be6f938..28f6b996c5e2 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -1,15 +1,17 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2015-2020 ARM Limited.
+ * Copyright (C) 2015-2021 ARM Limited.
* Original author: Dave Martin <Dave.Martin@arm.com>
*/
#include <errno.h>
+#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/auxv.h>
+#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
@@ -17,61 +19,113 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_SVE
#define NT_ARM_SVE 0x405
#endif
-/* Number of registers filled in by sve_store_patterns */
-#define NR_VREGS 5
-
-void sve_store_patterns(__uint128_t v[NR_VREGS]);
-
-static void dump(const void *buf, size_t size)
-{
- size_t i;
- const unsigned char *p = buf;
-
- for (i = 0; i < size; ++i)
- printf(" %.2x", *p++);
-}
+#ifndef NT_ARM_SSVE
+#define NT_ARM_SSVE 0x40b
+#endif
-static int check_vregs(const __uint128_t vregs[NR_VREGS])
+/*
+ * The architecture defines the maximum VQ as 16 but for extensibility
+ * the kernel specifies the SVE_VQ_MAX as 512 resulting in us running
+ * a *lot* more tests than are useful if we use it. Until the
+ * architecture is extended let's limit our coverage to what is
+ * currently allowed, plus one extra to ensure we cover constraining
+ * the VL as expected.
+ */
+#define TEST_VQ_MAX 17
+
+struct vec_type {
+ const char *name;
+ unsigned long hwcap_type;
+ unsigned long hwcap;
+ int regset;
+ int prctl_set;
+};
+
+static const struct vec_type vec_types[] = {
+ {
+ .name = "SVE",
+ .hwcap_type = AT_HWCAP,
+ .hwcap = HWCAP_SVE,
+ .regset = NT_ARM_SVE,
+ .prctl_set = PR_SVE_SET_VL,
+ },
+ {
+ .name = "Streaming SVE",
+ .hwcap_type = AT_HWCAP2,
+ .hwcap = HWCAP2_SME,
+ .regset = NT_ARM_SSVE,
+ .prctl_set = PR_SME_SET_VL,
+ },
+};
+
+#define VL_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 4)
+#define FLAG_TESTS 4
+#define FPSIMD_TESTS 2
+
+#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types))
+
+static void fill_buf(char *buf, size_t size)
{
int i;
- int ok = 1;
-
- for (i = 0; i < NR_VREGS; ++i) {
- printf("# v[%d]:", i);
- dump(&vregs[i], sizeof vregs[i]);
- putchar('\n');
-
- if (vregs[i] != vregs[0])
- ok = 0;
- }
- return ok;
+ for (i = 0; i < size; i++)
+ buf[i] = random();
}
static int do_child(void)
{
if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
- ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+ ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)\n",
+ strerror(errno), errno);
if (raise(SIGSTOP))
- ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+ ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n",
+ strerror(errno), errno);
return EXIT_SUCCESS;
}
-static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
+static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
+{
+ struct iovec iov;
+ int ret;
+
+ iov.iov_base = fpsimd;
+ iov.iov_len = sizeof(*fpsimd);
+ ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_GETREGSET)");
+ return ret;
+}
+
+static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
+{
+ struct iovec iov;
+ int ret;
+
+ iov.iov_base = fpsimd;
+ iov.iov_len = sizeof(*fpsimd);
+ ret = ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_SETREGSET)");
+ return ret;
+}
+
+static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
+ void **buf, size_t *size)
{
struct user_sve_header *sve;
void *p;
- size_t sz = sizeof *sve;
+ size_t sz = sizeof(*sve);
struct iovec iov;
+ int ret;
while (1) {
if (*size < sz) {
@@ -87,8 +141,11 @@ static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
iov.iov_base = *buf;
iov.iov_len = sz;
- if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov))
+ ret = ptrace(PTRACE_GETREGSET, pid, type->regset, &iov);
+ if (ret) {
+ ksft_perror("ptrace(PTRACE_GETREGSET)");
goto error;
+ }
sve = *buf;
if (sve->size <= sz)
@@ -103,49 +160,592 @@ error:
return NULL;
}
-static int set_sve(pid_t pid, const struct user_sve_header *sve)
+static int set_sve(pid_t pid, const struct vec_type *type,
+ const struct user_sve_header *sve)
{
struct iovec iov;
+ int ret;
iov.iov_base = (void *)sve;
iov.iov_len = sve->size;
- return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
+ ret = ptrace(PTRACE_SETREGSET, pid, type->regset, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_SETREGSET)");
+ return ret;
+}
+
+/* A read operation fails */
+static void read_fails(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header *new_sve = NULL;
+ size_t new_sve_size = 0;
+ void *ret;
+
+ ret = get_sve(child, type, (void **)&new_sve, &new_sve_size);
+
+ ksft_test_result(ret == NULL, "%s unsupported read fails\n",
+ type->name);
+
+ free(new_sve);
+}
+
+/* A write operation fails */
+static void write_fails(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ int ret;
+
+ /* Just the header, no data */
+ memset(&sve, 0, sizeof(sve));
+ sve.size = sizeof(sve);
+ sve.flags = SVE_PT_REGS_SVE;
+ sve.vl = SVE_VL_MIN;
+ ret = set_sve(child, type, &sve);
+
+ ksft_test_result(ret != 0, "%s unsupported write fails\n",
+ type->name);
+}
+
+/* Validate setting and getting the inherit flag */
+static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ struct user_sve_header *new_sve = NULL;
+ size_t new_sve_size = 0;
+ int ret;
+
+ /* First set the flag */
+ memset(&sve, 0, sizeof(sve));
+ sve.size = sizeof(sve);
+ sve.vl = sve_vl_from_vq(SVE_VQ_MIN);
+ sve.flags = SVE_PT_VL_INHERIT | SVE_PT_REGS_SVE;
+ ret = set_sve(child, type, &sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n",
+ type->name);
+ return;
+ }
+
+ /*
+ * Read back the new register state and verify that we have
+ * set the flags we expected.
+ */
+ if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) {
+ ksft_test_result_fail("Failed to read %s SVE flags\n",
+ type->name);
+ return;
+ }
+
+ ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT,
+ "%s SVE_PT_VL_INHERIT set\n", type->name);
+
+ /* Now clear */
+ sve.flags &= ~SVE_PT_VL_INHERIT;
+ ret = set_sve(child, type, &sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to clear %s SVE_PT_VL_INHERIT\n",
+ type->name);
+ return;
+ }
+
+ if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) {
+ ksft_test_result_fail("Failed to read %s SVE flags\n",
+ type->name);
+ return;
+ }
+
+ ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT),
+ "%s SVE_PT_VL_INHERIT cleared\n", type->name);
+
+ free(new_sve);
+}
+
+/* Validate attempting to set the specfied VL via ptrace */
+static void ptrace_set_get_vl(pid_t child, const struct vec_type *type,
+ unsigned int vl, bool *supported)
+{
+ struct user_sve_header sve;
+ struct user_sve_header *new_sve = NULL;
+ size_t new_sve_size = 0;
+ int ret, prctl_vl;
+
+ *supported = false;
+
+ /* Check if the VL is supported in this process */
+ prctl_vl = prctl(type->prctl_set, vl);
+ if (prctl_vl == -1)
+ ksft_exit_fail_msg("prctl(PR_%s_SET_VL) failed: %s (%d)\n",
+ type->name, strerror(errno), errno);
+
+ /* If the VL is not supported then a supported VL will be returned */
+ *supported = (prctl_vl == vl);
+
+ /* Set the VL by doing a set with no register payload */
+ memset(&sve, 0, sizeof(sve));
+ sve.size = sizeof(sve);
+ sve.flags = SVE_PT_REGS_SVE;
+ sve.vl = vl;
+ ret = set_sve(child, type, &sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set %s VL %u\n",
+ type->name, vl);
+ return;
+ }
+
+ /*
+ * Read back the new register state and verify that we have the
+ * same VL that we got from prctl() on ourselves.
+ */
+ if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) {
+ ksft_test_result_fail("Failed to read %s VL %u\n",
+ type->name, vl);
+ return;
+ }
+
+ ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n",
+ type->name, vl);
+
+ free(new_sve);
+}
+
+static void check_u32(unsigned int vl, const char *reg,
+ uint32_t *in, uint32_t *out, int *errors)
+{
+ if (*in != *out) {
+ printf("# VL %d %s wrote %x read %x\n",
+ vl, reg, *in, *out);
+ (*errors)++;
+ }
+}
+
+/* Set out of range VLs */
+static void ptrace_set_vl_ranges(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ int ret;
+
+ memset(&sve, 0, sizeof(sve));
+ sve.flags = SVE_PT_REGS_SVE;
+ sve.size = sizeof(sve);
+
+ ret = set_sve(child, type, &sve);
+ ksft_test_result(ret != 0, "%s Set invalid VL 0\n", type->name);
+
+ sve.vl = SVE_VL_MAX + SVE_VQ_BYTES;
+ ret = set_sve(child, type, &sve);
+ ksft_test_result(ret != 0, "%s Set invalid VL %d\n", type->name,
+ SVE_VL_MAX + SVE_VQ_BYTES);
+}
+
+/* Access the FPSIMD registers via the SVE regset */
+static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
+{
+ void *svebuf;
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd, new_fpsimd;
+ unsigned int i, j;
+ unsigned char *p;
+ int ret;
+
+ svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ if (!svebuf) {
+ ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
+ return;
+ }
+
+ memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ sve = svebuf;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+ sve->vl = 16; /* We don't care what the VL is */
+
+ /* Try to set a known FPSIMD state via PT_REGS_SVE */
+ fpsimd = (struct user_fpsimd_state *)((char *)sve +
+ SVE_PT_FPSIMD_OFFSET);
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&fpsimd->vregs[i];
+
+ for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j)
+ p[j] = j;
+ }
+
+ /* This should only succeed for SVE */
+ ret = set_sve(child, type, sve);
+ ksft_test_result((type->regset == NT_ARM_SVE) == (ret == 0),
+ "%s FPSIMD set via SVE: %d\n",
+ type->name, ret);
+ if (ret)
+ goto out;
+
+ /* Verify via the FPSIMD regset */
+ if (get_fpsimd(child, &new_fpsimd)) {
+ ksft_test_result_fail("get_fpsimd(): %s\n",
+ strerror(errno));
+ goto out;
+ }
+ if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0)
+ ksft_test_result_pass("%s get_fpsimd() gave same state\n",
+ type->name);
+ else
+ ksft_test_result_fail("%s get_fpsimd() gave different state\n",
+ type->name);
+
+out:
+ free(svebuf);
+}
+
+/* Write the FPSIMD registers via the SVE regset when SVE is not supported */
+static void ptrace_sve_fpsimd_no_sve(pid_t child)
+{
+ void *svebuf;
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd, new_fpsimd;
+ unsigned int i, j;
+ unsigned char *p;
+ int ret;
+
+ svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ if (!svebuf) {
+ ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
+ return;
+ }
+
+ /* On a system without SVE the VL should be set to 0 */
+ memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ sve = svebuf;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+ sve->vl = 0;
+
+ /* Try to set a known FPSIMD state via PT_REGS_SVE */
+ fpsimd = (struct user_fpsimd_state *)((char *)sve +
+ SVE_PT_FPSIMD_OFFSET);
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&fpsimd->vregs[i];
+
+ for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j)
+ p[j] = j;
+ }
+
+ ret = set_sve(child, &vec_types[0], sve);
+ ksft_test_result(ret == 0, "FPSIMD write via SVE\n");
+ if (ret) {
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ goto out;
+ }
+
+ /* Verify via the FPSIMD regset */
+ if (get_fpsimd(child, &new_fpsimd)) {
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ goto out;
+ }
+ ksft_test_result(memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0,
+ "Verify FPSIMD write via SVE\n");
+
+out:
+ free(svebuf);
+}
+
+/* Validate attempting to set SVE data and read SVE data */
+static void ptrace_set_sve_get_sve_data(pid_t child,
+ const struct vec_type *type,
+ unsigned int vl)
+{
+ void *write_buf;
+ void *read_buf = NULL;
+ struct user_sve_header *write_sve;
+ struct user_sve_header *read_sve;
+ size_t read_sve_size = 0;
+ unsigned int vq = sve_vq_from_vl(vl);
+ int ret, i;
+ size_t data_size;
+ int errors = 0;
+
+ data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ write_buf = malloc(data_size);
+ if (!write_buf) {
+ ksft_test_result_fail("Error allocating %ld byte buffer for %s VL %u\n",
+ data_size, type->name, vl);
+ return;
+ }
+ write_sve = write_buf;
+
+ /* Set up some data and write it out */
+ memset(write_sve, 0, data_size);
+ write_sve->size = data_size;
+ write_sve->vl = vl;
+ write_sve->flags = SVE_PT_REGS_SVE;
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++)
+ fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ SVE_PT_SVE_ZREG_SIZE(vq));
+
+ for (i = 0; i < __SVE_NUM_PREGS; i++)
+ fill_buf(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
+ SVE_PT_SVE_PREG_SIZE(vq));
+
+ fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE);
+ fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE);
+
+ /* TODO: Generate a valid FFR pattern */
+
+ ret = set_sve(child, type, write_sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set %s VL %u data\n",
+ type->name, vl);
+ goto out;
+ }
+
+ /* Read the data back */
+ if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) {
+ ksft_test_result_fail("Failed to read %s VL %u data\n",
+ type->name, vl);
+ goto out;
+ }
+ read_sve = read_buf;
+
+ /* We might read more data if there's extensions we don't know */
+ if (read_sve->size < write_sve->size) {
+ ksft_test_result_fail("%s wrote %d bytes, only read %d\n",
+ type->name, write_sve->size,
+ read_sve->size);
+ goto out_read;
+ }
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+ if (memcmp(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ SVE_PT_SVE_ZREG_SIZE(vq)) != 0) {
+ printf("# Mismatch in %u Z%d\n", vl, i);
+ errors++;
+ }
+ }
+
+ for (i = 0; i < __SVE_NUM_PREGS; i++) {
+ if (memcmp(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
+ read_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
+ SVE_PT_SVE_PREG_SIZE(vq)) != 0) {
+ printf("# Mismatch in %u P%d\n", vl, i);
+ errors++;
+ }
+ }
+
+ check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq),
+ read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors);
+ check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
+ read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
+
+ ksft_test_result(errors == 0, "Set and get %s data for VL %u\n",
+ type->name, vl);
+
+out_read:
+ free(read_buf);
+out:
+ free(write_buf);
}
-static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num,
- unsigned int vlmax)
+/* Validate attempting to set SVE data and read it via the FPSIMD regset */
+static void ptrace_set_sve_get_fpsimd_data(pid_t child,
+ const struct vec_type *type,
+ unsigned int vl)
{
- unsigned int vq;
- unsigned int i;
+ void *write_buf;
+ struct user_sve_header *write_sve;
+ unsigned int vq = sve_vq_from_vl(vl);
+ struct user_fpsimd_state fpsimd_state;
+ int ret, i;
+ size_t data_size;
+ int errors = 0;
+
+ if (__BYTE_ORDER == __BIG_ENDIAN) {
+ ksft_test_result_skip("Big endian not supported\n");
+ return;
+ }
- if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
- ksft_exit_fail_msg("Dumping non-SVE register\n");
+ data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ write_buf = malloc(data_size);
+ if (!write_buf) {
+ ksft_test_result_fail("Error allocating %ld byte buffer for %s VL %u\n",
+ data_size, type->name, vl);
+ return;
+ }
+ write_sve = write_buf;
+
+ /* Set up some data and write it out */
+ memset(write_sve, 0, data_size);
+ write_sve->size = data_size;
+ write_sve->vl = vl;
+ write_sve->flags = SVE_PT_REGS_SVE;
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++)
+ fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ SVE_PT_SVE_ZREG_SIZE(vq));
+
+ fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE);
+ fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE);
+
+ ret = set_sve(child, type, write_sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set %s VL %u data\n",
+ type->name, vl);
+ goto out;
+ }
- if (vlmax > sve->vl)
- vlmax = sve->vl;
+ /* Read the data back */
+ if (get_fpsimd(child, &fpsimd_state)) {
+ ksft_test_result_fail("Failed to read %s VL %u FPSIMD data\n",
+ type->name, vl);
+ goto out;
+ }
- vq = sve_vq_from_vl(sve->vl);
- for (i = 0; i < num; ++i) {
- printf("# z%u:", i);
- dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i),
- vlmax);
- printf("%s\n", vlmax == sve->vl ? "" : " ...");
+ for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+ __uint128_t tmp = 0;
+
+ /*
+ * Z regs are stored endianness invariant, this won't
+ * work for big endian
+ */
+ memcpy(&tmp, write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ sizeof(tmp));
+
+ if (tmp != fpsimd_state.vregs[i]) {
+ printf("# Mismatch in FPSIMD for %s VL %u Z%d\n",
+ type->name, vl, i);
+ errors++;
+ }
}
+
+ check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq),
+ &fpsimd_state.fpsr, &errors);
+ check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
+ &fpsimd_state.fpcr, &errors);
+
+ ksft_test_result(errors == 0, "Set and get FPSIMD data for %s VL %u\n",
+ type->name, vl);
+
+out:
+ free(write_buf);
+}
+
+/* Validate attempting to set FPSIMD data and read it via the SVE regset */
+static void ptrace_set_fpsimd_get_sve_data(pid_t child,
+ const struct vec_type *type,
+ unsigned int vl)
+{
+ void *read_buf = NULL;
+ unsigned char *p;
+ struct user_sve_header *read_sve;
+ unsigned int vq = sve_vq_from_vl(vl);
+ struct user_fpsimd_state write_fpsimd;
+ int ret, i, j;
+ size_t read_sve_size = 0;
+ size_t expected_size;
+ int errors = 0;
+
+ if (__BYTE_ORDER == __BIG_ENDIAN) {
+ ksft_test_result_skip("Big endian not supported\n");
+ return;
+ }
+
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&write_fpsimd.vregs[i];
+
+ for (j = 0; j < sizeof(write_fpsimd.vregs[i]); ++j)
+ p[j] = j;
+ }
+
+ ret = set_fpsimd(child, &write_fpsimd);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set FPSIMD state: %d\n)",
+ ret);
+ return;
+ }
+
+ if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) {
+ ksft_test_result_fail("Failed to read %s VL %u data\n",
+ type->name, vl);
+ return;
+ }
+ read_sve = read_buf;
+
+ if (read_sve->vl != vl) {
+ ksft_test_result_fail("Child VL != expected VL: %u != %u\n",
+ read_sve->vl, vl);
+ goto out;
+ }
+
+ /* The kernel may return either SVE or FPSIMD format */
+ switch (read_sve->flags & SVE_PT_REGS_MASK) {
+ case SVE_PT_REGS_FPSIMD:
+ expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD);
+ if (read_sve_size < expected_size) {
+ ksft_test_result_fail("Read %ld bytes, expected %ld\n",
+ read_sve_size, expected_size);
+ goto out;
+ }
+
+ ret = memcmp(&write_fpsimd, read_buf + SVE_PT_FPSIMD_OFFSET,
+ sizeof(write_fpsimd));
+ if (ret != 0) {
+ ksft_print_msg("Read FPSIMD data mismatch\n");
+ errors++;
+ }
+ break;
+
+ case SVE_PT_REGS_SVE:
+ expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ if (read_sve_size < expected_size) {
+ ksft_test_result_fail("Read %ld bytes, expected %ld\n",
+ read_sve_size, expected_size);
+ goto out;
+ }
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+ __uint128_t tmp = 0;
+
+ /*
+ * Z regs are stored endianness invariant, this won't
+ * work for big endian
+ */
+ memcpy(&tmp, read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ sizeof(tmp));
+
+ if (tmp != write_fpsimd.vregs[i]) {
+ ksft_print_msg("Mismatch in FPSIMD for %s VL %u Z%d/V%d\n",
+ type->name, vl, i, i);
+ errors++;
+ }
+ }
+
+ check_u32(vl, "FPSR", &write_fpsimd.fpsr,
+ read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors);
+ check_u32(vl, "FPCR", &write_fpsimd.fpcr,
+ read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
+ break;
+ default:
+ ksft_print_msg("Unexpected regs type %d\n",
+ read_sve->flags & SVE_PT_REGS_MASK);
+ errors++;
+ break;
+ }
+
+ ksft_test_result(errors == 0, "Set FPSIMD, read via SVE for %s VL %u\n",
+ type->name, vl);
+
+out:
+ free(read_buf);
}
static int do_parent(pid_t child)
{
int ret = EXIT_FAILURE;
pid_t pid;
- int status;
+ int status, i;
siginfo_t si;
- void *svebuf = NULL, *newsvebuf;
- size_t svebufsz = 0, newsvebufsz;
- struct user_sve_header *sve, *new_sve;
- struct user_fpsimd_state *fpsimd;
- unsigned int i, j;
- unsigned char *p;
- unsigned int vq;
+ unsigned int vq, vl;
+ bool vl_supported;
+
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
/* Attach to the child */
while (1) {
@@ -167,8 +767,6 @@ static int do_parent(pid_t child)
if (WIFEXITED(status) || WIFSIGNALED(status))
ksft_exit_fail_msg("Child died unexpectedly\n");
- ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n",
- status);
if (!WIFSTOPPED(status))
goto error;
@@ -203,98 +801,90 @@ static int do_parent(pid_t child)
}
}
- sve = get_sve(pid, &svebuf, &svebufsz);
- if (!sve) {
- int e = errno;
-
- ksft_test_result_fail("get_sve: %s\n", strerror(errno));
- if (e == ESRCH)
- goto disappeared;
-
- goto error;
- } else {
- ksft_test_result_pass("get_sve\n");
- }
-
- ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
- "FPSIMD registers\n");
- if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
- goto error;
-
- fpsimd = (struct user_fpsimd_state *)((char *)sve +
- SVE_PT_FPSIMD_OFFSET);
- for (i = 0; i < 32; ++i) {
- p = (unsigned char *)&fpsimd->vregs[i];
-
- for (j = 0; j < sizeof fpsimd->vregs[i]; ++j)
- p[j] = j;
- }
-
- if (set_sve(pid, sve)) {
- int e = errno;
-
- ksft_test_result_fail("set_sve(FPSIMD): %s\n",
- strerror(errno));
- if (e == ESRCH)
- goto disappeared;
-
- goto error;
- }
-
- vq = sve_vq_from_vl(sve->vl);
-
- newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
- new_sve = newsvebuf = malloc(newsvebufsz);
- if (!new_sve) {
- errno = ENOMEM;
- perror(NULL);
- goto error;
- }
-
- *new_sve = *sve;
- new_sve->flags &= ~SVE_PT_REGS_MASK;
- new_sve->flags |= SVE_PT_REGS_SVE;
- memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
- 0, SVE_PT_SVE_ZREG_SIZE(vq));
- new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
- if (set_sve(pid, new_sve)) {
- int e = errno;
+ for (i = 0; i < ARRAY_SIZE(vec_types); i++) {
+ /*
+ * If the vector type isn't supported reads and writes
+ * should fail.
+ */
+ if (!(getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap)) {
+ read_fails(child, &vec_types[i]);
+ write_fails(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s unsupported read fails\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s unsupported write fails\n",
+ vec_types[i].name);
+ }
- ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno));
- if (e == ESRCH)
- goto disappeared;
+ /* FPSIMD via SVE regset */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_sve_fpsimd(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s FPSIMD set via SVE\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s FPSIMD read\n",
+ vec_types[i].name);
+ }
- goto error;
- }
+ /* prctl() flags */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_set_get_inherit(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n",
+ vec_types[i].name);
+ }
- new_sve = get_sve(pid, &newsvebuf, &newsvebufsz);
- if (!new_sve) {
- int e = errno;
+ /* Setting out of bounds VLs should fail */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_set_vl_ranges(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s Set invalid VL 0\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s Set invalid VL %d\n",
+ vec_types[i].name,
+ SVE_VL_MAX + SVE_VQ_BYTES);
+ }
- ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno));
- if (e == ESRCH)
- goto disappeared;
+ /* Step through every possible VQ */
+ for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) {
+ vl = sve_vl_from_vq(vq);
+
+ /* First, try to set this vector length */
+ if (getauxval(vec_types[i].hwcap_type) &
+ vec_types[i].hwcap) {
+ ptrace_set_get_vl(child, &vec_types[i], vl,
+ &vl_supported);
+ } else {
+ ksft_test_result_skip("%s get/set VL %d\n",
+ vec_types[i].name, vl);
+ vl_supported = false;
+ }
- goto error;
+ /* If the VL is supported validate data set/get */
+ if (vl_supported) {
+ ptrace_set_sve_get_sve_data(child, &vec_types[i], vl);
+ ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl);
+ ptrace_set_fpsimd_get_sve_data(child, &vec_types[i], vl);
+ } else {
+ ksft_test_result_skip("%s set SVE get SVE for VL %d\n",
+ vec_types[i].name, vl);
+ ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n",
+ vec_types[i].name, vl);
+ ksft_test_result_skip("%s set FPSIMD get SVE for VL %d\n",
+ vec_types[i].name, vl);
+ }
+ }
}
- ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE,
- "SVE registers\n");
- if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
- goto error;
-
- dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]);
-
- p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1);
- for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) {
- unsigned char expected = i;
-
- if (__BYTE_ORDER == __BIG_ENDIAN)
- expected = sizeof fpsimd->vregs[0] - 1 - expected;
-
- ksft_test_result(p[i] == expected, "p[%d] == expected\n", i);
- if (p[i] != expected)
- goto error;
+ /* We support SVE writes of FPSMID format on SME only systems */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE) &&
+ (getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+ ptrace_sve_fpsimd_no_sve(child);
+ } else {
+ ksft_test_result_skip("FPSIMD write via SVE\n");
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
}
ret = EXIT_SUCCESS;
@@ -309,19 +899,12 @@ disappeared:
int main(void)
{
int ret = EXIT_SUCCESS;
- __uint128_t v[NR_VREGS];
pid_t child;
- ksft_print_header();
- ksft_set_plan(20);
-
- if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
- ksft_exit_skip("SVE not available\n");
+ srandom(getpid());
- sve_store_patterns(v);
-
- if (!check_vregs(v))
- ksft_exit_fail_msg("Initial check_vregs() failed\n");
+ ksft_print_header();
+ ksft_set_plan(EXPECTED_TESTS);
child = fork();
if (!child)
@@ -332,5 +915,5 @@ int main(void)
ksft_print_cnts();
- return 0;
+ return ret;
}
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index 9210691aa998..80e072f221cd 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -13,6 +13,7 @@
#include <asm/unistd.h>
#include "assembler.h"
#include "asm-offsets.h"
+#include "sme-inst.h"
#define NZR 32
#define NPR 16
@@ -46,130 +47,6 @@ define_accessor getz, NZR, _sve_str_v
define_accessor setp, NPR, _sve_ldr_p
define_accessor getp, NPR, _sve_str_p
-// Print a single character x0 to stdout
-// Clobbers x0-x2,x8
-function putc
- str x0, [sp, #-16]!
-
- mov x0, #1 // STDOUT_FILENO
- mov x1, sp
- mov x2, #1
- mov x8, #__NR_write
- svc #0
-
- add sp, sp, #16
- ret
-endfunction
-
-// Print a NUL-terminated string starting at address x0 to stdout
-// Clobbers x0-x3,x8
-function puts
- mov x1, x0
-
- mov x2, #0
-0: ldrb w3, [x0], #1
- cbz w3, 1f
- add x2, x2, #1
- b 0b
-
-1: mov w0, #1 // STDOUT_FILENO
- mov x8, #__NR_write
- svc #0
-
- ret
-endfunction
-
-// Utility macro to print a literal string
-// Clobbers x0-x4,x8
-.macro puts string
- .pushsection .rodata.str1.1, "aMS", 1
-.L__puts_literal\@: .string "\string"
- .popsection
-
- ldr x0, =.L__puts_literal\@
- bl puts
-.endm
-
-// Print an unsigned decimal number x0 to stdout
-// Clobbers x0-x4,x8
-function putdec
- mov x1, sp
- str x30, [sp, #-32]! // Result can't be > 20 digits
-
- mov x2, #0
- strb w2, [x1, #-1]! // Write the NUL terminator
-
- mov x2, #10
-0: udiv x3, x0, x2 // div-mod loop to generate the digits
- msub x0, x3, x2, x0
- add w0, w0, #'0'
- strb w0, [x1, #-1]!
- mov x0, x3
- cbnz x3, 0b
-
- ldrb w0, [x1]
- cbnz w0, 1f
- mov w0, #'0' // Print "0" for 0, not ""
- strb w0, [x1, #-1]!
-
-1: mov x0, x1
- bl puts
-
- ldr x30, [sp], #32
- ret
-endfunction
-
-// Print an unsigned decimal number x0 to stdout, followed by a newline
-// Clobbers x0-x5,x8
-function putdecn
- mov x5, x30
-
- bl putdec
- mov x0, #'\n'
- bl putc
-
- ret x5
-endfunction
-
-// Clobbers x0-x3,x8
-function puthexb
- str x30, [sp, #-0x10]!
-
- mov w3, w0
- lsr w0, w0, #4
- bl puthexnibble
- mov w0, w3
-
- ldr x30, [sp], #0x10
- // fall through to puthexnibble
-endfunction
-// Clobbers x0-x2,x8
-function puthexnibble
- and w0, w0, #0xf
- cmp w0, #10
- blo 1f
- add w0, w0, #'a' - ('9' + 1)
-1: add w0, w0, #'0'
- b putc
-endfunction
-
-// x0=data in, x1=size in, clobbers x0-x5,x8
-function dumphex
- str x30, [sp, #-0x10]!
-
- mov x4, x0
- mov x5, x1
-
-0: subs x5, x5, #1
- b.lo 1f
- ldrb w0, [x4], #1
- bl puthexb
- b 0b
-
-1: ldr x30, [sp], #0x10
- ret
-endfunction
-
// Declare some storate space to shadow the SVE register contents:
.pushsection .text
.data
@@ -184,18 +61,6 @@ scratch:
.space MAXVL_B
.popsection
-// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
-// Clobbers x0-x3
-function memcpy
- cmp x2, #0
- b.eq 1f
-0: ldrb w3, [x1], #1
- strb w3, [x0], #1
- subs x2, x2, #1
- b.ne 0b
-1: ret
-endfunction
-
// Generate a test pattern for storage in SVE registers
// x0: pid (16 bits)
// x1: register number (6 bits)
@@ -284,16 +149,29 @@ endfunction
// Set up test pattern in the FFR
// x0: pid
// x2: generation
+//
+// We need to generate a canonical FFR value, which consists of a number of
+// low "1" bits, followed by a number of zeros. This gives us 17 unique values
+// per 16 bits of FFR, so we create a 4 bit signature out of the PID and
+// generation, and use that as the initial number of ones in the pattern.
+// We fill the upper lanes of FFR with zeros.
// Beware: corrupts P0.
function setup_ffr
+#ifndef SSVE
mov x4, x30
- bl pattern
+ and w0, w0, #0x3
+ bfi w0, w2, #2, #2
+ mov w1, #1
+ lsl w1, w1, w0
+ sub w1, w1, #1
+
ldr x0, =ffrref
- ldr x1, =scratch
- rdvl x2, #1
- lsr x2, x2, #3
- bl memcpy
+ strh w1, [x0], 2
+ rdvl x1, #1
+ lsr x1, x1, #3
+ sub x1, x1, #2
+ bl memclr
mov x0, #0
ldr x1, =ffrref
@@ -302,33 +180,9 @@ function setup_ffr
wrffr p0.b
ret x4
-endfunction
-
-// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
-// Clobbers x1, x2.
-function memfill_ae
- mov w2, #0xae
- b memfill
-endfunction
-
-// Fill x1 bytes starting at x0 with 0.
-// Clobbers x1, x2.
-function memclr
- mov w2, #0
-endfunction
- // fall through to memfill
-
-// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
-// Clobbers x1
-function memfill
- cmp x1, #0
- b.eq 1f
-
-0: strb w2, [x0], #1
- subs x1, x1, #1
- b.ne 0b
-
-1: ret
+#else
+ ret
+#endif
endfunction
// Trivial memory compare: compare x2 bytes starting at address x0 with
@@ -411,6 +265,7 @@ endfunction
// Beware -- corrupts P0.
// Clobbers x0-x5.
function check_ffr
+#ifndef SSVE
mov x3, x30
ldr x4, =scratch
@@ -431,10 +286,12 @@ function check_ffr
mov x2, x5
mov x30, x3
b memcmp
+#else
+ ret
+#endif
endfunction
-// Any SVE register modified here can cause corruption in the main
-// thread -- but *only* the registers modified here.
+// Modify live register state, the signal return will undo our changes
function irritator_handler
// Increment the irritation signal count (x23):
ldr x0, [x2, #ucontext_regs + 8 * 23]
@@ -442,14 +299,24 @@ function irritator_handler
str x0, [x2, #ucontext_regs + 8 * 23]
// Corrupt some random Z-regs
- adr x0, .text + (irritator_handler - .text) / 16 * 16
movi v0.8b, #1
movi v9.16b, #2
movi v31.8b, #3
// And P0
- rdffr p0.b
+ ptrue p0.d
+#ifndef SSVE
// And FFR
wrffr p15.b
+#endif
+
+ ret
+endfunction
+
+function tickle_handler
+ // Increment the signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
ret
endfunction
@@ -509,7 +376,37 @@ endfunction
// Main program entry point
.globl _start
function _start
-_start:
+ enable_gcs
+
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov w0, #SIGUSR2
+ adr x1, tickle_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+#ifdef SSVE
+ puts "Streaming mode "
+ smstart_sm
+#endif
+
// Sanity-check and report the vector length
rdvl x19, #8
@@ -540,23 +437,9 @@ _start:
mov x0, x20
bl putdecn
- mov x23, #0 // Irritation signal count
-
- mov w0, #SIGINT
- adr x1, terminate_handler
- mov w2, #SA_SIGINFO
- bl setsignal
-
- mov w0, #SIGTERM
- adr x1, terminate_handler
- mov w2, #SA_SIGINFO
- bl setsignal
-
- mov w0, #SIGUSR1
- adr x1, irritator_handler
- mov w2, #SA_SIGINFO
- orr w2, w2, #SA_NODEFER
- bl setsignal
+#ifdef SSVE
+ smstart_sm // syscalls will have exited streaming mode
+#endif
mov x22, #0 // generation number, increments per iteration
.Ltest_loop:
@@ -590,6 +473,13 @@ _start:
// mov x8, #__NR_sched_yield // Encourage preemption
// svc #0
+#ifdef SSVE
+ mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=0,SM=1
+ and x1, x0, #3
+ cmp x1, #1
+ b.ne svcr_barf
+#endif
+
mov x21, #0
0: mov x0, x21
bl check_zreg
@@ -625,6 +515,10 @@ function barf
mov x11, x1 // actual data
mov x12, x2 // data size
+#ifdef SSVE
+ mrs x13, S3_3_C4_C2_2
+#endif
+
puts "Mismatch: PID="
mov x0, x20
bl putdec
@@ -644,6 +538,12 @@ function barf
bl dumphex
puts "]\n"
+#ifdef SSVE
+ puts "\tSVCR: "
+ mov x0, x13
+ bl putdecn
+#endif
+
mov x8, #__NR_getpid
svc #0
// fpsimd.c acitivty log dump hack
@@ -670,3 +570,15 @@ function vl_barf
mov x1, #1
svc #0
endfunction
+
+function svcr_barf
+ mov x10, x0
+
+ puts "Bad SVCR: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
new file mode 100644
index 000000000000..8dd932fdcdc4
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -0,0 +1,796 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+#include "rdvl.h"
+
+#define ARCH_MIN_VL SVE_VL_MIN
+
+struct vec_data {
+ const char *name;
+ unsigned long hwcap_type;
+ unsigned long hwcap;
+ const char *rdvl_binary;
+ int (*rdvl)(void);
+
+ int prctl_get;
+ int prctl_set;
+ const char *default_vl_file;
+
+ int default_vl;
+ int min_vl;
+ int max_vl;
+};
+
+#define VEC_SVE 0
+#define VEC_SME 1
+
+static struct vec_data vec_data[] = {
+ [VEC_SVE] = {
+ .name = "SVE",
+ .hwcap_type = AT_HWCAP,
+ .hwcap = HWCAP_SVE,
+ .rdvl = rdvl_sve,
+ .rdvl_binary = "./rdvl-sve",
+ .prctl_get = PR_SVE_GET_VL,
+ .prctl_set = PR_SVE_SET_VL,
+ .default_vl_file = "/proc/sys/abi/sve_default_vector_length",
+ },
+ [VEC_SME] = {
+ .name = "SME",
+ .hwcap_type = AT_HWCAP2,
+ .hwcap = HWCAP2_SME,
+ .rdvl = rdvl_sme,
+ .rdvl_binary = "./rdvl-sme",
+ .prctl_get = PR_SME_GET_VL,
+ .prctl_set = PR_SME_SET_VL,
+ .default_vl_file = "/proc/sys/abi/sme_default_vector_length",
+ },
+};
+
+static bool vec_type_supported(struct vec_data *data)
+{
+ return getauxval(data->hwcap_type) & data->hwcap;
+}
+
+static int stdio_read_integer(FILE *f, const char *what, int *val)
+{
+ int n = 0;
+ int ret;
+
+ ret = fscanf(f, "%d%*1[\n]%n", val, &n);
+ if (ret < 1 || n < 1) {
+ ksft_print_msg("failed to parse integer from %s\n", what);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Start a new process and return the vector length it sees */
+static int get_child_rdvl(struct vec_data *data)
+{
+ FILE *out;
+ int pipefd[2];
+ pid_t pid, child;
+ int read_vl, ret;
+
+ ret = pipe(pipefd);
+ if (ret == -1) {
+ ksft_print_msg("pipe() failed: %d (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ fflush(stdout);
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("fork() failed: %d (%s)\n",
+ errno, strerror(errno));
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -1;
+ }
+
+ /* Child: put vector length on the pipe */
+ if (child == 0) {
+ /*
+ * Replace stdout with the pipe, errors to stderr from
+ * here as kselftest prints to stdout.
+ */
+ ret = dup2(pipefd[1], 1);
+ if (ret == -1) {
+ fprintf(stderr, "dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /* exec() a new binary which puts the VL on stdout */
+ ret = execl(data->rdvl_binary, data->rdvl_binary, NULL);
+ fprintf(stderr, "execl(%s) failed: %d (%s)\n",
+ data->rdvl_binary, errno, strerror(errno));
+
+ exit(EXIT_FAILURE);
+ }
+
+ close(pipefd[1]);
+
+ /* Parent; wait for the exit status from the child & verify it */
+ do {
+ pid = wait(&ret);
+ if (pid == -1) {
+ ksft_print_msg("wait() failed: %d (%s)\n",
+ errno, strerror(errno));
+ close(pipefd[0]);
+ return -1;
+ }
+ } while (pid != child);
+
+ assert(pid == child);
+
+ if (!WIFEXITED(ret)) {
+ ksft_print_msg("child exited abnormally\n");
+ close(pipefd[0]);
+ return -1;
+ }
+
+ if (WEXITSTATUS(ret) != 0) {
+ ksft_print_msg("child returned error %d\n",
+ WEXITSTATUS(ret));
+ close(pipefd[0]);
+ return -1;
+ }
+
+ out = fdopen(pipefd[0], "r");
+ if (!out) {
+ ksft_print_msg("failed to open child stdout\n");
+ close(pipefd[0]);
+ return -1;
+ }
+
+ ret = stdio_read_integer(out, "child", &read_vl);
+ fclose(out);
+ if (ret != 0)
+ return ret;
+
+ return read_vl;
+}
+
+static int file_read_integer(const char *name, int *val)
+{
+ FILE *f;
+ int ret;
+
+ f = fopen(name, "r");
+ if (!f) {
+ ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+ name, errno,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = stdio_read_integer(f, name, val);
+ fclose(f);
+
+ return ret;
+}
+
+static int file_write_integer(const char *name, int val)
+{
+ FILE *f;
+
+ f = fopen(name, "w");
+ if (!f) {
+ ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+ name, errno,
+ strerror(errno));
+ return -1;
+ }
+
+ fprintf(f, "%d", val);
+ fclose(f);
+
+ return 0;
+}
+
+/*
+ * Verify that we can read the default VL via proc, checking that it
+ * is set in a freshly spawned child.
+ */
+static void proc_read_default(struct vec_data *data)
+{
+ int default_vl, child_vl, ret;
+
+ ret = file_read_integer(data->default_vl_file, &default_vl);
+ if (ret != 0)
+ return;
+
+ /* Is this the actual default seen by new processes? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != default_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ default_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s default vector length %d\n", data->name,
+ default_vl);
+ data->default_vl = default_vl;
+}
+
+/* Verify that we can write a minimum value and have it take effect */
+static void proc_write_min(struct vec_data *data)
+{
+ int ret, new_default, child_vl;
+
+ if (geteuid() != 0) {
+ ksft_test_result_skip("Need to be root to write to /proc\n");
+ return;
+ }
+
+ ret = file_write_integer(data->default_vl_file, ARCH_MIN_VL);
+ if (ret != 0)
+ return;
+
+ /* What was the new value? */
+ ret = file_read_integer(data->default_vl_file, &new_default);
+ if (ret != 0)
+ return;
+
+ /* Did it take effect in a new process? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != new_default) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ new_default, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s minimum vector length %d\n", data->name,
+ new_default);
+ data->min_vl = new_default;
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Verify that we can write a maximum value and have it take effect */
+static void proc_write_max(struct vec_data *data)
+{
+ int ret, new_default, child_vl;
+
+ if (geteuid() != 0) {
+ ksft_test_result_skip("Need to be root to write to /proc\n");
+ return;
+ }
+
+ /* -1 is accepted by the /proc interface as the maximum VL */
+ ret = file_write_integer(data->default_vl_file, -1);
+ if (ret != 0)
+ return;
+
+ /* What was the new value? */
+ ret = file_read_integer(data->default_vl_file, &new_default);
+ if (ret != 0)
+ return;
+
+ /* Did it take effect in a new process? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != new_default) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ new_default, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s maximum vector length %d\n", data->name,
+ new_default);
+ data->max_vl = new_default;
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Can we read back a VL from prctl? */
+static void prctl_get(struct vec_data *data)
+{
+ int ret;
+
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ /* Mask out any flags */
+ ret &= PR_SVE_VL_LEN_MASK;
+
+ /* Is that what we can read back directly? */
+ if (ret == data->rdvl())
+ ksft_test_result_pass("%s current VL is %d\n",
+ data->name, ret);
+ else
+ ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
+ data->name, ret, data->rdvl());
+}
+
+/* Does the prctl let us set the VL we already have? */
+static void prctl_set_same(struct vec_data *data)
+{
+ int cur_vl = data->rdvl();
+ int ret;
+
+ ret = prctl(data->prctl_set, cur_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ ksft_test_result(cur_vl == data->rdvl(),
+ "%s set VL %d and have VL %d\n",
+ data->name, cur_vl, data->rdvl());
+}
+
+/* Can we set a new VL for this process? */
+static void prctl_set(struct vec_data *data)
+{
+ int ret;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ /* Try to set the minimum VL */
+ ret = prctl(data->prctl_set, data->min_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ if ((ret & PR_SVE_VL_LEN_MASK) != data->min_vl) {
+ ksft_test_result_fail("%s prctl set %d but return value is %d\n",
+ data->name, data->min_vl, data->rdvl());
+ return;
+ }
+
+ if (data->rdvl() != data->min_vl) {
+ ksft_test_result_fail("%s set %d but RDVL is %d\n",
+ data->name, data->min_vl, data->rdvl());
+ return;
+ }
+
+ /* Try to set the maximum VL */
+ ret = prctl(data->prctl_set, data->max_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->max_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ if ((ret & PR_SVE_VL_LEN_MASK) != data->max_vl) {
+ ksft_test_result_fail("%s prctl() set %d but return value is %d\n",
+ data->name, data->max_vl, data->rdvl());
+ return;
+ }
+
+ /* The _INHERIT flag should not be present when we read the VL */
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ if (ret & PR_SVE_VL_INHERIT) {
+ ksft_test_result_fail("%s prctl() reports _INHERIT\n",
+ data->name);
+ return;
+ }
+
+ ksft_test_result_pass("%s prctl() set min/max\n", data->name);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_no_child(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ ret = prctl(data->prctl_set, data->min_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Ensure the default VL is different */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ /* Check that the child has the default we just set */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->max_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ data->max_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length used default\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_for_child(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ ret = prctl(data->prctl_set, data->min_vl | PR_SVE_VL_INHERIT);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* The _INHERIT flag should be present when we read the VL */
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+ if (!(ret & PR_SVE_VL_INHERIT)) {
+ ksft_test_result_fail("%s prctl() does not report _INHERIT\n",
+ data->name);
+ return;
+ }
+
+ /* Ensure the default VL is different */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ /* Check that the child inherited our VL */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->min_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ data->min_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length was inherited\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* _ONEXEC takes effect only in the child process */
+static void prctl_set_onexec(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ /* Set a known value for the default and our current VL */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ ret = prctl(data->prctl_set, data->max_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Set a different value for the child to have on exec */
+ ret = prctl(data->prctl_set, data->min_vl | PR_SVE_SET_VL_ONEXEC);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Our current VL should stay the same */
+ if (data->rdvl() != data->max_vl) {
+ ksft_test_result_fail("%s VL changed by _ONEXEC prctl()\n",
+ data->name);
+ return;
+ }
+
+ /* Check that the child inherited our VL */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->min_vl) {
+ ksft_test_result_fail("Set %d _ONEXEC but child VL is %d\n",
+ data->min_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length set on exec\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* For each VQ verify that setting via prctl() does the right thing */
+static void prctl_set_all_vqs(struct vec_data *data)
+{
+ int ret, vq, vl, new_vl, i;
+ int orig_vls[ARRAY_SIZE(vec_data)];
+ int errors = 0;
+
+ if (!data->min_vl || !data->max_vl) {
+ ksft_test_result_skip("%s Failed to enumerate VLs, not testing VL setting\n",
+ data->name);
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+ if (!vec_type_supported(&vec_data[i]))
+ continue;
+ orig_vls[i] = vec_data[i].rdvl();
+ }
+
+ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
+ vl = sve_vl_from_vq(vq);
+
+ /* Attempt to set the VL */
+ ret = prctl(data->prctl_set, vl);
+ if (ret < 0) {
+ errors++;
+ ksft_print_msg("%s prctl set failed for %d: %d (%s)\n",
+ data->name, vl,
+ errno, strerror(errno));
+ continue;
+ }
+
+ new_vl = ret & PR_SVE_VL_LEN_MASK;
+
+ /* Check that we actually have the reported new VL */
+ if (data->rdvl() != new_vl) {
+ ksft_print_msg("Set %s VL %d but RDVL reports %d\n",
+ data->name, new_vl, data->rdvl());
+ errors++;
+ }
+
+ /* Did any other VLs change? */
+ for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+ if (&vec_data[i] == data)
+ continue;
+
+ if (!vec_type_supported(&vec_data[i]))
+ continue;
+
+ if (vec_data[i].rdvl() != orig_vls[i]) {
+ ksft_print_msg("%s VL changed from %d to %d\n",
+ vec_data[i].name, orig_vls[i],
+ vec_data[i].rdvl());
+ errors++;
+ }
+ }
+
+ /* Was that the VL we asked for? */
+ if (new_vl == vl)
+ continue;
+
+ /* Should round up to the minimum VL if below it */
+ if (vl < data->min_vl) {
+ if (new_vl != data->min_vl) {
+ ksft_print_msg("%s VL %d returned %d not minimum %d\n",
+ data->name, vl, new_vl,
+ data->min_vl);
+ errors++;
+ }
+
+ continue;
+ }
+
+ /* Should round down to maximum VL if above it */
+ if (vl > data->max_vl) {
+ if (new_vl != data->max_vl) {
+ ksft_print_msg("%s VL %d returned %d not maximum %d\n",
+ data->name, vl, new_vl,
+ data->max_vl);
+ errors++;
+ }
+
+ continue;
+ }
+
+ /* Otherwise we should've rounded down */
+ if (!(new_vl < vl)) {
+ ksft_print_msg("%s VL %d returned %d, did not round down\n",
+ data->name, vl, new_vl);
+ errors++;
+
+ continue;
+ }
+ }
+
+ ksft_test_result(errors == 0, "%s prctl() set all VLs, %d errors\n",
+ data->name, errors);
+}
+
+typedef void (*test_type)(struct vec_data *);
+
+static const test_type tests[] = {
+ /*
+ * The default/min/max tests must be first and in this order
+ * to provide data for other tests.
+ */
+ proc_read_default,
+ proc_write_min,
+ proc_write_max,
+
+ prctl_get,
+ prctl_set_same,
+ prctl_set,
+ prctl_set_no_child,
+ prctl_set_for_child,
+ prctl_set_onexec,
+ prctl_set_all_vqs,
+};
+
+static inline void smstart(void)
+{
+ asm volatile("msr S0_3_C4_C7_3, xzr");
+}
+
+static inline void smstart_sm(void)
+{
+ asm volatile("msr S0_3_C4_C3_3, xzr");
+}
+
+static inline void smstop(void)
+{
+ asm volatile("msr S0_3_C4_C6_3, xzr");
+}
+
+/*
+ * Verify we can change the SVE vector length while SME is active and
+ * continue to use SME afterwards.
+ */
+static void change_sve_with_za(void)
+{
+ struct vec_data *sve_data = &vec_data[VEC_SVE];
+ bool pass = true;
+ int ret, i;
+
+ if (sve_data->min_vl == sve_data->max_vl) {
+ ksft_print_msg("Only one SVE VL supported, can't change\n");
+ ksft_test_result_skip("change_sve_while_sme\n");
+ return;
+ }
+
+ /* Ensure we will trigger a change when we set the maximum */
+ ret = prctl(sve_data->prctl_set, sve_data->min_vl);
+ if (ret != sve_data->min_vl) {
+ ksft_print_msg("Failed to set SVE VL %d: %d\n",
+ sve_data->min_vl, ret);
+ pass = false;
+ }
+
+ /* Enable SM and ZA */
+ smstart();
+
+ /* Trigger another VL change */
+ ret = prctl(sve_data->prctl_set, sve_data->max_vl);
+ if (ret != sve_data->max_vl) {
+ ksft_print_msg("Failed to set SVE VL %d: %d\n",
+ sve_data->max_vl, ret);
+ pass = false;
+ }
+
+ /*
+ * Spin for a bit with SM enabled to try to trigger another
+ * save/restore. We can't use syscalls without exiting
+ * streaming mode.
+ */
+ for (i = 0; i < 100000000; i++)
+ smstart_sm();
+
+ /*
+ * TODO: Verify that ZA was preserved over the VL change and
+ * spin.
+ */
+
+ /* Clean up after ourselves */
+ smstop();
+ ret = prctl(sve_data->prctl_set, sve_data->default_vl);
+ if (ret != sve_data->default_vl) {
+ ksft_print_msg("Failed to restore SVE VL %d: %d\n",
+ sve_data->default_vl, ret);
+ pass = false;
+ }
+
+ ksft_test_result(pass, "change_sve_with_za\n");
+}
+
+typedef void (*test_all_type)(void);
+
+static const struct {
+ const char *name;
+ test_all_type test;
+} all_types_tests[] = {
+ { "change_sve_with_za", change_sve_with_za },
+};
+
+int main(void)
+{
+ bool all_supported = true;
+ int i, j;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data) +
+ ARRAY_SIZE(all_types_tests));
+
+ for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+ struct vec_data *data = &vec_data[i];
+ unsigned long supported;
+
+ supported = vec_type_supported(data);
+ if (!supported)
+ all_supported = false;
+
+ for (j = 0; j < ARRAY_SIZE(tests); j++) {
+ if (supported)
+ tests[j](data);
+ else
+ ksft_test_result_skip("%s not supported\n",
+ data->name);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(all_types_tests); i++) {
+ if (all_supported)
+ all_types_tests[i].test();
+ else
+ ksft_test_result_skip("%s\n", all_types_tests[i].name);
+ }
+
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
index 308d27a68226..76912a581a95 100644
--- a/tools/testing/selftests/arm64/fp/vlset.c
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -22,12 +22,15 @@ static int inherit = 0;
static int no_inherit = 0;
static int force = 0;
static unsigned long vl;
+static int set_ctl = PR_SVE_SET_VL;
+static int get_ctl = PR_SVE_GET_VL;
static const struct option options[] = {
{ "force", no_argument, NULL, 'f' },
{ "inherit", no_argument, NULL, 'i' },
{ "max", no_argument, NULL, 'M' },
{ "no-inherit", no_argument, &no_inherit, 1 },
+ { "sme", no_argument, NULL, 's' },
{ "help", no_argument, NULL, '?' },
{}
};
@@ -50,6 +53,9 @@ static int parse_options(int argc, char **argv)
case 'M': vl = SVE_VL_MAX; break;
case 'f': force = 1; break;
case 'i': inherit = 1; break;
+ case 's': set_ctl = PR_SME_SET_VL;
+ get_ctl = PR_SME_GET_VL;
+ break;
case 0: break;
default: goto error;
}
@@ -125,14 +131,14 @@ int main(int argc, char **argv)
if (inherit)
flags |= PR_SVE_VL_INHERIT;
- t = prctl(PR_SVE_SET_VL, vl | flags);
+ t = prctl(set_ctl, vl | flags);
if (t < 0) {
fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
program_name, strerror(errno));
goto error;
}
- t = prctl(PR_SVE_GET_VL);
+ t = prctl(get_ctl);
if (t == -1) {
fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
program_name, strerror(errno));
diff --git a/tools/testing/selftests/arm64/fp/za-fork-asm.S b/tools/testing/selftests/arm64/fp/za-fork-asm.S
new file mode 100644
index 000000000000..2fafadd491c3
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-fork-asm.S
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAGIC 42
+
+#define MAXVL 2048
+#define MAXVL_B (MAXVL / 8)
+
+.pushsection .text
+.data
+.align 4
+scratch:
+ .space MAXVL_B
+.popsection
+
+.globl fork_test
+fork_test:
+ smstart_za
+
+ // For simplicity just set one word in one vector, other tests
+ // cover general data corruption issues.
+ ldr x0, =scratch
+ mov x1, #MAGIC
+ str x1, [x0]
+ mov w12, wzr
+ _ldr_za 12, 0 // ZA.H[W12] loaded from [X0]
+
+ // Tail call into the C portion that does the fork & verify
+ b fork_test_c
+
+.globl verify_fork
+verify_fork:
+ // SVCR should have ZA=1, SM=0
+ mrs x0, S3_3_C4_C2_2
+ and x1, x0, #3
+ cmp x1, #2
+ beq 1f
+ mov x0, xzr
+ b 100f
+1:
+
+ // ZA should still have the value we loaded
+ ldr x0, =scratch
+ mov w12, wzr
+ _str_za 12, 0 // ZA.H[W12] stored to [X0]
+ ldr x1, [x0]
+ cmp x1, #MAGIC
+ beq 2f
+ mov x0, xzr
+ b 100f
+
+2:
+ // All tests passed
+ mov x0, #1
+100:
+ ret
+
diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c
new file mode 100644
index 000000000000..587b94648222
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-fork.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#include "kselftest.h"
+
+#define EXPECTED_TESTS 1
+
+int fork_test(void);
+int verify_fork(void);
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value. This is called from the
+ * fork_test() asm function.
+ */
+int fork_test_c(void)
+{
+ pid_t newpid, waiting;
+ int child_status, parent_result;
+
+ newpid = fork();
+ if (newpid == 0) {
+ /* In child */
+ if (!verify_fork()) {
+ ksft_print_msg("ZA state invalid in child\n");
+ exit(0);
+ } else {
+ exit(1);
+ }
+ }
+ if (newpid < 0) {
+ ksft_print_msg("fork() failed: %d\n", newpid);
+
+ return 0;
+ }
+
+ parent_result = verify_fork();
+ if (!parent_result)
+ ksft_print_msg("ZA state invalid in parent\n");
+
+ for (;;) {
+ waiting = waitpid(newpid, &child_status, 0);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_print_msg("waitpid() failed: %d\n", errno);
+ return 0;
+ }
+ if (waiting != newpid) {
+ ksft_print_msg("waitpid() returned wrong PID\n");
+ return 0;
+ }
+
+ if (!WIFEXITED(child_status)) {
+ ksft_print_msg("child did not exit\n");
+ return 0;
+ }
+
+ return WEXITSTATUS(child_status) && parent_result;
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int ret, i;
+
+ ksft_print_header();
+ ksft_set_plan(EXPECTED_TESTS);
+
+ ksft_print_msg("PID: %d\n", getpid());
+
+ /*
+ * This test is run with nolibc which doesn't support hwcap and
+ * it's probably disproportionate to implement so instead check
+ * for the default vector length configuration in /proc.
+ */
+ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+ if (ret >= 0) {
+ ksft_test_result(fork_test(), "fork_test\n");
+
+ } else {
+ ksft_print_msg("SME not supported\n");
+ for (i = 0; i < EXPECTED_TESTS; i++) {
+ ksft_test_result_skip("fork_test\n");
+ }
+ }
+
+ ksft_finished();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c
new file mode 100644
index 000000000000..787eed22d059
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-ptrace.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+
+/*
+ * The architecture defines the maximum VQ as 16 but for extensibility
+ * the kernel specifies the SVE_VQ_MAX as 512 resulting in us running
+ * a *lot* more tests than are useful if we use it. Until the
+ * architecture is extended let's limit our coverage to what is
+ * currently allowed, plus one extra to ensure we cover constraining
+ * the VL as expected.
+ */
+#define TEST_VQ_MAX 17
+
+#define EXPECTED_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+
+static void fill_buf(char *buf, size_t size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ buf[i] = random();
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)",
+ strerror(errno), errno);
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ return EXIT_SUCCESS;
+}
+
+static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size)
+{
+ struct user_za_header *za;
+ void *p;
+ size_t sz = sizeof(*za);
+ struct iovec iov;
+
+ while (1) {
+ if (*size < sz) {
+ p = realloc(*buf, sz);
+ if (!p) {
+ errno = ENOMEM;
+ goto error;
+ }
+
+ *buf = p;
+ *size = sz;
+ }
+
+ iov.iov_base = *buf;
+ iov.iov_len = sz;
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
+ goto error;
+
+ za = *buf;
+ if (za->size <= sz)
+ break;
+
+ sz = za->size;
+ }
+
+ return za;
+
+error:
+ return NULL;
+}
+
+static int set_za(pid_t pid, const struct user_za_header *za)
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)za;
+ iov.iov_len = za->size;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov);
+}
+
+/* Validate attempting to set the specfied VL via ptrace */
+static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
+{
+ struct user_za_header za;
+ struct user_za_header *new_za = NULL;
+ size_t new_za_size = 0;
+ int ret, prctl_vl;
+
+ *supported = false;
+
+ /* Check if the VL is supported in this process */
+ prctl_vl = prctl(PR_SME_SET_VL, vl);
+ if (prctl_vl == -1)
+ ksft_exit_fail_msg("prctl(PR_SME_SET_VL) failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ /* If the VL is not supported then a supported VL will be returned */
+ *supported = (prctl_vl == vl);
+
+ /* Set the VL by doing a set with no register payload */
+ memset(&za, 0, sizeof(za));
+ za.size = sizeof(za);
+ za.vl = vl;
+ ret = set_za(child, &za);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set VL %u\n", vl);
+ return;
+ }
+
+ /*
+ * Read back the new register state and verify that we have the
+ * same VL that we got from prctl() on ourselves.
+ */
+ if (!get_za(child, (void **)&new_za, &new_za_size)) {
+ ksft_test_result_fail("Failed to read VL %u\n", vl);
+ return;
+ }
+
+ ksft_test_result(new_za->vl = prctl_vl, "Set VL %u\n", vl);
+
+ free(new_za);
+}
+
+/* Validate attempting to set no ZA data and read it back */
+static void ptrace_set_no_data(pid_t child, unsigned int vl)
+{
+ void *read_buf = NULL;
+ struct user_za_header write_za;
+ struct user_za_header *read_za;
+ size_t read_za_size = 0;
+ int ret;
+
+ /* Set up some data and write it out */
+ memset(&write_za, 0, sizeof(write_za));
+ write_za.size = ZA_PT_ZA_OFFSET;
+ write_za.vl = vl;
+
+ ret = set_za(child, &write_za);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set VL %u no data\n", vl);
+ return;
+ }
+
+ /* Read the data back */
+ if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+ ksft_test_result_fail("Failed to read VL %u no data\n", vl);
+ return;
+ }
+ read_za = read_buf;
+
+ /* We might read more data if there's extensions we don't know */
+ if (read_za->size < write_za.size) {
+ ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+ vl, write_za.size, read_za->size);
+ goto out_read;
+ }
+
+ ksft_test_result(read_za->size == write_za.size,
+ "Disabled ZA for VL %u\n", vl);
+
+out_read:
+ free(read_buf);
+}
+
+/* Validate attempting to set data and read it back */
+static void ptrace_set_get_data(pid_t child, unsigned int vl)
+{
+ void *write_buf;
+ void *read_buf = NULL;
+ struct user_za_header *write_za;
+ struct user_za_header *read_za;
+ size_t read_za_size = 0;
+ unsigned int vq = sve_vq_from_vl(vl);
+ int ret;
+ size_t data_size;
+
+ data_size = ZA_PT_SIZE(vq);
+ write_buf = malloc(data_size);
+ if (!write_buf) {
+ ksft_test_result_fail("Error allocating %ld byte buffer for VL %u\n",
+ data_size, vl);
+ return;
+ }
+ write_za = write_buf;
+
+ /* Set up some data and write it out */
+ memset(write_za, 0, data_size);
+ write_za->size = data_size;
+ write_za->vl = vl;
+
+ fill_buf(write_buf + ZA_PT_ZA_OFFSET, ZA_PT_ZA_SIZE(vq));
+
+ ret = set_za(child, write_za);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set VL %u data\n", vl);
+ goto out;
+ }
+
+ /* Read the data back */
+ if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+ ksft_test_result_fail("Failed to read VL %u data\n", vl);
+ goto out;
+ }
+ read_za = read_buf;
+
+ /* We might read more data if there's extensions we don't know */
+ if (read_za->size < write_za->size) {
+ ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+ vl, write_za->size, read_za->size);
+ goto out_read;
+ }
+
+ ksft_test_result(memcmp(write_buf + ZA_PT_ZA_OFFSET,
+ read_buf + ZA_PT_ZA_OFFSET,
+ ZA_PT_ZA_SIZE(vq)) == 0,
+ "Data match for VL %u\n", vl);
+
+out_read:
+ free(read_buf);
+out:
+ free(write_buf);
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+ unsigned int vq, vl;
+ bool vl_supported;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+ /* Step through every possible VQ */
+ for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) {
+ vl = sve_vl_from_vq(vq);
+
+ /* First, try to set this vector length */
+ ptrace_set_get_vl(child, vl, &vl_supported);
+
+ /* If the VL is supported validate data set/get */
+ if (vl_supported) {
+ ptrace_set_no_data(child, vl);
+ ptrace_set_get_data(child, vl);
+ } else {
+ ksft_test_result_skip("Disabled ZA for VL %u\n", vl);
+ ksft_test_result_skip("Get and set data for VL %u\n",
+ vl);
+ }
+ }
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ pid_t child;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+ ksft_set_plan(1);
+ ksft_exit_skip("SME not available\n");
+ }
+
+ ksft_set_plan(EXPECTED_TESTS);
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/za-stress b/tools/testing/selftests/arm64/fp/za-stress
new file mode 100644
index 000000000000..5ac386b55b95
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./za-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S
new file mode 100644
index 000000000000..9c33e13e9dc4
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-test.S
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Scalable Matrix Extension ZA context switch test
+// Repeatedly writes unique test patterns into each ZA tile
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAXVL 2048
+#define MAXVL_B (MAXVL / 8)
+
+// Declare some storage space to shadow ZA register contents and a
+// scratch buffer for a vector.
+.pushsection .text
+.data
+.align 4
+zaref:
+ .space MAXVL_B * MAXVL_B
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+
+// Generate a test pattern for storage in ZA
+// x0: pid
+// x1: row in ZA
+// x2: generation
+
+// These values are used to constuct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28 generation number (increments once per test_loop)
+// bits 27:16 pid
+// bits 15: 8 row number
+// bits 7: 0 32-bit lane index
+
+function pattern
+ mov w3, wzr
+ bfi w3, w0, #16, #12 // PID
+ bfi w3, w1, #8, #8 // Row
+ bfi w3, w2, #28, #4 // Generation
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w3, [x0], #4
+ add w3, w3, #1 // Lane
+ subs w1, w1, #1
+ b.ne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for ZA horizontal vector xn
+.macro _adrza xd, xn, nrtmp
+ ldr \xd, =zaref
+ rdsvl \nrtmp, 1
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a ZA horizontal vector
+// x0: pid
+// x1: row number
+// x2: generation
+function setup_za
+ mov x4, x30
+ mov x12, x1 // Use x12 for vector select
+
+ bl pattern // Get pattern in scratch buffer
+ _adrza x0, x12, 2 // Shadow buffer pointer to x0 and x5
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy // length set up in x2 by _adrza
+
+ _ldr_za 12, 5 // load vector w12 from pointer x5
+
+ ret x4
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 2f
+
+ stp x0, x1, [sp, #-0x20]!
+ str x2, [sp, #0x10]
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne 1f
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ldr x2, [sp, #0x10]
+ ldp x0, x1, [sp], #0x20
+ b.ne barf
+
+2: ret
+endfunction
+
+// Verify that a ZA vector matches its shadow in memory, else abort
+// x0: row number
+// Clobbers x0-x7 and x12.
+function check_za
+ mov x3, x30
+
+ mov x12, x0
+ _adrza x5, x0, 6 // pointer to expected value in x5
+ mov x4, x0
+ ldr x7, =scratch // x7 is scratch
+
+ mov x0, x7 // Poison scratch
+ mov x1, x6
+ bl memfill_ae
+
+ _str_za 12, 7 // save vector w12 to pointer x7
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Modify the live SME register state, signal return will undo our changes
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // This will reset ZA to all bits 0
+ smstop
+ smstart_za
+
+ ret
+endfunction
+
+function tickle_handler
+ // Increment the signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+ enable_gcs
+
+ mov x23, #0 // signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov w0, #SIGUSR2
+ adr x1, tickle_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ puts "Streaming mode "
+ smstart_za
+
+ // Sanity-check and report the vector length
+
+ rdsvl 19, 8
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+ rdsvl 0, 8
+ cmp x0, x19
+ b.ne vl_barf
+
+ rdsvl 21, 1 // Set up ZA & shadow with test pattern
+0: mov x0, x20
+ sub x1, x21, #1
+ mov x2, x22
+ bl setup_za
+ subs x21, x21, #1
+ b.ne 0b
+
+ mov x8, #__NR_sched_yield // encourage preemption
+1:
+ svc #0
+
+ mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0
+ and x1, x0, #3
+ cmp x1, #2
+ b.ne svcr_barf
+
+ rdsvl 21, 1 // Verify that the data made it through
+ rdsvl 24, 1 // Verify that the data made it through
+0: sub x0, x24, x21
+ bl check_za
+ subs x21, x21, #1
+ bne 0b
+
+ add x22, x22, #1 // Everything still working
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// end hack
+
+ mrs x13, S3_3_C4_C2_2
+
+ smstop
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mismatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", row="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+ puts "\tSVCR: "
+ mov x0, x13
+ bl putdecn
+
+ mov x8, #__NR_getpid
+ svc #0
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// ^ end of hack
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+// mov x8, #__NR_exit
+// mov x1, #1
+// svc #0
+endfunction
+
+function vl_barf
+ mov x10, x0
+
+ puts "Bad active VL: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
+
+function svcr_barf
+ mov x10, x0
+
+ puts "Bad SVCR: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c
new file mode 100644
index 000000000000..f3fa49fd0fbd
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+#ifndef NT_ARM_ZT
+#define NT_ARM_ZT 0x40d
+#endif
+
+#define EXPECTED_TESTS 3
+
+static int sme_vl;
+
+static void fill_buf(char *buf, size_t size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ buf[i] = random();
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ return EXIT_SUCCESS;
+}
+
+static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size)
+{
+ struct user_za_header *za;
+ void *p;
+ size_t sz = sizeof(*za);
+ struct iovec iov;
+
+ while (1) {
+ if (*size < sz) {
+ p = realloc(*buf, sz);
+ if (!p) {
+ errno = ENOMEM;
+ goto error;
+ }
+
+ *buf = p;
+ *size = sz;
+ }
+
+ iov.iov_base = *buf;
+ iov.iov_len = sz;
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
+ goto error;
+
+ za = *buf;
+ if (za->size <= sz)
+ break;
+
+ sz = za->size;
+ }
+
+ return za;
+
+error:
+ return NULL;
+}
+
+static int set_za(pid_t pid, const struct user_za_header *za)
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)za;
+ iov.iov_len = za->size;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov);
+}
+
+static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES])
+{
+ struct iovec iov;
+
+ iov.iov_base = zt;
+ iov.iov_len = ZT_SIG_REG_BYTES;
+ return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
+}
+
+static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES])
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)zt;
+ iov.iov_len = ZT_SIG_REG_BYTES;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZT, &iov);
+}
+
+/* Reading with ZA disabled returns all zeros */
+static void ptrace_za_disabled_read_zt(pid_t child)
+{
+ struct user_za_header za;
+ char zt[ZT_SIG_REG_BYTES];
+ int ret, i;
+ bool fail = false;
+
+ /* Disable PSTATE.ZA using the ZA interface */
+ memset(&za, 0, sizeof(za));
+ za.vl = sme_vl;
+ za.size = sizeof(za);
+
+ ret = set_za(child, &za);
+ if (ret != 0) {
+ ksft_print_msg("Failed to disable ZA\n");
+ fail = true;
+ }
+
+ /* Read back ZT */
+ ret = get_zt(child, zt);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read ZT\n");
+ fail = true;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(zt); i++) {
+ if (zt[i]) {
+ ksft_print_msg("zt[%d]: 0x%x != 0\n", i, zt[i]);
+ fail = true;
+ }
+ }
+
+ ksft_test_result(!fail, "ptrace_za_disabled_read_zt\n");
+}
+
+/* Writing then reading ZT should return the data written */
+static void ptrace_set_get_zt(pid_t child)
+{
+ char zt_in[ZT_SIG_REG_BYTES];
+ char zt_out[ZT_SIG_REG_BYTES];
+ int ret, i;
+ bool fail = false;
+
+ fill_buf(zt_in, sizeof(zt_in));
+
+ ret = set_zt(child, zt_in);
+ if (ret != 0) {
+ ksft_print_msg("Failed to set ZT\n");
+ fail = true;
+ }
+
+ ret = get_zt(child, zt_out);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read ZT\n");
+ fail = true;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(zt_in); i++) {
+ if (zt_in[i] != zt_out[i]) {
+ ksft_print_msg("zt[%d]: 0x%x != 0x%x\n", i,
+ zt_in[i], zt_out[i]);
+ fail = true;
+ }
+ }
+
+ ksft_test_result(!fail, "ptrace_set_get_zt\n");
+}
+
+/* Writing ZT should set PSTATE.ZA */
+static void ptrace_enable_za_via_zt(pid_t child)
+{
+ struct user_za_header za_in;
+ struct user_za_header *za_out;
+ char zt[ZT_SIG_REG_BYTES];
+ char *za_data;
+ size_t za_out_size;
+ int ret, i, vq;
+ bool fail = false;
+
+ /* Disable PSTATE.ZA using the ZA interface */
+ memset(&za_in, 0, sizeof(za_in));
+ za_in.vl = sme_vl;
+ za_in.size = sizeof(za_in);
+
+ ret = set_za(child, &za_in);
+ if (ret != 0) {
+ ksft_print_msg("Failed to disable ZA\n");
+ fail = true;
+ }
+
+ /* Write ZT */
+ fill_buf(zt, sizeof(zt));
+ ret = set_zt(child, zt);
+ if (ret != 0) {
+ ksft_print_msg("Failed to set ZT\n");
+ fail = true;
+ }
+
+ /* Read back ZA and check for register data */
+ za_out = NULL;
+ za_out_size = 0;
+ if (get_za(child, (void **)&za_out, &za_out_size)) {
+ /* Should have an unchanged VL */
+ if (za_out->vl != sme_vl) {
+ ksft_print_msg("VL changed from %d to %d\n",
+ sme_vl, za_out->vl);
+ fail = true;
+ }
+ vq = __sve_vq_from_vl(za_out->vl);
+ za_data = (char *)za_out + ZA_PT_ZA_OFFSET;
+
+ /* Should have register data */
+ if (za_out->size < ZA_PT_SIZE(vq)) {
+ ksft_print_msg("ZA data less than expected: %u < %u\n",
+ za_out->size, (unsigned int)ZA_PT_SIZE(vq));
+ fail = true;
+ vq = 0;
+ }
+
+ /* That register data should be non-zero */
+ for (i = 0; i < ZA_PT_ZA_SIZE(vq); i++) {
+ if (za_data[i]) {
+ ksft_print_msg("ZA byte %d is %x\n",
+ i, za_data[i]);
+ fail = true;
+ }
+ }
+ } else {
+ ksft_print_msg("Failed to read ZA\n");
+ fail = true;
+ }
+
+ ksft_test_result(!fail, "ptrace_enable_za_via_zt\n");
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+ ptrace_za_disabled_read_zt(child);
+ ptrace_set_get_zt(child);
+ ptrace_enable_za_via_zt(child);
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ pid_t child;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2)) {
+ ksft_set_plan(1);
+ ksft_exit_skip("SME2 not available\n");
+ }
+
+ /* We need a valid SME VL to enable/disable ZA */
+ sme_vl = prctl(PR_SME_GET_VL);
+ if (sme_vl == -1) {
+ ksft_set_plan(1);
+ ksft_exit_skip("Failed to read SME VL: %d (%s)\n",
+ errno, strerror(errno));
+ }
+
+ ksft_set_plan(EXPECTED_TESTS);
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
new file mode 100644
index 000000000000..a8df05771670
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/zt-test.S
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-2 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Scalable Matrix Extension ZT context switch test
+// Repeatedly writes unique test patterns into ZT0
+// and reads them back to verify integrity.
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define ZT_SZ 512
+#define ZT_B (ZT_SZ / 8)
+
+// Declare some storage space to shadow ZT register contents and a
+// scratch buffer.
+.pushsection .text
+.data
+.align 4
+ztref:
+ .space ZT_B
+scratch:
+ .space ZT_B
+.popsection
+
+
+// Generate a test pattern for storage in ZT
+// x0: pid
+// x1: generation
+
+// These values are used to construct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:24 generation number (increments once per test_loop)
+// bits 23: 8 pid
+// bits 7: 0 32-bit lane index
+
+function pattern
+ mov w3, wzr
+ bfi w3, w0, #8, #16 // PID
+ bfi w3, w1, #24, #8 // Generation
+
+ ldr x0, =scratch
+ mov w1, #ZT_B / 4
+
+0: str w3, [x0], #4
+ add w3, w3, #1 // Lane
+ subs w1, w1, #1
+ b.ne 0b
+
+ ret
+endfunction
+
+// Set up test pattern in a ZT horizontal vector
+// x0: pid
+// x1: generation
+function setup_zt
+ mov x4, x30
+
+ bl pattern // Get pattern in scratch buffer
+ ldr x0, =ztref
+ ldr x1, =scratch
+ mov x2, #ZT_B
+ bl memcpy
+
+ ldr x0, =ztref
+ _ldr_zt 0 // load zt0 from pointer x0
+
+ ret x4
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 2f
+
+ stp x0, x1, [sp, #-0x20]!
+ str x2, [sp, #0x10]
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne 1f
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ldr x2, [sp, #0x10]
+ ldp x0, x1, [sp], #0x20
+ b.ne barf
+
+2: ret
+endfunction
+
+// Verify that a ZT vector matches its shadow in memory, else abort
+// Clobbers x0-x3
+function check_zt
+ mov x3, x30
+
+ ldr x0, =scratch // Poison scratch
+ mov x1, #ZT_B
+ bl memfill_ae
+
+ ldr x0, =scratch
+ _str_zt 0
+
+ ldr x0, =ztref
+ ldr x1, =scratch
+ mov x2, #ZT_B
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Modify the live SME register state, signal return will undo our changes
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // This will reset ZT to all bits 0
+ smstop
+ smstart_za
+
+ ret
+endfunction
+
+function tickle_handler
+ // Increment the signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+ enable_gcs
+
+ mov x23, #0 // signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov w0, #SIGUSR2
+ adr x1, tickle_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ smstart_za
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+ mov x0, x20
+ mov x1, x22
+ bl setup_zt
+
+ mov x8, #__NR_sched_yield // Encourage preemption
+ svc #0
+
+ mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0
+ and x1, x0, #3
+ cmp x1, #2
+ b.ne svcr_barf
+
+ bl check_zt
+
+ add x22, x22, #1 // Everything still working
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// end hack
+
+ mrs x13, S3_3_C4_C2_2
+ smstop
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mismatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+ puts "\tSVCR: "
+ mov x0, x13
+ bl putdecn
+
+ mov x8, #__NR_getpid
+ svc #0
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// ^ end of hack
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+// mov x8, #__NR_exit
+// mov x1, #1
+// svc #0
+endfunction
+
+function svcr_barf
+ mov x10, x0
+
+ puts "Bad SVCR: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore
new file mode 100644
index 000000000000..bbb8e40a7e52
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/.gitignore
@@ -0,0 +1,7 @@
+basic-gcs
+libc-gcs
+gcs-locking
+gcs-stress
+gcs-stress-thread
+gcspushm
+gcsstr
diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile
new file mode 100644
index 000000000000..1fbbf0ca1f02
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2023 ARM Limited
+#
+# In order to avoid interaction with the toolchain and dynamic linker the
+# portions of these tests that interact with the GCS are implemented using
+# nolibc.
+#
+
+TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking gcs-stress gcspushm gcsstr
+TEST_GEN_PROGS_EXTENDED := gcs-stress-thread
+
+LDLIBS+=-lpthread
+
+include ../../lib.mk
+
+$(OUTPUT)/basic-gcs: basic-gcs.c
+ $(CC) $(CFLAGS) -fno-asynchronous-unwind-tables -fno-ident -s -nostdlib -nostdinc \
+ -static -I../../../../include/nolibc -include ../../../../include/nolibc/nolibc.h \
+ -I../../../../../usr/include \
+ -std=gnu99 -I../.. -g \
+ -ffreestanding $^ -o $@ -lgcc
+
+$(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S
+ $(CC) -nostdlib $^ -o $@
+
+$(OUTPUT)/gcspushm: gcspushm.S
+ $(CC) -nostdlib $^ -o $@
+
+$(OUTPUT)/gcsstr: gcsstr.S
+ $(CC) -nostdlib $^ -o $@
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/selftests/arm64/gcs/asm-offsets.h
index e69de29bb2d1..e69de29bb2d1 100644
--- a/tools/testing/radix-tree/linux/compiler_types.h
+++ b/tools/testing/selftests/arm64/gcs/asm-offsets.h
diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c
new file mode 100644
index 000000000000..250977abc398
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Limited.
+ */
+
+#include <limits.h>
+#include <stdbool.h>
+
+#include <linux/prctl.h>
+
+#include <sys/mman.h>
+#include <asm/mman.h>
+#include <asm/hwcap.h>
+#include <linux/sched.h>
+
+#include "kselftest.h"
+#include "gcs-util.h"
+
+/* nolibc doesn't have sysconf(), just hard code the maximum */
+static size_t page_size = 65536;
+
+static __attribute__((noinline)) void valid_gcs_function(void)
+{
+ /* Do something the compiler can't optimise out */
+ my_syscall1(__NR_prctl, PR_SVE_GET_VL);
+}
+
+static inline int gcs_set_status(unsigned long mode)
+{
+ bool enabling = mode & PR_SHADOW_STACK_ENABLE;
+ int ret;
+ unsigned long new_mode;
+
+ /*
+ * The prctl takes 1 argument but we need to ensure that the
+ * other 3 values passed in registers to the syscall are zero
+ * since the kernel validates them.
+ */
+ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode,
+ 0, 0, 0);
+
+ if (ret == 0) {
+ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+ &new_mode, 0, 0, 0);
+ if (ret == 0) {
+ if (new_mode != mode) {
+ ksft_print_msg("Mode set to %lx not %lx\n",
+ new_mode, mode);
+ ret = -EINVAL;
+ }
+ } else {
+ ksft_print_msg("Failed to validate mode: %d\n", ret);
+ }
+
+ if (enabling != chkfeat_gcs()) {
+ ksft_print_msg("%senabled by prctl but %senabled in CHKFEAT\n",
+ enabling ? "" : "not ",
+ chkfeat_gcs() ? "" : "not ");
+ ret = -EINVAL;
+ }
+ }
+
+ return ret;
+}
+
+/* Try to read the status */
+static bool read_status(void)
+{
+ unsigned long state;
+ int ret;
+
+ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+ &state, 0, 0, 0);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read state: %d\n", ret);
+ return false;
+ }
+
+ return state & PR_SHADOW_STACK_ENABLE;
+}
+
+/* Just a straight enable */
+static bool base_enable(void)
+{
+ int ret;
+
+ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE);
+ if (ret) {
+ ksft_print_msg("PR_SHADOW_STACK_ENABLE failed %d\n", ret);
+ return false;
+ }
+
+ return true;
+}
+
+/* Check we can read GCSPR_EL0 when GCS is enabled */
+static bool read_gcspr_el0(void)
+{
+ unsigned long *gcspr_el0;
+
+ ksft_print_msg("GET GCSPR\n");
+ gcspr_el0 = get_gcspr();
+ ksft_print_msg("GCSPR_EL0 is %p\n", gcspr_el0);
+
+ return true;
+}
+
+/* Also allow writes to stack */
+static bool enable_writeable(void)
+{
+ int ret;
+
+ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE);
+ if (ret) {
+ ksft_print_msg("PR_SHADOW_STACK_ENABLE writeable failed: %d\n", ret);
+ return false;
+ }
+
+ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE);
+ if (ret) {
+ ksft_print_msg("failed to restore plain enable %d\n", ret);
+ return false;
+ }
+
+ return true;
+}
+
+/* Also allow writes to stack */
+static bool enable_push_pop(void)
+{
+ int ret;
+
+ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH);
+ if (ret) {
+ ksft_print_msg("PR_SHADOW_STACK_ENABLE with push failed: %d\n",
+ ret);
+ return false;
+ }
+
+ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE);
+ if (ret) {
+ ksft_print_msg("failed to restore plain enable %d\n", ret);
+ return false;
+ }
+
+ return true;
+}
+
+/* Enable GCS and allow everything */
+static bool enable_all(void)
+{
+ int ret;
+
+ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH |
+ PR_SHADOW_STACK_WRITE);
+ if (ret) {
+ ksft_print_msg("PR_SHADOW_STACK_ENABLE with everything failed: %d\n",
+ ret);
+ return false;
+ }
+
+ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE);
+ if (ret) {
+ ksft_print_msg("failed to restore plain enable %d\n", ret);
+ return false;
+ }
+
+ return true;
+}
+
+static bool enable_invalid(void)
+{
+ int ret = gcs_set_status(ULONG_MAX);
+ if (ret == 0) {
+ ksft_print_msg("GCS_SET_STATUS %lx succeeded\n", ULONG_MAX);
+ return false;
+ }
+
+ return true;
+}
+
+/* Map a GCS */
+static bool map_guarded_stack(void)
+{
+ int ret;
+ uint64_t *buf;
+ uint64_t expected_cap;
+ int elem;
+ bool pass = true;
+
+ buf = (void *)my_syscall3(__NR_map_shadow_stack, 0, page_size,
+ SHADOW_STACK_SET_MARKER |
+ SHADOW_STACK_SET_TOKEN);
+ if (buf == MAP_FAILED) {
+ ksft_print_msg("Failed to map %lu byte GCS: %d\n",
+ page_size, errno);
+ return false;
+ }
+ ksft_print_msg("Mapped GCS at %p-%p\n", buf,
+ (void *)((uint64_t)buf + page_size));
+
+ /* The top of the newly allocated region should be 0 */
+ elem = (page_size / sizeof(uint64_t)) - 1;
+ if (buf[elem]) {
+ ksft_print_msg("Last entry is 0x%llx not 0x0\n", buf[elem]);
+ pass = false;
+ }
+
+ /* Then a valid cap token */
+ elem--;
+ expected_cap = ((uint64_t)buf + page_size - 16);
+ expected_cap &= GCS_CAP_ADDR_MASK;
+ expected_cap |= GCS_CAP_VALID_TOKEN;
+ if (buf[elem] != expected_cap) {
+ ksft_print_msg("Cap entry is 0x%llx not 0x%llx\n",
+ buf[elem], expected_cap);
+ pass = false;
+ }
+ ksft_print_msg("cap token is 0x%llx\n", buf[elem]);
+
+ /* The rest should be zeros */
+ for (elem = 0; elem < page_size / sizeof(uint64_t) - 2; elem++) {
+ if (!buf[elem])
+ continue;
+ ksft_print_msg("GCS slot %d is 0x%llx not 0x0\n",
+ elem, buf[elem]);
+ pass = false;
+ }
+
+ ret = munmap(buf, page_size);
+ if (ret != 0) {
+ ksft_print_msg("Failed to unmap %ld byte GCS: %d\n",
+ page_size, errno);
+ pass = false;
+ }
+
+ return pass;
+}
+
+/* A fork()ed process can run */
+static bool test_fork(void)
+{
+ unsigned long child_mode;
+ int ret, status;
+ pid_t pid;
+ bool pass = true;
+
+ pid = fork();
+ if (pid == -1) {
+ ksft_print_msg("fork() failed: %d\n", errno);
+ pass = false;
+ goto out;
+ }
+ if (pid == 0) {
+ /* In child, make sure we can call a function, read
+ * the GCS pointer and status and then exit */
+ valid_gcs_function();
+ get_gcspr();
+
+ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+ &child_mode, 0, 0, 0);
+ if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) {
+ ksft_print_msg("GCS not enabled in child\n");
+ ret = -EINVAL;
+ }
+
+ exit(ret);
+ }
+
+ /*
+ * In parent, check we can still do function calls then block
+ * for the child.
+ */
+ valid_gcs_function();
+
+ ksft_print_msg("Waiting for child %d\n", pid);
+
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ ksft_print_msg("Failed to wait for child: %d\n",
+ errno);
+ return false;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("Child exited due to signal %d\n",
+ WTERMSIG(status));
+ pass = false;
+ } else {
+ if (WEXITSTATUS(status)) {
+ ksft_print_msg("Child exited with status %d\n",
+ WEXITSTATUS(status));
+ pass = false;
+ }
+ }
+
+out:
+
+ return pass;
+}
+
+/* A vfork()ed process can run and exit */
+static bool test_vfork(void)
+{
+ unsigned long child_mode;
+ int ret, status;
+ pid_t pid;
+ bool pass = true;
+
+ pid = vfork();
+ if (pid == -1) {
+ ksft_print_msg("vfork() failed: %d\n", errno);
+ pass = false;
+ goto out;
+ }
+ if (pid == 0) {
+ /*
+ * In child, make sure we can call a function, read
+ * the GCS pointer and status and then exit.
+ */
+ valid_gcs_function();
+ get_gcspr();
+
+ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+ &child_mode, 0, 0, 0);
+ if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) {
+ ksft_print_msg("GCS not enabled in child\n");
+ ret = EXIT_FAILURE;
+ }
+
+ _exit(ret);
+ }
+
+ /*
+ * In parent, check we can still do function calls then check
+ * on the child.
+ */
+ valid_gcs_function();
+
+ ksft_print_msg("Waiting for child %d\n", pid);
+
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ ksft_print_msg("Failed to wait for child: %d\n",
+ errno);
+ return false;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("Child exited due to signal %d\n",
+ WTERMSIG(status));
+ pass = false;
+ } else if (WEXITSTATUS(status)) {
+ ksft_print_msg("Child exited with status %d\n",
+ WEXITSTATUS(status));
+ pass = false;
+ }
+
+out:
+
+ return pass;
+}
+
+typedef bool (*gcs_test)(void);
+
+static struct {
+ char *name;
+ gcs_test test;
+ bool needs_enable;
+} tests[] = {
+ { "read_status", read_status },
+ { "base_enable", base_enable, true },
+ { "read_gcspr_el0", read_gcspr_el0 },
+ { "enable_writeable", enable_writeable, true },
+ { "enable_push_pop", enable_push_pop, true },
+ { "enable_all", enable_all, true },
+ { "enable_invalid", enable_invalid, true },
+ { "map_guarded_stack", map_guarded_stack },
+ { "fork", test_fork },
+ { "vfork", test_vfork },
+};
+
+int main(void)
+{
+ int i, ret;
+ unsigned long gcs_mode;
+
+ ksft_print_header();
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_GCS))
+ ksft_exit_skip("SKIP GCS not supported\n");
+
+ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+ &gcs_mode, 0, 0, 0);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to read GCS state: %d\n", ret);
+
+ if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) {
+ gcs_mode = PR_SHADOW_STACK_ENABLE;
+ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
+ gcs_mode, 0, 0, 0);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to enable GCS: %d\n", ret);
+ }
+
+ ksft_set_plan(ARRAY_SIZE(tests));
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ ksft_test_result((*tests[i].test)(), "%s\n", tests[i].name);
+ }
+
+ /* One last test: disable GCS, we can do this one time */
+ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
+ if (ret != 0)
+ ksft_print_msg("Failed to disable GCS: %d\n", ret);
+
+ ksft_finished();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c
new file mode 100644
index 000000000000..1e6abb136ffd
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Limited.
+ *
+ * Tests for GCS mode locking. These tests rely on both having GCS
+ * unconfigured on entry and on the kselftest harness running each
+ * test in a fork()ed process which will have it's own mode.
+ */
+
+#include <limits.h>
+
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest_harness.h"
+
+#include "gcs-util.h"
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = 0; \
+ register long _arg4 __asm__ ("x3") = 0; \
+ register long _arg5 __asm__ ("x4") = 0; \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), \
+ "r"(_arg3), "r"(_arg4), \
+ "r"(_arg5), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+/* No mode bits are rejected for locking */
+TEST(lock_all_modes)
+{
+ int ret;
+
+ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, ULONG_MAX, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE(valid_modes)
+{
+};
+
+FIXTURE_VARIANT(valid_modes)
+{
+ unsigned long mode;
+};
+
+FIXTURE_VARIANT_ADD(valid_modes, enable)
+{
+ .mode = PR_SHADOW_STACK_ENABLE,
+};
+
+FIXTURE_VARIANT_ADD(valid_modes, enable_write)
+{
+ .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE,
+};
+
+FIXTURE_VARIANT_ADD(valid_modes, enable_push)
+{
+ .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH,
+};
+
+FIXTURE_VARIANT_ADD(valid_modes, enable_write_push)
+{
+ .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE |
+ PR_SHADOW_STACK_PUSH,
+};
+
+FIXTURE_SETUP(valid_modes)
+{
+}
+
+FIXTURE_TEARDOWN(valid_modes)
+{
+}
+
+/* We can set the mode at all */
+TEST_F(valid_modes, set)
+{
+ int ret;
+
+ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
+ variant->mode);
+ ASSERT_EQ(ret, 0);
+
+ _exit(0);
+}
+
+/* Enabling, locking then disabling is rejected */
+TEST_F(valid_modes, enable_lock_disable)
+{
+ unsigned long mode;
+ int ret;
+
+ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
+ variant->mode);
+ ASSERT_EQ(ret, 0);
+
+ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(mode, variant->mode);
+
+ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0);
+ ASSERT_EQ(ret, -EBUSY);
+
+ _exit(0);
+}
+
+/* Locking then enabling is rejected */
+TEST_F(valid_modes, lock_enable)
+{
+ unsigned long mode;
+ int ret;
+
+ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
+ variant->mode);
+ ASSERT_EQ(ret, -EBUSY);
+
+ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(mode, 0);
+
+ _exit(0);
+}
+
+/* Locking then changing other modes is fine */
+TEST_F(valid_modes, lock_enable_disable_others)
+{
+ unsigned long mode;
+ int ret;
+
+ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
+ variant->mode);
+ ASSERT_EQ(ret, 0);
+
+ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(mode, variant->mode);
+
+ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
+ PR_SHADOW_STACK_ALL_MODES);
+ ASSERT_EQ(ret, 0);
+
+ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES);
+
+ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
+ variant->mode);
+ ASSERT_EQ(ret, 0);
+
+ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(mode, variant->mode);
+
+ _exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long mode;
+ int ret;
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_GCS))
+ ksft_exit_skip("SKIP GCS not supported\n");
+
+ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
+ if (ret) {
+ ksft_print_msg("Failed to read GCS state: %d\n", ret);
+ return EXIT_FAILURE;
+ }
+
+ if (mode & PR_SHADOW_STACK_ENABLE) {
+ ksft_print_msg("GCS was enabled, test unsupported\n");
+ return KSFT_SKIP;
+ }
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S
new file mode 100644
index 000000000000..b88b25217da5
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S
@@ -0,0 +1,311 @@
+// Program that loops for ever doing lots of recursions and system calls,
+// intended to be used as part of a stress test for GCS context switching.
+//
+// Copyright 2015-2023 Arm Ltd
+
+#include <asm/unistd.h>
+
+#define sa_sz 32
+#define sa_flags 8
+#define sa_handler 0
+#define sa_mask_sz 8
+
+#define si_code 8
+
+#define SIGINT 2
+#define SIGABRT 6
+#define SIGUSR1 10
+#define SIGSEGV 11
+#define SIGUSR2 12
+#define SIGTERM 15
+#define SEGV_CPERR 10
+
+#define SA_NODEFER 1073741824
+#define SA_SIGINFO 4
+#define ucontext_regs 184
+
+#define PR_SET_SHADOW_STACK_STATUS 75
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+
+#define GCSPR_EL0 S3_3_C2_C5_1
+
+.macro function name
+ .macro endfunction
+ .type \name, @function
+ .purgem endfunction
+ .endm
+\name:
+.endm
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+.globl putc
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+.globl puts
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", @progbits, 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+ mov x1, sp
+ str x30, [sp, #-32]! // Result can't be > 20 digits
+
+ mov x2, #0
+ strb w2, [x1, #-1]! // Write the NUL terminator
+
+ mov x2, #10
+0: udiv x3, x0, x2 // div-mod loop to generate the digits
+ msub x0, x3, x2, x0
+ add w0, w0, #'0'
+ strb w0, [x1, #-1]!
+ mov x0, x3
+ cbnz x3, 0b
+
+ ldrb w0, [x1]
+ cbnz w0, 1f
+ mov w0, #'0' // Print "0" for 0, not ""
+ strb w0, [x1, #-1]!
+
+1: mov x0, x1
+ bl puts
+
+ ldr x30, [sp], #32
+ ret
+endfunction
+.globl putdec
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+ mov x5, x30
+
+ bl putdec
+ mov x0, #'\n'
+ bl putc
+
+ ret x5
+endfunction
+.globl putdecn
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+ mov w2, #0
+endfunction
+.globl memclr
+ // fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+ cmp x1, #0
+ b.eq 1f
+
+0: strb w2, [x0], #1
+ subs x1, x1, #1
+ b.ne 0b
+
+1: ret
+endfunction
+.globl memfill
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b abort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+
+function tickle_handler
+ // Perhaps collect GCSPR_EL0 here in future?
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error\n"
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+function segv_handler
+ // stash the siginfo_t *
+ mov x20, x1
+
+ // Disable GCS, we don't want additional faults logging things
+ mov x0, PR_SET_SHADOW_STACK_STATUS
+ mov x1, xzr
+ mov x2, xzr
+ mov x3, xzr
+ mov x4, xzr
+ mov x5, xzr
+ mov x8, #__NR_prctl
+ svc #0
+
+ puts "Got SIGSEGV code "
+
+ ldr x21, [x20, #si_code]
+ mov x0, x21
+ bl putdec
+
+ // GCS faults should have si_code SEGV_CPERR
+ cmp x21, #SEGV_CPERR
+ bne 1f
+
+ puts " (GCS violation)"
+1:
+ mov x0, '\n'
+ bl putc
+ b abort
+endfunction
+
+// Recurse x20 times
+.macro recurse id
+function recurse\id
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ cmp x20, 0
+ beq 1f
+ sub x20, x20, 1
+ bl recurse\id
+
+1:
+ ldp x29, x30, [sp], #16
+
+ // Do a syscall immediately prior to returning to try to provoke
+ // scheduling and migration at a point where coherency issues
+ // might trigger.
+ mov x8, #__NR_getpid
+ svc #0
+
+ ret
+endfunction
+.endm
+
+// Generate and use two copies so we're changing the GCS contents
+recurse 1
+recurse 2
+
+.globl _start
+function _start
+ // Run with GCS
+ mov x0, PR_SET_SHADOW_STACK_STATUS
+ mov x1, PR_SHADOW_STACK_ENABLE
+ mov x2, xzr
+ mov x3, xzr
+ mov x4, xzr
+ mov x5, xzr
+ mov x8, #__NR_prctl
+ svc #0
+ cbz x0, 1f
+ puts "Failed to enable GCS\n"
+ b abort
+1:
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, tickle_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov w0, #SIGSEGV
+ adr x1, segv_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ puts "Running\n"
+
+loop:
+ // Small recursion depth so we're frequently flipping between
+ // the two recursors and changing what's on the stack
+ mov x20, #5
+ bl recurse1
+ mov x20, #5
+ bl recurse2
+ b loop
+endfunction
+
+abort:
+ mov x0, #255
+ mov x8, #__NR_exit
+ svc #0
diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c
new file mode 100644
index 000000000000..86d8cd42aee7
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022-3 ARM Limited.
+ */
+
+#define _GNU_SOURCE
+#define _POSIX_C_SOURCE 199309L
+
+#include <errno.h>
+#include <getopt.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/epoll.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+
+struct child_data {
+ char *name, *output;
+ pid_t pid;
+ int stdout;
+ bool output_seen;
+ bool exited;
+ int exit_status;
+ int exit_signal;
+};
+
+static int epoll_fd;
+static struct child_data *children;
+static struct epoll_event *evs;
+static int tests;
+static int num_children;
+static bool terminate;
+
+static int startup_pipe[2];
+
+static int num_processors(void)
+{
+ long nproc = sysconf(_SC_NPROCESSORS_CONF);
+ if (nproc < 0) {
+ perror("Unable to read number of processors\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return nproc;
+}
+
+static void start_thread(struct child_data *child, int id)
+{
+ int ret, pipefd[2], i;
+ struct epoll_event ev;
+
+ ret = pipe(pipefd);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n",
+ strerror(errno), errno);
+
+ child->pid = fork();
+ if (child->pid == -1)
+ ksft_exit_fail_msg("fork() failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ if (!child->pid) {
+ /*
+ * In child, replace stdout with the pipe, errors to
+ * stderr from here as kselftest prints to stdout.
+ */
+ ret = dup2(pipefd[1], 1);
+ if (ret == -1) {
+ fprintf(stderr, "dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Duplicate the read side of the startup pipe to
+ * FD 3 so we can close everything else.
+ */
+ ret = dup2(startup_pipe[0], 3);
+ if (ret == -1) {
+ fprintf(stderr, "dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Very dumb mechanism to clean open FDs other than
+ * stdio. We don't want O_CLOEXEC for the pipes...
+ */
+ for (i = 4; i < 8192; i++)
+ close(i);
+
+ /*
+ * Read from the startup pipe, there should be no data
+ * and we should block until it is closed. We just
+ * carry on on error since this isn't super critical.
+ */
+ ret = read(3, &i, sizeof(i));
+ if (ret < 0)
+ fprintf(stderr, "read(startp pipe) failed: %s (%d)\n",
+ strerror(errno), errno);
+ if (ret > 0)
+ fprintf(stderr, "%d bytes of data on startup pipe\n",
+ ret);
+ close(3);
+
+ ret = execl("gcs-stress-thread", "gcs-stress-thread", NULL);
+ fprintf(stderr, "execl(gcs-stress-thread) failed: %d (%s)\n",
+ errno, strerror(errno));
+
+ exit(EXIT_FAILURE);
+ } else {
+ /*
+ * In parent, remember the child and close our copy of the
+ * write side of stdout.
+ */
+ close(pipefd[1]);
+ child->stdout = pipefd[0];
+ child->output = NULL;
+ child->exited = false;
+ child->output_seen = false;
+
+ ev.events = EPOLLIN | EPOLLHUP;
+ ev.data.ptr = child;
+
+ ret = asprintf(&child->name, "Thread-%d", id);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev);
+ if (ret < 0) {
+ ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n",
+ child->name, strerror(errno), errno);
+ }
+ }
+
+ ksft_print_msg("Started %s\n", child->name);
+ num_children++;
+}
+
+static bool child_output_read(struct child_data *child)
+{
+ char read_data[1024];
+ char work[1024];
+ int ret, len, cur_work, cur_read;
+
+ ret = read(child->stdout, read_data, sizeof(read_data));
+ if (ret < 0) {
+ if (errno == EINTR)
+ return true;
+
+ ksft_print_msg("%s: read() failed: %s (%d)\n",
+ child->name, strerror(errno),
+ errno);
+ return false;
+ }
+ len = ret;
+
+ child->output_seen = true;
+
+ /* Pick up any partial read */
+ if (child->output) {
+ strncpy(work, child->output, sizeof(work) - 1);
+ cur_work = strnlen(work, sizeof(work));
+ free(child->output);
+ child->output = NULL;
+ } else {
+ cur_work = 0;
+ }
+
+ cur_read = 0;
+ while (cur_read < len) {
+ work[cur_work] = read_data[cur_read++];
+
+ if (work[cur_work] == '\n') {
+ work[cur_work] = '\0';
+ ksft_print_msg("%s: %s\n", child->name, work);
+ cur_work = 0;
+ } else {
+ cur_work++;
+ }
+ }
+
+ if (cur_work) {
+ work[cur_work] = '\0';
+ ret = asprintf(&child->output, "%s", work);
+ if (ret == -1)
+ ksft_exit_fail_msg("Out of memory\n");
+ }
+
+ return false;
+}
+
+static void child_output(struct child_data *child, uint32_t events,
+ bool flush)
+{
+ bool read_more;
+
+ if (events & EPOLLIN) {
+ do {
+ read_more = child_output_read(child);
+ } while (read_more);
+ }
+
+ if (events & EPOLLHUP) {
+ close(child->stdout);
+ child->stdout = -1;
+ flush = true;
+ }
+
+ if (flush && child->output) {
+ ksft_print_msg("%s: %s<EOF>\n", child->name, child->output);
+ free(child->output);
+ child->output = NULL;
+ }
+}
+
+static void child_tickle(struct child_data *child)
+{
+ if (child->output_seen && !child->exited)
+ kill(child->pid, SIGUSR1);
+}
+
+static void child_stop(struct child_data *child)
+{
+ if (!child->exited)
+ kill(child->pid, SIGTERM);
+}
+
+static void child_cleanup(struct child_data *child)
+{
+ pid_t ret;
+ int status;
+ bool fail = false;
+
+ if (!child->exited) {
+ do {
+ ret = waitpid(child->pid, &status, 0);
+ if (ret == -1 && errno == EINTR)
+ continue;
+
+ if (ret == -1) {
+ ksft_print_msg("waitpid(%d) failed: %s (%d)\n",
+ child->pid, strerror(errno),
+ errno);
+ fail = true;
+ break;
+ }
+
+ if (WIFEXITED(status)) {
+ child->exit_status = WEXITSTATUS(status);
+ child->exited = true;
+ }
+
+ if (WIFSIGNALED(status)) {
+ child->exit_signal = WTERMSIG(status);
+ ksft_print_msg("%s: Exited due to signal %d\n",
+ child->name, child->exit_signal);
+ fail = true;
+ child->exited = true;
+ }
+ } while (!child->exited);
+ }
+
+ if (!child->output_seen) {
+ ksft_print_msg("%s no output seen\n", child->name);
+ fail = true;
+ }
+
+ if (child->exit_status != 0) {
+ ksft_print_msg("%s exited with error code %d\n",
+ child->name, child->exit_status);
+ fail = true;
+ }
+
+ ksft_test_result(!fail, "%s\n", child->name);
+}
+
+static void handle_child_signal(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ bool found = false;
+
+ for (i = 0; i < num_children; i++) {
+ if (children[i].pid == info->si_pid) {
+ children[i].exited = true;
+ children[i].exit_status = info->si_status;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n",
+ info->si_pid, info->si_status);
+}
+
+static void handle_exit_signal(int sig, siginfo_t *info, void *context)
+{
+ int i;
+
+ /* If we're already exiting then don't signal again */
+ if (terminate)
+ return;
+
+ ksft_print_msg("Got signal, exiting...\n");
+
+ terminate = true;
+
+ /*
+ * This should be redundant, the main loop should clean up
+ * after us, but for safety stop everything we can here.
+ */
+ for (i = 0; i < num_children; i++)
+ child_stop(&children[i]);
+}
+
+/* Handle any pending output without blocking */
+static void drain_output(bool flush)
+{
+ int ret = 1;
+ int i;
+
+ while (ret > 0) {
+ ret = epoll_wait(epoll_fd, evs, tests, 0);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_print_msg("epoll_wait() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ for (i = 0; i < ret; i++)
+ child_output(evs[i].data.ptr, evs[i].events, flush);
+ }
+}
+
+static const struct option options[] = {
+ { "timeout", required_argument, NULL, 't' },
+ { }
+};
+
+int main(int argc, char **argv)
+{
+ int seen_children;
+ bool all_children_started = false;
+ int gcs_threads;
+ int timeout = 10;
+ int ret, cpus, i, c;
+ struct sigaction sa;
+
+ while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) {
+ switch (c) {
+ case 't':
+ ret = sscanf(optarg, "%d", &timeout);
+ if (ret != 1)
+ ksft_exit_fail_msg("Failed to parse timeout %s\n",
+ optarg);
+ break;
+ default:
+ ksft_exit_fail_msg("Unknown argument\n");
+ }
+ }
+
+ cpus = num_processors();
+ tests = 0;
+
+ if (getauxval(AT_HWCAP) & HWCAP_GCS) {
+ /* One extra thread, trying to trigger migrations */
+ gcs_threads = cpus + 1;
+ tests += gcs_threads;
+ } else {
+ gcs_threads = 0;
+ }
+
+ ksft_print_header();
+ ksft_set_plan(tests);
+
+ ksft_print_msg("%d CPUs, %d GCS threads\n",
+ cpus, gcs_threads);
+
+ if (!tests)
+ ksft_exit_skip("No tests scheduled\n");
+
+ if (timeout > 0)
+ ksft_print_msg("Will run for %ds\n", timeout);
+ else
+ ksft_print_msg("Will run until terminated\n");
+
+ children = calloc(sizeof(*children), tests);
+ if (!children)
+ ksft_exit_fail_msg("Unable to allocate child data\n");
+
+ ret = epoll_create1(EPOLL_CLOEXEC);
+ if (ret < 0)
+ ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n",
+ strerror(errno), ret);
+ epoll_fd = ret;
+
+ /* Create a pipe which children will block on before execing */
+ ret = pipe(startup_pipe);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n",
+ strerror(errno), errno);
+
+ /* Get signal handers ready before we start any children */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handle_exit_signal;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ ret = sigaction(SIGINT, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n",
+ strerror(errno), errno);
+ ret = sigaction(SIGTERM, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n",
+ strerror(errno), errno);
+ sa.sa_sigaction = handle_child_signal;
+ ret = sigaction(SIGCHLD, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ evs = calloc(tests, sizeof(*evs));
+ if (!evs)
+ ksft_exit_fail_msg("Failed to allocate %d epoll events\n",
+ tests);
+
+ for (i = 0; i < gcs_threads; i++)
+ start_thread(&children[i], i);
+
+ /*
+ * All children started, close the startup pipe and let them
+ * run.
+ */
+ close(startup_pipe[0]);
+ close(startup_pipe[1]);
+
+ timeout *= 10;
+ for (;;) {
+ /* Did we get a signal asking us to exit? */
+ if (terminate)
+ break;
+
+ /*
+ * Timeout is counted in 100ms with no output, the
+ * tests print during startup then are silent when
+ * running so this should ensure they all ran enough
+ * to install the signal handler, this is especially
+ * useful in emulation where we will both be slow and
+ * likely to have a large set of VLs.
+ */
+ ret = epoll_wait(epoll_fd, evs, tests, 100);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ /* Output? */
+ if (ret > 0) {
+ for (i = 0; i < ret; i++) {
+ child_output(evs[i].data.ptr, evs[i].events,
+ false);
+ }
+ continue;
+ }
+
+ /* Otherwise epoll_wait() timed out */
+
+ /*
+ * If the child processes have not produced output they
+ * aren't actually running the tests yet.
+ */
+ if (!all_children_started) {
+ seen_children = 0;
+
+ for (i = 0; i < num_children; i++)
+ if (children[i].output_seen ||
+ children[i].exited)
+ seen_children++;
+
+ if (seen_children != num_children) {
+ ksft_print_msg("Waiting for %d children\n",
+ num_children - seen_children);
+ continue;
+ }
+
+ all_children_started = true;
+ }
+
+ ksft_print_msg("Sending signals, timeout remaining: %d00ms\n",
+ timeout);
+
+ for (i = 0; i < num_children; i++)
+ child_tickle(&children[i]);
+
+ /* Negative timeout means run indefinitely */
+ if (timeout < 0)
+ continue;
+ if (--timeout == 0)
+ break;
+ }
+
+ ksft_print_msg("Finishing up...\n");
+ terminate = true;
+
+ for (i = 0; i < tests; i++)
+ child_stop(&children[i]);
+
+ drain_output(false);
+
+ for (i = 0; i < tests; i++)
+ child_cleanup(&children[i]);
+
+ drain_output(true);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/arm64/gcs/gcs-util.h b/tools/testing/selftests/arm64/gcs/gcs-util.h
new file mode 100644
index 000000000000..c99a6b39ac14
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/gcs-util.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 ARM Limited.
+ */
+
+#ifndef GCS_UTIL_H
+#define GCS_UTIL_H
+
+#include <stdbool.h>
+
+#ifndef __NR_map_shadow_stack
+#define __NR_map_shadow_stack 453
+#endif
+
+#ifndef __NR_prctl
+#define __NR_prctl 167
+#endif
+
+#ifndef NT_ARM_GCS
+#define NT_ARM_GCS 0x410
+
+struct user_gcs {
+ __u64 features_enabled;
+ __u64 features_locked;
+ __u64 gcspr_el0;
+};
+#endif
+
+/* Shadow Stack/Guarded Control Stack interface */
+#define PR_GET_SHADOW_STACK_STATUS 74
+#define PR_SET_SHADOW_STACK_STATUS 75
+#define PR_LOCK_SHADOW_STACK_STATUS 76
+
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+# define PR_SHADOW_STACK_WRITE (1UL << 1)
+# define PR_SHADOW_STACK_PUSH (1UL << 2)
+
+#define PR_SHADOW_STACK_ALL_MODES \
+ PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH
+
+#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
+#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack merker in the shadow stack */
+
+#define GCS_CAP_ADDR_MASK (0xfffffffffffff000UL)
+#define GCS_CAP_TOKEN_MASK (0x0000000000000fffUL)
+#define GCS_CAP_VALID_TOKEN 1
+#define GCS_CAP_IN_PROGRESS_TOKEN 5
+
+#define GCS_CAP(x) (((unsigned long)(x) & GCS_CAP_ADDR_MASK) | \
+ GCS_CAP_VALID_TOKEN)
+
+static inline unsigned long *get_gcspr(void)
+{
+ unsigned long *gcspr;
+
+ asm volatile(
+ "mrs %0, S3_3_C2_C5_1"
+ : "=r" (gcspr)
+ :
+ : "cc");
+
+ return gcspr;
+}
+
+static inline void __attribute__((always_inline)) gcsss1(unsigned long *Xt)
+{
+ asm volatile (
+ "sys #3, C7, C7, #2, %0\n"
+ :
+ : "rZ" (Xt)
+ : "memory");
+}
+
+static inline unsigned long __attribute__((always_inline)) *gcsss2(void)
+{
+ unsigned long *Xt;
+
+ asm volatile(
+ "SYSL %0, #3, C7, C7, #3\n"
+ : "=r" (Xt)
+ :
+ : "memory");
+
+ return Xt;
+}
+
+static inline bool chkfeat_gcs(void)
+{
+ register long val __asm__ ("x16") = 1;
+
+ /* CHKFEAT x16 */
+ asm volatile(
+ "hint #0x28\n"
+ : "=r" (val)
+ : "r" (val));
+
+ return val != 1;
+}
+
+#endif
diff --git a/tools/testing/selftests/arm64/gcs/gcspushm.S b/tools/testing/selftests/arm64/gcs/gcspushm.S
new file mode 100644
index 000000000000..bbe17c1325ac
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/gcspushm.S
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright 2024 Arm Limited
+//
+// Give ourselves GCS push permissions then use them
+
+#include <asm/unistd.h>
+
+/* Shadow Stack/Guarded Control Stack interface */
+#define PR_GET_SHADOW_STACK_STATUS 74
+#define PR_SET_SHADOW_STACK_STATUS 75
+#define PR_LOCK_SHADOW_STACK_STATUS 76
+
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+# define PR_SHADOW_STACK_WRITE (1UL << 1)
+# define PR_SHADOW_STACK_PUSH (1UL << 2)
+
+#define KSFT_SKIP 4
+
+.macro function name
+ .macro endfunction
+ .type \name, @function
+ .purgem endfunction
+ .endm
+\name:
+.endm
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+.globl putc
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+.globl puts
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", @progbits, 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+.globl _start
+function _start
+ // Run with GCS
+ mov x0, PR_SET_SHADOW_STACK_STATUS
+ mov x1, PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH
+ mov x2, xzr
+ mov x3, xzr
+ mov x4, xzr
+ mov x5, xzr
+ mov x8, #__NR_prctl
+ svc #0
+ cbz x0, 1f
+ puts "Failed to enable GCS with push permission\n"
+ mov x0, #KSFT_SKIP
+ b 2f
+1:
+ sys #3, c7, c7, #0, x0 // GCSPUSHM
+ sysl x0, #3, c7, c7, #1 // GCSPOPM
+
+ mov x0, #0
+2:
+ mov x8, #__NR_exit
+ svc #0
diff --git a/tools/testing/selftests/arm64/gcs/gcsstr.S b/tools/testing/selftests/arm64/gcs/gcsstr.S
new file mode 100644
index 000000000000..a42bba6e30b1
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/gcsstr.S
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright 2024 Arm Limited
+//
+// Give ourselves GCS write permissions then use them
+
+#include <asm/unistd.h>
+
+/* Shadow Stack/Guarded Control Stack interface */
+#define PR_GET_SHADOW_STACK_STATUS 74
+#define PR_SET_SHADOW_STACK_STATUS 75
+#define PR_LOCK_SHADOW_STACK_STATUS 76
+
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+# define PR_SHADOW_STACK_WRITE (1UL << 1)
+# define PR_SHADOW_STACK_PUSH (1UL << 2)
+
+#define GCSPR_EL0 S3_3_C2_C5_1
+
+#define KSFT_SKIP 4
+
+.macro function name
+ .macro endfunction
+ .type \name, @function
+ .purgem endfunction
+ .endm
+\name:
+.endm
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+.globl putc
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+.globl puts
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", @progbits, 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+.globl _start
+function _start
+ // Run with GCS
+ mov x0, PR_SET_SHADOW_STACK_STATUS
+ mov x1, PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE
+ mov x2, xzr
+ mov x3, xzr
+ mov x4, xzr
+ mov x5, xzr
+ mov x8, #__NR_prctl
+ svc #0
+ cbz x0, 1f
+ puts "Failed to enable GCS with write permission\n"
+ mov x0, #KSFT_SKIP
+ b 2f
+1:
+ mrs x0, GCSPR_EL0
+ sub x0, x0, #8
+ .inst 0xd91f1c01 // GCSSTR x1, x0
+
+ mov x0, #0
+2:
+ mov x8, #__NR_exit
+ svc #0
diff --git a/tools/testing/selftests/arm64/gcs/libc-gcs.c b/tools/testing/selftests/arm64/gcs/libc-gcs.c
new file mode 100644
index 000000000000..17b2fabfec38
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/libc-gcs.c
@@ -0,0 +1,728 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Limited.
+ */
+
+#define _GNU_SOURCE
+
+#include <pthread.h>
+#include <stdbool.h>
+
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/uio.h>
+
+#include <asm/hwcap.h>
+#include <asm/mman.h>
+
+#include <linux/compiler.h>
+
+#include "kselftest_harness.h"
+
+#include "gcs-util.h"
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = 0; \
+ register long _arg4 __asm__ ("x3") = 0; \
+ register long _arg5 __asm__ ("x4") = 0; \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), \
+ "r"(_arg3), "r"(_arg4), \
+ "r"(_arg5), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+static noinline void gcs_recurse(int depth)
+{
+ if (depth)
+ gcs_recurse(depth - 1);
+
+ /* Prevent tail call optimization so we actually recurse */
+ asm volatile("dsb sy" : : : "memory");
+}
+
+/* Smoke test that a function call and return works*/
+TEST(can_call_function)
+{
+ gcs_recurse(0);
+}
+
+static void *gcs_test_thread(void *arg)
+{
+ int ret;
+ unsigned long mode;
+
+ /*
+ * Some libcs don't seem to fill unused arguments with 0 but
+ * the kernel validates this so we supply all 5 arguments.
+ */
+ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
+ if (ret != 0) {
+ ksft_print_msg("PR_GET_SHADOW_STACK_STATUS failed: %d\n", ret);
+ return NULL;
+ }
+
+ if (!(mode & PR_SHADOW_STACK_ENABLE)) {
+ ksft_print_msg("GCS not enabled in thread, mode is %lu\n",
+ mode);
+ return NULL;
+ }
+
+ /* Just in case... */
+ gcs_recurse(0);
+
+ /* Use a non-NULL value to indicate a pass */
+ return &gcs_test_thread;
+}
+
+/* Verify that if we start a new thread it has GCS enabled */
+TEST(gcs_enabled_thread)
+{
+ pthread_t thread;
+ void *thread_ret;
+ int ret;
+
+ ret = pthread_create(&thread, NULL, gcs_test_thread, NULL);
+ ASSERT_TRUE(ret == 0);
+ if (ret != 0)
+ return;
+
+ ret = pthread_join(thread, &thread_ret);
+ ASSERT_TRUE(ret == 0);
+ if (ret != 0)
+ return;
+
+ ASSERT_TRUE(thread_ret != NULL);
+}
+
+/* Read the GCS until we find the terminator */
+TEST(gcs_find_terminator)
+{
+ unsigned long *gcs, *cur;
+
+ gcs = get_gcspr();
+ cur = gcs;
+ while (*cur)
+ cur++;
+
+ ksft_print_msg("GCS in use from %p-%p\n", gcs, cur);
+
+ /*
+ * We should have at least whatever called into this test so
+ * the two pointer should differ.
+ */
+ ASSERT_TRUE(gcs != cur);
+}
+
+/*
+ * We can access a GCS via ptrace
+ *
+ * This could usefully have a fixture but note that each test is
+ * fork()ed into a new child whcih causes issues. Might be better to
+ * lift at least some of this out into a separate, non-harness, test
+ * program.
+ */
+TEST(ptrace_read_write)
+{
+ pid_t child, pid;
+ int ret, status;
+ siginfo_t si;
+ uint64_t val, rval, gcspr;
+ struct user_gcs child_gcs;
+ struct iovec iov, local_iov, remote_iov;
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("fork() failed: %d (%s)\n",
+ errno, strerror(errno));
+ ASSERT_NE(child, -1);
+ }
+
+ if (child == 0) {
+ /*
+ * In child, make sure there's something on the stack and
+ * ask to be traced.
+ */
+ gcs_recurse(0);
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME %s",
+ strerror(errno));
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP) %s",
+ strerror(errno));
+
+ return;
+ }
+
+ ksft_print_msg("Child: %d\n", child);
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ ksft_print_msg("wait() failed: %s",
+ strerror(errno));
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH) {
+ ASSERT_NE(errno, ESRCH);
+ return;
+ }
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_print_msg("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH) {
+ ASSERT_NE(errno, ESRCH);
+ return;
+ }
+
+ ksft_print_msg("PTRACE_CONT: %s\n", strerror(errno));
+ goto error;
+ }
+ }
+
+ /* Where is the child GCS? */
+ iov.iov_base = &child_gcs;
+ iov.iov_len = sizeof(child_gcs);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_GCS, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read child GCS state: %s (%d)\n",
+ strerror(errno), errno);
+ goto error;
+ }
+
+ /* We should have inherited GCS over fork(), confirm */
+ if (!(child_gcs.features_enabled & PR_SHADOW_STACK_ENABLE)) {
+ ASSERT_TRUE(child_gcs.features_enabled &
+ PR_SHADOW_STACK_ENABLE);
+ goto error;
+ }
+
+ gcspr = child_gcs.gcspr_el0;
+ ksft_print_msg("Child GCSPR 0x%lx, flags %llx, locked %llx\n",
+ gcspr, child_gcs.features_enabled,
+ child_gcs.features_locked);
+
+ /* Ideally we'd cross check with the child memory map */
+
+ errno = 0;
+ val = ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL);
+ ret = errno;
+ if (ret != 0)
+ ksft_print_msg("PTRACE_PEEKDATA failed: %s (%d)\n",
+ strerror(ret), ret);
+ EXPECT_EQ(ret, 0);
+
+ /* The child should be in a function, the GCSPR shouldn't be 0 */
+ EXPECT_NE(val, 0);
+
+ /* Same thing via process_vm_readv() */
+ local_iov.iov_base = &rval;
+ local_iov.iov_len = sizeof(rval);
+ remote_iov.iov_base = (void *)gcspr;
+ remote_iov.iov_len = sizeof(rval);
+ ret = process_vm_readv(child, &local_iov, 1, &remote_iov, 1, 0);
+ if (ret == -1)
+ ksft_print_msg("process_vm_readv() failed: %s (%d)\n",
+ strerror(errno), errno);
+ EXPECT_EQ(ret, sizeof(rval));
+ EXPECT_EQ(val, rval);
+
+ /* Write data via a peek */
+ ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, NULL);
+ if (ret == -1)
+ ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n",
+ strerror(errno), errno);
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(0, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL));
+
+ /* Restore what we had before */
+ ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, val);
+ if (ret == -1)
+ ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n",
+ strerror(errno), errno);
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(val, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL));
+
+ /* That's all, folks */
+ kill(child, SIGKILL);
+ return;
+
+error:
+ kill(child, SIGKILL);
+ ASSERT_FALSE(true);
+}
+
+FIXTURE(map_gcs)
+{
+ unsigned long *stack;
+};
+
+FIXTURE_VARIANT(map_gcs)
+{
+ size_t stack_size;
+ unsigned long flags;
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s2k_cap_marker)
+{
+ .stack_size = 2 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s2k_cap)
+{
+ .stack_size = 2 * 1024,
+ .flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s2k_marker)
+{
+ .stack_size = 2 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s2k)
+{
+ .stack_size = 2 * 1024,
+ .flags = 0,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s4k_cap_marker)
+{
+ .stack_size = 4 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s4k_cap)
+{
+ .stack_size = 4 * 1024,
+ .flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s3k_marker)
+{
+ .stack_size = 4 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s4k)
+{
+ .stack_size = 4 * 1024,
+ .flags = 0,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s16k_cap_marker)
+{
+ .stack_size = 16 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s16k_cap)
+{
+ .stack_size = 16 * 1024,
+ .flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s16k_marker)
+{
+ .stack_size = 16 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s16k)
+{
+ .stack_size = 16 * 1024,
+ .flags = 0,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s64k_cap_marker)
+{
+ .stack_size = 64 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s64k_cap)
+{
+ .stack_size = 64 * 1024,
+ .flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s64k_marker)
+{
+ .stack_size = 64 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s64k)
+{
+ .stack_size = 64 * 1024,
+ .flags = 0,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s128k_cap_marker)
+{
+ .stack_size = 128 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s128k_cap)
+{
+ .stack_size = 128 * 1024,
+ .flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s128k_marker)
+{
+ .stack_size = 128 * 1024,
+ .flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s128k)
+{
+ .stack_size = 128 * 1024,
+ .flags = 0,
+};
+
+FIXTURE_SETUP(map_gcs)
+{
+ self->stack = (void *)syscall(__NR_map_shadow_stack, 0,
+ variant->stack_size,
+ variant->flags);
+ ASSERT_FALSE(self->stack == MAP_FAILED);
+ ksft_print_msg("Allocated stack from %p-%p\n", self->stack,
+ self->stack + variant->stack_size);
+}
+
+FIXTURE_TEARDOWN(map_gcs)
+{
+ int ret;
+
+ if (self->stack != MAP_FAILED) {
+ ret = munmap(self->stack, variant->stack_size);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+/* The stack has a cap token */
+TEST_F(map_gcs, stack_capped)
+{
+ unsigned long *stack = self->stack;
+ size_t cap_index;
+
+ cap_index = (variant->stack_size / sizeof(unsigned long));
+
+ switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) {
+ case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN:
+ cap_index -= 2;
+ break;
+ case SHADOW_STACK_SET_TOKEN:
+ cap_index -= 1;
+ break;
+ case SHADOW_STACK_SET_MARKER:
+ case 0:
+ /* No cap, no test */
+ return;
+ }
+
+ ASSERT_EQ(stack[cap_index], GCS_CAP(&stack[cap_index]));
+}
+
+/* The top of the stack is 0 */
+TEST_F(map_gcs, stack_terminated)
+{
+ unsigned long *stack = self->stack;
+ size_t term_index;
+
+ if (!(variant->flags & SHADOW_STACK_SET_MARKER))
+ return;
+
+ term_index = (variant->stack_size / sizeof(unsigned long)) - 1;
+
+ ASSERT_EQ(stack[term_index], 0);
+}
+
+/* Writes should fault */
+TEST_F_SIGNAL(map_gcs, not_writeable, SIGSEGV)
+{
+ self->stack[0] = 0;
+}
+
+/* Put it all together, we can safely switch to and from the stack */
+TEST_F(map_gcs, stack_switch)
+{
+ size_t cap_index;
+ cap_index = (variant->stack_size / sizeof(unsigned long));
+ unsigned long *orig_gcspr_el0, *pivot_gcspr_el0;
+
+ /* Skip over the stack terminator and point at the cap */
+ switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) {
+ case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN:
+ cap_index -= 2;
+ break;
+ case SHADOW_STACK_SET_TOKEN:
+ cap_index -= 1;
+ break;
+ case SHADOW_STACK_SET_MARKER:
+ case 0:
+ /* No cap, no test */
+ return;
+ }
+ pivot_gcspr_el0 = &self->stack[cap_index];
+
+ /* Pivot to the new GCS */
+ ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n",
+ pivot_gcspr_el0, get_gcspr(),
+ *pivot_gcspr_el0);
+ gcsss1(pivot_gcspr_el0);
+ orig_gcspr_el0 = gcsss2();
+ ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n",
+ get_gcspr(), orig_gcspr_el0,
+ *pivot_gcspr_el0);
+
+ ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr());
+
+ /* New GCS must be in the new buffer */
+ ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack);
+ ASSERT_TRUE((unsigned long)get_gcspr() <=
+ (unsigned long)self->stack + variant->stack_size);
+
+ /* We should be able to use all but 2 slots of the new stack */
+ ksft_print_msg("Recursing %zu levels\n", cap_index - 1);
+ gcs_recurse(cap_index - 1);
+
+ /* Pivot back to the original GCS */
+ gcsss1(orig_gcspr_el0);
+ pivot_gcspr_el0 = gcsss2();
+
+ gcs_recurse(0);
+ ksft_print_msg("Pivoted back to GCSPR_EL0 0x%p\n", get_gcspr());
+}
+
+/* We fault if we try to go beyond the end of the stack */
+TEST_F_SIGNAL(map_gcs, stack_overflow, SIGSEGV)
+{
+ size_t cap_index;
+ cap_index = (variant->stack_size / sizeof(unsigned long));
+ unsigned long *orig_gcspr_el0, *pivot_gcspr_el0;
+
+ /* Skip over the stack terminator and point at the cap */
+ switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) {
+ case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN:
+ cap_index -= 2;
+ break;
+ case SHADOW_STACK_SET_TOKEN:
+ cap_index -= 1;
+ break;
+ case SHADOW_STACK_SET_MARKER:
+ case 0:
+ /* No cap, no test but we need to SEGV to avoid a false fail */
+ orig_gcspr_el0 = get_gcspr();
+ *orig_gcspr_el0 = 0;
+ return;
+ }
+ pivot_gcspr_el0 = &self->stack[cap_index];
+
+ /* Pivot to the new GCS */
+ ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n",
+ pivot_gcspr_el0, get_gcspr(),
+ *pivot_gcspr_el0);
+ gcsss1(pivot_gcspr_el0);
+ orig_gcspr_el0 = gcsss2();
+ ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n",
+ pivot_gcspr_el0, orig_gcspr_el0,
+ *pivot_gcspr_el0);
+
+ ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr());
+
+ /* New GCS must be in the new buffer */
+ ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack);
+ ASSERT_TRUE((unsigned long)get_gcspr() <=
+ (unsigned long)self->stack + variant->stack_size);
+
+ /* Now try to recurse, we should fault doing this. */
+ ksft_print_msg("Recursing %zu levels...\n", cap_index + 1);
+ gcs_recurse(cap_index + 1);
+ ksft_print_msg("...done\n");
+
+ /* Clean up properly to try to guard against spurious passes. */
+ gcsss1(orig_gcspr_el0);
+ pivot_gcspr_el0 = gcsss2();
+ ksft_print_msg("Pivoted back to GCSPR_EL0 0x%p\n", get_gcspr());
+}
+
+FIXTURE(map_invalid_gcs)
+{
+};
+
+FIXTURE_VARIANT(map_invalid_gcs)
+{
+ size_t stack_size;
+};
+
+FIXTURE_SETUP(map_invalid_gcs)
+{
+}
+
+FIXTURE_TEARDOWN(map_invalid_gcs)
+{
+}
+
+/* GCS must be larger than 16 bytes */
+FIXTURE_VARIANT_ADD(map_invalid_gcs, too_small)
+{
+ .stack_size = 8,
+};
+
+/* GCS size must be 16 byte aligned */
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_1) { .stack_size = 1024 + 1 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_2) { .stack_size = 1024 + 2 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_3) { .stack_size = 1024 + 3 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_4) { .stack_size = 1024 + 4 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_5) { .stack_size = 1024 + 5 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_6) { .stack_size = 1024 + 6 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_7) { .stack_size = 1024 + 7 };
+
+TEST_F(map_invalid_gcs, do_map)
+{
+ void *stack;
+
+ stack = (void *)syscall(__NR_map_shadow_stack, 0,
+ variant->stack_size, 0);
+ ASSERT_TRUE(stack == MAP_FAILED);
+ if (stack != MAP_FAILED)
+ munmap(stack, variant->stack_size);
+}
+
+FIXTURE(invalid_mprotect)
+{
+ unsigned long *stack;
+ size_t stack_size;
+};
+
+FIXTURE_VARIANT(invalid_mprotect)
+{
+ unsigned long flags;
+};
+
+FIXTURE_SETUP(invalid_mprotect)
+{
+ self->stack_size = sysconf(_SC_PAGE_SIZE);
+ self->stack = (void *)syscall(__NR_map_shadow_stack, 0,
+ self->stack_size, 0);
+ ASSERT_FALSE(self->stack == MAP_FAILED);
+ ksft_print_msg("Allocated stack from %p-%p\n", self->stack,
+ self->stack + self->stack_size);
+}
+
+FIXTURE_TEARDOWN(invalid_mprotect)
+{
+ int ret;
+
+ if (self->stack != MAP_FAILED) {
+ ret = munmap(self->stack, self->stack_size);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+FIXTURE_VARIANT_ADD(invalid_mprotect, exec)
+{
+ .flags = PROT_EXEC,
+};
+
+TEST_F(invalid_mprotect, do_map)
+{
+ int ret;
+
+ ret = mprotect(self->stack, self->stack_size, variant->flags);
+ ASSERT_EQ(ret, -1);
+}
+
+TEST_F(invalid_mprotect, do_map_read)
+{
+ int ret;
+
+ ret = mprotect(self->stack, self->stack_size,
+ variant->flags | PROT_READ);
+ ASSERT_EQ(ret, -1);
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long gcs_mode;
+ int ret;
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_GCS))
+ ksft_exit_skip("SKIP GCS not supported\n");
+
+ /*
+ * Force shadow stacks on, our tests *should* be fine with or
+ * without libc support and with or without this having ended
+ * up tagged for GCS and enabled by the dynamic linker. We
+ * can't use the libc prctl() function since we can't return
+ * from enabling the stack.
+ */
+ ret = my_syscall2(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode);
+ if (ret) {
+ ksft_print_msg("Failed to read GCS state: %d\n", ret);
+ return EXIT_FAILURE;
+ }
+
+ if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) {
+ gcs_mode = PR_SHADOW_STACK_ENABLE;
+ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
+ gcs_mode);
+ if (ret) {
+ ksft_print_msg("Failed to configure GCS: %d\n", ret);
+ return EXIT_FAILURE;
+ }
+ }
+
+ /* Avoid returning in case libc doesn't understand GCS */
+ exit(test_harness_run(argc, argv));
+}
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index bc3ac63f3314..052d0f9f92b3 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -1,6 +1,8 @@
check_buffer_fill
+check_gcr_el1_cswitch
check_tags_inclusion
check_child_memory
check_mmap_options
+check_prctl
check_ksm_options
check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
index 0b3af552632a..0d7ac3db8390 100644
--- a/tools/testing/selftests/arm64/mte/Makefile
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -1,29 +1,41 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2020 ARM Limited
-CFLAGS += -std=gnu99 -I. -lpthread
+CFLAGS += -std=gnu99 -I. -pthread
+LDFLAGS += -pthread
SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
PROGS := $(patsubst %.c,%,$(SRCS))
-#Add mte compiler option
-ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
-CFLAGS += -march=armv8.5-a+memtag
+ifeq ($(LLVM),)
+# For GCC check that the toolchain has MTE support.
+
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
endif
#check if the compiler works well
-mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+mte_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.5-a+memtag -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+else
+
+# All supported clang versions also support MTE.
+mte_cc_support := 1
+
+endif
ifeq ($(mte_cc_support),1)
# Generated binaries to be installed by top KSFT script
TEST_GEN_PROGS := $(PROGS)
-# Get Kernel headers installed and use them.
-KSFT_KHDR_INSTALL := 1
+else
+ $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.)
+ $(warning test program "mte" will not be created.)
endif
# Include KSFT lib.mk.
include ../../lib.mk
ifeq ($(mte_cc_support),1)
-$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+$(TEST_GEN_PROGS): mte_common_util.c mte_helper.S
endif
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
index 75fc482d63b6..ff4e07503349 100644
--- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -31,8 +31,8 @@ static int check_buffer_by_byte(int mem_type, int mode)
int i, j, item;
bool err;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
- item = sizeof(sizes)/sizeof(int);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
+ item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true);
@@ -68,8 +68,8 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode,
bool err;
char *und_ptr = NULL;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
- item = sizeof(sizes)/sizeof(int);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
+ item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
underflow_range, 0);
@@ -91,7 +91,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode,
for (j = 0; j < sizes[i]; j++) {
if (ptr[j] != '1') {
err = true;
- ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%p\n",
j, ptr);
break;
}
@@ -164,8 +164,8 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode,
size_t tagged_size, overflow_size;
char *over_ptr = NULL;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
- item = sizeof(sizes)/sizeof(int);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
+ item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
0, overflow_range);
@@ -189,7 +189,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode,
for (j = 0; j < sizes[i]; j++) {
if (ptr[j] != '1') {
err = true;
- ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%p\n",
j, ptr);
break;
}
@@ -337,8 +337,8 @@ static int check_buffer_by_block(int mem_type, int mode)
{
int i, item, result = KSFT_PASS;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
- item = sizeof(sizes)/sizeof(int);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
+ item = ARRAY_SIZE(sizes);
cur_mte_cxt.fault_valid = false;
for (i = 0; i < item; i++) {
result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]);
@@ -366,9 +366,9 @@ static int check_memory_initial_tags(int mem_type, int mode, int mapping)
{
char *ptr;
int run, fd;
- int total = sizeof(sizes)/sizeof(int);
+ int total = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
/* check initial tags for anonymous mmap */
ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
@@ -404,7 +404,7 @@ int main(int argc, char *argv[])
{
int err;
size_t page_size = getpagesize();
- int item = sizeof(sizes)/sizeof(int);
+ int item = ARRAY_SIZE(sizes);
sizes[item - 3] = page_size - 1;
sizes[item - 2] = page_size;
@@ -415,7 +415,7 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(20);
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
index 43bd94f853ba..5e97ee792e4d 100644
--- a/tools/testing/selftests/arm64/mte/check_child_memory.c
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -85,10 +85,10 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping)
{
char *ptr;
int run, result;
- int item = sizeof(sizes)/sizeof(int);
+ int item = ARRAY_SIZE(sizes);
- item = sizeof(sizes)/sizeof(int);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = ARRAY_SIZE(sizes);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < item; run++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
UNDERFLOW, OVERFLOW);
@@ -107,9 +107,9 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
{
char *ptr, *map_ptr;
int run, fd, map_size, result = KSFT_PASS;
- int total = sizeof(sizes)/sizeof(int);
+ int total = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
fd = create_temp_file();
if (fd == -1)
@@ -144,7 +144,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
int main(int argc, char *argv[])
{
int err;
- int item = sizeof(sizes)/sizeof(int);
+ int item = ARRAY_SIZE(sizes);
page_size = getpagesize();
if (!page_size) {
@@ -160,8 +160,8 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
- mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(12);
diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
index a876db1f096a..325bca0de0f6 100644
--- a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
+++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
@@ -19,17 +19,6 @@
#include "kselftest.h"
#include "mte_common_util.h"
-#define PR_SET_TAGGED_ADDR_CTRL 55
-#define PR_GET_TAGGED_ADDR_CTRL 56
-# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
-# define PR_MTE_TCF_SHIFT 1
-# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TAG_SHIFT 3
-# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
-
#include "mte_def.h"
#define NUM_ITERATIONS 1024
diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
new file mode 100644
index 000000000000..aad1234c7e0f
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2024 Ampere Computing LLC
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define TAG_CHECK_ON 0
+#define TAG_CHECK_OFF 1
+
+static unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
+static bool is_hugetlb_allocated(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return false;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugetlb: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+
+ if (hps > 0)
+ return true;
+
+ return false;
+}
+
+static void write_sysfs(char *str, unsigned long val)
+{
+ FILE *f;
+
+ f = fopen(str, "w");
+ if (!f) {
+ ksft_print_msg("ERR: missing %s\n", str);
+ return;
+ }
+ fprintf(f, "%lu", val);
+ fclose(f);
+}
+
+static void allocate_hugetlb()
+{
+ write_sysfs("/proc/sys/vm/nr_hugepages", 2);
+}
+
+static void free_hugetlb()
+{
+ write_sysfs("/proc/sys/vm/nr_hugepages", 0);
+}
+
+static int check_child_tag_inheritance(char *ptr, int size, int mode)
+{
+ int i, parent_tag, child_tag, fault, child_status;
+ pid_t child;
+
+ parent_tag = MT_FETCH_TAG((uintptr_t)ptr);
+ fault = 0;
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("FAIL: child process creation\n");
+ return KSFT_FAIL;
+ } else if (child == 0) {
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ /* Do copy on write */
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ for (i = 0; i < size; i += MT_GRANULE_SIZE) {
+ child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+ if (parent_tag != child_tag) {
+ ksft_print_msg("FAIL: child mte tag (%d) mismatch\n", i);
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ }
+check_child_tag_inheritance_err:
+ _exit(fault);
+ }
+ /* Wait for child process to terminate */
+ wait(&child_status);
+ if (WIFEXITED(child_status))
+ fault = WEXITSTATUS(child_status);
+ else
+ fault = 1;
+ return (fault) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+{
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+ char *ptr, *map_ptr;
+ int result;
+ unsigned long map_size;
+
+ map_size = default_huge_page_size();
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
+ map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)map_ptr, map_size);
+ /* Only mte enabled memory will allow tag insertion */
+ ptr = mte_insert_tags((void *)map_ptr, map_size);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n");
+ munmap((void *)map_ptr, map_size);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, map_size, mode, tag_check);
+ mte_clear_tags((void *)ptr, map_size);
+ mte_free_memory((void *)map_ptr, map_size, mem_type, false);
+ if (result == KSFT_FAIL)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+{
+ char *map_ptr;
+ int prot_flag, result;
+ unsigned long map_size;
+
+ prot_flag = PROT_READ | PROT_WRITE;
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
+ map_size = default_huge_page_size();
+ map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping,
+ 0, 0);
+ if (check_allocated_memory_range(map_ptr, map_size, mem_type,
+ 0, 0) != KSFT_PASS)
+ return KSFT_FAIL;
+ /* Try to clear PROT_MTE property and verify it by tag checking */
+ if (mprotect(map_ptr, map_size, prot_flag)) {
+ mte_free_memory_tag_range((void *)map_ptr, map_size, mem_type,
+ 0, 0);
+ ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(map_ptr, map_size, mode, TAG_CHECK_ON);
+ mte_free_memory_tag_range((void *)map_ptr, map_size, mem_type, 0, 0);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int result;
+ unsigned long map_size;
+
+ map_size = default_huge_page_size();
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
+ ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping,
+ 0, 0);
+ if (check_allocated_memory_range(ptr, map_size, mem_type,
+ 0, 0) != KSFT_PASS)
+ return KSFT_FAIL;
+ result = check_child_tag_inheritance(ptr, map_size, mode);
+ mte_free_memory_tag_range((void *)ptr, map_size, mem_type, 0, 0);
+ if (result == KSFT_FAIL)
+ return result;
+
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ void *map_ptr;
+ unsigned long map_size;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler, false);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
+
+ allocate_hugetlb();
+
+ if (!is_hugetlb_allocated()) {
+ ksft_print_msg("ERR: Unable allocate hugetlb pages\n");
+ return KSFT_FAIL;
+ }
+
+ /* Check if MTE supports hugetlb mappings */
+ map_size = default_huge_page_size();
+ map_ptr = mmap(NULL, map_size, PROT_READ | PROT_MTE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+ if (map_ptr == MAP_FAILED)
+ ksft_exit_skip("PROT_MTE not supported with MAP_HUGETLB mappings\n");
+ else
+ munmap(map_ptr, map_size);
+
+ /* Set test plan */
+ ksft_set_plan(12);
+
+ mte_enable_pstate_tco();
+
+ evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_OFF),
+ "Check hugetlb memory with private mapping, sync error mode, mmap memory and tag check off\n");
+
+ mte_disable_pstate_tco();
+ evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_OFF),
+ "Check hugetlb memory with private mapping, no error mode, mmap memory and tag check off\n");
+
+ evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON),
+ "Check hugetlb memory with private mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON),
+ "Check hugetlb memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON),
+ "Check hugetlb memory with private mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON),
+ "Check hugetlb memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+ evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
+ "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
+ "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+
+ evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
+ "Check child hugetlb memory with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
+ "Check child hugetlb memory with private mapping, async error mode and mmap memory\n");
+ evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
+ "Check child hugetlb memory with private mapping, sync error mode and mmap/mprotect memory\n");
+ evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
+ "Check child hugetlb memory with private mapping, async error mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ free_hugetlb();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
index 3b23c4d61d38..0cf5faef1724 100644
--- a/tools/testing/selftests/arm64/mte/check_ksm_options.c
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -33,7 +33,10 @@ static unsigned long read_sysfs(char *str)
ksft_print_msg("ERR: missing %s\n", str);
return 0;
}
- fscanf(f, "%lu", &val);
+ if (fscanf(f, "%lu", &val) != 1) {
+ ksft_print_msg("ERR: parsing %s\n", str);
+ val = 0;
+ }
fclose(f);
return val;
}
@@ -103,7 +106,7 @@ static int check_madvise_options(int mem_type, int mode, int mapping)
return err;
}
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
@@ -138,8 +141,8 @@ int main(int argc, char *argv[])
return KSFT_FAIL;
}
/* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(4);
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
index a04b12c21ac9..c100af3012cb 100644
--- a/tools/testing/selftests/arm64/mte/check_mmap_options.c
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
+#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
@@ -23,6 +24,35 @@
#define OVERFLOW MT_GRANULE_SIZE
#define TAG_CHECK_ON 0
#define TAG_CHECK_OFF 1
+#define ATAG_CHECK_ON 1
+#define ATAG_CHECK_OFF 0
+
+#define TEST_NAME_MAX 256
+
+enum mte_mem_check_type {
+ CHECK_ANON_MEM = 0,
+ CHECK_FILE_MEM = 1,
+ CHECK_CLEAR_PROT_MTE = 2,
+};
+
+enum mte_tag_op_type {
+ TAG_OP_ALL = 0,
+ TAG_OP_STONLY = 1,
+};
+
+struct check_mmap_testcase {
+ int check_type;
+ int mem_type;
+ int mte_sync;
+ int mapping;
+ int tag_check;
+ int atag_check;
+ int tag_op;
+ bool enable_tco;
+};
+
+#define TAG_OP_ALL 0
+#define TAG_OP_STONLY 1
static size_t page_size;
static int sizes[] = {
@@ -30,8 +60,17 @@ static int sizes[] = {
/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
};
-static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+static int check_mte_memory(char *ptr, int size, int mode,
+ int tag_check,int atag_check, int tag_op)
{
+ char buf[MT_GRANULE_SIZE];
+
+ if (!mtefar_support && atag_check == ATAG_CHECK_ON)
+ return KSFT_SKIP;
+
+ if (atag_check == ATAG_CHECK_ON)
+ ptr = mte_insert_atag(ptr);
+
mte_initialize_current_context(mode, (uintptr_t)ptr, size);
memset(ptr, '1', size);
mte_wait_after_trig();
@@ -54,17 +93,34 @@ static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
return KSFT_FAIL;
+ if (tag_op == TAG_OP_STONLY) {
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memcpy(buf, ptr - UNDERFLOW, MT_GRANULE_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memcpy(buf, ptr + size, MT_GRANULE_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+ }
+
return KSFT_PASS;
}
-static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping,
+ int tag_check, int atag_check, int tag_op)
{
char *ptr, *map_ptr;
int run, result, map_size;
- int item = sizeof(sizes)/sizeof(int);
+ int item = ARRAY_SIZE(sizes);
- item = sizeof(sizes)/sizeof(int);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (tag_op == TAG_OP_STONLY && !mtestonly_support)
+ return KSFT_SKIP;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op);
for (run = 0; run < item; run++) {
map_size = sizes[run] + OVERFLOW + UNDERFLOW;
map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
@@ -80,23 +136,27 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i
munmap((void *)map_ptr, map_size);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op);
mte_clear_tags((void *)ptr, sizes[run]);
mte_free_memory((void *)map_ptr, map_size, mem_type, false);
- if (result == KSFT_FAIL)
- return KSFT_FAIL;
+ if (result != KSFT_PASS)
+ return result;
}
return KSFT_PASS;
}
-static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+static int check_file_memory_mapping(int mem_type, int mode, int mapping,
+ int tag_check, int atag_check, int tag_op)
{
char *ptr, *map_ptr;
int run, fd, map_size;
- int total = sizeof(sizes)/sizeof(int);
+ int total = ARRAY_SIZE(sizes);
int result = KSFT_PASS;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (tag_op == TAG_OP_STONLY && !mtestonly_support)
+ return KSFT_SKIP;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op);
for (run = 0; run < total; run++) {
fd = create_temp_file();
if (fd == -1)
@@ -118,24 +178,24 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta
close(fd);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op);
mte_clear_tags((void *)ptr, sizes[run]);
munmap((void *)map_ptr, map_size);
close(fd);
- if (result == KSFT_FAIL)
- break;
+ if (result != KSFT_PASS)
+ return result;
}
- return result;
+ return KSFT_PASS;
}
-static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int atag_check)
{
char *ptr, *map_ptr;
int run, prot_flag, result, fd, map_size;
- int total = sizeof(sizes)/sizeof(int);
+ int total = ARRAY_SIZE(sizes);
prot_flag = PROT_READ | PROT_WRITE;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
map_size = sizes[run] + OVERFLOW + UNDERFLOW;
ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
@@ -151,10 +211,10 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL);
mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
if (result != KSFT_PASS)
- return KSFT_FAIL;
+ return result;
fd = create_temp_file();
if (fd == -1)
@@ -175,19 +235,715 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
close(fd);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL);
mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
close(fd);
if (result != KSFT_PASS)
- return KSFT_FAIL;
+ return result;
}
return KSFT_PASS;
}
+const char *format_test_name(struct check_mmap_testcase *tc)
+{
+ static char test_name[TEST_NAME_MAX];
+ const char *check_type_str;
+ const char *mem_type_str;
+ const char *sync_str;
+ const char *mapping_str;
+ const char *tag_check_str;
+ const char *atag_check_str;
+ const char *tag_op_str;
+
+ switch (tc->check_type) {
+ case CHECK_ANON_MEM:
+ check_type_str = "anonymous memory";
+ break;
+ case CHECK_FILE_MEM:
+ check_type_str = "file memory";
+ break;
+ case CHECK_CLEAR_PROT_MTE:
+ check_type_str = "clear PROT_MTE flags";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mem_type) {
+ case USE_MMAP:
+ mem_type_str = "mmap";
+ break;
+ case USE_MPROTECT:
+ mem_type_str = "mmap/mprotect";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mte_sync) {
+ case MTE_NONE_ERR:
+ sync_str = "no error";
+ break;
+ case MTE_SYNC_ERR:
+ sync_str = "sync error";
+ break;
+ case MTE_ASYNC_ERR:
+ sync_str = "async error";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mapping) {
+ case MAP_SHARED:
+ mapping_str = "shared";
+ break;
+ case MAP_PRIVATE:
+ mapping_str = "private";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->tag_check) {
+ case TAG_CHECK_ON:
+ tag_check_str = "tag check on";
+ break;
+ case TAG_CHECK_OFF:
+ tag_check_str = "tag check off";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->atag_check) {
+ case ATAG_CHECK_ON:
+ atag_check_str = "with address tag [63:60]";
+ break;
+ case ATAG_CHECK_OFF:
+ atag_check_str = "without address tag [63:60]";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(test_name, sizeof(test_name),
+ "Check %s with %s mapping, %s mode, %s memory and %s (%s)\n",
+ check_type_str, mapping_str, sync_str, mem_type_str,
+ tag_check_str, atag_check_str);
+
+ switch (tc->tag_op) {
+ case TAG_OP_ALL:
+ tag_op_str = "";
+ break;
+ case TAG_OP_STONLY:
+ tag_op_str = " / store-only";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(test_name, TEST_NAME_MAX,
+ "Check %s with %s mapping, %s mode, %s memory and %s (%s%s)\n",
+ check_type_str, mapping_str, sync_str, mem_type_str,
+ tag_check_str, atag_check_str, tag_op_str);
+
+ return test_name;
+}
+
int main(int argc, char *argv[])
{
- int err;
- int item = sizeof(sizes)/sizeof(int);
+ int err, i;
+ int item = ARRAY_SIZE(sizes);
+ struct check_mmap_testcase test_cases[]= {
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = true,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = true,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_NONE_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_NONE_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ };
err = mte_default_setup();
if (err)
@@ -201,64 +957,51 @@ int main(int argc, char *argv[])
sizes[item - 2] = page_size;
sizes[item - 1] = page_size + 1;
- /* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
-
/* Set test plan */
- ksft_set_plan(22);
-
- mte_enable_pstate_tco();
-
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
-
- mte_disable_pstate_tco();
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
-
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
-
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
-
- evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
- evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+ ksft_set_plan(ARRAY_SIZE(test_cases));
+
+ for (i = 0 ; i < ARRAY_SIZE(test_cases); i++) {
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler,
+ test_cases[i].atag_check == ATAG_CHECK_ON);
+ mte_register_signal(SIGSEGV, mte_default_handler,
+ test_cases[i].atag_check == ATAG_CHECK_ON);
+
+ if (test_cases[i].enable_tco)
+ mte_enable_pstate_tco();
+ else
+ mte_disable_pstate_tco();
+
+ switch (test_cases[i].check_type) {
+ case CHECK_ANON_MEM:
+ evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].tag_check,
+ test_cases[i].atag_check,
+ test_cases[i].tag_op),
+ format_test_name(&test_cases[i]));
+ break;
+ case CHECK_FILE_MEM:
+ evaluate_test(check_file_memory_mapping(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].tag_check,
+ test_cases[i].atag_check,
+ test_cases[i].tag_op),
+ format_test_name(&test_cases[i]));
+ break;
+ case CHECK_CLEAR_PROT_MTE:
+ evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].atag_check),
+ format_test_name(&test_cases[i]));
+ break;
+ default:
+ exit(KSFT_FAIL);
+ }
+ }
mte_restore_setup();
ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c
new file mode 100644
index 000000000000..f7f320defa7b
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_prctl.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2022 ARM Limited
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+
+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
+static int set_tagged_addr_ctrl(int val)
+{
+ int ret;
+
+ ret = prctl(PR_SET_TAGGED_ADDR_CTRL, val, 0, 0, 0);
+ if (ret < 0)
+ ksft_print_msg("PR_SET_TAGGED_ADDR_CTRL: failed %d %d (%s)\n",
+ ret, errno, strerror(errno));
+ return ret;
+}
+
+static int get_tagged_addr_ctrl(void)
+{
+ int ret;
+
+ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+ if (ret < 0)
+ ksft_print_msg("PR_GET_TAGGED_ADDR_CTRL failed: %d %d (%s)\n",
+ ret, errno, strerror(errno));
+ return ret;
+}
+
+/*
+ * Read the current mode without having done any configuration, should
+ * run first.
+ */
+void check_basic_read(void)
+{
+ int ret;
+
+ ret = get_tagged_addr_ctrl();
+ if (ret < 0) {
+ ksft_test_result_fail("check_basic_read\n");
+ return;
+ }
+
+ if (ret & PR_MTE_TCF_SYNC)
+ ksft_print_msg("SYNC enabled\n");
+ if (ret & PR_MTE_TCF_ASYNC)
+ ksft_print_msg("ASYNC enabled\n");
+
+ /* Any configuration is valid */
+ ksft_test_result_pass("check_basic_read\n");
+}
+
+/*
+ * Attempt to set a specified combination of modes.
+ */
+void set_mode_test(const char *name, int hwcap2, int hwcap3, int mask)
+{
+ int ret;
+
+ if ((getauxval(AT_HWCAP2) & hwcap2) != hwcap2) {
+ ksft_test_result_skip("%s\n", name);
+ return;
+ }
+
+ if ((getauxval(AT_HWCAP3) & hwcap3) != hwcap3) {
+ ksft_test_result_skip("%s\n", name);
+ return;
+ }
+
+ ret = set_tagged_addr_ctrl(mask);
+ if (ret < 0) {
+ ksft_test_result_fail("%s\n", name);
+ return;
+ }
+
+ ret = get_tagged_addr_ctrl();
+ if (ret < 0) {
+ ksft_test_result_fail("%s\n", name);
+ return;
+ }
+
+ if ((ret & (PR_MTE_TCF_MASK | PR_MTE_STORE_ONLY)) == mask) {
+ ksft_test_result_pass("%s\n", name);
+ } else {
+ ksft_print_msg("Got %x, expected %x\n",
+ (ret & (int)PR_MTE_TCF_MASK), mask);
+ ksft_test_result_fail("%s\n", name);
+ }
+}
+
+struct mte_mode {
+ int mask;
+ int hwcap2;
+ int hwcap3;
+ const char *name;
+} mte_modes[] = {
+ { PR_MTE_TCF_NONE, 0, 0, "NONE" },
+ { PR_MTE_TCF_SYNC, HWCAP2_MTE, 0, "SYNC" },
+ { PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "SYNC+ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+STONLY" },
+ { PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "ASYNC+STONLY" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+ASYNC+STONLY" },
+};
+
+int main(void)
+{
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(mte_modes));
+
+ check_basic_read();
+ for (i = 0; i < ARRAY_SIZE(mte_modes); i++)
+ set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, mte_modes[i].hwcap3,
+ mte_modes[i].mask);
+
+ ksft_print_cnts();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
index deaef1f61076..4b764f2a8185 100644
--- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -23,10 +23,13 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
{
mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
/* Check the validity of the tagged pointer */
- memset((void *)ptr, '1', BUFFER_SIZE);
+ memset(ptr, '1', BUFFER_SIZE);
mte_wait_after_trig();
- if (cur_mte_cxt.fault_valid)
+ if (cur_mte_cxt.fault_valid) {
+ ksft_print_msg("Unexpected fault recorded for %p-%p in mode %x\n",
+ ptr, ptr + BUFFER_SIZE, mode);
return KSFT_FAIL;
+ }
/* Proceed further for nonzero tags */
if (!MT_FETCH_TAG((uintptr_t)ptr))
return KSFT_PASS;
@@ -34,30 +37,35 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
/* Check the validity outside the range */
ptr[BUFFER_SIZE] = '2';
mte_wait_after_trig();
- if (!cur_mte_cxt.fault_valid)
+ if (!cur_mte_cxt.fault_valid) {
+ ksft_print_msg("No valid fault recorded for %p in mode %x\n",
+ ptr, mode);
return KSFT_FAIL;
- else
+ } else {
return KSFT_PASS;
+ }
}
static int check_single_included_tags(int mem_type, int mode)
{
char *ptr;
- int tag, run, result = KSFT_PASS;
+ int tag, run, ret, result = KSFT_PASS;
- ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
- mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+ ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag), false);
+ if (ret != 0)
+ result = KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
- ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ ptr = mte_insert_tags(ptr, BUFFER_SIZE);
/* Check tag value */
if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
- ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ ksft_print_msg("FAIL: wrong tag = 0x%lx with include mask=0x%x\n",
MT_FETCH_TAG((uintptr_t)ptr),
MT_INCLUDE_VALID_TAG(tag));
result = KSFT_FAIL;
@@ -66,7 +74,7 @@ static int check_single_included_tags(int mem_type, int mode)
result = verify_mte_pointer_validity(ptr, mode);
}
}
- mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
return result;
}
@@ -76,20 +84,20 @@ static int check_multiple_included_tags(int mem_type, int mode)
int tag, run, result = KSFT_PASS;
unsigned long excl_mask = 0;
- ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
excl_mask |= 1 << tag;
- mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask), false);
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
- ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ ptr = mte_insert_tags(ptr, BUFFER_SIZE);
/* Check tag value */
if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
- ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ ksft_print_msg("FAIL: wrong tag = 0x%lx with include mask=0x%lx\n",
MT_FETCH_TAG((uintptr_t)ptr),
MT_INCLUDE_VALID_TAGS(excl_mask));
result = KSFT_FAIL;
@@ -98,21 +106,23 @@ static int check_multiple_included_tags(int mem_type, int mode)
result = verify_mte_pointer_validity(ptr, mode);
}
}
- mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
return result;
}
static int check_all_included_tags(int mem_type, int mode)
{
char *ptr;
- int run, result = KSFT_PASS;
+ int run, ret, result = KSFT_PASS;
- ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK, false);
+ if (ret != 0)
+ return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
@@ -122,20 +132,22 @@ static int check_all_included_tags(int mem_type, int mode)
*/
result = verify_mte_pointer_validity(ptr, mode);
}
- mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
return result;
}
static int check_none_included_tags(int mem_type, int mode)
{
char *ptr;
- int run;
+ int run, ret;
- ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+ ptr = mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK, false);
+ if (ret != 0)
+ return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; run < RUNS; run++) {
ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
@@ -147,12 +159,12 @@ static int check_none_included_tags(int mem_type, int mode)
}
mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
/* Check the write validity of the untagged pointer */
- memset((void *)ptr, '1', BUFFER_SIZE);
+ memset(ptr, '1', BUFFER_SIZE);
mte_wait_after_trig();
if (cur_mte_cxt.fault_valid)
break;
}
- mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+ mte_free_memory(ptr, BUFFER_SIZE, mem_type, false);
if (cur_mte_cxt.fault_valid)
return KSFT_FAIL;
else
@@ -168,7 +180,7 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(4);
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 4bfa80f2a8c3..fb7936c4e097 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
+#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
@@ -11,6 +12,7 @@
#include <string.h>
#include <ucontext.h>
#include <unistd.h>
+#include <sys/uio.h>
#include <sys/mman.h>
#include "kselftest.h"
@@ -19,21 +21,36 @@
static size_t page_sz;
-static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+#define TEST_NAME_MAX 100
+
+enum test_type {
+ READ_TEST,
+ WRITE_TEST,
+ READV_TEST,
+ WRITEV_TEST,
+ LAST_TEST,
+};
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping,
+ int tag_offset, int tag_len,
+ enum test_type test_type)
{
int fd, i, err;
char val = 'A';
- size_t len, read_len;
+ ssize_t len, syscall_len;
void *ptr, *ptr_next;
+ int fileoff, ptroff, size;
+ int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz};
- err = KSFT_FAIL;
+ err = KSFT_PASS;
len = 2 * page_sz;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
fd = create_temp_file();
if (fd == -1)
return KSFT_FAIL;
for (i = 0; i < len; i++)
- write(fd, &val, sizeof(val));
+ if (write(fd, &val, sizeof(val)) != sizeof(val))
+ return KSFT_FAIL;
lseek(fd, 0, 0);
ptr = mte_allocate_memory(len, mem_type, mapping, true);
if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
@@ -42,9 +59,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
}
mte_initialize_current_context(mode, (uintptr_t)ptr, len);
/* Copy from file into buffer with valid tag */
- read_len = read(fd, ptr, len);
+ syscall_len = read(fd, ptr, len);
mte_wait_after_trig();
- if (cur_mte_cxt.fault_valid || read_len < len)
+ if (cur_mte_cxt.fault_valid || syscall_len < len)
goto usermem_acc_err;
/* Verify same pattern is read */
for (i = 0; i < len; i++)
@@ -53,36 +70,136 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
if (i < len)
goto usermem_acc_err;
- /* Tag the next half of memory with different value */
- ptr_next = (void *)((unsigned long)ptr + page_sz);
+ if (!tag_len)
+ tag_len = len - tag_offset;
+ /* Tag a part of memory with different value */
+ ptr_next = (void *)((unsigned long)ptr + tag_offset);
ptr_next = mte_insert_new_tag(ptr_next);
- mte_set_tag_address_range(ptr_next, page_sz);
+ mte_set_tag_address_range(ptr_next, tag_len);
- lseek(fd, 0, 0);
- /* Copy from file into buffer with invalid tag */
- read_len = read(fd, ptr, len);
- mte_wait_after_trig();
- /*
- * Accessing user memory in kernel with invalid tag should fail in sync
- * mode without fault but may not fail in async mode as per the
- * implemented MTE userspace support in Arm64 kernel.
- */
- if (mode == MTE_SYNC_ERR &&
- !cur_mte_cxt.fault_valid && read_len < len) {
- err = KSFT_PASS;
- } else if (mode == MTE_ASYNC_ERR &&
- !cur_mte_cxt.fault_valid && read_len == len) {
- err = KSFT_PASS;
+ for (fileoff = 0; fileoff < 16; fileoff++) {
+ for (ptroff = 0; ptroff < 16; ptroff++) {
+ for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+ size = sizes[i];
+ lseek(fd, 0, 0);
+
+ /* perform file operation on buffer with invalid tag */
+ switch (test_type) {
+ case READ_TEST:
+ syscall_len = read(fd, ptr + ptroff, size);
+ break;
+ case WRITE_TEST:
+ syscall_len = write(fd, ptr + ptroff, size);
+ break;
+ case READV_TEST: {
+ struct iovec iov[1];
+ iov[0].iov_base = ptr + ptroff;
+ iov[0].iov_len = size;
+ syscall_len = readv(fd, iov, 1);
+ break;
+ }
+ case WRITEV_TEST: {
+ struct iovec iov[1];
+ iov[0].iov_base = ptr + ptroff;
+ iov[0].iov_len = size;
+ syscall_len = writev(fd, iov, 1);
+ break;
+ }
+ case LAST_TEST:
+ goto usermem_acc_err;
+ }
+
+ mte_wait_after_trig();
+ /*
+ * Accessing user memory in kernel with invalid tag should fail in sync
+ * mode without fault but may not fail in async mode as per the
+ * implemented MTE userspace support in Arm64 kernel.
+ */
+ if (cur_mte_cxt.fault_valid) {
+ goto usermem_acc_err;
+ }
+ if (mode == MTE_SYNC_ERR && syscall_len < len) {
+ /* test passed */
+ } else if (mode == MTE_ASYNC_ERR && syscall_len == size) {
+ /* test passed */
+ } else {
+ goto usermem_acc_err;
+ }
+ }
+ }
}
+
+ goto exit;
+
usermem_acc_err:
+ err = KSFT_FAIL;
+exit:
mte_free_memory((void *)ptr, len, mem_type, true);
close(fd);
return err;
}
+void format_test_name(char* name, int name_len, int type, int sync, int map, int len, int offset) {
+ const char* test_type;
+ const char* mte_type;
+ const char* map_type;
+
+ switch (type) {
+ case READ_TEST:
+ test_type = "read";
+ break;
+ case WRITE_TEST:
+ test_type = "write";
+ break;
+ case READV_TEST:
+ test_type = "readv";
+ break;
+ case WRITEV_TEST:
+ test_type = "writev";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (sync) {
+ case MTE_SYNC_ERR:
+ mte_type = "MTE_SYNC_ERR";
+ break;
+ case MTE_ASYNC_ERR:
+ mte_type = "MTE_ASYNC_ERR";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (map) {
+ case MAP_SHARED:
+ map_type = "MAP_SHARED";
+ break;
+ case MAP_PRIVATE:
+ map_type = "MAP_PRIVATE";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(name, name_len,
+ "test type: %s, %s, %s, tag len: %d, tag offset: %d\n",
+ test_type, mte_type, map_type, len, offset);
+}
+
int main(int argc, char *argv[])
{
int err;
+ int t, s, m, l, o;
+ int mte_sync[] = {MTE_SYNC_ERR, MTE_ASYNC_ERR};
+ int maps[] = {MAP_SHARED, MAP_PRIVATE};
+ int tag_lens[] = {0, MT_GRANULE_SIZE};
+ int tag_offsets[] = {page_sz, MT_GRANULE_SIZE};
+ char test_name[TEST_NAME_MAX];
page_sz = getpagesize();
if (!page_sz) {
@@ -94,20 +211,31 @@ int main(int argc, char *argv[])
return err;
/* Register signal handlers */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
- ksft_set_plan(4);
+ ksft_set_plan(64);
- evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check memory access from kernel in sync mode, private mapping and mmap memory\n");
- evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
- "Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
-
- evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
- "Check memory access from kernel in async mode, private mapping and mmap memory\n");
- evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
- "Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+ for (t = 0; t < LAST_TEST; t++) {
+ for (s = 0; s < ARRAY_SIZE(mte_sync); s++) {
+ for (m = 0; m < ARRAY_SIZE(maps); m++) {
+ for (l = 0; l < ARRAY_SIZE(tag_lens); l++) {
+ for (o = 0; o < ARRAY_SIZE(tag_offsets); o++) {
+ int sync = mte_sync[s];
+ int map = maps[m];
+ int offset = tag_offsets[o];
+ int tag_len = tag_lens[l];
+ int res = check_usermem_access_fault(USE_MMAP, sync,
+ map, offset,
+ tag_len, t);
+ format_test_name(test_name, TEST_NAME_MAX,
+ t, sync, map, tag_len, offset);
+ evaluate_test(res, test_name);
+ }
+ }
+ }
+ }
+ }
mte_restore_setup();
ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index 39f8908988ea..397e57dd946a 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -6,6 +6,7 @@
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
+#include <time.h>
#include <unistd.h>
#include <linux/auxvec.h>
@@ -19,35 +20,64 @@
#include "mte_common_util.h"
#include "mte_def.h"
+#ifndef SA_EXPOSE_TAGBITS
+#define SA_EXPOSE_TAGBITS 0x00000800
+#endif
+
#define INIT_BUFFER_SIZE 256
struct mte_fault_cxt cur_mte_cxt;
+bool mtefar_support;
+bool mtestonly_support;
static unsigned int mte_cur_mode;
static unsigned int mte_cur_pstate_tco;
+static bool mte_cur_stonly;
void mte_default_handler(int signum, siginfo_t *si, void *uc)
{
+ struct sigaction sa;
unsigned long addr = (unsigned long)si->si_addr;
+ unsigned char si_tag, si_atag;
+
+ sigaction(signum, NULL, &sa);
+
+ if (sa.sa_flags & SA_EXPOSE_TAGBITS) {
+ si_tag = MT_FETCH_TAG(addr);
+ si_atag = MT_FETCH_ATAG(addr);
+ addr = MT_CLEAR_TAGS(addr);
+ } else {
+ si_tag = 0;
+ si_atag = 0;
+ }
if (signum == SIGSEGV) {
#ifdef DEBUG
- ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
- ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+ ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx, si_tag=%x, si_atag=%x\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code, si_tag, si_atag);
#endif
if (si->si_code == SEGV_MTEAERR) {
if (cur_mte_cxt.trig_si_code == si->si_code)
cur_mte_cxt.fault_valid = true;
+ else
+ ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=%llx, fault addr=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc,
+ addr);
return;
}
/* Compare the context for precise error */
else if (si->si_code == SEGV_MTESERR) {
+ if ((!mtefar_support && si_atag) || (si_atag != MT_FETCH_ATAG(cur_mte_cxt.trig_addr))) {
+ ksft_print_msg("Invalid MTE synchronous exception caught for address tag! si_tag=%x, si_atag: %x\n", si_tag, si_atag);
+ exit(KSFT_FAIL);
+ }
+
if (cur_mte_cxt.trig_si_code == si->si_code &&
((cur_mte_cxt.trig_range >= 0 &&
- addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
(cur_mte_cxt.trig_range < 0 &&
- addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+ addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
cur_mte_cxt.fault_valid = true;
/* Adjust the pc by 4 */
((ucontext_t *)uc)->uc_mcontext.pc += 4;
@@ -60,14 +90,14 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
exit(1);
}
} else if (signum == SIGBUS) {
- ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+ ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n",
((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
if ((cur_mte_cxt.trig_range >= 0 &&
- addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
(cur_mte_cxt.trig_range < 0 &&
- addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+ addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
cur_mte_cxt.fault_valid = true;
/* Adjust the pc by 4 */
((ucontext_t *)uc)->uc_mcontext.pc += 4;
@@ -75,12 +105,17 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
}
}
-void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *),
+ bool export_tags)
{
struct sigaction sa;
sa.sa_sigaction = handler;
sa.sa_flags = SA_SIGINFO;
+
+ if (export_tags && signal == SIGSEGV)
+ sa.sa_flags |= SA_EXPOSE_TAGBITS;
+
sigemptyset(&sa.sa_mask);
sigaction(signal, &sa, NULL);
}
@@ -96,7 +131,7 @@ void *mte_insert_tags(void *ptr, size_t size)
int align_size;
if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
- ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ ksft_print_msg("FAIL: Addr=%p: invalid\n", ptr);
return NULL;
}
align_size = MT_ALIGN_UP(size);
@@ -108,7 +143,7 @@ void *mte_insert_tags(void *ptr, size_t size)
void mte_clear_tags(void *ptr, size_t size)
{
if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
- ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ ksft_print_msg("FAIL: Addr=%p: invalid\n", ptr);
return;
}
size = MT_ALIGN_UP(size);
@@ -116,6 +151,19 @@ void mte_clear_tags(void *ptr, size_t size)
mte_clear_tag_address_range(ptr, size);
}
+void *mte_insert_atag(void *ptr)
+{
+ unsigned char atag;
+
+ atag = mtefar_support ? (random() % MT_ATAG_MASK) + 1 : 0;
+ return (void *)MT_SET_ATAG((unsigned long)ptr, atag);
+}
+
+void *mte_clear_atag(void *ptr)
+{
+ return (void *)MT_CLEAR_ATAG((unsigned long)ptr);
+}
+
static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
size_t range_before, size_t range_after,
bool tags, int fd)
@@ -124,13 +172,16 @@ static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
int prot_flag, map_flag;
size_t entire_size = size + range_before + range_after;
- if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
- mem_type != USE_MPROTECT) {
+ switch (mem_type) {
+ case USE_MALLOC:
+ return malloc(entire_size) + range_before;
+ case USE_MMAP:
+ case USE_MPROTECT:
+ break;
+ default:
ksft_print_msg("FAIL: Invalid allocate request\n");
return NULL;
}
- if (mem_type == USE_MALLOC)
- return malloc(entire_size) + range_before;
prot_flag = PROT_READ | PROT_WRITE;
if (mem_type == USE_MMAP)
@@ -143,13 +194,13 @@ static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
map_flag |= MAP_PRIVATE;
ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0);
if (ptr == MAP_FAILED) {
- ksft_print_msg("FAIL: mmap allocation\n");
+ ksft_perror("mmap()");
return NULL;
}
if (mem_type == USE_MPROTECT) {
if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) {
+ ksft_perror("mprotect(PROT_MTE)");
munmap(ptr, size);
- ksft_print_msg("FAIL: mprotect PROT_MTE property\n");
return NULL;
}
}
@@ -181,10 +232,17 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags
}
/* Initialize the file for mappable size */
lseek(fd, 0, SEEK_SET);
- for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
- write(fd, buffer, INIT_BUFFER_SIZE);
+ for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) {
+ if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+ ksft_perror("initialising buffer");
+ return NULL;
+ }
+ }
index -= INIT_BUFFER_SIZE;
- write(fd, buffer, size - index);
+ if (write(fd, buffer, size - index) != size - index) {
+ ksft_perror("initialising buffer");
+ return NULL;
+ }
return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
}
@@ -202,9 +260,15 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
/* Initialize the file for mappable size */
lseek(fd, 0, SEEK_SET);
for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
- write(fd, buffer, INIT_BUFFER_SIZE);
+ if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+ ksft_perror("initialising buffer");
+ return NULL;
+ }
index -= INIT_BUFFER_SIZE;
- write(fd, buffer, map_size - index);
+ if (write(fd, buffer, map_size - index) != map_size - index) {
+ ksft_perror("initialising buffer");
+ return NULL;
+ }
return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
range_after, true, fd);
}
@@ -252,51 +316,68 @@ void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
cur_mte_cxt.trig_si_code = 0;
}
-int mte_switch_mode(int mte_option, unsigned long incl_mask)
+int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly)
{
unsigned long en = 0;
- if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
- mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
- ksft_print_msg("FAIL: Invalid mte config option\n");
+ switch (mte_option) {
+ case MTE_NONE_ERR:
+ case MTE_SYNC_ERR:
+ case MTE_ASYNC_ERR:
+ break;
+ default:
+ ksft_print_msg("FAIL: Invalid MTE option %x\n", mte_option);
return -EINVAL;
}
+
+ if (incl_mask & ~MT_INCLUDE_TAG_MASK) {
+ ksft_print_msg("FAIL: Invalid incl_mask %lx\n", incl_mask);
+ return -EINVAL;
+ }
+
en = PR_TAGGED_ADDR_ENABLE;
- if (mte_option == MTE_SYNC_ERR)
+ switch (mte_option) {
+ case MTE_SYNC_ERR:
en |= PR_MTE_TCF_SYNC;
- else if (mte_option == MTE_ASYNC_ERR)
+ break;
+ case MTE_ASYNC_ERR:
en |= PR_MTE_TCF_ASYNC;
- else if (mte_option == MTE_NONE_ERR)
+ break;
+ case MTE_NONE_ERR:
en |= PR_MTE_TCF_NONE;
+ break;
+ }
+
+ if (mtestonly_support && stonly)
+ en |= PR_MTE_STORE_ONLY;
en |= (incl_mask << PR_MTE_TAG_SHIFT);
/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
- if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) {
+ if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
return -EINVAL;
}
return 0;
}
-#define ID_AA64PFR1_MTE_SHIFT 8
-#define ID_AA64PFR1_MTE 2
-
int mte_default_setup(void)
{
- unsigned long hwcaps = getauxval(AT_HWCAP);
+ unsigned long hwcaps2 = getauxval(AT_HWCAP2);
+ unsigned long hwcaps3 = getauxval(AT_HWCAP3);
unsigned long en = 0;
int ret;
- if (!(hwcaps & HWCAP_CPUID)) {
- ksft_print_msg("FAIL: CPUID registers unavailable\n");
- return KSFT_FAIL;
- }
- /* Read ID_AA64PFR1_EL1 register */
- asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
- if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
- ksft_print_msg("FAIL: MTE features unavailable\n");
- return KSFT_SKIP;
- }
+ /* To generate random address tag */
+ srandom(time(NULL));
+
+ if (!(hwcaps2 & HWCAP2_MTE))
+ ksft_exit_skip("MTE features unavailable\n");
+
+ mtefar_support = !!(hwcaps3 & HWCAP3_MTE_FAR);
+
+ if (hwcaps3 & HWCAP3_MTE_STORE_ONLY)
+ mtestonly_support = true;
+
/* Get current mte mode */
ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
if (ret < 0) {
@@ -310,6 +391,8 @@ int mte_default_setup(void)
else if (ret & PR_MTE_TCF_NONE)
mte_cur_mode = MTE_NONE_ERR;
+ mte_cur_stonly = (ret & PR_MTE_STORE_ONLY) ? true : false;
+
mte_cur_pstate_tco = mte_get_pstate_tco();
/* Disable PSTATE.TCO */
mte_disable_pstate_tco();
@@ -318,7 +401,7 @@ int mte_default_setup(void)
void mte_restore_setup(void)
{
- mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG, mte_cur_stonly);
if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
mte_enable_pstate_tco();
else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
@@ -333,6 +416,7 @@ int create_temp_file(void)
/* Create a file in the tmpfs filesystem */
fd = mkstemp(&filename[0]);
if (fd == -1) {
+ ksft_perror(filename);
ksft_print_msg("FAIL: Unable to open temporary file\n");
return 0;
}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
index 195a7d1879e6..250d671329a5 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.h
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -37,10 +37,13 @@ struct mte_fault_cxt {
};
extern struct mte_fault_cxt cur_mte_cxt;
+extern bool mtefar_support;
+extern bool mtestonly_support;
/* MTE utility functions */
void mte_default_handler(int signum, siginfo_t *si, void *uc);
-void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *),
+ bool export_tags);
void mte_wait_after_trig(void);
void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
@@ -54,9 +57,11 @@ void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
size_t range_before, size_t range_after);
void *mte_insert_tags(void *ptr, size_t size);
void mte_clear_tags(void *ptr, size_t size);
+void *mte_insert_atag(void *ptr);
+void *mte_clear_atag(void *ptr);
int mte_default_setup(void);
void mte_restore_setup(void);
-int mte_switch_mode(int mte_option, unsigned long incl_mask);
+int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly);
void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
/* Common utility functions */
@@ -75,10 +80,21 @@ unsigned int mte_get_pstate_tco(void);
/* Test framework static inline functions/macros */
static inline void evaluate_test(int err, const char *msg)
{
- if (err == KSFT_PASS)
- ksft_test_result_pass(msg);
- else if (err == KSFT_FAIL)
- ksft_test_result_fail(msg);
+ switch (err) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s", msg);
+ break;
+ case KSFT_FAIL:
+ ksft_test_result_fail("%s", msg);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s", msg);
+ break;
+ default:
+ ksft_test_result_error("Unknown return code %d from %s",
+ err, msg);
+ break;
+ }
}
static inline int check_allocated_memory(void *ptr, size_t size,
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
index 9b188254b61a..6ad22f07c9b8 100644
--- a/tools/testing/selftests/arm64/mte/mte_def.h
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -42,6 +42,8 @@
#define MT_TAG_COUNT 16
#define MT_INCLUDE_TAG_MASK 0xFFFF
#define MT_EXCLUDE_TAG_MASK 0x0
+#define MT_ATAG_SHIFT 60
+#define MT_ATAG_MASK 0xFUL
#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1)
#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
@@ -49,6 +51,12 @@
#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+#define MT_CLEAR_ATAG(x) ((x) & ~(MT_TAG_MASK << MT_ATAG_SHIFT))
+#define MT_SET_ATAG(x, y) ((x) | (((y) & MT_ATAG_MASK) << MT_ATAG_SHIFT))
+#define MT_FETCH_ATAG(x) ((x >> MT_ATAG_SHIFT) & (MT_ATAG_MASK))
+
+#define MT_CLEAR_TAGS(x) (MT_CLEAR_ATAG(MT_CLEAR_TAG(x)))
+
#define MT_PSTATE_TCO_SHIFT 25
#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT)
#define MT_PSTATE_TCO_EN 1
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
index a02c04cd0aac..a55dbbc56ed1 100644
--- a/tools/testing/selftests/arm64/mte/mte_helper.S
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -3,6 +3,8 @@
#include "mte_def.h"
+.arch armv8.5-a+memtag
+
#define ENTRY(name) \
.globl name ;\
.p2align 2;\
diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
index 72e290b0b10c..b5a1c80e0ead 100644
--- a/tools/testing/selftests/arm64/pauth/Makefile
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -7,8 +7,14 @@ CC := $(CROSS_COMPILE)gcc
endif
CFLAGS += -mbranch-protection=pac-ret
+
+# All supported LLVMs have PAC, test for GCC
+ifeq ($(LLVM),1)
+pauth_cc_support := 1
+else
# check if the compiler supports ARMv8.3 and branch protection with PAuth
pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+endif
ifeq ($(pauth_cc_support),1)
TEST_GEN_PROGS := pac
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
index 4435600ca400..e597861b26d6 100644
--- a/tools/testing/selftests/arm64/pauth/exec_target.c
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -13,7 +13,12 @@ int main(void)
unsigned long hwcaps;
size_t val;
- fread(&val, sizeof(size_t), 1, stdin);
+ size_t size = fread(&val, sizeof(size_t), 1, stdin);
+
+ if (size != 1) {
+ fprintf(stderr, "Could not read input from stdin\n");
+ return EXIT_FAILURE;
+ }
/* don't try to execute illegal (unimplemented) instructions) caller
* should have checked this and keep worker simple
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index 592fe538506e..67d138057707 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -10,10 +10,10 @@
#include <setjmp.h>
#include <sched.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "helper.h"
-#define PAC_COLLISION_ATTEMPTS 10
+#define PAC_COLLISION_ATTEMPTS 1000
/*
* The kernel sets TBID by default. So bits 55 and above should remain
* untouched no matter what.
@@ -25,13 +25,15 @@
do { \
unsigned long hwcaps = getauxval(AT_HWCAP); \
/* data key instructions are not in NOP space. This prevents a SIGILL */ \
- ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+ if (!(hwcaps & HWCAP_PACA)) \
+ SKIP(return, "PAUTH not enabled"); \
} while (0)
#define ASSERT_GENERIC_PAUTH_ENABLED() \
do { \
unsigned long hwcaps = getauxval(AT_HWCAP); \
/* generic key instructions are not in NOP space. This prevents a SIGILL */ \
- ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+ if (!(hwcaps & HWCAP_PACG)) \
+ SKIP(return, "Generic PAUTH not enabled"); \
} while (0)
void sign_specific(struct signatures *sign, size_t val)
@@ -180,6 +182,9 @@ int exec_sign_all(struct signatures *signed_vals, size_t val)
return -1;
}
+ close(new_stdin[1]);
+ close(new_stdout[0]);
+
return 0;
}
@@ -256,7 +261,7 @@ TEST(single_thread_different_keys)
unsigned long hwcaps = getauxval(AT_HWCAP);
/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
- ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ ASSERT_PAUTH_ENABLED();
if (!(hwcaps & HWCAP_PACG)) {
TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
nkeys = NKEYS - 1;
@@ -299,7 +304,7 @@ TEST(exec_changed_keys)
unsigned long hwcaps = getauxval(AT_HWCAP);
/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
- ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ ASSERT_PAUTH_ENABLED();
if (!(hwcaps & HWCAP_PACG)) {
TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
nkeys = NKEYS - 1;
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 78c902045ca7..b257db665a35 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -1,4 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
mangle_*
fake_sigreturn_*
+fpmr_*
+poe_*
+gcs_*
+sme_*
+ssve_*
+sve_*
+tpidr2_*
+za_*
+zt_*
!*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile
index ac4ad0005715..1381039fb36f 100644
--- a/tools/testing/selftests/arm64/signal/Makefile
+++ b/tools/testing/selftests/arm64/signal/Makefile
@@ -2,7 +2,7 @@
# Copyright (C) 2019 ARM Limited
# Additional include paths needed by kselftest.h and local headers
-CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
+CFLAGS += -std=gnu99 -I.
SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c))
PROGS := $(patsubst %.c,%,$(SRCS))
@@ -11,7 +11,6 @@ PROGS := $(patsubst %.c,%,$(SRCS))
TEST_GEN_PROGS := $(notdir $(PROGS))
# Get Kernel headers installed and use them.
-KSFT_KHDR_INSTALL := 1
# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
# to account for any OUTPUT target-dirs optionally provided by
@@ -23,6 +22,10 @@ $(TEST_GEN_PROGS): $(PROGS)
# Common test-unit targets to build common-layout test-cases executables
# Needs secondary expansion to properly include the testcase c-file in pre-reqs
+COMMON_SOURCES := test_signals.c test_signals_utils.c testcases/testcases.c \
+ signals.S sve_helpers.c
+COMMON_HEADERS := test_signals.h test_signals_utils.h testcases/testcases.h
+
.SECONDEXPANSION:
-$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c signals.S $$@.c test_signals.h test_signals_utils.h testcases/testcases.h
- $(CC) $(CFLAGS) $^ -o $@
+$(PROGS): $$@.c ${COMMON_SOURCES} ${COMMON_HEADERS}
+ $(CC) $(CFLAGS) ${@}.c ${COMMON_SOURCES} -o $@
diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.c b/tools/testing/selftests/arm64/signal/sve_helpers.c
new file mode 100644
index 000000000000..0acc121af306
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/sve_helpers.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 ARM Limited
+ *
+ * Common helper functions for SVE and SME functionality.
+ */
+
+#include <stdbool.h>
+#include <kselftest.h>
+#include <asm/sigcontext.h>
+#include <sys/prctl.h>
+
+unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls;
+
+int sve_fill_vls(bool use_sme, int min_vls)
+{
+ int vq, vl;
+ int pr_set_vl = use_sme ? PR_SME_SET_VL : PR_SVE_SET_VL;
+ int len_mask = use_sme ? PR_SME_VL_LEN_MASK : PR_SVE_VL_LEN_MASK;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(pr_set_vl, vq * 16);
+ if (vl == -1)
+ return KSFT_FAIL;
+
+ vl &= len_mask;
+
+ /*
+ * Unlike SVE, SME does not require the minimum vector length
+ * to be implemented, or the VLs to be consecutive, so any call
+ * to the prctl might return the single implemented VL, which
+ * might be larger than 16. So to avoid this loop never
+ * terminating, bail out here when we find a higher VL than
+ * we asked for.
+ * See the ARM ARM, DDI 0487K.a, B1.4.2: I_QQRNR and I_NWYBP.
+ */
+ if (vq < sve_vq_from_vl(vl))
+ break;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ if (nvls < min_vls) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return KSFT_SKIP;
+ }
+
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h
new file mode 100644
index 000000000000..ca133b93375f
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/sve_helpers.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 ARM Limited
+ *
+ * Common helper functions for SVE and SME functionality.
+ */
+
+#ifndef __SVE_HELPERS_H__
+#define __SVE_HELPERS_H__
+
+#include <stdbool.h>
+
+#define VLS_USE_SVE false
+#define VLS_USE_SME true
+
+extern unsigned int vls[];
+extern unsigned int nvls;
+
+int sve_fill_vls(bool use_sme, int min_vls);
+
+static inline uint64_t get_svcr(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, S3_3_C4_C2_2\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+#endif
diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c
index 416b1ff43199..1304c8ec0f2f 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.c
+++ b/tools/testing/selftests/arm64/signal/test_signals.c
@@ -7,16 +7,28 @@
* Each test provides its own tde struct tdescr descriptor to link with
* this wrapper. Framework provides common helpers.
*/
+
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+
#include <kselftest.h>
#include "test_signals.h"
#include "test_signals_utils.h"
-struct tdescr *current;
+struct tdescr *current = &tde;
int main(int argc, char *argv[])
{
- current = &tde;
+ /*
+ * Ensure GCS is at least enabled throughout the tests if
+ * supported, otherwise the inability to return from the
+ * function that enabled GCS makes it very inconvenient to set
+ * up test cases. The prctl() may fail if GCS was locked by
+ * libc setup code.
+ */
+ if (getauxval(AT_HWCAP) & HWCAP_GCS)
+ gcs_set_state(PR_SHADOW_STACK_ENABLE);
ksft_print_msg("%s :: %s\n", current->name, current->descr);
if (test_setup(current) && test_init(current)) {
@@ -25,5 +37,6 @@ int main(int argc, char *argv[])
}
test_result(current);
- return current->result;
+ /* Do not return in case GCS was enabled */
+ exit(current->result);
}
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index f96baf1cef1a..ee75a2c25ce7 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -9,9 +9,7 @@
#include <ucontext.h>
/*
- * Using ARCH specific and sanitized Kernel headers installed by KSFT
- * framework since we asked for it by setting flag KSFT_KHDR_INSTALL
- * in our Makefile.
+ * Using ARCH specific and sanitized Kernel headers from the tree.
*/
#include <asm/ptrace.h>
#include <asm/hwcap.h>
@@ -33,10 +31,20 @@
*/
enum {
FSSBS_BIT,
+ FSVE_BIT,
+ FSME_BIT,
+ FSME_FA64_BIT,
+ FSME2_BIT,
+ FGCS_BIT,
FMAX_END
};
#define FEAT_SSBS (1UL << FSSBS_BIT)
+#define FEAT_SVE (1UL << FSVE_BIT)
+#define FEAT_SME (1UL << FSME_BIT)
+#define FEAT_SME_FA64 (1UL << FSME_FA64_BIT)
+#define FEAT_SME2 (1UL << FSME2_BIT)
+#define FEAT_GCS (1UL << FGCS_BIT)
/*
* A descriptor used to describe and configure a test case.
@@ -51,6 +59,7 @@ struct tdescr {
char *name;
char *descr;
unsigned long feats_required;
+ unsigned long feats_incompatible;
/* bitmask of effectively supported feats: populated at run-time */
unsigned long feats_supported;
bool initialized;
@@ -62,6 +71,10 @@ struct tdescr {
* Zero when no signal is expected on success
*/
int sig_ok;
+ /*
+ * expected si_code for sig_ok, or 0 to not check
+ */
+ int sig_ok_code;
/* signum expected on unsupported CPU features. */
int sig_unsupp;
/* a timeout in second for test completion */
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 2de6e5ed5e25..5d3621921cfe 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -26,6 +26,11 @@ static int sig_copyctx = SIGTRAP;
static char const *const feats_names[FMAX_END] = {
" SSBS ",
+ " SVE ",
+ " SME ",
+ " FA64 ",
+ " SME2 ",
+ " GCS ",
};
#define MAX_FEATS_SZ 128
@@ -35,6 +40,8 @@ static inline char *feats_to_string(unsigned long feats)
{
size_t flen = MAX_FEATS_SZ - 1;
+ feats_string[0] = '\0';
+
for (int i = 0; i < FMAX_END; i++) {
if (feats & (1UL << i)) {
size_t tlen = strlen(feats_names[i]);
@@ -136,16 +143,25 @@ static bool handle_signal_ok(struct tdescr *td,
"current->token ZEROED...test is probably broken!\n");
abort();
}
- /*
- * Trying to narrow down the SEGV to the ones generated by Kernel itself
- * via arm64_notify_segfault(). This is a best-effort check anyway, and
- * the si_code check may need to change if this aspect of the kernel
- * ABI changes.
- */
- if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) {
- fprintf(stdout,
- "si_code != SEGV_ACCERR...test is probably broken!\n");
- abort();
+ if (td->sig_ok_code) {
+ if (si->si_code != td->sig_ok_code) {
+ fprintf(stdout, "si_code is %d not %d\n",
+ si->si_code, td->sig_ok_code);
+ abort();
+ }
+ } else {
+ /*
+ * Trying to narrow down the SEGV to the ones
+ * generated by Kernel itself via
+ * arm64_notify_segfault(). This is a best-effort
+ * check anyway, and the si_code check may need to
+ * change if this aspect of the kernel ABI changes.
+ */
+ if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) {
+ fprintf(stdout,
+ "si_code != SEGV_ACCERR...test is probably broken!\n");
+ abort();
+ }
}
td->pass = 1;
/*
@@ -160,15 +176,66 @@ static bool handle_signal_ok(struct tdescr *td,
}
static bool handle_signal_copyctx(struct tdescr *td,
- siginfo_t *si, void *uc)
+ siginfo_t *si, void *uc_in)
{
+ ucontext_t *uc = uc_in;
+ struct _aarch64_ctx *head;
+ struct extra_context *extra, *copied_extra;
+ size_t offset = 0;
+ size_t to_copy;
+
+ ASSERT_GOOD_CONTEXT(uc);
+
/* Mangling PC to avoid loops on original BRK instr */
- ((ucontext_t *)uc)->uc_mcontext.pc += 4;
- memcpy(td->live_uc, uc, td->live_sz);
- ASSERT_GOOD_CONTEXT(td->live_uc);
+ uc->uc_mcontext.pc += 4;
+
+ /*
+ * Check for an preserve any extra data too with fixups.
+ */
+ head = (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
+ head = get_header(head, EXTRA_MAGIC, td->live_sz, &offset);
+ if (head) {
+ extra = (struct extra_context *)head;
+
+ /*
+ * The extra buffer must be immediately after the
+ * extra_context and a 16 byte terminator. Include it
+ * in the copy, this was previously validated in
+ * ASSERT_GOOD_CONTEXT().
+ */
+ to_copy = __builtin_offsetof(ucontext_t,
+ uc_mcontext.__reserved);
+ to_copy += offset + sizeof(struct extra_context) + 16;
+ to_copy += extra->size;
+ copied_extra = (struct extra_context *)&(td->live_uc->uc_mcontext.__reserved[offset]);
+ } else {
+ copied_extra = NULL;
+ to_copy = sizeof(ucontext_t);
+ }
+
+ if (to_copy > td->live_sz) {
+ fprintf(stderr,
+ "Not enough space to grab context, %lu/%lu bytes\n",
+ td->live_sz, to_copy);
+ return false;
+ }
+
+ memcpy(td->live_uc, uc, to_copy);
+
+ /*
+ * If there was any EXTRA_CONTEXT fix up the size to be the
+ * struct extra_context and the following terminator record,
+ * this means that the rest of the code does not need to have
+ * special handling for the record and we don't need to fix up
+ * datap for the new location.
+ */
+ if (copied_extra)
+ copied_extra->head.size = sizeof(*copied_extra) + 16;
+
td->live_uc_valid = 1;
fprintf(stderr,
- "GOOD CONTEXT grabbed from sig_copyctx handler\n");
+ "%lu byte GOOD CONTEXT grabbed from sig_copyctx handler\n",
+ to_copy);
return true;
}
@@ -192,7 +259,8 @@ static void default_handler(int signum, siginfo_t *si, void *uc)
fprintf(stderr, "-- Timeout !\n");
} else {
fprintf(stderr,
- "-- RX UNEXPECTED SIGNAL: %d\n", signum);
+ "-- RX UNEXPECTED SIGNAL: %d code %d address %p\n",
+ signum, si->si_code, si->si_addr);
}
default_result(current, 1);
}
@@ -255,7 +323,7 @@ int test_init(struct tdescr *td)
td->minsigstksz = MINSIGSTKSZ;
fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz);
- if (td->feats_required) {
+ if (td->feats_required || td->feats_incompatible) {
td->feats_supported = 0;
/*
* Checking for CPU required features using both the
@@ -263,16 +331,43 @@ int test_init(struct tdescr *td)
*/
if (getauxval(AT_HWCAP) & HWCAP_SSBS)
td->feats_supported |= FEAT_SSBS;
- if (feats_ok(td))
- fprintf(stderr,
- "Required Features: [%s] supported\n",
- feats_to_string(td->feats_required &
- td->feats_supported));
- else
- fprintf(stderr,
- "Required Features: [%s] NOT supported\n",
- feats_to_string(td->feats_required &
- ~td->feats_supported));
+ if (getauxval(AT_HWCAP) & HWCAP_SVE)
+ td->feats_supported |= FEAT_SVE;
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+ td->feats_supported |= FEAT_SME;
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+ td->feats_supported |= FEAT_SME_FA64;
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME2)
+ td->feats_supported |= FEAT_SME2;
+ if (getauxval(AT_HWCAP) & HWCAP_GCS)
+ td->feats_supported |= FEAT_GCS;
+ if (feats_ok(td)) {
+ if (td->feats_required & td->feats_supported)
+ fprintf(stderr,
+ "Required Features: [%s] supported\n",
+ feats_to_string(td->feats_required &
+ td->feats_supported));
+ if (!(td->feats_incompatible & td->feats_supported))
+ fprintf(stderr,
+ "Incompatible Features: [%s] absent\n",
+ feats_to_string(td->feats_incompatible));
+ } else {
+ if ((td->feats_required & td->feats_supported) !=
+ td->feats_supported)
+ fprintf(stderr,
+ "Required Features: [%s] NOT supported\n",
+ feats_to_string(td->feats_required &
+ ~td->feats_supported));
+ if (td->feats_incompatible & td->feats_supported)
+ fprintf(stderr,
+ "Incompatible Features: [%s] supported\n",
+ feats_to_string(td->feats_incompatible &
+ ~td->feats_supported));
+
+
+ td->result = KSFT_SKIP;
+ return 0;
+ }
}
/* Perform test specific additional initialization */
@@ -304,14 +399,12 @@ int test_setup(struct tdescr *td)
int test_run(struct tdescr *td)
{
- if (td->sig_trig) {
- if (td->trigger)
- return td->trigger(td);
- else
- return default_trigger(td);
- } else {
+ if (td->trigger)
+ return td->trigger(td);
+ else if (td->sig_trig)
+ return default_trigger(td);
+ else
return td->run(td, NULL, NULL);
- }
}
void test_result(struct tdescr *td)
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index 6772b5c8d274..36fc12b3cd60 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -6,8 +6,11 @@
#include <assert.h>
#include <stdio.h>
+#include <stdint.h>
#include <string.h>
+#include <linux/compiler.h>
+
#include "test_signals.h"
int test_init(struct tdescr *td);
@@ -16,8 +19,48 @@ void test_cleanup(struct tdescr *td);
int test_run(struct tdescr *td);
void test_result(struct tdescr *td);
+#ifndef __NR_prctl
+#define __NR_prctl 167
+#endif
+
+/*
+ * The prctl takes 1 argument but we need to ensure that the other
+ * values passed in registers to the syscall are zero since the kernel
+ * validates them.
+ */
+#define gcs_set_state(state) \
+ ({ \
+ register long _num __asm__ ("x8") = __NR_prctl; \
+ register long _arg1 __asm__ ("x0") = PR_SET_SHADOW_STACK_STATUS; \
+ register long _arg2 __asm__ ("x1") = (long)(state); \
+ register long _arg3 __asm__ ("x2") = 0; \
+ register long _arg4 __asm__ ("x3") = 0; \
+ register long _arg5 __asm__ ("x4") = 0; \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), \
+ "r"(_arg3), "r"(_arg4), \
+ "r"(_arg5), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+ })
+
+static inline __attribute__((always_inline)) uint64_t get_gcspr_el0(void)
+{
+ uint64_t val;
+
+ asm volatile("mrs %0, S3_3_C2_C5_1" : "=r" (val));
+
+ return val;
+}
+
static inline bool feats_ok(struct tdescr *td)
{
+ if (td->feats_incompatible & td->feats_supported)
+ return false;
return (td->feats_required & td->feats_supported) == td->feats_required;
}
@@ -54,16 +97,29 @@ static inline bool feats_ok(struct tdescr *td)
* at sizeof(ucontext_t).
*/
static __always_inline bool get_current_context(struct tdescr *td,
- ucontext_t *dest_uc)
+ ucontext_t *dest_uc,
+ size_t dest_sz)
{
static volatile bool seen_already;
+ int i;
+ char *uc = (char *)dest_uc;
assert(td && dest_uc);
/* it's a genuine invocation..reinit */
seen_already = 0;
td->live_uc_valid = 0;
- td->live_sz = sizeof(*dest_uc);
- memset(dest_uc, 0x00, td->live_sz);
+ td->live_sz = dest_sz;
+
+ /*
+ * This is a memset() but we don't want the compiler to
+ * optimise it into either instructions or a library call
+ * which might be incompatible with streaming mode.
+ */
+ for (i = 0; i < td->live_sz; i++) {
+ uc[i] = 0;
+ OPTIMIZER_HIDE_VAR(uc[0]);
+ }
+
td->live_uc = dest_uc;
/*
* Grab ucontext_t triggering a SIGTRAP.
@@ -101,6 +157,17 @@ static __always_inline bool get_current_context(struct tdescr *td,
: "memory");
/*
+ * If we were grabbing a streaming mode context then we may
+ * have entered streaming mode behind the system's back and
+ * libc or compiler generated code might decide to do
+ * something invalid in streaming mode, or potentially even
+ * the state of ZA. Issue a SMSTOP to exit both now we have
+ * grabbed the state.
+ */
+ if (td->feats_supported & FEAT_SME)
+ asm volatile("msr S0_3_C4_C6_3, xzr");
+
+ /*
* If we get here with seen_already==1 it implies the td->live_uc
* context has been used to get back here....this probably means
* a test has failed to cause a SEGV...anyway live_uc does not
diff --git a/tools/testing/selftests/arm64/signal/testcases/TODO b/tools/testing/selftests/arm64/signal/testcases/TODO
new file mode 100644
index 000000000000..1f7fba8194fe
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/TODO
@@ -0,0 +1 @@
+- Validate that register contents are saved and restored as expected.
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
index 8dc600a7d4fd..8c7f00ea9823 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
@@ -21,7 +21,7 @@ static int fake_sigreturn_bad_magic_run(struct tdescr *td,
struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
/* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. */
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
index b3c362100666..1c03f6b638e0 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
@@ -24,7 +24,7 @@ static int fake_sigreturn_bad_size_run(struct tdescr *td,
struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
index a44b88bfc81a..bc22f64b544e 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
@@ -21,7 +21,7 @@ static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td,
struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
/* at least HDR_SZ for the badly sized terminator. */
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
index afe8915f0998..63e3906b631c 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
@@ -21,7 +21,7 @@ static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td,
struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ,
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
index 1e089e66f9f3..d00625ff12c2 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
@@ -19,7 +19,7 @@ static int fake_sigreturn_misaligned_run(struct tdescr *td,
siginfo_t *si, ucontext_t *uc)
{
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
/* Forcing sigframe on misaligned SP (16 + 3) */
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
index 08ecd8073a1a..f805138cb20d 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
@@ -23,7 +23,7 @@ static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td,
struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
new file mode 100644
index 000000000000..dfd6a2badf9f
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the streaming SVE vector length in a signal
+ * handler, this is not supported and is expected to segfault.
+ */
+
+#include <kselftest.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "sve_helpers.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int res = sve_fill_vls(VLS_USE_SME, 2);
+
+ if (!res)
+ return true;
+
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
+
+ return false;
+}
+
+static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct za_context *za;
+
+ /* Get a signal context with a SME ZA frame in it */
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+
+ if (head->size != sizeof(struct za_context)) {
+ fprintf(stderr, "Register data present, aborting\n");
+ return 1;
+ }
+
+ za = (struct za_context *)head;
+
+ /* No changes are supported; init left us at minimum VL so go to max */
+ fprintf(stderr, "Attempting to change VL from %d to %d\n",
+ za->vl, vls[0]);
+ za->vl = vls[0];
+
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_SSVE_CHANGE",
+ .descr = "Attempt to change Streaming SVE VL",
+ .feats_required = FEAT_SME,
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = fake_sigreturn_ssve_change_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
new file mode 100644
index 000000000000..e1ccf8f85a70
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the SVE vector length in a signal hander, this is not
+ * supported and is expected to segfault.
+ */
+
+#include <kselftest.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "sve_helpers.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+ int res = sve_fill_vls(VLS_USE_SVE, 2);
+
+ if (!res)
+ return true;
+
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
+
+ return false;
+}
+
+static int fake_sigreturn_sve_change_vl(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context with a SVE frame in it */
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ if (head->size != sizeof(struct sve_context)) {
+ fprintf(stderr, "SVE register state active, skipping\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+
+ /* No changes are supported; init left us at minimum VL so go to max */
+ fprintf(stderr, "Attempting to change VL from %d to %d\n",
+ sve->vl, vls[0]);
+ sve->vl = vls[0];
+
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_SVE_CHANGE",
+ .descr = "Attempt to change SVE VL",
+ .feats_required = FEAT_SVE,
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .init = sve_get_vls,
+ .run = fake_sigreturn_sve_change_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c
new file mode 100644
index 000000000000..e9d24685e741
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ *
+ * Verify that the FPMR register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+#define SYS_FPMR "S3_3_C4_C4_2"
+
+static uint64_t get_fpmr(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_FPMR "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+int fpmr_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct fpmr_context *fpmr_ctx;
+ size_t offset;
+ bool in_sigframe;
+ bool have_fpmr;
+ __u64 orig_fpmr;
+
+ have_fpmr = getauxval(AT_HWCAP2) & HWCAP2_FPMR;
+ if (have_fpmr)
+ orig_fpmr = get_fpmr();
+
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ fpmr_ctx = (struct fpmr_context *)
+ get_header(head, FPMR_MAGIC, td->live_sz, &offset);
+
+ in_sigframe = fpmr_ctx != NULL;
+
+ fprintf(stderr, "FPMR sigframe %s on system %s FPMR\n",
+ in_sigframe ? "present" : "absent",
+ have_fpmr ? "with" : "without");
+
+ td->pass = (in_sigframe == have_fpmr);
+
+ if (have_fpmr && fpmr_ctx) {
+ if (fpmr_ctx->fpmr != orig_fpmr) {
+ fprintf(stderr, "FPMR in frame is %llx, was %llx\n",
+ fpmr_ctx->fpmr, orig_fpmr);
+ td->pass = false;
+ }
+ }
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "FPMR",
+ .descr = "Validate that FPMR is present as expected",
+ .timeout = 3,
+ .run = fpmr_present,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c
new file mode 100644
index 000000000000..6228448b2ae7
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+/*
+ * We should get this from asm/siginfo.h but the testsuite is being
+ * clever with redefining siginfo_t.
+ */
+#ifndef SEGV_CPERR
+#define SEGV_CPERR 10
+#endif
+
+static inline void gcsss1(uint64_t Xt)
+{
+ asm volatile (
+ "sys #3, C7, C7, #2, %0\n"
+ :
+ : "rZ" (Xt)
+ : "memory");
+}
+
+static int gcs_op_fault_trigger(struct tdescr *td)
+{
+ /*
+ * The slot below our current GCS should be in a valid GCS but
+ * must not have a valid cap in it.
+ */
+ gcsss1(get_gcspr_el0() - 8);
+
+ return 0;
+}
+
+static int gcs_op_fault_signal(struct tdescr *td, siginfo_t *si,
+ ucontext_t *uc)
+{
+ ASSERT_GOOD_CONTEXT(uc);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "Invalid GCS operation",
+ .descr = "An invalid GCS operation generates the expected signal",
+ .feats_required = FEAT_GCS,
+ .timeout = 3,
+ .sig_ok = SIGSEGV,
+ .sig_ok_code = SEGV_CPERR,
+ .sanity_disabled = true,
+ .trigger = gcs_op_fault_trigger,
+ .run = gcs_op_fault_signal,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c
new file mode 100644
index 000000000000..b405d82321da
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 64];
+} context;
+
+static int gcs_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct gcs_context *gcs;
+ unsigned long expected, gcspr;
+ uint64_t *u64_val;
+ int ret;
+
+ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &expected, 0, 0, 0);
+ if (ret != 0) {
+ fprintf(stderr, "Unable to query GCS status\n");
+ return 1;
+ }
+
+ /* We expect a cap to be added to the GCS in the signal frame */
+ gcspr = get_gcspr_el0();
+ gcspr -= 8;
+ fprintf(stderr, "Expecting GCSPR_EL0 %lx\n", gcspr);
+
+ if (!get_current_context(td, &context.uc, sizeof(context))) {
+ fprintf(stderr, "Failed getting context\n");
+ return 1;
+ }
+
+ /* Ensure that the signal restore token was consumed */
+ u64_val = (uint64_t *)get_gcspr_el0() + 1;
+ if (*u64_val) {
+ fprintf(stderr, "GCS value at %p is %lx not 0\n",
+ u64_val, *u64_val);
+ return 1;
+ }
+
+ fprintf(stderr, "Got context\n");
+
+ head = get_header(head, GCS_MAGIC, GET_BUF_RESV_SIZE(context),
+ &offset);
+ if (!head) {
+ fprintf(stderr, "No GCS context\n");
+ return 1;
+ }
+
+ gcs = (struct gcs_context *)head;
+
+ /* Basic size validation is done in get_current_context() */
+
+ if (gcs->features_enabled != expected) {
+ fprintf(stderr, "Features enabled %llx but expected %lx\n",
+ gcs->features_enabled, expected);
+ return 1;
+ }
+
+ if (gcs->gcspr != gcspr) {
+ fprintf(stderr, "Got GCSPR %llx but expected %lx\n",
+ gcs->gcspr, gcspr);
+ return 1;
+ }
+
+ fprintf(stderr, "GCS context validated\n");
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "GCS basics",
+ .descr = "Validate a GCS signal context",
+ .feats_required = FEAT_GCS,
+ .timeout = 3,
+ .run = gcs_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c
new file mode 100644
index 000000000000..faeabb18c4b2
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static uint64_t *gcs_page;
+
+#ifndef __NR_map_shadow_stack
+#define __NR_map_shadow_stack 453
+#endif
+
+static bool alloc_gcs(struct tdescr *td)
+{
+ long page_size = sysconf(_SC_PAGE_SIZE);
+
+ gcs_page = (void *)syscall(__NR_map_shadow_stack, 0,
+ page_size, 0);
+ if (gcs_page == MAP_FAILED) {
+ fprintf(stderr, "Failed to map %ld byte GCS: %d\n",
+ page_size, errno);
+ return false;
+ }
+
+ return true;
+}
+
+static int gcs_write_fault_trigger(struct tdescr *td)
+{
+ /* Verify that the page is readable (ie, not completely unmapped) */
+ fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]);
+
+ /* A regular write should trigger a fault */
+ gcs_page[0] = EINVAL;
+
+ return 0;
+}
+
+static int gcs_write_fault_signal(struct tdescr *td, siginfo_t *si,
+ ucontext_t *uc)
+{
+ ASSERT_GOOD_CONTEXT(uc);
+
+ return 1;
+}
+
+
+struct tdescr tde = {
+ .name = "GCS write fault",
+ .descr = "Normal writes to a GCS segfault",
+ .feats_required = FEAT_GCS,
+ .timeout = 3,
+ .sig_ok = SIGSEGV,
+ .sanity_disabled = true,
+ .init = alloc_gcs,
+ .trigger = gcs_write_fault_trigger,
+ .run = gcs_write_fault_signal,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c
new file mode 100644
index 000000000000..36bd9940ee05
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Arm Limited
+ *
+ * Verify that the POR_EL0 register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+#define SYS_POR_EL0 "S3_3_C10_C2_4"
+
+static uint64_t get_por_el0(void)
+{
+ uint64_t val;
+
+ asm volatile(
+ "mrs %0, " SYS_POR_EL0 "\n"
+ : "=r"(val)
+ :
+ : );
+
+ return val;
+}
+
+int poe_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct poe_context *poe_ctx;
+ size_t offset;
+ bool in_sigframe;
+ bool have_poe;
+ __u64 orig_poe;
+
+ have_poe = getauxval(AT_HWCAP2) & HWCAP2_POE;
+ if (have_poe)
+ orig_poe = get_por_el0();
+
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ poe_ctx = (struct poe_context *)
+ get_header(head, POE_MAGIC, td->live_sz, &offset);
+
+ in_sigframe = poe_ctx != NULL;
+
+ fprintf(stderr, "POR_EL0 sigframe %s on system %s POE\n",
+ in_sigframe ? "present" : "absent",
+ have_poe ? "with" : "without");
+
+ td->pass = (in_sigframe == have_poe);
+
+ /*
+ * Check that the value we read back was the one present at
+ * the time that the signal was triggered.
+ */
+ if (have_poe && poe_ctx) {
+ if (poe_ctx->por_el0 != orig_poe) {
+ fprintf(stderr, "POR_EL0 in frame is %llx, was %llx\n",
+ poe_ctx->por_el0, orig_poe);
+ td->pass = false;
+ }
+ }
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "POR_EL0",
+ .descr = "Validate that POR_EL0 is present as expected",
+ .timeout = 3,
+ .run = poe_present,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
new file mode 100644
index 000000000000..f9d76ae32bba
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using a streaming mode instruction without enabling it
+ * generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_no_sm_trigger(struct tdescr *td)
+{
+ /* SMSTART ZA ; ADDHA ZA0.S, P0/M, P0/M, Z0.S */
+ asm volatile(".inst 0xd503457f ; .inst 0xc0900000");
+
+ return 0;
+}
+
+int sme_trap_no_sm_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "SME trap without SM",
+ .descr = "Check that we get a SIGILL if we use streaming mode without enabling it",
+ .timeout = 3,
+ .feats_required = FEAT_SME, /* We need a SMSTART ZA */
+ .sanity_disabled = true,
+ .trigger = sme_trap_no_sm_trigger,
+ .run = sme_trap_no_sm_run,
+ .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
new file mode 100644
index 000000000000..e469ae5348e3
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using an instruction not supported in streaming mode
+ * traps when in streaming mode.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_non_streaming_trigger(struct tdescr *td)
+{
+ /*
+ * The framework will handle SIGILL so we need to exit SM to
+ * stop any other code triggering a further SIGILL down the
+ * line from using a streaming-illegal instruction.
+ */
+ asm volatile(".inst 0xd503437f; /* SMSTART ZA */ \
+ cnt v0.16b, v0.16b; \
+ .inst 0xd503447f /* SMSTOP ZA */");
+
+ return 0;
+}
+
+int sme_trap_non_streaming_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "SME SM trap unsupported instruction",
+ .descr = "Check that we get a SIGILL if we use an unsupported instruction in streaming mode",
+ .feats_required = FEAT_SME,
+ .feats_incompatible = FEAT_SME_FA64,
+ .timeout = 3,
+ .sanity_disabled = true,
+ .trigger = sme_trap_non_streaming_trigger,
+ .run = sme_trap_non_streaming_run,
+ .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
new file mode 100644
index 000000000000..3a7747af4715
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that accessing ZA without enabling it generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_za_trigger(struct tdescr *td)
+{
+ /* ZERO ZA */
+ asm volatile(".inst 0xc00800ff");
+
+ return 0;
+}
+
+int sme_trap_za_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "SME ZA trap",
+ .descr = "Check that we get a SIGILL if we access ZA without enabling",
+ .timeout = 3,
+ .sanity_disabled = true,
+ .trigger = sme_trap_za_trigger,
+ .run = sme_trap_za_run,
+ .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
new file mode 100644
index 000000000000..75f387f2db81
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SME vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sme_vl(struct tdescr *td)
+{
+ int ret = prctl(PR_SME_GET_VL);
+ if (ret == -1)
+ return false;
+
+ vl = ret;
+
+ return true;
+}
+
+static int sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct za_context *za;
+
+ /* Get a signal context which should have a ZA frame in it */
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+ za = (struct za_context *)head;
+
+ if (za->vl != vl) {
+ fprintf(stderr, "ZA sigframe VL %u, expected %u\n",
+ za->vl, vl);
+ return 1;
+ } else {
+ fprintf(stderr, "got expected VL %u\n", vl);
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SME VL",
+ .descr = "Check that we get the right SME VL reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = get_sme_vl,
+ .run = sme_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
new file mode 100644
index 000000000000..1dbca9afb13c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the streaming SVE register context in signal frames is
+ * set up as expected.
+ */
+
+#include <kselftest.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "sve_helpers.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 64];
+} context;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int res = sve_fill_vls(VLS_USE_SME, 1);
+
+ if (!res)
+ return true;
+
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
+
+ return false;
+}
+
+static void setup_ssve_regs(void)
+{
+ /* smstart sm; real data is TODO */
+ asm volatile(".inst 0xd503437f" : : : );
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct sve_context *ssve;
+ int ret;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ ret = prctl(PR_SME_SET_VL, vl);
+ if (ret != vl) {
+ fprintf(stderr, "Failed to set VL, got %d\n", ret);
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_ssve_regs();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context),
+ &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ ssve = (struct sve_context *)head;
+ if (ssve->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", ssve->vl, vl);
+ return 1;
+ }
+
+ if (!(ssve->flags & SVE_SIG_FLAG_SM)) {
+ fprintf(stderr, "SVE_SIG_FLAG_SM not set in SVE record\n");
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, ssve->vl);
+
+ if (get_svcr() != 0) {
+ fprintf(stderr, "Unexpected SVCR %lx\n", get_svcr());
+ return 1;
+ }
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "Streaming SVE registers",
+ .descr = "Check that we get the right Streaming SVE registers reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
new file mode 100644
index 000000000000..5557e116e973
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that both the streaming SVE and ZA register context in
+ * signal frames is set up as expected when enabled simultaneously.
+ */
+
+#include <kselftest.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "sve_helpers.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int res = sve_fill_vls(VLS_USE_SME, 1);
+
+ if (!res)
+ return true;
+
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
+
+ return false;
+}
+
+static void setup_regs(void)
+{
+ /* smstart sm; real data is TODO */
+ asm volatile(".inst 0xd503437f" : : : );
+
+ /* smstart za; real data is TODO */
+ asm volatile(".inst 0xd503457f" : : : );
+}
+
+static char zeros[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)];
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct _aarch64_ctx *regs;
+ struct sve_context *ssve;
+ struct za_context *za;
+ int ret;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ ret = prctl(PR_SME_SET_VL, vl);
+ if (ret != vl) {
+ fprintf(stderr, "Failed to set VL, got %d\n", ret);
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have the SVE and ZA
+ * frames in it.
+ */
+ setup_regs();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ regs = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context),
+ &offset);
+ if (!regs) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ ssve = (struct sve_context *)regs;
+ if (ssve->vl != vl) {
+ fprintf(stderr, "Got SSVE VL %d, expected %d\n", ssve->vl, vl);
+ return 1;
+ }
+
+ if (!(ssve->flags & SVE_SIG_FLAG_SM)) {
+ fprintf(stderr, "SVE_SIG_FLAG_SM not set in SVE record\n");
+ return 1;
+ }
+
+ fprintf(stderr, "Got expected SSVE size %u and VL %d\n",
+ regs->size, ssve->vl);
+
+ regs = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context),
+ &offset);
+ if (!regs) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+
+ za = (struct za_context *)regs;
+ if (za->vl != vl) {
+ fprintf(stderr, "Got ZA VL %d, expected %d\n", za->vl, vl);
+ return 1;
+ }
+
+ fprintf(stderr, "Got expected ZA size %u and VL %d\n",
+ regs->size, za->vl);
+
+ /* We didn't load any data into ZA so it should be all zeros */
+ if (memcmp(zeros, (char *)za + ZA_SIG_REGS_OFFSET,
+ ZA_SIG_REGS_SIZE(sve_vq_from_vl(za->vl))) != 0) {
+ fprintf(stderr, "ZA data invalid\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "Streaming SVE registers",
+ .descr = "Check that we get the right Streaming SVE registers reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
new file mode 100644
index 000000000000..8143eb1c58c1
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the SVE register context in signal frames is set up as
+ * expected.
+ */
+
+#include <kselftest.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "sve_helpers.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 64];
+} context;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+ int res = sve_fill_vls(VLS_USE_SVE, 1);
+
+ if (!res)
+ return true;
+
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
+
+ return false;
+}
+
+static void setup_sve_regs(void)
+{
+ /* RDVL x16, #1 so we should have SVE regs; real data is TODO */
+ asm volatile(".inst 0x04bf5030" : : : "x16" );
+}
+
+static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct sve_context *sve;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ if (prctl(PR_SVE_SET_VL, vl) == -1) {
+ fprintf(stderr, "Failed to set VL\n");
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_sve_regs();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context),
+ &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+ if (sve->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", sve->vl, vl);
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, sve->vl);
+
+ return 0;
+}
+
+static int sve_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sve_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SVE registers",
+ .descr = "Check that we get the right SVE registers reported",
+ .feats_required = FEAT_SVE,
+ .timeout = 3,
+ .init = sve_get_vls,
+ .run = sve_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
new file mode 100644
index 000000000000..aa835acec062
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SVE vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sve_vl(struct tdescr *td)
+{
+ int ret = prctl(PR_SVE_GET_VL);
+ if (ret == -1)
+ return false;
+
+ vl = ret;
+
+ return true;
+}
+
+static int sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context which should have a SVE frame in it */
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+ sve = (struct sve_context *)head;
+
+ if (sve->vl != vl) {
+ fprintf(stderr, "sigframe VL %u, expected %u\n",
+ sve->vl, vl);
+ return 1;
+ } else {
+ fprintf(stderr, "got expected VL %u\n", vl);
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SVE VL",
+ .descr = "Check that we get the right SVE VL reported",
+ .feats_required = FEAT_SVE,
+ .timeout = 3,
+ .init = get_sve_vl,
+ .run = sve_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 61ebcdf63831..0c1a6b26afac 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -1,31 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2019 ARM Limited */
-#include "testcases.h"
-
-struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
- size_t resv_sz, size_t *offset)
-{
- size_t offs = 0;
- struct _aarch64_ctx *found = NULL;
-
- if (!head || resv_sz < HDR_SZ)
- return found;
- while (offs <= resv_sz - HDR_SZ &&
- head->magic != magic && head->magic) {
- offs += head->size;
- head = GET_RESV_NEXT_HEAD(head);
- }
- if (head->magic == magic) {
- found = head;
- if (offset)
- *offset = offs;
- }
+#include <ctype.h>
+#include <string.h>
- return found;
-}
+#include "testcases.h"
-bool validate_extra_context(struct extra_context *extra, char **err)
+bool validate_extra_context(struct extra_context *extra, char **err,
+ void **extra_data, size_t *extra_size)
{
struct _aarch64_ctx *term;
@@ -33,7 +15,7 @@ bool validate_extra_context(struct extra_context *extra, char **err)
return false;
fprintf(stderr, "Validating EXTRA...\n");
- term = GET_RESV_NEXT_HEAD(extra);
+ term = GET_RESV_NEXT_HEAD(&extra->head);
if (!term || term->magic || term->size) {
*err = "Missing terminator after EXTRA context";
return false;
@@ -42,11 +24,84 @@ bool validate_extra_context(struct extra_context *extra, char **err)
*err = "Extra DATAP misaligned";
else if (extra->size & 0x0fUL)
*err = "Extra SIZE misaligned";
- else if (extra->datap != (uint64_t)term + sizeof(*term))
+ else if (extra->datap != (uint64_t)term + 0x10UL)
*err = "Extra DATAP misplaced (not contiguous)";
if (*err)
return false;
+ *extra_data = (void *)extra->datap;
+ *extra_size = extra->size;
+
+ return true;
+}
+
+bool validate_sve_context(struct sve_context *sve, char **err)
+{
+ /* Size will be rounded up to a multiple of 16 bytes */
+ size_t regs_size
+ = ((SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl)) + 15) / 16) * 16;
+
+ if (!sve || !err)
+ return false;
+
+ /* Either a bare sve_context or a sve_context followed by regs data */
+ if ((sve->head.size != sizeof(struct sve_context)) &&
+ (sve->head.size != regs_size)) {
+ *err = "bad size for SVE context";
+ return false;
+ }
+
+ if (!sve_vl_valid(sve->vl)) {
+ *err = "SVE VL invalid";
+
+ return false;
+ }
+
+ return true;
+}
+
+bool validate_za_context(struct za_context *za, char **err)
+{
+ /* Size will be rounded up to a multiple of 16 bytes */
+ size_t regs_size
+ = ((ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za->vl)) + 15) / 16) * 16;
+
+ if (!za || !err)
+ return false;
+
+ /* Either a bare za_context or a za_context followed by regs data */
+ if ((za->head.size != sizeof(struct za_context)) &&
+ (za->head.size != regs_size)) {
+ *err = "bad size for ZA context";
+ return false;
+ }
+
+ if (!sve_vl_valid(za->vl)) {
+ *err = "SME VL in ZA context invalid";
+
+ return false;
+ }
+
+ return true;
+}
+
+bool validate_zt_context(struct zt_context *zt, char **err)
+{
+ if (!zt || !err)
+ return false;
+
+ /* If the context is present there should be at least one register */
+ if (zt->nregs == 0) {
+ *err = "no registers";
+ return false;
+ }
+
+ /* Size should agree with the number of registers */
+ if (zt->head.size != ZT_SIG_CONTEXT_SIZE(zt->nregs)) {
+ *err = "register count does not match size";
+ return false;
+ }
+
return true;
}
@@ -55,9 +110,16 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
bool terminated = false;
size_t offs = 0;
int flags = 0;
+ int new_flags, i;
struct extra_context *extra = NULL;
+ struct sve_context *sve = NULL;
+ struct za_context *za = NULL;
+ struct zt_context *zt = NULL;
struct _aarch64_ctx *head =
(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
+ void *extra_data = NULL;
+ size_t extra_sz = 0;
+ char magic[4];
if (!err)
return false;
@@ -68,12 +130,24 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
return false;
}
+ new_flags = 0;
+
switch (head->magic) {
case 0:
- if (head->size)
+ if (head->size) {
*err = "Bad size for terminator";
- else
+ } else if (extra_data) {
+ /* End of main data, walking the extra data */
+ head = extra_data;
+ resv_sz = extra_sz;
+ offs = 0;
+
+ extra_data = NULL;
+ extra_sz = 0;
+ continue;
+ } else {
terminated = true;
+ }
break;
case FPSIMD_MAGIC:
if (flags & FPSIMD_CTX)
@@ -81,19 +155,55 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
else if (head->size !=
sizeof(struct fpsimd_context))
*err = "Bad size for fpsimd_context";
- flags |= FPSIMD_CTX;
+ new_flags |= FPSIMD_CTX;
break;
case ESR_MAGIC:
if (head->size != sizeof(struct esr_context))
*err = "Bad size for esr_context";
break;
+ case POE_MAGIC:
+ if (head->size != sizeof(struct poe_context))
+ *err = "Bad size for poe_context";
+ break;
+ case TPIDR2_MAGIC:
+ if (head->size != sizeof(struct tpidr2_context))
+ *err = "Bad size for tpidr2_context";
+ break;
case SVE_MAGIC:
if (flags & SVE_CTX)
*err = "Multiple SVE_MAGIC";
+ /* Size is validated in validate_sve_context() */
+ sve = (struct sve_context *)head;
+ new_flags |= SVE_CTX;
+ break;
+ case ZA_MAGIC:
+ if (flags & ZA_CTX)
+ *err = "Multiple ZA_MAGIC";
+ /* Size is validated in validate_za_context() */
+ za = (struct za_context *)head;
+ new_flags |= ZA_CTX;
+ break;
+ case ZT_MAGIC:
+ if (flags & ZT_CTX)
+ *err = "Multiple ZT_MAGIC";
+ /* Size is validated in validate_za_context() */
+ zt = (struct zt_context *)head;
+ new_flags |= ZT_CTX;
+ break;
+ case FPMR_MAGIC:
+ if (flags & FPMR_CTX)
+ *err = "Multiple FPMR_MAGIC";
else if (head->size !=
- sizeof(struct sve_context))
- *err = "Bad size for sve_context";
- flags |= SVE_CTX;
+ sizeof(struct fpmr_context))
+ *err = "Bad size for fpmr_context";
+ new_flags |= FPMR_CTX;
+ break;
+ case GCS_MAGIC:
+ if (flags & GCS_CTX)
+ *err = "Multiple GCS_MAGIC";
+ if (head->size != sizeof(struct gcs_context))
+ *err = "Bad size for gcs_context";
+ new_flags |= GCS_CTX;
break;
case EXTRA_MAGIC:
if (flags & EXTRA_CTX)
@@ -101,7 +211,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
else if (head->size !=
sizeof(struct extra_context))
*err = "Bad size for extra_context";
- flags |= EXTRA_CTX;
+ new_flags |= EXTRA_CTX;
extra = (struct extra_context *)head;
break;
case KSFT_BAD_MAGIC:
@@ -117,11 +227,19 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
/*
* A still unknown Magic: potentially freshly added
* to the Kernel code and still unknown to the
- * tests.
+ * tests. Magic numbers are supposed to be allocated
+ * as somewhat meaningful ASCII strings so try to
+ * print as such as well as the raw number.
*/
+ memcpy(magic, &head->magic, sizeof(magic));
+ for (i = 0; i < sizeof(magic); i++)
+ if (!isalnum(magic[i]))
+ magic[i] = '?';
+
fprintf(stdout,
- "SKIP Unknown MAGIC: 0x%X - Is KSFT arm64/signal up to date ?\n",
- head->magic);
+ "SKIP Unknown MAGIC: 0x%X (%c%c%c%c) - Is KSFT arm64/signal up to date ?\n",
+ head->magic,
+ magic[3], magic[2], magic[1], magic[0]);
break;
}
@@ -134,10 +252,22 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
return false;
}
- if (flags & EXTRA_CTX)
- if (!validate_extra_context(extra, err))
+ if (new_flags & EXTRA_CTX)
+ if (!validate_extra_context(extra, err,
+ &extra_data, &extra_sz))
+ return false;
+ if (new_flags & SVE_CTX)
+ if (!validate_sve_context(sve, err))
+ return false;
+ if (new_flags & ZA_CTX)
+ if (!validate_za_context(za, err))
+ return false;
+ if (new_flags & ZT_CTX)
+ if (!validate_zt_context(zt, err))
return false;
+ flags |= new_flags;
+
head = GET_RESV_NEXT_HEAD(head);
}
@@ -146,6 +276,11 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
return false;
}
+ if (terminated && (flags & ZT_CTX) && !(flags & ZA_CTX)) {
+ *err = "ZT context but no ZA context";
+ return false;
+ }
+
return true;
}
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
index ad884c135314..98b97efdda23 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -16,19 +16,33 @@
#define FPSIMD_CTX (1 << 0)
#define SVE_CTX (1 << 1)
-#define EXTRA_CTX (1 << 2)
+#define ZA_CTX (1 << 2)
+#define EXTRA_CTX (1 << 3)
+#define ZT_CTX (1 << 4)
+#define FPMR_CTX (1 << 5)
+#define GCS_CTX (1 << 6)
#define KSFT_BAD_MAGIC 0xdeadbeef
#define HDR_SZ \
sizeof(struct _aarch64_ctx)
+#define GET_UC_RESV_HEAD(uc) \
+ (struct _aarch64_ctx *)(&(uc->uc_mcontext.__reserved))
+
#define GET_SF_RESV_HEAD(sf) \
(struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved)
#define GET_SF_RESV_SIZE(sf) \
sizeof((sf).uc.uc_mcontext.__reserved)
+#define GET_BUF_RESV_HEAD(buf) \
+ (struct _aarch64_ctx *)(&(buf).uc.uc_mcontext.__reserved)
+
+#define GET_BUF_RESV_SIZE(buf) \
+ (sizeof(buf) - sizeof(buf.uc) + \
+ sizeof((buf).uc.uc_mcontext.__reserved))
+
#define GET_UCP_RESV_SIZE(ucp) \
sizeof((ucp)->uc_mcontext.__reserved)
@@ -78,10 +92,29 @@ struct fake_sigframe {
bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err);
-bool validate_extra_context(struct extra_context *extra, char **err);
+static inline struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
+ size_t resv_sz, size_t *offset)
+{
+ size_t offs = 0;
+ struct _aarch64_ctx *found = NULL;
+
+ if (!head || resv_sz < HDR_SZ)
+ return found;
+
+ while (offs <= resv_sz - HDR_SZ &&
+ head->magic != magic && head->magic) {
+ offs += head->size;
+ head = GET_RESV_NEXT_HEAD(head);
+ }
+ if (head->magic == magic) {
+ found = head;
+ if (offset)
+ *offset = offs;
+ }
+
+ return found;
+}
-struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
- size_t resv_sz, size_t *offset);
static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head,
size_t resv_sz,
diff --git a/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
new file mode 100644
index 000000000000..f9a86c00c28c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ *
+ * Verify that the TPIDR2 register context in signal frames is restored.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+static uint64_t get_tpidr2(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_TPIDR2 "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+static void set_tpidr2(uint64_t val)
+{
+ asm volatile (
+ "msr " SYS_TPIDR2 ", %0\n"
+ :
+ : "r"(val)
+ : "cc");
+}
+
+
+static uint64_t initial_tpidr2;
+
+static bool save_tpidr2(struct tdescr *td)
+{
+ initial_tpidr2 = get_tpidr2();
+ fprintf(stderr, "Initial TPIDR2: %lx\n", initial_tpidr2);
+
+ return true;
+}
+
+static int modify_tpidr2(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ uint64_t my_tpidr2 = get_tpidr2();
+
+ my_tpidr2++;
+ fprintf(stderr, "Setting TPIDR2 to %lx\n", my_tpidr2);
+ set_tpidr2(my_tpidr2);
+
+ return 0;
+}
+
+static void check_tpidr2(struct tdescr *td)
+{
+ uint64_t tpidr2 = get_tpidr2();
+
+ td->pass = tpidr2 == initial_tpidr2;
+
+ if (td->pass)
+ fprintf(stderr, "TPIDR2 restored\n");
+ else
+ fprintf(stderr, "TPIDR2 was %lx but is now %lx\n",
+ initial_tpidr2, tpidr2);
+}
+
+struct tdescr tde = {
+ .name = "TPIDR2 restore",
+ .descr = "Validate that TPIDR2 is restored from the sigframe",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .sig_trig = SIGUSR1,
+ .init = save_tpidr2,
+ .run = modify_tpidr2,
+ .check_result = check_tpidr2,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c
new file mode 100644
index 000000000000..6a2c82bf7ead
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 ARM Limited
+ *
+ * Verify that the TPIDR2 register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+static uint64_t get_tpidr2(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_TPIDR2 "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+int tpidr2_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct tpidr2_context *tpidr2_ctx;
+ size_t offset;
+ bool in_sigframe;
+ bool have_sme;
+ __u64 orig_tpidr2;
+
+ have_sme = getauxval(AT_HWCAP2) & HWCAP2_SME;
+ if (have_sme)
+ orig_tpidr2 = get_tpidr2();
+
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ tpidr2_ctx = (struct tpidr2_context *)
+ get_header(head, TPIDR2_MAGIC, td->live_sz, &offset);
+
+ in_sigframe = tpidr2_ctx != NULL;
+
+ fprintf(stderr, "TPIDR2 sigframe %s on system %s SME\n",
+ in_sigframe ? "present" : "absent",
+ have_sme ? "with" : "without");
+
+ td->pass = (in_sigframe == have_sme);
+
+ /*
+ * Check that the value we read back was the one present at
+ * the time that the signal was triggered. TPIDR2 is owned by
+ * libc so we can't safely choose the value and it is possible
+ * that we may need to revisit this in future if something
+ * starts deciding to set a new TPIDR2 between us reading and
+ * the signal.
+ */
+ if (have_sme && tpidr2_ctx) {
+ if (tpidr2_ctx->tpidr2 != orig_tpidr2) {
+ fprintf(stderr, "TPIDR2 in frame is %llx, was %llx\n",
+ tpidr2_ctx->tpidr2, orig_tpidr2);
+ td->pass = false;
+ }
+ }
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "TPIDR2",
+ .descr = "Validate that TPIDR2 is present as expected",
+ .timeout = 3,
+ .run = tpidr2_present,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
new file mode 100644
index 000000000000..ce26e9c2fa5e
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the ZA register context in signal frames is set up as
+ * expected.
+ */
+
+#include <kselftest.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "sve_helpers.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int res = sve_fill_vls(VLS_USE_SME, 1);
+
+ if (!res)
+ return true;
+
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
+
+ return false;
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct za_context *za;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ if (prctl(PR_SME_SET_VL, vl) != vl) {
+ fprintf(stderr, "Failed to set VL\n");
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+ if (!head) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+
+ za = (struct za_context *)head;
+ if (za->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl);
+ return 1;
+ }
+
+ if (head->size != ZA_SIG_REGS_OFFSET) {
+ fprintf(stderr, "Context size %u, expected %lu\n",
+ head->size, ZA_SIG_REGS_OFFSET);
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, za->vl);
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "ZA registers - ZA disabled",
+ .descr = "Check ZA context with ZA disabled",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
new file mode 100644
index 000000000000..badaead5326a
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the ZA register context in signal frames is set up as
+ * expected.
+ */
+
+#include <kselftest.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "sve_helpers.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int res = sve_fill_vls(VLS_USE_SME, 1);
+
+ if (!res)
+ return true;
+
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
+
+ return false;
+}
+
+static void setup_za_regs(void)
+{
+ /* smstart za; real data is TODO */
+ asm volatile(".inst 0xd503457f" : : : );
+}
+
+static char zeros[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)];
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct za_context *za;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ if (prctl(PR_SME_SET_VL, vl) != vl) {
+ fprintf(stderr, "Failed to set VL\n");
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_za_regs();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+ if (!head) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+
+ za = (struct za_context *)head;
+ if (za->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl);
+ return 1;
+ }
+
+ if (head->size != ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(vl))) {
+ fprintf(stderr, "ZA context size %u, expected %lu\n",
+ head->size, ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(vl)));
+ return 1;
+ }
+
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, za->vl);
+
+ /* We didn't load any data into ZA so it should be all zeros */
+ if (memcmp(zeros, (char *)za + ZA_SIG_REGS_OFFSET,
+ ZA_SIG_REGS_SIZE(sve_vq_from_vl(za->vl))) != 0) {
+ fprintf(stderr, "ZA data invalid\n");
+ return 1;
+ }
+
+ if (get_svcr() != 0) {
+ fprintf(stderr, "Unexpected SVCR %lx\n", get_svcr());
+ return 1;
+ }
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "ZA register",
+ .descr = "Check that we get the right ZA registers reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c
new file mode 100644
index 000000000000..34f69bcf821e
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using an instruction not supported in streaming mode
+ * traps when in streaming mode.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+int zt_no_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+
+ /*
+ * Get a signal context which should not have a ZT frame and
+ * registers in it.
+ */
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+ if (head) {
+ fprintf(stderr, "Got unexpected ZT context\n");
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "ZT register data not present",
+ .descr = "Validate that ZT is not present when ZA is disabled",
+ .feats_required = FEAT_SME2,
+ .timeout = 3,
+ .sanity_disabled = true,
+ .run = zt_no_regs_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
new file mode 100644
index 000000000000..2e384d731618
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using an instruction not supported in streaming mode
+ * traps when in streaming mode.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+static void enable_za(void)
+{
+ /* smstart za; real data is TODO */
+ asm volatile(".inst 0xd503457f" : : : );
+}
+
+int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct zt_context *zt;
+ char *zeros;
+
+ /*
+ * Get a signal context which should have a ZT frame and registers
+ * in it.
+ */
+ enable_za();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+ if (!head) {
+ fprintf(stderr, "No ZT context\n");
+ return 1;
+ }
+
+ zt = (struct zt_context *)head;
+ if (zt->nregs == 0) {
+ fprintf(stderr, "Got context with no registers\n");
+ return 1;
+ }
+
+ fprintf(stderr, "Got expected size %u for %d registers\n",
+ head->size, zt->nregs);
+
+ /* We didn't load any data into ZT so it should be all zeros */
+ zeros = malloc(ZT_SIG_REGS_SIZE(zt->nregs));
+ if (!zeros) {
+ fprintf(stderr, "Out of memory, nregs=%u\n", zt->nregs);
+ return 1;
+ }
+ memset(zeros, 0, ZT_SIG_REGS_SIZE(zt->nregs));
+
+ if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET,
+ ZT_SIG_REGS_SIZE(zt->nregs)) != 0) {
+ fprintf(stderr, "ZT data invalid\n");
+ free(zeros);
+ return 1;
+ }
+
+ free(zeros);
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "ZT register data",
+ .descr = "Validate that ZT is present and has data when ZA is enabled",
+ .feats_required = FEAT_SME2,
+ .timeout = 3,
+ .sanity_disabled = true,
+ .run = zt_regs_run,
+};
diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile
index 41cb75070511..0a77f35295fb 100644
--- a/tools/testing/selftests/arm64/tags/Makefile
+++ b/tools/testing/selftests/arm64/tags/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := tags_test
-TEST_PROGS := run_tags_test.sh
include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/tags/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh
deleted file mode 100755
index 745f11379930..000000000000
--- a/tools/testing/selftests/arm64/tags/run_tags_test.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-echo "--------------------"
-echo "running tags test"
-echo "--------------------"
-./tags_test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
-else
- echo "[PASS]"
-fi
diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c
index 5701163460ef..375ab47f0edb 100644
--- a/tools/testing/selftests/arm64/tags/tags_test.c
+++ b/tools/testing/selftests/arm64/tags/tags_test.c
@@ -6,6 +6,7 @@
#include <stdint.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
+#include "kselftest.h"
#define SHIFT_TAG(tag) ((uint64_t)(tag) << 56)
#define SET_TAG(ptr, tag) (((uint64_t)(ptr) & ~SHIFT_TAG(0xff)) | \
@@ -16,16 +17,21 @@ int main(void)
static int tbi_enabled = 0;
unsigned long tag = 0;
struct utsname *ptr;
- int err;
+
+ ksft_print_header();
+ ksft_set_plan(1);
if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0)
tbi_enabled = 1;
ptr = (struct utsname *)malloc(sizeof(*ptr));
+ if (!ptr)
+ ksft_exit_fail_perror("Failed to allocate utsname buffer");
+
if (tbi_enabled)
tag = 0x42;
ptr = (struct utsname *)SET_TAG(ptr, tag);
- err = uname(ptr);
+ ksft_test_result(!uname(ptr), "Syscall successful with tagged address\n");
free(ptr);
- return err;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index c0c48fdb9ac1..19c1638e312a 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,37 +1,47 @@
# SPDX-License-Identifier: GPL-2.0-only
+bpftool
+bpf-helpers*
+bpf-syscall*
test_verifier
test_maps
test_lru_map
-test_lpm_map
test_tag
FEATURE-DUMP.libbpf
+FEATURE-DUMP.selftests
fixdep
-test_dev_cgroup
-/test_progs*
+/test_progs
+/test_progs-no_alu32
+/test_progs-bpf_gcc
+/test_progs-cpuv4
test_verifier_log
feature
-test_sock
-test_sock_addr
urandom_read
test_sockmap
test_lirc_mode2_user
-get_cgroup_id_user
-test_skb_cgroup_id_user
-test_cgroup_storage
-test_flow_dissector
flow_dissector_load
-test_netcnt
test_tcpnotify_user
test_libbpf
-test_tcp_check_syncookie_user
-test_sysctl
xdping
test_cpp
+test_progs_verification_cert
+*.d
+*.subskel.h
*.skel.h
+*.lskel.h
/no_alu32
/bpf_gcc
+/cpuv4
+/host-tools
/tools
-/runqslower
/bench
+/veristat
+/sign-file
+/uprobe_multi
*.ko
-xdpxceiver
+*.tmp
+xskxceiver
+xdp_redirect_multi
+xdp_synproxy
+xdp_hw_metadata
+xdp_features
+verification_cert.h
diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST
new file mode 100644
index 000000000000..f748f2c33b22
--- /dev/null
+++ b/tools/testing/selftests/bpf/DENYLIST
@@ -0,0 +1,7 @@
+# TEMPORARY
+# Alphabetical order
+get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge
+stacktrace_build_id
+stacktrace_build_id_nmi
+task_fd_query_rawtp
+varlen
diff --git a/tools/testing/selftests/bpf/DENYLIST.riscv64 b/tools/testing/selftests/bpf/DENYLIST.riscv64
new file mode 100644
index 000000000000..4fc4dfdde293
--- /dev/null
+++ b/tools/testing/selftests/bpf/DENYLIST.riscv64
@@ -0,0 +1,3 @@
+# riscv64 deny list for BPF CI and local vmtest
+exceptions # JIT does not support exceptions
+tailcalls/tailcall_bpf2bpf* # JIT does not support mixing bpf2bpf and tailcalls
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
new file mode 100644
index 000000000000..a17baf8c6fd7
--- /dev/null
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -0,0 +1,4 @@
+# TEMPORARY
+# Alphabetical order
+get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
+stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 044bfdcf5b74..b7030a6e2e76 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-include ../../../../scripts/Kbuild.include
+include ../../../build/Build.include
include ../../../scripts/Makefile.arch
include ../../../scripts/Makefile.include
@@ -10,10 +10,16 @@ TOOLSDIR := $(abspath ../../..)
LIBDIR := $(TOOLSDIR)/lib
BPFDIR := $(LIBDIR)/bpf
TOOLSINCDIR := $(TOOLSDIR)/include
+TOOLSARCHINCDIR := $(TOOLSDIR)/arch/$(SRCARCH)/include
BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
APIDIR := $(TOOLSINCDIR)/uapi
+ifneq ($(O),)
+GENDIR := $(O)/include/generated
+else
GENDIR := $(abspath ../../../../include/generated)
+endif
GENHDR := $(GENDIR)/autoconf.h
+PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
ifneq ($(wildcard $(GENHDR)),)
GENFLAGS := -DHAVE_GENHDR
@@ -21,79 +27,116 @@ endif
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
-CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \
+SAN_LDFLAGS ?= $(SAN_CFLAGS)
+RELEASE ?=
+OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
+
+LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
+LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
+
+SKIP_DOCS ?=
+SKIP_LLVM ?=
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \
+ -Wall -Werror -fno-omit-frame-pointer \
+ -Wno-unused-but-set-variable \
+ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
- -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \
- -Dbpf_prog_load=bpf_prog_test_load \
- -Dbpf_load_program=bpf_test_load_program
-LDLIBS += -lcap -lelf -lz -lrt -lpthread
+ -I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT)
+LDFLAGS += $(SAN_LDFLAGS)
+LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread
+
+PCAP_CFLAGS := $(shell $(PKG_CONFIG) --cflags libpcap 2>/dev/null && echo "-DTRAFFIC_MONITOR=1")
+PCAP_LIBS := $(shell $(PKG_CONFIG) --libs libpcap 2>/dev/null)
+LDLIBS += $(PCAP_LIBS)
+CFLAGS += $(PCAP_CFLAGS)
+
+# Some utility functions use LLVM libraries
+jit_disasm_helpers.c-CFLAGS = $(LLVM_CFLAGS)
+
+ifneq ($(LLVM),)
+# Silence some warnings when compiled with clang
+CFLAGS += -Wno-unused-command-line-argument
+endif
+
+# Check whether bpf cpu=v4 is supported or not by clang
+ifneq ($(shell $(CLANG) --target=bpf -mcpu=help 2>&1 | grep 'v4'),)
+CLANG_CPUV4 := 1
+endif
# Order correspond to 'make run_tests' order
-TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_verifier_log test_dev_cgroup \
- test_sock test_sockmap get_cgroup_id_user \
- test_cgroup_storage \
- test_netcnt test_tcpnotify_user test_sysctl \
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \
+ test_sockmap \
+ test_tcpnotify_user \
test_progs-no_alu32
+TEST_INST_SUBDIRS := no_alu32
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)
TEST_GEN_PROGS += test_progs-bpf_gcc
+TEST_INST_SUBDIRS += bpf_gcc
+
+# The following tests contain C code that, although technically legal,
+# triggers GCC warnings that cannot be disabled: declaration of
+# anonymous struct types in function parameter lists.
+progs/btf_dump_test_case_bitfields.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_namespacing.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error
+
endif
-TEST_GEN_FILES = test_lwt_ip_encap.o \
- test_tc_edt.o
-TEST_FILES = xsk_prereqs.sh \
- $(wildcard progs/btf_dump_test_case_*.c)
+ifneq ($(CLANG_CPUV4),)
+TEST_GEN_PROGS += test_progs-cpuv4
+TEST_INST_SUBDIRS += cpuv4
+endif
+
+TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
- test_xdp_redirect.sh \
- test_xdp_meta.sh \
- test_xdp_veth.sh \
- test_offload.py \
- test_sock_addr.sh \
- test_tunnel.sh \
- test_lwt_seg6local.sh \
test_lirc_mode2.sh \
- test_skb_cgroup_id.sh \
- test_flow_dissector.sh \
- test_xdp_vlan_mode_generic.sh \
- test_xdp_vlan_mode_native.sh \
- test_lwt_ip_encap.sh \
- test_tcp_check_syncookie.sh \
- test_tc_tunnel.sh \
- test_tc_edt.sh \
test_xdping.sh \
test_bpftool_build.sh \
test_bpftool.sh \
+ test_bpftool_map.sh \
test_bpftool_metadata.sh \
- test_xsk.sh
+ test_doc_build.sh \
+ test_xsk.sh \
+ test_xdp_features.sh
+
+TEST_PROGS_EXTENDED := \
+ ima_setup.sh verify_sig_setup.sh \
+ test_bpftool.py
-TEST_PROGS_EXTENDED := with_addr.sh \
- with_tunnels.sh \
- test_xdp_vlan.sh
+TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
+TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))
# Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
- flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
- test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
- xdpxceiver
-
-TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
-
-# Emit succinct information message describing current building step
-# $1 - generic step name (e.g., CC, LINK, etc);
-# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
-# $3 - target (assumed to be file); only file name will be emitted;
-# $4 - optional extra arg, emitted as-is, if provided.
-ifeq ($(V),1)
-Q =
-msg =
-else
-Q = @
-msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
-MAKEFLAGS += --no-print-directory
+TEST_GEN_PROGS_EXTENDED = \
+ bench \
+ flow_dissector_load \
+ test_cpp \
+ test_lirc_mode2_user \
+ veristat \
+ xdp_features \
+ xdp_hw_metadata \
+ xdp_synproxy \
+ xdping \
+ xskxceiver
+
+TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi
+
+ifneq ($(V),1)
submake_extras := feature_display=0
endif
@@ -101,12 +144,53 @@ endif
OVERRIDE_TARGETS := 1
override define CLEAN
$(call msg,CLEAN)
- $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
- $(Q)$(MAKE) -C bpf_testmod clean
+ $(Q)$(RM) -r $(TEST_GEN_PROGS)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED)
+ $(Q)$(RM) -r $(TEST_GEN_FILES)
+ $(Q)$(RM) -r $(TEST_KMODS)
+ $(Q)$(RM) -r $(EXTRA_CLEAN)
+ $(Q)$(MAKE) -C test_kmods clean
+ $(Q)$(MAKE) docs-clean
endef
include ../lib.mk
+NON_CHECK_FEAT_TARGETS := clean docs-clean
+CHECK_FEAT := $(filter-out $(NON_CHECK_FEAT_TARGETS),$(or $(MAKECMDGOALS), "none"))
+ifneq ($(CHECK_FEAT),)
+FEATURE_USER := .selftests
+FEATURE_TESTS := llvm
+FEATURE_DISPLAY := $(FEATURE_TESTS)
+
+# Makefile.feature expects OUTPUT to end with a slash
+ifeq ($(shell expr $(MAKE_VERSION) \>= 4.4), 1)
+$(let OUTPUT,$(OUTPUT)/,\
+ $(eval include ../../../build/Makefile.feature))
+else
+override OUTPUT := $(OUTPUT)/
+$(eval include ../../../build/Makefile.feature)
+override OUTPUT := $(patsubst %/,%,$(OUTPUT))
+endif
+endif
+
+ifneq ($(SKIP_LLVM),1)
+ifeq ($(feature-llvm),1)
+ LLVM_CFLAGS += -DHAVE_LLVM_SUPPORT
+ LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
+ # both llvm-config and lib.mk add -D_GNU_SOURCE, which ends up as conflict
+ LLVM_CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags))
+ # Prefer linking statically if it's available, otherwise fallback to shared
+ ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static)
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += $(filter-out -lxml2,$(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)))
+ LLVM_LDLIBS += -lstdc++
+ else
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ endif
+ LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
+endif
+endif
+
SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
INCLUDE_DIR := $(SCRATCH_DIR)/include
@@ -114,13 +198,14 @@ BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
ifneq ($(CROSS_COMPILE),)
HOST_BUILD_DIR := $(BUILD_DIR)/host
HOST_SCRATCH_DIR := $(OUTPUT)/host-tools
+HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include
else
HOST_BUILD_DIR := $(BUILD_DIR)
HOST_SCRATCH_DIR := $(SCRATCH_DIR)
+HOST_INCLUDE_DIR := $(INCLUDE_DIR)
endif
HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
-
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../../../vmlinux \
@@ -131,19 +216,17 @@ ifeq ($(VMLINUX_BTF),)
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
endif
-# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
+# Define simple and short `make test_progs`, `make test_maps`, etc targets
# to build individual tests.
# NOTE: Semicolon at the end is critical to override lib.mk's default static
# rule for binaries.
-$(notdir $(TEST_GEN_PROGS) \
- $(TEST_PROGS) \
- $(TEST_PROGS_EXTENDED) \
- $(TEST_GEN_PROGS_EXTENDED) \
- $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+$(notdir $(TEST_GEN_PROGS) $(TEST_KMODS) \
+ $(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ;
# sort removes libbpf duplicates when not cross-building
-MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
- $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \
+MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
+ $(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool \
+ $(HOST_BUILD_DIR)/resolve_btfids \
$(INCLUDE_DIR))
$(MAKE_DIRS):
$(call msg,MKDIR,,$@)
@@ -157,74 +240,156 @@ $(OUTPUT)/%:%.c
$(call msg,BINARY,,$@)
$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
-$(OUTPUT)/urandom_read: urandom_read.c
- $(call msg,BINARY,,$@)
- $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1
+# LLVM's ld.lld doesn't support all the architectures, so use it only on x86
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 riscv))
+LLD := lld
+else
+LLD := $(shell command -v $(LD))
+endif
-$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
+# Filter out -static for liburandom_read.so and its dependent targets so that static builds
+# do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
+$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom_read.map
+ $(call msg,LIB,,$@)
+ $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \
+ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \
+ $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \
+ -Wno-unused-command-line-argument \
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
+ -Wl,--version-script=liburandom_read.map \
+ -fPIC -shared -o $@
+
+$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
+ $(call msg,BINARY,,$@)
+ $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \
+ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
+ -Wno-unused-command-line-argument \
+ -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
+ -Wl,-rpath=. -o $@
+
+$(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
+ $(call msg,SIGN-FILE,,$@)
+ $(Q)$(CC) $(shell $(PKG_CONFIG) --cflags libcrypto 2> /dev/null) \
+ $< -o $@ \
+ $(shell $(PKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
+
+# This should really be a grouped target, but make versions before 4.3 don't
+# support that for regular rules. However, pattern matching rules are implicitly
+# treated as grouped even with older versions of make, so as a workaround, the
+# subst() turns the rule into a pattern matching rule
+$(addprefix test_kmods/,$(subst .ko,%ko,$(TEST_KMODS))): $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard test_kmods/Makefile test_kmods/*.[ch])
+ $(Q)$(RM) test_kmods/*.ko test_kmods/*.mod.o # force re-compilation
+ $(Q)$(MAKE) $(submake_extras) -C test_kmods \
+ RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \
+ EXTRA_CFLAGS='' EXTRA_LDFLAGS=''
+
+$(TEST_KMOD_TARGETS): $(addprefix test_kmods/,$(TEST_KMODS))
$(call msg,MOD,,$@)
- $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
- $(Q)$(MAKE) $(submake_extras) -C bpf_testmod
- $(Q)cp bpf_testmod/bpf_testmod.ko $@
+ $(Q)cp test_kmods/$(@F) $@
-$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
- $(call msg,CC,,$@)
- $(Q)$(CC) -c $(CFLAGS) -o $@ $<
DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
+ifneq ($(CROSS_COMPILE),)
+CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
+TRUNNER_BPFTOOL := $(CROSS_BPFTOOL)
+USE_BOOTSTRAP := ""
+else
+TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL)
+USE_BOOTSTRAP := "bootstrap/"
+endif
-$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
- $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
- OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \
- BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
- cp $(SCRATCH_DIR)/runqslower $@
-
-$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
-
-$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
-$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
-$(OUTPUT)/test_sock: cgroup_helpers.c
-$(OUTPUT)/test_sock_addr: cgroup_helpers.c
-$(OUTPUT)/test_sockmap: cgroup_helpers.c
-$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
-$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
-$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_netcnt: cgroup_helpers.c
-$(OUTPUT)/test_sock_fields: cgroup_helpers.c
-$(OUTPUT)/test_sysctl: cgroup_helpers.c
+TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL)
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
+
+TESTING_HELPERS := $(OUTPUT)/testing_helpers.o
+CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o
+UNPRIV_HELPERS := $(OUTPUT)/unpriv_helpers.o
+TRACE_HELPERS := $(OUTPUT)/trace_helpers.o
+JSON_WRITER := $(OUTPUT)/json_writer.o
+CAP_HELPERS := $(OUTPUT)/cap_helpers.o
+NETWORK_HELPERS := $(OUTPUT)/network_helpers.o
+
+$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
+$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_tag: $(TESTING_HELPERS)
+$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS)
+$(OUTPUT)/xdping: $(TESTING_HELPERS)
+$(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
+$(OUTPUT)/test_maps: $(TESTING_HELPERS)
+$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS)
+$(OUTPUT)/xsk.o: $(BPFOBJ)
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
- CC=$(HOSTCC) LD=$(HOSTLD) \
+ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \
+ EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
- prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
- $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
- $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
- -C $(BPFTOOLDIR)/Documentation \
- OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \
- prefix= DESTDIR=$(SCRATCH_DIR)/ install
+ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
+ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \
+ prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin
+
+ifneq ($(CROSS_COMPILE),)
+$(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
+ $(BPFOBJ) | $(BUILD_DIR)/bpftool
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
+ ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \
+ EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \
+ OUTPUT=$(BUILD_DIR)/bpftool/ \
+ LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \
+ LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \
+ prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin
+endif
+
+ifneq ($(SKIP_DOCS),1)
+all: docs
+endif
+
+docs:
+ $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
+ -f Makefile.docs \
+ prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
+
+docs-clean:
+ $(Q)$(MAKE) $(submake_extras) \
+ -f Makefile.docs \
+ prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
- ../../../include/uapi/linux/bpf.h \
- | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
+ $(APIDIR)/linux/bpf.h \
+ | $(BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
+ EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
-$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
- ../../../include/uapi/linux/bpf.h \
- | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf
+$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(HOST_BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
- OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
+ ARCH= CROSS_COMPILE= \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \
+ EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \
+ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
+ CC="$(HOSTCC)" LD="$(HOSTLD)" \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
-$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+# vmlinux.h is first dumped to a temporary file and then compared to
+# the previous version. This helps to avoid unnecessary re-builds of
+# $(TRUNNER_BPF_OBJS)
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
ifeq ($(VMLINUX_H),)
$(call msg,GEN,,$@)
- $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $(INCLUDE_DIR)/.vmlinux.h.tmp
+ $(Q)cmp -s $(INCLUDE_DIR)/.vmlinux.h.tmp $@ || mv $(INCLUDE_DIR)/.vmlinux.h.tmp $@
else
$(call msg,CP,,$@)
$(Q)cp "$(VMLINUX_H)" $@
@@ -238,80 +403,145 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
$(TOOLSDIR)/lib/ctype.c \
$(TOOLSDIR)/lib/str_error_r.c
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \
- CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \
+ CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \
+ LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \
OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ)
# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
define get_sys_includes
-$(shell $(1) -v -E - </dev/null 2>&1 \
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__loongarch_grlen ' | awk '{printf("-D__BITS_PER_LONG=%d", $$3)}') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep -E 'MIPS(EL|EB)|_MIPS_SZ(PTR|LONG) |_MIPS_SIM |_ABI(O32|N32|64) ' | awk '{printf("-D%s=%s ", $$2, $$3)}')
endef
# Determine target endianness.
-IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
+IS_LITTLE_ENDIAN := $(shell $(CC) -dM -E - </dev/null | \
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
-MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
+MENDIAN:=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
+BPF_TARGET_ENDIAN:=$(if $(IS_LITTLE_ENDIAN),--target=bpfel,--target=bpfeb)
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
-CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
- -I$(abspath $(OUTPUT)/../usr/include)
+ -I$(abspath $(OUTPUT)/../usr/include) \
+ -std=gnu11 \
+ -fno-strict-aliasing \
+ -Wno-compare-distinct-pointer-types \
+ -Wno-initializer-overrides \
+ #
+# TODO: enable me -Wsign-compare
-CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types
+CLANG_CFLAGS = $(CLANG_SYS_INCLUDES)
$(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
$(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
+$(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h
# Build BPF object using Clang
# $1 - input .c file
# $2 - output .o file
# $3 - CFLAGS
+# $4 - binary name
define CLANG_BPF_BUILD_RULE
- $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v3
+ $(call msg,CLNG-BPF,$4,$2)
+ $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v3 -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
- $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v2
+ $(call msg,CLNG-BPF,$4,$2)
+ $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v2 -o $2
+endef
+# Similar to CLANG_BPF_BUILD_RULE, but with cpu-v4
+define CLANG_CPUV4_BPF_BUILD_RULE
+ $(call msg,CLNG-BPF,$4,$2)
+ $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v4 -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
- $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2
+ $(call msg,GCC-BPF,$4,$2)
+ $(Q)$(BPF_GCC) $3 -DBPF_NO_PRESERVE_ACCESS_INDEX -Wno-attributes -O2 -c $1 -o $2
endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
+LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
+ linked_vars.skel.h linked_maps.skel.h \
+ test_subskeleton.skel.h test_subskeleton_lib.skel.h \
+ test_usdt.skel.h
+
+LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \
+ core_kern.c core_kern_overflow.c test_ringbuf.c \
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \
+ test_ringbuf_overwrite.c
+
+LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c
+
+# Generate both light skeleton and libbpf skeleton for these
+LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
+ kfunc_call_test_subprog.c
+SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED)
+
+test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
+linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o
+linked_vars.skel.h-deps := linked_vars1.bpf.o linked_vars2.bpf.o
+linked_maps.skel.h-deps := linked_maps1.bpf.o linked_maps2.bpf.o
+# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
+# but that's created as a side-effect of the skel.h generation.
+test_subskeleton.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o test_subskeleton.bpf.o
+test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o
+test_usdt.skel.h-deps := test_usdt.bpf.o test_usdt_multispec.bpf.o
+xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o
+xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o
+xdp_features.skel.h-deps := xdp_features.bpf.o
+
+LINKED_BPF_OBJS := $(foreach skel,$(LINKED_SKELS),$($(skel)-deps))
+LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(LINKED_BPF_OBJS))
+
+HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \
+ $(addprefix $(BPFDIR)/, bpf_core_read.h \
+ bpf_endian.h \
+ bpf_helpers.h \
+ bpf_tracing.h)
+
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
# Parameters:
# $1 - test runner base binary name (e.g., test_progs)
-# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc)
+# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER
+LSKEL_SIGN := -S -k $(PRIVATE_KEY) -i $(VERIFICATION_CERT)
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
TRUNNER_BINARY := $1$(if $2,-)$2
TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \
$$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c)))
TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
$$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
+TRUNNER_LIB_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
+ $$(filter %.c,$(TRUNNER_LIB_SOURCES)))
TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
-TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
+TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.bpf.o, $$(TRUNNER_BPF_SRCS))
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
- $$(filter-out $(SKEL_BLACKLIST), \
+ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
$$(TRUNNER_BPF_SRCS)))
+TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
+TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
+TRUNNER_BPF_LSKELS_SIGNED := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS_SIGNED))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -322,7 +552,7 @@ endef
# Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and
# set up by DEFINE_TEST_RUNNER itself, create test runner build rules with:
# $1 - test runner base binary name (e.g., test_progs)
-# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc)
+# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER_RULES
ifeq ($($(TRUNNER_OUTPUT)-dir),)
@@ -336,19 +566,68 @@ endif
# input/output directory combination
ifeq ($($(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs),)
$(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
-$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
+$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
- $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
+ $(HEADERS_FOR_BPF_OBJS) \
+ | $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
- $(TRUNNER_BPF_CFLAGS))
+ $(TRUNNER_BPF_CFLAGS) \
+ $$($$<-CFLAGS) \
+ $$($$<-$2-CFLAGS),$(TRUNNER_BINARY))
-$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
- $(TRUNNER_OUTPUT)/%.o \
- | $(BPFTOOL) $(TRUNNER_OUTPUT)
+$(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
+ $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$@
+ $(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$(@:.skel.h=.subskel.h)
+ $(Q)rm -f $$(<:.o=.linked1.o) $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+
+$(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+ $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+
+$(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+ $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+
+$(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/%
+
+# .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites
+.SECONDEXPANSION:
+$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_OUTPUT)/%: $$$$(%-deps) $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.bpf.o))
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked3.o) $$(@:.skel.h=.linked2.o)
+ $(Q)diff $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
- $(Q)$$(BPFTOOL) gen skeleton $$< > $$@
+ $(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@
+ $(Q)$$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h)
+ $(Q)rm -f $$(@:.skel.h=.linked1.o) $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
+
+# When the compiler generates a %.d file, only skel basenames (not
+# full paths) are specified as prerequisites for corresponding %.o
+# file. vpath directives below instruct make to search for skel files
+# in TRUNNER_OUTPUT, if they are not present in the working directory.
+vpath %.skel.h $(TRUNNER_OUTPUT)
+vpath %.lskel.h $(TRUNNER_OUTPUT)
+vpath %.subskel.h $(TRUNNER_OUTPUT)
+
endif
# ensure we set up tests.h header generation rule just once
@@ -356,10 +635,9 @@ ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),)
$(TRUNNER_TESTS_DIR)-tests-hdr := y
$(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
$$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@)
- $$(shell ( cd $(TRUNNER_TESTS_DIR); \
- echo '/* Generated header, do not edit */'; \
- ls *.c 2> /dev/null | \
- sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \
+ $$(shell (echo '/* Generated header, do not edit */'; \
+ sed -n -E 's/^void (serial_)?test_([a-zA-Z0-9_]+)\((void)?\).*/DEFINE_TEST(\2)/p' \
+ $(TRUNNER_TESTS_DIR)/*.c | sort ; \
) > $$@)
endif
@@ -367,50 +645,114 @@ endif
# Note: we cd into output directory to ensure embedded BPF object is found
$(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_TESTS_DIR)/%.c \
- $(TRUNNER_EXTRA_HDRS) \
- $(TRUNNER_BPF_OBJS) \
- $(TRUNNER_BPF_SKELS) \
- $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ | $(TRUNNER_OUTPUT)/%.test.d
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
- $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+ $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -MMD -MT $$@ -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+
+$(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
+ $(TRUNNER_TESTS_DIR)/%.c \
+ $(TRUNNER_EXTRA_HDRS) \
+ $(TRUNNER_BPF_SKELS) \
+ $(TRUNNER_BPF_LSKELS) \
+ $(TRUNNER_BPF_LSKELS_SIGNED) \
+ $(TRUNNER_BPF_SKELS_LINKED) \
+ $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+
+ifeq ($(filter clean docs-clean,$(MAKECMDGOALS)),)
+include $(wildcard $(TRUNNER_TEST_OBJS:.o=.d))
+endif
+
+# add per extra obj CFGLAGS definitions
+$(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \
+ $(eval $(TRUNNER_OUTPUT)/$(N).o: CFLAGS += $($(N).c-CFLAGS)))
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
$(TRUNNER_EXTRA_HDRS) \
+ $(VERIFY_SIG_HDR) \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
-# only copy extra resources if in flavored build
+$(TRUNNER_LIB_OBJS): $(TRUNNER_OUTPUT)/%.o:$(TOOLSDIR)/lib/%.c
+ $$(call msg,LIB-OBJ,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+
+# non-flavored in-srctree builds receive special treatment, in particular, we
+# do not need to copy extra resources (see e.g. test_btf_dump_case())
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
-ifneq ($2,)
+ifneq ($2:$(OUTPUT),:$(shell pwd))
$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
- $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
+ $(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
+# some X.test.o files have runtime dependencies on Y.bpf.o files
+$(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
+
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
+ $(TRUNNER_LIB_OBJS) \
$(RESOLVE_BTFIDS) \
+ $(TRUNNER_BPFTOOL) \
+ $(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
- $(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@
+ $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
+ $(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
+ $(OUTPUT)/$(if $2,$2/)bpftool
endef
+VERIFY_SIG_SETUP := $(CURDIR)/verify_sig_setup.sh
+VERIFY_SIG_HDR := verification_cert.h
+VERIFICATION_CERT := $(BUILD_DIR)/signing_key.der
+PRIVATE_KEY := $(BUILD_DIR)/signing_key.pem
+
+$(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
+ $(Q)mkdir -p $(BUILD_DIR)
+ $(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
+
+$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
+ $(Q)ln -fs $< test_progs_verification_cert && \
+ xxd -i test_progs_verification_cert > $@
+
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
-TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
- network_helpers.c testing_helpers.c \
- btf_helpers.c flow_dissector_load.h
-TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
- ima_setup.sh \
- $(wildcard progs/btf_dump_test_case_*.c)
+TRUNNER_EXTRA_SOURCES := test_progs.c \
+ cgroup_helpers.c \
+ trace_helpers.c \
+ network_helpers.c \
+ testing_helpers.c \
+ btf_helpers.c \
+ cap_helpers.c \
+ unpriv_helpers.c \
+ netlink_helpers.c \
+ jit_disasm_helpers.c \
+ io_helpers.c \
+ test_loader.c \
+ xsk.c \
+ disasm.c \
+ disasm_helpers.c \
+ json_writer.c \
+ $(VERIFY_SIG_HDR) \
+ flow_dissector_load.h \
+ ip_check_defrag_frags.h
+TRUNNER_LIB_SOURCES := find_bit.c
+TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
+ $(OUTPUT)/liburandom_read.so \
+ $(OUTPUT)/xdp_synproxy \
+ $(OUTPUT)/sign-file \
+ $(OUTPUT)/uprobe_multi \
+ $(TEST_KMOD_TARGETS) \
+ ima_setup.sh \
+ $(VERIFY_SIG_SETUP) \
+ $(wildcard progs/btf_dump_test_case_*.c) \
+ $(wildcard progs/*.bpf.o)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
-TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS
$(eval $(call DEFINE_TEST_RUNNER,test_progs))
# Define test_progs-no_alu32 test runner.
@@ -418,10 +760,17 @@ TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
$(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
+# Define test_progs-cpuv4 test runner.
+ifneq ($(CLANG_CPUV4),)
+TRUNNER_BPF_BUILD_RULE := CLANG_CPUV4_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS
+$(eval $(call DEFINE_TEST_RUNNER,test_progs,cpuv4))
+endif
+
# Define test_progs BPF-GCC-flavored test runner.
ifneq ($(BPF_GCC),)
TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc,)
$(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
endif
@@ -429,6 +778,7 @@ endif
TRUNNER_TESTS_DIR := map_tests
TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_maps.c
+TRUNNER_LIB_SOURCES :=
TRUNNER_EXTRA_FILES :=
TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
TRUNNER_BPF_CFLAGS :=
@@ -449,30 +799,110 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+# Include find_bit.c to compile xskxceiver.
+EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c prog_tests/test_xsk.c prog_tests/test_xsk.h
+$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
+$(OUTPUT)/xdp_hw_metadata: xdp_hw_metadata.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xdp_hw_metadata.skel.h | $(OUTPUT)
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
+$(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp_features.skel.h | $(OUTPUT)
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
# Make sure we are able to include and link libbpf against c++.
+CXXFLAGS += $(CFLAGS)
+CXXFLAGS := $(subst -D_GNU_SOURCE=,,$(CXXFLAGS))
+CXXFLAGS := $(subst -std=gnu11,-std=gnu++11,$(CXXFLAGS))
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CXX) $(CXXFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
# Benchmark runner
-$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
+$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ)
$(call msg,CC,,$@)
- $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@
$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
$(OUTPUT)/perfbuf_bench.skel.h
-$(OUTPUT)/bench.o: bench.h testing_helpers.h
+$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
+$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
+$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
+$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
+$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
+$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
+$(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h
+$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
+$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
+$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
+$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
+$(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h
+$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
-$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
+$(OUTPUT)/bench: $(OUTPUT)/bench.o \
+ $(TESTING_HELPERS) \
+ $(TRACE_HELPERS) \
+ $(CGROUP_HELPERS) \
$(OUTPUT)/bench_count.o \
$(OUTPUT)/bench_rename.o \
$(OUTPUT)/bench_trigger.o \
- $(OUTPUT)/bench_ringbufs.o
+ $(OUTPUT)/bench_ringbufs.o \
+ $(OUTPUT)/bench_bloom_filter_map.o \
+ $(OUTPUT)/bench_bpf_loop.o \
+ $(OUTPUT)/bench_strncmp.o \
+ $(OUTPUT)/bench_bpf_hashmap_full_update.o \
+ $(OUTPUT)/bench_local_storage.o \
+ $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \
+ $(OUTPUT)/bench_bpf_hashmap_lookup.o \
+ $(OUTPUT)/bench_local_storage_create.o \
+ $(OUTPUT)/bench_htab_mem.o \
+ $(OUTPUT)/bench_bpf_crypto.o \
+ $(OUTPUT)/bench_sockmap.o \
+ $(OUTPUT)/bench_lpm_trie_map.o \
+ #
$(call msg,BINARY,,$@)
- $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
+ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
+# This works around GCC warning about snprintf truncating strings like:
+#
+# char a[PATH_MAX], b[PATH_MAX];
+# snprintf(a, "%s/foo", b); // triggers -Wformat-truncation
+$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation
+$(OUTPUT)/veristat.o: $(BPFOBJ)
+$(OUTPUT)/veristat: $(OUTPUT)/veristat.o
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
+
+# Linking uprobe_multi can fail due to relocation overflows on mips.
+$(OUTPUT)/uprobe_multi: CFLAGS += $(if $(filter mips, $(ARCH)),-mxgot)
+$(OUTPUT)/uprobe_multi: uprobe_multi.c uprobe_multi.ld
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) -Wl,-T,uprobe_multi.ld -O0 $(LDFLAGS) \
+ $(filter-out %.ld,$^) $(LDLIBS) -o $@
+
+EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
- feature \
- $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
+ feature bpftool $(TEST_KMOD_TARGETS) \
+ $(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \
+ no_alu32 cpuv4 bpf_gcc \
+ liburandom_read.so) \
+ $(OUTPUT)/FEATURE-DUMP.selftests \
+ test_progs_verification_cert
+
+.PHONY: docs docs-clean
+
+# Delete partially updated (corrupted) files on error
+.DELETE_ON_ERROR:
+
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+ $(DEFAULT_INSTALL_RULE)
+ @for DIR in $(TEST_INST_SUBDIRS); do \
+ mkdir -p $(INSTALL_PATH)/$$DIR; \
+ rsync -a $(OUTPUT)/$$DIR/*.bpf.o $(INSTALL_PATH)/$$DIR;\
+ done
+endef
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
new file mode 100644
index 000000000000..f7f9e7088bb3
--- /dev/null
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man2dir = $(mandir)/man2
+man7dir = $(mandir)/man7
+
+SYSCALL_RST = bpf-syscall.rst
+MAN2_RST = $(SYSCALL_RST)
+
+HELPERS_RST = bpf-helpers.rst
+MAN7_RST = $(HELPERS_RST)
+
+_DOC_MAN2 = $(patsubst %.rst,%.2,$(MAN2_RST))
+DOC_MAN2 = $(addprefix $(OUTPUT),$(_DOC_MAN2))
+
+_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
+DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+DOCTARGETS := helpers syscall
+
+docs: $(DOCTARGETS)
+syscall: man2
+helpers: man7
+man2: $(DOC_MAN2)
+man7: $(DOC_MAN7)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+# Configure make rules for the man page bpf-$1.$2.
+# $1 - target for scripts/bpf_doc.py
+# $2 - man page section to generate the troff file
+define DOCS_RULES =
+$(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h
+ $$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \
+ --filename $$< > $$@
+
+$(OUTPUT)%.$2: $(OUTPUT)%.rst
+ifndef RST2MAN_DEP
+ $$(error "rst2man not found, but required to generate man pages")
+endif
+ $$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp
+ $$(QUIET_GEN)mv $$@.tmp $$@
+
+docs-clean-$1:
+ $$(call QUIET_CLEAN, eBPF_$1-manpage)
+ $(Q)$(RM) $$(DOC_MAN$2) $(OUTPUT)bpf-$1.rst
+
+docs-install-$1: docs
+ $$(call QUIET_INSTALL, eBPF_$1-manpage)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$$(man$2dir)
+ $(Q)$(INSTALL) -m 644 $$(DOC_MAN$2) $(DESTDIR)$$(man$2dir)
+
+docs-uninstall-$1:
+ $$(call QUIET_UNINST, eBPF_$1-manpage)
+ $(Q)$(RM) $$(addprefix $(DESTDIR)$$(man$2dir)/,$$(_DOC_MAN$2))
+ $(Q)$(RMDIR) $(DESTDIR)$$(man$2dir)
+
+.PHONY: $1 docs-clean-$1 docs-install-$1 docs-uninstall-$1
+endef
+
+# Create the make targets to generate manual pages by name and section
+$(eval $(call DOCS_RULES,helpers,7))
+$(eval $(call DOCS_RULES,syscall,2))
+
+docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget))
+docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget))
+docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget))
+
+.PHONY: docs docs-clean docs-install docs-uninstall man2 man7
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index fd148b8410fa..776fbe3cb8f9 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -6,30 +6,116 @@ General instructions on running selftests can be found in
__ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests
+=============
+BPF CI System
+=============
+
+BPF employs a continuous integration (CI) system to check patch submission in an
+automated fashion. The system runs selftests for each patch in a series. Results
+are propagated to patchwork, where failures are highlighted similar to
+violations of other checks (such as additional warnings being emitted or a
+``scripts/checkpatch.pl`` reported deficiency):
+
+ https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
+
+The CI system executes tests on multiple architectures. It uses a kernel
+configuration derived from both the generic and architecture specific config
+file fragments below ``tools/testing/selftests/bpf/`` (e.g., ``config`` and
+``config.x86_64``).
+
+Denylisting Tests
+=================
+
+It is possible for some architectures to not have support for all BPF features.
+In such a case tests in CI may fail. An example of such a shortcoming is BPF
+trampoline support on IBM's s390x architecture. For cases like this, an in-tree
+deny list file, located at ``tools/testing/selftests/bpf/DENYLIST.<arch>``, can
+be used to prevent the test from running on such an architecture.
+
+In addition to that, the generic ``tools/testing/selftests/bpf/DENYLIST`` is
+honored on every architecture running tests.
+
+These files are organized in three columns. The first column lists the test in
+question. This can be the name of a test suite or of an individual test. The
+remaining two columns provide additional meta data that helps identify and
+classify the entry: column two is a copy and paste of the error being reported
+when running the test in the setting in question. The third column, if
+available, summarizes the underlying problem. A value of ``trampoline``, for
+example, indicates that lack of trampoline support is causing the test to fail.
+This last entry helps identify tests that can be re-enabled once such support is
+added.
+
=========================
Running Selftests in a VM
=========================
It's now possible to run the selftests using ``tools/testing/selftests/bpf/vmtest.sh``.
The script tries to ensure that the tests are run with the same environment as they
-would be run post-submit in the CI used by the Maintainers.
+would be run post-submit in the CI used by the Maintainers, with the exception
+that deny lists are not automatically honored.
+
+This script uses the in-tree kernel configuration and downloads a VM userspace
+image from the system used by the CI. It builds the kernel (without overwriting
+your existing Kconfig), recompiles the bpf selftests, runs them (by default
+``tools/testing/selftests/bpf/test_progs``) and saves the resulting output (by
+default in ``~/.bpf_selftests``).
-This script downloads a suitable Kconfig and VM userspace image from the system used by
-the CI. It builds the kernel (without overwriting your existing Kconfig), recompiles the
-bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and
-saves the resulting output (by default in ``~/.bpf_selftests``).
+Script dependencies:
+- clang (preferably built from sources, https://github.com/llvm/llvm-project);
+- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/);
+- qemu;
+- docutils (for ``rst2man``);
+- libcap-devel.
-For more information on about using the script, run:
+For more information about using the script, run:
.. code-block:: console
$ tools/testing/selftests/bpf/vmtest.sh -h
+In case of linker errors when running selftests, try using static linking:
+
+.. code-block:: console
+
+ $ LDLIBS=-static PKG_CONFIG='pkg-config --static' vmtest.sh
+
+.. note:: Some distros may not support static linking.
+
.. note:: The script uses pahole and clang based on host environment setting.
If you want to change pahole and llvm, you can change `PATH` environment
variable in the beginning of script.
-.. note:: The script currently only supports x86_64.
+Running vmtest on RV64
+======================
+To speed up testing and avoid various dependency issues, it is recommended to
+run vmtest in a Docker container. Before running vmtest, we need to prepare
+Docker container and local rootfs image. The overall steps are as follows:
+
+1. Create Docker container as shown in link [0].
+
+2. Use mkrootfs_debian.sh script [1] to build local rootfs image:
+
+.. code-block:: console
+
+ $ sudo ./mkrootfs_debian.sh --arch riscv64 --distro noble
+
+3. Start Docker container [0] and run vmtest in the container:
+
+.. code-block:: console
+
+ $ PLATFORM=riscv64 CROSS_COMPILE=riscv64-linux-gnu- \
+ tools/testing/selftests/bpf/vmtest.sh \
+ -l <path of local rootfs image> -- \
+ ./test_progs -d \
+ \"$(cat tools/testing/selftests/bpf/DENYLIST.riscv64 \
+ | cut -d'#' -f1 \
+ | sed -e 's/^[[:space:]]*//' \
+ -e 's/[[:space:]]*$//' \
+ | tr -s '\n' ',' \
+ )\"
+
+Link: https://github.com/pulehui/riscv-bpf-vmtest.git [0]
+Link: https://github.com/libbpf/ci/blob/main/rootfs/mkrootfs_debian.sh [1]
Additional information about selftest failures are
documented here.
@@ -59,7 +145,7 @@ the insn 20 undoes map_value addition. It is currently impossible for the
verifier to understand such speculative pointer arithmetic.
Hence `this patch`__ addresses it on the compiler side. It was committed on llvm 12.
-__ https://reviews.llvm.org/D85570
+__ https://github.com/llvm/llvm-project/commit/ddf1864ace484035e3cde5e83b3a31ac81e059c6
The corresponding C code
@@ -109,7 +195,46 @@ This is due to a llvm BPF backend bug. `The fix`__
has been pushed to llvm 10.x release branch and will be
available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
-__ https://reviews.llvm.org/D78466
+__ https://github.com/llvm/llvm-project/commit/3cb7e7bf959dcd3b8080986c62e10a75c7af43f0
+
+bpf_verif_scale/loop6.bpf.o test failure with Clang 12
+======================================================
+
+With Clang 12, the following bpf_verif_scale test failed:
+ * ``bpf_verif_scale/loop6.bpf.o``
+
+The verifier output looks like
+
+.. code-block:: c
+
+ R1 type=ctx expected=fp
+ The sequence of 8193 jumps is too complex.
+
+The reason is compiler generating the following code
+
+.. code-block:: c
+
+ ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+ 14: 16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
+ 15: bc 51 00 00 00 00 00 00 w1 = w5
+ 16: 04 01 00 00 ff ff ff ff w1 += -1
+ 17: 67 05 00 00 20 00 00 00 r5 <<= 32
+ 18: 77 05 00 00 20 00 00 00 r5 >>= 32
+ 19: a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
+ 20: b7 05 00 00 06 00 00 00 r5 = 6
+ 00000000000000a8 <LBB0_4>:
+ 21: b7 02 00 00 00 00 00 00 r2 = 0
+ 22: b7 01 00 00 00 00 00 00 r1 = 0
+ ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+ 23: 7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
+ 24: 7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
+
+Note that insn #15 has w1 = w5 and w1 is refined later but
+r5(w5) is eventually saved on stack at insn #24 for later use.
+This cause later verifier failure. The bug has been `fixed`__ in
+Clang 13.
+
+__ https://github.com/llvm/llvm-project/commit/1959ead525b8830cc8a345f45e1c3ef9902d3229
BPF CO-RE-based tests and Clang version
=======================================
@@ -126,8 +251,89 @@ failures:
- __builtin_btf_type_id() [0_, 1_, 2_];
- __builtin_preserve_type_info(), __builtin_preserve_enum_value() [3_, 4_].
-.. _0: https://reviews.llvm.org/D74572
-.. _1: https://reviews.llvm.org/D74668
-.. _2: https://reviews.llvm.org/D85174
-.. _3: https://reviews.llvm.org/D83878
-.. _4: https://reviews.llvm.org/D83242
+.. _0: https://github.com/llvm/llvm-project/commit/6b01b465388b204d543da3cf49efd6080db094a9
+.. _1: https://github.com/llvm/llvm-project/commit/072cde03aaa13a2c57acf62d79876bf79aa1919f
+.. _2: https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99
+.. _3: https://github.com/llvm/llvm-project/commit/6d218b4adb093ff2e9764febbbc89f429412006c
+.. _4: https://github.com/llvm/llvm-project/commit/6d6750696400e7ce988d66a1a00e1d0cb32815f8
+
+Floating-point tests and Clang version
+======================================
+
+Certain selftests, e.g. core_reloc, require support for the floating-point
+types, which was introduced in `Clang 13`__. The older Clang versions will
+either crash when compiling these tests, or generate an incorrect BTF.
+
+__ https://github.com/llvm/llvm-project/commit/a7137b238a07d9399d3ae96c0b461571bd5aa8b2
+
+Kernel function call test and Clang version
+===========================================
+
+Some selftests (e.g. kfunc_call and bpf_tcp_ca) require a LLVM support
+to generate extern function in BTF. It was introduced in `Clang 13`__.
+
+Without it, the error from compiling bpf selftests looks like:
+
+.. code-block:: console
+
+ libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
+
+__ https://github.com/llvm/llvm-project/commit/886f9ff53155075bd5f1e994f17b85d1e1b7470c
+
+btf_tag test and Clang version
+==============================
+
+The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and
+btf_type_tag attributes. They are introduced in `Clang 14` [0_, 1_].
+The subtests ``btf_type_tag_user_{mod1, mod2, vmlinux}`` also requires
+pahole version ``1.23``.
+
+Without them, the btf_tag selftest will be skipped and you will observe:
+
+.. code-block:: console
+
+ #<test_num> btf_tag:SKIP
+
+.. _0: https://github.com/llvm/llvm-project/commit/a162b67c98066218d0d00aa13b99afb95d9bb5e6
+.. _1: https://github.com/llvm/llvm-project/commit/3466e00716e12e32fdb100e3fcfca5c2b3e8d784
+
+Clang dependencies for static linking tests
+===========================================
+
+linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to
+generate valid BTF information for weak variables. Please make sure you use
+Clang that contains the fix.
+
+__ https://github.com/llvm/llvm-project/commit/968292cb93198442138128d850fd54dc7edc0035
+
+Clang relocation changes
+========================
+
+Clang 13 patch `clang reloc patch`_ made some changes on relocations such
+that existing relocation types are broken into more types and
+each new type corresponds to only one way to resolve relocation.
+See `kernel llvm reloc`_ for more explanation and some examples.
+Using clang 13 to compile old libbpf which has static linker support,
+there will be a compilation failure::
+
+ libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.bpf.o
+
+Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``.
+To fix this issue, user newer libbpf.
+
+.. Links
+.. _clang reloc patch: https://github.com/llvm/llvm-project/commit/6a2ea84600ba4bd3b2733bd8f08f5115eb32164b
+.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
+
+Clang dependencies for the u32 spill test (xdpwall)
+===================================================
+The xdpwall selftest requires a change in `Clang 14`__.
+
+Without it, the xdpwall selftest will fail and the error message
+from running test_progs will look like:
+
+.. code-block:: console
+
+ test_xdpwall:FAIL:Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5? unexpected error: -4007
+
+__ https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5
diff --git a/tools/testing/selftests/bpf/autoconf_helper.h b/tools/testing/selftests/bpf/autoconf_helper.h
new file mode 100644
index 000000000000..5b243b9cdf8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/autoconf_helper.h
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#ifdef HAVE_GENHDR
+# include "autoconf.h"
+#else
+# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
+# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
+# endif
+#endif
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 332ed2f7b402..bd29bb2e6cb5 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -8,16 +8,17 @@
#include <fcntl.h>
#include <pthread.h>
#include <sys/sysinfo.h>
-#include <sys/resource.h>
#include <signal.h>
#include "bench.h"
+#include "bpf_util.h"
#include "testing_helpers.h"
struct env env = {
.warmup_sec = 1,
.duration_sec = 5,
.affinity = false,
- .consumer_cnt = 1,
+ .quiet = false,
+ .consumer_cnt = 0,
.producer_cnt = 1,
};
@@ -29,25 +30,39 @@ static int libbpf_print_fn(enum libbpf_print_level level,
return vfprintf(stderr, format, args);
}
-static int bump_memlock_rlimit(void)
+void setup_libbpf(void)
{
- struct rlimit rlim_new = {
- .rlim_cur = RLIM_INFINITY,
- .rlim_max = RLIM_INFINITY,
- };
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ libbpf_set_print(libbpf_print_fn);
+}
+
+void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ long total = res->false_hits + res->hits + res->drops;
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
- return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n",
+ res->false_hits, total, ((float)res->false_hits / total) * 100);
}
-void setup_libbpf()
+void false_hits_report_final(struct bench_res res[], int res_cnt)
{
- int err;
+ long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0;
+ int i;
- libbpf_set_print(libbpf_print_fn);
+ for (i = 0; i < res_cnt; i++) {
+ total_hits += res[i].hits;
+ total_false_hits += res[i].false_hits;
+ total_drops += res[i].drops;
+ }
+ total_ops = total_hits + total_false_hits + total_drops;
- err = bump_memlock_rlimit();
- if (err)
- fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err);
+ printf("Summary: %ld false hits of %ld total operations. ",
+ total_false_hits, total_ops);
+ printf("Percentage = %2.2f %%\n",
+ ((float)total_false_hits / total_ops) * 100);
}
void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
@@ -62,20 +77,59 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
printf("Iter %3d (%7.3lfus): ",
iter, (delta_ns - 1000000000) / 1000.0);
- printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
- hits_per_sec, hits_per_prod, drops_per_sec);
+ printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n",
+ hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
+}
+
+void
+grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+ int i;
+
+ memset(gp_stat, 0, sizeof(struct basic_stats));
+
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean)
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+ }
+ gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
+void
+grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+ int i;
+
+ memset(gp_stat, 0, sizeof(struct basic_stats));
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean)
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+ }
+ gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
}
void hits_drops_report_final(struct bench_res res[], int res_cnt)
{
int i;
- double hits_mean = 0.0, drops_mean = 0.0;
- double hits_stddev = 0.0, drops_stddev = 0.0;
+ double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0;
+ double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0;
+ double total_ops;
for (i = 0; i < res_cnt; i++) {
hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
}
+ total_ops_mean = hits_mean + drops_mean;
if (res_cnt > 1) {
for (i = 0; i < res_cnt; i++) {
@@ -85,14 +139,101 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt)
drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
(drops_mean - res[i].drops / 1000000.0) /
(res_cnt - 1.0);
+ total_ops = res[i].hits + res[i].drops;
+ total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) *
+ (total_ops_mean - total_ops / 1000000.0) /
+ (res_cnt - 1.0);
}
hits_stddev = sqrt(hits_stddev);
drops_stddev = sqrt(drops_stddev);
+ total_ops_stddev = sqrt(total_ops_stddev);
}
printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
hits_mean, hits_stddev, hits_mean / env.producer_cnt);
- printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
+ printf("drops %8.3lf \u00B1 %5.3lfM/s, ",
drops_mean, drops_stddev);
+ printf("total operations %8.3lf \u00B1 %5.3lfM/s\n",
+ total_ops_mean, total_ops_stddev);
+}
+
+void ops_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+ printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod);
+}
+
+void ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++)
+ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+ (hits_mean - res[i].hits / 1000000.0) /
+ (res_cnt - 1.0);
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf M ops/s (%7.3lfM ops/prod), ",
+ hits_mean, hits_stddev, hits_mean / env.producer_cnt);
+ printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
+}
+
+void local_storage_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double important_hits_per_sec, hits_per_sec;
+ double delta_sec = delta_ns / 1000000000.0;
+
+ hits_per_sec = res->hits / 1000000.0 / delta_sec;
+ important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec;
+
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+ printf("hits %8.3lfM/s ", hits_per_sec);
+ printf("important_hits %8.3lfM/s\n", important_hits_per_sec);
+}
+
+void local_storage_report_final(struct bench_res res[], int res_cnt)
+{
+ double important_hits_mean = 0.0, important_hits_stddev = 0.0;
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+ (hits_mean - res[i].hits / 1000000.0) /
+ (res_cnt - 1.0);
+ important_hits_stddev +=
+ (important_hits_mean - res[i].important_hits / 1000000.0) *
+ (important_hits_mean - res[i].important_hits / 1000000.0) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ important_hits_stddev = sqrt(important_hits_stddev);
+ }
+ printf("Summary: hits throughput %8.3lf \u00B1 %5.3lf M ops/s, ",
+ hits_mean, hits_stddev);
+ printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean);
+ printf("important_hits throughput %8.3lf \u00B1 %5.3lf M ops/s\n",
+ important_hits_mean, important_hits_stddev);
}
const char *argp_program_version = "benchmark";
@@ -123,6 +264,7 @@ static const struct argp_option opts[] = {
{ "consumers", 'c', "NUM", 0, "Number of consumer threads"},
{ "verbose", 'v', NULL, 0, "Verbose debug output"},
{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
+ { "quiet", 'q', NULL, 0, "Be more quiet"},
{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
"Set of CPUs for producer threads; implies --affinity"},
{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
@@ -131,16 +273,42 @@ static const struct argp_option opts[] = {
};
extern struct argp bench_ringbufs_argp;
+extern struct argp bench_bloom_map_argp;
+extern struct argp bench_bpf_loop_argp;
+extern struct argp bench_local_storage_argp;
+extern struct argp bench_local_storage_rcu_tasks_trace_argp;
+extern struct argp bench_strncmp_argp;
+extern struct argp bench_hashmap_lookup_argp;
+extern struct argp bench_local_storage_create_argp;
+extern struct argp bench_htab_mem_argp;
+extern struct argp bench_trigger_batch_argp;
+extern struct argp bench_crypto_argp;
+extern struct argp bench_sockmap_argp;
+extern struct argp bench_lpm_trie_map_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
+ { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
+ { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
+ { &bench_local_storage_argp, 0, "local_storage benchmark", 0 },
+ { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
+ { &bench_local_storage_rcu_tasks_trace_argp, 0,
+ "local_storage RCU Tasks Trace slowdown benchmark", 0 },
+ { &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
+ { &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
+ { &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
+ { &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
+ { &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
+ { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
+ { &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 },
{},
};
+/* Make pos_args global, so that we can run argp_parse twice, if necessary */
+static int pos_args;
+
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
- static int pos_args;
-
switch (key) {
case 'v':
env.verbose = true;
@@ -164,14 +332,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
break;
case 'p':
env.producer_cnt = strtol(arg, NULL, 10);
- if (env.producer_cnt <= 0) {
+ if (env.producer_cnt < 0) {
fprintf(stderr, "Invalid producer count: %s\n", arg);
argp_usage(state);
}
break;
case 'c':
env.consumer_cnt = strtol(arg, NULL, 10);
- if (env.consumer_cnt <= 0) {
+ if (env.consumer_cnt < 0) {
fprintf(stderr, "Invalid consumer count: %s\n", arg);
argp_usage(state);
}
@@ -179,6 +347,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'a':
env.affinity = true;
break;
+ case 'q':
+ env.quiet = true;
+ break;
case ARG_PROD_AFFINITY_SET:
env.affinity = true;
if (parse_num_list(arg, &env.prod_cpus.cpus,
@@ -209,7 +380,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
return 0;
}
-static void parse_cmdline_args(int argc, char **argv)
+static void parse_cmdline_args_init(int argc, char **argv)
{
static const struct argp argp = {
.options = opts,
@@ -219,9 +390,25 @@ static void parse_cmdline_args(int argc, char **argv)
};
if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
exit(1);
- if (!env.list && !env.bench_name) {
- argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
- exit(1);
+}
+
+static void parse_cmdline_args_final(int argc, char **argv)
+{
+ struct argp_child bench_parsers[2] = {};
+ const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+ .children = bench_parsers,
+ };
+
+ /* Parse arguments the second time with the correct set of parsers */
+ if (bench->argp) {
+ bench_parsers[0].argp = bench->argp;
+ bench_parsers[0].header = bench->name;
+ pos_args = 0;
+ if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+ exit(1);
}
}
@@ -265,12 +452,14 @@ static void setup_timer()
static void set_thread_affinity(pthread_t thread, int cpu)
{
cpu_set_t cpuset;
+ int err;
CPU_ZERO(&cpuset);
CPU_SET(cpu, &cpuset);
- if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) {
+ err = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+ if (err) {
fprintf(stderr, "setting affinity to CPU #%d failed: %d\n",
- cpu, errno);
+ cpu, -err);
exit(1);
}
}
@@ -291,7 +480,7 @@ static int next_cpu(struct cpu_set *cpu_set)
exit(1);
}
- return cpu_set->next_cpu++;
+ return cpu_set->next_cpu++ % env.nr_cpus;
}
static struct bench_state {
@@ -311,17 +500,75 @@ extern const struct bench bench_rename_kretprobe;
extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-extern const struct bench bench_trig_base;
-extern const struct bench bench_trig_tp;
-extern const struct bench bench_trig_rawtp;
+
+/* pure counting benchmarks to establish theoretical limits */
+extern const struct bench bench_trig_usermode_count;
+extern const struct bench bench_trig_syscall_count;
+extern const struct bench bench_trig_kernel_count;
+
+/* batched, staying mostly in-kernel benchmarks */
extern const struct bench bench_trig_kprobe;
+extern const struct bench bench_trig_kretprobe;
+extern const struct bench bench_trig_kprobe_multi;
+extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
-extern const struct bench bench_trig_fentry_sleep;
+extern const struct bench bench_trig_kprobe_multi_all;
+extern const struct bench bench_trig_kretprobe_multi_all;
+extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_trig_tp;
+extern const struct bench bench_trig_rawtp;
+
+/* uprobe/uretprobe benchmarks */
+extern const struct bench bench_trig_uprobe_nop;
+extern const struct bench bench_trig_uretprobe_nop;
+extern const struct bench bench_trig_uprobe_push;
+extern const struct bench bench_trig_uretprobe_push;
+extern const struct bench bench_trig_uprobe_ret;
+extern const struct bench bench_trig_uretprobe_ret;
+extern const struct bench bench_trig_uprobe_multi_nop;
+extern const struct bench bench_trig_uretprobe_multi_nop;
+extern const struct bench bench_trig_uprobe_multi_push;
+extern const struct bench bench_trig_uretprobe_multi_push;
+extern const struct bench bench_trig_uprobe_multi_ret;
+extern const struct bench bench_trig_uretprobe_multi_ret;
+#ifdef __x86_64__
+extern const struct bench bench_trig_uprobe_nop5;
+extern const struct bench bench_trig_uretprobe_nop5;
+extern const struct bench bench_trig_uprobe_multi_nop5;
+extern const struct bench bench_trig_uretprobe_multi_nop5;
+#endif
+
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
extern const struct bench bench_pb_custom;
+extern const struct bench bench_bloom_lookup;
+extern const struct bench bench_bloom_update;
+extern const struct bench bench_bloom_false_positive;
+extern const struct bench bench_hashmap_without_bloom;
+extern const struct bench bench_hashmap_with_bloom;
+extern const struct bench bench_bpf_loop;
+extern const struct bench bench_strncmp_no_helper;
+extern const struct bench bench_strncmp_helper;
+extern const struct bench bench_bpf_hashmap_full_update;
+extern const struct bench bench_local_storage_cache_seq_get;
+extern const struct bench bench_local_storage_cache_interleaved_get;
+extern const struct bench bench_local_storage_cache_hashmap_control;
+extern const struct bench bench_local_storage_tasks_trace;
+extern const struct bench bench_bpf_hashmap_lookup;
+extern const struct bench bench_local_storage_create;
+extern const struct bench bench_htab_mem;
+extern const struct bench bench_crypto_encrypt;
+extern const struct bench bench_crypto_decrypt;
+extern const struct bench bench_sockmap;
+extern const struct bench bench_lpm_trie_noop;
+extern const struct bench bench_lpm_trie_baseline;
+extern const struct bench bench_lpm_trie_lookup;
+extern const struct bench bench_lpm_trie_insert;
+extern const struct bench bench_lpm_trie_update;
+extern const struct bench bench_lpm_trie_delete;
+extern const struct bench bench_lpm_trie_free;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -332,28 +579,82 @@ static const struct bench *benchs[] = {
&bench_rename_rawtp,
&bench_rename_fentry,
&bench_rename_fexit,
- &bench_trig_base,
- &bench_trig_tp,
- &bench_trig_rawtp,
+ /* pure counting benchmarks for establishing theoretical limits */
+ &bench_trig_usermode_count,
+ &bench_trig_kernel_count,
+ &bench_trig_syscall_count,
+ /* batched, staying mostly in-kernel triggers */
&bench_trig_kprobe,
+ &bench_trig_kretprobe,
+ &bench_trig_kprobe_multi,
+ &bench_trig_kretprobe_multi,
&bench_trig_fentry,
- &bench_trig_fentry_sleep,
+ &bench_trig_kprobe_multi_all,
+ &bench_trig_kretprobe_multi_all,
+ &bench_trig_fexit,
&bench_trig_fmodret,
+ &bench_trig_tp,
+ &bench_trig_rawtp,
+ /* uprobes */
+ &bench_trig_uprobe_nop,
+ &bench_trig_uretprobe_nop,
+ &bench_trig_uprobe_push,
+ &bench_trig_uretprobe_push,
+ &bench_trig_uprobe_ret,
+ &bench_trig_uretprobe_ret,
+ &bench_trig_uprobe_multi_nop,
+ &bench_trig_uretprobe_multi_nop,
+ &bench_trig_uprobe_multi_push,
+ &bench_trig_uretprobe_multi_push,
+ &bench_trig_uprobe_multi_ret,
+ &bench_trig_uretprobe_multi_ret,
+#ifdef __x86_64__
+ &bench_trig_uprobe_nop5,
+ &bench_trig_uretprobe_nop5,
+ &bench_trig_uprobe_multi_nop5,
+ &bench_trig_uretprobe_multi_nop5,
+#endif
+ /* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
&bench_rb_custom,
&bench_pb_libbpf,
&bench_pb_custom,
+ &bench_bloom_lookup,
+ &bench_bloom_update,
+ &bench_bloom_false_positive,
+ &bench_hashmap_without_bloom,
+ &bench_hashmap_with_bloom,
+ &bench_bpf_loop,
+ &bench_strncmp_no_helper,
+ &bench_strncmp_helper,
+ &bench_bpf_hashmap_full_update,
+ &bench_local_storage_cache_seq_get,
+ &bench_local_storage_cache_interleaved_get,
+ &bench_local_storage_cache_hashmap_control,
+ &bench_local_storage_tasks_trace,
+ &bench_bpf_hashmap_lookup,
+ &bench_local_storage_create,
+ &bench_htab_mem,
+ &bench_crypto_encrypt,
+ &bench_crypto_decrypt,
+ &bench_sockmap,
+ &bench_lpm_trie_noop,
+ &bench_lpm_trie_baseline,
+ &bench_lpm_trie_lookup,
+ &bench_lpm_trie_insert,
+ &bench_lpm_trie_update,
+ &bench_lpm_trie_delete,
+ &bench_lpm_trie_free,
};
-static void setup_benchmark()
+static void find_benchmark(void)
{
- int i, err;
+ int i;
if (!env.bench_name) {
fprintf(stderr, "benchmark name is not specified\n");
exit(1);
}
-
for (i = 0; i < ARRAY_SIZE(benchs); i++) {
if (strcmp(benchs[i]->name, env.bench_name) == 0) {
bench = benchs[i];
@@ -364,8 +665,14 @@ static void setup_benchmark()
fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
exit(1);
}
+}
+
+static void setup_benchmark(void)
+{
+ int i, err;
- printf("Setting up benchmark '%s'...\n", bench->name);
+ if (!env.quiet)
+ printf("Setting up benchmark '%s'...\n", bench->name);
state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
@@ -380,11 +687,15 @@ static void setup_benchmark()
bench->setup();
for (i = 0; i < env.consumer_cnt; i++) {
+ if (!bench->consumer_thread) {
+ fprintf(stderr, "benchmark doesn't support consumers!\n");
+ exit(1);
+ }
err = pthread_create(&state.consumers[i], NULL,
bench->consumer_thread, (void *)(long)i);
if (err) {
fprintf(stderr, "failed to create consumer thread #%d: %d\n",
- i, -errno);
+ i, -err);
exit(1);
}
if (env.affinity)
@@ -399,11 +710,15 @@ static void setup_benchmark()
env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
for (i = 0; i < env.producer_cnt; i++) {
+ if (!bench->producer_thread) {
+ fprintf(stderr, "benchmark doesn't support producers!\n");
+ exit(1);
+ }
err = pthread_create(&state.producers[i], NULL,
bench->producer_thread, (void *)(long)i);
if (err) {
fprintf(stderr, "failed to create producer thread #%d: %d\n",
- i, -errno);
+ i, -err);
exit(1);
}
if (env.affinity)
@@ -411,7 +726,8 @@ static void setup_benchmark()
next_cpu(&env.prod_cpus));
}
- printf("Benchmark '%s' started.\n", bench->name);
+ if (!env.quiet)
+ printf("Benchmark '%s' started.\n", bench->name);
}
static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
@@ -435,7 +751,8 @@ static void collect_measurements(long delta_ns) {
int main(int argc, char **argv)
{
- parse_cmdline_args(argc, argv);
+ env.nr_cpus = get_nprocs();
+ parse_cmdline_args_init(argc, argv);
if (env.list) {
int i;
@@ -447,6 +764,9 @@ int main(int argc, char **argv)
return 0;
}
+ find_benchmark();
+ parse_cmdline_args_final(argc, argv);
+
setup_benchmark();
setup_timer();
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index c1f48a473b02..bea323820ffb 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -10,6 +10,7 @@
#include <math.h>
#include <time.h>
#include <sys/syscall.h>
+#include <limits.h>
struct cpu_set {
bool *cpus;
@@ -24,21 +25,35 @@ struct env {
bool verbose;
bool list;
bool affinity;
+ bool quiet;
int consumer_cnt;
int producer_cnt;
+ int nr_cpus;
struct cpu_set prod_cpus;
struct cpu_set cons_cpus;
};
+struct basic_stats {
+ double mean;
+ double stddev;
+};
+
struct bench_res {
long hits;
long drops;
+ long false_hits;
+ long important_hits;
+ unsigned long gp_ns;
+ unsigned long gp_ct;
+ unsigned int stime;
+ unsigned long duration_ns;
};
struct bench {
const char *name;
- void (*validate)();
- void (*setup)();
+ const struct argp *argp;
+ void (*validate)(void);
+ void (*setup)(void);
void *(*producer_thread)(void *ctx);
void *(*consumer_thread)(void *ctx);
void (*measure)(struct bench_res* res);
@@ -53,17 +68,20 @@ struct counter {
extern struct env env;
extern const struct bench *bench;
-void setup_libbpf();
+void setup_libbpf(void);
void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
void hits_drops_report_final(struct bench_res res[], int res_cnt);
-
-static inline __u64 get_time_ns() {
- struct timespec t;
-
- clock_gettime(CLOCK_MONOTONIC, &t);
-
- return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
-}
+void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
+void false_hits_report_final(struct bench_res res[], int res_cnt);
+void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
+void ops_report_final(struct bench_res res[], int res_cnt);
+void local_storage_report_progress(int iter, struct bench_res *res,
+ long delta_ns);
+void local_storage_report_final(struct bench_res res[], int res_cnt);
+void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt,
+ struct basic_stats *gp_stat);
+void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt,
+ struct basic_stats *gp_stat);
static inline void atomic_inc(long *value)
{
diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
new file mode 100644
index 000000000000..e289dd1a14ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <argp.h>
+#include <linux/log2.h>
+#include <pthread.h>
+#include "bench.h"
+#include "bloom_filter_bench.skel.h"
+#include "bpf_util.h"
+
+static struct ctx {
+ bool use_array_map;
+ bool use_hashmap;
+ bool hashmap_use_bloom;
+ bool count_false_hits;
+
+ struct bloom_filter_bench *skel;
+
+ int bloom_fd;
+ int hashmap_fd;
+ int array_map_fd;
+
+ pthread_mutex_t map_done_mtx;
+ pthread_cond_t map_done_cv;
+ bool map_done;
+ bool map_prepare_err;
+
+ __u32 next_map_idx;
+} ctx = {
+ .map_done_mtx = PTHREAD_MUTEX_INITIALIZER,
+ .map_done_cv = PTHREAD_COND_INITIALIZER,
+};
+
+struct stat {
+ __u32 stats[3];
+};
+
+static struct {
+ __u32 nr_entries;
+ __u8 nr_hash_funcs;
+ __u8 value_size;
+} args = {
+ .nr_entries = 1000,
+ .nr_hash_funcs = 3,
+ .value_size = 8,
+};
+
+enum {
+ ARG_NR_ENTRIES = 3000,
+ ARG_NR_HASH_FUNCS = 3001,
+ ARG_VALUE_SIZE = 3002,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "Set number of expected unique entries in the bloom filter"},
+ { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0,
+ "Set number of hash functions in the bloom filter"},
+ { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0,
+ "Set value size (in bytes) of bloom filter entries"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid nr_entries count.");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_NR_HASH_FUNCS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > 15) {
+ fprintf(stderr,
+ "The bloom filter must use 1 to 15 hash functions.");
+ argp_usage(state);
+ }
+ args.nr_hash_funcs = ret;
+ break;
+ case ARG_VALUE_SIZE:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 2 || ret > 256) {
+ fprintf(stderr,
+ "Invalid value size. Must be between 2 and 256 bytes");
+ argp_usage(state);
+ }
+ args.value_size = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_bloom_map_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr,
+ "The bloom filter benchmarks do not support consumer\n");
+ exit(1);
+ }
+}
+
+static inline void trigger_bpf_program(void)
+{
+ syscall(__NR_getpgid);
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ trigger_bpf_program();
+
+ return NULL;
+}
+
+static void *map_prepare_thread(void *arg)
+{
+ __u32 val_size, i;
+ void *val = NULL;
+ int err;
+
+ val_size = args.value_size;
+ val = malloc(val_size);
+ if (!val) {
+ ctx.map_prepare_err = true;
+ goto done;
+ }
+
+ while (true) {
+ i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED);
+ if (i > args.nr_entries)
+ break;
+
+again:
+ /* Populate hashmap, bloom filter map, and array map with the same
+ * random values
+ */
+ err = syscall(__NR_getrandom, val, val_size, 0);
+ if (err != val_size) {
+ ctx.map_prepare_err = true;
+ fprintf(stderr, "failed to get random value: %d\n", -errno);
+ break;
+ }
+
+ if (ctx.use_hashmap) {
+ err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST);
+ if (err) {
+ if (err != -EEXIST) {
+ ctx.map_prepare_err = true;
+ fprintf(stderr, "failed to add elem to hashmap: %d\n",
+ -errno);
+ break;
+ }
+ goto again;
+ }
+ }
+
+ i--;
+
+ if (ctx.use_array_map) {
+ err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0);
+ if (err) {
+ ctx.map_prepare_err = true;
+ fprintf(stderr, "failed to add elem to array map: %d\n", -errno);
+ break;
+ }
+ }
+
+ if (ctx.use_hashmap && !ctx.hashmap_use_bloom)
+ continue;
+
+ err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0);
+ if (err) {
+ ctx.map_prepare_err = true;
+ fprintf(stderr,
+ "failed to add elem to bloom filter map: %d\n", -errno);
+ break;
+ }
+ }
+done:
+ pthread_mutex_lock(&ctx.map_done_mtx);
+ ctx.map_done = true;
+ pthread_cond_signal(&ctx.map_done_cv);
+ pthread_mutex_unlock(&ctx.map_done_mtx);
+
+ if (val)
+ free(val);
+
+ return NULL;
+}
+
+static void populate_maps(void)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ pthread_t map_thread;
+ int i, err, nr_rand_bytes;
+
+ ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map);
+ ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap);
+ ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map);
+
+ for (i = 0; i < nr_cpus; i++) {
+ err = pthread_create(&map_thread, NULL, map_prepare_thread,
+ NULL);
+ if (err) {
+ fprintf(stderr, "failed to create pthread: %d\n", -errno);
+ exit(1);
+ }
+ }
+
+ pthread_mutex_lock(&ctx.map_done_mtx);
+ while (!ctx.map_done)
+ pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx);
+ pthread_mutex_unlock(&ctx.map_done_mtx);
+
+ if (ctx.map_prepare_err)
+ exit(1);
+
+ nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals,
+ ctx.skel->rodata->nr_rand_bytes, 0);
+ if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) {
+ fprintf(stderr, "failed to get random bytes\n");
+ exit(1);
+ }
+}
+
+static void check_args(void)
+{
+ if (args.value_size < 8) {
+ __u64 nr_unique_entries = 1ULL << (args.value_size * 8);
+
+ if (args.nr_entries > nr_unique_entries) {
+ fprintf(stderr,
+ "Not enough unique values for the nr_entries requested\n");
+ exit(1);
+ }
+ }
+}
+
+static struct bloom_filter_bench *setup_skeleton(void)
+{
+ struct bloom_filter_bench *skel;
+
+ check_args();
+
+ setup_libbpf();
+
+ skel = bloom_filter_bench__open();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom;
+ skel->rodata->count_false_hits = ctx.count_false_hits;
+
+ /* Resize number of entries */
+ bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries);
+
+ bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries);
+
+ bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries);
+
+ /* Set value size */
+ bpf_map__set_value_size(skel->maps.array_map, args.value_size);
+
+ bpf_map__set_value_size(skel->maps.bloom_map, args.value_size);
+
+ bpf_map__set_value_size(skel->maps.hashmap, args.value_size);
+
+ /* For the hashmap, we use the value as the key as well */
+ bpf_map__set_key_size(skel->maps.hashmap, args.value_size);
+
+ skel->bss->value_size = args.value_size;
+
+ /* Set number of hash functions */
+ bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs);
+
+ if (bloom_filter_bench__load(skel)) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ return skel;
+}
+
+static void bloom_lookup_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_array_map = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_lookup);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void bloom_update_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_array_map = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_update);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void false_positive_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_hashmap = true;
+ ctx.hashmap_use_bloom = true;
+ ctx.count_false_hits = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void hashmap_with_bloom_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_hashmap = true;
+ ctx.hashmap_use_bloom = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void hashmap_no_bloom_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_hashmap = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void measure(struct bench_res *res)
+{
+ unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0;
+ static unsigned long last_hits, last_drops, last_false_hits;
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ int hit_key, drop_key, false_hit_key;
+ int i;
+
+ hit_key = ctx.skel->rodata->hit_key;
+ drop_key = ctx.skel->rodata->drop_key;
+ false_hit_key = ctx.skel->rodata->false_hit_key;
+
+ if (ctx.skel->bss->error != 0) {
+ fprintf(stderr, "error (%d) when searching the bloom filter\n",
+ ctx.skel->bss->error);
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i];
+
+ total_hits += s->stats[hit_key];
+ total_drops += s->stats[drop_key];
+ total_false_hits += s->stats[false_hit_key];
+ }
+
+ res->hits = total_hits - last_hits;
+ res->drops = total_drops - last_drops;
+ res->false_hits = total_false_hits - last_false_hits;
+
+ last_hits = total_hits;
+ last_drops = total_drops;
+ last_false_hits = total_false_hits;
+}
+
+const struct bench bench_bloom_lookup = {
+ .name = "bloom-lookup",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = bloom_lookup_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_bloom_update = {
+ .name = "bloom-update",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = bloom_update_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_bloom_false_positive = {
+ .name = "bloom-false-positive",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = false_positive_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = false_hits_report_progress,
+ .report_final = false_hits_report_final,
+};
+
+const struct bench bench_hashmap_without_bloom = {
+ .name = "hashmap-without-bloom",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = hashmap_no_bloom_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_hashmap_with_bloom = {
+ .name = "hashmap-with-bloom",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = hashmap_with_bloom_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c
new file mode 100644
index 000000000000..2845edaba8db
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+#include "bench.h"
+#include "crypto_bench.skel.h"
+
+#define MAX_CIPHER_LEN 32
+static char *input;
+static struct crypto_ctx {
+ struct crypto_bench *skel;
+ int pfd;
+} ctx;
+
+static struct crypto_args {
+ u32 crypto_len;
+ char *crypto_cipher;
+} args = {
+ .crypto_len = 16,
+ .crypto_cipher = "ecb(aes)",
+};
+
+enum {
+ ARG_CRYPTO_LEN = 5000,
+ ARG_CRYPTO_CIPHER = 5001,
+};
+
+static const struct argp_option opts[] = {
+ { "crypto-len", ARG_CRYPTO_LEN, "CRYPTO_LEN", 0,
+ "Set the length of crypto buffer" },
+ { "crypto-cipher", ARG_CRYPTO_CIPHER, "CRYPTO_CIPHER", 0,
+ "Set the cipher to use (default:ecb(aes))" },
+ {},
+};
+
+static error_t crypto_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_CRYPTO_LEN:
+ args.crypto_len = strtoul(arg, NULL, 10);
+ if (!args.crypto_len ||
+ args.crypto_len > sizeof(ctx.skel->bss->dst)) {
+ fprintf(stderr, "Invalid crypto buffer len (limit %zu)\n",
+ sizeof(ctx.skel->bss->dst));
+ argp_usage(state);
+ }
+ break;
+ case ARG_CRYPTO_CIPHER:
+ args.crypto_cipher = strdup(arg);
+ if (!strlen(args.crypto_cipher) ||
+ strlen(args.crypto_cipher) > MAX_CIPHER_LEN) {
+ fprintf(stderr, "Invalid crypto cipher len (limit %d)\n",
+ MAX_CIPHER_LEN);
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_crypto_argp = {
+ .options = opts,
+ .parser = crypto_parse_arg,
+};
+
+static void crypto_validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "bpf crypto benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+}
+
+static void crypto_setup(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ int err, pfd;
+ size_t i, sz;
+
+ sz = args.crypto_len;
+ if (!sz || sz > sizeof(ctx.skel->bss->dst)) {
+ fprintf(stderr, "invalid encrypt buffer size (source %zu, target %zu)\n",
+ sz, sizeof(ctx.skel->bss->dst));
+ exit(1);
+ }
+
+ setup_libbpf();
+
+ ctx.skel = crypto_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ snprintf(ctx.skel->bss->cipher, 128, "%s", args.crypto_cipher);
+ memcpy(ctx.skel->bss->key, "12345678testtest", 16);
+ ctx.skel->bss->key_len = 16;
+ ctx.skel->bss->authsize = 0;
+
+ srandom(time(NULL));
+ input = malloc(sz);
+ for (i = 0; i < sz - 1; i++)
+ input[i] = '1' + random() % 9;
+ input[sz - 1] = '\0';
+
+ ctx.skel->rodata->len = args.crypto_len;
+
+ err = crypto_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ crypto_bench__destroy(ctx.skel);
+ exit(1);
+ }
+
+ pfd = bpf_program__fd(ctx.skel->progs.crypto_setup);
+ if (pfd < 0) {
+ fprintf(stderr, "failed to get fd for setup prog\n");
+ crypto_bench__destroy(ctx.skel);
+ exit(1);
+ }
+
+ err = bpf_prog_test_run_opts(pfd, &opts);
+ if (err || ctx.skel->bss->status) {
+ fprintf(stderr, "failed to run setup prog: err %d, status %d\n",
+ err, ctx.skel->bss->status);
+ crypto_bench__destroy(ctx.skel);
+ exit(1);
+ }
+}
+
+static void crypto_encrypt_setup(void)
+{
+ crypto_setup();
+ ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_encrypt);
+}
+
+static void crypto_decrypt_setup(void)
+{
+ crypto_setup();
+ ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_decrypt);
+}
+
+static void crypto_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void *crypto_producer(void *unused)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .repeat = 64,
+ .data_in = input,
+ .data_size_in = args.crypto_len,
+ );
+
+ while (true)
+ (void)bpf_prog_test_run_opts(ctx.pfd, &opts);
+ return NULL;
+}
+
+const struct bench bench_crypto_encrypt = {
+ .name = "crypto-encrypt",
+ .argp = &bench_crypto_argp,
+ .validate = crypto_validate,
+ .setup = crypto_encrypt_setup,
+ .producer_thread = crypto_producer,
+ .measure = crypto_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_crypto_decrypt = {
+ .name = "crypto-decrypt",
+ .argp = &bench_crypto_argp,
+ .validate = crypto_validate,
+ .setup = crypto_decrypt_setup,
+ .producer_thread = crypto_producer,
+ .measure = crypto_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
new file mode 100644
index 000000000000..ee1dc12c5e5e
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include "bench.h"
+#include "bpf_hashmap_full_update_bench.skel.h"
+#include "bpf_util.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct bpf_hashmap_full_update_bench *skel;
+} ctx;
+
+#define MAX_LOOP_NUM 10000
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ while (true) {
+ /* trigger the bpf program */
+ syscall(__NR_getpgid);
+ }
+
+ return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+}
+
+static void setup(void)
+{
+ struct bpf_link *link;
+ int map_fd, i, max_entries;
+
+ setup_libbpf();
+
+ ctx.skel = bpf_hashmap_full_update_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.skel->bss->nr_loops = MAX_LOOP_NUM;
+
+ link = bpf_program__attach(ctx.skel->progs.benchmark);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+
+ /* fill hash_map */
+ map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench);
+ max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench);
+ for (i = 0; i < max_entries; i++)
+ bpf_map_update_elem(map_fd, &i, &i, BPF_ANY);
+}
+
+static void hashmap_report_final(struct bench_res res[], int res_cnt)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ int i;
+
+ for (i = 0; i < nr_cpus; i++) {
+ u64 time = ctx.skel->bss->percpu_time[i];
+
+ if (!time)
+ continue;
+
+ printf("%d:hash_map_full_perf %lld events per sec\n",
+ i, ctx.skel->bss->nr_loops * 1000000000ll / time);
+ }
+}
+
+const struct bench bench_bpf_hashmap_full_update = {
+ .name = "bpf-hashmap-full-update",
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = NULL,
+ .report_final = hashmap_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
new file mode 100644
index 000000000000..279ff1b8b5b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <sys/random.h>
+#include <argp.h>
+#include "bench.h"
+#include "bpf_hashmap_lookup.skel.h"
+#include "bpf_util.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct bpf_hashmap_lookup *skel;
+} ctx;
+
+/* only available to kernel, so define it here */
+#define BPF_MAX_LOOPS (1<<23)
+
+#define MAX_KEY_SIZE 1024 /* the size of the key map */
+
+static struct {
+ __u32 key_size;
+ __u32 map_flags;
+ __u32 max_entries;
+ __u32 nr_entries;
+ __u32 nr_loops;
+} args = {
+ .key_size = 4,
+ .map_flags = 0,
+ .max_entries = 1000,
+ .nr_entries = 500,
+ .nr_loops = 1000000,
+};
+
+enum {
+ ARG_KEY_SIZE = 8001,
+ ARG_MAP_FLAGS,
+ ARG_MAX_ENTRIES,
+ ARG_NR_ENTRIES,
+ ARG_NR_LOOPS,
+};
+
+static const struct argp_option opts[] = {
+ { "key_size", ARG_KEY_SIZE, "KEY_SIZE", 0,
+ "The hashmap key size (max 1024)"},
+ { "map_flags", ARG_MAP_FLAGS, "MAP_FLAGS", 0,
+ "The hashmap flags passed to BPF_MAP_CREATE"},
+ { "max_entries", ARG_MAX_ENTRIES, "MAX_ENTRIES", 0,
+ "The hashmap max entries"},
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "The number of entries to insert/lookup"},
+ { "nr_loops", ARG_NR_LOOPS, "NR_LOOPS", 0,
+ "The number of loops for the benchmark"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_KEY_SIZE:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > MAX_KEY_SIZE) {
+ fprintf(stderr, "invalid key_size");
+ argp_usage(state);
+ }
+ args.key_size = ret;
+ break;
+ case ARG_MAP_FLAGS:
+ ret = strtol(arg, NULL, 0);
+ if (ret < 0 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid map_flags");
+ argp_usage(state);
+ }
+ args.map_flags = ret;
+ break;
+ case ARG_MAX_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid max_entries");
+ argp_usage(state);
+ }
+ args.max_entries = ret;
+ break;
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid nr_entries");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_NR_LOOPS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > BPF_MAX_LOOPS) {
+ fprintf(stderr, "invalid nr_loops: %ld (min=1 max=%u)\n",
+ ret, BPF_MAX_LOOPS);
+ argp_usage(state);
+ }
+ args.nr_loops = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_hashmap_lookup_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+
+ if (args.nr_entries > args.max_entries) {
+ fprintf(stderr, "args.nr_entries is too big! (max %u, got %u)\n",
+ args.max_entries, args.nr_entries);
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ while (true) {
+ /* trigger the bpf program */
+ syscall(__NR_getpgid);
+ }
+ return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+}
+
+static inline void patch_key(u32 i, u32 *key)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ *key = i + 1;
+#else
+ *key = __builtin_bswap32(i + 1);
+#endif
+ /* the rest of key is random */
+}
+
+static void setup(void)
+{
+ struct bpf_link *link;
+ int map_fd;
+ int ret;
+ int i;
+
+ setup_libbpf();
+
+ ctx.skel = bpf_hashmap_lookup__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries);
+ bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size);
+ bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8);
+ bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags);
+
+ ctx.skel->bss->nr_entries = args.nr_entries;
+ ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries;
+
+ if (args.key_size > 4) {
+ for (i = 1; i < args.key_size/4; i++)
+ ctx.skel->bss->key[i] = 2654435761 * i;
+ }
+
+ ret = bpf_hashmap_lookup__load(ctx.skel);
+ if (ret) {
+ bpf_hashmap_lookup__destroy(ctx.skel);
+ fprintf(stderr, "failed to load map: %s", strerror(-ret));
+ exit(1);
+ }
+
+ /* fill in the hash_map */
+ map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench);
+ for (u64 i = 0; i < args.nr_entries; i++) {
+ patch_key(i, ctx.skel->bss->key);
+ bpf_map_update_elem(map_fd, ctx.skel->bss->key, &i, BPF_ANY);
+ }
+
+ link = bpf_program__attach(ctx.skel->progs.benchmark);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static inline double events_from_time(u64 time)
+{
+ if (time)
+ return args.nr_loops * 1000000000llu / time / 1000000.0L;
+
+ return 0;
+}
+
+static int compute_events(u64 *times, double *events_mean, double *events_stddev, u64 *mean_time)
+{
+ int i, n = 0;
+
+ *events_mean = 0;
+ *events_stddev = 0;
+ *mean_time = 0;
+
+ for (i = 0; i < 32; i++) {
+ if (!times[i])
+ break;
+ *mean_time += times[i];
+ *events_mean += events_from_time(times[i]);
+ n += 1;
+ }
+ if (!n)
+ return 0;
+
+ *mean_time /= n;
+ *events_mean /= n;
+
+ if (n > 1) {
+ for (i = 0; i < n; i++) {
+ double events_i = *events_mean - events_from_time(times[i]);
+ *events_stddev += events_i * events_i / (n - 1);
+ }
+ *events_stddev = sqrt(*events_stddev);
+ }
+
+ return n;
+}
+
+static void hashmap_report_final(struct bench_res res[], int res_cnt)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ double events_mean, events_stddev;
+ u64 mean_time;
+ int i, n;
+
+ for (i = 0; i < nr_cpus; i++) {
+ n = compute_events(ctx.skel->bss->percpu_times[i], &events_mean,
+ &events_stddev, &mean_time);
+ if (n == 0)
+ continue;
+
+ if (env.quiet) {
+ /* we expect only one cpu to be present */
+ if (env.affinity)
+ printf("%.3lf\n", events_mean);
+ else
+ printf("cpu%02d %.3lf\n", i, events_mean);
+ } else {
+ printf("cpu%02d: lookup %.3lfM ± %.3lfM events/sec"
+ " (approximated from %d samples of ~%lums)\n",
+ i, events_mean, 2*events_stddev,
+ n, mean_time / 1000000);
+ }
+ }
+}
+
+const struct bench bench_bpf_hashmap_lookup = {
+ .name = "bpf-hashmap-lookup",
+ .argp = &bench_hashmap_lookup_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = NULL,
+ .report_final = hashmap_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
new file mode 100644
index 000000000000..a705cfb2bccc
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <argp.h>
+#include "bench.h"
+#include "bpf_loop_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct bpf_loop_bench *skel;
+} ctx;
+
+static struct {
+ __u32 nr_loops;
+} args = {
+ .nr_loops = 10,
+};
+
+enum {
+ ARG_NR_LOOPS = 4000,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_loops", ARG_NR_LOOPS, "nr_loops", 0,
+ "Set number of loops for the bpf_loop helper"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_NR_LOOPS:
+ args.nr_loops = strtol(arg, NULL, 10);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_bpf_loop_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ /* trigger the bpf program */
+ syscall(__NR_getpgid);
+
+ return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void setup(void)
+{
+ struct bpf_link *link;
+
+ setup_libbpf();
+
+ ctx.skel = bpf_loop_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx.skel->progs.benchmark);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+
+ ctx.skel->bss->nr_loops = args.nr_loops;
+}
+
+const struct bench bench_bpf_loop = {
+ .name = "bpf-loop",
+ .argp = &bench_bpf_loop_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c
index befba7a82643..ba89ed3936b7 100644
--- a/tools/testing/selftests/bpf/benchs/bench_count.c
+++ b/tools/testing/selftests/bpf/benchs/bench_count.c
@@ -18,11 +18,6 @@ static void *count_global_producer(void *input)
return NULL;
}
-static void *count_global_consumer(void *input)
-{
- return NULL;
-}
-
static void count_global_measure(struct bench_res *res)
{
struct count_global_ctx *ctx = &count_global_ctx;
@@ -36,11 +31,11 @@ static struct count_local_ctx {
struct counter *hits;
} count_local_ctx;
-static void count_local_setup()
+static void count_local_setup(void)
{
struct count_local_ctx *ctx = &count_local_ctx;
- ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits));
+ ctx->hits = calloc(env.producer_cnt, sizeof(*ctx->hits));
if (!ctx->hits)
exit(1);
}
@@ -56,11 +51,6 @@ static void *count_local_producer(void *input)
return NULL;
}
-static void *count_local_consumer(void *input)
-{
- return NULL;
-}
-
static void count_local_measure(struct bench_res *res)
{
struct count_local_ctx *ctx = &count_local_ctx;
@@ -74,7 +64,6 @@ static void count_local_measure(struct bench_res *res)
const struct bench bench_count_global = {
.name = "count-global",
.producer_thread = count_global_producer,
- .consumer_thread = count_global_consumer,
.measure = count_global_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -84,7 +73,6 @@ const struct bench bench_count_local = {
.name = "count-local",
.setup = count_local_setup,
.producer_thread = count_local_producer,
- .consumer_thread = count_local_consumer,
.measure = count_local_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
new file mode 100644
index 000000000000..297e32390cd1
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <argp.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <fcntl.h>
+
+#include "bench.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+#include "htab_mem_bench.skel.h"
+
+struct htab_mem_use_case {
+ const char *name;
+ const char **progs;
+ /* Do synchronization between addition thread and deletion thread */
+ bool need_sync;
+};
+
+static struct htab_mem_ctx {
+ const struct htab_mem_use_case *uc;
+ struct htab_mem_bench *skel;
+ pthread_barrier_t *notify;
+ int fd;
+} ctx;
+
+const char *ow_progs[] = {"overwrite", NULL};
+const char *batch_progs[] = {"batch_add_batch_del", NULL};
+const char *add_del_progs[] = {"add_only", "del_only", NULL};
+const static struct htab_mem_use_case use_cases[] = {
+ { .name = "overwrite", .progs = ow_progs },
+ { .name = "batch_add_batch_del", .progs = batch_progs },
+ { .name = "add_del_on_diff_cpu", .progs = add_del_progs, .need_sync = true },
+};
+
+static struct htab_mem_args {
+ u32 value_size;
+ const char *use_case;
+ bool preallocated;
+} args = {
+ .value_size = 8,
+ .use_case = "overwrite",
+ .preallocated = false,
+};
+
+enum {
+ ARG_VALUE_SIZE = 10000,
+ ARG_USE_CASE = 10001,
+ ARG_PREALLOCATED = 10002,
+};
+
+static const struct argp_option opts[] = {
+ { "value-size", ARG_VALUE_SIZE, "VALUE_SIZE", 0,
+ "Set the value size of hash map (default 8)" },
+ { "use-case", ARG_USE_CASE, "USE_CASE", 0,
+ "Set the use case of hash map: overwrite|batch_add_batch_del|add_del_on_diff_cpu" },
+ { "preallocated", ARG_PREALLOCATED, NULL, 0, "use preallocated hash map" },
+ {},
+};
+
+static error_t htab_mem_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_VALUE_SIZE:
+ args.value_size = strtoul(arg, NULL, 10);
+ if (args.value_size > 4096) {
+ fprintf(stderr, "too big value size %u\n", args.value_size);
+ argp_usage(state);
+ }
+ break;
+ case ARG_USE_CASE:
+ args.use_case = strdup(arg);
+ if (!args.use_case) {
+ fprintf(stderr, "no mem for use-case\n");
+ argp_usage(state);
+ }
+ break;
+ case ARG_PREALLOCATED:
+ args.preallocated = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_htab_mem_argp = {
+ .options = opts,
+ .parser = htab_mem_parse_arg,
+};
+
+static void htab_mem_validate(void)
+{
+ if (!strcmp(use_cases[2].name, args.use_case) && env.producer_cnt % 2) {
+ fprintf(stderr, "%s needs an even number of producers\n", args.use_case);
+ exit(1);
+ }
+}
+
+static int htab_mem_bench_init_barriers(void)
+{
+ pthread_barrier_t *barriers;
+ unsigned int i, nr;
+
+ if (!ctx.uc->need_sync)
+ return 0;
+
+ nr = (env.producer_cnt + 1) / 2;
+ barriers = calloc(nr, sizeof(*barriers));
+ if (!barriers)
+ return -1;
+
+ /* Used for synchronization between two threads */
+ for (i = 0; i < nr; i++)
+ pthread_barrier_init(&barriers[i], NULL, 2);
+
+ ctx.notify = barriers;
+ return 0;
+}
+
+static void htab_mem_bench_exit_barriers(void)
+{
+ unsigned int i, nr;
+
+ if (!ctx.notify)
+ return;
+
+ nr = (env.producer_cnt + 1) / 2;
+ for (i = 0; i < nr; i++)
+ pthread_barrier_destroy(&ctx.notify[i]);
+ free(ctx.notify);
+}
+
+static const struct htab_mem_use_case *htab_mem_find_use_case_or_exit(const char *name)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(use_cases); i++) {
+ if (!strcmp(name, use_cases[i].name))
+ return &use_cases[i];
+ }
+
+ fprintf(stderr, "no such use-case: %s\n", name);
+ fprintf(stderr, "available use case:");
+ for (i = 0; i < ARRAY_SIZE(use_cases); i++)
+ fprintf(stderr, " %s", use_cases[i].name);
+ fprintf(stderr, "\n");
+ exit(1);
+}
+
+static void htab_mem_setup(void)
+{
+ struct bpf_map *map;
+ const char **names;
+ int err;
+
+ setup_libbpf();
+
+ ctx.uc = htab_mem_find_use_case_or_exit(args.use_case);
+ err = htab_mem_bench_init_barriers();
+ if (err) {
+ fprintf(stderr, "failed to init barrier\n");
+ exit(1);
+ }
+
+ ctx.fd = cgroup_setup_and_join("/htab_mem");
+ if (ctx.fd < 0)
+ goto cleanup;
+
+ ctx.skel = htab_mem_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ goto cleanup;
+ }
+
+ map = ctx.skel->maps.htab;
+ bpf_map__set_value_size(map, args.value_size);
+ /* Ensure that different CPUs can operate on different subset */
+ bpf_map__set_max_entries(map, MAX(8192, 64 * env.nr_cpus));
+ if (args.preallocated)
+ bpf_map__set_map_flags(map, bpf_map__map_flags(map) & ~BPF_F_NO_PREALLOC);
+
+ names = ctx.uc->progs;
+ while (*names) {
+ struct bpf_program *prog;
+
+ prog = bpf_object__find_program_by_name(ctx.skel->obj, *names);
+ if (!prog) {
+ fprintf(stderr, "no such program %s\n", *names);
+ goto cleanup;
+ }
+ bpf_program__set_autoload(prog, true);
+ names++;
+ }
+ ctx.skel->bss->nr_thread = env.producer_cnt;
+
+ err = htab_mem_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ goto cleanup;
+ }
+ err = htab_mem_bench__attach(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ goto cleanup;
+ }
+ return;
+
+cleanup:
+ htab_mem_bench__destroy(ctx.skel);
+ htab_mem_bench_exit_barriers();
+ if (ctx.fd >= 0) {
+ close(ctx.fd);
+ cleanup_cgroup_environment();
+ }
+ exit(1);
+}
+
+static void htab_mem_add_fn(pthread_barrier_t *notify)
+{
+ while (true) {
+ /* Do addition */
+ (void)syscall(__NR_getpgid, 0);
+ /* Notify deletion thread to do deletion */
+ pthread_barrier_wait(notify);
+ /* Wait for deletion to complete */
+ pthread_barrier_wait(notify);
+ }
+}
+
+static void htab_mem_delete_fn(pthread_barrier_t *notify)
+{
+ while (true) {
+ /* Wait for addition to complete */
+ pthread_barrier_wait(notify);
+ /* Do deletion */
+ (void)syscall(__NR_getppid);
+ /* Notify addition thread to do addition */
+ pthread_barrier_wait(notify);
+ }
+}
+
+static void *htab_mem_producer(void *arg)
+{
+ pthread_barrier_t *notify;
+ int seq;
+
+ if (!ctx.uc->need_sync) {
+ while (true)
+ (void)syscall(__NR_getpgid, 0);
+ return NULL;
+ }
+
+ seq = (long)arg;
+ notify = &ctx.notify[seq / 2];
+ if (seq & 1)
+ htab_mem_delete_fn(notify);
+ else
+ htab_mem_add_fn(notify);
+ return NULL;
+}
+
+static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
+{
+ char buf[32];
+ ssize_t got;
+ int fd;
+
+ fd = openat(ctx.fd, name, O_RDONLY);
+ if (fd < 0) {
+ /* cgroup v1 ? */
+ fprintf(stderr, "no %s\n", name);
+ *value = 0;
+ return;
+ }
+
+ got = read(fd, buf, sizeof(buf) - 1);
+ close(fd);
+ if (got <= 0) {
+ *value = 0;
+ return;
+ }
+ buf[got] = 0;
+
+ *value = strtoull(buf, NULL, 0);
+}
+
+static void htab_mem_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->op_cnt, 0) / env.producer_cnt;
+ htab_mem_read_mem_cgrp_file("memory.current", &res->gp_ct);
+}
+
+static void htab_mem_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double loop, mem;
+
+ loop = res->hits / 1000.0 / (delta_ns / 1000000000.0);
+ mem = res->gp_ct / 1048576.0;
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+ printf("per-prod-op %7.2lfk/s, memory usage %7.2lfMiB\n", loop, mem);
+}
+
+static void htab_mem_report_final(struct bench_res res[], int res_cnt)
+{
+ double mem_mean = 0.0, mem_stddev = 0.0;
+ double loop_mean = 0.0, loop_stddev = 0.0;
+ unsigned long peak_mem;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ loop_mean += res[i].hits / 1000.0 / (0.0 + res_cnt);
+ mem_mean += res[i].gp_ct / 1048576.0 / (0.0 + res_cnt);
+ }
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ loop_stddev += (loop_mean - res[i].hits / 1000.0) *
+ (loop_mean - res[i].hits / 1000.0) /
+ (res_cnt - 1.0);
+ mem_stddev += (mem_mean - res[i].gp_ct / 1048576.0) *
+ (mem_mean - res[i].gp_ct / 1048576.0) /
+ (res_cnt - 1.0);
+ }
+ loop_stddev = sqrt(loop_stddev);
+ mem_stddev = sqrt(mem_stddev);
+ }
+
+ htab_mem_read_mem_cgrp_file("memory.peak", &peak_mem);
+ printf("Summary: per-prod-op %7.2lf \u00B1 %7.2lfk/s, memory usage %7.2lf \u00B1 %7.2lfMiB,"
+ " peak memory usage %7.2lfMiB\n",
+ loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0);
+
+ close(ctx.fd);
+ cleanup_cgroup_environment();
+}
+
+const struct bench bench_htab_mem = {
+ .name = "htab-mem",
+ .argp = &bench_htab_mem_argp,
+ .validate = htab_mem_validate,
+ .setup = htab_mem_setup,
+ .producer_thread = htab_mem_producer,
+ .measure = htab_mem_measure,
+ .report_progress = htab_mem_report_progress,
+ .report_final = htab_mem_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
new file mode 100644
index 000000000000..452499428ceb
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+#include <linux/btf.h>
+
+#include "local_storage_bench.skel.h"
+#include "bench.h"
+
+#include <test_btf.h>
+
+static struct {
+ __u32 nr_maps;
+ __u32 hashmap_nr_keys_used;
+} args = {
+ .nr_maps = 1000,
+ .hashmap_nr_keys_used = 1000,
+};
+
+enum {
+ ARG_NR_MAPS = 6000,
+ ARG_HASHMAP_NR_KEYS_USED = 6001,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0,
+ "Set number of local_storage maps"},
+ { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS",
+ 0, "When doing hashmap test, set number of hashmap keys test uses"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_MAPS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid nr_maps");
+ argp_usage(state);
+ }
+ args.nr_maps = ret;
+ break;
+ case ARG_HASHMAP_NR_KEYS_USED:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid hashmap_nr_keys_used");
+ argp_usage(state);
+ }
+ args.hashmap_nr_keys_used = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Keep in sync w/ array of maps in bpf */
+#define MAX_NR_MAPS 1000
+/* keep in sync w/ same define in bpf */
+#define HASHMAP_SZ 4194304
+
+static void validate(void)
+{
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+ exit(1);
+ }
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+
+ if (args.nr_maps > MAX_NR_MAPS) {
+ fprintf(stderr, "nr_maps must be <= 1000\n");
+ exit(1);
+ }
+
+ if (args.hashmap_nr_keys_used > HASHMAP_SZ) {
+ fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ);
+ exit(1);
+ }
+}
+
+static struct {
+ struct local_storage_bench *skel;
+ void *bpf_obj;
+ struct bpf_map *array_of_maps;
+} ctx;
+
+static void prepopulate_hashmap(int fd)
+{
+ int i, key, val;
+
+ /* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so
+ * populate the hashmap for a similar comparison
+ */
+ for (i = 0; i < HASHMAP_SZ; i++) {
+ key = val = i;
+ if (bpf_map_update_elem(fd, &key, &val, 0)) {
+ fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key);
+ exit(1);
+ }
+ }
+}
+
+static void __setup(struct bpf_program *prog, bool hashmap)
+{
+ struct bpf_map *inner_map;
+ int i, fd, mim_fd, err;
+
+ LIBBPF_OPTS(bpf_map_create_opts, create_opts);
+
+ if (!hashmap)
+ create_opts.map_flags = BPF_F_NO_PREALLOC;
+
+ ctx.skel->rodata->num_maps = args.nr_maps;
+ ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used;
+ inner_map = bpf_map__inner_map(ctx.array_of_maps);
+ create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map);
+ create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map);
+
+ err = local_storage_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "Error loading skeleton\n");
+ goto err_out;
+ }
+
+ create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj);
+
+ mim_fd = bpf_map__fd(ctx.array_of_maps);
+ if (mim_fd < 0) {
+ fprintf(stderr, "Error getting map_in_map fd\n");
+ goto err_out;
+ }
+
+ for (i = 0; i < args.nr_maps; i++) {
+ if (hashmap)
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
+ sizeof(int), HASHMAP_SZ, &create_opts);
+ else
+ fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int),
+ sizeof(int), 0, &create_opts);
+ if (fd < 0) {
+ fprintf(stderr, "Error creating map %d: %d\n", i, fd);
+ goto err_out;
+ }
+
+ if (hashmap)
+ prepopulate_hashmap(fd);
+
+ err = bpf_map_update_elem(mim_fd, &i, &fd, 0);
+ if (err) {
+ fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i);
+ goto err_out;
+ }
+ }
+
+ if (!bpf_program__attach(prog)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ return;
+err_out:
+ exit(1);
+}
+
+static void hashmap_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_hash_maps;
+ skel->rodata->use_hashmap = 1;
+ skel->rodata->interleave = 0;
+
+ __setup(skel->progs.get_local, true);
+}
+
+static void local_storage_cache_get_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+ skel->rodata->use_hashmap = 0;
+ skel->rodata->interleave = 0;
+
+ __setup(skel->progs.get_local, false);
+}
+
+static void local_storage_cache_get_interleaved_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+ skel->rodata->use_hashmap = 0;
+ skel->rodata->interleave = 1;
+
+ __setup(skel->progs.get_local, false);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+ res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0);
+}
+
+static inline void trigger_bpf_program(void)
+{
+ syscall(__NR_getpgid);
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ trigger_bpf_program();
+
+ return NULL;
+}
+
+/* cache sequential and interleaved get benchs test local_storage get
+ * performance, specifically they demonstrate performance cliff of
+ * current list-plus-cache local_storage model.
+ *
+ * cache sequential get: call bpf_task_storage_get on n maps in order
+ * cache interleaved get: like "sequential get", but interleave 4 calls to the
+ * 'important' map (idx 0 in array_of_maps) for every 10 calls. Goal
+ * is to mimic environment where many progs are accessing their local_storage
+ * maps, with 'our' prog needing to access its map more often than others
+ */
+const struct bench bench_local_storage_cache_seq_get = {
+ .name = "local-storage-cache-seq-get",
+ .argp = &bench_local_storage_argp,
+ .validate = validate,
+ .setup = local_storage_cache_get_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_interleaved_get = {
+ .name = "local-storage-cache-int-get",
+ .argp = &bench_local_storage_argp,
+ .validate = validate,
+ .setup = local_storage_cache_get_interleaved_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_hashmap_control = {
+ .name = "local-storage-cache-hashmap-control",
+ .argp = &bench_local_storage_argp,
+ .validate = validate,
+ .setup = hashmap_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
new file mode 100644
index 000000000000..e2ff8ea1cb79
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <pthread.h>
+#include <argp.h>
+
+#include "bench.h"
+#include "bench_local_storage_create.skel.h"
+
+struct thread {
+ int *fds;
+ pthread_t *pthds;
+ int *pthd_results;
+};
+
+static struct bench_local_storage_create *skel;
+static struct thread *threads;
+static long create_owner_errs;
+static int storage_type = BPF_MAP_TYPE_SK_STORAGE;
+static int batch_sz = 32;
+
+enum {
+ ARG_BATCH_SZ = 9000,
+ ARG_STORAGE_TYPE = 9001,
+};
+
+static const struct argp_option opts[] = {
+ { "batch-size", ARG_BATCH_SZ, "BATCH_SIZE", 0,
+ "The number of storage creations in each batch" },
+ { "storage-type", ARG_STORAGE_TYPE, "STORAGE_TYPE", 0,
+ "The type of local storage to test (socket or task)" },
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ int ret;
+
+ switch (key) {
+ case ARG_BATCH_SZ:
+ ret = atoi(arg);
+ if (ret < 1) {
+ fprintf(stderr, "invalid batch-size\n");
+ argp_usage(state);
+ }
+ batch_sz = ret;
+ break;
+ case ARG_STORAGE_TYPE:
+ if (!strcmp(arg, "task")) {
+ storage_type = BPF_MAP_TYPE_TASK_STORAGE;
+ } else if (!strcmp(arg, "socket")) {
+ storage_type = BPF_MAP_TYPE_SK_STORAGE;
+ } else {
+ fprintf(stderr, "invalid storage-type (socket or task)\n");
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_create_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr,
+ "local-storage-create benchmark does not need consumer\n");
+ exit(1);
+ }
+}
+
+static void setup(void)
+{
+ int i;
+
+ skel = bench_local_storage_create__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "error loading skel\n");
+ exit(1);
+ }
+
+ skel->bss->bench_pid = getpid();
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+ if (!bpf_program__attach(skel->progs.socket_post_create)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+ } else {
+ if (!bpf_program__attach(skel->progs.sched_process_fork)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+ }
+
+ if (!bpf_program__attach(skel->progs.kmalloc)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+
+ threads = calloc(env.producer_cnt, sizeof(*threads));
+
+ if (!threads) {
+ fprintf(stderr, "cannot alloc thread_res\n");
+ exit(1);
+ }
+
+ for (i = 0; i < env.producer_cnt; i++) {
+ struct thread *t = &threads[i];
+
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+ t->fds = malloc(batch_sz * sizeof(*t->fds));
+ if (!t->fds) {
+ fprintf(stderr, "cannot alloc t->fds\n");
+ exit(1);
+ }
+ } else {
+ t->pthds = malloc(batch_sz * sizeof(*t->pthds));
+ if (!t->pthds) {
+ fprintf(stderr, "cannot alloc t->pthds\n");
+ exit(1);
+ }
+ t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results));
+ if (!t->pthd_results) {
+ fprintf(stderr, "cannot alloc t->pthd_results\n");
+ exit(1);
+ }
+ }
+ }
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&skel->bss->create_cnts, 0);
+ res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0);
+}
+
+static void *sk_producer(void *input)
+{
+ struct thread *t = &threads[(long)(input)];
+ int *fds = t->fds;
+ int i;
+
+ while (true) {
+ for (i = 0; i < batch_sz; i++) {
+ fds[i] = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (fds[i] == -1)
+ atomic_inc(&create_owner_errs);
+ }
+
+ for (i = 0; i < batch_sz; i++) {
+ if (fds[i] != -1)
+ close(fds[i]);
+ }
+ }
+
+ return NULL;
+}
+
+static void *thread_func(void *arg)
+{
+ return NULL;
+}
+
+static void *task_producer(void *input)
+{
+ struct thread *t = &threads[(long)(input)];
+ pthread_t *pthds = t->pthds;
+ int *pthd_results = t->pthd_results;
+ int i;
+
+ while (true) {
+ for (i = 0; i < batch_sz; i++) {
+ pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL);
+ if (pthd_results[i])
+ atomic_inc(&create_owner_errs);
+ }
+
+ for (i = 0; i < batch_sz; i++) {
+ if (!pthd_results[i])
+ pthread_join(pthds[i], NULL);
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE)
+ return sk_producer(input);
+ else
+ return task_producer(input);
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double creates_per_sec, kmallocs_per_create;
+
+ creates_per_sec = res->hits / 1000.0 / (delta_ns / 1000000000.0);
+ kmallocs_per_create = (double)res->drops / res->hits;
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+ printf("creates %8.3lfk/s (%7.3lfk/prod), ",
+ creates_per_sec, creates_per_sec / env.producer_cnt);
+ printf("%3.2lf kmallocs/create\n", kmallocs_per_create);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ double creates_mean = 0.0, creates_stddev = 0.0;
+ long total_creates = 0, total_kmallocs = 0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ creates_mean += res[i].hits / 1000.0 / (0.0 + res_cnt);
+ total_creates += res[i].hits;
+ total_kmallocs += res[i].drops;
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ creates_stddev += (creates_mean - res[i].hits / 1000.0) *
+ (creates_mean - res[i].hits / 1000.0) /
+ (res_cnt - 1.0);
+ creates_stddev = sqrt(creates_stddev);
+ }
+ printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ",
+ creates_mean, creates_stddev, creates_mean / env.producer_cnt);
+ printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates);
+ if (create_owner_errs || skel->bss->create_errs)
+ printf("%s() errors %ld create_errs %ld\n",
+ storage_type == BPF_MAP_TYPE_SK_STORAGE ?
+ "socket" : "pthread_create",
+ create_owner_errs,
+ skel->bss->create_errs);
+}
+
+/* Benchmark performance of creating bpf local storage */
+const struct bench bench_local_storage_create = {
+ .name = "local-storage-create",
+ .argp = &bench_local_storage_create_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
new file mode 100644
index 000000000000..edf0b00418c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+
+#include <sys/prctl.h>
+#include "local_storage_rcu_tasks_trace_bench.skel.h"
+#include "bench.h"
+
+#include <signal.h>
+
+static struct {
+ __u32 nr_procs;
+ __u32 kthread_pid;
+} args = {
+ .nr_procs = 1000,
+ .kthread_pid = 0,
+};
+
+enum {
+ ARG_NR_PROCS = 7000,
+ ARG_KTHREAD_PID = 7001,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0,
+ "Set number of user processes to spin up"},
+ { "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
+ "Pid of rcu_tasks_trace kthread for ticks tracking"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_PROCS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid nr_procs\n");
+ argp_usage(state);
+ }
+ args.nr_procs = ret;
+ break;
+ case ARG_KTHREAD_PID:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1) {
+ fprintf(stderr, "invalid kthread_pid\n");
+ argp_usage(state);
+ }
+ args.kthread_pid = ret;
+ break;
+break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_rcu_tasks_trace_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+#define MAX_SLEEP_PROCS 150000
+
+static void validate(void)
+{
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+ exit(1);
+ }
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+
+ if (args.nr_procs > MAX_SLEEP_PROCS) {
+ fprintf(stderr, "benchmark supports up to %u sleeper procs!\n",
+ MAX_SLEEP_PROCS);
+ exit(1);
+ }
+}
+
+static long kthread_pid_ticks(void)
+{
+ char procfs_path[100];
+ long stime;
+ FILE *f;
+
+ if (!args.kthread_pid)
+ return -1;
+
+ sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid);
+ f = fopen(procfs_path, "r");
+ if (!f) {
+ fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
+ goto err_out;
+ }
+ if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) {
+ fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path);
+ goto err_out;
+ }
+ fclose(f);
+ return stime;
+
+err_out:
+ if (f)
+ fclose(f);
+ exit(1);
+ return 0;
+}
+
+static struct {
+ struct local_storage_rcu_tasks_trace_bench *skel;
+ long prev_kthread_stime;
+} ctx;
+
+static void sleep_and_loop(void)
+{
+ while (true) {
+ sleep(rand() % 4);
+ syscall(__NR_getpgid);
+ }
+}
+
+static void local_storage_tasks_trace_setup(void)
+{
+ int i, err, forkret, runner_pid;
+
+ runner_pid = getpid();
+
+ for (i = 0; i < args.nr_procs; i++) {
+ forkret = fork();
+ if (forkret < 0) {
+ fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i,
+ args.nr_procs);
+ goto err_out;
+ }
+
+ if (!forkret) {
+ err = prctl(PR_SET_PDEATHSIG, SIGKILL);
+ if (err < 0) {
+ fprintf(stderr, "prctl failed with err %d, exiting\n", errno);
+ goto err_out;
+ }
+
+ if (getppid() != runner_pid) {
+ fprintf(stderr, "Runner died while spinning up procs, exiting\n");
+ goto err_out;
+ }
+ sleep_and_loop();
+ }
+ }
+ printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid);
+
+ setup_libbpf();
+
+ ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "Error doing open_and_load, exiting\n");
+ goto err_out;
+ }
+
+ ctx.prev_kthread_stime = kthread_pid_ticks();
+
+ if (!bpf_program__attach(ctx.skel->progs.get_local)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ if (!bpf_program__attach(ctx.skel->progs.pregp_step)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ if (!bpf_program__attach(ctx.skel->progs.postgp)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ return;
+err_out:
+ exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+ long ticks;
+
+ res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);
+ res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);
+ ticks = kthread_pid_ticks();
+ res->stime = ticks - ctx.prev_kthread_stime;
+ ctx.prev_kthread_stime = ticks;
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ syscall(__NR_getpgid);
+ return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ if (ctx.skel->bss->unexpected) {
+ fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp).");
+ fprintf(stderr, "Data can't be trusted, exiting\n");
+ exit(1);
+ }
+
+ if (env.quiet)
+ return;
+
+ printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
+ iter, res->gp_ns / (double)res->gp_ct);
+ printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
+ iter, res->stime / (double)res->gp_ct);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ struct basic_stats gp_stat;
+
+ grace_period_latency_basic_stats(res, res_cnt, &gp_stat);
+ printf("SUMMARY tasks_trace grace period latency");
+ printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev);
+ grace_period_ticks_basic_stats(res, res_cnt, &gp_stat);
+ printf("SUMMARY ticks per tasks_trace grace period");
+ printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev);
+}
+
+/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
+ * of RCU Tasks-Trace.
+ *
+ * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside
+ * from sleep() loop, and creating/destroying BPF task-local storage on wakeup.
+ * The number of forked tasks is configurable.
+ *
+ * exercising code paths which call call_rcu_tasks_trace while there are many
+ * thousands of tasks on the system should result in RCU Tasks-Trace having to
+ * do a noticeable amount of work.
+ *
+ * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
+ * after the grace period has ended, or by measuring grace period latency.
+ *
+ * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step
+ * and rcu_tasks_trace_postgp functions to measure grace period latency and
+ * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks
+ */
+const struct bench bench_local_storage_tasks_trace = {
+ .name = "local-storage-tasks-trace",
+ .argp = &bench_local_storage_rcu_tasks_trace_argp,
+ .validate = validate,
+ .setup = local_storage_tasks_trace_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
new file mode 100644
index 000000000000..246f6cb3387d
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+/*
+ * All of these benchmarks operate on tries with keys in the range
+ * [0, args.nr_entries), i.e. there are no gaps or partially filled
+ * branches of the trie for any key < args.nr_entries.
+ *
+ * This gives an idea of worst-case behaviour.
+ */
+
+#include <argp.h>
+#include <linux/time64.h>
+#include <linux/if_ether.h>
+#include "lpm_trie_bench.skel.h"
+#include "lpm_trie_map.skel.h"
+#include "bench.h"
+#include "testing_helpers.h"
+#include "progs/lpm_trie.h"
+
+static struct ctx {
+ struct lpm_trie_bench *bench;
+} ctx;
+
+static struct {
+ __u32 nr_entries;
+ __u32 prefixlen;
+ bool random;
+} args = {
+ .nr_entries = 0,
+ .prefixlen = 32,
+ .random = false,
+};
+
+enum {
+ ARG_NR_ENTRIES = 9000,
+ ARG_PREFIX_LEN,
+ ARG_RANDOM,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "Number of unique entries in the LPM trie" },
+ { "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0,
+ "Number of prefix bits to use in the LPM trie" },
+ { "random", ARG_RANDOM, NULL, 0, "Access random keys during op" },
+ {},
+};
+
+static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid nr_entries count.");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_PREFIX_LEN:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid prefix_len value.");
+ argp_usage(state);
+ }
+ args.prefixlen = ret;
+ break;
+ case ARG_RANDOM:
+ args.random = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+const struct argp bench_lpm_trie_map_argp = {
+ .options = opts,
+ .parser = lpm_parse_arg,
+};
+
+static void validate_common(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer\n");
+ exit(1);
+ }
+
+ if (args.nr_entries == 0) {
+ fprintf(stderr, "Missing --nr_entries parameter\n");
+ exit(1);
+ }
+
+ if ((1UL << args.prefixlen) < args.nr_entries) {
+ fprintf(stderr, "prefix_len value too small for nr_entries\n");
+ exit(1);
+ }
+}
+
+static void lpm_insert_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-insert requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-insert does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_delete_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-delete requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-delete does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_free_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-free requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-free does not support --random\n");
+ exit(1);
+ }
+}
+
+static struct trie_key *keys;
+static __u32 *vals;
+
+static void fill_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch update keys to map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void empty_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch delete keys for map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void attach_prog(void)
+{
+ int i;
+
+ ctx.bench = lpm_trie_bench__open_and_load();
+ if (!ctx.bench) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.bench->bss->nr_entries = args.nr_entries;
+ ctx.bench->bss->prefixlen = args.prefixlen;
+ ctx.bench->bss->random = args.random;
+
+ if (lpm_trie_bench__attach(ctx.bench)) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ exit(1);
+ }
+
+ keys = calloc(args.nr_entries, sizeof(*keys));
+ vals = calloc(args.nr_entries, sizeof(*vals));
+
+ for (i = 0; i < args.nr_entries; i++) {
+ struct trie_key *k = &keys[i];
+ __u32 *v = &vals[i];
+
+ k->prefixlen = args.prefixlen;
+ k->data = i;
+ *v = 1;
+ }
+}
+
+static void attach_prog_and_fill_map(void)
+{
+ int fd;
+
+ attach_prog();
+
+ fd = bpf_map__fd(ctx.bench->maps.trie_map);
+ fill_map(fd);
+}
+
+static void lpm_noop_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_NOOP;
+}
+
+static void lpm_baseline_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_BASELINE;
+}
+
+static void lpm_lookup_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_LOOKUP;
+}
+
+static void lpm_insert_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_INSERT;
+}
+
+static void lpm_update_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_UPDATE;
+}
+
+static void lpm_delete_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_DELETE;
+}
+
+static void lpm_free_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_FREE;
+}
+
+static void lpm_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.bench->bss->hits, 0);
+ res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
+}
+
+static void bench_reinit_map(void)
+{
+ int fd = bpf_map__fd(ctx.bench->maps.trie_map);
+
+ switch (ctx.bench->bss->op) {
+ case LPM_OP_INSERT:
+ /* trie_map needs to be emptied */
+ empty_map(fd);
+ break;
+ case LPM_OP_DELETE:
+ /* trie_map needs to be refilled */
+ fill_map(fd);
+ break;
+ default:
+ fprintf(stderr, "Unexpected REINIT return code for op %d\n",
+ ctx.bench->bss->op);
+ exit(1);
+ }
+}
+
+/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */
+static void *lpm_producer(void *unused __always_unused)
+{
+ int err;
+ char in[ETH_HLEN]; /* unused */
+
+ LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in,
+ .data_size_in = sizeof(in), .repeat = 1, );
+
+ while (true) {
+ int fd = bpf_program__fd(ctx.bench->progs.run_bench);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err) {
+ fprintf(stderr, "failed to run BPF prog: %d\n", err);
+ exit(1);
+ }
+
+ /* Check for kernel error code */
+ if ((int)opts.retval < 0) {
+ fprintf(stderr, "BPF prog returned error: %d\n",
+ opts.retval);
+ exit(1);
+ }
+
+ switch (opts.retval) {
+ case LPM_BENCH_SUCCESS:
+ break;
+ case LPM_BENCH_REINIT_MAP:
+ bench_reinit_map();
+ break;
+ default:
+ fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n",
+ opts.retval, ctx.bench->bss->op);
+ exit(1);
+ }
+ }
+
+ return NULL;
+}
+
+static void *lpm_free_producer(void *unused __always_unused)
+{
+ while (true) {
+ struct lpm_trie_map *skel;
+
+ skel = lpm_trie_map__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ fill_map(bpf_map__fd(skel->maps.trie_free_map));
+ lpm_trie_map__destroy(skel);
+ }
+
+ return NULL;
+}
+
+/*
+ * The standard bench op_report_*() functions assume measurements are
+ * taken over a 1-second interval but operations that modify the map
+ * (INSERT, DELETE, and FREE) cannot run indefinitely without
+ * "resetting" the map to the initial state. Depending on the size of
+ * the map, this likely needs to happen before the 1-second timer fires.
+ *
+ * Calculate the fraction of a second over which the op measurement was
+ * taken (to ignore any time spent doing the reset) and report the
+ * throughput results per second.
+ */
+static void frac_second_report_progress(int iter, struct bench_res *res,
+ long delta_ns, double rate_divisor,
+ char rate)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / rate_divisor /
+ (res->duration_ns / (double)NSEC_PER_SEC);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter,
+ (delta_ns - NSEC_PER_SEC) / 1000.0);
+ printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate,
+ hits_per_prod, rate);
+}
+
+static void frac_second_report_final(struct bench_res res[], int res_cnt,
+ double lat_divisor, double rate_divisor,
+ char rate, const char *unit)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ double latency = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ double val = res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_mean += val / (0.0 + res_cnt);
+ latency += res[i].duration_ns / res[i].hits / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ double val =
+ res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_stddev += (hits_mean - val) * (hits_mean - val) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ",
+ hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt,
+ rate);
+ printf("latency %8.3lf %s/op\n",
+ latency / lat_divisor / env.producer_cnt, unit);
+}
+
+static void insert_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void delete_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void free_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000.0;
+ char rate = 'K';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void insert_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void delete_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void free_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1000000.0;
+ double rate_divisor = 1000.0;
+ const char *unit = "ms";
+ char rate = 'K';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+/* noop bench measures harness-overhead */
+const struct bench bench_lpm_trie_noop = {
+ .name = "lpm-trie-noop",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_noop_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* baseline overhead for lookup and update */
+const struct bench bench_lpm_trie_baseline = {
+ .name = "lpm-trie-baseline",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_baseline_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of doing a lookup on existing entries in a full trie */
+const struct bench bench_lpm_trie_lookup = {
+ .name = "lpm-trie-lookup",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_lookup_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of inserting new entries into an empty trie */
+const struct bench bench_lpm_trie_insert = {
+ .name = "lpm-trie-insert",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_insert_validate,
+ .setup = lpm_insert_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = insert_ops_report_progress,
+ .report_final = insert_ops_report_final,
+};
+
+/* measure cost of updating existing entries in a full trie */
+const struct bench bench_lpm_trie_update = {
+ .name = "lpm-trie-update",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_update_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of deleting existing entries from a full trie */
+const struct bench bench_lpm_trie_delete = {
+ .name = "lpm-trie-delete",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_delete_validate,
+ .setup = lpm_delete_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = delete_ops_report_progress,
+ .report_final = delete_ops_report_final,
+};
+
+/* measure cost of freeing a full trie */
+const struct bench bench_lpm_trie_free = {
+ .name = "lpm-trie-free",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_free_validate,
+ .setup = lpm_free_setup,
+ .producer_thread = lpm_free_producer,
+ .measure = lpm_measure,
+ .report_progress = free_ops_report_progress,
+ .report_final = free_ops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index a967674098ad..bf66893c7a33 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -11,14 +11,14 @@ static struct ctx {
int fd;
} ctx;
-static void validate()
+static void validate(void)
{
if (env.producer_cnt != 1) {
fprintf(stderr, "benchmark doesn't support multi-producer!\n");
exit(1);
}
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
}
@@ -43,7 +43,7 @@ static void measure(struct bench_res *res)
res->hits = atomic_swap(&ctx.hits.value, 0);
}
-static void setup_ctx()
+static void setup_ctx(void)
{
setup_libbpf();
@@ -65,58 +65,52 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
}
-static void setup_base()
+static void setup_base(void)
{
setup_ctx();
}
-static void setup_kprobe()
+static void setup_kprobe(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog1);
}
-static void setup_kretprobe()
+static void setup_kretprobe(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog2);
}
-static void setup_rawtp()
+static void setup_rawtp(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog3);
}
-static void setup_fentry()
+static void setup_fentry(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog4);
}
-static void setup_fexit()
+static void setup_fexit(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog5);
}
-static void *consumer(void *input)
-{
- return NULL;
-}
-
const struct bench bench_rename_base = {
.name = "rename-base",
.validate = validate,
.setup = setup_base,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -127,7 +121,6 @@ const struct bench bench_rename_kprobe = {
.validate = validate,
.setup = setup_kprobe,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -138,7 +131,6 @@ const struct bench bench_rename_kretprobe = {
.validate = validate,
.setup = setup_kretprobe,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -149,7 +141,6 @@ const struct bench bench_rename_rawtp = {
.validate = validate,
.setup = setup_rawtp,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -160,7 +151,6 @@ const struct bench bench_rename_fentry = {
.validate = validate,
.setup = setup_fentry,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -171,7 +161,6 @@ const struct bench bench_rename_fexit = {
.validate = validate,
.setup = setup_fexit,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index bde6c9d4cbd4..01bdce692799 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -19,6 +19,8 @@ static struct {
int ringbuf_sz; /* per-ringbuf, in bytes */
bool ringbuf_use_output; /* use slower output API */
int perfbuf_sz; /* per-CPU size, in pages */
+ bool overwrite;
+ bool bench_producer;
} args = {
.back2back = false,
.batch_cnt = 500,
@@ -27,6 +29,8 @@ static struct {
.ringbuf_sz = 512 * 1024,
.ringbuf_use_output = false,
.perfbuf_sz = 128,
+ .overwrite = false,
+ .bench_producer = false,
};
enum {
@@ -35,6 +39,8 @@ enum {
ARG_RB_BATCH_CNT = 2002,
ARG_RB_SAMPLED = 2003,
ARG_RB_SAMPLE_RATE = 2004,
+ ARG_RB_OVERWRITE = 2005,
+ ARG_RB_BENCH_PRODUCER = 2006,
};
static const struct argp_option opts[] = {
@@ -43,6 +49,8 @@ static const struct argp_option opts[] = {
{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+ { "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
+ { "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
{},
};
@@ -72,6 +80,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case ARG_RB_OVERWRITE:
+ args.overwrite = true;
+ break;
+ case ARG_RB_BENCH_PRODUCER:
+ args.bench_producer = true;
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
@@ -88,15 +102,40 @@ const struct argp bench_ringbufs_argp = {
static struct counter buf_hits;
-static inline void bufs_trigger_batch()
+static inline void bufs_trigger_batch(void)
{
(void)syscall(__NR_getpgid);
}
-static void bufs_validate()
+static void bufs_validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+ if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
+ fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
+ exit(1);
+ }
+
+ if (args.overwrite && !args.bench_producer) {
+ fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && env.consumer_cnt != 0) {
+ fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && args.back2back) {
+ fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && args.sampled) {
+ fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (!args.bench_producer && env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
exit(1);
}
@@ -128,12 +167,17 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
- res->hits = atomic_swap(&buf_hits.value, 0);
+ if (args.bench_producer)
+ res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
+ else
+ res->hits = atomic_swap(&buf_hits.value, 0);
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
-static struct ringbuf_bench *ringbuf_setup_skeleton()
+static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
+ __u32 flags;
+ struct bpf_map *ringbuf;
struct ringbuf_bench *skel;
setup_libbpf();
@@ -146,12 +190,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton()
skel->rodata->batch_cnt = args.batch_cnt;
skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+ skel->rodata->bench_producer = args.bench_producer;
if (args.sampled)
/* record data + header take 16 bytes */
skel->rodata->wakeup_data_size = args.sample_rate * 16;
- bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+ ringbuf = skel->maps.ringbuf;
+ if (args.overwrite) {
+ flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
+ bpf_map__set_map_flags(ringbuf, flags);
+ }
+
+ bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
if (ringbuf_bench__load(skel)) {
fprintf(stderr, "failed to load skeleton\n");
@@ -167,21 +218,23 @@ static int buf_process_sample(void *ctx, void *data, size_t len)
return 0;
}
-static void ringbuf_libbpf_setup()
+static void ringbuf_libbpf_setup(void)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
struct bpf_link *link;
+ int map_fd;
ctx->skel = ringbuf_setup_skeleton();
- ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
- buf_process_sample, NULL, NULL);
+
+ map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+ ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
if (!ctx->ringbuf) {
fprintf(stderr, "failed to create ringbuf\n");
exit(1);
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
@@ -223,7 +276,7 @@ static void ringbuf_custom_measure(struct bench_res *res)
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
-static void ringbuf_custom_setup()
+static void ringbuf_custom_setup(void)
{
struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
const size_t page_size = getpagesize();
@@ -271,7 +324,7 @@ static void ringbuf_custom_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
@@ -352,7 +405,7 @@ static void perfbuf_measure(struct bench_res *res)
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
-static struct perfbuf_bench *perfbuf_setup_skeleton()
+static struct perfbuf_bench *perfbuf_setup_skeleton(void)
{
struct perfbuf_bench *skel;
@@ -390,21 +443,16 @@ perfbuf_process_sample_raw(void *input_ctx, int cpu,
return LIBBPF_PERF_EVENT_CONT;
}
-static void perfbuf_libbpf_setup()
+static void perfbuf_libbpf_setup(void)
{
struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
struct perf_event_attr attr;
- struct perf_buffer_raw_opts pb_opts = {
- .event_cb = perfbuf_process_sample_raw,
- .ctx = (void *)(long)0,
- .attr = &attr,
- };
struct bpf_link *link;
ctx->skel = perfbuf_setup_skeleton();
memset(&attr, 0, sizeof(attr));
- attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
/* notify only every Nth sample */
@@ -423,14 +471,15 @@ static void perfbuf_libbpf_setup()
}
ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
- args.perfbuf_sz, &pb_opts);
+ args.perfbuf_sz, &attr,
+ perfbuf_process_sample_raw, NULL, NULL);
if (!ctx->perfbuf) {
fprintf(stderr, "failed to create perfbuf\n");
exit(1);
}
link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
@@ -522,6 +571,7 @@ static void *perfbuf_custom_consumer(void *input)
const struct bench bench_rb_libbpf = {
.name = "rb-libbpf",
+ .argp = &bench_ringbufs_argp,
.validate = bufs_validate,
.setup = ringbuf_libbpf_setup,
.producer_thread = bufs_sample_producer,
@@ -533,6 +583,7 @@ const struct bench bench_rb_libbpf = {
const struct bench bench_rb_custom = {
.name = "rb-custom",
+ .argp = &bench_ringbufs_argp,
.validate = bufs_validate,
.setup = ringbuf_custom_setup,
.producer_thread = bufs_sample_producer,
@@ -544,6 +595,7 @@ const struct bench bench_rb_custom = {
const struct bench bench_pb_libbpf = {
.name = "pb-libbpf",
+ .argp = &bench_ringbufs_argp,
.validate = bufs_validate,
.setup = perfbuf_libbpf_setup,
.producer_thread = bufs_sample_producer,
@@ -555,6 +607,7 @@ const struct bench bench_pb_libbpf = {
const struct bench bench_pb_custom = {
.name = "pb-custom",
+ .argp = &bench_ringbufs_argp,
.validate = bufs_validate,
.setup = perfbuf_libbpf_setup,
.producer_thread = bufs_sample_producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
new file mode 100644
index 000000000000..cfc072aa7fff
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -0,0 +1,599 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <error.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sendfile.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <argp.h>
+#include "bench.h"
+#include "bench_sockmap_prog.skel.h"
+#include "bpf_util.h"
+
+#define FILE_SIZE (128 * 1024)
+#define DATA_REPEAT_SIZE 10
+
+static const char snd_data[DATA_REPEAT_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+/* c1 <-> [p1, p2] <-> c2
+ * RX bench(BPF_SK_SKB_STREAM_VERDICT):
+ * ARG_FW_RX_PASS:
+ * send(p2) -> recv(c2) -> bpf skb passthrough -> recv(c2)
+ * ARG_FW_RX_VERDICT_EGRESS:
+ * send(c1) -> verdict skb to tx queuec of p2 -> recv(c2)
+ * ARG_FW_RX_VERDICT_INGRESS:
+ * send(c1) -> verdict skb to rx queuec of c2 -> recv(c2)
+ *
+ * TX bench(BPF_SK_MSG_VERDIC):
+ * ARG_FW_TX_PASS:
+ * send(p2) -> bpf msg passthrough -> send(p2) -> recv(c2)
+ * ARG_FW_TX_VERDICT_INGRESS:
+ * send(p2) -> verdict msg to rx queue of c2 -> recv(c2)
+ * ARG_FW_TX_VERDICT_EGRESS:
+ * send(p1) -> verdict msg to tx queue of p2 -> recv(c2)
+ */
+enum SOCKMAP_ARG_FLAG {
+ ARG_FW_RX_NORMAL = 11000,
+ ARG_FW_RX_PASS,
+ ARG_FW_RX_VERDICT_EGRESS,
+ ARG_FW_RX_VERDICT_INGRESS,
+ ARG_FW_TX_NORMAL,
+ ARG_FW_TX_PASS,
+ ARG_FW_TX_VERDICT_INGRESS,
+ ARG_FW_TX_VERDICT_EGRESS,
+ ARG_CTL_RX_STRP,
+ ARG_CONSUMER_DELAY_TIME,
+ ARG_PRODUCER_DURATION,
+};
+
+#define TXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_TX_NORMAL)
+
+#define TXMODE_BPF_INGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_INGRESS)
+
+#define TXMODE_BPF_EGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_EGRESS)
+
+#define TXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_TX_PASS)
+
+#define TXMODE_BPF() ( \
+ TXMODE_BPF_PASS() || \
+ TXMODE_BPF_INGRESS() || \
+ TXMODE_BPF_EGRESS())
+
+#define TXMODE() ( \
+ TXMODE_NORMAL() || \
+ TXMODE_BPF())
+
+#define RXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_RX_NORMAL)
+
+#define RXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_RX_PASS)
+
+#define RXMODE_BPF_VERDICT_EGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_EGRESS)
+
+#define RXMODE_BPF_VERDICT_INGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_INGRESS)
+
+#define RXMODE_BPF_VERDICT() ( \
+ RXMODE_BPF_VERDICT_INGRESS() || \
+ RXMODE_BPF_VERDICT_EGRESS())
+
+#define RXMODE_BPF() ( \
+ RXMODE_BPF_PASS() || \
+ RXMODE_BPF_VERDICT())
+
+#define RXMODE() ( \
+ RXMODE_NORMAL() || \
+ RXMODE_BPF())
+
+static struct socmap_ctx {
+ struct bench_sockmap_prog *skel;
+ enum SOCKMAP_ARG_FLAG mode;
+ #define c1 fds[0]
+ #define p1 fds[1]
+ #define c2 fds[2]
+ #define p2 fds[3]
+ #define sfd fds[4]
+ int fds[5];
+ long send_calls;
+ long read_calls;
+ long prod_send;
+ long user_read;
+ int file_size;
+ int delay_consumer;
+ int prod_run_time;
+ int strp_size;
+} ctx = {
+ .prod_send = 0,
+ .user_read = 0,
+ .file_size = FILE_SIZE,
+ .mode = ARG_FW_RX_VERDICT_EGRESS,
+ .fds = {0},
+ .delay_consumer = 0,
+ .prod_run_time = 0,
+ .strp_size = 0,
+};
+
+static void bench_sockmap_prog_destroy(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx.fds); i++) {
+ if (ctx.fds[i] > 0)
+ close(ctx.fds[i]);
+ }
+
+ bench_sockmap_prog__destroy(ctx.skel);
+}
+
+static void init_addr(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static bool set_non_block(int fd, bool blocking)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (flags == -1)
+ return false;
+ flags = blocking ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
+ return (fcntl(fd, F_SETFL, flags) == 0);
+}
+
+static int create_pair(int *c, int *p, int type)
+{
+ struct sockaddr_storage addr;
+ int err, cfd, pfd;
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+ err = getsockname(ctx.sfd, (struct sockaddr *)&addr, &addr_len);
+ if (err) {
+ fprintf(stderr, "getsockname error %d\n", errno);
+ return err;
+ }
+ cfd = socket(AF_INET, type, 0);
+ if (cfd < 0) {
+ fprintf(stderr, "socket error %d\n", errno);
+ return err;
+ }
+
+ err = connect(cfd, (struct sockaddr *)&addr, addr_len);
+ if (err && errno != EINPROGRESS) {
+ fprintf(stderr, "connect error %d\n", errno);
+ return err;
+ }
+
+ pfd = accept(ctx.sfd, NULL, NULL);
+ if (pfd < 0) {
+ fprintf(stderr, "accept error %d\n", errno);
+ return err;
+ }
+ *c = cfd;
+ *p = pfd;
+ return 0;
+}
+
+static int create_sockets(void)
+{
+ struct sockaddr_storage addr;
+ int err, one = 1;
+ socklen_t addr_len;
+
+ init_addr(&addr, &addr_len);
+ ctx.sfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (ctx.sfd < 0) {
+ fprintf(stderr, "socket error:%d\n", errno);
+ return ctx.sfd;
+ }
+ err = setsockopt(ctx.sfd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err) {
+ fprintf(stderr, "setsockopt error:%d\n", errno);
+ return err;
+ }
+
+ err = bind(ctx.sfd, (struct sockaddr *)&addr, addr_len);
+ if (err) {
+ fprintf(stderr, "bind error:%d\n", errno);
+ return err;
+ }
+
+ err = listen(ctx.sfd, SOMAXCONN);
+ if (err) {
+ fprintf(stderr, "listen error:%d\n", errno);
+ return err;
+ }
+
+ err = create_pair(&ctx.c1, &ctx.p1, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 1 error\n");
+ return err;
+ }
+
+ err = create_pair(&ctx.c2, &ctx.p2, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 2 error\n");
+ return err;
+ }
+ printf("create socket fd c1:%d p1:%d c2:%d p2:%d\n",
+ ctx.c1, ctx.p1, ctx.c2, ctx.p2);
+ return 0;
+}
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 2 || env.producer_cnt != 1 ||
+ !env.affinity)
+ goto err;
+ return;
+err:
+ fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary");
+ exit(1);
+}
+
+static int setup_rx_sockmap(void)
+{
+ int verdict, pass, parser, map;
+ int zero = 0, one = 1;
+ int err;
+
+ parser = bpf_program__fd(ctx.skel->progs.prog_skb_parser);
+ verdict = bpf_program__fd(ctx.skel->progs.prog_skb_verdict);
+ pass = bpf_program__fd(ctx.skel->progs.prog_skb_pass);
+ map = bpf_map__fd(ctx.skel->maps.sock_map_rx);
+
+ if (ctx.strp_size != 0) {
+ ctx.skel->bss->pkt_size = ctx.strp_size;
+ err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return err;
+ }
+
+ if (RXMODE_BPF_VERDICT())
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ else if (RXMODE_BPF_PASS())
+ err = bpf_prog_attach(pass, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (RXMODE_BPF_PASS())
+ return bpf_map_update_elem(map, &zero, &ctx.c2, BPF_NOEXIST);
+
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ if (err < 0)
+ return err;
+
+ if (RXMODE_BPF_VERDICT_INGRESS()) {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ } else {
+ err = bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ }
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int setup_tx_sockmap(void)
+{
+ int zero = 0, one = 1;
+ int prog, map;
+ int err;
+
+ map = bpf_map__fd(ctx.skel->maps.sock_map_tx);
+ prog = TXMODE_BPF_PASS() ?
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_pass) :
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_verdict);
+
+ err = bpf_prog_attach(prog, map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (TXMODE_BPF_EGRESS()) {
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ } else {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &zero, &ctx.p2, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ }
+
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void setup(void)
+{
+ int err;
+
+ ctx.skel = bench_sockmap_prog__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "error loading skel\n");
+ exit(1);
+ }
+
+ if (create_sockets()) {
+ fprintf(stderr, "create_net_mode error\n");
+ goto err;
+ }
+
+ if (RXMODE_BPF()) {
+ err = setup_rx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_rx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else if (TXMODE_BPF()) {
+ err = setup_tx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_tx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else {
+ fprintf(stderr, "unknown sockmap bench mode: %d\n", ctx.mode);
+ goto err;
+ }
+
+ return;
+
+err:
+ bench_sockmap_prog_destroy();
+ exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->drops = atomic_swap(&ctx.prod_send, 0);
+ res->hits = atomic_swap(&ctx.skel->bss->process_byte, 0);
+ res->false_hits = atomic_swap(&ctx.user_read, 0);
+ res->important_hits = atomic_swap(&ctx.send_calls, 0);
+ res->important_hits |= atomic_swap(&ctx.read_calls, 0) << 32;
+}
+
+static void verify_data(int *check_pos, char *buf, int rcv)
+{
+ for (int i = 0 ; i < rcv; i++) {
+ if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
+ fprintf(stderr, "verify data fail");
+ exit(1);
+ }
+ (*check_pos)++;
+ if (*check_pos >= FILE_SIZE)
+ *check_pos = 0;
+ }
+}
+
+static void *consumer(void *input)
+{
+ int rcv, sent;
+ int check_pos = 0;
+ int tid = (long)input;
+ int recv_buf_size = FILE_SIZE;
+ char *buf = malloc(recv_buf_size);
+ int delay_read = ctx.delay_consumer;
+
+ if (!buf) {
+ fprintf(stderr, "fail to init read buffer");
+ return NULL;
+ }
+
+ while (true) {
+ if (tid == 1) {
+ /* consumer 1 is unused for tx test and stream verdict test */
+ if (RXMODE_BPF() || TXMODE())
+ return NULL;
+ /* it's only for RX_NORMAL which service as reserve-proxy mode */
+ rcv = read(ctx.p1, buf, recv_buf_size);
+ if (rcv < 0) {
+ fprintf(stderr, "fail to read p1");
+ return NULL;
+ }
+
+ sent = send(ctx.p2, buf, recv_buf_size, 0);
+ if (sent < 0) {
+ fprintf(stderr, "fail to send p2");
+ return NULL;
+ }
+ } else {
+ if (delay_read != 0) {
+ if (delay_read < 0)
+ return NULL;
+ sleep(delay_read);
+ delay_read = 0;
+ }
+ /* read real endpoint by consumer 0 */
+ atomic_inc(&ctx.read_calls);
+ rcv = read(ctx.c2, buf, recv_buf_size);
+ if (rcv < 0 && errno != EAGAIN) {
+ fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
+ return NULL;
+ }
+ verify_data(&check_pos, buf, rcv);
+ atomic_add(&ctx.user_read, rcv);
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ int off = 0, fp, need_sent, sent;
+ int file_size = ctx.file_size;
+ struct timespec ts1, ts2;
+ int target;
+ FILE *file;
+
+ file = tmpfile();
+ if (!file) {
+ fprintf(stderr, "create file for sendfile");
+ return NULL;
+ }
+
+ /* we need simple verify */
+ for (int i = 0; i < file_size; i++) {
+ if (fwrite(&snd_data[off], sizeof(char), 1, file) != 1) {
+ fprintf(stderr, "init tmpfile error");
+ return NULL;
+ }
+ if (++off >= sizeof(snd_data))
+ off = 0;
+ }
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+
+ fp = fileno(file);
+ need_sent = file_size;
+ clock_gettime(CLOCK_MONOTONIC, &ts1);
+
+ if (RXMODE_BPF_VERDICT())
+ target = ctx.c1;
+ else if (TXMODE_BPF_EGRESS())
+ target = ctx.p1;
+ else
+ target = ctx.p2;
+ set_non_block(target, true);
+ while (true) {
+ if (ctx.prod_run_time) {
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+ if (ts2.tv_sec - ts1.tv_sec > ctx.prod_run_time)
+ return NULL;
+ }
+
+ errno = 0;
+ atomic_inc(&ctx.send_calls);
+ sent = sendfile(target, fp, NULL, need_sent);
+ if (sent < 0) {
+ if (errno != EAGAIN && errno != ENOMEM && errno != ENOBUFS) {
+ fprintf(stderr, "sendfile return %d, errorno %d:%s\n",
+ sent, errno, strerror(errno));
+ return NULL;
+ }
+ continue;
+ } else if (sent < need_sent) {
+ need_sent -= sent;
+ atomic_add(&ctx.prod_send, sent);
+ continue;
+ }
+ atomic_add(&ctx.prod_send, need_sent);
+ need_sent = file_size;
+ lseek(fp, 0, SEEK_SET);
+ }
+
+ return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double speed_mbs, prod_mbs, bpf_mbs, send_hz, read_hz;
+
+ prod_mbs = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+ speed_mbs = res->false_hits / 1000000.0 / (delta_ns / 1000000000.0);
+ bpf_mbs = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ send_hz = (res->important_hits & 0xFFFFFFFF) / (delta_ns / 1000000000.0);
+ read_hz = (res->important_hits >> 32) / (delta_ns / 1000000000.0);
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+ printf("Send Speed %8.3lf MB/s (%8.3lf calls/s), BPF Speed %8.3lf MB/s, "
+ "Rcv Speed %8.3lf MB/s (%8.3lf calls/s)\n",
+ prod_mbs, send_hz, bpf_mbs, speed_mbs, read_hz);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ double verdict_mbs_mean = 0.0;
+ long verdict_total = 0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ verdict_mbs_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ verdict_total += res[i].hits / 1000000.0;
+ }
+
+ printf("Summary: total trans %8.3lu MB \u00B1 %5.3lf MB/s\n",
+ verdict_total, verdict_mbs_mean);
+}
+
+static const struct argp_option opts[] = {
+ { "rx-normal", ARG_FW_RX_NORMAL, NULL, 0,
+ "simple reserve-proxy mode, no bfp enabled"},
+ { "rx-pass", ARG_FW_RX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "rx-strp", ARG_CTL_RX_STRP, "Byte", 0,
+ "enable strparser and set the encapsulation size"},
+ { "rx-verdict-egress", ARG_FW_RX_VERDICT_EGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "rx-verdict-ingress", ARG_FW_RX_VERDICT_INGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "tx-normal", ARG_FW_TX_NORMAL, NULL, 0,
+ "simple c-s mode, no bfp enabled"},
+ { "tx-pass", ARG_FW_TX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "tx-verdict-ingress", ARG_FW_TX_VERDICT_INGRESS, NULL, 0,
+ "forward msg to ingress queue of another socket"},
+ { "tx-verdict-egress", ARG_FW_TX_VERDICT_EGRESS, NULL, 0,
+ "forward msg to egress queue of another socket"},
+ { "delay-consumer", ARG_CONSUMER_DELAY_TIME, "SEC", 0,
+ "delay consumer start"},
+ { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
+ "producer duration"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_FW_RX_NORMAL...ARG_FW_TX_VERDICT_EGRESS:
+ ctx.mode = key;
+ break;
+ case ARG_CONSUMER_DELAY_TIME:
+ ctx.delay_consumer = strtol(arg, NULL, 10);
+ break;
+ case ARG_PRODUCER_DURATION:
+ ctx.prod_run_time = strtol(arg, NULL, 10);
+ break;
+ case ARG_CTL_RX_STRP:
+ ctx.strp_size = strtol(arg, NULL, 10);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_sockmap_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Benchmark performance of creating bpf local storage */
+const struct bench bench_sockmap = {
+ .name = "sockmap",
+ .argp = &bench_sockmap_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
new file mode 100644
index 000000000000..a5e1428fd7a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <argp.h>
+#include "bench.h"
+#include "strncmp_bench.skel.h"
+
+static struct strncmp_ctx {
+ struct strncmp_bench *skel;
+} ctx;
+
+static struct strncmp_args {
+ u32 cmp_str_len;
+} args = {
+ .cmp_str_len = 32,
+};
+
+enum {
+ ARG_CMP_STR_LEN = 5000,
+};
+
+static const struct argp_option opts[] = {
+ { "cmp-str-len", ARG_CMP_STR_LEN, "CMP_STR_LEN", 0,
+ "Set the length of compared string" },
+ {},
+};
+
+static error_t strncmp_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_CMP_STR_LEN:
+ args.cmp_str_len = strtoul(arg, NULL, 10);
+ if (!args.cmp_str_len ||
+ args.cmp_str_len >= sizeof(ctx.skel->bss->str)) {
+ fprintf(stderr, "Invalid cmp str len (limit %zu)\n",
+ sizeof(ctx.skel->bss->str));
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_strncmp_argp = {
+ .options = opts,
+ .parser = strncmp_parse_arg,
+};
+
+static void strncmp_validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "strncmp benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+}
+
+static void strncmp_setup(void)
+{
+ int err;
+ char *target;
+ size_t i, sz;
+
+ sz = sizeof(ctx.skel->rodata->target);
+ if (!sz || sz < sizeof(ctx.skel->bss->str)) {
+ fprintf(stderr, "invalid string size (target %zu, src %zu)\n",
+ sz, sizeof(ctx.skel->bss->str));
+ exit(1);
+ }
+
+ setup_libbpf();
+
+ ctx.skel = strncmp_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ srandom(time(NULL));
+ target = ctx.skel->rodata->target;
+ for (i = 0; i < sz - 1; i++)
+ target[i] = '1' + random() % 9;
+ target[sz - 1] = '\0';
+
+ ctx.skel->rodata->cmp_str_len = args.cmp_str_len;
+
+ memcpy(ctx.skel->bss->str, target, args.cmp_str_len);
+ ctx.skel->bss->str[args.cmp_str_len] = '\0';
+ /* Make bss->str < rodata->target */
+ ctx.skel->bss->str[args.cmp_str_len - 1] -= 1;
+
+ err = strncmp_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ strncmp_bench__destroy(ctx.skel);
+ exit(1);
+ }
+}
+
+static void strncmp_attach_prog(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(prog);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void strncmp_no_helper_setup(void)
+{
+ strncmp_setup();
+ strncmp_attach_prog(ctx.skel->progs.strncmp_no_helper);
+}
+
+static void strncmp_helper_setup(void)
+{
+ strncmp_setup();
+ strncmp_attach_prog(ctx.skel->progs.strncmp_helper);
+}
+
+static void *strncmp_producer(void *ctx)
+{
+ while (true)
+ (void)syscall(__NR_getpgid);
+ return NULL;
+}
+
+static void strncmp_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+const struct bench bench_strncmp_no_helper = {
+ .name = "strncmp-no-helper",
+ .argp = &bench_strncmp_argp,
+ .validate = strncmp_validate,
+ .setup = strncmp_no_helper_setup,
+ .producer_thread = strncmp_producer,
+ .measure = strncmp_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_strncmp_helper = {
+ .name = "strncmp-helper",
+ .argp = &bench_strncmp_argp,
+ .validate = strncmp_validate,
+ .setup = strncmp_helper_setup,
+ .producer_thread = strncmp_producer,
+ .measure = strncmp_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 2a0b6c9885a4..34018fc3927f 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -1,58 +1,162 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <unistd.h>
+#include <stdint.h>
+#include "bpf_util.h"
#include "bench.h"
#include "trigger_bench.skel.h"
+#include "trace_helpers.h"
+
+#define MAX_TRIG_BATCH_ITERS 1000
+
+static struct {
+ __u32 batch_iters;
+} args = {
+ .batch_iters = 100,
+};
+
+enum {
+ ARG_TRIG_BATCH_ITERS = 7000,
+};
+
+static const struct argp_option opts[] = {
+ { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
+ "Number of in-kernel iterations per one driver test run"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_TRIG_BATCH_ITERS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
+ fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
+ 1, MAX_TRIG_BATCH_ITERS);
+ argp_usage(state);
+ }
+ args.batch_iters = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_trigger_batch_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* adjust slot shift in inc_hits() if changing */
+#define MAX_BUCKETS 256
+
+#pragma GCC diagnostic ignored "-Wattributes"
/* BPF triggering benchmarks */
static struct trigger_ctx {
struct trigger_bench *skel;
+ bool usermode_counters;
+ int driver_prog_fd;
} ctx;
-static struct counter base_hits;
+static struct counter base_hits[MAX_BUCKETS];
+
+static __always_inline void inc_counter(struct counter *counters)
+{
+ static __thread int tid = 0;
+ unsigned slot;
+
+ if (unlikely(tid == 0))
+ tid = sys_gettid();
+
+ /* multiplicative hashing, it's fast */
+ slot = 2654435769U * tid;
+ slot >>= 24;
+
+ atomic_inc(&base_hits[slot].value); /* use highest byte as an index */
+}
-static void trigger_validate()
+static long sum_and_reset_counters(struct counter *counters)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ int i;
+ long sum = 0;
+
+ for (i = 0; i < MAX_BUCKETS; i++)
+ sum += atomic_swap(&counters[i].value, 0);
+ return sum;
+}
+
+static void trigger_validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
}
-static void *trigger_base_producer(void *input)
+static void *trigger_producer(void *input)
{
- while (true) {
- (void)syscall(__NR_getpgid);
- atomic_inc(&base_hits.value);
+ if (ctx.usermode_counters) {
+ while (true) {
+ (void)syscall(__NR_getpgid);
+ inc_counter(base_hits);
+ }
+ } else {
+ while (true)
+ (void)syscall(__NR_getpgid);
}
return NULL;
}
-static void trigger_base_measure(struct bench_res *res)
+static void *trigger_producer_batch(void *input)
{
- res->hits = atomic_swap(&base_hits.value, 0);
-}
+ int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);
-static void *trigger_producer(void *input)
-{
while (true)
- (void)syscall(__NR_getpgid);
+ bpf_prog_test_run_opts(fd, NULL);
+
return NULL;
}
static void trigger_measure(struct bench_res *res)
{
- res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+ if (ctx.usermode_counters)
+ res->hits = sum_and_reset_counters(base_hits);
+ else
+ res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
}
-static void setup_ctx()
+static void setup_ctx(void)
{
setup_libbpf();
- ctx.skel = trigger_bench__open_and_load();
+ ctx.skel = trigger_bench__open();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
+
+ /* default "driver" BPF program */
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
+
+ ctx.skel->rodata->batch_iters = args.batch_iters;
+}
+
+static void load_ctx(void)
+{
+ int err;
+
+ err = trigger_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
}
static void attach_bpf(struct bpf_program *prog)
@@ -60,125 +164,448 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
}
-static void trigger_tp_setup()
+static void trigger_syscall_count_setup(void)
{
- setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_tp);
+ ctx.usermode_counters = true;
}
-static void trigger_rawtp_setup()
+/* Batched, staying mostly in-kernel triggering setups */
+static void trigger_kernel_count_setup(void)
{
setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count);
}
-static void trigger_kprobe_setup()
+static void trigger_kprobe_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
-static void trigger_fentry_setup()
+static void trigger_kretprobe_setup(void)
+{
+ setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
+ load_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
+}
+
+static void trigger_kprobe_multi_setup(void)
+{
+ setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
+ load_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
+}
+
+static void trigger_kretprobe_multi_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
+ load_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
+}
+
+static void trigger_fentry_setup(void)
+{
+ setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
-static void trigger_fentry_sleep_setup()
+static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ /* Some recursive functions will be skipped in
+ * bpf_get_ksyms -> skip_entry, as they can introduce sufficient
+ * overhead. However, it's difficut to skip all the recursive
+ * functions for a debug kernel.
+ *
+ * So, don't run the kprobe-multi-all and kretprobe-multi-all on
+ * a debug kernel.
+ */
+ if (bpf_get_ksyms(&syms, &cnt, true)) {
+ fprintf(stderr, "failed to get ksyms\n");
+ exit(1);
+ }
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+ opts.retprobe = kretprobe;
+ /* attach empty to all the kernel functions except bpf_get_numa_node_id. */
+ if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
+ fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
+ exit(1);
+ }
+}
+
+static void trigger_kprobe_multi_all_setup(void)
{
+ struct bpf_program *prog, *empty;
+
setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+ empty = ctx.skel->progs.bench_kprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, false);
+ attach_bpf(prog);
}
-static void trigger_fmodret_setup()
+static void trigger_kretprobe_multi_all_setup(void)
{
+ struct bpf_program *prog, *empty;
+
setup_ctx();
+ empty = ctx.skel->progs.bench_kretprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, true);
+ attach_bpf(prog);
+}
+
+static void trigger_fexit_setup(void)
+{
+ setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
+ load_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fexit);
+}
+
+static void trigger_fmodret_setup(void)
+{
+ setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}
-static void *trigger_consumer(void *input)
+static void trigger_tp_setup(void)
+{
+ setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+ attach_bpf(ctx.skel->progs.bench_trigger_tp);
+}
+
+static void trigger_rawtp_setup(void)
+{
+ setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+ attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
+}
+
+/* make sure call is not inlined and not avoided by compiler, so __weak and
+ * inline asm volatile in the body of the function
+ *
+ * There is a performance difference between uprobing at nop location vs other
+ * instructions. So use two different targets, one of which starts with nop
+ * and another doesn't.
+ *
+ * GCC doesn't generate stack setup preamble for these functions due to them
+ * having no input arguments and doing nothing in the body.
+ */
+__nocf_check __weak void uprobe_target_nop(void)
{
+ asm volatile ("nop");
+}
+
+__weak void opaque_noop_func(void)
+{
+}
+
+__nocf_check __weak int uprobe_target_push(void)
+{
+ /* overhead of function call is negligible compared to uprobe
+ * triggering, so this shouldn't affect benchmark results much
+ */
+ opaque_noop_func();
+ return 1;
+}
+
+__nocf_check __weak void uprobe_target_ret(void)
+{
+ asm volatile ("");
+}
+
+static void *uprobe_producer_count(void *input)
+{
+ while (true) {
+ uprobe_target_nop();
+ inc_counter(base_hits);
+ }
return NULL;
}
-const struct bench bench_trig_base = {
- .name = "trig-base",
- .validate = trigger_validate,
- .producer_thread = trigger_base_producer,
- .consumer_thread = trigger_consumer,
- .measure = trigger_base_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
+static void *uprobe_producer_nop(void *input)
+{
+ while (true)
+ uprobe_target_nop();
+ return NULL;
+}
-const struct bench bench_trig_tp = {
- .name = "trig-tp",
- .validate = trigger_validate,
- .setup = trigger_tp_setup,
- .producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
+static void *uprobe_producer_push(void *input)
+{
+ while (true)
+ uprobe_target_push();
+ return NULL;
+}
-const struct bench bench_trig_rawtp = {
- .name = "trig-rawtp",
- .validate = trigger_validate,
- .setup = trigger_rawtp_setup,
- .producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
+static void *uprobe_producer_ret(void *input)
+{
+ while (true)
+ uprobe_target_ret();
+ return NULL;
+}
-const struct bench bench_trig_kprobe = {
- .name = "trig-kprobe",
- .validate = trigger_validate,
- .setup = trigger_kprobe_setup,
- .producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
+#ifdef __x86_64__
+__nocf_check __weak void uprobe_target_nop5(void)
+{
+ asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
+}
-const struct bench bench_trig_fentry = {
- .name = "trig-fentry",
- .validate = trigger_validate,
- .setup = trigger_fentry_setup,
- .producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
+static void *uprobe_producer_nop5(void *input)
+{
+ while (true)
+ uprobe_target_nop5();
+ return NULL;
+}
+#endif
-const struct bench bench_trig_fentry_sleep = {
- .name = "trig-fentry-sleep",
- .validate = trigger_validate,
- .setup = trigger_fentry_sleep_setup,
- .producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
+static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
+{
+ size_t uprobe_offset;
+ struct bpf_link *link;
+ int err;
+
+ setup_libbpf();
+
+ ctx.skel = trigger_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ if (use_multi)
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
+ else
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
+
+ err = trigger_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
-const struct bench bench_trig_fmodret = {
- .name = "trig-fmodret",
+ uprobe_offset = get_uprobe_offset(target_addr);
+ if (use_multi) {
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .retprobe = use_retprobe,
+ .cnt = 1,
+ .offsets = &uprobe_offset,
+ );
+ link = bpf_program__attach_uprobe_multi(
+ ctx.skel->progs.bench_trigger_uprobe_multi,
+ -1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
+ ctx.skel->links.bench_trigger_uprobe_multi = link;
+ } else {
+ link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
+ use_retprobe,
+ -1 /* all PIDs */,
+ "/proc/self/exe",
+ uprobe_offset);
+ ctx.skel->links.bench_trigger_uprobe = link;
+ }
+ if (!link) {
+ fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
+ exit(1);
+ }
+}
+
+static void usermode_count_setup(void)
+{
+ ctx.usermode_counters = true;
+}
+
+static void uprobe_nop_setup(void)
+{
+ usetup(false, false /* !use_multi */, &uprobe_target_nop);
+}
+
+static void uretprobe_nop_setup(void)
+{
+ usetup(true, false /* !use_multi */, &uprobe_target_nop);
+}
+
+static void uprobe_push_setup(void)
+{
+ usetup(false, false /* !use_multi */, &uprobe_target_push);
+}
+
+static void uretprobe_push_setup(void)
+{
+ usetup(true, false /* !use_multi */, &uprobe_target_push);
+}
+
+static void uprobe_ret_setup(void)
+{
+ usetup(false, false /* !use_multi */, &uprobe_target_ret);
+}
+
+static void uretprobe_ret_setup(void)
+{
+ usetup(true, false /* !use_multi */, &uprobe_target_ret);
+}
+
+static void uprobe_multi_nop_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_nop);
+}
+
+static void uretprobe_multi_nop_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_nop);
+}
+
+static void uprobe_multi_push_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_push);
+}
+
+static void uretprobe_multi_push_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_push);
+}
+
+static void uprobe_multi_ret_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_ret);
+}
+
+static void uretprobe_multi_ret_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_ret);
+}
+
+#ifdef __x86_64__
+static void uprobe_nop5_setup(void)
+{
+ usetup(false, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_nop5_setup(void)
+{
+ usetup(true, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uprobe_multi_nop5_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_multi_nop5_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_nop5);
+}
+#endif
+
+const struct bench bench_trig_syscall_count = {
+ .name = "trig-syscall-count",
.validate = trigger_validate,
- .setup = trigger_fmodret_setup,
+ .setup = trigger_syscall_count_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
+
+/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
+#define BENCH_TRIG_KERNEL(KIND, NAME) \
+const struct bench bench_trig_##KIND = { \
+ .name = "trig-" NAME, \
+ .setup = trigger_##KIND##_setup, \
+ .producer_thread = trigger_producer_batch, \
+ .measure = trigger_measure, \
+ .report_progress = hits_drops_report_progress, \
+ .report_final = hits_drops_report_final, \
+ .argp = &bench_trigger_batch_argp, \
+}
+
+BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
+BENCH_TRIG_KERNEL(kprobe, "kprobe");
+BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
+BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
+BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
+BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
+BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
+BENCH_TRIG_KERNEL(fexit, "fexit");
+BENCH_TRIG_KERNEL(fmodret, "fmodret");
+BENCH_TRIG_KERNEL(tp, "tp");
+BENCH_TRIG_KERNEL(rawtp, "rawtp");
+
+/* uprobe benchmarks */
+#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \
+const struct bench bench_trig_##KIND = { \
+ .name = "trig-" NAME, \
+ .validate = trigger_validate, \
+ .setup = KIND##_setup, \
+ .producer_thread = uprobe_producer_##PRODUCER, \
+ .measure = trigger_measure, \
+ .report_progress = hits_drops_report_progress, \
+ .report_final = hits_drops_report_final, \
+}
+
+BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
+BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
+BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
+BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
+BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
+BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
+BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
+BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
+BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
+BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
+BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
+BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
+BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
+#ifdef __x86_64__
+BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
+BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
+BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
+BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
+#endif
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
new file mode 100755
index 000000000000..8ffd385ab2f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Bloom filter map"
+for v in 2 4 8 16 40; do
+for t in 1 4 8 12 16; do
+for h in {1..10}; do
+subtitle "value_size: $v bytes, # threads: $t, # hashes: $h"
+ for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do
+ printf "%'d entries -\n" $e
+ printf "\t"
+ summarize "Lookups, total operations: " \
+ "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)"
+ printf "\t"
+ summarize "Updates, total operations: " \
+ "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)"
+ printf "\t"
+ summarize_percentage "False positive rate: " \
+ "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)"
+ done
+ printf "\n"
+done
+done
+done
+
+header "Hashmap without bloom filter vs. hashmap with bloom filter (throughput, 8 threads)"
+for v in 2 4 8 16 40; do
+for h in {1..10}; do
+subtitle "value_size: $v, # hashes: $h"
+ for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do
+ printf "%'d entries -\n" $e
+ printf "\t"
+ summarize_total "Hashmap without bloom filter: " \
+ "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)"
+ printf "\t"
+ summarize_total "Hashmap with bloom filter: " \
+ "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)"
+ done
+ printf "\n"
+done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
new file mode 100755
index 000000000000..cd2efd3fdef3
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1`
+summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-full-update)
+printf "$summary"
+printf "\n"
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh
new file mode 100755
index 000000000000..d4f5f73b356b
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+for t in 1 4 8 12 16; do
+for i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do
+subtitle "nr_loops: $i, nr_threads: $t"
+ summarize_ops "bpf_loop: " \
+ "$($RUN_BENCH -p $t --nr_loops $i bpf-loop)"
+ printf "\n"
+done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh
new file mode 100755
index 000000000000..9ff5832463a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+htab_mem()
+{
+ echo -n "per-prod-op: "
+ echo -n "$*" | sed -E "s/.* per-prod-op\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+k\/s).*/\1/"
+ echo -n -e ", avg mem: "
+ echo -n "$*" | sed -E "s/.* memory usage\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+MiB).*/\1/"
+ echo -n ", peak mem: "
+ echo "$*" | sed -E "s/.* peak memory usage\s+([0-9]+\.[0-9]+MiB).*/\1/"
+}
+
+summarize_htab_mem()
+{
+ local bench="$1"
+ local summary=$(echo $2 | tail -n1)
+
+ printf "%-20s %s\n" "$bench" "$(htab_mem $summary)"
+}
+
+htab_mem_bench()
+{
+ local name
+
+ for name in overwrite batch_add_batch_del add_del_on_diff_cpu
+ do
+ summarize_htab_mem "$name" "$($RUN_BENCH htab-mem --use-case $name -p8 "$@")"
+ done
+}
+
+header "preallocated"
+htab_mem_bench "--preallocated"
+
+header "normal bpf ma"
+htab_mem_bench
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
new file mode 100755
index 000000000000..2eb2b513a173
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Hashmap Control"
+for i in 10 1000 10000 100000 4194304; do
+subtitle "num keys: $i"
+ summarize_local_storage "hashmap (control) sequential get: "\
+ "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)"
+ printf "\n"
+done
+
+header "Local Storage"
+for i in 1 10 16 17 24 32 100 1000; do
+subtitle "num_maps: $i"
+ summarize_local_storage "local_storage cache sequential get: "\
+ "$(./bench --nr_maps $i local-storage-cache-seq-get)"
+ summarize_local_storage "local_storage cache interleaved get: "\
+ "$(./bench --nr_maps $i local-storage-cache-int-get)"
+ printf "\n"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
new file mode 100755
index 000000000000..3e8a969f2096
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+kthread_pid=`pgrep rcu_tasks_trace_kthread`
+
+if [ -z $kthread_pid ]; then
+ echo "error: Couldn't find rcu_tasks_trace_kthread"
+ exit 1
+fi
+
+./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet local-storage-tasks-trace
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
index 16f774b1cdbe..7b281dbe4165 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in base kprobe kretprobe rawtp fentry fexit fmodret
+for i in base kprobe kretprobe rawtp fentry fexit
do
summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-10s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index af4aa04caba6..83e05e837871 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -1,75 +1,55 @@
#!/bin/bash
-set -eufo pipefail
-
-RUN_BENCH="sudo ./bench -w3 -d10 -a"
-
-function hits()
-{
- echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
-}
-
-function drops()
-{
- echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
-}
+source ./benchs/run_common.sh
-function header()
-{
- local len=${#1}
-
- printf "\n%s\n" "$1"
- for i in $(seq 1 $len); do printf '='; done
- printf '\n'
-}
+set -eufo pipefail
-function summarize()
-{
- bench="$1"
- summary=$(echo $2 | tail -n1)
- printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
-}
+RUN_RB_BENCH="$RUN_BENCH -c1"
header "Single-producer, parallel producer"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH $b)"
+ summarize $b "$($RUN_RB_BENCH $b)"
done
header "Single-producer, parallel producer, sampled notification"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH --rb-sampled $b)"
+ summarize $b "$($RUN_RB_BENCH --rb-sampled $b)"
done
header "Single-producer, back-to-back mode"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH --rb-b2b $b)"
- summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+ summarize $b "$($RUN_RB_BENCH --rb-b2b $b)"
+ summarize $b-sampled "$($RUN_RB_BENCH --rb-sampled --rb-b2b $b)"
done
header "Ringbuf back-to-back, effect of sample rate"
for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
- summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+ summarize "rb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
done
header "Perfbuf back-to-back, effect of sample rate"
for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
- summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+ summarize "pb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
done
header "Ringbuf back-to-back, reserve+commit vs output"
-summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)"
-summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+summarize "reserve" "$($RUN_RB_BENCH --rb-b2b rb-custom)"
+summarize "output" "$($RUN_RB_BENCH --rb-b2b --rb-use-output rb-custom)"
header "Ringbuf sampled, reserve+commit vs output"
-summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)"
-summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+summarize "reserve-sampled" "$($RUN_RB_BENCH --rb-sampled rb-custom)"
+summarize "output-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-use-output rb-custom)"
header "Single-producer, consumer/producer competing on the same CPU, low batch count"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+ summarize $b "$($RUN_RB_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
done
header "Ringbuf, multi-producer contention"
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
- summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+ summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done
+header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh
new file mode 100755
index 000000000000..142697284b45
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+for s in 1 8 64 512 2048 4095; do
+ for b in no-helper helper; do
+ summarize ${b}-${s} "$($RUN_BENCH --cmp-str-len=$s strncmp-${b})"
+ done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index 78e83f243294..f7573708a0c3 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -2,8 +2,22 @@
set -eufo pipefail
-for i in base tp rawtp kprobe fentry fmodret
-do
- summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
- printf "%-10s: %s\n" $i "$summary"
+def_tests=( \
+ usermode-count kernel-count syscall-count \
+ fentry fexit fmodret \
+ rawtp tp \
+ kprobe kprobe-multi kprobe-multi-all \
+ kretprobe kretprobe-multi kretprobe-multi-all \
+)
+
+tests=("$@")
+if [ ${#tests[@]} -eq 0 ]; then
+ tests=("${def_tests[@]}")
+fi
+
+p=${PROD_CNT:-1}
+
+for t in "${tests[@]}"; do
+ summary=$(sudo ./bench -w2 -d5 -a -p$p trig-$t | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-15s: %s\n" $t "$summary"
done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
new file mode 100755
index 000000000000..03f55405484b
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5}
+do
+ summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-15s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
new file mode 100644
index 000000000000..d9f40af82006
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function header()
+{
+ local len=${#1}
+
+ printf "\n%s\n" "$1"
+ for i in $(seq 1 $len); do printf '='; done
+ printf '\n'
+}
+
+function subtitle()
+{
+ local len=${#1}
+ printf "\t%s\n" "$1"
+}
+
+function hits()
+{
+ echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+ echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function percentage()
+{
+ echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/"
+}
+
+function ops()
+{
+ echo -n "throughput: "
+ echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+ echo -n -e ", latency: "
+ echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+}
+
+function local_storage()
+{
+ echo -n "hits throughput: "
+ echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+ echo -n -e ", hits latency: "
+ echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+ echo -n ", important_hits throughput: "
+ echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+}
+
+function total()
+{
+ echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function summarize()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+function summarize_percentage()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s%%\n" "$bench" "$(percentage $summary)"
+}
+
+function summarize_ops()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s\n" "$bench" "$(ops $summary)"
+}
+
+function summarize_local_storage()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s\n" "$bench" "$(local_storage $summary)"
+}
+
+function summarize_total()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s\n" "$bench" "$(total $summary)"
+}
diff --git a/tools/testing/selftests/bpf/bpf_arena_alloc.h b/tools/testing/selftests/bpf/bpf_arena_alloc.h
new file mode 100644
index 000000000000..c27678299e0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_alloc.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+#ifndef __round_mask
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#endif
+#ifndef round_up
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#endif
+
+#ifdef __BPF__
+#define NR_CPUS (sizeof(struct cpumask) * 8)
+
+static void __arena * __arena page_frag_cur_page[NR_CPUS];
+static int __arena page_frag_cur_offset[NR_CPUS];
+
+/* Simple page_frag allocator */
+static inline void __arena* bpf_alloc(unsigned int size)
+{
+ __u64 __arena *obj_cnt;
+ __u32 cpu = bpf_get_smp_processor_id();
+ void __arena *page = page_frag_cur_page[cpu];
+ int __arena *cur_offset = &page_frag_cur_offset[cpu];
+ int offset;
+
+ size = round_up(size, 8);
+ if (size >= PAGE_SIZE - 8)
+ return NULL;
+ if (!page) {
+refill:
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return NULL;
+ cast_kern(page);
+ page_frag_cur_page[cpu] = page;
+ *cur_offset = PAGE_SIZE - 8;
+ obj_cnt = page + PAGE_SIZE - 8;
+ *obj_cnt = 0;
+ } else {
+ cast_kern(page);
+ obj_cnt = page + PAGE_SIZE - 8;
+ }
+
+ offset = *cur_offset - size;
+ if (offset < 0)
+ goto refill;
+
+ (*obj_cnt)++;
+ *cur_offset = offset;
+ return page + offset;
+}
+
+static inline void bpf_free(void __arena *addr)
+{
+ __u64 __arena *obj_cnt;
+
+ addr = (void __arena *)(((long)addr) & ~(PAGE_SIZE - 1));
+ obj_cnt = addr + PAGE_SIZE - 8;
+ if (--(*obj_cnt) == 0)
+ bpf_arena_free_pages(&arena, addr, 1);
+}
+#else
+static inline void __arena* bpf_alloc(unsigned int size) { return NULL; }
+static inline void bpf_free(void __arena *addr) {}
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
new file mode 100644
index 000000000000..16f8ce832004
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef WRITE_ONCE
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+#endif
+
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
+#ifndef arena_container_of
+#define arena_container_of(ptr, type, member) \
+ ({ \
+ void __arena *__mptr = (void __arena *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+#endif
+
+#ifdef __BPF__ /* when compiled as bpf program */
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+/*
+ * for older kernels try sizeof(struct genradix_node)
+ * or flexible:
+ * static inline long __bpf_page_size(void) {
+ * return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
+ * }
+ * but generated code is not great.
+ */
+#endif
+
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#define __arena __attribute__((address_space(1)))
+#define __arena_global __attribute__((address_space(1)))
+#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
+#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
+#else
+#define __arena
+#define __arena_global SEC(".addr_space.1")
+#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
+#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
+#endif
+
+void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
+ int node_id, __u64 flags) __ksym __weak;
+int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak;
+void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+
+#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start)
+
+#else /* when compiled as user space code */
+
+#define __arena
+#define __arg_arena
+#define cast_kern(ptr) /* nop for user space */
+#define cast_user(ptr) /* nop for user space */
+__weak char arena[1];
+
+#ifndef offsetof
+#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
+#endif
+
+static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
+ int node_id, __u64 flags)
+{
+ return NULL;
+}
+static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
+{
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_arena_htab.h b/tools/testing/selftests/bpf/bpf_arena_htab.h
new file mode 100644
index 000000000000..acc01a876668
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_htab.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include <errno.h>
+#include "bpf_arena_alloc.h"
+#include "bpf_arena_list.h"
+
+struct htab_bucket {
+ struct arena_list_head head;
+};
+typedef struct htab_bucket __arena htab_bucket_t;
+
+struct htab {
+ htab_bucket_t *buckets;
+ int n_buckets;
+};
+typedef struct htab __arena htab_t;
+
+static inline htab_bucket_t *__select_bucket(htab_t *htab, __u32 hash)
+{
+ htab_bucket_t *b = htab->buckets;
+
+ cast_kern(b);
+ return &b[hash & (htab->n_buckets - 1)];
+}
+
+static inline arena_list_head_t *select_bucket(htab_t *htab, __u32 hash)
+{
+ return &__select_bucket(htab, hash)->head;
+}
+
+struct hashtab_elem {
+ int hash;
+ int key;
+ int value;
+ struct arena_list_node hash_node;
+};
+typedef struct hashtab_elem __arena hashtab_elem_t;
+
+static hashtab_elem_t *lookup_elem_raw(arena_list_head_t *head, __u32 hash, int key)
+{
+ hashtab_elem_t *l;
+
+ list_for_each_entry(l, head, hash_node)
+ if (l->hash == hash && l->key == key)
+ return l;
+
+ return NULL;
+}
+
+static int htab_hash(int key)
+{
+ return key;
+}
+
+__weak int htab_lookup_elem(htab_t *htab __arg_arena, int key)
+{
+ hashtab_elem_t *l_old;
+ arena_list_head_t *head;
+
+ cast_kern(htab);
+ head = select_bucket(htab, key);
+ l_old = lookup_elem_raw(head, htab_hash(key), key);
+ if (l_old)
+ return l_old->value;
+ return 0;
+}
+
+__weak int htab_update_elem(htab_t *htab __arg_arena, int key, int value)
+{
+ hashtab_elem_t *l_new = NULL, *l_old;
+ arena_list_head_t *head;
+
+ cast_kern(htab);
+ head = select_bucket(htab, key);
+ l_old = lookup_elem_raw(head, htab_hash(key), key);
+
+ l_new = bpf_alloc(sizeof(*l_new));
+ if (!l_new)
+ return -ENOMEM;
+ l_new->key = key;
+ l_new->hash = htab_hash(key);
+ l_new->value = value;
+
+ list_add_head(&l_new->hash_node, head);
+ if (l_old) {
+ list_del(&l_old->hash_node);
+ bpf_free(l_old);
+ }
+ return 0;
+}
+
+void htab_init(htab_t *htab)
+{
+ void __arena *buckets = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+
+ cast_user(buckets);
+ htab->buckets = buckets;
+ htab->n_buckets = 2 * PAGE_SIZE / sizeof(struct htab_bucket);
+}
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
new file mode 100644
index 000000000000..e16fa7d95fcf
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+struct arena_list_node;
+
+typedef struct arena_list_node __arena arena_list_node_t;
+
+struct arena_list_node {
+ arena_list_node_t *next;
+ arena_list_node_t * __arena *pprev;
+};
+
+struct arena_list_head {
+ struct arena_list_node __arena *first;
+};
+typedef struct arena_list_head __arena arena_list_head_t;
+
+#define list_entry(ptr, type, member) arena_container_of(ptr, type, member)
+
+#define list_entry_safe(ptr, type, member) \
+ ({ typeof(*ptr) * ___ptr = (ptr); \
+ ___ptr ? ({ cast_kern(___ptr); list_entry(___ptr, type, member); }) : NULL; \
+ })
+
+#ifndef __BPF__
+static inline void *bpf_iter_num_new(struct bpf_iter_num *it, int i, int j) { return NULL; }
+static inline void bpf_iter_num_destroy(struct bpf_iter_num *it) {}
+static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; }
+#define cond_break ({})
+#define can_loop true
+#endif
+
+/* Safely walk link list elements. Deletion of elements is allowed. */
+#define list_for_each_entry(pos, head, member) \
+ for (void * ___tmp = (pos = list_entry_safe((head)->first, \
+ typeof(*(pos)), member), \
+ (void *)0); \
+ pos && ({ ___tmp = (void *)pos->member.next; 1; }) && can_loop; \
+ pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member))
+
+static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
+{
+ arena_list_node_t *first = h->first, * __arena *tmp;
+
+ cast_user(first);
+ cast_kern(n);
+ WRITE_ONCE(n->next, first);
+ cast_kern(first);
+ if (first) {
+ tmp = &n->next;
+ cast_user(tmp);
+ WRITE_ONCE(first->pprev, tmp);
+ }
+ cast_user(n);
+ WRITE_ONCE(h->first, n);
+
+ tmp = &h->first;
+ cast_user(tmp);
+ cast_kern(n);
+ WRITE_ONCE(n->pprev, tmp);
+}
+
+static inline void __list_del(arena_list_node_t *n)
+{
+ arena_list_node_t *next = n->next;
+ arena_list_node_t * __arena *pprev = n->pprev;
+
+ cast_user(next);
+ cast_kern(pprev);
+ WRITE_ONCE(*pprev, next);
+ if (next) {
+ cast_user(pprev);
+ cast_kern(next);
+ WRITE_ONCE(next->pprev, pprev);
+ }
+}
+
+#define POISON_POINTER_DELTA 0
+
+#define LIST_POISON1 ((void __arena *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void __arena *) 0x122 + POISON_POINTER_DELTA)
+
+static inline void list_del(arena_list_node_t *n)
+{
+ __list_del(n);
+ n->next = LIST_POISON1;
+ n->pprev = LIST_POISON2;
+}
diff --git a/tools/testing/selftests/bpf/bpf_arena_strsearch.h b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
new file mode 100644
index 000000000000..c1b6eaa905bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+__noinline int bpf_arena_strlen(const char __arena *s __arg_arena)
+{
+ const char __arena *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ cond_break;
+ return sc - s;
+}
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match. The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns. Thus, it
+ * does not preprocess the patterns. It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+__noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena)
+{
+ /*
+ * Backtrack to previous * on mismatch and retry starting one
+ * character later in the string. Because * matches all characters
+ * (no exception for /), it can be easily proved that there's
+ * never a need to backtrack multiple levels.
+ */
+ char const __arena *back_pat = NULL, *back_str;
+
+ /*
+ * Loop over each token (character or class) in pat, matching
+ * it against the remaining unmatched tail of str. Return false
+ * on mismatch, or true after matching the trailing nul bytes.
+ */
+ for (;;) {
+ unsigned char c = *str++;
+ unsigned char d = *pat++;
+
+ switch (d) {
+ case '?': /* Wildcard: anything but nul */
+ if (c == '\0')
+ return false;
+ break;
+ case '*': /* Any-length wildcard */
+ if (*pat == '\0') /* Optimize trailing * case */
+ return true;
+ back_pat = pat;
+ back_str = --str; /* Allow zero-length match */
+ break;
+ case '[': { /* Character class */
+ bool match = false, inverted = (*pat == '!');
+ char const __arena *class = pat + inverted;
+ unsigned char a = *class++;
+
+ /*
+ * Iterate over each span in the character class.
+ * A span is either a single character a, or a
+ * range a-b. The first span may begin with ']'.
+ */
+ do {
+ unsigned char b = a;
+
+ if (a == '\0') /* Malformed */
+ goto literal;
+
+ if (class[0] == '-' && class[1] != ']') {
+ b = class[1];
+
+ if (b == '\0')
+ goto literal;
+
+ class += 2;
+ /* Any special action if a > b? */
+ }
+ match |= (a <= c && c <= b);
+ cond_break;
+ } while ((a = *class++) != ']');
+
+ if (match == inverted)
+ goto backtrack;
+ pat = class;
+ }
+ break;
+ case '\\':
+ d = *pat++;
+ __attribute__((__fallthrough__));
+ default: /* Literal character */
+literal:
+ if (c == d) {
+ if (d == '\0')
+ return true;
+ break;
+ }
+backtrack:
+ if (c == '\0' || !back_pat)
+ return false; /* No point continuing */
+ /* Try again from last *, one character later in str. */
+ pat = back_pat;
+ str = ++back_str;
+ break;
+ }
+ cond_break;
+ }
+ return false;
+}
diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h
new file mode 100644
index 000000000000..c550e5711967
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_atomic.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ATOMIC_H
+#define BPF_ATOMIC_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+extern bool CONFIG_X86_64 __kconfig __weak;
+
+/*
+ * __unqual_typeof(x) - Declare an unqualified scalar type, leaving
+ * non-scalar types unchanged,
+ *
+ * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char'
+ * is not type-compatible with 'signed char', and we define a separate case.
+ *
+ * This is copied verbatim from kernel's include/linux/compiler_types.h, but
+ * with default expression (for pointers) changed from (x) to (typeof(x)0).
+ *
+ * This is because LLVM has a bug where for lvalue (x), it does not get rid of
+ * an extra address_space qualifier, but does in case of rvalue (typeof(x)0).
+ * Hence, for pointers, we need to create an rvalue expression to get the
+ * desired type. See https://github.com/llvm/llvm-project/issues/53400.
+ */
+#define __scalar_type_to_expr_cases(type) \
+ unsigned type : (unsigned type)0, signed type : (signed type)0
+
+#define __unqual_typeof(x) \
+ typeof(_Generic((x), \
+ char: (char)0, \
+ __scalar_type_to_expr_cases(char), \
+ __scalar_type_to_expr_cases(short), \
+ __scalar_type_to_expr_cases(int), \
+ __scalar_type_to_expr_cases(long), \
+ __scalar_type_to_expr_cases(long long), \
+ default: (typeof(x))0))
+
+/* No-op for BPF */
+#define cpu_relax() ({})
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val))
+
+#define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new)
+
+#define try_cmpxchg(p, pold, new) \
+ ({ \
+ __unqual_typeof(*(pold)) __o = *(pold); \
+ __unqual_typeof(*(p)) __r = cmpxchg(p, __o, new); \
+ if (__r != __o) \
+ *(pold) = __r; \
+ __r == __o; \
+ })
+
+#define try_cmpxchg_relaxed(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define try_cmpxchg_acquire(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define smp_mb() \
+ ({ \
+ volatile unsigned long __val; \
+ __sync_fetch_and_add(&__val, 0); \
+ })
+
+#define smp_rmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+#define smp_wmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+/* Control dependency provides LOAD->STORE, provide LOAD->LOAD */
+#define smp_acquire__after_ctrl_dep() ({ smp_rmb(); })
+
+#define smp_load_acquire(p) \
+ ({ \
+ __unqual_typeof(*(p)) __v = READ_ONCE(*(p)); \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ __v; \
+ })
+
+#define smp_store_release(p, val) \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ WRITE_ONCE(*(p), val); \
+ })
+
+#define smp_cond_load_relaxed_label(p, cond_expr, label) \
+ ({ \
+ typeof(p) __ptr = (p); \
+ __unqual_typeof(*(p)) VAL; \
+ for (;;) { \
+ VAL = (__unqual_typeof(*(p)))READ_ONCE(*__ptr); \
+ if (cond_expr) \
+ break; \
+ cond_break_label(label); \
+ cpu_relax(); \
+ } \
+ (typeof(*(p)))VAL; \
+ })
+
+#define smp_cond_load_acquire_label(p, cond_expr, label) \
+ ({ \
+ __unqual_typeof(*p) __val = \
+ smp_cond_load_relaxed_label(p, cond_expr, label); \
+ smp_acquire__after_ctrl_dep(); \
+ (typeof(*(p)))__val; \
+ })
+
+#define atomic_read(p) READ_ONCE((p)->counter)
+
+#define atomic_cond_read_relaxed_label(p, cond_expr, label) \
+ smp_cond_load_relaxed_label(&(p)->counter, cond_expr, label)
+
+#define atomic_cond_read_acquire_label(p, cond_expr, label) \
+ smp_cond_load_acquire_label(&(p)->counter, cond_expr, label)
+
+#define atomic_try_cmpxchg_relaxed(p, pold, new) \
+ try_cmpxchg_relaxed(&(p)->counter, pold, new)
+
+#define atomic_try_cmpxchg_acquire(p, pold, new) \
+ try_cmpxchg_acquire(&(p)->counter, pold, new)
+
+#endif /* BPF_ATOMIC_H */
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
new file mode 100644
index 000000000000..2cd9165c7348
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -0,0 +1,656 @@
+#ifndef __BPF_EXPERIMENTAL__
+#define __BPF_EXPERIMENTAL__
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node)))
+
+/* Description
+ * Allocates an object of the type represented by 'local_type_id' in
+ * program BTF. User may use the bpf_core_type_id_local macro to pass the
+ * type ID of a struct in program BTF.
+ *
+ * The 'local_type_id' parameter must be a known constant.
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * A pointer to an object of the type corresponding to the passed in
+ * 'local_type_id', or NULL on failure.
+ */
+extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_new_impl */
+#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))
+
+/* Description
+ * Free an allocated object. All fields of the object that require
+ * destruction will be destructed before the storage is freed.
+ *
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * Void.
+ */
+extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_drop_impl */
+#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)
+
+/* Description
+ * Increment the refcount on a refcounted local kptr, turning the
+ * non-owning reference input into an owning reference in the process.
+ *
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * An owning reference to the object pointed to by 'kptr'
+ */
+extern void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_refcount_acquire_impl */
+#define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL)
+
+/* Description
+ * Add a new entry to the beginning of the BPF linked list.
+ *
+ * The 'meta' and 'off' parameters are rewritten by the verifier, no need
+ * for BPF programs to set them
+ * Returns
+ * 0 if the node was successfully added
+ * -EINVAL if the node wasn't added because it's already in a list
+ */
+extern int bpf_list_push_front_impl(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ void *meta, __u64 off) __ksym;
+
+/* Convenience macro to wrap over bpf_list_push_front_impl */
+#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0)
+
+/* Description
+ * Add a new entry to the end of the BPF linked list.
+ *
+ * The 'meta' and 'off' parameters are rewritten by the verifier, no need
+ * for BPF programs to set them
+ * Returns
+ * 0 if the node was successfully added
+ * -EINVAL if the node wasn't added because it's already in a list
+ */
+extern int bpf_list_push_back_impl(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ void *meta, __u64 off) __ksym;
+
+/* Convenience macro to wrap over bpf_list_push_back_impl */
+#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0)
+
+/* Description
+ * Remove the entry at the beginning of the BPF linked list.
+ * Returns
+ * Pointer to bpf_list_node of deleted entry, or NULL if list is empty.
+ */
+extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym;
+
+/* Description
+ * Remove the entry at the end of the BPF linked list.
+ * Returns
+ * Pointer to bpf_list_node of deleted entry, or NULL if list is empty.
+ */
+extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
+
+/* Description
+ * Remove 'node' from rbtree with root 'root'
+ * Returns
+ * Pointer to the removed node, or NULL if 'root' didn't contain 'node'
+ */
+extern struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
+ struct bpf_rb_node *node) __ksym;
+
+/* Description
+ * Add 'node' to rbtree with root 'root' using comparator 'less'
+ *
+ * The 'meta' and 'off' parameters are rewritten by the verifier, no need
+ * for BPF programs to set them
+ * Returns
+ * 0 if the node was successfully added
+ * -EINVAL if the node wasn't added because it's already in a tree
+ */
+extern int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
+ void *meta, __u64 off) __ksym;
+
+/* Convenience macro to wrap over bpf_rbtree_add_impl */
+#define bpf_rbtree_add(head, node, less) bpf_rbtree_add_impl(head, node, less, NULL, 0)
+
+/* Description
+ * Return the first (leftmost) node in input tree
+ * Returns
+ * Pointer to the node, which is _not_ removed from the tree. If the tree
+ * contains no nodes, returns NULL.
+ */
+extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
+
+/* Description
+ * Allocates a percpu object of the type represented by 'local_type_id' in
+ * program BTF. User may use the bpf_core_type_id_local macro to pass the
+ * type ID of a struct in program BTF.
+ *
+ * The 'local_type_id' parameter must be a known constant.
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * A pointer to a percpu object of the type corresponding to the passed in
+ * 'local_type_id', or NULL on failure.
+ */
+extern void *bpf_percpu_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_percpu_obj_new_impl */
+#define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new_impl(bpf_core_type_id_local(type), NULL))
+
+/* Description
+ * Free an allocated percpu object. All fields of the object that require
+ * destruction will be destructed before the storage is freed.
+ *
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * Void.
+ */
+extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym;
+
+struct bpf_iter_task_vma;
+
+extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
+ struct task_struct *task,
+ __u64 addr) __ksym;
+extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
+extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_drop_impl */
+#define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL)
+
+/* Description
+ * Throw a BPF exception from the program, immediately terminating its
+ * execution and unwinding the stack. The supplied 'cookie' parameter
+ * will be the return value of the program when an exception is thrown,
+ * and the default exception callback is used. Otherwise, if an exception
+ * callback is set using the '__exception_cb(callback)' declaration tag
+ * on the main program, the 'cookie' parameter will be the callback's only
+ * input argument.
+ *
+ * Thus, in case of default exception callback, 'cookie' is subjected to
+ * constraints on the program's return value (as with R0 on exit).
+ * Otherwise, the return value of the marked exception callback will be
+ * subjected to the same checks.
+ *
+ * Note that throwing an exception with lingering resources (locks,
+ * references, etc.) will lead to a verification error.
+ *
+ * Note that callbacks *cannot* call this helper.
+ * Returns
+ * Never.
+ * Throws
+ * An exception with the specified 'cookie' value.
+ */
+extern void bpf_throw(u64 cookie) __ksym;
+
+/* Description
+ * Acquire a reference on the exe_file member field belonging to the
+ * mm_struct that is nested within the supplied task_struct. The supplied
+ * task_struct must be trusted/referenced.
+ * Returns
+ * A referenced file pointer pointing to the exe_file member field of the
+ * mm_struct nested in the supplied task_struct, or NULL.
+ */
+extern struct file *bpf_get_task_exe_file(struct task_struct *task) __ksym;
+
+/* Description
+ * Release a reference on the supplied file. The supplied file must be
+ * acquired.
+ */
+extern void bpf_put_file(struct file *file) __ksym;
+
+/* Description
+ * Resolve a pathname for the supplied path and store it in the supplied
+ * buffer. The supplied path must be trusted/referenced.
+ * Returns
+ * A positive integer corresponding to the length of the resolved pathname,
+ * including the NULL termination character, stored in the supplied
+ * buffer. On error, a negative integer is returned.
+ */
+extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym;
+
+/* This macro must be used to mark the exception callback corresponding to the
+ * main program. For example:
+ *
+ * int exception_cb(u64 cookie) {
+ * return cookie;
+ * }
+ *
+ * SEC("tc")
+ * __exception_cb(exception_cb)
+ * int main_prog(struct __sk_buff *ctx) {
+ * ...
+ * return TC_ACT_OK;
+ * }
+ *
+ * Here, exception callback for the main program will be 'exception_cb'. Note
+ * that this attribute can only be used once, and multiple exception callbacks
+ * specified for the main program will lead to verification error.
+ */
+#define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name)))
+
+#define __bpf_assert_signed(x) _Generic((x), \
+ unsigned long: 0, \
+ unsigned long long: 0, \
+ signed long: 1, \
+ signed long long: 1 \
+)
+
+#define __bpf_assert_check(LHS, op, RHS) \
+ _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
+ _Static_assert(sizeof(LHS) == 8, "Only 8-byte integers are supported\n"); \
+ _Static_assert(__builtin_constant_p(__bpf_assert_signed(LHS)), "internal static assert"); \
+ _Static_assert(__builtin_constant_p((RHS)), "2nd argument must be a constant expression")
+
+#define __bpf_assert(LHS, op, cons, RHS, VAL) \
+ ({ \
+ (void)bpf_throw; \
+ asm volatile ("if %[lhs] " op " %[rhs] goto +2; r1 = %[value]; call bpf_throw" \
+ : : [lhs] "r"(LHS), [rhs] cons(RHS), [value] "ri"(VAL) : ); \
+ })
+
+#define __bpf_assert_op_sign(LHS, op, cons, RHS, VAL, supp_sign) \
+ ({ \
+ __bpf_assert_check(LHS, op, RHS); \
+ if (__bpf_assert_signed(LHS) && !(supp_sign)) \
+ __bpf_assert(LHS, "s" #op, cons, RHS, VAL); \
+ else \
+ __bpf_assert(LHS, #op, cons, RHS, VAL); \
+ })
+
+#define __bpf_assert_op(LHS, op, RHS, VAL, supp_sign) \
+ ({ \
+ if (sizeof(typeof(RHS)) == 8) { \
+ const typeof(RHS) rhs_var = (RHS); \
+ __bpf_assert_op_sign(LHS, op, "r", rhs_var, VAL, supp_sign); \
+ } else { \
+ __bpf_assert_op_sign(LHS, op, "i", RHS, VAL, supp_sign); \
+ } \
+ })
+
+#define __cmp_cannot_be_signed(x) \
+ __builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \
+ __builtin_strcmp(#x, "&") == 0
+
+#define __is_signed_type(type) (((type)(-1)) < (type)1)
+
+#define __bpf_cmp(LHS, OP, PRED, RHS, DEFAULT) \
+ ({ \
+ __label__ l_true; \
+ bool ret = DEFAULT; \
+ asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]" \
+ :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \
+ ret = !DEFAULT; \
+l_true: \
+ ret; \
+ })
+
+/* C type conversions coupled with comparison operator are tricky.
+ * Make sure BPF program is compiled with -Wsign-compare then
+ * __lhs OP __rhs below will catch the mistake.
+ * Be aware that we check only __lhs to figure out the sign of compare.
+ */
+#define _bpf_cmp(LHS, OP, RHS, UNLIKELY) \
+ ({ \
+ typeof(LHS) __lhs = (LHS); \
+ typeof(RHS) __rhs = (RHS); \
+ bool ret; \
+ _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
+ (void)(__lhs OP __rhs); \
+ if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \
+ if (sizeof(__rhs) == 8) \
+ /* "i" will truncate 64-bit constant into s32, \
+ * so we have to use extra register via "r". \
+ */ \
+ ret = __bpf_cmp(__lhs, #OP, "r", __rhs, UNLIKELY); \
+ else \
+ ret = __bpf_cmp(__lhs, #OP, "ri", __rhs, UNLIKELY); \
+ } else { \
+ if (sizeof(__rhs) == 8) \
+ ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, UNLIKELY); \
+ else \
+ ret = __bpf_cmp(__lhs, "s"#OP, "ri", __rhs, UNLIKELY); \
+ } \
+ ret; \
+ })
+
+#ifndef bpf_cmp_unlikely
+#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true)
+#endif
+
+#ifndef bpf_cmp_likely
+#define bpf_cmp_likely(LHS, OP, RHS) \
+ ({ \
+ bool ret = 0; \
+ if (__builtin_strcmp(#OP, "==") == 0) \
+ ret = _bpf_cmp(LHS, !=, RHS, false); \
+ else if (__builtin_strcmp(#OP, "!=") == 0) \
+ ret = _bpf_cmp(LHS, ==, RHS, false); \
+ else if (__builtin_strcmp(#OP, "<=") == 0) \
+ ret = _bpf_cmp(LHS, >, RHS, false); \
+ else if (__builtin_strcmp(#OP, "<") == 0) \
+ ret = _bpf_cmp(LHS, >=, RHS, false); \
+ else if (__builtin_strcmp(#OP, ">") == 0) \
+ ret = _bpf_cmp(LHS, <=, RHS, false); \
+ else if (__builtin_strcmp(#OP, ">=") == 0) \
+ ret = _bpf_cmp(LHS, <, RHS, false); \
+ else \
+ asm volatile("r0 " #OP " invalid compare"); \
+ ret; \
+ })
+#endif
+
+/*
+ * Note that cond_break can only be portably used in the body of a breakable
+ * construct, whereas can_loop can be used anywhere.
+ */
+#ifdef __BPF_FEATURE_MAY_GOTO
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define __cond_break(expr) \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: expr; \
+ l_continue:; \
+ })
+#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define __cond_break(expr) \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: expr; \
+ l_continue:; \
+ })
+#else
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define __cond_break(expr) \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: expr; \
+ l_continue:; \
+ })
+#endif
+#endif
+
+#define cond_break __cond_break(break)
+#define cond_break_label(label) __cond_break(goto label)
+
+#ifndef bpf_nop_mov
+#define bpf_nop_mov(var) \
+ asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
+#endif
+
+/* emit instruction:
+ * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
+ */
+#ifndef bpf_addr_space_cast
+#define bpf_addr_space_cast(var, dst_as, src_as)\
+ asm volatile(".byte 0xBF; \
+ .ifc %[reg], r0; \
+ .byte 0x00; \
+ .endif; \
+ .ifc %[reg], r1; \
+ .byte 0x11; \
+ .endif; \
+ .ifc %[reg], r2; \
+ .byte 0x22; \
+ .endif; \
+ .ifc %[reg], r3; \
+ .byte 0x33; \
+ .endif; \
+ .ifc %[reg], r4; \
+ .byte 0x44; \
+ .endif; \
+ .ifc %[reg], r5; \
+ .byte 0x55; \
+ .endif; \
+ .ifc %[reg], r6; \
+ .byte 0x66; \
+ .endif; \
+ .ifc %[reg], r7; \
+ .byte 0x77; \
+ .endif; \
+ .ifc %[reg], r8; \
+ .byte 0x88; \
+ .endif; \
+ .ifc %[reg], r9; \
+ .byte 0x99; \
+ .endif; \
+ .short %[off]; \
+ .long %[as]" \
+ : [reg]"+r"(var) \
+ : [off]"i"(BPF_ADDR_SPACE_CAST) \
+ , [as]"i"((dst_as << 16) | src_as));
+#endif
+
+void bpf_preempt_disable(void) __weak __ksym;
+void bpf_preempt_enable(void) __weak __ksym;
+
+typedef struct {
+} __bpf_preempt_t;
+
+static inline __bpf_preempt_t __bpf_preempt_constructor(void)
+{
+ __bpf_preempt_t ret = {};
+
+ bpf_preempt_disable();
+ return ret;
+}
+static inline void __bpf_preempt_destructor(__bpf_preempt_t *t)
+{
+ bpf_preempt_enable();
+}
+#define bpf_guard_preempt() \
+ __bpf_preempt_t ___bpf_apply(preempt, __COUNTER__) \
+ __attribute__((__unused__, __cleanup__(__bpf_preempt_destructor))) = \
+ __bpf_preempt_constructor()
+
+/* Description
+ * Assert that a conditional expression is true.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert(cond) if (!(cond)) bpf_throw(0);
+
+/* Description
+ * Assert that a conditional expression is true.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value);
+
+/* Description
+ * Assert that LHS is in the range [BEG, END] (inclusive of both). This
+ * statement updates the known bounds of LHS during verification. Note
+ * that both BEG and END must be constant values, and must fit within the
+ * data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_range(LHS, BEG, END) \
+ ({ \
+ _Static_assert(BEG <= END, "BEG must be <= END"); \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >=, BEG, 0, false); \
+ __bpf_assert_op(LHS, <=, END, 0, false); \
+ })
+
+/* Description
+ * Assert that LHS is in the range [BEG, END] (inclusive of both). This
+ * statement updates the known bounds of LHS during verification. Note
+ * that both BEG and END must be constant values, and must fit within the
+ * data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_range_with(LHS, BEG, END, value) \
+ ({ \
+ _Static_assert(BEG <= END, "BEG must be <= END"); \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >=, BEG, value, false); \
+ __bpf_assert_op(LHS, <=, END, value, false); \
+ })
+
+struct bpf_iter_css_task;
+struct cgroup_subsys_state;
+extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
+ struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym;
+extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym;
+extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym;
+
+struct bpf_iter_task;
+extern int bpf_iter_task_new(struct bpf_iter_task *it,
+ struct task_struct *task, unsigned int flags) __weak __ksym;
+extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym;
+extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;
+
+struct bpf_iter_css;
+extern int bpf_iter_css_new(struct bpf_iter_css *it,
+ struct cgroup_subsys_state *start, unsigned int flags) __weak __ksym;
+extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
+extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
+
+extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
+extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
+extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, void *value),
+ unsigned int flags__k, void *aux__ign) __ksym;
+#define bpf_wq_set_callback(timer, cb, flags) \
+ bpf_wq_set_callback_impl(timer, cb, flags, NULL)
+
+struct bpf_iter_kmem_cache;
+extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym;
+extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym;
+extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym;
+
+struct bpf_iter_dmabuf;
+extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
+
+extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
+ struct bpf_dynptr *value_p) __weak __ksym;
+
+#define PREEMPT_BITS 8
+#define SOFTIRQ_BITS 8
+#define HARDIRQ_BITS 4
+#define NMI_BITS 4
+
+#define PREEMPT_SHIFT 0
+#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x) ((1UL << (x))-1)
+
+#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+
+extern bool CONFIG_PREEMPT_RT __kconfig __weak;
+#ifdef bpf_target_x86
+extern const int __preempt_count __ksym;
+#endif
+
+struct task_struct___preempt_rt {
+ int softirq_disable_cnt;
+} __attribute__((preserve_access_index));
+
+static inline int get_preempt_count(void)
+{
+#if defined(bpf_target_x86)
+ return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+#elif defined(bpf_target_arm64)
+ return bpf_get_current_task_btf()->thread_info.preempt.count;
+#endif
+ return 0;
+}
+
+/* Description
+ * Report whether it is in interrupt context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_interrupt(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK);
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return (pcnt & (NMI_MASK | HARDIRQ_MASK)) |
+ (tsk->softirq_disable_cnt & SOFTIRQ_MASK);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
new file mode 100644
index 000000000000..e0189254bb6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -0,0 +1,98 @@
+#ifndef __BPF_KFUNCS__
+#define __BPF_KFUNCS__
+
+struct bpf_sock_addr_kern;
+
+/* Description
+ * Initializes an skb-type dynptr
+ * Returns
+ * Error code
+ */
+extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
+/* Description
+ * Initializes an xdp-type dynptr
+ * Returns
+ * Error code
+ */
+extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
+extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
+/* Description
+ * Obtain a read-only pointer to the dynptr's data
+ * Returns
+ * Either a direct pointer to the dynptr data or a pointer to the user-provided
+ * buffer if unable to obtain a direct pointer
+ */
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset,
+ void *buffer, __u64 buffer__szk) __ksym __weak;
+
+/* Description
+ * Obtain a read-write pointer to the dynptr's data
+ * Returns
+ * Either a direct pointer to the dynptr data or a pointer to the user-provided
+ * buffer if unable to obtain a direct pointer
+ */
+extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer,
+ __u64 buffer__szk) __ksym __weak;
+
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak;
+extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
+extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
+extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
+extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
+
+/* Description
+ * Modify the address of a AF_UNIX sockaddr.
+ * Returns
+ * -EINVAL if the address size is too big or, 0 if the sockaddr was successfully modified.
+ */
+extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
+ const __u8 *sun_path, __u32 sun_path__sz) __ksym;
+
+/* Description
+ * Allocate and configure a reqsk and link it with a listener and skb.
+ * Returns
+ * Error code
+ */
+struct sock;
+struct bpf_tcp_req_attrs;
+extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk,
+ struct bpf_tcp_req_attrs *attrs, int attrs__sz) __ksym;
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+
+extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
+
+extern int bpf_get_file_xattr(struct file *file, const char *name,
+ struct bpf_dynptr *value_ptr) __ksym;
+extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym;
+
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
+ struct bpf_dynptr *sig_ptr,
+ struct bpf_key *trusted_keyring) __ksym;
+
+extern bool bpf_session_is_return(void) __ksym __weak;
+extern __u64 *bpf_session_cookie(void) __ksym __weak;
+
+struct dentry;
+/* Description
+ * Returns xattr of a dentry
+ * Returns
+ * Error code
+ */
+extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name,
+ struct bpf_dynptr *value_ptr) __ksym __weak;
+
+extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
+ const struct bpf_dynptr *value_p, int flags) __ksym __weak;
+extern int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) __ksym __weak;
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
index 719ab56cdb5d..bc4555a003a7 100644
--- a/tools/testing/selftests/bpf/bpf_legacy.h
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -2,24 +2,22 @@
#ifndef __BPF_LEGACY__
#define __BPF_LEGACY__
-#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
- struct ____btf_map_##name { \
- type_key key; \
- type_val value; \
- }; \
- struct ____btf_map_##name \
- __attribute__ ((section(".maps." #name), used)) \
- ____btf_map_##name = { }
-
+#if __GNUC__ && !__clang__
+/* Functions to emit BPF_LD_ABS and BPF_LD_IND instructions. We
+ * provide the "standard" names as synonyms of the corresponding GCC
+ * builtins. Note how the SKB argument is ignored.
+ */
+#define load_byte(skb, off) __builtin_bpf_load_byte(off)
+#define load_half(skb, off) __builtin_bpf_load_half(off)
+#define load_word(skb, off) __builtin_bpf_load_word(off)
+#else
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
*/
-unsigned long long load_byte(void *skb,
- unsigned long long off) asm("llvm.bpf.load.byte");
-unsigned long long load_half(void *skb,
- unsigned long long off) asm("llvm.bpf.load.half");
-unsigned long long load_word(void *skb,
- unsigned long long off) asm("llvm.bpf.load.word");
+unsigned long long load_byte(void *skb, unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb, unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb, unsigned long long off) asm("llvm.bpf.load.word");
+#endif
#endif
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
deleted file mode 100644
index 9dac9b30f8ef..000000000000
--- a/tools/testing/selftests/bpf/bpf_rlimit.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <sys/resource.h>
-#include <stdio.h>
-
-static __attribute__((constructor)) void bpf_rlimit_ctor(void)
-{
- struct rlimit rlim_old, rlim_new = {
- .rlim_cur = RLIM_INFINITY,
- .rlim_max = RLIM_INFINITY,
- };
-
- getrlimit(RLIMIT_MEMLOCK, &rlim_old);
- /* For the sake of running the test cases, we temporarily
- * set rlimit to infinity in order for kernel to focus on
- * errors from actual test cases and not getting noise
- * from hitting memlock limits. The limit is on per-process
- * basis and not a global one, hence destructor not really
- * needed here.
- */
- if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
- perror("Unable to lift memlock rlimit");
- /* Trying out lower limit, but expect potential test
- * case failures from this!
- */
- rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
- rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
- setrlimit(RLIMIT_MEMLOCK, &rlim_new);
- }
-}
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
deleted file mode 100644
index 91f0fac632f4..000000000000
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __BPF_TCP_HELPERS_H
-#define __BPF_TCP_HELPERS_H
-
-#include <stdbool.h>
-#include <linux/types.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_core_read.h>
-#include <bpf/bpf_tracing.h>
-
-#define BPF_STRUCT_OPS(name, args...) \
-SEC("struct_ops/"#name) \
-BPF_PROG(name, args)
-
-#define tcp_jiffies32 ((__u32)bpf_jiffies64())
-
-struct sock_common {
- unsigned char skc_state;
- __u16 skc_num;
-} __attribute__((preserve_access_index));
-
-enum sk_pacing {
- SK_PACING_NONE = 0,
- SK_PACING_NEEDED = 1,
- SK_PACING_FQ = 2,
-};
-
-struct sock {
- struct sock_common __sk_common;
- unsigned long sk_pacing_rate;
- __u32 sk_pacing_status; /* see enum sk_pacing */
-} __attribute__((preserve_access_index));
-
-struct inet_sock {
- struct sock sk;
-} __attribute__((preserve_access_index));
-
-struct inet_connection_sock {
- struct inet_sock icsk_inet;
- __u8 icsk_ca_state:6,
- icsk_ca_setsockopt:1,
- icsk_ca_dst_locked:1;
- struct {
- __u8 pending;
- } icsk_ack;
- __u64 icsk_ca_priv[104 / sizeof(__u64)];
-} __attribute__((preserve_access_index));
-
-struct request_sock {
- struct sock_common __req_common;
-} __attribute__((preserve_access_index));
-
-struct tcp_sock {
- struct inet_connection_sock inet_conn;
-
- __u32 rcv_nxt;
- __u32 snd_nxt;
- __u32 snd_una;
- __u32 window_clamp;
- __u8 ecn_flags;
- __u32 delivered;
- __u32 delivered_ce;
- __u32 snd_cwnd;
- __u32 snd_cwnd_cnt;
- __u32 snd_cwnd_clamp;
- __u32 snd_ssthresh;
- __u8 syn_data:1, /* SYN includes data */
- syn_fastopen:1, /* SYN includes Fast Open option */
- syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
- syn_fastopen_ch:1, /* Active TFO re-enabling probe */
- syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
- save_syn:1, /* Save headers of SYN packet */
- is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
- syn_smc:1; /* SYN includes SMC */
- __u32 max_packets_out;
- __u32 lsndtime;
- __u32 prior_cwnd;
- __u64 tcp_mstamp; /* most recent packet received/sent */
-} __attribute__((preserve_access_index));
-
-static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
-{
- return (struct inet_connection_sock *)sk;
-}
-
-static __always_inline void *inet_csk_ca(const struct sock *sk)
-{
- return (void *)inet_csk(sk)->icsk_ca_priv;
-}
-
-static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
- return (struct tcp_sock *)sk;
-}
-
-static __always_inline bool before(__u32 seq1, __u32 seq2)
-{
- return (__s32)(seq1-seq2) < 0;
-}
-#define after(seq2, seq1) before(seq1, seq2)
-
-#define TCP_ECN_OK 1
-#define TCP_ECN_QUEUE_CWR 2
-#define TCP_ECN_DEMAND_CWR 4
-#define TCP_ECN_SEEN 8
-
-enum inet_csk_ack_state_t {
- ICSK_ACK_SCHED = 1,
- ICSK_ACK_TIMER = 2,
- ICSK_ACK_PUSHED = 4,
- ICSK_ACK_PUSHED2 = 8,
- ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */
-};
-
-enum tcp_ca_event {
- CA_EVENT_TX_START = 0,
- CA_EVENT_CWND_RESTART = 1,
- CA_EVENT_COMPLETE_CWR = 2,
- CA_EVENT_LOSS = 3,
- CA_EVENT_ECN_NO_CE = 4,
- CA_EVENT_ECN_IS_CE = 5,
-};
-
-struct ack_sample {
- __u32 pkts_acked;
- __s32 rtt_us;
- __u32 in_flight;
-} __attribute__((preserve_access_index));
-
-struct rate_sample {
- __u64 prior_mstamp; /* starting timestamp for interval */
- __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
- __s32 delivered; /* number of packets delivered over interval */
- long interval_us; /* time for tp->delivered to incr "delivered" */
- __u32 snd_interval_us; /* snd interval for delivered packets */
- __u32 rcv_interval_us; /* rcv interval for delivered packets */
- long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
- int losses; /* number of packets marked lost upon ACK */
- __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
- __u32 prior_in_flight; /* in flight before this ACK */
- bool is_app_limited; /* is sample from packet with bubble in pipe? */
- bool is_retrans; /* is sample from retransmission? */
- bool is_ack_delayed; /* is this (likely) a delayed ACK? */
-} __attribute__((preserve_access_index));
-
-#define TCP_CA_NAME_MAX 16
-#define TCP_CONG_NEEDS_ECN 0x2
-
-struct tcp_congestion_ops {
- char name[TCP_CA_NAME_MAX];
- __u32 flags;
-
- /* initialize private data (optional) */
- void (*init)(struct sock *sk);
- /* cleanup private data (optional) */
- void (*release)(struct sock *sk);
-
- /* return slow start threshold (required) */
- __u32 (*ssthresh)(struct sock *sk);
- /* do new cwnd calculation (required) */
- void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
- /* call before changing ca_state (optional) */
- void (*set_state)(struct sock *sk, __u8 new_state);
- /* call when cwnd event occurs (optional) */
- void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
- /* call when ack arrives (optional) */
- void (*in_ack_event)(struct sock *sk, __u32 flags);
- /* new value of cwnd after loss (required) */
- __u32 (*undo_cwnd)(struct sock *sk);
- /* hook for packet ack accounting (optional) */
- void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* override sysctl_tcp_min_tso_segs */
- __u32 (*min_tso_segs)(struct sock *sk);
- /* returns the multiplier used in tcp_sndbuf_expand (optional) */
- __u32 (*sndbuf_expand)(struct sock *sk);
- /* call when packets are delivered to update cwnd and pacing rate,
- * after all the ca_state processing. (optional)
- */
- void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
- void *owner;
-};
-
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define min_not_zero(x, y) ({ \
- typeof(x) __x = (x); \
- typeof(y) __y = (y); \
- __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-
-static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
-{
- __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
-
- acked -= cwnd - tp->snd_cwnd;
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
-
- return acked;
-}
-
-static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
-{
- return tp->snd_cwnd < tp->snd_ssthresh;
-}
-
-static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
-
- /* If in slow start, ensure cwnd grows to twice what was ACKed. */
- if (tcp_in_slow_start(tp))
- return tp->snd_cwnd < 2 * tp->max_packets_out;
-
- return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
-}
-
-static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
-{
- /* If credits accumulated at a higher w, apply them gently now. */
- if (tp->snd_cwnd_cnt >= w) {
- tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd++;
- }
-
- tp->snd_cwnd_cnt += acked;
- if (tp->snd_cwnd_cnt >= w) {
- __u32 delta = tp->snd_cwnd_cnt / w;
-
- tp->snd_cwnd_cnt -= delta * w;
- tp->snd_cwnd += delta;
- }
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
-}
-
-#endif
diff --git a/tools/testing/selftests/bpf/bpf_testmod/Makefile b/tools/testing/selftests/bpf/bpf_testmod/Makefile
deleted file mode 100644
index 15cb36c4483a..000000000000
--- a/tools/testing/selftests/bpf/bpf_testmod/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
-
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
-MODULES = bpf_testmod.ko
-
-obj-m += bpf_testmod.o
-CFLAGS_bpf_testmod.o = -I$(src)
-
-all:
- +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
-
-clean:
- +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
-
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
deleted file mode 100644
index 141d8da687d2..000000000000
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2020 Facebook */
-#include <linux/error-injection.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/percpu-defs.h>
-#include <linux/sysfs.h>
-#include <linux/tracepoint.h>
-#include "bpf_testmod.h"
-
-#define CREATE_TRACE_POINTS
-#include "bpf_testmod-events.h"
-
-DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
-
-noinline ssize_t
-bpf_testmod_test_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t off, size_t len)
-{
- struct bpf_testmod_test_read_ctx ctx = {
- .buf = buf,
- .off = off,
- .len = len,
- };
-
- trace_bpf_testmod_test_read(current, &ctx);
-
- return -EIO; /* always fail */
-}
-EXPORT_SYMBOL(bpf_testmod_test_read);
-ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
-
-noinline ssize_t
-bpf_testmod_test_write(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t off, size_t len)
-{
- struct bpf_testmod_test_write_ctx ctx = {
- .buf = buf,
- .off = off,
- .len = len,
- };
-
- trace_bpf_testmod_test_write_bare(current, &ctx);
-
- return -EIO; /* always fail */
-}
-EXPORT_SYMBOL(bpf_testmod_test_write);
-ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO);
-
-static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
- .attr = { .name = "bpf_testmod", .mode = 0666, },
- .read = bpf_testmod_test_read,
- .write = bpf_testmod_test_write,
-};
-
-static int bpf_testmod_init(void)
-{
- return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
-}
-
-static void bpf_testmod_exit(void)
-{
- return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
-}
-
-module_init(bpf_testmod_init);
-module_exit(bpf_testmod_exit);
-
-MODULE_AUTHOR("Andrii Nakryiko");
-MODULE_DESCRIPTION("BPF selftests module");
-MODULE_LICENSE("Dual BSD/GPL");
-
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
deleted file mode 100644
index b3892dc40111..000000000000
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2020 Facebook */
-#ifndef _BPF_TESTMOD_H
-#define _BPF_TESTMOD_H
-
-#include <linux/types.h>
-
-struct bpf_testmod_test_read_ctx {
- char *buf;
- loff_t off;
- size_t len;
-};
-
-struct bpf_testmod_test_write_ctx {
- char *buf;
- loff_t off;
- size_t len;
-};
-
-#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index a3352a64c067..4bc2d25f33e1 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -6,6 +6,7 @@
#include <stdlib.h>
#include <string.h>
#include <errno.h>
+#include <syscall.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
static inline unsigned int bpf_num_possible_cpus(void)
@@ -20,6 +21,25 @@ static inline unsigned int bpf_num_possible_cpus(void)
return possible_cpus;
}
+/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst
+ * is zero-terminated string no matter what (unless sz == 0, in which case
+ * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs
+ * in what is returned. Given this is internal helper, it's trivial to extend
+ * this, when necessary. Use this instead of strncpy inside libbpf source code.
+ */
+static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
+{
+ size_t i;
+
+ if (sz == 0)
+ return;
+
+ sz--;
+ for (i = 0; i < sz && src[i]; i++)
+ dst[i] = src[i];
+ dst[i] = '\0';
+}
+
#define __bpf_percpu_val_align __attribute__((__aligned__(8)))
#define BPF_DECLARE_PERCPU(type, name) \
@@ -40,4 +60,18 @@ static inline unsigned int bpf_num_possible_cpus(void)
(offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
#endif
+/* Availability of gettid across glibc versions is hit-and-miss, therefore
+ * fallback to syscall in this macro and use it everywhere.
+ */
+#ifndef sys_gettid
+#define sys_gettid() syscall(SYS_gettid)
+#endif
+
+/* and poison usage to ensure it does not creep back in. */
+#pragma GCC poison gettid
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
#endif /* __BPF_UTIL__ */
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index 48f90490f922..1c1c2c26690a 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -23,11 +23,15 @@ static const char * const btf_kind_str_mapping[] = {
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
+ [BTF_KIND_FLOAT] = "FLOAT",
+ [BTF_KIND_DECL_TAG] = "DECL_TAG",
+ [BTF_KIND_TYPE_TAG] = "TYPE_TAG",
+ [BTF_KIND_ENUM64] = "ENUM64",
};
static const char *btf_kind_str(__u16 kind)
{
- if (kind > BTF_KIND_DATASEC)
+ if (kind > BTF_KIND_ENUM64)
return "UNKNOWN";
return btf_kind_str_mapping[kind];
}
@@ -107,6 +111,7 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
case BTF_KIND_VOLATILE:
case BTF_KIND_RESTRICT:
case BTF_KIND_TYPEDEF:
+ case BTF_KIND_TYPE_TAG:
fprintf(out, " type_id=%u", t->type);
break;
case BTF_KIND_ARRAY: {
@@ -135,14 +140,32 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
}
case BTF_KIND_ENUM: {
const struct btf_enum *v = btf_enum(t);
+ const char *fmt_str;
- fprintf(out, " size=%u vlen=%u", t->size, vlen);
+ fmt_str = btf_kflag(t) ? "\n\t'%s' val=%d" : "\n\t'%s' val=%u";
+ fprintf(out, " encoding=%s size=%u vlen=%u",
+ btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen);
for (i = 0; i < vlen; i++, v++) {
- fprintf(out, "\n\t'%s' val=%u",
+ fprintf(out, fmt_str,
btf_str(btf, v->name_off), v->val);
}
break;
}
+ case BTF_KIND_ENUM64: {
+ const struct btf_enum64 *v = btf_enum64(t);
+ const char *fmt_str;
+
+ fmt_str = btf_kflag(t) ? "\n\t'%s' val=%lld" : "\n\t'%s' val=%llu";
+
+ fprintf(out, " encoding=%s size=%u vlen=%u",
+ btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen);
+ for (i = 0; i < vlen; i++, v++) {
+ fprintf(out, fmt_str,
+ btf_str(btf, v->name_off),
+ ((__u64)v->val_hi32 << 32) | v->val_lo32);
+ }
+ break;
+ }
case BTF_KIND_FWD:
fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct");
break;
@@ -173,6 +196,13 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
}
break;
}
+ case BTF_KIND_FLOAT:
+ fprintf(out, " size=%u", t->size);
+ break;
+ case BTF_KIND_DECL_TAG:
+ fprintf(out, " type_id=%u component_idx=%d",
+ t->type, btf_decl_tag(t)->component_idx);
+ break;
default:
break;
}
@@ -206,7 +236,7 @@ int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[])
int i;
bool ok = true;
- ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types");
+ ASSERT_EQ(btf__type_cnt(btf) - 1, nr_types, "btf_nr_types");
for (i = 1; i <= nr_types; i++) {
if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump"))
@@ -229,7 +259,6 @@ const char *btf_type_c_dump(const struct btf *btf)
static char buf[16 * 1024];
FILE *buf_file;
struct btf_dump *d = NULL;
- struct btf_dump_opts opts = {};
int err, i;
buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
@@ -238,22 +267,26 @@ const char *btf_type_c_dump(const struct btf *btf)
return NULL;
}
- opts.ctx = buf_file;
- d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+ d = btf_dump__new(btf, btf_dump_printf, buf_file, NULL);
if (libbpf_get_error(d)) {
fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d));
- return NULL;
+ goto err_out;
}
- for (i = 1; i <= btf__get_nr_types(btf); i++) {
+ for (i = 1; i < btf__type_cnt(btf); i++) {
err = btf_dump__dump_type(d, i);
if (err) {
fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err);
- return NULL;
+ goto err_out;
}
}
+ btf_dump__free(d);
fflush(buf_file);
fclose(buf_file);
return buf;
+err_out:
+ btf_dump__free(d);
+ fclose(buf_file);
+ return NULL;
}
diff --git a/tools/testing/selftests/bpf/cap_helpers.c b/tools/testing/selftests/bpf/cap_helpers.c
new file mode 100644
index 000000000000..98f840c3a38f
--- /dev/null
+++ b/tools/testing/selftests/bpf/cap_helpers.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cap_helpers.h"
+
+/* Avoid including <sys/capability.h> from the libcap-devel package,
+ * so directly declare them here and use them from glibc.
+ */
+int capget(cap_user_header_t header, cap_user_data_t data);
+int capset(cap_user_header_t header, const cap_user_data_t data);
+
+int cap_enable_effective(__u64 caps, __u64 *old_caps)
+{
+ struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+ struct __user_cap_header_struct hdr = {
+ .version = _LINUX_CAPABILITY_VERSION_3,
+ };
+ __u32 cap0 = caps;
+ __u32 cap1 = caps >> 32;
+ int err;
+
+ err = capget(&hdr, data);
+ if (err)
+ return -errno;
+
+ if (old_caps)
+ *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
+
+ if ((data[0].effective & cap0) == cap0 &&
+ (data[1].effective & cap1) == cap1)
+ return 0;
+
+ data[0].effective |= cap0;
+ data[1].effective |= cap1;
+ err = capset(&hdr, data);
+ if (err)
+ return -errno;
+
+ return 0;
+}
+
+int cap_disable_effective(__u64 caps, __u64 *old_caps)
+{
+ struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+ struct __user_cap_header_struct hdr = {
+ .version = _LINUX_CAPABILITY_VERSION_3,
+ };
+ __u32 cap0 = caps;
+ __u32 cap1 = caps >> 32;
+ int err;
+
+ err = capget(&hdr, data);
+ if (err)
+ return -errno;
+
+ if (old_caps)
+ *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
+
+ if (!(data[0].effective & cap0) && !(data[1].effective & cap1))
+ return 0;
+
+ data[0].effective &= ~cap0;
+ data[1].effective &= ~cap1;
+ err = capset(&hdr, data);
+ if (err)
+ return -errno;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/cap_helpers.h b/tools/testing/selftests/bpf/cap_helpers.h
new file mode 100644
index 000000000000..8dcb28557f76
--- /dev/null
+++ b/tools/testing/selftests/bpf/cap_helpers.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CAP_HELPERS_H
+#define __CAP_HELPERS_H
+
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <errno.h>
+
+#ifndef CAP_PERFMON
+#define CAP_PERFMON 38
+#endif
+
+#ifndef CAP_BPF
+#define CAP_BPF 39
+#endif
+
+int cap_enable_effective(__u64 caps, __u64 *old_caps);
+int cap_disable_effective(__u64 caps, __u64 *old_caps);
+
+#endif
diff --git a/tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h b/tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h
new file mode 100644
index 000000000000..a525d3544fd7
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+BPF_RETVAL_HOOK(ingress, "cgroup_skb/ingress", __sk_buff, -EINVAL)
+BPF_RETVAL_HOOK(egress, "cgroup_skb/egress", __sk_buff, -EINVAL)
+BPF_RETVAL_HOOK(sock_create, "cgroup/sock_create", bpf_sock, 0)
+BPF_RETVAL_HOOK(sock_ops, "sockops", bpf_sock_ops, -EINVAL)
+BPF_RETVAL_HOOK(dev, "cgroup/dev", bpf_cgroup_dev_ctx, 0)
+BPF_RETVAL_HOOK(bind4, "cgroup/bind4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(bind6, "cgroup/bind6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(connect4, "cgroup/connect4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(connect6, "cgroup/connect6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(post_bind4, "cgroup/post_bind4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(post_bind6, "cgroup/post_bind6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(sendmsg4, "cgroup/sendmsg4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(sendmsg6, "cgroup/sendmsg6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(sysctl, "cgroup/sysctl", bpf_sysctl, 0)
+BPF_RETVAL_HOOK(recvmsg4, "cgroup/recvmsg4", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(recvmsg6, "cgroup/recvmsg6", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getsockopt, "cgroup/getsockopt", bpf_sockopt, 0)
+BPF_RETVAL_HOOK(setsockopt, "cgroup/setsockopt", bpf_sockopt, 0)
+BPF_RETVAL_HOOK(getpeername4, "cgroup/getpeername4", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getpeername6, "cgroup/getpeername6", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getsockname4, "cgroup/getsockname4", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getsockname6, "cgroup/getsockname6", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(sock_release, "cgroup/sock_release", bpf_sock, 0)
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 033051717ba5..20cede4db3ce 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -4,6 +4,7 @@
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <sys/xattr.h>
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
@@ -12,62 +13,81 @@
#include <unistd.h>
#include <ftw.h>
-
#include "cgroup_helpers.h"
+#include "bpf_util.h"
/*
* To avoid relying on the system setup, when setup_cgroup_env is called
- * we create a new mount namespace, and cgroup namespace. The cgroup2
- * root is mounted at CGROUP_MOUNT_PATH
- *
- * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
- * It's easier to create our own mount namespace and manage it ourselves.
+ * we create a new mount namespace, and cgroup namespace. The cgroupv2
+ * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
+ * have cgroupv2 enabled at this point in time. It's easier to create our
+ * own mount namespace and manage it ourselves. We assume /mnt exists.
*
- * We assume /mnt exists.
+ * Related cgroupv1 helpers are named *classid*(), since we only use the
+ * net_cls controller for tagging net_cls.classid. We assume the default
+ * mount under /sys/fs/cgroup/net_cls, which should be the case for the
+ * vast majority of users.
*/
#define WALK_FD_LIMIT 16
+
#define CGROUP_MOUNT_PATH "/mnt"
+#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup"
+#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR "/cgroup-test-work-dir"
+
+#define format_cgroup_path_pid(buf, path, pid) \
+ snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \
+ CGROUP_WORK_DIR, pid, path)
+
#define format_cgroup_path(buf, path) \
- snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
- CGROUP_WORK_DIR, path)
+ format_cgroup_path_pid(buf, path, getpid())
-/**
- * enable_all_controllers() - Enable all available cgroup v2 controllers
- *
- * Enable all available cgroup v2 controllers in order to increase
- * the code coverage.
- *
- * If successful, 0 is returned.
- */
-static int enable_all_controllers(char *cgroup_path)
+#define format_parent_cgroup_path(buf, path) \
+ format_cgroup_path_pid(buf, path, getppid())
+
+#define format_classid_path_pid(buf, pid) \
+ snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \
+ CGROUP_WORK_DIR, pid)
+
+#define format_classid_path(buf) \
+ format_classid_path_pid(buf, getpid())
+
+static __thread bool cgroup_workdir_mounted;
+
+static void __cleanup_cgroup_environment(void);
+
+static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
char path[PATH_MAX + 1];
- char buf[PATH_MAX];
+ char enable[PATH_MAX + 1];
char *c, *c2;
int fd, cfd;
ssize_t len;
- snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
- fd = open(path, O_RDONLY);
- if (fd < 0) {
- log_err("Opening cgroup.controllers: %s", path);
- return 1;
- }
-
- len = read(fd, buf, sizeof(buf) - 1);
- if (len < 0) {
+ /* If not controllers are passed, enable all available controllers */
+ if (!controllers) {
+ snprintf(path, sizeof(path), "%s/cgroup.controllers",
+ cgroup_path);
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ log_err("Opening cgroup.controllers: %s", path);
+ return 1;
+ }
+ len = read(fd, enable, sizeof(enable) - 1);
+ if (len < 0) {
+ close(fd);
+ log_err("Reading cgroup.controllers: %s", path);
+ return 1;
+ } else if (len == 0) { /* No controllers to enable */
+ close(fd);
+ return 0;
+ }
+ enable[len] = 0;
close(fd);
- log_err("Reading cgroup.controllers: %s", path);
- return 1;
+ } else {
+ bpf_strlcpy(enable, controllers, sizeof(enable));
}
- buf[len] = 0;
- close(fd);
-
- /* No controllers available? We're probably on cgroup v1. */
- if (len == 0)
- return 0;
snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
cfd = open(path, O_RDWR);
@@ -76,7 +96,7 @@ static int enable_all_controllers(char *cgroup_path)
return 1;
}
- for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
+ for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
if (dprintf(cfd, "+%s\n", c) <= 0) {
log_err("Enabling controller %s: %s", c, path);
close(cfd);
@@ -88,6 +108,87 @@ static int enable_all_controllers(char *cgroup_path)
}
/**
+ * enable_controllers() - Enable cgroup v2 controllers
+ * @relative_path: The cgroup path, relative to the workdir
+ * @controllers: List of controllers to enable in cgroup.controllers format
+ *
+ *
+ * Enable given cgroup v2 controllers, if @controllers is NULL, enable all
+ * available controllers.
+ *
+ * If successful, 0 is returned.
+ */
+int enable_controllers(const char *relative_path, const char *controllers)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ return __enable_controllers(cgroup_path, controllers);
+}
+
+static int __write_cgroup_file(const char *cgroup_path, const char *file,
+ const char *buf)
+{
+ char file_path[PATH_MAX + 1];
+ int fd;
+
+ snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file);
+ fd = open(file_path, O_RDWR);
+ if (fd < 0) {
+ log_err("Opening %s", file_path);
+ return 1;
+ }
+
+ if (dprintf(fd, "%s", buf) <= 0) {
+ log_err("Writing to %s", file_path);
+ close(fd);
+ return 1;
+ }
+ close(fd);
+ return 0;
+}
+
+/**
+ * write_cgroup_file() - Write to a cgroup file
+ * @relative_path: The cgroup path, relative to the workdir
+ * @file: The name of the file in cgroupfs to write to
+ * @buf: Buffer to write to the file
+ *
+ * Write to a file in the given cgroup's directory.
+ *
+ * If successful, 0 is returned.
+ */
+int write_cgroup_file(const char *relative_path, const char *file,
+ const char *buf)
+{
+ char cgroup_path[PATH_MAX - 24];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ return __write_cgroup_file(cgroup_path, file, buf);
+}
+
+/**
+ * write_cgroup_file_parent() - Write to a cgroup file in the parent process
+ * workdir
+ * @relative_path: The cgroup path, relative to the parent process workdir
+ * @file: The name of the file in cgroupfs to write to
+ * @buf: Buffer to write to the file
+ *
+ * Write to a file in the given cgroup's directory under the parent process
+ * workdir.
+ *
+ * If successful, 0 is returned.
+ */
+int write_cgroup_file_parent(const char *relative_path, const char *file,
+ const char *buf)
+{
+ char cgroup_path[PATH_MAX - 24];
+
+ format_parent_cgroup_path(cgroup_path, relative_path);
+ return __write_cgroup_file(cgroup_path, file, buf);
+}
+
+/**
* setup_cgroup_environment() - Setup the cgroup environment
*
* After calling this function, cleanup_cgroup_environment should be called
@@ -102,6 +203,11 @@ int setup_cgroup_environment(void)
format_cgroup_path(cgroup_workdir, "");
+ if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) {
+ log_err("mkdir mount");
+ return 1;
+ }
+
if (unshare(CLONE_NEWNS)) {
log_err("unshare");
return 1;
@@ -116,16 +222,19 @@ int setup_cgroup_environment(void)
log_err("mount cgroup2");
return 1;
}
+ cgroup_workdir_mounted = true;
/* Cleanup existing failed runs, now that the environment is setup */
- cleanup_cgroup_environment();
+ __cleanup_cgroup_environment();
if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
log_err("mkdir cgroup work dir");
return 1;
}
- if (enable_all_controllers(cgroup_workdir))
+ /* Enable all available controllers to increase test coverage */
+ if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
+ __enable_controllers(cgroup_workdir, NULL))
return 1;
return 0;
@@ -139,8 +248,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr,
return 0;
}
-
-static int join_cgroup_from_top(char *cgroup_path)
+static int join_cgroup_from_top(const char *cgroup_path)
{
char cgroup_procs_path[PATH_MAX + 1];
pid_t pid = getpid();
@@ -166,7 +274,7 @@ static int join_cgroup_from_top(char *cgroup_path)
/**
* join_cgroup() - Join a cgroup
- * @path: The cgroup path, relative to the workdir, to join
+ * @relative_path: The cgroup path, relative to the workdir, to join
*
* This function expects a cgroup to already be created, relative to the cgroup
* work dir, and it joins it. For example, passing "/my-cgroup" as the path
@@ -175,19 +283,82 @@ static int join_cgroup_from_top(char *cgroup_path)
*
* On success, it returns 0, otherwise on failure it returns 1.
*/
-int join_cgroup(const char *path)
+int join_cgroup(const char *relative_path)
{
char cgroup_path[PATH_MAX + 1];
- format_cgroup_path(cgroup_path, path);
+ format_cgroup_path(cgroup_path, relative_path);
return join_cgroup_from_top(cgroup_path);
}
/**
+ * join_root_cgroup() - Join the root cgroup
+ *
+ * This function joins the root cgroup.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_root_cgroup(void)
+{
+ return join_cgroup_from_top(CGROUP_MOUNT_PATH);
+}
+
+/**
+ * join_parent_cgroup() - Join a cgroup in the parent process workdir
+ * @relative_path: The cgroup path, relative to parent process workdir, to join
+ *
+ * See join_cgroup().
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_parent_cgroup(const char *relative_path)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_parent_cgroup_path(cgroup_path, relative_path);
+ return join_cgroup_from_top(cgroup_path);
+}
+
+/**
+ * set_cgroup_xattr() - Set xattr on a cgroup dir
+ * @relative_path: The cgroup path, relative to the workdir, to set xattr
+ * @name: xattr name
+ * @value: xattr value
+ *
+ * This function set xattr on cgroup dir.
+ *
+ * On success, it returns 0, otherwise on failure it returns -1.
+ */
+int set_cgroup_xattr(const char *relative_path,
+ const char *name,
+ const char *value)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ return setxattr(cgroup_path, name, value, strlen(value) + 1, 0);
+}
+
+/**
+ * __cleanup_cgroup_environment() - Delete temporary cgroups
+ *
+ * This is a helper for cleanup_cgroup_environment() that is responsible for
+ * deletion of all temporary cgroups that have been created during the test.
+ */
+static void __cleanup_cgroup_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, "");
+ join_cgroup_from_top(CGROUP_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
* cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
*
* This is an idempotent function to delete all temporary cgroups that
- * have been created during the test, including the cgroup testing work
+ * have been created during the test and unmount the cgroup testing work
* directory.
*
* At call time, it moves the calling process to the root cgroup, and then
@@ -198,16 +369,72 @@ int join_cgroup(const char *path)
*/
void cleanup_cgroup_environment(void)
{
- char cgroup_workdir[PATH_MAX + 1];
+ __cleanup_cgroup_environment();
+ if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH))
+ log_err("umount cgroup2");
+ cgroup_workdir_mounted = false;
+}
- format_cgroup_path(cgroup_workdir, "");
- join_cgroup_from_top(CGROUP_MOUNT_PATH);
- nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+/**
+ * get_root_cgroup() - Get the FD of the root cgroup
+ *
+ * On success, it returns the file descriptor. On failure, it returns -1.
+ * If there is a failure, it prints the error to stderr.
+ */
+int get_root_cgroup(void)
+{
+ int fd;
+
+ fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
+ if (fd < 0) {
+ log_err("Opening root cgroup");
+ return -1;
+ }
+ return fd;
+}
+
+/*
+ * remove_cgroup() - Remove a cgroup
+ * @relative_path: The cgroup path, relative to the workdir, to remove
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir. It also expects the cgroup doesn't have any children or live
+ * processes and it removes the cgroup.
+ *
+ * On failure, it will print an error to stderr.
+ */
+void remove_cgroup(const char *relative_path)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ if (rmdir(cgroup_path))
+ log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
+}
+
+/*
+ * remove_cgroup_pid() - Remove a cgroup setup by process identified by PID
+ * @relative_path: The cgroup path, relative to the workdir, to remove
+ * @pid: PID to be used to find cgroup_path
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir. It also expects the cgroup doesn't have any children or live
+ * processes and it removes the cgroup.
+ *
+ * On failure, it will print an error to stderr.
+ */
+void remove_cgroup_pid(const char *relative_path, int pid)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path_pid(cgroup_path, relative_path, pid);
+ if (rmdir(cgroup_path))
+ log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
}
/**
* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
- * @path: The cgroup path, relative to the workdir, to join
+ * @relative_path: The cgroup path, relative to the workdir, to join
*
* This function creates a cgroup under the top level workdir and returns the
* file descriptor. It is idempotent.
@@ -215,14 +442,14 @@ void cleanup_cgroup_environment(void)
* On success, it returns the file descriptor. On failure it returns -1.
* If there is a failure, it prints the error to stderr.
*/
-int create_and_get_cgroup(const char *path)
+int create_and_get_cgroup(const char *relative_path)
{
char cgroup_path[PATH_MAX + 1];
int fd;
- format_cgroup_path(cgroup_path, path);
+ format_cgroup_path(cgroup_path, relative_path);
if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
- log_err("mkdiring cgroup %s .. %s", path, cgroup_path);
+ log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path);
return -1;
}
@@ -236,26 +463,23 @@ int create_and_get_cgroup(const char *path)
}
/**
- * get_cgroup_id() - Get cgroup id for a particular cgroup path
- * @path: The cgroup path, relative to the workdir, to join
+ * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path
+ * @cgroup_workdir: The absolute cgroup path
*
* On success, it returns the cgroup id. On failure it returns 0,
* which is an invalid cgroup id.
* If there is a failure, it prints the error to stderr.
*/
-unsigned long long get_cgroup_id(const char *path)
+static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
{
int dirfd, err, flags, mount_id, fhsize;
union {
unsigned long long cgid;
unsigned char raw_bytes[8];
} id;
- char cgroup_workdir[PATH_MAX + 1];
struct file_handle *fhp, *fhp2;
unsigned long long ret = 0;
- format_cgroup_path(cgroup_workdir, path);
-
dirfd = AT_FDCWD;
flags = 0;
fhsize = sizeof(*fhp);
@@ -291,6 +515,14 @@ free_mem:
return ret;
}
+unsigned long long get_cgroup_id(const char *relative_path)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, relative_path);
+ return get_cgroup_id_from_path(cgroup_workdir);
+}
+
int cgroup_setup_and_join(const char *path) {
int cg_fd;
@@ -313,3 +545,207 @@ int cgroup_setup_and_join(const char *path) {
}
return cg_fd;
}
+
+/**
+ * setup_classid_environment() - Setup the cgroupv1 net_cls environment
+ *
+ * This function should only be called in a custom mount namespace, e.g.
+ * created by running setup_cgroup_environment.
+ *
+ * After calling this function, cleanup_classid_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to setup the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_classid_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+
+ if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
+ errno != EBUSY) {
+ log_err("mount cgroup base");
+ return 1;
+ }
+
+ if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup net_cls");
+ return 1;
+ }
+
+ if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {
+ if (errno != EBUSY) {
+ log_err("mount cgroup net_cls");
+ return 1;
+ }
+
+ if (rmdir(NETCLS_MOUNT_PATH)) {
+ log_err("rmdir cgroup net_cls");
+ return 1;
+ }
+ if (umount(CGROUP_MOUNT_DFLT)) {
+ log_err("umount cgroup base");
+ return 1;
+ }
+ }
+
+ cleanup_classid_environment();
+
+ if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup work dir");
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * set_classid() - Set a cgroupv1 net_cls classid
+ *
+ * Writes the classid into the cgroup work dir's net_cls.classid
+ * file in order to later on trigger socket tagging.
+ *
+ * We leverage the current pid as the classid, ensuring unique identification.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1. If there
+ * is a failure, it prints the error to stderr.
+ */
+int set_classid(void)
+{
+ char cgroup_workdir[PATH_MAX - 42];
+ char cgroup_classid_path[PATH_MAX + 1];
+ int fd, rc = 0;
+
+ format_classid_path(cgroup_workdir);
+ snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),
+ "%s/net_cls.classid", cgroup_workdir);
+
+ fd = open(cgroup_classid_path, O_WRONLY);
+ if (fd < 0) {
+ log_err("Opening cgroup classid: %s", cgroup_classid_path);
+ return 1;
+ }
+
+ if (dprintf(fd, "%u\n", getpid()) < 0) {
+ log_err("Setting cgroup classid");
+ rc = 1;
+ }
+
+ close(fd);
+ return rc;
+}
+
+/**
+ * join_classid() - Join a cgroupv1 net_cls classid
+ *
+ * This function expects the cgroup work dir to be already created, as we
+ * join it here. This causes the process sockets to be tagged with the given
+ * net_cls classid.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_classid(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return join_cgroup_from_top(cgroup_workdir);
+}
+
+/**
+ * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_classid_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ join_cgroup_from_top(NETCLS_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
+ * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup
+ */
+unsigned long long get_classid_cgroup_id(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return get_cgroup_id_from_path(cgroup_workdir);
+}
+
+/**
+ * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.
+ * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be
+ * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like
+ * "net_cls,net_prio".
+ */
+int get_cgroup1_hierarchy_id(const char *subsys_name)
+{
+ char *c, *c2, *c3, *c4;
+ bool found = false;
+ char line[1024];
+ FILE *file;
+ int i, id;
+
+ if (!subsys_name)
+ return -1;
+
+ file = fopen("/proc/self/cgroup", "r");
+ if (!file) {
+ log_err("fopen /proc/self/cgroup");
+ return -1;
+ }
+
+ while (fgets(line, 1024, file)) {
+ i = 0;
+ for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) {
+ if (i == 0) {
+ id = strtol(c, NULL, 10);
+ } else if (i == 1) {
+ if (!strcmp(c, subsys_name)) {
+ found = true;
+ break;
+ }
+
+ /* Multiple subsystems may share one single mount point */
+ for (c3 = strtok_r(c, ",", &c4); c3;
+ c3 = strtok_r(NULL, ",", &c4)) {
+ if (!strcmp(c, subsys_name)) {
+ found = true;
+ break;
+ }
+ }
+ }
+ i++;
+ }
+ if (found)
+ break;
+ }
+ fclose(file);
+ return found ? id : -1;
+}
+
+/**
+ * open_classid() - Open a cgroupv1 net_cls classid
+ *
+ * This function expects the cgroup work dir to be already created, as we
+ * open it here.
+ *
+ * On success, it returns the file descriptor. On failure it returns -1.
+ */
+int open_classid(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return open(cgroup_workdir, O_RDONLY);
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 5fe3d88e4f0d..3857304be874 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CGROUP_HELPERS_H
#define __CGROUP_HELPERS_H
+
#include <errno.h>
#include <string.h>
@@ -8,12 +9,38 @@
#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+/* cgroupv2 related */
+int enable_controllers(const char *relative_path, const char *controllers);
+int write_cgroup_file(const char *relative_path, const char *file,
+ const char *buf);
+int write_cgroup_file_parent(const char *relative_path, const char *file,
+ const char *buf);
+int cgroup_setup_and_join(const char *relative_path);
+int get_root_cgroup(void);
+int create_and_get_cgroup(const char *relative_path);
+void remove_cgroup(const char *relative_path);
+void remove_cgroup_pid(const char *relative_path, int pid);
+unsigned long long get_cgroup_id(const char *relative_path);
+int get_cgroup1_hierarchy_id(const char *subsys_name);
+
+int join_cgroup(const char *relative_path);
+int join_root_cgroup(void);
+int join_parent_cgroup(const char *relative_path);
+
+int set_cgroup_xattr(const char *relative_path,
+ const char *name,
+ const char *value);
-int cgroup_setup_and_join(const char *path);
-int create_and_get_cgroup(const char *path);
-int join_cgroup(const char *path);
int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
-unsigned long long get_cgroup_id(const char *path);
-#endif
+/* cgroupv1 related */
+int set_classid(void);
+int join_classid(void);
+unsigned long long get_classid_cgroup_id(void);
+int open_classid(void);
+
+int setup_classid_environment(void);
+void cleanup_classid_environment(void);
+
+#endif /* __CGROUP_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/cgroup_tcp_skb.h b/tools/testing/selftests/bpf/cgroup_tcp_skb.h
new file mode 100644
index 000000000000..7f6b24f102fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_tcp_skb.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+/* Define states of a socket to tracking messages sending to and from the
+ * socket.
+ *
+ * These states are based on rfc9293 with some modifications to support
+ * tracking of messages sent out from a socket. For example, when a SYN is
+ * received, a new socket is transiting to the SYN_RECV state defined in
+ * rfc9293. But, we put it in SYN_RECV_SENDING_SYN_ACK state and when
+ * SYN-ACK is sent out, it moves to SYN_RECV state. With this modification,
+ * we can track the message sent out from a socket.
+ */
+
+#ifndef __CGROUP_TCP_SKB_H__
+#define __CGROUP_TCP_SKB_H__
+
+enum {
+ INIT,
+ CLOSED,
+ SYN_SENT,
+ SYN_RECV_SENDING_SYN_ACK,
+ SYN_RECV,
+ ESTABLISHED,
+ FIN_WAIT1,
+ FIN_WAIT2,
+ CLOSE_WAIT_SENDING_ACK,
+ CLOSE_WAIT,
+ CLOSING,
+ LAST_ACK,
+ TIME_WAIT_SENDING_ACK,
+ TIME_WAIT,
+};
+
+#endif /* __CGROUP_TCP_SKB_H__ */
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 37e1f303fc11..558839e3c185 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -1,46 +1,133 @@
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
CONFIG_BPF=y
-CONFIG_BPF_SYSCALL=y
-CONFIG_NET_CLS_BPF=m
CONFIG_BPF_EVENTS=y
-CONFIG_TEST_BPF=m
-CONFIG_CGROUP_BPF=y
-CONFIG_NETDEVSIM=m
-CONFIG_NET_CLS_ACT=y
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_INGRESS=y
-CONFIG_NET_IPIP=y
-CONFIG_IPV6=y
-CONFIG_NET_IPGRE_DEMUX=y
-CONFIG_NET_IPGRE=y
-CONFIG_IPV6_GRE=y
-CONFIG_CRYPTO_USER_API_HASH=m
-CONFIG_CRYPTO_HMAC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_VXLAN=y
-CONFIG_GENEVE=y
-CONFIG_NET_CLS_FLOWER=m
-CONFIG_LWTUNNEL=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_BPF_LIRC_MODE2=y
+CONFIG_BPF_LSM=y
CONFIG_BPF_STREAM_PARSER=y
-CONFIG_XDP_SOCKETS=y
+CONFIG_BPF_SYSCALL=y
+# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set
+CONFIG_CGROUP_BPF=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_USER_API=y
+CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_CRYPTO_SKCIPHER=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_AES=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
+CONFIG_DUMMY=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FPROBE=y
CONFIG_FTRACE_SYSCALLS=y
-CONFIG_IPV6_TUNNEL=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FS_VERITY=y
+CONFIG_GENEVE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_IMA=y
+CONFIG_IMA_READ_POLICY=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_INET_ESP=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_TARGET_SYNPROXY=y
+CONFIG_IPV6=y
+CONFIG_IPV6_FOU=y
+CONFIG_IPV6_FOU_TUNNEL=y
CONFIG_IPV6_GRE=y
CONFIG_IPV6_SEG6_BPF=y
-CONFIG_NET_FOU=m
-CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_IPV6_FOU=m
-CONFIG_IPV6_FOU_TUNNEL=m
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_TUNNEL=y
+CONFIG_KEYS=y
+CONFIG_LIRC=y
+CONFIG_LIVEPATCH=y
+CONFIG_LWTUNNEL=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
CONFIG_MPLS=y
-CONFIG_NET_MPLS_GSO=m
-CONFIG_MPLS_ROUTING=m
-CONFIG_MPLS_IPTUNNEL=m
-CONFIG_IPV6_SIT=m
-CONFIG_BPF_JIT=y
-CONFIG_BPF_LSM=y
+CONFIG_MPLS_IPTUNNEL=y
+CONFIG_MPLS_ROUTING=y
+CONFIG_MPTCP=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_NET_ACT_MIRRED=y
+CONFIG_NET_ACT_SKBMOD=y
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_BPF=y
+CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=y
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPIP=y
+CONFIG_NET_MPLS_GSO=y
+CONFIG_NET_SCH_BPF=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_SCHED=y
+CONFIG_NETDEVSIM=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_SYNPROXY=y
+CONFIG_NETFILTER_XT_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NETKIT=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_TABLES=y
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NETFILTER_INGRESS=y
+CONFIG_IP_NF_IPTABLES_LEGACY=y
+CONFIG_IP6_NF_IPTABLES_LEGACY=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NF_FLOW_TABLE=y
+CONFIG_NF_FLOW_TABLE_INET=y
+CONFIG_NETFILTER_NETLINK=y
+CONFIG_NFT_FLOW_OFFLOAD=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_NF_NAT=y
+CONFIG_PACKET=y
+CONFIG_RC_CORE=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_LIVEPATCH=m
CONFIG_SECURITY=y
-CONFIG_LIRC=y
-CONFIG_IMA=y
CONFIG_SECURITYFS=y
-CONFIG_IMA_WRITE_POLICY=y
-CONFIG_IMA_READ_POLICY=y
-CONFIG_BLK_DEV_LOOP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_TEST_BPF=m
+CONFIG_UDMABUF=y
+CONFIG_USERFAULTFD=y
+CONFIG_VSOCKETS=y
+CONFIG_VXLAN=y
+CONFIG_XDP_SOCKETS=y
+CONFIG_XFRM_INTERFACE=y
+CONFIG_TCP_CONG_DCTCP=y
+CONFIG_TCP_CONG_BBR=y
+CONFIG_INFINIBAND=y
+CONFIG_SMC=y
+CONFIG_SMC_HS_CTRL_BPF=y
+CONFIG_DIBS=y
+CONFIG_DIBS_LO=y \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
new file mode 100644
index 000000000000..7efad36ceb26
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -0,0 +1,154 @@
+CONFIG_ARCH_VEXPRESS=y
+CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
+CONFIG_ARM_SMMU_V3=y
+CONFIG_ATA=y
+CONFIG_AUDIT=y
+CONFIG_BINFMT_MISC=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT_DEFAULT_ON=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BRIDGE=m
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_COMPAT=y
+CONFIG_CPUSETS=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CRYPTO_USER_API_RNG=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_INFO_REDUCED=n
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_SG=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_DRM=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FANOTIFY=y
+CONFIG_FB=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_FUSE_FS=y
+CONFIG_FW_CFG_SYSFS_CMDLINE=y
+CONFIG_FW_CFG_SYSFS=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HEADERS_INSTALL=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HW_RANDOM=y
+CONFIG_HZ_100=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IKHEADERS=y
+CONFIG_INET6_ESP=y
+CONFIG_INET=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPVLAN=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KERNEL_UNCOMPRESSED=y
+CONFIG_KPROBES_ON_FTRACE=y
+CONFIG_KPROBES=y
+CONFIG_KRETPROBES=y
+CONFIG_KSM=y
+CONFIG_LATENCYTOP=y
+CONFIG_LIVEPATCH=y
+CONFIG_LOCK_STAT=y
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MAILBOX=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NET_KEY=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NLMON=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_OVERLAY_FS=y
+CONFIG_PACKET_DIAG=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI=y
+CONFIG_PL320_MBOX=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SAMPLE_SECCOMP=y
+CONFIG_SAMPLES=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_STACK_TRACER=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_XACCT=y
+CONFIG_TCG_TIS=y
+CONFIG_TCG_TPM=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el
new file mode 100644
index 000000000000..b53afb5e0b71
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.ppc64el
@@ -0,0 +1,92 @@
+CONFIG_ALTIVEC=y
+CONFIG_AUDIT=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CPUSETS=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_FS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FRAME_POINTER=y
+CONFIG_FRAME_WARN=1280
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_INET=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_MEMCG=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NONPORTABLE=y
+CONFIG_NR_CPUS=256
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PPC64=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_PSERIES=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SMP=y
+CONFIG_SOC_VIRT=y
+CONFIG_SYSVIPC=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_THREAD_SHIFT=14
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_VSX=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.riscv64 b/tools/testing/selftests/bpf/config.riscv64
new file mode 100644
index 000000000000..7bee24a79a71
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.riscv64
@@ -0,0 +1,83 @@
+CONFIG_AUDIT=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CPUSETS=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_FS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FRAME_POINTER=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_INET=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_MEMCG=y
+CONFIG_NAMESPACES=y
+CONFIG_NET=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_NONPORTABLE=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NR_CPUS=256
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS=y
+CONFIG_RISCV_ISA_C=y
+CONFIG_RISCV_PMU=y
+CONFIG_RISCV_PMU_SBI=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SMP=y
+CONFIG_SOC_VIRT=y
+CONFIG_SYSVIPC=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TLS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
new file mode 100644
index 000000000000..db61878148e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -0,0 +1,125 @@
+CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
+CONFIG_AUDIT=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT_DEFAULT_ON=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_CPUSETS=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CRYPTO_USER_API_RNG=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_SG=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FANOTIFY=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_MARCH_Z10_FEATURES=y
+CONFIG_HAVE_MARCH_Z196_FEATURES=y
+CONFIG_HEADERS_INSTALL=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HW_RANDOM=y
+CONFIG_HZ_100=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IKHEADERS=y
+CONFIG_INET6_ESP=y
+CONFIG_INET=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPVLAN=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KERNEL_UNCOMPRESSED=y
+CONFIG_KPROBES=y
+CONFIG_KPROBES_ON_FTRACE=y
+CONFIG_KRETPROBES=y
+CONFIG_KSM=y
+CONFIG_LATENCYTOP=y
+CONFIG_LIVEPATCH=y
+CONFIG_LOCK_STAT=y
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MARCH_Z196=y
+CONFIG_MARCH_Z196_TUNE=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_NAMESPACES=y
+CONFIG_NET=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_KEY=y
+CONFIG_NET_VRF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SAMPLE_SECCOMP=y
+CONFIG_SAMPLES=y
+CONFIG_SCHED_TRACER=y
+CONFIG_SCSI=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_STACK_TRACER=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TLS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm
new file mode 100644
index 000000000000..da543b24c144
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.vm
@@ -0,0 +1,15 @@
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_9P_FS=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_FUSE_FS=y
+CONFIG_FUSE_PASSTHROUGH=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_NET_9P=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_FS=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
new file mode 100644
index 000000000000..42ad817b00ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -0,0 +1,227 @@
+CONFIG_AGP=y
+CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
+CONFIG_AGP_SIS=y
+CONFIG_AGP_VIA=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_AUDIT=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BINFMT_MISC=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BONDING=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CMA=y
+CONFIG_CMA_AREAS=7
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPUSETS=y
+CONFIG_CRYPTO_BLAKE2B=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_XXHASH=y
+CONFIG_DCB=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEFAULT_FQ_CODEL=y
+CONFIG_DEFAULT_RENO=y
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DMA_CMA=y
+CONFIG_DNS_RESOLVER=y
+CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FB=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_VESA=y
+CONFIG_FONT_8x16=y
+CONFIG_FONT_MINI_4x6=y
+CONFIG_FONTS=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_FW_LOADER_USER_HELPER=y
+CONFIG_GART_IOMMU=y
+CONFIG_GENERIC_PHY=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_KYE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HPET=y
+CONFIG_HUGETLBFS=y
+CONFIG_HWPOISON_INJECT=y
+CONFIG_HZ_1000=y
+CONFIG_INET=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_INTEL_POWERCLAMP=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IRQ_POLL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_KEXEC=y
+CONFIG_KPROBES=y
+CONFIG_KSM=y
+CONFIG_LEGACY_VSYSCALL_NONE=y
+CONFIG_LOG_BUF_SHIFT=21
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=0
+CONFIG_LOGO=y
+CONFIG_LSM="selinux,bpf,integrity"
+CONFIG_MAC_PARTITION=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MCORE2=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_NAMESPACES=y
+CONFIG_NET=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_SCH_DEFAULT=y
+CONFIG_NET_SCH_FQ_CODEL=y
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_VRF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_NETLINK_LOG=y
+CONFIG_NETFILTER_NETLINK_QUEUE=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETLABEL=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NO_HZ=y
+CONFIG_NR_CPUS=128
+CONFIG_NUMA=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_NVMEM=y
+CONFIG_OSF_PARTITION=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_MSI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_PREEMPT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTP_1588_CLOCK=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SMP=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_SYNC_FILE=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_TLS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_USER_NS=y
+CONFIG_VALIDATE_FS_PARSER=y
+CONFIG_VETH=y
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_X86_ACPI_CPUFREQ=y
+CONFIG_X86_CPUID=y
+CONFIG_X86_MSR=y
+CONFIG_X86_POWERNOW_K8=y
+CONFIG_XDP_SOCKETS_DIAG=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_USER=y
+CONFIG_ZEROPLUS_FF=y
diff --git a/tools/testing/selftests/bpf/disasm.c b/tools/testing/selftests/bpf/disasm.c
new file mode 120000
index 000000000000..b1571927bd54
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm.c
@@ -0,0 +1 @@
+../../../../kernel/bpf/disasm.c \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/disasm.h b/tools/testing/selftests/bpf/disasm.h
new file mode 120000
index 000000000000..8054fd497340
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm.h
@@ -0,0 +1 @@
+../../../../kernel/bpf/disasm.h \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/disasm_helpers.c b/tools/testing/selftests/bpf/disasm_helpers.c
new file mode 100644
index 000000000000..f529f1c8c171
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm_helpers.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include <bpf/bpf.h>
+#include "disasm.h"
+
+struct print_insn_context {
+ char scratch[16];
+ char *buf;
+ size_t sz;
+};
+
+static void print_insn_cb(void *private_data, const char *fmt, ...)
+{
+ struct print_insn_context *ctx = private_data;
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(ctx->buf, ctx->sz, fmt, args);
+ va_end(args);
+}
+
+static const char *print_call_cb(void *private_data, const struct bpf_insn *insn)
+{
+ struct print_insn_context *ctx = private_data;
+
+ /* For pseudo calls verifier.c:jit_subprogs() hides original
+ * imm to insn->off and changes insn->imm to be an index of
+ * the subprog instead.
+ */
+ if (insn->src_reg == BPF_PSEUDO_CALL) {
+ snprintf(ctx->scratch, sizeof(ctx->scratch), "%+d", insn->off);
+ return ctx->scratch;
+ }
+
+ return NULL;
+}
+
+struct bpf_insn *disasm_insn(struct bpf_insn *insn, char *buf, size_t buf_sz)
+{
+ struct print_insn_context ctx = {
+ .buf = buf,
+ .sz = buf_sz,
+ };
+ struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn_cb,
+ .cb_call = print_call_cb,
+ .private_data = &ctx,
+ };
+ char *tmp, *pfx_end, *sfx_start;
+ bool double_insn;
+ int len;
+
+ print_bpf_insn(&cbs, insn, true);
+ /* We share code with kernel BPF disassembler, it adds '(FF) ' prefix
+ * for each instruction (FF stands for instruction `code` byte).
+ * Remove the prefix inplace, and also simplify call instructions.
+ * E.g.: "(85) call foo#10" -> "call foo".
+ * Also remove newline in the end (the 'max(strlen(buf) - 1, 0)' thing).
+ */
+ pfx_end = buf + 5;
+ sfx_start = buf + max((int)strlen(buf) - 1, 0);
+ if (strncmp(pfx_end, "call ", 5) == 0 && (tmp = strrchr(buf, '#')))
+ sfx_start = tmp;
+ len = sfx_start - pfx_end;
+ memmove(buf, pfx_end, len);
+ buf[len] = 0;
+ double_insn = insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+ return insn + (double_insn ? 2 : 1);
+}
diff --git a/tools/testing/selftests/bpf/disasm_helpers.h b/tools/testing/selftests/bpf/disasm_helpers.h
new file mode 100644
index 000000000000..7b26cab70099
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm_helpers.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __DISASM_HELPERS_H
+#define __DISASM_HELPERS_H
+
+#include <stdlib.h>
+
+struct bpf_insn;
+
+struct bpf_insn *disasm_insn(struct bpf_insn *insn, char *buf, size_t buf_sz);
+
+#endif /* __DISASM_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 3fd83b9dc1bf..c8be6406777f 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -11,13 +11,12 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "flow_dissector_load.h"
const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
const char *cfg_map_name = "jmp_table";
bool cfg_attach = true;
-char *cfg_section_name;
+char *cfg_prog_name;
char *cfg_path_name;
static void load_and_attach_program(void)
@@ -25,7 +24,10 @@ static void load_and_attach_program(void)
int prog_fd, ret;
struct bpf_object *obj;
- ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name,
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name,
cfg_map_name, NULL, &prog_fd, NULL);
if (ret)
error(1, 0, "bpf_flow_load %s", cfg_path_name);
@@ -75,15 +77,15 @@ static void parse_opts(int argc, char **argv)
break;
case 'p':
if (cfg_path_name)
- error(1, 0, "only one prog name can be given");
+ error(1, 0, "only one path can be given");
cfg_path_name = optarg;
break;
case 's':
- if (cfg_section_name)
- error(1, 0, "only one section can be given");
+ if (cfg_prog_name)
+ error(1, 0, "only one prog can be given");
- cfg_section_name = optarg;
+ cfg_prog_name = optarg;
break;
}
}
@@ -94,7 +96,7 @@ static void parse_opts(int argc, char **argv)
if (cfg_attach && !cfg_path_name)
error(1, 0, "must provide a path to the BPF program");
- if (cfg_attach && !cfg_section_name)
+ if (cfg_attach && !cfg_prog_name)
error(1, 0, "must provide a section name");
}
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index 7290401ec172..f40b585f4e7e 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -4,10 +4,11 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "testing_helpers.h"
static inline int bpf_flow_load(struct bpf_object **obj,
const char *path,
- const char *section_name,
+ const char *prog_name,
const char *map_name,
const char *keys_map_name,
int *prog_fd,
@@ -18,18 +19,12 @@ static inline int bpf_flow_load(struct bpf_object **obj,
int prog_array_fd;
int ret, fd, i;
- ret = bpf_prog_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj,
+ ret = bpf_prog_test_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj,
prog_fd);
if (ret)
return ret;
- main_prog = NULL;
- bpf_object__for_each_program(prog, *obj) {
- if (strcmp(section_name, bpf_program__section_name(prog)) == 0) {
- main_prog = prog;
- break;
- }
- }
+ main_prog = bpf_object__find_program_by_name(*obj, prog_name);
if (!main_prog)
return -1;
diff --git a/tools/testing/selftests/bpf/generate_udp_fragments.py b/tools/testing/selftests/bpf/generate_udp_fragments.py
new file mode 100755
index 000000000000..2b8a1187991c
--- /dev/null
+++ b/tools/testing/selftests/bpf/generate_udp_fragments.py
@@ -0,0 +1,90 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This script helps generate fragmented UDP packets.
+
+While it is technically possible to dynamically generate
+fragmented packets in C, it is much harder to read and write
+said code. `scapy` is relatively industry standard and really
+easy to read / write.
+
+So we choose to write this script that generates a valid C
+header. Rerun script and commit generated file after any
+modifications.
+"""
+
+import argparse
+import os
+
+from scapy.all import *
+
+
+# These constants must stay in sync with `ip_check_defrag.c`
+VETH1_ADDR = "172.16.1.200"
+VETH0_ADDR6 = "fc00::100"
+VETH1_ADDR6 = "fc00::200"
+CLIENT_PORT = 48878
+SERVER_PORT = 48879
+MAGIC_MESSAGE = "THIS IS THE ORIGINAL MESSAGE, PLEASE REASSEMBLE ME"
+
+
+def print_header(f):
+ f.write("// SPDX-License-Identifier: GPL-2.0\n")
+ f.write("/* DO NOT EDIT -- this file is generated */\n")
+ f.write("\n")
+ f.write("#ifndef _IP_CHECK_DEFRAG_FRAGS_H\n")
+ f.write("#define _IP_CHECK_DEFRAG_FRAGS_H\n")
+ f.write("\n")
+ f.write("#include <stdint.h>\n")
+ f.write("\n")
+
+
+def print_frags(f, frags, v6):
+ for idx, frag in enumerate(frags):
+ # 10 bytes per line to keep width in check
+ chunks = [frag[i : i + 10] for i in range(0, len(frag), 10)]
+ chunks_fmted = [", ".join([str(hex(b)) for b in chunk]) for chunk in chunks]
+ suffix = "6" if v6 else ""
+
+ f.write(f"static uint8_t frag{suffix}_{idx}[] = {{\n")
+ for chunk in chunks_fmted:
+ f.write(f"\t{chunk},\n")
+ f.write(f"}};\n")
+
+
+def print_trailer(f):
+ f.write("\n")
+ f.write("#endif /* _IP_CHECK_DEFRAG_FRAGS_H */\n")
+
+
+def main(f):
+ # srcip of 0 is filled in by IP_HDRINCL
+ sip = "0.0.0.0"
+ sip6 = VETH0_ADDR6
+ dip = VETH1_ADDR
+ dip6 = VETH1_ADDR6
+ sport = CLIENT_PORT
+ dport = SERVER_PORT
+ payload = MAGIC_MESSAGE.encode()
+
+ # Disable UDPv4 checksums to keep code simpler
+ pkt = IP(src=sip,dst=dip) / UDP(sport=sport,dport=dport,chksum=0) / Raw(load=payload)
+ # UDPv6 requires a checksum
+ # Also pin the ipv6 fragment header ID, otherwise it's a random value
+ pkt6 = IPv6(src=sip6,dst=dip6) / IPv6ExtHdrFragment(id=0xBEEF) / UDP(sport=sport,dport=dport) / Raw(load=payload)
+
+ frags = [f.build() for f in pkt.fragment(24)]
+ frags6 = [f.build() for f in fragment6(pkt6, 72)]
+
+ print_header(f)
+ print_frags(f, frags, False)
+ print_frags(f, frags6, True)
+ print_trailer(f)
+
+
+if __name__ == "__main__":
+ dir = os.path.dirname(os.path.realpath(__file__))
+ header = f"{dir}/ip_check_defrag_frags.h"
+ with open(header, "w") as f:
+ main(f)
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
deleted file mode 100644
index b8d6aef99db4..000000000000
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ /dev/null
@@ -1,139 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <syscall.h>
-#include <unistd.h>
-#include <linux/perf_event.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-
-#define CHECK(condition, tag, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- printf("%s:FAIL:%s ", __func__, tag); \
- printf(format); \
- } else { \
- printf("%s:PASS:%s\n", __func__, tag); \
- } \
- __ret; \
-})
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
- const char *name)
-{
- struct bpf_map *map;
-
- map = bpf_object__find_map_by_name(obj, name);
- if (!map)
- return -1;
- return bpf_map__fd(map);
-}
-
-#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
-
-int main(int argc, char **argv)
-{
- const char *probe_name = "syscalls/sys_enter_nanosleep";
- const char *file = "get_cgroup_id_kern.o";
- int err, bytes, efd, prog_fd, pmu_fd;
- int cgroup_fd, cgidmap_fd, pidmap_fd;
- struct perf_event_attr attr = {};
- struct bpf_object *obj;
- __u64 kcgid = 0, ucgid;
- __u32 key = 0, pid;
- int exit_code = 1;
- char buf[256];
-
- cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
- if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
- return 1;
-
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
- goto cleanup_cgroup_env;
-
- cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
- if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
- cgidmap_fd, errno))
- goto close_prog;
-
- pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
- if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
- pidmap_fd, errno))
- goto close_prog;
-
- pid = getpid();
- bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
-
- snprintf(buf, sizeof(buf),
- "/sys/kernel/debug/tracing/events/%s/id", probe_name);
- efd = open(buf, O_RDONLY, 0);
- if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
- goto close_prog;
- bytes = read(efd, buf, sizeof(buf));
- close(efd);
- if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
- "bytes %d errno %d\n", bytes, errno))
- goto close_prog;
-
- attr.config = strtol(buf, NULL, 0);
- attr.type = PERF_TYPE_TRACEPOINT;
- attr.sample_type = PERF_SAMPLE_RAW;
- attr.sample_period = 1;
- attr.wakeup_events = 1;
-
- /* attach to this pid so the all bpf invocations will be in the
- * cgroup associated with this pid.
- */
- pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
- if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
- errno))
- goto close_prog;
-
- err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
- if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
- errno))
- goto close_pmu;
-
- err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
- if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
- errno))
- goto close_pmu;
-
- /* trigger some syscalls */
- sleep(1);
-
- err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
- if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
- goto close_pmu;
-
- ucgid = get_cgroup_id(TEST_CGROUP);
- if (CHECK(kcgid != ucgid, "compare_cgroup_id",
- "kern cgid %llx user cgid %llx", kcgid, ucgid))
- goto close_pmu;
-
- exit_code = 0;
- printf("%s:PASS\n", argv[0]);
-
-close_pmu:
- close(pmu_fd);
-close_prog:
- bpf_object__close(obj);
-cleanup_cgroup_env:
- cleanup_cgroup_environment();
- return exit_code;
-}
diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h
index 719225b16626..1c638d9dce1a 100644
--- a/tools/testing/selftests/bpf/gnu/stubs.h
+++ b/tools/testing/selftests/bpf/gnu/stubs.h
@@ -1 +1 @@
-/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */
+/* dummy .h to trick /usr/include/features.h to work with 'clang --target=bpf' */
diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index 8e62581113a3..8ecead4ccad0 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -12,7 +12,7 @@ LOG_FILE="$(mktemp /tmp/ima_setup.XXXX.log)"
usage()
{
- echo "Usage: $0 <setup|cleanup|run> <existing_tmp_dir>"
+ echo "Usage: $0 <setup|cleanup|run|modify-bin|restore-bin|load-policy> <existing_tmp_dir>"
exit 1
}
@@ -51,6 +51,7 @@ setup()
ensure_mount_securityfs
echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
+ echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${mount_dir}/policy_test
}
cleanup() {
@@ -77,6 +78,32 @@ run()
exec "${copied_bin_path}"
}
+modify_bin()
+{
+ local tmp_dir="$1"
+ local mount_dir="${tmp_dir}/mnt"
+ local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+
+ echo "mod" >> "${copied_bin_path}"
+}
+
+restore_bin()
+{
+ local tmp_dir="$1"
+ local mount_dir="${tmp_dir}/mnt"
+ local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+
+ truncate -s -4 "${copied_bin_path}"
+}
+
+load_policy()
+{
+ local tmp_dir="$1"
+ local mount_dir="${tmp_dir}/mnt"
+
+ echo ${mount_dir}/policy_test > ${IMA_POLICY_FILE} 2> /dev/null
+}
+
catch()
{
local exit_code="$1"
@@ -105,6 +132,12 @@ main()
cleanup "${tmp_dir}"
elif [[ "${action}" == "run" ]]; then
run "${tmp_dir}"
+ elif [[ "${action}" == "modify-bin" ]]; then
+ modify_bin "${tmp_dir}"
+ elif [[ "${action}" == "restore-bin" ]]; then
+ restore_bin "${tmp_dir}"
+ elif [[ "${action}" == "load-policy" ]]; then
+ load_policy "${tmp_dir}"
else
echo "Unknown action: ${action}"
exit 1
diff --git a/tools/testing/selftests/bpf/io_helpers.c b/tools/testing/selftests/bpf/io_helpers.c
new file mode 100644
index 000000000000..4ada0a74aa1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/io_helpers.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/select.h>
+#include <unistd.h>
+#include <errno.h>
+
+int read_with_timeout(int fd, char *buf, size_t count, long usec)
+{
+ const long M = 1000 * 1000;
+ struct timeval tv = { usec / M, usec % M };
+ fd_set fds;
+ int err;
+
+ FD_ZERO(&fds);
+ FD_SET(fd, &fds);
+ err = select(fd + 1, &fds, NULL, NULL, &tv);
+ if (err < 0)
+ return err;
+ if (FD_ISSET(fd, &fds))
+ return read(fd, buf, count);
+ return -EAGAIN;
+}
diff --git a/tools/testing/selftests/bpf/io_helpers.h b/tools/testing/selftests/bpf/io_helpers.h
new file mode 100644
index 000000000000..21e1134cd3ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/io_helpers.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+
+/* As a regular read(2), but allows to specify a timeout in micro-seconds.
+ * Returns -EAGAIN on timeout.
+ */
+int read_with_timeout(int fd, char *buf, size_t count, long usec);
diff --git a/tools/testing/selftests/bpf/ip_check_defrag_frags.h b/tools/testing/selftests/bpf/ip_check_defrag_frags.h
new file mode 100644
index 000000000000..70ab7e9fa22b
--- /dev/null
+++ b/tools/testing/selftests/bpf/ip_check_defrag_frags.h
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* DO NOT EDIT -- this file is generated */
+
+#ifndef _IP_CHECK_DEFRAG_FRAGS_H
+#define _IP_CHECK_DEFRAG_FRAGS_H
+
+#include <stdint.h>
+
+static uint8_t frag_0[] = {
+ 0x45, 0x0, 0x0, 0x2c, 0x0, 0x1, 0x20, 0x0, 0x40, 0x11,
+ 0xac, 0xe8, 0x0, 0x0, 0x0, 0x0, 0xac, 0x10, 0x1, 0xc8,
+ 0xbe, 0xee, 0xbe, 0xef, 0x0, 0x3a, 0x0, 0x0, 0x54, 0x48,
+ 0x49, 0x53, 0x20, 0x49, 0x53, 0x20, 0x54, 0x48, 0x45, 0x20,
+ 0x4f, 0x52, 0x49, 0x47,
+};
+static uint8_t frag_1[] = {
+ 0x45, 0x0, 0x0, 0x2c, 0x0, 0x1, 0x20, 0x3, 0x40, 0x11,
+ 0xac, 0xe5, 0x0, 0x0, 0x0, 0x0, 0xac, 0x10, 0x1, 0xc8,
+ 0x49, 0x4e, 0x41, 0x4c, 0x20, 0x4d, 0x45, 0x53, 0x53, 0x41,
+ 0x47, 0x45, 0x2c, 0x20, 0x50, 0x4c, 0x45, 0x41, 0x53, 0x45,
+ 0x20, 0x52, 0x45, 0x41,
+};
+static uint8_t frag_2[] = {
+ 0x45, 0x0, 0x0, 0x1e, 0x0, 0x1, 0x0, 0x6, 0x40, 0x11,
+ 0xcc, 0xf0, 0x0, 0x0, 0x0, 0x0, 0xac, 0x10, 0x1, 0xc8,
+ 0x53, 0x53, 0x45, 0x4d, 0x42, 0x4c, 0x45, 0x20, 0x4d, 0x45,
+};
+static uint8_t frag6_0[] = {
+ 0x60, 0x0, 0x0, 0x0, 0x0, 0x20, 0x2c, 0x40, 0xfc, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x1, 0x0, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0,
+ 0x11, 0x0, 0x0, 0x1, 0x0, 0x0, 0xbe, 0xef, 0xbe, 0xee,
+ 0xbe, 0xef, 0x0, 0x3a, 0xd0, 0xf8, 0x54, 0x48, 0x49, 0x53,
+ 0x20, 0x49, 0x53, 0x20, 0x54, 0x48, 0x45, 0x20, 0x4f, 0x52,
+ 0x49, 0x47,
+};
+static uint8_t frag6_1[] = {
+ 0x60, 0x0, 0x0, 0x0, 0x0, 0x20, 0x2c, 0x40, 0xfc, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x1, 0x0, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0,
+ 0x11, 0x0, 0x0, 0x19, 0x0, 0x0, 0xbe, 0xef, 0x49, 0x4e,
+ 0x41, 0x4c, 0x20, 0x4d, 0x45, 0x53, 0x53, 0x41, 0x47, 0x45,
+ 0x2c, 0x20, 0x50, 0x4c, 0x45, 0x41, 0x53, 0x45, 0x20, 0x52,
+ 0x45, 0x41,
+};
+static uint8_t frag6_2[] = {
+ 0x60, 0x0, 0x0, 0x0, 0x0, 0x12, 0x2c, 0x40, 0xfc, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x1, 0x0, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0,
+ 0x11, 0x0, 0x0, 0x30, 0x0, 0x0, 0xbe, 0xef, 0x53, 0x53,
+ 0x45, 0x4d, 0x42, 0x4c, 0x45, 0x20, 0x4d, 0x45,
+};
+
+#endif /* _IP_CHECK_DEFRAG_FRAGS_H */
diff --git a/tools/testing/selftests/bpf/jit_disasm_helpers.c b/tools/testing/selftests/bpf/jit_disasm_helpers.c
new file mode 100644
index 000000000000..febd6b12e372
--- /dev/null
+++ b/tools/testing/selftests/bpf/jit_disasm_helpers.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <test_progs.h>
+
+#ifdef HAVE_LLVM_SUPPORT
+
+#include <llvm-c/Core.h>
+#include <llvm-c/Disassembler.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/TargetMachine.h>
+
+/* The intent is to use get_jited_program_text() for small test
+ * programs written in BPF assembly, thus assume that 32 local labels
+ * would be sufficient.
+ */
+#define MAX_LOCAL_LABELS 32
+
+/* Local labels are encoded as 'L42', this requires 4 bytes of storage:
+ * 3 characters + zero byte
+ */
+#define LOCAL_LABEL_LEN 4
+
+static bool llvm_initialized;
+
+struct local_labels {
+ bool print_phase;
+ __u32 prog_len;
+ __u32 cnt;
+ __u32 pcs[MAX_LOCAL_LABELS];
+ char names[MAX_LOCAL_LABELS][LOCAL_LABEL_LEN];
+};
+
+static const char *lookup_symbol(void *data, uint64_t ref_value, uint64_t *ref_type,
+ uint64_t ref_pc, const char **ref_name)
+{
+ struct local_labels *labels = data;
+ uint64_t type = *ref_type;
+ int i;
+
+ *ref_type = LLVMDisassembler_ReferenceType_InOut_None;
+ *ref_name = NULL;
+ if (type != LLVMDisassembler_ReferenceType_In_Branch)
+ return NULL;
+ /* Depending on labels->print_phase either discover local labels or
+ * return a name assigned with local jump target:
+ * - if print_phase is true and ref_value is in labels->pcs,
+ * return corresponding labels->name.
+ * - if print_phase is false, save program-local jump targets
+ * in labels->pcs;
+ */
+ if (labels->print_phase) {
+ for (i = 0; i < labels->cnt; ++i)
+ if (labels->pcs[i] == ref_value)
+ return labels->names[i];
+ } else {
+ if (labels->cnt < MAX_LOCAL_LABELS && ref_value < labels->prog_len)
+ labels->pcs[labels->cnt++] = ref_value;
+ }
+ return NULL;
+}
+
+static int disasm_insn(LLVMDisasmContextRef ctx, uint8_t *image, __u32 len, __u32 pc,
+ char *buf, __u32 buf_sz)
+{
+ int i, cnt;
+
+ cnt = LLVMDisasmInstruction(ctx, image + pc, len - pc, pc,
+ buf, buf_sz);
+ if (cnt > 0)
+ return cnt;
+ PRINT_FAIL("Can't disasm instruction at offset %d:", pc);
+ for (i = 0; i < 16 && pc + i < len; ++i)
+ printf(" %02x", image[pc + i]);
+ printf("\n");
+ return -EINVAL;
+}
+
+static int cmp_u32(const void *_a, const void *_b)
+{
+ __u32 a = *(__u32 *)_a;
+ __u32 b = *(__u32 *)_b;
+
+ if (a < b)
+ return -1;
+ if (a > b)
+ return 1;
+ return 0;
+}
+
+static int disasm_one_func(FILE *text_out, uint8_t *image, __u32 len)
+{
+ char *label, *colon, *triple = NULL;
+ LLVMDisasmContextRef ctx = NULL;
+ struct local_labels labels = {};
+ __u32 *label_pc, pc;
+ int i, cnt, err = 0;
+ char buf[64];
+
+ triple = LLVMGetDefaultTargetTriple();
+ ctx = LLVMCreateDisasm(triple, &labels, 0, NULL, lookup_symbol);
+ if (!ASSERT_OK_PTR(ctx, "LLVMCreateDisasm")) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ cnt = LLVMSetDisasmOptions(ctx, LLVMDisassembler_Option_PrintImmHex);
+ if (!ASSERT_EQ(cnt, 1, "LLVMSetDisasmOptions")) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* discover labels */
+ labels.prog_len = len;
+ pc = 0;
+ while (pc < len) {
+ cnt = disasm_insn(ctx, image, len, pc, buf, 1);
+ if (cnt < 0) {
+ err = cnt;
+ goto out;
+ }
+ pc += cnt;
+ }
+ qsort(labels.pcs, labels.cnt, sizeof(*labels.pcs), cmp_u32);
+ for (i = 0; i < labels.cnt; ++i)
+ /* gcc is unable to infer upper bound for labels.cnt and assumes
+ * it to be U32_MAX. U32_MAX takes 10 decimal digits.
+ * snprintf below prints into labels.names[*],
+ * which has space only for two digits and a letter.
+ * To avoid truncation warning use (i % MAX_LOCAL_LABELS),
+ * which informs gcc about printed value upper bound.
+ */
+ snprintf(labels.names[i], sizeof(labels.names[i]), "L%d", i % MAX_LOCAL_LABELS);
+
+ /* now print with labels */
+ labels.print_phase = true;
+ pc = 0;
+ while (pc < len) {
+ cnt = disasm_insn(ctx, image, len, pc, buf, sizeof(buf));
+ if (cnt < 0) {
+ err = cnt;
+ goto out;
+ }
+ label_pc = bsearch(&pc, labels.pcs, labels.cnt, sizeof(*labels.pcs), cmp_u32);
+ label = "";
+ colon = "";
+ if (label_pc) {
+ label = labels.names[label_pc - labels.pcs];
+ colon = ":";
+ }
+ fprintf(text_out, "%x:\t", pc);
+ for (i = 0; i < cnt; ++i)
+ fprintf(text_out, "%02x ", image[pc + i]);
+ for (i = cnt * 3; i < 12 * 3; ++i)
+ fputc(' ', text_out);
+ fprintf(text_out, "%s%s%s\n", label, colon, buf);
+ pc += cnt;
+ }
+
+out:
+ if (triple)
+ LLVMDisposeMessage(triple);
+ if (ctx)
+ LLVMDisasmDispose(ctx);
+ return err;
+}
+
+int get_jited_program_text(int fd, char *text, size_t text_sz)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 jited_funcs, len, pc;
+ __u32 *func_lens = NULL;
+ FILE *text_out = NULL;
+ uint8_t *image = NULL;
+ int i, err = 0;
+
+ if (!llvm_initialized) {
+ LLVMInitializeAllTargetInfos();
+ LLVMInitializeAllTargetMCs();
+ LLVMInitializeAllDisassemblers();
+ llvm_initialized = 1;
+ }
+
+ text_out = fmemopen(text, text_sz, "w");
+ if (!ASSERT_OK_PTR(text_out, "open_memstream")) {
+ err = -errno;
+ goto out;
+ }
+
+ /* first call is to find out jited program len */
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd #1"))
+ goto out;
+
+ len = info.jited_prog_len;
+ image = malloc(len);
+ if (!ASSERT_OK_PTR(image, "malloc(info.jited_prog_len)")) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ jited_funcs = info.nr_jited_func_lens;
+ func_lens = malloc(jited_funcs * sizeof(__u32));
+ if (!ASSERT_OK_PTR(func_lens, "malloc(info.nr_jited_func_lens)")) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.jited_prog_insns = (__u64)image;
+ info.jited_prog_len = len;
+ info.jited_func_lens = (__u64)func_lens;
+ info.nr_jited_func_lens = jited_funcs;
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd #2"))
+ goto out;
+
+ for (pc = 0, i = 0; i < jited_funcs; ++i) {
+ fprintf(text_out, "func #%d:\n", i);
+ disasm_one_func(text_out, image + pc, func_lens[i]);
+ fprintf(text_out, "\n");
+ pc += func_lens[i];
+ }
+
+out:
+ if (text_out)
+ fclose(text_out);
+ if (image)
+ free(image);
+ if (func_lens)
+ free(func_lens);
+ return err;
+}
+
+#else /* HAVE_LLVM_SUPPORT */
+
+int get_jited_program_text(int fd, char *text, size_t text_sz)
+{
+ if (env.verbosity >= VERBOSE_VERY)
+ printf("compiled w/o llvm development libraries, can't dis-assembly binary code");
+ return -EOPNOTSUPP;
+}
+
+#endif /* HAVE_LLVM_SUPPORT */
diff --git a/tools/testing/selftests/bpf/jit_disasm_helpers.h b/tools/testing/selftests/bpf/jit_disasm_helpers.h
new file mode 100644
index 000000000000..e6924fd65ecf
--- /dev/null
+++ b/tools/testing/selftests/bpf/jit_disasm_helpers.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __JIT_DISASM_HELPERS_H
+#define __JIT_DISASM_HELPERS_H
+
+#include <stddef.h>
+
+int get_jited_program_text(int fd, char *text, size_t text_sz);
+
+#endif /* __JIT_DISASM_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/json_writer.c b/tools/testing/selftests/bpf/json_writer.c
new file mode 120000
index 000000000000..5effa31e2f39
--- /dev/null
+++ b/tools/testing/selftests/bpf/json_writer.c
@@ -0,0 +1 @@
+../../../bpf/bpftool/json_writer.c \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/json_writer.h b/tools/testing/selftests/bpf/json_writer.h
new file mode 120000
index 000000000000..e0a264c26752
--- /dev/null
+++ b/tools/testing/selftests/bpf/json_writer.h
@@ -0,0 +1 @@
+../../../bpf/bpftool/json_writer.h \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/liburandom_read.map b/tools/testing/selftests/bpf/liburandom_read.map
new file mode 100644
index 000000000000..38a97a419a04
--- /dev/null
+++ b/tools/testing/selftests/bpf/liburandom_read.map
@@ -0,0 +1,15 @@
+LIBURANDOM_READ_1.0.0 {
+ global:
+ urandlib_api;
+ urandlib_api_sameoffset;
+ urandlib_read_without_sema;
+ urandlib_read_with_sema;
+ urandlib_read_with_sema_semaphore;
+ local:
+ *;
+};
+
+LIBURANDOM_READ_2.0.0 {
+ global:
+ urandlib_api;
+} LIBURANDOM_READ_1.0.0;
diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
index f0a64d8ac59a..b595556315bc 100644
--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -3,16 +3,20 @@
#include <stdio.h>
#include <errno.h>
#include <string.h>
+#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <test_maps.h>
+static int nr_cpus;
+
static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
- int *values)
+ __s64 *values, bool is_pcpu)
{
- int i, err;
+ int i, j, err;
+ int cpu_offset = 0;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
@@ -20,22 +24,41 @@ static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
for (i = 0; i < max_entries; i++) {
keys[i] = i;
- values[i] = i + 1;
+ if (is_pcpu) {
+ cpu_offset = i * nr_cpus;
+ for (j = 0; j < nr_cpus; j++)
+ (values + cpu_offset)[j] = i + 1 + j;
+ } else {
+ values[i] = i + 1;
+ }
}
err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
}
-static void map_batch_verify(int *visited, __u32 max_entries,
- int *keys, int *values)
+static void map_batch_verify(int *visited, __u32 max_entries, int *keys,
+ __s64 *values, bool is_pcpu)
{
- int i;
+ int i, j;
+ int cpu_offset = 0;
memset(visited, 0, max_entries * sizeof(*visited));
for (i = 0; i < max_entries; i++) {
- CHECK(keys[i] + 1 != values[i], "key/value checking",
- "error: i %d key %d value %d\n", i, keys[i], values[i]);
+ if (is_pcpu) {
+ cpu_offset = i * nr_cpus;
+ for (j = 0; j < nr_cpus; j++) {
+ __s64 value = (values + cpu_offset)[j];
+ CHECK(keys[i] + j + 1 != value,
+ "key/value checking",
+ "error: i %d j %d key %d value %lld\n", i,
+ j, keys[i], value);
+ }
+ } else {
+ CHECK(keys[i] + 1 != values[i], "key/value checking",
+ "error: i %d key %d value %lld\n", i, keys[i],
+ values[i]);
+ }
visited[i] = 1;
}
for (i = 0; i < max_entries; i++) {
@@ -44,59 +67,53 @@ static void map_batch_verify(int *visited, __u32 max_entries,
}
}
-void test_array_map_batch_ops(void)
+static void __test_map_lookup_and_update_batch(bool is_pcpu)
{
- struct bpf_create_map_attr xattr = {
- .name = "array_map",
- .map_type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- };
- int map_fd, *keys, *values, *visited;
+ int map_fd, *keys, *visited;
__u32 count, total, total_success;
const __u32 max_entries = 10;
- bool nospace_err;
__u64 batch = 0;
- int err, step;
+ int err, step, value_size;
+ void *values;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
);
- xattr.max_entries = max_entries;
- map_fd = bpf_create_map_xattr(&xattr);
+ map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY,
+ "array_map", sizeof(int), sizeof(__s64), max_entries, NULL);
CHECK(map_fd == -1,
- "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+ "bpf_map_create()", "error:%s\n", strerror(errno));
+
+ value_size = sizeof(__s64);
+ if (is_pcpu)
+ value_size *= nr_cpus;
- keys = malloc(max_entries * sizeof(int));
- values = malloc(max_entries * sizeof(int));
- visited = malloc(max_entries * sizeof(int));
+ keys = calloc(max_entries, sizeof(*keys));
+ values = calloc(max_entries, value_size);
+ visited = calloc(max_entries, sizeof(*visited));
CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
strerror(errno));
- /* populate elements to the map */
- map_batch_update(map_fd, max_entries, keys, values);
-
/* test 1: lookup in a loop with various steps. */
total_success = 0;
for (step = 1; step < max_entries; step++) {
- map_batch_update(map_fd, max_entries, keys, values);
- map_batch_verify(visited, max_entries, keys, values);
+ map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+ map_batch_verify(visited, max_entries, keys, values, is_pcpu);
memset(keys, 0, max_entries * sizeof(*keys));
- memset(values, 0, max_entries * sizeof(*values));
+ memset(values, 0, max_entries * value_size);
batch = 0;
total = 0;
/* iteratively lookup/delete elements with 'step'
* elements each.
*/
count = step;
- nospace_err = false;
while (true) {
err = bpf_map_lookup_batch(map_fd,
- total ? &batch : NULL, &batch,
- keys + total,
- values + total,
- &count, &opts);
+ total ? &batch : NULL,
+ &batch, keys + total,
+ values + total * value_size,
+ &count, &opts);
CHECK((err && errno != ENOENT), "lookup with steps",
"error: %s\n", strerror(errno));
@@ -107,13 +124,10 @@ void test_array_map_batch_ops(void)
}
- if (nospace_err == true)
- continue;
-
CHECK(total != max_entries, "lookup with steps",
"total = %u, max_entries = %u\n", total, max_entries);
- map_batch_verify(visited, max_entries, keys, values);
+ map_batch_verify(visited, max_entries, keys, values, is_pcpu);
total_success++;
}
@@ -121,9 +135,31 @@ void test_array_map_batch_ops(void)
CHECK(total_success == 0, "check total_success",
"unexpected failure\n");
- printf("%s:PASS\n", __func__);
-
free(keys);
free(values);
free(visited);
+ close(map_fd);
+}
+
+static void array_map_batch_ops(void)
+{
+ __test_map_lookup_and_update_batch(false);
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void array_percpu_map_batch_ops(void)
+{
+ __test_map_lookup_and_update_batch(true);
+ printf("test_%s:PASS\n", __func__);
+}
+
+void test_array_map_batch_ops(void)
+{
+ nr_cpus = libbpf_num_possible_cpus();
+
+ CHECK(nr_cpus < 0, "nr_cpus checking",
+ "error: get possible cpus failed");
+
+ array_map_batch_ops();
+ array_percpu_map_batch_ops();
}
diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
index 976bf415fbdd..5da493b94ae2 100644
--- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <errno.h>
#include <string.h>
+#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -83,22 +84,15 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
int err, step, value_size;
bool nospace_err;
void *values;
- struct bpf_create_map_attr xattr = {
- .name = "hash_map",
- .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH :
- BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- };
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
);
- xattr.max_entries = max_entries;
- map_fd = bpf_create_map_xattr(&xattr);
+ map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : BPF_MAP_TYPE_HASH,
+ "hash_map", sizeof(int), sizeof(int), max_entries, NULL);
CHECK(map_fd == -1,
- "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+ "bpf_map_create()", "error:%s\n", strerror(errno));
value_size = is_pcpu ? sizeof(value) : sizeof(int);
keys = malloc(max_entries * sizeof(int));
@@ -203,7 +197,7 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
CHECK(total != max_entries, "delete with steps",
"total = %u, max_entries = %u\n", total, max_entries);
- /* check map is empty, errono == ENOENT */
+ /* check map is empty, errno == ENOENT */
err = bpf_map_get_next_key(map_fd, NULL, &key);
CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
"error: %s\n", strerror(errno));
@@ -262,6 +256,7 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
free(visited);
if (!is_pcpu)
free(values);
+ close(map_fd);
}
void htab_map_batch_ops(void)
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c
index 006be3963977..d32e4edac930 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c
@@ -20,13 +20,14 @@
#include <string.h>
#include <time.h>
#include <unistd.h>
+#include <endian.h>
#include <arpa/inet.h>
#include <sys/time.h>
#include <bpf/bpf.h>
+#include <test_maps.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
struct tlpm_node {
struct tlpm_node *next;
@@ -34,6 +35,22 @@ struct tlpm_node {
uint8_t key[];
};
+struct lpm_trie_bytes_key {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ unsigned char data[8];
+};
+
+struct lpm_trie_int_key {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ unsigned int data;
+};
+
static struct tlpm_node *tlpm_match(struct tlpm_node *list,
const uint8_t *key,
size_t n_bits);
@@ -208,9 +225,11 @@ static void test_lpm_order(void)
static void test_lpm_map(int keysize)
{
- size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+ volatile size_t n_matches, n_matches_after_delete;
+ size_t i, j, n_nodes, n_lookups;
struct tlpm_node *t, *list = NULL;
- struct bpf_lpm_trie_key *key;
+ struct bpf_lpm_trie_key_u8 *key;
uint8_t *data, *value;
int r, map;
@@ -222,7 +241,7 @@ static void test_lpm_map(int keysize)
n_matches = 0;
n_matches_after_delete = 0;
n_nodes = 1 << 8;
- n_lookups = 1 << 16;
+ n_lookups = 1 << 9;
data = alloca(keysize);
memset(data, 0, keysize);
@@ -233,11 +252,11 @@ static void test_lpm_map(int keysize)
key = alloca(sizeof(*key) + keysize);
memset(key, 0, sizeof(*key) + keysize);
- map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
sizeof(*key) + keysize,
keysize + 1,
4096,
- BPF_F_NO_PREALLOC);
+ &opts);
assert(map >= 0);
for (i = 0; i < n_nodes; ++i) {
@@ -329,8 +348,9 @@ static void test_lpm_map(int keysize)
static void test_lpm_ipaddr(void)
{
- struct bpf_lpm_trie_key *key_ipv4;
- struct bpf_lpm_trie_key *key_ipv6;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+ struct bpf_lpm_trie_key_u8 *key_ipv4;
+ struct bpf_lpm_trie_key_u8 *key_ipv6;
size_t key_size_ipv4;
size_t key_size_ipv6;
int map_fd_ipv4;
@@ -342,14 +362,14 @@ static void test_lpm_ipaddr(void)
key_ipv4 = alloca(key_size_ipv4);
key_ipv6 = alloca(key_size_ipv6);
- map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map_fd_ipv4 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
key_size_ipv4, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ 100, &opts);
assert(map_fd_ipv4 >= 0);
- map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map_fd_ipv6 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
key_size_ipv6, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ 100, &opts);
assert(map_fd_ipv6 >= 0);
/* Fill data some IPv4 and IPv6 address ranges */
@@ -406,16 +426,13 @@ static void test_lpm_ipaddr(void)
/* Test some lookups that should not match any entry */
inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
- assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
- assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
- assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -ENOENT);
close(map_fd_ipv4);
close(map_fd_ipv6);
@@ -423,7 +440,8 @@ static void test_lpm_ipaddr(void)
static void test_lpm_delete(void)
{
- struct bpf_lpm_trie_key *key;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+ struct bpf_lpm_trie_key_u8 *key;
size_t key_size;
int map_fd;
__u64 value;
@@ -431,9 +449,9 @@ static void test_lpm_delete(void)
key_size = sizeof(*key) + sizeof(__u32);
key = alloca(key_size);
- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
key_size, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ 100, &opts);
assert(map_fd >= 0);
/* Add nodes:
@@ -471,18 +489,15 @@ static void test_lpm_delete(void)
/* remove non-existent node */
key->prefixlen = 32;
inet_pton(AF_INET, "10.0.0.1", key->data);
- assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
key->prefixlen = 30; // unused prefix so far
inet_pton(AF_INET, "192.255.0.0", key->data);
- assert(bpf_map_delete_elem(map_fd, key) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
key->prefixlen = 16; // same prefix as the root node
inet_pton(AF_INET, "192.255.0.0", key->data);
- assert(bpf_map_delete_elem(map_fd, key) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
/* assert initial lookup */
key->prefixlen = 32;
@@ -527,15 +542,15 @@ static void test_lpm_delete(void)
key->prefixlen = 32;
inet_pton(AF_INET, "192.168.128.1", key->data);
- assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
close(map_fd);
}
static void test_lpm_get_next_key(void)
{
- struct bpf_lpm_trie_key *key_p, *next_key_p;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+ struct bpf_lpm_trie_key_u8 *key_p, *next_key_p;
size_t key_size;
__u32 value = 0;
int map_fd;
@@ -544,13 +559,11 @@ static void test_lpm_get_next_key(void)
key_p = alloca(key_size);
next_key_p = alloca(key_size);
- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, sizeof(value), 100, &opts);
assert(map_fd >= 0);
/* empty tree. get_next_key should return ENOENT */
- assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -ENOENT);
/* get and verify the first key, get the second one should fail. */
key_p->prefixlen = 16;
@@ -562,8 +575,7 @@ static void test_lpm_get_next_key(void)
assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
key_p->data[1] == 168);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* no exact matching key should get the first one in post order. */
key_p->prefixlen = 8;
@@ -587,8 +599,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* Add one more element (total three) */
key_p->prefixlen = 24;
@@ -611,8 +622,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* Add one more element (total four) */
key_p->prefixlen = 24;
@@ -640,8 +650,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* Add one more element (total five) */
key_p->prefixlen = 28;
@@ -675,8 +684,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* no exact matching key should return the first one in post order */
key_p->prefixlen = 22;
@@ -703,9 +711,9 @@ static void *lpm_test_command(void *arg)
{
int i, j, ret, iter, key_size;
struct lpm_mt_test_info *info = arg;
- struct bpf_lpm_trie_key *key_p;
+ struct bpf_lpm_trie_key_u8 *key_p;
- key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+ key_size = sizeof(*key_p) + sizeof(__u32);
key_p = alloca(key_size);
for (iter = 0; iter < info->iter; iter++)
for (i = 0; i < MAX_TEST_KEYS; i++) {
@@ -727,7 +735,7 @@ static void *lpm_test_command(void *arg)
ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
assert(ret == 0 || errno == ENOENT);
} else {
- struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+ struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size);
ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
}
@@ -753,6 +761,7 @@ static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
static void test_lpm_multi_thread(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
struct lpm_mt_test_info info[4];
size_t key_size, value_size;
pthread_t thread_id[4];
@@ -761,9 +770,8 @@ static void test_lpm_multi_thread(void)
/* create a trie */
value_size = sizeof(__u32);
- key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
- 100, BPF_F_NO_PREALLOC);
+ key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts);
/* create 4 threads to test update, delete, lookup and get_next_key */
setup_lpm_mt_test_info(&info[0], map_fd);
@@ -780,7 +788,379 @@ static void test_lpm_multi_thread(void)
close(map_fd);
}
-int main(void)
+static int lpm_trie_create(unsigned int key_size, unsigned int value_size, unsigned int max_entries)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+ int fd;
+
+ opts.map_flags = BPF_F_NO_PREALLOC;
+ fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie", key_size, value_size, max_entries,
+ &opts);
+ CHECK(fd < 0, "bpf_map_create", "error %d\n", errno);
+
+ return fd;
+}
+
+static void test_lpm_trie_update_flags(void)
+{
+ struct lpm_trie_int_key key;
+ unsigned int value, got;
+ int fd, err;
+
+ fd = lpm_trie_create(sizeof(key), sizeof(value), 3);
+
+ /* invalid flags (Error) */
+ key.prefixlen = 32;
+ key.data = 0;
+ value = 0;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_F_LOCK);
+ CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);
+
+ /* invalid flags (Error) */
+ key.prefixlen = 32;
+ key.data = 0;
+ value = 0;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST | BPF_EXIST);
+ CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);
+
+ /* overwrite an empty qp-trie (Error) */
+ key.prefixlen = 32;
+ key.data = 0;
+ value = 2;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err != -ENOENT, "overwrite empty qp-trie", "error %d\n", err);
+
+ /* add a new node */
+ key.prefixlen = 16;
+ key.data = 0;
+ value = 1;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* add the same node as new node (Error) */
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err != -EEXIST, "add new elem again", "error %d\n", err);
+
+ /* overwrite the existed node */
+ value = 4;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err, "overwrite elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* overwrite the node */
+ value = 1;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+ CHECK(err, "update elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* overwrite a non-existent node which is the prefix of the first
+ * node (Error).
+ */
+ key.prefixlen = 8;
+ key.data = 0;
+ value = 2;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);
+
+ /* add a new node which is the prefix of the first node */
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup key", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* add another new node which will be the sibling of the first node */
+ key.prefixlen = 9;
+ key.data = htobe32(1 << 23);
+ value = 5;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup key", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* overwrite the third node */
+ value = 3;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+ CHECK(err, "overwrite elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup key", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* delete the second node to make it an intermediate node */
+ key.prefixlen = 8;
+ key.data = 0;
+ err = bpf_map_delete_elem(fd, &key);
+ CHECK(err, "del elem", "error %d\n", err);
+
+ /* overwrite the intermediate node (Error) */
+ value = 2;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);
+
+ close(fd);
+}
+
+static void test_lpm_trie_update_full_map(void)
+{
+ struct lpm_trie_int_key key;
+ int value, got;
+ int fd, err;
+
+ fd = lpm_trie_create(sizeof(key), sizeof(value), 3);
+
+ /* add a new node */
+ key.prefixlen = 16;
+ key.data = 0;
+ value = 0;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* add new node */
+ key.prefixlen = 8;
+ key.data = 0;
+ value = 1;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* add new node */
+ key.prefixlen = 9;
+ key.data = htobe32(1 << 23);
+ value = 2;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* try to add more node (Error) */
+ key.prefixlen = 32;
+ key.data = 0;
+ value = 3;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+ CHECK(err != -ENOSPC, "add to full trie", "error %d\n", err);
+
+ /* update the value of an existed node with BPF_EXIST */
+ key.prefixlen = 16;
+ key.data = 0;
+ value = 4;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err, "overwrite elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* update the value of an existed node with BPF_ANY */
+ key.prefixlen = 9;
+ key.data = htobe32(1 << 23);
+ value = 5;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+ CHECK(err, "overwrite elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ close(fd);
+}
+
+static int cmp_str(const void *a, const void *b)
+{
+ const char *str_a = *(const char **)a, *str_b = *(const char **)b;
+
+ return strcmp(str_a, str_b);
+}
+
+/* Save strings in LPM trie. The trailing '\0' for each string will be
+ * accounted in the prefixlen. The strings returned during the iteration
+ * should be sorted as expected.
+ */
+static void test_lpm_trie_iterate_strs(void)
+{
+ static const char * const keys[] = {
+ "ab", "abO", "abc", "abo", "abS", "abcd",
+ };
+ const char *sorted_keys[ARRAY_SIZE(keys)];
+ struct lpm_trie_bytes_key key, next_key;
+ unsigned int value, got, i, j, len;
+ struct lpm_trie_bytes_key *cur;
+ int fd, err;
+
+ fd = lpm_trie_create(sizeof(key), sizeof(value), ARRAY_SIZE(keys));
+
+ for (i = 0; i < ARRAY_SIZE(keys); i++) {
+ unsigned int flags;
+
+ /* add i-th element */
+ flags = i % 2 ? BPF_NOEXIST : 0;
+ len = strlen(keys[i]);
+ /* include the trailing '\0' */
+ key.prefixlen = (len + 1) * 8;
+ memset(key.data, 0, sizeof(key.data));
+ memcpy(key.data, keys[i], len);
+ value = i + 100;
+ err = bpf_map_update_elem(fd, &key, &value, flags);
+ CHECK(err, "add elem", "#%u error %d\n", i, err);
+
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "#%u error %d\n", i, err);
+ CHECK(got != value, "lookup elem", "#%u expect %u got %u\n", i, value, got);
+
+ /* re-add i-th element (Error) */
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err != -EEXIST, "re-add elem", "#%u error %d\n", i, err);
+
+ /* Overwrite i-th element */
+ flags = i % 2 ? 0 : BPF_EXIST;
+ value = i;
+ err = bpf_map_update_elem(fd, &key, &value, flags);
+ CHECK(err, "update elem", "error %d\n", err);
+
+ /* Lookup #[0~i] elements */
+ for (j = 0; j <= i; j++) {
+ len = strlen(keys[j]);
+ key.prefixlen = (len + 1) * 8;
+ memset(key.data, 0, sizeof(key.data));
+ memcpy(key.data, keys[j], len);
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "#%u/%u error %d\n", i, j, err);
+ CHECK(got != j, "lookup elem", "#%u/%u expect %u got %u\n",
+ i, j, value, got);
+ }
+ }
+
+ /* Add element to a full qp-trie (Error) */
+ key.prefixlen = sizeof(key.data) * 8;
+ memset(key.data, 0, sizeof(key.data));
+ value = 0;
+ err = bpf_map_update_elem(fd, &key, &value, 0);
+ CHECK(err != -ENOSPC, "add to full qp-trie", "error %d\n", err);
+
+ /* Iterate sorted elements: no deletion */
+ memcpy(sorted_keys, keys, sizeof(keys));
+ qsort(sorted_keys, ARRAY_SIZE(sorted_keys), sizeof(sorted_keys[0]), cmp_str);
+ cur = NULL;
+ for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
+ len = strlen(sorted_keys[i]);
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err, "iterate", "#%u error %d\n", i, err);
+ CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
+ "#%u invalid len %u expect %u\n",
+ i, next_key.prefixlen, (len + 1) * 8);
+ CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
+ "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]);
+
+ cur = &next_key;
+ }
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err != -ENOENT, "more element", "error %d\n", err);
+
+ /* Iterate sorted elements: delete the found key after each iteration */
+ cur = NULL;
+ for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
+ len = strlen(sorted_keys[i]);
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err, "iterate", "#%u error %d\n", i, err);
+ CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
+ "#%u invalid len %u expect %u\n",
+ i, next_key.prefixlen, (len + 1) * 8);
+ CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
+ "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]);
+
+ cur = &next_key;
+
+ err = bpf_map_delete_elem(fd, cur);
+ CHECK(err, "delete", "#%u error %d\n", i, err);
+ }
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err != -ENOENT, "non-empty qp-trie", "error %d\n", err);
+
+ close(fd);
+}
+
+/* Use the fixed prefixlen (32) and save integers in LPM trie. The iteration of
+ * LPM trie will return these integers in big-endian order, therefore, convert
+ * these integers to big-endian before update. After each iteration, delete the
+ * found key (the smallest integer) and expect the next iteration will return
+ * the second smallest number.
+ */
+static void test_lpm_trie_iterate_ints(void)
+{
+ struct lpm_trie_int_key key, next_key;
+ unsigned int i, max_entries;
+ struct lpm_trie_int_key *cur;
+ unsigned int *data_set;
+ int fd, err;
+ bool value;
+
+ max_entries = 4096;
+ data_set = calloc(max_entries, sizeof(*data_set));
+ CHECK(!data_set, "malloc", "no mem\n");
+ for (i = 0; i < max_entries; i++)
+ data_set[i] = i;
+
+ fd = lpm_trie_create(sizeof(key), sizeof(value), max_entries);
+ value = true;
+ for (i = 0; i < max_entries; i++) {
+ key.prefixlen = 32;
+ key.data = htobe32(data_set[i]);
+
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add elem", "#%u error %d\n", i, err);
+ }
+
+ cur = NULL;
+ for (i = 0; i < max_entries; i++) {
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err, "iterate", "#%u error %d\n", i, err);
+ CHECK(next_key.prefixlen != 32, "iterate", "#%u invalid len %u\n",
+ i, next_key.prefixlen);
+ CHECK(be32toh(next_key.data) != data_set[i], "iterate", "#%u got 0x%x exp 0x%x\n",
+ i, be32toh(next_key.data), data_set[i]);
+ cur = &next_key;
+
+ /*
+ * Delete the minimal key, the next call of bpf_get_next_key()
+ * will return the second minimal key.
+ */
+ err = bpf_map_delete_elem(fd, &next_key);
+ CHECK(err, "del elem", "#%u elem error %d\n", i, err);
+ }
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err != -ENOENT, "more element", "error %d\n", err);
+
+ err = bpf_map_get_next_key(fd, NULL, &next_key);
+ CHECK(err != -ENOENT, "no-empty qp-trie", "error %d\n", err);
+
+ free(data_set);
+
+ close(fd);
+}
+
+void test_lpm_trie_map_basic_ops(void)
{
int i;
@@ -799,6 +1179,10 @@ int main(void)
test_lpm_get_next_key();
test_lpm_multi_thread();
- printf("test_lpm: OK\n");
- return 0;
+ test_lpm_trie_update_flags();
+ test_lpm_trie_update_full_map();
+ test_lpm_trie_iterate_strs();
+ test_lpm_trie_iterate_ints();
+
+ printf("%s: PASS\n", __func__);
}
diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
new file mode 100644
index 000000000000..fe3e19f96244
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+struct test_lpm_key {
+ __u32 prefix;
+ struct in_addr ipv4;
+};
+
+static void map_batch_update(int map_fd, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ __u32 i;
+ int err;
+ char buff[16] = { 0 };
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ for (i = 0; i < max_entries; i++) {
+ keys[i].prefix = 32;
+ snprintf(buff, 16, "192.168.1.%d", i + 1);
+ inet_pton(AF_INET, buff, &keys[i].ipv4);
+ values[i] = i + 1;
+ }
+
+ err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+ CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ char buff[16] = { 0 };
+ int lower_byte = 0;
+ __u32 i;
+
+ memset(visited, 0, max_entries * sizeof(*visited));
+ for (i = 0; i < max_entries; i++) {
+ inet_ntop(AF_INET, &keys[i].ipv4, buff, 32);
+ CHECK(sscanf(buff, "192.168.1.%d", &lower_byte) == EOF,
+ "sscanf()", "error: i %d\n", i);
+ CHECK(lower_byte != values[i], "key/value checking",
+ "error: i %d key %s value %d\n", i, buff, values[i]);
+ visited[i] = 1;
+ }
+ for (i = 0; i < max_entries; i++) {
+ CHECK(visited[i] != 1, "visited checking",
+ "error: keys array at index %d missing\n", i);
+ }
+}
+
+void test_lpm_trie_map_batch_ops(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, create_opts, .map_flags = BPF_F_NO_PREALLOC);
+ struct test_lpm_key *keys, key;
+ int map_fd, *values, *visited;
+ __u32 step, count, total, total_success;
+ const __u32 max_entries = 10;
+ __u64 batch = 0;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map",
+ sizeof(struct test_lpm_key), sizeof(int),
+ max_entries, &create_opts);
+ CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n",
+ strerror(errno));
+
+ keys = malloc(max_entries * sizeof(struct test_lpm_key));
+ values = malloc(max_entries * sizeof(int));
+ visited = malloc(max_entries * sizeof(int));
+ CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
+ strerror(errno));
+
+ total_success = 0;
+ for (step = 1; step < max_entries; step++) {
+ map_batch_update(map_fd, max_entries, keys, values);
+ map_batch_verify(visited, max_entries, keys, values);
+ memset(keys, 0, max_entries * sizeof(*keys));
+ memset(values, 0, max_entries * sizeof(*values));
+ batch = 0;
+ total = 0;
+ /* iteratively lookup/delete elements with 'step'
+ * elements each.
+ */
+ count = step;
+ while (true) {
+ err = bpf_map_lookup_batch(map_fd,
+ total ? &batch : NULL, &batch,
+ keys + total, values + total, &count, &opts);
+
+ CHECK((err && errno != ENOENT), "lookup with steps",
+ "error: %s\n", strerror(errno));
+
+ total += count;
+ if (err)
+ break;
+ }
+
+ CHECK(total != max_entries, "lookup with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ map_batch_verify(visited, max_entries, keys, values);
+
+ total = 0;
+ count = step;
+ while (total < max_entries) {
+ if (max_entries - total < step)
+ count = max_entries - total;
+ err = bpf_map_delete_batch(map_fd, keys + total, &count,
+ &opts);
+ CHECK((err && errno != ENOENT), "delete batch",
+ "error: %s\n", strerror(errno));
+ total += count;
+ if (err)
+ break;
+ }
+ CHECK(total != max_entries, "delete with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ /* check map is empty, errno == ENOENT */
+ err = bpf_map_get_next_key(map_fd, NULL, &key);
+ CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
+ "error: %s\n", strerror(errno));
+
+ total_success++;
+ }
+
+ CHECK(total_success == 0, "check total_success",
+ "unexpected failure\n");
+
+ printf("%s:PASS\n", __func__);
+
+ free(keys);
+ free(values);
+ free(visited);
+ close(map_fd);
+}
diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c
new file mode 100644
index 000000000000..0ba015686492
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <linux/bpf.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+struct test_lpm_key {
+ __u32 prefix;
+ __u32 data;
+};
+
+struct get_next_key_ctx {
+ struct test_lpm_key key;
+ bool start;
+ bool stop;
+ int map_fd;
+ int loop;
+};
+
+static void *get_next_key_fn(void *arg)
+{
+ struct get_next_key_ctx *ctx = arg;
+ struct test_lpm_key next_key;
+ int i = 0;
+
+ while (!ctx->start)
+ usleep(1);
+
+ while (!ctx->stop && i++ < ctx->loop)
+ bpf_map_get_next_key(ctx->map_fd, &ctx->key, &next_key);
+
+ return NULL;
+}
+
+static void abort_get_next_key(struct get_next_key_ctx *ctx, pthread_t *tids,
+ unsigned int nr)
+{
+ unsigned int i;
+
+ ctx->stop = true;
+ ctx->start = true;
+ for (i = 0; i < nr; i++)
+ pthread_join(tids[i], NULL);
+}
+
+/* This test aims to prevent regression of future. As long as the kernel does
+ * not panic, it is considered as success.
+ */
+void test_lpm_trie_map_get_next_key(void)
+{
+#define MAX_NR_THREADS 8
+ LIBBPF_OPTS(bpf_map_create_opts, create_opts,
+ .map_flags = BPF_F_NO_PREALLOC);
+ struct test_lpm_key key = {};
+ __u32 val = 0;
+ int map_fd;
+ const __u32 max_prefixlen = 8 * (sizeof(key) - sizeof(key.prefix));
+ const __u32 max_entries = max_prefixlen + 1;
+ unsigned int i, nr = MAX_NR_THREADS, loop = 65536;
+ pthread_t tids[MAX_NR_THREADS];
+ struct get_next_key_ctx ctx;
+ int err;
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map",
+ sizeof(struct test_lpm_key), sizeof(__u32),
+ max_entries, &create_opts);
+ CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n",
+ strerror(errno));
+
+ for (i = 0; i <= max_prefixlen; i++) {
+ key.prefix = i;
+ err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+ CHECK(err, "bpf_map_update_elem()", "error:%s\n",
+ strerror(errno));
+ }
+
+ ctx.start = false;
+ ctx.stop = false;
+ ctx.map_fd = map_fd;
+ ctx.loop = loop;
+ memcpy(&ctx.key, &key, sizeof(key));
+
+ for (i = 0; i < nr; i++) {
+ err = pthread_create(&tids[i], NULL, get_next_key_fn, &ctx);
+ if (err) {
+ abort_get_next_key(&ctx, tids, i);
+ CHECK(err, "pthread_create", "error %d\n", err);
+ }
+ }
+
+ ctx.start = true;
+ for (i = 0; i < nr; i++)
+ pthread_join(tids[i], NULL);
+
+ printf("%s:PASS\n", __func__);
+
+ close(map_fd);
+}
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
new file mode 100644
index 000000000000..79c3ccadb962
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+#define OUTER_MAP_ENTRIES 10
+
+static __u32 get_map_id_from_fd(int map_fd)
+{
+ struct bpf_map_info map_info = {};
+ uint32_t info_len = sizeof(map_info);
+ int ret;
+
+ ret = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
+ CHECK(ret < 0, "Finding map info failed", "error:%s\n",
+ strerror(errno));
+
+ return map_info.id;
+}
+
+/* This creates number of OUTER_MAP_ENTRIES maps that will be stored
+ * in outer map and return the created map_fds
+ */
+static void create_inner_maps(enum bpf_map_type map_type,
+ __u32 *inner_map_fds)
+{
+ int map_fd, map_index, ret;
+ __u32 map_key = 0, map_id;
+ char map_name[16];
+
+ for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) {
+ memset(map_name, 0, sizeof(map_name));
+ snprintf(map_name, sizeof(map_name), "inner_map_fd_%d", map_index);
+ map_fd = bpf_map_create(map_type, map_name, sizeof(__u32),
+ sizeof(__u32), 1, NULL);
+ CHECK(map_fd < 0,
+ "inner bpf_map_create() failed",
+ "map_type=(%d) map_name(%s), error:%s\n",
+ map_type, map_name, strerror(errno));
+
+ /* keep track of the inner map fd as it is required
+ * to add records in outer map
+ */
+ inner_map_fds[map_index] = map_fd;
+
+ /* Add entry into this created map
+ * eg: map1 key = 0, value = map1's map id
+ * map2 key = 0, value = map2's map id
+ */
+ map_id = get_map_id_from_fd(map_fd);
+ ret = bpf_map_update_elem(map_fd, &map_key, &map_id, 0);
+ CHECK(ret != 0,
+ "bpf_map_update_elem failed",
+ "map_type=(%d) map_name(%s), error:%s\n",
+ map_type, map_name, strerror(errno));
+ }
+}
+
+static int create_outer_map(enum bpf_map_type map_type, __u32 inner_map_fd)
+{
+ int outer_map_fd;
+ LIBBPF_OPTS(bpf_map_create_opts, attr);
+
+ attr.inner_map_fd = inner_map_fd;
+ outer_map_fd = bpf_map_create(map_type, "outer_map", sizeof(__u32),
+ sizeof(__u32), OUTER_MAP_ENTRIES,
+ &attr);
+ CHECK(outer_map_fd < 0,
+ "outer bpf_map_create()",
+ "map_type=(%d), error:%s\n",
+ map_type, strerror(errno));
+
+ return outer_map_fd;
+}
+
+static void validate_fetch_results(int outer_map_fd,
+ __u32 *fetched_keys, __u32 *fetched_values,
+ __u32 max_entries_fetched)
+{
+ __u32 inner_map_key, inner_map_value;
+ int inner_map_fd, entry, err;
+ __u32 outer_map_value;
+
+ for (entry = 0; entry < max_entries_fetched; ++entry) {
+ outer_map_value = fetched_values[entry];
+ inner_map_fd = bpf_map_get_fd_by_id(outer_map_value);
+ CHECK(inner_map_fd < 0,
+ "Failed to get inner map fd",
+ "from id(%d), error=%s\n",
+ outer_map_value, strerror(errno));
+ err = bpf_map_get_next_key(inner_map_fd, NULL, &inner_map_key);
+ CHECK(err != 0,
+ "Failed to get inner map key",
+ "error=%s\n", strerror(errno));
+
+ err = bpf_map_lookup_elem(inner_map_fd, &inner_map_key,
+ &inner_map_value);
+
+ close(inner_map_fd);
+
+ CHECK(err != 0,
+ "Failed to get inner map value",
+ "for key(%d), error=%s\n",
+ inner_map_key, strerror(errno));
+
+ /* Actual value validation */
+ CHECK(outer_map_value != inner_map_value,
+ "Failed to validate inner map value",
+ "fetched(%d) and lookedup(%d)!\n",
+ outer_map_value, inner_map_value);
+ }
+}
+
+static void fetch_and_validate(int outer_map_fd,
+ struct bpf_map_batch_opts *opts,
+ __u32 batch_size, bool delete_entries,
+ bool has_holes)
+{
+ int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes;
+ __u32 *fetched_keys, *fetched_values, total_fetched = 0, i;
+ __u32 batch_key = 0, fetch_count, step_size = batch_size;
+ __u32 value_size = sizeof(__u32);
+
+ /* Total entries needs to be fetched */
+ fetched_keys = calloc(max_entries, value_size);
+ fetched_values = calloc(max_entries, value_size);
+ CHECK((!fetched_keys || !fetched_values),
+ "Memory allocation failed for fetched_keys or fetched_values",
+ "error=%s\n", strerror(errno));
+
+ /* hash map may not always return full batch */
+ for (i = 0; i < OUTER_MAP_ENTRIES; i++) {
+ fetch_count = step_size;
+ err = delete_entries
+ ? bpf_map_lookup_and_delete_batch(outer_map_fd,
+ total_fetched ? &batch_key : NULL,
+ &batch_key,
+ fetched_keys + total_fetched,
+ fetched_values + total_fetched,
+ &fetch_count, opts)
+ : bpf_map_lookup_batch(outer_map_fd,
+ total_fetched ? &batch_key : NULL,
+ &batch_key,
+ fetched_keys + total_fetched,
+ fetched_values + total_fetched,
+ &fetch_count, opts);
+
+ if (err && errno == ENOSPC) {
+ /* Fetch again with higher batch size */
+ total_fetched = 0;
+ step_size += batch_size;
+ continue;
+ }
+
+ CHECK((err < 0 && (errno != ENOENT)),
+ "lookup with steps failed",
+ "error: %s\n", strerror(errno));
+
+ /* Update the total fetched number */
+ total_fetched += fetch_count;
+ if (err)
+ break;
+ }
+
+ CHECK((total_fetched != max_entries),
+ "Unable to fetch expected entries !",
+ "total_fetched(%d) and max_entries(%d) error: (%d):%s\n",
+ total_fetched, max_entries, errno, strerror(errno));
+
+ /* validate the fetched entries */
+ validate_fetch_results(outer_map_fd, fetched_keys,
+ fetched_values, total_fetched);
+ printf("batch_op(%s) is successful with batch_size(%d)\n",
+ delete_entries ? "LOOKUP_AND_DELETE" : "LOOKUP", batch_size);
+
+ free(fetched_keys);
+ free(fetched_values);
+}
+
+static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
+ enum bpf_map_type inner_map_type,
+ bool has_holes)
+{
+ __u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes;
+ __u32 *outer_map_keys, *inner_map_fds;
+ LIBBPF_OPTS(bpf_map_batch_opts, opts);
+ __u32 value_size = sizeof(__u32);
+ int batch_size[2] = {5, 10};
+ __u32 map_index, op_index;
+ int outer_map_fd, ret;
+
+ outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size);
+ inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size);
+ CHECK((!outer_map_keys || !inner_map_fds),
+ "Memory allocation failed for outer_map_keys or inner_map_fds",
+ "error=%s\n", strerror(errno));
+
+ create_inner_maps(inner_map_type, inner_map_fds);
+
+ outer_map_fd = create_outer_map(outer_map_type, *inner_map_fds);
+ /* create outer map keys */
+ for (map_index = 0; map_index < max_entries; map_index++)
+ outer_map_keys[map_index] =
+ ((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+ ? 9 : 1000) - map_index;
+
+ /* This condition is only meaningful for array of maps.
+ *
+ * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say
+ * max_entries is short for n, then outer_map_keys looks like:
+ *
+ * [n, n-1, ... 2, 1]
+ *
+ * We change it to
+ *
+ * [n, n-1, ... 2, 0]
+ *
+ * So it will leave key 1 as a hole. It will serve to test the
+ * correctness when batch on an array: a "non-exist" key might be
+ * actually allocated and returned from key iteration.
+ */
+ if (has_holes)
+ outer_map_keys[max_entries - 1]--;
+
+ /* batch operation - map_update */
+ ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
+ inner_map_fds, &max_entries, &opts);
+ CHECK(ret != 0,
+ "Failed to update the outer map batch ops",
+ "error=%s\n", strerror(errno));
+
+ /* batch operation - map_lookup */
+ for (op_index = 0; op_index < 2; ++op_index)
+ fetch_and_validate(outer_map_fd, &opts,
+ batch_size[op_index], false,
+ has_holes);
+
+ /* batch operation - map_lookup_delete */
+ if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ fetch_and_validate(outer_map_fd, &opts,
+ max_entries, true /*delete*/,
+ has_holes);
+
+ /* close all map fds */
+ for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++)
+ close(inner_map_fds[map_index]);
+ close(outer_map_fd);
+
+ free(inner_map_fds);
+ free(outer_map_keys);
+}
+
+void test_map_in_map_batch_ops_array(void)
+{
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
+ printf("%s:PASS with inner ARRAY map\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false);
+ printf("%s:PASS with inner HASH map\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true);
+ printf("%s:PASS with inner ARRAY map with holes\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true);
+ printf("%s:PASS with inner HASH map with holes\n", __func__);
+}
+
+void test_map_in_map_batch_ops_hash(void)
+{
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
+ printf("%s:PASS with inner ARRAY map\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false);
+ printf("%s:PASS with inner HASH map\n", __func__);
+}
diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
new file mode 100644
index 000000000000..1c7c04288eff
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <bpf_util.h>
+#include <test_maps.h>
+
+#include "map_percpu_stats.skel.h"
+
+#define MAX_ENTRIES 16384
+#define MAX_ENTRIES_HASH_OF_MAPS 64
+#define N_THREADS 8
+#define MAX_MAP_KEY_SIZE 4
+#define PCPU_MIN_UNIT_SIZE 32768
+
+static void map_info(int map_fd, struct bpf_map_info *info)
+{
+ __u32 len = sizeof(*info);
+ int ret;
+
+ memset(info, 0, sizeof(*info));
+
+ ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
+ CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
+}
+
+static const char *map_type_to_s(__u32 type)
+{
+ switch (type) {
+ case BPF_MAP_TYPE_HASH:
+ return "HASH";
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ return "PERCPU_HASH";
+ case BPF_MAP_TYPE_LRU_HASH:
+ return "LRU_HASH";
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ return "LRU_PERCPU_HASH";
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ return "BPF_MAP_TYPE_HASH_OF_MAPS";
+ default:
+ return "<define-me>";
+ }
+}
+
+static __u32 map_count_elements(__u32 type, int map_fd)
+{
+ __u32 key = -1;
+ int n = 0;
+
+ while (!bpf_map_get_next_key(map_fd, &key, &key))
+ n++;
+ return n;
+}
+
+#define BATCH true
+
+static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
+{
+ static __u8 values[(8 << 10) * MAX_ENTRIES];
+ void *in_batch = NULL, *out_batch;
+ __u32 save_count = count;
+ int ret;
+
+ ret = bpf_map_lookup_and_delete_batch(map_fd,
+ &in_batch, &out_batch,
+ keys, values, &count,
+ NULL);
+
+ /*
+ * Despite what uapi header says, lookup_and_delete_batch will return
+ * -ENOENT in case we successfully have deleted all elements, so check
+ * this separately
+ */
+ CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
+ "error: %s\n", strerror(errno));
+
+ CHECK(count != save_count,
+ "bpf_map_lookup_and_delete_batch",
+ "deleted not all elements: removed=%u expected=%u\n",
+ count, save_count);
+}
+
+static void delete_all_elements(__u32 type, int map_fd, bool batch)
+{
+ static __u8 val[8 << 10]; /* enough for 1024 CPUs */
+ __u32 key = -1;
+ void *keys;
+ __u32 i, n;
+ int ret;
+
+ keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
+ CHECK(!keys, "calloc", "error: %s\n", strerror(errno));
+
+ for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
+ memcpy(keys + n*MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);
+
+ if (batch) {
+ /* Can't mix delete_batch and delete_and_lookup_batch because
+ * they have different semantics in relation to the keys
+ * argument. However, delete_batch utilize map_delete_elem,
+ * so we actually test it in non-batch scenario */
+ delete_and_lookup_batch(map_fd, keys, n);
+ } else {
+ /* Intentionally mix delete and lookup_and_delete so we can test both */
+ for (i = 0; i < n; i++) {
+ void *keyp = keys + i*MAX_MAP_KEY_SIZE;
+
+ if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ ret = bpf_map_delete_elem(map_fd, keyp);
+ CHECK(ret < 0, "bpf_map_delete_elem",
+ "error: key %u: %s\n", i, strerror(errno));
+ } else {
+ ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
+ CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
+ "error: key %u: %s\n", i, strerror(errno));
+ }
+ }
+ }
+
+ free(keys);
+}
+
+static bool is_lru(__u32 map_type)
+{
+ return map_type == BPF_MAP_TYPE_LRU_HASH ||
+ map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+static bool is_percpu(__u32 map_type)
+{
+ return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+struct upsert_opts {
+ __u32 map_type;
+ int map_fd;
+ __u32 n;
+ bool retry_for_nomem;
+};
+
+static int create_small_hash(void)
+{
+ int map_fd;
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
+ CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
+ strerror(errno), "small");
+
+ return map_fd;
+}
+
+static bool retry_for_nomem_fn(int err)
+{
+ return err == ENOMEM;
+}
+
+static void *patch_map_thread(void *arg)
+{
+ /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
+ static __u8 blob[8 << 10];
+ struct upsert_opts *opts = arg;
+ void *val_ptr;
+ int val;
+ int ret;
+ int i;
+
+ for (i = 0; i < opts->n; i++) {
+ if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ val = create_small_hash();
+ val_ptr = &val;
+ } else if (is_percpu(opts->map_type)) {
+ val_ptr = blob;
+ } else {
+ val = rand();
+ val_ptr = &val;
+ }
+
+ /* 2 seconds may be enough ? */
+ if (opts->retry_for_nomem)
+ ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
+ 40, retry_for_nomem_fn);
+ else
+ ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
+ CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
+
+ if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ close(val);
+ }
+ return NULL;
+}
+
+static void upsert_elements(struct upsert_opts *opts)
+{
+ pthread_t threads[N_THREADS];
+ int ret;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++) {
+ ret = pthread_create(&i[threads], NULL, patch_map_thread, opts);
+ CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++) {
+ ret = pthread_join(i[threads], NULL);
+ CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
+ }
+}
+
+static __u32 read_cur_elements(int iter_fd)
+{
+ char buf[64];
+ ssize_t n;
+ __u32 ret;
+
+ n = read(iter_fd, buf, sizeof(buf)-1);
+ CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
+ buf[n] = '\0';
+
+ errno = 0;
+ ret = (__u32)strtol(buf, NULL, 10);
+ CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));
+
+ return ret;
+}
+
+static __u32 get_cur_elements(int map_id)
+{
+ struct map_percpu_stats *skel;
+ struct bpf_link *link;
+ __u32 n_elements;
+ int iter_fd;
+ int ret;
+
+ skel = map_percpu_stats__open();
+ CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));
+
+ skel->bss->target_id = map_id;
+
+ ret = map_percpu_stats__load(skel);
+ CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));
+
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
+ CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));
+
+ n_elements = read_cur_elements(iter_fd);
+
+ close(iter_fd);
+ bpf_link__destroy(link);
+ map_percpu_stats__destroy(skel);
+
+ return n_elements;
+}
+
+static void check_expected_number_elements(__u32 n_inserted, int map_fd,
+ struct bpf_map_info *info)
+{
+ __u32 n_real;
+ __u32 n_iter;
+
+ /* Count the current number of elements in the map by iterating through
+ * all the map keys via bpf_get_next_key
+ */
+ n_real = map_count_elements(info->type, map_fd);
+
+ /* The "real" number of elements should be the same as the inserted
+ * number of elements in all cases except LRU maps, where some elements
+ * may have been evicted
+ */
+ if (n_inserted == 0 || !is_lru(info->type))
+ CHECK(n_inserted != n_real, "map_count_elements",
+ "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);
+
+ /* Count the current number of elements in the map using an iterator */
+ n_iter = get_cur_elements(info->id);
+
+ /* Both counts should be the same, as all updates are over */
+ CHECK(n_iter != n_real, "get_cur_elements",
+ "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
+ n_iter, n_real, map_type_to_s(info->type), info->map_flags);
+}
+
+static void __test(int map_fd)
+{
+ struct upsert_opts opts = {
+ .map_fd = map_fd,
+ };
+ struct bpf_map_info info;
+
+ map_info(map_fd, &info);
+ opts.map_type = info.type;
+ opts.n = info.max_entries;
+
+ /* Reduce the number of elements we are updating such that we don't
+ * bump into -E2BIG from non-preallocated hash maps, but still will
+ * have some evictions for LRU maps */
+ if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
+ opts.n -= 512;
+ else
+ opts.n /= 2;
+
+ /* per-cpu bpf memory allocator may not be able to allocate per-cpu
+ * pointer successfully and it can not refill free llist timely, and
+ * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate
+ * the problem temporarily.
+ */
+ opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
+
+ /*
+ * Upsert keys [0, n) under some competition: with random values from
+ * N_THREADS threads. Check values, then delete all elements and check
+ * values again.
+ */
+ upsert_elements(&opts);
+ check_expected_number_elements(opts.n, map_fd, &info);
+ delete_all_elements(info.type, map_fd, !BATCH);
+ check_expected_number_elements(0, map_fd, &info);
+
+ /* Now do the same, but using batch delete operations */
+ upsert_elements(&opts);
+ check_expected_number_elements(opts.n, map_fd, &info);
+ delete_all_elements(info.type, map_fd, BATCH);
+ check_expected_number_elements(0, map_fd, &info);
+
+ close(map_fd);
+}
+
+static int map_create_opts(__u32 type, const char *name,
+ struct bpf_map_create_opts *map_opts,
+ __u32 key_size, __u32 val_size)
+{
+ int max_entries;
+ int map_fd;
+
+ if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ max_entries = MAX_ENTRIES_HASH_OF_MAPS;
+ else
+ max_entries = MAX_ENTRIES;
+
+ map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
+ CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
+ strerror(errno), name);
+
+ return map_fd;
+}
+
+static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
+{
+ return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
+}
+
+static int create_hash(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
+
+ return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
+}
+
+static int create_percpu_hash(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
+
+ return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
+}
+
+static int create_hash_prealloc(void)
+{
+ return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
+}
+
+static int create_percpu_hash_prealloc(void)
+{
+ return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
+}
+
+static int create_lru_hash(__u32 type, __u32 map_flags)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);
+
+ return map_create(type, "lru_hash", &map_opts);
+}
+
+static int create_hash_of_maps(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+ .map_flags = BPF_F_NO_PREALLOC,
+ .inner_map_fd = create_small_hash(),
+ );
+ int ret;
+
+ ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
+ &map_opts, sizeof(int), sizeof(int));
+ close(map_opts.inner_map_fd);
+ return ret;
+}
+
+static void map_percpu_stats_hash(void)
+{
+ __test(create_hash());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_hash(void)
+{
+ __test(create_percpu_hash());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_hash_prealloc(void)
+{
+ __test(create_hash_prealloc());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_hash_prealloc(void)
+{
+ __test(create_percpu_hash_prealloc());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_lru_hash(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_lru_hash_no_common(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_lru_hash(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_lru_hash_no_common(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_hash_of_maps(void)
+{
+ __test(create_hash_of_maps());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_map_value_size(void)
+{
+ int fd;
+ int value_sz = PCPU_MIN_UNIT_SIZE + 1;
+ struct bpf_map_create_opts opts = { .sz = sizeof(opts) };
+ enum bpf_map_type map_types[] = { BPF_MAP_TYPE_PERCPU_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH };
+ for (int i = 0; i < ARRAY_SIZE(map_types); i++) {
+ fd = bpf_map_create(map_types[i], NULL, sizeof(__u32), value_sz, 1, &opts);
+ CHECK(fd < 0 && errno != E2BIG, "percpu map value size",
+ "error: %s\n", strerror(errno));
+ }
+ printf("test_%s:PASS\n", __func__);
+}
+
+void test_map_percpu_stats(void)
+{
+ map_percpu_stats_hash();
+ map_percpu_stats_percpu_hash();
+ map_percpu_stats_hash_prealloc();
+ map_percpu_stats_percpu_hash_prealloc();
+ map_percpu_stats_lru_hash();
+ map_percpu_stats_lru_hash_no_common();
+ map_percpu_stats_percpu_lru_hash();
+ map_percpu_stats_percpu_lru_hash_no_common();
+ map_percpu_stats_hash_of_maps();
+ map_percpu_stats_map_value_size();
+}
diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
index e569edc679d8..af10c309359a 100644
--- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
+++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
@@ -19,16 +19,12 @@
#include <test_btf.h>
#include <test_maps.h>
-static struct bpf_create_map_attr xattr = {
- .name = "sk_storage_map",
- .map_type = BPF_MAP_TYPE_SK_STORAGE,
- .map_flags = BPF_F_NO_PREALLOC,
- .max_entries = 0,
- .key_size = 4,
- .value_size = 8,
+static struct bpf_map_create_opts map_opts = {
+ .sz = sizeof(map_opts),
.btf_key_type_id = 1,
.btf_value_type_id = 3,
.btf_fd = -1,
+ .map_flags = BPF_F_NO_PREALLOC,
};
static unsigned int nr_sk_threads_done;
@@ -140,7 +136,7 @@ static int load_btf(void)
memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types),
btf_str_sec, sizeof(btf_str_sec));
- return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0);
+ return bpf_btf_load(raw_btf, sizeof(raw_btf), NULL);
}
static int create_sk_storage_map(void)
@@ -150,13 +146,13 @@ static int create_sk_storage_map(void)
btf_fd = load_btf();
CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n",
btf_fd, errno);
- xattr.btf_fd = btf_fd;
+ map_opts.btf_fd = btf_fd;
- map_fd = bpf_create_map_xattr(&xattr);
- xattr.btf_fd = -1;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts);
+ map_opts.btf_fd = -1;
close(btf_fd);
CHECK(map_fd == -1,
- "bpf_create_map_xattr()", "errno:%d\n", errno);
+ "bpf_map_create()", "errno:%d\n", errno);
return map_fd;
}
@@ -416,7 +412,7 @@ static void test_sk_storage_map_stress_free(void)
rlim_new.rlim_max = rlim_new.rlim_cur + 128;
err = setrlimit(RLIMIT_NOFILE, &rlim_new);
CHECK(err, "setrlimit(RLIMIT_NOFILE)", "rlim_new:%lu errno:%d",
- rlim_new.rlim_cur, errno);
+ (unsigned long) rlim_new.rlim_cur, errno);
}
err = do_sk_storage_map_stress_free();
@@ -462,21 +458,21 @@ static void test_sk_storage_map_basic(void)
struct {
int cnt;
int lock;
- } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value;
- struct bpf_create_map_attr bad_xattr;
+ } value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value;
+ struct bpf_map_create_opts bad_xattr;
int btf_fd, map_fd, sk_fd, err;
btf_fd = load_btf();
CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n",
btf_fd, errno);
- xattr.btf_fd = btf_fd;
+ map_opts.btf_fd = btf_fd;
sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
CHECK(sk_fd == -1, "socket()", "sk_fd:%d errno:%d\n",
sk_fd, errno);
- map_fd = bpf_create_map_xattr(&xattr);
- CHECK(map_fd == -1, "bpf_create_map_xattr(good_xattr)",
+ map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts);
+ CHECK(map_fd == -1, "bpf_map_create(good_xattr)",
"map_fd:%d errno:%d\n", map_fd, errno);
/* Add new elem */
@@ -487,38 +483,41 @@ static void test_sk_storage_map_basic(void)
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value,
BPF_EXIST | BPF_F_LOCK);
CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Bump the cnt and update with BPF_EXIST */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST);
CHECK(err, "bpf_map_update_elem(BPF_EXIST)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Update with BPF_NOEXIST */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value,
BPF_NOEXIST | BPF_F_LOCK);
CHECK(!err || errno != EEXIST,
@@ -530,22 +529,23 @@ static void test_sk_storage_map_basic(void)
value.cnt -= 1;
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Bump the cnt again and update with map_flags == 0 */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n",
err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Test delete elem */
err = bpf_map_delete_elem(map_fd, &sk_fd);
@@ -560,31 +560,29 @@ static void test_sk_storage_map_basic(void)
CHECK(!err || errno != ENOENT, "bpf_map_delete_elem()",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
+ memcpy(&bad_xattr, &map_opts, sizeof(map_opts));
bad_xattr.btf_key_type_id = 0;
- err = bpf_create_map_xattr(&bad_xattr);
- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr);
+ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
+ memcpy(&bad_xattr, &map_opts, sizeof(map_opts));
bad_xattr.btf_key_type_id = 3;
- err = bpf_create_map_xattr(&bad_xattr);
- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr);
+ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
- bad_xattr.max_entries = 1;
- err = bpf_create_map_xattr(&bad_xattr);
- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 1, &map_opts);
+ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
+ memcpy(&bad_xattr, &map_opts, sizeof(map_opts));
bad_xattr.map_flags = 0;
- err = bpf_create_map_xattr(&bad_xattr);
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr);
CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- xattr.btf_fd = -1;
+ map_opts.btf_fd = -1;
close(btf_fd);
close(map_fd);
close(sk_fd);
diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
new file mode 100644
index 000000000000..a4121d2248ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "test_maps.h"
+#include "task_local_storage_helpers.h"
+#include "read_bpf_task_storage_busy.skel.h"
+
+struct lookup_ctx {
+ bool start;
+ bool stop;
+ int pid_fd;
+ int map_fd;
+ int loop;
+};
+
+static void *lookup_fn(void *arg)
+{
+ struct lookup_ctx *ctx = arg;
+ long value;
+ int i = 0;
+
+ while (!ctx->start)
+ usleep(1);
+
+ while (!ctx->stop && i++ < ctx->loop)
+ bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value);
+ return NULL;
+}
+
+static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr)
+{
+ unsigned int i;
+
+ ctx->stop = true;
+ ctx->start = true;
+ for (i = 0; i < nr; i++)
+ pthread_join(tids[i], NULL);
+}
+
+void test_task_storage_map_stress_lookup(void)
+{
+#define MAX_NR_THREAD 4096
+ unsigned int i, nr = 256, loop = 8192, cpu = 0;
+ struct read_bpf_task_storage_busy *skel;
+ pthread_t tids[MAX_NR_THREAD];
+ struct lookup_ctx ctx;
+ cpu_set_t old, new;
+ const char *cfg;
+ int err;
+
+ cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
+ if (cfg) {
+ nr = atoi(cfg);
+ if (nr > MAX_NR_THREAD)
+ nr = MAX_NR_THREAD;
+ }
+ cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
+ if (cfg)
+ loop = atoi(cfg);
+ cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
+ if (cfg)
+ cpu = atoi(cfg);
+
+ skel = read_bpf_task_storage_busy__open_and_load();
+ err = libbpf_get_error(skel);
+ CHECK(err, "open_and_load", "error %d\n", err);
+
+ /* Only for a fully preemptible kernel */
+ if (!skel->kconfig->CONFIG_PREEMPTION) {
+ printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__);
+ read_bpf_task_storage_busy__destroy(skel);
+ skips++;
+ return;
+ }
+
+ /* Save the old affinity setting */
+ sched_getaffinity(getpid(), sizeof(old), &old);
+
+ /* Pinned on a specific CPU */
+ CPU_ZERO(&new);
+ CPU_SET(cpu, &new);
+ sched_setaffinity(getpid(), sizeof(new), &new);
+
+ ctx.start = false;
+ ctx.stop = false;
+ ctx.pid_fd = sys_pidfd_open(getpid(), 0);
+ ctx.map_fd = bpf_map__fd(skel->maps.task);
+ ctx.loop = loop;
+ for (i = 0; i < nr; i++) {
+ err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
+ if (err) {
+ abort_lookup(&ctx, tids, i);
+ CHECK(err, "pthread_create", "error %d\n", err);
+ goto out;
+ }
+ }
+
+ ctx.start = true;
+ for (i = 0; i < nr; i++)
+ pthread_join(tids[i], NULL);
+
+ skel->bss->pid = getpid();
+ err = read_bpf_task_storage_busy__attach(skel);
+ CHECK(err, "attach", "error %d\n", err);
+
+ /* Trigger program */
+ sys_gettid();
+ skel->bss->pid = 0;
+
+ CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
+out:
+ read_bpf_task_storage_busy__destroy(skel);
+ /* Restore affinity setting */
+ sched_setaffinity(getpid(), sizeof(old), &old);
+ printf("%s:PASS\n", __func__);
+}
diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h
index 81084c1c2c23..2d4a58e4e39c 100644
--- a/tools/testing/selftests/bpf/netcnt_common.h
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -6,19 +6,39 @@
#define MAX_PERCPU_PACKETS 32
-struct percpu_net_cnt {
- __u64 packets;
- __u64 bytes;
+/* sizeof(struct bpf_local_storage_elem):
+ *
+ * It is about 128 bytes on x86_64 and 512 bytes on s390x, but allocate more to
+ * account for possible layout changes, different architectures, etc.
+ * The kernel will wrap up to PAGE_SIZE internally anyway.
+ */
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 768
- __u64 prev_ts;
+/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \
+ SIZEOF_BPF_LOCAL_STORAGE_ELEM)
- __u64 prev_packets;
- __u64 prev_bytes;
+#define PCPU_MIN_UNIT_SIZE 32768
+
+union percpu_net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+
+ __u64 prev_ts;
+
+ __u64 prev_packets;
+ __u64 prev_bytes;
+ };
+ __u8 data[PCPU_MIN_UNIT_SIZE];
};
-struct net_cnt {
- __u64 packets;
- __u64 bytes;
+union net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+ };
+ __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE];
};
#endif
diff --git a/tools/testing/selftests/bpf/netlink_helpers.c b/tools/testing/selftests/bpf/netlink_helpers.c
new file mode 100644
index 000000000000..caf36eb1d032
--- /dev/null
+++ b/tools/testing/selftests/bpf/netlink_helpers.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Taken & modified from iproute2's libnetlink.c
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/socket.h>
+
+#include "netlink_helpers.h"
+
+static int rcvbuf = 1024 * 1024;
+
+void rtnl_close(struct rtnl_handle *rth)
+{
+ if (rth->fd >= 0) {
+ close(rth->fd);
+ rth->fd = -1;
+ }
+}
+
+int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
+ int protocol)
+{
+ socklen_t addr_len;
+ int sndbuf = 32768;
+ int one = 1;
+
+ memset(rth, 0, sizeof(*rth));
+ rth->proto = protocol;
+ rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
+ if (rth->fd < 0) {
+ perror("Cannot open netlink socket");
+ return -1;
+ }
+ if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf, sizeof(sndbuf)) < 0) {
+ perror("SO_SNDBUF");
+ goto err;
+ }
+ if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF,
+ &rcvbuf, sizeof(rcvbuf)) < 0) {
+ perror("SO_RCVBUF");
+ goto err;
+ }
+
+ /* Older kernels may no support extended ACK reporting */
+ setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one));
+
+ memset(&rth->local, 0, sizeof(rth->local));
+ rth->local.nl_family = AF_NETLINK;
+ rth->local.nl_groups = subscriptions;
+
+ if (bind(rth->fd, (struct sockaddr *)&rth->local,
+ sizeof(rth->local)) < 0) {
+ perror("Cannot bind netlink socket");
+ goto err;
+ }
+ addr_len = sizeof(rth->local);
+ if (getsockname(rth->fd, (struct sockaddr *)&rth->local,
+ &addr_len) < 0) {
+ perror("Cannot getsockname");
+ goto err;
+ }
+ if (addr_len != sizeof(rth->local)) {
+ fprintf(stderr, "Wrong address length %d\n", addr_len);
+ goto err;
+ }
+ if (rth->local.nl_family != AF_NETLINK) {
+ fprintf(stderr, "Wrong address family %d\n",
+ rth->local.nl_family);
+ goto err;
+ }
+ rth->seq = time(NULL);
+ return 0;
+err:
+ rtnl_close(rth);
+ return -1;
+}
+
+int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
+{
+ return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE);
+}
+
+static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(fd, msg, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+ if (len < 0) {
+ fprintf(stderr, "netlink receive error %s (%d)\n",
+ strerror(errno), errno);
+ return -errno;
+ }
+ if (len == 0) {
+ fprintf(stderr, "EOF on netlink\n");
+ return -ENODATA;
+ }
+ return len;
+}
+
+static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
+{
+ struct iovec *iov = msg->msg_iov;
+ char *buf;
+ int len;
+
+ iov->iov_base = NULL;
+ iov->iov_len = 0;
+
+ len = __rtnl_recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC);
+ if (len < 0)
+ return len;
+ if (len < 32768)
+ len = 32768;
+ buf = malloc(len);
+ if (!buf) {
+ fprintf(stderr, "malloc error: not enough buffer\n");
+ return -ENOMEM;
+ }
+ iov->iov_base = buf;
+ iov->iov_len = len;
+ len = __rtnl_recvmsg(fd, msg, 0);
+ if (len < 0) {
+ free(buf);
+ return len;
+ }
+ if (answer)
+ *answer = buf;
+ else
+ free(buf);
+ return len;
+}
+
+static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err,
+ nl_ext_ack_fn_t errfn)
+{
+ fprintf(stderr, "RTNETLINK answers: %s\n",
+ strerror(-err->error));
+}
+
+static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov,
+ size_t iovlen, struct nlmsghdr **answer,
+ bool show_rtnl_err, nl_ext_ack_fn_t errfn)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct iovec riov;
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = iov,
+ .msg_iovlen = iovlen,
+ };
+ unsigned int seq = 0;
+ struct nlmsghdr *h;
+ int i, status;
+ char *buf;
+
+ for (i = 0; i < iovlen; i++) {
+ h = iov[i].iov_base;
+ h->nlmsg_seq = seq = ++rtnl->seq;
+ if (answer == NULL)
+ h->nlmsg_flags |= NLM_F_ACK;
+ }
+ status = sendmsg(rtnl->fd, &msg, 0);
+ if (status < 0) {
+ perror("Cannot talk to rtnetlink");
+ return -1;
+ }
+ /* change msg to use the response iov */
+ msg.msg_iov = &riov;
+ msg.msg_iovlen = 1;
+ i = 0;
+ while (1) {
+next:
+ status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
+ ++i;
+ if (status < 0)
+ return status;
+ if (msg.msg_namelen != sizeof(nladdr)) {
+ fprintf(stderr,
+ "Sender address length == %d!\n",
+ msg.msg_namelen);
+ exit(1);
+ }
+ for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) {
+ int len = h->nlmsg_len;
+ int l = len - sizeof(*h);
+
+ if (l < 0 || len > status) {
+ if (msg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "Truncated message!\n");
+ free(buf);
+ return -1;
+ }
+ fprintf(stderr,
+ "Malformed message: len=%d!\n",
+ len);
+ exit(1);
+ }
+ if (nladdr.nl_pid != 0 ||
+ h->nlmsg_pid != rtnl->local.nl_pid ||
+ h->nlmsg_seq > seq || h->nlmsg_seq < seq - iovlen) {
+ /* Don't forget to skip that message. */
+ status -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
+ continue;
+ }
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+ int error = err->error;
+
+ if (l < sizeof(struct nlmsgerr)) {
+ fprintf(stderr, "ERROR truncated\n");
+ free(buf);
+ return -1;
+ }
+ if (error) {
+ errno = -error;
+ if (rtnl->proto != NETLINK_SOCK_DIAG &&
+ show_rtnl_err)
+ rtnl_talk_error(h, err, errfn);
+ }
+ if (i < iovlen) {
+ free(buf);
+ goto next;
+ }
+ if (error) {
+ free(buf);
+ return -i;
+ }
+ if (answer)
+ *answer = (struct nlmsghdr *)buf;
+ else
+ free(buf);
+ return 0;
+ }
+ if (answer) {
+ *answer = (struct nlmsghdr *)buf;
+ return 0;
+ }
+ fprintf(stderr, "Unexpected reply!\n");
+ status -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
+ }
+ free(buf);
+ if (msg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "Message truncated!\n");
+ continue;
+ }
+ if (status) {
+ fprintf(stderr, "Remnant of size %d!\n", status);
+ exit(1);
+ }
+ }
+}
+
+static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr **answer, bool show_rtnl_err,
+ nl_ext_ack_fn_t errfn)
+{
+ struct iovec iov = {
+ .iov_base = n,
+ .iov_len = n->nlmsg_len,
+ };
+
+ return __rtnl_talk_iov(rtnl, &iov, 1, answer, show_rtnl_err, errfn);
+}
+
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr **answer)
+{
+ return __rtnl_talk(rtnl, n, answer, true, NULL);
+}
+
+int addattr(struct nlmsghdr *n, int maxlen, int type)
+{
+ return addattr_l(n, maxlen, type, NULL, 0);
+}
+
+int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u8));
+}
+
+int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u16));
+}
+
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u32));
+}
+
+int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u64));
+}
+
+int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *str)
+{
+ return addattr_l(n, maxlen, type, str, strlen(str)+1);
+}
+
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
+ int alen)
+{
+ int len = RTA_LENGTH(alen);
+ struct rtattr *rta;
+
+ if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) {
+ fprintf(stderr, "%s: Message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -1;
+ }
+ rta = NLMSG_TAIL(n);
+ rta->rta_type = type;
+ rta->rta_len = len;
+ if (alen)
+ memcpy(RTA_DATA(rta), data, alen);
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+ return 0;
+}
+
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len)
+{
+ if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) {
+ fprintf(stderr, "%s: Message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -1;
+ }
+
+ memcpy(NLMSG_TAIL(n), data, len);
+ memset((void *) NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len);
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);
+ return 0;
+}
+
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type)
+{
+ struct rtattr *nest = NLMSG_TAIL(n);
+
+ addattr_l(n, maxlen, type, NULL, 0);
+ return nest;
+}
+
+int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
+{
+ nest->rta_len = (void *)NLMSG_TAIL(n) - (void *)nest;
+ return n->nlmsg_len;
+}
diff --git a/tools/testing/selftests/bpf/netlink_helpers.h b/tools/testing/selftests/bpf/netlink_helpers.h
new file mode 100644
index 000000000000..68116818a47e
--- /dev/null
+++ b/tools/testing/selftests/bpf/netlink_helpers.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef NETLINK_HELPERS_H
+#define NETLINK_HELPERS_H
+
+#include <string.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+struct rtnl_handle {
+ int fd;
+ struct sockaddr_nl local;
+ struct sockaddr_nl peer;
+ __u32 seq;
+ __u32 dump;
+ int proto;
+ FILE *dump_fp;
+#define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01
+#define RTNL_HANDLE_F_SUPPRESS_NLERR 0x02
+#define RTNL_HANDLE_F_STRICT_CHK 0x04
+ int flags;
+};
+
+#define NLMSG_TAIL(nmsg) \
+ ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
+
+typedef int (*nl_ext_ack_fn_t)(const char *errmsg, uint32_t off,
+ const struct nlmsghdr *inner_nlh);
+
+int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
+ __attribute__((warn_unused_result));
+void rtnl_close(struct rtnl_handle *rth);
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr **answer)
+ __attribute__((warn_unused_result));
+
+int addattr(struct nlmsghdr *n, int maxlen, int type);
+int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data);
+int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data);
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data);
+int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data);
+int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *data);
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen);
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len);
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type);
+int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest);
+#endif /* NETLINK_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 12ee40284da0..0a6a5561bed3 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -1,18 +1,44 @@
// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
+#include <sched.h>
#include <arpa/inet.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/eventfd.h>
#include <linux/err.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/limits.h>
+
+#include <linux/ip.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#include <net/if.h>
#include "bpf_util.h"
#include "network_helpers.h"
+#include "test_progs.h"
+
+#ifdef TRAFFIC_MONITOR
+/* Prevent pcap.h from including pcap/bpf.h and causing conflicts */
+#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
+#include <pcap/pcap.h>
+#include <pcap/dlt.h>
+#endif
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
#define log_err(MSG, ...) ({ \
@@ -40,7 +66,9 @@ struct ipv6_packet pkt_v6 = {
.tcp.doff = 5,
};
-static int settimeo(int fd, int timeout_ms)
+static const struct network_helper_opts default_opts;
+
+int settimeo(int fd, int timeout_ms)
{
struct timeval timeout = { .tv_sec = 3 };
@@ -66,32 +94,42 @@ static int settimeo(int fd, int timeout_ms)
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
-int start_server(int family, int type, const char *addr_str, __u16 port,
- int timeout_ms)
+int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
+ const struct network_helper_opts *opts)
{
- struct sockaddr_storage addr = {};
- socklen_t len;
- int fd;
+ int on = 1, fd;
- if (make_sockaddr(family, addr_str, port, &addr, &len))
- return -1;
+ if (!opts)
+ opts = &default_opts;
- fd = socket(family, type, 0);
+ fd = socket(addr->ss_family, type, opts->proto);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
}
- if (settimeo(fd, timeout_ms))
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (type == SOCK_STREAM &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) {
+ log_err("Failed to enable SO_REUSEADDR");
goto error_close;
+ }
+
+ if (opts->post_socket_cb &&
+ opts->post_socket_cb(fd, opts->cb_opts)) {
+ log_err("Failed to call post_socket_cb");
+ goto error_close;
+ }
- if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+ if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) {
log_err("Failed to bind socket");
goto error_close;
}
if (type == SOCK_STREAM) {
- if (listen(fd, 1) < 0) {
+ if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) {
log_err("Failed to listed on socket");
goto error_close;
}
@@ -104,6 +142,90 @@ error_close:
return -1;
}
+int start_server_str(int family, int type, const char *addr_str, __u16 port,
+ const struct network_helper_opts *opts)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+
+ if (!opts)
+ opts = &default_opts;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return -1;
+
+ return start_server_addr(type, &addr, addrlen, opts);
+}
+
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ };
+
+ return start_server_str(family, type, addr_str, port, &opts);
+}
+
+static int reuseport_cb(int fd, void *opts)
+{
+ int on = 1;
+
+ return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
+}
+
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms, unsigned int nr_listens)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ .post_socket_cb = reuseport_cb,
+ };
+ struct sockaddr_storage addr;
+ unsigned int nr_fds = 0;
+ socklen_t addrlen;
+ int *fds;
+
+ if (!nr_listens)
+ return NULL;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return NULL;
+
+ fds = malloc(sizeof(*fds) * nr_listens);
+ if (!fds)
+ return NULL;
+
+ fds[0] = start_server_addr(type, &addr, addrlen, &opts);
+ if (fds[0] == -1)
+ goto close_fds;
+ nr_fds = 1;
+
+ if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
+ goto close_fds;
+
+ for (; nr_fds < nr_listens; nr_fds++) {
+ fds[nr_fds] = start_server_addr(type, &addr, addrlen, &opts);
+ if (fds[nr_fds] == -1)
+ goto close_fds;
+ }
+
+ return fds;
+
+close_fds:
+ free_fds(fds, nr_fds);
+ return NULL;
+}
+
+void free_fds(int *fds, unsigned int nr_close_fds)
+{
+ if (fds) {
+ while (nr_close_fds)
+ close(fds[--nr_close_fds]);
+ free(fds);
+ }
+}
+
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms)
{
@@ -141,24 +263,80 @@ error_close:
return -1;
}
-static int connect_fd_to_addr(int fd,
- const struct sockaddr_storage *addr,
- socklen_t addrlen)
+int client_socket(int family, int type,
+ const struct network_helper_opts *opts)
+{
+ int fd;
+
+ if (!opts)
+ opts = &default_opts;
+
+ fd = socket(family, type, opts->proto);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (opts->post_socket_cb &&
+ opts->post_socket_cb(fd, opts->cb_opts))
+ goto error_close;
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
+int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
+ const struct network_helper_opts *opts)
{
+ int fd;
+
+ if (!opts)
+ opts = &default_opts;
+
+ fd = client_socket(addr->ss_family, type, opts);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
log_err("Failed to connect to server");
+ save_errno_close(fd);
return -1;
}
- return 0;
+ return fd;
}
-int connect_to_fd(int server_fd, int timeout_ms)
+int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port,
+ const struct network_helper_opts *opts)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+
+ if (!opts)
+ opts = &default_opts;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return -1;
+
+ return connect_to_addr(type, &addr, addrlen, opts);
+}
+
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
- struct sockaddr_in *addr_in;
socklen_t addrlen, optlen;
- int fd, type;
+ int type;
+
+ if (!opts)
+ opts = &default_opts;
optlen = sizeof(type);
if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
@@ -172,24 +350,25 @@ int connect_to_fd(int server_fd, int timeout_ms)
return -1;
}
- addr_in = (struct sockaddr_in *)&addr;
- fd = socket(addr_in->sin_family, type, 0);
- if (fd < 0) {
- log_err("Failed to create client socket");
- return -1;
- }
-
- if (settimeo(fd, timeout_ms))
- goto error_close;
+ return connect_to_addr(type, &addr, addrlen, opts);
+}
- if (connect_fd_to_addr(fd, &addr, addrlen))
- goto error_close;
+int connect_to_fd(int server_fd, int timeout_ms)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ };
+ socklen_t optlen;
+ int protocol;
- return fd;
+ optlen = sizeof(protocol);
+ if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
+ log_err("getsockopt(SOL_PROTOCOL)");
+ return -1;
+ }
+ opts.proto = protocol;
-error_close:
- save_errno_close(fd);
- return -1;
+ return connect_to_fd_opts(server_fd, &opts);
}
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
@@ -205,8 +384,10 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
return -1;
}
- if (connect_fd_to_addr(client_fd, &addr, len))
+ if (connect(client_fd, (const struct sockaddr *)&addr, len)) {
+ log_err("Failed to connect to server");
return -1;
+ }
return 0;
}
@@ -217,6 +398,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
if (family == AF_INET) {
struct sockaddr_in *sin = (void *)addr;
+ memset(addr, 0, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_port = htons(port);
if (addr_str &&
@@ -230,6 +412,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
} else if (family == AF_INET6) {
struct sockaddr_in6 *sin6 = (void *)addr;
+ memset(addr, 0, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_port = htons(port);
if (addr_str &&
@@ -240,6 +423,859 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
if (len)
*len = sizeof(*sin6);
return 0;
+ } else if (family == AF_UNIX) {
+ /* Note that we always use abstract unix sockets to avoid having
+ * to clean up leftover files.
+ */
+ struct sockaddr_un *sun = (void *)addr;
+
+ memset(addr, 0, sizeof(*sun));
+ sun->sun_family = family;
+ sun->sun_path[0] = 0;
+ strcpy(sun->sun_path + 1, addr_str);
+ if (len)
+ *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str);
+ return 0;
}
return -1;
}
+
+char *ping_command(int family)
+{
+ if (family == AF_INET6) {
+ /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
+ if (!system("which ping6 >/dev/null 2>&1"))
+ return "ping6";
+ else
+ return "ping -6";
+ }
+ return "ping";
+}
+
+int append_tid(char *str, size_t sz)
+{
+ size_t end;
+
+ if (!str)
+ return -1;
+
+ end = strlen(str);
+ if (end + 8 > sz)
+ return -1;
+
+ sprintf(&str[end], "%07ld", sys_gettid());
+ str[end + 7] = '\0';
+
+ return 0;
+}
+
+int remove_netns(const char *name)
+{
+ char *cmd;
+ int r;
+
+ r = asprintf(&cmd, "ip netns del %s >/dev/null 2>&1", name);
+ if (r < 0) {
+ log_err("Failed to malloc cmd");
+ return -1;
+ }
+
+ r = system(cmd);
+ free(cmd);
+ return r;
+}
+
+int make_netns(const char *name)
+{
+ char *cmd;
+ int r;
+
+ r = asprintf(&cmd, "ip netns add %s", name);
+ if (r < 0) {
+ log_err("Failed to malloc cmd");
+ return -1;
+ }
+
+ r = system(cmd);
+ free(cmd);
+
+ if (r)
+ return r;
+
+ r = asprintf(&cmd, "ip -n %s link set lo up", name);
+ if (r < 0) {
+ log_err("Failed to malloc cmd for setting up lo");
+ remove_netns(name);
+ return -1;
+ }
+
+ r = system(cmd);
+ free(cmd);
+
+ return r;
+}
+
+struct nstoken {
+ int orig_netns_fd;
+};
+
+struct nstoken *open_netns(const char *name)
+{
+ int nsfd;
+ char nspath[PATH_MAX];
+ int err;
+ struct nstoken *token;
+
+ token = calloc(1, sizeof(struct nstoken));
+ if (!token) {
+ log_err("Failed to malloc token");
+ return NULL;
+ }
+
+ token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (token->orig_netns_fd == -1) {
+ log_err("Failed to open(/proc/self/ns/net)");
+ goto fail;
+ }
+
+ snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+ nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+ if (nsfd == -1) {
+ log_err("Failed to open(%s)", nspath);
+ goto fail;
+ }
+
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+ if (err) {
+ log_err("Failed to setns(nsfd)");
+ goto fail;
+ }
+
+ return token;
+fail:
+ if (token->orig_netns_fd != -1)
+ close(token->orig_netns_fd);
+ free(token);
+ return NULL;
+}
+
+void close_netns(struct nstoken *token)
+{
+ if (!token)
+ return;
+
+ if (setns(token->orig_netns_fd, CLONE_NEWNET))
+ log_err("Failed to setns(orig_netns_fd)");
+ close(token->orig_netns_fd);
+ free(token);
+}
+
+int open_tuntap(const char *dev_name, bool need_mac)
+{
+ int err = 0;
+ struct ifreq ifr;
+ int fd = open("/dev/net/tun", O_RDWR);
+
+ if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
+ return -1;
+
+ ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
+ strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
+ ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
+ close(fd);
+ return -1;
+ }
+
+ err = fcntl(fd, F_SETFL, O_NONBLOCK);
+ if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+int get_socket_local_port(int sock_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ int err;
+
+ err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen);
+ if (err < 0)
+ return err;
+
+ if (addr.ss_family == AF_INET) {
+ struct sockaddr_in *sin = (struct sockaddr_in *)&addr;
+
+ return sin->sin_port;
+ } else if (addr.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr;
+
+ return sin->sin6_port;
+ }
+
+ return -1;
+}
+
+int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
+{
+ struct ifreq ifr = {0};
+ int sockfd, err;
+
+ sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sockfd < 0)
+ return -errno;
+
+ memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ ring_param->cmd = ETHTOOL_GRINGPARAM;
+ ifr.ifr_data = (char *)ring_param;
+
+ if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
+ err = errno;
+ close(sockfd);
+ return -err;
+ }
+
+ close(sockfd);
+ return 0;
+}
+
+int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
+{
+ struct ifreq ifr = {0};
+ int sockfd, err;
+
+ sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sockfd < 0)
+ return -errno;
+
+ memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ ring_param->cmd = ETHTOOL_SRINGPARAM;
+ ifr.ifr_data = (char *)ring_param;
+
+ if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
+ err = errno;
+ close(sockfd);
+ return -err;
+ }
+
+ close(sockfd);
+ return 0;
+}
+
+struct send_recv_arg {
+ int fd;
+ uint32_t bytes;
+ int stop;
+};
+
+static void *send_recv_server(void *arg)
+{
+ struct send_recv_arg *a = (struct send_recv_arg *)arg;
+ ssize_t nr_sent = 0, bytes = 0;
+ char batch[1500];
+ int err = 0, fd;
+
+ fd = accept(a->fd, NULL, NULL);
+ while (fd == -1) {
+ if (errno == EINTR)
+ continue;
+ err = -errno;
+ goto done;
+ }
+
+ if (settimeo(fd, 0)) {
+ err = -errno;
+ goto done;
+ }
+
+ while (bytes < a->bytes && !READ_ONCE(a->stop)) {
+ nr_sent = send(fd, &batch,
+ MIN(a->bytes - bytes, sizeof(batch)), 0);
+ if (nr_sent == -1 && errno == EINTR)
+ continue;
+ if (nr_sent == -1) {
+ err = -errno;
+ break;
+ }
+ bytes += nr_sent;
+ }
+
+ if (bytes != a->bytes) {
+ log_err("send %zd expected %u", bytes, a->bytes);
+ if (!err)
+ err = bytes > a->bytes ? -E2BIG : -EINTR;
+ }
+
+done:
+ if (fd >= 0)
+ close(fd);
+ if (err) {
+ WRITE_ONCE(a->stop, 1);
+ return ERR_PTR(err);
+ }
+ return NULL;
+}
+
+int send_recv_data(int lfd, int fd, uint32_t total_bytes)
+{
+ ssize_t nr_recv = 0, bytes = 0;
+ struct send_recv_arg arg = {
+ .fd = lfd,
+ .bytes = total_bytes,
+ .stop = 0,
+ };
+ pthread_t srv_thread;
+ void *thread_ret;
+ char batch[1500];
+ int err = 0;
+
+ err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg);
+ if (err) {
+ log_err("Failed to pthread_create");
+ return err;
+ }
+
+ /* recv total_bytes */
+ while (bytes < total_bytes && !READ_ONCE(arg.stop)) {
+ nr_recv = recv(fd, &batch,
+ MIN(total_bytes - bytes, sizeof(batch)), 0);
+ if (nr_recv == -1 && errno == EINTR)
+ continue;
+ if (nr_recv == -1) {
+ err = -errno;
+ break;
+ }
+ bytes += nr_recv;
+ }
+
+ if (bytes != total_bytes) {
+ log_err("recv %zd expected %u", bytes, total_bytes);
+ if (!err)
+ err = bytes > total_bytes ? -E2BIG : -EINTR;
+ }
+
+ WRITE_ONCE(arg.stop, 1);
+ pthread_join(srv_thread, &thread_ret);
+ if (IS_ERR(thread_ret)) {
+ log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret));
+ err = err ? : PTR_ERR(thread_ret);
+ }
+
+ return err;
+}
+
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
+{
+ int ifindex, ret;
+
+ if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
+ "at least one program fd is valid"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
+ .priority = 1, .prog_fd = ingress_fd);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
+ .priority = 1, .prog_fd = egress_fd);
+
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ if (ingress_fd >= 0) {
+ hook.attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(&hook, &opts1);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ }
+
+ if (egress_fd >= 0) {
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(&hook, &opts2);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+#ifdef TRAFFIC_MONITOR
+struct tmonitor_ctx {
+ pcap_t *pcap;
+ pcap_dumper_t *dumper;
+ pthread_t thread;
+ int wake_fd;
+
+ volatile bool done;
+ char pkt_fname[PATH_MAX];
+ int pcap_fd;
+};
+
+static int __base_pr(const char *format, va_list args)
+{
+ return vfprintf(stdout, format, args);
+}
+
+static tm_print_fn_t __tm_pr = __base_pr;
+
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ tm_print_fn_t old_print_fn;
+
+ old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
+
+ return old_print_fn;
+}
+
+void tm_print(const char *format, ...)
+{
+ tm_print_fn_t print_fn;
+ va_list args;
+
+ print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
+ if (!print_fn)
+ return;
+
+ va_start(args, format);
+ print_fn(format, args);
+ va_end(args);
+}
+
+/* Is this packet captured with a Ethernet protocol type? */
+static bool is_ethernet(const u_char *packet)
+{
+ u16 arphdr_type;
+
+ memcpy(&arphdr_type, packet + 8, 2);
+ arphdr_type = ntohs(arphdr_type);
+
+ /* Except the following cases, the protocol type contains the
+ * Ethernet protocol type for the packet.
+ *
+ * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
+ */
+ switch (arphdr_type) {
+ case 770: /* ARPHRD_FRAD */
+ case 778: /* ARPHDR_IPGRE */
+ case 803: /* ARPHRD_IEEE80211_RADIOTAP */
+ tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
+ return false;
+ }
+ return true;
+}
+
+static const char * const pkt_types[] = {
+ "In",
+ "B", /* Broadcast */
+ "M", /* Multicast */
+ "C", /* Captured with the promiscuous mode */
+ "Out",
+};
+
+static const char *pkt_type_str(u16 pkt_type)
+{
+ if (pkt_type < ARRAY_SIZE(pkt_types))
+ return pkt_types[pkt_type];
+ return "Unknown";
+}
+
+#define MAX_FLAGS_STRLEN 21
+/* Show the information of the transport layer in the packet */
+static void show_transport(const u_char *packet, u16 len, u32 ifindex,
+ const char *src_addr, const char *dst_addr,
+ u16 proto, bool ipv6, u8 pkt_type)
+{
+ char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = "";
+ const char *transport_str;
+ u16 src_port, dst_port;
+ struct udphdr *udp;
+ struct tcphdr *tcp;
+
+ ifname = if_indextoname(ifindex, _ifname);
+ if (!ifname) {
+ snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
+ ifname = _ifname;
+ }
+
+ if (proto == IPPROTO_UDP) {
+ udp = (struct udphdr *)packet;
+ src_port = ntohs(udp->source);
+ dst_port = ntohs(udp->dest);
+ transport_str = "UDP";
+ } else if (proto == IPPROTO_TCP) {
+ tcp = (struct tcphdr *)packet;
+ src_port = ntohs(tcp->source);
+ dst_port = ntohs(tcp->dest);
+ transport_str = "TCP";
+ } else if (proto == IPPROTO_ICMP) {
+ tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
+ return;
+ } else if (proto == IPPROTO_ICMPV6) {
+ tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
+ return;
+ } else {
+ tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
+ ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
+ src_addr, dst_addr, proto);
+ return;
+ }
+
+ /* TCP or UDP*/
+
+ if (proto == IPPROTO_TCP)
+ snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s",
+ tcp->fin ? ", FIN" : "",
+ tcp->syn ? ", SYN" : "",
+ tcp->rst ? ", RST" : "",
+ tcp->ack ? ", ACK" : "");
+
+ if (ipv6)
+ tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
+ else
+ tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
+}
+
+static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
+{
+ char src_buf[INET6_ADDRSTRLEN], dst_buf[INET6_ADDRSTRLEN];
+ struct ipv6hdr *pkt = (struct ipv6hdr *)packet;
+ const char *src, *dst;
+ u_char proto;
+
+ src = inet_ntop(AF_INET6, &pkt->saddr, src_buf, sizeof(src_buf));
+ if (!src)
+ src = "<invalid>";
+ dst = inet_ntop(AF_INET6, &pkt->daddr, dst_buf, sizeof(dst_buf));
+ if (!dst)
+ dst = "<invalid>";
+ proto = pkt->nexthdr;
+ show_transport(packet + sizeof(struct ipv6hdr),
+ ntohs(pkt->payload_len),
+ ifindex, src, dst, proto, true, pkt_type);
+}
+
+static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
+{
+ char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN];
+ struct iphdr *pkt = (struct iphdr *)packet;
+ const char *src, *dst;
+ u_char proto;
+
+ src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf));
+ if (!src)
+ src = "<invalid>";
+ dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf));
+ if (!dst)
+ dst = "<invalid>";
+ proto = pkt->protocol;
+ show_transport(packet + sizeof(struct iphdr),
+ ntohs(pkt->tot_len),
+ ifindex, src, dst, proto, false, pkt_type);
+}
+
+static void *traffic_monitor_thread(void *arg)
+{
+ char *ifname, _ifname[IF_NAMESIZE];
+ const u_char *packet, *payload;
+ struct tmonitor_ctx *ctx = arg;
+ pcap_dumper_t *dumper = ctx->dumper;
+ int fd = ctx->pcap_fd, nfds, r;
+ int wake_fd = ctx->wake_fd;
+ struct pcap_pkthdr header;
+ pcap_t *pcap = ctx->pcap;
+ u32 ifindex;
+ fd_set fds;
+ u16 proto;
+ u8 ptype;
+
+ nfds = (fd > wake_fd ? fd : wake_fd) + 1;
+ FD_ZERO(&fds);
+
+ while (!ctx->done) {
+ FD_SET(fd, &fds);
+ FD_SET(wake_fd, &fds);
+ r = select(nfds, &fds, NULL, NULL, NULL);
+ if (!r)
+ continue;
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+ log_err("Fail to select on pcap fd and wake fd");
+ break;
+ }
+
+ /* This instance of pcap is non-blocking */
+ packet = pcap_next(pcap, &header);
+ if (!packet)
+ continue;
+
+ /* According to the man page of pcap_dump(), first argument
+ * is the pcap_dumper_t pointer even it's argument type is
+ * u_char *.
+ */
+ pcap_dump((u_char *)dumper, &header, packet);
+
+ /* Not sure what other types of packets look like. Here, we
+ * parse only Ethernet and compatible packets.
+ */
+ if (!is_ethernet(packet))
+ continue;
+
+ /* Skip SLL2 header
+ * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
+ *
+ * Although the document doesn't mention that, the payload
+ * doesn't include the Ethernet header. The payload starts
+ * from the first byte of the network layer header.
+ */
+ payload = packet + 20;
+
+ memcpy(&proto, packet, 2);
+ proto = ntohs(proto);
+ memcpy(&ifindex, packet + 4, 4);
+ ifindex = ntohl(ifindex);
+ ptype = packet[10];
+
+ if (proto == ETH_P_IPV6) {
+ show_ipv6_packet(payload, ifindex, ptype);
+ } else if (proto == ETH_P_IP) {
+ show_ipv4_packet(payload, ifindex, ptype);
+ } else {
+ ifname = if_indextoname(ifindex, _ifname);
+ if (!ifname) {
+ snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
+ ifname = _ifname;
+ }
+
+ tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
+ ifname, pkt_type_str(ptype), proto);
+ }
+ }
+
+ return NULL;
+}
+
+/* Prepare the pcap handle to capture packets.
+ *
+ * This pcap is non-blocking and immediate mode is enabled to receive
+ * captured packets as soon as possible. The snaplen is set to 1024 bytes
+ * to limit the size of captured content. The format of the link-layer
+ * header is set to DLT_LINUX_SLL2 to enable handling various link-layer
+ * technologies.
+ */
+static pcap_t *traffic_monitor_prepare_pcap(void)
+{
+ char errbuf[PCAP_ERRBUF_SIZE];
+ pcap_t *pcap;
+ int r;
+
+ /* Listen on all NICs in the namespace */
+ pcap = pcap_create("any", errbuf);
+ if (!pcap) {
+ log_err("Failed to open pcap: %s", errbuf);
+ return NULL;
+ }
+ /* Limit the size of the packet (first N bytes) */
+ r = pcap_set_snaplen(pcap, 1024);
+ if (r) {
+ log_err("Failed to set snaplen: %s", pcap_geterr(pcap));
+ goto error;
+ }
+ /* To receive packets as fast as possible */
+ r = pcap_set_immediate_mode(pcap, 1);
+ if (r) {
+ log_err("Failed to set immediate mode: %s", pcap_geterr(pcap));
+ goto error;
+ }
+ r = pcap_setnonblock(pcap, 1, errbuf);
+ if (r) {
+ log_err("Failed to set nonblock: %s", errbuf);
+ goto error;
+ }
+ r = pcap_activate(pcap);
+ if (r) {
+ log_err("Failed to activate pcap: %s", pcap_geterr(pcap));
+ goto error;
+ }
+ /* Determine the format of the link-layer header */
+ r = pcap_set_datalink(pcap, DLT_LINUX_SLL2);
+ if (r) {
+ log_err("Failed to set datalink: %s", pcap_geterr(pcap));
+ goto error;
+ }
+
+ return pcap;
+error:
+ pcap_close(pcap);
+ return NULL;
+}
+
+static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name)
+{
+ char *p;
+
+ if (subtest_name)
+ snprintf(buf, len, "%s__%s", test_name, subtest_name);
+ else
+ snprintf(buf, len, "%s", test_name);
+ while ((p = strchr(buf, '/')))
+ *p = '_';
+ while ((p = strchr(buf, ' ')))
+ *p = '_';
+}
+
+#define PCAP_DIR "/tmp/tmon_pcap"
+
+/* Start to monitor the network traffic in the given network namespace.
+ *
+ * netns: the name of the network namespace to monitor. If NULL, the
+ * current network namespace is monitored.
+ * test_name: the name of the running test.
+ * subtest_name: the name of the running subtest if there is. It should be
+ * NULL if it is not a subtest.
+ *
+ * This function will start a thread to capture packets going through NICs
+ * in the give network namespace.
+ */
+struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
+ const char *subtest_name)
+{
+ struct nstoken *nstoken = NULL;
+ struct tmonitor_ctx *ctx;
+ char test_name_buf[64];
+ static int tmon_seq;
+ int r;
+
+ if (netns) {
+ nstoken = open_netns(netns);
+ if (!nstoken)
+ return NULL;
+ }
+ ctx = malloc(sizeof(*ctx));
+ if (!ctx) {
+ log_err("Failed to malloc ctx");
+ goto fail_ctx;
+ }
+ memset(ctx, 0, sizeof(*ctx));
+
+ encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name);
+ snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname),
+ PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++,
+ test_name_buf, netns ? netns : "unknown");
+
+ r = mkdir(PCAP_DIR, 0755);
+ if (r && errno != EEXIST) {
+ log_err("Failed to create " PCAP_DIR);
+ goto fail_pcap;
+ }
+
+ ctx->pcap = traffic_monitor_prepare_pcap();
+ if (!ctx->pcap)
+ goto fail_pcap;
+ ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap);
+ if (ctx->pcap_fd < 0) {
+ log_err("Failed to get pcap fd");
+ goto fail_dumper;
+ }
+
+ /* Create a packet file */
+ ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname);
+ if (!ctx->dumper) {
+ log_err("Failed to open pcap dump: %s", ctx->pkt_fname);
+ goto fail_dumper;
+ }
+
+ /* Create an eventfd to wake up the monitor thread */
+ ctx->wake_fd = eventfd(0, 0);
+ if (ctx->wake_fd < 0) {
+ log_err("Failed to create eventfd");
+ goto fail_eventfd;
+ }
+
+ r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
+ if (r) {
+ log_err("Failed to create thread");
+ goto fail;
+ }
+
+ close_netns(nstoken);
+
+ return ctx;
+
+fail:
+ close(ctx->wake_fd);
+
+fail_eventfd:
+ pcap_dump_close(ctx->dumper);
+ unlink(ctx->pkt_fname);
+
+fail_dumper:
+ pcap_close(ctx->pcap);
+
+fail_pcap:
+ free(ctx);
+
+fail_ctx:
+ close_netns(nstoken);
+
+ return NULL;
+}
+
+static void traffic_monitor_release(struct tmonitor_ctx *ctx)
+{
+ pcap_close(ctx->pcap);
+ pcap_dump_close(ctx->dumper);
+
+ close(ctx->wake_fd);
+
+ free(ctx);
+}
+
+/* Stop the network traffic monitor.
+ *
+ * ctx: the context returned by traffic_monitor_start()
+ */
+void traffic_monitor_stop(struct tmonitor_ctx *ctx)
+{
+ __u64 w = 1;
+
+ if (!ctx)
+ return;
+
+ /* Stop the monitor thread */
+ ctx->done = true;
+ /* Wake up the background thread. */
+ write(ctx->wake_fd, &w, sizeof(w));
+ pthread_join(ctx->thread, NULL);
+
+ tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
+
+ traffic_monitor_release(ctx);
+}
+
+#endif /* TRAFFIC_MONITOR */
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 7205f8afdba1..79a010c88e11 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -1,22 +1,47 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NETWORK_HELPERS_H
#define __NETWORK_HELPERS_H
+#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/types.h>
typedef __u16 __sum16;
#include <linux/if_ether.h>
#include <linux/if_packet.h>
+#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#include <linux/err.h>
#include <netinet/tcp.h>
+#include <netinet/udp.h>
#include <bpf/bpf_endian.h>
+#include <net/if.h>
+#include <stdio.h>
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
#define VIP_NUM 5
#define MAGIC_BYTES 123
+struct network_helper_opts {
+ int timeout_ms;
+ int proto;
+ /* +ve: Passed to listen() as-is.
+ * 0: Default when the test does not set
+ * a particular value during the struct init.
+ * It is changed to 1 before passing to listen().
+ * Most tests only have one on-going connection.
+ * -ve: It is changed to 0 before passing to listen().
+ * It is useful to force syncookie without
+ * changing the "tcp_syncookies" sysctl from 1 to 2.
+ */
+ int backlog;
+ int (*post_socket_cb)(int fd, void *opts);
+ void *cb_opts;
+};
+
/* ipv4 test vector */
struct ipv4_packet {
struct ethhdr eth;
@@ -33,13 +58,239 @@ struct ipv6_packet {
} __packed;
extern struct ipv6_packet pkt_v6;
+int settimeo(int fd, int timeout_ms);
+int start_server_str(int family, int type, const char *addr_str, __u16 port,
+ const struct network_helper_opts *opts);
int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms,
+ unsigned int nr_listens);
+int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
+ const struct network_helper_opts *opts);
+void free_fds(int *fds, unsigned int nr_close_fds);
+int client_socket(int family, int type,
+ const struct network_helper_opts *opts);
+int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
+ const struct network_helper_opts *opts);
+int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port,
+ const struct network_helper_opts *opts);
int connect_to_fd(int server_fd, int timeout_ms);
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
+char *ping_command(int family);
+int get_socket_local_port(int sock_fd);
+int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
+int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
+
+int open_tuntap(const char *dev_name, bool need_mac);
+
+struct nstoken;
+/**
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
+ */
+struct nstoken *open_netns(const char *name);
+void close_netns(struct nstoken *token);
+int send_recv_data(int lfd, int fd, uint32_t total_bytes);
+int make_netns(const char *name);
+int remove_netns(const char *name);
+
+/**
+ * append_tid() - Append thread ID to the given string.
+ *
+ * @str: string to extend
+ * @sz: string's size
+ *
+ * 8 characters are used to append the thread ID (7 digits + '\0')
+ *
+ * Returns -1 on errors, 0 otherwise
+ */
+int append_tid(char *str, size_t sz);
+
+static __u16 csum_fold(__u32 csum)
+{
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+
+ return (__u16)~csum;
+}
+
+static __wsum csum_partial(const void *buf, int len, __wsum sum)
+{
+ __u16 *p = (__u16 *)buf;
+ int num_u16 = len >> 1;
+ int i;
+
+ for (i = 0; i < num_u16; i++)
+ sum += p[i];
+
+ return sum;
+}
+
+static inline __sum16 build_ip_csum(struct iphdr *iph)
+{
+ __u32 sum = 0;
+ __u16 *p;
+
+ iph->check = 0;
+ p = (void *)iph;
+ sum = csum_partial(p, iph->ihl << 2, 0);
+
+ return csum_fold(sum);
+}
+
+/**
+ * csum_tcpudp_magic - compute IP pseudo-header checksum
+ *
+ * Compute the IPv4 pseudo header checksum. The helper can take a
+ * accumulated sum from the transport layer to accumulate it and directly
+ * return the transport layer
+ *
+ * @saddr: IP source address
+ * @daddr: IP dest address
+ * @len: IP data size
+ * @proto: transport layer protocol
+ * @csum: The accumulated partial sum to add to the computation
+ *
+ * Returns the folded sum
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto,
+ __wsum csum)
+{
+ __u64 s = csum;
+
+ s += (__u32)saddr;
+ s += (__u32)daddr;
+ s += htons(proto + len);
+ s = (s & 0xffffffff) + (s >> 32);
+ s = (s & 0xffffffff) + (s >> 32);
+
+ return csum_fold((__u32)s);
+}
+
+/**
+ * csum_ipv6_magic - compute IPv6 pseudo-header checksum
+ *
+ * Compute the ipv6 pseudo header checksum. The helper can take a
+ * accumulated sum from the transport layer to accumulate it and directly
+ * return the transport layer
+ *
+ * @saddr: IPv6 source address
+ * @daddr: IPv6 dest address
+ * @len: IPv6 data size
+ * @proto: transport layer protocol
+ * @csum: The accumulated partial sum to add to the computation
+ *
+ * Returns the folded sum
+ */
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto,
+ __wsum csum)
+{
+ __u64 s = csum;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ s += (__u32)saddr->s6_addr32[i];
+ for (i = 0; i < 4; i++)
+ s += (__u32)daddr->s6_addr32[i];
+ s += htons(proto + len);
+ s = (s & 0xffffffff) + (s >> 32);
+ s = (s & 0xffffffff) + (s >> 32);
+
+ return csum_fold((__u32)s);
+}
+
+/**
+ * build_udp_v4_csum - compute UDP checksum for UDP over IPv4
+ *
+ * Compute the checksum to embed in UDP header, composed of the sum of IP
+ * pseudo-header checksum, UDP header checksum and UDP data checksum
+ * @iph IP header
+ * @udph UDP header, which must be immediately followed by UDP data
+ *
+ * Returns the total checksum
+ */
+
+static inline __sum16 build_udp_v4_csum(const struct iphdr *iph,
+ const struct udphdr *udph)
+{
+ unsigned long sum;
+
+ sum = csum_partial(udph, ntohs(udph->len), 0);
+ return csum_tcpudp_magic(iph->saddr, iph->daddr, ntohs(udph->len),
+ IPPROTO_UDP, sum);
+}
+
+/**
+ * build_udp_v6_csum - compute UDP checksum for UDP over IPv6
+ *
+ * Compute the checksum to embed in UDP header, composed of the sum of IPv6
+ * pseudo-header checksum, UDP header checksum and UDP data checksum
+ * @ip6h IPv6 header
+ * @udph UDP header, which must be immediately followed by UDP data
+ *
+ * Returns the total checksum
+ */
+static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h,
+ const struct udphdr *udph)
+{
+ unsigned long sum;
+
+ sum = csum_partial(udph, ntohs(udph->len), 0);
+ return csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ntohs(udph->len),
+ IPPROTO_UDP, sum);
+}
+
+struct tmonitor_ctx;
+
+typedef int (*tm_print_fn_t)(const char *format, va_list args);
+
+/**
+ * tc_prog_attach - attach BPF program(s) to an interface
+ *
+ * Takes file descriptors pointing to at least one, at most two BPF
+ * programs, and attach those programs to an interface ingress, egress or
+ * both.
+ *
+ * @dev: string containing the interface name
+ * @ingress_fd: file descriptor of the program to attach to interface ingress
+ * @egress_fd: file descriptor of the program to attach to interface egress
+ *
+ * Returns 0 on success, -1 if no valid file descriptor has been found, if
+ * the interface name is invalid or if an error ocurred during attach.
+ */
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd);
+
+#ifdef TRAFFIC_MONITOR
+struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
+ const char *subtest_name);
+void traffic_monitor_stop(struct tmonitor_ctx *ctx);
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn);
+#else
+static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
+ const char *subtest_name)
+{
+ return NULL;
+}
+
+static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
+{
+}
+
+static inline tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ return NULL;
+}
+#endif
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/access_variable_array.c b/tools/testing/selftests/bpf/prog_tests/access_variable_array.c
new file mode 100644
index 000000000000..08131782437c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/access_variable_array.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include <test_progs.h>
+#include "test_access_variable_array.skel.h"
+
+void test_access_variable_array(void)
+{
+ struct test_access_variable_array *skel;
+
+ skel = test_access_variable_array__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_access_variable_array__open_and_load"))
+ return;
+
+ test_access_variable_array__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 5861446d0777..24c509ce4e5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -2,10 +2,11 @@
#include <test_progs.h>
#define MAX_INSNS 512
-#define MAX_MATCHES 16
+#define MAX_MATCHES 24
struct bpf_reg_match {
unsigned int line;
+ const char *reg;
const char *match;
};
@@ -39,13 +40,13 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv2"},
- {2, "R3_w=inv4"},
- {3, "R3_w=inv8"},
- {4, "R3_w=inv16"},
- {5, "R3_w=inv32"},
+ {0, "R1", "ctx()"},
+ {0, "R10", "fp0"},
+ {0, "R3", "2"},
+ {1, "R3", "4"},
+ {2, "R3", "8"},
+ {3, "R3", "16"},
+ {4, "R3", "32"},
},
},
{
@@ -67,19 +68,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv1"},
- {2, "R3_w=inv2"},
- {3, "R3_w=inv4"},
- {4, "R3_w=inv8"},
- {5, "R3_w=inv16"},
- {6, "R3_w=inv1"},
- {7, "R4_w=inv32"},
- {8, "R4_w=inv16"},
- {9, "R4_w=inv8"},
- {10, "R4_w=inv4"},
- {11, "R4_w=inv2"},
+ {0, "R1", "ctx()"},
+ {0, "R10", "fp0"},
+ {0, "R3", "1"},
+ {1, "R3", "2"},
+ {2, "R3", "4"},
+ {3, "R3", "8"},
+ {4, "R3", "16"},
+ {5, "R3", "1"},
+ {6, "R4", "32"},
+ {7, "R4", "16"},
+ {8, "R4", "8"},
+ {9, "R4", "4"},
+ {10, "R4", "2"},
},
},
{
@@ -96,14 +97,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv4"},
- {2, "R3_w=inv8"},
- {3, "R3_w=inv10"},
- {4, "R4_w=inv8"},
- {5, "R4_w=inv12"},
- {6, "R4_w=inv14"},
+ {0, "R1", "ctx()"},
+ {0, "R10", "fp0"},
+ {0, "R3", "4"},
+ {1, "R3", "8"},
+ {2, "R3", "10"},
+ {3, "R4", "8"},
+ {4, "R4", "12"},
+ {5, "R4", "14"},
},
},
{
@@ -118,12 +119,12 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv7"},
- {2, "R3_w=inv7"},
- {3, "R3_w=inv14"},
- {4, "R3_w=inv56"},
+ {0, "R1", "ctx()"},
+ {0, "R10", "fp0"},
+ {0, "R3", "7"},
+ {1, "R3", "7"},
+ {2, "R3", "14"},
+ {3, "R3", "56"},
},
},
@@ -161,19 +162,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"},
- {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
- {18, "R3=pkt_end(id=0,off=0,imm=0)"},
- {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
- {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
- {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {6, "R0", "pkt(off=8,r=8)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R3", "var_off=(0x0; 0x1fe)"},
+ {8, "R3", "var_off=(0x0; 0x3fc)"},
+ {9, "R3", "var_off=(0x0; 0x7f8)"},
+ {10, "R3", "var_off=(0x0; 0xff0)"},
+ {12, "R3", "pkt_end()"},
+ {17, "R4", "var_off=(0x0; 0xff)"},
+ {18, "R4", "var_off=(0x0; 0x1fe0)"},
+ {19, "R4", "var_off=(0x0; 0xff0)"},
+ {20, "R4", "var_off=(0x0; 0x7f8)"},
+ {21, "R4", "var_off=(0x0; 0x3fc)"},
+ {22, "R4", "var_off=(0x0; 0x1fe)"},
},
},
{
@@ -194,16 +195,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
- {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
- {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
- {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
- {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R4", "var_off=(0x0; 0xff)"},
+ {8, "R4", "var_off=(0x0; 0xff)"},
+ {9, "R4", "var_off=(0x0; 0xff)"},
+ {10, "R4", "var_off=(0x0; 0x1fe)"},
+ {11, "R4", "var_off=(0x0; 0xff)"},
+ {12, "R4", "var_off=(0x0; 0x3fc)"},
+ {13, "R4", "var_off=(0x0; 0xff)"},
+ {14, "R4", "var_off=(0x0; 0x7f8)"},
+ {15, "R4", "var_off=(0x0; 0xff0)"},
},
},
{
@@ -234,14 +235,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
- {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
- {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
- {10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
- {10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
- {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
- {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ {2, "R5", "pkt(r=0)"},
+ {4, "R5", "pkt(off=14,r=0)"},
+ {5, "R4", "pkt(off=14,r=0)"},
+ {9, "R2", "pkt(r=18)"},
+ {10, "R5", "pkt(off=14,r=18)"},
+ {10, "R4", "var_off=(0x0; 0xff)"},
+ {13, "R4", "var_off=(0x0; 0xffff)"},
+ {14, "R4", "var_off=(0x0; 0xffff)"},
},
},
{
@@ -267,6 +268,7 @@ static struct bpf_align_test tests[] = {
*/
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
@@ -280,6 +282,7 @@ static struct bpf_align_test tests[] = {
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
@@ -296,59 +299,67 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
- {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {11, "R5", "pkt(id=1,off=14,"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
* offset is considered using reg->aux_off_align which
* is 4 and meets the load's requirements.
*/
- {15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {15, "R4", "var_off=(0x0; 0x3fc)"},
+ {15, "R5", "var_off=(0x0; 0x3fc)"},
/* Variable offset is added to R5 packet pointer,
- * resulting in auxiliary alignment of 4.
+ * resulting in auxiliary alignment of 4. To avoid BPF
+ * verifier's precision backtracking logging
+ * interfering we also have a no-op R4 = R5
+ * instruction to validate R5 state. We also check
+ * that R4 is what it should be in such case.
*/
- {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {18, "R4", "var_off=(0x0; 0x3fc)"},
+ {18, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {19, "R5", "pkt(id=2,off=14,"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
* aligned, so the total offset is 4-byte aligned and
* meets the load's requirements.
*/
- {23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {24, "R4", "var_off=(0x0; 0x3fc)"},
+ {24, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w=pkt(id=0,off=14,r=8"},
+ {26, "R5", "pkt(off=14,r=8)"},
/* Variable offset is added to R5, resulting in a
- * variable offset of (4n).
+ * variable offset of (4n). See comment for insn #18
+ * for R4 = R5 trick.
*/
- {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {28, "R4", "var_off=(0x0; 0x3fc)"},
+ {28, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {29, "R5", "pkt(id=3,off=18,"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
+ {31, "R4", "var_off=(0x0; 0x7fc)"},
+ {31, "R5", "var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
- {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+ {35, "R4", "var_off=(0x0; 0x7fc)"},
+ {35, "R5", "var_off=(0x0; 0x7fc)"},
},
},
{
@@ -386,36 +397,36 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {8, "R6", "var_off=(0x2; 0x7fc)"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
- {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+ {11, "R5", "var_off=(0x2; 0x7fc)"},
+ {12, "R4", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+ {15, "R5", "var_off=(0x2; 0x7fc)"},
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {17, "R6", "var_off=(0x0; 0x3fc)"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
- {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+ {19, "R5", "var_off=(0x2; 0xffc)"},
+ {20, "R4", "var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+ {23, "R5", "var_off=(0x2; 0xffc)"},
},
},
{
@@ -448,18 +459,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+ {3, "R5", "pkt_end()"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
+ {5, "R5", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+ {6, "R5", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
- {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+ {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
- {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+ {11, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {12, "R4", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -467,7 +478,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+ {15, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
}
},
{
@@ -502,24 +513,23 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2", "pkt(r=8)"},
+ {8, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {9, "R6", "var_off=(0x2; 0x7fc)"},
/* New unknown value in R7 is (4n) */
- {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {10, "R7", "var_off=(0x0; 0x3fc)"},
/* Subtracting it from R6 blows our unsigned bounds */
- {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+ {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
- {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {14, "R6", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
-
+ {20, "R5", "var_off=(0x2; 0x7fc)"},
},
},
{
@@ -556,23 +566,23 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+ {6, "R2", "pkt(r=8)"},
+ {9, "R6", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
- {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+ {10, "R6", "var_off=(0x2; 0x7c)"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
+ {13, "R5", "var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
- {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+ {14, "R7", "var_off=(0x0; 0x7fc)"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+ {16, "R5", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+ {20, "R5", "var_off=(0x2; 0x7fc)"},
},
},
};
@@ -594,16 +604,23 @@ static int do_test_single(struct bpf_align_test *test)
struct bpf_insn *prog = test->insns;
int prog_type = test->prog_type;
char bpf_vlog_copy[32768];
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .prog_flags = BPF_F_STRICT_ALIGNMENT,
+ .log_buf = bpf_vlog,
+ .log_size = sizeof(bpf_vlog),
+ .log_level = 2,
+ );
+ const char *main_pass_start = "0: R1=ctx() R10=fp0";
const char *line_ptr;
int cur_line = -1;
int prog_len, i;
+ char *start;
int fd_prog;
int ret;
prog_len = probe_filter_length(prog);
- fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
- prog, prog_len, BPF_F_STRICT_ALIGNMENT,
- "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2);
+ fd_prog = bpf_prog_load(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ prog, prog_len, &opts);
if (fd_prog < 0 && test->result != REJECT) {
printf("Failed to load program.\n");
printf("%s", bpf_vlog);
@@ -617,34 +634,63 @@ static int do_test_single(struct bpf_align_test *test)
ret = 0;
/* We make a local copy so that we can strtok() it */
strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
- line_ptr = strtok(bpf_vlog_copy, "\n");
+ start = strstr(bpf_vlog_copy, main_pass_start);
+ if (!start) {
+ ret = 1;
+ printf("Can't find initial line '%s'\n", main_pass_start);
+ goto out;
+ }
+ line_ptr = strtok(start, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
struct bpf_reg_match m = test->matches[i];
+ const char *p;
+ int tmp;
if (!m.match)
break;
while (line_ptr) {
cur_line = -1;
sscanf(line_ptr, "%u: ", &cur_line);
+ if (cur_line == -1)
+ sscanf(line_ptr, "from %u to %u: ", &tmp, &cur_line);
if (cur_line == m.line)
break;
line_ptr = strtok(NULL, "\n");
}
if (!line_ptr) {
- printf("Failed to find line %u for match: %s\n",
- m.line, m.match);
+ printf("Failed to find line %u for match: %s=%s\n",
+ m.line, m.reg, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
}
- if (!strstr(line_ptr, m.match)) {
- printf("Failed to find match %u: %s\n",
- m.line, m.match);
+ /* Check the next line as well in case the previous line
+ * did not have a corresponding bpf insn. Example:
+ * func#0 @0
+ * 0: R1=ctx() R10=fp0
+ * 0: (b7) r3 = 2 ; R3_w=2
+ *
+ * Sometimes it's actually two lines below, e.g. when
+ * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))":
+ * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+ * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+ * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
+ */
+ while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
+ cur_line = -1;
+ line_ptr = strtok(NULL, "\n");
+ sscanf(line_ptr ?: "", "%u: ", &cur_line);
+ if (!line_ptr || cur_line != m.line)
+ break;
+ }
+ if (cur_line != m.line || !line_ptr || !(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
+ printf("Failed to find match %u: %s=%s\n", m.line, m.reg, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
}
}
+out:
if (fd_prog >= 0)
close(fd_prog);
}
@@ -661,6 +707,6 @@ void test_align(void)
if (!test__start_subtest(test->descr))
continue;
- CHECK_FAIL(do_test_single(test));
+ ASSERT_OK(do_test_single(test), test->descr);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
new file mode 100644
index 000000000000..d98577a6babc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "arena_atomics.skel.h"
+
+static void test_add(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.add);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->add64_value, 3, "add64_value");
+ ASSERT_EQ(skel->arena->add64_result, 1, "add64_result");
+
+ ASSERT_EQ(skel->arena->add32_value, 3, "add32_value");
+ ASSERT_EQ(skel->arena->add32_result, 1, "add32_result");
+
+ ASSERT_EQ(skel->arena->add_stack_value_copy, 3, "add_stack_value");
+ ASSERT_EQ(skel->arena->add_stack_result, 1, "add_stack_result");
+
+ ASSERT_EQ(skel->arena->add_noreturn_value, 3, "add_noreturn_value");
+}
+
+static void test_sub(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.sub);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->sub64_value, -1, "sub64_value");
+ ASSERT_EQ(skel->arena->sub64_result, 1, "sub64_result");
+
+ ASSERT_EQ(skel->arena->sub32_value, -1, "sub32_value");
+ ASSERT_EQ(skel->arena->sub32_result, 1, "sub32_result");
+
+ ASSERT_EQ(skel->arena->sub_stack_value_copy, -1, "sub_stack_value");
+ ASSERT_EQ(skel->arena->sub_stack_result, 1, "sub_stack_result");
+
+ ASSERT_EQ(skel->arena->sub_noreturn_value, -1, "sub_noreturn_value");
+}
+
+static void test_and(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.and);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->and64_value, 0x010ull << 32, "and64_value");
+ ASSERT_EQ(skel->arena->and32_value, 0x010, "and32_value");
+}
+
+static void test_or(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.or);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->or64_value, 0x111ull << 32, "or64_value");
+ ASSERT_EQ(skel->arena->or32_value, 0x111, "or32_value");
+}
+
+static void test_xor(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.xor);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->xor64_value, 0x101ull << 32, "xor64_value");
+ ASSERT_EQ(skel->arena->xor32_value, 0x101, "xor32_value");
+}
+
+static void test_cmpxchg(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.cmpxchg);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->cmpxchg64_value, 2, "cmpxchg64_value");
+ ASSERT_EQ(skel->arena->cmpxchg64_result_fail, 1, "cmpxchg_result_fail");
+ ASSERT_EQ(skel->arena->cmpxchg64_result_succeed, 1, "cmpxchg_result_succeed");
+
+ ASSERT_EQ(skel->arena->cmpxchg32_value, 2, "lcmpxchg32_value");
+ ASSERT_EQ(skel->arena->cmpxchg32_result_fail, 1, "cmpxchg_result_fail");
+ ASSERT_EQ(skel->arena->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
+}
+
+static void test_xchg(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.xchg);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->xchg64_value, 2, "xchg64_value");
+ ASSERT_EQ(skel->arena->xchg64_result, 1, "xchg64_result");
+
+ ASSERT_EQ(skel->arena->xchg32_value, 2, "xchg32_value");
+ ASSERT_EQ(skel->arena->xchg32_result, 1, "xchg32_result");
+}
+
+static void test_uaf(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.uaf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails");
+}
+
+static void test_load_acquire(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support load-acquire\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.load_acquire);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->load_acquire8_result, 0x12,
+ "load_acquire8_result");
+ ASSERT_EQ(skel->arena->load_acquire16_result, 0x1234,
+ "load_acquire16_result");
+ ASSERT_EQ(skel->arena->load_acquire32_result, 0x12345678,
+ "load_acquire32_result");
+ ASSERT_EQ(skel->arena->load_acquire64_result, 0x1234567890abcdef,
+ "load_acquire64_result");
+}
+
+static void test_store_release(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support store-release\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.store_release);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->store_release8_result, 0x12,
+ "store_release8_result");
+ ASSERT_EQ(skel->arena->store_release16_result, 0x1234,
+ "store_release16_result");
+ ASSERT_EQ(skel->arena->store_release32_result, 0x12345678,
+ "store_release32_result");
+ ASSERT_EQ(skel->arena->store_release64_result, 0x1234567890abcdef,
+ "store_release64_result");
+}
+
+void test_arena_atomics(void)
+{
+ struct arena_atomics *skel;
+ int err;
+
+ skel = arena_atomics__open();
+ if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open"))
+ return;
+
+ if (skel->data->skip_all_tests) {
+ printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang",
+ __func__);
+ test__skip();
+ goto cleanup;
+ }
+ err = arena_atomics__load(skel);
+ if (!ASSERT_OK(err, "arena atomics skeleton load"))
+ return;
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("add"))
+ test_add(skel);
+ if (test__start_subtest("sub"))
+ test_sub(skel);
+ if (test__start_subtest("and"))
+ test_and(skel);
+ if (test__start_subtest("or"))
+ test_or(skel);
+ if (test__start_subtest("xor"))
+ test_xor(skel);
+ if (test__start_subtest("cmpxchg"))
+ test_cmpxchg(skel);
+ if (test__start_subtest("xchg"))
+ test_xchg(skel);
+ if (test__start_subtest("uaf"))
+ test_uaf(skel);
+ if (test__start_subtest("load_acquire"))
+ test_load_acquire(skel);
+ if (test__start_subtest("store_release"))
+ test_store_release(skel);
+
+cleanup:
+ arena_atomics__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
new file mode 100644
index 000000000000..d69fd2465f53
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <network_helpers.h>
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
+#include "arena_htab_asm.skel.h"
+#include "arena_htab.skel.h"
+
+#include "bpf_arena_htab.h"
+
+static void test_arena_htab_common(struct htab *htab)
+{
+ int i;
+
+ printf("htab %p buckets %p n_buckets %d\n", htab, htab->buckets, htab->n_buckets);
+ ASSERT_OK_PTR(htab->buckets, "htab->buckets shouldn't be NULL");
+ for (i = 0; htab->buckets && i < 16; i += 4) {
+ /*
+ * Walk htab buckets and link lists since all pointers are correct,
+ * though they were written by bpf program.
+ */
+ int val = htab_lookup_elem(htab, i);
+
+ ASSERT_EQ(i, val, "key == value");
+ }
+}
+
+static void test_arena_htab_llvm(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_htab *skel;
+ struct htab *htab;
+ size_t arena_sz;
+ void *area;
+ int ret;
+
+ skel = arena_htab__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_htab__open_and_load"))
+ return;
+
+ area = bpf_map__initial_value(skel->maps.arena, &arena_sz);
+ /* fault-in a page with pgoff == 0 as sanity check */
+ *(volatile int *)area = 0x55aa;
+
+ /* bpf prog will allocate more pages */
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_llvm), &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ goto out;
+ }
+ htab = skel->bss->htab_for_user;
+ test_arena_htab_common(htab);
+out:
+ arena_htab__destroy(skel);
+}
+
+static void test_arena_htab_asm(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_htab_asm *skel;
+ struct htab *htab;
+ int ret;
+
+ skel = arena_htab_asm__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_htab_asm__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_asm), &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+ htab = skel->bss->htab_for_user;
+ test_arena_htab_common(htab);
+ arena_htab_asm__destroy(skel);
+}
+
+void test_arena_htab(void)
+{
+ if (test__start_subtest("arena_htab_llvm"))
+ test_arena_htab_llvm();
+ if (test__start_subtest("arena_htab_asm"))
+ test_arena_htab_asm();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c
new file mode 100644
index 000000000000..d15867cddde0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <network_helpers.h>
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
+
+#include "bpf_arena_list.h"
+#include "arena_list.skel.h"
+
+struct elem {
+ struct arena_list_node node;
+ __u64 value;
+};
+
+static int list_sum(struct arena_list_head *head)
+{
+ struct elem __arena *n;
+ int sum = 0;
+
+ list_for_each_entry(n, head, node)
+ sum += n->value;
+ return sum;
+}
+
+static void test_arena_list_add_del(int cnt)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_list *skel;
+ int expected_sum = (u64)cnt * (cnt - 1) / 2;
+ int ret, sum;
+
+ skel = arena_list__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_list__open_and_load"))
+ return;
+
+ skel->bss->cnt = cnt;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_add), &opts);
+ ASSERT_OK(ret, "ret_add");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ goto out;
+ }
+ sum = list_sum(skel->bss->list_head);
+ ASSERT_EQ(sum, expected_sum, "sum of elems");
+ ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
+ ASSERT_EQ(skel->arena->test_val, cnt + 1, "num of elems");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_del), &opts);
+ ASSERT_OK(ret, "ret_del");
+ sum = list_sum(skel->bss->list_head);
+ ASSERT_EQ(sum, 0, "sum of list elems after del");
+ ASSERT_EQ(skel->bss->list_sum, expected_sum, "sum of list elems computed by prog");
+ ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
+out:
+ arena_list__destroy(skel);
+}
+
+void test_arena_list(void)
+{
+ if (test__start_subtest("arena_list_1"))
+ test_arena_list_add_del(1);
+ if (test__start_subtest("arena_list_1000"))
+ test_arena_list_add_del(1000);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
new file mode 100644
index 000000000000..693fd86fbde6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+struct __qspinlock { int val; };
+typedef struct __qspinlock arena_spinlock_t;
+
+struct arena_qnode {
+ unsigned long next;
+ int count;
+ int locked;
+};
+
+#include "arena_spin_lock.skel.h"
+
+static long cpu;
+static int repeat;
+
+pthread_barrier_t barrier;
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *)arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = repeat,
+ );
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset), "cpu affinity");
+
+ err = pthread_barrier_wait(&barrier);
+ if (err != PTHREAD_BARRIER_SERIAL_THREAD && err != 0)
+ ASSERT_FALSE(true, "pthread_barrier");
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run err");
+
+ if (topts.retval == -EOPNOTSUPP)
+ goto end;
+
+ ASSERT_EQ((int)topts.retval, 0, "test_run retval");
+
+end:
+ pthread_exit(arg);
+}
+
+static void test_arena_spin_lock_size(int size)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct arena_spin_lock *skel;
+ pthread_t thread_id[16];
+ int prog_fd, i, err;
+ int nthreads;
+ void *ret;
+
+ nthreads = MIN(get_nprocs(), ARRAY_SIZE(thread_id));
+ if (nthreads < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = arena_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load"))
+ return;
+
+ if (skel->data->test_skip == 2) {
+ test__skip();
+ goto end;
+ }
+ skel->bss->cs_count = size;
+ skel->bss->limit = repeat * nthreads;
+
+ ASSERT_OK(pthread_barrier_init(&barrier, NULL, nthreads), "barrier init");
+
+ prog_fd = bpf_program__fd(skel->progs.prog);
+ for (i = 0; i < nthreads; i++) {
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end_barrier;
+ }
+
+ for (i = 0; i < nthreads; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end_barrier;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end_barrier;
+ }
+
+ if (skel->data->test_skip == 3) {
+ printf("%s:SKIP: CONFIG_NR_CPUS exceed the maximum supported by arena spinlock\n",
+ __func__);
+ test__skip();
+ goto end_barrier;
+ }
+
+ ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value");
+
+end_barrier:
+ pthread_barrier_destroy(&barrier);
+end:
+ arena_spin_lock__destroy(skel);
+ return;
+}
+
+void test_arena_spin_lock(void)
+{
+ repeat = 1000;
+ if (test__start_subtest("arena_spin_lock_1"))
+ test_arena_spin_lock_size(1);
+ cpu = 0;
+ if (test__start_subtest("arena_spin_lock_1000"))
+ test_arena_spin_lock_size(1000);
+ cpu = 0;
+ repeat = 100;
+ if (test__start_subtest("arena_spin_lock_50000"))
+ test_arena_spin_lock_size(50000);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
new file mode 100644
index 000000000000..f81a0c066505
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "arena_strsearch.skel.h"
+
+static void test_arena_str(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_strsearch *skel;
+ int ret;
+
+ skel = arena_strsearch__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &opts);
+ ASSERT_OK(ret, "ret_add");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ }
+ arena_strsearch__destroy(skel);
+}
+
+void test_arena_strsearch(void)
+{
+ if (test__start_subtest("arena_strsearch"))
+ test_arena_str();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
new file mode 100644
index 000000000000..e27d66b75fb1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+static void init_test_filter_set(struct test_filter_set *set)
+{
+ set->cnt = 0;
+ set->tests = NULL;
+}
+
+static void free_test_filter_set(struct test_filter_set *set)
+{
+ int i, j;
+
+ for (i = 0; i < set->cnt; i++) {
+ for (j = 0; j < set->tests[i].subtest_cnt; j++)
+ free((void *)set->tests[i].subtests[j]);
+ free(set->tests[i].subtests);
+ free(set->tests[i].name);
+ }
+
+ free(set->tests);
+ init_test_filter_set(set);
+}
+
+static void test_parse_test_list(void)
+{
+ struct test_filter_set set;
+
+ init_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing", &set, true), "parsing");
+ if (!ASSERT_EQ(set.cnt, 1, "test filters count"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "subtest name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing,bpf_cookie", &set, true),
+ "parsing");
+ if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+ ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing/arg_parsing,bpf_cookie",
+ &set,
+ true),
+ "parsing");
+ if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+ goto error;
+ ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+ "subtest name");
+ ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing/arg_parsing", &set, true),
+ "parsing");
+ ASSERT_OK(parse_test_list("bpf_cookie", &set, true), "parsing");
+ ASSERT_OK(parse_test_list("send_signal", &set, true), "parsing");
+ if (!ASSERT_EQ(set.cnt, 3, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+ goto error;
+ ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+ ASSERT_EQ(set.tests[2].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+ "subtest name");
+ ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+ ASSERT_OK(strcmp("send_signal", set.tests[2].name), "test name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("bpf_cookie/trace", &set, false), "parsing");
+ if (!ASSERT_EQ(set.cnt, 1, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+ goto error;
+ ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("t/subtest1,t/subtest2", &set, true),
+ "parsing");
+ if (!ASSERT_EQ(set.cnt, 1, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 2, "subtest filters count"))
+ goto error;
+ ASSERT_OK(strcmp("t", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("subtest1", set.tests[0].subtests[0]), "subtest name");
+ ASSERT_OK(strcmp("subtest2", set.tests[0].subtests[1]), "subtest name");
+error:
+ free_test_filter_set(&set);
+}
+
+static void test_parse_test_list_file(void)
+{
+ struct test_filter_set set;
+ char tmpfile[80];
+ FILE *fp;
+ int fd;
+
+ snprintf(tmpfile, sizeof(tmpfile), "/tmp/bpf_arg_parsing_test.XXXXXX");
+ fd = mkstemp(tmpfile);
+ if (!ASSERT_GE(fd, 0, "create tmp"))
+ return;
+
+ fp = fdopen(fd, "w");
+ if (!ASSERT_NEQ(fp, NULL, "fdopen tmp")) {
+ close(fd);
+ goto out_remove;
+ }
+
+ fprintf(fp, "# comment\n");
+ fprintf(fp, " test_with_spaces \n");
+ fprintf(fp, "testA/subtest # comment\n");
+ fprintf(fp, "testB#comment with no space\n");
+ fprintf(fp, "testB # duplicate\n");
+ fprintf(fp, "testA/subtest # subtest duplicate\n");
+ fprintf(fp, "testA/subtest2\n");
+ fprintf(fp, "testC_no_eof_newline");
+ fflush(fp);
+
+ if (!ASSERT_OK(ferror(fp), "prepare tmp"))
+ goto out_fclose;
+
+ if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp"))
+ goto out_fclose;
+
+ init_test_filter_set(&set);
+
+ if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"))
+ goto out_fclose;
+
+ if (!ASSERT_EQ(set.cnt, 4, "test count"))
+ goto out_free_set;
+
+ ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
+ ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
+ ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
+ ASSERT_EQ(set.tests[1].subtest_cnt, 2, "test 1 subtest count");
+ ASSERT_OK(strcmp("subtest", set.tests[1].subtests[0]), "test 1 subtest 0");
+ ASSERT_OK(strcmp("subtest2", set.tests[1].subtests[1]), "test 1 subtest 1");
+ ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
+ ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
+
+out_free_set:
+ free_test_filter_set(&set);
+out_fclose:
+ fclose(fp);
+out_remove:
+ remove(tmpfile);
+}
+
+void test_arg_parsing(void)
+{
+ if (test__start_subtest("test_parse_test_list"))
+ test_parse_test_list();
+ if (test__start_subtest("test_parse_test_list_file"))
+ test_parse_test_list_file();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/assign_reuse.c b/tools/testing/selftests/bpf/prog_tests/assign_reuse.c
new file mode 100644
index 000000000000..989ee4d9785b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/assign_reuse.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <test_progs.h>
+
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include "network_helpers.h"
+#include "test_assign_reuse.skel.h"
+
+#define NS_TEST "assign_reuse"
+#define LOOPBACK 1
+#define PORT 4443
+
+static int attach_reuseport(int sock_fd, int prog_fd)
+{
+ return setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+}
+
+static __u64 cookie(int fd)
+{
+ __u64 cookie = 0;
+ socklen_t cookie_len = sizeof(cookie);
+ int ret;
+
+ ret = getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len);
+ ASSERT_OK(ret, "cookie");
+ ASSERT_GT(cookie, 0, "cookie_invalid");
+
+ return cookie;
+}
+
+static int echo_test_udp(int fd_sv)
+{
+ struct sockaddr_storage addr = {};
+ socklen_t len = sizeof(addr);
+ char buff[1] = {};
+ int fd_cl = -1, ret;
+
+ fd_cl = connect_to_fd(fd_sv, 100);
+ ASSERT_GT(fd_cl, 0, "create_client");
+ ASSERT_EQ(getsockname(fd_cl, (void *)&addr, &len), 0, "getsockname");
+
+ ASSERT_EQ(send(fd_cl, buff, sizeof(buff), 0), 1, "send_client");
+
+ ret = recv(fd_sv, buff, sizeof(buff), 0);
+ if (ret < 0) {
+ close(fd_cl);
+ return errno;
+ }
+
+ ASSERT_EQ(ret, 1, "recv_server");
+ ASSERT_EQ(sendto(fd_sv, buff, sizeof(buff), 0, (void *)&addr, len), 1, "send_server");
+ ASSERT_EQ(recv(fd_cl, buff, sizeof(buff), 0), 1, "recv_client");
+ close(fd_cl);
+ return 0;
+}
+
+static int echo_test_tcp(int fd_sv)
+{
+ char buff[1] = {};
+ int fd_cl = -1, fd_sv_cl = -1;
+
+ fd_cl = connect_to_fd(fd_sv, 100);
+ if (fd_cl < 0)
+ return errno;
+
+ fd_sv_cl = accept(fd_sv, NULL, NULL);
+ ASSERT_GE(fd_sv_cl, 0, "accept_fd");
+
+ ASSERT_EQ(send(fd_cl, buff, sizeof(buff), 0), 1, "send_client");
+ ASSERT_EQ(recv(fd_sv_cl, buff, sizeof(buff), 0), 1, "recv_server");
+ ASSERT_EQ(send(fd_sv_cl, buff, sizeof(buff), 0), 1, "send_server");
+ ASSERT_EQ(recv(fd_cl, buff, sizeof(buff), 0), 1, "recv_client");
+ close(fd_sv_cl);
+ close(fd_cl);
+ return 0;
+}
+
+void run_assign_reuse(int family, int sotype, const char *ip, __u16 port)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .ifindex = LOOPBACK,
+ .attach_point = BPF_TC_INGRESS,
+ );
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_opts,
+ .handle = 1,
+ .priority = 1,
+ );
+ bool hook_created = false, tc_attached = false;
+ int ret, fd_tc, fd_accept, fd_drop, fd_map;
+ int *fd_sv = NULL;
+ __u64 fd_val;
+ struct test_assign_reuse *skel;
+ const int zero = 0;
+
+ skel = test_assign_reuse__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->rodata->dest_port = port;
+
+ ret = test_assign_reuse__load(skel);
+ if (!ASSERT_OK(ret, "skel_load"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->sk_cookie_seen, 0, "cookie_init");
+
+ fd_tc = bpf_program__fd(skel->progs.tc_main);
+ fd_accept = bpf_program__fd(skel->progs.reuse_accept);
+ fd_drop = bpf_program__fd(skel->progs.reuse_drop);
+ fd_map = bpf_map__fd(skel->maps.sk_map);
+
+ fd_sv = start_reuseport_server(family, sotype, ip, port, 100, 1);
+ if (!ASSERT_NEQ(fd_sv, NULL, "start_reuseport_server"))
+ goto cleanup;
+
+ ret = attach_reuseport(*fd_sv, fd_drop);
+ if (!ASSERT_OK(ret, "attach_reuseport"))
+ goto cleanup;
+
+ fd_val = *fd_sv;
+ ret = bpf_map_update_elem(fd_map, &zero, &fd_val, BPF_NOEXIST);
+ if (!ASSERT_OK(ret, "bpf_sk_map"))
+ goto cleanup;
+
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (ret == 0)
+ hook_created = true;
+ ret = ret == -EEXIST ? 0 : ret;
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = fd_tc;
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+
+ if (sotype == SOCK_STREAM)
+ ASSERT_EQ(echo_test_tcp(*fd_sv), ECONNREFUSED, "drop_tcp");
+ else
+ ASSERT_EQ(echo_test_udp(*fd_sv), EAGAIN, "drop_udp");
+ ASSERT_EQ(skel->bss->reuseport_executed, 1, "program executed once");
+
+ skel->bss->sk_cookie_seen = 0;
+ skel->bss->reuseport_executed = 0;
+ ASSERT_OK(attach_reuseport(*fd_sv, fd_accept), "attach_reuseport(accept)");
+
+ if (sotype == SOCK_STREAM)
+ ASSERT_EQ(echo_test_tcp(*fd_sv), 0, "echo_tcp");
+ else
+ ASSERT_EQ(echo_test_udp(*fd_sv), 0, "echo_udp");
+
+ ASSERT_EQ(skel->bss->sk_cookie_seen, cookie(*fd_sv),
+ "cookie_mismatch");
+ ASSERT_EQ(skel->bss->reuseport_executed, 1, "program executed once");
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ ret = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(ret, "bpf_tc_detach");
+ }
+ if (hook_created) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ test_assign_reuse__destroy(skel);
+ free_fds(fd_sv, 1);
+}
+
+void test_assign_reuse(void)
+{
+ struct nstoken *tok = NULL;
+
+ SYS(out, "ip netns add %s", NS_TEST);
+ SYS(cleanup, "ip -net %s link set dev lo up", NS_TEST);
+
+ tok = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(tok, "netns token"))
+ return;
+
+ if (test__start_subtest("tcpv4"))
+ run_assign_reuse(AF_INET, SOCK_STREAM, "127.0.0.1", PORT);
+ if (test__start_subtest("tcpv6"))
+ run_assign_reuse(AF_INET6, SOCK_STREAM, "::1", PORT);
+ if (test__start_subtest("udpv4"))
+ run_assign_reuse(AF_INET, SOCK_DGRAM, "127.0.0.1", PORT);
+ if (test__start_subtest("udpv6"))
+ run_assign_reuse(AF_INET6, SOCK_DGRAM, "::1", PORT);
+
+cleanup:
+ close_netns(tok);
+ SYS_NOFAIL("ip netns delete %s", NS_TEST);
+out:
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c b/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c
new file mode 100644
index 000000000000..118abc29b236
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#include "async_stack_depth.skel.h"
+
+void test_async_stack_depth(void)
+{
+ RUN_TESTS(async_stack_depth);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 21efe7bbf10d..92b5f378bfb8 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -2,24 +2,20 @@
#include <test_progs.h>
-#include "atomics.skel.h"
+#include "atomics.lskel.h"
-static void test_add(struct atomics *skel)
+static void test_add(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
- struct bpf_link *link;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- link = bpf_program__attach(skel->progs.add);
- if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.add.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
return;
-
- prog_fd = bpf_program__fd(skel->progs.add);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run add",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
- goto cleanup;
ASSERT_EQ(skel->data->add64_value, 3, "add64_value");
ASSERT_EQ(skel->bss->add64_result, 1, "add64_result");
@@ -31,28 +27,20 @@ static void test_add(struct atomics *skel)
ASSERT_EQ(skel->bss->add_stack_result, 1, "add_stack_result");
ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value");
-
-cleanup:
- bpf_link__destroy(link);
}
-static void test_sub(struct atomics *skel)
+static void test_sub(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
- struct bpf_link *link;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- link = bpf_program__attach(skel->progs.sub);
- if (CHECK(IS_ERR(link), "attach(sub)", "err: %ld\n", PTR_ERR(link)))
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.sub.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
return;
-
- prog_fd = bpf_program__fd(skel->progs.sub);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run sub",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration))
- goto cleanup;
ASSERT_EQ(skel->data->sub64_value, -1, "sub64_value");
ASSERT_EQ(skel->bss->sub64_result, 1, "sub64_result");
@@ -64,27 +52,20 @@ static void test_sub(struct atomics *skel)
ASSERT_EQ(skel->bss->sub_stack_result, 1, "sub_stack_result");
ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value");
-
-cleanup:
- bpf_link__destroy(link);
}
-static void test_and(struct atomics *skel)
+static void test_and(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
- struct bpf_link *link;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- link = bpf_program__attach(skel->progs.and);
- if (CHECK(IS_ERR(link), "attach(and)", "err: %ld\n", PTR_ERR(link)))
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.and.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
return;
-
- prog_fd = bpf_program__fd(skel->progs.and);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run and",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
- goto cleanup;
ASSERT_EQ(skel->data->and64_value, 0x010ull << 32, "and64_value");
ASSERT_EQ(skel->bss->and64_result, 0x110ull << 32, "and64_result");
@@ -93,27 +74,20 @@ static void test_and(struct atomics *skel)
ASSERT_EQ(skel->bss->and32_result, 0x110, "and32_result");
ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value");
-cleanup:
- bpf_link__destroy(link);
}
-static void test_or(struct atomics *skel)
+static void test_or(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
- struct bpf_link *link;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- link = bpf_program__attach(skel->progs.or);
- if (CHECK(IS_ERR(link), "attach(or)", "err: %ld\n", PTR_ERR(link)))
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.or.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
return;
-
- prog_fd = bpf_program__fd(skel->progs.or);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run or",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration))
- goto cleanup;
ASSERT_EQ(skel->data->or64_value, 0x111ull << 32, "or64_value");
ASSERT_EQ(skel->bss->or64_result, 0x110ull << 32, "or64_result");
@@ -122,26 +96,20 @@ static void test_or(struct atomics *skel)
ASSERT_EQ(skel->bss->or32_result, 0x110, "or32_result");
ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value");
-cleanup:
- bpf_link__destroy(link);
}
-static void test_xor(struct atomics *skel)
+static void test_xor(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
- struct bpf_link *link;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- link = bpf_program__attach(skel->progs.xor);
- if (CHECK(IS_ERR(link), "attach(xor)", "err: %ld\n", PTR_ERR(link)))
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.xor.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
return;
-
- prog_fd = bpf_program__fd(skel->progs.xor);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run xor",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
- goto cleanup;
ASSERT_EQ(skel->data->xor64_value, 0x101ull << 32, "xor64_value");
ASSERT_EQ(skel->bss->xor64_result, 0x110ull << 32, "xor64_result");
@@ -150,26 +118,20 @@ static void test_xor(struct atomics *skel)
ASSERT_EQ(skel->bss->xor32_result, 0x110, "xor32_result");
ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value");
-cleanup:
- bpf_link__destroy(link);
}
-static void test_cmpxchg(struct atomics *skel)
+static void test_cmpxchg(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
- struct bpf_link *link;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- link = bpf_program__attach(skel->progs.cmpxchg);
- if (CHECK(IS_ERR(link), "attach(cmpxchg)", "err: %ld\n", PTR_ERR(link)))
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.cmpxchg.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
return;
-
- prog_fd = bpf_program__fd(skel->progs.cmpxchg);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run add",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
- goto cleanup;
ASSERT_EQ(skel->data->cmpxchg64_value, 2, "cmpxchg64_value");
ASSERT_EQ(skel->bss->cmpxchg64_result_fail, 1, "cmpxchg_result_fail");
@@ -178,53 +140,49 @@ static void test_cmpxchg(struct atomics *skel)
ASSERT_EQ(skel->data->cmpxchg32_value, 2, "lcmpxchg32_value");
ASSERT_EQ(skel->bss->cmpxchg32_result_fail, 1, "cmpxchg_result_fail");
ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
-
-cleanup:
- bpf_link__destroy(link);
}
-static void test_xchg(struct atomics *skel)
+static void test_xchg(struct atomics_lskel *skel)
{
int err, prog_fd;
- __u32 duration = 0, retval;
- struct bpf_link *link;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- link = bpf_program__attach(skel->progs.xchg);
- if (CHECK(IS_ERR(link), "attach(xchg)", "err: %ld\n", PTR_ERR(link)))
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.xchg.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
return;
-
- prog_fd = bpf_program__fd(skel->progs.xchg);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "test_run add",
- "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
- goto cleanup;
ASSERT_EQ(skel->data->xchg64_value, 2, "xchg64_value");
ASSERT_EQ(skel->bss->xchg64_result, 1, "xchg64_result");
ASSERT_EQ(skel->data->xchg32_value, 2, "xchg32_value");
ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result");
-
-cleanup:
- bpf_link__destroy(link);
}
void test_atomics(void)
{
- struct atomics *skel;
- __u32 duration = 0;
+ struct atomics_lskel *skel;
+ int err;
- skel = atomics__open_and_load();
- if (CHECK(!skel, "skel_load", "atomics skeleton failed\n"))
+ skel = atomics_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "atomics skeleton open"))
return;
+ skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = atomics_lskel__load(skel);
+ if (!ASSERT_OK(err, "atomics skeleton load"))
+ goto cleanup;
+
if (skel->data->skip_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)",
__func__);
test__skip();
goto cleanup;
}
+ skel->bss->pid = getpid();
if (test__start_subtest("add"))
test_add(skel);
@@ -242,5 +200,5 @@ void test_atomics(void)
test_xchg(skel);
cleanup:
- atomics__destroy(skel);
+ atomics_lskel__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index a0ee87c8e1ea..9e77e5da7097 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -1,105 +1,441 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_attach_kprobe_sleepable.skel.h"
+#include "test_attach_probe_manual.skel.h"
#include "test_attach_probe.skel.h"
+#include "kprobe_write_ctx.skel.h"
-ssize_t get_base_addr() {
- size_t start, offset;
- char buf[256];
- FILE *f;
-
- f = fopen("/proc/self/maps", "r");
- if (!f)
- return -errno;
-
- while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
- &start, buf, &offset) == 3) {
- if (strcmp(buf, "r-xp") == 0) {
- fclose(f);
- return start - offset;
- }
- }
-
- fclose(f);
- return -EINVAL;
+/* this is how USDT semaphore is actually defined, except volatile modifier */
+volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
+
+/* uprobe attach point */
+static noinline void trigger_func(void)
+{
+ asm volatile ("");
}
-void test_attach_probe(void)
+/* attach point for byname uprobe */
+static noinline void trigger_func2(void)
+{
+ asm volatile ("");
+}
+
+/* attach point for byname sleepable uprobe */
+static noinline void trigger_func3(void)
+{
+ asm volatile ("");
+}
+
+/* attach point for ref_ctr */
+static noinline void trigger_func4(void)
{
- int duration = 0;
+ asm volatile ("");
+}
+
+static char test_data[] = "test_data";
+
+/* manual attach kprobe/kretprobe/uprobe/uretprobe testings */
+static void test_attach_probe_manual(enum probe_attach_mode attach_mode)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
- struct test_attach_probe* skel;
- size_t uprobe_offset;
- ssize_t base_addr;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
- base_addr = get_base_addr();
- if (CHECK(base_addr < 0, "get_base_addr",
- "failed to find base addr: %zd", base_addr))
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
return;
- uprobe_offset = (size_t)&get_base_addr - base_addr;
- skel = test_attach_probe__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
- return;
- if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
goto cleanup;
- kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
- false /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link)))
+ /* manual-attach kprobe/kretprobe */
+ kprobe_opts.attach_mode = attach_mode;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME,
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
skel->links.handle_kprobe = kprobe_link;
- kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
- true /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
- "err %ld\n", PTR_ERR(kretprobe_link)))
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME,
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
- uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
- false /* retprobe */,
- 0 /* self pid */,
- "/proc/self/exe",
- uprobe_offset);
- if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
- "err %ld\n", PTR_ERR(uprobe_link)))
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = attach_mode;
+ uprobe_opts.ref_ctr_offset = 0;
+ uprobe_opts.retprobe = false;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
- uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
- true /* retprobe */,
- -1 /* any pid */,
- "/proc/self/exe",
- uprobe_offset);
- if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
- "err %ld\n", PTR_ERR(uretprobe_link)))
+ uprobe_opts.retprobe = true;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
+ /* attach uprobe by function name manually */
+ uprobe_opts.func_name = "trigger_func2";
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = 0;
+ skel->links.handle_uprobe_byname =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname,
+ 0 /* this pid */,
+ "/proc/self/exe",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname"))
+ goto cleanup;
+
/* trigger & validate kprobe && kretprobe */
usleep(1);
- if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
- "wrong kprobe res: %d\n", skel->bss->kprobe_res))
+ /* trigger & validate uprobe & uretprobe */
+ trigger_func();
+
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+/* attach uprobe/uretprobe long event name testings */
+static void test_attach_uprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
+ char path[PATH_MAX] = {0};
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ goto cleanup;
+
+ if (!ASSERT_GT(readlink("/proc/self/exe", path, PATH_MAX - 1), 0, "readlink"))
goto cleanup;
- if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
- "wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
+
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ uprobe_opts.ref_ctr_offset = 0;
+ uprobe_opts.retprobe = false;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ path,
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_long_event_name"))
goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
- /* trigger & validate uprobe & uretprobe */
- get_base_addr();
+ uprobe_opts.retprobe = true;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ path,
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uretprobe = uretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
- if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
- "wrong uprobe res: %d\n", skel->bss->uprobe_res))
+/* attach kprobe/kretprobe long event name testings */
+static void test_attach_kprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ struct bpf_link *kprobe_link, *kretprobe_link;
+ struct test_attach_probe_manual *skel;
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ /* manual-attach kprobe/kretprobe */
+ kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_long_event_name"))
goto cleanup;
- if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
- "wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
+ skel->links.handle_kprobe = kprobe_link;
+
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_long_event_name"))
goto cleanup;
+ skel->links.handle_kretprobe = kretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+#ifdef __x86_64__
+/* attach kprobe/kretprobe long event name testings */
+static void test_attach_kprobe_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_kprobe_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
+static void test_attach_probe_auto(struct test_attach_probe *skel)
+{
+ struct bpf_link *uprobe_err_link;
+
+ /* auto-attachable kprobe and kretprobe */
+ skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto);
+ ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto");
+
+ skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto);
+ ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto");
+
+ /* verify auto-attach fails for old-style uprobe definition */
+ uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
+ if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
+ "auto-attach should fail for old-style name"))
+ return;
+
+ /* verify auto-attach works */
+ skel->links.handle_uretprobe_byname =
+ bpf_program__attach(skel->progs.handle_uretprobe_byname);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname"))
+ return;
+
+ /* trigger & validate kprobe && kretprobe */
+ usleep(1);
+
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
+ ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
+}
+
+static void test_uprobe_lib(struct test_attach_probe *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ FILE *devnull;
+
+ /* test attach by name for a library function, using the library
+ * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo().
+ */
+ uprobe_opts.func_name = "fopen";
+ uprobe_opts.retprobe = false;
+ skel->links.handle_uprobe_byname2 =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2,
+ 0 /* this pid */,
+ "libc.so.6",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2"))
+ return;
+
+ uprobe_opts.func_name = "fclose";
+ uprobe_opts.retprobe = true;
+ skel->links.handle_uretprobe_byname2 =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2,
+ -1 /* any pid */,
+ "libc.so.6",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
+ return;
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ devnull = fopen("/dev/null", "r");
+ fclose(devnull);
+
+ ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
+}
+
+static void test_uprobe_ref_ctr(struct test_attach_probe *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ ssize_t uprobe_offset, ref_ctr_offset;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func4);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset_ref_ctr"))
+ return;
+
+ ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
+ if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
+ return;
+
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_ref_ctr,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_ref_ctr"))
+ return;
+ skel->links.handle_uprobe_ref_ctr = uprobe_link;
+
+ ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+
+ /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
+ uprobe_opts.retprobe = true;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_ref_ctr,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_ref_ctr"))
+ return;
+ skel->links.handle_uretprobe_ref_ctr = uretprobe_link;
+}
+
+static void test_kprobe_sleepable(void)
+{
+ struct test_attach_kprobe_sleepable *skel;
+
+ skel = test_attach_kprobe_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_sleepable_open"))
+ return;
+
+ /* sleepable kprobe test case needs flags set before loading */
+ if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable,
+ BPF_F_SLEEPABLE), "kprobe_sleepable_flags"))
+ goto cleanup;
+
+ if (!ASSERT_OK(test_attach_kprobe_sleepable__load(skel),
+ "skel_kprobe_sleepable_load"))
+ goto cleanup;
+
+ /* sleepable kprobes should not attach successfully */
+ skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable);
+ ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable");
+
+cleanup:
+ test_attach_kprobe_sleepable__destroy(skel);
+}
+
+static void test_uprobe_sleepable(struct test_attach_probe *skel)
+{
+ /* test sleepable uprobe and uretprobe variants */
+ skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable"))
+ return;
+
+ skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3"))
+ return;
+
+ skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable"))
+ return;
+
+ skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3"))
+ return;
+
+ skel->bss->user_ptr = test_data;
+
+ /* trigger & validate sleepable uprobe attached by name */
+ trigger_func3();
+
+ ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res");
+ ASSERT_EQ(skel->bss->uprobe_byname3_str_sleepable_res, 10, "check_uprobe_byname3_str_sleepable_res");
+ ASSERT_EQ(skel->bss->uprobe_byname3_res, 11, "check_uprobe_byname3_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 12, "check_uretprobe_byname3_sleepable_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname3_str_sleepable_res, 13, "check_uretprobe_byname3_str_sleepable_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname3_res, 14, "check_uretprobe_byname3_res");
+}
+
+void test_attach_probe(void)
+{
+ struct test_attach_probe *skel;
+
+ skel = test_attach_probe__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load"))
+ goto cleanup;
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ goto cleanup;
+
+ if (test__start_subtest("manual-default"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_DEFAULT);
+ if (test__start_subtest("manual-legacy"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_LEGACY);
+ if (test__start_subtest("manual-perf"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_PERF);
+ if (test__start_subtest("manual-link"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_LINK);
+
+ if (test__start_subtest("auto"))
+ test_attach_probe_auto(skel);
+ if (test__start_subtest("kprobe-sleepable"))
+ test_kprobe_sleepable();
+ if (test__start_subtest("uprobe-lib"))
+ test_uprobe_lib(skel);
+ if (test__start_subtest("uprobe-sleepable"))
+ test_uprobe_sleepable(skel);
+ if (test__start_subtest("uprobe-ref_ctr"))
+ test_uprobe_ref_ctr(skel);
+
+ if (test__start_subtest("uprobe-long_name"))
+ test_attach_uprobe_long_event_name();
+ if (test__start_subtest("kprobe-long_name"))
+ test_attach_kprobe_long_event_name();
+ if (test__start_subtest("kprobe-write-ctx"))
+ test_attach_kprobe_write_ctx();
cleanup:
test_attach_probe__destroy(skel);
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/autoattach.c b/tools/testing/selftests/bpf/prog_tests/autoattach.c
new file mode 100644
index 000000000000..dc5e01d279bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoattach.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include <test_progs.h>
+#include "test_autoattach.skel.h"
+
+void test_autoattach(void)
+{
+ struct test_autoattach *skel;
+
+ skel = test_autoattach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto cleanup;
+
+ /* disable auto-attach for prog2 */
+ bpf_program__set_autoattach(skel->progs.prog2, false);
+ ASSERT_TRUE(bpf_program__autoattach(skel->progs.prog1), "autoattach_prog1");
+ ASSERT_FALSE(bpf_program__autoattach(skel->progs.prog2), "autoattach_prog2");
+ if (!ASSERT_OK(test_autoattach__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ usleep(1);
+
+ ASSERT_TRUE(skel->bss->prog1_called, "attached_prog1");
+ ASSERT_FALSE(skel->bss->prog2_called, "attached_prog2");
+
+cleanup:
+ test_autoattach__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c
new file mode 100644
index 000000000000..6a707213e46b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "bad_struct_ops.skel.h"
+#include "bad_struct_ops2.skel.h"
+
+static void invalid_prog_reuse(void)
+{
+ struct bad_struct_ops *skel;
+ char *log = NULL;
+ int err;
+
+ skel = bad_struct_ops__open();
+ if (!ASSERT_OK_PTR(skel, "bad_struct_ops__open"))
+ return;
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+
+ err = bad_struct_ops__load(skel);
+ log = stop_libbpf_log_capture();
+ ASSERT_ERR(err, "bad_struct_ops__load should fail");
+ ASSERT_HAS_SUBSTR(log,
+ "struct_ops init_kern testmod_2 func ptr test_1: invalid reuse of prog test_1",
+ "expected init_kern message");
+
+cleanup:
+ free(log);
+ bad_struct_ops__destroy(skel);
+}
+
+static void unused_program(void)
+{
+ struct bad_struct_ops2 *skel;
+ char *log = NULL;
+ int err;
+
+ skel = bad_struct_ops2__open();
+ if (!ASSERT_OK_PTR(skel, "bad_struct_ops2__open"))
+ return;
+
+ /* struct_ops programs not referenced from any maps are open
+ * with autoload set to true.
+ */
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo autoload == true");
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+
+ err = bad_struct_ops2__load(skel);
+ ASSERT_ERR(err, "bad_struct_ops2__load should fail");
+ log = stop_libbpf_log_capture();
+ ASSERT_HAS_SUBSTR(log, "prog 'foo': failed to load",
+ "message about 'foo' failing to load");
+
+cleanup:
+ free(log);
+ bad_struct_ops2__destroy(skel);
+}
+
+void test_bad_struct_ops(void)
+{
+ if (test__start_subtest("invalid_prog_reuse"))
+ invalid_prog_reuse();
+ if (test__start_subtest("unused_program"))
+ unused_program();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
index d0f06e40c16d..f7cd129cb82b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -1,12 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
-#include <sys/capability.h>
-static int duration;
+#include "test_progs.h"
+#include "cap_helpers.h"
+#include "bind_perm.skel.h"
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ return 0;
+}
void try_bind(int family, int port, int expected_errno)
{
@@ -16,7 +25,7 @@ void try_bind(int family, int port, int expected_errno)
int fd = -1;
fd = socket(family, SOCK_STREAM, 0);
- if (CHECK(fd < 0, "fd", "errno %d", errno))
+ if (!ASSERT_GE(fd, 0, "socket"))
goto close_socket;
if (family == AF_INET) {
@@ -38,45 +47,18 @@ close_socket:
close(fd);
}
-bool cap_net_bind_service(cap_flag_value_t flag)
-{
- const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
- cap_flag_value_t original_value;
- bool was_effective = false;
- cap_t caps;
-
- caps = cap_get_proc();
- if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
- goto free_caps;
-
- if (CHECK(cap_get_flag(caps, CAP_NET_BIND_SERVICE, CAP_EFFECTIVE,
- &original_value),
- "cap_get_flag", "errno %d", errno))
- goto free_caps;
-
- was_effective = (original_value == CAP_SET);
-
- if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
- flag),
- "cap_set_flag", "errno %d", errno))
- goto free_caps;
-
- if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
- goto free_caps;
-
-free_caps:
- CHECK(cap_free(caps), "cap_free", "errno %d", errno);
- return was_effective;
-}
-
void test_bind_perm(void)
{
- bool cap_was_effective;
+ const __u64 net_bind_svc_cap = 1ULL << CAP_NET_BIND_SERVICE;
struct bind_perm *skel;
+ __u64 old_caps = 0;
int cgroup_fd;
+ if (create_netns())
+ return;
+
cgroup_fd = test__join_cgroup("/bind_perm");
- if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+ if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
return;
skel = bind_perm__open_and_load();
@@ -91,7 +73,8 @@ void test_bind_perm(void)
if (!ASSERT_OK_PTR(skel, "bind_v6_prog"))
goto close_skeleton;
- cap_was_effective = cap_net_bind_service(CAP_CLEAR);
+ ASSERT_OK(cap_disable_effective(net_bind_svc_cap, &old_caps),
+ "cap_disable_effective");
try_bind(AF_INET, 110, EACCES);
try_bind(AF_INET6, 110, EACCES);
@@ -99,8 +82,9 @@ void test_bind_perm(void)
try_bind(AF_INET, 111, 0);
try_bind(AF_INET6, 111, 0);
- if (cap_was_effective)
- cap_net_bind_service(CAP_SET);
+ if (old_caps & net_bind_svc_cap)
+ ASSERT_OK(cap_enable_effective(net_bind_svc_cap, NULL),
+ "cap_enable_effective");
close_skeleton:
bind_perm__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
new file mode 100644
index 000000000000..42b49870e520
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <sys/syscall.h>
+#include <limits.h>
+#include <test_progs.h>
+#include "bloom_filter_map.skel.h"
+
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
+static void test_fail_cases(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+ __u32 value = 0;
+ int fd, err;
+
+ /* Invalid key size */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 4, sizeof(value), 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid key size"))
+ close(fd);
+
+ /* Invalid value size */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, 0, 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0"))
+ close(fd);
+
+ /* Invalid value size: too big */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, INT32_MAX, 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value too large"))
+ close(fd);
+
+ /* Invalid max entries size */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size"))
+ close(fd);
+
+ /* Bloom filter maps do not support BPF_F_NO_PREALLOC */
+ opts.map_flags = BPF_F_NO_PREALLOC;
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid flags"))
+ close(fd);
+
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, NULL);
+ if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter"))
+ return;
+
+ /* Test invalid flags */
+ err = bpf_map_update_elem(fd, NULL, &value, -1);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ err = bpf_map_update_elem(fd, NULL, &value, BPF_EXIST);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ err = bpf_map_update_elem(fd, NULL, &value, BPF_F_LOCK);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ err = bpf_map_update_elem(fd, NULL, &value, BPF_NOEXIST);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ err = bpf_map_update_elem(fd, NULL, &value, 10000);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ close(fd);
+}
+
+static void test_success_cases(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+ char value[11];
+ int fd, err;
+
+ /* Create a map */
+ opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE;
+ opts.numa_node = NUMA_NO_NODE;
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
+ if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case"))
+ return;
+
+ /* Add a value to the bloom filter */
+ err = bpf_map_update_elem(fd, NULL, &value, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem bloom filter success case"))
+ goto done;
+
+ /* Lookup a value in the bloom filter */
+ err = bpf_map_lookup_elem(fd, NULL, &value);
+ ASSERT_OK(err, "bpf_map_update_elem bloom filter success case");
+
+done:
+ close(fd);
+}
+
+static void check_bloom(struct bloom_filter_map *skel)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(skel->progs.check_bloom);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->error, 0, "error");
+
+ bpf_link__destroy(link);
+}
+
+static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals,
+ __u32 nr_rand_vals)
+{
+ int outer_map_fd, inner_map_fd, err, i, key = 0;
+ struct bpf_link *link;
+
+ /* Create a bloom filter map that will be used as the inner map */
+ inner_map_fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(*rand_vals),
+ nr_rand_vals, NULL);
+ if (!ASSERT_GE(inner_map_fd, 0, "bpf_map_create bloom filter inner map"))
+ return;
+
+ for (i = 0; i < nr_rand_vals; i++) {
+ err = bpf_map_update_elem(inner_map_fd, NULL, rand_vals + i, BPF_ANY);
+ if (!ASSERT_OK(err, "Add random value to inner_map_fd"))
+ goto done;
+ }
+
+ /* Add the bloom filter map to the outer map */
+ outer_map_fd = bpf_map__fd(skel->maps.outer_map);
+ err = bpf_map_update_elem(outer_map_fd, &key, &inner_map_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "Add bloom filter map to outer map"))
+ goto done;
+
+ /* Attach the bloom_filter_inner_map prog */
+ link = bpf_program__attach(skel->progs.inner_map);
+ if (!ASSERT_OK_PTR(link, "link"))
+ goto delete_inner_map;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->error, 0, "error");
+
+ bpf_link__destroy(link);
+
+delete_inner_map:
+ /* Ensure the inner bloom filter map can be deleted */
+ err = bpf_map_delete_elem(outer_map_fd, &key);
+ ASSERT_OK(err, "Delete inner bloom filter map");
+
+done:
+ close(inner_map_fd);
+}
+
+static int setup_progs(struct bloom_filter_map **out_skel, __u32 **out_rand_vals,
+ __u32 *out_nr_rand_vals)
+{
+ struct bloom_filter_map *skel;
+ int random_data_fd, bloom_fd;
+ __u32 *rand_vals = NULL;
+ __u32 map_size, val;
+ int err, i;
+
+ /* Set up a bloom filter map skeleton */
+ skel = bloom_filter_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bloom_filter_map__open_and_load"))
+ return -EINVAL;
+
+ /* Set up rand_vals */
+ map_size = bpf_map__max_entries(skel->maps.map_random_data);
+ rand_vals = malloc(sizeof(*rand_vals) * map_size);
+ if (!rand_vals) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ /* Generate random values and populate both skeletons */
+ random_data_fd = bpf_map__fd(skel->maps.map_random_data);
+ bloom_fd = bpf_map__fd(skel->maps.map_bloom);
+ for (i = 0; i < map_size; i++) {
+ val = rand();
+
+ err = bpf_map_update_elem(random_data_fd, &i, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "Add random value to map_random_data"))
+ goto error;
+
+ err = bpf_map_update_elem(bloom_fd, NULL, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "Add random value to map_bloom"))
+ goto error;
+
+ rand_vals[i] = val;
+ }
+
+ *out_skel = skel;
+ *out_rand_vals = rand_vals;
+ *out_nr_rand_vals = map_size;
+
+ return 0;
+
+error:
+ bloom_filter_map__destroy(skel);
+ if (rand_vals)
+ free(rand_vals);
+ return err;
+}
+
+void test_bloom_filter_map(void)
+{
+ __u32 *rand_vals = NULL, nr_rand_vals = 0;
+ struct bloom_filter_map *skel = NULL;
+ int err;
+
+ test_fail_cases();
+ test_success_cases();
+
+ err = setup_progs(&skel, &rand_vals, &nr_rand_vals);
+ if (err)
+ return;
+
+ test_inner_map(skel, rand_vals, nr_rand_vals);
+ free(rand_vals);
+
+ check_bloom(skel);
+
+ bloom_filter_map__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
new file mode 100644
index 000000000000..75f4dff7d042
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -0,0 +1,763 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <bpf/btf.h>
+#include "test_bpf_cookie.skel.h"
+#include "kprobe_multi.skel.h"
+#include "uprobe_multi.skel.h"
+
+/* uprobe attach point */
+static noinline void trigger_func(void)
+{
+ asm volatile ("");
+}
+
+static void kprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+
+ /* attach two kprobes */
+ opts.bpf_cookie = 0x1;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two kretprobes */
+ opts.bpf_cookie = 0x10;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x20;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger kprobe && kretprobe */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->kprobe_res, 0x1 | 0x2, "kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 0x10 | 0x20, "kretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void kprobe_multi_test_run(struct kprobe_multi *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->kprobe_test1_result, 1, "kprobe_test1_result");
+ ASSERT_EQ(skel->bss->kprobe_test2_result, 1, "kprobe_test2_result");
+ ASSERT_EQ(skel->bss->kprobe_test3_result, 1, "kprobe_test3_result");
+ ASSERT_EQ(skel->bss->kprobe_test4_result, 1, "kprobe_test4_result");
+ ASSERT_EQ(skel->bss->kprobe_test5_result, 1, "kprobe_test5_result");
+ ASSERT_EQ(skel->bss->kprobe_test6_result, 1, "kprobe_test6_result");
+ ASSERT_EQ(skel->bss->kprobe_test7_result, 1, "kprobe_test7_result");
+ ASSERT_EQ(skel->bss->kprobe_test8_result, 1, "kprobe_test8_result");
+
+ ASSERT_EQ(skel->bss->kretprobe_test1_result, 1, "kretprobe_test1_result");
+ ASSERT_EQ(skel->bss->kretprobe_test2_result, 1, "kretprobe_test2_result");
+ ASSERT_EQ(skel->bss->kretprobe_test3_result, 1, "kretprobe_test3_result");
+ ASSERT_EQ(skel->bss->kretprobe_test4_result, 1, "kretprobe_test4_result");
+ ASSERT_EQ(skel->bss->kretprobe_test5_result, 1, "kretprobe_test5_result");
+ ASSERT_EQ(skel->bss->kretprobe_test6_result, 1, "kretprobe_test6_result");
+ ASSERT_EQ(skel->bss->kretprobe_test7_result, 1, "kretprobe_test7_result");
+ ASSERT_EQ(skel->bss->kretprobe_test8_result, 1, "kretprobe_test8_result");
+}
+
+static void kprobe_multi_link_api_subtest(void)
+{
+ int prog_fd, link1_fd = -1, link2_fd = -1;
+ struct kprobe_multi *skel = NULL;
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ unsigned long long addrs[8];
+ __u64 cookies[8];
+
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+ goto cleanup;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ skel->bss->test_cookie = true;
+
+#define GET_ADDR(__sym, __addr) ({ \
+ __addr = ksym_get_addr(__sym); \
+ if (!ASSERT_NEQ(__addr, 0, "ksym_get_addr " #__sym)) \
+ goto cleanup; \
+})
+
+ GET_ADDR("bpf_fentry_test1", addrs[0]);
+ GET_ADDR("bpf_fentry_test3", addrs[1]);
+ GET_ADDR("bpf_fentry_test4", addrs[2]);
+ GET_ADDR("bpf_fentry_test5", addrs[3]);
+ GET_ADDR("bpf_fentry_test6", addrs[4]);
+ GET_ADDR("bpf_fentry_test7", addrs[5]);
+ GET_ADDR("bpf_fentry_test2", addrs[6]);
+ GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+#undef GET_ADDR
+
+ cookies[0] = 1; /* bpf_fentry_test1 */
+ cookies[1] = 2; /* bpf_fentry_test3 */
+ cookies[2] = 3; /* bpf_fentry_test4 */
+ cookies[3] = 4; /* bpf_fentry_test5 */
+ cookies[4] = 5; /* bpf_fentry_test6 */
+ cookies[5] = 6; /* bpf_fentry_test7 */
+ cookies[6] = 7; /* bpf_fentry_test2 */
+ cookies[7] = 8; /* bpf_fentry_test8 */
+
+ opts.kprobe_multi.addrs = (const unsigned long *) &addrs;
+ opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
+ opts.kprobe_multi.cookies = (const __u64 *) &cookies;
+ prog_fd = bpf_program__fd(skel->progs.test_kprobe);
+
+ link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
+ goto cleanup;
+
+ cookies[0] = 8; /* bpf_fentry_test1 */
+ cookies[1] = 7; /* bpf_fentry_test3 */
+ cookies[2] = 6; /* bpf_fentry_test4 */
+ cookies[3] = 5; /* bpf_fentry_test5 */
+ cookies[4] = 4; /* bpf_fentry_test6 */
+ cookies[5] = 3; /* bpf_fentry_test7 */
+ cookies[6] = 2; /* bpf_fentry_test2 */
+ cookies[7] = 1; /* bpf_fentry_test8 */
+
+ opts.kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
+ prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
+
+ link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link2_fd, 0, "link2_fd"))
+ goto cleanup;
+
+ kprobe_multi_test_run(skel);
+
+cleanup:
+ close(link1_fd);
+ close(link2_fd);
+ kprobe_multi__destroy(skel);
+}
+
+static void kprobe_multi_attach_api_subtest(void)
+{
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct kprobe_multi *skel = NULL;
+ const char *syms[8] = {
+ "bpf_fentry_test1",
+ "bpf_fentry_test3",
+ "bpf_fentry_test4",
+ "bpf_fentry_test5",
+ "bpf_fentry_test6",
+ "bpf_fentry_test7",
+ "bpf_fentry_test2",
+ "bpf_fentry_test8",
+ };
+ __u64 cookies[8];
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ skel->bss->test_cookie = true;
+
+ cookies[0] = 1; /* bpf_fentry_test1 */
+ cookies[1] = 2; /* bpf_fentry_test3 */
+ cookies[2] = 3; /* bpf_fentry_test4 */
+ cookies[3] = 4; /* bpf_fentry_test5 */
+ cookies[4] = 5; /* bpf_fentry_test6 */
+ cookies[5] = 6; /* bpf_fentry_test7 */
+ cookies[6] = 7; /* bpf_fentry_test2 */
+ cookies[7] = 8; /* bpf_fentry_test8 */
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = cookies;
+
+ link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+ NULL, &opts);
+ if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ cookies[0] = 8; /* bpf_fentry_test1 */
+ cookies[1] = 7; /* bpf_fentry_test3 */
+ cookies[2] = 6; /* bpf_fentry_test4 */
+ cookies[3] = 5; /* bpf_fentry_test5 */
+ cookies[4] = 4; /* bpf_fentry_test6 */
+ cookies[5] = 3; /* bpf_fentry_test7 */
+ cookies[6] = 2; /* bpf_fentry_test2 */
+ cookies[7] = 1; /* bpf_fentry_test8 */
+
+ opts.retprobe = true;
+
+ link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe,
+ NULL, &opts);
+ if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ kprobe_multi_test_run(skel);
+
+cleanup:
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link1);
+ kprobe_multi__destroy(skel);
+}
+
+/* defined in prog_tests/uprobe_multi_test.c */
+void uprobe_multi_func_1(void);
+void uprobe_multi_func_2(void);
+void uprobe_multi_func_3(void);
+
+static void uprobe_multi_test_run(struct uprobe_multi *skel)
+{
+ skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
+ skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2;
+ skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3;
+
+ skel->bss->pid = getpid();
+ skel->bss->test_cookie = true;
+
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+
+ ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 1, "uprobe_multi_func_1_result");
+ ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 1, "uprobe_multi_func_2_result");
+ ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 1, "uprobe_multi_func_3_result");
+
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 1, "uretprobe_multi_func_1_result");
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 1, "uretprobe_multi_func_2_result");
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 1, "uretprobe_multi_func_3_result");
+}
+
+static void uprobe_multi_attach_api_subtest(void)
+{
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct uprobe_multi *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ const char *syms[3] = {
+ "uprobe_multi_func_1",
+ "uprobe_multi_func_2",
+ "uprobe_multi_func_3",
+ };
+ __u64 cookies[3];
+
+ cookies[0] = 3; /* uprobe_multi_func_1 */
+ cookies[1] = 1; /* uprobe_multi_func_2 */
+ cookies[2] = 2; /* uprobe_multi_func_3 */
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = &cookies[0];
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi"))
+ goto cleanup;
+
+ link1 = bpf_program__attach_uprobe_multi(skel->progs.uprobe, -1,
+ "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link1, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ cookies[0] = 2; /* uprobe_multi_func_1 */
+ cookies[1] = 3; /* uprobe_multi_func_2 */
+ cookies[2] = 1; /* uprobe_multi_func_3 */
+
+ opts.retprobe = true;
+ link2 = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, -1,
+ "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link2, "bpf_program__attach_uprobe_multi_retprobe"))
+ goto cleanup;
+
+ uprobe_multi_test_run(skel);
+
+cleanup:
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link1);
+ uprobe_multi__destroy(skel);
+}
+
+static void uprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+ ssize_t uprobe_offset;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ goto cleanup;
+
+ /* attach two uprobes */
+ opts.bpf_cookie = 0x100;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x200;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two uretprobes */
+ opts.bpf_cookie = 0x1000;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2000;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger uprobe && uretprobe */
+ trigger_func();
+
+ ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void tp_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL, *link3 = NULL;
+
+ /* attach first tp prog */
+ opts.bpf_cookie = 0x10000;
+ link1 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp1,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ /* attach second tp prog */
+ opts.bpf_cookie = 0x20000;
+ link2 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp2,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x10000 | 0x20000, "tp_res1");
+
+ /* now we detach first prog and will attach third one, which causes
+ * two internal calls to bpf_prog_array_copy(), shuffling
+ * bpf_prog_array_items around. We test here that we don't lose track
+ * of associated bpf_cookies.
+ */
+ bpf_link__destroy(link1);
+ link1 = NULL;
+ kern_sync_rcu();
+ skel->bss->tp_res = 0;
+
+ /* attach third tp prog */
+ opts.bpf_cookie = 0x40000;
+ link3 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp3,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link3, "link3"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x20000 | 0x40000, "tp_res2");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link3);
+}
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+static void pe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts);
+ struct bpf_link *link = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1;
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.sample_period = 100000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x100000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link1"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x100000, "pe_res1");
+
+ /* prevent bpf_link__destroy() closing pfd itself */
+ bpf_link__disconnect(link);
+ /* close BPF link's FD explicitly */
+ close(bpf_link__fd(link));
+ /* free up memory used by struct bpf_link */
+ bpf_link__destroy(link);
+ link = NULL;
+ kern_sync_rcu();
+ skel->bss->pe_res = 0;
+
+ opts.bpf_cookie = 0x200000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link2"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x200000, "pe_res2");
+
+cleanup:
+ close(pfd);
+ bpf_link__destroy(link);
+}
+
+static int verify_tracing_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie");
+
+ return 0;
+}
+
+static void tracing_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int prog_fd, err;
+ int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+
+ skel->bss->fentry_res = 0;
+ skel->bss->fexit_res = 0;
+
+ cookie = 0x10000000000000L;
+ prog_fd = bpf_program__fd(skel->progs.fentry_test1);
+ link_opts.tracing.cookie = cookie;
+ fentry_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FENTRY, &link_opts);
+ if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create"))
+ goto cleanup;
+
+ err = verify_tracing_link_info(fentry_fd, cookie);
+ if (!ASSERT_OK(err, "verify_tracing_link_info"))
+ goto cleanup;
+
+ cookie = 0x20000000000000L;
+ prog_fd = bpf_program__fd(skel->progs.fexit_test1);
+ link_opts.tracing.cookie = cookie;
+ fexit_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FEXIT, &link_opts);
+ if (!ASSERT_GE(fexit_fd, 0, "fexit.link_create"))
+ goto cleanup;
+
+ cookie = 0x30000000000000L;
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ link_opts.tracing.cookie = cookie;
+ fmod_ret_fd = bpf_link_create(prog_fd, 0, BPF_MODIFY_RETURN, &link_opts);
+ if (!ASSERT_GE(fmod_ret_fd, 0, "fmod_ret.link_create"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.fentry_test1);
+ bpf_prog_test_run_opts(prog_fd, &opts);
+
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ bpf_prog_test_run_opts(prog_fd, &opts);
+
+ ASSERT_EQ(skel->bss->fentry_res, 0x10000000000000L, "fentry_res");
+ ASSERT_EQ(skel->bss->fexit_res, 0x20000000000000L, "fexit_res");
+ ASSERT_EQ(skel->bss->fmod_ret_res, 0x30000000000000L, "fmod_ret_res");
+
+cleanup:
+ if (fentry_fd >= 0)
+ close(fentry_fd);
+ if (fexit_fd >= 0)
+ close(fexit_fd);
+ if (fmod_ret_fd >= 0)
+ close(fmod_ret_fd);
+}
+
+int stack_mprotect(void);
+
+static void lsm_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int prog_fd;
+ int lsm_fd = -1;
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+ int err;
+
+ skel->bss->lsm_res = 0;
+
+ cookie = 0x90000000000090L;
+ prog_fd = bpf_program__fd(skel->progs.test_int_hook);
+ link_opts.tracing.cookie = cookie;
+ lsm_fd = bpf_link_create(prog_fd, 0, BPF_LSM_MAC, &link_opts);
+ if (!ASSERT_GE(lsm_fd, 0, "lsm.link_create"))
+ goto cleanup;
+
+ err = stack_mprotect();
+ if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+ !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+ goto cleanup;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->lsm_res, 0x90000000000090L, "fentry_res");
+
+cleanup:
+ if (lsm_fd >= 0)
+ close(lsm_fd);
+}
+
+static void tp_btf_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int prog_fd, link_fd = -1;
+ struct bpf_link *link = NULL;
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+ LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
+ LIBBPF_OPTS(bpf_trace_opts, trace_opts);
+
+ /* There are three different ways to attach tp_btf (BTF-aware raw
+ * tracepoint) programs. Let's test all of them.
+ */
+ prog_fd = bpf_program__fd(skel->progs.handle_tp_btf);
+
+ /* low-level BPF_RAW_TRACEPOINT_OPEN command wrapper */
+ skel->bss->tp_btf_res = 0;
+
+ raw_tp_opts.cookie = cookie = 0x11000000000000L;
+ link_fd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_tp_opts);
+ if (!ASSERT_GE(link_fd, 0, "bpf_raw_tracepoint_open_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ close(link_fd); /* detach */
+ link_fd = -1;
+
+ ASSERT_EQ(skel->bss->tp_btf_res, cookie, "raw_tp_open_res");
+
+ /* low-level generic bpf_link_create() API */
+ skel->bss->tp_btf_res = 0;
+
+ link_opts.tracing.cookie = cookie = 0x22000000000000L;
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_RAW_TP, &link_opts);
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_create"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ close(link_fd); /* detach */
+ link_fd = -1;
+
+ ASSERT_EQ(skel->bss->tp_btf_res, cookie, "link_create_res");
+
+ /* high-level bpf_link-based bpf_program__attach_trace_opts() API */
+ skel->bss->tp_btf_res = 0;
+
+ trace_opts.cookie = cookie = 0x33000000000000L;
+ link = bpf_program__attach_trace_opts(skel->progs.handle_tp_btf, &trace_opts);
+ if (!ASSERT_OK_PTR(link, "attach_trace_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ bpf_link__destroy(link); /* detach */
+ link = NULL;
+
+ ASSERT_EQ(skel->bss->tp_btf_res, cookie, "attach_trace_opts_res");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ bpf_link__destroy(link);
+}
+
+static int verify_raw_tp_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie");
+
+ return 0;
+}
+
+static void raw_tp_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int err, prog_fd, link_fd = -1;
+ struct bpf_link *link = NULL;
+ LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
+ LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts);
+
+ /* There are two different ways to attach raw_tp programs */
+ prog_fd = bpf_program__fd(skel->progs.handle_raw_tp);
+
+ /* low-level BPF_RAW_TRACEPOINT_OPEN command wrapper */
+ skel->bss->raw_tp_res = 0;
+
+ raw_tp_opts.tp_name = "sys_enter";
+ raw_tp_opts.cookie = cookie = 0x55000000000000L;
+ link_fd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_tp_opts);
+ if (!ASSERT_GE(link_fd, 0, "bpf_raw_tracepoint_open_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+
+ err = verify_raw_tp_link_info(link_fd, cookie);
+ if (!ASSERT_OK(err, "verify_raw_tp_link_info"))
+ goto cleanup;
+
+ close(link_fd); /* detach */
+ link_fd = -1;
+
+ ASSERT_EQ(skel->bss->raw_tp_res, cookie, "raw_tp_open_res");
+
+ /* high-level bpf_link-based bpf_program__attach_raw_tracepoint_opts() API */
+ skel->bss->raw_tp_res = 0;
+
+ opts.cookie = cookie = 0x66000000000000L;
+ link = bpf_program__attach_raw_tracepoint_opts(skel->progs.handle_raw_tp,
+ "sys_enter", &opts);
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ bpf_link__destroy(link); /* detach */
+ link = NULL;
+
+ ASSERT_EQ(skel->bss->raw_tp_res, cookie, "attach_raw_tp_opts_res");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ bpf_link__destroy(link);
+}
+
+void test_bpf_cookie(void)
+{
+ struct test_bpf_cookie *skel;
+
+ skel = test_bpf_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_tid = sys_gettid();
+
+ if (test__start_subtest("kprobe"))
+ kprobe_subtest(skel);
+ if (test__start_subtest("multi_kprobe_link_api"))
+ kprobe_multi_link_api_subtest();
+ if (test__start_subtest("multi_kprobe_attach_api"))
+ kprobe_multi_attach_api_subtest();
+ if (test__start_subtest("uprobe"))
+ uprobe_subtest(skel);
+ if (test__start_subtest("multi_uprobe_attach_api"))
+ uprobe_multi_attach_api_subtest();
+ if (test__start_subtest("tracepoint"))
+ tp_subtest(skel);
+ if (test__start_subtest("perf_event"))
+ pe_subtest(skel);
+ if (test__start_subtest("trampoline"))
+ tracing_subtest(skel);
+ if (test__start_subtest("lsm"))
+ lsm_subtest(skel);
+ if (test__start_subtest("tp_btf"))
+ tp_btf_subtest(skel);
+ if (test__start_subtest("raw_tp"))
+ raw_tp_subtest(skel);
+ test_bpf_cookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
new file mode 100644
index 000000000000..d138cc7b1bda
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+
+#include <sys/syscall.h>
+#include <bpf/bpf.h>
+
+#include "bpf_gotox.skel.h"
+
+static void __test_run(struct bpf_program *prog, void *ctx_in, size_t ctx_size_in)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .ctx_in = ctx_in,
+ .ctx_size_in = ctx_size_in,
+ );
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+}
+
+static void __subtest(struct bpf_gotox *skel, void (*check)(struct bpf_gotox *))
+{
+ if (skel->data->skip)
+ test__skip();
+ else
+ check(skel);
+}
+
+static void check_simple(struct bpf_gotox *skel,
+ struct bpf_program *prog,
+ __u64 ctx_in,
+ __u64 expected)
+{
+ skel->bss->ret_user = 0;
+
+ __test_run(prog, &ctx_in, sizeof(ctx_in));
+
+ if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
+ return;
+}
+
+static void check_simple_fentry(struct bpf_gotox *skel,
+ struct bpf_program *prog,
+ __u64 ctx_in,
+ __u64 expected)
+{
+ skel->bss->in_user = ctx_in;
+ skel->bss->ret_user = 0;
+
+ /* trigger */
+ usleep(1);
+
+ if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
+ return;
+}
+
+/* validate that for two loads of the same jump table libbpf generates only one map */
+static void check_one_map_two_jumps(struct bpf_gotox *skel)
+{
+ struct bpf_prog_info prog_info;
+ struct bpf_map_info map_info;
+ __u32 len;
+ __u32 map_ids[16];
+ int prog_fd, map_fd;
+ int ret;
+ int i;
+ bool seen = false;
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.map_ids = (long)map_ids;
+ prog_info.nr_map_ids = ARRAY_SIZE(map_ids);
+ prog_fd = bpf_program__fd(skel->progs.one_map_two_jumps);
+ if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd(one_map_two_jumps)"))
+ return;
+
+ len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(prog_fd)"))
+ return;
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ map_fd = bpf_map_get_fd_by_id(map_ids[i]);
+ if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
+ return;
+
+ len = sizeof(map_info);
+ memset(&map_info, 0, len);
+ ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(map_fd)")) {
+ close(map_fd);
+ return;
+ }
+
+ if (map_info.type == BPF_MAP_TYPE_INSN_ARRAY) {
+ if (!ASSERT_EQ(seen, false, "more than one INSN_ARRAY map")) {
+ close(map_fd);
+ return;
+ }
+ seen = true;
+ }
+ close(map_fd);
+ }
+
+ ASSERT_EQ(seen, true, "no INSN_ARRAY map");
+}
+
+static void check_one_switch(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_switch, in[i], out[i]);
+}
+
+static void check_one_switch_non_zero_sec_off(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_switch_non_zero_sec_off, in[i], out[i]);
+}
+
+static void check_two_switches(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {103, 104, 107, 205, 115, 1019, 1019};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.two_switches, in[i], out[i]);
+}
+
+static void check_big_jump_table(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 11, 27, 31, 22, 45, 99};
+ __u64 out[] = {2, 3, 4, 5, 19, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.big_jump_table, in[i], out[i]);
+}
+
+static void check_one_jump_two_maps(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {12, 15, 7 , 15, 12, 15, 15};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_jump_two_maps, in[i], out[i]);
+}
+
+static void check_static_global(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_static_global1, in[i], out[i]);
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_static_global2, in[i], out[i]);
+}
+
+static void check_nonstatic_global(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_nonstatic_global1, in[i], out[i]);
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_nonstatic_global2, in[i], out[i]);
+}
+
+static void check_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.simple_test_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.simple_test_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+static void check_static_global_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.use_static_global_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.use_static_global_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+static void check_nonstatic_global_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.use_nonstatic_global_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.use_nonstatic_global_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+void test_bpf_gotox(void)
+{
+ struct bpf_gotox *skel;
+ int ret;
+
+ skel = bpf_gotox__open();
+ if (!ASSERT_NEQ(skel, NULL, "bpf_gotox__open"))
+ return;
+
+ ret = bpf_gotox__load(skel);
+ if (!ASSERT_OK(ret, "bpf_gotox__load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("one-switch"))
+ __subtest(skel, check_one_switch);
+
+ if (test__start_subtest("one-switch-non-zero-sec-offset"))
+ __subtest(skel, check_one_switch_non_zero_sec_off);
+
+ if (test__start_subtest("two-switches"))
+ __subtest(skel, check_two_switches);
+
+ if (test__start_subtest("big-jump-table"))
+ __subtest(skel, check_big_jump_table);
+
+ if (test__start_subtest("static-global"))
+ __subtest(skel, check_static_global);
+
+ if (test__start_subtest("nonstatic-global"))
+ __subtest(skel, check_nonstatic_global);
+
+ if (test__start_subtest("other-sec"))
+ __subtest(skel, check_other_sec);
+
+ if (test__start_subtest("static-global-other-sec"))
+ __subtest(skel, check_static_global_other_sec);
+
+ if (test__start_subtest("nonstatic-global-other-sec"))
+ __subtest(skel, check_nonstatic_global_other_sec);
+
+ if (test__start_subtest("one-jump-two-maps"))
+ __subtest(skel, check_one_jump_two_maps);
+
+ if (test__start_subtest("one-map-two-jumps"))
+ __subtest(skel, check_one_map_two_jumps);
+
+ bpf_gotox__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
new file mode 100644
index 000000000000..269870bec941
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <bpf/bpf.h>
+#include <test_progs.h>
+
+#ifdef __x86_64__
+static int map_create(__u32 map_type, __u32 max_entries)
+{
+ const char *map_name = "insn_array";
+ __u32 key_size = 4;
+ __u32 value_size = sizeof(struct bpf_insn_array_value);
+
+ return bpf_map_create(map_type, map_name, key_size, value_size, max_entries, NULL);
+}
+
+static int prog_load(struct bpf_insn *insns, __u32 insn_cnt, int *fd_array, __u32 fd_array_cnt)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ opts.fd_array = fd_array;
+ opts.fd_array_cnt = fd_array_cnt;
+
+ return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, &opts);
+}
+
+static void __check_success(struct bpf_insn *insns, __u32 insn_cnt, __u32 *map_in, __u32 *map_out)
+{
+ struct bpf_insn_array_value val = {};
+ int prog_fd = -1, map_fd, i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, insn_cnt);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < insn_cnt; i++) {
+ val.orig_off = map_in[i];
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, insn_cnt, &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < insn_cnt; i++) {
+ char buf[64];
+
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ snprintf(buf, sizeof(buf), "val.xlated_off should be equal map_out[%d]", i);
+ ASSERT_EQ(val.xlated_off, map_out[i], buf);
+ }
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+/*
+ * Load a program, which will not be anyhow mangled by the verifier. Add an
+ * insn_array map pointing to every instruction. Check that it hasn't changed
+ * after the program load.
+ */
+static void check_one_to_one_mapping(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, 1, 2, 3, 4, 5};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Load a program with two patches (get jiffies, for simplicity). Add an
+ * insn_array map pointing to every instruction. Check how it was changed
+ * after the program load.
+ */
+static void check_simple(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, 1, 4, 5, 8, 9};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Verifier can delete code in two cases: nops & dead code. From insn
+ * array's point of view, the two cases are the same, so test using
+ * the simplest method: by loading some nops
+ */
+static void check_deletions(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, -1, 1, -1, 2, 3};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Same test as check_deletions, but also add code which adds instructions
+ */
+static void check_deletions_with_functions(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = { 0, 1, 2, 3, 4, 5, /* func */ 6, 7, 8, 9, 10};
+ __u32 map_out[] = {-1, 0, -1, 3, 4, 5, /* func */ -1, 6, -1, 9, 10};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Try to load a program with a map which points to outside of the program
+ */
+static void check_out_of_bounds_index(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, map_fd;
+ struct bpf_insn_array_value val = {};
+ int key;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ key = 0;
+ val.orig_off = ARRAY_SIZE(insns); /* too big */
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+ close(prog_fd);
+ goto cleanup;
+ }
+
+cleanup:
+ close(map_fd);
+}
+
+/*
+ * Try to load a program with a map which points to the middle of 16-bit insn
+ */
+static void check_mid_insn_index(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_IMM64(BPF_REG_0, 0), /* 2 x 8 */
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, map_fd;
+ struct bpf_insn_array_value val = {};
+ int key;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ key = 0;
+ val.orig_off = 1; /* middle of 16-byte instruction */
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+ close(prog_fd);
+ goto cleanup;
+ }
+
+cleanup:
+ close(map_fd);
+}
+
+static void check_incorrect_index(void)
+{
+ check_out_of_bounds_index();
+ check_mid_insn_index();
+}
+
+static int set_bpf_jit_harden(char *level)
+{
+ char old_level;
+ int err = -1;
+ int fd = -1;
+
+ fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ ASSERT_FAIL("open .../bpf_jit_harden returned %d (errno=%d)", fd, errno);
+ return -1;
+ }
+
+ err = read(fd, &old_level, 1);
+ if (err != 1) {
+ ASSERT_FAIL("read from .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+ err = -1;
+ goto end;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ err = write(fd, level, 1);
+ if (err != 1) {
+ ASSERT_FAIL("write to .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+ err = -1;
+ goto end;
+ }
+
+ err = 0;
+ *level = old_level;
+end:
+ if (fd >= 0)
+ close(fd);
+ return err;
+}
+
+static void check_blindness(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+ struct bpf_insn_array_value val = {};
+ char bpf_jit_harden = '@'; /* non-exizsting value */
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ bpf_jit_harden = '2';
+ if (set_bpf_jit_harden(&bpf_jit_harden)) {
+ bpf_jit_harden = '@'; /* open, read or write failed => no write was done */
+ goto cleanup;
+ }
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ char fmt[32];
+
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ snprintf(fmt, sizeof(fmt), "val should be equal 3*%d", i);
+ ASSERT_EQ(val.xlated_off, i * 3, fmt);
+ }
+
+cleanup:
+ /* restore the old one */
+ if (bpf_jit_harden != '@')
+ set_bpf_jit_harden(&bpf_jit_harden);
+
+ close(prog_fd);
+ close(map_fd);
+}
+
+/* Once map was initialized, it should be frozen */
+static void check_load_unfrozen_map(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+ struct bpf_insn_array_value val = {};
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+ goto cleanup;
+
+ /* correctness: now freeze the map, the program should load fine */
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ ASSERT_EQ(val.xlated_off, i, "val should be equal i");
+ }
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+/* Map can be used only by one BPF program */
+static void check_no_map_reuse(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd, extra_fd = -1;
+ struct bpf_insn_array_value val = {};
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ ASSERT_EQ(val.xlated_off, i, "val should be equal i");
+ }
+
+ extra_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(extra_fd, -EBUSY, "program should have been rejected (extra_fd != -EBUSY)"))
+ goto cleanup;
+
+ /* correctness: check that prog is still loadable without fd_array */
+ extra_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_GE(extra_fd, 0, "bpf(BPF_PROG_LOAD): expected no error"))
+ goto cleanup;
+
+cleanup:
+ close(extra_fd);
+ close(prog_fd);
+ close(map_fd);
+}
+
+static void check_bpf_no_lookup(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ insns[0].imm = map_fd;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+ goto cleanup;
+
+ /* correctness: check that prog is still loadable with normal map */
+ close(map_fd);
+ map_fd = map_create(BPF_MAP_TYPE_ARRAY, 1);
+ insns[0].imm = map_fd;
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+static void check_bpf_side(void)
+{
+ check_bpf_no_lookup();
+}
+
+static void __test_bpf_insn_array(void)
+{
+ /* Test if offsets are adjusted properly */
+
+ if (test__start_subtest("one2one"))
+ check_one_to_one_mapping();
+
+ if (test__start_subtest("simple"))
+ check_simple();
+
+ if (test__start_subtest("deletions"))
+ check_deletions();
+
+ if (test__start_subtest("deletions-with-functions"))
+ check_deletions_with_functions();
+
+ if (test__start_subtest("blindness"))
+ check_blindness();
+
+ /* Check all kinds of operations and related restrictions */
+
+ if (test__start_subtest("incorrect-index"))
+ check_incorrect_index();
+
+ if (test__start_subtest("load-unfrozen-map"))
+ check_load_unfrozen_map();
+
+ if (test__start_subtest("no-map-reuse"))
+ check_no_map_reuse();
+
+ if (test__start_subtest("bpf-side-ops"))
+ check_bpf_side();
+}
+#else
+static void __test_bpf_insn_array(void)
+{
+ test__skip();
+}
+#endif
+
+void test_bpf_insn_array(void)
+{
+ __test_bpf_insn_array();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 74c45d557a2b..5225d69bf79b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -1,18 +1,23 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <task_local_storage_helpers.h>
#include "bpf_iter_ipv6_route.skel.h"
#include "bpf_iter_netlink.skel.h"
#include "bpf_iter_bpf_map.skel.h"
-#include "bpf_iter_task.skel.h"
+#include "bpf_iter_tasks.skel.h"
#include "bpf_iter_task_stack.skel.h"
#include "bpf_iter_task_file.skel.h"
-#include "bpf_iter_task_vma.skel.h"
+#include "bpf_iter_task_vmas.skel.h"
#include "bpf_iter_task_btf.skel.h"
#include "bpf_iter_tcp4.skel.h"
#include "bpf_iter_tcp6.skel.h"
#include "bpf_iter_udp4.skel.h"
#include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
+#include "bpf_iter_vma_offset.skel.h"
#include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
@@ -25,39 +30,39 @@
#include "bpf_iter_bpf_sk_storage_map.skel.h"
#include "bpf_iter_test_kern5.skel.h"
#include "bpf_iter_test_kern6.skel.h"
-
-static int duration;
+#include "bpf_iter_bpf_link.skel.h"
+#include "bpf_iter_ksym.skel.h"
+#include "bpf_iter_sockmap.skel.h"
static void test_btf_id_or_null(void)
{
struct bpf_iter_test_kern3 *skel;
skel = bpf_iter_test_kern3__open_and_load();
- if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
- "skeleton open_and_load unexpectedly succeeded\n")) {
+ if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern3__open_and_load")) {
bpf_iter_test_kern3__destroy(skel);
return;
}
}
-static void do_dummy_read(struct bpf_program *prog)
+static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_opts *opts)
{
struct bpf_link *link;
char buf[16] = {};
int iter_fd, len;
- link = bpf_program__attach_iter(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ link = bpf_program__attach_iter(prog, opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
/* not check contents, but ensure read() ends without error */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+ ASSERT_GE(len, 0, "read");
close(iter_fd);
@@ -65,6 +70,55 @@ free_link:
bpf_link__destroy(link);
}
+static void do_dummy_read(struct bpf_program *prog)
+{
+ do_dummy_read_opts(prog, NULL);
+}
+
+static void do_read_map_iter_fd(struct bpf_object_skeleton **skel, struct bpf_program *prog,
+ struct bpf_map *map)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+ char buf[16] = {};
+ int iter_fd, len;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = bpf_map__fd(map);
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(prog, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_map_iter"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_map_iter")) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ /* Close link and map fd prematurely */
+ bpf_link__destroy(link);
+ bpf_object__destroy_skeleton(*skel);
+ *skel = NULL;
+
+ /* Try to let map free work to run first if map is freed */
+ usleep(100);
+ /* Memory used by both sock map and sock local storage map are
+ * freed after two synchronize_rcu() calls, so wait for it
+ */
+ kern_sync_rcu();
+ kern_sync_rcu();
+
+ /* Read after both map fd and link fd are closed */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ ASSERT_GE(len, 0, "read_iterator");
+
+ close(iter_fd);
+}
+
static int read_fd_into_buffer(int fd, char *buf, int size)
{
int bufleft = size;
@@ -86,8 +140,7 @@ static void test_ipv6_route(void)
struct bpf_iter_ipv6_route *skel;
skel = bpf_iter_ipv6_route__open_and_load();
- if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_ipv6_route__open_and_load"))
return;
do_dummy_read(skel->progs.dump_ipv6_route);
@@ -100,8 +153,7 @@ static void test_netlink(void)
struct bpf_iter_netlink *skel;
skel = bpf_iter_netlink__open_and_load();
- if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_netlink__open_and_load"))
return;
do_dummy_read(skel->progs.dump_netlink);
@@ -114,8 +166,7 @@ static void test_bpf_map(void)
struct bpf_iter_bpf_map *skel;
skel = bpf_iter_bpf_map__open_and_load();
- if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_map__open_and_load"))
return;
do_dummy_read(skel->progs.dump_bpf_map);
@@ -123,18 +174,236 @@ static void test_bpf_map(void)
bpf_iter_bpf_map__destroy(skel);
}
-static void test_task(void)
+static void check_bpf_link_info(const struct bpf_program *prog)
{
- struct bpf_iter_task *skel;
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link_info info = {};
+ struct bpf_link *link;
+ __u32 info_len;
+ int err;
- skel = bpf_iter_task__open_and_load();
- if (CHECK(!skel, "bpf_iter_task__open_and_load",
- "skeleton open_and_load failed\n"))
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.tid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(prog, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return;
- do_dummy_read(skel->progs.dump_task);
+ info_len = sizeof(info);
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
+ ASSERT_OK(err, "bpf_link_get_info_by_fd");
+ ASSERT_EQ(info.iter.task.tid, getpid(), "check_task_tid");
+
+ bpf_link__destroy(link);
+}
+
+static pthread_mutex_t do_nothing_mutex;
- bpf_iter_task__destroy(skel);
+static void *do_nothing_wait(void *arg)
+{
+ pthread_mutex_lock(&do_nothing_mutex);
+ pthread_mutex_unlock(&do_nothing_mutex);
+
+ pthread_exit(arg);
+}
+
+static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts,
+ int *num_unknown, int *num_known)
+{
+ struct bpf_iter_tasks *skel;
+ pthread_t thread_id;
+ void *ret;
+
+ skel = bpf_iter_tasks__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
+ return;
+
+ ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock");
+
+ ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL),
+ "pthread_create");
+
+ skel->bss->tid = sys_gettid();
+
+ do_dummy_read_opts(skel->progs.dump_task, opts);
+
+ *num_unknown = skel->bss->num_unknown_tid;
+ *num_known = skel->bss->num_known_tid;
+
+ ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock");
+ ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
+ "pthread_join");
+
+ bpf_iter_tasks__destroy(skel);
+}
+
+static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, int num_known)
+{
+ int num_unknown_tid, num_known_tid;
+
+ test_task_common_nocheck(opts, &num_unknown_tid, &num_known_tid);
+ ASSERT_EQ(num_unknown_tid, num_unknown, "check_num_unknown_tid");
+ ASSERT_EQ(num_known_tid, num_known, "check_num_known_tid");
+}
+
+static void *run_test_task_tid(void *arg)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ int num_unknown_tid, num_known_tid;
+
+ ASSERT_NEQ(getpid(), sys_gettid(), "check_new_thread_id");
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.tid = sys_gettid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ test_task_common(&opts, 0, 1);
+
+ linfo.task.tid = 0;
+ linfo.task.pid = getpid();
+ /* This includes the parent thread, this thread, watchdog timer thread
+ * and the do_nothing_wait thread
+ */
+ test_task_common(&opts, 3, 1);
+
+ test_task_common_nocheck(NULL, &num_unknown_tid, &num_known_tid);
+ ASSERT_GT(num_unknown_tid, 2, "check_num_unknown_tid");
+ ASSERT_EQ(num_known_tid, 1, "check_num_known_tid");
+
+ return NULL;
+}
+
+static void test_task_tid(void)
+{
+ pthread_t thread_id;
+
+ /* Create a new thread so pid and tid aren't the same */
+ ASSERT_OK(pthread_create(&thread_id, NULL, &run_test_task_tid, NULL),
+ "pthread_create");
+ ASSERT_FALSE(pthread_join(thread_id, NULL), "pthread_join");
+}
+
+static void test_task_pid(void)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ test_task_common(&opts, 2, 1);
+}
+
+static void test_task_pidfd(void)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ int pidfd;
+
+ pidfd = sys_pidfd_open(getpid(), 0);
+ if (!ASSERT_GT(pidfd, 0, "sys_pidfd_open"))
+ return;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid_fd = pidfd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ test_task_common(&opts, 2, 1);
+
+ close(pidfd);
+}
+
+static void test_task_sleepable(void)
+{
+ struct bpf_iter_tasks *skel;
+ int pid, status, err, data_pipe[2], finish_pipe[2], c = 0;
+ char *test_data = NULL;
+ char *test_data_long = NULL;
+ char *data[2];
+
+ if (!ASSERT_OK(pipe(data_pipe), "data_pipe") ||
+ !ASSERT_OK(pipe(finish_pipe), "finish_pipe"))
+ return;
+
+ skel = bpf_iter_tasks__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
+ return;
+
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork"))
+ return;
+
+ if (pid == 0) {
+ /* child */
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
+
+ test_data = malloc(sizeof(char) * 10);
+ strncpy(test_data, "test_data", 10);
+ test_data[9] = '\0';
+
+ test_data_long = malloc(sizeof(char) * 5000);
+ for (int i = 0; i < 5000; ++i) {
+ if (i % 2 == 0)
+ test_data_long[i] = 'b';
+ else
+ test_data_long[i] = 'a';
+ }
+ test_data_long[4999] = '\0';
+
+ data[0] = test_data;
+ data[1] = test_data_long;
+
+ write(data_pipe[1], &data, sizeof(data));
+
+ /* keep child alive until after the test */
+ err = read(finish_pipe[0], &c, 1);
+ if (err != 1)
+ exit(-1);
+
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+ _exit(0);
+ }
+
+ /* parent */
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+
+ err = read(data_pipe[0], &data, sizeof(data));
+ ASSERT_EQ(err, sizeof(data), "read_check");
+
+ skel->bss->user_ptr = data[0];
+ skel->bss->user_ptr_long = data[1];
+ skel->bss->pid = pid;
+
+ do_dummy_read(skel->progs.dump_task_sleepable);
+
+ ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
+ "num_expected_failure_copy_from_user_task");
+ ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
+ "num_success_copy_from_user_task");
+ ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0,
+ "num_expected_failure_copy_from_user_task_str");
+ ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0,
+ "num_success_copy_from_user_task_str");
+
+ bpf_iter_tasks__destroy(skel);
+
+ write(finish_pipe[1], &c, 1);
+ err = waitpid(pid, &status, 0);
+ ASSERT_EQ(err, pid, "waitpid");
+ ASSERT_EQ(status, 0, "zero_child_exit");
+
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
}
static void test_task_stack(void)
@@ -142,47 +411,61 @@ static void test_task_stack(void)
struct bpf_iter_task_stack *skel;
skel = bpf_iter_task_stack__open_and_load();
- if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_stack__open_and_load"))
return;
do_dummy_read(skel->progs.dump_task_stack);
+ do_dummy_read(skel->progs.get_task_user_stacks);
- bpf_iter_task_stack__destroy(skel);
-}
+ ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks");
-static void *do_nothing(void *arg)
-{
- pthread_exit(arg);
+ bpf_iter_task_stack__destroy(skel);
}
static void test_task_file(void)
{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_task_file *skel;
+ union bpf_iter_link_info linfo;
pthread_t thread_id;
void *ret;
skel = bpf_iter_task_file__open_and_load();
- if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_file__open_and_load"))
return;
skel->bss->tgid = getpid();
- if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
- "pthread_create", "pthread_create failed\n"))
- goto done;
+ ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock");
+
+ ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL),
+ "pthread_create");
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.tid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ do_dummy_read_opts(skel->progs.dump_task_file, &opts);
+
+ ASSERT_EQ(skel->bss->count, 0, "check_count");
+ ASSERT_EQ(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count");
+
+ skel->bss->last_tgid = 0;
+ skel->bss->count = 0;
+ skel->bss->unique_tgid_count = 0;
do_dummy_read(skel->progs.dump_task_file);
- if (CHECK(pthread_join(thread_id, &ret) || ret != NULL,
- "pthread_join", "pthread_join failed\n"))
- goto done;
+ ASSERT_EQ(skel->bss->count, 0, "check_count");
+ ASSERT_GT(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count");
+
+ check_bpf_link_info(skel->progs.dump_task_file);
- CHECK(skel->bss->count != 0, "check_count",
- "invalid non pthread file visit count %d\n", skel->bss->count);
+ ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock");
+ ASSERT_OK(pthread_join(thread_id, &ret), "pthread_join");
+ ASSERT_NULL(ret, "pthread_join");
-done:
bpf_iter_task_file__destroy(skel);
}
@@ -200,11 +483,11 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
int ret = 0;
link = bpf_program__attach_iter(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return ret;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ);
@@ -215,12 +498,11 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
goto free_link;
}
- if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(err, 0, "read"))
goto free_link;
- CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
- "check for btf representation of task_struct in iter data",
- "struct task_struct not found");
+ ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)",
+ "check for btf representation of task_struct in iter data");
free_link:
if (iter_fd > 0)
close(iter_fd);
@@ -235,8 +517,7 @@ static void test_task_btf(void)
int ret;
skel = bpf_iter_task_btf__open_and_load();
- if (CHECK(!skel, "bpf_iter_task_btf__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_btf__open_and_load"))
return;
bss = skel->bss;
@@ -245,12 +526,10 @@ static void test_task_btf(void)
if (ret)
goto cleanup;
- if (CHECK(bss->tasks == 0, "check if iterated over tasks",
- "no task iteration, did BPF program run?\n"))
+ if (!ASSERT_NEQ(bss->tasks, 0, "no task iteration, did BPF program run?"))
goto cleanup;
- CHECK(bss->seq_err != 0, "check for unexpected err",
- "bpf_seq_printf_btf returned %ld", bss->seq_err);
+ ASSERT_EQ(bss->seq_err, 0, "check for unexpected err");
cleanup:
bpf_iter_task_btf__destroy(skel);
@@ -261,8 +540,7 @@ static void test_tcp4(void)
struct bpf_iter_tcp4 *skel;
skel = bpf_iter_tcp4__open_and_load();
- if (CHECK(!skel, "bpf_iter_tcp4__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp4__open_and_load"))
return;
do_dummy_read(skel->progs.dump_tcp4);
@@ -275,8 +553,7 @@ static void test_tcp6(void)
struct bpf_iter_tcp6 *skel;
skel = bpf_iter_tcp6__open_and_load();
- if (CHECK(!skel, "bpf_iter_tcp6__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp6__open_and_load"))
return;
do_dummy_read(skel->progs.dump_tcp6);
@@ -289,8 +566,7 @@ static void test_udp4(void)
struct bpf_iter_udp4 *skel;
skel = bpf_iter_udp4__open_and_load();
- if (CHECK(!skel, "bpf_iter_udp4__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_udp4__open_and_load"))
return;
do_dummy_read(skel->progs.dump_udp4);
@@ -303,8 +579,7 @@ static void test_udp6(void)
struct bpf_iter_udp6 *skel;
skel = bpf_iter_udp6__open_and_load();
- if (CHECK(!skel, "bpf_iter_udp6__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_udp6__open_and_load"))
return;
do_dummy_read(skel->progs.dump_udp6);
@@ -312,27 +587,38 @@ static void test_udp6(void)
bpf_iter_udp6__destroy(skel);
}
+static void test_unix(void)
+{
+ struct bpf_iter_unix *skel;
+
+ skel = bpf_iter_unix__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_unix);
+
+ bpf_iter_unix__destroy(skel);
+}
+
/* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char)
{
- int err = -1, len, read_buf_len, start;
+ int len, read_buf_len, start;
char buf[16] = {};
read_buf_len = read_one_char ? 1 : 16;
start = 0;
while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
start += len;
- if (CHECK(start >= 16, "read", "read len %d\n", len))
+ if (!ASSERT_LT(start, 16, "read"))
return -1;
read_buf_len = read_one_char ? 1 : 16 - start;
}
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
return -1;
- err = strcmp(buf, expected);
- if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
- buf, expected))
+ if (!ASSERT_STREQ(buf, expected, "read"))
return -1;
return 0;
@@ -345,19 +631,17 @@ static void test_anon_iter(bool read_one_char)
int iter_fd, err;
skel = bpf_iter_test_kern1__open_and_load();
- if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern1__open_and_load"))
return;
err = bpf_iter_test_kern1__attach(skel);
- if (CHECK(err, "bpf_iter_test_kern1__attach",
- "skeleton attach failed\n")) {
+ if (!ASSERT_OK(err, "bpf_iter_test_kern1__attach")) {
goto out;
}
link = skel->links.dump_task;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto out;
do_read_with_fd(iter_fd, "abcd", read_one_char);
@@ -372,8 +656,7 @@ static int do_read(const char *path, const char *expected)
int err, iter_fd;
iter_fd = open(path, O_RDONLY);
- if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
- path, strerror(errno)))
+ if (!ASSERT_GE(iter_fd, 0, "open"))
return -1;
err = do_read_with_fd(iter_fd, expected, false);
@@ -390,19 +673,18 @@ static void test_file_iter(void)
int err;
skel1 = bpf_iter_test_kern1__open_and_load();
- if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel1, "bpf_iter_test_kern1__open_and_load"))
return;
link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
/* unlink this path if it exists. */
unlink(path);
err = bpf_link__pin(link, path);
- if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+ if (!ASSERT_OK(err, "pin_iter"))
goto free_link;
err = do_read(path, "abcd");
@@ -414,12 +696,11 @@ static void test_file_iter(void)
* should change.
*/
skel2 = bpf_iter_test_kern2__open_and_load();
- if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel2, "bpf_iter_test_kern2__open_and_load"))
goto unlink_path;
err = bpf_link__update_program(link, skel2->progs.dump_task);
- if (CHECK(err, "update_prog", "update_prog failed\n"))
+ if (!ASSERT_OK(err, "update_prog"))
goto destroy_skel2;
do_read(path, "ABCD");
@@ -445,8 +726,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
char *buf;
skel = bpf_iter_test_kern4__open();
- if (CHECK(!skel, "bpf_iter_test_kern4__open",
- "skeleton open failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern4__open"))
return;
/* create two maps: bpf program will only do bpf_seq_write
@@ -454,13 +734,11 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
* fills seq_file buffer and then the other will trigger
* overflow and needs restart.
*/
- map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
- if (CHECK(map1_fd < 0, "bpf_create_map",
- "map_creation failed: %s\n", strerror(errno)))
+ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
+ if (!ASSERT_GE(map1_fd, 0, "bpf_map_create"))
goto out;
- map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
- if (CHECK(map2_fd < 0, "bpf_create_map",
- "map_creation failed: %s\n", strerror(errno)))
+ map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
+ if (!ASSERT_GE(map2_fd, 0, "bpf_map_create"))
goto free_map1;
/* bpf_seq_printf kernel buffer is 8 pages, so one map
@@ -482,34 +760,32 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
}
skel->rodata->ret1 = ret1;
- if (CHECK(bpf_iter_test_kern4__load(skel),
- "bpf_iter_test_kern4__load", "skeleton load failed\n"))
+ if (!ASSERT_OK(bpf_iter_test_kern4__load(skel),
+ "bpf_iter_test_kern4__load"))
goto free_map2;
/* setup filtering map_id in bpf program */
map_info_len = sizeof(map_info);
- err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len);
- if (CHECK(err, "get_map_info", "get map info failed: %s\n",
- strerror(errno)))
+ err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len);
+ if (!ASSERT_OK(err, "get_map_info"))
goto free_map2;
skel->bss->map1_id = map_info.id;
- err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len);
- if (CHECK(err, "get_map_info", "get map info failed: %s\n",
- strerror(errno)))
+ err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len);
+ if (!ASSERT_OK(err, "get_map_info"))
goto free_map2;
skel->bss->map2_id = map_info.id;
link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto free_map2;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
buf = malloc(expected_read_len);
- if (!buf)
+ if (!ASSERT_OK_PTR(buf, "malloc"))
goto close_iter;
/* do read */
@@ -518,16 +794,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;
- CHECK(len != -1 || errno != E2BIG, "read",
- "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
- len, strerror(errno));
+ ASSERT_EQ(len, -1, "read");
+ ASSERT_EQ(errno, E2BIG, "read");
goto free_buf;
} else if (!ret1) {
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;
- if (CHECK(len < 0, "read", "read failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto free_buf;
} else {
do {
@@ -536,27 +810,20 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
total_read_len += len;
} while (len > 0 || len == -EAGAIN);
- if (CHECK(len < 0, "read", "read failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto free_buf;
}
- if (CHECK(total_read_len != expected_read_len, "read",
- "total len %u, expected len %u\n", total_read_len,
- expected_read_len))
+ if (!ASSERT_EQ(total_read_len, expected_read_len, "read"))
goto free_buf;
- if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
- "expected 1 actual %d\n", skel->bss->map1_accessed))
+ if (!ASSERT_EQ(skel->bss->map1_accessed, 1, "map1_accessed"))
goto free_buf;
- if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
- "expected 2 actual %d\n", skel->bss->map2_accessed))
+ if (!ASSERT_EQ(skel->bss->map2_accessed, 2, "map2_accessed"))
goto free_buf;
- CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
- "map2_seqnum", "two different seqnum %lld %lld\n",
- skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);
+ ASSERT_EQ(skel->bss->map2_seqnum1, skel->bss->map2_seqnum2, "map2_seqnum");
free_buf:
free(buf);
@@ -574,7 +841,7 @@ out:
static void test_bpf_hash_map(void)
{
- __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+ __u32 expected_key_a = 0, expected_key_b = 0;
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_bpf_hash_map *skel;
int err, i, len, map_fd, iter_fd;
@@ -589,15 +856,13 @@ static void test_bpf_hash_map(void)
char buf[64];
skel = bpf_iter_bpf_hash_map__open();
- if (CHECK(!skel, "bpf_iter_bpf_hash_map__open",
- "skeleton open failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_hash_map__open"))
return;
skel->bss->in_test_mode = true;
err = bpf_iter_bpf_hash_map__load(skel);
- if (CHECK(!skel, "bpf_iter_bpf_hash_map__load",
- "skeleton load failed\n"))
+ if (!ASSERT_OK(err, "bpf_iter_bpf_hash_map__load"))
goto out;
/* iterator with hashmap2 and hashmap3 should fail */
@@ -606,14 +871,12 @@ static void test_bpf_hash_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter",
- "attach_iter for hashmap2 unexpected succeeded\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
goto out;
linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter",
- "attach_iter for hashmap3 unexpected succeeded\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
goto out;
/* hashmap1 should be good, update map values here */
@@ -625,41 +888,40 @@ static void test_bpf_hash_map(void)
val = i + 4;
expected_key_a += key.a;
expected_key_b += key.b;
- expected_key_c += key.c;
expected_val += val;
err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
- if (CHECK(err, "map_update", "map_update failed\n"))
+ if (!ASSERT_OK(err, "map_update"))
goto out;
}
+ /* Sleepable program is prohibited for hash map iterator */
+ linfo.map.map_fd = map_fd;
+ link = bpf_program__attach_iter(skel->progs.sleepable_dummy_dump, &opts);
+ if (!ASSERT_ERR_PTR(link, "attach_sleepable_prog_to_iter"))
+ goto out;
+
linfo.map.map_fd = map_fd;
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
- if (CHECK(skel->bss->key_sum_a != expected_key_a,
- "key_sum_a", "got %u expected %u\n",
- skel->bss->key_sum_a, expected_key_a))
+ if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a"))
goto close_iter;
- if (CHECK(skel->bss->key_sum_b != expected_key_b,
- "key_sum_b", "got %u expected %u\n",
- skel->bss->key_sum_b, expected_key_b))
+ if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b"))
goto close_iter;
- if (CHECK(skel->bss->val_sum != expected_val,
- "val_sum", "got %llu expected %llu\n",
- skel->bss->val_sum, expected_val))
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
goto close_iter;
close_iter:
@@ -672,7 +934,7 @@ out:
static void test_bpf_percpu_hash_map(void)
{
- __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+ __u32 expected_key_a = 0, expected_key_b = 0;
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_bpf_percpu_hash_map *skel;
int err, i, j, len, map_fd, iter_fd;
@@ -687,18 +949,17 @@ static void test_bpf_percpu_hash_map(void)
char buf[64];
void *val;
- val = malloc(8 * bpf_num_possible_cpus());
-
skel = bpf_iter_bpf_percpu_hash_map__open();
- if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open",
- "skeleton open failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__open"))
return;
skel->rodata->num_cpus = bpf_num_possible_cpus();
+ val = malloc(8 * bpf_num_possible_cpus());
+ if (!ASSERT_OK_PTR(val, "malloc"))
+ goto out;
err = bpf_iter_bpf_percpu_hash_map__load(skel);
- if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load",
- "skeleton load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load"))
goto out;
/* update map values here */
@@ -709,7 +970,6 @@ static void test_bpf_percpu_hash_map(void)
key.c = i + 3;
expected_key_a += key.a;
expected_key_b += key.b;
- expected_key_c += key.c;
for (j = 0; j < bpf_num_possible_cpus(); j++) {
*(__u32 *)(val + j * 8) = i + j;
@@ -717,7 +977,7 @@ static void test_bpf_percpu_hash_map(void)
}
err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY);
- if (CHECK(err, "map_update", "map_update failed\n"))
+ if (!ASSERT_OK(err, "map_update"))
goto out;
}
@@ -726,31 +986,25 @@ static void test_bpf_percpu_hash_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
- if (CHECK(skel->bss->key_sum_a != expected_key_a,
- "key_sum_a", "got %u expected %u\n",
- skel->bss->key_sum_a, expected_key_a))
+ if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a"))
goto close_iter;
- if (CHECK(skel->bss->key_sum_b != expected_key_b,
- "key_sum_b", "got %u expected %u\n",
- skel->bss->key_sum_b, expected_key_b))
+ if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b"))
goto close_iter;
- if (CHECK(skel->bss->val_sum != expected_val,
- "val_sum", "got %u expected %u\n",
- skel->bss->val_sum, expected_val))
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
goto close_iter;
close_iter:
@@ -759,23 +1013,23 @@ free_link:
bpf_link__destroy(link);
out:
bpf_iter_bpf_percpu_hash_map__destroy(skel);
+ free(val);
}
static void test_bpf_array_map(void)
{
__u64 val, expected_val = 0, res_first_val, first_val = 0;
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
- __u32 expected_key = 0, res_first_key;
+ __u32 key, expected_key = 0, res_first_key;
+ int err, i, map_fd, hash_fd, iter_fd;
struct bpf_iter_bpf_array_map *skel;
union bpf_iter_link_info linfo;
- int err, i, map_fd, iter_fd;
struct bpf_link *link;
char buf[64] = {};
int len, start;
skel = bpf_iter_bpf_array_map__open_and_load();
- if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load"))
return;
map_fd = bpf_map__fd(skel->maps.arraymap1);
@@ -788,7 +1042,7 @@ static void test_bpf_array_map(void)
first_val = val;
err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY);
- if (CHECK(err, "map_update", "map_update failed\n"))
+ if (!ASSERT_OK(err, "map_update"))
goto out;
}
@@ -797,46 +1051,46 @@ static void test_bpf_array_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
/* do some tests */
start = 0;
while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0)
start += len;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
res_first_key = *(__u32 *)buf;
res_first_val = *(__u64 *)(buf + sizeof(__u32));
- if (CHECK(res_first_key != 0 || res_first_val != first_val,
- "bpf_seq_write",
- "seq_write failure: first key %u vs expected 0, "
- " first value %llu vs expected %llu\n",
- res_first_key, res_first_val, first_val))
+ if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") ||
+ !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write"))
goto close_iter;
- if (CHECK(skel->bss->key_sum != expected_key,
- "key_sum", "got %u expected %u\n",
- skel->bss->key_sum, expected_key))
+ if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
goto close_iter;
- if (CHECK(skel->bss->val_sum != expected_val,
- "val_sum", "got %llu expected %llu\n",
- skel->bss->val_sum, expected_val))
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
goto close_iter;
+ hash_fd = bpf_map__fd(skel->maps.hashmap1);
for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
err = bpf_map_lookup_elem(map_fd, &i, &val);
- if (CHECK(err, "map_lookup", "map_lookup failed\n"))
- goto out;
- if (CHECK(i != val, "invalid_val",
- "got value %llu expected %u\n", val, i))
- goto out;
+ if (!ASSERT_OK(err, "map_lookup arraymap1"))
+ goto close_iter;
+ if (!ASSERT_EQ(i, val, "invalid_val arraymap1"))
+ goto close_iter;
+
+ val = i + 4;
+ err = bpf_map_lookup_elem(hash_fd, &val, &key);
+ if (!ASSERT_OK(err, "map_lookup hashmap1"))
+ goto close_iter;
+ if (!ASSERT_EQ(key, val - 4, "invalid_val hashmap1"))
+ goto close_iter;
}
close_iter:
@@ -847,6 +1101,20 @@ out:
bpf_iter_bpf_array_map__destroy(skel);
}
+static void test_bpf_array_map_iter_fd(void)
+{
+ struct bpf_iter_bpf_array_map *skel;
+
+ skel = bpf_iter_bpf_array_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load"))
+ return;
+
+ do_read_map_iter_fd(&skel->skeleton, skel->progs.dump_bpf_array_map,
+ skel->maps.arraymap1);
+
+ bpf_iter_bpf_array_map__destroy(skel);
+}
+
static void test_bpf_percpu_array_map(void)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -859,18 +1127,17 @@ static void test_bpf_percpu_array_map(void)
void *val;
int len;
- val = malloc(8 * bpf_num_possible_cpus());
-
skel = bpf_iter_bpf_percpu_array_map__open();
- if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open",
- "skeleton open failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__open"))
return;
skel->rodata->num_cpus = bpf_num_possible_cpus();
+ val = malloc(8 * bpf_num_possible_cpus());
+ if (!ASSERT_OK_PTR(val, "malloc"))
+ goto out;
err = bpf_iter_bpf_percpu_array_map__load(skel);
- if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load",
- "skeleton load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load"))
goto out;
/* update map values here */
@@ -884,7 +1151,7 @@ static void test_bpf_percpu_array_map(void)
}
err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY);
- if (CHECK(err, "map_update", "map_update failed\n"))
+ if (!ASSERT_OK(err, "map_update"))
goto out;
}
@@ -893,27 +1160,23 @@ static void test_bpf_percpu_array_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
- if (CHECK(skel->bss->key_sum != expected_key,
- "key_sum", "got %u expected %u\n",
- skel->bss->key_sum, expected_key))
+ if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
goto close_iter;
- if (CHECK(skel->bss->val_sum != expected_val,
- "val_sum", "got %u expected %u\n",
- skel->bss->val_sum, expected_val))
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
goto close_iter;
close_iter:
@@ -922,6 +1185,7 @@ free_link:
bpf_link__destroy(link);
out:
bpf_iter_bpf_percpu_array_map__destroy(skel);
+ free(val);
}
/* An iterator program deletes all local storage in a map. */
@@ -937,17 +1201,17 @@ static void test_bpf_sk_storage_delete(void)
char buf[64];
skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
- if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load"))
return;
map_fd = bpf_map__fd(skel->maps.sk_stg_map);
sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
goto out;
+
err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
- if (CHECK(err, "map_update", "map_update failed\n"))
+ if (!ASSERT_OK(err, "map_update"))
goto out;
memset(&linfo, 0, sizeof(linfo));
@@ -956,24 +1220,29 @@ static void test_bpf_sk_storage_delete(void)
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
&opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
- if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
- "map value wasn't deleted (err=%d, errno=%d)\n", err, errno))
- goto close_iter;
+
+ /* Note: The following assertions serve to ensure
+ * the value was deleted. It does so by asserting
+ * that bpf_map_lookup_elem has failed. This might
+ * seem counterintuitive at first.
+ */
+ ASSERT_ERR(err, "bpf_map_lookup_elem");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem");
close_iter:
close(iter_fd);
@@ -998,38 +1267,35 @@ static void test_bpf_sk_storage_get(void)
int sock_fd = -1;
skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
- if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load"))
return;
sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
goto out;
err = listen(sock_fd, 1);
- if (CHECK(err != 0, "listen", "errno: %d\n", errno))
+ if (!ASSERT_OK(err, "listen"))
goto close_socket;
map_fd = bpf_map__fd(skel->maps.sk_stg_map);
err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
- if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n"))
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
goto close_socket;
do_dummy_read(skel->progs.fill_socket_owner);
err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
- if (CHECK(err || val != getpid(), "bpf_map_lookup_elem",
- "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
- getpid(), val, err))
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem") ||
+ !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem"))
goto close_socket;
do_dummy_read(skel->progs.negate_socket_local_storage);
err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
- CHECK(err || val != -getpid(), "bpf_map_lookup_elem",
- "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
- -getpid(), val, err);
+ ASSERT_OK(err, "bpf_map_lookup_elem");
+ ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem");
close_socket:
close(sock_fd);
@@ -1037,6 +1303,20 @@ out:
bpf_iter_bpf_sk_storage_helpers__destroy(skel);
}
+static void test_bpf_sk_storage_map_iter_fd(void)
+{
+ struct bpf_iter_bpf_sk_storage_map *skel;
+
+ skel = bpf_iter_bpf_sk_storage_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load"))
+ return;
+
+ do_read_map_iter_fd(&skel->skeleton, skel->progs.rw_bpf_sk_storage_map,
+ skel->maps.sk_stg_map);
+
+ bpf_iter_bpf_sk_storage_map__destroy(skel);
+}
+
static void test_bpf_sk_storage_map(void)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -1049,15 +1329,14 @@ static void test_bpf_sk_storage_map(void)
char buf[64];
skel = bpf_iter_bpf_sk_storage_map__open_and_load();
- if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load"))
return;
map_fd = bpf_map__fd(skel->maps.sk_stg_map);
num_sockets = ARRAY_SIZE(sock_fd);
for (i = 0; i < num_sockets; i++) {
sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno))
+ if (!ASSERT_GE(sock_fd[i], 0, "socket"))
goto out;
val = i + 1;
@@ -1065,7 +1344,7 @@ static void test_bpf_sk_storage_map(void)
err = bpf_map_update_elem(map_fd, &sock_fd[i], &val,
BPF_NOEXIST);
- if (CHECK(err, "map_update", "map_update failed\n"))
+ if (!ASSERT_OK(err, "map_update"))
goto out;
}
@@ -1073,31 +1352,43 @@ static void test_bpf_sk_storage_map(void)
linfo.map.map_fd = map_fd;
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
- link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ link = bpf_program__attach_iter(skel->progs.oob_write_bpf_sk_storage_map, &opts);
+ err = libbpf_get_error(link);
+ if (!ASSERT_EQ(err, -EACCES, "attach_oob_write_iter")) {
+ if (!err)
+ bpf_link__destroy(link);
+ goto out;
+ }
+
+ link = bpf_program__attach_iter(skel->progs.rw_bpf_sk_storage_map, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
+ skel->bss->to_add_val = time(NULL);
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
- if (CHECK(skel->bss->ipv6_sk_count != num_sockets,
- "ipv6_sk_count", "got %u expected %u\n",
- skel->bss->ipv6_sk_count, num_sockets))
+ if (!ASSERT_EQ(skel->bss->ipv6_sk_count, num_sockets, "ipv6_sk_count"))
goto close_iter;
- if (CHECK(skel->bss->val_sum != expected_val,
- "val_sum", "got %u expected %u\n",
- skel->bss->val_sum, expected_val))
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
goto close_iter;
+ for (i = 0; i < num_sockets; i++) {
+ err = bpf_map_lookup_elem(map_fd, &sock_fd[i], &val);
+ if (!ASSERT_OK(err, "map_lookup") ||
+ !ASSERT_EQ(val, i + 1 + skel->bss->to_add_val, "check_map_value"))
+ break;
+ }
+
close_iter:
close(iter_fd);
free_link:
@@ -1118,8 +1409,7 @@ static void test_rdonly_buf_out_of_bound(void)
struct bpf_link *link;
skel = bpf_iter_test_kern5__open_and_load();
- if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern5__open_and_load"))
return;
memset(&linfo, 0, sizeof(linfo));
@@ -1127,7 +1417,7 @@ static void test_rdonly_buf_out_of_bound(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
bpf_link__destroy(link);
bpf_iter_test_kern5__destroy(skel);
@@ -1138,11 +1428,36 @@ static void test_buf_neg_offset(void)
struct bpf_iter_test_kern6 *skel;
skel = bpf_iter_test_kern6__open_and_load();
- if (CHECK(skel, "bpf_iter_test_kern6__open_and_load",
- "skeleton open_and_load unexpected success\n"))
+ if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern6__open_and_load"))
bpf_iter_test_kern6__destroy(skel);
}
+static void test_link_iter(void)
+{
+ struct bpf_iter_bpf_link *skel;
+
+ skel = bpf_iter_bpf_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_link__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_bpf_link);
+
+ bpf_iter_bpf_link__destroy(skel);
+}
+
+static void test_ksym_iter(void)
+{
+ struct bpf_iter_ksym *skel;
+
+ skel = bpf_iter_ksym__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_ksym__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_ksym);
+
+ bpf_iter_ksym__destroy(skel);
+}
+
#define CMP_BUFFER_SIZE 1024
static char task_vma_output[CMP_BUFFER_SIZE];
static char proc_maps_output[CMP_BUFFER_SIZE];
@@ -1163,74 +1478,245 @@ static void str_strip_first_line(char *str)
*dst = '\0';
}
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
-static void test_task_vma(void)
+static void test_task_vma_common(struct bpf_iter_attach_opts *opts)
{
int err, iter_fd = -1, proc_maps_fd = -1;
- struct bpf_iter_task_vma *skel;
+ struct bpf_iter_task_vmas *skel;
int len, read_size = 4;
char maps_path[64];
- skel = bpf_iter_task_vma__open();
- if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open failed\n"))
+ skel = bpf_iter_task_vmas__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open"))
return;
skel->bss->pid = getpid();
+ skel->bss->one_task = opts ? 1 : 0;
- err = bpf_iter_task_vma__load(skel);
- if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n"))
+ err = bpf_iter_task_vmas__load(skel);
+ if (!ASSERT_OK(err, "bpf_iter_task_vmas__load"))
goto out;
skel->links.proc_maps = bpf_program__attach_iter(
- skel->progs.proc_maps, NULL);
+ skel->progs.proc_maps, opts);
- if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
- "attach iterator failed\n")) {
+ if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
skel->links.proc_maps = NULL;
goto out;
}
iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto out;
/* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks
- * to trigger seq_file corner cases. The expected output is much
- * longer than 1kB, so the while loop will terminate.
+ * to trigger seq_file corner cases.
*/
len = 0;
while (len < CMP_BUFFER_SIZE) {
err = read_fd_into_buffer(iter_fd, task_vma_output + len,
- min(read_size, CMP_BUFFER_SIZE - len));
- if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
+ MIN(read_size, CMP_BUFFER_SIZE - len));
+ if (!err)
+ break;
+ if (!ASSERT_GE(err, 0, "read_iter_fd"))
goto out;
len += err;
}
+ if (opts)
+ ASSERT_EQ(skel->bss->one_task_error, 0, "unexpected task");
/* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */
snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid);
proc_maps_fd = open(maps_path, O_RDONLY);
- if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n"))
+ if (!ASSERT_GE(proc_maps_fd, 0, "open_proc_maps"))
goto out;
err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE);
- if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n"))
+ if (!ASSERT_GE(err, 0, "read_prog_maps_fd"))
goto out;
/* strip and compare the first line of the two files */
str_strip_first_line(task_vma_output);
str_strip_first_line(proc_maps_output);
- CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output",
- "found mismatch\n");
+ ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output");
+
+ check_bpf_link_info(skel->progs.proc_maps);
+
out:
close(proc_maps_fd);
close(iter_fd);
- bpf_iter_task_vma__destroy(skel);
+ bpf_iter_task_vmas__destroy(skel);
+}
+
+static void test_task_vma_dead_task(void)
+{
+ struct bpf_iter_task_vmas *skel;
+ int wstatus, child_pid = -1;
+ time_t start_tm, cur_tm;
+ int err, iter_fd = -1;
+ int wait_sec = 3;
+
+ skel = bpf_iter_task_vmas__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = bpf_iter_task_vmas__load(skel);
+ if (!ASSERT_OK(err, "bpf_iter_task_vmas__load"))
+ goto out;
+
+ skel->links.proc_maps = bpf_program__attach_iter(
+ skel->progs.proc_maps, NULL);
+
+ if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
+ skel->links.proc_maps = NULL;
+ goto out;
+ }
+
+ start_tm = time(NULL);
+ cur_tm = start_tm;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ /* Fork short-lived processes in the background. */
+ while (cur_tm < start_tm + wait_sec) {
+ system("echo > /dev/null");
+ cur_tm = time(NULL);
+ }
+ exit(0);
+ }
+
+ if (!ASSERT_GE(child_pid, 0, "fork_child"))
+ goto out;
+
+ while (cur_tm < start_tm + wait_sec) {
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto out;
+
+ /* Drain all data from iter_fd. */
+ while (cur_tm < start_tm + wait_sec) {
+ err = read_fd_into_buffer(iter_fd, task_vma_output, CMP_BUFFER_SIZE);
+ if (!ASSERT_GE(err, 0, "read_iter_fd"))
+ goto out;
+
+ cur_tm = time(NULL);
+
+ if (err == 0)
+ break;
+ }
+
+ close(iter_fd);
+ iter_fd = -1;
+ }
+
+ check_bpf_link_info(skel->progs.proc_maps);
+
+out:
+ waitpid(child_pid, &wstatus, 0);
+ close(iter_fd);
+ bpf_iter_task_vmas__destroy(skel);
+}
+
+void test_bpf_sockmap_map_iter_fd(void)
+{
+ struct bpf_iter_sockmap *skel;
+
+ skel = bpf_iter_sockmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_sockmap__open_and_load"))
+ return;
+
+ do_read_map_iter_fd(&skel->skeleton, skel->progs.copy, skel->maps.sockmap);
+
+ bpf_iter_sockmap__destroy(skel);
+}
+
+static void test_task_vma(void)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.tid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ test_task_vma_common(&opts);
+ test_task_vma_common(NULL);
+}
+
+/* uprobe attach point */
+static noinline int trigger_func(int arg)
+{
+ asm volatile ("");
+ return arg + 1;
+}
+
+static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool one_proc)
+{
+ struct bpf_iter_vma_offset *skel;
+ char buf[16] = {};
+ int iter_fd, len;
+ int pgsz, shift;
+
+ skel = bpf_iter_vma_offset__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_vma_offset__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+ skel->bss->address = (uintptr_t)trigger_func;
+ for (pgsz = getpagesize(), shift = 0; pgsz > 1; pgsz >>= 1, shift++)
+ ;
+ skel->bss->page_shift = shift;
+
+ skel->links.get_vma_offset = bpf_program__attach_iter(skel->progs.get_vma_offset, opts);
+ if (!ASSERT_OK_PTR(skel->links.get_vma_offset, "attach_iter"))
+ goto exit;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.get_vma_offset));
+ if (!ASSERT_GT(iter_fd, 0, "create_iter"))
+ goto exit;
+
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ buf[15] = 0;
+ ASSERT_EQ(strcmp(buf, "OK\n"), 0, "strcmp");
+
+ ASSERT_EQ(skel->bss->offset, get_uprobe_offset(trigger_func), "offset");
+ if (one_proc)
+ ASSERT_EQ(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count");
+ else
+ ASSERT_GT(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count");
+
+ close(iter_fd);
+
+exit:
+ bpf_iter_vma_offset__destroy(skel);
+}
+
+static void test_task_vma_offset(void)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ test_task_vma_offset_common(&opts, true);
+
+ linfo.task.pid = 0;
+ linfo.task.tid = getpid();
+ test_task_vma_offset_common(&opts, true);
+
+ test_task_vma_offset_common(NULL, false);
}
void test_bpf_iter(void)
{
+ ASSERT_OK(pthread_mutex_init(&do_nothing_mutex, NULL), "pthread_mutex_init");
+
if (test__start_subtest("btf_id_or_null"))
test_btf_id_or_null();
if (test__start_subtest("ipv6_route"))
@@ -1239,14 +1725,22 @@ void test_bpf_iter(void)
test_netlink();
if (test__start_subtest("bpf_map"))
test_bpf_map();
- if (test__start_subtest("task"))
- test_task();
+ if (test__start_subtest("task_tid"))
+ test_task_tid();
+ if (test__start_subtest("task_pid"))
+ test_task_pid();
+ if (test__start_subtest("task_pidfd"))
+ test_task_pidfd();
+ if (test__start_subtest("task_sleepable"))
+ test_task_sleepable();
if (test__start_subtest("task_stack"))
test_task_stack();
if (test__start_subtest("task_file"))
test_task_file();
if (test__start_subtest("task_vma"))
test_task_vma();
+ if (test__start_subtest("task_vma_dead_task"))
+ test_task_vma_dead_task();
if (test__start_subtest("task_btf"))
test_task_btf();
if (test__start_subtest("tcp4"))
@@ -1257,6 +1751,8 @@ void test_bpf_iter(void)
test_udp4();
if (test__start_subtest("udp6"))
test_udp6();
+ if (test__start_subtest("unix"))
+ test_unix();
if (test__start_subtest("anon"))
test_anon_iter(false);
if (test__start_subtest("anon-read-one-char"))
@@ -1275,10 +1771,14 @@ void test_bpf_iter(void)
test_bpf_percpu_hash_map();
if (test__start_subtest("bpf_array_map"))
test_bpf_array_map();
+ if (test__start_subtest("bpf_array_map_iter_fd"))
+ test_bpf_array_map_iter_fd();
if (test__start_subtest("bpf_percpu_array_map"))
test_bpf_percpu_array_map();
if (test__start_subtest("bpf_sk_storage_map"))
test_bpf_sk_storage_map();
+ if (test__start_subtest("bpf_sk_storage_map_iter_fd"))
+ test_bpf_sk_storage_map_iter_fd();
if (test__start_subtest("bpf_sk_storage_delete"))
test_bpf_sk_storage_delete();
if (test__start_subtest("bpf_sk_storage_get"))
@@ -1287,4 +1787,12 @@ void test_bpf_iter(void)
test_rdonly_buf_out_of_bound();
if (test__start_subtest("buf-neg-offset"))
test_buf_neg_offset();
+ if (test__start_subtest("link-iter"))
+ test_link_iter();
+ if (test__start_subtest("ksym"))
+ test_ksym_iter();
+ if (test__start_subtest("bpf_sockmap_map_iter_fd"))
+ test_bpf_sockmap_map_iter_fd();
+ if (test__start_subtest("vma_offset"))
+ test_task_vma_offset();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..16bed9dd8e6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+#include "bpf_iter_setsockopt.skel.h"
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+ return -1;
+
+ return 0;
+}
+
+static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
+ sizeof("bpf_cubic")))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
+{
+ char tcp_cc[16];
+ socklen_t optlen = sizeof(tcp_cc);
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
+ tcp_cc, &optlen) ||
+ strcmp(tcp_cc, "bpf_dctcp"))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static int *make_established(int listen_fd, unsigned int nr_est,
+ int **paccepted_fds)
+{
+ int *est_fds, *accepted_fds;
+ unsigned int i;
+
+ est_fds = malloc(sizeof(*est_fds) * nr_est);
+ if (!est_fds)
+ return NULL;
+
+ accepted_fds = malloc(sizeof(*accepted_fds) * nr_est);
+ if (!accepted_fds) {
+ free(est_fds);
+ return NULL;
+ }
+
+ for (i = 0; i < nr_est; i++) {
+ est_fds[i] = connect_to_fd(listen_fd, 0);
+ if (est_fds[i] == -1)
+ break;
+ if (set_bpf_cubic(&est_fds[i], 1) != 1) {
+ close(est_fds[i]);
+ break;
+ }
+
+ accepted_fds[i] = accept(listen_fd, NULL, 0);
+ if (accepted_fds[i] == -1) {
+ close(est_fds[i]);
+ break;
+ }
+ }
+
+ if (!ASSERT_EQ(i, nr_est, "create established fds")) {
+ free_fds(accepted_fds, i);
+ free_fds(est_fds, i);
+ return NULL;
+ }
+
+ *paccepted_fds = accepted_fds;
+ return est_fds;
+}
+
+static unsigned short get_local_port(int fd)
+{
+ struct sockaddr_in6 addr;
+ socklen_t addrlen = sizeof(addr);
+
+ if (!getsockname(fd, (struct sockaddr *)&addr, &addrlen))
+ return ntohs(addr.sin6_port);
+
+ return 0;
+}
+
+static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
+ bool random_retry)
+{
+ int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
+ unsigned int nr_reuse_listens = 256, nr_est = 256;
+ int err, iter_fd = -1, listen_fd = -1;
+ char buf;
+
+ /* Prepare non-reuseport listen_fd */
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(listen_fd, 0, "start_server"))
+ return;
+ if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
+ "set listen_fd to cubic"))
+ goto done;
+ iter_skel->bss->listen_hport = get_local_port(listen_fd);
+ if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
+ "get_local_port(listen_fd)"))
+ goto done;
+
+ /* Connect to non-reuseport listen_fd */
+ est_fds = make_established(listen_fd, nr_est, &accepted_fds);
+ if (!ASSERT_OK_PTR(est_fds, "create established"))
+ goto done;
+
+ /* Prepare reuseport listen fds */
+ reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
+ "::1", 0, 0,
+ nr_reuse_listens);
+ if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
+ goto done;
+ if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens, "set reuse_listen_fds to cubic"))
+ goto done;
+ iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
+ if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
+ "get_local_port(reuse_listen_fds[0])"))
+ goto done;
+
+ /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
+ iter_skel->bss->random_retry = random_retry;
+ iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
+ if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
+ goto done;
+
+ while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+ errno == EAGAIN)
+ ;
+ if (!ASSERT_OK(err, "read iter error"))
+ goto done;
+
+ /* Check reuseport listen fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens,
+ "check reuse_listen_fds dctcp");
+
+ /* Check non reuseport listen fd for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
+ "check listen_fd dctcp");
+
+ /* Check established fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
+ "check est_fds dctcp");
+
+ /* Check accepted fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
+ "check accepted_fds dctcp");
+
+done:
+ if (iter_fd != -1)
+ close(iter_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+ free_fds(reuse_listen_fds, nr_reuse_listens);
+ free_fds(accepted_fds, nr_est);
+ free_fds(est_fds, nr_est);
+}
+
+void serial_test_bpf_iter_setsockopt(void)
+{
+ struct bpf_iter_setsockopt *iter_skel = NULL;
+ struct bpf_cubic *cubic_skel = NULL;
+ struct bpf_dctcp *dctcp_skel = NULL;
+ struct bpf_link *cubic_link = NULL;
+ struct bpf_link *dctcp_link = NULL;
+
+ if (create_netns())
+ return;
+
+ /* Load iter_skel */
+ iter_skel = bpf_iter_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
+ return;
+ iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
+ if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
+ goto done;
+
+ /* Load bpf_cubic */
+ cubic_skel = bpf_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
+ goto done;
+ cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+ if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
+ goto done;
+
+ /* Load bpf_dctcp */
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ goto done;
+ dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
+ goto done;
+
+ do_bpf_iter_setsockopt(iter_skel, true);
+ do_bpf_iter_setsockopt(iter_skel, false);
+
+done:
+ bpf_link__destroy(cubic_link);
+ bpf_link__destroy(dctcp_link);
+ bpf_cubic__destroy(cubic_skel);
+ bpf_dctcp__destroy(dctcp_skel);
+ bpf_iter_setsockopt__destroy(iter_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
new file mode 100644
index 000000000000..ee725d4d98a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <test_progs.h>
+#include "bpf_iter_setsockopt_unix.skel.h"
+
+#define NR_CASES 5
+
+static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ .sun_path = "",
+ };
+ socklen_t len;
+ int fd, err;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (!ASSERT_NEQ(fd, -1, "socket"))
+ return -1;
+
+ len = offsetof(struct sockaddr_un, sun_path);
+ err = bind(fd, (struct sockaddr *)&addr, len);
+ if (!ASSERT_OK(err, "bind"))
+ return -1;
+
+ len = sizeof(addr);
+ err = getsockname(fd, (struct sockaddr *)&addr, &len);
+ if (!ASSERT_OK(err, "getsockname"))
+ return -1;
+
+ memcpy(&skel->bss->sun_path, &addr.sun_path,
+ len - offsetof(struct sockaddr_un, sun_path));
+
+ return fd;
+}
+
+static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd)
+{
+ socklen_t optlen;
+ int i, err;
+
+ for (i = 0; i < NR_CASES; i++) {
+ if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1,
+ "bpf_(get|set)sockopt"))
+ return;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+ &(skel->data->sndbuf_setsockopt[i]),
+ sizeof(skel->data->sndbuf_setsockopt[i]));
+ if (!ASSERT_OK(err, "setsockopt"))
+ return;
+
+ optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]);
+ err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+ &(skel->bss->sndbuf_getsockopt_expected[i]),
+ &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ return;
+
+ if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i],
+ skel->bss->sndbuf_getsockopt_expected[i],
+ "bpf_(get|set)sockopt"))
+ return;
+ }
+}
+
+void test_bpf_iter_setsockopt_unix(void)
+{
+ struct bpf_iter_setsockopt_unix *skel;
+ int err, unix_fd, iter_fd;
+ char buf;
+
+ skel = bpf_iter_setsockopt_unix__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ unix_fd = create_unix_socket(skel);
+ if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server"))
+ goto destroy;
+
+ skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
+ if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter"))
+ goto destroy;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf));
+ if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+ goto destroy;
+
+ while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+ errno == EAGAIN)
+ ;
+ if (!ASSERT_OK(err, "read iter error"))
+ goto destroy;
+
+ test_sndbuf(skel, unix_fd);
+destroy:
+ bpf_iter_setsockopt_unix__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
new file mode 100644
index 000000000000..4cd8a25afe68
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "bpf_loop.skel.h"
+
+static void check_nr_loops(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(skel->progs.test_prog);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ /* test 0 loops */
+ skel->bss->nr_loops = 0;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+ "0 loops");
+
+ /* test 500 loops */
+ skel->bss->nr_loops = 500;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+ "500 loops");
+ ASSERT_EQ(skel->bss->g_output, (500 * 499) / 2, "g_output");
+
+ /* test exceeding the max limit */
+ skel->bss->nr_loops = -1;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->err, -E2BIG, "over max limit");
+
+ bpf_link__destroy(link);
+}
+
+static void check_callback_fn_stop(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(skel->progs.test_prog);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ /* testing that loop is stopped when callback_fn returns 1 */
+ skel->bss->nr_loops = 400;
+ skel->data->stop_index = 50;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->data->stop_index + 1,
+ "nr_loops_returned");
+ ASSERT_EQ(skel->bss->g_output, (50 * 49) / 2,
+ "g_output");
+
+ bpf_link__destroy(link);
+}
+
+static void check_null_callback_ctx(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ /* check that user is able to pass in a null callback_ctx */
+ link = bpf_program__attach(skel->progs.prog_null_ctx);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->nr_loops = 10;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+ "nr_loops_returned");
+
+ bpf_link__destroy(link);
+}
+
+static void check_invalid_flags(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ /* check that passing in non-zero flags returns -EINVAL */
+ link = bpf_program__attach(skel->progs.prog_invalid_flags);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->err, -EINVAL, "err");
+
+ bpf_link__destroy(link);
+}
+
+static void check_nested_calls(struct bpf_loop *skel)
+{
+ __u32 nr_loops = 100, nested_callback_nr_loops = 4;
+ struct bpf_link *link;
+
+ /* check that nested calls are supported */
+ link = bpf_program__attach(skel->progs.prog_nested_calls);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->nr_loops = nr_loops;
+ skel->bss->nested_callback_nr_loops = nested_callback_nr_loops;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, nr_loops * nested_callback_nr_loops
+ * nested_callback_nr_loops, "nr_loops_returned");
+ ASSERT_EQ(skel->bss->g_output, (4 * 3) / 2 * nested_callback_nr_loops
+ * nr_loops, "g_output");
+
+ bpf_link__destroy(link);
+}
+
+static void check_non_constant_callback(struct bpf_loop *skel)
+{
+ struct bpf_link *link =
+ bpf_program__attach(skel->progs.prog_non_constant_callback);
+
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->callback_selector = 0x0F;
+ usleep(1);
+ ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1");
+
+ skel->bss->callback_selector = 0xF0;
+ usleep(1);
+ ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2");
+
+ bpf_link__destroy(link);
+}
+
+static void check_stack(struct bpf_loop *skel)
+{
+ struct bpf_link *link = bpf_program__attach(skel->progs.stack_check);
+ const int max_key = 12;
+ int key;
+ int map_fd;
+
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.map1);
+
+ if (!ASSERT_GE(map_fd, 0, "bpf_map__fd"))
+ goto out;
+
+ for (key = 1; key <= max_key; ++key) {
+ int val = key;
+ int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);
+
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto out;
+ }
+
+ usleep(1);
+
+ for (key = 1; key <= max_key; ++key) {
+ int val;
+ int err = bpf_map_lookup_elem(map_fd, &key, &val);
+
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto out;
+ if (!ASSERT_EQ(val, key + 1, "bad value in the map"))
+ goto out;
+ }
+
+out:
+ bpf_link__destroy(link);
+}
+
+void test_bpf_loop(void)
+{
+ struct bpf_loop *skel;
+
+ skel = bpf_loop__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_loop__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("check_nr_loops"))
+ check_nr_loops(skel);
+ if (test__start_subtest("check_callback_fn_stop"))
+ check_callback_fn_stop(skel);
+ if (test__start_subtest("check_null_callback_ctx"))
+ check_null_callback_ctx(skel);
+ if (test__start_subtest("check_invalid_flags"))
+ check_invalid_flags(skel);
+ if (test__start_subtest("check_nested_calls"))
+ check_nested_calls(skel);
+ if (test__start_subtest("check_non_constant_callback"))
+ check_non_constant_callback(skel);
+ if (test__start_subtest("check_stack"))
+ check_stack(skel);
+
+ bpf_loop__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
new file mode 100644
index 000000000000..ecc3d47919ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include <linux/module.h>
+#include <linux/userfaultfd.h>
+
+#include "ksym_race.skel.h"
+#include "bpf_mod_race.skel.h"
+#include "kfunc_call_race.skel.h"
+#include "testing_helpers.h"
+
+/* This test crafts a race between btf_try_get_module and do_init_module, and
+ * checks whether btf_try_get_module handles the invocation for a well-formed
+ * but uninitialized module correctly. Unless the module has completed its
+ * initcalls, the verifier should fail the program load and return ENXIO.
+ *
+ * userfaultfd is used to trigger a fault in an fmod_ret program, and make it
+ * sleep, then the BPF program is loaded and the return value from verifier is
+ * inspected. After this, the userfaultfd is closed so that the module loading
+ * thread makes forward progress, and fmod_ret injects an error so that the
+ * module load fails and it is freed.
+ *
+ * If the verifier succeeded in loading the supplied program, it will end up
+ * taking reference to freed module, and trigger a crash when the program fd
+ * is closed later. This is true for both kfuncs and ksyms. In both cases,
+ * the crash is triggered inside bpf_prog_free_deferred, when module reference
+ * is finally released.
+ */
+
+struct test_config {
+ const char *str_open;
+ void *(*bpf_open_and_load)();
+ void (*bpf_destroy)(void *);
+};
+
+enum bpf_test_state {
+ _TS_INVALID,
+ TS_MODULE_LOAD,
+ TS_MODULE_LOAD_FAIL,
+};
+
+static _Atomic enum bpf_test_state state = _TS_INVALID;
+
+static void *load_module_thread(void *p)
+{
+
+ if (!ASSERT_NEQ(load_bpf_testmod(false), 0, "load_module_thread must fail"))
+ atomic_store(&state, TS_MODULE_LOAD);
+ else
+ atomic_store(&state, TS_MODULE_LOAD_FAIL);
+ return p;
+}
+
+static int sys_userfaultfd(int flags)
+{
+ return syscall(__NR_userfaultfd, flags);
+}
+
+static int test_setup_uffd(void *fault_addr)
+{
+ struct uffdio_register uffd_register = {};
+ struct uffdio_api uffd_api = {};
+ int uffd;
+
+ uffd = sys_userfaultfd(O_CLOEXEC);
+ if (uffd < 0)
+ return -errno;
+
+ uffd_api.api = UFFD_API;
+ uffd_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffd_api)) {
+ close(uffd);
+ return -1;
+ }
+
+ uffd_register.range.start = (unsigned long)fault_addr;
+ uffd_register.range.len = getpagesize();
+ uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
+ close(uffd);
+ return -1;
+ }
+ return uffd;
+}
+
+static void test_bpf_mod_race_config(const struct test_config *config)
+{
+ void *fault_addr, *skel_fail;
+ struct bpf_mod_race *skel;
+ struct uffd_msg uffd_msg;
+ pthread_t load_mod_thrd;
+ _Atomic int *blockingp;
+ int uffd, ret;
+
+ fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
+ return;
+
+ if (!ASSERT_OK(unload_bpf_testmod(false), "unload bpf_testmod"))
+ goto end_mmap;
+
+ skel = bpf_mod_race__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open"))
+ goto end_module;
+
+ skel->rodata->bpf_mod_race_config.tgid = getpid();
+ skel->rodata->bpf_mod_race_config.inject_error = -4242;
+ skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
+ if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load"))
+ goto end_destroy;
+ blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
+
+ if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
+ goto end_destroy;
+
+ uffd = test_setup_uffd(fault_addr);
+ if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
+ goto end_destroy;
+
+ if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
+ "load module thread"))
+ goto end_uffd;
+
+ /* Now, we either fail loading module, or block in bpf prog, spin to find out */
+ while (!atomic_load(&state) && !atomic_load(blockingp))
+ ;
+ if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
+ goto end_join;
+ if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
+ pthread_kill(load_mod_thrd, SIGKILL);
+ goto end_uffd;
+ }
+
+ /* We might have set bpf_blocking to 1, but may have not blocked in
+ * bpf_copy_from_user. Read userfaultfd descriptor to verify that.
+ */
+ if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
+ "read uffd block event"))
+ goto end_join;
+ if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
+ goto end_join;
+
+ /* We know that load_mod_thrd is blocked in the fmod_ret program, the
+ * module state is still MODULE_STATE_COMING because mod->init hasn't
+ * returned. This is the time we try to load a program calling kfunc and
+ * check if we get ENXIO from verifier.
+ */
+ skel_fail = config->bpf_open_and_load();
+ ret = errno;
+ if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
+ /* Close uffd to unblock load_mod_thrd */
+ close(uffd);
+ uffd = -1;
+ while (atomic_load(blockingp) != 2)
+ ;
+ ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+ config->bpf_destroy(skel_fail);
+ goto end_join;
+
+ }
+ ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
+ ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
+
+ close(uffd);
+ uffd = -1;
+end_join:
+ pthread_join(load_mod_thrd, NULL);
+ if (uffd < 0)
+ ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
+end_uffd:
+ if (uffd >= 0)
+ close(uffd);
+end_destroy:
+ bpf_mod_race__destroy(skel);
+ ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+end_module:
+ unload_bpf_testmod(false);
+ ASSERT_OK(load_bpf_testmod(false), "restore bpf_testmod");
+end_mmap:
+ munmap(fault_addr, 4096);
+ atomic_store(&state, _TS_INVALID);
+}
+
+static const struct test_config ksym_config = {
+ .str_open = "ksym_race__open_and_load",
+ .bpf_open_and_load = (void *)ksym_race__open_and_load,
+ .bpf_destroy = (void *)ksym_race__destroy,
+};
+
+static const struct test_config kfunc_config = {
+ .str_open = "kfunc_call_race__open_and_load",
+ .bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
+ .bpf_destroy = (void *)kfunc_call_race__destroy,
+};
+
+void serial_test_bpf_mod_race(void)
+{
+ if (test__start_subtest("ksym (used_btfs UAF)"))
+ test_bpf_mod_race_config(&ksym_config);
+ if (test__start_subtest("kfunc (kfunc_btf_tab UAF)"))
+ test_bpf_mod_race_config(&kfunc_config);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
new file mode 100644
index 000000000000..dd6512fa652b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include "test_bpf_nf.skel.h"
+#include "test_bpf_nf_fail.skel.h"
+
+static char log_buf[1024 * 1024];
+
+struct {
+ const char *prog_name;
+ const char *err_msg;
+} test_bpf_nf_fail_tests[] = {
+ { "alloc_release", "kernel function bpf_ct_release args#0 expected pointer to STRUCT nf_conn but" },
+ { "insert_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "lookup_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "set_timeout_after_insert", "kernel function bpf_ct_set_timeout args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" },
+ { "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" },
+ { "write_not_allowlisted_field", "no write support to nf_conn at off" },
+};
+
+enum {
+ TEST_XDP,
+ TEST_TC_BPF,
+};
+
+#define TIMEOUT_MS 3000
+#define IPS_STATUS_MASK (IPS_CONFIRMED | IPS_SEEN_REPLY | \
+ IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE | \
+ IPS_SRC_NAT | IPS_DST_NAT)
+
+static int connect_to_server(int srv_fd)
+{
+ int fd = -1;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ goto out;
+
+ if (!ASSERT_EQ(connect_fd_to_fd(fd, srv_fd, TIMEOUT_MS), 0, "connect_fd_to_fd")) {
+ close(fd);
+ fd = -1;
+ }
+out:
+ return fd;
+}
+
+static void test_bpf_nf_ct(int mode)
+{
+ const char *iptables = "iptables-legacy -t raw %s PREROUTING -j CONNMARK --set-mark 42/0";
+ int srv_fd = -1, client_fd = -1, srv_client_fd = -1;
+ struct sockaddr_in peer_addr = {};
+ struct test_bpf_nf *skel;
+ int prog_fd, err;
+ socklen_t len;
+ u16 srv_port;
+ char cmd[128];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ if (SYS_NOFAIL("iptables-legacy --version")) {
+ fprintf(stdout, "Missing required iptables-legacy tool\n");
+ test__skip();
+ return;
+ }
+
+ skel = test_bpf_nf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
+ return;
+
+ /* Enable connection tracking */
+ snprintf(cmd, sizeof(cmd), iptables, "-A");
+ if (!ASSERT_OK(system(cmd), cmd))
+ goto end;
+
+ srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto end;
+
+ srv_port = get_socket_local_port(srv_fd);
+ if (!ASSERT_GE(srv_port, 0, "get_sock_local_port"))
+ goto end;
+
+ client_fd = connect_to_server(srv_fd);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
+ goto end;
+
+ len = sizeof(peer_addr);
+ srv_client_fd = accept(srv_fd, (struct sockaddr *)&peer_addr, &len);
+ if (!ASSERT_GE(srv_client_fd, 0, "accept"))
+ goto end;
+ if (!ASSERT_EQ(len, sizeof(struct sockaddr_in), "sockaddr len"))
+ goto end;
+
+ skel->bss->saddr = peer_addr.sin_addr.s_addr;
+ skel->bss->sport = peer_addr.sin_port;
+ skel->bss->daddr = peer_addr.sin_addr.s_addr;
+ skel->bss->dport = srv_port;
+
+ if (mode == TEST_XDP)
+ prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
+ else
+ prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test);
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto end;
+
+ ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
+ ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+ ASSERT_EQ(skel->bss->test_einval_reserved_new, -EINVAL, "Test EINVAL for reserved in new struct not set to 0");
+ ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
+ ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+ ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
+ ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
+ ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
+ ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+ ASSERT_EQ(skel->data->test_alloc_entry, 0, "Test for alloc new entry");
+ ASSERT_EQ(skel->data->test_insert_entry, 0, "Test for insert new entry");
+ ASSERT_EQ(skel->data->test_succ_lookup, 0, "Test for successful lookup");
+ /* allow some tolerance for test_delta_timeout value to avoid races. */
+ ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update");
+ ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
+ ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value");
+ ASSERT_EQ(skel->bss->test_status, IPS_STATUS_MASK, "Test for ct status update ");
+ ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
+ ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
+ ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
+ ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
+ ASSERT_EQ(skel->data->test_ct_zone_id_alloc_entry, 0, "Test for alloc new entry in specified ct zone");
+ ASSERT_EQ(skel->data->test_ct_zone_id_insert_entry, 0, "Test for insert new entry in specified ct zone");
+ ASSERT_EQ(skel->data->test_ct_zone_id_succ_lookup, 0, "Test for successful lookup in specified ct_zone");
+ ASSERT_EQ(skel->bss->test_ct_zone_dir_enoent_lookup, -ENOENT, "Test ENOENT for lookup with wrong ct zone dir");
+ ASSERT_EQ(skel->bss->test_ct_zone_id_enoent_lookup, -ENOENT, "Test ENOENT for lookup in wrong ct zone");
+
+end:
+ if (client_fd != -1)
+ close(client_fd);
+ if (srv_client_fd != -1)
+ close(srv_client_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
+
+ snprintf(cmd, sizeof(cmd), iptables, "-D");
+ system(cmd);
+ test_bpf_nf__destroy(skel);
+}
+
+static void test_bpf_nf_ct_fail(const char *prog_name, const char *err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct test_bpf_nf_fail *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = test_bpf_nf_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "test_bpf_nf_fail__open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto end;
+
+ bpf_program__set_autoload(prog, true);
+
+ ret = test_bpf_nf_fail__load(skel);
+ if (!ASSERT_ERR(ret, "test_bpf_nf_fail__load must fail"))
+ goto end;
+
+ if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
+ fprintf(stderr, "Expected: %s\n", err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+
+end:
+ test_bpf_nf_fail__destroy(skel);
+}
+
+void test_bpf_nf(void)
+{
+ int i;
+ if (test__start_subtest("xdp-ct"))
+ test_bpf_nf_ct(TEST_XDP);
+ if (test__start_subtest("tc-bpf-ct"))
+ test_bpf_nf_ct(TEST_TC_BPF);
+ for (i = 0; i < ARRAY_SIZE(test_bpf_nf_fail_tests); i++) {
+ if (test__start_subtest(test_bpf_nf_fail_tests[i].prog_name))
+ test_bpf_nf_ct_fail(test_bpf_nf_fail_tests[i].prog_name,
+ test_bpf_nf_fail_tests[i].err_msg);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index 284d5921c345..f09d6ac2ef09 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -3,11 +3,11 @@
#define nr_iters 2
-void test_bpf_obj_id(void)
+void serial_test_bpf_obj_id(void)
{
const __u64 array_magic_value = 0xfaceb00c;
const __u32 array_key = 0;
- const char *file = "./test_obj_id.o";
+ const char *file = "./test_obj_id.bpf.o";
const char *expected_prog_name = "test_obj_id";
const char *expected_map_name = "test_map_id";
const __u64 nsec_per_sec = 1000000000;
@@ -25,7 +25,7 @@ void test_bpf_obj_id(void)
*/
__u32 map_ids[nr_iters + 1];
char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128];
- __u32 i, next_id, info_len, nr_id_found, duration = 0;
+ __u32 i, next_id, info_len, nr_id_found;
struct timespec real_time_ts, boot_time_ts;
int err = 0;
__u64 array_value;
@@ -33,45 +33,46 @@ void test_bpf_obj_id(void)
time_t now, load_time;
err = bpf_prog_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id");
err = bpf_map_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_map_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id");
err = bpf_link_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_map_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id");
- /* Check bpf_obj_get_info_by_fd() */
+ /* Check bpf_map_get_info_by_fd() */
bzero(zeros, sizeof(zeros));
for (i = 0; i < nr_iters; i++) {
now = time(NULL);
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
&objs[i], &prog_fds[i]);
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
*/
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_prog_test_load"))
continue;
/* Insert a magic value to the map */
map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
- if (CHECK_FAIL(map_fds[i] < 0))
+ if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map"))
goto done;
+
err = bpf_map_update_elem(map_fds[i], &array_key,
&array_magic_value, 0);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
goto done;
- prog = bpf_object__find_program_by_title(objs[i],
- "raw_tp/sys_enter");
- if (CHECK_FAIL(!prog))
+ prog = bpf_object__find_program_by_name(objs[i], "test_obj_id");
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto done;
+
links[i] = bpf_program__attach(prog);
err = libbpf_get_error(links[i]);
- if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) {
+ if (!ASSERT_OK(err, "bpf_program__attach")) {
links[i] = NULL;
goto done;
}
@@ -79,26 +80,16 @@ void test_bpf_obj_id(void)
/* Check getting map info */
info_len = sizeof(struct bpf_map_info) * 2;
bzero(&map_infos[i], info_len);
- err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
+ err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i],
&info_len);
- if (CHECK(err ||
- map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
- map_infos[i].key_size != sizeof(__u32) ||
- map_infos[i].value_size != sizeof(__u64) ||
- map_infos[i].max_entries != 1 ||
- map_infos[i].map_flags != 0 ||
- info_len != sizeof(struct bpf_map_info) ||
- strcmp((char *)map_infos[i].name, expected_map_name),
- "get-map-info(fd)",
- "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
- err, errno,
- map_infos[i].type, BPF_MAP_TYPE_ARRAY,
- info_len, sizeof(struct bpf_map_info),
- map_infos[i].key_size,
- map_infos[i].value_size,
- map_infos[i].max_entries,
- map_infos[i].map_flags,
- map_infos[i].name, expected_map_name))
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") ||
+ !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") ||
+ !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") ||
+ !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") ||
+ !ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") ||
+ !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") ||
+ !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") ||
+ !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name"))
goto done;
/* Check getting prog info */
@@ -112,48 +103,34 @@ void test_bpf_obj_id(void)
prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
prog_infos[i].nr_map_ids = 2;
+
err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "clock_gettime"))
goto done;
+
err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "clock_gettime"))
goto done;
- err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
- &info_len);
+
+ err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i],
+ &info_len);
load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+ (prog_infos[i].load_time / nsec_per_sec);
- if (CHECK(err ||
- prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT ||
- info_len != sizeof(struct bpf_prog_info) ||
- (env.jit_enabled && !prog_infos[i].jited_prog_len) ||
- (env.jit_enabled &&
- !memcmp(jited_insns, zeros, sizeof(zeros))) ||
- !prog_infos[i].xlated_prog_len ||
- !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
- load_time < now - 60 || load_time > now + 60 ||
- prog_infos[i].created_by_uid != my_uid ||
- prog_infos[i].nr_map_ids != 1 ||
- *(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
- strcmp((char *)prog_infos[i].name, expected_prog_name),
- "get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%zu) "
- "jit_enabled %d jited_prog_len %u xlated_prog_len %u "
- "jited_prog %d xlated_prog %d load_time %lu(%lu) "
- "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) "
- "name %s(%s)\n",
- err, errno, i,
- prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
- info_len, sizeof(struct bpf_prog_info),
- env.jit_enabled,
- prog_infos[i].jited_prog_len,
- prog_infos[i].xlated_prog_len,
- !!memcmp(jited_insns, zeros, sizeof(zeros)),
- !!memcmp(xlated_insns, zeros, sizeof(zeros)),
- load_time, now,
- prog_infos[i].created_by_uid, my_uid,
- prog_infos[i].nr_map_ids, 1,
- *(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
- prog_infos[i].name, expected_prog_name))
+
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") ||
+ !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") ||
+ !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") ||
+ !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))),
+ "jited_insns") ||
+ !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") ||
+ !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") ||
+ !ASSERT_GE(load_time, (now - 60), "load_time") ||
+ !ASSERT_LE(load_time, (now + 60), "load_time") ||
+ !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") ||
+ !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") ||
+ !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") ||
+ !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name"))
goto done;
/* Check getting link info */
@@ -161,27 +138,14 @@ void test_bpf_obj_id(void)
bzero(&link_infos[i], info_len);
link_infos[i].raw_tracepoint.tp_name = ptr_to_u64(&tp_name);
link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
- err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]),
- &link_infos[i], &info_len);
- if (CHECK(err ||
- link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
- link_infos[i].prog_id != prog_infos[i].id ||
- link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) ||
- strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
- "sys_enter") ||
- info_len != sizeof(struct bpf_link_info),
- "get-link-info(fd)",
- "err %d errno %d info_len %u(%zu) type %d(%d) id %d "
- "prog_id %d (%d) tp_name %s(%s)\n",
- err, errno,
- info_len, sizeof(struct bpf_link_info),
- link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
- link_infos[i].id,
- link_infos[i].prog_id, prog_infos[i].id,
- (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
- "sys_enter"))
+ err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]),
+ &link_infos[i], &info_len);
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") ||
+ !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") ||
+ !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") ||
+ !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") ||
+ !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name"))
goto done;
-
}
/* Check bpf_prog_get_next_id() */
@@ -190,7 +154,7 @@ void test_bpf_obj_id(void)
while (!bpf_prog_get_next_id(next_id, &next_id)) {
struct bpf_prog_info prog_info = {};
__u32 saved_map_id;
- int prog_fd;
+ int prog_fd, cmp_res;
info_len = sizeof(prog_info);
@@ -198,9 +162,7 @@ void test_bpf_obj_id(void)
if (prog_fd < 0 && errno == ENOENT)
/* The bpf_prog is in the dead row */
continue;
- if (CHECK(prog_fd < 0, "get-prog-fd(next_id)",
- "prog_fd %d next_id %d errno %d\n",
- prog_fd, next_id, errno))
+ if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -217,10 +179,9 @@ void test_bpf_obj_id(void)
* prog_info.map_ids = NULL
*/
prog_info.nr_map_ids = 1;
- err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
- if (CHECK(!err || errno != EFAULT,
- "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
- err, errno, EFAULT))
+ err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd"))
break;
bzero(&prog_info, sizeof(prog_info));
info_len = sizeof(prog_info);
@@ -228,30 +189,25 @@ void test_bpf_obj_id(void)
saved_map_id = *(int *)((long)prog_infos[i].map_ids);
prog_info.map_ids = prog_infos[i].map_ids;
prog_info.nr_map_ids = 2;
- err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
prog_infos[i].jited_prog_insns = 0;
prog_infos[i].xlated_prog_insns = 0;
- CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
- memcmp(&prog_info, &prog_infos[i], info_len) ||
- *(int *)(long)prog_info.map_ids != saved_map_id,
- "get-prog-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n",
- err, errno, info_len, sizeof(struct bpf_prog_info),
- memcmp(&prog_info, &prog_infos[i], info_len),
- *(int *)(long)prog_info.map_ids, saved_map_id);
+ cmp_res = memcmp(&prog_info, &prog_infos[i], info_len);
+
+ ASSERT_OK(err, "bpf_prog_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len");
+ ASSERT_OK(cmp_res, "memcmp");
+ ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id");
close(prog_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total prog id found by get_next_id",
- "nr_id_found %u(%u)\n",
- nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found");
/* Check bpf_map_get_next_id() */
nr_id_found = 0;
next_id = 0;
while (!bpf_map_get_next_id(next_id, &next_id)) {
struct bpf_map_info map_info = {};
- int map_fd;
+ int map_fd, cmp_res;
info_len = sizeof(map_info);
@@ -259,9 +215,7 @@ void test_bpf_obj_id(void)
if (map_fd < 0 && errno == ENOENT)
/* The bpf_map is in the dead row */
continue;
- if (CHECK(map_fd < 0, "get-map-fd(next_id)",
- "map_fd %d next_id %u errno %d\n",
- map_fd, next_id, errno))
+ if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -274,25 +228,19 @@ void test_bpf_obj_id(void)
nr_id_found++;
err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
goto done;
- err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
- CHECK(err || info_len != sizeof(struct bpf_map_info) ||
- memcmp(&map_info, &map_infos[i], info_len) ||
- array_value != array_magic_value,
- "check get-map-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n",
- err, errno, info_len, sizeof(struct bpf_map_info),
- memcmp(&map_info, &map_infos[i], info_len),
- array_value, array_magic_value);
+ err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
+ cmp_res = memcmp(&map_info, &map_infos[i], info_len);
+ ASSERT_OK(err, "bpf_map_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len");
+ ASSERT_OK(cmp_res, "memcmp");
+ ASSERT_EQ(array_value, array_magic_value, "array_value");
close(map_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total map id found by get_next_id",
- "nr_id_found %u(%u)\n",
- nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found");
/* Check bpf_link_get_next_id() */
nr_id_found = 0;
@@ -308,9 +256,7 @@ void test_bpf_obj_id(void)
if (link_fd < 0 && errno == ENOENT)
/* The bpf_link is in the dead row */
continue;
- if (CHECK(link_fd < 0, "get-link-fd(next_id)",
- "link_fd %d next_id %u errno %d\n",
- link_fd, next_id, errno))
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -322,20 +268,16 @@ void test_bpf_obj_id(void)
nr_id_found++;
- err = bpf_obj_get_info_by_fd(link_fd, &link_info, &info_len);
+ err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len);
cmp_res = memcmp(&link_info, &link_infos[i],
offsetof(struct bpf_link_info, raw_tracepoint));
- CHECK(err || info_len != sizeof(link_info) || cmp_res,
- "check get-link-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d\n",
- err, errno, info_len, sizeof(struct bpf_link_info),
- cmp_res);
+ ASSERT_OK(err, "bpf_link_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(link_info), "info_len");
+ ASSERT_OK(cmp_res, "memcmp");
close(link_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total link id found by get_next_id",
- "nr_id_found %u(%u)\n", nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found");
done:
for (i = 0; i < nr_iters; i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
new file mode 100644
index 000000000000..ee0458a5ce78
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+#include "bpf/libbpf_internal.h"
+
+static inline int sys_fsopen(const char *fsname, unsigned flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
+}
+
+static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
+{
+ return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
+}
+
+__attribute__((unused))
+static inline int sys_move_mount(int from_dfd, const char *from_path,
+ int to_dfd, const char *to_path,
+ unsigned int ms_flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, ms_flags);
+}
+
+static void bpf_obj_pinning_detached(void)
+{
+ LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts);
+ LIBBPF_OPTS(bpf_obj_get_opts, get_opts);
+ int fs_fd = -1, mnt_fd = -1;
+ int map_fd = -1, map_fd2 = -1;
+ int zero = 0, src_value, dst_value, err;
+ const char *map_name = "fsmount_map";
+
+ /* A bunch of below UAPI calls are constructed based on reading:
+ * https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html
+ */
+
+ /* create VFS context */
+ fs_fd = sys_fsopen("bpf", 0);
+ if (!ASSERT_GE(fs_fd, 0, "fs_fd"))
+ goto cleanup;
+
+ /* instantiate FS object */
+ err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+ if (!ASSERT_OK(err, "fs_create"))
+ goto cleanup;
+
+ /* create O_PATH fd for detached mount */
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (!ASSERT_GE(mnt_fd, 0, "mnt_fd"))
+ goto cleanup;
+
+ /* If we wanted to expose detached mount in the file system, we'd do
+ * something like below. But the whole point is that we actually don't
+ * even have to expose BPF FS in the file system to be able to work
+ * (pin/get objects) with it.
+ *
+ * err = sys_move_mount(mnt_fd, "", -EBADF, mnt_path, MOVE_MOUNT_F_EMPTY_PATH);
+ * if (!ASSERT_OK(err, "move_mount"))
+ * goto cleanup;
+ */
+
+ /* create BPF map to pin */
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ goto cleanup;
+
+ /* pin BPF map into detached BPF FS through mnt_fd */
+ pin_opts.file_flags = BPF_F_PATH_FD;
+ pin_opts.path_fd = mnt_fd;
+ err = bpf_obj_pin_opts(map_fd, map_name, &pin_opts);
+ if (!ASSERT_OK(err, "map_pin"))
+ goto cleanup;
+
+ /* get BPF map from detached BPF FS through mnt_fd */
+ get_opts.file_flags = BPF_F_PATH_FD;
+ get_opts.path_fd = mnt_fd;
+ map_fd2 = bpf_obj_get_opts(map_name, &get_opts);
+ if (!ASSERT_GE(map_fd2, 0, "map_get"))
+ goto cleanup;
+
+ /* update map through one FD */
+ src_value = 0xcafebeef;
+ err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+ ASSERT_OK(err, "map_update");
+
+ /* check values written/read through different FDs do match */
+ dst_value = 0;
+ err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+ ASSERT_OK(err, "map_lookup");
+ ASSERT_EQ(dst_value, src_value, "map_value_eq1");
+ ASSERT_EQ(dst_value, 0xcafebeef, "map_value_eq2");
+
+cleanup:
+ if (map_fd >= 0)
+ ASSERT_OK(close(map_fd), "close_map_fd");
+ if (map_fd2 >= 0)
+ ASSERT_OK(close(map_fd2), "close_map_fd2");
+ if (fs_fd >= 0)
+ ASSERT_OK(close(fs_fd), "close_fs_fd");
+ if (mnt_fd >= 0)
+ ASSERT_OK(close(mnt_fd), "close_mnt_fd");
+}
+
+enum path_kind
+{
+ PATH_STR_ABS,
+ PATH_STR_REL,
+ PATH_FD_REL,
+};
+
+static void validate_pin(int map_fd, const char *map_name, int src_value,
+ enum path_kind path_kind)
+{
+ LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts);
+ char abs_path[PATH_MAX], old_cwd[PATH_MAX];
+ const char *pin_path = NULL;
+ int zero = 0, dst_value, map_fd2, err;
+
+ snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name);
+ old_cwd[0] = '\0';
+
+ switch (path_kind) {
+ case PATH_STR_ABS:
+ /* absolute path */
+ pin_path = abs_path;
+ break;
+ case PATH_STR_REL:
+ /* cwd + relative path */
+ ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd");
+ ASSERT_OK(chdir("/sys/fs/bpf"), "chdir");
+ pin_path = map_name;
+ break;
+ case PATH_FD_REL:
+ /* dir fd + relative path */
+ pin_opts.file_flags = BPF_F_PATH_FD;
+ pin_opts.path_fd = open("/sys/fs/bpf", O_PATH);
+ ASSERT_GE(pin_opts.path_fd, 0, "path_fd");
+ pin_path = map_name;
+ break;
+ }
+
+ /* pin BPF map using specified path definition */
+ err = bpf_obj_pin_opts(map_fd, pin_path, &pin_opts);
+ ASSERT_OK(err, "obj_pin");
+
+ /* cleanup */
+ if (path_kind == PATH_FD_REL && pin_opts.path_fd >= 0)
+ close(pin_opts.path_fd);
+ if (old_cwd[0])
+ ASSERT_OK(chdir(old_cwd), "restore_cwd");
+
+ map_fd2 = bpf_obj_get(abs_path);
+ if (!ASSERT_GE(map_fd2, 0, "map_get"))
+ goto cleanup;
+
+ /* update map through one FD */
+ err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+ ASSERT_OK(err, "map_update");
+
+ /* check values written/read through different FDs do match */
+ dst_value = 0;
+ err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+ ASSERT_OK(err, "map_lookup");
+ ASSERT_EQ(dst_value, src_value, "map_value_eq");
+cleanup:
+ if (map_fd2 >= 0)
+ ASSERT_OK(close(map_fd2), "close_map_fd2");
+ unlink(abs_path);
+}
+
+static void validate_get(int map_fd, const char *map_name, int src_value,
+ enum path_kind path_kind)
+{
+ LIBBPF_OPTS(bpf_obj_get_opts, get_opts);
+ char abs_path[PATH_MAX], old_cwd[PATH_MAX];
+ const char *pin_path = NULL;
+ int zero = 0, dst_value, map_fd2, err;
+
+ snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name);
+ /* pin BPF map using specified path definition */
+ err = bpf_obj_pin(map_fd, abs_path);
+ if (!ASSERT_OK(err, "pin_map"))
+ return;
+
+ old_cwd[0] = '\0';
+
+ switch (path_kind) {
+ case PATH_STR_ABS:
+ /* absolute path */
+ pin_path = abs_path;
+ break;
+ case PATH_STR_REL:
+ /* cwd + relative path */
+ ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd");
+ ASSERT_OK(chdir("/sys/fs/bpf"), "chdir");
+ pin_path = map_name;
+ break;
+ case PATH_FD_REL:
+ /* dir fd + relative path */
+ get_opts.file_flags = BPF_F_PATH_FD;
+ get_opts.path_fd = open("/sys/fs/bpf", O_PATH);
+ ASSERT_GE(get_opts.path_fd, 0, "path_fd");
+ pin_path = map_name;
+ break;
+ }
+
+ map_fd2 = bpf_obj_get_opts(pin_path, &get_opts);
+ if (!ASSERT_GE(map_fd2, 0, "map_get"))
+ goto cleanup;
+
+ /* cleanup */
+ if (path_kind == PATH_FD_REL && get_opts.path_fd >= 0)
+ close(get_opts.path_fd);
+ if (old_cwd[0])
+ ASSERT_OK(chdir(old_cwd), "restore_cwd");
+
+ /* update map through one FD */
+ err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+ ASSERT_OK(err, "map_update");
+
+ /* check values written/read through different FDs do match */
+ dst_value = 0;
+ err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+ ASSERT_OK(err, "map_lookup");
+ ASSERT_EQ(dst_value, src_value, "map_value_eq");
+cleanup:
+ if (map_fd2 >= 0)
+ ASSERT_OK(close(map_fd2), "close_map_fd2");
+ unlink(abs_path);
+}
+
+static void bpf_obj_pinning_mounted(enum path_kind path_kind)
+{
+ const char *map_name = "mounted_map";
+ int map_fd;
+
+ /* create BPF map to pin */
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ return;
+
+ validate_pin(map_fd, map_name, 100 + (int)path_kind, path_kind);
+ validate_get(map_fd, map_name, 200 + (int)path_kind, path_kind);
+ ASSERT_OK(close(map_fd), "close_map_fd");
+}
+
+void test_bpf_obj_pinning()
+{
+ if (test__start_subtest("detached"))
+ bpf_obj_pinning_detached();
+ if (test__start_subtest("mounted-str-abs"))
+ bpf_obj_pinning_mounted(PATH_STR_ABS);
+ if (test__start_subtest("mounted-str-rel"))
+ bpf_obj_pinning_mounted(PATH_STR_REL);
+ if (test__start_subtest("mounted-fd-rel"))
+ bpf_obj_pinning_mounted(PATH_FD_REL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
new file mode 100644
index 000000000000..730357cd0c9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "bpf_qdisc_fifo.skel.h"
+#include "bpf_qdisc_fq.skel.h"
+#include "bpf_qdisc_fail__incompl_ops.skel.h"
+
+#define LO_IFINDEX 1
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+
+static void do_test(char *qdisc)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_ROOT,
+ .handle = 0x8000000,
+ .qdisc = qdisc);
+ int srv_fd = -1, cli_fd = -1;
+ int err;
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(err, "attach qdisc"))
+ return;
+
+ srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_OK_FD(srv_fd, "start server"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, 0);
+ if (!ASSERT_OK_FD(cli_fd, "connect to client"))
+ goto done;
+
+ err = send_recv_data(srv_fd, cli_fd, total_bytes);
+ ASSERT_OK(err, "send_recv_data");
+
+done:
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+
+ bpf_tc_hook_destroy(&hook);
+}
+
+static void test_fifo(void)
+{
+ struct bpf_qdisc_fifo *fifo_skel;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ do_test("bpf_fifo");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_fq(void)
+{
+ struct bpf_qdisc_fq *fq_skel;
+
+ fq_skel = bpf_qdisc_fq__open_and_load();
+ if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach"))
+ goto out;
+
+ do_test("bpf_fq");
+out:
+ bpf_qdisc_fq__destroy(fq_skel);
+}
+
+static void test_qdisc_attach_to_mq(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ hook.ifindex = if_nametoindex("veth0");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ err = bpf_tc_hook_create(&hook);
+ ASSERT_OK(err, "attach qdisc");
+
+ bpf_tc_hook_destroy(&hook);
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_qdisc_attach_to_non_root(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "tc qdisc add dev lo root handle 1: htb");
+ SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit");
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_ERR(err, "attach qdisc"))
+ bpf_tc_hook_destroy(&hook);
+
+out_del_htb:
+ SYS(out, "tc qdisc delete dev lo root htb");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_incompl_ops(void)
+{
+ struct bpf_qdisc_fail__incompl_ops *skel;
+ struct bpf_link *link;
+
+ skel = bpf_qdisc_fail__incompl_ops__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.test);
+ if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops"))
+ bpf_link__destroy(link);
+
+ bpf_qdisc_fail__incompl_ops__destroy(skel);
+}
+
+static int get_default_qdisc(char *qdisc_name)
+{
+ FILE *f;
+ int num;
+
+ f = fopen("/proc/sys/net/core/default_qdisc", "r");
+ if (!f)
+ return -errno;
+
+ num = fscanf(f, "%s", qdisc_name);
+ fclose(f);
+
+ return num == 1 ? 0 : -EFAULT;
+}
+
+static void test_default_qdisc_attach_to_mq(void)
+{
+ char default_qdisc[IFNAMSIZ] = {};
+ struct bpf_qdisc_fifo *fifo_skel;
+ struct netns_obj *netns = NULL;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ err = get_default_qdisc(default_qdisc);
+ if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc"))
+ goto out;
+
+ err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo");
+ if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc"))
+ goto out;
+
+ netns = netns_new("bpf_qdisc_ns", true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called");
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ netns_free(netns);
+ if (default_qdisc[0])
+ write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc);
+
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+void test_ns_bpf_qdisc(void)
+{
+ if (test__start_subtest("fifo"))
+ test_fifo();
+ if (test__start_subtest("fq"))
+ test_fq();
+ if (test__start_subtest("attach to mq"))
+ test_qdisc_attach_to_mq();
+ if (test__start_subtest("attach to non root"))
+ test_qdisc_attach_to_non_root();
+ if (test__start_subtest("incompl_ops"))
+ test_incompl_ops();
+}
+
+void serial_test_bpf_qdisc_default(void)
+{
+ test_default_qdisc_attach_to_mq();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 37c5494a0381..b7d1b52309d0 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -4,227 +4,603 @@
#include <linux/err.h>
#include <netinet/tcp.h>
#include <test_progs.h>
+#include "network_helpers.h"
#include "bpf_dctcp.skel.h"
#include "bpf_cubic.skel.h"
-
-#define min(a, b) ((a) < (b) ? (a) : (b))
+#include "bpf_tcp_nogpl.skel.h"
+#include "tcp_ca_update.skel.h"
+#include "bpf_dctcp_release.skel.h"
+#include "tcp_ca_write_sk_pacing.skel.h"
+#include "tcp_ca_incompl_cong_ops.skel.h"
+#include "tcp_ca_unsupp_cong_op.skel.h"
+#include "tcp_ca_kfunc.skel.h"
+#include "bpf_cc_cubic.skel.h"
static const unsigned int total_bytes = 10 * 1024 * 1024;
-static const struct timeval timeo_sec = { .tv_sec = 10 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
static int expected_stg = 0xeB9F;
-static int stop, duration;
-static int settimeo(int fd)
+struct cb_opts {
+ const char *cc;
+ int map_fd;
+};
+
+static int settcpca(int fd, const char *tcp_ca)
{
int err;
- err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n",
- errno))
+ err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca));
+ if (!ASSERT_NEQ(err, -1, "setsockopt"))
return -1;
return 0;
}
-static int settcpca(int fd, const char *tcp_ca)
+static bool start_test(char *addr_str,
+ const struct network_helper_opts *srv_opts,
+ const struct network_helper_opts *cli_opts,
+ int *srv_fd, int *cli_fd)
+{
+ *srv_fd = start_server_str(AF_INET6, SOCK_STREAM, addr_str, 0, srv_opts);
+ if (!ASSERT_NEQ(*srv_fd, -1, "start_server_str"))
+ goto err;
+
+ /* connect to server */
+ *cli_fd = connect_to_fd_opts(*srv_fd, cli_opts);
+ if (!ASSERT_NEQ(*cli_fd, -1, "connect_to_fd_opts"))
+ goto err;
+
+ return true;
+
+err:
+ if (*srv_fd != -1) {
+ close(*srv_fd);
+ *srv_fd = -1;
+ }
+ if (*cli_fd != -1) {
+ close(*cli_fd);
+ *cli_fd = -1;
+ }
+ return false;
+}
+
+static void do_test(const struct network_helper_opts *opts)
+{
+ int lfd = -1, fd = -1;
+
+ if (!start_test(NULL, opts, opts, &lfd, &fd))
+ goto done;
+
+ ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data");
+
+done:
+ if (lfd != -1)
+ close(lfd);
+ if (fd != -1)
+ close(fd);
+}
+
+static int cc_cb(int fd, void *opts)
+{
+ struct cb_opts *cb_opts = (struct cb_opts *)opts;
+
+ return settcpca(fd, cb_opts->cc);
+}
+
+static void test_cubic(void)
+{
+ struct cb_opts cb_opts = {
+ .cc = "bpf_cubic",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct bpf_cubic *cubic_skel;
+ struct bpf_link *link;
+
+ cubic_skel = bpf_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+ bpf_cubic__destroy(cubic_skel);
+ return;
+ }
+
+ do_test(&opts);
+
+ ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called");
+
+ bpf_link__destroy(link);
+ bpf_cubic__destroy(cubic_skel);
+}
+
+static int stg_post_socket_cb(int fd, void *opts)
{
+ struct cb_opts *cb_opts = (struct cb_opts *)opts;
int err;
- err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca));
- if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n",
- errno))
- return -1;
+ err = settcpca(fd, cb_opts->cc);
+ if (err)
+ return err;
+
+ err = bpf_map_update_elem(cb_opts->map_fd, &fd,
+ &expected_stg, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)"))
+ return err;
return 0;
}
-static void *server(void *arg)
+static void test_dctcp(void)
{
- int lfd = (int)(long)arg, err = 0, fd;
- ssize_t nr_sent = 0, bytes = 0;
- char batch[1500];
+ struct cb_opts cb_opts = {
+ .cc = "bpf_dctcp",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct network_helper_opts cli_opts = {
+ .post_socket_cb = stg_post_socket_cb,
+ .cb_opts = &cb_opts,
+ };
+ int lfd = -1, fd = -1, tmp_stg, err;
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link;
- fd = accept(lfd, NULL, NULL);
- while (fd == -1) {
- if (errno == EINTR)
- continue;
- err = -errno;
- goto done;
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+ bpf_dctcp__destroy(dctcp_skel);
+ return;
}
- if (settimeo(fd)) {
- err = -errno;
+ cb_opts.map_fd = bpf_map__fd(dctcp_skel->maps.sk_stg_map);
+ if (!start_test(NULL, &opts, &cli_opts, &lfd, &fd))
goto done;
- }
- while (bytes < total_bytes && !READ_ONCE(stop)) {
- nr_sent = send(fd, &batch,
- min(total_bytes - bytes, sizeof(batch)), 0);
- if (nr_sent == -1 && errno == EINTR)
- continue;
- if (nr_sent == -1) {
- err = -errno;
- break;
- }
- bytes += nr_sent;
- }
+ err = bpf_map_lookup_elem(cb_opts.map_fd, &fd, &tmp_stg);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") ||
+ !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)"))
+ goto done;
- CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n",
- bytes, total_bytes, nr_sent, errno);
+ ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data");
+ ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result");
done:
+ bpf_link__destroy(link);
+ bpf_dctcp__destroy(dctcp_skel);
+ if (lfd != -1)
+ close(lfd);
if (fd != -1)
close(fd);
- if (err) {
- WRITE_ONCE(stop, 1);
- return ERR_PTR(err);
+}
+
+static void test_dctcp_autoattach_map(void)
+{
+ struct cb_opts cb_opts = {
+ .cc = "bpf_dctcp",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link;
+
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load"))
+ return;
+
+ bpf_map__set_autoattach(dctcp_skel->maps.dctcp, true);
+ bpf_map__set_autoattach(dctcp_skel->maps.dctcp_nouse, false);
+
+ if (!ASSERT_OK(bpf_dctcp__attach(dctcp_skel), "bpf_dctcp__attach"))
+ goto destroy;
+
+ /* struct_ops is auto-attached */
+ link = dctcp_skel->links.dctcp;
+ if (!ASSERT_OK_PTR(link, "link"))
+ goto destroy;
+
+ do_test(&opts);
+
+destroy:
+ bpf_dctcp__destroy(dctcp_skel);
+}
+
+static char *err_str;
+static bool found;
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ const char *prog_name, *log_buf;
+
+ if (level != LIBBPF_WARN ||
+ !strstr(format, "-- BEGIN PROG LOAD LOG --")) {
+ vprintf(format, args);
+ return 0;
}
- return NULL;
+
+ prog_name = va_arg(args, char *);
+ log_buf = va_arg(args, char *);
+ if (!log_buf)
+ goto out;
+ if (err_str && strstr(log_buf, err_str) != NULL)
+ found = true;
+out:
+ printf(format, prog_name, log_buf);
+ return 0;
}
-static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
+static void test_invalid_license(void)
{
- struct sockaddr_in6 sa6 = {};
- ssize_t nr_recv = 0, bytes = 0;
- int lfd = -1, fd = -1;
- pthread_t srv_thread;
- socklen_t addrlen = sizeof(sa6);
- void *thread_ret;
- char batch[1500];
- int err;
+ libbpf_print_fn_t old_print_fn;
+ struct bpf_tcp_nogpl *skel;
- WRITE_ONCE(stop, 0);
+ err_str = "struct ops programs must have a GPL compatible license";
+ found = false;
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
- lfd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
- return;
- fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
- close(lfd);
+ skel = bpf_tcp_nogpl__open_and_load();
+ ASSERT_NULL(skel, "bpf_tcp_nogpl");
+ ASSERT_EQ(found, true, "expected_err_msg");
+
+ bpf_tcp_nogpl__destroy(skel);
+ libbpf_set_print(old_print_fn);
+}
+
+static void test_dctcp_fallback(void)
+{
+ int err, lfd = -1, cli_fd = -1, srv_fd = -1;
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link = NULL;
+ struct cb_opts dctcp = {
+ .cc = "bpf_dctcp",
+ };
+ struct network_helper_opts srv_opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &dctcp,
+ };
+ struct cb_opts cubic = {
+ .cc = "cubic",
+ };
+ struct network_helper_opts cli_opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cubic,
+ };
+ char srv_cc[16];
+ socklen_t cc_len = sizeof(srv_cc);
+
+ dctcp_skel = bpf_dctcp__open();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
return;
- }
+ strcpy(dctcp_skel->rodata->fallback_cc, "cubic");
+ if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load"))
+ goto done;
- if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
- settimeo(lfd) || settimeo(fd))
+ link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(link, "dctcp link"))
goto done;
- /* bind, listen and start server thread to accept */
- sa6.sin6_family = AF_INET6;
- sa6.sin6_addr = in6addr_loopback;
- err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
- if (CHECK(err == -1, "bind", "errno:%d\n", errno))
+ if (!start_test("::1", &srv_opts, &cli_opts, &lfd, &cli_fd))
goto done;
- err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
- if (CHECK(err == -1, "getsockname", "errno:%d\n", errno))
+
+ srv_fd = accept(lfd, NULL, 0);
+ if (!ASSERT_GE(srv_fd, 0, "srv_fd"))
goto done;
- err = listen(lfd, 1);
- if (CHECK(err == -1, "listen", "errno:%d\n", errno))
+ ASSERT_STREQ(dctcp_skel->bss->cc_res, "cubic", "cc_res");
+ ASSERT_EQ(dctcp_skel->bss->tcp_cdg_res, -ENOTSUPP, "tcp_cdg_res");
+ /* All setsockopt(TCP_CONGESTION) in the recurred
+ * bpf_dctcp->init() should fail with -EBUSY.
+ */
+ ASSERT_EQ(dctcp_skel->bss->ebusy_cnt, 3, "ebusy_cnt");
+
+ err = getsockopt(srv_fd, SOL_TCP, TCP_CONGESTION, srv_cc, &cc_len);
+ if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)"))
goto done;
+ ASSERT_STREQ(srv_cc, "cubic", "srv_fd cc");
- if (sk_stg_map) {
- err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd,
- &expected_stg, BPF_NOEXIST);
- if (CHECK(err, "bpf_map_update_elem(sk_stg_map)",
- "err:%d errno:%d\n", err, errno))
- goto done;
- }
+done:
+ bpf_link__destroy(link);
+ bpf_dctcp__destroy(dctcp_skel);
+ if (lfd != -1)
+ close(lfd);
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
- /* connect to server */
- err = connect(fd, (struct sockaddr *)&sa6, addrlen);
- if (CHECK(err == -1, "connect", "errno:%d\n", errno))
- goto done;
+static void test_rel_setsockopt(void)
+{
+ struct bpf_dctcp_release *rel_skel;
+ libbpf_print_fn_t old_print_fn;
- if (sk_stg_map) {
- int tmp_stg;
+ err_str = "program of this type cannot use helper bpf_setsockopt";
+ found = false;
- err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd,
- &tmp_stg);
- if (CHECK(!err || errno != ENOENT,
- "bpf_map_lookup_elem(sk_stg_map)",
- "err:%d errno:%d\n", err, errno))
- goto done;
- }
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+ rel_skel = bpf_dctcp_release__open_and_load();
+ libbpf_set_print(old_print_fn);
- err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
- if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno))
- goto done;
+ ASSERT_ERR_PTR(rel_skel, "rel_skel");
+ ASSERT_TRUE(found, "expected_err_msg");
- /* recv total_bytes */
- while (bytes < total_bytes && !READ_ONCE(stop)) {
- nr_recv = recv(fd, &batch,
- min(total_bytes - bytes, sizeof(batch)), 0);
- if (nr_recv == -1 && errno == EINTR)
- continue;
- if (nr_recv == -1)
- break;
- bytes += nr_recv;
- }
+ bpf_dctcp_release__destroy(rel_skel);
+}
- CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
- bytes, total_bytes, nr_recv, errno);
+static void test_write_sk_pacing(void)
+{
+ struct tcp_ca_write_sk_pacing *skel;
+ struct bpf_link *link;
- WRITE_ONCE(stop, 1);
- pthread_join(srv_thread, &thread_ret);
- CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
- PTR_ERR(thread_ret));
-done:
- close(lfd);
- close(fd);
+ skel = tcp_ca_write_sk_pacing__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ bpf_link__destroy(link);
+ tcp_ca_write_sk_pacing__destroy(skel);
}
-static void test_cubic(void)
+static void test_incompl_cong_ops(void)
{
- struct bpf_cubic *cubic_skel;
+ struct tcp_ca_incompl_cong_ops *skel;
struct bpf_link *link;
- cubic_skel = bpf_cubic__open_and_load();
- if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n"))
+ skel = tcp_ca_incompl_cong_ops__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
return;
- link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
- bpf_cubic__destroy(cubic_skel);
+ /* That cong_avoid() and cong_control() are missing is only reported at
+ * this point:
+ */
+ link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops);
+ ASSERT_ERR_PTR(link, "attach_struct_ops");
+
+ bpf_link__destroy(link);
+ tcp_ca_incompl_cong_ops__destroy(skel);
+}
+
+static void test_unsupp_cong_op(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct tcp_ca_unsupp_cong_op *skel;
+
+ err_str = "attach to unsupported member get_info";
+ found = false;
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ skel = tcp_ca_unsupp_cong_op__open_and_load();
+ ASSERT_NULL(skel, "open_and_load");
+ ASSERT_EQ(found, true, "expected_err_msg");
+
+ tcp_ca_unsupp_cong_op__destroy(skel);
+ libbpf_set_print(old_print_fn);
+}
+
+static void test_update_ca(void)
+{
+ struct cb_opts cb_opts = {
+ .cc = "tcp_ca_update",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+ int saved_ca1_cnt;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
return;
- }
- do_test("bpf_cubic", NULL);
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto out;
+
+ do_test(&opts);
+ saved_ca1_cnt = skel->bss->ca1_cnt;
+ ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+ err = bpf_link__update_map(link, skel->maps.ca_update_2);
+ ASSERT_OK(err, "update_map");
+
+ do_test(&opts);
+ ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+ ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
bpf_link__destroy(link);
- bpf_cubic__destroy(cubic_skel);
+out:
+ tcp_ca_update__destroy(skel);
}
-static void test_dctcp(void)
+static void test_update_wrong(void)
{
- struct bpf_dctcp *dctcp_skel;
+ struct cb_opts cb_opts = {
+ .cc = "tcp_ca_update",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct tcp_ca_update *skel;
struct bpf_link *link;
+ int saved_ca1_cnt;
+ int err;
- dctcp_skel = bpf_dctcp__open_and_load();
- if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n"))
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
return;
- link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
- bpf_dctcp__destroy(dctcp_skel);
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto out;
+
+ do_test(&opts);
+ saved_ca1_cnt = skel->bss->ca1_cnt;
+ ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+ err = bpf_link__update_map(link, skel->maps.ca_wrong);
+ ASSERT_ERR(err, "update_map");
+
+ do_test(&opts);
+ ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+
+ bpf_link__destroy(link);
+out:
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_mixed_links(void)
+{
+ struct cb_opts cb_opts = {
+ .cc = "tcp_ca_update",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct tcp_ca_update *skel;
+ struct bpf_link *link, *link_nl;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link);
+ if (!ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl"))
+ goto out;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ do_test(&opts);
+ ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt");
+
+ err = bpf_link__update_map(link, skel->maps.ca_no_link);
+ ASSERT_ERR(err, "update_map");
+
+ bpf_link__destroy(link);
+ bpf_link__destroy(link_nl);
+out:
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_multi_links(void)
+{
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+ bpf_link__destroy(link);
+
+ /* A map should be able to be used to create links multiple
+ * times.
+ */
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops_2nd");
+ bpf_link__destroy(link);
+
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_link_replace(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+ bpf_link__destroy(link);
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_2);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops_2nd"))
+ goto out;
+
+ /* BPF_F_REPLACE with a wrong old map Fd. It should fail!
+ *
+ * With BPF_F_REPLACE, the link should be updated only if the
+ * old map fd given here matches the map backing the link.
+ */
+ opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_1);
+ opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(link),
+ bpf_map__fd(skel->maps.ca_update_1),
+ &opts);
+ ASSERT_ERR(err, "bpf_link_update_fail");
+
+ /* BPF_F_REPLACE with a correct old map Fd. It should success! */
+ opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_2);
+ err = bpf_link_update(bpf_link__fd(link),
+ bpf_map__fd(skel->maps.ca_update_1),
+ &opts);
+ ASSERT_OK(err, "bpf_link_update_success");
+
+ bpf_link__destroy(link);
+
+out:
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_tcp_ca_kfunc(void)
+{
+ struct tcp_ca_kfunc *skel;
+
+ skel = tcp_ca_kfunc__open_and_load();
+ ASSERT_OK_PTR(skel, "tcp_ca_kfunc__open_and_load");
+ tcp_ca_kfunc__destroy(skel);
+}
+
+static void test_cc_cubic(void)
+{
+ struct cb_opts cb_opts = {
+ .cc = "bpf_cc_cubic",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct bpf_cc_cubic *cc_cubic_skel;
+ struct bpf_link *link;
+
+ cc_cubic_skel = bpf_cc_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cc_cubic_skel, "bpf_cc_cubic__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(cc_cubic_skel->maps.cc_cubic);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+ bpf_cc_cubic__destroy(cc_cubic_skel);
return;
}
- do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map);
- CHECK(dctcp_skel->bss->stg_result != expected_stg,
- "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n",
- dctcp_skel->bss->stg_result, expected_stg);
+ do_test(&opts);
bpf_link__destroy(link);
- bpf_dctcp__destroy(dctcp_skel);
+ bpf_cc_cubic__destroy(cc_cubic_skel);
}
void test_bpf_tcp_ca(void)
@@ -233,4 +609,32 @@ void test_bpf_tcp_ca(void)
test_dctcp();
if (test__start_subtest("cubic"))
test_cubic();
+ if (test__start_subtest("invalid_license"))
+ test_invalid_license();
+ if (test__start_subtest("dctcp_fallback"))
+ test_dctcp_fallback();
+ if (test__start_subtest("rel_setsockopt"))
+ test_rel_setsockopt();
+ if (test__start_subtest("write_sk_pacing"))
+ test_write_sk_pacing();
+ if (test__start_subtest("incompl_cong_ops"))
+ test_incompl_cong_ops();
+ if (test__start_subtest("unsupp_cong_op"))
+ test_unsupp_cong_op();
+ if (test__start_subtest("update_ca"))
+ test_update_ca();
+ if (test__start_subtest("update_wrong"))
+ test_update_wrong();
+ if (test__start_subtest("mixed_links"))
+ test_mixed_links();
+ if (test__start_subtest("multi_links"))
+ test_multi_links();
+ if (test__start_subtest("link_replace"))
+ test_link_replace();
+ if (test__start_subtest("tcp_ca_kfunc"))
+ test_tcp_ca_kfunc();
+ if (test__start_subtest("cc_cubic"))
+ test_cc_cubic();
+ if (test__start_subtest("dctcp_autoattach_map"))
+ test_dctcp_autoattach_map();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e698ee6bb6c2..73f669014b69 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -19,101 +19,215 @@ extern int extra_prog_load_log_flags;
static int check_load(const char *file, enum bpf_prog_type type)
{
- struct bpf_prog_load_attr attr;
struct bpf_object *obj = NULL;
- int err, prog_fd;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = type;
- attr.log_level = 4 | extra_prog_load_log_flags;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+ struct bpf_program *prog;
+ int err;
+
+ obj = bpf_object__open_file(file, NULL);
+ err = libbpf_get_error(obj);
+ if (err)
+ return err;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (!prog) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ bpf_program__set_type(prog, type);
+ bpf_program__set_flags(prog, testing_prog_flags());
+ bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags);
+
+ err = bpf_object__load(obj);
+
+err_out:
bpf_object__close(obj);
return err;
}
-struct scale_test_def {
- const char *file;
- enum bpf_prog_type attach_type;
- bool fails;
-};
-
-void test_bpf_verif_scale(void)
-{
- struct scale_test_def tests[] = {
- { "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ },
-
- { "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS },
- { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS },
- { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
-
- { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- /* full unroll by llvm */
- { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- /* partial unroll. llvm will unroll loop ~150 times.
- * C loop count -> 600.
- * Asm loop count -> 4.
- * 16k insns in loop body.
- * Total of 5 such loops. Total program size ~82k insns.
- */
- { "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- /* no unroll at all.
- * C loop count -> 600.
- * ASM loop count -> 600.
- * ~110 insns in loop body.
- * Total of 5 such loops. Total program size ~1500 insns.
- */
- { "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
- { "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
-
- /* partial unroll. 19k insn in a loop.
- * Total program size 20.8k insn.
- * ~350k processed_insns
- */
- { "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- /* no unroll, tiny loops */
- { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- /* non-inlined subprogs */
- { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
- { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
-
- { "test_xdp_loop.o", BPF_PROG_TYPE_XDP },
- { "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL },
- };
+static void scale_test(const char *file,
+ enum bpf_prog_type attach_type,
+ bool should_fail)
+{
libbpf_print_fn_t old_print_fn = NULL;
- int err, i;
+ int err;
if (env.verifier_stats) {
test__force_log();
old_print_fn = libbpf_set_print(libbpf_debug_print);
}
- for (i = 0; i < ARRAY_SIZE(tests); i++) {
- const struct scale_test_def *test = &tests[i];
-
- if (!test__start_subtest(test->file))
- continue;
-
- err = check_load(test->file, test->attach_type);
- CHECK_FAIL(err && !test->fails);
- }
+ err = check_load(file, attach_type);
+ if (should_fail)
+ ASSERT_ERR(err, "expect_error");
+ else
+ ASSERT_OK(err, "expect_success");
if (env.verifier_stats)
libbpf_set_print(old_print_fn);
}
+
+void test_verif_scale1()
+{
+ scale_test("test_verif_scale1.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale2()
+{
+ scale_test("test_verif_scale2.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale3()
+{
+ scale_test("test_verif_scale3.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale_pyperf_global()
+{
+ scale_test("pyperf_global.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf_subprogs()
+{
+ scale_test("pyperf_subprogs.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf50()
+{
+ /* full unroll by llvm */
+ scale_test("pyperf50.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf100()
+{
+ /* full unroll by llvm */
+ scale_test("pyperf100.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf180()
+{
+ /* full unroll by llvm */
+ scale_test("pyperf180.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600()
+{
+ /* partial unroll. llvm will unroll loop ~150 times.
+ * C loop count -> 600.
+ * Asm loop count -> 4.
+ * 16k insns in loop body.
+ * Total of 5 such loops. Total program size ~82k insns.
+ */
+ scale_test("pyperf600.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600_bpf_loop(void)
+{
+ /* use the bpf_loop helper*/
+ scale_test("pyperf600_bpf_loop.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600_nounroll()
+{
+ /* no unroll at all.
+ * C loop count -> 600.
+ * ASM loop count -> 600.
+ * ~110 insns in loop body.
+ * Total of 5 such loops. Total program size ~1500 insns.
+ */
+ scale_test("pyperf600_nounroll.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600_iter()
+{
+ /* open-coded BPF iterator version */
+ scale_test("pyperf600_iter.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_loop1()
+{
+ scale_test("loop1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_loop2()
+{
+ scale_test("loop2.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_loop3_fail()
+{
+ scale_test("loop3.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */);
+}
+
+void test_verif_scale_loop4()
+{
+ scale_test("loop4.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale_loop5()
+{
+ scale_test("loop5.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale_loop6()
+{
+ scale_test("loop6.bpf.o", BPF_PROG_TYPE_KPROBE, false);
+}
+
+void test_verif_scale_strobemeta()
+{
+ /* partial unroll. 19k insn in a loop.
+ * Total program size 20.8k insn.
+ * ~350k processed_insns
+ */
+ scale_test("strobemeta.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_bpf_loop(void)
+{
+ /* use the bpf_loop helper*/
+ scale_test("strobemeta_bpf_loop.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_nounroll1()
+{
+ /* no unroll, tiny loops */
+ scale_test("strobemeta_nounroll1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_nounroll2()
+{
+ /* no unroll, tiny loops */
+ scale_test("strobemeta_nounroll2.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_subprogs()
+{
+ /* non-inlined subprogs */
+ scale_test("strobemeta_subprogs.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_sysctl_loop1()
+{
+ scale_test("test_sysctl_loop1.bpf.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
+}
+
+void test_verif_scale_sysctl_loop2()
+{
+ scale_test("test_sysctl_loop2.bpf.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
+}
+
+void test_verif_scale_xdp_loop()
+{
+ scale_test("test_xdp_loop.bpf.o", BPF_PROG_TYPE_XDP, false);
+}
+
+void test_verif_scale_seg6_loop()
+{
+ scale_test("test_seg6_loop.bpf.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false);
+}
+
+void test_verif_twfw()
+{
+ scale_test("twfw.bpf.o", BPF_PROG_TYPE_CGROUP_SKB, false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 6a7ee7420701..054ecb6b1e9f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -8,7 +8,6 @@
#include <linux/filter.h>
#include <linux/unistd.h>
#include <bpf/bpf.h>
-#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <string.h>
@@ -22,7 +21,6 @@
#include <bpf/libbpf.h>
#include <bpf/btf.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "../test_btf.h"
#include "test_progs.h"
@@ -36,11 +34,10 @@ static bool always_log;
#undef CHECK
#define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
-#define BTF_END_RAW 0xdeadbeef
#define NAME_TBD 0xdeadb33f
-#define NAME_NTH(N) (0xffff0000 | N)
-#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000)
+#define NAME_NTH(N) (0xfffe0000 | N)
+#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xfffe0000)
#define GET_NAME_NTH_IDX(X) (X & 0x0000ffff)
#define MAX_NR_RAW_U32 1024
@@ -882,6 +879,34 @@ static struct btf_raw_test raw_tests[] = {
.btf_load_err = true,
.err_str = "Invalid elem",
},
+{
+ .descr = "var after datasec, ptr followed by modifier",
+ .raw_types = {
+ /* .bss section */ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2),
+ sizeof(void*)+4),
+ BTF_VAR_SECINFO_ENC(4, 0, sizeof(void*)),
+ BTF_VAR_SECINFO_ENC(6, sizeof(void*), 4),
+ /* int */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int* */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+ BTF_VAR_ENC(NAME_TBD, 3, 0), /* [4] */
+ /* const int */ /* [5] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0a\0b\0c\0",
+ .str_sec_size = sizeof("\0a\0b\0c\0"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void*)+4,
+ .key_type_id = 0,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
/* Test member exceeds the size of struct.
*
* struct A {
@@ -1903,7 +1928,7 @@ static struct btf_raw_test raw_tests[] = {
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
- BTF_TYPE_ENC(0, 0x10000000, 4),
+ BTF_TYPE_ENC(0, 0x20000000, 4),
BTF_END_RAW,
},
.str_sec = "",
@@ -2899,26 +2924,6 @@ static struct btf_raw_test raw_tests[] = {
},
{
- .descr = "invalid enum kind_flag",
- .raw_types = {
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */
- BTF_ENUM_ENC(NAME_TBD, 0),
- BTF_END_RAW,
- },
- BTF_STR_SEC("\0A"),
- .map_type = BPF_MAP_TYPE_ARRAY,
- .map_name = "enum_type_check_btf",
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .key_type_id = 1,
- .value_type_id = 1,
- .max_entries = 4,
- .btf_load_err = true,
- .err_str = "Invalid btf_info kind_flag",
-},
-
-{
.descr = "valid fwd kind_flag",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -3530,6 +3535,728 @@ static struct btf_raw_test raw_tests[] = {
.value_type_id = 1,
.max_entries = 1,
},
+{
+ .descr = "datasec: name '?.foo bar:buz' is ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0?.foo bar:buz"),
+},
+{
+ .descr = "datasec: name with non-printable first char not is ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0\7foo"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
+{
+ .descr = "datasec: name '\\0' is not ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC \0 */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
+{
+ .descr = "type name '?foo' is not ok",
+ .raw_types = {
+ /* union ?foo; */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0?foo"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
+
+{
+ .descr = "float test #1, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [2] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [3] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 8), /* [4] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 12), /* [5] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 16), /* [6] */
+ BTF_STRUCT_ENC(NAME_TBD, 5, 48), /* [7] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 3, 32),
+ BTF_MEMBER_ENC(NAME_TBD, 4, 64),
+ BTF_MEMBER_ENC(NAME_TBD, 5, 128),
+ BTF_MEMBER_ENC(NAME_TBD, 6, 256),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0_Float16\0float\0double\0_Float80\0long_double"
+ "\0floats\0a\0b\0c\0d\0e"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 1,
+ .value_type_id = 7,
+ .max_entries = 1,
+},
+{
+ .descr = "float test #2, invalid vlen",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 1), 4),
+ /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "vlen != 0",
+},
+{
+ .descr = "float test #3, invalid kind_flag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 1, 0), 4),
+ /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid btf_info kind_flag",
+},
+{
+ .descr = "float test #4, member does not fit",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
+ BTF_STRUCT_ENC(NAME_TBD, 1, 2), /* [3] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float\0floats\0x"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+{
+ .descr = "float test #5, member is not properly aligned",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
+ BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [3] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 8),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float\0floats\0x"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Member is not properly aligned",
+},
+{
+ .descr = "float test #6, invalid size",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 6), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 6,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_size",
+},
+
+{
+ .descr = "decl_tag test #1, struct/member, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag1\0tag2\0tag3"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #2, union/member, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_UNION_ENC(NAME_TBD, 2, 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #3, variable, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 1, 1), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, -1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0global\0tag1\0tag2"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #4, func/parameter, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, -1),
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, 0),
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0arg1\0arg2\0f\0tag1\0tag2\0tag3"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #5, invalid value",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_DECL_TAG_ENC(0, 2, -1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid value",
+},
+{
+ .descr = "decl_tag test #6, invalid target type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 1, -1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type",
+},
+{
+ .descr = "decl_tag test #7, invalid vlen",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 1), 2), (0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag1"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "vlen != 0",
+},
+{
+ .descr = "decl_tag test #8, tag with kflag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_DECL_ATTR_ENC(NAME_TBD, 2, -1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag1"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #9, var, invalid component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #10, struct member, invalid component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #11, func parameter, invalid component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0arg1\0arg2\0f\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #12, < -1 component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, -2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0arg1\0arg2\0f\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #13, typedef, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #14, typedef, invalid component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #15, func, invalid func proto",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, 0), /* [2] */
+ BTF_FUNC_ENC(NAME_TBD, 8), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0func"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "decl_tag test #16, func proto, return type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 2), (-1), /* [3] */
+ BTF_FUNC_PROTO_ENC(3, 0), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag1"),
+ .btf_load_err = true,
+ .err_str = "Invalid return type",
+},
+{
+ .descr = "decl_tag test #17, func proto, argument",
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 4), (-1), /* [1] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), /* [2] */
+ BTF_FUNC_PROTO_ENC(0, 1), /* [3] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag1\0var"),
+ .btf_load_err = true,
+ .err_str = "Invalid arg#1",
+},
+{
+ .descr = "decl_tag test #18, decl_tag as the map key type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag"),
+ .map_type = BPF_MAP_TYPE_HASH,
+ .map_name = "tag_type_check_btf",
+ .key_size = 8,
+ .value_size = 4,
+ .key_type_id = 3,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .map_create_err = true,
+},
+{
+ .descr = "decl_tag test #19, decl_tag as the map value type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag"),
+ .map_type = BPF_MAP_TYPE_HASH,
+ .map_name = "tag_type_check_btf",
+ .key_size = 4,
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .map_create_err = true,
+},
+{
+ .descr = "type_tag test #1",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "type_tag test #2, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_CONST_ENC(3), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #3, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(4), /* [3] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #4, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(4), /* [3] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #5, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(1), /* [3] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 2), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "type_tag test #6, type tag order",
+ .raw_types = {
+ BTF_PTR_ENC(2), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(4), /* [3] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */
+ BTF_PTR_ENC(6), /* [5] */
+ BTF_CONST_ENC(2), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #7, tag with kflag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ATTR_ENC(NAME_TBD, 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "enum64 test #1, unsigned, size 8",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [2] */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a\0b\0c"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
+{
+ .descr = "enum64 test #2, signed, size 4",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 1, 2), 4), /* [2] */
+ BTF_ENUM64_ENC(NAME_TBD, -1, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a\0b\0c"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
}; /* struct btf_raw_test raw_tests[] */
@@ -3637,20 +4364,40 @@ static void *btf_raw_create(const struct btf_header *hdr,
next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL;
done:
+ free(strs_idx);
if (err) {
- if (raw_btf)
- free(raw_btf);
- if (strs_idx)
- free(strs_idx);
+ free(raw_btf);
return NULL;
}
return raw_btf;
}
+static int load_raw_btf(const void *raw_data, size_t raw_size)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ int btf_fd;
+
+ if (always_log) {
+ opts.log_buf = btf_log_buf,
+ opts.log_size = BTF_LOG_BUF_SIZE,
+ opts.log_level = 1;
+ }
+
+ btf_fd = bpf_btf_load(raw_data, raw_size, &opts);
+ if (btf_fd < 0 && !always_log) {
+ opts.log_buf = btf_log_buf,
+ opts.log_size = BTF_LOG_BUF_SIZE,
+ opts.log_level = 1;
+ btf_fd = bpf_btf_load(raw_data, raw_size, &opts);
+ }
+
+ return btf_fd;
+}
+
static void do_test_raw(unsigned int test_num)
{
struct btf_raw_test *test = &raw_tests[test_num - 1];
- struct bpf_create_map_attr create_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
int map_fd = -1, btf_fd = -1;
unsigned int raw_btf_size;
struct btf_header *hdr;
@@ -3676,44 +4423,40 @@ static void do_test_raw(unsigned int test_num)
hdr->str_len = (int)hdr->str_len + test->str_len_delta;
*btf_log_buf = '\0';
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
free(raw_btf);
- err = ((btf_fd == -1) != test->btf_load_err);
+ err = ((btf_fd < 0) != test->btf_load_err);
if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
btf_fd, test->btf_load_err) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
- "expected err_str:%s", test->err_str)) {
+ "expected err_str:%s\n", test->err_str)) {
err = -1;
goto done;
}
- if (err || btf_fd == -1)
+ if (err || btf_fd < 0)
goto done;
- create_attr.name = test->map_name;
- create_attr.map_type = test->map_type;
- create_attr.key_size = test->key_size;
- create_attr.value_size = test->value_size;
- create_attr.max_entries = test->max_entries;
- create_attr.btf_fd = btf_fd;
- create_attr.btf_key_type_id = test->key_type_id;
- create_attr.btf_value_type_id = test->value_type_id;
+ if (!test->map_type)
+ goto done;
- map_fd = bpf_create_map_xattr(&create_attr);
+ opts.btf_fd = btf_fd;
+ opts.btf_key_type_id = test->key_type_id;
+ opts.btf_value_type_id = test->value_type_id;
+ map_fd = bpf_map_create(test->map_type, test->map_name,
+ test->key_size, test->value_size, test->max_entries, &opts);
- err = ((map_fd == -1) != test->map_create_err);
+ err = ((map_fd < 0) != test->map_create_err);
CHECK(err, "map_fd:%d test->map_create_err:%u",
map_fd, test->map_create_err);
done:
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
}
@@ -3808,10 +4551,8 @@ static int test_big_btf_info(unsigned int test_num)
goto done;
}
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
+ if (CHECK(btf_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -3827,7 +4568,7 @@ static int test_big_btf_info(unsigned int test_num)
info->btf = ptr_to_u64(user_btf);
info->btf_size = raw_btf_size;
- err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd, info, &info_len);
if (CHECK(!err, "!err")) {
err = -1;
goto done;
@@ -3840,7 +4581,7 @@ static int test_big_btf_info(unsigned int test_num)
* to userspace.
*/
info_garbage.garbage = 0;
- err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd, info, &info_len);
if (CHECK(err || info_len != sizeof(*info),
"err:%d errno:%d info_len:%u sizeof(*info):%zu",
err, errno, info_len, sizeof(*info))) {
@@ -3857,7 +4598,7 @@ done:
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
return err;
@@ -3866,7 +4607,7 @@ done:
static int test_btf_id(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
- struct bpf_create_map_attr create_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
uint8_t *raw_btf = NULL, *user_btf[2] = {};
int btf_fd[2] = {-1, -1}, map_fd = -1;
struct bpf_map_info map_info = {};
@@ -3896,30 +4637,28 @@ static int test_btf_id(unsigned int test_num)
info[i].btf_size = raw_btf_size;
}
- btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ btf_fd[0] = load_raw_btf(raw_btf, raw_btf_size);
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
/* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
info_len = sizeof(info[0]);
- err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd[0], &info[0], &info_len);
if (CHECK(err, "errno:%d", errno)) {
err = -1;
goto done;
}
btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
- if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
ret = 0;
- err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd[1], &info[1], &info_len);
if (CHECK(err || info[0].id != info[1].id ||
info[0].btf_size != info[1].btf_size ||
(ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
@@ -3931,23 +4670,18 @@ static int test_btf_id(unsigned int test_num)
}
/* Test btf members in struct bpf_map_info */
- create_attr.name = "test_btf_id";
- create_attr.map_type = BPF_MAP_TYPE_ARRAY;
- create_attr.key_size = sizeof(int);
- create_attr.value_size = sizeof(unsigned int);
- create_attr.max_entries = 4;
- create_attr.btf_fd = btf_fd[0];
- create_attr.btf_key_type_id = 1;
- create_attr.btf_value_type_id = 2;
-
- map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ opts.btf_fd = btf_fd[0];
+ opts.btf_key_type_id = 1;
+ opts.btf_value_type_id = 2;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_btf_id",
+ sizeof(int), sizeof(int), 4, &opts);
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
info_len = sizeof(map_info);
- err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+ err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
if (CHECK(err || map_info.btf_id != info[0].id ||
map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
"err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
@@ -3964,7 +4698,7 @@ static int test_btf_id(unsigned int test_num)
/* Test BTF ID is removed from the kernel */
btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -3974,11 +4708,6 @@ static int test_btf_id(unsigned int test_num)
/* The map holds the last ref to BTF and its btf_id */
close(map_fd);
map_fd = -1;
- btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
- err = -1;
- goto done;
- }
fprintf(stderr, "OK");
@@ -3987,11 +4716,11 @@ done:
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
for (i = 0; i < 2; i++) {
free(user_btf[i]);
- if (btf_fd[i] != -1)
+ if (btf_fd[i] >= 0)
close(btf_fd[i]);
}
@@ -4033,10 +4762,8 @@ static void do_test_get_info(unsigned int test_num)
goto done;
}
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
+ if (CHECK(btf_fd <= 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4052,7 +4779,7 @@ static void do_test_get_info(unsigned int test_num)
info.btf_size = user_btf_size;
ret = 0;
- err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len);
if (CHECK(err || !info.id || info_len != sizeof(info) ||
info.btf_size != raw_btf_size ||
(ret = memcmp(raw_btf, user_btf, expected_nbytes)),
@@ -4082,7 +4809,7 @@ done:
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
}
@@ -4092,9 +4819,8 @@ struct btf_file_test {
};
static struct btf_file_test file_tests[] = {
- { .file = "test_btf_haskv.o", },
- { .file = "test_btf_newkv.o", },
- { .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
+ { .file = "test_btf_newkv.bpf.o", },
+ { .file = "test_btf_nokv.bpf.o", .btf_kv_notfound = true, },
};
static void do_test_file(unsigned int test_num)
@@ -4119,8 +4845,9 @@ static void do_test_file(unsigned int test_num)
return;
btf = btf__parse_elf(test->file, &btf_ext);
- if (IS_ERR(btf)) {
- if (PTR_ERR(btf) == -ENOENT) {
+ err = libbpf_get_error(btf);
+ if (err) {
+ if (err == -ENOENT) {
printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
test__skip();
return;
@@ -4132,11 +4859,14 @@ static void do_test_file(unsigned int test_num)
has_btf_ext = btf_ext != NULL;
btf_ext__free(btf_ext);
+ /* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
obj = bpf_object__open(test->file);
- if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "obj: %d", err))
return;
- prog = bpf_program__next(NULL, obj);
+ prog = bpf_object__next_program(obj, NULL);
if (CHECK(!prog, "Cannot find bpf_prog")) {
err = -1;
goto done;
@@ -4166,9 +4896,9 @@ static void do_test_file(unsigned int test_num)
/* get necessary program info */
info_len = sizeof(struct bpf_prog_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4198,9 +4928,9 @@ static void do_test_file(unsigned int test_num)
info.func_info_rec_size = rec_size;
info.func_info = ptr_to_u64(func_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4218,7 +4948,8 @@ static void do_test_file(unsigned int test_num)
goto done;
}
- err = btf__get_from_id(info.btf_id, &btf);
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ err = libbpf_get_error(btf);
if (CHECK(err, "cannot get btf from kernel, err: %d", err))
goto done;
@@ -4254,6 +4985,9 @@ skip:
fprintf(stderr, "OK");
done:
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ btf__free(btf);
free(func_info);
bpf_object__close(obj);
}
@@ -4301,7 +5035,7 @@ struct pprint_mapv_int128 {
static struct btf_raw_test pprint_test_template[] = {
{
.raw_types = {
- /* unsighed char */ /* [1] */
+ /* unsigned char */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
/* unsigned short */ /* [2] */
BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
@@ -4368,7 +5102,7 @@ static struct btf_raw_test pprint_test_template[] = {
* be encoded with kind_flag set.
*/
.raw_types = {
- /* unsighed char */ /* [1] */
+ /* unsigned char */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
/* unsigned short */ /* [2] */
BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
@@ -4435,7 +5169,7 @@ static struct btf_raw_test pprint_test_template[] = {
* will have both int and enum types.
*/
.raw_types = {
- /* unsighed char */ /* [1] */
+ /* unsigned char */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
/* unsigned short */ /* [2] */
BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
@@ -4604,6 +5338,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind)
#endif
assert(0);
+ return 0;
}
static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind,
@@ -4725,7 +5460,7 @@ static void do_test_pprint(int test_num)
{
const struct btf_raw_test *test = &pprint_test_template[test_num];
enum pprint_mapv_kind_t mapv_kind = test->mapv_kind;
- struct bpf_create_map_attr create_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
bool ordered_map, lossless_map, percpu_map;
int err, ret, num_cpus, rounded_value_size;
unsigned int key, nr_read_elems;
@@ -4751,27 +5486,20 @@ static void do_test_pprint(int test_num)
return;
*btf_log_buf = '\0';
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
free(raw_btf);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "errno:%d\n", errno)) {
err = -1;
goto done;
}
- create_attr.name = test->map_name;
- create_attr.map_type = test->map_type;
- create_attr.key_size = test->key_size;
- create_attr.value_size = test->value_size;
- create_attr.max_entries = test->max_entries;
- create_attr.btf_fd = btf_fd;
- create_attr.btf_key_type_id = test->key_type_id;
- create_attr.btf_value_type_id = test->value_type_id;
-
- map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ opts.btf_fd = btf_fd;
+ opts.btf_key_type_id = test->key_type_id;
+ opts.btf_value_type_id = test->value_type_id;
+ map_fd = bpf_map_create(test->map_type, test->map_name,
+ test->key_size, test->value_size, test->max_entries, &opts);
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4779,7 +5507,7 @@ static void do_test_pprint(int test_num)
ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
"/sys/fs/bpf", test->map_name);
- if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
+ if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long",
"/sys/fs/bpf", test->map_name)) {
err = -1;
goto done;
@@ -4852,7 +5580,7 @@ static void do_test_pprint(int test_num)
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
}
@@ -4868,7 +5596,7 @@ static void do_test_pprint(int test_num)
cpu, cmapv);
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
cmapv = cmapv + rounded_value_size;
@@ -4906,9 +5634,9 @@ done:
fprintf(stderr, "OK");
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
if (pin_file)
fclose(pin_file);
@@ -5819,8 +6547,8 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
/* get necessary lens */
info_len = sizeof(struct bpf_prog_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
return -1;
}
@@ -5849,8 +6577,8 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
info.nr_func_info = nr_func_info;
info.func_info_rec_size = rec_size;
info.func_info = ptr_to_u64(func_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -5913,8 +6641,8 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
nr_jited_func_lens = nr_jited_ksyms;
info_len = sizeof(struct bpf_prog_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "err:%d errno:%d", err, errno)) {
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err < 0, "err:%d errno:%d", err, errno)) {
err = -1;
goto done;
}
@@ -5987,13 +6715,13 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
info.jited_func_lens = ptr_to_u64(jited_func_lens);
}
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
/*
* Only recheck the info.*line_info* fields.
* Other fields are not the concern of this test.
*/
- if (CHECK(err == -1 ||
+ if (CHECK(err < 0 ||
info.nr_line_info != cnt ||
(jited_cnt && !info.jited_line_info) ||
info.nr_jited_line_info != jited_cnt ||
@@ -6109,7 +6837,7 @@ done:
static void do_test_info_raw(unsigned int test_num)
{
const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1];
- unsigned int raw_btf_size, linfo_str_off, linfo_size;
+ unsigned int raw_btf_size, linfo_str_off, linfo_size = 0;
int btf_fd = -1, prog_fd = -1, err = 0;
void *raw_btf, *patched_linfo = NULL;
const char *ret_next_str;
@@ -6125,12 +6853,10 @@ static void do_test_info_raw(unsigned int test_num)
return;
*btf_log_buf = '\0';
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
free(raw_btf);
- if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) {
err = -1;
goto done;
}
@@ -6143,7 +6869,8 @@ static void do_test_info_raw(unsigned int test_num)
patched_linfo = patch_name_tbd(test->line_info,
test->str_sec, linfo_str_off,
test->str_sec_size, &linfo_size);
- if (IS_ERR(patched_linfo)) {
+ err = libbpf_get_error(patched_linfo);
+ if (err) {
fprintf(stderr, "error in creating raw bpf_line_info");
err = -1;
goto done;
@@ -6167,7 +6894,7 @@ static void do_test_info_raw(unsigned int test_num)
}
prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
- err = ((prog_fd == -1) != test->expected_prog_load_failure);
+ err = ((prog_fd < 0) != test->expected_prog_load_failure);
if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d",
prog_fd, test->expected_prog_load_failure, errno) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -6176,7 +6903,7 @@ static void do_test_info_raw(unsigned int test_num)
goto done;
}
- if (prog_fd == -1)
+ if (prog_fd < 0)
goto done;
err = test_get_finfo(test, prog_fd);
@@ -6193,12 +6920,12 @@ done:
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (prog_fd != -1)
+ if (prog_fd >= 0)
close(prog_fd);
- if (!IS_ERR(patched_linfo))
+ if (!libbpf_get_error(patched_linfo))
free(patched_linfo);
}
@@ -6215,7 +6942,7 @@ struct btf_dedup_test {
struct btf_dedup_opts opts;
};
-const struct btf_dedup_test dedup_tests[] = {
+static struct btf_dedup_test dedup_tests[] = {
{
.descr = "dedup: unused strings filtering",
@@ -6235,9 +6962,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0int\0long"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: strings deduplication",
@@ -6260,9 +6984,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0int\0long int"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: struct example #1",
@@ -6281,57 +7002,67 @@ const struct btf_dedup_test dedup_tests[] = {
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
- BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */
+ BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [3] */
BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */
+ BTF_MEMBER_ENC(NAME_NTH(8), 15, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
BTF_PTR_ENC(6), /* [5] */
/* const -> [1] int */
BTF_CONST_ENC(1), /* [6] */
+ /* tag -> [3] struct s */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */
+ /* tag -> [3] struct s, member 1 */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */
/* full copy of the above */
- BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */
- BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */
- BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */
- BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
- BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
- BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
- BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
- BTF_PTR_ENC(9), /* [10] */
- BTF_PTR_ENC(12), /* [11] */
- BTF_CONST_ENC(7), /* [12] */
+ BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [9] */
+ BTF_TYPE_ARRAY_ENC(9, 9, 16), /* [10] */
+ BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [11] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 12, 0),
+ BTF_MEMBER_ENC(NAME_NTH(4), 13, 64),
+ BTF_MEMBER_ENC(NAME_NTH(5), 10, 128),
+ BTF_MEMBER_ENC(NAME_NTH(6), 9, 640),
+ BTF_MEMBER_ENC(NAME_NTH(8), 15, 672),
+ BTF_PTR_ENC(11), /* [12] */
+ BTF_PTR_ENC(14), /* [13] */
+ BTF_CONST_ENC(9), /* [14] */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [15] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 11, -1), /* [16] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 11, 1), /* [17] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"),
+ BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
},
.expect = {
.raw_types = {
/* int */
- BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
- BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */
- BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */
+ BTF_STRUCT_ENC(NAME_NTH(8), 5, 88), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(7), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */
+ BTF_MEMBER_ENC(NAME_NTH(4), 9, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
BTF_PTR_ENC(6), /* [5] */
/* const -> [1] int */
BTF_CONST_ENC(1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [9] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"),
- },
- .opts = {
- .dont_resolve_fwds = false,
+ BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
},
},
{
@@ -6375,8 +7106,7 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0s\0x"),
},
.opts = {
- .dont_resolve_fwds = false,
- .dedup_table_size = 1, /* force hash collisions */
+ .force_collisions = true, /* force hash collisions */
},
},
{
@@ -6422,8 +7152,7 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0s\0x"),
},
.opts = {
- .dont_resolve_fwds = false,
- .dedup_table_size = 1, /* force hash collisions */
+ .force_collisions = true, /* force hash collisions */
},
},
{
@@ -6447,11 +7176,19 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_RESTRICT_ENC(8), /* [11] restrict */
BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
- BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
+ BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */
+ BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
+ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"),
},
.expect = {
.raw_types = {
@@ -6472,18 +7209,23 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_RESTRICT_ENC(8), /* [11] restrict */
BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
- BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
+ BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */
+ BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
+ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
- },
- .opts = {
- .dont_resolve_fwds = false,
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"),
},
},
{
- .descr = "dedup: no int duplicates",
+ .descr = "dedup: no int/float duplicates",
.input = {
.raw_types = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
@@ -6498,9 +7240,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+ /* all allowed sizes */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0some other int"),
+ BTF_STR_SEC("\0int\0some other int\0float"),
},
.expect = {
.raw_types = {
@@ -6516,12 +7264,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+ /* all allowed sizes */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0some other int"),
- },
- .opts = {
- .dont_resolve_fwds = false,
+ BTF_STR_SEC("\0int\0some other int\0float"),
},
},
{
@@ -6538,7 +7289,7 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_ENUM_ENC(NAME_NTH(4), 456),
/* [4] fwd enum 'e2' after full enum */
BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
- /* [5] incompatible fwd enum with different size */
+ /* [5] fwd enum with different size, size does not matter for fwd */
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
/* [6] incompatible full enum with different value */
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
@@ -6555,18 +7306,13 @@ const struct btf_dedup_test dedup_tests[] = {
/* [2] full enum 'e2' */
BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
BTF_ENUM_ENC(NAME_NTH(4), 456),
- /* [3] incompatible fwd enum with different size */
- BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
- /* [4] incompatible full enum with different value */
+ /* [3] incompatible full enum with different value */
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
BTF_ENUM_ENC(NAME_NTH(2), 321),
BTF_END_RAW,
},
BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: datasec and vars pass-through",
@@ -6609,11 +7355,738 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0.bss\0t"),
},
.opts = {
- .dont_resolve_fwds = false,
- .dedup_table_size = 1
+ .force_collisions = true
+ },
+},
+{
+ .descr = "dedup: func/func_arg/var tags",
+ .input = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* static int t */
+ BTF_VAR_ENC(NAME_NTH(1), 1, 0), /* [2] */
+ /* void f(int a1, int a2) */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [3] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
+ BTF_FUNC_ENC(NAME_NTH(4), 3), /* [4] */
+ /* tag -> t */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */
+ /* tag -> func */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [7] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [8] */
+ /* tag -> func arg a1 */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [9] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [10] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_NTH(1), 1, 0), /* [2] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [3] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
+ BTF_FUNC_ENC(NAME_NTH(4), 3), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"),
+ },
+},
+{
+ .descr = "dedup: func/func_param tags",
+ .input = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* void f(int a1, int a2) */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */
+ /* void f(int a1, int a2) */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [4] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_ENC(NAME_NTH(3), 4), /* [5] */
+ /* tag -> f: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [7] */
+ /* tag -> f/a2: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [8] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [9] */
+ /* tag -> f: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 5, -1), /* [10] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 5, -1), /* [11] */
+ /* tag -> f/a2: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 5, 1), /* [12] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 5, 1), /* [13] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [5] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [7] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [8] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [9] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"),
+ },
+},
+{
+ .descr = "dedup: struct/struct_member tags",
+ .input = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+ /* tag -> t: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */
+ /* tag -> t/m2: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */
+ /* tag -> t: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [8] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [9] */
+ /* tag -> t/m2: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [10] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [11] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 2, -1), /* [5] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 2, 1), /* [8] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+ },
+},
+{
+ .descr = "dedup: recursive typedef",
+ /*
+ * This test simulates a recursive typedef, which in GO is defined as such:
+ *
+ * type Foo func() Foo
+ *
+ * In BTF terms, this is represented as a TYPEDEF referencing
+ * a FUNC_PROTO that returns the same TYPEDEF.
+ */
+ .input = {
+ .raw_types = {
+ /*
+ * [1] typedef Foo -> func() Foo
+ * [2] func_proto() -> Foo
+ * [3] typedef Foo -> func() Foo
+ * [4] func_proto() -> Foo
+ */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
+ BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 4), /* [3] */
+ BTF_FUNC_PROTO_ENC(3, 0), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0Foo"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
+ BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0Foo"),
+ },
+},
+{
+ .descr = "dedup: typedef",
+ /*
+ * // CU 1:
+ * typedef int foo;
+ *
+ * // CU 2:
+ * typedef int foo;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ /* CU 2 */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo"),
+ },
+},
+{
+ .descr = "dedup: typedef tags",
+ .input = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [3] */
+ /* tag -> t: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [5] */
+ /* tag -> t: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #1",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [5] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 5), /* [6] */
+ BTF_PTR_ENC(6), /* [7] */
+ /* ptr -> tag1 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [8] */
+ BTF_PTR_ENC(8), /* [9] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag1 -> int */
+ BTF_PTR_ENC(2), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #2",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #3",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag1 -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */
+ BTF_PTR_ENC(6), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag1 -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */
+ BTF_PTR_ENC(6), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #4",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ /* ptr -> tag1 -> long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ /* ptr -> tag1 -> long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #5, struct",
+ .input = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)),
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [4] */
+ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [5] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 4, BTF_MEMBER_OFFSET(0, 0)),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0t\0m"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0t\0m"),
+ },
+},
+{
+ .descr = "dedup: enum64, standalone",
+ .input = {
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
+{
+ .descr = "dedup: enum64, fwd resolution",
+ .input = {
+ .raw_types = {
+ /* [1] fwd enum64 'e1' before full enum */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [2] full enum64 'e1' after fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ /* [3] full enum64 'e2' before fwd */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(4), 0, 456),
+ /* [4] fwd enum64 'e2' after full enum */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [5] incompatible full enum64 with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 0, 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] full enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ /* [2] full enum64 'e2' */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(4), 0, 456),
+ /* [3] incompatible full enum64 with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 0, 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+},
+{
+ .descr = "dedup: enum and enum64, no dedup",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
+{
+ .descr = "dedup: enum of different size: no dedup",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
+{
+ .descr = "dedup: enum fwd to enum64",
+ .input = {
+ .raw_types = {
+ /* [1] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ /* [2] enum 'e1' fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
+ /* [3] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ /* [2] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+},
+{
+ .descr = "dedup: enum64 fwd to enum",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [3] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration struct",
+ /*
+ * Verify that CU1:foo and CU2:foo would be unified and that
+ * typedef/ptr would be updated to point to CU1:foo.
+ *
+ * // CU 1:
+ * struct foo { int x; };
+ *
+ * // CU 2:
+ * struct foo;
+ * typedef struct foo *foo_ptr;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ BTF_PTR_ENC(1), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration union",
+ /*
+ * Verify that CU1:foo and CU2:foo would be unified and that
+ * typedef/ptr would be updated to point to CU1:foo.
+ * Same as "dedup: standalone fwd declaration struct" but for unions.
+ *
+ * // CU 1:
+ * union foo { int x; };
+ *
+ * // CU 2:
+ * union foo;
+ * typedef union foo *foo_ptr;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_TBD, 1), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ BTF_PTR_ENC(1), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration wrong kind",
+ /*
+ * Negative test for btf_dedup_resolve_fwds:
+ * - CU1:foo is a struct, C2:foo is a union, thus CU2:foo is not deduped;
+ * - typedef/ptr should remain unchanged as well.
+ *
+ * // CU 1:
+ * struct foo { int x; };
+ *
+ * // CU 2:
+ * union foo;
+ * typedef union foo *foo_ptr;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration name conflict",
+ /*
+ * Negative test for btf_dedup_resolve_fwds:
+ * - two candidates for CU2:foo dedup, thus it is unchanged;
+ * - typedef/ptr should remain unchanged as well.
+ *
+ * // CU 1:
+ * struct foo { int x; };
+ *
+ * // CU 2:
+ * struct foo;
+ * typedef struct foo *foo_ptr;
+ *
+ * // CU 3:
+ * struct foo { int x; int y; };
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */
+ /* CU 3 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */
+ /* CU 3 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"),
},
},
-
};
static int btf_type_size(const struct btf_type *t)
@@ -6630,11 +8103,15 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
case BTF_KIND_ENUM:
return base_size + vlen * sizeof(struct btf_enum);
+ case BTF_KIND_ENUM64:
+ return base_size + vlen * sizeof(struct btf_enum64);
case BTF_KIND_ARRAY:
return base_size + sizeof(struct btf_array);
case BTF_KIND_STRUCT:
@@ -6646,6 +8123,8 @@ static int btf_type_size(const struct btf_type *t)
return base_size + sizeof(struct btf_var);
case BTF_KIND_DATASEC:
return base_size + vlen * sizeof(struct btf_var_secinfo);
+ case BTF_KIND_DECL_TAG:
+ return base_size + sizeof(struct btf_decl_tag);
default:
fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind);
return -EINVAL;
@@ -6666,7 +8145,7 @@ static void dump_btf_strings(const char *strs, __u32 len)
static void do_test_dedup(unsigned int test_num)
{
- const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
+ struct btf_dedup_test *test = &dedup_tests[test_num - 1];
__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
const struct btf_header *test_hdr, *expect_hdr;
struct btf *test_btf = NULL, *expect_btf = NULL;
@@ -6689,9 +8168,9 @@ static void do_test_dedup(unsigned int test_num)
return;
test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(test_btf);
free(raw_btf);
- if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
- PTR_ERR(test_btf))) {
+ if (CHECK(err, "invalid test_btf errno:%d", err)) {
err = -1;
goto done;
}
@@ -6703,21 +8182,22 @@ static void do_test_dedup(unsigned int test_num)
if (!raw_btf)
return;
expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(expect_btf);
free(raw_btf);
- if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
- PTR_ERR(expect_btf))) {
+ if (CHECK(err, "invalid expect_btf errno:%d", err)) {
err = -1;
goto done;
}
- err = btf__dedup(test_btf, NULL, &test->opts);
+ test->opts.sz = sizeof(test->opts);
+ err = btf__dedup(test_btf, &test->opts);
if (CHECK(err, "btf_dedup failed errno:%d", err)) {
err = -1;
goto done;
}
- test_btf_data = btf__get_raw_data(test_btf, &test_btf_size);
- expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size);
+ test_btf_data = btf__raw_data(test_btf, &test_btf_size);
+ expect_btf_data = btf__raw_data(expect_btf, &expect_btf_size);
if (CHECK(test_btf_size != expect_btf_size,
"test_btf_size:%u != expect_btf_size:%u",
test_btf_size, expect_btf_size)) {
@@ -6771,8 +8251,8 @@ static void do_test_dedup(unsigned int test_num)
expect_str_cur += expect_len + 1;
}
- test_nr_types = btf__get_nr_types(test_btf);
- expect_nr_types = btf__get_nr_types(expect_btf);
+ test_nr_types = btf__type_cnt(test_btf);
+ expect_nr_types = btf__type_cnt(expect_btf);
if (CHECK(test_nr_types != expect_nr_types,
"test_nr_types:%u != expect_nr_types:%u",
test_nr_types, expect_nr_types)) {
@@ -6780,7 +8260,7 @@ static void do_test_dedup(unsigned int test_num)
goto done;
}
- for (i = 1; i <= test_nr_types; i++) {
+ for (i = 1; i < test_nr_types; i++) {
const struct btf_type *test_type, *expect_type;
int test_size, expect_size;
@@ -6816,10 +8296,8 @@ static void do_test_dedup(unsigned int test_num)
}
done:
- if (!IS_ERR(test_btf))
- btf__free(test_btf);
- if (!IS_ERR(expect_btf))
- btf__free(expect_btf);
+ btf__free(test_btf);
+ btf__free(expect_btf);
}
void test_btf(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
index 64554fd33547..5bc15bb6b7ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -92,7 +92,7 @@ struct s2 {\n\
int *f3;\n\
};\n\n", "c_dump");
- err = btf__dedup(btf2, NULL, NULL);
+ err = btf__dedup(btf2, NULL);
if (!ASSERT_OK(err, "btf_dedup"))
goto cleanup;
@@ -143,6 +143,10 @@ static void test_split_fwd_resolve() {
btf__add_struct(btf1, "s2", 4); /* [5] struct s2 { */
btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
/* } */
+ /* keep this not a part of type the graph to test btf_dedup_resolve_fwds */
+ btf__add_struct(btf1, "s3", 4); /* [6] struct s3 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
VALIDATE_RAW_BTF(
btf1,
@@ -153,20 +157,24 @@ static void test_split_fwd_resolve() {
"\t'f1' type_id=2 bits_offset=0\n"
"\t'f2' type_id=3 bits_offset=64",
"[5] STRUCT 's2' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[6] STRUCT 's3' size=4 vlen=1\n"
"\t'f1' type_id=1 bits_offset=0");
btf2 = btf__new_empty_split(btf1);
if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
goto cleanup;
- btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */
- btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */
- btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */
- btf__add_ptr(btf2, 8); /* [9] ptr to fwd struct s2 */
- btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */
- btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */
- btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */
+ btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [7] int */
+ btf__add_ptr(btf2, 11); /* [8] ptr to struct s1 */
+ btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [9] fwd for struct s2 */
+ btf__add_ptr(btf2, 9); /* [10] ptr to fwd struct s2 */
+ btf__add_struct(btf2, "s1", 16); /* [11] struct s1 { */
+ btf__add_field(btf2, "f1", 8, 0, 0); /* struct s1 *f1; */
+ btf__add_field(btf2, "f2", 10, 64, 0); /* struct s2 *f2; */
/* } */
+ btf__add_fwd(btf2, "s3", BTF_FWD_STRUCT); /* [12] fwd for struct s3 */
+ btf__add_ptr(btf2, 12); /* [13] ptr to struct s1 */
VALIDATE_RAW_BTF(
btf2,
@@ -178,15 +186,19 @@ static void test_split_fwd_resolve() {
"\t'f2' type_id=3 bits_offset=64",
"[5] STRUCT 's2' size=4 vlen=1\n"
"\t'f1' type_id=1 bits_offset=0",
- "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
- "[7] PTR '(anon)' type_id=10",
- "[8] FWD 's2' fwd_kind=struct",
- "[9] PTR '(anon)' type_id=8",
- "[10] STRUCT 's1' size=16 vlen=2\n"
- "\t'f1' type_id=7 bits_offset=0\n"
- "\t'f2' type_id=9 bits_offset=64");
-
- err = btf__dedup(btf2, NULL, NULL);
+ "[6] STRUCT 's3' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[7] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[8] PTR '(anon)' type_id=11",
+ "[9] FWD 's2' fwd_kind=struct",
+ "[10] PTR '(anon)' type_id=9",
+ "[11] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=8 bits_offset=0\n"
+ "\t'f2' type_id=10 bits_offset=64",
+ "[12] FWD 's3' fwd_kind=struct",
+ "[13] PTR '(anon)' type_id=12");
+
+ err = btf__dedup(btf2, NULL);
if (!ASSERT_OK(err, "btf_dedup"))
goto cleanup;
@@ -199,7 +211,10 @@ static void test_split_fwd_resolve() {
"\t'f1' type_id=2 bits_offset=0\n"
"\t'f2' type_id=3 bits_offset=64",
"[5] STRUCT 's2' size=4 vlen=1\n"
- "\t'f1' type_id=1 bits_offset=0");
+ "\t'f1' type_id=1 bits_offset=0",
+ "[6] STRUCT 's3' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[7] PTR '(anon)' type_id=6");
cleanup:
btf__free(btf2);
@@ -283,7 +298,7 @@ static void test_split_struct_duped() {
"[13] STRUCT 's3' size=8 vlen=1\n"
"\t'f1' type_id=12 bits_offset=0");
- err = btf__dedup(btf2, NULL, NULL);
+ err = btf__dedup(btf2, NULL);
if (!ASSERT_OK(err, "btf_dedup"))
goto cleanup;
@@ -314,6 +329,216 @@ cleanup:
btf__free(btf1);
}
+static void btf_add_dup_struct_in_cu(struct btf *btf, int start_id)
+{
+#define ID(n) (start_id + n)
+ btf__set_pointer_size(btf, 8); /* enforce 64-bit arch */
+
+ btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */
+
+ btf__add_struct(btf, "s", 8); /* [2] struct s { */
+ btf__add_field(btf, "a", ID(3), 0, 0); /* struct anon a; */
+ btf__add_field(btf, "b", ID(4), 0, 0); /* struct anon b; */
+ /* } */
+
+ btf__add_struct(btf, "(anon)", 8); /* [3] struct anon { */
+ btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */
+ btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */
+ /* } */
+
+ btf__add_struct(btf, "(anon)", 8); /* [4] struct anon { */
+ btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */
+ btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */
+ /* } */
+#undef ID
+}
+
+static void test_split_dup_struct_in_cu()
+{
+ struct btf *btf1, *btf2 = NULL;
+ int err;
+
+ /* generate the base data.. */
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf_add_dup_struct_in_cu(btf1, 0);
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=4 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32",
+ "[4] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32");
+
+ /* ..dedup them... */
+ err = btf__dedup(btf1, NULL);
+ if (!ASSERT_OK(err, "btf_dedup"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=3 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32");
+
+ /* and add the same data on top of it */
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf_add_dup_struct_in_cu(btf2, 3);
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=3 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32",
+ "[4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[5] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=6 bits_offset=0\n"
+ "\t'b' type_id=7 bits_offset=0",
+ "[6] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=4 bits_offset=0\n"
+ "\t'f2' type_id=4 bits_offset=32",
+ "[7] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=4 bits_offset=0\n"
+ "\t'f2' type_id=4 bits_offset=32");
+
+ err = btf__dedup(btf2, NULL);
+ if (!ASSERT_OK(err, "btf_dedup"))
+ goto cleanup;
+
+ /* after dedup it should match the original data */
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=3 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32");
+
+cleanup:
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* Ensure module split BTF dedup worked correctly; when dedup fails badly
+ * core kernel types are in split BTF also, so ensure that references to
+ * such types point at base - not split - BTF.
+ *
+ * bpf_testmod_test_write() has multiple core kernel type parameters;
+ *
+ * ssize_t
+ * bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ * struct bin_attribute *bin_attr,
+ * char *buf, loff_t off, size_t len);
+ *
+ * Ensure each of the FUNC_PROTO params is a core kernel type.
+ *
+ * Do the same for
+ *
+ * __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk);
+ *
+ * ...and
+ *
+ * __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb);
+ *
+ */
+const char *mod_funcs[] = {
+ "bpf_testmod_test_write",
+ "bpf_kfunc_call_test3",
+ "bpf_kfunc_call_test_pass_ctx"
+};
+
+static void test_split_module(void)
+{
+ struct btf *vmlinux_btf, *btf1 = NULL;
+ int i, nr_base_types;
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux_btf"))
+ return;
+ nr_base_types = btf__type_cnt(vmlinux_btf);
+ if (!ASSERT_GT(nr_base_types, 0, "nr_base_types"))
+ goto cleanup;
+
+ btf1 = btf__parse_split("/sys/kernel/btf/bpf_testmod", vmlinux_btf);
+ if (!ASSERT_OK_PTR(btf1, "split_btf"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) {
+ const struct btf_param *p;
+ const struct btf_type *t;
+ __u16 vlen;
+ __u32 id;
+ int j;
+
+ id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC);
+ if (!ASSERT_GE(id, nr_base_types, "func_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "func_id_type"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, t->type);
+ if (!ASSERT_OK_PTR(t, "func_proto_id_type"))
+ goto cleanup;
+ if (!ASSERT_EQ(btf_is_func_proto(t), true, "is_func_proto"))
+ goto cleanup;
+ vlen = btf_vlen(t);
+
+ for (j = 0, p = btf_params(t); j < vlen; j++, p++) {
+ /* bpf_testmod uses resilient split BTF, so any
+ * reference types will be added to split BTF and their
+ * associated targets will be base BTF types; for example
+ * for a "struct sock *" the PTR will be in split BTF
+ * while the "struct sock" will be in base.
+ *
+ * In some cases like loff_t we have to resolve
+ * multiple typedefs hence the while() loop below.
+ *
+ * Note that resilient split BTF generation depends
+ * on pahole version, so we do not assert that
+ * reference types are in split BTF, as if pahole
+ * does not support resilient split BTF they will
+ * also be base BTF types.
+ */
+ id = p->type;
+ do {
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "param_ref_type"))
+ goto cleanup;
+ if (!btf_is_mod(t) && !btf_is_ptr(t) && !btf_is_typedef(t))
+ break;
+ id = t->type;
+ } while (true);
+
+ if (!ASSERT_LT(id, nr_base_types, "verify_base_type"))
+ goto cleanup;
+ }
+ }
+cleanup:
+ btf__free(btf1);
+ btf__free(vmlinux_btf);
+}
+
void test_btf_dedup_split()
{
if (test__start_subtest("split_simple"))
@@ -322,4 +547,8 @@ void test_btf_dedup_split()
test_split_struct_duped();
if (test__start_subtest("split_fwd_resolve"))
test_split_fwd_resolve();
+ if (test__start_subtest("split_dup_struct_in_cu"))
+ test_split_dup_struct_in_cu();
+ if (test__start_subtest("split_module"))
+ test_split_module();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_distill.c b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
new file mode 100644
index 000000000000..fb67ae195a73
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+
+/* Fabricate base, split BTF with references to base types needed; then create
+ * split BTF with distilled base BTF and ensure expectations are met:
+ * - only referenced base types from split BTF are present
+ * - struct/union/enum are represented as empty unless anonymous, when they
+ * are represented in full in split BTF
+ */
+static void test_distilled_base(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(btf1, 1); /* [2] ptr to int */
+ btf__add_struct(btf1, "s1", 8); /* [3] struct s1 { */
+ btf__add_field(btf1, "f1", 2, 0, 0); /* int *f1; */
+ /* } */
+ btf__add_struct(btf1, "", 12); /* [4] struct { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ btf__add_field(btf1, "f2", 3, 32, 0); /* struct s1 f2; */
+ /* } */
+ btf__add_int(btf1, "unsigned int", 4, 0); /* [5] unsigned int */
+ btf__add_union(btf1, "u1", 12); /* [6] union u1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ btf__add_field(btf1, "f2", 2, 0, 0); /* int *f2; */
+ /* } */
+ btf__add_union(btf1, "", 4); /* [7] union { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_enum(btf1, "e1", 4); /* [8] enum e1 { */
+ btf__add_enum_value(btf1, "v1", 1); /* v1 = 1; */
+ /* } */
+ btf__add_enum(btf1, "", 4); /* [9] enum { */
+ btf__add_enum_value(btf1, "av1", 2); /* av1 = 2; */
+ /* } */
+ btf__add_enum64(btf1, "e641", 8, true); /* [10] enum64 { */
+ btf__add_enum64_value(btf1, "v1", 1024); /* v1 = 1024; */
+ /* } */
+ btf__add_enum64(btf1, "", 8, true); /* [11] enum64 { */
+ btf__add_enum64_value(btf1, "v1", 1025); /* v1 = 1025; */
+ /* } */
+ btf__add_struct(btf1, "unneeded", 4); /* [12] struct unneeded { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_struct(btf1, "embedded", 4); /* [13] struct embedded { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_func_proto(btf1, 1); /* [14] int (*)(int *p1); */
+ btf__add_func_param(btf1, "p1", 1);
+
+ btf__add_array(btf1, 1, 1, 3); /* [15] int [3]; */
+
+ btf__add_struct(btf1, "from_proto", 4); /* [16] struct from_proto { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_union(btf1, "u1", 4); /* [17] union u1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=8 vlen=1\n"
+ "\t'f1' type_id=2 bits_offset=0",
+ "[4] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=0",
+ "[7] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'v1' val=1",
+ "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1024",
+ "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[12] STRUCT 'unneeded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[13] STRUCT 'embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3",
+ "[16] STRUCT 'from_proto' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[17] UNION 'u1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0");
+
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf__add_ptr(btf2, 3); /* [18] ptr to struct s1 */
+ /* add ptr to struct anon */
+ btf__add_ptr(btf2, 4); /* [19] ptr to struct (anon) */
+ btf__add_const(btf2, 6); /* [20] const union u1 */
+ btf__add_restrict(btf2, 7); /* [21] restrict union (anon) */
+ btf__add_volatile(btf2, 8); /* [22] volatile enum e1 */
+ btf__add_typedef(btf2, "et", 9); /* [23] typedef enum (anon) */
+ btf__add_const(btf2, 10); /* [24] const enum64 e641 */
+ btf__add_ptr(btf2, 11); /* [25] restrict enum64 (anon) */
+ btf__add_struct(btf2, "with_embedded", 4); /* [26] struct with_embedded { */
+ btf__add_field(btf2, "f1", 13, 0, 0); /* struct embedded f1; */
+ /* } */
+ btf__add_func(btf2, "fn", BTF_FUNC_STATIC, 14); /* [27] int fn(int p1); */
+ btf__add_typedef(btf2, "arraytype", 15); /* [28] typedef int[3] foo; */
+ btf__add_func_proto(btf2, 1); /* [29] int (*)(struct from proto p1); */
+ btf__add_func_param(btf2, "p1", 16);
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=8 vlen=1\n"
+ "\t'f1' type_id=2 bits_offset=0",
+ "[4] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=0",
+ "[7] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'v1' val=1",
+ "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1024",
+ "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[12] STRUCT 'unneeded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[13] STRUCT 'embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3",
+ "[16] STRUCT 'from_proto' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[17] UNION 'u1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[18] PTR '(anon)' type_id=3",
+ "[19] PTR '(anon)' type_id=4",
+ "[20] CONST '(anon)' type_id=6",
+ "[21] RESTRICT '(anon)' type_id=7",
+ "[22] VOLATILE '(anon)' type_id=8",
+ "[23] TYPEDEF 'et' type_id=9",
+ "[24] CONST '(anon)' type_id=10",
+ "[25] PTR '(anon)' type_id=11",
+ "[26] STRUCT 'with_embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=13 bits_offset=0",
+ "[27] FUNC 'fn' type_id=14 linkage=static",
+ "[28] TYPEDEF 'arraytype' type_id=15",
+ "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=16");
+
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(8, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's1' size=8 vlen=0",
+ "[3] UNION 'u1' size=12 vlen=0",
+ "[4] ENUM 'e1' encoding=UNSIGNED size=4 vlen=0",
+ "[5] ENUM 'e641' encoding=UNSIGNED size=8 vlen=0",
+ "[6] STRUCT 'embedded' size=4 vlen=0",
+ "[7] STRUCT 'from_proto' size=4 vlen=0",
+ /* split BTF; these types should match split BTF above from 17-28, with
+ * updated type id references
+ */
+ "[8] PTR '(anon)' type_id=2",
+ "[9] PTR '(anon)' type_id=20",
+ "[10] CONST '(anon)' type_id=3",
+ "[11] RESTRICT '(anon)' type_id=21",
+ "[12] VOLATILE '(anon)' type_id=4",
+ "[13] TYPEDEF 'et' type_id=22",
+ "[14] CONST '(anon)' type_id=5",
+ "[15] PTR '(anon)' type_id=23",
+ "[16] STRUCT 'with_embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=6 bits_offset=0",
+ "[17] FUNC 'fn' type_id=24 linkage=static",
+ "[18] TYPEDEF 'arraytype' type_id=25",
+ "[19] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=7",
+ /* split BTF types added from original base BTF below */
+ "[20] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=32",
+ "[21] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[22] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[23] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[24] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[25] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3");
+
+ if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=8 vlen=1\n"
+ "\t'f1' type_id=2 bits_offset=0",
+ "[4] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=0",
+ "[7] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'v1' val=1",
+ "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1024",
+ "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[12] STRUCT 'unneeded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[13] STRUCT 'embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3",
+ "[16] STRUCT 'from_proto' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[17] UNION 'u1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[18] PTR '(anon)' type_id=3",
+ "[19] PTR '(anon)' type_id=30",
+ "[20] CONST '(anon)' type_id=6",
+ "[21] RESTRICT '(anon)' type_id=31",
+ "[22] VOLATILE '(anon)' type_id=8",
+ "[23] TYPEDEF 'et' type_id=32",
+ "[24] CONST '(anon)' type_id=10",
+ "[25] PTR '(anon)' type_id=33",
+ "[26] STRUCT 'with_embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=13 bits_offset=0",
+ "[27] FUNC 'fn' type_id=34 linkage=static",
+ "[28] TYPEDEF 'arraytype' type_id=35",
+ "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=16",
+ /* below here are (duplicate) anon base types added by distill
+ * process to split BTF.
+ */
+ "[30] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[31] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[32] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[33] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[34] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[35] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3");
+
+cleanup:
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* ensure we can cope with multiple types with the same name in
+ * distilled base BTF. In this case because sizes are different,
+ * we can still disambiguate them.
+ */
+static void test_distilled_base_multi(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ btf__add_const(btf2, 2);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+
+cleanup:
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* If a needed type is not present in the base BTF we wish to relocate
+ * with, btf__relocate() should error our.
+ */
+static void test_distilled_base_missing_err(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ btf__add_const(btf2, 2);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ btf5 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
+ goto cleanup;
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ VALIDATE_RAW_BTF(
+ btf5,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split");
+
+cleanup:
+ btf__free(btf5);
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* With 2 types of same size in distilled base BTF, relocation should
+ * fail as we have no means to choose between them.
+ */
+static void test_distilled_base_multi_err(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ btf__add_const(btf2, 2);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ ASSERT_EQ(btf__relocate(btf4, btf1), -EINVAL, "relocate_split");
+cleanup:
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* With 2 types of same size in base BTF, relocation should
+ * fail as we have no means to choose between them.
+ */
+static void test_distilled_base_multi_err2(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(2, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ btf5 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
+ goto cleanup;
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf5,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split");
+cleanup:
+ btf__free(btf5);
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* create split reference BTF from vmlinux + split BTF with a few type references;
+ * ensure the resultant split reference BTF is as expected, containing only types
+ * needed to disambiguate references from split BTF.
+ */
+static void test_distilled_base_vmlinux(void)
+{
+ struct btf *split_btf = NULL, *vmlinux_btf = btf__load_vmlinux_btf();
+ struct btf *split_dist = NULL, *base_dist = NULL;
+ __s32 int_id, myint_id;
+
+ if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux"))
+ return;
+ int_id = btf__find_by_name_kind(vmlinux_btf, "int", BTF_KIND_INT);
+ if (!ASSERT_GT(int_id, 0, "find_int"))
+ goto cleanup;
+ split_btf = btf__new_empty_split(vmlinux_btf);
+ if (!ASSERT_OK_PTR(split_btf, "new_split"))
+ goto cleanup;
+ myint_id = btf__add_typedef(split_btf, "myint", int_id);
+ btf__add_ptr(split_btf, myint_id);
+
+ if (!ASSERT_EQ(btf__distill_base(split_btf, &base_dist, &split_dist), 0,
+ "distill_vmlinux_base"))
+ goto cleanup;
+
+ if (!ASSERT_OK_PTR(split_dist, "split_distilled") ||
+ !ASSERT_OK_PTR(base_dist, "base_dist"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ split_dist,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] TYPEDEF 'myint' type_id=1",
+ "[3] PTR '(anon)' type_id=2");
+
+cleanup:
+ btf__free(split_dist);
+ btf__free(base_dist);
+ btf__free(split_btf);
+ btf__free(vmlinux_btf);
+}
+
+/* Split and new base BTFs should inherit endianness from source BTF. */
+static void test_distilled_endianness(void)
+{
+ struct btf *base = NULL, *split = NULL, *new_base = NULL, *new_split = NULL;
+ struct btf *new_base1 = NULL, *new_split1 = NULL;
+ enum btf_endianness inverse_endianness;
+ const void *raw_data;
+ __u32 size;
+
+ base = btf__new_empty();
+ if (!ASSERT_OK_PTR(base, "empty_main_btf"))
+ return;
+ inverse_endianness = btf__endianness(base) == BTF_LITTLE_ENDIAN ? BTF_BIG_ENDIAN
+ : BTF_LITTLE_ENDIAN;
+ btf__set_endianness(base, inverse_endianness);
+ btf__add_int(base, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ VALIDATE_RAW_BTF(
+ base,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ split = btf__new_empty_split(base);
+ if (!ASSERT_OK_PTR(split, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(split, 1);
+ VALIDATE_RAW_BTF(
+ split,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1");
+ if (!ASSERT_EQ(0, btf__distill_base(split, &new_base, &new_split),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(new_base, "distilled_base") ||
+ !ASSERT_OK_PTR(new_split, "distilled_split") ||
+ !ASSERT_EQ(2, btf__type_cnt(new_base), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ new_split,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1");
+
+ raw_data = btf__raw_data(new_base, &size);
+ if (!ASSERT_OK_PTR(raw_data, "btf__raw_data #1"))
+ goto cleanup;
+ new_base1 = btf__new(raw_data, size);
+ if (!ASSERT_OK_PTR(new_base1, "new_base1 = btf__new()"))
+ goto cleanup;
+ raw_data = btf__raw_data(new_split, &size);
+ if (!ASSERT_OK_PTR(raw_data, "btf__raw_data #2"))
+ goto cleanup;
+ new_split1 = btf__new_split(raw_data, size, new_base1);
+ if (!ASSERT_OK_PTR(new_split1, "new_split1 = btf__new()"))
+ goto cleanup;
+
+ ASSERT_EQ(btf__endianness(new_base1), inverse_endianness, "new_base1 endianness");
+ ASSERT_EQ(btf__endianness(new_split1), inverse_endianness, "new_split1 endianness");
+ VALIDATE_RAW_BTF(
+ new_split1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1");
+cleanup:
+ btf__free(new_split1);
+ btf__free(new_base1);
+ btf__free(new_split);
+ btf__free(new_base);
+ btf__free(split);
+ btf__free(base);
+}
+
+/* If a needed composite type, which is the member of composite type
+ * in the split BTF, has a different size in the base BTF we wish to
+ * relocate with, btf__relocate() should error out.
+ */
+static void test_distilled_base_embedded_err(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_struct(btf1, "s1", 4); /* [2] struct s1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0");
+
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf__add_struct(btf2, "with_embedded", 8); /* [3] struct with_embedded { */
+ btf__add_field(btf2, "e1", 2, 0, 0); /* struct s1 e1; */
+ /* } */
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[3] STRUCT 'with_embedded' size=8 vlen=1\n"
+ "\t'e1' type_id=2 bits_offset=0");
+
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(2, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] STRUCT 's1' size=4 vlen=0",
+ "[2] STRUCT 'with_embedded' size=8 vlen=1\n"
+ "\t'e1' type_id=1 bits_offset=0");
+
+ btf5 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
+ goto cleanup;
+
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ /* struct with the same name but different size */
+ btf__add_struct(btf5, "s1", 8); /* [2] struct s1 { */
+ btf__add_field(btf5, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+
+ ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split");
+cleanup:
+ btf__free(btf5);
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+void test_btf_distill(void)
+{
+ if (test__start_subtest("distilled_base"))
+ test_distilled_base();
+ if (test__start_subtest("distilled_base_multi"))
+ test_distilled_base_multi();
+ if (test__start_subtest("distilled_base_missing_err"))
+ test_distilled_base_missing_err();
+ if (test__start_subtest("distilled_base_multi_err"))
+ test_distilled_base_multi_err();
+ if (test__start_subtest("distilled_base_multi_err2"))
+ test_distilled_base_multi_err2();
+ if (test__start_subtest("distilled_base_embedded_err"))
+ test_distilled_base_embedded_err();
+ if (test__start_subtest("distilled_base_vmlinux"))
+ test_distilled_base_vmlinux();
+ if (test__start_subtest("distilled_endianness"))
+ test_distilled_endianness();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index c60091ee8a21..10cba526d3e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -13,29 +13,28 @@ static struct btf_dump_test_case {
const char *name;
const char *file;
bool known_ptr_sz;
- struct btf_dump_opts opts;
} btf_dump_test_cases[] = {
- {"btf_dump: syntax", "btf_dump_test_case_syntax", true, {}},
- {"btf_dump: ordering", "btf_dump_test_case_ordering", false, {}},
- {"btf_dump: padding", "btf_dump_test_case_padding", true, {}},
- {"btf_dump: packing", "btf_dump_test_case_packing", true, {}},
- {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true, {}},
- {"btf_dump: multidim", "btf_dump_test_case_multidim", false, {}},
- {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false, {}},
+ {"btf_dump: syntax", "btf_dump_test_case_syntax", true},
+ {"btf_dump: ordering", "btf_dump_test_case_ordering", false},
+ {"btf_dump: padding", "btf_dump_test_case_padding", true},
+ {"btf_dump: packing", "btf_dump_test_case_packing", true},
+ {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true},
+ {"btf_dump: multidim", "btf_dump_test_case_multidim", false},
+ {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false},
};
-static int btf_dump_all_types(const struct btf *btf,
- const struct btf_dump_opts *opts)
+static int btf_dump_all_types(const struct btf *btf, void *ctx)
{
- size_t type_cnt = btf__get_nr_types(btf);
+ size_t type_cnt = btf__type_cnt(btf);
struct btf_dump *d;
int err = 0, id;
- d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
- if (IS_ERR(d))
- return PTR_ERR(d);
+ d = btf_dump__new(btf, btf_dump_printf, ctx, NULL);
+ err = libbpf_get_error(d);
+ if (err)
+ return err;
- for (id = 1; id <= type_cnt; id++) {
+ for (id = 1; id < type_cnt; id++) {
err = btf_dump__dump_type(d, id);
if (err)
goto done;
@@ -53,11 +52,10 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
int err = 0, fd = -1;
FILE *f = NULL;
- snprintf(test_file, sizeof(test_file), "%s.o", t->file);
+ snprintf(test_file, sizeof(test_file), "%s.bpf.o", t->file);
btf = btf__parse_elf(test_file, NULL);
- if (CHECK(IS_ERR(btf), "btf_parse_elf",
- "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+ if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) {
err = -PTR_ERR(btf);
btf = NULL;
goto done;
@@ -65,7 +63,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
/* tests with t->known_ptr_sz have no "long" or "unsigned long" type,
* so it's impossible to determine correct pointer size; but if they
- * do, it should be 8 regardless of host architecture, becaues BPF
+ * do, it should be 8 regardless of host architecture, because BPF
* target is always 64-bit
*/
if (!t->known_ptr_sz) {
@@ -77,7 +75,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file);
fd = mkstemp(out_file);
- if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) {
+ if (!ASSERT_GE(fd, 0, "create_tmp")) {
err = fd;
goto done;
}
@@ -88,8 +86,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
goto done;
}
- t->opts.ctx = f;
- err = btf_dump_all_types(btf, &t->opts);
+ err = btf_dump_all_types(btf, f);
fclose(f);
close(fd);
if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) {
@@ -129,60 +126,118 @@ done:
return err;
}
-static char *dump_buf;
-static size_t dump_buf_sz;
-static FILE *dump_buf_file;
+struct test_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *dump_buf;
+ size_t dump_buf_sz;
+ FILE *dump_buf_file;
+};
-void test_btf_dump_incremental(void)
+static void test_ctx__free(struct test_ctx *t)
{
- struct btf *btf = NULL;
- struct btf_dump *d = NULL;
- struct btf_dump_opts opts;
- int id, err, i;
+ fclose(t->dump_buf_file);
+ free(t->dump_buf);
+ btf_dump__free(t->d);
+ btf__free(t->btf);
+}
- dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
- if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
- return;
- btf = btf__new_empty();
- if (!ASSERT_OK_PTR(btf, "new_empty"))
+static int test_ctx__init(struct test_ctx *t)
+{
+ t->dump_buf_file = open_memstream(&t->dump_buf, &t->dump_buf_sz);
+ if (!ASSERT_OK_PTR(t->dump_buf_file, "dump_memstream"))
+ return -1;
+ t->btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(t->btf, "new_empty"))
goto err_out;
- opts.ctx = dump_buf_file;
- d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
- if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+ t->d = btf_dump__new(t->btf, btf_dump_printf, t->dump_buf_file, NULL);
+ if (!ASSERT_OK(libbpf_get_error(t->d), "btf_dump__new"))
goto err_out;
+ return 0;
+
+err_out:
+ test_ctx__free(t);
+ return -1;
+}
+
+static void test_ctx__dump_and_compare(struct test_ctx *t,
+ const char *expected_output,
+ const char *message)
+{
+ int i, err;
+
+ for (i = 1; i < btf__type_cnt(t->btf); i++) {
+ err = btf_dump__dump_type(t->d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(t->dump_buf_file);
+ t->dump_buf[t->dump_buf_sz] = 0; /* some libc implementations don't do this */
+
+ ASSERT_STREQ(t->dump_buf, expected_output, message);
+}
+
+static void test_btf_dump_incremental(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
/* First, generate BTF corresponding to the following C code:
*
- * enum { VAL = 1 };
+ * enum x;
+ *
+ * enum x { X = 1 };
+ *
+ * enum { Y = 1 };
+ *
+ * struct s;
*
* struct s { int x; };
*
*/
+ id = btf__add_enum(btf, "x", 4);
+ ASSERT_EQ(id, 1, "enum_declaration_id");
+ id = btf__add_enum(btf, "x", 4);
+ ASSERT_EQ(id, 2, "named_enum_id");
+ err = btf__add_enum_value(btf, "X", 1);
+ ASSERT_OK(err, "named_enum_val_ok");
+
id = btf__add_enum(btf, NULL, 4);
- ASSERT_EQ(id, 1, "enum_id");
- err = btf__add_enum_value(btf, "VAL", 1);
- ASSERT_OK(err, "enum_val_ok");
+ ASSERT_EQ(id, 3, "anon_enum_id");
+ err = btf__add_enum_value(btf, "Y", 1);
+ ASSERT_OK(err, "anon_enum_val_ok");
id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
- ASSERT_EQ(id, 2, "int_id");
+ ASSERT_EQ(id, 4, "int_id");
+
+ id = btf__add_fwd(btf, "s", BTF_FWD_STRUCT);
+ ASSERT_EQ(id, 5, "fwd_id");
id = btf__add_struct(btf, "s", 4);
- ASSERT_EQ(id, 3, "struct_id");
- err = btf__add_field(btf, "x", 2, 0, 0);
+ ASSERT_EQ(id, 6, "struct_id");
+ err = btf__add_field(btf, "x", 4, 0, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i <= btf__get_nr_types(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
+"enum x;\n"
+"\n"
+"enum x {\n"
+" X = 1,\n"
+"};\n"
+"\n"
"enum {\n"
-" VAL = 1,\n"
+" Y = 1,\n"
"};\n"
"\n"
+"struct s;\n"
+"\n"
"struct s {\n"
" int x;\n"
"};\n\n", "c_dump1");
@@ -201,38 +256,797 @@ void test_btf_dump_incremental(void)
* enum values don't conflict;
*
*/
- fseek(dump_buf_file, 0, SEEK_SET);
+ fseek(t.dump_buf_file, 0, SEEK_SET);
id = btf__add_struct(btf, "s", 4);
- ASSERT_EQ(id, 4, "struct_id");
- err = btf__add_field(btf, "x", 1, 0, 0);
+ ASSERT_EQ(id, 7, "struct_id");
+ err = btf__add_field(btf, "x", 2, 0, 0);
ASSERT_OK(err, "field_ok");
- err = btf__add_field(btf, "s", 3, 32, 0);
+ err = btf__add_field(btf, "y", 3, 32, 0);
+ ASSERT_OK(err, "field_ok");
+ err = btf__add_field(btf, "s", 6, 64, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i <= btf__get_nr_types(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
"struct s___2 {\n"
+" enum x x;\n"
" enum {\n"
-" VAL___2 = 1,\n"
-" } x;\n"
+" Y___2 = 1,\n"
+" } y;\n"
" struct s s;\n"
"};\n\n" , "c_dump1");
-err_out:
- fclose(dump_buf_file);
- free(dump_buf);
+ test_ctx__free(&t);
+}
+
+static void test_btf_dump_type_tags(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
+ /* Generate BTF corresponding to the following C code:
+ *
+ * struct s {
+ * void __attribute__((btf_type_tag(\"void_tag\"))) *p1;
+ * void __attribute__((void_attr)) *p2;
+ * };
+ *
+ */
+
+ id = btf__add_type_tag(btf, "void_tag", 0);
+ ASSERT_EQ(id, 1, "type_tag_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 2, "void_ptr_id1");
+
+ id = btf__add_type_attr(btf, "void_attr", 0);
+ ASSERT_EQ(id, 3, "type_attr_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 4, "void_ptr_id2");
+
+ id = btf__add_struct(btf, "s", 8);
+ ASSERT_EQ(id, 5, "struct_id");
+ err = btf__add_field(btf, "p1", 2, 0, 0);
+ ASSERT_OK(err, "field_ok1");
+ err = btf__add_field(btf, "p2", 4, 0, 0);
+ ASSERT_OK(err, "field_ok2");
+
+ test_ctx__dump_and_compare(&t,
+"struct s {\n"
+" void __attribute__((btf_type_tag(\"void_tag\"))) *p1;\n"
+" void __attribute__((void_attr)) *p2;\n"
+"};\n\n", "dump_and_compare");
+
+ test_ctx__free(&t);
+}
+
+#define STRSIZE 4096
+
+static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)
+{
+ char *s = ctx, new[STRSIZE];
+
+ vsnprintf(new, STRSIZE, fmt, args);
+ if (strlen(s) < STRSIZE)
+ strncat(s, new, STRSIZE - strlen(s) - 1);
+}
+
+static int btf_dump_data(struct btf *btf, struct btf_dump *d,
+ char *name, char *prefix, __u64 flags, void *ptr,
+ size_t ptr_sz, char *str, const char *expected_val)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ size_t type_sz;
+ __s32 type_id;
+ int ret = 0;
+
+ if (flags & BTF_F_COMPACT)
+ opts.compact = true;
+ if (flags & BTF_F_NONAME)
+ opts.skip_names = true;
+ if (flags & BTF_F_ZERO)
+ opts.emit_zeroes = true;
+ if (prefix) {
+ ASSERT_STRNEQ(name, prefix, strlen(prefix),
+ "verify prefix match");
+ name += strlen(prefix) + 1;
+ }
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GE(type_id, 0, "find type id"))
+ return -ENOENT;
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ } else {
+ if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix, _str, _type, _flags, \
+ ...) \
+ TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
+
+/* overflow test; pass typesize < expected type size, ensure E2BIG returned */
+#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0, \
+ _ptr, _type_sz, _str, _expected); \
+ } while (0)
+
+#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags, \
+ _expected, ...) \
+ do { \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _var, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+#ifdef __SIZEOF_INT128__
+ unsigned __int128 i = 0xffffffffffffffff;
+
+ /* this dance is required because we cannot directly initialize
+ * a 128-bit value to anything larger than a 64-bit value.
+ */
+ i = (i << 64) | (i - 1);
+#endif
+ /* simple int */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "1234", 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234);
+
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO,
+ "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "-4567", -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
+
+#ifdef __SIZEOF_INT128__
+ /* gcc encode unsigned __int128 type with name "__int128 unsigned" in dwarf,
+ * and clang encode it with name "unsigned __int128" in dwarf.
+ * Do an availability test for either variant before doing actual test.
+ */
+ if (btf__find_by_name(btf, "unsigned __int128") > 0) {
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, unsigned __int128, BTF_F_COMPACT,
+ "(unsigned __int128)0xffffffffffffffff",
+ 0xffffffffffffffff);
+ ASSERT_OK(btf_dump_data(btf, d, "unsigned __int128", NULL, 0, &i, 16, str,
+ "(unsigned __int128)0xfffffffffffffffffffffffffffffffe"),
+ "dump unsigned __int128");
+ } else if (btf__find_by_name(btf, "__int128 unsigned") > 0) {
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128 unsigned, BTF_F_COMPACT,
+ "(__int128 unsigned)0xffffffffffffffff",
+ 0xffffffffffffffff);
+ ASSERT_OK(btf_dump_data(btf, d, "__int128 unsigned", NULL, 0, &i, 16, str,
+ "(__int128 unsigned)0xfffffffffffffffffffffffffffffffe"),
+ "dump unsigned __int128");
+ } else {
+ ASSERT_TRUE(false, "unsigned_int128_not_found");
+ }
+#endif
+}
+
+static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ float t1 = 1.234567;
+ float t2 = -1.234567;
+ float t3 = 0.0;
+ double t4 = 5.678912;
+ double t5 = -5.678912;
+ double t6 = 0.0;
+ long double t7 = 9.876543;
+ long double t8 = -9.876543;
+ long double t9 = 0.0;
+
+ /* since the kernel does not likely have any float types in its BTF, we
+ * will need to add some of various sizes.
+ */
+
+ ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str,
+ "(test_float)1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str,
+ "(test_float)-1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str,
+ "(test_float)0.000000"), "dump float");
+
+ ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str,
+ "(test_double)5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str,
+ "(test_double)-5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str,
+ "(test_double)0.000000"), "dump double");
+
+ ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16,
+ str, "(test_long_double)9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16,
+ str, "(test_long_double)-9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16,
+ str, "(test_long_double)0.000000"),
+ "dump long_double");
+}
+
+static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple char */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "100", 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100);
+}
+
+static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple typedef */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "1", 1);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO,
+ "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0);
+
+ /* typedef struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT,
+ {.counter = (int)1,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,}", { .counter = 1 });
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+" .counter = (int)1,\n"
+"}",
+ {.counter = 1,});
+ /* typedef with 0 value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO,
+ "(atomic_t){.counter = (int)0,}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO,
+"(atomic_t){\n"
+" .counter = (int)0,\n"
+"}",
+ { .counter = 0,});
+
+ /* overflow should show type but not value since it overflows */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1,
+ "(atomic_t){\n", { .counter = 1});
+}
+
+static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* enum where enum value does (and does not) exist */
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "2000", 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)2000", 2000);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd,
+ sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE);
+}
+
+static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ char zero_data[512] = { };
+ char type_data[512];
+ void *fops = type_data;
+ void *skb = type_data;
+ size_t type_sz;
+ __s32 type_id;
+ char *cmpstr;
+ int ret;
+
+ memset(type_data, 255, sizeof(type_data));
+
+ /* simple struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ {.name_off = (__u32)3,.val = (__s32)-1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{3,-1,}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+" .name_off = (__u32)3,\n"
+" .val = (__s32)-1,\n"
+"}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{-1,}",
+ { .name_off = 0, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,-1,}",
+ { .name_off = 0, .val = -1,});
+ /* empty struct should be printed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ "(struct btf_enum){}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_ZERO,
+"(struct btf_enum){\n"
+" .name_off = (__u32)0,\n"
+" .val = (__s32)0,\n"
+"}",
+ { .name_off = 0, .val = 0,});
+
+ /* struct with pointers */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){.next = (struct list_head *)0x1,}",
+ { .next = (struct list_head *)1 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+" .next = (struct list_head *)0x1,\n"
+"}",
+ { .next = (struct list_head *)1 });
+ /* NULL pointer should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){}",
+ { .next = (struct list_head *)0 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"}",
+ { .next = (struct list_head *)0 });
+
+ /* struct with function pointers */
+ type_id = btf__find_by_name(btf, "file_operations");
+ if (ASSERT_GT(type_id, 0, "find type id")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping file_operations");
+ cmpstr =
+"(struct file_operations){\n"
+" .owner = (struct module *)0xffffffffffffffff,\n"
+" .fop_flags = (fop_flags_t)4294967295,";
+
+ ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
+ }
+
+ /* struct with char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}",
+ { .name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{['f','o','o',],}",
+ {.name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0,
+"(struct bpf_prog_info){\n"
+" .name = (char[16])[\n"
+" 'f',\n"
+" 'o',\n"
+" 'o',\n"
+" ],\n"
+"}",
+ {.name = "foo",});
+ /* leading null char means do not display string */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){}",
+ {.name = {'\0', 'f', 'o', 'o'}});
+ /* handle non-printable characters */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}",
+ { .name = {1, 2, 3, 0}});
+
+ /* struct with non-char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}",
+ { .cb = {1, 2, 3, 4, 5,},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{[1,2,3,4,5,],}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 1,\n"
+" 2,\n"
+" 3,\n"
+" 4,\n"
+" 5,\n"
+" ],\n"
+"}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ /* For non-char, arrays, show non-zero values only */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}",
+ { .cb = { 0, 0, 1, 0, 0},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 0,\n"
+" 0,\n"
+" 1,\n"
+" 0,\n"
+" 0,\n"
+" ],\n"
+"}",
+ { .cb = { 0, 0, 1, 0, 0},});
+
+ /* struct with bitfields */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,0x2,0x3,4,5,}",
+ { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+ .imm = 5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0,
+"(struct bpf_insn){\n"
+" .code = (__u8)1,\n"
+" .dst_reg = (__u8)0x2,\n"
+" .src_reg = (__u8)0x3,\n"
+" .off = (__s16)4,\n"
+" .imm = (__s32)5,\n"
+"}",
+ {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
+
+ /* zeroed bitfields should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
+ { .code = 0, .dst_reg = 1});
+
+ /* struct with enum bitfield */
+ type_id = btf__find_by_name(btf, "fs_context");
+ if (ASSERT_GT(type_id, 0, "find fs_context")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ opts.emit_zeroes = true;
+ ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping fs_context");
+
+ ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL,
+ "bitfield value not present");
+ }
+
+ /* struct with nested anon union */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT,
+ "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}",
+ { .op = 1, .args = { 1, 2, 3, 4}});
+
+ /* union with nested struct */
+ TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
+ "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},.task = (struct){.tid = (__u32)1,.pid = (__u32)1,},}",
+ { .cgroup = { .order = 1, .cgroup_fd = 1, }});
+
+ /* struct skb with nested structs/unions; because type output is so
+ * complex, we don't do a string comparison, just verify we return
+ * the type size as the amount of data displayed.
+ */
+ type_id = btf__find_by_name(btf, "sk_buff");
+ if (ASSERT_GT(type_id, 0, "find struct sk_buff")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping sk_buff");
+ }
+
+ /* overflow bpf_sock_ops struct with final element nonzero/zero.
+ * Regardless of the value of the final field, we don't have all the
+ * data we need to display it, so we should trigger an overflow.
+ * In other words overflow checking should trump "is field zero?"
+ * checks because if we've overflowed, it shouldn't matter what the
+ * field is - we can't trust its value so shouldn't display it.
+ */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_hwtstamp = 2});
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_hwtstamp = 0});
+}
+
+static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+#if 0
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
+ "int cpu_number = (int)100", 100);
+#endif
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_cgrp_storage_busy", int, BTF_F_COMPACT,
+ "static int bpf_cgrp_storage_busy = (int)2", 2);
+}
+
+struct btf_dump_string_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *str;
+ struct btf_dump_type_data_opts *opts;
+ int array_id;
+};
+
+static int btf_dump_one_string(struct btf_dump_string_ctx *ctx,
+ char *ptr, size_t ptr_sz,
+ const char *expected_val)
+{
+ size_t type_sz;
+ int ret;
+
+ ctx->str[0] = '\0';
+ type_sz = btf__resolve_size(ctx->btf, ctx->array_id);
+ ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+static void btf_dump_strings(struct btf_dump_string_ctx *ctx)
+{
+ struct btf_dump_type_data_opts *opts = ctx->opts;
+
+ opts->emit_strings = true;
+
+ opts->compact = true;
+ opts->emit_zeroes = false;
+
+ opts->skip_names = false;
+ btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\"");
+
+ opts->skip_names = true;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->emit_zeroes = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->compact = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* Non-printable characters come out as hex. */
+ btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\"");
+ btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\"");
+
+ /*
+ * Strings that are too long for the specified type ("char[4]")
+ * should fall back to the current behavior.
+ */
+ opts->compact = true;
+ btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]");
+
+ /*
+ * Strings that are too short for the specified type ("char[4]")
+ * should work normally.
+ */
+ btf_dump_one_string(ctx, "ab", 3, "\"ab\"");
+
+ /* Non-NUL-terminated arrays don't get printed as strings. */
+ char food[4] = { 'f', 'o', 'o', 'd' };
+ char bye[3] = { 'b', 'y', 'e' };
+
+ btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]");
+ btf_dump_one_string(ctx, bye, 3, "['b','y','e',]");
+
+ /* The embedded NUL should terminate the string. */
+ char embed[4] = { 'f', 'o', '\0', 'd' };
+
+ btf_dump_one_string(ctx, embed, 4, "\"fo\"");
+}
+
+static void test_btf_dump_string_data(void)
+{
+ struct test_ctx t = {};
+ char str[STRSIZE];
+ struct btf_dump *d;
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ struct btf_dump_string_ctx ctx;
+ int char_id, int_id, array_id;
+
+ if (test_ctx__init(&t))
+ return;
+
+ d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Generate BTF for a four-element char array. */
+ char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR);
+ ASSERT_EQ(char_id, 1, "char_id");
+ int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(int_id, 2, "int_id");
+ array_id = btf__add_array(t.btf, int_id, char_id, 4);
+ ASSERT_EQ(array_id, 3, "array_id");
+
+ ctx.btf = t.btf;
+ ctx.d = d;
+ ctx.str = str;
+ ctx.opts = &opts;
+ ctx.array_id = array_id;
+
+ btf_dump_strings(&ctx);
+
+ btf_dump__free(d);
+ test_ctx__free(&t);
+}
+
+static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
+ const char *name, const char *expected_val,
+ void *data, size_t data_sz)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ int ret = 0, cmp;
+ size_t secsize;
+ __s32 type_id;
+
+ opts.compact = true;
+
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GT(type_id, 0, "find type id"))
+ return;
+
+ secsize = btf__resolve_size(btf, type_id);
+ ASSERT_EQ(secsize, 0, "verify section size");
+
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);
+ ASSERT_EQ(ret, 0, "unexpected return value");
+
+ cmp = strcmp(str, expected_val);
+ ASSERT_EQ(cmp, 0, "ensure expected/actual match");
+}
+
+static void test_btf_dump_datasec_data(char *str)
+{
+ struct btf *btf;
+ char license[4] = "GPL";
+ struct btf_dump *d;
+
+ btf = btf__parse("xdping_kern.bpf.o", NULL);
+ if (!ASSERT_OK_PTR(btf, "xdping_kern.bpf.o BTF not found"))
+ return;
+
+ d = btf_dump__new(btf, btf_dump_snprintf, str, NULL);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ goto out;
+
+ test_btf_datasec(btf, d, str, "license",
+ "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
+ license, sizeof(license));
+out:
btf_dump__free(d);
btf__free(btf);
}
void test_btf_dump() {
+ char str[STRSIZE];
+ struct btf_dump *d;
+ struct btf *btf;
int i;
for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
@@ -245,4 +1059,38 @@ void test_btf_dump() {
}
if (test__start_subtest("btf_dump: incremental"))
test_btf_dump_incremental();
+
+ if (test__start_subtest("btf_dump: type_tags"))
+ test_btf_dump_type_tags();
+
+ btf = libbpf_find_kernel_btf();
+ if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+ return;
+
+ d = btf_dump__new(btf, btf_dump_snprintf, str, NULL);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Verify type display for various types. */
+ if (test__start_subtest("btf_dump: int_data"))
+ test_btf_dump_int_data(btf, d, str);
+ if (test__start_subtest("btf_dump: float_data"))
+ test_btf_dump_float_data(btf, d, str);
+ if (test__start_subtest("btf_dump: char_data"))
+ test_btf_dump_char_data(btf, d, str);
+ if (test__start_subtest("btf_dump: typedef_data"))
+ test_btf_dump_typedef_data(btf, d, str);
+ if (test__start_subtest("btf_dump: enum_data"))
+ test_btf_dump_enum_data(btf, d, str);
+ if (test__start_subtest("btf_dump: struct_data"))
+ test_btf_dump_struct_data(btf, d, str);
+ if (test__start_subtest("btf_dump: var_data"))
+ test_btf_dump_var_data(btf, d, str);
+ if (test__start_subtest("btf_dump: string_data"))
+ test_btf_dump_string_data();
+ btf_dump__free(d);
+ btf__free(btf);
+
+ if (test__start_subtest("btf_dump: datasec_data"))
+ test_btf_dump_datasec_data(str);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
index 8c52d72c876e..5b9f84dbeb43 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
@@ -6,15 +6,13 @@
#include <test_progs.h>
#include <bpf/btf.h>
-static int duration = 0;
-
void test_btf_endian() {
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
enum btf_endianness endian = BTF_LITTLE_ENDIAN;
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
enum btf_endianness endian = BTF_BIG_ENDIAN;
#else
-#error "Unrecognized __BYTE_ORDER"
+#error "Unrecognized __BYTE_ORDER__"
#endif
enum btf_endianness swap_endian = 1 - endian;
struct btf *btf = NULL, *swap_btf = NULL;
@@ -25,7 +23,7 @@ void test_btf_endian() {
int var_id;
/* Load BTF in native endianness */
- btf = btf__parse_elf("btf_dump_test_case_syntax.o", NULL);
+ btf = btf__parse_elf("btf_dump_test_case_syntax.bpf.o", NULL);
if (!ASSERT_OK_PTR(btf, "parse_native_btf"))
goto err_out;
@@ -34,7 +32,7 @@ void test_btf_endian() {
ASSERT_EQ(btf__endianness(btf), swap_endian, "endian");
/* Get raw BTF data in non-native endianness... */
- raw_data = btf__get_raw_data(btf, &raw_sz);
+ raw_data = btf__raw_data(btf, &raw_sz);
if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
goto err_out;
@@ -44,9 +42,9 @@ void test_btf_endian() {
goto err_out;
ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
- ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+ ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types");
- swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+ swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz);
if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
goto err_out;
@@ -60,7 +58,7 @@ void test_btf_endian() {
/* swap it back to native endianness */
btf__set_endianness(swap_btf, endian);
- swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+ swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz);
if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
goto err_out;
@@ -71,13 +69,13 @@ void test_btf_endian() {
/* now modify original BTF */
var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1);
- CHECK(var_id <= 0, "var_id", "failed %d\n", var_id);
+ ASSERT_GT(var_id, 0, "var_id");
btf__free(swap_btf);
swap_btf = NULL;
btf__set_endianness(btf, swap_endian);
- raw_data = btf__get_raw_data(btf, &raw_sz);
+ raw_data = btf__raw_data(btf, &raw_sz);
if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
goto err_out;
@@ -87,7 +85,7 @@ void test_btf_endian() {
goto err_out;
ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
- ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+ ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types");
/* the type should appear as if it was stored in native endianness */
t = btf__type_by_id(swap_btf, var_id);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c
new file mode 100644
index 000000000000..32159d3eb281
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+#include "bpf/libbpf_internal.h"
+
+struct field_data {
+ __u32 ids[5];
+ const char *strs[5];
+} fields[] = {
+ { .ids = {}, .strs = {} },
+ { .ids = {}, .strs = { "int" } },
+ { .ids = {}, .strs = { "int64" } },
+ { .ids = { 1 }, .strs = { "" } },
+ { .ids = { 2, 1 }, .strs = { "" } },
+ { .ids = { 3, 1 }, .strs = { "s1", "f1", "f2" } },
+ { .ids = { 1, 5 }, .strs = { "u1", "f1", "f2" } },
+ { .ids = {}, .strs = { "e1", "v1", "v2" } },
+ { .ids = {}, .strs = { "fw1" } },
+ { .ids = { 1 }, .strs = { "t" } },
+ { .ids = { 2 }, .strs = { "" } },
+ { .ids = { 1 }, .strs = { "" } },
+ { .ids = { 3 }, .strs = { "" } },
+ { .ids = { 1, 1, 3 }, .strs = { "", "p1", "p2" } },
+ { .ids = { 13 }, .strs = { "func" } },
+ { .ids = { 1 }, .strs = { "var1" } },
+ { .ids = { 3 }, .strs = { "var2" } },
+ { .ids = {}, .strs = { "float" } },
+ { .ids = { 11 }, .strs = { "decltag" } },
+ { .ids = { 6 }, .strs = { "typetag" } },
+ { .ids = {}, .strs = { "e64", "eval1", "eval2", "eval3" } },
+ { .ids = { 15, 16 }, .strs = { "datasec1" } }
+
+};
+
+/* Fabricate BTF with various types and check BTF field iteration finds types,
+ * strings expected.
+ */
+void test_btf_field_iter(void)
+{
+ struct btf *btf = NULL;
+ int id;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ return;
+
+ btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf, "int64", 8, BTF_INT_SIGNED); /* [2] int64 */
+ btf__add_ptr(btf, 1); /* [3] int * */
+ btf__add_array(btf, 1, 2, 3); /* [4] int64[3] */
+ btf__add_struct(btf, "s1", 12); /* [5] struct s1 { */
+ btf__add_field(btf, "f1", 3, 0, 0); /* int *f1; */
+ btf__add_field(btf, "f2", 1, 0, 0); /* int f2; */
+ /* } */
+ btf__add_union(btf, "u1", 12); /* [6] union u1 { */
+ btf__add_field(btf, "f1", 1, 0, 0); /* int f1; */
+ btf__add_field(btf, "f2", 5, 0, 0); /* struct s1 f2; */
+ /* } */
+ btf__add_enum(btf, "e1", 4); /* [7] enum e1 { */
+ btf__add_enum_value(btf, "v1", 1); /* v1 = 1; */
+ btf__add_enum_value(btf, "v2", 2); /* v2 = 2; */
+ /* } */
+
+ btf__add_fwd(btf, "fw1", BTF_FWD_STRUCT); /* [8] struct fw1; */
+ btf__add_typedef(btf, "t", 1); /* [9] typedef int t; */
+ btf__add_volatile(btf, 2); /* [10] volatile int64; */
+ btf__add_const(btf, 1); /* [11] const int; */
+ btf__add_restrict(btf, 3); /* [12] restrict int *; */
+ btf__add_func_proto(btf, 1); /* [13] int (*)(int p1, int *p2); */
+ btf__add_func_param(btf, "p1", 1);
+ btf__add_func_param(btf, "p2", 3);
+
+ btf__add_func(btf, "func", BTF_FUNC_GLOBAL, 13);/* [14] int func(int p1, int *p2); */
+ btf__add_var(btf, "var1", BTF_VAR_STATIC, 1); /* [15] static int var1; */
+ btf__add_var(btf, "var2", BTF_VAR_STATIC, 3); /* [16] static int *var2; */
+ btf__add_float(btf, "float", 4); /* [17] float; */
+ btf__add_decl_tag(btf, "decltag", 11, -1); /* [18] decltag const int; */
+ btf__add_type_tag(btf, "typetag", 6); /* [19] typetag union u1; */
+ btf__add_enum64(btf, "e64", 8, true); /* [20] enum { */
+ btf__add_enum64_value(btf, "eval1", 1000); /* eval1 = 1000, */
+ btf__add_enum64_value(btf, "eval2", 2000); /* eval2 = 2000, */
+ btf__add_enum64_value(btf, "eval3", 3000); /* eval3 = 3000 */
+ /* } */
+ btf__add_datasec(btf, "datasec1", 12); /* [21] datasec datasec1 */
+ btf__add_datasec_var_info(btf, 15, 0, 4);
+ btf__add_datasec_var_info(btf, 16, 4, 8);
+
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int64' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=3",
+ "[5] STRUCT 's1' size=12 vlen=2\n"
+ "\t'f1' type_id=3 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=0",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=5 bits_offset=0",
+ "[7] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
+ "\t'v1' val=1\n"
+ "\t'v2' val=2",
+ "[8] FWD 'fw1' fwd_kind=struct",
+ "[9] TYPEDEF 't' type_id=1",
+ "[10] VOLATILE '(anon)' type_id=2",
+ "[11] CONST '(anon)' type_id=1",
+ "[12] RESTRICT '(anon)' type_id=3",
+ "[13] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+ "\t'p1' type_id=1\n"
+ "\t'p2' type_id=3",
+ "[14] FUNC 'func' type_id=13 linkage=global",
+ "[15] VAR 'var1' type_id=1, linkage=static",
+ "[16] VAR 'var2' type_id=3, linkage=static",
+ "[17] FLOAT 'float' size=4",
+ "[18] DECL_TAG 'decltag' type_id=11 component_idx=-1",
+ "[19] TYPE_TAG 'typetag' type_id=6",
+ "[20] ENUM64 'e64' encoding=SIGNED size=8 vlen=3\n"
+ "\t'eval1' val=1000\n"
+ "\t'eval2' val=2000\n"
+ "\t'eval3' val=3000",
+ "[21] DATASEC 'datasec1' size=12 vlen=2\n"
+ "\ttype_id=15 offset=0 size=4\n"
+ "\ttype_id=16 offset=4 size=8");
+
+ for (id = 1; id < btf__type_cnt(btf); id++) {
+ struct btf_type *t = btf_type_by_id(btf, id);
+ struct btf_field_iter it_strs, it_ids;
+ int str_idx = 0, id_idx = 0;
+ __u32 *next_str, *next_id;
+
+ if (!ASSERT_OK_PTR(t, "btf_type_by_id"))
+ break;
+ if (!ASSERT_OK(btf_field_iter_init(&it_strs, t, BTF_FIELD_ITER_STRS),
+ "iter_init_strs"))
+ break;
+ if (!ASSERT_OK(btf_field_iter_init(&it_ids, t, BTF_FIELD_ITER_IDS),
+ "iter_init_ids"))
+ break;
+ while ((next_str = btf_field_iter_next(&it_strs))) {
+ const char *str = btf__str_by_offset(btf, *next_str);
+
+ if (!ASSERT_OK(strcmp(fields[id].strs[str_idx], str), "field_str_match"))
+ break;
+ str_idx++;
+ }
+ /* ensure no more strings are expected */
+ ASSERT_EQ(fields[id].strs[str_idx], NULL, "field_str_cnt");
+
+ while ((next_id = btf_field_iter_next(&it_ids))) {
+ if (!ASSERT_EQ(*next_id, fields[id].ids[id_idx], "field_id_match"))
+ break;
+ id_idx++;
+ }
+ /* ensure no more ids are expected */
+ ASSERT_EQ(fields[id].ids[id_idx], 0, "field_id_cnt");
+ }
+ btf__free(btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
index eb90a6b8850d..f66ceccd7029 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -14,7 +14,7 @@ static __u32 bpf_map_id(struct bpf_map *map)
int err;
memset(&info, 0, info_len);
- err = bpf_obj_get_info_by_fd(bpf_map__fd(map), &info, &info_len);
+ err = bpf_map_get_info_by_fd(bpf_map__fd(map), &info, &info_len);
if (err)
return 0;
return info.id;
@@ -25,7 +25,7 @@ static void test_lookup_update(void)
int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id;
int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd;
struct test_btf_map_in_map *skel;
- int err, key = 0, val, i, fd;
+ int err, key = 0, val, i;
skel = test_btf_map_in_map__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
@@ -102,30 +102,6 @@ static void test_lookup_update(void)
CHECK(map1_id == 0, "map1_id", "failed to get ID 1\n");
CHECK(map2_id == 0, "map2_id", "failed to get ID 2\n");
- test_btf_map_in_map__destroy(skel);
- skel = NULL;
-
- /* we need to either wait for or force synchronize_rcu(), before
- * checking for "still exists" condition, otherwise map could still be
- * resolvable by ID, causing false positives.
- *
- * Older kernels (5.8 and earlier) freed map only after two
- * synchronize_rcu()s, so trigger two, to be entirely sure.
- */
- CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
- CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
-
- fd = bpf_map_get_fd_by_id(map1_id);
- if (CHECK(fd >= 0, "map1_leak", "inner_map1 leaked!\n")) {
- close(fd);
- goto cleanup;
- }
- fd = bpf_map_get_fd_by_id(map2_id);
- if (CHECK(fd >= 0, "map2_leak", "inner_map2 leaked!\n")) {
- close(fd);
- goto cleanup;
- }
-
cleanup:
test_btf_map_in_map__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_module.c b/tools/testing/selftests/bpf/prog_tests/btf_module.c
new file mode 100644
index 000000000000..2239d1fe0332
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_module.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_testmod_test_read";
+
+void test_btf_module()
+{
+ struct btf *vmlinux_btf, *module_btf;
+ __s32 type_id;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+ return;
+
+ module_btf = btf__load_module_btf(module_name, vmlinux_btf);
+ if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+ goto cleanup;
+
+ type_id = btf__find_by_name(module_btf, symbol_name);
+ ASSERT_GT(type_id, 0, "func not found");
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
index 762f6a9da8b5..cf15cc3be491 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -9,6 +9,7 @@
#include <string.h>
#include <errno.h>
#include <sched.h>
+#include <net/if.h>
#include <linux/compiler.h>
#include <bpf/libbpf.h>
@@ -16,50 +17,37 @@
#include "test_progs.h"
#include "test_btf_skc_cls_ingress.skel.h"
-static struct test_btf_skc_cls_ingress *skel;
-static struct sockaddr_in6 srv_sa6;
-static __u32 duration;
+#define TEST_NS "skc_cls_ingress"
-#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
+#define BIT(n) (1 << (n))
+#define TEST_MODE_IPV4 BIT(0)
+#define TEST_MODE_IPV6 BIT(1)
+#define TEST_MODE_DUAL (TEST_MODE_IPV4 | TEST_MODE_IPV6)
-static int write_sysctl(const char *sysctl, const char *value)
-{
- int fd, err, len;
-
- fd = open(sysctl, O_WRONLY);
- if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
- sysctl, strerror(errno), errno))
- return -1;
-
- len = strlen(value);
- err = write(fd, value, len);
- close(fd);
- if (CHECK(err != len, "write sysctl",
- "write(%s, %s, %d): err:%d %s (%d)\n",
- sysctl, value, len, err, strerror(errno), errno))
- return -1;
-
- return 0;
-}
+#define SERVER_ADDR_IPV4 "127.0.0.1"
+#define SERVER_ADDR_IPV6 "::1"
+#define SERVER_ADDR_DUAL "::0"
+/* RFC791, 576 for minimal IPv4 datagram, minus 40 bytes of TCP header */
+#define MIN_IPV4_MSS 536
-static int prepare_netns(void)
+static struct netns_obj *prepare_netns(struct test_btf_skc_cls_ingress *skel)
{
- if (CHECK(unshare(CLONE_NEWNET), "create netns",
- "unshare(CLONE_NEWNET): %s (%d)",
- strerror(errno), errno))
- return -1;
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach,
+ .prog_fd = bpf_program__fd(skel->progs.cls_ingress));
+ struct netns_obj *ns = NULL;
- if (CHECK(system("ip link set dev lo up"),
- "ip link set dev lo up", "failed\n"))
- return -1;
+ ns = netns_new(TEST_NS, true);
+ if (!ASSERT_OK_PTR(ns, "create and join netns"))
+ return ns;
- if (CHECK(system("tc qdisc add dev lo clsact"),
- "tc qdisc add dev lo clsact", "failed\n"))
- return -1;
+ qdisc_lo.ifindex = if_nametoindex("lo");
+ if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact"))
+ goto free_ns;
- if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE),
- "install tc cls-prog at ingress", "failed\n"))
- return -1;
+ if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach),
+ "filter add dev lo ingress"))
+ goto free_ns;
/* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the
* bpf_tcp_gen_syncookie() helper.
@@ -67,168 +55,221 @@ static int prepare_netns(void)
if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") ||
write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") ||
write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1"))
- return -1;
+ goto free_ns;
- return 0;
+ return ns;
+
+free_ns:
+ netns_free(ns);
+ return NULL;
}
-static void reset_test(void)
+static void reset_test(struct test_btf_skc_cls_ingress *skel)
{
+ memset(&skel->bss->srv_sa4, 0, sizeof(skel->bss->srv_sa4));
memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6));
skel->bss->listen_tp_sport = 0;
skel->bss->req_sk_sport = 0;
skel->bss->recv_cookie = 0;
skel->bss->gen_cookie = 0;
skel->bss->linum = 0;
+ skel->bss->mss = 0;
}
-static void print_err_line(void)
+static void print_err_line(struct test_btf_skc_cls_ingress *skel)
{
if (skel->bss->linum)
printf("bpf prog error at line %u\n", skel->bss->linum);
}
-static void test_conn(void)
+static int v6only_true(int fd, void *opts)
+{
+ int mode = true;
+
+ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode));
+}
+
+static int v6only_false(int fd, void *opts)
{
- int listen_fd = -1, cli_fd = -1, err;
- socklen_t addrlen = sizeof(srv_sa6);
+ int mode = false;
+
+ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode));
+}
+
+static void run_test(struct test_btf_skc_cls_ingress *skel, bool gen_cookies,
+ int ip_mode)
+{
+ const char *tcp_syncookies = gen_cookies ? "2" : "1";
+ int listen_fd = -1, cli_fd = -1, srv_fd = -1, err;
+ struct network_helper_opts opts = { 0 };
+ struct sockaddr_storage *addr;
+ struct sockaddr_in6 srv_sa6;
+ struct sockaddr_in srv_sa4;
+ socklen_t addr_len;
+ int sock_family;
+ char *srv_addr;
int srv_port;
- if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ switch (ip_mode) {
+ case TEST_MODE_IPV4:
+ sock_family = AF_INET;
+ srv_addr = SERVER_ADDR_IPV4;
+ addr = (struct sockaddr_storage *)&srv_sa4;
+ addr_len = sizeof(srv_sa4);
+ break;
+ case TEST_MODE_IPV6:
+ opts.post_socket_cb = v6only_true;
+ sock_family = AF_INET6;
+ srv_addr = SERVER_ADDR_IPV6;
+ addr = (struct sockaddr_storage *)&srv_sa6;
+ addr_len = sizeof(srv_sa6);
+ break;
+ case TEST_MODE_DUAL:
+ opts.post_socket_cb = v6only_false;
+ sock_family = AF_INET6;
+ srv_addr = SERVER_ADDR_DUAL;
+ addr = (struct sockaddr_storage *)&srv_sa6;
+ addr_len = sizeof(srv_sa6);
+ break;
+ default:
+ PRINT_FAIL("Unknown IP mode %d", ip_mode);
return;
+ }
- listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
- if (CHECK_FAIL(listen_fd == -1))
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", tcp_syncookies))
return;
- err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
- if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
- errno))
+ listen_fd = start_server_str(sock_family, SOCK_STREAM, srv_addr, 0,
+ &opts);
+ if (!ASSERT_OK_FD(listen_fd, "start server"))
+ return;
+
+ err = getsockname(listen_fd, (struct sockaddr *)addr, &addr_len);
+ if (!ASSERT_OK(err, "getsockname(listen_fd)"))
goto done;
- memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
- srv_port = ntohs(srv_sa6.sin6_port);
- cli_fd = connect_to_fd(listen_fd, 0);
- if (CHECK_FAIL(cli_fd == -1))
+ switch (ip_mode) {
+ case TEST_MODE_IPV4:
+ memcpy(&skel->bss->srv_sa4, &srv_sa4, sizeof(srv_sa4));
+ srv_port = ntohs(srv_sa4.sin_port);
+ break;
+ case TEST_MODE_IPV6:
+ case TEST_MODE_DUAL:
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+ srv_port = ntohs(srv_sa6.sin6_port);
+ break;
+ default:
goto done;
+ }
- if (CHECK(skel->bss->listen_tp_sport != srv_port ||
- skel->bss->req_sk_sport != srv_port,
- "Unexpected sk src port",
- "listen_tp_sport:%u req_sk_sport:%u expected:%u\n",
- skel->bss->listen_tp_sport, skel->bss->req_sk_sport,
- srv_port))
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (!ASSERT_OK_FD(cli_fd, "connect client"))
goto done;
- if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie,
- "Unexpected syncookie states",
- "gen_cookie:%u recv_cookie:%u\n",
- skel->bss->gen_cookie, skel->bss->recv_cookie))
+ srv_fd = accept(listen_fd, NULL, NULL);
+ if (!ASSERT_OK_FD(srv_fd, "accept connection"))
goto done;
- CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
- skel->bss->linum);
+ ASSERT_EQ(skel->bss->listen_tp_sport, srv_port, "listen tp src port");
+
+ if (!gen_cookies) {
+ ASSERT_EQ(skel->bss->req_sk_sport, srv_port,
+ "request socket source port with syncookies disabled");
+ ASSERT_EQ(skel->bss->gen_cookie, 0,
+ "generated syncookie with syncookies disabled");
+ ASSERT_EQ(skel->bss->recv_cookie, 0,
+ "received syncookie with syncookies disabled");
+ } else {
+ ASSERT_EQ(skel->bss->req_sk_sport, 0,
+ "request socket source port with syncookies enabled");
+ ASSERT_NEQ(skel->bss->gen_cookie, 0,
+ "syncookie properly generated");
+ ASSERT_EQ(skel->bss->gen_cookie, skel->bss->recv_cookie,
+ "matching syncookies on client and server");
+ ASSERT_GT(skel->bss->mss, MIN_IPV4_MSS,
+ "MSS in cookie min value");
+ ASSERT_LT(skel->bss->mss, USHRT_MAX,
+ "MSS in cookie max value");
+ }
done:
if (listen_fd != -1)
close(listen_fd);
if (cli_fd != -1)
close(cli_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
}
-static void test_syncookie(void)
+static void test_conn_ipv4(struct test_btf_skc_cls_ingress *skel)
{
- int listen_fd = -1, cli_fd = -1, err;
- socklen_t addrlen = sizeof(srv_sa6);
- int srv_port;
-
- /* Enforce syncookie mode */
- if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
- return;
-
- listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
- if (CHECK_FAIL(listen_fd == -1))
- return;
-
- err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
- if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
- errno))
- goto done;
- memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
- srv_port = ntohs(srv_sa6.sin6_port);
-
- cli_fd = connect_to_fd(listen_fd, 0);
- if (CHECK_FAIL(cli_fd == -1))
- goto done;
+ run_test(skel, false, TEST_MODE_IPV4);
+}
- if (CHECK(skel->bss->listen_tp_sport != srv_port,
- "Unexpected tp src port",
- "listen_tp_sport:%u expected:%u\n",
- skel->bss->listen_tp_sport, srv_port))
- goto done;
+static void test_conn_ipv6(struct test_btf_skc_cls_ingress *skel)
+{
+ run_test(skel, false, TEST_MODE_IPV6);
+}
- if (CHECK(skel->bss->req_sk_sport,
- "Unexpected req_sk src port",
- "req_sk_sport:%u expected:0\n",
- skel->bss->req_sk_sport))
- goto done;
+static void test_conn_dual(struct test_btf_skc_cls_ingress *skel)
+{
+ run_test(skel, false, TEST_MODE_DUAL);
+}
- if (CHECK(!skel->bss->gen_cookie ||
- skel->bss->gen_cookie != skel->bss->recv_cookie,
- "Unexpected syncookie states",
- "gen_cookie:%u recv_cookie:%u\n",
- skel->bss->gen_cookie, skel->bss->recv_cookie))
- goto done;
+static void test_syncookie_ipv4(struct test_btf_skc_cls_ingress *skel)
+{
+ run_test(skel, true, TEST_MODE_IPV4);
+}
- CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
- skel->bss->linum);
+static void test_syncookie_ipv6(struct test_btf_skc_cls_ingress *skel)
+{
+ run_test(skel, true, TEST_MODE_IPV6);
+}
-done:
- if (listen_fd != -1)
- close(listen_fd);
- if (cli_fd != -1)
- close(cli_fd);
+static void test_syncookie_dual(struct test_btf_skc_cls_ingress *skel)
+{
+ run_test(skel, true, TEST_MODE_DUAL);
}
struct test {
const char *desc;
- void (*run)(void);
+ void (*run)(struct test_btf_skc_cls_ingress *skel);
};
#define DEF_TEST(name) { #name, test_##name }
static struct test tests[] = {
- DEF_TEST(conn),
- DEF_TEST(syncookie),
+ DEF_TEST(conn_ipv4),
+ DEF_TEST(conn_ipv6),
+ DEF_TEST(conn_dual),
+ DEF_TEST(syncookie_ipv4),
+ DEF_TEST(syncookie_ipv6),
+ DEF_TEST(syncookie_dual),
};
void test_btf_skc_cls_ingress(void)
{
- int i, err;
+ struct test_btf_skc_cls_ingress *skel;
+ struct netns_obj *ns;
+ int i;
skel = test_btf_skc_cls_ingress__open_and_load();
- if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n"))
+ if (!ASSERT_OK_PTR(skel, "test_btf_skc_cls_ingress__open_and_load"))
return;
- err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE);
- if (CHECK(err, "bpf_program__pin",
- "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) {
- test_btf_skc_cls_ingress__destroy(skel);
- return;
- }
-
for (i = 0; i < ARRAY_SIZE(tests); i++) {
if (!test__start_subtest(tests[i].desc))
continue;
- if (prepare_netns())
+ ns = prepare_netns(skel);
+ if (!ns)
break;
- tests[i].run();
+ tests[i].run(skel);
- print_err_line();
- reset_test();
+ print_err_line(skel);
+ reset_test(skel);
+ netns_free(ns);
}
- bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE);
test_btf_skc_cls_ingress__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index ca7c2a91610a..2d47cad50a51 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -12,11 +12,45 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
vfprintf(ctx, fmt, args);
}
-void test_btf_split() {
- struct btf_dump_opts opts;
+/* Write raw BTF to file, return number of bytes written or negative errno */
+static ssize_t btf_raw_write(struct btf *btf, char *file)
+{
+ ssize_t written = 0;
+ const void *data;
+ __u32 size = 0;
+ int fd, ret;
+
+ fd = mkstemp(file);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return -errno;
+
+ data = btf__raw_data(btf, &size);
+ if (!ASSERT_OK_PTR(data, "btf__raw_data")) {
+ close(fd);
+ return -EINVAL;
+ }
+ while (written < size) {
+ ret = write(fd, data + written, size - written);
+ if (!ASSERT_GE(ret, 0, "write succeeded")) {
+ close(fd);
+ return -errno;
+ }
+ written += ret;
+ }
+ close(fd);
+ return written;
+}
+
+static void __test_btf_split(bool multi)
+{
+ char multisplit_btf_file[] = "/tmp/test_btf_multisplit.XXXXXX";
+ char split_btf_file[] = "/tmp/test_btf_split.XXXXXX";
+ char base_btf_file[] = "/tmp/test_btf_base.XXXXXX";
+ ssize_t multisplit_btf_sz = 0, split_btf_sz = 0, base_btf_sz = 0;
struct btf_dump *d = NULL;
- const struct btf_type *t;
- struct btf *btf1, *btf2;
+ const struct btf_type *t, *ot;
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL;
+ struct btf *btf4 = NULL, *btf5 = NULL, *btf6 = NULL;
int str_off, i, err;
btf1 = btf__new_empty();
@@ -64,15 +98,46 @@ void test_btf_split() {
ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");
+ if (multi) {
+ btf3 = btf__new_empty_split(btf2);
+ if (!ASSERT_OK_PTR(btf3, "multi_split_btf"))
+ goto cleanup;
+ } else {
+ btf3 = btf2;
+ }
+
+ btf__add_union(btf3, "u1", 16); /* [5] union u1 { */
+ btf__add_field(btf3, "f1", 4, 0, 0); /* struct s2 f1; */
+ btf__add_field(btf3, "uf2", 1, 0, 0); /* int f2; */
+ /* } */
+
+ if (multi) {
+ t = btf__type_by_id(btf2, 5);
+ ASSERT_NULL(t, "multisplit_type_in_first_split");
+ }
+
+ t = btf__type_by_id(btf3, 5);
+ if (!ASSERT_OK_PTR(t, "split_union_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_union(t), true, "split_union_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "split_union_vlen");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "u1", "split_union_name");
+ ASSERT_EQ(btf__type_cnt(btf3), 6, "split_type_cnt");
+
+ t = btf__type_by_id(btf3, 1);
+ if (!ASSERT_OK_PTR(t, "split_base_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_int(t), true, "split_base_int");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "int", "split_base_type_name");
+
/* BTF-to-C dump of split BTF */
dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
return;
- opts.ctx = dump_buf_file;
- d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf);
+ d = btf_dump__new(btf3, btf_dump_printf, dump_buf_file, NULL);
if (!ASSERT_OK_PTR(d, "btf_dump__new"))
goto cleanup;
- for (i = 1; i <= btf__get_nr_types(btf2); i++) {
+ for (i = 1; i < btf__type_cnt(btf3); i++) {
err = btf_dump__dump_type(d, i);
ASSERT_OK(err, "dump_type_ok");
}
@@ -81,14 +146,56 @@ void test_btf_split() {
ASSERT_STREQ(dump_buf,
"struct s1 {\n"
" int f1;\n"
-"};\n"
-"\n"
+"};\n\n"
"struct s2 {\n"
" struct s1 f1;\n"
" int f2;\n"
" int *f3;\n"
+"};\n\n"
+"union u1 {\n"
+" struct s2 f1;\n"
+" int uf2;\n"
"};\n\n", "c_dump");
+ /* write base, split BTFs to files and ensure parsing succeeds */
+ base_btf_sz = btf_raw_write(btf1, base_btf_file);
+ if (base_btf_sz < 0)
+ goto cleanup;
+ split_btf_sz = btf_raw_write(btf2, split_btf_file);
+ if (split_btf_sz < 0)
+ goto cleanup;
+ btf4 = btf__parse(base_btf_file, NULL);
+ if (!ASSERT_OK_PTR(btf4, "parse_base"))
+ goto cleanup;
+ btf5 = btf__parse_split(split_btf_file, btf4);
+ if (!ASSERT_OK_PTR(btf5, "parse_split"))
+ goto cleanup;
+ if (multi) {
+ multisplit_btf_sz = btf_raw_write(btf3, multisplit_btf_file);
+ if (multisplit_btf_sz < 0)
+ goto cleanup;
+ btf6 = btf__parse_split(multisplit_btf_file, btf5);
+ if (!ASSERT_OK_PTR(btf6, "parse_multisplit"))
+ goto cleanup;
+ } else {
+ btf6 = btf5;
+ }
+
+ if (!ASSERT_EQ(btf__type_cnt(btf3), btf__type_cnt(btf6), "cmp_type_cnt"))
+ goto cleanup;
+
+ /* compare parsed to original BTF */
+ for (i = 1; i < btf__type_cnt(btf6); i++) {
+ t = btf__type_by_id(btf6, i);
+ if (!ASSERT_OK_PTR(t, "type_in_parsed_btf"))
+ goto cleanup;
+ ot = btf__type_by_id(btf3, i);
+ if (!ASSERT_OK_PTR(ot, "type_in_orig_btf"))
+ goto cleanup;
+ if (!ASSERT_EQ(memcmp(t, ot, sizeof(*ot)), 0, "cmp_parsed_orig_btf"))
+ goto cleanup;
+ }
+
cleanup:
if (dump_buf_file)
fclose(dump_buf_file);
@@ -96,4 +203,24 @@ cleanup:
btf_dump__free(d);
btf__free(btf1);
btf__free(btf2);
+ if (btf2 != btf3)
+ btf__free(btf3);
+ btf__free(btf4);
+ btf__free(btf5);
+ if (btf5 != btf6)
+ btf__free(btf6);
+ if (base_btf_sz > 0)
+ unlink(base_btf_file);
+ if (split_btf_sz > 0)
+ unlink(split_btf_file);
+ if (multisplit_btf_sz > 0)
+ unlink(multisplit_btf_file);
+}
+
+void test_btf_split(void)
+{
+ if (test__start_subtest("single_split"))
+ __test_btf_split(false);
+ if (test__start_subtest("multi_split"))
+ __test_btf_split(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
new file mode 100644
index 000000000000..3923e64c4c1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2025 Isovalent */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static void test_btf_mmap_sysfs(const char *path, struct btf *base)
+{
+ struct stat st;
+ __u64 btf_size, end;
+ void *raw_data = NULL;
+ int fd = -1;
+ long page_size;
+ struct btf *btf = NULL;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (!ASSERT_GE(page_size, 0, "get_page_size"))
+ goto cleanup;
+
+ if (!ASSERT_OK(stat(path, &st), "stat_btf"))
+ goto cleanup;
+
+ btf_size = st.st_size;
+ end = (btf_size + page_size - 1) / page_size * page_size;
+
+ fd = open(path, O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open_btf"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
+ "mprotect_writable"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
+ "mprotect_executable"))
+ goto cleanup;
+
+ /* Check padding is zeroed */
+ for (int i = btf_size; i < end; i++) {
+ if (((__u8 *)raw_data)[i] != 0) {
+ PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i);
+ goto cleanup;
+ }
+ }
+
+ btf = btf__new_split(raw_data, btf_size, base);
+ if (!ASSERT_OK_PTR(btf, "parse_btf"))
+ goto cleanup;
+
+cleanup:
+ btf__free(btf);
+ if (raw_data && raw_data != MAP_FAILED)
+ munmap(raw_data, btf_size);
+ if (fd >= 0)
+ close(fd);
+}
+
+void test_btf_sysfs(void)
+{
+ test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
new file mode 100644
index 000000000000..071430cd54de
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "test_btf_decl_tag.skel.h"
+
+/* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */
+struct btf_type_tag_test {
+ int **p;
+};
+#include "btf_type_tag.skel.h"
+#include "btf_type_tag_user.skel.h"
+#include "btf_type_tag_percpu.skel.h"
+
+static void test_btf_decl_tag(void)
+{
+ struct test_btf_decl_tag *skel;
+
+ skel = test_btf_decl_tag__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "btf_decl_tag"))
+ return;
+
+ if (skel->rodata->skip_tests) {
+ printf("%s:SKIP: btf_decl_tag attribute not supported", __func__);
+ test__skip();
+ }
+
+ test_btf_decl_tag__destroy(skel);
+}
+
+static void test_btf_type_tag(void)
+{
+ struct btf_type_tag *skel;
+
+ skel = btf_type_tag__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag"))
+ return;
+
+ if (skel->rodata->skip_tests) {
+ printf("%s:SKIP: btf_type_tag attribute not supported", __func__);
+ test__skip();
+ }
+
+ btf_type_tag__destroy(skel);
+}
+
+/* loads vmlinux_btf as well as module_btf. If the caller passes NULL as
+ * module_btf, it will not load module btf.
+ *
+ * Returns 0 on success.
+ * Return -1 On error. In case of error, the loaded btf will be freed and the
+ * input parameters will be set to pointing to NULL.
+ */
+static int load_btfs(struct btf **vmlinux_btf, struct btf **module_btf,
+ bool needs_vmlinux_tag)
+{
+ const char *module_name = "bpf_testmod";
+ __s32 type_id;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return -1;
+ }
+
+ *vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(*vmlinux_btf, "could not load vmlinux BTF"))
+ return -1;
+
+ if (!needs_vmlinux_tag)
+ goto load_module_btf;
+
+ /* skip the test if the vmlinux does not have __user tags */
+ type_id = btf__find_by_name_kind(*vmlinux_btf, "user", BTF_KIND_TYPE_TAG);
+ if (type_id <= 0) {
+ printf("%s:SKIP: btf_type_tag attribute not in vmlinux btf", __func__);
+ test__skip();
+ goto free_vmlinux_btf;
+ }
+
+load_module_btf:
+ /* skip loading module_btf, if not requested by caller */
+ if (!module_btf)
+ return 0;
+
+ *module_btf = btf__load_module_btf(module_name, *vmlinux_btf);
+ if (!ASSERT_OK_PTR(*module_btf, "could not load module BTF"))
+ goto free_vmlinux_btf;
+
+ /* skip the test if the module does not have __user tags */
+ type_id = btf__find_by_name_kind(*module_btf, "user", BTF_KIND_TYPE_TAG);
+ if (type_id <= 0) {
+ printf("%s:SKIP: btf_type_tag attribute not in %s", __func__, module_name);
+ test__skip();
+ goto free_module_btf;
+ }
+
+ return 0;
+
+free_module_btf:
+ btf__free(*module_btf);
+free_vmlinux_btf:
+ btf__free(*vmlinux_btf);
+
+ *vmlinux_btf = NULL;
+ if (module_btf)
+ *module_btf = NULL;
+ return -1;
+}
+
+static void test_btf_type_tag_mod_user(bool load_test_user1)
+{
+ struct btf *vmlinux_btf = NULL, *module_btf = NULL;
+ struct btf_type_tag_user *skel;
+ int err;
+
+ if (load_btfs(&vmlinux_btf, &module_btf, /*needs_vmlinux_tag=*/false))
+ return;
+
+ skel = btf_type_tag_user__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
+ goto cleanup;
+
+ bpf_program__set_autoload(skel->progs.test_sys_getsockname, false);
+ if (load_test_user1)
+ bpf_program__set_autoload(skel->progs.test_user2, false);
+ else
+ bpf_program__set_autoload(skel->progs.test_user1, false);
+
+ err = btf_type_tag_user__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_user");
+
+ btf_type_tag_user__destroy(skel);
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_vmlinux_user(void)
+{
+ struct btf_type_tag_user *skel;
+ struct btf *vmlinux_btf = NULL;
+ int err;
+
+ if (load_btfs(&vmlinux_btf, NULL, /*needs_vmlinux_tag=*/true))
+ return;
+
+ skel = btf_type_tag_user__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
+ goto cleanup;
+
+ bpf_program__set_autoload(skel->progs.test_user2, false);
+ bpf_program__set_autoload(skel->progs.test_user1, false);
+
+ err = btf_type_tag_user__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_user");
+
+ btf_type_tag_user__destroy(skel);
+
+cleanup:
+ btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_mod_percpu(bool load_test_percpu1)
+{
+ struct btf *vmlinux_btf, *module_btf;
+ struct btf_type_tag_percpu *skel;
+ int err;
+
+ if (load_btfs(&vmlinux_btf, &module_btf, /*needs_vmlinux_tag=*/false))
+ return;
+
+ skel = btf_type_tag_percpu__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_percpu"))
+ goto cleanup;
+
+ bpf_program__set_autoload(skel->progs.test_percpu_load, false);
+ bpf_program__set_autoload(skel->progs.test_percpu_helper, false);
+ if (load_test_percpu1)
+ bpf_program__set_autoload(skel->progs.test_percpu2, false);
+ else
+ bpf_program__set_autoload(skel->progs.test_percpu1, false);
+
+ err = btf_type_tag_percpu__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_percpu");
+
+ btf_type_tag_percpu__destroy(skel);
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_vmlinux_percpu(bool load_test)
+{
+ struct btf_type_tag_percpu *skel;
+ struct btf *vmlinux_btf = NULL;
+ int err;
+
+ if (load_btfs(&vmlinux_btf, NULL, /*needs_vmlinux_tag=*/true))
+ return;
+
+ skel = btf_type_tag_percpu__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_percpu"))
+ goto cleanup;
+
+ bpf_program__set_autoload(skel->progs.test_percpu2, false);
+ bpf_program__set_autoload(skel->progs.test_percpu1, false);
+ if (load_test) {
+ bpf_program__set_autoload(skel->progs.test_percpu_helper, false);
+
+ err = btf_type_tag_percpu__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_percpu_load");
+ } else {
+ bpf_program__set_autoload(skel->progs.test_percpu_load, false);
+
+ err = btf_type_tag_percpu__load(skel);
+ ASSERT_OK(err, "btf_type_tag_percpu_helper");
+ }
+
+ btf_type_tag_percpu__destroy(skel);
+
+cleanup:
+ btf__free(vmlinux_btf);
+}
+
+void test_btf_tag(void)
+{
+ if (test__start_subtest("btf_decl_tag"))
+ test_btf_decl_tag();
+ if (test__start_subtest("btf_type_tag"))
+ test_btf_type_tag();
+
+ if (test__start_subtest("btf_type_tag_user_mod1"))
+ test_btf_type_tag_mod_user(true);
+ if (test__start_subtest("btf_type_tag_user_mod2"))
+ test_btf_type_tag_mod_user(false);
+ if (test__start_subtest("btf_type_tag_sys_user_vmlinux"))
+ test_btf_type_tag_vmlinux_user();
+
+ if (test__start_subtest("btf_type_tag_percpu_mod1"))
+ test_btf_type_tag_mod_percpu(true);
+ if (test__start_subtest("btf_type_tag_percpu_mod2"))
+ test_btf_type_tag_mod_percpu(false);
+ if (test__start_subtest("btf_type_tag_percpu_vmlinux_load"))
+ test_btf_type_tag_vmlinux_percpu(true);
+ if (test__start_subtest("btf_type_tag_percpu_vmlinux_helper"))
+ test_btf_type_tag_vmlinux_percpu(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index f36da15b134f..6e36de1302fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -4,21 +4,16 @@
#include <bpf/btf.h>
#include "btf_helpers.h"
-static int duration = 0;
-
-void test_btf_write() {
+static void gen_btf(struct btf *btf)
+{
const struct btf_var_secinfo *vi;
const struct btf_type *t;
const struct btf_member *m;
+ const struct btf_enum64 *v64;
const struct btf_enum *v;
const struct btf_param *p;
- struct btf *btf;
int id, err, str_off;
- btf = btf__new_empty();
- if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
- return;
-
str_off = btf__find_str(btf, "int");
ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off");
@@ -177,7 +172,7 @@ void test_btf_write() {
ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
ASSERT_EQ(v->val, 2, "v2_val");
ASSERT_STREQ(btf_type_raw_dump(btf, 9),
- "[9] ENUM 'e1' size=4 vlen=2\n"
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
"\t'v1' val=1\n"
"\t'v2' val=2", "raw_dump");
@@ -208,7 +203,7 @@ void test_btf_write() {
ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind");
ASSERT_EQ(t->size, 4, "enum_fwd_sz");
ASSERT_STREQ(btf_type_raw_dump(btf, 12),
- "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump");
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "raw_dump");
/* TYPEDEF */
id = btf__add_typedef(btf, "typedef1", 1);
@@ -283,5 +278,229 @@ void test_btf_write() {
"[17] DATASEC 'datasec1' size=12 vlen=1\n"
"\ttype_id=1 offset=4 size=8", "raw_dump");
+ /* DECL_TAG */
+ id = btf__add_decl_tag(btf, "tag1", 16, -1);
+ ASSERT_EQ(id, 18, "tag_id");
+ t = btf__type_by_id(btf, 18);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind");
+ ASSERT_EQ(t->type, 16, "tag_type");
+ ASSERT_EQ(btf_decl_tag(t)->component_idx, -1, "tag_component_idx");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 18),
+ "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "raw_dump");
+
+ id = btf__add_decl_tag(btf, "tag2", 14, 1);
+ ASSERT_EQ(id, 19, "tag_id");
+ t = btf__type_by_id(btf, 19);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag2", "tag_value");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind");
+ ASSERT_EQ(t->type, 14, "tag_type");
+ ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 19),
+ "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump");
+
+ /* TYPE_TAG */
+ id = btf__add_type_tag(btf, "tag1", 1);
+ ASSERT_EQ(id, 20, "tag_id");
+ t = btf__type_by_id(btf, 20);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_TYPE_TAG, "tag_kind");
+ ASSERT_EQ(t->type, 1, "tag_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 20),
+ "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump");
+
+ /* ENUM64 */
+ id = btf__add_enum64(btf, "e1", 8, true);
+ ASSERT_EQ(id, 21, "enum64_id");
+ err = btf__add_enum64_value(btf, "v1", -1);
+ ASSERT_OK(err, "v1_res");
+ err = btf__add_enum64_value(btf, "v2", 0x123456789); /* 4886718345 */
+ ASSERT_OK(err, "v2_res");
+ t = btf__type_by_id(btf, 21);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "enum64_vlen");
+ ASSERT_EQ(t->size, 8, "enum64_sz");
+ v64 = btf_enum64(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name");
+ ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val");
+ ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val");
+ v64 = btf_enum64(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v2", "v2_name");
+ ASSERT_EQ(v64->val_hi32, 0x1, "v2_val");
+ ASSERT_EQ(v64->val_lo32, 0x23456789, "v2_val");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 21),
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345", "raw_dump");
+
+ id = btf__add_enum64(btf, "e1", 8, false);
+ ASSERT_EQ(id, 22, "enum64_id");
+ err = btf__add_enum64_value(btf, "v1", 0xffffffffFFFFFFFF); /* 18446744073709551615 */
+ ASSERT_OK(err, "v1_res");
+ t = btf__type_by_id(btf, 22);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "enum64_vlen");
+ ASSERT_EQ(t->size, 8, "enum64_sz");
+ v64 = btf_enum64(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name");
+ ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val");
+ ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 22),
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615", "raw_dump");
+}
+
+static void test_btf_add()
+{
+ struct btf *btf;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "new_empty"))
+ return;
+
+ gen_btf(btf);
+
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] CONST '(anon)' type_id=5",
+ "[4] VOLATILE '(anon)' type_id=3",
+ "[5] RESTRICT '(anon)' type_id=4",
+ "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10",
+ "[7] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
+ "[8] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
+ "\t'v1' val=1\n"
+ "\t'v2' val=2",
+ "[10] FWD 'struct_fwd' fwd_kind=struct",
+ "[11] FWD 'union_fwd' fwd_kind=union",
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
+ "[13] TYPEDEF 'typedef1' type_id=1",
+ "[14] FUNC 'func1' type_id=15 linkage=global",
+ "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+ "\t'p1' type_id=1\n"
+ "\t'p2' type_id=2",
+ "[16] VAR 'var1' type_id=1, linkage=global-alloc",
+ "[17] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=1 offset=4 size=8",
+ "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
+ "[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
+ "[20] TYPE_TAG 'tag1' type_id=1",
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615");
+
btf__free(btf);
}
+
+static void test_btf_add_btf()
+{
+ struct btf *btf1 = NULL, *btf2 = NULL;
+ int id;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "btf1"))
+ return;
+
+ btf2 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf2, "btf2"))
+ goto cleanup;
+
+ gen_btf(btf1);
+ gen_btf(btf2);
+
+ id = btf__add_btf(btf1, btf2);
+ if (!ASSERT_EQ(id, 23, "id"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] CONST '(anon)' type_id=5",
+ "[4] VOLATILE '(anon)' type_id=3",
+ "[5] RESTRICT '(anon)' type_id=4",
+ "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10",
+ "[7] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
+ "[8] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
+ "\t'v1' val=1\n"
+ "\t'v2' val=2",
+ "[10] FWD 'struct_fwd' fwd_kind=struct",
+ "[11] FWD 'union_fwd' fwd_kind=union",
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
+ "[13] TYPEDEF 'typedef1' type_id=1",
+ "[14] FUNC 'func1' type_id=15 linkage=global",
+ "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+ "\t'p1' type_id=1\n"
+ "\t'p2' type_id=2",
+ "[16] VAR 'var1' type_id=1, linkage=global-alloc",
+ "[17] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=1 offset=4 size=8",
+ "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
+ "[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
+ "[20] TYPE_TAG 'tag1' type_id=1",
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615",
+
+ /* types appended from the second BTF */
+ "[23] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[24] PTR '(anon)' type_id=23",
+ "[25] CONST '(anon)' type_id=27",
+ "[26] VOLATILE '(anon)' type_id=25",
+ "[27] RESTRICT '(anon)' type_id=26",
+ "[28] ARRAY '(anon)' type_id=24 index_type_id=23 nr_elems=10",
+ "[29] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=23 bits_offset=0\n"
+ "\t'f2' type_id=23 bits_offset=32 bitfield_size=16",
+ "[30] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=23 bits_offset=0 bitfield_size=16",
+ "[31] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
+ "\t'v1' val=1\n"
+ "\t'v2' val=2",
+ "[32] FWD 'struct_fwd' fwd_kind=struct",
+ "[33] FWD 'union_fwd' fwd_kind=union",
+ "[34] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
+ "[35] TYPEDEF 'typedef1' type_id=23",
+ "[36] FUNC 'func1' type_id=37 linkage=global",
+ "[37] FUNC_PROTO '(anon)' ret_type_id=23 vlen=2\n"
+ "\t'p1' type_id=23\n"
+ "\t'p2' type_id=24",
+ "[38] VAR 'var1' type_id=23, linkage=global-alloc",
+ "[39] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=23 offset=4 size=8",
+ "[40] DECL_TAG 'tag1' type_id=38 component_idx=-1",
+ "[41] DECL_TAG 'tag2' type_id=36 component_idx=1",
+ "[42] TYPE_TAG 'tag1' type_id=23",
+ "[43] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[44] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615");
+
+cleanup:
+ btf__free(btf1);
+ btf__free(btf2);
+}
+
+void test_btf_write()
+{
+ if (test__start_subtest("btf_add"))
+ test_btf_add();
+ if (test__start_subtest("btf_add_btf"))
+ test_btf_add_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/build_id.c b/tools/testing/selftests/bpf/prog_tests/build_id.c
new file mode 100644
index 000000000000..aec9c8d6bc96
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/build_id.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "test_build_id.skel.h"
+
+static char build_id[BPF_BUILD_ID_SIZE];
+static int build_id_sz;
+
+static void print_stack(struct bpf_stack_build_id *stack, int frame_cnt)
+{
+ int i, j;
+
+ for (i = 0; i < frame_cnt; i++) {
+ printf("FRAME #%02d: ", i);
+ switch (stack[i].status) {
+ case BPF_STACK_BUILD_ID_EMPTY:
+ printf("<EMPTY>\n");
+ break;
+ case BPF_STACK_BUILD_ID_VALID:
+ printf("BUILD ID = ");
+ for (j = 0; j < BPF_BUILD_ID_SIZE; j++)
+ printf("%02hhx", (unsigned)stack[i].build_id[j]);
+ printf(" OFFSET = %llx", (unsigned long long)stack[i].offset);
+ break;
+ case BPF_STACK_BUILD_ID_IP:
+ printf("IP = %llx", (unsigned long long)stack[i].ip);
+ break;
+ default:
+ printf("UNEXPECTED STATUS %d ", stack[i].status);
+ break;
+ }
+ printf("\n");
+ }
+}
+
+static void subtest_nofault(bool build_id_resident)
+{
+ struct test_build_id *skel;
+ struct bpf_stack_build_id *stack;
+ int frame_cnt;
+
+ skel = test_build_id__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->links.uprobe_nofault = bpf_program__attach(skel->progs.uprobe_nofault);
+ if (!ASSERT_OK_PTR(skel->links.uprobe_nofault, "link"))
+ goto cleanup;
+
+ if (build_id_resident)
+ ASSERT_OK(system("./uprobe_multi uprobe-paged-in"), "trigger_uprobe");
+ else
+ ASSERT_OK(system("./uprobe_multi uprobe-paged-out"), "trigger_uprobe");
+
+ if (!ASSERT_GT(skel->bss->res_nofault, 0, "res"))
+ goto cleanup;
+
+ stack = skel->bss->stack_nofault;
+ frame_cnt = skel->bss->res_nofault / sizeof(struct bpf_stack_build_id);
+ if (env.verbosity >= VERBOSE_NORMAL)
+ print_stack(stack, frame_cnt);
+
+ if (build_id_resident) {
+ ASSERT_EQ(stack[0].status, BPF_STACK_BUILD_ID_VALID, "build_id_status");
+ ASSERT_EQ(memcmp(stack[0].build_id, build_id, build_id_sz), 0, "build_id_match");
+ } else {
+ ASSERT_EQ(stack[0].status, BPF_STACK_BUILD_ID_IP, "build_id_status");
+ }
+
+cleanup:
+ test_build_id__destroy(skel);
+}
+
+static void subtest_sleepable(void)
+{
+ struct test_build_id *skel;
+ struct bpf_stack_build_id *stack;
+ int frame_cnt;
+
+ skel = test_build_id__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->links.uprobe_sleepable = bpf_program__attach(skel->progs.uprobe_sleepable);
+ if (!ASSERT_OK_PTR(skel->links.uprobe_sleepable, "link"))
+ goto cleanup;
+
+ /* force build ID to not be paged in */
+ ASSERT_OK(system("./uprobe_multi uprobe-paged-out"), "trigger_uprobe");
+
+ if (!ASSERT_GT(skel->bss->res_sleepable, 0, "res"))
+ goto cleanup;
+
+ stack = skel->bss->stack_sleepable;
+ frame_cnt = skel->bss->res_sleepable / sizeof(struct bpf_stack_build_id);
+ if (env.verbosity >= VERBOSE_NORMAL)
+ print_stack(stack, frame_cnt);
+
+ ASSERT_EQ(stack[0].status, BPF_STACK_BUILD_ID_VALID, "build_id_status");
+ ASSERT_EQ(memcmp(stack[0].build_id, build_id, build_id_sz), 0, "build_id_match");
+
+cleanup:
+ test_build_id__destroy(skel);
+}
+
+void serial_test_build_id(void)
+{
+ build_id_sz = read_build_id("uprobe_multi", build_id, sizeof(build_id));
+ ASSERT_EQ(build_id_sz, BPF_BUILD_ID_SIZE, "parse_build_id");
+
+ if (test__start_subtest("nofault-paged-out"))
+ subtest_nofault(false /* not resident */);
+ if (test__start_subtest("nofault-paged-in"))
+ subtest_nofault(true /* resident */);
+ if (test__start_subtest("sleepable"))
+ subtest_sleepable();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cb_refs.c b/tools/testing/selftests/bpf/prog_tests/cb_refs.c
new file mode 100644
index 000000000000..c40df623a8f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cb_refs.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf/libbpf.h"
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "cb_refs.skel.h"
+
+static char log_buf[1024 * 1024];
+
+struct {
+ const char *prog_name;
+ const char *err_msg;
+} cb_refs_tests[] = {
+ { "underflow_prog", "must point to scalar, or struct with scalar" },
+ { "leak_prog", "Possibly NULL pointer passed to helper arg2" },
+ { "nested_cb", "Unreleased reference id=4 alloc_insn=2" }, /* alloc_insn=2{4,5} */
+ { "non_cb_transfer_ref", "Unreleased reference id=4 alloc_insn=1" }, /* alloc_insn=1{1,2} */
+};
+
+void test_cb_refs(void)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct bpf_program *prog;
+ struct cb_refs *skel;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cb_refs_tests); i++) {
+ LIBBPF_OPTS(bpf_test_run_opts, run_opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ skel = cb_refs__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "cb_refs__open_and_load"))
+ return;
+ prog = bpf_object__find_program_by_name(skel->obj, cb_refs_tests[i].prog_name);
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_ERR(cb_refs__load(skel), "cb_refs__load"))
+ bpf_prog_test_run_opts(bpf_program__fd(prog), &run_opts);
+ if (!ASSERT_OK_PTR(strstr(log_buf, cb_refs_tests[i].err_msg), "expected error message")) {
+ fprintf(stderr, "Expected: %s\n", cb_refs_tests[i].err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+ cb_refs__destroy(skel);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index 643dfa35419c..10224f845568 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -56,8 +56,9 @@ static bool assert_storage_noexist(struct bpf_map *map, const void *key)
static bool connect_send(const char *cgroup_path)
{
- bool res = true;
int server_fd = -1, client_fd = -1;
+ char message[] = "message";
+ bool res = true;
if (join_cgroup(cgroup_path))
goto out_clean;
@@ -70,7 +71,10 @@ static bool connect_send(const char *cgroup_path)
if (client_fd < 0)
goto out_clean;
- if (send(client_fd, "message", strlen("message"), 0) < 0)
+ if (send(client_fd, &message, sizeof(message), 0) < 0)
+ goto out_clean;
+
+ if (read(server_fd, &message, sizeof(message)) < 0)
goto out_clean;
res = false;
@@ -102,8 +106,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_link = bpf_program__attach_cgroup(obj->progs.egress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
- "err %ld", PTR_ERR(parent_link)))
+ if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -126,8 +129,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_link = bpf_program__attach_cgroup(obj->progs.egress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_link), "child-cg-attach",
- "err %ld", PTR_ERR(child_link)))
+ if (!ASSERT_OK_PTR(child_link, "child-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -147,10 +149,8 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_link))
- bpf_link__destroy(parent_link);
- if (!IS_ERR(child_link))
- bpf_link__destroy(child_link);
+ bpf_link__destroy(parent_link);
+ bpf_link__destroy(child_link);
cg_storage_multi_egress_only__destroy(obj);
}
@@ -176,18 +176,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
- "err %ld", PTR_ERR(parent_egress1_link)))
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
goto close_bpf_object;
parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
- "err %ld", PTR_ERR(parent_egress2_link)))
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
goto close_bpf_object;
parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
- "err %ld", PTR_ERR(parent_ingress_link)))
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -217,22 +214,19 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
/* Attach to parent and child cgroup, trigger packet from child.
* Assert that there is six additional runs, parent cgroup egresses and
* ingress, child cgroup egresses and ingress.
- * Assert that egree and ingress storages are separate.
+ * Assert that egress and ingress storages are separate.
*/
child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
- "err %ld", PTR_ERR(child_egress1_link)))
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
goto close_bpf_object;
child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
- "err %ld", PTR_ERR(child_egress2_link)))
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
goto close_bpf_object;
child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
- "err %ld", PTR_ERR(child_ingress_link)))
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -264,18 +258,12 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_egress1_link))
- bpf_link__destroy(parent_egress1_link);
- if (!IS_ERR(parent_egress2_link))
- bpf_link__destroy(parent_egress2_link);
- if (!IS_ERR(parent_ingress_link))
- bpf_link__destroy(parent_ingress_link);
- if (!IS_ERR(child_egress1_link))
- bpf_link__destroy(child_egress1_link);
- if (!IS_ERR(child_egress2_link))
- bpf_link__destroy(child_egress2_link);
- if (!IS_ERR(child_ingress_link))
- bpf_link__destroy(child_ingress_link);
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
cg_storage_multi_isolated__destroy(obj);
}
@@ -301,18 +289,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
- "err %ld", PTR_ERR(parent_egress1_link)))
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
goto close_bpf_object;
parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
- "err %ld", PTR_ERR(parent_egress2_link)))
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
goto close_bpf_object;
parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
- "err %ld", PTR_ERR(parent_ingress_link)))
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -338,18 +323,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
- "err %ld", PTR_ERR(child_egress1_link)))
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
goto close_bpf_object;
child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
- "err %ld", PTR_ERR(child_egress2_link)))
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
goto close_bpf_object;
child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
- "err %ld", PTR_ERR(child_ingress_link)))
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -375,23 +357,17 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_egress1_link))
- bpf_link__destroy(parent_egress1_link);
- if (!IS_ERR(parent_egress2_link))
- bpf_link__destroy(parent_egress2_link);
- if (!IS_ERR(parent_ingress_link))
- bpf_link__destroy(parent_ingress_link);
- if (!IS_ERR(child_egress1_link))
- bpf_link__destroy(child_egress1_link);
- if (!IS_ERR(child_egress2_link))
- bpf_link__destroy(child_egress2_link);
- if (!IS_ERR(child_ingress_link))
- bpf_link__destroy(child_ingress_link);
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
cg_storage_multi_shared__destroy(obj);
}
-void test_cg_storage_multi(void)
+void serial_test_cg_storage_multi(void)
{
int parent_cgroup_fd = -1, child_cgroup_fd = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
new file mode 100644
index 000000000000..25332e596750
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cgroup1_hierarchy.skel.h"
+
+static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success"))
+ goto cleanup;
+
+ err = bpf_link__destroy(fentry_link);
+ ASSERT_OK(err, "destroy_lsm");
+
+cleanup:
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_fentry");
+}
+
+void test_cgroup1_hierarchy(void)
+{
+ struct test_cgroup1_hierarchy *skel;
+ __u64 current_cgid;
+ int hid, err;
+
+ skel = test_cgroup1_hierarchy__open();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ skel->bss->target_pid = getpid();
+
+ err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1");
+ if (!ASSERT_OK(err, "fentry_set_target"))
+ goto destroy;
+
+ err = test_cgroup1_hierarchy__load(skel);
+ if (!ASSERT_OK(err, "load"))
+ goto destroy;
+
+ /* Setup cgroup1 hierarchy */
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto destroy;
+ err = setup_classid_environment();
+ if (!ASSERT_OK(err, "setup_classid_environment"))
+ goto cleanup_cgroup;
+
+ err = join_classid();
+ if (!ASSERT_OK(err, "join_cgroup1"))
+ goto cleanup;
+
+ current_cgid = get_classid_cgroup_id();
+ if (!ASSERT_GE(current_cgid, 0, "cgroup1 id"))
+ goto cleanup;
+
+ hid = get_cgroup1_hierarchy_id("net_cls");
+ if (!ASSERT_GE(hid, 0, "cgroup1 id"))
+ goto cleanup;
+ skel->bss->target_hid = hid;
+
+ if (test__start_subtest("test_cgroup1_hierarchy")) {
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1(skel);
+ }
+
+ if (test__start_subtest("test_root_cgid")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 0;
+ bpf_cgroup1(skel);
+ }
+
+ if (test__start_subtest("test_invalid_level")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 1;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgid")) {
+ skel->bss->target_ancestor_cgid = 0;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_hid")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 0;
+ skel->bss->target_hid = -1;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgrp_name")) {
+ skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl");
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgrp_name2")) {
+ skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,");
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_sleepable_prog")) {
+ skel->bss->target_hid = hid;
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_sleepable(skel);
+ }
+
+cleanup:
+ cleanup_classid_environment();
+cleanup_cgroup:
+ cleanup_cgroup_environment();
+destroy:
+ test_cgroup1_hierarchy__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c b/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c
new file mode 100644
index 000000000000..3f9ffdf71343
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "cgroup_helpers.h"
+#include "cgroup_ancestor.skel.h"
+
+#define CGROUP_PATH "/skb_cgroup_test"
+#define TEST_NS "cgroup_ancestor_ns"
+#define NUM_CGROUP_LEVELS 4
+#define WAIT_AUTO_IP_MAX_ATTEMPT 10
+#define DST_ADDR "::1"
+#define DST_PORT 1234
+#define MAX_ASSERT_NAME 32
+
+struct test_data {
+ struct cgroup_ancestor *skel;
+ struct bpf_tc_hook qdisc;
+ struct bpf_tc_opts tc_attach;
+ struct nstoken *ns;
+};
+
+static int send_datagram(void)
+{
+ unsigned char buf[] = "some random test data";
+ struct sockaddr_in6 addr = { .sin6_family = AF_INET6,
+ .sin6_port = htons(DST_PORT), };
+ int sock, n;
+
+ if (!ASSERT_EQ(inet_pton(AF_INET6, DST_ADDR, &addr.sin6_addr), 1,
+ "inet_pton"))
+ return -1;
+
+ sock = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_OK_FD(sock, "create socket"))
+ return sock;
+
+ if (!ASSERT_OK(connect(sock, (struct sockaddr *)&addr, sizeof(addr)), "connect")) {
+ close(sock);
+ return -1;
+ }
+
+ n = sendto(sock, buf, sizeof(buf), 0, (const struct sockaddr *)&addr,
+ sizeof(addr));
+ close(sock);
+ return ASSERT_EQ(n, sizeof(buf), "send data") ? 0 : -1;
+}
+
+static int setup_network(struct test_data *t)
+{
+ SYS(fail, "ip netns add %s", TEST_NS);
+ t->ns = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(t->ns, "open netns"))
+ goto cleanup_ns;
+
+ SYS(close_ns, "ip link set lo up");
+
+ memset(&t->qdisc, 0, sizeof(t->qdisc));
+ t->qdisc.sz = sizeof(t->qdisc);
+ t->qdisc.attach_point = BPF_TC_EGRESS;
+ t->qdisc.ifindex = if_nametoindex("lo");
+ if (!ASSERT_NEQ(t->qdisc.ifindex, 0, "if_nametoindex"))
+ goto close_ns;
+ if (!ASSERT_OK(bpf_tc_hook_create(&t->qdisc), "qdisc add"))
+ goto close_ns;
+
+ memset(&t->tc_attach, 0, sizeof(t->tc_attach));
+ t->tc_attach.sz = sizeof(t->tc_attach);
+ t->tc_attach.prog_fd = bpf_program__fd(t->skel->progs.log_cgroup_id);
+ if (!ASSERT_OK(bpf_tc_attach(&t->qdisc, &t->tc_attach), "filter add"))
+ goto cleanup_qdisc;
+
+ return 0;
+
+cleanup_qdisc:
+ bpf_tc_hook_destroy(&t->qdisc);
+close_ns:
+ close_netns(t->ns);
+cleanup_ns:
+ SYS_NOFAIL("ip netns del %s", TEST_NS);
+fail:
+ return 1;
+}
+
+static void cleanup_network(struct test_data *t)
+{
+ bpf_tc_detach(&t->qdisc, &t->tc_attach);
+ bpf_tc_hook_destroy(&t->qdisc);
+ close_netns(t->ns);
+ SYS_NOFAIL("ip netns del %s", TEST_NS);
+}
+
+static void check_ancestors_ids(struct test_data *t)
+{
+ __u64 expected_ids[NUM_CGROUP_LEVELS];
+ char assert_name[MAX_ASSERT_NAME];
+ __u32 level;
+
+ expected_ids[0] = get_cgroup_id("/.."); /* root cgroup */
+ expected_ids[1] = get_cgroup_id("");
+ expected_ids[2] = get_cgroup_id(CGROUP_PATH);
+ expected_ids[3] = 0; /* non-existent cgroup */
+
+ for (level = 0; level < NUM_CGROUP_LEVELS; level++) {
+ snprintf(assert_name, MAX_ASSERT_NAME,
+ "ancestor id at level %d", level);
+ ASSERT_EQ(t->skel->bss->cgroup_ids[level], expected_ids[level],
+ assert_name);
+ }
+}
+
+void test_cgroup_ancestor(void)
+{
+ struct test_data t;
+ int cgroup_fd;
+
+ t.skel = cgroup_ancestor__open_and_load();
+ if (!ASSERT_OK_PTR(t.skel, "open and load"))
+ return;
+
+ t.skel->bss->dport = htons(DST_PORT);
+ cgroup_fd = cgroup_setup_and_join(CGROUP_PATH);
+ if (cgroup_fd < 0)
+ goto cleanup_progs;
+
+ if (setup_network(&t))
+ goto cleanup_cgroups;
+
+ if (send_datagram())
+ goto cleanup_network;
+
+ check_ancestors_ids(&t);
+
+cleanup_network:
+ cleanup_network(&t);
+cleanup_cgroups:
+ close(cgroup_fd);
+ cleanup_cgroup_environment();
+cleanup_progs:
+ cgroup_ancestor__destroy(t.skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
index 70e94e783070..9367bd2f0ae1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -14,14 +14,14 @@ static int prog_load(void)
BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1 */
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(prog);
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
}
-void test_cgroup_attach_autodetach(void)
+void serial_test_cgroup_attach_autodetach(void)
{
__u32 duration = 0, prog_cnt = 4, attach_flags;
int allow_prog[2] = {-1};
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 0a1fc9816cef..db0b7bac78d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -15,22 +15,22 @@ static int prog_load_cnt(int verdict, int val)
int cgroup_storage_fd, percpu_cgroup_storage_fd;
if (map_fd < 0)
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
if (map_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
}
- cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ cgroup_storage_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL);
if (cgroup_storage_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
}
- percpu_cgroup_storage_fd = bpf_create_map(
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ percpu_cgroup_storage_fd = bpf_map_create(
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL);
if (percpu_cgroup_storage_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
@@ -63,10 +63,10 @@ static int prog_load_cnt(int verdict, int val)
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(prog);
int ret;
- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ ret = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
@@ -74,7 +74,7 @@ static int prog_load_cnt(int verdict, int val)
return ret;
}
-void test_cgroup_attach_multi(void)
+void serial_test_cgroup_attach_multi(void)
{
__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
@@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void)
prog_cnt = 2;
CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
BPF_F_QUERY_EFFECTIVE, &attach_flags,
- prog_ids, &prog_cnt) != -1);
+ prog_ids, &prog_cnt) >= 0);
CHECK_FAIL(errno != ENOSPC);
CHECK_FAIL(prog_cnt != 4);
/* check that prog_ids are returned even when buffer is too small */
@@ -194,14 +194,14 @@ void test_cgroup_attach_multi(void)
attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
attach_opts.replace_prog_fd = allow_prog[0];
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_override", "unexpected success\n"))
goto err;
CHECK_FAIL(errno != EINVAL);
attach_opts.flags = BPF_F_REPLACE;
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_no_multi", "unexpected success\n"))
goto err;
@@ -209,7 +209,7 @@ void test_cgroup_attach_multi(void)
attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
attach_opts.replace_prog_fd = -1;
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_bad_fd", "unexpected success\n"))
goto err;
@@ -217,7 +217,7 @@ void test_cgroup_attach_multi(void)
/* replacing a program that is not attached to cgroup should fail */
attach_opts.replace_prog_fd = allow_prog[3];
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_no_ent", "unexpected success\n"))
goto err;
@@ -225,14 +225,14 @@ void test_cgroup_attach_multi(void)
/* replace 1st from the top program */
attach_opts.replace_prog_fd = allow_prog[0];
- if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"prog_replace", "errno=%d\n", errno))
goto err;
/* replace program with itself */
attach_opts.replace_prog_fd = allow_prog[6];
- if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"prog_replace", "errno=%d\n", errno))
goto err;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
index 9e96f8d87fea..9421a5b7f4e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -16,14 +16,14 @@ static int prog_load(int verdict)
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(prog);
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
}
-void test_cgroup_attach_override(void)
+void serial_test_cgroup_attach_override(void)
{
int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1;
__u32 duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_dev.c b/tools/testing/selftests/bpf/prog_tests/cgroup_dev.c
new file mode 100644
index 000000000000..5ab7547e38c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_dev.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <errno.h>
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "dev_cgroup.skel.h"
+
+#define TEST_CGROUP "/test-bpf-based-device-cgroup/"
+#define TEST_BUFFER_SIZE 64
+
+static void test_mknod(const char *path, mode_t mode, int dev_major,
+ int dev_minor, int expected_ret, int expected_errno)
+{
+ int ret;
+
+ unlink(path);
+ ret = mknod(path, mode, makedev(dev_major, dev_minor));
+ ASSERT_EQ(ret, expected_ret, "mknod");
+ if (expected_ret)
+ ASSERT_EQ(errno, expected_errno, "mknod errno");
+ else
+ unlink(path);
+}
+
+static void test_read(const char *path, char *buf, int buf_size,
+ int expected_ret, int expected_errno)
+{
+ int ret, fd;
+
+ fd = open(path, O_RDONLY);
+
+ /* A bare open on unauthorized device should fail */
+ if (expected_ret < 0) {
+ ASSERT_EQ(fd, expected_ret, "open ret for read");
+ ASSERT_EQ(errno, expected_errno, "open errno for read");
+ if (fd >= 0)
+ close(fd);
+ return;
+ }
+
+ if (!ASSERT_OK_FD(fd, "open ret for read"))
+ return;
+
+ ret = read(fd, buf, buf_size);
+ ASSERT_EQ(ret, expected_ret, "read");
+
+ close(fd);
+}
+
+static void test_write(const char *path, char *buf, int buf_size,
+ int expected_ret, int expected_errno)
+{
+ int ret, fd;
+
+ fd = open(path, O_WRONLY);
+
+ /* A bare open on unauthorized device should fail */
+ if (expected_ret < 0) {
+ ASSERT_EQ(fd, expected_ret, "open ret for write");
+ ASSERT_EQ(errno, expected_errno, "open errno for write");
+ if (fd >= 0)
+ close(fd);
+ return;
+ }
+
+ if (!ASSERT_OK_FD(fd, "open ret for write"))
+ return;
+
+ ret = write(fd, buf, buf_size);
+ ASSERT_EQ(ret, expected_ret, "write");
+
+ close(fd);
+}
+
+void test_cgroup_dev(void)
+{
+ char buf[TEST_BUFFER_SIZE] = "some random test data";
+ struct dev_cgroup *skel;
+ int cgroup_fd;
+
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (!ASSERT_OK_FD(cgroup_fd, "cgroup switch"))
+ return;
+
+ skel = dev_cgroup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "load program"))
+ goto cleanup_cgroup;
+
+ skel->links.bpf_prog1 =
+ bpf_program__attach_cgroup(skel->progs.bpf_prog1, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.bpf_prog1, "attach_program"))
+ goto cleanup_progs;
+
+ if (test__start_subtest("allow-mknod"))
+ test_mknod("/dev/test_dev_cgroup_null", S_IFCHR, 1, 3, 0, 0);
+
+ if (test__start_subtest("allow-read"))
+ test_read("/dev/urandom", buf, TEST_BUFFER_SIZE,
+ TEST_BUFFER_SIZE, 0);
+
+ if (test__start_subtest("allow-write"))
+ test_write("/dev/null", buf, TEST_BUFFER_SIZE,
+ TEST_BUFFER_SIZE, 0);
+
+ if (test__start_subtest("deny-mknod"))
+ test_mknod("/dev/test_dev_cgroup_zero", S_IFCHR, 1, 5, -1,
+ EPERM);
+
+ if (test__start_subtest("deny-read"))
+ test_read("/dev/random", buf, TEST_BUFFER_SIZE, -1, EPERM);
+
+ if (test__start_subtest("deny-write"))
+ test_write("/dev/zero", buf, TEST_BUFFER_SIZE, -1, EPERM);
+
+ if (test__start_subtest("deny-mknod-wrong-type"))
+ test_mknod("/dev/test_dev_cgroup_block", S_IFBLK, 1, 3, -1,
+ EPERM);
+
+cleanup_progs:
+ dev_cgroup__destroy(skel);
+cleanup_cgroup:
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_get_current_cgroup_id.c b/tools/testing/selftests/bpf/prog_tests/cgroup_get_current_cgroup_id.c
new file mode 100644
index 000000000000..7a1643b03bf3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_get_current_cgroup_id.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "get_cgroup_id_kern.skel.h"
+
+#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
+
+void test_cgroup_get_current_cgroup_id(void)
+{
+ struct get_cgroup_id_kern *skel;
+ const struct timespec req = {
+ .tv_sec = 0,
+ .tv_nsec = 1,
+ };
+ int cgroup_fd;
+ __u64 ucgid;
+
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (!ASSERT_OK_FD(cgroup_fd, "cgroup switch"))
+ return;
+
+ skel = get_cgroup_id_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "load program"))
+ goto cleanup_cgroup;
+
+ if (!ASSERT_OK(get_cgroup_id_kern__attach(skel), "attach bpf program"))
+ goto cleanup_progs;
+
+ skel->bss->expected_pid = getpid();
+ /* trigger the syscall on which is attached the tested prog */
+ if (!ASSERT_OK(syscall(__NR_nanosleep, &req, NULL), "nanosleep"))
+ goto cleanup_progs;
+
+ ucgid = get_cgroup_id(TEST_CGROUP);
+
+ ASSERT_EQ(skel->bss->cg_id, ucgid, "compare cgroup ids");
+
+cleanup_progs:
+ get_cgroup_id_kern__destroy(skel);
+cleanup_cgroup:
+ close(cgroup_fd);
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
new file mode 100644
index 000000000000..2bb5773d6f99
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cgroup_getset_retval_setsockopt.skel.h"
+#include "cgroup_getset_retval_getsockopt.skel.h"
+#include "cgroup_getset_retval_hooks.skel.h"
+
+#define SOL_CUSTOM 0xdeadbeef
+
+static int zero;
+
+static void test_setsockopt_set(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that sets EUNATCH, assert that
+ * we actually get that error when we run setsockopt()
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that sets EUNATCH, and one that gets the
+ * previously set errno. Assert that we get the same errno back.
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that gets the previously set errno.
+ * Assert that, without anything setting one, we get 0.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that gets the previously set errno, and then
+ * one that sets the errno to EUNATCH. Assert that the get does not
+ * see EUNATCH set later, and does not prevent EUNATCH from being set.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+ bpf_link__destroy(link_set_eunatch);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
+ * and then one that gets the exported errno. Assert both the syscall
+ * and the helper sees the last set errno.
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+ bpf_link__destroy(link_set_eisconn);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that return a reject without setting errno
+ * (legacy reject), and one that gets the errno. Assert that for
+ * backward compatibility the syscall result in EPERM, and this
+ * is also visible to the helper.
+ */
+ link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_legacy_eperm);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that sets EUNATCH, then one that return a reject
+ * without setting errno, and then one that gets the exported errno.
+ * Assert both the syscall and the helper's errno are unaffected by
+ * the second prog (i.e. legacy rejects does not override the errno
+ * to EPERM).
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+ link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+ bpf_link__destroy(link_legacy_eperm);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_getsockopt_get(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_get_retval = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach getsockopt that gets previously set errno. Assert that the
+ * error from kernel is in both ctx_retval_value and retval_value.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_set_eisconn = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach getsockopt that sets retval to -EISCONN. Assert that this
+ * overrides the value from kernel.
+ */
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eisconn);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL;
+ struct bpf_link *link_get_retval = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach getsockopt that sets retval to -EISCONN, and one that clears
+ * ctx retval. Assert that the clearing ctx retval is synced to helper
+ * and clears any errors both from kernel and BPF..
+ */
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+ link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eisconn);
+ bpf_link__destroy(link_clear_retval);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+struct exposed_hook {
+ const char *name;
+ int expected_err;
+} exposed_hooks[] = {
+
+#define BPF_RETVAL_HOOK(NAME, SECTION, CTX, EXPECTED_ERR) \
+ { \
+ .name = #NAME, \
+ .expected_err = EXPECTED_ERR, \
+ },
+
+#include "cgroup_getset_retval_hooks.h"
+
+#undef BPF_RETVAL_HOOK
+};
+
+static void test_exposed_hooks(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_hooks *skel;
+ struct bpf_program *prog;
+ int err;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(exposed_hooks); i++) {
+ skel = cgroup_getset_retval_hooks__open();
+ if (!ASSERT_OK_PTR(skel, "cgroup_getset_retval_hooks__open"))
+ continue;
+
+ prog = bpf_object__find_program_by_name(skel->obj, exposed_hooks[i].name);
+ if (!ASSERT_NEQ(prog, NULL, "bpf_object__find_program_by_name"))
+ goto close_skel;
+
+ err = bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(err, "bpf_program__set_autoload"))
+ goto close_skel;
+
+ err = cgroup_getset_retval_hooks__load(skel);
+ ASSERT_EQ(err, exposed_hooks[i].expected_err, "expected_err");
+
+close_skel:
+ cgroup_getset_retval_hooks__destroy(skel);
+ }
+}
+
+void test_cgroup_getset_retval(void)
+{
+ int cgroup_fd = -1;
+ int sock_fd = -1;
+
+ cgroup_fd = test__join_cgroup("/cgroup_getset_retval");
+ if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+ goto close_fd;
+
+ sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+ if (!ASSERT_GE(sock_fd, 0, "start-server"))
+ goto close_fd;
+
+ if (test__start_subtest("setsockopt-set"))
+ test_setsockopt_set(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-set_and_get"))
+ test_setsockopt_set_and_get(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-default_zero"))
+ test_setsockopt_default_zero(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-default_zero_and_set"))
+ test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-override"))
+ test_setsockopt_override(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-legacy_eperm"))
+ test_setsockopt_legacy_eperm(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-legacy_no_override"))
+ test_setsockopt_legacy_no_override(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("getsockopt-get"))
+ test_getsockopt_get(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("getsockopt-override"))
+ test_getsockopt_override(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("getsockopt-retval_sync"))
+ test_getsockopt_retval_sync(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("exposed_hooks"))
+ test_exposed_hooks(cgroup_fd, sock_fd);
+
+close_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
new file mode 100644
index 000000000000..3bd27d2ea668
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This test makes sure BPF stats collection using rstat works correctly.
+ * The test uses 3 BPF progs:
+ * (a) counter: This BPF prog is invoked every time we attach a process to a
+ * cgroup and locklessly increments a percpu counter.
+ * The program then calls cgroup_rstat_updated() to inform rstat
+ * of an update on the (cpu, cgroup) pair.
+ *
+ * (b) flusher: This BPF prog is invoked when an rstat flush is ongoing, it
+ * aggregates all percpu counters to a total counter, and also
+ * propagates the changes to the ancestor cgroups.
+ *
+ * (c) dumper: This BPF prog is a cgroup_iter. It is used to output the total
+ * counter of a cgroup through reading a file in userspace.
+ *
+ * The test sets up a cgroup hierarchy, and the above programs. It spawns a few
+ * processes in the leaf cgroups and makes sure all the counters are aggregated
+ * correctly.
+ *
+ * Copyright 2022 Google LLC.
+ */
+#include <asm-generic/errno.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+#include "cgroup_hierarchical_stats.skel.h"
+
+#define PAGE_SIZE 4096
+#define MB(x) (x << 20)
+
+#define PROCESSES_PER_CGROUP 3
+
+#define BPFFS_ROOT "/sys/fs/bpf/"
+#define BPFFS_ATTACH_COUNTERS BPFFS_ROOT "attach_counters/"
+
+#define CG_ROOT_NAME "root"
+#define CG_ROOT_ID 1
+
+#define CGROUP_PATH(p, n) {.path = p"/"n, .name = n}
+
+static struct {
+ const char *path, *name;
+ unsigned long long id;
+ int fd;
+} cgroups[] = {
+ CGROUP_PATH("/", "test"),
+ CGROUP_PATH("/test", "child1"),
+ CGROUP_PATH("/test", "child2"),
+ CGROUP_PATH("/test/child1", "child1_1"),
+ CGROUP_PATH("/test/child1", "child1_2"),
+ CGROUP_PATH("/test/child2", "child2_1"),
+ CGROUP_PATH("/test/child2", "child2_2"),
+};
+
+#define N_CGROUPS ARRAY_SIZE(cgroups)
+#define N_NON_LEAF_CGROUPS 3
+
+static int root_cgroup_fd;
+static bool mounted_bpffs;
+
+/* reads file at 'path' to 'buf', returns 0 on success. */
+static int read_from_file(const char *path, char *buf, size_t size)
+{
+ int fd, len;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ len = read(fd, buf, size);
+ close(fd);
+ if (len < 0)
+ return len;
+
+ buf[len] = 0;
+ return 0;
+}
+
+/* mounts bpffs and mkdir for reading stats, returns 0 on success. */
+static int setup_bpffs(void)
+{
+ int err;
+
+ /* Mount bpffs */
+ err = mount("bpf", BPFFS_ROOT, "bpf", 0, NULL);
+ mounted_bpffs = !err;
+ if (ASSERT_FALSE(err && errno != EBUSY, "mount"))
+ return err;
+
+ /* Create a directory to contain stat files in bpffs */
+ err = mkdir(BPFFS_ATTACH_COUNTERS, 0755);
+ if (!ASSERT_OK(err, "mkdir"))
+ return err;
+
+ return 0;
+}
+
+static void cleanup_bpffs(void)
+{
+ /* Remove created directory in bpffs */
+ ASSERT_OK(rmdir(BPFFS_ATTACH_COUNTERS), "rmdir "BPFFS_ATTACH_COUNTERS);
+
+ /* Unmount bpffs, if it wasn't already mounted when we started */
+ if (mounted_bpffs)
+ return;
+
+ ASSERT_OK(umount(BPFFS_ROOT), "unmount bpffs");
+}
+
+/* sets up cgroups, returns 0 on success. */
+static int setup_cgroups(void)
+{
+ int i, fd, err;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ return err;
+
+ root_cgroup_fd = get_root_cgroup();
+ if (!ASSERT_GE(root_cgroup_fd, 0, "get_root_cgroup"))
+ return root_cgroup_fd;
+
+ for (i = 0; i < N_CGROUPS; i++) {
+ fd = create_and_get_cgroup(cgroups[i].path);
+ if (!ASSERT_GE(fd, 0, "create_and_get_cgroup"))
+ return fd;
+
+ cgroups[i].fd = fd;
+ cgroups[i].id = get_cgroup_id(cgroups[i].path);
+ }
+ return 0;
+}
+
+static void cleanup_cgroups(void)
+{
+ close(root_cgroup_fd);
+ for (int i = 0; i < N_CGROUPS; i++)
+ close(cgroups[i].fd);
+ cleanup_cgroup_environment();
+}
+
+/* Sets up cgroup hiearchary, returns 0 on success. */
+static int setup_hierarchy(void)
+{
+ return setup_bpffs() || setup_cgroups();
+}
+
+static void destroy_hierarchy(void)
+{
+ cleanup_cgroups();
+ cleanup_bpffs();
+}
+
+static int attach_processes(void)
+{
+ int i, j, status;
+
+ /* In every leaf cgroup, attach 3 processes */
+ for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++) {
+ for (j = 0; j < PROCESSES_PER_CGROUP; j++) {
+ pid_t pid;
+
+ /* Create child and attach to cgroup */
+ pid = fork();
+ if (pid == 0) {
+ if (join_parent_cgroup(cgroups[i].path))
+ exit(EACCES);
+ exit(0);
+ }
+
+ /* Cleanup child */
+ waitpid(pid, &status, 0);
+ if (!ASSERT_TRUE(WIFEXITED(status), "child process exited"))
+ return 1;
+ if (!ASSERT_EQ(WEXITSTATUS(status), 0,
+ "child process exit code"))
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static unsigned long long
+get_attach_counter(unsigned long long cgroup_id, const char *file_name)
+{
+ unsigned long long attach_counter = 0, id = 0;
+ static char buf[128], path[128];
+
+ /* For every cgroup, read the file generated by cgroup_iter */
+ snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, file_name);
+ if (!ASSERT_OK(read_from_file(path, buf, 128), "read cgroup_iter"))
+ return 0;
+
+ /* Check the output file formatting */
+ ASSERT_EQ(sscanf(buf, "cg_id: %llu, attach_counter: %llu\n",
+ &id, &attach_counter), 2, "output format");
+
+ /* Check that the cgroup_id is displayed correctly */
+ ASSERT_EQ(id, cgroup_id, "cgroup_id");
+ /* Check that the counter is non-zero */
+ ASSERT_GT(attach_counter, 0, "attach counter non-zero");
+ return attach_counter;
+}
+
+static void check_attach_counters(void)
+{
+ unsigned long long attach_counters[N_CGROUPS], root_attach_counter;
+ int i;
+
+ for (i = 0; i < N_CGROUPS; i++)
+ attach_counters[i] = get_attach_counter(cgroups[i].id,
+ cgroups[i].name);
+
+ /* Read stats for root too */
+ root_attach_counter = get_attach_counter(CG_ROOT_ID, CG_ROOT_NAME);
+
+ /* Check that all leafs cgroups have an attach counter of 3 */
+ for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++)
+ ASSERT_EQ(attach_counters[i], PROCESSES_PER_CGROUP,
+ "leaf cgroup attach counter");
+
+ /* Check that child1 == child1_1 + child1_2 */
+ ASSERT_EQ(attach_counters[1], attach_counters[3] + attach_counters[4],
+ "child1_counter");
+ /* Check that child2 == child2_1 + child2_2 */
+ ASSERT_EQ(attach_counters[2], attach_counters[5] + attach_counters[6],
+ "child2_counter");
+ /* Check that test == child1 + child2 */
+ ASSERT_EQ(attach_counters[0], attach_counters[1] + attach_counters[2],
+ "test_counter");
+ /* Check that root >= test */
+ ASSERT_GE(root_attach_counter, attach_counters[1], "root_counter");
+}
+
+/* Creates iter link and pins in bpffs, returns 0 on success, -errno on failure.
+ */
+static int setup_cgroup_iter(struct cgroup_hierarchical_stats *obj,
+ int cgroup_fd, const char *file_name)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo = {};
+ struct bpf_link *link;
+ static char path[128];
+ int err;
+
+ /*
+ * Create an iter link, parameterized by cgroup_fd. We only want to
+ * traverse one cgroup, so set the traversal order to "self".
+ */
+ linfo.cgroup.cgroup_fd = cgroup_fd;
+ linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(obj->progs.dumper, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ return -EFAULT;
+
+ /* Pin the link to a bpffs file */
+ snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, file_name);
+ err = bpf_link__pin(link, path);
+ ASSERT_OK(err, "pin cgroup_iter");
+
+ /* Remove the link, leaving only the ref held by the pinned file */
+ bpf_link__destroy(link);
+ return err;
+}
+
+/* Sets up programs for collecting stats, returns 0 on success. */
+static int setup_progs(struct cgroup_hierarchical_stats **skel)
+{
+ int i, err;
+
+ *skel = cgroup_hierarchical_stats__open_and_load();
+ if (!ASSERT_OK_PTR(*skel, "open_and_load"))
+ return 1;
+
+ /* Attach cgroup_iter program that will dump the stats to cgroups */
+ for (i = 0; i < N_CGROUPS; i++) {
+ err = setup_cgroup_iter(*skel, cgroups[i].fd, cgroups[i].name);
+ if (!ASSERT_OK(err, "setup_cgroup_iter"))
+ return err;
+ }
+
+ /* Also dump stats for root */
+ err = setup_cgroup_iter(*skel, root_cgroup_fd, CG_ROOT_NAME);
+ if (!ASSERT_OK(err, "setup_cgroup_iter"))
+ return err;
+
+ bpf_program__set_autoattach((*skel)->progs.dumper, false);
+ err = cgroup_hierarchical_stats__attach(*skel);
+ if (!ASSERT_OK(err, "attach"))
+ return err;
+
+ return 0;
+}
+
+static void destroy_progs(struct cgroup_hierarchical_stats *skel)
+{
+ static char path[128];
+ int i;
+
+ for (i = 0; i < N_CGROUPS; i++) {
+ /* Delete files in bpffs that cgroup_iters are pinned in */
+ snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS,
+ cgroups[i].name);
+ ASSERT_OK(remove(path), "remove cgroup_iter pin");
+ }
+
+ /* Delete root file in bpffs */
+ snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, CG_ROOT_NAME);
+ ASSERT_OK(remove(path), "remove cgroup_iter root pin");
+ cgroup_hierarchical_stats__destroy(skel);
+}
+
+void test_cgroup_hierarchical_stats(void)
+{
+ struct cgroup_hierarchical_stats *skel = NULL;
+
+ if (setup_hierarchy())
+ goto hierarchy_cleanup;
+ if (setup_progs(&skel))
+ goto cleanup;
+ if (attach_processes())
+ goto cleanup;
+ check_attach_counters();
+cleanup:
+ destroy_progs(skel);
+hierarchy_cleanup:
+ destroy_hierarchy();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
new file mode 100644
index 000000000000..574d9a0cdc8e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "iters_css_task.skel.h"
+#include "cgroup_iter.skel.h"
+#include "cgroup_helpers.h"
+
+#define ROOT 0
+#define PARENT 1
+#define CHILD1 2
+#define CHILD2 3
+#define NUM_CGROUPS 4
+
+#define PROLOGUE "prologue\n"
+#define EPILOGUE "epilogue\n"
+
+static const char *cg_path[] = {
+ "/", "/parent", "/parent/child1", "/parent/child2"
+};
+
+static int cg_fd[] = {-1, -1, -1, -1};
+static unsigned long long cg_id[] = {0, 0, 0, 0};
+static char expected_output[64];
+
+static int setup_cgroups(void)
+{
+ int fd, i = 0;
+
+ for (i = 0; i < NUM_CGROUPS; i++) {
+ fd = create_and_get_cgroup(cg_path[i]);
+ if (fd < 0)
+ return fd;
+
+ cg_fd[i] = fd;
+ cg_id[i] = get_cgroup_id(cg_path[i]);
+ }
+ return 0;
+}
+
+static void cleanup_cgroups(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_CGROUPS; i++)
+ close(cg_fd[i]);
+}
+
+static void read_from_cgroup_iter(struct bpf_program *prog, int cgroup_fd,
+ int order, const char *testname)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+ int len, iter_fd;
+ static char buf[128];
+ size_t left;
+ char *p;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = cgroup_fd;
+ linfo.cgroup.order = order;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(prog, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (iter_fd < 0)
+ goto free_link;
+
+ memset(buf, 0, sizeof(buf));
+ left = ARRAY_SIZE(buf);
+ p = buf;
+ while ((len = read(iter_fd, p, left)) > 0) {
+ p += len;
+ left -= len;
+ }
+
+ ASSERT_STREQ(buf, expected_output, testname);
+
+ /* read() after iter finishes should be ok. */
+ if (len == 0)
+ ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");
+
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+}
+
+/* Invalid cgroup. */
+static void test_invalid_cgroup(struct cgroup_iter *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = (__u32)-1;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts);
+ ASSERT_ERR_PTR(link, "attach_iter");
+ bpf_link__destroy(link);
+}
+
+/* Specifying both cgroup_fd and cgroup_id is invalid. */
+static void test_invalid_cgroup_spec(struct cgroup_iter *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = (__u32)cg_fd[PARENT];
+ linfo.cgroup.cgroup_id = (__u64)cg_id[PARENT];
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts);
+ ASSERT_ERR_PTR(link, "attach_iter");
+ bpf_link__destroy(link);
+}
+
+/* Preorder walk prints parent and child in order. */
+static void test_walk_preorder(struct cgroup_iter *skel)
+{
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n%8llu\n%8llu\n" EPILOGUE,
+ cg_id[PARENT], cg_id[CHILD1], cg_id[CHILD2]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_DESCENDANTS_PRE, "preorder");
+}
+
+/* Postorder walk prints child and parent in order. */
+static void test_walk_postorder(struct cgroup_iter *skel)
+{
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n%8llu\n%8llu\n" EPILOGUE,
+ cg_id[CHILD1], cg_id[CHILD2], cg_id[PARENT]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_DESCENDANTS_POST, "postorder");
+}
+
+/* Walking parents prints parent and then root. */
+static void test_walk_ancestors_up(struct cgroup_iter *skel)
+{
+ /* terminate the walk when ROOT is met. */
+ skel->bss->terminal_cgroup = cg_id[ROOT];
+
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n%8llu\n" EPILOGUE,
+ cg_id[PARENT], cg_id[ROOT]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_ANCESTORS_UP, "ancestors_up");
+
+ skel->bss->terminal_cgroup = 0;
+}
+
+/* Early termination prints parent only. */
+static void test_early_termination(struct cgroup_iter *skel)
+{
+ /* terminate the walk after the first element is processed. */
+ skel->bss->terminate_early = 1;
+
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_DESCENDANTS_PRE, "early_termination");
+
+ skel->bss->terminate_early = 0;
+}
+
+/* Waling self prints self only. */
+static void test_walk_self_only(struct cgroup_iter *skel)
+{
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_SELF_ONLY, "self_only");
+}
+
+static void test_walk_dead_self_only(struct cgroup_iter *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ char expected_output[128], buf[128];
+ const char *cgrp_name = "/dead";
+ union bpf_iter_link_info linfo;
+ int len, cgrp_fd, iter_fd;
+ struct bpf_link *link;
+ size_t left;
+ char *p;
+
+ cgrp_fd = create_and_get_cgroup(cgrp_name);
+ if (!ASSERT_GE(cgrp_fd, 0, "create cgrp"))
+ return;
+
+ /* The cgroup will be dead during read() iteration, so it only has
+ * epilogue in the output
+ */
+ snprintf(expected_output, sizeof(expected_output), EPILOGUE);
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = cgrp_fd;
+ linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto close_cgrp;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "iter_create"))
+ goto free_link;
+
+ /* Close link fd and cgroup fd */
+ bpf_link__destroy(link);
+ close(cgrp_fd);
+
+ /* Remove cgroup to mark it as dead */
+ remove_cgroup(cgrp_name);
+
+ /* Two kern_sync_rcu() and usleep() pairs are used to wait for the
+ * releases of cgroup css, and the last kern_sync_rcu() and usleep()
+ * pair is used to wait for the free of cgroup itself.
+ */
+ kern_sync_rcu();
+ usleep(8000);
+ kern_sync_rcu();
+ usleep(8000);
+ kern_sync_rcu();
+ usleep(1000);
+
+ memset(buf, 0, sizeof(buf));
+ left = ARRAY_SIZE(buf);
+ p = buf;
+ while ((len = read(iter_fd, p, left)) > 0) {
+ p += len;
+ left -= len;
+ }
+
+ ASSERT_STREQ(buf, expected_output, "dead cgroup output");
+
+ /* read() after iter finishes should be ok. */
+ if (len == 0)
+ ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");
+
+ close(iter_fd);
+ return;
+free_link:
+ bpf_link__destroy(link);
+close_cgrp:
+ close(cgrp_fd);
+}
+
+static void test_walk_self_only_css_task(void)
+{
+ struct iters_css_task *skel;
+ int err;
+
+ skel = iters_css_task__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.cgroup_id_printer, true);
+
+ err = iters_css_task__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = join_cgroup(cg_path[CHILD2]);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ goto cleanup;
+
+ skel->bss->target_pid = getpid();
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n" EPILOGUE, cg_id[CHILD2]);
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[CHILD2],
+ BPF_CGROUP_ITER_SELF_ONLY, "test_walk_self_only_css_task");
+ ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+cleanup:
+ iters_css_task__destroy(skel);
+}
+
+void test_cgroup_iter(void)
+{
+ struct cgroup_iter *skel = NULL;
+
+ if (setup_cgroup_environment())
+ return;
+
+ if (setup_cgroups())
+ goto out;
+
+ skel = cgroup_iter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_iter__open_and_load"))
+ goto out;
+
+ if (test__start_subtest("cgroup_iter__invalid_cgroup"))
+ test_invalid_cgroup(skel);
+ if (test__start_subtest("cgroup_iter__invalid_cgroup_spec"))
+ test_invalid_cgroup_spec(skel);
+ if (test__start_subtest("cgroup_iter__preorder"))
+ test_walk_preorder(skel);
+ if (test__start_subtest("cgroup_iter__postorder"))
+ test_walk_postorder(skel);
+ if (test__start_subtest("cgroup_iter__ancestors_up_walk"))
+ test_walk_ancestors_up(skel);
+ if (test__start_subtest("cgroup_iter__early_termination"))
+ test_early_termination(skel);
+ if (test__start_subtest("cgroup_iter__self_only"))
+ test_walk_self_only(skel);
+ if (test__start_subtest("cgroup_iter__dead_self_only"))
+ test_walk_dead_self_only(skel);
+ if (test__start_subtest("cgroup_iter__self_only_css_task"))
+ test_walk_self_only_css_task();
+
+out:
+ cgroup_iter__destroy(skel);
+ cleanup_cgroups();
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 4d9b514b3fd9..15093a69510e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -24,7 +24,7 @@ int ping_and_check(int exp_calls, int exp_alt_calls)
return 0;
}
-void test_cgroup_link(void)
+void serial_test_cgroup_link(void)
{
struct {
const char *path;
@@ -54,7 +54,7 @@ void test_cgroup_link(void)
for (i = 0; i < cg_nr; i++) {
cgs[i].fd = create_and_get_cgroup(cgs[i].path);
- if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd))
+ if (!ASSERT_GE(cgs[i].fd, 0, "cg_create"))
goto cleanup;
}
@@ -65,17 +65,15 @@ void test_cgroup_link(void)
for (i = 0; i < cg_nr; i++) {
links[i] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[i].fd);
- if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
- i, PTR_ERR(links[i])))
+ if (!ASSERT_OK_PTR(links[i], "cg_attach"))
goto cleanup;
}
ping_and_check(cg_nr, 0);
- /* query the number of effective progs and attach flags in root cg */
+ /* query the number of attached progs and attach flags in root cg */
err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS,
- BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL,
- &prog_cnt);
+ 0, &attach_flags, NULL, &prog_cnt);
CHECK_FAIL(err);
CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt))
@@ -86,17 +84,15 @@ void test_cgroup_link(void)
BPF_F_QUERY_EFFECTIVE, NULL, NULL,
&prog_cnt);
CHECK_FAIL(err);
- CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
cg_nr, prog_cnt))
goto cleanup;
/* query the effective prog IDs in last cg */
err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
- BPF_F_QUERY_EFFECTIVE, &attach_flags,
- prog_ids, &prog_cnt);
+ BPF_F_QUERY_EFFECTIVE, NULL, prog_ids,
+ &prog_cnt);
CHECK_FAIL(err);
- CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
cg_nr, prog_cnt))
goto cleanup;
@@ -121,8 +117,7 @@ void test_cgroup_link(void)
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr + 1, 0);
@@ -147,7 +142,7 @@ void test_cgroup_link(void)
/* attempt to mix in with multi-attach bpf_link */
tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+ if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) {
bpf_link__destroy(tmp_link);
goto cleanup;
}
@@ -165,8 +160,7 @@ void test_cgroup_link(void)
/* attach back link-based one */
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr, 0);
@@ -249,8 +243,7 @@ cleanup:
BPF_CGROUP_INET_EGRESS);
for (i = 0; i < cg_nr; i++) {
- if (!IS_ERR(links[i]))
- bpf_link__destroy(links[i]);
+ bpf_link__destroy(links[i]);
}
test_cgroup_link__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
new file mode 100644
index 000000000000..bb60704a3ef9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_mprog.skel.h"
+
+static void assert_mprog_count(int cg, int atype, int expected)
+{
+ __u32 count = 0, attach_flags = 0;
+ int err;
+
+ err = bpf_prog_query(cg, atype, 0, &attach_flags,
+ NULL, &count);
+ ASSERT_EQ(count, expected, "count");
+ ASSERT_EQ(err, 0, "prog_query");
+}
+
+static void test_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ /* optq.prog_cnt will be used in below query */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ optd.expected_revision = 5;
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ LIBBPF_OPTS_RESET(optd);
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ LIBBPF_OPTS(bpf_cgroup_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_id = id_from_link_fd(bpf_link__fd(link1)),
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "link_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ /* optq.prog_cnt will be used in below query */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, fd3, fd4;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/preorder_prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, -EINVAL, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ struct cgroup_mprog *skel;
+ __u32 fd2;
+ int cg;
+
+ cg = test__join_cgroup("/preorder_link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_ERR_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_invalid_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ __u32 fd1, fd2, id2;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/invalid_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ id2 = id_from_prog_fd(fd2);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER,
+ .replace_prog_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+void test_cgroup_mprog_opts(void)
+{
+ if (test__start_subtest("prog_attach_detach"))
+ test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("link_attach_detach"))
+ test_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_prog_attach_detach"))
+ test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_link_attach_detach"))
+ test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("invalid_attach_detach"))
+ test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
new file mode 100644
index 000000000000..a36d2e968bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_p_atype, prog_p2_atype;
+ int prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen = 1;
+ int err = 0;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ LIBBPF_OPTS_RESET(opts);
+ opts.flags = BPF_F_ALLOW_MULTI;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE;
+ if (has_relative_fd)
+ opts.relative_fd = prog_p_fd;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ result = skel->bss->result;
+ ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_mprog_ordering(void)
+{
+ int cg_parent = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
new file mode 100644
index 000000000000..d4d583872fa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int cg_child, int sock_fd, bool all_preorder)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_c_atype, prog_c2_atype, prog_p_atype, prog_p2_atype;
+ int prog_c_fd, prog_c2_fd, prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen;
+ int err = 0;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ buf = 0x00;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (!ASSERT_OK(err, "setsockopt"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.child;
+ prog_c_fd = bpf_program__fd(prog);
+ prog_c_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c_fd, cg_child, prog_c_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.child_2;
+ prog_c2_fd = bpf_program__fd(prog);
+ prog_c2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c2_fd, cg_child, prog_c2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child_2"))
+ goto detach_child;
+
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_child_2;
+
+ result = skel->bss->result;
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 1 && result[1] == 2, "child only");
+ else
+ ASSERT_TRUE(result[0] == 2 && result[1] == 1, "child only");
+
+ skel->bss->idx = 0;
+ memset(result, 0, 4);
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto detach_child_2;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 3 && result[1] == 4 && result[2] == 1 && result[3] == 2,
+ "parent and child");
+ else
+ ASSERT_TRUE(result[0] == 4 && result[1] == 2 && result[2] == 1 && result[3] == 3,
+ "parent and child");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+detach_child_2:
+ ASSERT_OK(bpf_prog_detach2(prog_c2_fd, cg_child, prog_c2_atype),
+ "bpf_prog_detach2-child_2");
+detach_child:
+ ASSERT_OK(bpf_prog_detach2(prog_c_fd, cg_child, prog_c_atype),
+ "bpf_prog_detach2-child");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_preorder(void)
+{
+ int cg_parent = -1, cg_child = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ cg_child = test__join_cgroup("/parent/child");
+ if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_child);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c
new file mode 100644
index 000000000000..e1a90c10db8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_skb_direct_packet_access.skel.h"
+
+void test_cgroup_skb_prog_run_direct_packet_access(void)
+{
+ int err;
+ struct cgroup_skb_direct_packet_access *skel;
+ char test_skb[64] = {};
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = test_skb,
+ .data_size_in = sizeof(test_skb),
+ );
+
+ skel = cgroup_skb_direct_packet_access__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_skb_direct_packet_access__open_and_load"))
+ return;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.direct_packet_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts err");
+ ASSERT_EQ(topts.retval, 1, "retval");
+
+ ASSERT_NEQ(skel->bss->data_end, 0, "data_end");
+
+ cgroup_skb_direct_packet_access__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 464edc1c1708..b9dc4ec655b5 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -60,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
goto cleanup;
link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
- if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "cgroup_attach"))
goto cleanup;
run_lookup_test(&skel->bss->g_serv_port, out_sk);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c b/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c
new file mode 100644
index 000000000000..cf395715ced4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_storage.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "cgroup_storage.skel.h"
+
+#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/"
+#define TEST_NS "cgroup_storage_ns"
+#define PING_CMD "ping localhost -c 1 -W 1 -q"
+
+static int setup_network(struct nstoken **token)
+{
+ SYS(fail, "ip netns add %s", TEST_NS);
+ *token = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(*token, "open netns"))
+ goto cleanup_ns;
+ SYS(cleanup_ns, "ip link set lo up");
+
+ return 0;
+
+cleanup_ns:
+ SYS_NOFAIL("ip netns del %s", TEST_NS);
+fail:
+ return -1;
+}
+
+static void cleanup_network(struct nstoken *ns)
+{
+ close_netns(ns);
+ SYS_NOFAIL("ip netns del %s", TEST_NS);
+}
+
+void test_cgroup_storage(void)
+{
+ struct bpf_cgroup_storage_key key;
+ struct cgroup_storage *skel;
+ struct nstoken *ns = NULL;
+ unsigned long long value;
+ int cgroup_fd;
+ int err;
+
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (!ASSERT_OK_FD(cgroup_fd, "create cgroup"))
+ return;
+
+ if (!ASSERT_OK(setup_network(&ns), "setup network"))
+ goto cleanup_cgroup;
+
+ skel = cgroup_storage__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "load program"))
+ goto cleanup_network;
+
+ skel->links.bpf_prog =
+ bpf_program__attach_cgroup(skel->progs.bpf_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.bpf_prog, "attach program"))
+ goto cleanup_progs;
+
+ /* Check that one out of every two packets is dropped */
+ err = SYS_NOFAIL(PING_CMD);
+ ASSERT_OK(err, "first ping");
+ err = SYS_NOFAIL(PING_CMD);
+ ASSERT_NEQ(err, 0, "second ping");
+ err = SYS_NOFAIL(PING_CMD);
+ ASSERT_OK(err, "third ping");
+
+ err = bpf_map__get_next_key(skel->maps.cgroup_storage, NULL, &key,
+ sizeof(key));
+ if (!ASSERT_OK(err, "get first key"))
+ goto cleanup_progs;
+ err = bpf_map__lookup_elem(skel->maps.cgroup_storage, &key, sizeof(key),
+ &value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "first packet count read"))
+ goto cleanup_progs;
+
+ /* Add one to the packet counter, check again packet filtering */
+ value++;
+ err = bpf_map__update_elem(skel->maps.cgroup_storage, &key, sizeof(key),
+ &value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "increment packet counter"))
+ goto cleanup_progs;
+ err = SYS_NOFAIL(PING_CMD);
+ ASSERT_OK(err, "fourth ping");
+ err = SYS_NOFAIL(PING_CMD);
+ ASSERT_NEQ(err, 0, "fifth ping");
+ err = SYS_NOFAIL(PING_CMD);
+ ASSERT_OK(err, "sixth ping");
+
+cleanup_progs:
+ cgroup_storage__destroy(skel);
+cleanup_network:
+ cleanup_network(ns);
+cleanup_cgroup:
+ close(cgroup_fd);
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c
new file mode 100644
index 000000000000..a1542faf7873
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include <test_progs.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+#include <sched.h>
+#include <unistd.h>
+#include "cgroup_helpers.h"
+#include "testing_helpers.h"
+#include "cgroup_tcp_skb.skel.h"
+#include "cgroup_tcp_skb.h"
+#include "network_helpers.h"
+
+#define CGROUP_TCP_SKB_PATH "/test_cgroup_tcp_skb"
+
+static int install_filters(int cgroup_fd,
+ struct bpf_link **egress_link,
+ struct bpf_link **ingress_link,
+ struct bpf_program *egress_prog,
+ struct bpf_program *ingress_prog,
+ struct cgroup_tcp_skb *skel)
+{
+ /* Prepare filters */
+ skel->bss->g_sock_state = 0;
+ skel->bss->g_unexpected = 0;
+ *egress_link =
+ bpf_program__attach_cgroup(egress_prog,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(egress_link, "egress_link"))
+ return -1;
+ *ingress_link =
+ bpf_program__attach_cgroup(ingress_prog,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(ingress_link, "ingress_link"))
+ return -1;
+
+ return 0;
+}
+
+static void uninstall_filters(struct bpf_link **egress_link,
+ struct bpf_link **ingress_link)
+{
+ bpf_link__destroy(*egress_link);
+ *egress_link = NULL;
+ bpf_link__destroy(*ingress_link);
+ *ingress_link = NULL;
+}
+
+static int create_client_sock_v6(void)
+{
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("socket");
+ return -1;
+ }
+
+ return fd;
+}
+
+/* Connect to the server in a cgroup from the outside of the cgroup. */
+static int talk_to_cgroup(int *client_fd, int *listen_fd, int *service_fd,
+ struct cgroup_tcp_skb *skel)
+{
+ int err, cp;
+ char buf[5];
+ int port;
+
+ /* Create client & server socket */
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ *client_fd = create_client_sock_v6();
+ if (!ASSERT_GE(*client_fd, 0, "client_fd"))
+ return -1;
+ err = join_cgroup(CGROUP_TCP_SKB_PATH);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ return -1;
+ *listen_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(*listen_fd, 0, "listen_fd"))
+ return -1;
+ port = get_socket_local_port(*listen_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port"))
+ return -1;
+ skel->bss->g_sock_port = ntohs(port);
+
+ /* Connect client to server */
+ err = connect_fd_to_fd(*client_fd, *listen_fd, 0);
+ if (!ASSERT_OK(err, "connect_fd_to_fd"))
+ return -1;
+ *service_fd = accept(*listen_fd, NULL, NULL);
+ if (!ASSERT_GE(*service_fd, 0, "service_fd"))
+ return -1;
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ cp = write(*client_fd, "hello", 5);
+ if (!ASSERT_EQ(cp, 5, "write"))
+ return -1;
+ cp = read(*service_fd, buf, 5);
+ if (!ASSERT_EQ(cp, 5, "read"))
+ return -1;
+
+ return 0;
+}
+
+/* Connect to the server out of a cgroup from inside the cgroup. */
+static int talk_to_outside(int *client_fd, int *listen_fd, int *service_fd,
+ struct cgroup_tcp_skb *skel)
+
+{
+ int err, cp;
+ char buf[5];
+ int port;
+
+ /* Create client & server socket */
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ *listen_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(*listen_fd, 0, "listen_fd"))
+ return -1;
+ err = join_cgroup(CGROUP_TCP_SKB_PATH);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ return -1;
+ *client_fd = create_client_sock_v6();
+ if (!ASSERT_GE(*client_fd, 0, "client_fd"))
+ return -1;
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ port = get_socket_local_port(*listen_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port"))
+ return -1;
+ skel->bss->g_sock_port = ntohs(port);
+
+ /* Connect client to server */
+ err = connect_fd_to_fd(*client_fd, *listen_fd, 0);
+ if (!ASSERT_OK(err, "connect_fd_to_fd"))
+ return -1;
+ *service_fd = accept(*listen_fd, NULL, NULL);
+ if (!ASSERT_GE(*service_fd, 0, "service_fd"))
+ return -1;
+ cp = write(*client_fd, "hello", 5);
+ if (!ASSERT_EQ(cp, 5, "write"))
+ return -1;
+ cp = read(*service_fd, buf, 5);
+ if (!ASSERT_EQ(cp, 5, "read"))
+ return -1;
+
+ return 0;
+}
+
+static int close_connection(int *closing_fd, int *peer_fd, int *listen_fd,
+ struct cgroup_tcp_skb *skel)
+{
+ __u32 saved_packet_count = 0;
+ int err;
+ int i;
+
+ /* Wait for ACKs to be sent */
+ saved_packet_count = skel->bss->g_packet_count;
+ usleep(100000); /* 0.1s */
+ for (i = 0;
+ skel->bss->g_packet_count != saved_packet_count && i < 10;
+ i++) {
+ saved_packet_count = skel->bss->g_packet_count;
+ usleep(100000); /* 0.1s */
+ }
+ if (!ASSERT_EQ(skel->bss->g_packet_count, saved_packet_count,
+ "packet_count"))
+ return -1;
+
+ skel->bss->g_packet_count = 0;
+ saved_packet_count = 0;
+
+ /* Half shutdown to make sure the closing socket having a chance to
+ * receive a FIN from the peer.
+ */
+ err = shutdown(*closing_fd, SHUT_WR);
+ if (!ASSERT_OK(err, "shutdown closing_fd"))
+ return -1;
+
+ /* Wait for FIN and the ACK of the FIN to be observed */
+ for (i = 0;
+ skel->bss->g_packet_count < saved_packet_count + 2 && i < 10;
+ i++)
+ usleep(100000); /* 0.1s */
+ if (!ASSERT_GE(skel->bss->g_packet_count, saved_packet_count + 2,
+ "packet_count"))
+ return -1;
+
+ saved_packet_count = skel->bss->g_packet_count;
+
+ /* Fully shutdown the connection */
+ err = close(*peer_fd);
+ if (!ASSERT_OK(err, "close peer_fd"))
+ return -1;
+ *peer_fd = -1;
+
+ /* Wait for FIN and the ACK of the FIN to be observed */
+ for (i = 0;
+ skel->bss->g_packet_count < saved_packet_count + 2 && i < 10;
+ i++)
+ usleep(100000); /* 0.1s */
+ if (!ASSERT_GE(skel->bss->g_packet_count, saved_packet_count + 2,
+ "packet_count"))
+ return -1;
+
+ err = close(*closing_fd);
+ if (!ASSERT_OK(err, "close closing_fd"))
+ return -1;
+ *closing_fd = -1;
+
+ close(*listen_fd);
+ *listen_fd = -1;
+
+ return 0;
+}
+
+/* This test case includes four scenarios:
+ * 1. Connect to the server from outside the cgroup and close the connection
+ * from outside the cgroup.
+ * 2. Connect to the server from outside the cgroup and close the connection
+ * from inside the cgroup.
+ * 3. Connect to the server from inside the cgroup and close the connection
+ * from outside the cgroup.
+ * 4. Connect to the server from inside the cgroup and close the connection
+ * from inside the cgroup.
+ *
+ * The test case is to verify that cgroup_skb/{egress,ingress} filters
+ * receive expected packets including SYN, SYN/ACK, ACK, FIN, and FIN/ACK.
+ */
+void test_cgroup_tcp_skb(void)
+{
+ struct bpf_link *ingress_link = NULL;
+ struct bpf_link *egress_link = NULL;
+ int client_fd = -1, listen_fd = -1;
+ struct cgroup_tcp_skb *skel;
+ int service_fd = -1;
+ int cgroup_fd = -1;
+ int err;
+
+ skel = cgroup_tcp_skb__open_and_load();
+ if (!ASSERT_OK(!skel, "skel_open_load"))
+ return;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto cleanup;
+
+ cgroup_fd = create_and_get_cgroup(CGROUP_TCP_SKB_PATH);
+ if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+ goto cleanup;
+
+ /* Scenario 1 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.server_egress,
+ skel->progs.server_ingress,
+ skel);
+ if (!ASSERT_OK(err, "install_filters"))
+ goto cleanup;
+
+ err = talk_to_cgroup(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_cgroup"))
+ goto cleanup;
+
+ err = close_connection(&client_fd, &service_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, CLOSED, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+ /* Scenario 2 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.server_egress_srv,
+ skel->progs.server_ingress_srv,
+ skel);
+
+ err = talk_to_cgroup(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_cgroup"))
+ goto cleanup;
+
+ err = close_connection(&service_fd, &client_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, TIME_WAIT, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+ /* Scenario 3 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.client_egress_srv,
+ skel->progs.client_ingress_srv,
+ skel);
+
+ err = talk_to_outside(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_outside"))
+ goto cleanup;
+
+ err = close_connection(&service_fd, &client_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, CLOSED, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+ /* Scenario 4 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.client_egress,
+ skel->progs.client_ingress,
+ skel);
+
+ err = talk_to_outside(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_outside"))
+ goto cleanup;
+
+ err = close_connection(&client_fd, &service_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, TIME_WAIT, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+cleanup:
+ close(client_fd);
+ close(listen_fd);
+ close(service_fd);
+ close(cgroup_fd);
+ bpf_link__destroy(egress_link);
+ bpf_link__destroy(ingress_link);
+ cleanup_cgroup_environment();
+ cgroup_tcp_skb__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
new file mode 100644
index 000000000000..37c1cc52ed98
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "connect4_dropper.skel.h"
+
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static int run_test(int cgroup_fd, int server_fd, bool classid)
+{
+ struct connect4_dropper *skel;
+ int fd, err = 0, port;
+
+ skel = connect4_dropper__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return -1;
+
+ port = get_socket_local_port(server_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port"))
+ return -1;
+
+ skel->bss->port = ntohs(port);
+
+ skel->links.connect_v4_dropper =
+ bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) {
+ err = -1;
+ goto out;
+ }
+
+ if (classid && !ASSERT_OK(join_classid(), "join_classid")) {
+ err = -1;
+ goto out;
+ }
+
+ errno = 0;
+ fd = connect_to_fd_opts(server_fd, NULL);
+ if (fd >= 0) {
+ log_err("Unexpected success to connect to server");
+ err = -1;
+ close(fd);
+ } else if (errno != EPERM) {
+ log_err("Unexpected errno from connect to server");
+ err = -1;
+ }
+out:
+ connect4_dropper__destroy(skel);
+ return err;
+}
+
+void test_cgroup_v1v2(void)
+{
+ struct network_helper_opts opts = {};
+ int server_fd, client_fd, cgroup_fd;
+
+ /* Step 1: Check base connectivity works without any BPF. */
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "server_fd"))
+ return;
+ client_fd = connect_to_fd_opts(server_fd, &opts);
+ if (!ASSERT_GE(client_fd, 0, "client_fd")) {
+ close(server_fd);
+ return;
+ }
+ close(client_fd);
+ close(server_fd);
+
+ /* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */
+ cgroup_fd = test__join_cgroup("/connect_dropper");
+ if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+ return;
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "server_fd")) {
+ close(cgroup_fd);
+ return;
+ }
+ ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");
+ setup_classid_environment();
+ set_classid();
+ ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
+ cleanup_classid_environment();
+ close(server_fd);
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
new file mode 100644
index 000000000000..5ad904e9d15d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+
+#include "read_cgroupfs_xattr.skel.h"
+#include "cgroup_read_xattr.skel.h"
+
+#define CGROUP_FS_PARENT "foo/"
+#define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/"
+#define TMP_FILE "/tmp/selftests_cgroup_xattr"
+
+static const char xattr_value_a[] = "bpf_selftest_value_a";
+static const char xattr_value_b[] = "bpf_selftest_value_b";
+static const char xattr_name[] = "user.bpf_test";
+
+static void test_read_cgroup_xattr(void)
+{
+ int tmp_fd, parent_cgroup_fd = -1, child_cgroup_fd = -1;
+ struct read_cgroupfs_xattr *skel = NULL;
+
+ parent_cgroup_fd = test__join_cgroup(CGROUP_FS_PARENT);
+ if (!ASSERT_OK_FD(parent_cgroup_fd, "create parent cgroup"))
+ return;
+ if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a),
+ "set parent xattr"))
+ goto out;
+
+ child_cgroup_fd = test__join_cgroup(CGROUP_FS_CHILD);
+ if (!ASSERT_OK_FD(child_cgroup_fd, "create child cgroup"))
+ goto out;
+ if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b),
+ "set child xattr"))
+ goto out;
+
+ skel = read_cgroupfs_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load"))
+ goto out;
+
+ skel->bss->target_pid = sys_gettid();
+
+ if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach"))
+ goto out;
+
+ tmp_fd = open(TMP_FILE, O_RDONLY | O_CREAT);
+ ASSERT_OK_FD(tmp_fd, "open tmp file");
+ close(tmp_fd);
+
+ ASSERT_TRUE(skel->bss->found_value_a, "found_value_a");
+ ASSERT_TRUE(skel->bss->found_value_b, "found_value_b");
+
+out:
+ close(child_cgroup_fd);
+ close(parent_cgroup_fd);
+ read_cgroupfs_xattr__destroy(skel);
+ unlink(TMP_FILE);
+}
+
+void test_cgroup_xattr(void)
+{
+ RUN_TESTS(cgroup_read_xattr);
+
+ if (test__start_subtest("read_cgroupfs_xattr"))
+ test_read_cgroup_xattr();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
new file mode 100644
index 000000000000..4b42fbc96efc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <cgroup_helpers.h>
+#include <test_progs.h>
+#include <sched.h>
+#include <sys/wait.h>
+
+#include "cgrp_kfunc_failure.skel.h"
+#include "cgrp_kfunc_success.skel.h"
+
+static struct cgrp_kfunc_success *open_load_cgrp_kfunc_skel(void)
+{
+ struct cgrp_kfunc_success *skel;
+ int err;
+
+ skel = cgrp_kfunc_success__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ err = cgrp_kfunc_success__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ cgrp_kfunc_success__destroy(skel);
+ return NULL;
+}
+
+static int mkdir_rm_test_dir(void)
+{
+ int fd;
+ const char *cgrp_path = "cgrp_kfunc";
+
+ fd = create_and_get_cgroup(cgrp_path);
+ if (!ASSERT_GT(fd, 0, "mkdir_cgrp_fd"))
+ return -1;
+
+ close(fd);
+ remove_cgroup(cgrp_path);
+
+ return 0;
+}
+
+static void run_success_test(const char *prog_name)
+{
+ struct cgrp_kfunc_success *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+
+ skel = open_load_cgrp_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "attached_link"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->invocations, 0, "pre_rmdir_count");
+ if (!ASSERT_OK(mkdir_rm_test_dir(), "cgrp_mkdir"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->invocations, 1, "post_rmdir_count");
+ ASSERT_OK(skel->bss->err, "post_rmdir_err");
+
+cleanup:
+ bpf_link__destroy(link);
+ cgrp_kfunc_success__destroy(skel);
+}
+
+static const char * const success_tests[] = {
+ "test_cgrp_acquire_release_argument",
+ "test_cgrp_acquire_leave_in_map",
+ "test_cgrp_xchg_release",
+ "test_cgrp_get_release",
+ "test_cgrp_get_ancestors",
+ "test_cgrp_from_id",
+};
+
+static void test_cgrp_from_id_ns(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct cgrp_kfunc_success *skel;
+ struct bpf_program *prog;
+ int pid, pipe_fd[2];
+
+ skel = open_load_cgrp_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err"))
+ goto cleanup;
+
+ prog = skel->progs.test_cgrp_from_id_ns;
+
+ if (!ASSERT_OK(pipe(pipe_fd), "pipe"))
+ goto cleanup;
+
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork result")) {
+ close(pipe_fd[0]);
+ close(pipe_fd[1]);
+ goto cleanup;
+ }
+
+ if (pid == 0) {
+ int ret = 0;
+
+ close(pipe_fd[0]);
+
+ if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup"))
+ exit(1);
+
+ if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns"))
+ exit(1);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(ret, "test run ret"))
+ exit(1);
+
+ if (!ASSERT_OK(opts.retval, "test run retval"))
+ exit(1);
+
+ if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe"))
+ exit(1);
+
+ exit(0);
+ } else {
+ int res;
+
+ close(pipe_fd[1]);
+
+ ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res");
+ ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child");
+
+ remove_cgroup_pid("cgrp_from_id_ns", pid);
+
+ ASSERT_OK(res, "result from run");
+ }
+
+ close(pipe_fd[0]);
+cleanup:
+ cgrp_kfunc_success__destroy(skel);
+}
+
+void test_cgrp_kfunc(void)
+{
+ int i, err;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "cgrp_env_setup"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i]))
+ continue;
+
+ run_success_test(success_tests[i]);
+ }
+
+ if (test__start_subtest("test_cgrp_from_id_ns"))
+ test_cgrp_from_id_ns();
+
+ RUN_TESTS(cgrp_kfunc_failure);
+
+cleanup:
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
new file mode 100644
index 000000000000..9015e2c2ab12
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <test_progs.h>
+#include "cgrp_ls_tp_btf.skel.h"
+#include "cgrp_ls_recursion.skel.h"
+#include "cgrp_ls_attach_cgroup.skel.h"
+#include "cgrp_ls_negative.skel.h"
+#include "cgrp_ls_sleepable.skel.h"
+#include "network_helpers.h"
+#include "cgroup_helpers.h"
+
+struct socket_cookie {
+ __u64 cookie_key;
+ __u64 cookie_value;
+};
+
+static bool is_cgroup1;
+static int target_hid;
+
+#define CGROUP_MODE_SET(skel) \
+{ \
+ skel->bss->is_cgroup1 = is_cgroup1; \
+ skel->bss->target_hid = target_hid; \
+}
+
+static void cgroup_mode_value_init(bool cgroup, int hid)
+{
+ is_cgroup1 = cgroup;
+ target_hid = hid;
+}
+
+static void test_tp_btf(int cgroup_fd)
+{
+ struct cgrp_ls_tp_btf *skel;
+ long val1 = 1, val2 = 0;
+ int err;
+
+ skel = cgrp_ls_tp_btf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ /* populate a value in map_b */
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update_elem"))
+ goto out;
+
+ /* check value */
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val2);
+ if (!ASSERT_OK(err, "map_lookup_elem"))
+ goto out;
+ if (!ASSERT_EQ(val2, 1, "map_lookup_elem, invalid val"))
+ goto out;
+
+ /* delete value */
+ err = bpf_map_delete_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd);
+ if (!ASSERT_OK(err, "map_delete_elem"))
+ goto out;
+
+ skel->bss->target_pid = sys_gettid();
+
+ err = cgrp_ls_tp_btf__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ sys_gettid();
+ sys_gettid();
+
+ skel->bss->target_pid = 0;
+
+ /* 3x syscalls: 1x attach and 2x gettid */
+ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
+ ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
+ ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
+out:
+ cgrp_ls_tp_btf__destroy(skel);
+}
+
+static void test_attach_cgroup(int cgroup_fd)
+{
+ int server_fd = 0, client_fd = 0, err = 0;
+ socklen_t addr_len = sizeof(struct sockaddr_in6);
+ struct cgrp_ls_attach_cgroup *skel;
+ __u32 cookie_expected_value;
+ struct sockaddr_in6 addr;
+ struct socket_cookie val;
+
+ skel = cgrp_ls_attach_cgroup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->links.set_cookie = bpf_program__attach_cgroup(
+ skel->progs.set_cookie, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach"))
+ goto out;
+
+ skel->links.update_cookie_sockops = bpf_program__attach_cgroup(
+ skel->progs.update_cookie_sockops, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.update_cookie_sockops, "prog_attach"))
+ goto out;
+
+ skel->links.update_cookie_tracing = bpf_program__attach(
+ skel->progs.update_cookie_tracing);
+ if (!ASSERT_OK_PTR(skel->links.update_cookie_tracing, "prog_attach"))
+ goto out;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto out;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto close_server_fd;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.socket_cookies),
+ &cgroup_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(socket_cookies)"))
+ goto close_client_fd;
+
+ err = getsockname(client_fd, (struct sockaddr *)&addr, &addr_len);
+ if (!ASSERT_OK(err, "getsockname"))
+ goto close_client_fd;
+
+ cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
+ ASSERT_EQ(val.cookie_value, cookie_expected_value, "cookie_value");
+
+close_client_fd:
+ close(client_fd);
+close_server_fd:
+ close(server_fd);
+out:
+ cgrp_ls_attach_cgroup__destroy(skel);
+}
+
+static void test_recursion(int cgroup_fd)
+{
+ struct cgrp_ls_recursion *skel;
+ int err;
+
+ skel = cgrp_ls_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ err = cgrp_ls_recursion__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger sys_enter, make sure it does not cause deadlock */
+ sys_gettid();
+
+out:
+ cgrp_ls_recursion__destroy(skel);
+}
+
+static void test_negative(void)
+{
+ struct cgrp_ls_negative *skel;
+
+ skel = cgrp_ls_negative__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "skel_open_and_load")) {
+ cgrp_ls_negative__destroy(skel);
+ return;
+ }
+}
+
+static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct cgrp_ls_sleepable *skel;
+ struct bpf_link *link;
+ int err, iter_fd;
+ char buf[16];
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ bpf_program__set_autoload(skel->progs.cgroup_iter, true);
+ err = cgrp_ls_sleepable__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = cgroup_fd;
+ linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.cgroup_iter, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "iter_create"))
+ goto out;
+
+ /* trigger the program run */
+ (void)read(iter_fd, buf, sizeof(buf));
+
+ ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
+
+ close(iter_fd);
+out:
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void test_yes_rcu_lock(__u64 cgroup_id)
+{
+ struct cgrp_ls_sleepable *skel;
+ int err;
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+ skel->bss->target_pid = sys_gettid();
+
+ bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
+ err = cgrp_ls_sleepable__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ err = cgrp_ls_sleepable__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
+out:
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void test_no_rcu_lock(void)
+{
+ struct cgrp_ls_sleepable *skel;
+ int err;
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ bpf_program__set_autoload(skel->progs.no_rcu_lock, true);
+ err = cgrp_ls_sleepable__load(skel);
+ ASSERT_ERR(err, "skel_load");
+
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void test_cgrp1_no_rcu_lock(void)
+{
+ struct cgrp_ls_sleepable *skel;
+ int err;
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ bpf_program__set_autoload(skel->progs.cgrp1_no_rcu_lock, true);
+ err = cgrp_ls_sleepable__load(skel);
+ ASSERT_OK(err, "skel_load");
+
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void cgrp2_local_storage(void)
+{
+ __u64 cgroup_id;
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/cgrp_local_storage");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage"))
+ return;
+
+ cgroup_mode_value_init(0, -1);
+
+ cgroup_id = get_cgroup_id("/cgrp_local_storage");
+ if (test__start_subtest("tp_btf"))
+ test_tp_btf(cgroup_fd);
+ if (test__start_subtest("attach_cgroup"))
+ test_attach_cgroup(cgroup_fd);
+ if (test__start_subtest("recursion"))
+ test_recursion(cgroup_fd);
+ if (test__start_subtest("negative"))
+ test_negative();
+ if (test__start_subtest("cgroup_iter_sleepable"))
+ test_cgroup_iter_sleepable(cgroup_fd, cgroup_id);
+ if (test__start_subtest("yes_rcu_lock"))
+ test_yes_rcu_lock(cgroup_id);
+ if (test__start_subtest("no_rcu_lock"))
+ test_no_rcu_lock();
+
+ close(cgroup_fd);
+}
+
+static void cgrp1_local_storage(void)
+{
+ int cgrp1_fd, cgrp1_hid, cgrp1_id, err;
+
+ /* Setup cgroup1 hierarchy */
+ err = setup_classid_environment();
+ if (!ASSERT_OK(err, "setup_classid_environment"))
+ return;
+
+ err = join_classid();
+ if (!ASSERT_OK(err, "join_cgroup1"))
+ goto cleanup;
+
+ cgrp1_fd = open_classid();
+ if (!ASSERT_GE(cgrp1_fd, 0, "cgroup1 fd"))
+ goto cleanup;
+
+ cgrp1_id = get_classid_cgroup_id();
+ if (!ASSERT_GE(cgrp1_id, 0, "cgroup1 id"))
+ goto close_fd;
+
+ cgrp1_hid = get_cgroup1_hierarchy_id("net_cls");
+ if (!ASSERT_GE(cgrp1_hid, 0, "cgroup1 hid"))
+ goto close_fd;
+
+ cgroup_mode_value_init(1, cgrp1_hid);
+
+ if (test__start_subtest("cgrp1_tp_btf"))
+ test_tp_btf(cgrp1_fd);
+ if (test__start_subtest("cgrp1_recursion"))
+ test_recursion(cgrp1_fd);
+ if (test__start_subtest("cgrp1_negative"))
+ test_negative();
+ if (test__start_subtest("cgrp1_iter_sleepable"))
+ test_cgroup_iter_sleepable(cgrp1_fd, cgrp1_id);
+ if (test__start_subtest("cgrp1_yes_rcu_lock"))
+ test_yes_rcu_lock(cgrp1_id);
+ if (test__start_subtest("cgrp1_no_rcu_lock"))
+ test_cgrp1_no_rcu_lock();
+
+close_fd:
+ close(cgrp1_fd);
+cleanup:
+ cleanup_classid_environment();
+}
+
+void test_cgrp_local_storage(void)
+{
+ cgrp2_local_storage();
+ cgrp1_local_storage();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 36af1c138faf..65b4512967e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -53,13 +53,13 @@ static void test_check_mtu_xdp_attach(void)
prog = skel->progs.xdp_use_helper_basic;
link = bpf_program__attach_xdp(prog, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto out;
skel->links.xdp_use_helper_basic = link;
memset(&link_info, 0, sizeof(link_info));
fd = bpf_link__fd(link);
- err = bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
if (CHECK(err, "link_info", "failed: %d\n", err))
goto out;
@@ -79,28 +79,21 @@ static void test_check_mtu_run_xdp(struct test_check_mtu *skel,
struct bpf_program *prog,
__u32 mtu_expect)
{
- const char *prog_name = bpf_program__name(prog);
int retval_expect = XDP_PASS;
__u32 mtu_result = 0;
char buf[256] = {};
- int err;
- struct bpf_prog_test_run_attr tattr = {
+ int err, prog_fd = bpf_program__fd(prog);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.repeat = 1,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
- .prog_fd = bpf_program__fd(prog),
- };
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != 0, "bpf_prog_test_run",
- "prog_name:%s (err %d errno %d retval %d)\n",
- prog_name, err, errno, tattr.retval);
+ );
- CHECK(tattr.retval != retval_expect, "retval",
- "progname:%s unexpected retval=%d expected=%d\n",
- prog_name, tattr.retval, retval_expect);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, retval_expect, "retval");
/* Extract MTU that BPF-prog got */
mtu_result = skel->bss->global_bpf_mtu_xdp;
@@ -128,6 +121,8 @@ static void test_check_mtu_xdp(__u32 mtu, __u32 ifindex)
test_check_mtu_run_xdp(skel, skel->progs.xdp_use_helper, mtu);
test_check_mtu_run_xdp(skel, skel->progs.xdp_exceed_mtu, mtu);
test_check_mtu_run_xdp(skel, skel->progs.xdp_minus_delta, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len_exceed, mtu);
cleanup:
test_check_mtu__destroy(skel);
@@ -137,34 +132,47 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel,
struct bpf_program *prog,
__u32 mtu_expect)
{
- const char *prog_name = bpf_program__name(prog);
int retval_expect = BPF_OK;
__u32 mtu_result = 0;
char buf[256] = {};
- int err;
- struct bpf_prog_test_run_attr tattr = {
- .repeat = 1,
+ int err, prog_fd = bpf_program__fd(prog);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
- .prog_fd = bpf_program__fd(prog),
- };
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != 0, "bpf_prog_test_run",
- "prog_name:%s (err %d errno %d retval %d)\n",
- prog_name, err, errno, tattr.retval);
+ .repeat = 1,
+ );
- CHECK(tattr.retval != retval_expect, "retval",
- "progname:%s unexpected retval=%d expected=%d\n",
- prog_name, tattr.retval, retval_expect);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, retval_expect, "retval");
/* Extract MTU that BPF-prog got */
mtu_result = skel->bss->global_bpf_mtu_tc;
ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user");
}
+static void test_chk_segs_flag(struct test_check_mtu *skel, __u32 mtu)
+{
+ int err, prog_fd = bpf_program__fd(skel->progs.tc_chk_segs_flag);
+ struct __sk_buff skb = {
+ .gso_size = 10,
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ /* Lower the mtu to test the BPF_MTU_CHK_SEGS */
+ SYS_NOFAIL("ip link set dev lo mtu 10");
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ SYS_NOFAIL("ip link set dev lo mtu %u", mtu);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, BPF_OK, "retval");
+}
static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
{
@@ -187,13 +195,16 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu_da, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
+ test_chk_segs_flag(skel, mtu);
cleanup:
test_check_mtu__destroy(skel);
}
-void test_check_mtu(void)
+void test_ns_check_mtu(void)
{
- __u32 mtu_lo;
+ int mtu_lo;
if (test__start_subtest("bpf_check_mtu XDP-attach"))
test_check_mtu_xdp_attach();
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index e075d03ab630..7488a7606e6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -10,9 +10,11 @@
#include <netinet/tcp.h>
#include <test_progs.h>
+#include "network_helpers.h"
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
+#include "test_cls_redirect_dynptr.skel.h"
#include "test_cls_redirect_subprogs.skel.h"
#define ENCAP_IP INADDR_LOOPBACK
@@ -20,112 +22,37 @@
static int duration = 0;
-struct addr_port {
- in_port_t port;
- union {
- struct in_addr in_addr;
- struct in6_addr in6_addr;
- };
-};
-
-struct tuple {
- int family;
- struct addr_port src;
- struct addr_port dst;
-};
-
-static int start_server(const struct sockaddr *addr, socklen_t len, int type)
-{
- int fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- return -1;
- if (CHECK_FAIL(bind(fd, addr, len) == -1))
- goto err;
- if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
- goto err;
-
- return fd;
-
-err:
- close(fd);
- return -1;
-}
-
-static int connect_to_server(const struct sockaddr *addr, socklen_t len,
- int type)
-{
- int fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- return -1;
- if (CHECK_FAIL(connect(fd, addr, len)))
- goto err;
-
- return fd;
-
-err:
- close(fd);
- return -1;
-}
-
-static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
-{
- const struct sockaddr_in6 *in6;
- const struct sockaddr_in *in;
-
- switch (sa->sa_family) {
- case AF_INET:
- in = (const struct sockaddr_in *)sa;
- ap->in_addr = in->sin_addr;
- ap->port = in->sin_port;
- return true;
-
- case AF_INET6:
- in6 = (const struct sockaddr_in6 *)sa;
- ap->in6_addr = in6->sin6_addr;
- ap->port = in6->sin6_port;
- return true;
-
- default:
- return false;
- }
-}
-static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
- int *server, int *conn, struct tuple *tuple)
+static bool set_up_conn(const struct sockaddr_storage *addr, socklen_t len, int type,
+ int *server, int *conn,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst)
{
struct sockaddr_storage ss;
socklen_t slen = sizeof(ss);
- struct sockaddr *sa = (struct sockaddr *)&ss;
- *server = start_server(addr, len, type);
+ *server = start_server_addr(type, addr, len, NULL);
if (*server < 0)
return false;
- if (CHECK_FAIL(getsockname(*server, sa, &slen)))
+ if (CHECK_FAIL(getsockname(*server, (struct sockaddr *)&ss, &slen)))
goto close_server;
- *conn = connect_to_server(sa, slen, type);
+ *conn = connect_to_addr(type, &ss, slen, NULL);
if (*conn < 0)
goto close_server;
/* We want to simulate packets arriving at conn, so we have to
* swap src and dst.
*/
- slen = sizeof(ss);
- if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
+ slen = sizeof(*dst);
+ if (CHECK_FAIL(getsockname(*conn, (struct sockaddr *)dst, &slen)))
goto close_conn;
- if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
+ slen = sizeof(*src);
+ if (CHECK_FAIL(getpeername(*conn, (struct sockaddr *)src, &slen)))
goto close_conn;
- slen = sizeof(ss);
- if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
- goto close_conn;
-
- if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
- goto close_conn;
-
- tuple->family = ss.ss_family;
return true;
close_conn:
@@ -141,17 +68,16 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
{
struct sockaddr_in *addr4;
struct sockaddr_in6 *addr6;
+ memset(addr, 0, sizeof(*addr));
switch (family) {
case AF_INET:
addr4 = (struct sockaddr_in *)addr;
- memset(addr4, 0, sizeof(*addr4));
addr4->sin_family = family;
addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
return sizeof(*addr4);
case AF_INET6:
addr6 = (struct sockaddr_in6 *)addr;
- memset(addr6, 0, sizeof(*addr6));
addr6->sin6_family = family;
addr6->sin6_addr = in6addr_loopback;
return sizeof(*addr6);
@@ -161,7 +87,7 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
}
}
-static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr)
+static bool was_decapsulated(struct bpf_test_run_opts *tattr)
{
return tattr->data_size_out < tattr->data_size_in;
}
@@ -273,9 +199,15 @@ static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
}
static size_t build_input(const struct test_cfg *test, void *const buf,
- const struct tuple *tuple)
+ const struct sockaddr_storage *src,
+ const struct sockaddr_storage *dst)
{
- in_port_t sport = tuple->src.port;
+ struct sockaddr_in6 *src_in6 = (struct sockaddr_in6 *)src;
+ struct sockaddr_in6 *dst_in6 = (struct sockaddr_in6 *)dst;
+ struct sockaddr_in *src_in = (struct sockaddr_in *)src;
+ struct sockaddr_in *dst_in = (struct sockaddr_in *)dst;
+ sa_family_t family = src->ss_family;
+ in_port_t sport, dport;
encap_headers_t encap;
struct iphdr ip;
struct ipv6hdr ipv6;
@@ -285,8 +217,11 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
uint8_t *p = buf;
int proto;
+ sport = (family == AF_INET) ? src_in->sin_port : src_in6->sin6_port;
+ dport = (family == AF_INET) ? dst_in->sin_port : dst_in6->sin6_port;
+
proto = IPPROTO_IPIP;
- if (tuple->family == AF_INET6)
+ if (family == AF_INET6)
proto = IPPROTO_IPV6;
encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
@@ -301,15 +236,15 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
if (test->type == UDP)
proto = IPPROTO_UDP;
- switch (tuple->family) {
+ switch (family) {
case AF_INET:
ip = (struct iphdr){
.ihl = 5,
.version = 4,
.ttl = IPDEFTTL,
.protocol = proto,
- .saddr = tuple->src.in_addr.s_addr,
- .daddr = tuple->dst.in_addr.s_addr,
+ .saddr = src_in->sin_addr.s_addr,
+ .daddr = dst_in->sin_addr.s_addr,
};
p = mempcpy(p, &ip, sizeof(ip));
break;
@@ -318,8 +253,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
.version = 6,
.hop_limit = IPDEFTTL,
.nexthdr = proto,
- .saddr = tuple->src.in6_addr,
- .daddr = tuple->dst.in6_addr,
+ .saddr = src_in6->sin6_addr,
+ .daddr = dst_in6->sin6_addr,
};
p = mempcpy(p, &ipv6, sizeof(ipv6));
break;
@@ -334,18 +269,16 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
case TCP:
tcp = (struct tcphdr){
.source = sport,
- .dest = tuple->dst.port,
+ .dest = dport,
+ .syn = (test->flags == SYN),
+ .ack = (test->flags == ACK),
};
- if (test->flags == SYN)
- tcp.syn = true;
- if (test->flags == ACK)
- tcp.ack = true;
p = mempcpy(p, &tcp, sizeof(tcp));
break;
case UDP:
udp = (struct udphdr){
.source = sport,
- .dest = tuple->dst.port,
+ .dest = dport,
};
p = mempcpy(p, &udp, sizeof(udp));
break;
@@ -367,43 +300,43 @@ static void close_fds(int *fds, int n)
static void test_cls_redirect_common(struct bpf_program *prog)
{
- struct bpf_prog_test_run_attr tattr = {};
+ LIBBPF_OPTS(bpf_test_run_opts, tattr);
int families[] = { AF_INET, AF_INET6 };
struct sockaddr_storage ss;
- struct sockaddr *addr;
socklen_t slen;
- int i, j, err;
+ int i, j, err, prog_fd;
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
- struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
+ struct sockaddr_storage srcs[__NR_KIND][ARRAY_SIZE(families)];
+ struct sockaddr_storage dsts[__NR_KIND][ARRAY_SIZE(families)];
- addr = (struct sockaddr *)&ss;
for (i = 0; i < ARRAY_SIZE(families); i++) {
slen = prepare_addr(&ss, families[i]);
if (CHECK_FAIL(!slen))
goto cleanup;
- if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
+ if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_DGRAM,
&servers[UDP][i], &conns[UDP][i],
- &tuples[UDP][i])))
+ &srcs[UDP][i], &dsts[UDP][i])))
goto cleanup;
- if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
+ if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_STREAM,
&servers[TCP][i], &conns[TCP][i],
- &tuples[TCP][i])))
+ &srcs[TCP][i], &dsts[TCP][i])))
goto cleanup;
}
- tattr.prog_fd = bpf_program__fd(prog);
+ prog_fd = bpf_program__fd(prog);
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct test_cfg *test = &tests[i];
for (j = 0; j < ARRAY_SIZE(families); j++) {
- struct tuple *tuple = &tuples[test->type][j];
+ struct sockaddr_storage *src = &srcs[test->type][j];
+ struct sockaddr_storage *dst = &dsts[test->type][j];
char input[256];
char tmp[256];
- test_str(tmp, sizeof(tmp), test, tuple->family);
+ test_str(tmp, sizeof(tmp), test, families[j]);
if (!test__start_subtest(tmp))
continue;
@@ -411,11 +344,11 @@ static void test_cls_redirect_common(struct bpf_program *prog)
tattr.data_size_out = sizeof(tmp);
tattr.data_in = input;
- tattr.data_size_in = build_input(test, input, tuple);
+ tattr.data_size_in = build_input(test, input, src, dst);
if (CHECK_FAIL(!tattr.data_size_in))
continue;
- err = bpf_prog_test_run_xattr(&tattr);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
if (CHECK_FAIL(err))
continue;
@@ -446,6 +379,28 @@ cleanup:
close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
}
+static void test_cls_redirect_dynptr(void)
+{
+ struct test_cls_redirect_dynptr *skel;
+ int err;
+
+ skel = test_cls_redirect_dynptr__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect_dynptr__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect_dynptr__destroy(skel);
+}
+
static void test_cls_redirect_inlined(void)
{
struct test_cls_redirect *skel;
@@ -496,4 +451,6 @@ void test_cls_redirect(void)
test_cls_redirect_inlined();
if (test__start_subtest("cls_redirect_subprogs"))
test_cls_redirect_subprogs();
+ if (test__start_subtest("cls_redirect_dynptr"))
+ test_cls_redirect_dynptr();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
new file mode 100644
index 000000000000..285f20241fe1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "compute_live_registers.skel.h"
+#include "test_progs.h"
+
+void test_compute_live_registers(void)
+{
+ RUN_TESTS(compute_live_registers);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 9229db2f5ca5..24d553109f8d 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -51,24 +51,25 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
bool v4 = family == AF_INET;
__u16 expected_local_port = v4 ? 22222 : 22223;
__u16 expected_peer_port = 60000;
- struct bpf_prog_load_attr attr = {
- .file = v4 ? "./connect_force_port4.o" :
- "./connect_force_port6.o",
- };
struct bpf_program *prog;
struct bpf_object *obj;
- int xlate_fd, fd, err;
+ const char *obj_file = v4 ? "connect_force_port4.bpf.o" : "connect_force_port6.bpf.o";
+ int fd, err;
__u32 duration = 0;
- err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd);
- if (err) {
- log_err("Failed to load BPF object");
+ obj = bpf_object__open_file(obj_file, NULL);
+ if (!ASSERT_OK_PTR(obj, "bpf_obj_open"))
return -1;
+
+ err = bpf_object__load(obj);
+ if (!ASSERT_OK(err, "bpf_obj_load")) {
+ err = -EIO;
+ goto close_bpf_object;
}
- prog = bpf_object__find_program_by_title(obj, v4 ?
- "cgroup/connect4" :
- "cgroup/connect6");
+ prog = bpf_object__find_program_by_name(obj, v4 ?
+ "connect4" :
+ "connect6");
if (CHECK(!prog, "find_prog", "connect prog not found\n")) {
err = -EIO;
goto close_bpf_object;
@@ -82,9 +83,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
goto close_bpf_object;
}
- prog = bpf_object__find_program_by_title(obj, v4 ?
- "cgroup/getpeername4" :
- "cgroup/getpeername6");
+ prog = bpf_object__find_program_by_name(obj, v4 ?
+ "getpeername4" :
+ "getpeername6");
if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) {
err = -EIO;
goto close_bpf_object;
@@ -98,9 +99,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
goto close_bpf_object;
}
- prog = bpf_object__find_program_by_title(obj, v4 ?
- "cgroup/getsockname4" :
- "cgroup/getsockname6");
+ prog = bpf_object__find_program_by_name(obj, v4 ?
+ "getsockname4" :
+ "getsockname6");
if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) {
err = -EIO;
goto close_bpf_object;
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
new file mode 100644
index 000000000000..40fe571f2fe7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2022 Google LLC.
+ */
+
+#define _GNU_SOURCE
+#include <sys/mount.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#include "connect_ping.skel.h"
+
+/* 2001:db8::1 */
+#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } }
+static const struct in6_addr bindaddr_v6 = BINDADDR_V6;
+
+static void subtest(int cgroup_fd, struct connect_ping *skel,
+ int family, int do_bind)
+{
+ struct sockaddr_in sa4 = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ struct sockaddr_in6 sa6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_LOOPBACK_INIT,
+ };
+ struct sockaddr *sa = NULL;
+ socklen_t sa_len;
+ int protocol = -1;
+ int sock_fd;
+
+ switch (family) {
+ case AF_INET:
+ sa = (struct sockaddr *)&sa4;
+ sa_len = sizeof(sa4);
+ protocol = IPPROTO_ICMP;
+ break;
+ case AF_INET6:
+ sa = (struct sockaddr *)&sa6;
+ sa_len = sizeof(sa6);
+ protocol = IPPROTO_ICMPV6;
+ break;
+ }
+
+ memset(skel->bss, 0, sizeof(*skel->bss));
+ skel->bss->do_bind = do_bind;
+
+ sock_fd = socket(family, SOCK_DGRAM, protocol);
+ if (!ASSERT_GE(sock_fd, 0, "sock-create"))
+ return;
+
+ if (!ASSERT_OK(connect(sock_fd, sa, sa_len), "connect"))
+ goto close_sock;
+
+ if (!ASSERT_EQ(skel->bss->invocations_v4, family == AF_INET ? 1 : 0,
+ "invocations_v4"))
+ goto close_sock;
+ if (!ASSERT_EQ(skel->bss->invocations_v6, family == AF_INET6 ? 1 : 0,
+ "invocations_v6"))
+ goto close_sock;
+ if (!ASSERT_EQ(skel->bss->has_error, 0, "has_error"))
+ goto close_sock;
+
+ if (!ASSERT_OK(getsockname(sock_fd, sa, &sa_len),
+ "getsockname"))
+ goto close_sock;
+
+ switch (family) {
+ case AF_INET:
+ if (!ASSERT_EQ(sa4.sin_family, family, "sin_family"))
+ goto close_sock;
+ if (!ASSERT_EQ(sa4.sin_addr.s_addr,
+ htonl(do_bind ? 0x01010101 : INADDR_LOOPBACK),
+ "sin_addr"))
+ goto close_sock;
+ break;
+ case AF_INET6:
+ if (!ASSERT_EQ(sa6.sin6_family, AF_INET6, "sin6_family"))
+ goto close_sock;
+ if (!ASSERT_EQ(memcmp(&sa6.sin6_addr,
+ do_bind ? &bindaddr_v6 : &in6addr_loopback,
+ sizeof(sa6.sin6_addr)),
+ 0, "sin6_addr"))
+ goto close_sock;
+ break;
+ }
+
+close_sock:
+ close(sock_fd);
+}
+
+void test_connect_ping(void)
+{
+ struct connect_ping *skel;
+ int cgroup_fd;
+
+ if (!ASSERT_OK(unshare(CLONE_NEWNET | CLONE_NEWNS), "unshare"))
+ return;
+
+ /* overmount sysfs, and making original sysfs private so overmount
+ * does not propagate to other mntns.
+ */
+ if (!ASSERT_OK(mount("none", "/sys", NULL, MS_PRIVATE, NULL),
+ "remount-private-sys"))
+ return;
+ if (!ASSERT_OK(mount("sysfs", "/sys", "sysfs", 0, NULL),
+ "mount-sys"))
+ return;
+ if (!ASSERT_OK(mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL),
+ "mount-bpf"))
+ goto clean_mount;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "lo-up"))
+ goto clean_mount;
+ if (!ASSERT_OK(system("ip addr add 1.1.1.1 dev lo"), "lo-addr-v4"))
+ goto clean_mount;
+ if (!ASSERT_OK(system("ip -6 addr add 2001:db8::1 dev lo"), "lo-addr-v6"))
+ goto clean_mount;
+ if (write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"))
+ goto clean_mount;
+
+ cgroup_fd = test__join_cgroup("/connect_ping");
+ if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+ goto clean_mount;
+
+ skel = connect_ping__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel-load"))
+ goto close_cgroup;
+ skel->links.connect_v4_prog =
+ bpf_program__attach_cgroup(skel->progs.connect_v4_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_v4_prog, "cg-attach-v4"))
+ goto skel_destroy;
+ skel->links.connect_v6_prog =
+ bpf_program__attach_cgroup(skel->progs.connect_v6_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_v6_prog, "cg-attach-v6"))
+ goto skel_destroy;
+
+ /* Connect a v4 ping socket to localhost, assert that only v4 is called,
+ * and called exactly once, and that the socket's bound address is
+ * original loopback address.
+ */
+ if (test__start_subtest("ipv4"))
+ subtest(cgroup_fd, skel, AF_INET, 0);
+
+ /* Connect a v4 ping socket to localhost, assert that only v4 is called,
+ * and called exactly once, and that the socket's bound address is
+ * address we explicitly bound.
+ */
+ if (test__start_subtest("ipv4-bind"))
+ subtest(cgroup_fd, skel, AF_INET, 1);
+
+ /* Connect a v6 ping socket to localhost, assert that only v6 is called,
+ * and called exactly once, and that the socket's bound address is
+ * original loopback address.
+ */
+ if (test__start_subtest("ipv6"))
+ subtest(cgroup_fd, skel, AF_INET6, 0);
+
+ /* Connect a v6 ping socket to localhost, assert that only v6 is called,
+ * and called exactly once, and that the socket's bound address is
+ * address we explicitly bound.
+ */
+ if (test__start_subtest("ipv6-bind"))
+ subtest(cgroup_fd, skel, AF_INET6, 1);
+
+skel_destroy:
+ connect_ping__destroy(skel);
+
+close_cgroup:
+ close(cgroup_fd);
+
+clean_mount:
+ umount2("/sys", MNT_DETACH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
index 981c251453d9..f2ce4fd1cdae 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -53,8 +53,8 @@ void test_core_autosize(void)
char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
int err, fd = -1, zero = 0;
int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
struct test_core_autosize* skel = NULL;
- struct bpf_object_load_attr load_attr = {};
struct bpf_program *prog;
struct bpf_map *bss_map;
struct btf *btf = NULL;
@@ -112,7 +112,7 @@ void test_core_autosize(void)
if (!ASSERT_OK_PTR(f, "btf_fdopen"))
goto cleanup;
- raw_data = btf__get_raw_data(btf, &raw_sz);
+ raw_data = btf__raw_data(btf, &raw_sz);
if (!ASSERT_OK_PTR(raw_data, "raw_data"))
goto cleanup;
written = fwrite(raw_data, 1, raw_sz, f);
@@ -125,9 +125,10 @@ void test_core_autosize(void)
fd = -1;
/* open and load BPF program with custom BTF as the kernel BTF */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
/* disable handle_signed() for now */
prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
@@ -135,9 +136,7 @@ void test_core_autosize(void)
goto cleanup;
bpf_program__set_autoload(prog, false);
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(skel->obj);
if (!ASSERT_OK(err, "prog_load"))
goto cleanup;
@@ -164,11 +163,11 @@ void test_core_autosize(void)
usleep(1);
- bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss");
+ bss_map = bpf_object__find_map_by_name(skel->obj, ".bss");
if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
goto cleanup;
- err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out);
+ err = bpf_map__lookup_elem(bss_map, &zero, sizeof(zero), &out, sizeof(out), 0);
if (!ASSERT_OK(err, "bss_lookup"))
goto cleanup;
@@ -204,14 +203,13 @@ void test_core_autosize(void)
skel = NULL;
/* now re-load with handle_signed() enabled, it should fail loading */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
- if (!ASSERT_ERR(err, "bad_prog_load"))
+ err = test_core_autosize__load(skel);
+ if (!ASSERT_ERR(err, "skel_load"))
goto cleanup;
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c
index 1931a158510e..63a51e9f3630 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_extern.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c
@@ -39,6 +39,7 @@ static struct test_case {
"CONFIG_STR=\"abracad\"\n"
"CONFIG_MISSING=0",
.data = {
+ .unkn_virt_val = 0,
.bpf_syscall = false,
.tristate_val = TRI_MODULE,
.bool_val = true,
@@ -121,7 +122,7 @@ static struct test_case {
void test_core_extern(void)
{
const uint32_t kern_ver = get_kernel_version();
- int err, duration = 0, i, j;
+ int err, i, j;
struct test_core_extern *skel = NULL;
uint64_t *got, *exp;
int n = sizeof(*skel->data) / sizeof(uint64_t);
@@ -136,19 +137,17 @@ void test_core_extern(void)
continue;
skel = test_core_extern__open_opts(&opts);
- if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
goto cleanup;
err = test_core_extern__load(skel);
if (t->fails) {
- CHECK(!err, "skel_load",
- "shouldn't succeed open/load of skeleton\n");
+ ASSERT_ERR(err, "skel_load_should_fail");
goto cleanup;
- } else if (CHECK(err, "skel_load",
- "failed to open/load skeleton\n")) {
+ } else if (!ASSERT_OK(err, "skel_load")) {
goto cleanup;
}
err = test_core_extern__attach(skel);
- if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err))
+ if (!ASSERT_OK(err, "attach_raw_tp"))
goto cleanup;
usleep(1);
@@ -158,9 +157,7 @@ void test_core_extern(void)
got = (uint64_t *)skel->data;
exp = (uint64_t *)&t->data;
for (j = 0; j < n; j++) {
- CHECK(got[j] != exp[j], "check_res",
- "result #%d: expected %llx, but got %llx\n",
- j, (__u64)exp[j], (__u64)got[j]);
+ ASSERT_EQ(got[j], exp[j], "result");
}
cleanup:
test_core_extern__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern.c b/tools/testing/selftests/bpf/prog_tests/core_kern.c
new file mode 100644
index 000000000000..6a5a1c019a5d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_kern.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "test_progs.h"
+#include "core_kern.lskel.h"
+
+void test_core_kern_lskel(void)
+{
+ struct core_kern_lskel *skel;
+ int link_fd;
+
+ skel = core_kern_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ link_fd = core_kern_lskel__core_relo_proto__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(core_relo_proto)"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+ ASSERT_TRUE(skel->bss->proto_out[0], "bpf_core_type_exists");
+ ASSERT_FALSE(skel->bss->proto_out[1], "!bpf_core_type_exists");
+ ASSERT_TRUE(skel->bss->proto_out[2], "bpf_core_type_exists. nested");
+
+cleanup:
+ core_kern_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c
new file mode 100644
index 000000000000..04cc145bc26a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "test_progs.h"
+#include "core_kern_overflow.lskel.h"
+
+void test_core_kern_overflow_lskel(void)
+{
+ struct core_kern_overflow_lskel *skel;
+
+ skel = core_kern_overflow_lskel__open_and_load();
+ if (!ASSERT_NULL(skel, "open_and_load"))
+ core_kern_overflow_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 06eb956ff7bb..08963c82f30b 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
#include <test_progs.h>
#include "progs/core_reloc_types.h"
-#include "bpf_testmod/bpf_testmod.h"
+#include "test_kmods/bpf_testmod.h"
+#include <linux/limits.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <bpf/btf.h>
@@ -10,9 +12,9 @@ static int duration = 0;
#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
-#define MODULES_CASE(name, sec_name, tp_name) { \
+#define MODULES_CASE(name, pg_name, tp_name) { \
.case_name = name, \
- .bpf_obj_file = "test_core_reloc_module.o", \
+ .bpf_obj_file = "test_core_reloc_module.bpf.o", \
.btf_src_file = NULL, /* find in kernel module BTFs */ \
.input = "", \
.input_len = 0, \
@@ -28,9 +30,9 @@ static int duration = 0;
.comm_len = sizeof("test_progs"), \
}, \
.output_len = sizeof(struct core_reloc_module_output), \
- .prog_sec_name = sec_name, \
+ .prog_name = pg_name, \
.raw_tp_name = tp_name, \
- .trigger = trigger_module_test_read, \
+ .trigger = __trigger_module_test_read, \
.needs_testmod = true, \
}
@@ -42,8 +44,10 @@ static int duration = 0;
#define FLAVORS_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_flavors.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .bpf_obj_file = "test_core_reloc_flavors.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_flavors" \
#define FLAVORS_CASE(name) { \
FLAVORS_CASE_COMMON(name), \
@@ -65,8 +69,10 @@ static int duration = 0;
#define NESTING_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_nesting.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .bpf_obj_file = "test_core_reloc_nesting.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_nesting" \
#define NESTING_CASE(name) { \
NESTING_CASE_COMMON(name), \
@@ -79,10 +85,11 @@ static int duration = 0;
#define NESTING_ERR_CASE(name) { \
NESTING_CASE_COMMON(name), \
.fails = true, \
+ .run_btfgen_fails = true, \
}
#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
- .a = { [2] = 1 }, \
+ .a = { [2] = 1, [3] = 11 }, \
.b = { [1] = { [2] = { [3] = 2 } } }, \
.c = { [1] = { .c = 3 } }, \
.d = { [0] = { [0] = { .d = 4 } } }, \
@@ -90,8 +97,10 @@ static int duration = 0;
#define ARRAYS_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_arrays.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .bpf_obj_file = "test_core_reloc_arrays.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_arrays" \
#define ARRAYS_CASE(name) { \
ARRAYS_CASE_COMMON(name), \
@@ -99,6 +108,7 @@ static int duration = 0;
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \
.a2 = 1, \
+ .a3 = 12, \
.b123 = 2, \
.c1c = 3, \
.d00d = 4, \
@@ -122,8 +132,10 @@ static int duration = 0;
#define PRIMITIVES_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_primitives.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .bpf_obj_file = "test_core_reloc_primitives.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_primitives" \
#define PRIMITIVES_CASE(name) { \
PRIMITIVES_CASE_COMMON(name), \
@@ -140,8 +152,8 @@ static int duration = 0;
#define MODS_CASE(name) { \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_mods.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .bpf_obj_file = "test_core_reloc_mods.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) { \
.a = 1, \
.b = 2, \
@@ -158,12 +170,14 @@ static int duration = 0;
.e = 5, .f = 6, .g = 7, .h = 8, \
}, \
.output_len = sizeof(struct core_reloc_mods_output), \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_mods", \
}
#define PTR_AS_ARR_CASE(name) { \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_ptr_as_arr.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .bpf_obj_file = "test_core_reloc_ptr_as_arr.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
.input = (const char *)&(struct core_reloc_##name []){ \
{ .a = 1 }, \
{ .a = 2 }, \
@@ -174,6 +188,8 @@ static int duration = 0;
.a = 3, \
}, \
.output_len = sizeof(struct core_reloc_ptr_as_arr), \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_ptr_as_arr", \
}
#define INTS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
@@ -189,8 +205,10 @@ static int duration = 0;
#define INTS_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_ints.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .bpf_obj_file = "test_core_reloc_ints.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_ints"
#define INTS_CASE(name) { \
INTS_CASE_COMMON(name), \
@@ -207,65 +225,78 @@ static int duration = 0;
#define FIELD_EXISTS_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_existence.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
-
-#define FIELD_EXISTS_ERR_CASE(name) { \
- FIELD_EXISTS_CASE_COMMON(name), \
- .fails = true, \
-}
+ .bpf_obj_file = "test_core_reloc_existence.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_existence"
#define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \
.case_name = test_name_prefix#name, \
.bpf_obj_file = objfile, \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o"
#define BITFIELDS_CASE(name, ...) { \
- BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
- "direct:", name), \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.bpf.o", \
+ "probed:", name), \
.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
__VA_ARGS__, \
.output_len = sizeof(struct core_reloc_bitfields_output), \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_bitfields", \
}, { \
- BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
- "probed:", name), \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.bpf.o", \
+ "direct:", name), \
.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
__VA_ARGS__, \
.output_len = sizeof(struct core_reloc_bitfields_output), \
- .prog_sec_name = "tp_btf/sys_enter", \
+ .prog_name = "test_core_bitfields_direct", \
}
#define BITFIELDS_ERR_CASE(name) { \
- BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.bpf.o", \
"probed:", name), \
.fails = true, \
+ .run_btfgen_fails = true, \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_bitfields", \
}, { \
- BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.bpf.o", \
"direct:", name), \
- .prog_sec_name = "tp_btf/sys_enter", \
.fails = true, \
+ .run_btfgen_fails = true, \
+ .prog_name = "test_core_bitfields_direct", \
}
#define SIZE_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_size.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
- .relaxed_core_relocs = true
+ .bpf_obj_file = "test_core_reloc_size.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_size"
#define SIZE_OUTPUT_DATA(type) \
STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \
.int_sz = sizeof(((type *)0)->int_field), \
+ .int_off = offsetof(type, int_field), \
.struct_sz = sizeof(((type *)0)->struct_field), \
+ .struct_off = offsetof(type, struct_field), \
.union_sz = sizeof(((type *)0)->union_field), \
+ .union_off = offsetof(type, union_field), \
.arr_sz = sizeof(((type *)0)->arr_field), \
- .arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \
+ .arr_off = offsetof(type, arr_field), \
+ .arr_elem_sz = sizeof(((type *)0)->arr_field[1]), \
+ .arr_elem_off = offsetof(type, arr_field[1]), \
.ptr_sz = 8, /* always 8-byte pointer for BPF */ \
+ .ptr_off = offsetof(type, ptr_field), \
.enum_sz = sizeof(((type *)0)->enum_field), \
+ .enum_off = offsetof(type, enum_field), \
+ .float_sz = sizeof(((type *)0)->float_field), \
+ .float_off = offsetof(type, float_field), \
}
#define SIZE_CASE(name) { \
@@ -278,12 +309,15 @@ static int duration = 0;
#define SIZE_ERR_CASE(name) { \
SIZE_CASE_COMMON(name), \
.fails = true, \
+ .run_btfgen_fails = true, \
}
#define TYPE_BASED_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_type_based.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .bpf_obj_file = "test_core_reloc_type_based.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_type_based"
#define TYPE_BASED_CASE(name, ...) { \
TYPE_BASED_CASE_COMMON(name), \
@@ -299,8 +333,10 @@ static int duration = 0;
#define TYPE_ID_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_type_id.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .bpf_obj_file = "test_core_reloc_type_id.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_type_id"
#define TYPE_ID_CASE(name, setup_fn) { \
TYPE_ID_CASE_COMMON(name), \
@@ -316,8 +352,10 @@ static int duration = 0;
#define ENUMVAL_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_enumval.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .bpf_obj_file = "test_core_reloc_enumval.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_enumval"
#define ENUMVAL_CASE(name, ...) { \
ENUMVAL_CASE_COMMON(name), \
@@ -331,6 +369,25 @@ static int duration = 0;
.fails = true, \
}
+#define ENUM64VAL_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_enum64val.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_enum64val"
+
+#define ENUM64VAL_CASE(name, ...) { \
+ ENUM64VAL_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_enum64val_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_enum64val_output), \
+}
+
+#define ENUM64VAL_ERR_CASE(name) { \
+ ENUM64VAL_CASE_COMMON(name), \
+ .fails = true, \
+}
+
struct core_reloc_test_case;
typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
@@ -345,9 +402,10 @@ struct core_reloc_test_case {
const char *output;
int output_len;
bool fails;
+ bool run_btfgen_fails;
bool needs_testmod;
bool relaxed_core_relocs;
- const char *prog_sec_name;
+ const char *prog_name;
const char *raw_tp_name;
setup_test_fn setup;
trigger_test_fn trigger;
@@ -373,8 +431,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
const char *name;
int i;
- if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
- CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+ if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) {
btf__free(local_btf);
btf__free(targ_btf);
return -EINVAL;
@@ -387,7 +444,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
exp->local_anon_void_ptr = -1;
exp->local_anon_arr = -1;
- for (i = 1; i <= btf__get_nr_types(local_btf); i++)
+ for (i = 1; i < btf__type_cnt(local_btf); i++)
{
t = btf__type_by_id(local_btf, i);
/* we are interested only in anonymous types */
@@ -439,7 +496,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
static int setup_type_id_case_success(struct core_reloc_test_case *test) {
struct core_reloc_type_id_output *exp = (void *)test->output;
- struct btf *targ_btf = btf__parse(test->btf_src_file, NULL);
+ struct btf *targ_btf;
int err;
err = setup_type_id_case_local(test);
@@ -480,28 +537,19 @@ static int setup_type_id_case_failure(struct core_reloc_test_case *test)
return 0;
}
-static int trigger_module_test_read(const struct core_reloc_test_case *test)
+static int __trigger_module_test_read(const struct core_reloc_test_case *test)
{
struct core_reloc_module_output *exp = (void *)test->output;
- int fd, err;
-
- fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
- err = -errno;
- if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
- return err;
-
- read(fd, NULL, exp->len); /* request expected number of bytes */
- close(fd);
+ trigger_module_test_read(exp->len);
return 0;
}
-
-static struct core_reloc_test_case test_cases[] = {
+static const struct core_reloc_test_case test_cases[] = {
/* validate we can find kernel image and use its BTF for relocs */
{
.case_name = "kernel",
- .bpf_obj_file = "test_core_reloc_kernel.o",
+ .bpf_obj_file = "test_core_reloc_kernel.bpf.o",
.btf_src_file = NULL, /* load from /lib/modules/$(uname -r) */
.input = "",
.input_len = 0,
@@ -509,13 +557,16 @@ static struct core_reloc_test_case test_cases[] = {
.valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
.comm = "test_progs",
.comm_len = sizeof("test_progs"),
+ .local_task_struct_matches = true,
},
.output_len = sizeof(struct core_reloc_kernel_output),
+ .raw_tp_name = "sys_enter",
+ .prog_name = "test_core_kernel",
},
/* validate we can find kernel module BTF types for relocs/attach */
- MODULES_CASE("module_probed", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"),
- MODULES_CASE("module_direct", "tp_btf/bpf_testmod_test_read", NULL),
+ MODULES_CASE("module_probed", "test_core_module_probed", "bpf_testmod_test_read"),
+ MODULES_CASE("module_direct", "test_core_module_direct", NULL),
/* validate BPF program can use multiple flavors to match against
* single target BTF type
@@ -550,9 +601,9 @@ static struct core_reloc_test_case test_cases[] = {
ARRAYS_ERR_CASE(arrays___err_too_small),
ARRAYS_ERR_CASE(arrays___err_too_shallow),
ARRAYS_ERR_CASE(arrays___err_non_array),
- ARRAYS_ERR_CASE(arrays___err_wrong_val_type1),
- ARRAYS_ERR_CASE(arrays___err_wrong_val_type2),
+ ARRAYS_ERR_CASE(arrays___err_wrong_val_type),
ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
+ ARRAYS_ERR_CASE(arrays___err_bad_signed_arr_elem_sz),
/* enum/ptr/int handling scenarios */
PRIMITIVES_CASE(primitives),
@@ -581,8 +632,8 @@ static struct core_reloc_test_case test_cases[] = {
/* validate edge cases of capturing relocations */
{
.case_name = "misc",
- .bpf_obj_file = "test_core_reloc_misc.o",
- .btf_src_file = "btf__core_reloc_misc.o",
+ .bpf_obj_file = "test_core_reloc_misc.bpf.o",
+ .btf_src_file = "btf__core_reloc_misc.bpf.o",
.input = (const char *)&(struct core_reloc_misc_extensible[]){
{ .a = 1 },
{ .a = 2 }, /* not read */
@@ -595,6 +646,8 @@ static struct core_reloc_test_case test_cases[] = {
.c = 0, /* BUG in clang, should be 3 */
},
.output_len = sizeof(struct core_reloc_misc_output),
+ .raw_tp_name = "sys_enter",
+ .prog_name = "test_core_misc",
},
/* validate field existence checks */
@@ -642,13 +695,25 @@ static struct core_reloc_test_case test_cases[] = {
},
.output_len = sizeof(struct core_reloc_existence_output),
},
-
- FIELD_EXISTS_ERR_CASE(existence__err_int_sz),
- FIELD_EXISTS_ERR_CASE(existence__err_int_type),
- FIELD_EXISTS_ERR_CASE(existence__err_int_kind),
- FIELD_EXISTS_ERR_CASE(existence__err_arr_kind),
- FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type),
- FIELD_EXISTS_ERR_CASE(existence__err_struct_type),
+ {
+ FIELD_EXISTS_CASE_COMMON(existence___wrong_field_defs),
+ .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___wrong_field_defs) {
+ },
+ .input_len = sizeof(struct core_reloc_existence___wrong_field_defs),
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
+ .a_exists = 0,
+ .b_exists = 0,
+ .c_exists = 0,
+ .arr_exists = 0,
+ .s_exists = 0,
+ .a_value = 0xff000001u,
+ .b_value = 0xff000002u,
+ .c_value = 0xff000003u,
+ .arr_value = 0xff000004u,
+ .s_value = 0xff000005u,
+ },
+ .output_len = sizeof(struct core_reloc_existence_output),
+ },
/* bitfield relocation checks */
BITFIELDS_CASE(bitfields, {
@@ -684,14 +749,16 @@ static struct core_reloc_test_case test_cases[] = {
}),
BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield),
- /* size relocation checks */
+ /* field size and offset relocation checks */
SIZE_CASE(size),
SIZE_CASE(size___diff_sz),
+ SIZE_CASE(size___diff_offs),
SIZE_ERR_CASE(size___err_ambiguous),
- /* validate type existence and size relocations */
+ /* validate type existence, match, and size relocations */
TYPE_BASED_CASE(type_based, {
.struct_exists = 1,
+ .complex_struct_exists = 1,
.union_exists = 1,
.enum_exists = 1,
.typedef_named_struct_exists = 1,
@@ -700,8 +767,24 @@ static struct core_reloc_test_case test_cases[] = {
.typedef_int_exists = 1,
.typedef_enum_exists = 1,
.typedef_void_ptr_exists = 1,
+ .typedef_restrict_ptr_exists = 1,
.typedef_func_proto_exists = 1,
.typedef_arr_exists = 1,
+
+ .struct_matches = 1,
+ .complex_struct_matches = 1,
+ .union_matches = 1,
+ .enum_matches = 1,
+ .typedef_named_struct_matches = 1,
+ .typedef_anon_struct_matches = 1,
+ .typedef_struct_ptr_matches = 1,
+ .typedef_int_matches = 1,
+ .typedef_enum_matches = 1,
+ .typedef_void_ptr_matches = 1,
+ .typedef_restrict_ptr_matches = 1,
+ .typedef_func_proto_matches = 1,
+ .typedef_arr_matches = 1,
+
.struct_sz = sizeof(struct a_struct),
.union_sz = sizeof(union a_union),
.enum_sz = sizeof(enum an_enum),
@@ -717,6 +800,45 @@ static struct core_reloc_test_case test_cases[] = {
TYPE_BASED_CASE(type_based___all_missing, {
/* all zeros */
}),
+ TYPE_BASED_CASE(type_based___diff, {
+ .struct_exists = 1,
+ .complex_struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+
+ .struct_matches = 1,
+ .complex_struct_matches = 1,
+ .union_matches = 1,
+ .enum_matches = 1,
+ .typedef_named_struct_matches = 1,
+ .typedef_anon_struct_matches = 1,
+ .typedef_struct_ptr_matches = 1,
+ .typedef_int_matches = 0,
+ .typedef_enum_matches = 1,
+ .typedef_void_ptr_matches = 1,
+ .typedef_func_proto_matches = 0,
+ .typedef_arr_matches = 0,
+
+ .struct_sz = sizeof(struct a_struct___diff),
+ .union_sz = sizeof(union a_union___diff),
+ .enum_sz = sizeof(enum an_enum___diff),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef___diff),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff),
+ .typedef_int_sz = sizeof(int_typedef___diff),
+ .typedef_enum_sz = sizeof(enum_typedef___diff),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef___diff),
+ .typedef_arr_sz = sizeof(arr_typedef___diff),
+ }),
TYPE_BASED_CASE(type_based___diff_sz, {
.struct_exists = 1,
.union_exists = 1,
@@ -729,6 +851,19 @@ static struct core_reloc_test_case test_cases[] = {
.typedef_void_ptr_exists = 1,
.typedef_func_proto_exists = 1,
.typedef_arr_exists = 1,
+
+ .struct_matches = 0,
+ .union_matches = 0,
+ .enum_matches = 0,
+ .typedef_named_struct_matches = 0,
+ .typedef_anon_struct_matches = 0,
+ .typedef_struct_ptr_matches = 1,
+ .typedef_int_matches = 0,
+ .typedef_enum_matches = 0,
+ .typedef_void_ptr_matches = 1,
+ .typedef_func_proto_matches = 0,
+ .typedef_arr_matches = 0,
+
.struct_sz = sizeof(struct a_struct___diff_sz),
.union_sz = sizeof(union a_union___diff_sz),
.enum_sz = sizeof(enum an_enum___diff_sz),
@@ -743,10 +878,12 @@ static struct core_reloc_test_case test_cases[] = {
}),
TYPE_BASED_CASE(type_based___incompat, {
.enum_exists = 1,
+ .enum_matches = 1,
.enum_sz = sizeof(enum an_enum),
}),
TYPE_BASED_CASE(type_based___fn_wrong_args, {
.struct_exists = 1,
+ .struct_matches = 1,
.struct_sz = sizeof(struct a_struct),
}),
@@ -792,6 +929,45 @@ static struct core_reloc_test_case test_cases[] = {
.anon_val2 = 0x222,
}),
ENUMVAL_ERR_CASE(enumval___err_missing),
+
+ /* 64bit enumerator value existence and value relocations */
+ ENUM64VAL_CASE(enum64val, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = true,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = true,
+ .unsigned_val1 = 0x1ffffffffULL,
+ .unsigned_val2 = 0x2,
+ .signed_val1 = 0x1ffffffffLL,
+ .signed_val2 = -2,
+ }),
+ ENUM64VAL_CASE(enum64val___diff, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = true,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = true,
+ .unsigned_val1 = 0x101ffffffffULL,
+ .unsigned_val2 = 0x202ffffffffULL,
+ .signed_val1 = -101,
+ .signed_val2 = -202,
+ }),
+ ENUM64VAL_CASE(enum64val___val3_missing, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = false,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = false,
+ .unsigned_val1 = 0x111ffffffffULL,
+ .unsigned_val2 = 0x222,
+ .signed_val1 = 0x111ffffffffLL,
+ .signed_val2 = -222,
+ }),
+ ENUM64VAL_ERR_CASE(enum64val___err_missing),
};
struct data {
@@ -807,13 +983,27 @@ static size_t roundup_page(size_t sz)
return (sz + page_size - 1) / page_size * page_size;
}
-void test_core_reloc(void)
+static int run_btfgen(const char *src_btf, const char *dst_btf, const char *objpath)
+{
+ char command[4096];
+ int n;
+
+ n = snprintf(command, sizeof(command),
+ "./bpftool gen min_core_btf %s %s %s",
+ src_btf, dst_btf, objpath);
+ if (n < 0 || n >= sizeof(command))
+ return -1;
+
+ return system(command);
+}
+
+static void run_core_reloc_tests(bool use_btfgen)
{
const size_t mmap_sz = roundup_page(sizeof(struct data));
- struct bpf_object_load_attr load_attr = {};
- struct core_reloc_test_case *test_case;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
+ struct core_reloc_test_case *test_case, test_case_copy;
const char *tp_name, *probe_name;
- int err, i, equal;
+ int err, i, equal, fd;
struct bpf_link *link = NULL;
struct bpf_map *data_map;
struct bpf_program *prog;
@@ -822,10 +1012,14 @@ void test_core_reloc(void)
struct data *data;
void *mmap_data = NULL;
- my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32);
+ my_pid_tgid = getpid() | ((uint64_t)sys_gettid() << 32);
for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
- test_case = &test_cases[i];
+ char btf_file[] = "/tmp/core_reloc.btf.XXXXXX";
+
+ test_case_copy = test_cases[i];
+ test_case = &test_case_copy;
+
if (!test__start_subtest(test_case->case_name))
continue;
@@ -834,40 +1028,58 @@ void test_core_reloc(void)
continue;
}
+ /* generate a "minimal" BTF file and use it as source */
+ if (use_btfgen) {
+
+ if (!test_case->btf_src_file || test_case->run_btfgen_fails) {
+ test__skip();
+ continue;
+ }
+
+ fd = mkstemp(btf_file);
+ if (!ASSERT_GE(fd, 0, "btf_tmp"))
+ continue;
+ close(fd); /* we only need the path */
+ err = run_btfgen(test_case->btf_src_file, btf_file,
+ test_case->bpf_obj_file);
+ if (!ASSERT_OK(err, "run_btfgen"))
+ continue;
+
+ test_case->btf_src_file = btf_file;
+ }
+
if (test_case->setup) {
err = test_case->setup(test_case);
if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err))
continue;
}
- obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
- test_case->bpf_obj_file, PTR_ERR(obj)))
- continue;
-
- probe_name = "raw_tracepoint/sys_enter";
- tp_name = "sys_enter";
- if (test_case->prog_sec_name) {
- probe_name = test_case->prog_sec_name;
- tp_name = test_case->raw_tp_name; /* NULL for tp_btf */
+ if (test_case->btf_src_file) {
+ err = access(test_case->btf_src_file, R_OK);
+ if (!ASSERT_OK(err, "btf_src_file"))
+ continue;
}
- prog = bpf_object__find_program_by_title(obj, probe_name);
+ open_opts.btf_custom_path = test_case->btf_src_file;
+ obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts);
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
+ goto cleanup;
+
+ probe_name = test_case->prog_name;
+ tp_name = test_case->raw_tp_name; /* NULL for tp_btf */
+ prog = bpf_object__find_program_by_name(obj, probe_name);
if (CHECK(!prog, "find_probe",
"prog '%s' not found\n", probe_name))
goto cleanup;
- load_attr.obj = obj;
- load_attr.log_level = 0;
- load_attr.target_btf_path = test_case->btf_src_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(obj);
if (err) {
if (!test_case->fails)
- CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err);
+ ASSERT_OK(err, "obj_load");
goto cleanup;
}
- data_map = bpf_object__find_map_by_name(obj, "test_cor.bss");
+ data_map = bpf_object__find_map_by_name(obj, ".bss");
if (CHECK(!data_map, "find_data_map", "data map not found\n"))
goto cleanup;
@@ -881,12 +1093,12 @@ void test_core_reloc(void)
data = mmap_data;
memset(mmap_data, 0, sizeof(*data));
- memcpy(data->in, test_case->input, test_case->input_len);
+ if (test_case->input_len)
+ memcpy(data->in, test_case->input, test_case->input_len);
data->my_pid_tgid = my_pid_tgid;
link = bpf_program__attach_raw_tracepoint(prog, tp_name);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
/* trigger test run */
@@ -902,10 +1114,8 @@ void test_core_reloc(void)
goto cleanup;
}
- if (test_case->fails) {
- CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name);
+ if (!ASSERT_FALSE(test_case->fails, "obj_load_should_fail"))
goto cleanup;
- }
equal = memcmp(data->out, test_case->output,
test_case->output_len) == 0;
@@ -929,10 +1139,20 @@ cleanup:
CHECK_FAIL(munmap(mmap_data, mmap_sz));
mmap_data = NULL;
}
- if (!IS_ERR_OR_NULL(link)) {
- bpf_link__destroy(link);
- link = NULL;
- }
+ if (use_btfgen)
+ remove(test_case->btf_src_file);
+ bpf_link__destroy(link);
+ link = NULL;
bpf_object__close(obj);
}
}
+
+void test_core_reloc(void)
+{
+ run_core_reloc_tests(false);
+}
+
+void test_core_reloc_btfgen(void)
+{
+ run_core_reloc_tests(true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc_raw.c b/tools/testing/selftests/bpf/prog_tests/core_reloc_raw.c
new file mode 100644
index 000000000000..a18d3680fb16
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc_raw.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Test cases that can't load programs using libbpf and need direct
+ * BPF syscall access
+ */
+
+#include <sys/syscall.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+
+#include "test_progs.h"
+#include "test_btf.h"
+#include "bpf/libbpf_internal.h"
+
+static char log[16 * 1024];
+
+/* Check that verifier rejects BPF program containing relocation
+ * pointing to non-existent BTF type.
+ */
+static void test_bad_local_id(void)
+{
+ struct test_btf {
+ struct btf_header hdr;
+ __u32 types[15];
+ char strings[128];
+ } raw_btf = {
+ .hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_off = 0,
+ .type_len = sizeof(raw_btf.types),
+ .str_off = offsetof(struct test_btf, strings) -
+ offsetof(struct test_btf, types),
+ .str_len = sizeof(raw_btf.strings),
+ },
+ .types = {
+ BTF_PTR_ENC(0), /* [1] void* */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [2] int */
+ BTF_FUNC_PROTO_ENC(2, 1), /* [3] int (*)(void*) */
+ BTF_FUNC_PROTO_ARG_ENC(8, 1),
+ BTF_FUNC_ENC(8, 3) /* [4] FUNC 'foo' type_id=2 */
+ },
+ .strings = "\0int\0 0\0foo\0"
+ };
+ __u32 log_level = 1 | 2 | 4;
+ LIBBPF_OPTS(bpf_btf_load_opts, opts,
+ .log_buf = log,
+ .log_size = sizeof(log),
+ .log_level = log_level,
+ );
+ struct bpf_insn insns[] = {
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ struct bpf_func_info funcs[] = {
+ {
+ .insn_off = 0,
+ .type_id = 4,
+ }
+ };
+ struct bpf_core_relo relos[] = {
+ {
+ .insn_off = 0, /* patch first instruction (r0 = 0) */
+ .type_id = 100500, /* !!! this type id does not exist */
+ .access_str_off = 6, /* offset of "0" */
+ .kind = BPF_CORE_TYPE_ID_LOCAL,
+ }
+ };
+ union bpf_attr attr;
+ int saved_errno;
+ int prog_fd = -1;
+ int btf_fd = -1;
+
+ btf_fd = bpf_btf_load(&raw_btf, sizeof(raw_btf), &opts);
+ saved_errno = errno;
+ if (btf_fd < 0 || env.verbosity > VERBOSE_NORMAL) {
+ printf("-------- BTF load log start --------\n");
+ printf("%s", log);
+ printf("-------- BTF load log end ----------\n");
+ }
+ if (btf_fd < 0) {
+ PRINT_FAIL("bpf_btf_load() failed, errno=%d\n", saved_errno);
+ return;
+ }
+
+ log[0] = 0;
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_btf_fd = btf_fd;
+ attr.prog_type = BPF_TRACE_RAW_TP;
+ attr.license = (__u64)"GPL";
+ attr.insns = (__u64)&insns;
+ attr.insn_cnt = sizeof(insns) / sizeof(*insns);
+ attr.log_buf = (__u64)log;
+ attr.log_size = sizeof(log);
+ attr.log_level = log_level;
+ attr.func_info = (__u64)funcs;
+ attr.func_info_cnt = sizeof(funcs) / sizeof(*funcs);
+ attr.func_info_rec_size = sizeof(*funcs);
+ attr.core_relos = (__u64)relos;
+ attr.core_relo_cnt = sizeof(relos) / sizeof(*relos);
+ attr.core_relo_rec_size = sizeof(*relos);
+ prog_fd = sys_bpf_prog_load(&attr, sizeof(attr), 1);
+ saved_errno = errno;
+ if (prog_fd < 0 || env.verbosity > VERBOSE_NORMAL) {
+ printf("-------- program load log start --------\n");
+ printf("%s", log);
+ printf("-------- program load log end ----------\n");
+ }
+ if (prog_fd >= 0) {
+ PRINT_FAIL("sys_bpf_prog_load() expected to fail\n");
+ goto out;
+ }
+ ASSERT_HAS_SUBSTR(log, "relo #0: bad type id 100500", "program load log");
+
+out:
+ close(prog_fd);
+ close(btf_fd);
+}
+
+void test_core_reloc_raw(void)
+{
+ if (test__start_subtest("bad_local_id"))
+ test_bad_local_id();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c
index 6acb0e94d4d7..4a2c256c8db6 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_retro.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c
@@ -6,31 +6,32 @@
void test_core_retro(void)
{
- int err, zero = 0, res, duration = 0, my_pid = getpid();
+ int err, zero = 0, res, my_pid = getpid();
struct test_core_retro *skel;
/* load program */
skel = test_core_retro__open_and_load();
- if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
goto out_close;
- err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0);
- if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno))
+ err = bpf_map__update_elem(skel->maps.exp_tgid_map, &zero, sizeof(zero),
+ &my_pid, sizeof(my_pid), 0);
+ if (!ASSERT_OK(err, "map_update"))
goto out_close;
/* attach probe */
err = test_core_retro__attach(skel);
- if (CHECK(err, "attach_kprobe", "err %d\n", err))
+ if (!ASSERT_OK(err, "attach_kprobe"))
goto out_close;
/* trigger */
usleep(1);
- err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.results), &zero, &res);
- if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno))
+ err = bpf_map__lookup_elem(skel->maps.results, &zero, sizeof(zero), &res, sizeof(res), 0);
+ if (!ASSERT_OK(err, "map_lookup"))
goto out_close;
- CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid);
+ ASSERT_EQ(res, my_pid, "pid_check");
out_close:
test_core_retro__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
new file mode 100644
index 000000000000..6c45330a5ca3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "cpumask_failure.skel.h"
+#include "cpumask_success.skel.h"
+
+static const char * const cpumask_success_testcases[] = {
+ "test_alloc_free_cpumask",
+ "test_set_clear_cpu",
+ "test_setall_clear_cpu",
+ "test_first_firstzero_cpu",
+ "test_firstand_nocpu",
+ "test_test_and_set_clear",
+ "test_and_or_xor",
+ "test_intersects_subset",
+ "test_copy_any_anyand",
+ "test_insert_leave",
+ "test_insert_remove_release",
+ "test_global_mask_rcu",
+ "test_global_mask_array_one_rcu",
+ "test_global_mask_array_rcu",
+ "test_global_mask_array_l2_rcu",
+ "test_global_mask_nested_rcu",
+ "test_global_mask_nested_deep_rcu",
+ "test_global_mask_nested_deep_array_rcu",
+ "test_cpumask_weight",
+ "test_refcount_null_tracking",
+ "test_populate_reject_small_mask",
+ "test_populate_reject_unaligned",
+ "test_populate",
+};
+
+static void verify_success(const char *prog_name)
+{
+ struct cpumask_success *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+ pid_t child_pid;
+ int status, err;
+
+ skel = cpumask_success__open();
+ if (!ASSERT_OK_PTR(skel, "cpumask_success__open"))
+ return;
+
+ skel->bss->pid = getpid();
+ skel->bss->nr_cpus = libbpf_num_possible_cpus();
+
+ err = cpumask_success__load(skel);
+ if (!ASSERT_OK(err, "cpumask_success__load"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GT(child_pid, -1, "child_pid"))
+ goto cleanup;
+ if (child_pid == 0)
+ _exit(0);
+ waitpid(child_pid, &status, 0);
+ ASSERT_OK(skel->bss->err, "post_wait_err");
+
+cleanup:
+ bpf_link__destroy(link);
+ cpumask_success__destroy(skel);
+}
+
+void test_cpumask(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cpumask_success_testcases); i++) {
+ if (!test__start_subtest(cpumask_success_testcases[i]))
+ continue;
+
+ verify_success(cpumask_success_testcases[i]);
+ }
+
+ RUN_TESTS(cpumask_failure);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c
new file mode 100644
index 000000000000..42bd07f7218d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/if_alg.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "crypto_sanity.skel.h"
+#include "crypto_basic.skel.h"
+
+#define NS_TEST "crypto_sanity_ns"
+#define IPV6_IFACE_ADDR "face::1"
+static const unsigned char crypto_key[] = "testtest12345678";
+static const char plain_text[] = "stringtoencrypt0";
+static int opfd = -1, tfmfd = -1;
+static const char algo[] = "ecb(aes)";
+static int init_afalg(void)
+{
+ struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "skcipher",
+ .salg_name = "ecb(aes)"
+ };
+
+ tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (tfmfd == -1)
+ return errno;
+ if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) == -1)
+ return errno;
+ if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, crypto_key, 16) == -1)
+ return errno;
+ opfd = accept(tfmfd, NULL, 0);
+ if (opfd == -1)
+ return errno;
+ return 0;
+}
+
+static void deinit_afalg(void)
+{
+ if (tfmfd != -1)
+ close(tfmfd);
+ if (opfd != -1)
+ close(opfd);
+}
+
+static void do_crypt_afalg(const void *src, void *dst, int size, bool encrypt)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ char cbuf[CMSG_SPACE(4)] = {0};
+ struct iovec iov;
+
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_ALG;
+ cmsg->cmsg_type = ALG_SET_OP;
+ cmsg->cmsg_len = CMSG_LEN(4);
+ *(__u32 *)CMSG_DATA(cmsg) = encrypt ? ALG_OP_ENCRYPT : ALG_OP_DECRYPT;
+
+ iov.iov_base = (char *)src;
+ iov.iov_len = size;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ sendmsg(opfd, &msg, 0);
+ read(opfd, dst, size);
+}
+
+void test_crypto_basic(void)
+{
+ RUN_TESTS(crypto_basic);
+}
+
+void test_crypto_sanity(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach_enc);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach_dec);
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct nstoken *nstoken = NULL;
+ struct crypto_sanity *skel;
+ char afalg_plain[16] = {0};
+ char afalg_dst[16] = {0};
+ struct sockaddr_in6 addr;
+ int sockfd, err, pfd;
+ socklen_t addrlen;
+ u16 udp_test_port;
+
+ skel = crypto_sanity__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open"))
+ return;
+
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
+ err = init_afalg();
+ if (!ASSERT_OK(err, "AF_ALG init fail"))
+ goto fail;
+
+ qdisc_hook.ifindex = if_nametoindex("lo");
+ if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo"))
+ goto fail;
+
+ skel->bss->key_len = 16;
+ skel->bss->authsize = 0;
+ udp_test_port = skel->data->udp_test_port;
+ memcpy(skel->bss->key, crypto_key, sizeof(crypto_key));
+ snprintf(skel->bss->algo, 128, "%s", algo);
+ pfd = bpf_program__fd(skel->progs.skb_crypto_setup);
+ if (!ASSERT_GT(pfd, 0, "skb_crypto_setup fd"))
+ goto fail;
+
+ err = bpf_prog_test_run_opts(pfd, &opts);
+ if (!ASSERT_OK(err, "skb_crypto_setup") ||
+ !ASSERT_OK(opts.retval, "skb_crypto_setup retval"))
+ goto fail;
+
+ if (!ASSERT_OK(skel->bss->status, "skb_crypto_setup status"))
+ goto fail;
+
+ err = bpf_tc_hook_create(&qdisc_hook);
+ if (!ASSERT_OK(err, "create qdisc hook"))
+ goto fail;
+
+ addrlen = sizeof(addr);
+ err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, udp_test_port,
+ (void *)&addr, &addrlen);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto fail;
+
+ tc_attach_enc.prog_fd = bpf_program__fd(skel->progs.encrypt_sanity);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach_enc);
+ if (!ASSERT_OK(err, "attach encrypt filter"))
+ goto fail;
+
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "encrypt socket"))
+ goto fail;
+ err = sendto(sockfd, plain_text, sizeof(plain_text), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(plain_text), "encrypt send"))
+ goto fail;
+
+ do_crypt_afalg(plain_text, afalg_dst, sizeof(afalg_dst), true);
+
+ if (!ASSERT_OK(skel->bss->status, "encrypt status"))
+ goto fail;
+ if (!ASSERT_STRNEQ(skel->bss->dst, afalg_dst, sizeof(afalg_dst), "encrypt AF_ALG"))
+ goto fail;
+
+ tc_attach_enc.flags = tc_attach_enc.prog_fd = tc_attach_enc.prog_id = 0;
+ err = bpf_tc_detach(&qdisc_hook, &tc_attach_enc);
+ if (!ASSERT_OK(err, "bpf_tc_detach encrypt"))
+ goto fail;
+
+ tc_attach_dec.prog_fd = bpf_program__fd(skel->progs.decrypt_sanity);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach_dec);
+ if (!ASSERT_OK(err, "attach decrypt filter"))
+ goto fail;
+
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "decrypt socket"))
+ goto fail;
+ err = sendto(sockfd, afalg_dst, sizeof(afalg_dst), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(afalg_dst), "decrypt send"))
+ goto fail;
+
+ do_crypt_afalg(afalg_dst, afalg_plain, sizeof(afalg_plain), false);
+
+ if (!ASSERT_OK(skel->bss->status, "decrypt status"))
+ goto fail;
+ if (!ASSERT_STRNEQ(skel->bss->dst, afalg_plain, sizeof(afalg_plain), "decrypt AF_ALG"))
+ goto fail;
+
+ tc_attach_dec.flags = tc_attach_dec.prog_fd = tc_attach_dec.prog_id = 0;
+ err = bpf_tc_detach(&qdisc_hook, &tc_attach_dec);
+ ASSERT_OK(err, "bpf_tc_detach decrypt");
+
+fail:
+ close_netns(nstoken);
+ deinit_afalg();
+ SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+ crypto_sanity__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
new file mode 100644
index 000000000000..dd75ccb03770
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -0,0 +1,821 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <regex.h>
+#include <test_progs.h>
+
+#include "bpf/btf.h"
+#include "bpf_util.h"
+#include "linux/filter.h"
+#include "linux/kernel.h"
+#include "disasm_helpers.h"
+
+#define MAX_PROG_TEXT_SZ (32 * 1024)
+
+/* The code in this file serves the sole purpose of executing test cases
+ * specified in the test_cases array. Each test case specifies a program
+ * type, context field offset, and disassembly patterns that correspond
+ * to read and write instructions generated by
+ * verifier.c:convert_ctx_access() for accessing that field.
+ *
+ * For each test case, up to three programs are created:
+ * - One that uses BPF_LDX_MEM to read the context field.
+ * - One that uses BPF_STX_MEM to write to the context field.
+ * - One that uses BPF_ST_MEM to write to the context field.
+ *
+ * The disassembly of each program is then compared with the pattern
+ * specified in the test case.
+ */
+struct test_case {
+ char *name;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ int field_offset;
+ int field_sz;
+ /* Program generated for BPF_ST_MEM uses value 42 by default,
+ * this field allows to specify custom value.
+ */
+ struct {
+ bool use;
+ int value;
+ } st_value;
+ /* Pattern for BPF_LDX_MEM(field_sz, dst, ctx, field_offset) */
+ char *read;
+ /* Pattern for BPF_STX_MEM(field_sz, ctx, src, field_offset) and
+ * BPF_ST_MEM (field_sz, ctx, src, field_offset)
+ */
+ char *write;
+ /* Pattern for BPF_ST_MEM(field_sz, ctx, src, field_offset),
+ * takes priority over `write`.
+ */
+ char *write_st;
+ /* Pattern for BPF_STX_MEM (field_sz, ctx, src, field_offset),
+ * takes priority over `write`.
+ */
+ char *write_stx;
+};
+
+#define N(_prog_type, type, field, name_extra...) \
+ .name = #_prog_type "." #field name_extra, \
+ .prog_type = BPF_PROG_TYPE_##_prog_type, \
+ .field_offset = offsetof(type, field), \
+ .field_sz = sizeof(typeof(((type *)NULL)->field))
+
+static struct test_case test_cases[] = {
+/* Sign extension on s390 changes the pattern */
+#if defined(__x86_64__) || defined(__aarch64__)
+ {
+ N(SCHED_CLS, struct __sk_buff, tstamp),
+ .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
+ "if w11 & 0x4 goto pc+1;"
+ "goto pc+4;"
+ "if w11 & 0x3 goto pc+1;"
+ "goto pc+2;"
+ "$dst = 0;"
+ "goto pc+1;"
+ "$dst = *(u64 *)($ctx + sk_buff::tstamp);",
+ .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
+ "if w11 & 0x4 goto pc+1;"
+ "goto pc+2;"
+ "w11 &= -4;"
+ "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
+ "*(u64 *)($ctx + sk_buff::tstamp) = $src;",
+ },
+#endif
+ {
+ N(SCHED_CLS, struct __sk_buff, priority),
+ .read = "$dst = *(u32 *)($ctx + sk_buff::priority);",
+ .write = "*(u32 *)($ctx + sk_buff::priority) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, mark),
+ .read = "$dst = *(u32 *)($ctx + sk_buff::mark);",
+ .write = "*(u32 *)($ctx + sk_buff::mark) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, cb[0]),
+ .read = "$dst = *(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data));",
+ .write = "*(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data)) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, tc_classid),
+ .read = "$dst = *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid));",
+ .write = "*(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, tc_index),
+ .read = "$dst = *(u16 *)($ctx + sk_buff::tc_index);",
+ .write = "*(u16 *)($ctx + sk_buff::tc_index) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, queue_mapping),
+ .read = "$dst = *(u16 *)($ctx + sk_buff::queue_mapping);",
+ .write_stx = "if $src >= 0xffff goto pc+1;"
+ "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;",
+ .write_st = "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;",
+ },
+ {
+ /* This is a corner case in filter.c:bpf_convert_ctx_access() */
+ N(SCHED_CLS, struct __sk_buff, queue_mapping, ".ushrt_max"),
+ .st_value = { true, USHRT_MAX },
+ .write_st = "goto pc+0;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, bound_dev_if),
+ .read = "$dst = *(u32 *)($ctx + sock_common::skc_bound_dev_if);",
+ .write = "*(u32 *)($ctx + sock_common::skc_bound_dev_if) = $src;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, mark),
+ .read = "$dst = *(u32 *)($ctx + sock::sk_mark);",
+ .write = "*(u32 *)($ctx + sock::sk_mark) = $src;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, priority),
+ .read = "$dst = *(u32 *)($ctx + sock::sk_priority);",
+ .write = "*(u32 *)($ctx + sock::sk_priority) = $src;",
+ },
+ {
+ N(SOCK_OPS, struct bpf_sock_ops, replylong[0]),
+ .read = "$dst = *(u32 *)($ctx + bpf_sock_ops_kern::replylong);",
+ .write = "*(u32 *)($ctx + bpf_sock_ops_kern::replylong) = $src;",
+ },
+ {
+ N(CGROUP_SYSCTL, struct bpf_sysctl, file_pos),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "$dst = *(u32 *)($dst +0);",
+ .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "*(u32 *)(r9 +0) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);",
+#else
+ .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "$dst = *(u32 *)($dst +4);",
+ .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "*(u32 *)(r9 +4) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);",
+#endif
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, sk),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::sk);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, level),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::level);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::level) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optname),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optname);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::optname) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optlen),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optlen);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::optlen) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, retval),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::current_task);"
+ "$dst = *(u64 *)($dst + task_struct::bpf_ctx);"
+ "$dst = *(u32 *)($dst + bpf_cg_run_ctx::retval);",
+ .write = "*(u64 *)($ctx + bpf_sockopt_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sockopt_kern::current_task);"
+ "r9 = *(u64 *)(r9 + task_struct::bpf_ctx);"
+ "*(u32 *)(r9 + bpf_cg_run_ctx::retval) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sockopt_kern::tmp_reg);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optval),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optval_end),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval_end);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+};
+
+#undef N
+
+static regex_t *ident_regex;
+static regex_t *field_regex;
+
+static char *skip_space(char *str)
+{
+ while (*str && isspace(*str))
+ ++str;
+ return str;
+}
+
+static char *skip_space_and_semi(char *str)
+{
+ while (*str && (isspace(*str) || *str == ';'))
+ ++str;
+ return str;
+}
+
+static char *match_str(char *str, char *prefix)
+{
+ while (*str && *prefix && *str == *prefix) {
+ ++str;
+ ++prefix;
+ }
+ if (*prefix)
+ return NULL;
+ return str;
+}
+
+static char *match_number(char *str, int num)
+{
+ char *next;
+ int snum = strtol(str, &next, 10);
+
+ if (next - str == 0 || num != snum)
+ return NULL;
+
+ return next;
+}
+
+static int find_field_offset_aux(struct btf *btf, int btf_id, char *field_name, int off)
+{
+ const struct btf_type *type = btf__type_by_id(btf, btf_id);
+ const struct btf_member *m;
+ __u16 mnum;
+ int i;
+
+ if (!type) {
+ PRINT_FAIL("Can't find btf_type for id %d\n", btf_id);
+ return -1;
+ }
+
+ if (!btf_is_struct(type) && !btf_is_union(type)) {
+ PRINT_FAIL("BTF id %d is not struct or union\n", btf_id);
+ return -1;
+ }
+
+ m = btf_members(type);
+ mnum = btf_vlen(type);
+
+ for (i = 0; i < mnum; ++i, ++m) {
+ const char *mname = btf__name_by_offset(btf, m->name_off);
+
+ if (strcmp(mname, "") == 0) {
+ int msize = find_field_offset_aux(btf, m->type, field_name,
+ off + m->offset);
+ if (msize >= 0)
+ return msize;
+ }
+
+ if (strcmp(mname, field_name))
+ continue;
+
+ return (off + m->offset) / 8;
+ }
+
+ return -1;
+}
+
+static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches)
+{
+ int type_sz = matches[1].rm_eo - matches[1].rm_so;
+ int field_sz = matches[2].rm_eo - matches[2].rm_so;
+ char *type = pattern + matches[1].rm_so;
+ char *field = pattern + matches[2].rm_so;
+ char field_str[128] = {};
+ char type_str[128] = {};
+ int btf_id, field_offset;
+
+ if (type_sz >= sizeof(type_str)) {
+ PRINT_FAIL("Malformed pattern: type ident is too long: %d\n", type_sz);
+ return -1;
+ }
+
+ if (field_sz >= sizeof(field_str)) {
+ PRINT_FAIL("Malformed pattern: field ident is too long: %d\n", field_sz);
+ return -1;
+ }
+
+ strncpy(type_str, type, type_sz);
+ strncpy(field_str, field, field_sz);
+ btf_id = btf__find_by_name(btf, type_str);
+ if (btf_id < 0) {
+ PRINT_FAIL("No BTF info for type %s\n", type_str);
+ return -1;
+ }
+
+ field_offset = find_field_offset_aux(btf, btf_id, field_str, 0);
+ if (field_offset < 0) {
+ PRINT_FAIL("No BTF info for field %s::%s\n", type_str, field_str);
+ return -1;
+ }
+
+ return field_offset;
+}
+
+static regex_t *compile_regex(char *pat)
+{
+ regex_t *re;
+ int err;
+
+ re = malloc(sizeof(regex_t));
+ if (!re) {
+ PRINT_FAIL("Can't alloc regex\n");
+ return NULL;
+ }
+
+ err = regcomp(re, pat, REG_EXTENDED);
+ if (err) {
+ char errbuf[512];
+
+ regerror(err, re, errbuf, sizeof(errbuf));
+ PRINT_FAIL("Can't compile regex: %s\n", errbuf);
+ free(re);
+ return NULL;
+ }
+
+ return re;
+}
+
+static void free_regex(regex_t *re)
+{
+ if (!re)
+ return;
+
+ regfree(re);
+ free(re);
+}
+
+static u32 max_line_len(char *str)
+{
+ u32 max_line = 0;
+ char *next = str;
+
+ while (next) {
+ next = strchr(str, '\n');
+ if (next) {
+ max_line = max_t(u32, max_line, (next - str));
+ str = next + 1;
+ } else {
+ max_line = max_t(u32, max_line, strlen(str));
+ }
+ }
+
+ return min(max_line, 60u);
+}
+
+/* Print strings `pattern_origin` and `text_origin` side by side,
+ * assume `pattern_pos` and `text_pos` designate location within
+ * corresponding origin string where match diverges.
+ * The output should look like:
+ *
+ * Can't match disassembly(left) with pattern(right):
+ * r2 = *(u64 *)(r1 +0) ; $dst = *(u64 *)($ctx + bpf_sockopt_kern::sk1)
+ * ^ ^
+ * r0 = 0 ;
+ * exit ;
+ */
+static void print_match_error(FILE *out,
+ char *pattern_origin, char *text_origin,
+ char *pattern_pos, char *text_pos)
+{
+ char *pattern = pattern_origin;
+ char *text = text_origin;
+ int middle = max_line_len(text) + 2;
+
+ fprintf(out, "Can't match disassembly(left) with pattern(right):\n");
+ while (*pattern || *text) {
+ int column = 0;
+ int mark1 = -1;
+ int mark2 = -1;
+
+ /* Print one line from text */
+ while (*text && *text != '\n') {
+ if (text == text_pos)
+ mark1 = column;
+ fputc(*text, out);
+ ++text;
+ ++column;
+ }
+ if (text == text_pos)
+ mark1 = column;
+
+ /* Pad to the middle */
+ while (column < middle) {
+ fputc(' ', out);
+ ++column;
+ }
+ fputs("; ", out);
+ column += 3;
+
+ /* Print one line from pattern, pattern lines are terminated by ';' */
+ while (*pattern && *pattern != ';') {
+ if (pattern == pattern_pos)
+ mark2 = column;
+ fputc(*pattern, out);
+ ++pattern;
+ ++column;
+ }
+ if (pattern == pattern_pos)
+ mark2 = column;
+
+ fputc('\n', out);
+ if (*pattern)
+ ++pattern;
+ if (*text)
+ ++text;
+
+ /* If pattern and text diverge at this line, print an
+ * additional line with '^' marks, highlighting
+ * positions where match fails.
+ */
+ if (mark1 > 0 || mark2 > 0) {
+ for (column = 0; column <= max(mark1, mark2); ++column) {
+ if (column == mark1 || column == mark2)
+ fputc('^', out);
+ else
+ fputc(' ', out);
+ }
+ fputc('\n', out);
+ }
+ }
+}
+
+/* Test if `text` matches `pattern`. Pattern consists of the following elements:
+ *
+ * - Field offset references:
+ *
+ * <type>::<field>
+ *
+ * When such reference is encountered BTF is used to compute numerical
+ * value for the offset of <field> in <type>. The `text` is expected to
+ * contain matching numerical value.
+ *
+ * - Field groups:
+ *
+ * $(<type>::<field> [+ <type>::<field>]*)
+ *
+ * Allows to specify an offset that is a sum of multiple field offsets.
+ * The `text` is expected to contain matching numerical value.
+ *
+ * - Variable references, e.g. `$src`, `$dst`, `$ctx`.
+ * These are substitutions specified in `reg_map` array.
+ * If a substring of pattern is equal to `reg_map[i][0]` the `text` is
+ * expected to contain `reg_map[i][1]` in the matching position.
+ *
+ * - Whitespace is ignored, ';' counts as whitespace for `pattern`.
+ *
+ * - Any other characters, `pattern` and `text` should match one-to-one.
+ *
+ * Example of a pattern:
+ *
+ * __________ fields group ________________
+ * ' '
+ * *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;
+ * ^^^^ '______________________'
+ * variable reference field offset reference
+ */
+static bool match_pattern(struct btf *btf, char *pattern, char *text, char *reg_map[][2])
+{
+ char *pattern_origin = pattern;
+ char *text_origin = text;
+ regmatch_t matches[3];
+
+_continue:
+ while (*pattern) {
+ if (!*text)
+ goto err;
+
+ /* Skip whitespace */
+ if (isspace(*pattern) || *pattern == ';') {
+ if (!isspace(*text) && text != text_origin && isalnum(text[-1]))
+ goto err;
+ pattern = skip_space_and_semi(pattern);
+ text = skip_space(text);
+ continue;
+ }
+
+ /* Check for variable references */
+ for (int i = 0; reg_map[i][0]; ++i) {
+ char *pattern_next, *text_next;
+
+ pattern_next = match_str(pattern, reg_map[i][0]);
+ if (!pattern_next)
+ continue;
+
+ text_next = match_str(text, reg_map[i][1]);
+ if (!text_next)
+ goto err;
+
+ pattern = pattern_next;
+ text = text_next;
+ goto _continue;
+ }
+
+ /* Match field group:
+ * $(sk_buff::cb + qdisc_skb_cb::tc_classid)
+ */
+ if (strncmp(pattern, "$(", 2) == 0) {
+ char *group_start = pattern, *text_next;
+ int acc_offset = 0;
+
+ pattern += 2;
+
+ for (;;) {
+ int field_offset;
+
+ pattern = skip_space(pattern);
+ if (!*pattern) {
+ PRINT_FAIL("Unexpected end of pattern\n");
+ goto err;
+ }
+
+ if (*pattern == ')') {
+ ++pattern;
+ break;
+ }
+
+ if (*pattern == '+') {
+ ++pattern;
+ continue;
+ }
+
+ printf("pattern: %s\n", pattern);
+ if (regexec(field_regex, pattern, 3, matches, 0) != 0) {
+ PRINT_FAIL("Field reference expected\n");
+ goto err;
+ }
+
+ field_offset = find_field_offset(btf, pattern, matches);
+ if (field_offset < 0)
+ goto err;
+
+ pattern += matches[0].rm_eo;
+ acc_offset += field_offset;
+ }
+
+ text_next = match_number(text, acc_offset);
+ if (!text_next) {
+ PRINT_FAIL("No match for group offset %.*s (%d)\n",
+ (int)(pattern - group_start),
+ group_start,
+ acc_offset);
+ goto err;
+ }
+ text = text_next;
+ }
+
+ /* Match field reference:
+ * sk_buff::cb
+ */
+ if (regexec(field_regex, pattern, 3, matches, 0) == 0) {
+ int field_offset;
+ char *text_next;
+
+ field_offset = find_field_offset(btf, pattern, matches);
+ if (field_offset < 0)
+ goto err;
+
+ text_next = match_number(text, field_offset);
+ if (!text_next) {
+ PRINT_FAIL("No match for field offset %.*s (%d)\n",
+ (int)matches[0].rm_eo, pattern, field_offset);
+ goto err;
+ }
+
+ pattern += matches[0].rm_eo;
+ text = text_next;
+ continue;
+ }
+
+ /* If pattern points to identifier not followed by '::'
+ * skip the identifier to avoid n^2 application of the
+ * field reference rule.
+ */
+ if (regexec(ident_regex, pattern, 1, matches, 0) == 0) {
+ if (strncmp(pattern, text, matches[0].rm_eo) != 0)
+ goto err;
+
+ pattern += matches[0].rm_eo;
+ text += matches[0].rm_eo;
+ continue;
+ }
+
+ /* Match literally */
+ if (*pattern != *text)
+ goto err;
+
+ ++pattern;
+ ++text;
+ }
+
+ return true;
+
+err:
+ test__fail();
+ print_match_error(stdout, pattern_origin, text_origin, pattern, text);
+ return false;
+}
+
+struct prog_info {
+ char *prog_kind;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ struct bpf_insn *prog;
+ u32 prog_len;
+};
+
+static void match_program(struct btf *btf,
+ struct prog_info *pinfo,
+ char *pattern,
+ char *reg_map[][2],
+ bool skip_first_insn)
+{
+ struct bpf_insn *buf = NULL, *insn, *insn_end;
+ int err = 0, prog_fd = 0;
+ FILE *prog_out = NULL;
+ char insn_buf[64];
+ char *text = NULL;
+ __u32 cnt = 0;
+
+ text = calloc(MAX_PROG_TEXT_SZ, 1);
+ if (!text) {
+ PRINT_FAIL("Can't allocate %d bytes\n", MAX_PROG_TEXT_SZ);
+ goto out;
+ }
+
+ // TODO: log level
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ opts.log_buf = text;
+ opts.log_size = MAX_PROG_TEXT_SZ;
+ opts.log_level = 1 | 2 | 4;
+ opts.expected_attach_type = pinfo->expected_attach_type;
+
+ prog_fd = bpf_prog_load(pinfo->prog_type, NULL, "GPL",
+ pinfo->prog, pinfo->prog_len, &opts);
+ if (prog_fd < 0) {
+ PRINT_FAIL("Can't load program, errno %d (%s), verifier log:\n%s\n",
+ errno, strerror(errno), text);
+ goto out;
+ }
+
+ memset(text, 0, MAX_PROG_TEXT_SZ);
+
+ err = get_xlated_program(prog_fd, &buf, &cnt);
+ if (err) {
+ PRINT_FAIL("Can't load back BPF program\n");
+ goto out;
+ }
+
+ prog_out = fmemopen(text, MAX_PROG_TEXT_SZ - 1, "w");
+ if (!prog_out) {
+ PRINT_FAIL("Can't open memory stream\n");
+ goto out;
+ }
+ insn_end = buf + cnt;
+ insn = buf + (skip_first_insn ? 1 : 0);
+ while (insn < insn_end) {
+ insn = disasm_insn(insn, insn_buf, sizeof(insn_buf));
+ fprintf(prog_out, "%s\n", insn_buf);
+ }
+ fclose(prog_out);
+
+ ASSERT_TRUE(match_pattern(btf, pattern, text, reg_map),
+ pinfo->prog_kind);
+
+out:
+ if (prog_fd)
+ close(prog_fd);
+ free(buf);
+ free(text);
+}
+
+static void run_one_testcase(struct btf *btf, struct test_case *test)
+{
+ struct prog_info pinfo = {};
+ int bpf_sz;
+
+ if (!test__start_subtest(test->name))
+ return;
+
+ switch (test->field_sz) {
+ case 8:
+ bpf_sz = BPF_DW;
+ break;
+ case 4:
+ bpf_sz = BPF_W;
+ break;
+ case 2:
+ bpf_sz = BPF_H;
+ break;
+ case 1:
+ bpf_sz = BPF_B;
+ break;
+ default:
+ PRINT_FAIL("Unexpected field size: %d, want 8,4,2 or 1\n", test->field_sz);
+ return;
+ }
+
+ pinfo.prog_type = test->prog_type;
+ pinfo.expected_attach_type = test->expected_attach_type;
+
+ if (test->read) {
+ struct bpf_insn ldx_prog[] = {
+ BPF_LDX_MEM(bpf_sz, BPF_REG_2, BPF_REG_1, test->field_offset),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$dst", "r2" },
+ {}
+ };
+
+ pinfo.prog_kind = "LDX";
+ pinfo.prog = ldx_prog;
+ pinfo.prog_len = ARRAY_SIZE(ldx_prog);
+ match_program(btf, &pinfo, test->read, reg_map, false);
+ }
+
+ if (test->write || test->write_st || test->write_stx) {
+ struct bpf_insn stx_prog[] = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(bpf_sz, BPF_REG_1, BPF_REG_2, test->field_offset),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *stx_reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$src", "r2" },
+ {}
+ };
+ struct bpf_insn st_prog[] = {
+ BPF_ST_MEM(bpf_sz, BPF_REG_1, test->field_offset,
+ test->st_value.use ? test->st_value.value : 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *st_reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$src", "42" },
+ {}
+ };
+
+ if (test->write || test->write_stx) {
+ char *pattern = test->write_stx ? test->write_stx : test->write;
+
+ pinfo.prog_kind = "STX";
+ pinfo.prog = stx_prog;
+ pinfo.prog_len = ARRAY_SIZE(stx_prog);
+ match_program(btf, &pinfo, pattern, stx_reg_map, true);
+ }
+
+ if (test->write || test->write_st) {
+ char *pattern = test->write_st ? test->write_st : test->write;
+
+ pinfo.prog_kind = "ST";
+ pinfo.prog = st_prog;
+ pinfo.prog_len = ARRAY_SIZE(st_prog);
+ match_program(btf, &pinfo, pattern, st_reg_map, false);
+ }
+ }
+
+ test__end_subtest();
+}
+
+void test_ctx_rewrite(void)
+{
+ struct btf *btf;
+ int i;
+
+ field_regex = compile_regex("^([[:alpha:]_][[:alnum:]_]+)::([[:alpha:]_][[:alnum:]_]+)");
+ ident_regex = compile_regex("^[[:alpha:]_][[:alnum:]_]+");
+ if (!field_regex || !ident_regex)
+ return;
+
+ btf = btf__load_vmlinux_btf();
+ if (!btf) {
+ PRINT_FAIL("Can't load vmlinux BTF, errno %d (%s)\n", errno, strerror(errno));
+ goto out;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); ++i)
+ run_one_testcase(btf, &test_cases[i]);
+
+out:
+ btf__free(btf);
+ free_regex(field_regex);
+ free_regex(ident_regex);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c b/tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c
new file mode 100644
index 000000000000..b2dfc5954aea
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <test_progs.h>
+#include "test_custom_sec_handlers.skel.h"
+
+#define COOKIE_ABC1 1
+#define COOKIE_ABC2 2
+#define COOKIE_CUSTOM 3
+#define COOKIE_FALLBACK 4
+#define COOKIE_KPROBE 5
+
+static int custom_setup_prog(struct bpf_program *prog, long cookie)
+{
+ if (cookie == COOKIE_ABC1)
+ bpf_program__set_autoload(prog, false);
+
+ return 0;
+}
+
+static int custom_prepare_load_prog(struct bpf_program *prog,
+ struct bpf_prog_load_opts *opts, long cookie)
+{
+ if (cookie == COOKIE_FALLBACK)
+ opts->prog_flags |= BPF_F_SLEEPABLE;
+ else if (cookie == COOKIE_ABC1)
+ ASSERT_FALSE(true, "unexpected preload for abc");
+
+ return 0;
+}
+
+static int custom_attach_prog(const struct bpf_program *prog, long cookie,
+ struct bpf_link **link)
+{
+ switch (cookie) {
+ case COOKIE_ABC2:
+ *link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+ return libbpf_get_error(*link);
+ case COOKIE_CUSTOM:
+ *link = bpf_program__attach_tracepoint(prog, "syscalls", "sys_enter_nanosleep");
+ return libbpf_get_error(*link);
+ case COOKIE_KPROBE:
+ case COOKIE_FALLBACK:
+ /* no auto-attach for SEC("xyz") and SEC("kprobe") */
+ *link = NULL;
+ return 0;
+ default:
+ ASSERT_FALSE(true, "unexpected cookie");
+ return -EINVAL;
+ }
+}
+
+static int abc1_id;
+static int abc2_id;
+static int custom_id;
+static int fallback_id;
+static int kprobe_id;
+
+__attribute__((constructor))
+static void register_sec_handlers(void)
+{
+ LIBBPF_OPTS(libbpf_prog_handler_opts, abc1_opts,
+ .cookie = COOKIE_ABC1,
+ .prog_setup_fn = custom_setup_prog,
+ .prog_prepare_load_fn = custom_prepare_load_prog,
+ .prog_attach_fn = NULL,
+ );
+ LIBBPF_OPTS(libbpf_prog_handler_opts, abc2_opts,
+ .cookie = COOKIE_ABC2,
+ .prog_setup_fn = custom_setup_prog,
+ .prog_prepare_load_fn = custom_prepare_load_prog,
+ .prog_attach_fn = custom_attach_prog,
+ );
+ LIBBPF_OPTS(libbpf_prog_handler_opts, custom_opts,
+ .cookie = COOKIE_CUSTOM,
+ .prog_setup_fn = NULL,
+ .prog_prepare_load_fn = NULL,
+ .prog_attach_fn = custom_attach_prog,
+ );
+
+ abc1_id = libbpf_register_prog_handler("abc", BPF_PROG_TYPE_RAW_TRACEPOINT, 0, &abc1_opts);
+ abc2_id = libbpf_register_prog_handler("abc/", BPF_PROG_TYPE_RAW_TRACEPOINT, 0, &abc2_opts);
+ custom_id = libbpf_register_prog_handler("custom+", BPF_PROG_TYPE_TRACEPOINT, 0, &custom_opts);
+}
+
+__attribute__((destructor))
+static void unregister_sec_handlers(void)
+{
+ libbpf_unregister_prog_handler(abc1_id);
+ libbpf_unregister_prog_handler(abc2_id);
+ libbpf_unregister_prog_handler(custom_id);
+}
+
+void test_custom_sec_handlers(void)
+{
+ LIBBPF_OPTS(libbpf_prog_handler_opts, opts,
+ .prog_setup_fn = custom_setup_prog,
+ .prog_prepare_load_fn = custom_prepare_load_prog,
+ .prog_attach_fn = custom_attach_prog,
+ );
+ struct test_custom_sec_handlers* skel;
+ int err;
+
+ ASSERT_GT(abc1_id, 0, "abc1_id");
+ ASSERT_GT(abc2_id, 0, "abc2_id");
+ ASSERT_GT(custom_id, 0, "custom_id");
+
+ /* override libbpf's handle of SEC("kprobe/...") but also allow pure
+ * SEC("kprobe") due to "kprobe+" specifier. Register it as
+ * TRACEPOINT, just for fun.
+ */
+ opts.cookie = COOKIE_KPROBE;
+ kprobe_id = libbpf_register_prog_handler("kprobe+", BPF_PROG_TYPE_TRACEPOINT, 0, &opts);
+ /* fallback treats everything as BPF_PROG_TYPE_SYSCALL program to test
+ * setting custom BPF_F_SLEEPABLE bit in preload handler
+ */
+ opts.cookie = COOKIE_FALLBACK;
+ fallback_id = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_SYSCALL, 0, &opts);
+
+ if (!ASSERT_GT(fallback_id, 0, "fallback_id") /* || !ASSERT_GT(kprobe_id, 0, "kprobe_id")*/) {
+ if (fallback_id > 0)
+ libbpf_unregister_prog_handler(fallback_id);
+ if (kprobe_id > 0)
+ libbpf_unregister_prog_handler(kprobe_id);
+ return;
+ }
+
+ /* open skeleton and validate assumptions */
+ skel = test_custom_sec_handlers__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__type(skel->progs.abc1), BPF_PROG_TYPE_RAW_TRACEPOINT, "abc1_type");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.abc1), "abc1_autoload");
+
+ ASSERT_EQ(bpf_program__type(skel->progs.abc2), BPF_PROG_TYPE_RAW_TRACEPOINT, "abc2_type");
+ ASSERT_EQ(bpf_program__type(skel->progs.custom1), BPF_PROG_TYPE_TRACEPOINT, "custom1_type");
+ ASSERT_EQ(bpf_program__type(skel->progs.custom2), BPF_PROG_TYPE_TRACEPOINT, "custom2_type");
+ ASSERT_EQ(bpf_program__type(skel->progs.kprobe1), BPF_PROG_TYPE_TRACEPOINT, "kprobe1_type");
+ ASSERT_EQ(bpf_program__type(skel->progs.xyz), BPF_PROG_TYPE_SYSCALL, "xyz_type");
+
+ skel->rodata->my_pid = getpid();
+
+ /* now attempt to load everything */
+ err = test_custom_sec_handlers__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ /* now try to auto-attach everything */
+ err = test_custom_sec_handlers__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ skel->links.xyz = bpf_program__attach(skel->progs.kprobe1);
+ ASSERT_EQ(errno, EOPNOTSUPP, "xyz_attach_err");
+ ASSERT_ERR_PTR(skel->links.xyz, "xyz_attach");
+
+ /* trigger programs */
+ usleep(1);
+
+ /* SEC("abc") is set to not auto-loaded */
+ ASSERT_FALSE(skel->bss->abc1_called, "abc1_called");
+ ASSERT_TRUE(skel->bss->abc2_called, "abc2_called");
+ ASSERT_TRUE(skel->bss->custom1_called, "custom1_called");
+ ASSERT_TRUE(skel->bss->custom2_called, "custom2_called");
+ /* SEC("kprobe") shouldn't be auto-attached */
+ ASSERT_FALSE(skel->bss->kprobe1_called, "kprobe1_called");
+ /* SEC("xyz") shouldn't be auto-attached */
+ ASSERT_FALSE(skel->bss->xyz_called, "xyz_called");
+
+cleanup:
+ test_custom_sec_handlers__destroy(skel);
+
+ ASSERT_OK(libbpf_unregister_prog_handler(fallback_id), "unregister_fallback");
+ ASSERT_OK(libbpf_unregister_prog_handler(kprobe_id), "unregister_kprobe");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index 0a577a248d34..ccc768592e66 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -9,6 +9,19 @@
#define MAX_FILES 7
#include "test_d_path.skel.h"
+#include "test_d_path_check_rdonly_mem.skel.h"
+#include "test_d_path_check_types.skel.h"
+
+/* sys_close_range is not around for long time, so let's
+ * make sure we can call it on systems with older glibc
+ */
+#ifndef __NR_close_range
+#ifdef __alpha__
+#define __NR_close_range 546
+#else
+#define __NR_close_range 436
+#endif
+#endif
static int duration;
@@ -88,7 +101,11 @@ static int trigger_fstat_events(pid_t pid)
fstat(indicatorfd, &fileStat);
out_close:
- /* triggers filp_close */
+ /* sys_close no longer triggers filp_close, but we can
+ * call sys_close_range instead which still does
+ */
+#define close(fd) syscall(__NR_close_range, fd, fd, 0)
+
close(pipefd[0]);
close(pipefd[1]);
close(sockfd);
@@ -96,10 +113,12 @@ out_close:
close(devfd);
close(localfd);
close(indicatorfd);
+
+#undef close
return ret;
}
-void test_d_path(void)
+static void test_d_path_basic(void)
{
struct test_d_path__bss *bss;
struct test_d_path *skel;
@@ -155,3 +174,35 @@ void test_d_path(void)
cleanup:
test_d_path__destroy(skel);
}
+
+static void test_d_path_check_rdonly_mem(void)
+{
+ struct test_d_path_check_rdonly_mem *skel;
+
+ skel = test_d_path_check_rdonly_mem__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem");
+
+ test_d_path_check_rdonly_mem__destroy(skel);
+}
+
+static void test_d_path_check_types(void)
+{
+ struct test_d_path_check_types *skel;
+
+ skel = test_d_path_check_types__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type");
+
+ test_d_path_check_types__destroy(skel);
+}
+
+void test_d_path(void)
+{
+ if (test__start_subtest("basic"))
+ test_d_path_basic();
+
+ if (test__start_subtest("check_rdonly_mem"))
+ test_d_path_check_rdonly_mem();
+
+ if (test__start_subtest("check_alloc_mem"))
+ test_d_path_check_types();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
new file mode 100644
index 000000000000..d79f398ec6b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "decap_sanity.skel.h"
+
+#define NS_TEST "decap_sanity_ns"
+#define IPV6_IFACE_ADDR "face::1"
+#define UDP_TEST_PORT 7777
+
+void test_decap_sanity(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach);
+ struct nstoken *nstoken = NULL;
+ struct decap_sanity *skel;
+ struct sockaddr_in6 addr;
+ socklen_t addrlen;
+ char buf[128] = {};
+ int sockfd, err;
+
+ skel = decap_sanity__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
+ qdisc_hook.ifindex = if_nametoindex("lo");
+ if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo"))
+ goto fail;
+
+ err = bpf_tc_hook_create(&qdisc_hook);
+ if (!ASSERT_OK(err, "create qdisc hook"))
+ goto fail;
+
+ tc_attach.prog_fd = bpf_program__fd(skel->progs.decap_sanity);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach);
+ if (!ASSERT_OK(err, "attach filter"))
+ goto fail;
+
+ addrlen = sizeof(addr);
+ err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, UDP_TEST_PORT,
+ (void *)&addr, &addrlen);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto fail;
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "socket"))
+ goto fail;
+ err = sendto(sockfd, buf, sizeof(buf), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(buf), "send"))
+ goto fail;
+
+ ASSERT_TRUE(skel->bss->init_csum_partial, "init_csum_partial");
+ ASSERT_TRUE(skel->bss->final_csum_none, "final_csum_none");
+ ASSERT_FALSE(skel->bss->broken_csum_start, "broken_csum_start");
+
+fail:
+ if (nstoken) {
+ bpf_tc_hook_destroy(&qdisc_hook);
+ close_netns(nstoken);
+ }
+ SYS_NOFAIL("ip netns del " NS_TEST);
+ decap_sanity__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
new file mode 100644
index 000000000000..1bc6241b755b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_deny_namespace.skel.h"
+#include <sched.h>
+#include "cap_helpers.h"
+#include <stdio.h>
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+/* negative return value -> some internal error
+ * positive return value -> userns creation failed
+ * 0 -> userns creation succeeded
+ */
+static int create_user_ns(void)
+{
+ pid_t pid;
+
+ pid = fork();
+ if (pid < 0)
+ return -1;
+
+ if (pid == 0) {
+ if (unshare(CLONE_NEWUSER))
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ return wait_for_pid(pid);
+}
+
+static void test_userns_create_bpf(void)
+{
+ __u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
+ __u64 old_caps = 0;
+
+ cap_enable_effective(cap_mask, &old_caps);
+
+ ASSERT_OK(create_user_ns(), "priv new user ns");
+
+ cap_disable_effective(cap_mask, &old_caps);
+
+ ASSERT_EQ(create_user_ns(), EPERM, "unpriv new user ns");
+
+ if (cap_mask & old_caps)
+ cap_enable_effective(cap_mask, NULL);
+}
+
+static void test_unpriv_userns_create_no_bpf(void)
+{
+ __u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
+ __u64 old_caps = 0;
+
+ cap_disable_effective(cap_mask, &old_caps);
+
+ ASSERT_OK(create_user_ns(), "no-bpf unpriv new user ns");
+
+ if (cap_mask & old_caps)
+ cap_enable_effective(cap_mask, NULL);
+}
+
+void test_deny_namespace(void)
+{
+ struct test_deny_namespace *skel = NULL;
+ int err;
+
+ if (test__start_subtest("unpriv_userns_create_no_bpf"))
+ test_unpriv_userns_create_no_bpf();
+
+ skel = test_deny_namespace__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel load"))
+ goto close_prog;
+
+ err = test_deny_namespace__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto close_prog;
+
+ if (test__start_subtest("userns_create_bpf"))
+ test_userns_create_bpf();
+
+ test_deny_namespace__detach(skel);
+
+close_prog:
+ test_deny_namespace__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
new file mode 100644
index 000000000000..6c2b0c3dbcd8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "dmabuf_iter.skel.h"
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/udmabuf.h>
+
+static int udmabuf = -1;
+static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter";
+static size_t udmabuf_test_buffer_size;
+static int sysheap_dmabuf = -1;
+static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter";
+static size_t sysheap_test_buffer_size;
+
+static int create_udmabuf(void)
+{
+ struct udmabuf_create create;
+ int dev_udmabuf, memfd, local_udmabuf;
+
+ udmabuf_test_buffer_size = 10 * getpagesize();
+
+ if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING);
+ if (!ASSERT_OK_FD(memfd, "memfd_create"))
+ return -1;
+
+ if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal"))
+ goto close_memfd;
+
+ dev_udmabuf = open("/dev/udmabuf", O_RDONLY);
+ if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf"))
+ goto close_memfd;
+
+ memset(&create, 0, sizeof(create));
+ create.memfd = memfd;
+ create.flags = UDMABUF_FLAGS_CLOEXEC;
+ create.offset = 0;
+ create.size = udmabuf_test_buffer_size;
+
+ local_udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create);
+ close(dev_udmabuf);
+ if (!ASSERT_OK_FD(local_udmabuf, "udmabuf_create"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(ioctl(local_udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name"))
+ goto close_udmabuf;
+
+ return local_udmabuf;
+
+close_udmabuf:
+ close(local_udmabuf);
+close_memfd:
+ close(memfd);
+ return -1;
+}
+
+static int create_sys_heap_dmabuf(void)
+{
+ sysheap_test_buffer_size = 20 * getpagesize();
+
+ struct dma_heap_allocation_data data = {
+ .len = sysheap_test_buffer_size,
+ .fd = 0,
+ .fd_flags = O_RDWR | O_CLOEXEC,
+ .heap_flags = 0,
+ };
+ int heap_fd, ret;
+
+ if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ heap_fd = open("/dev/dma_heap/system", O_RDONLY);
+ if (!ASSERT_OK_FD(heap_fd, "open dma heap"))
+ return -1;
+
+ ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
+ close(heap_fd);
+ if (!ASSERT_OK(ret, "syheap alloc"))
+ return -1;
+
+ if (!ASSERT_OK(ioctl(data.fd, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name"))
+ goto close_sysheap_dmabuf;
+
+ return data.fd;
+
+close_sysheap_dmabuf:
+ close(data.fd);
+ return -1;
+}
+
+static int create_test_buffers(void)
+{
+ udmabuf = create_udmabuf();
+ sysheap_dmabuf = create_sys_heap_dmabuf();
+
+ if (udmabuf < 0 || sysheap_dmabuf < 0)
+ return -1;
+
+ return 0;
+}
+
+static void destroy_test_buffers(void)
+{
+ close(udmabuf);
+ udmabuf = -1;
+
+ close(sysheap_dmabuf);
+ sysheap_dmabuf = -1;
+}
+
+enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT };
+struct DmabufInfo {
+ unsigned long inode;
+ unsigned long size;
+ char name[DMA_BUF_NAME_LEN];
+ char exporter[32];
+};
+
+static bool check_dmabuf_info(const struct DmabufInfo *bufinfo,
+ unsigned long size,
+ const char *name, const char *exporter)
+{
+ return size == bufinfo->size &&
+ !strcmp(name, bufinfo->name) &&
+ !strcmp(exporter, bufinfo->exporter);
+}
+
+static void subtest_dmabuf_iter_check_no_infinite_reads(struct dmabuf_iter *skel)
+{
+ int iter_fd;
+ char buf[256];
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ while (read(iter_fd, buf, sizeof(buf)) > 0)
+ ; /* Read out all contents */
+
+ /* Next reads should return 0 */
+ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
+
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel)
+{
+ bool found_test_sysheap_dmabuf = false;
+ bool found_test_udmabuf = false;
+ struct DmabufInfo bufinfo;
+ size_t linesize = 0;
+ char *line = NULL;
+ FILE *iter_file;
+ int iter_fd, f = INODE;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ iter_file = fdopen(iter_fd, "r");
+ if (!ASSERT_OK_PTR(iter_file, "fdopen"))
+ goto close_iter_fd;
+
+ while (getline(&line, &linesize, iter_file) != -1) {
+ if (f % FIELD_COUNT == INODE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1,
+ "read inode");
+ } else if (f % FIELD_COUNT == SIZE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1,
+ "read size");
+ } else if (f % FIELD_COUNT == NAME) {
+ ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1,
+ "read name");
+ } else if (f % FIELD_COUNT == EXPORTER) {
+ ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1,
+ "read exporter");
+
+ if (check_dmabuf_info(&bufinfo,
+ sysheap_test_buffer_size,
+ sysheap_test_buffer_name,
+ "system"))
+ found_test_sysheap_dmabuf = true;
+ else if (check_dmabuf_info(&bufinfo,
+ udmabuf_test_buffer_size,
+ udmabuf_test_buffer_name,
+ "udmabuf"))
+ found_test_udmabuf = true;
+ }
+ ++f;
+ }
+
+ ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields");
+
+ ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf");
+ ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf");
+
+ free(line);
+ fclose(iter_file);
+close_iter_fd:
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ char key[DMA_BUF_NAME_LEN];
+ int err, fd;
+ bool found;
+
+ /* No need to attach it, just run it directly */
+ fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each);
+
+ err = bpf_prog_test_run_opts(fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key"))
+ return;
+
+ do {
+ ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup");
+ ASSERT_TRUE(found, "found test buffer");
+ } while (bpf_map_get_next_key(map_fd, key, key));
+}
+
+void test_dmabuf_iter(void)
+{
+ struct dmabuf_iter *skel = NULL;
+ int map_fd;
+ const bool f = false;
+
+ skel = dmabuf_iter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.testbuf_hash);
+ if (!ASSERT_OK_FD(map_fd, "map_fd"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY),
+ "insert udmabuf"))
+ goto destroy_skel;
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY),
+ "insert sysheap buffer"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(create_test_buffers(), "create_test_buffers"))
+ goto destroy;
+
+ if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach"))
+ goto destroy;
+
+ if (test__start_subtest("no_infinite_reads"))
+ subtest_dmabuf_iter_check_no_infinite_reads(skel);
+ if (test__start_subtest("default_iter"))
+ subtest_dmabuf_iter_check_default_iter(skel);
+ if (test__start_subtest("open_coded"))
+ subtest_dmabuf_iter_check_open_coded(skel, map_fd);
+
+destroy:
+ destroy_test_buffers();
+destroy_skel:
+ dmabuf_iter__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
new file mode 100644
index 000000000000..d3d94596ab79
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+#include "dummy_st_ops_success.skel.h"
+#include "dummy_st_ops_fail.skel.h"
+#include "trace_dummy_st_ops.skel.h"
+
+/* Need to keep consistent with definition in include/linux/bpf.h */
+struct bpf_dummy_ops_state {
+ int val;
+};
+
+static void test_dummy_st_ops_attach(void)
+{
+ struct dummy_st_ops_success *skel;
+ struct bpf_link *link;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.dummy_1);
+ ASSERT_EQ(libbpf_get_error(link), -EOPNOTSUPP, "dummy_st_ops_attach");
+
+ dummy_st_ops_success__destroy(skel);
+}
+
+static void test_dummy_init_ret_value(void)
+{
+ __u64 args[1] = {0};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_1);
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret");
+
+ dummy_st_ops_success__destroy(skel);
+}
+
+static void test_dummy_init_ptr_arg(void)
+{
+ int exp_retval = 0xbeef;
+ struct bpf_dummy_ops_state in_state = {
+ .val = exp_retval,
+ };
+ __u64 args[1] = {(unsigned long)&in_state};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct trace_dummy_st_ops *trace_skel;
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_1);
+
+ trace_skel = trace_dummy_st_ops__open();
+ if (!ASSERT_OK_PTR(trace_skel, "trace_dummy_st_ops__open"))
+ goto done;
+
+ err = bpf_program__set_attach_target(trace_skel->progs.fentry_test_1,
+ fd, "test_1");
+ if (!ASSERT_OK(err, "set_attach_target(fentry_test_1)"))
+ goto done;
+
+ err = trace_dummy_st_ops__load(trace_skel);
+ if (!ASSERT_OK(err, "load(trace_skel)"))
+ goto done;
+
+ err = trace_dummy_st_ops__attach(trace_skel);
+ if (!ASSERT_OK(err, "attach(trace_skel)"))
+ goto done;
+
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret");
+ ASSERT_EQ(attr.retval, exp_retval, "test_ret");
+ ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val");
+
+done:
+ dummy_st_ops_success__destroy(skel);
+ trace_dummy_st_ops__destroy(trace_skel);
+}
+
+static void test_dummy_multiple_args(void)
+{
+ struct bpf_dummy_ops_state st = { 7 };
+ __u64 args[5] = {(__u64)&st, -100, 0x8a5f, 'c', 0x1234567887654321ULL};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+ size_t i;
+ char name[8];
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_2);
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_OK(err, "test_run");
+ args[0] = 7;
+ for (i = 0; i < ARRAY_SIZE(args); i++) {
+ snprintf(name, sizeof(name), "arg %zu", i);
+ ASSERT_EQ(skel->bss->test_2_args[i], args[i], name);
+ }
+
+ dummy_st_ops_success__destroy(skel);
+}
+
+static void test_dummy_sleepable(void)
+{
+ struct bpf_dummy_ops_state st;
+ __u64 args[1] = {(__u64)&st};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_sleepable);
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_OK(err, "test_run");
+
+ dummy_st_ops_success__destroy(skel);
+}
+
+/* dummy_st_ops.test_sleepable() parameter is not marked as nullable,
+ * thus bpf_prog_test_run_opts() below should be rejected as it tries
+ * to pass NULL for this parameter.
+ */
+static void test_dummy_sleepable_reject_null(void)
+{
+ __u64 args[1] = {0};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_sleepable);
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_EQ(err, -EINVAL, "test_run");
+
+ dummy_st_ops_success__destroy(skel);
+}
+
+void test_dummy_st_ops(void)
+{
+ if (test__start_subtest("dummy_st_ops_attach"))
+ test_dummy_st_ops_attach();
+ if (test__start_subtest("dummy_init_ret_value"))
+ test_dummy_init_ret_value();
+ if (test__start_subtest("dummy_init_ptr_arg"))
+ test_dummy_init_ptr_arg();
+ if (test__start_subtest("dummy_multiple_args"))
+ test_dummy_multiple_args();
+ if (test__start_subtest("dummy_sleepable"))
+ test_dummy_sleepable();
+ if (test__start_subtest("dummy_sleepable_reject_null"))
+ test_dummy_sleepable_reject_null();
+
+ RUN_TESTS(dummy_st_ops_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
new file mode 100644
index 000000000000..b9f86cb91e81
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "dynptr_fail.skel.h"
+#include "dynptr_success.skel.h"
+
+enum test_setup_type {
+ SETUP_SYSCALL_SLEEP,
+ SETUP_SKB_PROG,
+ SETUP_SKB_PROG_TP,
+ SETUP_XDP_PROG,
+};
+
+static struct {
+ const char *prog_name;
+ enum test_setup_type type;
+} success_tests[] = {
+ {"test_read_write", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_data", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy_xdp", SETUP_XDP_PROG},
+ {"test_dynptr_memset_zero", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_notzero", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_zero_offset", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_zero_adjusted", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_overflow", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_overflow_offset", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_readonly", SETUP_SKB_PROG},
+ {"test_dynptr_memset_xdp_chunks", SETUP_XDP_PROG},
+ {"test_ringbuf", SETUP_SYSCALL_SLEEP},
+ {"test_skb_readonly", SETUP_SKB_PROG},
+ {"test_dynptr_skb_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG},
+ {"test_adjust", SETUP_SYSCALL_SLEEP},
+ {"test_adjust_err", SETUP_SYSCALL_SLEEP},
+ {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_is_null", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_is_rdonly", SETUP_SKB_PROG},
+ {"test_dynptr_clone", SETUP_SKB_PROG},
+ {"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
+ {"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
+ {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
+ {"test_probe_read_user_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG},
+ {"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP},
+};
+
+#define PAGE_SIZE_64K 65536
+
+static void verify_success(const char *prog_name, enum test_setup_type setup_type)
+{
+ char user_data[384] = {[0 ... 382] = 'a', '\0'};
+ struct dynptr_success *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err;
+
+ skel = dynptr_success__open();
+ if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ bpf_program__set_autoload(prog, true);
+
+ err = dynptr_success__load(skel);
+ if (!ASSERT_OK(err, "dynptr_success__load"))
+ goto cleanup;
+
+ skel->bss->user_ptr = user_data;
+ skel->data->test_len[0] = sizeof(user_data);
+ memcpy(skel->bss->expected_str, user_data, sizeof(user_data));
+
+ switch (setup_type) {
+ case SETUP_SYSCALL_SLEEP:
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+
+ usleep(1);
+
+ bpf_link__destroy(link);
+ break;
+ case SETUP_SKB_PROG:
+ {
+ int prog_fd;
+ char buf[64];
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
+ case SETUP_SKB_PROG_TP:
+ {
+ struct __sk_buff skb = {};
+ struct bpf_object *obj;
+ int aux_prog_fd;
+
+ /* Just use its test_run to trigger kfree_skb tracepoint */
+ err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &aux_prog_fd);
+ if (!ASSERT_OK(err, "prog_load sched cls"))
+ goto cleanup;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(aux_prog_fd, &topts);
+ bpf_link__destroy(link);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
+ case SETUP_XDP_PROG:
+ {
+ char data[90000];
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .repeat = 1,
+ );
+
+ if (getpagesize() == PAGE_SIZE_64K)
+ opts.data_size_in = sizeof(data);
+ else
+ opts.data_size_in = 5000;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
+ }
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+
+cleanup:
+ dynptr_success__destroy(skel);
+}
+
+void test_dynptr(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i].prog_name))
+ continue;
+
+ verify_success(success_tests[i].prog_name, success_tests[i].type);
+ }
+
+ RUN_TESTS(dynptr_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
new file mode 100644
index 000000000000..438583e1f2d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <net/if.h>
+#include "empty_skb.skel.h"
+
+void test_empty_skb(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, tattr);
+ struct empty_skb *bpf_obj = NULL;
+ struct nstoken *tok = NULL;
+ struct bpf_program *prog;
+ char eth_hlen_pp[15];
+ char eth_hlen[14];
+ int veth_ifindex;
+ int ipip_ifindex;
+ int err;
+ int i;
+
+ struct {
+ const char *msg;
+ const void *data_in;
+ __u32 data_size_in;
+ int *ifindex;
+ int err;
+ int ret;
+ int lwt_egress_ret; /* expected retval at lwt/egress */
+ bool success_on_tc;
+ } tests[] = {
+ /* Empty packets are always rejected. */
+
+ {
+ /* BPF_PROG_RUN ETH_HLEN size check */
+ .msg = "veth empty ingress packet",
+ .data_in = NULL,
+ .data_size_in = 0,
+ .ifindex = &veth_ifindex,
+ .err = -EINVAL,
+ },
+ {
+ /* BPF_PROG_RUN ETH_HLEN size check */
+ .msg = "ipip empty ingress packet",
+ .data_in = NULL,
+ .data_size_in = 0,
+ .ifindex = &ipip_ifindex,
+ .err = -EINVAL,
+ },
+
+ /* ETH_HLEN-sized packets:
+ * - can not be redirected at LWT_XMIT
+ * - can be redirected at TC to non-tunneling dest
+ */
+
+ {
+ /* __bpf_redirect_common */
+ .msg = "veth ETH_HLEN packet ingress",
+ .data_in = eth_hlen,
+ .data_size_in = sizeof(eth_hlen),
+ .ifindex = &veth_ifindex,
+ .ret = -ERANGE,
+ .lwt_egress_ret = -ERANGE,
+ .success_on_tc = true,
+ },
+ {
+ /* __bpf_redirect_no_mac
+ *
+ * lwt: skb->len=0 <= skb_network_offset=0
+ * tc: skb->len=14 <= skb_network_offset=14
+ */
+ .msg = "ipip ETH_HLEN packet ingress",
+ .data_in = eth_hlen,
+ .data_size_in = sizeof(eth_hlen),
+ .ifindex = &ipip_ifindex,
+ .ret = -ERANGE,
+ .lwt_egress_ret = -ERANGE,
+ },
+
+ /* ETH_HLEN+1-sized packet should be redirected. */
+
+ {
+ .msg = "veth ETH_HLEN+1 packet ingress",
+ .data_in = eth_hlen_pp,
+ .data_size_in = sizeof(eth_hlen_pp),
+ .ifindex = &veth_ifindex,
+ .lwt_egress_ret = 1, /* veth_xmit NET_XMIT_DROP */
+ },
+ {
+ .msg = "ipip ETH_HLEN+1 packet ingress",
+ .data_in = eth_hlen_pp,
+ .data_size_in = sizeof(eth_hlen_pp),
+ .ifindex = &ipip_ifindex,
+ },
+ };
+
+ SYS(out, "ip netns add empty_skb");
+ tok = open_netns("empty_skb");
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "ip link set dev veth0 up");
+ SYS(out, "ip link set dev veth1 up");
+ SYS(out, "ip addr add 10.0.0.1/8 dev veth0");
+ SYS(out, "ip addr add 10.0.0.2/8 dev veth1");
+ veth_ifindex = if_nametoindex("veth0");
+
+ SYS(out, "ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2");
+ SYS(out, "ip link set ipip0 up");
+ SYS(out, "ip addr add 192.168.1.1/16 dev ipip0");
+ ipip_ifindex = if_nametoindex("ipip0");
+
+ bpf_obj = empty_skb__open_and_load();
+ if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ bpf_object__for_each_program(prog, bpf_obj->obj) {
+ bool at_egress = strstr(bpf_program__name(prog), "egress") != NULL;
+ bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2);
+ int expected_ret;
+ char buf[128];
+
+ expected_ret = at_egress && !at_tc ? tests[i].lwt_egress_ret : tests[i].ret;
+
+ tattr.data_in = tests[i].data_in;
+ tattr.data_size_in = tests[i].data_size_in;
+
+ tattr.data_size_out = 0;
+ bpf_obj->bss->ifindex = *tests[i].ifindex;
+ bpf_obj->bss->ret = 0;
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &tattr);
+ sprintf(buf, "err: %s [%s]", tests[i].msg, bpf_program__name(prog));
+
+ if (at_tc && tests[i].success_on_tc)
+ ASSERT_GE(err, 0, buf);
+ else
+ ASSERT_EQ(err, tests[i].err, buf);
+ sprintf(buf, "ret: %s [%s]", tests[i].msg, bpf_program__name(prog));
+ if (at_tc && tests[i].success_on_tc)
+ ASSERT_GE(bpf_obj->bss->ret, 0, buf);
+ else
+ ASSERT_EQ(bpf_obj->bss->ret, expected_ret, buf);
+ }
+ }
+
+out:
+ if (bpf_obj)
+ empty_skb__destroy(bpf_obj);
+ if (tok)
+ close_netns(tok);
+ SYS_NOFAIL("ip netns del empty_skb");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
index 2cb2085917e7..75f85d0fe74a 100644
--- a/tools/testing/selftests/bpf/prog_tests/enable_stats.c
+++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
@@ -28,7 +28,7 @@ void test_enable_stats(void)
prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
memset(&info, 0, info_len);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
if (CHECK(err, "get_prog_info",
"failed to get bpf_prog_info for fd %d\n", prog_fd))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/exceptions.c b/tools/testing/selftests/bpf/prog_tests/exceptions.c
new file mode 100644
index 000000000000..516f4a13013c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/exceptions.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "exceptions.skel.h"
+#include "exceptions_ext.skel.h"
+#include "exceptions_fail.skel.h"
+#include "exceptions_assert.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static void test_exceptions_failure(void)
+{
+ RUN_TESTS(exceptions_fail);
+}
+
+static void test_exceptions_success(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, ropts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct exceptions_ext *eskel = NULL;
+ struct exceptions *skel;
+ int ret;
+
+ skel = exceptions__open();
+ if (!ASSERT_OK_PTR(skel, "exceptions__open"))
+ return;
+
+ ret = exceptions__load(skel);
+ if (!ASSERT_OK(ret, "exceptions__load"))
+ goto done;
+
+ if (!ASSERT_OK(bpf_map_update_elem(bpf_map__fd(skel->maps.jmp_table), &(int){0},
+ &(int){bpf_program__fd(skel->progs.exception_tail_call_target)}, BPF_ANY),
+ "bpf_map_update_elem jmp_table"))
+ goto done;
+
+#define RUN_SUCCESS(_prog, return_val) \
+ if (!test__start_subtest(#_prog)) goto _prog##_##return_val; \
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs._prog), &ropts); \
+ ASSERT_OK(ret, #_prog " prog run ret"); \
+ ASSERT_EQ(ropts.retval, return_val, #_prog " prog run retval"); \
+ _prog##_##return_val:
+
+ RUN_SUCCESS(exception_throw_always_1, 64);
+ RUN_SUCCESS(exception_throw_always_2, 32);
+ RUN_SUCCESS(exception_throw_unwind_1, 16);
+ RUN_SUCCESS(exception_throw_unwind_2, 32);
+ RUN_SUCCESS(exception_throw_default, 0);
+ RUN_SUCCESS(exception_throw_default_value, 5);
+ RUN_SUCCESS(exception_tail_call, 24);
+ RUN_SUCCESS(exception_ext, 0);
+ RUN_SUCCESS(exception_ext_mod_cb_runtime, 35);
+ RUN_SUCCESS(exception_throw_subprog, 1);
+ RUN_SUCCESS(exception_assert_nz_gfunc, 1);
+ RUN_SUCCESS(exception_assert_zero_gfunc, 1);
+ RUN_SUCCESS(exception_assert_neg_gfunc, 1);
+ RUN_SUCCESS(exception_assert_pos_gfunc, 1);
+ RUN_SUCCESS(exception_assert_negeq_gfunc, 1);
+ RUN_SUCCESS(exception_assert_poseq_gfunc, 1);
+ RUN_SUCCESS(exception_assert_nz_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_zero_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_neg_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_pos_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_negeq_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_poseq_gfunc_with, 1);
+ RUN_SUCCESS(exception_bad_assert_nz_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_zero_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_neg_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_pos_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_negeq_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_poseq_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_nz_gfunc_with, 100);
+ RUN_SUCCESS(exception_bad_assert_zero_gfunc_with, 105);
+ RUN_SUCCESS(exception_bad_assert_neg_gfunc_with, 200);
+ RUN_SUCCESS(exception_bad_assert_pos_gfunc_with, 0);
+ RUN_SUCCESS(exception_bad_assert_negeq_gfunc_with, 101);
+ RUN_SUCCESS(exception_bad_assert_poseq_gfunc_with, 99);
+ RUN_SUCCESS(exception_assert_range, 1);
+ RUN_SUCCESS(exception_assert_range_with, 1);
+ RUN_SUCCESS(exception_bad_assert_range, 0);
+ RUN_SUCCESS(exception_bad_assert_range_with, 10);
+
+#define RUN_EXT(load_ret, attach_err, expr, msg, after_link) \
+ { \
+ LIBBPF_OPTS(bpf_object_open_opts, o, .kernel_log_buf = log_buf, \
+ .kernel_log_size = sizeof(log_buf), \
+ .kernel_log_level = 2); \
+ exceptions_ext__destroy(eskel); \
+ eskel = exceptions_ext__open_opts(&o); \
+ struct bpf_program *prog = NULL; \
+ struct bpf_link *link = NULL; \
+ if (!ASSERT_OK_PTR(eskel, "exceptions_ext__open")) \
+ goto done; \
+ (expr); \
+ ASSERT_OK_PTR(bpf_program__name(prog), bpf_program__name(prog)); \
+ if (!ASSERT_EQ(exceptions_ext__load(eskel), load_ret, \
+ "exceptions_ext__load")) { \
+ printf("%s\n", log_buf); \
+ goto done; \
+ } \
+ if (load_ret != 0) { \
+ if (!ASSERT_OK_PTR(strstr(log_buf, msg), "strstr")) { \
+ printf("%s\n", log_buf); \
+ goto done; \
+ } \
+ } \
+ if (!load_ret && attach_err) { \
+ if (!ASSERT_ERR_PTR(link = bpf_program__attach(prog), "attach err")) \
+ goto done; \
+ } else if (!load_ret) { \
+ if (!ASSERT_OK_PTR(link = bpf_program__attach(prog), "attach ok")) \
+ goto done; \
+ (void)(after_link); \
+ bpf_link__destroy(link); \
+ } \
+ }
+
+ if (test__start_subtest("non-throwing fentry -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing fentry -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.throwing_fentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("non-throwing fexit -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing fexit -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.throwing_fexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing extension (with custom cb) -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.throwing_exception_cb_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "Extension programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing extension -> global func in exception_cb"))
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_exception_cb_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod_global"), "set_attach_target"))
+ goto done;
+ }), "", ({ RUN_SUCCESS(exception_ext_mod_cb_runtime, 131); }));
+
+ if (test__start_subtest("throwing extension (with custom cb) -> global func in exception_cb"))
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext),
+ "exception_ext_global"), "set_attach_target"))
+ goto done;
+ }), "", ({ RUN_SUCCESS(exception_ext, 128); }));
+
+ if (test__start_subtest("non-throwing fentry -> non-throwing subprog"))
+ /* non-throwing fentry -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fentry -> non-throwing subprog"))
+ /* throwing fentry -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing fentry -> throwing subprog"))
+ /* non-throwing fentry -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fentry -> throwing subprog"))
+ /* throwing fentry -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing fexit -> non-throwing subprog"))
+ /* non-throwing fexit -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fexit -> non-throwing subprog"))
+ /* throwing fexit -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing fexit -> throwing subprog"))
+ /* non-throwing fexit -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fexit -> throwing subprog"))
+ /* throwing fexit -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ /* fmod_ret not allowed for subprog - Check so we remember to handle its
+ * throwing specification compatibility with target when supported.
+ */
+ if (test__start_subtest("non-throwing fmod_ret -> non-throwing subprog"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfmod_ret;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "can't modify return codes of BPF program", 0);
+
+ /* fmod_ret not allowed for subprog - Check so we remember to handle its
+ * throwing specification compatibility with target when supported.
+ */
+ if (test__start_subtest("non-throwing fmod_ret -> non-throwing global subprog"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfmod_ret;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "global_subprog"), "set_attach_target"))
+ goto done;
+ }), "can't modify return codes of BPF program", 0);
+
+ if (test__start_subtest("non-throwing extension -> non-throwing subprog"))
+ /* non-throwing extension -> non-throwing subprog : BAD (!global) */
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "subprog() is not a global function", 0);
+
+ if (test__start_subtest("non-throwing extension -> throwing subprog"))
+ /* non-throwing extension -> throwing subprog : BAD (!global) */
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "throwing_subprog() is not a global function", 0);
+
+ if (test__start_subtest("non-throwing extension -> non-throwing subprog"))
+ /* non-throwing extension -> non-throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing extension -> throwing global subprog"))
+ /* non-throwing extension -> throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing extension -> throwing global subprog"))
+ /* throwing extension -> throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing extension -> non-throwing global subprog"))
+ /* throwing extension -> non-throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing extension -> main subprog"))
+ /* non-throwing extension -> main subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "exception_throw_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing extension -> main subprog"))
+ /* throwing extension -> main subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "exception_throw_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+done:
+ exceptions_ext__destroy(eskel);
+ exceptions__destroy(skel);
+}
+
+static void test_exceptions_assertions(void)
+{
+ RUN_TESTS(exceptions_assert);
+}
+
+void test_exceptions(void)
+{
+ test_exceptions_success();
+ test_exceptions_failure();
+ test_exceptions_assertions();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/exhandler.c b/tools/testing/selftests/bpf/prog_tests/exhandler.c
new file mode 100644
index 000000000000..118bb182ee20
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/exhandler.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+
+/* Test that verifies exception handling is working. fork()
+ * triggers task_newtask tracepoint; that new task will have a
+ * NULL pointer task_works, and the associated task->task_works->func
+ * should not be NULL if task_works itself is non-NULL.
+ *
+ * So to verify exception handling we want to see a NULL task_works
+ * and task_works->func; if we see this we can conclude that the
+ * exception handler ran when we attempted to dereference task->task_works
+ * and zeroed the destination register.
+ */
+#include "exhandler_kern.skel.h"
+
+void test_exhandler(void)
+{
+ int err = 0, duration = 0, status;
+ struct exhandler_kern *skel;
+ pid_t cpid;
+
+ skel = exhandler_kern__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton failed: %d\n", err))
+ goto cleanup;
+
+ skel->bss->test_pid = getpid();
+
+ err = exhandler_kern__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto cleanup;
+ cpid = fork();
+ if (!ASSERT_GT(cpid, -1, "fork failed"))
+ goto cleanup;
+ if (cpid == 0)
+ _exit(0);
+ waitpid(cpid, &status, 0);
+
+ ASSERT_NEQ(skel->bss->exception_triggered, 0, "verify exceptions occurred");
+cleanup:
+ exhandler_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
new file mode 100644
index 000000000000..c534b4d5f9da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include <linux/btf.h>
+#include <bpf/bpf.h>
+
+#include "../test_btf.h"
+
+static inline int new_map(void)
+{
+ const char *name = NULL;
+ __u32 max_entries = 1;
+ __u32 value_size = 8;
+ __u32 key_size = 4;
+
+ return bpf_map_create(BPF_MAP_TYPE_ARRAY, name,
+ key_size, value_size,
+ max_entries, NULL);
+}
+
+static int new_btf(void)
+{
+ struct btf_blob {
+ struct btf_header btf_hdr;
+ __u32 types[8];
+ __u32 str;
+ } raw_btf = {
+ .btf_hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = sizeof(raw_btf.types),
+ .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types),
+ .str_len = sizeof(raw_btf.str),
+ },
+ .types = {
+ /* long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */
+ /* unsigned long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ },
+ };
+
+ return bpf_btf_load(&raw_btf, sizeof(raw_btf), NULL);
+}
+
+#define Close(FD) do { \
+ if ((FD) >= 0) { \
+ close(FD); \
+ FD = -1; \
+ } \
+} while(0)
+
+static bool map_exists(__u32 id)
+{
+ int fd;
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd >= 0) {
+ close(fd);
+ return true;
+ }
+ return false;
+}
+
+static bool btf_exists(__u32 id)
+{
+ int fd;
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd >= 0) {
+ close(fd);
+ return true;
+ }
+ return false;
+}
+
+static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *map_ids)
+{
+ __u32 len = sizeof(struct bpf_prog_info);
+ struct bpf_prog_info info;
+ int err;
+
+ memset(&info, 0, len);
+ info.nr_map_ids = *nr_map_ids;
+ info.map_ids = ptr_to_u64(map_ids);
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ return -1;
+
+ *nr_map_ids = info.nr_map_ids;
+
+ return 0;
+}
+
+static int __load_test_prog(int map_fd, const int *fd_array, int fd_array_cnt)
+{
+ /* A trivial program which uses one map */
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ opts.fd_array = fd_array;
+ opts.fd_array_cnt = fd_array_cnt;
+
+ return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, ARRAY_SIZE(insns), &opts);
+}
+
+static int load_test_prog(const int *fd_array, int fd_array_cnt)
+{
+ int map_fd;
+ int ret;
+
+ map_fd = new_map();
+ if (!ASSERT_GE(map_fd, 0, "new_map"))
+ return map_fd;
+
+ ret = __load_test_prog(map_fd, fd_array, fd_array_cnt);
+ close(map_fd);
+ return ret;
+}
+
+static bool check_expected_map_ids(int prog_fd, int expected, __u32 *map_ids, __u32 *nr_map_ids)
+{
+ int err;
+
+ err = bpf_prog_get_map_ids(prog_fd, nr_map_ids, map_ids);
+ if (!ASSERT_OK(err, "bpf_prog_get_map_ids"))
+ return false;
+ if (!ASSERT_EQ(*nr_map_ids, expected, "unexpected nr_map_ids"))
+ return false;
+
+ return true;
+}
+
+/*
+ * Load a program, which uses one map. No fd_array maps are present.
+ * On return only one map is expected to be bound to prog.
+ */
+static void check_fd_array_cnt__no_fd_array(void)
+{
+ __u32 map_ids[16];
+ __u32 nr_map_ids;
+ int prog_fd = -1;
+
+ prog_fd = load_test_prog(NULL, 0);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ return;
+ nr_map_ids = ARRAY_SIZE(map_ids);
+ check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids);
+ close(prog_fd);
+}
+
+/*
+ * Load a program, which uses one map, and pass two extra, non-equal, maps in
+ * fd_array with fd_array_cnt=2. On return three maps are expected to be bound
+ * to the program.
+ */
+static void check_fd_array_cnt__fd_array_ok(void)
+{
+ int extra_fds[2] = { -1, -1 };
+ __u32 map_ids[16];
+ __u32 nr_map_ids;
+ int prog_fd = -1;
+
+ extra_fds[0] = new_map();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+ goto cleanup;
+ extra_fds[1] = new_map();
+ if (!ASSERT_GE(extra_fds[1], 0, "new_map"))
+ goto cleanup;
+ prog_fd = load_test_prog(extra_fds, 2);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ goto cleanup;
+ nr_map_ids = ARRAY_SIZE(map_ids);
+ if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids))
+ goto cleanup;
+
+ /* maps should still exist when original file descriptors are closed */
+ Close(extra_fds[0]);
+ Close(extra_fds[1]);
+ if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map_ids[0] should exist"))
+ goto cleanup;
+ if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map_ids[1] should exist"))
+ goto cleanup;
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[1]);
+ Close(extra_fds[0]);
+ Close(prog_fd);
+}
+
+/*
+ * Load a program with a few extra maps duplicated in the fd_array.
+ * After the load maps should only be referenced once.
+ */
+static void check_fd_array_cnt__duplicated_maps(void)
+{
+ int extra_fds[4] = { -1, -1, -1, -1 };
+ __u32 map_ids[16];
+ __u32 nr_map_ids;
+ int prog_fd = -1;
+
+ extra_fds[0] = extra_fds[2] = new_map();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+ goto cleanup;
+ extra_fds[1] = extra_fds[3] = new_map();
+ if (!ASSERT_GE(extra_fds[1], 0, "new_map"))
+ goto cleanup;
+ prog_fd = load_test_prog(extra_fds, 4);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ goto cleanup;
+ nr_map_ids = ARRAY_SIZE(map_ids);
+ if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids))
+ goto cleanup;
+
+ /* maps should still exist when original file descriptors are closed */
+ Close(extra_fds[0]);
+ Close(extra_fds[1]);
+ if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist"))
+ goto cleanup;
+ if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map should exist"))
+ goto cleanup;
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[1]);
+ Close(extra_fds[0]);
+ Close(prog_fd);
+}
+
+/*
+ * Check that if maps which are referenced by a program are
+ * passed in fd_array, then they will be referenced only once
+ */
+static void check_fd_array_cnt__referenced_maps_in_fd_array(void)
+{
+ int extra_fds[1] = { -1 };
+ __u32 map_ids[16];
+ __u32 nr_map_ids;
+ int prog_fd = -1;
+
+ extra_fds[0] = new_map();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+ goto cleanup;
+ prog_fd = __load_test_prog(extra_fds[0], extra_fds, 1);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ goto cleanup;
+ nr_map_ids = ARRAY_SIZE(map_ids);
+ if (!check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids))
+ goto cleanup;
+
+ /* map should still exist when original file descriptor is closed */
+ Close(extra_fds[0]);
+ if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist"))
+ goto cleanup;
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[0]);
+ Close(prog_fd);
+}
+
+static int get_btf_id_by_fd(int btf_fd, __u32 *id)
+{
+ struct bpf_btf_info info;
+ __u32 info_len = sizeof(info);
+ int err;
+
+ memset(&info, 0, info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len);
+ if (err)
+ return err;
+ if (id)
+ *id = info.id;
+ return 0;
+}
+
+/*
+ * Check that fd_array operates properly for btfs. Namely, to check that
+ * passing a btf fd in fd_array increases its reference count, do the
+ * following:
+ * 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1
+ * 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2
+ * 3) Close the btf_fd, now refcnt=1
+ * Wait and check that BTF still exists.
+ */
+static void check_fd_array_cnt__referenced_btfs(void)
+{
+ int extra_fds[1] = { -1 };
+ int prog_fd = -1;
+ __u32 btf_id;
+ int tries;
+ int err;
+
+ extra_fds[0] = new_btf();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_btf"))
+ goto cleanup;
+ prog_fd = load_test_prog(extra_fds, 1);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ goto cleanup;
+
+ /* btf should still exist when original file descriptor is closed */
+ err = get_btf_id_by_fd(extra_fds[0], &btf_id);
+ if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd"))
+ goto cleanup;
+
+ Close(extra_fds[0]);
+
+ if (!ASSERT_GE(kern_sync_rcu(), 0, "kern_sync_rcu 1"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(btf_exists(btf_id), true, "btf should exist"))
+ goto cleanup;
+
+ Close(prog_fd);
+
+ /* The program is freed by a workqueue, so no reliable
+ * way to sync, so just wait a bit (max ~1 second). */
+ for (tries = 100; tries >= 0; tries--) {
+ usleep(1000);
+
+ if (!btf_exists(btf_id))
+ break;
+
+ if (tries)
+ continue;
+
+ PRINT_FAIL("btf should have been freed");
+ }
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[0]);
+ Close(prog_fd);
+}
+
+/*
+ * Test that a program with trash in fd_array can't be loaded:
+ * only map and BTF file descriptors should be accepted.
+ */
+static void check_fd_array_cnt__fd_array_with_trash(void)
+{
+ int extra_fds[3] = { -1, -1, -1 };
+ int prog_fd = -1;
+
+ extra_fds[0] = new_map();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+ goto cleanup;
+ extra_fds[1] = new_btf();
+ if (!ASSERT_GE(extra_fds[1], 0, "new_btf"))
+ goto cleanup;
+
+ /* trash 1: not a file descriptor */
+ extra_fds[2] = 0xbeef;
+ prog_fd = load_test_prog(extra_fds, 3);
+ if (!ASSERT_EQ(prog_fd, -EBADF, "prog should have been rejected with -EBADF"))
+ goto cleanup;
+
+ /* trash 2: not a map or btf */
+ extra_fds[2] = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(extra_fds[2], 0, "socket"))
+ goto cleanup;
+
+ prog_fd = load_test_prog(extra_fds, 3);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "prog should have been rejected with -EINVAL"))
+ goto cleanup;
+
+ /* Validate that the prog is ok if trash is removed */
+ Close(extra_fds[2]);
+ extra_fds[2] = new_btf();
+ if (!ASSERT_GE(extra_fds[2], 0, "new_btf"))
+ goto cleanup;
+
+ prog_fd = load_test_prog(extra_fds, 3);
+ if (!ASSERT_GE(prog_fd, 0, "prog should have been loaded"))
+ goto cleanup;
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[2]);
+ Close(extra_fds[1]);
+ Close(extra_fds[0]);
+}
+
+/*
+ * Test that a program with too big fd_array can't be loaded.
+ */
+static void check_fd_array_cnt__fd_array_too_big(void)
+{
+ int extra_fds[65];
+ int prog_fd = -1;
+ int i;
+
+ for (i = 0; i < 65; i++) {
+ extra_fds[i] = new_map();
+ if (!ASSERT_GE(extra_fds[i], 0, "new_map"))
+ goto cleanup_fds;
+ }
+
+ prog_fd = load_test_prog(extra_fds, 65);
+ ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG");
+
+cleanup_fds:
+ while (i > 0)
+ Close(extra_fds[--i]);
+}
+
+void test_fd_array_cnt(void)
+{
+ if (test__start_subtest("no-fd-array"))
+ check_fd_array_cnt__no_fd_array();
+
+ if (test__start_subtest("fd-array-ok"))
+ check_fd_array_cnt__fd_array_ok();
+
+ if (test__start_subtest("fd-array-dup-input"))
+ check_fd_array_cnt__duplicated_maps();
+
+ if (test__start_subtest("fd-array-ref-maps-in-array"))
+ check_fd_array_cnt__referenced_maps_in_fd_array();
+
+ if (test__start_subtest("fd-array-ref-btfs"))
+ check_fd_array_cnt__referenced_btfs();
+
+ if (test__start_subtest("fd-array-trash-input"))
+ check_fd_array_cnt__fd_array_with_trash();
+
+ if (test__start_subtest("fd-array-2big"))
+ check_fd_array_cnt__fd_array_too_big();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
new file mode 100644
index 000000000000..ca46fdd6e1ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <test_progs.h>
+#include "fd_htab_lookup.skel.h"
+
+struct htab_op_ctx {
+ int fd;
+ int loop;
+ unsigned int entries;
+ bool stop;
+};
+
+#define ERR_TO_RETVAL(where, err) ((void *)(long)(((where) << 12) | (-err)))
+
+static void *htab_lookup_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0, value;
+ int inner_fd, err;
+
+ err = bpf_map_lookup_elem(ctx->fd, &key, &value);
+ if (err) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, err);
+ }
+
+ inner_fd = bpf_map_get_fd_by_id(value);
+ if (inner_fd < 0) {
+ /* The old map has been freed */
+ if (inner_fd == -ENOENT)
+ continue;
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, inner_fd);
+ }
+
+ err = bpf_map_lookup_elem(inner_fd, &zero, &value);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+
+ if (value != key) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(4, -EINVAL);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (inner_fd < 0) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, inner_fd);
+ }
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, err);
+ }
+
+ err = bpf_map_update_elem(ctx->fd, &key, &inner_fd, BPF_EXIST);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+ }
+ }
+
+ return NULL;
+}
+
+static int setup_htab(int fd, unsigned int entries)
+{
+ unsigned int i;
+
+ for (i = 0; i < entries; i++) {
+ unsigned int key = i, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "new array"))
+ return -1;
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (!ASSERT_OK(err, "init array")) {
+ close(inner_fd);
+ return -1;
+ }
+
+ err = bpf_map_update_elem(fd, &key, &inner_fd, 0);
+ if (!ASSERT_OK(err, "init outer")) {
+ close(inner_fd);
+ return -1;
+ }
+ close(inner_fd);
+ }
+
+ return 0;
+}
+
+static int get_int_from_env(const char *name, int dft)
+{
+ const char *value;
+
+ value = getenv(name);
+ if (!value)
+ return dft;
+
+ return atoi(value);
+}
+
+void test_fd_htab_lookup(void)
+{
+ unsigned int i, wr_nr = 8, rd_nr = 16;
+ pthread_t tids[wr_nr + rd_nr];
+ struct fd_htab_lookup *skel;
+ struct htab_op_ctx ctx;
+ int err;
+
+ skel = fd_htab_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fd_htab_lookup__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.outer_map);
+ ctx.loop = get_int_from_env("FD_HTAB_LOOP_NR", 5);
+ ctx.stop = false;
+ ctx.entries = 8;
+
+ err = setup_htab(ctx.fd, ctx.entries);
+ if (err)
+ goto destroy;
+
+ memset(tids, 0, sizeof(tids));
+ for (i = 0; i < wr_nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+ for (i = 0; i < rd_nr; i++) {
+ err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+
+reap:
+ for (i = 0; i < wr_nr + rd_nr; i++) {
+ void *ret = NULL;
+ char desc[32];
+
+ if (!tids[i])
+ continue;
+
+ snprintf(desc, sizeof(desc), "thread %u", i + 1);
+ err = pthread_join(tids[i], &ret);
+ ASSERT_OK(err, desc);
+ ASSERT_EQ(ret, NULL, desc);
+ }
+destroy:
+ fd_htab_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 109d0345a2be..5ef1804e44df 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,49 +1,56 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
-#include "fexit_test.skel.h"
+#include "fentry_test.lskel.h"
+#include "fexit_test.lskel.h"
void test_fentry_fexit(void)
{
- struct fentry_test *fentry_skel = NULL;
- struct fexit_test *fexit_skel = NULL;
+ struct fentry_test_lskel *fentry_skel = NULL;
+ struct fexit_test_lskel *fexit_skel = NULL;
__u64 *fentry_res, *fexit_res;
- __u32 duration = 0, retval;
int err, prog_fd, i;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- fentry_skel = fentry_test__open_and_load();
- if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
+ fentry_skel = fentry_test_lskel__open();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
goto close_prog;
- fexit_skel = fexit_test__open_and_load();
- if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
+ goto close_prog;
+
+ fexit_skel = fexit_test_lskel__open();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
+ goto close_prog;
+
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
goto close_prog;
- err = fentry_test__attach(fentry_skel);
- if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
+ err = fentry_test_lskel__attach(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_attach"))
goto close_prog;
- err = fexit_test__attach(fexit_skel);
- if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ err = fexit_test_lskel__attach(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_attach"))
goto close_prog;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_OK(topts.retval, "ipv6 test retval");
fentry_res = (__u64 *)fentry_skel->bss;
fexit_res = (__u64 *)fexit_skel->bss;
printf("%lld\n", fentry_skel->bss->test1_result);
for (i = 0; i < 8; i++) {
- CHECK(fentry_res[i] != 1, "result",
- "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]);
- CHECK(fexit_res[i] != 1, "result",
- "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]);
+ ASSERT_EQ(fentry_res[i], 1, "fentry result");
+ ASSERT_EQ(fexit_res[i], 1, "fexit result");
}
close_prog:
- fentry_test__destroy(fentry_skel);
- fexit_test__destroy(fexit_skel);
+ fentry_test_lskel__destroy(fentry_skel);
+ fexit_test_lskel__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 04ebbf1cb390..ec882328eb59 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,37 +1,98 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
+#include "fentry_test.lskel.h"
+#include "fentry_many_args.skel.h"
-void test_fentry_test(void)
+static int fentry_test_common(struct fentry_test_lskel *fentry_skel)
{
- struct fentry_test *fentry_skel = NULL;
int err, prog_fd, i;
- __u32 duration = 0, retval;
+ int link_fd;
__u64 *result;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- fentry_skel = fentry_test__open_and_load();
- if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
- goto cleanup;
+ err = fentry_test_lskel__attach(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_attach"))
+ return err;
- err = fentry_test__attach(fentry_skel);
- if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
- goto cleanup;
+ /* Check that already linked program can't be attached again. */
+ link_fd = fentry_test_lskel__test1__attach(fentry_skel);
+ if (!ASSERT_LT(link_fd, 0, "fentry_attach_link"))
+ return -1;
- prog_fd = bpf_program__fd(fentry_skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ prog_fd = fentry_skel->progs.test1.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
result = (__u64 *)fentry_skel->bss;
- for (i = 0; i < 6; i++) {
- if (CHECK(result[i] != 1, "result",
- "fentry_test%d failed err %lld\n", i + 1, result[i]))
- goto cleanup;
+ for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) {
+ if (!ASSERT_EQ(result[i], 1, "fentry_result"))
+ return -1;
}
+ fentry_test_lskel__detach(fentry_skel);
+
+ /* zero results for re-attach test */
+ memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss));
+ return 0;
+}
+
+static void fentry_test(void)
+{
+ struct fentry_test_lskel *fentry_skel = NULL;
+ int err;
+
+ fentry_skel = fentry_test_lskel__open();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open"))
+ goto cleanup;
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
+ goto cleanup;
+
+ err = fentry_test_common(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_first_attach"))
+ goto cleanup;
+
+ err = fentry_test_common(fentry_skel);
+ ASSERT_OK(err, "fentry_second_attach");
+
cleanup:
- fentry_test__destroy(fentry_skel);
+ fentry_test_lskel__destroy(fentry_skel);
+}
+
+static void fentry_many_args(void)
+{
+ struct fentry_many_args *fentry_skel = NULL;
+ int err;
+
+ fentry_skel = fentry_many_args__open_and_load();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_many_args_skel_load"))
+ goto cleanup;
+
+ err = fentry_many_args__attach(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_many_args_attach"))
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+
+ ASSERT_EQ(fentry_skel->bss->test1_result, 1,
+ "fentry_many_args_result1");
+ ASSERT_EQ(fentry_skel->bss->test2_result, 1,
+ "fentry_many_args_result2");
+ ASSERT_EQ(fentry_skel->bss->test3_result, 1,
+ "fentry_many_args_result3");
+
+cleanup:
+ fentry_many_args__destroy(fentry_skel);
+}
+
+void test_fentry_test(void)
+{
+ if (test__start_subtest("fentry"))
+ fentry_test();
+ if (test__start_subtest("fentry_many_args"))
+ fentry_many_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 5c0448910426..f29fc789c14b 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -3,6 +3,9 @@
#include <test_progs.h>
#include <network_helpers.h>
#include <bpf/btf.h>
+#include "bind4_prog.skel.h"
+#include "freplace_progmap.skel.h"
+#include "xdp_dummy.skel.h"
typedef int (*test_cb)(struct bpf_object *obj);
@@ -58,42 +61,89 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
test_cb cb)
{
struct bpf_object *obj = NULL, *tgt_obj;
- struct bpf_program **prog = NULL;
+ __u32 tgt_prog_id, info_len;
+ struct bpf_prog_info prog_info = {};
+ struct bpf_program **prog = NULL, *p;
struct bpf_link **link = NULL;
- __u32 duration = 0, retval;
int err, tgt_fd, i;
-
- err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ struct btf *btf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v6,
+ .data_size_in = sizeof(pkt_v6),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&tgt_obj, &tgt_fd);
- if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
- target_obj_file, err, errno))
+ if (!ASSERT_OK(err, "tgt_prog_load"))
return;
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .attach_prog_fd = tgt_fd,
- );
+
+ info_len = sizeof(prog_info);
+ err = bpf_prog_get_info_by_fd(tgt_fd, &prog_info, &info_len);
+ if (!ASSERT_OK(err, "tgt_fd_get_info"))
+ goto close_prog;
+
+ tgt_prog_id = prog_info.id;
+ btf = bpf_object__btf(tgt_obj);
link = calloc(sizeof(struct bpf_link *), prog_cnt);
+ if (!ASSERT_OK_PTR(link, "link_ptr"))
+ goto close_prog;
+
prog = calloc(sizeof(struct bpf_program *), prog_cnt);
- if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory"))
+ if (!ASSERT_OK_PTR(prog, "prog_ptr"))
goto close_prog;
- obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open %s: %ld\n", obj_file,
- PTR_ERR(obj)))
+ obj = bpf_object__open_file(obj_file, NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
goto close_prog;
+ bpf_object__for_each_program(p, obj) {
+ err = bpf_program__set_attach_target(p, tgt_fd, NULL);
+ ASSERT_OK(err, "set_attach_target");
+ }
+
err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
+ if (!ASSERT_OK(err, "obj_load"))
goto close_prog;
for (i = 0; i < prog_cnt; i++) {
- prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i]))
+ struct bpf_link_info link_info;
+ struct bpf_program *pos;
+ const char *pos_sec_name;
+ char *tgt_name;
+ __s32 btf_id;
+
+ tgt_name = strstr(prog_name[i], "/");
+ if (!ASSERT_OK_PTR(tgt_name, "tgt_name"))
goto close_prog;
+ btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC);
+
+ prog[i] = NULL;
+ bpf_object__for_each_program(pos, obj) {
+ pos_sec_name = bpf_program__section_name(pos);
+ if (pos_sec_name && !strcmp(pos_sec_name, prog_name[i])) {
+ prog[i] = pos;
+ break;
+ }
+ }
+ if (!ASSERT_OK_PTR(prog[i], prog_name[i]))
+ goto close_prog;
+
link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ if (!ASSERT_OK_PTR(link[i], "attach_trace"))
goto close_prog;
+
+ info_len = sizeof(link_info);
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link[i]),
+ &link_info, &info_len);
+ ASSERT_OK(err, "link_fd_get_info");
+ ASSERT_EQ(link_info.tracing.attach_type,
+ bpf_program__expected_attach_type(prog[i]),
+ "link_attach_type");
+ ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id");
+ ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id");
}
if (cb) {
@@ -105,21 +155,17 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
if (!run_prog)
goto close_prog;
- err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(tgt_fd, &topts);
+ ASSERT_OK(err, "prog_run");
+ ASSERT_EQ(topts.retval, 0, "prog_run_ret");
if (check_data_map(obj, prog_cnt, false))
goto close_prog;
close_prog:
for (i = 0; i < prog_cnt; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- if (!IS_ERR_OR_NULL(obj))
- bpf_object__close(obj);
+ bpf_link__destroy(link[i]);
+ bpf_object__close(obj);
bpf_object__close(tgt_obj);
free(link);
free(prog);
@@ -130,8 +176,8 @@ static void test_target_no_callees(void)
const char *prog_name[] = {
"fexit/test_pkt_md_access",
};
- test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o",
- "./test_pkt_md_access.o",
+ test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.bpf.o",
+ "./test_pkt_md_access.bpf.o",
ARRAY_SIZE(prog_name),
prog_name, true, NULL);
}
@@ -144,8 +190,8 @@ static void test_target_yes_callees(void)
"fexit/test_pkt_access_subprog2",
"fexit/test_pkt_access_subprog3",
};
- test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
- "./test_pkt_access.o",
+ test_fexit_bpf2bpf_common("./fexit_bpf2bpf.bpf.o",
+ "./test_pkt_access.bpf.o",
ARRAY_SIZE(prog_name),
prog_name, true, NULL);
}
@@ -162,8 +208,8 @@ static void test_func_replace(void)
"freplace/get_constant",
"freplace/test_pkt_write_access_subprog",
};
- test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
- "./test_pkt_access.o",
+ test_fexit_bpf2bpf_common("./fexit_bpf2bpf.bpf.o",
+ "./test_pkt_access.bpf.o",
ARRAY_SIZE(prog_name),
prog_name, true, NULL);
}
@@ -173,42 +219,44 @@ static void test_func_replace_verify(void)
const char *prog_name[] = {
"freplace/do_bind",
};
- test_fexit_bpf2bpf_common("./freplace_connect4.o",
- "./connect4_prog.o",
+ test_fexit_bpf2bpf_common("./freplace_connect4.bpf.o",
+ "./connect4_prog.bpf.o",
ARRAY_SIZE(prog_name),
prog_name, false, NULL);
}
static int test_second_attach(struct bpf_object *obj)
{
- const char *prog_name = "freplace/get_constant";
- const char *tgt_name = prog_name + 9; /* cut off freplace/ */
- const char *tgt_obj_file = "./test_pkt_access.o";
+ const char *prog_name = "security_new_get_constant";
+ const char *tgt_name = "get_constant";
+ const char *tgt_obj_file = "./test_pkt_access.bpf.o";
struct bpf_program *prog = NULL;
struct bpf_object *tgt_obj;
- __u32 duration = 0, retval;
struct bpf_link *link;
int err = 0, tgt_fd;
-
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name))
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v6,
+ .data_size_in = sizeof(pkt_v6),
+ .repeat = 1,
+ );
+
+ prog = bpf_object__find_program_by_name(obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "find_prog"))
return -ENOENT;
- err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
+ err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
&tgt_obj, &tgt_fd);
- if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n",
- tgt_obj_file, err, errno))
+ if (!ASSERT_OK(err, "second_prog_load"))
return err;
link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
- if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+ if (!ASSERT_OK_PTR(link, "second_link"))
goto out;
- err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(tgt_fd, &topts);
+ if (!ASSERT_OK(err, "ipv6 test_run"))
+ goto out;
+ if (!ASSERT_OK(topts.retval, "ipv6 retval"))
goto out;
err = check_data_map(obj, 1, true);
@@ -226,8 +274,8 @@ static void test_func_replace_multi(void)
const char *prog_name[] = {
"freplace/get_constant",
};
- test_fexit_bpf2bpf_common("./freplace_get_constant.o",
- "./test_pkt_access.o",
+ test_fexit_bpf2bpf_common("./freplace_get_constant.bpf.o",
+ "./test_pkt_access.bpf.o",
ARRAY_SIZE(prog_name),
prog_name, true, test_second_attach);
}
@@ -235,45 +283,47 @@ static void test_func_replace_multi(void)
static void test_fmod_ret_freplace(void)
{
struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL;
- const char *freplace_name = "./freplace_get_constant.o";
- const char *fmod_ret_name = "./fmod_ret_freplace.o";
+ const char *freplace_name = "./freplace_get_constant.bpf.o";
+ const char *fmod_ret_name = "./fmod_ret_freplace.bpf.o";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
- const char *tgt_name = "./test_pkt_access.o";
+ const char *tgt_name = "./test_pkt_access.bpf.o";
struct bpf_link *freplace_link = NULL;
struct bpf_program *prog;
__u32 duration = 0;
- int err, pkt_fd;
+ int err, pkt_fd, attach_prog_fd;
- err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
+ err = bpf_prog_test_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
&pkt_obj, &pkt_fd);
/* the target prog should load fine */
if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
tgt_name, err, errno))
return;
- opts.attach_prog_fd = pkt_fd;
- freplace_obj = bpf_object__open_file(freplace_name, &opts);
- if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
- "failed to open %s: %ld\n", freplace_name,
- PTR_ERR(freplace_obj)))
+ freplace_obj = bpf_object__open_file(freplace_name, NULL);
+ if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
goto out;
+ prog = bpf_object__next_program(freplace_obj, NULL);
+ err = bpf_program__set_attach_target(prog, pkt_fd, NULL);
+ ASSERT_OK(err, "freplace__set_attach_target");
+
err = bpf_object__load(freplace_obj);
if (CHECK(err, "freplace_obj_load", "err %d\n", err))
goto out;
- prog = bpf_program__next(NULL, freplace_obj);
freplace_link = bpf_program__attach_trace(prog);
- if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+ if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
goto out;
- opts.attach_prog_fd = bpf_program__fd(prog);
- fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
- if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
- "failed to open %s: %ld\n", fmod_ret_name,
- PTR_ERR(fmod_obj)))
+ fmod_obj = bpf_object__open_file(fmod_ret_name, NULL);
+ if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
goto out;
+ attach_prog_fd = bpf_program__fd(prog);
+ prog = bpf_object__next_program(fmod_obj, NULL);
+ err = bpf_program__set_attach_target(prog, attach_prog_fd, NULL);
+ ASSERT_OK(err, "fmod_ret_set_attach_target");
+
err = bpf_object__load(fmod_obj);
if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n"))
goto out;
@@ -291,66 +341,235 @@ static void test_func_sockmap_update(void)
const char *prog_name[] = {
"freplace/cls_redirect",
};
- test_fexit_bpf2bpf_common("./freplace_cls_redirect.o",
- "./test_cls_redirect.o",
+ test_fexit_bpf2bpf_common("./freplace_cls_redirect.bpf.o",
+ "./test_cls_redirect.bpf.o",
ARRAY_SIZE(prog_name),
prog_name, false, NULL);
}
static void test_obj_load_failure_common(const char *obj_file,
- const char *target_obj_file)
-
+ const char *target_obj_file,
+ const char *exp_msg)
{
/*
* standalone test that asserts failure to load freplace prog
* because of invalid return code.
*/
struct bpf_object *obj = NULL, *pkt_obj;
+ struct bpf_program *prog;
+ char log_buf[64 * 1024];
int err, pkt_fd;
__u32 duration = 0;
- err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&pkt_obj, &pkt_fd);
/* the target prog should load fine */
if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
target_obj_file, err, errno))
return;
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .attach_prog_fd = pkt_fd,
- );
-
- obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open %s: %ld\n", obj_file,
- PTR_ERR(obj)))
+
+ obj = bpf_object__open_file(obj_file, NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
goto close_prog;
+ prog = bpf_object__next_program(obj, NULL);
+ err = bpf_program__set_attach_target(prog, pkt_fd, NULL);
+ ASSERT_OK(err, "set_attach_target");
+
+ log_buf[0] = '\0';
+ if (exp_msg)
+ bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf));
+ if (env.verbosity > VERBOSE_NONE)
+ bpf_program__set_log_level(prog, 2);
+
/* It should fail to load the program */
err = bpf_object__load(obj);
+ if (env.verbosity > VERBOSE_NONE && exp_msg) /* we overtook log */
+ printf("VERIFIER LOG:\n================\n%s\n================\n", log_buf);
if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err))
goto close_prog;
+ if (exp_msg)
+ ASSERT_HAS_SUBSTR(log_buf, exp_msg, "fail_msg");
close_prog:
- if (!IS_ERR_OR_NULL(obj))
- bpf_object__close(obj);
+ bpf_object__close(obj);
bpf_object__close(pkt_obj);
}
static void test_func_replace_return_code(void)
{
/* test invalid return code in the replaced program */
- test_obj_load_failure_common("./freplace_connect_v4_prog.o",
- "./connect4_prog.o");
+ test_obj_load_failure_common("./freplace_connect_v4_prog.bpf.o",
+ "./connect4_prog.bpf.o", NULL);
}
static void test_func_map_prog_compatibility(void)
{
/* test with spin lock map value in the replaced program */
- test_obj_load_failure_common("./freplace_attach_probe.o",
- "./test_attach_probe.o");
+ test_obj_load_failure_common("./freplace_attach_probe.bpf.o",
+ "./test_attach_probe.bpf.o", NULL);
+}
+
+static void test_func_replace_unreliable(void)
+{
+ /* freplace'ing unreliable main prog should fail with error
+ * "Cannot replace static functions"
+ */
+ test_obj_load_failure_common("freplace_unreliable_prog.bpf.o",
+ "./verifier_btf_unreliable_prog.bpf.o",
+ "Cannot replace static functions");
+}
+
+static void test_func_replace_global_func(void)
+{
+ const char *prog_name[] = {
+ "freplace/test_pkt_access",
+ };
+
+ test_fexit_bpf2bpf_common("./freplace_global_func.bpf.o",
+ "./test_pkt_access.bpf.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, false, NULL);
+}
+
+static int find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ struct btf *btf;
+ int ret;
+
+ ret = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
+ if (ret)
+ return ret;
+
+ if (!info.btf_id)
+ return -EINVAL;
+
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ ret = libbpf_get_error(btf);
+ if (ret)
+ return ret;
+
+ ret = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+ btf__free(btf);
+ return ret;
+}
+
+static int load_fentry(int attach_prog_fd, int attach_btf_id)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .attach_prog_fd = attach_prog_fd,
+ .attach_btf_id = attach_btf_id,
+ );
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_prog_load(BPF_PROG_TYPE_TRACING,
+ "bind4_fentry",
+ "GPL",
+ insns,
+ ARRAY_SIZE(insns),
+ &opts);
+}
+
+static void test_fentry_to_cgroup_bpf(void)
+{
+ struct bind4_prog *skel = NULL;
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int cgroup_fd = -1;
+ int fentry_fd = -1;
+ int btf_id;
+
+ cgroup_fd = test__join_cgroup("/fentry_to_cgroup_bpf");
+ if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+ return;
+
+ skel = bind4_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto cleanup;
+
+ skel->links.bind_v4_prog = bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.bind_v4_prog, "bpf_program__attach_cgroup"))
+ goto cleanup;
+
+ btf_id = find_prog_btf_id("bind_v4_prog", bpf_program__fd(skel->progs.bind_v4_prog));
+ if (!ASSERT_GE(btf_id, 0, "find_prog_btf_id"))
+ goto cleanup;
+
+ fentry_fd = load_fentry(bpf_program__fd(skel->progs.bind_v4_prog), btf_id);
+ if (!ASSERT_GE(fentry_fd, 0, "load_fentry"))
+ goto cleanup;
+
+ /* Make sure bpf_prog_get_info_by_fd works correctly when attaching
+ * to another BPF program.
+ */
+
+ ASSERT_OK(bpf_prog_get_info_by_fd(fentry_fd, &info, &info_len),
+ "bpf_prog_get_info_by_fd");
+
+ ASSERT_EQ(info.btf_id, 0, "info.btf_id");
+ ASSERT_EQ(info.attach_btf_id, btf_id, "info.attach_btf_id");
+ ASSERT_GT(info.attach_btf_obj_id, 0, "info.attach_btf_obj_id");
+
+cleanup:
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+ if (fentry_fd >= 0)
+ close(fentry_fd);
+ bind4_prog__destroy(skel);
+}
+
+static void test_func_replace_progmap(void)
+{
+ struct bpf_cpumap_val value = { .qsize = 1 };
+ struct freplace_progmap *skel = NULL;
+ struct xdp_dummy *tgt_skel = NULL;
+ __u32 key = 0;
+ int err;
+
+ skel = freplace_progmap__open();
+ if (!ASSERT_OK_PTR(skel, "prog_open"))
+ return;
+
+ tgt_skel = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(tgt_skel, "tgt_prog_load"))
+ goto out;
+
+ err = bpf_program__set_attach_target(skel->progs.xdp_cpumap_prog,
+ bpf_program__fd(tgt_skel->progs.xdp_dummy_prog),
+ "xdp_dummy_prog");
+ if (!ASSERT_OK(err, "set_attach_target"))
+ goto out;
+
+ err = freplace_progmap__load(skel);
+ if (!ASSERT_OK(err, "obj_load"))
+ goto out;
+
+ /* Prior to fixing the kernel, loading the PROG_TYPE_EXT 'redirect'
+ * program above will cause the map owner type of 'cpumap' to be set to
+ * PROG_TYPE_EXT. This in turn will cause the bpf_map_update_elem()
+ * below to fail, because the program we are inserting into the map is
+ * of PROG_TYPE_XDP. After fixing the kernel, the initial ownership will
+ * be correctly resolved to the *target* of the PROG_TYPE_EXT program
+ * (i.e., PROG_TYPE_XDP) and the map update will succeed.
+ */
+ value.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_drop_prog);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.cpu_map),
+ &key, &value, 0);
+ ASSERT_OK(err, "map_update");
+
+out:
+ xdp_dummy__destroy(tgt_skel);
+ freplace_progmap__destroy(skel);
}
-void test_fexit_bpf2bpf(void)
+/* NOTE: affect other tests, must run in serial mode */
+void serial_test_fexit_bpf2bpf(void)
{
if (test__start_subtest("target_no_callees"))
test_target_no_callees();
@@ -366,8 +585,16 @@ void test_fexit_bpf2bpf(void)
test_func_replace_return_code();
if (test__start_subtest("func_map_prog_compatibility"))
test_func_map_prog_compatibility();
+ if (test__start_subtest("func_replace_unreliable"))
+ test_func_replace_unreliable();
if (test__start_subtest("func_replace_multi"))
test_func_replace_multi();
if (test__start_subtest("fmod_ret_freplace"))
test_fmod_ret_freplace();
+ if (test__start_subtest("func_replace_global_func"))
+ test_func_replace_global_func();
+ if (test__start_subtest("fentry_to_cgroup_bpf"))
+ test_fentry_to_cgroup_bpf();
+ if (test__start_subtest("func_replace_progmap"))
+ test_func_replace_progmap();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
new file mode 100644
index 000000000000..552a0875ca6d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include "fexit_sleep.lskel.h"
+
+static int do_sleep(void *skel)
+{
+ struct fexit_sleep_lskel *fexit_skel = skel;
+ struct timespec ts1 = { .tv_nsec = 1 };
+ struct timespec ts2 = { .tv_sec = 10 };
+
+ fexit_skel->bss->pid = getpid();
+ (void)syscall(__NR_nanosleep, &ts1, NULL);
+ (void)syscall(__NR_nanosleep, &ts2, NULL);
+ return 0;
+}
+
+#define STACK_SIZE (1024 * 1024)
+
+void test_fexit_sleep(void)
+{
+ struct fexit_sleep_lskel *fexit_skel = NULL;
+ int wstatus, duration = 0;
+ pid_t cpid;
+ char *child_stack = NULL;
+ int err, fexit_cnt;
+
+ fexit_skel = fexit_sleep_lskel__open_and_load();
+ if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ goto cleanup;
+
+ err = fexit_sleep_lskel__attach(fexit_skel);
+ if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ goto cleanup;
+
+ child_stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (!ASSERT_NEQ(child_stack, MAP_FAILED, "mmap"))
+ goto cleanup;
+
+ cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
+ if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
+ goto cleanup;
+
+ /* wait until first sys_nanosleep ends and second sys_nanosleep starts */
+ while (READ_ONCE(fexit_skel->bss->fentry_cnt) != 2);
+ fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+ if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+ goto cleanup;
+
+ /* close progs and detach them. That will trigger two nop5->jmp5 rewrites
+ * in the trampolines to skip nanosleep_fexit prog.
+ * The nanosleep_fentry prog will get detached first.
+ * The nanosleep_fexit prog will get detached second.
+ * Detaching will trigger freeing of both progs JITed images.
+ * There will be two dying bpf_tramp_image-s, but only the initial
+ * bpf_tramp_image (with both _fentry and _fexit progs will be stuck
+ * waiting for percpu_ref_kill to confirm). The other one
+ * will be freed quickly.
+ */
+ close(fexit_skel->progs.nanosleep_fentry.prog_fd);
+ close(fexit_skel->progs.nanosleep_fexit.prog_fd);
+ fexit_sleep_lskel__detach(fexit_skel);
+
+ /* kill the thread to unwind sys_nanosleep stack through the trampoline */
+ kill(cpid, 9);
+
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
+ goto cleanup;
+ if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed"))
+ goto cleanup;
+
+ /* The bypassed nanosleep_fexit prog shouldn't have executed.
+ * Unlike progs the maps were not freed and directly accessible.
+ */
+ fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+ if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+ goto cleanup;
+
+cleanup:
+ munmap(child_stack, STACK_SIZE);
+ fexit_sleep_lskel__destroy(fexit_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 7c9b62e971f1..14c91b6f1e83 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -1,76 +1,60 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include "bpf_util.h"
-/* that's kernel internal BPF_MAX_TRAMP_PROGS define */
-#define CNT 38
-
-void test_fexit_stress(void)
+void serial_test_fexit_stress(void)
{
- char test_skb[128] = {};
- int fexit_fd[CNT] = {};
- int link_fd[CNT] = {};
- __u32 duration = 0;
- char error[4096];
- __u32 prog_ret;
- int err, i, filter_fd;
+ int bpf_max_tramp_links, err, i;
+ int *fd, *fexit_fd, *link_fd;
+
+ bpf_max_tramp_links = get_bpf_max_tramp_links();
+ if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links"))
+ return;
+ fd = calloc(bpf_max_tramp_links * 2, sizeof(*fd));
+ if (!ASSERT_OK_PTR(fd, "fd"))
+ return;
+ fexit_fd = fd;
+ link_fd = fd + bpf_max_tramp_links;
const struct bpf_insn trace_program[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr load_attr = {
- .prog_type = BPF_PROG_TYPE_TRACING,
- .license = "GPL",
- .insns = trace_program,
- .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+ LIBBPF_OPTS(bpf_prog_load_opts, trace_opts,
.expected_attach_type = BPF_TRACE_FEXIT,
- };
+ );
- const struct bpf_insn skb_program[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
-
- struct bpf_load_program_attr skb_load_attr = {
- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .license = "GPL",
- .insns = skb_program,
- .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
- };
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1",
- load_attr.expected_attach_type);
- if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err))
+ trace_opts.expected_attach_type);
+ if (!ASSERT_GT(err, 0, "find_vmlinux_btf_id"))
goto out;
- load_attr.attach_btf_id = err;
-
- for (i = 0; i < CNT; i++) {
- fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error));
- if (CHECK(fexit_fd[i] < 0, "fexit loaded",
- "failed: %d errno %d\n", fexit_fd[i], errno))
+ trace_opts.attach_btf_id = err;
+
+ for (i = 0; i < bpf_max_tramp_links; i++) {
+ fexit_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL",
+ trace_program,
+ ARRAY_SIZE(trace_program),
+ &trace_opts);
+ if (!ASSERT_GE(fexit_fd[i], 0, "fexit load"))
goto out;
- link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
- if (CHECK(link_fd[i] < 0, "fexit attach failed",
- "prog %d failed: %d err %d\n", i, link_fd[i], errno))
+ link_fd[i] = bpf_link_create(fexit_fd[i], 0, BPF_TRACE_FEXIT, NULL);
+ if (!ASSERT_GE(link_fd[i], 0, "fexit attach"))
goto out;
}
- filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
- if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
- filter_fd, errno))
- goto out;
+ err = bpf_prog_test_run_opts(fexit_fd[0], &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
- err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
- 0, &prog_ret, 0);
- close(filter_fd);
- CHECK_FAIL(err);
out:
- for (i = 0; i < CNT; i++) {
- if (link_fd[i])
+ for (i = 0; i < bpf_max_tramp_links; i++) {
+ if (link_fd[i] > 0)
close(link_fd[i]);
- if (fexit_fd[i])
+ if (fexit_fd[i] > 0)
close(fexit_fd[i]);
}
+ free(fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 78d7a2765c27..94eed753560c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,37 +1,98 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fexit_test.skel.h"
+#include "fexit_test.lskel.h"
+#include "fexit_many_args.skel.h"
-void test_fexit_test(void)
+static int fexit_test_common(struct fexit_test_lskel *fexit_skel)
{
- struct fexit_test *fexit_skel = NULL;
int err, prog_fd, i;
- __u32 duration = 0, retval;
+ int link_fd;
__u64 *result;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- fexit_skel = fexit_test__open_and_load();
- if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
- goto cleanup;
+ err = fexit_test_lskel__attach(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_attach"))
+ return err;
- err = fexit_test__attach(fexit_skel);
- if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
- goto cleanup;
+ /* Check that already linked program can't be attached again. */
+ link_fd = fexit_test_lskel__test1__attach(fexit_skel);
+ if (!ASSERT_LT(link_fd, 0, "fexit_attach_link"))
+ return -1;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
result = (__u64 *)fexit_skel->bss;
- for (i = 0; i < 6; i++) {
- if (CHECK(result[i] != 1, "result",
- "fexit_test%d failed err %lld\n", i + 1, result[i]))
- goto cleanup;
+ for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) {
+ if (!ASSERT_EQ(result[i], 1, "fexit_result"))
+ return -1;
}
+ fexit_test_lskel__detach(fexit_skel);
+
+ /* zero results for re-attach test */
+ memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss));
+ return 0;
+}
+
+static void fexit_test(void)
+{
+ struct fexit_test_lskel *fexit_skel = NULL;
+ int err;
+
+ fexit_skel = fexit_test_lskel__open();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open"))
+ goto cleanup;
+
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
+ goto cleanup;
+
+ err = fexit_test_common(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_first_attach"))
+ goto cleanup;
+
+ err = fexit_test_common(fexit_skel);
+ ASSERT_OK(err, "fexit_second_attach");
+
cleanup:
- fexit_test__destroy(fexit_skel);
+ fexit_test_lskel__destroy(fexit_skel);
+}
+
+static void fexit_many_args(void)
+{
+ struct fexit_many_args *fexit_skel = NULL;
+ int err;
+
+ fexit_skel = fexit_many_args__open_and_load();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_many_args_skel_load"))
+ goto cleanup;
+
+ err = fexit_many_args__attach(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_many_args_attach"))
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+
+ ASSERT_EQ(fexit_skel->bss->test1_result, 1,
+ "fexit_many_args_result1");
+ ASSERT_EQ(fexit_skel->bss->test2_result, 1,
+ "fexit_many_args_result2");
+ ASSERT_EQ(fexit_skel->bss->test3_result, 1,
+ "fexit_many_args_result3");
+
+cleanup:
+ fexit_many_args__destroy(fexit_skel);
+}
+
+void test_fexit_test(void)
+{
+ if (test__start_subtest("fexit"))
+ fexit_test();
+ if (test__start_subtest("fexit_many_args"))
+ fexit_many_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
new file mode 100644
index 000000000000..bd7658958004
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/rtnetlink.h>
+#include <sys/types.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "fib_lookup.skel.h"
+
+#define NS_TEST "fib_lookup_ns"
+#define IPV6_IFACE_ADDR "face::face"
+#define IPV6_IFACE_ADDR_SEC "cafe::cafe"
+#define IPV6_ADDR_DST "face::3"
+#define IPV6_NUD_FAILED_ADDR "face::1"
+#define IPV6_NUD_STALE_ADDR "face::2"
+#define IPV4_IFACE_ADDR "10.0.0.254"
+#define IPV4_IFACE_ADDR_SEC "10.1.0.254"
+#define IPV4_ADDR_DST "10.2.0.254"
+#define IPV4_NUD_FAILED_ADDR "10.0.0.1"
+#define IPV4_NUD_STALE_ADDR "10.0.0.2"
+#define IPV4_TBID_ADDR "172.0.0.254"
+#define IPV4_TBID_NET "172.0.0.0"
+#define IPV4_TBID_DST "172.0.0.2"
+#define IPV6_TBID_ADDR "fd00::FFFF"
+#define IPV6_TBID_NET "fd00::"
+#define IPV6_TBID_DST "fd00::2"
+#define MARK_NO_POLICY 33
+#define MARK 42
+#define MARK_TABLE "200"
+#define IPV4_REMOTE_DST "1.2.3.4"
+#define IPV4_LOCAL "10.4.0.3"
+#define IPV4_GW1 "10.4.0.1"
+#define IPV4_GW2 "10.4.0.2"
+#define IPV6_REMOTE_DST "be:ef::b0:10"
+#define IPV6_LOCAL "fd01::3"
+#define IPV6_GW1 "fd01::1"
+#define IPV6_GW2 "fd01::2"
+#define DMAC "11:11:11:11:11:11"
+#define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
+#define DMAC2 "01:01:01:01:01:01"
+#define DMAC_INIT2 { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, }
+
+struct fib_lookup_test {
+ const char *desc;
+ const char *daddr;
+ int expected_ret;
+ const char *expected_src;
+ const char *expected_dst;
+ int lookup_flags;
+ __u32 tbid;
+ __u8 dmac[6];
+ __u32 mark;
+};
+
+static const struct fib_lookup_test tests[] = {
+ { .desc = "IPv6 failed neigh",
+ .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_NO_NEIGH, },
+ { .desc = "IPv6 stale neigh",
+ .daddr = IPV6_NUD_STALE_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .dmac = DMAC_INIT, },
+ { .desc = "IPv6 skip neigh",
+ .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 failed neigh",
+ .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_NO_NEIGH, },
+ { .desc = "IPv4 stale neigh",
+ .daddr = IPV4_NUD_STALE_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .dmac = DMAC_INIT, },
+ { .desc = "IPv4 skip neigh",
+ .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 TBID lookup failure",
+ .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+ .tbid = RT_TABLE_MAIN, },
+ { .desc = "IPv4 TBID lookup success",
+ .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+ .dmac = DMAC_INIT2, },
+ { .desc = "IPv6 TBID lookup failure",
+ .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+ .tbid = RT_TABLE_MAIN, },
+ { .desc = "IPv6 TBID lookup success",
+ .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+ .dmac = DMAC_INIT2, },
+ { .desc = "IPv4 set src addr from netdev",
+ .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV4_IFACE_ADDR,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 set src addr from netdev",
+ .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV6_IFACE_ADDR,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 set prefsrc addr from route",
+ .daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV4_IFACE_ADDR_SEC,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 set prefsrc addr route",
+ .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV6_IFACE_ADDR_SEC,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ /* policy routing */
+ { .desc = "IPv4 policy routing, default",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 policy routing, mark doesn't point to a policy",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK_NO_POLICY, },
+ { .desc = "IPv4 policy routing, mark points to a policy",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW2,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv4 policy routing, mark points to a policy, but no flag",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv6 policy routing, default",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 policy routing, mark doesn't point to a policy",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK_NO_POLICY, },
+ { .desc = "IPv6 policy routing, mark points to a policy",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW2,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv6 policy routing, mark points to a policy, but no flag",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+};
+
+static int setup_netns(void)
+{
+ int err;
+
+ SYS(fail, "ip link add veth1 type veth peer name veth2");
+ SYS(fail, "ip link set dev veth1 up");
+ SYS(fail, "ip link set dev veth2 up");
+
+ err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)"))
+ goto fail;
+
+ err = write_sysctl("/proc/sys/net/ipv6/neigh/veth1/gc_stale_time", "900");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv6.neigh.veth1.gc_stale_time)"))
+ goto fail;
+
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC);
+
+ SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
+
+ /* Setup for prefsrc IP addr selection */
+ SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC);
+ SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC);
+
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC);
+ SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC);
+
+ /* Setup for tbid lookup tests */
+ SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR);
+ SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET);
+ SYS(fail, "ip route add table 100 %s/24 dev veth2", IPV4_TBID_NET);
+ SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV4_TBID_DST, DMAC2);
+
+ SYS(fail, "ip addr add %s/64 dev veth2", IPV6_TBID_ADDR);
+ SYS(fail, "ip -6 route del %s/64 dev veth2", IPV6_TBID_NET);
+ SYS(fail, "ip -6 route add table 100 %s/64 dev veth2", IPV6_TBID_NET);
+ SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV6_TBID_DST, DMAC2);
+
+ err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)"))
+ goto fail;
+
+ err = write_sysctl("/proc/sys/net/ipv6/conf/veth1/forwarding", "1");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
+ goto fail;
+
+ /* Setup for policy routing tests */
+ SYS(fail, "ip addr add %s/24 dev veth1", IPV4_LOCAL);
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_LOCAL);
+ SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
+ SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
+ SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
+ SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
+ SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+ SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int set_lookup_params(struct bpf_fib_lookup *params,
+ const struct fib_lookup_test *test,
+ int ifindex)
+{
+ int ret;
+
+ memset(params, 0, sizeof(*params));
+
+ params->l4_protocol = IPPROTO_TCP;
+ params->ifindex = ifindex;
+ params->tbid = test->tbid;
+ params->mark = test->mark;
+
+ if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
+ params->family = AF_INET6;
+ if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
+ ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
+ if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
+ return -1;
+ }
+
+ return 0;
+ }
+
+ ret = inet_pton(AF_INET, test->daddr, &params->ipv4_dst);
+ if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
+ return -1;
+ params->family = AF_INET;
+
+ if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
+ ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
+ if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static void mac_str(char *b, const __u8 *mac)
+{
+ sprintf(b, "%02X:%02X:%02X:%02X:%02X:%02X",
+ mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+}
+
+static void assert_ip_address(int family, void *addr, const char *expected_str)
+{
+ char str[INET6_ADDRSTRLEN];
+ u8 expected_addr[16];
+ int addr_len = 0;
+ int ret;
+
+ switch (family) {
+ case AF_INET6:
+ ret = inet_pton(AF_INET6, expected_str, expected_addr);
+ ASSERT_EQ(ret, 1, "inet_pton(AF_INET6, expected_str)");
+ addr_len = 16;
+ break;
+ case AF_INET:
+ ret = inet_pton(AF_INET, expected_str, expected_addr);
+ ASSERT_EQ(ret, 1, "inet_pton(AF_INET, expected_str)");
+ addr_len = 4;
+ break;
+ default:
+ PRINT_FAIL("invalid address family: %d", family);
+ break;
+ }
+
+ if (memcmp(addr, expected_addr, addr_len)) {
+ inet_ntop(family, addr, str, sizeof(str));
+ PRINT_FAIL("expected %s actual %s ", expected_str, str);
+ }
+}
+
+static void assert_src_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+ assert_ip_address(params->family, params->ipv6_src, expected);
+}
+
+static void assert_dst_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+ assert_ip_address(params->family, params->ipv6_dst, expected);
+}
+
+void test_fib_lookup(void)
+{
+ struct bpf_fib_lookup *fib_params;
+ struct nstoken *nstoken = NULL;
+ struct __sk_buff skb = { };
+ struct fib_lookup *skel;
+ int prog_fd, err, ret, i;
+
+ /* The test does not use the skb->data, so
+ * use pkt_v6 for both v6 and v4 test.
+ */
+ LIBBPF_OPTS(bpf_test_run_opts, run_opts,
+ .data_in = &pkt_v6,
+ .data_size_in = sizeof(pkt_v6),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ skel = fib_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.fib_lookup);
+
+ SYS(fail, "ip netns add %s", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
+ if (setup_netns())
+ goto fail;
+
+ skb.ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(skb.ifindex, 0, "if_nametoindex(veth1)"))
+ goto fail;
+
+ fib_params = &skel->bss->fib_params;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ printf("Testing %s ", tests[i].desc);
+
+ if (set_lookup_params(fib_params, &tests[i], skb.ifindex))
+ continue;
+
+ skel->bss->fib_lookup_ret = -1;
+ skel->bss->lookup_flags = tests[i].lookup_flags;
+
+ err = bpf_prog_test_run_opts(prog_fd, &run_opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ continue;
+
+ ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret,
+ "fib_lookup_ret");
+
+ if (tests[i].expected_src)
+ assert_src_ip(fib_params, tests[i].expected_src);
+
+ if (tests[i].expected_dst)
+ assert_dst_ip(fib_params, tests[i].expected_dst);
+
+ ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
+ if (!ASSERT_EQ(ret, 0, "dmac not match")) {
+ char expected[18], actual[18];
+
+ mac_str(expected, tests[i].dmac);
+ mac_str(actual, fib_params->dmac);
+ printf("dmac expected %s actual %s ", expected, actual);
+ }
+
+ // ensure tbid is zero'd out after fib lookup.
+ if (tests[i].lookup_flags & BPF_FIB_LOOKUP_DIRECT) {
+ if (!ASSERT_EQ(skel->bss->fib_params.tbid, 0,
+ "expected fib_params.tbid to be zero"))
+ goto fail;
+ }
+ }
+
+fail:
+ if (nstoken)
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " NS_TEST);
+ fib_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c
new file mode 100644
index 000000000000..5cde32b35da4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "file_reader.skel.h"
+#include "file_reader_fail.skel.h"
+#include <dlfcn.h>
+#include <sys/mman.h>
+
+const char *user_ptr = "hello world";
+char file_contents[256000];
+
+void *get_executable_base_addr(void)
+{
+ Dl_info info;
+
+ if (!dladdr((void *)&get_executable_base_addr, &info)) {
+ fprintf(stderr, "dladdr failed\n");
+ return NULL;
+ }
+
+ return info.dli_fbase;
+}
+
+static int initialize_file_contents(void)
+{
+ int fd, page_sz = sysconf(_SC_PAGESIZE);
+ ssize_t n = 0, cur, off;
+ void *addr;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n"))
+ return 1;
+
+ do {
+ cur = read(fd, file_contents + n, sizeof(file_contents) - n);
+ if (!ASSERT_GT(cur, 0, "read success"))
+ break;
+ n += cur;
+ } while (n < sizeof(file_contents));
+
+ close(fd);
+
+ if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n"))
+ return 1;
+
+ addr = get_executable_base_addr();
+ if (!ASSERT_NEQ(addr, NULL, "get executable address"))
+ return 1;
+
+ /* page-align base file address */
+ addr = (void *)((unsigned long)addr & ~(page_sz - 1));
+
+ /*
+ * Page out range 0..512K, use 0..256K for positive tests and
+ * 256K..512K for negative tests expecting page faults
+ */
+ for (off = 0; off < sizeof(file_contents) * 2; off += page_sz) {
+ if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT),
+ "madvise pageout"))
+ return errno;
+ }
+
+ return 0;
+}
+
+static void run_test(const char *prog_name)
+{
+ struct file_reader *skel;
+ struct bpf_program *prog;
+ int err, fd;
+
+ err = initialize_file_contents();
+ if (!ASSERT_OK(err, "initialize file contents"))
+ return;
+
+ skel = file_reader__open();
+ if (!ASSERT_OK_PTR(skel, "file_reader__open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ bpf_program__set_autoload(prog, strcmp(bpf_program__name(prog), prog_name) == 0);
+ }
+
+ memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents));
+ skel->bss->pid = getpid();
+
+ err = file_reader__load(skel);
+ if (!ASSERT_OK(err, "file_reader__load"))
+ goto cleanup;
+
+ err = file_reader__attach(skel);
+ if (!ASSERT_OK(err, "file_reader__attach"))
+ goto cleanup;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ if (fd >= 0)
+ close(fd);
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+ ASSERT_EQ(skel->bss->run_success, 1, "run_success");
+cleanup:
+ file_reader__destroy(skel);
+}
+
+void test_file_reader(void)
+{
+ if (test__start_subtest("on_open_expect_fault"))
+ run_test("on_open_expect_fault");
+
+ if (test__start_subtest("on_open_validate_file_read"))
+ run_test("on_open_validate_file_read");
+
+ if (test__start_subtest("negative"))
+ RUN_TESTS(file_reader_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
new file mode 100644
index 000000000000..e40114620751
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -0,0 +1,652 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/limits.h>
+#include <test_progs.h>
+#include "trace_helpers.h"
+#include "test_fill_link_info.skel.h"
+#include "bpf/libbpf_internal.h"
+
+#define TP_CAT "sched"
+#define TP_NAME "sched_switch"
+
+static const char *kmulti_syms[] = {
+ "bpf_fentry_test2",
+ "bpf_fentry_test1",
+ "bpf_fentry_test3",
+};
+#define KMULTI_CNT ARRAY_SIZE(kmulti_syms)
+static __u64 kmulti_addrs[KMULTI_CNT];
+static __u64 kmulti_cookies[] = { 3, 1, 2 };
+
+#define KPROBE_FUNC "bpf_fentry_test1"
+static __u64 kprobe_addr;
+
+#define UPROBE_FILE "/proc/self/exe"
+static ssize_t uprobe_offset;
+/* uprobe attach point */
+static noinline void uprobe_func(void)
+{
+ asm volatile ("");
+}
+
+#define PERF_EVENT_COOKIE 0xdeadbeef
+
+static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
+ ssize_t offset, ssize_t entry_offset)
+{
+ ssize_t ref_ctr_offset = entry_offset /* ref_ctr_offset for uprobes */;
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ char buf[PATH_MAX];
+ int err;
+
+ memset(&info, 0, sizeof(info));
+ buf[0] = '\0';
+
+again:
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type"))
+ return -1;
+ if (!ASSERT_EQ(info.perf_event.type, type, "perf_type_match"))
+ return -1;
+
+ switch (info.perf_event.type) {
+ case BPF_PERF_EVENT_KPROBE:
+ case BPF_PERF_EVENT_KRETPROBE:
+ ASSERT_EQ(info.perf_event.kprobe.offset, offset, "kprobe_offset");
+
+ /* In case kernel.kptr_restrict is not permitted or MAX_SYMS is reached */
+ if (addr)
+ ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset,
+ "kprobe_addr");
+
+ ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie");
+
+ ASSERT_EQ(info.perf_event.kprobe.name_len, strlen(KPROBE_FUNC) + 1,
+ "name_len");
+ if (!info.perf_event.kprobe.func_name) {
+ info.perf_event.kprobe.func_name = ptr_to_u64(&buf);
+ info.perf_event.kprobe.name_len = sizeof(buf);
+ goto again;
+ }
+
+ err = strncmp(u64_to_ptr(info.perf_event.kprobe.func_name), KPROBE_FUNC,
+ strlen(KPROBE_FUNC));
+ ASSERT_EQ(err, 0, "cmp_kprobe_func_name");
+ break;
+ case BPF_PERF_EVENT_TRACEPOINT:
+ ASSERT_EQ(info.perf_event.tracepoint.name_len, strlen(TP_NAME) + 1,
+ "name_len");
+ if (!info.perf_event.tracepoint.tp_name) {
+ info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf);
+ info.perf_event.tracepoint.name_len = sizeof(buf);
+ goto again;
+ }
+
+ ASSERT_EQ(info.perf_event.tracepoint.cookie, PERF_EVENT_COOKIE, "tracepoint_cookie");
+
+ err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME,
+ strlen(TP_NAME));
+ ASSERT_EQ(err, 0, "cmp_tp_name");
+ break;
+ case BPF_PERF_EVENT_UPROBE:
+ case BPF_PERF_EVENT_URETPROBE:
+ ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset");
+ ASSERT_EQ(info.perf_event.uprobe.ref_ctr_offset, ref_ctr_offset, "uprobe_ref_ctr_offset");
+
+ ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1,
+ "name_len");
+ if (!info.perf_event.uprobe.file_name) {
+ info.perf_event.uprobe.file_name = ptr_to_u64(&buf);
+ info.perf_event.uprobe.name_len = sizeof(buf);
+ goto again;
+ }
+
+ ASSERT_EQ(info.perf_event.uprobe.cookie, PERF_EVENT_COOKIE, "uprobe_cookie");
+
+ err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE,
+ strlen(UPROBE_FILE));
+ ASSERT_EQ(err, 0, "cmp_file_name");
+ break;
+ case BPF_PERF_EVENT_EVENT:
+ ASSERT_EQ(info.perf_event.event.type, PERF_TYPE_SOFTWARE, "event_type");
+ ASSERT_EQ(info.perf_event.event.config, PERF_COUNT_SW_PAGE_FAULTS, "event_config");
+ ASSERT_EQ(info.perf_event.event.cookie, PERF_EVENT_COOKIE, "event_cookie");
+ break;
+ default:
+ err = -1;
+ break;
+ }
+ return err;
+}
+
+static void kprobe_fill_invalid_user_buffer(int fd)
+{
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ int err;
+
+ memset(&info, 0, sizeof(info));
+
+ info.perf_event.kprobe.func_name = 0x1; /* invalid address */
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "invalid_buff_and_len");
+
+ info.perf_event.kprobe.name_len = 64;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "invalid_buff");
+
+ info.perf_event.kprobe.func_name = 0;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "invalid_len");
+
+ ASSERT_EQ(info.perf_event.kprobe.addr, 0, "func_addr");
+ ASSERT_EQ(info.perf_event.kprobe.offset, 0, "func_offset");
+ ASSERT_EQ(info.perf_event.type, 0, "type");
+}
+
+static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
+ enum bpf_perf_event_type type,
+ bool invalid)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
+ .attach_mode = PROBE_ATTACH_MODE_LINK,
+ .retprobe = type == BPF_PERF_EVENT_KRETPROBE,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ ssize_t entry_offset = 0;
+ struct bpf_link *link;
+ int link_fd, err;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, KPROBE_FUNC, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_kprobe"))
+ return;
+
+ link_fd = bpf_link__fd(link);
+ if (!invalid) {
+ /* See also arch_adjust_kprobe_addr(). */
+ if (skel->kconfig->CONFIG_X86_KERNEL_IBT)
+ entry_offset = 4;
+ if (skel->kconfig->CONFIG_PPC64 &&
+ skel->kconfig->CONFIG_KPROBES_ON_FTRACE &&
+ !skel->kconfig->CONFIG_PPC_FTRACE_OUT_OF_LINE)
+ entry_offset = 4;
+ err = verify_perf_link_info(link_fd, type, kprobe_addr, 0, entry_offset);
+ ASSERT_OK(err, "verify_perf_link_info");
+ } else {
+ kprobe_fill_invalid_user_buffer(link_fd);
+ }
+ bpf_link__destroy(link);
+}
+
+static void test_tp_fill_link_info(struct test_fill_link_info *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ struct bpf_link *link;
+ int link_fd, err;
+
+ link = bpf_program__attach_tracepoint_opts(skel->progs.tp_run, TP_CAT, TP_NAME, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_tp"))
+ return;
+
+ link_fd = bpf_link__fd(link);
+ err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_TRACEPOINT, 0, 0, 0);
+ ASSERT_OK(err, "verify_perf_link_info");
+ bpf_link__destroy(link);
+}
+
+static void test_event_fill_link_info(struct test_fill_link_info *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ struct bpf_link *link;
+ int link_fd, err, pfd;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_PAGE_FAULTS,
+ .freq = 1,
+ .sample_freq = 1,
+ .size = sizeof(struct perf_event_attr),
+ };
+
+ pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */,
+ -1 /* group id */, 0 /* flags */);
+ if (!ASSERT_GE(pfd, 0, "perf_event_open"))
+ return;
+
+ link = bpf_program__attach_perf_event_opts(skel->progs.event_run, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_event"))
+ goto error;
+
+ link_fd = bpf_link__fd(link);
+ err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_EVENT, 0, 0, 0);
+ ASSERT_OK(err, "verify_perf_link_info");
+ bpf_link__destroy(link);
+
+error:
+ close(pfd);
+}
+
+static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
+ enum bpf_perf_event_type type)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = type == BPF_PERF_EVENT_URETPROBE,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ const char *sema[1] = {
+ "uprobe_link_info_sema_1",
+ };
+ __u64 *ref_ctr_offset;
+ struct bpf_link *link;
+ int link_fd, err;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, sema,
+ (unsigned long **) &ref_ctr_offset, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object"))
+ return;
+
+ opts.ref_ctr_offset = *ref_ctr_offset;
+ link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
+ 0, /* self pid */
+ UPROBE_FILE, uprobe_offset,
+ &opts);
+ if (!ASSERT_OK_PTR(link, "attach_uprobe"))
+ goto out;
+
+ link_fd = bpf_link__fd(link);
+ err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, *ref_ctr_offset);
+ ASSERT_OK(err, "verify_perf_link_info");
+ bpf_link__destroy(link);
+out:
+ free(ref_ctr_offset);
+}
+
+static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies)
+{
+ __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT];
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ int flags, i, err;
+
+ memset(&info, 0, sizeof(info));
+
+again:
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_KPROBE_MULTI, "kmulti_type"))
+ return -1;
+
+ ASSERT_EQ(info.kprobe_multi.count, KMULTI_CNT, "func_cnt");
+ flags = info.kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN;
+ if (!retprobe)
+ ASSERT_EQ(flags, 0, "kmulti_flags");
+ else
+ ASSERT_NEQ(flags, 0, "kretmulti_flags");
+
+ if (!info.kprobe_multi.addrs) {
+ info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
+ goto again;
+ }
+ for (i = 0; i < KMULTI_CNT; i++) {
+ ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs");
+ ASSERT_EQ(cookies[i], has_cookies ? kmulti_cookies[i] : 0,
+ "kmulti_cookies_value");
+ }
+ return 0;
+}
+
+static void verify_kmulti_invalid_user_buffer(int fd)
+{
+ __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT];
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ int err, i;
+
+ memset(&info, 0, sizeof(info));
+
+ info.kprobe_multi.count = KMULTI_CNT;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "no_addr");
+
+ info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.count = 0;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "no_cnt");
+
+ for (i = 0; i < KMULTI_CNT; i++)
+ addrs[i] = 0;
+ info.kprobe_multi.count = KMULTI_CNT - 1;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -ENOSPC, "smaller_cnt");
+ for (i = 0; i < KMULTI_CNT - 1; i++)
+ ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs");
+ ASSERT_EQ(addrs[i], 0, "kmulti_addrs");
+
+ for (i = 0; i < KMULTI_CNT; i++)
+ addrs[i] = 0;
+ info.kprobe_multi.count = KMULTI_CNT + 1;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, 0, "bigger_cnt");
+ for (i = 0; i < KMULTI_CNT; i++)
+ ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs");
+
+ info.kprobe_multi.count = KMULTI_CNT;
+ info.kprobe_multi.addrs = 0x1; /* invalid addr */
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "invalid_buff_addrs");
+
+ info.kprobe_multi.count = KMULTI_CNT;
+ info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.cookies = 0x1; /* invalid addr */
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "invalid_buff_cookies");
+
+ /* cookies && !count */
+ info.kprobe_multi.count = 0;
+ info.kprobe_multi.addrs = ptr_to_u64(NULL);
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "invalid_cookies_count");
+}
+
+static int symbols_cmp_r(const void *a, const void *b)
+{
+ const char **str_a = (const char **) a;
+ const char **str_b = (const char **) b;
+
+ return strcmp(*str_a, *str_b);
+}
+
+static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel,
+ bool retprobe, bool cookies,
+ bool invalid)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct bpf_link *link;
+ int link_fd, err;
+
+ opts.syms = kmulti_syms;
+ opts.cookies = cookies ? kmulti_cookies : NULL;
+ opts.cnt = KMULTI_CNT;
+ opts.retprobe = retprobe;
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_kprobe_multi"))
+ return;
+
+ link_fd = bpf_link__fd(link);
+ if (!invalid) {
+ err = verify_kmulti_link_info(link_fd, retprobe, cookies);
+ ASSERT_OK(err, "verify_kmulti_link_info");
+ } else {
+ verify_kmulti_invalid_user_buffer(link_fd);
+ }
+ bpf_link__destroy(link);
+}
+
+#define SEC(name) __attribute__((section(name), used))
+
+static short uprobe_link_info_sema_1 SEC(".probes");
+static short uprobe_link_info_sema_2 SEC(".probes");
+static short uprobe_link_info_sema_3 SEC(".probes");
+
+noinline void uprobe_link_info_func_1(void)
+{
+ asm volatile ("");
+ uprobe_link_info_sema_1++;
+}
+
+noinline void uprobe_link_info_func_2(void)
+{
+ asm volatile ("");
+ uprobe_link_info_sema_2++;
+}
+
+noinline void uprobe_link_info_func_3(void)
+{
+ asm volatile ("");
+ uprobe_link_info_sema_3++;
+}
+
+static int
+verify_umulti_link_info(int fd, bool retprobe, __u64 *offsets,
+ __u64 *cookies, __u64 *ref_ctr_offsets)
+{
+ char path[PATH_MAX], path_buf[PATH_MAX];
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ __u64 ref_ctr_offsets_buf[3];
+ __u64 offsets_buf[3];
+ __u64 cookies_buf[3];
+ int i, err, bit;
+ __u32 count = 0;
+
+ memset(path, 0, sizeof(path));
+ err = readlink("/proc/self/exe", path, sizeof(path));
+ if (!ASSERT_NEQ(err, -1, "readlink"))
+ return -1;
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd"))
+ return -1;
+
+ ASSERT_EQ(info.uprobe_multi.count, 3, "info.uprobe_multi.count");
+ ASSERT_EQ(info.uprobe_multi.path_size, strlen(path) + 1,
+ "info.uprobe_multi.path_size");
+
+ for (bit = 0; bit < 8; bit++) {
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.path = ptr_to_u64(path_buf);
+ info.uprobe_multi.path_size = sizeof(path_buf);
+ info.uprobe_multi.count = count;
+
+ if (bit & 0x1)
+ info.uprobe_multi.offsets = ptr_to_u64(offsets_buf);
+ if (bit & 0x2)
+ info.uprobe_multi.cookies = ptr_to_u64(cookies_buf);
+ if (bit & 0x4)
+ info.uprobe_multi.ref_ctr_offsets = ptr_to_u64(ref_ctr_offsets_buf);
+
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_UPROBE_MULTI, "info.type"))
+ return -1;
+
+ ASSERT_EQ(info.uprobe_multi.pid, getpid(), "info.uprobe_multi.pid");
+ ASSERT_EQ(info.uprobe_multi.count, 3, "info.uprobe_multi.count");
+ ASSERT_EQ(info.uprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN,
+ retprobe, "info.uprobe_multi.flags.retprobe");
+ ASSERT_EQ(info.uprobe_multi.path_size, strlen(path) + 1, "info.uprobe_multi.path_size");
+ ASSERT_STREQ(path_buf, path, "info.uprobe_multi.path");
+
+ for (i = 0; i < info.uprobe_multi.count; i++) {
+ if (info.uprobe_multi.offsets)
+ ASSERT_EQ(offsets_buf[i], offsets[i], "info.uprobe_multi.offsets");
+ if (info.uprobe_multi.cookies)
+ ASSERT_EQ(cookies_buf[i], cookies[i], "info.uprobe_multi.cookies");
+ if (info.uprobe_multi.ref_ctr_offsets) {
+ ASSERT_EQ(ref_ctr_offsets_buf[i], ref_ctr_offsets[i],
+ "info.uprobe_multi.ref_ctr_offsets");
+ }
+ }
+ count = count ?: info.uprobe_multi.count;
+ }
+
+ return 0;
+}
+
+static void verify_umulti_invalid_user_buffer(int fd)
+{
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ __u64 buf[3];
+ int err;
+
+ /* upath_size defined, not path */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.path_size = 3;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "failed_upath_size");
+
+ /* path defined, but small */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.path = ptr_to_u64(buf);
+ info.uprobe_multi.path_size = 3;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_LT(err, 0, "failed_upath_small");
+
+ /* path has wrong pointer */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.path_size = PATH_MAX;
+ info.uprobe_multi.path = 123;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "failed_bad_path_ptr");
+
+ /* count zero, with offsets */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.offsets = ptr_to_u64(buf);
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "failed_count");
+
+ /* offsets not big enough */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.offsets = ptr_to_u64(buf);
+ info.uprobe_multi.count = 2;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -ENOSPC, "failed_small_count");
+
+ /* offsets has wrong pointer */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.offsets = 123;
+ info.uprobe_multi.count = 3;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "failed_wrong_offsets");
+}
+
+static void test_uprobe_multi_fill_link_info(struct test_fill_link_info *skel,
+ bool retprobe, bool invalid)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .retprobe = retprobe,
+ );
+ const char *syms[3] = {
+ "uprobe_link_info_func_1",
+ "uprobe_link_info_func_2",
+ "uprobe_link_info_func_3",
+ };
+ __u64 cookies[3] = {
+ 0xdead,
+ 0xbeef,
+ 0xcafe,
+ };
+ const char *sema[3] = {
+ "uprobe_link_info_sema_1",
+ "uprobe_link_info_sema_2",
+ "uprobe_link_info_sema_3",
+ };
+ __u64 *offsets = NULL, *ref_ctr_offsets;
+ struct bpf_link *link;
+ int link_fd, err;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 3, sema,
+ (unsigned long **) &ref_ctr_offsets, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object"))
+ return;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 3, syms,
+ (unsigned long **) &offsets, STT_FUNC);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_func"))
+ goto out;
+
+ opts.syms = syms;
+ opts.cookies = &cookies[0];
+ opts.ref_ctr_offsets = (unsigned long *) &ref_ctr_offsets[0];
+ opts.cnt = ARRAY_SIZE(syms);
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.umulti_run, 0,
+ "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto out;
+
+ link_fd = bpf_link__fd(link);
+ if (invalid)
+ verify_umulti_invalid_user_buffer(link_fd);
+ else
+ verify_umulti_link_info(link_fd, retprobe, offsets, cookies, ref_ctr_offsets);
+
+ bpf_link__destroy(link);
+out:
+ free(ref_ctr_offsets);
+ free(offsets);
+}
+
+void test_fill_link_info(void)
+{
+ struct test_fill_link_info *skel;
+ int i;
+
+ skel = test_fill_link_info__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ /* load kallsyms to compare the addr */
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+ goto cleanup;
+
+ kprobe_addr = ksym_get_addr(KPROBE_FUNC);
+ if (test__start_subtest("kprobe_link_info"))
+ test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, false);
+ if (test__start_subtest("kretprobe_link_info"))
+ test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KRETPROBE, false);
+ if (test__start_subtest("kprobe_invalid_ubuff"))
+ test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, true);
+ if (test__start_subtest("tracepoint_link_info"))
+ test_tp_fill_link_info(skel);
+ if (test__start_subtest("event_link_info"))
+ test_event_fill_link_info(skel);
+
+ uprobe_offset = get_uprobe_offset(&uprobe_func);
+ if (test__start_subtest("uprobe_link_info"))
+ test_uprobe_fill_link_info(skel, BPF_PERF_EVENT_UPROBE);
+ if (test__start_subtest("uretprobe_link_info"))
+ test_uprobe_fill_link_info(skel, BPF_PERF_EVENT_URETPROBE);
+
+ qsort(kmulti_syms, KMULTI_CNT, sizeof(kmulti_syms[0]), symbols_cmp_r);
+ for (i = 0; i < KMULTI_CNT; i++)
+ kmulti_addrs[i] = ksym_get_addr(kmulti_syms[i]);
+ if (test__start_subtest("kprobe_multi_link_info")) {
+ test_kprobe_multi_fill_link_info(skel, false, false, false);
+ test_kprobe_multi_fill_link_info(skel, false, true, false);
+ }
+ if (test__start_subtest("kretprobe_multi_link_info")) {
+ test_kprobe_multi_fill_link_info(skel, true, false, false);
+ test_kprobe_multi_fill_link_info(skel, true, true, false);
+ }
+ if (test__start_subtest("kprobe_multi_invalid_ubuff"))
+ test_kprobe_multi_fill_link_info(skel, true, true, true);
+
+ if (test__start_subtest("uprobe_multi_link_info"))
+ test_uprobe_multi_fill_link_info(skel, false, false);
+ if (test__start_subtest("uretprobe_multi_link_info"))
+ test_uprobe_multi_fill_link_info(skel, true, false);
+ if (test__start_subtest("uprobe_multi_invalid"))
+ test_uprobe_multi_fill_link_info(skel, false, true);
+
+cleanup:
+ test_fill_link_info__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
new file mode 100644
index 000000000000..f7619e0ade10
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "find_vma.skel.h"
+#include "find_vma_fail1.skel.h"
+#include "find_vma_fail2.skel.h"
+
+static void test_and_reset_skel(struct find_vma *skel, int expected_find_zero_ret, bool need_test)
+{
+ if (need_test) {
+ ASSERT_EQ(skel->bss->found_vm_exec, 1, "found_vm_exec");
+ ASSERT_EQ(skel->data->find_addr_ret, 0, "find_addr_ret");
+ ASSERT_EQ(skel->data->find_zero_ret, expected_find_zero_ret, "find_zero_ret");
+ ASSERT_OK_PTR(strstr(skel->bss->d_iname, "test_progs"), "find_test_progs");
+ }
+
+ skel->bss->found_vm_exec = 0;
+ skel->data->find_addr_ret = -1;
+ skel->data->find_zero_ret = -1;
+ skel->bss->d_iname[0] = 0;
+}
+
+static int open_pe(void)
+{
+ struct perf_event_attr attr = {0};
+ int pfd;
+
+ /* create perf event */
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 1000;
+ pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
+
+ return pfd >= 0 ? pfd : -errno;
+}
+
+static bool find_vma_pe_condition(struct find_vma *skel)
+{
+ return skel->bss->found_vm_exec == 0 ||
+ skel->data->find_addr_ret != 0 ||
+ skel->data->find_zero_ret == -1 ||
+ strcmp(skel->bss->d_iname, "test_progs") != 0;
+}
+
+static void test_find_vma_pe(struct find_vma *skel)
+{
+ struct bpf_link *link = NULL;
+ volatile int j = 0;
+ int pfd, i;
+ const int one_bn = 1000000000;
+
+ pfd = open_pe();
+ if (pfd < 0) {
+ if (pfd == -ENOENT || pfd == -EOPNOTSUPP) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_GE(pfd, 0, "perf_event_open"))
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(skel->progs.handle_pe, pfd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+
+ for (i = 0; i < one_bn && find_vma_pe_condition(skel); ++i)
+ ++j;
+
+ test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */, i == one_bn);
+cleanup:
+ bpf_link__destroy(link);
+ close(pfd);
+}
+
+static void test_find_vma_kprobe(struct find_vma *skel)
+{
+ int err;
+
+ err = find_vma__attach(skel);
+ if (!ASSERT_OK(err, "get_branch_snapshot__attach"))
+ return;
+
+ getpgid(skel->bss->target_pid);
+ test_and_reset_skel(skel, -ENOENT /* could not find vma for ptr 0 */, true);
+}
+
+static void test_illegal_write_vma(void)
+{
+ struct find_vma_fail1 *skel;
+
+ skel = find_vma_fail1__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "find_vma_fail1__open_and_load"))
+ find_vma_fail1__destroy(skel);
+}
+
+static void test_illegal_write_task(void)
+{
+ struct find_vma_fail2 *skel;
+
+ skel = find_vma_fail2__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "find_vma_fail2__open_and_load"))
+ find_vma_fail2__destroy(skel);
+}
+
+void serial_test_find_vma(void)
+{
+ struct find_vma *skel;
+
+ skel = find_vma__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "find_vma__open_and_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->addr = (__u64)(uintptr_t)test_find_vma_pe;
+
+ test_find_vma_pe(skel);
+ test_find_vma_kprobe(skel);
+
+ find_vma__destroy(skel);
+ test_illegal_write_vma();
+ test_illegal_write_task();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index cd6dc80edf18..08bae13248c4 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -1,43 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
#include <test_progs.h>
#include <network_helpers.h>
-#include <error.h>
-#include <linux/if.h>
#include <linux/if_tun.h>
#include <sys/uio.h>
#include "bpf_flow.skel.h"
+#define TEST_NS "flow_dissector_ns"
+#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
+#define TEST_NAME_MAX_LEN 64
+
#ifndef IP_MF
#define IP_MF 0x2000
#endif
-#define CHECK_FLOW_KEYS(desc, got, expected) \
- CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \
- desc, \
- "nhoff=%u/%u " \
- "thoff=%u/%u " \
- "addr_proto=0x%x/0x%x " \
- "is_frag=%u/%u " \
- "is_first_frag=%u/%u " \
- "is_encap=%u/%u " \
- "ip_proto=0x%x/0x%x " \
- "n_proto=0x%x/0x%x " \
- "flow_label=0x%x/0x%x " \
- "sport=%u/%u " \
- "dport=%u/%u\n", \
- got.nhoff, expected.nhoff, \
- got.thoff, expected.thoff, \
- got.addr_proto, expected.addr_proto, \
- got.is_frag, expected.is_frag, \
- got.is_first_frag, expected.is_first_frag, \
- got.is_encap, expected.is_encap, \
- got.ip_proto, expected.ip_proto, \
- got.n_proto, expected.n_proto, \
- got.flow_label, expected.flow_label, \
- got.sport, expected.sport, \
- got.dport, expected.dport)
-
struct ipv4_pkt {
struct ethhdr eth;
struct iphdr iph;
@@ -87,6 +64,19 @@ struct dvlan_ipv6_pkt {
struct tcphdr tcp;
} __packed;
+struct gre_base_hdr {
+ __be16 flags;
+ __be16 protocol;
+} gre_base_hdr;
+
+struct gre_minimal_pkt {
+ struct ethhdr eth;
+ struct iphdr iph;
+ struct gre_base_hdr gre_hdr;
+ struct iphdr iph_inner;
+ struct tcphdr tcp;
+} __packed;
+
struct test {
const char *name;
union {
@@ -96,14 +86,15 @@ struct test {
struct ipv6_pkt ipv6;
struct ipv6_frag_pkt ipv6_frag;
struct dvlan_ipv6_pkt dvlan_ipv6;
+ struct gre_minimal_pkt gre_minimal;
} pkt;
struct bpf_flow_keys keys;
__u32 flags;
+ __u32 retval;
};
#define VLAN_HLEN 4
-static __u32 duration;
struct test tests[] = {
{
.name = "ipv4",
@@ -125,6 +116,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "ipv6",
@@ -145,6 +137,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "802.1q-ipv4",
@@ -167,6 +160,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "802.1ad-ipv6",
@@ -190,6 +184,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "ipv4-frag",
@@ -216,6 +211,7 @@ struct test tests[] = {
.dport = 8080,
},
.flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+ .retval = BPF_OK,
},
{
.name = "ipv4-no-frag",
@@ -238,6 +234,7 @@ struct test tests[] = {
.is_frag = true,
.is_first_frag = true,
},
+ .retval = BPF_OK,
},
{
.name = "ipv6-frag",
@@ -264,6 +261,7 @@ struct test tests[] = {
.dport = 8080,
},
.flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+ .retval = BPF_OK,
},
{
.name = "ipv6-no-frag",
@@ -286,6 +284,7 @@ struct test tests[] = {
.is_frag = true,
.is_first_frag = true,
},
+ .retval = BPF_OK,
},
{
.name = "ipv6-flow-label",
@@ -308,6 +307,7 @@ struct test tests[] = {
.dport = 8080,
.flow_label = __bpf_constant_htonl(0xbeeef),
},
+ .retval = BPF_OK,
},
{
.name = "ipv6-no-flow-label",
@@ -330,6 +330,31 @@ struct test tests[] = {
.flow_label = __bpf_constant_htonl(0xbeeef),
},
.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .retval = BPF_OK,
+ },
+ {
+ .name = "ipv6-empty-flow-label",
+ .pkt.ipv6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.flow_lbl = { 0x00, 0x00, 0x00 },
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct ipv6hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .sport = 80,
+ .dport = 8080,
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .retval = BPF_OK,
},
{
.name = "ipip-encap",
@@ -341,8 +366,8 @@ struct test tests[] = {
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
- __bpf_constant_htons(MAGIC_BYTES) -
- sizeof(struct iphdr),
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 80,
.tcp.dest = 8080,
@@ -358,6 +383,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "ipip-no-encap",
@@ -369,8 +395,8 @@ struct test tests[] = {
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
- __bpf_constant_htons(MAGIC_BYTES) -
- sizeof(struct iphdr),
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 80,
.tcp.dest = 8080,
@@ -385,9 +411,158 @@ struct test tests[] = {
.is_encap = true,
},
.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+ .retval = BPF_OK,
+ },
+ {
+ .name = "ipip-encap-dissector-continue",
+ .pkt.ipip = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_IPIP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.saddr = __bpf_constant_htonl(FLOW_CONTINUE_SADDR),
+ .iph_inner.ihl = 5,
+ .iph_inner.protocol = IPPROTO_TCP,
+ .iph_inner.tot_len =
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
+ .tcp.doff = 5,
+ .tcp.source = 99,
+ .tcp.dest = 9090,
+ },
+ .retval = BPF_FLOW_DISSECTOR_CONTINUE,
+ },
+ {
+ .name = "ip-gre",
+ .pkt.gre_minimal = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_GRE,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .gre_hdr = {
+ .flags = 0,
+ .protocol = __bpf_constant_htons(ETH_P_IP),
+ },
+ .iph_inner.ihl = 5,
+ .iph_inner.protocol = IPPROTO_TCP,
+ .iph_inner.tot_len =
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct iphdr) * 2 +
+ sizeof(struct gre_base_hdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ .is_encap = true,
+ .sport = 80,
+ .dport = 8080,
+ },
+ .retval = BPF_OK,
+ },
+ {
+ .name = "ip-gre-no-encap",
+ .pkt.ipip = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_GRE,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph_inner.ihl = 5,
+ .iph_inner.protocol = IPPROTO_TCP,
+ .iph_inner.tot_len =
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct iphdr)
+ + sizeof(struct gre_base_hdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_GRE,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ .is_encap = true,
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+ .retval = BPF_OK,
},
};
+void serial_test_flow_dissector_namespace(void)
+{
+ struct bpf_flow *skel;
+ struct nstoken *ns;
+ int err, prog_fd;
+
+ skel = bpf_flow__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (!ASSERT_OK_FD(prog_fd, "get dissector fd"))
+ goto out_destroy_skel;
+
+ /* We must be able to attach a flow dissector to root namespace */
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (!ASSERT_OK(err, "attach on root namespace ok"))
+ goto out_destroy_skel;
+
+ err = make_netns(TEST_NS);
+ if (!ASSERT_OK(err, "create non-root net namespace"))
+ goto out_destroy_skel;
+
+ /* We must not be able to additionally attach a flow dissector to a
+ * non-root net namespace
+ */
+ ns = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(ns, "enter non-root net namespace"))
+ goto out_clean_ns;
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (!ASSERT_ERR(err,
+ "refuse new flow dissector in non-root net namespace"))
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+ else
+ ASSERT_EQ(errno, EEXIST,
+ "refused because of already attached prog");
+ close_netns(ns);
+
+ /* If no flow dissector is attached to the root namespace, we must
+ * be able to attach one to a non-root net namespace
+ */
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+ ns = open_netns(TEST_NS);
+ ASSERT_OK_PTR(ns, "enter non-root net namespace");
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ close_netns(ns);
+ ASSERT_OK(err, "accept new flow dissector in non-root net namespace");
+
+ /* If a flow dissector is attached to non-root net namespace, attaching
+ * a flow dissector to root namespace must fail
+ */
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (!ASSERT_ERR(err, "refuse new flow dissector on root namespace"))
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+ else
+ ASSERT_EQ(errno, EEXIST,
+ "refused because of already attached prog");
+
+ ns = open_netns(TEST_NS);
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+ close_netns(ns);
+out_clean_ns:
+ remove_netns(TEST_NS);
+out_destroy_skel:
+ bpf_flow__destroy(skel);
+}
+
static int create_tap(const char *ifname)
{
struct ifreq ifr = {
@@ -457,10 +632,10 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
if (map_fd < 0)
return -1;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (!prog)
return -1;
@@ -475,22 +650,27 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
return 0;
}
-static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
+static void run_tests_skb_less(int tap_fd, struct bpf_map *keys,
+ char *test_suffix)
{
+ char test_name[TEST_NAME_MAX_LEN];
int i, err, keys_fd;
keys_fd = bpf_map__fd(keys);
- if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+ if (!ASSERT_OK_FD(keys_fd, "bpf_map__fd"))
return;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
/* Keep in sync with 'flags' from eth_get_headlen. */
__u32 eth_get_headlen_flags =
BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
- struct bpf_prog_test_run_attr tattr = {};
struct bpf_flow_keys flow_keys = {};
__u32 key = (__u32)(tests[i].keys.sport) << 16 |
tests[i].keys.dport;
+ snprintf(test_name, TEST_NAME_MAX_LEN, "%s-%s", tests[i].name,
+ test_suffix);
+ if (!test__start_subtest(test_name))
+ continue;
/* For skb-less case we can't pass input flags; run
* only the tests that have a matching set of flags.
@@ -500,120 +680,175 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
continue;
err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
- CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
+ if (!ASSERT_EQ(err, sizeof(tests[i].pkt), "tx_tap"))
+ continue;
+
+ /* check the stored flow_keys only if BPF_OK expected */
+ if (tests[i].retval != BPF_OK)
+ continue;
err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
- CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ continue;
- CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
- CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+ ASSERT_MEMEQ(&flow_keys, &tests[i].keys,
+ sizeof(struct bpf_flow_keys),
+ "returned flow keys");
err = bpf_map_delete_elem(keys_fd, &key);
- CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
+ ASSERT_OK(err, "bpf_map_delete_elem");
}
}
-static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd)
+void test_flow_dissector_skb_less_direct_attach(void)
{
- int err, prog_fd;
+ int err, prog_fd, tap_fd;
+ struct bpf_flow *skel;
+ struct netns_obj *ns;
- prog_fd = bpf_program__fd(skel->progs._dissect);
- if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+ ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true);
+ if (!ASSERT_OK_PTR(ns, "create and open netns"))
return;
+ skel = bpf_flow__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
+ goto out_clean_ns;
+
+ err = init_prog_array(skel->obj, skel->maps.jmp_table);
+ if (!ASSERT_OK(err, "init_prog_array"))
+ goto out_destroy_skel;
+
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd"))
+ goto out_destroy_skel;
+
err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
- if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno))
- return;
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out_destroy_skel;
- run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+ tap_fd = create_tap("tap0");
+ if (!ASSERT_OK_FD(tap_fd, "create_tap"))
+ goto out_destroy_skel;
+ err = ifup("tap0");
+ if (!ASSERT_OK(err, "ifup"))
+ goto out_close_tap;
+
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection,
+ "non-skb-direct-attach");
err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
- CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno);
+ ASSERT_OK(err, "bpf_prog_detach2");
+
+out_close_tap:
+ close(tap_fd);
+out_destroy_skel:
+ bpf_flow__destroy(skel);
+out_clean_ns:
+ netns_free(ns);
}
-static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+void test_flow_dissector_skb_less_indirect_attach(void)
{
+ int err, net_fd, tap_fd;
+ struct bpf_flow *skel;
struct bpf_link *link;
- int err, net_fd;
+ struct netns_obj *ns;
- net_fd = open("/proc/self/ns/net", O_RDONLY);
- if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno))
+ ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true);
+ if (!ASSERT_OK_PTR(ns, "create and open netns"))
return;
+ skel = bpf_flow__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
+ goto out_clean_ns;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_OK_FD(net_fd, "open(/proc/self/ns/net"))
+ goto out_destroy_skel;
+
+ err = init_prog_array(skel->obj, skel->maps.jmp_table);
+ if (!ASSERT_OK(err, "init_prog_array"))
+ goto out_destroy_skel;
+
+ tap_fd = create_tap("tap0");
+ if (!ASSERT_OK_FD(tap_fd, "create_tap"))
+ goto out_close_ns;
+ err = ifup("tap0");
+ if (!ASSERT_OK(err, "ifup"))
+ goto out_close_tap;
+
link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
- if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
- goto out_close;
+ if (!ASSERT_OK_PTR(link, "attach_netns"))
+ goto out_close_tap;
- run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection,
+ "non-skb-indirect-attach");
err = bpf_link__destroy(link);
- CHECK(err, "bpf_link__destroy", "err %d\n", err);
-out_close:
+ ASSERT_OK(err, "bpf_link__destroy");
+
+out_close_tap:
+ close(tap_fd);
+out_close_ns:
close(net_fd);
+out_destroy_skel:
+ bpf_flow__destroy(skel);
+out_clean_ns:
+ netns_free(ns);
}
-void test_flow_dissector(void)
+void test_flow_dissector_skb(void)
{
- int i, err, prog_fd, keys_fd = -1, tap_fd;
+ char test_name[TEST_NAME_MAX_LEN];
struct bpf_flow *skel;
+ int i, err, prog_fd;
skel = bpf_flow__open_and_load();
- if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
return;
- prog_fd = bpf_program__fd(skel->progs._dissect);
- if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
- goto out_destroy_skel;
- keys_fd = bpf_map__fd(skel->maps.last_dissection);
- if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
- goto out_destroy_skel;
err = init_prog_array(skel->obj, skel->maps.jmp_table);
- if (CHECK(err, "init_prog_array", "err %d\n", err))
+ if (!ASSERT_OK(err, "init_prog_array"))
+ goto out_destroy_skel;
+
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd"))
goto out_destroy_skel;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_flow_keys flow_keys;
- struct bpf_prog_test_run_attr tattr = {
- .prog_fd = prog_fd,
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &tests[i].pkt,
.data_size_in = sizeof(tests[i].pkt),
.data_out = &flow_keys,
- };
+ );
static struct bpf_flow_keys ctx = {};
+ snprintf(test_name, TEST_NAME_MAX_LEN, "%s-skb", tests[i].name);
+ if (!test__start_subtest(test_name))
+ continue;
+
if (tests[i].flags) {
- tattr.ctx_in = &ctx;
- tattr.ctx_size_in = sizeof(ctx);
+ topts.ctx_in = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
ctx.flags = tests[i].flags;
}
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
- err || tattr.retval != 1,
- tests[i].name,
- "err %d errno %d retval %d duration %d size %u/%zu\n",
- err, errno, tattr.retval, tattr.duration,
- tattr.data_size_out, sizeof(flow_keys));
- CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
- }
-
- /* Do the same tests but for skb-less flow dissector.
- * We use a known path in the net/tun driver that calls
- * eth_get_headlen and we manually export bpf_flow_keys
- * via BPF map in this case.
- */
-
- tap_fd = create_tap("tap0");
- CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
- err = ifup("tap0");
- CHECK(err, "ifup", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, tests[i].retval, "test_run retval");
- /* Test direct prog attachment */
- test_skb_less_prog_attach(skel, tap_fd);
- /* Test indirect prog attachment via link */
- test_skb_less_link_create(skel, tap_fd);
+ /* check the resulting flow_keys only if BPF_OK returned */
+ if (topts.retval != BPF_OK)
+ continue;
+ ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
+ "test_run data_size_out");
+ ASSERT_MEMEQ(&flow_keys, &tests[i].keys,
+ sizeof(struct bpf_flow_keys),
+ "returned flow keys");
+ }
- close(tap_fd);
out_destroy_skel:
bpf_flow__destroy(skel);
}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
new file mode 100644
index 000000000000..80b153d3ddec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
@@ -0,0 +1,797 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <bpf/bpf.h>
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "bpf_util.h"
+#include "bpf_flow.skel.h"
+
+#define CFG_PORT_INNER 8000
+#define CFG_PORT_GUE 6080
+#define SUBTEST_NAME_MAX_LEN 32
+#define TEST_NAME_MAX_LEN (32 + SUBTEST_NAME_MAX_LEN)
+#define MAX_SOURCE_PORTS 3
+#define TEST_PACKETS_COUNT 10
+#define TEST_PACKET_LEN 100
+#define TEST_PACKET_PATTERN 'a'
+#define TEST_IPV4 "192.168.0.1/32"
+#define TEST_IPV6 "100::a/128"
+#define TEST_TUNNEL_REMOTE "127.0.0.2"
+#define TEST_TUNNEL_LOCAL "127.0.0.1"
+
+#define INIT_ADDR4(addr4, port) \
+ { \
+ .sin_family = AF_INET, \
+ .sin_port = __constant_htons(port), \
+ .sin_addr.s_addr = __constant_htonl(addr4), \
+ }
+
+#define INIT_ADDR6(addr6, port) \
+ { \
+ .sin6_family = AF_INET6, \
+ .sin6_port = __constant_htons(port), \
+ .sin6_addr = addr6, \
+ }
+#define TEST_IN4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 2, 0)
+#define TEST_IN4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, CFG_PORT_INNER)
+#define TEST_OUT4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 1, 0)
+#define TEST_OUT4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, 0)
+
+#define TEST_IN6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
+#define TEST_IN6_DST_ADDR_DEFAULT \
+ INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
+#define TEST_OUT6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
+#define TEST_OUT6_DST_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
+
+#define TEST_IN4_SRC_ADDR_DISSECT_CONTINUE INIT_ADDR4(INADDR_LOOPBACK + 126, 0)
+#define TEST_IN4_SRC_ADDR_IPIP INIT_ADDR4((in_addr_t)0x01010101, 0)
+#define TEST_IN4_DST_ADDR_IPIP INIT_ADDR4((in_addr_t)0xC0A80001, CFG_PORT_INNER)
+
+struct grehdr {
+ uint16_t unused;
+ uint16_t protocol;
+} __packed;
+
+struct guehdr {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 hlen : 5, control : 1, version : 2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 version : 2, control : 1, hlen : 5;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 proto_ctype;
+ __be16 flags;
+ };
+ __be32 word;
+ };
+};
+
+static char buf[ETH_DATA_LEN];
+
+struct test_configuration {
+ char name[SUBTEST_NAME_MAX_LEN];
+ int (*test_setup)(void);
+ void (*test_teardown)(void);
+ int source_ports[MAX_SOURCE_PORTS];
+ int cfg_l3_inner;
+ struct sockaddr_in in_saddr4;
+ struct sockaddr_in in_daddr4;
+ struct sockaddr_in6 in_saddr6;
+ struct sockaddr_in6 in_daddr6;
+ int cfg_l3_outer;
+ struct sockaddr_in out_saddr4;
+ struct sockaddr_in out_daddr4;
+ struct sockaddr_in6 out_saddr6;
+ struct sockaddr_in6 out_daddr6;
+ int cfg_encap_proto;
+ uint8_t cfg_dsfield_inner;
+ uint8_t cfg_dsfield_outer;
+ int cfg_l3_extra;
+ struct sockaddr_in extra_saddr4;
+ struct sockaddr_in extra_daddr4;
+ struct sockaddr_in6 extra_saddr6;
+ struct sockaddr_in6 extra_daddr6;
+};
+
+static unsigned long util_gettime(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void build_ipv4_header(void *header, uint8_t proto, uint32_t src,
+ uint32_t dst, int payload_len, uint8_t tos)
+{
+ struct iphdr *iph = header;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->tos = tos;
+ iph->ttl = 8;
+ iph->tot_len = htons(sizeof(*iph) + payload_len);
+ iph->id = htons(1337);
+ iph->protocol = proto;
+ iph->saddr = src;
+ iph->daddr = dst;
+ iph->check = build_ip_csum((void *)iph);
+}
+
+static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+ uint16_t val, *ptr = (uint16_t *)ip6h;
+
+ val = ntohs(*ptr);
+ val &= 0xF00F;
+ val |= ((uint16_t)dsfield) << 4;
+ *ptr = htons(val);
+}
+
+static void build_ipv6_header(void *header, uint8_t proto,
+ const struct sockaddr_in6 *src,
+ const struct sockaddr_in6 *dst, int payload_len,
+ uint8_t dsfield)
+{
+ struct ipv6hdr *ip6h = header;
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(payload_len);
+ ip6h->nexthdr = proto;
+ ip6h->hop_limit = 8;
+ ipv6_set_dsfield(ip6h, dsfield);
+
+ memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
+ memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
+}
+
+static void build_udp_header(void *header, int payload_len, uint16_t sport,
+ uint16_t dport, int family)
+{
+ struct udphdr *udph = header;
+ int len = sizeof(*udph) + payload_len;
+
+ udph->source = htons(sport);
+ udph->dest = htons(dport);
+ udph->len = htons(len);
+ udph->check = 0;
+ if (family == AF_INET)
+ udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
+ udph);
+ else
+ udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
+ udph);
+}
+
+static void build_gue_header(void *header, uint8_t proto)
+{
+ struct guehdr *gueh = header;
+
+ gueh->proto_ctype = proto;
+}
+
+static void build_gre_header(void *header, uint16_t proto)
+{
+ struct grehdr *greh = header;
+
+ greh->protocol = htons(proto);
+}
+
+static int l3_length(int family)
+{
+ if (family == AF_INET)
+ return sizeof(struct iphdr);
+ else
+ return sizeof(struct ipv6hdr);
+}
+
+static int build_packet(const struct test_configuration *test, uint16_t sport)
+{
+ int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
+ int el3_len = 0, packet_len;
+
+ memset(buf, 0, ETH_DATA_LEN);
+
+ if (test->cfg_l3_extra)
+ el3_len = l3_length(test->cfg_l3_extra);
+
+ /* calculate header offsets */
+ if (test->cfg_encap_proto) {
+ ol3_len = l3_length(test->cfg_l3_outer);
+
+ if (test->cfg_encap_proto == IPPROTO_GRE)
+ ol4_len = sizeof(struct grehdr);
+ else if (test->cfg_encap_proto == IPPROTO_UDP)
+ ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ }
+
+ il3_len = l3_length(test->cfg_l3_inner);
+ il4_len = sizeof(struct udphdr);
+
+ packet_len = el3_len + ol3_len + ol4_len + il3_len + il4_len +
+ TEST_PACKET_LEN;
+ if (!ASSERT_LE(packet_len, sizeof(buf), "check packet size"))
+ return -1;
+
+ /*
+ * Fill packet from inside out, to calculate correct checksums.
+ * But create ip before udp headers, as udp uses ip for pseudo-sum.
+ */
+ memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
+ TEST_PACKET_PATTERN, TEST_PACKET_LEN);
+
+ /* add zero byte for udp csum padding */
+ buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + TEST_PACKET_LEN] =
+ 0;
+
+ switch (test->cfg_l3_inner) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP, test->in_saddr4.sin_addr.s_addr,
+ test->in_daddr4.sin_addr.s_addr,
+ il4_len + TEST_PACKET_LEN,
+ test->cfg_dsfield_inner);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP, &test->in_saddr6,
+ &test->in_daddr6, il4_len + TEST_PACKET_LEN,
+ test->cfg_dsfield_inner);
+ break;
+ }
+
+ build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
+ TEST_PACKET_LEN, sport, CFG_PORT_INNER,
+ test->cfg_l3_inner);
+
+ if (!test->cfg_encap_proto)
+ return il3_len + il4_len + TEST_PACKET_LEN;
+
+ switch (test->cfg_l3_outer) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len, test->cfg_encap_proto,
+ test->out_saddr4.sin_addr.s_addr,
+ test->out_daddr4.sin_addr.s_addr,
+ ol4_len + il3_len + il4_len + TEST_PACKET_LEN,
+ test->cfg_dsfield_outer);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len, test->cfg_encap_proto,
+ &test->out_saddr6, &test->out_daddr6,
+ ol4_len + il3_len + il4_len + TEST_PACKET_LEN,
+ test->cfg_dsfield_outer);
+ break;
+ }
+
+ switch (test->cfg_encap_proto) {
+ case IPPROTO_UDP:
+ build_gue_header(buf + el3_len + ol3_len + ol4_len -
+ sizeof(struct guehdr),
+ test->cfg_l3_inner == PF_INET ? IPPROTO_IPIP :
+ IPPROTO_IPV6);
+ build_udp_header(buf + el3_len + ol3_len,
+ sizeof(struct guehdr) + il3_len + il4_len +
+ TEST_PACKET_LEN,
+ sport, CFG_PORT_GUE, test->cfg_l3_outer);
+ break;
+ case IPPROTO_GRE:
+ build_gre_header(buf + el3_len + ol3_len,
+ test->cfg_l3_inner == PF_INET ? ETH_P_IP :
+ ETH_P_IPV6);
+ break;
+ }
+
+ switch (test->cfg_l3_extra) {
+ case PF_INET:
+ build_ipv4_header(buf,
+ test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP :
+ IPPROTO_IPV6,
+ test->extra_saddr4.sin_addr.s_addr,
+ test->extra_daddr4.sin_addr.s_addr,
+ ol3_len + ol4_len + il3_len + il4_len +
+ TEST_PACKET_LEN,
+ 0);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf,
+ test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP :
+ IPPROTO_IPV6,
+ &test->extra_saddr6, &test->extra_daddr6,
+ ol3_len + ol4_len + il3_len + il4_len +
+ TEST_PACKET_LEN,
+ 0);
+ break;
+ }
+
+ return el3_len + ol3_len + ol4_len + il3_len + il4_len +
+ TEST_PACKET_LEN;
+}
+
+/* sender transmits encapsulated over RAW or unencap'd over UDP */
+static int setup_tx(const struct test_configuration *test)
+{
+ int family, fd, ret;
+
+ if (test->cfg_l3_extra)
+ family = test->cfg_l3_extra;
+ else if (test->cfg_l3_outer)
+ family = test->cfg_l3_outer;
+ else
+ family = test->cfg_l3_inner;
+
+ fd = socket(family, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_OK_FD(fd, "setup tx socket"))
+ return fd;
+
+ if (test->cfg_l3_extra) {
+ if (test->cfg_l3_extra == PF_INET)
+ ret = connect(fd, (void *)&test->extra_daddr4,
+ sizeof(test->extra_daddr4));
+ else
+ ret = connect(fd, (void *)&test->extra_daddr6,
+ sizeof(test->extra_daddr6));
+ if (!ASSERT_OK(ret, "connect")) {
+ close(fd);
+ return ret;
+ }
+ } else if (test->cfg_l3_outer) {
+ /* connect to destination if not encapsulated */
+ if (test->cfg_l3_outer == PF_INET)
+ ret = connect(fd, (void *)&test->out_daddr4,
+ sizeof(test->out_daddr4));
+ else
+ ret = connect(fd, (void *)&test->out_daddr6,
+ sizeof(test->out_daddr6));
+ if (!ASSERT_OK(ret, "connect")) {
+ close(fd);
+ return ret;
+ }
+ } else {
+ /* otherwise using loopback */
+ if (test->cfg_l3_inner == PF_INET)
+ ret = connect(fd, (void *)&test->in_daddr4,
+ sizeof(test->in_daddr4));
+ else
+ ret = connect(fd, (void *)&test->in_daddr6,
+ sizeof(test->in_daddr6));
+ if (!ASSERT_OK(ret, "connect")) {
+ close(fd);
+ return ret;
+ }
+ }
+
+ return fd;
+}
+
+/* receiver reads unencapsulated UDP */
+static int setup_rx(const struct test_configuration *test)
+{
+ int fd, ret;
+
+ fd = socket(test->cfg_l3_inner, SOCK_DGRAM, 0);
+ if (!ASSERT_OK_FD(fd, "socket rx"))
+ return fd;
+
+ if (test->cfg_l3_inner == PF_INET)
+ ret = bind(fd, (void *)&test->in_daddr4,
+ sizeof(test->in_daddr4));
+ else
+ ret = bind(fd, (void *)&test->in_daddr6,
+ sizeof(test->in_daddr6));
+ if (!ASSERT_OK(ret, "bind rx")) {
+ close(fd);
+ return ret;
+ }
+
+ return fd;
+}
+
+static int do_tx(int fd, const char *pkt, int len)
+{
+ int ret;
+
+ ret = write(fd, pkt, len);
+ return ret != len;
+}
+
+static int do_poll(int fd, short events, int timeout)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.fd = fd;
+ pfd.events = events;
+
+ ret = poll(&pfd, 1, timeout);
+ return ret;
+}
+
+static int do_rx(int fd)
+{
+ char rbuf;
+ int ret, num = 0;
+
+ while (1) {
+ ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret < 0)
+ return -1;
+ if (!ASSERT_EQ(rbuf, TEST_PACKET_PATTERN, "check pkt pattern"))
+ return -1;
+ num++;
+ }
+
+ return num;
+}
+
+static int run_test(const struct test_configuration *test,
+ int source_port_index)
+{
+ int fdt = -1, fdr = -1, len, tx = 0, rx = 0, err;
+ unsigned long tstop, tcur;
+
+ fdr = setup_rx(test);
+ fdt = setup_tx(test);
+ if (!ASSERT_OK_FD(fdr, "setup rx") || !ASSERT_OK_FD(fdt, "setup tx")) {
+ err = -1;
+ goto out_close_sockets;
+ }
+
+ len = build_packet(test,
+ (uint16_t)test->source_ports[source_port_index]);
+ if (!ASSERT_GT(len, 0, "build test packet"))
+ return -1;
+
+ tcur = util_gettime();
+ tstop = tcur;
+
+ while (tx < TEST_PACKETS_COUNT) {
+ if (!ASSERT_OK(do_tx(fdt, buf, len), "do_tx"))
+ break;
+ tx++;
+ err = do_rx(fdr);
+ if (!ASSERT_GE(err, 0, "do_rx"))
+ break;
+ rx += err;
+ }
+
+ /* read straggler packets, if any */
+ if (rx < tx) {
+ tstop = util_gettime() + 100;
+ while (rx < tx) {
+ tcur = util_gettime();
+ if (tcur >= tstop)
+ break;
+
+ err = do_poll(fdr, POLLIN, tstop - tcur);
+ if (err < 0)
+ break;
+ err = do_rx(fdr);
+ if (err >= 0)
+ rx += err;
+ }
+ }
+
+out_close_sockets:
+ close(fdt);
+ close(fdr);
+ return rx;
+}
+
+static int attach_and_configure_program(struct bpf_flow *skel)
+{
+ struct bpf_map *prog_array = skel->maps.jmp_table;
+ int main_prog_fd, sub_prog_fd, map_fd, i, err;
+ struct bpf_program *prog;
+ char prog_name[32];
+
+ main_prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (main_prog_fd < 0)
+ return main_prog_fd;
+
+ err = bpf_prog_attach(main_prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (err)
+ return err;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (map_fd < 0)
+ return map_fd;
+
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!prog)
+ return -1;
+
+ sub_prog_fd = bpf_program__fd(prog);
+ if (sub_prog_fd < 0)
+ return -1;
+
+ err = bpf_map_update_elem(map_fd, &i, &sub_prog_fd, BPF_ANY);
+ if (err)
+ return -1;
+ }
+
+ return main_prog_fd;
+}
+
+static void detach_program(struct bpf_flow *skel, int prog_fd)
+{
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+}
+
+static int set_port_drop(int pf, bool multi_port)
+{
+ char dst_port[16];
+
+ snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER);
+
+ SYS(fail, "tc qdisc add dev lo ingress");
+ SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s",
+ "dev lo",
+ "parent FFFF:",
+ "protocol", pf == PF_INET6 ? "ipv6" : "ip",
+ "pref 1337",
+ "flower",
+ "ip_proto udp",
+ "src_port", multi_port ? "8-10" : "9",
+ "dst_port", dst_port,
+ "action drop");
+ return 0;
+
+fail_delete_qdisc:
+ SYS_NOFAIL("tc qdisc del dev lo ingress");
+fail:
+ return 1;
+}
+
+static void remove_filter(void)
+{
+ SYS_NOFAIL("tc filter del dev lo ingress");
+ SYS_NOFAIL("tc qdisc del dev lo ingress");
+}
+
+static int ipv4_setup(void)
+{
+ return set_port_drop(PF_INET, false);
+}
+
+static int ipv6_setup(void)
+{
+ return set_port_drop(PF_INET6, false);
+}
+
+static int port_range_setup(void)
+{
+ return set_port_drop(PF_INET, true);
+}
+
+static int set_addresses(void)
+{
+ SYS(out, "ip -4 addr add %s dev lo", TEST_IPV4);
+ SYS(out_remove_ipv4, "ip -6 addr add %s dev lo", TEST_IPV6);
+ return 0;
+out_remove_ipv4:
+ SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4);
+out:
+ return -1;
+}
+
+static void unset_addresses(void)
+{
+ SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4);
+ SYS_NOFAIL("ip -6 addr del %s dev lo", TEST_IPV6);
+}
+
+static int ipip_setup(void)
+{
+ if (!ASSERT_OK(set_addresses(), "configure addresses"))
+ return -1;
+ if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter"))
+ goto out_unset_addresses;
+ SYS(out_remove_filter,
+ "ip link add ipip_test type ipip remote %s local %s dev lo",
+ TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL);
+ SYS(out_clean_netif, "ip link set ipip_test up");
+ return 0;
+
+out_clean_netif:
+ SYS_NOFAIL("ip link del ipip_test");
+out_remove_filter:
+ remove_filter();
+out_unset_addresses:
+ unset_addresses();
+ return -1;
+}
+
+static void ipip_shutdown(void)
+{
+ SYS_NOFAIL("ip link del ipip_test");
+ remove_filter();
+ unset_addresses();
+}
+
+static int gre_setup(void)
+{
+ if (!ASSERT_OK(set_addresses(), "configure addresses"))
+ return -1;
+ if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter"))
+ goto out_unset_addresses;
+ SYS(out_remove_filter,
+ "ip link add gre_test type gre remote %s local %s dev lo",
+ TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL);
+ SYS(out_clean_netif, "ip link set gre_test up");
+ return 0;
+
+out_clean_netif:
+ SYS_NOFAIL("ip link del ipip_test");
+out_remove_filter:
+ remove_filter();
+out_unset_addresses:
+ unset_addresses();
+ return -1;
+}
+
+static void gre_shutdown(void)
+{
+ SYS_NOFAIL("ip link del gre_test");
+ remove_filter();
+ unset_addresses();
+}
+
+static const struct test_configuration tests_input[] = {
+ {
+ .name = "ipv4",
+ .test_setup = ipv4_setup,
+ .test_teardown = remove_filter,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT,
+ .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT
+ },
+ {
+ .name = "ipv4_continue_dissect",
+ .test_setup = ipv4_setup,
+ .test_teardown = remove_filter,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_DISSECT_CONTINUE,
+ .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT },
+ {
+ .name = "ipip",
+ .test_setup = ipip_setup,
+ .test_teardown = ipip_shutdown,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_IPIP,
+ .in_daddr4 = TEST_IN4_DST_ADDR_IPIP,
+ .out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT,
+ .out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT,
+ .cfg_l3_outer = PF_INET,
+ .cfg_encap_proto = IPPROTO_IPIP,
+
+ },
+ {
+ .name = "gre",
+ .test_setup = gre_setup,
+ .test_teardown = gre_shutdown,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_IPIP,
+ .in_daddr4 = TEST_IN4_DST_ADDR_IPIP,
+ .out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT,
+ .out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT,
+ .cfg_l3_outer = PF_INET,
+ .cfg_encap_proto = IPPROTO_GRE,
+ },
+ {
+ .name = "port_range",
+ .test_setup = port_range_setup,
+ .test_teardown = remove_filter,
+ .source_ports = { 7, 9, 11 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT,
+ .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT },
+ {
+ .name = "ipv6",
+ .test_setup = ipv6_setup,
+ .test_teardown = remove_filter,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET6,
+ .in_saddr6 = TEST_IN6_SRC_ADDR_DEFAULT,
+ .in_daddr6 = TEST_IN6_DST_ADDR_DEFAULT
+ },
+};
+
+struct test_ctx {
+ struct bpf_flow *skel;
+ struct netns_obj *ns;
+ int prog_fd;
+};
+
+static int test_global_init(struct test_ctx *ctx)
+{
+ int err;
+
+ ctx->skel = bpf_flow__open_and_load();
+ if (!ASSERT_OK_PTR(ctx->skel, "open and load flow_dissector"))
+ return -1;
+
+ ctx->ns = netns_new("flow_dissector_classification", true);
+ if (!ASSERT_OK_PTR(ctx->ns, "switch ns"))
+ goto out_destroy_skel;
+
+ err = write_sysctl("/proc/sys/net/ipv4/conf/default/rp_filter", "0");
+ err |= write_sysctl("/proc/sys/net/ipv4/conf/all/rp_filter", "0");
+ err |= write_sysctl("/proc/sys/net/ipv4/conf/lo/rp_filter", "0");
+ if (!ASSERT_OK(err, "configure net tunables"))
+ goto out_clean_ns;
+
+ ctx->prog_fd = attach_and_configure_program(ctx->skel);
+ if (!ASSERT_OK_FD(ctx->prog_fd, "attach and configure program"))
+ goto out_clean_ns;
+ return 0;
+out_clean_ns:
+ netns_free(ctx->ns);
+out_destroy_skel:
+ bpf_flow__destroy(ctx->skel);
+ return -1;
+}
+
+static void test_global_shutdown(struct test_ctx *ctx)
+{
+ detach_program(ctx->skel, ctx->prog_fd);
+ netns_free(ctx->ns);
+ bpf_flow__destroy(ctx->skel);
+}
+
+void test_flow_dissector_classification(void)
+{
+ struct test_ctx ctx;
+ const struct test_configuration *test;
+ int i;
+
+ if (test_global_init(&ctx))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests_input); i++) {
+ if (!test__start_subtest(tests_input[i].name))
+ continue;
+ test = &tests_input[i];
+ /* All tests are expected to have one rx-ok port first,
+ * then a non-working rx port, and finally a rx-ok port
+ */
+ if (test->test_setup &&
+ !ASSERT_OK(test->test_setup(), "init filter"))
+ continue;
+
+ ASSERT_EQ(run_test(test, 0), TEST_PACKETS_COUNT,
+ "test first port");
+ ASSERT_EQ(run_test(test, 1), 0, "test second port");
+ ASSERT_EQ(run_test(test, 2), TEST_PACKETS_COUNT,
+ "test third port");
+ if (test->test_teardown)
+ test->test_teardown();
+ }
+ test_global_shutdown(&ctx);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index 0e8a4d2f023d..c7a47b57ac91 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -2,10 +2,9 @@
#include <test_progs.h>
#include <network_helpers.h>
-void test_flow_dissector_load_bytes(void)
+void serial_test_flow_dissector_load_bytes(void)
{
struct bpf_flow_keys flow_keys;
- __u32 duration = 0, retval, size;
struct bpf_insn prog[] = {
// BPF_REG_1 - 1st argument: context
// BPF_REG_2 - 2nd argument: offset, start at first byte
@@ -27,22 +26,25 @@ void test_flow_dissector_load_bytes(void)
BPF_EXIT_INSN(),
};
int fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = &flow_keys,
+ .data_size_out = sizeof(flow_keys),
+ .repeat = 1,
+ );
/* make sure bpf_skb_load_bytes is not allowed from skb-less context
*/
- fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
+ fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
- CHECK(fd < 0,
- "flow_dissector-bpf_skb_load_bytes-load",
- "fd %d errno %d\n",
- fd, errno);
+ ASSERT_GE(fd, 0, "bpf_test_load_program good fd");
- err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4),
- &flow_keys, &size, &retval, &duration);
- CHECK(size != sizeof(flow_keys) || err || retval != 1,
- "flow_dissector-bpf_skb_load_bytes",
- "err %d errno %d retval %d duration %d size %u/%zu\n",
- err, errno, retval, duration, size, sizeof(flow_keys));
+ err = bpf_prog_test_run_opts(fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
+ "test_run data_size_out");
+ ASSERT_EQ(topts.retval, BPF_OK, "test_run retval");
if (fd >= -1)
close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 172c586b6996..9333f7346d15 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -47,9 +47,9 @@ static int load_prog(enum bpf_prog_type type)
};
int fd;
- fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ fd = bpf_test_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
if (CHECK_FAIL(fd < 0))
- perror("bpf_load_program");
+ perror("bpf_test_load_program");
return fd;
}
@@ -60,9 +60,9 @@ static __u32 query_prog_id(int prog)
__u32 info_len = sizeof(info);
int err;
- err = bpf_obj_get_info_by_fd(prog, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog, &info, &info_len);
if (CHECK_FAIL(err || info_len != sizeof(info))) {
- perror("bpf_obj_get_info_by_fd");
+ perror("bpf_prog_get_info_by_fd");
return 0;
}
@@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when another link exists */
errno = 0;
link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+ if (CHECK_FAIL(link2 >= 0 || errno != E2BIG))
perror("bpf_prog_attach(prog2) expected E2BIG");
- if (link2 != -1)
+ if (link2 >= 0)
close(link2);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when prog attached */
errno = 0;
link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link != -1 || errno != EEXIST))
+ if (CHECK_FAIL(link >= 0 || errno != EEXIST))
perror("bpf_link_create(prog2) expected EEXIST");
- if (link != -1)
+ if (link >= 0)
close(link);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -497,7 +497,7 @@ static void test_link_get_info(int netns, int prog1, int prog2)
}
info_len = sizeof(info);
- err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ err = bpf_link_get_info_by_fd(link, &info, &info_len);
if (CHECK_FAIL(err)) {
perror("bpf_obj_get_info");
goto out_unlink;
@@ -521,7 +521,7 @@ static void test_link_get_info(int netns, int prog1, int prog2)
link_id = info.id;
info_len = sizeof(info);
- err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ err = bpf_link_get_info_by_fd(link, &info, &info_len);
if (CHECK_FAIL(err)) {
perror("bpf_obj_get_info");
goto out_unlink;
@@ -546,7 +546,7 @@ static void test_link_get_info(int netns, int prog1, int prog2)
netns = -1;
info_len = sizeof(info);
- err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ err = bpf_link_get_info_by_fd(link, &info, &info_len);
if (CHECK_FAIL(err)) {
perror("bpf_obj_get_info");
goto out_unlink;
@@ -623,12 +623,12 @@ static void run_tests(int netns)
}
out_close:
for (i = 0; i < ARRAY_SIZE(progs); i++) {
- if (progs[i] != -1)
+ if (progs[i] >= 0)
CHECK_FAIL(close(progs[i]));
}
}
-void test_flow_dissector_reattach(void)
+void serial_test_flow_dissector_reattach(void)
{
int err, new_net, saved_net;
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
new file mode 100644
index 000000000000..5fea3209566e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "for_each_hash_map_elem.skel.h"
+#include "for_each_array_map_elem.skel.h"
+#include "for_each_map_elem_write_key.skel.h"
+#include "for_each_multi_maps.skel.h"
+#include "for_each_hash_modify.skel.h"
+
+static unsigned int duration;
+
+static void test_hash_map(void)
+{
+ int i, err, max_entries;
+ struct for_each_hash_map_elem *skel;
+ __u64 *percpu_valbuf = NULL;
+ size_t percpu_val_sz;
+ __u32 key, num_cpus;
+ __u64 val;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = for_each_hash_map_elem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
+ return;
+
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ num_cpus = bpf_num_possible_cpus();
+ percpu_val_sz = sizeof(__u64) * num_cpus;
+ percpu_valbuf = malloc(percpu_val_sz);
+ if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+ goto out;
+
+ key = 1;
+ for (i = 0; i < num_cpus; i++)
+ percpu_valbuf[i] = i + 1;
+ err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key),
+ percpu_valbuf, percpu_val_sz, BPF_ANY);
+ if (!ASSERT_OK(err, "percpu_map_update"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ duration = topts.duration;
+ if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, topts.retval))
+ goto out;
+
+ ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
+ ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
+
+ key = 1;
+ err = bpf_map__lookup_elem(skel->maps.hashmap, &key, sizeof(key), &val, sizeof(val), 0);
+ ASSERT_ERR(err, "hashmap_lookup");
+
+ ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
+ ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
+ ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
+ ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
+ ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
+ ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
+out:
+ free(percpu_valbuf);
+ for_each_hash_map_elem__destroy(skel);
+}
+
+static void test_array_map(void)
+{
+ __u32 key, num_cpus, max_entries;
+ int i, err;
+ struct for_each_array_map_elem *skel;
+ __u64 *percpu_valbuf = NULL;
+ size_t percpu_val_sz;
+ __u64 val, expected_total;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = for_each_array_map_elem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
+ return;
+
+ expected_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.arraymap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ /* skip the last iteration for expected total */
+ if (i != max_entries - 1)
+ expected_total += val;
+ err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ num_cpus = bpf_num_possible_cpus();
+ percpu_val_sz = sizeof(__u64) * num_cpus;
+ percpu_valbuf = malloc(percpu_val_sz);
+ if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+ goto out;
+
+ key = 0;
+ for (i = 0; i < num_cpus; i++)
+ percpu_valbuf[i] = i + 1;
+ err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key),
+ percpu_valbuf, percpu_val_sz, BPF_ANY);
+ if (!ASSERT_OK(err, "percpu_map_update"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ duration = topts.duration;
+ if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, topts.retval))
+ goto out;
+
+ ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
+ ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
+
+out:
+ free(percpu_valbuf);
+ for_each_array_map_elem__destroy(skel);
+}
+
+static void test_write_map_key(void)
+{
+ struct for_each_map_elem_write_key *skel;
+
+ skel = for_each_map_elem_write_key__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "for_each_map_elem_write_key__open_and_load"))
+ for_each_map_elem_write_key__destroy(skel);
+}
+
+static void test_multi_maps(void)
+{
+ struct for_each_multi_maps *skel;
+ __u64 val, array_total, hash_total;
+ __u32 key, max_entries;
+ int i, err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = for_each_multi_maps__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_multi_maps__open_and_load"))
+ return;
+
+ array_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.arraymap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ array_total += val;
+ err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "array_map_update"))
+ goto out;
+ }
+
+ hash_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i + 100;
+ val = i + 1;
+ hash_total += val;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "hash_map_update"))
+ goto out;
+ }
+
+ skel->bss->data_output = 0;
+ skel->bss->use_array = 1;
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+ ASSERT_EQ(skel->bss->data_output, array_total, "array output");
+
+ skel->bss->data_output = 0;
+ skel->bss->use_array = 0;
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+ ASSERT_EQ(skel->bss->data_output, hash_total, "hash output");
+
+out:
+ for_each_multi_maps__destroy(skel);
+}
+
+static void test_hash_modify(void)
+{
+ struct for_each_hash_modify *skel;
+ int max_entries, i, err;
+ __u64 key, val;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1
+ );
+
+ skel = for_each_hash_modify__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load"))
+ return;
+
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+
+out:
+ for_each_hash_modify__destroy(skel);
+}
+
+void test_for_each(void)
+{
+ if (test__start_subtest("hash_map"))
+ test_hash_map();
+ if (test__start_subtest("array_map"))
+ test_array_map();
+ if (test__start_subtest("write_map_key"))
+ test_write_map_key();
+ if (test__start_subtest("multi_maps"))
+ test_multi_maps();
+ if (test__start_subtest("hash_modify"))
+ test_hash_modify();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c
new file mode 100644
index 000000000000..0de8facca4c5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+
+#include "free_timer.skel.h"
+
+struct run_ctx {
+ struct bpf_program *start_prog;
+ struct bpf_program *overwrite_prog;
+ pthread_barrier_t notify;
+ int loop;
+ bool start;
+ bool stop;
+};
+
+static void start_threads(struct run_ctx *ctx)
+{
+ ctx->start = true;
+}
+
+static void stop_threads(struct run_ctx *ctx)
+{
+ ctx->stop = true;
+ /* Guarantee the order between ->stop and ->start */
+ __atomic_store_n(&ctx->start, true, __ATOMIC_RELEASE);
+}
+
+static int wait_for_start(struct run_ctx *ctx)
+{
+ while (!__atomic_load_n(&ctx->start, __ATOMIC_ACQUIRE))
+ usleep(10);
+
+ return ctx->stop;
+}
+
+static void *overwrite_timer_fn(void *arg)
+{
+ struct run_ctx *ctx = arg;
+ int loop, fd, err;
+ cpu_set_t cpuset;
+ long ret = 0;
+
+ /* Pin on CPU 0 */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+ /* Is the thread being stopped ? */
+ err = wait_for_start(ctx);
+ if (err)
+ return NULL;
+
+ fd = bpf_program__fd(ctx->overwrite_prog);
+ loop = ctx->loop;
+ while (loop-- > 0) {
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ /* Wait for start thread to complete */
+ pthread_barrier_wait(&ctx->notify);
+
+ /* Overwrite timers */
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err)
+ ret |= 1;
+ else if (opts.retval)
+ ret |= 2;
+
+ /* Notify start thread to start timers */
+ pthread_barrier_wait(&ctx->notify);
+ }
+
+ return (void *)ret;
+}
+
+static void *start_timer_fn(void *arg)
+{
+ struct run_ctx *ctx = arg;
+ int loop, fd, err;
+ cpu_set_t cpuset;
+ long ret = 0;
+
+ /* Pin on CPU 1 */
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+ /* Is the thread being stopped ? */
+ err = wait_for_start(ctx);
+ if (err)
+ return NULL;
+
+ fd = bpf_program__fd(ctx->start_prog);
+ loop = ctx->loop;
+ while (loop-- > 0) {
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ /* Run the prog to start timer */
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err)
+ ret |= 4;
+ else if (opts.retval)
+ ret |= 8;
+
+ /* Notify overwrite thread to do overwrite */
+ pthread_barrier_wait(&ctx->notify);
+
+ /* Wait for overwrite thread to complete */
+ pthread_barrier_wait(&ctx->notify);
+ }
+
+ return (void *)ret;
+}
+
+void test_free_timer(void)
+{
+ struct free_timer *skel;
+ struct bpf_program *prog;
+ struct run_ctx ctx;
+ pthread_t tid[2];
+ void *ret;
+ int err;
+
+ skel = free_timer__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ prog = bpf_object__find_program_by_name(skel->obj, "start_timer");
+ if (!ASSERT_OK_PTR(prog, "find start prog"))
+ goto out;
+ ctx.start_prog = prog;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "overwrite_timer");
+ if (!ASSERT_OK_PTR(prog, "find overwrite prog"))
+ goto out;
+ ctx.overwrite_prog = prog;
+
+ pthread_barrier_init(&ctx.notify, NULL, 2);
+ ctx.loop = 10;
+
+ err = pthread_create(&tid[0], NULL, start_timer_fn, &ctx);
+ if (!ASSERT_OK(err, "create start_timer"))
+ goto out;
+
+ err = pthread_create(&tid[1], NULL, overwrite_timer_fn, &ctx);
+ if (!ASSERT_OK(err, "create overwrite_timer")) {
+ stop_threads(&ctx);
+ goto out;
+ }
+
+ start_threads(&ctx);
+
+ ret = NULL;
+ err = pthread_join(tid[0], &ret);
+ ASSERT_EQ(err | (long)ret, 0, "start_timer");
+ ret = NULL;
+ err = pthread_join(tid[1], &ret);
+ ASSERT_EQ(err | (long)ret, 0, "overwrite_timer");
+out:
+ free_timer__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
new file mode 100644
index 000000000000..43a26ec69a8e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <linux/fsverity.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "test_get_xattr.skel.h"
+#include "test_set_remove_xattr.skel.h"
+#include "test_fsverity.skel.h"
+
+static const char testfile[] = "/tmp/test_progs_fs_kfuncs";
+
+static void test_get_xattr(const char *name, const char *value, bool allow_access)
+{
+ struct test_get_xattr *skel = NULL;
+ int fd = -1, err;
+ int v[32];
+
+ fd = open(testfile, O_CREAT | O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ close(fd);
+ fd = -1;
+
+ err = setxattr(testfile, name, value, strlen(value) + 1, 0);
+ if (err && errno == EOPNOTSUPP) {
+ printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
+ "To run this test, make sure /tmp filesystem supports xattr.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ if (!ASSERT_OK(err, "setxattr"))
+ goto out;
+
+ skel = test_get_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_get_xattr__open_and_load"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_get_xattr__attach(skel);
+
+ if (!ASSERT_OK(err, "test_get_xattr__attach"))
+ goto out;
+
+ fd = open(testfile, O_RDONLY, 0644);
+
+ if (!ASSERT_GE(fd, 0, "open_file"))
+ goto out;
+
+ /* Trigger security_inode_getxattr */
+ err = getxattr(testfile, name, v, sizeof(v));
+
+ if (allow_access) {
+ ASSERT_EQ(err, -1, "getxattr_return");
+ ASSERT_EQ(errno, EINVAL, "getxattr_errno");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
+ } else {
+ ASSERT_EQ(err, strlen(value) + 1, "getxattr_return");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 0, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 0, "found_xattr_from_dentry");
+ }
+
+out:
+ close(fd);
+ test_get_xattr__destroy(skel);
+ remove(testfile);
+}
+
+/* xattr value we will set to security.bpf.foo */
+static const char value_foo[] = "hello";
+
+static void read_and_validate_foo(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_foo, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(value_foo), "getxattr size foo");
+ ASSERT_EQ(strncmp(value_out, value_foo, sizeof(value_foo)), 0, "strncmp value_foo");
+}
+
+static void set_foo(struct test_set_remove_xattr *skel)
+{
+ ASSERT_OK(setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0),
+ "setxattr foo");
+}
+
+static void validate_bar_match(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(skel->data->value_bar), "getxattr size bar");
+ ASSERT_EQ(strncmp(value_out, skel->data->value_bar, sizeof(skel->data->value_bar)), 0,
+ "strncmp value_bar");
+}
+
+static void validate_bar_removed(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_LT(err, 0, "getxattr size bar should fail");
+}
+
+static void test_set_remove_xattr(void)
+{
+ struct test_set_remove_xattr *skel = NULL;
+ int fd = -1, err;
+
+ fd = open(testfile, O_CREAT | O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ close(fd);
+ fd = -1;
+
+ skel = test_set_remove_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_set_remove_xattr__open_and_load"))
+ return;
+
+ /* Set security.bpf.foo to "hello" */
+ err = setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0);
+ if (err && errno == EOPNOTSUPP) {
+ printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
+ "To run this test, make sure /tmp filesystem supports xattr.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ if (!ASSERT_OK(err, "setxattr"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_set_remove_xattr__attach(skel);
+ if (!ASSERT_OK(err, "test_set_remove_xattr__attach"))
+ goto out;
+
+ /* First, test not _locked version of the kfuncs with getxattr. */
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ read_and_validate_foo(skel);
+ validate_bar_match(skel);
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ read_and_validate_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->set_security_bpf_bar_success, "set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->remove_security_bpf_bar_success, "remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->set_security_selinux_fail, "set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->remove_security_selinux_fail, "remove_security_selinux_fail");
+
+ /* Second, test _locked version of the kfuncs, with setxattr */
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ set_foo(skel);
+ validate_bar_match(skel);
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ set_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->locked_set_security_bpf_bar_success,
+ "locked_set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_remove_security_bpf_bar_success,
+ "locked_remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_set_security_selinux_fail,
+ "locked_set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->locked_remove_security_selinux_fail,
+ "locked_remove_security_selinux_fail");
+
+out:
+ close(fd);
+ test_set_remove_xattr__destroy(skel);
+ remove(testfile);
+}
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+static void test_fsverity(void)
+{
+ struct fsverity_enable_arg arg = {0};
+ struct test_fsverity *skel = NULL;
+ struct fsverity_digest *d;
+ int fd, err;
+ char buffer[4096];
+
+ fd = open(testfile, O_CREAT | O_RDWR, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ /* Write random buffer, so the file is not empty */
+ err = write(fd, buffer, 4096);
+ if (!ASSERT_EQ(err, 4096, "write_file"))
+ goto out;
+ close(fd);
+
+ /* Reopen read-only, otherwise FS_IOC_ENABLE_VERITY will fail */
+ fd = open(testfile, O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "open_file1"))
+ return;
+
+ /* Enable fsverity for the file.
+ * If the file system doesn't support verity, this will fail. Skip
+ * the test in such case.
+ */
+ arg.version = 1;
+ arg.hash_algorithm = FS_VERITY_HASH_ALG_SHA256;
+ arg.block_size = 4096;
+ err = ioctl(fd, FS_IOC_ENABLE_VERITY, &arg);
+ if (err) {
+ printf("%s:SKIP:local fs doesn't support fsverity (%d)\n"
+ "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ skel = test_fsverity__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_fsverity__open_and_load"))
+ goto out;
+
+ /* Get fsverity_digest from ioctl */
+ d = (struct fsverity_digest *)skel->bss->expected_digest;
+ d->digest_algorithm = FS_VERITY_HASH_ALG_SHA256;
+ d->digest_size = SHA256_DIGEST_SIZE;
+ err = ioctl(fd, FS_IOC_MEASURE_VERITY, skel->bss->expected_digest);
+ if (!ASSERT_OK(err, "ioctl_FS_IOC_MEASURE_VERITY"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_fsverity__attach(skel);
+ if (!ASSERT_OK(err, "test_fsverity__attach"))
+ goto out;
+
+ /* Reopen the file to trigger the program */
+ close(fd);
+ fd = open(testfile, O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open_file2"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->got_fsverity, 1, "got_fsverity");
+ ASSERT_EQ(skel->bss->digest_matches, 1, "digest_matches");
+out:
+ close(fd);
+ test_fsverity__destroy(skel);
+ remove(testfile);
+}
+
+void test_fs_kfuncs(void)
+{
+ /* Matches xattr_names in progs/test_get_xattr.c */
+ if (test__start_subtest("user_xattr"))
+ test_get_xattr("user.kfuncs", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr"))
+ test_get_xattr("security.bpf.xxx", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr_error"))
+ test_get_xattr("security.bpf", "hello", false);
+
+ if (test__start_subtest("security_selinux_xattr_error"))
+ test_get_xattr("security.selinux", "hello", false);
+
+ if (test__start_subtest("set_remove_xattr"))
+ test_set_remove_xattr();
+
+ if (test__start_subtest("fsverity"))
+ test_fsverity();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
new file mode 100644
index 000000000000..0394a1156d99
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "get_branch_snapshot.skel.h"
+
+static int *pfd_array;
+static int cpu_cnt;
+
+static bool is_hypervisor(void)
+{
+ char *line = NULL;
+ bool ret = false;
+ size_t len;
+ FILE *fp;
+
+ fp = fopen("/proc/cpuinfo", "r");
+ if (!fp)
+ return false;
+
+ while (getline(&line, &len, fp) != -1) {
+ if (!strncmp(line, "flags", 5)) {
+ if (strstr(line, "hypervisor") != NULL)
+ ret = true;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(fp);
+ return ret;
+}
+
+static int create_perf_events(void)
+{
+ struct perf_event_attr attr = {0};
+ int cpu;
+
+ /* create perf event */
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+ attr.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL |
+ PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
+
+ cpu_cnt = libbpf_num_possible_cpus();
+ pfd_array = malloc(sizeof(int) * cpu_cnt);
+ if (!pfd_array) {
+ cpu_cnt = 0;
+ return 1;
+ }
+
+ for (cpu = 0; cpu < cpu_cnt; cpu++) {
+ pfd_array[cpu] = syscall(__NR_perf_event_open, &attr,
+ -1, cpu, -1, PERF_FLAG_FD_CLOEXEC);
+ if (pfd_array[cpu] < 0)
+ break;
+ }
+
+ return cpu == 0;
+}
+
+static void close_perf_events(void)
+{
+ int cpu, fd;
+
+ for (cpu = 0; cpu < cpu_cnt; cpu++) {
+ fd = pfd_array[cpu];
+ if (fd < 0)
+ break;
+ close(fd);
+ }
+ free(pfd_array);
+}
+
+void serial_test_get_branch_snapshot(void)
+{
+ struct get_branch_snapshot *skel = NULL;
+ int err;
+
+ /* Skip the test before we fix LBR snapshot for hypervisor. */
+ if (is_hypervisor()) {
+ test__skip();
+ return;
+ }
+
+ if (create_perf_events()) {
+ test__skip(); /* system doesn't support LBR */
+ goto cleanup;
+ }
+
+ skel = get_branch_snapshot__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "get_branch_snapshot__open_and_load"))
+ goto cleanup;
+
+ err = kallsyms_find("bpf_testmod_loop_test", &skel->bss->address_low);
+ if (!ASSERT_OK(err, "kallsyms_find"))
+ goto cleanup;
+
+ /* Just a guess for the end of this function, as module functions
+ * in /proc/kallsyms could come in any order.
+ */
+ skel->bss->address_high = skel->bss->address_low + 128;
+
+ err = get_branch_snapshot__attach(skel);
+ if (!ASSERT_OK(err, "get_branch_snapshot__attach"))
+ goto cleanup;
+
+ trigger_module_test_read(100);
+
+ if (skel->bss->total_entries < 16) {
+ /* too few entries for the hit/waste test */
+ test__skip();
+ goto cleanup;
+ }
+
+ ASSERT_GT(skel->bss->test1_hits, 6, "find_looptest_in_lbr");
+
+ /* Given we stop LBR in software, we will waste a few entries.
+ * But we should try to waste as few as possible entries. We are at
+ * about 7 on x86_64 systems.
+ * Add a check for < 10 so that we get heads-up when something
+ * changes and wastes too many entries.
+ */
+ ASSERT_LT(skel->bss->wasted_entries, 10, "check_wasted_entries");
+
+cleanup:
+ get_branch_snapshot__destroy(skel);
+ close_perf_events();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
new file mode 100644
index 000000000000..64a9c95d4acf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "get_func_args_test.skel.h"
+
+void test_get_func_args_test(void)
+{
+ struct get_func_args_test *skel = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = get_func_args_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "get_func_args_test__open_and_load"))
+ return;
+
+ err = get_func_args_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_args_test__attach"))
+ goto cleanup;
+
+ /* This runs bpf_fentry_test* functions and triggers
+ * fentry/fexit programs.
+ */
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ /* This runs bpf_modify_return_test function and triggers
+ * fmod_ret_test and fexit_test programs.
+ */
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+
+ ASSERT_EQ(topts.retval >> 16, 1, "test_run");
+ ASSERT_EQ(topts.retval & 0xffff, 1234 + 29, "test_run");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+ ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
+ ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
+ ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+
+cleanup:
+ get_func_args_test__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
new file mode 100644
index 000000000000..c40242dfa8fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "get_func_ip_test.skel.h"
+#include "get_func_ip_uprobe_test.skel.h"
+
+static noinline void uprobe_trigger(void)
+{
+}
+
+static void test_function_entry(void)
+{
+ struct get_func_ip_test *skel = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = get_func_ip_test__open();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
+ return;
+
+ err = get_func_ip_test__load(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__load"))
+ goto cleanup;
+
+ err = get_func_ip_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__attach"))
+ goto cleanup;
+
+ skel->bss->uprobe_trigger = (unsigned long) uprobe_trigger;
+
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ prog_fd = bpf_program__fd(skel->progs.test5);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "test_run");
+
+ uprobe_trigger();
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+ ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
+ ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
+ ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+ ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
+ ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
+ ASSERT_EQ(skel->bss->test8_result, 1, "test8_result");
+
+cleanup:
+ get_func_ip_test__destroy(skel);
+}
+
+#ifdef __x86_64__
+extern void uprobe_trigger_body(void);
+asm(
+".globl uprobe_trigger_body\n"
+".type uprobe_trigger_body, @function\n"
+"uprobe_trigger_body:\n"
+" nop\n"
+" ret\n"
+);
+
+static void test_function_body_kprobe(void)
+{
+ struct get_func_ip_test *skel = NULL;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ LIBBPF_OPTS(bpf_kprobe_opts, kopts);
+ struct bpf_link *link6 = NULL;
+ int err, prog_fd;
+
+ skel = get_func_ip_test__open();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
+ return;
+
+ /* test6 is x86_64 specific and is disabled by default,
+ * enable it for body test.
+ */
+ bpf_program__set_autoload(skel->progs.test6, true);
+
+ err = get_func_ip_test__load(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__load"))
+ goto cleanup;
+
+ kopts.offset = skel->kconfig->CONFIG_X86_KERNEL_IBT ? 9 : 5;
+
+ link6 = bpf_program__attach_kprobe_opts(skel->progs.test6, "bpf_fentry_test6", &kopts);
+ if (!ASSERT_OK_PTR(link6, "link6"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+
+cleanup:
+ bpf_link__destroy(link6);
+ get_func_ip_test__destroy(skel);
+}
+
+static void test_function_body_uprobe(void)
+{
+ struct get_func_ip_uprobe_test *skel = NULL;
+ int err;
+
+ skel = get_func_ip_uprobe_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_uprobe_test__open_and_load"))
+ return;
+
+ err = get_func_ip_uprobe_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__attach"))
+ goto cleanup;
+
+ skel->bss->uprobe_trigger_body = (unsigned long) uprobe_trigger_body;
+
+ uprobe_trigger_body();
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+
+cleanup:
+ get_func_ip_uprobe_test__destroy(skel);
+}
+
+static void test_function_body(void)
+{
+ test_function_body_kprobe();
+ test_function_body_uprobe();
+}
+#else
+#define test_function_body()
+#endif
+
+void test_get_func_ip_test(void)
+{
+ test_function_entry();
+ test_function_body();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 925722217edf..858e0575f502 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -24,12 +24,15 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
{
bool good_kern_stack = false, good_user_stack = false;
const char *nonjit_func = "___bpf_prog_run";
- struct get_stack_trace_t *e = data;
+ /* perfbuf-submitted data is 4-byte aligned, but we need 8-byte
+ * alignment, so copy data into a local variable, for simplicity
+ */
+ struct get_stack_trace_t e;
int i, num_stack;
- static __u64 cnt;
struct ksym *ks;
- cnt++;
+ memset(&e, 0, sizeof(e));
+ memcpy(&e, data, size <= sizeof(e) ? size : sizeof(e));
if (size < sizeof(struct get_stack_trace_t)) {
__u64 *raw_data = data;
@@ -57,19 +60,19 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
good_user_stack = true;
}
} else {
- num_stack = e->kern_stack_size / sizeof(__u64);
+ num_stack = e.kern_stack_size / sizeof(__u64);
if (env.jit_enabled) {
good_kern_stack = num_stack > 0;
} else {
for (i = 0; i < num_stack; i++) {
- ks = ksym_search(e->kern_stack[i]);
+ ks = ksym_search(e.kern_stack[i]);
if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
good_kern_stack = true;
break;
}
}
}
- if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
+ if (e.user_stack_size > 0 && e.user_stack_buildid_size > 0)
good_user_stack = true;
}
@@ -81,11 +84,10 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
void test_get_stack_raw_tp(void)
{
- const char *file = "./test_get_stack_rawtp.o";
- const char *file_err = "./test_get_stack_rawtp_err.o";
- const char *prog_name = "raw_tracepoint/sys_enter";
+ const char *file = "./test_get_stack_rawtp.bpf.o";
+ const char *file_err = "./test_get_stack_rawtp_err.bpf.o";
+ const char *prog_name = "bpf_prog1";
int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP;
- struct perf_buffer_opts pb_opts = {};
struct perf_buffer *pb = NULL;
struct bpf_link *link = NULL;
struct timespec tv = {0, 10};
@@ -94,15 +96,15 @@ void test_get_stack_raw_tp(void)
struct bpf_map *map;
cpu_set_t cpu_set;
- err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
goto close_prog;
@@ -121,12 +123,12 @@ void test_get_stack_raw_tp(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
- pb_opts.sample_cb = get_stack_print_output;
- pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ pb = perf_buffer__new(bpf_map__fd(map), 8, get_stack_print_output,
+ NULL, NULL, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
/* trigger some syscall action */
@@ -141,9 +143,7 @@ void test_get_stack_raw_tp(void)
}
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ bpf_link__destroy(link);
+ perf_buffer__free(pb);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
index d884b2ed5bc5..2715c68301f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -27,7 +27,7 @@ void test_get_stackid_cannot_attach(void)
return;
/* override program type */
- bpf_program__set_perf_event(skel->progs.oncpu);
+ bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT);
err = test_stacktrace_build_id__load(skel);
if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
@@ -48,8 +48,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
- "should have failed\n");
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain");
close(pmu_fd);
/* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
@@ -65,8 +64,8 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
- "err: %ld\n", PTR_ERR(skel->links.oncpu));
+ ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
+ bpf_link__destroy(skel->links.oncpu);
close(pmu_fd);
/* add exclude_callchain_kernel, attach should fail */
@@ -82,8 +81,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
- "should have failed\n");
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel");
close(pmu_fd);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index 9efa7e50eab2..fadfb64e2a71 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -29,7 +29,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
{ "relocate .rodata reference", 10, ~0 },
};
- for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num);
CHECK(err || num != tests[i].num, tests[i].name,
"err %d result %llx expected %llx\n",
@@ -58,7 +58,7 @@ static void test_global_data_string(struct bpf_object *obj, __u32 duration)
{ "relocate .bss reference", 4, "\0\0hello" },
};
- for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
err = bpf_map_lookup_elem(map_fd, &tests[i].key, str);
CHECK(err || memcmp(str, tests[i].str, sizeof(str)),
tests[i].name, "err %d result \'%s\' expected \'%s\'\n",
@@ -92,7 +92,7 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
{ "relocate .data reference", 3, { 41, 0xeeeeefef, 0x2111111111111111ULL, } },
};
- for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
err = bpf_map_lookup_elem(map_fd, &tests[i].key, &val);
CHECK(err || memcmp(&val, &tests[i].val, sizeof(val)),
tests[i].name, "err %d result { %u, %u, %llu } expected { %u, %u, %llu }\n",
@@ -103,18 +103,25 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
{
int err = -ENOMEM, map_fd, zero = 0;
- struct bpf_map *map;
+ struct bpf_map *map, *map2;
__u8 *buff;
map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
- if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
+ if (!ASSERT_OK_PTR(map, "map"))
+ return;
+ if (!ASSERT_TRUE(bpf_map__is_internal(map), "is_internal"))
+ return;
+
+ /* ensure we can lookup internal maps by their ELF names */
+ map2 = bpf_object__find_map_by_name(obj, ".rodata");
+ if (!ASSERT_EQ(map, map2, "same_maps"))
return;
map_fd = bpf_map__fd(map);
if (CHECK_FAIL(map_fd < 0))
return;
- buff = malloc(bpf_map__def(map)->value_size);
+ buff = malloc(bpf_map__value_size(map));
if (buff)
err = bpf_map_update_elem(map_fd, &zero, buff, 0);
free(buff);
@@ -124,25 +131,27 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
void test_global_data(void)
{
- const char *file = "./test_global_data.o";
- __u32 duration = 0, retval;
+ const char *file = "./test_global_data.bpf.o";
struct bpf_object *obj;
int err, prog_fd;
-
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (CHECK(err, "load program", "error %d loading %s\n", err, file))
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load program"))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "pass global data run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "pass global data run err");
+ ASSERT_OK(topts.retval, "pass global data run retval");
- test_global_data_number(obj, duration);
- test_global_data_string(obj, duration);
- test_global_data_struct(obj, duration);
- test_global_data_rdonly(obj, duration);
+ test_global_data_number(obj, topts.duration);
+ test_global_data_string(obj, topts.duration);
+ test_global_data_struct(obj, topts.duration);
+ test_global_data_rdonly(obj, topts.duration);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index ee46b11f1f9a..8466332d7406 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -3,7 +3,7 @@
void test_global_data_init(void)
{
- const char *file = "./test_global_data.o";
+ const char *file = "./test_global_data.bpf.o";
int err = -ENOMEM, map_fd, zero = 0;
__u8 *buff = NULL, *newval = NULL;
struct bpf_object *obj;
@@ -16,11 +16,11 @@ void test_global_data_init(void)
if (CHECK_FAIL(err))
return;
- map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
+ map = bpf_object__find_map_by_name(obj, ".rodata");
if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
goto out;
- sz = bpf_map__def(map)->value_size;
+ sz = bpf_map__value_size(map);
newval = malloc(sz);
if (CHECK_FAIL(!newval))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
index 8bcc2869102f..d997099f62d0 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
@@ -39,20 +39,22 @@ static void test_global_func_args0(struct bpf_object *obj)
void test_global_func_args(void)
{
- const char *file = "./test_global_func_args.o";
- __u32 retval;
+ const char *file = "./test_global_func_args.bpf.o";
struct bpf_object *obj;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
if (CHECK(err, "load program", "error %d loading %s\n", err, file))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "pass global func args run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
test_global_func_args0(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c
new file mode 100644
index 000000000000..65309894b27a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "verifier_global_subprogs.skel.h"
+#include "freplace_dead_global_func.skel.h"
+
+void test_global_func_dead_code(void)
+{
+ struct verifier_global_subprogs *tgt_skel = NULL;
+ struct freplace_dead_global_func *skel = NULL;
+ char log_buf[4096];
+ int err, tgt_fd;
+
+ /* first, try to load target with good global subprog */
+ tgt_skel = verifier_global_subprogs__open();
+ if (!ASSERT_OK_PTR(tgt_skel, "tgt_skel_good_open"))
+ return;
+
+ bpf_program__set_autoload(tgt_skel->progs.chained_global_func_calls_success, true);
+
+ err = verifier_global_subprogs__load(tgt_skel);
+ if (!ASSERT_OK(err, "tgt_skel_good_load"))
+ goto out;
+
+ tgt_fd = bpf_program__fd(tgt_skel->progs.chained_global_func_calls_success);
+
+ /* Attach to good non-eliminated subprog */
+ skel = freplace_dead_global_func__open();
+ if (!ASSERT_OK_PTR(skel, "skel_good_open"))
+ goto out;
+
+ err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_good");
+ ASSERT_OK(err, "attach_target_good");
+
+ err = freplace_dead_global_func__load(skel);
+ if (!ASSERT_OK(err, "skel_good_load"))
+ goto out;
+
+ freplace_dead_global_func__destroy(skel);
+
+ /* Try attaching to dead code-eliminated subprog */
+ skel = freplace_dead_global_func__open();
+ if (!ASSERT_OK_PTR(skel, "skel_dead_open"))
+ goto out;
+
+ bpf_program__set_log_buf(skel->progs.freplace_prog, log_buf, sizeof(log_buf));
+ err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_dead");
+ ASSERT_OK(err, "attach_target_dead");
+
+ err = freplace_dead_global_func__load(skel);
+ if (!ASSERT_ERR(err, "skel_dead_load"))
+ goto out;
+
+ ASSERT_HAS_SUBSTR(log_buf, "Subprog global_dead doesn't exist", "dead_subprog_missing_msg");
+
+out:
+ verifier_global_subprogs__destroy(tgt_skel);
+ freplace_dead_global_func__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_map_resize.c b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
new file mode 100644
index 000000000000..56b5baef35c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <errno.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include "test_global_map_resize.skel.h"
+#include "test_progs.h"
+
+static void run_prog_bss_array_sum(void)
+{
+ (void)syscall(__NR_getpid);
+}
+
+static void run_prog_data_array_sum(void)
+{
+ (void)syscall(__NR_getuid);
+}
+
+static void global_map_resize_bss_subtest(void)
+{
+ int err;
+ struct test_global_map_resize *skel;
+ struct bpf_map *map;
+ const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2;
+ size_t array_len, actual_sz, new_sz;
+
+ skel = test_global_map_resize__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+ goto teardown;
+
+ /* set some initial value before resizing.
+ * it is expected this non-zero value will be preserved
+ * while resizing.
+ */
+ skel->bss->array[0] = 1;
+
+ /* resize map value and verify the new size */
+ map = skel->maps.bss;
+ err = bpf_map__set_value_size(map, desired_sz);
+ if (!ASSERT_OK(err, "bpf_map__set_value_size"))
+ goto teardown;
+ if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
+ goto teardown;
+
+ new_sz = sizeof(skel->data_percpu_arr->percpu_arr[0]) * libbpf_num_possible_cpus();
+ err = bpf_map__set_value_size(skel->maps.data_percpu_arr, new_sz);
+ ASSERT_OK(err, "percpu_arr_resize");
+
+ /* set the expected number of elements based on the resized array */
+ array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]);
+ if (!ASSERT_GT(array_len, 1, "array_len"))
+ goto teardown;
+
+ skel->bss = bpf_map__initial_value(skel->maps.bss, &actual_sz);
+ if (!ASSERT_OK_PTR(skel->bss, "bpf_map__initial_value (ptr)"))
+ goto teardown;
+ if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)"))
+ goto teardown;
+
+ /* fill the newly resized array with ones,
+ * skipping the first element which was previously set
+ */
+ for (int i = 1; i < array_len; i++)
+ skel->bss->array[i] = 1;
+
+ /* set global const values before loading */
+ skel->rodata->pid = getpid();
+ skel->rodata->bss_array_len = array_len;
+ skel->rodata->data_array_len = 1;
+
+ err = test_global_map_resize__load(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__load"))
+ goto teardown;
+ err = test_global_map_resize__attach(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__attach"))
+ goto teardown;
+
+ /* run the bpf program which will sum the contents of the array.
+ * since the array was filled with ones,verify the sum equals array_len
+ */
+ run_prog_bss_array_sum();
+ if (!ASSERT_EQ(skel->bss->sum, array_len, "sum"))
+ goto teardown;
+
+teardown:
+ test_global_map_resize__destroy(skel);
+}
+
+static void global_map_resize_data_subtest(void)
+{
+ struct test_global_map_resize *skel;
+ struct bpf_map *map;
+ const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2;
+ size_t array_len, actual_sz, new_sz;
+ int err;
+
+ skel = test_global_map_resize__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+ goto teardown;
+
+ /* set some initial value before resizing.
+ * it is expected this non-zero value will be preserved
+ * while resizing.
+ */
+ skel->data_custom->my_array[0] = 1;
+
+ /* resize map value and verify the new size */
+ map = skel->maps.data_custom;
+ err = bpf_map__set_value_size(map, desired_sz);
+ if (!ASSERT_OK(err, "bpf_map__set_value_size"))
+ goto teardown;
+ if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
+ goto teardown;
+
+ new_sz = sizeof(skel->data_percpu_arr->percpu_arr[0]) * libbpf_num_possible_cpus();
+ err = bpf_map__set_value_size(skel->maps.data_percpu_arr, new_sz);
+ ASSERT_OK(err, "percpu_arr_resize");
+
+ /* set the expected number of elements based on the resized array */
+ array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]);
+ if (!ASSERT_GT(array_len, 1, "array_len"))
+ goto teardown;
+
+ skel->data_custom = bpf_map__initial_value(skel->maps.data_custom, &actual_sz);
+ if (!ASSERT_OK_PTR(skel->data_custom, "bpf_map__initial_value (ptr)"))
+ goto teardown;
+ if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)"))
+ goto teardown;
+
+ /* fill the newly resized array with ones,
+ * skipping the first element which was previously set
+ */
+ for (int i = 1; i < array_len; i++)
+ skel->data_custom->my_array[i] = 1;
+
+ /* set global const values before loading */
+ skel->rodata->pid = getpid();
+ skel->rodata->bss_array_len = 1;
+ skel->rodata->data_array_len = array_len;
+
+ err = test_global_map_resize__load(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__load"))
+ goto teardown;
+ err = test_global_map_resize__attach(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__attach"))
+ goto teardown;
+
+ /* run the bpf program which will sum the contents of the array.
+ * since the array was filled with ones,verify the sum equals array_len
+ */
+ run_prog_data_array_sum();
+ if (!ASSERT_EQ(skel->bss->sum, array_len, "sum"))
+ goto teardown;
+
+teardown:
+ test_global_map_resize__destroy(skel);
+}
+
+static void global_map_resize_invalid_subtest(void)
+{
+ int err;
+ struct test_global_map_resize *skel;
+ struct bpf_map *map;
+ __u32 element_sz, desired_sz;
+
+ skel = test_global_map_resize__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+ return;
+
+ /* attempt to resize a global datasec map to size
+ * which does NOT align with array
+ */
+ map = skel->maps.data_custom;
+ if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.custom initial btf"))
+ goto teardown;
+ /* set desired size a fraction of element size beyond an aligned size */
+ element_sz = sizeof(skel->data_custom->my_array[0]);
+ desired_sz = element_sz + element_sz / 2;
+ /* confirm desired size does NOT align with array */
+ if (!ASSERT_NEQ(desired_sz % element_sz, 0, "my_array alignment"))
+ goto teardown;
+ err = bpf_map__set_value_size(map, desired_sz);
+ /* confirm resize is OK but BTF info is cleared */
+ if (!ASSERT_OK(err, ".data.custom bpf_map__set_value_size") ||
+ !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.custom clear btf key") ||
+ !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.custom clear btf val"))
+ goto teardown;
+
+ /* attempt to resize a global datasec map whose only var is NOT an array */
+ map = skel->maps.data_non_array;
+ if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array initial btf"))
+ goto teardown;
+ /* set desired size to arbitrary value */
+ desired_sz = 1024;
+ err = bpf_map__set_value_size(map, desired_sz);
+ /* confirm resize is OK but BTF info is cleared */
+ if (!ASSERT_OK(err, ".data.non_array bpf_map__set_value_size") ||
+ !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.non_array clear btf key") ||
+ !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array clear btf val"))
+ goto teardown;
+
+ /* attempt to resize a global datasec map
+ * whose last var is NOT an array
+ */
+ map = skel->maps.data_array_not_last;
+ if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last initial btf"))
+ goto teardown;
+ /* set desired size to a multiple of element size */
+ element_sz = sizeof(skel->data_array_not_last->my_array_first[0]);
+ desired_sz = element_sz * 8;
+ /* confirm desired size aligns with array */
+ if (!ASSERT_EQ(desired_sz % element_sz, 0, "my_array_first alignment"))
+ goto teardown;
+ err = bpf_map__set_value_size(map, desired_sz);
+ /* confirm resize is OK but BTF info is cleared */
+ if (!ASSERT_OK(err, ".data.array_not_last bpf_map__set_value_size") ||
+ !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.array_not_last clear btf key") ||
+ !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last clear btf val"))
+ goto teardown;
+
+teardown:
+ test_global_map_resize__destroy(skel);
+}
+
+void test_global_map_resize(void)
+{
+ if (test__start_subtest("global_map_resize_bss"))
+ global_map_resize_bss_subtest();
+
+ if (test__start_subtest("global_map_resize_data"))
+ global_map_resize_data_subtest();
+
+ if (test__start_subtest("global_map_resize_invalid"))
+ global_map_resize_invalid_subtest();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index 428d488830c6..d358a223fd2d 100644
--- a/tools/testing/selftests/bpf/prog_tests/hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -7,17 +7,18 @@
*/
#include "test_progs.h"
#include "bpf/hashmap.h"
+#include <stddef.h>
static int duration = 0;
-static size_t hash_fn(const void *k, void *ctx)
+static size_t hash_fn(long k, void *ctx)
{
- return (long)k;
+ return k;
}
-static bool equal_fn(const void *a, const void *b, void *ctx)
+static bool equal_fn(long a, long b, void *ctx)
{
- return (long)a == (long)b;
+ return a == b;
}
static inline size_t next_pow_2(size_t n)
@@ -48,13 +49,12 @@ static void test_hashmap_generic(void)
struct hashmap *map;
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
for (i = 0; i < ELEM_CNT; i++) {
- const void *oldk, *k = (const void *)(long)i;
- void *oldv, *v = (void *)(long)(1024 + i);
+ long oldk, k = i;
+ long oldv, v = 1024 + i;
err = hashmap__update(map, k, v, &oldk, &oldv);
if (CHECK(err != -ENOENT, "hashmap__update",
@@ -65,20 +65,18 @@ static void test_hashmap_generic(void)
err = hashmap__add(map, k, v);
} else {
err = hashmap__set(map, k, v, &oldk, &oldv);
- if (CHECK(oldk != NULL || oldv != NULL, "check_kv",
- "unexpected k/v: %p=%p\n", oldk, oldv))
+ if (CHECK(oldk != 0 || oldv != 0, "check_kv",
+ "unexpected k/v: %ld=%ld\n", oldk, oldv))
goto cleanup;
}
- if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n",
- (long)k, (long)v, err))
+ if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", k, v, err))
goto cleanup;
if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
- "failed to find key %ld\n", (long)k))
+ "failed to find key %ld\n", k))
goto cleanup;
- if (CHECK(oldv != v, "elem_val",
- "found value is wrong: %ld\n", (long)oldv))
+ if (CHECK(oldv != v, "elem_val", "found value is wrong: %ld\n", oldv))
goto cleanup;
}
@@ -92,8 +90,8 @@ static void test_hashmap_generic(void)
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
- long k = (long)entry->key;
- long v = (long)entry->value;
+ long k = entry->key;
+ long v = entry->value;
found_msk |= 1ULL << k;
if (CHECK(v - k != 1024, "check_kv",
@@ -105,8 +103,8 @@ static void test_hashmap_generic(void)
goto cleanup;
for (i = 0; i < ELEM_CNT; i++) {
- const void *oldk, *k = (const void *)(long)i;
- void *oldv, *v = (void *)(long)(256 + i);
+ long oldk, k = i;
+ long oldv, v = 256 + i;
err = hashmap__add(map, k, v);
if (CHECK(err != -EEXIST, "hashmap__add",
@@ -120,13 +118,13 @@ static void test_hashmap_generic(void)
if (CHECK(err, "elem_upd",
"failed to update k/v %ld = %ld: %d\n",
- (long)k, (long)v, err))
+ k, v, err))
goto cleanup;
if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
- "failed to find key %ld\n", (long)k))
+ "failed to find key %ld\n", k))
goto cleanup;
if (CHECK(oldv != v, "elem_val",
- "found value is wrong: %ld\n", (long)oldv))
+ "found value is wrong: %ld\n", oldv))
goto cleanup;
}
@@ -140,8 +138,8 @@ static void test_hashmap_generic(void)
found_msk = 0;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
- long k = (long)entry->key;
- long v = (long)entry->value;
+ long k = entry->key;
+ long v = entry->value;
found_msk |= 1ULL << k;
if (CHECK(v - k != 256, "elem_check",
@@ -153,7 +151,7 @@ static void test_hashmap_generic(void)
goto cleanup;
found_cnt = 0;
- hashmap__for_each_key_entry(map, entry, (void *)0) {
+ hashmap__for_each_key_entry(map, entry, 0) {
found_cnt++;
}
if (CHECK(!found_cnt, "found_cnt",
@@ -162,27 +160,25 @@ static void test_hashmap_generic(void)
found_msk = 0;
found_cnt = 0;
- hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) {
- const void *oldk, *k;
- void *oldv, *v;
+ hashmap__for_each_key_entry_safe(map, entry, tmp, 0) {
+ long oldk, k;
+ long oldv, v;
k = entry->key;
v = entry->value;
found_cnt++;
- found_msk |= 1ULL << (long)k;
+ found_msk |= 1ULL << k;
if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "failed to delete k/v %ld = %ld\n",
- (long)k, (long)v))
+ "failed to delete k/v %ld = %ld\n", k, v))
goto cleanup;
if (CHECK(oldk != k || oldv != v, "check_old",
"invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
- (long)k, (long)v, (long)oldk, (long)oldv))
+ k, v, oldk, oldv))
goto cleanup;
if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "unexpectedly deleted k/v %ld = %ld\n",
- (long)oldk, (long)oldv))
+ "unexpectedly deleted k/v %ld = %ld\n", oldk, oldv))
goto cleanup;
}
@@ -199,26 +195,24 @@ static void test_hashmap_generic(void)
goto cleanup;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
- const void *oldk, *k;
- void *oldv, *v;
+ long oldk, k;
+ long oldv, v;
k = entry->key;
v = entry->value;
found_cnt++;
- found_msk |= 1ULL << (long)k;
+ found_msk |= 1ULL << k;
if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "failed to delete k/v %ld = %ld\n",
- (long)k, (long)v))
+ "failed to delete k/v %ld = %ld\n", k, v))
goto cleanup;
if (CHECK(oldk != k || oldv != v, "elem_check",
"invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
- (long)k, (long)v, (long)oldk, (long)oldv))
+ k, v, oldk, oldv))
goto cleanup;
if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "unexpectedly deleted k/v %ld = %ld\n",
- (long)k, (long)v))
+ "unexpectedly deleted k/v %ld = %ld\n", k, v))
goto cleanup;
}
@@ -236,7 +230,7 @@ static void test_hashmap_generic(void)
hashmap__for_each_entry(map, entry, bkt) {
CHECK(false, "elem_exists",
"unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
+ entry->key, entry->value);
goto cleanup;
}
@@ -244,22 +238,107 @@ static void test_hashmap_generic(void)
hashmap__for_each_entry(map, entry, bkt) {
CHECK(false, "elem_exists",
"unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
+ entry->key, entry->value);
+ goto cleanup;
+ }
+
+cleanup:
+ hashmap__free(map);
+}
+
+static size_t str_hash_fn(long a, void *ctx)
+{
+ return str_hash((char *)a);
+}
+
+static bool str_equal_fn(long a, long b, void *ctx)
+{
+ return strcmp((char *)a, (char *)b) == 0;
+}
+
+/* Verify that hashmap interface works with pointer keys and values */
+static void test_hashmap_ptr_iface(void)
+{
+ const char *key, *value, *old_key, *old_value;
+ struct hashmap_entry *cur;
+ struct hashmap *map;
+ int err, i, bkt;
+
+ map = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+ if (CHECK(!map, "hashmap__new", "can't allocate hashmap\n"))
goto cleanup;
+
+#define CHECK_STR(fn, var, expected) \
+ CHECK(strcmp(var, (expected)), (fn), \
+ "wrong value of " #var ": '%s' instead of '%s'\n", var, (expected))
+
+ err = hashmap__insert(map, "a", "apricot", HASHMAP_ADD, NULL, NULL);
+ if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err))
+ goto cleanup;
+
+ err = hashmap__insert(map, "a", "apple", HASHMAP_SET, &old_key, &old_value);
+ if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err))
+ goto cleanup;
+ CHECK_STR("hashmap__update", old_key, "a");
+ CHECK_STR("hashmap__update", old_value, "apricot");
+
+ err = hashmap__add(map, "b", "banana");
+ if (CHECK(err, "hashmap__add", "unexpected error: %d\n", err))
+ goto cleanup;
+
+ err = hashmap__set(map, "b", "breadfruit", &old_key, &old_value);
+ if (CHECK(err, "hashmap__set", "unexpected error: %d\n", err))
+ goto cleanup;
+ CHECK_STR("hashmap__set", old_key, "b");
+ CHECK_STR("hashmap__set", old_value, "banana");
+
+ err = hashmap__update(map, "b", "blueberry", &old_key, &old_value);
+ if (CHECK(err, "hashmap__update", "unexpected error: %d\n", err))
+ goto cleanup;
+ CHECK_STR("hashmap__update", old_key, "b");
+ CHECK_STR("hashmap__update", old_value, "breadfruit");
+
+ err = hashmap__append(map, "c", "cherry");
+ if (CHECK(err, "hashmap__append", "unexpected error: %d\n", err))
+ goto cleanup;
+
+ if (CHECK(!hashmap__delete(map, "c", &old_key, &old_value),
+ "hashmap__delete", "expected to have entry for 'c'\n"))
+ goto cleanup;
+ CHECK_STR("hashmap__delete", old_key, "c");
+ CHECK_STR("hashmap__delete", old_value, "cherry");
+
+ CHECK(!hashmap__find(map, "b", &value), "hashmap__find", "can't find value for 'b'\n");
+ CHECK_STR("hashmap__find", value, "blueberry");
+
+ if (CHECK(!hashmap__delete(map, "b", NULL, NULL),
+ "hashmap__delete", "expected to have entry for 'b'\n"))
+ goto cleanup;
+
+ i = 0;
+ hashmap__for_each_entry(map, cur, bkt) {
+ if (CHECK(i != 0, "hashmap__for_each_entry", "too many entries"))
+ goto cleanup;
+ key = cur->pkey;
+ value = cur->pvalue;
+ CHECK_STR("entry", key, "a");
+ CHECK_STR("entry", value, "apple");
+ i++;
}
+#undef CHECK_STR
cleanup:
hashmap__free(map);
}
-static size_t collision_hash_fn(const void *k, void *ctx)
+static size_t collision_hash_fn(long k, void *ctx)
{
return 0;
}
static void test_hashmap_multimap(void)
{
- void *k1 = (void *)0, *k2 = (void *)1;
+ long k1 = 0, k2 = 1;
struct hashmap_entry *entry;
struct hashmap *map;
long found_msk;
@@ -267,31 +346,30 @@ static void test_hashmap_multimap(void)
/* force collisions */
map = hashmap__new(collision_hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
/* set up multimap:
* [0] -> 1, 2, 4;
* [1] -> 8, 16, 32;
*/
- err = hashmap__append(map, k1, (void *)1);
+ err = hashmap__append(map, k1, 1);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k1, (void *)2);
+ err = hashmap__append(map, k1, 2);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k1, (void *)4);
+ err = hashmap__append(map, k1, 4);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k2, (void *)8);
+ err = hashmap__append(map, k2, 8);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k2, (void *)16);
+ err = hashmap__append(map, k2, 16);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k2, (void *)32);
+ err = hashmap__append(map, k2, 32);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
@@ -302,7 +380,7 @@ static void test_hashmap_multimap(void)
/* verify global iteration still works and sees all values */
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
- found_msk |= (long)entry->value;
+ found_msk |= entry->value;
}
if (CHECK(found_msk != (1 << 6) - 1, "found_msk",
"not all keys iterated: %lx\n", found_msk))
@@ -311,7 +389,7 @@ static void test_hashmap_multimap(void)
/* iterate values for key 1 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k1) {
- found_msk |= (long)entry->value;
+ found_msk |= entry->value;
}
if (CHECK(found_msk != (1 | 2 | 4), "found_msk",
"invalid k1 values: %lx\n", found_msk))
@@ -320,7 +398,7 @@ static void test_hashmap_multimap(void)
/* iterate values for key 2 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k2) {
- found_msk |= (long)entry->value;
+ found_msk |= entry->value;
}
if (CHECK(found_msk != (8 | 16 | 32), "found_msk",
"invalid k2 values: %lx\n", found_msk))
@@ -335,12 +413,11 @@ static void test_hashmap_empty()
struct hashmap_entry *entry;
int bkt;
struct hashmap *map;
- void *k = (void *)0;
+ long k = 0;
/* force collisions */
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
goto cleanup;
if (CHECK(hashmap__size(map) != 0, "hashmap__size",
@@ -377,4 +454,6 @@ void test_hashmap()
test_hashmap_multimap();
if (test__start_subtest("empty"))
test_hashmap_empty();
+ if (test__start_subtest("ptr_iface"))
+ test_hashmap_ptr_iface();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
new file mode 100644
index 000000000000..0354f9b82c65
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_helper_restricted.skel.h"
+
+void test_helper_restricted(void)
+{
+ int prog_i = 0, prog_cnt;
+
+ do {
+ struct test_helper_restricted *test;
+ int err;
+
+ test = test_helper_restricted__open();
+ if (!ASSERT_OK_PTR(test, "open"))
+ return;
+
+ prog_cnt = test->skeleton->prog_cnt;
+
+ for (int j = 0; j < prog_cnt; ++j) {
+ struct bpf_program *prog = *test->skeleton->progs[j].prog;
+
+ bpf_program__set_autoload(prog, true);
+ }
+
+ err = test_helper_restricted__load(test);
+ ASSERT_ERR(err, "load_should_fail");
+
+ test_helper_restricted__destroy(test);
+ } while (++prog_i < prog_cnt);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_reuse.c b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
new file mode 100644
index 000000000000..a742dd994d60
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdbool.h>
+#include <test_progs.h>
+#include "htab_reuse.skel.h"
+
+struct htab_op_ctx {
+ int fd;
+ int loop;
+ bool stop;
+};
+
+struct htab_val {
+ unsigned int lock;
+ unsigned int data;
+};
+
+static void *htab_lookup_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ struct htab_val value;
+ unsigned int key;
+
+ /* Use BPF_F_LOCK to use spin-lock in map value. */
+ key = 7;
+ bpf_map_lookup_elem_flags(ctx->fd, &key, &value, BPF_F_LOCK);
+ }
+
+ return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ struct htab_val value;
+ unsigned int key;
+
+ key = 7;
+ value.lock = 0;
+ value.data = key;
+ bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK);
+ bpf_map_delete_elem(ctx->fd, &key);
+
+ key = 24;
+ value.lock = 0;
+ value.data = key;
+ bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK);
+ bpf_map_delete_elem(ctx->fd, &key);
+ }
+
+ return NULL;
+}
+
+void test_htab_reuse(void)
+{
+ unsigned int i, wr_nr = 1, rd_nr = 4;
+ pthread_t tids[wr_nr + rd_nr];
+ struct htab_reuse *skel;
+ struct htab_op_ctx ctx;
+ int err;
+
+ skel = htab_reuse__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "htab_reuse__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.htab);
+ ctx.loop = 500;
+ ctx.stop = false;
+
+ memset(tids, 0, sizeof(tids));
+ for (i = 0; i < wr_nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+ for (i = 0; i < rd_nr; i++) {
+ err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+
+reap:
+ for (i = 0; i < wr_nr + rd_nr; i++) {
+ if (!tids[i])
+ continue;
+ pthread_join(tids[i], NULL);
+ }
+ htab_reuse__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
new file mode 100644
index 000000000000..d0b405eb2966
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdbool.h>
+#include <test_progs.h>
+#include "htab_update.skel.h"
+
+struct htab_update_ctx {
+ int fd;
+ int loop;
+ bool stop;
+};
+
+static void test_reenter_update(void)
+{
+ struct htab_update *skel;
+ void *value = NULL;
+ unsigned int key, value_size;
+ int err;
+
+ skel = htab_update__open();
+ if (!ASSERT_OK_PTR(skel, "htab_update__open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true);
+ err = htab_update__load(skel);
+ if (!ASSERT_TRUE(!err, "htab_update__load") || err)
+ goto out;
+
+ skel->bss->pid = getpid();
+ err = htab_update__attach(skel);
+ if (!ASSERT_OK(err, "htab_update__attach"))
+ goto out;
+
+ value_size = bpf_map__value_size(skel->maps.htab);
+
+ value = calloc(1, value_size);
+ if (!ASSERT_OK_PTR(value, "calloc value"))
+ goto out;
+ /*
+ * First update: plain insert. This should NOT trigger the re-entrancy
+ * path, because there is no old element to free yet.
+ */
+ key = 0;
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+ if (!ASSERT_OK(err, "first update (insert)"))
+ goto out;
+
+ /*
+ * Second update: replace existing element with same key and trigger
+ * the reentrancy of bpf_map_update_elem().
+ * check_and_free_fields() calls bpf_obj_free_fields() on the old
+ * value, which is where fentry program runs and performs a nested
+ * bpf_map_update_elem(), triggering -EDEADLK.
+ */
+ memset(value, 0, value_size);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+ if (!ASSERT_OK(err, "second update (replace)"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy");
+out:
+ htab_update__destroy(skel);
+}
+
+static void *htab_update_thread(void *arg)
+{
+ struct htab_update_ctx *ctx = arg;
+ cpu_set_t cpus;
+ int i;
+
+ /* Pinned on CPU 0 */
+ CPU_ZERO(&cpus);
+ CPU_SET(0, &cpus);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus);
+
+ i = 0;
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int key = 0, value = 0;
+ int err;
+
+ err = bpf_map_update_elem(ctx->fd, &key, &value, 0);
+ if (err) {
+ ctx->stop = true;
+ return (void *)(long)err;
+ }
+ }
+
+ return NULL;
+}
+
+static void test_concurrent_update(void)
+{
+ struct htab_update_ctx ctx;
+ struct htab_update *skel;
+ unsigned int i, nr;
+ pthread_t *tids;
+ int err;
+
+ skel = htab_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "htab_update__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.htab);
+ ctx.loop = 1000;
+ ctx.stop = false;
+
+ nr = 4;
+ tids = calloc(nr, sizeof(*tids));
+ if (!ASSERT_NEQ(tids, NULL, "no mem"))
+ goto out;
+
+ for (i = 0; i < nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_thread, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ unsigned int j;
+
+ ctx.stop = true;
+ for (j = 0; j < i; j++)
+ pthread_join(tids[j], NULL);
+ goto out;
+ }
+ }
+
+ for (i = 0; i < nr; i++) {
+ void *thread_err = NULL;
+
+ pthread_join(tids[i], &thread_err);
+ ASSERT_EQ(thread_err, NULL, "update error");
+ }
+
+out:
+ if (tids)
+ free(tids);
+ htab_update__destroy(skel);
+}
+
+void test_htab_update(void)
+{
+ if (test__start_subtest("reenter_update"))
+ test_reenter_update();
+ if (test__start_subtest("concurrent_update"))
+ test_concurrent_update();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c
new file mode 100644
index 000000000000..9ab4cd195108
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+
+#include "inner_array_lookup.skel.h"
+
+void test_inner_array_lookup(void)
+{
+ int map1_fd, err;
+ int key = 3;
+ int val = 1;
+ struct inner_array_lookup *skel;
+
+ skel = inner_array_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load_skeleton"))
+ return;
+
+ err = inner_array_lookup__attach(skel);
+ if (!ASSERT_OK(err, "skeleton_attach"))
+ goto cleanup;
+
+ map1_fd = bpf_map__fd(skel->maps.inner_map1);
+ bpf_map_update_elem(map1_fd, &key, &val, 0);
+
+ /* Probe should have set the element at index 3 to 2 */
+ bpf_map_lookup_elem(map1_fd, &key, &val);
+ ASSERT_EQ(val, 2, "value_is_2");
+
+cleanup:
+ inner_array_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
new file mode 100644
index 000000000000..4ddb8a5fece8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <net/if.h>
+#include <linux/netfilter.h>
+#include <network_helpers.h>
+#include "ip_check_defrag.skel.h"
+#include "ip_check_defrag_frags.h"
+
+/*
+ * This selftest spins up a client and an echo server, each in their own
+ * network namespace. The client will send a fragmented message to the server.
+ * The prog attached to the server will shoot down any fragments. Thus, if
+ * the server is able to correctly echo back the message to the client, we will
+ * have verified that netfilter is reassembling packets for us.
+ *
+ * Topology:
+ * =========
+ * NS0 | NS1
+ * |
+ * client | server
+ * ---------- | ----------
+ * | veth0 | --------- | veth1 |
+ * ---------- peer ----------
+ * |
+ * | with bpf
+ */
+
+#define NS0 "defrag_ns0"
+#define NS1 "defrag_ns1"
+#define VETH0 "veth0"
+#define VETH1 "veth1"
+#define VETH0_ADDR "172.16.1.100"
+#define VETH0_ADDR6 "fc00::100"
+/* The following constants must stay in sync with `generate_udp_fragments.py` */
+#define VETH1_ADDR "172.16.1.200"
+#define VETH1_ADDR6 "fc00::200"
+#define CLIENT_PORT 48878
+#define SERVER_PORT 48879
+#define MAGIC_MESSAGE "THIS IS THE ORIGINAL MESSAGE, PLEASE REASSEMBLE ME"
+
+static int setup_topology(bool ipv6)
+{
+ bool up;
+ int i;
+
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+ SYS(fail, "ip link add " VETH0 " netns " NS0 " type veth peer name " VETH1 " netns " NS1);
+ if (ipv6) {
+ SYS(fail, "ip -6 -net " NS0 " addr add " VETH0_ADDR6 "/64 dev " VETH0 " nodad");
+ SYS(fail, "ip -6 -net " NS1 " addr add " VETH1_ADDR6 "/64 dev " VETH1 " nodad");
+ } else {
+ SYS(fail, "ip -net " NS0 " addr add " VETH0_ADDR "/24 dev " VETH0);
+ SYS(fail, "ip -net " NS1 " addr add " VETH1_ADDR "/24 dev " VETH1);
+ }
+ SYS(fail, "ip -net " NS0 " link set dev " VETH0 " up");
+ SYS(fail, "ip -net " NS1 " link set dev " VETH1 " up");
+
+ /* Wait for up to 5s for links to come up */
+ for (i = 0; i < 5; ++i) {
+ if (ipv6)
+ up = !SYS_NOFAIL("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6);
+ else
+ up = !SYS_NOFAIL("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR);
+
+ if (up)
+ break;
+ }
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void cleanup_topology(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0);
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1);
+}
+
+static int attach(struct ip_check_defrag *skel, bool ipv6)
+{
+ LIBBPF_OPTS(bpf_netfilter_opts, opts,
+ .pf = ipv6 ? NFPROTO_IPV6 : NFPROTO_IPV4,
+ .priority = 42,
+ .flags = BPF_F_NETFILTER_IP_DEFRAG);
+ struct nstoken *nstoken;
+ int err = -1;
+
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto out;
+
+ skel->links.defrag = bpf_program__attach_netfilter(skel->progs.defrag, &opts);
+ if (!ASSERT_OK_PTR(skel->links.defrag, "program attach"))
+ goto out;
+
+ err = 0;
+out:
+ close_netns(nstoken);
+ return err;
+}
+
+static int send_frags(int client)
+{
+ struct sockaddr_storage saddr;
+ struct sockaddr *saddr_p;
+ socklen_t saddr_len;
+ int err;
+
+ saddr_p = (struct sockaddr *)&saddr;
+ err = make_sockaddr(AF_INET, VETH1_ADDR, SERVER_PORT, &saddr, &saddr_len);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ return -1;
+
+ err = sendto(client, frag_0, sizeof(frag_0), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag_0"))
+ return -1;
+
+ err = sendto(client, frag_1, sizeof(frag_1), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag_1"))
+ return -1;
+
+ err = sendto(client, frag_2, sizeof(frag_2), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag_2"))
+ return -1;
+
+ return 0;
+}
+
+static int send_frags6(int client)
+{
+ struct sockaddr_storage saddr;
+ struct sockaddr *saddr_p;
+ socklen_t saddr_len;
+ int err;
+
+ saddr_p = (struct sockaddr *)&saddr;
+ /* Port needs to be set to 0 for raw ipv6 socket for some reason */
+ err = make_sockaddr(AF_INET6, VETH1_ADDR6, 0, &saddr, &saddr_len);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ return -1;
+
+ err = sendto(client, frag6_0, sizeof(frag6_0), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag6_0"))
+ return -1;
+
+ err = sendto(client, frag6_1, sizeof(frag6_1), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag6_1"))
+ return -1;
+
+ err = sendto(client, frag6_2, sizeof(frag6_2), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag6_2"))
+ return -1;
+
+ return 0;
+}
+
+void test_bpf_ip_check_defrag_ok(bool ipv6)
+{
+ int family = ipv6 ? AF_INET6 : AF_INET;
+ struct network_helper_opts rx_opts = {
+ .timeout_ms = 1000,
+ };
+ struct network_helper_opts tx_ops = {
+ .timeout_ms = 1000,
+ .proto = IPPROTO_RAW,
+ };
+ struct sockaddr_storage caddr;
+ struct ip_check_defrag *skel;
+ struct nstoken *nstoken;
+ int client_tx_fd = -1;
+ int client_rx_fd = -1;
+ socklen_t caddr_len;
+ int srv_fd = -1;
+ char buf[1024];
+ int len, err;
+
+ skel = ip_check_defrag__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(setup_topology(ipv6), "setup_topology"))
+ goto out;
+
+ if (!ASSERT_OK(attach(skel, ipv6), "attach"))
+ goto out;
+
+ /* Start server in ns1 */
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns ns1"))
+ goto out;
+ srv_fd = start_server(family, SOCK_DGRAM, NULL, SERVER_PORT, 0);
+ close_netns(nstoken);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto out;
+
+ /* Open tx raw socket in ns0 */
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
+ goto out;
+ client_tx_fd = client_socket(family, SOCK_RAW, &tx_ops);
+ close_netns(nstoken);
+ if (!ASSERT_GE(client_tx_fd, 0, "client_socket"))
+ goto out;
+
+ /* Open rx socket in ns0 */
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
+ goto out;
+ client_rx_fd = client_socket(family, SOCK_DGRAM, &rx_opts);
+ close_netns(nstoken);
+ if (!ASSERT_GE(client_rx_fd, 0, "client_socket"))
+ goto out;
+
+ /* Bind rx socket to a premeditated port */
+ memset(&caddr, 0, sizeof(caddr));
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
+ goto out;
+ if (ipv6) {
+ struct sockaddr_in6 *c = (struct sockaddr_in6 *)&caddr;
+
+ c->sin6_family = AF_INET6;
+ inet_pton(AF_INET6, VETH0_ADDR6, &c->sin6_addr);
+ c->sin6_port = htons(CLIENT_PORT);
+ err = bind(client_rx_fd, (struct sockaddr *)c, sizeof(*c));
+ } else {
+ struct sockaddr_in *c = (struct sockaddr_in *)&caddr;
+
+ c->sin_family = AF_INET;
+ inet_pton(AF_INET, VETH0_ADDR, &c->sin_addr);
+ c->sin_port = htons(CLIENT_PORT);
+ err = bind(client_rx_fd, (struct sockaddr *)c, sizeof(*c));
+ }
+ close_netns(nstoken);
+ if (!ASSERT_OK(err, "bind"))
+ goto out;
+
+ /* Send message in fragments */
+ if (ipv6) {
+ if (!ASSERT_OK(send_frags6(client_tx_fd), "send_frags6"))
+ goto out;
+ } else {
+ if (!ASSERT_OK(send_frags(client_tx_fd), "send_frags"))
+ goto out;
+ }
+
+ if (!ASSERT_EQ(skel->bss->shootdowns, 0, "shootdowns"))
+ goto out;
+
+ /* Receive reassembled msg on server and echo back to client */
+ caddr_len = sizeof(caddr);
+ len = recvfrom(srv_fd, buf, sizeof(buf), 0, (struct sockaddr *)&caddr, &caddr_len);
+ if (!ASSERT_GE(len, 0, "server recvfrom"))
+ goto out;
+ len = sendto(srv_fd, buf, len, 0, (struct sockaddr *)&caddr, caddr_len);
+ if (!ASSERT_GE(len, 0, "server sendto"))
+ goto out;
+
+ /* Expect reassembed message to be echoed back */
+ len = recvfrom(client_rx_fd, buf, sizeof(buf), 0, NULL, NULL);
+ if (!ASSERT_EQ(len, sizeof(MAGIC_MESSAGE) - 1, "client short read"))
+ goto out;
+
+out:
+ if (client_rx_fd != -1)
+ close(client_rx_fd);
+ if (client_tx_fd != -1)
+ close(client_tx_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
+ cleanup_topology();
+ ip_check_defrag__destroy(skel);
+}
+
+void test_bpf_ip_check_defrag(void)
+{
+ if (test__start_subtest("v4"))
+ test_bpf_ip_check_defrag_ok(false);
+ if (test__start_subtest("v6"))
+ test_bpf_ip_check_defrag_ok(true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
new file mode 100644
index 000000000000..3cea71f9c500
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+
+#include "iters.skel.h"
+#include "iters_state_safety.skel.h"
+#include "iters_looping.skel.h"
+#include "iters_num.skel.h"
+#include "iters_testmod.skel.h"
+#include "iters_testmod_seq.skel.h"
+#include "iters_task_vma.skel.h"
+#include "iters_task.skel.h"
+#include "iters_css_task.skel.h"
+#include "iters_css.skel.h"
+#include "iters_task_failure.skel.h"
+
+static void subtest_num_iters(void)
+{
+ struct iters_num *skel;
+ int err;
+
+ skel = iters_num__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = iters_num__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ usleep(1);
+ iters_num__detach(skel);
+
+#define VALIDATE_CASE(case_name) \
+ ASSERT_EQ(skel->bss->res_##case_name, \
+ skel->rodata->exp_##case_name, \
+ #case_name)
+
+ VALIDATE_CASE(empty_zero);
+ VALIDATE_CASE(empty_int_min);
+ VALIDATE_CASE(empty_int_max);
+ VALIDATE_CASE(empty_minus_one);
+
+ VALIDATE_CASE(simple_sum);
+ VALIDATE_CASE(neg_sum);
+ VALIDATE_CASE(very_neg_sum);
+ VALIDATE_CASE(neg_pos_sum);
+
+ VALIDATE_CASE(invalid_range);
+ VALIDATE_CASE(max_range);
+ VALIDATE_CASE(e2big_range);
+
+ VALIDATE_CASE(succ_elem_cnt);
+ VALIDATE_CASE(overfetched_elem_cnt);
+ VALIDATE_CASE(fail_elem_cnt);
+
+#undef VALIDATE_CASE
+
+cleanup:
+ iters_num__destroy(skel);
+}
+
+static void subtest_testmod_seq_iters(void)
+{
+ struct iters_testmod_seq *skel;
+ int err;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ skel = iters_testmod_seq__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = iters_testmod_seq__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ usleep(1);
+ iters_testmod_seq__detach(skel);
+
+#define VALIDATE_CASE(case_name) \
+ ASSERT_EQ(skel->bss->res_##case_name, \
+ skel->rodata->exp_##case_name, \
+ #case_name)
+
+ VALIDATE_CASE(empty);
+ VALIDATE_CASE(full);
+ VALIDATE_CASE(truncated);
+
+#undef VALIDATE_CASE
+
+cleanup:
+ iters_testmod_seq__destroy(skel);
+}
+
+static void subtest_task_vma_iters(void)
+{
+ unsigned long start, end, bpf_iter_start, bpf_iter_end;
+ struct iters_task_vma *skel;
+ char rest_of_line[1000];
+ unsigned int seen;
+ FILE *f = NULL;
+ int err;
+
+ skel = iters_task_vma__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+
+ err = iters_task_vma__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ getpgid(skel->bss->target_pid);
+ iters_task_vma__detach(skel);
+
+ if (!ASSERT_GT(skel->bss->vmas_seen, 0, "vmas_seen_gt_zero"))
+ goto cleanup;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!ASSERT_OK_PTR(f, "proc_maps_fopen"))
+ goto cleanup;
+
+ seen = 0;
+ while (fscanf(f, "%lx-%lx %[^\n]\n", &start, &end, rest_of_line) == 3) {
+ /* [vsyscall] vma isn't _really_ part of task->mm vmas.
+ * /proc/PID/maps returns it when out of vmas - see get_gate_vma
+ * calls in fs/proc/task_mmu.c
+ */
+ if (strstr(rest_of_line, "[vsyscall]"))
+ continue;
+
+ bpf_iter_start = skel->bss->vm_ranges[seen].vm_start;
+ bpf_iter_end = skel->bss->vm_ranges[seen].vm_end;
+
+ ASSERT_EQ(bpf_iter_start, start, "vma->vm_start match");
+ ASSERT_EQ(bpf_iter_end, end, "vma->vm_end match");
+ seen++;
+ }
+
+ if (!ASSERT_EQ(skel->bss->vmas_seen, seen, "vmas_seen_eq"))
+ goto cleanup;
+
+cleanup:
+ if (f)
+ fclose(f);
+ iters_task_vma__destroy(skel);
+}
+
+static pthread_mutex_t do_nothing_mutex;
+
+static void *do_nothing_wait(void *arg)
+{
+ pthread_mutex_lock(&do_nothing_mutex);
+ pthread_mutex_unlock(&do_nothing_mutex);
+
+ pthread_exit(arg);
+}
+
+#define thread_num 2
+
+static void subtest_task_iters(void)
+{
+ struct iters_task *skel = NULL;
+ pthread_t thread_ids[thread_num];
+ void *ret;
+ int err;
+
+ skel = iters_task__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+ skel->bss->target_pid = getpid();
+ err = iters_task__attach(skel);
+ if (!ASSERT_OK(err, "iters_task__attach"))
+ goto cleanup;
+ pthread_mutex_lock(&do_nothing_mutex);
+ for (int i = 0; i < thread_num; i++)
+ ASSERT_OK(pthread_create(&thread_ids[i], NULL, &do_nothing_wait, NULL),
+ "pthread_create");
+
+ syscall(SYS_getpgid);
+ iters_task__detach(skel);
+ ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt");
+ ASSERT_EQ(skel->bss->threads_cnt, thread_num + 2, "threads_cnt");
+ ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 2, "proc_threads_cnt");
+ ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt");
+ pthread_mutex_unlock(&do_nothing_mutex);
+ for (int i = 0; i < thread_num; i++)
+ ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join");
+cleanup:
+ iters_task__destroy(skel);
+}
+
+extern int stack_mprotect(void);
+
+static void subtest_css_task_iters(void)
+{
+ struct iters_css_task *skel = NULL;
+ int err, cg_fd, cg_id;
+ const char *cgrp_path = "/cg1";
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto cleanup;
+ cg_fd = create_and_get_cgroup(cgrp_path);
+ if (!ASSERT_GE(cg_fd, 0, "create_and_get_cgroup"))
+ goto cleanup;
+ cg_id = get_cgroup_id(cgrp_path);
+ err = join_cgroup(cgrp_path);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ goto cleanup;
+
+ skel = iters_css_task__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->cg_id = cg_id;
+ err = iters_css_task__attach(skel);
+ if (!ASSERT_OK(err, "iters_task__attach"))
+ goto cleanup;
+ err = stack_mprotect();
+ if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+ !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+ goto cleanup;
+ iters_css_task__detach(skel);
+ ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+
+cleanup:
+ cleanup_cgroup_environment();
+ iters_css_task__destroy(skel);
+}
+
+static void subtest_css_iters(void)
+{
+ struct iters_css *skel = NULL;
+ struct {
+ const char *path;
+ int fd;
+ } cgs[] = {
+ { "/cg1" },
+ { "/cg1/cg2" },
+ { "/cg1/cg2/cg3" },
+ { "/cg1/cg2/cg3/cg4" },
+ };
+ int err, cg_nr = ARRAY_SIZE(cgs);
+ int i;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto cleanup;
+ for (i = 0; i < cg_nr; i++) {
+ cgs[i].fd = create_and_get_cgroup(cgs[i].path);
+ if (!ASSERT_GE(cgs[i].fd, 0, "create_and_get_cgroup"))
+ goto cleanup;
+ }
+
+ skel = iters_css__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->root_cg_id = get_cgroup_id(cgs[0].path);
+ skel->bss->leaf_cg_id = get_cgroup_id(cgs[cg_nr - 1].path);
+ err = iters_css__attach(skel);
+
+ if (!ASSERT_OK(err, "iters_task__attach"))
+ goto cleanup;
+
+ syscall(SYS_getpgid);
+ ASSERT_EQ(skel->bss->pre_order_cnt, cg_nr, "pre_order_cnt");
+ ASSERT_EQ(skel->bss->first_cg_id, get_cgroup_id(cgs[0].path), "first_cg_id");
+
+ ASSERT_EQ(skel->bss->post_order_cnt, cg_nr, "post_order_cnt");
+ ASSERT_EQ(skel->bss->last_cg_id, get_cgroup_id(cgs[0].path), "last_cg_id");
+ ASSERT_EQ(skel->bss->tree_high, cg_nr - 1, "tree_high");
+ iters_css__detach(skel);
+cleanup:
+ cleanup_cgroup_environment();
+ iters_css__destroy(skel);
+}
+
+void test_iters(void)
+{
+ RUN_TESTS(iters_state_safety);
+ RUN_TESTS(iters_looping);
+ RUN_TESTS(iters);
+ RUN_TESTS(iters_css_task);
+
+ if (env.has_testmod) {
+ RUN_TESTS(iters_testmod);
+ RUN_TESTS(iters_testmod_seq);
+ }
+
+ if (test__start_subtest("num"))
+ subtest_num_iters();
+ if (test__start_subtest("testmod_seq"))
+ subtest_testmod_seq_iters();
+ if (test__start_subtest("task_vma"))
+ subtest_task_vma_iters();
+ if (test__start_subtest("task"))
+ subtest_task_iters();
+ if (test__start_subtest("css_task"))
+ subtest_css_task_iters();
+ if (test__start_subtest("css"))
+ subtest_css_iters();
+ RUN_TESTS(iters_task_failure);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c
new file mode 100644
index 000000000000..3add34df5767
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "jeq_infer_not_null_fail.skel.h"
+
+void test_jeq_infer_not_null(void)
+{
+ RUN_TESTS(jeq_infer_not_null_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c
new file mode 100644
index 000000000000..5639428607e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "jit_probe_mem.skel.h"
+
+void test_jit_probe_mem(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct jit_probe_mem *skel;
+ int ret;
+
+ skel = jit_probe_mem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "jit_probe_mem__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_jit_probe_mem), &opts);
+ ASSERT_OK(ret, "jit_probe_mem ret");
+ ASSERT_OK(opts.retval, "jit_probe_mem opts.retval");
+ ASSERT_EQ(skel->data->total_sum, 192, "jit_probe_mem total_sum");
+
+ jit_probe_mem__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
new file mode 100644
index 000000000000..97b00c7efe94
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Microsoft */
+#include <test_progs.h>
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
+#include "test_kernel_flag.skel.h"
+
+void test_kernel_flag(void)
+{
+ struct test_kernel_flag *lsm_skel;
+ struct kfunc_call_test *skel = NULL;
+ struct kfunc_call_test_lskel *lskel = NULL;
+ int ret;
+
+ lsm_skel = test_kernel_flag__open_and_load();
+ if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel"))
+ return;
+
+ lsm_skel->bss->monitored_tid = sys_gettid();
+
+ ret = test_kernel_flag__attach(lsm_skel);
+ if (!ASSERT_OK(ret, "test_kernel_flag__attach"))
+ goto close_prog;
+
+ /* Test with skel. This should pass the gatekeeper */
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto close_prog;
+
+ /* Test with lskel. This should fail due to blocking kernel-based bpf() invocations */
+ lskel = kfunc_call_test_lskel__open_and_load();
+ if (!ASSERT_ERR_PTR(lskel, "lskel"))
+ goto close_prog;
+
+close_prog:
+ if (skel)
+ kfunc_call_test__destroy(skel);
+ if (lskel)
+ kfunc_call_test_lskel__destroy(lskel);
+
+ lsm_skel->bss->monitored_tid = 0;
+ test_kernel_flag__destroy(lsm_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 42c3a3103c26..34f8822fd221 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
#include <test_progs.h>
#include <network_helpers.h>
+#include "kfree_skb.skel.h"
struct meta {
int ifindex;
@@ -35,7 +37,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
"cb32_0 %x != %x\n",
meta->cb32_0, cb.cb32[0]))
return;
- if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth",
+ if (CHECK(pkt_v6->eth.h_proto != htons(ETH_P_IPV6), "check_eth",
"h_proto %x\n", pkt_v6->eth.h_proto))
return;
if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip",
@@ -48,83 +50,60 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
*(bool *)ctx = true;
}
-void test_kfree_skb(void)
+/* TODO: fix kernel panic caused by this test in parallel mode */
+void serial_test_kfree_skb(void)
{
struct __sk_buff skb = {};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v6,
.data_size_in = sizeof(pkt_v6),
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
- };
- struct bpf_prog_load_attr attr = {
- .file = "./kfree_skb.o",
- };
-
- struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL;
- struct bpf_map *perf_buf_map, *global_data;
- struct bpf_program *prog, *fentry, *fexit;
- struct bpf_object *obj, *obj2 = NULL;
- struct perf_buffer_opts pb_opts = {};
+ );
+ struct kfree_skb *skel = NULL;
+ struct bpf_link *link;
+ struct bpf_object *obj;
struct perf_buffer *pb = NULL;
- int err, kfree_skb_fd;
+ int err, prog_fd;
bool passed = false;
__u32 duration = 0;
const int zero = 0;
bool test_ok[2];
- err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
- &obj, &tattr.prog_fd);
+ err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
return;
- err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd);
- if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ skel = kfree_skb__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kfree_skb_skel"))
goto close_prog;
- prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb");
- if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n"))
- goto close_prog;
- fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans");
- if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n"))
- goto close_prog;
- fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans");
- if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n"))
- goto close_prog;
-
- global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss");
- if (CHECK(!global_data, "find global data", "not found\n"))
+ link = bpf_program__attach_raw_tracepoint(skel->progs.trace_kfree_skb, NULL);
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
+ skel->links.trace_kfree_skb = link;
- link = bpf_program__attach_raw_tracepoint(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
- goto close_prog;
- link_fentry = bpf_program__attach_trace(fentry);
- if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
- PTR_ERR(link_fentry)))
- goto close_prog;
- link_fexit = bpf_program__attach_trace(fexit);
- if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
- PTR_ERR(link_fexit)))
+ link = bpf_program__attach_trace(skel->progs.fentry_eth_type_trans);
+ if (!ASSERT_OK_PTR(link, "attach fentry"))
goto close_prog;
+ skel->links.fentry_eth_type_trans = link;
- perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
- if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+ link = bpf_program__attach_trace(skel->progs.fexit_eth_type_trans);
+ if (!ASSERT_OK_PTR(link, "attach fexit"))
goto close_prog;
+ skel->links.fexit_eth_type_trans = link;
/* set up perf buffer */
- pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &passed;
- pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1,
+ on_sample, NULL, &passed, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
memcpy(skb.cb, &cb, sizeof(cb));
- err = bpf_prog_test_run_xattr(&tattr);
- duration = tattr.duration;
- CHECK(err || tattr.retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, tattr.retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_OK(topts.retval, "ipv6 test_run retval");
/* read perf buffer */
err = perf_buffer__poll(pb, 100);
@@ -134,9 +113,9 @@ void test_kfree_skb(void)
/* make sure kfree_skb program was triggered
* and it sent expected skb into ring buffer
*/
- CHECK_FAIL(!passed);
+ ASSERT_TRUE(passed, "passed");
- err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok);
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.bss), &zero, test_ok);
if (CHECK(err, "get_result",
"failed to get output data: %d\n", err))
goto close_prog;
@@ -144,12 +123,6 @@ void test_kfree_skb(void)
CHECK_FAIL(!test_ok[0] || !test_ok[1]);
close_prog:
perf_buffer__free(pb);
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(link_fentry))
- bpf_link__destroy(link_fentry);
- if (!IS_ERR_OR_NULL(link_fexit))
- bpf_link__destroy(link_fexit);
bpf_object__close(obj);
- bpf_object__close(obj2);
+ kfree_skb__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
new file mode 100644
index 000000000000..f79c8e53cb3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "kfunc_call_fail.skel.h"
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
+#include "kfunc_call_test_subprog.skel.h"
+#include "kfunc_call_test_subprog.lskel.h"
+#include "kfunc_call_destructive.skel.h"
+
+#include "cap_helpers.h"
+
+static size_t log_buf_sz = 1048576; /* 1 MB */
+static char obj_log_buf[1048576];
+
+enum kfunc_test_type {
+ tc_test = 0,
+ syscall_test,
+ syscall_null_ctx_test,
+};
+
+struct kfunc_test_params {
+ const char *prog_name;
+ unsigned long lskel_prog_desc_offset;
+ int retval;
+ enum kfunc_test_type test_type;
+ const char *expected_err_msg;
+};
+
+#define __BPF_TEST_SUCCESS(name, __retval, type) \
+ { \
+ .prog_name = #name, \
+ .lskel_prog_desc_offset = offsetof(struct kfunc_call_test_lskel, progs.name), \
+ .retval = __retval, \
+ .test_type = type, \
+ .expected_err_msg = NULL, \
+ }
+
+#define __BPF_TEST_FAIL(name, __retval, type, error_msg) \
+ { \
+ .prog_name = #name, \
+ .lskel_prog_desc_offset = 0 /* unused when test is failing */, \
+ .retval = __retval, \
+ .test_type = type, \
+ .expected_err_msg = error_msg, \
+ }
+
+#define TC_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, tc_test)
+#define SYSCALL_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_test)
+#define SYSCALL_NULL_CTX_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_null_ctx_test)
+
+#define TC_FAIL(name, retval, error_msg) __BPF_TEST_FAIL(name, retval, tc_test, error_msg)
+#define SYSCALL_NULL_CTX_FAIL(name, retval, error_msg) \
+ __BPF_TEST_FAIL(name, retval, syscall_null_ctx_test, error_msg)
+
+static struct kfunc_test_params kfunc_tests[] = {
+ /* failure cases:
+ * if retval is 0 -> the program will fail to load and the error message is an error
+ * if retval is not 0 -> the program can be loaded but running it will gives the
+ * provided return value. The error message is thus the one
+ * from a successful load
+ */
+ SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_fail, -EINVAL, "processed 4 insns"),
+ SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_null_fail, -EINVAL, "processed 4 insns"),
+ TC_FAIL(kfunc_call_test_get_mem_fail_rdonly, 0, "R0 cannot write into rdonly_mem"),
+ TC_FAIL(kfunc_call_test_get_mem_fail_use_after_free, 0, "invalid mem access 'scalar'"),
+ TC_FAIL(kfunc_call_test_get_mem_fail_oob, 0, "min value is outside of the allowed memory range"),
+ TC_FAIL(kfunc_call_test_get_mem_fail_not_const, 0, "is not a const"),
+ TC_FAIL(kfunc_call_test_mem_acquire_fail, 0, "acquire kernel function does not return PTR_TO_BTF_ID"),
+ TC_FAIL(kfunc_call_test_pointer_arg_type_mismatch, 0, "arg#0 expected pointer to ctx, but got scalar"),
+
+ /* success cases */
+ TC_TEST(kfunc_call_test1, 12),
+ TC_TEST(kfunc_call_test2, 3),
+ TC_TEST(kfunc_call_test4, -1234),
+ TC_TEST(kfunc_call_test_ref_btf_id, 0),
+ TC_TEST(kfunc_call_test_get_mem, 42),
+ SYSCALL_TEST(kfunc_syscall_test, 0),
+ SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0),
+ TC_TEST(kfunc_call_test_static_unused_arg, 0),
+ TC_TEST(kfunc_call_ctx, 0),
+};
+
+struct syscall_test_args {
+ __u8 data[16];
+ size_t size;
+};
+
+static void verify_success(struct kfunc_test_params *param)
+{
+ struct kfunc_call_test_lskel *lskel = NULL;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_prog_desc *lskel_prog;
+ struct kfunc_call_test *skel;
+ struct bpf_program *prog;
+ int prog_fd, err;
+ struct syscall_test_args args = {
+ .size = 10,
+ };
+
+ switch (param->test_type) {
+ case syscall_test:
+ topts.ctx_in = &args;
+ topts.ctx_size_in = sizeof(args);
+ /* fallthrough */
+ case syscall_null_ctx_test:
+ break;
+ case tc_test:
+ topts.data_in = &pkt_v4;
+ topts.data_size_in = sizeof(pkt_v4);
+ topts.repeat = 1;
+ break;
+ }
+
+ /* first test with normal libbpf */
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, param->prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, param->prog_name))
+ goto cleanup;
+
+ if (!ASSERT_EQ(topts.retval, param->retval, "retval"))
+ goto cleanup;
+
+ /* second test with light skeletons */
+ lskel = kfunc_call_test_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(lskel, "lskel"))
+ goto cleanup;
+
+ lskel_prog = (struct bpf_prog_desc *)((char *)lskel + param->lskel_prog_desc_offset);
+
+ prog_fd = lskel_prog->prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, param->prog_name))
+ goto cleanup;
+
+ ASSERT_EQ(topts.retval, param->retval, "retval");
+
+cleanup:
+ kfunc_call_test__destroy(skel);
+ if (lskel)
+ kfunc_call_test_lskel__destroy(lskel);
+}
+
+static void verify_fail(struct kfunc_test_params *param)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_program *prog;
+ struct kfunc_call_fail *skel;
+ int prog_fd, err;
+ struct syscall_test_args args = {
+ .size = 10,
+ };
+
+ opts.kernel_log_buf = obj_log_buf;
+ opts.kernel_log_size = log_buf_sz;
+ opts.kernel_log_level = 1;
+
+ switch (param->test_type) {
+ case syscall_test:
+ topts.ctx_in = &args;
+ topts.ctx_size_in = sizeof(args);
+ /* fallthrough */
+ case syscall_null_ctx_test:
+ break;
+ case tc_test:
+ topts.data_in = &pkt_v4;
+ topts.data_size_in = sizeof(pkt_v4);
+ topts.repeat = 1;
+ break;
+ }
+
+ skel = kfunc_call_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "kfunc_call_fail__open_opts"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, param->prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ bpf_program__set_autoload(prog, true);
+
+ err = kfunc_call_fail__load(skel);
+ if (!param->retval) {
+ /* the verifier is supposed to complain and refuses to load */
+ if (!ASSERT_ERR(err, "unexpected load success"))
+ goto out_err;
+
+ } else {
+ /* the program is loaded but must dynamically fail */
+ if (!ASSERT_OK(err, "unexpected load error"))
+ goto out_err;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_EQ(err, param->retval, param->prog_name))
+ goto out_err;
+ }
+
+out_err:
+ if (!ASSERT_OK_PTR(strstr(obj_log_buf, param->expected_err_msg), "expected_err_msg")) {
+ fprintf(stderr, "Expected err_msg: %s\n", param->expected_err_msg);
+ fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+ }
+
+cleanup:
+ kfunc_call_fail__destroy(skel);
+}
+
+static void test_main(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kfunc_tests); i++) {
+ if (!test__start_subtest(kfunc_tests[i].prog_name))
+ continue;
+
+ if (!kfunc_tests[i].expected_err_msg)
+ verify_success(&kfunc_tests[i]);
+ else
+ verify_fail(&kfunc_tests[i]);
+ }
+}
+
+static void test_subprog(void)
+{
+ struct kfunc_call_test_subprog *skel;
+ int prog_fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = kfunc_call_test_subprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(topts.retval, 10, "test1-retval");
+ ASSERT_NEQ(skel->data->active_res, -1, "active_res");
+ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
+
+ kfunc_call_test_subprog__destroy(skel);
+}
+
+static void test_subprog_lskel(void)
+{
+ struct kfunc_call_test_subprog_lskel *skel;
+ int prog_fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = kfunc_call_test_subprog_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = skel->progs.kfunc_call_test1.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(topts.retval, 10, "test1-retval");
+ ASSERT_NEQ(skel->data->active_res, -1, "active_res");
+ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
+
+ kfunc_call_test_subprog_lskel__destroy(skel);
+}
+
+static int test_destructive_open_and_load(void)
+{
+ struct kfunc_call_destructive *skel;
+ int err;
+
+ skel = kfunc_call_destructive__open();
+ if (!ASSERT_OK_PTR(skel, "prog_open"))
+ return -1;
+
+ err = kfunc_call_destructive__load(skel);
+
+ kfunc_call_destructive__destroy(skel);
+
+ return err;
+}
+
+static void test_destructive(void)
+{
+ __u64 save_caps = 0;
+
+ ASSERT_OK(test_destructive_open_and_load(), "successful_load");
+
+ if (!ASSERT_OK(cap_disable_effective(1ULL << CAP_SYS_BOOT, &save_caps), "drop_caps"))
+ return;
+
+ ASSERT_EQ(test_destructive_open_and_load(), -13, "no_caps_failure");
+
+ cap_enable_effective(save_caps, NULL);
+}
+
+void test_kfunc_call(void)
+{
+ test_main();
+
+ if (test__start_subtest("subprog"))
+ test_subprog();
+
+ if (test__start_subtest("subprog_lskel"))
+ test_subprog_lskel();
+
+ if (test__start_subtest("destructive"))
+ test_destructive();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
new file mode 100644
index 000000000000..8cd298b78e44
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2022 Facebook
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <test_progs.h>
+#include "test_kfunc_dynptr_param.skel.h"
+
+static struct {
+ const char *prog_name;
+ int expected_runtime_err;
+} kfunc_dynptr_tests[] = {
+ {"dynptr_data_null", -EBADMSG},
+};
+
+static bool kfunc_not_supported;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
+ va_list args)
+{
+ if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
+ return 0;
+
+ if (strcmp(va_arg(args, char *), "bpf_verify_pkcs7_signature"))
+ return 0;
+
+ kfunc_not_supported = true;
+ return 0;
+}
+
+static bool has_pkcs7_kfunc_support(void)
+{
+ struct test_kfunc_dynptr_param *skel;
+ libbpf_print_fn_t old_print_cb;
+ int err;
+
+ skel = test_kfunc_dynptr_param__open();
+ if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open"))
+ return false;
+
+ kfunc_not_supported = false;
+
+ old_print_cb = libbpf_set_print(libbpf_print_cb);
+ err = test_kfunc_dynptr_param__load(skel);
+ libbpf_set_print(old_print_cb);
+
+ if (err < 0 && kfunc_not_supported) {
+ fprintf(stderr,
+ "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n",
+ __func__);
+ test_kfunc_dynptr_param__destroy(skel);
+ return false;
+ }
+
+ test_kfunc_dynptr_param__destroy(skel);
+
+ return true;
+}
+
+static void verify_success(const char *prog_name, int expected_runtime_err)
+{
+ struct test_kfunc_dynptr_param *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ __u32 next_id;
+ int err;
+
+ skel = test_kfunc_dynptr_param__open();
+ if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = test_kfunc_dynptr_param__load(skel);
+
+ if (!ASSERT_OK(err, "test_kfunc_dynptr_param__load"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+
+ err = bpf_prog_get_next_id(0, &next_id);
+
+ bpf_link__destroy(link);
+
+ if (!ASSERT_OK(err, "bpf_prog_get_next_id"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->err, expected_runtime_err, "err");
+
+cleanup:
+ test_kfunc_dynptr_param__destroy(skel);
+}
+
+void test_kfunc_dynptr_param(void)
+{
+ int i;
+
+ if (!has_pkcs7_kfunc_support())
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(kfunc_dynptr_tests); i++) {
+ if (!test__start_subtest(kfunc_dynptr_tests[i].prog_name))
+ continue;
+
+ verify_success(kfunc_dynptr_tests[i].prog_name,
+ kfunc_dynptr_tests[i].expected_runtime_err);
+ }
+ RUN_TESTS(test_kfunc_dynptr_param);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c b/tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c
new file mode 100644
index 000000000000..48c0560d398e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <testing_helpers.h>
+
+#include "kfunc_module_order.skel.h"
+
+static int test_run_prog(const struct bpf_program *prog,
+ struct bpf_test_run_opts *opts)
+{
+ int err;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ return err;
+
+ if (!ASSERT_EQ((int)opts->retval, 0, bpf_program__name(prog)))
+ return -EINVAL;
+
+ return 0;
+}
+
+void test_kfunc_module_order(void)
+{
+ struct kfunc_module_order *skel;
+ char pkt_data[64] = {};
+ int err = 0;
+
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, test_opts, .data_in = pkt_data,
+ .data_size_in = sizeof(pkt_data));
+
+ err = load_module("bpf_test_modorder_x.ko",
+ env_verbosity > VERBOSE_NONE);
+ if (!ASSERT_OK(err, "load bpf_test_modorder_x.ko"))
+ return;
+
+ err = load_module("bpf_test_modorder_y.ko",
+ env_verbosity > VERBOSE_NONE);
+ if (!ASSERT_OK(err, "load bpf_test_modorder_y.ko"))
+ goto exit_modx;
+
+ skel = kfunc_module_order__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kfunc_module_order__open_and_load()")) {
+ err = -EINVAL;
+ goto exit_mods;
+ }
+
+ test_run_prog(skel->progs.call_kfunc_xy, &test_opts);
+ test_run_prog(skel->progs.call_kfunc_yx, &test_opts);
+
+ kfunc_module_order__destroy(skel);
+exit_mods:
+ unload_module("bpf_test_modorder_y", env_verbosity > VERBOSE_NONE);
+exit_modx:
+ unload_module("bpf_test_modorder_x", env_verbosity > VERBOSE_NONE);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c
new file mode 100644
index 000000000000..c8f4dcaac7c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2024 Meta Platforms, Inc */
+
+#include <test_progs.h>
+#include "test_kfunc_param_nullable.skel.h"
+
+void test_kfunc_param_nullable(void)
+{
+ RUN_TESTS(test_kfunc_param_nullable);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
new file mode 100644
index 000000000000..6e35e13c2022
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "kmem_cache_iter.skel.h"
+
+#define SLAB_NAME_MAX 32
+
+struct kmem_cache_result {
+ char name[SLAB_NAME_MAX];
+ long obj_size;
+};
+
+static void subtest_kmem_cache_iter_check_task_struct(struct kmem_cache_iter *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .flags = 0, /* Run it with the current task */
+ );
+ int prog_fd = bpf_program__fd(skel->progs.check_task_struct);
+
+ /* Get task_struct and check it if's from a slab cache */
+ ASSERT_OK(bpf_prog_test_run_opts(prog_fd, &opts), "prog_test_run");
+
+ /* The BPF program should set 'found' variable */
+ ASSERT_EQ(skel->bss->task_struct_found, 1, "task_struct_found");
+}
+
+static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel)
+{
+ FILE *fp;
+ int map_fd;
+ char name[SLAB_NAME_MAX];
+ unsigned long objsize;
+ char rest_of_line[1000];
+ struct kmem_cache_result r;
+ int seen = 0;
+
+ fp = fopen("/proc/slabinfo", "r");
+ if (fp == NULL) {
+ /* CONFIG_SLUB_DEBUG is not enabled */
+ return;
+ }
+
+ map_fd = bpf_map__fd(skel->maps.slab_result);
+
+ /* Ignore first two lines for header */
+ fscanf(fp, "slabinfo - version: %*d.%*d\n");
+ fscanf(fp, "# %*s %*s %*s %*s %*s %*s : %[^\n]\n", rest_of_line);
+
+ /* Compare name and objsize only - others can be changes frequently */
+ while (fscanf(fp, "%s %*u %*u %lu %*u %*u : %[^\n]\n",
+ name, &objsize, rest_of_line) == 3) {
+ int ret = bpf_map_lookup_elem(map_fd, &seen, &r);
+
+ if (!ASSERT_OK(ret, "kmem_cache_lookup"))
+ break;
+
+ ASSERT_STRNEQ(r.name, name, sizeof(r.name) - 1,
+ "kmem_cache_name");
+ ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize");
+
+ seen++;
+ }
+
+ ASSERT_EQ(skel->bss->kmem_cache_seen, seen, "kmem_cache_seen_eq");
+
+ fclose(fp);
+}
+
+static void subtest_kmem_cache_iter_open_coded(struct kmem_cache_iter *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, fd;
+
+ /* No need to attach it, just run it directly */
+ fd = bpf_program__fd(skel->progs.open_coded_iter);
+
+ err = bpf_prog_test_run_opts(fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ /* It should be same as we've seen from the explicit iterator */
+ ASSERT_EQ(skel->bss->open_coded_seen, skel->bss->kmem_cache_seen, "open_code_seen_eq");
+}
+
+void test_kmem_cache_iter(void)
+{
+ struct kmem_cache_iter *skel = NULL;
+ char buf[256];
+ int iter_fd;
+
+ skel = kmem_cache_iter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kmem_cache_iter__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(kmem_cache_iter__attach(skel), "skel_attach"))
+ goto destroy;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.slab_info_collector));
+ if (!ASSERT_GE(iter_fd, 0, "iter_create"))
+ goto destroy;
+
+ memset(buf, 0, sizeof(buf));
+ while (read(iter_fd, buf, sizeof(buf)) > 0) {
+ /* Read out all contents */
+ printf("%s", buf);
+ }
+
+ /* Next reads should return 0 */
+ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
+
+ if (test__start_subtest("check_task_struct"))
+ subtest_kmem_cache_iter_check_task_struct(skel);
+ if (test__start_subtest("check_slabinfo"))
+ subtest_kmem_cache_iter_check_slabinfo(skel);
+ if (test__start_subtest("open_coded_iter"))
+ subtest_kmem_cache_iter_open_coded(skel);
+
+ close(iter_fd);
+
+destroy:
+ kmem_cache_iter__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
new file mode 100644
index 000000000000..6cfaa978bc9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "kprobe_multi.skel.h"
+#include "trace_helpers.h"
+#include "kprobe_multi_empty.skel.h"
+#include "kprobe_multi_override.skel.h"
+#include "kprobe_multi_session.skel.h"
+#include "kprobe_multi_session_cookie.skel.h"
+#include "kprobe_multi_verifier.skel.h"
+#include "kprobe_write_ctx.skel.h"
+#include "bpf/libbpf_internal.h"
+#include "bpf/hashmap.h"
+
+static void kprobe_multi_test_run(struct kprobe_multi *skel, bool test_return)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->kprobe_test1_result, 1, "kprobe_test1_result");
+ ASSERT_EQ(skel->bss->kprobe_test2_result, 1, "kprobe_test2_result");
+ ASSERT_EQ(skel->bss->kprobe_test3_result, 1, "kprobe_test3_result");
+ ASSERT_EQ(skel->bss->kprobe_test4_result, 1, "kprobe_test4_result");
+ ASSERT_EQ(skel->bss->kprobe_test5_result, 1, "kprobe_test5_result");
+ ASSERT_EQ(skel->bss->kprobe_test6_result, 1, "kprobe_test6_result");
+ ASSERT_EQ(skel->bss->kprobe_test7_result, 1, "kprobe_test7_result");
+ ASSERT_EQ(skel->bss->kprobe_test8_result, 1, "kprobe_test8_result");
+
+ if (test_return) {
+ ASSERT_EQ(skel->bss->kretprobe_test1_result, 1, "kretprobe_test1_result");
+ ASSERT_EQ(skel->bss->kretprobe_test2_result, 1, "kretprobe_test2_result");
+ ASSERT_EQ(skel->bss->kretprobe_test3_result, 1, "kretprobe_test3_result");
+ ASSERT_EQ(skel->bss->kretprobe_test4_result, 1, "kretprobe_test4_result");
+ ASSERT_EQ(skel->bss->kretprobe_test5_result, 1, "kretprobe_test5_result");
+ ASSERT_EQ(skel->bss->kretprobe_test6_result, 1, "kretprobe_test6_result");
+ ASSERT_EQ(skel->bss->kretprobe_test7_result, 1, "kretprobe_test7_result");
+ ASSERT_EQ(skel->bss->kretprobe_test8_result, 1, "kretprobe_test8_result");
+ }
+}
+
+static void test_skel_api(void)
+{
+ struct kprobe_multi *skel = NULL;
+ int err;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi__open_and_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ err = kprobe_multi__attach(skel);
+ if (!ASSERT_OK(err, "kprobe_multi__attach"))
+ goto cleanup;
+
+ kprobe_multi_test_run(skel, true);
+
+cleanup:
+ kprobe_multi__destroy(skel);
+}
+
+static void test_link_api(struct bpf_link_create_opts *opts)
+{
+ int prog_fd, link1_fd = -1, link2_fd = -1;
+ struct kprobe_multi *skel = NULL;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ prog_fd = bpf_program__fd(skel->progs.test_kprobe);
+ link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, opts);
+ if (!ASSERT_GE(link1_fd, 0, "link_fd"))
+ goto cleanup;
+
+ opts->kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
+ prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
+ link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, opts);
+ if (!ASSERT_GE(link2_fd, 0, "link_fd"))
+ goto cleanup;
+
+ kprobe_multi_test_run(skel, true);
+
+cleanup:
+ if (link1_fd != -1)
+ close(link1_fd);
+ if (link2_fd != -1)
+ close(link2_fd);
+ kprobe_multi__destroy(skel);
+}
+
+#define GET_ADDR(__sym, __addr) ({ \
+ __addr = ksym_get_addr(__sym); \
+ if (!ASSERT_NEQ(__addr, 0, "kallsyms load failed for " #__sym)) \
+ return; \
+})
+
+static void test_link_api_addrs(void)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ unsigned long long addrs[8];
+
+ GET_ADDR("bpf_fentry_test1", addrs[0]);
+ GET_ADDR("bpf_fentry_test2", addrs[1]);
+ GET_ADDR("bpf_fentry_test3", addrs[2]);
+ GET_ADDR("bpf_fentry_test4", addrs[3]);
+ GET_ADDR("bpf_fentry_test5", addrs[4]);
+ GET_ADDR("bpf_fentry_test6", addrs[5]);
+ GET_ADDR("bpf_fentry_test7", addrs[6]);
+ GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+ opts.kprobe_multi.addrs = (const unsigned long*) addrs;
+ opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
+ test_link_api(&opts);
+}
+
+static void test_link_api_syms(void)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ const char *syms[8] = {
+ "bpf_fentry_test1",
+ "bpf_fentry_test2",
+ "bpf_fentry_test3",
+ "bpf_fentry_test4",
+ "bpf_fentry_test5",
+ "bpf_fentry_test6",
+ "bpf_fentry_test7",
+ "bpf_fentry_test8",
+ };
+
+ opts.kprobe_multi.syms = syms;
+ opts.kprobe_multi.cnt = ARRAY_SIZE(syms);
+ test_link_api(&opts);
+}
+
+static void
+test_attach_api(const char *pattern, struct bpf_kprobe_multi_opts *opts)
+{
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct kprobe_multi *skel = NULL;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ pattern, opts);
+ if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ if (opts) {
+ opts->retprobe = true;
+ link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe_manual,
+ pattern, opts);
+ if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+ }
+
+ kprobe_multi_test_run(skel, !!opts);
+
+cleanup:
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link1);
+ kprobe_multi__destroy(skel);
+}
+
+static void test_attach_api_pattern(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+
+ test_attach_api("bpf_fentry_test*", &opts);
+ test_attach_api("bpf_fentry_test?", NULL);
+}
+
+static void test_attach_api_addrs(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ unsigned long long addrs[8];
+
+ GET_ADDR("bpf_fentry_test1", addrs[0]);
+ GET_ADDR("bpf_fentry_test2", addrs[1]);
+ GET_ADDR("bpf_fentry_test3", addrs[2]);
+ GET_ADDR("bpf_fentry_test4", addrs[3]);
+ GET_ADDR("bpf_fentry_test5", addrs[4]);
+ GET_ADDR("bpf_fentry_test6", addrs[5]);
+ GET_ADDR("bpf_fentry_test7", addrs[6]);
+ GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+ opts.addrs = (const unsigned long *) addrs;
+ opts.cnt = ARRAY_SIZE(addrs);
+ test_attach_api(NULL, &opts);
+}
+
+static void test_attach_api_syms(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ const char *syms[8] = {
+ "bpf_fentry_test1",
+ "bpf_fentry_test2",
+ "bpf_fentry_test3",
+ "bpf_fentry_test4",
+ "bpf_fentry_test5",
+ "bpf_fentry_test6",
+ "bpf_fentry_test7",
+ "bpf_fentry_test8",
+ };
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ test_attach_api(NULL, &opts);
+}
+
+static void test_attach_api_fails(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi *skel = NULL;
+ struct bpf_link *link = NULL;
+ unsigned long long addrs[2];
+ const char *syms[2] = {
+ "bpf_fentry_test1",
+ "bpf_fentry_test2",
+ };
+ __u64 cookies[2];
+ int saved_error;
+
+ addrs[0] = ksym_get_addr("bpf_fentry_test1");
+ addrs[1] = ksym_get_addr("bpf_fentry_test2");
+
+ if (!ASSERT_FALSE(!addrs[0] || !addrs[1], "ksym_get_addr"))
+ goto cleanup;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+
+ /* fail_1 - pattern and opts NULL */
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ NULL, NULL);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_1"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_1_error"))
+ goto cleanup;
+
+ /* fail_2 - both addrs and syms set */
+ opts.addrs = (const unsigned long *) addrs;
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = NULL;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ NULL, &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_2"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_2_error"))
+ goto cleanup;
+
+ /* fail_3 - pattern and addrs set */
+ opts.addrs = (const unsigned long *) addrs;
+ opts.syms = NULL;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = NULL;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "ksys_*", &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_3"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_3_error"))
+ goto cleanup;
+
+ /* fail_4 - pattern and cnt set */
+ opts.addrs = NULL;
+ opts.syms = NULL;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = NULL;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "ksys_*", &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_4"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_4_error"))
+ goto cleanup;
+
+ /* fail_5 - pattern and cookies */
+ opts.addrs = NULL;
+ opts.syms = NULL;
+ opts.cnt = 0;
+ opts.cookies = cookies;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "ksys_*", &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_5"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_5_error"))
+ goto cleanup;
+
+ /* fail_6 - abnormal cnt */
+ opts.addrs = (const unsigned long *) addrs;
+ opts.syms = NULL;
+ opts.cnt = INT_MAX;
+ opts.cookies = NULL;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ NULL, &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_6"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -E2BIG, "fail_6_error"))
+ goto cleanup;
+
+cleanup:
+ bpf_link__destroy(link);
+ kprobe_multi__destroy(skel);
+}
+
+static void test_session_skel_api(void)
+{
+ struct kprobe_multi_session *skel = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_link *link = NULL;
+ int i, err, prog_fd;
+
+ skel = kprobe_multi_session__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_session__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = kprobe_multi_session__attach(skel);
+ if (!ASSERT_OK(err, " kprobe_multi_session__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ /* bpf_fentry_test1-4 trigger return probe, result is 2 */
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(skel->bss->kprobe_session_result[i], 2, "kprobe_session_result");
+
+ /* bpf_fentry_test5-8 trigger only entry probe, result is 1 */
+ for (i = 4; i < 8; i++)
+ ASSERT_EQ(skel->bss->kprobe_session_result[i], 1, "kprobe_session_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ kprobe_multi_session__destroy(skel);
+}
+
+static void test_session_cookie_skel_api(void)
+{
+ struct kprobe_multi_session_cookie *skel = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_link *link = NULL;
+ int err, prog_fd;
+
+ skel = kprobe_multi_session_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = kprobe_multi_session_cookie__attach(skel);
+ if (!ASSERT_OK(err, " kprobe_multi_wrapper__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->test_kprobe_1_result, 1, "test_kprobe_1_result");
+ ASSERT_EQ(skel->bss->test_kprobe_2_result, 2, "test_kprobe_2_result");
+ ASSERT_EQ(skel->bss->test_kprobe_3_result, 3, "test_kprobe_3_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ kprobe_multi_session_cookie__destroy(skel);
+}
+
+static void test_unique_match(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi__open_and_load"))
+ return;
+
+ opts.unique_match = true;
+ skel->bss->pid = getpid();
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "bpf_fentry_test*", &opts);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+ bpf_link__destroy(link);
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "bpf_fentry_test8*", &opts);
+ if (ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_multi__destroy(skel);
+}
+
+static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts)
+{
+ long attach_start_ns, attach_end_ns;
+ long detach_start_ns, detach_end_ns;
+ double attach_delta, detach_delta;
+ struct bpf_link *link = NULL;
+
+ attach_start_ns = get_time_ns();
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty,
+ NULL, opts);
+ attach_end_ns = get_time_ns();
+
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+ return;
+
+ detach_start_ns = get_time_ns();
+ bpf_link__destroy(link);
+ detach_end_ns = get_time_ns();
+
+ attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
+ detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+ printf("%s: found %lu functions\n", __func__, opts->cnt);
+ printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
+ printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+}
+
+static void test_kprobe_multi_bench_attach(bool kernel)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi_empty *skel = NULL;
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms"))
+ return;
+
+ skel = kprobe_multi_empty__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+
+ do_bench_test(skel, &opts);
+
+cleanup:
+ kprobe_multi_empty__destroy(skel);
+ if (syms)
+ free(syms);
+}
+
+static void test_kprobe_multi_bench_attach_addr(bool kernel)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi_empty *skel = NULL;
+ unsigned long *addrs = NULL;
+ size_t cnt = 0;
+ int err;
+
+ err = bpf_get_addrs(&addrs, &cnt, kernel);
+ if (err == -ENOENT) {
+ test__skip();
+ return;
+ }
+
+ if (!ASSERT_OK(err, "bpf_get_addrs"))
+ return;
+
+ skel = kprobe_multi_empty__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ opts.addrs = addrs;
+ opts.cnt = cnt;
+
+ do_bench_test(skel, &opts);
+
+cleanup:
+ kprobe_multi_empty__destroy(skel);
+ free(addrs);
+}
+
+static void test_attach_override(void)
+{
+ struct kprobe_multi_override *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_multi_override__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ /* The test_override calls bpf_override_return so it should fail
+ * to attach to bpf_fentry_test1 function, which is not on error
+ * injection list.
+ */
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "override_attached_bpf_fentry_test1")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ /* The should_fail_bio function is on error injection list,
+ * attach should succeed.
+ */
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override,
+ "should_fail_bio", NULL);
+ if (!ASSERT_OK_PTR(link, "override_attached_should_fail_bio"))
+ goto cleanup;
+
+ bpf_link__destroy(link);
+
+cleanup:
+ kprobe_multi_override__destroy(skel);
+}
+
+#ifdef __x86_64__
+static void test_attach_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_multi_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
+void serial_test_kprobe_multi_bench_attach(void)
+{
+ if (test__start_subtest("kernel"))
+ test_kprobe_multi_bench_attach(true);
+ if (test__start_subtest("modules"))
+ test_kprobe_multi_bench_attach(false);
+ if (test__start_subtest("kernel"))
+ test_kprobe_multi_bench_attach_addr(true);
+ if (test__start_subtest("modules"))
+ test_kprobe_multi_bench_attach_addr(false);
+}
+
+void test_kprobe_multi_test(void)
+{
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+ return;
+
+ if (test__start_subtest("skel_api"))
+ test_skel_api();
+ if (test__start_subtest("link_api_addrs"))
+ test_link_api_syms();
+ if (test__start_subtest("link_api_syms"))
+ test_link_api_addrs();
+ if (test__start_subtest("attach_api_pattern"))
+ test_attach_api_pattern();
+ if (test__start_subtest("attach_api_addrs"))
+ test_attach_api_addrs();
+ if (test__start_subtest("attach_api_syms"))
+ test_attach_api_syms();
+ if (test__start_subtest("attach_api_fails"))
+ test_attach_api_fails();
+ if (test__start_subtest("attach_override"))
+ test_attach_override();
+ if (test__start_subtest("session"))
+ test_session_skel_api();
+ if (test__start_subtest("session_cookie"))
+ test_session_cookie_skel_api();
+ if (test__start_subtest("unique_match"))
+ test_unique_match();
+ if (test__start_subtest("attach_write_ctx"))
+ test_attach_write_ctx();
+ RUN_TESTS(kprobe_multi_verifier);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
new file mode 100644
index 000000000000..9d03528f05db
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "kprobe_multi.skel.h"
+#include "trace_helpers.h"
+#include "bpf/libbpf_internal.h"
+
+static struct ksyms *ksyms;
+
+static void kprobe_multi_testmod_check(struct kprobe_multi *skel)
+{
+ ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result");
+ ASSERT_EQ(skel->bss->kprobe_testmod_test2_result, 1, "kprobe_test2_result");
+ ASSERT_EQ(skel->bss->kprobe_testmod_test3_result, 1, "kprobe_test3_result");
+
+ ASSERT_EQ(skel->bss->kretprobe_testmod_test1_result, 1, "kretprobe_test1_result");
+ ASSERT_EQ(skel->bss->kretprobe_testmod_test2_result, 1, "kretprobe_test2_result");
+ ASSERT_EQ(skel->bss->kretprobe_testmod_test3_result, 1, "kretprobe_test3_result");
+}
+
+static void test_testmod_attach_api(struct bpf_kprobe_multi_opts *opts)
+{
+ struct kprobe_multi *skel = NULL;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ skel->links.test_kprobe_testmod = bpf_program__attach_kprobe_multi_opts(
+ skel->progs.test_kprobe_testmod,
+ NULL, opts);
+ if (!skel->links.test_kprobe_testmod)
+ goto cleanup;
+
+ opts->retprobe = true;
+ skel->links.test_kretprobe_testmod = bpf_program__attach_kprobe_multi_opts(
+ skel->progs.test_kretprobe_testmod,
+ NULL, opts);
+ if (!skel->links.test_kretprobe_testmod)
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+ kprobe_multi_testmod_check(skel);
+
+cleanup:
+ kprobe_multi__destroy(skel);
+}
+
+static void test_testmod_attach_api_addrs(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ unsigned long long addrs[3];
+
+ addrs[0] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test1");
+ ASSERT_NEQ(addrs[0], 0, "ksym_get_addr_local");
+ addrs[1] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test2");
+ ASSERT_NEQ(addrs[1], 0, "ksym_get_addr_local");
+ addrs[2] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test3");
+ ASSERT_NEQ(addrs[2], 0, "ksym_get_addr_local");
+
+ opts.addrs = (const unsigned long *) addrs;
+ opts.cnt = ARRAY_SIZE(addrs);
+
+ test_testmod_attach_api(&opts);
+}
+
+static void test_testmod_attach_api_syms(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ const char *syms[3] = {
+ "bpf_testmod_fentry_test1",
+ "bpf_testmod_fentry_test2",
+ "bpf_testmod_fentry_test3",
+ };
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ test_testmod_attach_api(&opts);
+}
+
+void serial_test_kprobe_multi_testmod_test(void)
+{
+ ksyms = load_kallsyms_local();
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
+ return;
+
+ if (test__start_subtest("testmod_attach_api_syms"))
+ test_testmod_attach_api_syms();
+
+ if (test__start_subtest("testmod_attach_api_addrs"))
+ test_testmod_attach_api_addrs();
+
+ free_kallsyms_local(ksyms);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c
new file mode 100644
index 000000000000..7def158da9eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+
+#include "linux/filter.h"
+#include "kptr_xchg_inline.skel.h"
+
+void test_kptr_xchg_inline(void)
+{
+ struct kptr_xchg_inline *skel;
+ struct bpf_insn *insn = NULL;
+ struct bpf_insn exp;
+ unsigned int cnt;
+ int err;
+
+#if !(defined(__x86_64__) || defined(__aarch64__) || \
+ (defined(__riscv) && __riscv_xlen == 64))
+ test__skip();
+ return;
+#endif
+
+ skel = kptr_xchg_inline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = get_xlated_program(bpf_program__fd(skel->progs.kptr_xchg_inline), &insn, &cnt);
+ if (!ASSERT_OK(err, "prog insn"))
+ goto out;
+
+ /* The original instructions are:
+ * r1 = map[id:xxx][0]+0
+ * r2 = 0
+ * call bpf_kptr_xchg#yyy
+ *
+ * call bpf_kptr_xchg#yyy will be inlined as:
+ * r0 = r2
+ * r0 = atomic64_xchg((u64 *)(r1 +0), r0)
+ */
+ if (!ASSERT_GT(cnt, 5, "insn cnt"))
+ goto out;
+
+ exp = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
+ if (!ASSERT_OK(memcmp(&insn[3], &exp, sizeof(exp)), "mov"))
+ goto out;
+
+ exp = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
+ if (!ASSERT_OK(memcmp(&insn[4], &exp, sizeof(exp)), "xchg"))
+ goto out;
+out:
+ free(insn);
+ kptr_xchg_inline__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
index b295969b263b..dc7aab532fb1 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -5,8 +5,6 @@
#include "test_ksyms.skel.h"
#include <sys/stat.h>
-static int duration;
-
void test_ksyms(void)
{
const char *btf_path = "/sys/kernel/btf/vmlinux";
@@ -18,43 +16,37 @@ void test_ksyms(void)
int err;
err = kallsyms_find("bpf_link_fops", &link_fops_addr);
- if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ if (!ASSERT_NEQ(err, -EINVAL, "bpf_link_fops: kallsyms_fopen"))
return;
- if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
+ if (!ASSERT_NEQ(err, -ENOENT, "bpf_link_fops: ksym_find"))
return;
err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
- if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ if (!ASSERT_NEQ(err, -EINVAL, "__per_cpu_start: kallsyms_fopen"))
return;
- if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
+ if (!ASSERT_NEQ(err, -ENOENT, "__per_cpu_start: ksym_find"))
return;
- if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
+ if (!ASSERT_OK(stat(btf_path, &st), "stat_btf"))
return;
btf_size = st.st_size;
skel = test_ksyms__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "test_ksyms__open_and_load"))
return;
err = test_ksyms__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_ksyms__attach"))
goto cleanup;
/* trigger tracepoint */
usleep(1);
data = skel->data;
- CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops",
- "got 0x%llx, exp 0x%llx\n",
- data->out__bpf_link_fops, link_fops_addr);
- CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1",
- "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
- CHECK(data->out__btf_size != btf_size, "btf_size",
- "got %llu, exp %llu\n", data->out__btf_size, btf_size);
- CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start",
- "got %llu, exp %llu\n", data->out__per_cpu_start,
- per_cpu_start_addr);
+ ASSERT_EQ(data->out__bpf_link_fops, link_fops_addr, "bpf_link_fops");
+ ASSERT_EQ(data->out__bpf_link_fops1, 0, "bpf_link_fops1");
+ ASSERT_EQ(data->out__btf_size, btf_size, "btf_size");
+ ASSERT_EQ(data->out__per_cpu_start, per_cpu_start_addr, "__per_cpu_start");
cleanup:
test_ksyms__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index b58b775d19f3..1d7a2f1e0731 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -6,6 +6,9 @@
#include <bpf/btf.h>
#include "test_ksyms_btf.skel.h"
#include "test_ksyms_btf_null_check.skel.h"
+#include "test_ksyms_weak.skel.h"
+#include "test_ksyms_weak.lskel.h"
+#include "test_ksyms_btf_write_check.skel.h"
static int duration;
@@ -81,14 +84,81 @@ static void test_null_check(void)
test_ksyms_btf_null_check__destroy(skel);
}
+static void test_weak_syms(void)
+{
+ struct test_ksyms_weak *skel;
+ struct test_ksyms_weak__data *data;
+ int err;
+
+ skel = test_ksyms_weak__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_weak__open_and_load"))
+ return;
+
+ err = test_ksyms_weak__attach(skel);
+ if (!ASSERT_OK(err, "test_ksyms_weak__attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+ ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+ test_ksyms_weak__destroy(skel);
+}
+
+static void test_weak_syms_lskel(void)
+{
+ struct test_ksyms_weak_lskel *skel;
+ struct test_ksyms_weak_lskel__data *data;
+ int err;
+
+ skel = test_ksyms_weak_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_weak_lskel__open_and_load"))
+ return;
+
+ err = test_ksyms_weak_lskel__attach(skel);
+ if (!ASSERT_OK(err, "test_ksyms_weak_lskel__attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+ ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+ test_ksyms_weak_lskel__destroy(skel);
+}
+
+static void test_write_check(bool test_handler1)
+{
+ struct test_ksyms_btf_write_check *skel;
+
+ skel = test_ksyms_btf_write_check__open();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_btf_write_check__open"))
+ return;
+ bpf_program__set_autoload(test_handler1 ? skel->progs.handler2 : skel->progs.handler1, false);
+ ASSERT_ERR(test_ksyms_btf_write_check__load(skel),
+ "unexpected load of a prog writing to ksym memory\n");
+
+ test_ksyms_btf_write_check__destroy(skel);
+}
+
void test_ksyms_btf(void)
{
int percpu_datasec;
struct btf *btf;
btf = libbpf_find_kernel_btf();
- if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
- PTR_ERR(btf)))
+ if (!ASSERT_OK_PTR(btf, "btf_exists"))
return;
percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
@@ -106,4 +176,16 @@ void test_ksyms_btf(void)
if (test__start_subtest("null_check"))
test_null_check();
+
+ if (test__start_subtest("weak_ksyms"))
+ test_weak_syms();
+
+ if (test__start_subtest("weak_ksyms_lskel"))
+ test_weak_syms_lskel();
+
+ if (test__start_subtest("write_check1"))
+ test_write_check(true);
+
+ if (test__start_subtest("write_check2"))
+ test_write_check(false);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index 4c232b456479..a1ebac70ec29 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -2,30 +2,68 @@
/* Copyright (c) 2021 Facebook */
#include <test_progs.h>
-#include <bpf/libbpf.h>
-#include <bpf/btf.h>
+#include <network_helpers.h>
+#include "test_ksyms_module.lskel.h"
#include "test_ksyms_module.skel.h"
-static int duration;
-
-void test_ksyms_module(void)
+static void test_ksyms_module_lskel(void)
{
- struct test_ksyms_module* skel;
+ struct test_ksyms_module_lskel *skel;
int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
- skel = test_ksyms_module__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!env.has_testmod) {
+ test__skip();
return;
+ }
- err = test_ksyms_module__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ skel = test_ksyms_module_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load"))
+ return;
+ err = bpf_prog_test_run_opts(skel->progs.load.prog_fd, &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
goto cleanup;
+ ASSERT_EQ(topts.retval, 0, "retval");
+ ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
+cleanup:
+ test_ksyms_module_lskel__destroy(skel);
+}
- usleep(1);
+static void test_ksyms_module_libbpf(void)
+{
+ struct test_ksyms_module *skel;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
- ASSERT_EQ(skel->bss->triggered, true, "triggered");
- ASSERT_EQ(skel->bss->out_mod_ksym_global, 123, "global_ksym_val");
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+ skel = test_ksyms_module__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open"))
+ return;
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.load), &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto cleanup;
+ ASSERT_EQ(topts.retval, 0, "retval");
+ ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
cleanup:
test_ksyms_module__destroy(skel);
}
+
+void test_ksyms_module(void)
+{
+ if (test__start_subtest("lskel"))
+ test_ksyms_module_lskel();
+ if (test__start_subtest("libbpf"))
+ test_ksyms_module_libbpf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 8073105548ff..1eab286b14fe 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -23,14 +23,18 @@ static void test_l4lb(const char *file)
__u8 flags;
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
- __u32 duration, retval, size;
int err, i, prog_fd, map_fd;
__u64 bytes = 0, pkts = 0;
struct bpf_object *obj;
char buf[128];
u32 *magic = (u32 *)buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = NUM_ITER,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -49,19 +53,24 @@ static void test_l4lb(const char *file)
goto out;
bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
- *magic != MAGIC_VAL, "ipv4",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ topts.data_in = &pkt_v4;
+ topts.data_size_in = sizeof(pkt_v4);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
- *magic != MAGIC_VAL, "ipv6",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv4 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 magic");
+
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf); /* reset out size */
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv6 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 magic");
map_fd = bpf_find_map(__func__, obj, "stats");
if (map_fd < 0)
@@ -81,7 +90,9 @@ out:
void test_l4lb_all(void)
{
if (test__start_subtest("l4lb_inline"))
- test_l4lb("test_l4lb.o");
+ test_l4lb("test_l4lb.bpf.o");
if (test__start_subtest("l4lb_noinline"))
- test_l4lb("test_l4lb_noinline.o");
+ test_l4lb("test_l4lb_noinline.bpf.o");
+ if (test__start_subtest("l4lb_noinline_dynptr"))
+ test_l4lb("test_l4lb_noinline_dynptr.bpf.o");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/legacy_printk.c b/tools/testing/selftests/bpf/prog_tests/legacy_printk.c
new file mode 100644
index 000000000000..ec6e45f2a644
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/legacy_printk.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "test_legacy_printk.skel.h"
+
+static int execute_one_variant(bool legacy)
+{
+ struct test_legacy_printk *skel;
+ int err, zero = 0, my_pid = getpid(), res, map_fd;
+
+ skel = test_legacy_printk__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return -errno;
+
+ bpf_program__set_autoload(skel->progs.handle_legacy, legacy);
+ bpf_program__set_autoload(skel->progs.handle_modern, !legacy);
+
+ err = test_legacy_printk__load(skel);
+ /* no ASSERT_OK, we expect one of two variants can fail here */
+ if (err)
+ goto err_out;
+
+ if (legacy) {
+ map_fd = bpf_map__fd(skel->maps.my_pid_map);
+ err = bpf_map_update_elem(map_fd, &zero, &my_pid, BPF_ANY);
+ if (!ASSERT_OK(err, "my_pid_map_update"))
+ goto err_out;
+ err = bpf_map_lookup_elem(map_fd, &zero, &res);
+ } else {
+ skel->bss->my_pid_var = my_pid;
+ }
+
+ err = test_legacy_printk__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto err_out;
+
+ usleep(1); /* trigger */
+
+ if (legacy) {
+ map_fd = bpf_map__fd(skel->maps.res_map);
+ err = bpf_map_lookup_elem(map_fd, &zero, &res);
+ if (!ASSERT_OK(err, "res_map_lookup"))
+ goto err_out;
+ } else {
+ res = skel->bss->res_var;
+ }
+
+ if (!ASSERT_GT(res, 0, "res")) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+err_out:
+ test_legacy_printk__destroy(skel);
+ return err;
+}
+
+void test_legacy_printk(void)
+{
+ /* legacy variant should work everywhere */
+ ASSERT_OK(execute_one_variant(true /* legacy */), "legacy_case");
+
+ /* execute modern variant, can fail the load on old kernels */
+ execute_one_variant(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c
new file mode 100644
index 000000000000..a3f238f51d05
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <test_progs.h>
+
+#include "test_libbpf_get_fd_by_id_opts.skel.h"
+
+void test_libbpf_get_fd_by_id_opts(void)
+{
+ struct test_libbpf_get_fd_by_id_opts *skel;
+ struct bpf_map_info info_m = {};
+ __u32 len = sizeof(info_m), value;
+ int ret, zero = 0, fd = -1;
+ LIBBPF_OPTS(bpf_get_fd_by_id_opts, fd_opts_rdonly,
+ .open_flags = BPF_F_RDONLY,
+ );
+
+ skel = test_libbpf_get_fd_by_id_opts__open_and_load();
+ if (!ASSERT_OK_PTR(skel,
+ "test_libbpf_get_fd_by_id_opts__open_and_load"))
+ return;
+
+ ret = test_libbpf_get_fd_by_id_opts__attach(skel);
+ if (!ASSERT_OK(ret, "test_libbpf_get_fd_by_id_opts__attach"))
+ goto close_prog;
+
+ ret = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.data_input),
+ &info_m, &len);
+ if (!ASSERT_OK(ret, "bpf_map_get_info_by_fd"))
+ goto close_prog;
+
+ fd = bpf_map_get_fd_by_id(info_m.id);
+ if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id"))
+ goto close_prog;
+
+ fd = bpf_map_get_fd_by_id_opts(info_m.id, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id_opts"))
+ goto close_prog;
+
+ fd = bpf_map_get_fd_by_id_opts(info_m.id, &fd_opts_rdonly);
+ if (!ASSERT_GE(fd, 0, "bpf_map_get_fd_by_id_opts"))
+ goto close_prog;
+
+ /* Map lookup should work with read-only fd. */
+ ret = bpf_map_lookup_elem(fd, &zero, &value);
+ if (!ASSERT_OK(ret, "bpf_map_lookup_elem"))
+ goto close_prog;
+
+ if (!ASSERT_EQ(value, 0, "map value mismatch"))
+ goto close_prog;
+
+ /* Map update should not work with read-only fd. */
+ ret = bpf_map_update_elem(fd, &zero, &len, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem"))
+ goto close_prog;
+
+ /* Map update should work with read-write fd. */
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.data_input), &zero,
+ &len, BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem"))
+ goto close_prog;
+
+ /* Prog get fd with opts set should not work (no kernel support). */
+ ret = bpf_prog_get_fd_by_id_opts(0, &fd_opts_rdonly);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_prog_get_fd_by_id_opts"))
+ goto close_prog;
+
+ /* Link get fd with opts set should not work (no kernel support). */
+ ret = bpf_link_get_fd_by_id_opts(0, &fd_opts_rdonly);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_link_get_fd_by_id_opts"))
+ goto close_prog;
+
+ /* BTF get fd with opts set should not work (no kernel support). */
+ ret = bpf_btf_get_fd_by_id_opts(0, &fd_opts_rdonly);
+ ASSERT_EQ(ret, -EINVAL, "bpf_btf_get_fd_by_id_opts");
+
+close_prog:
+ if (fd >= 0)
+ close(fd);
+
+ test_libbpf_get_fd_by_id_opts__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
new file mode 100644
index 000000000000..4ed46ed58a7b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+void test_libbpf_probe_prog_types(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_prog_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_prog_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ if (!ASSERT_OK_PTR(t, "bpf_prog_type_enum"))
+ goto cleanup;
+
+ for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) {
+ const char *prog_type_name = btf__str_by_offset(btf, e->name_off);
+ enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val;
+ int res;
+
+ if (prog_type == BPF_PROG_TYPE_UNSPEC)
+ continue;
+ if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0)
+ continue;
+
+ if (!test__start_subtest(prog_type_name))
+ continue;
+
+ res = libbpf_probe_bpf_prog_type(prog_type, NULL);
+ ASSERT_EQ(res, 1, prog_type_name);
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+void test_libbpf_probe_map_types(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_map_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_map_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ if (!ASSERT_OK_PTR(t, "bpf_map_type_enum"))
+ goto cleanup;
+
+ for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) {
+ const char *map_type_name = btf__str_by_offset(btf, e->name_off);
+ enum bpf_map_type map_type = (enum bpf_map_type)e->val;
+ int res;
+
+ if (map_type == BPF_MAP_TYPE_UNSPEC)
+ continue;
+ if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0)
+ continue;
+
+ if (!test__start_subtest(map_type_name))
+ continue;
+
+ res = libbpf_probe_bpf_map_type(map_type, NULL);
+ ASSERT_EQ(res, 1, map_type_name);
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+void test_libbpf_probe_helpers(void)
+{
+#define CASE(prog, helper, supp) { \
+ .prog_type_name = "BPF_PROG_TYPE_" # prog, \
+ .helper_name = "bpf_" # helper, \
+ .prog_type = BPF_PROG_TYPE_ ## prog, \
+ .helper_id = BPF_FUNC_ ## helper, \
+ .supported = supp, \
+}
+ const struct case_def {
+ const char *prog_type_name;
+ const char *helper_name;
+ enum bpf_prog_type prog_type;
+ enum bpf_func_id helper_id;
+ bool supported;
+ } cases[] = {
+ CASE(KPROBE, unspec, false),
+ CASE(KPROBE, map_lookup_elem, true),
+ CASE(KPROBE, loop, true),
+
+ CASE(KPROBE, ktime_get_coarse_ns, false),
+ CASE(SOCKET_FILTER, ktime_get_coarse_ns, true),
+
+ CASE(KPROBE, sys_bpf, false),
+ CASE(SYSCALL, sys_bpf, true),
+ };
+ size_t case_cnt = ARRAY_SIZE(cases), i;
+ char buf[128];
+
+ for (i = 0; i < case_cnt; i++) {
+ const struct case_def *d = &cases[i];
+ int res;
+
+ snprintf(buf, sizeof(buf), "%s+%s", d->prog_type_name, d->helper_name);
+
+ if (!test__start_subtest(buf))
+ continue;
+
+ res = libbpf_probe_bpf_helper(d->prog_type, d->helper_id, NULL);
+ ASSERT_EQ(res, d->supported, buf);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
new file mode 100644
index 000000000000..62ea855ec4d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <ctype.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+/*
+ * Utility function uppercasing an entire string.
+ */
+static void uppercase(char *s)
+{
+ for (; *s != '\0'; s++)
+ *s = toupper(*s);
+}
+
+/*
+ * Test case to check that all bpf_attach_type variants are covered by
+ * libbpf_bpf_attach_type_str.
+ */
+static void test_libbpf_bpf_attach_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_attach_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_attach_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_attach_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_attach_type attach_type = (enum bpf_attach_type)e->val;
+ const char *attach_type_name;
+ const char *attach_type_str;
+ char buf[256];
+
+ if (attach_type == __MAX_BPF_ATTACH_TYPE)
+ continue;
+
+ attach_type_name = btf__str_by_offset(btf, e->name_off);
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+ ASSERT_OK_PTR(attach_type_str, attach_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_%s", attach_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, attach_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_link_type variants are covered by
+ * libbpf_bpf_link_type_str.
+ */
+static void test_libbpf_bpf_link_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_link_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_link_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_link_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_link_type link_type = (enum bpf_link_type)e->val;
+ const char *link_type_name;
+ const char *link_type_str;
+ char buf[256];
+
+ if (link_type == __MAX_BPF_LINK_TYPE)
+ continue;
+
+ link_type_name = btf__str_by_offset(btf, e->name_off);
+ link_type_str = libbpf_bpf_link_type_str(link_type);
+ ASSERT_OK_PTR(link_type_str, link_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_LINK_TYPE_%s", link_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, link_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_map_type variants are covered by
+ * libbpf_bpf_map_type_str.
+ */
+static void test_libbpf_bpf_map_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_map_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_map_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_map_type map_type = (enum bpf_map_type)e->val;
+ const char *map_type_name;
+ const char *map_type_str;
+ char buf[256];
+
+ if (map_type == __MAX_BPF_MAP_TYPE)
+ continue;
+
+ map_type_name = btf__str_by_offset(btf, e->name_off);
+ map_type_str = libbpf_bpf_map_type_str(map_type);
+ ASSERT_OK_PTR(map_type_str, map_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str);
+ uppercase(buf);
+
+ /* Special case for map_type_name BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED
+ * where it and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value
+ * (map_type). For this enum value, libbpf_bpf_map_type_str() picks
+ * BPF_MAP_TYPE_CGROUP_STORAGE. The same for
+ * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and
+ * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE.
+ */
+ if (strcmp(map_type_name, "BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED") == 0)
+ continue;
+ if (strcmp(map_type_name, "BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED") == 0)
+ continue;
+
+ ASSERT_STREQ(buf, map_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_prog_type variants are covered by
+ * libbpf_bpf_prog_type_str.
+ */
+static void test_libbpf_bpf_prog_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_prog_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_prog_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val;
+ const char *prog_type_name;
+ const char *prog_type_str;
+ char buf[256];
+
+ if (prog_type == __MAX_BPF_PROG_TYPE)
+ continue;
+
+ prog_type_name = btf__str_by_offset(btf, e->name_off);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ ASSERT_OK_PTR(prog_type_str, prog_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_PROG_TYPE_%s", prog_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, prog_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Run all libbpf str conversion tests.
+ */
+void test_libbpf_str(void)
+{
+ if (test__start_subtest("bpf_attach_type_str"))
+ test_libbpf_bpf_attach_type_str();
+
+ if (test__start_subtest("bpf_link_type_str"))
+ test_libbpf_bpf_link_type_str();
+
+ if (test__start_subtest("bpf_map_type_str"))
+ test_libbpf_bpf_map_type_str();
+
+ if (test__start_subtest("bpf_prog_type_str"))
+ test_libbpf_bpf_prog_type_str();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
index a743288cf384..6fc97c45f71e 100644
--- a/tools/testing/selftests/bpf/prog_tests/link_pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
@@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
int err, i;
link = bpf_program__attach(prog);
- if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
bss->in = 1;
@@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
/* re-open link from BPFFS */
link = bpf_link__open(link_pin_path);
- if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_open"))
goto cleanup;
CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
@@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
cleanup:
- if (!IS_ERR(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
}
void test_link_pinning(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
new file mode 100644
index 000000000000..fa639b021f7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_funcs.skel.h"
+
+void test_linked_funcs(void)
+{
+ int err;
+ struct linked_funcs *skel;
+
+ skel = linked_funcs__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ /* handler1 and handler2 are marked as SEC("?raw_tp/sys_enter") and
+ * are set to not autoload by default
+ */
+ bpf_program__set_autoload(skel->progs.handler1, true);
+ bpf_program__set_autoload(skel->progs.handler2, true);
+
+ skel->rodata->my_tid = sys_gettid();
+ skel->bss->syscall_id = SYS_getpgid;
+
+ err = linked_funcs__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = linked_funcs__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1");
+ ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1");
+ ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1");
+
+ ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2");
+ ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2");
+ /* output_weak2 should never be updated */
+ ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2");
+
+cleanup:
+ linked_funcs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
new file mode 100644
index 000000000000..14c5a7ef0e87
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -0,0 +1,813 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/btf.h>
+#include <test_btf.h>
+#include <linux/btf.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "linked_list.skel.h"
+#include "linked_list_fail.skel.h"
+#include "linked_list_peek.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static struct {
+ const char *prog_name;
+ const char *err_msg;
+} linked_list_fail_tests[] = {
+#define TEST(test, off) \
+ { #test "_missing_lock_push_front", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
+ { #test "_missing_lock_push_back", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
+ { #test "_missing_lock_pop_front", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
+ { #test "_missing_lock_pop_back", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" },
+ TEST(kptr, 40)
+ TEST(global, 16)
+ TEST(map, 0)
+ TEST(inner_map, 0)
+#undef TEST
+#define TEST(test, op) \
+ { #test "_kptr_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=40 must be held for bpf_list_head" }, \
+ { #test "_global_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \
+ { #test "_map_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=0 must be held for bpf_list_head" }, \
+ { #test "_inner_map_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=0 must be held for bpf_list_head" },
+ TEST(kptr, push_front)
+ TEST(kptr, push_back)
+ TEST(kptr, pop_front)
+ TEST(kptr, pop_back)
+ TEST(global, push_front)
+ TEST(global, push_back)
+ TEST(global, pop_front)
+ TEST(global, pop_back)
+ TEST(map, push_front)
+ TEST(map, push_back)
+ TEST(map, pop_front)
+ TEST(map, pop_back)
+ TEST(inner_map, push_front)
+ TEST(inner_map, push_back)
+ TEST(inner_map, pop_front)
+ TEST(inner_map, pop_back)
+#undef TEST
+ { "map_compat_kprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_kretprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_perf", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_raw_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_raw_tp_w", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" },
+ { "obj_new_no_composite", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" },
+ { "obj_new_no_struct", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" },
+ { "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" },
+ { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" },
+ { "obj_new_acq", "Unreleased reference id=" },
+ { "use_after_drop", "invalid mem access 'scalar'" },
+ { "ptr_walk_scalar", "type=rdonly_untrusted_mem expected=percpu_ptr_" },
+ { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" },
+ { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" },
+ { "direct_read_head", "direct access to bpf_list_head is disallowed" },
+ { "direct_write_head", "direct access to bpf_list_head is disallowed" },
+ { "direct_read_node", "direct access to bpf_list_node is disallowed" },
+ { "direct_write_node", "direct access to bpf_list_node is disallowed" },
+ { "use_after_unlock_push_front", "invalid mem access 'scalar'" },
+ { "use_after_unlock_push_back", "invalid mem access 'scalar'" },
+ { "double_push_front", "arg#1 expected pointer to allocated object" },
+ { "double_push_back", "arg#1 expected pointer to allocated object" },
+ { "no_node_value_type", "bpf_list_node not found at offset=0" },
+ { "incorrect_value_type",
+ "operation on bpf_list_head expects arg#1 bpf_list_node at offset=48 in struct foo, "
+ "but arg is at offset=0 in struct bar" },
+ { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+ { "incorrect_node_off1", "bpf_list_node not found at offset=49" },
+ { "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=48 in struct foo" },
+ { "no_head_type", "bpf_list_head not found at offset=0" },
+ { "incorrect_head_var_off1", "R1 doesn't have constant offset" },
+ { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+ { "incorrect_head_off1", "bpf_list_head not found at offset=25" },
+ { "incorrect_head_off2", "bpf_list_head not found at offset=1" },
+ { "pop_front_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
+ { "pop_back_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
+};
+
+static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct linked_list_fail *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = linked_list_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "linked_list_fail__open_opts"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto end;
+
+ bpf_program__set_autoload(prog, true);
+
+ ret = linked_list_fail__load(skel);
+ if (!ASSERT_ERR(ret, "linked_list_fail__load must fail"))
+ goto end;
+
+ if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
+ fprintf(stderr, "Expected: %s\n", err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+
+end:
+ linked_list_fail__destroy(skel);
+}
+
+static void clear_fields(struct bpf_map *map)
+{
+ char buf[24];
+ int key = 0;
+
+ memset(buf, 0xff, sizeof(buf));
+ ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields");
+}
+
+enum {
+ TEST_ALL,
+ PUSH_POP,
+ PUSH_POP_MULT,
+ LIST_IN_LIST,
+};
+
+static void test_linked_list_success(int mode, bool leave_in_map)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct linked_list *skel;
+ int ret;
+
+ skel = linked_list__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "linked_list__open_and_load"))
+ return;
+
+ if (mode == LIST_IN_LIST)
+ goto lil;
+ if (mode == PUSH_POP_MULT)
+ goto ppm;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop), &opts);
+ ASSERT_OK(ret, "map_list_push_pop");
+ ASSERT_OK(opts.retval, "map_list_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.array_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts);
+ ASSERT_OK(ret, "inner_map_list_push_pop");
+ ASSERT_OK(opts.retval, "inner_map_list_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.inner_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts);
+ ASSERT_OK(ret, "global_list_push_pop");
+ ASSERT_OK(opts.retval, "global_list_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_nested), &opts);
+ ASSERT_OK(ret, "global_list_push_pop_nested");
+ ASSERT_OK(opts.retval, "global_list_push_pop_nested retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_array_push_pop), &opts);
+ ASSERT_OK(ret, "global_list_array_push_pop");
+ ASSERT_OK(opts.retval, "global_list_array_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
+ if (mode == PUSH_POP)
+ goto end;
+
+ppm:
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop_multiple), &opts);
+ ASSERT_OK(ret, "map_list_push_pop_multiple");
+ ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.array_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts);
+ ASSERT_OK(ret, "inner_map_list_push_pop_multiple");
+ ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.inner_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts);
+ ASSERT_OK(ret, "global_list_push_pop_multiple");
+ ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
+ if (mode == PUSH_POP_MULT)
+ goto end;
+
+lil:
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_in_list), &opts);
+ ASSERT_OK(ret, "map_list_in_list");
+ ASSERT_OK(opts.retval, "map_list_in_list retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.array_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts);
+ ASSERT_OK(ret, "inner_map_list_in_list");
+ ASSERT_OK(opts.retval, "inner_map_list_in_list retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.inner_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts);
+ ASSERT_OK(ret, "global_list_in_list");
+ ASSERT_OK(opts.retval, "global_list_in_list retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+end:
+ linked_list__destroy(skel);
+}
+
+#define SPIN_LOCK 2
+#define LIST_HEAD 3
+#define LIST_NODE 4
+
+static struct btf *init_btf(void)
+{
+ int id, lid, hid, nid;
+ struct btf *btf;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "btf__new_empty"))
+ return NULL;
+ id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ if (!ASSERT_EQ(id, 1, "btf__add_int"))
+ goto end;
+ lid = btf__add_struct(btf, "bpf_spin_lock", 4);
+ if (!ASSERT_EQ(lid, SPIN_LOCK, "btf__add_struct bpf_spin_lock"))
+ goto end;
+ hid = btf__add_struct(btf, "bpf_list_head", 16);
+ if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head"))
+ goto end;
+ nid = btf__add_struct(btf, "bpf_list_node", 24);
+ if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node"))
+ goto end;
+ return btf;
+end:
+ btf__free(btf);
+ return NULL;
+}
+
+static void list_and_rb_node_same_struct(bool refcount_field)
+{
+ int bpf_rb_node_btf_id, bpf_refcount_btf_id = 0, foo_btf_id;
+ struct btf *btf;
+ int id, err;
+
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ return;
+
+ bpf_rb_node_btf_id = btf__add_struct(btf, "bpf_rb_node", 32);
+ if (!ASSERT_GT(bpf_rb_node_btf_id, 0, "btf__add_struct bpf_rb_node"))
+ return;
+
+ if (refcount_field) {
+ bpf_refcount_btf_id = btf__add_struct(btf, "bpf_refcount", 4);
+ if (!ASSERT_GT(bpf_refcount_btf_id, 0, "btf__add_struct bpf_refcount"))
+ return;
+ }
+
+ id = btf__add_struct(btf, "bar", refcount_field ? 60 : 56);
+ if (!ASSERT_GT(id, 0, "btf__add_struct bar"))
+ return;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ return;
+ err = btf__add_field(btf, "c", bpf_rb_node_btf_id, 192, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ return;
+ if (refcount_field) {
+ err = btf__add_field(btf, "ref", bpf_refcount_btf_id, 448, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::ref"))
+ return;
+ }
+
+ foo_btf_id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_GT(foo_btf_id, 0, "btf__add_struct foo"))
+ return;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ return;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ return;
+ id = btf__add_decl_tag(btf, "contains:bar:a", foo_btf_id, 0);
+ if (!ASSERT_GT(id, 0, "btf__add_decl_tag contains:bar:a"))
+ return;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, refcount_field ? 0 : -EINVAL, "check btf");
+ btf__free(btf);
+}
+
+static void test_btf(void)
+{
+ struct btf *btf = NULL;
+ int id, err;
+
+ while (test__start_subtest("btf: too many locks")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 32, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+ err = btf__add_field(btf, "c", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -E2BIG, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing lock")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 16);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:a", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:baz:a"))
+ break;
+ id = btf__add_struct(btf, "baz", 16);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field baz::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: bad offset")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 36);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EEXIST, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing contains:")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing struct")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:bar", 5, 1);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:bar"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ENOENT, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing node")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:c", 5, 1);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:c"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ btf__free(btf);
+ ASSERT_EQ(err, -ENOENT, "check btf");
+ break;
+ }
+
+ while (test__start_subtest("btf: node incorrect type")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a"))
+ break;
+ id = btf__add_struct(btf, "bar", 4);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: multiple bpf_list_node with name b")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 52);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ err = btf__add_field(btf, "d", SPIN_LOCK, 384, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::d"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning | owned AA cycle")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 44);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning | owned ABA cycle")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 44);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 44);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 28);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a"))
+ break;
+ id = btf__add_struct(btf, "bar", 24);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, 0, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning -> owning | owned -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 28);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 44);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
+ break;
+ id = btf__add_struct(btf, "baz", 24);
+ if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field baz:a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, 0, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning | owned -> owning | owned -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 44);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 44);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar:a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar:b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar:c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
+ break;
+ id = btf__add_struct(btf, "baz", 24);
+ if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field baz:a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning -> owning | owned -> owning | owned -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 44);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag"))
+ break;
+ id = btf__add_struct(btf, "baz", 44);
+ if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0);
+ if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a"))
+ break;
+ id = btf__add_struct(btf, "bam", 24);
+ if (!ASSERT_EQ(id, 11, "btf__add_struct bam"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bam::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: list_node and rb_node in same struct")) {
+ list_and_rb_node_same_struct(true);
+ break;
+ }
+
+ while (test__start_subtest("btf: list_node and rb_node in same struct, no bpf_refcount")) {
+ list_and_rb_node_same_struct(false);
+ break;
+ }
+}
+
+void test_linked_list(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(linked_list_fail_tests); i++) {
+ if (!test__start_subtest(linked_list_fail_tests[i].prog_name))
+ continue;
+ test_linked_list_fail_prog(linked_list_fail_tests[i].prog_name,
+ linked_list_fail_tests[i].err_msg);
+ }
+ test_btf();
+ test_linked_list_success(PUSH_POP, false);
+ test_linked_list_success(PUSH_POP, true);
+ test_linked_list_success(PUSH_POP_MULT, false);
+ test_linked_list_success(PUSH_POP_MULT, true);
+ test_linked_list_success(LIST_IN_LIST, false);
+ test_linked_list_success(LIST_IN_LIST, true);
+ test_linked_list_success(TEST_ALL, false);
+}
+
+void test_linked_list_peek(void)
+{
+ RUN_TESTS(linked_list_peek);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
new file mode 100644
index 000000000000..85dcaaaf2775
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_maps.skel.h"
+
+void test_linked_maps(void)
+{
+ int err;
+ struct linked_maps *skel;
+
+ skel = linked_maps__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = linked_maps__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1");
+ ASSERT_EQ(skel->bss->output_second1, 2, "output_second1");
+ ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1");
+
+cleanup:
+ linked_maps__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
new file mode 100644
index 000000000000..267166abe4c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_vars.skel.h"
+
+void test_linked_vars(void)
+{
+ int err;
+ struct linked_vars *skel;
+
+ skel = linked_vars__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->input_bss1 = 1000;
+ skel->bss->input_bss2 = 2000;
+ skel->bss->input_bss_weak = 3000;
+
+ err = linked_vars__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = linked_vars__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1");
+ ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2");
+ /* 10 comes from "winner" input_data_weak in first obj file */
+ ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_bss1");
+ ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_bss2");
+ /* 100 comes from "winner" input_rodata_weak in first obj file */
+ ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_weak1");
+ ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_weak2");
+
+cleanup:
+ linked_vars__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
new file mode 100644
index 000000000000..72aa5376c30e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "testing_helpers.h"
+#include "livepatch_trampoline.skel.h"
+
+static int load_livepatch(void)
+{
+ char path[4096];
+
+ /* CI will set KBUILD_OUTPUT */
+ snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko",
+ getenv("KBUILD_OUTPUT") ? : "../../../..");
+
+ return load_module(path, env_verbosity > VERBOSE_NONE);
+}
+
+static void unload_livepatch(void)
+{
+ /* Disable the livepatch before unloading the module */
+ system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled");
+
+ unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE);
+}
+
+static void read_proc_cmdline(void)
+{
+ char buf[4096];
+ int fd, ret;
+
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "open /proc/cmdline"))
+ return;
+
+ ret = read(fd, buf, sizeof(buf));
+ if (!ASSERT_GT(ret, 0, "read /proc/cmdline"))
+ goto out;
+
+ ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp");
+
+out:
+ close(fd);
+}
+
+static void __test_livepatch_trampoline(bool fexit_first)
+{
+ struct livepatch_trampoline *skel = NULL;
+ int err;
+
+ skel = livepatch_trampoline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ skel->bss->my_pid = getpid();
+
+ if (!fexit_first) {
+ /* fentry program is loaded first by default */
+ err = livepatch_trampoline__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+ } else {
+ /* Manually load fexit program first. */
+ skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit"))
+ goto out;
+
+ skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry"))
+ goto out;
+ }
+
+ read_proc_cmdline();
+
+ ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit");
+ ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit");
+out:
+ livepatch_trampoline__destroy(skel);
+}
+
+void test_livepatch_trampoline(void)
+{
+ int retry_cnt = 0;
+
+retry:
+ if (load_livepatch()) {
+ if (retry_cnt) {
+ ASSERT_OK(1, "load_livepatch");
+ goto out;
+ }
+ /*
+ * Something else (previous run of the same test?) loaded
+ * the KLP module. Unload the KLP module and retry.
+ */
+ unload_livepatch();
+ retry_cnt++;
+ goto retry;
+ }
+
+ if (test__start_subtest("fentry_first"))
+ __test_livepatch_trampoline(false);
+
+ if (test__start_subtest("fexit_first"))
+ __test_livepatch_trampoline(true);
+out:
+ unload_livepatch();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
index 5a2a689dbb68..581c0eb0a0a1 100644
--- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
+++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
@@ -27,8 +27,8 @@ void test_load_bytes_relative(void)
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
- err = bpf_prog_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB,
- &obj, &prog_fd);
+ err = bpf_prog_test_load("./load_bytes_relative.bpf.o", BPF_PROG_TYPE_CGROUP_SKB,
+ &obj, &prog_fd);
if (CHECK_FAIL(err))
goto close_server_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
new file mode 100644
index 000000000000..827e713f6cf1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "local_kptr_stash.skel.h"
+#include "local_kptr_stash_fail.skel.h"
+static void test_local_kptr_stash_simple(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_nodes run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_local_kptr_stash_plain(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_plain), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_plain run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_plain retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_local_kptr_stash_local_with_root(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_local_with_root), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_local_with_root run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_local_with_root retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_local_kptr_stash_unstash(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_nodes run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.unstash_rb_node), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_nodes run");
+ ASSERT_EQ(opts.retval, 42, "local_kptr_stash_add_nodes retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_refcount_acquire_without_unstash(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash run");
+ ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts);
+ ASSERT_OK(ret, "stash_refcounted_node run");
+ ASSERT_OK(opts.retval, "stash_refcounted_node retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run");
+ ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_local_kptr_stash_fail(void)
+{
+ RUN_TESTS(local_kptr_stash_fail);
+}
+
+void test_local_kptr_stash(void)
+{
+ if (test__start_subtest("local_kptr_stash_simple"))
+ test_local_kptr_stash_simple();
+ if (test__start_subtest("local_kptr_stash_plain"))
+ test_local_kptr_stash_plain();
+ if (test__start_subtest("local_kptr_stash_local_with_root"))
+ test_local_kptr_stash_local_with_root();
+ if (test__start_subtest("local_kptr_stash_unstash"))
+ test_local_kptr_stash_unstash();
+ if (test__start_subtest("refcount_acquire_without_unstash"))
+ test_refcount_acquire_without_unstash();
+ if (test__start_subtest("local_kptr_stash_fail"))
+ test_local_kptr_stash_fail();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
new file mode 100644
index 000000000000..d6f14a232002
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_buf.skel.h"
+#include "bpf_util.h"
+
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
+static size_t libbpf_log_pos;
+static char libbpf_log_buf[1024 * 1024];
+static bool libbpf_log_error;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt, va_list args)
+{
+ int emitted_cnt;
+ size_t left_cnt;
+
+ left_cnt = sizeof(libbpf_log_buf) - libbpf_log_pos;
+ emitted_cnt = vsnprintf(libbpf_log_buf + libbpf_log_pos, left_cnt, fmt, args);
+
+ if (emitted_cnt < 0 || emitted_cnt + 1 > left_cnt) {
+ libbpf_log_error = true;
+ return 0;
+ }
+
+ libbpf_log_pos += emitted_cnt;
+ return 0;
+}
+
+static void obj_load_log_buf(void)
+{
+ libbpf_print_fn_t old_print_cb = libbpf_set_print(libbpf_print_cb);
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ const size_t log_buf_sz = 1024 * 1024;
+ struct test_log_buf* skel;
+ char *obj_log_buf, *good_log_buf, *bad_log_buf;
+ int err;
+
+ obj_log_buf = malloc(3 * log_buf_sz);
+ if (!ASSERT_OK_PTR(obj_log_buf, "obj_log_buf"))
+ return;
+
+ good_log_buf = obj_log_buf + log_buf_sz;
+ bad_log_buf = obj_log_buf + 2 * log_buf_sz;
+ obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0';
+
+ opts.kernel_log_buf = obj_log_buf;
+ opts.kernel_log_size = log_buf_sz;
+ opts.kernel_log_level = 4; /* for BTF this will turn into 1 */
+
+ /* In the first round every prog has its own log_buf, so libbpf logs
+ * don't have program failure logs
+ */
+ skel = test_log_buf__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* set very verbose level for good_prog so we always get detailed logs */
+ bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz);
+ bpf_program__set_log_level(skel->progs.good_prog, 2);
+
+ bpf_program__set_log_buf(skel->progs.bad_prog, bad_log_buf, log_buf_sz);
+ /* log_level 0 with custom log_buf means that verbose logs are not
+ * requested if program load is successful, but libbpf should retry
+ * with log_level 1 on error and put program's verbose load log into
+ * custom log_buf
+ */
+ bpf_program__set_log_level(skel->progs.bad_prog, 0);
+
+ err = test_log_buf__load(skel);
+ if (!ASSERT_ERR(err, "unexpected_load_success"))
+ goto cleanup;
+
+ ASSERT_FALSE(libbpf_log_error, "libbpf_log_error");
+
+ /* there should be no prog loading log because we specified per-prog log buf */
+ ASSERT_NULL(strstr(libbpf_log_buf, "-- BEGIN PROG LOAD LOG --"), "unexp_libbpf_log");
+ ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"),
+ "libbpf_log_not_empty");
+ ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty");
+ ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx() R10=fp0"),
+ "good_log_verbose");
+ ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"),
+ "bad_log_not_empty");
+
+ if (env.verbosity > VERBOSE_NONE) {
+ printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf);
+ printf("OBJ LOG: \n=================\n%s=================\n", obj_log_buf);
+ printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf);
+ printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf);
+ }
+
+ /* reset everything */
+ test_log_buf__destroy(skel);
+ obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0';
+ libbpf_log_buf[0] = '\0';
+ libbpf_log_pos = 0;
+ libbpf_log_error = false;
+
+ /* In the second round we let bad_prog's failure be logged through print callback */
+ opts.kernel_log_buf = NULL; /* let everything through into print callback */
+ opts.kernel_log_size = 0;
+ opts.kernel_log_level = 1;
+
+ skel = test_log_buf__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* set normal verbose level for good_prog to check log_level is taken into account */
+ bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz);
+ bpf_program__set_log_level(skel->progs.good_prog, 1);
+
+ err = test_log_buf__load(skel);
+ if (!ASSERT_ERR(err, "unexpected_load_success"))
+ goto cleanup;
+
+ ASSERT_FALSE(libbpf_log_error, "libbpf_log_error");
+
+ /* this time prog loading error should be logged through print callback */
+ ASSERT_OK_PTR(strstr(libbpf_log_buf, "libbpf: prog 'bad_prog': -- BEGIN PROG LOAD LOG --"),
+ "libbpf_log_correct");
+ ASSERT_STREQ(obj_log_buf, "", "obj_log__empty");
+ ASSERT_STREQ(good_log_buf, "processed 4 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n",
+ "good_log_ok");
+ ASSERT_STREQ(bad_log_buf, "", "bad_log_empty");
+
+ if (env.verbosity > VERBOSE_NONE) {
+ printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf);
+ printf("OBJ LOG: \n=================\n%s=================\n", obj_log_buf);
+ printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf);
+ printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf);
+ }
+
+cleanup:
+ free(obj_log_buf);
+ test_log_buf__destroy(skel);
+ libbpf_set_print(old_print_cb);
+}
+
+static void bpf_prog_load_log_buf(void)
+{
+ const struct bpf_insn good_prog_insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ const size_t good_prog_insn_cnt = ARRAY_SIZE(good_prog_insns);
+ const struct bpf_insn bad_prog_insns[] = {
+ BPF_EXIT_INSN(),
+ };
+ size_t bad_prog_insn_cnt = ARRAY_SIZE(bad_prog_insns);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ const size_t log_buf_sz = 1024 * 1024;
+ char *log_buf;
+ int fd = -1;
+
+ log_buf = malloc(log_buf_sz);
+ if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
+ return;
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_sz;
+
+ /* with log_level == 0 log_buf should stay empty for good prog */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
+ good_prog_insns, good_prog_insn_cnt, &opts);
+ ASSERT_STREQ(log_buf, "", "good_log_0");
+ ASSERT_GE(fd, 0, "good_fd1");
+ if (fd >= 0)
+ close(fd);
+
+ /* log_level == 2 should always fill log_buf, even for good prog */
+ log_buf[0] = '\0';
+ opts.log_level = 2;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
+ good_prog_insns, good_prog_insn_cnt, &opts);
+ ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2");
+ ASSERT_GE(fd, 0, "good_fd2");
+ if (fd >= 0)
+ close(fd);
+
+ /* log_level == 0 should fill log_buf for bad prog */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "bad_prog", "GPL",
+ bad_prog_insns, bad_prog_insn_cnt, &opts);
+ ASSERT_OK_PTR(strstr(log_buf, "R0 !read_ok"), "bad_log_0");
+ ASSERT_LT(fd, 0, "bad_fd");
+ if (fd >= 0)
+ close(fd);
+
+ free(log_buf);
+}
+
+static void bpf_btf_load_log_buf(void)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ const size_t log_buf_sz = 1024 * 1024;
+ const void *raw_btf_data;
+ __u32 raw_btf_size;
+ struct btf *btf;
+ char *log_buf = NULL;
+ int fd = -1;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ return;
+
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
+ goto cleanup;
+
+ log_buf = malloc(log_buf_sz);
+ if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
+ goto cleanup;
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_sz;
+
+ /* with log_level == 0 log_buf should stay empty for good BTF */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts);
+ ASSERT_STREQ(log_buf, "", "good_log_0");
+ ASSERT_GE(fd, 0, "good_fd1");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* log_level == 2 should always fill log_buf, even for good BTF */
+ log_buf[0] = '\0';
+ opts.log_level = 2;
+ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts);
+ printf("LOG_BUF: %s\n", log_buf);
+ ASSERT_OK_PTR(strstr(log_buf, "magic: 0xeb9f"), "good_log_2");
+ ASSERT_GE(fd, 0, "good_fd2");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* make BTF bad, add pointer pointing to non-existing type */
+ ASSERT_GT(btf__add_ptr(btf, 100), 0, "bad_ptr_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_bad"))
+ goto cleanup;
+
+ /* log_level == 0 should fill log_buf for bad BTF */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts);
+ printf("LOG_BUF: %s\n", log_buf);
+ ASSERT_OK_PTR(strstr(log_buf, "[2] PTR (anon) type_id=100 Invalid type_id"), "bad_log_0");
+ ASSERT_LT(fd, 0, "bad_fd");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+cleanup:
+ free(log_buf);
+ btf__free(btf);
+}
+
+void test_log_buf(void)
+{
+ if (test__start_subtest("obj_load_log_buf"))
+ obj_load_log_buf();
+ if (test__start_subtest("bpf_prog_load_log_buf"))
+ bpf_prog_load_log_buf();
+ if (test__start_subtest("bpf_btf_load_log_buf"))
+ bpf_btf_load_log_buf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
new file mode 100644
index 000000000000..90a98e23be61
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_fixup.skel.h"
+
+enum trunc_type {
+ TRUNC_NONE,
+ TRUNC_PARTIAL,
+ TRUNC_FULL,
+};
+
+static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.bad_relo, true);
+ memset(log_buf, 0, sizeof(log_buf));
+ bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf));
+ bpf_program__set_log_level(skel->progs.bad_relo, 1 | 8); /* BPF_LOG_FIXED to force truncation */
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log_buf,
+ "0: <invalid CO-RE relocation>\n"
+ "failed to resolve CO-RE relocation <byte_sz> ",
+ "log_buf_part1");
+
+ switch (trunc_type) {
+ case TRUNC_NONE:
+ ASSERT_HAS_SUBSTR(log_buf,
+ "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+ "log_buf_part2");
+ ASSERT_HAS_SUBSTR(log_buf,
+ "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n",
+ "log_buf_end");
+ break;
+ case TRUNC_PARTIAL:
+ /* we should get full libbpf message patch */
+ ASSERT_HAS_SUBSTR(log_buf,
+ "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+ "log_buf_part2");
+ /* we shouldn't get full end of BPF verifier log */
+ ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+ "log_buf_end");
+ break;
+ case TRUNC_FULL:
+ /* we shouldn't get second part of libbpf message patch */
+ ASSERT_NULL(strstr(log_buf, "struct task_struct___bad.fake_field (0:1 @ offset 4)\n"),
+ "log_buf_part2");
+ /* we shouldn't get full end of BPF verifier log */
+ ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+ "log_buf_end");
+ break;
+ }
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+static void bad_core_relo_subprog(void)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.bad_relo_subprog, true);
+ bpf_program__set_log_buf(skel->progs.bad_relo_subprog, log_buf, sizeof(log_buf));
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log_buf,
+ ": <invalid CO-RE relocation>\n"
+ "failed to resolve CO-RE relocation <byte_off> ",
+ "log_buf");
+ ASSERT_HAS_SUBSTR(log_buf,
+ "struct task_struct___bad.fake_field_subprog (0:2 @ offset 8)\n",
+ "log_buf");
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+static void missing_map(void)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_map__set_autocreate(skel->maps.missing_map, false);
+
+ bpf_program__set_autoload(skel->progs.use_missing_map, true);
+ bpf_program__set_log_buf(skel->progs.use_missing_map, log_buf, sizeof(log_buf));
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.existing_map), "existing_map_autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.missing_map), "missing_map_autocreate");
+
+ ASSERT_HAS_SUBSTR(log_buf,
+ ": <invalid BPF map reference>\n"
+ "BPF map 'missing_map' is referenced but wasn't created\n",
+ "log_buf");
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+static void missing_kfunc(void)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.use_missing_kfunc, true);
+ bpf_program__set_log_buf(skel->progs.use_missing_kfunc, log_buf, sizeof(log_buf));
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log_buf,
+ "0: <invalid kfunc call>\n"
+ "kfunc 'bpf_nonexistent_kfunc' is referenced but wasn't resolved\n",
+ "log_buf");
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+void test_log_fixup(void)
+{
+ if (test__start_subtest("bad_core_relo_trunc_none"))
+ bad_core_relo(0, TRUNC_NONE /* full buf */);
+ if (test__start_subtest("bad_core_relo_trunc_partial"))
+ bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */);
+ if (test__start_subtest("bad_core_relo_trunc_full"))
+ bad_core_relo(240, TRUNC_FULL /* truncate also libbpf's message patch */);
+ if (test__start_subtest("bad_core_relo_subprog"))
+ bad_core_relo_subprog();
+ if (test__start_subtest("missing_map"))
+ missing_map();
+ if (test__start_subtest("missing_kfunc"))
+ missing_kfunc();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644
index 000000000000..a767bb4a271c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+ __u64 key, value = START_VALUE;
+ int err;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+ __u64 key, value[nr_cpus];
+ int i, err;
+
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = START_VALUE;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type,
+ int *map_fd)
+{
+ struct test_lookup_and_delete *skel;
+ int err;
+
+ skel = test_lookup_and_delete__open();
+ if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open"))
+ return NULL;
+
+ err = bpf_map__set_type(skel->maps.hash_map, map_type);
+ if (!ASSERT_OK(err, "bpf_map__set_type"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto cleanup;
+
+ err = test_lookup_and_delete__load(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__load"))
+ goto cleanup;
+
+ *map_fd = bpf_map__fd(skel->maps.hash_map);
+ if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+ return NULL;
+}
+
+/* Triggers BPF program that updates map with given key and value */
+static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key,
+ __u64 value)
+{
+ int err;
+
+ skel->bss->set_pid = getpid();
+ skel->bss->set_key = key;
+ skel->bss->set_value = value;
+
+ err = test_lookup_and_delete__attach(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__attach"))
+ return -1;
+
+ syscall(__NR_getpgid);
+
+ test_lookup_and_delete__detach(skel);
+
+ return 0;
+}
+
+static void test_lookup_and_delete_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+ &key, sizeof(key), &value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+ &key, sizeof(key), value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(val != START_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ }
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+ &key, sizeof(key), &value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Value should match the new value. */
+ if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i, cpucnt = 0;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element 1. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Clean value. */
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = 0;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+ &key, sizeof(key), value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Check if only one CPU has set the value. */
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+ if (val) {
+ if (CHECK(val != NEW_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ cpucnt++;
+ }
+ }
+ if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+ cpucnt))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+void test_lookup_and_delete(void)
+{
+ nr_cpus = bpf_num_possible_cpus();
+
+ if (test__start_subtest("lookup_and_delete"))
+ test_lookup_and_delete_hash();
+ if (test__start_subtest("lookup_and_delete_percpu"))
+ test_lookup_and_delete_percpu_hash();
+ if (test__start_subtest("lookup_and_delete_lru"))
+ test_lookup_and_delete_lru_hash();
+ if (test__start_subtest("lookup_and_delete_lru_percpu"))
+ test_lookup_and_delete_lru_percpu_hash();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_key.c b/tools/testing/selftests/bpf/prog_tests/lookup_key.c
new file mode 100644
index 000000000000..68025e88f352
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_key.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <linux/keyctl.h>
+#include <test_progs.h>
+
+#include "test_lookup_key.skel.h"
+
+#define KEY_LOOKUP_CREATE 0x01
+#define KEY_LOOKUP_PARTIAL 0x02
+
+static bool kfunc_not_supported;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
+ va_list args)
+{
+ char *func;
+
+ if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
+ return 0;
+
+ func = va_arg(args, char *);
+
+ if (strcmp(func, "bpf_lookup_user_key") && strcmp(func, "bpf_key_put") &&
+ strcmp(func, "bpf_lookup_system_key"))
+ return 0;
+
+ kfunc_not_supported = true;
+ return 0;
+}
+
+void test_lookup_key(void)
+{
+ libbpf_print_fn_t old_print_cb;
+ struct test_lookup_key *skel;
+ __u32 next_id;
+ int ret;
+
+ skel = test_lookup_key__open();
+ if (!ASSERT_OK_PTR(skel, "test_lookup_key__open"))
+ return;
+
+ old_print_cb = libbpf_set_print(libbpf_print_cb);
+ ret = test_lookup_key__load(skel);
+ libbpf_set_print(old_print_cb);
+
+ if (ret < 0 && kfunc_not_supported) {
+ printf("%s:SKIP:bpf_lookup_*_key(), bpf_key_put() kfuncs not supported\n",
+ __func__);
+ test__skip();
+ goto close_prog;
+ }
+
+ if (!ASSERT_OK(ret, "test_lookup_key__load"))
+ goto close_prog;
+
+ ret = test_lookup_key__attach(skel);
+ if (!ASSERT_OK(ret, "test_lookup_key__attach"))
+ goto close_prog;
+
+ skel->bss->monitored_pid = getpid();
+ skel->bss->key_serial = KEY_SPEC_THREAD_KEYRING;
+
+ /* The thread-specific keyring does not exist, this test fails. */
+ skel->bss->flags = 0;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_LT(ret, 0, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ /* Force creation of the thread-specific keyring, this test succeeds. */
+ skel->bss->flags = KEY_LOOKUP_CREATE;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_OK(ret, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ /* Pass both lookup flags for parameter validation. */
+ skel->bss->flags = KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_OK(ret, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ /* Pass invalid flags. */
+ skel->bss->flags = UINT64_MAX;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_LT(ret, 0, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ skel->bss->key_serial = 0;
+ skel->bss->key_id = 1;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_OK(ret, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ skel->bss->key_id = UINT32_MAX;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ ASSERT_LT(ret, 0, "bpf_prog_get_next_id");
+
+close_prog:
+ skel->bss->monitored_pid = 0;
+ test_lookup_key__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lru_bug.c b/tools/testing/selftests/bpf/prog_tests/lru_bug.c
new file mode 100644
index 000000000000..3c7822390827
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lru_bug.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#include "lru_bug.skel.h"
+
+void test_lru_bug(void)
+{
+ struct lru_bug *skel;
+ int ret;
+
+ skel = lru_bug__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "lru_bug__open_and_load"))
+ return;
+ ret = lru_bug__attach(skel);
+ if (!ASSERT_OK(ret, "lru_bug__attach"))
+ goto end;
+ usleep(1);
+ ASSERT_OK(skel->data->result, "prealloc_lru_pop doesn't call check_and_init_map_value");
+end:
+ lru_bug__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
new file mode 100644
index 000000000000..6df25de8f080
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "lsm_cgroup.skel.h"
+#include "lsm_cgroup_nonvoid.skel.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static struct btf *btf;
+
+static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, p);
+ int cnt = 0;
+ int i;
+
+ ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+ if (!attach_func)
+ return p.prog_cnt;
+
+ /* When attach_func is provided, count the number of progs that
+ * attach to the given symbol.
+ */
+
+ if (!btf)
+ btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK(libbpf_get_error(btf), "btf_vmlinux"))
+ return -1;
+
+ p.prog_ids = malloc(sizeof(u32) * p.prog_cnt);
+ p.prog_attach_flags = malloc(sizeof(u32) * p.prog_cnt);
+ ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+ for (i = 0; i < p.prog_cnt; i++) {
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int fd;
+
+ fd = bpf_prog_get_fd_by_id(p.prog_ids[i]);
+ ASSERT_GE(fd, 0, "prog_get_fd_by_id");
+ ASSERT_OK(bpf_prog_get_info_by_fd(fd, &info, &info_len),
+ "prog_info_by_fd");
+ close(fd);
+
+ if (info.attach_btf_id ==
+ btf__find_by_name_kind(btf, attach_func, BTF_KIND_FUNC))
+ cnt++;
+ }
+
+ free(p.prog_ids);
+ free(p.prog_attach_flags);
+
+ return cnt;
+}
+
+static void test_lsm_cgroup_functional(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int cgroup_fd = -1, cgroup_fd2 = -1, cgroup_fd3 = -1;
+ int listen_fd, client_fd, accepted_fd;
+ struct lsm_cgroup *skel = NULL;
+ int post_create_prog_fd2 = -1;
+ int post_create_prog_fd = -1;
+ int bind_link_fd2 = -1;
+ int bind_prog_fd2 = -1;
+ int alloc_prog_fd = -1;
+ int bind_prog_fd = -1;
+ int bind_link_fd = -1;
+ int clone_prog_fd = -1;
+ int err, fd, prio;
+ socklen_t socklen;
+
+ cgroup_fd3 = test__join_cgroup("/sock_policy_empty");
+ if (!ASSERT_GE(cgroup_fd3, 0, "create empty cgroup"))
+ goto close_cgroup;
+
+ cgroup_fd2 = test__join_cgroup("/sock_policy_reuse");
+ if (!ASSERT_GE(cgroup_fd2, 0, "create cgroup for reuse"))
+ goto close_cgroup;
+
+ cgroup_fd = test__join_cgroup("/sock_policy");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto close_cgroup;
+
+ skel = lsm_cgroup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto close_cgroup;
+
+ post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create);
+ post_create_prog_fd2 = bpf_program__fd(skel->progs.socket_post_create2);
+ bind_prog_fd = bpf_program__fd(skel->progs.socket_bind);
+ bind_prog_fd2 = bpf_program__fd(skel->progs.socket_bind2);
+ alloc_prog_fd = bpf_program__fd(skel->progs.socket_alloc);
+ clone_prog_fd = bpf_program__fd(skel->progs.socket_clone);
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 0, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 0, "total prog count");
+ err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+ if (err == -ENOTSUPP) {
+ test__skip();
+ goto close_cgroup;
+ }
+ if (!ASSERT_OK(err, "attach alloc_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 1, "total prog count");
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 0, "prog count");
+ err = bpf_prog_attach(clone_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+ if (!ASSERT_OK(err, "attach clone_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 2, "total prog count");
+
+ /* Make sure replacing works. */
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 0, "prog count");
+ err = bpf_prog_attach(post_create_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP, 0);
+ if (!ASSERT_OK(err, "attach post_create_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+ attach_opts.replace_prog_fd = post_create_prog_fd;
+ err = bpf_prog_attach_opts(post_create_prog_fd2, cgroup_fd,
+ BPF_LSM_CGROUP, &attach_opts);
+ if (!ASSERT_OK(err, "prog replace post_create_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+ /* Try the same attach/replace via link API. */
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 0, "prog count");
+ bind_link_fd = bpf_link_create(bind_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP, NULL);
+ if (!ASSERT_GE(bind_link_fd, 0, "link create bind_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+ update_opts.old_prog_fd = bind_prog_fd;
+ update_opts.flags = BPF_F_REPLACE;
+
+ err = bpf_link_update(bind_link_fd, bind_prog_fd2, &update_opts);
+ if (!ASSERT_OK(err, "link update bind_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+ /* Attach another instance of bind program to another cgroup.
+ * This should trigger the reuse of the trampoline shim (two
+ * programs attaching to the same btf_id).
+ */
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 0, "prog count");
+ bind_link_fd2 = bpf_link_create(bind_prog_fd2, cgroup_fd2,
+ BPF_LSM_CGROUP, NULL);
+ if (!ASSERT_GE(bind_link_fd2, 0, "link create bind_prog_fd2"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count");
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (!(skel->kconfig->CONFIG_SECURITY_APPARMOR
+ || skel->kconfig->CONFIG_SECURITY_SELINUX
+ || skel->kconfig->CONFIG_SECURITY_SMACK))
+ /* AF_UNIX is prohibited. */
+ ASSERT_LT(fd, 0, "socket(AF_UNIX)");
+ close(fd);
+
+ /* AF_INET6 gets default policy (sk_priority). */
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+ goto detach_cgroup;
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+ "getsockopt");
+ ASSERT_EQ(prio, 123, "sk_priority");
+
+ close(fd);
+
+ /* TX-only AF_PACKET is allowed. */
+
+ ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0,
+ "socket(AF_PACKET, ..., ETH_P_ALL)");
+
+ fd = socket(AF_PACKET, SOCK_RAW, 0);
+ ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)");
+
+ /* TX-only AF_PACKET can not be rebound. */
+
+ struct sockaddr_ll sa = {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htons(ETH_P_ALL),
+ };
+ ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0,
+ "bind(ETH_P_ALL)");
+
+ close(fd);
+
+ /* Trigger passive open. */
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ ASSERT_GE(listen_fd, 0, "start_server");
+ client_fd = connect_to_fd(listen_fd, 0);
+ ASSERT_GE(client_fd, 0, "connect_to_fd");
+ accepted_fd = accept(listen_fd, NULL, NULL);
+ ASSERT_GE(accepted_fd, 0, "accept");
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(accepted_fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+ "getsockopt");
+ ASSERT_EQ(prio, 234, "sk_priority");
+
+ /* These are replaced and never called. */
+ ASSERT_EQ(skel->bss->called_socket_post_create, 0, "called_create");
+ ASSERT_EQ(skel->bss->called_socket_bind, 0, "called_bind");
+
+ /* AF_INET6+SOCK_STREAM
+ * AF_PACKET+SOCK_RAW
+ * AF_UNIX+SOCK_RAW if already have non-bpf lsms installed
+ * listen_fd
+ * client_fd
+ * accepted_fd
+ */
+ if (skel->kconfig->CONFIG_SECURITY_APPARMOR
+ || skel->kconfig->CONFIG_SECURITY_SELINUX
+ || skel->kconfig->CONFIG_SECURITY_SMACK)
+ /* AF_UNIX+SOCK_RAW if already have non-bpf lsms installed */
+ ASSERT_EQ(skel->bss->called_socket_post_create2, 6, "called_create2");
+ else
+ ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2");
+
+ /* start_server
+ * bind(ETH_P_ALL)
+ */
+ ASSERT_EQ(skel->bss->called_socket_bind2, 2, "called_bind2");
+ /* Single accept(). */
+ ASSERT_EQ(skel->bss->called_socket_clone, 1, "called_clone");
+
+ /* AF_UNIX+SOCK_STREAM (failed)
+ * AF_INET6+SOCK_STREAM
+ * AF_PACKET+SOCK_RAW (failed)
+ * AF_PACKET+SOCK_RAW
+ * listen_fd
+ * client_fd
+ * accepted_fd
+ */
+ ASSERT_EQ(skel->bss->called_socket_alloc, 7, "called_alloc");
+
+ close(listen_fd);
+ close(client_fd);
+ close(accepted_fd);
+
+ /* Make sure other cgroup doesn't trigger the programs. */
+
+ if (!ASSERT_OK(join_cgroup("/sock_policy_empty"), "join root cgroup"))
+ goto detach_cgroup;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+ goto detach_cgroup;
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+ "getsockopt");
+ ASSERT_EQ(prio, 0, "sk_priority");
+
+ close(fd);
+
+detach_cgroup:
+ ASSERT_GE(bpf_prog_detach2(post_create_prog_fd2, cgroup_fd,
+ BPF_LSM_CGROUP), 0, "detach_create");
+ close(bind_link_fd);
+ /* Don't close bind_link_fd2, exercise cgroup release cleanup. */
+ ASSERT_GE(bpf_prog_detach2(alloc_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP), 0, "detach_alloc");
+ ASSERT_GE(bpf_prog_detach2(clone_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP), 0, "detach_clone");
+
+close_cgroup:
+ close(cgroup_fd);
+ close(cgroup_fd2);
+ close(cgroup_fd3);
+ lsm_cgroup__destroy(skel);
+}
+
+static void test_lsm_cgroup_nonvoid(void)
+{
+ struct lsm_cgroup_nonvoid *skel = NULL;
+
+ skel = lsm_cgroup_nonvoid__open_and_load();
+ ASSERT_NULL(skel, "open succeeds");
+ lsm_cgroup_nonvoid__destroy(skel);
+}
+
+void test_lsm_cgroup(void)
+{
+ if (test__start_subtest("functional"))
+ test_lsm_cgroup_functional();
+ if (test__start_subtest("nonvoid"))
+ test_lsm_cgroup_nonvoid();
+ btf__free(btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
new file mode 100644
index 000000000000..ccec0fcdabc1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LWT_HELPERS_H
+#define __LWT_HELPERS_H
+
+#include <time.h>
+#include <net/if.h>
+#include <linux/icmp.h>
+
+#include "test_progs.h"
+
+#define log_err(MSG, ...) \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
+
+#define RUN_TEST(name) \
+ ({ \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_create(), "netns_create")) { \
+ struct nstoken *token = open_netns(NETNS); \
+ if (ASSERT_OK_PTR(token, "setns")) { \
+ test_ ## name(); \
+ close_netns(token); \
+ } \
+ netns_delete(); \
+ } \
+ })
+
+static inline int netns_create(void)
+{
+ return system("ip netns add " NETNS);
+}
+
+static inline int netns_delete(void)
+{
+ return system("ip netns del " NETNS ">/dev/null 2>&1");
+}
+
+#define ICMP_PAYLOAD_SIZE 100
+
+/* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */
+static int __expect_icmp_ipv4(char *buf, ssize_t len)
+{
+ struct iphdr *ip = (struct iphdr *)buf;
+ struct icmphdr *icmp = (struct icmphdr *)(ip + 1);
+ ssize_t min_header_len = sizeof(*ip) + sizeof(*icmp);
+
+ if (len < min_header_len)
+ return -1;
+
+ if (ip->protocol != IPPROTO_ICMP)
+ return -1;
+
+ if (icmp->type != ICMP_ECHO)
+ return -1;
+
+ return len == ICMP_PAYLOAD_SIZE + min_header_len;
+}
+
+typedef int (*filter_t) (char *, ssize_t);
+
+/* wait_for_packet - wait for a packet that matches the filter
+ *
+ * @fd: tun fd/packet socket to read packet
+ * @filter: filter function, returning 1 if matches
+ * @timeout: timeout to wait for the packet
+ *
+ * Returns 1 if a matching packet is read, 0 if timeout expired, -1 on error.
+ */
+static int wait_for_packet(int fd, filter_t filter, struct timeval *timeout)
+{
+ char buf[4096];
+ int max_retry = 5; /* in case we read some spurious packets */
+ fd_set fds;
+
+ FD_ZERO(&fds);
+ while (max_retry--) {
+ /* Linux modifies timeout arg... So make a copy */
+ struct timeval copied_timeout = *timeout;
+ ssize_t ret = -1;
+
+ FD_SET(fd, &fds);
+
+ ret = select(1 + fd, &fds, NULL, NULL, &copied_timeout);
+ if (ret <= 0) {
+ if (errno == EINTR)
+ continue;
+ else if (errno == EAGAIN || ret == 0)
+ return 0;
+
+ log_err("select failed");
+ return -1;
+ }
+
+ ret = read(fd, buf, sizeof(buf));
+
+ if (ret <= 0) {
+ log_err("read(dev): %ld", ret);
+ return -1;
+ }
+
+ if (filter && filter(buf, ret) > 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+#endif /* __LWT_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
new file mode 100644
index 000000000000..b6391af5f6f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <netinet/in.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define BPF_FILE "test_lwt_ip_encap.bpf.o"
+
+#define NETNS_NAME_SIZE 32
+#define NETNS_BASE "ns-lwt-ip-encap"
+
+#define IP4_ADDR_1 "172.16.1.100"
+#define IP4_ADDR_2 "172.16.2.100"
+#define IP4_ADDR_3 "172.16.3.100"
+#define IP4_ADDR_4 "172.16.4.100"
+#define IP4_ADDR_5 "172.16.5.100"
+#define IP4_ADDR_6 "172.16.6.100"
+#define IP4_ADDR_7 "172.16.7.100"
+#define IP4_ADDR_8 "172.16.8.100"
+#define IP4_ADDR_GRE "172.16.16.100"
+
+#define IP4_ADDR_SRC IP4_ADDR_1
+#define IP4_ADDR_DST IP4_ADDR_4
+
+#define IP6_ADDR_1 "fb01::1"
+#define IP6_ADDR_2 "fb02::1"
+#define IP6_ADDR_3 "fb03::1"
+#define IP6_ADDR_4 "fb04::1"
+#define IP6_ADDR_5 "fb05::1"
+#define IP6_ADDR_6 "fb06::1"
+#define IP6_ADDR_7 "fb07::1"
+#define IP6_ADDR_8 "fb08::1"
+#define IP6_ADDR_GRE "fb10::1"
+
+#define IP6_ADDR_SRC IP6_ADDR_1
+#define IP6_ADDR_DST IP6_ADDR_4
+
+/* Setup/topology:
+ *
+ * NS1 NS2 NS3
+ * veth1 <---> veth2 veth3 <---> veth4 (the top route)
+ * veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
+ *
+ * Each vethN gets IP[4|6]_ADDR_N address.
+ *
+ * IP*_ADDR_SRC = IP*_ADDR_1
+ * IP*_ADDR_DST = IP*_ADDR_4
+ *
+ * All tests test pings from IP*_ADDR__SRC to IP*_ADDR_DST.
+ *
+ * By default, routes are configured to allow packets to go
+ * IP*_ADDR_1 <=> IP*_ADDR_2 <=> IP*_ADDR_3 <=> IP*_ADDR_4 (the top route).
+ *
+ * A GRE device is installed in NS3 with IP*_ADDR_GRE, and
+ * NS1/NS2 are configured to route packets to IP*_ADDR_GRE via IP*_ADDR_8
+ * (the bottom route).
+ *
+ * Tests:
+ *
+ * 1. Routes NS2->IP*_ADDR_DST are brought down, so the only way a ping
+ * from IP*_ADDR_SRC to IP*_ADDR_DST can work is via IP*_ADDR_GRE.
+ *
+ * 2a. In an egress test, a bpf LWT_XMIT program is installed on veth1
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth1:egress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ *
+ * 2b. In an ingress test, a bpf LWT_IN program is installed on veth2
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ */
+
+static int create_ns(char *name, size_t name_sz)
+{
+ if (!name)
+ goto fail;
+
+ if (!ASSERT_OK(append_tid(name, name_sz), "append TID"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", name);
+
+ /* rp_filter gets confused by what these tests are doing, so disable it */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.all.rp_filter=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.default.rp_filter=0", name);
+ /* Disable IPv6 DAD because it sometimes takes too long and fails tests */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.accept_dad=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.default.accept_dad=0", name);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int set_top_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth1", ns1, IP4_ADDR_1);
+ SYS(fail, "ip -n %s a add %s/24 dev veth2", ns2, IP4_ADDR_2);
+ SYS(fail, "ip -n %s a add %s/24 dev veth3", ns2, IP4_ADDR_3);
+ SYS(fail, "ip -n %s a add %s/24 dev veth4", ns3, IP4_ADDR_4);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth1", ns1, IP6_ADDR_1);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth2", ns2, IP6_ADDR_2);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth3", ns2, IP6_ADDR_3);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth4", ns3, IP6_ADDR_4);
+
+ SYS(fail, "ip -n %s link set dev veth1 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth2 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth3 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth4 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int set_bottom_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth5", ns1, IP4_ADDR_5);
+ SYS(fail, "ip -n %s a add %s/24 dev veth6", ns2, IP4_ADDR_6);
+ SYS(fail, "ip -n %s a add %s/24 dev veth7", ns2, IP4_ADDR_7);
+ SYS(fail, "ip -n %s a add %s/24 dev veth8", ns3, IP4_ADDR_8);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth5", ns1, IP6_ADDR_5);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth6", ns2, IP6_ADDR_6);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth7", ns2, IP6_ADDR_7);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth8", ns3, IP6_ADDR_8);
+
+ SYS(fail, "ip -n %s link set dev veth5 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth6 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth7 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth8 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int configure_vrf(const char *ns1, const char *ns2)
+{
+ if (!ns1 || !ns2)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns1);
+ SYS(fail, "ip -n %s link set red up", ns1);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s link set veth1 vrf red", ns1);
+ SYS(fail, "ip -n %s link set veth5 vrf red", ns1);
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns2);
+ SYS(fail, "ip -n %s link set red up", ns2);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s link set veth2 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth3 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth6 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth7 vrf red", ns2);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_ns1(const char *ns1, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns1 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth1 %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip route add default dev veth1 via %s %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth1 %s", IP6_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add default dev veth1 via %s %s", IP6_ADDR_2, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth5 %s", IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_7, IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_8, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 %s", IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_7, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_8, IP6_ADDR_6, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns2(const char *ns2, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns2 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns2);
+ if (!ASSERT_OK_PTR(nstoken, "open ns2"))
+ goto fail;
+
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.ip_forward=1", ns2);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.forwarding=1", ns2);
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth2 %s", IP4_ADDR_1, vrf);
+ SYS(fail, "ip route add %s/32 dev veth3 %s", IP4_ADDR_4, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth2 %s", IP6_ADDR_1, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth3 %s", IP6_ADDR_4, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth6 %s", IP4_ADDR_5, vrf);
+ SYS(fail, "ip route add %s/32 dev veth7 %s", IP4_ADDR_8, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth6 %s", IP6_ADDR_5, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth7 %s", IP6_ADDR_8, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns3(const char *ns3)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns3)
+ goto fail;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth4", IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_1, IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_2, IP4_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4", IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_1, IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_2, IP6_ADDR_3);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth8", IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_5, IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_6, IP4_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8", IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_5, IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_6, IP6_ADDR_7);
+
+ /* Configure IPv4 GRE device */
+ SYS(fail, "ip tunnel add gre_dev mode gre remote %s local %s ttl 255",
+ IP4_ADDR_1, IP4_ADDR_GRE);
+ SYS(fail, "ip link set gre_dev up");
+ SYS(fail, "ip a add %s dev gre_dev", IP4_ADDR_GRE);
+
+ /* Configure IPv6 GRE device */
+ SYS(fail, "ip tunnel add gre6_dev mode ip6gre remote %s local %s ttl 255",
+ IP6_ADDR_1, IP6_ADDR_GRE);
+ SYS(fail, "ip link set gre6_dev up");
+ SYS(fail, "ip a add %s dev gre6_dev", IP6_ADDR_GRE);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int setup_network(char *ns1, char *ns2, char *ns3, const char *vrf)
+{
+ if (!ns1 || !ns2 || !ns3 || !vrf)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add veth1 type veth peer name veth2 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth3 type veth peer name veth4 netns %s", ns2, ns3);
+ SYS(fail, "ip -n %s link add veth5 type veth peer name veth6 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth7 type veth peer name veth8 netns %s", ns2, ns3);
+
+ if (vrf[0]) {
+ if (!ASSERT_OK(configure_vrf(ns1, ns2), "configure vrf"))
+ goto fail;
+ }
+ if (!ASSERT_OK(set_top_addr(ns1, ns2, ns3), "set top addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(set_bottom_addr(ns1, ns2, ns3), "set bottom addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns1(ns1, vrf), "configure ns1 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns2(ns2, vrf), "configure ns2 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns3(ns3), "configure ns3 routes"))
+ goto fail;
+
+ /* Link bottom route to the GRE tunnels */
+ SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s %s",
+ ns1, IP4_ADDR_GRE, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s %s",
+ ns2, IP4_ADDR_GRE, IP4_ADDR_8, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s %s",
+ ns1, IP6_ADDR_GRE, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s %s",
+ ns2, IP6_ADDR_GRE, IP6_ADDR_8, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int remove_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route del %s dev veth5 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route del %s dev veth7 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth5 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth7 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int add_unreachable_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define GSO_SIZE 5000
+#define GSO_TCP_PORT 9000
+/* This tests the fix from commit ea0371f78799 ("net: fix GSO in bpf_lwt_push_ip_encap") */
+static int test_gso_fix(const char *ns1, const char *ns3, int family)
+{
+ const char *ip_addr = family == AF_INET ? IP4_ADDR_DST : IP6_ADDR_DST;
+ char gso_packet[GSO_SIZE] = {};
+ struct nstoken *nstoken = NULL;
+ int sfd, cfd, afd;
+ ssize_t bytes;
+ int ret = -1;
+
+ if (!ns1 || !ns3)
+ return ret;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ return ret;
+
+ sfd = start_server_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = connect_to_addr_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "connect to server"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ afd = accept(sfd, NULL, NULL);
+ if (!ASSERT_OK_FD(afd, "accept"))
+ goto close_client;
+
+ /* Send a packet larger than MTU */
+ bytes = send(cfd, gso_packet, GSO_SIZE, 0);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "send packet"))
+ goto close_accept;
+
+ /* Verify we received all expected bytes */
+ bytes = read(afd, gso_packet, GSO_SIZE);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "receive packet"))
+ goto close_accept;
+
+ ret = 0;
+
+close_accept:
+ close(afd);
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+ return ret;
+}
+
+static int check_ping_ok(const char *ns1)
+{
+ SYS(fail, "ip netns exec %s ping -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP4_ADDR_DST);
+ SYS(fail, "ip netns exec %s ping6 -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP6_ADDR_DST);
+ return 0;
+fail:
+ return -1;
+}
+
+static int check_ping_fails(const char *ns1)
+{
+ int ret;
+
+ ret = SYS_NOFAIL("ip netns exec %s ping -c 1 -W1 -I veth1 %s", ns1, IP4_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ ret = SYS_NOFAIL("ip netns exec %s ping6 -c 1 -W1 -I veth1 %s", ns1, IP6_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ return 0;
+}
+
+#define EGRESS true
+#define INGRESS false
+#define IPV4_ENCAP true
+#define IPV6_ENCAP false
+static void lwt_ip_encap(bool ipv4_encap, bool egress, const char *vrf)
+{
+ char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-";
+ char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-";
+ char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-";
+ char *sec = ipv4_encap ? "encap_gre" : "encap_gre6";
+
+ if (!vrf)
+ return;
+
+ if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3"))
+ goto out;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2, ns3, vrf), "setup network"))
+ goto out;
+
+ /* By default, pings work */
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Remove NS2->DST routes, ping fails */
+ SYS(out, "ip -n %s route del %s/32 dev veth3 %s", ns2, IP4_ADDR_DST, vrf);
+ SYS(out, "ip -n %s -6 route del %s/128 dev veth3 %s", ns2, IP6_ADDR_DST, vrf);
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Install replacement routes (LWT/eBPF), pings succeed */
+ if (egress) {
+ SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ } else {
+ SYS(out, "ip -n %s route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ }
+
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Skip GSO tests with VRF: VRF routing needs properly assigned
+ * source IP/device, which is easy to do with ping but hard with TCP.
+ */
+ if (egress && !vrf[0]) {
+ if (!ASSERT_OK(test_gso_fix(ns1, ns3, AF_INET), "test GSO"))
+ goto out;
+ }
+
+ /* Negative test: remove routes to GRE devices: ping fails */
+ if (!ASSERT_OK(remove_routes_to_gredev(ns1, ns2, vrf), "remove routes to gredev"))
+ goto out;
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Another negative test */
+ if (!ASSERT_OK(add_unreachable_routes_to_gredev(ns1, ns2, vrf),
+ "add unreachable routes"))
+ goto out;
+ ASSERT_OK(check_ping_fails(ns1), "ping expected fail");
+
+out:
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+ SYS_NOFAIL("ip netns del %s", ns3);
+}
+
+void test_lwt_ip_encap_vrf_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_vrf_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "");
+}
+
+void test_lwt_ip_encap_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
new file mode 100644
index 000000000000..b6e8d822e8e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Test suite of lwt_xmit BPF programs that redirect packets
+ * The file tests focus not only if these programs work as expected normally,
+ * but also if they can handle abnormal situations gracefully.
+ *
+ * WARNING
+ * -------
+ * This test suite may crash the kernel, thus should be run in a VM.
+ *
+ * Setup:
+ * ---------
+ * All tests are performed in a single netns. Two lwt encap routes are setup for
+ * each subtest:
+ *
+ * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<ingress_sec>" dev link_err
+ * ip route add 20.0.0.0/24 encap bpf xmit <obj> sec "<egress_sec>" dev link_err
+ *
+ * Here <obj> is statically defined to test_lwt_redirect.bpf.o, and each section
+ * of this object holds a program entry to test. The BPF object is built from
+ * progs/test_lwt_redirect.c. We didn't use generated BPF skeleton since the
+ * attachment for lwt programs are not supported by libbpf yet.
+ *
+ * For testing, ping commands are run in the test netns:
+ *
+ * ping 10.0.0.<ifindex> -c 1 -w 1 -s 100
+ * ping 20.0.0.<ifindex> -c 1 -w 1 -s 100
+ *
+ * Scenarios:
+ * --------------------------------
+ * 1. Redirect to a running tap/tun device
+ * 2. Redirect to a down tap/tun device
+ * 3. Redirect to a vlan device with lower layer down
+ *
+ * Case 1, ping packets should be received by packet socket on target device
+ * when redirected to ingress, and by tun/tap fd when redirected to egress.
+ *
+ * Case 2,3 are considered successful as long as they do not crash the kernel
+ * as a regression.
+ *
+ * Case 1,2 use tap device to test redirect to device that requires MAC
+ * header, and tun device to test the case with no MAC header added.
+ */
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_tun.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#define NETNS "ns_lwt_redirect"
+#include "lwt_helpers.h"
+#include "test_progs.h"
+#include "network_helpers.h"
+
+#define BPF_OBJECT "test_lwt_redirect.bpf.o"
+#define INGRESS_SEC(need_mac) ((need_mac) ? "redir_ingress" : "redir_ingress_nomac")
+#define EGRESS_SEC(need_mac) ((need_mac) ? "redir_egress" : "redir_egress_nomac")
+#define LOCAL_SRC "10.0.0.1"
+#define CIDR_TO_INGRESS "10.0.0.0/24"
+#define CIDR_TO_EGRESS "20.0.0.0/24"
+
+/* ping to redirect toward given dev, with last byte of dest IP being the target
+ * device index.
+ *
+ * Note: ping command inside BPF-CI is busybox version, so it does not have certain
+ * function, such like -m option to set packet mark.
+ */
+static void ping_dev(const char *dev, bool is_ingress)
+{
+ int link_index = if_nametoindex(dev);
+ char ip[256];
+
+ if (!ASSERT_GE(link_index, 0, "if_nametoindex"))
+ return;
+
+ if (is_ingress)
+ snprintf(ip, sizeof(ip), "10.0.0.%d", link_index);
+ else
+ snprintf(ip, sizeof(ip), "20.0.0.%d", link_index);
+
+ /* We won't get a reply. Don't fail here */
+ SYS_NOFAIL("ping %s -c1 -W1 -s %d",
+ ip, ICMP_PAYLOAD_SIZE);
+}
+
+static int new_packet_sock(const char *ifname)
+{
+ int err = 0;
+ int ignore_outgoing = 1;
+ int ifindex = -1;
+ int s = -1;
+
+ s = socket(AF_PACKET, SOCK_RAW, 0);
+ if (!ASSERT_GE(s, 0, "socket(AF_PACKET)"))
+ return -1;
+
+ ifindex = if_nametoindex(ifname);
+ if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) {
+ close(s);
+ return -1;
+ }
+
+ struct sockaddr_ll addr = {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htons(ETH_P_IP),
+ .sll_ifindex = ifindex,
+ };
+
+ err = bind(s, (struct sockaddr *)&addr, sizeof(addr));
+ if (!ASSERT_OK(err, "bind(AF_PACKET)")) {
+ close(s);
+ return -1;
+ }
+
+ /* Use packet socket to capture only the ingress, so we can distinguish
+ * the case where a regression that actually redirects the packet to
+ * the egress.
+ */
+ err = setsockopt(s, SOL_PACKET, PACKET_IGNORE_OUTGOING,
+ &ignore_outgoing, sizeof(ignore_outgoing));
+ if (!ASSERT_OK(err, "setsockopt(PACKET_IGNORE_OUTGOING)")) {
+ close(s);
+ return -1;
+ }
+
+ err = fcntl(s, F_SETFL, O_NONBLOCK);
+ if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
+ close(s);
+ return -1;
+ }
+
+ return s;
+}
+
+static int expect_icmp(char *buf, ssize_t len)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+
+ if (len < (ssize_t)sizeof(*eth))
+ return -1;
+
+ if (eth->h_proto == htons(ETH_P_IP))
+ return __expect_icmp_ipv4((char *)(eth + 1), len - sizeof(*eth));
+
+ return -1;
+}
+
+static int expect_icmp_nomac(char *buf, ssize_t len)
+{
+ return __expect_icmp_ipv4(buf, len);
+}
+
+static void send_and_capture_test_packets(const char *test_name, int tap_fd,
+ const char *target_dev, bool need_mac)
+{
+ int psock = -1;
+ struct timeval timeo = {
+ .tv_sec = 0,
+ .tv_usec = 250000,
+ };
+ int ret = -1;
+
+ filter_t filter = need_mac ? expect_icmp : expect_icmp_nomac;
+
+ ping_dev(target_dev, false);
+
+ ret = wait_for_packet(tap_fd, filter, &timeo);
+ if (!ASSERT_EQ(ret, 1, "wait_for_epacket")) {
+ log_err("%s egress test fails", test_name);
+ goto out;
+ }
+
+ psock = new_packet_sock(target_dev);
+ ping_dev(target_dev, true);
+
+ ret = wait_for_packet(psock, filter, &timeo);
+ if (!ASSERT_EQ(ret, 1, "wait_for_ipacket")) {
+ log_err("%s ingress test fails", test_name);
+ goto out;
+ }
+
+out:
+ if (psock >= 0)
+ close(psock);
+}
+
+static int setup_redirect_target(const char *target_dev, bool need_mac)
+{
+ int target_index = -1;
+ int tap_fd = -1;
+
+ tap_fd = open_tuntap(target_dev, need_mac);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto fail;
+
+ target_index = if_nametoindex(target_dev);
+ if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
+ goto fail;
+
+ SYS(fail, "sysctl -w net.ipv6.conf.all.disable_ipv6=1");
+ SYS(fail, "ip link add link_err type dummy");
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
+ SYS(fail, "ip link set link_err up");
+ SYS(fail, "ip link set %s up", target_dev);
+
+ SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
+ CIDR_TO_INGRESS, BPF_OBJECT, INGRESS_SEC(need_mac));
+
+ SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
+ CIDR_TO_EGRESS, BPF_OBJECT, EGRESS_SEC(need_mac));
+
+ return tap_fd;
+
+fail:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ return -1;
+}
+
+static void test_lwt_redirect_normal(void)
+{
+ const char *target_dev = "tap0";
+ int tap_fd = -1;
+ bool need_mac = true;
+
+ tap_fd = setup_redirect_target(target_dev, need_mac);
+ if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+ return;
+
+ send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac);
+ close(tap_fd);
+}
+
+static void test_lwt_redirect_normal_nomac(void)
+{
+ const char *target_dev = "tun0";
+ int tap_fd = -1;
+ bool need_mac = false;
+
+ tap_fd = setup_redirect_target(target_dev, need_mac);
+ if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+ return;
+
+ send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac);
+ close(tap_fd);
+}
+
+/* This test aims to prevent regression of future. As long as the kernel does
+ * not panic, it is considered as success.
+ */
+static void __test_lwt_redirect_dev_down(bool need_mac)
+{
+ const char *target_dev = "tap0";
+ int tap_fd = -1;
+
+ tap_fd = setup_redirect_target(target_dev, need_mac);
+ if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+ return;
+
+ SYS(out, "ip link set %s down", target_dev);
+ ping_dev(target_dev, true);
+ ping_dev(target_dev, false);
+
+out:
+ close(tap_fd);
+}
+
+static void test_lwt_redirect_dev_down(void)
+{
+ __test_lwt_redirect_dev_down(true);
+}
+
+static void test_lwt_redirect_dev_down_nomac(void)
+{
+ __test_lwt_redirect_dev_down(false);
+}
+
+/* This test aims to prevent regression of future. As long as the kernel does
+ * not panic, it is considered as success.
+ */
+static void test_lwt_redirect_dev_carrier_down(void)
+{
+ const char *lower_dev = "tap0";
+ const char *vlan_dev = "vlan100";
+ int tap_fd = -1;
+
+ tap_fd = setup_redirect_target(lower_dev, true);
+ if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+ return;
+
+ SYS(out, "ip link add vlan100 link %s type vlan id 100", lower_dev);
+ SYS(out, "ip link set %s up", vlan_dev);
+ SYS(out, "ip link set %s down", lower_dev);
+ ping_dev(vlan_dev, true);
+ ping_dev(vlan_dev, false);
+
+out:
+ close(tap_fd);
+}
+
+static void *test_lwt_redirect_run(void *arg)
+{
+ netns_delete();
+ RUN_TEST(lwt_redirect_normal);
+ RUN_TEST(lwt_redirect_normal_nomac);
+ RUN_TEST(lwt_redirect_dev_down);
+ RUN_TEST(lwt_redirect_dev_down_nomac);
+ RUN_TEST(lwt_redirect_dev_carrier_down);
+ return NULL;
+}
+
+void test_lwt_redirect(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_lwt_redirect_run, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
new file mode 100644
index 000000000000..6c50c0f63f43
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Test suite of lwt BPF programs that reroutes packets
+ * The file tests focus not only if these programs work as expected normally,
+ * but also if they can handle abnormal situations gracefully. This test
+ * suite currently only covers lwt_xmit hook. lwt_in tests have not been
+ * implemented.
+ *
+ * WARNING
+ * -------
+ * This test suite can crash the kernel, thus should be run in a VM.
+ *
+ * Setup:
+ * ---------
+ * all tests are performed in a single netns. A lwt encap route is setup for
+ * each subtest:
+ *
+ * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err
+ *
+ * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains
+ * a single test program entry. This program sets packet mark by last byte of
+ * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb
+ * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped
+ * to avoid route loop. We didn't use generated BPF skeleton since the
+ * attachment for lwt programs are not supported by libbpf yet.
+ *
+ * The test program will bring up a tun device, and sets up the following
+ * routes:
+ *
+ * ip rule add pref 100 from all fwmark <tun_index> lookup 100
+ * ip route add table 100 default dev tun0
+ *
+ * For normal testing, a ping command is running in the test netns:
+ *
+ * ping 10.0.0.<tun_index> -c 1 -w 1 -s 100
+ *
+ * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP
+ * socket will try to overflow the fq queue and trigger qdisc drop error.
+ *
+ * Scenarios:
+ * --------------------------------
+ * 1. Reroute to a running tun device
+ * 2. Reroute to a device where qdisc drop
+ *
+ * For case 1, ping packets should be received by the tun device.
+ *
+ * For case 2, force UDP packets to overflow fq limit. As long as kernel
+ * is not crashed, it is considered successful.
+ */
+#define NETNS "ns_lwt_reroute"
+#include <netinet/in.h>
+#include "lwt_helpers.h"
+#include "network_helpers.h"
+#include <linux/net_tstamp.h>
+
+#define BPF_OBJECT "test_lwt_reroute.bpf.o"
+#define LOCAL_SRC "10.0.0.1"
+#define TEST_CIDR "10.0.0.0/24"
+#define XMIT_HOOK "xmit"
+#define XMIT_SECTION "lwt_xmit"
+#define NSEC_PER_SEC 1000000000ULL
+
+/* send a ping to be rerouted to the target device */
+static void ping_once(const char *ip)
+{
+ /* We won't get a reply. Don't fail here */
+ SYS_NOFAIL("ping %s -c1 -W1 -s %d",
+ ip, ICMP_PAYLOAD_SIZE);
+}
+
+/* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop
+ * error. This is done via TX tstamp to force buffering delayed packets.
+ */
+static int overflow_fq(int snd_target, const char *target_ip)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_port = htons(1234),
+ };
+
+ char data_buf[8]; /* only #pkts matter, so use a random small buffer */
+ char control_buf[CMSG_SPACE(sizeof(uint64_t))];
+ struct iovec iov = {
+ .iov_base = data_buf,
+ .iov_len = sizeof(data_buf),
+ };
+ int err = -1;
+ int s = -1;
+ struct sock_txtime txtime_on = {
+ .clockid = CLOCK_MONOTONIC,
+ .flags = 0,
+ };
+ struct msghdr msg = {
+ .msg_name = &addr,
+ .msg_namelen = sizeof(addr),
+ .msg_control = control_buf,
+ .msg_controllen = sizeof(control_buf),
+ .msg_iovlen = 1,
+ .msg_iov = &iov,
+ };
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+
+ memset(data_buf, 0, sizeof(data_buf));
+
+ s = socket(AF_INET, SOCK_DGRAM, 0);
+ if (!ASSERT_GE(s, 0, "socket"))
+ goto out;
+
+ err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on));
+ if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)"))
+ goto out;
+
+ err = inet_pton(AF_INET, target_ip, &addr.sin_addr);
+ if (!ASSERT_EQ(err, 1, "inet_pton"))
+ goto out;
+
+ while (snd_target > 0) {
+ struct timespec now;
+
+ memset(control_buf, 0, sizeof(control_buf));
+ cmsg->cmsg_type = SCM_TXTIME;
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t));
+
+ err = clock_gettime(CLOCK_MONOTONIC, &now);
+ if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) {
+ err = -1;
+ goto out;
+ }
+
+ *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC +
+ now.tv_nsec;
+
+ /* we will intentionally send more than fq limit, so ignore
+ * the error here.
+ */
+ sendmsg(s, &msg, MSG_NOSIGNAL);
+ snd_target--;
+ }
+
+ /* no kernel crash so far is considered success */
+ err = 0;
+
+out:
+ if (s >= 0)
+ close(s);
+
+ return err;
+}
+
+static int setup(const char *tun_dev)
+{
+ int target_index = -1;
+ int tap_fd = -1;
+
+ tap_fd = open_tuntap(tun_dev, false);
+ if (!ASSERT_GE(tap_fd, 0, "open_tun"))
+ return -1;
+
+ target_index = if_nametoindex(tun_dev);
+ if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
+ return -1;
+
+ SYS(fail, "ip link add link_err type dummy");
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
+ SYS(fail, "ip link set link_err up");
+ SYS(fail, "ip link set %s up", tun_dev);
+
+ SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit",
+ TEST_CIDR, BPF_OBJECT);
+
+ SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100",
+ target_index);
+ SYS(fail, "ip route add t 100 default dev %s", tun_dev);
+
+ return tap_fd;
+
+fail:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ return -1;
+}
+
+static void test_lwt_reroute_normal_xmit(void)
+{
+ const char *tun_dev = "tun0";
+ int tun_fd = -1;
+ int ifindex = -1;
+ char ip[256];
+ struct timeval timeo = {
+ .tv_sec = 0,
+ .tv_usec = 250000,
+ };
+
+ tun_fd = setup(tun_dev);
+ if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
+ return;
+
+ ifindex = if_nametoindex(tun_dev);
+ if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
+ return;
+
+ snprintf(ip, 256, "10.0.0.%d", ifindex);
+
+ /* ping packets should be received by the tun device */
+ ping_once(ip);
+
+ if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1,
+ "wait_for_packet"))
+ log_err("%s xmit", __func__);
+}
+
+/*
+ * Test the failure case when the skb is dropped at the qdisc. This is a
+ * regression prevention at the xmit hook only.
+ */
+static void test_lwt_reroute_qdisc_dropped(void)
+{
+ const char *tun_dev = "tun0";
+ int tun_fd = -1;
+ int ifindex = -1;
+ char ip[256];
+
+ tun_fd = setup(tun_dev);
+ if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
+ goto fail;
+
+ SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev);
+
+ ifindex = if_nametoindex(tun_dev);
+ if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
+ return;
+
+ snprintf(ip, 256, "10.0.0.%d", ifindex);
+ ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq");
+
+fail:
+ if (tun_fd >= 0)
+ close(tun_fd);
+}
+
+static void *test_lwt_reroute_run(void *arg)
+{
+ netns_delete();
+ RUN_TEST(lwt_reroute_normal_xmit);
+ RUN_TEST(lwt_reroute_qdisc_dropped);
+ return NULL;
+}
+
+void test_lwt_reroute(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
new file mode 100644
index 000000000000..3bc730b7c7fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Connects 6 network namespaces through veths.
+ * Each NS may have different IPv6 global scope addresses :
+ *
+ * NS1 NS2 NS3 NS4 NS5 NS6
+ * lo veth1 <-> veth2 veth3 <-> veth4 veth5 <-> veth6 lo veth7 <-> veth8 veth9 <-> veth10 lo
+ * fb00 ::1 ::12 ::21 ::34 ::43 ::56 ::65 ::78 ::87 ::910 ::109 ::6
+ * fd00 ::4
+ * fc42 ::1
+ *
+ * All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
+ * IPv6 header with a Segment Routing Header, with segments :
+ * fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+ *
+ * 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
+ * - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
+ * - fd00::2 : remove the TLV, change the flags, add a tag
+ * - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+ *
+ * fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+ * Each End.BPF action will validate the operations applied on the SRH by the
+ * previous BPF program in the chain, otherwise the packet is dropped.
+ *
+ * An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+ * datagram can be read on NS6 when binding to fb00::6.
+ */
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define NETNS_BASE "lwt-seg6local-"
+#define BPF_FILE "test_lwt_seg6local.bpf.o"
+
+static void cleanup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS_NOFAIL("ip netns del %s%d", NETNS_BASE, ns);
+}
+
+static int setup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS(fail, "ip netns add %s%d", NETNS_BASE, ns);
+
+ SYS(fail, "ip -n %s6 link set dev lo up", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++) {
+ int local_id = ns * 2 - 1;
+ int peer_id = ns * 2;
+ int next_ns = ns + 1;
+
+ SYS(fail, "ip -n %s%d link add veth%d type veth peer name veth%d netns %s%d",
+ NETNS_BASE, ns, local_id, peer_id, NETNS_BASE, next_ns);
+
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, ns, local_id);
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, next_ns, peer_id);
+
+ /* All link scope addresses to veths */
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, ns, local_id, peer_id, local_id);
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, next_ns, peer_id, local_id, peer_id);
+ }
+
+
+ SYS(fail, "ip -n %s5 -6 route add fb00::109 table 117 dev veth9 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s1 -6 addr add fb00::1/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s1 -6 route add fb00::6 dev veth1 via fb00::21", NETNS_BASE);
+
+ SYS(fail, "ip -n %s2 -6 route add fb00::6 encap bpf in obj %s sec encap_srh dev veth2",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s2 -6 route add fd00::1 dev veth3 via fb00::43 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s3 -6 route add fc42::1 dev veth5 via fb00::65", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s3 -6 route add fd00::1 encap seg6local action End.BPF endpoint obj %s sec add_egr_x dev veth4",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail,
+ "ip -n %s4 -6 route add fd00::2 encap seg6local action End.BPF endpoint obj %s sec pop_egr dev veth6",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s4 -6 addr add fc42::1 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s4 -6 route add fd00::3 dev veth7 via fb00::87", NETNS_BASE);
+
+ SYS(fail, "ip -n %s5 -6 route add fd00::4 table 117 dev veth9 via fb00::109", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s5 -6 route add fd00::3 encap seg6local action End.BPF endpoint obj %s sec inspect_t dev veth8",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail, "ip -n %s6 -6 addr add fb00::6/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s6 -6 addr add fd00::4/16 dev lo", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++)
+ SYS(fail, "ip netns exec %s%d sysctl -wq net.ipv6.conf.all.forwarding=1",
+ NETNS_BASE, ns);
+
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.all.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.lo.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.veth10.seg6_enabled=1", NETNS_BASE);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define SERVER_PORT 7330
+#define CLIENT_PORT 2121
+void test_lwt_seg6local(void)
+{
+ struct sockaddr_in6 server_addr = {};
+ const char *ns1 = NETNS_BASE "1";
+ const char *ns6 = NETNS_BASE "6";
+ struct nstoken *nstoken = NULL;
+ const char *foobar = "foobar";
+ ssize_t bytes;
+ int sfd, cfd;
+ char buf[7];
+
+ if (!ASSERT_OK(setup(), "setup"))
+ goto out;
+
+ nstoken = open_netns(ns6);
+ if (!ASSERT_OK_PTR(nstoken, "open ns6"))
+ goto out;
+
+ sfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::6", SERVER_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::1", CLIENT_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "start client"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Send a packet larger than MTU */
+ server_addr.sin6_family = AF_INET6;
+ server_addr.sin6_port = htons(SERVER_PORT);
+ if (!ASSERT_EQ(inet_pton(AF_INET6, "fb00::6", &server_addr.sin6_addr), 1,
+ "build target addr"))
+ goto close_client;
+
+ bytes = sendto(cfd, foobar, sizeof(foobar), 0,
+ (struct sockaddr *)&server_addr, sizeof(server_addr));
+ if (!ASSERT_EQ(bytes, sizeof(foobar), "send packet"))
+ goto close_client;
+
+ /* Verify we received all expected bytes */
+ bytes = read(sfd, buf, sizeof(buf));
+ if (!ASSERT_EQ(bytes, sizeof(buf), "receive packet"))
+ goto close_client;
+ ASSERT_STREQ(buf, foobar, "check udp packet");
+
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+out:
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_btf.c b/tools/testing/selftests/bpf/prog_tests/map_btf.c
new file mode 100644
index 000000000000..2c4ef6037573
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_btf.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+
+#include "normal_map_btf.skel.h"
+#include "map_in_map_btf.skel.h"
+
+static void do_test_normal_map_btf(void)
+{
+ struct normal_map_btf *skel;
+ int i, err, new_fd = -1;
+ int map_fd_arr[64];
+
+ skel = normal_map_btf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = normal_map_btf__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ skel->bss->pid = getpid();
+ usleep(1);
+ ASSERT_TRUE(skel->bss->done, "done");
+
+ /* Use percpu_array to slow bpf_map_free_deferred() down.
+ * The memory allocation may fail, so doesn't check the returned fd.
+ */
+ for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++)
+ map_fd_arr[i] = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, 4, 4, 256, NULL);
+
+ /* Close array fd later */
+ new_fd = dup(bpf_map__fd(skel->maps.array));
+out:
+ normal_map_btf__destroy(skel);
+ if (new_fd < 0)
+ return;
+ /* Use kern_sync_rcu() to wait for the start of the free of the bpf
+ * program and use an assumed delay to wait for the release of the map
+ * btf which is held by other maps (e.g, bss). After that, array map
+ * holds the last reference of map btf.
+ */
+ kern_sync_rcu();
+ usleep(4000);
+ /* Spawn multiple kworkers to delay the invocation of
+ * bpf_map_free_deferred() for array map.
+ */
+ for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) {
+ if (map_fd_arr[i] < 0)
+ continue;
+ close(map_fd_arr[i]);
+ }
+ close(new_fd);
+}
+
+static void do_test_map_in_map_btf(void)
+{
+ int err, zero = 0, new_fd = -1;
+ struct map_in_map_btf *skel;
+
+ skel = map_in_map_btf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = map_in_map_btf__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ skel->bss->pid = getpid();
+ usleep(1);
+ ASSERT_TRUE(skel->bss->done, "done");
+
+ /* Close inner_array fd later */
+ new_fd = dup(bpf_map__fd(skel->maps.inner_array));
+ /* Defer the free of inner_array */
+ err = bpf_map__delete_elem(skel->maps.outer_array, &zero, sizeof(zero), 0);
+ ASSERT_OK(err, "delete inner map");
+out:
+ map_in_map_btf__destroy(skel);
+ if (new_fd < 0)
+ return;
+ /* Use kern_sync_rcu() to wait for the start of the free of the bpf
+ * program and use an assumed delay to wait for the free of the outer
+ * map and the release of map btf. After that, inner map holds the last
+ * reference of map btf.
+ */
+ kern_sync_rcu();
+ usleep(10000);
+ close(new_fd);
+}
+
+void test_map_btf(void)
+{
+ if (test__start_subtest("array_btf"))
+ do_test_normal_map_btf();
+ if (test__start_subtest("inner_array_btf"))
+ do_test_map_in_map_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c
new file mode 100644
index 000000000000..6bdc6d6de0da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "map_excl.skel.h"
+
+static void test_map_excl_allowed(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__set_exclusive_program"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, true);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, false);
+
+ err = map_excl__load(skel);
+ ASSERT_OK(err, "map_excl__load");
+out:
+ map_excl__destroy(skel);
+}
+
+static void test_map_excl_denied(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__make_exclusive"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, false);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, true);
+
+ err = map_excl__load(skel);
+ ASSERT_EQ(err, -EACCES, "exclusive map access not denied\n");
+out:
+ map_excl__destroy(skel);
+
+}
+
+void test_map_excl(void)
+{
+ if (test__start_subtest("map_excl_allowed"))
+ test_map_excl_allowed();
+ if (test__start_subtest("map_excl_denied"))
+ test_map_excl_denied();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_in_map.c b/tools/testing/selftests/bpf/prog_tests/map_in_map.c
new file mode 100644
index 000000000000..286a9fb469e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_in_map.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "access_map_in_map.skel.h"
+#include "update_map_in_htab.skel.h"
+
+struct thread_ctx {
+ pthread_barrier_t barrier;
+ int outer_map_fd;
+ int start, abort;
+ int loop, err;
+};
+
+static int wait_for_start_or_abort(struct thread_ctx *ctx)
+{
+ while (!ctx->start && !ctx->abort)
+ usleep(1);
+ return ctx->abort ? -1 : 0;
+}
+
+static void *update_map_fn(void *data)
+{
+ struct thread_ctx *ctx = data;
+ int loop = ctx->loop, err = 0;
+
+ if (wait_for_start_or_abort(ctx) < 0)
+ return NULL;
+ pthread_barrier_wait(&ctx->barrier);
+
+ while (loop-- > 0) {
+ int fd, zero = 0;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (fd < 0) {
+ err |= 1;
+ pthread_barrier_wait(&ctx->barrier);
+ continue;
+ }
+
+ /* Remove the old inner map */
+ if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0)
+ err |= 2;
+ close(fd);
+ pthread_barrier_wait(&ctx->barrier);
+ }
+
+ ctx->err = err;
+
+ return NULL;
+}
+
+static void *access_map_fn(void *data)
+{
+ struct thread_ctx *ctx = data;
+ int loop = ctx->loop;
+
+ if (wait_for_start_or_abort(ctx) < 0)
+ return NULL;
+ pthread_barrier_wait(&ctx->barrier);
+
+ while (loop-- > 0) {
+ /* Access the old inner map */
+ syscall(SYS_getpgid);
+ pthread_barrier_wait(&ctx->barrier);
+ }
+
+ return NULL;
+}
+
+static void test_map_in_map_access(const char *prog_name, const char *map_name)
+{
+ struct access_map_in_map *skel;
+ struct bpf_map *outer_map;
+ struct bpf_program *prog;
+ struct thread_ctx ctx;
+ pthread_t tid[2];
+ int err;
+
+ skel = access_map_in_map__open();
+ if (!ASSERT_OK_PTR(skel, "access_map_in_map open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "find program"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+
+ outer_map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(outer_map, "find map"))
+ goto out;
+
+ err = access_map_in_map__load(skel);
+ if (!ASSERT_OK(err, "access_map_in_map load"))
+ goto out;
+
+ err = access_map_in_map__attach(skel);
+ if (!ASSERT_OK(err, "access_map_in_map attach"))
+ goto out;
+
+ skel->bss->tgid = getpid();
+
+ memset(&ctx, 0, sizeof(ctx));
+ pthread_barrier_init(&ctx.barrier, NULL, 2);
+ ctx.outer_map_fd = bpf_map__fd(outer_map);
+ ctx.loop = 4;
+
+ err = pthread_create(&tid[0], NULL, update_map_fn, &ctx);
+ if (!ASSERT_OK(err, "close_thread"))
+ goto out;
+
+ err = pthread_create(&tid[1], NULL, access_map_fn, &ctx);
+ if (!ASSERT_OK(err, "read_thread")) {
+ ctx.abort = 1;
+ pthread_join(tid[0], NULL);
+ goto out;
+ }
+
+ ctx.start = 1;
+ pthread_join(tid[0], NULL);
+ pthread_join(tid[1], NULL);
+
+ ASSERT_OK(ctx.err, "err");
+out:
+ access_map_in_map__destroy(skel);
+}
+
+static void add_del_fd_htab(int outer_fd)
+{
+ int inner_fd, err;
+ int key = 1;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "inner1"))
+ return;
+ err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST);
+ close(inner_fd);
+ if (!ASSERT_OK(err, "add"))
+ return;
+
+ /* Delete */
+ err = bpf_map_delete_elem(outer_fd, &key);
+ ASSERT_OK(err, "del");
+}
+
+static void overwrite_fd_htab(int outer_fd)
+{
+ int inner_fd, err;
+ int key = 1;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "inner1"))
+ return;
+ err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST);
+ close(inner_fd);
+ if (!ASSERT_OK(err, "add"))
+ return;
+
+ /* Overwrite */
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr2", 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "inner2"))
+ goto out;
+ err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_EXIST);
+ close(inner_fd);
+ if (!ASSERT_OK(err, "overwrite"))
+ goto out;
+
+ err = bpf_map_delete_elem(outer_fd, &key);
+ ASSERT_OK(err, "del");
+ return;
+out:
+ bpf_map_delete_elem(outer_fd, &key);
+}
+
+static void lookup_delete_fd_htab(int outer_fd)
+{
+ int key = 1, value;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "inner1"))
+ return;
+ err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST);
+ close(inner_fd);
+ if (!ASSERT_OK(err, "add"))
+ return;
+
+ /* lookup_and_delete is not supported for htab of maps */
+ err = bpf_map_lookup_and_delete_elem(outer_fd, &key, &value);
+ ASSERT_EQ(err, -ENOTSUPP, "lookup_del");
+
+ err = bpf_map_delete_elem(outer_fd, &key);
+ ASSERT_OK(err, "del");
+}
+
+static void batched_lookup_delete_fd_htab(int outer_fd)
+{
+ int keys[2] = {1, 2}, values[2];
+ unsigned int cnt, batch;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "inner1"))
+ return;
+
+ err = bpf_map_update_elem(outer_fd, &keys[0], &inner_fd, BPF_NOEXIST);
+ close(inner_fd);
+ if (!ASSERT_OK(err, "add1"))
+ return;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr2", 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "inner2"))
+ goto out;
+ err = bpf_map_update_elem(outer_fd, &keys[1], &inner_fd, BPF_NOEXIST);
+ close(inner_fd);
+ if (!ASSERT_OK(err, "add2"))
+ goto out;
+
+ /* batched lookup_and_delete */
+ cnt = ARRAY_SIZE(keys);
+ err = bpf_map_lookup_and_delete_batch(outer_fd, NULL, &batch, keys, values, &cnt, NULL);
+ ASSERT_TRUE((!err || err == -ENOENT), "delete_batch ret");
+ ASSERT_EQ(cnt, ARRAY_SIZE(keys), "delete_batch cnt");
+
+out:
+ bpf_map_delete_elem(outer_fd, &keys[0]);
+}
+
+static void test_update_map_in_htab(bool preallocate)
+{
+ struct update_map_in_htab *skel;
+ int err, fd;
+
+ skel = update_map_in_htab__open();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ err = update_map_in_htab__load(skel);
+ if (!ASSERT_OK(err, "load"))
+ goto out;
+
+ fd = preallocate ? bpf_map__fd(skel->maps.outer_htab_map) :
+ bpf_map__fd(skel->maps.outer_alloc_htab_map);
+
+ add_del_fd_htab(fd);
+ overwrite_fd_htab(fd);
+ lookup_delete_fd_htab(fd);
+ batched_lookup_delete_fd_htab(fd);
+out:
+ update_map_in_htab__destroy(skel);
+}
+
+void test_map_in_map(void)
+{
+ if (test__start_subtest("acc_map_in_array"))
+ test_map_in_map_access("access_map_in_array", "outer_array_map");
+ if (test__start_subtest("sleepable_acc_map_in_array"))
+ test_map_in_map_access("sleepable_access_map_in_array", "outer_array_map");
+ if (test__start_subtest("acc_map_in_htab"))
+ test_map_in_map_access("access_map_in_htab", "outer_htab_map");
+ if (test__start_subtest("sleepable_acc_map_in_htab"))
+ test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map");
+ if (test__start_subtest("update_map_in_htab"))
+ test_update_map_in_htab(true);
+ if (test__start_subtest("update_map_in_alloc_htab"))
+ test_update_map_in_htab(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
new file mode 100644
index 000000000000..8743df599567
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_kptr.skel.h"
+#include "map_kptr_fail.skel.h"
+#include "rcu_tasks_trace_gp.skel.h"
+
+static void test_map_kptr_success(bool test_run)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, lopts);
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ int key = 0, ret, cpu;
+ struct map_kptr *skel;
+ char buf[16], *pbuf;
+
+ skel = map_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref1), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref1 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref1 retval");
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref2 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref1), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref1 refcount");
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref1 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref2), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref2 refcount");
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref2 retval");
+
+ if (test_run)
+ goto exit;
+
+ cpu = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(cpu, 0, "libbpf_num_possible_cpus"))
+ goto exit;
+
+ pbuf = calloc(cpu, sizeof(buf));
+ if (!ASSERT_OK_PTR(pbuf, "calloc(pbuf)"))
+ goto exit;
+
+ ret = bpf_map__update_elem(skel->maps.array_map,
+ &key, sizeof(key), buf, sizeof(buf), 0);
+ ASSERT_OK(ret, "array_map update");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__update_elem(skel->maps.pcpu_array_map,
+ &key, sizeof(key), pbuf, cpu * sizeof(buf), 0);
+ ASSERT_OK(ret, "pcpu_array_map update");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "pcpu_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "hash_malloc_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "pcpu_hash_malloc_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "lru_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "lru_pcpu_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref_del), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref_del delete");
+ skel->data->ref--;
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref_del retval");
+
+ free(pbuf);
+exit:
+ map_kptr__destroy(skel);
+}
+
+static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu)
+{
+ long gp_seq = READ_ONCE(rcu->bss->gp_seq);
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace),
+ &opts), "do_call_rcu_tasks_trace"))
+ return -EFAULT;
+ if (!ASSERT_OK(opts.retval, "opts.retval == 0"))
+ return -EFAULT;
+ while (gp_seq == READ_ONCE(rcu->bss->gp_seq))
+ sched_yield();
+ return 0;
+}
+
+void serial_test_map_kptr(void)
+{
+ struct rcu_tasks_trace_gp *skel;
+
+ RUN_TESTS(map_kptr_fail);
+
+ skel = rcu_tasks_trace_gp__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load"))
+ return;
+ if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach"))
+ goto end;
+
+ if (test__start_subtest("success-map")) {
+ test_map_kptr_success(true);
+
+ ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
+ ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ /* Observe refcount dropping to 1 on bpf_map_free_deferred */
+ test_map_kptr_success(false);
+
+ ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
+ ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ /* Observe refcount dropping to 1 on synchronous delete elem */
+ test_map_kptr_success(true);
+ }
+
+end:
+ rcu_tasks_trace_gp__destroy(skel);
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index ce17b1ed8709..1d6726f01dd2 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -4,14 +4,17 @@
static void *spin_lock_thread(void *arg)
{
- __u32 duration, retval;
int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+ ASSERT_OK(topts.retval, "test_run_opts retval");
- err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
pthread_exit(arg);
}
@@ -46,16 +49,16 @@ out:
void test_map_lock(void)
{
- const char *file = "./test_map_lock.o";
+ const char *file = "./test_map_lock.bpf.o";
int prog_fd, map_fd[2], vars[17] = {};
pthread_t thread_id[6];
struct bpf_object *obj = NULL;
int err = 0, key = 0, i;
void *ret;
- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
if (CHECK_FAIL(err)) {
- printf("test_map_lock:bpf_prog_load errno %d\n", errno);
+ printf("test_map_lock:bpf_prog_test_load errno %d\n", errno);
goto close_prog;
}
map_fd[0] = bpf_find_map(__func__, obj, "hash_map");
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
new file mode 100644
index 000000000000..bfb1bf3fd427
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include <test_progs.h>
+#include "test_map_lookup_percpu_elem.skel.h"
+
+void test_map_lookup_percpu_elem(void)
+{
+ struct test_map_lookup_percpu_elem *skel;
+ __u64 key = 0, sum;
+ int ret, i, nr_cpus = libbpf_num_possible_cpus();
+ __u64 *buf;
+
+ buf = malloc(nr_cpus*sizeof(__u64));
+ if (!ASSERT_OK_PTR(buf, "malloc"))
+ return;
+
+ for (i = 0; i < nr_cpus; i++)
+ buf[i] = i;
+ sum = (nr_cpus - 1) * nr_cpus / 2;
+
+ skel = test_map_lookup_percpu_elem__open();
+ if (!ASSERT_OK_PTR(skel, "test_map_lookup_percpu_elem__open"))
+ goto exit;
+
+ skel->rodata->my_pid = getpid();
+ skel->rodata->nr_cpus = nr_cpus;
+
+ ret = test_map_lookup_percpu_elem__load(skel);
+ if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__load"))
+ goto cleanup;
+
+ ret = test_map_lookup_percpu_elem__attach(skel);
+ if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__attach"))
+ goto cleanup;
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_array_map), &key, buf, 0);
+ ASSERT_OK(ret, "percpu_array_map update");
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_hash_map), &key, buf, 0);
+ ASSERT_OK(ret, "percpu_hash_map update");
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_lru_hash_map), &key, buf, 0);
+ ASSERT_OK(ret, "percpu_lru_hash_map update");
+
+ syscall(__NR_getuid);
+
+ test_map_lookup_percpu_elem__detach(skel);
+
+ ASSERT_EQ(skel->bss->percpu_array_elem_sum, sum, "percpu_array lookup percpu elem");
+ ASSERT_EQ(skel->bss->percpu_hash_elem_sum, sum, "percpu_hash lookup percpu elem");
+ ASSERT_EQ(skel->bss->percpu_lru_hash_elem_sum, sum, "percpu_lru_hash lookup percpu elem");
+
+cleanup:
+ test_map_lookup_percpu_elem__destroy(skel);
+exit:
+ free(buf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ops.c b/tools/testing/selftests/bpf/prog_tests/map_ops.c
new file mode 100644
index 000000000000..be5e42a413b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_ops.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "test_map_ops.skel.h"
+#include "test_progs.h"
+
+static void map_update(void)
+{
+ (void)syscall(__NR_getpid);
+}
+
+static void map_delete(void)
+{
+ (void)syscall(__NR_getppid);
+}
+
+static void map_push(void)
+{
+ (void)syscall(__NR_getuid);
+}
+
+static void map_pop(void)
+{
+ (void)syscall(__NR_geteuid);
+}
+
+static void map_peek(void)
+{
+ (void)syscall(__NR_getgid);
+}
+
+static void map_for_each_pass(void)
+{
+ (void)syscall(__NR_gettid);
+}
+
+static void map_for_each_fail(void)
+{
+ (void)syscall(__NR_getpgid);
+}
+
+static int setup(struct test_map_ops **skel)
+{
+ int err = 0;
+
+ if (!skel)
+ return -1;
+
+ *skel = test_map_ops__open();
+ if (!ASSERT_OK_PTR(*skel, "test_map_ops__open"))
+ return -1;
+
+ (*skel)->rodata->pid = getpid();
+
+ err = test_map_ops__load(*skel);
+ if (!ASSERT_OK(err, "test_map_ops__load"))
+ return err;
+
+ err = test_map_ops__attach(*skel);
+ if (!ASSERT_OK(err, "test_map_ops__attach"))
+ return err;
+
+ return err;
+}
+
+static void teardown(struct test_map_ops **skel)
+{
+ if (skel && *skel)
+ test_map_ops__destroy(*skel);
+}
+
+static void map_ops_update_delete_subtest(void)
+{
+ struct test_map_ops *skel;
+
+ if (setup(&skel))
+ goto teardown;
+
+ map_update();
+ ASSERT_OK(skel->bss->err, "map_update_initial");
+
+ map_update();
+ ASSERT_LT(skel->bss->err, 0, "map_update_existing");
+ ASSERT_EQ(skel->bss->err, -EEXIST, "map_update_existing");
+
+ map_delete();
+ ASSERT_OK(skel->bss->err, "map_delete_existing");
+
+ map_delete();
+ ASSERT_LT(skel->bss->err, 0, "map_delete_non_existing");
+ ASSERT_EQ(skel->bss->err, -ENOENT, "map_delete_non_existing");
+
+teardown:
+ teardown(&skel);
+}
+
+static void map_ops_push_peek_pop_subtest(void)
+{
+ struct test_map_ops *skel;
+
+ if (setup(&skel))
+ goto teardown;
+
+ map_push();
+ ASSERT_OK(skel->bss->err, "map_push_initial");
+
+ map_push();
+ ASSERT_LT(skel->bss->err, 0, "map_push_when_full");
+ ASSERT_EQ(skel->bss->err, -E2BIG, "map_push_when_full");
+
+ map_peek();
+ ASSERT_OK(skel->bss->err, "map_peek");
+
+ map_pop();
+ ASSERT_OK(skel->bss->err, "map_pop");
+
+ map_peek();
+ ASSERT_LT(skel->bss->err, 0, "map_peek_when_empty");
+ ASSERT_EQ(skel->bss->err, -ENOENT, "map_peek_when_empty");
+
+ map_pop();
+ ASSERT_LT(skel->bss->err, 0, "map_pop_when_empty");
+ ASSERT_EQ(skel->bss->err, -ENOENT, "map_pop_when_empty");
+
+teardown:
+ teardown(&skel);
+}
+
+static void map_ops_for_each_subtest(void)
+{
+ struct test_map_ops *skel;
+
+ if (setup(&skel))
+ goto teardown;
+
+ map_for_each_pass();
+ /* expect to iterate over 1 element */
+ ASSERT_EQ(skel->bss->err, 1, "map_for_each_no_flags");
+
+ map_for_each_fail();
+ ASSERT_LT(skel->bss->err, 0, "map_for_each_with_flags");
+ ASSERT_EQ(skel->bss->err, -EINVAL, "map_for_each_with_flags");
+
+teardown:
+ teardown(&skel);
+}
+
+void test_map_ops(void)
+{
+ if (test__start_subtest("map_ops_update_delete"))
+ map_ops_update_delete_subtest();
+
+ if (test__start_subtest("map_ops_push_peek_pop"))
+ map_ops_push_peek_pop_subtest();
+
+ if (test__start_subtest("map_ops_for_each"))
+ map_ops_for_each_subtest();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
index c230a573c373..43e502acf050 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -4,29 +4,42 @@
#include <test_progs.h>
#include <network_helpers.h>
-#include "map_ptr_kern.skel.h"
+#include "map_ptr_kern.lskel.h"
void test_map_ptr(void)
{
- struct map_ptr_kern *skel;
- __u32 duration = 0, retval;
+ struct map_ptr_kern_lskel *skel;
char buf[128];
int err;
-
- skel = map_ptr_kern__open_and_load();
- if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ int page_size = getpagesize();
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
+
+ skel = map_ptr_kern_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
- sizeof(pkt_v4), buf, NULL, &retval, NULL);
+ skel->maps.m_ringbuf.max_entries = page_size;
+
+ err = map_ptr_kern_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ skel->bss->page_size = page_size;
+
+ err = bpf_prog_test_run_opts(skel->progs.cg_skb.prog_fd, &topts);
- if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno))
+ if (!ASSERT_OK(err, "test_run"))
goto cleanup;
- if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval,
- skel->bss->g_map_type, skel->bss->g_line))
+ if (!ASSERT_NEQ(topts.retval, 0, "test_run retval"))
goto cleanup;
cleanup:
- map_ptr_kern__destroy(skel);
+ map_ptr_kern_lskel__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..40d4f687bd9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "mem_rdonly_untrusted.skel.h"
+
+void test_mem_rdonly_untrusted(void)
+{
+ RUN_TESTS(mem_rdonly_untrusted);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c
index 2c53eade88e3..8b67dfc10f5c 100644
--- a/tools/testing/selftests/bpf/prog_tests/metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/metadata.c
@@ -16,7 +16,7 @@ static int duration;
static int prog_holds_map(int prog_fd, int map_fd)
{
struct bpf_prog_info prog_info = {};
- struct bpf_prog_info map_info = {};
+ struct bpf_map_info map_info = {};
__u32 prog_info_len;
__u32 map_info_len;
__u32 *map_ids;
@@ -25,12 +25,12 @@ static int prog_holds_map(int prog_fd, int map_fd)
int i;
map_info_len = sizeof(map_info);
- ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+ ret = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
if (ret)
return -errno;
prog_info_len = sizeof(prog_info);
- ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
if (ret)
return -errno;
@@ -44,7 +44,7 @@ static int prog_holds_map(int prog_fd, int map_fd)
prog_info.map_ids = ptr_to_u64(map_ids);
prog_info_len = sizeof(prog_info);
- ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
if (ret) {
ret = -errno;
goto free_map_ids;
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
new file mode 100644
index 000000000000..653b0a20fab9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. call listen() for 4 server sockets.
+ * 2. call connect() for 25 client sockets.
+ * 3. call listen() for 1 server socket. (migration target)
+ * 4. update a map to migrate all child sockets
+ * to the last server socket (migrate_map[cookie] = 4)
+ * 5. call shutdown() for first 4 server sockets
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 6. call listen() for the second server socket.
+ * 7. call shutdown() for the last server
+ * and migrate the requests in the accept queue
+ * to the second server socket.
+ * 8. call listen() for the last server.
+ * 9. call shutdown() for the second server
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 10. call accept() for the last server socket.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "test_migrate_reuseport.skel.h"
+#include "network_helpers.h"
+
+#ifndef TCP_FASTOPEN_CONNECT
+#define TCP_FASTOPEN_CONNECT 30
+#endif
+
+#define IFINDEX_LO 1
+
+#define NR_SERVERS 5
+#define NR_CLIENTS (NR_SERVERS * 5)
+#define MIGRATED_TO (NR_SERVERS - 1)
+
+/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
+#define QLEN (NR_CLIENTS * 5)
+
+#define MSG "Hello World\0"
+#define MSGLEN 12
+
+static struct migrate_reuseport_test_case {
+ const char *name;
+ __s64 servers[NR_SERVERS];
+ __s64 clients[NR_CLIENTS];
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ int family;
+ int state;
+ bool drop_ack;
+ bool expire_synack_timer;
+ bool fastopen;
+ struct bpf_link *link;
+} test_cases[] = {
+ {
+ .name = "IPv4 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ }
+};
+
+static void init_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ fds[i] = -1;
+}
+
+static void close_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (fds[i] != -1) {
+ close(fds[i]);
+ fds[i] = -1;
+ }
+ }
+}
+
+static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
+{
+ int err = 0, fd, len;
+
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (!ASSERT_NEQ(fd, -1, "open"))
+ return -1;
+
+ if (restore) {
+ len = write(fd, buf, *saved_len);
+ if (!ASSERT_EQ(len, *saved_len, "write - restore"))
+ err = -1;
+ } else {
+ *saved_len = read(fd, buf, size);
+ if (!ASSERT_GE(*saved_len, 1, "read")) {
+ err = -1;
+ goto close;
+ }
+
+ err = lseek(fd, 0, SEEK_SET);
+ if (!ASSERT_OK(err, "lseek"))
+ goto close;
+
+ /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
+ * TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
+ */
+ len = write(fd, "519", 3);
+ if (!ASSERT_EQ(len, 3, "write - setup"))
+ err = -1;
+ }
+
+close:
+ close(fd);
+
+ return err;
+}
+
+static int drop_ack(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ if (test_case->family == AF_INET)
+ skel->bss->server_port = ((struct sockaddr_in *)
+ &test_case->addr)->sin_port;
+ else
+ skel->bss->server_port = ((struct sockaddr_in6 *)
+ &test_case->addr)->sin6_port;
+
+ test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
+ IFINDEX_LO);
+ if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
+ return -1;
+
+ return 0;
+}
+
+static int pass_ack(struct migrate_reuseport_test_case *test_case)
+{
+ int err;
+
+ err = bpf_link__destroy(test_case->link);
+ if (!ASSERT_OK(err, "bpf_link__destroy"))
+ return -1;
+
+ test_case->link = NULL;
+
+ return 0;
+}
+
+static int start_servers(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, prog_fd, reuseport = 1, qlen = QLEN;
+
+ prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
+
+ make_sockaddr(test_case->family,
+ test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
+ &test_case->addr, &test_case->addrlen);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
+ return -1;
+
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_REUSEPORT, &reuseport, sizeof(reuseport));
+ if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
+ return -1;
+
+ err = bind(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "bind"))
+ return -1;
+
+ if (i == 0) {
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+ if (!ASSERT_OK(err,
+ "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
+ return -1;
+
+ err = getsockname(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ &test_case->addrlen);
+ if (!ASSERT_OK(err, "getsockname"))
+ return -1;
+ }
+
+ if (test_case->fastopen) {
+ err = setsockopt(test_case->servers[i],
+ SOL_TCP, TCP_FASTOPEN,
+ &qlen, sizeof(qlen));
+ if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
+ return -1;
+ }
+
+ /* All requests will be tied to the first four listeners */
+ if (i != MIGRATED_TO) {
+ err = listen(test_case->servers[i], qlen);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int start_clients(struct migrate_reuseport_test_case *test_case)
+{
+ char buf[MSGLEN] = MSG;
+ int i, err;
+
+ for (i = 0; i < NR_CLIENTS; i++) {
+ test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
+ return -1;
+
+ /* The attached XDP program drops only the final ACK, so
+ * clients will transition to TCP_ESTABLISHED immediately.
+ */
+ err = settimeo(test_case->clients[i], 100);
+ if (!ASSERT_OK(err, "settimeo"))
+ return -1;
+
+ if (test_case->fastopen) {
+ int fastopen = 1;
+
+ err = setsockopt(test_case->clients[i], IPPROTO_TCP,
+ TCP_FASTOPEN_CONNECT, &fastopen,
+ sizeof(fastopen));
+ if (!ASSERT_OK(err,
+ "setsockopt - TCP_FASTOPEN_CONNECT"))
+ return -1;
+ }
+
+ err = connect(test_case->clients[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "connect"))
+ return -1;
+
+ err = write(test_case->clients[i], buf, MSGLEN);
+ if (!ASSERT_EQ(err, MSGLEN, "write"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int update_maps(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, migrated_to = MIGRATED_TO;
+ int reuseport_map_fd, migrate_map_fd;
+ __u64 value;
+
+ reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
+ migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ value = (__u64)test_case->servers[i];
+ err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int migrate_dance(struct migrate_reuseport_test_case *test_case)
+{
+ int i, err;
+
+ /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
+ * to the last listener based on eBPF.
+ */
+ for (i = 0; i < MIGRATED_TO; i++) {
+ err = shutdown(test_case->servers[i], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+ }
+
+ /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
+ if (test_case->state == BPF_TCP_NEW_SYN_RECV)
+ return 0;
+
+ /* Note that we use the second listener instead of the
+ * first one here.
+ *
+ * The fist listener is bind()ed with port 0 and,
+ * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
+ * calling listen() again will bind() the first listener
+ * on a new ephemeral port and detach it from the existing
+ * reuseport group. (See: __inet_bind(), tcp_set_state())
+ *
+ * OTOH, the second one is bind()ed with a specific port,
+ * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
+ * resurrect the listener on the existing reuseport group.
+ */
+ err = listen(test_case->servers[1], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate from the last listener to the second one.
+ *
+ * All listeners were detached out of the reuseport_map,
+ * so migration will be done by kernel random pick from here.
+ */
+ err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ /* Back to the existing reuseport group */
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate back to the last one from the second one */
+ err = shutdown(test_case->servers[1], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ return 0;
+}
+
+static void count_requests(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int err, cnt = 0, client;
+ char buf[MSGLEN];
+
+ err = settimeo(test_case->servers[MIGRATED_TO], 4000);
+ if (!ASSERT_OK(err, "settimeo"))
+ goto out;
+
+ for (; cnt < NR_CLIENTS; cnt++) {
+ client = accept(test_case->servers[MIGRATED_TO],
+ (struct sockaddr *)&addr, &len);
+ if (!ASSERT_NEQ(client, -1, "accept"))
+ goto out;
+
+ memset(buf, 0, MSGLEN);
+ read(client, &buf, MSGLEN);
+ close(client);
+
+ if (!ASSERT_STREQ(buf, MSG, "read"))
+ goto out;
+ }
+
+out:
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
+
+ switch (test_case->state) {
+ case BPF_TCP_ESTABLISHED:
+ cnt = skel->bss->migrated_at_close;
+ break;
+ case BPF_TCP_SYN_RECV:
+ cnt = skel->bss->migrated_at_close_fastopen;
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (test_case->expire_synack_timer)
+ cnt = skel->bss->migrated_at_send_synack;
+ else
+ cnt = skel->bss->migrated_at_recv_ack;
+ break;
+ default:
+ cnt = 0;
+ }
+
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
+}
+
+static void run_test(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int err, saved_len;
+ char buf[16];
+
+ skel->bss->migrated_at_close = 0;
+ skel->bss->migrated_at_close_fastopen = 0;
+ skel->bss->migrated_at_send_synack = 0;
+ skel->bss->migrated_at_recv_ack = 0;
+
+ init_fds(test_case->servers, NR_SERVERS);
+ init_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->fastopen) {
+ memset(buf, 0, sizeof(buf));
+
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
+ if (!ASSERT_OK(err, "setup_fastopen - setup"))
+ return;
+ }
+
+ err = start_servers(test_case, skel);
+ if (!ASSERT_OK(err, "start_servers"))
+ goto close_servers;
+
+ if (test_case->drop_ack) {
+ /* Drop the final ACK of the 3-way handshake and stick the
+ * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
+ */
+ err = drop_ack(test_case, skel);
+ if (!ASSERT_OK(err, "drop_ack"))
+ goto close_servers;
+ }
+
+ /* Tie requests to the first four listeners */
+ err = start_clients(test_case);
+ if (!ASSERT_OK(err, "start_clients"))
+ goto close_clients;
+
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ goto close_clients;
+
+ err = update_maps(test_case, skel);
+ if (!ASSERT_OK(err, "fill_maps"))
+ goto close_clients;
+
+ /* Migrate the requests in the accept queue only.
+ * TCP_NEW_SYN_RECV requests are not migrated at this point.
+ */
+ err = migrate_dance(test_case);
+ if (!ASSERT_OK(err, "migrate_dance"))
+ goto close_clients;
+
+ if (test_case->expire_synack_timer) {
+ /* Wait for SYN+ACK timers to expire so that
+ * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
+ */
+ sleep(1);
+ }
+
+ if (test_case->link) {
+ /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
+ err = pass_ack(test_case);
+ if (!ASSERT_OK(err, "pass_ack"))
+ goto close_clients;
+ }
+
+ count_requests(test_case, skel);
+
+close_clients:
+ close_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->link) {
+ err = pass_ack(test_case);
+ ASSERT_OK(err, "pass_ack - clean up");
+ }
+
+close_servers:
+ close_fds(test_case->servers, NR_SERVERS);
+
+ if (test_case->fastopen) {
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
+ ASSERT_OK(err, "setup_fastopen - restore");
+ }
+}
+
+void serial_test_migrate_reuseport(void)
+{
+ struct test_migrate_reuseport *skel;
+ int i;
+
+ skel = test_migrate_reuseport__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ test__start_subtest(test_cases[i].name);
+ run_test(&test_cases[i], skel);
+ }
+
+ test_migrate_reuseport__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c
new file mode 100644
index 000000000000..ed8857ae914a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/missed.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "missed_kprobe.skel.h"
+#include "missed_kprobe_recursion.skel.h"
+#include "missed_tp_recursion.skel.h"
+
+/*
+ * Putting kprobe on bpf_fentry_test1 that calls bpf_kfunc_common_test
+ * kfunc, which has also kprobe on. The latter won't get triggered due
+ * to kprobe recursion check and kprobe missed counter is incremented.
+ */
+static void test_missed_perf_kprobe(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_link_info info = {};
+ struct missed_kprobe *skel;
+ __u32 len = sizeof(info);
+ int err, prog_fd;
+
+ skel = missed_kprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "missed_kprobe__open_and_load"))
+ goto cleanup;
+
+ err = missed_kprobe__attach(skel);
+ if (!ASSERT_OK(err, "missed_kprobe__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ err = bpf_link_get_info_by_fd(bpf_link__fd(skel->links.test2), &info, &len);
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd"))
+ goto cleanup;
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "info.type");
+ ASSERT_EQ(info.perf_event.type, BPF_PERF_EVENT_KPROBE, "info.perf_event.type");
+ ASSERT_EQ(info.perf_event.kprobe.missed, 1, "info.perf_event.kprobe.missed");
+
+cleanup:
+ missed_kprobe__destroy(skel);
+}
+
+static __u64 get_missed_count(int fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ int err;
+
+ err = bpf_prog_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ return (__u64) -1;
+ return info.recursion_misses;
+}
+
+/*
+ * Putting kprobe.multi on bpf_fentry_test1 that calls bpf_kfunc_common_test
+ * kfunc which has 3 perf event kprobes and 1 kprobe.multi attached.
+ *
+ * Because fprobe (kprobe.multi attach layear) does not have strict recursion
+ * check the kprobe's bpf_prog_active check is hit for test2-5.
+ */
+static void test_missed_kprobe_recursion(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct missed_kprobe_recursion *skel;
+ int err, prog_fd;
+
+ skel = missed_kprobe_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "missed_kprobe_recursion__open_and_load"))
+ goto cleanup;
+
+ err = missed_kprobe_recursion__attach(skel);
+ if (!ASSERT_OK(err, "missed_kprobe_recursion__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test6)), 1, "test6_recursion_misses");
+
+cleanup:
+ missed_kprobe_recursion__destroy(skel);
+}
+
+/*
+ * Putting kprobe on bpf_fentry_test1 that calls bpf_printk and invokes
+ * bpf_trace_printk tracepoint. The bpf_trace_printk tracepoint has test[234]
+ * programs attached to it.
+ *
+ * Because kprobe execution goes through bpf_prog_active check, programs
+ * attached to the tracepoint will fail the recursion check and increment
+ * the recursion_misses stats.
+ */
+static void test_missed_tp_recursion(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct missed_tp_recursion *skel;
+ int err, prog_fd;
+
+ skel = missed_tp_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "missed_tp_recursion__open_and_load"))
+ goto cleanup;
+
+ err = missed_tp_recursion__attach(skel);
+ if (!ASSERT_OK(err, "missed_tp_recursion__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
+
+cleanup:
+ missed_tp_recursion__destroy(skel);
+}
+
+void test_missed(void)
+{
+ if (test__start_subtest("perf_kprobe"))
+ test_missed_perf_kprobe();
+ if (test__start_subtest("kprobe_recursion"))
+ test_missed_kprobe_recursion();
+ if (test__start_subtest("tp_recursion"))
+ test_missed_tp_recursion();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 9c3c5c0f068f..a271d5a0f7ab 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -29,28 +29,42 @@ void test_mmap(void)
struct test_mmap *skel;
__u64 val = 0;
- skel = test_mmap__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+ skel = test_mmap__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.rdonly_map, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ /* at least 4 pages of data */
+ err = bpf_map__set_max_entries(skel->maps.data_map,
+ 4 * (page_size / sizeof(u64)));
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = test_mmap__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
bss_map = skel->maps.bss;
data_map = skel->maps.data_map;
data_map_fd = bpf_map__fd(data_map);
rdmap_fd = bpf_map__fd(skel->maps.rdonly_map);
- tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) {
- munmap(tmp1, 4096);
+ munmap(tmp1, page_size);
goto cleanup;
}
/* now double-check if it's mmap()'able at all */
- tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno))
goto cleanup;
/* get map's ID */
memset(&map_info, 0, map_info_sz);
- err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz);
+ err = bpf_map_get_info_by_fd(data_map_fd, &map_info, &map_info_sz);
if (CHECK(err, "map_get_info", "failed %d\n", errno))
goto cleanup;
data_map_id = map_info.id;
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
index 97fec70c600b..a70c99c2f8c8 100644
--- a/tools/testing/selftests/bpf/prog_tests/modify_return.c
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -15,51 +15,47 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
{
struct modify_return *skel = NULL;
int err, prog_fd;
- __u32 duration = 0, retval;
__u16 side_effect;
__s16 ret;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
skel = modify_return__open_and_load();
- if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
goto cleanup;
err = modify_return__attach(skel);
- if (CHECK(err, "modify_return", "attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "modify_return__attach failed"))
goto cleanup;
skel->bss->input_retval = input_retval;
prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0,
- &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
- CHECK(err, "test_run", "err %d errno %d\n", err, errno);
+ side_effect = UPPER(topts.retval);
+ ret = LOWER(topts.retval);
- side_effect = UPPER(retval);
- ret = LOWER(retval);
+ ASSERT_EQ(ret, want_ret, "test_run ret");
+ ASSERT_EQ(side_effect, want_side_effect, "modify_return side_effect");
+ ASSERT_EQ(skel->bss->fentry_result, 1, "modify_return fentry_result");
+ ASSERT_EQ(skel->bss->fexit_result, 1, "modify_return fexit_result");
+ ASSERT_EQ(skel->bss->fmod_ret_result, 1, "modify_return fmod_ret_result");
- CHECK(ret != want_ret, "test_run",
- "unexpected ret: %d, expected: %d\n", ret, want_ret);
- CHECK(side_effect != want_side_effect, "modify_return",
- "unexpected side_effect: %d\n", side_effect);
-
- CHECK(skel->bss->fentry_result != 1, "modify_return",
- "fentry failed\n");
- CHECK(skel->bss->fexit_result != 1, "modify_return",
- "fexit failed\n");
- CHECK(skel->bss->fmod_ret_result != 1, "modify_return",
- "fmod_ret failed\n");
+ ASSERT_EQ(skel->bss->fentry_result2, 1, "modify_return fentry_result2");
+ ASSERT_EQ(skel->bss->fexit_result2, 1, "modify_return fexit_result2");
+ ASSERT_EQ(skel->bss->fmod_ret_result2, 1, "modify_return fmod_ret_result2");
cleanup:
modify_return__destroy(skel);
}
-void test_modify_return(void)
+/* TODO: conflict with get_func_ip_test */
+void serial_test_modify_return(void)
{
run_test(0 /* input_retval */,
- 1 /* want_side_effect */,
- 4 /* want_ret */);
+ 2 /* want_side_effect */,
+ 33 /* want_ret */);
run_test(-EINVAL /* input_retval */,
0 /* want_side_effect */,
- -EINVAL /* want_ret */);
+ -EINVAL * 2 /* want_ret */);
}
-
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 5bc53d53d86e..70fa7ae93173 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -2,46 +2,34 @@
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
+#include <stdbool.h>
#include "test_module_attach.skel.h"
+#include "testing_helpers.h"
static int duration;
-static int trigger_module_test_read(int read_sz)
+static int trigger_module_test_writable(int *val)
{
int fd, err;
+ char buf[65];
+ ssize_t rd;
- fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
+ fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY);
err = -errno;
- if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
+ if (!ASSERT_GE(fd, 0, "testmode_file_open"))
return err;
- read(fd, NULL, read_sz);
- close(fd);
-
- return 0;
-}
-
-static int trigger_module_test_write(int write_sz)
-{
- int fd, err;
- char *buf = malloc(write_sz);
-
- if (!buf)
- return -ENOMEM;
-
- memset(buf, 'a', write_sz);
- buf[write_sz-1] = '\0';
-
- fd = open("/sys/kernel/bpf_testmod", O_WRONLY);
+ rd = read(fd, buf, sizeof(buf) - 1);
err = -errno;
- if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) {
- free(buf);
+ if (!ASSERT_GT(rd, 0, "testmod_file_rd_val")) {
+ close(fd);
return err;
}
- write(fd, buf, write_sz);
+ buf[rd] = '\0';
+ *val = strtol(buf, NULL, 0);
close(fd);
- free(buf);
+
return 0;
}
@@ -51,7 +39,9 @@ void test_module_attach(void)
const int WRITE_SZ = 457;
struct test_module_attach* skel;
struct test_module_attach__bss *bss;
+ struct bpf_link *link;
int err;
+ int writable_val = 0;
skel = test_module_attach__open();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -61,6 +51,10 @@ void test_module_attach(void)
0, "bpf_testmod_test_read");
ASSERT_OK(err, "set_attach_target");
+ err = bpf_program__set_attach_target(skel->progs.handle_fentry_explicit_manual,
+ 0, "bpf_testmod:bpf_testmod_test_read");
+ ASSERT_OK(err, "set_attach_target_explicit");
+
err = test_module_attach__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton\n"))
return;
@@ -80,10 +74,44 @@ void test_module_attach(void)
ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual");
+ ASSERT_EQ(bss->fentry_explicit_read_sz, READ_SZ, "fentry_explicit");
+ ASSERT_EQ(bss->fentry_explicit_manual_read_sz, READ_SZ, "fentry_explicit_manual");
ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
+ bss->raw_tp_writable_bare_early_ret = true;
+ bss->raw_tp_writable_bare_out_val = 0xf1f2f3f4;
+ ASSERT_OK(trigger_module_test_writable(&writable_val),
+ "trigger_writable");
+ ASSERT_EQ(bss->raw_tp_writable_bare_in_val, 1024, "writable_test_in");
+ ASSERT_EQ(bss->raw_tp_writable_bare_out_val, writable_val,
+ "writable_test_out");
+
+ test_module_attach__detach(skel);
+
+ /* attach fentry/fexit and make sure it gets module reference */
+ link = bpf_program__attach(skel->progs.handle_fentry);
+ if (!ASSERT_OK_PTR(link, "attach_fentry"))
+ goto cleanup;
+
+ ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
+ bpf_link__destroy(link);
+
+ link = bpf_program__attach(skel->progs.handle_fexit);
+ if (!ASSERT_OK_PTR(link, "attach_fexit"))
+ goto cleanup;
+
+ ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
+ bpf_link__destroy(link);
+
+ link = bpf_program__attach(skel->progs.kprobe_multi);
+ if (!ASSERT_OK_PTR(link, "attach_kprobe_multi"))
+ goto cleanup;
+
+ ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
+ bpf_link__destroy(link);
+
cleanup:
test_module_attach__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
new file mode 100644
index 000000000000..bea05f78de5f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red Hat */
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "bpf/libbpf_internal.h"
+#include "cgroup_helpers.h"
+#include "bpf_util.h"
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_fentry_shadow_test";
+
+static int get_bpf_testmod_btf_fd(void)
+{
+ struct bpf_btf_info info;
+ char name[64];
+ __u32 id = 0, len;
+ int err, fd;
+
+ while (true) {
+ err = bpf_btf_get_next_id(id, &id);
+ if (err) {
+ log_err("failed to iterate BTF objects");
+ return err;
+ }
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue; /* expected race: BTF was unloaded */
+ err = -errno;
+ log_err("failed to get FD for BTF object #%d", id);
+ return err;
+ }
+
+ len = sizeof(info);
+ memset(&info, 0, sizeof(info));
+ info.name = ptr_to_u64(name);
+ info.name_len = sizeof(name);
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ err = -errno;
+ log_err("failed to get info for BTF object #%d", id);
+ close(fd);
+ return err;
+ }
+
+ if (strcmp(name, module_name) == 0)
+ return fd;
+
+ close(fd);
+ }
+ return -ENOENT;
+}
+
+void test_module_fentry_shadow(void)
+{
+ struct btf *vmlinux_btf = NULL, *mod_btf = NULL;
+ int err, i;
+ int btf_fd[2] = {};
+ int prog_fd[2] = {};
+ int link_fd[2] = {};
+ __s32 btf_id[2] = {};
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ );
+
+ const struct bpf_insn trace_program[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux_btf"))
+ return;
+
+ btf_fd[1] = get_bpf_testmod_btf_fd();
+ if (!ASSERT_GE(btf_fd[1], 0, "get_bpf_testmod_btf_fd"))
+ goto out;
+
+ mod_btf = btf_get_from_fd(btf_fd[1], vmlinux_btf);
+ if (!ASSERT_OK_PTR(mod_btf, "btf_get_from_fd"))
+ goto out;
+
+ btf_id[0] = btf__find_by_name_kind(vmlinux_btf, symbol_name, BTF_KIND_FUNC);
+ if (!ASSERT_GT(btf_id[0], 0, "btf_find_by_name"))
+ goto out;
+
+ btf_id[1] = btf__find_by_name_kind(mod_btf, symbol_name, BTF_KIND_FUNC);
+ if (!ASSERT_GT(btf_id[1], 0, "btf_find_by_name"))
+ goto out;
+
+ for (i = 0; i < 2; i++) {
+ load_opts.attach_btf_id = btf_id[i];
+ load_opts.attach_btf_obj_fd = btf_fd[i];
+ prog_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL",
+ trace_program,
+ ARRAY_SIZE(trace_program),
+ &load_opts);
+ if (!ASSERT_GE(prog_fd[i], 0, "bpf_prog_load"))
+ goto out;
+
+ /* If the verifier incorrectly resolves addresses of the
+ * shadowed functions and uses the same address for both the
+ * vmlinux and the bpf_testmod functions, this will fail on
+ * attempting to create two trampolines for the same address,
+ * which is forbidden.
+ */
+ link_fd[i] = bpf_link_create(prog_fd[i], 0, BPF_TRACE_FENTRY, NULL);
+ if (!ASSERT_GE(link_fd[i], 0, "bpf_link_create"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(prog_fd[0], NULL);
+ ASSERT_OK(err, "running test");
+
+out:
+ btf__free(vmlinux_btf);
+ btf__free(mod_btf);
+ for (i = 0; i < 2; i++) {
+ if (btf_fd[i])
+ close(btf_fd[i]);
+ if (prog_fd[i] > 0)
+ close(prog_fd[i]);
+ if (link_fd[i] > 0)
+ close(link_fd[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
new file mode 100644
index 000000000000..8fade8bdc451
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -0,0 +1,587 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Tessares SA. */
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/const.h>
+#include <netinet/in.h>
+#include <test_progs.h>
+#include <unistd.h>
+#include <errno.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "mptcp_sock.skel.h"
+#include "mptcpify.skel.h"
+#include "mptcp_subflow.skel.h"
+#include "mptcp_sockmap.skel.h"
+
+#define NS_TEST "mptcp_ns"
+#define ADDR_1 "10.0.1.1"
+#define ADDR_2 "10.0.1.2"
+#define PORT_1 10001
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
+#ifndef SOL_MPTCP
+#define SOL_MPTCP 284
+#endif
+#ifndef MPTCP_INFO
+#define MPTCP_INFO 1
+#endif
+#ifndef MPTCP_INFO_FLAG_FALLBACK
+#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0)
+#endif
+#ifndef MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED
+#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1)
+#endif
+
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX 16
+#endif
+
+struct __mptcp_info {
+ __u8 mptcpi_subflows;
+ __u8 mptcpi_add_addr_signal;
+ __u8 mptcpi_add_addr_accepted;
+ __u8 mptcpi_subflows_max;
+ __u8 mptcpi_add_addr_signal_max;
+ __u8 mptcpi_add_addr_accepted_max;
+ __u32 mptcpi_flags;
+ __u32 mptcpi_token;
+ __u64 mptcpi_write_seq;
+ __u64 mptcpi_snd_una;
+ __u64 mptcpi_rcv_nxt;
+ __u8 mptcpi_local_addr_used;
+ __u8 mptcpi_local_addr_max;
+ __u8 mptcpi_csum_enabled;
+ __u32 mptcpi_retransmits;
+ __u64 mptcpi_bytes_retrans;
+ __u64 mptcpi_bytes_sent;
+ __u64 mptcpi_bytes_received;
+ __u64 mptcpi_bytes_acked;
+};
+
+struct mptcp_storage {
+ __u32 invoked;
+ __u32 is_mptcp;
+ struct sock *sk;
+ __u32 token;
+ struct sock *first;
+ char ca_name[TCP_CA_NAME_MAX];
+};
+
+static int start_mptcp_server(int family, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ .proto = IPPROTO_MPTCP,
+ };
+
+ return start_server_str(family, SOCK_STREAM, addr_str, port, &opts);
+}
+
+static int verify_tsk(int map_fd, int client_fd)
+{
+ int err, cfd = client_fd;
+ struct mptcp_storage val;
+
+ err = bpf_map_lookup_elem(map_fd, &cfd, &val);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ return err;
+
+ if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count"))
+ err++;
+
+ if (!ASSERT_EQ(val.is_mptcp, 0, "unexpected is_mptcp"))
+ err++;
+
+ return err;
+}
+
+static void get_msk_ca_name(char ca_name[])
+{
+ size_t len;
+ int fd;
+
+ fd = open("/proc/sys/net/ipv4/tcp_congestion_control", O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "failed to open tcp_congestion_control"))
+ return;
+
+ len = read(fd, ca_name, TCP_CA_NAME_MAX);
+ if (!ASSERT_GT(len, 0, "failed to read ca_name"))
+ goto err;
+
+ if (len > 0 && ca_name[len - 1] == '\n')
+ ca_name[len - 1] = '\0';
+
+err:
+ close(fd);
+}
+
+static int verify_msk(int map_fd, int client_fd, __u32 token)
+{
+ char ca_name[TCP_CA_NAME_MAX];
+ int err, cfd = client_fd;
+ struct mptcp_storage val;
+
+ if (!ASSERT_GT(token, 0, "invalid token"))
+ return -1;
+
+ get_msk_ca_name(ca_name);
+
+ err = bpf_map_lookup_elem(map_fd, &cfd, &val);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ return err;
+
+ if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count"))
+ err++;
+
+ if (!ASSERT_EQ(val.is_mptcp, 1, "unexpected is_mptcp"))
+ err++;
+
+ if (!ASSERT_EQ(val.token, token, "unexpected token"))
+ err++;
+
+ if (!ASSERT_EQ(val.first, val.sk, "unexpected first"))
+ err++;
+
+ if (!ASSERT_STRNEQ(val.ca_name, ca_name, TCP_CA_NAME_MAX, "unexpected ca_name"))
+ err++;
+
+ return err;
+}
+
+static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
+{
+ int client_fd, prog_fd, map_fd, err;
+ struct mptcp_sock *sock_skel;
+
+ sock_skel = mptcp_sock__open_and_load();
+ if (!ASSERT_OK_PTR(sock_skel, "skel_open_load"))
+ return libbpf_get_error(sock_skel);
+
+ err = mptcp_sock__attach(sock_skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(sock_skel->progs._sockops);
+ map_fd = bpf_map__fd(sock_skel->maps.socket_storage_map);
+ err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect to fd")) {
+ err = -EIO;
+ goto out;
+ }
+
+ err += is_mptcp ? verify_msk(map_fd, client_fd, sock_skel->bss->token) :
+ verify_tsk(map_fd, client_fd);
+
+ close(client_fd);
+
+out:
+ mptcp_sock__destroy(sock_skel);
+ return err;
+}
+
+static void test_base(void)
+{
+ struct netns_obj *netns = NULL;
+ int server_fd, cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/mptcp");
+ if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
+ return;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ goto fail;
+
+ /* without MPTCP */
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto with_mptcp;
+
+ ASSERT_OK(run_test(cgroup_fd, server_fd, false), "run_test tcp");
+
+ close(server_fd);
+
+with_mptcp:
+ /* with MPTCP */
+ server_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_mptcp_server"))
+ goto fail;
+
+ ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp");
+
+ close(server_fd);
+
+fail:
+ netns_free(netns);
+ close(cgroup_fd);
+}
+
+static void send_byte(int fd)
+{
+ char b = 0x55;
+
+ ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte");
+}
+
+static int verify_mptcpify(int server_fd, int client_fd)
+{
+ struct __mptcp_info info;
+ socklen_t optlen;
+ int protocol;
+ int err = 0;
+
+ optlen = sizeof(protocol);
+ if (!ASSERT_OK(getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen),
+ "getsockopt(SOL_PROTOCOL)"))
+ return -1;
+
+ if (!ASSERT_EQ(protocol, IPPROTO_MPTCP, "protocol isn't MPTCP"))
+ err++;
+
+ optlen = sizeof(info);
+ if (!ASSERT_OK(getsockopt(client_fd, SOL_MPTCP, MPTCP_INFO, &info, &optlen),
+ "getsockopt(MPTCP_INFO)"))
+ return -1;
+
+ if (!ASSERT_GE(info.mptcpi_flags, 0, "unexpected mptcpi_flags"))
+ err++;
+ if (!ASSERT_FALSE(info.mptcpi_flags & MPTCP_INFO_FLAG_FALLBACK,
+ "MPTCP fallback"))
+ err++;
+ if (!ASSERT_TRUE(info.mptcpi_flags & MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED,
+ "no remote key received"))
+ err++;
+
+ return err;
+}
+
+static int run_mptcpify(int cgroup_fd)
+{
+ int server_fd, client_fd, err = 0;
+ struct mptcpify *mptcpify_skel;
+
+ mptcpify_skel = mptcpify__open_and_load();
+ if (!ASSERT_OK_PTR(mptcpify_skel, "skel_open_load"))
+ return libbpf_get_error(mptcpify_skel);
+
+ mptcpify_skel->bss->pid = getpid();
+
+ err = mptcpify__attach(mptcpify_skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* without MPTCP */
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server")) {
+ err = -EIO;
+ goto out;
+ }
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect to fd")) {
+ err = -EIO;
+ goto close_server;
+ }
+
+ send_byte(client_fd);
+
+ err = verify_mptcpify(server_fd, client_fd);
+
+ close(client_fd);
+close_server:
+ close(server_fd);
+out:
+ mptcpify__destroy(mptcpify_skel);
+ return err;
+}
+
+static void test_mptcpify(void)
+{
+ struct netns_obj *netns = NULL;
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/mptcpify");
+ if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
+ return;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ goto fail;
+
+ ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify");
+
+fail:
+ netns_free(netns);
+ close(cgroup_fd);
+}
+
+static int endpoint_init(char *flags)
+{
+ SYS(fail, "ip -net %s link add veth1 type veth peer name veth2", NS_TEST);
+ SYS(fail, "ip -net %s addr add %s/24 dev veth1", NS_TEST, ADDR_1);
+ SYS(fail, "ip -net %s link set dev veth1 up", NS_TEST);
+ SYS(fail, "ip -net %s addr add %s/24 dev veth2", NS_TEST, ADDR_2);
+ SYS(fail, "ip -net %s link set dev veth2 up", NS_TEST);
+ if (SYS_NOFAIL("ip -net %s mptcp endpoint add %s %s", NS_TEST, ADDR_2, flags)) {
+ printf("'ip mptcp' not supported, skip this test.\n");
+ test__skip();
+ goto fail;
+ }
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void wait_for_new_subflows(int fd)
+{
+ socklen_t len;
+ u8 subflows;
+ int err, i;
+
+ len = sizeof(subflows);
+ /* Wait max 5 sec for new subflows to be created */
+ for (i = 0; i < 50; i++) {
+ err = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &subflows, &len);
+ if (!err && subflows > 0)
+ break;
+
+ usleep(100000); /* 0.1s */
+ }
+}
+
+static void run_subflow(void)
+{
+ int server_fd, client_fd, err;
+ char new[TCP_CA_NAME_MAX];
+ char cc[TCP_CA_NAME_MAX];
+ unsigned int mark;
+ socklen_t len;
+
+ server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0);
+ if (!ASSERT_OK_FD(server_fd, "start_mptcp_server"))
+ return;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_OK_FD(client_fd, "connect_to_fd"))
+ goto close_server;
+
+ send_byte(client_fd);
+ wait_for_new_subflows(client_fd);
+
+ len = sizeof(mark);
+ err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len);
+ if (ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"))
+ ASSERT_EQ(mark, 0, "mark");
+
+ len = sizeof(new);
+ err = getsockopt(client_fd, SOL_TCP, TCP_CONGESTION, new, &len);
+ if (ASSERT_OK(err, "getsockopt(client_fd, TCP_CONGESTION)")) {
+ get_msk_ca_name(cc);
+ ASSERT_STREQ(new, cc, "cc");
+ }
+
+ close(client_fd);
+close_server:
+ close(server_fd);
+}
+
+static void test_subflow(void)
+{
+ struct mptcp_subflow *skel;
+ struct netns_obj *netns;
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/mptcp_subflow");
+ if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_subflow"))
+ return;
+
+ skel = mptcp_subflow__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_subflow"))
+ goto close_cgroup;
+
+ skel->bss->pid = getpid();
+
+ skel->links.mptcp_subflow =
+ bpf_program__attach_cgroup(skel->progs.mptcp_subflow, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.mptcp_subflow, "attach mptcp_subflow"))
+ goto skel_destroy;
+
+ skel->links._getsockopt_subflow =
+ bpf_program__attach_cgroup(skel->progs._getsockopt_subflow, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._getsockopt_subflow, "attach _getsockopt_subflow"))
+ goto skel_destroy;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_subflow"))
+ goto skel_destroy;
+
+ if (endpoint_init("subflow") < 0)
+ goto close_netns;
+
+ run_subflow();
+
+close_netns:
+ netns_free(netns);
+skel_destroy:
+ mptcp_subflow__destroy(skel);
+close_cgroup:
+ close(cgroup_fd);
+}
+
+/* Test sockmap on MPTCP server handling non-mp-capable clients. */
+static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, client_fd1 = -1, client_fd2 = -1;
+ int server_fd1 = -1, server_fd2 = -1, sent, recvd;
+ char snd[9] = "123456789";
+ char rcv[10];
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
+ /* create client without MPTCP enabled */
+ client_fd1 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd1 = accept(listen_fd, NULL, 0);
+ skel->bss->sk_index = 1;
+ client_fd2 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd2 = accept(listen_fd, NULL, 0);
+ /* test normal redirect behavior: data sent by client_fd1 can be
+ * received by client_fd2
+ */
+ skel->bss->redirect_idx = 1;
+ sent = send(client_fd1, snd, sizeof(snd), 0);
+ if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)"))
+ goto end;
+
+ /* try to recv more bytes to avoid truncation check */
+ recvd = recv(client_fd2, rcv, sizeof(rcv), 0);
+ if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)"))
+ goto end;
+
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (client_fd2 >= 0)
+ close(client_fd2);
+ if (server_fd1 >= 0)
+ close(server_fd1);
+ if (server_fd2 >= 0)
+ close(server_fd2);
+ close(listen_fd);
+}
+
+/* Test sockmap rejection of MPTCP sockets - both server and client sides. */
+static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, server_fd = -1, client_fd1 = -1;
+ int err, zero = 0;
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
+ /* create client with MPTCP enabled */
+ client_fd1 = connect_to_fd(listen_fd, 0);
+ if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1"))
+ goto end;
+
+ /* bpf_sock_map_update() called from sockops should reject MPTCP sk */
+ if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject"))
+ goto end;
+
+ server_fd = accept(listen_fd, NULL, 0);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &server_fd, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed"))
+ goto end;
+
+ /* MPTCP client should also be disallowed */
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &client_fd1, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed"))
+ goto end;
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (server_fd >= 0)
+ close(server_fd);
+ close(listen_fd);
+}
+
+static void test_mptcp_sockmap(void)
+{
+ struct mptcp_sockmap *skel;
+ struct netns_obj *netns;
+ int cgroup_fd, err;
+
+ cgroup_fd = test__join_cgroup("/mptcp_sockmap");
+ if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap"))
+ return;
+
+ skel = mptcp_sockmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap"))
+ goto close_cgroup;
+
+ skel->links.mptcp_sockmap_inject =
+ bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap"))
+ goto skel_destroy;
+
+ err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect),
+ bpf_map__fd(skel->maps.sock_map),
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto skel_destroy;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap"))
+ goto skel_destroy;
+
+ if (endpoint_init("subflow") < 0)
+ goto close_netns;
+
+ test_sockmap_with_mptcp_fallback(skel);
+ test_sockmap_reject_mptcp(skel);
+
+close_netns:
+ netns_free(netns);
+skel_destroy:
+ mptcp_sockmap__destroy(skel);
+close_cgroup:
+ close(cgroup_fd);
+}
+
+void test_mptcp(void)
+{
+ if (test__start_subtest("base"))
+ test_base();
+ if (test__start_subtest("mptcpify"))
+ test_mptcpify();
+ if (test__start_subtest("subflow"))
+ test_subflow();
+ if (test__start_subtest("sockmap"))
+ test_mptcp_sockmap();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/nested_trust.c b/tools/testing/selftests/bpf/prog_tests/nested_trust.c
new file mode 100644
index 000000000000..54a112ad5f9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/nested_trust.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "nested_trust_failure.skel.h"
+#include "nested_trust_success.skel.h"
+#include "nested_acquire.skel.h"
+
+void test_nested_trust(void)
+{
+ RUN_TESTS(nested_trust_success);
+ RUN_TESTS(nested_trust_failure);
+
+ if (env.has_testmod)
+ RUN_TESTS(nested_acquire);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/net_timestamping.c b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
new file mode 100644
index 000000000000..dbfd87499b6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
@@ -0,0 +1,239 @@
+#include <linux/net_tstamp.h>
+#include <sys/time.h>
+#include <linux/errqueue.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "net_timestamping.skel.h"
+
+#define CG_NAME "/net-timestamping-test"
+#define NSEC_PER_SEC 1000000000LL
+
+static const char addr4_str[] = "127.0.0.1";
+static const char addr6_str[] = "::1";
+static struct net_timestamping *skel;
+static const int cfg_payload_len = 30;
+static struct timespec usr_ts;
+static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds */
+int SK_TS_SCHED;
+int SK_TS_TXSW;
+int SK_TS_ACK;
+
+static int64_t timespec_to_ns64(struct timespec *ts)
+{
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+static void validate_key(int tskey, int tstype)
+{
+ static int expected_tskey = -1;
+
+ if (tstype == SCM_TSTAMP_SCHED)
+ expected_tskey = cfg_payload_len - 1;
+
+ ASSERT_EQ(expected_tskey, tskey, "tskey mismatch");
+
+ expected_tskey = tskey;
+}
+
+static void validate_timestamp(struct timespec *cur, struct timespec *prev)
+{
+ int64_t cur_ns, prev_ns;
+
+ cur_ns = timespec_to_ns64(cur);
+ prev_ns = timespec_to_ns64(prev);
+
+ ASSERT_LT(cur_ns - prev_ns, delay_tolerance_nsec, "latency");
+}
+
+static void test_socket_timestamp(struct scm_timestamping *tss, int tstype,
+ int tskey)
+{
+ static struct timespec prev_ts;
+
+ validate_key(tskey, tstype);
+
+ switch (tstype) {
+ case SCM_TSTAMP_SCHED:
+ validate_timestamp(&tss->ts[0], &usr_ts);
+ SK_TS_SCHED += 1;
+ break;
+ case SCM_TSTAMP_SND:
+ validate_timestamp(&tss->ts[0], &prev_ts);
+ SK_TS_TXSW += 1;
+ break;
+ case SCM_TSTAMP_ACK:
+ validate_timestamp(&tss->ts[0], &prev_ts);
+ SK_TS_ACK += 1;
+ break;
+ }
+
+ prev_ts = tss->ts[0];
+}
+
+static void test_recv_errmsg_cmsg(struct msghdr *msg)
+{
+ struct sock_extended_err *serr = NULL;
+ struct scm_timestamping *tss = NULL;
+ struct cmsghdr *cm;
+
+ for (cm = CMSG_FIRSTHDR(msg);
+ cm && cm->cmsg_len;
+ cm = CMSG_NXTHDR(msg, cm)) {
+ if (cm->cmsg_level == SOL_SOCKET &&
+ cm->cmsg_type == SCM_TIMESTAMPING) {
+ tss = (void *)CMSG_DATA(cm);
+ } else if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_RECVERR) ||
+ (cm->cmsg_level == SOL_PACKET &&
+ cm->cmsg_type == PACKET_TX_TIMESTAMP)) {
+ serr = (void *)CMSG_DATA(cm);
+ ASSERT_EQ(serr->ee_origin, SO_EE_ORIGIN_TIMESTAMPING,
+ "cmsg type");
+ }
+
+ if (serr && tss)
+ test_socket_timestamp(tss, serr->ee_info,
+ serr->ee_data);
+ }
+}
+
+static bool socket_recv_errmsg(int fd)
+{
+ static char ctrl[1024 /* overprovision*/];
+ char data[cfg_payload_len];
+ static struct msghdr msg;
+ struct iovec entry;
+ int n = 0;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&entry, 0, sizeof(entry));
+ memset(ctrl, 0, sizeof(ctrl));
+
+ entry.iov_base = data;
+ entry.iov_len = cfg_payload_len;
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = ctrl;
+ msg.msg_controllen = sizeof(ctrl);
+
+ n = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (n == -1)
+ ASSERT_EQ(errno, EAGAIN, "recvmsg MSG_ERRQUEUE");
+
+ if (n >= 0)
+ test_recv_errmsg_cmsg(&msg);
+
+ return n == -1;
+}
+
+static void test_socket_timestamping(int fd)
+{
+ while (!socket_recv_errmsg(fd));
+
+ ASSERT_EQ(SK_TS_SCHED, 1, "SCM_TSTAMP_SCHED");
+ ASSERT_EQ(SK_TS_TXSW, 1, "SCM_TSTAMP_SND");
+ ASSERT_EQ(SK_TS_ACK, 1, "SCM_TSTAMP_ACK");
+
+ SK_TS_SCHED = 0;
+ SK_TS_TXSW = 0;
+ SK_TS_ACK = 0;
+}
+
+static void test_tcp(int family, bool enable_socket_timestamping)
+{
+ struct net_timestamping__bss *bss;
+ char buf[cfg_payload_len];
+ int sfd = -1, cfd = -1;
+ unsigned int sock_opt;
+ struct netns_obj *ns;
+ int cg_fd;
+ int ret;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
+ return;
+
+ ns = netns_new("net_timestamping_ns", true);
+ if (!ASSERT_OK_PTR(ns, "create ns"))
+ goto out;
+
+ skel = net_timestamping__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skel"))
+ goto out;
+
+ if (!ASSERT_OK(net_timestamping__attach(skel), "attach skel"))
+ goto out;
+
+ skel->links.skops_sockopt =
+ bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
+ goto out;
+
+ bss = skel->bss;
+ memset(bss, 0, sizeof(*bss));
+
+ skel->bss->monitored_pid = getpid();
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_OK_FD(sfd, "start_server"))
+ goto out;
+
+ cfd = connect_to_fd(sfd, 0);
+ if (!ASSERT_OK_FD(cfd, "connect_to_fd_server"))
+ goto out;
+
+ if (enable_socket_timestamping) {
+ sock_opt = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID |
+ SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK;
+ ret = setsockopt(cfd, SOL_SOCKET, SO_TIMESTAMPING,
+ (char *) &sock_opt, sizeof(sock_opt));
+ if (!ASSERT_OK(ret, "setsockopt SO_TIMESTAMPING"))
+ goto out;
+
+ ret = clock_gettime(CLOCK_REALTIME, &usr_ts);
+ if (!ASSERT_OK(ret, "get user time"))
+ goto out;
+ }
+
+ ret = write(cfd, buf, sizeof(buf));
+ if (!ASSERT_EQ(ret, sizeof(buf), "send to server"))
+ goto out;
+
+ if (enable_socket_timestamping)
+ test_socket_timestamping(cfd);
+
+ ASSERT_EQ(bss->nr_active, 1, "nr_active");
+ ASSERT_EQ(bss->nr_snd, 2, "nr_snd");
+ ASSERT_EQ(bss->nr_sched, 1, "nr_sched");
+ ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw");
+ ASSERT_EQ(bss->nr_ack, 1, "nr_ack");
+
+out:
+ if (sfd >= 0)
+ close(sfd);
+ if (cfd >= 0)
+ close(cfd);
+ net_timestamping__destroy(skel);
+ netns_free(ns);
+ close(cg_fd);
+}
+
+void test_net_timestamping(void)
+{
+ if (test__start_subtest("INET4: bpf timestamping"))
+ test_tcp(AF_INET, false);
+ if (test__start_subtest("INET4: bpf and socket timestamping"))
+ test_tcp(AF_INET, true);
+ if (test__start_subtest("INET6: bpf timestamping"))
+ test_tcp(AF_INET6, false);
+ if (test__start_subtest("INET6: bpf and socket timestamping"))
+ test_tcp(AF_INET6, true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
new file mode 100644
index 000000000000..c3333edd029f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "netcnt_prog.skel.h"
+#include "netcnt_common.h"
+
+#define CG_NAME "/netcnt"
+
+void serial_test_netcnt(void)
+{
+ union percpu_net_cnt *percpu_netcnt = NULL;
+ struct bpf_cgroup_storage_key key;
+ int map_fd, percpu_map_fd;
+ struct netcnt_prog *skel;
+ unsigned long packets;
+ union net_cnt netcnt;
+ unsigned long bytes;
+ int cpu, nproc;
+ int cg_fd = -1;
+ char cmd[128];
+
+ skel = netcnt_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
+ return;
+
+ nproc = bpf_num_possible_cpus();
+ percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+ if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
+ goto err;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
+ goto err;
+
+ skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
+ "attach_cgroup(bpf_nextcnt)"))
+ goto err;
+
+ snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
+ ASSERT_OK(system(cmd), cmd);
+
+ map_fd = bpf_map__fd(skel->maps.netcnt);
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
+ goto err;
+
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
+ if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
+ "bpf_map_lookup_elem(percpu_netcnt)"))
+ goto err;
+
+ /* Some packets can be still in per-cpu cache, but not more than
+ * MAX_PERCPU_PACKETS.
+ */
+ packets = netcnt.packets;
+ bytes = netcnt.bytes;
+ for (cpu = 0; cpu < nproc; cpu++) {
+ ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
+
+ packets += percpu_netcnt[cpu].packets;
+ bytes += percpu_netcnt[cpu].bytes;
+ }
+
+ /* No packets should be lost */
+ ASSERT_GE(packets, 10000, "packets");
+
+ /* Let's check that bytes counter matches the number of packets
+ * multiplied by the size of ipv6 ICMP packet.
+ */
+ ASSERT_GE(bytes, packets * 104, "bytes");
+
+err:
+ if (cg_fd != -1)
+ close(cg_fd);
+ free(percpu_netcnt);
+ netcnt_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c
new file mode 100644
index 000000000000..2f52fa2641ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <netinet/in.h>
+#include <linux/netfilter.h>
+
+#include "test_progs.h"
+#include "test_netfilter_link_attach.skel.h"
+
+struct nf_link_test {
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+
+ bool expect_success;
+ const char * const name;
+};
+
+static const struct nf_link_test nf_hook_link_tests[] = {
+ { .name = "allzero", },
+ { .pf = NFPROTO_NUMPROTO, .name = "invalid-pf", },
+ { .pf = NFPROTO_IPV4, .hooknum = 42, .name = "invalid-hooknum", },
+ { .pf = NFPROTO_IPV4, .priority = INT_MIN, .name = "invalid-priority-min", },
+ { .pf = NFPROTO_IPV4, .priority = INT_MAX, .name = "invalid-priority-max", },
+ { .pf = NFPROTO_IPV4, .flags = UINT_MAX, .name = "invalid-flags", },
+
+ { .pf = NFPROTO_INET, .priority = 1, .name = "invalid-inet-not-supported", },
+
+ {
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = -10000,
+ .flags = 0,
+ .expect_success = true,
+ .name = "attach ipv4",
+ },
+ {
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 10001,
+ .flags = BPF_F_NETFILTER_IP_DEFRAG,
+ .expect_success = true,
+ .name = "attach ipv6",
+ },
+};
+
+static void verify_netfilter_link_info(struct bpf_link *link, const struct nf_link_test nf_expected)
+{
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ int err, fd;
+
+ memset(&info, 0, len);
+
+ fd = bpf_link__fd(link);
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_OK(err, "get_link_info");
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_NETFILTER, "info link type");
+ ASSERT_EQ(info.netfilter.pf, nf_expected.pf, "info nf protocol family");
+ ASSERT_EQ(info.netfilter.hooknum, nf_expected.hooknum, "info nf hooknum");
+ ASSERT_EQ(info.netfilter.priority, nf_expected.priority, "info nf priority");
+ ASSERT_EQ(info.netfilter.flags, nf_expected.flags, "info nf flags");
+}
+
+void test_netfilter_link_attach(void)
+{
+ struct test_netfilter_link_attach *skel;
+ struct bpf_program *prog;
+ LIBBPF_OPTS(bpf_netfilter_opts, opts);
+ int i;
+
+ skel = test_netfilter_link_attach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_netfilter_link_attach__open_and_load"))
+ goto out;
+
+ prog = skel->progs.nf_link_attach_test;
+ if (!ASSERT_OK_PTR(prog, "attach program"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(nf_hook_link_tests); i++) {
+ struct bpf_link *link;
+
+ if (!test__start_subtest(nf_hook_link_tests[i].name))
+ continue;
+
+#define X(opts, m, i) opts.m = nf_hook_link_tests[(i)].m
+ X(opts, pf, i);
+ X(opts, hooknum, i);
+ X(opts, priority, i);
+ X(opts, flags, i);
+#undef X
+ link = bpf_program__attach_netfilter(prog, &opts);
+ if (nf_hook_link_tests[i].expect_success) {
+ struct bpf_link *link2;
+
+ if (!ASSERT_OK_PTR(link, "program attach successful"))
+ continue;
+
+ verify_netfilter_link_info(link, nf_hook_link_tests[i]);
+
+ link2 = bpf_program__attach_netfilter(prog, &opts);
+ ASSERT_ERR_PTR(link2, "attach program with same pf/hook/priority");
+
+ if (!ASSERT_OK(bpf_link__destroy(link), "link destroy"))
+ break;
+
+ link2 = bpf_program__attach_netfilter(prog, &opts);
+ if (!ASSERT_OK_PTR(link2, "program reattach successful"))
+ continue;
+
+ verify_netfilter_link_info(link2, nf_hook_link_tests[i]);
+
+ if (!ASSERT_OK(bpf_link__destroy(link2), "link destroy"))
+ break;
+ } else {
+ ASSERT_ERR_PTR(link, "program load failure");
+ }
+ }
+
+out:
+ test_netfilter_link_attach__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
new file mode 100644
index 000000000000..e00cd34586dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "netns_cookie_prog.skel.h"
+#include "network_helpers.h"
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+#define loopback 1
+
+static int duration;
+
+void test_netns_cookie(void)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ int server_fd = -1, client_fd = -1, cgroup_fd = -1;
+ int err, val, ret, map, verdict, tc_fd;
+ struct netns_cookie_prog *skel;
+ uint64_t cookie_expected_value;
+ socklen_t vallen = sizeof(cookie_expected_value);
+ static const char send_msg[] = "message";
+
+ skel = netns_cookie_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/netns_cookie");
+ if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n"))
+ goto done;
+
+ skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_sockops, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach_sockops"))
+ goto done;
+
+ verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
+ map = bpf_map__fd(skel->maps.sock_map);
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "prog_attach_sk_msg"))
+ goto done;
+
+ tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx);
+ err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta);
+ if (!ASSERT_OK(err, "prog_attach_tcx"))
+ goto done;
+
+ skel->links.get_netns_cookie_cgroup_skb = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_cgroup_skb, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_cgroup_skb, "prog_attach_cgroup_skb"))
+ goto cleanup_tc;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
+ goto cleanup_tc;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno))
+ goto cleanup_tc;
+
+ ret = send(client_fd, send_msg, sizeof(send_msg), 0);
+ if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret))
+ goto cleanup_tc;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)"))
+ goto cleanup_tc;
+
+ err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE,
+ &cookie_expected_value, &vallen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto cleanup_tc;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sockops");
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
+ goto cleanup_tc;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sk_msg");
+ ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value_init_tcx");
+ ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value_tcx");
+ ASSERT_EQ(skel->bss->cgroup_skb_init_netns_cookie, cookie_expected_value, "cookie_value_init_cgroup_skb");
+ ASSERT_EQ(skel->bss->cgroup_skb_netns_cookie, cookie_expected_value, "cookie_value_cgroup_skb");
+
+cleanup_tc:
+ err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+done:
+ if (server_fd != -1)
+ close(server_fd);
+ if (client_fd != -1)
+ close(client_fd);
+ if (cgroup_fd != -1)
+ close(cgroup_fd);
+ netns_cookie_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 31a3114906e2..99c953f2be21 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -11,77 +11,217 @@
#include <sched.h>
#include <sys/wait.h>
#include <sys/mount.h>
-#include <sys/fcntl.h>
+#include <fcntl.h>
+#include "network_helpers.h"
#define STACK_SIZE (1024 * 1024)
static char child_stack[STACK_SIZE];
-static int test_current_pid_tgid(void *args)
+static int get_pid_tgid(pid_t *pid, pid_t *tgid,
+ struct test_ns_current_pid_tgid__bss *bss)
{
- struct test_ns_current_pid_tgid__bss *bss;
- struct test_ns_current_pid_tgid *skel;
- int err = -1, duration = 0;
- pid_t tgid, pid;
struct stat st;
+ int err;
- skel = test_ns_current_pid_tgid__open_and_load();
- if (CHECK(!skel, "skel_open_load", "failed to load skeleton\n"))
- goto cleanup;
-
- pid = syscall(SYS_gettid);
- tgid = getpid();
+ *pid = sys_gettid();
+ *tgid = getpid();
err = stat("/proc/self/ns/pid", &st);
- if (CHECK(err, "stat", "failed /proc/self/ns/pid: %d\n", err))
- goto cleanup;
+ if (!ASSERT_OK(err, "stat /proc/self/ns/pid"))
+ return err;
- bss = skel->bss;
bss->dev = st.st_dev;
bss->ino = st.st_ino;
bss->user_pid = 0;
bss->user_tgid = 0;
+ return 0;
+}
+
+static int test_current_pid_tgid_tp(void *args)
+{
+ struct test_ns_current_pid_tgid__bss *bss;
+ struct test_ns_current_pid_tgid *skel;
+ int ret = -1, err;
+ pid_t tgid, pid;
+
+ skel = test_ns_current_pid_tgid__open();
+ if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open"))
+ return ret;
+
+ bpf_program__set_autoload(skel->progs.tp_handler, true);
+
+ err = test_ns_current_pid_tgid__load(skel);
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+ if (get_pid_tgid(&pid, &tgid, bss))
+ goto cleanup;
err = test_ns_current_pid_tgid__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__attach"))
goto cleanup;
/* trigger tracepoint */
usleep(1);
- ASSERT_EQ(bss->user_pid, pid, "pid");
- ASSERT_EQ(bss->user_tgid, tgid, "tgid");
- err = 0;
+ if (!ASSERT_EQ(bss->user_pid, pid, "pid"))
+ goto cleanup;
+ if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid"))
+ goto cleanup;
+ ret = 0;
+
+cleanup:
+ test_ns_current_pid_tgid__destroy(skel);
+ return ret;
+}
+
+static int test_current_pid_tgid_cgrp(void *args)
+{
+ struct test_ns_current_pid_tgid__bss *bss;
+ struct test_ns_current_pid_tgid *skel;
+ int server_fd = -1, ret = -1, err;
+ int cgroup_fd = *(int *)args;
+ pid_t tgid, pid;
+
+ skel = test_ns_current_pid_tgid__open();
+ if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open"))
+ return ret;
+
+ bpf_program__set_autoload(skel->progs.cgroup_bind4, true);
+
+ err = test_ns_current_pid_tgid__load(skel);
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+ if (get_pid_tgid(&pid, &tgid, bss))
+ goto cleanup;
+
+ skel->links.cgroup_bind4 = bpf_program__attach_cgroup(
+ skel->progs.cgroup_bind4, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.cgroup_bind4, "bpf_program__attach_cgroup"))
+ goto cleanup;
+
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bss->user_pid, pid, "pid"))
+ goto cleanup;
+ if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid"))
+ goto cleanup;
+ ret = 0;
cleanup:
- test_ns_current_pid_tgid__destroy(skel);
+ if (server_fd >= 0)
+ close(server_fd);
+ test_ns_current_pid_tgid__destroy(skel);
+ return ret;
+}
+
+static int test_current_pid_tgid_sk_msg(void *args)
+{
+ int verdict, map, server_fd = -1, client_fd = -1;
+ struct test_ns_current_pid_tgid__bss *bss;
+ static const char send_msg[] = "message";
+ struct test_ns_current_pid_tgid *skel;
+ int ret = -1, err, key = 0;
+ pid_t tgid, pid;
+
+ skel = test_ns_current_pid_tgid__open();
+ if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open"))
+ return ret;
+
+ bpf_program__set_autoload(skel->progs.sk_msg, true);
+
+ err = test_ns_current_pid_tgid__load(skel);
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+ if (get_pid_tgid(&pid, &tgid, skel->bss))
+ goto cleanup;
+
+ verdict = bpf_program__fd(skel->progs.sk_msg);
+ map = bpf_map__fd(skel->maps.sock_map);
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "prog_attach"))
+ goto cleanup;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto cleanup;
- return err;
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto cleanup;
+
+ err = bpf_map_update_elem(map, &key, &client_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto cleanup;
+
+ err = send(client_fd, send_msg, sizeof(send_msg), 0);
+ if (!ASSERT_EQ(err, sizeof(send_msg), "send(msg)"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bss->user_pid, pid, "pid"))
+ goto cleanup;
+ if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid"))
+ goto cleanup;
+ ret = 0;
+
+cleanup:
+ if (server_fd >= 0)
+ close(server_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+ test_ns_current_pid_tgid__destroy(skel);
+ return ret;
}
-static void test_ns_current_pid_tgid_new_ns(void)
+static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg)
{
- int wstatus, duration = 0;
+ int wstatus;
pid_t cpid;
/* Create a process in a new namespace, this process
* will be the init process of this new namespace hence will be pid 1.
*/
- cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE,
- CLONE_NEWPID | SIGCHLD, NULL);
+ cpid = clone(fn, child_stack + STACK_SIZE,
+ CLONE_NEWPID | SIGCHLD, arg);
- if (CHECK(cpid == -1, "clone", strerror(errno)))
+ if (!ASSERT_NEQ(cpid, -1, "clone"))
return;
- if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+ if (!ASSERT_NEQ(waitpid(cpid, &wstatus, 0), -1, "waitpid"))
return;
- if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed"))
+ if (!ASSERT_OK(WEXITSTATUS(wstatus), "newns_pidtgid"))
return;
}
-void test_ns_current_pid_tgid(void)
+/* TODO: use a different tracepoint */
+void serial_test_current_pid_tgid(void)
{
- if (test__start_subtest("ns_current_pid_tgid_root_ns"))
- test_current_pid_tgid(NULL);
- if (test__start_subtest("ns_current_pid_tgid_new_ns"))
- test_ns_current_pid_tgid_new_ns();
+ if (test__start_subtest("root_ns_tp"))
+ test_current_pid_tgid_tp(NULL);
+ if (test__start_subtest("new_ns_tp"))
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL);
}
+
+void test_ns_current_pid_tgid_cgrp(void)
+{
+ int cgroup_fd = test__join_cgroup("/sock_addr");
+
+ if (ASSERT_OK_FD(cgroup_fd, "join_cgroup")) {
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_cgrp, &cgroup_fd);
+ close(cgroup_fd);
+ }
+}
+
+void test_ns_current_pid_tgid_sk_msg(void)
+{
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_sk_msg, NULL);
+}
+
+
diff --git a/tools/testing/selftests/bpf/prog_tests/obj_name.c b/tools/testing/selftests/bpf/prog_tests/obj_name.c
index e178416bddad..7093edca6e08 100644
--- a/tools/testing/selftests/bpf/prog_tests/obj_name.c
+++ b/tools/testing/selftests/bpf/prog_tests/obj_name.c
@@ -20,7 +20,7 @@ void test_obj_name(void)
__u32 duration = 0;
int i;
- for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
size_t name_len = strlen(tests[i].name) + 1;
union bpf_attr attr;
size_t ncopy;
@@ -38,13 +38,13 @@ void test_obj_name(void)
fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-prog-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
/* test different attr.map_name during BPF_MAP_CREATE */
@@ -59,13 +59,13 @@ void test_obj_name(void)
memcpy(attr.map_name, tests[i].name, ncopy);
fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-map-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c
new file mode 100644
index 000000000000..e9c07d561ded
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_parse_tcp_hdr_opt.skel.h"
+#include "test_parse_tcp_hdr_opt_dynptr.skel.h"
+#include "test_tcp_hdr_options.h"
+
+struct test_pkt {
+ struct ipv6_packet pk6_v6;
+ u8 options[16];
+} __packed;
+
+struct test_pkt pkt = {
+ .pk6_v6.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .pk6_v6.iph.nexthdr = IPPROTO_TCP,
+ .pk6_v6.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .pk6_v6.tcp.urg_ptr = 123,
+ .pk6_v6.tcp.doff = 9, /* 16 bytes of options */
+
+ .options = {
+ TCPOPT_MSS, 4, 0x05, 0xB4, TCPOPT_NOP, TCPOPT_NOP,
+ 0, 6, 0xBB, 0xBB, 0xBB, 0xBB, TCPOPT_EOL
+ },
+};
+
+static void test_parse_opt(void)
+{
+ struct test_parse_tcp_hdr_opt *skel;
+ struct bpf_program *prog;
+ char buf[128];
+ int err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt,
+ .data_size_in = sizeof(pkt),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 3,
+ );
+
+ skel = test_parse_tcp_hdr_opt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
+ prog = skel->progs.xdp_ingress_v6;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
+ ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
+
+ test_parse_tcp_hdr_opt__destroy(skel);
+}
+
+static void test_parse_opt_dynptr(void)
+{
+ struct test_parse_tcp_hdr_opt_dynptr *skel;
+ struct bpf_program *prog;
+ char buf[128];
+ int err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt,
+ .data_size_in = sizeof(pkt),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 3,
+ );
+
+ skel = test_parse_tcp_hdr_opt_dynptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
+ prog = skel->progs.xdp_ingress_v6;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
+ ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
+
+ test_parse_tcp_hdr_opt_dynptr__destroy(skel);
+}
+
+void test_parse_tcp_hdr_opt(void)
+{
+ if (test__start_subtest("parse_tcp_hdr_opt"))
+ test_parse_opt();
+ if (test__start_subtest("parse_tcp_hdr_opt_dynptr"))
+ test_parse_opt_dynptr();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
new file mode 100644
index 000000000000..343da65864d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "percpu_alloc_array.skel.h"
+#include "percpu_alloc_cgrp_local_storage.skel.h"
+#include "percpu_alloc_fail.skel.h"
+
+static void test_array(void)
+{
+ struct percpu_alloc_array *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = percpu_alloc_array__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.test_array_map_1, true);
+ bpf_program__set_autoload(skel->progs.test_array_map_2, true);
+ bpf_program__set_autoload(skel->progs.test_array_map_3, true);
+ bpf_program__set_autoload(skel->progs.test_array_map_4, true);
+
+ skel->bss->my_pid = getpid();
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ err = percpu_alloc_array__load(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__load"))
+ goto out;
+
+ err = percpu_alloc_array__attach(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_array_map_1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run array_map 1-4");
+ ASSERT_EQ(topts.retval, 0, "test_run array_map 1-4");
+ ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+ ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+out:
+ percpu_alloc_array__destroy(skel);
+}
+
+static void test_array_sleepable(void)
+{
+ struct percpu_alloc_array *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = percpu_alloc_array__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc__open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.test_array_map_10, true);
+
+ skel->bss->my_pid = getpid();
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ err = percpu_alloc_array__load(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__load"))
+ goto out;
+
+ err = percpu_alloc_array__attach(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_array_map_10);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run array_map_10");
+ ASSERT_EQ(topts.retval, 0, "test_run array_map_10");
+ ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+ ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+out:
+ percpu_alloc_array__destroy(skel);
+}
+
+static void test_cgrp_local_storage(void)
+{
+ struct percpu_alloc_cgrp_local_storage *skel;
+ int err, cgroup_fd, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ cgroup_fd = test__join_cgroup("/percpu_alloc");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /percpu_alloc"))
+ return;
+
+ skel = percpu_alloc_cgrp_local_storage__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc_cgrp_local_storage__open"))
+ goto close_fd;
+
+ skel->bss->my_pid = getpid();
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ err = percpu_alloc_cgrp_local_storage__load(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__load"))
+ goto destroy_skel;
+
+ err = percpu_alloc_cgrp_local_storage__attach(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__attach"))
+ goto destroy_skel;
+
+ prog_fd = bpf_program__fd(skel->progs.test_cgrp_local_storage_1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run cgrp_local_storage 1-3");
+ ASSERT_EQ(topts.retval, 0, "test_run cgrp_local_storage 1-3");
+ ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+ ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+
+destroy_skel:
+ percpu_alloc_cgrp_local_storage__destroy(skel);
+close_fd:
+ close(cgroup_fd);
+}
+
+static void test_failure(void) {
+ RUN_TESTS(percpu_alloc_fail);
+}
+
+void test_percpu_alloc(void)
+{
+ if (test__start_subtest("array"))
+ test_array();
+ if (test__start_subtest("array_sleepable"))
+ test_array_sleepable();
+ if (test__start_subtest("cgrp_local_storage"))
+ test_cgrp_local_storage();
+ if (test__start_subtest("failure_tests"))
+ test_failure();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index e35c444902a7..0a7ef770c487 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -15,6 +15,10 @@ static void check_good_sample(struct test_perf_branches *skel)
int pbe_size = sizeof(struct perf_branch_entry);
int duration = 0;
+ if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+ "checked sample validity before prog run"))
+ return;
+
if (CHECK(!skel->bss->valid, "output not valid",
"no valid sample from prog"))
return;
@@ -45,6 +49,10 @@ static void check_bad_sample(struct test_perf_branches *skel)
int written_stack = skel->bss->written_stack_out;
int duration = 0;
+ if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+ "checked sample validity before prog run"))
+ return;
+
if (CHECK(!skel->bss->valid, "output not valid",
"no valid sample from prog"))
return;
@@ -74,7 +82,7 @@ static void test_perf_branches_common(int perf_fd,
/* attach perf_event */
link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
- if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
goto out_destroy_skel;
/* generate some branches on cpu 0 */
@@ -83,8 +91,12 @@ static void test_perf_branches_common(int perf_fd,
err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
goto out_destroy;
- /* spin the loop for a while (random high number) */
- for (i = 0; i < 1000000; ++i)
+
+ /* Spin the loop for a while by using a high iteration count, and by
+ * checking whether the specific run count marker has been explicitly
+ * incremented at least once by the backing perf_event BPF program.
+ */
+ for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i)
++j;
test_perf_branches__detach(skel);
@@ -110,17 +122,17 @@ static void test_perf_branches_hw(void)
attr.type = PERF_TYPE_HARDWARE;
attr.config = PERF_COUNT_HW_CPU_CYCLES;
attr.freq = 1;
- attr.sample_freq = 4000;
+ attr.sample_freq = 1000;
attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
/*
- * Some setups don't support branch records (virtual machines, !x86),
- * so skip test in this case.
+ * Some setups don't support LBR (virtual machines, !x86, AMD Milan Zen
+ * 3 which only supports BRS), so skip test in this case.
*/
- if (pfd == -1) {
- if (errno == ENOENT || errno == EOPNOTSUPP) {
+ if (pfd < 0) {
+ if (errno == ENOENT || errno == EOPNOTSUPP || errno == EINVAL) {
printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
__func__);
test__skip();
@@ -151,7 +163,7 @@ static void test_perf_branches_no_hw(void)
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_CPU_CLOCK;
attr.freq = 1;
- attr.sample_freq = 4000;
+ attr.sample_freq = 1000;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index ca9f0895ec84..5fc2b3a0711e 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -43,10 +43,10 @@ int trigger_on_cpu(int cpu)
return 0;
}
-void test_perf_buffer(void)
+void serial_test_perf_buffer(void)
{
- int err, on_len, nr_on_cpus = 0, nr_cpus, i;
- struct perf_buffer_opts pb_opts = {};
+ int err, on_len, nr_on_cpus = 0, nr_cpus, i, j;
+ int zero = 0, my_pid = getpid();
struct test_perf_buffer *skel;
cpu_set_t cpu_seen;
struct perf_buffer *pb;
@@ -71,16 +71,19 @@ void test_perf_buffer(void)
if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
goto out_close;
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.my_pid_map), &zero, &my_pid, 0);
+ if (!ASSERT_OK(err, "my_pid_update"))
+ goto out_close;
+
/* attach probe */
err = test_perf_buffer__attach(skel);
if (CHECK(err, "attach_kprobe", "err %d\n", err))
goto out_close;
/* set up perf buffer */
- pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &cpu_seen;
- pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1,
+ on_sample, NULL, &cpu_seen, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out_close;
CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
@@ -107,19 +110,19 @@ void test_perf_buffer(void)
"expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
goto out_free_pb;
- if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt",
- "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus))
+ if (CHECK(perf_buffer__buffer_cnt(pb) != nr_on_cpus, "buf_cnt",
+ "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus))
goto out_close;
- for (i = 0; i < nr_cpus; i++) {
+ for (i = 0, j = 0; i < nr_cpus; i++) {
if (i >= on_len || !online[i])
continue;
- fd = perf_buffer__buffer_fd(pb, i);
+ fd = perf_buffer__buffer_fd(pb, j);
CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd);
last_fd = fd;
- err = perf_buffer__consume_buffer(pb, i);
+ err = perf_buffer__consume_buffer(pb, j);
if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err))
goto out_close;
@@ -127,12 +130,13 @@ void test_perf_buffer(void)
if (trigger_on_cpu(i))
goto out_close;
- err = perf_buffer__consume_buffer(pb, i);
- if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err))
+ err = perf_buffer__consume_buffer(pb, j);
+ if (CHECK(err, "consume_buf", "cpu %d, err %d\n", j, err))
goto out_close;
if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i))
goto out_close;
+ j++;
}
out_free_pb:
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
index 72c3690844fb..f4aad35afae1 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -63,7 +63,8 @@ void test_perf_event_stackmap(void)
PERF_SAMPLE_BRANCH_NO_FLAGS |
PERF_SAMPLE_BRANCH_NO_CYCLES |
PERF_SAMPLE_BRANCH_CALL_STACK,
- .sample_period = 5000,
+ .freq = 1,
+ .sample_freq = read_perf_max_sample_freq(),
.size = sizeof(struct perf_event_attr),
};
struct perf_event_stackmap *skel;
@@ -97,8 +98,7 @@ void test_perf_event_stackmap(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
close(pmu_fd);
goto cleanup;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
new file mode 100644
index 000000000000..d940ff87fa08
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "testing_helpers.h"
+#include "test_perf_link.skel.h"
+
+#define BURN_TIMEOUT_MS 100
+#define BURN_TIMEOUT_NS BURN_TIMEOUT_MS * 1000000
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+/* TODO: often fails in concurrent mode */
+void serial_test_perf_link(void)
+{
+ struct test_perf_link *skel = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1, link_fd = -1, err;
+ int run_cnt_before, run_cnt_after;
+ struct bpf_link_info info;
+ __u32 info_len = sizeof(info);
+ __u64 timeout_time_ns;
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 1000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ skel = test_perf_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ link_fd = bpf_link_create(bpf_program__fd(skel->progs.handler), pfd,
+ BPF_PERF_EVENT, NULL);
+ if (!ASSERT_GE(link_fd, 0, "link_fd"))
+ goto cleanup;
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_link_get_info_by_fd(link_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "link_get_info"))
+ goto cleanup;
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type");
+ ASSERT_GT(info.id, 0, "link_id");
+ ASSERT_GT(info.prog_id, 0, "link_prog_id");
+
+ /* ensure we get at least one perf_event prog execution */
+ timeout_time_ns = get_time_ns() + BURN_TIMEOUT_NS;
+ while (true) {
+ burn_cpu();
+ if (skel->bss->run_cnt > 0)
+ break;
+ if (!ASSERT_LT(get_time_ns(), timeout_time_ns, "run_cnt_timeout"))
+ break;
+ }
+
+ /* perf_event is still active, but we close link and BPF program
+ * shouldn't be executed anymore
+ */
+ close(link_fd);
+ link_fd = -1;
+
+ /* make sure there are no stragglers */
+ kern_sync_rcu();
+
+ run_cnt_before = skel->bss->run_cnt;
+ burn_cpu();
+ run_cnt_after = skel->bss->run_cnt;
+
+ ASSERT_EQ(run_cnt_before, run_cnt_after, "run_cnt_before_after");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ if (pfd >= 0)
+ close(pfd);
+ test_perf_link__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_skip.c b/tools/testing/selftests/bpf/prog_tests/perf_skip.c
new file mode 100644
index 000000000000..37d8618800e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_skip.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <test_progs.h>
+#include "test_perf_skip.skel.h"
+#include <linux/compiler.h>
+#include <linux/hw_breakpoint.h>
+#include <sys/mman.h>
+
+#ifndef TRAP_PERF
+#define TRAP_PERF 6
+#endif
+
+int sigio_count, sigtrap_count;
+
+static void handle_sigio(int sig __always_unused)
+{
+ ++sigio_count;
+}
+
+static void handle_sigtrap(int signum __always_unused,
+ siginfo_t *info,
+ void *ucontext __always_unused)
+{
+ ASSERT_EQ(info->si_code, TRAP_PERF, "si_code");
+ ++sigtrap_count;
+}
+
+static noinline int test_function(void)
+{
+ asm volatile ("");
+ return 0;
+}
+
+void serial_test_perf_skip(void)
+{
+ struct sigaction action = {};
+ struct sigaction previous_sigtrap;
+ sighandler_t previous_sigio = SIG_ERR;
+ struct test_perf_skip *skel = NULL;
+ struct perf_event_attr attr = {};
+ int perf_fd = -1;
+ int err;
+ struct f_owner_ex owner;
+ struct bpf_link *prog_link = NULL;
+
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = handle_sigtrap;
+ sigemptyset(&action.sa_mask);
+ if (!ASSERT_OK(sigaction(SIGTRAP, &action, &previous_sigtrap), "sigaction"))
+ return;
+
+ previous_sigio = signal(SIGIO, handle_sigio);
+ if (!ASSERT_NEQ(previous_sigio, SIG_ERR, "signal"))
+ goto cleanup;
+
+ skel = test_perf_skip__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.size = sizeof(attr);
+ attr.bp_type = HW_BREAKPOINT_X;
+ attr.bp_addr = (uintptr_t)test_function;
+ attr.bp_len = sizeof(long);
+ attr.sample_period = 1;
+ attr.sample_type = PERF_SAMPLE_IP;
+ attr.pinned = 1;
+ attr.exclude_kernel = 1;
+ attr.exclude_hv = 1;
+ attr.precise_ip = 3;
+ attr.sigtrap = 1;
+ attr.remove_on_exec = 1;
+
+ perf_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
+ if (perf_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("SKIP:no PERF_TYPE_BREAKPOINT/HW_BREAKPOINT_X\n");
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_OK(perf_fd < 0, "perf_event_open"))
+ goto cleanup;
+
+ /* Configure the perf event to signal on sample. */
+ err = fcntl(perf_fd, F_SETFL, O_ASYNC);
+ if (!ASSERT_OK(err, "fcntl(F_SETFL, O_ASYNC)"))
+ goto cleanup;
+
+ owner.type = F_OWNER_TID;
+ owner.pid = syscall(__NR_gettid);
+ err = fcntl(perf_fd, F_SETOWN_EX, &owner);
+ if (!ASSERT_OK(err, "fcntl(F_SETOWN_EX)"))
+ goto cleanup;
+
+ /* Allow at most one sample. A sample rejected by bpf should
+ * not count against this.
+ */
+ err = ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 1);
+ if (!ASSERT_OK(err, "ioctl(PERF_EVENT_IOC_REFRESH)"))
+ goto cleanup;
+
+ prog_link = bpf_program__attach_perf_event(skel->progs.handler, perf_fd);
+ if (!ASSERT_OK_PTR(prog_link, "bpf_program__attach_perf_event"))
+ goto cleanup;
+
+ /* Configure the bpf program to suppress the sample. */
+ skel->bss->ip = (uintptr_t)test_function;
+ test_function();
+
+ ASSERT_EQ(sigio_count, 0, "sigio_count");
+ ASSERT_EQ(sigtrap_count, 0, "sigtrap_count");
+
+ /* Configure the bpf program to allow the sample. */
+ skel->bss->ip = 0;
+ test_function();
+
+ ASSERT_EQ(sigio_count, 1, "sigio_count");
+ ASSERT_EQ(sigtrap_count, 1, "sigtrap_count");
+
+ /* Test that the sample above is the only one allowed (by perf, not
+ * by bpf)
+ */
+ test_function();
+
+ ASSERT_EQ(sigio_count, 1, "sigio_count");
+ ASSERT_EQ(sigtrap_count, 1, "sigtrap_count");
+
+cleanup:
+ bpf_link__destroy(prog_link);
+ if (perf_fd >= 0)
+ close(perf_fd);
+ test_perf_skip__destroy(skel);
+
+ if (previous_sigio != SIG_ERR)
+ signal(SIGIO, previous_sigio);
+ sigaction(SIGTRAP, &previous_sigtrap, NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index fcf54b3a1dd0..c799a3c5ad1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -18,7 +18,7 @@ __u32 get_map_id(struct bpf_object *obj, const char *name)
if (CHECK(!map, "find map", "NULL map"))
return 0;
- err = bpf_obj_get_info_by_fd(bpf_map__fd(map),
+ err = bpf_map_get_info_by_fd(bpf_map__fd(map),
&map_info, &map_info_len);
CHECK(err, "get map info", "err %d errno %d", err, errno);
return map_info.id;
@@ -26,13 +26,13 @@ __u32 get_map_id(struct bpf_object *obj, const char *name)
void test_pinning(void)
{
- const char *file_invalid = "./test_pinning_invalid.o";
+ const char *file_invalid = "./test_pinning_invalid.bpf.o";
const char *custpinpath = "/sys/fs/bpf/custom/pinmap";
const char *nopinpath = "/sys/fs/bpf/nopinmap";
const char *nopinpath2 = "/sys/fs/bpf/nopinmap2";
const char *custpath = "/sys/fs/bpf/custom";
const char *pinpath = "/sys/fs/bpf/pinmap";
- const char *file = "./test_pinning.o";
+ const char *file = "./test_pinning.bpf.o";
__u32 map_id, map_id2, duration = 0;
struct stat statbuf = {};
struct bpf_object *obj;
@@ -125,6 +125,10 @@ void test_pinning(void)
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path"))
+ goto out;
+
/* set pinning path of other map and re-pin all */
map = bpf_object__find_map_by_name(obj, "nopinmap");
if (CHECK(!map, "find map", "NULL map"))
@@ -134,6 +138,11 @@ void test_pinning(void)
if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path after set */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath,
+ "get pin path after set"))
+ goto out;
+
/* should only pin the one unpinned map */
err = bpf_object__pin_maps(obj, NULL);
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
@@ -232,8 +241,8 @@ void test_pinning(void)
goto out;
}
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
- sizeof(__u64), 1, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(__u32),
+ sizeof(__u64), 1, NULL);
if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
new file mode 100644
index 000000000000..9ae49b587f3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <test_progs.h>
+
+
+#include "test_pinning_devmap.skel.h"
+
+void test_pinning_devmap_reuse(void)
+{
+ const char *pinpath1 = "/sys/fs/bpf/pinmap1";
+ const char *pinpath2 = "/sys/fs/bpf/pinmap2";
+ struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+ /* load the object a first time */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "skel_load1"))
+ goto out;
+
+ /* load the object a second time, re-using the pinned map */
+ skel2 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "skel_load2"))
+ goto out;
+
+ /* we can close the reference safely without
+ * the map's refcount falling to 0
+ */
+ test_pinning_devmap__destroy(skel1);
+ skel1 = NULL;
+
+ /* now, swap the pins */
+ err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "swap pins"))
+ goto out;
+
+ /* load the object again, this time the re-use should fail */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_ERR_PTR(skel1, "skel_load3"))
+ goto out;
+
+out:
+ unlink(pinpath1);
+ unlink(pinpath2);
+ test_pinning_devmap__destroy(skel1);
+ test_pinning_devmap__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_htab.c b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
new file mode 100644
index 000000000000..16bd74be3dbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_pinning_htab.skel.h"
+
+static void unpin_map(const char *map_name, const char *pin_path)
+{
+ struct test_pinning_htab *skel;
+ struct bpf_map *map;
+ int err;
+
+ skel = test_pinning_htab__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "bpf_object__find_map_by_name"))
+ goto out;
+
+ err = bpf_map__pin(map, pin_path);
+ if (!ASSERT_OK(err, "bpf_map__pin"))
+ goto out;
+
+ err = bpf_map__unpin(map, pin_path);
+ ASSERT_OK(err, "bpf_map__unpin");
+out:
+ test_pinning_htab__destroy(skel);
+}
+
+void test_pinning_htab(void)
+{
+ if (test__start_subtest("timer_prealloc"))
+ unpin_map("timer_prealloc", "/sys/fs/bpf/timer_prealloc");
+ if (test__start_subtest("timer_no_prealloc"))
+ unpin_map("timer_no_prealloc", "/sys/fs/bpf/timer_no_prealloc");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index 44b514fabccd..682e4ff45b01 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -4,25 +4,29 @@
void test_pkt_access(void)
{
- const char *file = "./test_pkt_access.o";
+ const char *file = "./test_pkt_access.bpf.o";
struct bpf_object *obj;
- __u32 duration, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 100000,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv4",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv4 test_run_opts err");
+ ASSERT_OK(topts.retval, "ipv4 test_run_opts retval");
+
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = 0; /* reset from last call */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6 test_run_opts err");
+ ASSERT_OK(topts.retval, "ipv6 test_run_opts retval");
- err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index 939015cd6dba..0d85e0642811 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -4,20 +4,22 @@
void test_pkt_md_access(void)
{
- const char *file = "./test_pkt_md_access.o";
+ const char *file = "./test_pkt_md_access.bpf.o";
struct bpf_object *obj;
- __u32 duration, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+ ASSERT_OK(topts.retval, "test_run_opts retval");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/preempt_lock.c b/tools/testing/selftests/bpf/prog_tests/preempt_lock.c
new file mode 100644
index 000000000000..02917c672441
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/preempt_lock.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <preempt_lock.skel.h>
+
+void test_preempt_lock(void)
+{
+ RUN_TESTS(preempt_lock);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c
new file mode 100644
index 000000000000..3a2ec3923fca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <test_progs.h>
+
+#include "preempted_bpf_ma_op.skel.h"
+
+#define ALLOC_THREAD_NR 4
+#define ALLOC_LOOP_NR 512
+
+struct alloc_ctx {
+ /* output */
+ int run_err;
+ /* input */
+ int fd;
+ bool *nomem_err;
+};
+
+static void *run_alloc_prog(void *data)
+{
+ struct alloc_ctx *ctx = data;
+ cpu_set_t cpu_set;
+ int i;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+
+ for (i = 0; i < ALLOC_LOOP_NR && !*ctx->nomem_err; i++) {
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err;
+
+ err = bpf_prog_test_run_opts(ctx->fd, &topts);
+ ctx->run_err |= err | topts.retval;
+ }
+
+ return NULL;
+}
+
+void test_preempted_bpf_ma_op(void)
+{
+ struct alloc_ctx ctx[ALLOC_THREAD_NR];
+ struct preempted_bpf_ma_op *skel;
+ pthread_t tid[ALLOC_THREAD_NR];
+ int i, err;
+
+ skel = preempted_bpf_ma_op__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ err = preempted_bpf_ma_op__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(ctx); i++) {
+ struct bpf_program *prog;
+ char name[8];
+
+ snprintf(name, sizeof(name), "test%d", i);
+ prog = bpf_object__find_program_by_name(skel->obj, name);
+ if (!ASSERT_OK_PTR(prog, "no test prog"))
+ goto out;
+
+ ctx[i].run_err = 0;
+ ctx[i].fd = bpf_program__fd(prog);
+ ctx[i].nomem_err = &skel->bss->nomem_err;
+ }
+
+ memset(tid, 0, sizeof(tid));
+ for (i = 0; i < ARRAY_SIZE(tid); i++) {
+ err = pthread_create(&tid[i], NULL, run_alloc_prog, &ctx[i]);
+ if (!ASSERT_OK(err, "pthread_create"))
+ break;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tid); i++) {
+ if (!tid[i])
+ break;
+ pthread_join(tid[i], NULL);
+ ASSERT_EQ(ctx[i].run_err, 0, "run prog err");
+ }
+
+ ASSERT_FALSE(skel->bss->nomem_err, "ENOMEM");
+out:
+ preempted_bpf_ma_op__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prepare.c b/tools/testing/selftests/bpf/prog_tests/prepare.c
new file mode 100644
index 000000000000..fb5cdad97116
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prepare.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "prepare.skel.h"
+
+static bool check_prepared(struct bpf_object *obj)
+{
+ bool is_prepared = true;
+ const struct bpf_map *map;
+
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__fd(map) < 0)
+ is_prepared = false;
+ }
+
+ return is_prepared;
+}
+
+static void test_prepare_no_load(void)
+{
+ struct prepare *skel;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+static void test_prepare_load(void)
+{
+ struct prepare *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+ err = prepare__load(skel);
+ if (!ASSERT_OK(err, "prepare__load"))
+ goto cleanup;
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.program);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+void test_prepare(void)
+{
+ if (test__start_subtest("prepare_load"))
+ test_prepare_load();
+ if (test__start_subtest("prepare_no_load"))
+ test_prepare_no_load();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
new file mode 100644
index 000000000000..5d3c00a08a88
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "pro_epilogue.skel.h"
+#include "epilogue_tailcall.skel.h"
+#include "pro_epilogue_goto_start.skel.h"
+#include "epilogue_exit.skel.h"
+#include "pro_epilogue_with_kfunc.skel.h"
+
+struct st_ops_args {
+ __u64 a;
+};
+
+static void test_tailcall(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct epilogue_tailcall *skel;
+ struct st_ops_args args;
+ int err, prog_fd;
+
+ skel = epilogue_tailcall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "epilogue_tailcall__open_and_load"))
+ return;
+
+ topts.ctx_in = &args;
+ topts.ctx_size_in = sizeof(args);
+
+ skel->links.epilogue_tailcall =
+ bpf_map__attach_struct_ops(skel->maps.epilogue_tailcall);
+ if (!ASSERT_OK_PTR(skel->links.epilogue_tailcall, "attach_struct_ops"))
+ goto done;
+
+ /* Both test_epilogue_tailcall and test_epilogue_subprog are
+ * patched with epilogue. When syscall_epilogue_tailcall()
+ * is run, test_epilogue_tailcall() is triggered.
+ * It executes a tail call and control is transferred to
+ * test_epilogue_subprog(). Only test_epilogue_subprog()
+ * does args->a += 1, thus final args.a value of 10001
+ * guarantees that only the epilogue of the
+ * test_epilogue_subprog is executed.
+ */
+ memset(&args, 0, sizeof(args));
+ prog_fd = bpf_program__fd(skel->progs.syscall_epilogue_tailcall);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(args.a, 10001, "args.a");
+ ASSERT_EQ(topts.retval, 10001 * 2, "topts.retval");
+
+done:
+ epilogue_tailcall__destroy(skel);
+}
+
+void test_pro_epilogue(void)
+{
+ RUN_TESTS(pro_epilogue);
+ RUN_TESTS(pro_epilogue_goto_start);
+ RUN_TESTS(epilogue_exit);
+ RUN_TESTS(pro_epilogue_with_kfunc);
+ if (test__start_subtest("tailcall"))
+ test_tailcall();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 7aecfd9e87d1..8721671321de 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -1,27 +1,38 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
-void test_probe_user(void)
+/* TODO: corrupts other tests uses connect() */
+void serial_test_probe_user(void)
{
- const char *prog_name = "kprobe/__sys_connect";
- const char *obj_file = "./test_probe_user.o";
+ static const char *const prog_names[] = {
+ "handle_sys_connect",
+#if defined(__s390x__)
+ "handle_sys_socketcall",
+#endif
+ };
+ enum { prog_count = ARRAY_SIZE(prog_names) };
+ const char *obj_file = "./test_probe_user.bpf.o";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
int err, results_map_fd, sock_fd, duration = 0;
struct sockaddr curr, orig, tmp;
struct sockaddr_in *in = (struct sockaddr_in *)&curr;
- struct bpf_link *kprobe_link = NULL;
- struct bpf_program *kprobe_prog;
+ struct bpf_link *kprobe_links[prog_count] = {};
+ struct bpf_program *kprobe_progs[prog_count];
struct bpf_object *obj;
static const int zero = 0;
+ size_t i;
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
- kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!kprobe_prog, "find_probe",
- "prog '%s' not found\n", prog_name))
- goto cleanup;
+ for (i = 0; i < prog_count; i++) {
+ kprobe_progs[i] =
+ bpf_object__find_program_by_name(obj, prog_names[i]);
+ if (CHECK(!kprobe_progs[i], "find_probe",
+ "prog '%s' not found\n", prog_names[i]))
+ goto cleanup;
+ }
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
@@ -32,11 +43,10 @@ void test_probe_user(void)
"err %d\n", results_map_fd))
goto cleanup;
- kprobe_link = bpf_program__attach(kprobe_prog);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link))) {
- kprobe_link = NULL;
- goto cleanup;
+ for (i = 0; i < prog_count; i++) {
+ kprobe_links[i] = bpf_program__attach(kprobe_progs[i]);
+ if (!ASSERT_OK_PTR(kprobe_links[i], "attach_kprobe"))
+ goto cleanup;
}
memset(&curr, 0, sizeof(curr));
@@ -71,6 +81,7 @@ void test_probe_user(void)
inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
goto cleanup;
cleanup:
- bpf_link__destroy(kprobe_link);
+ for (i = 0; i < prog_count; i++)
+ bpf_link__destroy(kprobe_links[i]);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_array_init.c b/tools/testing/selftests/bpf/prog_tests/prog_array_init.c
new file mode 100644
index 000000000000..fc4657619739
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_array_init.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include "test_prog_array_init.skel.h"
+
+void test_prog_array_init(void)
+{
+ struct test_prog_array_init *skel;
+ int err;
+
+ skel = test_prog_array_init__open();
+ if (!ASSERT_OK_PTR(skel, "could not open BPF object"))
+ return;
+
+ skel->rodata->my_pid = getpid();
+
+ err = test_prog_array_init__load(skel);
+ if (!ASSERT_OK(err, "could not load BPF object"))
+ goto cleanup;
+
+ skel->links.entry = bpf_program__attach_raw_tracepoint(skel->progs.entry, "sys_enter");
+ if (!ASSERT_OK_PTR(skel->links.entry, "could not attach BPF program"))
+ goto cleanup;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->value, 42, "unexpected value");
+
+cleanup:
+ test_prog_array_init__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
new file mode 100644
index 000000000000..01f1d1b6715a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_pkt_access.skel.h"
+
+static const __u32 duration;
+
+static void check_run_cnt(int prog_fd, __u64 run_cnt)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int err;
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
+ return;
+
+ CHECK(run_cnt != info.run_cnt, "run_cnt",
+ "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
+}
+
+void test_prog_run_opts(void)
+{
+ struct test_pkt_access *skel;
+ int err, stats_fd = -1, prog_fd;
+ char buf[10] = {};
+ __u64 run_cnt = 0;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .repeat = 1,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = 5,
+ );
+
+ stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (!ASSERT_GE(stats_fd, 0, "enable_stats good fd"))
+ return;
+
+ skel = test_pkt_access__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_EQ(errno, ENOSPC, "test_run errno");
+ ASSERT_ERR(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+
+ ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), "test_run data_size_out");
+ ASSERT_EQ(buf[5], 0, "overflow, BPF_PROG_TEST_RUN ignored size hint");
+
+ run_cnt += topts.repeat;
+ check_run_cnt(prog_fd, run_cnt);
+
+ topts.data_out = NULL;
+ topts.data_size_out = 0;
+ topts.repeat = 2;
+ errno = 0;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(errno, "run_no_output errno");
+ ASSERT_OK(err, "run_no_output err");
+ ASSERT_OK(topts.retval, "run_no_output retval");
+
+ run_cnt += topts.repeat;
+ check_run_cnt(prog_fd, run_cnt);
+
+cleanup:
+ if (skel)
+ test_pkt_access__destroy(skel);
+ if (stats_fd >= 0)
+ close(stats_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
deleted file mode 100644
index 935a294f049a..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include <network_helpers.h>
-
-void test_prog_run_xattr(void)
-{
- const char *file = "./test_pkt_access.o";
- struct bpf_object *obj;
- char buf[10];
- int err;
- struct bpf_prog_test_run_attr tattr = {
- .repeat = 1,
- .data_in = &pkt_v4,
- .data_size_in = sizeof(pkt_v4),
- .data_out = buf,
- .data_size_out = 5,
- };
-
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
- return;
-
- memset(buf, 0, sizeof(buf));
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
- "err %d errno %d retval %d\n", err, errno, tattr.retval);
-
- CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
- "incorrect output size, want %zu have %u\n",
- sizeof(pkt_v4), tattr.data_size_out);
-
- CHECK_ATTR(buf[5] != 0, "overflow",
- "BPF_PROG_TEST_RUN ignored size hint\n");
-
- tattr.data_out = NULL;
- tattr.data_size_out = 0;
- errno = 0;
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err || errno || tattr.retval, "run_no_output",
- "err %d errno %d retval %d\n", err, errno, tattr.retval);
-
- tattr.data_size_out = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
-
- bpf_object__close(obj);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
new file mode 100644
index 000000000000..7607cfc2408c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+static void clear_test_state(struct test_state *state)
+{
+ state->error_cnt = 0;
+ state->sub_succ_cnt = 0;
+ state->skip_cnt = 0;
+}
+
+void test_prog_tests_framework(void)
+{
+ struct test_state *state = env.test_state;
+
+ /* in all the ASSERT calls below we need to return on the first
+ * error due to the fact that we are cleaning the test state after
+ * each dummy subtest
+ */
+
+ /* test we properly count skipped tests with subtests */
+ if (test__start_subtest("test_good_subtest"))
+ test__end_subtest();
+ if (!ASSERT_EQ(state->skip_cnt, 0, "skip_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->error_cnt, 0, "error_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->subtest_num, 1, "subtest_num_check"))
+ return;
+ clear_test_state(state);
+
+ if (test__start_subtest("test_skip_subtest")) {
+ test__skip();
+ test__end_subtest();
+ }
+ if (test__start_subtest("test_skip_subtest")) {
+ test__skip();
+ test__end_subtest();
+ }
+ if (!ASSERT_EQ(state->skip_cnt, 2, "skip_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->subtest_num, 3, "subtest_num_check"))
+ return;
+ clear_test_state(state);
+
+ if (test__start_subtest("test_fail_subtest")) {
+ test__fail();
+ test__end_subtest();
+ }
+ if (!ASSERT_EQ(state->error_cnt, 1, "error_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->subtest_num, 4, "subtest_num_check"))
+ return;
+ clear_test_state(state);
+}
+
+static void dummy_emit(const char *buf, bool force) {}
+
+void test_prog_tests_framework_expected_msgs(void)
+{
+ struct expected_msgs msgs;
+ int i, j, error_cnt;
+ const struct {
+ const char *name;
+ const char *log;
+ const char *expected;
+ struct expect_msg *pats;
+ } cases[] = {
+ {
+ .name = "simple-ok",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "simple-fail",
+ .log = "aaabbbddd",
+ .expected = "MATCHED SUBSTR: 'aaa'\n"
+ "EXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-mid",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ { .substr = "bar", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-tail",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-head",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "foo", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-head",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'aaa'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa", .negative = true },
+ { .substr = "bbb" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-tail",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "bbb" },
+ { .substr = "ccc", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-1",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'bbb'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-2",
+ .log = "aaabbb222ccc",
+ .expected = "UNEXPECTED SUBSTR: '222'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "222", .negative = true },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ }
+ };
+
+ for (i = 0; i < ARRAY_SIZE(cases); i++) {
+ if (test__start_subtest(cases[i].name)) {
+ error_cnt = env.subtest_state->error_cnt;
+ msgs.patterns = cases[i].pats;
+ msgs.cnt = 0;
+ for (j = 0; cases[i].pats[j].substr; j++)
+ msgs.cnt++;
+ validate_msgs(cases[i].log, &msgs, dummy_emit);
+ fflush(stderr);
+ env.subtest_state->error_cnt = error_cnt;
+ if (cases[i].expected)
+ ASSERT_HAS_SUBSTR(env.subtest_state->log_buf, cases[i].expected, "expected output");
+ else
+ ASSERT_STREQ(env.subtest_state->log_buf, "", "expected no output");
+ test__end_subtest();
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c b/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c
new file mode 100644
index 000000000000..8d077d150c56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <string.h>
+#include <linux/bpf.h>
+#include <test_progs.h>
+#include "test_ptr_untrusted.skel.h"
+
+#define TP_NAME "sched_switch"
+
+void serial_test_ptr_untrusted(void)
+{
+ struct test_ptr_untrusted *skel;
+ int err;
+
+ skel = test_ptr_untrusted__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* First, attach lsm prog */
+ skel->links.lsm_run = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(skel->links.lsm_run, "lsm_attach"))
+ goto cleanup;
+
+ /* Second, attach raw_tp prog. The lsm prog will be triggered. */
+ skel->links.raw_tp_run = bpf_program__attach_raw_tracepoint(skel->progs.raw_tp_run,
+ TP_NAME);
+ if (!ASSERT_OK_PTR(skel->links.raw_tp_run, "raw_tp_attach"))
+ goto cleanup;
+
+ err = strncmp(skel->bss->tp_name, TP_NAME, strlen(TP_NAME));
+ ASSERT_EQ(err, 0, "cmp_tp_name");
+
+cleanup:
+ test_ptr_untrusted__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index f47e7b1cb32c..a043af9cd6d9 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -10,24 +10,31 @@ enum {
static void test_queue_stack_map_by_type(int type)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE], duration, retval, size, val;
+ __u32 vals[MAP_SIZE], val;
int i, err, prog_fd, map_in_fd, map_out_fd;
char file[32], buf[128];
struct bpf_object *obj;
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+ struct iphdr iph = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
/* Fill test values to be used */
for (i = 0; i < MAP_SIZE; i++)
vals[i] = rand();
if (type == QUEUE)
- strncpy(file, "./test_queue_map.o", sizeof(file));
+ strncpy(file, "./test_queue_map.bpf.o", sizeof(file));
else if (type == STACK)
- strncpy(file, "./test_stack_map.o", sizeof(file));
+ strncpy(file, "./test_stack_map.bpf.o", sizeof(file));
else
return;
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -58,36 +65,37 @@ static void test_queue_stack_map_by_type(int type)
pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5;
}
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- if (err || retval || size != sizeof(pkt_v4) ||
- iph->daddr != val)
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (err || topts.retval ||
+ topts.data_size_out != sizeof(pkt_v4))
+ break;
+ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
+ if (iph.daddr != val)
break;
}
- CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val,
- "bpf_map_pop_elem",
- "err %d errno %d retval %d size %d iph->daddr %u\n",
- err, errno, retval, size, iph->daddr);
+ ASSERT_OK(err, "bpf_map_pop_elem");
+ ASSERT_OK(topts.retval, "bpf_map_pop_elem test retval");
+ ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4),
+ "bpf_map_pop_elem data_size_out");
+ ASSERT_EQ(iph.daddr, val, "bpf_map_pop_elem iph.daddr");
/* Queue is empty, program should return TC_ACT_SHOT */
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4),
- "check-queue-stack-map-empty",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "check-queue-stack-map-empty");
+ ASSERT_EQ(topts.retval, 2 /* TC_ACT_SHOT */,
+ "check-queue-stack-map-empty test retval");
+ ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4),
+ "check-queue-stack-map-empty data_size_out");
/* Check that the program pushed elements correctly */
for (i = 0; i < MAP_SIZE; i++) {
err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val);
- if (err || val != vals[i] * 5)
- break;
+ ASSERT_OK(err, "bpf_map_lookup_and_delete_elem");
+ ASSERT_EQ(val, vals[i] * 5, "bpf_map_push_elem val");
}
-
- CHECK(i != MAP_SIZE && (err || val != vals[i] * 5),
- "bpf_map_push_elem", "err %d value %u\n", err, val);
-
out:
pkt_v4.iph.saddr = 0;
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c
new file mode 100644
index 000000000000..43676a9922dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "raw_tp_null.skel.h"
+#include "raw_tp_null_fail.skel.h"
+
+void test_raw_tp_null(void)
+{
+ struct raw_tp_null *skel;
+
+ RUN_TESTS(raw_tp_null_fail);
+
+ skel = raw_tp_null__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load"))
+ return;
+
+ skel->bss->tid = sys_gettid();
+
+ if (!ASSERT_OK(raw_tp_null__attach(skel), "raw_tp_null__attach"))
+ goto end;
+
+ ASSERT_OK(trigger_module_test_read(2), "trigger testmod read");
+ ASSERT_EQ(skel->bss->i, 3, "invocations");
+
+end:
+ raw_tp_null__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
index c5fb191874ac..fe5b8fae2c36 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -5,59 +5,54 @@
#include "bpf/libbpf_internal.h"
#include "test_raw_tp_test_run.skel.h"
-static int duration;
-
void test_raw_tp_test_run(void)
{
- struct bpf_prog_test_run_attr test_attr = {};
int comm_fd = -1, err, nr_online, i, prog_fd;
__u64 args[2] = {0x1234ULL, 0x5678ULL};
int expected_retval = 0x1234 + 0x5678;
struct test_raw_tp_test_run *skel;
char buf[] = "new_name";
bool *online = NULL;
- DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
- .ctx_in = args,
- .ctx_size_in = sizeof(args),
- .flags = BPF_F_TEST_RUN_ON_CPU,
- );
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ );
err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online,
&nr_online);
- if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err))
+ if (!ASSERT_OK(err, "parse_cpu_mask_file"))
return;
skel = test_raw_tp_test_run__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
goto cleanup;
err = test_raw_tp_test_run__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "skel_attach"))
goto cleanup;
comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
- if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno))
+ if (!ASSERT_GE(comm_fd, 0, "open /proc/self/comm"))
goto cleanup;
err = write(comm_fd, buf, sizeof(buf));
- CHECK(err < 0, "task rename", "err %d", errno);
+ ASSERT_GE(err, 0, "task rename");
- CHECK(skel->bss->count == 0, "check_count", "didn't increase\n");
- CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n");
+ ASSERT_NEQ(skel->bss->count, 0, "check_count");
+ ASSERT_EQ(skel->data->on_cpu, 0xffffffff, "check_on_cpu");
prog_fd = bpf_program__fd(skel->progs.rename);
- test_attr.prog_fd = prog_fd;
- test_attr.ctx_in = args;
- test_attr.ctx_size_in = sizeof(__u64);
+ opts.ctx_in = args;
+ opts.ctx_size_in = sizeof(__u64);
- err = bpf_prog_test_run_xattr(&test_attr);
- CHECK(err == 0, "test_run", "should fail for too small ctx\n");
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_NEQ(err, 0, "test_run should fail for too small ctx");
- test_attr.ctx_size_in = sizeof(args);
- err = bpf_prog_test_run_xattr(&test_attr);
- CHECK(err < 0, "test_run", "err %d\n", errno);
- CHECK(test_attr.retval != expected_retval, "check_retval",
- "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval);
+ opts.ctx_size_in = sizeof(args);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(opts.retval, expected_retval, "check_retval");
for (i = 0; i < nr_online; i++) {
if (!online[i])
@@ -66,28 +61,23 @@ void test_raw_tp_test_run(void)
opts.cpu = i;
opts.retval = 0;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err < 0, "test_run_opts", "err %d\n", errno);
- CHECK(skel->data->on_cpu != i, "check_on_cpu",
- "expect %d got %d\n", i, skel->data->on_cpu);
- CHECK(opts.retval != expected_retval,
- "check_retval", "expect 0x%x, got 0x%x\n",
- expected_retval, opts.retval);
+ ASSERT_OK(err, "test_run_opts");
+ ASSERT_EQ(skel->data->on_cpu, i, "check_on_cpu");
+ ASSERT_EQ(opts.retval, expected_retval, "check_retval");
}
/* invalid cpu ID should fail with ENXIO */
opts.cpu = 0xffffffff;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err != -1 || errno != ENXIO,
- "test_run_opts_fail",
- "should failed with ENXIO\n");
+ ASSERT_EQ(errno, ENXIO, "test_run_opts should fail with ENXIO");
+ ASSERT_ERR(err, "test_run_opts_fail");
/* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */
opts.cpu = 1;
opts.flags = 0;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err != -1 || errno != EINVAL,
- "test_run_opts_fail",
- "should failed with EINVAL\n");
+ ASSERT_EQ(errno, EINVAL, "test_run_opts should fail with EINVAL");
+ ASSERT_ERR(err, "test_run_opts_fail");
cleanup:
close(comm_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
index 9807336a3016..216b0dfac0fe 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
@@ -2,6 +2,7 @@
#include <test_progs.h>
#include <linux/nbd.h>
+#include "bpf_util.h"
void test_raw_tp_writable_reject_nbd_invalid(void)
{
@@ -18,15 +19,15 @@ void test_raw_tp_writable_reject_nbd_invalid(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr load_attr = {
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
- .license = "GPL v2",
- .insns = program,
- .insns_cnt = sizeof(program) / sizeof(struct bpf_insn),
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
.log_level = 2,
- };
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
- bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+ bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2",
+ program, ARRAY_SIZE(program),
+ &opts);
if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load",
"failed: %d errno %d\n", bpf_fd, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
index 5c45424cac5f..e3668058b7bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
@@ -2,8 +2,10 @@
#include <test_progs.h>
#include <linux/nbd.h>
+#include "bpf_util.h"
-void test_raw_tp_writable_test_run(void)
+/* NOTE: conflict with other tests. */
+void serial_test_raw_tp_writable_test_run(void)
{
__u32 duration = 0;
char error[4096];
@@ -16,15 +18,15 @@ void test_raw_tp_writable_test_run(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr load_attr = {
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
- .license = "GPL v2",
- .insns = trace_program,
- .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+ LIBBPF_OPTS(bpf_prog_load_opts, trace_opts,
.log_level = 2,
- };
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
- int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+ int bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2",
+ trace_program, ARRAY_SIZE(trace_program),
+ &trace_opts);
if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded",
"failed: %d errno %d\n", bpf_fd, errno))
return;
@@ -34,15 +36,14 @@ void test_raw_tp_writable_test_run(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr skb_load_attr = {
- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .license = "GPL v2",
- .insns = skb_program,
- .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
- };
+ LIBBPF_OPTS(bpf_prog_load_opts, skb_opts,
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
- int filter_fd =
- bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
+ int filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL v2",
+ skb_program, ARRAY_SIZE(skb_program),
+ &skb_opts);
if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
filter_fd, errno))
goto out_bpffd;
@@ -56,21 +57,23 @@ void test_raw_tp_writable_test_run(void)
0,
};
- __u32 prog_ret;
- int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
- 0, &prog_ret, 0);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = test_skb,
+ .data_size_in = sizeof(test_skb),
+ .repeat = 1,
+ );
+ int err = bpf_prog_test_run_opts(filter_fd, &topts);
CHECK(err != 42, "test_run",
"tracepoint did not modify return value\n");
- CHECK(prog_ret != 0, "test_run_ret",
+ CHECK(topts.retval != 0, "test_run_ret",
"socket_filter did not return 0\n");
close(tp_fd);
- err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0,
- &prog_ret, 0);
+ err = bpf_prog_test_run_opts(filter_fd, &topts);
CHECK(err != 0, "test_run_notrace",
"test_run failed with %d errno %d\n", err, errno);
- CHECK(prog_ret != 0, "test_run_ret_notrace",
+ CHECK(topts.retval != 0, "test_run_ret_notrace",
"socket_filter did not return 0\n");
out_filterfd:
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
new file mode 100644
index 000000000000..d8f3d7a45fe9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "rbtree.skel.h"
+#include "rbtree_fail.skel.h"
+#include "rbtree_btf_fail__wrong_node_type.skel.h"
+#include "rbtree_btf_fail__add_wrong_type.skel.h"
+#include "rbtree_search.skel.h"
+
+static void test_rbtree_add_nodes(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes), &opts);
+ ASSERT_OK(ret, "rbtree_add_nodes run");
+ ASSERT_OK(opts.retval, "rbtree_add_nodes retval");
+ ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes less_callback_ran");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_add_nodes_nested(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes_nested), &opts);
+ ASSERT_OK(ret, "rbtree_add_nodes_nested run");
+ ASSERT_OK(opts.retval, "rbtree_add_nodes_nested retval");
+ ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes_nested less_callback_ran");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_add_and_remove(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove), &opts);
+ ASSERT_OK(ret, "rbtree_add_and_remove");
+ ASSERT_OK(opts.retval, "rbtree_add_and_remove retval");
+ ASSERT_EQ(skel->data->removed_key, 5, "rbtree_add_and_remove first removed key");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_add_and_remove_array(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove_array), &opts);
+ ASSERT_OK(ret, "rbtree_add_and_remove_array");
+ ASSERT_OK(opts.retval, "rbtree_add_and_remove_array retval");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_first_and_remove(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_first_and_remove), &opts);
+ ASSERT_OK(ret, "rbtree_first_and_remove");
+ ASSERT_OK(opts.retval, "rbtree_first_and_remove retval");
+ ASSERT_EQ(skel->data->first_data[0], 2, "rbtree_first_and_remove first rbtree_first()");
+ ASSERT_EQ(skel->data->removed_key, 1, "rbtree_first_and_remove first removed key");
+ ASSERT_EQ(skel->data->first_data[1], 4, "rbtree_first_and_remove second rbtree_first()");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_api_release_aliasing(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_api_release_aliasing), &opts);
+ ASSERT_OK(ret, "rbtree_api_release_aliasing");
+ ASSERT_OK(opts.retval, "rbtree_api_release_aliasing retval");
+ ASSERT_EQ(skel->data->first_data[0], 42, "rbtree_api_release_aliasing first rbtree_remove()");
+ ASSERT_EQ(skel->data->first_data[1], -1, "rbtree_api_release_aliasing second rbtree_remove()");
+
+ rbtree__destroy(skel);
+}
+
+void test_rbtree_success(void)
+{
+ if (test__start_subtest("rbtree_add_nodes"))
+ test_rbtree_add_nodes();
+ if (test__start_subtest("rbtree_add_nodes_nested"))
+ test_rbtree_add_nodes_nested();
+ if (test__start_subtest("rbtree_add_and_remove"))
+ test_rbtree_add_and_remove();
+ if (test__start_subtest("rbtree_add_and_remove_array"))
+ test_rbtree_add_and_remove_array();
+ if (test__start_subtest("rbtree_first_and_remove"))
+ test_rbtree_first_and_remove();
+ if (test__start_subtest("rbtree_api_release_aliasing"))
+ test_rbtree_api_release_aliasing();
+}
+
+#define BTF_FAIL_TEST(suffix) \
+void test_rbtree_btf_fail__##suffix(void) \
+{ \
+ struct rbtree_btf_fail__##suffix *skel; \
+ \
+ skel = rbtree_btf_fail__##suffix##__open_and_load(); \
+ if (!ASSERT_ERR_PTR(skel, \
+ "rbtree_btf_fail__" #suffix "__open_and_load unexpected success")) \
+ rbtree_btf_fail__##suffix##__destroy(skel); \
+}
+
+#define RUN_BTF_FAIL_TEST(suffix) \
+ if (test__start_subtest("rbtree_btf_fail__" #suffix)) \
+ test_rbtree_btf_fail__##suffix();
+
+BTF_FAIL_TEST(wrong_node_type);
+BTF_FAIL_TEST(add_wrong_type);
+
+void test_rbtree_btf_fail(void)
+{
+ RUN_BTF_FAIL_TEST(wrong_node_type);
+ RUN_BTF_FAIL_TEST(add_wrong_type);
+}
+
+void test_rbtree_fail(void)
+{
+ RUN_TESTS(rbtree_fail);
+}
+
+void test_rbtree_search(void)
+{
+ RUN_TESTS(rbtree_search);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
new file mode 100644
index 000000000000..246eb259c08a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "rcu_read_lock.skel.h"
+#include "cgroup_helpers.h"
+
+static unsigned long long cgroup_id;
+
+static void test_success(void)
+{
+ struct rcu_read_lock *skel;
+ int err;
+
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->target_pid = sys_gettid();
+
+ bpf_program__set_autoload(skel->progs.get_cgroup_id, true);
+ bpf_program__set_autoload(skel->progs.task_succ, true);
+ bpf_program__set_autoload(skel->progs.two_regions, true);
+ bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
+ bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+ bpf_program__set_autoload(skel->progs.nested_rcu_region, true);
+ bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_lock, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_unlock, true);
+ err = rcu_read_lock__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ err = rcu_read_lock__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->task_storage_val, 2, "task_storage_val");
+ ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
+out:
+ rcu_read_lock__destroy(skel);
+}
+
+static void test_rcuptr_acquire(void)
+{
+ struct rcu_read_lock *skel;
+ int err;
+
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->target_pid = sys_gettid();
+
+ bpf_program__set_autoload(skel->progs.task_acquire, true);
+ err = rcu_read_lock__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ err = rcu_read_lock__attach(skel);
+ ASSERT_OK(err, "skel_attach");
+out:
+ rcu_read_lock__destroy(skel);
+}
+
+static const char * const inproper_region_tests[] = {
+ "miss_lock",
+ "no_lock",
+ "miss_unlock",
+ "non_sleepable_rcu_mismatch",
+ "inproper_sleepable_helper",
+ "inproper_sleepable_kfunc",
+ "nested_rcu_region_unbalanced_1",
+ "nested_rcu_region_unbalanced_2",
+ "rcu_read_lock_global_subprog_lock",
+ "rcu_read_lock_global_subprog_unlock",
+ "rcu_read_lock_sleepable_helper_global_subprog",
+ "rcu_read_lock_sleepable_kfunc_global_subprog",
+ "rcu_read_lock_sleepable_global_subprog_indirect",
+};
+
+static void test_inproper_region(void)
+{
+ struct rcu_read_lock *skel;
+ struct bpf_program *prog;
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(inproper_region_tests); i++) {
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, inproper_region_tests[i]);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+ err = rcu_read_lock__load(skel);
+ ASSERT_ERR(err, "skel_load");
+out:
+ rcu_read_lock__destroy(skel);
+ }
+}
+
+static const char * const rcuptr_misuse_tests[] = {
+ "task_untrusted_rcuptr",
+ "cross_rcu_region",
+};
+
+static void test_rcuptr_misuse(void)
+{
+ struct rcu_read_lock *skel;
+ struct bpf_program *prog;
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(rcuptr_misuse_tests); i++) {
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, rcuptr_misuse_tests[i]);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+ err = rcu_read_lock__load(skel);
+ ASSERT_ERR(err, "skel_load");
+out:
+ rcu_read_lock__destroy(skel);
+ }
+}
+
+void test_rcu_read_lock(void)
+{
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/rcu_read_lock");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock"))
+ goto out;
+
+ cgroup_id = get_cgroup_id("/rcu_read_lock");
+ if (test__start_subtest("success"))
+ test_success();
+ if (test__start_subtest("rcuptr_acquire"))
+ test_rcuptr_acquire();
+ if (test__start_subtest("negative_tests_inproper_region"))
+ test_inproper_region();
+ if (test__start_subtest("negative_tests_rcuptr_misuse"))
+ test_rcuptr_misuse();
+ close(cgroup_fd);
+out:;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index 563e12120e77..19e2f2526dbd 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -16,7 +16,7 @@ struct rdonly_map_subtest {
void test_rdonly_maps(void)
{
- const char *file = "test_rdonly_maps.o";
+ const char *file = "test_rdonly_maps.bpf.o";
struct rdonly_map_subtest subtests[] = {
{ "skip loop", "skip_loop", 0, 0 },
{ "part loop", "part_loop", 3, 2 + 3 + 4 },
@@ -30,14 +30,14 @@ void test_rdonly_maps(void)
struct bss bss;
obj = bpf_object__open_file(file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
return;
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
goto cleanup;
- bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss");
+ bss_map = bpf_object__find_map_by_name(obj, ".bss");
if (CHECK(!bss_map, "find_bss_map", "failed\n"))
goto cleanup;
@@ -58,11 +58,8 @@ void test_rdonly_maps(void)
goto cleanup;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
- t->prog_name, PTR_ERR(link))) {
- link = NULL;
+ if (!ASSERT_OK_PTR(link, "attach_prog"))
goto cleanup;
- }
/* trigger probe */
usleep(1);
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
new file mode 100644
index 000000000000..a8d1eaa67020
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include "test_progs.h"
+#include "read_vsyscall.skel.h"
+
+#if defined(__x86_64__)
+/* For VSYSCALL_ADDR */
+#include <asm/vsyscall.h>
+#else
+/* To prevent build failure on non-x86 arch */
+#define VSYSCALL_ADDR 0UL
+#endif
+
+struct read_ret_desc {
+ const char *name;
+ int ret;
+} all_read[] = {
+ { .name = "probe_read_kernel", .ret = -ERANGE },
+ { .name = "probe_read_kernel_str", .ret = -ERANGE },
+ { .name = "probe_read", .ret = -ERANGE },
+ { .name = "probe_read_str", .ret = -ERANGE },
+ { .name = "probe_read_user", .ret = -EFAULT },
+ { .name = "probe_read_user_str", .ret = -EFAULT },
+ { .name = "copy_from_user", .ret = -EFAULT },
+ { .name = "copy_from_user_task", .ret = -EFAULT },
+ { .name = "copy_from_user_str", .ret = -EFAULT },
+ { .name = "copy_from_user_task_str", .ret = -EFAULT },
+};
+
+void test_read_vsyscall(void)
+{
+ struct read_vsyscall *skel;
+ unsigned int i;
+ int err;
+
+#if !defined(__x86_64__)
+ test__skip();
+ return;
+#endif
+ skel = read_vsyscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+ err = read_vsyscall__attach(skel);
+ if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
+ goto out;
+
+ /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE,
+ * but it doesn't affect the returned error codes.
+ */
+ skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
+ usleep(1);
+
+ for (i = 0; i < ARRAY_SIZE(all_read); i++)
+ ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name);
+out:
+ read_vsyscall__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c
index 0e378d63fe18..23552d3e3365 100644
--- a/tools/testing/selftests/bpf/prog_tests/recursion.c
+++ b/tools/testing/selftests/bpf/prog_tests/recursion.c
@@ -20,19 +20,19 @@ void test_recursion(void)
goto out;
ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0");
- bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0);
+ bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key);
ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1");
- bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0);
+ bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key);
ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2");
ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0");
- bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0);
+ bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key);
ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1");
- bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0);
+ bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key);
ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2");
- err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_lookup),
- &prog_info, &prog_info_len);
+ err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.on_delete),
+ &prog_info, &prog_info_len);
if (!ASSERT_OK(err, "get_prog_info"))
goto out;
ASSERT_EQ(prog_info.recursion_misses, 2, "recursion_misses");
diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
new file mode 100644
index 000000000000..0ffa01d54ce2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Red Hat, Inc. */
+#include <test_progs.h>
+#include "fentry_recursive.skel.h"
+#include "fentry_recursive_target.skel.h"
+#include <bpf/btf.h>
+#include "bpf/libbpf_internal.h"
+
+/* Test recursive attachment of tracing progs with more than one nesting level
+ * is not possible. Create a chain of attachment, verify that the last prog
+ * will fail. Depending on the arguments, following cases are tested:
+ *
+ * - Recursive loading of tracing progs, without attaching (attach = false,
+ * detach = false). The chain looks like this:
+ * load target
+ * load fentry1 -> target
+ * load fentry2 -> fentry1 (fail)
+ *
+ * - Recursive attach of tracing progs (attach = true, detach = false). The
+ * chain looks like this:
+ * load target
+ * load fentry1 -> target
+ * attach fentry1 -> target
+ * load fentry2 -> fentry1 (fail)
+ *
+ * - Recursive attach and detach of tracing progs (attach = true, detach =
+ * true). This validates that attach_tracing_prog flag will be set throughout
+ * the whole lifecycle of an fentry prog, independently from whether it's
+ * detached. The chain looks like this:
+ * load target
+ * load fentry1 -> target
+ * attach fentry1 -> target
+ * detach fentry1
+ * load fentry2 -> fentry1 (fail)
+ */
+static void test_recursive_fentry_chain(bool attach, bool detach)
+{
+ struct fentry_recursive_target *target_skel = NULL;
+ struct fentry_recursive *tracing_chain[2] = {};
+ struct bpf_program *prog;
+ int prev_fd, err;
+
+ target_skel = fentry_recursive_target__open_and_load();
+ if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load"))
+ return;
+
+ /* Create an attachment chain with two fentry progs */
+ for (int i = 0; i < 2; i++) {
+ tracing_chain[i] = fentry_recursive__open();
+ if (!ASSERT_OK_PTR(tracing_chain[i], "fentry_recursive__open"))
+ goto close_prog;
+
+ /* The first prog in the chain is going to be attached to the target
+ * fentry program, the second one to the previous in the chain.
+ */
+ prog = tracing_chain[i]->progs.recursive_attach;
+ if (i == 0) {
+ prev_fd = bpf_program__fd(target_skel->progs.test1);
+ err = bpf_program__set_attach_target(prog, prev_fd, "test1");
+ } else {
+ prev_fd = bpf_program__fd(tracing_chain[i-1]->progs.recursive_attach);
+ err = bpf_program__set_attach_target(prog, prev_fd, "recursive_attach");
+ }
+
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto close_prog;
+
+ err = fentry_recursive__load(tracing_chain[i]);
+ /* The first attach should succeed, the second fail */
+ if (i == 0) {
+ if (!ASSERT_OK(err, "fentry_recursive__load"))
+ goto close_prog;
+
+ if (attach) {
+ err = fentry_recursive__attach(tracing_chain[i]);
+ if (!ASSERT_OK(err, "fentry_recursive__attach"))
+ goto close_prog;
+ }
+
+ if (detach) {
+ /* Flag attach_tracing_prog should still be set, preventing
+ * attachment of the following prog.
+ */
+ fentry_recursive__detach(tracing_chain[i]);
+ }
+ } else {
+ if (!ASSERT_ERR(err, "fentry_recursive__load"))
+ goto close_prog;
+ }
+ }
+
+close_prog:
+ fentry_recursive_target__destroy(target_skel);
+ for (int i = 0; i < 2; i++) {
+ fentry_recursive__destroy(tracing_chain[i]);
+ }
+}
+
+void test_recursive_fentry(void)
+{
+ if (test__start_subtest("attach"))
+ test_recursive_fentry_chain(true, false);
+ if (test__start_subtest("load"))
+ test_recursive_fentry_chain(false, false);
+ if (test__start_subtest("detach"))
+ test_recursive_fentry_chain(true, true);
+}
+
+/* Test that a tracing prog reattachment (when we land in
+ * "prog->aux->dst_trampoline and tgt_prog is NULL" branch in
+ * bpf_tracing_prog_attach) does not lead to a crash due to missing attach_btf
+ */
+void test_fentry_attach_btf_presence(void)
+{
+ struct fentry_recursive_target *target_skel = NULL;
+ struct fentry_recursive *tracing_skel = NULL;
+ struct bpf_program *prog;
+ int err, link_fd, tgt_prog_fd;
+
+ target_skel = fentry_recursive_target__open_and_load();
+ if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load"))
+ goto close_prog;
+
+ tracing_skel = fentry_recursive__open();
+ if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open"))
+ goto close_prog;
+
+ prog = tracing_skel->progs.recursive_attach;
+ tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target);
+ err = bpf_program__set_attach_target(prog, tgt_prog_fd, "fentry_target");
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto close_prog;
+
+ err = fentry_recursive__load(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__load"))
+ goto close_prog;
+
+ tgt_prog_fd = bpf_program__fd(tracing_skel->progs.recursive_attach);
+ link_fd = bpf_link_create(tgt_prog_fd, 0, BPF_TRACE_FENTRY, NULL);
+ if (!ASSERT_GE(link_fd, 0, "link_fd"))
+ goto close_prog;
+
+ fentry_recursive__detach(tracing_skel);
+
+ err = fentry_recursive__attach(tracing_skel);
+ ASSERT_ERR(err, "fentry_recursive__attach");
+
+close_prog:
+ fentry_recursive_target__destroy(target_skel);
+ fentry_recursive__destroy(tracing_skel);
+}
+
+static void *fentry_target_test_run(void *arg)
+{
+ for (;;) {
+ int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err;
+
+ if (prog_fd == -1)
+ break;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "fentry_target test_run"))
+ break;
+ }
+
+ return NULL;
+}
+
+void test_fentry_attach_stress(void)
+{
+ struct fentry_recursive_target *target_skel = NULL;
+ struct fentry_recursive *tracing_skel = NULL;
+ struct bpf_program *prog;
+ int err, i, tgt_prog_fd;
+ pthread_t thread;
+
+ target_skel = fentry_recursive_target__open_and_load();
+ if (!ASSERT_OK_PTR(target_skel,
+ "fentry_recursive_target__open_and_load"))
+ goto close_prog;
+ tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target);
+ err = pthread_create(&thread, NULL,
+ fentry_target_test_run, &tgt_prog_fd);
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto close_prog;
+
+ for (i = 0; i < 1000; i++) {
+ tracing_skel = fentry_recursive__open();
+ if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open"))
+ goto stop_thread;
+
+ prog = tracing_skel->progs.recursive_attach;
+ err = bpf_program__set_attach_target(prog, tgt_prog_fd,
+ "fentry_target");
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto stop_thread;
+
+ err = fentry_recursive__load(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__load"))
+ goto stop_thread;
+
+ err = fentry_recursive__attach(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__attach"))
+ goto stop_thread;
+
+ fentry_recursive__destroy(tracing_skel);
+ tracing_skel = NULL;
+ }
+
+stop_thread:
+ __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST);
+ err = pthread_join(thread, NULL);
+ ASSERT_OK(err, "pthread_join");
+close_prog:
+ fentry_recursive__destroy(tracing_skel);
+ fentry_recursive_target__destroy(target_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
new file mode 100644
index 000000000000..d2c0542716a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "refcounted_kptr.skel.h"
+#include "refcounted_kptr_fail.skel.h"
+
+void test_refcounted_kptr(void)
+{
+ RUN_TESTS(refcounted_kptr);
+}
+
+void test_refcounted_kptr_fail(void)
+{
+ RUN_TESTS(refcounted_kptr_fail);
+}
+
+void test_refcounted_kptr_wrong_owner(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct refcounted_kptr *skel;
+ int ret;
+
+ skel = refcounted_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a1), &opts);
+ ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a1");
+ ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a1 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_b), &opts);
+ ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_b");
+ ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_b retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a2), &opts);
+ ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a2");
+ ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
+ refcounted_kptr__destroy(skel);
+}
+
+void test_percpu_hash_refcounted_kptr_refcount_leak(void)
+{
+ struct refcounted_kptr *skel;
+ int cpu_nr, fd, err, key = 0;
+ struct bpf_map *map;
+ size_t values_sz;
+ u64 *values;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ cpu_nr = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ values = calloc(cpu_nr, sizeof(u64));
+ if (!ASSERT_OK_PTR(values, "calloc values"))
+ return;
+
+ skel = refcounted_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
+ free(values);
+ return;
+ }
+
+ values_sz = cpu_nr * sizeof(u64);
+ memset(values, 0, values_sz);
+
+ map = skel->maps.percpu_hash;
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
+ goto out;
+
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(opts.retval, 1, "opts.retval");
+
+out:
+ refcounted_kptr__destroy(skel);
+ free(values);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index ac1ee10cffd8..d863205bbe95 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -3,50 +3,61 @@
void test_reference_tracking(void)
{
- const char *file = "test_sk_lookup_kern.o";
+ const char *file = "test_sk_lookup_kern.bpf.o";
const char *obj_name = "ref_track";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
.object_name = obj_name,
.relaxed_maps = true,
);
- struct bpf_object *obj;
+ struct bpf_object *obj_iter, *obj = NULL;
struct bpf_program *prog;
__u32 duration = 0;
int err = 0;
- obj = bpf_object__open_file(file, &open_opts);
- if (CHECK_FAIL(IS_ERR(obj)))
+ obj_iter = bpf_object__open_file(file, &open_opts);
+ if (!ASSERT_OK_PTR(obj_iter, "obj_iter_open_file"))
return;
- if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
+ if (CHECK(strcmp(bpf_object__name(obj_iter), obj_name), "obj_name",
"wrong obj name '%s', expected '%s'\n",
- bpf_object__name(obj), obj_name))
+ bpf_object__name(obj_iter), obj_name))
goto cleanup;
- bpf_object__for_each_program(prog, obj) {
- const char *title;
+ bpf_object__for_each_program(prog, obj_iter) {
+ struct bpf_program *p;
+ const char *name;
- /* Ignore .text sections */
- title = bpf_program__section_name(prog);
- if (strstr(title, ".text") != NULL)
+ name = bpf_program__name(prog);
+ if (!test__start_subtest(name))
continue;
- if (!test__start_subtest(title))
- continue;
+ obj = bpf_object__open_file(file, &open_opts);
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
+ goto cleanup;
+
+ /* all programs are not loaded by default, so just set
+ * autoload to true for the single prog under test
+ */
+ p = bpf_object__find_program_by_name(obj, name);
+ bpf_program__set_autoload(p, true);
- /* Expect verifier failure if test name has 'fail' */
- if (strstr(title, "fail") != NULL) {
+ /* Expect verifier failure if test name has 'err' */
+ if (strncmp(name, "err_", sizeof("err_") - 1) == 0) {
libbpf_print_fn_t old_print_fn;
old_print_fn = libbpf_set_print(NULL);
- err = !bpf_program__load(prog, "GPL", 0);
+ err = !bpf_object__load(obj);
libbpf_set_print(old_print_fn);
} else {
- err = bpf_program__load(prog, "GPL", 0);
+ err = bpf_object__load(obj);
}
- CHECK(err, title, "\n");
+ ASSERT_OK(err, name);
+
+ bpf_object__close(obj);
+ obj = NULL;
}
cleanup:
bpf_object__close(obj);
+ bpf_object__close(obj_iter);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
new file mode 100644
index 000000000000..d93a0c7b1786
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -0,0 +1,2161 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <test_progs.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+/* =================================
+ * SHORT AND CONSISTENT NUMBER TYPES
+ * =================================
+ */
+#define U64_MAX ((u64)UINT64_MAX)
+#define U32_MAX ((u32)UINT_MAX)
+#define U16_MAX ((u32)UINT_MAX)
+#define S64_MIN ((s64)INT64_MIN)
+#define S64_MAX ((s64)INT64_MAX)
+#define S32_MIN ((s32)INT_MIN)
+#define S32_MAX ((s32)INT_MAX)
+#define S16_MIN ((s16)0x80000000)
+#define S16_MAX ((s16)0x7fffffff)
+
+typedef unsigned long long ___u64;
+typedef unsigned int ___u32;
+typedef long long ___s64;
+typedef int ___s32;
+
+/* avoid conflicts with already defined types in kernel headers */
+#define u64 ___u64
+#define u32 ___u32
+#define s64 ___s64
+#define s32 ___s32
+
+/* ==================================
+ * STRING BUF ABSTRACTION AND HELPERS
+ * ==================================
+ */
+struct strbuf {
+ size_t buf_sz;
+ int pos;
+ char buf[0];
+};
+
+#define DEFINE_STRBUF(name, N) \
+ struct { struct strbuf buf; char data[(N)]; } ___##name; \
+ struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf)
+
+__printf(2, 3)
+static inline void snappendf(struct strbuf *s, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ s->pos += vsnprintf(s->buf + s->pos,
+ s->pos < s->buf_sz ? s->buf_sz - s->pos : 0,
+ fmt, args);
+ va_end(args);
+}
+
+/* ==================================
+ * GENERIC NUMBER TYPE AND OPERATIONS
+ * ==================================
+ */
+enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 };
+
+static __always_inline u64 min_t(enum num_t t, u64 x, u64 y)
+{
+ switch (t) {
+ case U64: return (u64)x < (u64)y ? (u64)x : (u64)y;
+ case U32: return (u32)x < (u32)y ? (u32)x : (u32)y;
+ case S64: return (s64)x < (s64)y ? (s64)x : (s64)y;
+ case S32: return (s32)x < (s32)y ? (s32)x : (s32)y;
+ default: printf("min_t!\n"); exit(1);
+ }
+}
+
+static __always_inline u64 max_t(enum num_t t, u64 x, u64 y)
+{
+ switch (t) {
+ case U64: return (u64)x > (u64)y ? (u64)x : (u64)y;
+ case U32: return (u32)x > (u32)y ? (u32)x : (u32)y;
+ case S64: return (s64)x > (s64)y ? (s64)x : (s64)y;
+ case S32: return (s32)x > (s32)y ? (u32)(s32)x : (u32)(s32)y;
+ default: printf("max_t!\n"); exit(1);
+ }
+}
+
+static __always_inline u64 cast_t(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return (u64)x;
+ case U32: return (u32)x;
+ case S64: return (s64)x;
+ case S32: return (u32)(s32)x;
+ default: printf("cast_t!\n"); exit(1);
+ }
+}
+
+static const char *t_str(enum num_t t)
+{
+ switch (t) {
+ case U64: return "u64";
+ case U32: return "u32";
+ case S64: return "s64";
+ case S32: return "s32";
+ default: printf("t_str!\n"); exit(1);
+ }
+}
+
+static enum num_t t_is_32(enum num_t t)
+{
+ switch (t) {
+ case U64: return false;
+ case U32: return true;
+ case S64: return false;
+ case S32: return true;
+ default: printf("t_is_32!\n"); exit(1);
+ }
+}
+
+static enum num_t t_signed(enum num_t t)
+{
+ switch (t) {
+ case U64: return S64;
+ case U32: return S32;
+ case S64: return S64;
+ case S32: return S32;
+ default: printf("t_signed!\n"); exit(1);
+ }
+}
+
+static enum num_t t_unsigned(enum num_t t)
+{
+ switch (t) {
+ case U64: return U64;
+ case U32: return U32;
+ case S64: return U64;
+ case S32: return U32;
+ default: printf("t_unsigned!\n"); exit(1);
+ }
+}
+
+#define UNUM_MAX_DECIMAL U16_MAX
+#define SNUM_MAX_DECIMAL S16_MAX
+#define SNUM_MIN_DECIMAL S16_MIN
+
+static bool num_is_small(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return (u64)x <= UNUM_MAX_DECIMAL;
+ case U32: return (u32)x <= UNUM_MAX_DECIMAL;
+ case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL;
+ case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL;
+ default: printf("num_is_small!\n"); exit(1);
+ }
+}
+
+static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x)
+{
+ bool is_small = num_is_small(t, x);
+
+ if (is_small) {
+ switch (t) {
+ case U64: return snappendf(sb, "%llu", (u64)x);
+ case U32: return snappendf(sb, "%u", (u32)x);
+ case S64: return snappendf(sb, "%lld", (s64)x);
+ case S32: return snappendf(sb, "%d", (s32)x);
+ default: printf("snprintf_num!\n"); exit(1);
+ }
+ } else {
+ switch (t) {
+ case U64:
+ if (x == U64_MAX)
+ return snappendf(sb, "U64_MAX");
+ else if (x >= U64_MAX - 256)
+ return snappendf(sb, "U64_MAX-%llu", U64_MAX - x);
+ else
+ return snappendf(sb, "%#llx", (u64)x);
+ case U32:
+ if ((u32)x == U32_MAX)
+ return snappendf(sb, "U32_MAX");
+ else if ((u32)x >= U32_MAX - 256)
+ return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x);
+ else
+ return snappendf(sb, "%#x", (u32)x);
+ case S64:
+ if ((s64)x == S64_MAX)
+ return snappendf(sb, "S64_MAX");
+ else if ((s64)x >= S64_MAX - 256)
+ return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x);
+ else if ((s64)x == S64_MIN)
+ return snappendf(sb, "S64_MIN");
+ else if ((s64)x <= S64_MIN + 256)
+ return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN);
+ else
+ return snappendf(sb, "%#llx", (s64)x);
+ case S32:
+ if ((s32)x == S32_MAX)
+ return snappendf(sb, "S32_MAX");
+ else if ((s32)x >= S32_MAX - 256)
+ return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x);
+ else if ((s32)x == S32_MIN)
+ return snappendf(sb, "S32_MIN");
+ else if ((s32)x <= S32_MIN + 256)
+ return snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN);
+ else
+ return snappendf(sb, "%#x", (s32)x);
+ default: printf("snprintf_num!\n"); exit(1);
+ }
+ }
+}
+
+/* ===================================
+ * GENERIC RANGE STRUCT AND OPERATIONS
+ * ===================================
+ */
+struct range {
+ u64 a, b;
+};
+
+static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x)
+{
+ if (x.a == x.b)
+ return snprintf_num(t, sb, x.a);
+
+ snappendf(sb, "[");
+ snprintf_num(t, sb, x.a);
+ snappendf(sb, "; ");
+ snprintf_num(t, sb, x.b);
+ snappendf(sb, "]");
+}
+
+static void print_range(enum num_t t, struct range x, const char *sfx)
+{
+ DEFINE_STRBUF(sb, 128);
+
+ snprintf_range(t, sb, x);
+ printf("%s%s", sb->buf, sfx);
+}
+
+static const struct range unkn[] = {
+ [U64] = { 0, U64_MAX },
+ [U32] = { 0, U32_MAX },
+ [S64] = { (u64)S64_MIN, (u64)S64_MAX },
+ [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX },
+};
+
+static struct range unkn_subreg(enum num_t t)
+{
+ switch (t) {
+ case U64: return unkn[U32];
+ case U32: return unkn[U32];
+ case S64: return unkn[U32];
+ case S32: return unkn[S32];
+ default: printf("unkn_subreg!\n"); exit(1);
+ }
+}
+
+static struct range range(enum num_t t, u64 a, u64 b)
+{
+ switch (t) {
+ case U64: return (struct range){ (u64)a, (u64)b };
+ case U32: return (struct range){ (u32)a, (u32)b };
+ case S64: return (struct range){ (s64)a, (s64)b };
+ case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b };
+ default: printf("range!\n"); exit(1);
+ }
+}
+
+static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; }
+static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; }
+static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); }
+static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; }
+
+static bool range_eq(struct range x, struct range y)
+{
+ return x.a == y.a && x.b == y.b;
+}
+
+static struct range range_cast_to_s32(struct range x)
+{
+ u64 a = x.a, b = x.b;
+
+ /* if upper 32 bits are constant, lower 32 bits should form a proper
+ * s32 range to be correct
+ */
+ if (upper32(a) == upper32(b) && (s32)a <= (s32)b)
+ return range(S32, a, b);
+
+ /* Special case where upper bits form a small sequence of two
+ * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
+ * 0x00000000 is also valid), while lower bits form a proper s32 range
+ * going from negative numbers to positive numbers.
+ *
+ * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating
+ * over full 64-bit numbers range will form a proper [-16, 16]
+ * ([0xffffff00; 0x00000010]) range in its lower 32 bits.
+ */
+ if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0)
+ return range(S32, a, b);
+
+ /* otherwise we can't derive much meaningful information */
+ return unkn[S32];
+}
+
+static struct range range_cast_u64(enum num_t to_t, struct range x)
+{
+ u64 a = (u64)x.a, b = (u64)x.b;
+
+ switch (to_t) {
+ case U64:
+ return x;
+ case U32:
+ if (upper32(a) != upper32(b))
+ return unkn[U32];
+ return range(U32, a, b);
+ case S64:
+ if (sign64(a) != sign64(b))
+ return unkn[S64];
+ return range(S64, a, b);
+ case S32:
+ return range_cast_to_s32(x);
+ default: printf("range_cast_u64!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_s64(enum num_t to_t, struct range x)
+{
+ s64 a = (s64)x.a, b = (s64)x.b;
+
+ switch (to_t) {
+ case U64:
+ /* equivalent to (s64)a <= (s64)b check */
+ if (sign64(a) != sign64(b))
+ return unkn[U64];
+ return range(U64, a, b);
+ case U32:
+ if (upper32(a) != upper32(b) || sign32(a) != sign32(b))
+ return unkn[U32];
+ return range(U32, a, b);
+ case S64:
+ return x;
+ case S32:
+ return range_cast_to_s32(x);
+ default: printf("range_cast_s64!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_u32(enum num_t to_t, struct range x)
+{
+ u32 a = (u32)x.a, b = (u32)x.b;
+
+ switch (to_t) {
+ case U64:
+ case S64:
+ /* u32 is always a valid zero-extended u64/s64 */
+ return range(to_t, a, b);
+ case U32:
+ return x;
+ case S32:
+ return range_cast_to_s32(range(U32, a, b));
+ default: printf("range_cast_u32!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_s32(enum num_t to_t, struct range x)
+{
+ s32 a = (s32)x.a, b = (s32)x.b;
+
+ switch (to_t) {
+ case U64:
+ case U32:
+ case S64:
+ if (sign32(a) != sign32(b))
+ return unkn[to_t];
+ return range(to_t, a, b);
+ case S32:
+ return x;
+ default: printf("range_cast_s32!\n"); exit(1);
+ }
+}
+
+/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving
+ * all possible information. Worst case, it will be unknown range within
+ * *to_t* domain, if nothing more specific can be guaranteed during the
+ * conversion
+ */
+static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from)
+{
+ switch (from_t) {
+ case U64: return range_cast_u64(to_t, from);
+ case U32: return range_cast_u32(to_t, from);
+ case S64: return range_cast_s64(to_t, from);
+ case S32: return range_cast_s32(to_t, from);
+ default: printf("range_cast!\n"); exit(1);
+ }
+}
+
+static bool is_valid_num(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return true;
+ case U32: return upper32(x) == 0;
+ case S64: return true;
+ case S32: return upper32(x) == 0;
+ default: printf("is_valid_num!\n"); exit(1);
+ }
+}
+
+static bool is_valid_range(enum num_t t, struct range x)
+{
+ if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b))
+ return false;
+
+ switch (t) {
+ case U64: return (u64)x.a <= (u64)x.b;
+ case U32: return (u32)x.a <= (u32)x.b;
+ case S64: return (s64)x.a <= (s64)x.b;
+ case S32: return (s32)x.a <= (s32)x.b;
+ default: printf("is_valid_range!\n"); exit(1);
+ }
+}
+
+static struct range range_improve(enum num_t t, struct range old, struct range new)
+{
+ return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b));
+}
+
+static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y)
+{
+ struct range y_cast;
+
+ y_cast = range_cast(y_t, x_t, y);
+
+ /* If we know that
+ * - *x* is in the range of signed 32bit value, and
+ * - *y_cast* range is 32-bit signed non-negative
+ * then *x* range can be improved with *y_cast* such that *x* range
+ * is 32-bit signed non-negative. Otherwise, if the new range for *x*
+ * allows upper 32-bit * 0xffffffff then the eventual new range for
+ * *x* will be out of signed 32-bit range which violates the origin
+ * *x* range.
+ */
+ if (x_t == S64 && y_t == S32 && y_cast.a <= S32_MAX && y_cast.b <= S32_MAX &&
+ (s64)x.a >= S32_MIN && (s64)x.b <= S32_MAX)
+ return range_improve(x_t, x, y_cast);
+
+ /* the case when new range knowledge, *y*, is a 32-bit subregister
+ * range, while previous range knowledge, *x*, is a full register
+ * 64-bit range, needs special treatment to take into account upper 32
+ * bits of full register range
+ */
+ if (t_is_32(y_t) && !t_is_32(x_t)) {
+ struct range x_swap;
+
+ /* some combinations of upper 32 bits and sign bit can lead to
+ * invalid ranges, in such cases it's easier to detect them
+ * after cast/swap than try to enumerate all the conditions
+ * under which transformation and knowledge transfer is valid
+ */
+ x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b));
+ if (!is_valid_range(x_t, x_swap))
+ return x;
+ return range_improve(x_t, x, x_swap);
+ }
+
+ if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) {
+ if (x_t == S64 && x.a > x.b) {
+ if (x.b < y.a && x.a <= y.b)
+ return range(x_t, x.a, y.b);
+ if (x.a > y.b && x.b >= y.a)
+ return range(x_t, y.a, x.b);
+ } else if (x_t == U64 && y.a > y.b) {
+ if (y.b < x.a && y.a <= x.b)
+ return range(x_t, y.a, x.b);
+ if (y.a > x.b && y.b >= x.a)
+ return range(x_t, x.a, y.b);
+ }
+ }
+
+ /* otherwise, plain range cast and intersection works */
+ return range_improve(x_t, x, y_cast);
+}
+
+/* =======================
+ * GENERIC CONDITIONAL OPS
+ * =======================
+ */
+enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE };
+
+static enum op complement_op(enum op op)
+{
+ switch (op) {
+ case OP_LT: return OP_GE;
+ case OP_LE: return OP_GT;
+ case OP_GT: return OP_LE;
+ case OP_GE: return OP_LT;
+ case OP_EQ: return OP_NE;
+ case OP_NE: return OP_EQ;
+ default: printf("complement_op!\n"); exit(1);
+ }
+}
+
+static const char *op_str(enum op op)
+{
+ switch (op) {
+ case OP_LT: return "<";
+ case OP_LE: return "<=";
+ case OP_GT: return ">";
+ case OP_GE: return ">=";
+ case OP_EQ: return "==";
+ case OP_NE: return "!=";
+ default: printf("op_str!\n"); exit(1);
+ }
+}
+
+/* Can register with range [x.a, x.b] *EVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a register with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+#define range_canbe(T) do { \
+ switch (op) { \
+ case OP_LT: return (T)x.a < (T)y.b; \
+ case OP_LE: return (T)x.a <= (T)y.b; \
+ case OP_GT: return (T)x.b > (T)y.a; \
+ case OP_GE: return (T)x.b >= (T)y.a; \
+ case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \
+ case OP_NE: return !((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \
+ default: printf("range_canbe op %d\n", op); exit(1); \
+ } \
+} while (0)
+
+ switch (t) {
+ case U64: { range_canbe(u64); }
+ case U32: { range_canbe(u32); }
+ case S64: { range_canbe(s64); }
+ case S32: { range_canbe(s32); }
+ default: printf("range_canbe!\n"); exit(1);
+ }
+#undef range_canbe
+}
+
+/* Does register with range [x.a, x.b] *ALWAYS* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a register with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ /* always op <=> ! canbe complement(op) */
+ return !range_canbe_op(t, x, y, complement_op(op));
+}
+
+/* Does register with range [x.a, x.b] *NEVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a register with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ return !range_canbe_op(t, x, y, op);
+}
+
+/* similar to verifier's is_branch_taken():
+ * 1 - always taken;
+ * 0 - never taken,
+ * -1 - unsure.
+ */
+static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ if (range_always_op(t, x, y, op))
+ return 1;
+ if (range_never_op(t, x, y, op))
+ return 0;
+ return -1;
+}
+
+/* What would be the new estimates for register x and y ranges assuming truthful
+ * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy.
+ *
+ * We assume "interesting" cases where ranges overlap. Cases where it's
+ * obvious that (x OP y) is either always true or false should be filtered with
+ * range_never and range_always checks.
+ */
+static void range_cond(enum num_t t, struct range x, struct range y,
+ enum op op, struct range *newx, struct range *newy)
+{
+ if (!range_canbe_op(t, x, y, op)) {
+ /* nothing to adjust, can't happen, return original values */
+ *newx = x;
+ *newy = y;
+ return;
+ }
+ switch (op) {
+ case OP_LT:
+ *newx = range(t, x.a, min_t(t, x.b, y.b - 1));
+ *newy = range(t, max_t(t, x.a + 1, y.a), y.b);
+ break;
+ case OP_LE:
+ *newx = range(t, x.a, min_t(t, x.b, y.b));
+ *newy = range(t, max_t(t, x.a, y.a), y.b);
+ break;
+ case OP_GT:
+ *newx = range(t, max_t(t, x.a, y.a + 1), x.b);
+ *newy = range(t, y.a, min_t(t, x.b - 1, y.b));
+ break;
+ case OP_GE:
+ *newx = range(t, max_t(t, x.a, y.a), x.b);
+ *newy = range(t, y.a, min_t(t, x.b, y.b));
+ break;
+ case OP_EQ:
+ *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+ *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+ break;
+ case OP_NE:
+ /* below logic is supported by the verifier now */
+ if (x.a == x.b && x.a == y.a) {
+ /* X is a constant matching left side of Y */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a + 1, y.b);
+ } else if (x.a == x.b && x.b == y.b) {
+ /* X is a constant matching right side of Y */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b - 1);
+ } else if (y.a == y.b && x.a == y.a) {
+ /* Y is a constant matching left side of X */
+ *newx = range(t, x.a + 1, x.b);
+ *newy = range(t, y.a, y.b);
+ } else if (y.a == y.b && x.b == y.b) {
+ /* Y is a constant matching right side of X */
+ *newx = range(t, x.a, x.b - 1);
+ *newy = range(t, y.a, y.b);
+ } else {
+ /* generic case, can't derive more information */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b);
+ }
+
+ break;
+ default:
+ break;
+ }
+}
+
+/* =======================
+ * REGISTER STATE HANDLING
+ * =======================
+ */
+struct reg_state {
+ struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */
+ bool valid;
+};
+
+static void print_reg_state(struct reg_state *r, const char *sfx)
+{
+ DEFINE_STRBUF(sb, 512);
+ enum num_t t;
+ int cnt = 0;
+
+ if (!r->valid) {
+ printf("<not found>%s", sfx);
+ return;
+ }
+
+ snappendf(sb, "scalar(");
+ for (t = first_t; t <= last_t; t++) {
+ snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t));
+ snprintf_range(t, sb, r->r[t]);
+ }
+ snappendf(sb, ")");
+
+ printf("%s%s", sb->buf, sfx);
+}
+
+static void print_refinement(enum num_t s_t, struct range src,
+ enum num_t d_t, struct range old, struct range new,
+ const char *ctx)
+{
+ printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t));
+ print_range(s_t, src, "");
+ printf(" (%s)DST_OLD=", t_str(d_t));
+ print_range(d_t, old, "");
+ printf(" (%s)DST_NEW=", t_str(d_t));
+ print_range(d_t, new, "\n");
+}
+
+static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx)
+{
+ enum num_t d_t, s_t;
+ struct range old;
+ bool keep_going = false;
+
+again:
+ /* try to derive new knowledge from just learned range x of type t */
+ for (d_t = first_t; d_t <= last_t; d_t++) {
+ old = r->r[d_t];
+ r->r[d_t] = range_refine(d_t, r->r[d_t], t, x);
+ if (!range_eq(r->r[d_t], old)) {
+ keep_going = true;
+ if (env.verbosity >= VERBOSE_VERY)
+ print_refinement(t, x, d_t, old, r->r[d_t], ctx);
+ }
+ }
+
+ /* now see if we can derive anything new from updated reg_state's ranges */
+ for (s_t = first_t; s_t <= last_t; s_t++) {
+ for (d_t = first_t; d_t <= last_t; d_t++) {
+ old = r->r[d_t];
+ r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]);
+ if (!range_eq(r->r[d_t], old)) {
+ keep_going = true;
+ if (env.verbosity >= VERBOSE_VERY)
+ print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx);
+ }
+ }
+ }
+
+ /* keep refining until we converge */
+ if (keep_going) {
+ keep_going = false;
+ goto again;
+ }
+}
+
+static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val)
+{
+ enum num_t tt;
+
+ rs->valid = true;
+ for (tt = first_t; tt <= last_t; tt++)
+ rs->r[tt] = tt == t ? range(t, val, val) : unkn[tt];
+
+ reg_state_refine(rs, t, rs->r[t], "CONST");
+}
+
+static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op,
+ struct reg_state *newx, struct reg_state *newy, const char *ctx)
+{
+ char buf[32];
+ enum num_t ts[2];
+ struct reg_state xx = *x, yy = *y;
+ int i, t_cnt;
+ struct range z1, z2;
+
+ if (op == OP_EQ || op == OP_NE) {
+ /* OP_EQ and OP_NE are sign-agnostic, so we need to process
+ * both signed and unsigned domains at the same time
+ */
+ ts[0] = t_unsigned(t);
+ ts[1] = t_signed(t);
+ t_cnt = 2;
+ } else {
+ ts[0] = t;
+ t_cnt = 1;
+ }
+
+ for (i = 0; i < t_cnt; i++) {
+ t = ts[i];
+ z1 = x->r[t];
+ z2 = y->r[t];
+
+ range_cond(t, z1, z2, op, &z1, &z2);
+
+ if (newx) {
+ snprintf(buf, sizeof(buf), "%s R1", ctx);
+ reg_state_refine(&xx, t, z1, buf);
+ }
+ if (newy) {
+ snprintf(buf, sizeof(buf), "%s R2", ctx);
+ reg_state_refine(&yy, t, z2, buf);
+ }
+ }
+
+ if (newx)
+ *newx = xx;
+ if (newy)
+ *newy = yy;
+}
+
+static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y,
+ enum op op)
+{
+ if (op == OP_EQ || op == OP_NE) {
+ /* OP_EQ and OP_NE are sign-agnostic */
+ enum num_t tu = t_unsigned(t);
+ enum num_t ts = t_signed(t);
+ int br_u, br_s, br;
+
+ br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op);
+ br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op);
+
+ if (br_u >= 0 && br_s >= 0 && br_u != br_s)
+ ASSERT_FALSE(true, "branch taken inconsistency!\n");
+
+ /* if 64-bit ranges are indecisive, use 32-bit subranges to
+ * eliminate always/never taken branches, if possible
+ */
+ if (br_u == -1 && (t == U64 || t == S64)) {
+ br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op);
+ /* we can only reject for OP_EQ, never take branch
+ * based on lower 32 bits
+ */
+ if (op == OP_EQ && br == 0)
+ return 0;
+ /* for OP_NEQ we can be conclusive only if lower 32 bits
+ * differ and thus inequality branch is always taken
+ */
+ if (op == OP_NE && br == 1)
+ return 1;
+
+ br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op);
+ if (op == OP_EQ && br == 0)
+ return 0;
+ if (op == OP_NE && br == 1)
+ return 1;
+ }
+
+ return br_u >= 0 ? br_u : br_s;
+ }
+ return range_branch_taken_op(t, x->r[t], y->r[t], op);
+}
+
+/* =====================================
+ * BPF PROGS GENERATION AND VERIFICATION
+ * =====================================
+ */
+struct case_spec {
+ /* whether to init full register (r1) or sub-register (w1) */
+ bool init_subregs;
+ /* whether to establish initial value range on full register (r1) or
+ * sub-register (w1)
+ */
+ bool setup_subregs;
+ /* whether to establish initial value range using signed or unsigned
+ * comparisons (i.e., initialize umin/umax or smin/smax directly)
+ */
+ bool setup_signed;
+ /* whether to perform comparison on full registers or sub-registers */
+ bool compare_subregs;
+ /* whether to perform comparison using signed or unsigned operations */
+ bool compare_signed;
+};
+
+/* Generate test BPF program based on provided test ranges, operation, and
+ * specifications about register bitness and signedness.
+ */
+static int load_range_cmp_prog(struct range x, struct range y, enum op op,
+ int branch_taken, struct case_spec spec,
+ char *log_buf, size_t log_sz,
+ int *false_pos, int *true_pos)
+{
+#define emit(insn) ({ \
+ struct bpf_insn __insns[] = { insn }; \
+ int __i; \
+ for (__i = 0; __i < ARRAY_SIZE(__insns); __i++) \
+ insns[cur_pos + __i] = __insns[__i]; \
+ cur_pos += __i; \
+})
+#define JMP_TO(target) (target - cur_pos - 1)
+ int cur_pos = 0, exit_pos, fd, op_code;
+ struct bpf_insn insns[64];
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_level = 2,
+ .log_buf = log_buf,
+ .log_size = log_sz,
+ .prog_flags = testing_prog_flags(),
+ );
+
+ /* ; skip exit block below
+ * goto +2;
+ */
+ emit(BPF_JMP_A(2));
+ exit_pos = cur_pos;
+ /* ; exit block for all the preparatory conditionals
+ * out:
+ * r0 = 0;
+ * exit;
+ */
+ emit(BPF_MOV64_IMM(BPF_REG_0, 0));
+ emit(BPF_EXIT_INSN());
+ /*
+ * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value
+ * call bpf_get_current_pid_tgid;
+ * r6 = r0; | w6 = w0;
+ * call bpf_get_current_pid_tgid;
+ * r7 = r0; | w7 = w0;
+ */
+ emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid));
+ if (spec.init_subregs)
+ emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0));
+ else
+ emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0));
+ emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid));
+ if (spec.init_subregs)
+ emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0));
+ else
+ emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+ /* ; setup initial r6/w6 possible value range ([x.a, x.b])
+ * r1 = %[x.a] ll; | w1 = %[x.a];
+ * r2 = %[x.b] ll; | w2 = %[x.b];
+ * if r6 < r1 goto out; | if w6 < w1 goto out;
+ * if r6 > r2 goto out; | if w6 > w2 goto out;
+ */
+ if (spec.setup_subregs) {
+ emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a));
+ emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos)));
+ } else {
+ emit(BPF_LD_IMM64(BPF_REG_1, x.a));
+ emit(BPF_LD_IMM64(BPF_REG_2, x.b));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos)));
+ }
+ /* ; setup initial r7/w7 possible value range ([y.a, y.b])
+ * r1 = %[y.a] ll; | w1 = %[y.a];
+ * r2 = %[y.b] ll; | w2 = %[y.b];
+ * if r7 < r1 goto out; | if w7 < w1 goto out;
+ * if r7 > r2 goto out; | if w7 > w2 goto out;
+ */
+ if (spec.setup_subregs) {
+ emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a));
+ emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos)));
+ } else {
+ emit(BPF_LD_IMM64(BPF_REG_1, y.a));
+ emit(BPF_LD_IMM64(BPF_REG_2, y.b));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos)));
+ }
+ /* ; range test instruction
+ * if r6 <op> r7 goto +3; | if w6 <op> w7 goto +3;
+ */
+ switch (op) {
+ case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break;
+ case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break;
+ case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break;
+ case OP_GE: op_code = spec.compare_signed ? BPF_JSGE : BPF_JGE; break;
+ case OP_EQ: op_code = BPF_JEQ; break;
+ case OP_NE: op_code = BPF_JNE; break;
+ default:
+ printf("unrecognized op %d\n", op);
+ return -ENOTSUP;
+ }
+ /* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+ * ; this is used for debugging, as verifier doesn't always print
+ * ; registers states as of condition jump instruction (e.g., when
+ * ; precision marking happens)
+ * r0 = r6; | w0 = w6;
+ * r0 = r7; | w0 = w7;
+ */
+ if (spec.compare_subregs) {
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+ } else {
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ }
+ if (spec.compare_subregs)
+ emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+ else
+ emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+ /* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+ * r0 = r6; | w0 = w6;
+ * r0 = r7; | w0 = w7;
+ * exit;
+ */
+ *false_pos = cur_pos;
+ if (spec.compare_subregs) {
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+ } else {
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ }
+ if (branch_taken == 1) /* false branch is never taken */
+ emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+ else
+ emit(BPF_EXIT_INSN());
+ /* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+ * r0 = r6; | w0 = w6;
+ * r0 = r7; | w0 = w7;
+ * exit;
+ */
+ *true_pos = cur_pos;
+ if (spec.compare_subregs) {
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+ } else {
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ }
+ if (branch_taken == 0) /* true branch is never taken */
+ emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+ emit(BPF_EXIT_INSN()); /* last instruction has to be exit */
+
+ fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test",
+ "GPL", insns, cur_pos, &opts);
+ if (fd < 0)
+ return fd;
+
+ close(fd);
+ return 0;
+#undef emit
+#undef JMP_TO
+}
+
+#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0)
+
+/* Parse register state from verifier log.
+ * `s` should point to the start of "Rx = ..." substring in the verifier log.
+ */
+static int parse_reg_state(const char *s, struct reg_state *reg)
+{
+ /* There are two generic forms for SCALAR register:
+ * - known constant: R6_rwD=P%lld
+ * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated
+ * list of optional range specifiers:
+ * - umin=%llu, if missing, assumed 0;
+ * - umax=%llu, if missing, assumed U64_MAX;
+ * - smin=%lld, if missing, assumed S64_MIN;
+ * - smax=%lld, if missing, assumed S64_MAX;
+ * - umin32=%d, if missing, assumed 0;
+ * - umax32=%d, if missing, assumed U32_MAX;
+ * - smin32=%d, if missing, assumed S32_MIN;
+ * - smax32=%d, if missing, assumed S32_MAX;
+ * - var_off=(%#llx; %#llx), tnum part, we don't care about it.
+ *
+ * If some of the values are equal, they will be grouped (but min/max
+ * are not mixed together, and similarly negative values are not
+ * grouped with non-negative ones). E.g.:
+ *
+ * R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000)
+ *
+ * _rwD part is optional (and any of the letters can be missing).
+ * P (precision mark) is optional as well.
+ *
+ * Anything inside scalar() is optional, including id, of course.
+ */
+ struct {
+ const char *pfx;
+ u64 *dst, def;
+ bool is_32, is_set;
+ } *f, fields[8] = {
+ {"smin=", &reg->r[S64].a, S64_MIN},
+ {"smax=", &reg->r[S64].b, S64_MAX},
+ {"umin=", &reg->r[U64].a, 0},
+ {"umax=", &reg->r[U64].b, U64_MAX},
+ {"smin32=", &reg->r[S32].a, (u32)S32_MIN, true},
+ {"smax32=", &reg->r[S32].b, (u32)S32_MAX, true},
+ {"umin32=", &reg->r[U32].a, 0, true},
+ {"umax32=", &reg->r[U32].b, U32_MAX, true},
+ };
+ const char *p;
+ int i;
+
+ p = strchr(s, '=');
+ if (!p)
+ return -EINVAL;
+ p++;
+ if (*p == 'P')
+ p++;
+
+ if (!str_has_pfx(p, "scalar(")) {
+ long long sval;
+ enum num_t t;
+
+ if (p[0] == '0' && p[1] == 'x') {
+ if (sscanf(p, "%llx", &sval) != 1)
+ return -EINVAL;
+ } else {
+ if (sscanf(p, "%lld", &sval) != 1)
+ return -EINVAL;
+ }
+
+ reg->valid = true;
+ for (t = first_t; t <= last_t; t++) {
+ reg->r[t] = range(t, sval, sval);
+ }
+ return 0;
+ }
+
+ p += sizeof("scalar");
+ while (p) {
+ int midxs[ARRAY_SIZE(fields)], mcnt = 0;
+ u64 val;
+
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ f = &fields[i];
+ if (!str_has_pfx(p, f->pfx))
+ continue;
+ midxs[mcnt++] = i;
+ p += strlen(f->pfx);
+ }
+
+ if (mcnt) {
+ /* populate all matched fields */
+ if (p[0] == '0' && p[1] == 'x') {
+ if (sscanf(p, "%llx", &val) != 1)
+ return -EINVAL;
+ } else {
+ if (sscanf(p, "%lld", &val) != 1)
+ return -EINVAL;
+ }
+
+ for (i = 0; i < mcnt; i++) {
+ f = &fields[midxs[i]];
+ f->is_set = true;
+ *f->dst = f->is_32 ? (u64)(u32)val : val;
+ }
+ } else if (str_has_pfx(p, "var_off")) {
+ /* skip "var_off=(0x0; 0x3f)" part completely */
+ p = strchr(p, ')');
+ if (!p)
+ return -EINVAL;
+ p++;
+ }
+
+ p = strpbrk(p, ",)");
+ if (*p == ')')
+ break;
+ if (p)
+ p++;
+ }
+
+ reg->valid = true;
+
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ f = &fields[i];
+ if (!f->is_set)
+ *f->dst = f->def;
+ }
+
+ return 0;
+}
+
+
+/* Parse all register states (TRUE/FALSE branches and DST/SRC registers)
+ * out of the verifier log for a corresponding test case BPF program.
+ */
+static int parse_range_cmp_log(const char *log_buf, struct case_spec spec,
+ int false_pos, int true_pos,
+ struct reg_state *false1_reg, struct reg_state *false2_reg,
+ struct reg_state *true1_reg, struct reg_state *true2_reg)
+{
+ struct {
+ int insn_idx;
+ int reg_idx;
+ const char *reg_upper;
+ struct reg_state *state;
+ } specs[] = {
+ {false_pos, 6, "R6=", false1_reg},
+ {false_pos + 1, 7, "R7=", false2_reg},
+ {true_pos, 6, "R6=", true1_reg},
+ {true_pos + 1, 7, "R7=", true2_reg},
+ };
+ char buf[32];
+ const char *p = log_buf, *q;
+ int i, err;
+
+ for (i = 0; i < 4; i++) {
+ sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx,
+ spec.compare_subregs ? "bc" : "bf",
+ spec.compare_subregs ? "w0" : "r0",
+ spec.compare_subregs ? "w" : "r", specs[i].reg_idx);
+
+ q = strstr(p, buf);
+ if (!q) {
+ *specs[i].state = (struct reg_state){.valid = false};
+ continue;
+ }
+ p = strstr(q, specs[i].reg_upper);
+ if (!p)
+ return -EINVAL;
+ err = parse_reg_state(p, specs[i].state);
+ if (err)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Validate ranges match, and print details if they don't */
+static bool assert_range_eq(enum num_t t, struct range x, struct range y,
+ const char *ctx1, const char *ctx2)
+{
+ DEFINE_STRBUF(sb, 512);
+
+ if (range_eq(x, y))
+ return true;
+
+ snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2);
+ snprintf_range(t, sb, x);
+ snappendf(sb, " != ");
+ snprintf_range(t, sb, y);
+
+ printf("%s\n", sb->buf);
+
+ return false;
+}
+
+/* Validate that register states match, and print details if they don't */
+static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx)
+{
+ bool ok = true;
+ enum num_t t;
+
+ if (r->valid != e->valid) {
+ printf("MISMATCH %s: actual %s != expected %s\n", ctx,
+ r->valid ? "<valid>" : "<invalid>",
+ e->valid ? "<valid>" : "<invalid>");
+ return false;
+ }
+
+ if (!r->valid)
+ return true;
+
+ for (t = first_t; t <= last_t; t++) {
+ if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t)))
+ ok = false;
+ }
+
+ return ok;
+}
+
+/* Printf verifier log, filtering out irrelevant noise */
+static void print_verifier_log(const char *buf)
+{
+ const char *p;
+
+ while (buf[0]) {
+ p = strchrnul(buf, '\n');
+
+ /* filter out irrelevant precision backtracking logs */
+ if (str_has_pfx(buf, "mark_precise: "))
+ goto skip_line;
+
+ printf("%.*s\n", (int)(p - buf), buf);
+
+skip_line:
+ buf = *p == '\0' ? p : p + 1;
+ }
+}
+
+/* Simulate provided test case purely with our own range-based logic.
+ * This is done to set up expectations for verifier's branch_taken logic and
+ * verifier's register states in the verifier log.
+ */
+static void sim_case(enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y, enum op op,
+ struct reg_state *fr1, struct reg_state *fr2,
+ struct reg_state *tr1, struct reg_state *tr2,
+ int *branch_taken)
+{
+ const u64 A = x.a;
+ const u64 B = x.b;
+ const u64 C = y.a;
+ const u64 D = y.b;
+ struct reg_state rc;
+ enum op rev_op = complement_op(op);
+ enum num_t t;
+
+ fr1->valid = fr2->valid = true;
+ tr1->valid = tr2->valid = true;
+ for (t = first_t; t <= last_t; t++) {
+ /* if we are initializing using 32-bit subregisters,
+ * full registers get upper 32 bits zeroed automatically
+ */
+ struct range z = t_is_32(init_t) ? unkn_subreg(t) : unkn[t];
+
+ fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z;
+ }
+
+ /* step 1: r1 >= A, r2 >= C */
+ reg_state_set_const(&rc, init_t, A);
+ reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A");
+ reg_state_set_const(&rc, init_t, C);
+ reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C");
+ *tr1 = *fr1;
+ *tr2 = *fr2;
+ if (env.verbosity >= VERBOSE_VERY) {
+ printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n");
+ printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n");
+ }
+
+ /* step 2: r1 <= B, r2 <= D */
+ reg_state_set_const(&rc, init_t, B);
+ reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B");
+ reg_state_set_const(&rc, init_t, D);
+ reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D");
+ *tr1 = *fr1;
+ *tr2 = *fr2;
+ if (env.verbosity >= VERBOSE_VERY) {
+ printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n");
+ printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n");
+ }
+
+ /* step 3: r1 <op> r2 */
+ *branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op);
+ fr1->valid = fr2->valid = false;
+ tr1->valid = tr2->valid = false;
+ if (*branch_taken != 1) { /* FALSE is possible */
+ fr1->valid = fr2->valid = true;
+ reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE");
+ }
+ if (*branch_taken != 0) { /* TRUE is possible */
+ tr1->valid = tr2->valid = true;
+ reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE");
+ }
+ if (env.verbosity >= VERBOSE_VERY) {
+ printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n");
+ printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n");
+ printf("STEP3 (%s) TRUE R1:", t_str(cond_t)); print_reg_state(tr1, "\n");
+ printf("STEP3 (%s) TRUE R2:", t_str(cond_t)); print_reg_state(tr2, "\n");
+ }
+}
+
+/* ===============================
+ * HIGH-LEVEL TEST CASE VALIDATION
+ * ===============================
+ */
+static u32 upper_seeds[] = {
+ 0,
+ 1,
+ U32_MAX,
+ U32_MAX - 1,
+ S32_MAX,
+ (u32)S32_MIN,
+};
+
+static u32 lower_seeds[] = {
+ 0,
+ 1,
+ 2, (u32)-2,
+ 255, (u32)-255,
+ UINT_MAX,
+ UINT_MAX - 1,
+ INT_MAX,
+ (u32)INT_MIN,
+};
+
+struct ctx {
+ int val_cnt, subval_cnt, range_cnt, subrange_cnt;
+ u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)];
+ s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)];
+ u32 usubvals[ARRAY_SIZE(lower_seeds)];
+ s32 ssubvals[ARRAY_SIZE(lower_seeds)];
+ struct range *uranges, *sranges;
+ struct range *usubranges, *ssubranges;
+ int max_failure_cnt, cur_failure_cnt;
+ int total_case_cnt, case_cnt;
+ int rand_case_cnt;
+ unsigned rand_seed;
+ __u64 start_ns;
+ char progress_ctx[64];
+};
+
+static void cleanup_ctx(struct ctx *ctx)
+{
+ free(ctx->uranges);
+ free(ctx->sranges);
+ free(ctx->usubranges);
+ free(ctx->ssubranges);
+}
+
+struct subtest_case {
+ enum num_t init_t;
+ enum num_t cond_t;
+ struct range x;
+ struct range y;
+ enum op op;
+};
+
+static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op)
+{
+ snappendf(sb, "(%s)", t_str(t->init_t));
+ snprintf_range(t->init_t, sb, t->x);
+ snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? op_str(t->op) : "<op>");
+ snprintf_range(t->init_t, sb, t->y);
+}
+
+/* Generate and validate test case based on specific combination of setup
+ * register ranges (including their expected num_t domain), and conditional
+ * operation to perform (including num_t domain in which it has to be
+ * performed)
+ */
+static int verify_case_op(enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y, enum op op)
+{
+ char log_buf[256 * 1024];
+ size_t log_sz = sizeof(log_buf);
+ int err, false_pos = 0, true_pos = 0, branch_taken;
+ struct reg_state fr1, fr2, tr1, tr2;
+ struct reg_state fe1, fe2, te1, te2;
+ bool failed = false;
+ struct case_spec spec = {
+ .init_subregs = (init_t == U32 || init_t == S32),
+ .setup_subregs = (init_t == U32 || init_t == S32),
+ .setup_signed = (init_t == S64 || init_t == S32),
+ .compare_subregs = (cond_t == U32 || cond_t == S32),
+ .compare_signed = (cond_t == S64 || cond_t == S32),
+ };
+
+ log_buf[0] = '\0';
+
+ sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken);
+
+ err = load_range_cmp_prog(x, y, op, branch_taken, spec,
+ log_buf, log_sz, &false_pos, &true_pos);
+ if (err) {
+ ASSERT_OK(err, "load_range_cmp_prog");
+ failed = true;
+ }
+
+ err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos,
+ &fr1, &fr2, &tr1, &tr2);
+ if (err) {
+ ASSERT_OK(err, "parse_range_cmp_log");
+ failed = true;
+ }
+
+ if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") ||
+ !assert_reg_state_eq(&fr2, &fe2, "false_reg2") ||
+ !assert_reg_state_eq(&tr1, &te1, "true_reg1") ||
+ !assert_reg_state_eq(&tr2, &te2, "true_reg2")) {
+ failed = true;
+ }
+
+ if (failed || env.verbosity >= VERBOSE_NORMAL) {
+ if (failed || env.verbosity >= VERBOSE_VERY) {
+ printf("VERIFIER LOG:\n========================\n");
+ print_verifier_log(log_buf);
+ printf("=====================\n");
+ }
+ printf("ACTUAL FALSE1: "); print_reg_state(&fr1, "\n");
+ printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n");
+ printf("ACTUAL FALSE2: "); print_reg_state(&fr2, "\n");
+ printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n");
+ printf("ACTUAL TRUE1: "); print_reg_state(&tr1, "\n");
+ printf("EXPECTED TRUE1: "); print_reg_state(&te1, "\n");
+ printf("ACTUAL TRUE2: "); print_reg_state(&tr2, "\n");
+ printf("EXPECTED TRUE2: "); print_reg_state(&te2, "\n");
+
+ return failed ? -EINVAL : 0;
+ }
+
+ return 0;
+}
+
+/* Given setup ranges and number types, go over all supported operations,
+ * generating individual subtest for each allowed combination
+ */
+static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y, bool is_subtest)
+{
+ DEFINE_STRBUF(sb, 256);
+ int err;
+ struct subtest_case sub = {
+ .init_t = init_t,
+ .cond_t = cond_t,
+ .x = x,
+ .y = y,
+ };
+
+ sb->pos = 0; /* reset position in strbuf */
+ subtest_case_str(sb, &sub, false /* ignore op */);
+ if (is_subtest && !test__start_subtest(sb->buf))
+ return 0;
+
+ for (sub.op = first_op; sub.op <= last_op; sub.op++) {
+ sb->pos = 0; /* reset position in strbuf */
+ subtest_case_str(sb, &sub, true /* print op */);
+
+ if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */
+ printf("TEST CASE: %s\n", sb->buf);
+
+ err = verify_case_op(init_t, cond_t, x, y, sub.op);
+ if (err || env.verbosity >= VERBOSE_NORMAL)
+ ASSERT_OK(err, sb->buf);
+ if (err) {
+ ctx->cur_failure_cnt++;
+ if (ctx->cur_failure_cnt > ctx->max_failure_cnt)
+ return err;
+ return 0; /* keep testing other cases */
+ }
+ ctx->case_cnt++;
+ if ((ctx->case_cnt % 10000) == 0) {
+ double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt;
+ u64 elapsed_ns = get_time_ns() - ctx->start_ns;
+ double remain_ns = elapsed_ns / progress * (1 - progress);
+
+ fprintf(env.stderr_saved, "PROGRESS (%s): %d/%d (%.2lf%%), "
+ "elapsed %llu mins (%.2lf hrs), "
+ "ETA %.0lf mins (%.2lf hrs)\n",
+ ctx->progress_ctx,
+ ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress,
+ elapsed_ns / 1000000000 / 60,
+ elapsed_ns / 1000000000.0 / 3600,
+ remain_ns / 1000000000.0 / 60,
+ remain_ns / 1000000000.0 / 3600);
+ }
+ }
+
+ return 0;
+}
+
+static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y)
+{
+ return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */);
+}
+
+/* ================================
+ * GENERATED CASES FROM SEED VALUES
+ * ================================
+ */
+static int u64_cmp(const void *p1, const void *p2)
+{
+ u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int u32_cmp(const void *p1, const void *p2)
+{
+ u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int s64_cmp(const void *p1, const void *p2)
+{
+ s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int s32_cmp(const void *p1, const void *p2)
+{
+ s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+/* Generate valid unique constants from seeds, both signed and unsigned */
+static void gen_vals(struct ctx *ctx)
+{
+ int i, j, cnt = 0;
+
+ for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) {
+ for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) {
+ ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j];
+ }
+ }
+
+ /* sort and compact uvals (i.e., it's `sort | uniq`) */
+ qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp);
+ for (i = 1, j = 0; i < cnt; i++) {
+ if (ctx->uvals[j] == ctx->uvals[i])
+ continue;
+ j++;
+ ctx->uvals[j] = ctx->uvals[i];
+ }
+ ctx->val_cnt = j + 1;
+
+ /* we have exactly the same number of s64 values, they are just in
+ * a different order than u64s, so just sort them differently
+ */
+ for (i = 0; i < ctx->val_cnt; i++)
+ ctx->svals[i] = ctx->uvals[i];
+ qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp);
+
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ for (i = 0; i < ctx->val_cnt; i++) {
+ sb1->pos = sb2->pos = 0;
+ snprintf_num(U64, sb1, ctx->uvals[i]);
+ snprintf_num(S64, sb2, ctx->svals[i]);
+ printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf);
+ }
+ }
+
+ /* 32-bit values are generated separately */
+ cnt = 0;
+ for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) {
+ ctx->usubvals[cnt++] = lower_seeds[i];
+ }
+
+ /* sort and compact usubvals (i.e., it's `sort | uniq`) */
+ qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp);
+ for (i = 1, j = 0; i < cnt; i++) {
+ if (ctx->usubvals[j] == ctx->usubvals[i])
+ continue;
+ j++;
+ ctx->usubvals[j] = ctx->usubvals[i];
+ }
+ ctx->subval_cnt = j + 1;
+
+ for (i = 0; i < ctx->subval_cnt; i++)
+ ctx->ssubvals[i] = ctx->usubvals[i];
+ qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp);
+
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ for (i = 0; i < ctx->subval_cnt; i++) {
+ sb1->pos = sb2->pos = 0;
+ snprintf_num(U32, sb1, ctx->usubvals[i]);
+ snprintf_num(S32, sb2, ctx->ssubvals[i]);
+ printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf);
+ }
+ }
+}
+
+/* Generate valid ranges from upper/lower seeds */
+static int gen_ranges(struct ctx *ctx)
+{
+ int i, j, cnt = 0;
+
+ for (i = 0; i < ctx->val_cnt; i++) {
+ for (j = i; j < ctx->val_cnt; j++) {
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ sb1->pos = sb2->pos = 0;
+ snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j]));
+ snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j]));
+ printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf);
+ }
+ cnt++;
+ }
+ }
+ ctx->range_cnt = cnt;
+
+ ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges));
+ if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc"))
+ return -EINVAL;
+ ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges));
+ if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc"))
+ return -EINVAL;
+
+ cnt = 0;
+ for (i = 0; i < ctx->val_cnt; i++) {
+ for (j = i; j < ctx->val_cnt; j++) {
+ ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]);
+ ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]);
+ cnt++;
+ }
+ }
+
+ cnt = 0;
+ for (i = 0; i < ctx->subval_cnt; i++) {
+ for (j = i; j < ctx->subval_cnt; j++) {
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ sb1->pos = sb2->pos = 0;
+ snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j]));
+ snprintf_range(S32, sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j]));
+ printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf);
+ }
+ cnt++;
+ }
+ }
+ ctx->subrange_cnt = cnt;
+
+ ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges));
+ if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc"))
+ return -EINVAL;
+ ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges));
+ if (!ASSERT_OK_PTR(ctx->ssubranges, "ssubranges_calloc"))
+ return -EINVAL;
+
+ cnt = 0;
+ for (i = 0; i < ctx->subval_cnt; i++) {
+ for (j = i; j < ctx->subval_cnt; j++) {
+ ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]);
+ ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]);
+ cnt++;
+ }
+ }
+
+ return 0;
+}
+
+static int parse_env_vars(struct ctx *ctx)
+{
+ const char *s;
+
+ if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) {
+ errno = 0;
+ ctx->max_failure_cnt = strtol(s, NULL, 10);
+ if (errno || ctx->max_failure_cnt < 0) {
+ ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT");
+ return -EINVAL;
+ }
+ }
+
+ if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) {
+ errno = 0;
+ ctx->rand_case_cnt = strtol(s, NULL, 10);
+ if (errno || ctx->rand_case_cnt < 0) {
+ ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT");
+ return -EINVAL;
+ }
+ }
+
+ if ((s = getenv("REG_BOUNDS_RAND_SEED"))) {
+ errno = 0;
+ ctx->rand_seed = strtoul(s, NULL, 10);
+ if (errno) {
+ ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int prepare_gen_tests(struct ctx *ctx)
+{
+ const char *s;
+ int err;
+
+ if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) {
+ test__skip();
+ return -ENOTSUP;
+ }
+
+ err = parse_env_vars(ctx);
+ if (err)
+ return err;
+
+ gen_vals(ctx);
+ err = gen_ranges(ctx);
+ if (err) {
+ ASSERT_OK(err, "gen_ranges");
+ return err;
+ }
+
+ return 0;
+}
+
+/* Go over generated constants and ranges and validate various supported
+ * combinations of them
+ */
+static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t)
+{
+ struct ctx ctx;
+ struct range rconst;
+ const struct range *ranges;
+ const u64 *vals;
+ int i, j;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ if (prepare_gen_tests(&ctx))
+ goto cleanup;
+
+ ranges = init_t == U64 ? ctx.uranges : ctx.sranges;
+ vals = init_t == U64 ? ctx.uvals : (const u64 *)ctx.svals;
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "RANGE x CONST, %s -> %s",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.val_cnt; i++) {
+ for (j = 0; j < ctx.range_cnt; j++) {
+ rconst = range(init_t, vals[i], vals[i]);
+
+ /* (u64|s64)(<range> x <const>) */
+ if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst))
+ goto cleanup;
+ /* (u64|s64)(<const> x <range>) */
+ if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j]))
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ cleanup_ctx(&ctx);
+}
+
+static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t)
+{
+ struct ctx ctx;
+ struct range rconst;
+ const struct range *ranges;
+ const u32 *vals;
+ int i, j;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ if (prepare_gen_tests(&ctx))
+ goto cleanup;
+
+ ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges;
+ vals = init_t == U32 ? ctx.usubvals : (const u32 *)ctx.ssubvals;
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "RANGE x CONST, %s -> %s",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.subval_cnt; i++) {
+ for (j = 0; j < ctx.subrange_cnt; j++) {
+ rconst = range(init_t, vals[i], vals[i]);
+
+ /* (u32|s32)(<range> x <const>) */
+ if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst))
+ goto cleanup;
+ /* (u32|s32)(<const> x <range>) */
+ if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j]))
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ cleanup_ctx(&ctx);
+}
+
+static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t)
+{
+ struct ctx ctx;
+ const struct range *ranges;
+ int i, j, rcnt;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ if (prepare_gen_tests(&ctx))
+ goto cleanup;
+
+ switch (init_t)
+ {
+ case U64:
+ ranges = ctx.uranges;
+ rcnt = ctx.range_cnt;
+ break;
+ case U32:
+ ranges = ctx.usubranges;
+ rcnt = ctx.subrange_cnt;
+ break;
+ case S64:
+ ranges = ctx.sranges;
+ rcnt = ctx.range_cnt;
+ break;
+ case S32:
+ ranges = ctx.ssubranges;
+ rcnt = ctx.subrange_cnt;
+ break;
+ default:
+ printf("validate_gen_range_vs_range!\n");
+ exit(1);
+ }
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "RANGE x RANGE, %s -> %s",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < rcnt; i++) {
+ for (j = i; j < rcnt; j++) {
+ /* (<range> x <range>) */
+ if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j]))
+ goto cleanup;
+ if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i]))
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ cleanup_ctx(&ctx);
+}
+
+/* Go over thousands of test cases generated from initial seed values.
+ * Given this take a long time, guard this begind SLOW_TESTS=1 envvar. If
+ * envvar is not set, this test is skipped during test_progs testing.
+ *
+ * We split this up into smaller subsets based on initialization and
+ * conditional numeric domains to get an easy parallelization with test_progs'
+ * -j argument.
+ */
+
+/* RANGE x CONST, U64 initial range */
+void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); }
+void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); }
+void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); }
+void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); }
+/* RANGE x CONST, S64 initial range */
+void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); }
+void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); }
+void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); }
+void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); }
+/* RANGE x CONST, U32 initial range */
+void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); }
+void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); }
+void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); }
+void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); }
+/* RANGE x CONST, S32 initial range */
+void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); }
+void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); }
+void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); }
+void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); }
+
+/* RANGE x RANGE, U64 initial range */
+void test_reg_bounds_gen_ranges_u64_u64(void) { validate_gen_range_vs_range(U64, U64); }
+void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); }
+void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); }
+void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); }
+/* RANGE x RANGE, S64 initial range */
+void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); }
+void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); }
+void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); }
+void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); }
+/* RANGE x RANGE, U32 initial range */
+void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); }
+void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); }
+void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); }
+void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); }
+/* RANGE x RANGE, S32 initial range */
+void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); }
+void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); }
+void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); }
+void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); }
+
+#define DEFAULT_RAND_CASE_CNT 100
+
+#define RAND_21BIT_MASK ((1 << 22) - 1)
+
+static u64 rand_u64()
+{
+ /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it
+ * seems to be 1<<31, so we need to call it thrice to get full u64;
+ * we'll use roughly equal split: 22 + 21 + 21 bits
+ */
+ return ((u64)random() << 42) |
+ (((u64)random() & RAND_21BIT_MASK) << 21) |
+ (random() & RAND_21BIT_MASK);
+}
+
+static u64 rand_const(enum num_t t)
+{
+ return cast_t(t, rand_u64());
+}
+
+static struct range rand_range(enum num_t t)
+{
+ u64 x = rand_const(t), y = rand_const(t);
+
+ return range(t, min_t(t, x, y), max_t(t, x, y));
+}
+
+static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range)
+{
+ struct ctx ctx;
+ struct range range1, range2;
+ int err, i;
+ u64 t;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ err = parse_env_vars(&ctx);
+ if (err) {
+ ASSERT_OK(err, "parse_env_vars");
+ return;
+ }
+
+ if (ctx.rand_case_cnt == 0)
+ ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT;
+ if (ctx.rand_seed == 0)
+ ctx.rand_seed = (unsigned)get_time_ns();
+
+ srandom(ctx.rand_seed);
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "[RANDOM SEED %u] RANGE x %s, %s -> %s",
+ ctx.rand_seed, const_range ? "CONST" : "RANGE",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.rand_case_cnt; i++) {
+ range1 = rand_range(init_t);
+ if (const_range) {
+ t = rand_const(init_t);
+ range2 = range(init_t, t, t);
+ } else {
+ range2 = rand_range(init_t);
+ }
+
+ /* <range1> x <range2> */
+ if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */))
+ goto cleanup;
+ /* <range2> x <range1> */
+ if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */))
+ goto cleanup;
+ }
+
+cleanup:
+ /* make sure we report random seed for reproducing */
+ ASSERT_TRUE(true, ctx.progress_ctx);
+ cleanup_ctx(&ctx);
+}
+
+/* [RANDOM] RANGE x CONST, U64 initial range */
+void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S64 initial range */
+void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, U32 initial range */
+void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S32 initial range */
+void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); }
+
+/* [RANDOM] RANGE x RANGE, U64 initial range */
+void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S64 initial range */
+void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, U32 initial range */
+void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S32 initial range */
+void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); }
+
+/* A set of hard-coded "interesting" cases to validate as part of normal
+ * test_progs test runs
+ */
+static struct subtest_case crafted_cases[] = {
+ {U64, U64, {0, 0xffffffff}, {0, 0}},
+ {U64, U64, {0, 0x80000000}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}},
+ {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}},
+
+ /* single point overlap, interesting BPF_EQ and BPF_NE interactions */
+ {U64, U64, {0, 1}, {1, 0x80000000}},
+ {U64, S64, {0, 1}, {1, 0x80000000}},
+ {U64, U32, {0, 1}, {1, 0x80000000}},
+ {U64, S32, {0, 1}, {1, 0x80000000}},
+
+ {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0, 0xffffffffULL}, {1, 1}},
+ {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}},
+
+ {U64, U32, {0, 0x100000000}, {0, 0}},
+ {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}},
+
+ {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}},
+ /* these are tricky cases where lower 32 bits allow to tighten 64
+ * bit boundaries based on tightened lower 32 bit boundaries
+ */
+ {U64, S32, {0, 0x0ffffffffULL}, {0, 0}},
+ {U64, S32, {0, 0x100000000ULL}, {0, 0}},
+ {U64, S32, {0, 0x100000001ULL}, {0, 0}},
+ {U64, S32, {0, 0x180000000ULL}, {0, 0}},
+ {U64, S32, {0, 0x17fffffffULL}, {0, 0}},
+ {U64, S32, {0, 0x180000001ULL}, {0, 0}},
+
+ /* verifier knows about [-1, 0] range for s32 for this case already */
+ {S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+ /* but didn't know about these cases initially */
+ {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */
+ {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */
+
+ /* longer convergence case: learning from u64 -> s64 -> u64 -> u32,
+ * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX])
+ */
+ {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+
+ {U32, U32, {1, U32_MAX}, {0, 0}},
+
+ {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}},
+
+ {S32, U64, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)(s32)-255, 0}},
+ {S32, S64, {(u32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}},
+ {S32, S64, {0, 1}, {(u32)S32_MIN, (u32)S32_MIN}},
+ {S32, U32, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)S32_MIN, (u32)S32_MIN}},
+
+ /* edge overlap testings for BPF_NE */
+ {U64, U64, {0, U64_MAX}, {U64_MAX, U64_MAX}},
+ {U64, U64, {0, U64_MAX}, {0, 0}},
+ {S64, U64, {S64_MIN, 0}, {S64_MIN, S64_MIN}},
+ {S64, U64, {S64_MIN, 0}, {0, 0}},
+ {S64, U64, {S64_MIN, S64_MAX}, {S64_MAX, S64_MAX}},
+ {U32, U32, {0, U32_MAX}, {0, 0}},
+ {U32, U32, {0, U32_MAX}, {U32_MAX, U32_MAX}},
+ {S32, U32, {(u32)S32_MIN, 0}, {0, 0}},
+ {S32, U32, {(u32)S32_MIN, 0}, {(u32)S32_MIN, (u32)S32_MIN}},
+ {S32, U32, {(u32)S32_MIN, S32_MAX}, {S32_MAX, S32_MAX}},
+ {S64, U32, {0x0, 0x1f}, {0xffffffff80000000ULL, 0x000000007fffffffULL}},
+ {S64, U32, {0x0, 0x1f}, {0xffffffffffff8000ULL, 0x0000000000007fffULL}},
+ {S64, U32, {0x0, 0x1f}, {0xffffffffffffff80ULL, 0x000000000000007fULL}},
+};
+
+/* Go over crafted hard-coded cases. This is fast, so we do it as part of
+ * normal test_progs run.
+ */
+void test_reg_bounds_crafted(void)
+{
+ struct ctx ctx;
+ int i;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) {
+ struct subtest_case *c = &crafted_cases[i];
+
+ verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y);
+ verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x);
+ }
+
+ cleanup_ctx(&ctx);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
new file mode 100644
index 000000000000..f0a8c828f8f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+#include "res_spin_lock.skel.h"
+#include "res_spin_lock_fail.skel.h"
+
+void test_res_spin_lock_failure(void)
+{
+ RUN_TESTS(res_spin_lock_fail);
+}
+
+static volatile int skip;
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ while (!READ_ONCE(skip)) {
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (err || topts.retval) {
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+ break;
+ }
+ }
+ pthread_exit(arg);
+}
+
+void test_res_spin_lock_success(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct res_spin_lock *skel;
+ pthread_t thread_id[16];
+ int prog_fd, i, err;
+ void *ret;
+
+ if (get_nprocs() < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = res_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "res_spin_lock__open_and_load"))
+ return;
+ /* AA deadlock */
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "error");
+ ASSERT_OK(topts.retval, "retval");
+
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_held_lock_max);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "error");
+ ASSERT_OK(topts.retval, "retval");
+
+ /* Multi-threaded ABBA deadlock. */
+
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_AB);
+ for (i = 0; i < 16; i++) {
+ int err;
+
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end;
+ }
+
+ topts.retval = 0;
+ topts.repeat = 1000;
+ int fd = bpf_program__fd(skel->progs.res_spin_lock_test_BA);
+ while (!topts.retval && !err && !READ_ONCE(skel->bss->err)) {
+ err = bpf_prog_test_run_opts(fd, &topts);
+ }
+
+ WRITE_ONCE(skip, true);
+
+ for (i = 0; i < 16; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end;
+ }
+
+ ASSERT_EQ(READ_ONCE(skel->bss->err), -EDEADLK, "timeout err");
+ ASSERT_OK(err, "err");
+ ASSERT_EQ(topts.retval, -EDEADLK, "timeout");
+end:
+ res_spin_lock__destroy(skel);
+ return;
+}
+
+void serial_test_res_spin_lock_stress(void)
+{
+ if (libbpf_num_possible_cpus() < 3) {
+ test__skip();
+ return;
+ }
+
+ ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA");
+ sleep(5);
+ unload_module("bpf_test_rqspinlock", false);
+ /*
+ * Insert bpf_test_rqspinlock.ko manually with test_mode=[1|2] to test
+ * other cases (ABBA, ABBCCA).
+ */
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 6ace5e9efec1..51544372f52e 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -44,7 +44,7 @@ BTF_ID(union, U)
BTF_ID(func, func)
extern __u32 test_list_global[];
-BTF_ID_LIST_GLOBAL(test_list_global)
+BTF_ID_LIST_GLOBAL(test_list_global, 1)
BTF_ID_UNUSED
BTF_ID(typedef, S)
BTF_ID(typedef, T)
@@ -76,7 +76,7 @@ __resolve_symbol(struct btf *btf, int type_id)
}
for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
- if (test_symbols[i].id != -1)
+ if (test_symbols[i].id >= 0)
continue;
if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
@@ -101,14 +101,14 @@ static int resolve_symbols(void)
int type_id;
__u32 nr;
- btf = btf__parse_elf("btf_data.o", NULL);
+ btf = btf__parse_elf("btf_data.bpf.o", NULL);
if (CHECK(libbpf_get_error(btf), "resolve",
- "Failed to load BTF from btf_data.o\n"))
+ "Failed to load BTF from btf_data.bpf.o\n"))
return -1;
- nr = btf__get_nr_types(btf);
+ nr = btf__type_cnt(btf);
- for (type_id = 1; type_id <= nr; type_id++) {
+ for (type_id = 1; type_id < nr; type_id++) {
if (__resolve_symbol(btf, type_id))
break;
}
@@ -117,14 +117,14 @@ static int resolve_symbols(void)
return 0;
}
-int test_resolve_btfids(void)
+void test_resolve_btfids(void)
{
__u32 *test_list, *test_lists[] = { test_list_local, test_list_global };
unsigned int i, j;
int ret = 0;
if (resolve_symbols())
- return -1;
+ return;
/* Check BTF_ID_LIST(test_list_local) and
* BTF_ID_LIST_GLOBAL(test_list_global) IDs
@@ -138,7 +138,7 @@ int test_resolve_btfids(void)
test_symbols[i].name,
test_list[i], test_symbols[i].id);
if (ret)
- return ret;
+ return;
}
}
@@ -160,13 +160,8 @@ int test_resolve_btfids(void)
break;
if (i > 0) {
- ret = CHECK(test_set.ids[i - 1] > test_set.ids[i],
- "sort_check",
- "test_set is not sorted\n");
- if (ret)
- break;
+ if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check"))
+ return;
}
}
-
- return ret;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index fddbc5db5d6a..64520684d2cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,7 +12,12 @@
#include <sys/sysinfo.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
-#include "test_ringbuf.skel.h"
+
+#include "test_ringbuf.lskel.h"
+#include "test_ringbuf_n.lskel.h"
+#include "test_ringbuf_map_key.lskel.h"
+#include "test_ringbuf_write.lskel.h"
+#include "test_ringbuf_overwrite.lskel.h"
#define EDONE 7777
@@ -58,7 +63,8 @@ static int process_sample(void *ctx, void *data, size_t len)
}
}
-static struct test_ringbuf *skel;
+static struct test_ringbuf_map_key_lskel *skel_map_key;
+static struct test_ringbuf_lskel *skel;
static struct ring_buffer *ringbuf;
static void trigger_samples()
@@ -81,38 +87,154 @@ static void *poll_thread(void *input)
return (void *)(long)ring_buffer__poll(ringbuf, timeout);
}
-void test_ringbuf(void)
+static void ringbuf_write_subtest(void)
+{
+ struct test_ringbuf_write_lskel *skel;
+ int page_size = getpagesize();
+ size_t *mmap_ptr;
+ int err, rb_fd;
+
+ skel = test_ringbuf_write_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->maps.ringbuf.max_entries = 0x40000;
+
+ err = test_ringbuf_write_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ rb_fd = skel->maps.ringbuf.map_fd;
+
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+ if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
+ goto cleanup;
+ *mmap_ptr = 0x30000;
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new"))
+ goto cleanup;
+
+ err = test_ringbuf_write_lskel__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup_ringbuf;
+
+ skel->bss->discarded = 0;
+ skel->bss->passed = 0;
+
+ /* trigger exactly two samples */
+ syscall(__NR_getpgid);
+ syscall(__NR_getpgid);
+
+ ASSERT_EQ(skel->bss->discarded, 2, "discarded");
+ ASSERT_EQ(skel->bss->passed, 0, "passed");
+
+ test_ringbuf_write_lskel__detach(skel);
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_write_lskel__destroy(skel);
+}
+
+static void ringbuf_subtest(void)
{
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
pthread_t thread;
long bg_ret = -1;
- int err, cnt;
-
- skel = test_ringbuf__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ int err, cnt, rb_fd;
+ int page_size = getpagesize();
+ void *mmap_ptr, *tmp_ptr;
+ struct ring *ring;
+ int map_fd;
+ unsigned long avail_data, ring_size, cons_pos, prod_pos;
+
+ skel = test_ringbuf_lskel__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ skel->maps.ringbuf.max_entries = page_size;
+
+ err = test_ringbuf_lskel__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
+ rb_fd = skel->maps.ringbuf.map_fd;
+ /* good read/write cons_pos */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+ ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");
+ tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE);
+ if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend"))
+ goto cleanup;
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+ /* bad writeable prod_pos */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size);
+ err = -errno;
+ ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos");
+ ASSERT_EQ(err, -EPERM, "wr_prod_pos_err");
+
+ /* bad writeable data pages */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ err = -errno;
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one");
+ ASSERT_EQ(err, -EPERM, "wr_data_page_one_err");
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size);
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two");
+ mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all");
+
+ /* good read-only pages */
+ mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
+ if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+ goto cleanup;
+
+ ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap");
+ ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro");
+
+ /* good read-only pages with initial offset */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size);
+ if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+ goto cleanup;
+
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro");
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
- ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+ ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd,
process_sample, NULL, NULL);
if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
goto cleanup;
- err = test_ringbuf__attach(skel);
+ err = test_ringbuf_lskel__attach(skel);
if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
goto cleanup;
trigger_samples();
+ ring = ring_buffer__ring(ringbuf, 0);
+ if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0"))
+ goto cleanup;
+
+ map_fd = ring__map_fd(ring);
+ ASSERT_EQ(map_fd, skel->maps.ringbuf.map_fd, "ring_map_fd");
+
/* 2 submitted + 1 discarded records */
CHECK(skel->bss->avail_data != 3 * rec_sz,
"err_avail_size", "exp %ld, got %ld\n",
3L * rec_sz, skel->bss->avail_data);
- CHECK(skel->bss->ring_size != 4096,
+ CHECK(skel->bss->ring_size != page_size,
"err_ring_size", "exp %ld, got %ld\n",
- 4096L, skel->bss->ring_size);
+ (long)page_size, skel->bss->ring_size);
CHECK(skel->bss->cons_pos != 0,
"err_cons_pos", "exp %ld, got %ld\n",
0L, skel->bss->cons_pos);
@@ -120,6 +242,18 @@ void test_ringbuf(void)
"err_prod_pos", "exp %ld, got %ld\n",
3L * rec_sz, skel->bss->prod_pos);
+ /* verify getting this data directly via the ring object yields the same
+ * results
+ */
+ avail_data = ring__avail_data_size(ring);
+ ASSERT_EQ(avail_data, 3 * rec_sz, "ring_avail_size");
+ ring_size = ring__size(ring);
+ ASSERT_EQ(ring_size, page_size, "ring_ring_size");
+ cons_pos = ring__consumer_pos(ring);
+ ASSERT_EQ(cons_pos, 0, "ring_cons_pos");
+ prod_pos = ring__producer_pos(ring);
+ ASSERT_EQ(prod_pos, 3 * rec_sz, "ring_prod_pos");
+
/* poll for samples */
err = ring_buffer__poll(ringbuf, -1);
@@ -226,6 +360,10 @@ void test_ringbuf(void)
err = ring_buffer__consume(ringbuf);
CHECK(err < 0, "rb_consume", "failed: %d\b", err);
+ /* also consume using ring__consume to make sure it works the same */
+ err = ring__consume(ring);
+ ASSERT_GE(err, 0, "ring_consume");
+
/* 3 rounds, 2 samples each */
cnt = atomic_xchg(&sample_cnt, 0);
CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt);
@@ -238,8 +376,200 @@ void test_ringbuf(void)
CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
1L, skel->bss->discarded);
- test_ringbuf__detach(skel);
+ test_ringbuf_lskel__detach(skel);
cleanup:
ring_buffer__free(ringbuf);
- test_ringbuf__destroy(skel);
+ test_ringbuf_lskel__destroy(skel);
+}
+
+/*
+ * Test ring_buffer__consume_n() by producing N_TOT_SAMPLES samples in the ring
+ * buffer, via getpid(), and consuming them in chunks of N_SAMPLES.
+ */
+#define N_TOT_SAMPLES 32
+#define N_SAMPLES 4
+
+/* Sample value to verify the callback validity */
+#define SAMPLE_VALUE 42L
+
+static int process_n_sample(void *ctx, void *data, size_t len)
+{
+ struct sample *s = data;
+
+ ASSERT_EQ(s->value, SAMPLE_VALUE, "sample_value");
+
+ return 0;
+}
+
+static void ringbuf_n_subtest(void)
+{
+ struct test_ringbuf_n_lskel *skel_n;
+ int err, i;
+
+ skel_n = test_ringbuf_n_lskel__open();
+ if (!ASSERT_OK_PTR(skel_n, "test_ringbuf_n_lskel__open"))
+ return;
+
+ skel_n->maps.ringbuf.max_entries = getpagesize();
+ skel_n->bss->pid = getpid();
+
+ err = test_ringbuf_n_lskel__load(skel_n);
+ if (!ASSERT_OK(err, "test_ringbuf_n_lskel__load"))
+ goto cleanup;
+
+ ringbuf = ring_buffer__new(skel_n->maps.ringbuf.map_fd,
+ process_n_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+ goto cleanup;
+
+ err = test_ringbuf_n_lskel__attach(skel_n);
+ if (!ASSERT_OK(err, "test_ringbuf_n_lskel__attach"))
+ goto cleanup_ringbuf;
+
+ /* Produce N_TOT_SAMPLES samples in the ring buffer by calling getpid() */
+ skel_n->bss->value = SAMPLE_VALUE;
+ for (i = 0; i < N_TOT_SAMPLES; i++)
+ syscall(__NR_getpgid);
+
+ /* Consume all samples from the ring buffer in batches of N_SAMPLES */
+ for (i = 0; i < N_TOT_SAMPLES; i += err) {
+ err = ring_buffer__consume_n(ringbuf, N_SAMPLES);
+ if (!ASSERT_EQ(err, N_SAMPLES, "rb_consume"))
+ goto cleanup_ringbuf;
+ }
+
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_n_lskel__destroy(skel_n);
+}
+
+static int process_map_key_sample(void *ctx, void *data, size_t len)
+{
+ struct sample *s;
+ int err, val;
+
+ s = data;
+ switch (s->seq) {
+ case 1:
+ ASSERT_EQ(s->value, 42, "sample_value");
+ err = bpf_map_lookup_elem(skel_map_key->maps.hash_map.map_fd,
+ s, &val);
+ ASSERT_OK(err, "hash_map bpf_map_lookup_elem");
+ ASSERT_EQ(val, 1, "hash_map val");
+ return -EDONE;
+ default:
+ return 0;
+ }
+}
+
+static void ringbuf_map_key_subtest(void)
+{
+ int err;
+
+ skel_map_key = test_ringbuf_map_key_lskel__open();
+ if (!ASSERT_OK_PTR(skel_map_key, "test_ringbuf_map_key_lskel__open"))
+ return;
+
+ skel_map_key->maps.ringbuf.max_entries = getpagesize();
+ skel_map_key->bss->pid = getpid();
+
+ err = test_ringbuf_map_key_lskel__load(skel_map_key);
+ if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__load"))
+ goto cleanup;
+
+ ringbuf = ring_buffer__new(skel_map_key->maps.ringbuf.map_fd,
+ process_map_key_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+ goto cleanup;
+
+ err = test_ringbuf_map_key_lskel__attach(skel_map_key);
+ if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__attach"))
+ goto cleanup_ringbuf;
+
+ syscall(__NR_getpgid);
+ ASSERT_EQ(skel_map_key->bss->seq, 1, "skel_map_key->bss->seq");
+ err = ring_buffer__poll(ringbuf, -1);
+ ASSERT_EQ(err, -EDONE, "ring_buffer__poll");
+
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_map_key_lskel__destroy(skel_map_key);
+}
+
+static void ringbuf_overwrite_mode_subtest(void)
+{
+ unsigned long size, len1, len2, len3, len4, len5;
+ unsigned long expect_avail_data, expect_prod_pos, expect_over_pos;
+ struct test_ringbuf_overwrite_lskel *skel;
+ int page_size = getpagesize();
+ int err;
+
+ skel = test_ringbuf_overwrite_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ size = page_size;
+ len1 = page_size / 2;
+ len2 = page_size / 4;
+ len3 = size - len1 - len2 - BPF_RINGBUF_HDR_SZ * 3;
+ len4 = len3 - 8;
+ len5 = len3; /* retry with len3 */
+
+ skel->maps.ringbuf.max_entries = size;
+ skel->rodata->LEN1 = len1;
+ skel->rodata->LEN2 = len2;
+ skel->rodata->LEN3 = len3;
+ skel->rodata->LEN4 = len4;
+ skel->rodata->LEN5 = len5;
+
+ skel->bss->pid = getpid();
+
+ err = test_ringbuf_overwrite_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_ringbuf_overwrite_lskel__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ syscall(__NR_getpgid);
+
+ ASSERT_EQ(skel->bss->reserve1_fail, 0, "reserve 1");
+ ASSERT_EQ(skel->bss->reserve2_fail, 0, "reserve 2");
+ ASSERT_EQ(skel->bss->reserve3_fail, 1, "reserve 3");
+ ASSERT_EQ(skel->bss->reserve4_fail, 0, "reserve 4");
+ ASSERT_EQ(skel->bss->reserve5_fail, 0, "reserve 5");
+
+ ASSERT_EQ(skel->bss->ring_size, size, "check_ring_size");
+
+ expect_avail_data = len2 + len4 + len5 + 3 * BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->avail_data, expect_avail_data, "check_avail_size");
+
+ ASSERT_EQ(skel->bss->cons_pos, 0, "check_cons_pos");
+
+ expect_prod_pos = len1 + len2 + len4 + len5 + 4 * BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->prod_pos, expect_prod_pos, "check_prod_pos");
+
+ expect_over_pos = len1 + BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->over_pos, expect_over_pos, "check_over_pos");
+
+ test_ringbuf_overwrite_lskel__detach(skel);
+cleanup:
+ test_ringbuf_overwrite_lskel__destroy(skel);
+}
+
+void test_ringbuf(void)
+{
+ if (test__start_subtest("ringbuf"))
+ ringbuf_subtest();
+ if (test__start_subtest("ringbuf_n"))
+ ringbuf_n_subtest();
+ if (test__start_subtest("ringbuf_map_key"))
+ ringbuf_map_key_subtest();
+ if (test__start_subtest("ringbuf_write"))
+ ringbuf_write_subtest();
+ if (test__start_subtest("ringbuf_overwrite_mode"))
+ ringbuf_overwrite_mode_subtest();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index d37161e59bb2..58522195081b 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -41,13 +41,43 @@ static int process_sample(void *ctx, void *data, size_t len)
void test_ringbuf_multi(void)
{
struct test_ringbuf_multi *skel;
- struct ring_buffer *ringbuf;
+ struct ring_buffer *ringbuf = NULL;
+ struct ring *ring_old;
+ struct ring *ring;
int err;
+ int page_size = getpagesize();
+ int proto_fd = -1;
- skel = test_ringbuf_multi__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ skel = test_ringbuf_multi__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ /* validate ringbuf size adjustment logic */
+ ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_before");
+ ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size + 1), "rb1_resize");
+ ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), 2 * page_size, "rb1_size_after");
+ ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size), "rb1_reset");
+ ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_final");
+
+ proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL);
+ if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
+ if (CHECK(err != 0, "bpf_map__set_inner_map_fd", "bpf_map__set_inner_map_fd failed\n"))
+ goto cleanup;
+
+ err = test_ringbuf_multi__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
+ close(proto_fd);
+ proto_fd = -1;
+
+ /* make sure we can't resize ringbuf after object load */
+ if (!ASSERT_ERR(bpf_map__set_max_entries(skel->maps.ringbuf1, 3 * page_size), "rb1_resize_after_load"))
+ goto cleanup;
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
@@ -56,11 +86,24 @@ void test_ringbuf_multi(void)
if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
goto cleanup;
+ /* verify ring_buffer__ring returns expected results */
+ ring = ring_buffer__ring(ringbuf, 0);
+ if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0"))
+ goto cleanup;
+ ring_old = ring;
+ ring = ring_buffer__ring(ringbuf, 1);
+ ASSERT_ERR_PTR(ring, "ring_buffer__ring_idx_1");
+
err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2),
process_sample, (void *)(long)2);
if (CHECK(err, "ringbuf_add", "failed to add another ring\n"))
goto cleanup;
+ /* verify adding a new ring didn't invalidate our older pointer */
+ ring = ring_buffer__ring(ringbuf, 0);
+ if (!ASSERT_EQ(ring, ring_old, "ring_buffer__ring_again"))
+ goto cleanup;
+
err = test_ringbuf_multi__attach(skel);
if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
goto cleanup;
@@ -97,6 +140,8 @@ void test_ringbuf_multi(void)
2L, skel->bss->total);
cleanup:
+ if (proto_fd >= 0)
+ close(proto_fd);
ring_buffer__free(ringbuf);
test_ringbuf_multi__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 8b571890c57e..c3d78846f31a 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -124,6 +124,11 @@ static struct sec_name_test tests[] = {
{0, BPF_CGROUP_INET6_CONNECT},
},
{
+ "cgroup/connect_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT},
+ {0, BPF_CGROUP_UNIX_CONNECT},
+ },
+ {
"cgroup/sendmsg4",
{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG},
{0, BPF_CGROUP_UDP4_SENDMSG},
@@ -134,6 +139,11 @@ static struct sec_name_test tests[] = {
{0, BPF_CGROUP_UDP6_SENDMSG},
},
{
+ "cgroup/sendmsg_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG},
+ {0, BPF_CGROUP_UNIX_SENDMSG},
+ },
+ {
"cgroup/recvmsg4",
{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG},
{0, BPF_CGROUP_UDP4_RECVMSG},
@@ -144,6 +154,11 @@ static struct sec_name_test tests[] = {
{0, BPF_CGROUP_UDP6_RECVMSG},
},
{
+ "cgroup/recvmsg_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG},
+ {0, BPF_CGROUP_UNIX_RECVMSG},
+ },
+ {
"cgroup/sysctl",
{0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
{0, BPF_CGROUP_SYSCTL},
@@ -158,6 +173,36 @@ static struct sec_name_test tests[] = {
{0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
{0, BPF_CGROUP_SETSOCKOPT},
},
+ {
+ "cgroup/getpeername4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME},
+ {0, BPF_CGROUP_INET4_GETPEERNAME},
+ },
+ {
+ "cgroup/getpeername6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME},
+ {0, BPF_CGROUP_INET6_GETPEERNAME},
+ },
+ {
+ "cgroup/getpeername_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME},
+ {0, BPF_CGROUP_UNIX_GETPEERNAME},
+ },
+ {
+ "cgroup/getsockname4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME},
+ {0, BPF_CGROUP_INET4_GETSOCKNAME},
+ },
+ {
+ "cgroup/getsockname6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME},
+ {0, BPF_CGROUP_INET6_GETSOCKNAME},
+ },
+ {
+ "cgroup/getsockname_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME},
+ {0, BPF_CGROUP_UNIX_GETSOCKNAME},
+ },
};
static void test_prog_type_by_name(const struct sec_name_test *test)
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 821b4146b7b6..3dbcc091f16c 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -18,7 +18,6 @@
#include <netinet/in.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "test_progs.h"
@@ -38,17 +37,11 @@ static int sk_fds[REUSEPORT_ARRAY_SIZE];
static int reuseport_array = -1, outer_map = -1;
static enum bpf_map_type inner_map_type;
static int select_by_skb_data_prog;
-static int saved_tcp_syncookie = -1;
static struct bpf_object *obj;
-static int saved_tcp_fo = -1;
static __u32 index_zero;
static int epfd;
-static union sa46 {
- struct sockaddr_in6 v6;
- struct sockaddr_in v4;
- sa_family_t family;
-} srv_sa;
+static struct sockaddr_storage srv_sa;
#define RET_IF(condition, tag, format...) ({ \
if (CHECK_FAIL(condition)) { \
@@ -66,30 +59,21 @@ static union sa46 {
static int create_maps(enum bpf_map_type inner_type)
{
- struct bpf_create_map_attr attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
inner_map_type = inner_type;
/* Creating reuseport_array */
- attr.name = "reuseport_array";
- attr.map_type = inner_type;
- attr.key_size = sizeof(__u32);
- attr.value_size = sizeof(__u32);
- attr.max_entries = REUSEPORT_ARRAY_SIZE;
-
- reuseport_array = bpf_create_map_xattr(&attr);
- RET_ERR(reuseport_array == -1, "creating reuseport_array",
+ reuseport_array = bpf_map_create(inner_type, "reuseport_array",
+ sizeof(__u32), sizeof(__u32), REUSEPORT_ARRAY_SIZE, NULL);
+ RET_ERR(reuseport_array < 0, "creating reuseport_array",
"reuseport_array:%d errno:%d\n", reuseport_array, errno);
/* Creating outer_map */
- attr.name = "outer_map";
- attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
- attr.key_size = sizeof(__u32);
- attr.value_size = sizeof(__u32);
- attr.max_entries = 1;
- attr.inner_map_fd = reuseport_array;
- outer_map = bpf_create_map_xattr(&attr);
- RET_ERR(outer_map == -1, "creating outer_map",
+ opts.inner_map_fd = reuseport_array;
+ outer_map = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer_map",
+ sizeof(__u32), sizeof(__u32), 1, &opts);
+ RET_ERR(outer_map < 0, "creating outer_map",
"outer_map:%d errno:%d\n", outer_map, errno);
return 0;
@@ -101,9 +85,10 @@ static int prepare_bpf_obj(void)
struct bpf_map *map;
int err;
- obj = bpf_object__open("test_select_reuseport_kern.o");
- RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
- "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+ obj = bpf_object__open("test_select_reuseport_kern.bpf.o");
+ err = libbpf_get_error(obj);
+ RET_ERR(err, "open test_select_reuseport_kern.bpf.o",
+ "obj:%p PTR_ERR(obj):%d\n", obj, err);
map = bpf_object__find_map_by_name(obj, "outer_map");
RET_ERR(!map, "find outer_map", "!map\n");
@@ -113,57 +98,57 @@ static int prepare_bpf_obj(void)
err = bpf_object__load(obj);
RET_ERR(err, "load bpf_object", "err:%d\n", err);
- prog = bpf_program__next(NULL, obj);
+ prog = bpf_object__next_program(obj, NULL);
RET_ERR(!prog, "get first bpf_program", "!prog\n");
select_by_skb_data_prog = bpf_program__fd(prog);
- RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+ RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
map = bpf_object__find_map_by_name(obj, "result_map");
RET_ERR(!map, "find result_map", "!map\n");
result_map = bpf_map__fd(map);
- RET_ERR(result_map == -1, "get result_map fd",
+ RET_ERR(result_map < 0, "get result_map fd",
"result_map:%d\n", result_map);
map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
tmp_index_ovr_map = bpf_map__fd(map);
- RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+ RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
map = bpf_object__find_map_by_name(obj, "linum_map");
RET_ERR(!map, "find linum_map", "!map\n");
linum_map = bpf_map__fd(map);
- RET_ERR(linum_map == -1, "get linum_map fd",
+ RET_ERR(linum_map < 0, "get linum_map fd",
"linum_map:%d\n", linum_map);
map = bpf_object__find_map_by_name(obj, "data_check_map");
RET_ERR(!map, "find data_check_map", "!map\n");
data_check_map = bpf_map__fd(map);
- RET_ERR(data_check_map == -1, "get data_check_map fd",
+ RET_ERR(data_check_map < 0, "get data_check_map fd",
"data_check_map:%d\n", data_check_map);
return 0;
}
-static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
+static void ss_init_loopback(struct sockaddr_storage *sa, sa_family_t family)
{
memset(sa, 0, sizeof(*sa));
- sa->family = family;
- if (sa->family == AF_INET6)
- sa->v6.sin6_addr = in6addr_loopback;
+ sa->ss_family = family;
+ if (sa->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_loopback;
else
- sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
}
-static void sa46_init_inany(union sa46 *sa, sa_family_t family)
+static void ss_init_inany(struct sockaddr_storage *sa, sa_family_t family)
{
memset(sa, 0, sizeof(*sa));
- sa->family = family;
- if (sa->family == AF_INET6)
- sa->v6.sin6_addr = in6addr_any;
+ sa->ss_family = family;
+ if (sa->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_any;
else
- sa->v4.sin_addr.s_addr = INADDR_ANY;
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr = INADDR_ANY;
}
static int read_int_sysctl(const char *sysctl)
@@ -202,14 +187,6 @@ static int write_int_sysctl(const char *sysctl, int v)
return 0;
}
-static void restore_sysctls(void)
-{
- if (saved_tcp_fo != -1)
- write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
- if (saved_tcp_syncookie != -1)
- write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
-}
-
static int enable_fastopen(void)
{
int fo;
@@ -237,7 +214,7 @@ static long get_linum(void)
int err;
err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
- RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+ RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
err, errno);
return linum;
@@ -247,18 +224,18 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
int cli_fd)
{
struct data_check expected = {}, result;
- union sa46 cli_sa;
+ struct sockaddr_storage cli_sa;
socklen_t addrlen;
int err;
addrlen = sizeof(cli_sa);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
&addrlen);
- RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
- RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
err, errno);
if (type == SOCK_STREAM) {
@@ -270,26 +247,32 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
}
if (family == AF_INET6) {
+ struct sockaddr_in6 *srv_v6 = (struct sockaddr_in6 *)&srv_sa;
+ struct sockaddr_in6 *cli_v6 = (struct sockaddr_in6 *)&cli_sa;
+
expected.eth_protocol = htons(ETH_P_IPV6);
- expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
- !srv_sa.v6.sin6_addr.s6_addr32[2] &&
- !srv_sa.v6.sin6_addr.s6_addr32[1] &&
- !srv_sa.v6.sin6_addr.s6_addr32[0];
+ expected.bind_inany = !srv_v6->sin6_addr.s6_addr32[3] &&
+ !srv_v6->sin6_addr.s6_addr32[2] &&
+ !srv_v6->sin6_addr.s6_addr32[1] &&
+ !srv_v6->sin6_addr.s6_addr32[0];
- memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
- sizeof(cli_sa.v6.sin6_addr));
+ memcpy(&expected.skb_addrs[0], cli_v6->sin6_addr.s6_addr32,
+ sizeof(cli_v6->sin6_addr));
memcpy(&expected.skb_addrs[4], &in6addr_loopback,
sizeof(in6addr_loopback));
- expected.skb_ports[0] = cli_sa.v6.sin6_port;
- expected.skb_ports[1] = srv_sa.v6.sin6_port;
+ expected.skb_ports[0] = cli_v6->sin6_port;
+ expected.skb_ports[1] = srv_v6->sin6_port;
} else {
+ struct sockaddr_in *srv_v4 = (struct sockaddr_in *)&srv_sa;
+ struct sockaddr_in *cli_v4 = (struct sockaddr_in *)&cli_sa;
+
expected.eth_protocol = htons(ETH_P_IP);
- expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
+ expected.bind_inany = !srv_v4->sin_addr.s_addr;
- expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
+ expected.skb_addrs[0] = cli_v4->sin_addr.s_addr;
expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
- expected.skb_ports[0] = cli_sa.v4.sin_port;
- expected.skb_ports[1] = srv_sa.v4.sin_port;
+ expected.skb_ports[0] = cli_v4->sin_port;
+ expected.skb_ports[1] = srv_v4->sin_port;
}
if (memcmp(&result, &expected, offsetof(struct data_check,
@@ -347,7 +330,7 @@ static void check_results(void)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &results[i]);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
}
@@ -383,16 +366,15 @@ static void check_results(void)
static int send_data(int type, sa_family_t family, void *data, size_t len,
enum result expected)
{
- union sa46 cli_sa;
+ struct sockaddr_storage cli_sa;
int fd, err;
fd = socket(family, type, 0);
RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
- sa46_init_loopback(&cli_sa, family);
+ ss_init_loopback(&cli_sa, family);
err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
-
err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
sizeof(srv_sa));
RET_ERR(err != len && expected >= PASS,
@@ -524,12 +506,12 @@ static void test_syncookie(int type, sa_family_t family)
*/
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
&tmp_index, BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
"err:%d errno:%d\n", err, errno);
do_test(type, family, &cmd, PASS);
err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
&tmp_index);
- RET_IF(err == -1 || tmp_index != -1,
+ RET_IF(err < 0 || tmp_index >= 0,
"lookup_elem(tmp_index_ovr_map)",
"err:%d errno:%d tmp_index:%d\n",
err, errno, tmp_index);
@@ -569,7 +551,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_before += tmp;
}
@@ -584,7 +566,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_after += tmp;
}
@@ -608,9 +590,9 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
socklen_t addrlen;
if (inany)
- sa46_init_inany(&srv_sa, family);
+ ss_init_inany(&srv_sa, family);
else
- sa46_init_loopback(&srv_sa, family);
+ ss_init_loopback(&srv_sa, family);
addrlen = sizeof(srv_sa);
/*
@@ -632,24 +614,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
SO_ATTACH_REUSEPORT_EBPF,
&select_by_skb_data_prog,
sizeof(select_by_skb_data_prog));
- RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+ RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
"err:%d errno:%d\n", err, errno);
}
err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
- RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+ RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
if (type == SOCK_STREAM) {
err = listen(sk_fds[i], 10);
- RET_IF(err == -1, "listen()",
+ RET_IF(err < 0, "listen()",
"sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
}
err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
BPF_NOEXIST);
- RET_IF(err == -1, "update_elem(reuseport_array)",
+ RET_IF(err < 0, "update_elem(reuseport_array)",
"sk_fds[%d] err:%d errno:%d\n", i, err, errno);
if (i == first) {
@@ -682,7 +664,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
prepare_sk_fds(type, family, inany);
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
"err:%d errno:%d\n", err, errno);
/* Install reuseport_array to outer_map? */
@@ -691,7 +673,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
BPF_ANY);
- RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+ RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
"err:%d errno:%d\n", err, errno);
}
@@ -720,18 +702,18 @@ static void cleanup_per_test(bool no_inner_map)
return;
err = bpf_map_delete_elem(outer_map, &index_zero);
- RET_IF(err == -1, "delete_elem(outer_map)",
+ RET_IF(err < 0, "delete_elem(outer_map)",
"err:%d errno:%d\n", err, errno);
}
static void cleanup(void)
{
- if (outer_map != -1) {
+ if (outer_map >= 0) {
close(outer_map);
outer_map = -1;
}
- if (reuseport_array != -1) {
+ if (reuseport_array >= 0) {
close(reuseport_array);
reuseport_array = -1;
}
@@ -802,6 +784,7 @@ static void test_config(int sotype, sa_family_t family, bool inany)
TEST_INIT(test_pass_on_err),
TEST_INIT(test_detach_bpf),
};
+ struct netns_obj *netns;
char s[MAX_TEST_NAME];
const struct test *t;
@@ -817,9 +800,21 @@ static void test_config(int sotype, sa_family_t family, bool inany)
if (!test__start_subtest(s))
continue;
+ netns = netns_new("select_reuseport", true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ continue;
+
+ if (CHECK_FAIL(enable_fastopen()))
+ goto out;
+ if (CHECK_FAIL(disable_syncookie()))
+ goto out;
+
setup_per_test(sotype, family, inany, t->no_inner_map);
t->fn(sotype, family);
cleanup_per_test(t->no_inner_map);
+
+out:
+ netns_free(netns);
}
}
@@ -857,23 +852,9 @@ out:
cleanup();
}
-void test_select_reuseport(void)
+void serial_test_select_reuseport(void)
{
- saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
- if (saved_tcp_fo < 0)
- goto out;
- saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
- if (saved_tcp_syncookie < 0)
- goto out;
-
- if (enable_fastopen())
- goto out;
- if (disable_syncookie())
- goto out;
-
test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
test_map_type(BPF_MAP_TYPE_SOCKMAP);
test_map_type(BPF_MAP_TYPE_SOCKHASH);
-out:
- restore_sysctls();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 7043e6ded0e6..7ac4d5a488aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -1,38 +1,46 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include "test_send_signal_kern.skel.h"
+#include "io_helpers.h"
-static volatile int sigusr1_received = 0;
+static int sigusr1_received;
static void sigusr1_handler(int signum)
{
- sigusr1_received++;
+ sigusr1_received = 8;
+}
+
+static void sigusr1_siginfo_handler(int s, siginfo_t *i, void *v)
+{
+ sigusr1_received = (int)(long long)i->si_value.sival_ptr;
}
static void test_send_signal_common(struct perf_event_attr *attr,
- bool signal_thread,
- const char *test_name)
+ bool signal_thread, bool remote)
{
struct test_send_signal_kern *skel;
+ struct sigaction sa;
int pipe_c2p[2], pipe_p2c[2];
int err = -1, pmu_fd = -1;
- __u32 duration = 0;
+ volatile int j = 0;
+ int retry_count;
char buf[256];
pid_t pid;
+ int old_prio;
- if (CHECK(pipe(pipe_c2p), test_name,
- "pipe pipe_c2p error: %s\n", strerror(errno)))
+ if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p"))
return;
- if (CHECK(pipe(pipe_p2c), test_name,
- "pipe pipe_p2c error: %s\n", strerror(errno))) {
+ if (!ASSERT_OK(pipe(pipe_p2c), "pipe_p2c")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
return;
}
pid = fork();
- if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
+ if (!ASSERT_GE(pid, 0, "fork")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -42,25 +50,56 @@ static void test_send_signal_common(struct perf_event_attr *attr,
if (pid == 0) {
/* install signal handler and notify parent */
- signal(SIGUSR1, sigusr1_handler);
+ if (remote) {
+ sa.sa_sigaction = sigusr1_siginfo_handler;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ ASSERT_NEQ(sigaction(SIGUSR1, &sa, NULL), -1, "sigaction");
+ } else {
+ ASSERT_NEQ(signal(SIGUSR1, sigusr1_handler), SIG_ERR, "signal");
+ }
close(pipe_c2p[0]); /* close read */
close(pipe_p2c[1]); /* close write */
+ /* boost with a high priority so we got a higher chance
+ * that if an interrupt happens, the underlying task
+ * is this process.
+ */
+ if (!remote) {
+ errno = 0;
+ old_prio = getpriority(PRIO_PROCESS, 0);
+ ASSERT_OK(errno, "getpriority");
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority");
+ }
+
/* notify parent signal handler is installed */
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* make sure parent enabled bpf program to send_signal */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* wait a little for signal handler */
- sleep(1);
+ for (int i = 0; i < 1000000000 && !sigusr1_received; i++) {
+ j /= i + j + 1;
+ if (remote)
+ sleep(1);
+ else
+ if (!attr)
+ /* trigger the nanosleep tracepoint program. */
+ usleep(1);
+ }
+
+ buf[0] = sigusr1_received;
- buf[0] = sigusr1_received ? '2' : '0';
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(sigusr1_received, 8, "sigusr1_received");
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* wait for parent notification and exit */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
+
+ /* restore the old priority */
+ if (!remote)
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority");
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -71,86 +110,132 @@ static void test_send_signal_common(struct perf_event_attr *attr,
close(pipe_p2c[0]); /* close read */
skel = test_send_signal_kern__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
goto skel_open_load_failure;
+ /* boost with a high priority so we got a higher chance
+ * that if an interrupt happens, the underlying task
+ * is this process.
+ */
+ if (remote) {
+ errno = 0;
+ old_prio = getpriority(PRIO_PROCESS, 0);
+ ASSERT_OK(errno, "getpriority");
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority");
+ }
+
if (!attr) {
err = test_send_signal_kern__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed\n")) {
+ if (!ASSERT_OK(err, "skel_attach")) {
err = -1;
goto destroy_skel;
}
} else {
- pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
- -1 /* group id */, 0 /* flags */);
- if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
- strerror(errno))) {
+ if (!remote)
+ pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1 /* cpu */,
+ -1 /* group id */, 0 /* flags */);
+ else
+ pmu_fd = syscall(__NR_perf_event_open, attr, getpid(), -1 /* cpu */,
+ -1 /* group id */, 0 /* flags */);
+ if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
err = -1;
goto destroy_skel;
}
skel->links.send_signal_perf =
bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
- if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+ if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event"))
goto disable_pmu;
}
/* wait until child signal handler installed */
- CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
/* trigger the bpf send_signal */
- skel->bss->pid = pid;
- skel->bss->sig = SIGUSR1;
skel->bss->signal_thread = signal_thread;
+ skel->bss->sig = SIGUSR1;
+ if (!remote) {
+ skel->bss->target_pid = 0;
+ skel->bss->pid = pid;
+ } else {
+ skel->bss->target_pid = pid;
+ skel->bss->pid = getpid();
+ }
/* notify child that bpf program can send_signal now */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
-
- /* wait for result */
- err = read(pipe_c2p[0], buf, 1);
- if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
+
+ for (retry_count = 0;;) {
+ /* For the remote test, the BPF program is triggered from this
+ * process but the other process/thread is signaled.
+ */
+ if (remote) {
+ if (!attr) {
+ for (int i = 0; i < 10; i++)
+ usleep(1);
+ } else {
+ for (int i = 0; i < 100000000; i++)
+ j /= i + 1;
+ }
+ }
+ /* wait for result */
+ err = read_with_timeout(pipe_c2p[0], buf, 1, 100);
+ if (err == -EAGAIN && retry_count++ < 10000)
+ continue;
+ break;
+ }
+ if (!ASSERT_GE(err, 0, "reading pipe"))
goto disable_pmu;
- if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
+ if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) {
err = -1;
goto disable_pmu;
}
- CHECK(buf[0] != '2', test_name, "incorrect result\n");
+ ASSERT_EQ(buf[0], 8, "incorrect result");
/* notify child safe to exit */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
disable_pmu:
close(pmu_fd);
destroy_skel:
test_send_signal_kern__destroy(skel);
+ /* restore the old priority */
+ if (remote)
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority");
skel_open_load_failure:
close(pipe_c2p[0]);
close(pipe_p2c[1]);
+ /*
+ * Child is either about to exit cleanly or stuck in case of errors.
+ * Nudge it to exit.
+ */
+ kill(pid, SIGKILL);
wait(NULL);
}
-static void test_send_signal_tracepoint(bool signal_thread)
+static void test_send_signal_tracepoint(bool signal_thread, bool remote)
{
- test_send_signal_common(NULL, signal_thread, "tracepoint");
+ test_send_signal_common(NULL, signal_thread, remote);
}
-static void test_send_signal_perf(bool signal_thread)
+static void test_send_signal_perf(bool signal_thread, bool remote)
{
struct perf_event_attr attr = {
- .sample_period = 1,
+ .freq = 1,
+ .sample_freq = 1000,
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- test_send_signal_common(&attr, signal_thread, "perf_sw_event");
+ test_send_signal_common(&attr, signal_thread, remote);
}
-static void test_send_signal_nmi(bool signal_thread)
+static void test_send_signal_nmi(bool signal_thread, bool remote)
{
struct perf_event_attr attr = {
- .sample_period = 1,
+ .freq = 1,
+ .sample_freq = 1000,
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
@@ -162,7 +247,7 @@ static void test_send_signal_nmi(bool signal_thread)
pmu_fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */,
-1 /* cpu */, -1 /* group_fd */, 0 /* flags */);
if (pmu_fd == -1) {
- if (errno == ENOENT) {
+ if (errno == ENOENT || errno == EOPNOTSUPP) {
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
__func__);
test__skip();
@@ -173,21 +258,35 @@ static void test_send_signal_nmi(bool signal_thread)
close(pmu_fd);
}
- test_send_signal_common(&attr, signal_thread, "perf_hw_event");
+ test_send_signal_common(&attr, signal_thread, remote);
}
void test_send_signal(void)
{
if (test__start_subtest("send_signal_tracepoint"))
- test_send_signal_tracepoint(false);
+ test_send_signal_tracepoint(false, false);
if (test__start_subtest("send_signal_perf"))
- test_send_signal_perf(false);
+ test_send_signal_perf(false, false);
if (test__start_subtest("send_signal_nmi"))
- test_send_signal_nmi(false);
+ test_send_signal_nmi(false, false);
if (test__start_subtest("send_signal_tracepoint_thread"))
- test_send_signal_tracepoint(true);
+ test_send_signal_tracepoint(true, false);
if (test__start_subtest("send_signal_perf_thread"))
- test_send_signal_perf(true);
+ test_send_signal_perf(true, false);
if (test__start_subtest("send_signal_nmi_thread"))
- test_send_signal_nmi(true);
+ test_send_signal_nmi(true, false);
+
+ /* Signal remote thread and thread group */
+ if (test__start_subtest("send_signal_tracepoint_remote"))
+ test_send_signal_tracepoint(false, true);
+ if (test__start_subtest("send_signal_perf_remote"))
+ test_send_signal_perf(false, true);
+ if (test__start_subtest("send_signal_nmi_remote"))
+ test_send_signal_nmi(false, true);
+ if (test__start_subtest("send_signal_tracepoint_thread_remote"))
+ test_send_signal_tracepoint(true, true);
+ if (test__start_subtest("send_signal_perf_thread_remote"))
+ test_send_signal_perf(true, true);
+ if (test__start_subtest("send_signal_nmi_thread_remote"))
+ test_send_signal_nmi(true, true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
index 189a34a7addb..15dacfcfaa6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
@@ -25,7 +25,8 @@ static void *worker(void *p)
return NULL;
}
-void test_send_signal_sched_switch(void)
+/* NOTE: cause events loss */
+void serial_test_send_signal_sched_switch(void)
{
struct test_send_signal_kern *skel;
pthread_t threads[THREAD_COUNT];
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
new file mode 100644
index 000000000000..e4dac529d424
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <linux/socket.h>
+#include <linux/tls.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#include "setget_sockopt.skel.h"
+
+#define CG_NAME "/setget-sockopt-test"
+
+static const char addr4_str[] = "127.0.0.1";
+static const char addr6_str[] = "::1";
+static struct setget_sockopt *skel;
+static int cg_fd;
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "set lo up"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link add dev binddevtest1 type veth peer name binddevtest2"),
+ "add veth"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev binddevtest1 up"),
+ "bring veth up"))
+ return -1;
+
+ return 0;
+}
+
+static void test_tcp(int family)
+{
+ struct setget_sockopt__bss *bss = skel->bss;
+ int sfd, cfd;
+
+ memset(bss, 0, sizeof(*bss));
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_GE(sfd, 0, "start_server"))
+ return;
+
+ cfd = connect_to_fd(sfd, 0);
+ if (!ASSERT_GE(cfd, 0, "connect_to_fd_server")) {
+ close(sfd);
+ return;
+ }
+ close(sfd);
+ close(cfd);
+
+ ASSERT_EQ(bss->nr_listen, 1, "nr_listen");
+ ASSERT_EQ(bss->nr_connect, 1, "nr_connect");
+ ASSERT_EQ(bss->nr_active, 1, "nr_active");
+ ASSERT_EQ(bss->nr_passive, 1, "nr_passive");
+ ASSERT_EQ(bss->nr_socket_post_create, 2, "nr_socket_post_create");
+ ASSERT_EQ(bss->nr_binddev, 2, "nr_bind");
+}
+
+static void test_udp(int family)
+{
+ struct setget_sockopt__bss *bss = skel->bss;
+ int sfd;
+
+ memset(bss, 0, sizeof(*bss));
+
+ sfd = start_server(family, SOCK_DGRAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_GE(sfd, 0, "start_server"))
+ return;
+ close(sfd);
+
+ ASSERT_GE(bss->nr_socket_post_create, 1, "nr_socket_post_create");
+ ASSERT_EQ(bss->nr_binddev, 1, "nr_bind");
+}
+
+static void test_ktls(int family)
+{
+ struct tls12_crypto_info_aes_gcm_128 aes128;
+ struct setget_sockopt__bss *bss = skel->bss;
+ int cfd = -1, sfd = -1, fd = -1, ret;
+ char buf;
+
+ memset(bss, 0, sizeof(*bss));
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_GE(sfd, 0, "start_server"))
+ return;
+ fd = connect_to_fd(sfd, 0);
+ if (!ASSERT_GE(fd, 0, "connect_to_fd"))
+ goto err_out;
+
+ cfd = accept(sfd, NULL, 0);
+ if (!ASSERT_GE(cfd, 0, "accept"))
+ goto err_out;
+
+ close(sfd);
+ sfd = -1;
+
+ /* Setup KTLS */
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (!ASSERT_OK(ret, "setsockopt"))
+ goto err_out;
+ ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (!ASSERT_OK(ret, "setsockopt"))
+ goto err_out;
+
+ memset(&aes128, 0, sizeof(aes128));
+ aes128.info.version = TLS_1_2_VERSION;
+ aes128.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ ret = setsockopt(fd, SOL_TLS, TLS_TX, &aes128, sizeof(aes128));
+ if (!ASSERT_OK(ret, "setsockopt"))
+ goto err_out;
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX, &aes128, sizeof(aes128));
+ if (!ASSERT_OK(ret, "setsockopt"))
+ goto err_out;
+
+ /* KTLS is enabled */
+
+ close(fd);
+ /* At this point, the cfd socket is at the CLOSE_WAIT state
+ * and still run TLS protocol. The test for
+ * BPF_TCP_CLOSE_WAIT should be run at this point.
+ */
+ ret = read(cfd, &buf, sizeof(buf));
+ ASSERT_EQ(ret, 0, "read");
+ close(cfd);
+
+ ASSERT_EQ(bss->nr_listen, 1, "nr_listen");
+ ASSERT_EQ(bss->nr_connect, 1, "nr_connect");
+ ASSERT_EQ(bss->nr_active, 1, "nr_active");
+ ASSERT_EQ(bss->nr_passive, 1, "nr_passive");
+ ASSERT_EQ(bss->nr_socket_post_create, 2, "nr_socket_post_create");
+ ASSERT_EQ(bss->nr_binddev, 2, "nr_bind");
+ ASSERT_EQ(bss->nr_fin_wait1, 1, "nr_fin_wait1");
+ return;
+
+err_out:
+ close(fd);
+ close(cfd);
+ close(sfd);
+}
+
+static void test_nonstandard_opt(int family)
+{
+ struct setget_sockopt__bss *bss = skel->bss;
+ struct bpf_link *getsockopt_link = NULL;
+ int sfd = -1, fd = -1, cfd = -1, flags;
+ socklen_t flagslen = sizeof(flags);
+
+ memset(bss, 0, sizeof(*bss));
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_GE(sfd, 0, "start_server"))
+ return;
+
+ fd = connect_to_fd(sfd, 0);
+ if (!ASSERT_GE(fd, 0, "connect_to_fd_server"))
+ goto err_out;
+
+ /* cgroup/getsockopt prog will intercept getsockopt() below and
+ * retrieve the tcp socket bpf_sock_ops_cb_flags value for the
+ * accept()ed socket; this was set earlier in the passive established
+ * callback for the accept()ed socket via bpf_setsockopt().
+ */
+ getsockopt_link = bpf_program__attach_cgroup(skel->progs._getsockopt, cg_fd);
+ if (!ASSERT_OK_PTR(getsockopt_link, "getsockopt prog"))
+ goto err_out;
+
+ cfd = accept(sfd, NULL, 0);
+ if (!ASSERT_GE(cfd, 0, "accept"))
+ goto err_out;
+
+ if (!ASSERT_OK(getsockopt(cfd, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, &flagslen),
+ "getsockopt_flags"))
+ goto err_out;
+ ASSERT_EQ(flags & BPF_SOCK_OPS_STATE_CB_FLAG, BPF_SOCK_OPS_STATE_CB_FLAG,
+ "cb_flags_set");
+err_out:
+ close(sfd);
+ if (fd != -1)
+ close(fd);
+ if (cfd != -1)
+ close(cfd);
+ bpf_link__destroy(getsockopt_link);
+}
+
+void test_setget_sockopt(void)
+{
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
+ return;
+
+ if (create_netns())
+ goto done;
+
+ skel = setget_sockopt__open();
+ if (!ASSERT_OK_PTR(skel, "open skel"))
+ goto done;
+
+ strcpy(skel->rodata->veth, "binddevtest1");
+ skel->rodata->veth_ifindex = if_nametoindex("binddevtest1");
+ if (!ASSERT_GT(skel->rodata->veth_ifindex, 0, "if_nametoindex"))
+ goto done;
+
+ if (!ASSERT_OK(setget_sockopt__load(skel), "load skel"))
+ goto done;
+
+ skel->links.skops_sockopt =
+ bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
+ goto done;
+
+ skel->links.socket_post_create =
+ bpf_program__attach_cgroup(skel->progs.socket_post_create, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.socket_post_create, "attach_cgroup"))
+ goto done;
+
+ test_tcp(AF_INET6);
+ test_tcp(AF_INET);
+ test_udp(AF_INET6);
+ test_udp(AF_INET);
+ test_ktls(AF_INET6);
+ test_ktls(AF_INET);
+ test_nonstandard_opt(AF_INET);
+ test_nonstandard_opt(AF_INET6);
+
+done:
+ setget_sockopt__destroy(skel);
+ close(cg_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sha256.c b/tools/testing/selftests/bpf/prog_tests/sha256.c
new file mode 100644
index 000000000000..604a0b1423d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sha256.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "bpf/libbpf_internal.h"
+
+#define MAX_LEN 4096
+
+/* Test libbpf_sha256() for all lengths from 0 to MAX_LEN inclusively. */
+void test_sha256(void)
+{
+ /*
+ * The correctness of this value was verified by running this test with
+ * libbpf_sha256() replaced by OpenSSL's SHA256().
+ */
+ static const __u8 expected_digest_of_digests[SHA256_DIGEST_LENGTH] = {
+ 0x62, 0x30, 0x0e, 0x1d, 0xea, 0x7f, 0xc4, 0x74,
+ 0xfd, 0x8e, 0x64, 0x0b, 0xd8, 0x5f, 0xea, 0x04,
+ 0xf3, 0xef, 0x77, 0x42, 0xc2, 0x01, 0xb8, 0x90,
+ 0x6e, 0x19, 0x91, 0x1b, 0xca, 0xb3, 0x28, 0x42,
+ };
+ __u64 seed = 0;
+ __u8 *data = NULL, *digests = NULL;
+ __u8 digest_of_digests[SHA256_DIGEST_LENGTH];
+ size_t i;
+
+ data = malloc(MAX_LEN);
+ if (!ASSERT_OK_PTR(data, "malloc"))
+ goto out;
+ digests = malloc((MAX_LEN + 1) * SHA256_DIGEST_LENGTH);
+ if (!ASSERT_OK_PTR(digests, "malloc"))
+ goto out;
+
+ /* Generate MAX_LEN bytes of "random" data deterministically. */
+ for (i = 0; i < MAX_LEN; i++) {
+ seed = (seed * 25214903917 + 11) & ((1ULL << 48) - 1);
+ data[i] = (__u8)(seed >> 16);
+ }
+
+ /* Calculate a digest for each length 0 through MAX_LEN inclusively. */
+ for (i = 0; i <= MAX_LEN; i++)
+ libbpf_sha256(data, i, &digests[i * SHA256_DIGEST_LENGTH]);
+
+ /* Calculate and verify the digest of all the digests. */
+ libbpf_sha256(digests, (MAX_LEN + 1) * SHA256_DIGEST_LENGTH,
+ digest_of_digests);
+ ASSERT_MEMEQ(digest_of_digests, expected_digest_of_digests,
+ SHA256_DIGEST_LENGTH, "digest_of_digests");
+out:
+ free(data);
+ free(digests);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index dfcbddcbe4d3..70b49da5ca0a 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -13,36 +13,37 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
struct itimerval timeo = {
.it_value.tv_usec = 100000, /* 100ms */
};
- __u32 duration = 0, retval;
int prog_fd;
int err;
int i;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 0xffffffff,
+ );
for (i = 0; i < ARRAY_SIZE(prog); i++)
prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
prog[ARRAY_SIZE(prog) - 1] = BPF_EXIT_INSN();
- prog_fd = bpf_load_program(prog_type, prog, ARRAY_SIZE(prog),
+ prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog),
"GPL", 0, NULL, 0);
- CHECK(prog_fd < 0, "test-run", "errno %d\n", errno);
+ ASSERT_GE(prog_fd, 0, "test-run load");
err = sigaction(SIGALRM, &sigalrm_action, NULL);
- CHECK(err, "test-run-signal-sigaction", "errno %d\n", errno);
+ ASSERT_OK(err, "test-run-signal-sigaction");
err = setitimer(ITIMER_REAL, &timeo, NULL);
- CHECK(err, "test-run-signal-timer", "errno %d\n", errno);
+ ASSERT_OK(err, "test-run-signal-timer");
- err = bpf_prog_test_run(prog_fd, 0xffffffff, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(duration > 500000000, /* 500ms */
- "test-run-signal-duration",
- "duration %dns > 500ms\n",
- duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_LE(topts.duration, 500000000 /* 500ms */,
+ "test-run-signal-duration");
signal(SIGALRM, SIG_DFL);
}
-void test_signal_pending(enum bpf_prog_type prog_type)
+void test_signal_pending(void)
{
test_signal_pending_by_type(BPF_PROG_TYPE_SOCKET_FILTER);
test_signal_pending_by_type(BPF_PROG_TYPE_FLOW_DISSECTOR);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 3a469099f30d..10a0ab954b8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -15,6 +15,7 @@
#include <unistd.h>
#include "test_progs.h"
+#include "network_helpers.h"
#define BIND_PORT 1234
#define CONNECT_PORT 4321
@@ -22,14 +23,30 @@
#define NS_SELF "/proc/self/ns/net"
#define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map"
-static const struct timeval timeo_sec = { .tv_sec = 3 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
static int stop, duration;
static bool
configure_stack(void)
{
+ char tc_version[128];
char tc_cmd[BUFSIZ];
+ char *prog;
+ FILE *tc;
+
+ /* Check whether tc is built with libbpf. */
+ tc = popen("tc -V", "r");
+ if (CHECK_FAIL(!tc))
+ return false;
+ if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) {
+ pclose(tc);
+ return false;
+ }
+ if (strstr(tc_version, ", libbpf "))
+ prog = "test_sk_assign_libbpf.bpf.o";
+ else
+ prog = "test_sk_assign.bpf.o";
+ if (CHECK_FAIL(pclose(tc)))
+ return false;
/* Move to a new networking namespace */
if (CHECK_FAIL(unshare(CLONE_NEWNET)))
@@ -46,9 +63,9 @@ configure_stack(void)
/* Load qdisc, BPF program */
if (CHECK_FAIL(system("tc qdisc add dev lo clsact")))
return false;
- sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
- "direct-action object-file ./test_sk_assign.o",
- "section classifier/sk_assign_test",
+ sprintf(tc_cmd, "%s %s %s %s %s", "tc filter add dev lo ingress bpf",
+ "direct-action object-file", prog,
+ "section tc",
(env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
if (CHECK(system(tc_cmd), "BPF load failed;",
"run with -vv for more info\n"))
@@ -57,52 +74,6 @@ configure_stack(void)
return true;
}
-static int
-start_server(const struct sockaddr *addr, socklen_t len, int type)
-{
- int fd;
-
- fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- goto out;
- if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen)))
- goto close_out;
- if (CHECK_FAIL(bind(fd, addr, len) == -1))
- goto close_out;
- if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
- goto close_out;
-
- goto out;
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int
-connect_to_server(const struct sockaddr *addr, socklen_t len, int type)
-{
- int fd = -1;
-
- fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- goto out;
- if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
- timeo_optlen)))
- goto close_out;
- if (CHECK_FAIL(connect(fd, addr, len)))
- goto close_out;
-
- goto out;
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
static in_port_t
get_port(int fd)
{
@@ -129,15 +100,12 @@ get_port(int fd)
static ssize_t
rcv_msg(int srv_client, int type)
{
- struct sockaddr_storage ss;
char buf[BUFSIZ];
- socklen_t slen;
if (type == SOCK_STREAM)
return read(srv_client, &buf, sizeof(buf));
else
- return recvfrom(srv_client, &buf, sizeof(buf), 0,
- (struct sockaddr *)&ss, &slen);
+ return recvfrom(srv_client, &buf, sizeof(buf), 0, NULL, NULL);
}
static int
@@ -148,7 +116,7 @@ run_test(int server_fd, const struct sockaddr *addr, socklen_t len, int type)
in_port_t port;
int ret = 1;
- client = connect_to_server(addr, len, type);
+ client = connect_to_addr(type, (struct sockaddr_storage *)addr, len, NULL);
if (client == -1) {
perror("Cannot connect to server");
goto out;
@@ -297,7 +265,9 @@ void test_sk_assign(void)
continue;
prepare_addr(test->addr, test->family, BIND_PORT, false);
addr = (const struct sockaddr *)test->addr;
- server = start_server(addr, test->len, test->type);
+ server = start_server_addr(test->type,
+ (const struct sockaddr_storage *)addr,
+ test->len, NULL);
if (server == -1)
goto close;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..e4940583924b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "sk_bypass_prot_mem.skel.h"
+#include "network_helpers.h"
+
+#define NR_PAGES 32
+#define NR_SOCKETS 2
+#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS)
+#define BUF_SINGLE 1024
+#define NR_SEND (BUF_TOTAL / BUF_SINGLE)
+
+struct test_case {
+ char name[8];
+ int family;
+ int type;
+ int (*create_sockets)(struct test_case *test_case, int sk[], int len);
+ long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel);
+};
+
+static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int server, i, err = 0;
+
+ server = start_server(test_case->family, test_case->type, NULL, 0, 0);
+ if (!ASSERT_GE(server, 0, "start_server_str"))
+ return server;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = connect_to_fd(server, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "connect_to_fd");
+ err = sk[i];
+ break;
+ }
+
+ sk[i + 1] = accept(server, NULL, NULL);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "accept");
+ err = sk[i + 1];
+ break;
+ }
+ }
+
+ close(server);
+
+ return err;
+}
+
+static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int i, j, err, rcvbuf = BUF_TOTAL;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "start_server");
+ return sk[i];
+ }
+
+ sk[i + 1] = connect_to_fd(sk[i], 0);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "connect_to_fd");
+ return sk[i + 1];
+ }
+
+ err = connect_fd_to_fd(sk[i], sk[i + 1], 0);
+ if (err) {
+ ASSERT_EQ(err, 0, "connect_fd_to_fd");
+ return err;
+ }
+
+ for (j = 0; j < 2; j++) {
+ err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
+ if (err) {
+ ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)");
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static long get_memory_allocated(struct test_case *test_case,
+ bool *activated, long *memory_allocated)
+{
+ int sk;
+
+ *activated = true;
+
+ /* AF_INET and AF_INET6 share the same memory_allocated.
+ * tcp_init_sock() is called by AF_INET and AF_INET6,
+ * but udp_lib_init_sock() is inline.
+ */
+ sk = socket(AF_INET, test_case->type, 0);
+ if (!ASSERT_GE(sk, 0, "get_memory_allocated"))
+ return -1;
+
+ close(sk);
+
+ return *memory_allocated;
+}
+
+static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->tcp_activated,
+ &skel->bss->tcp_memory_allocated);
+}
+
+static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->udp_activated,
+ &skel->bss->udp_memory_allocated);
+}
+
+static int check_bypass(struct test_case *test_case,
+ struct sk_bypass_prot_mem *skel, bool bypass)
+{
+ char buf[BUF_SINGLE] = {};
+ long memory_allocated[2];
+ int sk[NR_SOCKETS];
+ int err, i, j;
+
+ for (i = 0; i < ARRAY_SIZE(sk); i++)
+ sk[i] = -1;
+
+ err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
+ if (err)
+ goto close;
+
+ memory_allocated[0] = test_case->get_memory_allocated(test_case, skel);
+
+ /* allocate pages >= NR_PAGES */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = send(sk[i], buf, sizeof(buf), 0);
+
+ /* Avoid too noisy logs when something failed. */
+ if (bytes != sizeof(buf)) {
+ ASSERT_EQ(bytes, sizeof(buf), "send");
+ if (bytes < 0) {
+ err = bytes;
+ goto drain;
+ }
+ }
+ }
+ }
+
+ memory_allocated[1] = test_case->get_memory_allocated(test_case, skel);
+
+ if (bypass)
+ ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass");
+ else
+ ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass");
+
+drain:
+ if (test_case->type == SOCK_DGRAM) {
+ /* UDP starts purging sk->sk_receive_queue after one RCU
+ * grace period, then udp_memory_allocated goes down,
+ * so drain the queue before close().
+ */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC);
+
+ if (bytes == sizeof(buf))
+ continue;
+ if (bytes != -1 || errno != EAGAIN)
+ PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno));
+ break;
+ }
+ }
+ }
+
+close:
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ if (sk[i] < 0)
+ break;
+
+ close(sk[i]);
+ }
+
+ return err;
+}
+
+static void run_test(struct test_case *test_case)
+{
+ struct sk_bypass_prot_mem *skel;
+ struct nstoken *nstoken;
+ int cgroup, err;
+
+ skel = sk_bypass_prot_mem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ skel->bss->nr_cpus = libbpf_num_possible_cpus();
+
+ err = sk_bypass_prot_mem__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto destroy_skel;
+
+ cgroup = test__join_cgroup("/sk_bypass_prot_mem");
+ if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
+ goto destroy_skel;
+
+ err = make_netns("sk_bypass_prot_mem");
+ if (!ASSERT_EQ(err, 0, "make_netns"))
+ goto close_cgroup;
+
+ nstoken = open_netns("sk_bypass_prot_mem");
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto remove_netns;
+
+ err = check_bypass(test_case, skel, false);
+ if (!ASSERT_EQ(err, 0, "test_bypass(false)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(1)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(0)"))
+ goto close_netns;
+
+ skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
+ if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ ASSERT_EQ(err, 0, "test_bypass(true by bpf)");
+
+close_netns:
+ close_netns(nstoken);
+remove_netns:
+ remove_netns("sk_bypass_prot_mem");
+close_cgroup:
+ close(cgroup);
+destroy_skel:
+ sk_bypass_prot_mem__destroy(skel);
+}
+
+static struct test_case test_cases[] = {
+ {
+ .name = "TCP ",
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDP ",
+ .family = AF_INET,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+ {
+ .name = "TCPv6",
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDPv6",
+ .family = AF_INET6,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+};
+
+void serial_test_sk_bypass_prot_mem(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ if (test__start_subtest(test_cases[i].name))
+ run_test(&test_cases[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 9ff0412e1fd3..023c31bde229 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -18,7 +18,6 @@
#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
-#include <error.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
@@ -30,7 +29,6 @@
#include <bpf/bpf.h>
#include "test_progs.h"
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "network_helpers.h"
@@ -48,8 +46,6 @@
#define INT_IP6 "fd00::2"
#define INT_PORT 8008
-#define IO_TIMEOUT_SEC 3
-
enum server {
SERVER_A = 0,
SERVER_B = 1,
@@ -78,6 +74,12 @@ struct test {
bool reuseport_has_conns; /* Add a connected socket to reuseport group */
};
+struct cb_opts {
+ int family;
+ int sotype;
+ bool reuseport;
+};
+
static __u32 duration; /* for CHECK macro */
static bool is_ipv6(const char *ip)
@@ -103,59 +105,14 @@ static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog)
return 0;
}
-static socklen_t inetaddr_len(const struct sockaddr_storage *addr)
-{
- return (addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) :
- addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : 0);
-}
-
-static int make_socket(int sotype, const char *ip, int port,
- struct sockaddr_storage *addr)
+static int setsockopts(int fd, void *opts)
{
- struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC };
- int err, family, fd;
-
- family = is_ipv6(ip) ? AF_INET6 : AF_INET;
- err = make_sockaddr(family, ip, port, addr, NULL);
- if (CHECK(err, "make_address", "failed\n"))
- return -1;
-
- fd = socket(addr->ss_family, sotype, 0);
- if (CHECK(fd < 0, "socket", "failed\n")) {
- log_err("failed to make socket");
- return -1;
- }
-
- err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
- if (CHECK(err, "setsockopt(SO_SNDTIMEO)", "failed\n")) {
- log_err("failed to set SNDTIMEO");
- close(fd);
- return -1;
- }
-
- err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
- if (CHECK(err, "setsockopt(SO_RCVTIMEO)", "failed\n")) {
- log_err("failed to set RCVTIMEO");
- close(fd);
- return -1;
- }
-
- return fd;
-}
-
-static int make_server(int sotype, const char *ip, int port,
- struct bpf_program *reuseport_prog)
-{
- struct sockaddr_storage addr = {0};
+ struct cb_opts *co = (struct cb_opts *)opts;
const int one = 1;
- int err, fd = -1;
-
- fd = make_socket(sotype, ip, port, &addr);
- if (fd < 0)
- return -1;
+ int err = 0;
/* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. */
- if (sotype == SOCK_DGRAM) {
+ if (co->sotype == SOCK_DGRAM) {
err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
sizeof(one));
if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
@@ -164,7 +121,7 @@ static int make_server(int sotype, const char *ip, int port,
}
}
- if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
+ if (co->sotype == SOCK_DGRAM && co->family == AF_INET6) {
err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
sizeof(one));
if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
@@ -173,7 +130,7 @@ static int make_server(int sotype, const char *ip, int port,
}
}
- if (sotype == SOCK_STREAM) {
+ if (co->sotype == SOCK_STREAM) {
err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
sizeof(one));
if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
@@ -182,7 +139,7 @@ static int make_server(int sotype, const char *ip, int port,
}
}
- if (reuseport_prog) {
+ if (co->reuseport) {
err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
sizeof(one));
if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
@@ -191,19 +148,28 @@ static int make_server(int sotype, const char *ip, int port,
}
}
- err = bind(fd, (void *)&addr, inetaddr_len(&addr));
- if (CHECK(err, "bind", "failed\n")) {
- log_err("failed to bind listen socket");
- goto fail;
- }
+fail:
+ return err;
+}
- if (sotype == SOCK_STREAM) {
- err = listen(fd, SOMAXCONN);
- if (CHECK(err, "make_server", "listen")) {
- log_err("failed to listen on port %d", port);
- goto fail;
- }
- }
+static int make_server(int sotype, const char *ip, int port,
+ struct bpf_program *reuseport_prog)
+{
+ struct cb_opts cb_opts = {
+ .family = is_ipv6(ip) ? AF_INET6 : AF_INET,
+ .sotype = sotype,
+ .reuseport = reuseport_prog,
+ };
+ struct network_helper_opts opts = {
+ .backlog = SOMAXCONN,
+ .post_socket_cb = setsockopts,
+ .cb_opts = &cb_opts,
+ };
+ int err, fd;
+
+ fd = start_server_str(cb_opts.family, sotype, ip, port, &opts);
+ if (!ASSERT_OK_FD(fd, "start_server_str"))
+ return -1;
/* Late attach reuseport prog so we can have one init path */
if (reuseport_prog) {
@@ -220,25 +186,46 @@ fail:
return -1;
}
-static int make_client(int sotype, const char *ip, int port)
+static __u64 socket_cookie(int fd)
{
- struct sockaddr_storage addr = {0};
- int err, fd;
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
- fd = make_socket(sotype, ip, port, &addr);
- if (fd < 0)
- return -1;
+ if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0,
+ "getsockopt(SO_COOKIE)", "%s\n", strerror(errno)))
+ return 0;
+ return cookie;
+}
+
+static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port,
+ const char *remote_ip, __u16 remote_port)
+{
+ void *local, *remote;
+ int err;
- err = connect(fd, (void *)&addr, inetaddr_len(&addr));
- if (CHECK(err, "make_client", "connect")) {
- log_err("failed to connect client socket");
- goto fail;
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->local_port = local_port;
+ ctx->remote_port = htons(remote_port);
+
+ if (is_ipv6(local_ip)) {
+ ctx->family = AF_INET6;
+ local = &ctx->local_ip6[0];
+ remote = &ctx->remote_ip6[0];
+ } else {
+ ctx->family = AF_INET;
+ local = &ctx->local_ip4;
+ remote = &ctx->remote_ip4;
}
- return fd;
-fail:
- close(fd);
- return -1;
+ err = inet_pton(ctx->family, local_ip, local);
+ if (CHECK(err != 1, "inet_pton", "local_ip failed\n"))
+ return 1;
+
+ err = inet_pton(ctx->family, remote_ip, remote);
+ if (CHECK(err != 1, "inet_pton", "remote_ip failed\n"))
+ return 1;
+
+ return 0;
}
static int send_byte(int fd)
@@ -365,18 +352,12 @@ static int udp_recv_send(int server_fd)
}
/* Reply from original destination address. */
- fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
- if (CHECK(fd < 0, "socket", "failed\n")) {
+ fd = start_server_addr(SOCK_DGRAM, dst_addr, sizeof(*dst_addr), NULL);
+ if (!ASSERT_OK_FD(fd, "start_server_addr")) {
log_err("failed to create tx socket");
return -1;
}
- ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
- if (CHECK(ret, "bind", "failed\n")) {
- log_err("failed to bind tx socket");
- goto out;
- }
-
msg.msg_control = NULL;
msg.msg_controllen = 0;
n = sendmsg(fd, &msg, 0);
@@ -438,7 +419,7 @@ static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
}
link = bpf_program__attach_netns(prog, net_fd);
- if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) {
errno = -PTR_ERR(link);
log_err("failed to attach program '%s' to netns",
bpf_program__name(prog));
@@ -556,7 +537,7 @@ close:
static void run_lookup_prog(const struct test *t)
{
- int server_fds[MAX_SERVERS] = { -1 };
+ int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
int client_fd, reuse_conn_fd = -1;
struct bpf_link *lookup_link;
int i, err;
@@ -588,9 +569,6 @@ static void run_lookup_prog(const struct test *t)
* BPF socket lookup.
*/
if (t->reuseport_has_conns) {
- struct sockaddr_storage addr = {};
- socklen_t len = sizeof(addr);
-
/* Add an extra socket to reuseport group */
reuse_conn_fd = make_server(t->sotype, t->listen_at.ip,
t->listen_at.port,
@@ -598,17 +576,15 @@ static void run_lookup_prog(const struct test *t)
if (reuse_conn_fd < 0)
goto close;
- /* Connect the extra socket to itself */
- err = getsockname(reuse_conn_fd, (void *)&addr, &len);
- if (CHECK(err, "getsockname", "errno %d\n", errno))
- goto close;
- err = connect(reuse_conn_fd, (void *)&addr, len);
- if (CHECK(err, "connect", "errno %d\n", errno))
+ /* Connect the extra socket to itself */
+ err = connect_fd_to_fd(reuse_conn_fd, reuse_conn_fd, 0);
+ if (!ASSERT_OK(err, "connect_fd_to_fd"))
goto close;
}
- client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port);
- if (client_fd < 0)
+ client_fd = connect_to_addr_str(is_ipv6(t->connect_to.ip) ? AF_INET6 : AF_INET,
+ t->sotype, t->connect_to.ip, t->connect_to.port, NULL);
+ if (!ASSERT_OK_FD(client_fd, "connect_to_addr_str"))
goto close;
if (t->sotype == SOCK_STREAM)
@@ -823,9 +799,11 @@ static void test_redirect_lookup(struct test_sk_lookup *skel)
static void drop_on_lookup(const struct test *t)
{
+ int family = is_ipv6(t->connect_to.ip) ? AF_INET6 : AF_INET;
struct sockaddr_storage dst = {};
int client_fd, server_fd, err;
struct bpf_link *lookup_link;
+ socklen_t len;
ssize_t n;
lookup_link = attach_lookup_prog(t->lookup_prog);
@@ -837,12 +815,14 @@ static void drop_on_lookup(const struct test *t)
if (server_fd < 0)
goto detach;
- client_fd = make_socket(t->sotype, t->connect_to.ip,
- t->connect_to.port, &dst);
- if (client_fd < 0)
+ client_fd = client_socket(family, t->sotype, NULL);
+ if (!ASSERT_OK_FD(client_fd, "client_socket"))
goto close_srv;
- err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+ err = make_sockaddr(family, t->connect_to.ip, t->connect_to.port, &dst, &len);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto close_all;
+ err = connect(client_fd, (void *)&dst, len);
if (t->sotype == SOCK_DGRAM) {
err = send_byte(client_fd);
if (err)
@@ -895,6 +875,37 @@ static void test_drop_on_lookup(struct test_sk_lookup *skel)
.connect_to = { EXT_IP6, EXT_PORT },
.listen_at = { EXT_IP6, INT_PORT },
},
+ /* The program will drop on success, meaning that the ifindex
+ * was 1.
+ */
+ {
+ .desc = "TCP IPv4 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
};
const struct test *t;
@@ -906,9 +917,11 @@ static void test_drop_on_lookup(struct test_sk_lookup *skel)
static void drop_on_reuseport(const struct test *t)
{
+ int family = is_ipv6(t->connect_to.ip) ? AF_INET6 : AF_INET;
struct sockaddr_storage dst = { 0 };
int client, server1, server2, err;
struct bpf_link *lookup_link;
+ socklen_t len;
ssize_t n;
lookup_link = attach_lookup_prog(t->lookup_prog);
@@ -922,7 +935,7 @@ static void drop_on_reuseport(const struct test *t)
err = update_lookup_map(t->sock_map, SERVER_A, server1);
if (err)
- goto detach;
+ goto close_srv1;
/* second server on destination address we should never reach */
server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port,
@@ -930,12 +943,14 @@ static void drop_on_reuseport(const struct test *t)
if (server2 < 0)
goto close_srv1;
- client = make_socket(t->sotype, t->connect_to.ip,
- t->connect_to.port, &dst);
- if (client < 0)
+ client = client_socket(family, t->sotype, NULL);
+ if (!ASSERT_OK_FD(client, "client_socket"))
goto close_srv2;
- err = connect(client, (void *)&dst, inetaddr_len(&dst));
+ err = make_sockaddr(family, t->connect_to.ip, t->connect_to.port, &dst, &len);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto close_all;
+ err = connect(client, (void *)&dst, len);
if (t->sotype == SOCK_DGRAM) {
err = send_byte(client);
if (err)
@@ -1009,18 +1024,27 @@ static void test_drop_on_reuseport(struct test_sk_lookup *skel)
static void run_sk_assign(struct test_sk_lookup *skel,
struct bpf_program *lookup_prog,
- const char *listen_ip, const char *connect_ip)
+ const char *remote_ip, const char *local_ip)
{
- int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 };
- struct bpf_link *lookup_link;
+ int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
+ struct bpf_sk_lookup ctx;
+ __u64 server_cookie;
int i, err;
- lookup_link = attach_lookup_prog(lookup_prog);
- if (!lookup_link)
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = &ctx,
+ .ctx_size_in = sizeof(ctx),
+ .ctx_out = &ctx,
+ .ctx_size_out = sizeof(ctx),
+ );
+
+ if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT))
return;
+ ctx.protocol = IPPROTO_TCP;
+
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
- server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+ server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL);
if (server_fds[i] < 0)
goto close_servers;
@@ -1030,23 +1054,25 @@ static void run_sk_assign(struct test_sk_lookup *skel,
goto close_servers;
}
- client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
- if (client_fd < 0)
+ server_cookie = socket_cookie(server_fds[SERVER_B]);
+ if (!server_cookie)
+ return;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts);
+ if (CHECK(err, "test_run", "failed with error %d\n", errno))
goto close_servers;
- peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
- if (CHECK(peer_fd < 0, "accept", "failed\n"))
- goto close_client;
+ if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n"))
+ goto close_servers;
+
+ CHECK(ctx.cookie != server_cookie, "ctx.cookie",
+ "selected sk %llu instead of %llu\n", ctx.cookie, server_cookie);
- close(peer_fd);
-close_client:
- close(client_fd);
close_servers:
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
if (server_fds[i] != -1)
close(server_fds[i]);
}
- bpf_link__destroy(lookup_link);
}
static void run_sk_assign_v4(struct test_sk_lookup *skel,
@@ -1071,8 +1097,8 @@ static void run_sk_assign_connected(struct test_sk_lookup *skel,
if (server_fd < 0)
return;
- connected_fd = make_client(sotype, EXT_IP4, EXT_PORT);
- if (connected_fd < 0)
+ connected_fd = connect_to_addr_str(AF_INET, sotype, EXT_IP4, EXT_PORT, NULL);
+ if (!ASSERT_OK_FD(connected_fd, "connect_to_addr_str"))
goto out_close_server;
/* Put a connected socket in redirect map */
@@ -1085,8 +1111,8 @@ static void run_sk_assign_connected(struct test_sk_lookup *skel,
goto out_close_connected;
/* Try to redirect TCP SYN / UDP packet to a connected socket */
- client_fd = make_client(sotype, EXT_IP4, EXT_PORT);
- if (client_fd < 0)
+ client_fd = connect_to_addr_str(AF_INET, sotype, EXT_IP4, EXT_PORT, NULL);
+ if (!ASSERT_OK_FD(client_fd, "connect_to_addr_str"))
goto out_unlink_prog;
if (sotype == SOCK_DGRAM) {
send_byte(client_fd);
@@ -1138,6 +1164,7 @@ static void run_multi_prog_lookup(const struct test_multi_prog *t)
int map_fd, server_fd, client_fd;
struct bpf_link *link1, *link2;
int prog_idx, done, err;
+ socklen_t len;
map_fd = bpf_map__fd(t->run_map);
@@ -1167,11 +1194,14 @@ static void run_multi_prog_lookup(const struct test_multi_prog *t)
if (err)
goto out_close_server;
- client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst);
- if (client_fd < 0)
+ client_fd = client_socket(AF_INET, SOCK_STREAM, NULL);
+ if (!ASSERT_OK_FD(client_fd, "client_socket"))
goto out_close_server;
- err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+ err = make_sockaddr(AF_INET, EXT_IP4, EXT_PORT, &dst, &len);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto out_close_client;
+ err = connect(client_fd, (void *)&dst, len);
if (CHECK(err && !t->expect_errno, "connect",
"unexpected error %d\n", errno))
goto out_close_client;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c
new file mode 100644
index 000000000000..f35852d245e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "sk_storage_omem_uncharge.skel.h"
+
+void test_sk_storage_omem_uncharge(void)
+{
+ struct sk_storage_omem_uncharge *skel;
+ int sk_fd = -1, map_fd, err, value;
+ socklen_t optlen;
+
+ skel = sk_storage_omem_uncharge__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+ map_fd = bpf_map__fd(skel->maps.sk_storage);
+
+ /* A standalone socket not binding to addr:port,
+ * so nentns is not needed.
+ */
+ sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sk_fd, 0, "socket"))
+ goto done;
+
+ optlen = sizeof(skel->bss->cookie);
+ err = getsockopt(sk_fd, SOL_SOCKET, SO_COOKIE, &skel->bss->cookie, &optlen);
+ if (!ASSERT_OK(err, "getsockopt(SO_COOKIE)"))
+ goto done;
+
+ value = 0;
+ err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(value=0)"))
+ goto done;
+
+ value = 0xdeadbeef;
+ err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(value=0xdeadbeef)"))
+ goto done;
+
+ err = sk_storage_omem_uncharge__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto done;
+
+ close(sk_fd);
+ sk_fd = -1;
+
+ ASSERT_EQ(skel->bss->cookie_found, 2, "cookie_found");
+ ASSERT_EQ(skel->bss->omem, 0, "omem");
+
+done:
+ sk_storage_omem_uncharge__destroy(skel);
+ if (sk_fd != -1)
+ close(sk_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
index 2b392590e8ca..547ae53cde74 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
@@ -105,7 +105,7 @@ out:
close(listen_fd);
}
-void test_sk_storage_tracing(void)
+void serial_test_sk_storage_tracing(void)
{
struct test_sk_storage_trace_itself *skel_itself;
int err;
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index fafeddaad6a9..33f950e2dae3 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,98 +11,81 @@ void test_skb_ctx(void)
.cb[3] = 4,
.cb[4] = 5,
.priority = 6,
+ .ingress_ifindex = 11,
.ifindex = 1,
.tstamp = 7,
.wire_len = 100,
.gso_segs = 8,
.mark = 9,
.gso_size = 10,
+ .hwtstamp = 11,
};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
.ctx_out = &skb,
.ctx_size_out = sizeof(skb),
- };
+ );
struct bpf_object *obj;
- int err;
- int i;
+ int err, prog_fd, i;
- err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load("./test_skb_ctx.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load"))
return;
/* ctx_in != NULL, ctx_size_in == 0 */
tattr.ctx_size_in = 0;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "ctx_size_in");
tattr.ctx_size_in = sizeof(skb);
/* ctx_out != NULL, ctx_size_out == 0 */
tattr.ctx_size_out = 0;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "ctx_size_out");
tattr.ctx_size_out = sizeof(skb);
/* non-zero [len, tc_index] fields should be rejected*/
skb.len = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "len");
skb.len = 0;
skb.tc_index = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "tc_index");
skb.tc_index = 0;
/* non-zero [hash, sk] fields should be rejected */
skb.hash = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "hash", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "hash");
skb.hash = 0;
skb.sk = (struct bpf_sock *)1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "sk");
skb.sk = 0;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != 0 || tattr.retval,
- "run",
- "err %d errno %d retval %d\n",
- err, errno, tattr.retval);
-
- CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
- "ctx_size_out",
- "incorrect output size, want %zu have %u\n",
- sizeof(skb), tattr.ctx_size_out);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(tattr.retval, "test_run retval");
+ ASSERT_EQ(tattr.ctx_size_out, sizeof(skb), "ctx_size_out");
for (i = 0; i < 5; i++)
- CHECK_ATTR(skb.cb[i] != i + 2,
- "ctx_out_cb",
- "skb->cb[i] == %d, expected %d\n",
- skb.cb[i], i + 2);
- CHECK_ATTR(skb.priority != 7,
- "ctx_out_priority",
- "skb->priority == %d, expected %d\n",
- skb.priority, 7);
- CHECK_ATTR(skb.ifindex != 1,
- "ctx_out_ifindex",
- "skb->ifindex == %d, expected %d\n",
- skb.ifindex, 1);
- CHECK_ATTR(skb.tstamp != 8,
- "ctx_out_tstamp",
- "skb->tstamp == %lld, expected %d\n",
- skb.tstamp, 8);
- CHECK_ATTR(skb.mark != 10,
- "ctx_out_mark",
- "skb->mark == %u, expected %d\n",
- skb.mark, 10);
+ ASSERT_EQ(skb.cb[i], i + 2, "ctx_out_cb");
+ ASSERT_EQ(skb.priority, 7, "ctx_out_priority");
+ ASSERT_EQ(skb.ifindex, 1, "ctx_out_ifindex");
+ ASSERT_EQ(skb.ingress_ifindex, 11, "ctx_out_ingress_ifindex");
+ ASSERT_EQ(skb.tstamp, 8, "ctx_out_tstamp");
+ ASSERT_EQ(skb.mark, 10, "ctx_out_mark");
+
+ bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
index f302ad84a298..f7ee25f290f7 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -9,22 +9,22 @@ void test_skb_helpers(void)
.gso_segs = 8,
.gso_size = 10,
};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
.ctx_out = &skb,
.ctx_size_out = sizeof(skb),
- };
+ );
struct bpf_object *obj;
- int err;
+ int err, prog_fd;
- err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load("./test_skb_helpers.bpf.o",
+ BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load"))
return;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
new file mode 100644
index 000000000000..d7f83c0a40a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "skb_load_bytes.skel.h"
+
+void test_skb_load_bytes(void)
+{
+ struct skb_load_bytes *skel;
+ int err, prog_fd, test_result;
+ struct __sk_buff skb = { 0 };
+
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ skel = skb_load_bytes__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.skb_process);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto out;
+
+ skel->bss->load_offset = (uint32_t)(-1);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ test_result = skel->bss->test_result;
+ if (!ASSERT_EQ(test_result, -EFAULT, "offset -1"))
+ goto out;
+
+ skel->bss->load_offset = (uint32_t)10;
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ test_result = skel->bss->test_result;
+ if (!ASSERT_EQ(test_result, 0, "offset 10"))
+ goto out;
+
+out:
+ skb_load_bytes__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
new file mode 100644
index 000000000000..3eefdfed1db9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <sys/un.h>
+#include "test_skc_to_unix_sock.skel.h"
+
+static const char *sock_path = "@skc_to_unix_sock";
+
+void test_skc_to_unix_sock(void)
+{
+ struct test_skc_to_unix_sock *skel;
+ struct sockaddr_un sockaddr;
+ int err, sockfd = 0;
+
+ skel = test_skc_to_unix_sock__open();
+ if (!ASSERT_OK_PTR(skel, "could not open BPF object"))
+ return;
+
+ skel->rodata->my_pid = getpid();
+
+ err = test_skc_to_unix_sock__load(skel);
+ if (!ASSERT_OK(err, "could not load BPF object"))
+ goto cleanup;
+
+ err = test_skc_to_unix_sock__attach(skel);
+ if (!ASSERT_OK(err, "could not attach BPF object"))
+ goto cleanup;
+
+ /* trigger unix_listen */
+ sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (!ASSERT_GT(sockfd, 0, "socket failed"))
+ goto cleanup;
+
+ memset(&sockaddr, 0, sizeof(sockaddr));
+ sockaddr.sun_family = AF_UNIX;
+ strncpy(sockaddr.sun_path, sock_path, strlen(sock_path));
+ sockaddr.sun_path[0] = '\0';
+
+ err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr));
+ if (!ASSERT_OK(err, "bind failed"))
+ goto cleanup;
+
+ err = listen(sockfd, 1);
+ if (!ASSERT_OK(err, "listen failed"))
+ goto cleanup;
+
+ ASSERT_EQ(strcmp(skel->bss->path, sock_path), 0, "bpf_skc_to_unix_sock failed");
+
+cleanup:
+ if (sockfd)
+ close(sockfd);
+ test_skc_to_unix_sock__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index fe87b77af459..bc6817aee9aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include <sys/mman.h>
struct s {
int a;
@@ -16,8 +17,14 @@ void test_skeleton(void)
struct test_skeleton* skel;
struct test_skeleton__bss *bss;
struct test_skeleton__data *data;
+ struct test_skeleton__data_dyn *data_dyn;
struct test_skeleton__rodata *rodata;
+ struct test_skeleton__rodata_dyn *rodata_dyn;
struct test_skeleton__kconfig *kcfg;
+ const void *elf_bytes;
+ size_t elf_bytes_sz = 0;
+ void *m;
+ int i, fd;
skel = test_skeleton__open();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -28,7 +35,12 @@ void test_skeleton(void)
bss = skel->bss;
data = skel->data;
+ data_dyn = skel->data_dyn;
rodata = skel->rodata;
+ rodata_dyn = skel->rodata_dyn;
+
+ ASSERT_STREQ(bpf_map__name(skel->maps.rodata_dyn), ".rodata.dyn", "rodata_dyn_name");
+ ASSERT_STREQ(bpf_map__name(skel->maps.data_dyn), ".data.dyn", "data_dyn_name");
/* validate values are pre-initialized correctly */
CHECK(data->in1 != -1, "in1", "got %d != exp %d\n", data->in1, -1);
@@ -44,6 +56,12 @@ void test_skeleton(void)
CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0);
CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0);
+ ASSERT_EQ(rodata_dyn->in_dynarr_sz, 0, "in_dynarr_sz");
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(rodata_dyn->in_dynarr[i], -(i + 1), "in_dynarr");
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(data_dyn->out_dynarr[i], i + 1, "out_dynarr");
+
/* validate we can pre-setup global variables, even in .bss */
data->in1 = 10;
data->in2 = 11;
@@ -51,6 +69,10 @@ void test_skeleton(void)
bss->in4 = 13;
rodata->in.in6 = 14;
+ rodata_dyn->in_dynarr_sz = 4;
+ for (i = 0; i < 4; i++)
+ rodata_dyn->in_dynarr[i] = i + 10;
+
err = test_skeleton__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
goto cleanup;
@@ -62,6 +84,10 @@ void test_skeleton(void)
CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL);
CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14);
+ ASSERT_EQ(rodata_dyn->in_dynarr_sz, 4, "in_dynarr_sz");
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(rodata_dyn->in_dynarr[i], i + 10, "in_dynarr");
+
/* now set new values and attach to get them into outX variables */
data->in1 = 1;
data->in2 = 2;
@@ -71,6 +97,8 @@ void test_skeleton(void)
bss->in5.b = 6;
kcfg = skel->kconfig;
+ skel->data_read_mostly->read_mostly_var = 123;
+
err = test_skeleton__attach(skel);
if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
goto cleanup;
@@ -82,10 +110,8 @@ void test_skeleton(void)
CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2);
CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
- CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
- bss->handler_out5.a, 5);
- CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
- bss->handler_out5.b, 6);
+ CHECK(bss->out5.a != 5, "res5", "got %d != exp %d\n", bss->out5.a, 5);
+ CHECK(bss->out5.b != 6, "res6", "got %lld != exp %d\n", bss->out5.b, 6);
CHECK(bss->out6 != 14, "res7", "got %d != exp %d\n", bss->out6, 14);
CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
@@ -93,6 +119,24 @@ void test_skeleton(void)
CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2",
"got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION);
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(data_dyn->out_dynarr[i], i + 10, "out_dynarr");
+
+ ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var");
+
+ ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr");
+
+ fd = bpf_map__fd(skel->maps.data_non_mmapable);
+ m = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0);
+ if (!ASSERT_EQ(m, MAP_FAILED, "unexpected_mmap_success"))
+ munmap(m, getpagesize());
+
+ ASSERT_EQ(bpf_map__map_flags(skel->maps.data_non_mmapable), 0, "non_mmap_flags");
+
+ elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz);
+ ASSERT_OK_PTR(elf_bytes, "elf_bytes");
+ ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz");
+
cleanup:
test_skeleton__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
new file mode 100644
index 000000000000..594441acb707
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <test_progs.h>
+#include "test_snprintf.skel.h"
+#include "test_snprintf_single.skel.h"
+
+#define EXP_NUM_OUT "-8 9 96 -424242 1337 DABBAD00"
+#define EXP_NUM_RET sizeof(EXP_NUM_OUT)
+
+#define EXP_IP_OUT "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
+#define EXP_IP_RET sizeof(EXP_IP_OUT)
+
+/* The third specifier, %pB, depends on compiler inlining so don't check it */
+#define EXP_SYM_OUT "schedule schedule+0x0/"
+#define MIN_SYM_RET sizeof(EXP_SYM_OUT)
+
+/* The third specifier, %p, is a hashed pointer which changes on every reboot */
+#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
+#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
+
+#define EXP_STR_OUT "str1 a b c d e longstr"
+#define EXP_STR_RET sizeof(EXP_STR_OUT)
+
+#define EXP_OVER_OUT "%over"
+#define EXP_OVER_RET 10
+
+#define EXP_PAD_OUT " 4 000"
+#define EXP_PAD_RET 900007
+
+#define EXP_NO_ARG_OUT "simple case"
+#define EXP_NO_ARG_RET 12
+
+#define EXP_NO_BUF_RET 29
+
+static void test_snprintf_positive(void)
+{
+ char exp_addr_out[] = EXP_ADDR_OUT;
+ char exp_sym_out[] = EXP_SYM_OUT;
+ struct test_snprintf *skel;
+
+ skel = test_snprintf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ ASSERT_STREQ(skel->bss->num_out, EXP_NUM_OUT, "num_out");
+ ASSERT_EQ(skel->bss->num_ret, EXP_NUM_RET, "num_ret");
+
+ ASSERT_STREQ(skel->bss->ip_out, EXP_IP_OUT, "ip_out");
+ ASSERT_EQ(skel->bss->ip_ret, EXP_IP_RET, "ip_ret");
+
+ ASSERT_OK(memcmp(skel->bss->sym_out, exp_sym_out,
+ sizeof(exp_sym_out) - 1), "sym_out");
+ ASSERT_LT(MIN_SYM_RET, skel->bss->sym_ret, "sym_ret");
+
+ ASSERT_OK(memcmp(skel->bss->addr_out, exp_addr_out,
+ sizeof(exp_addr_out) - 1), "addr_out");
+ ASSERT_EQ(skel->bss->addr_ret, EXP_ADDR_RET, "addr_ret");
+
+ ASSERT_STREQ(skel->bss->str_out, EXP_STR_OUT, "str_out");
+ ASSERT_EQ(skel->bss->str_ret, EXP_STR_RET, "str_ret");
+
+ ASSERT_STREQ(skel->bss->over_out, EXP_OVER_OUT, "over_out");
+ ASSERT_EQ(skel->bss->over_ret, EXP_OVER_RET, "over_ret");
+
+ ASSERT_STREQ(skel->bss->pad_out, EXP_PAD_OUT, "pad_out");
+ ASSERT_EQ(skel->bss->pad_ret, EXP_PAD_RET, "pad_ret");
+
+ ASSERT_STREQ(skel->bss->noarg_out, EXP_NO_ARG_OUT, "no_arg_out");
+ ASSERT_EQ(skel->bss->noarg_ret, EXP_NO_ARG_RET, "no_arg_ret");
+
+ ASSERT_EQ(skel->bss->nobuf_ret, EXP_NO_BUF_RET, "no_buf_ret");
+
+cleanup:
+ test_snprintf__destroy(skel);
+}
+
+/* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
+static int load_single_snprintf(char *fmt)
+{
+ struct test_snprintf_single *skel;
+ int ret;
+
+ skel = test_snprintf_single__open();
+ if (!skel)
+ return -EINVAL;
+
+ memcpy(skel->rodata->fmt, fmt, MIN(strlen(fmt) + 1, 10));
+
+ ret = test_snprintf_single__load(skel);
+ test_snprintf_single__destroy(skel);
+
+ return ret;
+}
+
+static void test_snprintf_negative(void)
+{
+ ASSERT_OK(load_single_snprintf("valid %d"), "valid usage");
+
+ ASSERT_ERR(load_single_snprintf("0123456789"), "no terminating zero");
+ ASSERT_ERR(load_single_snprintf("%d %d"), "too many specifiers");
+ ASSERT_ERR(load_single_snprintf("%pi5"), "invalid specifier 1");
+ ASSERT_ERR(load_single_snprintf("%a"), "invalid specifier 2");
+ ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
+ ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
+ ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+ ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
+ ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
+ ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
+ ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+ ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8");
+ ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");
+}
+
+void test_snprintf(void)
+{
+ if (test__start_subtest("snprintf_positive"))
+ test_snprintf_positive();
+ if (test__start_subtest("snprintf_negative"))
+ test_snprintf_negative();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
index 686b40f11a45..dd41b826be30 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
@@ -6,7 +6,7 @@
/* Demonstrate that bpf_snprintf_btf succeeds and that various data types
* are formatted correctly.
*/
-void test_snprintf_btf(void)
+void serial_test_snprintf_btf(void)
{
struct netif_receive_skb *skel;
struct netif_receive_skb__bss *bss;
@@ -42,9 +42,7 @@ void test_snprintf_btf(void)
* and it set expected return values from bpf_trace_printk()s
* and all tests ran.
*/
- if (CHECK(bss->ret <= 0,
- "bpf_snprintf_btf: got return value",
- "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests))
+ if (!ASSERT_GT(bss->ret, 0, "bpf_snprintf_ret"))
goto cleanup;
if (CHECK(bss->ran_subtests == 0, "check if subtests ran",
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
new file mode 100644
index 000000000000..b2efabbed220
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
@@ -0,0 +1,2660 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/un.h>
+
+#include "test_progs.h"
+
+#include "sock_addr_kern.skel.h"
+#include "bind4_prog.skel.h"
+#include "bind6_prog.skel.h"
+#include "connect_unix_prog.skel.h"
+#include "connect4_prog.skel.h"
+#include "connect6_prog.skel.h"
+#include "sendmsg4_prog.skel.h"
+#include "sendmsg6_prog.skel.h"
+#include "recvmsg4_prog.skel.h"
+#include "recvmsg6_prog.skel.h"
+#include "sendmsg_unix_prog.skel.h"
+#include "recvmsg_unix_prog.skel.h"
+#include "getsockname4_prog.skel.h"
+#include "getsockname6_prog.skel.h"
+#include "getsockname_unix_prog.skel.h"
+#include "getpeername4_prog.skel.h"
+#include "getpeername6_prog.skel.h"
+#include "getpeername_unix_prog.skel.h"
+#include "network_helpers.h"
+
+#define TEST_NS "sock_addr"
+#define TEST_IF_PREFIX "test_sock_addr"
+#define TEST_IPV4 "127.0.0.4"
+#define TEST_IPV6 "::6"
+
+#define SERV4_IP "192.168.1.254"
+#define SERV4_REWRITE_IP "127.0.0.1"
+#define SRC4_IP "172.16.0.1"
+#define SRC4_REWRITE_IP TEST_IPV4
+#define SERV4_PORT 4040
+#define SERV4_REWRITE_PORT 4444
+
+#define SERV6_IP "face:b00c:1234:5678::abcd"
+#define SERV6_REWRITE_IP "::1"
+#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4"
+#define SRC6_IP "::1"
+#define SRC6_REWRITE_IP TEST_IPV6
+#define WILDCARD6_IP "::"
+#define SERV6_PORT 6060
+#define SERV6_REWRITE_PORT 6666
+
+#define SERVUN_ADDRESS "bpf_cgroup_unix_test"
+#define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite"
+#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src"
+
+#define save_errno_do(op) ({ int __save = errno; op; errno = __save; })
+
+enum sock_addr_test_type {
+ SOCK_ADDR_TEST_BIND,
+ SOCK_ADDR_TEST_CONNECT,
+ SOCK_ADDR_TEST_SENDMSG,
+ SOCK_ADDR_TEST_RECVMSG,
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ SOCK_ADDR_TEST_GETPEERNAME,
+};
+
+typedef void *(*load_fn)(int cgroup_fd,
+ enum bpf_attach_type attach_type,
+ bool expect_reject);
+typedef void (*destroy_fn)(void *skel);
+
+static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len,
+ const struct sockaddr_storage *addr2, socklen_t addr2_len,
+ bool cmp_port);
+
+struct init_sock_args {
+ int af;
+ int type;
+};
+
+struct addr_args {
+ char addr[sizeof(struct sockaddr_storage)];
+ int addrlen;
+};
+
+struct sendmsg_args {
+ struct addr_args addr;
+ char msg[10];
+ int msglen;
+};
+
+static struct sock_addr_kern *skel;
+
+static int run_bpf_prog(const char *prog_name, void *ctx, int ctx_size)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_program *prog;
+ int prog_fd, err;
+
+ topts.ctx_in = ctx;
+ topts.ctx_size_in = ctx_size;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto err;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, prog_name))
+ goto err;
+
+ err = topts.retval;
+ errno = -topts.retval;
+ goto out;
+err:
+ err = -1;
+out:
+ return err;
+}
+
+static int kernel_init_sock(int af, int type, int protocol)
+{
+ struct init_sock_args args = {
+ .af = af,
+ .type = type,
+ };
+
+ return run_bpf_prog("init_sock", &args, sizeof(args));
+}
+
+static int kernel_close_sock(int fd)
+{
+ return run_bpf_prog("close_sock", NULL, 0);
+}
+
+static int sock_addr_op(const char *name, struct sockaddr *addr,
+ socklen_t *addrlen, bool expect_change)
+{
+ struct addr_args args;
+ int err;
+
+ if (addrlen)
+ args.addrlen = *addrlen;
+
+ if (addr)
+ memcpy(&args.addr, addr, *addrlen);
+
+ err = run_bpf_prog(name, &args, sizeof(args));
+
+ if (!expect_change && addr)
+ if (!ASSERT_EQ(cmp_addr((struct sockaddr_storage *)addr,
+ *addrlen,
+ (struct sockaddr_storage *)&args.addr,
+ args.addrlen, 1),
+ 0, "address_param_modified"))
+ return -1;
+
+ if (addrlen)
+ *addrlen = args.addrlen;
+
+ if (addr)
+ memcpy(addr, &args.addr, *addrlen);
+
+ return err;
+}
+
+static int send_msg_op(const char *name, struct sockaddr *addr,
+ socklen_t addrlen, const char *msg, int msglen)
+{
+ struct sendmsg_args args;
+ int err;
+
+ memset(&args, 0, sizeof(args));
+ memcpy(&args.addr.addr, addr, addrlen);
+ args.addr.addrlen = addrlen;
+ memcpy(args.msg, msg, msglen);
+ args.msglen = msglen;
+
+ err = run_bpf_prog(name, &args, sizeof(args));
+
+ if (!ASSERT_EQ(cmp_addr((struct sockaddr_storage *)addr,
+ addrlen,
+ (struct sockaddr_storage *)&args.addr.addr,
+ args.addr.addrlen, 1),
+ 0, "address_param_modified"))
+ return -1;
+
+ return err;
+}
+
+static int kernel_connect(struct sockaddr *addr, socklen_t addrlen)
+{
+ return sock_addr_op("kernel_connect", addr, &addrlen, false);
+}
+
+static int kernel_bind(int fd, struct sockaddr *addr, socklen_t addrlen)
+{
+ return sock_addr_op("kernel_bind", addr, &addrlen, false);
+}
+
+static int kernel_listen(void)
+{
+ return sock_addr_op("kernel_listen", NULL, NULL, false);
+}
+
+static int kernel_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen)
+{
+ return send_msg_op("kernel_sendmsg", addr, addrlen, msg, msglen);
+}
+
+static int sock_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen)
+{
+ return send_msg_op("sock_sendmsg", addr, addrlen, msg, msglen);
+}
+
+static int kernel_getsockname(int fd, struct sockaddr *addr, socklen_t *addrlen)
+{
+ return sock_addr_op("kernel_getsockname", addr, addrlen, true);
+}
+
+static int kernel_getpeername(int fd, struct sockaddr *addr, socklen_t *addrlen)
+{
+ return sock_addr_op("kernel_getpeername", addr, addrlen, true);
+}
+
+int kernel_connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
+ const struct network_helper_opts *opts)
+{
+ int err;
+
+ if (!ASSERT_OK(kernel_init_sock(addr->ss_family, type, 0),
+ "kernel_init_sock"))
+ goto err;
+
+ if (kernel_connect((struct sockaddr *)addr, addrlen) < 0)
+ goto err;
+
+ /* Test code expects a "file descriptor" on success. */
+ err = 1;
+ goto out;
+err:
+ err = -1;
+ save_errno_do(ASSERT_OK(kernel_close_sock(0), "kernel_close_sock"));
+out:
+ return err;
+}
+
+int kernel_start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ int err;
+
+ if (!ASSERT_OK(kernel_init_sock(family, type, 0), "kernel_init_sock"))
+ goto err;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ goto err;
+
+ if (kernel_bind(0, (struct sockaddr *)&addr, addrlen) < 0)
+ goto err;
+
+ if (type == SOCK_STREAM) {
+ if (!ASSERT_OK(kernel_listen(), "kernel_listen"))
+ goto err;
+ }
+
+ /* Test code expects a "file descriptor" on success. */
+ err = 1;
+ goto out;
+err:
+ err = -1;
+ save_errno_do(ASSERT_OK(kernel_close_sock(0), "kernel_close_sock"));
+out:
+ return err;
+}
+
+struct sock_ops {
+ int (*connect_to_addr)(int type, const struct sockaddr_storage *addr,
+ socklen_t addrlen,
+ const struct network_helper_opts *opts);
+ int (*start_server)(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms);
+ int (*socket)(int famil, int type, int protocol);
+ int (*bind)(int fd, struct sockaddr *addr, socklen_t addrlen);
+ int (*getsockname)(int fd, struct sockaddr *addr, socklen_t *addrlen);
+ int (*getpeername)(int fd, struct sockaddr *addr, socklen_t *addrlen);
+ int (*sendmsg)(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen);
+ int (*close)(int fd);
+};
+
+static int user_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen)
+{
+ struct msghdr hdr;
+ struct iovec iov;
+
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = msg;
+ iov.iov_len = msglen;
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = (void *)addr;
+ hdr.msg_namelen = addrlen;
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ return sendmsg(fd, &hdr, 0);
+}
+
+static int user_bind(int fd, struct sockaddr *addr, socklen_t addrlen)
+{
+ return bind(fd, (const struct sockaddr *)addr, addrlen);
+}
+
+struct sock_ops user_ops = {
+ .connect_to_addr = connect_to_addr,
+ .start_server = start_server,
+ .socket = socket,
+ .bind = user_bind,
+ .getsockname = getsockname,
+ .getpeername = getpeername,
+ .sendmsg = user_sendmsg,
+ .close = close,
+};
+
+struct sock_ops kern_ops_sock_sendmsg = {
+ .connect_to_addr = kernel_connect_to_addr,
+ .start_server = kernel_start_server,
+ .socket = kernel_init_sock,
+ .bind = kernel_bind,
+ .getsockname = kernel_getsockname,
+ .getpeername = kernel_getpeername,
+ .sendmsg = sock_sendmsg,
+ .close = kernel_close_sock,
+};
+
+struct sock_ops kern_ops_kernel_sendmsg = {
+ .connect_to_addr = kernel_connect_to_addr,
+ .start_server = kernel_start_server,
+ .socket = kernel_init_sock,
+ .bind = kernel_bind,
+ .getsockname = kernel_getsockname,
+ .getpeername = kernel_getpeername,
+ .sendmsg = kernel_sendmsg,
+ .close = kernel_close_sock,
+};
+
+struct sock_addr_test {
+ enum sock_addr_test_type type;
+ const char *name;
+ /* BPF prog properties */
+ load_fn loadfn;
+ destroy_fn destroyfn;
+ enum bpf_attach_type attach_type;
+ /* Socket operations */
+ struct sock_ops *ops;
+ /* Socket properties */
+ int socket_family;
+ int socket_type;
+ /* IP:port pairs for BPF prog to override */
+ const char *requested_addr;
+ unsigned short requested_port;
+ const char *expected_addr;
+ unsigned short expected_port;
+ const char *expected_src_addr;
+ /* Expected test result */
+ enum {
+ LOAD_REJECT,
+ ATTACH_REJECT,
+ SYSCALL_EPERM,
+ SYSCALL_ENOTSUPP,
+ SUCCESS,
+ } expected_result;
+};
+
+#define BPF_SKEL_FUNCS_RAW(skel_name, prog_name) \
+static void *prog_name##_load_raw(int cgroup_fd, \
+ enum bpf_attach_type attach_type, \
+ bool expect_reject) \
+{ \
+ struct skel_name *skel = skel_name##__open(); \
+ int prog_fd = -1; \
+ if (!ASSERT_OK_PTR(skel, "skel_open")) \
+ goto cleanup; \
+ if (!ASSERT_OK(skel_name##__load(skel), "load")) \
+ goto cleanup; \
+ prog_fd = bpf_program__fd(skel->progs.prog_name); \
+ if (!ASSERT_GT(prog_fd, 0, "prog_fd")) \
+ goto cleanup; \
+ if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, \
+ BPF_F_ALLOW_OVERRIDE), "bpf_prog_attach") { \
+ ASSERT_TRUE(expect_reject, "unexpected rejection"); \
+ goto cleanup; \
+ } \
+ if (!ASSERT_FALSE(expect_reject, "expected rejection")) \
+ goto cleanup; \
+cleanup: \
+ if (prog_fd > 0) \
+ bpf_prog_detach(cgroup_fd, attach_type); \
+ skel_name##__destroy(skel); \
+ return NULL; \
+} \
+static void prog_name##_destroy_raw(void *progfd) \
+{ \
+ /* No-op. *_load_raw does all cleanup. */ \
+} \
+
+#define BPF_SKEL_FUNCS(skel_name, prog_name) \
+static void *prog_name##_load(int cgroup_fd, \
+ enum bpf_attach_type attach_type, \
+ bool expect_reject) \
+{ \
+ struct skel_name *skel = skel_name##__open(); \
+ if (!ASSERT_OK_PTR(skel, "skel_open")) \
+ goto cleanup; \
+ if (!ASSERT_OK(bpf_program__set_expected_attach_type(skel->progs.prog_name, \
+ attach_type), \
+ "set_expected_attach_type")) \
+ goto cleanup; \
+ if (skel_name##__load(skel)) { \
+ ASSERT_TRUE(expect_reject, "unexpected rejection"); \
+ goto cleanup; \
+ } \
+ if (!ASSERT_FALSE(expect_reject, "expected rejection")) \
+ goto cleanup; \
+ skel->links.prog_name = bpf_program__attach_cgroup( \
+ skel->progs.prog_name, cgroup_fd); \
+ if (!ASSERT_OK_PTR(skel->links.prog_name, "prog_attach")) \
+ goto cleanup; \
+ return skel; \
+cleanup: \
+ skel_name##__destroy(skel); \
+ return NULL; \
+} \
+static void prog_name##_destroy(void *skel) \
+{ \
+ skel_name##__destroy(skel); \
+}
+
+BPF_SKEL_FUNCS(bind4_prog, bind_v4_prog);
+BPF_SKEL_FUNCS_RAW(bind4_prog, bind_v4_prog);
+BPF_SKEL_FUNCS(bind4_prog, bind_v4_deny_prog);
+BPF_SKEL_FUNCS(bind6_prog, bind_v6_prog);
+BPF_SKEL_FUNCS_RAW(bind6_prog, bind_v6_prog);
+BPF_SKEL_FUNCS(bind6_prog, bind_v6_deny_prog);
+BPF_SKEL_FUNCS(connect4_prog, connect_v4_prog);
+BPF_SKEL_FUNCS_RAW(connect4_prog, connect_v4_prog);
+BPF_SKEL_FUNCS(connect4_prog, connect_v4_deny_prog);
+BPF_SKEL_FUNCS(connect6_prog, connect_v6_prog);
+BPF_SKEL_FUNCS_RAW(connect6_prog, connect_v6_prog);
+BPF_SKEL_FUNCS(connect6_prog, connect_v6_deny_prog);
+BPF_SKEL_FUNCS(connect_unix_prog, connect_unix_prog);
+BPF_SKEL_FUNCS_RAW(connect_unix_prog, connect_unix_prog);
+BPF_SKEL_FUNCS(connect_unix_prog, connect_unix_deny_prog);
+BPF_SKEL_FUNCS(sendmsg4_prog, sendmsg_v4_prog);
+BPF_SKEL_FUNCS_RAW(sendmsg4_prog, sendmsg_v4_prog);
+BPF_SKEL_FUNCS(sendmsg4_prog, sendmsg_v4_deny_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_prog);
+BPF_SKEL_FUNCS_RAW(sendmsg6_prog, sendmsg_v6_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_deny_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_preserve_dst_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_v4mapped_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_wildcard_prog);
+BPF_SKEL_FUNCS(sendmsg_unix_prog, sendmsg_unix_prog);
+BPF_SKEL_FUNCS_RAW(sendmsg_unix_prog, sendmsg_unix_prog);
+BPF_SKEL_FUNCS(sendmsg_unix_prog, sendmsg_unix_deny_prog);
+BPF_SKEL_FUNCS(recvmsg4_prog, recvmsg4_prog);
+BPF_SKEL_FUNCS_RAW(recvmsg4_prog, recvmsg4_prog);
+BPF_SKEL_FUNCS(recvmsg6_prog, recvmsg6_prog);
+BPF_SKEL_FUNCS_RAW(recvmsg6_prog, recvmsg6_prog);
+BPF_SKEL_FUNCS(recvmsg_unix_prog, recvmsg_unix_prog);
+BPF_SKEL_FUNCS_RAW(recvmsg_unix_prog, recvmsg_unix_prog);
+BPF_SKEL_FUNCS(getsockname_unix_prog, getsockname_unix_prog);
+BPF_SKEL_FUNCS_RAW(getsockname_unix_prog, getsockname_unix_prog);
+BPF_SKEL_FUNCS(getsockname4_prog, getsockname_v4_prog);
+BPF_SKEL_FUNCS_RAW(getsockname4_prog, getsockname_v4_prog);
+BPF_SKEL_FUNCS(getsockname6_prog, getsockname_v6_prog);
+BPF_SKEL_FUNCS_RAW(getsockname6_prog, getsockname_v6_prog);
+BPF_SKEL_FUNCS(getpeername_unix_prog, getpeername_unix_prog);
+BPF_SKEL_FUNCS_RAW(getpeername_unix_prog, getpeername_unix_prog);
+BPF_SKEL_FUNCS(getpeername4_prog, getpeername_v4_prog);
+BPF_SKEL_FUNCS_RAW(getpeername4_prog, getpeername_v4_prog);
+BPF_SKEL_FUNCS(getpeername6_prog, getpeername_v6_prog);
+BPF_SKEL_FUNCS_RAW(getpeername6_prog, getpeername_v6_prog);
+
+static struct sock_addr_test tests[] = {
+ /* bind - system calls */
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind (stream)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind deny (stream)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind (dgram)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind deny (dgram)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: load prog with wrong expected attach type",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: attach prog with wrong attach type",
+ bind_v4_prog_load_raw,
+ bind_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind (stream)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind deny (stream)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind (dgram)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind deny (dgram)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: load prog with wrong expected attach type",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: attach prog with wrong attach type",
+ bind_v6_prog_load_raw,
+ bind_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* bind - kernel calls */
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind (stream)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind deny (stream)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind (dgram)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind deny (dgram)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind (stream)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind deny (stream)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind (dgram)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind deny (dgram)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* connect - system calls */
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect (stream)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect deny (stream)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect (dgram)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect deny (dgram)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: load prog with wrong expected attach type",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: attach prog with wrong attach type",
+ connect_v4_prog_load_raw,
+ connect_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect (stream)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect deny (stream)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect (dgram)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect deny (dgram)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: load prog with wrong expected attach type",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: attach prog with wrong attach type",
+ connect_v6_prog_load_raw,
+ connect_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: connect (stream)",
+ connect_unix_prog_load,
+ connect_unix_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: connect deny (stream)",
+ connect_unix_deny_prog_load,
+ connect_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: attach prog with wrong attach type",
+ connect_unix_prog_load_raw,
+ connect_unix_prog_destroy_raw,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* connect - kernel calls */
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect (stream)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect deny (stream)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect (dgram)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect deny (dgram)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect (stream)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect deny (stream)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect (dgram)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect deny (dgram)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: kernel_connect (dgram)",
+ connect_unix_prog_load,
+ connect_unix_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: kernel_connect deny (dgram)",
+ connect_unix_deny_prog_load,
+ connect_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* sendmsg - system calls */
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sendmsg (dgram)",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sendmsg deny (dgram)",
+ sendmsg_v4_deny_prog_load,
+ sendmsg_v4_deny_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: load prog with wrong expected attach type",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: attach prog with wrong attach type",
+ sendmsg_v4_prog_load_raw,
+ sendmsg_v4_prog_destroy_raw,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg (dgram)",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg [::] (BSD'ism) (dgram)",
+ sendmsg_v6_preserve_dst_prog_load,
+ sendmsg_v6_preserve_dst_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg deny (dgram)",
+ sendmsg_v6_deny_prog_load,
+ sendmsg_v6_deny_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg IPv4-mapped IPv6 (dgram)",
+ sendmsg_v6_v4mapped_prog_load,
+ sendmsg_v6_v4mapped_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_ENOTSUPP,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg dst IP = [::] (BSD'ism) (dgram)",
+ sendmsg_v6_wildcard_prog_load,
+ sendmsg_v6_wildcard_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: load prog with wrong expected attach type",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: attach prog with wrong attach type",
+ sendmsg_v6_prog_load_raw,
+ sendmsg_v6_prog_destroy_raw,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sendmsg (dgram)",
+ sendmsg_unix_prog_load,
+ sendmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sendmsg deny (dgram)",
+ sendmsg_unix_deny_prog_load,
+ sendmsg_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: attach prog with wrong attach type",
+ sendmsg_unix_prog_load_raw,
+ sendmsg_unix_prog_destroy_raw,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* sendmsg - kernel calls (sock_sendmsg) */
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sock_sendmsg (dgram)",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sock_sendmsg deny (dgram)",
+ sendmsg_v4_deny_prog_load,
+ sendmsg_v4_deny_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sock_sendmsg (dgram)",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sock_sendmsg [::] (BSD'ism) (dgram)",
+ sendmsg_v6_preserve_dst_prog_load,
+ sendmsg_v6_preserve_dst_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sock_sendmsg deny (dgram)",
+ sendmsg_v6_deny_prog_load,
+ sendmsg_v6_deny_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sock_sendmsg (dgram)",
+ sendmsg_unix_prog_load,
+ sendmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sock_sendmsg deny (dgram)",
+ sendmsg_unix_deny_prog_load,
+ sendmsg_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* sendmsg - kernel calls (kernel_sendmsg) */
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: kernel_sendmsg (dgram)",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: kernel_sendmsg deny (dgram)",
+ sendmsg_v4_deny_prog_load,
+ sendmsg_v4_deny_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: kernel_sendmsg (dgram)",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: kernel_sendmsg [::] (BSD'ism) (dgram)",
+ sendmsg_v6_preserve_dst_prog_load,
+ sendmsg_v6_preserve_dst_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: kernel_sendmsg deny (dgram)",
+ sendmsg_v6_deny_prog_load,
+ sendmsg_v6_deny_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sock_sendmsg (dgram)",
+ sendmsg_unix_prog_load,
+ sendmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: kernel_sendmsg deny (dgram)",
+ sendmsg_unix_deny_prog_load,
+ sendmsg_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* recvmsg - system calls */
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg4: recvfrom (dgram)",
+ recvmsg4_prog_load,
+ recvmsg4_prog_destroy,
+ BPF_CGROUP_UDP4_RECVMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg4: attach prog with wrong attach type",
+ recvmsg4_prog_load_raw,
+ recvmsg4_prog_destroy_raw,
+ BPF_CGROUP_UDP6_RECVMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg6: recvfrom (dgram)",
+ recvmsg6_prog_load,
+ recvmsg6_prog_destroy,
+ BPF_CGROUP_UDP6_RECVMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg6: attach prog with wrong attach type",
+ recvmsg6_prog_load_raw,
+ recvmsg6_prog_destroy_raw,
+ BPF_CGROUP_UDP4_RECVMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix: recvfrom (dgram)",
+ recvmsg_unix_prog_load,
+ recvmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_RECVMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_ADDRESS,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix: recvfrom (stream)",
+ recvmsg_unix_prog_load,
+ recvmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_RECVMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_ADDRESS,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix: attach prog with wrong attach type",
+ recvmsg_unix_prog_load_raw,
+ recvmsg_unix_prog_destroy_raw,
+ BPF_CGROUP_UDP4_RECVMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_ADDRESS,
+ ATTACH_REJECT,
+ },
+
+ /* getsockname - system calls */
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: getsockname (stream)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: getsockname (dgram)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: attach prog with wrong attach type",
+ getsockname_v4_prog_load_raw,
+ getsockname_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: getsockname (stream)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: getsockname (dgram)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: attach prog with wrong attach type",
+ getsockname_v6_prog_load_raw,
+ getsockname_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix: getsockname",
+ getsockname_unix_prog_load,
+ getsockname_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix: attach prog with wrong attach type",
+ getsockname_unix_prog_load_raw,
+ getsockname_unix_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* getsockname - kernel calls */
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: kernel_getsockname (stream)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: kernel_getsockname (dgram)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: kernel_getsockname (stream)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: kernel_getsockname (dgram)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix: kernel_getsockname",
+ getsockname_unix_prog_load,
+ getsockname_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+
+ /* getpeername - system calls */
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: getpeername (stream)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: getpeername (dgram)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: attach prog with wrong attach type",
+ getpeername_v4_prog_load_raw,
+ getpeername_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: getpeername (stream)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: getpeername (dgram)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: attach prog with wrong attach type",
+ getpeername_v6_prog_load_raw,
+ getpeername_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix: getpeername",
+ getpeername_unix_prog_load,
+ getpeername_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix: attach prog with wrong attach type",
+ getpeername_unix_prog_load_raw,
+ getpeername_unix_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* getpeername - kernel calls */
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: kernel_getpeername (stream)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: kernel_getpeername (dgram)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: kernel_getpeername (stream)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: kernel_getpeername (dgram)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix: kernel_getpeername",
+ getpeername_unix_prog_load,
+ getpeername_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+};
+
+typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
+
+static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len,
+ const struct sockaddr_storage *addr2, socklen_t addr2_len,
+ bool cmp_port)
+{
+ const struct sockaddr_in *four1, *four2;
+ const struct sockaddr_in6 *six1, *six2;
+ const struct sockaddr_un *un1, *un2;
+
+ if (addr1->ss_family != addr2->ss_family)
+ return -1;
+
+ if (addr1_len != addr2_len)
+ return -1;
+
+ if (addr1->ss_family == AF_INET) {
+ four1 = (const struct sockaddr_in *)addr1;
+ four2 = (const struct sockaddr_in *)addr2;
+ return !((four1->sin_port == four2->sin_port || !cmp_port) &&
+ four1->sin_addr.s_addr == four2->sin_addr.s_addr);
+ } else if (addr1->ss_family == AF_INET6) {
+ six1 = (const struct sockaddr_in6 *)addr1;
+ six2 = (const struct sockaddr_in6 *)addr2;
+ return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
+ !memcmp(&six1->sin6_addr, &six2->sin6_addr,
+ sizeof(struct in6_addr)));
+ } else if (addr1->ss_family == AF_UNIX) {
+ un1 = (const struct sockaddr_un *)addr1;
+ un2 = (const struct sockaddr_un *)addr2;
+ return memcmp(un1, un2, addr1_len);
+ }
+
+ return -1;
+}
+
+static int cmp_sock_addr(info_fn fn, int sock1,
+ const struct sockaddr_storage *addr2,
+ socklen_t addr2_len, bool cmp_port)
+{
+ struct sockaddr_storage addr1;
+ socklen_t len1 = sizeof(addr1);
+
+ memset(&addr1, 0, len1);
+ if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
+ return -1;
+
+ return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port);
+}
+
+static int load_sock_addr_kern(void)
+{
+ int err;
+
+ skel = sock_addr_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto err;
+
+ err = 0;
+ goto out;
+err:
+ err = -1;
+out:
+ return err;
+}
+
+static void unload_sock_addr_kern(void)
+{
+ sock_addr_kern__destroy(skel);
+}
+
+static int test_bind(struct sock_addr_test *test)
+{
+ struct sockaddr_storage expected_addr;
+ socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, client = -1, err;
+
+ serv = test->ops->start_server(test->socket_family, test->socket_type,
+ test->requested_addr,
+ test->requested_port, 0);
+ if (serv < 0) {
+ err = errno;
+ goto err;
+ }
+
+ err = make_sockaddr(test->socket_family,
+ test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_sock_addr(test->ops->getsockname, serv, &expected_addr,
+ expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+ goto cleanup;
+
+ /* Try to connect to server just in case */
+ client = connect_to_addr(test->socket_type, &expected_addr, expected_addr_len, NULL);
+ if (!ASSERT_GE(client, 0, "connect_to_addr"))
+ goto cleanup;
+
+cleanup:
+ err = 0;
+err:
+ if (client != -1)
+ close(client);
+ if (serv != -1)
+ test->ops->close(serv);
+
+ return err;
+}
+
+static int test_connect(struct sock_addr_test *test)
+{
+ struct sockaddr_storage addr, expected_addr, expected_src_addr;
+ socklen_t addr_len = sizeof(struct sockaddr_storage),
+ expected_addr_len = sizeof(struct sockaddr_storage),
+ expected_src_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, client = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->expected_addr, test->expected_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ client = test->ops->connect_to_addr(test->socket_type, &addr, addr_len,
+ NULL);
+ if (client < 0) {
+ err = errno;
+ goto err;
+ }
+
+ err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ if (test->expected_src_addr) {
+ err = make_sockaddr(test->socket_family, test->expected_src_addr, 0,
+ &expected_src_addr, &expected_src_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+ }
+
+ err = cmp_sock_addr(test->ops->getpeername, client, &expected_addr,
+ expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
+ goto cleanup;
+
+ if (test->expected_src_addr) {
+ err = cmp_sock_addr(test->ops->getsockname, client,
+ &expected_src_addr, expected_src_addr_len,
+ false);
+ if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+ goto cleanup;
+ }
+cleanup:
+ err = 0;
+err:
+ if (client != -1)
+ test->ops->close(client);
+ if (serv != -1)
+ close(serv);
+
+ return err;
+}
+
+static int test_xmsg(struct sock_addr_test *test)
+{
+ struct sockaddr_storage addr, src_addr;
+ socklen_t addr_len = sizeof(struct sockaddr_storage),
+ src_addr_len = sizeof(struct sockaddr_storage);
+ char data = 'a';
+ int serv = -1, client = -1, err;
+
+ /* Unlike the other tests, here we test that we can rewrite the src addr
+ * with a recvmsg() hook.
+ */
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->expected_addr, test->expected_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ client = test->ops->socket(test->socket_family, test->socket_type, 0);
+ if (!ASSERT_GE(client, 0, "socket"))
+ goto cleanup;
+
+ /* AF_UNIX sockets have to be bound to something to trigger the recvmsg bpf program. */
+ if (test->socket_family == AF_UNIX) {
+ err = make_sockaddr(AF_UNIX, SRCUN_ADDRESS, 0, &src_addr, &src_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = test->ops->bind(client, (struct sockaddr *)&src_addr,
+ src_addr_len);
+ if (!ASSERT_OK(err, "bind"))
+ goto cleanup;
+ }
+
+ err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ if (test->socket_type == SOCK_DGRAM) {
+ err = test->ops->sendmsg(client, (struct sockaddr *)&addr,
+ addr_len, &data, sizeof(data));
+ if (err < 0) {
+ err = errno;
+ goto err;
+ }
+
+ if (!ASSERT_EQ(err, sizeof(data), "sendmsg"))
+ goto cleanup;
+ } else {
+ /* Testing with connection-oriented sockets is only valid for
+ * recvmsg() tests.
+ */
+ if (!ASSERT_EQ(test->type, SOCK_ADDR_TEST_RECVMSG, "recvmsg"))
+ goto cleanup;
+
+ err = connect(client, (const struct sockaddr *)&addr, addr_len);
+ if (!ASSERT_OK(err, "connect"))
+ goto cleanup;
+
+ err = send(client, &data, sizeof(data), 0);
+ if (!ASSERT_EQ(err, sizeof(data), "send"))
+ goto cleanup;
+
+ err = listen(serv, 0);
+ if (!ASSERT_OK(err, "listen"))
+ goto cleanup;
+
+ err = accept(serv, NULL, NULL);
+ if (!ASSERT_GE(err, 0, "accept"))
+ goto cleanup;
+
+ close(serv);
+ serv = err;
+ }
+
+ addr_len = src_addr_len = sizeof(struct sockaddr_storage);
+
+ err = recvfrom(serv, &data, sizeof(data), 0, (struct sockaddr *) &src_addr, &src_addr_len);
+ if (!ASSERT_EQ(err, sizeof(data), "recvfrom"))
+ goto cleanup;
+
+ ASSERT_EQ(data, 'a', "data mismatch");
+
+ if (test->expected_src_addr) {
+ err = make_sockaddr(test->socket_family, test->expected_src_addr, 0,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_addr(&src_addr, src_addr_len, &addr, addr_len, false);
+ if (!ASSERT_EQ(err, 0, "cmp_addr"))
+ goto cleanup;
+ }
+
+cleanup:
+ err = 0;
+err:
+ if (client != -1)
+ test->ops->close(client);
+ if (serv != -1)
+ close(serv);
+
+ return err;
+}
+
+static int test_getsockname(struct sock_addr_test *test)
+{
+ struct sockaddr_storage expected_addr;
+ socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, err;
+
+ serv = test->ops->start_server(test->socket_family, test->socket_type,
+ test->requested_addr, test->requested_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family,
+ test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_sock_addr(test->ops->getsockname, serv, &expected_addr, expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+ goto cleanup;
+
+cleanup:
+ if (serv != -1)
+ test->ops->close(serv);
+
+ return 0;
+}
+
+static int test_getpeername(struct sock_addr_test *test)
+{
+ struct sockaddr_storage addr, expected_addr;
+ socklen_t addr_len = sizeof(struct sockaddr_storage),
+ expected_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, client = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->requested_addr, test->requested_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ client = test->ops->connect_to_addr(test->socket_type, &addr, addr_len,
+ NULL);
+ if (!ASSERT_GE(client, 0, "connect_to_addr"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_sock_addr(test->ops->getpeername, client, &expected_addr,
+ expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
+ goto cleanup;
+
+cleanup:
+ if (client != -1)
+ test->ops->close(client);
+ if (serv != -1)
+ close(serv);
+
+ return 0;
+}
+
+static int setup_test_env(struct nstoken **tok)
+{
+ int err;
+
+ SYS_NOFAIL("ip netns delete %s", TEST_NS);
+ SYS(fail, "ip netns add %s", TEST_NS);
+ *tok = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(*tok, "netns token"))
+ goto fail;
+
+ SYS(fail, "ip link add dev %s1 type veth peer name %s2", TEST_IF_PREFIX,
+ TEST_IF_PREFIX);
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "ip link set %s1 up", TEST_IF_PREFIX);
+ SYS(fail, "ip link set %s2 up", TEST_IF_PREFIX);
+ SYS(fail, "ip -4 addr add %s/8 dev %s1", TEST_IPV4, TEST_IF_PREFIX);
+ SYS(fail, "ip -6 addr add %s/128 nodad dev %s1", TEST_IPV6, TEST_IF_PREFIX);
+
+ err = 0;
+ goto out;
+fail:
+ err = -1;
+ close_netns(*tok);
+ *tok = NULL;
+ SYS_NOFAIL("ip netns delete %s", TEST_NS);
+out:
+ return err;
+}
+
+static void cleanup_test_env(struct nstoken *tok)
+{
+ close_netns(tok);
+ SYS_NOFAIL("ip netns delete %s", TEST_NS);
+}
+
+void test_sock_addr(void)
+{
+ struct nstoken *tok = NULL;
+ int cgroup_fd = -1;
+ void *skel;
+
+ cgroup_fd = test__join_cgroup("/sock_addr");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto cleanup;
+
+ if (!ASSERT_OK(setup_test_env(&tok), "setup_test_env"))
+ goto cleanup;
+
+ if (!ASSERT_OK(load_sock_addr_kern(), "load_sock_addr_kern"))
+ goto cleanup;
+
+ for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) {
+ struct sock_addr_test *test = &tests[i];
+ int err;
+
+ if (!test__start_subtest(test->name))
+ continue;
+
+ skel = test->loadfn(cgroup_fd, test->attach_type,
+ test->expected_result == LOAD_REJECT ||
+ test->expected_result == ATTACH_REJECT);
+ if (!skel)
+ continue;
+
+ switch (test->type) {
+ /* Not exercised yet but we leave this code here for when the
+ * INET and INET6 sockaddr tests are migrated to this file in
+ * the future.
+ */
+ case SOCK_ADDR_TEST_BIND:
+ err = test_bind(test);
+ break;
+ case SOCK_ADDR_TEST_CONNECT:
+ err = test_connect(test);
+ break;
+ case SOCK_ADDR_TEST_SENDMSG:
+ case SOCK_ADDR_TEST_RECVMSG:
+ err = test_xmsg(test);
+ break;
+ case SOCK_ADDR_TEST_GETSOCKNAME:
+ err = test_getsockname(test);
+ break;
+ case SOCK_ADDR_TEST_GETPEERNAME:
+ err = test_getpeername(test);
+ break;
+ default:
+ ASSERT_TRUE(false, "Unknown sock addr test type");
+ err = -EINVAL;
+ break;
+ }
+
+ if (test->expected_result == SYSCALL_EPERM)
+ ASSERT_EQ(err, EPERM, "socket operation returns EPERM");
+ else if (test->expected_result == SYSCALL_ENOTSUPP)
+ ASSERT_EQ(err, ENOTSUPP, "socket operation returns ENOTSUPP");
+ else if (test->expected_result == SUCCESS)
+ ASSERT_OK(err, "socket operation succeeds");
+
+ test->destroyfn(skel);
+ }
+
+cleanup:
+ unload_sock_addr_kern();
+ cleanup_test_env(tok);
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_create.c b/tools/testing/selftests/bpf/prog_tests/sock_create.c
new file mode 100644
index 000000000000..187ffc5e60c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_create.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+
+static char bpf_log_buf[4096];
+static bool verbose;
+
+enum sock_create_test_error {
+ OK = 0,
+ DENY_CREATE,
+};
+
+static struct sock_create_test {
+ const char *descr;
+ const struct bpf_insn insns[64];
+ enum bpf_attach_type attach_type;
+ enum bpf_attach_type expected_attach_type;
+
+ int domain;
+ int type;
+ int protocol;
+
+ int optname;
+ int optval;
+ enum sock_create_test_error error;
+} tests[] = {
+ {
+ .descr = "AF_INET set priority",
+ .insns = {
+ /* r3 = 123 (priority) */
+ BPF_MOV64_IMM(BPF_REG_3, 123),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct bpf_sock, priority)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+
+ .optname = SO_PRIORITY,
+ .optval = 123,
+ },
+ {
+ .descr = "AF_INET6 set priority",
+ .insns = {
+ /* r3 = 123 (priority) */
+ BPF_MOV64_IMM(BPF_REG_3, 123),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct bpf_sock, priority)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+
+ .optname = SO_PRIORITY,
+ .optval = 123,
+ },
+ {
+ .descr = "AF_INET set mark",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* get uid of process */
+ BPF_EMIT_CALL(BPF_FUNC_get_current_uid_gid),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff),
+
+ /* if uid is 0, use given mark(666), else use uid as the mark */
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 666),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct bpf_sock, mark)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+
+ .optname = SO_MARK,
+ .optval = 666,
+ },
+ {
+ .descr = "AF_INET6 set mark",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* get uid of process */
+ BPF_EMIT_CALL(BPF_FUNC_get_current_uid_gid),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff),
+
+ /* if uid is 0, use given mark(666), else use uid as the mark */
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 666),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct bpf_sock, mark)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+
+ .optname = SO_MARK,
+ .optval = 666,
+ },
+ {
+ .descr = "AF_INET bound to iface",
+ .insns = {
+ /* r3 = 1 (lo interface) */
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct bpf_sock, bound_dev_if)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+
+ .optname = SO_BINDTOIFINDEX,
+ .optval = 1,
+ },
+ {
+ .descr = "AF_INET6 bound to iface",
+ .insns = {
+ /* r3 = 1 (lo interface) */
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct bpf_sock, bound_dev_if)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+
+ .optname = SO_BINDTOIFINDEX,
+ .optval = 1,
+ },
+ {
+ .descr = "block AF_INET, SOCK_DGRAM, IPPROTO_ICMP socket",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
+
+ /* sock->family == AF_INET */
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_sock, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, AF_INET, 5),
+
+ /* sock->type == SOCK_DGRAM */
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_sock, type)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, SOCK_DGRAM, 3),
+
+ /* sock->protocol == IPPROTO_ICMP */
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_sock, protocol)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, IPPROTO_ICMP, 1),
+
+ /* return 0 (block) */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_ICMP,
+
+ .error = DENY_CREATE,
+ },
+ {
+ .descr = "block AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6 socket",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
+
+ /* sock->family == AF_INET6 */
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_sock, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, AF_INET6, 5),
+
+ /* sock->type == SOCK_DGRAM */
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_sock, type)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, SOCK_DGRAM, 3),
+
+ /* sock->protocol == IPPROTO_ICMPV6 */
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_sock, protocol)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, IPPROTO_ICMPV6, 1),
+
+ /* return 0 (block) */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_ICMPV6,
+
+ .error = DENY_CREATE,
+ },
+ {
+ .descr = "load w/o expected_attach_type (compat mode)",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+static int load_prog(const struct bpf_insn *insns,
+ enum bpf_attach_type expected_attach_type)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .expected_attach_type = expected_attach_type,
+ .log_level = 2,
+ .log_buf = bpf_log_buf,
+ .log_size = sizeof(bpf_log_buf),
+ );
+ int fd, insns_cnt = 0;
+
+ for (;
+ insns[insns_cnt].code != (BPF_JMP | BPF_EXIT);
+ insns_cnt++) {
+ }
+ insns_cnt++;
+
+ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns,
+ insns_cnt, &opts);
+ if (verbose && fd < 0)
+ fprintf(stderr, "%s\n", bpf_log_buf);
+
+ return fd;
+}
+
+static int run_test(int cgroup_fd, struct sock_create_test *test)
+{
+ int sock_fd, err, prog_fd, optval, ret = -1;
+ socklen_t optlen = sizeof(optval);
+
+ prog_fd = load_prog(test->insns, test->expected_attach_type);
+ if (prog_fd < 0) {
+ log_err("Failed to load BPF program");
+ return -1;
+ }
+
+ err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+ if (err < 0) {
+ log_err("Failed to attach BPF program");
+ goto close_prog_fd;
+ }
+
+ sock_fd = socket(test->domain, test->type, test->protocol);
+ if (sock_fd < 0) {
+ if (test->error == DENY_CREATE)
+ ret = 0;
+ else
+ log_err("Failed to create socket");
+
+ goto detach_prog;
+ }
+
+ if (test->optname) {
+ err = getsockopt(sock_fd, SOL_SOCKET, test->optname, &optval, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt");
+ goto cleanup;
+ }
+
+ if (optval != test->optval) {
+ errno = 0;
+ log_err("getsockopt returned unexpected optval");
+ goto cleanup;
+ }
+ }
+
+ ret = test->error != OK;
+
+cleanup:
+ close(sock_fd);
+detach_prog:
+ bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+close_prog_fd:
+ close(prog_fd);
+ return ret;
+}
+
+void test_sock_create(void)
+{
+ int cgroup_fd, i;
+
+ cgroup_fd = test__join_cgroup("/sock_create");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (!test__start_subtest(tests[i].descr))
+ continue;
+
+ ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr);
+ }
+
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
new file mode 100644
index 000000000000..9c11938fe597
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <bpf/bpf_endian.h>
+
+#include "sock_destroy_prog.skel.h"
+#include "sock_destroy_prog_fail.skel.h"
+#include "network_helpers.h"
+
+#define TEST_NS "sock_destroy_netns"
+
+static void start_iter_sockets(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ char buf[50] = {};
+ int iter_fd, len;
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ ASSERT_GE(len, 0, "read");
+
+ close(iter_fd);
+
+free_link:
+ bpf_link__destroy(link);
+}
+
+static void test_tcp_client(struct sock_destroy_prog *skel)
+{
+ int serv = -1, clien = -1, accept_serv = -1, n;
+
+ serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ clien = connect_to_fd(serv, 0);
+ if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+ goto cleanup;
+
+ accept_serv = accept(serv, NULL, NULL);
+ if (!ASSERT_GE(accept_serv, 0, "serv accept"))
+ goto cleanup;
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_EQ(n, 1, "client send"))
+ goto cleanup;
+
+ /* Run iterator program that destroys connected client sockets. */
+ start_iter_sockets(skel->progs.iter_tcp6_client);
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+ goto cleanup;
+ ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket");
+
+cleanup:
+ if (clien != -1)
+ close(clien);
+ if (accept_serv != -1)
+ close(accept_serv);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_tcp_server(struct sock_destroy_prog *skel)
+{
+ int serv = -1, clien = -1, accept_serv = -1, n, serv_port;
+
+ serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+ serv_port = get_socket_local_port(serv);
+ if (!ASSERT_GE(serv_port, 0, "get_sock_local_port"))
+ goto cleanup;
+ skel->bss->serv_port = (__be16) serv_port;
+
+ clien = connect_to_fd(serv, 0);
+ if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+ goto cleanup;
+
+ accept_serv = accept(serv, NULL, NULL);
+ if (!ASSERT_GE(accept_serv, 0, "serv accept"))
+ goto cleanup;
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_EQ(n, 1, "client send"))
+ goto cleanup;
+
+ /* Run iterator program that destroys server sockets. */
+ start_iter_sockets(skel->progs.iter_tcp6_server);
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+ goto cleanup;
+ ASSERT_EQ(errno, ECONNRESET, "error code on destroyed socket");
+
+cleanup:
+ if (clien != -1)
+ close(clien);
+ if (accept_serv != -1)
+ close(accept_serv);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_udp_client(struct sock_destroy_prog *skel)
+{
+ int serv = -1, clien = -1, n = 0;
+
+ serv = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ clien = connect_to_fd(serv, 0);
+ if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+ goto cleanup;
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_EQ(n, 1, "client send"))
+ goto cleanup;
+
+ /* Run iterator program that destroys sockets. */
+ start_iter_sockets(skel->progs.iter_udp6_client);
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+ goto cleanup;
+ /* UDP sockets have an overriding error code after they are disconnected,
+ * so we don't check for ECONNABORTED error code.
+ */
+
+cleanup:
+ if (clien != -1)
+ close(clien);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_udp_server(struct sock_destroy_prog *skel)
+{
+ int *listen_fds = NULL, n, i, serv_port;
+ unsigned int num_listens = 5;
+ char buf[1];
+
+ /* Start reuseport servers. */
+ listen_fds = start_reuseport_server(AF_INET6, SOCK_DGRAM,
+ "::1", 0, 0, num_listens);
+ if (!ASSERT_OK_PTR(listen_fds, "start_reuseport_server"))
+ goto cleanup;
+ serv_port = get_socket_local_port(listen_fds[0]);
+ if (!ASSERT_GE(serv_port, 0, "get_sock_local_port"))
+ goto cleanup;
+ skel->bss->serv_port = (__be16) serv_port;
+
+ /* Run iterator program that destroys server sockets. */
+ start_iter_sockets(skel->progs.iter_udp6_server);
+
+ for (i = 0; i < num_listens; ++i) {
+ n = read(listen_fds[i], buf, sizeof(buf));
+ if (!ASSERT_EQ(n, -1, "read") ||
+ !ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket"))
+ break;
+ }
+ ASSERT_EQ(i, num_listens, "server socket");
+
+cleanup:
+ free_fds(listen_fds, num_listens);
+}
+
+void test_sock_destroy(void)
+{
+ struct sock_destroy_prog *skel;
+ struct nstoken *nstoken = NULL;
+ int cgroup_fd;
+
+ skel = sock_destroy_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/sock_destroy");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto cleanup;
+
+ skel->links.sock_connect = bpf_program__attach_cgroup(
+ skel->progs.sock_connect, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sock_connect, "prog_attach"))
+ goto cleanup;
+
+ SYS(cleanup, "ip netns add %s", TEST_NS);
+ SYS(cleanup, "ip -net %s link set dev lo up", TEST_NS);
+
+ nstoken = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto cleanup;
+
+ if (test__start_subtest("tcp_client"))
+ test_tcp_client(skel);
+ if (test__start_subtest("tcp_server"))
+ test_tcp_server(skel);
+ if (test__start_subtest("udp_client"))
+ test_udp_client(skel);
+ if (test__start_subtest("udp_server"))
+ test_udp_server(skel);
+
+ RUN_TESTS(sock_destroy_prog_fail);
+
+cleanup:
+ if (nstoken)
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " TEST_NS);
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+ sock_destroy_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index af87118e748e..7d23166c77af 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
+#define _GNU_SOURCE
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
+#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
@@ -15,12 +17,12 @@
#include "network_helpers.h"
#include "cgroup_helpers.h"
#include "test_progs.h"
-#include "bpf_rlimit.h"
#include "test_sock_fields.skel.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
INGRESS_LINUM_IDX,
+ READ_SK_DST_PORT_LINUM_IDX,
__NR_BPF_LINUM_ARRAY_IDX,
};
@@ -43,8 +45,16 @@ static __u64 child_cg_id;
static int linum_map_fd;
static __u32 duration;
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
+static bool create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return false;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+ return false;
+
+ return true;
+}
static void print_sk(const struct bpf_sock *sk, const char *prefix)
{
@@ -92,19 +102,24 @@ static void check_result(void)
{
struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
struct bpf_sock srv_sk, cli_sk, listen_sk;
- __u32 ingress_linum, egress_linum;
+ __u32 idx, ingress_linum, egress_linum, linum;
int err;
- err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
- &egress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ idx = EGRESS_LINUM_IDX;
+ err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
- err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
- &ingress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ idx = INGRESS_LINUM_IDX;
+ err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
+ idx = READ_SK_DST_PORT_LINUM_IDX;
+ err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
+ ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
+ ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
+
memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
@@ -263,7 +278,7 @@ static void test(void)
char buf[DATA_LEN];
/* Prepare listen_fd */
- listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
/* start_server() has logged the error details */
if (CHECK_FAIL(listen_fd == -1))
goto done;
@@ -329,10 +344,14 @@ done:
close(listen_fd);
}
-void test_sock_fields(void)
+void serial_test_sock_fields(void)
{
- struct bpf_link *egress_link = NULL, *ingress_link = NULL;
int parent_cg_fd = -1, child_cg_fd = -1;
+ struct bpf_link *link;
+
+ /* Use a dedicated netns to have a fixed listen port */
+ if (!create_netns())
+ return;
/* Create a cgroup, get fd, and join it */
parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
@@ -353,17 +372,20 @@ void test_sock_fields(void)
if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
goto done;
- egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
- child_cg_fd);
- if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
- PTR_ERR(egress_link)))
+ link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
+ goto done;
+ skel->links.egress_read_sock_fields = link;
+
+ link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
goto done;
+ skel->links.ingress_read_sock_fields = link;
- ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
- child_cg_fd);
- if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
- PTR_ERR(ingress_link)))
+ link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
goto done;
+ skel->links.read_sk_dst_port = link;
linum_map_fd = bpf_map__fd(skel->maps.linum_map);
sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
@@ -372,11 +394,9 @@ void test_sock_fields(void)
test();
done:
- bpf_link__destroy(egress_link);
- bpf_link__destroy(ingress_link);
test_sock_fields__destroy(skel);
- if (child_cg_fd != -1)
+ if (child_cg_fd >= 0)
close(child_cg_fd);
- if (parent_cg_fd != -1)
+ if (parent_cg_fd >= 0)
close(parent_cg_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
new file mode 100644
index 000000000000..27781df8f2fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -0,0 +1,994 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Meta
+
+#include <poll.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "sock_iter_batch.skel.h"
+
+#define TEST_NS "sock_iter_batch_netns"
+#define TEST_CHILD_NS "sock_iter_batch_child_netns"
+
+static const int init_batch_size = 16;
+static const int nr_soreuse = 4;
+
+struct iter_out {
+ int idx;
+ __u64 cookie;
+} __packed;
+
+struct sock_count {
+ __u64 cookie;
+ int count;
+};
+
+static int insert(__u64 cookie, struct sock_count counts[], int counts_len)
+{
+ int insert = -1;
+ int i = 0;
+
+ for (; i < counts_len; i++) {
+ if (!counts[i].cookie) {
+ insert = i;
+ } else if (counts[i].cookie == cookie) {
+ insert = i;
+ break;
+ }
+ }
+ if (insert < 0)
+ return insert;
+
+ counts[insert].cookie = cookie;
+ counts[insert].count++;
+
+ return counts[insert].count;
+}
+
+static int read_n(int iter_fd, int n, struct sock_count counts[],
+ int counts_len)
+{
+ struct iter_out out;
+ int nread = 1;
+ int i = 0;
+
+ for (; nread > 0 && (n < 0 || i < n); i++) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread"))
+ break;
+ ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert");
+ }
+
+ ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n");
+
+ return i;
+}
+
+static __u64 socket_cookie(int fd)
+{
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
+
+ if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie,
+ &cookie_len), "getsockopt(SO_COOKIE)"))
+ return 0;
+ return cookie;
+}
+
+static bool was_seen(int fd, struct sock_count counts[], int counts_len)
+{
+ __u64 cookie = socket_cookie(fd);
+ int i = 0;
+
+ for (; cookie && i < counts_len; i++)
+ if (cookie == counts[i].cookie)
+ return true;
+
+ return false;
+}
+
+static int get_seen_socket(int *fds, struct sock_count counts[], int n)
+{
+ int i = 0;
+
+ for (; i < n; i++)
+ if (was_seen(fds[i], counts, n))
+ return i;
+ return -1;
+}
+
+static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n)
+{
+ int i, nread, iter_fd;
+ int nth_sock_idx = -1;
+ struct iter_out out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ return -1;
+
+ for (; n >= 0; n--) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_GE(nread, 1, "nread"))
+ goto done;
+ }
+
+ for (i = 0; i < fds_len && nth_sock_idx < 0; i++)
+ if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie)
+ nth_sock_idx = i;
+done:
+ close(iter_fd);
+ return nth_sock_idx;
+}
+
+static void destroy(int fd)
+{
+ struct sock_iter_batch *skel = NULL;
+ __u64 cookie = socket_cookie(fd);
+ struct bpf_link *link = NULL;
+ int iter_fd = -1;
+ int nread;
+ __u64 out;
+
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ skel->rodata->destroy_cookie = cookie;
+
+ if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ /* Delete matching socket. */
+ nread = read(iter_fd, &out, sizeof(out));
+ ASSERT_GE(nread, 0, "nread");
+ if (nread)
+ ASSERT_EQ(out, cookie, "cookie matches");
+done:
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+ close(fd);
+}
+
+static int get_seen_count(int fd, struct sock_count counts[], int n)
+{
+ __u64 cookie = socket_cookie(fd);
+ int count = 0;
+ int i = 0;
+
+ for (; cookie && !count && i < n; i++)
+ if (cookie == counts[i].cookie)
+ count = counts[i].count;
+
+ return count;
+}
+
+static void check_n_were_seen_once(int *fds, int fds_len, int n,
+ struct sock_count counts[], int counts_len)
+{
+ int seen_once = 0;
+ int seen_cnt;
+ int i = 0;
+
+ for (; i < fds_len; i++) {
+ /* Skip any sockets that were closed or that weren't seen
+ * exactly once.
+ */
+ if (fds[i] < 0)
+ continue;
+ seen_cnt = get_seen_count(fds[i], counts, counts_len);
+ if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt"))
+ seen_once++;
+ }
+
+ ASSERT_EQ(seen_once, n, "seen_once");
+}
+
+static int accept_from_one(struct pollfd *server_poll_fds,
+ int server_poll_fds_len)
+{
+ static const int poll_timeout_ms = 5000; /* 5s */
+ int ret;
+ int i;
+
+ ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms);
+ if (!ASSERT_EQ(ret, 1, "poll"))
+ return -1;
+
+ for (i = 0; i < server_poll_fds_len; i++)
+ if (server_poll_fds[i].revents & POLLIN)
+ return accept(server_poll_fds[i].fd, NULL, NULL);
+
+ return -1;
+}
+
+static int *connect_to_server(int family, int sock_type, const char *addr,
+ __u16 port, int nr_connects, int *server_fds,
+ int server_fds_len)
+{
+ struct pollfd *server_poll_fds = NULL;
+ int *established_socks = NULL;
+ int i;
+
+ server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds));
+ if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds"))
+ return NULL;
+
+ for (i = 0; i < server_fds_len; i++) {
+ server_poll_fds[i].fd = server_fds[i];
+ server_poll_fds[i].events = POLLIN;
+ }
+
+ i = 0;
+
+ established_socks = malloc(sizeof(*established_socks) * nr_connects*2);
+ if (!ASSERT_OK_PTR(established_socks, "established_socks"))
+ goto error;
+
+ while (nr_connects--) {
+ established_socks[i] = connect_to_addr_str(family, sock_type,
+ addr, port, NULL);
+ if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str"))
+ goto error;
+ i++;
+ established_socks[i] = accept_from_one(server_poll_fds,
+ server_fds_len);
+ if (!ASSERT_OK_FD(established_socks[i], "accept_from_one"))
+ goto error;
+ i++;
+ }
+
+ free(server_poll_fds);
+ return established_socks;
+error:
+ free_fds(established_socks, i);
+ free(server_poll_fds);
+ return NULL;
+}
+
+static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_seen_socket(socks, counts, counts_len);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_seen_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Leave one established socket. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
+static void remove_unseen(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_unseen_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socks. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
+static void remove_all(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx, i;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ for (i = 0; i < socks_len - 1; i++) {
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+}
+
+static void remove_all_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *close_idx = NULL;
+ int i;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socks. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ close_idx = malloc(sizeof(int) * (established_socks_len - 1));
+ if (!ASSERT_OK_PTR(close_idx, "close_idx malloc"))
+ return;
+ for (i = 0; i < established_socks_len - 1; i++) {
+ close_idx[i] = get_nth_socket(established_socks,
+ established_socks_len, link,
+ listen_socks_len + i);
+ if (!ASSERT_GE(close_idx[i], 0, "close_idx"))
+ return;
+ }
+
+ for (i = 0; i < established_socks_len - 1; i++) {
+ destroy(established_socks[close_idx[i]]);
+ established_socks[close_idx[i]] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+ free(close_idx);
+}
+
+static void add_some(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Double the number of sockets in the bucket. */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+static void add_some_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts,
+ int counts_len, struct bpf_link *link,
+ int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established_socks_len - 1 sockets. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw established_socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+
+ /* Double the number of established sockets in the bucket. */
+ new_socks = connect_to_server(family, sock_type, addr, port,
+ established_socks_len / 2, listen_socks,
+ listen_socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "connect_to_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+done:
+ free_fds(new_socks, established_socks_len);
+}
+
+static void force_realloc(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socket just to initialize the batch. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Double the number of sockets in the bucket to force a realloc on the
+ * next read.
+ */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket from the first set was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+static void force_realloc_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ /* Iterate through all sockets to trigger a realloc. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+}
+
+struct test_case {
+ void (*test)(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd);
+ const char *description;
+ int ehash_buckets;
+ int connections;
+ int init_socks;
+ int max_socks;
+ int sock_type;
+ int family;
+};
+
+static struct test_case resume_tests[] = {
+ {
+ .description = "udp: resume after removing a seen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "udp: resume after removing one unseen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "udp: resume after removing all unseen sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "udp: resume after adding a few sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "udp: force a realloc to occur",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+ {
+ .description = "tcp: resume after removing a seen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "tcp: force a realloc to occur (listening)",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+ {
+ .description = "tcp: resume after removing a seen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen_established,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen_established,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all_established,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = add_some_established,
+ },
+ {
+ .description = "tcp: force a realloc to occur (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ /* Bucket size will need to double when going from listening to
+ * established sockets.
+ */
+ .connections = init_batch_size,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse + (init_batch_size * 2),
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = force_realloc_established,
+ },
+};
+
+static void do_resume_test(struct test_case *tc)
+{
+ struct sock_iter_batch *skel = NULL;
+ struct sock_count *counts = NULL;
+ static const __u16 port = 10001;
+ struct nstoken *nstoken = NULL;
+ struct bpf_link *link = NULL;
+ int *established_fds = NULL;
+ int err, iter_fd = -1;
+ const char *addr;
+ int *fds = NULL;
+
+ if (tc->ehash_buckets) {
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d",
+ tc->ehash_buckets);
+ SYS(done, "ip netns add %s", TEST_CHILD_NS);
+ SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS);
+ nstoken = open_netns(TEST_CHILD_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_child_netns"))
+ goto done;
+ }
+
+ counts = calloc(tc->max_socks, sizeof(*counts));
+ if (!ASSERT_OK_PTR(counts, "counts"))
+ goto done;
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ /* Prepare a bucket of sockets in the kernel hashtable */
+ addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1";
+ fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
+ goto done;
+ if (tc->connections) {
+ established_fds = connect_to_server(tc->family, tc->sock_type,
+ addr, port,
+ tc->connections, fds,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(established_fds, "connect_to_server"))
+ goto done;
+ }
+ skel->rodata->ports[0] = 0;
+ skel->rodata->ports[1] = 0;
+ skel->rodata->sf = tc->family;
+ skel->rodata->ss = 0;
+
+ err = sock_iter_batch__load(skel);
+ if (!ASSERT_OK(err, "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ?
+ skel->progs.iter_tcp_soreuse :
+ skel->progs.iter_udp_soreuse,
+ NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
+ established_fds, tc->connections*2, counts, tc->max_socks,
+ link, iter_fd);
+done:
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0");
+ free(counts);
+ free_fds(fds, tc->init_socks);
+ free_fds(established_fds, tc->connections*2);
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+}
+
+static void do_resume_tests(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(resume_tests); i++) {
+ if (test__start_subtest(resume_tests[i].description)) {
+ do_resume_test(&resume_tests[i]);
+ }
+ }
+}
+
+static void do_test(int sock_type, bool onebyone)
+{
+ int err, i, nread, to_read, total_read, iter_fd = -1;
+ struct iter_out outputs[nr_soreuse];
+ struct bpf_link *link = NULL;
+ struct sock_iter_batch *skel;
+ int first_idx, second_idx;
+ int *fds[2] = {};
+
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ return;
+
+ /* Prepare 2 buckets of sockets in the kernel hashtable */
+ for (i = 0; i < ARRAY_SIZE(fds); i++) {
+ int local_port;
+
+ fds[i] = start_reuseport_server(AF_INET6, sock_type, "::1", 0, 0,
+ nr_soreuse);
+ if (!ASSERT_OK_PTR(fds[i], "start_reuseport_server"))
+ goto done;
+ local_port = get_socket_local_port(*fds[i]);
+ if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
+ goto done;
+ skel->rodata->ports[i] = ntohs(local_port);
+ }
+ skel->rodata->sf = AF_INET6;
+ if (sock_type == SOCK_STREAM)
+ skel->rodata->ss = TCP_LISTEN;
+
+ err = sock_iter_batch__load(skel);
+ if (!ASSERT_OK(err, "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(sock_type == SOCK_STREAM ?
+ skel->progs.iter_tcp_soreuse :
+ skel->progs.iter_udp_soreuse,
+ NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+ goto done;
+
+ /* Test reading a bucket (either from fds[0] or fds[1]).
+ * Only read "nr_soreuse - 1" number of sockets
+ * from a bucket and leave one socket out from
+ * that bucket on purpose.
+ */
+ to_read = (nr_soreuse - 1) * sizeof(*outputs);
+ total_read = 0;
+ first_idx = -1;
+ do {
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
+ break;
+ total_read += nread;
+
+ if (first_idx == -1)
+ first_idx = outputs[0].idx;
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, first_idx, "first_idx");
+ } while (total_read < to_read);
+ ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread");
+ ASSERT_EQ(total_read, to_read, "total_read");
+
+ free_fds(fds[first_idx], nr_soreuse);
+ fds[first_idx] = NULL;
+
+ /* Read the "whole" second bucket */
+ to_read = nr_soreuse * sizeof(*outputs);
+ total_read = 0;
+ second_idx = !first_idx;
+ do {
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
+ break;
+ total_read += nread;
+
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, second_idx, "second_idx");
+ } while (total_read <= to_read);
+ ASSERT_EQ(nread, 0, "nread");
+ /* Both so_reuseport ports should be in different buckets, so
+ * total_read must equal to the expected to_read.
+ *
+ * For a very unlikely case, both ports collide at the same bucket,
+ * the bucket offset (i.e. 3) will be skipped and it cannot
+ * expect the to_read number of bytes.
+ */
+ if (skel->bss->bucket[0] != skel->bss->bucket[1])
+ ASSERT_EQ(total_read, to_read, "total_read");
+
+done:
+ for (i = 0; i < ARRAY_SIZE(fds); i++)
+ free_fds(fds[i], nr_soreuse);
+ if (iter_fd < 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+}
+
+void test_sock_iter_batch(void)
+{
+ struct nstoken *nstoken = NULL;
+
+ SYS_NOFAIL("ip netns del " TEST_NS);
+ SYS(done, "ip netns add %s", TEST_NS);
+ SYS(done, "ip -net %s link set dev lo up", TEST_NS);
+
+ nstoken = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto done;
+
+ if (test__start_subtest("tcp")) {
+ do_test(SOCK_STREAM, true);
+ do_test(SOCK_STREAM, false);
+ }
+ if (test__start_subtest("udp")) {
+ do_test(SOCK_DGRAM, true);
+ do_test(SOCK_DGRAM, false);
+ }
+ do_resume_tests();
+ close_netns(nstoken);
+
+done:
+ SYS_NOFAIL("ip netns del " TEST_NS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_post_bind.c b/tools/testing/selftests/bpf/prog_tests/sock_post_bind.c
new file mode 100644
index 000000000000..788135c9c673
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_post_bind.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+
+#define TEST_NS "sock_post_bind"
+
+static char bpf_log_buf[4096];
+
+static struct sock_post_bind_test {
+ const char *descr;
+ /* BPF prog properties */
+ const struct bpf_insn insns[64];
+ enum bpf_attach_type attach_type;
+ enum bpf_attach_type expected_attach_type;
+ /* Socket properties */
+ int domain;
+ int type;
+ /* Endpoint to bind() to */
+ const char *ip;
+ unsigned short port;
+ unsigned short port_retry;
+
+ /* Expected test result */
+ enum {
+ ATTACH_REJECT,
+ BIND_REJECT,
+ SUCCESS,
+ RETRY_SUCCESS,
+ RETRY_REJECT
+ } result;
+} tests[] = {
+ {
+ .descr = "attach type mismatch bind4 vs bind6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .result = ATTACH_REJECT,
+ },
+ {
+ .descr = "attach type mismatch bind6 vs bind4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
+ },
+ {
+ .descr = "attach type mismatch default vs bind4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
+ },
+ {
+ .descr = "attach type mismatch bind6 vs sock_create",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .result = ATTACH_REJECT,
+ },
+ {
+ .descr = "bind4 reject all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = BIND_REJECT,
+ },
+ {
+ .descr = "bind6 reject all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = BIND_REJECT,
+ },
+ {
+ .descr = "bind6 deny specific IP & port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[3])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x00000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .result = BIND_REJECT,
+ },
+ {
+ .descr = "bind4 allow specific IP & port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .result = SUCCESS,
+ },
+ {
+ .descr = "bind4 deny specific IP & port of TCP, and retry",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind4 deny specific IP & port of UDP, and retry",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind6 deny specific IP & port, and retry",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[3])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x00000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .port_retry = 9000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind4 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = SUCCESS,
+ },
+ {
+ .descr = "bind6 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = SUCCESS,
+ },
+};
+
+static int load_prog(const struct bpf_insn *insns,
+ enum bpf_attach_type expected_attach_type)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .expected_attach_type = expected_attach_type,
+ .log_level = 2,
+ .log_buf = bpf_log_buf,
+ .log_size = sizeof(bpf_log_buf),
+ );
+ int fd, insns_cnt = 0;
+
+ for (;
+ insns[insns_cnt].code != (BPF_JMP | BPF_EXIT);
+ insns_cnt++) {
+ }
+ insns_cnt++;
+
+ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns,
+ insns_cnt, &opts);
+ if (fd < 0)
+ fprintf(stderr, "%s\n", bpf_log_buf);
+
+ return fd;
+}
+
+static int bind_sock(int domain, int type, const char *ip,
+ unsigned short port, unsigned short port_retry)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr4;
+ int sockfd = -1;
+ socklen_t len;
+ int res = SUCCESS;
+
+ sockfd = socket(domain, type, 0);
+ if (sockfd < 0)
+ goto err;
+
+ memset(&addr, 0, sizeof(addr));
+
+ if (domain == AF_INET) {
+ len = sizeof(struct sockaddr_in);
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = domain;
+ addr4->sin_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
+ goto err;
+ } else if (domain == AF_INET6) {
+ len = sizeof(struct sockaddr_in6);
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = domain;
+ addr6->sin6_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
+ goto err;
+ } else {
+ goto err;
+ }
+
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ /* sys_bind() may fail for different reasons, errno has to be
+ * checked to confirm that BPF program rejected it.
+ */
+ if (errno != EPERM)
+ goto err;
+ if (port_retry)
+ goto retry;
+ res = BIND_REJECT;
+ goto out;
+ }
+
+ goto out;
+retry:
+ if (domain == AF_INET)
+ addr4->sin_port = htons(port_retry);
+ else
+ addr6->sin6_port = htons(port_retry);
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ if (errno != EPERM)
+ goto err;
+ res = RETRY_REJECT;
+ } else {
+ res = RETRY_SUCCESS;
+ }
+ goto out;
+err:
+ res = -1;
+out:
+ close(sockfd);
+ return res;
+}
+
+static int run_test(int cgroup_fd, struct sock_post_bind_test *test)
+{
+ int err, prog_fd, res, ret = 0;
+
+ prog_fd = load_prog(test->insns, test->expected_attach_type);
+ if (prog_fd < 0)
+ goto err;
+
+ err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+ if (err < 0) {
+ if (test->result == ATTACH_REJECT)
+ goto out;
+ else
+ goto err;
+ }
+
+ res = bind_sock(test->domain, test->type, test->ip, test->port,
+ test->port_retry);
+ if (res > 0 && test->result == res)
+ goto out;
+err:
+ ret = -1;
+out:
+ /* Detaching w/o checking return code: best effort attempt. */
+ if (prog_fd != -1)
+ bpf_prog_detach(cgroup_fd, test->attach_type);
+ close(prog_fd);
+ return ret;
+}
+
+void test_sock_post_bind(void)
+{
+ struct netns_obj *ns;
+ int cgroup_fd;
+ int i;
+
+ cgroup_fd = test__join_cgroup("/post_bind");
+ if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup"))
+ return;
+
+ ns = netns_new(TEST_NS, true);
+ if (!ASSERT_OK_PTR(ns, "netns_new"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (!test__start_subtest(tests[i].descr))
+ continue;
+
+ ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr);
+ }
+
+cleanup:
+ netns_free(ns);
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
new file mode 100644
index 000000000000..0d59503a0c73
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
@@ -0,0 +1,473 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SOCKET_HELPERS__
+#define __SOCKET_HELPERS__
+
+#include <sys/un.h>
+#include <linux/vm_sockets.h>
+
+/* include/linux/net.h */
+#define SOCK_TYPE_MASK 0xf
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+
+/* workaround for older vm_sockets.h */
+#ifndef VMADDR_CID_LOCAL
+#define VMADDR_CID_LOCAL 1
+#endif
+
+/* include/linux/compiler_types.h */
+#if __STDC_VERSION__ < 202311L && !defined(auto)
+# define auto __auto_type
+#endif
+
+/* include/linux/cleanup.h */
+#define __get_and_null(p, nullvalue) \
+ ({ \
+ auto __ptr = &(p); \
+ auto __val = *__ptr; \
+ *__ptr = nullvalue; \
+ __val; \
+ })
+
+#define take_fd(fd) __get_and_null(fd, -EBADF)
+
+/* Wrappers that fail the test on error and report it. */
+
+#define _FAIL(errnum, fmt...) \
+ ({ \
+ error_at_line(0, (errnum), __func__, __LINE__, fmt); \
+ CHECK_FAIL(true); \
+ })
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg) \
+ ({ \
+ char __buf[MAX_STRERR_LEN]; \
+ libbpf_strerror((err), __buf, sizeof(__buf)); \
+ FAIL("%s: %s", (msg), __buf); \
+ })
+
+
+#define xaccept_nonblock(fd, addr, len) \
+ ({ \
+ int __ret = \
+ accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("accept"); \
+ __ret; \
+ })
+
+#define xbind(fd, addr, len) \
+ ({ \
+ int __ret = bind((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("bind"); \
+ __ret; \
+ })
+
+#define xclose(fd) \
+ ({ \
+ int __ret = close((fd)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("close"); \
+ __ret; \
+ })
+
+#define xconnect(fd, addr, len) \
+ ({ \
+ int __ret = connect((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("connect"); \
+ __ret; \
+ })
+
+#define xgetsockname(fd, addr, len) \
+ ({ \
+ int __ret = getsockname((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockname"); \
+ __ret; \
+ })
+
+#define xgetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = getsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xlisten(fd, backlog) \
+ ({ \
+ int __ret = listen((fd), (backlog)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("listen"); \
+ __ret; \
+ })
+
+#define xsetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = setsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("setsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xsend(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = send((fd), (buf), (len), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("send"); \
+ __ret; \
+ })
+
+#define xrecv_nonblock(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
+ IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("recv"); \
+ __ret; \
+ })
+
+#define xsocket(family, sotype, flags) \
+ ({ \
+ int __ret = socket(family, sotype, flags); \
+ if (__ret == -1) \
+ FAIL_ERRNO("socket"); \
+ __ret; \
+ })
+
+static inline void close_fd(int *fd)
+{
+ if (*fd >= 0)
+ xclose(*fd);
+}
+
+#define __close_fd __attribute__((cleanup(close_fd)))
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+ return (struct sockaddr *)ss;
+}
+
+static inline void init_addr_loopback4(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static inline void init_addr_loopback6(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = 0;
+ addr6->sin6_addr = in6addr_loopback;
+ *len = sizeof(*addr6);
+}
+
+static inline void init_addr_loopback_unix(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_un *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->sun_family = AF_UNIX;
+ *len = sizeof(sa_family_t);
+}
+
+static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->svm_family = AF_VSOCK;
+ addr->svm_port = VMADDR_PORT_ANY;
+ addr->svm_cid = VMADDR_CID_LOCAL;
+ *len = sizeof(*addr);
+}
+
+static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ switch (family) {
+ case AF_INET:
+ init_addr_loopback4(ss, len);
+ return;
+ case AF_INET6:
+ init_addr_loopback6(ss, len);
+ return;
+ case AF_UNIX:
+ init_addr_loopback_unix(ss, len);
+ return;
+ case AF_VSOCK:
+ init_addr_loopback_vsock(ss, len);
+ return;
+ default:
+ FAIL("unsupported address family %d", family);
+ }
+}
+
+static inline int enable_reuseport(int s, int progfd)
+{
+ int err, one = 1;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err)
+ return -1;
+ err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+ sizeof(progfd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = 0;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return -1;
+
+ if (progfd >= 0)
+ enable_reuseport(s, progfd);
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ if (sotype & SOCK_DGRAM)
+ return s;
+
+ err = xlisten(s, SOMAXCONN);
+ if (err)
+ goto close;
+
+ return s;
+close:
+ xclose(s);
+ return -1;
+}
+
+static inline int socket_loopback(int family, int sotype)
+{
+ return socket_loopback_reuseport(family, sotype, -1);
+}
+
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set wfds;
+ int r, eval;
+ socklen_t esize = sizeof(eval);
+
+ FD_ZERO(&wfds);
+ FD_SET(fd, &wfds);
+
+ r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+ if (r != 1)
+ return -1;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+ return -1;
+ if (eval != 0) {
+ errno = eval;
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int poll_read(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set rfds;
+ int r;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+
+ r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+
+ return r == 1 ? 0 : -1;
+}
+
+static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return accept(fd, addr, len);
+}
+
+static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return recv(fd, buf, len, flags);
+}
+
+
+static inline int create_pair(int family, int sotype, int *p0, int *p1)
+{
+ __close_fd int s, c = -1, p = -1;
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return s;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ return c;
+
+ init_addr_loopback(family, &addr, &len);
+ err = xbind(c, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ err = connect(c, sockaddr(&addr), len);
+ if (err) {
+ if (errno != EINPROGRESS) {
+ FAIL_ERRNO("connect");
+ return err;
+ }
+
+ err = poll_connect(c, IO_TIMEOUT_SEC);
+ if (err) {
+ FAIL_ERRNO("poll_connect");
+ return err;
+ }
+ }
+
+ switch (sotype & SOCK_TYPE_MASK) {
+ case SOCK_DGRAM:
+ err = xgetsockname(c, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ err = xconnect(s, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ *p0 = take_fd(s);
+ break;
+ case SOCK_STREAM:
+ case SOCK_SEQPACKET:
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0)
+ return p;
+
+ *p0 = take_fd(p);
+ break;
+ default:
+ FAIL("Unsupported socket type %#x", sotype);
+ return -EOPNOTSUPP;
+ }
+
+ *p1 = take_fd(c);
+ return 0;
+}
+
+static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
+ int *p0, int *p1)
+{
+ int err;
+
+ err = create_pair(family, sotype, c0, p0);
+ if (err)
+ return err;
+
+ err = create_pair(family, sotype, c1, p1);
+ if (err) {
+ close(*c0);
+ close(*p0);
+ }
+
+ return err;
+}
+
+static inline const char *socket_kind_to_str(int sock_fd)
+{
+ socklen_t opt_len;
+ int domain, type;
+
+ opt_len = sizeof(domain);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_DOMAIN)");
+
+ opt_len = sizeof(type);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_TYPE)");
+
+ switch (domain) {
+ case AF_INET:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp4";
+ case SOCK_DGRAM:
+ return "udp4";
+ }
+ break;
+ case AF_INET6:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp6";
+ case SOCK_DGRAM:
+ return "udp6";
+ }
+ break;
+ case AF_UNIX:
+ switch (type) {
+ case SOCK_STREAM:
+ return "u_str";
+ case SOCK_DGRAM:
+ return "u_dgr";
+ case SOCK_SEQPACKET:
+ return "u_seq";
+ }
+ break;
+ case AF_VSOCK:
+ switch (type) {
+ case SOCK_STREAM:
+ return "v_str";
+ case SOCK_DGRAM:
+ return "v_dgr";
+ case SOCK_SEQPACKET:
+ return "v_seq";
+ }
+ break;
+ }
+
+ return "???";
+}
+
+#endif // __SOCKET_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index b8b48cac2ac3..1e3e4392dcca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -2,13 +2,21 @@
// Copyright (c) 2020 Cloudflare
#include <error.h>
#include <netinet/tcp.h>
+#include <sys/epoll.h>
#include "test_progs.h"
#include "test_skmsg_load_helpers.skel.h"
#include "test_sockmap_update.skel.h"
#include "test_sockmap_invalid_update.skel.h"
+#include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
+#include "test_sockmap_pass_prog.skel.h"
+#include "test_sockmap_drop_prog.skel.h"
+#include "test_sockmap_change_tail.skel.h"
#include "bpf_iter_sockmap.skel.h"
+#include "sockmap_helpers.h"
+
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
#define TCP_REPAIR_ON 1
@@ -25,21 +33,21 @@ static int connected_socket_v4(void)
int s, repair, err;
s = socket(AF_INET, SOCK_STREAM, 0);
- if (CHECK_FAIL(s == -1))
+ if (!ASSERT_GE(s, 0, "socket"))
goto error;
repair = TCP_REPAIR_ON;
err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)"))
goto error;
err = connect(s, (struct sockaddr *)&addr, len);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "connect"))
goto error;
repair = TCP_REPAIR_OFF_NO_WP;
err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)"))
goto error;
return s;
@@ -52,7 +60,7 @@ error:
static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
{
__u32 i, max_entries = bpf_map__max_entries(src);
- int err, duration = 0, src_fd, dst_fd;
+ int err, src_fd, dst_fd;
src_fd = bpf_map__fd(src);
dst_fd = bpf_map__fd(dst);
@@ -63,20 +71,18 @@ static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
err = bpf_map_lookup_elem(src_fd, &i, &src_cookie);
if (err && errno == ENOENT) {
err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
- CHECK(!err, "map_lookup_elem(dst)", "element %u not deleted\n", i);
- CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n",
- strerror(errno));
+ ASSERT_ERR(err, "map_lookup_elem(dst)");
+ ASSERT_EQ(errno, ENOENT, "map_lookup_elem(dst)");
continue;
}
- if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno)))
+ if (!ASSERT_OK(err, "lookup_elem(src)"))
continue;
err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
- if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno)))
+ if (!ASSERT_OK(err, "lookup_elem(dst)"))
continue;
- CHECK(dst_cookie != src_cookie, "cookie mismatch",
- "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i);
+ ASSERT_EQ(dst_cookie, src_cookie, "cookie mismatch");
}
}
@@ -87,71 +93,157 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
int s, map, err;
s = connected_socket_v4();
- if (CHECK_FAIL(s == -1))
+ if (!ASSERT_GE(s, 0, "connected_socket_v4"))
return;
- map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
- perror("bpf_create_map");
+ map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
+ if (!ASSERT_GE(map, 0, "bpf_map_create"))
goto out;
- }
err = bpf_map_update_elem(map, &zero, &s, BPF_NOEXIST);
- if (CHECK_FAIL(err)) {
- perror("bpf_map_update");
+ if (!ASSERT_OK(err, "bpf_map_update"))
goto out;
- }
out:
close(map);
close(s);
}
+static void test_sockmap_vsock_delete_on_close(void)
+{
+ int map, c, p, err, zero = 0;
+
+ map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
+
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
+
+ if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST))
+ goto close_socks;
+
+ xclose(c);
+ xclose(p);
+
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
+
+ err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
+ ASSERT_OK(err, "after close(), bpf_map_update");
+
+close_socks:
+ xclose(c);
+ xclose(p);
+close_map:
+ xclose(map);
+}
+
static void test_skmsg_helpers(enum bpf_map_type map_type)
{
struct test_skmsg_load_helpers *skel;
int err, map, verdict;
skel = test_skmsg_load_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_skmsg_load_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load"))
return;
- }
verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
map = bpf_map__fd(skel->maps.sock_map);
err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
- if (CHECK_FAIL(err)) {
- perror("bpf_prog_attach");
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
- }
err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT);
- if (CHECK_FAIL(err)) {
- perror("bpf_prog_detach2");
+ if (!ASSERT_OK(err, "bpf_prog_detach2"))
+ goto out;
+out:
+ test_skmsg_load_helpers__destroy(skel);
+}
+
+static void test_skmsg_helpers_with_link(enum bpf_map_type map_type)
+{
+ struct bpf_program *prog, *prog_clone, *prog_clone2;
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);
+ struct test_skmsg_load_helpers *skel;
+ struct bpf_link *link, *link2;
+ int err, map;
+
+ skel = test_skmsg_load_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load"))
+ return;
+
+ prog = skel->progs.prog_msg_verdict;
+ prog_clone = skel->progs.prog_msg_verdict_clone;
+ prog_clone2 = skel->progs.prog_msg_verdict_clone2;
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto out;
+
+ /* Fail since bpf_link for the same prog has been created. */
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_ERR(err, "bpf_prog_attach"))
+ goto out;
+
+ /* Fail since bpf_link for the same prog type has been created. */
+ link2 = bpf_program__attach_sockmap(prog_clone, map);
+ if (!ASSERT_ERR_PTR(link2, "bpf_program__attach_sockmap")) {
+ bpf_link__detach(link2);
goto out;
}
+
+ err = bpf_link__update_program(link, prog_clone);
+ if (!ASSERT_OK(err, "bpf_link__update_program"))
+ goto out;
+
+ /* Fail since a prog with different type attempts to do update. */
+ err = bpf_link__update_program(link, skel->progs.prog_skb_verdict);
+ if (!ASSERT_ERR(err, "bpf_link__update_program"))
+ goto out;
+
+ /* Fail since the old prog does not match the one in the kernel. */
+ opts.old_prog_fd = bpf_program__fd(prog_clone2);
+ opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts);
+ if (!ASSERT_ERR(err, "bpf_link_update"))
+ goto out;
+
+ opts.old_prog_fd = bpf_program__fd(prog_clone);
+ opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(err, "bpf_link_update"))
+ goto out;
out:
+ bpf_link__detach(link);
test_skmsg_load_helpers__destroy(skel);
}
static void test_sockmap_update(enum bpf_map_type map_type)
{
- struct bpf_prog_test_run_attr tattr;
- int err, prog, src, duration = 0;
+ int err, prog, src;
struct test_sockmap_update *skel;
struct bpf_map *dst_map;
const __u32 zero = 0;
char dummy[14] = {0};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = dummy,
+ .data_size_in = sizeof(dummy),
+ .repeat = 1,
+ );
__s64 sk;
sk = connected_socket_v4();
- if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n"))
+ if (!ASSERT_NEQ(sk, -1, "connected_socket_v4"))
return;
skel = test_sockmap_update__open_and_load();
- if (CHECK(!skel, "open_and_load", "cannot load skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
goto close_sk;
prog = bpf_program__fd(skel->progs.copy_sock_map);
@@ -162,19 +254,13 @@ static void test_sockmap_update(enum bpf_map_type map_type)
dst_map = skel->maps.dst_sock_hash;
err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
- if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
+ if (!ASSERT_OK(err, "update_elem(src)"))
goto out;
- tattr = (struct bpf_prog_test_run_attr){
- .prog_fd = prog,
- .repeat = 1,
- .data_in = dummy,
- .data_size_in = sizeof(dummy),
- };
-
- err = bpf_prog_test_run_xattr(&tattr);
- if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run",
- "errno=%u retval=%u\n", errno, tattr.retval))
+ err = bpf_prog_test_run_opts(prog, &topts);
+ if (!ASSERT_OK(err, "test_run"))
+ goto out;
+ if (!ASSERT_NEQ(topts.retval, 0, "test_run retval"))
goto out;
compare_cookies(skel->maps.src, dst_map);
@@ -188,17 +274,16 @@ close_sk:
static void test_sockmap_invalid_update(void)
{
struct test_sockmap_invalid_update *skel;
- int duration = 0;
skel = test_sockmap_invalid_update__open_and_load();
- if (CHECK(skel, "open_and_load", "verifier accepted map_update\n"))
+ if (!ASSERT_NULL(skel, "open_and_load"))
test_sockmap_invalid_update__destroy(skel);
}
static void test_sockmap_copy(enum bpf_map_type map_type)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
- int err, len, src_fd, iter_fd, duration = 0;
+ int err, len, src_fd, iter_fd;
union bpf_iter_link_info linfo = {};
__u32 i, num_sockets, num_elems;
struct bpf_iter_sockmap *skel;
@@ -208,7 +293,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
char buf[64];
skel = bpf_iter_sockmap__open_and_load();
- if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_sockmap__open_and_load"))
return;
if (map_type == BPF_MAP_TYPE_SOCKMAP) {
@@ -222,7 +307,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
}
sock_fd = calloc(num_sockets, sizeof(*sock_fd));
- if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n"))
+ if (!ASSERT_OK_PTR(sock_fd, "calloc(sock_fd)"))
goto out;
for (i = 0; i < num_sockets; i++)
@@ -232,11 +317,11 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
for (i = 0; i < num_sockets; i++) {
sock_fd[i] = connected_socket_v4();
- if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n"))
+ if (!ASSERT_NEQ(sock_fd[i], -1, "connected_socket_v4"))
goto out;
err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST);
- if (CHECK(err, "map_update", "failed: %s\n", strerror(errno)))
+ if (!ASSERT_OK(err, "map_update"))
goto out;
}
@@ -244,26 +329,24 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.copy, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
- if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n",
- skel->bss->elems, num_elems))
+ if (!ASSERT_EQ(skel->bss->elems, num_elems, "elems"))
goto close_iter;
- if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n",
- skel->bss->socks, num_sockets))
+ if (!ASSERT_EQ(skel->bss->socks, num_sockets, "socks"))
goto close_iter;
compare_cookies(src, skel->maps.dst);
@@ -281,12 +364,692 @@ out:
bpf_iter_sockmap__destroy(skel);
}
+static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
+ enum bpf_attach_type second)
+{
+ struct test_sockmap_skb_verdict_attach *skel;
+ int err, map, verdict;
+
+ skel = test_sockmap_skb_verdict_attach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(verdict, map, first, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ err = bpf_prog_attach(verdict, map, second, 0);
+ ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
+
+ err = bpf_prog_detach2(verdict, map, first);
+ if (!ASSERT_OK(err, "bpf_prog_detach2"))
+ goto out;
+out:
+ test_sockmap_skb_verdict_attach__destroy(skel);
+}
+
+static void test_sockmap_skb_verdict_attach_with_link(void)
+{
+ struct test_sockmap_skb_verdict_attach *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err, map;
+
+ skel = test_sockmap_skb_verdict_attach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+ prog = skel->progs.prog_skb_verdict;
+ map = bpf_map__fd(skel->maps.sock_map);
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto out;
+
+ bpf_link__detach(link);
+
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ /* Fail since attaching with the same prog/map has been done. */
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_sockmap"))
+ bpf_link__detach(link);
+
+ err = bpf_prog_detach2(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT);
+ if (!ASSERT_OK(err, "bpf_prog_detach2"))
+ goto out;
+out:
+ test_sockmap_skb_verdict_attach__destroy(skel);
+}
+
+static __u32 query_prog_id(int prog_fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int err;
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(info_len, sizeof(info), "bpf_prog_get_info_by_fd"))
+ return 0;
+
+ return info.id;
+}
+
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+ struct test_sockmap_progs_query *skel;
+ int err, map_fd, verdict_fd;
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+
+ skel = test_sockmap_progs_query__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ if (attach_type == BPF_SK_MSG_VERDICT)
+ verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
+ else
+ verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
+
+ err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ ASSERT_OK(err, "bpf_prog_query failed");
+ ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+ ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
+
+ err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach failed"))
+ goto out;
+
+ prog_cnt = 1;
+ err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ ASSERT_OK(err, "bpf_prog_query failed");
+ ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+ ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+ ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+ "wrong prog_ids on query");
+
+ bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+out:
+ test_sockmap_progs_query__destroy(skel);
+}
+
+#define MAX_EVENTS 10
+static void test_sockmap_skb_verdict_shutdown(void)
+{
+ int n, err, map, verdict, c1 = -1, p1 = -1;
+ struct epoll_event ev, events[MAX_EVENTS];
+ struct test_sockmap_pass_prog *skel;
+ int zero = 0;
+ int epollfd;
+ char b;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
+ if (err < 0)
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (err < 0)
+ goto out_close;
+
+ shutdown(p1, SHUT_WR);
+
+ ev.events = EPOLLIN;
+ ev.data.fd = c1;
+
+ epollfd = epoll_create1(0);
+ if (!ASSERT_GT(epollfd, -1, "epoll_create(0)"))
+ goto out_close;
+ err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev);
+ if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)"))
+ goto out_close;
+ err = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+ if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
+ goto out_close;
+
+ n = recv(c1, &b, 1, MSG_DONTWAIT);
+ ASSERT_EQ(n, 0, "recv(fin)");
+out_close:
+ close(c1);
+ close(p1);
+out:
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+
+static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+{
+ int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+ int expected, zero = 0, sent, recvd, avail;
+ struct test_sockmap_pass_prog *pass = NULL;
+ struct test_sockmap_drop_prog *drop = NULL;
+ char buf[256] = "0123456789";
+
+ if (pass_prog) {
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+ expected = sizeof(buf);
+ } else {
+ drop = test_sockmap_drop_prog__open_and_load();
+ if (!ASSERT_OK_PTR(drop, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(drop->progs.prog_skb_verdict);
+ map = bpf_map__fd(drop->maps.sock_map_rx);
+ /* On drop data is consumed immediately and copied_seq inc'd */
+ expected = 0;
+ }
+
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ err = create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+
+ sent = xsend(p1, &buf, sizeof(buf), 0);
+ ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
+ /* On DROP test there will be no data to read */
+ if (pass_prog) {
+ recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
+ }
+
+out_close:
+ close(c0);
+ close(p0);
+ close(c1);
+ close(p1);
+out:
+ if (pass_prog)
+ test_sockmap_pass_prog__destroy(pass);
+ else
+ test_sockmap_drop_prog__destroy(drop);
+}
+
+static void test_sockmap_skb_verdict_change_tail(void)
+{
+ struct test_sockmap_change_tail *skel;
+ int err, map, verdict;
+ int c1, p1, sent, recvd;
+ int zero = 0;
+ char buf[2];
+
+ skel = test_sockmap_change_tail__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+ err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+ sent = xsend(p1, "Tr", 2, 0);
+ ASSERT_EQ(sent, 2, "xsend(p1)");
+ recvd = recv(c1, buf, 2, 0);
+ ASSERT_EQ(recvd, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ sent = xsend(p1, "G", 1, 0);
+ ASSERT_EQ(sent, 1, "xsend(p1)");
+ recvd = recv(c1, buf, 2, 0);
+ ASSERT_EQ(recvd, 2, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ sent = xsend(p1, "E", 1, 0);
+ ASSERT_EQ(sent, 1, "xsend(p1)");
+ recvd = recv(c1, buf, 1, 0);
+ ASSERT_EQ(recvd, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+out_close:
+ close(c1);
+ close(p1);
+out:
+ test_sockmap_change_tail__destroy(skel);
+}
+
+static void test_sockmap_skb_verdict_peek_helper(int map)
+{
+ int err, c1, p1, zero = 0, sent, recvd, avail;
+ char snd[256] = "0123456789";
+ char rcv[256] = "0";
+
+ err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
+ if (!ASSERT_OK(err, "create_pair()"))
+ return;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+
+ sent = xsend(p1, snd, sizeof(snd), 0);
+ ASSERT_EQ(sent, sizeof(snd), "xsend(p1)");
+ recvd = recv(c1, rcv, sizeof(rcv), MSG_PEEK);
+ ASSERT_EQ(recvd, sizeof(rcv), "recv(c1)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, sizeof(snd), "after peek ioctl(FIONREAD)");
+ recvd = recv(c1, rcv, sizeof(rcv), 0);
+ ASSERT_EQ(recvd, sizeof(rcv), "recv(p0)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, 0, "after read ioctl(FIONREAD)");
+
+out_close:
+ close(c1);
+ close(p1);
+}
+
+static void test_sockmap_skb_verdict_peek(void)
+{
+ struct test_sockmap_pass_prog *pass;
+ int err, map, verdict;
+
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ test_sockmap_skb_verdict_peek_helper(map);
+
+out:
+ test_sockmap_pass_prog__destroy(pass);
+}
+
+static void test_sockmap_skb_verdict_peek_with_link(void)
+{
+ struct test_sockmap_pass_prog *pass;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err, map;
+
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ prog = pass->progs.prog_skb_verdict;
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto out;
+
+ err = bpf_link__update_program(link, pass->progs.prog_skb_verdict_clone);
+ if (!ASSERT_OK(err, "bpf_link__update_program"))
+ goto out;
+
+ /* Fail since a prog with different attach type attempts to do update. */
+ err = bpf_link__update_program(link, pass->progs.prog_skb_parser);
+ if (!ASSERT_ERR(err, "bpf_link__update_program"))
+ goto out;
+
+ test_sockmap_skb_verdict_peek_helper(map);
+ ASSERT_EQ(pass->bss->clone_called, 1, "clone_called");
+out:
+ bpf_link__detach(link);
+ test_sockmap_pass_prog__destroy(pass);
+}
+
+static void test_sockmap_unconnected_unix(void)
+{
+ int err, map, stream = 0, dgram = 0, zero = 0;
+ struct test_sockmap_pass_prog *skel;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ stream = xsocket(AF_UNIX, SOCK_STREAM, 0);
+ if (stream < 0)
+ return;
+
+ dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
+ if (dgram < 0) {
+ close(stream);
+ return;
+ }
+
+ err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY);
+ ASSERT_ERR(err, "bpf_map_update_elem(stream)");
+
+ err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(dgram)");
+
+ close(stream);
+ close(dgram);
+}
+
+static void test_sockmap_many_socket(void)
+{
+ struct test_sockmap_pass_prog *skel;
+ int stream[2], dgram, udp, tcp;
+ int i, err, map, entry = 0;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
+ if (dgram < 0) {
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ tcp = connected_socket_v4();
+ if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
+ close(dgram);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+ if (udp < 0) {
+ close(dgram);
+ close(tcp);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
+ ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
+ if (err)
+ goto out;
+
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map, &entry, &stream[0], BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(stream)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map, &entry, &dgram, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(dgram)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map, &entry, &udp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(udp)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map, &entry, &tcp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(tcp)");
+ }
+ for (entry--; entry >= 0; entry--) {
+ err = bpf_map_delete_elem(map, &entry);
+ ASSERT_OK(err, "bpf_map_delete_elem(entry)");
+ }
+
+ close(stream[0]);
+ close(stream[1]);
+out:
+ close(dgram);
+ close(tcp);
+ close(udp);
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_many_maps(void)
+{
+ struct test_sockmap_pass_prog *skel;
+ int stream[2], dgram, udp, tcp;
+ int i, err, map[2], entry = 0;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ map[0] = bpf_map__fd(skel->maps.sock_map_rx);
+ map[1] = bpf_map__fd(skel->maps.sock_map_tx);
+
+ dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
+ if (dgram < 0) {
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ tcp = connected_socket_v4();
+ if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
+ close(dgram);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+ if (udp < 0) {
+ close(dgram);
+ close(tcp);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
+ ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
+ if (err)
+ goto out;
+
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map[i], &entry, &stream[0], BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(stream)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map[i], &entry, &dgram, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(dgram)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map[i], &entry, &udp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(udp)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map[i], &entry, &tcp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(tcp)");
+ }
+ for (entry--; entry >= 0; entry--) {
+ err = bpf_map_delete_elem(map[1], &entry);
+ entry--;
+ ASSERT_OK(err, "bpf_map_delete_elem(entry)");
+ err = bpf_map_delete_elem(map[0], &entry);
+ ASSERT_OK(err, "bpf_map_delete_elem(entry)");
+ }
+
+ close(stream[0]);
+ close(stream[1]);
+out:
+ close(dgram);
+ close(tcp);
+ close(udp);
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_same_sock(void)
+{
+ struct test_sockmap_pass_prog *skel;
+ int stream[2], dgram, udp, tcp;
+ int i, err, map, zero = 0;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
+ if (dgram < 0) {
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ tcp = connected_socket_v4();
+ if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
+ close(dgram);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+ if (udp < 0) {
+ close(dgram);
+ close(tcp);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
+ ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
+ if (err) {
+ close(tcp);
+ goto out;
+ }
+
+ for (i = 0; i < 2; i++) {
+ err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(stream)");
+ }
+ for (i = 0; i < 2; i++) {
+ err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(dgram)");
+ }
+ for (i = 0; i < 2; i++) {
+ err = bpf_map_update_elem(map, &zero, &udp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(udp)");
+ }
+ for (i = 0; i < 2; i++) {
+ err = bpf_map_update_elem(map, &zero, &tcp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(tcp)");
+ }
+
+ close(tcp);
+ err = bpf_map_delete_elem(map, &zero);
+ ASSERT_ERR(err, "bpf_map_delete_elem(entry)");
+
+ close(stream[0]);
+ close(stream[1]);
+out:
+ close(dgram);
+ close(udp);
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_skb_verdict_vsock_poll(void)
+{
+ struct test_sockmap_pass_prog *skel;
+ int err, map, conn, peer;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ char buf = 'x';
+ int zero = 0;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (create_pair(AF_VSOCK, SOCK_STREAM, &conn, &peer))
+ goto destroy;
+
+ prog = skel->progs.prog_skb_verdict;
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto close;
+
+ err = bpf_map_update_elem(map, &zero, &conn, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto detach;
+
+ if (xsend(peer, &buf, 1, 0) != 1)
+ goto detach;
+
+ err = poll_read(conn, IO_TIMEOUT_SEC);
+ if (!ASSERT_OK(err, "poll"))
+ goto detach;
+
+ if (xrecv_nonblock(conn, &buf, 1, 0) != 1)
+ FAIL("xrecv_nonblock");
+detach:
+ bpf_link__detach(link);
+close:
+ xclose(conn);
+ xclose(peer);
+destroy:
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_vsock_unconnected(void)
+{
+ struct sockaddr_storage addr;
+ int map, s, zero = 0;
+ socklen_t alen;
+
+ map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
+
+ s = xsocket(AF_VSOCK, SOCK_STREAM, 0);
+ if (s < 0)
+ goto close_map;
+
+ /* Fail connect(), but trigger transport assignment. */
+ init_addr_loopback(AF_VSOCK, &addr, &alen);
+ if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect"))
+ goto close_sock;
+
+ ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update");
+
+close_sock:
+ xclose(s);
+close_map:
+ xclose(map);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap vsock delete on close"))
+ test_sockmap_vsock_delete_on_close();
if (test__start_subtest("sockmap sk_msg load helpers"))
test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash sk_msg load helpers"))
@@ -301,4 +1064,48 @@ void test_sockmap_basic(void)
test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash copy"))
test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap skb_verdict attach")) {
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_VERDICT,
+ BPF_SK_SKB_STREAM_VERDICT);
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
+ BPF_SK_SKB_VERDICT);
+ }
+ if (test__start_subtest("sockmap skb_verdict attach_with_link"))
+ test_sockmap_skb_verdict_attach_with_link();
+ if (test__start_subtest("sockmap msg_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+ if (test__start_subtest("sockmap stream_parser progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+ if (test__start_subtest("sockmap stream_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+ if (test__start_subtest("sockmap skb_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
+ if (test__start_subtest("sockmap skb_verdict shutdown"))
+ test_sockmap_skb_verdict_shutdown();
+ if (test__start_subtest("sockmap skb_verdict fionread"))
+ test_sockmap_skb_verdict_fionread(true);
+ if (test__start_subtest("sockmap skb_verdict fionread on drop"))
+ test_sockmap_skb_verdict_fionread(false);
+ if (test__start_subtest("sockmap skb_verdict change tail"))
+ test_sockmap_skb_verdict_change_tail();
+ if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
+ test_sockmap_skb_verdict_peek();
+ if (test__start_subtest("sockmap skb_verdict msg_f_peek with link"))
+ test_sockmap_skb_verdict_peek_with_link();
+ if (test__start_subtest("sockmap unconnected af_unix"))
+ test_sockmap_unconnected_unix();
+ if (test__start_subtest("sockmap one socket to many map entries"))
+ test_sockmap_many_socket();
+ if (test__start_subtest("sockmap one socket to many maps"))
+ test_sockmap_many_maps();
+ if (test__start_subtest("sockmap same socket replace"))
+ test_sockmap_same_sock();
+ if (test__start_subtest("sockmap sk_msg attach sockmap helpers with link"))
+ test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link"))
+ test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap skb_verdict vsock poll"))
+ test_sockmap_skb_verdict_vsock_poll();
+ if (test__start_subtest("sockmap vsock unconnected"))
+ test_sockmap_vsock_unconnected();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
new file mode 100644
index 000000000000..d815efac52fd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -0,0 +1,83 @@
+#ifndef __SOCKMAP_HELPERS__
+#define __SOCKMAP_HELPERS__
+
+#include "socket_helpers.h"
+
+#define MAX_TEST_NAME 80
+
+#define u32(v) ((u32){(v)})
+#define u64(v) ((u64){(v)})
+
+#define __always_unused __attribute__((__unused__))
+
+#define xbpf_map_delete_elem(fd, key) \
+ ({ \
+ int __ret = bpf_map_delete_elem((fd), (key)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_delete"); \
+ __ret; \
+ })
+
+#define xbpf_map_lookup_elem(fd, key, val) \
+ ({ \
+ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_lookup"); \
+ __ret; \
+ })
+
+#define xbpf_map_update_elem(fd, key, val, flags) \
+ ({ \
+ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_update"); \
+ __ret; \
+ })
+
+#define xbpf_prog_attach(prog, target, type, flags) \
+ ({ \
+ int __ret = \
+ bpf_prog_attach((prog), (target), (type), (flags)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("prog_attach(" #type ")"); \
+ __ret; \
+ })
+
+#define xbpf_prog_detach2(prog, target, type) \
+ ({ \
+ int __ret = bpf_prog_detach2((prog), (target), (type)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("prog_detach2(" #type ")"); \
+ __ret; \
+ })
+
+#define xpthread_create(thread, attr, func, arg) \
+ ({ \
+ int __ret = pthread_create((thread), (attr), (func), (arg)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_create"); \
+ __ret; \
+ })
+
+#define xpthread_join(thread, retval) \
+ ({ \
+ int __ret = pthread_join((thread), (retval)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_join"); \
+ __ret; \
+ })
+
+static inline int add_to_sockmap(int mapfd, int fd1, int fd2)
+{
+ int err;
+
+ err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST);
+ if (err)
+ return err;
+
+ return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST);
+}
+
+#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 06b86addc181..b87e7f39e15a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -3,122 +3,440 @@
/*
* Tests for sockmap/sockhash holding kTLS sockets.
*/
-
+#include <error.h>
+#include <netinet/tcp.h>
+#include <linux/tls.h>
#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_ktls.skel.h"
#define MAX_TEST_NAME 80
#define TCP_ULP 31
-static int tcp_server(int family)
+static int init_ktls_pairs(int c, int p)
{
- int err, s;
+ int err;
+ struct tls12_crypto_info_aes_gcm_128 crypto_rx;
+ struct tls12_crypto_info_aes_gcm_128 crypto_tx;
- s = socket(family, SOCK_STREAM, 0);
- if (CHECK_FAIL(s == -1)) {
- perror("socket");
+ err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
+
+ err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
+
+ memset(&crypto_rx, 0, sizeof(crypto_rx));
+ memset(&crypto_tx, 0, sizeof(crypto_tx));
+ crypto_rx.info.version = TLS_1_2_VERSION;
+ crypto_tx.info.version = TLS_1_2_VERSION;
+ crypto_rx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ crypto_tx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_tx, sizeof(crypto_tx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_TX)"))
+ goto out;
+
+ err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_rx, sizeof(crypto_rx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_RX)"))
+ goto out;
+ return 0;
+out:
+ return -1;
+}
+
+static int create_ktls_pairs(int family, int sotype, int *c, int *p)
+{
+ int err;
+
+ err = create_pair(family, sotype, c, p);
+ if (!ASSERT_OK(err, "create_pair()"))
return -1;
- }
- err = listen(s, SOMAXCONN);
- if (CHECK_FAIL(err)) {
- perror("listen");
+ err = init_ktls_pairs(*c, *p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
return -1;
+ return 0;
+}
+
+static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map)
+{
+ struct sockaddr_storage addr = {};
+ socklen_t len = sizeof(addr);
+ struct sockaddr_in6 *v6;
+ struct sockaddr_in *v4;
+ int err, s, zero = 0;
+
+ switch (family) {
+ case AF_INET:
+ v4 = (struct sockaddr_in *)&addr;
+ v4->sin_family = AF_INET;
+ break;
+ case AF_INET6:
+ v6 = (struct sockaddr_in6 *)&addr;
+ v6->sin6_family = AF_INET6;
+ break;
+ default:
+ PRINT_FAIL("unsupported socket family %d", family);
+ return;
}
- return s;
+ s = socket(family, SOCK_STREAM, 0);
+ if (!ASSERT_GE(s, 0, "socket"))
+ return;
+
+ err = bind(s, (struct sockaddr *)&addr, len);
+ if (!ASSERT_OK(err, "bind"))
+ goto close;
+
+ err = getsockname(s, (struct sockaddr *)&addr, &len);
+ if (!ASSERT_OK(err, "getsockname"))
+ goto close;
+
+ err = connect(s, (struct sockaddr *)&addr, len);
+ if (!ASSERT_OK(err, "connect"))
+ goto close;
+
+ /* save sk->sk_prot and set it to tls_prots */
+ err = setsockopt(s, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto close;
+
+ /* sockmap update should not affect saved sk_prot */
+ err = bpf_map_update_elem(map, &zero, &s, BPF_ANY);
+ if (!ASSERT_ERR(err, "sockmap update elem"))
+ goto close;
+
+ /* call sk->sk_prot->setsockopt to dispatch to saved sk_prot */
+ err = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+ ASSERT_OK(err, "setsockopt(TCP_NODELAY)");
+
+close:
+ close(s);
}
-static int disconnect(int fd)
+static const char *fmt_test_name(const char *subtest_name, int family,
+ enum bpf_map_type map_type)
{
- struct sockaddr unspec = { AF_UNSPEC };
+ const char *map_type_str = BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH";
+ const char *family_str = AF_INET ? "IPv4" : "IPv6";
+ static char test_name[MAX_TEST_NAME];
- return connect(fd, &unspec, sizeof(unspec));
+ snprintf(test_name, MAX_TEST_NAME,
+ "sockmap_ktls %s %s %s",
+ subtest_name, family_str, map_type_str);
+
+ return test_name;
}
-/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */
-static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
+static void test_sockmap_ktls_offload(int family, int sotype)
{
- struct sockaddr_storage addr = {0};
- socklen_t len = sizeof(addr);
- int err, cli, srv, zero = 0;
+ int err;
+ int c = 0, p = 0, sent, recvd;
+ char msg[12] = "hello world\0";
+ char rcv[13];
+
+ err = create_ktls_pairs(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_ktls_pairs()"))
+ goto out;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_OK(err, "send(msg)"))
+ goto out;
+
+ recvd = recv(p, rcv, sizeof(rcv), 0);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sent, "length mismatch"))
+ goto out;
- srv = tcp_server(family);
- if (srv == -1)
+ ASSERT_OK(memcmp(msg, rcv, sizeof(msg)), "data mismatch");
+
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+}
+
+static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push)
+{
+ int err, off;
+ int i, j;
+ int start_push = 0, push_len = 0;
+ int c = 0, p = 0, one = 1, sent, recvd;
+ int prog_fd, map_fd;
+ char msg[12] = "hello world\0";
+ char rcv[20] = {0};
+ struct test_sockmap_ktls *skel;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
return;
- err = getsockname(srv, (struct sockaddr *)&addr, &len);
- if (CHECK_FAIL(err)) {
- perror("getsockopt");
- goto close_srv;
- }
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
- cli = socket(family, SOCK_STREAM, 0);
- if (CHECK_FAIL(cli == -1)) {
- perror("socket");
- goto close_srv;
- }
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
- err = connect(cli, (struct sockaddr *)&addr, len);
- if (CHECK_FAIL(err)) {
- perror("connect");
- goto close_cli;
- }
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
- err = bpf_map_update_elem(map, &zero, &cli, 0);
- if (CHECK_FAIL(err)) {
- perror("bpf_map_update_elem");
- goto close_cli;
+ skel->bss->cork_byte = sizeof(msg);
+ if (push) {
+ start_push = 1;
+ push_len = 2;
}
+ skel->bss->push_start = start_push;
+ skel->bss->push_end = push_len;
- err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
- if (CHECK_FAIL(err)) {
- perror("setsockopt(TCP_ULP)");
- goto close_cli;
+ off = sizeof(msg) / 2;
+ sent = send(c, msg, off, 0);
+ if (!ASSERT_EQ(sent, off, "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "expected no data"))
+ goto out;
+
+ /* send remaining msg */
+ sent = send(c, msg + off, sizeof(msg) - off, 0);
+ if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch"))
+ goto out;
+
+ for (i = 0, j = 0; i < recvd;) {
+ /* skip checking the data that has been pushed in */
+ if (i >= start_push && i <= start_push + push_len - 1) {
+ i++;
+ continue;
+ }
+ if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch"))
+ goto out;
+ i++;
+ j++;
}
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
+static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push)
+{
+ int c = -1, p = -1, one = 1, two = 2;
+ struct test_sockmap_ktls *skel;
+ unsigned char *data = NULL;
+ struct msghdr msg = {0};
+ struct iovec iov[2];
+ int prog_fd, map_fd;
+ int txrx_buf = 1024;
+ int iov_length = 8192;
+ int err;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int));
+ err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int));
+ if (!ASSERT_OK(err, "set buf limit"))
+ goto out;
- err = bpf_map_delete_elem(map, &zero);
- if (CHECK_FAIL(err)) {
- perror("bpf_map_delete_elem");
- goto close_cli;
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out;
+
+ skel->bss->apply_bytes = 1024;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ data = calloc(iov_length, sizeof(char));
+ if (!data)
+ goto out;
+
+ iov[0].iov_base = data;
+ iov[0].iov_len = iov_length;
+ iov[1].iov_base = data;
+ iov[1].iov_len = iov_length;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 2;
+
+ for (;;) {
+ err = sendmsg(c, &msg, MSG_DONTWAIT);
+ if (err <= 0)
+ break;
}
- err = disconnect(cli);
- if (CHECK_FAIL(err))
- perror("disconnect");
+out:
+ if (data)
+ free(data);
+ if (c != -1)
+ close(c);
+ if (p != -1)
+ close(p);
-close_cli:
- close(cli);
-close_srv:
- close(srv);
+ test_sockmap_ktls__destroy(skel);
}
-static void run_tests(int family, enum bpf_map_type map_type)
+static void test_sockmap_ktls_tx_pop(int family, int sotype)
{
- char test_name[MAX_TEST_NAME];
- int map;
+ char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0";
+ int c = 0, p = 0, one = 1, sent, recvd;
+ struct test_sockmap_ktls *skel;
+ int prog_fd, map_fd;
+ char rcv[50] = {0};
+ int err;
+ int i, m, r;
- map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
- perror("bpf_map_create");
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ struct {
+ int pop_start;
+ int pop_len;
+ } pop_policy[] = {
+ /* trim the start */
+ {0, 2},
+ {0, 10},
+ {1, 2},
+ {1, 10},
+ /* trim the end */
+ {35, 2},
+ /* New entries should be added before this line */
+ {-1, -1},
+ };
+
+ i = 0;
+ while (pop_policy[i].pop_start >= 0) {
+ skel->bss->pop_start = pop_policy[i].pop_start;
+ skel->bss->pop_end = pop_policy[i].pop_len;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch"))
+ goto out;
+
+ /* verify the data
+ * msg: 0123456789a bcdefghij klmnopqrstuvwxyz
+ * | |
+ * popped data
+ */
+ for (m = 0, r = 0; m < sizeof(msg);) {
+ /* skip checking the data that has been popped */
+ if (m >= pop_policy[i].pop_start &&
+ m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) {
+ m++;
+ continue;
+ }
+
+ if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch"))
+ goto out;
+ m++;
+ r++;
+ }
+ i++;
}
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
- snprintf(test_name, MAX_TEST_NAME,
- "sockmap_ktls disconnect_after_delete %s %s",
- family == AF_INET ? "IPv4" : "IPv6",
- map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH");
- if (!test__start_subtest(test_name))
+static void run_tests(int family, enum bpf_map_type map_type)
+{
+ int map;
+
+ map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
+ if (!ASSERT_GE(map, 0, "bpf_map_create"))
return;
- test_sockmap_ktls_disconnect_after_delete(family, map);
+ if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type)))
+ test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map);
close(map);
}
+static void run_ktls_test(int family, int sotype)
+{
+ if (test__start_subtest("tls simple offload"))
+ test_sockmap_ktls_offload(family, sotype);
+ if (test__start_subtest("tls tx cork"))
+ test_sockmap_ktls_tx_cork(family, sotype, false);
+ if (test__start_subtest("tls tx cork with push"))
+ test_sockmap_ktls_tx_cork(family, sotype, true);
+ if (test__start_subtest("tls tx egress with no buf"))
+ test_sockmap_ktls_tx_no_buf(family, sotype, true);
+ if (test__start_subtest("tls tx with pop"))
+ test_sockmap_ktls_tx_pop(family, sotype);
+}
+
void test_sockmap_ktls(void)
{
run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
+ run_ktls_test(AF_INET, SOCK_STREAM);
+ run_ktls_test(AF_INET6, SOCK_STREAM);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index d7d65a700799..f1bdccc7e4e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -18,6 +18,7 @@
#include <string.h>
#include <sys/select.h>
#include <unistd.h>
+#include <linux/vm_sockets.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -26,302 +27,12 @@
#include "test_progs.h"
#include "test_sockmap_listen.skel.h"
-#define IO_TIMEOUT_SEC 30
-#define MAX_STRERR_LEN 256
-#define MAX_TEST_NAME 80
-
-#define _FAIL(errnum, fmt...) \
- ({ \
- error_at_line(0, (errnum), __func__, __LINE__, fmt); \
- CHECK_FAIL(true); \
- })
-#define FAIL(fmt...) _FAIL(0, fmt)
-#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
-#define FAIL_LIBBPF(err, msg) \
- ({ \
- char __buf[MAX_STRERR_LEN]; \
- libbpf_strerror((err), __buf, sizeof(__buf)); \
- FAIL("%s: %s", (msg), __buf); \
- })
-
-/* Wrappers that fail the test on error and report it. */
-
-#define xaccept_nonblock(fd, addr, len) \
- ({ \
- int __ret = \
- accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("accept"); \
- __ret; \
- })
-
-#define xbind(fd, addr, len) \
- ({ \
- int __ret = bind((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("bind"); \
- __ret; \
- })
-
-#define xclose(fd) \
- ({ \
- int __ret = close((fd)); \
- if (__ret == -1) \
- FAIL_ERRNO("close"); \
- __ret; \
- })
-
-#define xconnect(fd, addr, len) \
- ({ \
- int __ret = connect((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("connect"); \
- __ret; \
- })
-
-#define xgetsockname(fd, addr, len) \
- ({ \
- int __ret = getsockname((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockname"); \
- __ret; \
- })
-
-#define xgetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = getsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xlisten(fd, backlog) \
- ({ \
- int __ret = listen((fd), (backlog)); \
- if (__ret == -1) \
- FAIL_ERRNO("listen"); \
- __ret; \
- })
-
-#define xsetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = setsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("setsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xsend(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = send((fd), (buf), (len), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("send"); \
- __ret; \
- })
-
-#define xrecv_nonblock(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
- IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("recv"); \
- __ret; \
- })
-
-#define xsocket(family, sotype, flags) \
- ({ \
- int __ret = socket(family, sotype, flags); \
- if (__ret == -1) \
- FAIL_ERRNO("socket"); \
- __ret; \
- })
-
-#define xbpf_map_delete_elem(fd, key) \
- ({ \
- int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret == -1) \
- FAIL_ERRNO("map_delete"); \
- __ret; \
- })
-
-#define xbpf_map_lookup_elem(fd, key, val) \
- ({ \
- int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret == -1) \
- FAIL_ERRNO("map_lookup"); \
- __ret; \
- })
-
-#define xbpf_map_update_elem(fd, key, val, flags) \
- ({ \
- int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("map_update"); \
- __ret; \
- })
-
-#define xbpf_prog_attach(prog, target, type, flags) \
- ({ \
- int __ret = \
- bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("prog_attach(" #type ")"); \
- __ret; \
- })
-
-#define xbpf_prog_detach2(prog, target, type) \
- ({ \
- int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret == -1) \
- FAIL_ERRNO("prog_detach2(" #type ")"); \
- __ret; \
- })
-
-#define xpthread_create(thread, attr, func, arg) \
- ({ \
- int __ret = pthread_create((thread), (attr), (func), (arg)); \
- errno = __ret; \
- if (__ret) \
- FAIL_ERRNO("pthread_create"); \
- __ret; \
- })
-
-#define xpthread_join(thread, retval) \
- ({ \
- int __ret = pthread_join((thread), (retval)); \
- errno = __ret; \
- if (__ret) \
- FAIL_ERRNO("pthread_join"); \
- __ret; \
- })
-
-static int poll_read(int fd, unsigned int timeout_sec)
-{
- struct timeval timeout = { .tv_sec = timeout_sec };
- fd_set rfds;
- int r;
-
- FD_ZERO(&rfds);
- FD_SET(fd, &rfds);
-
- r = select(fd + 1, &rfds, NULL, NULL, &timeout);
- if (r == 0)
- errno = ETIME;
-
- return r == 1 ? 0 : -1;
-}
-
-static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return accept(fd, addr, len);
-}
-
-static int recv_timeout(int fd, void *buf, size_t len, int flags,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return recv(fd, buf, len, flags);
-}
-
-static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
-
- addr4->sin_family = AF_INET;
- addr4->sin_port = 0;
- addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- *len = sizeof(*addr4);
-}
-
-static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
-
- addr6->sin6_family = AF_INET6;
- addr6->sin6_port = 0;
- addr6->sin6_addr = in6addr_loopback;
- *len = sizeof(*addr6);
-}
-
-static void init_addr_loopback(int family, struct sockaddr_storage *ss,
- socklen_t *len)
-{
- switch (family) {
- case AF_INET:
- init_addr_loopback4(ss, len);
- return;
- case AF_INET6:
- init_addr_loopback6(ss, len);
- return;
- default:
- FAIL("unsupported address family %d", family);
- }
-}
-
-static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
-{
- return (struct sockaddr *)ss;
-}
-
-static int enable_reuseport(int s, int progfd)
-{
- int err, one = 1;
-
- err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
- if (err)
- return -1;
- err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
- sizeof(progfd));
- if (err)
- return -1;
-
- return 0;
-}
-
-static int socket_loopback_reuseport(int family, int sotype, int progfd)
-{
- struct sockaddr_storage addr;
- socklen_t len;
- int err, s;
-
- init_addr_loopback(family, &addr, &len);
-
- s = xsocket(family, sotype, 0);
- if (s == -1)
- return -1;
-
- if (progfd >= 0)
- enable_reuseport(s, progfd);
-
- err = xbind(s, sockaddr(&addr), len);
- if (err)
- goto close;
-
- if (sotype & SOCK_DGRAM)
- return s;
+#include "sockmap_helpers.h"
- err = xlisten(s, SOMAXCONN);
- if (err)
- goto close;
-
- return s;
-close:
- xclose(s);
- return -1;
-}
+#define NO_FLAGS 0
-static int socket_loopback(int family, int sotype)
-{
- return socket_loopback_reuseport(family, sotype, -1);
-}
-
-static void test_insert_invalid(int family, int sotype, int mapfd)
+static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u32 key = 0;
u64 value;
@@ -338,7 +49,8 @@ static void test_insert_invalid(int family, int sotype, int mapfd)
FAIL_ERRNO("map_update: expected EBADF");
}
-static void test_insert_opened(int family, int sotype, int mapfd)
+static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u32 key = 0;
u64 value;
@@ -351,16 +63,19 @@ static void test_insert_opened(int family, int sotype, int mapfd)
errno = 0;
value = s;
err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
- if (!err || errno != EOPNOTSUPP)
- FAIL_ERRNO("map_update: expected EOPNOTSUPP");
-
+ if (sotype == SOCK_STREAM) {
+ if (!err || errno != EOPNOTSUPP)
+ FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+ } else if (err)
+ FAIL_ERRNO("map_update: expected success");
xclose(s);
}
-static void test_insert_bound(int family, int sotype, int mapfd)
+static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
- socklen_t len;
+ socklen_t len = 0;
u32 key = 0;
u64 value;
int err, s;
@@ -384,7 +99,8 @@ close:
xclose(s);
}
-static void test_insert(int family, int sotype, int mapfd)
+static void test_insert(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u64 value;
u32 key;
@@ -400,7 +116,8 @@ static void test_insert(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_delete_after_insert(int family, int sotype, int mapfd)
+static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u64 value;
u32 key;
@@ -417,7 +134,8 @@ static void test_delete_after_insert(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_delete_after_close(int family, int sotype, int mapfd)
+static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
int err, s;
u64 value;
@@ -440,7 +158,8 @@ static void test_delete_after_close(int family, int sotype, int mapfd)
FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
}
-static void test_lookup_after_insert(int family, int sotype, int mapfd)
+static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u64 cookie, value;
socklen_t len;
@@ -468,7 +187,8 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_lookup_after_delete(int family, int sotype, int mapfd)
+static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
int err, s;
u64 value;
@@ -491,7 +211,8 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
+static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u32 key, value32;
int err, s;
@@ -500,8 +221,8 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
if (s < 0)
return;
- mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
- sizeof(value32), 1, 0);
+ mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key),
+ sizeof(value32), 1, NULL);
if (mapfd < 0) {
FAIL_ERRNO("map_create");
goto close;
@@ -521,7 +242,8 @@ close:
xclose(s);
}
-static void test_update_existing(int family, int sotype, int mapfd)
+static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
int s1, s2;
u64 value;
@@ -549,7 +271,7 @@ close_s1:
/* Exercise the code path where we destroy child sockets that never
* got accept()'ed, aka orphans, when parent socket gets closed.
*/
-static void test_destroy_orphan_child(int family, int sotype, int mapfd)
+static void do_destroy_orphan_child(int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
socklen_t len;
@@ -580,10 +302,38 @@ close_srv:
xclose(s);
}
+static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
+ int family, int sotype, int mapfd)
+{
+ int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ const struct test {
+ int progfd;
+ enum bpf_attach_type atype;
+ } tests[] = {
+ { -1, -1 },
+ { msg_verdict, BPF_SK_MSG_VERDICT },
+ { skb_verdict, BPF_SK_SKB_VERDICT },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (t->progfd != -1 &&
+ xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
+ return;
+
+ do_destroy_orphan_child(family, sotype, mapfd);
+
+ if (t->progfd != -1)
+ xbpf_prog_detach2(t->progfd, mapfd, t->atype);
+ }
+}
+
/* Perform a passive open after removing listening socket from SOCKMAP
* to ensure that callbacks get restored properly.
*/
-static void test_clone_after_delete(int family, int sotype, int mapfd)
+static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
socklen_t len;
@@ -619,7 +369,8 @@ close_srv:
* SOCKMAP, but got accept()'ed only after the parent has been removed
* from SOCKMAP, gets cloned without parent psock state or callbacks.
*/
-static void test_accept_after_delete(int family, int sotype, int mapfd)
+static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
const u32 zero = 0;
@@ -673,7 +424,8 @@ close_srv:
/* Check that child socket that got created and accepted while parent
* was in a SOCKMAP is cloned without parent psock state or callbacks.
*/
-static void test_accept_before_delete(int family, int sotype, int mapfd)
+static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
const u32 zero = 0, one = 1;
@@ -782,7 +534,8 @@ done:
return NULL;
}
-static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
+static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct connect_accept_ctx ctx = { 0 };
struct sockaddr_storage addr;
@@ -845,7 +598,8 @@ static void *listen_thread(void *arg)
return NULL;
}
-static void test_race_insert_listen(int family, int socktype, int mapfd)
+static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
+ int family, int socktype, int mapfd)
{
struct connect_accept_ctx ctx = { 0 };
const u32 zero = 0;
@@ -923,59 +677,22 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
- struct sockaddr_storage addr;
- int s, c0, c1, p0, p1;
+ int c0, c1, p0, p1;
unsigned int pass;
- socklen_t len;
int err, n;
- u64 value;
u32 key;
char b;
zero_verdict_count(verd_mapfd);
- s = socket_loopback(family, sotype | SOCK_NONBLOCK);
- if (s < 0)
- return;
-
- len = sizeof(addr);
- err = xgetsockname(s, sockaddr(&addr), &len);
- if (err)
- goto close_srv;
-
- c0 = xsocket(family, sotype, 0);
- if (c0 < 0)
- goto close_srv;
- err = xconnect(c0, sockaddr(&addr), len);
+ err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
+ &p0, &p1);
if (err)
- goto close_cli0;
-
- p0 = xaccept_nonblock(s, NULL, NULL);
- if (p0 < 0)
- goto close_cli0;
-
- c1 = xsocket(family, sotype, 0);
- if (c1 < 0)
- goto close_peer0;
- err = xconnect(c1, sockaddr(&addr), len);
- if (err)
- goto close_cli1;
-
- p1 = xaccept_nonblock(s, NULL, NULL);
- if (p1 < 0)
- goto close_cli1;
-
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer1;
+ return;
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
- goto close_peer1;
+ goto close;
n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
if (n < 0)
@@ -983,39 +700,33 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (n == 0)
FAIL("%s: incomplete write", log_prefix);
if (n < 1)
- goto close_peer1;
+ goto close;
key = SK_PASS;
err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
if (err)
- goto close_peer1;
+ goto close;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = read(c0, &b, 1);
+ n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
if (n < 0)
- FAIL_ERRNO("%s: read", log_prefix);
+ FAIL_ERRNO("%s: recv_timeout", log_prefix);
if (n == 0)
- FAIL("%s: incomplete read", log_prefix);
+ FAIL("%s: incomplete recv", log_prefix);
-close_peer1:
+close:
xclose(p1);
-close_cli1:
xclose(c1);
-close_peer0:
xclose(p0);
-close_cli0:
xclose(c0);
-close_srv:
- xclose(s);
}
static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;
@@ -1053,6 +764,24 @@ static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void test_msg_redir_to_connected_with_link(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int prog_msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int link_fd;
+
+ link_fd = bpf_link_create(prog_msg_verdict, sock_map, BPF_SK_MSG_VERDICT, NULL);
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_create"))
+ return;
+
+ redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ close(link_fd);
+}
+
static void redir_to_listening(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
@@ -1061,7 +790,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
int s, c, p, err, n;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_mapfd);
@@ -1086,15 +814,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
if (p < 0)
goto close_cli;
- key = 0;
- value = s;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer;
-
- key = 1;
- value = p;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, s, p);
if (err)
goto close_peer;
@@ -1125,8 +845,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;
@@ -1164,6 +884,92 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ struct bpf_program *verdict = skel->progs.prog_msg_verdict;
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ struct bpf_link *link;
+
+ link = bpf_program__attach_sockmap(verdict, sock_map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ return;
+
+ redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ bpf_link__detach(link);
+}
+
+static void redir_partial(int family, int sotype, int sock_map, int parser_map)
+{
+ int c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+ int err, n, key, value;
+ char buf[] = "abc";
+
+ key = 0;
+ value = sizeof(buf) - 1;
+ err = xbpf_map_update_elem(parser_map, &key, &value, 0);
+ if (err)
+ return;
+
+ err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
+ &p0, &p1);
+ if (err)
+ goto clean_parser_map;
+
+ err = add_to_sockmap(sock_map, p0, p1);
+ if (err)
+ goto close;
+
+ n = xsend(c1, buf, sizeof(buf), 0);
+ if (n == -1)
+ goto close;
+ if (n < sizeof(buf))
+ FAIL("incomplete write");
+
+ n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
+ if (n != sizeof(buf) - 1)
+ FAIL("expect %zu, received %d", sizeof(buf) - 1, n);
+
+close:
+ xclose(c0);
+ xclose(p0);
+ xclose(c1);
+ xclose(p1);
+
+clean_parser_map:
+ key = 0;
+ value = 0;
+ xbpf_map_update_elem(parser_map, &key, &value, 0);
+}
+
+static void test_skb_redir_partial(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
+ int parser_map = bpf_map__fd(skel->maps.parser_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ goto detach;
+
+ redir_partial(family, sotype, sock_map, parser_map);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+ xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
static void test_reuseport_select_listening(int family, int sotype,
int sock_map, int verd_map,
int reuseport_prog)
@@ -1346,7 +1152,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
int s1, s2, c, err;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_map);
@@ -1360,16 +1165,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
if (s2 < 0)
goto close_srv1;
- key = 0;
- value = s1;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_map, s1, s2);
if (err)
goto close_srv2;
- key = 1;
- value = s2;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
-
/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
len = sizeof(addr);
err = xgetsockname(s2, sockaddr(&addr), &len);
@@ -1420,14 +1219,12 @@ close_srv1:
static void test_ops_cleanup(const struct bpf_map *map)
{
- const struct bpf_map_def *def;
int err, mapfd;
u32 key;
- def = bpf_map__def(map);
mapfd = bpf_map__fd(map);
- for (key = 0; key < def->max_entries; key++) {
+ for (key = 0; key < bpf_map__max_entries(map); key++) {
err = bpf_map_delete_elem(mapfd, &key);
if (err && errno != EINVAL && errno != ENOENT)
FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
@@ -1441,6 +1238,10 @@ static const char *family_str(sa_family_t family)
return "IPv4";
case AF_INET6:
return "IPv6";
+ case AF_UNIX:
+ return "Unix";
+ case AF_VSOCK:
+ return "VSOCK";
default:
return "unknown";
}
@@ -1448,13 +1249,13 @@ static const char *family_str(sa_family_t family)
static const char *map_type_str(const struct bpf_map *map)
{
- const struct bpf_map_def *def;
+ int type;
- def = bpf_map__def(map);
- if (IS_ERR(def))
+ if (!map)
return "invalid";
+ type = bpf_map__type(map);
- switch (def->type) {
+ switch (type) {
case BPF_MAP_TYPE_SOCKMAP:
return "sockmap";
case BPF_MAP_TYPE_SOCKHASH:
@@ -1480,7 +1281,8 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
int family, int sotype)
{
const struct op_test {
- void (*fn)(int family, int sotype, int mapfd);
+ void (*fn)(struct test_sockmap_listen *skel,
+ int family, int sotype, int mapfd);
const char *name;
int sotype;
} tests[] = {
@@ -1527,7 +1329,7 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
if (!test__start_subtest(s))
continue;
- t->fn(family, sotype, map_fd);
+ t->fn(skel, family, sotype, map_fd);
test_ops_cleanup(map);
}
}
@@ -1542,8 +1344,11 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
} tests[] = {
TEST(test_skb_redir_to_connected),
TEST(test_skb_redir_to_listening),
+ TEST(test_skb_redir_partial),
TEST(test_msg_redir_to_connected),
+ TEST(test_msg_redir_to_connected_with_link),
TEST(test_msg_redir_to_listening),
+ TEST(test_msg_redir_to_listening_with_link),
};
const char *family_name, *map_name;
const struct redir_test *t;
@@ -1613,7 +1418,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_reuseport(skel, map, family, SOCK_DGRAM);
}
-void test_sockmap_listen(void)
+void serial_test_sockmap_listen(void)
{
struct test_sockmap_listen *skel;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
new file mode 100644
index 000000000000..9c461d93113d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for sockmap/sockhash redirection.
+ *
+ * BPF_MAP_TYPE_SOCKMAP
+ * BPF_MAP_TYPE_SOCKHASH
+ * x
+ * sk_msg-to-egress
+ * sk_msg-to-ingress
+ * sk_skb-to-egress
+ * sk_skb-to-ingress
+ * x
+ * AF_INET, SOCK_STREAM
+ * AF_INET6, SOCK_STREAM
+ * AF_INET, SOCK_DGRAM
+ * AF_INET6, SOCK_DGRAM
+ * AF_UNIX, SOCK_STREAM
+ * AF_UNIX, SOCK_DGRAM
+ * AF_VSOCK, SOCK_STREAM
+ * AF_VSOCK, SOCK_SEQPACKET
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <linux/string.h>
+#include <linux/vm_sockets.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "linux/const.h"
+#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_sockmap_redir.skel.h"
+
+/* The meaning of SUPPORTED is "will redirect packet as expected".
+ */
+#define SUPPORTED _BITUL(0)
+
+/* Note on sk_skb-to-ingress ->af_vsock:
+ *
+ * Peer socket may receive the packet some time after the return from sendmsg().
+ * In a typical usage scenario, recvmsg() will block until the redirected packet
+ * appears in the destination queue, or timeout if the packet was dropped. By
+ * that point, the verdict map has already been updated to reflect what has
+ * happened.
+ *
+ * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
+ * takes place. Which means we may race the execution of the verdict logic and
+ * read map_verd before it has been updated, i.e. we might observe
+ * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
+ *
+ * This confuses the selftest logic: if there was no packet dropped, where's the
+ * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
+ * (which implies the verdict program has not been ran) just re-read the verdict
+ * map again.
+ */
+#define UNSUPPORTED_RACY_VERD _BITUL(1)
+
+enum prog_type {
+ SK_MSG_EGRESS,
+ SK_MSG_INGRESS,
+ SK_SKB_EGRESS,
+ SK_SKB_INGRESS,
+};
+
+enum {
+ SEND_INNER = 0,
+ SEND_OUTER,
+};
+
+enum {
+ RECV_INNER = 0,
+ RECV_OUTER,
+};
+
+struct maps {
+ int in;
+ int out;
+ int verd;
+};
+
+struct combo_spec {
+ enum prog_type prog_type;
+ const char *in, *out;
+};
+
+struct redir_spec {
+ const char *name;
+ int idx_send;
+ int idx_recv;
+ enum prog_type prog_type;
+};
+
+struct socket_spec {
+ int family;
+ int sotype;
+ int send_flags;
+ int in[2];
+ int out[2];
+};
+
+static int socket_spec_pairs(struct socket_spec *s)
+{
+ return create_socket_pairs(s->family, s->sotype,
+ &s->in[0], &s->out[0],
+ &s->in[1], &s->out[1]);
+}
+
+static void socket_spec_close(struct socket_spec *s)
+{
+ xclose(s->in[0]);
+ xclose(s->in[1]);
+ xclose(s->out[0]);
+ xclose(s->out[1]);
+}
+
+static void get_redir_params(struct redir_spec *redir,
+ struct test_sockmap_redir *skel, int *prog_fd,
+ enum bpf_attach_type *attach_type,
+ int *redirect_flags)
+{
+ enum prog_type type = redir->prog_type;
+ struct bpf_program *prog;
+ bool sk_msg;
+
+ sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
+ prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;
+
+ *prog_fd = bpf_program__fd(prog);
+ *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;
+
+ if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
+ *redirect_flags = BPF_F_INGRESS;
+ else
+ *redirect_flags = 0;
+}
+
+static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
+{
+ ssize_t n;
+ char buf;
+
+ errno = 0;
+ n = recv(fd, &buf, 1, flags);
+ if (n < 0 && expect_success)
+ FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);
+ if (!n && !expect_success)
+ FAIL("%s: expected failure: retval=%zd", prefix, n);
+}
+
+static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
+ int sd_recv, int map_verd, int status)
+{
+ unsigned int drop, pass;
+ char recv_buf;
+ ssize_t n;
+
+get_verdict:
+ if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass))
+ return;
+
+ if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) {
+ sched_yield();
+ goto get_verdict;
+ }
+
+ if (pass != 0) {
+ FAIL("unsupported: wanted verdict pass 0, have %u", pass);
+ return;
+ }
+
+ /* If nothing was dropped, packet should have reached the peer */
+ if (drop == 0) {
+ errno = 0;
+ n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1)
+ FAIL_ERRNO("unsupported: packet missing, retval=%zd", n);
+ }
+
+ /* Ensure queues are empty */
+ try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
+ if (sd_in != sd_send)
+ try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);
+
+ try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
+ if (sd_recv != sd_out)
+ try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
+}
+
+static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
+ int sd_in, int sd_out, int sd_recv,
+ struct maps *maps, int status)
+{
+ unsigned int drop, pass;
+ char *send_buf = "ab";
+ char recv_buf = '\0';
+ ssize_t n, len = 1;
+
+ /* Zero out the verdict map */
+ if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) ||
+ xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
+ return;
+
+ if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
+ return;
+
+ if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
+ goto del_in;
+
+ /* Last byte is OOB data when send_flags has MSG_OOB bit set */
+ if (send_flags & MSG_OOB)
+ len++;
+ n = send(sd_send, send_buf, len, send_flags);
+ if (n >= 0 && n < len)
+ FAIL("incomplete send");
+ if (n < 0) {
+ /* sk_msg redirect combo not supported? */
+ if (status & SUPPORTED || errno != EACCES)
+ FAIL_ERRNO("send");
+ goto out;
+ }
+
+ if (!(status & SUPPORTED)) {
+ handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
+ maps->verd, status);
+ goto out;
+ }
+
+ errno = 0;
+ n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1) {
+ FAIL_ERRNO("recv_timeout()");
+ goto out;
+ }
+
+ /* Check verdict _after_ recv(); af_vsock may need time to catch up */
+ if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass))
+ goto out;
+
+ if (drop != 0 || pass != 1)
+ FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
+ drop, pass);
+
+ if (recv_buf != send_buf[0])
+ FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]);
+
+ if (send_flags & MSG_OOB) {
+ /* Fail reading OOB while in sockmap */
+ try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ /* Remove sd_out from sockmap */
+ xbpf_map_delete_elem(maps->out, &u32(0));
+
+ /* Check that OOB was dropped on redirect */
+ try_recv("recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ goto del_in;
+ }
+out:
+ xbpf_map_delete_elem(maps->out, &u32(0));
+del_in:
+ xbpf_map_delete_elem(maps->in, &u32(0));
+}
+
+static int is_redir_supported(enum prog_type type, const char *in,
+ const char *out)
+{
+ /* Matching based on strings returned by socket_kind_to_str():
+ * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
+ * Plus a wildcard: any
+ * Not in use: u_seq, v_dgr
+ */
+ struct combo_spec *c, combos[] = {
+ /* Send to local: TCP -> any, but vsock */
+ { SK_MSG_INGRESS, "tcp", "tcp" },
+ { SK_MSG_INGRESS, "tcp", "udp" },
+ { SK_MSG_INGRESS, "tcp", "u_str" },
+ { SK_MSG_INGRESS, "tcp", "u_dgr" },
+
+ /* Send to egress: TCP -> TCP */
+ { SK_MSG_EGRESS, "tcp", "tcp" },
+
+ /* Ingress to egress: any -> any */
+ { SK_SKB_EGRESS, "any", "any" },
+
+ /* Ingress to local: any -> any, but vsock */
+ { SK_SKB_INGRESS, "any", "tcp" },
+ { SK_SKB_INGRESS, "any", "udp" },
+ { SK_SKB_INGRESS, "any", "u_str" },
+ { SK_SKB_INGRESS, "any", "u_dgr" },
+ };
+
+ for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
+ if (c->prog_type == type &&
+ (!strcmp(c->in, "any") || strstarts(in, c->in)) &&
+ (!strcmp(c->out, "any") || strstarts(out, c->out)))
+ return SUPPORTED;
+ }
+
+ return 0;
+}
+
+static int get_support_status(enum prog_type type, const char *in,
+ const char *out)
+{
+ int status = is_redir_supported(type, in, out);
+
+ if (type == SK_SKB_INGRESS && strstarts(out, "v_"))
+ status |= UNSUPPORTED_RACY_VERD;
+
+ return status;
+}
+
+static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps, struct socket_spec *s_in,
+ struct socket_spec *s_out)
+{
+ int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status;
+ const char *in_str, *out_str;
+ char s[MAX_TEST_NAME];
+
+ fd_in = s_in->in[0];
+ fd_out = s_out->out[0];
+ fd_send = s_in->in[redir->idx_send];
+ fd_peer = s_in->in[redir->idx_send ^ 1];
+ fd_recv = s_out->out[redir->idx_recv];
+ flags = s_in->send_flags;
+
+ in_str = socket_kind_to_str(fd_in);
+ out_str = socket_kind_to_str(fd_out);
+ status = get_support_status(redir->prog_type, in_str, out_str);
+
+ snprintf(s, sizeof(s),
+ "%-4s %-17s %-5s %s %-5s%6s",
+ /* hash sk_skb-to-ingress u_str → v_str (OOB) */
+ type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash",
+ redir->name,
+ in_str,
+ status & SUPPORTED ? "→" : " ",
+ out_str,
+ (flags & MSG_OOB) ? "(OOB)" : "");
+
+ if (!test__start_subtest(s))
+ return;
+
+ test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv,
+ maps, status);
+}
+
+static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps)
+{
+ struct socket_spec *s, sockets[] = {
+ { AF_INET, SOCK_STREAM },
+ // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
+ { AF_INET6, SOCK_STREAM },
+ { AF_INET, SOCK_DGRAM },
+ { AF_INET6, SOCK_DGRAM },
+ { AF_UNIX, SOCK_STREAM },
+ { AF_UNIX, SOCK_STREAM, MSG_OOB },
+ { AF_UNIX, SOCK_DGRAM },
+ // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */
+ { AF_VSOCK, SOCK_STREAM },
+ // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */
+ { AF_VSOCK, SOCK_SEQPACKET },
+ };
+
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ if (socket_spec_pairs(s))
+ goto out;
+
+ /* Intra-proto */
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ test_socket(type, redir, maps, s, s);
+
+ /* Cross-proto */
+ for (int i = 0; i < ARRAY_SIZE(sockets); i++) {
+ for (int j = 0; j < ARRAY_SIZE(sockets); j++) {
+ struct socket_spec *out = &sockets[j];
+ struct socket_spec *in = &sockets[i];
+
+ /* Skip intra-proto and between variants */
+ if (out->send_flags ||
+ (in->family == out->family &&
+ in->sotype == out->sotype))
+ continue;
+
+ test_socket(type, redir, maps, in, out);
+ }
+ }
+out:
+ while (--s >= sockets)
+ socket_spec_close(s);
+}
+
+static void test_map(enum bpf_map_type type)
+{
+ struct redir_spec *r, redirs[] = {
+ { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
+ { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
+ { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
+ { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
+ };
+
+ for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
+ enum bpf_attach_type attach_type;
+ struct test_sockmap_redir *skel;
+ struct maps maps;
+ int prog_fd;
+
+ skel = test_sockmap_redir__open_and_load();
+ if (!skel) {
+ FAIL("open_and_load");
+ return;
+ }
+
+ switch (type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ maps.in = bpf_map__fd(skel->maps.nop_map);
+ maps.out = bpf_map__fd(skel->maps.sock_map);
+ break;
+ case BPF_MAP_TYPE_SOCKHASH:
+ maps.in = bpf_map__fd(skel->maps.nop_hash);
+ maps.out = bpf_map__fd(skel->maps.sock_hash);
+ break;
+ default:
+ FAIL("Unsupported bpf_map_type");
+ return;
+ }
+
+ skel->bss->redirect_type = type;
+ maps.verd = bpf_map__fd(skel->maps.verdict_map);
+ get_redir_params(r, skel, &prog_fd, &attach_type,
+ &skel->bss->redirect_flags);
+
+ if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0))
+ return;
+
+ test_redir(type, r, &maps);
+
+ if (xbpf_prog_detach2(prog_fd, maps.in, attach_type))
+ return;
+
+ test_sockmap_redir__destroy(skel);
+ }
+}
+
+void serial_test_sockmap_redir(void)
+{
+ test_map(BPF_MAP_TYPE_SOCKMAP);
+ test_map(BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
new file mode 100644
index 000000000000..621b3b71888e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <netinet/tcp.h>
+#include <test_progs.h>
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_strp.skel.h"
+
+#define STRP_PKT_HEAD_LEN 4
+#define STRP_PKT_BODY_LEN 6
+#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN)
+
+static const char packet[STRP_PKT_FULL_LEN] = "head+body\0";
+static const int test_packet_num = 100;
+
+/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready
+ * with sk held if an skb exists in sk_receive_queue. Then for the
+ * data_ready implementation of strparser, it will delay the read
+ * operation if sk is held and EAGAIN is returned.
+ */
+static int sockmap_strp_consume_pre_data(int p)
+{
+ int recvd;
+ bool retried = false;
+ char rcv[10];
+
+retry:
+ errno = 0;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1);
+ if (recvd < 0 && errno == EAGAIN && retried == false) {
+ /* On the first call, EAGAIN will certainly be returned.
+ * A 1-second wait is enough for the workqueue to finish.
+ */
+ sleep(1);
+ retried = true;
+ goto retry;
+ }
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "data mismatch"))
+ return -1;
+ return 0;
+}
+
+static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass,
+ bool need_parser)
+{
+ struct test_sockmap_strp *strp = NULL;
+ int verdict, parser;
+ int err;
+
+ strp = test_sockmap_strp__open_and_load();
+ *out_map = bpf_map__fd(strp->maps.sock_map);
+
+ if (need_parser)
+ parser = bpf_program__fd(strp->progs.prog_skb_parser_partial);
+ else
+ parser = bpf_program__fd(strp->progs.prog_skb_parser);
+
+ if (pass)
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass);
+ else
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict);
+
+ err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
+ goto err;
+
+ err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto err;
+
+ return strp;
+err:
+ test_sockmap_strp__destroy(strp);
+ return NULL;
+}
+
+/* Dispatch packets to different socket by packet size:
+ *
+ * ------ ------
+ * | pkt4 || pkt1 |... > remote socket
+ * ------ ------ / ------ ------
+ * | pkt8 | pkt7 |...
+ * ------ ------ \ ------ ------
+ * | pkt3 || pkt2 |... > local socket
+ * ------ ------
+ */
+static void test_sockmap_strp_dispatch_pkt(int family, int sotype)
+{
+ int i, j, zero = 0, one = 1, recvd;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ int test_cnt = 6;
+ char rcv[10];
+ struct {
+ char data[7];
+ int data_len;
+ int send_cnt;
+ int *receiver;
+ } send_dir[2] = {
+ /* data expected to deliver to local */
+ {"llllll", 6, 0, &p0},
+ /* data expected to deliver to remote */
+ {"rrrrr", 5, 0, &c1}
+ };
+
+ strp = sockmap_strp_init(&map, false, false);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+ if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)"))
+ goto out_close;
+
+ /* deliver data with data size greater than 5 to local */
+ strp->data->verdict_max_size = 5;
+
+ for (i = 0; i < test_cnt; i++) {
+ int d = i % 2;
+
+ xsend(c0, send_dir[d].data, send_dir[d].data_len, 0);
+ send_dir[d].send_cnt++;
+ }
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < send_dir[i].send_cnt; j++) {
+ int expected = send_dir[i].data_len;
+
+ recvd = recv_timeout(*send_dir[i].receiver, rcv,
+ expected, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, expected, "recv_timeout()"))
+ goto out_close;
+ if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd),
+ "data mismatch"))
+ goto out_close;
+ }
+ }
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* We have multiple packets in one skb
+ * ------------ ------------ ------------
+ * | packet1 | packet2 | ...
+ * ------------ ------------ ------------
+ */
+static void test_sockmap_strp_multiple_pkt(int family, int sotype)
+{
+ int i, zero = 0;
+ int sent, recvd, total;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char *snd = NULL, *rcv = NULL;
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* construct multiple packets in one buffer */
+ total = test_packet_num * STRP_PKT_FULL_LEN;
+ snd = malloc(total);
+ rcv = malloc(total + 1);
+ if (!ASSERT_TRUE(snd, "malloc(snd)") ||
+ !ASSERT_TRUE(rcv, "malloc(rcv)"))
+ goto out_close;
+
+ for (i = 0; i < test_packet_num; i++) {
+ memcpy(snd + i * STRP_PKT_FULL_LEN,
+ packet, STRP_PKT_FULL_LEN);
+ }
+
+ sent = xsend(c, snd, total, 0);
+ if (!ASSERT_EQ(sent, total, "xsend(c)"))
+ goto out_close;
+
+ /* try to recv one more byte to avoid truncation check */
+ recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, total, "recv(rcv)"))
+ goto out_close;
+
+ /* we sent TCP segment with multiple encapsulation
+ * then check whether packets are handled correctly
+ */
+ if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+ if (snd)
+ free(snd);
+ if (rcv)
+ free(rcv);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with partial read */
+static void test_sockmap_strp_partial_read(int family, int sotype)
+{
+ int zero = 0, recvd, off;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* 1.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data"))
+ goto out_close;
+
+ /* 1.2 send remaining head and body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+ /* 2.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+
+ /* 2.2 send remaining head and partial body, 1 byte body left */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0);
+ off = STRP_PKT_FULL_LEN - 1;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data"))
+ goto out_close;
+
+ /* 2.3 send remaining body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test simple socket read/write with strparser + FIONREAD */
+static void test_sockmap_strp_pass(int family, int sotype, bool fionread)
+{
+ int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail;
+ int err, map;
+ int c = -1, p = -1;
+ int test_cnt = 10, i;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ /* inject some data before bpf process, it should be read
+ * correctly because we check sk_receive_queue in
+ * tcp_bpf_recvmsg_parser().
+ */
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)"))
+ goto out_close;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out_close;
+
+ /* consume previous data we injected */
+ if (sockmap_strp_consume_pre_data(p))
+ goto out_close;
+
+ /* Previously, we encountered issues such as deadlocks and
+ * sequence errors that resulted in the inability to read
+ * continuously. Therefore, we perform multiple iterations
+ * of testing here.
+ */
+ for (i = 0; i < test_cnt; i++) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(c)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "memcmp, data mismatch"))
+ goto out_close;
+ }
+
+ if (fionread) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)"))
+ goto out_close;
+
+ err = ioctl(p, FIONREAD, &avail);
+ if (!ASSERT_OK(err, "ioctl(FIONREAD) error") ||
+ !ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "second memcmp, data mismatch"))
+ goto out_close;
+ }
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with verdict mode */
+static void test_sockmap_strp_verdict(int family, int sotype)
+{
+ int zero = 0, one = 1, sent, recvd, off;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, false, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ /* We simulate a reverse proxy server.
+ * When p0 receives data from c0, we forward it to c1.
+ * From c1's perspective, it will consider this data
+ * as being sent by p1.
+ */
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)"))
+ goto out_close;
+
+ recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "received data does not match the sent data"))
+ goto out_close;
+
+ /* send again to ensure the stream is functioning correctly. */
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)"))
+ goto out_close;
+
+ /* partial read */
+ off = STRP_PKT_FULL_LEN / 2;
+ recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "partial received data does not match the sent data"))
+ goto out_close;
+
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+void test_sockmap_strp(void)
+{
+ if (test__start_subtest("sockmap strp tcp pass"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp v6 pass"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp pass fionread"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp v6 pass fionread"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp verdict"))
+ test_sockmap_strp_verdict(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp v6 verdict"))
+ test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp partial read"))
+ test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp multiple packets"))
+ test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp dispatch"))
+ test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index 3e8517a8395a..eaac83a7f388 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -1,14 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <io_uring/mini_liburing.h>
#include "cgroup_helpers.h"
static char bpf_log_buf[4096];
static bool verbose;
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
enum sockopt_test_error {
OK = 0,
DENY_LOAD,
DENY_ATTACH,
+ EOPNOTSUPP_GETSOCKOPT,
EPERM_GETSOCKOPT,
EFAULT_GETSOCKOPT,
EPERM_SETSOCKOPT,
@@ -18,6 +24,7 @@ enum sockopt_test_error {
static struct sockopt_test {
const char *descr;
const struct bpf_insn insns[64];
+ enum bpf_prog_type prog_type;
enum bpf_attach_type attach_type;
enum bpf_attach_type expected_attach_type;
@@ -33,6 +40,7 @@ static struct sockopt_test {
socklen_t get_optlen_ret;
enum sockopt_test_error error;
+ bool io_uring_support;
} tests[] = {
/* ==================== getsockopt ==================== */
@@ -246,7 +254,9 @@ static struct sockopt_test {
.attach_type = BPF_CGROUP_GETSOCKOPT,
.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ .get_level = SOL_SOCKET,
.get_optlen = 64,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: deny bigger ctx->optlen",
@@ -271,12 +281,34 @@ static struct sockopt_test {
.get_optlen = 64,
.error = EFAULT_GETSOCKOPT,
+ .io_uring_support = true,
},
{
- .descr = "getsockopt: deny arbitrary ctx->retval",
+ .descr = "getsockopt: ignore >PAGE_SIZE optlen",
.insns = {
- /* ctx->retval = 123 */
- BPF_MOV64_IMM(BPF_REG_0, 123),
+ /* write 0xFF to the first optval byte */
+
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r2 = ctx->optval */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ /* r6 = ctx->optval + 1 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+ /* r7 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ /* ctx->optval[0] = 0xF0 */
+ BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xFF),
+ /* } */
+
+ /* retval changes are ignored */
+ /* ctx->retval = 5 */
+ BPF_MOV64_IMM(BPF_REG_0, 5),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct bpf_sockopt, retval)),
@@ -287,9 +319,12 @@ static struct sockopt_test {
.attach_type = BPF_CGROUP_GETSOCKOPT,
.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
- .get_optlen = 64,
-
- .error = EFAULT_GETSOCKOPT,
+ .get_level = 1234,
+ .get_optname = 5678,
+ .get_optval = {}, /* the changes are ignored */
+ .get_optlen = PAGE_SIZE + 1,
+ .error = EOPNOTSUPP_GETSOCKOPT,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: support smaller ctx->optlen",
@@ -309,8 +344,10 @@ static struct sockopt_test {
.attach_type = BPF_CGROUP_GETSOCKOPT,
.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ .get_level = SOL_SOCKET,
.get_optlen = 64,
.get_optlen_ret = 32,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: deny writing to ctx->optval",
@@ -490,6 +527,7 @@ static struct sockopt_test {
.set_level = 123,
.set_optlen = 1,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: allow changing ctx->level",
@@ -544,6 +582,7 @@ static struct sockopt_test {
.set_optname = 123,
.set_optlen = 1,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: allow changing ctx->optname",
@@ -596,6 +635,7 @@ static struct sockopt_test {
.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
.set_optlen = 64,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: ctx->optlen == -1 is ok",
@@ -612,6 +652,7 @@ static struct sockopt_test {
.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
.set_optlen = 64,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: deny ctx->optlen < 0 (except -1)",
@@ -630,6 +671,7 @@ static struct sockopt_test {
.set_optlen = 4,
.error = EFAULT_SETSOCKOPT,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: deny ctx->optlen > input optlen",
@@ -647,6 +689,46 @@ static struct sockopt_test {
.set_optlen = 64,
.error = EFAULT_SETSOCKOPT,
+ .io_uring_support = true,
+ },
+ {
+ .descr = "setsockopt: ignore >PAGE_SIZE optlen",
+ .insns = {
+ /* write 0xFF to the first optval byte */
+
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r2 = ctx->optval */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ /* r6 = ctx->optval + 1 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+ /* r7 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ /* ctx->optval[0] = 0xF0 */
+ BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0),
+ /* } */
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_level = SOL_IP,
+ .set_optname = IP_TOS,
+ .set_optval = {},
+ .set_optlen = PAGE_SIZE + 1,
+
+ .get_level = SOL_IP,
+ .get_optname = IP_TOS,
+ .get_optval = {}, /* the changes are ignored */
+ .get_optlen = 4,
},
{
.descr = "setsockopt: allow changing ctx->optlen within bounds",
@@ -847,40 +929,157 @@ static struct sockopt_test {
.error = EPERM_SETSOCKOPT,
},
+
+ /* ==================== prog_type ==================== */
+
+ {
+ .descr = "can attach only BPF_CGROUP_SETSOCKOP",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = 0,
+ .error = DENY_ATTACH,
+ },
+
+ {
+ .descr = "can attach only BPF_CGROUP_GETSOCKOP",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = 0,
+ .error = DENY_ATTACH,
+ },
};
static int load_prog(const struct bpf_insn *insns,
+ enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type)
{
- struct bpf_load_program_attr attr = {
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
.expected_attach_type = expected_attach_type,
- .insns = insns,
- .license = "GPL",
.log_level = 2,
- };
- int fd;
+ .log_buf = bpf_log_buf,
+ .log_size = sizeof(bpf_log_buf),
+ );
+ int fd, insns_cnt = 0;
for (;
- insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT);
- attr.insns_cnt++) {
+ insns[insns_cnt].code != (BPF_JMP | BPF_EXIT);
+ insns_cnt++) {
}
- attr.insns_cnt++;
+ insns_cnt++;
- fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf));
+ fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts);
if (verbose && fd < 0)
fprintf(stderr, "%s\n", bpf_log_buf);
return fd;
}
-static int run_test(int cgroup_fd, struct sockopt_test *test)
+/* Core function that handles io_uring ring initialization,
+ * sending SQE with sockopt command and waiting for the CQE.
+ */
+static int uring_sockopt(int op, int fd, int level, int optname,
+ const void *optval, socklen_t optlen)
+{
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ struct io_uring ring;
+ int err;
+
+ err = io_uring_queue_init(1, &ring, 0);
+ if (!ASSERT_OK(err, "io_uring initialization"))
+ return err;
+
+ sqe = io_uring_get_sqe(&ring);
+ if (!ASSERT_NEQ(sqe, NULL, "Get an SQE")) {
+ err = -1;
+ goto fail;
+ }
+
+ io_uring_prep_cmd(sqe, op, fd, level, optname, optval, optlen);
+
+ err = io_uring_submit(&ring);
+ if (!ASSERT_EQ(err, 1, "Submit SQE"))
+ goto fail;
+
+ err = io_uring_wait_cqe(&ring, &cqe);
+ if (!ASSERT_OK(err, "Wait for CQE"))
+ goto fail;
+
+ err = cqe->res;
+
+fail:
+ io_uring_queue_exit(&ring);
+
+ return err;
+}
+
+static int uring_setsockopt(int fd, int level, int optname, const void *optval,
+ socklen_t optlen)
+{
+ return uring_sockopt(SOCKET_URING_OP_SETSOCKOPT, fd, level, optname,
+ optval, optlen);
+}
+
+static int uring_getsockopt(int fd, int level, int optname, void *optval,
+ socklen_t *optlen)
+{
+ int ret = uring_sockopt(SOCKET_URING_OP_GETSOCKOPT, fd, level, optname,
+ optval, *optlen);
+ if (ret < 0)
+ return ret;
+
+ /* Populate optlen back to be compatible with systemcall interface,
+ * and simplify the test.
+ */
+ *optlen = ret;
+
+ return 0;
+}
+
+/* Execute the setsocktopt operation */
+static int call_setsockopt(bool use_io_uring, int fd, int level, int optname,
+ const void *optval, socklen_t optlen)
+{
+ if (use_io_uring)
+ return uring_setsockopt(fd, level, optname, optval, optlen);
+
+ return setsockopt(fd, level, optname, optval, optlen);
+}
+
+/* Execute the getsocktopt operation */
+static int call_getsockopt(bool use_io_uring, int fd, int level, int optname,
+ void *optval, socklen_t *optlen)
{
- int sock_fd, err, prog_fd;
+ if (use_io_uring)
+ return uring_getsockopt(fd, level, optname, optval, optlen);
+
+ return getsockopt(fd, level, optname, optval, optlen);
+}
+
+static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring,
+ bool use_link)
+{
+ int prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT;
+ int sock_fd, err, prog_fd, link_fd = -1;
void *optval = NULL;
int ret = 0;
- prog_fd = load_prog(test->insns, test->expected_attach_type);
+ if (test->prog_type)
+ prog_type = test->prog_type;
+
+ prog_fd = load_prog(test->insns, prog_type, test->expected_attach_type);
if (prog_fd < 0) {
if (test->error == DENY_LOAD)
return 0;
@@ -889,7 +1088,12 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
return -1;
}
- err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+ if (use_link) {
+ err = bpf_link_create(prog_fd, cgroup_fd, test->attach_type, NULL);
+ link_fd = err;
+ } else {
+ err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+ }
if (err < 0) {
if (test->error == DENY_ATTACH)
goto close_prog_fd;
@@ -907,8 +1111,16 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
}
if (test->set_optlen) {
- err = setsockopt(sock_fd, test->set_level, test->set_optname,
- test->set_optval, test->set_optlen);
+ if (test->set_optlen >= PAGE_SIZE) {
+ int num_pages = test->set_optlen / PAGE_SIZE;
+ int remainder = test->set_optlen % PAGE_SIZE;
+
+ test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
+ }
+
+ err = call_setsockopt(use_io_uring, sock_fd, test->set_level,
+ test->set_optname, test->set_optval,
+ test->set_optlen);
if (err) {
if (errno == EPERM && test->error == EPERM_SETSOCKOPT)
goto close_sock_fd;
@@ -922,14 +1134,24 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
}
if (test->get_optlen) {
+ if (test->get_optlen >= PAGE_SIZE) {
+ int num_pages = test->get_optlen / PAGE_SIZE;
+ int remainder = test->get_optlen % PAGE_SIZE;
+
+ test->get_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
+ }
+
optval = malloc(test->get_optlen);
+ memset(optval, 0, test->get_optlen);
socklen_t optlen = test->get_optlen;
socklen_t expected_get_optlen = test->get_optlen_ret ?:
test->get_optlen;
- err = getsockopt(sock_fd, test->get_level, test->get_optname,
- optval, &optlen);
+ err = call_getsockopt(use_io_uring, sock_fd, test->get_level,
+ test->get_optname, optval, &optlen);
if (err) {
+ if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT)
+ goto free_optval;
if (errno == EPERM && test->error == EPERM_GETSOCKOPT)
goto free_optval;
if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT)
@@ -962,7 +1184,12 @@ free_optval:
close_sock_fd:
close(sock_fd);
detach_prog:
- bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+ if (use_link) {
+ if (link_fd >= 0)
+ close(link_fd);
+ } else {
+ bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+ }
close_prog_fd:
close(prog_fd);
return ret;
@@ -973,12 +1200,20 @@ void test_sockopt(void)
int cgroup_fd, i;
cgroup_fd = test__join_cgroup("/sockopt");
- if (CHECK_FAIL(cgroup_fd < 0))
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
return;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- test__start_subtest(tests[i].descr);
- CHECK_FAIL(run_test(cgroup_fd, &tests[i]));
+ if (!test__start_subtest(tests[i].descr))
+ continue;
+
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], false, false),
+ tests[i].descr);
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], false, true),
+ tests[i].descr);
+ if (tests[i].io_uring_support)
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], true, false),
+ tests[i].descr);
}
close(cgroup_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index ec281b0363b8..7cd8be2780ca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -1,41 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#include "sockopt_inherit.skel.h"
#define SOL_CUSTOM 0xdeadbeef
#define CUSTOM_INHERIT1 0
#define CUSTOM_INHERIT2 1
#define CUSTOM_LISTENER 2
-static int connect_to_server(int server_fd)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create client socket");
- return -1;
- }
-
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto out;
- }
-
- if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
- log_err("Fail to connect to server");
- goto out;
- }
-
- return fd;
-
-out:
- close(fd);
- return -1;
-}
-
static int verify_sockopt(int fd, int optname, const char *msg, char expected)
{
socklen_t optlen = 1;
@@ -76,20 +50,16 @@ static void *server_thread(void *arg)
pthread_cond_signal(&server_started);
pthread_mutex_unlock(&server_started_mtx);
- if (CHECK_FAIL(err < 0)) {
- perror("Failed to listed on socket");
+ if (!ASSERT_GE(err, 0, "listed on socket"))
return NULL;
- }
err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1);
err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1);
err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1);
client_fd = accept(fd, (struct sockaddr *)&addr, &len);
- if (CHECK_FAIL(client_fd < 0)) {
- perror("Failed to accept client");
+ if (!ASSERT_GE(client_fd, 0, "accept client"))
return NULL;
- }
err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1);
err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1);
@@ -100,125 +70,93 @@ static void *server_thread(void *arg)
return (void *)(long)err;
}
-static int start_server(void)
+static int custom_cb(int fd, void *opts)
{
- struct sockaddr_in addr = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
- };
char buf;
int err;
- int fd;
int i;
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create server socket");
- return -1;
- }
-
for (i = CUSTOM_INHERIT1; i <= CUSTOM_LISTENER; i++) {
buf = 0x01;
err = setsockopt(fd, SOL_CUSTOM, i, &buf, 1);
if (err) {
log_err("Failed to call setsockopt(%d)", i);
- close(fd);
return -1;
}
}
- if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
- log_err("Failed to bind socket");
- close(fd);
- return -1;
- }
-
- return fd;
-}
-
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
- return -1;
- }
-
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog) {
- log_err("Failed to find %s BPF program", title);
- return -1;
- }
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, 0);
- if (err) {
- log_err("Failed to attach %s BPF program", title);
- return -1;
- }
-
return 0;
}
static void run_test(int cgroup_fd)
{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_inherit.o",
+ struct bpf_link *link_getsockopt = NULL;
+ struct bpf_link *link_setsockopt = NULL;
+ struct network_helper_opts opts = {
+ .post_socket_cb = custom_cb,
};
int server_fd = -1, client_fd;
- struct bpf_object *obj;
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ struct sockopt_inherit *obj;
void *server_err;
pthread_t tid;
- int ignored;
int err;
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
+ obj = sockopt_inherit__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
- if (CHECK_FAIL(err))
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ link_getsockopt = bpf_program__attach_cgroup(obj->progs._getsockopt,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_getsockopt, "cg-attach-getsockopt"))
goto close_bpf_object;
- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
- if (CHECK_FAIL(err))
+ link_setsockopt = bpf_program__attach_cgroup(obj->progs._setsockopt,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_setsockopt, "cg-attach-setsockopt"))
goto close_bpf_object;
- server_fd = start_server();
- if (CHECK_FAIL(server_fd < 0))
+ server_fd = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr,
+ sizeof(addr), &opts);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
goto close_bpf_object;
pthread_mutex_lock(&server_started_mtx);
- if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
- (void *)&server_fd)))
+ if (!ASSERT_OK(pthread_create(&tid, NULL, server_thread,
+ (void *)&server_fd), "pthread_create")) {
+ pthread_mutex_unlock(&server_started_mtx);
goto close_server_fd;
+ }
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
- client_fd = connect_to_server(server_fd);
- if (CHECK_FAIL(client_fd < 0))
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
goto close_server_fd;
- CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0));
- CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0));
- CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0));
+ ASSERT_OK(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0), "verify_sockopt1");
+ ASSERT_OK(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0), "verify_sockopt2");
+ ASSERT_OK(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0), "verify_sockopt ener");
pthread_join(tid, &server_err);
err = (int)(long)server_err;
- CHECK_FAIL(err);
+ ASSERT_OK(err, "pthread_join retval");
close(client_fd);
close_server_fd:
close(server_fd);
close_bpf_object:
- bpf_object__close(obj);
+ bpf_link__destroy(link_getsockopt);
+ bpf_link__destroy(link_setsockopt);
+
+ sockopt_inherit__destroy(obj);
}
void test_sockopt_inherit(void)
@@ -226,7 +164,7 @@ void test_sockopt_inherit(void)
int cgroup_fd;
cgroup_fd = test__join_cgroup("/sockopt_inherit");
- if (CHECK_FAIL(cgroup_fd < 0))
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
return;
run_test(cgroup_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index 51fac975b316..759bbb6f8c5f 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -2,61 +2,13 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
- return -1;
- }
-
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog) {
- log_err("Failed to find %s BPF program", title);
- return -1;
- }
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, BPF_F_ALLOW_MULTI);
- if (err) {
- log_err("Failed to attach %s BPF program", title);
- return -1;
- }
-
- return 0;
-}
+#include "sockopt_multi.skel.h"
-static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err)
- return -1;
-
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog)
- return -1;
-
- err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd,
- attach_type);
- if (err)
- return -1;
-
- return 0;
-}
-
-static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
+static int run_getsockopt_test(struct sockopt_multi *obj, int cg_parent,
int cg_child, int sock_fd)
{
+ struct bpf_link *link_parent = NULL;
+ struct bpf_link *link_child = NULL;
socklen_t optlen;
__u8 buf;
int err;
@@ -89,8 +41,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
* - child: 0x80 -> 0x90
*/
- err = prog_attach(obj, cg_child, "cgroup/getsockopt/child");
- if (err)
+ link_child = bpf_program__attach_cgroup(obj->progs._getsockopt_child,
+ cg_child);
+ if (!ASSERT_OK_PTR(link_child, "cg-attach-getsockopt_child"))
goto detach;
buf = 0x00;
@@ -113,8 +66,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
* - parent: 0x90 -> 0xA0
*/
- err = prog_attach(obj, cg_parent, "cgroup/getsockopt/parent");
- if (err)
+ link_parent = bpf_program__attach_cgroup(obj->progs._getsockopt_parent,
+ cg_parent);
+ if (!ASSERT_OK_PTR(link_parent, "cg-attach-getsockopt_parent"))
goto detach;
buf = 0x00;
@@ -157,11 +111,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
* - parent: unexpected 0x40, EPERM
*/
- err = prog_detach(obj, cg_child, "cgroup/getsockopt/child");
- if (err) {
- log_err("Failed to detach child program");
- goto detach;
- }
+ bpf_link__destroy(link_child);
+ link_child = NULL;
buf = 0x00;
optlen = 1;
@@ -198,15 +149,17 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
}
detach:
- prog_detach(obj, cg_child, "cgroup/getsockopt/child");
- prog_detach(obj, cg_parent, "cgroup/getsockopt/parent");
+ bpf_link__destroy(link_child);
+ bpf_link__destroy(link_parent);
return err;
}
-static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
+static int run_setsockopt_test(struct sockopt_multi *obj, int cg_parent,
int cg_child, int sock_fd)
{
+ struct bpf_link *link_parent = NULL;
+ struct bpf_link *link_child = NULL;
socklen_t optlen;
__u8 buf;
int err;
@@ -236,8 +189,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
/* Attach child program and make sure it adds 0x10. */
- err = prog_attach(obj, cg_child, "cgroup/setsockopt");
- if (err)
+ link_child = bpf_program__attach_cgroup(obj->progs._setsockopt,
+ cg_child);
+ if (!ASSERT_OK_PTR(link_child, "cg-attach-setsockopt_child"))
goto detach;
buf = 0x80;
@@ -263,8 +217,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
/* Attach parent program and make sure it adds another 0x10. */
- err = prog_attach(obj, cg_parent, "cgroup/setsockopt");
- if (err)
+ link_parent = bpf_program__attach_cgroup(obj->progs._setsockopt,
+ cg_parent);
+ if (!ASSERT_OK_PTR(link_parent, "cg-attach-setsockopt_parent"))
goto detach;
buf = 0x80;
@@ -289,45 +244,42 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
}
detach:
- prog_detach(obj, cg_child, "cgroup/setsockopt");
- prog_detach(obj, cg_parent, "cgroup/setsockopt");
+ bpf_link__destroy(link_child);
+ bpf_link__destroy(link_parent);
return err;
}
void test_sockopt_multi(void)
{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_multi.o",
- };
int cg_parent = -1, cg_child = -1;
- struct bpf_object *obj = NULL;
+ struct sockopt_multi *obj = NULL;
int sock_fd = -1;
- int err = -1;
- int ignored;
cg_parent = test__join_cgroup("/parent");
- if (CHECK_FAIL(cg_parent < 0))
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
goto out;
cg_child = test__join_cgroup("/parent/child");
- if (CHECK_FAIL(cg_child < 0))
+ if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
goto out;
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
+ obj = sockopt_multi__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
goto out;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
sock_fd = socket(AF_INET, SOCK_STREAM, 0);
- if (CHECK_FAIL(sock_fd < 0))
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
goto out;
- CHECK_FAIL(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd));
- CHECK_FAIL(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd));
+ ASSERT_OK(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd), "getsockopt_test");
+ ASSERT_OK(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd), "setsockopt_test");
out:
close(sock_fd);
- bpf_object__close(obj);
+ sockopt_multi__destroy(obj);
close(cg_child);
close(cg_parent);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..6b2d300e9fd4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <netinet/tcp.h>
+#include "sockopt_qos_to_cc.skel.h"
+
+static void run_setsockopt_test(int cg_fd, int sock_fd)
+{
+ socklen_t optlen;
+ char cc[16]; /* TCP_CA_NAME_MAX */
+ int buf;
+ int err = -1;
+
+ buf = 0x2D;
+ err = setsockopt(sock_fd, SOL_IPV6, IPV6_TCLASS, &buf, sizeof(buf));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, IPV6_TCLASS)"))
+ return;
+
+ /* Verify the setsockopt cc change */
+ optlen = sizeof(cc);
+ err = getsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, cc, &optlen);
+ if (!ASSERT_OK(err, "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+
+ if (!ASSERT_STREQ(cc, "reno", "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+}
+
+void test_sockopt_qos_to_cc(void)
+{
+ struct sockopt_qos_to_cc *skel;
+ char cc_cubic[16] = "cubic"; /* TCP_CA_NAME_MAX */
+ int cg_fd = -1;
+ int sock_fd = -1;
+ int err;
+
+ cg_fd = test__join_cgroup("/sockopt_qos_to_cc");
+ if (!ASSERT_GE(cg_fd, 0, "cg-join(sockopt_qos_to_cc)"))
+ return;
+
+ skel = sockopt_qos_to_cc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto done;
+
+ skel->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "v6 socket open"))
+ goto done;
+
+ err = setsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, &cc_cubic,
+ sizeof(cc_cubic));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, TCP_CONGESTION)"))
+ goto done;
+
+ skel->links.sockopt_qos_to_cc =
+ bpf_program__attach_cgroup(skel->progs.sockopt_qos_to_cc,
+ cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.sockopt_qos_to_cc,
+ "prog_attach(sockopt_qos_to_cc)"))
+ goto done;
+
+ run_setsockopt_test(cg_fd, sock_fd);
+
+done:
+ if (sock_fd != -1)
+ close(sock_fd);
+ if (cg_fd != -1)
+ close(cg_fd);
+ /* destroy can take null and error pointer */
+ sockopt_qos_to_cc__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index d5b44b135c00..ba6b3ec1156a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -2,7 +2,9 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
-#include <linux/tcp.h>
+#include <netinet/tcp.h>
+#include <linux/netlink.h>
+#include "sockopt_sk.skel.h"
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
@@ -172,79 +174,76 @@ static int getsetsockopt(void)
}
memset(&buf, 0, sizeof(buf));
- buf.zc.address = 12345; /* rejected by BPF */
+ buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
optlen = sizeof(buf.zc);
errno = 0;
err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
- if (errno != EPERM) {
+ if (errno != EINVAL) {
log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
err, errno);
goto err;
}
- free(big_buf);
- close(fd);
- return 0;
-err:
- free(big_buf);
- close(fd);
- return -1;
-}
+ /* optval=NULL case is handled correctly */
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
+ close(fd);
+ fd = socket(AF_NETLINK, SOCK_RAW, 0);
+ if (fd < 0) {
+ log_err("Failed to create AF_NETLINK socket");
return -1;
}
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog) {
- log_err("Failed to find %s BPF program", title);
- return -1;
+ buf.u32 = 1;
+ optlen = sizeof(__u32);
+ err = setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &buf, optlen);
+ if (err) {
+ log_err("Unexpected getsockopt(NETLINK_ADD_MEMBERSHIP) err=%d errno=%d",
+ err, errno);
+ goto err;
}
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, 0);
+ optlen = 0;
+ err = getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &optlen);
if (err) {
- log_err("Failed to attach %s BPF program", title);
- return -1;
+ log_err("Unexpected getsockopt(NETLINK_LIST_MEMBERSHIPS) err=%d errno=%d",
+ err, errno);
+ goto err;
}
+ ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value");
+ free(big_buf);
+ close(fd);
return 0;
+err:
+ free(big_buf);
+ close(fd);
+ return -1;
}
static void run_test(int cgroup_fd)
{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_sk.o",
- };
- struct bpf_object *obj;
- int ignored;
- int err;
-
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
- return;
+ struct sockopt_sk *skel;
+
+ skel = sockopt_sk__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ skel->bss->page_size = getpagesize();
- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._setsockopt =
+ bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link"))
+ goto cleanup;
- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._getsockopt =
+ bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link"))
+ goto cleanup;
- CHECK_FAIL(getsetsockopt());
+ ASSERT_OK(getsetsockopt(), "getsetsockopt");
-close_bpf_object:
- bpf_object__close(obj);
+cleanup:
+ sockopt_sk__destroy(skel);
}
void test_sockopt_sk(void)
@@ -252,7 +251,7 @@ void test_sockopt_sk(void)
int cgroup_fd;
cgroup_fd = test__join_cgroup("/sockopt_sk");
- if (CHECK_FAIL(cgroup_fd < 0))
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /sockopt_sk"))
return;
run_test(cgroup_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
new file mode 100644
index 000000000000..254fbfeab06a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <regex.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_spin_lock.skel.h"
+#include "test_spin_lock_fail.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static struct {
+ const char *prog_name;
+ const char *err_msg;
+} spin_lock_fail_tests[] = {
+ { "lock_id_kptr_preserve",
+ "5: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2) "
+ "R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=ptr_ expected=percpu_ptr_" },
+ { "lock_id_global_zero",
+ "; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=map_value expected=percpu_ptr_" },
+ { "lock_id_mapval_preserve",
+ "[0-9]\\+: (bf) r1 = r0 ;"
+ " R0=map_value(id=1,map=array_map,ks=4,vs=8)"
+ " R1=map_value(id=1,map=array_map,ks=4,vs=8)\n"
+ "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=map_value expected=percpu_ptr_" },
+ { "lock_id_innermapval_preserve",
+ "[0-9]\\+: (bf) r1 = r0 ;"
+ " R0=map_value(id=2,ks=4,vs=8)"
+ " R1=map_value(id=2,ks=4,vs=8)\n"
+ "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=map_value expected=percpu_ptr_" },
+ { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_kptr_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_kptr_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_kptr_innermapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_innermapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_innermapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_innermapval1", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_innermapval2", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" },
+ { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_helper_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_kfunc_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_subprog_indirect", "global function calls are not allowed while holding a lock" },
+};
+
+static int match_regex(const char *pattern, const char *string)
+{
+ int err, rc;
+ regex_t re;
+
+ err = regcomp(&re, pattern, REG_NOSUB);
+ if (err) {
+ char errbuf[512];
+
+ regerror(err, &re, errbuf, sizeof(errbuf));
+ PRINT_FAIL("Can't compile regex: %s\n", errbuf);
+ return -1;
+ }
+ rc = regexec(&re, string, 0, NULL, 0);
+ regfree(&re);
+ return rc == 0 ? 1 : 0;
+}
+
+static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct test_spin_lock_fail *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = test_spin_lock_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "test_spin_lock_fail__open_opts"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto end;
+
+ bpf_program__set_autoload(prog, true);
+
+ ret = test_spin_lock_fail__load(skel);
+ if (!ASSERT_ERR(ret, "test_spin_lock_fail__load must fail"))
+ goto end;
+
+ /* Skip check if JIT does not support kfuncs */
+ if (strstr(log_buf, "JIT does not support calling kernel function")) {
+ test__skip();
+ goto end;
+ }
+
+ ret = match_regex(err_msg, log_buf);
+ if (!ASSERT_GE(ret, 0, "match_regex"))
+ goto end;
+
+ if (!ASSERT_TRUE(ret, "no match for expected error message")) {
+ fprintf(stderr, "Expected: %s\n", err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+
+end:
+ test_spin_lock_fail__destroy(skel);
+}
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+ pthread_exit(arg);
+}
+
+void test_spin_lock_success(void)
+{
+ struct test_spin_lock *skel;
+ pthread_t thread_id[4];
+ int prog_fd, i;
+ void *ret;
+
+ skel = test_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_spin_lock__open_and_load"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.bpf_spin_lock_test);
+ for (i = 0; i < 4; i++) {
+ int err;
+
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end;
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end;
+ }
+end:
+ test_spin_lock__destroy(skel);
+}
+
+void test_spin_lock(void)
+{
+ int i;
+
+ test_spin_lock_success();
+
+ for (i = 0; i < ARRAY_SIZE(spin_lock_fail_tests); i++) {
+ if (!test__start_subtest(spin_lock_fail_tests[i].prog_name))
+ continue;
+ test_spin_lock_fail_prog(spin_lock_fail_tests[i].prog_name,
+ spin_lock_fail_tests[i].err_msg);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
deleted file mode 100644
index 7577a77a4c4c..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include <network_helpers.h>
-
-static void *spin_lock_thread(void *arg)
-{
- __u32 duration, retval;
- int err, prog_fd = *(u32 *) arg;
-
- err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
- pthread_exit(arg);
-}
-
-void test_spinlock(void)
-{
- const char *file = "./test_spin_lock.o";
- pthread_t thread_id[4];
- struct bpf_object *obj = NULL;
- int prog_fd;
- int err = 0, i;
- void *ret;
-
- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
- if (CHECK_FAIL(err)) {
- printf("test_spin_lock:bpf_prog_load errno %d\n", errno);
- goto close_prog;
- }
- for (i = 0; i < 4; i++)
- if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd)))
- goto close_prog;
-
- for (i = 0; i < 4; i++)
- if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
- ret != (void *)&prog_fd))
- goto close_prog;
-close_prog:
- bpf_object__close(obj);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index e8399ae50e77..271b5cc9fc01 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -7,13 +7,12 @@ void test_stacktrace_build_id(void)
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
struct test_stacktrace_build_id *skel;
- int err, stack_trace_len;
- __u32 key, previous_key, val, duration = 0;
- char buf[256];
- int i, j;
+ int err, stack_trace_len, build_id_size;
+ __u32 key, prev_key, val, duration = 0;
+ char buf[BPF_BUILD_ID_SIZE];
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
int build_id_matches = 0;
- int retry = 1;
+ int i, retry = 1;
retry:
skel = test_stacktrace_build_id__open_and_load();
@@ -40,7 +39,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
@@ -52,20 +51,19 @@ retry:
"err %d errno %d\n", err, errno))
goto cleanup;
- err = extract_build_id(buf, 256);
+ build_id_size = read_build_id("urandom_read", buf, sizeof(buf));
+ err = build_id_size < 0 ? build_id_size : 0;
- if (CHECK(err, "get build_id with readelf",
+ if (CHECK(err, "read_build_id",
"err %d errno %d\n", err, errno))
goto cleanup;
- err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key));
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
goto cleanup;
do {
- char build_id[64];
-
err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
@@ -73,14 +71,11 @@ retry:
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
- for (j = 0; j < 20; ++j)
- sprintf(build_id + 2 * j, "%02x",
- id_offs[i].build_id[j] & 0xff);
- if (strstr(buf, build_id) != NULL)
+ if (memcmp(buf, id_offs[i].build_id, build_id_size) == 0)
build_id_matches = 1;
}
- previous_key = key;
- } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+ prev_key = key;
+ } while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0);
/* stack_map_get_build_id_offset() is racy and sometimes can return
* BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 11a769e18f5d..b277dddd5af7 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -2,21 +2,6 @@
#include <test_progs.h>
#include "test_stacktrace_build_id.skel.h"
-static __u64 read_perf_max_sample_freq(void)
-{
- __u64 sample_freq = 5000; /* fallback to 5000 on error */
- FILE *f;
- __u32 duration = 0;
-
- f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
- if (f == NULL)
- return sample_freq;
- CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate",
- "return default value: 5000,err %d\n", -errno);
- fclose(f);
- return sample_freq;
-}
-
void test_stacktrace_build_id_nmi(void)
{
int control_map_fd, stackid_hmap_fd, stackmap_fd;
@@ -27,12 +12,11 @@ void test_stacktrace_build_id_nmi(void)
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
- __u32 key, previous_key, val, duration = 0;
- char buf[256];
- int i, j;
+ __u32 key, prev_key, val, duration = 0;
+ char buf[BPF_BUILD_ID_SIZE];
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
- int build_id_matches = 0;
- int retry = 1;
+ int build_id_matches = 0, build_id_size;
+ int i, retry = 1;
attr.sample_freq = read_perf_max_sample_freq();
@@ -42,7 +26,7 @@ retry:
return;
/* override program type */
- bpf_program__set_perf_event(skel->progs.oncpu);
+ bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT);
err = test_stacktrace_build_id__load(skel);
if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
@@ -51,7 +35,7 @@ retry:
pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
0 /* cpu 0 */, -1 /* group id */,
0 /* flags */);
- if (pmu_fd < 0 && errno == ENOENT) {
+ if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
test__skip();
goto cleanup;
@@ -62,8 +46,7 @@ retry:
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
close(pmu_fd);
goto cleanup;
}
@@ -83,7 +66,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
@@ -95,35 +78,32 @@ retry:
"err %d errno %d\n", err, errno))
goto cleanup;
- err = extract_build_id(buf, 256);
+ build_id_size = read_build_id("urandom_read", buf, sizeof(buf));
+ err = build_id_size < 0 ? build_id_size : 0;
if (CHECK(err, "get build_id with readelf",
"err %d errno %d\n", err, errno))
goto cleanup;
- err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key));
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
goto cleanup;
do {
- char build_id[64];
-
- err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ err = bpf_map__lookup_elem(skel->maps.stackmap, &key, sizeof(key),
+ id_offs, sizeof(id_offs), 0);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
goto cleanup;
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
- for (j = 0; j < 20; ++j)
- sprintf(build_id + 2 * j, "%02x",
- id_offs[i].build_id[j] & 0xff);
- if (strstr(buf, build_id) != NULL)
+ if (memcmp(buf, id_offs[i].build_id, build_id_size) == 0)
build_id_matches = 1;
}
- previous_key = key;
- } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+ prev_key = key;
+ } while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0);
/* stack_map_get_build_id_offset() is racy and sometimes can return
* BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
new file mode 100644
index 000000000000..c9efdd2a5b18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_ips.skel.h"
+
+#ifdef __x86_64__
+static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...)
+{
+ __u64 ips[PERF_MAX_STACK_DEPTH];
+ struct ksyms *ksyms = NULL;
+ int i, err = 0;
+ va_list args;
+
+ /* sorted by addr */
+ ksyms = load_kallsyms_local();
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
+ return -1;
+
+ /* unlikely, but... */
+ if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max"))
+ return -1;
+
+ err = bpf_map_lookup_elem(fd, &key, ips);
+ if (err)
+ goto out;
+
+ /*
+ * Compare all symbols provided via arguments with stacktrace ips,
+ * and their related symbol addresses.t
+ */
+ va_start(args, cnt);
+
+ for (i = 0; i < cnt; i++) {
+ unsigned long val;
+ struct ksym *ksym;
+
+ val = va_arg(args, unsigned long);
+ ksym = ksym_search_local(ksyms, ips[i]);
+ if (!ASSERT_OK_PTR(ksym, "ksym_search_local"))
+ break;
+ ASSERT_EQ(ksym->addr, val, "stack_cmp");
+ }
+
+ va_end(args);
+
+out:
+ free_kallsyms_local(ksyms);
+ return err;
+}
+
+static void test_stacktrace_ips_kprobe_multi(bool retprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
+ .retprobe = retprobe
+ );
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct stacktrace_ips *skel;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts(
+ skel->progs.kprobe_multi_test,
+ "bpf_testmod_stacktrace_test", &opts);
+ if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void test_stacktrace_ips_raw_tp(void)
+{
+ __u32 info_len = sizeof(struct bpf_prog_info);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_prog_info info = {};
+ struct stacktrace_ips *skel;
+ __u64 bpf_prog_ksym = 0;
+ int err;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.rawtp_test = bpf_program__attach_raw_tracepoint(
+ skel->progs.rawtp_test,
+ "bpf_testmod_test_read");
+ if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint"))
+ goto cleanup;
+
+ /* get bpf program address */
+ info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym);
+ info.nr_jited_ksyms = 1;
+ err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test),
+ &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2,
+ bpf_prog_ksym,
+ ksym_get_addr("bpf_trace_run2"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void __test_stacktrace_ips(void)
+{
+ if (test__start_subtest("kprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(false);
+ if (test__start_subtest("kretprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(true);
+ if (test__start_subtest("raw_tp"))
+ test_stacktrace_ips_raw_tp();
+}
+#else
+static void __test_stacktrace_ips(void)
+{
+ test__skip();
+}
+#endif
+
+void test_stacktrace_ips(void)
+{
+ __test_stacktrace_ips();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 37269d23df93..c23b97414813 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -1,46 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "stacktrace_map.skel.h"
void test_stacktrace_map(void)
{
+ struct stacktrace_map *skel;
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "tracepoint/sched/sched_switch";
- int err, prog_fd, stack_trace_len;
- const char *file = "./test_stacktrace_map.o";
- __u32 key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link;
+ int err, stack_trace_len;
+ __u32 key, val, stack_id, duration = 0;
+ __u64 stack[PERF_MAX_STACK_DEPTH];
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = stacktrace_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
-
- link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
- goto close_prog;
-
- /* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK_FAIL(control_map_fd < 0))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK_FAIL(stackid_hmap_fd < 0))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK_FAIL(stackmap_fd < 0))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK_FAIL(stack_amap_fd < 0))
- goto disable_pmu;
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+ err = stacktrace_map__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
/* give some time for bpf program run */
sleep(1);
@@ -50,26 +31,32 @@ void test_stacktrace_map(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
-
-disable_pmu:
- bpf_link__destroy(link);
-close_prog:
- bpf_object__close(obj);
+ goto out;
+
+ stack_id = skel->bss->stack_id;
+ err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_OK(err, "lookup and delete target stack_id"))
+ goto out;
+
+ err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id"))
+ goto out;
+out:
+ stacktrace_map__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 404a5498e1a3..e985d51d3d47 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -3,25 +3,25 @@
void test_stacktrace_map_raw_tp(void)
{
- const char *prog_name = "tracepoint/sched/sched_switch";
+ const char *prog_name = "oncpu";
int control_map_fd, stackid_hmap_fd, stackmap_fd;
- const char *file = "./test_stacktrace_map.o";
+ const char *file = "./stacktrace_map.bpf.o";
__u32 key, val, duration = 0;
int err, prog_fd;
struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_link *link = NULL;
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
/* find map fds */
@@ -46,7 +46,7 @@ void test_stacktrace_map_raw_tp(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
@@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void)
goto close_prog;
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
new file mode 100644
index 000000000000..dc2ccf6a14d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_map_skip.skel.h"
+
+#define TEST_STACK_DEPTH 2
+
+void test_stacktrace_map_skip(void)
+{
+ struct stacktrace_map_skip *skel;
+ int stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ int err, stack_trace_len;
+
+ skel = stacktrace_map_skip__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ /* find map fds */
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ if (!ASSERT_GE(stackid_hmap_fd, 0, "stackid_hmap fd"))
+ goto out;
+
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ if (!ASSERT_GE(stackmap_fd, 0, "stackmap fd"))
+ goto out;
+
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+ if (!ASSERT_GE(stack_amap_fd, 0, "stack_amap fd"))
+ goto out;
+
+ skel->bss->pid = getpid();
+
+ err = stacktrace_map_skip__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* give some time for bpf program run */
+ sleep(1);
+
+ /* disable stack trace collection */
+ skel->bss->control = 1;
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vice versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap"))
+ goto out;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (!ASSERT_OK(err, "compare_map_keys stackmap vs. stackid_hmap"))
+ goto out;
+
+ stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
+ err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+ if (!ASSERT_OK(err, "compare_stack_ips stackmap vs. stack_amap"))
+ goto out;
+
+ if (!ASSERT_EQ(skel->bss->failed, 0, "skip_failed"))
+ goto out;
+
+out:
+ stacktrace_map_skip__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
new file mode 100644
index 000000000000..5c4e3014e063
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_static_linked.skel.h"
+
+void test_static_linked(void)
+{
+ int err;
+ struct test_static_linked* skel;
+
+ skel = test_static_linked__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->rovar1 = 1;
+ skel->rodata->rovar2 = 4;
+
+ err = test_static_linked__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_static_linked__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ usleep(1);
+
+ ASSERT_EQ(skel->data->var1, 1 * 2 + 2 + 3, "var1");
+ ASSERT_EQ(skel->data->var2, 4 * 3 + 5 + 6, "var2");
+
+cleanup:
+ test_static_linked__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c
new file mode 100644
index 000000000000..c3cce5c292bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stream.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+
+#include "stream.skel.h"
+#include "stream_fail.skel.h"
+
+void test_stream_failure(void)
+{
+ RUN_TESTS(stream_fail);
+}
+
+void test_stream_success(void)
+{
+ RUN_TESTS(stream);
+ return;
+}
+
+void test_stream_syscall(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ struct stream *skel;
+ int ret, prog_fd;
+ char buf[64];
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.stream_syscall);
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ ASSERT_LT(bpf_prog_stream_read(0, BPF_STREAM_STDOUT, buf, sizeof(buf), &ropts), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -EINVAL, "bad prog_fd");
+
+ ASSERT_LT(bpf_prog_stream_read(prog_fd, 0, buf, sizeof(buf), &ropts), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -ENOENT, "bad stream id");
+
+ ASSERT_LT(bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, NULL, sizeof(buf), NULL), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -EFAULT, "bad stream buf");
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL);
+ ASSERT_EQ(ret, 2, "bytes");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL);
+ ASSERT_EQ(ret, 1, "bytes");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 1, &ropts);
+ ASSERT_EQ(ret, 0, "no bytes stdout");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, 1, &ropts);
+ ASSERT_EQ(ret, 0, "no bytes stderr");
+
+ stream__destroy(skel);
+}
+
+static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret, prog_fd;
+ char fault_addr[64];
+ char buf[1024];
+
+ prog_fd = bpf_program__fd(prog);
+
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ sprintf(fault_addr, "0x%lx", *fault_addr_p);
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ ASSERT_LE(ret, 1023, "len for buf");
+ buf[ret] = '\0';
+
+ if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) {
+ fprintf(stderr, "Output from stream:\n%s\n", buf);
+ fprintf(stderr, "Fault Addr: %s\n", fault_addr);
+ }
+}
+
+void test_stream_arena_fault_address(void)
+{
+ struct stream *skel;
+
+#if !defined(__x86_64__) && !defined(__aarch64__)
+ printf("%s:SKIP: arena fault reporting not supported\n", __func__);
+ test__skip();
+ return;
+#endif
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ if (test__start_subtest("read_fault"))
+ test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr);
+ if (test__start_subtest("write_fault"))
+ test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr);
+
+ stream__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
new file mode 100644
index 000000000000..0f3bf594e7a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include <test_progs.h>
+#include "string_kfuncs_success.skel.h"
+#include "string_kfuncs_failure1.skel.h"
+#include "string_kfuncs_failure2.skel.h"
+#include <sys/mman.h>
+
+static const char * const test_cases[] = {
+ "strcmp",
+ "strcasecmp",
+ "strchr",
+ "strchrnul",
+ "strnchr",
+ "strrchr",
+ "strlen",
+ "strnlen",
+ "strspn_str",
+ "strspn_accept",
+ "strcspn_str",
+ "strcspn_reject",
+ "strstr",
+ "strcasestr",
+ "strnstr",
+ "strncasestr",
+};
+
+void run_too_long_tests(void)
+{
+ struct string_kfuncs_failure2 *skel;
+ struct bpf_program *prog;
+ char test_name[256];
+ int err, i;
+
+ skel = string_kfuncs_failure2__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load"))
+ return;
+
+ memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str));
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ sprintf(test_name, "test_%s_too_long", test_cases[i]);
+ if (!test__start_subtest(test_name))
+ continue;
+
+ prog = bpf_object__find_program_by_name(skel->obj, test_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto cleanup;
+
+ ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG");
+ }
+
+cleanup:
+ string_kfuncs_failure2__destroy(skel);
+}
+
+void test_string_kfuncs(void)
+{
+ RUN_TESTS(string_kfuncs_success);
+ RUN_TESTS(string_kfuncs_failure1);
+
+ run_too_long_tests();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c
new file mode 100644
index 000000000000..a5cc593c1e1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_autocreate.skel.h"
+#include "struct_ops_autocreate2.skel.h"
+
+static void cant_load_full_object(void)
+{
+ struct struct_ops_autocreate *skel;
+ char *log = NULL;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open"))
+ return;
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+ /* The testmod_2 map BTF type (struct bpf_testmod_ops___v2) doesn't
+ * match the BTF of the actual struct bpf_testmod_ops defined in the
+ * kernel, so we should fail to load it if we don't disable autocreate
+ * for that map.
+ */
+ err = struct_ops_autocreate__load(skel);
+ log = stop_libbpf_log_capture();
+ if (!ASSERT_ERR(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log, "libbpf: struct_ops init_kern", "init_kern message");
+ ASSERT_EQ(err, -ENOTSUP, "errno should be ENOTSUP");
+
+cleanup:
+ free(log);
+ struct_ops_autocreate__destroy(skel);
+}
+
+static int check_test_1_link(struct struct_ops_autocreate *skel, struct bpf_map *map)
+{
+ struct bpf_link *link;
+ int err;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+ return -1;
+
+ /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */
+ err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result");
+ bpf_link__destroy(link);
+ return err;
+}
+
+static void can_load_partial_object(void)
+{
+ struct struct_ops_autocreate *skel;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts"))
+ return;
+
+ err = bpf_map__set_autocreate(skel->maps.testmod_2, false);
+ if (!ASSERT_OK(err, "bpf_map__set_autocreate"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 default autoload");
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_2), "test_2 default autoload");
+
+ err = struct_ops_autocreate__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 actual autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.test_2), "test_2 actual autoload");
+
+ check_test_1_link(skel, skel->maps.testmod_1);
+
+cleanup:
+ struct_ops_autocreate__destroy(skel);
+}
+
+static void optional_maps(void)
+{
+ struct struct_ops_autocreate *skel;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open"))
+ return;
+
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_1), "testmod_1 autocreate");
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_2), "testmod_2 autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map), "optional_map autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map2), "optional_map2 autocreate");
+
+ err = bpf_map__set_autocreate(skel->maps.testmod_1, false);
+ err |= bpf_map__set_autocreate(skel->maps.testmod_2, false);
+ err |= bpf_map__set_autocreate(skel->maps.optional_map2, true);
+ if (!ASSERT_OK(err, "bpf_map__set_autocreate"))
+ goto cleanup;
+
+ err = struct_ops_autocreate__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ check_test_1_link(skel, skel->maps.optional_map2);
+
+cleanup:
+ struct_ops_autocreate__destroy(skel);
+}
+
+/* Swap test_mod1->test_1 program from 'bar' to 'foo' using shadow vars.
+ * test_mod1 load should enable autoload for 'foo'.
+ */
+static void autoload_and_shadow_vars(void)
+{
+ struct struct_ops_autocreate2 *skel = NULL;
+ struct bpf_link *link = NULL;
+ int err;
+
+ skel = struct_ops_autocreate2__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts"))
+ return;
+
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.foo), "foo default autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar default autoload");
+
+ /* loading map testmod_1 would switch foo's autoload to true */
+ skel->struct_ops.testmod_1->test_1 = skel->progs.foo;
+
+ err = struct_ops_autocreate2__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo actual autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar actual autoload");
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+ goto cleanup;
+
+ /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */
+ err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ struct_ops_autocreate2__destroy(skel);
+}
+
+void test_struct_ops_autocreate(void)
+{
+ if (test__start_subtest("cant_load_full_object"))
+ cant_load_full_object();
+ if (test__start_subtest("can_load_partial_object"))
+ can_load_partial_object();
+ if (test__start_subtest("autoload_and_shadow_vars"))
+ autoload_and_shadow_vars();
+ if (test__start_subtest("optional_maps"))
+ optional_maps();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c
new file mode 100644
index 000000000000..4006879ca3fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_private_stack.skel.h"
+#include "struct_ops_private_stack_fail.skel.h"
+#include "struct_ops_private_stack_recur.skel.h"
+
+static void test_private_stack(void)
+{
+ struct struct_ops_private_stack *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = struct_ops_private_stack__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack__open"))
+ return;
+
+ if (skel->data->skip) {
+ test__skip();
+ goto cleanup;
+ }
+
+ err = struct_ops_private_stack__load(skel);
+ if (!ASSERT_OK(err, "struct_ops_private_stack__load"))
+ goto cleanup;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->val_i, 3, "val_i");
+ ASSERT_EQ(skel->bss->val_j, 8, "val_j");
+
+ bpf_link__destroy(link);
+
+cleanup:
+ struct_ops_private_stack__destroy(skel);
+}
+
+static void test_private_stack_fail(void)
+{
+ struct struct_ops_private_stack_fail *skel;
+ int err;
+
+ skel = struct_ops_private_stack_fail__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_fail__open"))
+ return;
+
+ if (skel->data->skip) {
+ test__skip();
+ goto cleanup;
+ }
+
+ err = struct_ops_private_stack_fail__load(skel);
+ if (!ASSERT_ERR(err, "struct_ops_private_stack_fail__load"))
+ goto cleanup;
+ return;
+
+cleanup:
+ struct_ops_private_stack_fail__destroy(skel);
+}
+
+static void test_private_stack_recur(void)
+{
+ struct struct_ops_private_stack_recur *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = struct_ops_private_stack_recur__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_recur__open"))
+ return;
+
+ if (skel->data->skip) {
+ test__skip();
+ goto cleanup;
+ }
+
+ err = struct_ops_private_stack_recur__load(skel);
+ if (!ASSERT_OK(err, "struct_ops_private_stack_recur__load"))
+ goto cleanup;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->val_j, 3, "val_j");
+
+ bpf_link__destroy(link);
+
+cleanup:
+ struct_ops_private_stack_recur__destroy(skel);
+}
+
+void test_struct_ops_private_stack(void)
+{
+ if (test__start_subtest("private_stack"))
+ test_private_stack();
+ if (test__start_subtest("private_stack_fail"))
+ test_private_stack_fail();
+ if (test__start_subtest("private_stack_recur"))
+ test_private_stack_recur();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c
index 3f3d2ac4dd57..903f35a9e62e 100644
--- a/tools/testing/selftests/bpf/prog_tests/subprogs.c
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c
@@ -1,32 +1,83 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
-#include <time.h>
#include "test_subprogs.skel.h"
#include "test_subprogs_unused.skel.h"
-static int duration;
+struct toggler_ctx {
+ int fd;
+ bool stop;
+};
-void test_subprogs(void)
+static void *toggle_jit_harden(void *arg)
+{
+ struct toggler_ctx *ctx = arg;
+ char two = '2';
+ char zero = '0';
+
+ while (!ctx->stop) {
+ lseek(ctx->fd, SEEK_SET, 0);
+ write(ctx->fd, &two, sizeof(two));
+ lseek(ctx->fd, SEEK_SET, 0);
+ write(ctx->fd, &zero, sizeof(zero));
+ }
+
+ return NULL;
+}
+
+static void test_subprogs_with_jit_harden_toggling(void)
+{
+ struct toggler_ctx ctx;
+ pthread_t toggler;
+ int err;
+ unsigned int i, loop = 10;
+
+ ctx.fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR);
+ if (!ASSERT_GE(ctx.fd, 0, "open bpf_jit_harden"))
+ return;
+
+ ctx.stop = false;
+ err = pthread_create(&toggler, NULL, toggle_jit_harden, &ctx);
+ if (!ASSERT_OK(err, "new toggler"))
+ goto out;
+
+ /* Make toggler thread to run */
+ usleep(1);
+
+ for (i = 0; i < loop; i++) {
+ struct test_subprogs *skel = test_subprogs__open_and_load();
+
+ if (!ASSERT_OK_PTR(skel, "skel open"))
+ break;
+ test_subprogs__destroy(skel);
+ }
+
+ ctx.stop = true;
+ pthread_join(toggler, NULL);
+out:
+ close(ctx.fd);
+}
+
+static void test_subprogs_alone(void)
{
struct test_subprogs *skel;
struct test_subprogs_unused *skel2;
int err;
skel = test_subprogs__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
err = test_subprogs__attach(skel);
- if (CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err))
+ if (!ASSERT_OK(err, "skel attach"))
goto cleanup;
usleep(1);
- CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12);
- CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17);
- CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
- CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
+ ASSERT_EQ(skel->bss->res1, 12, "res1");
+ ASSERT_EQ(skel->bss->res2, 17, "res2");
+ ASSERT_EQ(skel->bss->res3, 19, "res3");
+ ASSERT_EQ(skel->bss->res4, 36, "res4");
skel2 = test_subprogs_unused__open_and_load();
ASSERT_OK_PTR(skel2, "unused_progs_skel");
@@ -35,3 +86,11 @@ void test_subprogs(void)
cleanup:
test_subprogs__destroy(skel);
}
+
+void test_subprogs(void)
+{
+ if (test__start_subtest("subprogs_alone"))
+ test_subprogs_alone();
+ if (test__start_subtest("subprogs_and_jit_harden"))
+ test_subprogs_with_jit_harden_toggling();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c b/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c
new file mode 100644
index 000000000000..3afd9f775f68
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_subprogs_extable.skel.h"
+
+void test_subprogs_extable(void)
+{
+ const int read_sz = 456;
+ struct test_subprogs_extable *skel;
+ int err;
+
+ skel = test_subprogs_extable__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = test_subprogs_extable__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ ASSERT_OK(trigger_module_test_read(read_sz), "trigger_read");
+
+ ASSERT_NEQ(skel->bss->triggered, 0, "verify at least one program ran");
+
+ test_subprogs_extable__detach(skel);
+
+cleanup:
+ test_subprogs_extable__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/subskeleton.c b/tools/testing/selftests/bpf/prog_tests/subskeleton.c
new file mode 100644
index 000000000000..fdf13ed0152a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subskeleton.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "test_subskeleton.skel.h"
+#include "test_subskeleton_lib.subskel.h"
+
+static void subskeleton_lib_setup(struct bpf_object *obj)
+{
+ struct test_subskeleton_lib *lib = test_subskeleton_lib__open(obj);
+
+ if (!ASSERT_OK_PTR(lib, "open subskeleton"))
+ return;
+
+ *lib->rodata.var1 = 1;
+ *lib->data.var2 = 2;
+ lib->bss.var3->var3_1 = 3;
+ lib->bss.var3->var3_2 = 4;
+
+ test_subskeleton_lib__destroy(lib);
+}
+
+static int subskeleton_lib_subresult(struct bpf_object *obj)
+{
+ struct test_subskeleton_lib *lib = test_subskeleton_lib__open(obj);
+ int result;
+
+ if (!ASSERT_OK_PTR(lib, "open subskeleton"))
+ return -EINVAL;
+
+ result = *lib->bss.libout1;
+ ASSERT_EQ(result, 1 + 2 + 3 + 4 + 5 + 6, "lib subresult");
+
+ ASSERT_OK_PTR(lib->progs.lib_perf_handler, "lib_perf_handler");
+ ASSERT_STREQ(bpf_program__name(lib->progs.lib_perf_handler),
+ "lib_perf_handler", "program name");
+
+ ASSERT_OK_PTR(lib->maps.map1, "map1");
+ ASSERT_STREQ(bpf_map__name(lib->maps.map1), "map1", "map name");
+
+ ASSERT_EQ(*lib->data.var5, 5, "__weak var5");
+ ASSERT_EQ(*lib->data.var6, 6, "extern var6");
+ ASSERT_TRUE(*lib->kconfig.CONFIG_BPF_SYSCALL, "CONFIG_BPF_SYSCALL");
+
+ test_subskeleton_lib__destroy(lib);
+ return result;
+}
+
+/* initialize and load through skeleton, then instantiate subskeleton out of it */
+static void subtest_skel_subskeleton(void)
+{
+ int err, result;
+ struct test_subskeleton *skel;
+
+ skel = test_subskeleton__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->rovar1 = 10;
+ skel->rodata->var1 = 1;
+ subskeleton_lib_setup(skel->obj);
+
+ err = test_subskeleton__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_subskeleton__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ result = subskeleton_lib_subresult(skel->obj) * 10;
+ ASSERT_EQ(skel->bss->out1, result, "unexpected calculation");
+
+cleanup:
+ test_subskeleton__destroy(skel);
+}
+
+/* initialize and load through generic bpf_object API, then instantiate subskeleton out of it */
+static void subtest_obj_subskeleton(void)
+{
+ int err, result;
+ const void *elf_bytes;
+ size_t elf_bytes_sz = 0, rodata_sz = 0, bss_sz = 0;
+ struct bpf_object *obj;
+ const struct bpf_map *map;
+ const struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+ struct test_subskeleton__rodata *rodata;
+ struct test_subskeleton__bss *bss;
+
+ elf_bytes = test_subskeleton__elf_bytes(&elf_bytes_sz);
+ if (!ASSERT_OK_PTR(elf_bytes, "elf_bytes"))
+ return;
+
+ obj = bpf_object__open_mem(elf_bytes, elf_bytes_sz, NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open_mem"))
+ return;
+
+ map = bpf_object__find_map_by_name(obj, ".rodata");
+ if (!ASSERT_OK_PTR(map, "rodata_map_by_name"))
+ goto cleanup;
+
+ rodata = bpf_map__initial_value(map, &rodata_sz);
+ if (!ASSERT_OK_PTR(rodata, "rodata_get"))
+ goto cleanup;
+
+ rodata->rovar1 = 10;
+ rodata->var1 = 1;
+ subskeleton_lib_setup(obj);
+
+ err = bpf_object__load(obj);
+ if (!ASSERT_OK(err, "obj_load"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(obj, "handler1");
+ if (!ASSERT_OK_PTR(prog, "prog_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "prog_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ map = bpf_object__find_map_by_name(obj, ".bss");
+ if (!ASSERT_OK_PTR(map, "bss_map_by_name"))
+ goto cleanup;
+
+ bss = bpf_map__initial_value(map, &bss_sz);
+ if (!ASSERT_OK_PTR(rodata, "rodata_get"))
+ goto cleanup;
+
+ result = subskeleton_lib_subresult(obj) * 10;
+ ASSERT_EQ(bss->out1, result, "out1");
+
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+}
+
+
+void test_subskeleton(void)
+{
+ if (test__start_subtest("skel_subskel"))
+ subtest_skel_subskeleton();
+ if (test__start_subtest("obj_subskel"))
+ subtest_obj_subskeleton();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/summarization.c b/tools/testing/selftests/bpf/prog_tests/summarization.c
new file mode 100644
index 000000000000..5dd6c120a838
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/summarization.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf/libbpf.h"
+#include "summarization_freplace.skel.h"
+#include "summarization.skel.h"
+#include <test_progs.h>
+
+static void print_verifier_log(const char *log)
+{
+ if (env.verbosity >= VERBOSE_VERY)
+ fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
+}
+
+static void test_aux(const char *main_prog_name,
+ const char *to_be_replaced,
+ const char *replacement,
+ bool expect_load,
+ const char *err_msg)
+{
+ struct summarization_freplace *freplace = NULL;
+ struct bpf_program *freplace_prog = NULL;
+ struct bpf_program *main_prog = NULL;
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct summarization *main = NULL;
+ char log[16*1024];
+ int err;
+
+ opts.kernel_log_buf = log;
+ opts.kernel_log_size = sizeof(log);
+ if (env.verbosity >= VERBOSE_SUPER)
+ opts.kernel_log_level = 1 | 2 | 4;
+ main = summarization__open_opts(&opts);
+ if (!ASSERT_OK_PTR(main, "summarization__open"))
+ goto out;
+ main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name);
+ if (!ASSERT_OK_PTR(main_prog, "main_prog"))
+ goto out;
+ bpf_program__set_autoload(main_prog, true);
+ err = summarization__load(main);
+ print_verifier_log(log);
+ if (!ASSERT_OK(err, "summarization__load"))
+ goto out;
+ freplace = summarization_freplace__open_opts(&opts);
+ if (!ASSERT_OK_PTR(freplace, "summarization_freplace__open"))
+ goto out;
+ freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement);
+ if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
+ goto out;
+ bpf_program__set_autoload(freplace_prog, true);
+ bpf_program__set_autoattach(freplace_prog, true);
+ bpf_program__set_attach_target(freplace_prog,
+ bpf_program__fd(main_prog),
+ to_be_replaced);
+ err = summarization_freplace__load(freplace);
+ print_verifier_log(log);
+
+ /* The might_sleep extension doesn't work yet as sleepable calls are not
+ * allowed, but preserve the check in case it's supported later and then
+ * this particular combination can be enabled.
+ */
+ if (!strcmp("might_sleep", replacement) && err) {
+ ASSERT_HAS_SUBSTR(log, "helper call might sleep in a non-sleepable prog", "error log");
+ ASSERT_EQ(err, -EINVAL, "err");
+ test__skip();
+ goto out;
+ }
+
+ if (expect_load) {
+ ASSERT_OK(err, "summarization_freplace__load");
+ } else {
+ ASSERT_ERR(err, "summarization_freplace__load");
+ ASSERT_HAS_SUBSTR(log, err_msg, "error log");
+ }
+
+out:
+ summarization_freplace__destroy(freplace);
+ summarization__destroy(main);
+}
+
+/* There are two global subprograms in both summarization.skel.h:
+ * - one changes packet data;
+ * - another does not.
+ * It is ok to freplace subprograms that change packet data with those
+ * that either do or do not. It is only ok to freplace subprograms
+ * that do not change packet data with those that do not as well.
+ * The below tests check outcomes for each combination of such freplace.
+ * Also test a case when main subprogram itself is replaced and is a single
+ * subprogram in a program.
+ *
+ * This holds for might_sleep programs. It is ok to replace might_sleep with
+ * might_sleep and with does_not_sleep, but does_not_sleep cannot be replaced
+ * with might_sleep.
+ */
+void test_summarization_freplace(void)
+{
+ struct {
+ const char *main;
+ const char *to_be_replaced;
+ bool has_side_effect;
+ } mains[2][4] = {
+ {
+ { "main_changes_with_subprogs", "changes_pkt_data", true },
+ { "main_changes_with_subprogs", "does_not_change_pkt_data", false },
+ { "main_changes", "main_changes", true },
+ { "main_does_not_change", "main_does_not_change", false },
+ },
+ {
+ { "main_might_sleep_with_subprogs", "might_sleep", true },
+ { "main_might_sleep_with_subprogs", "does_not_sleep", false },
+ { "main_might_sleep", "main_might_sleep", true },
+ { "main_does_not_sleep", "main_does_not_sleep", false },
+ },
+ };
+ const char *pkt_err = "Extension program changes packet data";
+ const char *slp_err = "Extension program may sleep";
+ struct {
+ const char *func;
+ bool has_side_effect;
+ const char *err_msg;
+ } replacements[2][2] = {
+ {
+ { "changes_pkt_data", true, pkt_err },
+ { "does_not_change_pkt_data", false, pkt_err },
+ },
+ {
+ { "might_sleep", true, slp_err },
+ { "does_not_sleep", false, slp_err },
+ },
+ };
+ char buf[64];
+
+ for (int t = 0; t < 2; t++) {
+ for (int i = 0; i < ARRAY_SIZE(mains); ++i) {
+ for (int j = 0; j < ARRAY_SIZE(replacements); ++j) {
+ snprintf(buf, sizeof(buf), "%s_with_%s",
+ mains[t][i].to_be_replaced, replacements[t][j].func);
+ if (!test__start_subtest(buf))
+ continue;
+ test_aux(mains[t][i].main, mains[t][i].to_be_replaced, replacements[t][j].func,
+ mains[t][i].has_side_effect || !replacements[t][j].has_side_effect,
+ replacements[t][j].err_msg);
+ }
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
new file mode 100644
index 000000000000..0be8301c0ffd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "syscall.skel.h"
+
+struct args {
+ __u64 log_buf;
+ __u32 log_size;
+ int max_entries;
+ int map_fd;
+ int prog_fd;
+ int btf_fd;
+};
+
+static void test_syscall_load_prog(void)
+{
+ static char verifier_log[8192];
+ struct args ctx = {
+ .max_entries = 1024,
+ .log_buf = (uintptr_t) verifier_log,
+ .log_size = sizeof(verifier_log),
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
+ .ctx_in = &ctx,
+ .ctx_size_in = sizeof(ctx),
+ );
+ struct syscall *skel = NULL;
+ __u64 key = 12, value = 0;
+ int err, prog_fd;
+
+ skel = syscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.load_prog);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_EQ(err, 0, "err");
+ ASSERT_EQ(tattr.retval, 1, "retval");
+ ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd");
+ ASSERT_GT(ctx.prog_fd, 0, "ctx.prog_fd");
+ ASSERT_OK(memcmp(verifier_log, "processed", sizeof("processed") - 1),
+ "verifier_log");
+
+ err = bpf_map_lookup_elem(ctx.map_fd, &key, &value);
+ ASSERT_EQ(err, 0, "map_lookup");
+ ASSERT_EQ(value, 34, "map lookup value");
+cleanup:
+ syscall__destroy(skel);
+ if (ctx.prog_fd > 0)
+ close(ctx.prog_fd);
+ if (ctx.map_fd > 0)
+ close(ctx.map_fd);
+ if (ctx.btf_fd > 0)
+ close(ctx.btf_fd);
+}
+
+static void test_syscall_update_outer_map(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct syscall *skel;
+ int err, prog_fd;
+
+ skel = syscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.update_outer_map);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(err, 0, "err");
+ ASSERT_EQ(opts.retval, 1, "retval");
+cleanup:
+ syscall__destroy(skel);
+}
+
+void test_syscall(void)
+{
+ if (test__start_subtest("load_prog"))
+ test_syscall_load_prog();
+ if (test__start_subtest("update_outer_map"))
+ test_syscall_update_outer_map();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index ee27d68d2a1c..0ab36503c3b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1,6 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
#include <test_progs.h>
#include <network_helpers.h>
+#include "tailcall_poke.skel.h"
+#include "tailcall_bpf2bpf_hierarchy2.skel.h"
+#include "tailcall_bpf2bpf_hierarchy3.skel.h"
+#include "tailcall_freplace.skel.h"
+#include "tc_bpf2bpf.skel.h"
+#include "tailcall_fail.skel.h"
/* test_tailcall_1 checks basic functionality by patching multiple locations
* in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -12,16 +19,20 @@ static void test_tailcall_1(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
char buff[128] = {};
-
- err = bpf_prog_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall1.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -37,10 +48,10 @@ static void test_tailcall_1(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -53,26 +64,24 @@ static void test_tailcall_1(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != i, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, i, "tailcall retval");
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -85,16 +94,15 @@ static void test_tailcall_1(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- j = bpf_map__def(prog_array)->max_entries - 1 - i;
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", j);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ j = bpf_map__max_entries(prog_array) - 1 - i;
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -107,33 +115,30 @@ static void test_tailcall_1(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- j = bpf_map__def(prog_array)->max_entries - 1 - i;
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ j = bpf_map__max_entries(prog_array) - 1 - i;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != j, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, j, "tailcall retval");
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err >= 0 || errno != ENOENT))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
}
out:
@@ -150,16 +155,20 @@ static void test_tailcall_2(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
char buff[128] = {};
-
- err = bpf_prog_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall2.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -175,10 +184,10 @@ static void test_tailcall_2(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -191,52 +200,52 @@ static void test_tailcall_2(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 2, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 2, "tailcall retval");
i = 2;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
out:
bpf_object__close(obj);
}
-/* test_tailcall_3 checks that the count value of the tail call limit
- * enforcement matches with expectations.
- */
-static void test_tailcall_3(void)
+static void test_tailcall_count(const char *which, bool test_fentry,
+ bool test_fexit)
{
+ struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
+ struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
int err, map_fd, prog_fd, main_fd, data_fd, i, val;
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
- struct bpf_object *obj;
- __u32 retval, duration;
char buff[128] = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
- err = bpf_prog_load("tailcall3.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -252,7 +261,7 @@ static void test_tailcall_3(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- prog = bpf_object__find_program_by_title(obj, "classifier/0");
+ prog = bpf_object__find_program_by_name(obj, "classifier_0");
if (CHECK_FAIL(!prog))
goto out;
@@ -265,37 +274,137 @@ static void test_tailcall_3(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ if (test_fentry) {
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+ }
+
+ if (test_fexit) {
+ fexit_obj = bpf_object__open_file("tailcall_bpf2bpf_fexit.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fexit_obj, "open fexit_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fexit_obj, "fexit");
+ if (!ASSERT_OK_PTR(prog, "find fexit prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fexit_obj);
+ if (!ASSERT_OK(err, "load fexit_obj"))
+ goto out;
+
+ fexit_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fexit_link, "attach_trace"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
- CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
- err, errno, val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 33, "tailcall count");
+
+ if (test_fentry) {
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fentry.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0,
+ "find tailcall_bpf2bpf_fentry.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 33, "fentry count");
+ }
+
+ if (test_fexit) {
+ data_map = bpf_object__find_map_by_name(fexit_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fexit.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0,
+ "find tailcall_bpf2bpf_fexit.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "fexit count");
+ ASSERT_EQ(val, 33, "fexit count");
+ }
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
out:
+ bpf_link__destroy(fentry_link);
+ bpf_link__destroy(fexit_link);
+ bpf_object__close(fentry_obj);
+ bpf_object__close(fexit_obj);
bpf_object__close(obj);
}
+/* test_tailcall_3 checks that the count value of the tail call limit
+ * enforcement matches with expectations. JIT uses direct jump.
+ */
+static void test_tailcall_3(void)
+{
+ test_tailcall_count("tailcall3.bpf.o", false, false);
+}
+
+/* test_tailcall_6 checks that the count value of the tail call limit
+ * enforcement matches with expectations. JIT uses indirect jump.
+ */
+static void test_tailcall_6(void)
+{
+ test_tailcall_count("tailcall6.bpf.o", false, false);
+}
+
/* test_tailcall_4 checks that the kernel properly selects indirect jump
* for the case where the key is not known. Latter is passed via global
* data to select different targets we can compare return value of.
@@ -306,17 +415,21 @@ static void test_tailcall_4(void)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
static const int zero = 0;
char buff[128] = {};
char prog_name[32];
-
- err = bpf_prog_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall4.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -334,16 +447,16 @@ static void test_tailcall_4(void)
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -356,18 +469,17 @@ static void test_tailcall_4(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != i, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, i, "tailcall retval");
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
if (CHECK_FAIL(err))
goto out;
@@ -376,10 +488,9 @@ static void test_tailcall_4(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
}
out:
bpf_object__close(obj);
@@ -394,17 +505,21 @@ static void test_tailcall_5(void)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
static const int zero = 0;
char buff[128] = {};
char prog_name[32];
-
- err = bpf_prog_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall5.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -422,16 +537,16 @@ static void test_tailcall_5(void)
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -444,18 +559,17 @@ static void test_tailcall_5(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != i, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, i, "tailcall retval");
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
if (CHECK_FAIL(err))
goto out;
@@ -464,10 +578,9 @@ static void test_tailcall_5(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
}
out:
bpf_object__close(obj);
@@ -482,15 +595,19 @@ static void test_tailcall_bpf2bpf_1(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
-
- err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
- &obj, &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf1.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -507,10 +624,10 @@ static void test_tailcall_bpf2bpf_1(void)
goto out;
/* nop -> jmp */
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -523,10 +640,9 @@ static void test_tailcall_bpf2bpf_1(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- 0, &retval, &duration);
- CHECK(err || retval != 1, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
/* jmp -> nop, call subprog that will do tailcall */
i = 1;
@@ -534,10 +650,9 @@ static void test_tailcall_bpf2bpf_1(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- 0, &retval, &duration);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
/* make sure that subprog can access ctx and entry prog that
* called this subprog can properly return
@@ -547,11 +662,9 @@ static void test_tailcall_bpf2bpf_1(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- 0, &retval, &duration);
- CHECK(err || retval != sizeof(pkt_v4) * 2,
- "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval");
out:
bpf_object__close(obj);
}
@@ -566,15 +679,19 @@ static void test_tailcall_bpf2bpf_2(void)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char buff[128] = {};
-
- err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
- &obj, &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf2.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -590,7 +707,7 @@ static void test_tailcall_bpf2bpf_2(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- prog = bpf_object__find_program_by_title(obj, "classifier/0");
+ prog = bpf_object__find_program_by_name(obj, "classifier_0");
if (CHECK_FAIL(!prog))
goto out;
@@ -603,33 +720,31 @@ static void test_tailcall_bpf2bpf_2(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
- CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
- err, errno, val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 33, "tailcall count");
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
out:
bpf_object__close(obj);
}
@@ -644,15 +759,19 @@ static void test_tailcall_bpf2bpf_3(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
-
- err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
- &obj, &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf3.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -668,10 +787,10 @@ static void test_tailcall_bpf2bpf_3(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -684,37 +803,33 @@ static void test_tailcall_bpf2bpf_3(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != sizeof(pkt_v4) * 3,
- "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval");
i = 1;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != sizeof(pkt_v4),
- "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4), "tailcall retval");
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != sizeof(pkt_v4) * 2,
- "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval");
out:
bpf_object__close(obj);
}
+#include "tailcall_bpf2bpf4.skel.h"
+
/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved
* across tailcalls combined with bpf2bpf calls. for making sure that tailcall
* counter behaves correctly, bpf program will go through following flow:
@@ -727,22 +842,31 @@ out:
* the loop begins. At the end of the test make sure that the global counter is
* equal to 31, because tailcall counter includes the first two tailcalls
* whereas global counter is incremented only on loop presented on flow above.
+ *
+ * The noise parameter is used to insert bpf_map_update calls into the logic
+ * to force verifier to patch instructions. This allows us to ensure jump
+ * logic remains correct with instruction movement.
*/
-static void test_tailcall_bpf2bpf_4(void)
+static void test_tailcall_bpf2bpf_4(bool noise)
{
- int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ int err, map_fd, prog_fd, main_fd, data_fd, i;
+ struct tailcall_bpf2bpf4__bss val;
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
-
- err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
- &obj, &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf4.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -758,10 +882,10 @@ static void test_tailcall_bpf2bpf_4(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -774,28 +898,761 @@ static void test_tailcall_bpf2bpf_4(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
-
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
+
+ i = 0;
+ val.noise = noise;
+ val.count = 0;
+ err = bpf_map_update_elem(data_fd, &i, &val, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval");
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val.count, 31, "tailcall count");
+
+out:
+ bpf_object__close(obj);
+}
+
+#include "tailcall_bpf2bpf6.skel.h"
+
+/* Tail call counting works even when there is data on stack which is
+ * not aligned to 8 bytes.
+ */
+static void test_tailcall_bpf2bpf_6(void)
+{
+ struct tailcall_bpf2bpf6 *obj;
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ obj = tailcall_bpf2bpf6__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "open and load"))
return;
+ main_fd = bpf_program__fd(obj->progs.entry);
+ if (!ASSERT_GE(main_fd, 0, "entry prog fd"))
+ goto out;
+
+ map_fd = bpf_map__fd(obj->maps.jmp_table);
+ if (!ASSERT_GE(map_fd, 0, "jmp_table map fd"))
+ goto out;
+
+ prog_fd = bpf_program__fd(obj->progs.classifier_0);
+ if (!ASSERT_GE(prog_fd, 0, "classifier_0 prog fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "jmp_table map update"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "entry prog test run");
+ ASSERT_EQ(topts.retval, 0, "tailcall retval");
+
+ data_fd = bpf_map__fd(obj->maps.bss);
+ if (!ASSERT_GE(data_fd, 0, "bss map fd"))
+ goto out;
+
i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
- CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n",
- err, errno, val);
+ ASSERT_OK(err, "bss map lookup");
+ ASSERT_EQ(val, 1, "done flag is set");
out:
+ tailcall_bpf2bpf6__destroy(obj);
+}
+
+/* test_tailcall_bpf2bpf_fentry checks that the count value of the tail call
+ * limit enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call, and the bpf2bpf call is traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_fentry(void)
+{
+ test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, false);
+}
+
+/* test_tailcall_bpf2bpf_fexit checks that the count value of the tail call
+ * limit enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call, and the bpf2bpf call is traced by fexit.
+ */
+static void test_tailcall_bpf2bpf_fexit(void)
+{
+ test_tailcall_count("tailcall_bpf2bpf2.bpf.o", false, true);
+}
+
+/* test_tailcall_bpf2bpf_fentry_fexit checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcall is preceded
+ * with bpf2bpf call, and the bpf2bpf call is traced by both fentry and fexit.
+ */
+static void test_tailcall_bpf2bpf_fentry_fexit(void)
+{
+ test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, true);
+}
+
+/* test_tailcall_bpf2bpf_fentry_entry checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcall is preceded
+ * with bpf2bpf call, and the bpf2bpf caller is traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_fentry_entry(void)
+{
+ struct bpf_object *tgt_obj = NULL, *fentry_obj = NULL;
+ int err, map_fd, prog_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_link *fentry_link = NULL;
+ struct bpf_program *prog;
+ char buff[128] = {};
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf2.bpf.o",
+ BPF_PROG_TYPE_SCHED_CLS,
+ &tgt_obj, &prog_fd);
+ if (!ASSERT_OK(err, "load tgt_obj"))
+ return;
+
+ prog_array = bpf_object__find_map_by_name(tgt_obj, "jmp_table");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table map"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_FALSE(map_fd < 0, "find jmp_table map fd"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(tgt_obj, "classifier_0");
+ if (!ASSERT_OK_PTR(prog, "find classifier_0 prog"))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_FALSE(prog_fd < 0, "find classifier_0 prog fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd, "classifier_0");
+ if (!ASSERT_OK(err, "set_attach_target classifier_0"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ data_map = bpf_object__find_map_by_name(tgt_obj, "tailcall.bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0, "find tailcall.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 34, "tailcall count");
+
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fentry.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0,
+ "find tailcall_bpf2bpf_fentry.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 1, "fentry count");
+
+out:
+ bpf_link__destroy(fentry_link);
+ bpf_object__close(fentry_obj);
+ bpf_object__close(tgt_obj);
+}
+
+#define JMP_TABLE "/sys/fs/bpf/jmp_table"
+
+static int poke_thread_exit;
+
+static void *poke_update(void *arg)
+{
+ __u32 zero = 0, prog1_fd, prog2_fd, map_fd;
+ struct tailcall_poke *call = arg;
+
+ map_fd = bpf_map__fd(call->maps.jmp_table);
+ prog1_fd = bpf_program__fd(call->progs.call1);
+ prog2_fd = bpf_program__fd(call->progs.call2);
+
+ while (!poke_thread_exit) {
+ bpf_map_update_elem(map_fd, &zero, &prog1_fd, BPF_ANY);
+ bpf_map_update_elem(map_fd, &zero, &prog2_fd, BPF_ANY);
+ }
+
+ return NULL;
+}
+
+/*
+ * We are trying to hit prog array update during another program load
+ * that shares the same prog array map.
+ *
+ * For that we share the jmp_table map between two skeleton instances
+ * by pinning the jmp_table to same path. Then first skeleton instance
+ * periodically updates jmp_table in 'poke update' thread while we load
+ * the second skeleton instance in the main thread.
+ */
+static void test_tailcall_poke(void)
+{
+ struct tailcall_poke *call, *test;
+ int err, cnt = 10;
+ pthread_t thread;
+
+ unlink(JMP_TABLE);
+
+ call = tailcall_poke__open_and_load();
+ if (!ASSERT_OK_PTR(call, "tailcall_poke__open"))
+ return;
+
+ err = bpf_map__pin(call->maps.jmp_table, JMP_TABLE);
+ if (!ASSERT_OK(err, "bpf_map__pin"))
+ goto out;
+
+ err = pthread_create(&thread, NULL, poke_update, call);
+ if (!ASSERT_OK(err, "new toggler"))
+ goto out;
+
+ while (cnt--) {
+ test = tailcall_poke__open();
+ if (!ASSERT_OK_PTR(test, "tailcall_poke__open"))
+ break;
+
+ err = bpf_map__set_pin_path(test->maps.jmp_table, JMP_TABLE);
+ if (!ASSERT_OK(err, "bpf_map__pin")) {
+ tailcall_poke__destroy(test);
+ break;
+ }
+
+ bpf_program__set_autoload(test->progs.test, true);
+ bpf_program__set_autoload(test->progs.call1, false);
+ bpf_program__set_autoload(test->progs.call2, false);
+
+ err = tailcall_poke__load(test);
+ tailcall_poke__destroy(test);
+ if (!ASSERT_OK(err, "tailcall_poke__load"))
+ break;
+ }
+
+ poke_thread_exit = 1;
+ ASSERT_OK(pthread_join(thread, NULL), "pthread_join");
+
+out:
+ bpf_map__unpin(call->maps.jmp_table, JMP_TABLE);
+ tailcall_poke__destroy(call);
+}
+
+static void test_tailcall_hierarchy_count(const char *which, bool test_fentry,
+ bool test_fexit,
+ bool test_fentry_entry)
+{
+ int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val;
+ struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
+ struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
+ struct bpf_program *prog, *fentry_prog;
+ struct bpf_map *prog_array, *data_map;
+ int fentry_prog_fd;
+ char buff[128] = {};
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (!ASSERT_OK(err, "load obj"))
+ return;
+
+ prog = bpf_object__find_program_by_name(obj, "entry");
+ if (!ASSERT_OK_PTR(prog, "find entry prog"))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto out;
+
+ if (test_fentry_entry) {
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_hierarchy_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ fentry_prog = bpf_object__find_program_by_name(fentry_obj,
+ "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fentry_prog, prog_fd,
+ "entry");
+ if (!ASSERT_OK(err, "set_attach_target entry"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(fentry_prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+
+ fentry_prog_fd = bpf_program__fd(fentry_prog);
+ if (!ASSERT_GE(fentry_prog_fd, 0, "fentry_prog_fd"))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(fentry_obj, "jmp_table");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &fentry_prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find data_map"))
+ goto out;
+
+ } else {
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ data_map = bpf_object__find_map_by_name(obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find data_map"))
+ goto out;
+ }
+
+ if (test_fentry) {
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+ }
+
+ if (test_fexit) {
+ fexit_obj = bpf_object__open_file("tailcall_bpf2bpf_fexit.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fexit_obj, "open fexit_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fexit_obj, "fexit");
+ if (!ASSERT_OK_PTR(prog, "find fexit prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fexit_obj);
+ if (!ASSERT_OK(err, "load fexit_obj"))
+ goto out;
+
+ fexit_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fexit_link, "attach_trace"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ main_data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_GE(main_data_fd, 0, "main_data_fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(main_data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 34, "tailcall count");
+
+ if (test_fentry) {
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fentry.bss map"))
+ goto out;
+
+ fentry_data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_GE(fentry_data_fd, 0,
+ "find tailcall_bpf2bpf_fentry.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(fentry_data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 68, "fentry count");
+ }
+
+ if (test_fexit) {
+ data_map = bpf_object__find_map_by_name(fexit_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fexit.bss map"))
+ goto out;
+
+ fexit_data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_GE(fexit_data_fd, 0,
+ "find tailcall_bpf2bpf_fexit.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(fexit_data_fd, &i, &val);
+ ASSERT_OK(err, "fexit count");
+ ASSERT_EQ(val, 68, "fexit count");
+ }
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (!ASSERT_OK(err, "delete_elem from jmp_table"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ i = 0;
+ err = bpf_map_lookup_elem(main_data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 35, "tailcall count");
+
+ if (test_fentry) {
+ i = 0;
+ err = bpf_map_lookup_elem(fentry_data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 70, "fentry count");
+ }
+
+ if (test_fexit) {
+ i = 0;
+ err = bpf_map_lookup_elem(fexit_data_fd, &i, &val);
+ ASSERT_OK(err, "fexit count");
+ ASSERT_EQ(val, 70, "fexit count");
+ }
+
+out:
+ bpf_link__destroy(fentry_link);
+ bpf_link__destroy(fexit_link);
+ bpf_object__close(fentry_obj);
+ bpf_object__close(fexit_obj);
bpf_object__close(obj);
}
+/* test_tailcall_bpf2bpf_hierarchy_1 checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcalls are preceded
+ * with two bpf2bpf calls.
+ *
+ * subprog --tailcall-> entry
+ * entry <
+ * subprog --tailcall-> entry
+ */
+static void test_tailcall_bpf2bpf_hierarchy_1(void)
+{
+ test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o",
+ false, false, false);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_fentry checks that the count value of the
+ * tail call limit enforcement matches with expectations when tailcalls are
+ * preceded with two bpf2bpf calls, and the two subprogs are traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_hierarchy_fentry(void)
+{
+ test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o",
+ true, false, false);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_fexit checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcalls are preceded
+ * with two bpf2bpf calls, and the two subprogs are traced by fexit.
+ */
+static void test_tailcall_bpf2bpf_hierarchy_fexit(void)
+{
+ test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o",
+ false, true, false);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_fentry_fexit checks that the count value of
+ * the tail call limit enforcement matches with expectations when tailcalls are
+ * preceded with two bpf2bpf calls, and the two subprogs are traced by both
+ * fentry and fexit.
+ */
+static void test_tailcall_bpf2bpf_hierarchy_fentry_fexit(void)
+{
+ test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o",
+ true, true, false);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_fentry_entry checks that the count value of
+ * the tail call limit enforcement matches with expectations when tailcalls are
+ * preceded with two bpf2bpf calls in fentry.
+ */
+static void test_tailcall_bpf2bpf_hierarchy_fentry_entry(void)
+{
+ test_tailcall_hierarchy_count("tc_dummy.bpf.o", false, false, true);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_2 checks that the count value of the tail
+ * call limit enforcement matches with expectations:
+ *
+ * subprog_tail0 --tailcall-> classifier_0 -> subprog_tail0
+ * entry <
+ * subprog_tail1 --tailcall-> classifier_1 -> subprog_tail1
+ */
+static void test_tailcall_bpf2bpf_hierarchy_2(void)
+{
+ RUN_TESTS(tailcall_bpf2bpf_hierarchy2);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_3 checks that the count value of the tail
+ * call limit enforcement matches with expectations:
+ *
+ * subprog with jmp_table0 to classifier_0
+ * entry --tailcall-> classifier_0 <
+ * subprog with jmp_table1 to classifier_0
+ */
+static void test_tailcall_bpf2bpf_hierarchy_3(void)
+{
+ RUN_TESTS(tailcall_bpf2bpf_hierarchy3);
+}
+
+/* test_tailcall_freplace checks that the freplace prog fails to update the
+ * prog_array map, no matter whether the freplace prog attaches to its target.
+ */
+static void test_tailcall_freplace(void)
+{
+ struct tailcall_freplace *freplace_skel = NULL;
+ struct bpf_link *freplace_link = NULL;
+ struct bpf_program *freplace_prog;
+ struct tc_bpf2bpf *tc_skel = NULL;
+ int prog_fd, tc_prog_fd, map_fd;
+ char buff[128] = {};
+ int err, key;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ freplace_skel = tailcall_freplace__open();
+ if (!ASSERT_OK_PTR(freplace_skel, "tailcall_freplace__open"))
+ return;
+
+ tc_skel = tc_bpf2bpf__open_and_load();
+ if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load"))
+ goto out;
+
+ tc_prog_fd = bpf_program__fd(tc_skel->progs.entry_tc);
+ freplace_prog = freplace_skel->progs.entry_freplace;
+ err = bpf_program__set_attach_target(freplace_prog, tc_prog_fd,
+ "subprog_tc");
+ if (!ASSERT_OK(err, "set_attach_target"))
+ goto out;
+
+ err = tailcall_freplace__load(freplace_skel);
+ if (!ASSERT_OK(err, "tailcall_freplace__load"))
+ goto out;
+
+ map_fd = bpf_map__fd(freplace_skel->maps.jmp_table);
+ prog_fd = bpf_program__fd(freplace_prog);
+ key = 0;
+ err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+ ASSERT_ERR(err, "update jmp_table failure");
+
+ freplace_link = bpf_program__attach_freplace(freplace_prog, tc_prog_fd,
+ "subprog_tc");
+ if (!ASSERT_OK_PTR(freplace_link, "attach_freplace"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+ ASSERT_ERR(err, "update jmp_table failure");
+
+out:
+ bpf_link__destroy(freplace_link);
+ tailcall_freplace__destroy(freplace_skel);
+ tc_bpf2bpf__destroy(tc_skel);
+}
+
+/* test_tailcall_bpf2bpf_freplace checks the failure that fails to attach a tail
+ * callee prog with freplace prog or fails to update an extended prog to
+ * prog_array map.
+ */
+static void test_tailcall_bpf2bpf_freplace(void)
+{
+ struct tailcall_freplace *freplace_skel = NULL;
+ struct bpf_link *freplace_link = NULL;
+ struct tc_bpf2bpf *tc_skel = NULL;
+ char buff[128] = {};
+ int prog_fd, map_fd;
+ int err, key;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ tc_skel = tc_bpf2bpf__open_and_load();
+ if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(tc_skel->progs.entry_tc);
+ freplace_skel = tailcall_freplace__open();
+ if (!ASSERT_OK_PTR(freplace_skel, "tailcall_freplace__open"))
+ goto out;
+
+ err = bpf_program__set_attach_target(freplace_skel->progs.entry_freplace,
+ prog_fd, "subprog_tc");
+ if (!ASSERT_OK(err, "set_attach_target"))
+ goto out;
+
+ err = tailcall_freplace__load(freplace_skel);
+ if (!ASSERT_OK(err, "tailcall_freplace__load"))
+ goto out;
+
+ /* OK to attach then detach freplace prog. */
+
+ freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace,
+ prog_fd, "subprog_tc");
+ if (!ASSERT_OK_PTR(freplace_link, "attach_freplace"))
+ goto out;
+
+ err = bpf_link__destroy(freplace_link);
+ freplace_link = NULL;
+ if (!ASSERT_OK(err, "destroy link"))
+ goto out;
+
+ /* OK to update prog_array map then delete element from the map. */
+
+ key = 0;
+ map_fd = bpf_map__fd(freplace_skel->maps.jmp_table);
+ err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ err = bpf_map_delete_elem(map_fd, &key);
+ if (!ASSERT_OK(err, "delete_elem from jmp_table"))
+ goto out;
+
+ /* Fail to attach a tail callee prog with freplace prog. */
+
+ err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace,
+ prog_fd, "subprog_tc");
+ if (!ASSERT_ERR_PTR(freplace_link, "attach_freplace failure"))
+ goto out;
+
+ err = bpf_map_delete_elem(map_fd, &key);
+ if (!ASSERT_OK(err, "delete_elem from jmp_table"))
+ goto out;
+
+ /* Fail to update an extended prog to prog_array map. */
+
+ freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace,
+ prog_fd, "subprog_tc");
+ if (!ASSERT_OK_PTR(freplace_link, "attach_freplace"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+ if (!ASSERT_ERR(err, "update jmp_table failure"))
+ goto out;
+
+out:
+ bpf_link__destroy(freplace_link);
+ tailcall_freplace__destroy(freplace_skel);
+ tc_bpf2bpf__destroy(tc_skel);
+}
+
+static void test_tailcall_failure()
+{
+ RUN_TESTS(tailcall_fail);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -808,6 +1665,8 @@ void test_tailcalls(void)
test_tailcall_4();
if (test__start_subtest("tailcall_5"))
test_tailcall_5();
+ if (test__start_subtest("tailcall_6"))
+ test_tailcall_6();
if (test__start_subtest("tailcall_bpf2bpf_1"))
test_tailcall_bpf2bpf_1();
if (test__start_subtest("tailcall_bpf2bpf_2"))
@@ -815,5 +1674,37 @@ void test_tailcalls(void)
if (test__start_subtest("tailcall_bpf2bpf_3"))
test_tailcall_bpf2bpf_3();
if (test__start_subtest("tailcall_bpf2bpf_4"))
- test_tailcall_bpf2bpf_4();
+ test_tailcall_bpf2bpf_4(false);
+ if (test__start_subtest("tailcall_bpf2bpf_5"))
+ test_tailcall_bpf2bpf_4(true);
+ if (test__start_subtest("tailcall_bpf2bpf_6"))
+ test_tailcall_bpf2bpf_6();
+ if (test__start_subtest("tailcall_bpf2bpf_fentry"))
+ test_tailcall_bpf2bpf_fentry();
+ if (test__start_subtest("tailcall_bpf2bpf_fexit"))
+ test_tailcall_bpf2bpf_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_fentry_fexit"))
+ test_tailcall_bpf2bpf_fentry_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_fentry_entry"))
+ test_tailcall_bpf2bpf_fentry_entry();
+ if (test__start_subtest("tailcall_poke"))
+ test_tailcall_poke();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_1"))
+ test_tailcall_bpf2bpf_hierarchy_1();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fentry"))
+ test_tailcall_bpf2bpf_hierarchy_fentry();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fexit"))
+ test_tailcall_bpf2bpf_hierarchy_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fentry_fexit"))
+ test_tailcall_bpf2bpf_hierarchy_fentry_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fentry_entry"))
+ test_tailcall_bpf2bpf_hierarchy_fentry_entry();
+ test_tailcall_bpf2bpf_hierarchy_2();
+ test_tailcall_bpf2bpf_hierarchy_3();
+ if (test__start_subtest("tailcall_freplace"))
+ test_tailcall_freplace();
+ if (test__start_subtest("tailcall_bpf2bpf_freplace"))
+ test_tailcall_bpf2bpf_freplace();
+ if (test__start_subtest("tailcall_failure"))
+ test_tailcall_failure();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
index 1bdc1d86a50c..3d34bab01e48 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
@@ -3,7 +3,7 @@
void test_task_fd_query_rawtp(void)
{
- const char *file = "./test_get_stack_rawtp.o";
+ const char *file = "./test_get_stack_rawtp.bpf.o";
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
struct bpf_object *obj;
@@ -11,7 +11,7 @@ void test_task_fd_query_rawtp(void)
__u32 duration = 0;
char buf[256];
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
index 3f131b8fe328..c91eda624657 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
@@ -4,7 +4,7 @@
static void test_task_fd_query_tp_core(const char *probe_name,
const char *tp_name)
{
- const char *file = "./test_tracepoint.o";
+ const char *file = "./test_tracepoint.bpf.o";
int err, bytes, efd, prog_fd, pmu_fd;
struct perf_event_attr attr = {};
__u64 probe_offset, probe_addr;
@@ -13,12 +13,17 @@ static void test_task_fd_query_tp_core(const char *probe_name,
__u32 duration = 0;
char buf[256];
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
goto close_prog;
- snprintf(buf, sizeof(buf),
- "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0) {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/tracing/events/%s/id", probe_name);
+ } else {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ }
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
new file mode 100644
index 000000000000..83b90335967a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <sys/wait.h>
+#include <test_progs.h>
+#include <unistd.h>
+
+#include "task_kfunc_failure.skel.h"
+#include "task_kfunc_success.skel.h"
+
+static struct task_kfunc_success *open_load_task_kfunc_skel(void)
+{
+ struct task_kfunc_success *skel;
+ int err;
+
+ skel = task_kfunc_success__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ err = task_kfunc_success__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ task_kfunc_success__destroy(skel);
+ return NULL;
+}
+
+static void run_success_test(const char *prog_name)
+{
+ struct task_kfunc_success *skel;
+ int status;
+ pid_t child_pid;
+ struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+
+ skel = open_load_task_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_spawn_err"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "attached_link"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GT(child_pid, -1, "child_pid"))
+ goto cleanup;
+ if (child_pid == 0)
+ _exit(0);
+ waitpid(child_pid, &status, 0);
+
+ ASSERT_OK(skel->bss->err, "post_wait_err");
+
+cleanup:
+ bpf_link__destroy(link);
+ task_kfunc_success__destroy(skel);
+}
+
+static int run_vpid_test(void *prog_name)
+{
+ struct task_kfunc_success *skel;
+ struct bpf_program *prog;
+ int prog_fd, err = 0;
+
+ if (getpid() != 1)
+ return 1;
+
+ skel = open_load_task_kfunc_skel();
+ if (!skel)
+ return 2;
+
+ if (skel->bss->err) {
+ err = 3;
+ goto cleanup;
+ }
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!prog) {
+ err = 4;
+ goto cleanup;
+ }
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ err = 5;
+ goto cleanup;
+ }
+
+ if (bpf_prog_test_run_opts(prog_fd, NULL)) {
+ err = 6;
+ goto cleanup;
+ }
+
+ if (skel->bss->err)
+ err = 7 + skel->bss->err;
+cleanup:
+ task_kfunc_success__destroy(skel);
+ return err;
+}
+
+static void run_vpid_success_test(const char *prog_name)
+{
+ const int stack_size = 1024 * 1024;
+ int child_pid, wstatus;
+ char *stack;
+
+ stack = (char *)malloc(stack_size);
+ if (!ASSERT_OK_PTR(stack, "clone_stack"))
+ return;
+
+ child_pid = clone(run_vpid_test, stack + stack_size,
+ CLONE_NEWPID | SIGCHLD, (void *)prog_name);
+ if (!ASSERT_GT(child_pid, -1, "child_pid"))
+ goto cleanup;
+
+ if (!ASSERT_GT(waitpid(child_pid, &wstatus, 0), -1, "waitpid"))
+ goto cleanup;
+
+ if (WEXITSTATUS(wstatus) > 7)
+ ASSERT_OK(WEXITSTATUS(wstatus) - 7, "vpid_test_failure");
+ else
+ ASSERT_OK(WEXITSTATUS(wstatus), "run_vpid_test_err");
+cleanup:
+ free(stack);
+}
+
+static const char * const success_tests[] = {
+ "test_task_acquire_release_argument",
+ "test_task_acquire_release_current",
+ "test_task_acquire_leave_in_map",
+ "test_task_xchg_release",
+ "test_task_map_acquire_release",
+ "test_task_current_acquire_release",
+ "test_task_from_pid_arg",
+ "test_task_from_pid_current",
+ "test_task_from_pid_invalid",
+ "task_kfunc_acquire_trusted_walked",
+ "test_task_kfunc_flavor_relo",
+ "test_task_kfunc_flavor_relo_not_found",
+};
+
+static const char * const vpid_success_tests[] = {
+ "test_task_from_vpid_current",
+ "test_task_from_vpid_invalid",
+};
+
+void test_task_kfunc(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i]))
+ continue;
+
+ run_success_test(success_tests[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vpid_success_tests); i++) {
+ if (!test__start_subtest(vpid_success_tests[i]))
+ continue;
+
+ run_vpid_success_test(vpid_success_tests[i]);
+ }
+
+ RUN_TESTS(task_kfunc_failure);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
new file mode 100644
index 000000000000..2de38776a2d4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_H
+#define __TASK_LOCAL_DATA_H
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdatomic.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+#include <pthread.h>
+#endif
+
+#include <bpf/bpf.h>
+
+/*
+ * OPTIONS
+ *
+ * Define the option before including the header
+ *
+ * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically
+ *
+ * Thread-specific memory for storing TLD is allocated lazily on the first call to
+ * tld_get_data(). The thread that calls it must also call tld_free() on thread exit
+ * to prevent memory leak. Pthread will be included if the option is defined. A pthread
+ * key will be registered with a destructor that calls tld_free().
+ *
+ *
+ * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically
+ * (default: 64 bytes)
+ *
+ * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using
+ * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be
+ * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated
+ * for these TLDs. This additional memory is allocated for every thread that calls
+ * tld_get_data() even if no tld_create_key are actually called, so be mindful of
+ * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory
+ * will be allocated for TLDs created with it.
+ *
+ *
+ * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62)
+ *
+ * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store,
+ * TLD_MAX_DATA_CNT.
+ *
+ *
+ * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc()
+ *
+ * When allocating the memory for storing TLDs, we need to make sure there is a memory
+ * region of the X bytes within a page. This is due to the limit posed by UPTR: memory
+ * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The
+ * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses
+ * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage
+ * as not all memory allocator can use the exact amount of memory requested to fulfill
+ * aligned_alloc(). For example, some may round the size up to the alignment. Enable the
+ * option to always use aligned_alloc() if the implementation has low memory overhead.
+ */
+
+#define TLD_PAGE_SIZE getpagesize()
+#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1))
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
+
+#define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#ifndef TLD_DYN_DATA_SIZE
+#define TLD_DYN_DATA_SIZE 64
+#endif
+
+#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ _Atomic __u16 size;
+};
+
+struct tld_meta_u {
+ _Atomic __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[];
+};
+
+struct tld_map_value {
+ void *data;
+ struct tld_meta_u *meta;
+};
+
+struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak));
+__thread struct tld_data_u *tld_data_p __attribute__((weak));
+__thread void *tld_data_alloc_p __attribute__((weak));
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+pthread_key_t tld_pthread_key __attribute__((weak));
+
+static void tld_free(void);
+
+static void __tld_thread_exit_handler(void *unused)
+{
+ tld_free();
+}
+#endif
+
+static int __tld_init_meta_p(void)
+{
+ struct tld_meta_u *meta, *uninit = NULL;
+ int err = 0;
+
+ meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE);
+ if (!meta) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memset(meta, 0, TLD_PAGE_SIZE);
+ meta->size = TLD_DYN_DATA_SIZE;
+
+ if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) {
+ free(meta);
+ goto out;
+ }
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler);
+#endif
+out:
+ return err;
+}
+
+static int __tld_init_data_p(int map_fd)
+{
+ bool use_aligned_alloc = false;
+ struct tld_map_value map_val;
+ struct tld_data_u *data;
+ void *data_alloc = NULL;
+ int err, tid_fd = -1;
+
+ tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL);
+ if (tid_fd < 0) {
+ err = -errno;
+ goto out;
+ }
+
+#ifdef TLD_DATA_USE_ALIGNED_ALLOC
+ use_aligned_alloc = true;
+#endif
+
+ /*
+ * tld_meta_p->size = TLD_DYN_DATA_SIZE +
+ * total size of TLDs defined via TLD_DEFINE_KEY()
+ */
+ data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ?
+ aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) :
+ malloc(tld_meta_p->size * 2);
+ if (!data_alloc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Always pass a page-aligned address to UPTR since the size of tld_map_value::data
+ * is a page in BTF. If data_alloc spans across two pages, use the page that contains large
+ * enough memory.
+ */
+ if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) {
+ map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc);
+ data = data_alloc;
+ data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) +
+ offsetof(struct tld_data_u, data);
+ } else {
+ map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data->start = offsetof(struct tld_data_u, data);
+ }
+ map_val.meta = TLD_READ_ONCE(tld_meta_p);
+
+ err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0);
+ if (err) {
+ free(data_alloc);
+ goto out;
+ }
+
+ tld_data_p = data;
+ tld_data_alloc_p = data_alloc;
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_setspecific(tld_pthread_key, (void *)1);
+#endif
+out:
+ if (tid_fd >= 0)
+ close(tid_fd);
+ return err;
+}
+
+static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
+{
+ int err, i, sz, off = 0;
+ __u8 cnt;
+
+ if (!TLD_READ_ONCE(tld_meta_p)) {
+ err = __tld_init_meta_p();
+ if (err)
+ return (tld_key_t){err};
+ }
+
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+retry:
+ cnt = atomic_load(&tld_meta_p->cnt);
+ if (i < cnt) {
+ /* A metadata is not ready until size is updated with a non-zero value */
+ while (!(sz = atomic_load(&tld_meta_p->metadata[i].size)))
+ sched_yield();
+
+ if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN))
+ return (tld_key_t){-EEXIST};
+
+ off += TLD_ROUND_UP(sz, 8);
+ continue;
+ }
+
+ /*
+ * TLD_DEFINE_KEY() is given memory upto a page while at most
+ * TLD_DYN_DATA_SIZE is allocated for tld_create_key()
+ */
+ if (dyn_data) {
+ if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size)
+ return (tld_key_t){-E2BIG};
+ } else {
+ if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u))
+ return (tld_key_t){-E2BIG};
+ tld_meta_p->size += TLD_ROUND_UP(size, 8);
+ }
+
+ /*
+ * Only one tld_create_key() can increase the current cnt by one and
+ * takes the latest available slot. Other threads will check again if a new
+ * TLD can still be added, and then compete for the new slot after the
+ * succeeding thread update the size.
+ */
+ if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1))
+ goto retry;
+
+ strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN);
+ atomic_store(&tld_meta_p->metadata[i].size, size);
+ return (tld_key_t){(__s16)off};
+ }
+
+ return (tld_key_t){-ENOSPC};
+}
+
+/**
+ * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD.
+ *
+ * @name: The name of the TLD
+ * @size: The size of the TLD
+ * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN
+ *
+ * The macro can only be used in file scope.
+ *
+ * A global variable key of opaque type, tld_key_t, will be declared and initialized before
+ * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key
+ * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD.
+ * bpf programs can also fetch the same key by name.
+ *
+ * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just
+ * enough memory will be allocated for each thread on the first call to tld_get_data().
+ */
+#define TLD_DEFINE_KEY(key, name, size) \
+tld_key_t key; \
+ \
+__attribute__((constructor)) \
+void __tld_define_key_##key(void) \
+{ \
+ key = __tld_create_key(name, size, false); \
+}
+
+/**
+ * tld_create_key() - Create a TLD and return a key associated with the TLD.
+ *
+ * @name: The name the TLD
+ * @size: The size of the TLD
+ *
+ * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check
+ * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to
+ * locate the TLD. bpf programs can also fetch the same key by name.
+ *
+ * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is
+ * not known statically or a TLD needs to be created conditionally)
+ *
+ * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs
+ * created dynamically with tld_create_key(). Since only a user page is pinned to the
+ * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE -
+ * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page.
+ */
+__attribute__((unused))
+static tld_key_t tld_create_key(const char *name, size_t size)
+{
+ return __tld_create_key(name, size, true);
+}
+
+__attribute__((unused))
+static inline bool tld_key_is_err(tld_key_t key)
+{
+ return key.off < 0;
+}
+
+__attribute__((unused))
+static inline int tld_key_err_or_zero(tld_key_t key)
+{
+ return tld_key_is_err(key) ? key.off : 0;
+}
+
+/**
+ * tld_get_data() - Get a pointer to the TLD associated with the given key of the
+ * calling thread.
+ *
+ * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map
+ * of task local data.
+ * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key().
+ *
+ * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD
+ * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte
+ * aligned.
+ *
+ * Threads that call tld_get_data() must call tld_free() on exit to prevent
+ * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined.
+ */
+__attribute__((unused))
+static void *tld_get_data(int map_fd, tld_key_t key)
+{
+ if (!TLD_READ_ONCE(tld_meta_p))
+ return NULL;
+
+ /* tld_data_p is allocated on the first invocation of tld_get_data() */
+ if (!tld_data_p && __tld_init_data_p(map_fd))
+ return NULL;
+
+ return tld_data_p->data + key.off;
+}
+
+/**
+ * tld_free() - Free task local data memory of the calling thread
+ *
+ * For the calling thread, all pointers to TLDs acquired before will become invalid.
+ *
+ * Users must call tld_free() on thread exit to prevent memory leak. Alternatively,
+ * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered
+ * to free the memory automatically.
+ */
+__attribute__((unused))
+static void tld_free(void)
+{
+ if (tld_data_alloc_p) {
+ free(tld_data_alloc_p);
+ tld_data_alloc_p = NULL;
+ tld_data_p = NULL;
+ }
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __TASK_LOCAL_DATA_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
new file mode 100644
index 000000000000..42e822ea352f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define _GNU_SOURCE /* See feature_test_macros(7) */
+#include <unistd.h>
+#include <sched.h>
+#include <pthread.h>
+#include <sys/syscall.h> /* For SYS_xxx definitions */
+#include <sys/types.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "task_local_storage_helpers.h"
+#include "task_local_storage.skel.h"
+#include "task_local_storage_exit_creds.skel.h"
+#include "task_ls_recursion.skel.h"
+#include "task_storage_nodeadlock.skel.h"
+#include "uptr_test_common.h"
+#include "task_ls_uptr.skel.h"
+#include "uptr_update_failure.skel.h"
+#include "uptr_failure.skel.h"
+#include "uptr_map_failure.skel.h"
+
+static void test_sys_enter_exit(void)
+{
+ struct task_local_storage *skel;
+ int err;
+
+ skel = task_local_storage__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ skel->bss->target_pid = sys_gettid();
+
+ err = task_local_storage__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ sys_gettid();
+ sys_gettid();
+
+ /* 3x syscalls: 1x attach and 2x gettid */
+ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
+ ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
+ ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
+out:
+ task_local_storage__destroy(skel);
+}
+
+static void test_exit_creds(void)
+{
+ struct task_local_storage_exit_creds *skel;
+ int err, run_count, sync_rcu_calls = 0;
+ const int MAX_SYNC_RCU_CALLS = 1000;
+
+ skel = task_local_storage_exit_creds__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = task_local_storage_exit_creds__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger at least one exit_creds() */
+ if (CHECK_FAIL(system("ls > /dev/null")))
+ goto out;
+
+ /* kern_sync_rcu is not enough on its own as the read section we want
+ * to wait for may start after we enter synchronize_rcu, so our call
+ * won't wait for the section to finish. Loop on the run counter
+ * as well to ensure the program has run.
+ */
+ do {
+ kern_sync_rcu();
+ run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST);
+ } while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS);
+
+ ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS,
+ "sync_rcu count too high");
+ ASSERT_NEQ(run_count, 0, "run_count");
+ ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
+ ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
+out:
+ task_local_storage_exit_creds__destroy(skel);
+}
+
+static void test_recursion(void)
+{
+ int err, map_fd, prog_fd, task_fd;
+ struct task_ls_recursion *skel;
+ struct bpf_prog_info info;
+ __u32 info_len = sizeof(info);
+ long value;
+
+ task_fd = sys_pidfd_open(getpid(), 0);
+ if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open"))
+ return;
+
+ skel = task_ls_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ err = task_ls_recursion__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger sys_enter, make sure it does not cause deadlock */
+ skel->bss->test_pid = getpid();
+ sys_gettid();
+ skel->bss->test_pid = 0;
+ task_ls_recursion__detach(skel);
+
+ /* Refer to the comment in BPF_PROG(on_update) for
+ * the explanation on the value 201 and 100.
+ */
+ map_fd = bpf_map__fd(skel->maps.map_a);
+ err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
+ ASSERT_OK(err, "lookup map_a");
+ ASSERT_EQ(value, 201, "map_a value");
+ ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy");
+
+ map_fd = bpf_map__fd(skel->maps.map_b);
+ err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
+ ASSERT_OK(err, "lookup map_b");
+ ASSERT_EQ(value, 100, "map_b value");
+
+ prog_fd = bpf_program__fd(skel->progs.on_update);
+ memset(&info, 0, sizeof(info));
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ ASSERT_OK(err, "get prog info");
+ ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion");
+
+ prog_fd = bpf_program__fd(skel->progs.on_enter);
+ memset(&info, 0, sizeof(info));
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ ASSERT_OK(err, "get prog info");
+ ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion");
+
+out:
+ close(task_fd);
+ task_ls_recursion__destroy(skel);
+}
+
+static bool stop;
+
+static void waitall(const pthread_t *tids, int nr)
+{
+ int i;
+
+ stop = true;
+ for (i = 0; i < nr; i++)
+ pthread_join(tids[i], NULL);
+}
+
+static void *sock_create_loop(void *arg)
+{
+ struct task_storage_nodeadlock *skel = arg;
+ int fd;
+
+ while (!stop) {
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ close(fd);
+ if (skel->bss->nr_get_errs || skel->bss->nr_del_errs)
+ stop = true;
+ }
+
+ return NULL;
+}
+
+static void test_nodeadlock(void)
+{
+ struct task_storage_nodeadlock *skel;
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ const int nr_threads = 32;
+ pthread_t tids[nr_threads];
+ int i, prog_fd, err;
+ cpu_set_t old, new;
+
+ /* Pin all threads to one cpu to increase the chance of preemption
+ * in a sleepable bpf prog.
+ */
+ CPU_ZERO(&new);
+ CPU_SET(0, &new);
+ err = sched_getaffinity(getpid(), sizeof(old), &old);
+ if (!ASSERT_OK(err, "getaffinity"))
+ return;
+ err = sched_setaffinity(getpid(), sizeof(new), &new);
+ if (!ASSERT_OK(err, "setaffinity"))
+ return;
+
+ skel = task_storage_nodeadlock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto done;
+
+ /* Unnecessary recursion and deadlock detection are reproducible
+ * in the preemptible kernel.
+ */
+ if (!skel->kconfig->CONFIG_PREEMPTION) {
+ test__skip();
+ goto done;
+ }
+
+ err = task_storage_nodeadlock__attach(skel);
+ ASSERT_OK(err, "attach prog");
+
+ for (i = 0; i < nr_threads; i++) {
+ err = pthread_create(&tids[i], NULL, sock_create_loop, skel);
+ if (err) {
+ /* Only assert once here to avoid excessive
+ * PASS printing during test failure.
+ */
+ ASSERT_OK(err, "pthread_create");
+ waitall(tids, i);
+ goto done;
+ }
+ }
+
+ /* With 32 threads, 1s is enough to reproduce the issue */
+ sleep(1);
+ waitall(tids, nr_threads);
+
+ info_len = sizeof(info);
+ prog_fd = bpf_program__fd(skel->progs.socket_post_create);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ ASSERT_OK(err, "get prog info");
+ ASSERT_EQ(info.recursion_misses, 0, "prog recursion");
+
+ ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy");
+ ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");
+
+done:
+ task_storage_nodeadlock__destroy(skel);
+ sched_setaffinity(getpid(), sizeof(old), &old);
+}
+
+static struct user_data udata __attribute__((aligned(16))) = {
+ .a = 1,
+ .b = 2,
+};
+
+static struct user_data udata2 __attribute__((aligned(16))) = {
+ .a = 3,
+ .b = 4,
+};
+
+static void check_udata2(int expected)
+{
+ udata2.result = udata2.nested_result = 0;
+ usleep(1);
+ ASSERT_EQ(udata2.result, expected, "udata2.result");
+ ASSERT_EQ(udata2.nested_result, expected, "udata2.nested_result");
+}
+
+static void test_uptr_basic(void)
+{
+ int map_fd, parent_task_fd, ev_fd;
+ struct value_type value = {};
+ struct task_ls_uptr *skel;
+ pid_t child_pid, my_tid;
+ __u64 ev_dummy_data = 1;
+ int err;
+
+ my_tid = sys_gettid();
+ parent_task_fd = sys_pidfd_open(my_tid, 0);
+ if (!ASSERT_OK_FD(parent_task_fd, "parent_task_fd"))
+ return;
+
+ ev_fd = eventfd(0, 0);
+ if (!ASSERT_OK_FD(ev_fd, "ev_fd")) {
+ close(parent_task_fd);
+ return;
+ }
+
+ skel = task_ls_uptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ map_fd = bpf_map__fd(skel->maps.datamap);
+ value.udata = &udata;
+ value.nested.udata = &udata;
+ err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "update_elem(udata)"))
+ goto out;
+
+ err = task_ls_uptr__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ child_pid = fork();
+ if (!ASSERT_NEQ(child_pid, -1, "fork"))
+ goto out;
+
+ /* Call syscall in the child process, but access the map value of
+ * the parent process in the BPF program to check if the user kptr
+ * is translated/mapped correctly.
+ */
+ if (child_pid == 0) {
+ /* child */
+
+ /* Overwrite the user_data in the child process to check if
+ * the BPF program accesses the user_data of the parent.
+ */
+ udata.a = 0;
+ udata.b = 0;
+
+ /* Wait for the parent to set child_pid */
+ read(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data));
+ exit(0);
+ }
+
+ skel->bss->parent_pid = my_tid;
+ skel->bss->target_pid = child_pid;
+
+ write(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data));
+
+ err = waitpid(child_pid, NULL, 0);
+ ASSERT_EQ(err, child_pid, "waitpid");
+ ASSERT_EQ(udata.result, MAGIC_VALUE + udata.a + udata.b, "udata.result");
+ ASSERT_EQ(udata.nested_result, MAGIC_VALUE + udata.a + udata.b, "udata.nested_result");
+
+ skel->bss->target_pid = my_tid;
+
+ /* update_elem: uptr changes from udata1 to udata2 */
+ value.udata = &udata2;
+ value.nested.udata = &udata2;
+ err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
+ if (!ASSERT_OK(err, "update_elem(udata2)"))
+ goto out;
+ check_udata2(MAGIC_VALUE + udata2.a + udata2.b);
+
+ /* update_elem: uptr changes from udata2 uptr to NULL */
+ memset(&value, 0, sizeof(value));
+ err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
+ if (!ASSERT_OK(err, "update_elem(udata2)"))
+ goto out;
+ check_udata2(0);
+
+ /* update_elem: uptr changes from NULL to udata2 */
+ value.udata = &udata2;
+ value.nested.udata = &udata2;
+ err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
+ if (!ASSERT_OK(err, "update_elem(udata2)"))
+ goto out;
+ check_udata2(MAGIC_VALUE + udata2.a + udata2.b);
+
+ /* Check if user programs can access the value of user kptrs
+ * through bpf_map_lookup_elem(). Make sure the kernel value is not
+ * leaked.
+ */
+ err = bpf_map_lookup_elem(map_fd, &parent_task_fd, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto out;
+ ASSERT_EQ(value.udata, NULL, "value.udata");
+ ASSERT_EQ(value.nested.udata, NULL, "value.nested.udata");
+
+ /* delete_elem */
+ err = bpf_map_delete_elem(map_fd, &parent_task_fd);
+ ASSERT_OK(err, "delete_elem(udata2)");
+ check_udata2(0);
+
+ /* update_elem: add uptr back to test map_free */
+ value.udata = &udata2;
+ value.nested.udata = &udata2;
+ err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST);
+ ASSERT_OK(err, "update_elem(udata2)");
+
+out:
+ task_ls_uptr__destroy(skel);
+ close(ev_fd);
+ close(parent_task_fd);
+}
+
+static void test_uptr_across_pages(void)
+{
+ int page_size = getpagesize();
+ struct value_type value = {};
+ struct task_ls_uptr *skel;
+ int err, task_fd, map_fd;
+ void *mem;
+
+ task_fd = sys_pidfd_open(getpid(), 0);
+ if (!ASSERT_OK_FD(task_fd, "task_fd"))
+ return;
+
+ mem = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (!ASSERT_OK_PTR(mem, "mmap(page_size * 2)")) {
+ close(task_fd);
+ return;
+ }
+
+ skel = task_ls_uptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ map_fd = bpf_map__fd(skel->maps.datamap);
+ value.udata = mem + page_size - offsetof(struct user_data, b);
+ err = bpf_map_update_elem(map_fd, &task_fd, &value, 0);
+ if (!ASSERT_ERR(err, "update_elem(udata)"))
+ goto out;
+ ASSERT_EQ(errno, EOPNOTSUPP, "errno");
+
+ value.udata = mem + page_size - sizeof(struct user_data);
+ err = bpf_map_update_elem(map_fd, &task_fd, &value, 0);
+ ASSERT_OK(err, "update_elem(udata)");
+
+out:
+ task_ls_uptr__destroy(skel);
+ close(task_fd);
+ munmap(mem, page_size * 2);
+}
+
+static void test_uptr_update_failure(void)
+{
+ struct value_lock_type value = {};
+ struct uptr_update_failure *skel;
+ int err, task_fd, map_fd;
+
+ task_fd = sys_pidfd_open(getpid(), 0);
+ if (!ASSERT_OK_FD(task_fd, "task_fd"))
+ return;
+
+ skel = uptr_update_failure__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ map_fd = bpf_map__fd(skel->maps.datamap);
+
+ value.udata = &udata;
+ err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_F_LOCK);
+ if (!ASSERT_ERR(err, "update_elem(udata, BPF_F_LOCK)"))
+ goto out;
+ ASSERT_EQ(errno, EOPNOTSUPP, "errno");
+
+ err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_EXIST);
+ if (!ASSERT_ERR(err, "update_elem(udata, BPF_EXIST)"))
+ goto out;
+ ASSERT_EQ(errno, ENOENT, "errno");
+
+ err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "update_elem(udata, BPF_NOEXIST)"))
+ goto out;
+
+ value.udata = &udata2;
+ err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST);
+ if (!ASSERT_ERR(err, "update_elem(udata2, BPF_NOEXIST)"))
+ goto out;
+ ASSERT_EQ(errno, EEXIST, "errno");
+
+out:
+ uptr_update_failure__destroy(skel);
+ close(task_fd);
+}
+
+static void test_uptr_map_failure(const char *map_name, int expected_errno)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, create_attr);
+ struct uptr_map_failure *skel;
+ struct bpf_map *map;
+ struct btf *btf;
+ int map_fd, err;
+
+ skel = uptr_map_failure__open();
+ if (!ASSERT_OK_PTR(skel, "uptr_map_failure__open"))
+ return;
+
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ btf = bpf_object__btf(skel->obj);
+ err = btf__load_into_kernel(btf);
+ if (!ASSERT_OK(err, "btf__load_into_kernel"))
+ goto done;
+
+ create_attr.map_flags = bpf_map__map_flags(map);
+ create_attr.btf_fd = btf__fd(btf);
+ create_attr.btf_key_type_id = bpf_map__btf_key_type_id(map);
+ create_attr.btf_value_type_id = bpf_map__btf_value_type_id(map);
+ map_fd = bpf_map_create(bpf_map__type(map), map_name,
+ bpf_map__key_size(map), bpf_map__value_size(map),
+ 0, &create_attr);
+ if (ASSERT_ERR_FD(map_fd, "map_create"))
+ ASSERT_EQ(errno, expected_errno, "errno");
+ else
+ close(map_fd);
+
+done:
+ uptr_map_failure__destroy(skel);
+}
+
+void test_task_local_storage(void)
+{
+ if (test__start_subtest("sys_enter_exit"))
+ test_sys_enter_exit();
+ if (test__start_subtest("exit_creds"))
+ test_exit_creds();
+ if (test__start_subtest("recursion"))
+ test_recursion();
+ if (test__start_subtest("nodeadlock"))
+ test_nodeadlock();
+ if (test__start_subtest("uptr_basic"))
+ test_uptr_basic();
+ if (test__start_subtest("uptr_across_pages"))
+ test_uptr_across_pages();
+ if (test__start_subtest("uptr_update_failure"))
+ test_uptr_update_failure();
+ if (test__start_subtest("uptr_map_failure_e2big")) {
+ if (getpagesize() == PAGE_SIZE)
+ test_uptr_map_failure("large_uptr_map", E2BIG);
+ else
+ test__skip();
+ }
+ if (test__start_subtest("uptr_map_failure_size0"))
+ test_uptr_map_failure("empty_uptr_map", EINVAL);
+ if (test__start_subtest("uptr_map_failure_kstruct"))
+ test_uptr_map_failure("kstruct_uptr_map", EINVAL);
+ RUN_TESTS(uptr_failure);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
new file mode 100644
index 000000000000..f000734a3d1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_task_pt_regs.skel.h"
+
+/* uprobe attach point */
+static noinline void trigger_func(void)
+{
+ asm volatile ("");
+}
+
+void test_task_pt_regs(void)
+{
+ struct test_task_pt_regs *skel;
+ struct bpf_link *uprobe_link;
+ ssize_t uprobe_offset;
+ bool match;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ return;
+
+ skel = test_task_pt_regs__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ goto cleanup;
+
+ uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
+ false /* retprobe */,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ /* trigger & validate uprobe */
+ trigger_func();
+
+ if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res"))
+ goto cleanup;
+
+ match = !memcmp(&skel->bss->current_regs, &skel->bss->ctx_regs,
+ sizeof(skel->bss->current_regs));
+ ASSERT_TRUE(match, "check_regs_match");
+
+cleanup:
+ test_task_pt_regs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
new file mode 100644
index 000000000000..626d76fe43a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include "test_task_under_cgroup.skel.h"
+
+#define FOO "/foo"
+
+void test_task_under_cgroup(void)
+{
+ struct test_task_under_cgroup *skel;
+ int ret, foo;
+ pid_t pid;
+
+ foo = test__join_cgroup(FOO);
+ if (!ASSERT_OK(foo < 0, "cgroup_join_foo"))
+ return;
+
+ skel = test_task_under_cgroup__open();
+ if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open"))
+ goto cleanup;
+
+ skel->rodata->local_pid = getpid();
+ skel->bss->remote_pid = getpid();
+ skel->rodata->cgid = get_cgroup_id(FOO);
+
+ ret = test_task_under_cgroup__load(skel);
+ if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
+ goto cleanup;
+
+ /* First, attach the LSM program, and then it will be triggered when the
+ * TP_BTF program is attached.
+ */
+ skel->links.lsm_run = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(skel->links.lsm_run, "attach_lsm"))
+ goto cleanup;
+
+ skel->links.tp_btf_run = bpf_program__attach_trace(skel->progs.tp_btf_run);
+ if (!ASSERT_OK_PTR(skel->links.tp_btf_run, "attach_tp_btf"))
+ goto cleanup;
+
+ pid = fork();
+ if (pid == 0)
+ exit(0);
+
+ ret = (pid == -1);
+ if (ASSERT_OK(ret, "fork process"))
+ wait(NULL);
+
+ test_task_under_cgroup__detach(skel);
+
+ ASSERT_NEQ(skel->bss->remote_pid, skel->rodata->local_pid,
+ "test task_under_cgroup");
+
+cleanup:
+ test_task_under_cgroup__destroy(skel);
+ close(foo);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_work_stress.c b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
new file mode 100644
index 000000000000..450d17d91a56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work_stress.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <stdlib.h>
+#include <stdatomic.h>
+
+struct test_data {
+ int prog_fd;
+ atomic_int exit;
+};
+
+void *runner(void *test_data)
+{
+ struct test_data *td = test_data;
+ int err = 0;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ while (!err && !atomic_load(&td->exit))
+ err = bpf_prog_test_run_opts(td->prog_fd, &opts);
+
+ return NULL;
+}
+
+static int get_env_int(const char *str, int def)
+{
+ const char *s = getenv(str);
+ char *end;
+ int retval;
+
+ if (!s || !*s)
+ return def;
+ errno = 0;
+ retval = strtol(s, &end, 10);
+ if (errno || *end || retval < 0)
+ return def;
+ return retval;
+}
+
+static void task_work_run(bool enable_delete)
+{
+ struct task_work_stress *skel;
+ struct bpf_program *scheduler, *deleter;
+ int nthreads = 16;
+ int test_time_s = get_env_int("BPF_TASK_WORK_TEST_TIME", 1);
+ pthread_t tid[nthreads], tid_del;
+ bool started[nthreads], started_del = false;
+ struct test_data td_sched = { .exit = 0 }, td_del = { .exit = 1 };
+ int i, err;
+
+ skel = task_work_stress__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ scheduler = bpf_object__find_program_by_name(skel->obj, "schedule_task_work");
+ bpf_program__set_autoload(scheduler, true);
+
+ deleter = bpf_object__find_program_by_name(skel->obj, "delete_task_work");
+ bpf_program__set_autoload(deleter, true);
+
+ err = task_work_stress__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ for (i = 0; i < nthreads; ++i)
+ started[i] = false;
+
+ td_sched.prog_fd = bpf_program__fd(scheduler);
+ for (i = 0; i < nthreads; ++i) {
+ if (pthread_create(&tid[i], NULL, runner, &td_sched) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started[i] = true;
+ }
+
+ if (enable_delete)
+ atomic_store(&td_del.exit, 0);
+
+ td_del.prog_fd = bpf_program__fd(deleter);
+ if (pthread_create(&tid_del, NULL, runner, &td_del) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started_del = true;
+
+ /* Run stress test for some time */
+ sleep(test_time_s);
+
+cancel:
+ atomic_store(&td_sched.exit, 1);
+ atomic_store(&td_del.exit, 1);
+ for (i = 0; i < nthreads; ++i) {
+ if (started[i])
+ pthread_join(tid[i], NULL);
+ }
+
+ if (started_del)
+ pthread_join(tid_del, NULL);
+
+ ASSERT_GT(skel->bss->callback_scheduled, 0, "work scheduled");
+ /* Some scheduling attempts should have failed due to contention */
+ ASSERT_GT(skel->bss->schedule_error, 0, "schedule error");
+
+ if (enable_delete) {
+ /* If delete thread is enabled, it has cancelled some callbacks */
+ ASSERT_GT(skel->bss->delete_success, 0, "delete success");
+ ASSERT_LT(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ } else {
+ /* Without delete thread number of scheduled callbacks is the same as fired */
+ ASSERT_EQ(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ }
+
+cleanup:
+ task_work_stress__destroy(skel);
+}
+
+void test_task_work_stress(void)
+{
+ if (test__start_subtest("no_delete"))
+ task_work_run(false);
+ if (test__start_subtest("with_delete"))
+ task_work_run(true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
new file mode 100644
index 000000000000..48b55539331e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "cap_helpers.h"
+#include "test_tc_bpf.skel.h"
+
+#define LO_IFINDEX 1
+
+#define TEST_DECLARE_OPTS(__fd) \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_h, .handle = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_p, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_f, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hp, .handle = 1, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hf, .handle = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_pf, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpf, .handle = 1, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpi, .handle = 1, .priority = 1, .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpr, .handle = 1, .priority = 1, \
+ .flags = BPF_TC_F_REPLACE); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpfi, .handle = 1, .priority = 1, .prog_fd = __fd, \
+ .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_prio_max, .handle = 1, .priority = UINT16_MAX + 1);
+
+static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int ret;
+
+ ret = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(ret, "bpf_prog_get_info_by_fd"))
+ return ret;
+
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ return ret;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+ opts.prog_id = 0;
+ opts.flags = BPF_TC_F_REPLACE;
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach replace mode"))
+ goto end;
+
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_query(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_query"))
+ goto end;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+end:
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_detach(hook, &opts);
+ ASSERT_OK(ret, "bpf_tc_detach");
+ return ret;
+}
+
+static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, inv_hook, .attach_point = BPF_TC_INGRESS);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1);
+ int ret;
+
+ ret = bpf_tc_hook_create(NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook = NULL"))
+ return -EINVAL;
+
+ /* hook ifindex = 0 */
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex == 0"))
+ return -EINVAL;
+ attach_opts.prog_id = 0;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ /* hook ifindex < 0 */
+ inv_hook.ifindex = -1;
+
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex < 0"))
+ return -EINVAL;
+ attach_opts.prog_id = 0;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ inv_hook.ifindex = LO_IFINDEX;
+
+ /* hook.attach_point invalid */
+ inv_hook.attach_point = 0xabcd;
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook.attach_point"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_INGRESS;
+
+ /* hook.attach_point valid, but parent invalid */
+ inv_hook.parent = TC_H_MAKE(1UL << 16, 10);
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_CUSTOM;
+ inv_hook.parent = 0;
+ /* These return EOPNOTSUPP instead of EINVAL as parent is checked after
+ * attach_point of the hook.
+ */
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_INGRESS;
+
+ /* detach */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_detach(NULL, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid opts = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpr);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid flags set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpf);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_fd set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_p);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid handle unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_h);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority > UINT16_MAX"))
+ return -EINVAL;
+ }
+
+ /* query */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_query(NULL, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid opts = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpr);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid flags set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpf);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_fd set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_p);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid handle unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_h);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority > UINT16_MAX"))
+ return -EINVAL;
+
+ /* when chain is not present, kernel returns -EINVAL */
+ ret = bpf_tc_query(hook, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query valid handle, priority set"))
+ return -EINVAL;
+ }
+
+ /* attach */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_attach(NULL, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid opts = NULL"))
+ return -EINVAL;
+
+ opts_hp.flags = 42;
+ ret = bpf_tc_attach(hook, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid flags"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_fd unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_pf);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid handle unset"))
+ return -EINVAL;
+ opts_pf.prog_fd = opts_pf.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_pf), "bpf_tc_detach");
+
+ ret = bpf_tc_attach(hook, &opts_hf);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid priority unset"))
+ return -EINVAL;
+ opts_hf.prog_fd = opts_hf.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_hf), "bpf_tc_detach");
+
+ ret = bpf_tc_attach(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid priority > UINT16_MAX"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_f);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid both handle and priority unset"))
+ return -EINVAL;
+ opts_f.prog_fd = opts_f.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_f), "bpf_tc_detach");
+ }
+
+ return 0;
+}
+
+void tc_bpf_root(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_INGRESS);
+ struct test_tc_bpf *skel = NULL;
+ bool hook_created = false;
+ int cls_fd, ret;
+
+ skel = test_tc_bpf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
+ return;
+
+ cls_fd = bpf_program__fd(skel->progs.cls);
+
+ ret = bpf_tc_hook_create(&hook);
+ if (ret == 0)
+ hook_created = true;
+
+ ret = ret == -EEXIST ? 0 : ret;
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)"))
+ goto end;
+
+ hook.attach_point = BPF_TC_CUSTOM;
+ hook.parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook.attach_point"))
+ goto end;
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ ret = bpf_tc_hook_destroy(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook.attach_point"))
+ goto end;
+
+ hook.attach_point = BPF_TC_INGRESS;
+ hook.parent = 0;
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal egress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_api(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_bpf_api"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+end:
+ if (hook_created) {
+ hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&hook);
+ }
+ test_tc_bpf__destroy(skel);
+}
+
+void tc_bpf_non_root(void)
+{
+ struct test_tc_bpf *skel = NULL;
+ __u64 caps = 0;
+ int ret;
+
+ /* In case CAP_BPF and CAP_PERFMON is not set */
+ ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps);
+ if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin"))
+ return;
+ ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL);
+ if (!ASSERT_OK(ret, "disable_cap_sys_admin"))
+ goto restore_cap;
+
+ skel = test_tc_bpf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
+ goto restore_cap;
+
+ test_tc_bpf__destroy(skel);
+
+restore_cap:
+ if (caps)
+ cap_enable_effective(caps, NULL);
+}
+
+void test_tc_bpf(void)
+{
+ if (test__start_subtest("tc_bpf_root"))
+ tc_bpf_root();
+ if (test__start_subtest("tc_bpf_non_root"))
+ tc_bpf_non_root();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c
new file mode 100644
index 000000000000..74752233e779
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "test_tc_change_tail.skel.h"
+#include "socket_helpers.h"
+
+#define LO_IFINDEX 1
+
+void test_tc_change_tail(void)
+{
+ LIBBPF_OPTS(bpf_tcx_opts, tcx_opts);
+ struct test_tc_change_tail *skel = NULL;
+ struct bpf_link *link;
+ int c1, p1;
+ char buf[2];
+ int ret;
+
+ skel = test_tc_change_tail__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_change_tail__open_and_load"))
+ return;
+
+ link = bpf_program__attach_tcx(skel->progs.change_tail, LO_IFINDEX,
+ &tcx_opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_tcx"))
+ goto destroy;
+
+ skel->links.change_tail = link;
+ ret = create_pair(AF_INET, SOCK_DGRAM, &c1, &p1);
+ if (!ASSERT_OK(ret, "create_pair"))
+ goto destroy;
+
+ ret = xsend(p1, "Tr", 2, 0);
+ ASSERT_EQ(ret, 2, "xsend(p1)");
+ ret = recv(c1, buf, 2, 0);
+ ASSERT_EQ(ret, 2, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ ret = xsend(p1, "G", 1, 0);
+ ASSERT_EQ(ret, 1, "xsend(p1)");
+ ret = recv(c1, buf, 2, 0);
+ ASSERT_EQ(ret, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ ret = xsend(p1, "E", 1, 0);
+ ASSERT_EQ(ret, 1, "xsend(p1)");
+ ret = recv(c1, buf, 1, 0);
+ ASSERT_EQ(ret, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+ ret = xsend(p1, "Z", 1, 0);
+ ASSERT_EQ(ret, 1, "xsend(p1)");
+ ret = recv(c1, buf, 1, 0);
+ ASSERT_EQ(ret, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+ close(c1);
+ close(p1);
+destroy:
+ test_tc_change_tail__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
new file mode 100644
index 000000000000..d52a62af77bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef TC_HELPERS
+#define TC_HELPERS
+#include <test_progs.h>
+
+#ifndef loopback
+# define loopback 1
+#endif
+
+static inline __u32 ifindex_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ return link_info.tcx.ifindex;
+}
+
+static inline void __assert_mprog_count(int target, int expected, int ifindex)
+{
+ __u32 count = 0, attach_flags = 0;
+ int err;
+
+ err = bpf_prog_query(ifindex, target, 0, &attach_flags,
+ NULL, &count);
+ ASSERT_EQ(count, expected, "count");
+ ASSERT_EQ(err, 0, "prog_query");
+}
+
+static inline void assert_mprog_count(int target, int expected)
+{
+ __assert_mprog_count(target, expected, loopback);
+}
+
+static inline void assert_mprog_count_ifindex(int ifindex, int target, int expected)
+{
+ __assert_mprog_count(target, expected, ifindex);
+}
+
+static inline void tc_skel_reset_all_seen(struct test_tc_link *skel)
+{
+ memset(skel->bss, 0, sizeof(*skel->bss));
+}
+
+#endif /* TC_HELPERS */
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
new file mode 100644
index 000000000000..2186a24e7d8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -0,0 +1,1962 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <uapi/linux/pkt_sched.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define loopback 1
+#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+#include "test_tc_link.skel.h"
+
+#include "netlink_helpers.h"
+#include "tc_helpers.h"
+
+void test_ns_tc_links_basic(void)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_INGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+ ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_EGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+}
+
+static void test_tc_links_before_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_id = lid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid4, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid4, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid3, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid3, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid2, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid2, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_links_before(void)
+{
+ test_tc_links_before_target(BPF_TCX_INGRESS);
+ test_tc_links_before_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_after_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(skel->links.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid3, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid3, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid2, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid2, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid4, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid4, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_links_after(void)
+{
+ test_tc_links_after_target(BPF_TCX_INGRESS);
+ test_tc_links_after_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_revision_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[3], link_ids[3];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ optl.expected_revision = 1;
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ optl.expected_revision = 1;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ optl.expected_revision = 2;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_links_revision(void)
+{
+ test_tc_links_revision_target(BPF_TCX_INGRESS);
+ test_tc_links_revision_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_chain_classic(int target, bool chain_tc_old)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ bool hook_created = false, tc_attached = false;
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ if (chain_tc_old) {
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ if (err == 0)
+ hook_created = true;
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc3);
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+ }
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+ err = bpf_link__detach(skel->links.tc2);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+ }
+ if (hook_created) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ assert_mprog_count(target, 1);
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_links_chain_classic(void)
+{
+ test_tc_chain_classic(BPF_TCX_INGRESS, false);
+ test_tc_chain_classic(BPF_TCX_EGRESS, false);
+ test_tc_chain_classic(BPF_TCX_INGRESS, true);
+ test_tc_chain_classic(BPF_TCX_EGRESS, true);
+}
+
+static void test_tc_links_replace_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3, lid1, lid2;
+ __u32 prog_ids[4], link_ids[4];
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ optl.expected_revision = 1;
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_id = pid1,
+ .expected_revision = 2,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_REPLACE,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ .expected_revision = 3,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_REPLACE | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(skel->links.tc2),
+ .expected_revision = 3,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_REPLACE | BPF_F_LINK | BPF_F_AFTER,
+ .relative_id = lid2,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_link__update_program(skel->links.tc2, skel->progs.tc3);
+ if (!ASSERT_OK(err, "link_update"))
+ goto cleanup;
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid3, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+
+ err = bpf_link__detach(skel->links.tc2);
+ if (!ASSERT_OK(err, "link_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ err = bpf_link__update_program(skel->links.tc1, skel->progs.tc1);
+ if (!ASSERT_OK(err, "link_update_self"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_links_replace(void)
+{
+ test_tc_links_replace_target(BPF_TCX_INGRESS);
+ test_tc_links_replace_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_invalid_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, lid1;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ optl.flags = BPF_F_BEFORE | BPF_F_AFTER;
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_ID,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_ID,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_LINK,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_ID,
+ .relative_id = pid2,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_ID,
+ .relative_id = 42,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, 0, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID,
+ .relative_id = ~0,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID,
+ .relative_id = lid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_ID,
+ .relative_id = pid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID,
+ .relative_id = lid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_links_invalid(void)
+{
+ test_tc_links_invalid_target(BPF_TCX_INGRESS);
+ test_tc_links_invalid_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_prepend_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid4, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid4, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid3, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid3, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid2, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid2, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid1, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid1, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_links_prepend(void)
+{
+ test_tc_links_prepend_target(BPF_TCX_INGRESS);
+ test_tc_links_prepend_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_append_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid3, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid3, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid4, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid4, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_links_append(void)
+{
+ test_tc_links_append_target(BPF_TCX_INGRESS);
+ test_tc_links_append_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_dev_cleanup_target(int target)
+{
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 pid1, pid2, pid3, pid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 3);
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 4);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc1)), 0, "tc1_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc2)), 0, "tc2_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc3)), 0, "tc3_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc4)), 0, "tc4_ifindex");
+
+ test_tc_link__destroy(skel);
+ return;
+cleanup:
+ test_tc_link__destroy(skel);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void test_ns_tc_links_dev_cleanup(void)
+{
+ test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS);
+ test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_chain_mixed(int target)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ __u32 pid1, pid2, pid3;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc5, target),
+ 0, "tc5_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc6, target),
+ 0, "tc6_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc5));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc6));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc5);
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+
+ link = bpf_program__attach_tcx(skel->progs.tc6, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc6 = link;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6");
+
+ err = bpf_link__update_program(skel->links.tc6, skel->progs.tc4);
+ if (!ASSERT_OK(err, "link_update"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6");
+
+ err = bpf_link__detach(skel->links.tc6);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 0);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6");
+
+cleanup:
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_links_chain_mixed(void)
+{
+ test_tc_chain_mixed(BPF_TCX_INGRESS);
+ test_tc_chain_mixed(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_ingress(int target, bool chain_tc_old,
+ bool tcx_teardown_first)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts,
+ .handle = 1,
+ .priority = 1,
+ );
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .ifindex = loopback,
+ .attach_point = BPF_TC_CUSTOM,
+ .parent = TC_H_INGRESS,
+ );
+ bool hook_created = false, tc_attached = false;
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ if (chain_tc_old) {
+ ASSERT_OK(system("tc qdisc add dev lo ingress"), "add_ingress");
+ hook_created = true;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc3);
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+ }
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+ err = bpf_link__detach(skel->links.tc2);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+ }
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+ assert_mprog_count(target, 1);
+ if (hook_created && tcx_teardown_first)
+ ASSERT_OK(system("tc qdisc del dev lo ingress"), "del_ingress");
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+ test_tc_link__destroy(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+ if (hook_created && !tcx_teardown_first)
+ ASSERT_OK(system("tc qdisc del dev lo ingress"), "del_ingress");
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_links_ingress(void)
+{
+ test_tc_links_ingress(BPF_TCX_INGRESS, true, true);
+ test_tc_links_ingress(BPF_TCX_INGRESS, true, false);
+ test_tc_links_ingress(BPF_TCX_INGRESS, false, false);
+}
+
+struct qdisc_req {
+ struct nlmsghdr n;
+ struct tcmsg t;
+ char buf[1024];
+};
+
+static int qdisc_replace(int ifindex, const char *kind, bool block)
+{
+ struct rtnl_handle rth = { .fd = -1 };
+ struct qdisc_req req;
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_NEWQDISC;
+ req.t.tcm_family = AF_UNSPEC;
+ req.t.tcm_ifindex = ifindex;
+ req.t.tcm_parent = 0xfffffff1;
+
+ addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1);
+ if (block)
+ addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ return err;
+}
+
+void test_ns_tc_links_dev_chain0(void)
+{
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth");
+ ifindex = if_nametoindex("foo");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+ err = qdisc_replace(ifindex, "ingress", true);
+ if (!ASSERT_OK(err, "attaching ingress"))
+ goto cleanup;
+ ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block");
+ err = qdisc_replace(ifindex, "clsact", false);
+ if (!ASSERT_OK(err, "attaching clsact"))
+ goto cleanup;
+ /* Heuristic: kern_sync_rcu() alone does not work; a wait-time of ~5s
+ * triggered the issue without the fix reliably 100% of the time.
+ */
+ sleep(5);
+ ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter");
+cleanup:
+ ASSERT_OK(system("ip link del dev foo"), "del veth");
+ ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed");
+ ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed");
+}
+
+static void test_tc_links_dev_mixed(int target)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3, pid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 3);
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 4);
+
+ tc_hook.ifindex = ifindex;
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+
+ err = bpf_tc_hook_create(&tc_hook);
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc5);
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc1)), 0, "tc1_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc2)), 0, "tc2_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc3)), 0, "tc3_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc4)), 0, "tc4_ifindex");
+
+ test_tc_link__destroy(skel);
+ return;
+cleanup:
+ test_tc_link__destroy(skel);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void test_ns_tc_links_dev_mixed(void)
+{
+ test_tc_links_dev_mixed(BPF_TCX_INGRESS);
+ test_tc_links_dev_mixed(BPF_TCX_EGRESS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
new file mode 100644
index 000000000000..2461d183dee5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -0,0 +1,870 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define netkit_peer "nk0"
+#define netkit_name "nk1"
+
+#define ping_addr_neigh 0x0a000002 /* 10.0.0.2 */
+#define ping_addr_noneigh 0x0a000003 /* 10.0.0.3 */
+
+#include "test_tc_link.skel.h"
+#include "netlink_helpers.h"
+#include "tc_helpers.h"
+
+#define NETKIT_HEADROOM 32
+#define NETKIT_TAILROOM 8
+
+#define MARK 42
+#define PRIO 0xeb9f
+#define ICMP_ECHO 8
+
+#define FLAG_ADJUST_ROOM (1 << 0)
+#define FLAG_SAME_NETNS (1 << 1)
+
+struct icmphdr {
+ __u8 type;
+ __u8 code;
+ __sum16 checksum;
+ struct {
+ __be16 id;
+ __be16 sequence;
+ } echo;
+};
+
+struct iplink_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[1024];
+};
+
+static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
+ int scrub, int peer_scrub, __u32 flags)
+{
+ struct rtnl_handle rth = { .fd = -1 };
+ struct iplink_req req = {};
+ struct rtattr *linkinfo, *data;
+ const char *type = "netkit";
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+ req.i.ifi_family = AF_UNSPEC;
+
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, netkit_name,
+ strlen(netkit_name));
+ linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+ addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+ data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ if (flags & FLAG_ADJUST_ROOM) {
+ addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, NETKIT_HEADROOM);
+ addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, NETKIT_TAILROOM);
+ }
+ addattr_nest_end(&req.n, data);
+ addattr_nest_end(&req.n, linkinfo);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ *ifindex = if_nametoindex(netkit_name);
+
+ ASSERT_GT(*ifindex, 0, "retrieve_ifindex");
+ ASSERT_OK(system("ip netns add foo"), "create netns");
+ ASSERT_OK(system("ip link set dev " netkit_name " up"),
+ "up primary");
+ ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"),
+ "addr primary");
+
+ if (mode == NETKIT_L3) {
+ ASSERT_EQ(system("ip link set dev " netkit_name
+ " addr ee:ff:bb:cc:aa:dd 2> /dev/null"), 512,
+ "set hwaddress");
+ } else {
+ ASSERT_OK(system("ip link set dev " netkit_name
+ " addr ee:ff:bb:cc:aa:dd"),
+ "set hwaddress");
+ }
+ if (flags & FLAG_SAME_NETNS) {
+ ASSERT_OK(system("ip link set dev " netkit_peer " up"),
+ "up peer");
+ ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"),
+ "addr peer");
+ } else {
+ ASSERT_OK(system("ip link set " netkit_peer " netns foo"),
+ "move peer");
+ ASSERT_OK(system("ip netns exec foo ip link set dev "
+ netkit_peer " up"), "up peer");
+ ASSERT_OK(system("ip netns exec foo ip addr add dev "
+ netkit_peer " 10.0.0.2/24"), "addr peer");
+ }
+ return err;
+}
+
+static void move_netkit(void)
+{
+ ASSERT_OK(system("ip link set " netkit_peer " netns foo"),
+ "move peer");
+ ASSERT_OK(system("ip netns exec foo ip link set dev "
+ netkit_peer " up"), "up peer");
+ ASSERT_OK(system("ip netns exec foo ip addr add dev "
+ netkit_peer " 10.0.0.2/24"), "addr peer");
+}
+
+static void destroy_netkit(void)
+{
+ ASSERT_OK(system("ip link del dev " netkit_name), "del primary");
+ ASSERT_OK(system("ip netns del foo"), "delete netns");
+ ASSERT_EQ(if_nametoindex(netkit_name), 0, netkit_name "_ifindex");
+}
+
+static int __send_icmp(__u32 dest)
+{
+ int sock, ret, mark = MARK, prio = PRIO;
+ struct sockaddr_in addr;
+ struct icmphdr icmp;
+
+ ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0");
+ if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)"))
+ return ret;
+
+ sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
+ if (!ASSERT_GE(sock, 0, "icmp_socket"))
+ return -errno;
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+ netkit_name, strlen(netkit_name) + 1);
+ if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)"))
+ goto out;
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
+ if (!ASSERT_OK(ret, "setsockopt(SO_MARK)"))
+ goto out;
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
+ &prio, sizeof(prio));
+ if (!ASSERT_OK(ret, "setsockopt(SO_PRIORITY)"))
+ goto out;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(dest);
+
+ memset(&icmp, 0, sizeof(icmp));
+ icmp.type = ICMP_ECHO;
+ icmp.echo.id = 1234;
+ icmp.echo.sequence = 1;
+
+ ret = sendto(sock, &icmp, sizeof(icmp), 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ if (!ASSERT_GE(ret, 0, "icmp_sendto"))
+ ret = -errno;
+ else
+ ret = 0;
+out:
+ close(sock);
+ return ret;
+}
+
+static int send_icmp(void)
+{
+ return __send_icmp(ping_addr_neigh);
+}
+
+void serial_test_tc_netkit_basic(void)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+ BPF_NETKIT_PEER), 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+ ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PEER, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+ destroy_netkit();
+}
+
+static void serial_test_tc_netkit_multi_links_target(int mode, int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[3], link_ids[3];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ target), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+ target), 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+ ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_multi_links(void)
+{
+ serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PEER);
+ serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PEER);
+}
+
+static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 pid1, pid2, fd1, fd2;
+ __u32 prog_ids[3];
+ struct test_tc_link *skel;
+ int err, ifindex;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
+ if (err)
+ return;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ pid1 = id_from_prog_fd(fd1);
+ pid2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ err = bpf_prog_attach_opts(fd1, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_fd1;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd2, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_fd1;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_fd2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+cleanup_fd2:
+ err = bpf_prog_detach_opts(fd2, ifindex, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count_ifindex(ifindex, target, 1);
+cleanup_fd1:
+ err = bpf_prog_detach_opts(fd1, ifindex, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count_ifindex(ifindex, target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_multi_opts(void)
+{
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PEER);
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PEER);
+}
+
+void serial_test_tc_netkit_device(void)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, pid2, lid1;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex, ifindex2;
+
+ err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
+ if (err)
+ return;
+
+ ifindex2 = if_nametoindex(netkit_peer);
+ ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+ BPF_NETKIT_PEER), 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3,
+ BPF_NETKIT_PRIMARY), 0, "tc3_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PRIMARY, &optq);
+ ASSERT_EQ(err, -EACCES, "prog_query_should_fail");
+
+ err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PEER, &optq);
+ ASSERT_EQ(err, -EACCES, "prog_query_should_fail");
+
+ link = bpf_program__attach_netkit(skel->progs.tc2, ifindex2, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_netkit(skel->progs.tc3, ifindex2, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+ destroy_netkit();
+}
+
+static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, lid1;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(__send_icmp(ping_addr_noneigh), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true /* L2: ARP */, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, mode == NETKIT_L3, "seen_eth");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_neigh_links(void)
+{
+ serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+}
+
+static void serial_test_tc_netkit_pkt_type_mode(int mode)
+{
+ LIBBPF_OPTS(bpf_netkit_opts, optl_nk);
+ LIBBPF_OPTS(bpf_tcx_opts, optl_tcx);
+ int err, ifindex, ifindex2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
+ if (err)
+ return;
+
+ ifindex2 = if_nametoindex(netkit_peer);
+ ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc7,
+ BPF_TCX_INGRESS), 0, "tc7_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl_nk);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc7, ifindex2, &optl_tcx);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc7 = link;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 1);
+
+ move_netkit();
+
+ tc_skel_reset_all_seen(skel);
+ skel->bss->set_type = true;
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc7, true, "seen_tc7");
+
+ ASSERT_EQ(skel->bss->seen_host, true, "seen_host");
+ ASSERT_EQ(skel->bss->seen_mcast, true, "seen_mcast");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_pkt_type(void)
+{
+ serial_test_tc_netkit_pkt_type_mode(NETKIT_L2);
+ serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
+}
+
+static void serial_test_tc_netkit_scrub_type(int scrub, bool room)
+{
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, scrub, scrub,
+ room ? FLAG_ADJUST_ROOM : 0);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc8,
+ BPF_NETKIT_PRIMARY), 0, "tc8_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc8, false, "seen_tc8");
+
+ link = bpf_program__attach_netkit(skel->progs.tc8, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc8 = link;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8");
+ ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark");
+ ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio");
+ ASSERT_EQ(skel->bss->headroom, room ? NETKIT_HEADROOM : 0, "headroom");
+ ASSERT_EQ(skel->bss->tailroom, room ? NETKIT_TAILROOM : 0, "tailroom");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_scrub(void)
+{
+ serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT, false);
+ serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE, true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
new file mode 100644
index 000000000000..dd7a138d8c3d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -0,0 +1,2814 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define loopback 1
+#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+#include "test_tc_link.skel.h"
+#include "tc_helpers.h"
+
+void test_ns_tc_opts_basic(void)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, id1, id2;
+ struct test_tc_link *skel;
+ __u32 prog_ids[2];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ err = bpf_prog_attach_opts(fd1, loopback, BPF_TCX_INGRESS, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_INGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_in;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ err = bpf_prog_attach_opts(fd2, loopback, BPF_TCX_EGRESS, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_in;
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_EGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_eg;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+cleanup_eg:
+ err = bpf_prog_detach_opts(fd2, loopback, BPF_TCX_EGRESS, &optd);
+ ASSERT_OK(err, "prog_detach_eg");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+cleanup_in:
+ err = bpf_prog_detach_opts(fd1, loopback, BPF_TCX_INGRESS, &optd);
+ ASSERT_OK(err, "prog_detach_in");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+static void test_tc_opts_before_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target3;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_id = id1,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id2, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_before(void)
+{
+ test_tc_opts_before_target(BPF_TCX_INGRESS);
+ test_tc_opts_before_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_after_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target3;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target3;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 8, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_after(void)
+{
+ test_tc_opts_after_target(BPF_TCX_INGRESS);
+ test_tc_opts_after_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_revision_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, id1, id2;
+ struct test_tc_link *skel;
+ __u32 prog_ids[3];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, -ESTALE, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+ LIBBPF_OPTS_RESET(optd,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ESTALE, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ LIBBPF_OPTS_RESET(optd,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ LIBBPF_OPTS_RESET(optd);
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_revision(void)
+{
+ test_tc_opts_revision_target(BPF_TCX_INGRESS);
+ test_tc_opts_revision_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_chain_classic(int target, bool chain_tc_old)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ bool hook_created = false, tc_attached = false;
+ __u32 fd1, fd2, fd3, id1, id2, id3;
+ struct test_tc_link *skel;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ if (chain_tc_old) {
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ if (err == 0)
+ hook_created = true;
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = fd3;
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+ }
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_detach;
+
+ assert_mprog_count(target, 2);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup_detach;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+cleanup_detach:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 0);
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+ }
+ if (hook_created) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_opts_chain_classic(void)
+{
+ test_tc_chain_classic(BPF_TCX_INGRESS, false);
+ test_tc_chain_classic(BPF_TCX_EGRESS, false);
+ test_tc_chain_classic(BPF_TCX_INGRESS, true);
+ test_tc_chain_classic(BPF_TCX_EGRESS, true);
+}
+
+static void test_tc_opts_replace_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, id1, id2, id3, detach_fd;
+ __u32 prog_ids[4], prog_flags[4];
+ struct test_tc_link *skel;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_id = id1,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ detach_fd = fd2;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_attach_flags = prog_flags;
+ optq.prog_ids = prog_ids;
+
+ memset(prog_flags, 0, sizeof(prog_flags));
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_EQ(optq.prog_attach_flags[0], 0, "prog_flags[0]");
+ ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]");
+ ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ detach_fd = fd3;
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id3, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE | BPF_F_BEFORE,
+ .replace_prog_fd = fd3,
+ .relative_fd = fd1,
+ .expected_revision = 4,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ detach_fd = fd2;
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE | BPF_F_AFTER,
+ .replace_prog_fd = fd2,
+ .relative_fd = fd1,
+ .expected_revision = 5,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_attach");
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER | BPF_F_REPLACE,
+ .replace_prog_fd = fd2,
+ .relative_fd = fd1,
+ .expected_revision = 5,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_attach");
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_id = id1,
+ .expected_revision = 5,
+ );
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ LIBBPF_OPTS_RESET(optd);
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_replace(void)
+{
+ test_tc_opts_replace_target(BPF_TCX_INGRESS);
+ test_tc_opts_replace_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_invalid_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, id1, id2;
+ struct test_tc_link *skel;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ID,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach_x1");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_invalid(void)
+{
+ test_tc_opts_invalid_target(BPF_TCX_INGRESS);
+ test_tc_opts_invalid_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_prepend_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id1, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_prepend(void)
+{
+ test_tc_opts_prepend_target(BPF_TCX_INGRESS);
+ test_tc_opts_prepend_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_append_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_append(void)
+{
+ test_tc_opts_append_target(BPF_TCX_INGRESS);
+ test_tc_opts_append_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_dev_cleanup_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ err = bpf_prog_attach_opts(fd1, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ err = bpf_prog_attach_opts(fd2, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ err = bpf_prog_attach_opts(fd3, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count_ifindex(ifindex, target, 3);
+
+ err = bpf_prog_attach_opts(fd4, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count_ifindex(ifindex, target, 4);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+ return;
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void test_ns_tc_opts_dev_cleanup(void)
+{
+ test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS);
+ test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_mixed_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3, pid4, lid2, lid4;
+ __u32 prog_flags[4], link_flags[4];
+ __u32 prog_ids[4], link_ids[4];
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, detach_fd;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ detach_fd = bpf_program__fd(skel->progs.tc1);
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup1;
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc2),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc3),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EBUSY, "prog_attach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc3),
+ loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ detach_fd = bpf_program__fd(skel->progs.tc3);
+
+ assert_mprog_count(target, 2);
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup1;
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc4),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc2),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+
+ optq.prog_ids = prog_ids;
+ optq.prog_attach_flags = prog_flags;
+ optq.link_ids = link_ids;
+ optq.link_attach_flags = link_flags;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(prog_flags, 0, sizeof(prog_flags));
+ memset(link_ids, 0, sizeof(link_ids));
+ memset(link_flags, 0, sizeof(link_flags));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup1;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid3, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_attach_flags[0], 0, "prog_flags[0]");
+ ASSERT_EQ(optq.link_ids[0], 0, "link_ids[0]");
+ ASSERT_EQ(optq.link_attach_flags[0], 0, "link_flags[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.link_attach_flags[1], 0, "link_flags[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]");
+ ASSERT_EQ(optq.link_ids[2], lid4, "link_ids[2]");
+ ASSERT_EQ(optq.link_attach_flags[2], 0, "link_flags[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_attach_flags[3], 0, "prog_flags[3]");
+ ASSERT_EQ(optq.link_ids[3], 0, "link_ids[3]");
+ ASSERT_EQ(optq.link_attach_flags[3], 0, "link_flags[3]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+cleanup1:
+ err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_opts_mixed(void)
+{
+ test_tc_opts_mixed_target(BPF_TCX_INGRESS);
+ test_tc_opts_mixed_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_demixed_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ __u32 pid1, pid2;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup1;
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -EBUSY, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 1);
+ goto cleanup;
+
+cleanup1:
+ err = bpf_prog_detach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_opts_demixed(void)
+{
+ test_tc_opts_demixed_target(BPF_TCX_INGRESS);
+ test_tc_opts_demixed_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_detach_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ LIBBPF_OPTS_RESET(optd);
+
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ goto cleanup;
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_detach(void)
+{
+ test_tc_opts_detach_target(BPF_TCX_INGRESS);
+ test_tc_opts_detach_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_detach_before_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd4,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd3,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id3, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id4, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd4,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 8, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 0);
+ goto cleanup;
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_detach_before(void)
+{
+ test_tc_opts_detach_before_target(BPF_TCX_INGRESS);
+ test_tc_opts_detach_before_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_detach_after_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd4,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd3,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id4, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 8, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 0);
+ goto cleanup;
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_detach_after(void)
+{
+ test_tc_opts_detach_after_target(BPF_TCX_INGRESS);
+ test_tc_opts_detach_after_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_delete_empty(int target, bool chain_tc_old)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ int err;
+
+ assert_mprog_count(target, 0);
+ if (chain_tc_old) {
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ ASSERT_OK(err, "bpf_tc_hook_create");
+ assert_mprog_count(target, 0);
+ }
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ if (chain_tc_old) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ assert_mprog_count(target, 0);
+}
+
+void test_ns_tc_opts_delete_empty(void)
+{
+ test_tc_opts_delete_empty(BPF_TCX_INGRESS, false);
+ test_tc_opts_delete_empty(BPF_TCX_EGRESS, false);
+ test_tc_opts_delete_empty(BPF_TCX_INGRESS, true);
+ test_tc_opts_delete_empty(BPF_TCX_EGRESS, true);
+}
+
+static void test_tc_chain_mixed(int target)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, fd3, id1, id2, id3;
+ struct test_tc_link *skel;
+ int err, detach_fd;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc4);
+ fd2 = bpf_program__fd(skel->progs.tc5);
+ fd3 = bpf_program__fd(skel->progs.tc6);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = fd2;
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup_hook;
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_filter;
+
+ detach_fd = fd3;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd3,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_opts;
+
+ detach_fd = fd1;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6");
+
+cleanup_opts:
+ err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6");
+
+cleanup_filter:
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+
+cleanup_hook:
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_chain_mixed(void)
+{
+ test_tc_chain_mixed(BPF_TCX_INGRESS);
+ test_tc_chain_mixed(BPF_TCX_EGRESS);
+}
+
+static int generate_dummy_prog(void)
+{
+ const struct bpf_insn prog_insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ const size_t prog_insn_cnt = ARRAY_SIZE(prog_insns);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ const size_t log_buf_sz = 256;
+ char log_buf[log_buf_sz];
+ int fd = -1;
+
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_sz;
+
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, "tcx_prog", "GPL",
+ prog_insns, prog_insn_cnt, &opts);
+ ASSERT_STREQ(log_buf, "", "log_0");
+ ASSERT_GE(fd, 0, "prog_fd");
+ return fd;
+}
+
+static void test_tc_opts_max_target(int target, int flags, bool relative)
+{
+ int err, ifindex, i, prog_fd, last_fd = -1;
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ const int max_progs = 63;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ for (i = 0; i < max_progs; i++) {
+ prog_fd = generate_dummy_prog();
+ if (!ASSERT_GE(prog_fd, 0, "dummy_prog"))
+ goto cleanup;
+ err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+ assert_mprog_count_ifindex(ifindex, target, i + 1);
+ if (i == max_progs - 1 && relative)
+ last_fd = prog_fd;
+ else
+ close(prog_fd);
+ }
+
+ prog_fd = generate_dummy_prog();
+ if (!ASSERT_GE(prog_fd, 0, "dummy_prog"))
+ goto cleanup;
+ opta.flags = flags;
+ if (last_fd > 0)
+ opta.relative_fd = last_fd;
+ err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_64_attach");
+ assert_mprog_count_ifindex(ifindex, target, max_progs);
+ close(prog_fd);
+cleanup:
+ if (last_fd > 0)
+ close(last_fd);
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void test_ns_tc_opts_max(void)
+{
+ test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false);
+ test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false);
+
+ test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_BEFORE, false);
+ test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_BEFORE, true);
+
+ test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_AFTER, true);
+ test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_AFTER, false);
+}
+
+static void test_tc_opts_query_target(int target)
+{
+ const size_t attr_size = offsetofend(union bpf_attr, query);
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ union bpf_attr attr;
+ __u32 prog_ids[10];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ /* Test 1: Double query via libbpf API */
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids, NULL, "prog_ids");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+ /* Test 2: Double query via bpf_attr & bpf(2) directly */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 3: Query with smaller prog_ids array */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+ attr.query.count = 2;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ ASSERT_EQ(err, -1, "prog_query_should_fail");
+ ASSERT_EQ(errno, ENOSPC, "prog_query_should_fail");
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 4: Query with larger prog_ids array */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+ attr.query.count = 10;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 5: Query with NULL prog_ids array but with count > 0 */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.count = sizeof(prog_ids);
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 6: Query with non-NULL prog_ids array but with count == 0 */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 7: Query with invalid flags */
+ attr.query.attach_flags = 0;
+ attr.query.query_flags = 1;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ ASSERT_EQ(err, -1, "prog_query_should_fail");
+ ASSERT_EQ(errno, EINVAL, "prog_query_should_fail");
+
+ attr.query.attach_flags = 1;
+ attr.query.query_flags = 0;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ ASSERT_EQ(err, -1, "prog_query_should_fail");
+ ASSERT_EQ(errno, EINVAL, "prog_query_should_fail");
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_query(void)
+{
+ test_tc_opts_query_target(BPF_TCX_INGRESS);
+ test_tc_opts_query_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_query_attach_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ struct test_tc_link *skel;
+ __u32 prog_ids[2];
+ __u32 fd1, id1;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ id1 = id_from_prog_fd(fd1);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 0, "count");
+ ASSERT_EQ(optq.revision, 1, "revision");
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = optq.revision,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup1;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void test_ns_tc_opts_query_attach(void)
+{
+ test_tc_opts_query_attach_target(BPF_TCX_INGRESS);
+ test_tc_opts_query_attach_target(BPF_TCX_EGRESS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
new file mode 100644
index 000000000000..76d72a59365e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -0,0 +1,1303 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+ * between src and dst. The netns fwd has veth links to each src and dst. The
+ * client is in src and server in dst. The test installs a TC BPF program to each
+ * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
+ * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
+ * switch from ingress side; it also installs a checker prog on the egress side
+ * to drop unexpected traffic.
+ */
+
+#include <arpa/inet.h>
+#include <linux/if_tun.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <linux/time_types.h>
+#include <linux/net_tstamp.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "netlink_helpers.h"
+#include "test_tc_neigh_fib.skel.h"
+#include "test_tc_neigh.skel.h"
+#include "test_tc_peer.skel.h"
+#include "test_tc_dtime.skel.h"
+
+#ifndef TCP_TX_DELAY
+#define TCP_TX_DELAY 37
+#endif
+
+#define NS_SRC "ns_src"
+#define NS_FWD "ns_fwd"
+#define NS_DST "ns_dst"
+
+#define IP4_SRC "172.16.1.100"
+#define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
+#define IP4_PORT 9004
+
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
+#define IP6_PORT 9006
+
+#define IP4_SLL "169.254.0.1"
+#define IP4_DLL "169.254.0.2"
+#define IP4_NET "169.254.0.0"
+
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+#define MAC_SRC_FWD "00:33:44:55:66:77"
+#define MAC_SRC "00:44:55:66:77:88"
+
+#define IFADDR_STR_LEN 18
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
+
+#define TIMEOUT_MILLIS 10000
+#define NSEC_PER_SEC 1000000000ULL
+
+#define log_err(MSG, ...) \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
+
+static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
+static struct netns_obj *netns_objs[3];
+
+static int write_file(const char *path, const char *newval)
+{
+ FILE *f;
+
+ f = fopen(path, "r+");
+ if (!f)
+ return -1;
+ if (fwrite(newval, strlen(newval), 1, f) != 1) {
+ log_err("writing to %s failed", path);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+static int netns_setup_namespaces(const char *verb)
+{
+ struct netns_obj **ns_obj = netns_objs;
+ const char * const *ns = namespaces;
+
+ while (*ns) {
+ if (strcmp(verb, "add") == 0) {
+ *ns_obj = netns_new(*ns, false);
+ if (!ASSERT_OK_PTR(*ns_obj, "netns_new"))
+ return -1;
+ } else {
+ if (!ASSERT_OK_PTR(*ns_obj, "netns_obj is NULL"))
+ return -1;
+ netns_free(*ns_obj);
+ *ns_obj = NULL;
+ }
+ ns++;
+ ns_obj++;
+ }
+ return 0;
+}
+
+static void netns_setup_namespaces_nofail(const char *verb)
+{
+ struct netns_obj **ns_obj = netns_objs;
+ const char * const *ns = namespaces;
+
+ while (*ns) {
+ if (strcmp(verb, "add") == 0) {
+ *ns_obj = netns_new(*ns, false);
+ } else {
+ if (*ns_obj)
+ netns_free(*ns_obj);
+ *ns_obj = NULL;
+ }
+ ns++;
+ ns_obj++;
+ }
+}
+
+enum dev_mode {
+ MODE_VETH,
+ MODE_NETKIT,
+};
+
+struct netns_setup_result {
+ enum dev_mode dev_mode;
+ int ifindex_src;
+ int ifindex_src_fwd;
+ int ifindex_dst;
+ int ifindex_dst_fwd;
+};
+
+static int get_ifaddr(const char *name, char *ifaddr)
+{
+ char path[PATH_MAX];
+ FILE *f;
+ int ret;
+
+ snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
+ f = fopen(path, "r");
+ if (!ASSERT_OK_PTR(f, path))
+ return -1;
+
+ ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
+ if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+static int create_netkit(int mode, char *prim, char *peer)
+{
+ struct rtattr *linkinfo, *data, *peer_info;
+ struct rtnl_handle rth = { .fd = -1 };
+ const char *type = "netkit";
+ struct {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[1024];
+ } req = {};
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+ req.i.ifi_family = AF_UNSPEC;
+
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
+ linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+ addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+ data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
+ req.n.nlmsg_len += sizeof(struct ifinfomsg);
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
+ addattr_nest_end(&req.n, peer_info);
+ addattr_nest_end(&req.n, data);
+ addattr_nest_end(&req.n, linkinfo);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ return err;
+}
+
+static int netns_setup_links_and_routes(struct netns_setup_result *result)
+{
+ struct nstoken *nstoken = NULL;
+ char src_fwd_addr[IFADDR_STR_LEN+1] = {};
+ char src_addr[IFADDR_STR_LEN + 1] = {};
+ int err;
+
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip link add src address " MAC_SRC " type veth "
+ "peer name src_fwd address " MAC_SRC_FWD);
+ SYS(fail, "ip link add dst address " MAC_DST " type veth "
+ "peer name dst_fwd address " MAC_DST_FWD);
+ } else if (result->dev_mode == MODE_NETKIT) {
+ err = create_netkit(NETKIT_L3, "src", "src_fwd");
+ if (!ASSERT_OK(err, "create_ifindex_src"))
+ goto fail;
+ err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
+ if (!ASSERT_OK(err, "create_ifindex_dst"))
+ goto fail;
+ }
+
+ if (get_ifaddr("src_fwd", src_fwd_addr))
+ goto fail;
+
+ if (get_ifaddr("src", src_addr))
+ goto fail;
+
+ result->ifindex_src = if_nametoindex("src");
+ if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
+ goto fail;
+
+ result->ifindex_src_fwd = if_nametoindex("src_fwd");
+ if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
+ goto fail;
+
+ result->ifindex_dst = if_nametoindex("dst");
+ if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
+ goto fail;
+
+ result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
+ if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
+ goto fail;
+
+ SYS(fail, "ip link set src netns " NS_SRC);
+ SYS(fail, "ip link set src_fwd netns " NS_FWD);
+ SYS(fail, "ip link set dst_fwd netns " NS_FWD);
+ SYS(fail, "ip link set dst netns " NS_DST);
+
+ /** setup in 'src' namespace */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto fail;
+
+ SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
+ SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
+ SYS(fail, "ip link set dev src up");
+
+ SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
+
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
+ src_fwd_addr);
+ SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
+ src_fwd_addr);
+ }
+
+ close_netns(nstoken);
+
+ /** setup in 'fwd' namespace */
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ goto fail;
+
+ /* The fwd netns automatically gets a v6 LL address / routes, but also
+ * needs v4 one in order to start ARP probing. IP4_NET route is added
+ * to the endpoints so that the ARP processing will reply.
+ */
+ SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
+ SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
+ SYS(fail, "ip link set dev src_fwd up");
+ SYS(fail, "ip link set dev dst_fwd up");
+
+ SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
+
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
+ SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
+ SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
+ SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
+ }
+
+ close_netns(nstoken);
+
+ /** setup in 'dst' namespace */
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ goto fail;
+
+ SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
+ SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
+ SYS(fail, "ip link set dev dst up");
+ SYS(fail, "ip link set dev lo up");
+
+ SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
+
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
+ }
+
+ close_netns(nstoken);
+
+ return 0;
+fail:
+ if (nstoken)
+ close_netns(nstoken);
+ return -1;
+}
+
+static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
+{
+ char err_str[128], ifname[16];
+ int err;
+
+ qdisc_hook->ifindex = ifindex;
+ qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(qdisc_hook);
+ snprintf(err_str, sizeof(err_str),
+ "qdisc add dev %s clsact",
+ if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
+ err_str[sizeof(err_str) - 1] = 0;
+ ASSERT_OK(err, err_str);
+
+ return err;
+}
+
+static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
+ enum bpf_tc_attach_point xgress,
+ const struct bpf_program *prog, int priority)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach);
+ char err_str[128], ifname[16];
+ int err;
+
+ qdisc_hook->attach_point = xgress;
+ tc_attach.prog_fd = bpf_program__fd(prog);
+ tc_attach.priority = priority;
+ err = bpf_tc_attach(qdisc_hook, &tc_attach);
+ snprintf(err_str, sizeof(err_str),
+ "filter add dev %s %s prio %d bpf da %s",
+ if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
+ xgress == BPF_TC_INGRESS ? "ingress" : "egress",
+ priority, bpf_program__name(prog));
+ err_str[sizeof(err_str) - 1] = 0;
+ ASSERT_OK(err, err_str);
+
+ return err;
+}
+
+#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \
+ if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \
+ goto fail; \
+})
+
+#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \
+ if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \
+ goto fail; \
+})
+
+static int netns_load_bpf(const struct bpf_program *src_prog,
+ const struct bpf_program *dst_prog,
+ const struct bpf_program *chk_prog,
+ const struct netns_setup_result *setup_result)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+ int err;
+
+ /* tc qdisc add dev src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+ /* tc filter add dev src_fwd ingress bpf da src_prog */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
+ /* tc filter add dev src_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
+
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress bpf da dst_prog */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
+ /* tc filter add dev dst_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void test_tcp(int family, const char *addr, __u16 port)
+{
+ int listen_fd = -1, accept_fd = -1, client_fd = -1;
+ char buf[] = "testing testing";
+ int n;
+ struct nstoken *nstoken;
+
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ return;
+
+ listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
+ if (!ASSERT_GE(listen_fd, 0, "listen"))
+ goto done;
+
+ close_netns(nstoken);
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+
+ client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto done;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto done;
+
+ if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
+ goto done;
+
+ n = write(client_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+
+ n = read(accept_fd, buf, sizeof(buf));
+ ASSERT_EQ(n, sizeof(buf), "recv from server");
+
+done:
+ if (nstoken)
+ close_netns(nstoken);
+ if (listen_fd >= 0)
+ close(listen_fd);
+ if (accept_fd >= 0)
+ close(accept_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+}
+
+static int test_ping(int family, const char *addr)
+{
+ SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
+ return 0;
+fail:
+ return -1;
+}
+
+static void test_connectivity(void)
+{
+ test_tcp(AF_INET, IP4_DST, IP4_PORT);
+ test_ping(AF_INET, IP4_DST);
+ test_tcp(AF_INET6, IP6_DST, IP6_PORT);
+ test_ping(AF_INET6, IP6_DST);
+}
+
+static int set_forwarding(bool enable)
+{
+ int err;
+
+ err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
+ return err;
+
+ err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
+ return err;
+
+ return 0;
+}
+
+static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
+{
+ struct timespec pkt_ts = {};
+ char ctl[CMSG_SPACE(sizeof(pkt_ts))];
+ struct timespec now_ts;
+ struct msghdr msg = {};
+ __u64 now_ns, pkt_ns;
+ struct cmsghdr *cmsg;
+ struct iovec iov;
+ char data[32];
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &ctl;
+ msg.msg_controllen = sizeof(ctl);
+
+ ret = recvmsg(fd, &msg, 0);
+ if (!ASSERT_EQ(ret, s, "recvmsg"))
+ return -1;
+ ASSERT_STRNEQ(data, expected, s, "expected rcv data");
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMPNS)
+ memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
+
+ pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
+ if (tstamp) {
+ /* caller will check the tstamp itself */
+ *tstamp = pkt_ns;
+ return 0;
+ }
+
+ ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
+
+ ret = clock_gettime(CLOCK_REALTIME, &now_ts);
+ ASSERT_OK(ret, "clock_gettime");
+ now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
+
+ if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
+ ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
+ "check rcv tstamp");
+ return 0;
+}
+
+static void rcv_tstamp(int fd, const char *expected, size_t s)
+{
+ __rcv_tstamp(fd, expected, s, NULL);
+}
+
+static int wait_netstamp_needed_key(void)
+{
+ int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
+ char buf[] = "testing testing";
+ struct nstoken *nstoken;
+ __u64 tstamp = 0;
+
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ return -1;
+
+ srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto done;
+
+ err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS,
+ &opt, sizeof(opt));
+ if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
+ if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
+ goto done;
+
+again:
+ n = write(cli_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+ err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
+ if (!ASSERT_OK(err, "__rcv_tstamp"))
+ goto done;
+ if (!tstamp && nretries++ < 5) {
+ sleep(1);
+ printf("netstamp_needed_key retry#%d\n", nretries);
+ goto again;
+ }
+
+done:
+ if (!tstamp && srv_fd != -1) {
+ close(srv_fd);
+ srv_fd = -1;
+ }
+ if (cli_fd != -1)
+ close(cli_fd);
+ close_netns(nstoken);
+ return srv_fd;
+}
+
+static void snd_tstamp(int fd, char *b, size_t s)
+{
+ struct sock_txtime opt = { .clockid = CLOCK_TAI };
+ char ctl[CMSG_SPACE(sizeof(__u64))];
+ struct timespec now_ts;
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ struct iovec iov;
+ __u64 now_ns;
+ int ret;
+
+ ret = clock_gettime(CLOCK_TAI, &now_ts);
+ ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
+ now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
+
+ iov.iov_base = b;
+ iov.iov_len = s;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &ctl;
+ msg.msg_controllen = sizeof(ctl);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_TXTIME;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
+ *(__u64 *)CMSG_DATA(cmsg) = now_ns;
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
+ ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
+
+ ret = sendmsg(fd, &msg, 0);
+ ASSERT_EQ(ret, s, "sendmsg");
+}
+
+static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
+{
+ int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err;
+ char buf[] = "testing testing";
+ struct nstoken *nstoken;
+
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ return;
+ listen_fd = start_server(family, type, addr, port, 0);
+ close_netns(nstoken);
+
+ if (!ASSERT_GE(listen_fd, 0, "listen"))
+ return;
+
+ /* Ensure the kernel puts the (rcv) timestamp for all skb */
+ err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS,
+ &opt, sizeof(opt));
+ if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)"))
+ goto done;
+
+ if (type == SOCK_STREAM) {
+ /* Ensure the kernel set EDT when sending out rst/ack
+ * from the kernel's ctl_sk.
+ */
+ err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt,
+ sizeof(opt));
+ if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
+ goto done;
+ }
+
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+ client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
+ close_netns(nstoken);
+
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto done;
+
+ if (type == SOCK_STREAM) {
+ int n;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto done;
+
+ n = write(client_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+ rcv_tstamp(accept_fd, buf, sizeof(buf));
+ } else {
+ snd_tstamp(client_fd, buf, sizeof(buf));
+ rcv_tstamp(listen_fd, buf, sizeof(buf));
+ }
+
+done:
+ close(listen_fd);
+ if (accept_fd != -1)
+ close(accept_fd);
+ if (client_fd != -1)
+ close(client_fd);
+}
+
+static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
+ const struct netns_setup_result *setup_result)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
+ struct nstoken *nstoken;
+ int err;
+
+ /* setup ns_src tc progs */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
+ return -1;
+ /* tc qdisc add dev src clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
+ /* tc filter add dev src ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev src egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+ close_netns(nstoken);
+
+ /* setup ns_dst tc progs */
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
+ return -1;
+ /* tc qdisc add dev dst clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
+ /* tc filter add dev dst ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev dst egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+ close_netns(nstoken);
+
+ /* setup ns_fwd tc progs */
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+ return -1;
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio100, 100);
+ /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio101, 101);
+ /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio100, 100);
+ /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio101, 101);
+
+ /* tc qdisc add dev src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+ /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio100, 100);
+ /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio101, 101);
+ /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio100, 100);
+ /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio101, 101);
+ close_netns(nstoken);
+ return 0;
+
+fail:
+ close_netns(nstoken);
+ return err;
+}
+
+enum {
+ INGRESS_FWDNS_P100,
+ INGRESS_FWDNS_P101,
+ EGRESS_FWDNS_P100,
+ EGRESS_FWDNS_P101,
+ INGRESS_ENDHOST,
+ EGRESS_ENDHOST,
+ SET_DTIME,
+ __MAX_CNT,
+};
+
+const char *cnt_names[] = {
+ "ingress_fwdns_p100",
+ "ingress_fwdns_p101",
+ "egress_fwdns_p100",
+ "egress_fwdns_p101",
+ "ingress_endhost",
+ "egress_endhost",
+ "set_dtime",
+};
+
+enum {
+ TCP_IP6_CLEAR_DTIME,
+ TCP_IP4,
+ TCP_IP6,
+ UDP_IP4,
+ UDP_IP6,
+ TCP_IP4_RT_FWD,
+ TCP_IP6_RT_FWD,
+ UDP_IP4_RT_FWD,
+ UDP_IP6_RT_FWD,
+ UKN_TEST,
+ __NR_TESTS,
+};
+
+const char *test_names[] = {
+ "tcp ip6 clear dtime",
+ "tcp ip4",
+ "tcp ip6",
+ "udp ip4",
+ "udp ip6",
+ "tcp ip4 rt fwd",
+ "tcp ip6 rt fwd",
+ "udp ip4 rt fwd",
+ "udp ip6 rt fwd",
+};
+
+static const char *dtime_cnt_str(int test, int cnt)
+{
+ static char name[64];
+
+ snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
+
+ return name;
+}
+
+static const char *dtime_err_str(int test, int cnt)
+{
+ static char name[64];
+
+ snprintf(name, sizeof(name), "%s %s errs", test_names[test],
+ cnt_names[cnt]);
+
+ return name;
+}
+
+static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
+{
+ int i, t = TCP_IP6_CLEAR_DTIME;
+ __u32 *dtimes = skel->bss->dtimes[t];
+ __u32 *errs = skel->bss->errs[t];
+
+ skel->bss->test = t;
+ test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
+
+ ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
+ dtime_cnt_str(t, INGRESS_FWDNS_P100));
+ ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
+ dtime_cnt_str(t, INGRESS_FWDNS_P101));
+ ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
+ dtime_cnt_str(t, EGRESS_FWDNS_P100));
+ ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
+ dtime_cnt_str(t, EGRESS_FWDNS_P101));
+ ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
+ dtime_cnt_str(t, EGRESS_ENDHOST));
+ ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
+ dtime_cnt_str(t, INGRESS_ENDHOST));
+
+ for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
+ ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
+}
+
+static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
+{
+ __u32 *dtimes, *errs;
+ const char *addr;
+ int i, t;
+
+ if (family == AF_INET) {
+ t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
+ addr = IP4_DST;
+ } else {
+ t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
+ addr = IP6_DST;
+ }
+
+ dtimes = skel->bss->dtimes[t];
+ errs = skel->bss->errs[t];
+
+ skel->bss->test = t;
+ test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
+
+ /* fwdns_prio100 prog does not read delivery_time_type, so
+ * kernel puts the (rcv) timestamp in __sk_buff->tstamp
+ */
+ ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
+ dtime_cnt_str(t, INGRESS_FWDNS_P100));
+ for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
+ ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
+
+ for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
+ ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
+}
+
+static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
+{
+ __u32 *dtimes, *errs;
+ const char *addr;
+ int i, t;
+
+ if (family == AF_INET) {
+ t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
+ addr = IP4_DST;
+ } else {
+ t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
+ addr = IP6_DST;
+ }
+
+ dtimes = skel->bss->dtimes[t];
+ errs = skel->bss->errs[t];
+
+ skel->bss->test = t;
+ test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
+
+ ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
+ dtime_cnt_str(t, INGRESS_FWDNS_P100));
+ for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
+ ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
+
+ for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
+ ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
+}
+
+static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
+{
+ struct test_tc_dtime *skel;
+ struct nstoken *nstoken;
+ int hold_tstamp_fd, err;
+
+ /* Hold a sk with the SOCK_TIMESTAMP set to ensure there
+ * is no delay in the kernel net_enable_timestamp().
+ * This ensures the following tests must have
+ * non zero rcv tstamp in the recvmsg().
+ */
+ hold_tstamp_fd = wait_netstamp_needed_key();
+ if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
+ return;
+
+ skel = test_tc_dtime__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
+
+ err = test_tc_dtime__load(skel);
+ if (!ASSERT_OK(err, "test_tc_dtime__load"))
+ goto done;
+
+ if (netns_load_dtime_bpf(skel, setup_result))
+ goto done;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ goto done;
+ err = set_forwarding(false);
+ close_netns(nstoken);
+ if (!ASSERT_OK(err, "disable forwarding"))
+ goto done;
+
+ test_tcp_clear_dtime(skel);
+
+ test_tcp_dtime(skel, AF_INET, true);
+ test_tcp_dtime(skel, AF_INET6, true);
+ test_udp_dtime(skel, AF_INET, true);
+ test_udp_dtime(skel, AF_INET6, true);
+
+ /* Test the kernel ip[6]_forward path instead
+ * of bpf_redirect_neigh().
+ */
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ goto done;
+ err = set_forwarding(true);
+ close_netns(nstoken);
+ if (!ASSERT_OK(err, "enable forwarding"))
+ goto done;
+
+ test_tcp_dtime(skel, AF_INET, false);
+ test_tcp_dtime(skel, AF_INET6, false);
+ test_udp_dtime(skel, AF_INET, false);
+ test_udp_dtime(skel, AF_INET6, false);
+
+done:
+ test_tc_dtime__destroy(skel);
+ close(hold_tstamp_fd);
+}
+
+static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh_fib *skel = NULL;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_neigh_fib__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
+ goto done;
+
+ if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+ goto done;
+
+ if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
+ skel->progs.tc_chk, setup_result))
+ goto done;
+
+ /* bpf_fib_lookup() checks if forwarding is enabled */
+ if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_neigh_fib__destroy(skel);
+ close_netns(nstoken);
+}
+
+static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh *skel = NULL;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_neigh__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
+
+ err = test_tc_neigh__load(skel);
+ if (!ASSERT_OK(err, "test_tc_neigh__load"))
+ goto done;
+
+ if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
+ skel->progs.tc_chk, setup_result))
+ goto done;
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_neigh__destroy(skel);
+ close_netns(nstoken);
+}
+
+static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken;
+ struct test_tc_peer *skel;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto done;
+
+ if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
+ skel->progs.tc_chk, setup_result))
+ goto done;
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_peer__destroy(skel);
+ close_netns(nstoken);
+}
+
+static int tun_open(char *name)
+{
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+ if (*name)
+ strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+ goto fail;
+
+ SYS(fail, "ip link set dev %s up", name);
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
+{
+ fd_set rfds, wfds;
+
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+
+ for (;;) {
+ char buf[1500];
+ int direction, nread, nwrite;
+
+ FD_SET(src_fd, &rfds);
+ FD_SET(target_fd, &rfds);
+
+ if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+ log_err("select failed");
+ return 1;
+ }
+
+ direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+ nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+ if (nread < 0) {
+ log_err("read failed");
+ return 1;
+ }
+
+ nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+ if (nwrite != nread) {
+ log_err("write failed");
+ return 1;
+ }
+ }
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+ struct test_tc_peer *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int err;
+ int tunnel_pid = -1;
+ int src_fd, target_fd = -1;
+ int ifindex;
+
+ /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+ * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+ * expose the L2 headers encapsulating the IP packet to BPF and hence
+ * don't have skb in suitable state for this test. Alternative to TUN/TAP
+ * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+ * but that requires much more complicated setup.
+ */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
+ return;
+
+ src_fd = tun_open("tun_src");
+ if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+ goto fail;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+ goto fail;
+
+ target_fd = tun_open("tun_fwd");
+ if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+ goto fail;
+
+ tunnel_pid = fork();
+ if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+ goto fail;
+
+ if (tunnel_pid == 0)
+ exit(tun_relay_loop(src_fd, target_fd));
+
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto fail;
+
+ ifindex = if_nametoindex("tun_fwd");
+ if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
+ goto fail;
+
+ skel->rodata->IFINDEX_SRC = ifindex;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto fail;
+
+ /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
+ * towards dst, and "tc_dst" to redirect packets
+ * and "tc_chk" on dst_fwd to drop non-redirected packets.
+ */
+ /* tc qdisc add dev tun_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
+ /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
+ XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
+
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
+ /* tc filter add dev dst_fwd egress bpf da tc_chk */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
+
+ /* Setup route and neigh tables */
+ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+ SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ " dev tun_src scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
+ SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ " dev tun_src scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
+
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto fail;
+
+ test_connectivity();
+
+fail:
+ if (tunnel_pid > 0) {
+ kill(tunnel_pid, SIGTERM);
+ waitpid(tunnel_pid, NULL, 0);
+ }
+ if (src_fd >= 0)
+ close(src_fd);
+ if (target_fd >= 0)
+ close(target_fd);
+ if (skel)
+ test_tc_peer__destroy(skel);
+ if (nstoken)
+ close_netns(nstoken);
+}
+
+#define RUN_TEST(name, mode) \
+ ({ \
+ struct netns_setup_result setup_result = { .dev_mode = mode, }; \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
+ "setup links and routes")) \
+ test_ ## name(&setup_result); \
+ netns_setup_namespaces("delete"); \
+ } \
+ })
+
+static void *test_tc_redirect_run_tests(void *arg)
+{
+ netns_setup_namespaces_nofail("delete");
+
+ RUN_TEST(tc_redirect_peer, MODE_VETH);
+ RUN_TEST(tc_redirect_peer, MODE_NETKIT);
+ RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
+ RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
+ RUN_TEST(tc_redirect_neigh, MODE_VETH);
+ RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
+ RUN_TEST(tc_redirect_dtime, MODE_VETH);
+ return NULL;
+}
+
+void test_tc_redirect(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c
new file mode 100644
index 000000000000..eaf441dc7e79
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_tcp_custom_syncookie.skel.h"
+
+static struct test_tcp_custom_syncookie_case {
+ int family, type;
+ char addr[16];
+ char name[10];
+} test_cases[] = {
+ {
+ .name = "IPv4 TCP",
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .addr = "127.0.0.1",
+ },
+ {
+ .name = "IPv6 TCP",
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .addr = "::1",
+ },
+};
+
+static int setup_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "ip"))
+ goto err;
+
+ if (!ASSERT_OK(write_sysctl("/proc/sys/net/ipv4/tcp_ecn", "1"),
+ "write_sysctl"))
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int setup_tc(struct test_tcp_custom_syncookie *skel)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach,
+ .prog_fd = bpf_program__fd(skel->progs.tcp_custom_syncookie));
+
+ qdisc_lo.ifindex = if_nametoindex("lo");
+ if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach),
+ "filter add dev lo ingress"))
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+#define msg "Hello World"
+#define msglen 11
+
+static void transfer_message(int sender, int receiver)
+{
+ char buf[msglen];
+ int ret;
+
+ ret = send(sender, msg, msglen, 0);
+ if (!ASSERT_EQ(ret, msglen, "send"))
+ return;
+
+ memset(buf, 0, sizeof(buf));
+
+ ret = recv(receiver, buf, msglen, 0);
+ if (!ASSERT_EQ(ret, msglen, "recv"))
+ return;
+
+ ret = strncmp(buf, msg, msglen);
+ if (!ASSERT_EQ(ret, 0, "strncmp"))
+ return;
+}
+
+static void create_connection(struct test_tcp_custom_syncookie_case *test_case)
+{
+ int server, client, child;
+
+ server = start_server(test_case->family, test_case->type, test_case->addr, 0, 0);
+ if (!ASSERT_NEQ(server, -1, "start_server"))
+ return;
+
+ client = connect_to_fd(server, 0);
+ if (!ASSERT_NEQ(client, -1, "connect_to_fd"))
+ goto close_server;
+
+ child = accept(server, NULL, 0);
+ if (!ASSERT_NEQ(child, -1, "accept"))
+ goto close_client;
+
+ transfer_message(client, child);
+ transfer_message(child, client);
+
+ close(child);
+close_client:
+ close(client);
+close_server:
+ close(server);
+}
+
+void test_tcp_custom_syncookie(void)
+{
+ struct test_tcp_custom_syncookie *skel;
+ int i;
+
+ if (setup_netns())
+ return;
+
+ skel = test_tcp_custom_syncookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (setup_tc(skel))
+ goto destroy_skel;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ if (!test__start_subtest(test_cases[i].name))
+ continue;
+
+ skel->bss->handled_syn = false;
+ skel->bss->handled_ack = false;
+
+ create_connection(&test_cases[i]);
+
+ ASSERT_EQ(skel->bss->handled_syn, true, "SYN is not handled at tc.");
+ ASSERT_EQ(skel->bss->handled_ack, true, "ACK is not handled at tc");
+ }
+
+destroy_skel:
+ system("tc qdisc del dev lo clsact");
+
+ test_tcp_custom_syncookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
index 594307dffd13..e070bca2b764 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
@@ -3,14 +3,12 @@
void test_tcp_estats(void)
{
- const char *file = "./test_tcp_estats.o";
+ const char *file = "./test_tcp_estats.bpf.o";
int err, prog_fd;
struct bpf_object *obj;
- __u32 duration = 0;
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- CHECK(err, "", "err %d errno %d\n", err, errno);
- if (err)
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (!ASSERT_OK(err, ""))
return;
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 08d19cafd5e8..56685fc03c7e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -42,33 +42,10 @@ struct sk_fds {
static int create_netns(void)
{
- if (CHECK(unshare(CLONE_NEWNET), "create netns",
- "unshare(CLONE_NEWNET): %s (%d)",
- strerror(errno), errno))
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
return -1;
- if (CHECK(system("ip link set dev lo up"), "run ip cmd",
- "failed to bring lo link up\n"))
- return -1;
-
- return 0;
-}
-
-static int write_sysctl(const char *sysctl, const char *value)
-{
- int fd, err, len;
-
- fd = open(sysctl, O_WRONLY);
- if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
- sysctl, strerror(errno), errno))
- return -1;
-
- len = strlen(value);
- err = write(fd, value, len);
- close(fd);
- if (CHECK(err != len, "write sysctl",
- "write(%s, %s): err:%d %s (%d)\n",
- sysctl, value, err, strerror(errno), errno))
+ if (!ASSERT_OK(system("ip link set dev lo up"), "run ip cmd"))
return -1;
return 0;
@@ -100,16 +77,12 @@ static int sk_fds_shutdown(struct sk_fds *sk_fds)
shutdown(sk_fds->active_fd, SHUT_WR);
ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte));
- if (CHECK(ret != 0, "read-after-shutdown(passive_fd):",
- "ret:%d %s (%d)\n",
- ret, strerror(errno), errno))
+ if (!ASSERT_EQ(ret, 0, "read-after-shutdown(passive_fd):"))
return -1;
shutdown(sk_fds->passive_fd, SHUT_WR);
ret = read(sk_fds->active_fd, &abyte, sizeof(abyte));
- if (CHECK(ret != 0, "read-after-shutdown(active_fd):",
- "ret:%d %s (%d)\n",
- ret, strerror(errno), errno))
+ if (!ASSERT_EQ(ret, 0, "read-after-shutdown(active_fd):"))
return -1;
return 0;
@@ -122,8 +95,7 @@ static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
socklen_t len;
sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
- if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n",
- strerror(errno), errno))
+ if (!ASSERT_NEQ(sk_fds->srv_fd, -1, "start_server"))
goto error;
if (fast_open)
@@ -132,28 +104,25 @@ static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
else
sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0);
- if (CHECK_FAIL(sk_fds->active_fd == -1)) {
+ if (!ASSERT_NEQ(sk_fds->active_fd, -1, "")) {
close(sk_fds->srv_fd);
goto error;
}
len = sizeof(addr6);
- if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
- &len), "getsockname(srv_fd)", "%s (%d)\n",
- strerror(errno), errno))
+ if (!ASSERT_OK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(srv_fd)"))
goto error_close;
sk_fds->passive_lport = ntohs(addr6.sin6_port);
len = sizeof(addr6);
- if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
- &len), "getsockname(active_fd)", "%s (%d)\n",
- strerror(errno), errno))
+ if (!ASSERT_OK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(active_fd)"))
goto error_close;
sk_fds->active_lport = ntohs(addr6.sin6_port);
sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0);
- if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n",
- strerror(errno), errno))
+ if (!ASSERT_NEQ(sk_fds->passive_fd, -1, "accept(srv_fd)"))
goto error_close;
if (fast_open) {
@@ -161,8 +130,7 @@ static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
int ret;
ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in));
- if (CHECK(ret != sizeof(fast), "read fastopen syn data",
- "expected=%lu actual=%d\n", sizeof(fast), ret)) {
+ if (!ASSERT_EQ(ret, sizeof(fast), "read fastopen syn data")) {
close(sk_fds->passive_fd);
goto error_close;
}
@@ -183,8 +151,7 @@ static int check_hdr_opt(const struct bpf_test_option *exp,
const struct bpf_test_option *act,
const char *hdr_desc)
{
- if (CHECK(memcmp(exp, act, sizeof(*exp)),
- "expected-vs-actual", "unexpected %s\n", hdr_desc)) {
+ if (!ASSERT_EQ(memcmp(exp, act, sizeof(*exp)), 0, hdr_desc)) {
print_option(exp, "expected: ");
print_option(act, " actual: ");
return -1;
@@ -198,13 +165,11 @@ static int check_hdr_stg(const struct hdr_stg *exp, int fd,
{
struct hdr_stg act;
- if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
- "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n",
- stg_desc, strerror(errno), errno))
+ if (!ASSERT_OK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
+ "map_lookup(hdr_stg_map_fd)"))
return -1;
- if (CHECK(memcmp(exp, &act, sizeof(*exp)),
- "expected-vs-actual", "unexpected %s\n", stg_desc)) {
+ if (!ASSERT_EQ(memcmp(exp, &act, sizeof(*exp)), 0, stg_desc)) {
print_hdr_stg(exp, "expected: ");
print_hdr_stg(&act, " actual: ");
return -1;
@@ -248,9 +213,8 @@ static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
if (sk_fds_shutdown(sk_fds))
goto check_linum;
- if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags,
- "Unexpected inherit_cb_flags", "0x%x != 0x%x\n",
- skel->bss->inherit_cb_flags, expected_inherit_cb_flags))
+ if (!ASSERT_EQ(expected_inherit_cb_flags, skel->bss->inherit_cb_flags,
+ "inherit_cb_flags"))
goto check_linum;
if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
@@ -277,7 +241,7 @@ static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
"active_fin_in");
check_linum:
- CHECK_FAIL(check_error_linum(sk_fds));
+ ASSERT_FALSE(check_error_linum(sk_fds), "check_error_linum");
sk_fds_close(sk_fds);
}
@@ -353,8 +317,7 @@ static void fastopen_estab(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, true)) {
@@ -384,7 +347,7 @@ static void syncookie_estab(void)
exp_active_estab_in.max_delack_ms = 22;
exp_passive_hdr_stg.syncookie = true;
- exp_active_hdr_stg.resend_syn = true,
+ exp_active_hdr_stg.resend_syn = true;
prepare_out();
@@ -398,8 +361,7 @@ static void syncookie_estab(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -431,8 +393,7 @@ static void fin(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -471,8 +432,7 @@ static void __simple_estab(bool exprm)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -509,8 +469,7 @@ static void misc(void)
return;
link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -522,26 +481,20 @@ static void misc(void)
/* MSG_EOR to ensure skb will not be combined */
ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg),
MSG_EOR);
- if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n",
- ret))
+ if (!ASSERT_EQ(ret, sizeof(send_msg), "send(msg)"))
goto check_linum;
ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg));
- if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n",
- ret))
+ if (!ASSERT_EQ(ret, sizeof(send_msg), "read(msg)"))
goto check_linum;
}
if (sk_fds_shutdown(&sk_fds))
goto check_linum;
- CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn",
- "expected (1) != actual (%u)\n",
- misc_skel->bss->nr_syn);
+ ASSERT_EQ(misc_skel->bss->nr_syn, 1, "unexpected nr_syn");
- CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data",
- "expected (%u) != actual (%u)\n",
- nr_data, misc_skel->bss->nr_data);
+ ASSERT_EQ(misc_skel->bss->nr_data, nr_data, "unexpected nr_data");
/* The last ACK may have been delayed, so it is either 1 or 2. */
CHECK(misc_skel->bss->nr_pure_ack != 1 &&
@@ -550,12 +503,12 @@ static void misc(void)
"expected (1 or 2) != actual (%u)\n",
misc_skel->bss->nr_pure_ack);
- CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin",
- "expected (1) != actual (%u)\n",
- misc_skel->bss->nr_fin);
+ ASSERT_EQ(misc_skel->bss->nr_fin, 1, "unexpected nr_fin");
+
+ ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp");
check_linum:
- CHECK_FAIL(check_error_linum(&sk_fds));
+ ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum");
sk_fds_close(&sk_fds);
bpf_link__destroy(link);
}
@@ -580,15 +533,15 @@ void test_tcp_hdr_options(void)
int i;
skel = test_tcp_hdr_options__open_and_load();
- if (CHECK(!skel, "open and load skel", "failed"))
+ if (!ASSERT_OK_PTR(skel, "open and load skel"))
return;
misc_skel = test_misc_tcp_hdr_options__open_and_load();
- if (CHECK(!misc_skel, "open and load misc test skel", "failed"))
+ if (!ASSERT_OK_PTR(misc_skel, "open and load misc test skel"))
goto skel_destroy;
cg_fd = test__join_cgroup(CG_NAME);
- if (CHECK_FAIL(cg_fd < 0))
+ if (!ASSERT_GE(cg_fd, 0, "join_cgroup"))
goto skel_destroy;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index d207e968e6b1..c38784c1c066 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
#include <test_progs.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"
+#include "tcp_rtt.skel.h"
struct tcp_rtt_storage {
__u32 invoked;
@@ -9,14 +11,16 @@ struct tcp_rtt_storage {
__u32 delivered;
__u32 delivered_ce;
__u32 icsk_retransmits;
+
+ __u32 mrtt_us; /* args[0] */
+ __u32 srtt; /* args[1] */
};
static void send_byte(int fd)
{
char b = 0x55;
- if (CHECK_FAIL(write(fd, &b, sizeof(b)) != 1))
- perror("Failed to send single byte");
+ ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte");
}
static int wait_for_ack(int fd, int retries)
@@ -50,10 +54,8 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
int err = 0;
struct tcp_rtt_storage val;
- if (CHECK_FAIL(bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)) {
- perror("Failed to read socket storage");
+ if (!ASSERT_GE(bpf_map_lookup_elem(map_fd, &client_fd, &val), 0, "read socket storage"))
return -1;
- }
if (val.invoked != invoked) {
log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d",
@@ -85,32 +87,35 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
err++;
}
+ /* Precise values of mrtt and srtt are unavailable, just make sure they are nonzero */
+ if (val.mrtt_us == 0) {
+ log_err("%s: unexpected bpf_tcp_sock.args[0] (mrtt_us) %u == 0", msg, val.mrtt_us);
+ err++;
+ }
+
+ if (val.srtt == 0) {
+ log_err("%s: unexpected bpf_tcp_sock.args[1] (srtt) %u == 0", msg, val.srtt);
+ err++;
+ }
+
return err;
}
static int run_test(int cgroup_fd, int server_fd)
{
- struct bpf_prog_load_attr attr = {
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
- .file = "./tcp_rtt.o",
- .expected_attach_type = BPF_CGROUP_SOCK_OPS,
- };
- struct bpf_object *obj;
- struct bpf_map *map;
+ struct tcp_rtt *skel;
int client_fd;
int prog_fd;
int map_fd;
int err;
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
- if (err) {
- log_err("Failed to load BPF object");
+ skel = tcp_rtt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_load"))
return -1;
- }
- map = bpf_map__next(NULL, obj);
- map_fd = bpf_map__fd(map);
+ map_fd = bpf_map__fd(skel->maps.socket_storage_map);
+ prog_fd = bpf_program__fd(skel->progs._sockops);
err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
@@ -149,7 +154,7 @@ close_client_fd:
close(client_fd);
close_bpf_object:
- bpf_object__close(obj);
+ tcp_rtt__destroy(skel);
return err;
}
@@ -158,14 +163,14 @@ void test_tcp_rtt(void)
int server_fd, cgroup_fd;
cgroup_fd = test__join_cgroup("/tcp_rtt");
- if (CHECK_FAIL(cgroup_fd < 0))
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /tcp_rtt"))
return;
server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
- if (CHECK_FAIL(server_fd < 0))
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
goto close_cgroup_fd;
- CHECK_FAIL(run_test(cgroup_fd, server_fd));
+ ASSERT_OK(run_test(cgroup_fd, server_fd), "run_test");
close(server_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index 87923d2865b7..7e8fe1bad03f 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -8,8 +8,6 @@
#define LO_ADDR6 "::1"
#define CG_NAME "/tcpbpf-user-test"
-static __u32 duration;
-
static void verify_result(struct tcpbpf_globals *result)
{
__u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
@@ -22,9 +20,7 @@ static void verify_result(struct tcpbpf_globals *result)
(1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
/* check global map */
- CHECK(expected_events != result->event_map, "event_map",
- "unexpected event_map: actual 0x%08x != expected 0x%08x\n",
- result->event_map, expected_events);
+ ASSERT_EQ(expected_events, result->event_map, "event_map");
ASSERT_EQ(result->bytes_received, 501, "bytes_received");
ASSERT_EQ(result->bytes_acked, 1002, "bytes_acked");
@@ -56,18 +52,15 @@ static void run_test(struct tcpbpf_globals *result)
int i, rv;
listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
- if (CHECK(listen_fd == -1, "start_server", "listen_fd:%d errno:%d\n",
- listen_fd, errno))
+ if (!ASSERT_NEQ(listen_fd, -1, "start_server"))
goto done;
cli_fd = connect_to_fd(listen_fd, 0);
- if (CHECK(cli_fd == -1, "connect_to_fd(listen_fd)",
- "cli_fd:%d errno:%d\n", cli_fd, errno))
+ if (!ASSERT_NEQ(cli_fd, -1, "connect_to_fd(listen_fd)"))
goto done;
accept_fd = accept(listen_fd, NULL, NULL);
- if (CHECK(accept_fd == -1, "accept(listen_fd)",
- "accept_fd:%d errno:%d\n", accept_fd, errno))
+ if (!ASSERT_NEQ(accept_fd, -1, "accept(listen_fd)"))
goto done;
/* Send 1000B of '+'s from cli_fd -> accept_fd */
@@ -75,11 +68,11 @@ static void run_test(struct tcpbpf_globals *result)
buf[i] = '+';
rv = send(cli_fd, buf, 1000, 0);
- if (CHECK(rv != 1000, "send(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+ if (!ASSERT_EQ(rv, 1000, "send(cli_fd)"))
goto done;
rv = recv(accept_fd, buf, 1000, 0);
- if (CHECK(rv != 1000, "recv(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+ if (!ASSERT_EQ(rv, 1000, "recv(accept_fd)"))
goto done;
/* Send 500B of '.'s from accept_fd ->cli_fd */
@@ -87,11 +80,11 @@ static void run_test(struct tcpbpf_globals *result)
buf[i] = '.';
rv = send(accept_fd, buf, 500, 0);
- if (CHECK(rv != 500, "send(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+ if (!ASSERT_EQ(rv, 500, "send(accept_fd)"))
goto done;
rv = recv(cli_fd, buf, 500, 0);
- if (CHECK(rv != 500, "recv(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+ if (!ASSERT_EQ(rv, 500, "recv(cli_fd)"))
goto done;
/*
@@ -100,12 +93,12 @@ static void run_test(struct tcpbpf_globals *result)
*/
shutdown(accept_fd, SHUT_WR);
err = recv(cli_fd, buf, 1, 0);
- if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_OK(err, "recv(cli_fd) for fin"))
goto done;
shutdown(cli_fd, SHUT_WR);
err = recv(accept_fd, buf, 1, 0);
- CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", err, errno);
+ ASSERT_OK(err, "recv(accept_fd) for fin");
done:
if (accept_fd != -1)
close(accept_fd);
@@ -124,12 +117,11 @@ void test_tcpbpf_user(void)
int cg_fd = -1;
skel = test_tcpbpf_kern__open_and_load();
- if (CHECK(!skel, "open and load skel", "failed"))
+ if (!ASSERT_OK_PTR(skel, "open and load skel"))
return;
cg_fd = test__join_cgroup(CG_NAME);
- if (CHECK(cg_fd < 0, "test__join_cgroup(" CG_NAME ")",
- "cg_fd:%d errno:%d", cg_fd, errno))
+ if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup(" CG_NAME ")"))
goto err;
skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
new file mode 100644
index 000000000000..ccae0b31ac6c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <bpf/btf.h>
+#include <test_progs.h>
+
+#include "test_bpf_ma.skel.h"
+
+static void do_bpf_ma_test(const char *name)
+{
+ struct test_bpf_ma *skel;
+ struct bpf_program *prog;
+ struct btf *btf;
+ int i, err, id;
+ char tname[32];
+
+ skel = test_bpf_ma__open();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ btf = bpf_object__btf(skel->obj);
+ if (!ASSERT_OK_PTR(btf, "btf"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) {
+ snprintf(tname, sizeof(tname), "bin_data_%u", skel->rodata->data_sizes[i]);
+ id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ if (!ASSERT_GT(id, 0, tname))
+ goto out;
+ skel->rodata->data_btf_ids[i] = id;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(skel->rodata->percpu_data_sizes); i++) {
+ snprintf(tname, sizeof(tname), "percpu_bin_data_%u", skel->rodata->percpu_data_sizes[i]);
+ id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ if (!ASSERT_GT(id, 0, tname))
+ goto out;
+ skel->rodata->percpu_data_btf_ids[i] = id;
+ }
+
+ prog = bpf_object__find_program_by_name(skel->obj, name);
+ if (!ASSERT_OK_PTR(prog, "invalid prog name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+
+ err = test_bpf_ma__load(skel);
+ if (!ASSERT_OK(err, "load"))
+ goto out;
+
+ err = test_bpf_ma__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ skel->bss->pid = getpid();
+ usleep(1);
+ ASSERT_OK(skel->bss->err, "test error");
+out:
+ test_bpf_ma__destroy(skel);
+}
+
+void test_test_bpf_ma(void)
+{
+ if (test__start_subtest("batch_alloc_free"))
+ do_bpf_ma_test("test_batch_alloc_free");
+ if (test__start_subtest("free_through_map_free"))
+ do_bpf_ma_test("test_free_through_map_free");
+ if (test__start_subtest("batch_percpu_alloc_free"))
+ do_bpf_ma_test("test_batch_percpu_alloc_free");
+ if (test__start_subtest("percpu_free_through_map_free"))
+ do_bpf_ma_test("test_percpu_free_through_map_free");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
new file mode 100644
index 000000000000..de22734abc4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/genetlink.h>
+#include "network_helpers.h"
+#include "bpf_smc.skel.h"
+
+#ifndef IPPROTO_SMC
+#define IPPROTO_SMC 256
+#endif
+
+#define CLIENT_IP "127.0.0.1"
+#define SERVER_IP "127.0.1.0"
+#define SERVER_IP_VIA_RISK_PATH "127.0.2.0"
+
+#define SERVICE_1 80
+#define SERVICE_2 443
+#define SERVICE_3 8443
+
+#define TEST_NS "bpf_smc_netns"
+
+static struct netns_obj *test_netns;
+
+struct smc_policy_ip_key {
+ __u32 sip;
+ __u32 dip;
+};
+
+struct smc_policy_ip_value {
+ __u8 mode;
+};
+
+#if defined(__s390x__)
+/* s390x has default seid */
+static bool setup_ueid(void) { return true; }
+static void cleanup_ueid(void) {}
+#else
+enum {
+ SMC_NETLINK_ADD_UEID = 10,
+ SMC_NETLINK_REMOVE_UEID
+};
+
+enum {
+ SMC_NLA_EID_TABLE_UNSPEC,
+ SMC_NLA_EID_TABLE_ENTRY, /* string */
+};
+
+struct msgtemplate {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[1024];
+};
+
+#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)
+
+#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK"
+#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID"
+
+static uint16_t smc_nl_family_id = -1;
+
+static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid,
+ __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type,
+ void *nla_data, int nla_len)
+{
+ struct nlattr *na;
+ struct sockaddr_nl nladdr;
+ int r, buflen;
+ char *buf;
+
+ struct msgtemplate msg = {0};
+
+ msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ msg.n.nlmsg_type = nlmsg_type;
+ msg.n.nlmsg_flags = nlmsg_flags;
+ msg.n.nlmsg_seq = 0;
+ msg.n.nlmsg_pid = nlmsg_pid;
+ msg.g.cmd = genl_cmd;
+ msg.g.version = 1;
+ na = (struct nlattr *)GENLMSG_DATA(&msg);
+ na->nla_type = nla_type;
+ na->nla_len = nla_len + 1 + NLA_HDRLEN;
+ memcpy(NLA_DATA(na), nla_data, nla_len);
+ msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+
+ buf = (char *)&msg;
+ buflen = msg.n.nlmsg_len;
+ memset(&nladdr, 0, sizeof(nladdr));
+ nladdr.nl_family = AF_NETLINK;
+
+ while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr,
+ sizeof(nladdr))) < buflen) {
+ if (r > 0) {
+ buf += r;
+ buflen -= r;
+ } else if (errno != EAGAIN) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static bool get_smc_nl_family_id(void)
+{
+ struct sockaddr_nl nl_src;
+ struct msgtemplate msg;
+ struct nlattr *nl;
+ int fd, ret;
+ pid_t pid;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (!ASSERT_OK_FD(fd, "nl_family socket"))
+ return false;
+
+ pid = getpid();
+
+ memset(&nl_src, 0, sizeof(nl_src));
+ nl_src.nl_family = AF_NETLINK;
+ nl_src.nl_pid = pid;
+
+ ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+ if (!ASSERT_OK(ret, "nl_family bind"))
+ goto fail;
+
+ ret = send_cmd(fd, GENL_ID_CTRL, pid,
+ NLM_F_REQUEST, CTRL_CMD_GETFAMILY,
+ CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME,
+ strlen(SMC_GENL_FAMILY_NAME));
+ if (!ASSERT_OK(ret, "nl_family query"))
+ goto fail;
+
+ ret = recv(fd, &msg, sizeof(msg), 0);
+ if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 ||
+ !NLMSG_OK(&msg.n, ret), "nl_family response"))
+ goto fail;
+
+ nl = (struct nlattr *)GENLMSG_DATA(&msg);
+ nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len));
+ if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type"))
+ goto fail;
+
+ smc_nl_family_id = *(uint16_t *)NLA_DATA(nl);
+ close(fd);
+ return true;
+fail:
+ close(fd);
+ return false;
+}
+
+static bool smc_ueid(int op)
+{
+ struct sockaddr_nl nl_src;
+ struct msgtemplate msg;
+ struct nlmsgerr *err;
+ char test_ueid[32];
+ int fd, ret;
+ pid_t pid;
+
+ /* UEID required */
+ memset(test_ueid, '\x20', sizeof(test_ueid));
+ memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID));
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (!ASSERT_OK_FD(fd, "ueid socket"))
+ return false;
+
+ pid = getpid();
+ memset(&nl_src, 0, sizeof(nl_src));
+ nl_src.nl_family = AF_NETLINK;
+ nl_src.nl_pid = pid;
+
+ ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+ if (!ASSERT_OK(ret, "ueid bind"))
+ goto fail;
+
+ ret = send_cmd(fd, smc_nl_family_id, pid,
+ NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY,
+ (void *)test_ueid, sizeof(test_ueid));
+ if (!ASSERT_OK(ret, "ueid cmd"))
+ goto fail;
+
+ ret = recv(fd, &msg, sizeof(msg), 0);
+ if (!ASSERT_FALSE(ret < 0 ||
+ !NLMSG_OK(&msg.n, ret), "ueid response"))
+ goto fail;
+
+ if (msg.n.nlmsg_type == NLMSG_ERROR) {
+ err = NLMSG_DATA(&msg);
+ switch (op) {
+ case SMC_NETLINK_REMOVE_UEID:
+ if (!ASSERT_FALSE((err->error && err->error != -ENOENT),
+ "ueid remove"))
+ goto fail;
+ break;
+ case SMC_NETLINK_ADD_UEID:
+ if (!ASSERT_OK(err->error, "ueid add"))
+ goto fail;
+ break;
+ default:
+ break;
+ }
+ }
+ close(fd);
+ return true;
+fail:
+ close(fd);
+ return false;
+}
+
+static bool setup_ueid(void)
+{
+ /* get smc nl id */
+ if (!get_smc_nl_family_id())
+ return false;
+ /* clear old ueid for bpftest */
+ smc_ueid(SMC_NETLINK_REMOVE_UEID);
+ /* smc-loopback required ueid */
+ return smc_ueid(SMC_NETLINK_ADD_UEID);
+}
+
+static void cleanup_ueid(void)
+{
+ smc_ueid(SMC_NETLINK_REMOVE_UEID);
+}
+#endif /* __s390x__ */
+
+static bool setup_netns(void)
+{
+ test_netns = netns_new(TEST_NS, true);
+ if (!ASSERT_OK_PTR(test_netns, "open net namespace"))
+ goto fail_netns;
+
+ SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo");
+ SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo");
+
+ return true;
+fail_ip:
+ netns_free(test_netns);
+fail_netns:
+ return false;
+}
+
+static void cleanup_netns(void)
+{
+ netns_free(test_netns);
+}
+
+static bool setup_smc(void)
+{
+ if (!setup_ueid())
+ return false;
+
+ if (!setup_netns())
+ goto fail_netns;
+
+ return true;
+fail_netns:
+ cleanup_ueid();
+ return false;
+}
+
+static int set_client_addr_cb(int fd, void *opts)
+{
+ const char *src = (const char *)opts;
+ struct sockaddr_in localaddr;
+
+ localaddr.sin_family = AF_INET;
+ localaddr.sin_port = htons(0);
+ localaddr.sin_addr.s_addr = inet_addr(src);
+ return !ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind");
+}
+
+static void run_link(const char *src, const char *dst, int port)
+{
+ struct network_helper_opts opts = {0};
+ int server, client;
+
+ server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL);
+ if (!ASSERT_OK_FD(server, "start service_1"))
+ return;
+
+ opts.proto = IPPROTO_TCP;
+ opts.post_socket_cb = set_client_addr_cb;
+ opts.cb_opts = (void *)src;
+
+ client = connect_to_fd_opts(server, &opts);
+ if (!ASSERT_OK_FD(client, "start connect"))
+ goto fail_client;
+
+ close(client);
+fail_client:
+ close(server);
+}
+
+static void block_link(int map_fd, const char *src, const char *dst)
+{
+ struct smc_policy_ip_value val = { .mode = /* block */ 0 };
+ struct smc_policy_ip_key key = {
+ .sip = inet_addr(src),
+ .dip = inet_addr(dst),
+ };
+
+ bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+}
+
+/*
+ * This test describes a real-life service topology as follows:
+ *
+ * +-------------> service_1
+ * link 1 | |
+ * +--------------------> server | link 2
+ * | | V
+ * | +-------------> service_2
+ * | link 3
+ * client -------------------> server_via_unsafe_path -> service_3
+ *
+ * Among them,
+ * 1. link-1 is very suitable for using SMC.
+ * 2. link-2 is not suitable for using SMC, because the mode of this link is
+ * kind of short-link services.
+ * 3. link-3 is also not suitable for using SMC, because the RDMA link is
+ * unavailable and needs to go through a long timeout before it can fallback
+ * to TCP.
+ * To achieve this goal, we use a customized SMC ip strategy via smc_hs_ctrl.
+ */
+static void test_topo(void)
+{
+ struct bpf_smc *skel;
+ int rc, map_fd;
+
+ skel = bpf_smc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load"))
+ return;
+
+ rc = bpf_smc__attach(skel);
+ if (!ASSERT_OK(rc, "bpf_smc__attach"))
+ goto fail;
+
+ map_fd = bpf_map__fd(skel->maps.smc_policy_ip);
+ if (!ASSERT_OK_FD(map_fd, "bpf_map__fd"))
+ goto fail;
+
+ /* Mock the process of transparent replacement, since we will modify
+ * protocol to ipproto_smc accropding to it via
+ * fmod_ret/update_socket_protocol.
+ */
+ write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck");
+
+ /* Configure ip strat */
+ block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH);
+ block_link(map_fd, SERVER_IP, SERVER_IP);
+
+ /* should go with smc */
+ run_link(CLIENT_IP, SERVER_IP, SERVICE_1);
+ /* should go with smc fallback */
+ run_link(SERVER_IP, SERVER_IP, SERVICE_2);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+ /* should go with smc */
+ run_link(CLIENT_IP, SERVER_IP, SERVICE_2);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+ /* should go with smc fallback */
+ run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count");
+
+fail:
+ bpf_smc__destroy(skel);
+}
+
+void test_bpf_smc(void)
+{
+ if (!setup_smc()) {
+ printf("setup for smc test failed, test SKIP:\n");
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("topo"))
+ test_topo();
+
+ cleanup_ueid();
+ cleanup_netns();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
new file mode 100644
index 000000000000..1750c29b94f8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Sony Group Corporation */
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sys/prctl.h>
+#include <test_progs.h>
+#include "bpf_syscall_macro.skel.h"
+
+void test_bpf_syscall_macro(void)
+{
+ struct bpf_syscall_macro *skel = NULL;
+ int err;
+ int exp_arg1 = 1001;
+ unsigned long exp_arg2 = 12;
+ unsigned long exp_arg3 = 13;
+ unsigned long exp_arg4 = 14;
+ unsigned long exp_arg5 = 15;
+ loff_t off_in, off_out;
+ ssize_t r;
+
+ /* check whether it can open program */
+ skel = bpf_syscall_macro__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_syscall_macro__open"))
+ return;
+
+ skel->rodata->filter_pid = getpid();
+
+ /* check whether it can load program */
+ err = bpf_syscall_macro__load(skel);
+ if (!ASSERT_OK(err, "bpf_syscall_macro__load"))
+ goto cleanup;
+
+ /* check whether it can attach kprobe */
+ err = bpf_syscall_macro__attach(skel);
+ if (!ASSERT_OK(err, "bpf_syscall_macro__attach"))
+ goto cleanup;
+
+ /* check whether args of syscall are copied correctly */
+ prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5);
+
+ ASSERT_EQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
+ ASSERT_EQ(skel->bss->arg2, exp_arg2, "syscall_arg2");
+ ASSERT_EQ(skel->bss->arg3, exp_arg3, "syscall_arg3");
+ /* it cannot copy arg4 when uses PT_REGS_PARM4 on x86_64 */
+#ifdef __x86_64__
+ ASSERT_NEQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx");
+#else
+ ASSERT_EQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx");
+#endif
+ ASSERT_EQ(skel->bss->arg4, exp_arg4, "syscall_arg4");
+ ASSERT_EQ(skel->bss->arg5, exp_arg5, "syscall_arg5");
+
+ /* check whether args of syscall are copied correctly for CORE variants */
+ ASSERT_EQ(skel->bss->arg1_core, exp_arg1, "syscall_arg1_core_variant");
+ ASSERT_EQ(skel->bss->arg2_core, exp_arg2, "syscall_arg2_core_variant");
+ ASSERT_EQ(skel->bss->arg3_core, exp_arg3, "syscall_arg3_core_variant");
+ /* it cannot copy arg4 when uses PT_REGS_PARM4_CORE on x86_64 */
+#ifdef __x86_64__
+ ASSERT_NEQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant");
+#else
+ ASSERT_EQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant");
+#endif
+ ASSERT_EQ(skel->bss->arg4_core, exp_arg4, "syscall_arg4_core_variant");
+ ASSERT_EQ(skel->bss->arg5_core, exp_arg5, "syscall_arg5_core_variant");
+
+ ASSERT_EQ(skel->bss->option_syscall, exp_arg1, "BPF_KPROBE_SYSCALL_option");
+ ASSERT_EQ(skel->bss->arg2_syscall, exp_arg2, "BPF_KPROBE_SYSCALL_arg2");
+ ASSERT_EQ(skel->bss->arg3_syscall, exp_arg3, "BPF_KPROBE_SYSCALL_arg3");
+ ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4");
+ ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5");
+
+ r = splice(-42, &off_in, 42, &off_out, 0x12340000, SPLICE_F_NONBLOCK);
+ err = -errno;
+ ASSERT_EQ(r, -1, "splice_res");
+ ASSERT_EQ(err, -EBADF, "splice_err");
+
+ ASSERT_EQ(skel->bss->splice_fd_in, -42, "splice_arg1");
+ ASSERT_EQ(skel->bss->splice_off_in, (__u64)&off_in, "splice_arg2");
+ ASSERT_EQ(skel->bss->splice_fd_out, 42, "splice_arg3");
+ ASSERT_EQ(skel->bss->splice_off_out, (__u64)&off_out, "splice_arg4");
+ ASSERT_EQ(skel->bss->splice_len, 0x12340000, "splice_arg5");
+ ASSERT_EQ(skel->bss->splice_flags, SPLICE_F_NONBLOCK, "splice_arg6");
+
+cleanup:
+ bpf_syscall_macro__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
index 172c999e523c..ea933fd151c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -1,13 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define _GNU_SOURCE
+#include <stdio.h>
#include <sched.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <test_progs.h>
-#define TDIR "/sys/kernel/debug"
+/* TDIR must be in a location we can create a directory in. */
+#define TDIR "/tmp/test_bpffs_testdir"
static int read_iter(char *file)
{
@@ -18,61 +20,129 @@ static int read_iter(char *file)
fd = open(file, 0);
if (fd < 0)
return -1;
- while ((len = read(fd, buf, sizeof(buf))) > 0)
+ while ((len = read(fd, buf, sizeof(buf))) > 0) {
+ buf[sizeof(buf) - 1] = '\0';
if (strstr(buf, "iter")) {
close(fd);
return 0;
}
+ }
close(fd);
return -1;
}
static int fn(void)
{
- int err, duration = 0;
+ struct stat a, b, c;
+ int err, map;
err = unshare(CLONE_NEWNS);
- if (CHECK(err, "unshare", "failed: %d\n", errno))
+ if (!ASSERT_OK(err, "unshare"))
goto out;
err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL);
- if (CHECK(err, "mount /", "failed: %d\n", errno))
+ if (!ASSERT_OK(err, "mount /"))
goto out;
- err = umount(TDIR);
- if (CHECK(err, "umount " TDIR, "failed: %d\n", errno))
+ err = mkdir(TDIR, 0777);
+ /* If the directory already exists we can carry on. It may be left over
+ * from a previous run.
+ */
+ if ((err && errno != EEXIST) && !ASSERT_OK(err, "mkdir " TDIR))
goto out;
err = mount("none", TDIR, "tmpfs", 0, NULL);
- if (CHECK(err, "mount", "mount root failed: %d\n", errno))
+ if (!ASSERT_OK(err, "mount tmpfs"))
goto out;
err = mkdir(TDIR "/fs1", 0777);
- if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno))
+ if (!ASSERT_OK(err, "mkdir " TDIR "/fs1"))
goto out;
err = mkdir(TDIR "/fs2", 0777);
- if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno))
+ if (!ASSERT_OK(err, "mkdir " TDIR "/fs2"))
goto out;
err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL);
- if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno))
+ if (!ASSERT_OK(err, "mount bpffs " TDIR "/fs1"))
goto out;
err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL);
- if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno))
+ if (!ASSERT_OK(err, "mount bpffs " TDIR "/fs2"))
goto out;
err = read_iter(TDIR "/fs1/maps.debug");
- if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n"))
+ if (!ASSERT_OK(err, "reading " TDIR "/fs1/maps.debug"))
goto out;
err = read_iter(TDIR "/fs2/progs.debug");
- if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n"))
+ if (!ASSERT_OK(err, "reading " TDIR "/fs2/progs.debug"))
goto out;
+
+ err = mkdir(TDIR "/fs1/a", 0777);
+ if (!ASSERT_OK(err, "creating " TDIR "/fs1/a"))
+ goto out;
+ err = mkdir(TDIR "/fs1/a/1", 0777);
+ if (!ASSERT_OK(err, "creating " TDIR "/fs1/a/1"))
+ goto out;
+ err = mkdir(TDIR "/fs1/b", 0777);
+ if (!ASSERT_OK(err, "creating " TDIR "/fs1/b"))
+ goto out;
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (!ASSERT_GT(map, 0, "create_map(ARRAY)"))
+ goto out;
+ err = bpf_obj_pin(map, TDIR "/fs1/c");
+ if (!ASSERT_OK(err, "pin map"))
+ goto out;
+ close(map);
+
+ /* Check that RENAME_EXCHANGE works for directories. */
+ err = stat(TDIR "/fs1/a", &a);
+ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/a)"))
+ goto out;
+ err = renameat2(0, TDIR "/fs1/a", 0, TDIR "/fs1/b", RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "renameat2(/fs1/a, /fs1/b, RENAME_EXCHANGE)"))
+ goto out;
+ err = stat(TDIR "/fs1/b", &b);
+ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)"))
+ goto out;
+ if (!ASSERT_EQ(a.st_ino, b.st_ino, "b should have a's inode"))
+ goto out;
+ err = access(TDIR "/fs1/b/1", F_OK);
+ if (!ASSERT_OK(err, "access(" TDIR "/fs1/b/1)"))
+ goto out;
+
+ /* Check that RENAME_EXCHANGE works for mixed file types. */
+ err = stat(TDIR "/fs1/c", &c);
+ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/map)"))
+ goto out;
+ err = renameat2(0, TDIR "/fs1/c", 0, TDIR "/fs1/b", RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "renameat2(/fs1/c, /fs1/b, RENAME_EXCHANGE)"))
+ goto out;
+ err = stat(TDIR "/fs1/b", &b);
+ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)"))
+ goto out;
+ if (!ASSERT_EQ(c.st_ino, b.st_ino, "b should have c's inode"))
+ goto out;
+ err = access(TDIR "/fs1/c/1", F_OK);
+ if (!ASSERT_OK(err, "access(" TDIR "/fs1/c/1)"))
+ goto out;
+
+ /* Check that RENAME_NOREPLACE works. */
+ err = renameat2(0, TDIR "/fs1/b", 0, TDIR "/fs1/a", RENAME_NOREPLACE);
+ if (!ASSERT_ERR(err, "renameat2(RENAME_NOREPLACE)")) {
+ err = -EINVAL;
+ goto out;
+ }
+ err = access(TDIR "/fs1/b", F_OK);
+ if (!ASSERT_OK(err, "access(" TDIR "/fs1/b)"))
+ goto out;
+
out:
umount(TDIR "/fs1");
umount(TDIR "/fs2");
rmdir(TDIR "/fs1");
rmdir(TDIR "/fs2");
umount(TDIR);
+ rmdir(TDIR);
exit(err);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
index 2559bb775762..9c0200c132d9 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
@@ -9,18 +9,10 @@
#include "bprm_opts.skel.h"
#include "network_helpers.h"
-
-#ifndef __NR_pidfd_open
-#define __NR_pidfd_open 434
-#endif
+#include "task_local_storage_helpers.h"
static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL };
-static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
-{
- return syscall(__NR_pidfd_open, pid, flags);
-}
-
static int update_storage(int map_fd, int secureexec)
{
int task_fd, ret = 0;
@@ -59,7 +51,7 @@ static int run_set_secureexec(int map_fd, int secureexec)
exit(ret);
/* If the binary is executed with securexec=1, the dynamic
- * loader ingores and unsets certain variables like LD_PRELOAD,
+ * loader ignores and unsets certain variables like LD_PRELOAD,
* TMPDIR etc. TMPDIR is used here to simplify the example, as
* LD_PRELOAD requires a real .so file.
*
diff --git a/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
new file mode 100644
index 000000000000..7d1b478c99a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms Inc. */
+#include <test_progs.h>
+#include "test_btf_ext.skel.h"
+#include "btf_helpers.h"
+
+static void subtest_line_func_info(void)
+{
+ struct test_btf_ext *skel;
+ struct bpf_prog_info info;
+ struct bpf_line_info line_info[128], *libbpf_line_info;
+ struct bpf_func_info func_info[128], *libbpf_func_info;
+ __u32 info_len = sizeof(info), libbbpf_line_info_cnt, libbbpf_func_info_cnt;
+ int err, fd;
+
+ skel = test_btf_ext__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.line_info = ptr_to_u64(&line_info);
+ info.nr_line_info = sizeof(line_info);
+ info.line_info_rec_size = sizeof(*line_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_line_info"))
+ goto out;
+
+ libbpf_line_info = bpf_program__line_info(skel->progs.global_func);
+ libbbpf_line_info_cnt = bpf_program__line_info_cnt(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.func_info = ptr_to_u64(&func_info);
+ info.nr_func_info = sizeof(func_info);
+ info.func_info_rec_size = sizeof(*func_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_func_info"))
+ goto out;
+
+ libbpf_func_info = bpf_program__func_info(skel->progs.global_func);
+ libbbpf_func_info_cnt = bpf_program__func_info_cnt(skel->progs.global_func);
+
+ if (!ASSERT_OK_PTR(libbpf_line_info, "bpf_program__line_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_line_info_cnt, info.nr_line_info, "line_info_cnt"))
+ goto out;
+ if (!ASSERT_OK_PTR(libbpf_func_info, "bpf_program__func_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_func_info_cnt, info.nr_func_info, "func_info_cnt"))
+ goto out;
+ ASSERT_MEMEQ(libbpf_line_info, line_info, libbbpf_line_info_cnt * sizeof(*line_info),
+ "line_info");
+ ASSERT_MEMEQ(libbpf_func_info, func_info, libbbpf_func_info_cnt * sizeof(*func_info),
+ "func_info");
+out:
+ test_btf_ext__destroy(skel);
+}
+
+void test_btf_ext(void)
+{
+ if (test__start_subtest("line_func_info"))
+ subtest_line_func_info();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_csum_diff.c b/tools/testing/selftests/bpf/prog_tests/test_csum_diff.c
new file mode 100644
index 000000000000..107b20d43e83
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_csum_diff.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates */
+#include <test_progs.h>
+#include "csum_diff_test.skel.h"
+
+#define BUFF_SZ 512
+
+struct testcase {
+ unsigned long long to_buff[BUFF_SZ / 8];
+ unsigned int to_buff_len;
+ unsigned long long from_buff[BUFF_SZ / 8];
+ unsigned int from_buff_len;
+ unsigned short seed;
+ unsigned short result;
+};
+
+#define NUM_PUSH_TESTS 4
+
+struct testcase push_tests[NUM_PUSH_TESTS] = {
+ {
+ .to_buff = {
+ 0xdeadbeefdeadbeef,
+ },
+ .to_buff_len = 8,
+ .from_buff = {},
+ .from_buff_len = 0,
+ .seed = 0,
+ .result = 0x3b3b
+ },
+ {
+ .to_buff = {
+ 0xdeadbeefdeadbeef,
+ 0xbeefdeadbeefdead,
+ },
+ .to_buff_len = 16,
+ .from_buff = {},
+ .from_buff_len = 0,
+ .seed = 0x1234,
+ .result = 0x88aa
+ },
+ {
+ .to_buff = {
+ 0xdeadbeefdeadbeef,
+ 0xbeefdeadbeefdead,
+ },
+ .to_buff_len = 15,
+ .from_buff = {},
+ .from_buff_len = 0,
+ .seed = 0x1234,
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ .result = 0xcaa9
+#else
+ .result = 0x87fd
+#endif
+ },
+ {
+ .to_buff = {
+ 0x327b23c66b8b4567,
+ 0x66334873643c9869,
+ 0x19495cff74b0dc51,
+ 0x625558ec2ae8944a,
+ 0x46e87ccd238e1f29,
+ 0x507ed7ab3d1b58ba,
+ 0x41b71efb2eb141f2,
+ 0x7545e14679e2a9e3,
+ 0x5bd062c2515f007c,
+ 0x4db127f812200854,
+ 0x1f16e9e80216231b,
+ 0x66ef438d1190cde7,
+ 0x3352255a140e0f76,
+ 0x0ded7263109cf92e,
+ 0x1befd79f7fdcc233,
+ 0x6b68079a41a7c4c9,
+ 0x25e45d324e6afb66,
+ 0x431bd7b7519b500d,
+ 0x7c83e4583f2dba31,
+ 0x62bbd95a257130a3,
+ 0x628c895d436c6125,
+ 0x721da317333ab105,
+ 0x2d1d5ae92443a858,
+ 0x75a2a8d46763845e,
+ 0x79838cb208edbdab,
+ 0x0b03e0c64353d0cd,
+ 0x54e49eb4189a769b,
+ 0x2ca8861171f32454,
+ 0x02901d820836c40e,
+ 0x081386413a95f874,
+ 0x7c3dbd3d1e7ff521,
+ 0x6ceaf087737b8ddc,
+ 0x4516dde922221a70,
+ 0x614fd4a13006c83e,
+ 0x5577f8e1419ac241,
+ 0x05072367440badfc,
+ 0x77465f013804823e,
+ 0x5c482a977724c67e,
+ 0x5e884adc2463b9ea,
+ 0x2d51779651ead36b,
+ 0x153ea438580bd78f,
+ 0x70a64e2a3855585c,
+ 0x2a487cb06a2342ec,
+ 0x725a06fb1d4ed43b,
+ 0x57e4ccaf2cd89a32,
+ 0x4b588f547a6d8d3c,
+ 0x6de91b18542289ec,
+ 0x7644a45c38437fdb,
+ 0x684a481a32fff902,
+ 0x749abb43579478fe,
+ 0x1ba026fa3dc240fb,
+ 0x75c6c33a79a1deaa,
+ 0x70c6a52912e685fb,
+ 0x374a3fe6520eedd1,
+ 0x23f9c13c4f4ef005,
+ 0x275ac794649bb77c,
+ 0x1cf10fd839386575,
+ 0x235ba861180115be,
+ 0x354fe9f947398c89,
+ 0x741226bb15b5af5c,
+ 0x10233c990d34b6a8,
+ 0x615740953f6ab60f,
+ 0x77ae35eb7e0c57b1,
+ 0x310c50b3579be4f1,
+ },
+ .to_buff_len = 512,
+ .from_buff = {},
+ .from_buff_len = 0,
+ .seed = 0xffff,
+ .result = 0xca45
+ },
+};
+
+#define NUM_PULL_TESTS 4
+
+struct testcase pull_tests[NUM_PULL_TESTS] = {
+ {
+ .from_buff = {
+ 0xdeadbeefdeadbeef,
+ },
+ .from_buff_len = 8,
+ .to_buff = {},
+ .to_buff_len = 0,
+ .seed = 0,
+ .result = 0xc4c4
+ },
+ {
+ .from_buff = {
+ 0xdeadbeefdeadbeef,
+ 0xbeefdeadbeefdead,
+ },
+ .from_buff_len = 16,
+ .to_buff = {},
+ .to_buff_len = 0,
+ .seed = 0x1234,
+ .result = 0x9bbd
+ },
+ {
+ .from_buff = {
+ 0xdeadbeefdeadbeef,
+ 0xbeefdeadbeefdead,
+ },
+ .from_buff_len = 15,
+ .to_buff = {},
+ .to_buff_len = 0,
+ .seed = 0x1234,
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ .result = 0x59be
+#else
+ .result = 0x9c6a
+#endif
+ },
+ {
+ .from_buff = {
+ 0x327b23c66b8b4567,
+ 0x66334873643c9869,
+ 0x19495cff74b0dc51,
+ 0x625558ec2ae8944a,
+ 0x46e87ccd238e1f29,
+ 0x507ed7ab3d1b58ba,
+ 0x41b71efb2eb141f2,
+ 0x7545e14679e2a9e3,
+ 0x5bd062c2515f007c,
+ 0x4db127f812200854,
+ 0x1f16e9e80216231b,
+ 0x66ef438d1190cde7,
+ 0x3352255a140e0f76,
+ 0x0ded7263109cf92e,
+ 0x1befd79f7fdcc233,
+ 0x6b68079a41a7c4c9,
+ 0x25e45d324e6afb66,
+ 0x431bd7b7519b500d,
+ 0x7c83e4583f2dba31,
+ 0x62bbd95a257130a3,
+ 0x628c895d436c6125,
+ 0x721da317333ab105,
+ 0x2d1d5ae92443a858,
+ 0x75a2a8d46763845e,
+ 0x79838cb208edbdab,
+ 0x0b03e0c64353d0cd,
+ 0x54e49eb4189a769b,
+ 0x2ca8861171f32454,
+ 0x02901d820836c40e,
+ 0x081386413a95f874,
+ 0x7c3dbd3d1e7ff521,
+ 0x6ceaf087737b8ddc,
+ 0x4516dde922221a70,
+ 0x614fd4a13006c83e,
+ 0x5577f8e1419ac241,
+ 0x05072367440badfc,
+ 0x77465f013804823e,
+ 0x5c482a977724c67e,
+ 0x5e884adc2463b9ea,
+ 0x2d51779651ead36b,
+ 0x153ea438580bd78f,
+ 0x70a64e2a3855585c,
+ 0x2a487cb06a2342ec,
+ 0x725a06fb1d4ed43b,
+ 0x57e4ccaf2cd89a32,
+ 0x4b588f547a6d8d3c,
+ 0x6de91b18542289ec,
+ 0x7644a45c38437fdb,
+ 0x684a481a32fff902,
+ 0x749abb43579478fe,
+ 0x1ba026fa3dc240fb,
+ 0x75c6c33a79a1deaa,
+ 0x70c6a52912e685fb,
+ 0x374a3fe6520eedd1,
+ 0x23f9c13c4f4ef005,
+ 0x275ac794649bb77c,
+ 0x1cf10fd839386575,
+ 0x235ba861180115be,
+ 0x354fe9f947398c89,
+ 0x741226bb15b5af5c,
+ 0x10233c990d34b6a8,
+ 0x615740953f6ab60f,
+ 0x77ae35eb7e0c57b1,
+ 0x310c50b3579be4f1,
+ },
+ .from_buff_len = 512,
+ .to_buff = {},
+ .to_buff_len = 0,
+ .seed = 0xffff,
+ .result = 0x35ba
+ },
+};
+
+#define NUM_DIFF_TESTS 4
+
+struct testcase diff_tests[NUM_DIFF_TESTS] = {
+ {
+ .from_buff = {
+ 0xdeadbeefdeadbeef,
+ },
+ .from_buff_len = 8,
+ .to_buff = {
+ 0xabababababababab,
+ },
+ .to_buff_len = 8,
+ .seed = 0,
+ .result = 0x7373
+ },
+ {
+ .from_buff = {
+ 0xdeadbeefdeadbeef,
+ },
+ .from_buff_len = 7,
+ .to_buff = {
+ 0xabababababababab,
+ },
+ .to_buff_len = 7,
+ .seed = 0,
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ .result = 0xa673
+#else
+ .result = 0x73b7
+#endif
+ },
+ {
+ .from_buff = {
+ 0,
+ },
+ .from_buff_len = 8,
+ .to_buff = {
+ 0xabababababababab,
+ },
+ .to_buff_len = 8,
+ .seed = 0,
+ .result = 0xaeae
+ },
+ {
+ .from_buff = {
+ 0xdeadbeefdeadbeef
+ },
+ .from_buff_len = 8,
+ .to_buff = {
+ 0,
+ },
+ .to_buff_len = 8,
+ .seed = 0xffff,
+ .result = 0xc4c4
+ },
+};
+
+#define NUM_EDGE_TESTS 4
+
+struct testcase edge_tests[NUM_EDGE_TESTS] = {
+ {
+ .from_buff = {},
+ .from_buff_len = 0,
+ .to_buff = {},
+ .to_buff_len = 0,
+ .seed = 0,
+ .result = 0
+ },
+ {
+ .from_buff = {
+ 0x1234
+ },
+ .from_buff_len = 0,
+ .to_buff = {
+ 0x1234
+ },
+ .to_buff_len = 0,
+ .seed = 0,
+ .result = 0
+ },
+ {
+ .from_buff = {},
+ .from_buff_len = 0,
+ .to_buff = {},
+ .to_buff_len = 0,
+ .seed = 0x1234,
+ .result = 0x1234
+ },
+ {
+ .from_buff = {},
+ .from_buff_len = 512,
+ .to_buff = {},
+ .to_buff_len = 0,
+ .seed = 0xffff,
+ .result = 0xffff
+ },
+};
+
+static unsigned short trigger_csum_diff(const struct csum_diff_test *skel)
+{
+ u8 tmp_out[64 << 2] = {};
+ u8 tmp_in[64] = {};
+ int err;
+ int pfd;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = tmp_in,
+ .data_size_in = sizeof(tmp_in),
+ .data_out = tmp_out,
+ .data_size_out = sizeof(tmp_out),
+ .repeat = 1,
+ );
+ pfd = bpf_program__fd(skel->progs.compute_checksum);
+ err = bpf_prog_test_run_opts(pfd, &topts);
+ if (err)
+ return -1;
+
+ return skel->bss->result;
+}
+
+static void test_csum_diff(struct testcase *tests, int num_tests)
+{
+ struct csum_diff_test *skel;
+ unsigned short got;
+ int err;
+
+ for (int i = 0; i < num_tests; i++) {
+ skel = csum_diff_test__open();
+ if (!ASSERT_OK_PTR(skel, "csum_diff_test open"))
+ return;
+
+ skel->rodata->to_buff_len = tests[i].to_buff_len;
+ skel->rodata->from_buff_len = tests[i].from_buff_len;
+
+ err = csum_diff_test__load(skel);
+ if (!ASSERT_EQ(err, 0, "csum_diff_test load"))
+ goto out;
+
+ memcpy(skel->bss->to_buff, tests[i].to_buff, tests[i].to_buff_len);
+ memcpy(skel->bss->from_buff, tests[i].from_buff, tests[i].from_buff_len);
+ skel->bss->seed = tests[i].seed;
+
+ got = trigger_csum_diff(skel);
+ ASSERT_EQ(got, tests[i].result, "csum_diff result");
+
+ csum_diff_test__destroy(skel);
+ }
+
+ return;
+out:
+ csum_diff_test__destroy(skel);
+}
+
+void test_test_csum_diff(void)
+{
+ if (test__start_subtest("csum_diff_push"))
+ test_csum_diff(push_tests, NUM_PUSH_TESTS);
+ if (test__start_subtest("csum_diff_pull"))
+ test_csum_diff(pull_tests, NUM_PULL_TESTS);
+ if (test__start_subtest("csum_diff_diff"))
+ test_csum_diff(diff_tests, NUM_DIFF_TESTS);
+ if (test__start_subtest("csum_diff_edge"))
+ test_csum_diff(edge_tests, NUM_EDGE_TESTS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 7e13129f593a..e905cbaf6b3d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -1,91 +1,162 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
+#include "test_global_func1.skel.h"
+#include "test_global_func2.skel.h"
+#include "test_global_func3.skel.h"
+#include "test_global_func4.skel.h"
+#include "test_global_func5.skel.h"
+#include "test_global_func6.skel.h"
+#include "test_global_func7.skel.h"
+#include "test_global_func8.skel.h"
+#include "test_global_func9.skel.h"
+#include "test_global_func10.skel.h"
+#include "test_global_func11.skel.h"
+#include "test_global_func12.skel.h"
+#include "test_global_func13.skel.h"
+#include "test_global_func14.skel.h"
+#include "test_global_func15.skel.h"
+#include "test_global_func16.skel.h"
+#include "test_global_func17.skel.h"
+#include "test_global_func_ctx_args.skel.h"
-const char *err_str;
-bool found;
+#include "bpf/libbpf_internal.h"
+#include "btf_helpers.h"
-static int libbpf_debug_print(enum libbpf_print_level level,
- const char *format, va_list args)
+static void check_ctx_arg_type(const struct btf *btf, const struct btf_param *p)
{
- char *log_buf;
+ const struct btf_type *t;
+ const char *s;
- if (level != LIBBPF_WARN ||
- strcmp(format, "libbpf: \n%s\n")) {
- vprintf(format, args);
- return 0;
+ t = btf__type_by_id(btf, p->type);
+ if (!ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_t"))
+ return;
+
+ s = btf_type_raw_dump(btf, t->type);
+ if (!ASSERT_HAS_SUBSTR(s, "STRUCT 'bpf_perf_event_data' size=0 vlen=0",
+ "ctx_struct_t"))
+ return;
+}
+
+static void subtest_ctx_arg_rewrite(void)
+{
+ struct test_global_func_ctx_args *skel = NULL;
+ struct bpf_prog_info info;
+ char func_info_buf[1024] __attribute__((aligned(8)));
+ struct bpf_func_info_min *rec;
+ struct btf *btf = NULL;
+ __u32 info_len = sizeof(info);
+ int err, fd, i;
+ struct btf *kern_btf = NULL;
+
+ kern_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(kern_btf, "kern_btf_load"))
+ return;
+
+ /* simple detection of kernel native arg:ctx tag support */
+ if (btf__find_by_name_kind(kern_btf, "bpf_subprog_arg_info", BTF_KIND_STRUCT) > 0) {
+ test__skip();
+ btf__free(kern_btf);
+ return;
}
+ btf__free(kern_btf);
+
+ skel = test_global_func_ctx_args__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
- log_buf = va_arg(args, char *);
- if (!log_buf)
+ bpf_program__set_autoload(skel->progs.arg_tag_ctx_perf, true);
+
+ err = test_global_func_ctx_args__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
goto out;
- if (err_str && strstr(log_buf, err_str) == 0)
- found = true;
-out:
- printf(format, log_buf);
- return 0;
-}
-extern int extra_prog_load_log_flags;
+ memset(&info, 0, sizeof(info));
+ info.func_info = ptr_to_u64(&func_info_buf);
+ info.nr_func_info = 3;
+ info.func_info_rec_size = sizeof(struct bpf_func_info_min);
-static int check_load(const char *file)
-{
- struct bpf_prog_load_attr attr;
- struct bpf_object *obj = NULL;
- int err, prog_fd;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = BPF_PROG_TYPE_UNSPEC;
- attr.log_level = extra_prog_load_log_flags;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
- found = false;
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
- bpf_object__close(obj);
- return err;
-}
+ fd = bpf_program__fd(skel->progs.arg_tag_ctx_perf);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_info"))
+ goto out;
+
+ if (!ASSERT_EQ(info.nr_func_info, 3, "nr_func_info"))
+ goto out;
+
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ if (!ASSERT_OK_PTR(btf, "obj_kern_btf"))
+ goto out;
+
+ rec = (struct bpf_func_info_min *)func_info_buf;
+ for (i = 0; i < info.nr_func_info; i++, rec = (void *)rec + info.func_info_rec_size) {
+ const struct btf_type *fn_t, *proto_t;
+ const char *name;
+
+ if (rec->insn_off == 0)
+ continue; /* main prog, skip */
+
+ fn_t = btf__type_by_id(btf, rec->type_id);
+ if (!ASSERT_OK_PTR(fn_t, "fn_type"))
+ goto out;
+ if (!ASSERT_EQ(btf_kind(fn_t), BTF_KIND_FUNC, "fn_type_kind"))
+ goto out;
+ proto_t = btf__type_by_id(btf, fn_t->type);
+ if (!ASSERT_OK_PTR(proto_t, "proto_type"))
+ goto out;
+
+ name = btf__name_by_offset(btf, fn_t->name_off);
+ if (strcmp(name, "subprog_ctx_tag") == 0) {
+ /* int subprog_ctx_tag(void *ctx __arg_ctx) */
+ if (!ASSERT_EQ(btf_vlen(proto_t), 1, "arg_cnt"))
+ goto out;
-struct test_def {
- const char *file;
- const char *err_str;
-};
+ /* arg 0 is PTR -> STRUCT bpf_perf_event_data */
+ check_ctx_arg_type(btf, &btf_params(proto_t)[0]);
+ } else if (strcmp(name, "subprog_multi_ctx_tags") == 0) {
+ /* int subprog_multi_ctx_tags(void *ctx1 __arg_ctx,
+ * struct my_struct *mem,
+ * void *ctx2 __arg_ctx)
+ */
+ if (!ASSERT_EQ(btf_vlen(proto_t), 3, "arg_cnt"))
+ goto out;
+
+ /* arg 0 is PTR -> STRUCT bpf_perf_event_data */
+ check_ctx_arg_type(btf, &btf_params(proto_t)[0]);
+ /* arg 2 is PTR -> STRUCT bpf_perf_event_data */
+ check_ctx_arg_type(btf, &btf_params(proto_t)[2]);
+ } else {
+ ASSERT_FAIL("unexpected subprog %s", name);
+ goto out;
+ }
+ }
+
+out:
+ btf__free(btf);
+ test_global_func_ctx_args__destroy(skel);
+}
void test_test_global_funcs(void)
{
- struct test_def tests[] = {
- { "test_global_func1.o", "combined stack size of 4 calls is 544" },
- { "test_global_func2.o" },
- { "test_global_func3.o" , "the call stack of 8 frames" },
- { "test_global_func4.o" },
- { "test_global_func5.o" , "expected pointer to ctx, but got PTR" },
- { "test_global_func6.o" , "modified ctx ptr R2" },
- { "test_global_func7.o" , "foo() doesn't return scalar" },
- { "test_global_func8.o" },
- { "test_global_func9.o" },
- { "test_global_func10.o", "invalid indirect read from stack" },
- { "test_global_func11.o", "Caller passes invalid args into func#1" },
- { "test_global_func12.o", "invalid mem access 'mem_or_null'" },
- { "test_global_func13.o", "Caller passes invalid args into func#1" },
- { "test_global_func14.o", "reference type('FWD S') size cannot be determined" },
- { "test_global_func15.o", "At program exit the register R0 has value" },
- { "test_global_func16.o", "invalid indirect read from stack" },
- };
- libbpf_print_fn_t old_print_fn = NULL;
- int err, i, duration = 0;
-
- old_print_fn = libbpf_set_print(libbpf_debug_print);
-
- for (i = 0; i < ARRAY_SIZE(tests); i++) {
- const struct test_def *test = &tests[i];
-
- if (!test__start_subtest(test->file))
- continue;
-
- err_str = test->err_str;
- err = check_load(test->file);
- CHECK_FAIL(!!err ^ !!err_str);
- if (err_str)
- CHECK(found, "", "expected string '%s'", err_str);
- }
- libbpf_set_print(old_print_fn);
+ RUN_TESTS(test_global_func1);
+ RUN_TESTS(test_global_func2);
+ RUN_TESTS(test_global_func3);
+ RUN_TESTS(test_global_func4);
+ RUN_TESTS(test_global_func5);
+ RUN_TESTS(test_global_func6);
+ RUN_TESTS(test_global_func7);
+ RUN_TESTS(test_global_func8);
+ RUN_TESTS(test_global_func9);
+ RUN_TESTS(test_global_func10);
+ RUN_TESTS(test_global_func11);
+ RUN_TESTS(test_global_func12);
+ RUN_TESTS(test_global_func13);
+ RUN_TESTS(test_global_func14);
+ RUN_TESTS(test_global_func15);
+ RUN_TESTS(test_global_func16);
+ RUN_TESTS(test_global_func17);
+ RUN_TESTS(test_global_func_ctx_args);
+
+ if (test__start_subtest("ctx_arg_rewrite"))
+ subtest_ctx_arg_rewrite();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index b54bc0c351b7..810b14981c2e 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -13,14 +13,17 @@
#include "ima.skel.h"
-static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
+#define MAX_SAMPLES 4
+
+static int _run_measured_process(const char *measured_dir, u32 *monitored_pid,
+ const char *cmd)
{
int child_pid, child_status;
child_pid = fork();
if (child_pid == 0) {
*monitored_pid = getpid();
- execlp("./ima_setup.sh", "./ima_setup.sh", "run", measured_dir,
+ execlp("./ima_setup.sh", "./ima_setup.sh", cmd, measured_dir,
NULL);
exit(errno);
@@ -32,22 +35,42 @@ static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
return -EINVAL;
}
-static u64 ima_hash_from_bpf;
+static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
+{
+ return _run_measured_process(measured_dir, monitored_pid, "run");
+}
+
+static u64 ima_hash_from_bpf[MAX_SAMPLES];
+static int ima_hash_from_bpf_idx;
static int process_sample(void *ctx, void *data, size_t len)
{
- ima_hash_from_bpf = *((u64 *)data);
+ if (ima_hash_from_bpf_idx >= MAX_SAMPLES)
+ return -ENOSPC;
+
+ ima_hash_from_bpf[ima_hash_from_bpf_idx++] = *((u64 *)data);
return 0;
}
+static void test_init(struct ima__bss *bss)
+{
+ ima_hash_from_bpf_idx = 0;
+
+ bss->use_ima_file_hash = false;
+ bss->enable_bprm_creds_for_exec = false;
+ bss->enable_kernel_read_file = false;
+ bss->test_deny = false;
+}
+
void test_test_ima(void)
{
char measured_dir_template[] = "/tmp/ima_measuredXXXXXX";
- struct ring_buffer *ringbuf;
+ struct ring_buffer *ringbuf = NULL;
const char *measured_dir;
+ u64 bin_true_sample;
char cmd[256];
- int err, duration = 0;
+ int err, duration = 0, fresh_digest_idx = 0;
struct ima *skel = NULL;
skel = ima__open_and_load();
@@ -68,20 +91,150 @@ void test_test_ima(void)
goto close_prog;
snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir);
- if (CHECK_FAIL(system(cmd)))
+ err = system(cmd);
+ if (CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno))
goto close_clean;
+ /*
+ * Test #1
+ * - Goal: obtain a sample with the bpf_ima_inode_hash() helper
+ * - Expected result: 1 sample (/bin/true)
+ */
+ test_init(skel->bss);
err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
- if (CHECK(err, "run_measured_process", "err = %d\n", err))
+ if (CHECK(err, "run_measured_process #1", "err = %d\n", err))
goto close_clean;
err = ring_buffer__consume(ringbuf);
ASSERT_EQ(err, 1, "num_samples_or_err");
- ASSERT_NEQ(ima_hash_from_bpf, 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+
+ /*
+ * Test #2
+ * - Goal: obtain samples with the bpf_ima_file_hash() helper
+ * - Expected result: 2 samples (./ima_setup.sh, /bin/true)
+ */
+ test_init(skel->bss);
+ skel->bss->use_ima_file_hash = true;
+ err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+ if (CHECK(err, "run_measured_process #2", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_EQ(err, 2, "num_samples_or_err");
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+ bin_true_sample = ima_hash_from_bpf[1];
+
+ /*
+ * Test #3
+ * - Goal: confirm that bpf_ima_inode_hash() returns a non-fresh digest
+ * - Expected result:
+ * 1 sample (/bin/true: fresh) if commit 62622dab0a28 applied
+ * 2 samples (/bin/true: non-fresh, fresh) if commit 62622dab0a28 is
+ * not applied
+ *
+ * If commit 62622dab0a28 ("ima: return IMA digest value only when
+ * IMA_COLLECTED flag is set") is applied, bpf_ima_inode_hash() refuses
+ * to give a non-fresh digest, hence the correct result is 1 instead of
+ * 2.
+ */
+ test_init(skel->bss);
+
+ err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+ "modify-bin");
+ if (CHECK(err, "modify-bin #3", "err = %d\n", err))
+ goto close_clean;
+
+ skel->bss->enable_bprm_creds_for_exec = true;
+ err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+ if (CHECK(err, "run_measured_process #3", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_GE(err, 1, "num_samples_or_err");
+ if (err == 2) {
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+ ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample,
+ "sample_equal_or_err");
+ fresh_digest_idx = 1;
+ }
+
+ ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], 0, "ima_hash");
+ /* IMA refreshed the digest. */
+ ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], bin_true_sample,
+ "sample_equal_or_err");
+
+ /*
+ * Test #4
+ * - Goal: verify that bpf_ima_file_hash() returns a fresh digest
+ * - Expected result: 4 samples (./ima_setup.sh: fresh, fresh;
+ * /bin/true: fresh, fresh)
+ */
+ test_init(skel->bss);
+ skel->bss->use_ima_file_hash = true;
+ skel->bss->enable_bprm_creds_for_exec = true;
+ err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+ if (CHECK(err, "run_measured_process #4", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_EQ(err, 4, "num_samples_or_err");
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[2], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[3], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[2], bin_true_sample,
+ "sample_different_or_err");
+ ASSERT_EQ(ima_hash_from_bpf[3], ima_hash_from_bpf[2],
+ "sample_equal_or_err");
+
+ skel->bss->use_ima_file_hash = false;
+ skel->bss->enable_bprm_creds_for_exec = false;
+ err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+ "restore-bin");
+ if (CHECK(err, "restore-bin #3", "err = %d\n", err))
+ goto close_clean;
+
+ /*
+ * Test #5
+ * - Goal: obtain a sample from the kernel_read_file hook
+ * - Expected result: 2 samples (./ima_setup.sh, policy_test)
+ */
+ test_init(skel->bss);
+ skel->bss->use_ima_file_hash = true;
+ skel->bss->enable_kernel_read_file = true;
+ err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+ "load-policy");
+ if (CHECK(err, "run_measured_process #5", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_EQ(err, 2, "num_samples_or_err");
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+
+ /*
+ * Test #6
+ * - Goal: ensure that the kernel_read_file hook denies an operation
+ * - Expected result: 0 samples
+ */
+ test_init(skel->bss);
+ skel->bss->enable_kernel_read_file = true;
+ skel->bss->test_deny = true;
+ err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+ "load-policy");
+ if (CHECK(!err, "run_measured_process #6", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_EQ(err, 0, "num_samples_or_err");
close_clean:
snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
- CHECK_FAIL(system(cmd));
+ err = system(cmd);
+ CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
close_prog:
+ ring_buffer__free(ringbuf);
ima__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c b/tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c
new file mode 100644
index 000000000000..375677c19146
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates.*/
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_ldsx_insn.skel.h"
+
+static void test_map_val_and_probed_memory(void)
+{
+ struct test_ldsx_insn *skel;
+ int err;
+
+ skel = test_ldsx_insn__open();
+ if (!ASSERT_OK_PTR(skel, "test_ldsx_insn__open"))
+ return;
+
+ if (skel->rodata->skip) {
+ test__skip();
+ goto out;
+ }
+
+ bpf_program__set_autoload(skel->progs.rdonly_map_prog, true);
+ bpf_program__set_autoload(skel->progs.map_val_prog, true);
+ bpf_program__set_autoload(skel->progs.test_ptr_struct_arg, true);
+
+ err = test_ldsx_insn__load(skel);
+ if (!ASSERT_OK(err, "test_ldsx_insn__load"))
+ goto out;
+
+ err = test_ldsx_insn__attach(skel);
+ if (!ASSERT_OK(err, "test_ldsx_insn__attach"))
+ goto out;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->done1, 1, "done1");
+ ASSERT_EQ(skel->bss->ret1, 1, "ret1");
+ ASSERT_EQ(skel->bss->done2, 1, "done2");
+ ASSERT_EQ(skel->bss->ret2, 1, "ret2");
+ ASSERT_EQ(skel->bss->int_member, -1, "int_member");
+
+out:
+ test_ldsx_insn__destroy(skel);
+}
+
+static void test_ctx_member_sign_ext(void)
+{
+ struct test_ldsx_insn *skel;
+ int err, fd, cgroup_fd;
+ char buf[16] = {0};
+ socklen_t optlen;
+
+ cgroup_fd = test__join_cgroup("/ldsx_test");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /ldsx_test"))
+ return;
+
+ skel = test_ldsx_insn__open();
+ if (!ASSERT_OK_PTR(skel, "test_ldsx_insn__open"))
+ goto close_cgroup_fd;
+
+ if (skel->rodata->skip) {
+ test__skip();
+ goto destroy_skel;
+ }
+
+ bpf_program__set_autoload(skel->progs._getsockopt, true);
+
+ err = test_ldsx_insn__load(skel);
+ if (!ASSERT_OK(err, "test_ldsx_insn__load"))
+ goto destroy_skel;
+
+ skel->links._getsockopt =
+ bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link"))
+ goto destroy_skel;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ goto destroy_skel;
+
+ optlen = sizeof(buf);
+ (void)getsockopt(fd, SOL_IP, IP_TTL, buf, &optlen);
+
+ ASSERT_EQ(skel->bss->set_optlen, -1, "optlen");
+ ASSERT_EQ(skel->bss->set_retval, -1, "retval");
+
+ close(fd);
+destroy_skel:
+ test_ldsx_insn__destroy(skel);
+close_cgroup_fd:
+ close(cgroup_fd);
+}
+
+static void test_ctx_member_narrow_sign_ext(void)
+{
+ struct test_ldsx_insn *skel;
+ struct __sk_buff skb = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+ int err, prog_fd;
+
+ skel = test_ldsx_insn__open();
+ if (!ASSERT_OK_PTR(skel, "test_ldsx_insn__open"))
+ return;
+
+ if (skel->rodata->skip) {
+ test__skip();
+ goto out;
+ }
+
+ bpf_program__set_autoload(skel->progs._tc, true);
+
+ err = test_ldsx_insn__load(skel);
+ if (!ASSERT_OK(err, "test_ldsx_insn__load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs._tc);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+
+ ASSERT_EQ(skel->bss->set_mark, -2, "set_mark");
+
+out:
+ test_ldsx_insn__destroy(skel);
+}
+
+void test_ldsx_insn(void)
+{
+ if (test__start_subtest("map_val and probed_memory"))
+ test_map_val_and_probed_memory();
+ if (test__start_subtest("ctx_member_sign_ext"))
+ test_ctx_member_sign_ext();
+ if (test__start_subtest("ctx_member_narrow_sign_ext"))
+ test_ctx_member_narrow_sign_ext();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index d2c16eaae367..bcf2e1905ed7 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -11,33 +11,19 @@
#include "local_storage.skel.h"
#include "network_helpers.h"
-
-#ifndef __NR_pidfd_open
-#define __NR_pidfd_open 434
-#endif
-
-static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
-{
- return syscall(__NR_pidfd_open, pid, flags);
-}
-
-static unsigned int duration;
+#include "task_local_storage_helpers.h"
#define TEST_STORAGE_VALUE 0xbeefdead
struct storage {
void *inode;
unsigned int value;
- /* Lock ensures that spin locked versions of local stoage operations
- * also work, most operations in this tests are still single threaded
- */
- struct bpf_spin_lock lock;
};
/* Fork and exec the provided rm binary and return the exit code of the
* forked process and its pid.
*/
-static int run_self_unlink(int *monitored_pid, const char *rm_path)
+static int run_self_unlink(struct local_storage *skel, const char *rm_path)
{
int child_pid, child_status, ret;
int null_fd;
@@ -49,7 +35,7 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
dup2(null_fd, STDERR_FILENO);
close(null_fd);
- *monitored_pid = getpid();
+ skel->bss->monitored_pid = getpid();
/* Use the copied /usr/bin/rm to delete itself
* /tmp/copy_of_rm /tmp/copy_of_rm.
*/
@@ -58,6 +44,7 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
exit(errno);
} else if (child_pid > 0) {
waitpid(child_pid, &child_status, 0);
+ ASSERT_EQ(skel->data->task_storage_result, 0, "task_storage_result");
return WEXITSTATUS(child_status);
}
@@ -66,46 +53,36 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
static bool check_syscall_operations(int map_fd, int obj_fd)
{
- struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } },
- lookup_val = { .value = 0, .lock = { 0 } };
+ struct storage val = { .value = TEST_STORAGE_VALUE },
+ lookup_val = { .value = 0 };
int err;
/* Looking up an existing element should fail initially */
- err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
- BPF_F_LOCK);
- if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
- "err:%d errno:%d\n", err, errno))
+ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
+ if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem"))
return false;
/* Create a new element */
- err = bpf_map_update_elem(map_fd, &obj_fd, &val,
- BPF_NOEXIST | BPF_F_LOCK);
- if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
- errno))
+ err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
return false;
/* Lookup the newly created element */
- err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
- BPF_F_LOCK);
- if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
- errno))
+ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
return false;
/* Check the value of the newly created element */
- if (CHECK(lookup_val.value != val.value, "bpf_map_lookup_elem",
- "value got = %x errno:%d", lookup_val.value, val.value))
+ if (!ASSERT_EQ(lookup_val.value, val.value, "bpf_map_lookup_elem"))
return false;
err = bpf_map_delete_elem(map_fd, &obj_fd);
- if (CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", err,
- errno))
+ if (!ASSERT_OK(err, "bpf_map_delete_elem()"))
return false;
/* The lookup should fail, now that the element has been deleted */
- err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
- BPF_F_LOCK);
- if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
- "err:%d errno:%d\n", err, errno))
+ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
+ if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem"))
return false;
return true;
@@ -120,35 +97,32 @@ void test_test_local_storage(void)
char cmd[256];
skel = local_storage__open_and_load();
- if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
goto close_prog;
err = local_storage__attach(skel);
- if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "attach"))
goto close_prog;
task_fd = sys_pidfd_open(getpid(), 0);
- if (CHECK(task_fd < 0, "pidfd_open",
- "failed to get pidfd err:%d, errno:%d", task_fd, errno))
+ if (!ASSERT_GE(task_fd, 0, "pidfd_open"))
goto close_prog;
if (!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map),
task_fd))
goto close_prog;
- if (CHECK(!mkdtemp(tmp_dir_path), "mkdtemp",
- "unable to create tmpdir: %d\n", errno))
+ if (!ASSERT_OK_PTR(mkdtemp(tmp_dir_path), "mkdtemp"))
goto close_prog;
snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm",
tmp_dir_path);
snprintf(cmd, sizeof(cmd), "cp /bin/rm %s", tmp_exec_path);
- if (CHECK_FAIL(system(cmd)))
+ if (!ASSERT_OK(system(cmd), "system(cp)"))
goto close_prog_rmdir;
rm_fd = open(tmp_exec_path, O_RDONLY);
- if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d",
- tmp_exec_path, rm_fd, errno))
+ if (!ASSERT_GE(rm_fd, 0, "open(tmp_exec_path)"))
goto close_prog_rmdir;
if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map),
@@ -160,8 +134,8 @@ void test_test_local_storage(void)
* unlink its executable. This operation should be denied by the loaded
* LSM program.
*/
- err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path);
- if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err))
+ err = run_self_unlink(skel, tmp_exec_path);
+ if (!ASSERT_EQ(err, EPERM, "run_self_unlink"))
goto close_prog_rmdir;
/* Set the process being monitored to be the current process */
@@ -172,18 +146,16 @@ void test_test_local_storage(void)
*/
snprintf(cmd, sizeof(cmd), "mv %s/copy_of_rm %s/check_null_ptr",
tmp_dir_path, tmp_dir_path);
- if (CHECK_FAIL(system(cmd)))
+ if (!ASSERT_OK(system(cmd), "system(mv)"))
goto close_prog_rmdir;
- CHECK(skel->data->inode_storage_result != 0, "inode_storage_result",
- "inode_local_storage not set\n");
+ ASSERT_EQ(skel->data->inode_storage_result, 0, "inode_storage_result");
serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
- if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+ if (!ASSERT_GE(serv_sk, 0, "start_server"))
goto close_prog_rmdir;
- CHECK(skel->data->sk_storage_result != 0, "sk_storage_result",
- "sk_local_storage not set\n");
+ ASSERT_EQ(skel->data->sk_storage_result, 0, "sk_storage_result");
if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map),
serv_sk))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 2755e4f81499..bdc4fc06bc5a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -12,6 +12,7 @@
#include <stdlib.h>
#include "lsm.skel.h"
+#include "lsm_tailcall.skel.h"
char *CMD_ARGS[] = {"true", NULL};
@@ -51,44 +52,109 @@ int exec_cmd(int *monitored_pid)
return -EINVAL;
}
-void test_test_lsm(void)
+static int test_lsm(struct lsm *skel)
{
- struct lsm *skel = NULL;
- int err, duration = 0;
+ struct bpf_link *link;
int buf = 1234;
-
- skel = lsm__open_and_load();
- if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
- goto close_prog;
+ int err;
err = lsm__attach(skel);
- if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
- goto close_prog;
+ if (!ASSERT_OK(err, "attach"))
+ return err;
+
+ /* Check that already linked program can't be attached again. */
+ link = bpf_program__attach(skel->progs.test_int_hook);
+ if (!ASSERT_ERR_PTR(link, "attach_link"))
+ return -1;
err = exec_cmd(&skel->bss->monitored_pid);
- if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
- goto close_prog;
+ if (!ASSERT_OK(err, "exec_cmd"))
+ return err;
- CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n",
- skel->bss->bprm_count);
+ ASSERT_EQ(skel->bss->bprm_count, 1, "bprm_count");
skel->bss->monitored_pid = getpid();
err = stack_mprotect();
- if (CHECK(errno != EPERM, "stack_mprotect", "want err=EPERM, got %d\n",
- errno))
- goto close_prog;
+ if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+ !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+ return err;
- CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
- "mprotect_count = %d\n", skel->bss->mprotect_count);
+ ASSERT_EQ(skel->bss->mprotect_count, 1, "mprotect_count");
syscall(__NR_setdomainname, &buf, -2L);
syscall(__NR_setdomainname, 0, -3L);
syscall(__NR_setdomainname, ~0L, -4L);
- CHECK(skel->bss->copy_test != 3, "copy_test",
- "copy_test = %d\n", skel->bss->copy_test);
+ ASSERT_EQ(skel->bss->copy_test, 3, "copy_test");
+
+ lsm__detach(skel);
+
+ skel->bss->copy_test = 0;
+ skel->bss->bprm_count = 0;
+ skel->bss->mprotect_count = 0;
+ return 0;
+}
+
+static void test_lsm_basic(void)
+{
+ struct lsm *skel = NULL;
+ int err;
+
+ skel = lsm__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "lsm_skel_load"))
+ goto close_prog;
+
+ err = test_lsm(skel);
+ if (!ASSERT_OK(err, "test_lsm_first_attach"))
+ goto close_prog;
+
+ err = test_lsm(skel);
+ ASSERT_OK(err, "test_lsm_second_attach");
close_prog:
lsm__destroy(skel);
}
+
+static void test_lsm_tailcall(void)
+{
+ struct lsm_tailcall *skel = NULL;
+ int map_fd, prog_fd;
+ int err, key;
+
+ skel = lsm_tailcall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "lsm_tailcall__skel_load"))
+ goto close_prog;
+
+ map_fd = bpf_map__fd(skel->maps.jmp_table);
+ if (CHECK_FAIL(map_fd < 0))
+ goto close_prog;
+
+ prog_fd = bpf_program__fd(skel->progs.lsm_file_permission_prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto close_prog;
+
+ key = 0;
+ err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(!err))
+ goto close_prog;
+
+ prog_fd = bpf_program__fd(skel->progs.lsm_kernfs_init_security_prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto close_prog;
+
+ err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto close_prog;
+
+close_prog:
+ lsm_tailcall__destroy(skel);
+}
+
+void test_test_lsm(void)
+{
+ if (test__start_subtest("lsm_basic"))
+ test_lsm_basic();
+ if (test__start_subtest("lsm_tailcall"))
+ test_lsm_tailcall();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_mmap_inner_array.c b/tools/testing/selftests/bpf/prog_tests/test_mmap_inner_array.c
new file mode 100644
index 000000000000..ce745776ed18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_mmap_inner_array.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include "mmap_inner_array.skel.h"
+
+void test_mmap_inner_array(void)
+{
+ const long page_size = sysconf(_SC_PAGE_SIZE);
+ struct mmap_inner_array *skel;
+ int inner_array_fd, err;
+ void *tmp;
+ __u64 *val;
+
+ skel = mmap_inner_array__open_and_load();
+
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ inner_array_fd = bpf_map__fd(skel->maps.inner_array);
+ tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, inner_array_fd, 0);
+ if (!ASSERT_OK_PTR(tmp, "inner array mmap"))
+ goto out;
+ val = (void *)tmp;
+
+ err = mmap_inner_array__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out_unmap;
+
+ skel->bss->pid = getpid();
+ usleep(1);
+
+ /* pid is set, pid_match == true and outer_map_match == false */
+ ASSERT_TRUE(skel->bss->pid_match, "pid match 1");
+ ASSERT_FALSE(skel->bss->outer_map_match, "outer map match 1");
+ ASSERT_FALSE(skel->bss->done, "done 1");
+ ASSERT_EQ(*val, 0, "value match 1");
+
+ err = bpf_map__update_elem(skel->maps.outer_map,
+ &skel->bss->pid, sizeof(skel->bss->pid),
+ &inner_array_fd, sizeof(inner_array_fd),
+ BPF_ANY);
+ if (!ASSERT_OK(err, "update elem"))
+ goto out_unmap;
+ usleep(1);
+
+ /* outer map key is set, outer_map_match == true */
+ ASSERT_TRUE(skel->bss->pid_match, "pid match 2");
+ ASSERT_TRUE(skel->bss->outer_map_match, "outer map match 2");
+ ASSERT_TRUE(skel->bss->done, "done 2");
+ ASSERT_EQ(*val, skel->data->match_value, "value match 2");
+
+out_unmap:
+ munmap(tmp, page_size);
+out:
+ mmap_inner_array__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 9966685866fd..f27013e38d03 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -56,11 +56,11 @@ static void setaffinity(void)
void test_test_overhead(void)
{
- const char *kprobe_name = "kprobe/__set_task_comm";
- const char *kretprobe_name = "kretprobe/__set_task_comm";
- const char *raw_tp_name = "raw_tp/task_rename";
- const char *fentry_name = "fentry/__set_task_comm";
- const char *fexit_name = "fexit/__set_task_comm";
+ const char *kprobe_name = "prog1";
+ const char *kretprobe_name = "prog2";
+ const char *raw_tp_name = "prog3";
+ const char *fentry_name = "prog4";
+ const char *fexit_name = "prog5";
const char *kprobe_func = "__set_task_comm";
struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
struct bpf_program *fentry_prog, *fexit_prog;
@@ -72,27 +72,27 @@ void test_test_overhead(void)
if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
return;
- obj = bpf_object__open_file("./test_overhead.o", NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ obj = bpf_object__open_file("./test_overhead.bpf.o", NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
- kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
+ kprobe_prog = bpf_object__find_program_by_name(obj, kprobe_name);
if (CHECK(!kprobe_prog, "find_probe",
"prog '%s' not found\n", kprobe_name))
goto cleanup;
- kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
+ kretprobe_prog = bpf_object__find_program_by_name(obj, kretprobe_name);
if (CHECK(!kretprobe_prog, "find_probe",
"prog '%s' not found\n", kretprobe_name))
goto cleanup;
- raw_tp_prog = bpf_object__find_program_by_title(obj, raw_tp_name);
+ raw_tp_prog = bpf_object__find_program_by_name(obj, raw_tp_name);
if (CHECK(!raw_tp_prog, "find_probe",
"prog '%s' not found\n", raw_tp_name))
goto cleanup;
- fentry_prog = bpf_object__find_program_by_title(obj, fentry_name);
+ fentry_prog = bpf_object__find_program_by_name(obj, fentry_name);
if (CHECK(!fentry_prog, "find_probe",
"prog '%s' not found\n", fentry_name))
goto cleanup;
- fexit_prog = bpf_object__find_program_by_title(obj, fexit_name);
+ fexit_prog = bpf_object__find_program_by_name(obj, fexit_name);
if (CHECK(!fexit_prog, "find_probe",
"prog '%s' not found\n", fexit_name))
goto cleanup;
@@ -108,7 +108,7 @@ void test_test_overhead(void)
/* attach kprobe */
link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kprobe"))
goto cleanup;
test_run("kprobe");
bpf_link__destroy(link);
@@ -116,28 +116,28 @@ void test_test_overhead(void)
/* attach kretprobe */
link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kretprobe"))
goto cleanup;
test_run("kretprobe");
bpf_link__destroy(link);
/* attach raw_tp */
link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
test_run("raw_tp");
bpf_link__destroy(link);
/* attach fentry */
link = bpf_program__attach_trace(fentry_prog);
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
test_run("fentry");
bpf_link__destroy(link);
/* attach fexit */
link = bpf_program__attach_trace(fexit_prog);
- if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fexit"))
goto cleanup;
test_run("fexit");
bpf_link__destroy(link);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
index 4ca275101ee0..de24e8f0e738 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_profiler.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
@@ -8,20 +8,20 @@
static int sanity_run(struct bpf_program *prog)
{
- struct bpf_prog_test_run_attr test_attr = {};
+ LIBBPF_OPTS(bpf_test_run_opts, test_attr);
__u64 args[] = {1, 2, 3};
- __u32 duration = 0;
int err, prog_fd;
prog_fd = bpf_program__fd(prog);
- test_attr.prog_fd = prog_fd;
test_attr.ctx_in = args;
test_attr.ctx_size_in = sizeof(args);
- err = bpf_prog_test_run_xattr(&test_attr);
- if (CHECK(err || test_attr.retval, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, test_attr.retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &test_attr);
+ if (!ASSERT_OK(err, "test_run"))
+ return -1;
+
+ if (!ASSERT_OK(test_attr.retval, "test_run retval"))
return -1;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
index cf1215531920..09ca13bdf6ca 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
@@ -6,15 +6,19 @@
static int sanity_run(struct bpf_program *prog)
{
- __u32 duration, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ .flags = BPF_F_TEST_SKB_CHECKSUM_COMPLETE,
+ );
prog_fd = bpf_program__fd(prog);
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- if (CHECK(err || retval != 123, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration))
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run"))
+ return -1;
+ if (!ASSERT_EQ(topts.retval, 123, "test_run retval"))
return -1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
new file mode 100644
index 000000000000..baceb0de9d49
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+#include "strncmp_test.skel.h"
+
+static int trigger_strncmp(const struct strncmp_test *skel)
+{
+ int cmp;
+
+ usleep(1);
+
+ cmp = skel->bss->cmp_ret;
+ if (cmp > 0)
+ return 1;
+ if (cmp < 0)
+ return -1;
+ return 0;
+}
+
+/*
+ * Compare str and target after making str[i] != target[i].
+ * When exp is -1, make str[i] < target[i] and delta = -1.
+ */
+static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name,
+ int exp)
+{
+ size_t nr = sizeof(skel->bss->str);
+ char *str = skel->bss->str;
+ int delta = exp;
+ int got;
+ size_t i;
+
+ memcpy(str, skel->rodata->target, nr);
+ for (i = 0; i < nr - 1; i++) {
+ str[i] += delta;
+
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, exp, name);
+
+ str[i] -= delta;
+ }
+}
+
+static void test_strncmp_ret(void)
+{
+ struct strncmp_test *skel;
+ int err, got;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.do_strncmp, true);
+
+ err = strncmp_test__load(skel);
+ if (!ASSERT_EQ(err, 0, "strncmp_test load"))
+ goto out;
+
+ err = strncmp_test__attach(skel);
+ if (!ASSERT_EQ(err, 0, "strncmp_test attach"))
+ goto out;
+
+ skel->bss->target_pid = getpid();
+
+ /* Empty str */
+ skel->bss->str[0] = '\0';
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, -1, "strncmp: empty str");
+
+ /* Same string */
+ memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str));
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, 0, "strncmp: same str");
+
+ /* Not-null-terminated string */
+ memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str));
+ skel->bss->str[sizeof(skel->bss->str) - 1] = 'A';
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, 1, "strncmp: not-null-term str");
+
+ strncmp_full_str_cmp(skel, "strncmp: less than", -1);
+ strncmp_full_str_cmp(skel, "strncmp: greater than", 1);
+out:
+ strncmp_test__destroy(skel);
+}
+
+static void test_strncmp_bad_not_const_str_size(void)
+{
+ struct strncmp_test *skel;
+ int err;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, true);
+
+ err = strncmp_test__load(skel);
+ ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size");
+
+ strncmp_test__destroy(skel);
+}
+
+static void test_strncmp_bad_writable_target(void)
+{
+ struct strncmp_test *skel;
+ int err;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, true);
+
+ err = strncmp_test__load(skel);
+ ASSERT_ERR(err, "strncmp_test load bad_writable_target");
+
+ strncmp_test__destroy(skel);
+}
+
+static void test_strncmp_bad_not_null_term_target(void)
+{
+ struct strncmp_test *skel;
+ int err;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, true);
+
+ err = strncmp_test__load(skel);
+ ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target");
+
+ strncmp_test__destroy(skel);
+}
+
+void test_test_strncmp(void)
+{
+ if (test__start_subtest("strncmp_ret"))
+ test_strncmp_ret();
+ if (test__start_subtest("strncmp_bad_not_const_str_size"))
+ test_strncmp_bad_not_const_str_size();
+ if (test__start_subtest("strncmp_bad_writable_target"))
+ test_strncmp_bad_writable_target();
+ if (test__start_subtest("strncmp_bad_not_null_term_target"))
+ test_strncmp_bad_not_null_term_target();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
new file mode 100644
index 000000000000..fd8762ba4b67
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_id_ops_mapping1.skel.h"
+#include "struct_ops_id_ops_mapping2.skel.h"
+
+static void test_st_ops_id_ops_mapping(void)
+{
+ struct struct_ops_id_ops_mapping1 *skel1 = NULL;
+ struct struct_ops_id_ops_mapping2 *skel2 = NULL;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int err, pid, prog1_fd, prog2_fd;
+
+ skel1 = struct_ops_id_ops_mapping1__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "struct_ops_id_ops_mapping1__open"))
+ goto out;
+
+ skel2 = struct_ops_id_ops_mapping2__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "struct_ops_id_ops_mapping2__open"))
+ goto out;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel1->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel1->bss->st_ops_id = info.id;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel2->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel2->bss->st_ops_id = info.id;
+
+ err = struct_ops_id_ops_mapping1__attach(skel1);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping1__attach"))
+ goto out;
+
+ err = struct_ops_id_ops_mapping2__attach(skel2);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping2__attach"))
+ goto out;
+
+ /* run tracing prog that calls .test_1 and checks return */
+ pid = getpid();
+ skel1->bss->test_pid = pid;
+ skel2->bss->test_pid = pid;
+ sys_gettid();
+ skel1->bss->test_pid = 0;
+ skel2->bss->test_pid = 0;
+
+ /* run syscall_prog that calls .test_1 and checks return */
+ prog1_fd = bpf_program__fd(skel1->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog1_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ prog2_fd = bpf_program__fd(skel2->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog2_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ ASSERT_EQ(skel1->bss->test_err, 0, "skel1->bss->test_err");
+ ASSERT_EQ(skel2->bss->test_err, 0, "skel2->bss->test_err");
+
+out:
+ struct_ops_id_ops_mapping1__destroy(skel1);
+ struct_ops_id_ops_mapping2__destroy(skel2);
+}
+
+void test_struct_ops_id_ops_mapping(void)
+{
+ if (test__start_subtest("st_ops_id_ops_mapping"))
+ test_st_ops_id_ops_mapping();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
new file mode 100644
index 000000000000..467cc72a3588
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
@@ -0,0 +1,16 @@
+#include <test_progs.h>
+
+#include "struct_ops_kptr_return.skel.h"
+#include "struct_ops_kptr_return_fail__wrong_type.skel.h"
+#include "struct_ops_kptr_return_fail__invalid_scalar.skel.h"
+#include "struct_ops_kptr_return_fail__nonzero_offset.skel.h"
+#include "struct_ops_kptr_return_fail__local_kptr.skel.h"
+
+void test_struct_ops_kptr_return(void)
+{
+ RUN_TESTS(struct_ops_kptr_return);
+ RUN_TESTS(struct_ops_kptr_return_fail__wrong_type);
+ RUN_TESTS(struct_ops_kptr_return_fail__invalid_scalar);
+ RUN_TESTS(struct_ops_kptr_return_fail__nonzero_offset);
+ RUN_TESTS(struct_ops_kptr_return_fail__local_kptr);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c
new file mode 100644
index 000000000000..01dc2613c8a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "struct_ops_maybe_null.skel.h"
+#include "struct_ops_maybe_null_fail.skel.h"
+
+/* Test that the verifier accepts a program that access a nullable pointer
+ * with a proper check.
+ */
+static void maybe_null(void)
+{
+ struct struct_ops_maybe_null *skel;
+
+ skel = struct_ops_maybe_null__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_and_load"))
+ return;
+
+ struct_ops_maybe_null__destroy(skel);
+}
+
+/* Test that the verifier rejects a program that access a nullable pointer
+ * without a check beforehand.
+ */
+static void maybe_null_fail(void)
+{
+ struct struct_ops_maybe_null_fail *skel;
+
+ skel = struct_ops_maybe_null_fail__open_and_load();
+ if (ASSERT_ERR_PTR(skel, "struct_ops_module_fail__open_and_load"))
+ return;
+
+ struct_ops_maybe_null_fail__destroy(skel);
+}
+
+void test_struct_ops_maybe_null(void)
+{
+ /* The verifier verifies the programs at load time, so testing both
+ * programs in the same compile-unit is complicated. We run them in
+ * separate objects to simplify the testing.
+ */
+ if (test__start_subtest("maybe_null"))
+ maybe_null();
+ if (test__start_subtest("maybe_null_fail"))
+ maybe_null_fail();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
new file mode 100644
index 000000000000..75a0dea511b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <time.h>
+
+#include <sys/epoll.h>
+
+#include "struct_ops_module.skel.h"
+#include "struct_ops_nulled_out_cb.skel.h"
+#include "struct_ops_forgotten_cb.skel.h"
+#include "struct_ops_detach.skel.h"
+#include "unsupported_ops.skel.h"
+
+static void check_map_info(struct bpf_map_info *info)
+{
+ struct bpf_btf_info btf_info;
+ char btf_name[256];
+ u32 btf_info_len = sizeof(btf_info);
+ int err, fd;
+
+ fd = bpf_btf_get_fd_by_id(info->btf_vmlinux_id);
+ if (!ASSERT_GE(fd, 0, "get_value_type_btf_obj_fd"))
+ return;
+
+ memset(&btf_info, 0, sizeof(btf_info));
+ btf_info.name = ptr_to_u64(btf_name);
+ btf_info.name_len = sizeof(btf_name);
+ err = bpf_btf_get_info_by_fd(fd, &btf_info, &btf_info_len);
+ if (!ASSERT_OK(err, "get_value_type_btf_obj_info"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(strcmp(btf_name, "bpf_testmod"), 0, "get_value_type_btf_obj_name"))
+ goto cleanup;
+
+cleanup:
+ close(fd);
+}
+
+static int attach_ops_and_check(struct struct_ops_module *skel,
+ struct bpf_map *map,
+ int expected_test_2_result)
+{
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(map);
+ ASSERT_OK_PTR(link, "attach_test_mod_1");
+ if (!link)
+ return -1;
+
+ /* test_{1,2}() would be called from bpf_dummy_reg() in bpf_testmod.c */
+ ASSERT_EQ(skel->bss->test_1_result, 0xdeadbeef, "test_1_result");
+ ASSERT_EQ(skel->bss->test_2_result, expected_test_2_result, "test_2_result");
+
+ bpf_link__destroy(link);
+ return 0;
+}
+
+static void test_struct_ops_load(void)
+{
+ struct struct_ops_module *skel;
+ struct bpf_map_info info = {};
+ int err;
+ u32 len;
+
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ skel->struct_ops.testmod_1->data = 13;
+ skel->struct_ops.testmod_1->test_2 = skel->progs.test_3;
+ /* Since test_2() is not being used, it should be disabled from
+ * auto-loading, or it will fail to load.
+ */
+ bpf_program__set_autoload(skel->progs.test_2, false);
+ bpf_map__set_autocreate(skel->maps.testmod_zeroed, false);
+
+ err = struct_ops_module__load(skel);
+ if (!ASSERT_OK(err, "struct_ops_module_load"))
+ goto cleanup;
+
+ len = sizeof(info);
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.testmod_1), &info,
+ &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto cleanup;
+
+ check_map_info(&info);
+ /* test_3() will be called from bpf_dummy_reg() in bpf_testmod.c
+ *
+ * In bpf_testmod.c it will pass 4 and 13 (the value of data) to
+ * .test_2. So, the value of test_2_result should be 20 (4 + 13 +
+ * 3).
+ */
+ if (!attach_ops_and_check(skel, skel->maps.testmod_1, 20))
+ goto cleanup;
+ if (!attach_ops_and_check(skel, skel->maps.testmod_2, 12))
+ goto cleanup;
+
+cleanup:
+ struct_ops_module__destroy(skel);
+}
+
+static void test_struct_ops_not_zeroed(void)
+{
+ struct struct_ops_module *skel;
+ int err;
+
+ /* zeroed is 0, and zeroed_op is null */
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ skel->struct_ops.testmod_zeroed->zeroed = 0;
+ /* zeroed_op prog should be not loaded automatically now */
+ skel->struct_ops.testmod_zeroed->zeroed_op = NULL;
+
+ err = struct_ops_module__load(skel);
+ ASSERT_OK(err, "struct_ops_module_load");
+
+ struct_ops_module__destroy(skel);
+
+ /* zeroed is not 0 */
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_not_zeroed"))
+ return;
+
+ /* libbpf should reject the testmod_zeroed since struct
+ * bpf_testmod_ops in the kernel has no "zeroed" field and the
+ * value of "zeroed" is non-zero.
+ */
+ skel->struct_ops.testmod_zeroed->zeroed = 0xdeadbeef;
+ skel->struct_ops.testmod_zeroed->zeroed_op = NULL;
+ err = struct_ops_module__load(skel);
+ ASSERT_ERR(err, "struct_ops_module_load_not_zeroed");
+
+ struct_ops_module__destroy(skel);
+
+ /* zeroed_op is not null */
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_not_zeroed_op"))
+ return;
+
+ /* libbpf should reject the testmod_zeroed since the value of its
+ * "zeroed_op" is not null.
+ */
+ skel->struct_ops.testmod_zeroed->zeroed_op = skel->progs.test_3;
+ err = struct_ops_module__load(skel);
+ ASSERT_ERR(err, "struct_ops_module_load_not_zeroed_op");
+
+ struct_ops_module__destroy(skel);
+}
+
+/* The signature of an implementation might not match the signature of the
+ * function pointer prototype defined in the BPF program. This mismatch
+ * should be allowed as long as the behavior of the operator program
+ * adheres to the signature in the kernel. Libbpf should not enforce the
+ * signature; rather, let the kernel verifier handle the enforcement.
+ */
+static void test_struct_ops_incompatible(void)
+{
+ struct struct_ops_module *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ bpf_map__set_autocreate(skel->maps.testmod_zeroed, false);
+
+ err = struct_ops_module__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_incompatible);
+ if (ASSERT_OK_PTR(link, "attach_struct_ops"))
+ bpf_link__destroy(link);
+
+cleanup:
+ struct_ops_module__destroy(skel);
+}
+
+/* validate that it's ok to "turn off" callback that kernel supports */
+static void test_struct_ops_nulled_out_cb(void)
+{
+ struct struct_ops_nulled_out_cb *skel;
+ int err;
+
+ skel = struct_ops_nulled_out_cb__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ /* kernel knows about test_1, but we still null it out */
+ skel->struct_ops.ops->test_1 = NULL;
+
+ err = struct_ops_nulled_out_cb__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.test_1_turn_off), "prog_autoload");
+ ASSERT_LT(bpf_program__fd(skel->progs.test_1_turn_off), 0, "prog_fd");
+
+cleanup:
+ struct_ops_nulled_out_cb__destroy(skel);
+}
+
+/* validate that libbpf generates reasonable error message if struct_ops is
+ * not referenced in any struct_ops map
+ */
+static void test_struct_ops_forgotten_cb(void)
+{
+ struct struct_ops_forgotten_cb *skel;
+ char *log;
+ int err;
+
+ skel = struct_ops_forgotten_cb__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ start_libbpf_log_capture();
+
+ err = struct_ops_forgotten_cb__load(skel);
+ if (!ASSERT_ERR(err, "skel_load"))
+ goto cleanup;
+
+ log = stop_libbpf_log_capture();
+ ASSERT_HAS_SUBSTR(log,
+ "prog 'test_1_forgotten': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?",
+ "libbpf_log");
+ free(log);
+
+ struct_ops_forgotten_cb__destroy(skel);
+
+ /* now let's programmatically use it, we should be fine now */
+ skel = struct_ops_forgotten_cb__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->struct_ops.ops->test_1 = skel->progs.test_1_forgotten; /* not anymore */
+
+ err = struct_ops_forgotten_cb__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+cleanup:
+ struct_ops_forgotten_cb__destroy(skel);
+}
+
+/* Detach a link from a user space program */
+static void test_detach_link(void)
+{
+ struct epoll_event ev, events[2];
+ struct struct_ops_detach *skel;
+ struct bpf_link *link = NULL;
+ int fd, epollfd = -1, nfds;
+ int err;
+
+ skel = struct_ops_detach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_detach__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_do_detach);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto cleanup;
+
+ fd = bpf_link__fd(link);
+ if (!ASSERT_GE(fd, 0, "link_fd"))
+ goto cleanup;
+
+ epollfd = epoll_create1(0);
+ if (!ASSERT_GE(epollfd, 0, "epoll_create1"))
+ goto cleanup;
+
+ ev.events = EPOLLHUP;
+ ev.data.fd = fd;
+ err = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
+ if (!ASSERT_OK(err, "epoll_ctl"))
+ goto cleanup;
+
+ err = bpf_link__detach(link);
+ if (!ASSERT_OK(err, "detach_link"))
+ goto cleanup;
+
+ /* Wait for EPOLLHUP */
+ nfds = epoll_wait(epollfd, events, 2, 500);
+ if (!ASSERT_EQ(nfds, 1, "epoll_wait"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(events[0].data.fd, fd, "epoll_wait_fd"))
+ goto cleanup;
+ if (!ASSERT_TRUE(events[0].events & EPOLLHUP, "events[0].events"))
+ goto cleanup;
+
+cleanup:
+ if (epollfd >= 0)
+ close(epollfd);
+ bpf_link__destroy(link);
+ struct_ops_detach__destroy(skel);
+}
+
+void serial_test_struct_ops_module(void)
+{
+ if (test__start_subtest("struct_ops_load"))
+ test_struct_ops_load();
+ if (test__start_subtest("struct_ops_not_zeroed"))
+ test_struct_ops_not_zeroed();
+ if (test__start_subtest("struct_ops_incompatible"))
+ test_struct_ops_incompatible();
+ if (test__start_subtest("struct_ops_null_out_cb"))
+ test_struct_ops_nulled_out_cb();
+ if (test__start_subtest("struct_ops_forgotten_cb"))
+ test_struct_ops_forgotten_cb();
+ if (test__start_subtest("test_detach_link"))
+ test_detach_link();
+ RUN_TESTS(unsupported_ops);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
new file mode 100644
index 000000000000..645d32b5160c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "struct_ops_multi_pages.skel.h"
+
+static void do_struct_ops_multi_pages(void)
+{
+ struct struct_ops_multi_pages *skel;
+ struct bpf_link *link;
+
+ /* The size of all trampolines of skel->maps.multi_pages should be
+ * over 1 page (at least for x86).
+ */
+ skel = struct_ops_multi_pages__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_multi_pages_open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.multi_pages);
+ ASSERT_OK_PTR(link, "attach_multi_pages");
+
+ bpf_link__destroy(link);
+ struct_ops_multi_pages__destroy(skel);
+}
+
+void test_struct_ops_multi_pages(void)
+{
+ if (test__start_subtest("multi_pages"))
+ do_struct_ops_multi_pages();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c
new file mode 100644
index 000000000000..106ea447965a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <testing_helpers.h>
+
+static void load_bpf_test_no_cfi(void)
+{
+ int fd;
+ int err;
+
+ fd = open("bpf_test_no_cfi.ko", O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open"))
+ return;
+
+ /* The module will try to register a struct_ops type without
+ * cfi_stubs and with cfi_stubs.
+ *
+ * The one without cfi_stub should fail. The module will be loaded
+ * successfully only if the result of the registration is as
+ * expected, or it fails.
+ */
+ err = finit_module(fd, "", 0);
+ close(fd);
+ if (!ASSERT_OK(err, "finit_module"))
+ return;
+
+ err = delete_module("bpf_test_no_cfi", 0);
+ ASSERT_OK(err, "delete_module");
+}
+
+void test_struct_ops_no_cfi(void)
+{
+ if (test__start_subtest("load_bpf_test_no_cfi"))
+ load_bpf_test_no_cfi();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
new file mode 100644
index 000000000000..da60c715fc59
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
@@ -0,0 +1,14 @@
+#include <test_progs.h>
+
+#include "struct_ops_refcounted.skel.h"
+#include "struct_ops_refcounted_fail__ref_leak.skel.h"
+#include "struct_ops_refcounted_fail__global_subprog.skel.h"
+#include "struct_ops_refcounted_fail__tail_call.skel.h"
+
+void test_struct_ops_refcounted(void)
+{
+ RUN_TESTS(struct_ops_refcounted);
+ RUN_TESTS(struct_ops_refcounted_fail__ref_leak);
+ RUN_TESTS(struct_ops_refcounted_fail__global_subprog);
+ RUN_TESTS(struct_ops_refcounted_fail__tail_call);
+}
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
index a20a919244c0..273dd41ca09e 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
@@ -1,21 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <linux/filter.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
+#include "test_progs.h"
#include "cgroup_helpers.h"
#define CG_PATH "/foo"
@@ -124,7 +110,7 @@ static struct sysctl_test tests[] = {
.descr = "ctx:write sysctl:write read ok narrow",
.insns = {
/* u64 w = (u16)write & 1; */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, write)),
#else
@@ -184,7 +170,7 @@ static struct sysctl_test tests[] = {
.descr = "ctx:file_pos sysctl:read read ok narrow",
.insns = {
/* If (file_pos == X) */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, file_pos)),
#else
@@ -1372,7 +1358,7 @@ static struct sysctl_test tests[] = {
},
{
"C prog: deny all writes",
- .prog_file = "./test_sysctl_prog.o",
+ .prog_file = "./test_sysctl_prog.bpf.o",
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_WRONLY,
@@ -1381,7 +1367,7 @@ static struct sysctl_test tests[] = {
},
{
"C prog: deny access by name",
- .prog_file = "./test_sysctl_prog.o",
+ .prog_file = "./test_sysctl_prog.bpf.o",
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
@@ -1389,7 +1375,7 @@ static struct sysctl_test tests[] = {
},
{
"C prog: read tcp_mem",
- .prog_file = "./test_sysctl_prog.o",
+ .prog_file = "./test_sysctl_prog.bpf.o",
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
@@ -1435,14 +1421,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test,
const char *sysctl_path)
{
struct bpf_insn *prog = test->insns;
- struct bpf_load_program_attr attr;
- int ret;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ int ret, insn_cnt;
- memset(&attr, 0, sizeof(struct bpf_load_program_attr));
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
- attr.insns = prog;
- attr.insns_cnt = probe_prog_length(attr.insns);
- attr.license = "GPL";
+ insn_cnt = probe_prog_length(prog);
if (test->fixup_value_insn) {
char buf[128];
@@ -1465,7 +1447,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test,
return -1;
}
- ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ opts.log_buf = bpf_log_buf;
+ opts.log_size = BPF_LOG_BUF_SIZE;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SYSCTL, NULL, "GPL", prog, insn_cnt, &opts);
if (ret < 0 && test->result != LOAD_REJECT) {
log_err(">>> Loading program error.\n"
">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
@@ -1476,15 +1461,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test,
static int load_sysctl_prog_file(struct sysctl_test *test)
{
- struct bpf_prog_load_attr attr;
struct bpf_object *obj;
int prog_fd;
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = test->prog_file;
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
-
- if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+ if (bpf_prog_test_load(test->prog_file, BPF_PROG_TYPE_CGROUP_SYSCTL, &obj, &prog_fd)) {
if (test->result != LOAD_REJECT)
log_err(">>> Loading program (%s) error.\n",
test->prog_file);
@@ -1566,7 +1546,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
goto err;
}
- if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
+ if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) {
if (test->result == ATTACH_REJECT)
goto out;
else
@@ -1614,23 +1594,19 @@ static int run_tests(int cgfd)
return fails ? -1 : 0;
}
-int main(int argc, char **argv)
+void test_sysctl(void)
{
- int cgfd = -1;
- int err = 0;
+ int cgfd;
cgfd = cgroup_setup_and_join(CG_PATH);
- if (cgfd < 0)
- goto err;
+ if (!ASSERT_OK_FD(cgfd < 0, "create_cgroup"))
+ goto out;
- if (run_tests(cgfd))
- goto err;
+ if (!ASSERT_OK(run_tests(cgfd), "run_tests"))
+ goto out;
- goto out;
-err:
- err = -1;
out:
close(cgfd);
cleanup_cgroup_environment();
- return err;
+ return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
new file mode 100644
index 000000000000..9fd6306b455c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <bpf/btf.h>
+#include <test_progs.h>
+
+#define TLD_FREE_DATA_ON_THREAD_EXIT
+#define TLD_DYN_DATA_SIZE 4096
+#include "task_local_data.h"
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+#include "test_task_local_data.skel.h"
+
+TLD_DEFINE_KEY(value0_key, "value0", sizeof(int));
+
+/*
+ * Reset task local data between subtests by clearing metadata other
+ * than the statically defined value0. This is safe as subtests run
+ * sequentially. Users of task local data library should not touch
+ * library internal.
+ */
+static void reset_tld(void)
+{
+ if (TLD_READ_ONCE(tld_meta_p)) {
+ /* Remove TLDs created by tld_create_key() */
+ tld_meta_p->cnt = 1;
+ tld_meta_p->size = TLD_DYN_DATA_SIZE;
+ memset(&tld_meta_p->metadata[1], 0,
+ (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata));
+ }
+}
+
+/* Serialize access to bpf program's global variables */
+static pthread_mutex_t global_mutex;
+
+static tld_key_t *tld_keys;
+
+#define TEST_BASIC_THREAD_NUM 32
+
+void *test_task_local_data_basic_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct test_task_local_data *skel = (struct test_task_local_data *)arg;
+ int fd, err, tid, *value0, *value1;
+ struct test_tld_struct *value2;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ value0 = tld_get_data(fd, value0_key);
+ if (!ASSERT_OK_PTR(value0, "tld_get_data"))
+ goto out;
+
+ value1 = tld_get_data(fd, tld_keys[1]);
+ if (!ASSERT_OK_PTR(value1, "tld_get_data"))
+ goto out;
+
+ value2 = tld_get_data(fd, tld_keys[2]);
+ if (!ASSERT_OK_PTR(value2, "tld_get_data"))
+ goto out;
+
+ tid = sys_gettid();
+
+ *value0 = tid + 0;
+ *value1 = tid + 1;
+ value2->a = tid + 2;
+ value2->b = tid + 3;
+ value2->c = tid + 4;
+ value2->d = tid + 5;
+
+ pthread_mutex_lock(&global_mutex);
+ /* Run task_main that read task local data and save to global variables */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 0, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 1, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 2, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 3, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 4, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 5, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+ /* Make sure valueX are indeed local to threads */
+ ASSERT_EQ(*value0, tid + 0, "value0");
+ ASSERT_EQ(*value1, tid + 1, "value1");
+ ASSERT_EQ(value2->a, tid + 2, "value2.a");
+ ASSERT_EQ(value2->b, tid + 3, "value2.b");
+ ASSERT_EQ(value2->c, tid + 4, "value2.c");
+ ASSERT_EQ(value2->d, tid + 5, "value2.d");
+
+ *value0 = tid + 5;
+ *value1 = tid + 4;
+ value2->a = tid + 3;
+ value2->b = tid + 2;
+ value2->c = tid + 1;
+ value2->d = tid + 0;
+
+ /* Run task_main again */
+ pthread_mutex_lock(&global_mutex);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 5, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 4, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 3, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 2, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 1, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 0, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+out:
+ pthread_exit(NULL);
+}
+
+static void test_task_local_data_basic(void)
+{
+ struct test_task_local_data *skel;
+ pthread_t thread[TEST_BASIC_THREAD_NUM];
+ char dummy_key_name[TLD_NAME_LEN];
+ tld_key_t key;
+ int i, err;
+
+ reset_tld();
+
+ ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init");
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[1] = tld_create_key("value1", sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[1]), "tld_create_key");
+ tld_keys[2] = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[2]), "tld_create_key");
+
+ /*
+ * Shouldn't be able to store data exceed a page. Create a TLD just big
+ * enough to exceed a page. TLDs already created are int value0, int
+ * value1, and struct test_tld_struct value2.
+ */
+ key = tld_create_key("value_not_exist",
+ TLD_PAGE_SIZE - 2 * sizeof(int) - sizeof(struct test_tld_struct) + 1);
+ ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key");
+
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -EEXIST, "tld_create_key");
+
+ /* Shouldn't be able to create the (TLD_MAX_DATA_CNT+1)-th TLD */
+ for (i = 3; i < TLD_MAX_DATA_CNT; i++) {
+ snprintf(dummy_key_name, TLD_NAME_LEN, "dummy_value%d", i);
+ tld_keys[i] = tld_create_key(dummy_key_name, sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key");
+ }
+ key = tld_create_key("value_not_exist", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -ENOSPC, "tld_create_key");
+
+ /* Access TLDs from multiple threads and check if they are thread-specific */
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) {
+ err = pthread_create(&thread[i], NULL, test_task_local_data_basic_thread, skel);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto out;
+ }
+
+out:
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++)
+ pthread_join(thread[i], NULL);
+
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+#define TEST_RACE_THREAD_NUM (TLD_MAX_DATA_CNT - 3)
+
+void *test_task_local_data_race_thread(void *arg)
+{
+ int err = 0, id = (intptr_t)arg;
+ char key_name[32];
+ tld_key_t key;
+
+ key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1);
+ if (tld_key_err_or_zero(key) != -E2BIG) {
+ err = 1;
+ goto out;
+ }
+
+ /* Only one thread will succeed in creating value1 */
+ key = tld_create_key("value1", sizeof(int));
+ if (!tld_key_is_err(key))
+ tld_keys[1] = key;
+
+ /* Only one thread will succeed in creating value2 */
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ if (!tld_key_is_err(key))
+ tld_keys[2] = key;
+
+ snprintf(key_name, 32, "thread_%d", id);
+ tld_keys[id] = tld_create_key(key_name, sizeof(int));
+ if (tld_key_is_err(tld_keys[id]))
+ err = 2;
+out:
+ return (void *)(intptr_t)err;
+}
+
+static void test_task_local_data_race(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ pthread_t thread[TEST_RACE_THREAD_NUM];
+ struct test_task_local_data *skel;
+ int fd, i, j, err, *data;
+ void *ret = NULL;
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[0] = value0_key;
+
+ for (j = 0; j < 100; j++) {
+ reset_tld();
+
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ /*
+ * Try to make tld_create_key() race with each other. Call
+ * tld_create_key(), both valid and invalid, from different threads.
+ */
+ err = pthread_create(&thread[i], NULL, test_task_local_data_race_thread,
+ (void *)(intptr_t)(i + 3));
+ if (CHECK_FAIL(err))
+ break;
+ }
+
+ /* Wait for all tld_create_key() to return */
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ pthread_join(thread[i], &ret);
+ if (CHECK_FAIL(ret))
+ break;
+ }
+
+ /* Write a unique number to each TLD */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(!data))
+ break;
+ *data = i;
+ }
+
+ /* Read TLDs and check the value to see if any address collides with another */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(*data != i))
+ break;
+ }
+
+ /* Run task_main to make sure no invalid TLDs are added */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+ }
+out:
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+void test_task_local_data(void)
+{
+ if (test__start_subtest("task_local_data_basic"))
+ test_task_local_data_basic();
+ if (test__start_subtest("task_local_data_race"))
+ test_task_local_data_race();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_work.c b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
new file mode 100644
index 000000000000..774b31a5f6ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work.skel.h"
+#include "task_work_fail.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+
+static int perf_event_open(__u32 type, __u64 config, int pid)
+{
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .size = sizeof(struct perf_event_attr),
+ .sample_period = 100000,
+ };
+
+ return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
+}
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+static int verify_map(struct bpf_map *map, const char *expected_data)
+{
+ int err;
+ struct elem value;
+ int processed_values = 0;
+ int k, sz;
+
+ sz = bpf_map__max_entries(map);
+ for (k = 0; k < sz; ++k) {
+ err = bpf_map__lookup_elem(map, &k, sizeof(int), &value, sizeof(struct elem), 0);
+ if (err)
+ continue;
+ if (!ASSERT_EQ(strcmp(expected_data, value.data), 0, "map data")) {
+ fprintf(stderr, "expected '%s', found '%s' in %s map", expected_data,
+ value.data, bpf_map__name(map));
+ return 2;
+ }
+ processed_values++;
+ }
+
+ return processed_values == 0;
+}
+
+static void task_work_run(const char *prog_name, const char *map_name)
+{
+ struct task_work *skel;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ struct bpf_link *link = NULL;
+ int err, pe_fd = -1, pid, status, pipefd[2];
+ char user_string[] = "hello world";
+
+ if (!ASSERT_NEQ(pipe(pipefd), -1, "pipe"))
+ return;
+
+ pid = fork();
+ if (pid == 0) {
+ __u64 num = 1;
+ int i;
+ char buf;
+
+ close(pipefd[1]);
+ read(pipefd[0], &buf, sizeof(buf));
+ close(pipefd[0]);
+
+ for (i = 0; i < 10000; ++i)
+ num *= time(0) % 7;
+ (void)num;
+ exit(0);
+ }
+ if (!ASSERT_GT(pid, 0, "fork() failed")) {
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return;
+ }
+
+ skel = task_work__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ bpf_program__set_autoload(prog, false);
+ }
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "prog_name"))
+ goto cleanup;
+ bpf_program__set_autoload(prog, true);
+ skel->bss->user_ptr = (char *)user_string;
+
+ err = task_work__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pe_fd = perf_event_open(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, pid);
+ if (pe_fd == -1 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_NEQ(pe_fd, -1, "pe_fd")) {
+ fprintf(stderr, "perf_event_open errno: %d, pid: %d\n", errno, pid);
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(prog, pe_fd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+
+ /* perf event fd ownership is passed to bpf_link */
+ pe_fd = -1;
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ /* Wait to collect some samples */
+ waitpid(pid, &status, 0);
+ pid = 0;
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "find map_name"))
+ goto cleanup;
+ if (!ASSERT_OK(verify_map(map, user_string), "verify map"))
+ goto cleanup;
+cleanup:
+ if (pe_fd >= 0)
+ close(pe_fd);
+ bpf_link__destroy(link);
+ task_work__destroy(skel);
+ if (pid > 0) {
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ waitpid(pid, &status, 0);
+ }
+}
+
+void test_task_work(void)
+{
+ if (test__start_subtest("test_task_work_hash_map"))
+ task_work_run("oncpu_hash_map", "hmap");
+
+ if (test__start_subtest("test_task_work_array_map"))
+ task_work_run("oncpu_array_map", "arrmap");
+
+ if (test__start_subtest("test_task_work_lru_map"))
+ task_work_run("oncpu_lru_map", "lrumap");
+
+ RUN_TESTS(task_work_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
new file mode 100644
index 000000000000..462512fb191f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * BPF-based flow shaping
+ *
+ * The test brings up two veth in two isolated namespaces, attach some flow
+ * shaping program onto it, and ensures that a manual speedtest maximum
+ * value matches the rate set in the BPF shapers.
+ */
+
+#include <asm-generic/socket.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <bpf/libbpf.h>
+#include <pthread.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_edt.skel.h"
+
+#define SERVER_NS "tc-edt-server-ns"
+#define CLIENT_NS "tc-edt-client-ns"
+#define IP4_ADDR_VETH1 "192.168.1.1"
+#define IP4_ADDR_VETH2 "192.168.1.2"
+#define IP4_ADDR_VETH2_HEX 0xC0A80102
+
+#define TIMEOUT_MS 2000
+#define TEST_PORT 9000
+#define TARGET_RATE_MBPS 5.0
+#define TX_BYTES_COUNT (1 * 1000 * 1000)
+#define RATE_ERROR_PERCENT 2.0
+
+struct connection {
+ int server_listen_fd;
+ int server_conn_fd;
+ int client_conn_fd;
+};
+
+static int setup(struct test_tc_edt *skel)
+{
+ struct nstoken *nstoken_client, *nstoken_server;
+ int ret;
+
+ if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns"))
+ goto fail;
+ if (!ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
+ goto fail_delete_client_ns;
+
+ nstoken_client = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+ goto fail_delete_server_ns;
+ SYS(fail_close_client_ns, "ip link add veth1 type veth peer name %s",
+ "veth2 netns " SERVER_NS);
+ SYS(fail_close_client_ns, "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
+ SYS(fail_close_client_ns, "ip link set veth1 up");
+
+ nstoken_server = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken_server, "enter server ns"))
+ goto fail_close_client_ns;
+ SYS(fail_close_server_ns, "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
+ SYS(fail_close_server_ns, "ip link set veth2 up");
+ SYS(fail_close_server_ns, "tc qdisc add dev veth2 root fq");
+ ret = tc_prog_attach("veth2", -1, bpf_program__fd(skel->progs.tc_prog));
+ if (!ASSERT_OK(ret, "attach bpf prog"))
+ goto fail_close_server_ns;
+ skel->bss->target_rate = TARGET_RATE_MBPS * 1000 * 1000;
+ close_netns(nstoken_server);
+ close_netns(nstoken_client);
+
+ return 0;
+
+fail_close_server_ns:
+ close_netns(nstoken_server);
+fail_close_client_ns:
+ close_netns(nstoken_client);
+fail_delete_server_ns:
+ remove_netns(SERVER_NS);
+fail_delete_client_ns:
+ remove_netns(CLIENT_NS);
+fail:
+ return -1;
+}
+
+static void cleanup(void)
+{
+ remove_netns(CLIENT_NS);
+ remove_netns(SERVER_NS);
+}
+
+static void run_test(void)
+{
+ int server_fd, client_fd, err;
+ double rate_mbps, rate_error;
+ struct nstoken *nstoken;
+ __u64 ts_start, ts_end;
+
+ nstoken = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return;
+ server_fd = start_server(AF_INET, SOCK_STREAM, IP4_ADDR_VETH2,
+ TEST_PORT, TIMEOUT_MS);
+ if (!ASSERT_OK_FD(server_fd, "start server"))
+ return;
+
+ close_netns(nstoken);
+ nstoken = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open client ns"))
+ return;
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_OK_FD(client_fd, "connect client"))
+ return;
+
+ ts_start = get_time_ns();
+ err = send_recv_data(server_fd, client_fd, TX_BYTES_COUNT);
+ ts_end = get_time_ns();
+ close_netns(nstoken);
+ ASSERT_OK(err, "send_recv_data");
+
+ rate_mbps = TX_BYTES_COUNT / ((ts_end - ts_start) / 1000.0);
+ rate_error =
+ fabs((rate_mbps - TARGET_RATE_MBPS) * 100.0 / TARGET_RATE_MBPS);
+
+ ASSERT_LE(rate_error, RATE_ERROR_PERCENT,
+ "rate error is lower than threshold");
+}
+
+void test_tc_edt(void)
+{
+ struct test_tc_edt *skel;
+
+ skel = test_tc_edt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open and load"))
+ return;
+
+ if (!ASSERT_OK(setup(skel), "global setup"))
+ return;
+
+ run_test();
+
+ cleanup();
+ test_tc_edt__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
new file mode 100644
index 000000000000..0fe0a8f62486
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -0,0 +1,714 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * End-to-end eBPF tunnel test suite
+ * The file tests BPF network tunnels implementation. For each tunnel
+ * type, the test validates that:
+ * - basic communication can first be established between the two veths
+ * - when adding a BPF-based encapsulation on client egress, it now fails
+ * to communicate with the server
+ * - when adding a kernel-based decapsulation on server ingress, client
+ * can now connect
+ * - when replacing the kernel-based decapsulation with a BPF-based one,
+ * the client can still connect
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_tunnel.skel.h"
+
+#define SERVER_NS "tc-tunnel-server-ns"
+#define CLIENT_NS "tc-tunnel-client-ns"
+#define MAC_ADDR_VETH1 "00:11:22:33:44:55"
+#define IP4_ADDR_VETH1 "192.168.1.1"
+#define IP6_ADDR_VETH1 "fd::1"
+#define MAC_ADDR_VETH2 "66:77:88:99:AA:BB"
+#define IP4_ADDR_VETH2 "192.168.1.2"
+#define IP6_ADDR_VETH2 "fd::2"
+
+#define TEST_NAME_MAX_LEN 64
+#define PROG_NAME_MAX_LEN 64
+#define TUNNEL_ARGS_MAX_LEN 128
+#define BUFFER_LEN 2000
+#define DEFAULT_TEST_DATA_SIZE 100
+#define GSO_TEST_DATA_SIZE BUFFER_LEN
+
+#define TIMEOUT_MS 1000
+#define TEST_PORT 8000
+#define UDP_PORT 5555
+#define MPLS_UDP_PORT 6635
+#define FOU_MPLS_PROTO 137
+#define VXLAN_ID 1
+#define VXLAN_PORT 8472
+#define MPLS_TABLE_ENTRIES_COUNT 65536
+
+static char tx_buffer[BUFFER_LEN], rx_buffer[BUFFER_LEN];
+
+struct subtest_cfg {
+ char *ebpf_tun_type;
+ char *iproute_tun_type;
+ char *mac_tun_type;
+ int ipproto;
+ void (*extra_decap_mod_args_cb)(struct subtest_cfg *cfg, char *dst);
+ bool tunnel_need_veth_mac;
+ bool configure_fou_rx_port;
+ char *tmode;
+ bool expect_kern_decap_failure;
+ bool configure_mpls;
+ bool test_gso;
+ char *tunnel_client_addr;
+ char *tunnel_server_addr;
+ char name[TEST_NAME_MAX_LEN];
+ char *server_addr;
+ int client_egress_prog_fd;
+ int server_ingress_prog_fd;
+ char extra_decap_mod_args[TUNNEL_ARGS_MAX_LEN];
+ int server_fd;
+};
+
+struct connection {
+ int client_fd;
+ int server_fd;
+};
+
+static int build_subtest_name(struct subtest_cfg *cfg, char *dst, size_t size)
+{
+ int ret;
+
+ ret = snprintf(dst, size, "%s_%s", cfg->ebpf_tun_type,
+ cfg->mac_tun_type);
+
+ return ret < 0 ? ret : 0;
+}
+
+static int set_subtest_progs(struct subtest_cfg *cfg, struct test_tc_tunnel *skel)
+{
+ char prog_name[PROG_NAME_MAX_LEN];
+ struct bpf_program *prog;
+ int ret;
+
+ ret = snprintf(prog_name, PROG_NAME_MAX_LEN, "__encap_");
+ if (ret < 0)
+ return ret;
+ ret = build_subtest_name(cfg, prog_name + ret, PROG_NAME_MAX_LEN - ret);
+ if (ret < 0)
+ return ret;
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!prog)
+ return -1;
+
+ cfg->client_egress_prog_fd = bpf_program__fd(prog);
+ cfg->server_ingress_prog_fd = bpf_program__fd(skel->progs.decap_f);
+ return 0;
+}
+
+static void set_subtest_addresses(struct subtest_cfg *cfg)
+{
+ if (cfg->ipproto == 6)
+ cfg->server_addr = IP6_ADDR_VETH2;
+ else
+ cfg->server_addr = IP4_ADDR_VETH2;
+
+ /* Some specific tunnel types need specific addressing, it then
+ * has been already set in the configuration table. Otherwise,
+ * deduce the relevant addressing from the ipproto
+ */
+ if (cfg->tunnel_client_addr && cfg->tunnel_server_addr)
+ return;
+
+ if (cfg->ipproto == 6) {
+ cfg->tunnel_client_addr = IP6_ADDR_VETH1;
+ cfg->tunnel_server_addr = IP6_ADDR_VETH2;
+ } else {
+ cfg->tunnel_client_addr = IP4_ADDR_VETH1;
+ cfg->tunnel_server_addr = IP4_ADDR_VETH2;
+ }
+}
+
+static int run_server(struct subtest_cfg *cfg)
+{
+ int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
+ struct nstoken *nstoken;
+ struct network_helper_opts opts = {
+ .timeout_ms = TIMEOUT_MS
+ };
+
+ nstoken = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return -1;
+
+ cfg->server_fd = start_server_str(family, SOCK_STREAM, cfg->server_addr,
+ TEST_PORT, &opts);
+ close_netns(nstoken);
+ if (!ASSERT_OK_FD(cfg->server_fd, "start server"))
+ return -1;
+
+ return 0;
+}
+
+static int check_server_rx_data(struct subtest_cfg *cfg,
+ struct connection *conn, int len)
+{
+ int err;
+
+ memset(rx_buffer, 0, BUFFER_LEN);
+ err = recv(conn->server_fd, rx_buffer, len, 0);
+ if (!ASSERT_EQ(err, len, "check rx data len"))
+ return 1;
+ if (!ASSERT_MEMEQ(tx_buffer, rx_buffer, len, "check received data"))
+ return 1;
+ return 0;
+}
+
+static struct connection *connect_client_to_server(struct subtest_cfg *cfg)
+{
+ struct network_helper_opts opts = {.timeout_ms = 500};
+ int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
+ struct connection *conn = NULL;
+ int client_fd, server_fd;
+
+ conn = malloc(sizeof(struct connection));
+ if (!conn)
+ return conn;
+
+ client_fd = connect_to_addr_str(family, SOCK_STREAM, cfg->server_addr,
+ TEST_PORT, &opts);
+
+ if (client_fd < 0) {
+ free(conn);
+ return NULL;
+ }
+
+ server_fd = accept(cfg->server_fd, NULL, NULL);
+ if (server_fd < 0) {
+ close(client_fd);
+ free(conn);
+ return NULL;
+ }
+
+ conn->server_fd = server_fd;
+ conn->client_fd = client_fd;
+
+ return conn;
+}
+
+static void disconnect_client_from_server(struct subtest_cfg *cfg,
+ struct connection *conn)
+{
+ close(conn->server_fd);
+ close(conn->client_fd);
+ free(conn);
+}
+
+static int send_and_test_data(struct subtest_cfg *cfg, bool must_succeed)
+{
+ struct connection *conn;
+ int err, res = -1;
+
+ conn = connect_client_to_server(cfg);
+ if (!must_succeed && !ASSERT_ERR_PTR(conn, "connection that must fail"))
+ goto end;
+ else if (!must_succeed)
+ return 0;
+
+ if (!ASSERT_OK_PTR(conn, "connection that must succeed"))
+ return -1;
+
+ err = send(conn->client_fd, tx_buffer, DEFAULT_TEST_DATA_SIZE, 0);
+ if (!ASSERT_EQ(err, DEFAULT_TEST_DATA_SIZE, "send data from client"))
+ goto end;
+ if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
+ goto end;
+
+ if (!cfg->test_gso) {
+ res = 0;
+ goto end;
+ }
+
+ err = send(conn->client_fd, tx_buffer, GSO_TEST_DATA_SIZE, 0);
+ if (!ASSERT_EQ(err, GSO_TEST_DATA_SIZE, "send (large) data from client"))
+ goto end;
+ if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
+ goto end;
+
+ res = 0;
+end:
+ disconnect_client_from_server(cfg, conn);
+ return res;
+}
+
+static void vxlan_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
+{
+ snprintf(dst, TUNNEL_ARGS_MAX_LEN, "id %d dstport %d udp6zerocsumrx",
+ VXLAN_ID, VXLAN_PORT);
+}
+
+static void udp_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
+{
+ bool is_mpls = !strcmp(cfg->mac_tun_type, "mpls");
+
+ snprintf(dst, TUNNEL_ARGS_MAX_LEN,
+ "encap fou encap-sport auto encap-dport %d",
+ is_mpls ? MPLS_UDP_PORT : UDP_PORT);
+}
+
+static int configure_fou_rx_port(struct subtest_cfg *cfg, bool add)
+{
+ bool is_mpls = strcmp(cfg->mac_tun_type, "mpls") == 0;
+ int fou_proto;
+
+ if (is_mpls)
+ fou_proto = FOU_MPLS_PROTO;
+ else
+ fou_proto = cfg->ipproto == 6 ? 41 : 4;
+
+ SYS(fail, "ip fou %s port %d ipproto %d%s", add ? "add" : "del",
+ is_mpls ? MPLS_UDP_PORT : UDP_PORT, fou_proto,
+ cfg->ipproto == 6 ? " -6" : "");
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int add_fou_rx_port(struct subtest_cfg *cfg)
+{
+ return configure_fou_rx_port(cfg, true);
+}
+
+static int del_fou_rx_port(struct subtest_cfg *cfg)
+{
+ return configure_fou_rx_port(cfg, false);
+}
+
+static int update_tunnel_intf_addr(struct subtest_cfg *cfg)
+{
+ SYS(fail, "ip link set dev testtun0 address " MAC_ADDR_VETH2);
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_kernel_for_mpls(struct subtest_cfg *cfg)
+{
+ SYS(fail, "sysctl -qw net.mpls.platform_labels=%d",
+ MPLS_TABLE_ENTRIES_COUNT);
+ SYS(fail, "ip -f mpls route add 1000 dev lo");
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "sysctl -qw net.mpls.conf.testtun0.input=1");
+ SYS(fail, "sysctl -qw net.ipv4.conf.lo.rp_filter=0");
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_encapsulation(struct subtest_cfg *cfg)
+{
+ int ret;
+
+ ret = tc_prog_attach("veth1", -1, cfg->client_egress_prog_fd);
+
+ return ret;
+}
+
+static int configure_kernel_decapsulation(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken = open_netns(SERVER_NS);
+ int ret = -1;
+
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return ret;
+
+ if (cfg->configure_fou_rx_port &&
+ !ASSERT_OK(add_fou_rx_port(cfg), "configure FOU RX port"))
+ goto fail;
+ SYS(fail, "ip link add name testtun0 type %s %s remote %s local %s %s",
+ cfg->iproute_tun_type, cfg->tmode ? cfg->tmode : "",
+ cfg->tunnel_client_addr, cfg->tunnel_server_addr,
+ cfg->extra_decap_mod_args);
+ if (cfg->tunnel_need_veth_mac &&
+ !ASSERT_OK(update_tunnel_intf_addr(cfg), "update testtun0 mac"))
+ goto fail;
+ if (cfg->configure_mpls &&
+ (!ASSERT_OK(configure_kernel_for_mpls(cfg),
+ "configure MPLS decap")))
+ goto fail;
+ SYS(fail, "sysctl -qw net.ipv4.conf.all.rp_filter=0");
+ SYS(fail, "sysctl -qw net.ipv4.conf.testtun0.rp_filter=0");
+ SYS(fail, "ip link set dev testtun0 up");
+
+ ret = 0;
+fail:
+ close_netns(nstoken);
+ return ret;
+}
+
+static void remove_kernel_decapsulation(struct subtest_cfg *cfg)
+{
+ SYS_NOFAIL("ip link del testtun0");
+ if (cfg->configure_mpls)
+ SYS_NOFAIL("ip -f mpls route del 1000 dev lo");
+ if (cfg->configure_fou_rx_port)
+ del_fou_rx_port(cfg);
+}
+
+static int configure_ebpf_decapsulation(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken = open_netns(SERVER_NS);
+ int ret = -1;
+
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return ret;
+
+ if (!cfg->expect_kern_decap_failure)
+ SYS(fail, "ip link del testtun0");
+
+ if (!ASSERT_OK(tc_prog_attach("veth2", cfg->server_ingress_prog_fd, -1),
+ "attach_program"))
+ goto fail;
+
+ ret = 0;
+fail:
+ close_netns(nstoken);
+ return ret;
+}
+
+static void run_test(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken;
+
+ if (!ASSERT_OK(run_server(cfg), "run server"))
+ return;
+
+ nstoken = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open client ns"))
+ goto fail;
+
+ /* Basic communication must work */
+ if (!ASSERT_OK(send_and_test_data(cfg, true), "connect without any encap"))
+ goto fail;
+
+ /* Attach encapsulation program to client */
+ if (!ASSERT_OK(configure_encapsulation(cfg), "configure encapsulation"))
+ goto fail;
+
+ /* If supported, insert kernel decap module, connection must succeed */
+ if (!cfg->expect_kern_decap_failure) {
+ if (!ASSERT_OK(configure_kernel_decapsulation(cfg),
+ "configure kernel decapsulation"))
+ goto fail;
+ if (!ASSERT_OK(send_and_test_data(cfg, true),
+ "connect with encap prog and kern decap"))
+ goto fail;
+ }
+
+ /* Replace kernel decapsulation with BPF decapsulation, test must pass */
+ if (!ASSERT_OK(configure_ebpf_decapsulation(cfg), "configure ebpf decapsulation"))
+ goto fail;
+ ASSERT_OK(send_and_test_data(cfg, true), "connect with encap and decap progs");
+
+fail:
+ close_netns(nstoken);
+ close(cfg->server_fd);
+}
+
+static int setup(void)
+{
+ struct nstoken *nstoken_client, *nstoken_server;
+ int fd, err;
+
+ fd = open("/dev/urandom", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "open urandom"))
+ goto fail;
+ err = read(fd, tx_buffer, BUFFER_LEN);
+ close(fd);
+
+ if (!ASSERT_EQ(err, BUFFER_LEN, "read random bytes"))
+ goto fail;
+
+ /* Configure the testing network */
+ if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns") ||
+ !ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
+ goto fail;
+
+ nstoken_client = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+ goto fail_delete_ns;
+ SYS(fail_close_ns_client, "ip link add %s type veth peer name %s",
+ "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
+ "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
+ SYS(fail_close_ns_client, "ethtool -K veth1 tso off");
+ SYS(fail_close_ns_client, "ip link set veth1 up");
+ nstoken_server = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
+ goto fail_close_ns_client;
+ SYS(fail_close_ns_server, "ip link set veth2 up");
+
+ close_netns(nstoken_server);
+ close_netns(nstoken_client);
+ return 0;
+
+fail_close_ns_server:
+ close_netns(nstoken_server);
+fail_close_ns_client:
+ close_netns(nstoken_client);
+fail_delete_ns:
+ SYS_NOFAIL("ip netns del " CLIENT_NS);
+ SYS_NOFAIL("ip netns del " SERVER_NS);
+fail:
+ return -1;
+}
+
+static int subtest_setup(struct test_tc_tunnel *skel, struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken_client, *nstoken_server;
+ int ret = -1;
+
+ set_subtest_addresses(cfg);
+ if (!ASSERT_OK(set_subtest_progs(cfg, skel),
+ "find subtest progs"))
+ goto fail;
+ if (cfg->extra_decap_mod_args_cb)
+ cfg->extra_decap_mod_args_cb(cfg, cfg->extra_decap_mod_args);
+
+ nstoken_client = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+ goto fail;
+ SYS(fail_close_client_ns,
+ "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
+ SYS(fail_close_client_ns, "ip -4 route flush table main");
+ SYS(fail_close_client_ns,
+ "ip -4 route add " IP4_ADDR_VETH2 " mtu 1450 dev veth1");
+ SYS(fail_close_client_ns,
+ "ip -6 addr add " IP6_ADDR_VETH1 "/64 dev veth1 nodad");
+ SYS(fail_close_client_ns, "ip -6 route flush table main");
+ SYS(fail_close_client_ns,
+ "ip -6 route add " IP6_ADDR_VETH2 " mtu 1430 dev veth1");
+ nstoken_server = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
+ goto fail_close_client_ns;
+ SYS(fail_close_server_ns,
+ "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
+ SYS(fail_close_server_ns,
+ "ip -6 addr add " IP6_ADDR_VETH2 "/64 dev veth2 nodad");
+
+ ret = 0;
+
+fail_close_server_ns:
+ close_netns(nstoken_server);
+fail_close_client_ns:
+ close_netns(nstoken_client);
+fail:
+ return ret;
+}
+
+
+static void subtest_cleanup(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken;
+
+ nstoken = open_netns(CLIENT_NS);
+ if (ASSERT_OK_PTR(nstoken, "open clien ns")) {
+ SYS_NOFAIL("tc qdisc delete dev veth1 parent ffff:fff1");
+ SYS_NOFAIL("ip a flush veth1");
+ close_netns(nstoken);
+ }
+ nstoken = open_netns(SERVER_NS);
+ if (ASSERT_OK_PTR(nstoken, "open clien ns")) {
+ SYS_NOFAIL("tc qdisc delete dev veth2 parent ffff:fff1");
+ SYS_NOFAIL("ip a flush veth2");
+ if (!cfg->expect_kern_decap_failure)
+ remove_kernel_decapsulation(cfg);
+ close_netns(nstoken);
+ }
+}
+
+static void cleanup(void)
+{
+ remove_netns(CLIENT_NS);
+ remove_netns(SERVER_NS);
+}
+
+static struct subtest_cfg subtests_cfg[] = {
+ {
+ .ebpf_tun_type = "ipip",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ },
+ {
+ .ebpf_tun_type = "ipip6",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 4,
+ .tunnel_client_addr = IP6_ADDR_VETH1,
+ .tunnel_server_addr = IP6_ADDR_VETH2,
+ },
+ {
+ .ebpf_tun_type = "ip6tnl",
+ .iproute_tun_type = "ip6tnl",
+ .mac_tun_type = "none",
+ .ipproto = 6,
+ },
+ {
+ .mac_tun_type = "none",
+ .ebpf_tun_type = "sit",
+ .iproute_tun_type = "sit",
+ .ipproto = 6,
+ .tunnel_client_addr = IP4_ADDR_VETH1,
+ .tunnel_server_addr = IP4_ADDR_VETH2,
+ },
+ {
+ .ebpf_tun_type = "vxlan",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "vxlan",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
+ .tunnel_need_veth_mac = true
+ },
+ {
+ .ebpf_tun_type = "ip6vxlan",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "vxlan",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
+ .tunnel_need_veth_mac = true
+ },
+ {
+ .ebpf_tun_type = "gre",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "gre",
+ .ipproto = 4,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "gre",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "gretap",
+ .ipproto = 4,
+ .tunnel_need_veth_mac = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "gre",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "gre",
+ .ipproto = 4,
+ .configure_mpls = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6gre",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ip6gre",
+ .ipproto = 6,
+ .test_gso = true,
+ },
+ {
+ .ebpf_tun_type = "ip6gre",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "ip6gretap",
+ .ipproto = 6,
+ .tunnel_need_veth_mac = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6gre",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "ip6gre",
+ .ipproto = 6,
+ .configure_mpls = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "udp",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "udp",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .expect_kern_decap_failure = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "udp",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .tmode = "mode any ttl 255",
+ .configure_mpls = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6udp",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6udp",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .expect_kern_decap_failure = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6udp",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .tmode = "mode any ttl 255",
+ .expect_kern_decap_failure = true,
+ .test_gso = true
+ },
+};
+
+void test_tc_tunnel(void)
+{
+ struct test_tc_tunnel *skel;
+ struct subtest_cfg *cfg;
+ int i, ret;
+
+ skel = test_tc_tunnel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open and load"))
+ return;
+
+ if (!ASSERT_OK(setup(), "global setup"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) {
+ cfg = &subtests_cfg[i];
+ ret = build_subtest_name(cfg, cfg->name, TEST_NAME_MAX_LEN);
+ if (ret < 0 || !test__start_subtest(cfg->name))
+ continue;
+ if (subtest_setup(skel, cfg) == 0)
+ run_test(cfg);
+ subtest_cleanup(cfg);
+ }
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
new file mode 100644
index 000000000000..eb9309931272
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -0,0 +1,1074 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * End-to-end eBPF tunnel test suite
+ * The file tests BPF network tunnel implementation.
+ *
+ * Topology:
+ * ---------
+ * root namespace | at_ns0 namespace
+ * |
+ * ----------- | -----------
+ * | tnl dev | | | tnl dev | (overlay network)
+ * ----------- | -----------
+ * metadata-mode | metadata-mode
+ * with bpf | with bpf
+ * |
+ * ---------- | ----------
+ * | veth1 | --------- | veth0 | (underlay network)
+ * ---------- peer ----------
+ *
+ *
+ * Device Configuration
+ * --------------------
+ * root namespace with metadata-mode tunnel + BPF
+ * Device names and addresses:
+ * veth1 IP 1: 172.16.1.200, IPv6: 00::22 (underlay)
+ * IP 2: 172.16.1.20, IPv6: 00::bb (underlay)
+ * tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
+ *
+ * Namespace at_ns0 with native tunnel
+ * Device names and addresses:
+ * veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+ * tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
+ *
+ *
+ * End-to-end ping packet flow
+ * ---------------------------
+ * Most of the tests start by namespace creation, device configuration,
+ * then ping the underlay and overlay network. When doing 'ping 10.1.1.100'
+ * from root namespace, the following operations happen:
+ * 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+ * 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
+ * with local_ip=172.16.1.200, remote_ip=172.16.1.100. BPF program choose
+ * the primary or secondary ip of veth1 as the local ip of tunnel. The
+ * choice is made based on the value of bpf map local_ip_map.
+ * 3) Outer tunnel header is prepended and route the packet to veth1's egress.
+ * 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0.
+ * 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet.
+ * 6) Forward the packet to the overlay tnl dev.
+ */
+
+#include <arpa/inet.h>
+#include <linux/if_link.h>
+#include <linux/if_tun.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <linux/time_types.h>
+#include <linux/net_tstamp.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tunnel_kern.skel.h"
+
+#define IP4_ADDR_VETH0 "172.16.1.100"
+#define IP4_ADDR1_VETH1 "172.16.1.200"
+#define IP4_ADDR2_VETH1 "172.16.1.20"
+#define IP4_ADDR_TUNL_DEV0 "10.1.1.100"
+#define IP4_ADDR_TUNL_DEV1 "10.1.1.200"
+#define IP6_ADDR_TUNL_DEV0 "fc80::100"
+#define IP6_ADDR_TUNL_DEV1 "fc80::200"
+
+#define IP6_ADDR_VETH0 "::11"
+#define IP6_ADDR1_VETH1 "::22"
+#define IP6_ADDR2_VETH1 "::bb"
+
+#define IP4_ADDR1_HEX_VETH1 0xac1001c8
+#define IP4_ADDR2_HEX_VETH1 0xac100114
+#define IP6_ADDR1_HEX_VETH1 0x22
+#define IP6_ADDR2_HEX_VETH1 0xbb
+
+#define MAC_TUNL_DEV0 "52:54:00:d9:01:00"
+#define MAC_TUNL_DEV1 "52:54:00:d9:02:00"
+#define MAC_VETH1 "52:54:00:d9:03:00"
+
+#define VXLAN_TUNL_DEV0 "vxlan00"
+#define VXLAN_TUNL_DEV1 "vxlan11"
+#define IP6VXLAN_TUNL_DEV0 "ip6vxlan00"
+#define IP6VXLAN_TUNL_DEV1 "ip6vxlan11"
+
+#define IPIP_TUNL_DEV0 "ipip00"
+#define IPIP_TUNL_DEV1 "ipip11"
+
+#define XFRM_AUTH "0x1111111111111111111111111111111111111111"
+#define XFRM_ENC "0x22222222222222222222222222222222"
+#define XFRM_SPI_IN_TO_OUT 0x1
+#define XFRM_SPI_OUT_TO_IN 0x2
+
+#define GRE_TUNL_DEV0 "gre00"
+#define GRE_TUNL_DEV1 "gre11"
+
+#define IP6GRE_TUNL_DEV0 "ip6gre00"
+#define IP6GRE_TUNL_DEV1 "ip6gre11"
+
+#define ERSPAN_TUNL_DEV0 "erspan00"
+#define ERSPAN_TUNL_DEV1 "erspan11"
+
+#define IP6ERSPAN_TUNL_DEV0 "ip6erspan00"
+#define IP6ERSPAN_TUNL_DEV1 "ip6erspan11"
+
+#define GENEVE_TUNL_DEV0 "geneve00"
+#define GENEVE_TUNL_DEV1 "geneve11"
+
+#define IP6GENEVE_TUNL_DEV0 "ip6geneve00"
+#define IP6GENEVE_TUNL_DEV1 "ip6geneve11"
+
+#define IP6TNL_TUNL_DEV0 "ip6tnl00"
+#define IP6TNL_TUNL_DEV1 "ip6tnl11"
+
+#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
+
+static int config_device(void)
+{
+ SYS(fail, "ip netns add at_ns0");
+ SYS(fail, "ip link add veth0 address " MAC_VETH1 " type veth peer name veth1");
+ SYS(fail, "ip link set veth0 netns at_ns0");
+ SYS(fail, "ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
+ SYS(fail, "ip link set dev veth1 up mtu 1500");
+ SYS(fail, "ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
+ SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0");
+ SYS_NOFAIL("ip link del veth1");
+ SYS_NOFAIL("ip link del %s", VXLAN_TUNL_DEV1);
+ SYS_NOFAIL("ip link del %s", IP6VXLAN_TUNL_DEV1);
+}
+
+static int add_vxlan_tunnel(void)
+{
+ /* at_ns0 namespace */
+ SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789",
+ VXLAN_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up",
+ VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24",
+ VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
+ IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0",
+ IP4_ADDR2_VETH1, MAC_VETH1);
+
+ /* root namespace */
+ SYS(fail, "ip link add dev %s type vxlan external gbp dstport 4789",
+ VXLAN_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip neigh add %s lladdr %s dev %s",
+ IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_vxlan_tunnel(void)
+{
+ SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s",
+ VXLAN_TUNL_DEV0);
+ SYS_NOFAIL("ip link delete dev %s", VXLAN_TUNL_DEV1);
+}
+
+static int add_ip6vxlan_tunnel(void)
+{
+ SYS(fail, "ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0",
+ IP6_ADDR_VETH0);
+ SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up");
+ SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1);
+ SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1);
+ SYS(fail, "ip link set dev veth1 up");
+
+ /* at_ns0 namespace */
+ SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789",
+ IP6VXLAN_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24",
+ IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up",
+ IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
+
+ /* root namespace */
+ SYS(fail, "ip link add dev %s type vxlan external dstport 4789",
+ IP6VXLAN_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s address %s up",
+ IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_ip6vxlan_tunnel(void)
+{
+ SYS_NOFAIL("ip netns exec at_ns0 ip -6 addr delete %s/96 dev veth0",
+ IP6_ADDR_VETH0);
+ SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR1_VETH1);
+ SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR2_VETH1);
+ SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s",
+ IP6VXLAN_TUNL_DEV0);
+ SYS_NOFAIL("ip link delete dev %s", IP6VXLAN_TUNL_DEV1);
+}
+
+enum ipip_encap {
+ NONE = 0,
+ FOU = 1,
+ GUE = 2,
+};
+
+static int set_ipip_encap(const char *ipproto, const char *type)
+{
+ SYS(fail, "ip -n at_ns0 fou add port 5555 %s", ipproto);
+ SYS(fail, "ip -n at_ns0 link set dev %s type ipip encap %s",
+ IPIP_TUNL_DEV0, type);
+ SYS(fail, "ip -n at_ns0 link set dev %s type ipip encap-dport 5555",
+ IPIP_TUNL_DEV0);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int set_ipv4_addr(const char *dev0, const char *dev1)
+{
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip link set dev %s up", dev1);
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int add_ipip_tunnel(enum ipip_encap encap)
+{
+ int err;
+ const char *ipproto, *type;
+
+ switch (encap) {
+ case FOU:
+ ipproto = "ipproto 4";
+ type = "fou";
+ break;
+ case GUE:
+ ipproto = "gue";
+ type = ipproto;
+ break;
+ default:
+ ipproto = NULL;
+ type = ipproto;
+ }
+
+ /* at_ns0 namespace */
+ SYS(fail, "ip -n at_ns0 link add dev %s type ipip local %s remote %s",
+ IPIP_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ if (type && ipproto) {
+ err = set_ipip_encap(ipproto, type);
+ if (!ASSERT_OK(err, "set_ipip_encap"))
+ goto fail;
+ }
+
+ SYS(fail, "ip -n at_ns0 link set dev %s up", IPIP_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24",
+ IPIP_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+
+ /* root namespace */
+ if (type && ipproto)
+ SYS(fail, "ip fou add port 5555 %s", ipproto);
+ SYS(fail, "ip link add dev %s type ipip external", IPIP_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s up", IPIP_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/24", IPIP_TUNL_DEV1,
+ IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_ipip_tunnel(void)
+{
+ SYS_NOFAIL("ip -n at_ns0 link delete dev %s", IPIP_TUNL_DEV0);
+ SYS_NOFAIL("ip -n at_ns0 fou del port 5555");
+ SYS_NOFAIL("ip link delete dev %s", IPIP_TUNL_DEV1);
+ SYS_NOFAIL("ip fou del port 5555");
+}
+
+static int add_xfrm_tunnel(void)
+{
+ /* at_ns0 namespace
+ * at_ns0 -> root
+ */
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 1 mode tunnel replay-window 42 "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm policy add src %s/32 dst %s/32 dir out "
+ "tmpl src %s dst %s proto esp reqid 1 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ /* root -> at_ns0 */
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 2 mode tunnel "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm policy add src %s/32 dst %s/32 dir in "
+ "tmpl src %s dst %s proto esp reqid 2 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
+
+ /* address & route */
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev veth0 %s/32",
+ IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0);
+
+ /* root namespace
+ * at_ns0 -> root
+ */
+ SYS(fail,
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 1 mode tunnel replay-window 42 "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip xfrm policy add src %s/32 dst %s/32 dir in "
+ "tmpl src %s dst %s proto esp reqid 1 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ /* root -> at_ns0 */
+ SYS(fail,
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 2 mode tunnel "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip xfrm policy add src %s/32 dst %s/32 dir out "
+ "tmpl src %s dst %s proto esp reqid 2 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
+
+ /* address & route */
+ SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip route add %s dev veth1 via %s src %s",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_xfrm_tunnel(void)
+{
+ SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0);
+ SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1);
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d",
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT);
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d",
+ IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
+}
+
+static int add_ipv4_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static void delete_tunnel(const char *dev0, const char *dev1)
+{
+ if (!dev0 || !dev1)
+ return;
+
+ SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", dev0);
+ SYS_NOFAIL("ip link delete dev %s", dev1);
+}
+
+static int set_ipv6_addr(const char *dev0, const char *dev1)
+{
+ /* disable IPv6 DAD because it might take too long and fail tests */
+ SYS(fail, "ip -n at_ns0 addr add %s/96 dev veth0 nodad", IP6_ADDR_VETH0);
+ SYS(fail, "ip -n at_ns0 link set dev veth0 up");
+ SYS(fail, "ip addr add %s/96 dev veth1 nodad", IP6_ADDR1_VETH1);
+ SYS(fail, "ip link set dev veth1 up");
+
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/96 nodad", dev0, IP6_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/96 nodad", dev1, IP6_ADDR_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s up", dev1);
+ return 0;
+fail:
+ return 1;
+}
+
+static int add_ipv6_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP6_ADDR_VETH0, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 2 %s remote %s",
+ dev0, type, opt, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_ip6geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 22 %s remote %s",
+ dev0, type, opt, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int test_ping(int family, const char *addr)
+{
+ SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
+ return 0;
+fail:
+ return -1;
+}
+
+static void ping_dev0(void)
+{
+ /* ping from root namespace test */
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+}
+
+static void ping_dev1(void)
+{
+ struct nstoken *nstoken;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
+
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+}
+
+static void ping6_veth0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_VETH0);
+}
+
+static void ping6_dev0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_TUNL_DEV0);
+}
+
+static void ping6_dev1(void)
+{
+ struct nstoken *nstoken;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
+
+ test_ping(AF_INET, IP6_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+}
+
+static void test_vxlan_tunnel(void)
+{
+ struct test_tunnel_kern *skel = NULL;
+ struct nstoken *nstoken;
+ int local_ip_map_fd = -1;
+ int set_src_prog_fd, get_src_prog_fd;
+ int set_dst_prog_fd;
+ int key = 0;
+ uint local_ip;
+ int err;
+
+ /* add vxlan tunnel */
+ err = add_vxlan_tunnel();
+ if (!ASSERT_OK(err, "add vxlan tunnel"))
+ goto done;
+
+ /* load and attach bpf prog to tunnel dev tc hook point */
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+ get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
+ set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
+ if (tc_prog_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
+ goto done;
+
+ /* load and attach bpf prog to veth dev tc hook point */
+ set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
+ if (tc_prog_attach("veth1", set_dst_prog_fd, -1))
+ goto done;
+
+ /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+ set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
+ if (tc_prog_attach(VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
+ goto done;
+ close_netns(nstoken);
+
+ /* use veth1 ip 2 as tunnel source ip */
+ local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map);
+ if (!ASSERT_GE(local_ip_map_fd, 0, "bpf_map__fd"))
+ goto done;
+ local_ip = IP4_ADDR2_HEX_VETH1;
+ err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY);
+ if (!ASSERT_OK(err, "update bpf local_ip_map"))
+ goto done;
+
+ /* ping test */
+ ping_dev0();
+
+done:
+ /* delete vxlan tunnel */
+ delete_vxlan_tunnel();
+ if (local_ip_map_fd >= 0)
+ close(local_ip_map_fd);
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6vxlan_tunnel(void)
+{
+ struct test_tunnel_kern *skel = NULL;
+ struct nstoken *nstoken;
+ int local_ip_map_fd = -1;
+ int set_src_prog_fd, get_src_prog_fd;
+ int set_dst_prog_fd;
+ int key = 0;
+ uint local_ip;
+ int err;
+
+ /* add vxlan tunnel */
+ err = add_ip6vxlan_tunnel();
+ if (!ASSERT_OK(err, "add_ip6vxlan_tunnel"))
+ goto done;
+
+ /* load and attach bpf prog to tunnel dev tc hook point */
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+ get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
+ set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
+ if (tc_prog_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
+ goto done;
+
+ /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+ set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
+ if (tc_prog_attach(IP6VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
+ goto done;
+ close_netns(nstoken);
+
+ /* use veth1 ip 2 as tunnel source ip */
+ local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map);
+ if (!ASSERT_GE(local_ip_map_fd, 0, "get local_ip_map fd"))
+ goto done;
+ local_ip = IP6_ADDR2_HEX_VETH1;
+ err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY);
+ if (!ASSERT_OK(err, "update bpf local_ip_map"))
+ goto done;
+
+ /* ping test */
+ ping_dev0();
+
+done:
+ /* delete ipv6 vxlan tunnel */
+ delete_ip6vxlan_tunnel();
+ if (local_ip_map_fd >= 0)
+ close(local_ip_map_fd);
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ipip_tunnel(enum ipip_encap encap)
+{
+ struct test_tunnel_kern *skel = NULL;
+ int set_src_prog_fd, get_src_prog_fd;
+ int err;
+
+ /* add ipip tunnel */
+ err = add_ipip_tunnel(encap);
+ if (!ASSERT_OK(err, "add_ipip_tunnel"))
+ goto done;
+
+ /* load and attach bpf prog to tunnel dev tc hook point */
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+
+ switch (encap) {
+ case FOU:
+ get_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_encap_get_tunnel);
+ set_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_fou_set_tunnel);
+ break;
+ case GUE:
+ get_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_encap_get_tunnel);
+ set_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_gue_set_tunnel);
+ break;
+ default:
+ get_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_get_tunnel);
+ set_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_set_tunnel);
+ }
+
+ if (tc_prog_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+
+done:
+ /* delete ipip tunnel */
+ delete_ipip_tunnel();
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_xfrm_tunnel(void)
+{
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ struct test_tunnel_kern *skel = NULL;
+ int xdp_prog_fd;
+ int tc_prog_fd;
+ int ifindex;
+ int err;
+
+ err = add_xfrm_tunnel();
+ if (!ASSERT_OK(err, "add_xfrm_tunnel"))
+ return;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+
+
+ /* attach tc prog to tunnel dev */
+ tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
+ if (tc_prog_attach("veth1", tc_prog_fd, -1))
+ goto done;
+
+ /* attach xdp prog to tunnel dev */
+ ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
+ goto done;
+ xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp);
+ if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts);
+ if (!ASSERT_OK(err, "bpf_xdp_attach"))
+ goto done;
+
+ ping_dev1();
+
+ if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
+ goto done;
+ if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi"))
+ goto done;
+ if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip"))
+ goto done;
+ if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window"))
+ goto done;
+
+done:
+ delete_xfrm_tunnel();
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
+enum gre_test {
+ GRE,
+ GRE_NOKEY,
+ GRETAP,
+ GRETAP_NOKEY,
+};
+
+static void test_gre_tunnel(enum gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case GRE:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRE_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ if (tc_prog_attach(GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+
+done:
+ delete_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6gre_test {
+ IP6GRE,
+ IP6GRETAP
+};
+
+static void test_ip6gre_tunnel(enum ip6gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case IP6GRE:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gre", "flowlabel 0xbcdef key 2");
+ break;
+ case IP6GRETAP:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gretap", "flowlabel 0xbcdef key 2");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel);
+ if (tc_prog_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping6_dev1();
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum erspan_test {
+ V1,
+ V2
+};
+
+static void test_erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 3");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel);
+ if (tc_prog_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 7");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel);
+ if (tc_prog_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_geneve_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1,
+ "geneve", "dstport 6081");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel);
+ if (tc_prog_attach(GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ip6geneve_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1,
+ "geneve", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel);
+ if (tc_prog_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6tnl_test {
+ IPIP6,
+ IP6IP6
+};
+
+static void test_ip6tnl_tunnel(enum ip6tnl_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ipv6_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1, "ip6tnl", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ switch (test) {
+ case IPIP6:
+ set_fd = bpf_program__fd(skel->progs.ipip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ipip6_get_tunnel);
+ break;
+ case IP6IP6:
+ set_fd = bpf_program__fd(skel->progs.ip6ip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel);
+ break;
+ }
+ if (tc_prog_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ switch (test) {
+ case IPIP6:
+ ping_dev0();
+ ping_dev1();
+ break;
+ case IP6IP6:
+ ping6_dev0();
+ ping6_dev1();
+ break;
+ }
+
+done:
+ delete_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+#define RUN_TEST(name, ...) \
+ ({ \
+ if (test__start_subtest(#name)) { \
+ config_device(); \
+ test_ ## name(__VA_ARGS__); \
+ cleanup(); \
+ } \
+ })
+
+static void *test_tunnel_run_tests(void *arg)
+{
+ RUN_TEST(vxlan_tunnel);
+ RUN_TEST(ip6vxlan_tunnel);
+ RUN_TEST(ipip_tunnel, NONE);
+ RUN_TEST(ipip_tunnel, FOU);
+ RUN_TEST(ipip_tunnel, GUE);
+ RUN_TEST(xfrm_tunnel);
+ RUN_TEST(gre_tunnel, GRE);
+ RUN_TEST(gre_tunnel, GRE_NOKEY);
+ RUN_TEST(gre_tunnel, GRETAP);
+ RUN_TEST(gre_tunnel, GRETAP_NOKEY);
+ RUN_TEST(ip6gre_tunnel, IP6GRE);
+ RUN_TEST(ip6gre_tunnel, IP6GRETAP);
+ RUN_TEST(erspan_tunnel, V1);
+ RUN_TEST(erspan_tunnel, V2);
+ RUN_TEST(ip6erspan_tunnel, V1);
+ RUN_TEST(ip6erspan_tunnel, V2);
+ RUN_TEST(geneve_tunnel);
+ RUN_TEST(ip6geneve_tunnel);
+ RUN_TEST(ip6tnl_tunnel, IPIP6);
+ RUN_TEST(ip6tnl_tunnel, IP6IP6);
+
+ return NULL;
+}
+
+void test_tunnel(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_tunnel_run_tests, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
new file mode 100644
index 000000000000..b38c16b4247f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+
+#define __CHECK_STR(str, name) \
+ do { \
+ if (!ASSERT_HAS_SUBSTR(fix->output, (str), (name))) \
+ goto out; \
+ } while (0)
+
+struct fixture {
+ char tmpfile[80];
+ int fd;
+ char *output;
+ size_t sz;
+ char veristat[80];
+};
+
+static struct fixture *init_fixture(void)
+{
+ struct fixture *fix = malloc(sizeof(struct fixture));
+
+ /* for no_alu32 and cpuv4 veristat is in parent folder */
+ if (access("./veristat", F_OK) == 0)
+ strcpy(fix->veristat, "./veristat");
+ else if (access("../veristat", F_OK) == 0)
+ strcpy(fix->veristat, "../veristat");
+ else
+ PRINT_FAIL("Can't find veristat binary");
+
+ snprintf(fix->tmpfile, sizeof(fix->tmpfile), "/tmp/test_veristat.XXXXXX");
+ fix->fd = mkstemp(fix->tmpfile);
+ fix->sz = 1000000;
+ fix->output = malloc(fix->sz);
+ return fix;
+}
+
+static void teardown_fixture(struct fixture *fix)
+{
+ free(fix->output);
+ close(fix->fd);
+ remove(fix->tmpfile);
+ free(fix);
+}
+
+static void test_set_global_vars_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS(out,
+ "%s set_global_vars.bpf.o"\
+ " -G \"var_s64 = 0xf000000000000001\" "\
+ " -G \"var_u64 = 0xfedcba9876543210\" "\
+ " -G \"var_s32 = -0x80000000\" "\
+ " -G \"var_u32 = 0x76543210\" "\
+ " -G \"var_s16 = -32768\" "\
+ " -G \"var_u16 = 60652\" "\
+ " -G \"var_s8 = -128\" "\
+ " -G \"var_u8 = 255\" "\
+ " -G \"var_ea = EA2\" "\
+ " -G \"var_eb = EB2\" "\
+ " -G \"var_ec=EC2\" "\
+ " -G \"var_b = 1\" "\
+ " -G \"struct1[2].struct2[1][2].u.var_u8[2]=170\" "\
+ " -G \"union1.struct3.var_u8_l = 0xaa\" "\
+ " -G \"union1.struct3.var_u8_h = 0xaa\" "\
+ " -G \"arr[3]= 171\" " \
+ " -G \"arr[EA2] =172\" " \
+ " -G \"enum_arr[EC2]=EA3\" " \
+ " -G \"three_d[31][7][EA2]=173\"" \
+ " -G \"struct1[2].struct2[1][2].u.mat[5][3]=174\" " \
+ " -G \"struct11 [ 7 ] [ 5 ] .struct2[0][1].u.mat[3][0] = 175\" " \
+ " -vl2 > %s", fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
+ __CHECK_STR("=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210");
+ __CHECK_STR("=0x80000000 ", "var_s32 = -0x80000000");
+ __CHECK_STR("=0x76543210 ", "var_u32 = 0x76543210");
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
+ __CHECK_STR("=128 ", "var_s8 = -128");
+ __CHECK_STR("=255 ", "var_u8 = 255");
+ __CHECK_STR("=11 ", "var_ea = EA2");
+ __CHECK_STR("=12 ", "var_eb = EB2");
+ __CHECK_STR("=13 ", "var_ec = EC2");
+ __CHECK_STR("=1 ", "var_b = 1");
+ __CHECK_STR("=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170");
+ __CHECK_STR("=0xaaaa ", "union1.var_u16 = 0xaaaa");
+ __CHECK_STR("=171 ", "arr[3]= 171");
+ __CHECK_STR("=172 ", "arr[EA2] =172");
+ __CHECK_STR("=10 ", "enum_arr[EC2]=EA3");
+ __CHECK_STR("=173 ", "matrix[31][7][11]=173");
+ __CHECK_STR("=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174");
+ __CHECK_STR("=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_from_file_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+ char input_file[80];
+ const char *vars = "var_s16 = -32768\nvar_u16 = 60652";
+ int fd;
+
+ snprintf(input_file, sizeof(input_file), "/tmp/veristat_input.XXXXXX");
+ fd = mkstemp(input_file);
+ if (!ASSERT_GE(fd, 0, "valid fd"))
+ goto out;
+
+ write(fd, vars, strlen(vars));
+ syncfs(fd);
+ SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s",
+ fix->veristat, input_file, fix->tmpfile);
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
+
+out:
+ close(fd);
+ remove(input_file);
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_out_of_range(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"var_s32 = 2147483648\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("is out of range [-2147483648; 2147483647]", "out of range");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_unsupported_ptr_array_type(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"ptr_arr[0] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Can't set ptr_arr[0]. Only ints and enums are supported", "ptr_arr");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_out_of_bounds(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr[99] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Array index 99 is out of bounds", "arr[99]");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_index_not_found(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr[EG2] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Can't resolve enum value EG2", "arr[EG2]");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_index_for_non_array(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"var_b[0] = 1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Array index is not expected for var_b", "var_b[0] = 1");
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"union1.struct3[0].var_u8_l=1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Array index is not expected for struct3", "union1.struct3[0].var_u8_l=1");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_no_array_index_for_array(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr = 1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Can't set arr. Only ints and enums are supported", "arr = 1");
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"struct1[0].struct2.u.var_u8[2]=1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Can't resolve field u for non-composite type", "struct1[0].struct2.u.var_u8[2]=1");
+
+out:
+ teardown_fixture(fix);
+}
+
+void test_veristat(void)
+{
+ if (test__start_subtest("set_global_vars_succeeds"))
+ test_set_global_vars_succeeds();
+
+ if (test__start_subtest("set_global_vars_out_of_range"))
+ test_set_global_vars_out_of_range();
+
+ if (test__start_subtest("set_global_vars_from_file_succeeds"))
+ test_set_global_vars_from_file_succeeds();
+
+ if (test__start_subtest("test_unsupported_ptr_array_type"))
+ test_unsupported_ptr_array_type();
+
+ if (test__start_subtest("test_array_out_of_bounds"))
+ test_array_out_of_bounds();
+
+ if (test__start_subtest("test_array_index_not_found"))
+ test_array_index_not_found();
+
+ if (test__start_subtest("test_array_index_for_non_array"))
+ test_array_index_for_non_array();
+
+ if (test__start_subtest("test_no_array_index_for_array"))
+ test_no_array_index_for_array();
+
+}
+
+#undef __CHECK_STR
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
new file mode 100644
index 000000000000..3e98a1665936
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
@@ -0,0 +1,599 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Create 3 namespaces with 3 veth peers, and forward packets in-between using
+ * native XDP
+ *
+ * Network topology:
+ * ---------- ---------- ----------
+ * | NS1 | | NS2 | | NS3 |
+ * | veth11 | | veth22 | | veth33 |
+ * ----|----- -----|---- -----|----
+ * | | |
+ * ----|------------------|----------------|----
+ * | veth1 veth2 veth3 |
+ * | |
+ * | NSO |
+ * ---------------------------------------------
+ *
+ * Test cases:
+ * - [test_xdp_veth_redirect] : ping veth33 from veth11
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_TX) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * ^ | ^ | ^ |
+ * | | | | | |
+ * | ------------------ ------------------ |
+ * -----------------------------------------
+ *
+ * - [test_xdp_veth_broadcast_redirect]: broadcast from veth11
+ * - IPv4 ping : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS
+ * -> echo request received by all except veth11
+ * - IPv4 ping : BPF_F_BROADCAST
+ * -> echo request received by all veth
+ * - [test_xdp_veth_egress]:
+ * - all src mac should be the magic mac
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_PASS) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * | ^ ^
+ * | | |
+ * ----------------------------------------
+ *
+ */
+
+#define _GNU_SOURCE
+#include <net/if.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_map.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+#include <uapi/linux/if_link.h>
+
+#define VETH_PAIRS_COUNT 3
+#define VETH_NAME_MAX_LEN 32
+#define IP_MAX_LEN 16
+#define IP_SRC "10.1.1.11"
+#define IP_DST "10.1.1.33"
+#define IP_NEIGH "10.1.1.253"
+#define PROG_NAME_MAX_LEN 128
+#define NS_NAME_MAX_LEN 32
+
+struct veth_configuration {
+ char local_veth[VETH_NAME_MAX_LEN]; /* Interface in main namespace */
+ char remote_veth[VETH_NAME_MAX_LEN]; /* Peer interface in dedicated namespace*/
+ char namespace[NS_NAME_MAX_LEN]; /* Namespace for the remote veth */
+ int next_veth; /* Local interface to redirect traffic to */
+ char remote_addr[IP_MAX_LEN]; /* IP address of the remote veth */
+};
+
+struct net_configuration {
+ char ns0_name[NS_NAME_MAX_LEN];
+ struct veth_configuration veth_cfg[VETH_PAIRS_COUNT];
+};
+
+static const struct net_configuration default_config = {
+ .ns0_name = "ns0-",
+ {
+ {
+ .local_veth = "veth1-",
+ .remote_veth = "veth11",
+ .next_veth = 1,
+ .remote_addr = IP_SRC,
+ .namespace = "ns-veth11-"
+ },
+ {
+ .local_veth = "veth2-",
+ .remote_veth = "veth22",
+ .next_veth = 2,
+ .remote_addr = "",
+ .namespace = "ns-veth22-"
+ },
+ {
+ .local_veth = "veth3-",
+ .remote_veth = "veth33",
+ .next_veth = 0,
+ .remote_addr = IP_DST,
+ .namespace = "ns-veth33-"
+ }
+ }
+};
+
+struct prog_configuration {
+ char local_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to local_veth */
+ char remote_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to remote_veth */
+ u32 local_flags; /* XDP flags to use on local_veth */
+ u32 remote_flags; /* XDP flags to use on remote_veth */
+};
+
+static int attach_programs_to_veth_pair(struct bpf_object **objs, size_t nb_obj,
+ struct net_configuration *net_config,
+ struct prog_configuration *prog, int index)
+{
+ struct bpf_program *local_prog, *remote_prog;
+ struct nstoken *nstoken;
+ int interface, ret, i;
+
+ for (i = 0; i < nb_obj; i++) {
+ local_prog = bpf_object__find_program_by_name(objs[i], prog[index].local_name);
+ if (local_prog)
+ break;
+ }
+ if (!ASSERT_OK_PTR(local_prog, "find local program"))
+ return -1;
+
+ for (i = 0; i < nb_obj; i++) {
+ remote_prog = bpf_object__find_program_by_name(objs[i], prog[index].remote_name);
+ if (remote_prog)
+ break;
+ }
+ if (!ASSERT_OK_PTR(remote_prog, "find remote program"))
+ return -1;
+
+ interface = if_nametoindex(net_config->veth_cfg[index].local_veth);
+ if (!ASSERT_NEQ(interface, 0, "non zero interface index"))
+ return -1;
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(local_prog),
+ prog[index].local_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to local veth"))
+ return -1;
+
+ nstoken = open_netns(net_config->veth_cfg[index].namespace);
+ if (!ASSERT_OK_PTR(nstoken, "switch to remote veth namespace"))
+ return -1;
+
+ interface = if_nametoindex(net_config->veth_cfg[index].remote_veth);
+ if (!ASSERT_NEQ(interface, 0, "non zero interface index")) {
+ close_netns(nstoken);
+ return -1;
+ }
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(remote_prog),
+ prog[index].remote_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to remote veth")) {
+ close_netns(nstoken);
+ return -1;
+ }
+
+ close_netns(nstoken);
+ return 0;
+}
+
+static int create_network(struct net_configuration *net_config)
+{
+ struct nstoken *nstoken = NULL;
+ int i, err;
+
+ memcpy(net_config, &default_config, sizeof(struct net_configuration));
+
+ /* Create unique namespaces */
+ err = append_tid(net_config->ns0_name, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns0 name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->ns0_name);
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ err = append_tid(net_config->veth_cfg[i].namespace, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->veth_cfg[i].namespace);
+ }
+
+ /* Create interfaces */
+ nstoken = open_netns(net_config->ns0_name);
+ if (!nstoken)
+ goto fail;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ SYS(fail, "ip link add %s type veth peer name %s netns %s",
+ net_config->veth_cfg[i].local_veth, net_config->veth_cfg[i].remote_veth,
+ net_config->veth_cfg[i].namespace);
+ SYS(fail, "ip link set dev %s up", net_config->veth_cfg[i].local_veth);
+ if (net_config->veth_cfg[i].remote_addr[0])
+ SYS(fail, "ip -n %s addr add %s/24 dev %s",
+ net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_addr,
+ net_config->veth_cfg[i].remote_veth);
+ SYS(fail, "ip -n %s link set dev %s up", net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_veth);
+ }
+
+ close_netns(nstoken);
+ return 0;
+
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static void cleanup_network(struct net_configuration *net_config)
+{
+ int i;
+
+ SYS_NOFAIL("ip netns del %s", net_config->ns0_name);
+ for (i = 0; i < VETH_PAIRS_COUNT; i++)
+ SYS_NOFAIL("ip netns del %s", net_config->veth_cfg[i].namespace);
+}
+
+#define VETH_REDIRECT_SKEL_NB 3
+static void xdp_veth_redirect(u32 flags)
+{
+ struct prog_configuration ping_config[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_0",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_1",
+ .remote_name = "xdp_tx",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_2",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ struct bpf_object *bpf_objs[VETH_REDIRECT_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ int map_fd;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_tx, "xdp_tx__open_and_load"))
+ goto destroy_xdp_dummy;
+
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_tx;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ /* Then configure the redirect map and attach programs to interfaces */
+ map_fd = bpf_map__fd(xdp_redirect_map->maps.tx_port);
+ if (!ASSERT_OK_FD(map_fd, "open redirect map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_tx->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int next_veth = net_config.veth_cfg[i].next_veth;
+ int interface_id;
+ int err;
+
+ interface_id = if_nametoindex(net_config.veth_cfg[next_veth].local_veth);
+ if (!ASSERT_NEQ(interface_id, 0, "non zero interface index"))
+ goto destroy_xdp_redirect_map;
+ err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY);
+ if (!ASSERT_OK(err, "configure interface redirection through map"))
+ goto destroy_xdp_redirect_map;
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB,
+ &net_config, ping_config, i))
+ goto destroy_xdp_redirect_map;
+ }
+
+ /* Test: if all interfaces are properly configured, we must be able to ping
+ * veth33 from veth11
+ */
+ ASSERT_OK(SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null",
+ net_config.veth_cfg[0].namespace, IP_DST), "ping");
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_tx:
+ xdp_tx__destroy(xdp_tx);
+destroy_xdp_dummy:
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
+}
+
+#define BROADCAST_REDIRECT_SKEL_NB 2
+static void xdp_veth_broadcast_redirect(u32 attach_flags, u64 redirect_flags)
+{
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_0",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_1",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_2",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ }
+ };
+ struct bpf_object *bpf_objs[BROADCAST_REDIRECT_SKEL_NB];
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ u16 protocol = ETH_P_IP;
+ int group_map;
+ int flags_map;
+ int cnt_map;
+ u64 cnt = 0;
+ int i, err;
+
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
+ return;
+
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ group_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_all);
+ if (!ASSERT_OK_FD(group_map, "open map_all"))
+ goto destroy_xdp_redirect_map;
+
+ flags_map = bpf_map__fd(xdp_redirect_multi_kern->maps.redirect_flags);
+ if (!ASSERT_OK_FD(group_map, "open map_all"))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(flags_map, &protocol, &redirect_flags, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "init IP count"))
+ goto destroy_xdp_redirect_map;
+
+ cnt_map = bpf_map__fd(xdp_redirect_map->maps.rxcnt);
+ if (!ASSERT_OK_FD(cnt_map, "open rxcnt map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_redirect_multi_kern->obj;
+ bpf_objs[1] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, BROADCAST_REDIRECT_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(group_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ err = bpf_map_lookup_elem(cnt_map, &i, &cnt);
+ if (!ASSERT_OK(err, "get IP cnt"))
+ goto destroy_xdp_redirect_map;
+
+ if (redirect_flags & BPF_F_EXCLUDE_INGRESS)
+ /* veth11 shouldn't receive the ICMP requests;
+ * others should
+ */
+ ASSERT_EQ(cnt, i ? 4 : 0, "compare IP cnt");
+ else
+ /* All remote veth should receive the ICMP requests */
+ ASSERT_EQ(cnt, 4, "compare IP cnt");
+ }
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
+
+ cleanup_network(&net_config);
+}
+
+#define VETH_EGRESS_SKEL_NB 3
+static void xdp_veth_egress(u32 flags)
+{
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_1",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_2",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ int mac_map, egress_map, res_map;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ int err;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
+ goto destroy_xdp_dummy;
+
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map);
+ if (!ASSERT_OK_FD(mac_map, "open mac_map"))
+ goto destroy_xdp_redirect_map;
+
+ egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress);
+ if (!ASSERT_OK_FD(egress_map, "open map_egress"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.bpf_prog.fd = bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog);
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_redirect_multi_kern->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac);
+ if (!ASSERT_OK_FD(res_map, "open rx_map"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < 2; i++) {
+ u32 key = i;
+ u64 res;
+
+ err = bpf_map_lookup_elem(res_map, &key, &res);
+ if (!ASSERT_OK(err, "get MAC res"))
+ goto destroy_xdp_redirect_map;
+
+ ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac");
+ }
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
+destroy_xdp_dummy:
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
+}
+
+void test_xdp_veth_redirect(void)
+{
+ if (test__start_subtest("0"))
+ xdp_veth_redirect(0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_SKB_MODE);
+}
+
+void test_xdp_veth_broadcast_redirect(void)
+{
+ if (test__start_subtest("0/BROADCAST"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST);
+
+ if (test__start_subtest("0/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("DRV_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("DRV_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("SKB_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("SKB_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+void test_xdp_veth_egress(void)
+{
+ if (test__start_subtest("0/egress"))
+ xdp_veth_egress(0);
+
+ if (test__start_subtest("DRV_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_SKB_MODE);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
new file mode 100644
index 000000000000..5af28f359cfd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -0,0 +1,2596 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/if_link.h>
+#include <linux/mman.h>
+#include <linux/netdev.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "network_helpers.h"
+#include "test_xsk.h"
+#include "xsk_xdp_common.h"
+#include "xsk_xdp_progs.skel.h"
+
+#define DEFAULT_BATCH_SIZE 64
+#define MIN_PKT_SIZE 64
+#define MAX_ETH_JUMBO_SIZE 9000
+#define MAX_INTERFACES 2
+#define MAX_TEARDOWN_ITER 10
+#define MAX_TX_BUDGET_DEFAULT 32
+#define PKT_DUMP_NB_TO_PRINT 16
+/* Just to align the data in the packet */
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2)
+#define POLL_TMOUT 1000
+#define THREAD_TMOUT 3
+#define UMEM_HEADROOM_TEST_SIZE 128
+#define XSK_DESC__INVALID_OPTION (0xffff)
+#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
+#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
+#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
+
+static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
+
+bool opt_verbose;
+pthread_barrier_t barr;
+pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int pkts_in_flight;
+
+/* The payload is a word consisting of a packet sequence number in the upper
+ * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's
+ * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
+ */
+static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
+{
+ u32 *ptr = (u32 *)dest, i;
+
+ start /= sizeof(*ptr);
+ size /= sizeof(*ptr);
+ for (i = 0; i < size; i++)
+ ptr[i] = htonl(pkt_nb << 16 | (i + start));
+}
+
+static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
+{
+ memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
+ memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
+}
+
+static bool is_umem_valid(struct ifobject *ifobj)
+{
+ return !!ifobj->umem->umem;
+}
+
+static u32 mode_to_xdp_flags(enum test_mode mode)
+{
+ return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+}
+
+static u64 umem_size(struct xsk_umem_info *umem)
+{
+ return umem->num_frames * umem->frame_size;
+}
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
+ u64 size)
+{
+ struct xsk_umem_config cfg = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = umem->frame_size,
+ .frame_headroom = umem->frame_headroom,
+ .flags = XSK_UMEM__DEFAULT_FLAGS
+ };
+ int ret;
+
+ if (umem->fill_size)
+ cfg.fill_size = umem->fill_size;
+
+ if (umem->comp_size)
+ cfg.comp_size = umem->comp_size;
+
+ if (umem->unaligned_mode)
+ cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+
+ ret = xsk_umem__create(&umem->umem, buffer, size,
+ &umem->fq, &umem->cq, &cfg);
+ if (ret)
+ return ret;
+
+ umem->buffer = buffer;
+ if (ifobj->shared_umem && ifobj->rx_on) {
+ umem->base_addr = umem_size(umem);
+ umem->next_buffer = umem_size(umem);
+ }
+
+ return 0;
+}
+
+static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
+{
+ u64 addr;
+
+ addr = umem->next_buffer;
+ umem->next_buffer += umem->frame_size;
+ if (umem->next_buffer >= umem->base_addr + umem_size(umem))
+ umem->next_buffer = umem->base_addr;
+
+ return addr;
+}
+
+static void umem_reset_alloc(struct xsk_umem_info *umem)
+{
+ umem->next_buffer = 0;
+}
+
+static int enable_busy_poll(struct xsk_socket_info *xsk)
+{
+ int sock_opt;
+
+ sock_opt = 1;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ return -errno;
+
+ sock_opt = 20;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ return -errno;
+
+ sock_opt = xsk->batch_size;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+ struct ifobject *ifobject, bool shared)
+{
+ struct xsk_socket_config cfg = {};
+ struct xsk_ring_cons *rxr;
+ struct xsk_ring_prod *txr;
+
+ xsk->umem = umem;
+ cfg.rx_size = xsk->rxqsize;
+ cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg.bind_flags = ifobject->bind_flags;
+ if (shared)
+ cfg.bind_flags |= XDP_SHARED_UMEM;
+ if (ifobject->mtu > MAX_ETH_PKT_SIZE)
+ cfg.bind_flags |= XDP_USE_SG;
+ if (umem->comp_size)
+ cfg.tx_size = umem->comp_size;
+ if (umem->fill_size)
+ cfg.rx_size = umem->fill_size;
+
+ txr = ifobject->tx_on ? &xsk->tx : NULL;
+ rxr = ifobject->rx_on ? &xsk->rx : NULL;
+ return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
+}
+
+#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
+static unsigned int get_max_skb_frags(void)
+{
+ unsigned int max_skb_frags = 0;
+ FILE *file;
+
+ file = fopen(MAX_SKB_FRAGS_PATH, "r");
+ if (!file) {
+ ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
+ return 0;
+ }
+
+ if (fscanf(file, "%u", &max_skb_frags) != 1)
+ ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
+
+ fclose(file);
+ return max_skb_frags;
+}
+
+static int set_ring_size(struct ifobject *ifobj)
+{
+ int ret;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
+ if (!ret)
+ break;
+
+ /* Retry if it fails */
+ if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
+ return -errno;
+
+ usleep(USLEEP_MAX);
+ }
+
+ return ret;
+}
+
+int hw_ring_size_reset(struct ifobject *ifobj)
+{
+ ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
+ ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
+ return set_ring_size(ifobj);
+}
+
+static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx)
+{
+ u32 i, j;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->xsk = &ifobj->xsk_arr[0];
+ ifobj->use_poll = false;
+ ifobj->use_fill_ring = true;
+ ifobj->release_rx = true;
+ ifobj->validation_func = NULL;
+ ifobj->use_metadata = false;
+
+ if (i == 0) {
+ ifobj->rx_on = false;
+ ifobj->tx_on = true;
+ } else {
+ ifobj->rx_on = true;
+ ifobj->tx_on = false;
+ }
+
+ memset(ifobj->umem, 0, sizeof(*ifobj->umem));
+ ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
+ ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+
+ for (j = 0; j < MAX_SOCKETS; j++) {
+ memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
+ ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
+ if (i == 0)
+ ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
+ else
+ ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
+
+ memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
+ memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
+ ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
+ ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
+ }
+ }
+
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
+ test->ifobj_tx = ifobj_tx;
+ test->ifobj_rx = ifobj_rx;
+ test->current_step = 0;
+ test->total_steps = 1;
+ test->nb_sockets = 1;
+ test->fail = false;
+ test->set_ring = false;
+ test->adjust_tail = false;
+ test->adjust_tail_support = false;
+ test->mtu = MAX_ETH_PKT_SIZE;
+ test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
+ test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run)
+{
+ struct pkt_stream *tx_pkt_stream;
+ struct pkt_stream *rx_pkt_stream;
+ u32 i;
+
+ tx_pkt_stream = test->tx_pkt_stream_default;
+ rx_pkt_stream = test->rx_pkt_stream_default;
+ memset(test, 0, sizeof(*test));
+ test->tx_pkt_stream_default = tx_pkt_stream;
+ test->rx_pkt_stream_default = rx_pkt_stream;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
+ if (mode == TEST_MODE_ZC)
+ ifobj->bind_flags |= XDP_ZEROCOPY;
+ else
+ ifobj->bind_flags |= XDP_COPY;
+ }
+
+ memcpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
+ test->test_func = test_to_run->test_func;
+ test->mode = mode;
+ __test_spec_init(test, ifobj_tx, ifobj_rx);
+}
+
+static void test_spec_reset(struct test_spec *test)
+{
+ __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
+}
+
+static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
+ struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
+ struct bpf_map *xskmap_tx)
+{
+ test->xdp_prog_rx = xdp_prog_rx;
+ test->xdp_prog_tx = xdp_prog_tx;
+ test->xskmap_rx = xskmap_rx;
+ test->xskmap_tx = xskmap_tx;
+}
+
+static int test_spec_set_mtu(struct test_spec *test, int mtu)
+{
+ int err;
+
+ if (test->ifobj_rx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_rx->mtu = mtu;
+ }
+ if (test->ifobj_tx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_tx->mtu = mtu;
+ }
+
+ return 0;
+}
+
+void pkt_stream_reset(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream) {
+ pkt_stream->current_pkt_nb = 0;
+ pkt_stream->nb_rx_pkts = 0;
+ }
+}
+
+static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
+ return NULL;
+
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+}
+
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
+{
+ while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
+ (*pkts_sent)++;
+ if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+ pkt_stream->current_pkt_nb++;
+ }
+ return NULL;
+}
+
+void pkt_stream_delete(struct pkt_stream *pkt_stream)
+{
+ free(pkt_stream->pkts);
+ free(pkt_stream);
+}
+
+void pkt_stream_restore_default(struct test_spec *test)
+{
+ struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+
+ if (tx_pkt_stream != test->tx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
+ test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
+ }
+
+ if (rx_pkt_stream != test->rx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+ test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
+ }
+}
+
+static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = calloc(1, sizeof(*pkt_stream));
+ if (!pkt_stream)
+ return NULL;
+
+ pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+ if (!pkt_stream->pkts) {
+ free(pkt_stream);
+ return NULL;
+ }
+
+ pkt_stream->nb_pkts = nb_pkts;
+ return pkt_stream;
+}
+
+static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
+{
+ u32 nb_frags = 1, next_frag;
+
+ if (!pkt)
+ return 1;
+
+ if (!pkt_stream->verbatim) {
+ if (!pkt->valid || !pkt->len)
+ return 1;
+ return ceil_u32(pkt->len, frame_size);
+ }
+
+ /* Search for the end of the packet in verbatim mode */
+ if (!pkt_continues(pkt->options))
+ return nb_frags;
+
+ next_frag = pkt_stream->current_pkt_nb;
+ pkt++;
+ while (next_frag++ < pkt_stream->nb_pkts) {
+ nb_frags++;
+ if (!pkt_continues(pkt->options) || !pkt->valid)
+ break;
+ pkt++;
+ }
+ return nb_frags;
+}
+
+static bool set_pkt_valid(int offset, u32 len)
+{
+ return len <= MAX_ETH_JUMBO_SIZE;
+}
+
+static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ pkt->offset = offset;
+ pkt->len = len;
+ pkt->valid = set_pkt_valid(offset, len);
+}
+
+static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ bool prev_pkt_valid = pkt->valid;
+
+ pkt_set(pkt_stream, pkt, offset, len);
+ pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
+}
+
+static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
+{
+ return ceil_u32(len, umem->frame_size) * umem->frame_size;
+}
+
+static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = __pkt_stream_alloc(nb_pkts);
+ if (!pkt_stream)
+ return NULL;
+
+ pkt_stream->nb_pkts = nb_pkts;
+ pkt_stream->max_pkt_len = pkt_len;
+ for (i = 0; i < nb_pkts; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[i];
+
+ pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
+ pkt->pkt_nb = nb_start + i * nb_off;
+ }
+
+ return pkt_stream;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+ return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
+}
+
+static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
+{
+ return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+}
+
+static int pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
+{
+ ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+
+ if (!ifobj->xsk->pkt_stream)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+ int ret;
+
+ ret = pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
+ if (ret)
+ return ret;
+
+ return pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
+}
+
+static int __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
+ int offset)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
+ if (!pkt_stream)
+ return -ENOMEM;
+
+ for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
+ pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
+
+ ifobj->xsk->pkt_stream = pkt_stream;
+
+ return 0;
+}
+
+static int pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+{
+ int ret = __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
+
+ if (ret)
+ return ret;
+
+ return __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
+}
+
+static int pkt_stream_receive_half(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ u32 i;
+
+ if (test->ifobj_rx->xsk->pkt_stream != test->rx_pkt_stream_default)
+ /* Packet stream has already been replaced so we have to release this one.
+ * The newly created one will be freed by the restore_default() at the
+ * end of the test
+ */
+ pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
+ pkt_stream->pkts[0].len);
+ if (!test->ifobj_rx->xsk->pkt_stream)
+ return -ENOMEM;
+
+ pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+ for (i = 1; i < pkt_stream->nb_pkts; i += 2)
+ pkt_stream->pkts[i].valid = false;
+
+ pkt_stream->nb_valid_entries /= 2;
+
+ return 0;
+}
+
+static int pkt_stream_even_odd_sequence(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
+
+ pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
+ }
+
+ return 0;
+}
+
+static void release_even_odd_sequence(struct test_spec *test)
+{
+ struct pkt_stream *later_free_tx = test->ifobj_tx->xsk->pkt_stream;
+ struct pkt_stream *later_free_rx = test->ifobj_rx->xsk->pkt_stream;
+ int i;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ /* later_free_{rx/tx} will be freed by restore_default() */
+ if (test->ifobj_tx->xsk_arr[i].pkt_stream != later_free_tx)
+ pkt_stream_delete(test->ifobj_tx->xsk_arr[i].pkt_stream);
+ if (test->ifobj_rx->xsk_arr[i].pkt_stream != later_free_rx)
+ pkt_stream_delete(test->ifobj_rx->xsk_arr[i].pkt_stream);
+ }
+
+}
+
+static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
+{
+ if (!pkt->valid)
+ return pkt->offset;
+ return pkt->offset + umem_alloc_buffer(umem);
+}
+
+static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
+{
+ pkt_stream->current_pkt_nb--;
+}
+
+static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
+ u32 pkt_nb, u32 bytes_written)
+{
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ if (len < MIN_PKT_SIZE)
+ return;
+
+ if (!bytes_written) {
+ gen_eth_hdr(xsk, data);
+
+ len -= PKT_HDR_SIZE;
+ data += PKT_HDR_SIZE;
+ } else {
+ bytes_written -= PKT_HDR_SIZE;
+ }
+
+ write_payload(data, pkt_nb, bytes_written, len);
+}
+
+static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
+ u32 nb_frames, bool verbatim)
+{
+ u32 i, len = 0, pkt_nb = 0, payload = 0;
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_alloc(nb_frames);
+ if (!pkt_stream)
+ return NULL;
+
+ for (i = 0; i < nb_frames; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
+ struct pkt *frame = &frames[i];
+
+ pkt->offset = frame->offset;
+ if (verbatim) {
+ *pkt = *frame;
+ pkt->pkt_nb = payload;
+ if (!frame->valid || !pkt_continues(frame->options))
+ payload++;
+ } else {
+ if (frame->valid)
+ len += frame->len;
+ if (frame->valid && pkt_continues(frame->options))
+ continue;
+
+ pkt->pkt_nb = pkt_nb;
+ pkt->len = len;
+ pkt->valid = frame->valid;
+ pkt->options = 0;
+
+ len = 0;
+ }
+
+ print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
+ pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
+
+ if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
+ pkt_stream->max_pkt_len = pkt->len;
+
+ if (pkt->valid)
+ pkt_stream->nb_valid_entries++;
+
+ pkt_nb++;
+ }
+
+ pkt_stream->nb_pkts = pkt_nb;
+ pkt_stream->verbatim = verbatim;
+ return pkt_stream;
+}
+
+static int pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_tx->xsk->pkt_stream = pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+
+ return 0;
+}
+
+static void pkt_print_data(u32 *data, u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ u32 seqnum, pkt_nb;
+
+ seqnum = ntohl(*data) & 0xffff;
+ pkt_nb = ntohl(*data) >> 16;
+ ksft_print_msg("%u:%u ", pkt_nb, seqnum);
+ data++;
+ }
+}
+
+static void pkt_dump(void *pkt, u32 len, bool eth_header)
+{
+ struct ethhdr *ethhdr = pkt;
+ u32 i, *data;
+
+ if (eth_header) {
+ /*extract L2 frame */
+ ksft_print_msg("DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_dest[i]);
+
+ ksft_print_msg("\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_source[i]);
+
+ data = pkt + PKT_HDR_SIZE;
+ } else {
+ data = pkt;
+ }
+
+ /*extract L5 frame */
+ ksft_print_msg("\nDEBUG>> L5: seqnum: ");
+ pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
+ ksft_print_msg("....");
+ if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
+ ksft_print_msg("\n.... ");
+ pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
+ PKT_DUMP_NB_TO_PRINT);
+ }
+ ksft_print_msg("\n---------------------------------------\n");
+}
+
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
+{
+ u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
+ u32 offset = addr % umem->frame_size, expected_offset;
+ int pkt_offset = pkt->valid ? pkt->offset : 0;
+
+ if (!umem->unaligned_mode)
+ pkt_offset = 0;
+
+ expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+
+ if (offset == expected_offset)
+ return true;
+
+ ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
+ return false;
+}
+
+static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
+{
+ void *data = xsk_umem__get_data(buffer, addr);
+ struct xdp_info *meta = data - sizeof(struct xdp_info);
+
+ if (meta->count != pkt->pkt_nb) {
+ ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+ __func__, pkt->pkt_nb,
+ (unsigned long long)meta->count);
+ return false;
+ }
+
+ return true;
+}
+
+static int is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx, bool *supported)
+{
+ struct bpf_map *data_map;
+ int adjust_value = 0;
+ int key = 0;
+ int ret;
+
+ data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+ if (!data_map || !bpf_map__is_internal(data_map)) {
+ ksft_print_msg("Error: could not find bss section of XDP program\n");
+ return -EINVAL;
+ }
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value);
+ if (ret) {
+ ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret);
+ return ret;
+ }
+
+ /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail
+ * helper is not supported. Skip the adjust_tail test case in this scenario.
+ */
+ *supported = adjust_value != -EOPNOTSUPP;
+
+ return 0;
+}
+
+static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
+ u32 bytes_processed)
+{
+ u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ addr -= umem->base_addr;
+
+ if (addr >= umem->num_frames * umem->frame_size ||
+ addr + len > umem->num_frames * umem->frame_size) {
+ ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+ if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
+ ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+
+ pkt_data = data;
+ if (!bytes_processed) {
+ pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
+ len -= PKT_HDR_SIZE;
+ } else {
+ bytes_processed -= PKT_HDR_SIZE;
+ }
+
+ expected_seqnum = bytes_processed / sizeof(*pkt_data);
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ pkt_nb = ntohl(*pkt_data) >> 16;
+
+ if (expected_pkt_nb != pkt_nb) {
+ ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
+ __func__, expected_pkt_nb, pkt_nb);
+ goto error;
+ }
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ pkt_data += words_to_end;
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ expected_seqnum += words_to_end;
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ return true;
+
+error:
+ pkt_dump(data, len, !bytes_processed);
+ return false;
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+{
+ if (pkt->len != len) {
+ ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
+ __func__, pkt->len, len);
+ pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
+ return false;
+ }
+
+ return true;
+}
+
+static u32 load_value(u32 *counter)
+{
+ return __atomic_load_n(counter, __ATOMIC_ACQUIRE);
+}
+
+static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
+{
+ u32 max_budget = MAX_TX_BUDGET_DEFAULT;
+ u32 cons, ready_to_send;
+ int delta;
+
+ cons = load_value(xsk->tx.consumer);
+ ready_to_send = load_value(xsk->tx.producer) - cons;
+ *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+
+ delta = load_value(xsk->tx.consumer) - cons;
+ /* By default, xsk should consume exact @max_budget descs at one
+ * send in this case where hitting the max budget limit in while
+ * loop is triggered in __xsk_generic_xmit(). Please make sure that
+ * the number of descs to be sent is larger than @max_budget, or
+ * else the tx.consumer will be updated in xskq_cons_peek_desc()
+ * in time which hides the issue we try to verify.
+ */
+ if (ready_to_send > max_budget && delta != max_budget)
+ return false;
+
+ return true;
+}
+
+int kick_tx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ if (xsk->check_consumer) {
+ if (!kick_tx_with_check(xsk, &ret))
+ return TEST_FAILURE;
+ } else {
+ ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ }
+ if (ret >= 0)
+ return TEST_PASS;
+ if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
+ usleep(100);
+ return TEST_PASS;
+ }
+ return TEST_FAILURE;
+}
+
+int kick_rx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
+{
+ unsigned int rcvd;
+ u32 idx;
+ int ret;
+
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
+ if (rcvd) {
+ if (rcvd > xsk->outstanding_tx) {
+ u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
+
+ ksft_print_msg("[%s] Too many packets completed\n", __func__);
+ ksft_print_msg("Last completion address: %llx\n",
+ (unsigned long long)addr);
+ return TEST_FAILURE;
+ }
+
+ xsk_ring_cons__release(&xsk->umem->cq, rcvd);
+ xsk->outstanding_tx -= rcvd;
+ }
+
+ return TEST_PASS;
+}
+
+static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
+{
+ u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct ifobject *ifobj = test->ifobj_rx;
+ struct xsk_umem_info *umem = xsk->umem;
+ struct pollfd fds = { };
+ struct pkt *pkt;
+ u64 first_addr = 0;
+ int ret;
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLIN;
+
+ ret = kick_rx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+
+ if (ifobj->use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ if (!ret) {
+ if (!is_umem_valid(test->ifobj_tx))
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+ return TEST_CONTINUE;
+ }
+
+ if (!(fds.revents & POLLIN))
+ return TEST_CONTINUE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
+ if (!rcvd)
+ return TEST_CONTINUE;
+
+ if (ifobj->use_fill_ring) {
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+ }
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ }
+ }
+
+ while (frags_processed < rcvd) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
+
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+
+ if (!nb_frags) {
+ pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
+ if (!pkt) {
+ ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
+ __func__, addr, desc->len);
+ return TEST_FAILURE;
+ }
+ }
+
+ print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
+ addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
+
+ if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
+ !is_metadata_correct(pkt, umem->buffer, addr)))
+ return TEST_FAILURE;
+
+ if (!nb_frags++)
+ first_addr = addr;
+ frags_processed++;
+ pkt_len += desc->len;
+ if (ifobj->use_fill_ring)
+ *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+
+ if (pkt_continues(desc->options))
+ continue;
+
+ /* The complete packet has been received */
+ if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr))
+ return TEST_FAILURE;
+
+ pkt_stream->nb_rx_pkts++;
+ nb_frags = 0;
+ pkt_len = 0;
+ }
+
+ if (nb_frags) {
+ /* In the middle of a packet. Start over from beginning of packet. */
+ idx_rx -= nb_frags;
+ xsk_ring_cons__cancel(&xsk->rx, nb_frags);
+ if (ifobj->use_fill_ring) {
+ idx_fq -= nb_frags;
+ xsk_ring_prod__cancel(&umem->fq, nb_frags);
+ }
+ frags_processed -= nb_frags;
+ }
+
+ if (ifobj->use_fill_ring)
+ xsk_ring_prod__submit(&umem->fq, frags_processed);
+ if (ifobj->release_rx)
+ xsk_ring_cons__release(&xsk->rx, frags_processed);
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight -= pkts_sent;
+ pthread_mutex_unlock(&pacing_mutex);
+ pkts_sent = 0;
+
+ return TEST_CONTINUE;
+}
+
+bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
+ unsigned long *bitmap)
+{
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+
+ if (!pkt_stream) {
+ __set_bit(sock_num, bitmap);
+ return false;
+ }
+
+ if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
+ __set_bit(sock_num, bitmap);
+ if (bitmap_full(bitmap, test->nb_sockets))
+ return true;
+ }
+
+ return false;
+}
+
+static int receive_pkts(struct test_spec *test)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ struct xsk_socket_info *xsk;
+ u32 sock_num = 0;
+ int res, ret;
+
+ bitmap_zero(bitmap, test->nb_sockets);
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (1) {
+ xsk = &test->ifobj_rx->xsk_arr[sock_num];
+
+ if ((all_packets_received(test, xsk, sock_num, bitmap)))
+ break;
+
+ res = __receive_pkts(test, xsk);
+ if (!(res == TEST_PASS || res == TEST_CONTINUE))
+ return res;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+ sock_num = (sock_num + 1) % test->nb_sockets;
+ }
+
+ return TEST_PASS;
+}
+
+static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
+{
+ u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct xsk_umem_info *umem = ifobject->umem;
+ bool use_poll = ifobject->use_poll;
+ struct pollfd fds = { };
+ int ret;
+
+ buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
+ /* pkts_in_flight might be negative if many invalid packets are sent */
+ if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
+ buffer_len)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ return TEST_CONTINUE;
+ }
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLOUT;
+
+ while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (timeout) {
+ if (ret < 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ if (ret == 0)
+ return TEST_PASS;
+ break;
+ }
+ if (ret <= 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ }
+
+ complete_pkts(xsk, xsk->batch_size);
+ }
+
+ for (i = 0; i < xsk->batch_size; i++) {
+ struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ u32 nb_frags_left, nb_frags, bytes_written = 0;
+
+ if (!pkt)
+ break;
+
+ nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
+ if (nb_frags > xsk->batch_size - i) {
+ pkt_stream_cancel(pkt_stream);
+ xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
+ break;
+ }
+ nb_frags_left = nb_frags;
+
+ while (nb_frags_left--) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+ tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
+ if (pkt_stream->verbatim) {
+ tx_desc->len = pkt->len;
+ tx_desc->options = pkt->options;
+ } else if (nb_frags_left) {
+ tx_desc->len = umem->frame_size;
+ tx_desc->options = XDP_PKT_CONTD;
+ } else {
+ tx_desc->len = pkt->len - bytes_written;
+ tx_desc->options = 0;
+ }
+ if (pkt->valid)
+ pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
+ bytes_written);
+ bytes_written += tx_desc->len;
+
+ print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
+ tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
+
+ if (nb_frags_left) {
+ i++;
+ if (pkt_stream->verbatim)
+ pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ }
+ }
+
+ if (pkt && pkt->valid) {
+ valid_pkts++;
+ valid_frags += nb_frags;
+ }
+ }
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight += valid_pkts;
+ pthread_mutex_unlock(&pacing_mutex);
+
+ xsk_ring_prod__submit(&xsk->tx, i);
+ xsk->outstanding_tx += valid_frags;
+
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret <= 0) {
+ if (ret == 0 && timeout)
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
+ return TEST_FAILURE;
+ }
+ }
+
+ if (!timeout) {
+ if (complete_pkts(xsk, i))
+ return TEST_FAILURE;
+
+ usleep(10);
+ return TEST_PASS;
+ }
+
+ return TEST_CONTINUE;
+}
+
+static int wait_for_tx_completion(struct xsk_socket_info *xsk)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ int ret;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (xsk->outstanding_tx) {
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+
+ complete_pkts(xsk, xsk->batch_size);
+ }
+
+ return TEST_PASS;
+}
+
+bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
+{
+ return bitmap_full(bitmap, test->nb_sockets);
+}
+
+static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
+{
+ bool timeout = !is_umem_valid(test->ifobj_rx);
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ u32 i, ret;
+
+ bitmap_zero(bitmap, test->nb_sockets);
+
+ while (!(all_packets_sent(test, bitmap))) {
+ for (i = 0; i < test->nb_sockets; i++) {
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = ifobject->xsk_arr[i].pkt_stream;
+ if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
+ __set_bit(i, bitmap);
+ continue;
+ }
+ ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
+ if (ret == TEST_CONTINUE && !test->fail)
+ continue;
+
+ if ((ret || test->fail) && !timeout)
+ return TEST_FAILURE;
+
+ if (ret == TEST_PASS && timeout)
+ return ret;
+
+ ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
+ if (ret)
+ return TEST_FAILURE;
+ }
+ }
+
+ return TEST_PASS;
+}
+
+static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
+{
+ int fd = xsk_socket__fd(xsk), err;
+ socklen_t optlen, expected_len;
+
+ optlen = sizeof(*stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
+ if (err) {
+ ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return TEST_FAILURE;
+ }
+
+ expected_len = sizeof(struct xdp_statistics);
+ if (optlen != expected_len) {
+ ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
+ __func__, expected_len, optlen);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static int validate_rx_dropped(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ /* The receiver calls getsockopt after receiving the last (valid)
+ * packet which is not the final packet sent in this test (valid and
+ * invalid packets are sent in alternating fashion with the final
+ * packet being invalid). Since the last packet may or may not have
+ * been dropped already, both outcomes must be allowed.
+ */
+ if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
+ stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_rx_full(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_ring_full)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_fill_empty(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_fill_ring_empty_descs)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_tx_invalid_descs(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err) {
+ ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return TEST_FAILURE;
+ }
+
+ if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
+ ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+ __func__,
+ (unsigned long long)stats.tx_invalid_descs,
+ ifobject->xsk->pkt_stream->nb_pkts);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static int xsk_configure(struct test_spec *test, struct ifobject *ifobject,
+ struct xsk_umem_info *umem, bool tx)
+{
+ int i, ret;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ bool shared = (ifobject->shared_umem && tx) ? true : !!i;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = xsk_configure_socket(&ifobject->xsk_arr[i], umem,
+ ifobject, shared);
+ if (!ret)
+ break;
+
+ /* Retry if it fails as xsk_socket__create() is asynchronous */
+ if (ctr >= SOCK_RECONF_CTR)
+ return ret;
+ usleep(USLEEP_MAX);
+ }
+ if (ifobject->busy_poll) {
+ ret = enable_busy_poll(&ifobject->xsk_arr[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
+{
+ int ret = xsk_configure(test, ifobject, test->ifobj_rx->umem, true);
+
+ if (ret)
+ return ret;
+ ifobject->xsk = &ifobject->xsk_arr[0];
+ ifobject->xskmap = test->ifobj_rx->xskmap;
+ memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
+ ifobject->umem->base_addr = 0;
+
+ return 0;
+}
+
+static int xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
+ bool fill_up)
+{
+ u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
+ u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
+ int ret;
+
+ if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ buffers_to_fill = umem->num_frames;
+ else
+ buffers_to_fill = umem->fill_size;
+
+ ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
+ if (ret != buffers_to_fill)
+ return -ENOSPC;
+
+ while (filled < buffers_to_fill) {
+ struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
+ u64 addr;
+ u32 i;
+
+ for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
+ if (!pkt) {
+ if (!fill_up)
+ break;
+ addr = filled * umem->frame_size + umem->base_addr;
+ } else if (pkt->offset >= 0) {
+ addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
+ } else {
+ addr = pkt->offset + umem_alloc_buffer(umem);
+ }
+
+ *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+ if (++filled >= buffers_to_fill)
+ break;
+ }
+ }
+ xsk_ring_prod__submit(&umem->fq, filled);
+ xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
+
+ pkt_stream_reset(pkt_stream);
+ umem_reset_alloc(umem);
+
+ return 0;
+}
+
+static int thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ int mmap_flags;
+ u64 umem_sz;
+ void *bufs;
+ int ret;
+ u32 i;
+
+ umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
+ mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+
+ if (ifobject->umem->unaligned_mode)
+ mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
+
+ if (ifobject->shared_umem)
+ umem_sz *= 2;
+
+ bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (bufs == MAP_FAILED)
+ return -errno;
+
+ ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
+ if (ret)
+ return ret;
+
+ ret = xsk_configure(test, ifobject, ifobject->umem, false);
+ if (ret)
+ return ret;
+
+ ifobject->xsk = &ifobject->xsk_arr[0];
+
+ if (!ifobject->rx_on)
+ return 0;
+
+ ret = xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream,
+ ifobject->use_fill_ring);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ ifobject->xsk = &ifobject->xsk_arr[i];
+ ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void *worker_testapp_validate_tx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_tx;
+ int err;
+
+ if (test->current_step == 1) {
+ if (!ifobject->shared_umem) {
+ if (thread_common_ops(test, ifobject)) {
+ test->fail = true;
+ pthread_exit(NULL);
+ }
+ } else {
+ if (thread_common_ops_tx(test, ifobject)) {
+ test->fail = true;
+ pthread_exit(NULL);
+ }
+ }
+ }
+
+ err = send_pkts(test, ifobject);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+ if (err)
+ test->fail = true;
+
+ pthread_exit(NULL);
+}
+
+void *worker_testapp_validate_rx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_rx;
+ int err;
+
+ if (test->current_step == 1) {
+ err = thread_common_ops(test, ifobject);
+ } else {
+ xsk_clear_xskmap(ifobject->xskmap);
+ err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
+ if (err)
+ ksft_print_msg("Error: Failed to update xskmap, error %s\n",
+ strerror(-err));
+ }
+
+ pthread_barrier_wait(&barr);
+
+ /* We leave only now in case of error to avoid getting stuck in the barrier */
+ if (err) {
+ test->fail = true;
+ pthread_exit(NULL);
+ }
+
+ err = receive_pkts(test);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+
+ if (err) {
+ if (!test->adjust_tail) {
+ test->fail = true;
+ } else {
+ bool supported;
+
+ if (is_adjust_tail_supported(ifobject->xdp_progs, &supported))
+ test->fail = true;
+ else if (!supported)
+ test->adjust_tail_support = false;
+ else
+ test->fail = true;
+ }
+ }
+
+ pthread_exit(NULL);
+}
+
+static void testapp_clean_xsk_umem(struct ifobject *ifobj)
+{
+ u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
+
+ if (ifobj->shared_umem)
+ umem_sz *= 2;
+
+ umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ xsk_umem__delete(ifobj->umem->umem);
+ munmap(ifobj->umem->buffer, umem_sz);
+}
+
+static void handler(int signum)
+{
+ pthread_exit(NULL);
+}
+
+static bool xdp_prog_changed_rx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_rx;
+
+ return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
+}
+
+static bool xdp_prog_changed_tx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_tx;
+
+ return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
+}
+
+static int xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
+ struct bpf_map *xskmap, enum test_mode mode)
+{
+ int err;
+
+ xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
+ err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
+ if (err) {
+ ksft_print_msg("Error attaching XDP program\n");
+ return err;
+ }
+
+ if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
+ if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
+ ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
+ return -EINVAL;
+ }
+
+ ifobj->xdp_prog = xdp_prog;
+ ifobj->xskmap = xskmap;
+ ifobj->mode = mode;
+
+ return 0;
+}
+
+static int xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
+ struct ifobject *ifobj_tx)
+{
+ int err = 0;
+
+ if (xdp_prog_changed_rx(test)) {
+ err = xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
+ if (err)
+ return err;
+ }
+
+ if (!ifobj_tx || ifobj_tx->shared_umem)
+ return 0;
+
+ if (xdp_prog_changed_tx(test))
+ err = xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
+
+ return err;
+}
+
+static void clean_sockets(struct test_spec *test, struct ifobject *ifobj)
+{
+ u32 i;
+
+ if (!ifobj || !test)
+ return;
+
+ for (i = 0; i < test->nb_sockets; i++)
+ xsk_socket__delete(ifobj->xsk_arr[i].xsk);
+}
+
+static void clean_umem(struct test_spec *test, struct ifobject *ifobj1, struct ifobject *ifobj2)
+{
+ if (!ifobj1)
+ return;
+
+ testapp_clean_xsk_umem(ifobj1);
+ if (ifobj2 && !ifobj2->shared_umem)
+ testapp_clean_xsk_umem(ifobj2);
+}
+
+static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
+ struct ifobject *ifobj2)
+{
+ pthread_t t0, t1;
+ int err;
+
+ if (test->mtu > MAX_ETH_PKT_SIZE) {
+ if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
+ (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
+ ksft_print_msg("Multi buffer for zero-copy not supported.\n");
+ return TEST_SKIP;
+ }
+ if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
+ (ifobj2 && !ifobj2->multi_buff_supp))) {
+ ksft_print_msg("Multi buffer not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+ err = test_spec_set_mtu(test, test->mtu);
+ if (err) {
+ ksft_print_msg("Error, could not set mtu.\n");
+ return TEST_FAILURE;
+ }
+
+ if (ifobj2) {
+ if (pthread_barrier_init(&barr, NULL, 2))
+ return TEST_FAILURE;
+ pkt_stream_reset(ifobj2->xsk->pkt_stream);
+ }
+
+ test->current_step++;
+ pkt_stream_reset(ifobj1->xsk->pkt_stream);
+ pkts_in_flight = 0;
+
+ signal(SIGUSR1, handler);
+ /*Spawn RX thread */
+ pthread_create(&t0, NULL, ifobj1->func_ptr, test);
+
+ if (ifobj2) {
+ pthread_barrier_wait(&barr);
+ if (pthread_barrier_destroy(&barr)) {
+ pthread_kill(t0, SIGUSR1);
+ clean_sockets(test, ifobj1);
+ clean_umem(test, ifobj1, NULL);
+ return TEST_FAILURE;
+ }
+
+ /*Spawn TX thread */
+ pthread_create(&t1, NULL, ifobj2->func_ptr, test);
+
+ pthread_join(t1, NULL);
+ }
+
+ if (!ifobj2)
+ pthread_kill(t0, SIGUSR1);
+ else
+ pthread_join(t0, NULL);
+
+ if (test->total_steps == test->current_step || test->fail) {
+ clean_sockets(test, ifobj1);
+ clean_sockets(test, ifobj2);
+ clean_umem(test, ifobj1, ifobj2);
+ }
+
+ if (test->fail)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+static int testapp_validate_traffic(struct test_spec *test)
+{
+ struct ifobject *ifobj_rx = test->ifobj_rx;
+ struct ifobject *ifobj_tx = test->ifobj_tx;
+
+ if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
+ (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
+ ksft_print_msg("No huge pages present.\n");
+ return TEST_SKIP;
+ }
+
+ if (test->set_ring) {
+ if (ifobj_tx->hw_ring_size_supp) {
+ if (set_ring_size(ifobj_tx)) {
+ ksft_print_msg("Failed to change HW ring size.\n");
+ return TEST_FAILURE;
+ }
+ } else {
+ ksft_print_msg("Changing HW ring size not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+
+ if (xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx))
+ return TEST_FAILURE;
+ return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
+}
+
+static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
+{
+ return __testapp_validate_traffic(test, ifobj, NULL);
+}
+
+int testapp_teardown(struct test_spec *test)
+{
+ int i;
+
+ for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+ test_spec_reset(test);
+ }
+
+ return TEST_PASS;
+}
+
+static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
+{
+ thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
+ struct ifobject *tmp_ifobj = (*ifobj1);
+
+ (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
+ (*ifobj2)->func_ptr = tmp_func_ptr;
+
+ *ifobj1 = *ifobj2;
+ *ifobj2 = tmp_ifobj;
+}
+
+int testapp_bidirectional(struct test_spec *test)
+{
+ int res;
+
+ test->ifobj_tx->rx_on = true;
+ test->ifobj_rx->tx_on = true;
+ test->total_steps = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ print_verbose("Switching Tx/Rx direction\n");
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ return res;
+}
+
+static int swap_xsk_resources(struct test_spec *test)
+{
+ int ret;
+
+ test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
+ test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
+ test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
+ test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
+
+ ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
+ if (ret)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+int testapp_xdp_prog_cleanup(struct test_spec *test)
+{
+ test->total_steps = 2;
+ test->nb_sockets = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ if (swap_xsk_resources(test)) {
+ clean_sockets(test, test->ifobj_rx);
+ clean_sockets(test, test->ifobj_tx);
+ clean_umem(test, test->ifobj_rx, test->ifobj_tx);
+ return TEST_FAILURE;
+ }
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_headroom(struct test_spec *test)
+{
+ test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_dropped(struct test_spec *test)
+{
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_print_msg("Can not run RX_DROPPED test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ if (pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0))
+ return TEST_FAILURE;
+ test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+ XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
+ if (pkt_stream_receive_half(test))
+ return TEST_FAILURE;
+ test->ifobj_rx->validation_func = validate_rx_dropped;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_tx_invalid_descs(struct test_spec *test)
+{
+ if (pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0))
+ return TEST_FAILURE;
+ test->ifobj_tx->validation_func = validate_tx_invalid_descs;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_full(struct test_spec *test)
+{
+ if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
+ test->ifobj_rx->release_rx = false;
+ test->ifobj_rx->validation_func = validate_rx_full;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_fill_empty(struct test_spec *test)
+{
+ if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->use_fill_ring = false;
+ test->ifobj_rx->validation_func = validate_fill_empty;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* Let half of the packets straddle a 4K buffer boundary */
+ if (pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2))
+ return TEST_FAILURE;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_single_pkt(struct test_spec *test)
+{
+ struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
+
+ if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE))
+ return TEST_FAILURE;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc_mb(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Valid packet for synch to start with */
+ {0, MIN_PKT_SIZE, 0, true, 0},
+ /* Zero frame len is not legal */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, 0, 0, false, 0},
+ /* Invalid address in the second frame */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid len in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid options in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
+ /* Transmit 2 frags, receive 3 */
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
+ /* Middle frame crosses chunk boundary with small length */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true, 0}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a chunk boundary allowed */
+ pkts[12].valid = true;
+ pkts[13].valid = true;
+ }
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Zero packet address allowed */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Allowed packet */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Straddling the start of umem */
+ {-2, MIN_PKT_SIZE, 0, false},
+ /* Packet too large */
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+ /* Up to end of umem allowed */
+ {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
+ /* After umem ends */
+ {umem_size, MIN_PKT_SIZE, 0, false},
+ /* Straddle the end of umem */
+ {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 4K boundary */
+ {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 2K boundary */
+ {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a page boundary allowed */
+ pkts[7].valid = true;
+ }
+ if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+ /* Crossing a 2K frame size boundary not allowed */
+ pkts[8].valid = false;
+ }
+
+ if (test->ifobj_tx->shared_umem) {
+ pkts[4].offset += umem_size;
+ pkts[5].offset += umem_size;
+ pkts[6].offset += umem_size;
+ }
+
+ if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_drop(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ if (pkt_stream_receive_half(test))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_metadata_copy(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
+ skel_tx->progs.xsk_xdp_populate_metadata,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+ test->ifobj_rx->use_metadata = true;
+
+ skel_rx->bss->count = 0;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_shared_umem(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+ int ret;
+
+ test->total_steps = 1;
+ test->nb_sockets = 2;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
+ skel_tx->progs.xsk_xdp_shared_umem,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ if (pkt_stream_even_odd_sequence(test))
+ return TEST_FAILURE;
+
+ ret = testapp_validate_traffic(test);
+
+ release_even_odd_sequence(test);
+
+ return ret;
+}
+
+int testapp_poll_txq_tmout(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+ /* create invalid frame by set umem frame_size and pkt length equal to 2048 */
+ test->ifobj_tx->umem->frame_size = 2048;
+ if (pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048))
+ return TEST_FAILURE;
+ return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+}
+
+int testapp_poll_rxq_tmout(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
+}
+
+int testapp_too_many_frags(struct test_spec *test)
+{
+ struct pkt *pkts;
+ u32 max_frags, i;
+ int ret = TEST_FAILURE;
+
+ if (test->mode == TEST_MODE_ZC) {
+ max_frags = test->ifobj_tx->xdp_zc_max_segs;
+ } else {
+ max_frags = get_max_skb_frags();
+ if (!max_frags) {
+ ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n");
+ max_frags = 17;
+ }
+ max_frags += 1;
+ }
+
+ pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
+ if (!pkts)
+ return TEST_FAILURE;
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+
+ /* Valid packet for synch */
+ pkts[0].len = MIN_PKT_SIZE;
+ pkts[0].valid = true;
+
+ /* One valid packet with the max amount of frags */
+ for (i = 1; i < max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = true;
+ }
+ pkts[max_frags].options = 0;
+
+ /* An invalid packet with the max amount of frags but signals packet
+ * continues on the last frag
+ */
+ for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = false;
+ }
+
+ /* Valid packet for synch */
+ pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
+ pkts[2 * max_frags + 1].valid = true;
+
+ if (pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2)) {
+ free(pkts);
+ return TEST_FAILURE;
+ }
+
+ ret = testapp_validate_traffic(test);
+ free(pkts);
+ return ret;
+}
+
+static int xsk_load_xdp_programs(struct ifobject *ifobj)
+{
+ ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
+ if (libbpf_get_error(ifobj->xdp_progs))
+ return libbpf_get_error(ifobj->xdp_progs);
+
+ return 0;
+}
+
+/* Simple test */
+static bool hugepages_present(void)
+{
+ size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ void *bufs;
+
+ bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
+ if (bufs == MAP_FAILED)
+ return false;
+
+ mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ munmap(bufs, mmap_sz);
+ return true;
+}
+
+int init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ int err;
+
+ ifobj->func_ptr = func_ptr;
+
+ err = xsk_load_xdp_programs(ifobj);
+ if (err) {
+ ksft_print_msg("Error loading XDP program\n");
+ return err;
+ }
+
+ if (hugepages_present())
+ ifobj->unaligned_supp = true;
+
+ err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (err) {
+ ksft_print_msg("Error querying XDP capabilities\n");
+ return err;
+ }
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
+ ifobj->multi_buff_supp = true;
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (query_opts.xdp_zc_max_segs > 1) {
+ ifobj->multi_buff_zc_supp = true;
+ ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
+ } else {
+ ifobj->xdp_zc_max_segs = 0;
+ }
+ }
+
+ return 0;
+}
+
+int testapp_send_receive(struct test_spec *test)
+{
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_poll_rx(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_poll_tx(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_aligned_inv_desc(struct test_spec *test)
+{
+ return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
+{
+ u64 page_size, umem_size;
+
+ /* Odd frame size so the UMEM doesn't end near a page boundary. */
+ test->ifobj_tx->umem->frame_size = 4001;
+ test->ifobj_rx->umem->frame_size = 4001;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* This test exists to test descriptors that staddle the end of
+ * the UMEM but not a page.
+ */
+ page_size = sysconf(_SC_PAGESIZE);
+ umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+ assert(umem_size % page_size > MIN_PKT_SIZE);
+ assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+
+ return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_mb(struct test_spec *test)
+{
+ return testapp_invalid_desc_mb(test);
+}
+
+int testapp_unaligned_inv_desc_mb(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc_mb(test);
+}
+
+int testapp_xdp_metadata(struct test_spec *test)
+{
+ return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_xdp_metadata_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_hw_sw_min_ring_size(struct test_spec *test)
+{
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
+ test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
+ test->ifobj_tx->xsk->batch_size = 1;
+ test->ifobj_rx->xsk->batch_size = 1;
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch size to hw_ring_size - 1 */
+ test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_hw_sw_max_ring_size(struct test_spec *test)
+{
+ u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
+ test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
+ test->ifobj_rx->umem->num_frames = max_descs;
+ test->ifobj_rx->umem->fill_size = max_descs;
+ test->ifobj_rx->umem->comp_size = max_descs;
+ test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
+ * updating the Rx HW tail register.
+ */
+ test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ if (pkt_stream_replace(test, max_descs, MIN_PKT_SIZE)) {
+ clean_sockets(test, test->ifobj_tx);
+ clean_sockets(test, test->ifobj_rx);
+ clean_umem(test, test->ifobj_rx, test->ifobj_tx);
+ return TEST_FAILURE;
+ }
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
+ skel_tx->progs.xsk_xdp_adjust_tail,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ skel_rx->bss->adjust_value = adjust_value;
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
+{
+ int ret;
+
+ test->adjust_tail_support = true;
+ test->adjust_tail = true;
+ test->total_steps = 1;
+
+ ret = pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
+ if (ret)
+ return TEST_FAILURE;
+
+ ret = pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
+ if (ret)
+ return TEST_FAILURE;
+
+ ret = testapp_xdp_adjust_tail(test, value);
+ if (ret)
+ return ret;
+
+ if (!test->adjust_tail_support) {
+ ksft_print_msg("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
+ mode_string(test), busy_poll_string(test));
+ return TEST_SKIP;
+ }
+
+ return 0;
+}
+
+int testapp_adjust_tail_shrink(struct test_spec *test)
+{
+ /* Shrink by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_shrink_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Shrink by the frag size */
+ return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow(struct test_spec *test)
+{
+ /* Grow by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
+ return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
+ XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_tx_queue_consumer(struct test_spec *test)
+{
+ int nr_packets;
+
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_print_msg("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
+ if (pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ test->ifobj_tx->xsk->batch_size = nr_packets;
+ test->ifobj_tx->xsk->check_consumer = true;
+
+ return testapp_validate_traffic(test);
+}
+
+struct ifobject *ifobject_create(void)
+{
+ struct ifobject *ifobj;
+
+ ifobj = calloc(1, sizeof(struct ifobject));
+ if (!ifobj)
+ return NULL;
+
+ ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
+ if (!ifobj->xsk_arr)
+ goto out_xsk_arr;
+
+ ifobj->umem = calloc(1, sizeof(*ifobj->umem));
+ if (!ifobj->umem)
+ goto out_umem;
+
+ return ifobj;
+
+out_umem:
+ free(ifobj->xsk_arr);
+out_xsk_arr:
+ free(ifobj);
+ return NULL;
+}
+
+void ifobject_delete(struct ifobject *ifobj)
+{
+ free(ifobj->umem);
+ free(ifobj->xsk_arr);
+ free(ifobj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.h b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
new file mode 100644
index 000000000000..8fc78a057de0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TEST_XSK_H_
+#define TEST_XSK_H_
+
+#include <linux/ethtool.h>
+#include <linux/if_xdp.h>
+
+#include "../kselftest.h"
+#include "xsk.h"
+
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
+#define TEST_PASS 0
+#define TEST_FAILURE -1
+#define TEST_CONTINUE 1
+#define TEST_SKIP 2
+
+#define DEFAULT_PKT_CNT (4 * 1024)
+#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
+#define HUGEPAGE_SIZE (2 * 1024 * 1024)
+#define MIN_PKT_SIZE 64
+#define MAX_ETH_PKT_SIZE 1518
+#define MAX_INTERFACE_NAME_CHARS 16
+#define MAX_TEST_NAME_SIZE 48
+#define SOCK_RECONF_CTR 10
+#define USLEEP_MAX 10000
+
+extern bool opt_verbose;
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
+
+
+static inline u32 ceil_u32(u32 a, u32 b)
+{
+ return (a + b - 1) / b;
+}
+
+static inline u64 ceil_u64(u64 a, u64 b)
+{
+ return (a + b - 1) / b;
+}
+
+/* Simple test */
+enum test_mode {
+ TEST_MODE_SKB,
+ TEST_MODE_DRV,
+ TEST_MODE_ZC,
+ TEST_MODE_ALL
+};
+
+struct ifobject;
+struct test_spec;
+typedef int (*validation_func_t)(struct ifobject *ifobj);
+typedef void *(*thread_func_t)(void *arg);
+typedef int (*test_func_t)(struct test_spec *test);
+
+struct xsk_socket_info {
+ struct xsk_ring_cons rx;
+ struct xsk_ring_prod tx;
+ struct xsk_umem_info *umem;
+ struct xsk_socket *xsk;
+ struct pkt_stream *pkt_stream;
+ u32 outstanding_tx;
+ u32 rxqsize;
+ u32 batch_size;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
+ bool check_consumer;
+};
+
+int kick_rx(struct xsk_socket_info *xsk);
+int kick_tx(struct xsk_socket_info *xsk);
+
+struct xsk_umem_info {
+ struct xsk_ring_prod fq;
+ struct xsk_ring_cons cq;
+ struct xsk_umem *umem;
+ u64 next_buffer;
+ u32 num_frames;
+ u32 frame_headroom;
+ void *buffer;
+ u32 frame_size;
+ u32 base_addr;
+ u32 fill_size;
+ u32 comp_size;
+ bool unaligned_mode;
+};
+
+struct set_hw_ring {
+ u32 default_tx;
+ u32 default_rx;
+};
+
+int hw_ring_size_reset(struct ifobject *ifobj);
+
+struct ifobject {
+ char ifname[MAX_INTERFACE_NAME_CHARS];
+ struct xsk_socket_info *xsk;
+ struct xsk_socket_info *xsk_arr;
+ struct xsk_umem_info *umem;
+ thread_func_t func_ptr;
+ validation_func_t validation_func;
+ struct xsk_xdp_progs *xdp_progs;
+ struct bpf_map *xskmap;
+ struct bpf_program *xdp_prog;
+ struct ethtool_ringparam ring;
+ struct set_hw_ring set_ring;
+ enum test_mode mode;
+ int ifindex;
+ int mtu;
+ u32 bind_flags;
+ u32 xdp_zc_max_segs;
+ bool tx_on;
+ bool rx_on;
+ bool use_poll;
+ bool busy_poll;
+ bool use_fill_ring;
+ bool release_rx;
+ bool shared_umem;
+ bool use_metadata;
+ bool unaligned_supp;
+ bool multi_buff_supp;
+ bool multi_buff_zc_supp;
+ bool hw_ring_size_supp;
+};
+struct ifobject *ifobject_create(void);
+void ifobject_delete(struct ifobject *ifobj);
+int init_iface(struct ifobject *ifobj, thread_func_t func_ptr);
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, u64 size);
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+ struct ifobject *ifobject, bool shared);
+
+
+struct pkt {
+ int offset;
+ u32 len;
+ u32 pkt_nb;
+ bool valid;
+ u16 options;
+};
+
+struct pkt_stream {
+ u32 nb_pkts;
+ u32 current_pkt_nb;
+ struct pkt *pkts;
+ u32 max_pkt_len;
+ u32 nb_rx_pkts;
+ u32 nb_valid_entries;
+ bool verbatim;
+};
+
+static inline bool pkt_continues(u32 options)
+{
+ return options & XDP_PKT_CONTD;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len);
+void pkt_stream_delete(struct pkt_stream *pkt_stream);
+void pkt_stream_reset(struct pkt_stream *pkt_stream);
+void pkt_stream_restore_default(struct test_spec *test);
+
+struct test_spec {
+ struct ifobject *ifobj_tx;
+ struct ifobject *ifobj_rx;
+ struct pkt_stream *tx_pkt_stream_default;
+ struct pkt_stream *rx_pkt_stream_default;
+ struct bpf_program *xdp_prog_rx;
+ struct bpf_program *xdp_prog_tx;
+ struct bpf_map *xskmap_rx;
+ struct bpf_map *xskmap_tx;
+ test_func_t test_func;
+ int mtu;
+ u16 total_steps;
+ u16 current_step;
+ u16 nb_sockets;
+ bool fail;
+ bool set_ring;
+ bool adjust_tail;
+ bool adjust_tail_support;
+ enum test_mode mode;
+ char name[MAX_TEST_NAME_SIZE];
+};
+
+#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
+static inline char *mode_string(struct test_spec *test)
+{
+ switch (test->mode) {
+ case TEST_MODE_SKB:
+ return "SKB";
+ case TEST_MODE_DRV:
+ return "DRV";
+ case TEST_MODE_ZC:
+ return "ZC";
+ default:
+ return "BOGUS";
+ }
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run);
+
+int testapp_adjust_tail_grow(struct test_spec *test);
+int testapp_adjust_tail_grow_mb(struct test_spec *test);
+int testapp_adjust_tail_shrink(struct test_spec *test);
+int testapp_adjust_tail_shrink_mb(struct test_spec *test);
+int testapp_aligned_inv_desc(struct test_spec *test);
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test);
+int testapp_aligned_inv_desc_mb(struct test_spec *test);
+int testapp_bidirectional(struct test_spec *test);
+int testapp_headroom(struct test_spec *test);
+int testapp_hw_sw_max_ring_size(struct test_spec *test);
+int testapp_hw_sw_min_ring_size(struct test_spec *test);
+int testapp_poll_rx(struct test_spec *test);
+int testapp_poll_rxq_tmout(struct test_spec *test);
+int testapp_poll_tx(struct test_spec *test);
+int testapp_poll_txq_tmout(struct test_spec *test);
+int testapp_send_receive(struct test_spec *test);
+int testapp_send_receive_2k_frame(struct test_spec *test);
+int testapp_send_receive_mb(struct test_spec *test);
+int testapp_send_receive_unaligned(struct test_spec *test);
+int testapp_send_receive_unaligned_mb(struct test_spec *test);
+int testapp_single_pkt(struct test_spec *test);
+int testapp_stats_fill_empty(struct test_spec *test);
+int testapp_stats_rx_dropped(struct test_spec *test);
+int testapp_stats_tx_invalid_descs(struct test_spec *test);
+int testapp_stats_rx_full(struct test_spec *test);
+int testapp_teardown(struct test_spec *test);
+int testapp_too_many_frags(struct test_spec *test);
+int testapp_tx_queue_consumer(struct test_spec *test);
+int testapp_unaligned_inv_desc(struct test_spec *test);
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test);
+int testapp_unaligned_inv_desc_mb(struct test_spec *test);
+int testapp_xdp_drop(struct test_spec *test);
+int testapp_xdp_metadata(struct test_spec *test);
+int testapp_xdp_metadata_mb(struct test_spec *test);
+int testapp_xdp_prog_cleanup(struct test_spec *test);
+int testapp_xdp_shared_umem(struct test_spec *test);
+
+void *worker_testapp_validate_rx(void *arg);
+void *worker_testapp_validate_tx(void *arg);
+
+static const struct test_spec tests[] = {
+ {.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
+ {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
+ {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
+ {.name = "POLL_RX", .test_func = testapp_poll_rx},
+ {.name = "POLL_TX", .test_func = testapp_poll_tx},
+ {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
+ {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
+ {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
+ {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
+ {.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
+ {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
+ {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
+ {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
+ {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
+ {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
+ {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
+ {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
+ {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
+ {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
+ {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
+ {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
+ {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
+ {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
+ {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
+ };
+
+static const struct test_spec ci_skip_tests[] = {
+ /* Flaky tests */
+ {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
+ {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
+ {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
+ {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
+ /* Tests with huge page dependency */
+ {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
+ {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
+ {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
+ .test_func = testapp_unaligned_inv_desc_4001_frame},
+ {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
+ .test_func = testapp_send_receive_unaligned_mb},
+ {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
+ /* Test with HW ring size dependency */
+ {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
+ {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+ /* Too long test */
+ {.name = "TEARDOWN", .test_func = testapp_teardown},
+};
+
+
+#endif /* TEST_XSK_H_ */
diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c
new file mode 100644
index 000000000000..f45af1b0ef2c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022 Linutronix GmbH */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_time_tai.skel.h"
+
+#include <time.h>
+#include <stdint.h>
+
+#define TAI_THRESHOLD 1000000000ULL /* 1s */
+#define NSEC_PER_SEC 1000000000ULL
+
+static __u64 ts_to_ns(const struct timespec *ts)
+{
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+void test_time_tai(void)
+{
+ struct __sk_buff skb = {
+ .cb[0] = 0,
+ .cb[1] = 0,
+ .tstamp = 0,
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ .ctx_out = &skb,
+ .ctx_size_out = sizeof(skb),
+ );
+ struct test_time_tai *skel;
+ struct timespec now_tai;
+ __u64 ts1, ts2, now;
+ int ret, prog_fd;
+
+ /* Open and load */
+ skel = test_time_tai__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tai_open"))
+ return;
+
+ /* Run test program */
+ prog_fd = bpf_program__fd(skel->progs.time_tai);
+ ret = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(ret, "test_run");
+
+ /* Retrieve generated TAI timestamps */
+ ts1 = skb.tstamp;
+ ts2 = skb.cb[0] | ((__u64)skb.cb[1] << 32);
+
+ /* TAI != 0 */
+ ASSERT_NEQ(ts1, 0, "tai_ts1");
+ ASSERT_NEQ(ts2, 0, "tai_ts2");
+
+ /* TAI is moving forward only */
+ ASSERT_GE(ts2, ts1, "tai_forward");
+
+ /* Check for future */
+ ret = clock_gettime(CLOCK_TAI, &now_tai);
+ ASSERT_EQ(ret, 0, "tai_gettime");
+ now = ts_to_ns(&now_tai);
+
+ ASSERT_TRUE(now > ts1, "tai_future_ts1");
+ ASSERT_TRUE(now > ts2, "tai_future_ts2");
+
+ /* Check for reasonable range */
+ ASSERT_TRUE(now - ts1 < TAI_THRESHOLD, "tai_range_ts1");
+ ASSERT_TRUE(now - ts2 < TAI_THRESHOLD, "tai_range_ts2");
+
+ test_time_tai__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
new file mode 100644
index 000000000000..34f9ccce2602
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer.skel.h"
+#include "timer_failure.skel.h"
+#include "timer_interrupt.skel.h"
+
+#define NUM_THR 8
+
+static void *spin_lock_thread(void *arg)
+{
+ int i, err, prog_fd = *(int *)arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ for (i = 0; i < 10000; i++) {
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err") ||
+ !ASSERT_OK(topts.retval, "test_run_opts retval"))
+ break;
+ }
+
+ pthread_exit(arg);
+}
+
+static int timer(struct timer *timer_skel)
+{
+ int i, err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ pthread_t thread_id[NUM_THR];
+ void *ret;
+
+ err = timer__attach(timer_skel);
+ if (!ASSERT_OK(err, "timer_attach"))
+ return err;
+
+ ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
+ ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
+ ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1");
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+ timer__detach(timer_skel);
+
+ usleep(50); /* 10 usecs should be enough, but give it extra */
+ /* check that timer_cb1() was executed 10+10 times */
+ ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2");
+ ASSERT_EQ(timer_skel->data->callback2_check, 42, "callback2_check2");
+
+ /* check that timer_cb2() was executed twice */
+ ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data");
+
+ /* check that timer_cb3() was executed twice */
+ ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data");
+
+ /* check that timer_cb_pinned() was executed twice */
+ ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check");
+
+ /* check that there were no errors in timer execution */
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
+
+ prog_fd = bpf_program__fd(timer_skel->progs.race);
+ for (i = 0; i < NUM_THR; i++) {
+ err = pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ break;
+ }
+
+ while (i) {
+ err = pthread_join(thread_id[--i], &ret);
+ if (ASSERT_OK(err, "pthread_join"))
+ ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
+ }
+
+ return 0;
+}
+
+/* TODO: use pid filtering */
+void serial_test_timer(void)
+{
+ struct timer *timer_skel = NULL;
+ int err;
+
+ timer_skel = timer__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+ return;
+
+ err = timer(timer_skel);
+ ASSERT_OK(err, "timer");
+ timer__destroy(timer_skel);
+
+ RUN_TESTS(timer_failure);
+}
+
+void test_timer_interrupt(void)
+{
+ struct timer_interrupt *skel = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ skel = timer_interrupt__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(skel, "timer_interrupt__open_and_load"))
+ return;
+
+ err = timer_interrupt__attach(skel);
+ if (!ASSERT_OK(err, "timer_interrupt__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_timer_interrupt);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+
+ usleep(50);
+
+ ASSERT_EQ(skel->bss->in_interrupt, 0, "in_interrupt");
+ if (skel->bss->preempt_count)
+ ASSERT_NEQ(skel->bss->in_interrupt_cb, 0, "in_interrupt_cb");
+
+out:
+ timer_interrupt__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
new file mode 100644
index 000000000000..b841597c8a3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "timer_crash.skel.h"
+
+enum {
+ MODE_ARRAY,
+ MODE_HASH,
+};
+
+static void test_timer_crash_mode(int mode)
+{
+ struct timer_crash *skel;
+
+ skel = timer_crash__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load"))
+ return;
+ skel->bss->pid = getpid();
+ skel->bss->crash_map = mode;
+ if (!ASSERT_OK(timer_crash__attach(skel), "timer_crash__attach"))
+ goto end;
+ usleep(1);
+end:
+ timer_crash__destroy(skel);
+}
+
+void test_timer_crash(void)
+{
+ if (test__start_subtest("array"))
+ test_timer_crash_mode(MODE_ARRAY);
+ if (test__start_subtest("hash"))
+ test_timer_crash_mode(MODE_HASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
new file mode 100644
index 000000000000..eb303fa1e09a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <pthread.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+#include "timer_lockup.skel.h"
+
+static long cpu;
+static int *timer1_err;
+static int *timer2_err;
+static bool skip;
+
+volatile int k = 0;
+
+static void *timer_lockup_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1000,
+ );
+ int i, prog_fd = *(int *)arg;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset),
+ &cpuset),
+ "cpu affinity");
+
+ for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) {
+ bpf_prog_test_run_opts(prog_fd, &opts);
+ /* Skip the test if we can't reproduce the race in a reasonable
+ * amount of time.
+ */
+ if (i > 50) {
+ WRITE_ONCE(skip, true);
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+void test_timer_lockup(void)
+{
+ int timer1_prog, timer2_prog;
+ struct timer_lockup *skel;
+ pthread_t thrds[2];
+ void *ret;
+
+ if (get_nprocs() < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = timer_lockup__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
+ return;
+
+ timer1_prog = bpf_program__fd(skel->progs.timer1_prog);
+ timer2_prog = bpf_program__fd(skel->progs.timer2_prog);
+
+ timer1_err = &skel->bss->timer1_err;
+ timer2_err = &skel->bss->timer2_err;
+
+ if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread,
+ &timer1_prog),
+ "pthread_create thread1"))
+ goto out;
+ if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread,
+ &timer2_prog),
+ "pthread_create thread2")) {
+ pthread_exit(&thrds[0]);
+ goto out;
+ }
+
+ pthread_join(thrds[1], &ret);
+ pthread_join(thrds[0], &ret);
+
+ if (skip) {
+ test__skip();
+ goto out;
+ }
+
+ if (*timer1_err != -EDEADLK && *timer1_err != 0)
+ ASSERT_FAIL("timer1_err bad value");
+ if (*timer2_err != -EDEADLK && *timer2_err != 0)
+ ASSERT_FAIL("timer2_err bad value");
+out:
+ timer_lockup__destroy(skel);
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
new file mode 100644
index 000000000000..c930c7d7105b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer_mim.skel.h"
+#include "timer_mim_reject.skel.h"
+
+static int timer_mim(struct timer_mim *timer_skel)
+{
+ __u64 cnt1, cnt2;
+ int err, prog_fd, key1 = 1;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ err = timer_mim__attach(timer_skel);
+ if (!ASSERT_OK(err, "timer_attach"))
+ return err;
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+ timer_mim__detach(timer_skel);
+
+ /* check that timer_cb[12] are incrementing 'cnt' */
+ cnt1 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 != cnt1)
+ break;
+ usleep(200); /* 100 times more than interval */
+ }
+ ASSERT_GT(cnt2, cnt1, "cnt");
+
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok");
+
+ close(bpf_map__fd(timer_skel->maps.inner_htab));
+ err = bpf_map__delete_elem(timer_skel->maps.outer_arr, &key1, sizeof(key1), 0);
+ ASSERT_EQ(err, 0, "delete inner map");
+
+ /* check that timer_cb[12] are no longer running */
+ cnt1 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ usleep(200); /* 100 times more than interval */
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 == cnt1)
+ break;
+ }
+ ASSERT_EQ(cnt2, cnt1, "cnt");
+
+ return 0;
+}
+
+void serial_test_timer_mim(void)
+{
+ struct timer_mim_reject *timer_reject_skel = NULL;
+ libbpf_print_fn_t old_print_fn = NULL;
+ struct timer_mim *timer_skel = NULL;
+ int err;
+
+ old_print_fn = libbpf_set_print(NULL);
+ timer_reject_skel = timer_mim_reject__open_and_load();
+ libbpf_set_print(old_print_fn);
+ if (!ASSERT_ERR_PTR(timer_reject_skel, "timer_reject_skel_load"))
+ goto cleanup;
+
+ timer_skel = timer_mim__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+ goto cleanup;
+
+ err = timer_mim(timer_skel);
+ ASSERT_OK(err, "timer_mim");
+cleanup:
+ timer_mim__destroy(timer_skel);
+ timer_mim_reject__destroy(timer_reject_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
new file mode 100644
index 000000000000..b81dde283052
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -0,0 +1,1197 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "cap_helpers.h"
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <linux/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/un.h>
+#include "priv_map.skel.h"
+#include "priv_prog.skel.h"
+#include "dummy_st_ops_success.skel.h"
+#include "token_lsm.skel.h"
+#include "priv_freplace_prog.skel.h"
+
+static inline int sys_mount(const char *dev_name, const char *dir_name,
+ const char *type, unsigned long flags,
+ const void *data)
+{
+ return syscall(__NR_mount, dev_name, dir_name, type, flags, data);
+}
+
+static inline int sys_fsopen(const char *fsname, unsigned flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fspick(int dfd, const char *path, unsigned flags)
+{
+ return syscall(__NR_fspick, dfd, path, flags);
+}
+
+static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
+}
+
+static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
+{
+ return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
+}
+
+static inline int sys_move_mount(int from_dfd, const char *from_path,
+ int to_dfd, const char *to_path,
+ unsigned flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
+}
+
+static int drop_priv_caps(__u64 *old_caps)
+{
+ return cap_disable_effective((1ULL << CAP_BPF) |
+ (1ULL << CAP_PERFMON) |
+ (1ULL << CAP_NET_ADMIN) |
+ (1ULL << CAP_SYS_ADMIN), old_caps);
+}
+
+static int restore_priv_caps(__u64 old_caps)
+{
+ return cap_enable_effective(old_caps, NULL);
+}
+
+static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
+{
+ char buf[32];
+ int err;
+
+ if (!mask_str) {
+ if (mask == ~0ULL) {
+ mask_str = "any";
+ } else {
+ snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
+ mask_str = buf;
+ }
+ }
+
+ err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key,
+ mask_str, 0);
+ if (err < 0)
+ err = -errno;
+ return err;
+}
+
+#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0)
+
+struct bpffs_opts {
+ __u64 cmds;
+ __u64 maps;
+ __u64 progs;
+ __u64 attachs;
+ const char *cmds_str;
+ const char *maps_str;
+ const char *progs_str;
+ const char *attachs_str;
+};
+
+static int create_bpffs_fd(void)
+{
+ int fs_fd;
+
+ /* create VFS context */
+ fs_fd = sys_fsopen("bpf", 0);
+ ASSERT_GE(fs_fd, 0, "fs_fd");
+
+ return fs_fd;
+}
+
+static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
+{
+ int err;
+
+ /* set up token delegation mount options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
+ if (!ASSERT_OK(err, "fs_cfg_cmds"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
+ if (!ASSERT_OK(err, "fs_cfg_maps"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
+ if (!ASSERT_OK(err, "fs_cfg_progs"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
+ if (!ASSERT_OK(err, "fs_cfg_attachs"))
+ return err;
+
+ /* instantiate FS object */
+ err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+ if (err < 0)
+ return -errno;
+
+ return 0;
+}
+
+/* send FD over Unix domain (AF_UNIX) socket */
+static int sendfd(int sockfd, int fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1] = { fd }, err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
+
+ err = sendmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "sendmsg"))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* receive FD over Unix domain (AF_UNIX) socket */
+static int recvfd(int sockfd, int *fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1], err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+
+ err = recvmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "recvmsg"))
+ return -EINVAL;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (!ASSERT_OK_PTR(cmsg, "cmsg_null") ||
+ !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") ||
+ !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") ||
+ !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type"))
+ return -EINVAL;
+
+ memcpy(fds, CMSG_DATA(cmsg), sizeof(fds));
+ *fd = fds[0];
+
+ return 0;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel);
+
+static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
+{
+ int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
+ struct token_lsm *lsm_skel = NULL;
+ char one;
+
+ /* load and attach LSM "policy" before we go into unpriv userns */
+ lsm_skel = token_lsm__open_and_load();
+ if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ lsm_skel->bss->my_pid = getpid();
+ err = token_lsm__attach(lsm_skel);
+ if (!ASSERT_OK(err, "lsm_skel_attach"))
+ goto cleanup;
+
+ /* setup userns with root mappings */
+ err = create_and_enter_userns();
+ if (!ASSERT_OK(err, "create_and_enter_userns"))
+ goto cleanup;
+
+ /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "create_mountns"))
+ goto cleanup;
+
+ err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
+ if (!ASSERT_OK(err, "remount_root"))
+ goto cleanup;
+
+ fs_fd = create_bpffs_fd();
+ if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");
+
+ /* pass BPF FS context object to parent */
+ err = sendfd(sock_fd, fs_fd);
+ if (!ASSERT_OK(err, "send_fs_fd"))
+ goto cleanup;
+
+ /* wait that the parent reads the fd, does the fsconfig() calls
+ * and send us a signal that it is done
+ */
+ err = read(sock_fd, &one, sizeof(one));
+ if (!ASSERT_GE(err, 0, "read_one"))
+ goto cleanup;
+
+ /* avoid mucking around with mount namespaces and mounting at
+ * well-known path, just create O_PATH fd for detached mount
+ */
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (!ASSERT_OK_FD(mnt_fd, "mnt_fd"))
+ goto cleanup;
+
+ /* try to fspick() BPF FS and try to add some delegation options */
+ fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
+ if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot reconfigure to set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ zclose(fs_fd);
+
+ bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR);
+ if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* create BPF token FD and pass it to parent for some extra checks */
+ token_fd = bpf_token_create(bpffs_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "child_token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = sendfd(sock_fd, token_fd);
+ if (!ASSERT_OK(err, "send_token_fd"))
+ goto cleanup;
+ zclose(token_fd);
+
+ /* do custom test logic with customly set up BPF FS instance */
+ err = callback(bpffs_fd, lsm_skel);
+ if (!ASSERT_OK(err, "test_callback"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ zclose(sock_fd);
+ zclose(mnt_fd);
+ zclose(fs_fd);
+ zclose(bpffs_fd);
+ zclose(token_fd);
+
+ lsm_skel->bss->my_pid = 0;
+ token_lsm__destroy(lsm_skel);
+
+ exit(-err);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
+{
+ int fs_fd = -1, token_fd = -1, err;
+ char one = 1;
+
+ err = recvfd(sock_fd, &fs_fd);
+ if (!ASSERT_OK(err, "recv_bpffs_fd"))
+ goto cleanup;
+
+ err = materialize_bpffs_fd(fs_fd, bpffs_opts);
+ if (!ASSERT_GE(err, 0, "materialize_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* notify the child that we did the fsconfig() calls and it can proceed. */
+ err = write(sock_fd, &one, sizeof(one));
+ if (!ASSERT_EQ(err, sizeof(one), "send_one"))
+ goto cleanup;
+ zclose(fs_fd);
+
+ /* receive BPF token FD back from child for some extra tests */
+ err = recvfd(sock_fd, &token_fd);
+ if (!ASSERT_OK(err, "recv_token_fd"))
+ goto cleanup;
+
+ err = wait_for_pid(child_pid);
+ ASSERT_OK(err, "waitpid_child");
+
+cleanup:
+ zclose(sock_fd);
+ zclose(fs_fd);
+ zclose(token_fd);
+
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static void subtest_userns(struct bpffs_opts *bpffs_opts,
+ child_callback_fn child_cb)
+{
+ int sock_fds[2] = { -1, -1 };
+ int child_pid = 0, err;
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
+ if (!ASSERT_OK(err, "socketpair"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GE(child_pid, 0, "fork"))
+ goto cleanup;
+
+ if (child_pid == 0) {
+ zclose(sock_fds[0]);
+ return child(sock_fds[1], bpffs_opts, child_cb);
+
+ } else {
+ zclose(sock_fds[1]);
+ return parent(child_pid, bpffs_opts, sock_fds[0]);
+ }
+
+cleanup:
+ zclose(sock_fds[0]);
+ zclose(sock_fds[1]);
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts);
+ int err, token_fd = -1, map_fd = -1;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no token, no CAP_BPF -> fail */
+ map_opts.map_flags = 0;
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* token without CAP_BPF -> fail */
+ map_opts.map_flags = BPF_F_TOKEN_FD;
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* CAP_BPF without token -> fail */
+ map_opts.map_flags = 0;
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* finally, namespaced CAP_BPF + token -> success */
+ map_opts.map_flags = BPF_F_TOKEN_FD;
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+cleanup:
+ zclose(token_fd);
+ zclose(map_fd);
+ return err;
+}
+
+static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
+ int err, token_fd = -1, btf_fd = -1;
+ const void *raw_btf_data;
+ struct btf *btf = NULL;
+ __u32 raw_btf_size;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* setup a trivial BTF data to load to the kernel */
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ goto cleanup;
+
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data"))
+ goto cleanup;
+
+ /* no token + no CAP_BPF -> failure */
+ btf_opts.btf_flags = 0;
+ btf_opts.token_fd = 0;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* token + no CAP_BPF -> failure */
+ btf_opts.btf_flags = BPF_F_TOKEN_FD;
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* token + CAP_BPF -> success */
+ btf_opts.btf_flags = BPF_F_TOKEN_FD;
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ btf__free(btf);
+ zclose(btf_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
+ int err, token_fd = -1, prog_fd = -1;
+ struct bpf_insn insns[] = {
+ /* bpf_jiffies64() requires CAP_BPF */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ /* bpf_get_current_task() requires CAP_PERFMON */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
+ /* r0 = 0; exit; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = ARRAY_SIZE(insns);
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* validate we can successfully load BPF program with token; this
+ * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
+ * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
+ * BPF token wired properly in a bunch of places in the kernel
+ */
+ prog_opts.prog_flags = BPF_F_TOKEN_FD;
+ prog_opts.token_fd = token_fd;
+ prog_opts.expected_attach_type = BPF_XDP;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no token + caps -> failure */
+ prog_opts.prog_flags = 0;
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no caps + token -> failure */
+ prog_opts.prog_flags = BPF_F_TOKEN_FD;
+ prog_opts.token_fd = token_fd;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no caps + no token -> definitely a failure */
+ prog_opts.prog_flags = 0;
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = 0;
+cleanup:
+ zclose(prog_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_map *skel;
+ int err;
+
+ skel = priv_map__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_map__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_map__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = priv_map__load(skel);
+ priv_map__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_prog *skel;
+ int err;
+
+ skel = priv_prog__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_prog__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ /* provide BPF token, but reject bpf_token_capable() with LSM */
+ lsm_skel->bss->reject_capable = true;
+ lsm_skel->bss->reject_cmd = false;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load"))
+ return -EINVAL;
+
+ /* provide BPF token, but reject bpf_token_cmd() with LSM */
+ lsm_skel->bss->reject_capable = false;
+ lsm_skel->bss->reject_cmd = true;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_freplace_setup(int mnt_fd, struct priv_freplace_prog **fr_skel,
+ struct priv_prog **skel, int *tgt_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ int err;
+ char buf[256];
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ *skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(*skel, "priv_prog__open_opts"))
+ return -EINVAL;
+ err = priv_prog__load(*skel);
+ if (!ASSERT_OK(err, "priv_prog__load"))
+ return -EINVAL;
+
+ *fr_skel = priv_freplace_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(*skel, "priv_freplace_prog__open_opts"))
+ return -EINVAL;
+
+ *tgt_fd = bpf_program__fd((*skel)->progs.xdp_prog1);
+ return 0;
+}
+
+/* Verify that freplace works from user namespace, because bpf token is loaded
+ * in bpf_object__prepare
+ */
+static int userns_obj_priv_freplace_prog(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_object__prepare(fr_skel->obj);
+ if (!ASSERT_OK(err, "freplace__prepare"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (!ASSERT_OK(err, "set_attach_target"))
+ goto out;
+
+ err = priv_freplace_prog__load(fr_skel);
+ ASSERT_OK(err, "priv_freplace_prog__load");
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
+/* Verify that replace fails to set attach target from user namespace without bpf token */
+static int userns_obj_priv_freplace_prog_fail(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (ASSERT_ERR(err, "attach fails"))
+ err = 0;
+ else
+ err = -EINVAL;
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
+/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
+ * which should cause struct_ops application to fail, as BTF won't be uploaded
+ * into the kernel, even if STRUCT_OPS programs themselves are allowed
+ */
+static int validate_struct_ops_load(int mnt_fd, bool expect_success)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (expect_success) {
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+ } else /* expect failure */ {
+ if (!ASSERT_ERR(err, "obj_token_path_load"))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ return validate_struct_ops_load(mnt_fd, false /* should fail */);
+}
+
+static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ return validate_struct_ops_load(mnt_fd, true /* should succeed */);
+}
+
+static const char *token_bpffs_custom_dir()
+{
+ return getenv("BPF_SELFTESTS_BPF_TOKEN_DIR") ?: "/tmp/bpf-token-fs";
+}
+
+#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
+
+static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
+ * token automatically and implicitly
+ */
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ return -EINVAL;
+
+ /* disable implicit BPF token creation by setting
+ * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
+ */
+ err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ return -EINVAL;
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
+ unsetenv(TOKEN_ENVVAR);
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+ unsetenv(TOKEN_ENVVAR);
+
+ /* now the same struct_ops skeleton should succeed thanks to libbpf
+ * creating BPF token from /sys/fs/bpf mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ return -EINVAL;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, should fail */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ const char *custom_dir = token_bpffs_custom_dir();
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over custom location, so libbpf can't create
+ * BPF token implicitly, unless pointed to it through
+ * LIBBPF_BPF_TOKEN_PATH envvar
+ */
+ rmdir(custom_dir);
+ if (!ASSERT_OK(mkdir(custom_dir, 0777), "mkdir_bpffs_custom"))
+ goto err_out;
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, custom_dir, MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ goto err_out;
+
+ /* even though we have BPF FS with delegation, it's not at default
+ * /sys/fs/bpf location, so we still fail to load until envvar is set up
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
+ dummy_st_ops_success__destroy(skel);
+ goto err_out;
+ }
+
+ err = setenv(TOKEN_ENVVAR, custom_dir, 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ goto err_out;
+
+ /* now the same struct_ops skeleton should succeed thanks to libbpf
+ * creating BPF token from custom mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ goto err_out;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, envvar
+ * will be ignored, should fail
+ */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ goto err_out;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ goto err_out;
+
+ rmdir(custom_dir);
+ unsetenv(TOKEN_ENVVAR);
+ return 0;
+err_out:
+ rmdir(custom_dir);
+ unsetenv(TOKEN_ENVVAR);
+ return -EINVAL;
+}
+
+#define bit(n) (1ULL << (n))
+
+static int userns_bpf_token_info(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ int err, token_fd = -1;
+ struct bpf_token_info info;
+ u32 len = sizeof(struct bpf_token_info);
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ memset(&info, 0, len);
+ err = bpf_obj_get_info_by_fd(token_fd, &info, &len);
+ if (!ASSERT_ERR(err, "bpf_obj_get_token_info"))
+ goto cleanup;
+ if (!ASSERT_EQ(info.allowed_cmds, bit(BPF_MAP_CREATE), "token_info_cmds_map_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ if (!ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_XDP), "token_info_progs_xdp")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* The BPF_PROG_TYPE_EXT is not set in token */
+ if (ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_EXT), "token_info_progs_ext"))
+ err = -EINVAL;
+
+cleanup:
+ zclose(token_fd);
+ return err;
+}
+
+void test_token(void)
+{
+ if (test__start_subtest("map_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "map_create",
+ .maps_str = "stack",
+ };
+
+ subtest_userns(&opts, userns_map_create);
+ }
+ if (test__start_subtest("btf_token")) {
+ struct bpffs_opts opts = {
+ .cmds = 1ULL << BPF_BTF_LOAD,
+ };
+
+ subtest_userns(&opts, userns_btf_load);
+ }
+ if (test__start_subtest("prog_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "PROG_LOAD",
+ .progs_str = "XDP",
+ .attachs_str = "xdp",
+ };
+
+ subtest_userns(&opts, userns_prog_load);
+ }
+ if (test__start_subtest("obj_priv_map")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_MAP_CREATE),
+ .maps = bit(BPF_MAP_TYPE_QUEUE),
+ };
+
+ subtest_userns(&opts, userns_obj_priv_map);
+ }
+ if (test__start_subtest("obj_priv_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_PROG_LOAD),
+ .progs = bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_prog);
+ }
+ if (test__start_subtest("obj_priv_freplace_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog);
+ }
+ if (test__start_subtest("obj_priv_freplace_prog_fail")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog_fail);
+ }
+ if (test__start_subtest("obj_priv_btf_fail")) {
+ struct bpffs_opts opts = {
+ /* disallow BTF loading */
+ .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_fail);
+ }
+ if (test__start_subtest("obj_priv_btf_success")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_success);
+ }
+ if (test__start_subtest("obj_priv_implicit_token")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token);
+ }
+ if (test__start_subtest("obj_priv_implicit_token_envvar")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
+ }
+ if (test__start_subtest("bpf_token_info")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_MAP_CREATE),
+ .progs = bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_bpf_token_info);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
index fb095e5cd9af..655d69f0ff0b 100644
--- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
+++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
@@ -1,12 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
-void test_tp_attach_query(void)
+void serial_test_tp_attach_query(void)
{
const int num_progs = 3;
int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
__u32 duration = 0, info_len, saved_prog_ids[num_progs];
- const char *file = "./test_tracepoint.o";
+ const char *file = "./test_tracepoint.bpf.o";
struct perf_event_query_bpf *query;
struct perf_event_attr attr = {};
struct bpf_object *obj[num_progs];
@@ -16,8 +16,13 @@ void test_tp_attach_query(void)
for (i = 0; i < num_progs; i++)
obj[i] = NULL;
- snprintf(buf, sizeof(buf),
- "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0) {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/tracing/events/sched/sched_switch/id");
+ } else {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ }
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
return;
@@ -35,7 +40,7 @@ void test_tp_attach_query(void)
query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
for (i = 0; i < num_progs; i++) {
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
&prog_fd[i]);
if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
goto cleanup1;
@@ -45,8 +50,9 @@ void test_tp_attach_query(void)
prog_info.xlated_prog_len = 0;
prog_info.nr_map_ids = 0;
info_len = sizeof(prog_info);
- err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
- if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
+ err = bpf_prog_get_info_by_fd(prog_fd[i], &prog_info,
+ &info_len);
+ if (CHECK(err, "bpf_prog_get_info_by_fd", "err %d errno %d\n",
err, errno))
goto cleanup1;
saved_prog_ids[i] = prog_info.id;
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c
new file mode 100644
index 000000000000..accc42e01f8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_tp_btf_nullable.skel.h"
+
+void test_tp_btf_nullable(void)
+{
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ RUN_TESTS(test_tp_btf_nullable);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
index 924441d4362d..aabdff7bea3e 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_ext.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
@@ -23,8 +23,12 @@ void test_trace_ext(void)
int err, pkt_fd, ext_fd;
struct bpf_program *prog;
char buf[100];
- __u32 retval;
__u64 len;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
/* open/load/attach test_pkt_md_access */
skel_pkt = test_pkt_md_access__open_and_load();
@@ -77,32 +81,32 @@ void test_trace_ext(void)
/* load/attach tracing */
err = test_trace_ext_tracing__load(skel_trace);
- if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) {
+ if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new load")) {
libbpf_strerror(err, buf, sizeof(buf));
fprintf(stderr, "%s\n", buf);
goto cleanup;
}
err = test_trace_ext_tracing__attach(skel_trace);
- if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new attach"))
goto cleanup;
/* trigger the test */
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(pkt_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+ ASSERT_OK(topts.retval, "test_run_opts retval");
bss_ext = skel_ext->bss;
bss_trace = skel_trace->bss;
len = bss_ext->ext_called;
- CHECK(bss_ext->ext_called == 0,
- "check", "failed to trigger freplace/test_pkt_md_access\n");
- CHECK(bss_trace->fentry_called != len,
- "check", "failed to trigger fentry/test_pkt_md_access_new\n");
- CHECK(bss_trace->fexit_called != len,
- "check", "failed to trigger fexit/test_pkt_md_access_new\n");
+ ASSERT_NEQ(bss_ext->ext_called, 0,
+ "failed to trigger freplace/test_pkt_md_access");
+ ASSERT_EQ(bss_trace->fentry_called, len,
+ "failed to trigger fentry/test_pkt_md_access_new");
+ ASSERT_EQ(bss_trace->fexit_called, len,
+ "failed to trigger fexit/test_pkt_md_access_new");
cleanup:
test_trace_ext_tracing__destroy(skel_trace);
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index 39b0decb1bb2..e56e88596d64 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -3,73 +3,56 @@
#include <test_progs.h>
-#include "trace_printk.skel.h"
+#include "trace_printk.lskel.h"
-#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "testing,testing"
-void test_trace_printk(void)
+static void trace_pipe_cb(const char *str, void *data)
{
- int err, iter = 0, duration = 0, found = 0;
- struct trace_printk__bss *bss;
- struct trace_printk *skel;
- char *buf = NULL;
- FILE *fp = NULL;
- size_t buflen;
-
- skel = trace_printk__open();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (strstr(str, SEARCHMSG) != NULL)
+ (*(int *)data)++;
+}
+
+void serial_test_trace_printk(void)
+{
+ struct trace_printk_lskel__bss *bss;
+ struct trace_printk_lskel *skel;
+ int err = 0, found = 0;
+
+ skel = trace_printk_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "trace_printk__open"))
return;
- err = trace_printk__load(skel);
- if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ ASSERT_EQ(skel->rodata->fmt[0], 'T', "skel->rodata->fmt[0]");
+ skel->rodata->fmt[0] = 't';
+
+ err = trace_printk_lskel__load(skel);
+ if (!ASSERT_OK(err, "trace_printk__load"))
goto cleanup;
bss = skel->bss;
- err = trace_printk__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ err = trace_printk_lskel__attach(skel);
+ if (!ASSERT_OK(err, "trace_printk__attach"))
goto cleanup;
- fp = fopen(TRACEBUF, "r");
- if (CHECK(fp == NULL, "could not open trace buffer",
- "error %d opening %s", errno, TRACEBUF))
- goto cleanup;
-
- /* We do not want to wait forever if this test fails... */
- fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
-
/* wait for tracepoint to trigger */
usleep(1);
- trace_printk__detach(skel);
+ trace_printk_lskel__detach(skel);
- if (CHECK(bss->trace_printk_ran == 0,
- "bpf_trace_printk never ran",
- "ran == %d", bss->trace_printk_ran))
+ if (!ASSERT_GT(bss->trace_printk_ran, 0, "bss->trace_printk_ran"))
goto cleanup;
- if (CHECK(bss->trace_printk_ret <= 0,
- "bpf_trace_printk returned <= 0 value",
- "got %d", bss->trace_printk_ret))
+ if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret"))
goto cleanup;
/* verify our search string is in the trace buffer */
- while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
- if (strstr(buf, SEARCHMSG) != NULL)
- found++;
- if (found == bss->trace_printk_ran)
- break;
- if (++iter > 1000)
- break;
- }
-
- if (CHECK(!found, "message from bpf_trace_printk not found",
- "no instance of %s in %s", SEARCHMSG, TRACEBUF))
+ ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000),
+ "read_trace_pipe_iter");
+
+ if (!ASSERT_EQ(found, bss->trace_printk_ran, "found"))
goto cleanup;
cleanup:
- trace_printk__destroy(skel);
- free(buf);
- if (fp)
- fclose(fp);
+ trace_printk_lskel__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
new file mode 100644
index 000000000000..2af6a6f2096a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+
+#include "trace_vprintk.lskel.h"
+
+#define SEARCHMSG "1,2,3,4,5,6,7,8,9,10"
+
+static void trace_pipe_cb(const char *str, void *data)
+{
+ if (strstr(str, SEARCHMSG) != NULL)
+ (*(int *)data)++;
+}
+
+void serial_test_trace_vprintk(void)
+{
+ struct trace_vprintk_lskel__bss *bss;
+ struct trace_vprintk_lskel *skel;
+ int err = 0, found = 0;
+
+ skel = trace_vprintk_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load"))
+ goto cleanup;
+
+ bss = skel->bss;
+
+ err = trace_vprintk_lskel__attach(skel);
+ if (!ASSERT_OK(err, "trace_vprintk__attach"))
+ goto cleanup;
+
+ /* wait for tracepoint to trigger */
+ usleep(1);
+ trace_vprintk_lskel__detach(skel);
+
+ if (!ASSERT_GT(bss->trace_vprintk_ran, 0, "bss->trace_vprintk_ran"))
+ goto cleanup;
+
+ if (!ASSERT_GT(bss->trace_vprintk_ret, 0, "bss->trace_vprintk_ret"))
+ goto cleanup;
+
+ /* verify our search string is in the trace buffer */
+ ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000),
+ "read_trace_pipe_iter");
+
+ if (!ASSERT_EQ(found, bss->trace_vprintk_ran, "found"))
+ goto cleanup;
+
+ if (!ASSERT_LT(bss->null_data_vprintk_ret, 0, "bss->null_data_vprintk_ret"))
+ goto cleanup;
+
+cleanup:
+ trace_vprintk_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
new file mode 100644
index 000000000000..10e231965589
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "tracing_failure.skel.h"
+
+static void test_bpf_spin_lock(bool is_spin_lock)
+{
+ struct tracing_failure *skel;
+ int err;
+
+ skel = tracing_failure__open();
+ if (!ASSERT_OK_PTR(skel, "tracing_failure__open"))
+ return;
+
+ if (is_spin_lock)
+ bpf_program__set_autoload(skel->progs.test_spin_lock, true);
+ else
+ bpf_program__set_autoload(skel->progs.test_spin_unlock, true);
+
+ err = tracing_failure__load(skel);
+ if (!ASSERT_OK(err, "tracing_failure__load"))
+ goto out;
+
+ err = tracing_failure__attach(skel);
+ ASSERT_ERR(err, "tracing_failure__attach");
+
+out:
+ tracing_failure__destroy(skel);
+}
+
+static void test_tracing_fail_prog(const char *prog_name, const char *exp_msg)
+{
+ struct tracing_failure *skel;
+ struct bpf_program *prog;
+ char log_buf[256];
+ int err;
+
+ skel = tracing_failure__open();
+ if (!ASSERT_OK_PTR(skel, "tracing_failure__open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+
+ bpf_program__set_autoload(prog, true);
+ bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf));
+
+ err = tracing_failure__load(skel);
+ if (!ASSERT_ERR(err, "tracing_failure__load"))
+ goto out;
+
+ ASSERT_HAS_SUBSTR(log_buf, exp_msg, "log_buf");
+out:
+ tracing_failure__destroy(skel);
+}
+
+static void test_tracing_deny(void)
+{
+ int btf_id;
+
+ /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */
+ btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY);
+ if (btf_id <= 0) {
+ test__skip();
+ return;
+ }
+
+ test_tracing_fail_prog("tracing_deny",
+ "Attaching tracing programs to function '__rcu_read_lock' is rejected.");
+}
+
+static void test_fexit_noreturns(void)
+{
+ test_tracing_fail_prog("fexit_noreturns",
+ "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.");
+}
+
+void test_tracing_failure(void)
+{
+ if (test__start_subtest("bpf_spin_lock"))
+ test_bpf_spin_lock(true);
+ if (test__start_subtest("bpf_spin_unlock"))
+ test_bpf_spin_lock(false);
+ if (test__start_subtest("tracing_deny"))
+ test_tracing_deny();
+ if (test__start_subtest("fexit_noreturns"))
+ test_fexit_noreturns();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
new file mode 100644
index 000000000000..6f8c0bfb0415
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "tracing_struct.skel.h"
+#include "tracing_struct_many_args.skel.h"
+
+static void test_struct_args(void)
+{
+ struct tracing_struct *skel;
+ int err;
+
+ skel = tracing_struct__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load"))
+ return;
+
+ err = tracing_struct__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct__attach"))
+ goto destroy_skel;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->t1_a_a, 2, "t1:a.a");
+ ASSERT_EQ(skel->bss->t1_a_b, 3, "t1:a.b");
+ ASSERT_EQ(skel->bss->t1_b, 1, "t1:b");
+ ASSERT_EQ(skel->bss->t1_c, 4, "t1:c");
+
+ ASSERT_EQ(skel->bss->t1_nregs, 4, "t1 nregs");
+ ASSERT_EQ(skel->bss->t1_reg0, 2, "t1 reg0");
+ ASSERT_EQ(skel->bss->t1_reg1, 3, "t1 reg1");
+ ASSERT_EQ(skel->bss->t1_reg2, 1, "t1 reg2");
+ ASSERT_EQ(skel->bss->t1_reg3, 4, "t1 reg3");
+ ASSERT_EQ(skel->bss->t1_ret, 10, "t1 ret");
+
+ ASSERT_EQ(skel->bss->t2_a, 1, "t2:a");
+ ASSERT_EQ(skel->bss->t2_b_a, 2, "t2:b.a");
+ ASSERT_EQ(skel->bss->t2_b_b, 3, "t2:b.b");
+ ASSERT_EQ(skel->bss->t2_c, 4, "t2:c");
+ ASSERT_EQ(skel->bss->t2_ret, 10, "t2 ret");
+
+ ASSERT_EQ(skel->bss->t3_a, 1, "t3:a");
+ ASSERT_EQ(skel->bss->t3_b, 4, "t3:b");
+ ASSERT_EQ(skel->bss->t3_c_a, 2, "t3:c.a");
+ ASSERT_EQ(skel->bss->t3_c_b, 3, "t3:c.b");
+ ASSERT_EQ(skel->bss->t3_ret, 10, "t3 ret");
+
+ ASSERT_EQ(skel->bss->t4_a_a, 10, "t4:a.a");
+ ASSERT_EQ(skel->bss->t4_b, 1, "t4:b");
+ ASSERT_EQ(skel->bss->t4_c, 2, "t4:c");
+ ASSERT_EQ(skel->bss->t4_d, 3, "t4:d");
+ ASSERT_EQ(skel->bss->t4_e_a, 2, "t4:e.a");
+ ASSERT_EQ(skel->bss->t4_e_b, 3, "t4:e.b");
+ ASSERT_EQ(skel->bss->t4_ret, 21, "t4 ret");
+
+ ASSERT_EQ(skel->bss->t5_ret, 1, "t5 ret");
+
+ ASSERT_EQ(skel->bss->t6, 1, "t6 ret");
+
+destroy_skel:
+ tracing_struct__destroy(skel);
+}
+
+static void test_struct_many_args(void)
+{
+ struct tracing_struct_many_args *skel;
+ int err;
+
+ skel = tracing_struct_many_args__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct_many_args__open_and_load"))
+ return;
+
+ err = tracing_struct_many_args__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct_many_args__attach"))
+ goto destroy_skel;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->t7_a, 16, "t7:a");
+ ASSERT_EQ(skel->bss->t7_b, 17, "t7:b");
+ ASSERT_EQ(skel->bss->t7_c, 18, "t7:c");
+ ASSERT_EQ(skel->bss->t7_d, 19, "t7:d");
+ ASSERT_EQ(skel->bss->t7_e, 20, "t7:e");
+ ASSERT_EQ(skel->bss->t7_f_a, 21, "t7:f.a");
+ ASSERT_EQ(skel->bss->t7_f_b, 22, "t7:f.b");
+ ASSERT_EQ(skel->bss->t7_ret, 133, "t7 ret");
+
+ ASSERT_EQ(skel->bss->t8_a, 16, "t8:a");
+ ASSERT_EQ(skel->bss->t8_b, 17, "t8:b");
+ ASSERT_EQ(skel->bss->t8_c, 18, "t8:c");
+ ASSERT_EQ(skel->bss->t8_d, 19, "t8:d");
+ ASSERT_EQ(skel->bss->t8_e, 20, "t8:e");
+ ASSERT_EQ(skel->bss->t8_f_a, 21, "t8:f.a");
+ ASSERT_EQ(skel->bss->t8_f_b, 22, "t8:f.b");
+ ASSERT_EQ(skel->bss->t8_g, 23, "t8:g");
+ ASSERT_EQ(skel->bss->t8_ret, 156, "t8 ret");
+
+ ASSERT_EQ(skel->bss->t9_a, 16, "t9:a");
+ ASSERT_EQ(skel->bss->t9_b, 17, "t9:b");
+ ASSERT_EQ(skel->bss->t9_c, 18, "t9:c");
+ ASSERT_EQ(skel->bss->t9_d, 19, "t9:d");
+ ASSERT_EQ(skel->bss->t9_e, 20, "t9:e");
+ ASSERT_EQ(skel->bss->t9_f, 21, "t9:f");
+ ASSERT_EQ(skel->bss->t9_g, 22, "t9:f");
+ ASSERT_EQ(skel->bss->t9_h_a, 23, "t9:h.a");
+ ASSERT_EQ(skel->bss->t9_h_b, 24, "t9:h.b");
+ ASSERT_EQ(skel->bss->t9_h_c, 25, "t9:h.c");
+ ASSERT_EQ(skel->bss->t9_h_d, 26, "t9:h.d");
+ ASSERT_EQ(skel->bss->t9_i, 27, "t9:i");
+ ASSERT_EQ(skel->bss->t9_ret, 258, "t9 ret");
+
+destroy_skel:
+ tracing_struct_many_args__destroy(skel);
+}
+
+static void test_union_args(void)
+{
+ struct tracing_struct *skel;
+ int err;
+
+ skel = tracing_struct__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load"))
+ return;
+
+ err = tracing_struct__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct__attach"))
+ goto out;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->ut1_a_a, 1, "ut1:a.arg.a");
+ ASSERT_EQ(skel->bss->ut1_b, 4, "ut1:b");
+ ASSERT_EQ(skel->bss->ut1_c, 5, "ut1:c");
+
+ ASSERT_EQ(skel->bss->ut2_a, 6, "ut2:a");
+ ASSERT_EQ(skel->bss->ut2_b_a, 2, "ut2:b.arg.a");
+ ASSERT_EQ(skel->bss->ut2_b_b, 3, "ut2:b.arg.b");
+
+out:
+ tracing_struct__destroy(skel);
+}
+
+void test_tracing_struct(void)
+{
+ if (test__start_subtest("struct_args"))
+ test_struct_args();
+ if (test__start_subtest("struct_many_args"))
+ test_struct_many_args();
+ if (test__start_subtest("union_args"))
+ test_union_args();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index f3022d934e2d..6cd7349d4a2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -1,123 +1,100 @@
// SPDX-License-Identifier: GPL-2.0-only
#define _GNU_SOURCE
-#include <sched.h>
-#include <sys/prctl.h>
#include <test_progs.h>
-#define MAX_TRAMP_PROGS 38
-
struct inst {
struct bpf_object *obj;
- struct bpf_link *link_fentry;
- struct bpf_link *link_fexit;
+ struct bpf_link *link;
};
-static int test_task_rename(void)
-{
- int fd, duration = 0, err;
- char buf[] = "test_overhead";
-
- fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
- if (CHECK(fd < 0, "open /proc", "err %d", errno))
- return -1;
- err = write(fd, buf, sizeof(buf));
- if (err < 0) {
- CHECK(err < 0, "task rename", "err %d", errno);
- close(fd);
- return -1;
- }
- close(fd);
- return 0;
-}
-
-static struct bpf_link *load(struct bpf_object *obj, const char *name)
+static struct bpf_program *load_prog(char *file, char *name, struct inst *inst)
{
+ struct bpf_object *obj;
struct bpf_program *prog;
- int duration = 0;
+ int err;
+
+ obj = bpf_object__open_file(file, NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
+ return NULL;
- prog = bpf_object__find_program_by_title(obj, name);
- if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
- return ERR_PTR(-EINVAL);
- return bpf_program__attach_trace(prog);
+ inst->obj = obj;
+
+ err = bpf_object__load(obj);
+ if (!ASSERT_OK(err, "obj_load"))
+ return NULL;
+
+ prog = bpf_object__find_program_by_name(obj, name);
+ if (!ASSERT_OK_PTR(prog, "obj_find_prog"))
+ return NULL;
+
+ return prog;
}
-void test_trampoline_count(void)
+/* TODO: use different target function to run in concurrent mode */
+void serial_test_trampoline_count(void)
{
- const char *fentry_name = "fentry/__set_task_comm";
- const char *fexit_name = "fexit/__set_task_comm";
- const char *object = "test_trampoline_count.o";
- struct inst inst[MAX_TRAMP_PROGS] = {};
- int err, i = 0, duration = 0;
- struct bpf_object *obj;
+ char *file = "test_trampoline_count.bpf.o";
+ char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" };
+ int bpf_max_tramp_links, err, i, prog_fd;
+ struct bpf_program *prog;
struct bpf_link *link;
- char comm[16] = {};
+ struct inst *inst;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ bpf_max_tramp_links = get_bpf_max_tramp_links();
+ if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links"))
+ return;
+ inst = calloc(bpf_max_tramp_links + 1, sizeof(*inst));
+ if (!ASSERT_OK_PTR(inst, "inst"))
+ return;
/* attach 'allowed' trampoline programs */
- for (i = 0; i < MAX_TRAMP_PROGS; i++) {
- obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
- obj = NULL;
+ for (i = 0; i < bpf_max_tramp_links; i++) {
+ prog = load_prog(file, progs[i % ARRAY_SIZE(progs)], &inst[i]);
+ if (!prog)
goto cleanup;
- }
- err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "attach_prog"))
goto cleanup;
- inst[i].obj = obj;
- obj = NULL;
-
- if (rand() % 2) {
- link = load(inst[i].obj, fentry_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
- link = NULL;
- goto cleanup;
- }
- inst[i].link_fentry = link;
- } else {
- link = load(inst[i].obj, fexit_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
- link = NULL;
- goto cleanup;
- }
- inst[i].link_fexit = link;
- }
+
+ inst[i].link = link;
}
/* and try 1 extra.. */
- obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
- obj = NULL;
+ prog = load_prog(file, "fmod_ret_test", &inst[i]);
+ if (!prog)
goto cleanup;
- }
-
- err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
- goto cleanup_extra;
/* ..that needs to fail */
- link = load(obj, fentry_name);
- if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
- bpf_link__destroy(link);
- goto cleanup_extra;
+ link = bpf_program__attach(prog);
+ if (!ASSERT_ERR_PTR(link, "attach_prog")) {
+ inst[i].link = link;
+ goto cleanup;
}
/* with E2BIG error */
- CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+ if (!ASSERT_EQ(libbpf_get_error(link), -E2BIG, "E2BIG"))
+ goto cleanup;
+ if (!ASSERT_EQ(link, NULL, "ptr_is_null"))
+ goto cleanup;
+
+ /* and finally execute the probe */
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto cleanup;
- /* and finaly execute the probe */
- if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
- goto cleanup_extra;
- CHECK_FAIL(test_task_rename());
- CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));
+ ASSERT_EQ(opts.retval & 0xffff, 33, "bpf_modify_return_test.result");
+ ASSERT_EQ(opts.retval >> 16, 2, "bpf_modify_return_test.side_effect");
-cleanup_extra:
- bpf_object__close(obj);
cleanup:
- if (i >= MAX_TRAMP_PROGS)
- i = MAX_TRAMP_PROGS - 1;
for (; i >= 0; i--) {
- bpf_link__destroy(inst[i].link_fentry);
- bpf_link__destroy(inst[i].link_fexit);
+ bpf_link__destroy(inst[i].link);
bpf_object__close(inst[i].obj);
}
+ free(inst);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/type_cast.c b/tools/testing/selftests/bpf/prog_tests/type_cast.c
new file mode 100644
index 000000000000..9317d5fa2635
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/type_cast.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "type_cast.skel.h"
+
+static void test_xdp(void)
+{
+ struct type_cast *skel;
+ int err, prog_fd;
+ char buf[128];
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
+
+ skel = type_cast__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.md_xdp, true);
+ err = type_cast__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.md_xdp);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp test_run retval");
+
+ ASSERT_EQ(skel->bss->ifindex, 1, "xdp_md ifindex");
+ ASSERT_EQ(skel->bss->ifindex, skel->bss->ingress_ifindex, "xdp_md ingress_ifindex");
+ ASSERT_STREQ(skel->bss->name, "lo", "xdp_md name");
+ ASSERT_NEQ(skel->bss->inum, 0, "xdp_md inum");
+
+out:
+ type_cast__destroy(skel);
+}
+
+static void test_tc(void)
+{
+ struct type_cast *skel;
+ int err, prog_fd;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = type_cast__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.md_skb, true);
+ err = type_cast__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.md_skb);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "tc test_run retval");
+
+ ASSERT_EQ(skel->bss->meta_len, 0, "skb meta_len");
+ ASSERT_EQ(skel->bss->frag0_len, 0, "skb frag0_len");
+ ASSERT_NEQ(skel->bss->kskb_len, 0, "skb len");
+ ASSERT_NEQ(skel->bss->kskb2_len, 0, "skb2 len");
+ ASSERT_EQ(skel->bss->kskb_len, skel->bss->kskb2_len, "skb len compare");
+
+out:
+ type_cast__destroy(skel);
+}
+
+static const char * const negative_tests[] = {
+ "untrusted_ptr",
+ "kctx_u64",
+};
+
+static void test_negative(void)
+{
+ struct bpf_program *prog;
+ struct type_cast *skel;
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(negative_tests); i++) {
+ skel = type_cast__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, negative_tests[i]);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+ err = type_cast__load(skel);
+ ASSERT_ERR(err, "skel_load");
+out:
+ type_cast__destroy(skel);
+ }
+}
+
+void test_type_cast(void)
+{
+ if (test__start_subtest("xdp"))
+ test_xdp();
+ if (test__start_subtest("tc"))
+ test_tc();
+ if (test__start_subtest("negative"))
+ test_negative();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
index 2aba09d4d01b..2643d896ddae 100644
--- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -5,8 +5,6 @@
#include <sys/types.h>
#include <sys/socket.h>
-static int duration;
-
void test_udp_limit(void)
{
struct udp_limit *skel;
@@ -14,30 +12,29 @@ void test_udp_limit(void)
int cgroup_fd;
cgroup_fd = test__join_cgroup("/udp_limit");
- if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+ if (!ASSERT_GE(cgroup_fd, 0, "cg-join"))
return;
skel = udp_limit__open_and_load();
- if (CHECK(!skel, "skel-load", "errno %d", errno))
+ if (!ASSERT_OK_PTR(skel, "skel-load"))
goto close_cgroup_fd;
skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock"))
+ goto close_skeleton;
skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
- if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
- "cg-attach", "sock %ld sock_release %ld",
- PTR_ERR(skel->links.sock),
- PTR_ERR(skel->links.sock_release)))
+ if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release"))
goto close_skeleton;
/* BPF program enforces a single UDP socket per cgroup,
* verify that.
*/
fd1 = socket(AF_INET, SOCK_DGRAM, 0);
- if (CHECK(fd1 < 0, "fd1", "errno %d", errno))
+ if (!ASSERT_GE(fd1, 0, "socket(fd1)"))
goto close_skeleton;
fd2 = socket(AF_INET, SOCK_DGRAM, 0);
- if (CHECK(fd2 >= 0, "fd2", "errno %d", errno))
+ if (!ASSERT_LT(fd2, 0, "socket(fd2)"))
goto close_skeleton;
/* We can reopen again after close. */
@@ -45,7 +42,7 @@ void test_udp_limit(void)
fd1 = -1;
fd1 = socket(AF_INET, SOCK_DGRAM, 0);
- if (CHECK(fd1 < 0, "fd1-again", "errno %d", errno))
+ if (!ASSERT_GE(fd1, 0, "socket(fd1-again)"))
goto close_skeleton;
/* Make sure the program was invoked the expected
@@ -55,13 +52,11 @@ void test_udp_limit(void)
* - close fd1 - BPF_CGROUP_INET_SOCK_RELEASE
* - open fd1 again - BPF_CGROUP_INET_SOCK_CREATE
*/
- if (CHECK(skel->bss->invocations != 4, "bss-invocations",
- "invocations=%d", skel->bss->invocations))
+ if (!ASSERT_EQ(skel->bss->invocations, 4, "bss-invocations"))
goto close_skeleton;
/* We should still have a single socket in use */
- if (CHECK(skel->bss->in_use != 1, "bss-in_use",
- "in_use=%d", skel->bss->in_use))
+ if (!ASSERT_EQ(skel->bss->in_use, 1, "bss-in_use"))
goto close_skeleton;
close_skeleton:
diff --git a/tools/testing/selftests/bpf/prog_tests/uninit_stack.c b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
new file mode 100644
index 000000000000..e64c71948491
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "uninit_stack.skel.h"
+
+void test_uninit_stack(void)
+{
+ RUN_TESTS(uninit_stack);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
new file mode 100644
index 000000000000..472f4f9fa95f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_unpriv_bpf_disabled.skel.h"
+
+#include "cap_helpers.h"
+#include "bpf_util.h"
+
+/* Using CAP_LAST_CAP is risky here, since it can get pulled in from
+ * an old /usr/include/linux/capability.h and be < CAP_BPF; as a result
+ * CAP_BPF would not be included in ALL_CAPS. Instead use CAP_BPF as
+ * we know its value is correct since it is explicitly defined in
+ * cap_helpers.h.
+ */
+#define ALL_CAPS ((2ULL << CAP_BPF) - 1)
+
+#define PINPATH "/sys/fs/bpf/unpriv_bpf_disabled_"
+#define NUM_MAPS 7
+
+static __u32 got_perfbuf_val;
+static __u32 got_ringbuf_val;
+
+static int process_ringbuf(void *ctx, void *data, size_t len)
+{
+ if (ASSERT_EQ(len, sizeof(__u32), "ringbuf_size_valid"))
+ got_ringbuf_val = *(__u32 *)data;
+ return 0;
+}
+
+static void process_perfbuf(void *ctx, int cpu, void *data, __u32 len)
+{
+ if (ASSERT_EQ(len, sizeof(__u32), "perfbuf_size_valid"))
+ got_perfbuf_val = *(__u32 *)data;
+}
+
+static int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val)
+{
+ int ret = 0;
+ FILE *fp;
+
+ fp = fopen(sysctl_path, "r+");
+ if (!fp)
+ return -errno;
+ if (old_val && fscanf(fp, "%s", old_val) <= 0) {
+ ret = -ENOENT;
+ } else if (!old_val || strcmp(old_val, new_val) != 0) {
+ fseek(fp, 0, SEEK_SET);
+ if (fprintf(fp, "%s", new_val) < 0)
+ ret = -errno;
+ }
+ fclose(fp);
+
+ return ret;
+}
+
+static void test_unpriv_bpf_disabled_positive(struct test_unpriv_bpf_disabled *skel,
+ __u32 prog_id, int prog_fd, int perf_fd,
+ char **map_paths, int *map_fds)
+{
+ struct perf_buffer *perfbuf = NULL;
+ struct ring_buffer *ringbuf = NULL;
+ int i, nr_cpus, link_fd = -1;
+
+ nr_cpus = bpf_num_possible_cpus();
+
+ skel->bss->perfbuf_val = 1;
+ skel->bss->ringbuf_val = 2;
+
+ /* Positive tests for unprivileged BPF disabled. Verify we can
+ * - retrieve and interact with pinned maps;
+ * - set up and interact with perf buffer;
+ * - set up and interact with ring buffer;
+ * - create a link
+ */
+ perfbuf = perf_buffer__new(bpf_map__fd(skel->maps.perfbuf), 8, process_perfbuf, NULL, NULL,
+ NULL);
+ if (!ASSERT_OK_PTR(perfbuf, "perf_buffer__new"))
+ goto cleanup;
+
+ ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), process_ringbuf, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+ goto cleanup;
+
+ /* trigger & validate perf event, ringbuf output */
+ usleep(1);
+
+ ASSERT_GT(perf_buffer__poll(perfbuf, 100), -1, "perf_buffer__poll");
+ ASSERT_EQ(got_perfbuf_val, skel->bss->perfbuf_val, "check_perfbuf_val");
+ ASSERT_EQ(ring_buffer__consume(ringbuf), 1, "ring_buffer__consume");
+ ASSERT_EQ(got_ringbuf_val, skel->bss->ringbuf_val, "check_ringbuf_val");
+
+ for (i = 0; i < NUM_MAPS; i++) {
+ map_fds[i] = bpf_obj_get(map_paths[i]);
+ if (!ASSERT_GT(map_fds[i], -1, "obj_get"))
+ goto cleanup;
+ }
+
+ for (i = 0; i < NUM_MAPS; i++) {
+ bool prog_array = strstr(map_paths[i], "prog_array") != NULL;
+ bool array = strstr(map_paths[i], "array") != NULL;
+ bool buf = strstr(map_paths[i], "buf") != NULL;
+ __u32 key = 0, vals[nr_cpus], lookup_vals[nr_cpus];
+ __u32 expected_val = 1;
+ int j;
+
+ /* skip ringbuf, perfbuf */
+ if (buf)
+ continue;
+
+ for (j = 0; j < nr_cpus; j++)
+ vals[j] = expected_val;
+
+ if (prog_array) {
+ /* need valid prog array value */
+ vals[0] = prog_fd;
+ /* prog array lookup returns prog id, not fd */
+ expected_val = prog_id;
+ }
+ ASSERT_OK(bpf_map_update_elem(map_fds[i], &key, vals, 0), "map_update_elem");
+ ASSERT_OK(bpf_map_lookup_elem(map_fds[i], &key, &lookup_vals), "map_lookup_elem");
+ ASSERT_EQ(lookup_vals[0], expected_val, "map_lookup_elem_values");
+ if (!array)
+ ASSERT_OK(bpf_map_delete_elem(map_fds[i], &key), "map_delete_elem");
+ }
+
+ link_fd = bpf_link_create(bpf_program__fd(skel->progs.handle_perf_event), perf_fd,
+ BPF_PERF_EVENT, NULL);
+ ASSERT_GT(link_fd, 0, "link_create");
+
+cleanup:
+ if (link_fd)
+ close(link_fd);
+ if (perfbuf)
+ perf_buffer__free(perfbuf);
+ if (ringbuf)
+ ring_buffer__free(ringbuf);
+}
+
+static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *skel,
+ __u32 prog_id, int prog_fd, int perf_fd,
+ char **map_paths, int *map_fds)
+{
+ const struct bpf_insn prog_insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ const size_t prog_insn_cnt = ARRAY_SIZE(prog_insns);
+ LIBBPF_OPTS(bpf_prog_load_opts, load_opts);
+ struct bpf_map_info map_info = {};
+ __u32 map_info_len = sizeof(map_info);
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ struct btf *btf = NULL;
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+ __u32 next;
+ int i;
+
+ /* Negative tests for unprivileged BPF disabled. Verify we cannot
+ * - load BPF programs;
+ * - create BPF maps;
+ * - get a prog/map/link fd by id;
+ * - get next prog/map/link id
+ * - query prog
+ * - BTF load
+ */
+ ASSERT_EQ(bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "simple_prog", "GPL",
+ prog_insns, prog_insn_cnt, &load_opts),
+ -EPERM, "prog_load_fails");
+
+ /* some map types require particular correct parameters which could be
+ * sanity-checked before enforcing -EPERM, so only validate that
+ * the simple ARRAY and HASH maps are failing with -EPERM
+ */
+ for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_ARRAY; i++)
+ ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL),
+ -EPERM, "map_create_fails");
+
+ ASSERT_EQ(bpf_prog_get_fd_by_id(prog_id), -EPERM, "prog_get_fd_by_id_fails");
+ ASSERT_EQ(bpf_prog_get_next_id(prog_id, &next), -EPERM, "prog_get_next_id_fails");
+ ASSERT_EQ(bpf_prog_get_next_id(0, &next), -EPERM, "prog_get_next_id_fails");
+
+ if (ASSERT_OK(bpf_map_get_info_by_fd(map_fds[0], &map_info, &map_info_len),
+ "obj_get_info_by_fd")) {
+ ASSERT_EQ(bpf_map_get_fd_by_id(map_info.id), -EPERM, "map_get_fd_by_id_fails");
+ ASSERT_EQ(bpf_map_get_next_id(map_info.id, &next), -EPERM,
+ "map_get_next_id_fails");
+ }
+ ASSERT_EQ(bpf_map_get_next_id(0, &next), -EPERM, "map_get_next_id_fails");
+
+ if (ASSERT_OK(bpf_link_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter),
+ &link_info, &link_info_len),
+ "obj_get_info_by_fd")) {
+ ASSERT_EQ(bpf_link_get_fd_by_id(link_info.id), -EPERM, "link_get_fd_by_id_fails");
+ ASSERT_EQ(bpf_link_get_next_id(link_info.id, &next), -EPERM,
+ "link_get_next_id_fails");
+ }
+ ASSERT_EQ(bpf_link_get_next_id(0, &next), -EPERM, "link_get_next_id_fails");
+
+ ASSERT_EQ(bpf_prog_query(prog_fd, BPF_TRACE_FENTRY, 0, &attach_flags, prog_ids,
+ &prog_cnt), -EPERM, "prog_query_fails");
+
+ btf = btf__new_empty();
+ if (ASSERT_OK_PTR(btf, "empty_btf") &&
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "unpriv_int_type")) {
+ const void *raw_btf_data;
+ __u32 raw_btf_size;
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
+ ASSERT_EQ(bpf_btf_load(raw_btf_data, raw_btf_size, NULL), -EPERM,
+ "bpf_btf_load_fails");
+ }
+ btf__free(btf);
+}
+
+void test_unpriv_bpf_disabled(void)
+{
+ char *map_paths[NUM_MAPS] = { PINPATH "array",
+ PINPATH "percpu_array",
+ PINPATH "hash",
+ PINPATH "percpu_hash",
+ PINPATH "perfbuf",
+ PINPATH "ringbuf",
+ PINPATH "prog_array" };
+ int map_fds[NUM_MAPS];
+ struct test_unpriv_bpf_disabled *skel;
+ char unprivileged_bpf_disabled_orig[32] = {};
+ char perf_event_paranoid_orig[32] = {};
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ struct perf_event_attr attr = {};
+ int prog_fd, perf_fd = -1, i, ret;
+ __u64 save_caps = 0;
+ __u32 prog_id;
+
+ skel = test_unpriv_bpf_disabled__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->test_pid = getpid();
+
+ map_fds[0] = bpf_map__fd(skel->maps.array);
+ map_fds[1] = bpf_map__fd(skel->maps.percpu_array);
+ map_fds[2] = bpf_map__fd(skel->maps.hash);
+ map_fds[3] = bpf_map__fd(skel->maps.percpu_hash);
+ map_fds[4] = bpf_map__fd(skel->maps.perfbuf);
+ map_fds[5] = bpf_map__fd(skel->maps.ringbuf);
+ map_fds[6] = bpf_map__fd(skel->maps.prog_array);
+
+ for (i = 0; i < NUM_MAPS; i++)
+ ASSERT_OK(bpf_obj_pin(map_fds[i], map_paths[i]), "pin map_fd");
+
+ /* allow user without caps to use perf events */
+ if (!ASSERT_OK(sysctl_set("/proc/sys/kernel/perf_event_paranoid", perf_event_paranoid_orig,
+ "-1"),
+ "set_perf_event_paranoid"))
+ goto cleanup;
+ /* ensure unprivileged bpf disabled is set */
+ ret = sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled",
+ unprivileged_bpf_disabled_orig, "2");
+ if (ret == -EPERM) {
+ /* if unprivileged_bpf_disabled=1, we get -EPERM back; that's okay. */
+ if (!ASSERT_OK(strcmp(unprivileged_bpf_disabled_orig, "1"),
+ "unprivileged_bpf_disabled_on"))
+ goto cleanup;
+ } else {
+ if (!ASSERT_OK(ret, "set unprivileged_bpf_disabled"))
+ goto cleanup;
+ }
+
+ prog_fd = bpf_program__fd(skel->progs.sys_nanosleep_enter);
+ ASSERT_OK(bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len),
+ "obj_get_info_by_fd");
+ prog_id = prog_info.id;
+ ASSERT_GT(prog_id, 0, "valid_prog_id");
+
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 1000;
+ perf_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(perf_fd, 0, "perf_fd"))
+ goto cleanup;
+
+ if (!ASSERT_OK(test_unpriv_bpf_disabled__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ if (!ASSERT_OK(cap_disable_effective(ALL_CAPS, &save_caps), "disable caps"))
+ goto cleanup;
+
+ if (test__start_subtest("unpriv_bpf_disabled_positive"))
+ test_unpriv_bpf_disabled_positive(skel, prog_id, prog_fd, perf_fd, map_paths,
+ map_fds);
+
+ if (test__start_subtest("unpriv_bpf_disabled_negative"))
+ test_unpriv_bpf_disabled_negative(skel, prog_id, prog_fd, perf_fd, map_paths,
+ map_fds);
+
+cleanup:
+ close(perf_fd);
+ if (save_caps)
+ cap_enable_effective(save_caps, NULL);
+ if (strlen(perf_event_paranoid_orig) > 0)
+ sysctl_set("/proc/sys/kernel/perf_event_paranoid", NULL, perf_event_paranoid_orig);
+ if (strlen(unprivileged_bpf_disabled_orig) > 0)
+ sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled", NULL,
+ unprivileged_bpf_disabled_orig);
+ for (i = 0; i < NUM_MAPS; i++)
+ unlink(map_paths[i]);
+ test_unpriv_bpf_disabled__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c
new file mode 100644
index 000000000000..86404476c1da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Hengqi Chen */
+
+#include <test_progs.h>
+#include <asm/ptrace.h>
+#include "test_uprobe.skel.h"
+
+static FILE *urand_spawn(int *pid)
+{
+ FILE *f;
+
+ /* urandom_read's stdout is wired into f */
+ f = popen("./urandom_read 1 report-pid", "r");
+ if (!f)
+ return NULL;
+
+ if (fscanf(f, "%d", pid) != 1) {
+ pclose(f);
+ errno = EINVAL;
+ return NULL;
+ }
+
+ return f;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+ int exit_code;
+
+ /* pclose() waits for child process to exit and returns their exit code */
+ exit_code = pclose(*urand_pipe);
+ *urand_pipe = NULL;
+
+ return exit_code;
+}
+
+static void test_uprobe_attach(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+ FILE *urand_pipe = NULL;
+ int urand_pid = 0, err;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ urand_pipe = urand_spawn(&urand_pid);
+ if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+ goto cleanup;
+
+ skel->bss->my_pid = urand_pid;
+
+ /* Manual attach uprobe to urandlib_api
+ * There are two `urandlib_api` symbols in .dynsym section:
+ * - urandlib_api@LIBURANDOM_READ_1.0.0
+ * - urandlib_api@@LIBURANDOM_READ_2.0.0
+ * Both are global bind and would cause a conflict if user
+ * specify the symbol name without a version suffix
+ */
+ uprobe_opts.func_name = "urandlib_api";
+ skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4,
+ urand_pid,
+ "./liburandom_read.so",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_ERR_PTR(skel->links.test4, "urandlib_api_attach_conflict"))
+ goto cleanup;
+
+ uprobe_opts.func_name = "urandlib_api@LIBURANDOM_READ_1.0.0";
+ skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4,
+ urand_pid,
+ "./liburandom_read.so",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test4, "urandlib_api_attach_ok"))
+ goto cleanup;
+
+ /* Auto attach 3 u[ret]probes to urandlib_api_sameoffset */
+ err = test_uprobe__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger urandom_read */
+ ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "urandlib_api_sameoffset");
+ ASSERT_EQ(skel->bss->test2_result, 1, "urandlib_api_sameoffset@v1");
+ ASSERT_EQ(skel->bss->test3_result, 3, "urandlib_api_sameoffset@@v2");
+ ASSERT_EQ(skel->bss->test4_result, 1, "urandlib_api");
+
+cleanup:
+ if (urand_pipe)
+ pclose(urand_pipe);
+ test_uprobe__destroy(skel);
+}
+
+#ifdef __x86_64__
+__naked __maybe_unused unsigned long uprobe_regs_change_trigger(void)
+{
+ asm volatile (
+ "ret\n"
+ );
+}
+
+static __naked void uprobe_regs_change(struct pt_regs *before, struct pt_regs *after)
+{
+ asm volatile (
+ "movq %r11, 48(%rdi)\n"
+ "movq %r10, 56(%rdi)\n"
+ "movq %r9, 64(%rdi)\n"
+ "movq %r8, 72(%rdi)\n"
+ "movq %rax, 80(%rdi)\n"
+ "movq %rcx, 88(%rdi)\n"
+ "movq %rdx, 96(%rdi)\n"
+ "movq %rsi, 104(%rdi)\n"
+ "movq %rdi, 112(%rdi)\n"
+
+ /* save 2nd argument */
+ "pushq %rsi\n"
+ "call uprobe_regs_change_trigger\n"
+
+ /* save return value and load 2nd argument pointer to rax */
+ "pushq %rax\n"
+ "movq 8(%rsp), %rax\n"
+
+ "movq %r11, 48(%rax)\n"
+ "movq %r10, 56(%rax)\n"
+ "movq %r9, 64(%rax)\n"
+ "movq %r8, 72(%rax)\n"
+ "movq %rcx, 88(%rax)\n"
+ "movq %rdx, 96(%rax)\n"
+ "movq %rsi, 104(%rax)\n"
+ "movq %rdi, 112(%rax)\n"
+
+ /* restore return value and 2nd argument */
+ "pop %rax\n"
+ "pop %rsi\n"
+
+ "movq %rax, 80(%rsi)\n"
+ "ret\n"
+ );
+}
+
+static void regs_common(void)
+{
+ struct pt_regs before = {}, after = {}, expected = {
+ .rax = 0xc0ffe,
+ .rcx = 0xbad,
+ .rdx = 0xdead,
+ .r8 = 0x8,
+ .r9 = 0x9,
+ .r10 = 0x10,
+ .r11 = 0x11,
+ .rdi = 0x12,
+ .rsi = 0x13,
+ };
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->regs = expected;
+
+ uprobe_opts.func_name = "uprobe_regs_change_trigger";
+ skel->links.test_regs_change = bpf_program__attach_uprobe_opts(skel->progs.test_regs_change,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ uprobe_regs_change(&before, &after);
+
+ ASSERT_EQ(after.rax, expected.rax, "ax");
+ ASSERT_EQ(after.rcx, expected.rcx, "cx");
+ ASSERT_EQ(after.rdx, expected.rdx, "dx");
+ ASSERT_EQ(after.r8, expected.r8, "r8");
+ ASSERT_EQ(after.r9, expected.r9, "r9");
+ ASSERT_EQ(after.r10, expected.r10, "r10");
+ ASSERT_EQ(after.r11, expected.r11, "r11");
+ ASSERT_EQ(after.rdi, expected.rdi, "rdi");
+ ASSERT_EQ(after.rsi, expected.rsi, "rsi");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static noinline unsigned long uprobe_regs_change_ip_1(void)
+{
+ return 0xc0ffee;
+}
+
+static noinline unsigned long uprobe_regs_change_ip_2(void)
+{
+ return 0xdeadbeef;
+}
+
+static void regs_ip(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+ unsigned long ret;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->ip = (unsigned long) uprobe_regs_change_ip_2;
+
+ uprobe_opts.func_name = "uprobe_regs_change_ip_1";
+ skel->links.test_regs_change_ip = bpf_program__attach_uprobe_opts(
+ skel->progs.test_regs_change_ip,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change_ip, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ ret = uprobe_regs_change_ip_1();
+ ASSERT_EQ(ret, 0xdeadbeef, "ret");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static void test_uprobe_regs_change(void)
+{
+ if (test__start_subtest("regs_change_common"))
+ regs_common();
+ if (test__start_subtest("regs_change_ip"))
+ regs_ip();
+}
+#else
+static void test_uprobe_regs_change(void) { }
+#endif
+
+void test_uprobe(void)
+{
+ if (test__start_subtest("attach"))
+ test_uprobe_attach();
+ test_uprobe_regs_change();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
new file mode 100644
index 000000000000..d5b3377aa33c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include "test_uprobe_autoattach.skel.h"
+
+/* uprobe attach point */
+static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3,
+ int arg4, int arg5, int arg6,
+ int arg7, int arg8)
+{
+ asm volatile ("");
+ return arg1 + arg2 + arg3 + arg4 + arg5 + arg6 + arg7 + arg8 + 1;
+}
+
+void test_uprobe_autoattach(void)
+{
+ const char *devnull_str = "/dev/null";
+ struct test_uprobe_autoattach *skel;
+ int trigger_ret;
+ FILE *devnull;
+
+ skel = test_uprobe_autoattach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_uprobe_autoattach__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ skel->bss->test_pid = getpid();
+
+ /* trigger & validate uprobe & uretprobe */
+ trigger_ret = autoattach_trigger_func(1, 2, 3, 4, 5, 6, 7, 8);
+
+ skel->bss->test_pid = getpid();
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ devnull = fopen(devnull_str, "r");
+
+ ASSERT_EQ(skel->bss->uprobe_byname_parm1, 1, "check_uprobe_byname_parm1");
+ ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
+ ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc");
+ ASSERT_EQ(skel->bss->uretprobe_byname_ret, trigger_ret, "check_uretprobe_byname_ret");
+ ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran");
+ ASSERT_EQ(skel->bss->uprobe_byname2_parm1, (__u64)(long)devnull_str,
+ "check_uprobe_byname2_parm1");
+ ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_rc, (__u64)(long)devnull,
+ "check_uretprobe_byname2_rc");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
+
+ ASSERT_EQ(skel->bss->a[0], 1, "arg1");
+ ASSERT_EQ(skel->bss->a[1], 2, "arg2");
+ ASSERT_EQ(skel->bss->a[2], 3, "arg3");
+#if FUNC_REG_ARG_CNT > 3
+ ASSERT_EQ(skel->bss->a[3], 4, "arg4");
+#endif
+#if FUNC_REG_ARG_CNT > 4
+ ASSERT_EQ(skel->bss->a[4], 5, "arg5");
+#endif
+#if FUNC_REG_ARG_CNT > 5
+ ASSERT_EQ(skel->bss->a[5], 6, "arg6");
+#endif
+#if FUNC_REG_ARG_CNT > 6
+ ASSERT_EQ(skel->bss->a[6], 7, "arg7");
+#endif
+#if FUNC_REG_ARG_CNT > 7
+ ASSERT_EQ(skel->bss->a[7], 8, "arg8");
+#endif
+
+ fclose(devnull);
+cleanup:
+ test_uprobe_autoattach__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
new file mode 100644
index 000000000000..2ee17ef1dae2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -0,0 +1,1376 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <unistd.h>
+#include <pthread.h>
+#include <test_progs.h>
+#include "uprobe_multi.skel.h"
+#include "uprobe_multi_bench.skel.h"
+#include "uprobe_multi_usdt.skel.h"
+#include "uprobe_multi_consumers.skel.h"
+#include "uprobe_multi_pid_filter.skel.h"
+#include "uprobe_multi_session.skel.h"
+#include "uprobe_multi_session_single.skel.h"
+#include "uprobe_multi_session_cookie.skel.h"
+#include "uprobe_multi_session_recursive.skel.h"
+#include "uprobe_multi_verifier.skel.h"
+#include "bpf/libbpf_internal.h"
+#include "testing_helpers.h"
+#include "../sdt.h"
+
+static char test_data[] = "test_data";
+
+noinline void uprobe_multi_func_1(void)
+{
+ asm volatile ("");
+}
+
+noinline void uprobe_multi_func_2(void)
+{
+ asm volatile ("");
+}
+
+noinline void uprobe_multi_func_3(void)
+{
+ asm volatile ("");
+}
+
+noinline void usdt_trigger(void)
+{
+ STAP_PROBE(test, pid_filter_usdt);
+}
+
+noinline void uprobe_session_recursive(int i)
+{
+ if (i)
+ uprobe_session_recursive(i - 1);
+}
+
+struct child {
+ int go[2];
+ int c2p[2]; /* child -> parent channel */
+ int pid;
+ int tid;
+ pthread_t thread;
+ char stack[65536];
+};
+
+static void release_child(struct child *child)
+{
+ int child_status;
+
+ if (!child)
+ return;
+ close(child->go[1]);
+ close(child->go[0]);
+ if (child->thread)
+ pthread_join(child->thread, NULL);
+ close(child->c2p[0]);
+ close(child->c2p[1]);
+ if (child->pid > 0)
+ waitpid(child->pid, &child_status, 0);
+}
+
+static void kick_child(struct child *child)
+{
+ char c = 1;
+
+ if (child) {
+ write(child->go[1], &c, 1);
+ release_child(child);
+ }
+ fflush(NULL);
+}
+
+static int child_func(void *arg)
+{
+ struct child *child = arg;
+ int err, c;
+
+ close(child->go[1]);
+
+ /* wait for parent's kick */
+ err = read(child->go[0], &c, 1);
+ if (err != 1)
+ exit(err);
+
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+ usdt_trigger();
+
+ exit(errno);
+}
+
+static int spawn_child_flag(struct child *child, bool clone_vm)
+{
+ /* pipe to notify child to execute the trigger functions */
+ if (pipe(child->go))
+ return -1;
+
+ if (clone_vm) {
+ child->pid = child->tid = clone(child_func, child->stack + sizeof(child->stack)/2,
+ CLONE_VM|SIGCHLD, child);
+ } else {
+ child->pid = child->tid = fork();
+ }
+ if (child->pid < 0) {
+ release_child(child);
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* fork-ed child */
+ if (!clone_vm && child->pid == 0)
+ child_func(child);
+
+ return 0;
+}
+
+static int spawn_child(struct child *child)
+{
+ return spawn_child_flag(child, false);
+}
+
+static void *child_thread(void *ctx)
+{
+ struct child *child = ctx;
+ int c = 0, err;
+
+ child->tid = sys_gettid();
+
+ /* let parent know we are ready */
+ err = write(child->c2p[1], &c, 1);
+ if (err != 1)
+ pthread_exit(&err);
+
+ /* wait for parent's kick */
+ err = read(child->go[0], &c, 1);
+ if (err != 1)
+ pthread_exit(&err);
+
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+ usdt_trigger();
+
+ err = 0;
+ pthread_exit(&err);
+}
+
+static int spawn_thread(struct child *child)
+{
+ int c, err;
+
+ /* pipe to notify child to execute the trigger functions */
+ if (pipe(child->go))
+ return -1;
+ /* pipe to notify parent that child thread is ready */
+ if (pipe(child->c2p)) {
+ close(child->go[0]);
+ close(child->go[1]);
+ return -1;
+ }
+
+ child->pid = getpid();
+
+ err = pthread_create(&child->thread, NULL, child_thread, child);
+ if (err) {
+ err = -errno;
+ close(child->go[0]);
+ close(child->go[1]);
+ close(child->c2p[0]);
+ close(child->c2p[1]);
+ errno = -err;
+ return -1;
+ }
+
+ err = read(child->c2p[0], &c, 1);
+ if (!ASSERT_EQ(err, 1, "child_thread_ready"))
+ return -1;
+
+ return 0;
+}
+
+static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child)
+{
+ skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
+ skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2;
+ skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3;
+
+ skel->bss->user_ptr = test_data;
+
+ /*
+ * Disable pid check in bpf program if we are pid filter test,
+ * because the probe should be executed only by child->pid
+ * passed at the probe attach.
+ */
+ skel->bss->pid = child ? 0 : getpid();
+ skel->bss->expect_pid = child ? child->pid : 0;
+
+ /* trigger all probes, if we are testing child *process*, just to make
+ * sure that PID filtering doesn't let through activations from wrong
+ * PIDs; when we test child *thread*, we don't want to do this to
+ * avoid double counting number of triggering events
+ */
+ if (!child || !child->thread) {
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+ usdt_trigger();
+ }
+
+ if (child)
+ kick_child(child);
+
+ /*
+ * There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123]
+ * function and each sleepable probe (6) increments uprobe_multi_sleep_result.
+ */
+ ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 2, "uprobe_multi_func_1_result");
+ ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 2, "uprobe_multi_func_2_result");
+ ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 2, "uprobe_multi_func_3_result");
+
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 2, "uretprobe_multi_func_1_result");
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 2, "uretprobe_multi_func_2_result");
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 2, "uretprobe_multi_func_3_result");
+
+ ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result");
+
+ ASSERT_FALSE(skel->bss->bad_pid_seen, "bad_pid_seen");
+
+ if (child) {
+ ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid");
+ ASSERT_EQ(skel->bss->child_tid, child->tid, "uprobe_multi_child_tid");
+ }
+}
+
+static void test_skel_api(void)
+{
+ struct uprobe_multi *skel = NULL;
+ int err;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ goto cleanup;
+
+ err = uprobe_multi__attach(skel);
+ if (!ASSERT_OK(err, "uprobe_multi__attach"))
+ goto cleanup;
+
+ uprobe_multi_test_run(skel, NULL);
+
+cleanup:
+ uprobe_multi__destroy(skel);
+}
+
+static void
+__test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts,
+ struct child *child)
+{
+ pid_t pid = child ? child->pid : -1;
+ struct uprobe_multi *skel = NULL;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ goto cleanup;
+
+ opts->retprobe = false;
+ skel->links.uprobe = bpf_program__attach_uprobe_multi(skel->progs.uprobe, pid,
+ binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uprobe, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ opts->retprobe = true;
+ skel->links.uretprobe = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, pid,
+ binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uretprobe, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ opts->retprobe = false;
+ skel->links.uprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uprobe_sleep, pid,
+ binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uprobe_sleep, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ opts->retprobe = true;
+ skel->links.uretprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uretprobe_sleep,
+ pid, binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uretprobe_sleep, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ opts->retprobe = false;
+ skel->links.uprobe_extra = bpf_program__attach_uprobe_multi(skel->progs.uprobe_extra, -1,
+ binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ /* Attach (uprobe-backed) USDTs */
+ skel->links.usdt_pid = bpf_program__attach_usdt(skel->progs.usdt_pid, pid, binary,
+ "test", "pid_filter_usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_pid, "attach_usdt_pid"))
+ goto cleanup;
+
+ skel->links.usdt_extra = bpf_program__attach_usdt(skel->progs.usdt_extra, -1, binary,
+ "test", "pid_filter_usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_extra, "attach_usdt_extra"))
+ goto cleanup;
+
+ uprobe_multi_test_run(skel, child);
+
+ ASSERT_FALSE(skel->bss->bad_pid_seen_usdt, "bad_pid_seen_usdt");
+ if (child) {
+ ASSERT_EQ(skel->bss->child_pid_usdt, child->pid, "usdt_multi_child_pid");
+ ASSERT_EQ(skel->bss->child_tid_usdt, child->tid, "usdt_multi_child_tid");
+ }
+cleanup:
+ uprobe_multi__destroy(skel);
+}
+
+static void
+test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts)
+{
+ static struct child child;
+
+ /* no pid filter */
+ __test_attach_api(binary, pattern, opts, NULL);
+
+ /* pid filter */
+ if (!ASSERT_OK(spawn_child(&child), "spawn_child"))
+ return;
+
+ __test_attach_api(binary, pattern, opts, &child);
+
+ /* pid filter (thread) */
+ if (!ASSERT_OK(spawn_thread(&child), "spawn_thread"))
+ return;
+
+ __test_attach_api(binary, pattern, opts, &child);
+}
+
+static void test_attach_api_pattern(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+
+ test_attach_api("/proc/self/exe", "uprobe_multi_func_*", &opts);
+ test_attach_api("/proc/self/exe", "uprobe_multi_func_?", &opts);
+}
+
+static void test_attach_api_syms(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ const char *syms[3] = {
+ "uprobe_multi_func_1",
+ "uprobe_multi_func_2",
+ "uprobe_multi_func_3",
+ };
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ test_attach_api("/proc/self/exe", NULL, &opts);
+}
+
+static void test_attach_api_fails(void)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ const char *path = "/proc/self/exe";
+ struct uprobe_multi *skel = NULL;
+ int prog_fd, link_fd = -1;
+ unsigned long offset = 0;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.uprobe_extra);
+
+ /* abnormal cnt */
+ opts.uprobe_multi.path = path;
+ opts.uprobe_multi.offsets = &offset;
+ opts.uprobe_multi.cnt = INT_MAX;
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -E2BIG, "big cnt"))
+ goto cleanup;
+
+ /* cnt is 0 */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "cnt_is_zero"))
+ goto cleanup;
+
+ /* negative offset */
+ offset = -1;
+ opts.uprobe_multi.path = path;
+ opts.uprobe_multi.offsets = (unsigned long *) &offset;
+ opts.uprobe_multi.cnt = 1;
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "offset_is_negative"))
+ goto cleanup;
+
+ /* offsets is NULL */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "offsets_is_null"))
+ goto cleanup;
+
+ /* wrong offsets pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) 1,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "offsets_is_wrong"))
+ goto cleanup;
+
+ /* path is NULL */
+ offset = 1;
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "path_is_null"))
+ goto cleanup;
+
+ /* wrong path pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = (const char *) 1,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "path_is_wrong"))
+ goto cleanup;
+
+ /* wrong path type */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = "/",
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EBADF, "path_is_wrong_type"))
+ goto cleanup;
+
+ /* wrong cookies pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cookies = (__u64 *) 1ULL,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "cookies_is_wrong"))
+ goto cleanup;
+
+ /* wrong ref_ctr_offsets pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cookies = (__u64 *) &offset,
+ .uprobe_multi.ref_ctr_offsets = (unsigned long *) 1,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "ref_ctr_offsets_is_wrong"))
+ goto cleanup;
+
+ /* wrong flags */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.flags = 1 << 31,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "wrong_flags"))
+ goto cleanup;
+
+ /* wrong pid */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ .uprobe_multi.pid = -2,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ uprobe_multi__destroy(skel);
+}
+
+#ifdef __x86_64__
+noinline void uprobe_multi_error_func(void)
+{
+ /*
+ * If --fcf-protection=branch is enabled the gcc generates endbr as
+ * first instruction, so marking the exact address of int3 with the
+ * symbol to be used in the attach_uprobe_fail_trap test below.
+ */
+ asm volatile (
+ ".globl uprobe_multi_error_func_int3; \n"
+ "uprobe_multi_error_func_int3: \n"
+ "int3 \n"
+ );
+}
+
+/*
+ * Attaching uprobe on uprobe_multi_error_func results in error
+ * because it already starts with int3 instruction.
+ */
+static void attach_uprobe_fail_trap(struct uprobe_multi *skel)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ const char *syms[4] = {
+ "uprobe_multi_func_1",
+ "uprobe_multi_func_2",
+ "uprobe_multi_func_3",
+ "uprobe_multi_error_func_int3",
+ };
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+
+ skel->links.uprobe = bpf_program__attach_uprobe_multi(skel->progs.uprobe, -1,
+ "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_ERR_PTR(skel->links.uprobe, "bpf_program__attach_uprobe_multi")) {
+ bpf_link__destroy(skel->links.uprobe);
+ skel->links.uprobe = NULL;
+ }
+}
+#else
+static void attach_uprobe_fail_trap(struct uprobe_multi *skel) { }
+#endif
+
+short sema_1 __used, sema_2 __used;
+
+static void attach_uprobe_fail_refctr(struct uprobe_multi *skel)
+{
+ unsigned long *tmp_offsets = NULL, *tmp_ref_ctr_offsets = NULL;
+ unsigned long offsets[3], ref_ctr_offsets[3];
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ const char *path = "/proc/self/exe";
+ const char *syms[3] = {
+ "uprobe_multi_func_1",
+ "uprobe_multi_func_2",
+ };
+ const char *sema[3] = {
+ "sema_1",
+ "sema_2",
+ };
+ int prog_fd, link_fd, err;
+
+ prog_fd = bpf_program__fd(skel->progs.uprobe_extra);
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 2, (const char **) &syms,
+ &tmp_offsets, STT_FUNC);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_func"))
+ return;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 2, (const char **) &sema,
+ &tmp_ref_ctr_offsets, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_sema"))
+ goto cleanup;
+
+ /*
+ * We attach to 3 uprobes on 2 functions, so 2 uprobes share single function,
+ * but with different ref_ctr_offset which is not allowed and results in fail.
+ */
+ offsets[0] = tmp_offsets[0]; /* uprobe_multi_func_1 */
+ offsets[1] = tmp_offsets[1]; /* uprobe_multi_func_2 */
+ offsets[2] = tmp_offsets[1]; /* uprobe_multi_func_2 */
+
+ ref_ctr_offsets[0] = tmp_ref_ctr_offsets[0]; /* sema_1 */
+ ref_ctr_offsets[1] = tmp_ref_ctr_offsets[1]; /* sema_2 */
+ ref_ctr_offsets[2] = tmp_ref_ctr_offsets[0]; /* sema_1, error */
+
+ opts.uprobe_multi.path = path;
+ opts.uprobe_multi.offsets = (const unsigned long *) &offsets;
+ opts.uprobe_multi.ref_ctr_offsets = (const unsigned long *) &ref_ctr_offsets;
+ opts.uprobe_multi.cnt = 3;
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ close(link_fd);
+
+cleanup:
+ free(tmp_ref_ctr_offsets);
+ free(tmp_offsets);
+}
+
+static void test_attach_uprobe_fails(void)
+{
+ struct uprobe_multi *skel = NULL;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ return;
+
+ /* attach fails due to adding uprobe on trap instruction, x86_64 only */
+ attach_uprobe_fail_trap(skel);
+
+ /* attach fail due to wrong ref_ctr_offs on one of the uprobes */
+ attach_uprobe_fail_refctr(skel);
+
+ uprobe_multi__destroy(skel);
+}
+
+static void __test_link_api(struct child *child)
+{
+ int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1;
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ const char *path = "/proc/self/exe";
+ struct uprobe_multi *skel = NULL;
+ unsigned long *offsets = NULL;
+ const char *syms[3] = {
+ "uprobe_multi_func_1",
+ "uprobe_multi_func_2",
+ "uprobe_multi_func_3",
+ };
+ int link_extra_fd = -1;
+ int err;
+
+ err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets, STT_FUNC);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets"))
+ return;
+
+ opts.uprobe_multi.path = path;
+ opts.uprobe_multi.offsets = offsets;
+ opts.uprobe_multi.cnt = ARRAY_SIZE(syms);
+ opts.uprobe_multi.pid = child ? child->pid : 0;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = 0;
+ prog_fd = bpf_program__fd(skel->progs.uprobe);
+ link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN;
+ prog_fd = bpf_program__fd(skel->progs.uretprobe);
+ link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link2_fd, 0, "link2_fd"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = 0;
+ prog_fd = bpf_program__fd(skel->progs.uprobe_sleep);
+ link3_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link3_fd, 0, "link3_fd"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN;
+ prog_fd = bpf_program__fd(skel->progs.uretprobe_sleep);
+ link4_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link4_fd, 0, "link4_fd"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = 0;
+ opts.uprobe_multi.pid = 0;
+ prog_fd = bpf_program__fd(skel->progs.uprobe_extra);
+ link_extra_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link_extra_fd, 0, "link_extra_fd"))
+ goto cleanup;
+
+ uprobe_multi_test_run(skel, child);
+
+cleanup:
+ if (link1_fd >= 0)
+ close(link1_fd);
+ if (link2_fd >= 0)
+ close(link2_fd);
+ if (link3_fd >= 0)
+ close(link3_fd);
+ if (link4_fd >= 0)
+ close(link4_fd);
+ if (link_extra_fd >= 0)
+ close(link_extra_fd);
+
+ uprobe_multi__destroy(skel);
+ free(offsets);
+}
+
+static void test_link_api(void)
+{
+ static struct child child;
+
+ /* no pid filter */
+ __test_link_api(NULL);
+
+ /* pid filter */
+ if (!ASSERT_OK(spawn_child(&child), "spawn_child"))
+ return;
+
+ __test_link_api(&child);
+
+ /* pid filter (thread) */
+ if (!ASSERT_OK(spawn_thread(&child), "spawn_thread"))
+ return;
+
+ __test_link_api(&child);
+}
+
+static struct bpf_program *
+get_program(struct uprobe_multi_consumers *skel, int prog)
+{
+ switch (prog) {
+ case 0:
+ return skel->progs.uprobe_0;
+ case 1:
+ return skel->progs.uprobe_1;
+ case 2:
+ return skel->progs.uprobe_2;
+ case 3:
+ return skel->progs.uprobe_3;
+ default:
+ ASSERT_FAIL("get_program");
+ return NULL;
+ }
+}
+
+static struct bpf_link **
+get_link(struct uprobe_multi_consumers *skel, int link)
+{
+ switch (link) {
+ case 0:
+ return &skel->links.uprobe_0;
+ case 1:
+ return &skel->links.uprobe_1;
+ case 2:
+ return &skel->links.uprobe_2;
+ case 3:
+ return &skel->links.uprobe_3;
+ default:
+ ASSERT_FAIL("get_link");
+ return NULL;
+ }
+}
+
+static int uprobe_attach(struct uprobe_multi_consumers *skel, int idx, unsigned long offset)
+{
+ struct bpf_program *prog = get_program(skel, idx);
+ struct bpf_link **link = get_link(skel, idx);
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+
+ if (!prog || !link)
+ return -1;
+
+ opts.offsets = &offset;
+ opts.cnt = 1;
+
+ /*
+ * bit/prog: 0 uprobe entry
+ * bit/prog: 1 uprobe return
+ * bit/prog: 2 uprobe session without return
+ * bit/prog: 3 uprobe session with return
+ */
+ opts.retprobe = idx == 1;
+ opts.session = idx == 2 || idx == 3;
+
+ *link = bpf_program__attach_uprobe_multi(prog, 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(*link, "bpf_program__attach_uprobe_multi"))
+ return -1;
+ return 0;
+}
+
+static void uprobe_detach(struct uprobe_multi_consumers *skel, int idx)
+{
+ struct bpf_link **link = get_link(skel, idx);
+
+ bpf_link__destroy(*link);
+ *link = NULL;
+}
+
+static bool test_bit(int bit, unsigned long val)
+{
+ return val & (1 << bit);
+}
+
+noinline int
+uprobe_consumer_test(struct uprobe_multi_consumers *skel,
+ unsigned long before, unsigned long after,
+ unsigned long offset)
+{
+ int idx;
+
+ /* detach uprobe for each unset programs in 'before' state ... */
+ for (idx = 0; idx < 4; idx++) {
+ if (test_bit(idx, before) && !test_bit(idx, after))
+ uprobe_detach(skel, idx);
+ }
+
+ /* ... and attach all new programs in 'after' state */
+ for (idx = 0; idx < 4; idx++) {
+ if (!test_bit(idx, before) && test_bit(idx, after)) {
+ if (!ASSERT_OK(uprobe_attach(skel, idx, offset), "uprobe_attach_after"))
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * We generate 16 consumer_testX functions that will have uprobe installed on
+ * and will be called in separate threads. All function pointer are stored in
+ * "consumers" section and each thread will pick one function based on index.
+ */
+
+extern const void *__start_consumers;
+
+#define __CONSUMER_TEST(func) \
+noinline int func(struct uprobe_multi_consumers *skel, unsigned long before, \
+ unsigned long after, unsigned long offset) \
+{ \
+ return uprobe_consumer_test(skel, before, after, offset); \
+} \
+void *__ ## func __used __attribute__((section("consumers"))) = (void *) func;
+
+#define CONSUMER_TEST(func) __CONSUMER_TEST(func)
+
+#define C1 CONSUMER_TEST(__PASTE(consumer_test, __COUNTER__))
+#define C4 C1 C1 C1 C1
+#define C16 C4 C4 C4 C4
+
+C16
+
+typedef int (*test_t)(struct uprobe_multi_consumers *, unsigned long,
+ unsigned long, unsigned long);
+
+static int consumer_test(struct uprobe_multi_consumers *skel,
+ unsigned long before, unsigned long after,
+ test_t test, unsigned long offset)
+{
+ int err, idx, ret = -1;
+
+ printf("consumer_test before %lu after %lu\n", before, after);
+
+ /* 'before' is each, we attach uprobe for every set idx */
+ for (idx = 0; idx < 4; idx++) {
+ if (test_bit(idx, before)) {
+ if (!ASSERT_OK(uprobe_attach(skel, idx, offset), "uprobe_attach_before"))
+ goto cleanup;
+ }
+ }
+
+ err = test(skel, before, after, offset);
+ if (!ASSERT_EQ(err, 0, "uprobe_consumer_test"))
+ goto cleanup;
+
+ for (idx = 0; idx < 4; idx++) {
+ bool uret_stays, uret_survives;
+ const char *fmt = "BUG";
+ __u64 val = 0;
+
+ switch (idx) {
+ case 0:
+ /*
+ * uprobe entry
+ * +1 if define in 'before'
+ */
+ if (test_bit(idx, before))
+ val++;
+ fmt = "prog 0: uprobe";
+ break;
+ case 1:
+ /*
+ * To trigger uretprobe consumer, the uretprobe under test either stayed from
+ * before to after (uret_stays + test_bit) or uretprobe instance survived and
+ * we have uretprobe active in after (uret_survives + test_bit)
+ */
+ uret_stays = before & after & 0b0110;
+ uret_survives = ((before & 0b0110) && (after & 0b0110) && (before & 0b1001));
+
+ if ((uret_stays || uret_survives) && test_bit(idx, after))
+ val++;
+ fmt = "prog 1: uretprobe";
+ break;
+ case 2:
+ /*
+ * session with return
+ * +1 if defined in 'before'
+ * +1 if defined in 'after'
+ */
+ if (test_bit(idx, before)) {
+ val++;
+ if (test_bit(idx, after))
+ val++;
+ }
+ fmt = "prog 2: session with return";
+ break;
+ case 3:
+ /*
+ * session without return
+ * +1 if defined in 'before'
+ */
+ if (test_bit(idx, before))
+ val++;
+ fmt = "prog 3: session with NO return";
+ break;
+ }
+
+ if (!ASSERT_EQ(skel->bss->uprobe_result[idx], val, fmt))
+ goto cleanup;
+ skel->bss->uprobe_result[idx] = 0;
+ }
+
+ ret = 0;
+
+cleanup:
+ for (idx = 0; idx < 4; idx++)
+ uprobe_detach(skel, idx);
+ return ret;
+}
+
+#define CONSUMER_MAX 16
+
+/*
+ * Each thread runs 1/16 of the load by running test for single
+ * 'before' number (based on thread index) and full scale of
+ * 'after' numbers.
+ */
+static void *consumer_thread(void *arg)
+{
+ unsigned long idx = (unsigned long) arg;
+ struct uprobe_multi_consumers *skel;
+ unsigned long offset;
+ const void *func;
+ int after;
+
+ skel = uprobe_multi_consumers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi_consumers__open_and_load"))
+ return NULL;
+
+ func = *((&__start_consumers) + idx);
+
+ offset = get_uprobe_offset(func);
+ if (!ASSERT_GE(offset, 0, "uprobe_offset"))
+ goto out;
+
+ for (after = 0; after < CONSUMER_MAX; after++)
+ if (consumer_test(skel, idx, after, func, offset))
+ goto out;
+
+out:
+ uprobe_multi_consumers__destroy(skel);
+ return NULL;
+}
+
+
+static void test_consumers(void)
+{
+ pthread_t pt[CONSUMER_MAX];
+ unsigned long idx;
+ int err;
+
+ /*
+ * The idea of this test is to try all possible combinations of
+ * uprobes consumers attached on single function.
+ *
+ * - 1 uprobe entry consumer
+ * - 1 uprobe exit consumer
+ * - 1 uprobe session with return
+ * - 1 uprobe session without return
+ *
+ * The test uses 4 uprobes attached on single function, but that
+ * translates into single uprobe with 4 consumers in kernel.
+ *
+ * The before/after values present the state of attached consumers
+ * before and after the probed function:
+ *
+ * bit/prog 0 : uprobe entry
+ * bit/prog 1 : uprobe return
+ *
+ * For example for:
+ *
+ * before = 0b01
+ * after = 0b10
+ *
+ * it means that before we call 'uprobe_consumer_test' we attach
+ * uprobes defined in 'before' value:
+ *
+ * - bit/prog 1: uprobe entry
+ *
+ * uprobe_consumer_test is called and inside it we attach and detach
+ * uprobes based on 'after' value:
+ *
+ * - bit/prog 0: is detached
+ * - bit/prog 1: is attached
+ *
+ * uprobe_consumer_test returns and we check counters values increased
+ * by bpf programs on each uprobe to match the expected count based on
+ * before/after bits.
+ */
+
+ for (idx = 0; idx < CONSUMER_MAX; idx++) {
+ err = pthread_create(&pt[idx], NULL, consumer_thread, (void *) idx);
+ if (!ASSERT_OK(err, "pthread_create"))
+ break;
+ }
+
+ while (idx)
+ pthread_join(pt[--idx], NULL);
+}
+
+static struct bpf_program *uprobe_multi_program(struct uprobe_multi_pid_filter *skel, int idx)
+{
+ switch (idx) {
+ case 0: return skel->progs.uprobe_multi_0;
+ case 1: return skel->progs.uprobe_multi_1;
+ case 2: return skel->progs.uprobe_multi_2;
+ }
+ return NULL;
+}
+
+#define TASKS 3
+
+static void run_pid_filter(struct uprobe_multi_pid_filter *skel, bool clone_vm, bool retprobe)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, .retprobe = retprobe);
+ struct bpf_link *link[TASKS] = {};
+ struct child child[TASKS] = {};
+ int i;
+
+ memset(skel->bss->test, 0, sizeof(skel->bss->test));
+
+ for (i = 0; i < TASKS; i++) {
+ if (!ASSERT_OK(spawn_child_flag(&child[i], clone_vm), "spawn_child"))
+ goto cleanup;
+ skel->bss->pids[i] = child[i].pid;
+ }
+
+ for (i = 0; i < TASKS; i++) {
+ link[i] = bpf_program__attach_uprobe_multi(uprobe_multi_program(skel, i),
+ child[i].pid, "/proc/self/exe",
+ "uprobe_multi_func_1", &opts);
+ if (!ASSERT_OK_PTR(link[i], "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+ }
+
+ for (i = 0; i < TASKS; i++)
+ kick_child(&child[i]);
+
+ for (i = 0; i < TASKS; i++) {
+ ASSERT_EQ(skel->bss->test[i][0], 1, "pid");
+ ASSERT_EQ(skel->bss->test[i][1], 0, "unknown");
+ }
+
+cleanup:
+ for (i = 0; i < TASKS; i++)
+ bpf_link__destroy(link[i]);
+ for (i = 0; i < TASKS; i++)
+ release_child(&child[i]);
+}
+
+static void test_pid_filter_process(bool clone_vm)
+{
+ struct uprobe_multi_pid_filter *skel;
+
+ skel = uprobe_multi_pid_filter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi_pid_filter__open_and_load"))
+ return;
+
+ run_pid_filter(skel, clone_vm, false);
+ run_pid_filter(skel, clone_vm, true);
+
+ uprobe_multi_pid_filter__destroy(skel);
+}
+
+static void test_session_skel_api(void)
+{
+ struct uprobe_multi_session *skel = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct bpf_link *link = NULL;
+ int err;
+
+ skel = uprobe_multi_session__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi_session__open_and_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ skel->bss->user_ptr = test_data;
+
+ err = uprobe_multi_session__attach(skel);
+ if (!ASSERT_OK(err, "uprobe_multi_session__attach"))
+ goto cleanup;
+
+ /* trigger all probes */
+ skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
+ skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2;
+ skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3;
+
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+
+ /*
+ * We expect 2 for uprobe_multi_func_2 because it runs both entry/return probe,
+ * uprobe_multi_func_[13] run just the entry probe. All expected numbers are
+ * doubled, because we run extra test for sleepable session.
+ */
+ ASSERT_EQ(skel->bss->uprobe_session_result[0], 2, "uprobe_multi_func_1_result");
+ ASSERT_EQ(skel->bss->uprobe_session_result[1], 4, "uprobe_multi_func_2_result");
+ ASSERT_EQ(skel->bss->uprobe_session_result[2], 2, "uprobe_multi_func_3_result");
+
+ /* We expect increase in 3 entry and 1 return session calls -> 4 */
+ ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 4, "uprobe_multi_sleep_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ uprobe_multi_session__destroy(skel);
+}
+
+static void test_session_single_skel_api(void)
+{
+ struct uprobe_multi_session_single *skel = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ int err;
+
+ skel = uprobe_multi_session_single__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi_session_single__open_and_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+
+ err = uprobe_multi_session_single__attach(skel);
+ if (!ASSERT_OK(err, "uprobe_multi_session_single__attach"))
+ goto cleanup;
+
+ uprobe_multi_func_1();
+
+ /*
+ * We expect consumer 0 and 2 to trigger just entry handler (value 1)
+ * and consumer 1 to hit both (value 2).
+ */
+ ASSERT_EQ(skel->bss->uprobe_session_result[0], 1, "uprobe_session_result_0");
+ ASSERT_EQ(skel->bss->uprobe_session_result[1], 2, "uprobe_session_result_1");
+ ASSERT_EQ(skel->bss->uprobe_session_result[2], 1, "uprobe_session_result_2");
+
+cleanup:
+ uprobe_multi_session_single__destroy(skel);
+}
+
+static void test_session_cookie_skel_api(void)
+{
+ struct uprobe_multi_session_cookie *skel = NULL;
+ int err;
+
+ skel = uprobe_multi_session_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi_session_cookie__open_and_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+
+ err = uprobe_multi_session_cookie__attach(skel);
+ if (!ASSERT_OK(err, "uprobe_multi_session_cookie__attach"))
+ goto cleanup;
+
+ /* trigger all probes */
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+
+ ASSERT_EQ(skel->bss->test_uprobe_1_result, 1, "test_uprobe_1_result");
+ ASSERT_EQ(skel->bss->test_uprobe_2_result, 2, "test_uprobe_2_result");
+ ASSERT_EQ(skel->bss->test_uprobe_3_result, 3, "test_uprobe_3_result");
+
+cleanup:
+ uprobe_multi_session_cookie__destroy(skel);
+}
+
+static void test_session_recursive_skel_api(void)
+{
+ struct uprobe_multi_session_recursive *skel = NULL;
+ int i, err;
+
+ skel = uprobe_multi_session_recursive__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi_session_recursive__open_and_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+
+ err = uprobe_multi_session_recursive__attach(skel);
+ if (!ASSERT_OK(err, "uprobe_multi_session_recursive__attach"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(skel->bss->test_uprobe_cookie_entry); i++)
+ skel->bss->test_uprobe_cookie_entry[i] = i + 1;
+
+ uprobe_session_recursive(5);
+
+ /*
+ * entry uprobe:
+ * uprobe_session_recursive(5) { *cookie = 1, return 0
+ * uprobe_session_recursive(4) { *cookie = 2, return 1
+ * uprobe_session_recursive(3) { *cookie = 3, return 0
+ * uprobe_session_recursive(2) { *cookie = 4, return 1
+ * uprobe_session_recursive(1) { *cookie = 5, return 0
+ * uprobe_session_recursive(0) { *cookie = 6, return 1
+ * return uprobe:
+ * } i = 0 not executed
+ * } i = 1 test_uprobe_cookie_return[0] = 5
+ * } i = 2 not executed
+ * } i = 3 test_uprobe_cookie_return[1] = 3
+ * } i = 4 not executed
+ * } i = 5 test_uprobe_cookie_return[2] = 1
+ */
+
+ ASSERT_EQ(skel->bss->idx_entry, 6, "idx_entry");
+ ASSERT_EQ(skel->bss->idx_return, 3, "idx_return");
+
+ ASSERT_EQ(skel->bss->test_uprobe_cookie_return[0], 5, "test_uprobe_cookie_return[0]");
+ ASSERT_EQ(skel->bss->test_uprobe_cookie_return[1], 3, "test_uprobe_cookie_return[1]");
+ ASSERT_EQ(skel->bss->test_uprobe_cookie_return[2], 1, "test_uprobe_cookie_return[2]");
+
+cleanup:
+ uprobe_multi_session_recursive__destroy(skel);
+}
+
+static void test_bench_attach_uprobe(void)
+{
+ long attach_start_ns = 0, attach_end_ns = 0;
+ struct uprobe_multi_bench *skel = NULL;
+ long detach_start_ns, detach_end_ns;
+ double attach_delta, detach_delta;
+ int err;
+
+ skel = uprobe_multi_bench__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi_bench__open_and_load"))
+ goto cleanup;
+
+ attach_start_ns = get_time_ns();
+
+ err = uprobe_multi_bench__attach(skel);
+ if (!ASSERT_OK(err, "uprobe_multi_bench__attach"))
+ goto cleanup;
+
+ attach_end_ns = get_time_ns();
+
+ system("./uprobe_multi bench");
+
+ ASSERT_EQ(skel->bss->count, 50000, "uprobes_count");
+
+cleanup:
+ detach_start_ns = get_time_ns();
+ uprobe_multi_bench__destroy(skel);
+ detach_end_ns = get_time_ns();
+
+ attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
+ detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+ printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
+ printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+}
+
+static void test_bench_attach_usdt(void)
+{
+ long attach_start_ns = 0, attach_end_ns = 0;
+ struct uprobe_multi_usdt *skel = NULL;
+ long detach_start_ns, detach_end_ns;
+ double attach_delta, detach_delta;
+
+ skel = uprobe_multi_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open"))
+ goto cleanup;
+
+ attach_start_ns = get_time_ns();
+
+ skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, -1, "./uprobe_multi",
+ "test", "usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt0, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ attach_end_ns = get_time_ns();
+
+ system("./uprobe_multi usdt");
+
+ ASSERT_EQ(skel->bss->count, 50000, "usdt_count");
+
+cleanup:
+ detach_start_ns = get_time_ns();
+ uprobe_multi_usdt__destroy(skel);
+ detach_end_ns = get_time_ns();
+
+ attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
+ detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+ printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
+ printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+}
+
+void test_uprobe_multi_test(void)
+{
+ if (test__start_subtest("skel_api"))
+ test_skel_api();
+ if (test__start_subtest("attach_api_pattern"))
+ test_attach_api_pattern();
+ if (test__start_subtest("attach_api_syms"))
+ test_attach_api_syms();
+ if (test__start_subtest("link_api"))
+ test_link_api();
+ if (test__start_subtest("bench_uprobe"))
+ test_bench_attach_uprobe();
+ if (test__start_subtest("bench_usdt"))
+ test_bench_attach_usdt();
+ if (test__start_subtest("attach_api_fails"))
+ test_attach_api_fails();
+ if (test__start_subtest("attach_uprobe_fails"))
+ test_attach_uprobe_fails();
+ if (test__start_subtest("consumers"))
+ test_consumers();
+ if (test__start_subtest("filter_fork"))
+ test_pid_filter_process(false);
+ if (test__start_subtest("filter_clone_vm"))
+ test_pid_filter_process(true);
+ if (test__start_subtest("session"))
+ test_session_skel_api();
+ if (test__start_subtest("session_single"))
+ test_session_single_skel_api();
+ if (test__start_subtest("session_cookie"))
+ test_session_cookie_skel_api();
+ if (test__start_subtest("session_cookie_recursive"))
+ test_session_recursive_skel_api();
+ RUN_TESTS(uprobe_multi_verifier);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
new file mode 100644
index 000000000000..955a37751b52
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -0,0 +1,803 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#ifdef __x86_64__
+
+#include <unistd.h>
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+#include <linux/stringify.h>
+#include <linux/kernel.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+#include <asm/prctl.h>
+#include "uprobe_syscall.skel.h"
+#include "uprobe_syscall_executed.skel.h"
+#include "bpf/libbpf_internal.h"
+
+#define USDT_NOP .byte 0x0f, 0x1f, 0x44, 0x00, 0x00
+#include "usdt.h"
+
+#pragma GCC diagnostic ignored "-Wattributes"
+
+__attribute__((aligned(16)))
+__nocf_check __weak __naked unsigned long uprobe_regs_trigger(void)
+{
+ asm volatile (
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n" /* nop5 */
+ "movq $0xdeadbeef, %rax\n"
+ "ret\n"
+ );
+}
+
+__naked void uprobe_regs(struct pt_regs *before, struct pt_regs *after)
+{
+ asm volatile (
+ "movq %r15, 0(%rdi)\n"
+ "movq %r14, 8(%rdi)\n"
+ "movq %r13, 16(%rdi)\n"
+ "movq %r12, 24(%rdi)\n"
+ "movq %rbp, 32(%rdi)\n"
+ "movq %rbx, 40(%rdi)\n"
+ "movq %r11, 48(%rdi)\n"
+ "movq %r10, 56(%rdi)\n"
+ "movq %r9, 64(%rdi)\n"
+ "movq %r8, 72(%rdi)\n"
+ "movq %rax, 80(%rdi)\n"
+ "movq %rcx, 88(%rdi)\n"
+ "movq %rdx, 96(%rdi)\n"
+ "movq %rsi, 104(%rdi)\n"
+ "movq %rdi, 112(%rdi)\n"
+ "movq $0, 120(%rdi)\n" /* orig_rax */
+ "movq $0, 128(%rdi)\n" /* rip */
+ "movq $0, 136(%rdi)\n" /* cs */
+ "pushq %rax\n"
+ "pushf\n"
+ "pop %rax\n"
+ "movq %rax, 144(%rdi)\n" /* eflags */
+ "pop %rax\n"
+ "movq %rsp, 152(%rdi)\n" /* rsp */
+ "movq $0, 160(%rdi)\n" /* ss */
+
+ /* save 2nd argument */
+ "pushq %rsi\n"
+ "call uprobe_regs_trigger\n"
+
+ /* save return value and load 2nd argument pointer to rax */
+ "pushq %rax\n"
+ "movq 8(%rsp), %rax\n"
+
+ "movq %r15, 0(%rax)\n"
+ "movq %r14, 8(%rax)\n"
+ "movq %r13, 16(%rax)\n"
+ "movq %r12, 24(%rax)\n"
+ "movq %rbp, 32(%rax)\n"
+ "movq %rbx, 40(%rax)\n"
+ "movq %r11, 48(%rax)\n"
+ "movq %r10, 56(%rax)\n"
+ "movq %r9, 64(%rax)\n"
+ "movq %r8, 72(%rax)\n"
+ "movq %rcx, 88(%rax)\n"
+ "movq %rdx, 96(%rax)\n"
+ "movq %rsi, 104(%rax)\n"
+ "movq %rdi, 112(%rax)\n"
+ "movq $0, 120(%rax)\n" /* orig_rax */
+ "movq $0, 128(%rax)\n" /* rip */
+ "movq $0, 136(%rax)\n" /* cs */
+
+ /* restore return value and 2nd argument */
+ "pop %rax\n"
+ "pop %rsi\n"
+
+ "movq %rax, 80(%rsi)\n"
+
+ "pushf\n"
+ "pop %rax\n"
+
+ "movq %rax, 144(%rsi)\n" /* eflags */
+ "movq %rsp, 152(%rsi)\n" /* rsp */
+ "movq $0, 160(%rsi)\n" /* ss */
+ "ret\n"
+);
+}
+
+static void test_uprobe_regs_equal(bool retprobe)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = retprobe,
+ );
+ struct uprobe_syscall *skel = NULL;
+ struct pt_regs before = {}, after = {};
+ unsigned long *pb = (unsigned long *) &before;
+ unsigned long *pa = (unsigned long *) &after;
+ unsigned long *pp;
+ unsigned long offset;
+ unsigned int i, cnt;
+
+ offset = get_uprobe_offset(&uprobe_regs_trigger);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ return;
+
+ skel = uprobe_syscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall__open_and_load"))
+ goto cleanup;
+
+ skel->links.probe = bpf_program__attach_uprobe_opts(skel->progs.probe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(skel->links.probe, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ /* make sure uprobe gets optimized */
+ if (!retprobe)
+ uprobe_regs_trigger();
+
+ uprobe_regs(&before, &after);
+
+ pp = (unsigned long *) &skel->bss->regs;
+ cnt = sizeof(before)/sizeof(*pb);
+
+ for (i = 0; i < cnt; i++) {
+ unsigned int offset = i * sizeof(unsigned long);
+
+ /*
+ * Check register before and after uprobe_regs_trigger call
+ * that triggers the uretprobe.
+ */
+ switch (offset) {
+ case offsetof(struct pt_regs, rax):
+ ASSERT_EQ(pa[i], 0xdeadbeef, "return value");
+ break;
+ default:
+ if (!ASSERT_EQ(pb[i], pa[i], "register before-after value check"))
+ fprintf(stdout, "failed register offset %u\n", offset);
+ }
+
+ /*
+ * Check register seen from bpf program and register after
+ * uprobe_regs_trigger call (with rax exception, check below).
+ */
+ switch (offset) {
+ /*
+ * These values will be different (not set in uretprobe_regs),
+ * we don't care.
+ */
+ case offsetof(struct pt_regs, orig_rax):
+ case offsetof(struct pt_regs, rip):
+ case offsetof(struct pt_regs, cs):
+ case offsetof(struct pt_regs, rsp):
+ case offsetof(struct pt_regs, ss):
+ break;
+ /*
+ * uprobe does not see return value in rax, it needs to see the
+ * original (before) rax value
+ */
+ case offsetof(struct pt_regs, rax):
+ if (!retprobe) {
+ ASSERT_EQ(pp[i], pb[i], "uprobe rax prog-before value check");
+ break;
+ }
+ default:
+ if (!ASSERT_EQ(pp[i], pa[i], "register prog-after value check"))
+ fprintf(stdout, "failed register offset %u\n", offset);
+ }
+ }
+
+cleanup:
+ uprobe_syscall__destroy(skel);
+}
+
+#define BPF_TESTMOD_UPROBE_TEST_FILE "/sys/kernel/bpf_testmod_uprobe"
+
+static int write_bpf_testmod_uprobe(unsigned long offset)
+{
+ size_t n, ret;
+ char buf[30];
+ int fd;
+
+ n = sprintf(buf, "%lu", offset);
+
+ fd = open(BPF_TESTMOD_UPROBE_TEST_FILE, O_WRONLY);
+ if (fd < 0)
+ return -errno;
+
+ ret = write(fd, buf, n);
+ close(fd);
+ return ret != n ? (int) ret : 0;
+}
+
+static void test_regs_change(void)
+{
+ struct pt_regs before = {}, after = {};
+ unsigned long *pb = (unsigned long *) &before;
+ unsigned long *pa = (unsigned long *) &after;
+ unsigned long cnt = sizeof(before)/sizeof(*pb);
+ unsigned int i, err, offset;
+
+ offset = get_uprobe_offset(uprobe_regs_trigger);
+
+ err = write_bpf_testmod_uprobe(offset);
+ if (!ASSERT_OK(err, "register_uprobe"))
+ return;
+
+ /* make sure uprobe gets optimized */
+ uprobe_regs_trigger();
+
+ uprobe_regs(&before, &after);
+
+ err = write_bpf_testmod_uprobe(0);
+ if (!ASSERT_OK(err, "unregister_uprobe"))
+ return;
+
+ for (i = 0; i < cnt; i++) {
+ unsigned int offset = i * sizeof(unsigned long);
+
+ switch (offset) {
+ case offsetof(struct pt_regs, rax):
+ ASSERT_EQ(pa[i], 0x12345678deadbeef, "rax");
+ break;
+ case offsetof(struct pt_regs, rcx):
+ ASSERT_EQ(pa[i], 0x87654321feebdaed, "rcx");
+ break;
+ case offsetof(struct pt_regs, r11):
+ ASSERT_EQ(pa[i], (__u64) -1, "r11");
+ break;
+ default:
+ if (!ASSERT_EQ(pa[i], pb[i], "register before-after value check"))
+ fprintf(stdout, "failed register offset %u\n", offset);
+ }
+ }
+}
+
+#ifndef __NR_uretprobe
+#define __NR_uretprobe 335
+#endif
+
+__naked unsigned long uretprobe_syscall_call_1(void)
+{
+ /*
+ * Pretend we are uretprobe trampoline to trigger the return
+ * probe invocation in order to verify we get SIGILL.
+ */
+ asm volatile (
+ "pushq %rax\n"
+ "pushq %rcx\n"
+ "pushq %r11\n"
+ "movq $" __stringify(__NR_uretprobe) ", %rax\n"
+ "syscall\n"
+ "popq %r11\n"
+ "popq %rcx\n"
+ "retq\n"
+ );
+}
+
+__naked unsigned long uretprobe_syscall_call(void)
+{
+ asm volatile (
+ "call uretprobe_syscall_call_1\n"
+ "retq\n"
+ );
+}
+
+static void test_uretprobe_syscall_call(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .retprobe = true,
+ );
+ struct uprobe_syscall_executed *skel;
+ int pid, status, err, go[2], c = 0;
+ struct bpf_link *link;
+
+ if (!ASSERT_OK(pipe(go), "pipe"))
+ return;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ goto cleanup;
+
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork"))
+ goto cleanup;
+
+ /* child */
+ if (pid == 0) {
+ close(go[1]);
+
+ /* wait for parent's kick */
+ err = read(go[0], &c, 1);
+ if (err != 1)
+ exit(-1);
+
+ uretprobe_syscall_call();
+ _exit(0);
+ }
+
+ skel->bss->pid = pid;
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi,
+ pid, "/proc/self/exe",
+ "uretprobe_syscall_call", &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+ skel->links.test_uretprobe_multi = link;
+
+ /* kick the child */
+ write(go[1], &c, 1);
+ err = waitpid(pid, &status, 0);
+ ASSERT_EQ(err, pid, "waitpid");
+
+ /* verify the child got killed with SIGILL */
+ ASSERT_EQ(WIFSIGNALED(status), 1, "WIFSIGNALED");
+ ASSERT_EQ(WTERMSIG(status), SIGILL, "WTERMSIG");
+
+ /* verify the uretprobe program wasn't called */
+ ASSERT_EQ(skel->bss->executed, 0, "executed");
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+ close(go[1]);
+ close(go[0]);
+}
+
+#define TRAMP "[uprobes-trampoline]"
+
+__attribute__((aligned(16)))
+__nocf_check __weak __naked void uprobe_test(void)
+{
+ asm volatile (" \n"
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n"
+ "ret \n"
+ );
+}
+
+__attribute__((aligned(16)))
+__nocf_check __weak void usdt_test(void)
+{
+ USDT(optimized_uprobe, usdt);
+}
+
+static int find_uprobes_trampoline(void *tramp_addr)
+{
+ void *start, *end;
+ char line[128];
+ int ret = -1;
+ FILE *maps;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ fprintf(stderr, "cannot open maps\n");
+ return -1;
+ }
+
+ while (fgets(line, sizeof(line), maps)) {
+ int m = -1;
+
+ /* We care only about private r-x mappings. */
+ if (sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", &start, &end, &m) != 2)
+ continue;
+ if (m < 0)
+ continue;
+ if (!strncmp(&line[m], TRAMP, sizeof(TRAMP)-1) && (start == tramp_addr)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ fclose(maps);
+ return ret;
+}
+
+static unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+
+static void *find_nop5(void *fn)
+{
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ if (!memcmp(nop5, fn + i, 5))
+ return fn + i;
+ }
+ return NULL;
+}
+
+typedef void (__attribute__((nocf_check)) *trigger_t)(void);
+
+static void *check_attach(struct uprobe_syscall_executed *skel, trigger_t trigger,
+ void *addr, int executed)
+{
+ struct __arch_relative_insn {
+ __u8 op;
+ __s32 raddr;
+ } __packed *call;
+ void *tramp = NULL;
+
+ /* Uprobe gets optimized after first trigger, so let's press twice. */
+ trigger();
+ trigger();
+
+ /* Make sure bpf program got executed.. */
+ ASSERT_EQ(skel->bss->executed, executed, "executed");
+
+ /* .. and check the trampoline is as expected. */
+ call = (struct __arch_relative_insn *) addr;
+ tramp = (void *) (call + 1) + call->raddr;
+ ASSERT_EQ(call->op, 0xe8, "call");
+ ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
+
+ return tramp;
+}
+
+static void check_detach(void *addr, void *tramp)
+{
+ /* [uprobes_trampoline] stays after detach */
+ ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
+ ASSERT_OK(memcmp(addr, nop5, 5), "nop5");
+}
+
+static void check(struct uprobe_syscall_executed *skel, struct bpf_link *link,
+ trigger_t trigger, void *addr, int executed)
+{
+ void *tramp;
+
+ tramp = check_attach(skel, trigger, addr, executed);
+ bpf_link__destroy(link);
+ check_detach(addr, tramp);
+}
+
+static void test_uprobe_legacy(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = true,
+ );
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ /* uprobe */
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* uretprobe */
+ skel->bss->executed = 0;
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uretprobe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_multi(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ opts.offsets = &offset;
+ opts.cnt = 1;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ /* uprobe.multi */
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* uretprobe.multi */
+ skel->bss->executed = 0;
+ opts.retprobe = true;
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_session(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .session = true,
+ );
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ opts.offsets = &offset;
+ opts.cnt = 1;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_session,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 4);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_usdt(void)
+{
+ struct uprobe_syscall_executed *skel;
+ struct bpf_link *link;
+ void *addr;
+
+ errno = 0;
+ addr = find_nop5(usdt_test);
+ if (!ASSERT_OK_PTR(addr, "find_nop5"))
+ return;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_usdt(skel->progs.test_usdt,
+ -1 /* all PIDs */, "/proc/self/exe",
+ "optimized_uprobe", "usdt", NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ check(skel, link, usdt_test, addr, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+/*
+ * Borrowed from tools/testing/selftests/x86/test_shadow_stack.c.
+ *
+ * For use in inline enablement of shadow stack.
+ *
+ * The program can't return from the point where shadow stack gets enabled
+ * because there will be no address on the shadow stack. So it can't use
+ * syscall() for enablement, since it is a function.
+ *
+ * Based on code from nolibc.h. Keep a copy here because this can't pull
+ * in all of nolibc.h.
+ */
+#define ARCH_PRCTL(arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = __NR_arch_prctl; \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ register long _arg2 asm("rsi") = (long)(arg2); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#ifndef ARCH_SHSTK_ENABLE
+#define ARCH_SHSTK_ENABLE 0x5001
+#define ARCH_SHSTK_DISABLE 0x5002
+#define ARCH_SHSTK_SHSTK (1ULL << 0)
+#endif
+
+static void test_uretprobe_shadow_stack(void)
+{
+ if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) {
+ test__skip();
+ return;
+ }
+
+ /* Run all the tests with shadow stack in place. */
+
+ test_uprobe_regs_equal(false);
+ test_uprobe_regs_equal(true);
+ test_uretprobe_syscall_call();
+
+ test_uprobe_legacy();
+ test_uprobe_multi();
+ test_uprobe_session();
+ test_uprobe_usdt();
+
+ test_regs_change();
+
+ ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
+}
+
+static volatile bool race_stop;
+
+static USDT_DEFINE_SEMA(race);
+
+static void *worker_trigger(void *arg)
+{
+ unsigned long rounds = 0;
+
+ while (!race_stop) {
+ uprobe_test();
+ rounds++;
+ }
+
+ printf("tid %ld trigger rounds: %lu\n", sys_gettid(), rounds);
+ return NULL;
+}
+
+static void *worker_attach(void *arg)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct uprobe_syscall_executed *skel;
+ unsigned long rounds = 0, offset;
+ const char *sema[2] = {
+ __stringify(USDT_SEMA(race)),
+ NULL,
+ };
+ unsigned long *ref;
+ int err;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ return NULL;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, (const char **) &sema, &ref, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_sema"))
+ return NULL;
+
+ opts.ref_ctr_offset = *ref;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ while (!race_stop) {
+ skel->links.test_uprobe = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(skel->links.test_uprobe, "bpf_program__attach_uprobe_opts"))
+ break;
+
+ bpf_link__destroy(skel->links.test_uprobe);
+ skel->links.test_uprobe = NULL;
+ rounds++;
+ }
+
+ printf("tid %ld attach rounds: %lu hits: %d\n", sys_gettid(), rounds, skel->bss->executed);
+ uprobe_syscall_executed__destroy(skel);
+ free(ref);
+ return NULL;
+}
+
+static useconds_t race_msec(void)
+{
+ char *env;
+
+ env = getenv("BPF_SELFTESTS_UPROBE_SYSCALL_RACE_MSEC");
+ if (env)
+ return atoi(env);
+
+ /* default duration is 500ms */
+ return 500;
+}
+
+static void test_uprobe_race(void)
+{
+ int err, i, nr_threads;
+ pthread_t *threads;
+
+ nr_threads = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(nr_threads, 0, "libbpf_num_possible_cpus"))
+ return;
+ nr_threads = max(2, nr_threads);
+
+ threads = alloca(sizeof(*threads) * nr_threads);
+ if (!ASSERT_OK_PTR(threads, "malloc"))
+ return;
+
+ for (i = 0; i < nr_threads; i++) {
+ err = pthread_create(&threads[i], NULL, i % 2 ? worker_trigger : worker_attach,
+ NULL);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto cleanup;
+ }
+
+ usleep(race_msec() * 1000);
+
+cleanup:
+ race_stop = true;
+ for (nr_threads = i, i = 0; i < nr_threads; i++)
+ pthread_join(threads[i], NULL);
+
+ ASSERT_FALSE(USDT_SEMA_IS_ACTIVE(race), "race_semaphore");
+}
+
+#ifndef __NR_uprobe
+#define __NR_uprobe 336
+#endif
+
+static void test_uprobe_error(void)
+{
+ long err = syscall(__NR_uprobe);
+
+ ASSERT_EQ(err, -1, "error");
+ ASSERT_EQ(errno, ENXIO, "errno");
+}
+
+static void __test_uprobe_syscall(void)
+{
+ if (test__start_subtest("uretprobe_regs_equal"))
+ test_uprobe_regs_equal(true);
+ if (test__start_subtest("uretprobe_syscall_call"))
+ test_uretprobe_syscall_call();
+ if (test__start_subtest("uretprobe_shadow_stack"))
+ test_uretprobe_shadow_stack();
+ if (test__start_subtest("uprobe_legacy"))
+ test_uprobe_legacy();
+ if (test__start_subtest("uprobe_multi"))
+ test_uprobe_multi();
+ if (test__start_subtest("uprobe_session"))
+ test_uprobe_session();
+ if (test__start_subtest("uprobe_usdt"))
+ test_uprobe_usdt();
+ if (test__start_subtest("uprobe_race"))
+ test_uprobe_race();
+ if (test__start_subtest("uprobe_error"))
+ test_uprobe_error();
+ if (test__start_subtest("uprobe_regs_equal"))
+ test_uprobe_regs_equal(false);
+ if (test__start_subtest("regs_change"))
+ test_regs_change();
+}
+#else
+static void __test_uprobe_syscall(void)
+{
+ test__skip();
+}
+#endif
+
+void test_uprobe_syscall(void)
+{
+ __test_uprobe_syscall();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c b/tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c
new file mode 100644
index 000000000000..6deb8d560ddd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "uretprobe_stack.skel.h"
+#include "../sdt.h"
+
+/* We set up target_1() -> target_2() -> target_3() -> target_4() -> USDT()
+ * call chain, each being traced by our BPF program. On entry or return from
+ * each target_*() we are capturing user stack trace and recording it in
+ * global variable, so that user space part of the test can validate it.
+ *
+ * Note, we put each target function into a custom section to get those
+ * __start_XXX/__stop_XXX symbols, generated by linker for us, which allow us
+ * to know address range of those functions
+ */
+__attribute__((section("uprobe__target_4")))
+__weak int target_4(void)
+{
+ STAP_PROBE1(uretprobe_stack, target, 42);
+ return 42;
+}
+
+extern const void *__start_uprobe__target_4;
+extern const void *__stop_uprobe__target_4;
+
+__attribute__((section("uprobe__target_3")))
+__weak int target_3(void)
+{
+ return target_4();
+}
+
+extern const void *__start_uprobe__target_3;
+extern const void *__stop_uprobe__target_3;
+
+__attribute__((section("uprobe__target_2")))
+__weak int target_2(void)
+{
+ return target_3();
+}
+
+extern const void *__start_uprobe__target_2;
+extern const void *__stop_uprobe__target_2;
+
+__attribute__((section("uprobe__target_1")))
+__weak int target_1(int depth)
+{
+ if (depth < 1)
+ return 1 + target_1(depth + 1);
+ else
+ return target_2();
+}
+
+extern const void *__start_uprobe__target_1;
+extern const void *__stop_uprobe__target_1;
+
+extern const void *__start_uretprobe_stack_sec;
+extern const void *__stop_uretprobe_stack_sec;
+
+struct range {
+ long start;
+ long stop;
+};
+
+static struct range targets[] = {
+ {}, /* we want target_1 to map to target[1], so need 1-based indexing */
+ { (long)&__start_uprobe__target_1, (long)&__stop_uprobe__target_1 },
+ { (long)&__start_uprobe__target_2, (long)&__stop_uprobe__target_2 },
+ { (long)&__start_uprobe__target_3, (long)&__stop_uprobe__target_3 },
+ { (long)&__start_uprobe__target_4, (long)&__stop_uprobe__target_4 },
+};
+
+static struct range caller = {
+ (long)&__start_uretprobe_stack_sec,
+ (long)&__stop_uretprobe_stack_sec,
+};
+
+static void validate_stack(__u64 *ips, int stack_len, int cnt, ...)
+{
+ int i, j;
+ va_list args;
+
+ if (!ASSERT_GT(stack_len, 0, "stack_len"))
+ return;
+
+ stack_len /= 8;
+
+ /* check if we have enough entries to satisfy test expectations */
+ if (!ASSERT_GE(stack_len, cnt, "stack_len2"))
+ return;
+
+ if (env.verbosity >= VERBOSE_NORMAL) {
+ printf("caller: %#lx - %#lx\n", caller.start, caller.stop);
+ for (i = 1; i < ARRAY_SIZE(targets); i++)
+ printf("target_%d: %#lx - %#lx\n", i, targets[i].start, targets[i].stop);
+ for (i = 0; i < stack_len; i++) {
+ for (j = 1; j < ARRAY_SIZE(targets); j++) {
+ if (ips[i] >= targets[j].start && ips[i] < targets[j].stop)
+ break;
+ }
+ if (j < ARRAY_SIZE(targets)) { /* found target match */
+ printf("ENTRY #%d: %#lx (in target_%d)\n", i, (long)ips[i], j);
+ } else if (ips[i] >= caller.start && ips[i] < caller.stop) {
+ printf("ENTRY #%d: %#lx (in caller)\n", i, (long)ips[i]);
+ } else {
+ printf("ENTRY #%d: %#lx\n", i, (long)ips[i]);
+ }
+ }
+ }
+
+ va_start(args, cnt);
+
+ for (i = cnt - 1; i >= 0; i--) {
+ /* most recent entry is the deepest target function */
+ const struct range *t = va_arg(args, const struct range *);
+
+ ASSERT_GE(ips[i], t->start, "addr_start");
+ ASSERT_LT(ips[i], t->stop, "addr_stop");
+ }
+
+ va_end(args);
+}
+
+/* __weak prevents inlining */
+__attribute__((section("uretprobe_stack_sec")))
+__weak void test_uretprobe_stack(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct uretprobe_stack *skel;
+ int err;
+
+ skel = uretprobe_stack__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = uretprobe_stack__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ ASSERT_EQ(target_1(0), 42 + 1, "trigger_return");
+
+ /*
+ * Stacks captured on ENTRY uprobes
+ */
+
+ /* (uprobe 1) target_1 in stack trace*/
+ validate_stack(skel->bss->entry_stack1, skel->bss->entry1_len,
+ 2, &caller, &targets[1]);
+ /* (uprobe 1, recursed) */
+ validate_stack(skel->bss->entry_stack1_recur, skel->bss->entry1_recur_len,
+ 3, &caller, &targets[1], &targets[1]);
+ /* (uprobe 2) caller -> target_1 -> target_1 -> target_2 */
+ validate_stack(skel->bss->entry_stack2, skel->bss->entry2_len,
+ 4, &caller, &targets[1], &targets[1], &targets[2]);
+ /* (uprobe 3) */
+ validate_stack(skel->bss->entry_stack3, skel->bss->entry3_len,
+ 5, &caller, &targets[1], &targets[1], &targets[2], &targets[3]);
+ /* (uprobe 4) caller -> target_1 -> target_1 -> target_2 -> target_3 -> target_4 */
+ validate_stack(skel->bss->entry_stack4, skel->bss->entry4_len,
+ 6, &caller, &targets[1], &targets[1], &targets[2], &targets[3], &targets[4]);
+
+ /* (USDT): full caller -> target_1 -> target_1 -> target_2 (uretprobed)
+ * -> target_3 -> target_4 (uretprobes) chain
+ */
+ validate_stack(skel->bss->usdt_stack, skel->bss->usdt_len,
+ 6, &caller, &targets[1], &targets[1], &targets[2], &targets[3], &targets[4]);
+
+ /*
+ * Now stacks captured on the way out in EXIT uprobes
+ */
+
+ /* (uretprobe 4) everything up to target_4, but excluding it */
+ validate_stack(skel->bss->exit_stack4, skel->bss->exit4_len,
+ 5, &caller, &targets[1], &targets[1], &targets[2], &targets[3]);
+ /* we didn't install uretprobes on target_2 and target_3 */
+ /* (uretprobe 1, recur) first target_1 call only */
+ validate_stack(skel->bss->exit_stack1_recur, skel->bss->exit1_recur_len,
+ 2, &caller, &targets[1]);
+ /* (uretprobe 1) just a caller in the stack trace */
+ validate_stack(skel->bss->exit_stack1, skel->bss->exit1_len,
+ 1, &caller);
+
+cleanup:
+ uretprobe_stack__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
new file mode 100644
index 000000000000..f4be5269fa90
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -0,0 +1,526 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "../sdt.h"
+
+#include "test_usdt.skel.h"
+#include "test_urandom_usdt.skel.h"
+
+int lets_test_this(int);
+
+static volatile int idx = 2;
+static volatile __u64 bla = 0xFEDCBA9876543210ULL;
+static volatile short nums[] = {-1, -2, -3, -4};
+
+static volatile struct {
+ int x;
+ signed char y;
+} t1 = { 1, -127 };
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short test_usdt0_semaphore SEC(".probes");
+unsigned short test_usdt3_semaphore SEC(".probes");
+unsigned short test_usdt12_semaphore SEC(".probes");
+
+static void __always_inline trigger_func(int x) {
+ long y = 42;
+
+ if (test_usdt0_semaphore)
+ STAP_PROBE(test, usdt0);
+ if (test_usdt3_semaphore)
+ STAP_PROBE3(test, usdt3, x, y, &bla);
+ if (test_usdt12_semaphore) {
+ STAP_PROBE12(test, usdt12,
+ x, x + 1, y, x + y, 5,
+ y / 7, bla, &bla, -9, nums[x],
+ nums[idx], t1.y);
+ }
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+/*
+ * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)"
+ * - 'size' is the size in bytes of the array element, and its sign indicates
+ * whether the type is signed (negative) or unsigned (positive).
+ * - 'base_reg' is the register holding the base address, normally rdx or edx
+ * - 'index_reg' is the register holding the index, normally rax or eax
+ * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the
+ * size of the element type.
+ *
+ * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be:
+ * - size: -2 (negative because 'short' is signed)
+ * - scale: 2 (since sizeof(short) == 2)
+ *
+ * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386
+ */
+static volatile short array[] = {-1, -2, -3, -4};
+
+#if defined(__x86_64__)
+#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2)
+#else
+#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2)
+#endif
+
+unsigned short test_usdt_sib_semaphore SEC(".probes");
+
+static void trigger_sib_spec(void)
+{
+ /*
+ * Force SIB addressing with inline assembly.
+ *
+ * You must compile with -std=gnu99 or -std=c99 to use the
+ * STAP_PROBE_ASM macro.
+ *
+ * The STAP_PROBE_ASM macro generates a quoted string that gets
+ * inserted between the surrounding assembly instructions. In this
+ * case, USDT_SIB_ARG_SPEC is embedded directly into the instruction
+ * stream, creating a probe point between the asm statement boundaries.
+ * It works fine with gcc/clang.
+ *
+ * Register constraints:
+ * - "d"(array): Binds the 'array' variable to %rdx or %edx register
+ * - "a"(0): Binds the constant 0 to %rax or %eax register
+ * These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and
+ * %%rax(%eax), they contain the expected values for SIB addressing.
+ *
+ * The "memory" clobber prevents the compiler from reordering memory
+ * accesses around the probe point, ensuring that the probe behavior
+ * is predictable and consistent.
+ */
+ asm volatile(
+ STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC)
+ :
+ : "d"(array), "a"(0)
+ : "memory"
+ );
+}
+#endif
+
+static void subtest_basic_usdt(bool optimized)
+{
+ LIBBPF_OPTS(bpf_usdt_opts, opts);
+ struct test_usdt *skel;
+ struct test_usdt__bss *bss;
+ int err, i, called;
+ const __u64 expected_cookie = 0xcafedeadbeeffeed;
+
+#define TRIGGER(x) ({ \
+ trigger_func(x); \
+ if (optimized) \
+ trigger_func(x); \
+ optimized ? 2 : 1; \
+ })
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = test_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* usdt0 won't be auto-attached */
+ opts.usdt_cookie = expected_cookie;
+ skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt0", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
+ goto cleanup;
+
+#if defined(__x86_64__) || defined(__i386__)
+ opts.usdt_cookie = expected_cookie;
+ skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt_sib", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
+ goto cleanup;
+#endif
+
+ called = TRIGGER(1);
+
+ ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");
+
+ ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
+ ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
+ ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+ ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
+
+ /* auto-attached usdt3 gets default zero cookie value */
+ ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
+ ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+ ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+ ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
+ ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+ ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+ ASSERT_EQ(bss->usdt3_arg_sizes[0], 4, "usdt3_arg1_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[1], 8, "usdt3_arg2_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[2], 8, "usdt3_arg3_size");
+
+ /* auto-attached usdt12 gets default zero cookie value */
+ ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
+ ASSERT_EQ(bss->usdt12_arg_cnt, 12, "usdt12_arg_cnt");
+
+ ASSERT_EQ(bss->usdt12_args[0], 1, "usdt12_arg1");
+ ASSERT_EQ(bss->usdt12_args[1], 1 + 1, "usdt12_arg2");
+ ASSERT_EQ(bss->usdt12_args[2], 42, "usdt12_arg3");
+ ASSERT_EQ(bss->usdt12_args[3], 42 + 1, "usdt12_arg4");
+ ASSERT_EQ(bss->usdt12_args[4], 5, "usdt12_arg5");
+ ASSERT_EQ(bss->usdt12_args[5], 42 / 7, "usdt12_arg6");
+ ASSERT_EQ(bss->usdt12_args[6], bla, "usdt12_arg7");
+ ASSERT_EQ(bss->usdt12_args[7], (uintptr_t)&bla, "usdt12_arg8");
+ ASSERT_EQ(bss->usdt12_args[8], -9, "usdt12_arg9");
+ ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10");
+ ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
+ ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+
+ int usdt12_expected_arg_sizes[12] = { 4, 4, 8, 8, 4, 8, 8, 8, 4, 2, 2, 1 };
+
+ for (i = 0; i < 12; i++)
+ ASSERT_EQ(bss->usdt12_arg_sizes[i], usdt12_expected_arg_sizes[i], "usdt12_arg_size");
+
+ /* trigger_func() is marked __always_inline, so USDT invocations will be
+ * inlined in two different places, meaning that each USDT will have
+ * at least 2 different places to be attached to. This verifies that
+ * bpf_program__attach_usdt() handles this properly and attaches to
+ * all possible places of USDT invocation.
+ */
+ called += TRIGGER(2);
+
+ ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");
+
+ /* only check values that depend on trigger_func()'s input value */
+ ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");
+
+ ASSERT_EQ(bss->usdt12_args[0], 2, "usdt12_arg1");
+ ASSERT_EQ(bss->usdt12_args[1], 2 + 1, "usdt12_arg2");
+ ASSERT_EQ(bss->usdt12_args[3], 42 + 2, "usdt12_arg4");
+ ASSERT_EQ(bss->usdt12_args[9], nums[2], "usdt12_arg10");
+
+ /* detach and re-attach usdt3 */
+ bpf_link__destroy(skel->links.usdt3);
+
+ opts.usdt_cookie = 0xBADC00C51E;
+ skel->links.usdt3 = bpf_program__attach_usdt(skel->progs.usdt3, -1 /* any pid */,
+ "/proc/self/exe", "test", "usdt3", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
+ goto cleanup;
+
+ called += TRIGGER(3);
+
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
+ /* this time usdt3 has custom cookie */
+ ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
+ ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+ ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+ ASSERT_EQ(bss->usdt3_args[0], 3, "usdt3_arg1");
+ ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+ ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+#if defined(__x86_64__) || defined(__i386__)
+ trigger_sib_spec();
+ ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
+ ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
+ ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
+ ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
+ ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
+ ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
+#endif
+
+cleanup:
+ test_usdt__destroy(skel);
+#undef TRIGGER
+}
+
+unsigned short test_usdt_100_semaphore SEC(".probes");
+unsigned short test_usdt_300_semaphore SEC(".probes");
+unsigned short test_usdt_400_semaphore SEC(".probes");
+
+#define R10(F, X) F(X+0); F(X+1);F(X+2); F(X+3); F(X+4); \
+ F(X+5); F(X+6); F(X+7); F(X+8); F(X+9);
+#define R100(F, X) R10(F,X+ 0);R10(F,X+10);R10(F,X+20);R10(F,X+30);R10(F,X+40); \
+ R10(F,X+50);R10(F,X+60);R10(F,X+70);R10(F,X+80);R10(F,X+90);
+
+/* carefully control that we get exactly 100 inlines by preventing inlining */
+static void __always_inline f100(int x)
+{
+ STAP_PROBE1(test, usdt_100, x);
+}
+
+__weak void trigger_100_usdts(void)
+{
+ R100(f100, 0);
+}
+
+/* we shouldn't be able to attach to test:usdt2_300 USDT as we don't have as
+ * many slots for specs. It's important that each STAP_PROBE2() invocation
+ * (after untolling) gets different arg spec due to compiler inlining i as
+ * a constant
+ */
+static void __always_inline f300(int x)
+{
+ STAP_PROBE1(test, usdt_300, x);
+}
+
+__weak void trigger_300_usdts(void)
+{
+ R100(f300, 0);
+ R100(f300, 100);
+ R100(f300, 200);
+}
+
+static void __always_inline f400(int x __attribute__((unused)))
+{
+ STAP_PROBE1(test, usdt_400, 400);
+}
+
+/* this time we have 400 different USDT call sites, but they have uniform
+ * argument location, so libbpf's spec string deduplication logic should keep
+ * spec count use very small and so we should be able to attach to all 400
+ * call sites
+ */
+__weak void trigger_400_usdts(void)
+{
+ R100(f400, 0);
+ R100(f400, 100);
+ R100(f400, 200);
+ R100(f400, 300);
+}
+
+static void subtest_multispec_usdt(void)
+{
+ LIBBPF_OPTS(bpf_usdt_opts, opts);
+ struct test_usdt *skel;
+ struct test_usdt__bss *bss;
+ int err, i;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = test_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* usdt_100 is auto-attached and there are 100 inlined call sites,
+ * let's validate that all of them are properly attached to and
+ * handled from BPF side
+ */
+ trigger_100_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, 100, "usdt_100_called");
+ ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+
+ /* Stress test free spec ID tracking. By default libbpf allows up to
+ * 256 specs to be used, so if we don't return free spec IDs back
+ * after few detachments and re-attachments we should run out of
+ * available spec IDs.
+ */
+ for (i = 0; i < 2; i++) {
+ bpf_link__destroy(skel->links.usdt_100);
+
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+ "/proc/self/exe",
+ "test", "usdt_100", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_100_reattach"))
+ goto cleanup;
+
+ bss->usdt_100_sum = 0;
+ trigger_100_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, (i + 1) * 100 + 100, "usdt_100_called");
+ ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+ }
+
+ /* Now let's step it up and try to attach USDT that requires more than
+ * 256 attach points with different specs for each.
+ * Note that we need trigger_300_usdts() only to actually have 300
+ * USDT call sites, we are not going to actually trace them.
+ */
+ trigger_300_usdts();
+
+ bpf_link__destroy(skel->links.usdt_100);
+
+ bss->usdt_100_called = 0;
+ bss->usdt_100_sum = 0;
+
+ /* If built with arm64/clang, there will be much less number of specs
+ * for usdt_300 call sites.
+ */
+#if !defined(__aarch64__) || !defined(__clang__)
+ /* we'll reuse usdt_100 BPF program for usdt_300 test */
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
+ "test", "usdt_300", NULL);
+ err = -errno;
+ if (!ASSERT_ERR_PTR(skel->links.usdt_100, "usdt_300_bad_attach"))
+ goto cleanup;
+ ASSERT_EQ(err, -E2BIG, "usdt_300_attach_err");
+
+ /* let's check that there are no "dangling" BPF programs attached due
+ * to partial success of the above test:usdt_300 attachment
+ */
+ f300(777); /* this is 301st instance of usdt_300 */
+
+ ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
+ ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+#endif
+
+ /* This time we have USDT with 400 inlined invocations, but arg specs
+ * should be the same across all sites, so libbpf will only need to
+ * use one spec and thus we'll be able to attach 400 uprobes
+ * successfully.
+ *
+ * Again, we are reusing usdt_100 BPF program.
+ */
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+ "/proc/self/exe",
+ "test", "usdt_400", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_400_attach"))
+ goto cleanup;
+
+ trigger_400_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called");
+ ASSERT_EQ(bss->usdt_100_sum, 400 * 400, "usdt_400_sum");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+
+static FILE *urand_spawn(int *pid)
+{
+ FILE *f;
+
+ /* urandom_read's stdout is wired into f */
+ f = popen("./urandom_read 1 report-pid", "r");
+ if (!f)
+ return NULL;
+
+ if (fscanf(f, "%d", pid) != 1) {
+ pclose(f);
+ errno = EINVAL;
+ return NULL;
+ }
+
+ return f;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+ int exit_code;
+
+ /* pclose() waits for child process to exit and returns their exit code */
+ exit_code = pclose(*urand_pipe);
+ *urand_pipe = NULL;
+
+ return exit_code;
+}
+
+static void subtest_urandom_usdt(bool auto_attach)
+{
+ struct test_urandom_usdt *skel;
+ struct test_urandom_usdt__bss *bss;
+ struct bpf_link *l;
+ FILE *urand_pipe = NULL;
+ int err, urand_pid = 0;
+
+ skel = test_urandom_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ urand_pipe = urand_spawn(&urand_pid);
+ if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->urand_pid = urand_pid;
+
+ if (auto_attach) {
+ err = test_urandom_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_auto_attach"))
+ goto cleanup;
+ } else {
+ l = bpf_program__attach_usdt(skel->progs.urand_read_without_sema,
+ urand_pid, "./urandom_read",
+ "urand", "read_without_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urand_without_sema_attach"))
+ goto cleanup;
+ skel->links.urand_read_without_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urand_read_with_sema,
+ urand_pid, "./urandom_read",
+ "urand", "read_with_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urand_with_sema_attach"))
+ goto cleanup;
+ skel->links.urand_read_with_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urandlib_read_without_sema,
+ urand_pid, "./liburandom_read.so",
+ "urandlib", "read_without_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urandlib_without_sema_attach"))
+ goto cleanup;
+ skel->links.urandlib_read_without_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urandlib_read_with_sema,
+ urand_pid, "./liburandom_read.so",
+ "urandlib", "read_with_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urandlib_with_sema_attach"))
+ goto cleanup;
+ skel->links.urandlib_read_with_sema = l;
+
+ }
+
+ /* trigger urandom_read USDTs */
+ ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+ ASSERT_EQ(bss->urand_read_without_sema_call_cnt, 1, "urand_wo_sema_cnt");
+ ASSERT_EQ(bss->urand_read_without_sema_buf_sz_sum, 256, "urand_wo_sema_sum");
+
+ ASSERT_EQ(bss->urand_read_with_sema_call_cnt, 1, "urand_w_sema_cnt");
+ ASSERT_EQ(bss->urand_read_with_sema_buf_sz_sum, 256, "urand_w_sema_sum");
+
+ ASSERT_EQ(bss->urandlib_read_without_sema_call_cnt, 1, "urandlib_wo_sema_cnt");
+ ASSERT_EQ(bss->urandlib_read_without_sema_buf_sz_sum, 256, "urandlib_wo_sema_sum");
+
+ ASSERT_EQ(bss->urandlib_read_with_sema_call_cnt, 1, "urandlib_w_sema_cnt");
+ ASSERT_EQ(bss->urandlib_read_with_sema_buf_sz_sum, 256, "urandlib_w_sema_sum");
+
+cleanup:
+ if (urand_pipe)
+ pclose(urand_pipe);
+ test_urandom_usdt__destroy(skel);
+}
+
+void test_usdt(void)
+{
+ if (test__start_subtest("basic"))
+ subtest_basic_usdt(false);
+#ifdef __x86_64__
+ if (test__start_subtest("basic_optimized"))
+ subtest_basic_usdt(true);
+#endif
+ if (test__start_subtest("multispec"))
+ subtest_multispec_usdt();
+ if (test__start_subtest("urand_auto_attach"))
+ subtest_urandom_usdt(true /* auto_attach */);
+ if (test__start_subtest("urand_pid_attach"))
+ subtest_urandom_usdt(false /* auto_attach */);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
new file mode 100644
index 000000000000..9fd3ae987321
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -0,0 +1,701 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <linux/compiler.h>
+#include <linux/ring_buffer.h>
+#include <linux/build_bug.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include <uapi/linux/bpf.h>
+#include <unistd.h>
+
+#include "user_ringbuf_fail.skel.h"
+#include "user_ringbuf_success.skel.h"
+
+#include "../progs/test_user_ringbuf.h"
+
+static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ;
+static long c_ringbuf_size, c_max_entries;
+
+static void drain_current_samples(void)
+{
+ syscall(__NR_getpgid);
+}
+
+static int write_samples(struct user_ring_buffer *ringbuf, uint32_t num_samples)
+{
+ int i, err = 0;
+
+ /* Write some number of samples to the ring buffer. */
+ for (i = 0; i < num_samples; i++) {
+ struct sample *entry;
+ int read;
+
+ entry = user_ring_buffer__reserve(ringbuf, sizeof(*entry));
+ if (!entry) {
+ err = -errno;
+ goto done;
+ }
+
+ entry->pid = getpid();
+ entry->seq = i;
+ entry->value = i * i;
+
+ read = snprintf(entry->comm, sizeof(entry->comm), "%u", i);
+ if (read <= 0) {
+ /* Assert on the error path to avoid spamming logs with
+ * mostly success messages.
+ */
+ ASSERT_GT(read, 0, "snprintf_comm");
+ err = read;
+ user_ring_buffer__discard(ringbuf, entry);
+ goto done;
+ }
+
+ user_ring_buffer__submit(ringbuf, entry);
+ }
+
+done:
+ drain_current_samples();
+
+ return err;
+}
+
+static struct user_ringbuf_success *open_load_ringbuf_skel(void)
+{
+ struct user_ringbuf_success *skel;
+ int err;
+
+ skel = user_ringbuf_success__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ err = bpf_map__set_max_entries(skel->maps.user_ringbuf, c_ringbuf_size);
+ if (!ASSERT_OK(err, "set_max_entries"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.kernel_ringbuf, c_ringbuf_size);
+ if (!ASSERT_OK(err, "set_max_entries"))
+ goto cleanup;
+
+ err = user_ringbuf_success__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ user_ringbuf_success__destroy(skel);
+ return NULL;
+}
+
+static void test_user_ringbuf_mappings(void)
+{
+ int err, rb_fd;
+ int page_size = getpagesize();
+ void *mmap_ptr;
+ struct user_ringbuf_success *skel;
+
+ skel = open_load_ringbuf_skel();
+ if (!skel)
+ return;
+
+ rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
+ /* cons_pos can be mapped R/O, can't add +X with mprotect. */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
+ ASSERT_OK_PTR(mmap_ptr, "ro_cons_pos");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_cons_pos_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "wr_prod_pos");
+ err = -errno;
+ ASSERT_ERR(err, "wr_prod_pos_err");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro_cons");
+
+ /* prod_pos can be mapped RW, can't add +X with mprotect. */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ rb_fd, page_size);
+ ASSERT_OK_PTR(mmap_ptr, "rw_prod_pos");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_prod_pos_protect");
+ err = -errno;
+ ASSERT_ERR(err, "wr_prod_pos_err");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_prod");
+
+ /* data pages can be mapped RW, can't add +X with mprotect. */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd,
+ 2 * page_size);
+ ASSERT_OK_PTR(mmap_ptr, "rw_data");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_data_protect");
+ err = -errno;
+ ASSERT_ERR(err, "exec_data_err");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_data");
+
+ user_ringbuf_success__destroy(skel);
+}
+
+static int load_skel_create_ringbufs(struct user_ringbuf_success **skel_out,
+ struct ring_buffer **kern_ringbuf_out,
+ ring_buffer_sample_fn callback,
+ struct user_ring_buffer **user_ringbuf_out)
+{
+ struct user_ringbuf_success *skel;
+ struct ring_buffer *kern_ringbuf = NULL;
+ struct user_ring_buffer *user_ringbuf = NULL;
+ int err = -ENOMEM, rb_fd;
+
+ skel = open_load_ringbuf_skel();
+ if (!skel)
+ return err;
+
+ /* only trigger BPF program for current process */
+ skel->bss->pid = getpid();
+
+ if (kern_ringbuf_out) {
+ rb_fd = bpf_map__fd(skel->maps.kernel_ringbuf);
+ kern_ringbuf = ring_buffer__new(rb_fd, callback, skel, NULL);
+ if (!ASSERT_OK_PTR(kern_ringbuf, "kern_ringbuf_create"))
+ goto cleanup;
+
+ *kern_ringbuf_out = kern_ringbuf;
+ }
+
+ if (user_ringbuf_out) {
+ rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
+ user_ringbuf = user_ring_buffer__new(rb_fd, NULL);
+ if (!ASSERT_OK_PTR(user_ringbuf, "user_ringbuf_create"))
+ goto cleanup;
+
+ *user_ringbuf_out = user_ringbuf;
+ ASSERT_EQ(skel->bss->read, 0, "no_reads_after_load");
+ }
+
+ err = user_ringbuf_success__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ *skel_out = skel;
+ return 0;
+
+cleanup:
+ if (kern_ringbuf_out)
+ *kern_ringbuf_out = NULL;
+ if (user_ringbuf_out)
+ *user_ringbuf_out = NULL;
+ ring_buffer__free(kern_ringbuf);
+ user_ring_buffer__free(user_ringbuf);
+ user_ringbuf_success__destroy(skel);
+ return err;
+}
+
+static int load_skel_create_user_ringbuf(struct user_ringbuf_success **skel_out,
+ struct user_ring_buffer **ringbuf_out)
+{
+ return load_skel_create_ringbufs(skel_out, NULL, NULL, ringbuf_out);
+}
+
+static void manually_write_test_invalid_sample(struct user_ringbuf_success *skel,
+ __u32 size, __u64 producer_pos, int err)
+{
+ void *data_ptr;
+ __u64 *producer_pos_ptr;
+ int rb_fd, page_size = getpagesize();
+
+ rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_before_bad_sample");
+
+ /* Map the producer_pos as RW. */
+ producer_pos_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, rb_fd, page_size);
+ ASSERT_OK_PTR(producer_pos_ptr, "producer_pos_ptr");
+
+ /* Map the data pages as RW. */
+ data_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ ASSERT_OK_PTR(data_ptr, "rw_data");
+
+ memset(data_ptr, 0, BPF_RINGBUF_HDR_SZ);
+ *(__u32 *)data_ptr = size;
+
+ /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in the kernel. */
+ smp_store_release(producer_pos_ptr, producer_pos + BPF_RINGBUF_HDR_SZ);
+
+ drain_current_samples();
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_after_bad_sample");
+ ASSERT_EQ(skel->bss->err, err, "err_after_bad_sample");
+
+ ASSERT_OK(munmap(producer_pos_ptr, page_size), "unmap_producer_pos");
+ ASSERT_OK(munmap(data_ptr, page_size), "unmap_data_ptr");
+}
+
+static void test_user_ringbuf_post_misaligned(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+ __u32 size = (1 << 5) + 7;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "misaligned_skel"))
+ return;
+
+ manually_write_test_invalid_sample(skel, size, size, -EINVAL);
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_post_producer_wrong_offset(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+ __u32 size = (1 << 5);
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "wrong_offset_skel"))
+ return;
+
+ manually_write_test_invalid_sample(skel, size, size - 8, -EINVAL);
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_post_larger_than_ringbuf_sz(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+ __u32 size = c_ringbuf_size;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "huge_sample_skel"))
+ return;
+
+ manually_write_test_invalid_sample(skel, size, size, -E2BIG);
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_basic(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "ringbuf_basic_skel"))
+ return;
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+
+ err = write_samples(ringbuf, 2);
+ if (!ASSERT_OK(err, "write_samples"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->read, 2, "num_samples_read_after");
+
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_sample_full_ring_buffer(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+ void *sample;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "ringbuf_full_sample_skel"))
+ return;
+
+ sample = user_ring_buffer__reserve(ringbuf, c_ringbuf_size - BPF_RINGBUF_HDR_SZ);
+ if (!ASSERT_OK_PTR(sample, "full_sample"))
+ goto cleanup;
+
+ user_ring_buffer__submit(ringbuf, sample);
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+ drain_current_samples();
+ ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after");
+
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_post_alignment_autoadjust(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ struct sample *sample;
+ int err;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "ringbuf_align_autoadjust_skel"))
+ return;
+
+ /* libbpf should automatically round any sample up to an 8-byte alignment. */
+ sample = user_ring_buffer__reserve(ringbuf, sizeof(*sample) + 1);
+ ASSERT_OK_PTR(sample, "reserve_autoaligned");
+ user_ring_buffer__submit(ringbuf, sample);
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+ drain_current_samples();
+ ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after");
+
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_overfill(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (err)
+ return;
+
+ err = write_samples(ringbuf, c_max_entries * 5);
+ ASSERT_ERR(err, "write_samples");
+ ASSERT_EQ(skel->bss->read, c_max_entries, "max_entries");
+
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_discards_properly_ignored(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err, num_discarded = 0;
+ __u64 *token;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (err)
+ return;
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+
+ while (1) {
+ /* Write samples until the buffer is full. */
+ token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
+ if (!token)
+ break;
+
+ user_ring_buffer__discard(ringbuf, token);
+ num_discarded++;
+ }
+
+ if (!ASSERT_GE(num_discarded, 0, "num_discarded"))
+ goto cleanup;
+
+ /* Should not read any samples, as they are all discarded. */
+ ASSERT_EQ(skel->bss->read, 0, "num_pre_kick");
+ drain_current_samples();
+ ASSERT_EQ(skel->bss->read, 0, "num_post_kick");
+
+ /* Now that the ring buffer has been drained, we should be able to
+ * reserve another token.
+ */
+ token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
+
+ if (!ASSERT_OK_PTR(token, "new_token"))
+ goto cleanup;
+
+ user_ring_buffer__discard(ringbuf, token);
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_loop(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ uint32_t total_samples = 8192;
+ uint32_t remaining_samples = total_samples;
+ int err;
+
+ if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries"))
+ return;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (err)
+ return;
+
+ do {
+ uint32_t curr_samples;
+
+ curr_samples = remaining_samples > c_max_entries
+ ? c_max_entries : remaining_samples;
+ err = write_samples(ringbuf, curr_samples);
+ if (err != 0) {
+ /* Assert inside of if statement to avoid flooding logs
+ * on the success path.
+ */
+ ASSERT_OK(err, "write_samples");
+ goto cleanup;
+ }
+
+ remaining_samples -= curr_samples;
+ ASSERT_EQ(skel->bss->read, total_samples - remaining_samples,
+ "current_batched_entries");
+ } while (remaining_samples > 0);
+ ASSERT_EQ(skel->bss->read, total_samples, "total_batched_entries");
+
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static int send_test_message(struct user_ring_buffer *ringbuf,
+ enum test_msg_op op, s64 operand_64,
+ s32 operand_32)
+{
+ struct test_msg *msg;
+
+ msg = user_ring_buffer__reserve(ringbuf, sizeof(*msg));
+ if (!msg) {
+ /* Assert on the error path to avoid spamming logs with mostly
+ * success messages.
+ */
+ ASSERT_OK_PTR(msg, "reserve_msg");
+ return -ENOMEM;
+ }
+
+ msg->msg_op = op;
+
+ switch (op) {
+ case TEST_MSG_OP_INC64:
+ case TEST_MSG_OP_MUL64:
+ msg->operand_64 = operand_64;
+ break;
+ case TEST_MSG_OP_INC32:
+ case TEST_MSG_OP_MUL32:
+ msg->operand_32 = operand_32;
+ break;
+ default:
+ PRINT_FAIL("Invalid operand %d\n", op);
+ user_ring_buffer__discard(ringbuf, msg);
+ return -EINVAL;
+ }
+
+ user_ring_buffer__submit(ringbuf, msg);
+
+ return 0;
+}
+
+static void kick_kernel_read_messages(void)
+{
+ syscall(__NR_prctl);
+}
+
+static int handle_kernel_msg(void *ctx, void *data, size_t len)
+{
+ struct user_ringbuf_success *skel = ctx;
+ struct test_msg *msg = data;
+
+ switch (msg->msg_op) {
+ case TEST_MSG_OP_INC64:
+ skel->bss->user_mutated += msg->operand_64;
+ return 0;
+ case TEST_MSG_OP_INC32:
+ skel->bss->user_mutated += msg->operand_32;
+ return 0;
+ case TEST_MSG_OP_MUL64:
+ skel->bss->user_mutated *= msg->operand_64;
+ return 0;
+ case TEST_MSG_OP_MUL32:
+ skel->bss->user_mutated *= msg->operand_32;
+ return 0;
+ default:
+ fprintf(stderr, "Invalid operand %d\n", msg->msg_op);
+ return -EINVAL;
+ }
+}
+
+static void drain_kernel_messages_buffer(struct ring_buffer *kern_ringbuf,
+ struct user_ringbuf_success *skel)
+{
+ int cnt;
+
+ cnt = ring_buffer__consume(kern_ringbuf);
+ ASSERT_EQ(cnt, 8, "consume_kern_ringbuf");
+ ASSERT_OK(skel->bss->err, "consume_kern_ringbuf_err");
+}
+
+static void test_user_ringbuf_msg_protocol(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *user_ringbuf;
+ struct ring_buffer *kern_ringbuf;
+ int err, i;
+ __u64 expected_kern = 0;
+
+ err = load_skel_create_ringbufs(&skel, &kern_ringbuf, handle_kernel_msg, &user_ringbuf);
+ if (!ASSERT_OK(err, "create_ringbufs"))
+ return;
+
+ for (i = 0; i < 64; i++) {
+ enum test_msg_op op = i % TEST_MSG_OP_NUM_OPS;
+ __u64 operand_64 = TEST_OP_64;
+ __u32 operand_32 = TEST_OP_32;
+
+ err = send_test_message(user_ringbuf, op, operand_64, operand_32);
+ if (err) {
+ /* Only assert on a failure to avoid spamming success logs. */
+ ASSERT_OK(err, "send_test_message");
+ goto cleanup;
+ }
+
+ switch (op) {
+ case TEST_MSG_OP_INC64:
+ expected_kern += operand_64;
+ break;
+ case TEST_MSG_OP_INC32:
+ expected_kern += operand_32;
+ break;
+ case TEST_MSG_OP_MUL64:
+ expected_kern *= operand_64;
+ break;
+ case TEST_MSG_OP_MUL32:
+ expected_kern *= operand_32;
+ break;
+ default:
+ PRINT_FAIL("Unexpected op %d\n", op);
+ goto cleanup;
+ }
+
+ if (i % 8 == 0) {
+ kick_kernel_read_messages();
+ ASSERT_EQ(skel->bss->kern_mutated, expected_kern, "expected_kern");
+ ASSERT_EQ(skel->bss->err, 0, "bpf_prog_err");
+ drain_kernel_messages_buffer(kern_ringbuf, skel);
+ }
+ }
+
+cleanup:
+ ring_buffer__free(kern_ringbuf);
+ user_ring_buffer__free(user_ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void *kick_kernel_cb(void *arg)
+{
+ /* Kick the kernel, causing it to drain the ring buffer and then wake
+ * up the test thread waiting on epoll.
+ */
+ syscall(__NR_prlimit64);
+
+ return NULL;
+}
+
+static int spawn_kick_thread_for_poll(void)
+{
+ pthread_t thread;
+
+ return pthread_create(&thread, NULL, kick_kernel_cb, NULL);
+}
+
+static void test_user_ringbuf_blocking_reserve(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err, num_written = 0;
+ __u64 *token;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (err)
+ return;
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+
+ while (1) {
+ /* Write samples until the buffer is full. */
+ token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
+ if (!token)
+ break;
+
+ *token = 0xdeadbeef;
+
+ user_ring_buffer__submit(ringbuf, token);
+ num_written++;
+ }
+
+ if (!ASSERT_GE(num_written, 0, "num_written"))
+ goto cleanup;
+
+ /* Should not have read any samples until the kernel is kicked. */
+ ASSERT_EQ(skel->bss->read, 0, "num_pre_kick");
+
+ /* We correctly time out after 1 second, without a sample. */
+ token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 1000);
+ if (!ASSERT_EQ(token, NULL, "pre_kick_timeout_token"))
+ goto cleanup;
+
+ err = spawn_kick_thread_for_poll();
+ if (!ASSERT_EQ(err, 0, "deferred_kick_thread\n"))
+ goto cleanup;
+
+ /* After spawning another thread that asynchronously kicks the kernel to
+ * drain the messages, we're able to block and successfully get a
+ * sample once we receive an event notification.
+ */
+ token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 10000);
+
+ if (!ASSERT_OK_PTR(token, "block_token"))
+ goto cleanup;
+
+ ASSERT_GT(skel->bss->read, 0, "num_post_kill");
+ ASSERT_LE(skel->bss->read, num_written, "num_post_kill");
+ ASSERT_EQ(skel->bss->err, 0, "err_post_poll");
+ user_ring_buffer__discard(ringbuf, token);
+
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+#define SUCCESS_TEST(_func) { _func, #_func }
+
+static struct {
+ void (*test_callback)(void);
+ const char *test_name;
+} success_tests[] = {
+ SUCCESS_TEST(test_user_ringbuf_mappings),
+ SUCCESS_TEST(test_user_ringbuf_post_misaligned),
+ SUCCESS_TEST(test_user_ringbuf_post_producer_wrong_offset),
+ SUCCESS_TEST(test_user_ringbuf_post_larger_than_ringbuf_sz),
+ SUCCESS_TEST(test_user_ringbuf_basic),
+ SUCCESS_TEST(test_user_ringbuf_sample_full_ring_buffer),
+ SUCCESS_TEST(test_user_ringbuf_post_alignment_autoadjust),
+ SUCCESS_TEST(test_user_ringbuf_overfill),
+ SUCCESS_TEST(test_user_ringbuf_discards_properly_ignored),
+ SUCCESS_TEST(test_user_ringbuf_loop),
+ SUCCESS_TEST(test_user_ringbuf_msg_protocol),
+ SUCCESS_TEST(test_user_ringbuf_blocking_reserve),
+};
+
+void test_user_ringbuf(void)
+{
+ int i;
+
+ c_ringbuf_size = getpagesize(); /* 1 page */
+ c_max_entries = c_ringbuf_size / c_sample_size;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i].test_name))
+ continue;
+
+ success_tests[i].test_callback();
+ }
+
+ RUN_TESTS(user_ringbuf_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
index dd324b4933db..4d7056f8f177 100644
--- a/tools/testing/selftests/bpf/prog_tests/varlen.c
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -63,6 +63,13 @@ void test_varlen(void)
CHECK_VAL(data->total4, size1 + size2);
CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check",
"doesn't match!\n");
+
+ CHECK_VAL(bss->ret_bad_read, -EFAULT);
+ CHECK_VAL(data->payload_bad[0], 0x42);
+ CHECK_VAL(data->payload_bad[1], 0x42);
+ CHECK_VAL(data->payload_bad[2], 0);
+ CHECK_VAL(data->payload_bad[3], 0x42);
+ CHECK_VAL(data->payload_bad[4], 0x42);
cleanup:
test_varlen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/verif_stats.c b/tools/testing/selftests/bpf/prog_tests/verif_stats.c
new file mode 100644
index 000000000000..af4b95f57ac1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verif_stats.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+
+#include "trace_vprintk.lskel.h"
+
+void test_verif_stats(void)
+{
+ __u32 len = sizeof(struct bpf_prog_info);
+ struct trace_vprintk_lskel *skel;
+ struct bpf_prog_info info = {};
+ int err;
+
+ skel = trace_vprintk_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load"))
+ goto cleanup;
+
+ err = bpf_prog_get_info_by_fd(skel->progs.sys_enter.prog_fd,
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto cleanup;
+
+ if (!ASSERT_GT(info.verified_insns, 0, "verified_insns"))
+ goto cleanup;
+
+cleanup:
+ trace_vprintk_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
new file mode 100644
index 000000000000..4b4b081b46cc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+
+#include "cap_helpers.h"
+#include "verifier_and.skel.h"
+#include "verifier_arena.skel.h"
+#include "verifier_arena_large.skel.h"
+#include "verifier_array_access.skel.h"
+#include "verifier_async_cb_context.skel.h"
+#include "verifier_basic_stack.skel.h"
+#include "verifier_bitfield_write.skel.h"
+#include "verifier_bounds.skel.h"
+#include "verifier_bounds_deduction.skel.h"
+#include "verifier_bounds_deduction_non_const.skel.h"
+#include "verifier_bounds_mix_sign_unsign.skel.h"
+#include "verifier_bpf_get_stack.skel.h"
+#include "verifier_bpf_trap.skel.h"
+#include "verifier_bswap.skel.h"
+#include "verifier_btf_ctx_access.skel.h"
+#include "verifier_btf_unreliable_prog.skel.h"
+#include "verifier_cfg.skel.h"
+#include "verifier_cgroup_inv_retcode.skel.h"
+#include "verifier_cgroup_skb.skel.h"
+#include "verifier_cgroup_storage.skel.h"
+#include "verifier_const.skel.h"
+#include "verifier_const_or.skel.h"
+#include "verifier_ctx.skel.h"
+#include "verifier_ctx_sk_msg.skel.h"
+#include "verifier_d_path.skel.h"
+#include "verifier_direct_packet_access.skel.h"
+#include "verifier_direct_stack_access_wraparound.skel.h"
+#include "verifier_div0.skel.h"
+#include "verifier_div_overflow.skel.h"
+#include "verifier_global_subprogs.skel.h"
+#include "verifier_global_ptr_args.skel.h"
+#include "verifier_gotol.skel.h"
+#include "verifier_gotox.skel.h"
+#include "verifier_helper_access_var_len.skel.h"
+#include "verifier_helper_packet_access.skel.h"
+#include "verifier_helper_restricted.skel.h"
+#include "verifier_helper_value_access.skel.h"
+#include "verifier_int_ptr.skel.h"
+#include "verifier_iterating_callbacks.skel.h"
+#include "verifier_jeq_infer_not_null.skel.h"
+#include "verifier_jit_convergence.skel.h"
+#include "verifier_ld_ind.skel.h"
+#include "verifier_ldsx.skel.h"
+#include "verifier_leak_ptr.skel.h"
+#include "verifier_linked_scalars.skel.h"
+#include "verifier_live_stack.skel.h"
+#include "verifier_load_acquire.skel.h"
+#include "verifier_loops1.skel.h"
+#include "verifier_lwt.skel.h"
+#include "verifier_map_in_map.skel.h"
+#include "verifier_map_ptr.skel.h"
+#include "verifier_map_ptr_mixing.skel.h"
+#include "verifier_map_ret_val.skel.h"
+#include "verifier_masking.skel.h"
+#include "verifier_may_goto_1.skel.h"
+#include "verifier_may_goto_2.skel.h"
+#include "verifier_meta_access.skel.h"
+#include "verifier_movsx.skel.h"
+#include "verifier_mtu.skel.h"
+#include "verifier_mul.skel.h"
+#include "verifier_netfilter_ctx.skel.h"
+#include "verifier_netfilter_retcode.skel.h"
+#include "verifier_bpf_fastcall.skel.h"
+#include "verifier_or_jmp32_k.skel.h"
+#include "verifier_precision.skel.h"
+#include "verifier_prevent_map_lookup.skel.h"
+#include "verifier_private_stack.skel.h"
+#include "verifier_raw_stack.skel.h"
+#include "verifier_raw_tp_writable.skel.h"
+#include "verifier_reg_equal.skel.h"
+#include "verifier_ref_tracking.skel.h"
+#include "verifier_regalloc.skel.h"
+#include "verifier_ringbuf.skel.h"
+#include "verifier_runtime_jit.skel.h"
+#include "verifier_scalar_ids.skel.h"
+#include "verifier_sdiv.skel.h"
+#include "verifier_search_pruning.skel.h"
+#include "verifier_sock.skel.h"
+#include "verifier_sock_addr.skel.h"
+#include "verifier_sockmap_mutate.skel.h"
+#include "verifier_spill_fill.skel.h"
+#include "verifier_spin_lock.skel.h"
+#include "verifier_stack_ptr.skel.h"
+#include "verifier_store_release.skel.h"
+#include "verifier_subprog_precision.skel.h"
+#include "verifier_subreg.skel.h"
+#include "verifier_tailcall.skel.h"
+#include "verifier_tailcall_jit.skel.h"
+#include "verifier_typedef.skel.h"
+#include "verifier_uninit.skel.h"
+#include "verifier_unpriv.skel.h"
+#include "verifier_unpriv_perf.skel.h"
+#include "verifier_value_adj_spill.skel.h"
+#include "verifier_value.skel.h"
+#include "verifier_value_illegal_alu.skel.h"
+#include "verifier_value_or_null.skel.h"
+#include "verifier_value_ptr_arith.skel.h"
+#include "verifier_var_off.skel.h"
+#include "verifier_vfs_accept.skel.h"
+#include "verifier_vfs_reject.skel.h"
+#include "verifier_xadd.skel.h"
+#include "verifier_xdp.skel.h"
+#include "verifier_xdp_direct_packet_access.skel.h"
+#include "verifier_bits_iter.skel.h"
+#include "verifier_lsm.skel.h"
+#include "irq.skel.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+__maybe_unused
+static void run_tests_aux(const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory,
+ pre_execution_cb pre_execution_cb)
+{
+ struct test_loader tester = {};
+ __u64 old_caps;
+ int err;
+
+ /* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */
+ err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps);
+ if (err) {
+ PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
+ return;
+ }
+
+ test_loader__set_pre_execution_cb(&tester, pre_execution_cb);
+ test_loader__run_subtests(&tester, skel_name, elf_bytes_factory);
+ test_loader_fini(&tester);
+
+ err = cap_enable_effective(old_caps, NULL);
+ if (err)
+ PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
+}
+
+#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
+
+void test_verifier_and(void) { RUN(verifier_and); }
+void test_verifier_arena(void) { RUN(verifier_arena); }
+void test_verifier_arena_large(void) { RUN(verifier_arena_large); }
+void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
+void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
+void test_verifier_bounds(void) { RUN(verifier_bounds); }
+void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); }
+void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); }
+void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
+void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); }
+void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); }
+void test_verifier_bswap(void) { RUN(verifier_bswap); }
+void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); }
+void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); }
+void test_verifier_cfg(void) { RUN(verifier_cfg); }
+void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); }
+void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); }
+void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); }
+void test_verifier_const(void) { RUN(verifier_const); }
+void test_verifier_const_or(void) { RUN(verifier_const_or); }
+void test_verifier_ctx(void) { RUN(verifier_ctx); }
+void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); }
+void test_verifier_d_path(void) { RUN(verifier_d_path); }
+void test_verifier_direct_packet_access(void) { RUN(verifier_direct_packet_access); }
+void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); }
+void test_verifier_div0(void) { RUN(verifier_div0); }
+void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
+void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); }
+void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); }
+void test_verifier_gotol(void) { RUN(verifier_gotol); }
+void test_verifier_gotox(void) { RUN(verifier_gotox); }
+void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
+void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
+void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
+void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
+void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
+void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
+void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
+void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); }
+void test_verifier_load_acquire(void) { RUN(verifier_load_acquire); }
+void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
+void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
+void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
+void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); }
+void test_verifier_live_stack(void) { RUN(verifier_live_stack); }
+void test_verifier_loops1(void) { RUN(verifier_loops1); }
+void test_verifier_lwt(void) { RUN(verifier_lwt); }
+void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); }
+void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); }
+void test_verifier_map_ptr_mixing(void) { RUN(verifier_map_ptr_mixing); }
+void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); }
+void test_verifier_masking(void) { RUN(verifier_masking); }
+void test_verifier_may_goto_1(void) { RUN(verifier_may_goto_1); }
+void test_verifier_may_goto_2(void) { RUN(verifier_may_goto_2); }
+void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
+void test_verifier_movsx(void) { RUN(verifier_movsx); }
+void test_verifier_mul(void) { RUN(verifier_mul); }
+void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
+void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
+void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); }
+void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); }
+void test_verifier_precision(void) { RUN(verifier_precision); }
+void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
+void test_verifier_private_stack(void) { RUN(verifier_private_stack); }
+void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
+void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); }
+void test_verifier_reg_equal(void) { RUN(verifier_reg_equal); }
+void test_verifier_ref_tracking(void) { RUN(verifier_ref_tracking); }
+void test_verifier_regalloc(void) { RUN(verifier_regalloc); }
+void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); }
+void test_verifier_runtime_jit(void) { RUN(verifier_runtime_jit); }
+void test_verifier_scalar_ids(void) { RUN(verifier_scalar_ids); }
+void test_verifier_sdiv(void) { RUN(verifier_sdiv); }
+void test_verifier_search_pruning(void) { RUN(verifier_search_pruning); }
+void test_verifier_sock(void) { RUN(verifier_sock); }
+void test_verifier_sock_addr(void) { RUN(verifier_sock_addr); }
+void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); }
+void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
+void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
+void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
+void test_verifier_store_release(void) { RUN(verifier_store_release); }
+void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
+void test_verifier_subreg(void) { RUN(verifier_subreg); }
+void test_verifier_tailcall(void) { RUN(verifier_tailcall); }
+void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); }
+void test_verifier_typedef(void) { RUN(verifier_typedef); }
+void test_verifier_uninit(void) { RUN(verifier_uninit); }
+void test_verifier_unpriv(void) { RUN(verifier_unpriv); }
+void test_verifier_unpriv_perf(void) { RUN(verifier_unpriv_perf); }
+void test_verifier_value_adj_spill(void) { RUN(verifier_value_adj_spill); }
+void test_verifier_value(void) { RUN(verifier_value); }
+void test_verifier_value_illegal_alu(void) { RUN(verifier_value_illegal_alu); }
+void test_verifier_value_or_null(void) { RUN(verifier_value_or_null); }
+void test_verifier_var_off(void) { RUN(verifier_var_off); }
+void test_verifier_vfs_accept(void) { RUN(verifier_vfs_accept); }
+void test_verifier_vfs_reject(void) { RUN(verifier_vfs_reject); }
+void test_verifier_xadd(void) { RUN(verifier_xadd); }
+void test_verifier_xdp(void) { RUN(verifier_xdp); }
+void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
+void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
+void test_verifier_lsm(void) { RUN(verifier_lsm); }
+void test_irq(void) { RUN(irq); }
+void test_verifier_mtu(void) { RUN(verifier_mtu); }
+
+static int init_test_val_map(struct bpf_object *obj, char *map_name)
+{
+ struct test_val value = {
+ .index = (6 + 1) * sizeof(int),
+ .foo[6] = 0xabcdef12,
+ };
+ struct bpf_map *map;
+ int err, key = 0;
+
+ map = bpf_object__find_map_by_name(obj, map_name);
+ if (!map) {
+ PRINT_FAIL("Can't find map '%s'\n", map_name);
+ return -EINVAL;
+ }
+
+ err = bpf_map_update_elem(bpf_map__fd(map), &key, &value, 0);
+ if (err) {
+ PRINT_FAIL("Error while updating map '%s': %d\n", map_name, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int init_array_access_maps(struct bpf_object *obj)
+{
+ return init_test_val_map(obj, "map_array_ro");
+}
+
+void test_verifier_array_access(void)
+{
+ run_tests_aux("verifier_array_access",
+ verifier_array_access__elf_bytes,
+ init_array_access_maps);
+}
+void test_verifier_async_cb_context(void) { RUN(verifier_async_cb_context); }
+
+static int init_value_ptr_arith_maps(struct bpf_object *obj)
+{
+ return init_test_val_map(obj, "map_array_48b");
+}
+
+void test_verifier_value_ptr_arith(void)
+{
+ run_tests_aux("verifier_value_ptr_arith",
+ verifier_value_ptr_arith__elf_bytes,
+ init_value_ptr_arith_maps);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c
new file mode 100644
index 000000000000..3918ecc2ee91
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+
+#include "verifier_kfunc_prog_types.skel.h"
+
+void test_verifier_kfunc_prog_types(void)
+{
+ RUN_TESTS(verifier_kfunc_prog_types);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
new file mode 100644
index 000000000000..8337c6bc5b95
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_buf.skel.h"
+
+
+static bool check_prog_load(int prog_fd, bool expect_err, const char *tag)
+{
+ if (expect_err) {
+ if (!ASSERT_LT(prog_fd, 0, tag)) {
+ close(prog_fd);
+ return false;
+ }
+ } else /* !expect_err */ {
+ if (!ASSERT_GT(prog_fd, 0, tag))
+ return false;
+ }
+ if (prog_fd >= 0)
+ close(prog_fd);
+ return true;
+}
+
+static struct {
+ /* strategically placed before others to avoid accidental modification by kernel */
+ char filler[1024];
+ char buf[1024];
+ /* strategically placed after buf[] to catch more accidental corruptions */
+ char reference[1024];
+} logs;
+static const struct bpf_insn *insns;
+static size_t insn_cnt;
+
+static int load_prog(struct bpf_prog_load_opts *opts, bool expect_load_error)
+{
+ int prog_fd;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_prog",
+ "GPL", insns, insn_cnt, opts);
+ check_prog_load(prog_fd, expect_load_error, "prog_load");
+
+ return prog_fd;
+}
+
+static void verif_log_subtest(const char *name, bool expect_load_error, int log_level)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ char *exp_log, prog_name[16], op_name[32];
+ struct test_log_buf *skel;
+ struct bpf_program *prog;
+ size_t fixed_log_sz;
+ __u32 log_true_sz_fixed, log_true_sz_rolling;
+ int i, mode, err, prog_fd, res;
+
+ skel = test_log_buf__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ if (strcmp(bpf_program__name(prog), name) == 0)
+ bpf_program__set_autoload(prog, true);
+ else
+ bpf_program__set_autoload(prog, false);
+ }
+
+ err = test_log_buf__load(skel);
+ if (!expect_load_error && !ASSERT_OK(err, "unexpected_load_failure"))
+ goto cleanup;
+ if (expect_load_error && !ASSERT_ERR(err, "unexpected_load_success"))
+ goto cleanup;
+
+ insns = bpf_program__insns(skel->progs.good_prog);
+ insn_cnt = bpf_program__insn_cnt(skel->progs.good_prog);
+
+ opts.log_buf = logs.reference;
+ opts.log_size = sizeof(logs.reference);
+ opts.log_level = log_level | 8 /* BPF_LOG_FIXED */;
+ load_prog(&opts, expect_load_error);
+
+ fixed_log_sz = strlen(logs.reference) + 1;
+ if (!ASSERT_GT(fixed_log_sz, 50, "fixed_log_sz"))
+ goto cleanup;
+ memset(logs.reference + fixed_log_sz, 0, sizeof(logs.reference) - fixed_log_sz);
+
+ /* validate BPF_LOG_FIXED works as verifier log used to work, that is:
+ * we get -ENOSPC and beginning of the full verifier log. This only
+ * works for log_level 2 and log_level 1 + failed program. For log
+ * level 2 we don't reset log at all. For log_level 1 + failed program
+ * we don't get to verification stats output. With log level 1
+ * for successful program final result will be just verifier stats.
+ * But if provided too short log buf, kernel will NULL-out log->ubuf
+ * and will stop emitting further log. This means we'll never see
+ * predictable verifier stats.
+ * Long story short, we do the following -ENOSPC test only for
+ * predictable combinations.
+ */
+ if (log_level >= 2 || expect_load_error) {
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* fixed-length log */
+ opts.log_size = 25;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_fixed25",
+ "GPL", insns, insn_cnt, &opts);
+ if (!ASSERT_EQ(prog_fd, -ENOSPC, "unexpected_log_fixed_prog_load_result")) {
+ if (prog_fd >= 0)
+ close(prog_fd);
+ goto cleanup;
+ }
+ if (!ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25"))
+ goto cleanup;
+ if (!ASSERT_STRNEQ(logs.buf, logs.reference, 24, "log_fixed_contents_25"))
+ goto cleanup;
+ }
+
+ /* validate rolling verifier log logic: try all variations of log buf
+ * length to force various truncation scenarios
+ */
+ opts.log_buf = logs.buf;
+
+ /* rotating mode, then fixed mode */
+ for (mode = 1; mode >= 0; mode--) {
+ /* prefill logs.buf with 'A's to detect any write beyond allowed length */
+ memset(logs.filler, 'A', sizeof(logs.filler));
+ logs.filler[sizeof(logs.filler) - 1] = '\0';
+ memset(logs.buf, 'A', sizeof(logs.buf));
+ logs.buf[sizeof(logs.buf) - 1] = '\0';
+
+ for (i = 1; i < fixed_log_sz; i++) {
+ opts.log_size = i;
+ opts.log_level = log_level | (mode ? 0 : 8 /* BPF_LOG_FIXED */);
+
+ snprintf(prog_name, sizeof(prog_name),
+ "log_%s_%d", mode ? "roll" : "fixed", i);
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, prog_name,
+ "GPL", insns, insn_cnt, &opts);
+
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_prog_load_%d", mode ? "roll" : "fixed", i);
+ if (!ASSERT_EQ(prog_fd, -ENOSPC, op_name)) {
+ if (prog_fd >= 0)
+ close(prog_fd);
+ goto cleanup;
+ }
+
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_strlen_%d", mode ? "roll" : "fixed", i);
+ ASSERT_EQ(strlen(logs.buf), i - 1, op_name);
+
+ if (mode)
+ exp_log = logs.reference + fixed_log_sz - i;
+ else
+ exp_log = logs.reference;
+
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_contents_%d", mode ? "roll" : "fixed", i);
+ if (!ASSERT_STRNEQ(logs.buf, exp_log, i - 1, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strncmp(logs.buf, exp_log, i - 1),
+ logs.buf, exp_log);
+ goto cleanup;
+ }
+
+ /* check that unused portions of logs.buf is not overwritten */
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_unused_%d", mode ? "roll" : "fixed", i);
+ if (!ASSERT_STREQ(logs.buf + i, logs.filler + i, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strcmp(logs.buf + i, logs.filler + i),
+ logs.buf + i, logs.filler + i);
+ goto cleanup;
+ }
+ }
+ }
+
+ /* (FIXED) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_fixed");
+
+ log_true_sz_fixed = opts.log_true_size;
+ ASSERT_GT(log_true_sz_fixed, 0, "log_true_sz_fixed");
+
+ /* (FIXED, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_fixed_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_fixed, "log_sz_fixed_null_eq");
+
+ /* (ROLLING) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level;
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_rolling");
+
+ log_true_sz_rolling = opts.log_true_size;
+ ASSERT_EQ(log_true_sz_rolling, log_true_sz_fixed, "log_true_sz_eq");
+
+ /* (ROLLING, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = log_level;
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_rolling_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_rolling, "log_true_sz_null_eq");
+
+ /* (FIXED) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed - 1;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, true /* should fail */);
+ ASSERT_EQ(res, -ENOSPC, "prog_load_res_too_short_fixed");
+
+ /* (FIXED) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_fixed");
+
+ /* (ROLLING) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level;
+ opts.log_size = log_true_sz_rolling - 1;
+ res = load_prog(&opts, true /* should fail */);
+ ASSERT_EQ(res, -ENOSPC, "prog_load_res_too_short_rolling");
+
+ /* (ROLLING) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level;
+ opts.log_size = log_true_sz_rolling;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_rolling");
+
+cleanup:
+ test_log_buf__destroy(skel);
+}
+
+static const void *btf_data;
+static u32 btf_data_sz;
+
+static int load_btf(struct bpf_btf_load_opts *opts, bool expect_err)
+{
+ int fd;
+
+ fd = bpf_btf_load(btf_data, btf_data_sz, opts);
+ if (fd >= 0)
+ close(fd);
+ if (expect_err)
+ ASSERT_LT(fd, 0, "btf_load_failure");
+ else /* !expect_err */
+ ASSERT_GT(fd, 0, "btf_load_success");
+ return fd;
+}
+
+static void verif_btf_log_subtest(bool bad_btf)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ struct btf *btf;
+ struct btf_type *t;
+ char *exp_log, op_name[32];
+ size_t fixed_log_sz;
+ __u32 log_true_sz_fixed, log_true_sz_rolling;
+ int i, res;
+
+ /* prepare simple BTF contents */
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "btf_new_empty"))
+ return;
+ res = btf__add_int(btf, "whatever", 4, 0);
+ if (!ASSERT_GT(res, 0, "btf_add_int_id"))
+ goto cleanup;
+ if (bad_btf) {
+ /* btf__add_int() doesn't allow bad value of size, so we'll just
+ * force-cast btf_type pointer and manually override size to invalid
+ * 3 if we need to simulate failure
+ */
+ t = (void *)btf__type_by_id(btf, res);
+ if (!ASSERT_OK_PTR(t, "int_btf_type"))
+ goto cleanup;
+ t->size = 3;
+ }
+
+ btf_data = btf__raw_data(btf, &btf_data_sz);
+ if (!ASSERT_OK_PTR(btf_data, "btf_data"))
+ goto cleanup;
+
+ load_btf(&opts, bad_btf);
+
+ opts.log_buf = logs.reference;
+ opts.log_size = sizeof(logs.reference);
+ opts.log_level = 1 | 8 /* BPF_LOG_FIXED */;
+ load_btf(&opts, bad_btf);
+
+ fixed_log_sz = strlen(logs.reference) + 1;
+ if (!ASSERT_GT(fixed_log_sz, 50, "fixed_log_sz"))
+ goto cleanup;
+ memset(logs.reference + fixed_log_sz, 0, sizeof(logs.reference) - fixed_log_sz);
+
+ /* validate BPF_LOG_FIXED truncation works as verifier log used to work */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* fixed-length log */
+ opts.log_size = 25;
+ res = load_btf(&opts, true);
+ ASSERT_EQ(res, -ENOSPC, "half_log_fd");
+ ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25");
+ ASSERT_STRNEQ(logs.buf, logs.reference, 24, op_name);
+
+ /* validate rolling verifier log logic: try all variations of log buf
+ * length to force various truncation scenarios
+ */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1; /* rolling log */
+
+ /* prefill logs.buf with 'A's to detect any write beyond allowed length */
+ memset(logs.filler, 'A', sizeof(logs.filler));
+ logs.filler[sizeof(logs.filler) - 1] = '\0';
+ memset(logs.buf, 'A', sizeof(logs.buf));
+ logs.buf[sizeof(logs.buf) - 1] = '\0';
+
+ for (i = 1; i < fixed_log_sz; i++) {
+ opts.log_size = i;
+
+ snprintf(op_name, sizeof(op_name), "log_roll_btf_load_%d", i);
+ res = load_btf(&opts, true);
+ if (!ASSERT_EQ(res, -ENOSPC, op_name))
+ goto cleanup;
+
+ exp_log = logs.reference + fixed_log_sz - i;
+ snprintf(op_name, sizeof(op_name), "log_roll_contents_%d", i);
+ if (!ASSERT_STREQ(logs.buf, exp_log, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strcmp(logs.buf, exp_log),
+ logs.buf, exp_log);
+ goto cleanup;
+ }
+
+ /* check that unused portions of logs.buf are not overwritten */
+ snprintf(op_name, sizeof(op_name), "log_roll_unused_tail_%d", i);
+ if (!ASSERT_STREQ(logs.buf + i, logs.filler + i, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strcmp(logs.buf + i, logs.filler + i),
+ logs.buf + i, logs.filler + i);
+ goto cleanup;
+ }
+ }
+
+ /* (FIXED) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_fixed");
+
+ log_true_sz_fixed = opts.log_true_size;
+ ASSERT_GT(log_true_sz_fixed, 0, "log_true_sz_fixed");
+
+ /* (FIXED, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_fixed_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_fixed, "log_sz_fixed_null_eq");
+
+ /* (ROLLING) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1;
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_rolling");
+
+ log_true_sz_rolling = opts.log_true_size;
+ ASSERT_EQ(log_true_sz_rolling, log_true_sz_fixed, "log_true_sz_eq");
+
+ /* (ROLLING, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = 1;
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_rolling_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_rolling, "log_true_sz_null_eq");
+
+ /* (FIXED) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed - 1;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, true);
+ ASSERT_EQ(res, -ENOSPC, "btf_load_res_too_short_fixed");
+
+ /* (FIXED) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_just_right_fixed");
+
+ /* (ROLLING) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1;
+ opts.log_size = log_true_sz_rolling - 1;
+ res = load_btf(&opts, true);
+ ASSERT_EQ(res, -ENOSPC, "btf_load_res_too_short_rolling");
+
+ /* (ROLLING) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1;
+ opts.log_size = log_true_sz_rolling;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_just_right_rolling");
+
+cleanup:
+ btf__free(btf);
+}
+
+void test_verifier_log(void)
+{
+ if (test__start_subtest("good_prog-level1"))
+ verif_log_subtest("good_prog", false, 1);
+ if (test__start_subtest("good_prog-level2"))
+ verif_log_subtest("good_prog", false, 2);
+ if (test__start_subtest("bad_prog-level1"))
+ verif_log_subtest("bad_prog", true, 1);
+ if (test__start_subtest("bad_prog-level2"))
+ verif_log_subtest("bad_prog", true, 2);
+ if (test__start_subtest("bad_btf"))
+ verif_btf_log_subtest(true /* bad btf */);
+ if (test__start_subtest("good_btf"))
+ verif_btf_log_subtest(false /* !bad btf */);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
new file mode 100644
index 000000000000..4d69d9d55e17
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
@@ -0,0 +1,565 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <endian.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <linux/keyctl.h>
+#include <sys/xattr.h>
+#include <linux/fsverity.h>
+#include <test_progs.h>
+
+#include "test_verify_pkcs7_sig.skel.h"
+#include "test_sig_in_xattr.skel.h"
+
+#define MAX_DATA_SIZE (1024 * 1024)
+#define MAX_SIG_SIZE 1024
+
+#define VERIFY_USE_SECONDARY_KEYRING (1UL)
+#define VERIFY_USE_PLATFORM_KEYRING (2UL)
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
+#define MODULE_SIG_STRING "~Module signature appended~\n"
+
+/*
+ * Module signature information block.
+ *
+ * The constituents of the signature section are, in order:
+ *
+ * - Signer's name
+ * - Key identifier
+ * - Signature data
+ * - Information block
+ */
+struct module_signature {
+ __u8 algo; /* Public-key crypto algorithm [0] */
+ __u8 hash; /* Digest algorithm [0] */
+ __u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
+ __u8 signer_len; /* Length of signer's name [0] */
+ __u8 key_id_len; /* Length of key identifier [0] */
+ __u8 __pad[3];
+ __be32 sig_len; /* Length of signature data */
+};
+
+struct data {
+ __u8 data[MAX_DATA_SIZE];
+ __u32 data_len;
+ __u8 sig[MAX_SIG_SIZE];
+ __u32 sig_len;
+};
+
+static bool kfunc_not_supported;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
+ va_list args)
+{
+ if (level == LIBBPF_WARN)
+ vprintf(fmt, args);
+
+ if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
+ return 0;
+
+ if (strcmp(va_arg(args, char *), "bpf_verify_pkcs7_signature"))
+ return 0;
+
+ kfunc_not_supported = true;
+ return 0;
+}
+
+static int _run_setup_process(const char *setup_dir, const char *cmd)
+{
+ int child_pid, child_status;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ execlp("./verify_sig_setup.sh", "./verify_sig_setup.sh", cmd,
+ setup_dir, NULL);
+ exit(errno);
+
+ } else if (child_pid > 0) {
+ waitpid(child_pid, &child_status, 0);
+ return WEXITSTATUS(child_status);
+ }
+
+ return -EINVAL;
+}
+
+static int populate_data_item_str(const char *tmp_dir, struct data *data_item)
+{
+ struct stat st;
+ char data_template[] = "/tmp/dataXXXXXX";
+ char path[PATH_MAX];
+ int ret, fd, child_status, child_pid;
+
+ data_item->data_len = 4;
+ memcpy(data_item->data, "test", data_item->data_len);
+
+ fd = mkstemp(data_template);
+ if (fd == -1)
+ return -errno;
+
+ ret = write(fd, data_item->data, data_item->data_len);
+
+ close(fd);
+
+ if (ret != data_item->data_len) {
+ ret = -EIO;
+ goto out;
+ }
+
+ child_pid = fork();
+
+ if (child_pid == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ if (child_pid == 0) {
+ snprintf(path, sizeof(path), "%s/signing_key.pem", tmp_dir);
+
+ return execlp("./sign-file", "./sign-file", "-d", "sha256",
+ path, path, data_template, NULL);
+ }
+
+ waitpid(child_pid, &child_status, 0);
+
+ ret = WEXITSTATUS(child_status);
+ if (ret)
+ goto out;
+
+ snprintf(path, sizeof(path), "%s.p7s", data_template);
+
+ ret = stat(path, &st);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ if (st.st_size > sizeof(data_item->sig)) {
+ ret = -EINVAL;
+ goto out_sig;
+ }
+
+ data_item->sig_len = st.st_size;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1) {
+ ret = -errno;
+ goto out_sig;
+ }
+
+ ret = read(fd, data_item->sig, data_item->sig_len);
+
+ close(fd);
+
+ if (ret != data_item->sig_len) {
+ ret = -EIO;
+ goto out_sig;
+ }
+
+ ret = 0;
+out_sig:
+ unlink(path);
+out:
+ unlink(data_template);
+ return ret;
+}
+
+static int populate_data_item_mod(struct data *data_item)
+{
+ char mod_path[PATH_MAX], *mod_path_ptr;
+ struct stat st;
+ void *mod;
+ FILE *fp;
+ struct module_signature ms;
+ int ret, fd, modlen, marker_len, sig_len;
+
+ data_item->data_len = 0;
+
+ if (stat("/lib/modules", &st) == -1)
+ return 0;
+
+ /* Requires CONFIG_TCP_CONG_BIC=m. */
+ fp = popen("find /lib/modules/$(uname -r) -name tcp_bic.ko", "r");
+ if (!fp)
+ return 0;
+
+ mod_path_ptr = fgets(mod_path, sizeof(mod_path), fp);
+ pclose(fp);
+
+ if (!mod_path_ptr)
+ return 0;
+
+ mod_path_ptr = strchr(mod_path, '\n');
+ if (!mod_path_ptr)
+ return 0;
+
+ *mod_path_ptr = '\0';
+
+ if (stat(mod_path, &st) == -1)
+ return 0;
+
+ modlen = st.st_size;
+ marker_len = sizeof(MODULE_SIG_STRING) - 1;
+
+ fd = open(mod_path, O_RDONLY);
+ if (fd == -1)
+ return -errno;
+
+ mod = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+
+ close(fd);
+
+ if (mod == MAP_FAILED)
+ return -errno;
+
+ if (strncmp(mod + modlen - marker_len, MODULE_SIG_STRING, marker_len)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ modlen -= marker_len;
+
+ memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms));
+
+ sig_len = __be32_to_cpu(ms.sig_len);
+ modlen -= sig_len + sizeof(ms);
+
+ if (modlen > sizeof(data_item->data)) {
+ ret = -E2BIG;
+ goto out;
+ }
+
+ memcpy(data_item->data, mod, modlen);
+ data_item->data_len = modlen;
+
+ if (sig_len > sizeof(data_item->sig)) {
+ ret = -E2BIG;
+ goto out;
+ }
+
+ memcpy(data_item->sig, mod + modlen, sig_len);
+ data_item->sig_len = sig_len;
+ ret = 0;
+out:
+ munmap(mod, st.st_size);
+ return ret;
+}
+
+static void test_verify_pkcs7_sig_from_map(void)
+{
+ libbpf_print_fn_t old_print_cb;
+ char tmp_dir_template[] = "/tmp/verify_sigXXXXXX";
+ char *tmp_dir;
+ struct test_verify_pkcs7_sig *skel = NULL;
+ struct bpf_map *map;
+ struct data data = {};
+ int ret, zero = 0;
+
+ /* Trigger creation of session keyring. */
+ syscall(__NR_request_key, "keyring", "_uid.0", NULL,
+ KEY_SPEC_SESSION_KEYRING);
+
+ tmp_dir = mkdtemp(tmp_dir_template);
+ if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp"))
+ return;
+
+ ret = _run_setup_process(tmp_dir, "setup");
+ if (!ASSERT_OK(ret, "_run_setup_process"))
+ goto close_prog;
+
+ skel = test_verify_pkcs7_sig__open();
+ if (!ASSERT_OK_PTR(skel, "test_verify_pkcs7_sig__open"))
+ goto close_prog;
+
+ old_print_cb = libbpf_set_print(libbpf_print_cb);
+ ret = test_verify_pkcs7_sig__load(skel);
+ libbpf_set_print(old_print_cb);
+
+ if (ret < 0 && kfunc_not_supported) {
+ printf(
+ "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n",
+ __func__);
+ test__skip();
+ goto close_prog;
+ }
+
+ if (!ASSERT_OK(ret, "test_verify_pkcs7_sig__load"))
+ goto close_prog;
+
+ ret = test_verify_pkcs7_sig__attach(skel);
+ if (!ASSERT_OK(ret, "test_verify_pkcs7_sig__attach"))
+ goto close_prog;
+
+ map = bpf_object__find_map_by_name(skel->obj, "data_input");
+ if (!ASSERT_OK_PTR(map, "data_input not found"))
+ goto close_prog;
+
+ skel->bss->monitored_pid = getpid();
+
+ /* Test without data and signature. */
+ skel->bss->user_keyring_serial = KEY_SPEC_SESSION_KEYRING;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ /* Test successful signature verification with session keyring. */
+ ret = populate_data_item_str(tmp_dir, &data);
+ if (!ASSERT_OK(ret, "populate_data_item_str"))
+ goto close_prog;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ /* Test successful signature verification with testing keyring. */
+ skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring",
+ "ebpf_testing_keyring", NULL,
+ KEY_SPEC_SESSION_KEYRING);
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ /*
+ * Ensure key_task_permission() is called and rejects the keyring
+ * (no Search permission).
+ */
+ syscall(__NR_keyctl, KEYCTL_SETPERM, skel->bss->user_keyring_serial,
+ 0x37373737);
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ syscall(__NR_keyctl, KEYCTL_SETPERM, skel->bss->user_keyring_serial,
+ 0x3f3f3f3f);
+
+ /*
+ * Ensure key_validate() is called and rejects the keyring (key expired)
+ */
+ syscall(__NR_keyctl, KEYCTL_SET_TIMEOUT,
+ skel->bss->user_keyring_serial, 1);
+ sleep(1);
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ skel->bss->user_keyring_serial = KEY_SPEC_SESSION_KEYRING;
+
+ /* Test with corrupted data (signature verification should fail). */
+ data.data[0] = 'a';
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ ret = populate_data_item_mod(&data);
+ if (!ASSERT_OK(ret, "populate_data_item_mod"))
+ goto close_prog;
+
+ /* Test signature verification with system keyrings. */
+ if (data.data_len) {
+ skel->bss->user_keyring_serial = 0;
+ skel->bss->system_keyring_id = 0;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data,
+ BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ skel->bss->system_keyring_id = VERIFY_USE_SECONDARY_KEYRING;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data,
+ BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ skel->bss->system_keyring_id = VERIFY_USE_PLATFORM_KEYRING;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data,
+ BPF_ANY);
+ ASSERT_LT(ret, 0, "bpf_map_update_elem data_input");
+ }
+
+close_prog:
+ _run_setup_process(tmp_dir, "cleanup");
+
+ if (!skel)
+ return;
+
+ skel->bss->monitored_pid = 0;
+ test_verify_pkcs7_sig__destroy(skel);
+}
+
+static int get_signature_size(const char *sig_path)
+{
+ struct stat st;
+
+ if (stat(sig_path, &st) == -1)
+ return -1;
+
+ return st.st_size;
+}
+
+static int add_signature_to_xattr(const char *data_path, const char *sig_path)
+{
+ char sig[MAX_SIG_SIZE] = {0};
+ int fd, size, ret;
+
+ if (sig_path) {
+ fd = open(sig_path, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ size = read(fd, sig, MAX_SIG_SIZE);
+ close(fd);
+ if (size <= 0)
+ return -1;
+ } else {
+ /* no sig_path, just write 32 bytes of zeros */
+ size = 32;
+ }
+ ret = setxattr(data_path, "user.sig", sig, size, 0);
+ if (!ASSERT_OK(ret, "setxattr"))
+ return -1;
+
+ return 0;
+}
+
+static int test_open_file(struct test_sig_in_xattr *skel, char *data_path,
+ pid_t pid, bool should_success, char *name)
+{
+ int ret;
+
+ skel->bss->monitored_pid = pid;
+ ret = open(data_path, O_RDONLY);
+ close(ret);
+ skel->bss->monitored_pid = 0;
+
+ if (should_success) {
+ if (!ASSERT_GE(ret, 0, name))
+ return -1;
+ } else {
+ if (!ASSERT_LT(ret, 0, name))
+ return -1;
+ }
+ return 0;
+}
+
+static void test_pkcs7_sig_fsverity(void)
+{
+ char data_path[PATH_MAX];
+ char sig_path[PATH_MAX];
+ char tmp_dir_template[] = "/tmp/verify_sigXXXXXX";
+ char *tmp_dir;
+ struct test_sig_in_xattr *skel = NULL;
+ pid_t pid;
+ int ret;
+
+ tmp_dir = mkdtemp(tmp_dir_template);
+ if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp"))
+ return;
+
+ snprintf(data_path, PATH_MAX, "%s/data-file", tmp_dir);
+ snprintf(sig_path, PATH_MAX, "%s/sig-file", tmp_dir);
+
+ ret = _run_setup_process(tmp_dir, "setup");
+ if (!ASSERT_OK(ret, "_run_setup_process"))
+ goto out;
+
+ ret = _run_setup_process(tmp_dir, "fsverity-create-sign");
+
+ if (ret) {
+ printf("%s: SKIP: fsverity [sign|enable] doesn't work.\n"
+ "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n",
+ __func__);
+ test__skip();
+ goto out;
+ }
+
+ skel = test_sig_in_xattr__open();
+ if (!ASSERT_OK_PTR(skel, "test_sig_in_xattr__open"))
+ goto out;
+ ret = get_signature_size(sig_path);
+ if (!ASSERT_GT(ret, 0, "get_signature_size"))
+ goto out;
+ skel->bss->sig_size = ret;
+ skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring",
+ "ebpf_testing_keyring", NULL,
+ KEY_SPEC_SESSION_KEYRING);
+ memcpy(skel->bss->digest, "FSVerity", 8);
+
+ ret = test_sig_in_xattr__load(skel);
+ if (!ASSERT_OK(ret, "test_sig_in_xattr__load"))
+ goto out;
+
+ ret = test_sig_in_xattr__attach(skel);
+ if (!ASSERT_OK(ret, "test_sig_in_xattr__attach"))
+ goto out;
+
+ pid = getpid();
+
+ /* Case 1: fsverity is not enabled, open should succeed */
+ if (test_open_file(skel, data_path, pid, true, "open_1"))
+ goto out;
+
+ /* Case 2: fsverity is enabled, xattr is missing, open should
+ * fail
+ */
+ ret = _run_setup_process(tmp_dir, "fsverity-enable");
+ if (!ASSERT_OK(ret, "fsverity-enable"))
+ goto out;
+ if (test_open_file(skel, data_path, pid, false, "open_2"))
+ goto out;
+
+ /* Case 3: fsverity is enabled, xattr has valid signature, open
+ * should succeed
+ */
+ ret = add_signature_to_xattr(data_path, sig_path);
+ if (!ASSERT_OK(ret, "add_signature_to_xattr_1"))
+ goto out;
+
+ if (test_open_file(skel, data_path, pid, true, "open_3"))
+ goto out;
+
+ /* Case 4: fsverity is enabled, xattr has invalid signature, open
+ * should fail
+ */
+ ret = add_signature_to_xattr(data_path, NULL);
+ if (!ASSERT_OK(ret, "add_signature_to_xattr_2"))
+ goto out;
+ test_open_file(skel, data_path, pid, false, "open_4");
+
+out:
+ _run_setup_process(tmp_dir, "cleanup");
+ if (!skel)
+ return;
+
+ skel->bss->monitored_pid = 0;
+ test_sig_in_xattr__destroy(skel);
+}
+
+void test_verify_pkcs7_sig(void)
+{
+ if (test__start_subtest("pkcs7_sig_from_map"))
+ test_verify_pkcs7_sig_from_map();
+ if (test__start_subtest("pkcs7_sig_fsverity"))
+ test_pkcs7_sig_fsverity();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
index 72310cfc6474..6fb2217d940b 100644
--- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c
+++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
@@ -16,27 +16,27 @@ static void nsleep()
void test_vmlinux(void)
{
- int duration = 0, err;
+ int err;
struct test_vmlinux* skel;
struct test_vmlinux__bss *bss;
skel = test_vmlinux__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load"))
return;
bss = skel->bss;
err = test_vmlinux__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_vmlinux__attach"))
goto cleanup;
/* trigger everything */
nsleep();
- CHECK(!bss->tp_called, "tp", "not called\n");
- CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
- CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
- CHECK(!bss->kprobe_called, "kprobe", "not called\n");
- CHECK(!bss->fentry_called, "fentry", "not called\n");
+ ASSERT_TRUE(bss->tp_called, "tp");
+ ASSERT_TRUE(bss->raw_tp_called, "raw_tp");
+ ASSERT_TRUE(bss->tp_btf_called, "tp_btf");
+ ASSERT_TRUE(bss->kprobe_called, "kprobe");
+ ASSERT_TRUE(bss->fentry_called, "fentry");
cleanup:
test_vmlinux__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
new file mode 100644
index 000000000000..2a5e207edad6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Topology:
+ * ---------
+ * NS0 namespace | NS1 namespace
+ * |
+ * +--------------+ | +--------------+
+ * | veth01 |----------| veth10 |
+ * | 172.16.1.100 | | | 172.16.1.200 |
+ * | bpf | | +--------------+
+ * +--------------+ |
+ * server(UDP/TCP) |
+ * +-------------------+ |
+ * | vrf1 | |
+ * | +--------------+ | | +--------------+
+ * | | veth02 |----------| veth20 |
+ * | | 172.16.2.100 | | | | 172.16.2.200 |
+ * | | bpf | | | +--------------+
+ * | +--------------+ | |
+ * | server(UDP/TCP) | |
+ * +-------------------+ |
+ *
+ * Test flow
+ * -----------
+ * The tests verifies that socket lookup via TC is VRF aware:
+ * 1) Creates two veth pairs between NS0 and NS1:
+ * a) veth01 <-> veth10 outside the VRF
+ * b) veth02 <-> veth20 in the VRF
+ * 2) Attaches to veth01 and veth02 a program that calls:
+ * a) bpf_skc_lookup_tcp() with TCP and tcp_skc is true
+ * b) bpf_sk_lookup_tcp() with TCP and tcp_skc is false
+ * c) bpf_sk_lookup_udp() with UDP
+ * The program stores the lookup result in bss->lookup_status.
+ * 3) Creates a socket TCP/UDP server in/outside the VRF.
+ * 4) The test expects lookup_status to be:
+ * a) 0 from device in VRF to server outside VRF
+ * b) 0 from device outside VRF to server in VRF
+ * c) 1 from device in VRF to server in VRF
+ * d) 1 from device outside VRF to server outside VRF
+ */
+
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "vrf_socket_lookup.skel.h"
+
+#define NS0 "vrf_socket_lookup_0"
+#define NS1 "vrf_socket_lookup_1"
+
+#define IP4_ADDR_VETH01 "172.16.1.100"
+#define IP4_ADDR_VETH10 "172.16.1.200"
+#define IP4_ADDR_VETH02 "172.16.2.100"
+#define IP4_ADDR_VETH20 "172.16.2.200"
+
+#define NON_VRF_PORT 5000
+#define IN_VRF_PORT 5001
+
+#define TIMEOUT_MS 3000
+
+static int make_socket(int sotype, const char *ip, int port,
+ struct sockaddr_storage *addr)
+{
+ int err, fd;
+
+ err = make_sockaddr(AF_INET, ip, port, addr, NULL);
+ if (!ASSERT_OK(err, "make_address"))
+ return -1;
+
+ fd = socket(AF_INET, sotype, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ return -1;
+
+ if (!ASSERT_OK(settimeo(fd, TIMEOUT_MS), "settimeo"))
+ goto fail;
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int make_server(int sotype, const char *ip, int port, const char *ifname)
+{
+ int err, fd = -1;
+
+ fd = start_server(AF_INET, sotype, ip, port, TIMEOUT_MS);
+ if (!ASSERT_GE(fd, 0, "start_server"))
+ return -1;
+
+ if (ifname) {
+ err = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ ifname, strlen(ifname) + 1);
+ if (!ASSERT_OK(err, "setsockopt(SO_BINDTODEVICE)"))
+ goto fail;
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int attach_progs(char *ifname, int tc_prog_fd, int xdp_prog_fd)
+{
+ LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1,
+ .prog_fd = tc_prog_fd);
+ int ret, ifindex;
+
+ ifindex = if_nametoindex(ifname);
+ if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex"))
+ return -1;
+ hook.ifindex = ifindex;
+
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ return ret;
+
+ ret = bpf_tc_attach(&hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ ret = bpf_xdp_attach(ifindex, xdp_prog_fd, 0, NULL);
+ if (!ASSERT_OK(ret, "bpf_xdp_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete "
+ NS0);
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete "
+ NS1);
+}
+
+static int setup(struct vrf_socket_lookup *skel)
+{
+ int tc_prog_fd, xdp_prog_fd, ret = 0;
+ struct nstoken *nstoken = NULL;
+
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+
+ /* NS0 <-> NS1 [veth01 <-> veth10] */
+ SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10"
+ " netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS(fail, "ip -net " NS0 " link set dev veth01 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS(fail, "ip -net " NS1 " link set dev veth10 up");
+
+ /* NS0 <-> NS1 [veth02 <-> veth20] */
+ SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20"
+ " netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS(fail, "ip -net " NS0 " link set dev veth02 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS(fail, "ip -net " NS1 " link set dev veth20 up");
+
+ /* veth02 -> vrf1 */
+ SYS(fail, "ip -net " NS0 " link add vrf1 type vrf table 11");
+ SYS(fail, "ip -net " NS0 " route add vrf vrf1 unreachable default"
+ " metric 4278198272");
+ SYS(fail, "ip -net " NS0 " link set vrf1 alias vrf");
+ SYS(fail, "ip -net " NS0 " link set vrf1 up");
+ SYS(fail, "ip -net " NS0 " link set veth02 master vrf1");
+
+ /* Attach TC and XDP progs to veth devices in NS0 */
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto fail;
+ tc_prog_fd = bpf_program__fd(skel->progs.tc_socket_lookup);
+ if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__tc_fd"))
+ goto fail;
+ xdp_prog_fd = bpf_program__fd(skel->progs.xdp_socket_lookup);
+ if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__xdp_fd"))
+ goto fail;
+
+ if (attach_progs("veth01", tc_prog_fd, xdp_prog_fd))
+ goto fail;
+
+ if (attach_progs("veth02", tc_prog_fd, xdp_prog_fd))
+ goto fail;
+
+ goto close;
+fail:
+ ret = -1;
+close:
+ if (nstoken)
+ close_netns(nstoken);
+ return ret;
+}
+
+static int test_lookup(struct vrf_socket_lookup *skel, int sotype,
+ const char *ip, int port, bool test_xdp, bool tcp_skc,
+ int lookup_status_exp)
+{
+ static const char msg[] = "Hello Server";
+ struct sockaddr_storage addr = {};
+ int fd, ret = 0;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ skel->bss->test_xdp = test_xdp;
+ skel->bss->tcp_skc = tcp_skc;
+ skel->bss->lookup_status = -1;
+
+ if (sotype == SOCK_STREAM)
+ connect(fd, (void *)&addr, sizeof(struct sockaddr_in));
+ else
+ sendto(fd, msg, sizeof(msg), 0, (void *)&addr,
+ sizeof(struct sockaddr_in));
+
+ if (!ASSERT_EQ(skel->bss->lookup_status, lookup_status_exp,
+ "lookup_status"))
+ goto fail;
+
+ goto close;
+
+fail:
+ ret = -1;
+close:
+ close(fd);
+ return ret;
+}
+
+static void _test_vrf_socket_lookup(struct vrf_socket_lookup *skel, int sotype,
+ bool test_xdp, bool tcp_skc)
+{
+ int in_vrf_server = -1, non_vrf_server = -1;
+ struct nstoken *nstoken = NULL;
+
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto done;
+
+ /* Open sockets in and outside VRF */
+ non_vrf_server = make_server(sotype, "0.0.0.0", NON_VRF_PORT, NULL);
+ if (!ASSERT_GE(non_vrf_server, 0, "make_server__outside_vrf_fd"))
+ goto done;
+
+ in_vrf_server = make_server(sotype, "0.0.0.0", IN_VRF_PORT, "veth02");
+ if (!ASSERT_GE(in_vrf_server, 0, "make_server__in_vrf_fd"))
+ goto done;
+
+ /* Perform test from NS1 */
+ close_netns(nstoken);
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS1))
+ goto done;
+
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, NON_VRF_PORT,
+ test_xdp, tcp_skc, 0), "in_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, IN_VRF_PORT,
+ test_xdp, tcp_skc, 1), "in_to_in"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, NON_VRF_PORT,
+ test_xdp, tcp_skc, 1), "out_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, IN_VRF_PORT,
+ test_xdp, tcp_skc, 0), "out_to_in"))
+ goto done;
+
+done:
+ if (non_vrf_server >= 0)
+ close(non_vrf_server);
+ if (in_vrf_server >= 0)
+ close(in_vrf_server);
+ if (nstoken)
+ close_netns(nstoken);
+}
+
+void test_vrf_socket_lookup(void)
+{
+ struct vrf_socket_lookup *skel;
+
+ cleanup();
+
+ skel = vrf_socket_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "vrf_socket_lookup__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(setup(skel), "setup"))
+ goto done;
+
+ if (test__start_subtest("tc_socket_lookup_tcp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+ if (test__start_subtest("tc_socket_lookup_tcp_skc"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+ if (test__start_subtest("tc_socket_lookup_udp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+ if (test__start_subtest("xdp_socket_lookup_tcp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+ if (test__start_subtest("xdp_socket_lookup_tcp_skc"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+ if (test__start_subtest("xdp_socket_lookup_udp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+
+done:
+ vrf_socket_lookup__destroy(skel);
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c
new file mode 100644
index 000000000000..15c67d23128b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/wq.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Benjamin Tissoires */
+#include <test_progs.h>
+#include "wq.skel.h"
+#include "wq_failures.skel.h"
+
+void serial_test_wq(void)
+{
+ struct wq *wq_skel = NULL;
+ int err, prog_fd;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ RUN_TESTS(wq);
+
+ /* re-run the success test to check if the timer was actually executed */
+
+ wq_skel = wq__open_and_load();
+ if (!ASSERT_OK_PTR(wq_skel, "wq_skel_load"))
+ return;
+
+ err = wq__attach(wq_skel);
+ if (!ASSERT_OK(err, "wq_attach"))
+ return;
+
+ prog_fd = bpf_program__fd(wq_skel->progs.test_syscall_array_sleepable);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ usleep(50); /* 10 usecs should be enough, but give it extra */
+
+ ASSERT_EQ(wq_skel->bss->ok_sleepable, (1 << 1), "ok_sleepable");
+ wq__destroy(wq_skel);
+}
+
+void serial_test_failures_wq(void)
+{
+ RUN_TESTS(wq_failures);
+}
+
+static void test_failure_map_no_btf(void)
+{
+ struct wq *skel = NULL;
+ char log[8192];
+ const struct bpf_insn *insns;
+ size_t insn_cnt;
+ int ret, err, map_fd;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_size = sizeof(log), .log_buf = log,
+ .log_level = 2);
+
+ skel = wq__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = bpf_object__prepare(skel->obj);
+ if (!ASSERT_OK(err, "skel__prepare"))
+ goto out;
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "map_no_btf", sizeof(__u32), sizeof(__u64), 100,
+ NULL);
+ if (!ASSERT_GT(map_fd, -1, "map create"))
+ goto out;
+
+ err = bpf_map__reuse_fd(skel->maps.array, map_fd);
+ if (!ASSERT_OK(err, "map reuse fd")) {
+ close(map_fd);
+ goto out;
+ }
+
+ insns = bpf_program__insns(skel->progs.test_map_no_btf);
+ if (!ASSERT_OK_PTR(insns, "insns ptr"))
+ goto out;
+
+ insn_cnt = bpf_program__insn_cnt(skel->progs.test_map_no_btf);
+ if (!ASSERT_GT(insn_cnt, 0u, "insn cnt"))
+ goto out;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ if (!ASSERT_LT(ret, 0, "prog load failed")) {
+ if (ret > 0)
+ close(ret);
+ goto out;
+ }
+
+ ASSERT_HAS_SUBSTR(log, "map 'map_no_btf' has to have BTF in order to use bpf_wq",
+ "log complains no map BTF");
+out:
+ wq__destroy(skel);
+}
+
+void test_wq_custom(void)
+{
+ if (test__start_subtest("test_failure_map_no_btf"))
+ test_failure_map_no_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index 48921ff74850..947863a1d536 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -8,15 +8,21 @@ void test_xdp(void)
struct vip key6 = {.protocol = 6, .family = AF_INET6};
struct iptnl_info value4 = {.family = AF_INET};
struct iptnl_info value6 = {.family = AF_INET6};
- const char *file = "./test_xdp.o";
+ const char *file = "./test_xdp.bpf.o";
struct bpf_object *obj;
char buf[128];
- struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr);
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
- __u32 duration, retval, size;
+ struct ipv6hdr iph6;
+ struct iphdr iph;
int err, prog_fd, map_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -26,20 +32,23 @@ void test_xdp(void)
bpf_map_update_elem(map_fd, &key4, &value4, 0);
bpf_map_update_elem(map_fd, &key6, &value6, 0);
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv4 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 74, "ipv4 test_run data_size_out");
+ ASSERT_EQ(iph.protocol, IPPROTO_IPIP, "ipv4 test_run iph.protocol");
- CHECK(err || retval != XDP_TX || size != 74 ||
- iph->protocol != IPPROTO_IPIP, "ipv4",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf);
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != 114 ||
- iph6->nexthdr != IPPROTO_IPV6, "ipv6",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6));
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv6 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 114, "ipv6 test_run data_size_out");
+ ASSERT_EQ(iph6.nexthdr, IPPROTO_IPV6, "ipv6 test_run iph6.nexthdr");
out:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
new file mode 100644
index 000000000000..fce203640f8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+static void test_xdp_update_frags(void)
+{
+ const char *file = "./test_xdp_update_frags.bpf.o";
+ int err, prog_fd, max_skb_frags, buf_size, num;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 *offset;
+ __u8 *buf;
+ FILE *f;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ return;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(128);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 128b"))
+ goto out;
+
+ memset(buf, 0, 128);
+ offset = (__u32 *)buf;
+ *offset = 16;
+ buf[*offset] = 0xaa; /* marker at offset 16 (head) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 31 (head) */
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 128;
+ topts.data_size_out = 128;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
+ ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
+
+ free(buf);
+
+ buf = malloc(9000);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+ goto out;
+
+ memset(buf, 0, 9000);
+ offset = (__u32 *)buf;
+ *offset = 5000;
+ buf[*offset] = 0xaa; /* marker at offset 5000 (frag0) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 5015 (frag0) */
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 9000;
+ topts.data_size_out = 9000;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
+ ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
+
+ memset(buf, 0, 9000);
+ offset = (__u32 *)buf;
+ *offset = 3510;
+ buf[*offset] = 0xaa; /* marker at offset 3510 (head) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 3525 (frag0) */
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
+ ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
+
+ memset(buf, 0, 9000);
+ offset = (__u32 *)buf;
+ *offset = 7606;
+ buf[*offset] = 0xaa; /* marker at offset 7606 (frag0) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 7621 (frag1) */
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
+ ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
+
+ free(buf);
+
+ /* test_xdp_update_frags: unsupported buffer size */
+ f = fopen("/proc/sys/net/core/max_skb_frags", "r");
+ if (!ASSERT_OK_PTR(f, "max_skb_frag file pointer"))
+ goto out;
+
+ num = fscanf(f, "%d", &max_skb_frags);
+ fclose(f);
+
+ if (!ASSERT_EQ(num, 1, "max_skb_frags read failed"))
+ goto out;
+
+ /* xdp_buff linear area size is always set to 4096 in the
+ * bpf_prog_test_run_xdp routine.
+ */
+ buf_size = 4096 + (max_skb_frags + 1) * sysconf(_SC_PAGE_SIZE);
+ buf = malloc(buf_size);
+ if (!ASSERT_OK_PTR(buf, "alloc buf"))
+ goto out;
+
+ memset(buf, 0, buf_size);
+ offset = (__u32 *)buf;
+ *offset = 16;
+ buf[*offset] = 0xaa;
+ buf[*offset + 15] = 0xaa;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = buf_size;
+ topts.data_size_out = buf_size;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_EQ(err, -ENOMEM,
+ "unsupported buf size, possible non-default /proc/sys/net/core/max_skb_flags?");
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags(void)
+{
+ if (test__start_subtest("xdp_adjust_frags"))
+ test_xdp_update_frags();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index d5c98f2cb12f..43264347e7d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -2,81 +2,107 @@
#include <test_progs.h>
#include <network_helpers.h>
-void test_xdp_adjust_tail_shrink(void)
+static void test_xdp_adjust_tail_shrink(void)
{
- const char *file = "./test_xdp_adjust_tail_shrink.o";
- __u32 duration, retval, size, expect_sz;
+ const char *file = "./test_xdp_adjust_tail_shrink.bpf.o";
+ __u32 expect_sz;
struct bpf_object *obj;
int err, prog_fd;
char buf[128];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
-
- CHECK(err || retval != XDP_DROP,
- "ipv4", "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval");
expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != expect_sz,
- "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, retval, size, expect_sz);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval");
+ ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size");
+
bpf_object__close(obj);
}
-void test_xdp_adjust_tail_grow(void)
+static void test_xdp_adjust_tail_grow(bool is_64k_pagesize)
{
- const char *file = "./test_xdp_adjust_tail_grow.o";
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
struct bpf_object *obj;
- char buf[4096]; /* avoid segfault: large buf to hold grow results */
- __u32 duration, retval, size, expect_sz;
+ char buf[8192]; /* avoid segfault: large buf to hold grow results */
+ __u32 expect_sz;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ /* topts.data_size_in as a special signal to bpf prog */
+ if (is_64k_pagesize)
+ topts.data_size_in = sizeof(pkt_v4) - 1;
+ else
+ topts.data_size_in = sizeof(pkt_v4);
+
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_DROP,
- "ipv4", "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval");
expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != expect_sz,
- "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, retval, size, expect_sz);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval");
+ ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size");
bpf_object__close(obj);
}
-void test_xdp_adjust_tail_grow2(void)
+static void test_xdp_adjust_tail_grow2(void)
{
- const char *file = "./test_xdp_adjust_tail_grow.o";
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
char buf[4096]; /* avoid segfault: large buf to hold grow results */
- int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
struct bpf_object *obj;
int err, cnt, i;
- int max_grow;
+ int max_grow, prog_fd;
+ /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
+#if defined(__s390x__)
+ int tailroom = 512;
+#elif defined(__powerpc__)
+ int tailroom = 384;
+#else
+ int tailroom = 320;
+#endif
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
.repeat = 1,
.data_in = &buf,
.data_out = &buf,
.data_size_in = 0, /* Per test */
.data_size_out = 0, /* Per test */
- };
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
/* Test case-64 */
@@ -84,58 +110,267 @@ void test_xdp_adjust_tail_grow2(void)
tattr.data_size_in = 64; /* Determine test case via pkt size */
tattr.data_size_out = 128; /* Limit copy_size */
/* Kernel side alloc packet memory area that is zero init */
- err = bpf_prog_test_run_xattr(&tattr);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
- CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
- || tattr.retval != XDP_TX
- || tattr.data_size_out != 192, /* Expected grow size */
- "case-64",
- "err %d errno %d retval %d size %d\n",
- err, errno, tattr.retval, tattr.data_size_out);
+ ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */
+ ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
+ ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */
/* Extra checks for data contents */
- CHECK_ATTR(tattr.data_size_out != 192
- || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */
- || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */
- || buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */
- "case-64-data",
- "err %d errno %d retval %d size %d\n",
- err, errno, tattr.retval, tattr.data_size_out);
+ ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /* 0-63 memset to 1 */
+ ASSERT_EQ(buf[63], 1, "case-64-data buf[63]");
+ ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */
+ ASSERT_EQ(buf[127], 0, "case-64-data buf[127]");
+ ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */
+ ASSERT_EQ(buf[191], 1, "case-64-data buf[191]");
/* Test case-128 */
memset(buf, 2, sizeof(buf));
tattr.data_size_in = 128; /* Determine test case via pkt size */
tattr.data_size_out = sizeof(buf); /* Copy everything */
- err = bpf_prog_test_run_xattr(&tattr);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
- CHECK_ATTR(err
- || tattr.retval != XDP_TX
- || tattr.data_size_out != max_grow,/* Expect max grow size */
- "case-128",
- "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, tattr.retval, tattr.data_size_out, max_grow);
+ ASSERT_OK(err, "case-128");
+ ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval");
+ ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */
/* Extra checks for data content: Count grow size, will contain zeros */
for (i = 0, cnt = 0; i < sizeof(buf); i++) {
if (buf[i] == 0)
cnt++;
}
- CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
- || tattr.data_size_out != max_grow, /* Total grow size */
- "case-128-data",
- "err %d errno %d retval %d size %d grow-size %d\n",
- err, errno, tattr.retval, tattr.data_size_out, cnt);
+ ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */
+ ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */
bpf_object__close(obj);
}
+static void test_xdp_adjust_frags_tail_shrink(void)
+{
+ const char *file = "./test_xdp_adjust_tail_shrink.bpf.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* For the individual test cases, the first byte in the packet
+ * indicates which test will be run.
+ */
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ return;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(9000);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+ goto out;
+
+ memset(buf, 0, 9000);
+
+ /* Test case removing 10 bytes from last frag, NOT freeing it */
+ exp_size = 8990; /* 9000 - 10 */
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 9000;
+ topts.data_size_out = 9000;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb-10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb-10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-10b size");
+
+ /* Test case removing one of two pages, assuming 4K pages */
+ buf[0] = 1;
+ exp_size = 4900; /* 9000 - 4100 */
+
+ topts.data_size_out = 9000; /* reset from previous invocation */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb-4Kb");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb-4Kb retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-4Kb size");
+
+ /* Test case removing two pages resulting in a linear xdp_buff */
+ buf[0] = 2;
+ exp_size = 800; /* 9000 - 8200 */
+ topts.data_size_out = 9000; /* reset from previous invocation */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb-9Kb");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb-9Kb retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-9Kb size");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
+static void test_xdp_adjust_frags_tail_grow_4k(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, i, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(16384);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb"))
+ goto out;
+
+ /* Test case add 10 bytes to last frag */
+ memset(buf, 1, 16384);
+ exp_size = 9000 + 10;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 9000;
+ topts.data_size_out = 16384;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
+
+ for (i = 0; i < 9000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+ }
+
+ for (i = 9000; i < 9010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+ }
+
+ for (i = 9010; i < 16384; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+ }
+
+ /* Test a too large grow */
+ memset(buf, 1, 16384);
+ exp_size = 9001;
+
+ topts.data_in = topts.data_out = buf;
+ topts.data_size_in = 9001;
+ topts.data_size_out = 16384;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_DROP, "9Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
+static void test_xdp_adjust_frags_tail_grow_64k(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, i, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(262144);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb"))
+ goto out;
+
+ /* Test case add 10 bytes to last frag */
+ memset(buf, 1, 262144);
+ exp_size = 90000 + 10;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 90000;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ for (i = 0; i < 90000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-old");
+ }
+
+ for (i = 90000; i < 90010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "90Kb+10b-new");
+ }
+
+ for (i = 90010; i < 262144; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched");
+ }
+
+ /* Test a too large grow */
+ memset(buf, 1, 262144);
+ exp_size = 90001;
+
+ topts.data_in = topts.data_out = buf;
+ topts.data_size_in = 90001;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
void test_xdp_adjust_tail(void)
{
+ int page_size = getpagesize();
+
if (test__start_subtest("xdp_adjust_tail_shrink"))
test_xdp_adjust_tail_shrink();
if (test__start_subtest("xdp_adjust_tail_grow"))
- test_xdp_adjust_tail_grow();
+ test_xdp_adjust_tail_grow(page_size == 65536);
if (test__start_subtest("xdp_adjust_tail_grow2"))
test_xdp_adjust_tail_grow2();
+ if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
+ test_xdp_adjust_frags_tail_shrink();
+ if (test__start_subtest("xdp_adjust_frags_tail_grow")) {
+ if (page_size == 65536)
+ test_xdp_adjust_frags_tail_grow_64k();
+ else
+ test_xdp_adjust_frags_tail_grow_4k();
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index 15ef3531483e..e6bcb6051402 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -1,86 +1,83 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_xdp_attach_fail.skel.h"
#define IFINDEX_LO 1
#define XDP_FLAGS_REPLACE (1U << 4)
-void test_xdp_attach(void)
+static void test_xdp_attach(const char *file)
{
__u32 duration = 0, id1, id2, id0 = 0, len;
struct bpf_object *obj1, *obj2, *obj3;
- const char *file = "./test_xdp.o";
struct bpf_prog_info info = {};
int err, fd1, fd2, fd3;
- DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
- .old_fd = -1);
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
len = sizeof(info);
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
if (CHECK_FAIL(err))
return;
- err = bpf_obj_get_info_by_fd(fd1, &info, &len);
+ err = bpf_prog_get_info_by_fd(fd1, &info, &len);
if (CHECK_FAIL(err))
goto out_1;
id1 = info.id;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
if (CHECK_FAIL(err))
goto out_1;
memset(&info, 0, sizeof(info));
- err = bpf_obj_get_info_by_fd(fd2, &info, &len);
+ err = bpf_prog_get_info_by_fd(fd2, &info, &len);
if (CHECK_FAIL(err))
goto out_2;
id2 = info.id;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
if (CHECK_FAIL(err))
goto out_2;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE,
- &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, &opts);
if (CHECK(err, "load_ok", "initial load failed"))
goto out_close;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (CHECK(err || id0 != id1, "id1_check",
"loaded prog id %u != id1 %u, err %d", id0, id1, err))
goto out_close;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE,
- &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, &opts);
if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
goto out;
- opts.old_fd = fd1;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
+ opts.old_prog_fd = fd1;
+ err = bpf_xdp_attach(IFINDEX_LO, fd2, 0, &opts);
if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (CHECK(err || id0 != id2, "id2_check",
"loaded prog id %u != id2 %u, err %d", id0, id2, err))
goto out_close;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, fd3, 0, &opts);
if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
goto out;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail"))
goto out;
- opts.old_fd = fd2;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ opts.old_prog_fd = fd2;
+ err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (CHECK(err || id0 != 0, "unload_check",
"loaded prog id %u != 0, err %d", id0, err))
goto out_close;
out:
- bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+ bpf_xdp_detach(IFINDEX_LO, 0, NULL);
out_close:
bpf_object__close(obj3);
out_2:
@@ -88,3 +85,75 @@ out_2:
out_1:
bpf_object__close(obj1);
}
+
+#define ERRMSG_LEN 64
+
+struct xdp_errmsg {
+ char msg[ERRMSG_LEN];
+};
+
+static void on_xdp_errmsg(void *ctx, int cpu, void *data, __u32 size)
+{
+ struct xdp_errmsg *ctx_errmg = ctx, *tp_errmsg = data;
+
+ memcpy(&ctx_errmg->msg, &tp_errmsg->msg, ERRMSG_LEN);
+}
+
+static const char tgt_errmsg[] = "Invalid XDP flags for BPF link attachment";
+
+static void test_xdp_attach_fail(const char *file)
+{
+ struct test_xdp_attach_fail *skel = NULL;
+ struct xdp_errmsg errmsg = {};
+ struct perf_buffer *pb = NULL;
+ struct bpf_object *obj = NULL;
+ int err, fd_xdp;
+
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+
+ skel = test_xdp_attach_fail__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_attach_fail__open_and_load"))
+ goto out_close;
+
+ err = test_xdp_attach_fail__attach(skel);
+ if (!ASSERT_EQ(err, 0, "test_xdp_attach_fail__attach"))
+ goto out_close;
+
+ /* set up perf buffer */
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.xdp_errmsg_pb), 1,
+ on_xdp_errmsg, NULL, &errmsg, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buffer__new"))
+ goto out_close;
+
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &fd_xdp);
+ if (!ASSERT_EQ(err, 0, "bpf_prog_test_load"))
+ goto out_close;
+
+ opts.flags = 0xFF; // invalid flags to fail to attach XDP prog
+ err = bpf_link_create(fd_xdp, IFINDEX_LO, BPF_XDP, &opts);
+ if (!ASSERT_EQ(err, -EINVAL, "bpf_link_create"))
+ goto out_close;
+
+ /* read perf buffer */
+ err = perf_buffer__poll(pb, 100);
+ if (!ASSERT_GT(err, -1, "perf_buffer__poll"))
+ goto out_close;
+
+ ASSERT_STRNEQ((const char *) errmsg.msg, tgt_errmsg,
+ 42 /* strlen(tgt_errmsg) */, "check error message");
+
+out_close:
+ perf_buffer__free(pb);
+ bpf_object__close(obj);
+ test_xdp_attach_fail__destroy(skel);
+}
+
+void serial_test_xdp_attach(void)
+{
+ if (test__start_subtest("xdp_attach"))
+ test_xdp_attach("./test_xdp.bpf.o");
+ if (test__start_subtest("xdp_attach_dynptr"))
+ test_xdp_attach("./test_xdp_dynptr.bpf.o");
+ if (test__start_subtest("xdp_attach_failed"))
+ test_xdp_attach_fail("./xdp_dummy.bpf.o");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
new file mode 100644
index 000000000000..fb952703653e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -0,0 +1,691 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/**
+ * Test XDP bonding support
+ *
+ * Sets up two bonded veth pairs between two fresh namespaces
+ * and verifies that XDP_TX program loaded on a bond device
+ * are correctly loaded onto the slave devices and XDP_TX'd
+ * packets are balanced using bonding.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include <linux/if_bonding.h>
+#include <linux/limits.h>
+#include <netinet/udp.h>
+#include <uapi/linux/netdev.h>
+
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+
+#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
+#define BOND1_MAC_STR "00:11:22:33:44:55"
+#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
+#define BOND2_MAC_STR "00:22:33:44:55:66"
+#define NPACKETS 100
+
+static int root_netns_fd = -1;
+
+static void restore_root_netns(void)
+{
+ ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
+}
+
+static int setns_by_name(char *name)
+{
+ int nsfd, err;
+ char nspath[PATH_MAX];
+
+ snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+ nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+ if (nsfd < 0)
+ return -1;
+
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+ return err;
+}
+
+static int get_rx_packets(const char *iface)
+{
+ FILE *f;
+ char line[512];
+ int iface_len = strlen(iface);
+
+ f = fopen("/proc/net/dev", "r");
+ if (!f)
+ return -1;
+
+ while (fgets(line, sizeof(line), f)) {
+ char *p = line;
+
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ if (!strncmp(p, iface, iface_len)) {
+ p += iface_len;
+ if (*p++ != ':')
+ continue;
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ while (*p && *p != ' ')
+ p++; /* skip rx bytes */
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ fclose(f);
+ return atoi(p);
+ }
+ }
+ fclose(f);
+ return -1;
+}
+
+#define MAX_BPF_LINKS 8
+
+struct skeletons {
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+
+ int nlinks;
+ struct bpf_link *links[MAX_BPF_LINKS];
+};
+
+static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
+{
+ struct bpf_link *link;
+ int ifindex;
+
+ ifindex = if_nametoindex(iface);
+ if (!ASSERT_GT(ifindex, 0, "get ifindex"))
+ return -1;
+
+ if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached"))
+ return -1;
+
+ link = bpf_program__attach_xdp(prog, ifindex);
+ if (!ASSERT_OK_PTR(link, "attach xdp program"))
+ return -1;
+
+ skeletons->links[skeletons->nlinks++] = link;
+ return 0;
+}
+
+enum {
+ BOND_ONE_NO_ATTACH = 0,
+ BOND_BOTH_AND_ATTACH,
+};
+
+static const char * const mode_names[] = {
+ [BOND_MODE_ROUNDROBIN] = "balance-rr",
+ [BOND_MODE_ACTIVEBACKUP] = "active-backup",
+ [BOND_MODE_XOR] = "balance-xor",
+ [BOND_MODE_BROADCAST] = "broadcast",
+ [BOND_MODE_8023AD] = "802.3ad",
+ [BOND_MODE_TLB] = "balance-tlb",
+ [BOND_MODE_ALB] = "balance-alb",
+};
+
+static const char * const xmit_policy_names[] = {
+ [BOND_XMIT_POLICY_LAYER2] = "layer2",
+ [BOND_XMIT_POLICY_LAYER34] = "layer3+4",
+ [BOND_XMIT_POLICY_LAYER23] = "layer2+3",
+ [BOND_XMIT_POLICY_ENCAP23] = "encap2+3",
+ [BOND_XMIT_POLICY_ENCAP34] = "encap3+4",
+};
+
+static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
+ int bond_both_attach)
+{
+ SYS(fail, "ip netns add ns_dst");
+ SYS(fail, "ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+ SYS(fail, "ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+ SYS(fail, "ip link add bond1 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS(fail, "ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+ SYS(fail, "ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS(fail, "ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+
+ SYS(fail, "ip link set veth1_1 master bond1");
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ SYS(fail, "ip link set veth1_2 master bond1");
+ } else {
+ SYS(fail, "ip link set veth1_2 up addrgenmode none");
+
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
+ return -1;
+ }
+
+ SYS(fail, "ip -netns ns_dst link set veth2_1 master bond2");
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH)
+ SYS(fail, "ip -netns ns_dst link set veth2_2 master bond2");
+ else
+ SYS(fail, "ip -netns ns_dst link set veth2_2 up addrgenmode none");
+
+ /* Load a dummy program on sending side as with veth peer needs to have a
+ * XDP program loaded as well.
+ */
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
+ return -1;
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
+ return -1;
+
+ if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
+ return -1;
+
+ restore_root_netns();
+ }
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void bonding_cleanup(struct skeletons *skeletons)
+{
+ restore_root_netns();
+ while (skeletons->nlinks) {
+ skeletons->nlinks--;
+ bpf_link__destroy(skeletons->links[skeletons->nlinks]);
+ }
+ ASSERT_OK(system("ip link delete bond1"), "delete bond1");
+ ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
+ ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
+ ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
+}
+
+static int send_udp_packets(int vary_dst_ip)
+{
+ struct ethhdr eh = {
+ .h_source = BOND1_MAC,
+ .h_dest = BOND2_MAC,
+ .h_proto = htons(ETH_P_IP),
+ };
+ struct iphdr iph = {};
+ struct udphdr uh = {};
+ uint8_t buf[128];
+ int i, s = -1;
+ int ifindex;
+
+ s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_GE(s, 0, "socket"))
+ goto err;
+
+ ifindex = if_nametoindex("bond1");
+ if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
+ goto err;
+
+ iph.ihl = 5;
+ iph.version = 4;
+ iph.tos = 16;
+ iph.id = 1;
+ iph.ttl = 64;
+ iph.protocol = IPPROTO_UDP;
+ iph.saddr = 1;
+ iph.daddr = 2;
+ iph.tot_len = htons(sizeof(buf) - ETH_HLEN);
+ iph.check = 0;
+
+ for (i = 1; i <= NPACKETS; i++) {
+ int n;
+ struct sockaddr_ll saddr_ll = {
+ .sll_ifindex = ifindex,
+ .sll_halen = ETH_ALEN,
+ .sll_addr = BOND2_MAC,
+ };
+
+ /* vary the UDP destination port for even distribution with roundrobin/xor modes */
+ uh.dest++;
+
+ if (vary_dst_ip)
+ iph.daddr++;
+
+ /* construct a packet */
+ memcpy(buf, &eh, sizeof(eh));
+ memcpy(buf + sizeof(eh), &iph, sizeof(iph));
+ memcpy(buf + sizeof(eh) + sizeof(iph), &uh, sizeof(uh));
+
+ n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
+ if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (s >= 0)
+ close(s);
+ return -1;
+}
+
+static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
+{
+ int bond1_rx;
+
+ if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
+ goto out;
+
+ if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
+ goto out;
+
+ bond1_rx = get_rx_packets("bond1");
+ ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
+
+ switch (mode) {
+ case BOND_MODE_ROUNDROBIN:
+ case BOND_MODE_XOR: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
+
+ switch (xmit_policy) {
+ case BOND_XMIT_POLICY_LAYER2:
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ case BOND_XMIT_POLICY_LAYER23:
+ case BOND_XMIT_POLICY_LAYER34:
+ ASSERT_LT(diff, NPACKETS/2,
+ "expected even distribution of packets");
+ break;
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+ break;
+ }
+ case BOND_MODE_ACTIVEBACKUP: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ }
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+
+out:
+ bonding_cleanup(skeletons);
+}
+
+/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding
+ * all the interfaces to it and checking that broadcasting won't send the packet
+ * to neither the ingress bond device (bond2) or its slave (veth2_1).
+ */
+static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
+{
+ static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
+ int veth1_1_rx, veth1_2_rx;
+ int err;
+
+ if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
+ BOND_ONE_NO_ATTACH))
+ goto out;
+
+
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
+ goto out;
+
+ /* populate the devmap with the relevant interfaces */
+ for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
+ int ifindex = if_nametoindex(ifaces[i]);
+ int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
+
+ if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
+ if (!ASSERT_OK(err, "add interface to map_all"))
+ goto out;
+ }
+
+ if (xdp_attach(skeletons,
+ skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
+ "bond2"))
+ goto out;
+
+ restore_root_netns();
+
+ if (send_udp_packets(BOND_MODE_ROUNDROBIN))
+ goto out;
+
+ veth1_1_rx = get_rx_packets("veth1_1");
+ veth1_2_rx = get_rx_packets("veth1_2");
+
+ ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
+ ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
+
+out:
+ restore_root_netns();
+ bonding_cleanup(skeletons);
+}
+
+/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
+static void test_xdp_bonding_attach(struct skeletons *skeletons)
+{
+ struct bpf_link *link = NULL;
+ struct bpf_link *link2 = NULL;
+ int veth, bond, err;
+
+ if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
+ goto out;
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ veth = if_nametoindex("veth");
+ if (!ASSERT_GE(veth, 0, "if_nametoindex veth"))
+ goto out;
+ bond = if_nametoindex("bond");
+ if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+ goto out;
+
+ /* enslaving with a XDP program loaded is allowed */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to veth"))
+ goto out;
+
+ err = system("ip link set veth master bond");
+ if (!ASSERT_OK(err, "set veth master"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* attaching to slave when master has no program is allowed */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
+ goto out;
+
+ /* attaching to master not allowed when slave has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* attaching XDP program to master allowed when slave has no program */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_OK_PTR(link, "attach program to master"))
+ goto out;
+
+ /* attaching to slave not allowed when master has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_ERR_PTR(link2, "attach program to slave when master has program"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* test program unwinding with a non-XDP slave */
+ if (!ASSERT_OK(system("ip link add vxlan type vxlan id 1 remote 1.2.3.4 dstport 0 dev lo"),
+ "add vxlan"))
+ goto out;
+
+ err = system("ip link set vxlan master bond");
+ if (!ASSERT_OK(err, "set vxlan master"))
+ goto out;
+
+ /* attaching not allowed when one slave does not support XDP */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_ERR_PTR(link, "attach program to master when slave does not support XDP"))
+ goto out;
+
+out:
+ bpf_link__destroy(link);
+ bpf_link__destroy(link2);
+
+ system("ip link del veth");
+ system("ip link del bond");
+ system("ip link del vxlan");
+}
+
+/* Test with nested bonding devices to catch issue with negative jump label count */
+static void test_xdp_bonding_nested(struct skeletons *skeletons)
+{
+ struct bpf_link *link = NULL;
+ int bond, err;
+
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ bond = if_nametoindex("bond");
+ if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add bond_nest1 type bond"), "add bond_nest1"))
+ goto out;
+
+ err = system("ip link set bond_nest1 master bond");
+ if (!ASSERT_OK(err, "set bond_nest1 master"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add bond_nest2 type bond"), "add bond_nest1"))
+ goto out;
+
+ err = system("ip link set bond_nest2 master bond_nest1");
+ if (!ASSERT_OK(err, "set bond_nest2 master"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ ASSERT_OK_PTR(link, "attach program to master");
+
+out:
+ bpf_link__destroy(link);
+ system("ip link del bond");
+ system("ip link del bond_nest1");
+ system("ip link del bond_nest2");
+}
+
+static void test_xdp_bonding_features(struct skeletons *skeletons)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ int bond_idx, veth1_idx, err;
+ struct bpf_link *link = NULL;
+
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ bond_idx = if_nametoindex("bond");
+ if (!ASSERT_GE(bond_idx, 0, "if_nametoindex bond"))
+ goto out;
+
+ /* query default xdp-feature for bond device */
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags, 0,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add veth0 type veth peer name veth1"),
+ "add veth{0,1} pair"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add veth2 type veth peer name veth3"),
+ "add veth{2,3} pair"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth0 master bond"),
+ "add veth0 to master bond"))
+ goto out;
+
+ /* xdp-feature for bond device should be obtained from the single slave
+ * device (veth0)
+ */
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ veth1_idx = if_nametoindex("veth1");
+ if (!ASSERT_GE(veth1_idx, 0, "if_nametoindex veth1"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog,
+ veth1_idx);
+ if (!ASSERT_OK_PTR(link, "attach program to veth1"))
+ goto out;
+
+ /* xdp-feature for veth0 are changed */
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth2 master bond"),
+ "add veth2 to master bond"))
+ goto out;
+
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ /* xdp-feature for bond device should be set to the most restrict
+ * value obtained from attached slave devices (veth0 and veth2)
+ */
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth2 nomaster"),
+ "del veth2 to master bond"))
+ goto out;
+
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth0 nomaster"),
+ "del veth0 to master bond"))
+ goto out;
+
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ ASSERT_EQ(query_opts.feature_flags, 0,
+ "bond query_opts.feature_flags");
+out:
+ bpf_link__destroy(link);
+ system("ip link del veth0");
+ system("ip link del veth2");
+ system("ip link del bond");
+}
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level != LIBBPF_WARN)
+ vprintf(format, args);
+ return 0;
+}
+
+struct bond_test_case {
+ char *name;
+ int mode;
+ int xmit_policy;
+};
+
+static struct bond_test_case bond_test_cases[] = {
+ { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
+
+ { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
+ { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
+};
+
+void serial_test_xdp_bonding(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct skeletons skeletons = {};
+ int i;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+ goto out;
+
+ skeletons.xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+ goto out;
+
+ skeletons.xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+ goto out;
+
+ skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
+ "xdp_redirect_multi_kern__open_and_load"))
+ goto out;
+
+ if (test__start_subtest("xdp_bonding_attach"))
+ test_xdp_bonding_attach(&skeletons);
+
+ if (test__start_subtest("xdp_bonding_nested"))
+ test_xdp_bonding_nested(&skeletons);
+
+ if (test__start_subtest("xdp_bonding_features"))
+ test_xdp_bonding_features(&skeletons);
+
+ for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
+ struct bond_test_case *test_case = &bond_test_cases[i];
+
+ if (test__start_subtest(test_case->name))
+ test_xdp_bonding_with_mode(
+ &skeletons,
+ test_case->mode,
+ test_case->xmit_policy);
+ }
+
+ if (test__start_subtest("xdp_bonding_redirect_multi"))
+ test_xdp_bonding_redirect_multi(&skeletons);
+
+out:
+ xdp_dummy__destroy(skeletons.xdp_dummy);
+ xdp_tx__destroy(skeletons.xdp_tx);
+ xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
+
+ libbpf_set_print(old_print_fn);
+ if (root_netns_fd >= 0)
+ close(root_netns_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 2c6c570b21f8..76967d8ace9c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -10,50 +10,110 @@ struct meta {
int pkt_len;
};
+struct test_ctx_s {
+ bool passed;
+ int pkt_size;
+};
+
+struct test_ctx_s test_ctx;
+
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
- int duration = 0;
struct meta *meta = (struct meta *)data;
struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+ unsigned char *raw_pkt = data + sizeof(*meta);
+ struct test_ctx_s *tst_ctx = ctx;
+
+ ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
+ ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
+ ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len");
+ ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
+ "check_packet_content");
+
+ if (meta->pkt_len > sizeof(pkt_v4)) {
+ for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++)
+ ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i,
+ "check_packet_content");
+ }
+
+ tst_ctx->passed = true;
+}
- if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
- "check_size", "size %u < %zu\n",
- size, sizeof(pkt_v4) + sizeof(*meta)))
- return;
+#define BUF_SZ 9000
- if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
- "meta->ifindex = %d\n", meta->ifindex))
+static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+ struct test_xdp_bpf2bpf *ftrace_skel,
+ int pkt_size)
+{
+ __u8 *buf, *buf_in;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
+ !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
return;
- if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
- "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+ buf_in = malloc(BUF_SZ);
+ if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()"))
return;
- if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
- "check_packet_content", "content not the same\n"))
+ buf = malloc(BUF_SZ);
+ if (!ASSERT_OK_PTR(buf, "buf malloc()")) {
+ free(buf_in);
return;
+ }
+
+ test_ctx.passed = false;
+ test_ctx.pkt_size = pkt_size;
+
+ memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+ if (pkt_size > sizeof(pkt_v4)) {
+ for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+ buf_in[i + sizeof(pkt_v4)] = i;
+ }
+
+ /* Run test program */
+ topts.data_in = buf_in;
+ topts.data_size_in = pkt_size;
+ topts.data_out = buf;
+ topts.data_size_out = BUF_SZ;
+
+ err = bpf_prog_test_run_opts(pkt_fd, &topts);
+
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv4 retval");
+ ASSERT_EQ(topts.data_size_out, pkt_size, "ipv4 size");
+
+ /* Make sure bpf_xdp_output() was triggered and it sent the expected
+ * data to the perf ring buffer.
+ */
+ err = perf_buffer__poll(pb, 100);
- *(bool *)ctx = true;
+ ASSERT_GE(err, 0, "perf_buffer__poll");
+ ASSERT_TRUE(test_ctx.passed, "test passed");
+ /* Verify test results */
+ ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+ "fentry result");
+ ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result");
+
+ free(buf);
+ free(buf_in);
}
void test_xdp_bpf2bpf(void)
{
- __u32 duration = 0, retval, size;
- char buf[128];
int err, pkt_fd, map_fd;
- bool passed = false;
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
- struct iptnl_info value4 = {.family = AF_INET};
+ int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+ struct iptnl_info value4 = {.family = AF_INET6};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
struct bpf_program *prog;
struct perf_buffer *pb = NULL;
- struct perf_buffer_opts pb_opts = {};
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+ if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load"))
return;
pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
@@ -63,7 +123,7 @@ void test_xdp_bpf2bpf(void)
/* Load trace program */
ftrace_skel = test_xdp_bpf2bpf__open();
- if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+ if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open"))
goto out;
/* Demonstrate the bpf_program__set_attach_target() API rather than
@@ -78,52 +138,24 @@ void test_xdp_bpf2bpf(void)
bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
err = test_xdp_bpf2bpf__load(ftrace_skel);
- if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+ if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load"))
goto out;
err = test_xdp_bpf2bpf__attach(ftrace_skel);
- if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach"))
goto out;
/* Set up perf buffer */
- pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &passed;
- pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
- 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8,
+ on_sample, NULL, &test_ctx, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out;
- /* Run test program */
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
-
- if (CHECK(err || retval != XDP_TX || size != 74 ||
- iph->protocol != IPPROTO_IPIP, "ipv4",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size))
- goto out;
-
- /* Make sure bpf_xdp_output() was triggered and it sent the expected
- * data to the perf ring buffer.
- */
- err = perf_buffer__poll(pb, 100);
- if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
- goto out;
-
- CHECK_FAIL(!passed);
-
- /* Verify test results */
- if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
- "result", "fentry failed err %llu\n",
- ftrace_skel->bss->test_result_fentry))
- goto out;
-
- CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
- "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+ for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+ run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+ pkt_sizes[i]);
out:
- if (pb)
- perf_buffer__free(pb);
+ perf_buffer__free(pb);
test_xdp__destroy(pkt_skel);
test_xdp_bpf2bpf__destroy(ftrace_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
new file mode 100644
index 000000000000..ee94c281888a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -0,0 +1,512 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_context_test_run.skel.h"
+#include "test_xdp_meta.skel.h"
+
+#define RX_NAME "veth0"
+#define TX_NAME "veth1"
+#define TX_NETNS "xdp_context_tx"
+#define RX_NETNS "xdp_context_rx"
+#define TAP_NAME "tap0"
+#define DUMMY_NAME "dum0"
+#define TAP_NETNS "xdp_context_tuntap"
+
+#define TEST_PAYLOAD_LEN 32
+static const __u8 test_payload[TEST_PAYLOAD_LEN] = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+};
+
+void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
+ __u32 data_meta, __u32 data, __u32 data_end,
+ __u32 ingress_ifindex, __u32 rx_queue_index,
+ __u32 egress_ifindex)
+{
+ struct xdp_md ctx = {
+ .data = data,
+ .data_end = data_end,
+ .data_meta = data_meta,
+ .ingress_ifindex = ingress_ifindex,
+ .rx_queue_index = rx_queue_index,
+ .egress_ifindex = egress_ifindex,
+ };
+ int err;
+
+ opts.ctx_in = &ctx;
+ opts.ctx_size_in = sizeof(ctx);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, EINVAL, "errno-EINVAL");
+ ASSERT_ERR(err, "bpf_prog_test_run");
+}
+
+void test_xdp_context_test_run(void)
+{
+ struct test_xdp_context_test_run *skel = NULL;
+ char data[sizeof(pkt_v4) + sizeof(__u32)];
+ char bad_ctx[sizeof(struct xdp_md) + 1];
+ struct xdp_md ctx_in, ctx_out;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .ctx_out = &ctx_out,
+ .ctx_size_out = sizeof(ctx_out),
+ .repeat = 1,
+ );
+ int err, prog_fd;
+
+ skel = test_xdp_context_test_run__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.xdp_context);
+
+ /* Data past the end of the kernel's struct xdp_md must be 0 */
+ bad_ctx[sizeof(bad_ctx) - 1] = 1;
+ opts.ctx_in = bad_ctx;
+ opts.ctx_size_in = sizeof(bad_ctx);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, E2BIG, "extradata-errno");
+ ASSERT_ERR(err, "bpf_prog_test_run(extradata)");
+
+ *(__u32 *)data = XDP_PASS;
+ *(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4;
+ opts.ctx_in = &ctx_in;
+ opts.ctx_size_in = sizeof(ctx_in);
+ memset(&ctx_in, 0, sizeof(ctx_in));
+ ctx_in.data_meta = 0;
+ ctx_in.data = sizeof(__u32);
+ ctx_in.data_end = ctx_in.data + sizeof(pkt_v4);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run(valid)");
+ ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval");
+ ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize");
+ ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize");
+ ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta");
+ ASSERT_EQ(ctx_out.data, 0, "valid-data");
+ ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend");
+
+ /* Meta data's size must be a multiple of 4 */
+ test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0);
+
+ /* data_meta must reference the start of data */
+ test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
+ 0, 0, 0);
+
+ /* Meta data must be 255 bytes or smaller */
+ test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
+
+ /* Total size of data must be data_end - data_meta or larger */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+ sizeof(data) + 1, 0, 0, 0);
+
+ /* RX queue cannot be specified without specifying an ingress */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 1, 0);
+
+ /* Interface 1 is always the loopback interface which always has only
+ * one RX queue (index 0). This makes index 1 an invalid rx queue index
+ * for interface 1.
+ */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 1, 1, 0);
+
+ /* The egress cannot be specified */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 0, 1);
+
+ test_xdp_context_test_run__destroy(skel);
+}
+
+static int send_test_packet(int ifindex)
+{
+ int n, sock = -1;
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+
+ /* We use the Ethernet header only to identify the test packet */
+ struct ethhdr eth = {
+ .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+ };
+
+ memcpy(packet, &eth, sizeof(eth));
+ memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+
+ sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_GE(sock, 0, "socket"))
+ goto err;
+
+ struct sockaddr_ll saddr = {
+ .sll_family = PF_PACKET,
+ .sll_ifindex = ifindex,
+ .sll_halen = ETH_ALEN
+ };
+ n = sendto(sock, packet, sizeof(packet), 0, (struct sockaddr *)&saddr,
+ sizeof(saddr));
+ if (!ASSERT_EQ(n, sizeof(packet), "sendto"))
+ goto err;
+
+ close(sock);
+ return 0;
+
+err:
+ if (sock >= 0)
+ close(sock);
+ return -1;
+}
+
+static int write_test_packet(int tap_fd)
+{
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+ int n;
+
+ /* The Ethernet header is mostly not relevant. We use it to identify the
+ * test packet and some BPF helpers we exercise expect to operate on
+ * Ethernet frames carrying IP packets. Pretend that's the case.
+ */
+ struct ethhdr eth = {
+ .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+ .h_proto = htons(ETH_P_IP),
+ };
+
+ memcpy(packet, &eth, sizeof(eth));
+ memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);
+
+ n = write(tap_fd, packet, sizeof(packet));
+ if (!ASSERT_EQ(n, sizeof(packet), "write packet"))
+ return -1;
+
+ return 0;
+}
+
+static void dump_err_stream(const struct bpf_program *prog)
+{
+ char buf[512];
+ int ret;
+
+ ret = 0;
+ do {
+ ret = bpf_prog_stream_read(bpf_program__fd(prog),
+ BPF_STREAM_STDERR, buf, sizeof(buf),
+ NULL);
+ if (ret > 0)
+ fwrite(buf, sizeof(buf[0]), ret, stderr);
+ } while (ret > 0);
+}
+
+void test_xdp_context_veth(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *rx_ns = NULL, *tx_ns = NULL;
+ struct bpf_program *tc_prog, *xdp_prog;
+ struct test_xdp_meta *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int rx_ifindex, tx_ifindex;
+ int ret;
+
+ tx_ns = netns_new(TX_NETNS, false);
+ if (!ASSERT_OK_PTR(tx_ns, "create tx_ns"))
+ return;
+
+ rx_ns = netns_new(RX_NETNS, false);
+ if (!ASSERT_OK_PTR(rx_ns, "create rx_ns"))
+ goto close;
+
+ SYS(close, "ip link add " RX_NAME " netns " RX_NETNS
+ " type veth peer name " TX_NAME " netns " TX_NETNS);
+
+ nstoken = open_netns(RX_NETNS);
+ if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
+ goto close;
+
+ SYS(close, "ip link set dev " RX_NAME " up");
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ goto close;
+
+ rx_ifindex = if_nametoindex(RX_NAME);
+ if (!ASSERT_GE(rx_ifindex, 0, "if_nametoindex rx"))
+ goto close;
+
+ tc_hook.ifindex = rx_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_prog = bpf_object__find_program_by_name(skel->obj, "ing_cls");
+ if (!ASSERT_OK_PTR(tc_prog, "open ing_cls prog"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ xdp_prog = bpf_object__find_program_by_name(skel->obj, "ing_xdp");
+ if (!ASSERT_OK_PTR(xdp_prog, "open ing_xdp prog"))
+ goto close;
+
+ ret = bpf_xdp_attach(rx_ifindex,
+ bpf_program__fd(xdp_prog),
+ 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(TX_NETNS);
+ if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
+ goto close;
+
+ SYS(close, "ip link set dev " TX_NAME " up");
+
+ tx_ifindex = if_nametoindex(TX_NAME);
+ if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx"))
+ goto close;
+
+ skel->bss->test_pass = false;
+
+ ret = send_test_packet(tx_ifindex);
+ if (!ASSERT_OK(ret, "send_test_packet"))
+ goto close;
+
+ if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass"))
+ dump_err_stream(tc_prog);
+
+close:
+ close_netns(nstoken);
+ test_xdp_meta__destroy(skel);
+ netns_free(rx_ns);
+ netns_free(tx_ns);
+}
+
+static void test_tuntap(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prio_1_prog,
+ struct bpf_program *tc_prio_2_prog,
+ bool *test_pass)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ *test_pass = false;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "create and open ns"))
+ return;
+
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ tc_hook.ifindex = tap_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ if (tc_prio_2_prog) {
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2,
+ .prog_fd = bpf_program__fd(tc_prio_2_prog));
+
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+ }
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog),
+ 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
+
+ if (!ASSERT_TRUE(*test_pass, "test_pass"))
+ dump_err_stream(tc_prio_2_prog ? : tc_prio_1_prog);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+}
+
+/* Write a packet to a tap dev and copy it to ingress of a dummy dev */
+static void test_tuntap_mirred(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prog,
+ bool *test_pass)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ int dummy_ifindex;
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ *test_pass = false;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "netns_new"))
+ return;
+
+ /* Setup dummy interface */
+ SYS(close, "ip link add name " DUMMY_NAME " type dummy");
+ SYS(close, "ip link set dev " DUMMY_NAME " up");
+
+ dummy_ifindex = if_nametoindex(DUMMY_NAME);
+ if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ tc_hook.ifindex = dummy_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ /* Setup TAP interface */
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ /* Copy all packets received from TAP to dummy ingress */
+ SYS(close, "tc qdisc add dev " TAP_NAME " clsact");
+ SYS(close, "tc filter add dev " TAP_NAME " ingress "
+ "protocol all matchall "
+ "action mirred ingress mirror dev " DUMMY_NAME);
+
+ /* Receive a packet on TAP */
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
+
+ if (!ASSERT_TRUE(*test_pass, "test_pass"))
+ dump_err_stream(tc_prog);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+}
+
+void test_xdp_context_tuntap(void)
+{
+ struct test_xdp_meta *skel = NULL;
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ return;
+
+ if (test__start_subtest("data_meta"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_read"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_read,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_slice"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_slice,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_write"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_write,
+ skel->progs.ing_cls_dynptr_read,
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_slice_rdwr"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_slice_rdwr,
+ skel->progs.ing_cls_dynptr_slice,
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_offset"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_offset_wr,
+ skel->progs.ing_cls_dynptr_offset_rd,
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_offset_oob"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_offset_oob,
+ skel->progs.ing_cls,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_survives_data_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_survives_data_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_survives_meta_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_survives_meta_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_survives_data_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_survives_meta_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write,
+ &skel->bss->test_pass);
+ /* Tests for BPF helpers which touch headroom */
+ if (test__start_subtest("helper_skb_vlan_push_pop"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_vlan_push_pop,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_adjust_room"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_adjust_room,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_change_head_tail"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_change_head_tail,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_change_proto"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_change_proto,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+
+ test_xdp_meta__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index 0176573fe4e7..df27535995af 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -2,69 +2,148 @@
#include <uapi/linux/bpf.h>
#include <linux/if_link.h>
#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_with_cpumap_frags_helpers.skel.h"
#include "test_xdp_with_cpumap_helpers.skel.h"
#define IFINDEX_LO 1
+#define TEST_NS "cpu_attach_ns"
-void test_xdp_with_cpumap_helpers(void)
+static void test_xdp_with_cpumap_helpers(void)
{
- struct test_xdp_with_cpumap_helpers *skel;
+ struct test_xdp_with_cpumap_helpers *skel = NULL;
struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
struct bpf_cpumap_val val = {
.qsize = 192,
};
- __u32 duration = 0, idx = 0;
- __u32 len = sizeof(info);
- int err, prog_fd, map_fd;
+ int err, prog_fd, prog_redir_fd, map_fd;
+ struct nstoken *nstoken = NULL;
+ __u32 idx = 0;
+
+ SYS(out_close, "ip netns add %s", TEST_NS);
+ nstoken = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto out_close;
+ SYS(out_close, "ip link set dev lo up");
skel = test_xdp_with_cpumap_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_xdp_with_cpumap_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
return;
- }
- /* can not attach program with cpumaps that allow programs
- * as xdp generic
- */
- prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
- err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
- "should have failed\n");
+ prog_redir_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_xdp_attach(IFINDEX_LO, prog_redir_fd, XDP_FLAGS_SKB_MODE, NULL);
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
+ goto out_close;
prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
map_fd = bpf_map__fd(skel->maps.cpu_map);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
- if (CHECK_FAIL(err))
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
goto out_close;
val.bpf_prog.fd = prog_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
- err, errno);
+ ASSERT_OK(err, "Add program to cpumap entry");
err = bpf_map_lookup_elem(map_fd, &idx, &val);
- CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
- CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
- "expected %u read %u\n", info.id, val.bpf_prog.id);
+ ASSERT_OK(err, "Read cpumap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
+
+ /* send a packet to trigger any potential bugs in there */
+ char data[ETH_HLEN] = {};
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+ .repeat = 1,
+ );
+ err = bpf_prog_test_run_opts(prog_redir_fd, &opts);
+ ASSERT_OK(err, "XDP test run");
+
+ /* wait for the packets to be flushed, then check that redirect has been
+ * performed
+ */
+ kern_sync_rcu();
+ ASSERT_NEQ(skel->bss->redirect_count, 0, "redirected packets");
+
+ err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
+ ASSERT_OK(err, "XDP program detach");
/* can not attach BPF_XDP_CPUMAP program to a device */
- err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
- "should have failed\n");
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
+ bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
val.qsize = 192;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
- "should have failed\n");
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
+
+ /* Try to attach BPF_XDP program with frags to cpumap when we have
+ * already loaded a BPF_XDP program on the map
+ */
+ idx = 1;
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry");
out_close:
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del %s", TEST_NS);
test_xdp_with_cpumap_helpers__destroy(skel);
}
+static void test_xdp_with_cpumap_frags_helpers(void)
+{
+ struct test_xdp_with_cpumap_frags_helpers *skel;
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ struct bpf_cpumap_val val = {
+ .qsize = 192,
+ };
+ int err, frags_prog_fd, map_fd;
+ __u32 idx = 0;
+
+ skel = test_xdp_with_cpumap_frags_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
+ return;
+
+ frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ err = bpf_prog_get_info_by_fd(frags_prog_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto out_close;
+
+ val.bpf_prog.fd = frags_prog_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_OK(err, "Add program to cpumap entry");
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ ASSERT_OK(err, "Read cpumap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id,
+ "Match program id to cpumap entry prog_id");
+
+ /* Try to attach BPF_XDP program to cpumap when we have
+ * already loaded a BPF_XDP program with frags on the map
+ */
+ idx = 1;
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry");
+
+out_close:
+ test_xdp_with_cpumap_frags_helpers__destroy(skel);
+}
+
void test_xdp_cpumap_attach(void)
{
- if (test__start_subtest("cpumap_with_progs"))
+ if (test__start_subtest("CPUMAP with programs in entries"))
test_xdp_with_cpumap_helpers();
+
+ if (test__start_subtest("CPUMAP with frags programs in entries"))
+ test_xdp_with_cpumap_frags_helpers();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c
new file mode 100644
index 000000000000..7dd18c6d06c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/if.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#define LOCAL_NETNS "xdp_dev_bound_only_netns"
+
+static int load_dummy_prog(char *name, __u32 ifindex, __u32 flags)
+{
+ struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ opts.prog_flags = flags;
+ opts.prog_ifindex = ifindex;
+ return bpf_prog_load(BPF_PROG_TYPE_XDP, name, "GPL", insns, ARRAY_SIZE(insns), &opts);
+}
+
+/* A test case for bpf_offload_netdev->offload handling bug:
+ * - create a veth device (does not support offload);
+ * - create a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag
+ * (such programs are not offloaded);
+ * - create a device bound XDP program without flags (such programs are offloaded).
+ * This might lead to 'BUG: kernel NULL pointer dereference'.
+ */
+void test_xdp_dev_bound_only_offdev(void)
+{
+ struct nstoken *tok = NULL;
+ __u32 ifindex;
+ int fd1 = -1;
+ int fd2 = -1;
+
+ SYS(out, "ip netns add " LOCAL_NETNS);
+ tok = open_netns(LOCAL_NETNS);
+ if (!ASSERT_OK_PTR(tok, "open_netns"))
+ goto out;
+ SYS(out, "ip link add eth42 type veth");
+ ifindex = if_nametoindex("eth42");
+ if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) {
+ perror("if_nametoindex");
+ goto out;
+ }
+ fd1 = load_dummy_prog("dummy1", ifindex, BPF_F_XDP_DEV_BOUND_ONLY);
+ if (!ASSERT_GE(fd1, 0, "load_dummy_prog #1")) {
+ perror("load_dummy_prog #1");
+ goto out;
+ }
+ /* Program with ifindex is considered offloaded, however veth
+ * does not support offload => error should be reported.
+ */
+ fd2 = load_dummy_prog("dummy2", ifindex, 0);
+ ASSERT_EQ(fd2, -EINVAL, "load_dummy_prog #2 (offloaded)");
+
+out:
+ close(fd1);
+ close(fd2);
+ close_netns(tok);
+ /* eth42 was added inside netns, removing the netns will
+ * also remove eth42 veth pair.
+ */
+ SYS_NOFAIL("ip netns del " LOCAL_NETNS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 88ef3ec8ac4c..a8ab05216c38 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -1,89 +1,275 @@
// SPDX-License-Identifier: GPL-2.0
+#include <arpa/inet.h>
#include <uapi/linux/bpf.h>
#include <linux/if_link.h>
+#include <network_helpers.h>
+#include <net/if.h>
#include <test_progs.h>
#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_devmap_tailcall.skel.h"
+#include "test_xdp_with_devmap_frags_helpers.skel.h"
#include "test_xdp_with_devmap_helpers.skel.h"
#define IFINDEX_LO 1
+#define TEST_NS "devmap_attach_ns"
-void test_xdp_with_devmap_helpers(void)
+static void test_xdp_with_devmap_helpers(void)
{
- struct test_xdp_with_devmap_helpers *skel;
+ struct test_xdp_with_devmap_helpers *skel = NULL;
struct bpf_prog_info info = {};
struct bpf_devmap_val val = {
.ifindex = IFINDEX_LO,
};
__u32 len = sizeof(info);
- __u32 duration = 0, idx = 0;
- int err, dm_fd, map_fd;
+ int err, dm_fd, dm_fd_redir, map_fd;
+ struct nstoken *nstoken = NULL;
+ char data[ETH_HLEN] = {};
+ __u32 idx = 0;
+ SYS(out_close, "ip netns add %s", TEST_NS);
+ nstoken = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto out_close;
+ SYS(out_close, "ip link set dev lo up");
skel = test_xdp_with_devmap_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_xdp_with_devmap_helpers__open_and_load");
- return;
- }
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
+ goto out_close;
- /* can not attach program with DEVMAPs that allow programs
- * as xdp generic
- */
- dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
- err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Generic attach of program with 8-byte devmap",
- "should have failed\n");
+ dm_fd_redir = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_xdp_attach(IFINDEX_LO, dm_fd_redir, XDP_FLAGS_SKB_MODE, NULL);
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
+ goto out_close;
dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
map_fd = bpf_map__fd(skel->maps.dm_ports);
- err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
- if (CHECK_FAIL(err))
+ err = bpf_prog_get_info_by_fd(dm_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
goto out_close;
val.bpf_prog.fd = dm_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err, "Add program to devmap entry",
- "err %d errno %d\n", err, errno);
+ ASSERT_OK(err, "Add program to devmap entry");
err = bpf_map_lookup_elem(map_fd, &idx, &val);
- CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
- CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
- "expected %u read %u\n", info.id, val.bpf_prog.id);
+ ASSERT_OK(err, "Read devmap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
+
+ /* send a packet to trigger any potential bugs in there */
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+ .repeat = 1,
+ );
+ err = bpf_prog_test_run_opts(dm_fd_redir, &opts);
+ ASSERT_OK(err, "XDP test run");
+
+ /* wait for the packets to be flushed */
+ kern_sync_rcu();
+
+ err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
+ ASSERT_OK(err, "XDP program detach");
/* can not attach BPF_XDP_DEVMAP program to a device */
- err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
- "should have failed\n");
+ err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
+ bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
val.ifindex = 1;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
- "should have failed\n");
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
+
+ /* Try to attach BPF_XDP program with frags to devmap when we have
+ * already loaded a BPF_XDP program on the map
+ */
+ idx = 1;
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry");
out_close:
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del %s", TEST_NS);
test_xdp_with_devmap_helpers__destroy(skel);
}
-void test_neg_xdp_devmap_helpers(void)
+static void test_neg_xdp_devmap_helpers(void)
{
struct test_xdp_devmap_helpers *skel;
- __u32 duration = 0;
skel = test_xdp_devmap_helpers__open_and_load();
- if (CHECK(skel,
- "Load of XDP program accessing egress ifindex without attach type",
- "should have failed\n")) {
+ if (!ASSERT_EQ(skel, NULL,
+ "Load of XDP program accessing egress ifindex without attach type")) {
test_xdp_devmap_helpers__destroy(skel);
}
}
+static void test_xdp_devmap_tailcall(enum bpf_attach_type prog_dev,
+ enum bpf_attach_type prog_tail,
+ bool expect_reject)
+{
+ struct test_xdp_devmap_tailcall *skel;
+ int err;
+
+ skel = test_xdp_devmap_tailcall__open();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_devmap_tailcall__open"))
+ return;
+
+ bpf_program__set_expected_attach_type(skel->progs.xdp_devmap, prog_dev);
+ bpf_program__set_expected_attach_type(skel->progs.xdp_entry, prog_tail);
+
+ err = test_xdp_devmap_tailcall__load(skel);
+ if (expect_reject)
+ ASSERT_ERR(err, "test_xdp_devmap_tailcall__load");
+ else
+ ASSERT_OK(err, "test_xdp_devmap_tailcall__load");
+
+ test_xdp_devmap_tailcall__destroy(skel);
+}
-void test_xdp_devmap_attach(void)
+static void test_xdp_with_devmap_frags_helpers(void)
+{
+ struct test_xdp_with_devmap_frags_helpers *skel;
+ struct bpf_prog_info info = {};
+ struct bpf_devmap_val val = {
+ .ifindex = IFINDEX_LO,
+ };
+ __u32 len = sizeof(info);
+ int err, dm_fd_frags, map_fd;
+ __u32 idx = 0;
+
+ skel = test_xdp_with_devmap_frags_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
+ return;
+
+ dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+ map_fd = bpf_map__fd(skel->maps.dm_ports);
+ err = bpf_prog_get_info_by_fd(dm_fd_frags, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto out_close;
+
+ val.bpf_prog.fd = dm_fd_frags;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_OK(err, "Add frags program to devmap entry");
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ ASSERT_OK(err, "Read devmap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id,
+ "Match program id to devmap entry prog_id");
+
+ /* Try to attach BPF_XDP program to devmap when we have
+ * already loaded a BPF_XDP program with frags on the map
+ */
+ idx = 1;
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry");
+
+out_close:
+ test_xdp_with_devmap_frags_helpers__destroy(skel);
+}
+
+static void test_xdp_with_devmap_helpers_veth(void)
+{
+ struct test_xdp_with_devmap_helpers *skel = NULL;
+ struct bpf_prog_info info = {};
+ struct bpf_devmap_val val = {};
+ struct nstoken *nstoken = NULL;
+ __u32 len = sizeof(info);
+ int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst;
+ char data[ETH_HLEN] = {};
+ __u32 idx = 0;
+
+ SYS(out_close, "ip netns add %s", TEST_NS);
+ nstoken = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto out_close;
+
+ SYS(out_close, "ip link add veth_src type veth peer name veth_dst");
+ SYS(out_close, "ip link set dev veth_src up");
+ SYS(out_close, "ip link set dev veth_dst up");
+
+ val.ifindex = if_nametoindex("veth_src");
+ ifindex_dst = if_nametoindex("veth_dst");
+ if (!ASSERT_NEQ(val.ifindex, 0, "val.ifindex") ||
+ !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
+ goto out_close;
+
+ skel = test_xdp_with_devmap_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
+ goto out_close;
+
+ dm_fd_redir = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_xdp_attach(val.ifindex, dm_fd_redir, XDP_FLAGS_DRV_MODE, NULL);
+ if (!ASSERT_OK(err, "Attach of program with 8-byte devmap"))
+ goto out_close;
+
+ dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+ map_fd = bpf_map__fd(skel->maps.dm_ports);
+ err = bpf_prog_get_info_by_fd(dm_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto out_close;
+
+ val.bpf_prog.fd = dm_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_OK(err, "Add program to devmap entry");
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ ASSERT_OK(err, "Read devmap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
+
+ /* attach dummy to other side to enable reception */
+ dm_fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+ err = bpf_xdp_attach(ifindex_dst, dm_fd, XDP_FLAGS_DRV_MODE, NULL);
+ if (!ASSERT_OK(err, "Attach of dummy XDP"))
+ goto out_close;
+
+ /* send a packet to trigger any potential bugs in there */
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+ .repeat = 1,
+ );
+ err = bpf_prog_test_run_opts(dm_fd_redir, &opts);
+ ASSERT_OK(err, "XDP test run");
+
+ /* wait for the packets to be flushed */
+ kern_sync_rcu();
+
+ err = bpf_xdp_detach(val.ifindex, XDP_FLAGS_DRV_MODE, NULL);
+ ASSERT_OK(err, "XDP program detach");
+
+ err = bpf_xdp_detach(ifindex_dst, XDP_FLAGS_DRV_MODE, NULL);
+ ASSERT_OK(err, "XDP program detach");
+
+out_close:
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del %s", TEST_NS);
+ test_xdp_with_devmap_helpers__destroy(skel);
+}
+
+void serial_test_xdp_devmap_attach(void)
{
if (test__start_subtest("DEVMAP with programs in entries"))
test_xdp_with_devmap_helpers();
- if (test__start_subtest("Verifier check of DEVMAP programs"))
+ if (test__start_subtest("DEVMAP with frags programs in entries"))
+ test_xdp_with_devmap_frags_helpers();
+
+ if (test__start_subtest("Verifier check of DEVMAP programs")) {
test_neg_xdp_devmap_helpers();
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, BPF_XDP_DEVMAP, false);
+ test_xdp_devmap_tailcall(0, 0, true);
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, 0, true);
+ test_xdp_devmap_tailcall(0, BPF_XDP_DEVMAP, true);
+ }
+
+ if (test__start_subtest("DEVMAP with programs in entries on veth"))
+ test_xdp_with_devmap_helpers_veth();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
new file mode 100644
index 000000000000..dd34b0cc4b4e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_link.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <netinet/udp.h>
+#include <bpf/bpf_endian.h>
+#include <uapi/linux/netdev.h>
+#include "test_xdp_do_redirect.skel.h"
+#include "xdp_dummy.skel.h"
+
+struct udp_packet {
+ struct ethhdr eth;
+ struct ipv6hdr iph;
+ struct udphdr udp;
+ __u8 payload[64 - sizeof(struct udphdr)
+ - sizeof(struct ethhdr) - sizeof(struct ipv6hdr)];
+} __packed;
+
+static struct udp_packet pkt_udp = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .eth.h_dest = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
+ .eth.h_source = {0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb},
+ .iph.version = 6,
+ .iph.nexthdr = IPPROTO_UDP,
+ .iph.payload_len = bpf_htons(sizeof(struct udp_packet)
+ - offsetof(struct udp_packet, udp)),
+ .iph.hop_limit = 2,
+ .iph.saddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(1)},
+ .iph.daddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(2)},
+ .udp.source = bpf_htons(1),
+ .udp.dest = bpf_htons(1),
+ .udp.len = bpf_htons(sizeof(struct udp_packet)
+ - offsetof(struct udp_packet, udp)),
+ .payload = {0x42}, /* receiver XDP program matches on this */
+};
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ int ret;
+
+ ret = bpf_tc_hook_create(hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+
+ return 0;
+}
+
+/* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) -
+ * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - XDP_PACKET_HEADROOM =
+ * 3408 bytes for 64-byte cacheline and 3216 for 256-byte one.
+ */
+#if defined(__s390x__)
+#define MAX_PKT_SIZE 3216
+#else
+#define MAX_PKT_SIZE 3408
+#endif
+
+#define PAGE_SIZE_4K 4096
+#define PAGE_SIZE_64K 65536
+
+static void test_max_pkt_size(int fd)
+{
+ char data[PAGE_SIZE_64K + 1] = {};
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+ .repeat = 1,
+ );
+
+ if (getpagesize() == PAGE_SIZE_64K)
+ opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K;
+ else
+ opts.data_size_in = MAX_PKT_SIZE;
+
+ err = bpf_prog_test_run_opts(fd, &opts);
+ ASSERT_OK(err, "prog_run_max_size");
+
+ opts.data_size_in += 1;
+ err = bpf_prog_test_run_opts(fd, &opts);
+ ASSERT_EQ(err, -EINVAL, "prog_run_too_big");
+}
+
+#define NUM_PKTS 10000
+void test_xdp_do_redirect(void)
+{
+ int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
+ char data[sizeof(pkt_udp) + sizeof(__u64)];
+ struct test_xdp_do_redirect *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ struct bpf_link *link;
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ struct xdp_md ctx_in = { .data = sizeof(__u64),
+ .data_end = sizeof(data) };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .ctx_in = &ctx_in,
+ .ctx_size_in = sizeof(ctx_in),
+ .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+ .repeat = NUM_PKTS,
+ .batch_size = 64,
+ );
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .attach_point = BPF_TC_INGRESS);
+
+ memcpy(&data[sizeof(__u64)], &pkt_udp, sizeof(pkt_udp));
+ ((__u32 *)data)[0] = 0x42; /* metadata test value */
+ ((__u32 *)data)[1] = 0;
+
+ skel = test_xdp_do_redirect__open();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ /* The XDP program we run with bpf_prog_run() will cycle through all
+ * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
+ * ending up with PASS, so we should end up with two packets on the dst
+ * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
+ * payload.
+ */
+ SYS(out, "ip netns add testns");
+ nstoken = open_netns("testns");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto out;
+
+ SYS(out, "ip link add veth_src type veth peer name veth_dst");
+ SYS(out, "ip link set dev veth_src address 00:11:22:33:44:55");
+ SYS(out, "ip link set dev veth_dst address 66:77:88:99:aa:bb");
+ SYS(out, "ip link set dev veth_src up");
+ SYS(out, "ip link set dev veth_dst up");
+ SYS(out, "ip addr add dev veth_src fc00::1/64");
+ SYS(out, "ip addr add dev veth_dst fc00::2/64");
+ SYS(out, "ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
+
+ /* We enable forwarding in the test namespace because that will cause
+ * the packets that go through the kernel stack (with XDP_PASS) to be
+ * forwarded back out the same interface (because of the packet dst
+ * combined with the interface addresses). When this happens, the
+ * regular forwarding path will end up going through the same
+ * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
+ * deadlock if it happens on the same CPU. There's a local_bh_disable()
+ * in the test_run code to prevent this, but an earlier version of the
+ * code didn't have this, so we keep the test behaviour to make sure the
+ * bug doesn't resurface.
+ */
+ SYS(out, "sysctl -qw net.ipv6.conf.all.forwarding=1");
+
+ ifindex_src = if_nametoindex("veth_src");
+ ifindex_dst = if_nametoindex("veth_dst");
+ if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
+ !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
+ goto out;
+
+ /* Check xdp features supported by veth driver */
+ err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_src bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "veth_src query_opts.feature_flags"))
+ goto out;
+
+ err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_dst bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "veth_dst query_opts.feature_flags"))
+ goto out;
+
+ /* Enable GRO */
+ SYS(out, "ethtool -K veth_src gro on");
+ SYS(out, "ethtool -K veth_dst gro on");
+
+ err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_src bpf_xdp_query gro on"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "veth_src query_opts.feature_flags gro on"))
+ goto out;
+
+ err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_dst bpf_xdp_query gro on"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "veth_dst query_opts.feature_flags gro on"))
+ goto out;
+
+ memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
+ skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
+ skel->rodata->ifindex_in = ifindex_src;
+ ctx_in.ingress_ifindex = ifindex_src;
+ tc_hook.ifindex = ifindex_src;
+
+ if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
+ if (!ASSERT_OK_PTR(link, "prog_attach"))
+ goto out;
+ skel->links.xdp_count_pkts = link;
+
+ tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
+ if (attach_tc_prog(&tc_hook, tc_prog_fd))
+ goto out;
+
+ xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
+ err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
+ if (!ASSERT_OK(err, "prog_run"))
+ goto out_tc;
+
+ /* wait for the packets to be flushed */
+ kern_sync_rcu();
+
+ /* There will be one packet sent through XDP_REDIRECT and one through
+ * XDP_TX; these will show up on the XDP counting program, while the
+ * rest will be counted at the TC ingress hook (and the counting program
+ * resets the packet payload so they don't get counted twice even though
+ * they are re-xmited out the veth device
+ */
+ ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
+ ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero");
+ ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
+
+ test_max_pkt_size(bpf_program__fd(skel->progs.xdp_count_pkts));
+
+out_tc:
+ bpf_tc_hook_destroy(&tc_hook);
+out:
+ if (nstoken)
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del testns");
+ test_xdp_do_redirect__destroy(skel);
+}
+
+#define NS_NB 3
+#define NS0 "NS0"
+#define NS1 "NS1"
+#define NS2 "NS2"
+#define IPV4_NETWORK "10.1.1"
+#define VETH1_INDEX 111
+#define VETH2_INDEX 222
+
+struct test_data {
+ struct netns_obj *ns[NS_NB];
+ u32 xdp_flags;
+};
+
+static void cleanup(struct test_data *data)
+{
+ int i;
+
+ for (i = 0; i < NS_NB; i++)
+ netns_free(data->ns[i]);
+}
+
+/**
+ * ping_setup -
+ * Create two veth peers and forward packets in-between using XDP
+ *
+ * ------------ ------------
+ * | NS1 | | NS2 |
+ * | veth0 | | veth0 |
+ * | 10.1.1.1 | | 10.1.1.2 |
+ * -----|------ ------|-----
+ * | |
+ * | |
+ * -----|-----------------------|-------
+ * | veth1 veth2 |
+ * | (id:111) (id:222) |
+ * | | | |
+ * | ----- xdp forwarding ----- |
+ * | |
+ * | NS0 |
+ * -------------------------------------
+ */
+static int ping_setup(struct test_data *data)
+{
+ int i;
+
+ data->ns[0] = netns_new(NS0, false);
+ if (!ASSERT_OK_PTR(data->ns[0], "create ns"))
+ return -1;
+
+ for (i = 1; i < NS_NB; i++) {
+ char ns_name[4] = {};
+
+ snprintf(ns_name, 4, "NS%d", i);
+ data->ns[i] = netns_new(ns_name, false);
+ if (!ASSERT_OK_PTR(data->ns[i], "create ns"))
+ goto fail;
+
+ SYS(fail,
+ "ip -n %s link add veth%d index %d%d%d type veth peer name veth0 netns %s",
+ NS0, i, i, i, i, ns_name);
+ SYS(fail, "ip -n %s link set veth%d up", NS0, i);
+
+ SYS(fail, "ip -n %s addr add %s.%d/24 dev veth0", ns_name, IPV4_NETWORK, i);
+ SYS(fail, "ip -n %s link set veth0 up", ns_name);
+ }
+
+ return 0;
+
+fail:
+ cleanup(data);
+ return -1;
+}
+
+static void ping_test(struct test_data *data)
+{
+ struct test_xdp_do_redirect *skel = NULL;
+ struct xdp_dummy *skel_dummy = NULL;
+ struct nstoken *nstoken = NULL;
+ int i, ret;
+
+ skel_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(skel_dummy, "open and load xdp_dummy skeleton"))
+ goto close;
+
+ for (i = 1; i < NS_NB; i++) {
+ char ns_name[4] = {};
+
+ snprintf(ns_name, 4, "NS%d", i);
+ nstoken = open_netns(ns_name);
+ if (!ASSERT_OK_PTR(nstoken, "open ns"))
+ goto close;
+
+ ret = bpf_xdp_attach(if_nametoindex("veth0"),
+ bpf_program__fd(skel_dummy->progs.xdp_dummy_prog),
+ data->xdp_flags, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach dummy_prog"))
+ goto close;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+ }
+
+ skel = test_xdp_do_redirect__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ goto close;
+
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto close;
+
+ ret = bpf_xdp_attach(VETH2_INDEX,
+ bpf_program__fd(skel->progs.xdp_redirect_to_111),
+ data->xdp_flags, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ ret = bpf_xdp_attach(VETH1_INDEX,
+ bpf_program__fd(skel->progs.xdp_redirect_to_222),
+ data->xdp_flags, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "open NS1"))
+ goto close;
+
+ SYS(close, "ping -c 1 %s.2 > /dev/null", IPV4_NETWORK);
+
+close:
+ close_netns(nstoken);
+ xdp_dummy__destroy(skel_dummy);
+ test_xdp_do_redirect__destroy(skel);
+}
+
+
+static void xdp_redirect_ping(u32 xdp_flags)
+{
+ struct test_data data = {};
+
+ if (ping_setup(&data) < 0)
+ return;
+
+ data.xdp_flags = xdp_flags;
+ ping_test(&data);
+ cleanup(&data);
+}
+
+void test_xdp_index_redirect(void)
+{
+ if (test__start_subtest("noflag"))
+ xdp_redirect_ping(0);
+
+ if (test__start_subtest("drvflag"))
+ xdp_redirect_ping(XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("skbflag"))
+ xdp_redirect_ping(XDP_FLAGS_SKB_MODE);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
new file mode 100644
index 000000000000..3f9146d83d79
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <bpf/btf.h>
+#include <linux/if_link.h>
+#include <netinet/udp.h>
+#include <net/if.h>
+#include <unistd.h>
+
+#include "xdp_flowtable.skel.h"
+
+#define TX_NETNS_NAME "ns0"
+#define RX_NETNS_NAME "ns1"
+
+#define TX_NAME "v0"
+#define FORWARD_NAME "v1"
+#define RX_NAME "d0"
+
+#define TX_MAC "00:00:00:00:00:01"
+#define FORWARD_MAC "00:00:00:00:00:02"
+#define RX_MAC "00:00:00:00:00:03"
+#define DST_MAC "00:00:00:00:00:04"
+
+#define TX_ADDR "10.0.0.1"
+#define FORWARD_ADDR "10.0.0.2"
+#define RX_ADDR "20.0.0.1"
+#define DST_ADDR "20.0.0.2"
+
+#define PREFIX_LEN "8"
+#define N_PACKETS 10
+#define UDP_PORT 12345
+#define UDP_PORT_STR "12345"
+
+static int send_udp_traffic(void)
+{
+ struct sockaddr_storage addr;
+ int i, sock;
+
+ if (make_sockaddr(AF_INET, DST_ADDR, UDP_PORT, &addr, NULL))
+ return -EINVAL;
+
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0)
+ return sock;
+
+ for (i = 0; i < N_PACKETS; i++) {
+ unsigned char buf[] = { 0xaa, 0xbb, 0xcc };
+ int n;
+
+ n = sendto(sock, buf, sizeof(buf), MSG_NOSIGNAL | MSG_CONFIRM,
+ (struct sockaddr *)&addr, sizeof(addr));
+ if (n != sizeof(buf)) {
+ close(sock);
+ return -EINVAL;
+ }
+
+ usleep(50000); /* 50ms */
+ }
+ close(sock);
+
+ return 0;
+}
+
+void test_xdp_flowtable(void)
+{
+ struct xdp_flowtable *skel = NULL;
+ struct nstoken *tok = NULL;
+ int iifindex, stats_fd;
+ __u32 value, key = 0;
+ struct bpf_link *link;
+
+ if (SYS_NOFAIL("nft -v")) {
+ fprintf(stdout, "Missing required nft tool\n");
+ test__skip();
+ return;
+ }
+
+ SYS(out, "ip netns add " TX_NETNS_NAME);
+ SYS(out, "ip netns add " RX_NETNS_NAME);
+
+ tok = open_netns(RX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ SYS(out, "sysctl -qw net.ipv4.conf.all.forwarding=1");
+
+ SYS(out, "ip link add " TX_NAME " type veth peer " FORWARD_NAME);
+ SYS(out, "ip link set " TX_NAME " netns " TX_NETNS_NAME);
+ SYS(out, "ip link set dev " FORWARD_NAME " address " FORWARD_MAC);
+ SYS(out,
+ "ip addr add " FORWARD_ADDR "/" PREFIX_LEN " dev " FORWARD_NAME);
+ SYS(out, "ip link set dev " FORWARD_NAME " up");
+
+ SYS(out, "ip link add " RX_NAME " type dummy");
+ SYS(out, "ip link set dev " RX_NAME " address " RX_MAC);
+ SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
+ SYS(out, "ip link set dev " RX_NAME " up");
+
+ /* configure the flowtable */
+ SYS(out, "nft add table ip filter");
+ SYS(out,
+ "nft add flowtable ip filter f { hook ingress priority 0\\; "
+ "devices = { " FORWARD_NAME ", " RX_NAME " }\\; }");
+ SYS(out,
+ "nft add chain ip filter forward "
+ "{ type filter hook forward priority 0\\; }");
+ SYS(out,
+ "nft add rule ip filter forward ip protocol udp th dport "
+ UDP_PORT_STR " flow add @f");
+
+ /* Avoid ARP calls */
+ SYS(out,
+ "ip -4 neigh add " DST_ADDR " lladdr " DST_MAC " dev " RX_NAME);
+
+ close_netns(tok);
+ tok = open_netns(TX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
+ SYS(out, "ip link set dev " TX_NAME " address " TX_MAC);
+ SYS(out, "ip link set dev " TX_NAME " up");
+ SYS(out, "ip route add default via " FORWARD_ADDR);
+
+ close_netns(tok);
+ tok = open_netns(RX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ iifindex = if_nametoindex(FORWARD_NAME);
+ if (!ASSERT_NEQ(iifindex, 0, "iifindex"))
+ goto out;
+
+ skel = xdp_flowtable__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skel->progs.xdp_flowtable_do_lookup,
+ iifindex);
+ if (!ASSERT_OK_PTR(link, "prog_attach"))
+ goto out;
+
+ close_netns(tok);
+ tok = open_netns(TX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ if (!ASSERT_OK(send_udp_traffic(), "send udp"))
+ goto out;
+
+ close_netns(tok);
+ tok = open_netns(RX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ stats_fd = bpf_map__fd(skel->maps.stats);
+ if (!ASSERT_OK(bpf_map_lookup_elem(stats_fd, &key, &value),
+ "bpf_map_update_elem stats"))
+ goto out;
+
+ ASSERT_GE(value, N_PACKETS - 2, "bpf_xdp_flow_lookup failed");
+out:
+ xdp_flowtable__destroy(skel);
+ if (tok)
+ close_netns(tok);
+ SYS_NOFAIL("ip netns del " TX_NETNS_NAME);
+ SYS_NOFAIL("ip netns del " RX_NETNS_NAME);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index d2d7a283d72f..1dbddcab87a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -4,23 +4,24 @@
#define IFINDEX_LO 1
-void test_xdp_info(void)
+void serial_test_xdp_info(void)
{
__u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id;
- const char *file = "./xdp_dummy.o";
+ const char *file = "./xdp_dummy.bpf.o";
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
struct bpf_prog_info info = {};
struct bpf_object *obj;
int err, prog_fd;
/* Get prog_id for XDP_ATTACHED_NONE mode */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
if (CHECK(err, "get_xdp_none", "errno=%d\n", errno))
return;
if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id))
return;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno))
return;
if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n",
@@ -29,40 +30,47 @@ void test_xdp_info(void)
/* Setup prog */
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
if (CHECK(err, "get_prog_info", "errno=%d\n", errno))
goto out_close;
- err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno))
goto out_close;
/* Get prog_id for single prog mode */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
if (CHECK(err, "get_xdp", "errno=%d\n", errno))
goto out;
if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno))
goto out;
if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE);
+ err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &prog_id);
if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno))
goto out;
if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id))
goto out;
+ /* Check xdp features supported by lo device */
+ opts.feature_flags = ~0;
+ err = bpf_xdp_query(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &opts);
+ if (!ASSERT_OK(err, "bpf_xdp_query"))
+ goto out;
+
+ ASSERT_EQ(opts.feature_flags, 0, "opts.feature_flags");
out:
- bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+ bpf_xdp_detach(IFINDEX_LO, 0, NULL);
out_close:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 6f814999b395..e7e9f3c22edf 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -6,96 +6,96 @@
#define IFINDEX_LO 1
-void test_xdp_link(void)
+void serial_test_xdp_link(void)
{
- __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
- DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+ __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2;
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
struct bpf_link_info link_info;
struct bpf_prog_info prog_info;
struct bpf_link *link;
+ int err;
__u32 link_info_len = sizeof(link_info);
__u32 prog_info_len = sizeof(prog_info);
skel1 = test_xdp_link__open_and_load();
- if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n"))
+ if (!ASSERT_OK_PTR(skel1, "skel_load"))
goto cleanup;
prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
skel2 = test_xdp_link__open_and_load();
- if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n"))
+ if (!ASSERT_OK_PTR(skel2, "skel_load"))
goto cleanup;
prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
memset(&prog_info, 0, sizeof(prog_info));
- err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
- if (CHECK(err, "fd_info1", "failed %d\n", -errno))
+ err = bpf_prog_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "fd_info1"))
goto cleanup;
id1 = prog_info.id;
memset(&prog_info, 0, sizeof(prog_info));
- err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
- if (CHECK(err, "fd_info2", "failed %d\n", -errno))
+ err = bpf_prog_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "fd_info2"))
goto cleanup;
id2 = prog_info.id;
/* set initial prog attachment */
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+ if (!ASSERT_OK(err, "fd_attach"))
goto cleanup;
/* validate prog ID */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- CHECK(err || id0 != id1, "id1_check",
- "loaded prog id %u != id1 %u, err %d", id0, id1, err);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
+ goto cleanup;
/* BPF link is not allowed to replace prog attachment */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
bpf_link__destroy(link);
/* best-effort detach prog */
- opts.old_fd = prog_fd1;
- bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+ opts.old_prog_fd = prog_fd1;
+ bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
goto cleanup;
}
/* detach BPF program */
- opts.old_fd = prog_fd1;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(err, "prog_detach", "failed %d\n", err))
+ opts.old_prog_fd = prog_fd1;
+ err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
+ if (!ASSERT_OK(err, "prog_detach"))
goto cleanup;
/* now BPF link should attach successfully */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
skel1->links.xdp_handler = link;
/* validate prog ID */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- if (CHECK(err || id0 != id1, "id1_check",
- "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
goto cleanup;
/* BPF prog attach is not allowed to replace BPF link */
- opts.old_fd = prog_fd1;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
- if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
+ opts.old_prog_fd = prog_fd1;
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+ if (!ASSERT_ERR(err, "prog_attach_fail"))
goto cleanup;
/* Can't force-update when BPF link is active */
- err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
- if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, 0, NULL);
+ if (!ASSERT_ERR(err, "prog_update_fail"))
goto cleanup;
/* Can't force-detach when BPF link is active */
- err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
- if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
+ err = bpf_xdp_detach(IFINDEX_LO, 0, NULL);
+ if (!ASSERT_ERR(err, "prog_detach_fail"))
goto cleanup;
/* BPF link is not allowed to replace another BPF link */
link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
bpf_link__destroy(link);
goto cleanup;
}
@@ -105,45 +105,46 @@ void test_xdp_link(void)
/* new link attach should succeed */
link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
skel2->links.xdp_handler = link;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
- if (CHECK(err || id0 != id2, "id2_check",
- "loaded prog id %u != id2 %u, err %d", id0, id1, err))
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+ if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val"))
goto cleanup;
/* updating program under active BPF link works as expected */
err = bpf_link__update_program(link, skel1->progs.xdp_handler);
- if (CHECK(err, "link_upd", "failed: %d\n", err))
+ if (!ASSERT_OK(err, "link_upd"))
goto cleanup;
memset(&link_info, 0, sizeof(link_info));
- err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
- if (CHECK(err, "link_info", "failed: %d\n", err))
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link),
+ &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "link_info"))
goto cleanup;
- CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
- "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
- CHECK(link_info.prog_id != id1, "link_prog_id",
- "got %u != exp %u\n", link_info.prog_id, id1);
- CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
- "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+ ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
+ ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex");
+
+ /* updating program under active BPF link with different type fails */
+ err = bpf_link__update_program(link, skel1->progs.tc_handler);
+ if (!ASSERT_ERR(err, "link_upd_invalid"))
+ goto cleanup;
err = bpf_link__detach(link);
- if (CHECK(err, "link_detach", "failed %d\n", err))
+ if (!ASSERT_OK(err, "link_detach"))
goto cleanup;
memset(&link_info, 0, sizeof(link_info));
- err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
- if (CHECK(err, "link_info", "failed: %d\n", err))
- goto cleanup;
- CHECK(link_info.prog_id != id1, "link_prog_id",
- "got %u != exp %u\n", link_info.prog_id, id1);
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link),
+ &link_info, &link_info_len);
+
+ ASSERT_OK(err, "link_info");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
/* ifindex should be zeroed out */
- CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
- "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+ ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex");
cleanup:
test_xdp_link__destroy(skel1);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
new file mode 100644
index 000000000000..19f92affc2da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "xdp_metadata.skel.h"
+#include "xdp_metadata2.skel.h"
+#include "xdp_metadata.h"
+#include "xsk.h"
+
+#include <bpf/btf.h>
+#include <linux/errqueue.h>
+#include <linux/if_link.h>
+#include <linux/net_tstamp.h>
+#include <netinet/udp.h>
+#include <sys/mman.h>
+#include <net/if.h>
+#include <poll.h>
+
+#define TX_NAME "veTX"
+#define RX_NAME "veRX"
+
+#define UDP_PAYLOAD_BYTES 4
+
+#define UDP_SOURCE_PORT 1234
+#define AF_XDP_CONSUMER_PORT 8080
+
+#define UMEM_NUM 16
+#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
+#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
+#define XDP_FLAGS XDP_FLAGS_DRV_MODE
+#define QUEUE_ID 0
+
+#define TX_ADDR "10.0.0.1"
+#define RX_ADDR "10.0.0.2"
+#define PREFIX_LEN "8"
+#define FAMILY AF_INET
+#define TX_NETNS_NAME "xdp_metadata_tx"
+#define RX_NETNS_NAME "xdp_metadata_rx"
+#define TX_MAC "00:00:00:00:00:01"
+#define RX_MAC "00:00:00:00:00:02"
+
+#define VLAN_ID 59
+#define VLAN_PROTO "802.1Q"
+#define VLAN_PID htons(ETH_P_8021Q)
+#define TX_NAME_VLAN TX_NAME "." TO_STR(VLAN_ID)
+
+#define XDP_RSS_TYPE_L4 BIT(3)
+#define VLAN_VID_MASK 0xfff
+
+struct xsk {
+ void *umem_area;
+ struct xsk_umem *umem;
+ struct xsk_ring_prod fill;
+ struct xsk_ring_cons comp;
+ struct xsk_ring_prod tx;
+ struct xsk_ring_cons rx;
+ struct xsk_socket *socket;
+};
+
+static int open_xsk(int ifindex, struct xsk *xsk)
+{
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ const struct xsk_socket_config socket_config = {
+ .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .bind_flags = XDP_COPY,
+ };
+ const struct xsk_umem_config umem_config = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM |
+ XDP_UMEM_TX_METADATA_LEN,
+ .tx_metadata_len = sizeof(struct xsk_tx_metadata),
+ };
+ __u32 idx;
+ u64 addr;
+ int ret;
+ int i;
+
+ xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap"))
+ return -1;
+
+ ret = xsk_umem__create(&xsk->umem,
+ xsk->umem_area, UMEM_SIZE,
+ &xsk->fill,
+ &xsk->comp,
+ &umem_config);
+ if (!ASSERT_OK(ret, "xsk_umem__create"))
+ return ret;
+
+ ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID,
+ xsk->umem,
+ &xsk->rx,
+ &xsk->tx,
+ &socket_config);
+ if (!ASSERT_OK(ret, "xsk_socket__create"))
+ return ret;
+
+ /* First half of umem is for TX. This way address matches 1-to-1
+ * to the completion queue index.
+ */
+
+ for (i = 0; i < UMEM_NUM / 2; i++) {
+ addr = i * UMEM_FRAME_SIZE;
+ printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
+ }
+
+ /* Second half of umem is for RX. */
+
+ ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
+ if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve"))
+ return ret;
+ if (!ASSERT_EQ(idx, 0, "fill idx != 0"))
+ return -1;
+
+ for (i = 0; i < UMEM_NUM / 2; i++) {
+ addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
+ printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
+ *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
+ }
+ xsk_ring_prod__submit(&xsk->fill, ret);
+
+ return 0;
+}
+
+static void close_xsk(struct xsk *xsk)
+{
+ if (xsk->umem)
+ xsk_umem__delete(xsk->umem);
+ if (xsk->socket)
+ xsk_socket__delete(xsk->socket);
+ munmap(xsk->umem_area, UMEM_SIZE);
+}
+
+static int generate_packet(struct xsk *xsk, __u16 dst_port)
+{
+ struct xsk_tx_metadata *meta;
+ struct xdp_desc *tx_desc;
+ struct udphdr *udph;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ void *data;
+ __u32 idx;
+ int ret;
+
+ ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
+ if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve"))
+ return -1;
+
+ tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
+ tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata);
+ printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
+ data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
+
+ meta = data - sizeof(struct xsk_tx_metadata);
+ memset(meta, 0, sizeof(*meta));
+ meta->flags = XDP_TXMD_FLAGS_TIMESTAMP;
+
+ eth = data;
+ iph = (void *)(eth + 1);
+ udph = (void *)(iph + 1);
+
+ memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN);
+ memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN);
+ eth->h_proto = htons(ETH_P_IP);
+
+ iph->version = 0x4;
+ iph->ihl = 0x5;
+ iph->tos = 0x9;
+ iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES);
+ iph->id = 0;
+ iph->frag_off = 0;
+ iph->ttl = 0;
+ iph->protocol = IPPROTO_UDP;
+ ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
+ ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
+ iph->check = build_ip_csum(iph);
+
+ udph->source = htons(UDP_SOURCE_PORT);
+ udph->dest = htons(dst_port);
+ udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ ntohs(udph->len), IPPROTO_UDP, 0);
+
+ memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES);
+
+ meta->flags |= XDP_TXMD_FLAGS_CHECKSUM;
+ meta->request.csum_start = sizeof(*eth) + sizeof(*iph);
+ meta->request.csum_offset = offsetof(struct udphdr, check);
+
+ tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES;
+ tx_desc->options |= XDP_TX_METADATA;
+ xsk_ring_prod__submit(&xsk->tx, 1);
+
+ ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (!ASSERT_GE(ret, 0, "sendto"))
+ return ret;
+
+ return 0;
+}
+
+static int generate_packet_inet(void)
+{
+ char udp_payload[UDP_PAYLOAD_BYTES];
+ struct sockaddr_in rx_addr;
+ int sock_fd, err = 0;
+
+ /* Build a packet */
+ memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES);
+ rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR);
+ rx_addr.sin_family = AF_INET;
+ rx_addr.sin_port = htons(AF_XDP_CONSUMER_PORT);
+
+ sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)"))
+ return sock_fd;
+
+ err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT,
+ (void *)&rx_addr, sizeof(rx_addr));
+ ASSERT_GE(err, 0, "sendto");
+
+ close(sock_fd);
+ return err;
+}
+
+static void complete_tx(struct xsk *xsk)
+{
+ struct xsk_tx_metadata *meta;
+ __u64 addr;
+ void *data;
+ __u32 idx;
+
+ if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
+ addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
+
+ printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
+
+ data = xsk_umem__get_data(xsk->umem_area, addr);
+ meta = data - sizeof(struct xsk_tx_metadata);
+
+ ASSERT_NEQ(meta->completion.tx_timestamp, 0, "tx_timestamp");
+
+ xsk_ring_cons__release(&xsk->comp, 1);
+ }
+}
+
+static void refill_rx(struct xsk *xsk, __u64 addr)
+{
+ __u32 idx;
+
+ if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
+ printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
+ *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
+ xsk_ring_prod__submit(&xsk->fill, 1);
+ }
+}
+
+static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp)
+{
+ const struct xdp_desc *rx_desc;
+ struct pollfd fds = {};
+ struct xdp_meta *meta;
+ struct udphdr *udph;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ __u64 comp_addr;
+ void *data;
+ __u64 addr;
+ __u32 idx = 0;
+ int ret;
+
+ ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+ if (!ASSERT_EQ(ret, 0, "recvfrom"))
+ return -1;
+
+ fds.fd = xsk_socket__fd(xsk->socket);
+ fds.events = POLLIN;
+
+ ret = poll(&fds, 1, 1000);
+ if (!ASSERT_GT(ret, 0, "poll"))
+ return -1;
+
+ ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
+ if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek"))
+ return -2;
+
+ rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
+ comp_addr = xsk_umem__extract_addr(rx_desc->addr);
+ addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
+ printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
+ xsk, idx, rx_desc->addr, addr, comp_addr);
+ data = xsk_umem__get_data(xsk->umem_area, addr);
+
+ /* Make sure we got the packet offset correctly. */
+
+ eth = data;
+ ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto");
+ iph = (void *)(eth + 1);
+ ASSERT_EQ((int)iph->version, 4, "iph->version");
+ udph = (void *)(iph + 1);
+
+ /* custom metadata */
+
+ meta = data - sizeof(struct xdp_meta);
+
+ if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp"))
+ return -1;
+
+ if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
+ return -1;
+
+ if (!sent_from_af_xdp) {
+ if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type"))
+ return -1;
+
+ if (!ASSERT_EQ(meta->rx_vlan_tci & VLAN_VID_MASK, VLAN_ID, "rx_vlan_tci"))
+ return -1;
+
+ if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto"))
+ return -1;
+ goto done;
+ }
+
+ ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
+
+ /* checksum offload */
+ ASSERT_EQ(udph->check, htons(0x721c), "csum");
+
+done:
+ xsk_ring_cons__release(&xsk->rx, 1);
+ refill_rx(xsk, comp_addr);
+
+ return 0;
+}
+
+static void switch_ns_to_rx(struct nstoken **tok)
+{
+ close_netns(*tok);
+ *tok = open_netns(RX_NETNS_NAME);
+}
+
+static void switch_ns_to_tx(struct nstoken **tok)
+{
+ close_netns(*tok);
+ *tok = open_netns(TX_NETNS_NAME);
+}
+
+void test_xdp_metadata(void)
+{
+ struct xdp_metadata2 *bpf_obj2 = NULL;
+ struct xdp_metadata *bpf_obj = NULL;
+ struct bpf_program *new_prog, *prog;
+ struct bpf_devmap_val devmap_e = {};
+ struct bpf_map *prog_arr, *devmap;
+ struct nstoken *tok = NULL;
+ __u32 queue_id = QUEUE_ID;
+ struct xsk tx_xsk = {};
+ struct xsk rx_xsk = {};
+ __u32 val, key = 0;
+ int retries = 10;
+ int rx_ifindex;
+ int tx_ifindex;
+ int sock_fd;
+ int ret;
+
+ /* Setup new networking namespaces, with a veth pair. */
+ SYS(out, "ip netns add " TX_NETNS_NAME);
+ SYS(out, "ip netns add " RX_NETNS_NAME);
+
+ tok = open_netns(TX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+ SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
+ " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
+ SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME);
+
+ SYS(out, "ip link set dev " TX_NAME " address " TX_MAC);
+ SYS(out, "ip link set dev " TX_NAME " up");
+
+ SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN
+ " type vlan proto " VLAN_PROTO " id " TO_STR(VLAN_ID));
+ SYS(out, "ip link set dev " TX_NAME_VLAN " up");
+ SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN);
+
+ /* Avoid ARP calls */
+ SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN);
+
+ switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
+
+ SYS(out, "ip link set dev " RX_NAME " address " RX_MAC);
+ SYS(out, "ip link set dev " RX_NAME " up");
+ SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
+
+ rx_ifindex = if_nametoindex(RX_NAME);
+
+ /* Setup separate AF_XDP for RX interface. */
+
+ ret = open_xsk(rx_ifindex, &rx_xsk);
+ if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
+ goto out;
+
+ bpf_obj = xdp_metadata__open();
+ if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
+ bpf_program__set_ifindex(prog, rx_ifindex);
+ bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
+ /* Make sure we can load a dev-bound program that performs
+ * XDP_REDIRECT into a devmap.
+ */
+ new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect");
+ bpf_program__set_ifindex(new_prog, rx_ifindex);
+ bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
+ if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
+ goto out;
+
+ /* Make sure we can't add dev-bound programs to prog maps. */
+ prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
+ if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
+ goto out;
+
+ val = bpf_program__fd(prog);
+ if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY),
+ "update prog_arr"))
+ goto out;
+
+ /* Make sure we can't add dev-bound programs to devmaps. */
+ devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map");
+ if (!ASSERT_OK_PTR(devmap, "no dev_map found"))
+ goto out;
+
+ devmap_e.bpf_prog.fd = val;
+ if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key),
+ &devmap_e, sizeof(devmap_e),
+ BPF_ANY),
+ "update dev_map"))
+ goto out;
+
+ /* Attach BPF program to RX interface. */
+
+ ret = bpf_xdp_attach(rx_ifindex,
+ bpf_program__fd(bpf_obj->progs.rx),
+ XDP_FLAGS, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto out;
+
+ sock_fd = xsk_socket__fd(rx_xsk.socket);
+ ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
+ if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
+ goto out;
+
+ switch_ns_to_tx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns tx"))
+ goto out;
+
+ /* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */
+ tx_ifindex = if_nametoindex(TX_NAME);
+ ret = open_xsk(tx_ifindex, &tx_xsk);
+ if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
+ goto out;
+
+ if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
+ "generate AF_XDP_CONSUMER_PORT"))
+ goto out;
+
+ switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
+
+ /* Verify packet sent from AF_XDP has proper metadata. */
+ if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0,
+ "verify_xsk_metadata"))
+ goto out;
+
+ switch_ns_to_tx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns tx"))
+ goto out;
+ complete_tx(&tx_xsk);
+
+ /* Now check metadata of packet, generated with network stack */
+ if (!ASSERT_GE(generate_packet_inet(), 0, "generate UDP packet"))
+ goto out;
+
+ switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
+
+ if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0,
+ "verify_xsk_metadata"))
+ goto out;
+
+ /* Make sure freplace correctly picks up original bound device
+ * and doesn't crash.
+ */
+
+ bpf_obj2 = xdp_metadata2__open();
+ if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton"))
+ goto out;
+
+ new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx");
+ bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx");
+
+ if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton"))
+ goto out;
+
+ if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
+ goto out;
+
+ switch_ns_to_tx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns tx"))
+ goto out;
+
+ /* Send packet to trigger . */
+ if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
+ "generate freplace packet"))
+ goto out;
+
+ switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
+
+ while (!retries--) {
+ if (bpf_obj2->bss->called)
+ break;
+ usleep(10);
+ }
+ ASSERT_GT(bpf_obj2->bss->called, 0, "not called");
+
+out:
+ close_xsk(&rx_xsk);
+ close_xsk(&tx_xsk);
+ xdp_metadata2__destroy(bpf_obj2);
+ xdp_metadata__destroy(bpf_obj);
+ if (tok)
+ close_netns(tok);
+ SYS_NOFAIL("ip netns del " RX_NETNS_NAME);
+ SYS_NOFAIL("ip netns del " TX_NETNS_NAME);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index 0281095de266..92ef0aa50866 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -25,43 +25,49 @@ void test_xdp_noinline(void)
__u8 flags;
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
- __u32 duration = 0, retval, size;
int err, i;
__u64 bytes = 0, pkts = 0;
char buf[128];
u32 *magic = (u32 *)buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = NUM_ITER,
+ );
skel = test_xdp_noinline__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0);
bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0);
bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0);
- err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4),
- NUM_ITER, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 1 || size != 54 ||
- *magic != MAGIC_VAL, "ipv4",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v4), &topts);
+ ASSERT_OK(err, "ipv4 test_run");
+ ASSERT_EQ(topts.retval, 1, "ipv4 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 test_run magic");
- err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6),
- NUM_ITER, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 1 || size != 74 ||
- *magic != MAGIC_VAL, "ipv6",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_out = buf;
+ topts.data_size_out = sizeof(buf);
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v6), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, 1, "ipv6 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 test_run magic");
bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats);
for (i = 0; i < nr_cpus; i++) {
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2,
- "stats", "bytes %lld pkts %lld\n",
- (unsigned long long)bytes, (unsigned long long)pkts);
+ ASSERT_EQ(bytes, MAGIC_BYTES * NUM_ITER * 2, "stats bytes");
+ ASSERT_EQ(pkts, NUM_ITER * 2, "stats pkts");
test_xdp_noinline__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
index 7185bee16fe4..ec5369f247cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
@@ -3,23 +3,26 @@
void test_xdp_perf(void)
{
- const char *file = "./xdp_dummy.o";
- __u32 duration, retval, size;
+ const char *file = "./xdp_dummy.bpf.o";
struct bpf_object *obj;
char in[128], out[128];
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = in,
+ .data_size_in = sizeof(in),
+ .data_out = out,
+ .data_size_out = sizeof(out),
+ .repeat = 1000000,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128,
- out, &size, &retval, &duration);
-
- CHECK(err || retval != XDP_PASS || size != 128,
- "xdp-perf",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "test_run retval");
+ ASSERT_EQ(topts.data_size_out, 128, "test_run data_size_out");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
new file mode 100644
index 000000000000..efa350d04ec5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_pull_data.skel.h"
+
+#define PULL_MAX (1 << 31)
+#define PULL_PLUS_ONE (1 << 30)
+
+#define XDP_PACKET_HEADROOM 256
+
+/* Find headroom and tailroom occupied by struct xdp_frame and struct
+ * skb_shared_info so that we can calculate the maximum pull lengths for
+ * test cases. They might not be the real size of the structures due to
+ * cache alignment.
+ */
+static int find_xdp_sizes(struct test_xdp_pull_data *skel, int frame_sz)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(frame_sz, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return -ENOMEM;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = frame_sz;
+ topts.data_size_out = frame_sz;
+ /* Pass a data_end larger than the linear space available to make sure
+ * bpf_prog_test_run_xdp() will fill the linear data area so that
+ * xdp_find_sizes can infer the size of struct skb_shared_info
+ */
+ ctx.data_end = frame_sz;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_find_sizes);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ free(buf);
+
+ return err;
+}
+
+/* xdp_pull_data_prog will directly read a marker 0xbb stored at buf[1024]
+ * so caller expecting XDP_PASS should always pass pull_len no less than 1024
+ */
+static void run_test(struct test_xdp_pull_data *skel, int retval,
+ int frame_sz, int buff_len, int meta_len, int data_len,
+ int pull_len)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(buff_len, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return;
+
+ buf[meta_len + 1023] = 0xaa;
+ buf[meta_len + 1024] = 0xbb;
+ buf[meta_len + 1025] = 0xcc;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = buff_len;
+ topts.data_size_out = buff_len;
+ ctx.data = meta_len;
+ ctx.data_end = meta_len + data_len;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ skel->bss->data_len = data_len;
+ if (pull_len & PULL_MAX) {
+ int headroom = XDP_PACKET_HEADROOM - meta_len - skel->bss->xdpf_sz;
+ int tailroom = frame_sz - XDP_PACKET_HEADROOM -
+ data_len - skel->bss->sinfo_sz;
+
+ pull_len = pull_len & PULL_PLUS_ONE ? 1 : 0;
+ pull_len += headroom + tailroom + data_len;
+ }
+ skel->bss->pull_len = pull_len;
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_pull_data_prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(topts.retval, retval, "xdp_pull_data_prog retval");
+
+ if (retval == XDP_DROP)
+ goto out;
+
+ ASSERT_EQ(ctx.data_end, meta_len + pull_len, "linear data size");
+ ASSERT_EQ(topts.data_size_out, buff_len, "linear + non-linear data size");
+ /* Make sure data around xdp->data_end was not messed up by
+ * bpf_xdp_pull_data()
+ */
+ ASSERT_EQ(buf[meta_len + 1023], 0xaa, "data[1023]");
+ ASSERT_EQ(buf[meta_len + 1024], 0xbb, "data[1024]");
+ ASSERT_EQ(buf[meta_len + 1025], 0xcc, "data[1025]");
+out:
+ free(buf);
+}
+
+static void test_xdp_pull_data_basic(void)
+{
+ u32 pg_sz, max_meta_len, max_data_len;
+ struct test_xdp_pull_data *skel;
+
+ skel = test_xdp_pull_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load"))
+ return;
+
+ pg_sz = sysconf(_SC_PAGE_SIZE);
+
+ if (find_xdp_sizes(skel, pg_sz))
+ goto out;
+
+ max_meta_len = XDP_PACKET_HEADROOM - skel->bss->xdpf_sz;
+ max_data_len = pg_sz - XDP_PACKET_HEADROOM - skel->bss->sinfo_sz;
+
+ /* linear xdp pkt, pull 0 byte */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 2048, 2048);
+
+ /* multi-buf pkt, pull results in linear xdp pkt */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 1024, 2048);
+
+ /* multi-buf pkt, pull 1 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1024, 1025);
+
+ /* multi-buf pkt, pull 0 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025);
+
+ /* multi-buf pkt, empty linear data area, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX);
+
+ /* multi-buf pkt, no tailroom, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX);
+
+ /* Test cases with invalid pull length */
+
+ /* linear xdp pkt, pull more than total data len */
+ run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049);
+
+ /* multi-buf pkt with no space left in linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, empty linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no tailroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+out:
+ test_xdp_pull_data__destroy(skel);
+}
+
+void test_xdp_pull_data(void)
+{
+ if (test__start_subtest("xdp_pull_data"))
+ test_xdp_pull_data_basic();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
new file mode 100644
index 000000000000..8b50a992d233
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <ctype.h>
+
+#define CMD_OUT_BUF_SIZE 1023
+
+#define SYS_OUT(cmd, ...) ({ \
+ char buf[1024]; \
+ snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \
+ FILE *f = popen(buf, "r"); \
+ if (!ASSERT_OK_PTR(f, buf)) \
+ goto out; \
+ f; \
+})
+
+/* out must be at least `size * 4 + 1` bytes long */
+static void escape_str(char *out, const char *in, size_t size)
+{
+ static const char *hex = "0123456789ABCDEF";
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (isprint(in[i]) && in[i] != '\\' && in[i] != '\'') {
+ *out++ = in[i];
+ } else {
+ *out++ = '\\';
+ *out++ = 'x';
+ *out++ = hex[(in[i] >> 4) & 0xf];
+ *out++ = hex[in[i] & 0xf];
+ }
+ }
+ *out++ = '\0';
+}
+
+static bool expect_str(char *buf, size_t size, const char *str, const char *name)
+{
+ static char escbuf_expected[CMD_OUT_BUF_SIZE * 4];
+ static char escbuf_actual[CMD_OUT_BUF_SIZE * 4];
+ static int duration = 0;
+ bool ok;
+
+ ok = size == strlen(str) && !memcmp(buf, str, size);
+
+ if (!ok) {
+ escape_str(escbuf_expected, str, strlen(str));
+ escape_str(escbuf_actual, buf, size);
+ }
+ CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n",
+ name, escbuf_actual, escbuf_expected);
+
+ return ok;
+}
+
+static void test_synproxy(bool xdp)
+{
+ int server_fd = -1, client_fd = -1, accept_fd = -1;
+ char *prog_id = NULL, *prog_id_end;
+ struct nstoken *ns = NULL;
+ FILE *ctrl_file = NULL;
+ char buf[CMD_OUT_BUF_SIZE];
+ size_t size;
+
+ SYS(out, "ip netns add synproxy");
+
+ SYS(out, "ip link add tmp0 type veth peer name tmp1");
+ SYS(out, "ip link set tmp1 netns synproxy");
+ SYS(out, "ip link set tmp0 up");
+ SYS(out, "ip addr replace 198.18.0.1/24 dev tmp0");
+
+ /* When checksum offload is enabled, the XDP program sees wrong
+ * checksums and drops packets.
+ */
+ SYS(out, "ethtool -K tmp0 tx off");
+ if (xdp)
+ /* Workaround required for veth. */
+ SYS(out, "ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null");
+
+ ns = open_netns("synproxy");
+ if (!ASSERT_OK_PTR(ns, "setns"))
+ goto out;
+
+ SYS(out, "ip link set lo up");
+ SYS(out, "ip link set tmp1 up");
+ SYS(out, "ip addr replace 198.18.0.2/24 dev tmp1");
+ SYS(out, "sysctl -w net.ipv4.tcp_syncookies=2");
+ SYS(out, "sysctl -w net.ipv4.tcp_timestamps=1");
+ SYS(out, "sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
+ SYS(out, "iptables-legacy -t raw -I PREROUTING \
+ -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
+ SYS(out, "iptables-legacy -t filter -A INPUT \
+ -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
+ -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
+ SYS(out, "iptables-legacy -t filter -A INPUT \
+ -i tmp1 -m state --state INVALID -j DROP");
+
+ ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
+ --single --mss4 1460 --mss6 1440 \
+ --wscale 7 --ttl 64%s", xdp ? "" : " --tc");
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ if (!expect_str(buf, size, "Total SYNACKs generated: 0\n",
+ "initial SYNACKs"))
+ goto out;
+
+ if (!xdp) {
+ ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress");
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ prog_id = memmem(buf, size, " id ", 4);
+ if (!ASSERT_OK_PTR(prog_id, "find prog id"))
+ goto out;
+ prog_id += 4;
+ if (!ASSERT_LT(prog_id, buf + size, "find prog id begin"))
+ goto out;
+ prog_id_end = prog_id;
+ while (prog_id_end < buf + size && *prog_id_end >= '0' &&
+ *prog_id_end <= '9')
+ prog_id_end++;
+ if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end"))
+ goto out;
+ *prog_id_end = '\0';
+ }
+
+ server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto out;
+
+ close_netns(ns);
+ ns = NULL;
+
+ client_fd = connect_to_fd(server_fd, 10000);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto out;
+
+ accept_fd = accept(server_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto out;
+
+ ns = open_netns("synproxy");
+ if (!ASSERT_OK_PTR(ns, "setns"))
+ goto out;
+
+ if (xdp)
+ ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single");
+ else
+ ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single",
+ prog_id);
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ if (!expect_str(buf, size, "Total SYNACKs generated: 1\n",
+ "SYNACKs after connection"))
+ goto out;
+
+out:
+ if (accept_fd >= 0)
+ close(accept_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+ if (server_fd >= 0)
+ close(server_fd);
+ if (ns)
+ close_netns(ns);
+
+ SYS_NOFAIL("ip link del tmp0");
+ SYS_NOFAIL("ip netns del synproxy");
+}
+
+void test_xdp_synproxy(void)
+{
+ if (test__start_subtest("xdp"))
+ test_synproxy(true);
+ if (test__start_subtest("tc"))
+ test_synproxy(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
new file mode 100644
index 000000000000..18dd25344de7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Network topology:
+ * ----------- -----------
+ * | NS1 | | NS2 |
+ * | veth0 -|--------|- veth0 |
+ * ----------- -----------
+ *
+ */
+
+#define _GNU_SOURCE
+#include <net/if.h>
+#include <uapi/linux/if_link.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_xdp_vlan.skel.h"
+
+
+#define VETH_NAME "veth0"
+#define NS_MAX_SIZE 32
+#define NS1_NAME "ns-xdp-vlan-1-"
+#define NS2_NAME "ns-xdp-vlan-2-"
+#define NS1_IP_ADDR "100.64.10.1"
+#define NS2_IP_ADDR "100.64.10.2"
+#define VLAN_ID 4011
+
+static int setup_network(char *ns1, char *ns2)
+{
+ if (!ASSERT_OK(append_tid(ns1, NS_MAX_SIZE), "create ns1 name"))
+ goto fail;
+ if (!ASSERT_OK(append_tid(ns2, NS_MAX_SIZE), "create ns2 name"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", ns1);
+ SYS(fail, "ip netns add %s", ns2);
+ SYS(fail, "ip -n %s link add %s type veth peer name %s netns %s",
+ ns1, VETH_NAME, VETH_NAME, ns2);
+
+ /* NOTICE: XDP require VLAN header inside packet payload
+ * - Thus, disable VLAN offloading driver features
+ */
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns1, VETH_NAME);
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns2, VETH_NAME);
+
+ /* NS1 configuration */
+ SYS(fail, "ip -n %s addr add %s/24 dev %s", ns1, NS1_IP_ADDR, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s up", ns1, VETH_NAME);
+
+ /* NS2 configuration */
+ SYS(fail, "ip -n %s link add link %s name %s.%d type vlan id %d",
+ ns2, VETH_NAME, VETH_NAME, VLAN_ID, VLAN_ID);
+ SYS(fail, "ip -n %s addr add %s/24 dev %s.%d", ns2, NS2_IP_ADDR, VETH_NAME, VLAN_ID);
+ SYS(fail, "ip -n %s link set %s up", ns2, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s.%d up", ns2, VETH_NAME, VLAN_ID);
+
+ /* At this point ping should fail because VLAN tags are only used by NS2 */
+ return !SYS_NOFAIL("ip netns exec %s ping -W 1 -c1 %s", ns2, NS1_IP_ADDR);
+
+fail:
+ return -1;
+}
+
+static void cleanup_network(const char *ns1, const char *ns2)
+{
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+}
+
+static void xdp_vlan(struct bpf_program *xdp, struct bpf_program *tc, u32 flags)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ char ns1[NS_MAX_SIZE] = NS1_NAME;
+ char ns2[NS_MAX_SIZE] = NS2_NAME;
+ struct nstoken *nstoken = NULL;
+ int interface;
+ int ret;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2), "setup network"))
+ goto cleanup;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open NS1"))
+ goto cleanup;
+
+ interface = if_nametoindex(VETH_NAME);
+ if (!ASSERT_NEQ(interface, 0, "get interface index"))
+ goto cleanup;
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(xdp), flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp_vlan_change"))
+ goto cleanup;
+
+ tc_hook.ifindex = interface;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto detach_xdp;
+
+ /* Now we'll use BPF programs to pop/push the VLAN tags */
+ tc_opts.prog_fd = bpf_program__fd(tc);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto detach_xdp;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Now the namespaces can reach each-other, test with pings */
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns1, NS2_IP_ADDR);
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns2, NS1_IP_ADDR);
+
+
+detach_tc:
+ bpf_tc_detach(&tc_hook, &tc_opts);
+detach_xdp:
+ bpf_xdp_detach(interface, flags, NULL);
+cleanup:
+ close_netns(nstoken);
+ cleanup_network(ns1, ns2);
+}
+
+/* First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
+ * egress use TC to add back VLAN tag 4011
+ */
+void test_xdp_vlan_change(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
+
+/* Second test: XDP prog fully remove vlan header
+ *
+ * Catch kernel bug for generic-XDP, that doesn't allow us to
+ * remove a VLAN header, because skb->protocol still contain VLAN
+ * ETH_P_8021Q indication, and this cause overwriting of our changes.
+ */
+void test_xdp_vlan_remove(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdpwall.c b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
new file mode 100644
index 000000000000..4599154c8e9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "test_progs.h"
+#include "xdpwall.skel.h"
+
+void test_xdpwall(void)
+{
+ struct xdpwall *skel;
+
+ skel = xdpwall__open_and_load();
+ ASSERT_OK_PTR(skel, "Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5?");
+
+ xdpwall__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
new file mode 100644
index 000000000000..d37f5394e199
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Topology:
+ * ---------
+ * NS0 namespace | NS1 namespace | NS2 namespace
+ * | |
+ * +---------------+ | +---------------+ |
+ * | ipsec0 |---------| ipsec0 | |
+ * | 192.168.1.100 | | | 192.168.1.200 | |
+ * | if_id: bpf | | +---------------+ |
+ * +---------------+ | |
+ * | | | +---------------+
+ * | | | | ipsec0 |
+ * \------------------------------------------| 192.168.1.200 |
+ * | | +---------------+
+ * | |
+ * | | (overlay network)
+ * ------------------------------------------------------
+ * | | (underlay network)
+ * +--------------+ | +--------------+ |
+ * | veth01 |----------| veth10 | |
+ * | 172.16.1.100 | | | 172.16.1.200 | |
+ * ---------------+ | +--------------+ |
+ * | |
+ * +--------------+ | | +--------------+
+ * | veth02 |-----------------------------------| veth20 |
+ * | 172.16.2.100 | | | | 172.16.2.200 |
+ * +--------------+ | | +--------------+
+ *
+ *
+ * Test Packet flow
+ * -----------
+ * The tests perform 'ping 192.168.1.200' from the NS0 namespace:
+ * 1) request is routed to NS0 ipsec0
+ * 2) NS0 ipsec0 tc egress BPF program is triggered and sets the if_id based
+ * on the requested value. This makes the ipsec0 device in external mode
+ * select the destination tunnel
+ * 3) ping reaches the other namespace (NS1 or NS2 based on which if_id was
+ * used) and response is sent
+ * 4) response is received on NS0 ipsec0, tc ingress program is triggered and
+ * records the response if_id
+ * 5) requested if_id is compared with received if_id
+ */
+
+#include <net/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_link.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "xfrm_info.skel.h"
+
+#define NS0 "xfrm_test_ns0"
+#define NS1 "xfrm_test_ns1"
+#define NS2 "xfrm_test_ns2"
+
+#define IF_ID_0_TO_1 1
+#define IF_ID_0_TO_2 2
+#define IF_ID_1 3
+#define IF_ID_2 4
+
+#define IP4_ADDR_VETH01 "172.16.1.100"
+#define IP4_ADDR_VETH10 "172.16.1.200"
+#define IP4_ADDR_VETH02 "172.16.2.100"
+#define IP4_ADDR_VETH20 "172.16.2.200"
+
+#define ESP_DUMMY_PARAMS \
+ "proto esp aead 'rfc4106(gcm(aes))' " \
+ "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel "
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+{
+ LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1,
+ .prog_fd = igr_fd);
+ LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, .priority = 1,
+ .prog_fd = egr_fd);
+ int ret;
+
+ ret = bpf_tc_hook_create(hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ if (igr_fd >= 0) {
+ hook->attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(hook, &opts1);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+ }
+
+ if (egr_fd >= 0) {
+ hook->attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(hook, &opts2);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0);
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1);
+ SYS_NOFAIL("test -f /var/run/netns/" NS2 " && ip netns delete " NS2);
+}
+
+static int config_underlay(void)
+{
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+ SYS(fail, "ip netns add " NS2);
+
+ /* NS0 <-> NS1 [veth01 <-> veth10] */
+ SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS(fail, "ip -net " NS0 " link set dev veth01 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS(fail, "ip -net " NS1 " link set dev veth10 up");
+
+ /* NS0 <-> NS2 [veth02 <-> veth20] */
+ SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS(fail, "ip -net " NS0 " link set dev veth02 up");
+ SYS(fail, "ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS(fail, "ip -net " NS2 " link set dev veth20 up");
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local,
+ const char *ipv4_remote, int if_id)
+{
+ /* State: local -> remote */
+ SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 "
+ ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id);
+
+ /* State: local <- remote */
+ SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 "
+ ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id);
+
+ /* Policy: local -> remote */
+ SYS(fail, "ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 "
+ "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
+ if_id, ipv4_local, ipv4_remote, if_id);
+
+ /* Policy: local <- remote */
+ SYS(fail, "ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 "
+ "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
+ if_id, ipv4_remote, ipv4_local, if_id);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int setup_xfrm_tunnel(const char *ns_a, const char *ns_b,
+ const char *ipv4_a, const char *ipv4_b,
+ int if_id_a, int if_id_b)
+{
+ return setup_xfrm_tunnel_ns(ns_a, ipv4_a, ipv4_b, if_id_a) ||
+ setup_xfrm_tunnel_ns(ns_b, ipv4_b, ipv4_a, if_id_b);
+}
+
+static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type,
+ unsigned short len)
+{
+ struct rtattr *rta =
+ (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len));
+ rta->rta_type = type;
+ rta->rta_len = RTA_LENGTH(len);
+ nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
+ return rta;
+}
+
+static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
+ const char *s)
+{
+ struct rtattr *rta = rtattr_add(nh, type, strlen(s));
+
+ memcpy(RTA_DATA(rta), s, strlen(s));
+ return rta;
+}
+
+static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type)
+{
+ return rtattr_add(nh, type, 0);
+}
+
+static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ uint8_t *end = (uint8_t *)nh + nh->nlmsg_len;
+
+ attr->rta_len = end - (uint8_t *)attr;
+}
+
+static int setup_xfrmi_external_dev(const char *ns)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ unsigned char data[128];
+ } req;
+ struct rtattr *link_info, *info_data;
+ struct nstoken *nstoken;
+ int ret = -1, sock = -1;
+ struct nlmsghdr *nh;
+
+ memset(&req, 0, sizeof(req));
+ nh = &req.nh;
+ nh->nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ nh->nlmsg_type = RTM_NEWLINK;
+ nh->nlmsg_flags |= NLM_F_CREATE | NLM_F_REQUEST;
+
+ rtattr_add_str(nh, IFLA_IFNAME, "ipsec0");
+ link_info = rtattr_begin(nh, IFLA_LINKINFO);
+ rtattr_add_str(nh, IFLA_INFO_KIND, "xfrm");
+ info_data = rtattr_begin(nh, IFLA_INFO_DATA);
+ rtattr_add(nh, IFLA_XFRM_COLLECT_METADATA, 0);
+ rtattr_end(nh, info_data);
+ rtattr_end(nh, link_info);
+
+ nstoken = open_netns(ns);
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto done;
+
+ sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+ if (!ASSERT_GE(sock, 0, "netlink socket"))
+ goto done;
+ ret = send(sock, nh, nh->nlmsg_len, 0);
+ if (!ASSERT_EQ(ret, nh->nlmsg_len, "netlink send length"))
+ goto done;
+
+ ret = 0;
+done:
+ if (sock != -1)
+ close(sock);
+ if (nstoken)
+ close_netns(nstoken);
+ return ret;
+}
+
+static int config_overlay(void)
+{
+ if (setup_xfrm_tunnel(NS0, NS1, IP4_ADDR_VETH01, IP4_ADDR_VETH10,
+ IF_ID_0_TO_1, IF_ID_1))
+ goto fail;
+ if (setup_xfrm_tunnel(NS0, NS2, IP4_ADDR_VETH02, IP4_ADDR_VETH20,
+ IF_ID_0_TO_2, IF_ID_2))
+ goto fail;
+
+ /* Older iproute2 doesn't support this option */
+ if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi"))
+ goto fail;
+
+ SYS(fail, "ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0");
+ SYS(fail, "ip -net " NS0 " link set dev ipsec0 up");
+
+ SYS(fail, "ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1);
+ SYS(fail, "ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS(fail, "ip -net " NS1 " link set dev ipsec0 up");
+
+ SYS(fail, "ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2);
+ SYS(fail, "ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS(fail, "ip -net " NS2 " link set dev ipsec0 up");
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id)
+{
+ skel->bss->req_if_id = if_id;
+
+ SYS(fail, "ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null");
+
+ if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id"))
+ goto fail;
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void _test_xfrm_info(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ int get_xfrm_info_prog_fd, set_xfrm_info_prog_fd;
+ struct nstoken *nstoken = NULL;
+ struct xfrm_info *skel;
+ int ifindex;
+
+ /* load and attach bpf progs to ipsec dev tc hook point */
+ skel = xfrm_info__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xfrm_info__open_and_load"))
+ goto done;
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto done;
+ ifindex = if_nametoindex("ipsec0");
+ if (!ASSERT_NEQ(ifindex, 0, "ipsec0 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+ set_xfrm_info_prog_fd = bpf_program__fd(skel->progs.set_xfrm_info);
+ get_xfrm_info_prog_fd = bpf_program__fd(skel->progs.get_xfrm_info);
+ if (!ASSERT_GE(set_xfrm_info_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (!ASSERT_GE(get_xfrm_info_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, get_xfrm_info_prog_fd,
+ set_xfrm_info_prog_fd))
+ goto done;
+
+ /* perform test */
+ if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_1), 0, "ping " NS1))
+ goto done;
+ if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_2), 0, "ping " NS2))
+ goto done;
+
+done:
+ if (nstoken)
+ close_netns(nstoken);
+ xfrm_info__destroy(skel);
+}
+
+void test_xfrm_info(void)
+{
+ cleanup();
+
+ if (!ASSERT_OK(config_underlay(), "config_underlay"))
+ goto done;
+ if (!ASSERT_OK(config_overlay(), "config_overlay"))
+ goto done;
+
+ if (test__start_subtest("xfrm_info"))
+ _test_xfrm_info();
+
+done:
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xsk.c b/tools/testing/selftests/bpf/prog_tests/xsk.c
new file mode 100644
index 000000000000..dd4c35c0e428
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xsk.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/if.h>
+#include <stdarg.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_xsk.h"
+#include "xsk_xdp_progs.skel.h"
+
+#define VETH_RX "veth0"
+#define VETH_TX "veth1"
+#define MTU 1500
+
+int setup_veth(bool busy_poll)
+{
+ SYS(fail,
+ "ip link add %s numtxqueues 4 numrxqueues 4 type veth peer name %s numtxqueues 4 numrxqueues 4",
+ VETH_RX, VETH_TX);
+ SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_RX);
+ SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_TX);
+
+ if (busy_poll) {
+ SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_RX);
+ SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_RX);
+ SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_TX);
+ SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_TX);
+ }
+
+ SYS(fail, "ip link set %s mtu %d", VETH_RX, MTU);
+ SYS(fail, "ip link set %s mtu %d", VETH_TX, MTU);
+ SYS(fail, "ip link set %s up", VETH_RX);
+ SYS(fail, "ip link set %s up", VETH_TX);
+
+ return 0;
+
+fail:
+ return -1;
+}
+
+void delete_veth(void)
+{
+ SYS_NOFAIL("ip link del %s", VETH_RX);
+ SYS_NOFAIL("ip link del %s", VETH_TX);
+}
+
+int configure_ifobj(struct ifobject *tx, struct ifobject *rx)
+{
+ rx->ifindex = if_nametoindex(VETH_RX);
+ if (!ASSERT_OK_FD(rx->ifindex, "get RX ifindex"))
+ return -1;
+
+ tx->ifindex = if_nametoindex(VETH_TX);
+ if (!ASSERT_OK_FD(tx->ifindex, "get TX ifindex"))
+ return -1;
+
+ tx->shared_umem = false;
+ rx->shared_umem = false;
+
+
+ return 0;
+}
+
+static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode)
+{
+ struct ifobject *ifobj_tx, *ifobj_rx;
+ struct test_spec test;
+ int ret;
+
+ ifobj_tx = ifobject_create();
+ if (!ASSERT_OK_PTR(ifobj_tx, "create ifobj_tx"))
+ return;
+
+ ifobj_rx = ifobject_create();
+ if (!ASSERT_OK_PTR(ifobj_rx, "create ifobj_rx"))
+ goto delete_tx;
+
+ if (!ASSERT_OK(configure_ifobj(ifobj_tx, ifobj_rx), "conigure ifobj"))
+ goto delete_rx;
+
+ ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring);
+ if (!ret) {
+ ifobj_tx->hw_ring_size_supp = true;
+ ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending;
+ ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
+ }
+
+ if (!ASSERT_OK(init_iface(ifobj_rx, worker_testapp_validate_rx), "init RX"))
+ goto delete_rx;
+ if (!ASSERT_OK(init_iface(ifobj_tx, worker_testapp_validate_tx), "init TX"))
+ goto delete_rx;
+
+ test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+
+ test.tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ if (!ASSERT_OK_PTR(test.tx_pkt_stream_default, "TX pkt generation"))
+ goto delete_rx;
+ test.rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ if (!ASSERT_OK_PTR(test.rx_pkt_stream_default, "RX pkt generation"))
+ goto delete_rx;
+
+
+ test_init(&test, ifobj_tx, ifobj_rx, mode, test_to_run);
+ ret = test.test_func(&test);
+ if (ret != TEST_SKIP)
+ ASSERT_OK(ret, "Run test");
+ pkt_stream_restore_default(&test);
+
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
+ pkt_stream_delete(test.tx_pkt_stream_default);
+ pkt_stream_delete(test.rx_pkt_stream_default);
+ xsk_xdp_progs__destroy(ifobj_tx->xdp_progs);
+ xsk_xdp_progs__destroy(ifobj_rx->xdp_progs);
+
+delete_rx:
+ ifobject_delete(ifobj_rx);
+delete_tx:
+ ifobject_delete(ifobj_tx);
+}
+
+void test_ns_xsk_skb(void)
+{
+ int i;
+
+ if (!ASSERT_OK(setup_veth(false), "setup veth"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (test__start_subtest(tests[i].name))
+ test_xsk(&tests[i], TEST_MODE_SKB);
+ }
+
+ delete_veth();
+}
+
+void test_ns_xsk_drv(void)
+{
+ int i;
+
+ if (!ASSERT_OK(setup_veth(false), "setup veth"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (test__start_subtest(tests[i].name))
+ test_xsk(&tests[i], TEST_MODE_DRV);
+ }
+
+ delete_veth();
+}
+
diff --git a/tools/testing/selftests/bpf/progs/access_map_in_map.c b/tools/testing/selftests/bpf/progs/access_map_in_map.c
new file mode 100644
index 000000000000..1126871c2ebd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/access_map_in_map.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct inner_map_type);
+} outer_array_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct inner_map_type);
+} outer_htab_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+char _license[] SEC("license") = "GPL";
+
+int tgid = 0;
+
+static int acc_map_in_map(void *outer_map)
+{
+ int i, key, value = 0xdeadbeef;
+ void *inner_map;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != tgid)
+ return 0;
+
+ /* Find nonexistent inner map */
+ key = 1;
+ inner_map = bpf_map_lookup_elem(outer_map, &key);
+ if (inner_map)
+ return 0;
+
+ /* Find the old inner map */
+ key = 0;
+ inner_map = bpf_map_lookup_elem(outer_map, &key);
+ if (!inner_map)
+ return 0;
+
+ /* Wait for the old inner map to be replaced */
+ for (i = 0; i < 2048; i++)
+ bpf_map_update_elem(inner_map, &key, &value, 0);
+
+ return 0;
+}
+
+SEC("?kprobe/" SYS_PREFIX "sys_getpgid")
+int access_map_in_array(void *ctx)
+{
+ return acc_map_in_map(&outer_array_map);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int sleepable_access_map_in_array(void *ctx)
+{
+ return acc_map_in_map(&outer_array_map);
+}
+
+SEC("?kprobe/" SYS_PREFIX "sys_getpgid")
+int access_map_in_htab(void *ctx)
+{
+ return acc_map_in_map(&outer_htab_map);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int sleepable_access_map_in_htab(void *ctx)
+{
+ return acc_map_in_map(&outer_htab_map);
+}
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
new file mode 100644
index 000000000000..d1841aac94a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include <stdatomic.h>
+#include "bpf_arena_common.h"
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 10); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+bool skip_all_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_all_tests = true;
+#endif
+
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
+bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_lacq_srel_tests = true;
+#endif
+
+__u32 pid = 0;
+
+__u64 __arena_global add64_value = 1;
+__u64 __arena_global add64_result = 0;
+__u32 __arena_global add32_value = 1;
+__u32 __arena_global add32_result = 0;
+__u64 __arena_global add_stack_value_copy = 0;
+__u64 __arena_global add_stack_result = 0;
+__u64 __arena_global add_noreturn_value = 1;
+
+SEC("raw_tp/sys_enter")
+int add(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 add_stack_value = 1;
+
+ add64_result = __sync_fetch_and_add(&add64_value, 2);
+ add32_result = __sync_fetch_and_add(&add32_value, 2);
+ add_stack_result = __sync_fetch_and_add(&add_stack_value, 2);
+ add_stack_value_copy = add_stack_value;
+ __sync_fetch_and_add(&add_noreturn_value, 2);
+#endif
+
+ return 0;
+}
+
+__s64 __arena_global sub64_value = 1;
+__s64 __arena_global sub64_result = 0;
+__s32 __arena_global sub32_value = 1;
+__s32 __arena_global sub32_result = 0;
+__s64 __arena_global sub_stack_value_copy = 0;
+__s64 __arena_global sub_stack_result = 0;
+__s64 __arena_global sub_noreturn_value = 1;
+
+SEC("raw_tp/sys_enter")
+int sub(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 sub_stack_value = 1;
+
+ sub64_result = __sync_fetch_and_sub(&sub64_value, 2);
+ sub32_result = __sync_fetch_and_sub(&sub32_value, 2);
+ sub_stack_result = __sync_fetch_and_sub(&sub_stack_value, 2);
+ sub_stack_value_copy = sub_stack_value;
+ __sync_fetch_and_sub(&sub_noreturn_value, 2);
+#endif
+
+ return 0;
+}
+
+#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING
+_Atomic __u64 __arena_global and64_value = (0x110ull << 32);
+_Atomic __u32 __arena_global and32_value = 0x110;
+#else
+__u64 __arena_global and64_value = (0x110ull << 32);
+__u32 __arena_global and32_value = 0x110;
+#endif
+
+SEC("raw_tp/sys_enter")
+int and(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING
+ __c11_atomic_fetch_and(&and64_value, 0x011ull << 32, memory_order_relaxed);
+ __c11_atomic_fetch_and(&and32_value, 0x011, memory_order_relaxed);
+#else
+ __sync_fetch_and_and(&and64_value, 0x011ull << 32);
+ __sync_fetch_and_and(&and32_value, 0x011);
+#endif
+#endif
+
+ return 0;
+}
+
+#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING
+_Atomic __u32 __arena_global or32_value = 0x110;
+_Atomic __u64 __arena_global or64_value = (0x110ull << 32);
+#else
+__u32 __arena_global or32_value = 0x110;
+__u64 __arena_global or64_value = (0x110ull << 32);
+#endif
+
+SEC("raw_tp/sys_enter")
+int or(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING
+ __c11_atomic_fetch_or(&or64_value, 0x011ull << 32, memory_order_relaxed);
+ __c11_atomic_fetch_or(&or32_value, 0x011, memory_order_relaxed);
+#else
+ __sync_fetch_and_or(&or64_value, 0x011ull << 32);
+ __sync_fetch_and_or(&or32_value, 0x011);
+#endif
+#endif
+
+ return 0;
+}
+
+#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING
+_Atomic __u64 __arena_global xor64_value = (0x110ull << 32);
+_Atomic __u32 __arena_global xor32_value = 0x110;
+#else
+__u64 __arena_global xor64_value = (0x110ull << 32);
+__u32 __arena_global xor32_value = 0x110;
+#endif
+
+SEC("raw_tp/sys_enter")
+int xor(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING
+ __c11_atomic_fetch_xor(&xor64_value, 0x011ull << 32, memory_order_relaxed);
+ __c11_atomic_fetch_xor(&xor32_value, 0x011, memory_order_relaxed);
+#else
+ __sync_fetch_and_xor(&xor64_value, 0x011ull << 32);
+ __sync_fetch_and_xor(&xor32_value, 0x011);
+#endif
+#endif
+
+ return 0;
+}
+
+__u32 __arena_global cmpxchg32_value = 1;
+__u32 __arena_global cmpxchg32_result_fail = 0;
+__u32 __arena_global cmpxchg32_result_succeed = 0;
+__u64 __arena_global cmpxchg64_value = 1;
+__u64 __arena_global cmpxchg64_result_fail = 0;
+__u64 __arena_global cmpxchg64_result_succeed = 0;
+
+SEC("raw_tp/sys_enter")
+int cmpxchg(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3);
+ cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2);
+
+ cmpxchg32_result_fail = __sync_val_compare_and_swap(&cmpxchg32_value, 0, 3);
+ cmpxchg32_result_succeed = __sync_val_compare_and_swap(&cmpxchg32_value, 1, 2);
+#endif
+
+ return 0;
+}
+
+__u64 __arena_global xchg64_value = 1;
+__u64 __arena_global xchg64_result = 0;
+__u32 __arena_global xchg32_value = 1;
+__u32 __arena_global xchg32_result = 0;
+
+SEC("raw_tp/sys_enter")
+int xchg(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 val64 = 2;
+ __u32 val32 = 2;
+
+ xchg64_result = __sync_lock_test_and_set(&xchg64_value, val64);
+ xchg32_result = __sync_lock_test_and_set(&xchg32_value, val32);
+#endif
+
+ return 0;
+}
+
+__u64 __arena_global uaf_sink;
+volatile __u64 __arena_global uaf_recovery_fails;
+
+SEC("syscall")
+int uaf(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#if defined(ENABLE_ATOMICS_TESTS) && !defined(__TARGET_ARCH_arm64) && \
+ !defined(__TARGET_ARCH_x86)
+ __u32 __arena *page32;
+ __u64 __arena *page64;
+ void __arena *page;
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ bpf_arena_free_pages(&arena, page, 1);
+ uaf_recovery_fails = 24;
+
+ page32 = (__u32 __arena *)page;
+ uaf_sink += __sync_fetch_and_add(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_add_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_sub(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_sub_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_and(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_and_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_or(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_or_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_xor(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_xor_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_val_compare_and_swap(page32, 0, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_lock_test_and_set(page32, 1);
+ uaf_recovery_fails -= 1;
+
+ page64 = (__u64 __arena *)page;
+ uaf_sink += __sync_fetch_and_add(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_add_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_sub(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_sub_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_and(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_and_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_or(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_or_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_xor(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_xor_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_val_compare_and_swap(page64, 0, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_lock_test_and_set(page64, 1);
+ uaf_recovery_fails -= 1;
+#endif
+
+ return 0;
+}
+
+#if __clang_major__ >= 18
+__u8 __arena_global load_acquire8_value = 0x12;
+__u16 __arena_global load_acquire16_value = 0x1234;
+__u32 __arena_global load_acquire32_value = 0x12345678;
+__u64 __arena_global load_acquire64_value = 0x1234567890abcdef;
+
+__u8 __arena_global load_acquire8_result = 0;
+__u16 __arena_global load_acquire16_result = 0;
+__u32 __arena_global load_acquire32_result = 0;
+__u64 __arena_global load_acquire64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global load_acquire8_value;
+__u64 __arena_global load_acquire16_value;
+__u64 __arena_global load_acquire32_value;
+__u64 __arena_global load_acquire64_value;
+
+__u64 __arena_global load_acquire8_result;
+__u64 __arena_global load_acquire16_result;
+__u64 __arena_global load_acquire32_result;
+__u64 __arena_global load_acquire64_result;
+#endif
+
+SEC("raw_tp/sys_enter")
+int load_acquire(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
+
+#define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \
+ { asm volatile ( \
+ "r1 = %[" #SRC "] ll;" \
+ "r1 = addr_space_cast(r1, 0x0, 0x1);" \
+ ".8byte %[load_acquire_insn];" \
+ "r3 = %[" #DST "] ll;" \
+ "r3 = addr_space_cast(r3, 0x0, 0x1);" \
+ "*(" #SIZE " *)(r3 + 0) = r2;" \
+ : \
+ : __imm_addr(SRC), \
+ __imm_insn(load_acquire_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_LOAD_ACQ, \
+ BPF_REG_2, BPF_REG_1, 0)), \
+ __imm_addr(DST) \
+ : __clobber_all); } \
+
+ LOAD_ACQUIRE_ARENA(B, u8, load_acquire8_value, load_acquire8_result)
+ LOAD_ACQUIRE_ARENA(H, u16, load_acquire16_value,
+ load_acquire16_result)
+ LOAD_ACQUIRE_ARENA(W, u32, load_acquire32_value,
+ load_acquire32_result)
+ LOAD_ACQUIRE_ARENA(DW, u64, load_acquire64_value,
+ load_acquire64_result)
+#undef LOAD_ACQUIRE_ARENA
+
+#endif
+ return 0;
+}
+
+#if __clang_major__ >= 18
+__u8 __arena_global store_release8_result = 0;
+__u16 __arena_global store_release16_result = 0;
+__u32 __arena_global store_release32_result = 0;
+__u64 __arena_global store_release64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global store_release8_result;
+__u64 __arena_global store_release16_result;
+__u64 __arena_global store_release32_result;
+__u64 __arena_global store_release64_result;
+#endif
+
+SEC("raw_tp/sys_enter")
+int store_release(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
+
+#define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \
+ { asm volatile ( \
+ "r1 = " VAL ";" \
+ "r2 = %[" #DST "] ll;" \
+ "r2 = addr_space_cast(r2, 0x0, 0x1);" \
+ ".8byte %[store_release_insn];" \
+ : \
+ : __imm_addr(DST), \
+ __imm_insn(store_release_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_STORE_REL, \
+ BPF_REG_2, BPF_REG_1, 0)) \
+ : __clobber_all); } \
+
+ STORE_RELEASE_ARENA(B, store_release8_result, "0x12")
+ STORE_RELEASE_ARENA(H, store_release16_result, "0x1234")
+ STORE_RELEASE_ARENA(W, store_release32_result, "0x12345678")
+ STORE_RELEASE_ARENA(DW, store_release64_result,
+ "0x1234567890abcdef ll")
+#undef STORE_RELEASE_ARENA
+
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c
new file mode 100644
index 000000000000..81eaa94afeb0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_htab.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+} arena SEC(".maps");
+
+#include "bpf_arena_htab.h"
+
+void __arena *htab_for_user;
+bool skip = false;
+
+int zero = 0;
+char __arena arr1[100000];
+char arr2[1000];
+
+SEC("syscall")
+int arena_htab_llvm(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM)
+ struct htab __arena *htab;
+ char __arena *arr = arr1;
+ __u64 i;
+
+ htab = bpf_alloc(sizeof(*htab));
+ cast_kern(htab);
+ htab_init(htab);
+
+ cast_kern(arr);
+
+ /* first run. No old elems in the table */
+ for (i = zero; i < 100000 && can_loop; i++) {
+ htab_update_elem(htab, i, i);
+ arr[i] = i;
+ }
+
+ /* should replace some elems with new ones */
+ for (i = zero; i < 1000 && can_loop; i++) {
+ htab_update_elem(htab, i, i);
+ /* Access mem to make the verifier use bounded loop logic */
+ arr2[i] = i;
+ }
+ cast_user(htab);
+ htab_for_user = htab;
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_htab_asm.c b/tools/testing/selftests/bpf/progs/arena_htab_asm.c
new file mode 100644
index 000000000000..6cd70ea12f0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_htab_asm.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_ARENA_FORCE_ASM
+#define arena_htab_llvm arena_htab_asm
+#include "arena_htab.c"
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
new file mode 100644
index 000000000000..3a2ddcacbea6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+#include "bpf_arena_alloc.h"
+#include "bpf_arena_list.h"
+
+struct elem {
+ struct arena_list_node node;
+ __u64 value;
+};
+
+struct arena_list_head __arena *list_head;
+int list_sum;
+int cnt;
+bool skip = false;
+
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
+long __arena arena_sum;
+int __arena test_val = 1;
+struct arena_list_head __arena global_head;
+#else
+long arena_sum SEC(".addr_space.1");
+int test_val SEC(".addr_space.1");
+#endif
+
+int zero;
+
+SEC("syscall")
+int arena_list_add(void *ctx)
+{
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
+ __u64 i;
+
+ list_head = &global_head;
+
+ for (i = zero; i < cnt && can_loop; i++) {
+ struct elem __arena *n = bpf_alloc(sizeof(*n));
+
+ test_val++;
+ n->value = i;
+ arena_sum += i;
+ list_add_head(&n->node, list_head);
+ }
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+int arena_list_del(void *ctx)
+{
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
+ struct elem __arena *n;
+ int sum = 0;
+
+ arena_sum = 0;
+ list_for_each_entry(n, list_head, node) {
+ sum += n->value;
+ arena_sum += n->value;
+ list_del(&n->node);
+ bpf_free(n);
+ }
+ list_sum = sum;
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
new file mode 100644
index 000000000000..086b57a426cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_arena_spin_lock.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+int cs_count;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+arena_spinlock_t __arena lock;
+int test_skip = 1;
+#else
+int test_skip = 2;
+#endif
+
+int counter;
+int limit;
+
+SEC("tc")
+int prog(void *ctx)
+{
+ int ret = -2;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ unsigned long flags;
+
+ if ((ret = arena_spin_lock_irqsave(&lock, flags))) {
+ if (ret == -EOPNOTSUPP)
+ test_skip = 3;
+ return ret;
+ }
+ if (counter != limit)
+ counter++;
+ bpf_repeat(cs_count);
+ ret = 0;
+ arena_spin_unlock_irqrestore(&lock, flags);
+#endif
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_strsearch.c b/tools/testing/selftests/bpf/progs/arena_strsearch.c
new file mode 100644
index 000000000000..ef6b76658f7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_strsearch.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+} arena SEC(".maps");
+
+#include "bpf_arena_strsearch.h"
+
+struct glob_test {
+ char const __arena *pat, *str;
+ bool expected;
+};
+
+static bool test(char const __arena *pat, char const __arena *str, bool expected)
+{
+ bool match = glob_match(pat, str);
+ bool success = match == expected;
+
+ /* bpf_printk("glob_match %s %s res %d ok %d", pat, str, match, success); */
+ return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section. The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static const char __arena glob_tests[] =
+ /* Some basic tests */
+ "1" "a\0" "a\0"
+ "0" "a\0" "b\0"
+ "0" "a\0" "aa\0"
+ "0" "a\0" "\0"
+ "1" "\0" "\0"
+ "0" "\0" "a\0"
+ /* Simple character class tests */
+ "1" "[a]\0" "a\0"
+ "0" "[a]\0" "b\0"
+ "0" "[!a]\0" "a\0"
+ "1" "[!a]\0" "b\0"
+ "1" "[ab]\0" "a\0"
+ "1" "[ab]\0" "b\0"
+ "0" "[ab]\0" "c\0"
+ "1" "[!ab]\0" "c\0"
+ "1" "[a-c]\0" "b\0"
+ "0" "[a-c]\0" "d\0"
+ /* Corner cases in character class parsing */
+ "1" "[a-c-e-g]\0" "-\0"
+ "0" "[a-c-e-g]\0" "d\0"
+ "1" "[a-c-e-g]\0" "f\0"
+ "1" "[]a-ceg-ik[]\0" "a\0"
+ "1" "[]a-ceg-ik[]\0" "]\0"
+ "1" "[]a-ceg-ik[]\0" "[\0"
+ "1" "[]a-ceg-ik[]\0" "h\0"
+ "0" "[]a-ceg-ik[]\0" "f\0"
+ "0" "[!]a-ceg-ik[]\0" "h\0"
+ "0" "[!]a-ceg-ik[]\0" "]\0"
+ "1" "[!]a-ceg-ik[]\0" "f\0"
+ /* Simple wild cards */
+ "1" "?\0" "a\0"
+ "0" "?\0" "aa\0"
+ "0" "??\0" "a\0"
+ "1" "?x?\0" "axb\0"
+ "0" "?x?\0" "abx\0"
+ "0" "?x?\0" "xab\0"
+ /* Asterisk wild cards (backtracking) */
+ "0" "*??\0" "a\0"
+ "1" "*??\0" "ab\0"
+ "1" "*??\0" "abc\0"
+ "1" "*??\0" "abcd\0"
+ "0" "??*\0" "a\0"
+ "1" "??*\0" "ab\0"
+ "1" "??*\0" "abc\0"
+ "1" "??*\0" "abcd\0"
+ "0" "?*?\0" "a\0"
+ "1" "?*?\0" "ab\0"
+ "1" "?*?\0" "abc\0"
+ "1" "?*?\0" "abcd\0"
+ "1" "*b\0" "b\0"
+ "1" "*b\0" "ab\0"
+ "0" "*b\0" "ba\0"
+ "1" "*b\0" "bb\0"
+ "1" "*b\0" "abb\0"
+ "1" "*b\0" "bab\0"
+ "1" "*bc\0" "abbc\0"
+ "1" "*bc\0" "bc\0"
+ "1" "*bc\0" "bbc\0"
+ "1" "*bc\0" "bcbc\0"
+ /* Multiple asterisks (complex backtracking) */
+ "1" "*ac*\0" "abacadaeafag\0"
+ "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+ "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+ "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+ "1" "*abcd*\0" "abcabcabcabcdefg\0"
+ "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+ "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+ "0" "*abcd*\0" "abcabcabcabcefg\0"
+ "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+bool skip = false;
+
+SEC("syscall")
+int arena_strsearch(void *ctx)
+{
+ unsigned successes = 0;
+ unsigned n = 0;
+ char const __arena *p = glob_tests;
+
+ /*
+ * Tests are jammed together in a string. The first byte is '1'
+ * or '0' to indicate the expected outcome, or '\0' to indicate the
+ * end of the tests. Then come two null-terminated strings: the
+ * pattern and the string to match it against.
+ */
+ while (*p) {
+ bool expected = *p++ & 1;
+ char const __arena *pat = p;
+
+ cond_break;
+ p += bpf_arena_strlen(p) + 1;
+ successes += test(pat, p, expected);
+ p += bpf_arena_strlen(p) + 1;
+ n++;
+ }
+
+ n -= successes;
+ /* bpf_printk("glob: %u self-tests passed, %u failed\n", successes, n); */
+
+ return n ? -1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c
new file mode 100644
index 000000000000..36734683acbd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/async_stack_depth.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct hmap_elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+__attribute__((noinline))
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ volatile char buf[256] = {};
+ return buf[69];
+}
+
+__attribute__((noinline))
+static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ volatile char buf[300] = {};
+ return buf[255] + timer_cb(NULL, NULL, NULL);
+}
+
+SEC("tc")
+__failure __msg("combined stack size of 2 calls is")
+int pseudo_call_check(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+ volatile char buf[256] = {};
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+
+ timer_cb(NULL, NULL, NULL);
+ return bpf_timer_set_callback(&elem->timer, timer_cb) + buf[0];
+}
+
+SEC("tc")
+__failure __msg("combined stack size of 2 calls is")
+int async_call_root_check(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+ volatile char buf[256] = {};
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+
+ return bpf_timer_set_callback(&elem->timer, bad_timer_cb) + buf[0];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c
index c245345e41ca..f89c7f0cc53b 100644
--- a/tools/testing/selftests/bpf/progs/atomics.c
+++ b/tools/testing/selftests/bpf/progs/atomics.c
@@ -10,6 +10,8 @@ bool skip_tests __attribute((__section__(".data"))) = false;
bool skip_tests = true;
#endif
+__u32 pid = 0;
+
__u64 add64_value = 1;
__u64 add64_result = 0;
__u32 add32_value = 1;
@@ -18,9 +20,11 @@ __u64 add_stack_value_copy = 0;
__u64 add_stack_result = 0;
__u64 add_noreturn_value = 1;
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(add, int a)
+SEC("raw_tp/sys_enter")
+int add(const void *ctx)
{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
#ifdef ENABLE_ATOMICS_TESTS
__u64 add_stack_value = 1;
@@ -42,9 +46,11 @@ __s64 sub_stack_value_copy = 0;
__s64 sub_stack_result = 0;
__s64 sub_noreturn_value = 1;
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(sub, int a)
+SEC("raw_tp/sys_enter")
+int sub(const void *ctx)
{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
#ifdef ENABLE_ATOMICS_TESTS
__u64 sub_stack_value = 1;
@@ -64,9 +70,11 @@ __u32 and32_value = 0x110;
__u32 and32_result = 0;
__u64 and_noreturn_value = (0x110ull << 32);
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(and, int a)
+SEC("raw_tp/sys_enter")
+int and(const void *ctx)
{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
#ifdef ENABLE_ATOMICS_TESTS
and64_result = __sync_fetch_and_and(&and64_value, 0x011ull << 32);
@@ -83,9 +91,11 @@ __u32 or32_value = 0x110;
__u32 or32_result = 0;
__u64 or_noreturn_value = (0x110ull << 32);
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(or, int a)
+SEC("raw_tp/sys_enter")
+int or(const void *ctx)
{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
#ifdef ENABLE_ATOMICS_TESTS
or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32);
or32_result = __sync_fetch_and_or(&or32_value, 0x011);
@@ -101,9 +111,11 @@ __u32 xor32_value = 0x110;
__u32 xor32_result = 0;
__u64 xor_noreturn_value = (0x110ull << 32);
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(xor, int a)
+SEC("raw_tp/sys_enter")
+int xor(const void *ctx)
{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
#ifdef ENABLE_ATOMICS_TESTS
xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32);
xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011);
@@ -120,9 +132,11 @@ __u32 cmpxchg32_value = 1;
__u32 cmpxchg32_result_fail = 0;
__u32 cmpxchg32_result_succeed = 0;
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(cmpxchg, int a)
+SEC("raw_tp/sys_enter")
+int cmpxchg(const void *ctx)
{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
#ifdef ENABLE_ATOMICS_TESTS
cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3);
cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2);
@@ -139,9 +153,11 @@ __u64 xchg64_result = 0;
__u32 xchg32_value = 1;
__u32 xchg32_result = 0;
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(xchg, int a)
+SEC("raw_tp/sys_enter")
+int xchg(const void *ctx)
{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
#ifdef ENABLE_ATOMICS_TESTS
__u64 val64 = 2;
__u32 val32 = 2;
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops.c b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
new file mode 100644
index 000000000000..b3f77b4561c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1) { return 0; }
+
+SEC("struct_ops/test_2")
+int BPF_PROG(test_2) { return 0; }
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops2 testmod_2 = {
+ .test_1 = (void *)test_1
+};
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops2.c b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c
new file mode 100644
index 000000000000..64a95f6be86d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* This is an unused struct_ops program, it lacks corresponding
+ * struct_ops map, which provides attachment information.
+ * W/o additional configuration attempt to load such
+ * BPF object file would fail.
+ */
+SEC("struct_ops/foo")
+void foo(void) {}
diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
new file mode 100644
index 000000000000..c8ec0d0368e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+long create_errs = 0;
+long create_cnts = 0;
+long kmalloc_cnts = 0;
+__u32 bench_pid = 0;
+
+struct storage {
+ __u8 data[64];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct storage);
+} sk_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct storage);
+} task_storage_map SEC(".maps");
+
+SEC("raw_tp/kmalloc")
+int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
+ size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
+ int node)
+{
+ __sync_fetch_and_add(&kmalloc_cnts, 1);
+
+ return 0;
+}
+
+SEC("tp_btf/sched_process_fork")
+int BPF_PROG(sched_process_fork, struct task_struct *parent, struct task_struct *child)
+{
+ struct storage *stg;
+
+ if (parent->tgid != bench_pid)
+ return 0;
+
+ stg = bpf_task_storage_get(&task_storage_map, child, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (stg)
+ __sync_fetch_and_add(&create_cnts, 1);
+ else
+ __sync_fetch_and_add(&create_errs, 1);
+
+ return 0;
+}
+
+SEC("lsm.s/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ struct sock *sk = sock->sk;
+ struct storage *stg;
+ __u32 pid;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != bench_pid || !sk)
+ return 0;
+
+ stg = bpf_sk_storage_get(&sk_storage_map, sk, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+
+ if (stg)
+ __sync_fetch_and_add(&create_cnts, 1);
+ else
+ __sync_fetch_and_add(&create_errs, 1);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
new file mode 100644
index 000000000000..079bf3794b3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+long process_byte = 0;
+int verdict_dir = 0;
+int dropped = 0;
+int pkt_size = 0;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return pkt_size;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ int one = 1;
+ int ret = bpf_sk_redirect_map(skb, &sock_map_rx, one, verdict_dir);
+
+ if (ret == SK_DROP)
+ dropped++;
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return ret;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_pass(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ int one = 1;
+
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return bpf_msg_redirect_map(msg, &sock_map_tx, one, verdict_dir);
+}
+
+SEC("sk_msg")
+int prog_skmsg_pass(struct sk_msg_md *msg)
+{
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index 115a3b0ad984..b7ddf8ec4ee8 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -6,14 +6,14 @@
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/in6.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
#include <linux/if.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bind_prog.h"
+
#define SERV4_IP 0xc0a801feU /* 192.168.1.254 */
#define SERV4_PORT 4040
#define SERV4_REWRITE_IP 0x7f000001U /* 127.0.0.1 */
@@ -57,6 +57,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+ int val = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || !val)
+ return 1;
+ val = 0;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || val)
+ return 1;
+
+ return 0;
+}
+
static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
{
int old, tmp, new = 0xeb9f;
@@ -99,23 +120,23 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
// u8 narrow loads:
user_ip4 = 0;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[0] << 0;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[1] << 8;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[2] << 16;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[3] << 24;
+ user_ip4 |= load_byte(ctx->user_ip4, 0, sizeof(user_ip4));
+ user_ip4 |= load_byte(ctx->user_ip4, 1, sizeof(user_ip4));
+ user_ip4 |= load_byte(ctx->user_ip4, 2, sizeof(user_ip4));
+ user_ip4 |= load_byte(ctx->user_ip4, 3, sizeof(user_ip4));
if (ctx->user_ip4 != user_ip4)
return 0;
user_port = 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+ user_port |= load_byte(ctx->user_port, 0, sizeof(user_port));
+ user_port |= load_byte(ctx->user_port, 1, sizeof(user_port));
if (ctx->user_port != user_port)
return 0;
// u16 narrow loads:
user_ip4 = 0;
- user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[0] << 0;
- user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[1] << 16;
+ user_ip4 |= load_word(ctx->user_ip4, 0, sizeof(user_ip4));
+ user_ip4 |= load_word(ctx->user_ip4, 1, sizeof(user_ip4));
if (ctx->user_ip4 != user_ip4)
return 0;
@@ -127,10 +148,20 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
return 0;
+ /* Set reuseport and unset */
+ if (bind_reuseport(ctx))
+ return 0;
+
ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
return 1;
}
+SEC("cgroup/bind4")
+int bind_v4_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index 4c0d348034b9..501c3fc11d35 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -6,14 +6,14 @@
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/in6.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
#include <linux/if.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bind_prog.h"
+
#define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */
#define SERV6_IP_1 0x12345678
#define SERV6_IP_2 0x00000000
@@ -63,6 +63,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+ int val = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || !val)
+ return 1;
+ val = 0;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || val)
+ return 1;
+
+ return 0;
+}
+
static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
{
int old, tmp, new = 0xeb9f;
@@ -110,25 +131,25 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
// u8 narrow loads:
for (i = 0; i < 4; i++) {
user_ip6 = 0;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[0] << 0;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[1] << 8;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[2] << 16;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[3] << 24;
+ user_ip6 |= load_byte(ctx->user_ip6[i], 0, sizeof(user_ip6));
+ user_ip6 |= load_byte(ctx->user_ip6[i], 1, sizeof(user_ip6));
+ user_ip6 |= load_byte(ctx->user_ip6[i], 2, sizeof(user_ip6));
+ user_ip6 |= load_byte(ctx->user_ip6[i], 3, sizeof(user_ip6));
if (ctx->user_ip6[i] != user_ip6)
return 0;
}
user_port = 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+ user_port |= load_byte(ctx->user_port, 0, sizeof(user_port));
+ user_port |= load_byte(ctx->user_port, 1, sizeof(user_port));
if (ctx->user_port != user_port)
return 0;
// u16 narrow loads:
for (i = 0; i < 4; i++) {
user_ip6 = 0;
- user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[0] << 0;
- user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[1] << 16;
+ user_ip6 |= load_word(ctx->user_ip6[i], 0, sizeof(user_ip6));
+ user_ip6 |= load_word(ctx->user_ip6[i], 1, sizeof(user_ip6));
if (ctx->user_ip6[i] != user_ip6)
return 0;
}
@@ -141,6 +162,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
return 0;
+ /* Set reuseport and unset */
+ if (bind_reuseport(ctx))
+ return 0;
+
ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
@@ -150,4 +175,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/bind6")
+int bind_v6_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind_prog.h b/tools/testing/selftests/bpf/progs/bind_prog.h
new file mode 100644
index 000000000000..e830caa940c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind_prog.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BIND_PROG_H__
+#define __BIND_PROG_H__
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define load_byte(src, b, s) \
+ (((volatile __u8 *)&(src))[b] << 8 * b)
+#define load_word(src, w, s) \
+ (((volatile __u16 *)&(src))[w] << 16 * w)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define load_byte(src, b, s) \
+ (((volatile __u8 *)&(src))[(b) + (sizeof(src) - (s))] << 8 * ((s) - (b) - 1))
+#define load_word(src, w, s) \
+ (((volatile __u16 *)&(src))[w] << 16 * (((s) / 2) - (w) - 1))
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
new file mode 100644
index 000000000000..7efcbdbe772d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map;
+
+__u8 rand_vals[2500000];
+const __u32 nr_rand_bytes = 2500000;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ /* max entries and value_size will be set programmatically.
+ * They are configurable from the userspace bench program.
+ */
+} array_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
+ /* max entries, value_size, and # of hash functions will be set
+ * programmatically. They are configurable from the userspace
+ * bench program.
+ */
+ __uint(map_extra, 3);
+} bloom_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ /* max entries, key_size, and value_size, will be set
+ * programmatically. They are configurable from the userspace
+ * bench program.
+ */
+} hashmap SEC(".maps");
+
+struct callback_ctx {
+ struct bpf_map *map;
+ bool update;
+};
+
+/* Tracks the number of hits, drops, and false hits */
+struct {
+ __u32 stats[3];
+} __attribute__((__aligned__(256))) percpu_stats[256];
+
+const __u32 hit_key = 0;
+const __u32 drop_key = 1;
+const __u32 false_hit_key = 2;
+
+__u8 value_size;
+
+const volatile bool hashmap_use_bloom;
+const volatile bool count_false_hits;
+
+int error = 0;
+
+static __always_inline void log_result(__u32 key)
+{
+ __u32 cpu = bpf_get_smp_processor_id();
+
+ percpu_stats[cpu & 255].stats[key]++;
+}
+
+static __u64
+bloom_callback(struct bpf_map *map, __u32 *key, void *val,
+ struct callback_ctx *data)
+{
+ int err;
+
+ if (data->update)
+ err = bpf_map_push_elem(data->map, val, 0);
+ else
+ err = bpf_map_peek_elem(data->map, val);
+
+ if (err) {
+ error |= 1;
+ return 1; /* stop the iteration */
+ }
+
+ log_result(hit_key);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int bloom_lookup(void *ctx)
+{
+ struct callback_ctx data;
+
+ data.map = (struct bpf_map *)&bloom_map;
+ data.update = false;
+
+ bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int bloom_update(void *ctx)
+{
+ struct callback_ctx data;
+
+ data.map = (struct bpf_map *)&bloom_map;
+ data.update = true;
+
+ bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int bloom_hashmap_lookup(void *ctx)
+{
+ __u64 *result;
+ int i, err;
+
+ __u32 index = bpf_get_prandom_u32();
+ __u32 bitmask = (1ULL << 21) - 1;
+
+ for (i = 0; i < 1024; i++, index += value_size) {
+ index = index & bitmask;
+
+ if (hashmap_use_bloom) {
+ err = bpf_map_peek_elem(&bloom_map,
+ rand_vals + index);
+ if (err) {
+ if (err != -ENOENT) {
+ error |= 2;
+ return 0;
+ }
+ log_result(hit_key);
+ continue;
+ }
+ }
+
+ result = bpf_map_lookup_elem(&hashmap,
+ rand_vals + index);
+ if (result) {
+ log_result(hit_key);
+ } else {
+ if (hashmap_use_bloom && count_false_hits)
+ log_result(false_hit_key);
+ log_result(drop_key);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
new file mode 100644
index 000000000000..f245fcfe0c61
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1000);
+} map_random_data SEC(".maps");
+
+struct map_bloom_type {
+ __uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
+ __type(value, __u32);
+ __uint(max_entries, 10000);
+ __uint(map_extra, 5);
+} map_bloom SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct map_bloom_type);
+} outer_map SEC(".maps");
+
+struct callback_ctx {
+ struct bpf_map *map;
+};
+
+int error = 0;
+
+static __u64
+check_elem(struct bpf_map *map, __u32 *key, __u32 *val,
+ struct callback_ctx *data)
+{
+ int err;
+
+ err = bpf_map_peek_elem(data->map, val);
+ if (err) {
+ error |= 1;
+ return 1; /* stop the iteration */
+ }
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int inner_map(void *ctx)
+{
+ struct bpf_map *inner_map;
+ struct callback_ctx data;
+ int key = 0;
+
+ inner_map = bpf_map_lookup_elem(&outer_map, &key);
+ if (!inner_map) {
+ error |= 2;
+ return 0;
+ }
+
+ data.map = inner_map;
+ bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int check_bloom(void *ctx)
+{
+ struct callback_ctx data;
+
+ data.map = (struct bpf_map *)&map_bloom;
+ bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
new file mode 100644
index 000000000000..f90531cf3ee5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
@@ -0,0 +1,542 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ARENA_SPIN_LOCK_H
+#define BPF_ARENA_SPIN_LOCK_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_atomic.h"
+
+#define arch_mcs_spin_lock_contended_label(l, label) smp_cond_load_acquire_label(l, VAL, label)
+#define arch_mcs_spin_unlock_contended(l) smp_store_release((l), 1)
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+
+#define EBUSY 16
+#define EOPNOTSUPP 95
+#define ETIMEDOUT 110
+
+#ifndef __arena
+#define __arena __attribute__((address_space(1)))
+#endif
+
+extern unsigned long CONFIG_NR_CPUS __kconfig;
+
+/*
+ * Typically, we'd just rely on the definition in vmlinux.h for qspinlock, but
+ * PowerPC overrides the definition to define lock->val as u32 instead of
+ * atomic_t, leading to compilation errors. Import a local definition below so
+ * that we don't depend on the vmlinux.h version.
+ */
+
+struct __qspinlock {
+ union {
+ atomic_t val;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ struct {
+ u8 locked;
+ u8 pending;
+ };
+ struct {
+ u16 locked_pending;
+ u16 tail;
+ };
+#else
+ struct {
+ u16 tail;
+ u16 locked_pending;
+ };
+ struct {
+ u8 reserved[2];
+ u8 pending;
+ u8 locked;
+ };
+#endif
+ };
+};
+
+#define arena_spinlock_t struct __qspinlock
+/* FIXME: Using typedef causes CO-RE relocation error */
+/* typedef struct qspinlock arena_spinlock_t; */
+
+struct arena_mcs_spinlock {
+ struct arena_mcs_spinlock __arena *next;
+ int locked;
+ int count;
+};
+
+struct arena_qnode {
+ struct arena_mcs_spinlock mcs;
+};
+
+#define _Q_MAX_NODES 4
+#define _Q_PENDING_LOOPS 1
+
+/*
+ * Bitfields in the atomic value:
+ *
+ * 0- 7: locked byte
+ * 8: pending
+ * 9-15: not used
+ * 16-17: tail index
+ * 18-31: tail cpu (+1)
+ */
+#define _Q_MAX_CPUS 1024
+
+#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
+ << _Q_ ## type ## _OFFSET)
+#define _Q_LOCKED_OFFSET 0
+#define _Q_LOCKED_BITS 8
+#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED)
+
+#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_BITS 8
+#define _Q_PENDING_MASK _Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
+#define _Q_TAIL_IDX_BITS 2
+#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX)
+
+#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
+#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET)
+#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
+
+#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET
+#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
+
+struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES];
+
+static inline u32 encode_tail(int cpu, int idx)
+{
+ u32 tail;
+
+ tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+ tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
+
+ return tail;
+}
+
+static inline struct arena_mcs_spinlock __arena *decode_tail(u32 tail)
+{
+ u32 cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
+ u32 idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+
+ return &qnodes[cpu][idx].mcs;
+}
+
+static inline
+struct arena_mcs_spinlock __arena *grab_mcs_node(struct arena_mcs_spinlock __arena *base, int idx)
+{
+ return &((struct arena_qnode __arena *)base + idx)->mcs;
+}
+
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(arena_spinlock_t __arena *lock, u32 tail)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = (old & _Q_LOCKED_PENDING_MASK) | tail;
+ /*
+ * We can use relaxed semantics since the caller ensures that
+ * the MCS node is properly initialized before updating the
+ * tail.
+ */
+ /* These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->pending, 0);
+}
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+}
+
+/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+}
+
+static __always_inline
+u32 arena_fetch_set_pending_acquire(arena_spinlock_t __arena *lock)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = old | _Q_PENDING_VAL;
+ /*
+ * These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * arena_spin_trylock - try to acquire the queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock)
+{
+ int val = atomic_read(&lock->val);
+
+ if (unlikely(val))
+ return 0;
+
+ return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
+}
+
+__noinline
+int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val)
+{
+ struct arena_mcs_spinlock __arena *prev, *next, *node0, *node;
+ int ret = -ETIMEDOUT;
+ u32 old, tail;
+ int idx;
+
+ /*
+ * Wait for in-progress pending->locked hand-overs with a bounded
+ * number of spins so that we guarantee forward progress.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ if (val == _Q_PENDING_VAL) {
+ int cnt = _Q_PENDING_LOOPS;
+ val = atomic_cond_read_relaxed_label(&lock->val,
+ (VAL != _Q_PENDING_VAL) || !cnt--,
+ release_err);
+ }
+
+ /*
+ * If we observe any contention; queue.
+ */
+ if (val & ~_Q_LOCKED_MASK)
+ goto queue;
+
+ /*
+ * trylock || pending
+ *
+ * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
+ */
+ val = arena_fetch_set_pending_acquire(lock);
+
+ /*
+ * If we observe contention, there is a concurrent locker.
+ *
+ * Undo and queue; our setting of PENDING might have made the
+ * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+ * on @next to become !NULL.
+ */
+ if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+ /* Undo PENDING if we set it. */
+ if (!(val & _Q_PENDING_MASK))
+ clear_pending(lock);
+
+ goto queue;
+ }
+
+ /*
+ * We're pending, wait for the owner to go away.
+ *
+ * 0,1,1 -> *,1,0
+ *
+ * this wait loop must be a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because not all
+ * clear_pending_set_locked() implementations imply full
+ * barriers.
+ */
+ if (val & _Q_LOCKED_MASK)
+ (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err);
+
+ /*
+ * take ownership and clear the pending bit.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ clear_pending_set_locked(lock);
+ return 0;
+
+ /*
+ * End of pending bit optimistic spinning and beginning of MCS
+ * queuing.
+ */
+queue:
+ node0 = &(qnodes[bpf_get_smp_processor_id()])[0].mcs;
+ idx = node0->count++;
+ tail = encode_tail(bpf_get_smp_processor_id(), idx);
+
+ /*
+ * 4 nodes are allocated based on the assumption that there will not be
+ * nested NMIs taking spinlocks. That may not be true in some
+ * architectures even though the chance of needing more than 4 nodes
+ * will still be extremely unlikely. When that happens, we simply return
+ * an error. Original qspinlock has a trylock fallback in this case.
+ */
+ if (unlikely(idx >= _Q_MAX_NODES)) {
+ ret = -EBUSY;
+ goto release_node_err;
+ }
+
+ node = grab_mcs_node(node0, idx);
+
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+
+ node->locked = 0;
+ node->next = NULL;
+
+ /*
+ * We touched a (possibly) cold cacheline in the per-cpu queue node;
+ * attempt the trylock once more in the hope someone let go while we
+ * weren't watching.
+ */
+ if (arena_spin_trylock(lock))
+ goto release;
+
+ /*
+ * Ensure that the initialisation of @node is complete before we
+ * publish the updated tail via xchg_tail() and potentially link
+ * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+ */
+ smp_wmb();
+
+ /*
+ * Publish the updated tail.
+ * We have already touched the queueing cacheline; don't bother with
+ * pending stuff.
+ *
+ * p,*,* -> n,*,*
+ */
+ old = xchg_tail(lock, tail);
+ next = NULL;
+
+ /*
+ * if there was a previous node; link it and wait until reaching the
+ * head of the waitqueue.
+ */
+ if (old & _Q_TAIL_MASK) {
+ prev = decode_tail(old);
+
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
+
+ (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err);
+
+ /*
+ * While waiting for the MCS lock, the next pointer may have
+ * been set by another lock waiter. We cannot prefetch here
+ * due to lack of equivalent instruction in BPF ISA.
+ */
+ next = READ_ONCE(node->next);
+ }
+
+ /*
+ * we're at the head of the waitqueue, wait for the owner & pending to
+ * go away.
+ *
+ * *,x,y -> *,0,0
+ *
+ * this wait loop must use a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because the set_locked() function below
+ * does not imply a full barrier.
+ */
+ val = atomic_cond_read_acquire_label(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK),
+ release_node_err);
+
+ /*
+ * claim the lock:
+ *
+ * n,0,0 -> 0,0,1 : lock, uncontended
+ * *,*,0 -> *,*,1 : lock, contended
+ *
+ * If the queue head is the only one in the queue (lock value == tail)
+ * and nobody is pending, clear the tail code and grab the lock.
+ * Otherwise, we only need to grab the lock.
+ */
+
+ /*
+ * In the PV case we might already have _Q_LOCKED_VAL set, because
+ * of lock stealing; therefore we must also allow:
+ *
+ * n,0,1 -> 0,0,1
+ *
+ * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+ * above wait condition, therefore any concurrent setting of
+ * PENDING will make the uncontended transition fail.
+ */
+ if ((val & _Q_TAIL_MASK) == tail) {
+ if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+ goto release; /* No contention */
+ }
+
+ /*
+ * Either somebody is queued behind us or _Q_PENDING_VAL got set
+ * which will then detect the remaining tail and queue behind us
+ * ensuring we'll see a @next.
+ */
+ set_locked(lock);
+
+ /*
+ * contended path; wait for next if not observed yet, release.
+ */
+ if (!next)
+ next = smp_cond_load_relaxed_label(&node->next, (VAL), release_node_err);
+
+ arch_mcs_spin_unlock_contended(&next->locked);
+
+release:;
+ /*
+ * release the node
+ *
+ * Doing a normal dec vs this_cpu_dec is fine. An upper context always
+ * decrements count it incremented before returning, thus we're fine.
+ * For contexts interrupting us, they either observe our dec or not.
+ * Just ensure the compiler doesn't reorder this statement, as a
+ * this_cpu_dec implicitly implied that.
+ */
+ barrier();
+ node0->count--;
+ return 0;
+release_node_err:
+ barrier();
+ node0->count--;
+ goto release_err;
+release_err:
+ return ret;
+}
+
+/**
+ * arena_spin_lock - acquire a queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * On error, returned value will be negative.
+ * On success, zero is returned.
+ *
+ * The return value _must_ be tested against zero for success,
+ * instead of checking it against negative, for passing the
+ * BPF verifier.
+ *
+ * The user should do:
+ * if (arena_spin_lock(...) != 0) // failure
+ * or
+ * if (arena_spin_lock(...) == 0) // success
+ * or
+ * if (arena_spin_lock(...)) // failure
+ * or
+ * if (!arena_spin_lock(...)) // success
+ * instead of:
+ * if (arena_spin_lock(...) < 0) // failure
+ *
+ * The return value can still be inspected later.
+ */
+static __always_inline int arena_spin_lock(arena_spinlock_t __arena *lock)
+{
+ int val = 0;
+
+ if (CONFIG_NR_CPUS > 1024)
+ return -EOPNOTSUPP;
+
+ bpf_preempt_disable();
+ if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
+ return 0;
+
+ val = arena_spin_lock_slowpath(lock, val);
+ /* FIXME: bpf_assert_range(-MAX_ERRNO, 0) once we have it working for all cases. */
+ if (val)
+ bpf_preempt_enable();
+ return val;
+}
+
+/**
+ * arena_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ */
+static __always_inline void arena_spin_unlock(arena_spinlock_t __arena *lock)
+{
+ /*
+ * unlock() needs release semantics:
+ */
+ smp_store_release(&lock->locked, 0);
+ bpf_preempt_enable();
+}
+
+#define arena_spin_lock_irqsave(lock, flags) \
+ ({ \
+ int __ret; \
+ bpf_local_irq_save(&(flags)); \
+ __ret = arena_spin_lock((lock)); \
+ if (__ret) \
+ bpf_local_irq_restore(&(flags)); \
+ (__ret); \
+ })
+
+#define arena_spin_unlock_irqrestore(lock, flags) \
+ ({ \
+ arena_spin_unlock((lock)); \
+ bpf_local_irq_restore(&(flags)); \
+ })
+
+#endif
+
+#endif /* BPF_ARENA_SPIN_LOCK_H */
diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
new file mode 100644
index 000000000000..9af19dfe4e80
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Highlights:
+ * 1. The major difference between this bpf program and tcp_cubic.c
+ * is that this bpf program relies on `cong_control` rather than
+ * `cong_avoid` in the struct tcp_congestion_ops.
+ * 2. Logic such as tcp_cwnd_reduction, tcp_cong_avoid, and
+ * tcp_update_pacing_rate is bypassed when `cong_control` is
+ * defined, so moving these logic to `cong_control`.
+ * 3. WARNING: This bpf program is NOT the same as tcp_cubic.c.
+ * The main purpose is to show use cases of the arguments in
+ * `cong_control`. For simplicity's sake, it reuses tcp cubic's
+ * kernel functions.
+ */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define USEC_PER_SEC 1000000UL
+#define TCP_PACING_SS_RATIO (200)
+#define TCP_PACING_CA_RATIO (120)
+#define TCP_REORDERING (12)
+
+extern void cubictcp_init(struct sock *sk) __ksym;
+extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
+extern void cubictcp_state(struct sock *sk, __u8 new_state) __ksym;
+extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
+extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
+extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
+
+static __u64 div64_u64(__u64 dividend, __u64 divisor)
+{
+ return dividend / divisor;
+}
+
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ __u64 rate;
+
+ /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+ rate = (__u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
+
+ /* current rate is (cwnd * mss) / srtt
+ * In Slow Start [1], set sk_pacing_rate to 200 % the current rate.
+ * In Congestion Avoidance phase, set it to 120 % the current rate.
+ *
+ * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
+ * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
+ * end of slow start and should slow down.
+ */
+ if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+ rate *= TCP_PACING_SS_RATIO;
+ else
+ rate *= TCP_PACING_CA_RATIO;
+
+ rate *= max(tp->snd_cwnd, tp->packets_out);
+
+ if (tp->srtt_us)
+ rate = div64_u64(rate, (__u64)tp->srtt_us);
+
+ sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+ int newly_lost, int flag)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int sndcnt = 0;
+ __u32 pkts_in_flight = tp->packets_out - (tp->sacked_out + tp->lost_out) + tp->retrans_out;
+ int delta = tp->snd_ssthresh - pkts_in_flight;
+
+ if (newly_acked_sacked <= 0 || !tp->prior_cwnd)
+ return;
+
+ __u32 prr_delivered = tp->prr_delivered + newly_acked_sacked;
+
+ if (delta < 0) {
+ __u64 dividend =
+ (__u64)tp->snd_ssthresh * prr_delivered + tp->prior_cwnd - 1;
+ sndcnt = (__u32)div64_u64(dividend, (__u64)tp->prior_cwnd) - tp->prr_out;
+ } else {
+ sndcnt = max(prr_delivered - tp->prr_out, newly_acked_sacked);
+ if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
+ sndcnt++;
+ sndcnt = min(delta, sndcnt);
+ }
+ /* Force a fast retransmit upon entering fast recovery */
+ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
+ tp->snd_cwnd = pkts_in_flight + sndcnt;
+}
+
+/* Decide whether to run the increase function of congestion control. */
+static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
+{
+ if (tcp_sk(sk)->reordering > TCP_REORDERING)
+ return flag & FLAG_FORWARD_PROGRESS;
+
+ return flag & FLAG_DATA_ACKED;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
+{
+ cubictcp_init(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+{
+ cubictcp_cwnd_event(sk, event);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag,
+ const struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (((1<<TCP_CA_CWR) | (1<<TCP_CA_Recovery)) &
+ (1 << inet_csk(sk)->icsk_ca_state)) {
+ /* Reduce cwnd if state mandates */
+ tcp_cwnd_reduction(sk, rs->acked_sacked, rs->losses, flag);
+
+ if (!before(tp->snd_una, tp->high_seq)) {
+ /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+ }
+ }
+ } else if (tcp_may_raise_cwnd(sk, flag)) {
+ /* Advance cwnd if state allows */
+ cubictcp_cong_avoid(sk, ack, rs->acked_sacked);
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+ }
+
+ tcp_update_pacing_rate(sk);
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
+{
+ return cubictcp_recalc_ssthresh(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
+{
+ cubictcp_state(sk, new_state);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
+{
+ cubictcp_acked(sk, sample);
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
+{
+ return tcp_reno_undo_cwnd(sk);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops cc_cubic = {
+ .init = (void *)bpf_cubic_init,
+ .ssthresh = (void *)bpf_cubic_recalc_ssthresh,
+ .cong_control = (void *)bpf_cubic_cong_control,
+ .set_state = (void *)bpf_cubic_state,
+ .undo_cwnd = (void *)bpf_cubic_undo_cwnd,
+ .cwnd_event = (void *)bpf_cubic_cwnd_event,
+ .pkts_acked = (void *)bpf_cubic_acked,
+ .name = "bpf_cc_cubic",
+};
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_compiler.h b/tools/testing/selftests/bpf/progs/bpf_compiler.h
new file mode 100644
index 000000000000..a7c343dc82e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_compiler.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_COMPILER_H__
+#define __BPF_COMPILER_H__
+
+#define DO_PRAGMA_(X) _Pragma(#X)
+
+#if __clang__
+#define __pragma_loop_unroll DO_PRAGMA_(clang loop unroll(enable))
+#else
+/* In GCC -funroll-loops, which is enabled with -O2, should have the
+ same impact than the loop-unroll-enable pragma above. */
+#define __pragma_loop_unroll
+#endif
+
+#if __clang__
+#define __pragma_loop_unroll_count(N) DO_PRAGMA_(clang loop unroll_count(N))
+#else
+#define __pragma_loop_unroll_count(N) DO_PRAGMA_(GCC unroll N)
+#endif
+
+#if __clang__
+#define __pragma_loop_unroll_full DO_PRAGMA_(clang loop unroll(full))
+#else
+#define __pragma_loop_unroll_full DO_PRAGMA_(GCC unroll 65534)
+#endif
+
+#if __clang__
+#define __pragma_loop_no_unroll DO_PRAGMA_(clang loop unroll(disable))
+#else
+#define __pragma_loop_no_unroll DO_PRAGMA_(GCC unroll 1)
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index 6939bfd8690f..46fb2b37d3a7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* WARNING: This implemenation is not necessarily the same
+/* WARNING: This implementation is not necessarily the same
* as the tcp_cubic.c. The purpose is mainly for testing
* the kernel BPF logic.
*
@@ -14,15 +14,16 @@
* "ca->ack_cnt / delta" operation.
*/
-#include <linux/bpf.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
-#include "bpf_tcp_helpers.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
+
#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
* max_cwnd = snd_cwnd * beta
*/
@@ -70,7 +71,7 @@ static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ))
/ (bic_scale * 10);
/* BIC TCP Parameters */
-struct bictcp {
+struct bpf_bictcp {
__u32 cnt; /* increase cwnd by 1 after ACKs */
__u32 last_max_cwnd; /* last maximum snd_cwnd */
__u32 last_cwnd; /* the last snd_cwnd */
@@ -91,7 +92,7 @@ struct bictcp {
__u32 curr_rtt; /* the minimum rtt of current round */
};
-static inline void bictcp_reset(struct bictcp *ca)
+static void bictcp_reset(struct bpf_bictcp *ca)
{
ca->cnt = 0;
ca->last_max_cwnd = 0;
@@ -112,7 +113,7 @@ extern unsigned long CONFIG_HZ __kconfig;
#define USEC_PER_SEC 1000000UL
#define USEC_PER_JIFFY (USEC_PER_SEC / HZ)
-static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
+static __u64 div64_u64(__u64 dividend, __u64 divisor)
{
return dividend / divisor;
}
@@ -120,7 +121,7 @@ static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
#define div64_ul div64_u64
#define BITS_PER_U64 (sizeof(__u64) * 8)
-static __always_inline int fls64(__u64 x)
+static int fls64(__u64 x)
{
int num = BITS_PER_U64 - 1;
@@ -153,15 +154,15 @@ static __always_inline int fls64(__u64 x)
return num + 1;
}
-static __always_inline __u32 bictcp_clock_us(const struct sock *sk)
+static __u32 bictcp_clock_us(const struct sock *sk)
{
return tcp_sk(sk)->tcp_mstamp;
}
-static __always_inline void bictcp_hystart_reset(struct sock *sk)
+static void bictcp_hystart_reset(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
ca->round_start = ca->last_ack = bictcp_clock_us(sk);
ca->end_seq = tp->snd_nxt;
@@ -169,15 +170,10 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
ca->sample_cnt = 0;
}
-/* "struct_ops/" prefix is not a requirement
- * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS
- * as long as it is used in one of the func ptr
- * under SEC(".struct_ops").
- */
-SEC("struct_ops/bictcp_init")
-void BPF_PROG(bictcp_init, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
bictcp_reset(ca);
@@ -188,14 +184,11 @@ void BPF_PROG(bictcp_init, struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-/* No prefix in SEC will also work.
- * The remaining tcp-cubic functions have an easier way.
- */
-SEC("no-sec-prefix-bictcp_cwnd_event")
-void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 now = tcp_jiffies32;
__s32 delta;
@@ -236,7 +229,7 @@ static const __u8 v[] = {
* Newton-Raphson iteration.
* Avg err ~= 0.195%
*/
-static __always_inline __u32 cubic_root(__u64 a)
+static __u32 cubic_root(__u64 a)
{
__u32 x, b, shift;
@@ -269,8 +262,7 @@ static __always_inline __u32 cubic_root(__u64 a)
/*
* Compute congestion window to use.
*/
-static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
- __u32 acked)
+static void bictcp_update(struct bpf_bictcp *ca, __u32 cwnd, __u32 acked)
{
__u32 delta, bic_target, max_cnt;
__u64 offs, t;
@@ -315,7 +307,7 @@ static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
* (so time^3 is done by using 64 bit)
* and without the support of division of 64bit numbers
* (so all divisions are done by using 32 bit)
- * also NOTE the unit of those veriables
+ * also NOTE the unit of those variables
* time = (t - K) / 2^bictcp_HZ
* c = bic_scale >> 10
* rtt = (srtt >> 3) / HZ
@@ -383,11 +375,11 @@ tcp_friendliness:
ca->cnt = max(ca->cnt, 2U);
}
-/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
-void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
if (!tcp_is_cwnd_limited(sk))
return;
@@ -403,10 +395,11 @@ void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
ca->epoch_start = 0; /* end of epoch */
@@ -420,7 +413,8 @@ __u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -439,7 +433,7 @@ void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state)
* We apply another 100% factor because @rate is doubled at this point.
* We cap the cushion to 1ms.
*/
-static __always_inline __u32 hystart_ack_delay(struct sock *sk)
+static __u32 hystart_ack_delay(struct sock *sk)
{
unsigned long rate;
@@ -450,10 +444,10 @@ static __always_inline __u32 hystart_ack_delay(struct sock *sk)
div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
}
-static __always_inline void hystart_update(struct sock *sk, __u32 delay)
+static void hystart_update(struct sock *sk, __u32 delay)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 threshold;
if (hystart_detect & HYSTART_ACK_TRAIN) {
@@ -496,14 +490,17 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
}
}
-void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
- const struct ack_sample *sample)
+int bpf_cubic_acked_called = 0;
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 delay;
- /* Some calls are for duplicates without timetamps */
+ bpf_cubic_acked_called = 1;
+ /* Some calls are for duplicates without timestamps */
if (sample->rtt_us < 0)
return;
@@ -525,21 +522,22 @@ void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
hystart_update(sk, delay);
}
-__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
+extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
- return max(tp->snd_cwnd, tp->prior_cwnd);
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
+{
+ return tcp_reno_undo_cwnd(sk);
}
SEC(".struct_ops")
struct tcp_congestion_ops cubic = {
- .init = (void *)bictcp_init,
- .ssthresh = (void *)bictcp_recalc_ssthresh,
- .cong_avoid = (void *)bictcp_cong_avoid,
- .set_state = (void *)bictcp_state,
- .undo_cwnd = (void *)tcp_reno_undo_cwnd,
- .cwnd_event = (void *)bictcp_cwnd_event,
- .pkts_acked = (void *)bictcp_acked,
+ .init = (void *)bpf_cubic_init,
+ .ssthresh = (void *)bpf_cubic_recalc_ssthresh,
+ .cong_avoid = (void *)bpf_cubic_cong_avoid,
+ .set_state = (void *)bpf_cubic_state,
+ .undo_cwnd = (void *)bpf_cubic_undo_cwnd,
+ .cwnd_event = (void *)bpf_cubic_cwnd_event,
+ .pkts_acked = (void *)bpf_cubic_acked,
.name = "bpf_cubic",
};
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 4dc1a967776a..1cc83140849f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -1,23 +1,32 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-/* WARNING: This implemenation is not necessarily the same
+/* WARNING: This implementation is not necessarily the same
* as the tcp_dctcp.c. The purpose is mainly for testing
* the kernel BPF logic.
*/
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
+
+#ifndef EBUSY
+#define EBUSY 16
+#endif
+#define min_not_zero(x, y) ({ \
+ typeof(x) __x = (x); \
+ typeof(y) __y = (y); \
+ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
char _license[] SEC("license") = "GPL";
+volatile const char fallback_cc[TCP_CA_NAME_MAX];
+const char bpf_dctcp[] = "bpf_dctcp";
+const char tcp_cdg[] = "cdg";
+char cc_res[TCP_CA_NAME_MAX];
+int tcp_cdg_res = 0;
int stg_result = 0;
+int ebusy_cnt = 0;
struct {
__uint(type, BPF_MAP_TYPE_SK_STORAGE);
@@ -28,7 +37,7 @@ struct {
#define DCTCP_MAX_ALPHA 1024U
-struct dctcp {
+struct bpf_dctcp {
__u32 old_delivered;
__u32 old_delivered_ce;
__u32 prior_rcv_nxt;
@@ -41,8 +50,7 @@ struct dctcp {
static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */
static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA;
-static __always_inline void dctcp_reset(const struct tcp_sock *tp,
- struct dctcp *ca)
+static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca)
{
ca->next_seq = tp->snd_nxt;
@@ -50,13 +58,40 @@ static __always_inline void dctcp_reset(const struct tcp_sock *tp,
ca->old_delivered_ce = tp->delivered_ce;
}
-SEC("struct_ops/dctcp_init")
-void BPF_PROG(dctcp_init, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_init, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
int *stg;
+ if (!(tp->ecn_flags & TCP_ECN_OK) && fallback_cc[0]) {
+ /* Switch to fallback */
+ if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback_cc, sizeof(fallback_cc)) == -EBUSY)
+ ebusy_cnt++;
+
+ /* Switch back to myself and the recurred bpf_dctcp_init()
+ * will get -EBUSY for all bpf_setsockopt(TCP_CONGESTION),
+ * except the last "cdg" one.
+ */
+ if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)bpf_dctcp, sizeof(bpf_dctcp)) == -EBUSY)
+ ebusy_cnt++;
+
+ /* Switch back to fallback */
+ if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback_cc, sizeof(fallback_cc)) == -EBUSY)
+ ebusy_cnt++;
+
+ /* Expecting -ENOTSUPP for tcp_cdg_res */
+ tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)tcp_cdg, sizeof(tcp_cdg));
+ bpf_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cc_res, sizeof(cc_res));
+ return;
+ }
+
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->loss_cwnd = 0;
@@ -70,21 +105,21 @@ void BPF_PROG(dctcp_init, struct sock *sk)
dctcp_reset(tp, ca);
}
-SEC("struct_ops/dctcp_ssthresh")
-__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_dctcp_ssthresh, struct sock *sk)
{
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tp->snd_cwnd;
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
}
-SEC("struct_ops/dctcp_update_alpha")
-void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_update_alpha, struct sock *sk, __u32 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
/* Expired RTT */
if (!before(tp->snd_una, ca->next_seq)) {
@@ -110,27 +145,27 @@ void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
}
}
-static __always_inline void dctcp_react_to_loss(struct sock *sk)
+static void dctcp_react_to_loss(struct sock *sk)
{
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tp->snd_cwnd;
tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
}
-SEC("struct_ops/dctcp_state")
-void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Recovery &&
new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state))
dctcp_react_to_loss(sk);
- /* We handle RTO in dctcp_cwnd_event to ensure that we perform only
+ /* We handle RTO in bpf_dctcp_cwnd_event to ensure that we perform only
* one loss-adjustment per RTT.
*/
}
-static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
+static void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -145,9 +180,8 @@ static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
* S: 0 <- last pkt was non-CE
* 1 <- last pkt was CE
*/
-static __always_inline
-void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
- __u32 *prior_rcv_nxt, __u32 *ce_state)
+static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
+ __u32 *prior_rcv_nxt, __u32 *ce_state)
{
__u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
@@ -167,10 +201,10 @@ void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
dctcp_ece_ack_cwr(sk, new_ce_state);
}
-SEC("struct_ops/dctcp_cwnd_event")
-void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
{
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
switch (ev) {
case CA_EVENT_ECN_IS_CE:
@@ -186,49 +220,39 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
}
}
-SEC("struct_ops/dctcp_cwnd_undo")
-__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_dctcp_cwnd_undo, struct sock *sk)
{
- const struct dctcp *ca = inet_csk_ca(sk);
+ const struct bpf_dctcp *ca = inet_csk_ca(sk);
return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}
-SEC("struct_ops/tcp_reno_cong_avoid")
-void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tcp_is_cwnd_limited(sk))
- return;
+extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
- /* In "safe" area, increase. */
- if (tcp_in_slow_start(tp)) {
- acked = tcp_slow_start(tp, acked);
- if (!acked)
- return;
- }
- /* In dangerous area, increase slowly. */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+ tcp_reno_cong_avoid(sk, ack, acked);
}
SEC(".struct_ops")
struct tcp_congestion_ops dctcp_nouse = {
- .init = (void *)dctcp_init,
- .set_state = (void *)dctcp_state,
+ .init = (void *)bpf_dctcp_init,
+ .set_state = (void *)bpf_dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
.name = "bpf_dctcp_nouse",
};
SEC(".struct_ops")
struct tcp_congestion_ops dctcp = {
- .init = (void *)dctcp_init,
- .in_ack_event = (void *)dctcp_update_alpha,
- .cwnd_event = (void *)dctcp_cwnd_event,
- .ssthresh = (void *)dctcp_ssthresh,
- .cong_avoid = (void *)tcp_reno_cong_avoid,
- .undo_cwnd = (void *)dctcp_cwnd_undo,
- .set_state = (void *)dctcp_state,
+ .init = (void *)bpf_dctcp_init,
+ .in_ack_event = (void *)bpf_dctcp_update_alpha,
+ .cwnd_event = (void *)bpf_dctcp_cwnd_event,
+ .ssthresh = (void *)bpf_dctcp_ssthresh,
+ .cong_avoid = (void *)bpf_dctcp_cong_avoid,
+ .undo_cwnd = (void *)bpf_dctcp_cwnd_undo,
+ .set_state = (void *)bpf_dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
.name = "bpf_dctcp",
};
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
new file mode 100644
index 000000000000..c91763f248b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+const char cubic[] = "cubic";
+
+SEC("struct_ops")
+void BPF_PROG(dctcp_nouse_release, struct sock *sk)
+{
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cubic, sizeof(cubic));
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp_rel = {
+ .release = (void *)dctcp_nouse_release,
+ .name = "bpf_dctcp_rel",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 95a5a0778ed7..b04e092fac94 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -19,9 +19,10 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
#define PROG(F) PROG_(F, _##F)
-#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME
+#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM
+
+#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
/* These are the identifiers of the BPF programs that will be used in tail
* calls. Name is limited to 16 characters, with the terminating character and
@@ -144,6 +145,19 @@ int _dissect(struct __sk_buff *skb)
{
struct bpf_flow_keys *keys = skb->flow_keys;
+ if (keys->n_proto == bpf_htons(ETH_P_IP)) {
+ /* IP traffic from FLOW_CONTINUE_SADDR falls-back to
+ * standard dissector
+ */
+ struct iphdr *iph, _iph;
+
+ iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
+ if (iph && iph->ihl == 5 &&
+ iph->saddr == bpf_htonl(FLOW_CONTINUE_SADDR)) {
+ return BPF_FLOW_DISSECTOR_CONTINUE;
+ }
+ }
+
return parse_eth_proto(skb, keys->n_proto);
}
@@ -323,7 +337,7 @@ PROG(IPV6)(struct __sk_buff *skb)
keys->ip_proto = ip6h->nexthdr;
keys->flow_label = ip6_flowlabel(ip6h);
- if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
+ if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
return export_flow_keys(keys, BPF_OK);
return parse_ipv6_proto(skb, ip6h->nexthdr);
diff --git a/tools/testing/selftests/bpf/progs/bpf_gotox.c b/tools/testing/selftests/bpf/progs/bpf_gotox.c
new file mode 100644
index 000000000000..216c71b94c64
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_gotox.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+__u64 in_user;
+__u64 ret_user;
+
+int pid;
+
+/*
+ * Skip all the tests if compiler doesn't support indirect jumps.
+ *
+ * If tests are skipped, then all functions below are compiled as
+ * dummy, such that the skeleton looks the same, and the userspace
+ * program can avoid any checks rather than if data->skip is set.
+ */
+#ifdef __BPF_FEATURE_GOTOX
+__u64 skip SEC(".data") = 0;
+#else
+__u64 skip = 1;
+#endif
+
+struct simple_ctx {
+ __u64 x;
+};
+
+#ifdef __BPF_FEATURE_GOTOX
+__u64 some_var;
+
+/*
+ * This function adds code which will be replaced by a different
+ * number of instructions by the verifier. This adds additional
+ * stress on testing the insn_array maps corresponding to indirect jumps.
+ */
+static __always_inline void adjust_insns(__u64 x)
+{
+ some_var ^= x + bpf_jiffies64();
+}
+
+SEC("syscall")
+int one_switch(struct simple_ctx *ctx)
+{
+ switch (ctx->x) {
+ case 0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int one_switch_non_zero_sec_off(struct simple_ctx *ctx)
+{
+ switch (ctx->x) {
+ case 0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int simple_test_other_sec(struct pt_regs *ctx)
+{
+ __u64 x = in_user;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ switch (x) {
+ case 0:
+ adjust_insns(x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int two_switches(struct simple_ctx *ctx)
+{
+ switch (ctx->x) {
+ case 0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ switch (ctx->x + !!ret_user) {
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 103;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 104;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 107;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 11);
+ ret_user = 205;
+ break;
+ case 5:
+ adjust_insns(ctx->x + 11);
+ ret_user = 115;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 1019;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int big_jump_table(struct simple_ctx *ctx __attribute__((unused)))
+{
+ const void *const jt[256] = {
+ [0 ... 255] = &&default_label,
+ [0] = &&l0,
+ [11] = &&l11,
+ [27] = &&l27,
+ [31] = &&l31,
+ };
+
+ goto *jt[ctx->x & 0xff];
+
+l0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ return 0;
+
+l11:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ return 0;
+
+l27:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ return 0;
+
+l31:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ return 0;
+
+default_label:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ return 0;
+}
+
+SEC("syscall")
+int one_jump_two_maps(struct simple_ctx *ctx __attribute__((unused)))
+{
+ __label__ l1, l2, l3, l4;
+ void *jt1[2] = { &&l1, &&l2 };
+ void *jt2[2] = { &&l3, &&l4 };
+ unsigned int a = ctx->x % 2;
+ unsigned int b = (ctx->x / 2) % 2;
+ volatile int ret = 0;
+
+ if (!(a < 2 && b < 2))
+ return 19;
+
+ if (ctx->x % 2)
+ goto *jt1[a];
+ else
+ goto *jt2[b];
+
+ l1: ret += 1;
+ l2: ret += 3;
+ l3: ret += 5;
+ l4: ret += 7;
+
+ ret_user = ret;
+ return ret;
+}
+
+SEC("syscall")
+int one_map_two_jumps(struct simple_ctx *ctx __attribute__((unused)))
+{
+ __label__ l1, l2, l3;
+ void *jt[3] = { &&l1, &&l2, &&l3 };
+ unsigned int a = (ctx->x >> 2) & 1;
+ unsigned int b = (ctx->x >> 3) & 1;
+ volatile int ret = 0;
+
+ if (ctx->x % 2)
+ goto *jt[a];
+
+ if (ctx->x % 3)
+ goto *jt[a + b];
+
+ l1: ret += 3;
+ l2: ret += 5;
+ l3: ret += 7;
+
+ ret_user = ret;
+ return ret;
+}
+
+/* Just to introduce some non-zero offsets in .text */
+static __noinline int f0(volatile struct simple_ctx *ctx __arg_ctx)
+{
+ if (ctx)
+ return 1;
+ else
+ return 13;
+}
+
+SEC("syscall") int f1(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ return f0(ctx);
+}
+
+static __noinline int __static_global(__u64 x)
+{
+ switch (x) {
+ case 0:
+ adjust_insns(x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int use_static_global1(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ return __static_global(ctx->x);
+}
+
+SEC("syscall")
+int use_static_global2(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ adjust_insns(ctx->x + 1);
+ return __static_global(ctx->x);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int use_static_global_other_sec(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ return __static_global(in_user);
+}
+
+__noinline int __nonstatic_global(__u64 x)
+{
+ switch (x) {
+ case 0:
+ adjust_insns(x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int use_nonstatic_global1(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ return __nonstatic_global(ctx->x);
+}
+
+SEC("syscall")
+int use_nonstatic_global2(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ adjust_insns(ctx->x + 1);
+ return __nonstatic_global(ctx->x);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int use_nonstatic_global_other_sec(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ return __nonstatic_global(in_user);
+}
+
+#else /* __BPF_FEATURE_GOTOX */
+
+#define SKIP_TEST(TEST_NAME) \
+ SEC("syscall") int TEST_NAME(void *ctx) \
+ { \
+ return 0; \
+ }
+
+SKIP_TEST(one_switch);
+SKIP_TEST(one_switch_non_zero_sec_off);
+SKIP_TEST(simple_test_other_sec);
+SKIP_TEST(two_switches);
+SKIP_TEST(big_jump_table);
+SKIP_TEST(one_jump_two_maps);
+SKIP_TEST(one_map_two_jumps);
+SKIP_TEST(use_static_global1);
+SKIP_TEST(use_static_global2);
+SKIP_TEST(use_static_global_other_sec);
+SKIP_TEST(use_nonstatic_global1);
+SKIP_TEST(use_nonstatic_global2);
+SKIP_TEST(use_nonstatic_global_other_sec);
+
+#endif /* __BPF_FEATURE_GOTOX */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c
new file mode 100644
index 000000000000..56957557e3e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_ENTRIES 1000
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_ENTRIES);
+} hash_map_bench SEC(".maps");
+
+u64 __attribute__((__aligned__(256))) percpu_time[256];
+u64 nr_loops;
+
+static int loop_update_callback(__u32 index, u32 *key)
+{
+ u64 init_val = 1;
+
+ bpf_map_update_elem(&hash_map_bench, key, &init_val, BPF_ANY);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 key = cpu + MAX_ENTRIES;
+ u64 start_time = bpf_ktime_get_ns();
+
+ bpf_loop(nr_loops, loop_update_callback, &key, 0);
+ percpu_time[cpu & 255] = bpf_ktime_get_ns() - start_time;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c
new file mode 100644
index 000000000000..1eb74ddca414
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+} hash_map_bench SEC(".maps");
+
+/* The number of slots to store times */
+#define NR_SLOTS 32
+#define NR_CPUS 256
+#define CPU_MASK (NR_CPUS-1)
+
+/* Configured by userspace */
+u64 nr_entries;
+u64 nr_loops;
+u32 __attribute__((__aligned__(8))) key[NR_CPUS];
+
+/* Filled by us */
+u64 __attribute__((__aligned__(256))) percpu_times_index[NR_CPUS];
+u64 __attribute__((__aligned__(256))) percpu_times[NR_CPUS][NR_SLOTS];
+
+static inline void patch_key(u32 i)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ key[0] = i + 1;
+#else
+ key[0] = __builtin_bswap32(i + 1);
+#endif
+ /* the rest of key is random and is configured by userspace */
+}
+
+static int lookup_callback(__u32 index, u32 *unused)
+{
+ patch_key(index);
+ return bpf_map_lookup_elem(&hash_map_bench, key) ? 0 : 1;
+}
+
+static int loop_lookup_callback(__u32 index, u32 *unused)
+{
+ return bpf_loop(nr_entries, lookup_callback, NULL, 0) ? 0 : 1;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 times_index;
+ u64 start_time;
+
+ times_index = percpu_times_index[cpu & CPU_MASK] % NR_SLOTS;
+ start_time = bpf_ktime_get_ns();
+ bpf_loop(nr_loops, loop_lookup_callback, NULL, 0);
+ percpu_times[cpu & CPU_MASK][times_index] = bpf_ktime_get_ns() - start_time;
+ percpu_times_index[cpu & CPU_MASK] += 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
deleted file mode 100644
index 3d83b185c4bc..000000000000
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
-#define bpf_iter__netlink bpf_iter__netlink___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#define bpf_iter__task_file bpf_iter__task_file___not_used
-#define bpf_iter__task_vma bpf_iter__task_vma___not_used
-#define bpf_iter__tcp bpf_iter__tcp___not_used
-#define tcp6_sock tcp6_sock___not_used
-#define bpf_iter__udp bpf_iter__udp___not_used
-#define udp6_sock udp6_sock___not_used
-#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
-#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
-#define bpf_iter__sockmap bpf_iter__sockmap___not_used
-#define btf_ptr btf_ptr___not_used
-#define BTF_F_COMPACT BTF_F_COMPACT___not_used
-#define BTF_F_NONAME BTF_F_NONAME___not_used
-#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
-#define BTF_F_ZERO BTF_F_ZERO___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
-#undef bpf_iter__ipv6_route
-#undef bpf_iter__netlink
-#undef bpf_iter__task
-#undef bpf_iter__task_file
-#undef bpf_iter__task_vma
-#undef bpf_iter__tcp
-#undef tcp6_sock
-#undef bpf_iter__udp
-#undef udp6_sock
-#undef bpf_iter__bpf_map_elem
-#undef bpf_iter__bpf_sk_storage_map
-#undef bpf_iter__sockmap
-#undef btf_ptr
-#undef BTF_F_COMPACT
-#undef BTF_F_NONAME
-#undef BTF_F_PTR_RAW
-#undef BTF_F_ZERO
-
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__ipv6_route {
- struct bpf_iter_meta *meta;
- struct fib6_info *rt;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__netlink {
- struct bpf_iter_meta *meta;
- struct netlink_sock *sk;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task_file {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
- __u32 fd;
- struct file *file;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task_vma {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
- struct vm_area_struct *vma;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__tcp {
- struct bpf_iter_meta *meta;
- struct sock_common *sk_common;
- uid_t uid;
-} __attribute__((preserve_access_index));
-
-struct tcp6_sock {
- struct tcp_sock tcp;
- struct ipv6_pinfo inet6;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__udp {
- struct bpf_iter_meta *meta;
- struct udp_sock *udp_sk;
- uid_t uid __attribute__((aligned(8)));
- int bucket __attribute__((aligned(8)));
-} __attribute__((preserve_access_index));
-
-struct udp6_sock {
- struct udp_sock udp;
- struct ipv6_pinfo inet6;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map_elem {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
- void *key;
- void *value;
-};
-
-struct bpf_iter__bpf_sk_storage_map {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
- struct sock *sk;
- void *value;
-};
-
-struct bpf_iter__sockmap {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
- void *key;
- struct sock *sk;
-};
-
-struct btf_ptr {
- void *ptr;
- __u32 type_id;
- __u32 flags;
-};
-
-enum {
- BTF_F_COMPACT = (1ULL << 0),
- BTF_F_NONAME = (1ULL << 1),
- BTF_F_PTR_RAW = (1ULL << 2),
- BTF_F_ZERO = (1ULL << 3),
-};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
index 6286023fd62b..19710cc0f250 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
@@ -1,17 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct key_t {
- int a;
- int b;
- int c;
-};
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 3);
@@ -19,13 +13,20 @@ struct {
__type(value, __u64);
} arraymap1 SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 10);
+ __type(key, __u64);
+ __type(value, __u32);
+} hashmap1 SEC(".maps");
+
__u32 key_sum = 0;
__u64 val_sum = 0;
SEC("iter/bpf_map_elem")
int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx)
{
- __u32 *key = ctx->key;
+ __u32 *hmap_val, *key = ctx->key;
__u64 *val = ctx->value;
if (key == (void *)0 || val == (void *)0)
@@ -35,6 +36,18 @@ int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx)
bpf_seq_write(ctx->meta->seq, val, sizeof(__u64));
key_sum += *key;
val_sum += *val;
+
+ /* workaround - It's necessary to do this convoluted (val, key)
+ * write into hashmap1, instead of simply doing
+ * bpf_map_update_elem(&hashmap1, val, key, BPF_ANY);
+ * because key has MEM_RDONLY flag and bpf_map_update elem expects
+ * types without this flag
+ */
+ bpf_map_update_elem(&hashmap1, val, val, BPF_ANY);
+ hmap_val = bpf_map_lookup_elem(&hashmap1, val);
+ if (hmap_val)
+ *hmap_val = *key;
+
*val = *key;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
index 6dfce3fd68bc..f47da665f7e0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
@@ -113,3 +112,12 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
return 0;
}
+
+SEC("iter.s/bpf_map_elem")
+int sleepable_dummy_dump(struct bpf_iter__bpf_map_elem *ctx)
+{
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(ctx->meta->seq, "map dump starts\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c
new file mode 100644
index 000000000000..7b69e1887705
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red Hat, Inc. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/bpf_link")
+int dump_bpf_link(struct bpf_iter__bpf_link *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct bpf_link *link = ctx->link;
+ int link_id;
+
+ if (!link)
+ return 0;
+
+ link_id = link->id;
+ bpf_seq_write(seq, &link_id, sizeof(link_id));
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b83b5d2e17dc..c868ffb8080f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
index 85fa710fad90..9fdea8cd4c6f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
@@ -1,17 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct key_t {
- int a;
- int b;
- int c;
-};
-
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 3);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
index feaaa2b89c57..aa529f76c7fc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -20,7 +20,7 @@ struct {
} hashmap1 SEC(".maps");
/* will set before prog run */
-volatile const __u32 num_cpus = 0;
+volatile const __s32 num_cpus = 0;
/* will collect results during prog run */
__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
index 6cecab2b32ba..e88dab196e0f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Google LLC. */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
index 6b70ccaba301..eb9642923e1c 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -16,19 +16,37 @@ struct {
__u32 val_sum = 0;
__u32 ipv6_sk_count = 0;
+__u32 to_add_val = 0;
SEC("iter/bpf_sk_storage_map")
-int dump_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+int rw_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
{
struct sock *sk = ctx->sk;
__u32 *val = ctx->value;
- if (sk == (void *)0 || val == (void *)0)
+ if (sk == NULL || val == NULL)
return 0;
if (sk->sk_family == AF_INET6)
ipv6_sk_count++;
val_sum += *val;
+
+ *val += to_add_val;
+
+ return 0;
+}
+
+SEC("iter/bpf_sk_storage_map")
+int oob_write_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+ struct sock *sk = ctx->sk;
+ __u32 *val = ctx->value;
+
+ if (sk == NULL || val == NULL)
+ return 0;
+
+ *(val + 1) = 0xdeadbeef;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index d58d9f1642b5..73a5cf3ba3d3 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
new file mode 100644
index 000000000000..3e725b1fce37
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned long last_sym_value = 0;
+
+static inline char to_lower(char c)
+{
+ if (c >= 'A' && c <= 'Z')
+ c += ('a' - 'A');
+ return c;
+}
+
+static inline char to_upper(char c)
+{
+ if (c >= 'a' && c <= 'z')
+ c -= ('a' - 'A');
+ return c;
+}
+
+/* Dump symbols with max size; the latter is calculated by caching symbol N value
+ * and when iterating on symbol N+1, we can print max size of symbol N via
+ * address of N+1 - address of N.
+ */
+SEC("iter/ksym")
+int dump_ksym(struct bpf_iter__ksym *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct kallsym_iter *iter = ctx->ksym;
+ __u32 seq_num = ctx->meta->seq_num;
+ unsigned long value;
+ char type;
+
+ if (!iter)
+ return 0;
+
+ if (seq_num == 0) {
+ BPF_SEQ_PRINTF(seq, "ADDR TYPE NAME MODULE_NAME KIND MAX_SIZE\n");
+ return 0;
+ }
+ if (last_sym_value)
+ BPF_SEQ_PRINTF(seq, "0x%x\n", iter->value - last_sym_value);
+ else
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ value = iter->show_value ? iter->value : 0;
+
+ last_sym_value = value;
+
+ type = iter->type;
+
+ if (iter->module_name[0]) {
+ type = iter->exported ? to_upper(type) : to_lower(type);
+ BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ",
+ value, type, iter->name, iter->module_name);
+ } else {
+ BPF_SEQ_PRINTF(seq, "0x%llx %c %s ", value, type, iter->name);
+ }
+ if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "MOD ");
+ else if (!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "FTRACE_MOD ");
+ else if (!iter->pos_bpf_end || iter->pos_bpf_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "BPF ");
+ else
+ BPF_SEQ_PRINTF(seq, "KPROBE ");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
new file mode 100644
index 000000000000..2f20485e0de3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_map_values(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 value = 0;
+
+ if (ctx->value == (void *)0)
+ return 0;
+
+ bpf_probe_read_kernel(&value, sizeof(value), ctx->value);
+ value_sum += value;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 95989f4c99b5..00b2ceae81fb 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..a8aa5a71d846
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <vmlinux.h>
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define bpf_tcp_sk(skc) ({ \
+ struct sock_common *_skc = skc; \
+ sk = NULL; \
+ tp = NULL; \
+ if (_skc) { \
+ tp = bpf_skc_to_tcp_sock(_skc); \
+ sk = (struct sock *)tp; \
+ } \
+ tp; \
+})
+
+unsigned short reuse_listen_hport = 0;
+unsigned short listen_hport = 0;
+const char cubic_cc[] = "bpf_cubic";
+char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
+bool random_retry = false;
+
+
+SEC("iter/tcp")
+int change_tcp_cc(struct bpf_iter__tcp *ctx)
+{
+ char cur_cc[TCP_CA_NAME_MAX];
+ struct tcp_sock *tp;
+ struct sock *sk;
+
+ if (!bpf_tcp_sk(ctx->sk_common))
+ return 0;
+
+ if (sk->sk_family != AF_INET6 ||
+ (sk->sk_state != TCP_LISTEN &&
+ sk->sk_state != TCP_ESTABLISHED) ||
+ (sk->sk_num != reuse_listen_hport &&
+ sk->sk_num != listen_hport &&
+ bpf_ntohs(sk->sk_dport) != listen_hport))
+ return 0;
+
+ if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
+ cur_cc, sizeof(cur_cc)))
+ return 0;
+
+ if (bpf_strncmp(cur_cc, TCP_CA_NAME_MAX, cubic_cc))
+ return 0;
+
+ if (random_retry && bpf_get_prandom_u32() % 4 == 1)
+ return 1;
+
+ bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
new file mode 100644
index 000000000000..d92631ec6161
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <vmlinux.h>
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+
+#define AUTOBIND_LEN 6
+char sun_path[AUTOBIND_LEN];
+
+#define NR_CASES 5
+int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX};
+int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1};
+int sndbuf_getsockopt_expected[NR_CASES];
+
+static inline int cmpname(struct unix_sock *unix_sk)
+{
+ int i;
+
+ for (i = 0; i < AUTOBIND_LEN; i++) {
+ if (unix_sk->addr->name->sun_path[i] != sun_path[i])
+ return -1;
+ }
+
+ return 0;
+}
+
+SEC("iter/unix")
+int change_sndbuf(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ int i, err;
+
+ if (!unix_sk || !unix_sk->addr)
+ return 0;
+
+ if (unix_sk->addr->name->sun_path[0])
+ return 0;
+
+ if (cmpname(unix_sk))
+ return 0;
+
+ for (i = 0; i < NR_CASES; i++) {
+ err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf_setsockopt[i],
+ sizeof(sndbuf_setsockopt[i]));
+ if (err)
+ break;
+
+ err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf_getsockopt[i],
+ sizeof(sndbuf_getsockopt[i]));
+ if (err)
+ break;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
index f3af0e30cead..317fe49760cc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Cloudflare */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
deleted file mode 100644
index b7f32c160f4e..000000000000
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-char _license[] SEC("license") = "GPL";
-
-SEC("iter/task")
-int dump_task(struct bpf_iter__task *ctx)
-{
- struct seq_file *seq = ctx->meta->seq;
- struct task_struct *task = ctx->task;
- static char info[] = " === END ===";
-
- if (task == (void *)0) {
- BPF_SEQ_PRINTF(seq, "%s\n", info);
- return 0;
- }
-
- if (ctx->meta->seq_num == 0)
- BPF_SEQ_PRINTF(seq, " tgid gid\n");
-
- BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
index a1ddc36f13ec..ef2f7c8d9373 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020, Oracle and/or its affiliates. */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include <errno.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index b2f7c7c5f952..959a8d899eaf 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -1,21 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
int count = 0;
int tgid = 0;
+int last_tgid = 0;
+int unique_tgid_count = 0;
SEC("iter/task_file")
int dump_task_file(struct bpf_iter__task_file *ctx)
{
struct seq_file *seq = ctx->meta->seq;
struct task_struct *task = ctx->task;
- __u32 fd = ctx->fd;
struct file *file = ctx->file;
+ __u32 fd = ctx->fd;
if (task == (void *)0 || file == (void *)0)
return 0;
@@ -28,6 +29,11 @@ int dump_task_file(struct bpf_iter__task_file *ctx)
if (tgid == task->tgid && task->tgid != task->pid)
count++;
+ if (last_tgid != task->tgid) {
+ last_tgid = task->tgid;
+ unique_tgid_count++;
+ }
+
BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
(long)file->f_op);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index 50e59a2e142e..f5a309455490 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
@@ -35,3 +34,35 @@ int dump_task_stack(struct bpf_iter__task *ctx)
return 0;
}
+
+int num_user_stacks = 0;
+
+SEC("iter/task")
+int get_task_user_stacks(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ uint64_t buf_sz = 0;
+ int64_t res;
+
+ if (task == (void *)0)
+ return 0;
+
+ res = bpf_get_task_stack(task, entries,
+ MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, BPF_F_USER_STACK);
+ if (res <= 0)
+ return 0;
+
+ /* Only one task, the current one, should succeed */
+ ++num_user_stacks;
+
+ buf_sz += res;
+
+ /* If the verifier doesn't refine bpf_get_task_stack res, and instead
+ * assumes res is entirely unknown, this program will fail to load as
+ * the verifier will believe that max buf_sz value allows reading
+ * past the end of entries in bpf_seq_write call
+ */
+ bpf_seq_write(seq, &entries, buf_sz);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c
index 11d1aa37cf11..d64ba7ddaed5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
@@ -21,6 +20,8 @@ char _license[] SEC("license") = "GPL";
#define D_PATH_BUF_SIZE 1024
char d_path_buf[D_PATH_BUF_SIZE] = {};
__u32 pid = 0;
+__u32 one_task = 0;
+__u32 one_task_error = 0;
SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
{
@@ -34,8 +35,11 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
return 0;
file = vma->vm_file;
- if (task->tgid != pid)
+ if (task->tgid != (pid_t)pid) {
+ if (one_task)
+ one_task_error = 1;
return 0;
+ }
perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-';
perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-';
perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-';
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
new file mode 100644
index 000000000000..966ee5a7b066
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+uint32_t tid = 0;
+int num_unknown_tid = 0;
+int num_known_tid = 0;
+void *user_ptr = 0;
+void *user_ptr_long = 0;
+uint32_t pid = 0;
+
+static char big_str1[5000];
+static char big_str2[5005];
+static char big_str3[4996];
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ static char info[] = " === END ===";
+
+ if (task == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ if (task->pid != (pid_t)tid)
+ num_unknown_tid++;
+ else
+ num_known_tid++;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
+ return 0;
+}
+
+int num_expected_failure_copy_from_user_task = 0;
+int num_expected_failure_copy_from_user_task_str = 0;
+int num_success_copy_from_user_task = 0;
+int num_success_copy_from_user_task_str = 0;
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ static const char info[] = " === END ===";
+ struct pt_regs *regs;
+ char task_str1[10] = "aaaaaaaaaa";
+ char task_str2[10], task_str3[10];
+ char task_str4[20] = "aaaaaaaaaaaaaaaaaaaa";
+ void *ptr;
+ uint32_t user_data = 0;
+ int ret;
+
+ if (task == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Read an invalid pointer and ensure we get an error */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+ if (ret) {
+ ++num_expected_failure_copy_from_user_task;
+ } else {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Try to read the contents of the task's instruction pointer from the
+ * remote task's address space.
+ */
+ regs = (struct pt_regs *)bpf_task_pt_regs(task);
+ if (regs == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ ptr = (void *)PT_REGS_IP(regs);
+
+ ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+ if (ret) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_success_copy_from_user_task;
+
+ /* Read an invalid pointer and ensure we get an error */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, 0);
+ if (ret >= 0 || task_str1[9] != 'a' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Read an invalid pointer and ensure we get error with pad zeros flag */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1),
+ ptr, task, BPF_F_PAD_ZEROS);
+ if (ret >= 0 || task_str1[9] != '\0' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_expected_failure_copy_from_user_task_str;
+
+ /* Same length as the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str2, 10, user_ptr, task, 0);
+ /* only need to do the task pid check once */
+ if (bpf_strncmp(task_str2, 10, "test_data\0") != 0 || ret != 10 || task->tgid != pid) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str3, 2, user_ptr, task, 0);
+ if (bpf_strncmp(task_str3, 2, "t\0") != 0 || ret != 2) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, 0);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string with pad zeros flag */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string past a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr, task, 0);
+ if (bpf_strncmp(big_str1, 10, "test_data\0") != 0 || ret != 10) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* String that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str1, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str1 + 4996, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ for (int i = 0; i < 4999; ++i) {
+ if (i % 2 == 0) {
+ if (big_str1[i] != 'b') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ } else {
+ if (big_str1[i] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ }
+ }
+
+ /* Longer length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str2, 5005, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str2, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str2 + 4996, 5, "bab\0\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str3, 4996, user_ptr_long, task, 0);
+ if (bpf_strncmp(big_str3, 4, "baba") != 0 || ret != 4996
+ || bpf_strncmp(big_str3 + 4992, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_success_copy_from_user_task_str;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid data\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 54380c5e1069..b1e509b231cd 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
@@ -100,13 +99,13 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = sp->tcp_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_timeout;
- } else if (timer_pending(&sp->sk_timer)) {
+ timer_expires = sp->tcp_retransmit_timer.expires;
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = icsk->icsk_keepalive_timer.expires;
} else {
timer_active = 0;
timer_expires = bpf_jiffies64();
@@ -122,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
}
BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
- seq_num, src, srcp, destp, destp);
+ seq_num, src, srcp, dest, destp);
BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
state,
tp->write_seq - tp->snd_una, rx_queue,
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index b4fbddfa4e10..dbc7166aee91 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
@@ -100,13 +99,13 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = sp->tcp_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_timeout;
- } else if (timer_pending(&sp->sk_timer)) {
+ timer_expires = sp->tcp_retransmit_timer.expires;
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = icsk->icsk_keepalive_timer.expires;
} else {
timer_active = 0;
timer_expires = bpf_jiffies64();
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
index 2a4647f20c46..6b17e7e86a48 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index ee49493dc125..56177508798f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
@@ -9,8 +9,8 @@ __u32 map1_id = 0, map2_id = 0;
__u32 map1_accessed = 0, map2_accessed = 0;
__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
-static volatile const __u32 print_len;
-static volatile const __u32 ret1;
+volatile const __u32 print_len;
+volatile const __u32 ret1;
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
@@ -45,7 +45,7 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
}
/* fill seq_file buffer */
- for (i = 0; i < print_len; i++)
+ for (i = 0; i < (int)print_len; i++)
bpf_seq_write(seq, &seq_num, sizeof(seq_num));
return ret;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
index e3a7575e81d2..9d8b7310d2c2 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
index 1c7304f56b1e..b150bd468824 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
index d5e3df66ad9a..6a4c50497c5e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index f258583afbbd..23b2aa2604de 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
@@ -65,7 +64,8 @@ int dump_udp4(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
+ udp_sk->drop_counters.drops0.counter +
+ udp_sk->drop_counters.drops1.counter);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 65f93bb03f0f..c48b05aa2a4b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include "bpf_iter.h"
+#include <vmlinux.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
@@ -73,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
-
+ udp_sk->drop_counters.drops0.counter +
+ udp_sk->drop_counters.drops1.counter);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..fea275df9e22
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <vmlinux.h>
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ struct sock *sk = (struct sock *)unix_sk;
+ struct seq_file *seq;
+ __u32 seq_num;
+
+ if (!unix_sk)
+ return 0;
+
+ seq = ctx->meta->seq;
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "Num RefCount Protocol Flags Type St Inode Path\n");
+
+ BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %8lu",
+ unix_sk,
+ sk->sk_refcnt.refs.counter,
+ 0,
+ sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+ sk->sk_type,
+ sk->sk_socket ?
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+ sock_i_ino(sk));
+
+ if (unix_sk->addr) {
+ if (unix_sk->addr->name->sun_path[0]) {
+ BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+ } else {
+ /* The name of the abstract UNIX domain socket starts
+ * with '\0' and can contain '\0'. The null bytes
+ * should be escaped as done in unix_seq_show().
+ */
+ __u64 i, len;
+
+ len = unix_sk->addr->len - sizeof(short);
+
+ BPF_SEQ_PRINTF(seq, " @");
+
+ for (i = 1; i < len; i++) {
+ /* unix_validate_addr() tests this upper bound. */
+ if (i >= sizeof(struct sockaddr_un))
+ break;
+
+ BPF_SEQ_PRINTF(seq, "%c",
+ unix_sk->addr->name->sun_path[i] ?:
+ '@');
+ }
+ }
+ }
+
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c
new file mode 100644
index 000000000000..174298e122d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 unique_tgid_cnt = 0;
+uintptr_t address = 0;
+uintptr_t offset = 0;
+__u32 last_tgid = 0;
+__u32 pid = 0;
+__u32 page_shift = 0;
+
+SEC("iter/task_vma")
+int get_vma_offset(struct bpf_iter__task_vma *ctx)
+{
+ struct vm_area_struct *vma = ctx->vma;
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+
+ if (task == NULL || vma == NULL)
+ return 0;
+
+ if (last_tgid != task->tgid)
+ unique_tgid_cnt++;
+ last_tgid = task->tgid;
+
+ if (task->tgid != pid)
+ return 0;
+
+ if (vma->vm_start <= address && vma->vm_end > address) {
+ offset = address - vma->vm_start + (vma->vm_pgoff << page_shift);
+ BPF_SEQ_PRINTF(seq, "OK\n");
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
new file mode 100644
index 000000000000..1d194455b109
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int output;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 32);
+ __type(key, int);
+ __type(value, int);
+} map1 SEC(".maps");
+
+/* These should be set by the user program */
+u32 nested_callback_nr_loops;
+u32 stop_index = -1;
+u32 nr_loops;
+int pid;
+int callback_selector;
+
+/* Making these global variables so that the userspace program
+ * can verify the output through the skeleton
+ */
+int nr_loops_returned;
+int g_output;
+int err;
+
+static int callback(__u32 index, void *data)
+{
+ struct callback_ctx *ctx = data;
+
+ if (index >= stop_index)
+ return 1;
+
+ ctx->output += index;
+
+ return 0;
+}
+
+static int empty_callback(__u32 index, void *data)
+{
+ return 0;
+}
+
+static int nested_callback2(__u32 index, void *data)
+{
+ nr_loops_returned += bpf_loop(nested_callback_nr_loops, callback, data, 0);
+
+ return 0;
+}
+
+static int nested_callback1(__u32 index, void *data)
+{
+ bpf_loop(nested_callback_nr_loops, nested_callback2, data, 0);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int test_prog(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ nr_loops_returned = bpf_loop(nr_loops, callback, &data, 0);
+
+ if (nr_loops_returned < 0)
+ err = nr_loops_returned;
+ else
+ g_output = data.output;
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_null_ctx(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ nr_loops_returned = bpf_loop(nr_loops, empty_callback, NULL, 0);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_invalid_flags(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ err = bpf_loop(nr_loops, callback, &data, 1);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_nested_calls(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ nr_loops_returned = 0;
+ bpf_loop(nr_loops, nested_callback1, &data, 0);
+
+ g_output = data.output;
+
+ return 0;
+}
+
+static int callback_set_f0(int i, void *ctx)
+{
+ g_output = 0xF0;
+ return 0;
+}
+
+static int callback_set_0f(int i, void *ctx)
+{
+ g_output = 0x0F;
+ return 0;
+}
+
+/*
+ * non-constant callback is a corner case for bpf_loop inline logic
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_non_constant_callback(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ int (*callback)(int i, void *ctx);
+
+ g_output = 0;
+
+ if (callback_selector == 0x0F)
+ callback = callback_set_0f;
+ else
+ callback = callback_set_f0;
+
+ bpf_loop(1, callback, NULL, 0);
+
+ return 0;
+}
+
+static int stack_check_inner_callback(void *ctx)
+{
+ return 0;
+}
+
+static int map1_lookup_elem(int key)
+{
+ int *val = bpf_map_lookup_elem(&map1, &key);
+
+ return val ? *val : -1;
+}
+
+static void map1_update_elem(int key, int val)
+{
+ bpf_map_update_elem(&map1, &key, &val, BPF_ANY);
+}
+
+static int stack_check_outer_callback(void *ctx)
+{
+ int a = map1_lookup_elem(1);
+ int b = map1_lookup_elem(2);
+ int c = map1_lookup_elem(3);
+ int d = map1_lookup_elem(4);
+ int e = map1_lookup_elem(5);
+ int f = map1_lookup_elem(6);
+
+ bpf_loop(1, stack_check_inner_callback, NULL, 0);
+
+ map1_update_elem(1, a + 1);
+ map1_update_elem(2, b + 1);
+ map1_update_elem(3, c + 1);
+ map1_update_elem(4, d + 1);
+ map1_update_elem(5, e + 1);
+ map1_update_elem(6, f + 1);
+
+ return 0;
+}
+
+/* Some of the local variables in stack_check and
+ * stack_check_outer_callback would be allocated on stack by
+ * compiler. This test should verify that stack content for these
+ * variables is preserved between calls to bpf_loop (might be an issue
+ * if loop inlining allocates stack slots incorrectly).
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int stack_check(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ int a = map1_lookup_elem(7);
+ int b = map1_lookup_elem(8);
+ int c = map1_lookup_elem(9);
+ int d = map1_lookup_elem(10);
+ int e = map1_lookup_elem(11);
+ int f = map1_lookup_elem(12);
+
+ bpf_loop(1, stack_check_outer_callback, NULL, 0);
+
+ map1_update_elem(7, a + 1);
+ map1_update_elem(8, b + 1);
+ map1_update_elem(9, c + 1);
+ map1_update_elem(10, d + 1);
+ map1_update_elem(11, e + 1);
+ map1_update_elem(12, f + 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
new file mode 100644
index 000000000000..d461746fd3c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+u32 nr_loops;
+long hits;
+
+static int empty_callback(__u32 index, void *data)
+{
+ return 0;
+}
+
+static int outer_loop(__u32 index, void *data)
+{
+ bpf_loop(nr_loops, empty_callback, NULL, 0);
+ __sync_add_and_fetch(&hits, nr_loops);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+ bpf_loop(1000, outer_loop, NULL, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
new file mode 100644
index 000000000000..c9bfbe1bafc1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_MISC_H__
+#define __BPF_MISC_H__
+
+#define XSTR(s) STR(s)
+#define STR(s) #s
+
+/* Expand a macro and then stringize the expansion */
+#define QUOTE(str) #str
+#define EXPAND_QUOTE(str) QUOTE(str)
+
+/* This set of attributes controls behavior of the
+ * test_loader.c:test_loader__run_subtests().
+ *
+ * The test_loader sequentially loads each program in a skeleton.
+ * Programs could be loaded in privileged and unprivileged modes.
+ * - __success, __failure, __msg, __regex imply privileged mode;
+ * - __success_unpriv, __failure_unpriv, __msg_unpriv, __regex_unpriv
+ * imply unprivileged mode.
+ * If combination of privileged and unprivileged attributes is present
+ * both modes are used. If none are present privileged mode is implied.
+ *
+ * See test_loader.c:drop_capabilities() for exact set of capabilities
+ * that differ between privileged and unprivileged modes.
+ *
+ * For test filtering purposes the name of the program loaded in
+ * unprivileged mode is derived from the usual program name by adding
+ * `@unpriv' suffix.
+ *
+ * __msg Message expected to be found in the verifier log.
+ * Multiple __msg attributes could be specified.
+ * To match a regular expression use "{{" "}}" brackets,
+ * e.g. "foo{{[0-9]+}}" matches strings like "foo007".
+ * Extended POSIX regular expression syntax is allowed
+ * inside the brackets.
+ * __not_msg Message not expected to be found in verifier log.
+ * If __msg_not is situated between __msg tags
+ * framework matches __msg tags first, and then
+ * checks that __msg_not is not present in a portion of
+ * a log between bracketing __msg tags.
+ * Same regex syntax as for __msg is supported.
+ * __msg_unpriv Same as __msg but for unprivileged mode.
+ * __not_msg_unpriv Same as __not_msg but for unprivileged mode.
+ *
+ * __stderr Message expected to be found in bpf stderr stream. The
+ * same regex rules apply like __msg.
+ * __stderr_unpriv Same as __stderr but for unpriveleged mode.
+ * __stdout Same as __stderr but for stdout stream.
+ * __stdout_unpriv Same as __stdout but for unpriveleged mode.
+ *
+ * __xlated Expect a line in a disassembly log after verifier applies rewrites.
+ * Multiple __xlated attributes could be specified.
+ * Regular expressions could be specified same way as in __msg.
+ * __xlated_unpriv Same as __xlated but for unprivileged mode.
+ *
+ * __jited Match a line in a disassembly of the jited BPF program.
+ * Has to be used after __arch_* macro.
+ * For example:
+ *
+ * __arch_x86_64
+ * __jited(" endbr64")
+ * __jited(" nopl (%rax,%rax)")
+ * __jited(" xorq %rax, %rax")
+ * ...
+ * __naked void some_test(void)
+ * {
+ * asm volatile (... ::: __clobber_all);
+ * }
+ *
+ * Regular expressions could be included in patterns same way
+ * as in __msg.
+ *
+ * By default assume that each pattern has to be matched on the
+ * next consecutive line of disassembly, e.g.:
+ *
+ * __jited(" endbr64") # matched on line N
+ * __jited(" nopl (%rax,%rax)") # matched on line N+1
+ *
+ * If match occurs on a wrong line an error is reported.
+ * To override this behaviour use literal "...", e.g.:
+ *
+ * __jited(" endbr64") # matched on line N
+ * __jited("...") # not matched
+ * __jited(" nopl (%rax,%rax)") # matched on any line >= N
+ *
+ * __jited_unpriv Same as __jited but for unprivileged mode.
+ *
+ *
+ * __success Expect program load success in privileged mode.
+ * __success_unpriv Expect program load success in unprivileged mode.
+ *
+ * __failure Expect program load failure in privileged mode.
+ * __failure_unpriv Expect program load failure in unprivileged mode.
+ *
+ * __retval Execute the program using BPF_PROG_TEST_RUN command,
+ * expect return value to match passed parameter:
+ * - a decimal number
+ * - a hexadecimal number, when starts from 0x
+ * - a macro which expands to one of the above
+ * - literal _INT_MIN (expands to INT_MIN)
+ * In addition, two special macros are defined below:
+ * - POINTER_VALUE
+ * - TEST_DATA_LEN
+ * __retval_unpriv Same, but load program in unprivileged mode.
+ *
+ * __description Text to be used instead of a program name for display
+ * and filtering purposes.
+ *
+ * __log_level Log level to use for the program, numeric value expected.
+ *
+ * __flag Adds one flag use for the program, the following values are valid:
+ * - BPF_F_STRICT_ALIGNMENT;
+ * - BPF_F_TEST_RND_HI32;
+ * - BPF_F_TEST_STATE_FREQ;
+ * - BPF_F_SLEEPABLE;
+ * - BPF_F_XDP_HAS_FRAGS;
+ * - A numeric value.
+ * Multiple __flag attributes could be specified, the final flags
+ * value is derived by applying binary "or" to all specified values.
+ *
+ * __auxiliary Annotated program is not a separate test, but used as auxiliary
+ * for some other test cases and should always be loaded.
+ * __auxiliary_unpriv Same, but load program in unprivileged mode.
+ *
+ * __arch_* Specify on which architecture the test case should be tested.
+ * Several __arch_* annotations could be specified at once.
+ * When test case is not run on current arch it is marked as skipped.
+ * __caps_unpriv Specify the capabilities that should be set when running the test.
+ *
+ * __linear_size Specify the size of the linear area of non-linear skbs, or
+ * 0 for linear skbs.
+ */
+#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
+#define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg)))
+#define __jited(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
+#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
+#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
+#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
+#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __xlated_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __jited_unpriv(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
+#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
+#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
+#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
+#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
+#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val))))
+#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val))))
+#define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary")))
+#define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv")))
+#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path)))
+#define __arch(arch) __attribute__((btf_decl_tag("comment:test_arch=" arch)))
+#define __arch_x86_64 __arch("X86_64")
+#define __arch_arm64 __arch("ARM64")
+#define __arch_riscv64 __arch("RISCV64")
+#define __arch_s390x __arch("s390x")
+#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
+#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
+#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
+#define __stderr(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr=" XSTR(__COUNTER__) "=" msg)))
+#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __linear_size(sz) __attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz))))
+
+/* Define common capabilities tested using __caps_unpriv */
+#define CAP_NET_ADMIN 12
+#define CAP_SYS_ADMIN 21
+#define CAP_PERFMON 38
+#define CAP_BPF 39
+
+/* Convenience macro for use with 'asm volatile' blocks */
+#define __naked __attribute__((naked))
+#define __clobber_all "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"
+#define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory"
+#define __imm(name) [name]"i"(name)
+#define __imm_const(name, expr) [name]"i"(expr)
+#define __imm_addr(name) [name]"i"(&name)
+#define __imm_ptr(name) [name]"r"(&name)
+#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
+
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
+/* Magic constants used with __retval() */
+#define POINTER_VALUE 0xbadcafe
+#define TEST_DATA_LEN 64
+
+#ifndef __used
+#define __used __attribute__((used))
+#endif
+
+#if defined(__TARGET_ARCH_x86)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__x64_"
+#elif defined(__TARGET_ARCH_s390)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__s390x_"
+#elif defined(__TARGET_ARCH_arm64)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__arm64_"
+#elif defined(__TARGET_ARCH_riscv)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__riscv_"
+#elif defined(__TARGET_ARCH_powerpc)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX ""
+#else
+#define SYSCALL_WRAPPER 0
+#define SYS_PREFIX "__se_"
+#endif
+
+/* How many arguments are passed to function in register */
+#if defined(__TARGET_ARCH_x86) || defined(__x86_64__)
+#define FUNC_REG_ARG_CNT 6
+#elif defined(__i386__)
+#define FUNC_REG_ARG_CNT 3
+#elif defined(__TARGET_ARCH_s390) || defined(__s390x__)
+#define FUNC_REG_ARG_CNT 5
+#elif defined(__TARGET_ARCH_arm) || defined(__arm__)
+#define FUNC_REG_ARG_CNT 4
+#elif defined(__TARGET_ARCH_arm64) || defined(__aarch64__)
+#define FUNC_REG_ARG_CNT 8
+#elif defined(__TARGET_ARCH_mips) || defined(__mips__)
+#define FUNC_REG_ARG_CNT 8
+#elif defined(__TARGET_ARCH_powerpc) || defined(__powerpc__) || defined(__powerpc64__)
+#define FUNC_REG_ARG_CNT 8
+#elif defined(__TARGET_ARCH_sparc) || defined(__sparc__)
+#define FUNC_REG_ARG_CNT 6
+#elif defined(__TARGET_ARCH_riscv) || defined(__riscv__)
+#define FUNC_REG_ARG_CNT 8
+#else
+/* default to 5 for others */
+#define FUNC_REG_ARG_CNT 5
+#endif
+
+/* make it look to compiler like value is read and written */
+#define __sink(expr) asm volatile("" : "+g"(expr))
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+#define CAN_USE_GOTOL
+#endif
+
+#if __clang_major__ >= 18
+#define CAN_USE_BPF_ST
+#endif
+
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) || \
+ (defined(__TARGET_ARCH_powerpc))
+#define CAN_USE_LOAD_ACQ_STORE_REL
+#endif
+
+#if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)
+#define SPEC_V1
+#endif
+
+#if defined(__TARGET_ARCH_x86)
+#define SPEC_V4
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
new file mode 100644
index 000000000000..82a5c6c6ba83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile struct {
+ /* thread to activate trace programs for */
+ pid_t tgid;
+ /* return error from __init function */
+ int inject_error;
+ /* uffd monitored range start address */
+ void *fault_addr;
+} bpf_mod_race_config = { -1 };
+
+int bpf_blocking = 0;
+int res_try_get_module = -1;
+
+static __always_inline bool check_thread_id(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return task->tgid == bpf_mod_race_config.tgid;
+}
+
+/* The trace of execution is something like this:
+ *
+ * finit_module()
+ * load_module()
+ * prepare_coming_module()
+ * notifier_call(MODULE_STATE_COMING)
+ * btf_parse_module()
+ * btf_alloc_id() // Visible to userspace at this point
+ * list_add(btf_mod->list, &btf_modules)
+ * do_init_module()
+ * freeinit = kmalloc()
+ * ret = mod->init()
+ * bpf_prog_widen_race()
+ * bpf_copy_from_user()
+ * ...<sleep>...
+ * if (ret < 0)
+ * ...
+ * free_module()
+ * return ret
+ *
+ * At this point, module loading thread is blocked, we now load the program:
+ *
+ * bpf_check
+ * add_kfunc_call/check_pseudo_btf_id
+ * btf_try_get_module
+ * try_get_module_live == false
+ * return -ENXIO
+ *
+ * Without the fix (try_get_module_live in btf_try_get_module):
+ *
+ * bpf_check
+ * add_kfunc_call/check_pseudo_btf_id
+ * btf_try_get_module
+ * try_get_module == true
+ * <store module reference in btf_kfunc_tab or used_btf array>
+ * ...
+ * return fd
+ *
+ * Now, if we inject an error in the blocked program, our module will be freed
+ * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
+ * Later, when bpf program is freed, it will try to module_put already freed
+ * module. This is why try_get_module_live returns false if mod->state is not
+ * MODULE_STATE_LIVE.
+ */
+
+SEC("fmod_ret.s/bpf_fentry_test1")
+int BPF_PROG(widen_race, int a, int ret)
+{
+ char dst;
+
+ if (!check_thread_id())
+ return 0;
+ /* Indicate that we will attempt to block */
+ bpf_blocking = 1;
+ bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
+ return bpf_mod_race_config.inject_error;
+}
+
+SEC("fexit/do_init_module")
+int BPF_PROG(fexit_init_module, struct module *mod, int ret)
+{
+ if (!check_thread_id())
+ return 0;
+ /* Indicate that we finished blocking */
+ bpf_blocking = 2;
+ return 0;
+}
+
+SEC("fexit/btf_try_get_module")
+int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
+{
+ res_try_get_module = !!mod;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
new file mode 100644
index 000000000000..3754f581b328
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_QDISC_COMMON_H
+#define _BPF_QDISC_COMMON_H
+
+#define NET_XMIT_SUCCESS 0x00
+#define NET_XMIT_DROP 0x01 /* skb dropped */
+#define NET_XMIT_CN 0x02 /* congestion notification */
+
+#define TC_PRIO_CONTROL 7
+#define TC_PRIO_MAX 15
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+
+struct bpf_sk_buff_ptr;
+
+static struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
+{
+ return (struct qdisc_skb_cb *)skb->cb;
+}
+
+static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
+{
+ return qdisc_skb_cb(skb)->pkt_len;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
new file mode 100644
index 000000000000..f188062ed730
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops")
+int BPF_PROG(bpf_qdisc_test_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops")
+struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch)
+{
+ return NULL;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch)
+{
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops test = {
+ .enqueue = (void *)bpf_qdisc_test_enqueue,
+ .dequeue = (void *)bpf_qdisc_test_dequeue,
+ .reset = (void *)bpf_qdisc_test_reset,
+ .destroy = (void *)bpf_qdisc_test_destroy,
+ .id = "bpf_qdisc_test",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
new file mode 100644
index 000000000000..1de2be3e370b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct skb_node {
+ struct sk_buff __kptr * skb;
+ struct bpf_list_node node;
+};
+
+private(A) struct bpf_spin_lock q_fifo_lock;
+private(A) struct bpf_list_head q_fifo __contains(skb_node, node);
+
+bool init_called;
+
+SEC("struct_ops/bpf_fifo_enqueue")
+int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct skb_node *skbn;
+ u32 pkt_len;
+
+ if (sch->q.qlen == sch->limit)
+ goto drop;
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn)
+ goto drop;
+
+ pkt_len = qdisc_pkt_len(skb);
+
+ sch->q.qlen++;
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&q_fifo_lock);
+ bpf_list_push_back(&q_fifo, &skbn->node);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ sch->qstats.backlog += pkt_len;
+ return NET_XMIT_SUCCESS;
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops/bpf_fifo_dequeue")
+struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+ if (!node)
+ return NULL;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+ if (!skb)
+ return NULL;
+
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fifo_init")
+int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ sch->limit = 1000;
+ init_called = true;
+ return 0;
+}
+
+SEC("struct_ops/bpf_fifo_reset")
+void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct skb_node *skbn;
+ int i;
+
+ bpf_for(i, 0, sch->q.qlen) {
+ struct sk_buff *skb = NULL;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ if (!node)
+ break;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_kfree_skb(skb);
+ bpf_obj_drop(skbn);
+ }
+ sch->q.qlen = 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fifo_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fifo = {
+ .enqueue = (void *)bpf_fifo_enqueue,
+ .dequeue = (void *)bpf_fifo_dequeue,
+ .init = (void *)bpf_fifo_init,
+ .reset = (void *)bpf_fifo_reset,
+ .destroy = (void *)bpf_fifo_destroy,
+ .id = "bpf_fifo",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
new file mode 100644
index 000000000000..1a3233a275c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
@@ -0,0 +1,756 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct
+ * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before
+ * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was
+ * introduced. It gives each flow a fair chance to transmit packets in a
+ * round-robin fashion. Note that for flow pacing, bpf_fq currently only
+ * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there
+ * are multiple bpf_fq instances, they will have a shared view of flows and
+ * configuration since some key data structure such as fq_prio_flows,
+ * fq_nonprio_flows, and fq_bpf_data are global.
+ *
+ * To use bpf_fq alone without running selftests, use the following commands.
+ *
+ * 1. Register bpf_fq to the kernel
+ * bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf
+ * 2. Add bpf_fq to an interface
+ * tc qdisc add dev <interface name> root handle <handle> bpf_fq
+ * 3. Delete bpf_fq attached to the interface
+ * tc qdisc delete dev <interface name> root
+ * 4. Unregister bpf_fq
+ * bpftool struct_ops unregister name fq
+ *
+ * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id.
+ * The struct_ops_map_name, fq, used in the bpftool command is the name of the
+ * Qdisc_ops.
+ *
+ * SEC(".struct_ops")
+ * struct Qdisc_ops fq = {
+ * ...
+ * .id = "bpf_fq",
+ * };
+ */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_SEC 1000000000L
+
+#define NUM_QUEUE (1 << 20)
+
+struct fq_bpf_data {
+ u32 quantum;
+ u32 initial_quantum;
+ u32 flow_refill_delay;
+ u32 flow_plimit;
+ u64 horizon;
+ u32 orphan_mask;
+ u32 timer_slack;
+ u64 time_next_delayed_flow;
+ u64 unthrottle_latency_ns;
+ u8 horizon_drop;
+ u32 new_flow_cnt;
+ u32 old_flow_cnt;
+ u64 ktime_cache;
+};
+
+enum {
+ CLS_RET_PRIO = 0,
+ CLS_RET_NONPRIO = 1,
+ CLS_RET_ERR = 2,
+};
+
+struct skb_node {
+ u64 tstamp;
+ struct sk_buff __kptr * skb;
+ struct bpf_rb_node node;
+};
+
+struct fq_flow_node {
+ int credit;
+ u32 qlen;
+ u64 age;
+ u64 time_next_packet;
+ struct bpf_list_node list_node;
+ struct bpf_rb_node rb_node;
+ struct bpf_rb_root queue __contains(skb_node, node);
+ struct bpf_spin_lock lock;
+ struct bpf_refcount refcount;
+};
+
+struct dequeue_nonprio_ctx {
+ bool stop_iter;
+ u64 expire;
+ u64 now;
+};
+
+struct remove_flows_ctx {
+ bool gc_only;
+ u32 reset_cnt;
+ u32 reset_max;
+};
+
+struct unset_throttled_flows_ctx {
+ bool unset_all;
+ u64 now;
+};
+
+struct fq_stashed_flow {
+ struct fq_flow_node __kptr * flow;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, NUM_QUEUE);
+} fq_nonprio_flows SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, 1);
+} fq_prio_flows SEC(".maps");
+
+private(A) struct bpf_spin_lock fq_delayed_lock;
+private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node);
+
+private(B) struct bpf_spin_lock fq_new_flows_lock;
+private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node);
+
+private(C) struct bpf_spin_lock fq_old_flows_lock;
+private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node);
+
+private(D) struct fq_bpf_data q;
+
+/* Wrapper for bpf_kptr_xchg that expects NULL dst */
+static void bpf_kptr_xchg_back(void *map_val, void *ptr)
+{
+ void *ret;
+
+ ret = bpf_kptr_xchg(map_val, ptr);
+ if (ret)
+ bpf_obj_drop(ret);
+}
+
+static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct skb_node *skbn_a;
+ struct skb_node *skbn_b;
+
+ skbn_a = container_of(a, struct skb_node, node);
+ skbn_b = container_of(b, struct skb_node, node);
+
+ return skbn_a->tstamp < skbn_b->tstamp;
+}
+
+static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct fq_flow_node *flow_a;
+ struct fq_flow_node *flow_b;
+
+ flow_a = container_of(a, struct fq_flow_node, rb_node);
+ flow_b = container_of(b, struct fq_flow_node, rb_node);
+
+ return flow_a->time_next_packet < flow_b->time_next_packet;
+}
+
+static void
+fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_back(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct bpf_list_node **node, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ *node = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ *flow_cnt -= 1;
+}
+
+static bool
+fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock)
+{
+ struct bpf_list_node *node;
+
+ bpf_spin_lock(lock);
+ node = bpf_list_pop_front(head);
+ if (node) {
+ bpf_list_push_front(head, node);
+ bpf_spin_unlock(lock);
+ return false;
+ }
+ bpf_spin_unlock(lock);
+
+ return true;
+}
+
+/* flow->age is used to denote the state of the flow (not-detached, detached, throttled)
+ * as well as the timestamp when the flow is detached.
+ *
+ * 0: not-detached
+ * 1 - (~0ULL-1): detached
+ * ~0ULL: throttled
+ */
+static void fq_flow_set_detached(struct fq_flow_node *flow)
+{
+ flow->age = bpf_jiffies64();
+}
+
+static bool fq_flow_is_detached(struct fq_flow_node *flow)
+{
+ return flow->age != 0 && flow->age != ~0ULL;
+}
+
+static bool sk_listener(struct sock *sk)
+{
+ return (1 << sk->__sk_common.skc_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
+}
+
+static void fq_gc(void);
+
+static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash)
+{
+ struct fq_stashed_flow tmp = {};
+ struct fq_flow_node *flow;
+ int ret;
+
+ flow = bpf_obj_new(typeof(*flow));
+ if (!flow)
+ return -ENOMEM;
+
+ flow->credit = q.initial_quantum,
+ flow->qlen = 0,
+ flow->age = 1,
+ flow->time_next_packet = 0,
+
+ ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0);
+ if (ret == -ENOMEM || ret == -E2BIG) {
+ fq_gc();
+ bpf_map_update_elem(&fq_nonprio_flows, &hash, &tmp, 0);
+ }
+
+ *sflow = bpf_map_lookup_elem(flow_map, &hash);
+ if (!*sflow) {
+ bpf_obj_drop(flow);
+ return -ENOMEM;
+ }
+
+ bpf_kptr_xchg_back(&(*sflow)->flow, flow);
+ return 0;
+}
+
+static int
+fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow)
+{
+ struct sock *sk = skb->sk;
+ int ret = CLS_RET_NONPRIO;
+ u64 hash = 0;
+
+ if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) {
+ *sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ ret = CLS_RET_PRIO;
+ } else {
+ if (!sk || sk_listener(sk)) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ /* Avoid collision with an existing flow hash, which
+ * only uses the lower 32 bits of hash, by setting the
+ * upper half of hash to 1.
+ */
+ hash |= (1ULL << 32);
+ } else if (sk->__sk_common.skc_state == TCP_CLOSE) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ hash |= (1ULL << 32);
+ } else {
+ hash = sk->__sk_common.skc_hash;
+ }
+ *sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash);
+ }
+
+ if (!*sflow)
+ ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ?
+ CLS_RET_ERR : CLS_RET_NONPRIO;
+
+ return ret;
+}
+
+static bool fq_packet_beyond_horizon(struct sk_buff *skb)
+{
+ return (s64)skb->tstamp > (s64)(q.ktime_cache + q.horizon);
+}
+
+SEC("struct_ops/bpf_fq_enqueue")
+int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct fq_flow_node *flow = NULL, *flow_copy;
+ struct fq_stashed_flow *sflow;
+ u64 time_to_send, jiffies;
+ struct skb_node *skbn;
+ int ret;
+
+ if (sch->q.qlen >= sch->limit)
+ goto drop;
+
+ if (!skb->tstamp) {
+ time_to_send = q.ktime_cache = bpf_ktime_get_ns();
+ } else {
+ if (fq_packet_beyond_horizon(skb)) {
+ q.ktime_cache = bpf_ktime_get_ns();
+ if (fq_packet_beyond_horizon(skb)) {
+ if (q.horizon_drop)
+ goto drop;
+
+ skb->tstamp = q.ktime_cache + q.horizon;
+ }
+ }
+ time_to_send = skb->tstamp;
+ }
+
+ ret = fq_classify(skb, &sflow);
+ if (ret == CLS_RET_ERR)
+ goto drop;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ goto drop;
+
+ if (ret == CLS_RET_NONPRIO) {
+ if (flow->qlen >= q.flow_plimit) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ if (fq_flow_is_detached(flow)) {
+ flow_copy = bpf_refcount_acquire(flow);
+
+ jiffies = bpf_jiffies64();
+ if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) {
+ if (flow_copy->credit < q.quantum)
+ flow_copy->credit = q.quantum;
+ }
+ flow_copy->age = 0;
+ fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy,
+ &q.new_flow_cnt);
+ }
+ }
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ skbn->tstamp = skb->tstamp = time_to_send;
+
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&flow->lock);
+ bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less);
+ bpf_spin_unlock(&flow->lock);
+
+ flow->qlen++;
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+}
+
+static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx)
+{
+ struct bpf_rb_node *node = NULL;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_delayed_lock);
+
+ node = bpf_rbtree_first(&fq_delayed);
+ if (!node) {
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ if (!ctx->unset_all && flow->time_next_packet > ctx->now) {
+ q.time_next_delayed_flow = flow->time_next_packet;
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node);
+
+ bpf_spin_unlock(&fq_delayed_lock);
+
+ if (!node)
+ return 1;
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ flow->age = 0;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+
+ return 0;
+}
+
+static void fq_flow_set_throttled(struct fq_flow_node *flow)
+{
+ flow->age = ~0ULL;
+
+ if (q.time_next_delayed_flow > flow->time_next_packet)
+ q.time_next_delayed_flow = flow->time_next_packet;
+
+ bpf_spin_lock(&fq_delayed_lock);
+ bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less);
+ bpf_spin_unlock(&fq_delayed_lock);
+}
+
+static void fq_check_throttled(u64 now)
+{
+ struct unset_throttled_flows_ctx ctx = {
+ .unset_all = false,
+ .now = now,
+ };
+ unsigned long sample;
+
+ if (q.time_next_delayed_flow > now)
+ return;
+
+ sample = (unsigned long)(now - q.time_next_delayed_flow);
+ q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3;
+ q.unthrottle_latency_ns += sample >> 3;
+
+ q.time_next_delayed_flow = ~0ULL;
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0);
+}
+
+static struct sk_buff*
+fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx)
+{
+ u64 time_next_packet, time_to_send;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct bpf_list_head *head;
+ struct bpf_list_node *node;
+ struct bpf_spin_lock *lock;
+ struct fq_flow_node *flow;
+ struct skb_node *skbn;
+ bool is_empty;
+ u32 *cnt;
+
+ if (q.new_flow_cnt) {
+ head = &fq_new_flows;
+ lock = &fq_new_flows_lock;
+ cnt = &q.new_flow_cnt;
+ } else if (q.old_flow_cnt) {
+ head = &fq_old_flows;
+ lock = &fq_old_flows_lock;
+ cnt = &q.old_flow_cnt;
+ } else {
+ if (q.time_next_delayed_flow != ~0ULL)
+ ctx->expire = q.time_next_delayed_flow;
+ goto break_loop;
+ }
+
+ fq_flows_remove_front(head, lock, &node, cnt);
+ if (!node)
+ goto break_loop;
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ if (flow->credit <= 0) {
+ flow->credit += q.quantum;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ return NULL;
+ }
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock);
+ if (head == &fq_new_flows && !is_empty) {
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ } else {
+ fq_flow_set_detached(flow);
+ bpf_obj_drop(flow);
+ }
+ return NULL;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ time_to_send = skbn->tstamp;
+
+ time_next_packet = (time_to_send > flow->time_next_packet) ?
+ time_to_send : flow->time_next_packet;
+ if (ctx->now < time_next_packet) {
+ bpf_spin_unlock(&flow->lock);
+ flow->time_next_packet = time_next_packet;
+ fq_flow_set_throttled(flow);
+ return NULL;
+ }
+
+ rb_node = bpf_rbtree_remove(&flow->queue, rb_node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto add_flow_and_break;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+ if (!skb)
+ goto add_flow_and_break;
+
+ flow->credit -= qdisc_skb_cb(skb)->pkt_len;
+ flow->qlen--;
+
+add_flow_and_break:
+ fq_flows_add_head(head, lock, flow, cnt);
+
+break_loop:
+ ctx->stop_iter = true;
+ return skb;
+}
+
+static struct sk_buff *fq_dequeue_prio(void)
+{
+ struct fq_flow_node *flow = NULL;
+ struct fq_stashed_flow *sflow;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+ u64 hash = 0;
+
+ sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ if (!sflow)
+ return NULL;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ return NULL;
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ goto out;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto out;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+out:
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fq_dequeue")
+struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch)
+{
+ struct dequeue_nonprio_ctx cb_ctx = {};
+ struct sk_buff *skb = NULL;
+ int i;
+
+ if (!sch->q.qlen)
+ goto out;
+
+ skb = fq_dequeue_prio();
+ if (skb)
+ goto dequeue;
+
+ q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns();
+ fq_check_throttled(q.ktime_cache);
+ bpf_for(i, 0, sch->limit) {
+ skb = fq_dequeue_nonprio_flows(i, &cb_ctx);
+ if (cb_ctx.stop_iter)
+ break;
+ };
+
+ if (skb) {
+dequeue:
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ return skb;
+ }
+
+ if (cb_ctx.expire)
+ bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack);
+out:
+ return NULL;
+}
+
+static int fq_remove_flows_in_list(u32 index, void *ctx)
+{
+ struct bpf_list_node *node;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_new_flows_lock);
+ node = bpf_list_pop_front(&fq_new_flows);
+ bpf_spin_unlock(&fq_new_flows_lock);
+ if (!node) {
+ bpf_spin_lock(&fq_old_flows_lock);
+ node = bpf_list_pop_front(&fq_old_flows);
+ bpf_spin_unlock(&fq_old_flows_lock);
+ if (!node)
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ bpf_obj_drop(flow);
+
+ return 0;
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+/* limit number of collected flows per round */
+#define FQ_GC_MAX 8
+#define FQ_GC_AGE (3*CONFIG_HZ)
+
+static bool fq_gc_candidate(struct fq_flow_node *flow)
+{
+ u64 jiffies = bpf_jiffies64();
+
+ return fq_flow_is_detached(flow) &&
+ ((s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0);
+}
+
+static int
+fq_remove_flows(struct bpf_map *flow_map, u64 *hash,
+ struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx)
+{
+ if (sflow->flow &&
+ (!ctx->gc_only || fq_gc_candidate(sflow->flow))) {
+ bpf_map_delete_elem(flow_map, hash);
+ ctx->reset_cnt++;
+ }
+
+ return ctx->reset_cnt < ctx->reset_max ? 0 : 1;
+}
+
+static void fq_gc(void)
+{
+ struct remove_flows_ctx cb_ctx = {
+ .gc_only = true,
+ .reset_cnt = 0,
+ .reset_max = FQ_GC_MAX,
+ };
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_reset")
+void BPF_PROG(bpf_fq_reset, struct Qdisc *sch)
+{
+ struct unset_throttled_flows_ctx utf_ctx = {
+ .unset_all = true,
+ };
+ struct remove_flows_ctx rf_ctx = {
+ .gc_only = false,
+ .reset_cnt = 0,
+ .reset_max = NUM_QUEUE,
+ };
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0);
+
+ rf_ctx.reset_cnt = 0;
+ bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0);
+ fq_new_flow(&fq_prio_flows, &sflow, hash);
+
+ bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0);
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_init")
+int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = sch->dev_queue->dev;
+ u32 psched_mtu = dev->mtu + dev->hard_header_len;
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0)
+ return -ENOMEM;
+
+ sch->limit = 10000;
+ q.initial_quantum = 10 * psched_mtu;
+ q.quantum = 2 * psched_mtu;
+ q.flow_refill_delay = 40;
+ q.flow_plimit = 100;
+ q.horizon = 10ULL * NSEC_PER_SEC;
+ q.horizon_drop = 1;
+ q.orphan_mask = 1024 - 1;
+ q.timer_slack = 10 * NSEC_PER_USEC;
+ q.time_next_delayed_flow = ~0ULL;
+ q.unthrottle_latency_ns = 0ULL;
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ return 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fq_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fq = {
+ .enqueue = (void *)bpf_fq_enqueue,
+ .dequeue = (void *)bpf_fq_dequeue,
+ .reset = (void *)bpf_fq_reset,
+ .init = (void *)bpf_fq_init,
+ .destroy = (void *)bpf_fq_destroy,
+ .id = "bpf_fq",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c
new file mode 100644
index 000000000000..70d8b08f5914
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_smc.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+enum {
+ BPF_SMC_LISTEN = 10,
+};
+
+struct smc_sock___local {
+ struct sock sk;
+ struct smc_sock *listen_smc;
+ bool use_fallback;
+} __attribute__((preserve_access_index));
+
+int smc_cnt = 0;
+int fallback_cnt = 0;
+
+SEC("fentry/smc_release")
+int BPF_PROG(bpf_smc_release, struct socket *sock)
+{
+ /* only count from one side (client) */
+ if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN)
+ return 0;
+ smc_cnt++;
+ return 0;
+}
+
+SEC("fentry/smc_switch_to_fallback")
+int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc)
+{
+ /* only count from one side (client) */
+ if (smc && !smc->listen_smc)
+ fallback_cnt++;
+ return 0;
+}
+
+/* go with default value if no strat was found */
+bool default_ip_strat_value = true;
+
+struct smc_policy_ip_key {
+ __u32 sip;
+ __u32 dip;
+};
+
+struct smc_policy_ip_value {
+ __u8 mode;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct smc_policy_ip_key));
+ __uint(value_size, sizeof(struct smc_policy_ip_value));
+ __uint(max_entries, 128);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} smc_policy_ip SEC(".maps");
+
+static bool smc_check(__u32 src, __u32 dst)
+{
+ struct smc_policy_ip_value *value;
+ struct smc_policy_ip_key key = {
+ .sip = src,
+ .dip = dst,
+ };
+
+ value = bpf_map_lookup_elem(&smc_policy_ip, &key);
+ return value ? value->mode : default_ip_strat_value;
+}
+
+SEC("fmod_ret/update_socket_protocol")
+int BPF_PROG(smc_run, int family, int type, int protocol)
+{
+ struct task_struct *task;
+
+ if (family != AF_INET && family != AF_INET6)
+ return protocol;
+
+ if ((type & 0xf) != SOCK_STREAM)
+ return protocol;
+
+ if (protocol != 0 && protocol != IPPROTO_TCP)
+ return protocol;
+
+ task = bpf_get_current_task_btf();
+ /* Prevent from affecting other tests */
+ if (!task || !task->nsproxy->net_ns->smc.hs_ctrl)
+ return protocol;
+
+ return IPPROTO_SMC;
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp,
+ struct inet_request_sock *ireq)
+{
+ return smc_check(ireq->req.__req_common.skc_daddr,
+ ireq->req.__req_common.skc_rcv_saddr);
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp)
+{
+ return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr,
+ tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr);
+}
+
+SEC(".struct_ops")
+struct smc_hs_ctrl linkcheck = {
+ .name = "linkcheck",
+ .syn_option = (void *)bpf_smc_set_tcp_option,
+ .synack_option = (void *)bpf_smc_set_tcp_option_cond,
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
new file mode 100644
index 000000000000..9e7d9674ce2a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Sony Group Corporation */
+#include <vmlinux.h>
+
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+int arg1 = 0;
+unsigned long arg2 = 0;
+unsigned long arg3 = 0;
+unsigned long arg4_cx = 0;
+unsigned long arg4 = 0;
+unsigned long arg5 = 0;
+
+int arg1_core = 0;
+unsigned long arg2_core = 0;
+unsigned long arg3_core = 0;
+unsigned long arg4_core_cx = 0;
+unsigned long arg4_core = 0;
+unsigned long arg5_core = 0;
+
+int option_syscall = 0;
+unsigned long arg2_syscall = 0;
+unsigned long arg3_syscall = 0;
+unsigned long arg4_syscall = 0;
+unsigned long arg5_syscall = 0;
+
+const volatile pid_t filter_pid = 0;
+
+SEC("kprobe/" SYS_PREFIX "sys_prctl")
+int BPF_KPROBE(handle_sys_prctl)
+{
+ struct pt_regs *real_regs;
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ unsigned long tmp = 0;
+
+ if (pid != filter_pid)
+ return 0;
+
+ real_regs = PT_REGS_SYSCALL_REGS(ctx);
+
+ /* test for PT_REGS_PARM */
+
+ bpf_probe_read_kernel(&tmp, sizeof(tmp), &PT_REGS_PARM1_SYSCALL(real_regs));
+ arg1 = tmp;
+ bpf_probe_read_kernel(&arg2, sizeof(arg2), &PT_REGS_PARM2_SYSCALL(real_regs));
+ bpf_probe_read_kernel(&arg3, sizeof(arg3), &PT_REGS_PARM3_SYSCALL(real_regs));
+ bpf_probe_read_kernel(&arg4_cx, sizeof(arg4_cx), &PT_REGS_PARM4(real_regs));
+ bpf_probe_read_kernel(&arg4, sizeof(arg4), &PT_REGS_PARM4_SYSCALL(real_regs));
+ bpf_probe_read_kernel(&arg5, sizeof(arg5), &PT_REGS_PARM5_SYSCALL(real_regs));
+
+ /* test for the CORE variant of PT_REGS_PARM */
+ arg1_core = PT_REGS_PARM1_CORE_SYSCALL(real_regs);
+ arg2_core = PT_REGS_PARM2_CORE_SYSCALL(real_regs);
+ arg3_core = PT_REGS_PARM3_CORE_SYSCALL(real_regs);
+ arg4_core_cx = PT_REGS_PARM4_CORE(real_regs);
+ arg4_core = PT_REGS_PARM4_CORE_SYSCALL(real_regs);
+ arg5_core = PT_REGS_PARM5_CORE_SYSCALL(real_regs);
+
+ return 0;
+}
+
+SEC("ksyscall/prctl")
+int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != filter_pid)
+ return 0;
+
+ option_syscall = option;
+ arg2_syscall = arg2;
+ arg3_syscall = arg3;
+ arg4_syscall = arg4;
+ arg5_syscall = arg5;
+ return 0;
+}
+
+__u64 splice_fd_in;
+__u64 splice_off_in;
+__u64 splice_fd_out;
+__u64 splice_off_out;
+__u64 splice_len;
+__u64 splice_flags;
+
+SEC("ksyscall/splice")
+int BPF_KSYSCALL(splice_enter, int fd_in, loff_t *off_in, int fd_out,
+ loff_t *off_out, size_t len, unsigned int flags)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != filter_pid)
+ return 0;
+
+ splice_fd_in = fd_in;
+ splice_off_in = (__u64)off_in;
+ splice_fd_out = fd_out;
+ splice_off_out = (__u64)off_out;
+ splice_len = len;
+ splice_flags = flags;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
new file mode 100644
index 000000000000..8a7a4c1b54e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "X";
+
+SEC("struct_ops")
+void BPF_PROG(nogpltcp_init, struct sock *sk)
+{
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops bpf_nogpltcp = {
+ .init = (void *)nogpltcp_init,
+ .name = "bpf_nogpltcp",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_test_utils.h b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
new file mode 100644
index 000000000000..f4e67b492dd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_TEST_UTILS_H__
+#define __BPF_TEST_UTILS_H__
+
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Clobber as many native registers and stack slots as possible. */
+static __always_inline void clobber_regs_stack(void)
+{
+ char tmp_str[] = "123456789";
+ unsigned long tmp;
+
+ bpf_strtoul(tmp_str, sizeof(tmp_str), 0, &tmp);
+ __sink(tmp);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 01378911252b..d8dacef37c16 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -2,14 +2,93 @@
#ifndef __BPF_TRACING_NET_H__
#define __BPF_TRACING_NET_H__
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+
#define AF_INET 2
#define AF_INET6 10
+#define SOL_SOCKET 1
+#define SO_REUSEADDR 2
+#define SO_SNDBUF 7
+#define SO_RCVBUF 8
+#define SO_KEEPALIVE 9
+#define SO_PRIORITY 12
+#define SO_REUSEPORT 15
+#if defined(__TARGET_ARCH_powerpc)
+#define SO_RCVLOWAT 16
+#else
+#define SO_RCVLOWAT 18
+#endif
+#define SO_BINDTODEVICE 25
+#define SO_MARK 36
+#define SO_MAX_PACING_RATE 47
+#define SO_BINDTOIFINDEX 62
+#define SO_TXREHASH 74
+#define __SO_ACCEPTCON (1 << 16)
+
+#define IP_TOS 1
+
+#define SOL_IPV6 41
+#define IPV6_TCLASS 67
+#define IPV6_AUTOFLOWLABEL 70
+
+#define TC_ACT_UNSPEC (-1)
+#define TC_ACT_OK 0
+#define TC_ACT_SHOT 2
+
+#define SOL_TCP 6
+#define TCP_NODELAY 1
+#define TCP_MAXSEG 2
+#define TCP_KEEPIDLE 4
+#define TCP_KEEPINTVL 5
+#define TCP_KEEPCNT 6
+#define TCP_SYNCNT 7
+#define TCP_WINDOW_CLAMP 10
+#define TCP_CONGESTION 13
+#define TCP_THIN_LINEAR_TIMEOUTS 16
+#define TCP_USER_TIMEOUT 18
+#define TCP_NOTSENT_LOWAT 25
+#define TCP_SAVE_SYN 27
+#define TCP_SAVED_SYN 28
+#define TCP_CA_NAME_MAX 16
+#define TCP_NAGLE_OFF 1
+#define TCP_RTO_MAX_MS 44
+
+#define TCP_ECN_OK 1
+#define TCP_ECN_QUEUE_CWR 2
+#define TCP_ECN_DEMAND_CWR 4
+#define TCP_ECN_SEEN 8
+
+#define TCP_CONG_NEEDS_ECN 0x2
+
#define ICSK_TIME_RETRANS 1
#define ICSK_TIME_PROBE0 3
#define ICSK_TIME_LOSS_PROBE 5
#define ICSK_TIME_REO_TIMEOUT 6
+#define ETH_ALEN 6
+#define ETH_HLEN 14
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+
+#define NEXTHDR_TCP 6
+
+#define TCPOPT_NOP 1
+#define TCPOPT_EOL 0
+#define TCPOPT_MSS 2
+#define TCPOPT_WINDOW 3
+#define TCPOPT_TIMESTAMP 8
+#define TCPOPT_SACK_PERM 4
+
+#define TCPOLEN_MSS 4
+#define TCPOLEN_WINDOW 3
+#define TCPOLEN_TIMESTAMP 10
+#define TCPOLEN_SACK_PERM 2
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_PARTIAL 3
+
#define IFNAMSIZ 16
#define RTF_GATEWAY 0x0002
@@ -17,6 +96,14 @@
#define TCP_INFINITE_SSTHRESH 0x7fffffff
#define TCP_PINGPONG_THRESH 3
+#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
+#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
+#define FLAG_DATA_SACKED 0x20 /* New SACK. */
+#define FLAG_SND_UNA_ADVANCED \
+ 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
+#define FLAG_ACKED (FLAG_DATA_ACKED | FLAG_SYN_ACKED)
+#define FLAG_FORWARD_PROGRESS (FLAG_ACKED | FLAG_DATA_SACKED)
+
#define fib_nh_dev nh_common.nhc_dev
#define fib_nh_gw_family nh_common.nhc_gw_family
#define fib_nh_gw6 nh_common.nhc_gw.ipv6
@@ -25,6 +112,8 @@
#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
#define inet_dport sk.__sk_common.skc_dport
+#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1]
+
#define ir_loc_addr req.__req_common.skc_rcv_saddr
#define ir_num req.__req_common.skc_num
#define ir_rmt_addr req.__req_common.skc_daddr
@@ -32,12 +121,19 @@
#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
+#define sk_num __sk_common.skc_num
+#define sk_dport __sk_common.skc_dport
#define sk_family __sk_common.skc_family
#define sk_rmem_alloc sk_backlog.rmem_alloc
#define sk_refcnt __sk_common.skc_refcnt
#define sk_state __sk_common.skc_state
+#define sk_net __sk_common.skc_net
+#define sk_rcv_saddr __sk_common.skc_rcv_saddr
#define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
+#define sk_flags __sk_common.skc_flags
+#define sk_reuse __sk_common.skc_reuse
+#define sk_cookie __sk_common.skc_cookie
#define s6_addr32 in6_u.u6_addr32
@@ -48,4 +144,51 @@
#define tw_v6_daddr __tw_common.skc_v6_daddr
#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
+#define tcp_jiffies32 ((__u32)bpf_jiffies64())
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+static inline bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1 - seq2) < 0;
+}
+
+#define after(seq2, seq1) before(seq1, seq2)
+
+static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
+{
+ return (struct inet_connection_sock *)sk;
+}
+
+static inline void *inet_csk_ca(const struct sock *sk)
+{
+ return (void *)inet_csk(sk)->icsk_ca_priv;
+}
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+ return tp->snd_cwnd < tp->snd_ssthresh;
+}
+
+static inline bool tcp_is_cwnd_limited(const struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ /* If in slow start, ensure cwnd grows to twice what was ACKed. */
+ if (tcp_in_slow_start(tp))
+ return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+ return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
+}
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
new file mode 100644
index 000000000000..21a560427b10
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_bad_signed_arr_elem_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c
new file mode 100644
index 000000000000..888e79db6a77
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c
new file mode 100644
index 000000000000..194749130d87
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c
new file mode 100644
index 000000000000..3d732d4193e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___err_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c
new file mode 100644
index 000000000000..17cf5d6a848d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___val3_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
deleted file mode 100644
index dd0ffa518f36..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_arr_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
deleted file mode 100644
index bc83372088ad..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_arr_value_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
deleted file mode 100644
index 917bec41be08..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
deleted file mode 100644
index 6ec7e6ec1c91..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
deleted file mode 100644
index 7bbcacf2b0d1..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
deleted file mode 100644
index f384dd38ec70..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_struct_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c
new file mode 100644
index 000000000000..d14b496190c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___wrong_field_defs x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c
new file mode 100644
index 000000000000..3824345d82ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___diff_offs x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c
new file mode 100644
index 000000000000..57ae2c258928
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
index 8f44767a75fa..e01690618e1e 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
@@ -11,7 +11,7 @@
/*
*struct bitfields_only_mixed_types {
* int a: 3;
- * long int b: 2;
+ * long b: 2;
* _Bool c: 1;
* enum {
* A = 0,
@@ -27,7 +27,7 @@
struct bitfields_only_mixed_types {
int a: 3;
- long int b: 2;
+ long b: 2;
bool c: 1; /* it's really a _Bool type */
enum {
A, /* A = 0, dumper is very explicit */
@@ -44,8 +44,8 @@ struct bitfields_only_mixed_types {
* char: 4;
* int a: 4;
* short b;
- * long int c;
- * long int d: 8;
+ * long c;
+ * long d: 8;
* int e;
* int f;
*};
@@ -53,7 +53,7 @@ struct bitfields_only_mixed_types {
*/
/* ------ END-EXPECTED-OUTPUT ------ */
struct bitfield_mixed_with_others {
- long: 4; /* char is enough as a backing field */
+ char: 4; /* char is enough as a backing field */
int a: 4;
/* 8-bit implicit padding */
short b; /* combined with previous bitfield */
@@ -71,7 +71,7 @@ struct bitfield_mixed_with_others {
*struct bitfield_flushed {
* int a: 4;
* long: 60;
- * long int b: 16;
+ * long b: 16;
*};
*
*/
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
index ba97165bdb28..a657651eba52 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
@@ -14,9 +14,9 @@ typedef int *ptr_arr_t[6];
typedef int *ptr_multiarr_t[7][8][9][10];
-typedef int * (*fn_ptr_arr_t[11])();
+typedef int * (*fn_ptr_arr_t[11])(void);
-typedef int * (*fn_ptr_multiarr_t[12][13])();
+typedef int * (*fn_ptr_multiarr_t[12][13])(void);
struct root_struct {
arr_t _1;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
index 1cef3bec1dc7..7998f27df7dd 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
@@ -29,7 +29,7 @@ struct non_packed_fields {
struct nested_packed {
char: 4;
int a: 4;
- long int b;
+ long b;
struct {
char c;
int d;
@@ -44,7 +44,7 @@ union union_is_never_packed {
union union_does_not_need_packing {
struct {
- long int a;
+ long a;
int b;
} __attribute__((packed));
int c;
@@ -58,7 +58,81 @@ union jump_code_union {
} __attribute__((packed));
};
-/*------ END-EXPECTED-OUTPUT ------ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct nested_packed_but_aligned_struct {
+ * int x1;
+ * int x2;
+ *};
+ *
+ *struct outer_implicitly_packed_struct {
+ * char y1;
+ * struct nested_packed_but_aligned_struct y2;
+ *} __attribute__((packed));
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct nested_packed_but_aligned_struct {
+ int x1;
+ int x2;
+} __attribute__((packed));
+
+struct outer_implicitly_packed_struct {
+ char y1;
+ struct nested_packed_but_aligned_struct y2;
+};
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct usb_ss_ep_comp_descriptor {
+ * char: 8;
+ * char bDescriptorType;
+ * char bMaxBurst;
+ * short wBytesPerInterval;
+ *};
+ *
+ *struct usb_host_endpoint {
+ * long: 64;
+ * char: 8;
+ * struct usb_ss_ep_comp_descriptor ss_ep_comp;
+ * long: 0;
+ *} __attribute__((packed));
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct usb_ss_ep_comp_descriptor {
+ char: 8;
+ char bDescriptorType;
+ char bMaxBurst;
+ int: 0;
+ short wBytesPerInterval;
+} __attribute__((packed));
+
+struct usb_host_endpoint {
+ long: 64;
+ char: 8;
+ struct usb_ss_ep_comp_descriptor ss_ep_comp;
+ long: 0;
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct nested_packed_struct {
+ int a;
+ char b;
+} __attribute__((packed));
+
+struct outer_nonpacked_struct {
+ short a;
+ struct nested_packed_struct b;
+};
+
+struct outer_packed_struct {
+ short a;
+ struct nested_packed_struct b;
+} __attribute__((packed));
+
+/* ------ END-EXPECTED-OUTPUT ------ */
int f(struct {
struct packed_trailing_space _1;
@@ -69,6 +143,10 @@ int f(struct {
union union_is_never_packed _6;
union union_does_not_need_packing _7;
union jump_code_union _8;
+ struct outer_implicitly_packed_struct _9;
+ struct usb_host_endpoint _10;
+ struct outer_nonpacked_struct _11;
+ struct outer_packed_struct _12;
} *_)
{
return 0;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
index 35c512818a56..79276fbe454a 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
@@ -9,7 +9,7 @@
/* ----- START-EXPECTED-OUTPUT ----- */
struct padded_implicitly {
int a;
- long int b;
+ long b;
char c;
};
@@ -19,7 +19,7 @@ struct padded_implicitly {
/*
*struct padded_explicitly {
* int a;
- * int: 32;
+ * long: 0;
* int b;
*};
*
@@ -28,41 +28,28 @@ struct padded_implicitly {
struct padded_explicitly {
int a;
- int: 1; /* algo will explicitly pad with full 32 bits here */
+ int: 1; /* algo will emit aligning `long: 0;` here */
int b;
};
/* ----- START-EXPECTED-OUTPUT ----- */
-/*
- *struct padded_a_lot {
- * int a;
- * long: 32;
- * long: 64;
- * long: 64;
- * int b;
- *};
- *
- */
-/* ------ END-EXPECTED-OUTPUT ------ */
-
struct padded_a_lot {
int a;
- /* 32 bit of implicit padding here, which algo will make explicit */
long: 64;
long: 64;
int b;
};
+/* ------ END-EXPECTED-OUTPUT ------ */
+
/* ----- START-EXPECTED-OUTPUT ----- */
/*
*struct padded_cache_line {
* int a;
- * long: 32;
* long: 64;
* long: 64;
* long: 64;
* int b;
- * long: 32;
* long: 64;
* long: 64;
* long: 64;
@@ -85,7 +72,7 @@ struct padded_cache_line {
*struct zone {
* int a;
* short b;
- * short: 16;
+ * long: 0;
* struct zone_padding __pad__;
*};
*
@@ -102,12 +89,160 @@ struct zone {
struct zone_padding __pad__;
};
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct padding_wo_named_members {
+ long: 64;
+ long: 64;
+};
+
+struct padding_weird_1 {
+ int a;
+ long: 64;
+ short: 16;
+ short b;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct padding_weird_2 {
+ * long: 56;
+ * char a;
+ * long: 56;
+ * char b;
+ * char: 8;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+struct padding_weird_2 {
+ int: 32; /* these paddings will be collapsed into `long: 56;` */
+ short: 16;
+ char: 8;
+ char a;
+ int: 32; /* these paddings will be collapsed into `long: 56;` */
+ short: 16;
+ char: 8;
+ char b;
+ char: 8;
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct exact_1byte {
+ char x;
+};
+
+struct padded_1byte {
+ char: 8;
+};
+
+struct exact_2bytes {
+ short x;
+};
+
+struct padded_2bytes {
+ short: 16;
+};
+
+struct exact_4bytes {
+ int x;
+};
+
+struct padded_4bytes {
+ int: 32;
+};
+
+struct exact_8bytes {
+ long x;
+};
+
+struct padded_8bytes {
+ long: 64;
+};
+
+struct ff_periodic_effect {
+ int: 32;
+ short magnitude;
+ long: 0;
+ short phase;
+ long: 0;
+ int: 32;
+ int custom_len;
+ short *custom_data;
+};
+
+struct ib_wc {
+ long: 64;
+ long: 64;
+ int: 32;
+ int byte_len;
+ void *qp;
+ union {} ex;
+ long: 64;
+ int slid;
+ int wc_flags;
+ long: 64;
+ char smac[6];
+ long: 0;
+ char network_hdr_type;
+};
+
+struct acpi_object_method {
+ long: 64;
+ char: 8;
+ char type;
+ short reference_count;
+ char flags;
+ short: 0;
+ char: 8;
+ char sync_level;
+ long: 64;
+ void *node;
+ void *aml_start;
+ union {} dispatch;
+ long: 64;
+ int aml_length;
+};
+
+struct nested_unpacked {
+ int x;
+};
+
+struct nested_packed {
+ struct nested_unpacked a;
+ char c;
+} __attribute__((packed));
+
+struct outer_mixed_but_unpacked {
+ struct nested_packed b1;
+ short a1;
+ struct nested_packed b2;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
int f(struct {
struct padded_implicitly _1;
struct padded_explicitly _2;
struct padded_a_lot _3;
struct padded_cache_line _4;
struct zone _5;
+ struct padding_wo_named_members _6;
+ struct padding_weird_1 _7;
+ struct padding_weird_2 _8;
+ struct exact_1byte _100;
+ struct padded_1byte _101;
+ struct exact_2bytes _102;
+ struct padded_2bytes _103;
+ struct exact_4bytes _104;
+ struct padded_4bytes _105;
+ struct exact_8bytes _106;
+ struct padded_8bytes _107;
+ struct ff_periodic_effect _200;
+ struct ib_wc _201;
+ struct acpi_object_method _202;
+ struct outer_mixed_but_unpacked _203;
} *_)
{
return 0;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 31975c96e2c9..29d01fff32bd 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -25,6 +25,39 @@ typedef enum {
H = 2,
} e3_t;
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *enum e_byte {
+ * EBYTE_1 = 0,
+ * EBYTE_2 = 1,
+ *} __attribute__((mode(byte)));
+ *
+ */
+/* ----- END-EXPECTED-OUTPUT ----- */
+enum e_byte {
+ EBYTE_1,
+ EBYTE_2,
+} __attribute__((mode(byte)));
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *enum e_word {
+ * EWORD_1 = 0LL,
+ * EWORD_2 = 1LL,
+ *} __attribute__((mode(word)));
+ *
+ */
+/* ----- END-EXPECTED-OUTPUT ----- */
+enum e_word {
+ EWORD_1,
+ EWORD_2,
+} __attribute__((mode(word))); /* force to use 8-byte backing for this enum */
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+enum e_big {
+ EBIG_1 = 1000000000000ULL,
+};
+
typedef int int_t;
typedef volatile const int * volatile const crazy_ptr_t;
@@ -51,7 +84,7 @@ typedef void (*printf_fn_t)(const char *, ...);
* typedef int (*fn_t)(int);
* typedef char * const * (*fn_ptr2_t)(s_t, fn_t);
*
- * - `fn_complext_t`: pointer to a function returning struct and accepting
+ * - `fn_complex_t`: pointer to a function returning struct and accepting
* union and struct. All structs and enum are anonymous and defined inline.
*
* - `signal_t: pointer to a function accepting a pointer to a function as an
@@ -67,7 +100,7 @@ typedef void (*printf_fn_t)(const char *, ...);
* `int -> char *` function and returns pointer to a char. Equivalent:
* typedef char * (*fn_input_t)(int);
* typedef char * (*fn_output_outer_t)(fn_input_t);
- * typedef const fn_output_outer_t (* fn_output_inner_t)();
+ * typedef const fn_output_outer_t (* fn_output_inner_t)(void);
* typedef const fn_output_inner_t fn_ptr_arr2_t[5];
*/
/* ----- START-EXPECTED-OUTPUT ----- */
@@ -94,7 +127,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int);
typedef char * (*fn_ptr_arr1_t[10])(int **);
-typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
+typedef char * (* (* const fn_ptr_arr2_t[5])(void))(char * (*)(int));
struct struct_w_typedefs {
int_t a;
@@ -174,6 +207,12 @@ struct struct_in_struct {
};
};
+struct struct_in_array {};
+
+struct struct_in_array_typed {};
+
+typedef struct struct_in_array_typed struct_in_array_t[2];
+
struct struct_with_embedded_stuff {
int a;
struct {
@@ -183,7 +222,7 @@ struct struct_with_embedded_stuff {
const char *d;
} e;
union {
- volatile long int f;
+ volatile long f;
void * restrict g;
};
};
@@ -203,6 +242,14 @@ struct struct_with_embedded_stuff {
} r[5];
struct struct_in_struct s[10];
int t[11];
+ struct struct_in_array (*u)[2];
+ struct_in_array_t *v;
+};
+
+struct float_struct {
+ float f;
+ const double *d;
+ volatile long double *ld;
};
struct root_struct {
@@ -210,6 +257,9 @@ struct root_struct {
enum e2 _2;
e2_t _2_1;
e3_t _2_2;
+ enum e_byte _100;
+ enum e_word _101;
+ enum e_big _102;
struct struct_w_typedefs _3;
anon_struct_t _7;
struct struct_fwd *_8;
@@ -219,6 +269,7 @@ struct root_struct {
union_fwd_t *_12;
union_fwd_ptr_t _13;
struct struct_with_embedded_stuff _14;
+ struct float_struct _15;
};
/* ------ END-EXPECTED-OUTPUT ------ */
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag.c b/tools/testing/selftests/bpf/progs/btf_type_tag.c
new file mode 100644
index 000000000000..1d488da7e920
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#if __has_attribute(btf_type_tag)
+#define __tag1 __attribute__((btf_type_tag("tag1")))
+#define __tag2 __attribute__((btf_type_tag("tag2")))
+volatile const bool skip_tests = false;
+#else
+#define __tag1
+#define __tag2
+volatile const bool skip_tests = true;
+#endif
+
+struct btf_type_tag_test {
+ int __tag1 * __tag1 __tag2 *p;
+} g;
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
new file mode 100644
index 000000000000..d93f68024cc6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_testmod_btf_type_tag_1 {
+ int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+ struct bpf_testmod_btf_type_tag_1 *p;
+};
+
+__u64 g;
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_percpu_1")
+int BPF_PROG(test_percpu1, struct bpf_testmod_btf_type_tag_1 *arg)
+{
+ g = arg->a;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_percpu_2")
+int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
+{
+ g = arg->p->a;
+ return 0;
+}
+
+/* trace_cgroup_mkdir(struct cgroup *cgrp, const char *path)
+ *
+ * struct css_rstat_cpu {
+ * ...
+ * struct cgroup_subsys_state *updated_children;
+ * ...
+ * };
+ *
+ * struct cgroup_subsys_state {
+ * ...
+ * struct css_rstat_cpu __percpu *rstat_cpu;
+ * ...
+ * };
+ *
+ * struct cgroup {
+ * struct cgroup_subsys_state self;
+ * ...
+ * };
+ */
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
+{
+ g = (__u64)cgrp->self.rstat_cpu->updated_children;
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
+{
+ struct css_rstat_cpu *rstat;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ rstat = (struct css_rstat_cpu *)bpf_per_cpu_ptr(
+ cgrp->self.rstat_cpu, cpu);
+ if (rstat) {
+ /* READ_ONCE */
+ *(volatile long *)rstat;
+ }
+
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_user.c b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c
new file mode 100644
index 000000000000..5523f77c5a44
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_testmod_btf_type_tag_1 {
+ int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+ struct bpf_testmod_btf_type_tag_1 *p;
+};
+
+int g;
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_user_1")
+int BPF_PROG(test_user1, struct bpf_testmod_btf_type_tag_1 *arg)
+{
+ g = arg->a;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_user_2")
+int BPF_PROG(test_user2, struct bpf_testmod_btf_type_tag_2 *arg)
+{
+ g = arg->p->a;
+ return 0;
+}
+
+/* int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+ * int __user *usockaddr_len);
+ */
+SEC("fentry/__sys_getsockname")
+int BPF_PROG(test_sys_getsockname, int fd, struct sockaddr *usockaddr,
+ int *usockaddr_len)
+{
+ g = usockaddr->sa_family;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
new file mode 100644
index 000000000000..5d6fc7f01ebb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+struct map_value {
+ struct prog_test_ref_kfunc __kptr *ptr;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 16);
+} array_map SEC(".maps");
+
+static __noinline int cb1(void *map, void *key, void *value, void *ctx)
+{
+ void *p = *(void **)ctx;
+ bpf_kfunc_call_test_release(p);
+ /* Without the fix this would cause underflow */
+ return 0;
+}
+
+SEC("?tc")
+int underflow_prog(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sl = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sl);
+ if (!p)
+ return 0;
+ bpf_for_each_map_elem(&array_map, cb1, &p, 0);
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
+static __always_inline int cb2(void *map, void *key, void *value, void *ctx)
+{
+ unsigned long sl = 0;
+
+ *(void **)ctx = bpf_kfunc_call_test_acquire(&sl);
+ /* Without the fix this would leak memory */
+ return 0;
+}
+
+SEC("?tc")
+int leak_prog(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 0;
+
+ p = NULL;
+ bpf_for_each_map_elem(&array_map, cb2, &p, 0);
+ p = bpf_kptr_xchg(&v->ptr, p);
+ if (p)
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
+static __always_inline int cb(void *map, void *key, void *value, void *ctx)
+{
+ return 0;
+}
+
+static __always_inline int cb3(void *map, void *key, void *value, void *ctx)
+{
+ unsigned long sl = 0;
+ void *p;
+
+ bpf_kfunc_call_test_acquire(&sl);
+ bpf_for_each_map_elem(&array_map, cb, &p, 0);
+ /* It should only complain here, not in cb. This is why we need
+ * callback_ref to be set to frameno.
+ */
+ return 0;
+}
+
+SEC("?tc")
+int nested_cb(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sl = 0;
+ int sp = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sl);
+ if (!p)
+ return 0;
+ bpf_for_each_map_elem(&array_map, cb3, &sp, 0);
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
+SEC("?tc")
+int non_cb_transfer_ref(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sl = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sl);
+ if (!p)
+ return 0;
+ cb1(NULL, NULL, NULL, &p);
+ bpf_kfunc_call_test_acquire(&sl);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
index a0778fe7857a..41d59f0ee606 100644
--- a/tools/testing/selftests/bpf/progs/cg_storage_multi.h
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
@@ -3,8 +3,6 @@
#ifndef __PROGS_CG_STORAGE_MULTI_H
#define __PROGS_CG_STORAGE_MULTI_H
-#include <asm/types.h>
-
struct cgroup_value {
__u32 egress_pkts;
__u32 ingress_pkts;
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
index a25373002055..3f81ff92184c 100644
--- a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
@@ -20,7 +20,7 @@ struct {
__u32 invocations = 0;
-SEC("cgroup_skb/egress/1")
+SEC("cgroup_skb/egress")
int egress1(struct __sk_buff *skb)
{
struct cgroup_value *ptr_cg_storage =
@@ -32,7 +32,7 @@ int egress1(struct __sk_buff *skb)
return 1;
}
-SEC("cgroup_skb/egress/2")
+SEC("cgroup_skb/egress")
int egress2(struct __sk_buff *skb)
{
struct cgroup_value *ptr_cg_storage =
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
index a149f33bc533..d662db27fe4a 100644
--- a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
@@ -20,7 +20,7 @@ struct {
__u32 invocations = 0;
-SEC("cgroup_skb/egress/1")
+SEC("cgroup_skb/egress")
int egress1(struct __sk_buff *skb)
{
struct cgroup_value *ptr_cg_storage =
@@ -32,7 +32,7 @@ int egress1(struct __sk_buff *skb)
return 1;
}
-SEC("cgroup_skb/egress/2")
+SEC("cgroup_skb/egress")
int egress2(struct __sk_buff *skb)
{
struct cgroup_value *ptr_cg_storage =
diff --git a/tools/testing/selftests/bpf/progs/cgroup_ancestor.c b/tools/testing/selftests/bpf/progs/cgroup_ancestor.c
new file mode 100644
index 000000000000..8c2deb4fc493
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_ancestor.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_tracing_net.h"
+#define NUM_CGROUP_LEVELS 4
+
+__u64 cgroup_ids[NUM_CGROUP_LEVELS];
+__u16 dport;
+
+static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
+{
+ /* [1] &level passed to external function that may change it, it's
+ * incompatible with loop unroll.
+ */
+ cgroup_ids[level] = bpf_skb_ancestor_cgroup_id(skb, level);
+}
+
+SEC("tc")
+int log_cgroup_id(struct __sk_buff *skb)
+{
+ struct sock *sk = (void *)skb->sk;
+
+ if (!sk)
+ return TC_ACT_OK;
+
+ sk = bpf_core_cast(sk, struct sock);
+ if (sk->sk_protocol == IPPROTO_UDP && sk->sk_dport == dport) {
+ log_nth_level(skb, 0);
+ log_nth_level(skb, 1);
+ log_nth_level(skb, 2);
+ log_nth_level(skb, 3);
+ }
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
new file mode 100644
index 000000000000..932b8ecd4ae3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__u32 ctx_retval_value = 0;
+__u32 page_size = 0;
+
+SEC("cgroup/getsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+ retval_value = bpf_get_retval();
+ ctx_retval_value = ctx->retval;
+ __sync_fetch_and_add(&invocations, 1);
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (bpf_set_retval(-EISCONN))
+ assertion_error = 1;
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int clear_retval(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ ctx->retval = 0;
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c
new file mode 100644
index 000000000000..13dfb4bbfd28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define BPF_RETVAL_HOOK(name, section, ctx, expected_err) \
+ __attribute__((__section__("?" section))) \
+ int name(struct ctx *_ctx) \
+ { \
+ bpf_set_retval(bpf_get_retval()); \
+ return 1; \
+ }
+
+#include "cgroup_getset_retval_hooks.h"
+
+#undef BPF_RETVAL_HOOK
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
new file mode 100644
index 000000000000..45a0e9f492a9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__s32 page_size = 0;
+
+SEC("cgroup/setsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+ retval_value = bpf_get_retval();
+ __sync_fetch_and_add(&invocations, 1);
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 1;
+}
+
+SEC("cgroup/setsockopt")
+int set_eunatch(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (bpf_set_retval(-EUNATCH))
+ assertion_error = 1;
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 0;
+}
+
+SEC("cgroup/setsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (bpf_set_retval(-EISCONN))
+ assertion_error = 1;
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 0;
+}
+
+SEC("cgroup/setsockopt")
+int legacy_eperm(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
new file mode 100644
index 000000000000..ff189a736ad8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022 Google LLC.
+ */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct percpu_attach_counter {
+ /* Previous percpu state, to figure out if we have new updates */
+ __u64 prev;
+ /* Current percpu state */
+ __u64 state;
+};
+
+struct attach_counter {
+ /* State propagated through children, pending aggregation */
+ __u64 pending;
+ /* Total state, including all cpus and all children */
+ __u64 state;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1024);
+ __type(key, __u64);
+ __type(value, struct percpu_attach_counter);
+} percpu_attach_counters SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, __u64);
+ __type(value, struct attach_counter);
+} attach_counters SEC(".maps");
+
+extern void css_rstat_updated(
+ struct cgroup_subsys_state *css, int cpu) __ksym;
+extern void css_rstat_flush(struct cgroup_subsys_state *css) __ksym;
+
+static uint64_t cgroup_id(struct cgroup *cgrp)
+{
+ return cgrp->kn->id;
+}
+
+static int create_percpu_attach_counter(__u64 cg_id, __u64 state)
+{
+ struct percpu_attach_counter pcpu_init = {.state = state, .prev = 0};
+
+ return bpf_map_update_elem(&percpu_attach_counters, &cg_id,
+ &pcpu_init, BPF_NOEXIST);
+}
+
+static int create_attach_counter(__u64 cg_id, __u64 state, __u64 pending)
+{
+ struct attach_counter init = {.state = state, .pending = pending};
+
+ return bpf_map_update_elem(&attach_counters, &cg_id,
+ &init, BPF_NOEXIST);
+}
+
+SEC("fentry/cgroup_attach_task")
+int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
+ bool threadgroup)
+{
+ __u64 cg_id = cgroup_id(dst_cgrp);
+ struct percpu_attach_counter *pcpu_counter = bpf_map_lookup_elem(
+ &percpu_attach_counters,
+ &cg_id);
+
+ if (pcpu_counter)
+ pcpu_counter->state += 1;
+ else if (create_percpu_attach_counter(cg_id, 1))
+ return 0;
+
+ css_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
+ return 0;
+}
+
+SEC("fentry/bpf_rstat_flush")
+int BPF_PROG(flusher, struct cgroup *cgrp, struct cgroup *parent, int cpu)
+{
+ struct percpu_attach_counter *pcpu_counter;
+ struct attach_counter *total_counter, *parent_counter;
+ __u64 cg_id = cgroup_id(cgrp);
+ __u64 parent_cg_id = parent ? cgroup_id(parent) : 0;
+ __u64 state;
+ __u64 delta = 0;
+
+ /* Add CPU changes on this level since the last flush */
+ pcpu_counter = bpf_map_lookup_percpu_elem(&percpu_attach_counters,
+ &cg_id, cpu);
+ if (pcpu_counter) {
+ state = pcpu_counter->state;
+ delta += state - pcpu_counter->prev;
+ pcpu_counter->prev = state;
+ }
+
+ total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
+ if (!total_counter) {
+ if (create_attach_counter(cg_id, delta, 0))
+ return 0;
+ goto update_parent;
+ }
+
+ /* Collect pending stats from subtree */
+ if (total_counter->pending) {
+ delta += total_counter->pending;
+ total_counter->pending = 0;
+ }
+
+ /* Propagate changes to this cgroup's total */
+ total_counter->state += delta;
+
+update_parent:
+ /* Skip if there are no changes to propagate, or no parent */
+ if (!delta || !parent_cg_id)
+ return 0;
+
+ /* Propagate changes to cgroup's parent */
+ parent_counter = bpf_map_lookup_elem(&attach_counters,
+ &parent_cg_id);
+ if (parent_counter)
+ parent_counter->pending += delta;
+ else
+ create_attach_counter(parent_cg_id, 0, delta);
+ return 0;
+}
+
+SEC("iter.s/cgroup")
+int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+ struct seq_file *seq = meta->seq;
+ struct attach_counter *total_counter;
+ __u64 cg_id = cgrp ? cgroup_id(cgrp) : 0;
+
+ /* Do nothing for the terminal call */
+ if (!cg_id)
+ return 1;
+
+ /* Flush the stats to make sure we get the most updated numbers */
+ css_rstat_flush(&cgrp->self);
+
+ total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
+ if (!total_counter) {
+ BPF_SEQ_PRINTF(seq, "cg_id: %llu, attach_counter: 0\n",
+ cg_id);
+ } else {
+ BPF_SEQ_PRINTF(seq, "cg_id: %llu, attach_counter: %llu\n",
+ cg_id, total_counter->state);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter.c b/tools/testing/selftests/bpf/progs/cgroup_iter.c
new file mode 100644
index 000000000000..f30841997a8d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_iter.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+int terminate_early = 0;
+u64 terminal_cgroup = 0;
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+ return cgrp->kn->id;
+}
+
+SEC("iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct cgroup *cgrp = ctx->cgroup;
+
+ /* epilogue */
+ if (cgrp == NULL) {
+ BPF_SEQ_PRINTF(seq, "epilogue\n");
+ return 0;
+ }
+
+ /* prologue */
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "prologue\n");
+
+ BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp));
+
+ if (terminal_cgroup == cgroup_id(cgrp))
+ return 1;
+
+ return terminate_early ? 1 : 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_mprog.c b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
new file mode 100644
index 000000000000..6a0ea02c4de2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/getsockopt")
+int getsockopt_1(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_2(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_3(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_4(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_preorder.c b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
new file mode 100644
index 000000000000..4ef6202baa0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned int idx;
+__u8 result[4];
+
+SEC("cgroup/getsockopt")
+int child(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 1;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int child_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 2;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 3;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 4;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
new file mode 100644
index 000000000000..88e13e17ec9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+char value[16];
+
+static __always_inline void read_xattr(struct cgroup *cgroup)
+{
+ struct bpf_dynptr value_ptr;
+
+ bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr);
+ bpf_cgroup_read_xattr(cgroup, "user.bpf_test",
+ &value_ptr);
+}
+
+SEC("lsm.s/socket_connect")
+__success
+int BPF_PROG(trusted_cgroup_ptr_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(trusted_cgroup_ptr_non_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(use_css_iter_non_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
+int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm.s/socket_connect")
+__success
+int BPF_PROG(use_css_iter_sleepable_with_rcu_lock)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ bpf_rcu_read_lock();
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ goto out;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(use_bpf_cgroup_ancestor)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp, *ancestor;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, 1);
+ if (!ancestor)
+ goto out;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(ancestor);
+out:
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int BPF_PROG(cgroup_skb)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp, *ancestor;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, 1);
+ if (!ancestor)
+ goto out;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(ancestor);
+out:
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c
new file mode 100644
index 000000000000..e32b07d802bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 data_end;
+
+SEC("cgroup_skb/ingress")
+int direct_packet_access(struct __sk_buff *skb)
+{
+ data_end = skb->data_end;
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
index 3f757e30d7a0..ac86a8a61605 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
@@ -14,7 +14,6 @@
#include <sys/types.h>
#include <sys/socket.h>
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
__u16 g_serv_port = 0;
@@ -67,7 +66,6 @@ static inline int is_allowed_peer_cg(struct __sk_buff *skb,
SEC("cgroup_skb/ingress")
int ingress_lookup(struct __sk_buff *skb)
{
- __u32 serv_port_key = 0;
struct ipv6hdr ip6h;
struct tcphdr tcph;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_storage.c b/tools/testing/selftests/bpf/progs/cgroup_storage.c
new file mode 100644
index 000000000000..db1e4d2d3281
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_storage.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u64);
+} cgroup_storage SEC(".maps");
+
+SEC("cgroup_skb/egress")
+int bpf_prog(struct __sk_buff *skb)
+{
+ __u64 *counter;
+
+ counter = bpf_get_local_storage(&cgroup_storage, 0);
+ __sync_fetch_and_add(counter, 1);
+
+ /* Drop one out of every two packets */
+ return (*counter & 1);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c
new file mode 100644
index 000000000000..1e2e73f3b749
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "cgroup_tcp_skb.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u16 g_sock_port = 0;
+__u32 g_sock_state = 0;
+int g_unexpected = 0;
+__u32 g_packet_count = 0;
+
+int needed_tcp_pkt(struct __sk_buff *skb, struct tcphdr *tcph)
+{
+ struct ipv6hdr ip6h;
+
+ if (skb->protocol != bpf_htons(ETH_P_IPV6))
+ return 0;
+ if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h)))
+ return 0;
+
+ if (ip6h.nexthdr != IPPROTO_TCP)
+ return 0;
+
+ if (bpf_skb_load_bytes(skb, sizeof(ip6h), tcph, sizeof(*tcph)))
+ return 0;
+
+ if (tcph->source != bpf_htons(g_sock_port) &&
+ tcph->dest != bpf_htons(g_sock_port))
+ return 0;
+
+ return 1;
+}
+
+/* Run accept() on a socket in the cgroup to receive a new connection. */
+static int egress_accept(struct tcphdr *tcph)
+{
+ if (g_sock_state == SYN_RECV_SENDING_SYN_ACK) {
+ if (tcph->fin || !tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = SYN_RECV;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int ingress_accept(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case INIT:
+ if (!tcph->syn || tcph->fin || tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = SYN_RECV_SENDING_SYN_ACK;
+ break;
+ case SYN_RECV:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = ESTABLISHED;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Run connect() on a socket in the cgroup to start a new connection. */
+static int egress_connect(struct tcphdr *tcph)
+{
+ if (g_sock_state == INIT) {
+ if (!tcph->syn || tcph->fin || tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = SYN_SENT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int ingress_connect(struct tcphdr *tcph)
+{
+ if (g_sock_state == SYN_SENT) {
+ if (tcph->fin || !tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = ESTABLISHED;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* The connection is closed by the peer outside the cgroup. */
+static int egress_close_remote(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ break;
+ case CLOSE_WAIT_SENDING_ACK:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = CLOSE_WAIT;
+ break;
+ case CLOSE_WAIT:
+ if (!tcph->fin)
+ g_unexpected++;
+ else
+ g_sock_state = LAST_ACK;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+static int ingress_close_remote(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ if (tcph->fin)
+ g_sock_state = CLOSE_WAIT_SENDING_ACK;
+ break;
+ case LAST_ACK:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = CLOSED;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/* The connection is closed by the endpoint inside the cgroup. */
+static int egress_close_local(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ if (tcph->fin)
+ g_sock_state = FIN_WAIT1;
+ break;
+ case TIME_WAIT_SENDING_ACK:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = TIME_WAIT;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+static int ingress_close_local(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ break;
+ case FIN_WAIT1:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = FIN_WAIT2;
+ break;
+ case FIN_WAIT2:
+ if (!tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = TIME_WAIT_SENDING_ACK;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Check the types of outgoing packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/egress")
+int server_egress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_accept(&tcph) || egress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/ingress")
+int server_ingress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_accept(&tcph) || ingress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of outgoing packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/egress")
+int server_egress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_accept(&tcph) || egress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/ingress")
+int server_ingress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_accept(&tcph) || ingress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of outgoing packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/egress")
+int client_egress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_connect(&tcph) || egress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/ingress")
+int client_ingress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_connect(&tcph) || ingress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of outgoing packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/egress")
+int client_egress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_connect(&tcph) || egress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/ingress")
+int client_ingress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_connect(&tcph) || ingress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
new file mode 100644
index 000000000000..73ba32e9a693
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _CGRP_KFUNC_COMMON_H
+#define _CGRP_KFUNC_COMMON_H
+
+#include <errno.h>
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct __cgrps_kfunc_map_value {
+ struct cgroup __kptr * cgrp;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct __cgrps_kfunc_map_value);
+ __uint(max_entries, 1);
+} __cgrps_kfunc_map SEC(".maps");
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp)
+{
+ s32 id;
+ long status;
+
+ status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id);
+ if (status)
+ return NULL;
+
+ return bpf_map_lookup_elem(&__cgrps_kfunc_map, &id);
+}
+
+static inline int cgrps_kfunc_map_insert(struct cgroup *cgrp)
+{
+ struct __cgrps_kfunc_map_value local, *v;
+ long status;
+ struct cgroup *acquired, *old;
+ s32 id;
+
+ status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id);
+ if (status)
+ return status;
+
+ local.cgrp = NULL;
+ status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id);
+ if (!v) {
+ bpf_map_delete_elem(&__cgrps_kfunc_map, &id);
+ return -ENOENT;
+ }
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ if (!acquired) {
+ bpf_map_delete_elem(&__cgrps_kfunc_map, &id);
+ return -ENOENT;
+ }
+
+ old = bpf_kptr_xchg(&v->cgrp, acquired);
+ if (old) {
+ bpf_cgroup_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _CGRP_KFUNC_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
new file mode 100644
index 000000000000..9fe9c4a4e8f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "cgrp_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(cgroup_mkdir,
+ * TP_PROTO(struct cgroup *cgrp, const char *path),
+ * TP_ARGS(cgrp, path)
+ */
+
+static struct __cgrps_kfunc_map_value *insert_lookup_cgrp(struct cgroup *cgrp)
+{
+ int status;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status)
+ return NULL;
+
+ return cgrps_kfunc_map_value_lookup(cgrp);
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_cgroup_acquire() on an untrusted pointer. */
+ acquired = bpf_cgroup_acquire(v->cgrp);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_acquire_no_null_check, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ /*
+ * Can't invoke bpf_cgroup_release() without checking the return value
+ * of bpf_cgroup_acquire().
+ */
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 pointer type STRUCT cgroup must point")
+int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired, *stack_cgrp = (struct cgroup *)&path;
+
+ /* Can't invoke bpf_cgroup_acquire() on a random frame pointer. */
+ acquired = bpf_cgroup_acquire((struct cgroup *)&stack_cgrp);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("kretprobe/cgroup_destroy_locked")
+__failure __msg("calling kernel function bpf_cgroup_acquire is not allowed")
+int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp)
+{
+ struct cgroup *acquired;
+
+ /* Can't acquire an untrusted struct cgroup * pointer. */
+ acquired = bpf_cgroup_acquire(cgrp);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("cgrp_kfunc_acquire_trusted_walked")
+int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ /* Can't invoke bpf_cgroup_acquire() on a pointer obtained from walking a trusted cgroup. */
+ acquired = bpf_cgroup_acquire(cgrp->old_dom_cgrp);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ /* Can't invoke bpf_cgroup_acquire() on a NULL pointer. */
+ acquired = bpf_cgroup_acquire(NULL);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Unreleased reference")
+int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+
+ /* Acquired cgroup is never released. */
+ __sink(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Unreleased reference")
+int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ kptr = bpf_kptr_xchg(&v->cgrp, NULL);
+ if (!kptr)
+ return 0;
+
+ /* Kptr retrieved from map is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(cgrp_kfunc_rcu_get_release, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ bpf_rcu_read_lock();
+ kptr = v->cgrp;
+ if (kptr)
+ /* Can't release a cgroup kptr stored in a map. */
+ bpf_cgroup_release(kptr);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path)
+{
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_cgroup_release() on an untrusted pointer. */
+ bpf_cgroup_release(v->cgrp);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 pointer type STRUCT cgroup must point")
+int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired = (struct cgroup *)&path;
+
+ /* Cannot release random frame pointer. */
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path)
+{
+ struct __cgrps_kfunc_map_value local, *v;
+ long status;
+ struct cgroup *acquired, *old;
+ s32 id;
+
+ status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id);
+ if (status)
+ return 0;
+
+ local.cgrp = NULL;
+ status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id);
+ if (!v)
+ return -ENOENT;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ if (!acquired)
+ return -ENOENT;
+
+ old = bpf_kptr_xchg(&v->cgrp, acquired);
+
+ /* old cannot be passed to bpf_cgroup_release() without a NULL check. */
+ bpf_cgroup_release(old);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("release kernel function bpf_cgroup_release expects")
+int BPF_PROG(cgrp_kfunc_release_unacquired, struct cgroup *cgrp, const char *path)
+{
+ /* Cannot release trusted cgroup pointer which was not acquired. */
+ bpf_cgroup_release(cgrp);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
new file mode 100644
index 000000000000..02d8f160ca0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "cgrp_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err, pid, invocations;
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(cgroup_mkdir,
+ * TP_PROTO(struct cgroup *cgrp, const char *path),
+ * TP_ARGS(cgrp, path)
+ */
+
+static bool is_test_kfunc_task(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ bool same = pid == cur_pid;
+
+ if (same)
+ __sync_fetch_and_add(&invocations, 1);
+
+ return same;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_acquire_release_argument, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ if (!acquired)
+ err = 1;
+ else
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *path)
+{
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status)
+ err = 1;
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr, *cg;
+ struct __cgrps_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = cgrps_kfunc_map_value_lookup(cgrp);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = v->cgrp;
+ if (!kptr) {
+ err = 4;
+ return 0;
+ }
+
+ cg = bpf_cgroup_ancestor(kptr, 1);
+ if (cg) /* verifier only check */
+ bpf_cgroup_release(cg);
+
+ kptr = bpf_kptr_xchg(&v->cgrp, NULL);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ bpf_cgroup_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = cgrps_kfunc_map_value_lookup(cgrp);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ kptr = v->cgrp;
+ if (!kptr)
+ err = 3;
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *self, *ancestor1, *invalid;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ self = bpf_cgroup_ancestor(cgrp, cgrp->level);
+ if (!self) {
+ err = 1;
+ return 0;
+ }
+
+ if (self->self.id != cgrp->self.id) {
+ bpf_cgroup_release(self);
+ err = 2;
+ return 0;
+ }
+ bpf_cgroup_release(self);
+
+ ancestor1 = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+ if (!ancestor1) {
+ err = 3;
+ return 0;
+ }
+ bpf_cgroup_release(ancestor1);
+
+ invalid = bpf_cgroup_ancestor(cgrp, 10000);
+ if (invalid) {
+ bpf_cgroup_release(invalid);
+ err = 4;
+ return 0;
+ }
+
+ invalid = bpf_cgroup_ancestor(cgrp, -1);
+ if (invalid) {
+ bpf_cgroup_release(invalid);
+ err = 5;
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *parent, *res;
+ u64 parent_cgid;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* @cgrp's ID is not visible yet, let's test with the parent */
+ parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+ if (!parent) {
+ err = 1;
+ return 0;
+ }
+
+ parent_cgid = parent->kn->id;
+ bpf_cgroup_release(parent);
+
+ res = bpf_cgroup_from_id(parent_cgid);
+ if (!res) {
+ err = 2;
+ return 0;
+ }
+
+ bpf_cgroup_release(res);
+
+ if (res != parent) {
+ err = 3;
+ return 0;
+ }
+
+ res = bpf_cgroup_from_id((u64)-1);
+ if (res) {
+ bpf_cgroup_release(res);
+ err = 4;
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int test_cgrp_from_id_ns(void *ctx)
+{
+ struct cgroup *cg;
+
+ cg = bpf_cgroup_from_id(1);
+ if (!cg)
+ return 42;
+ bpf_cgroup_release(cg);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c
new file mode 100644
index 000000000000..8aeba1b75c83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct socket_cookie {
+ __u64 cookie_key;
+ __u64 cookie_value;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct socket_cookie);
+} socket_cookies SEC(".maps");
+
+SEC("cgroup/connect6")
+int set_cookie(struct bpf_sock_addr *ctx)
+{
+ struct socket_cookie *p;
+ struct tcp_sock *tcp_sk;
+ struct bpf_sock *sk;
+
+ if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+ return 1;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ tcp_sk = bpf_skc_to_tcp_sock(sk);
+ if (!tcp_sk)
+ return 1;
+
+ p = bpf_cgrp_storage_get(&socket_cookies,
+ tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!p)
+ return 1;
+
+ p->cookie_value = 0xF;
+ p->cookie_key = bpf_get_socket_cookie(ctx);
+ return 1;
+}
+
+SEC("sockops")
+int update_cookie_sockops(struct bpf_sock_ops *ctx)
+{
+ struct socket_cookie *p;
+ struct tcp_sock *tcp_sk;
+ struct bpf_sock *sk;
+
+ if (ctx->family != AF_INET6 || ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+ return 1;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ tcp_sk = bpf_skc_to_tcp_sock(sk);
+ if (!tcp_sk)
+ return 1;
+
+ p = bpf_cgrp_storage_get(&socket_cookies,
+ tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, 0);
+ if (!p)
+ return 1;
+
+ if (p->cookie_key != bpf_get_socket_cookie(ctx))
+ return 1;
+
+ p->cookie_value |= (ctx->local_port << 8);
+ return 1;
+}
+
+SEC("fexit/inet_stream_connect")
+int BPF_PROG(update_cookie_tracing, struct socket *sock,
+ struct sockaddr *uaddr, int addr_len, int flags)
+{
+ struct socket_cookie *p;
+
+ if (uaddr->sa_family != AF_INET6)
+ return 0;
+
+ p = bpf_cgrp_storage_get(&socket_cookies, sock->sk->sk_cgrp_data.cgroup, 0, 0);
+ if (!p)
+ return 0;
+
+ if (p->cookie_key != bpf_get_socket_cookie(sock->sk))
+ return 0;
+
+ p->cookie_value |= 0xF0;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c
new file mode 100644
index 000000000000..d41f90e2ab64
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ (void)bpf_cgrp_storage_get(&map_a, (struct cgroup *)task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
new file mode 100644
index 000000000000..3500e4b69ebe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_b SEC(".maps");
+
+int target_hid = 0;
+bool is_cgroup1 = 0;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static void __on_update(struct cgroup *cgrp)
+{
+ long *ptr;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+
+ ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+}
+
+SEC("fentry/bpf_local_storage_update")
+int BPF_PROG(on_update)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct cgroup *cgrp;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_update(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_update(task->cgroups->dfl_cgrp);
+ return 0;
+}
+
+static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp)
+{
+ long *ptr;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr = 200;
+
+ ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr = 100;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct cgroup *cgrp;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_enter(regs, id, cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_enter(regs, id, task->cgroups->dfl_cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
new file mode 100644
index 000000000000..a2de95f85648
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+__s32 target_pid;
+__u64 cgroup_id;
+int target_hid;
+bool is_cgroup1;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("?iter.s/cgroup")
+int cgroup_iter(struct bpf_iter__cgroup *ctx)
+{
+ struct cgroup *cgrp = ctx->cgroup;
+ long *ptr;
+
+ if (cgrp == NULL)
+ return 0;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ return 0;
+}
+
+static void __no_rcu_lock(struct cgroup *cgrp)
+{
+ long *ptr;
+
+ /* Note that trace rcu is held in sleepable prog, so we can use
+ * bpf_cgrp_storage_get() in sleepable prog.
+ */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int cgrp1_no_rcu_lock(void *ctx)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ /* bpf_task_get_cgroup1 can work in sleepable prog */
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __no_rcu_lock(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int no_rcu_lock(void *ctx)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ /* task->cgroups is untrusted in sleepable prog outside of RCU CS */
+ __no_rcu_lock(task->cgroups->dfl_cgrp);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int yes_rcu_lock(void *ctx)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ if (is_cgroup1) {
+ bpf_rcu_read_lock();
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ bpf_cgroup_release(cgrp);
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ cgrp = task->cgroups->dfl_cgrp;
+ /* cgrp is trusted under RCU CS */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c
new file mode 100644
index 000000000000..1c348f000f38
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_b SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+pid_t target_pid = 0;
+int mismatch_cnt = 0;
+int enter_cnt = 0;
+int exit_cnt = 0;
+int target_hid = 0;
+bool is_cgroup1 = 0;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp)
+{
+ long *ptr;
+ int err;
+
+ /* populate value 0 */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return;
+
+ /* delete value 0 */
+ err = bpf_cgrp_storage_delete(&map_a, cgrp);
+ if (err)
+ return;
+
+ /* value is not available */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, 0);
+ if (ptr)
+ return;
+
+ /* re-populate the value */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return;
+ __sync_fetch_and_add(&enter_cnt, 1);
+ *ptr = MAGIC_VALUE + enter_cnt;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_enter(regs, id, cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_enter(regs, id, task->cgroups->dfl_cgrp);
+ return 0;
+}
+
+static void __on_exit(struct pt_regs *regs, long id, struct cgroup *cgrp)
+{
+ long *ptr;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return;
+
+ __sync_fetch_and_add(&exit_cnt, 1);
+ if (*ptr != MAGIC_VALUE + exit_cnt)
+ __sync_fetch_and_add(&mismatch_cnt, 1);
+}
+
+SEC("tp_btf/sys_exit")
+int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_exit(regs, id, cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_exit(regs, id, task->cgroups->dfl_cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c
new file mode 100644
index 000000000000..6884ab99a421
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_arena_common.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} test_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg(" 0: .......... (b7) r0 = 42")
+__msg(" 1: 0......... (bf) r1 = r0")
+__msg(" 2: .1........ (bf) r2 = r1")
+__msg(" 3: ..2....... (bf) r3 = r2")
+__msg(" 4: ...3...... (bf) r4 = r3")
+__msg(" 5: ....4..... (bf) r5 = r4")
+__msg(" 6: .....5.... (bf) r6 = r5")
+__msg(" 7: ......6... (bf) r7 = r6")
+__msg(" 8: .......7.. (bf) r8 = r7")
+__msg(" 9: ........8. (bf) r9 = r8")
+__msg("10: .........9 (bf) r0 = r9")
+__msg("11: 0......... (95) exit")
+__naked void assign_chain(void)
+{
+ asm volatile (
+ "r0 = 42;"
+ "r1 = r0;"
+ "r2 = r1;"
+ "r3 = r2;"
+ "r4 = r3;"
+ "r5 = r4;"
+ "r6 = r5;"
+ "r7 = r6;"
+ "r8 = r7;"
+ "r9 = r8;"
+ "r0 = r9;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 7")
+__msg("1: .1........ (07) r1 += 7")
+__msg("2: .......... (b7) r2 = 7")
+__msg("3: ..2....... (b7) r3 = 42")
+__msg("4: ..23...... (0f) r2 += r3")
+__msg("5: .......... (b7) r0 = 0")
+__msg("6: 0......... (95) exit")
+__naked void arithmetics(void)
+{
+ asm volatile (
+ "r1 = 7;"
+ "r1 += 7;"
+ "r2 = 7;"
+ "r3 = 42;"
+ "r2 += r3;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#ifdef CAN_USE_BPF_ST
+SEC("socket")
+__log_level(2)
+__msg(" 1: .1........ (07) r1 += -8")
+__msg(" 2: .1........ (7a) *(u64 *)(r1 +0) = 7")
+__msg(" 3: .1........ (b7) r2 = 42")
+__msg(" 4: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 5: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 6: .......... (b7) r0 = 0")
+__naked void store(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "*(u64 *)(r1 +0) = 7;"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("1: ....4..... (07) r4 += -8")
+__msg("2: ....4..... (79) r5 = *(u64 *)(r4 +0)")
+__msg("3: ....45.... (07) r4 += -8")
+__naked void load(void)
+{
+ asm volatile (
+ "r4 = r10;"
+ "r4 += -8;"
+ "r5 = *(u64 *)(r4 +0);"
+ "r4 += -8;"
+ "r0 = r5;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .1........ (61) r2 = *(u32 *)(r1 +0)")
+__msg("1: ..2....... (d4) r2 = le64 r2")
+__msg("2: ..2....... (bf) r0 = r2")
+__naked void endian(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 +0);"
+ "r2 = le64 r2;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg(" 8: 0......... (b7) r1 = 1")
+__msg(" 9: 01........ (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)")
+__msg("10: 01........ (c3) lock *(u32 *)(r0 +0) += r1")
+__msg("11: 01........ (db) r1 = atomic64_xchg((u64 *)(r0 +0), r1)")
+__msg("12: 01........ (bf) r2 = r0")
+__msg("13: .12....... (bf) r0 = r1")
+__msg("14: 012....... (db) r0 = atomic64_cmpxchg((u64 *)(r2 +0), r0, r1)")
+__naked void atomic(void)
+{
+ asm volatile (
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = 0;"
+ "*(u64 *)(r2 +0) = r1;"
+ "r1 = %[test_map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r1 = 1;"
+ "r1 = atomic_fetch_add((u64 *)(r0 +0), r1);"
+ ".8byte %[add_nofetch];" /* same as "lock *(u32 *)(r0 +0) += r1;" */
+ "r1 = xchg_64(r0 + 0, r1);"
+ "r2 = r0;"
+ "r0 = r1;"
+ "r0 = cmpxchg_64(r2 + 0, r0, r1);"
+ "1: exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(test_map),
+ __imm_insn(add_nofetch, BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__log_level(2)
+__msg("2: .12....... (db) store_release((u64 *)(r2 -8), r1)")
+__msg("3: .......... (bf) r3 = r10")
+__msg("4: ...3...... (db) r4 = load_acquire((u64 *)(r3 -8))")
+__naked void atomic_load_acq_store_rel(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "r2 = r10;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r2 - 8), r1); */
+ "r3 = r10;"
+ ".8byte %[load_acquire_insn];" /* r4 = load_acquire((u64 *)(r3 + 0)); */
+ "r0 = r4;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_4, BPF_REG_3, -8))
+ : __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__log_level(2)
+__msg("4: .12....7.. (85) call bpf_trace_printk#6")
+__msg("5: 0......7.. (0f) r0 += r7")
+__naked void regular_call(void)
+{
+ asm volatile (
+ "r7 = 1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 1;"
+ "call %[bpf_trace_printk];"
+ "r0 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (25) if r1 > 0x7 goto pc+1")
+__msg("3: ..2....... (bf) r0 = r2")
+__naked void if1(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 > 0x7 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("3: 0123...... (2d) if r1 > r3 goto pc+1")
+__msg("4: ..2....... (bf) r0 = r2")
+__naked void if2(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "r3 = 7;"
+ "if r1 > r3 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Verifier misses that r2 is alive if jset is not handled properly */
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (45) if r1 & 0x7 goto pc+1")
+__naked void if3_jset_bug(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 & 0x7 goto +1;"
+ "exit;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 0")
+__msg("1: .1........ (b7) r2 = 7")
+__msg("2: .12....... (25) if r1 > 0x7 goto pc+4")
+__msg("3: .12....... (07) r1 += 1")
+__msg("4: .12....... (27) r2 *= 2")
+__msg("5: .12....... (05) goto pc+0")
+__msg("6: .12....... (05) goto pc-5")
+__msg("7: .......... (b7) r0 = 0")
+__msg("8: 0......... (95) exit")
+__naked void loop(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "r2 = 7;"
+ "if r1 > 0x7 goto +4;"
+ "r1 += 1;"
+ "r2 *= 2;"
+ "goto +0;"
+ "goto -5;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_GOTOL
+SEC("socket")
+__log_level(2)
+__msg("2: .123...... (25) if r1 > 0x7 goto pc+2")
+__msg("3: ..2....... (bf) r0 = r2")
+__msg("4: 0......... (06) gotol pc+1")
+__msg("5: ...3...... (bf) r0 = r3")
+__msg("6: 0......... (95) exit")
+__naked void gotol(void)
+{
+ asm volatile (
+ "r2 = 42;"
+ "r3 = 24;"
+ "if r1 > 0x7 goto +2;"
+ "r0 = r2;"
+ "gotol +1;"
+ "r0 = r3;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 1")
+__msg("1: .1........ (e5) may_goto pc+1")
+__msg("2: .......... (05) goto pc-3")
+__msg("3: .1........ (bf) r0 = r1")
+__msg("4: 0......... (95) exit")
+__naked void may_goto(void)
+{
+ asm volatile (
+ "1: r1 = 1;"
+ ".8byte %[may_goto];"
+ "goto 1b;"
+ "r0 = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("1: 0......... (18) r2 = 0x7")
+__msg("3: 0.2....... (0f) r0 += r2")
+__naked void ldimm64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 0x7 ll;"
+ "r0 += r2;"
+ "exit;"
+ :
+ :: __clobber_all);
+}
+
+/* No rules specific for LD_ABS/LD_IND, default behaviour kicks in */
+SEC("socket")
+__log_level(2)
+__msg("2: 0123456789 (30) r0 = *(u8 *)skb[42]")
+__msg("3: 012.456789 (0f) r7 += r0")
+__msg("4: 012.456789 (b7) r3 = 42")
+__msg("5: 0123456789 (50) r0 = *(u8 *)skb[r3 + 0]")
+__msg("6: 0......7.. (0f) r7 += r0")
+__naked void ldabs(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = 0;"
+ "r0 = *(u8 *)skb[42];"
+ "r7 += r0;"
+ "r3 = 42;"
+ ".8byte %[ld_ind];" /* same as "r0 = *(u8 *)skb[r3];" */
+ "r7 += r0;"
+ "r0 = r7;"
+ "exit;"
+ :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_B, BPF_REG_3, 0))
+ : __clobber_all);
+}
+
+
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__log_level(2)
+__msg(" 6: .12345.... (85) call bpf_arena_alloc_pages")
+__msg(" 7: 0......... (bf) r1 = addr_space_cast(r0, 0, 1)")
+__msg(" 8: .1........ (b7) r2 = 42")
+__naked void addr_space_cast(void)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = 0;"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r1 = addr_space_cast(r0, 0, 1);"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena)
+ : __clobber_all);
+}
+#endif
+
+static __used __naked int aux1(void)
+{
+ asm volatile (
+ "r0 = r1;"
+ "r0 += r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: ....45.... (b7) r1 = 1")
+__msg("1: .1..45.... (b7) r2 = 2")
+__msg("2: .12.45.... (b7) r3 = 3")
+/* Conservative liveness for subprog parameters. */
+__msg("3: .12345.... (85) call pc+2")
+__msg("4: .......... (b7) r0 = 0")
+__msg("5: 0......... (95) exit")
+__msg("6: .12....... (bf) r0 = r1")
+__msg("7: 0.2....... (0f) r0 += r2")
+/* Conservative liveness for subprog return value. */
+__msg("8: 0......... (95) exit")
+__naked void subprog1(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = 2;"
+ "r3 = 3;"
+ "call aux1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
new file mode 100644
index 000000000000..a3819a5d09c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define VERDICT_REJECT 0
+#define VERDICT_PROCEED 1
+
+int port;
+
+SEC("cgroup/connect4")
+int connect_v4_dropper(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_STREAM)
+ return VERDICT_PROCEED;
+ if (ctx->user_port == bpf_htons(port))
+ return VERDICT_REJECT;
+ return VERDICT_PROCEED;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index a943d394fd3a..9d158cfad981 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -7,8 +7,7 @@
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/in6.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
+#include <linux/tcp.h>
#include <linux/if.h>
#include <errno.h>
@@ -31,9 +30,14 @@
#define IFNAMSIZ 16
#endif
-int _version SEC("version") = 1;
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+const char reno[] = "reno";
+const char cubic[] = "cubic";
-__attribute__ ((noinline))
+__attribute__ ((noinline)) __weak
int do_bind(struct bpf_sock_addr *ctx)
{
struct sockaddr_in sa = {};
@@ -49,35 +53,27 @@ int do_bind(struct bpf_sock_addr *ctx)
}
static __inline int verify_cc(struct bpf_sock_addr *ctx,
- char expected[TCP_CA_NAME_MAX])
+ const char expected[])
{
char buf[TCP_CA_NAME_MAX];
- int i;
if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
return 1;
- for (i = 0; i < TCP_CA_NAME_MAX; i++) {
- if (buf[i] != expected[i])
- return 1;
- if (buf[i] == 0)
- break;
- }
+ if (bpf_strncmp(buf, TCP_CA_NAME_MAX, expected))
+ return 1;
return 0;
}
static __inline int set_cc(struct bpf_sock_addr *ctx)
{
- char reno[TCP_CA_NAME_MAX] = "reno";
- char cubic[TCP_CA_NAME_MAX] = "cubic";
-
- if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)reno, sizeof(reno)))
return 1;
if (verify_cc(ctx, reno))
return 1;
- if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)cubic, sizeof(cubic)))
return 1;
if (verify_cc(ctx, cubic))
return 1;
@@ -198,4 +194,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
return do_bind(ctx) ? 1 : 0;
}
+SEC("cgroup/connect4")
+int connect_v4_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c
index 506d0f81a375..e98573b00ddb 100644
--- a/tools/testing/selftests/bpf/progs/connect6_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect6_prog.c
@@ -24,8 +24,6 @@
#define DST_REWRITE_PORT6 6666
-int _version SEC("version") = 1;
-
SEC("cgroup/connect6")
int connect_v6_prog(struct bpf_sock_addr *ctx)
{
@@ -92,4 +90,10 @@ int connect_v6_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/connect6")
+int connect_v6_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
index a979aaef2a76..27a632dd382e 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port4.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -13,7 +13,6 @@
#include <bpf_sockopt_helpers.h>
char _license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
struct svc_addr {
__be32 addr;
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
index afc8f1c5a9d6..19cad93e612f 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port6.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -12,7 +12,6 @@
#include <bpf_sockopt_helpers.h>
char _license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
struct svc_addr {
__be32 addr[4];
diff --git a/tools/testing/selftests/bpf/progs/connect_ping.c b/tools/testing/selftests/bpf/progs/connect_ping.c
new file mode 100644
index 000000000000..60178192b672
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_ping.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2022 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+
+/* 2001:db8::1 */
+#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } }
+
+__u32 do_bind = 0;
+__u32 has_error = 0;
+__u32 invocations_v4 = 0;
+__u32 invocations_v6 = 0;
+
+SEC("cgroup/connect4")
+int connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = bpf_htonl(0x01010101),
+ };
+
+ __sync_fetch_and_add(&invocations_v4, 1);
+
+ if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)))
+ has_error = 1;
+
+ return 1;
+}
+
+SEC("cgroup/connect6")
+int connect_v6_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in6 sa = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = BINDADDR_V6,
+ };
+
+ __sync_fetch_and_add(&invocations_v6, 1);
+
+ if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)))
+ has_error = 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
new file mode 100644
index 000000000000..ba60adadb335
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/connect_unix")
+int connect_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ /* Rewrite destination. */
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 0;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 0;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 0;
+
+ return 1;
+}
+
+SEC("cgroup/connect_unix")
+int connect_unix_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c
new file mode 100644
index 000000000000..004f2acef2eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/core_kern.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define ATTR __always_inline
+#include "test_jhash.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 256);
+} array1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 256);
+} array2 SEC(".maps");
+
+static __noinline int randmap(int v, const struct net_device *dev)
+{
+ struct bpf_map *map = (struct bpf_map *)&array1;
+ int key = bpf_get_prandom_u32() & 0xff;
+ int *val;
+
+ if (bpf_get_prandom_u32() & 1)
+ map = (struct bpf_map *)&array2;
+
+ val = bpf_map_lookup_elem(map, &key);
+ if (val)
+ *val = bpf_get_prandom_u32() + v + dev->mtu;
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device
+ *from_dev, const struct net_device *to_dev, int sent, int drops,
+ int err)
+{
+ return randmap(from_dev->ifindex, from_dev);
+}
+
+SEC("fentry/eth_type_trans")
+int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb,
+ struct net_device *dev, unsigned short protocol)
+{
+ return randmap(dev->ifindex + skb->len, dev);
+}
+
+SEC("fexit/eth_type_trans")
+int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb,
+ struct net_device *dev, unsigned short protocol)
+{
+ return randmap(dev->ifindex + skb->len, dev);
+}
+
+volatile const int never;
+
+struct __sk_bUfF /* it will not exist in vmlinux */ {
+ int len;
+} __attribute__((preserve_access_index));
+
+struct bpf_testmod_test_read_ctx /* it exists in bpf_testmod */ {
+ size_t len;
+} __attribute__((preserve_access_index));
+
+SEC("tc")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ void *ptr;
+ int nh_off, i = 0;
+
+ nh_off = 14;
+
+ /* pragma unroll doesn't work on large loops */
+#define C do { \
+ ptr = data + i; \
+ if (ptr + nh_off > data_end) \
+ break; \
+ ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+ if (never) { \
+ /* below is a dead code with unresolvable CO-RE relo */ \
+ i += ((struct __sk_bUfF *)ctx)->len; \
+ /* this CO-RE relo may or may not resolve
+ * depending on whether bpf_testmod is loaded.
+ */ \
+ i += ((struct bpf_testmod_test_read_ctx *)ctx)->len; \
+ } \
+ } while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+ C30;C30;C30; /* 90 calls */
+ return 0;
+}
+
+typedef int (*func_proto_typedef___match)(long);
+typedef int (*func_proto_typedef___doesnt_match)(char *);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef___match);
+
+int proto_out[3];
+
+SEC("raw_tracepoint/sys_enter")
+int core_relo_proto(void *ctx)
+{
+ proto_out[0] = bpf_core_type_exists(func_proto_typedef___match);
+ proto_out[1] = bpf_core_type_exists(func_proto_typedef___doesnt_match);
+ proto_out[2] = bpf_core_type_exists(func_proto_typedef_nested1);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/core_kern_overflow.c b/tools/testing/selftests/bpf/progs/core_kern_overflow.c
new file mode 100644
index 000000000000..f0d5652256ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/core_kern_overflow.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+typedef int (*func_proto_typedef)(long);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
+typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
+
+int proto_out;
+
+SEC("raw_tracepoint/sys_enter")
+int core_relo_proto(void *ctx)
+{
+ proto_out = bpf_core_type_exists(func_proto_typedef_nested2);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 9a2850850121..5760ae015e09 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -13,6 +13,7 @@ struct core_reloc_kernel_output {
int valid[10];
char comm[sizeof("test_progs")];
int comm_len;
+ bool local_task_struct_matches;
};
/*
@@ -346,6 +347,7 @@ struct core_reloc_nesting___err_too_deep {
*/
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -454,6 +456,15 @@ struct core_reloc_arrays___err_bad_zero_sz_arr {
struct core_reloc_arrays_substruct d[1][2];
};
+struct core_reloc_arrays___err_bad_signed_arr_elem_sz {
+ /* int -> short (signed!): not supported case */
+ short a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
/*
* PRIMITIVES
*/
@@ -700,27 +711,11 @@ struct core_reloc_existence___minimal {
int a;
};
-struct core_reloc_existence___err_wrong_int_sz {
- short a;
-};
-
-struct core_reloc_existence___err_wrong_int_type {
+struct core_reloc_existence___wrong_field_defs {
+ void *a;
int b[1];
-};
-
-struct core_reloc_existence___err_wrong_int_kind {
struct{ int x; } c;
-};
-
-struct core_reloc_existence___err_wrong_arr_kind {
int arr;
-};
-
-struct core_reloc_existence___err_wrong_arr_value_type {
- short arr[1];
-};
-
-struct core_reloc_existence___err_wrong_struct_type {
int s;
};
@@ -801,12 +796,21 @@ struct core_reloc_bitfields___err_too_big_bitfield {
*/
struct core_reloc_size_output {
int int_sz;
+ int int_off;
int struct_sz;
+ int struct_off;
int union_sz;
+ int union_off;
int arr_sz;
+ int arr_off;
int arr_elem_sz;
+ int arr_elem_off;
int ptr_sz;
+ int ptr_off;
int enum_sz;
+ int enum_off;
+ int float_sz;
+ int float_off;
};
struct core_reloc_size {
@@ -816,6 +820,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
+ float float_field;
};
struct core_reloc_size___diff_sz {
@@ -825,6 +830,17 @@ struct core_reloc_size___diff_sz {
char arr_field[10];
void *ptr_field;
enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
+ double float_field;
+};
+
+struct core_reloc_size___diff_offs {
+ float float_field;
+ enum { YET_OTHER_VALUE = 123 } enum_field;
+ void *ptr_field;
+ int arr_field[4];
+ union { int x; } union_field;
+ struct { int x; } struct_field;
+ int int_field;
};
/* Error case of two candidates with the fields (int_field) at the same
@@ -839,6 +855,7 @@ struct core_reloc_size___err_ambiguous1 {
int arr_field[4];
void *ptr_field;
enum { VALUE___1 = 123 } enum_field;
+ float float_field;
};
struct core_reloc_size___err_ambiguous2 {
@@ -850,13 +867,15 @@ struct core_reloc_size___err_ambiguous2 {
int arr_field[4];
void *ptr_field;
enum { VALUE___2 = 123 } enum_field;
+ float float_field;
};
/*
- * TYPE EXISTENCE & SIZE
+ * TYPE EXISTENCE, MATCH & SIZE
*/
struct core_reloc_type_based_output {
bool struct_exists;
+ bool complex_struct_exists;
bool union_exists;
bool enum_exists;
bool typedef_named_struct_exists;
@@ -865,9 +884,24 @@ struct core_reloc_type_based_output {
bool typedef_int_exists;
bool typedef_enum_exists;
bool typedef_void_ptr_exists;
+ bool typedef_restrict_ptr_exists;
bool typedef_func_proto_exists;
bool typedef_arr_exists;
+ bool struct_matches;
+ bool complex_struct_matches;
+ bool union_matches;
+ bool enum_matches;
+ bool typedef_named_struct_matches;
+ bool typedef_anon_struct_matches;
+ bool typedef_struct_ptr_matches;
+ bool typedef_int_matches;
+ bool typedef_enum_matches;
+ bool typedef_void_ptr_matches;
+ bool typedef_restrict_ptr_matches;
+ bool typedef_func_proto_matches;
+ bool typedef_arr_matches;
+
int struct_sz;
int union_sz;
int enum_sz;
@@ -885,6 +919,14 @@ struct a_struct {
int x;
};
+struct a_complex_struct {
+ union {
+ struct a_struct * restrict a;
+ void *b;
+ } x;
+ volatile long y;
+};
+
union a_union {
int y;
int z;
@@ -909,6 +951,7 @@ typedef int int_typedef;
typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
typedef int (*func_proto_typedef)(long);
@@ -916,22 +959,86 @@ typedef char arr_typedef[20];
struct core_reloc_type_based {
struct a_struct f1;
- union a_union f2;
- enum an_enum f3;
- named_struct_typedef f4;
- anon_struct_typedef f5;
- struct_ptr_typedef f6;
- int_typedef f7;
- enum_typedef f8;
- void_ptr_typedef f9;
- func_proto_typedef f10;
- arr_typedef f11;
+ struct a_complex_struct f2;
+ union a_union f3;
+ enum an_enum f4;
+ named_struct_typedef f5;
+ anon_struct_typedef f6;
+ struct_ptr_typedef f7;
+ int_typedef f8;
+ enum_typedef f9;
+ void_ptr_typedef f10;
+ restrict_ptr_typedef f11;
+ func_proto_typedef f12;
+ arr_typedef f13;
};
/* no types in target */
struct core_reloc_type_based___all_missing {
};
+/* different member orders, enum variant values, signedness, etc */
+struct a_struct___diff {
+ int x;
+ int a;
+};
+
+struct a_struct___forward;
+
+struct a_complex_struct___diff {
+ union {
+ struct a_struct___forward *a;
+ void *b;
+ } x;
+ volatile long y;
+};
+
+union a_union___diff {
+ int z;
+ int y;
+};
+
+typedef struct a_struct___diff named_struct_typedef___diff;
+
+typedef struct { int z, x, y; } anon_struct_typedef___diff;
+
+typedef struct {
+ int c;
+ int b;
+ int a;
+} *struct_ptr_typedef___diff;
+
+enum an_enum___diff {
+ AN_ENUM_VAL2___diff = 0,
+ AN_ENUM_VAL1___diff = 42,
+ AN_ENUM_VAL3___diff = 1,
+};
+
+typedef unsigned int int_typedef___diff;
+
+typedef enum { TYPEDEF_ENUM_VAL2___diff, TYPEDEF_ENUM_VAL1___diff = 50 } enum_typedef___diff;
+
+typedef const void *void_ptr_typedef___diff;
+
+typedef int_typedef___diff (*func_proto_typedef___diff)(long);
+
+typedef char arr_typedef___diff[3];
+
+struct core_reloc_type_based___diff {
+ struct a_struct___diff f1;
+ struct a_complex_struct___diff f2;
+ union a_union___diff f3;
+ enum an_enum___diff f4;
+ named_struct_typedef___diff f5;
+ anon_struct_typedef___diff f6;
+ struct_ptr_typedef___diff f7;
+ int_typedef___diff f8;
+ enum_typedef___diff f9;
+ void_ptr_typedef___diff f10;
+ func_proto_typedef___diff f11;
+ arr_typedef___diff f12;
+};
+
/* different type sizes, extra modifiers, anon vs named enums, etc */
struct a_struct___diff_sz {
long x;
@@ -1110,6 +1217,20 @@ struct core_reloc_enumval_output {
int anon_val2;
};
+struct core_reloc_enum64val_output {
+ bool unsigned_val1_exists;
+ bool unsigned_val2_exists;
+ bool unsigned_val3_exists;
+ bool signed_val1_exists;
+ bool signed_val2_exists;
+ bool signed_val3_exists;
+
+ long unsigned_val1;
+ long unsigned_val2;
+ long signed_val1;
+ long signed_val2;
+};
+
enum named_enum {
NAMED_ENUM_VAL1 = 1,
NAMED_ENUM_VAL2 = 2,
@@ -1127,6 +1248,23 @@ struct core_reloc_enumval {
anon_enum f2;
};
+enum named_unsigned_enum64 {
+ UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL2 = 0x2,
+ UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64 {
+ SIGNED_ENUM64_VAL1 = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL2 = -2,
+ SIGNED_ENUM64_VAL3 = 0x3ffffffffLL,
+};
+
+struct core_reloc_enum64val {
+ enum named_unsigned_enum64 f1;
+ enum named_signed_enum64 f2;
+};
+
/* differing enumerator values */
enum named_enum___diff {
NAMED_ENUM_VAL1___diff = 101,
@@ -1145,6 +1283,23 @@ struct core_reloc_enumval___diff {
anon_enum___diff f2;
};
+enum named_unsigned_enum64___diff {
+ UNSIGNED_ENUM64_VAL1___diff = 0x101ffffffffULL,
+ UNSIGNED_ENUM64_VAL2___diff = 0x202ffffffffULL,
+ UNSIGNED_ENUM64_VAL3___diff = 0x303ffffffffULL,
+};
+
+enum named_signed_enum64___diff {
+ SIGNED_ENUM64_VAL1___diff = -101,
+ SIGNED_ENUM64_VAL2___diff = -202,
+ SIGNED_ENUM64_VAL3___diff = -303,
+};
+
+struct core_reloc_enum64val___diff {
+ enum named_unsigned_enum64___diff f1;
+ enum named_signed_enum64___diff f2;
+};
+
/* missing (optional) third enum value */
enum named_enum___val3_missing {
NAMED_ENUM_VAL1___val3_missing = 111,
@@ -1161,6 +1316,21 @@ struct core_reloc_enumval___val3_missing {
anon_enum___val3_missing f2;
};
+enum named_unsigned_enum64___val3_missing {
+ UNSIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffULL,
+ UNSIGNED_ENUM64_VAL2___val3_missing = 0x222,
+};
+
+enum named_signed_enum64___val3_missing {
+ SIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffLL,
+ SIGNED_ENUM64_VAL2___val3_missing = -222,
+};
+
+struct core_reloc_enum64val___val3_missing {
+ enum named_unsigned_enum64___val3_missing f1;
+ enum named_signed_enum64___val3_missing f2;
+};
+
/* missing (mandatory) second enum value, should fail */
enum named_enum___err_missing {
NAMED_ENUM_VAL1___err_missing = 1,
@@ -1176,3 +1346,18 @@ struct core_reloc_enumval___err_missing {
enum named_enum___err_missing f1;
anon_enum___err_missing f2;
};
+
+enum named_unsigned_enum64___err_missing {
+ UNSIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL3___err_missing = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64___err_missing {
+ SIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL3___err_missing = -3,
+};
+
+struct core_reloc_enum64val___err_missing {
+ enum named_unsigned_enum64___err_missing f1;
+ enum named_signed_enum64___err_missing f2;
+};
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
new file mode 100644
index 000000000000..86085b79f5ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _CPUMASK_COMMON_H
+#define _CPUMASK_COMMON_H
+
+#include "errno.h"
+#include <stdbool.h>
+
+/* Should use BTF_FIELDS_MAX, but it is not always available in vmlinux.h,
+ * so use the hard-coded number as a workaround.
+ */
+#define CPUMASK_KPTR_FIELDS_MAX 11
+
+int err;
+
+#define private(name) SEC(".bss." #name) __attribute__((aligned(8)))
+private(MASK) static struct bpf_cpumask __kptr * global_mask;
+
+struct __cpumask_map_value {
+ struct bpf_cpumask __kptr * cpumask;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct __cpumask_map_value);
+ __uint(max_entries, 1);
+} __cpumask_map SEC(".maps");
+
+struct bpf_cpumask *bpf_cpumask_create(void) __ksym __weak;
+void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym __weak;
+struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first_and(const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_and(struct bpf_cpumask *cpumask,
+ const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+void bpf_cpumask_or(struct bpf_cpumask *cpumask,
+ const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+void bpf_cpumask_xor(struct bpf_cpumask *cpumask,
+ const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym __weak;
+u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak;
+u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak;
+int bpf_cpumask_populate(struct cpumask *cpumask, void *src, size_t src__sz) __ksym __weak;
+
+void bpf_rcu_read_lock(void) __ksym __weak;
+void bpf_rcu_read_unlock(void) __ksym __weak;
+
+static inline const struct cpumask *cast(struct bpf_cpumask *cpumask)
+{
+ return (const struct cpumask *)cpumask;
+}
+
+static inline struct bpf_cpumask *create_cpumask(void)
+{
+ struct bpf_cpumask *cpumask;
+
+ cpumask = bpf_cpumask_create();
+ if (!cpumask) {
+ err = 1;
+ return NULL;
+ }
+
+ if (!bpf_cpumask_empty(cast(cpumask))) {
+ err = 2;
+ bpf_cpumask_release(cpumask);
+ return NULL;
+ }
+
+ return cpumask;
+}
+
+static inline struct __cpumask_map_value *cpumask_map_value_lookup(void)
+{
+ u32 key = 0;
+
+ return bpf_map_lookup_elem(&__cpumask_map, &key);
+}
+
+static inline int cpumask_map_insert(struct bpf_cpumask *mask)
+{
+ struct __cpumask_map_value local, *v;
+ long status;
+ struct bpf_cpumask *old;
+ u32 key = 0;
+
+ local.cpumask = NULL;
+ status = bpf_map_update_elem(&__cpumask_map, &key, &local, 0);
+ if (status) {
+ bpf_cpumask_release(mask);
+ return status;
+ }
+
+ v = bpf_map_lookup_elem(&__cpumask_map, &key);
+ if (!v) {
+ bpf_cpumask_release(mask);
+ return -ENOENT;
+ }
+
+ old = bpf_kptr_xchg(&v->cpumask, mask);
+ if (old) {
+ bpf_cpumask_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _CPUMASK_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
new file mode 100644
index 000000000000..8a2fd596c8a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#include "cpumask_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct kptr_nested_array_2 {
+ struct bpf_cpumask __kptr * mask;
+};
+
+struct kptr_nested_array_1 {
+ /* Make btf_parse_fields() in map_create() return -E2BIG */
+ struct kptr_nested_array_2 d_2[CPUMASK_KPTR_FIELDS_MAX + 1];
+};
+
+struct kptr_nested_array {
+ struct kptr_nested_array_1 d_1;
+};
+
+private(MASK_NESTED) static struct kptr_nested_array global_mask_nested_arr;
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(test_alloc_no_release, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ cpumask = create_cpumask();
+ __sink(cpumask);
+
+ /* cpumask is never released. */
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("NULL pointer passed to trusted arg0")
+int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ cpumask = create_cpumask();
+
+ /* cpumask is released twice. */
+ bpf_cpumask_release(cpumask);
+ bpf_cpumask_release(cpumask);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("must be referenced")
+int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ /* Can't acquire a non-struct bpf_cpumask. */
+ cpumask = bpf_cpumask_acquire((struct bpf_cpumask *)task->cpus_ptr);
+ __sink(cpumask);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("bpf_cpumask_set_cpu args#1 expected pointer to STRUCT bpf_cpumask")
+int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags)
+{
+ /* Can't set the CPU of a non-struct bpf_cpumask. */
+ bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+ struct __cpumask_map_value *v;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (cpumask_map_insert(cpumask))
+ return 0;
+
+ v = cpumask_map_value_lookup();
+ if (!v)
+ return 0;
+
+ cpumask = bpf_kptr_xchg(&v->cpumask, NULL);
+
+ /* cpumask is never released. */
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("NULL pointer passed to trusted arg0")
+int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
+{
+ /* NULL passed to KF_TRUSTED_ARGS kfunc. */
+ bpf_cpumask_empty(NULL);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R2 must be a rcu pointer")
+int BPF_PROG(test_global_mask_out_of_rcu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ local = global_mask;
+ if (!local) {
+ err = 4;
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ bpf_rcu_read_unlock();
+
+ /* RCU region is exited before calling KF_RCU kfunc. */
+
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("NULL pointer passed to trusted arg1")
+int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ local = global_mask;
+
+ /* No NULL check is performed on global cpumask kptr. */
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to helper arg2")
+int BPF_PROG(test_global_mask_rcu_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *prev, *curr;
+
+ curr = bpf_cpumask_create();
+ if (!curr)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, curr);
+ if (prev)
+ bpf_cpumask_release(prev);
+
+ bpf_rcu_read_lock();
+ curr = global_mask;
+ /* PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU passed to bpf_kptr_xchg() */
+ prev = bpf_kptr_xchg(&global_mask, curr);
+ bpf_rcu_read_unlock();
+ if (prev)
+ bpf_cpumask_release(prev);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("has no valid kptr")
+int BPF_PROG(test_invalid_nested_array, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask_nested_arr.d_1.d_2[CPUMASK_KPTR_FIELDS_MAX].mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("type=scalar expected=fp")
+int BPF_PROG(test_populate_invalid_destination, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *invalid = (struct bpf_cpumask *)0x123456;
+ u64 bits;
+ int ret;
+
+ ret = bpf_cpumask_populate((struct cpumask *)invalid, &bits, sizeof(bits));
+ if (!ret)
+ err = 2;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("leads to invalid memory access")
+int BPF_PROG(test_populate_invalid_source, struct task_struct *task, u64 clone_flags)
+{
+ void *garbage = (void *)0x123456;
+ struct bpf_cpumask *local;
+ int ret;
+
+ local = create_cpumask();
+ if (!local) {
+ err = 1;
+ return 0;
+ }
+
+ ret = bpf_cpumask_populate((struct cpumask *)local, garbage, 8);
+ if (!ret)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
new file mode 100644
index 000000000000..0e04c31b91c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -0,0 +1,890 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "cpumask_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid, nr_cpus;
+
+struct kptr_nested {
+ struct bpf_cpumask __kptr * mask;
+};
+
+struct kptr_nested_pair {
+ struct bpf_cpumask __kptr * mask_1;
+ struct bpf_cpumask __kptr * mask_2;
+};
+
+struct kptr_nested_mid {
+ int dummy;
+ struct kptr_nested m;
+};
+
+struct kptr_nested_deep {
+ struct kptr_nested_mid ptrs[2];
+ struct kptr_nested_pair ptr_pairs[3];
+};
+
+struct kptr_nested_deep_array_1_2 {
+ int dummy;
+ struct bpf_cpumask __kptr * mask[CPUMASK_KPTR_FIELDS_MAX];
+};
+
+struct kptr_nested_deep_array_1_1 {
+ int dummy;
+ struct kptr_nested_deep_array_1_2 d_2;
+};
+
+struct kptr_nested_deep_array_1 {
+ long dummy;
+ struct kptr_nested_deep_array_1_1 d_1;
+};
+
+struct kptr_nested_deep_array_2_2 {
+ long dummy[2];
+ struct bpf_cpumask __kptr * mask;
+};
+
+struct kptr_nested_deep_array_2_1 {
+ int dummy;
+ struct kptr_nested_deep_array_2_2 d_2[CPUMASK_KPTR_FIELDS_MAX];
+};
+
+struct kptr_nested_deep_array_2 {
+ long dummy;
+ struct kptr_nested_deep_array_2_1 d_1;
+};
+
+struct kptr_nested_deep_array_3_2 {
+ long dummy[2];
+ struct bpf_cpumask __kptr * mask;
+};
+
+struct kptr_nested_deep_array_3_1 {
+ int dummy;
+ struct kptr_nested_deep_array_3_2 d_2;
+};
+
+struct kptr_nested_deep_array_3 {
+ long dummy;
+ struct kptr_nested_deep_array_3_1 d_1[CPUMASK_KPTR_FIELDS_MAX];
+};
+
+private(MASK) static struct bpf_cpumask __kptr * global_mask_array[2];
+private(MASK) static struct bpf_cpumask __kptr * global_mask_array_l2[2][1];
+private(MASK) static struct bpf_cpumask __kptr * global_mask_array_one[1];
+private(MASK) static struct kptr_nested global_mask_nested[2];
+private(MASK_DEEP) static struct kptr_nested_deep global_mask_nested_deep;
+private(MASK_1) static struct kptr_nested_deep_array_1 global_mask_nested_deep_array_1;
+private(MASK_2) static struct kptr_nested_deep_array_2 global_mask_nested_deep_array_2;
+private(MASK_3) static struct kptr_nested_deep_array_3 global_mask_nested_deep_array_3;
+
+static bool is_test_task(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ return pid == cur_pid;
+}
+
+static bool create_cpumask_set(struct bpf_cpumask **out1,
+ struct bpf_cpumask **out2,
+ struct bpf_cpumask **out3,
+ struct bpf_cpumask **out4)
+{
+ struct bpf_cpumask *mask1, *mask2, *mask3, *mask4;
+
+ mask1 = create_cpumask();
+ if (!mask1)
+ return false;
+
+ mask2 = create_cpumask();
+ if (!mask2) {
+ bpf_cpumask_release(mask1);
+ err = 3;
+ return false;
+ }
+
+ mask3 = create_cpumask();
+ if (!mask3) {
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ err = 4;
+ return false;
+ }
+
+ mask4 = create_cpumask();
+ if (!mask4) {
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ bpf_cpumask_release(mask3);
+ err = 5;
+ return false;
+ }
+
+ *out1 = mask1;
+ *out2 = mask2;
+ *out3 = mask3;
+ *out4 = mask4;
+
+ return true;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_alloc_free_cpumask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_set_clear_cpu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ bpf_cpumask_set_cpu(0, cpumask);
+ if (!bpf_cpumask_test_cpu(0, cast(cpumask))) {
+ err = 3;
+ goto release_exit;
+ }
+
+ bpf_cpumask_clear_cpu(0, cpumask);
+ if (bpf_cpumask_test_cpu(0, cast(cpumask))) {
+ err = 4;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_setall_clear_cpu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ bpf_cpumask_setall(cpumask);
+ if (!bpf_cpumask_full(cast(cpumask))) {
+ err = 3;
+ goto release_exit;
+ }
+
+ bpf_cpumask_clear(cpumask);
+ if (!bpf_cpumask_empty(cast(cpumask))) {
+ err = 4;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_first_firstzero_cpu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (bpf_cpumask_first(cast(cpumask)) < nr_cpus) {
+ err = 3;
+ goto release_exit;
+ }
+
+ if (bpf_cpumask_first_zero(cast(cpumask)) != 0) {
+ bpf_printk("first zero: %d", bpf_cpumask_first_zero(cast(cpumask)));
+ err = 4;
+ goto release_exit;
+ }
+
+ bpf_cpumask_set_cpu(0, cpumask);
+ if (bpf_cpumask_first(cast(cpumask)) != 0) {
+ err = 5;
+ goto release_exit;
+ }
+
+ if (bpf_cpumask_first_zero(cast(cpumask)) != 1) {
+ err = 6;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_firstand_nocpu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2;
+ u32 first;
+
+ if (!is_test_task())
+ return 0;
+
+ mask1 = create_cpumask();
+ if (!mask1)
+ return 0;
+
+ mask2 = create_cpumask();
+ if (!mask2)
+ goto release_exit;
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+
+ first = bpf_cpumask_first_and(cast(mask1), cast(mask2));
+ if (first <= 1)
+ err = 3;
+
+release_exit:
+ if (mask1)
+ bpf_cpumask_release(mask1);
+ if (mask2)
+ bpf_cpumask_release(mask2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_test_and_set_clear, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (bpf_cpumask_test_and_set_cpu(0, cpumask)) {
+ err = 3;
+ goto release_exit;
+ }
+
+ if (!bpf_cpumask_test_and_set_cpu(0, cpumask)) {
+ err = 4;
+ goto release_exit;
+ }
+
+ if (!bpf_cpumask_test_and_clear_cpu(0, cpumask)) {
+ err = 5;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_and_or_xor, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+
+ if (!is_test_task())
+ return 0;
+
+ if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
+ return 0;
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+
+ if (bpf_cpumask_and(dst1, cast(mask1), cast(mask2))) {
+ err = 6;
+ goto release_exit;
+ }
+ if (!bpf_cpumask_empty(cast(dst1))) {
+ err = 7;
+ goto release_exit;
+ }
+
+ bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
+ if (!bpf_cpumask_test_cpu(0, cast(dst1))) {
+ err = 8;
+ goto release_exit;
+ }
+ if (!bpf_cpumask_test_cpu(1, cast(dst1))) {
+ err = 9;
+ goto release_exit;
+ }
+
+ bpf_cpumask_xor(dst2, cast(mask1), cast(mask2));
+ if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) {
+ err = 10;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ bpf_cpumask_release(dst1);
+ bpf_cpumask_release(dst2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_intersects_subset, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+
+ if (!is_test_task())
+ return 0;
+
+ if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
+ return 0;
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+ if (bpf_cpumask_intersects(cast(mask1), cast(mask2))) {
+ err = 6;
+ goto release_exit;
+ }
+
+ bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
+ if (!bpf_cpumask_subset(cast(mask1), cast(dst1))) {
+ err = 7;
+ goto release_exit;
+ }
+
+ if (!bpf_cpumask_subset(cast(mask2), cast(dst1))) {
+ err = 8;
+ goto release_exit;
+ }
+
+ if (bpf_cpumask_subset(cast(dst1), cast(mask1))) {
+ err = 9;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ bpf_cpumask_release(dst1);
+ bpf_cpumask_release(dst2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+ int cpu;
+
+ if (!is_test_task())
+ return 0;
+
+ if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
+ return 0;
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+ bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
+
+ cpu = bpf_cpumask_any_distribute(cast(mask1));
+ if (cpu != 0) {
+ err = 6;
+ goto release_exit;
+ }
+
+ cpu = bpf_cpumask_any_distribute(cast(dst2));
+ if (cpu < nr_cpus) {
+ err = 7;
+ goto release_exit;
+ }
+
+ bpf_cpumask_copy(dst2, cast(dst1));
+ if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) {
+ err = 8;
+ goto release_exit;
+ }
+
+ cpu = bpf_cpumask_any_distribute(cast(dst2));
+ if (cpu > 1) {
+ err = 9;
+ goto release_exit;
+ }
+
+ cpu = bpf_cpumask_any_and_distribute(cast(mask1), cast(mask2));
+ if (cpu < nr_cpus) {
+ err = 10;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ bpf_cpumask_release(dst1);
+ bpf_cpumask_release(dst2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_insert_leave, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (cpumask_map_insert(cpumask))
+ err = 3;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+ struct __cpumask_map_value *v;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (cpumask_map_insert(cpumask)) {
+ err = 3;
+ return 0;
+ }
+
+ v = cpumask_map_value_lookup();
+ if (!v) {
+ err = 4;
+ return 0;
+ }
+
+ cpumask = bpf_kptr_xchg(&v->cpumask, NULL);
+ if (cpumask)
+ bpf_cpumask_release(cpumask);
+ else
+ err = 5;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ local = global_mask;
+ if (!local) {
+ err = 4;
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_array_one_rcu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Kptr arrays with one element are special cased, being treated
+ * just like a single pointer.
+ */
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask_array_one[0], local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ local = global_mask_array_one[0];
+ if (!local) {
+ err = 4;
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+static int _global_mask_array_rcu(struct bpf_cpumask **mask0,
+ struct bpf_cpumask **mask1)
+{
+ struct bpf_cpumask *local;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Check if two kptrs in the array work and independently */
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ bpf_rcu_read_lock();
+
+ local = bpf_kptr_xchg(mask0, local);
+ if (local) {
+ err = 1;
+ goto err_exit;
+ }
+
+ /* [<mask 0>, *] */
+ if (!*mask0) {
+ err = 2;
+ goto err_exit;
+ }
+
+ if (!mask1)
+ goto err_exit;
+
+ /* [*, NULL] */
+ if (*mask1) {
+ err = 3;
+ goto err_exit;
+ }
+
+ local = create_cpumask();
+ if (!local) {
+ err = 9;
+ goto err_exit;
+ }
+
+ local = bpf_kptr_xchg(mask1, local);
+ if (local) {
+ err = 10;
+ goto err_exit;
+ }
+
+ /* [<mask 0>, <mask 1>] */
+ if (!*mask0 || !*mask1 || *mask0 == *mask1) {
+ err = 11;
+ goto err_exit;
+ }
+
+err_exit:
+ if (local)
+ bpf_cpumask_release(local);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_array_rcu, struct task_struct *task, u64 clone_flags)
+{
+ return _global_mask_array_rcu(&global_mask_array[0], &global_mask_array[1]);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_array_l2_rcu, struct task_struct *task, u64 clone_flags)
+{
+ return _global_mask_array_rcu(&global_mask_array_l2[0][0], &global_mask_array_l2[1][0]);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_nested_rcu, struct task_struct *task, u64 clone_flags)
+{
+ return _global_mask_array_rcu(&global_mask_nested[0].mask, &global_mask_nested[1].mask);
+}
+
+/* Ensure that the field->offset has been correctly advanced from one
+ * nested struct or array sub-tree to another. In the case of
+ * kptr_nested_deep, it comprises two sub-trees: ktpr_1 and kptr_2. By
+ * calling bpf_kptr_xchg() on every single kptr in both nested sub-trees,
+ * the verifier should reject the program if the field->offset of any kptr
+ * is incorrect.
+ *
+ * For instance, if we have 10 kptrs in a nested struct and a program that
+ * accesses each kptr individually with bpf_kptr_xchg(), the compiler
+ * should emit instructions to access 10 different offsets if it works
+ * correctly. If the field->offset values of any pair of them are
+ * incorrectly the same, the number of unique offsets in btf_record for
+ * this nested struct should be less than 10. The verifier should fail to
+ * discover some of the offsets emitted by the compiler.
+ *
+ * Even if the field->offset values of kptrs are not duplicated, the
+ * verifier should fail to find a btf_field for the instruction accessing a
+ * kptr if the corresponding field->offset is pointing to a random
+ * incorrect offset.
+ */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_nested_deep_rcu, struct task_struct *task, u64 clone_flags)
+{
+ int r, i;
+
+ r = _global_mask_array_rcu(&global_mask_nested_deep.ptrs[0].m.mask,
+ &global_mask_nested_deep.ptrs[1].m.mask);
+ if (r)
+ return r;
+
+ for (i = 0; i < 3; i++) {
+ r = _global_mask_array_rcu(&global_mask_nested_deep.ptr_pairs[i].mask_1,
+ &global_mask_nested_deep.ptr_pairs[i].mask_2);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_nested_deep_array_rcu, struct task_struct *task, u64 clone_flags)
+{
+ int i;
+
+ for (i = 0; i < CPUMASK_KPTR_FIELDS_MAX; i++)
+ _global_mask_array_rcu(&global_mask_nested_deep_array_1.d_1.d_2.mask[i], NULL);
+
+ for (i = 0; i < CPUMASK_KPTR_FIELDS_MAX; i++)
+ _global_mask_array_rcu(&global_mask_nested_deep_array_2.d_1.d_2[i].mask, NULL);
+
+ for (i = 0; i < CPUMASK_KPTR_FIELDS_MAX; i++)
+ _global_mask_array_rcu(&global_mask_nested_deep_array_3.d_1[i].d_2.mask, NULL);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ if (bpf_cpumask_weight(cast(local)) != 0) {
+ err = 3;
+ goto out;
+ }
+
+ bpf_cpumask_set_cpu(0, local);
+ if (bpf_cpumask_weight(cast(local)) != 1) {
+ err = 4;
+ goto out;
+ }
+
+ /*
+ * Make sure that adding additional CPUs changes the weight. Test to
+ * see whether the CPU was set to account for running on UP machines.
+ */
+ bpf_cpumask_set_cpu(1, local);
+ if (bpf_cpumask_test_cpu(1, cast(local)) && bpf_cpumask_weight(cast(local)) != 2) {
+ err = 5;
+ goto out;
+ }
+
+ bpf_cpumask_clear(local);
+ if (bpf_cpumask_weight(cast(local)) != 0) {
+ err = 6;
+ goto out;
+ }
+out:
+ bpf_cpumask_release(local);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2;
+
+ mask1 = bpf_cpumask_create();
+ mask2 = bpf_cpumask_create();
+
+ if (!mask1 || !mask2)
+ goto free_masks_return;
+
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)mask1);
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)mask2);
+
+free_masks_return:
+ if (mask1)
+ bpf_cpumask_release(mask1);
+ if (mask2)
+ bpf_cpumask_release(mask2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_small_mask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local;
+ u8 toofewbits;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ /* The kfunc should prevent this operation */
+ ret = bpf_cpumask_populate((struct cpumask *)local, &toofewbits, sizeof(toofewbits));
+ if (ret != -EACCES)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
+
+/* Mask is guaranteed to be large enough for bpf_cpumask_t. */
+#define CPUMASK_TEST_MASKLEN (sizeof(cpumask_t))
+
+/* Add an extra word for the test_populate_reject_unaligned test. */
+u64 bits[CPUMASK_TEST_MASKLEN / 8 + 1];
+extern bool CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS __kconfig __weak;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_unaligned, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ char *src;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Skip if unaligned accesses are fine for this arch. */
+ if (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ return 0;
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Misalign the source array by a byte. */
+ src = &((char *)bits)[1];
+
+ ret = bpf_cpumask_populate((struct cpumask *)mask, src, CPUMASK_TEST_MASKLEN);
+ if (ret != -EINVAL)
+ err = 2;
+
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ bool bit;
+ int ret;
+ int i;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Set only odd bits. */
+ __builtin_memset(bits, 0xaa, CPUMASK_TEST_MASKLEN);
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Pass the entire bits array, the kfunc will only copy the valid bits. */
+ ret = bpf_cpumask_populate((struct cpumask *)mask, bits, CPUMASK_TEST_MASKLEN);
+ if (ret) {
+ err = 2;
+ goto out;
+ }
+
+ /*
+ * Test is there to appease the verifier. We cannot directly
+ * access NR_CPUS, the upper bound for nr_cpus, so we infer
+ * it from the size of cpumask_t.
+ */
+ if (nr_cpus < 0 || nr_cpus >= CPUMASK_TEST_MASKLEN * 8) {
+ err = 3;
+ goto out;
+ }
+
+ bpf_for(i, 0, nr_cpus) {
+ /* Odd-numbered bits should be set, even ones unset. */
+ bit = bpf_cpumask_test_cpu(i, (const struct cpumask *)mask);
+ if (bit == (i % 2 != 0))
+ continue;
+
+ err = 4;
+ break;
+ }
+
+out:
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+#undef CPUMASK_TEST_MASKLEN
diff --git a/tools/testing/selftests/bpf/progs/crypto_basic.c b/tools/testing/selftests/bpf/progs/crypto_basic.c
new file mode 100644
index 000000000000..8cf7168b42d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_basic.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "crypto_common.h"
+
+int status;
+SEC("syscall")
+int crypto_release(void *ctx)
+{
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .algo = "ecb(aes)",
+ .key_len = 16,
+ };
+
+ struct bpf_crypto_ctx *cctx;
+ int err = 0;
+
+ status = 0;
+
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ bpf_crypto_ctx_release(cctx);
+
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("Unreleased reference")
+int crypto_acquire(void *ctx)
+{
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .algo = "ecb(aes)",
+ .key_len = 16,
+ };
+ struct bpf_crypto_ctx *cctx;
+ int err = 0;
+
+ status = 0;
+
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ cctx = bpf_crypto_ctx_acquire(cctx);
+ if (!cctx)
+ return -EINVAL;
+
+ bpf_crypto_ctx_release(cctx);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/crypto_bench.c b/tools/testing/selftests/bpf/progs/crypto_bench.c
new file mode 100644
index 000000000000..4ac956b26240
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_bench.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "crypto_common.h"
+
+const volatile unsigned int len = 16;
+char cipher[128] = {};
+u32 key_len, authsize;
+char dst[256] = {};
+u8 key[256] = {};
+long hits = 0;
+int status;
+
+SEC("syscall")
+int crypto_setup(void *args)
+{
+ struct bpf_crypto_ctx *cctx;
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .key_len = key_len,
+ .authsize = authsize,
+ };
+ int err = 0;
+
+ status = 0;
+
+ if (!cipher[0] || !key_len || key_len > 256) {
+ status = -EINVAL;
+ return 0;
+ }
+
+ __builtin_memcpy(&params.algo, cipher, sizeof(cipher));
+ __builtin_memcpy(&params.key, key, sizeof(key));
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ err = crypto_ctx_insert(cctx);
+ if (err && err != -EEXIST)
+ status = err;
+
+ return 0;
+}
+
+SEC("tc")
+int crypto_encrypt(struct __sk_buff *skb)
+{
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+ struct bpf_dynptr psrc, pdst;
+
+ v = crypto_ctx_value_lookup();
+ if (!v) {
+ status = -ENOENT;
+ return 0;
+ }
+
+ ctx = v->ctx;
+ if (!ctx) {
+ status = -ENOENT;
+ return 0;
+ }
+
+ bpf_dynptr_from_skb(skb, 0, &psrc);
+ bpf_dynptr_from_mem(dst, len, 0, &pdst);
+
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+SEC("tc")
+int crypto_decrypt(struct __sk_buff *skb)
+{
+ struct bpf_dynptr psrc, pdst;
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+
+ v = crypto_ctx_value_lookup();
+ if (!v)
+ return -ENOENT;
+
+ ctx = v->ctx;
+ if (!ctx)
+ return -ENOENT;
+
+ bpf_dynptr_from_skb(skb, 0, &psrc);
+ bpf_dynptr_from_mem(dst, len, 0, &pdst);
+
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/crypto_common.h b/tools/testing/selftests/bpf/progs/crypto_common.h
new file mode 100644
index 000000000000..57dd7a68a8c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_common.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _CRYPTO_COMMON_H
+#define _CRYPTO_COMMON_H
+
+#include "errno.h"
+#include <stdbool.h>
+
+struct bpf_crypto_ctx *bpf_crypto_ctx_create(const struct bpf_crypto_params *params,
+ u32 params__sz, int *err) __ksym;
+struct bpf_crypto_ctx *bpf_crypto_ctx_acquire(struct bpf_crypto_ctx *ctx) __ksym;
+void bpf_crypto_ctx_release(struct bpf_crypto_ctx *ctx) __ksym;
+int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx, const struct bpf_dynptr *src,
+ const struct bpf_dynptr *dst, const struct bpf_dynptr *iv) __ksym;
+int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, const struct bpf_dynptr *src,
+ const struct bpf_dynptr *dst, const struct bpf_dynptr *iv) __ksym;
+
+struct __crypto_ctx_value {
+ struct bpf_crypto_ctx __kptr * ctx;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct __crypto_ctx_value);
+ __uint(max_entries, 1);
+} __crypto_ctx_map SEC(".maps");
+
+static inline struct __crypto_ctx_value *crypto_ctx_value_lookup(void)
+{
+ u32 key = 0;
+
+ return bpf_map_lookup_elem(&__crypto_ctx_map, &key);
+}
+
+static inline int crypto_ctx_insert(struct bpf_crypto_ctx *ctx)
+{
+ struct __crypto_ctx_value local, *v;
+ struct bpf_crypto_ctx *old;
+ u32 key = 0;
+ int err;
+
+ local.ctx = NULL;
+ err = bpf_map_update_elem(&__crypto_ctx_map, &key, &local, 0);
+ if (err) {
+ bpf_crypto_ctx_release(ctx);
+ return err;
+ }
+
+ v = bpf_map_lookup_elem(&__crypto_ctx_map, &key);
+ if (!v) {
+ bpf_crypto_ctx_release(ctx);
+ return -ENOENT;
+ }
+
+ old = bpf_kptr_xchg(&v->ctx, ctx);
+ if (old) {
+ bpf_crypto_ctx_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _CRYPTO_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c
new file mode 100644
index 000000000000..dfd8a258f14a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "crypto_common.h"
+
+unsigned char key[256] = {};
+u16 udp_test_port = 7777;
+u32 authsize, key_len;
+char algo[128] = {};
+char dst[16] = {}, dst_bad[8] = {};
+int status;
+
+static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc)
+{
+ struct ipv6hdr ip6h;
+ struct udphdr udph;
+ u32 offset;
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+ return -1;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h)))
+ return -1;
+
+ if (ip6h.nexthdr != IPPROTO_UDP)
+ return -1;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph)))
+ return -1;
+
+ if (udph.dest != __bpf_htons(udp_test_port))
+ return -1;
+
+ offset = ETH_HLEN + sizeof(ip6h) + sizeof(udph);
+ if (skb->len < offset + 16)
+ return -1;
+
+ /* let's make sure that 16 bytes of payload are in the linear part of skb */
+ bpf_skb_pull_data(skb, offset + 16);
+ bpf_dynptr_from_skb(skb, 0, psrc);
+ bpf_dynptr_adjust(psrc, offset, offset + 16);
+
+ return 0;
+}
+
+SEC("syscall")
+int skb_crypto_setup(void *ctx)
+{
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .key_len = key_len,
+ .authsize = authsize,
+ };
+ struct bpf_crypto_ctx *cctx;
+ int err;
+
+ status = 0;
+ if (key_len > 256) {
+ status = -EINVAL;
+ return 0;
+ }
+
+ __builtin_memcpy(&params.algo, algo, sizeof(algo));
+ __builtin_memcpy(&params.key, key, sizeof(key));
+
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ err = crypto_ctx_insert(cctx);
+ if (err && err != -EEXIST)
+ status = err;
+ return 0;
+}
+
+SEC("tc")
+int decrypt_sanity(struct __sk_buff *skb)
+{
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+ struct bpf_dynptr psrc, pdst;
+ int err;
+
+ status = 0;
+ err = skb_dynptr_validate(skb, &psrc);
+ if (err < 0) {
+ status = err;
+ return TC_ACT_SHOT;
+ }
+
+ v = crypto_ctx_value_lookup();
+ if (!v) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ ctx = v->ctx;
+ if (!ctx) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ /* Check also bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
+ */
+ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
+err:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int encrypt_sanity(struct __sk_buff *skb)
+{
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+ struct bpf_dynptr psrc, pdst;
+ int err;
+
+ status = 0;
+ err = skb_dynptr_validate(skb, &psrc);
+ if (err < 0) {
+ status = err;
+ return TC_ACT_SHOT;
+ }
+
+ v = crypto_ctx_value_lookup();
+ if (!v) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ ctx = v->ctx;
+ if (!ctx) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ /* Check also bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
+ */
+ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
+err:
+ return TC_ACT_SHOT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/csum_diff_test.c b/tools/testing/selftests/bpf/progs/csum_diff_test.c
new file mode 100644
index 000000000000..9438f1773a58
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/csum_diff_test.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates */
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define BUFF_SZ 512
+
+/* Will be updated by benchmark before program loading */
+char to_buff[BUFF_SZ];
+const volatile unsigned int to_buff_len = 0;
+char from_buff[BUFF_SZ];
+const volatile unsigned int from_buff_len = 0;
+unsigned short seed = 0;
+
+short result;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("tc")
+int compute_checksum(void *ctx)
+{
+ int to_len_half = to_buff_len / 2;
+ int from_len_half = from_buff_len / 2;
+ short result2;
+
+ /* Calculate checksum in one go */
+ result2 = bpf_csum_diff((void *)from_buff, from_buff_len,
+ (void *)to_buff, to_buff_len, seed);
+
+ /* Calculate checksum by concatenating bpf_csum_diff()*/
+ result = bpf_csum_diff((void *)from_buff, from_buff_len - from_len_half,
+ (void *)to_buff, to_buff_len - to_len_half, seed);
+
+ result = bpf_csum_diff((void *)from_buff + (from_buff_len - from_len_half), from_len_half,
+ (void *)to_buff + (to_buff_len - to_len_half), to_len_half, result);
+
+ result = (result == result2) ? result : 0;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/decap_sanity.c b/tools/testing/selftests/bpf/progs/decap_sanity.c
new file mode 100644
index 000000000000..bd3c657c58a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/decap_sanity.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define UDP_TEST_PORT 7777
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+bool init_csum_partial = false;
+bool final_csum_none = false;
+bool broken_csum_start = false;
+
+static unsigned int skb_headlen(const struct sk_buff *skb)
+{
+ return skb->len - skb->data_len;
+}
+
+static unsigned int skb_headroom(const struct sk_buff *skb)
+{
+ return skb->data - skb->head;
+}
+
+static int skb_checksum_start_offset(const struct sk_buff *skb)
+{
+ return skb->csum_start - skb_headroom(skb);
+}
+
+SEC("tc")
+int decap_sanity(struct __sk_buff *skb)
+{
+ struct sk_buff *kskb;
+ struct ipv6hdr ip6h;
+ struct udphdr udph;
+ int err;
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h)))
+ return TC_ACT_SHOT;
+
+ if (ip6h.nexthdr != IPPROTO_UDP)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph)))
+ return TC_ACT_SHOT;
+
+ if (udph.dest != __bpf_constant_htons(UDP_TEST_PORT))
+ return TC_ACT_SHOT;
+
+ kskb = bpf_cast_to_kern_ctx(skb);
+ init_csum_partial = (kskb->ip_summed == CHECKSUM_PARTIAL);
+ err = bpf_skb_adjust_room(skb, -(s32)(ETH_HLEN + sizeof(ip6h) + sizeof(udph)),
+ 1, BPF_F_ADJ_ROOM_FIXED_GSO);
+ if (err)
+ return TC_ACT_SHOT;
+ final_csum_none = (kskb->ip_summed == CHECKSUM_NONE);
+ if (kskb->ip_summed == CHECKSUM_PARTIAL &&
+ (unsigned int)skb_checksum_start_offset(kskb) >= skb_headlen(kskb))
+ broken_csum_start = true;
+
+ return TC_ACT_SHOT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c
index 8924e06bdef0..c1dfbd2b56fc 100644
--- a/tools/testing/selftests/bpf/progs/dev_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c
@@ -41,14 +41,14 @@ int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor);
#endif
- /* Allow access to /dev/zero and /dev/random.
+ /* Allow access to /dev/null and /dev/urandom.
* Forbid everything else.
*/
if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR)
return 0;
switch (ctx->minor) {
- case 5: /* 1:5 /dev/zero */
+ case 3: /* 1:3 /dev/null */
case 9: /* 1:9 /dev/urandom */
return 1;
}
@@ -57,4 +57,3 @@ int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
new file mode 100644
index 000000000000..13cdb11fdeb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+/* From uapi/linux/dma-buf.h */
+#define DMA_BUF_NAME_LEN 32
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, DMA_BUF_NAME_LEN);
+ __type(value, bool);
+ __uint(max_entries, 5);
+} testbuf_hash SEC(".maps");
+
+/*
+ * Fields output by this iterator are delimited by newlines. Convert any
+ * newlines in user-provided printed strings to spaces.
+ */
+static void sanitize_string(char *src, size_t size)
+{
+ for (char *c = src; (size_t)(c - src) < size && *c; ++c)
+ if (*c == '\n')
+ *c = ' ';
+}
+
+SEC("iter/dmabuf")
+int dmabuf_collector(struct bpf_iter__dmabuf *ctx)
+{
+ const struct dma_buf *dmabuf = ctx->dmabuf;
+ struct seq_file *seq = ctx->meta->seq;
+ unsigned long inode = 0;
+ size_t size;
+ const char *pname, *exporter;
+ char name[DMA_BUF_NAME_LEN] = {'\0'};
+
+ if (!dmabuf)
+ return 0;
+
+ if (BPF_CORE_READ_INTO(&inode, dmabuf, file, f_inode, i_ino) ||
+ bpf_core_read(&size, sizeof(size), &dmabuf->size) ||
+ bpf_core_read(&pname, sizeof(pname), &dmabuf->name) ||
+ bpf_core_read(&exporter, sizeof(exporter), &dmabuf->exp_name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (pname) {
+ if (bpf_probe_read_kernel(name, sizeof(name), pname))
+ return 1;
+
+ /* Name strings can be provided by userspace */
+ sanitize_string(name, sizeof(name));
+ }
+
+ BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter);
+ return 0;
+}
+
+SEC("syscall")
+int iter_dmabuf_for_each(const void *ctx)
+{
+ struct dma_buf *d;
+
+ bpf_for_each(dmabuf, d) {
+ char name[DMA_BUF_NAME_LEN];
+ const char *pname;
+ bool *found;
+ long len;
+ int i;
+
+ if (bpf_core_read(&pname, sizeof(pname), &d->name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (!pname)
+ continue;
+
+ len = bpf_probe_read_kernel_str(name, sizeof(name), pname);
+ if (len < 0)
+ return 1;
+
+ /*
+ * The entire name buffer is used as a map key.
+ * Zeroize any uninitialized trailing bytes after the NUL.
+ */
+ bpf_for(i, len, DMA_BUF_NAME_LEN)
+ name[i] = 0;
+
+ found = bpf_map_lookup_elem(&testbuf_hash, name);
+ if (found) {
+ bool t = true;
+
+ bpf_map_update_elem(&testbuf_hash, name, &t, BPF_EXIST);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c b/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c
new file mode 100644
index 000000000000..0bf969a0b5ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops.s/test_2")
+__failure __msg("attach to unsupported member test_2 of struct bpf_dummy_ops")
+int BPF_PROG(test_unsupported_field_sleepable,
+ struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
+ char a3, unsigned long a4)
+{
+ /* Tries to mark an unsleepable field in struct bpf_dummy_ops as sleepable. */
+ return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_dummy_ops dummy_1 = {
+ .test_1 = NULL,
+ .test_2 = (void *)test_unsupported_field_sleepable,
+ .test_sleepable = (void *)NULL,
+};
diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c
new file mode 100644
index 000000000000..ec0c595d47af
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1, struct bpf_dummy_ops_state *state)
+{
+ int ret;
+
+ /* Check that 'state' nullable status is detected correctly.
+ * If 'state' argument would be assumed non-null by verifier
+ * the code below would be deleted as dead (which it shouldn't).
+ * Hide it from the compiler behind 'asm' block to avoid
+ * unnecessary optimizations.
+ */
+ asm volatile (
+ "if %[state] != 0 goto +2;"
+ "r0 = 0xf2f3f4f5;"
+ "exit;"
+ ::[state]"p"(state));
+
+ ret = state->val;
+ state->val = 0x5a;
+ return ret;
+}
+
+__u64 test_2_args[5];
+
+SEC("struct_ops/test_2")
+int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
+ char a3, unsigned long a4)
+{
+ test_2_args[0] = state->val;
+ test_2_args[1] = a1;
+ test_2_args[2] = a2;
+ test_2_args[3] = a3;
+ test_2_args[4] = a4;
+ return 0;
+}
+
+SEC("struct_ops.s/test_sleepable")
+int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state)
+{
+ return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_dummy_ops dummy_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+ .test_sleepable = (void *)test_sleepable,
+};
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
new file mode 100644
index 000000000000..dda6a8dada82
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -0,0 +1,1995 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <errno.h>
+#include <string.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/if_ether.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct test_info {
+ int x;
+ struct bpf_dynptr ptr;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct bpf_dynptr);
+} array_map1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct test_info);
+} array_map2 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map3 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array_map4 SEC(".maps");
+
+struct sample {
+ int pid;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+int err, val;
+
+static int get_map_val_dynptr(struct bpf_dynptr *ptr)
+{
+ __u32 key = 0, *map_val;
+
+ bpf_map_update_elem(&array_map3, &key, &val, 0);
+
+ map_val = bpf_map_lookup_elem(&array_map3, &key);
+ if (!map_val)
+ return -ENOENT;
+
+ bpf_dynptr_from_mem(map_val, sizeof(*map_val), 0, ptr);
+
+ return 0;
+}
+
+/* Every bpf_ringbuf_reserve_dynptr call must have a corresponding
+ * bpf_ringbuf_submit/discard_dynptr call
+ */
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=2")
+int ringbuf_missing_release1(void *ctx)
+{
+ struct bpf_dynptr ptr = {};
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ /* missing a call to bpf_ringbuf_discard/submit_dynptr */
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=4")
+int ringbuf_missing_release2(void *ctx)
+{
+ struct bpf_dynptr ptr1, ptr2;
+ struct sample *sample;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr1);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2);
+
+ sample = bpf_dynptr_data(&ptr1, 0, sizeof(*sample));
+ if (!sample) {
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+ return 0;
+ }
+
+ bpf_ringbuf_submit_dynptr(&ptr1, 0);
+
+ /* missing a call to bpf_ringbuf_discard/submit_dynptr on ptr2 */
+
+ return 0;
+}
+
+static int missing_release_callback_fn(__u32 index, void *data)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ /* missing a call to bpf_ringbuf_discard/submit_dynptr */
+
+ return 0;
+}
+
+/* Any dynptr initialized within a callback must have bpf_dynptr_put called */
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id")
+int ringbuf_missing_release_callback(void *ctx)
+{
+ bpf_loop(10, missing_release_callback_fn, NULL, 0);
+ return 0;
+}
+
+/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */
+SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
+int ringbuf_release_uninit_dynptr(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* A dynptr can't be used after it has been invalidated */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #2")
+int use_after_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr);
+
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ return 0;
+}
+
+/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */
+SEC("?raw_tp")
+__failure __msg("type=mem expected=ringbuf_mem")
+int ringbuf_invalid_api(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct sample *sample;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr);
+ sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+ if (!sample)
+ goto done;
+
+ sample->pid = 123;
+
+ /* invalid API use. need to use dynptr API to submit/discard */
+ bpf_ringbuf_submit(sample, 0);
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* Can't add a dynptr to a map */
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int add_dynptr_to_map1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ int key = 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ /* this should fail */
+ bpf_map_update_elem(&array_map1, &key, &ptr, 0);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* Can't add a struct with an embedded dynptr to a map */
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int add_dynptr_to_map2(void *ctx)
+{
+ struct test_info x;
+ int key = 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &x.ptr);
+
+ /* this should fail */
+ bpf_map_update_elem(&array_map2, &key, &x, 0);
+
+ bpf_ringbuf_submit_dynptr(&x.ptr, 0);
+
+ return 0;
+}
+
+/* A data slice can't be accessed out of bounds */
+SEC("?raw_tp")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_ringbuf(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ void *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+ data = bpf_dynptr_data(&ptr, 0, 8);
+ if (!data)
+ goto done;
+
+ /* can't index out of bounds of the data slice */
+ val = *((char *)data + 8);
+
+done:
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* A data slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ /* this should fail */
+ *(__u8*)(hdr + 1) = 1;
+
+ return SK_PASS;
+}
+
+/* A metadata slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *(md + 1) = 42;
+
+ return SK_PASS;
+}
+
+SEC("?raw_tp")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_map_value(void *ctx)
+{
+ __u32 map_val;
+ struct bpf_dynptr ptr;
+ void *data;
+
+ get_map_val_dynptr(&ptr);
+
+ data = bpf_dynptr_data(&ptr, 0, sizeof(map_val));
+ if (!data)
+ return 0;
+
+ /* can't index out of bounds of the data slice */
+ val = *((char *)data + (sizeof(map_val) + 1));
+
+ return 0;
+}
+
+/* A data slice can't be used after it has been released */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int data_slice_use_after_release1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct sample *sample;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr);
+ sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+ if (!sample)
+ goto done;
+
+ sample->pid = 123;
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ val = sample->pid;
+
+ return 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* A data slice can't be used after it has been released.
+ *
+ * This tests the case where the data slice tracks a dynptr (ptr2)
+ * that is at a non-zero offset from the frame pointer (ptr1 is at fp,
+ * ptr2 is at fp - 16).
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int data_slice_use_after_release2(void *ctx)
+{
+ struct bpf_dynptr ptr1, ptr2;
+ struct sample *sample;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr1);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2);
+
+ sample = bpf_dynptr_data(&ptr2, 0, sizeof(*sample));
+ if (!sample)
+ goto done;
+
+ sample->pid = 23;
+
+ bpf_ringbuf_submit_dynptr(&ptr2, 0);
+
+ /* this should fail */
+ sample->pid = 23;
+
+ bpf_ringbuf_submit_dynptr(&ptr1, 0);
+
+ return 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ return 0;
+}
+
+/* A data slice must be first checked for NULL */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'mem_or_null'")
+int data_slice_missing_null_check1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ void *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+ data = bpf_dynptr_data(&ptr, 0, 8);
+
+ /* missing if (!data) check */
+
+ /* this should fail */
+ *(__u8 *)data = 3;
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* A data slice can't be dereferenced if it wasn't checked for null */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'mem_or_null'")
+int data_slice_missing_null_check2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u64 *data1, *data2;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr);
+
+ data1 = bpf_dynptr_data(&ptr, 0, 8);
+ data2 = bpf_dynptr_data(&ptr, 0, 8);
+ if (data1)
+ /* this should fail */
+ *data2 = 3;
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* Can't pass in a dynptr as an arg to a helper function that doesn't take in a
+ * dynptr argument
+ */
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int invalid_helper1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ get_map_val_dynptr(&ptr);
+
+ /* this should fail */
+ bpf_strncmp((const char *)&ptr, sizeof(ptr), "hello!");
+
+ return 0;
+}
+
+/* A dynptr can't be passed into a helper function at a non-zero offset */
+SEC("?raw_tp")
+__failure __msg("cannot pass in dynptr at an offset=-8")
+int invalid_helper2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char read_data[64];
+
+ get_map_val_dynptr(&ptr);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0);
+ return 0;
+}
+
+/* A bpf_dynptr is invalidated if it's been written into */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #0")
+int invalid_write1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ void *data;
+ __u8 x = 0;
+
+ get_map_val_dynptr(&ptr);
+
+ memcpy(&ptr, &x, sizeof(x));
+
+ /* this should fail */
+ data = bpf_dynptr_data(&ptr, 0, 1);
+ __sink(data);
+
+ return 0;
+}
+
+/*
+ * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed
+ * offset
+ */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int invalid_write2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char read_data[64];
+ __u8 x = 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ memcpy((void *)&ptr + 8, &x, sizeof(x));
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/*
+ * A bpf_dynptr can't be used as a dynptr if it has been written into at a
+ * non-const offset
+ */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int invalid_write3(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char stack_buf[16];
+ unsigned long len;
+ __u8 x = 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+ memcpy(stack_buf, &val, sizeof(val));
+ len = stack_buf[0] & 0xf;
+
+ memcpy((void *)&ptr + len, &x, sizeof(x));
+
+ /* this should fail */
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+static int invalid_write4_callback(__u32 index, void *data)
+{
+ *(__u32 *)data = 123;
+
+ return 0;
+}
+
+/* If the dynptr is written into in a callback function, it should
+ * be invalidated as a dynptr
+ */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int invalid_write4(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ bpf_loop(10, invalid_write4_callback, &ptr, 0);
+
+ /* this should fail */
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */
+struct bpf_dynptr global_dynptr;
+
+SEC("?raw_tp")
+__failure __msg("type=map_value expected=fp")
+int global(void *ctx)
+{
+ /* this should fail */
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &global_dynptr);
+
+ bpf_ringbuf_discard_dynptr(&global_dynptr, 0);
+
+ return 0;
+}
+
+/* A direct read should fail */
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int invalid_read1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ /* this should fail */
+ val = *(int *)&ptr;
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* A direct read at an offset should fail */
+SEC("?raw_tp")
+__failure __msg("cannot pass in dynptr at an offset")
+int invalid_read2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char read_data[64];
+
+ get_map_val_dynptr(&ptr);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0, 0);
+
+ return 0;
+}
+
+/* A direct read at an offset into the lower stack slot should fail */
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int invalid_read3(void *ctx)
+{
+ struct bpf_dynptr ptr1, ptr2;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr1);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr2);
+
+ /* this should fail */
+ memcpy(&val, (void *)&ptr1 + 8, sizeof(val));
+
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+
+ return 0;
+}
+
+static int invalid_read4_callback(__u32 index, void *data)
+{
+ /* this should fail */
+ val = *(__u32 *)data;
+
+ return 0;
+}
+
+/* A direct read within a callback function should fail */
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int invalid_read4(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ bpf_loop(10, invalid_read4_callback, &ptr, 0);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* Initializing a dynptr on an offset should fail */
+SEC("?raw_tp")
+__failure __msg("cannot pass in dynptr at an offset=0")
+int invalid_offset(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr + 1);
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* Can't release a dynptr twice */
+SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
+int release_twice(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr);
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ /* this second release should fail */
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+static int release_twice_callback_fn(__u32 index, void *data)
+{
+ /* this should fail */
+ bpf_ringbuf_discard_dynptr(data, 0);
+
+ return 0;
+}
+
+/* Test that releasing a dynptr twice, where one of the releases happens
+ * within a callback function, fails
+ */
+SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
+int release_twice_callback(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 32, 0, &ptr);
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ bpf_loop(10, release_twice_callback_fn, &ptr, 0);
+
+ return 0;
+}
+
+/* Reject unsupported local mem types for dynptr_from_mem API */
+SEC("?raw_tp")
+__failure __msg("Unsupported reg type fp for bpf_dynptr_from_mem data")
+int dynptr_from_mem_invalid_api(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ int x = 0;
+
+ /* this should fail */
+ bpf_dynptr_from_mem(&x, sizeof(x), 0, &ptr);
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
+int dynptr_pruning_overwrite(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r9 = 0xeB9F; \
+ r6 = %[ringbuf] ll; \
+ r1 = r6; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ if r0 == 0 goto pjmp1; \
+ goto pjmp2; \
+ pjmp1: \
+ *(u64 *)(r10 - 16) = r9; \
+ pjmp2: \
+ r1 = r10; \
+ r1 += -16; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm_addr(ringbuf)
+ : __clobber_all
+ );
+ return 0;
+}
+
+SEC("?tc")
+__success __msg("12: safe") __log_level(2)
+int dynptr_pruning_stacksafe(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r9 = 0xeB9F; \
+ r6 = %[ringbuf] ll; \
+ r1 = r6; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ if r0 == 0 goto stjmp1; \
+ goto stjmp2; \
+ stjmp1: \
+ r9 = r9; \
+ stjmp2: \
+ r1 = r10; \
+ r1 += -16; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm_addr(ringbuf)
+ : __clobber_all
+ );
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
+int dynptr_pruning_type_confusion(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r6 = %[array_map4] ll; \
+ r7 = %[ringbuf] ll; \
+ r1 = r6; \
+ r2 = r10; \
+ r2 += -8; \
+ r9 = 0; \
+ *(u64 *)(r2 + 0) = r9; \
+ r3 = r10; \
+ r3 += -24; \
+ r9 = 0xeB9FeB9F; \
+ *(u64 *)(r10 - 16) = r9; \
+ *(u64 *)(r10 - 24) = r9; \
+ r9 = 0; \
+ r4 = 0; \
+ r8 = r2; \
+ call %[bpf_map_update_elem]; \
+ r1 = r6; \
+ r2 = r8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto tjmp1; \
+ exit; \
+ tjmp1: \
+ r8 = r0; \
+ r1 = r7; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ r0 = *(u64 *)(r0 + 0); \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ if r0 == 0 goto tjmp2; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ goto tjmp3; \
+ tjmp2: \
+ *(u64 *)(r10 - 8) = r9; \
+ *(u64 *)(r10 - 16) = r9; \
+ r1 = r8; \
+ r1 += 8; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ call %[bpf_dynptr_from_mem]; \
+ tjmp3: \
+ r1 = r10; \
+ r1 += -16; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_map_update_elem),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_dynptr_from_mem),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm_addr(array_map4),
+ __imm_addr(ringbuf)
+ : __clobber_all
+ );
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("dynptr has to be at a constant offset") __log_level(2)
+int dynptr_var_off_overwrite(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r9 = 16; \
+ *(u32 *)(r10 - 4) = r9; \
+ r8 = *(u32 *)(r10 - 4); \
+ if r8 >= 0 goto vjmp1; \
+ r0 = 1; \
+ exit; \
+ vjmp1: \
+ if r8 <= 16 goto vjmp2; \
+ r0 = 1; \
+ exit; \
+ vjmp2: \
+ r8 &= 16; \
+ r1 = %[ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -32; \
+ r4 += r8; \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ r9 = 0xeB9F; \
+ *(u64 *)(r10 - 16) = r9; \
+ r1 = r10; \
+ r1 += -32; \
+ r1 += r8; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm_addr(ringbuf)
+ : __clobber_all
+ );
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
+int dynptr_partial_slot_invalidate(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r6 = %[ringbuf] ll; \
+ r7 = %[array_map4] ll; \
+ r1 = r7; \
+ r2 = r10; \
+ r2 += -8; \
+ r9 = 0; \
+ *(u64 *)(r2 + 0) = r9; \
+ r3 = r2; \
+ r4 = 0; \
+ r8 = r2; \
+ call %[bpf_map_update_elem]; \
+ r1 = r7; \
+ r2 = r8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto sjmp1; \
+ exit; \
+ sjmp1: \
+ r7 = r0; \
+ r1 = r6; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -24; \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ *(u64 *)(r10 - 16) = r9; \
+ r1 = r7; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ call %[bpf_dynptr_from_mem]; \
+ r1 = r10; \
+ r1 += -512; \
+ r2 = 488; \
+ r3 = r10; \
+ r3 += -24; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_dynptr_read]; \
+ r8 = 1; \
+ if r0 != 0 goto sjmp2; \
+ r8 = 0; \
+ sjmp2: \
+ r1 = r10; \
+ r1 += -24; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_map_update_elem),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm(bpf_dynptr_from_mem),
+ __imm(bpf_dynptr_read),
+ __imm_addr(ringbuf),
+ __imm_addr(array_map4)
+ : __clobber_all
+ );
+ return 0;
+}
+
+/* Test that it is allowed to overwrite unreferenced dynptr. */
+SEC("?raw_tp")
+__success
+int dynptr_overwrite_unref(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+
+ return 0;
+}
+
+/* Test that slices are invalidated on reinitializing a dynptr. */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int dynptr_invalidate_slice_reinit(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u8 *p;
+
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+ p = bpf_dynptr_data(&ptr, 0, 1);
+ if (!p)
+ return 0;
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+ /* this should fail */
+ return *p;
+}
+
+/* Invalidation of dynptr slices on destruction of dynptr should not miss
+ * mem_or_null pointers.
+ */
+SEC("?raw_tp")
+__failure __msg("R{{[0-9]+}} type=scalar expected=percpu_ptr_")
+int dynptr_invalidate_slice_or_null(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u8 *p;
+
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+
+ p = bpf_dynptr_data(&ptr, 0, 1);
+ *(__u8 *)&ptr = 0;
+ /* this should fail */
+ bpf_this_cpu_ptr(p);
+ return 0;
+}
+
+/* Destruction of dynptr should also any slices obtained from it */
+SEC("?raw_tp")
+__failure __msg("R{{[0-9]+}} invalid mem access 'scalar'")
+int dynptr_invalidate_slice_failure(void *ctx)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u8 *p1, *p2;
+
+ if (get_map_val_dynptr(&ptr1))
+ return 0;
+ if (get_map_val_dynptr(&ptr2))
+ return 0;
+
+ p1 = bpf_dynptr_data(&ptr1, 0, 1);
+ if (!p1)
+ return 0;
+ p2 = bpf_dynptr_data(&ptr2, 0, 1);
+ if (!p2)
+ return 0;
+
+ *(__u8 *)&ptr1 = 0;
+ /* this should fail */
+ return *p1;
+}
+
+/* Invalidation of slices should be scoped and should not prevent dereferencing
+ * slices of another dynptr after destroying unrelated dynptr
+ */
+SEC("?raw_tp")
+__success
+int dynptr_invalidate_slice_success(void *ctx)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u8 *p1, *p2;
+
+ if (get_map_val_dynptr(&ptr1))
+ return 1;
+ if (get_map_val_dynptr(&ptr2))
+ return 1;
+
+ p1 = bpf_dynptr_data(&ptr1, 0, 1);
+ if (!p1)
+ return 1;
+ p2 = bpf_dynptr_data(&ptr2, 0, 1);
+ if (!p2)
+ return 1;
+
+ *(__u8 *)&ptr1 = 0;
+ return *p2;
+}
+
+/* Overwriting referenced dynptr should be rejected */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int dynptr_overwrite_ref(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+ /* this should fail */
+ if (get_map_val_dynptr(&ptr))
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* Reject writes to dynptr slot from bpf_dynptr_read */
+SEC("?raw_tp")
+__failure __msg("potential write to dynptr at off=-16")
+int dynptr_read_into_slot(void *ctx)
+{
+ union {
+ struct {
+ char _pad[48];
+ struct bpf_dynptr ptr;
+ };
+ char buf[64];
+ } data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &data.ptr);
+ /* this should fail */
+ bpf_dynptr_read(data.buf, sizeof(data.buf), &data.ptr, 0, 0);
+
+ return 0;
+}
+
+/* bpf_dynptr_slice()s are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem")
+int skb_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* bpf_dynptr_slice()s are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem")
+int skb_meta_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return SK_PASS;
+}
+
+/* The read-write data slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice2(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 123;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice3(struct __sk_buff *skb)
+{
+ char write_data[64] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return SK_PASS;
+}
+
+/* The read-write data slice is invalidated whenever bpf_dynptr_write() is called */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice4(struct __sk_buff *skb)
+{
+ char write_data[64] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 123;
+
+ bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* Read-only skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *d;
+
+ return SK_PASS;
+}
+
+/* Read-write skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *d = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb data slice */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever a helper changes packet data */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int xdp_invalid_data_slice1(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return XDP_PASS;
+}
+
+/* The read-write data slice is invalidated whenever a helper changes packet data */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int xdp_invalid_data_slice2(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 9;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return XDP_PASS;
+}
+
+/* Only supported prog type can create skb-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed")
+int skb_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_skb(ctx, 0, &ptr);
+
+ return 0;
+}
+
+/* Only supported prog type can create skb_meta-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed")
+int skb_meta_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr meta;
+
+ /* this should fail */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+
+ return 0;
+}
+
+SEC("fentry/skb_tx_error")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ return 0;
+}
+
+SEC("fexit/skb_tx_error")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(skb_invalid_ctx_fexit, void *skb)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ return 0;
+}
+
+/* Reject writes to dynptr slot for uninit arg */
+SEC("?raw_tp")
+__failure __msg("potential write to dynptr at off=-16")
+int uninit_write_into_slot(void *ctx)
+{
+ struct {
+ char buf[64];
+ struct bpf_dynptr ptr;
+ } data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 80, 0, &data.ptr);
+ /* this should fail */
+ bpf_get_current_comm(data.buf, 80);
+
+ return 0;
+}
+
+/* Only supported prog type can create xdp-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_xdp is not allowed")
+int xdp_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_xdp(ctx, 0, &ptr);
+
+ return 0;
+}
+
+__u32 hdr_size = sizeof(struct ethhdr);
+/* Can't pass in variable-sized len to bpf_dynptr_slice */
+SEC("?tc")
+__failure __msg("unbounded memory access")
+int dynptr_slice_var_len1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* this should fail */
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, hdr_size);
+ if (!hdr)
+ return SK_DROP;
+
+ return SK_PASS;
+}
+
+/* Can't pass in variable-sized len to bpf_dynptr_slice */
+SEC("?tc")
+__failure __msg("must be a known constant")
+int dynptr_slice_var_len2(struct __sk_buff *skb)
+{
+ char buffer[sizeof(struct ethhdr)] = {};
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ if (hdr_size <= sizeof(buffer)) {
+ /* this should fail */
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, hdr_size);
+ if (!hdr)
+ return SK_DROP;
+ hdr->h_proto = 12;
+ }
+
+ return SK_PASS;
+}
+
+static int callback(__u32 index, void *data)
+{
+ *(__u32 *)data = 123;
+
+ return 0;
+}
+
+/* If the dynptr is written into in a callback function, its data
+ * slices should be invalidated as well.
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int invalid_data_slices(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u32 *slice;
+
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+
+ slice = bpf_dynptr_data(&ptr, 0, sizeof(__u32));
+ if (!slice)
+ return 0;
+
+ bpf_loop(10, callback, &ptr, 0);
+
+ /* this should fail */
+ *slice = 1;
+
+ return 0;
+}
+
+/* Program types that don't allow writes to packet data should fail if
+ * bpf_dynptr_slice_rdwr is called
+ */
+SEC("cgroup_skb/ingress")
+__failure __msg("the prog does not allow writes to packet data")
+int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
+{
+ char buffer[sizeof(struct ethhdr)] = {};
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* this should fail since cgroup_skb doesn't allow
+ * changing packet data
+ */
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ __sink(hdr);
+
+ return 0;
+}
+
+/* bpf_dynptr_adjust can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #0")
+int dynptr_adjust_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr = {};
+
+ /* this should fail */
+ bpf_dynptr_adjust(&ptr, 1, 2);
+
+ return 0;
+}
+
+/* bpf_dynptr_is_null can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #0")
+int dynptr_is_null_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr = {};
+
+ /* this should fail */
+ bpf_dynptr_is_null(&ptr);
+
+ return 0;
+}
+
+/* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #0")
+int dynptr_is_rdonly_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr = {};
+
+ /* this should fail */
+ bpf_dynptr_is_rdonly(&ptr);
+
+ return 0;
+}
+
+/* bpf_dynptr_size can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #0")
+int dynptr_size_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr = {};
+
+ /* this should fail */
+ bpf_dynptr_size(&ptr);
+
+ return 0;
+}
+
+/* Only initialized dynptrs can be cloned */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #0")
+int clone_invalid1(void *ctx)
+{
+ struct bpf_dynptr ptr1 = {};
+ struct bpf_dynptr ptr2;
+
+ /* this should fail */
+ bpf_dynptr_clone(&ptr1, &ptr2);
+
+ return 0;
+}
+
+/* Can't overwrite an existing dynptr when cloning */
+SEC("?xdp")
+__failure __msg("cannot overwrite referenced dynptr")
+int clone_invalid2(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr clone;
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr1);
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &clone);
+
+ /* this should fail */
+ bpf_dynptr_clone(&ptr1, &clone);
+
+ bpf_ringbuf_submit_dynptr(&clone, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate its clones */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #2")
+int clone_invalidate1(void *ctx)
+{
+ struct bpf_dynptr clone;
+ struct bpf_dynptr ptr;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &clone, 0, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate its parent */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #2")
+int clone_invalidate2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ bpf_ringbuf_submit_dynptr(&clone, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate its siblings */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #2")
+int clone_invalidate3(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone1;
+ struct bpf_dynptr clone2;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone1);
+
+ bpf_dynptr_clone(&ptr, &clone2);
+
+ bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &clone1, 0, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its clones
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate4(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+ data = bpf_dynptr_data(&clone, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its parent
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate5(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+ data = bpf_dynptr_data(&ptr, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ bpf_ringbuf_submit_dynptr(&clone, 0);
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its sibling
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate6(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone1;
+ struct bpf_dynptr clone2;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone1);
+
+ bpf_dynptr_clone(&ptr, &clone2);
+
+ data = bpf_dynptr_data(&clone1, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* A skb clone's data slices should be invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_data(struct __sk_buff *skb)
+{
+ char buffer[sizeof(__u32)] = {};
+ struct bpf_dynptr clone;
+ struct bpf_dynptr ptr;
+ __u32 *data;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+ data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer));
+ if (!data)
+ return XDP_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* A skb clone's metadata slice becomes invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr clone, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ bpf_dynptr_clone(&meta, &clone);
+ md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return 0;
+}
+
+/* A xdp clone's data slices should be invalid anytime packet data changes */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_xdp_packet_data(struct xdp_md *xdp)
+{
+ char buffer[sizeof(__u32)] = {};
+ struct bpf_dynptr clone;
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ __u32 *data;
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+ data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer));
+ if (!data)
+ return XDP_DROP;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* Buffers that are provided must be sufficiently long */
+SEC("?cgroup_skb/egress")
+__failure __msg("memory, len pair leads to invalid memory access")
+int test_dynptr_skb_small_buff(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ char buffer[8] = {};
+ __u64 *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This may return NULL. SKB may require a buffer */
+ data = bpf_dynptr_slice(&ptr, 0, buffer, 9);
+
+ return !!data;
+}
+
+__noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr)
+{
+ long ret = 0;
+ /* Avoid leaving this global function empty to avoid having the compiler
+ * optimize away the call to this global function.
+ */
+ __sink(ret);
+ return ret;
+}
+
+SEC("?raw_tp")
+__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
+int test_dynptr_reg_type(void *ctx)
+{
+ struct task_struct *current = NULL;
+ /* R1 should be holding a PTR_TO_BTF_ID, so this shouldn't be a
+ * reg->type that can be passed to a function accepting a
+ * ARG_PTR_TO_DYNPTR | MEM_RDONLY. process_dynptr_func() should catch
+ * this.
+ */
+ global_call_bpf_dynptr((const struct bpf_dynptr *)current);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
new file mode 100644
index 000000000000..e0d672d93adf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -0,0 +1,1137 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+#define PAGE_SIZE_64K 65536
+
+char _license[] SEC("license") = "GPL";
+
+int pid, err, val;
+
+struct ringbuf_sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map SEC(".maps");
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_read_write(void *ctx)
+{
+ char write_data[64] = "hello there, world!!";
+ char read_data[64] = {};
+ struct bpf_dynptr ptr;
+ int i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
+
+ /* Write data into the dynptr */
+ err = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* Read the data that was written into the dynptr */
+ err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ /* Ensure the data we read matches the data we wrote */
+ for (i = 0; i < sizeof(read_data); i++) {
+ if (read_data[i] != write_data[i]) {
+ err = 1;
+ break;
+ }
+ }
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_data(void *ctx)
+{
+ __u32 key = 0, val = 235, *map_val;
+ struct bpf_dynptr ptr;
+ __u32 map_val_size;
+ void *data;
+
+ map_val_size = sizeof(*map_val);
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ bpf_map_update_elem(&array_map, &key, &val, 0);
+
+ map_val = bpf_map_lookup_elem(&array_map, &key);
+ if (!map_val) {
+ err = 1;
+ return 0;
+ }
+
+ bpf_dynptr_from_mem(map_val, map_val_size, 0, &ptr);
+
+ /* Try getting a data slice that is out of range */
+ data = bpf_dynptr_data(&ptr, map_val_size + 1, 1);
+ if (data) {
+ err = 2;
+ return 0;
+ }
+
+ /* Try getting more bytes than available */
+ data = bpf_dynptr_data(&ptr, 0, map_val_size + 1);
+ if (data) {
+ err = 3;
+ return 0;
+ }
+
+ data = bpf_dynptr_data(&ptr, 0, sizeof(__u32));
+ if (!data) {
+ err = 4;
+ return 0;
+ }
+
+ *(__u32 *)data = 999;
+
+ err = bpf_probe_read_kernel(&val, sizeof(val), data);
+ if (err)
+ return 0;
+
+ if (val != *(int *)data)
+ err = 5;
+
+ return 0;
+}
+
+static int ringbuf_callback(__u32 index, void *data)
+{
+ struct ringbuf_sample *sample;
+
+ struct bpf_dynptr *ptr = (struct bpf_dynptr *)data;
+
+ sample = bpf_dynptr_data(ptr, 0, sizeof(*sample));
+ if (!sample)
+ err = 2;
+ else
+ sample->pid += index;
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_ringbuf(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct ringbuf_sample *sample;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ val = 100;
+
+ /* check that you can reserve a dynamic size reservation */
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ sample = err ? NULL : bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+ if (!sample) {
+ err = 1;
+ goto done;
+ }
+
+ sample->pid = 10;
+
+ /* Can pass dynptr to callback functions */
+ bpf_loop(10, ringbuf_callback, &ptr, 0);
+
+ if (sample->pid != 55)
+ err = 2;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_skb_readonly(struct __sk_buff *skb)
+{
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ int ret;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* since cgroup skbs are read only, writes should fail */
+ ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+ if (ret != -EINVAL) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_data(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ __u64 *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ data = bpf_dynptr_data(&ptr, 0, 1);
+ if (data) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
+
+SEC("?tc")
+int test_dynptr_skb_meta_data(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ err = 2;
+ md = bpf_dynptr_data(&meta, 0, sizeof(*md));
+ if (md)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
+/* Check that skb metadata dynptr ops don't accept any flags. */
+SEC("?tc")
+int test_dynptr_skb_meta_flags(struct __sk_buff *skb)
+{
+ const __u64 INVALID_FLAGS = ~0ULL;
+ struct bpf_dynptr meta;
+ __u8 buf;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 2;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ err = 3;
+ ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 4;
+ ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_adjust(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u32 bytes = 64;
+ __u32 off = 10;
+ __u32 trim = 15;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, bytes, 0, &ptr);
+ if (err) {
+ err = 1;
+ goto done;
+ }
+
+ if (bpf_dynptr_size(&ptr) != bytes) {
+ err = 2;
+ goto done;
+ }
+
+ /* Advance the dynptr by off */
+ err = bpf_dynptr_adjust(&ptr, off, bpf_dynptr_size(&ptr));
+ if (err) {
+ err = 3;
+ goto done;
+ }
+
+ if (bpf_dynptr_size(&ptr) != bytes - off) {
+ err = 4;
+ goto done;
+ }
+
+ /* Trim the dynptr */
+ err = bpf_dynptr_adjust(&ptr, off, 15);
+ if (err) {
+ err = 5;
+ goto done;
+ }
+
+ /* Check that the size was adjusted correctly */
+ if (bpf_dynptr_size(&ptr) != trim - off) {
+ err = 6;
+ goto done;
+ }
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_adjust_err(void *ctx)
+{
+ char write_data[45] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ __u32 size = 64;
+ __u32 off = 20;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+ err = 1;
+ goto done;
+ }
+
+ /* Check that start can't be greater than end */
+ if (bpf_dynptr_adjust(&ptr, 5, 1) != -EINVAL) {
+ err = 2;
+ goto done;
+ }
+
+ /* Check that start can't be greater than size */
+ if (bpf_dynptr_adjust(&ptr, size + 1, size + 1) != -ERANGE) {
+ err = 3;
+ goto done;
+ }
+
+ /* Check that end can't be greater than size */
+ if (bpf_dynptr_adjust(&ptr, 0, size + 1) != -ERANGE) {
+ err = 4;
+ goto done;
+ }
+
+ if (bpf_dynptr_adjust(&ptr, off, size)) {
+ err = 5;
+ goto done;
+ }
+
+ /* Check that you can't write more bytes than available into the dynptr
+ * after you've adjusted it
+ */
+ if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+ err = 6;
+ goto done;
+ }
+
+ /* Check that even after adjusting, submitting/discarding
+ * a ringbuf dynptr works
+ */
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+ return 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_zero_size_dynptr(void *ctx)
+{
+ char write_data = 'x', read_data;
+ struct bpf_dynptr ptr;
+ __u32 size = 64;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+ err = 1;
+ goto done;
+ }
+
+ /* After this, the dynptr has a size of 0 */
+ if (bpf_dynptr_adjust(&ptr, size, size)) {
+ err = 2;
+ goto done;
+ }
+
+ /* Test that reading + writing non-zero bytes is not ok */
+ if (bpf_dynptr_read(&read_data, sizeof(read_data), &ptr, 0, 0) != -E2BIG) {
+ err = 3;
+ goto done;
+ }
+
+ if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+ err = 4;
+ goto done;
+ }
+
+ /* Test that reading + writing 0 bytes from a 0-size dynptr is ok */
+ if (bpf_dynptr_read(&read_data, 0, &ptr, 0, 0)) {
+ err = 5;
+ goto done;
+ }
+
+ if (bpf_dynptr_write(&ptr, 0, &write_data, 0, 0)) {
+ err = 6;
+ goto done;
+ }
+
+ err = 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_is_null(void *ctx)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u64 size = 4;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ /* Pass in invalid flags, get back an invalid dynptr */
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 123, &ptr1) != -EINVAL) {
+ err = 1;
+ goto exit_early;
+ }
+
+ /* Test that the invalid dynptr is null */
+ if (!bpf_dynptr_is_null(&ptr1)) {
+ err = 2;
+ goto exit_early;
+ }
+
+ /* Get a valid dynptr */
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr2)) {
+ err = 3;
+ goto exit;
+ }
+
+ /* Test that the valid dynptr is not null */
+ if (bpf_dynptr_is_null(&ptr2)) {
+ err = 4;
+ goto exit;
+ }
+
+exit:
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+exit_early:
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_is_rdonly(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ struct bpf_dynptr ptr3;
+
+ /* Pass in invalid flags, get back an invalid dynptr */
+ if (bpf_dynptr_from_skb(skb, 123, &ptr1) != -EINVAL) {
+ err = 1;
+ return 0;
+ }
+
+ /* Test that an invalid dynptr is_rdonly returns false */
+ if (bpf_dynptr_is_rdonly(&ptr1)) {
+ err = 2;
+ return 0;
+ }
+
+ /* Get a read-only dynptr */
+ if (bpf_dynptr_from_skb(skb, 0, &ptr2)) {
+ err = 3;
+ return 0;
+ }
+
+ /* Test that the dynptr is read-only */
+ if (!bpf_dynptr_is_rdonly(&ptr2)) {
+ err = 4;
+ return 0;
+ }
+
+ /* Get a read-writeable dynptr */
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr3)) {
+ err = 5;
+ goto done;
+ }
+
+ /* Test that the dynptr is read-only */
+ if (bpf_dynptr_is_rdonly(&ptr3)) {
+ err = 6;
+ goto done;
+ }
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr3, 0);
+ return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_clone(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u32 off = 2, size;
+
+ /* Get a dynptr */
+ if (bpf_dynptr_from_skb(skb, 0, &ptr1)) {
+ err = 1;
+ return 0;
+ }
+
+ if (bpf_dynptr_adjust(&ptr1, off, bpf_dynptr_size(&ptr1))) {
+ err = 2;
+ return 0;
+ }
+
+ /* Clone the dynptr */
+ if (bpf_dynptr_clone(&ptr1, &ptr2)) {
+ err = 3;
+ return 0;
+ }
+
+ size = bpf_dynptr_size(&ptr1);
+
+ /* Check that the clone has the same size and rd-only */
+ if (bpf_dynptr_size(&ptr2) != size) {
+ err = 4;
+ return 0;
+ }
+
+ if (bpf_dynptr_is_rdonly(&ptr2) != bpf_dynptr_is_rdonly(&ptr1)) {
+ err = 5;
+ return 0;
+ }
+
+ /* Advance and trim the original dynptr */
+ bpf_dynptr_adjust(&ptr1, 5, 5);
+
+ /* Check that only original dynptr was affected, and the clone wasn't */
+ if (bpf_dynptr_size(&ptr2) != size) {
+ err = 6;
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_no_buff(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ __u64 *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This may return NULL. SKB may require a buffer */
+ data = bpf_dynptr_slice(&ptr, 0, NULL, 1);
+
+ return !!data;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_strcmp(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ char *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This may return NULL. SKB may require a buffer */
+ data = bpf_dynptr_slice(&ptr, 0, NULL, 10);
+ if (data) {
+ bpf_strncmp(data, 10, "foo");
+ return 1;
+ }
+
+ return 1;
+}
+
+SEC("tp_btf/kfree_skb")
+int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location)
+{
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ int ret;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* since tp_btf skbs are read only, writes should fail */
+ ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+ if (ret != -EINVAL) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
+
+static inline int bpf_memcmp(const char *a, const char *b, u32 size)
+{
+ int i;
+
+ bpf_for(i, 0, size) {
+ if (a[i] != b[i])
+ return a[i] < b[i] ? -1 : 1;
+ }
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_copy(void *ctx)
+{
+ char data[] = "hello there, world!!";
+ char buf[32] = {'\0'};
+ __u32 sz = sizeof(data);
+ struct bpf_dynptr src, dst;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &src);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &dst);
+
+ /* Test basic case of copying contiguous memory backed dynptrs */
+ err = bpf_dynptr_write(&src, 0, data, sz, 0);
+ err = err ?: bpf_dynptr_copy(&dst, 0, &src, 0, sz);
+ err = err ?: bpf_dynptr_read(buf, sz, &dst, 0, 0);
+ err = err ?: bpf_memcmp(data, buf, sz);
+
+ /* Test that offsets are handled correctly */
+ err = err ?: bpf_dynptr_copy(&dst, 3, &src, 5, sz - 5);
+ err = err ?: bpf_dynptr_read(buf, sz - 5, &dst, 3, 0);
+ err = err ?: bpf_memcmp(data + 5, buf, sz - 5);
+
+ bpf_ringbuf_discard_dynptr(&src, 0);
+ bpf_ringbuf_discard_dynptr(&dst, 0);
+ return 0;
+}
+
+SEC("xdp")
+int test_dynptr_copy_xdp(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr_buf, ptr_xdp;
+ char data[] = "qwertyuiopasdfghjkl";
+ char buf[32] = {'\0'};
+ __u32 len = sizeof(data), xdp_data_size;
+ int i, chunks = 200;
+
+ /* ptr_xdp is backed by non-contiguous memory */
+ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ xdp_data_size = bpf_dynptr_size(&ptr_xdp);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf);
+
+ /* Destination dynptr is backed by non-contiguous memory */
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, data, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_xdp, 0, &ptr_buf, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Source dynptr is backed by non-contiguous memory */
+ __builtin_memset(buf, 0, sizeof(buf));
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, buf, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_buf, 0, &ptr_xdp, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_buf, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Both source and destination dynptrs are backed by non-contiguous memory */
+ err = bpf_dynptr_copy(&ptr_xdp, 2, &ptr_xdp, len, len * (chunks - 1));
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks - 1) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, 2 + i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ if (bpf_dynptr_copy(&ptr_xdp, xdp_data_size - 3000, &ptr_xdp, 0, len * chunks) != -E2BIG)
+ err = 1;
+
+out:
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+ return XDP_DROP;
+}
+
+char memset_zero_data[] = "data to be zeroed";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_zero_data);
+ char zeroes[32] = {'\0'};
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, 0);
+ err = err ?: bpf_memcmp(zeroes, memset_zero_data, data_sz);
+
+ return 0;
+}
+
+#define DYNPTR_MEMSET_VAL 42
+
+char memset_notzero_data[] = "data to be overwritten";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_notzero(void *ctx)
+{
+ u32 data_sz = sizeof(memset_notzero_data);
+ struct bpf_dynptr ptr;
+ char expected[32];
+
+ __builtin_memset(expected, DYNPTR_MEMSET_VAL, data_sz);
+
+ err = bpf_dynptr_from_mem(memset_notzero_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, DYNPTR_MEMSET_VAL);
+ err = err ?: bpf_memcmp(expected, memset_notzero_data, data_sz);
+
+ return 0;
+}
+
+char memset_zero_offset_data[] = "data to be zeroed partially";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero_offset(void *ctx)
+{
+ char expected[] = "data to \0\0\0\0eroed partially";
+ __u32 data_sz = sizeof(memset_zero_offset_data);
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_offset_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 8, 4, 0);
+ err = err ?: bpf_memcmp(expected, memset_zero_offset_data, data_sz);
+
+ return 0;
+}
+
+char memset_zero_adjusted_data[] = "data to be zeroed partially";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero_adjusted(void *ctx)
+{
+ char expected[] = "data\0\0\0\0be zeroed partially";
+ __u32 data_sz = sizeof(memset_zero_adjusted_data);
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_adjusted_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_adjust(&ptr, 4, 8);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0);
+ err = err ?: bpf_memcmp(expected, memset_zero_adjusted_data, data_sz);
+
+ return 0;
+}
+
+char memset_overflow_data[] = "memset overflow data";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_overflow(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_overflow_data);
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr);
+ ret = bpf_dynptr_memset(&ptr, 0, data_sz + 1, 0);
+ if (ret != -E2BIG)
+ err = 1;
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_overflow_offset(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_overflow_data);
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr);
+ ret = bpf_dynptr_memset(&ptr, 1, data_sz, 0);
+ if (ret != -E2BIG)
+ err = 1;
+
+ return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_memset_readonly(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* cgroup skbs are read only, memset should fail */
+ ret = bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0);
+ if (ret != -EINVAL)
+ err = 1;
+
+ return 0;
+}
+
+#define min_t(type, x, y) ({ \
+ type __x = (x); \
+ type __y = (y); \
+ __x < __y ? __x : __y; })
+
+SEC("xdp")
+int test_dynptr_memset_xdp_chunks(struct xdp_md *xdp)
+{
+ u32 data_sz, chunk_sz, offset = 0;
+ const int max_chunks = 200;
+ struct bpf_dynptr ptr_xdp;
+ char expected_buf[32];
+ char buf[32];
+ int i;
+
+ __builtin_memset(expected_buf, DYNPTR_MEMSET_VAL, sizeof(expected_buf));
+
+ /* ptr_xdp is backed by non-contiguous memory */
+ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ data_sz = bpf_dynptr_size(&ptr_xdp);
+
+ err = bpf_dynptr_memset(&ptr_xdp, 0, data_sz, DYNPTR_MEMSET_VAL);
+ if (err) {
+ /* bpf_dynptr_memset() eventually called bpf_xdp_pointer()
+ * where if data_sz is greater than 0xffff, -EFAULT will be
+ * returned. For 64K page size, data_sz is greater than
+ * 64K, so error is expected and let us zero out error and
+ * return success.
+ */
+ if (data_sz >= PAGE_SIZE_64K)
+ err = 0;
+ goto out;
+ }
+
+ bpf_for(i, 0, max_chunks) {
+ offset = i * sizeof(buf);
+ if (offset >= data_sz)
+ goto out;
+ chunk_sz = min_t(u32, sizeof(buf), data_sz - offset);
+ err = bpf_dynptr_read(&buf, chunk_sz, &ptr_xdp, offset, 0);
+ if (err)
+ goto out;
+ err = bpf_memcmp(buf, expected_buf, sizeof(buf));
+ if (err)
+ goto out;
+ }
+out:
+ return XDP_DROP;
+}
+
+void *user_ptr;
+/* Contains the copy of the data pointed by user_ptr.
+ * Size 384 to make it not fit into a single kernel chunk when copying
+ * but less than the maximum bpf stack size (512).
+ */
+char expected_str[384];
+__u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257};
+
+typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr);
+
+/* Returns the offset just before the end of the maximum sized xdp fragment.
+ * Any write larger than 32 bytes will be split between 2 fragments.
+ */
+__u32 xdp_near_frag_end_offset(void)
+{
+ const __u32 headroom = 256;
+ const __u32 max_frag_size = __PAGE_SIZE - headroom - sizeof(struct skb_shared_info);
+
+ /* 32 bytes before the approximate end of the fragment */
+ return max_frag_size - 32;
+}
+
+/* Use __always_inline on test_dynptr_probe[_str][_xdp]() and callbacks
+ * of type bpf_read_dynptr_fn_t to prevent compiler from generating
+ * indirect calls that make program fail to load with "unknown opcode" error.
+ */
+static __always_inline void test_dynptr_probe(void *ptr, bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ int i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_buf, 0, test_len[i], ptr);
+ if (len > sizeof(buf))
+ break;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_buf, 0, buf, len, 0);
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_str(void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ __u32 cnt, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_buf, 0, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+static __always_inline void test_dynptr_probe_str_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 cnt, off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ if (err)
+ return;
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+SEC("xdp")
+int test_probe_read_user_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(user_ptr, bpf_probe_read_user_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, user_ptr, bpf_probe_read_user_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(expected_str, bpf_probe_read_kernel_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, expected_str, bpf_probe_read_kernel_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_user_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(user_ptr, bpf_probe_read_user_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, user_ptr, bpf_probe_read_user_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(expected_str, bpf_probe_read_kernel_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, expected_str, bpf_probe_read_kernel_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_from_user_dynptr);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_from_user_str_dynptr);
+ return 0;
+}
+
+static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_str_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_data_from_user_task);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_data_from_user_task_str);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/empty_skb.c b/tools/testing/selftests/bpf/progs/empty_skb.c
new file mode 100644
index 000000000000..4b0cd6753251
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/empty_skb.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+int ifindex;
+int ret;
+
+SEC("lwt_xmit")
+int redirect_ingress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS);
+ return 0;
+}
+
+SEC("lwt_xmit")
+int redirect_egress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, 0);
+ return 0;
+}
+
+SEC("tc")
+int tc_redirect_ingress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS);
+ return 0;
+}
+
+SEC("tc")
+int tc_redirect_egress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/epilogue_exit.c b/tools/testing/selftests/bpf/progs/epilogue_exit.c
new file mode 100644
index 000000000000..35fec7c75bef
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/epilogue_exit.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__success
+/* save __u64 *ctx to stack */
+__xlated("0: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("1: r1 = *(u64 *)(r1 +0)")
+__xlated("2: r2 = *(u64 *)(r1 +0)")
+__xlated("3: r3 = 0")
+__xlated("4: r4 = 1")
+__xlated("5: if r2 == 0x0 goto pc+10")
+__xlated("6: r0 = 0")
+__xlated("7: *(u64 *)(r1 +0) = r3")
+/* epilogue */
+__xlated("8: r1 = *(u64 *)(r10 -8)")
+__xlated("9: r1 = *(u64 *)(r1 +0)")
+__xlated("10: r6 = *(u64 *)(r1 +0)")
+__xlated("11: r6 += 10000")
+__xlated("12: *(u64 *)(r1 +0) = r6")
+__xlated("13: r0 = r6")
+__xlated("14: r0 *= 2")
+__xlated("15: exit")
+/* 2nd part of the main prog after the first exit */
+__xlated("16: *(u64 *)(r1 +0) = r4")
+__xlated("17: r0 = 1")
+/* Clear the r1 to ensure it does not have
+ * off-by-1 error and ensure it jumps back to the
+ * beginning of epilogue which initializes
+ * the r1 with the ctx ptr.
+ */
+__xlated("18: r1 = 0")
+__xlated("19: gotol pc-12")
+SEC("struct_ops/test_epilogue_exit")
+__naked int test_epilogue_exit(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r1 +0);"
+ "r2 = *(u64 *)(r1 +0);"
+ "r3 = 0;"
+ "r4 = 1;"
+ "if r2 == 0 goto +3;"
+ "r0 = 0;"
+ "*(u64 *)(r1 + 0) = r3;"
+ "exit;"
+ "*(u64 *)(r1 + 0) = r4;"
+ "r0 = 1;"
+ "r1 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops epilogue_exit = {
+ .test_epilogue = (void *)test_epilogue_exit,
+};
+
+SEC("syscall")
+__retval(20000)
+int syscall_epilogue_exit0(void *ctx)
+{
+ struct st_ops_args args = { .a = 1 };
+
+ return bpf_kfunc_st_ops_test_epilogue(&args);
+}
+
+SEC("syscall")
+__retval(20002)
+int syscall_epilogue_exit1(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_epilogue(&args);
+}
diff --git a/tools/testing/selftests/bpf/progs/epilogue_tailcall.c b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c
new file mode 100644
index 000000000000..153514691ba4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+static __noinline __used int subprog(struct st_ops_args *args)
+{
+ args->a += 1;
+ return args->a;
+}
+
+SEC("struct_ops/test_epilogue_subprog")
+int BPF_PROG(test_epilogue_subprog, struct st_ops_args *args)
+{
+ subprog(args);
+ return args->a;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+ __array(values, void (void));
+} epilogue_map SEC(".maps") = {
+ .values = {
+ [0] = (void *)&test_epilogue_subprog,
+ }
+};
+
+SEC("struct_ops/test_epilogue_tailcall")
+int test_epilogue_tailcall(unsigned long long *ctx)
+{
+ bpf_tail_call(ctx, &epilogue_map, 0);
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops epilogue_tailcall = {
+ .test_epilogue = (void *)test_epilogue_tailcall,
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops epilogue_subprog = {
+ .test_epilogue = (void *)test_epilogue_subprog,
+};
+
+SEC("syscall")
+int syscall_epilogue_tailcall(struct st_ops_args *args)
+{
+ return bpf_kfunc_st_ops_test_epilogue(args);
+}
diff --git a/tools/testing/selftests/bpf/progs/err.h b/tools/testing/selftests/bpf/progs/err.h
new file mode 100644
index 000000000000..38529779a236
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/err.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ERR_H__
+#define __ERR_H__
+
+#define MAX_ERRNO 4095
+#define IS_ERR_VALUE(x) (unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO
+
+#define __STR(x) #x
+
+#define set_if_not_errno_or_zero(x, y) \
+({ \
+ asm volatile ("if %0 s< -4095 goto +1\n" \
+ "if %0 s<= 0 goto +1\n" \
+ "%0 = " __STR(y) "\n" \
+ : "+r"(x)); \
+})
+
+static inline int IS_ERR_OR_NULL(const void *ptr)
+{
+ return !ptr || IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline long PTR_ERR(const void *ptr)
+{
+ return (long) ptr;
+}
+
+#endif /* __ERR_H__ */
diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c
new file mode 100644
index 000000000000..f09cd14d8e04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+#ifndef ETH_P_IP
+#define ETH_P_IP 0x0800
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static __noinline int static_func(u64 i)
+{
+ bpf_throw(32);
+ return i;
+}
+
+__noinline int global2static_simple(u64 i)
+{
+ static_func(i + 2);
+ return i - 1;
+}
+
+__noinline int global2static(u64 i)
+{
+ if (i == ETH_P_IP)
+ bpf_throw(16);
+ return static_func(i);
+}
+
+static __noinline int static2global(u64 i)
+{
+ return global2static(i) + i;
+}
+
+SEC("tc")
+int exception_throw_always_1(struct __sk_buff *ctx)
+{
+ bpf_throw(64);
+ return 0;
+}
+
+/* In this case, the global func will never be seen executing after call to
+ * static subprog, hence verifier will DCE the remaining instructions. Ensure we
+ * are resilient to that.
+ */
+SEC("tc")
+int exception_throw_always_2(struct __sk_buff *ctx)
+{
+ return global2static_simple(ctx->protocol);
+}
+
+SEC("tc")
+int exception_throw_unwind_1(struct __sk_buff *ctx)
+{
+ return static2global(bpf_ntohs(ctx->protocol));
+}
+
+SEC("tc")
+int exception_throw_unwind_2(struct __sk_buff *ctx)
+{
+ return static2global(bpf_ntohs(ctx->protocol) - 1);
+}
+
+SEC("tc")
+int exception_throw_default(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 1;
+}
+
+SEC("tc")
+int exception_throw_default_value(struct __sk_buff *ctx)
+{
+ bpf_throw(5);
+ return 1;
+}
+
+SEC("tc")
+int exception_tail_call_target(struct __sk_buff *ctx)
+{
+ bpf_throw(16);
+ return 0;
+}
+
+static __noinline
+int exception_tail_call_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 10;
+
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return ret;
+}
+
+SEC("tc")
+int exception_tail_call(struct __sk_buff *ctx) {
+ volatile int ret = 0;
+
+ ret = exception_tail_call_subprog(ctx);
+ return ret + 8;
+}
+
+__noinline int exception_ext_global(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ return ret;
+}
+
+static __noinline int exception_ext_static(struct __sk_buff *ctx)
+{
+ return exception_ext_global(ctx);
+}
+
+SEC("tc")
+int exception_ext(struct __sk_buff *ctx)
+{
+ return exception_ext_static(ctx);
+}
+
+__noinline int exception_cb_mod_global(u64 cookie)
+{
+ volatile int ret = 0;
+
+ return ret;
+}
+
+/* Example of how the exception callback supplied during verification can still
+ * introduce extensions by calling to dummy global functions, and alter runtime
+ * behavior.
+ *
+ * Right now we don't allow freplace attachment to exception callback itself,
+ * but if the need arises this restriction is technically feasible to relax in
+ * the future.
+ */
+__noinline int exception_cb_mod(u64 cookie)
+{
+ return exception_cb_mod_global(cookie) + cookie + 10;
+}
+
+SEC("tc")
+__exception_cb(exception_cb_mod)
+int exception_ext_mod_cb_runtime(struct __sk_buff *ctx)
+{
+ bpf_throw(25);
+ return 0;
+}
+
+__noinline static int subprog(struct __sk_buff *ctx)
+{
+ return bpf_ktime_get_ns();
+}
+
+__noinline static int throwing_subprog(struct __sk_buff *ctx)
+{
+ if (ctx->tstamp)
+ bpf_throw(0);
+ return bpf_ktime_get_ns();
+}
+
+__noinline int global_subprog(struct __sk_buff *ctx)
+{
+ return bpf_ktime_get_ns();
+}
+
+__noinline int throwing_global_subprog(struct __sk_buff *ctx)
+{
+ if (ctx->tstamp)
+ bpf_throw(0);
+ return bpf_ktime_get_ns();
+}
+
+SEC("tc")
+int exception_throw_subprog(struct __sk_buff *ctx)
+{
+ switch (ctx->protocol) {
+ case 1:
+ return subprog(ctx);
+ case 2:
+ return global_subprog(ctx);
+ case 3:
+ return throwing_subprog(ctx);
+ case 4:
+ return throwing_global_subprog(ctx);
+ default:
+ break;
+ }
+ bpf_throw(1);
+ return 0;
+}
+
+__noinline int assert_nz_gfunc(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert(cookie != 0);
+ return 0;
+}
+
+__noinline int assert_zero_gfunc(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, ==, 0));
+ return 0;
+}
+
+__noinline int assert_neg_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, <, 0));
+ return 0;
+}
+
+__noinline int assert_pos_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, >, 0));
+ return 0;
+}
+
+__noinline int assert_negeq_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, <=, -1));
+ return 0;
+}
+
+__noinline int assert_poseq_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, >=, 1));
+ return 0;
+}
+
+__noinline int assert_nz_gfunc_with(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert_with(cookie != 0, cookie + 100);
+ return 0;
+}
+
+__noinline int assert_zero_gfunc_with(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, ==, 0), cookie + 100);
+ return 0;
+}
+
+__noinline int assert_neg_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, <, 0), cookie + 100);
+ return 0;
+}
+
+__noinline int assert_pos_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, >, 0), cookie + 100);
+ return 0;
+}
+
+__noinline int assert_negeq_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, <=, -1), cookie + 100);
+ return 0;
+}
+
+__noinline int assert_poseq_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, >=, 1), cookie + 100);
+ return 0;
+}
+
+#define check_assert(name, cookie, tag) \
+SEC("tc") \
+int exception##tag##name(struct __sk_buff *ctx) \
+{ \
+ return name(cookie) + 1; \
+}
+
+check_assert(assert_nz_gfunc, 5, _);
+check_assert(assert_zero_gfunc, 0, _);
+check_assert(assert_neg_gfunc, -100, _);
+check_assert(assert_pos_gfunc, 100, _);
+check_assert(assert_negeq_gfunc, -1, _);
+check_assert(assert_poseq_gfunc, 1, _);
+
+check_assert(assert_nz_gfunc_with, 5, _);
+check_assert(assert_zero_gfunc_with, 0, _);
+check_assert(assert_neg_gfunc_with, -100, _);
+check_assert(assert_pos_gfunc_with, 100, _);
+check_assert(assert_negeq_gfunc_with, -1, _);
+check_assert(assert_poseq_gfunc_with, 1, _);
+
+check_assert(assert_nz_gfunc, 0, _bad_);
+check_assert(assert_zero_gfunc, 5, _bad_);
+check_assert(assert_neg_gfunc, 100, _bad_);
+check_assert(assert_pos_gfunc, -100, _bad_);
+check_assert(assert_negeq_gfunc, 1, _bad_);
+check_assert(assert_poseq_gfunc, -1, _bad_);
+
+check_assert(assert_nz_gfunc_with, 0, _bad_);
+check_assert(assert_zero_gfunc_with, 5, _bad_);
+check_assert(assert_neg_gfunc_with, 100, _bad_);
+check_assert(assert_pos_gfunc_with, -100, _bad_);
+check_assert(assert_negeq_gfunc_with, 1, _bad_);
+check_assert(assert_poseq_gfunc_with, -1, _bad_);
+
+SEC("tc")
+int exception_assert_range(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range(time, 0, ~0ULL);
+ return 1;
+}
+
+SEC("tc")
+int exception_assert_range_with(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range_with(time, 0, ~0ULL, 10);
+ return 1;
+}
+
+SEC("tc")
+int exception_bad_assert_range(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range(time, -100, 100);
+ return 1;
+}
+
+SEC("tc")
+int exception_bad_assert_range_with(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range_with(time, -1000, 1000, 10);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
new file mode 100644
index 000000000000..a01c2736890f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <limits.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+#define check_assert(type, op, name, value) \
+ SEC("?tc") \
+ __log_level(2) __failure \
+ int check_assert_##name(void *ctx) \
+ { \
+ type num = bpf_ktime_get_ns(); \
+ bpf_assert(bpf_cmp_unlikely(num, op, value)); \
+ return *(u64 *)num; \
+ }
+
+__msg(": R0=0xffffffff80000000")
+check_assert(s64, ==, eq_int_min, INT_MIN);
+__msg(": R0=0x7fffffff")
+check_assert(s64, ==, eq_int_max, INT_MAX);
+__msg(": R0=0")
+check_assert(s64, ==, eq_zero, 0);
+__msg(": R0=0x8000000000000000 R1=0x8000000000000000")
+check_assert(s64, ==, eq_llong_min, LLONG_MIN);
+__msg(": R0=0x7fffffffffffffff R1=0x7fffffffffffffff")
+check_assert(s64, ==, eq_llong_max, LLONG_MAX);
+
+__msg(": R0=scalar(id=1,smax=0x7ffffffe)")
+check_assert(s64, <, lt_pos, INT_MAX);
+__msg(": R0=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+check_assert(s64, <, lt_zero, 0);
+__msg(": R0=scalar(id=1,smax=0xffffffff7fffffff")
+check_assert(s64, <, lt_neg, INT_MIN);
+
+__msg(": R0=scalar(id=1,smax=0x7fffffff)")
+check_assert(s64, <=, le_pos, INT_MAX);
+__msg(": R0=scalar(id=1,smax=0)")
+check_assert(s64, <=, le_zero, 0);
+__msg(": R0=scalar(id=1,smax=0xffffffff80000000")
+check_assert(s64, <=, le_neg, INT_MIN);
+
+__msg(": R0=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, >, gt_pos, INT_MAX);
+__msg(": R0=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, >, gt_zero, 0);
+__msg(": R0=scalar(id=1,smin=0xffffffff80000001")
+check_assert(s64, >, gt_neg, INT_MIN);
+
+__msg(": R0=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, >=, ge_pos, INT_MAX);
+__msg(": R0=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, >=, ge_zero, 0);
+__msg(": R0=scalar(id=1,smin=0xffffffff80000000")
+check_assert(s64, >=, ge_neg, INT_MIN);
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0")
+int check_assert_range_s64(struct __sk_buff *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ s64 num;
+
+ _Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match");
+ if (!sk)
+ return 0;
+ num = sk->rx_queue_mapping;
+ bpf_assert_range(num, INT_MIN + 2, INT_MAX - 2);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
+int check_assert_range_u64(struct __sk_buff *ctx)
+{
+ u64 num = ctx->len;
+
+ bpf_assert_range(num, 4096, 8192);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R0=0 R1=ctx() R2=4096 R10=fp0")
+int check_assert_single_range_s64(struct __sk_buff *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ s64 num;
+
+ _Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match");
+ if (!sk)
+ return 0;
+ num = sk->rx_queue_mapping;
+
+ bpf_assert_range(num, 4096, 4096);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=ctx() R2=4096 R10=fp0")
+int check_assert_single_range_u64(struct __sk_buff *ctx)
+{
+ u64 num = ctx->len;
+
+ bpf_assert_range(num, 4096, 4096);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0")
+int check_assert_generic(struct __sk_buff *ctx)
+{
+ u8 *data_end = (void *)(long)ctx->data_end;
+ u8 *data = (void *)(long)ctx->data;
+
+ bpf_assert(data + 64 <= data_end);
+ return data[128];
+}
+
+SEC("?fentry/bpf_check")
+__failure __msg("At program exit the register R1 has smin=64 smax=64")
+int check_assert_with_return(void *ctx)
+{
+ bpf_assert_with(!ctx, 64);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_ext.c b/tools/testing/selftests/bpf/progs/exceptions_ext.c
new file mode 100644
index 000000000000..743c05185d9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_ext.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+SEC("?fentry")
+int pfentry(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fentry")
+int throwing_fentry(void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline int exception_cb(u64 cookie)
+{
+ return cookie + 64;
+}
+
+SEC("?freplace")
+int extension(struct __sk_buff *ctx)
+{
+ return 0;
+}
+
+SEC("?freplace")
+__exception_cb(exception_cb)
+int throwing_exception_cb_extension(u64 cookie)
+{
+ bpf_throw(32);
+ return 0;
+}
+
+SEC("?freplace")
+__exception_cb(exception_cb)
+int throwing_extension(struct __sk_buff *ctx)
+{
+ bpf_throw(64);
+ return 0;
+}
+
+SEC("?fexit")
+int pfexit(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fexit")
+int throwing_fexit(void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?fmod_ret")
+int pfmod_ret(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fmod_ret")
+int throwing_fmod_ret(void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
new file mode 100644
index 000000000000..8a0fdff89927
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+extern void bpf_rcu_read_lock(void) __ksym;
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
+struct foo {
+ struct bpf_rb_node node;
+};
+
+struct hmap_elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+private(A) struct bpf_spin_lock lock;
+private(A) struct bpf_rb_root rbtree __contains(foo, node);
+
+__noinline void *exception_cb_bad_ret_type(u64 cookie)
+{
+ return NULL;
+}
+
+__noinline int exception_cb_bad_arg_0(void)
+{
+ return 0;
+}
+
+__noinline int exception_cb_bad_arg_2(int a, int b)
+{
+ return 0;
+}
+
+__noinline int exception_cb_ok_arg_small(int a)
+{
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_ret_type)
+__failure __msg("Global function exception_cb_bad_ret_type() doesn't return scalar.")
+int reject_exception_cb_type_1(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_arg_0)
+__failure __msg("exception cb only supports single integer argument")
+int reject_exception_cb_type_2(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_arg_2)
+__failure __msg("exception cb only supports single integer argument")
+int reject_exception_cb_type_3(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_ok_arg_small)
+__success
+int reject_exception_cb_type_4(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback subprog")
+int reject_async_callback_throw(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+ return bpf_timer_set_callback(&elem->timer, timer_cb);
+}
+
+__noinline static int subprog_lock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_spin_lock(&lock);
+ if (ctx->len)
+ bpf_throw(0);
+ return ret;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_with_lock(void *ctx)
+{
+ bpf_spin_lock(&lock);
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_subprog_with_lock(void *ctx)
+{
+ return subprog_lock(ctx);
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region")
+int reject_with_rcu_read_lock(void *ctx)
+{
+ bpf_rcu_read_lock();
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int throwing_subprog(struct __sk_buff *ctx)
+{
+ if (ctx->len)
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region")
+int reject_subprog_with_rcu_read_lock(void *ctx)
+{
+ bpf_rcu_read_lock();
+ return throwing_subprog(ctx);
+}
+
+static bool rbless(struct bpf_rb_node *n1, const struct bpf_rb_node *n2)
+{
+ bpf_throw(0);
+ return true;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_with_rbtree_add_throw(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&lock);
+ bpf_rbtree_add(&rbtree, &f->node, rbless);
+ bpf_spin_unlock(&lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_reference(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int subprog_ref(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int subprog_cb_ref(u32 i, void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_cb_reference(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_loop(5, subprog_cb_ref, NULL, 0);
+ bpf_obj_drop(f);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_with_cb(void *ctx)
+{
+ bpf_loop(5, subprog_cb_ref, NULL, 0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_subprog_reference(void *ctx)
+{
+ return subprog_ref(ctx) + 1;
+}
+
+__noinline int throwing_exception_cb(u64 c)
+{
+ bpf_throw(0);
+ return c;
+}
+
+__noinline int exception_cb1(u64 c)
+{
+ return c;
+}
+
+__noinline int exception_cb2(u64 c)
+{
+ return c;
+}
+
+static __noinline int static_func(struct __sk_buff *ctx)
+{
+ return exception_cb1(ctx->tstamp);
+}
+
+__noinline int global_func(struct __sk_buff *ctx)
+{
+ return exception_cb1(ctx->tstamp);
+}
+
+SEC("?tc")
+__exception_cb(throwing_exception_cb)
+__failure __msg("cannot be called from callback subprog")
+int reject_throwing_exception_cb(struct __sk_buff *ctx)
+{
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__failure __msg("cannot call exception cb directly")
+int reject_exception_cb_call_global_func(struct __sk_buff *ctx)
+{
+ return global_func(ctx);
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__failure __msg("cannot call exception cb directly")
+int reject_exception_cb_call_static_func(struct __sk_buff *ctx)
+{
+ return static_func(ctx);
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__exception_cb(exception_cb2)
+__failure __msg("multiple exception callback tags for main subprog")
+int reject_multiple_exception_cb(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 16;
+}
+
+__noinline int exception_cb_bad_ret(u64 c)
+{
+ return c;
+}
+
+SEC("?fentry/bpf_check")
+__exception_cb(exception_cb_bad_ret)
+__failure __msg("At program exit the register R0 has unknown scalar value should")
+int reject_set_exception_cb_bad_ret1(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fentry/bpf_check")
+__failure __msg("At program exit the register R1 has smin=64 smax=64 should")
+int reject_set_exception_cb_bad_ret2(void *ctx)
+{
+ bpf_throw(64);
+ return 0;
+}
+
+__noinline static int loop_cb1(u32 index, int *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int loop_cb2(u32 index, int *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_exception_throw_cb(struct __sk_buff *ctx)
+{
+ bpf_loop(5, loop_cb1, NULL, 0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_exception_throw_cb_diff(struct __sk_buff *ctx)
+{
+ if (ctx->protocol)
+ bpf_loop(5, loop_cb1, NULL, 0);
+ else
+ bpf_loop(5, loop_cb2, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c
new file mode 100644
index 000000000000..20d009e2d266
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned int exception_triggered;
+int test_pid;
+
+/* TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ struct callback_head *work;
+ void *func;
+
+ if (test_pid != pid)
+ return 0;
+
+ /* To verify we hit an exception we dereference task->task_works->func.
+ * If task work has been added,
+ * - task->task_works is non-NULL; and
+ * - task->task_works->func is non-NULL also (the callback function
+ * must be specified for the task work.
+ *
+ * However, for a newly-created task, task->task_works is NULLed,
+ * so we know the exception handler triggered if task_works is
+ * NULL and func is NULL.
+ */
+ work = task->task_works;
+ func = work->func;
+ /* Currently verifier will fail for `btf_ptr |= btf_ptr` * instruction.
+ * To workaround the issue, use barrier_var() and rewrite as below to
+ * prevent compiler from generating verifier-unfriendly code.
+ */
+ barrier_var(work);
+ if (work)
+ return 0;
+ barrier_var(func);
+ if (func)
+ return 0;
+ exception_triggered++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fd_htab_lookup.c b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
new file mode 100644
index 000000000000..a4a9e1db626f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct inner_map_type);
+} outer_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
diff --git a/tools/testing/selftests/bpf/progs/fentry_many_args.c b/tools/testing/selftests/bpf/progs/fentry_many_args.c
new file mode 100644
index 000000000000..b61bb92fee2c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fentry_many_args.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Tencent */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_testmod_fentry_test7")
+int BPF_PROG(test1, __u64 a, void *b, short c, int d, void *e, char f,
+ int g)
+{
+ test1_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fentry/bpf_testmod_fentry_test11")
+int BPF_PROG(test2, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, unsigned int h, long i, __u64 j, unsigned long k)
+{
+ test2_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fentry/bpf_testmod_fentry_test11")
+int BPF_PROG(test3, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
+ __u64 g, __u64 h, __u64 i, __u64 j, __u64 k)
+{
+ test3_result = a == 16 && b == 17 && c == 18 && d == 19 &&
+ e == 20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive.c b/tools/testing/selftests/bpf/progs/fentry_recursive.c
new file mode 100644
index 000000000000..2c9fb5ac42b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fentry_recursive.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Red Hat, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Dummy fentry bpf prog for testing fentry attachment chains */
+SEC("fentry/XXX")
+int BPF_PROG(recursive_attach, int a)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive_target.c b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c
new file mode 100644
index 000000000000..267c876d0aba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Red Hat, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Dummy fentry bpf prog for testing fentry attachment chains. It's going to be
+ * a start of the chain.
+ */
+SEC("fentry/bpf_testmod_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ return 0;
+}
+
+/* Dummy bpf prog for testing attach_btf presence when attaching an fentry
+ * program.
+ */
+SEC("raw_tp/sys_enter")
+int BPF_PROG(fentry_target, struct pt_regs *regs, long id)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 5f645fdaba6f..52a550d281d9 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -64,7 +64,7 @@ __u64 test7_result = 0;
SEC("fentry/bpf_fentry_test7")
int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
{
- if (arg == 0)
+ if (!arg)
test7_result = 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index 49a84a3a2306..983b7c233382 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -73,10 +73,10 @@ int test_subprog2(struct args_subprog2 *ctx)
__builtin_preserve_access_index(&skb->len));
ret = ctx->ret;
- /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32
- * which randomizes upper 32 bits after BPF_ALU32 insns.
- * Hence after 'w0 <<= 1' upper bits of $rax are random.
- * That is expected and correct. Trim them.
+ /* bpf_prog_test_load() loads "test_pkt_access.bpf.o" with
+ * BPF_F_TEST_RND_HI32 which randomizes upper 32 bits after BPF_ALU32
+ * insns. Hence after 'w0 <<= 1' upper bits of $rax are random. That is
+ * expected and correct. Trim them.
*/
ret = (__u32) ret;
if (len != 74 || ret != 148)
@@ -120,8 +120,6 @@ int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var)
void *data = (void *)(long)skb->data;
struct ipv6hdr ip6, *ip6p;
int ifindex = skb->ifindex;
- __u32 eth_proto;
- __u32 nh_off;
/* check that BPF extension can read packet via direct packet access */
if (data + 14 + sizeof(ip6) > data_end)
diff --git a/tools/testing/selftests/bpf/progs/fexit_many_args.c b/tools/testing/selftests/bpf/progs/fexit_many_args.c
new file mode 100644
index 000000000000..53b335c2dafb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_many_args.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Tencent */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fexit/bpf_testmod_fentry_test7")
+int BPF_PROG(test1, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ test1_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && ret == 133;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_testmod_fentry_test11")
+int BPF_PROG(test2, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, unsigned int h, long i, __u64 j, unsigned long k,
+ int ret)
+{
+ test2_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && ret == 231;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fexit/bpf_testmod_fentry_test11")
+int BPF_PROG(test3, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
+ __u64 g, __u64 h, __u64 i, __u64 j, __u64 k, __u64 ret)
+{
+ test3_result = a == 16 && b == 17 && c == 18 && d == 19 &&
+ e == 20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && ret == 231;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c
new file mode 100644
index 000000000000..106dc75efcc4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char LICENSE[] SEC("license") = "GPL";
+
+int pid = 0;
+int fentry_cnt = 0;
+int fexit_cnt = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int nanosleep_fentry(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ fentry_cnt++;
+ return 0;
+}
+
+SEC("fexit/" SYS_PREFIX "sys_nanosleep")
+int nanosleep_fexit(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ fexit_cnt++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index 0952affb22a6..8f1ccb7302e1 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -65,7 +65,7 @@ __u64 test7_result = 0;
SEC("fexit/bpf_fentry_test7")
int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
{
- if (arg == 0)
+ if (!arg)
test7_result = 1;
return 0;
}
@@ -74,7 +74,7 @@ __u64 test8_result = 0;
SEC("fexit/bpf_fentry_test8")
int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
{
- if (arg->a == 0)
+ if (!arg->a)
test8_result = 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/fib_lookup.c b/tools/testing/selftests/bpf/progs/fib_lookup.c
new file mode 100644
index 000000000000..7b5dd2214ff4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fib_lookup.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_fib_lookup fib_params = {};
+int fib_lookup_ret = 0;
+int lookup_flags = 0;
+
+SEC("tc")
+int fib_lookup(struct __sk_buff *skb)
+{
+ fib_lookup_ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params),
+ lookup_flags);
+
+ return TC_ACT_SHOT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c
new file mode 100644
index 000000000000..4d756b623557
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct elem {
+ struct file *file;
+ struct bpf_task_work tw;
+};
+
+char user_buf[256000];
+char tmp_buf[256000];
+
+int pid = 0;
+int err, run_success = 0;
+
+static int validate_file_read(struct file *file);
+static int task_work_callback(struct bpf_map *map, void *key, void *value);
+
+SEC("lsm/file_open")
+int on_open_expect_fault(void *c)
+{
+ struct bpf_dynptr dynptr;
+ struct file *file;
+ int local_err = 1;
+ __u32 user_buf_sz = sizeof(user_buf);
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ file = bpf_get_task_exe_file(bpf_get_current_task_btf());
+ if (!file)
+ return 0;
+
+ if (bpf_dynptr_from_file(file, 0, &dynptr))
+ goto out;
+
+ local_err = bpf_dynptr_read(tmp_buf, user_buf_sz, &dynptr, user_buf_sz, 0);
+ if (local_err == -EFAULT) { /* Expect page fault */
+ local_err = 0;
+ run_success = 1;
+ }
+out:
+ bpf_dynptr_file_discard(&dynptr);
+ if (local_err)
+ err = local_err;
+ bpf_put_file(file);
+ return 0;
+}
+
+SEC("lsm/file_open")
+int on_open_validate_file_read(void *c)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct elem *work;
+ int key = 0;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work) {
+ err = 1;
+ return 0;
+ }
+ bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, task_work_callback, NULL);
+ return 0;
+}
+
+/* Called in a sleepable context, read 256K bytes, cross check with user space read data */
+static int task_work_callback(struct bpf_map *map, void *key, void *value)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct file *file = bpf_get_task_exe_file(task);
+
+ if (!file)
+ return 0;
+
+ err = validate_file_read(file);
+ if (!err)
+ run_success = 1;
+ bpf_put_file(file);
+ return 0;
+}
+
+static int verify_dynptr_read(struct bpf_dynptr *ptr, u32 off, char *user_buf, u32 len)
+{
+ int i;
+
+ if (bpf_dynptr_read(tmp_buf, len, ptr, off, 0))
+ return 1;
+
+ /* Verify file contents read from BPF is the same as the one read from userspace */
+ bpf_for(i, 0, len)
+ {
+ if (tmp_buf[i] != user_buf[i])
+ return 1;
+ }
+ return 0;
+}
+
+static int validate_file_read(struct file *file)
+{
+ struct bpf_dynptr dynptr;
+ int loc_err = 1, off;
+ __u32 user_buf_sz = sizeof(user_buf);
+
+ if (bpf_dynptr_from_file(file, 0, &dynptr))
+ goto cleanup;
+
+ loc_err = verify_dynptr_read(&dynptr, 0, user_buf, user_buf_sz);
+ off = 1;
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
+ off = user_buf_sz - 1;
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
+ /* Read file with random offset and length */
+ off = 4097;
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, 100);
+
+ /* Adjust dynptr, verify read */
+ loc_err = loc_err ?: bpf_dynptr_adjust(&dynptr, off, off + 1);
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 1);
+ /* Can't read more than 1 byte */
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 2) == 0;
+ /* Can't read with far offset */
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, 1, user_buf + off, 1) == 0;
+
+cleanup:
+ bpf_dynptr_file_discard(&dynptr);
+ return loc_err;
+}
diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c
new file mode 100644
index 000000000000..32fe28ed2439
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+void *user_ptr;
+
+SEC("lsm/file_open")
+__failure
+__msg("Unreleased reference id=")
+int on_nanosleep_unreleased_ref(void *ctx)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct file *file = bpf_get_task_exe_file(task);
+ struct bpf_dynptr dynptr;
+
+ if (!file)
+ return 0;
+
+ err = bpf_dynptr_from_file(file, 0, &dynptr);
+ return err ? 1 : 0;
+}
+
+SEC("xdp")
+__failure
+__msg("Expected a dynptr of type file as arg #0")
+int xdp_wrong_dynptr_type(struct xdp_md *xdp)
+{
+ struct bpf_dynptr dynptr;
+
+ bpf_dynptr_from_xdp(xdp, 0, &dynptr);
+ bpf_dynptr_file_discard(&dynptr);
+ return 0;
+}
+
+SEC("xdp")
+__failure
+__msg("Expected an initialized dynptr as arg #0")
+int xdp_no_dynptr_type(struct xdp_md *xdp)
+{
+ struct bpf_dynptr dynptr;
+
+ bpf_dynptr_file_discard(&dynptr);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c
new file mode 100644
index 000000000000..02b82774469c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/find_vma.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int dummy;
+};
+
+#define VM_EXEC 0x00000004
+#define DNAME_INLINE_LEN 32
+
+pid_t target_pid = 0;
+char d_iname[DNAME_INLINE_LEN] = {0};
+__u32 found_vm_exec = 0;
+__u64 addr = 0;
+int find_zero_ret = -1;
+int find_addr_ret = -1;
+
+static long check_vma(struct task_struct *task, struct vm_area_struct *vma,
+ struct callback_ctx *data)
+{
+ if (vma->vm_file)
+ bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1,
+ vma->vm_file->f_path.dentry->d_shortname.string);
+
+ /* check for VM_EXEC */
+ if (vma->vm_flags & VM_EXEC)
+ found_vm_exec = 1;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_getpid(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ if (task->pid != target_pid)
+ return 0;
+
+ find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0);
+
+ /* this should return -ENOENT */
+ find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0);
+ return 0;
+}
+
+SEC("perf_event")
+int handle_pe(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ if (task->pid != target_pid)
+ return 0;
+
+ find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0);
+
+ /* In NMI, this should return -EBUSY, as the previous call is using
+ * the irq_work.
+ */
+ find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
new file mode 100644
index 000000000000..7ba9a428f228
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#define vm_flags vm_start
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int dummy;
+};
+
+static long write_vma(struct task_struct *task, struct vm_area_struct *vma,
+ struct callback_ctx *data)
+{
+ /* writing to vma, which is illegal */
+ vma->vm_start = 0xffffffffff600000;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_getpid(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ bpf_find_vma(task, 0, write_vma, &data, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail2.c b/tools/testing/selftests/bpf/progs/find_vma_fail2.c
new file mode 100644
index 000000000000..9bcf3203e26b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/find_vma_fail2.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int dummy;
+};
+
+static long write_task(struct task_struct *task, struct vm_area_struct *vma,
+ struct callback_ctx *data)
+{
+ /* writing to task, which is illegal */
+ task->mm = NULL;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_getpid(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ bpf_find_vma(task, 0, write_task, &data, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
new file mode 100644
index 000000000000..52f6995ff29c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+ int output;
+};
+
+const volatile int bypass_unused = 1;
+
+static __u64
+unused_subprog(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ data->output = 0;
+ return 1;
+}
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ data->output += *val;
+ if (*key == 1)
+ return 1; /* stop the iteration */
+ return 0;
+}
+
+__u32 cpu = 0;
+__u64 percpu_val = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ cpu = bpf_get_smp_processor_id();
+ percpu_val = *val;
+ return 0;
+}
+
+u32 arraymap_output = 0;
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.output = 0;
+ bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
+ if (!bypass_unused)
+ bpf_for_each_map_elem(&arraymap, unused_subprog, &data, 0);
+ arraymap_output = data.output;
+
+ bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
new file mode 100644
index 000000000000..276994d5c0c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+ struct __sk_buff *ctx;
+ int input;
+ int output;
+};
+
+static __u64
+check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ struct __sk_buff *skb = data->ctx;
+ __u32 k;
+ __u64 v;
+
+ if (skb) {
+ k = *key;
+ v = *val;
+ if (skb->len == 10000 && k == 10 && v == 10)
+ data->output = 3; /* impossible path */
+ else
+ data->output = 4;
+ } else {
+ data->output = data->input;
+ bpf_map_delete_elem(map, key);
+ }
+
+ return 0;
+}
+
+__u32 cpu = 0;
+__u32 percpu_called = 0;
+__u32 percpu_key = 0;
+__u64 percpu_val = 0;
+int percpu_output = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *unused)
+{
+ struct callback_ctx data;
+
+ percpu_called++;
+ cpu = bpf_get_smp_processor_id();
+ percpu_key = *key;
+ percpu_val = *val;
+
+ data.ctx = 0;
+ data.input = 100;
+ data.output = 0;
+ bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+ percpu_output = data.output;
+
+ return 0;
+}
+
+int hashmap_output = 0;
+int hashmap_elems = 0;
+int percpu_map_elems = 0;
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.ctx = skb;
+ data.input = 10;
+ data.output = 0;
+ hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+ hashmap_output = data.output;
+
+ percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
+ (void *)0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
new file mode 100644
index 000000000000..82307166f789
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Intel Corporation */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 128);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg)
+{
+ bpf_map_delete_elem(map, key);
+ bpf_map_update_elem(map, key, val, 0);
+ return 0;
+}
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ (void)skb;
+
+ bpf_for_each_map_elem(&hashmap, cb, NULL, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
new file mode 100644
index 000000000000..8e545865ea33
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array_map SEC(".maps");
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ void *data)
+{
+ bpf_get_current_comm(key, sizeof(*key));
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int test_map_key_write(const void *ctx)
+{
+ bpf_for_each_map_elem(&array_map, check_array_elem, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/for_each_multi_maps.c b/tools/testing/selftests/bpf/progs/for_each_multi_maps.c
new file mode 100644
index 000000000000..ff0bed7d4459
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_multi_maps.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+struct callback_ctx {
+ int output;
+};
+
+u32 data_output = 0;
+int use_array = 0;
+
+static __u64
+check_map_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ data->output += *val;
+ return 0;
+}
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.output = 0;
+ if (use_array)
+ bpf_for_each_map_elem(&arraymap, check_map_elem, &data, 0);
+ else
+ bpf_for_each_map_elem(&hashmap, check_map_elem, &data, 0);
+ data_output = data.output;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/free_timer.c b/tools/testing/selftests/bpf/progs/free_timer.c
new file mode 100644
index 000000000000..4501ae8fc414
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/free_timer.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ENTRIES 8
+
+struct map_value {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, MAX_ENTRIES);
+} map SEC(".maps");
+
+static int timer_cb(void *map, void *key, struct map_value *value)
+{
+ volatile int sum = 0;
+ int i;
+
+ bpf_for(i, 0, 1024 * 1024) sum += i;
+
+ return 0;
+}
+
+static int start_cb(int key)
+{
+ struct map_value *value;
+
+ value = bpf_map_lookup_elem(&map, (void *)&key);
+ if (!value)
+ return 0;
+
+ bpf_timer_init(&value->timer, &map, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(&value->timer, timer_cb);
+ /* Hope 100us will be enough to wake-up and run the overwrite thread */
+ bpf_timer_start(&value->timer, 100000, BPF_F_TIMER_CPU_PIN);
+
+ return 0;
+}
+
+static int overwrite_cb(int key)
+{
+ struct map_value zero = {};
+
+ /* Free the timer which may run on other CPU */
+ bpf_map_update_elem(&map, (void *)&key, &zero, BPF_ANY);
+
+ return 0;
+}
+
+SEC("syscall")
+int BPF_PROG(start_timer)
+{
+ bpf_loop(MAX_ENTRIES, start_cb, NULL, 0);
+ return 0;
+}
+
+SEC("syscall")
+int BPF_PROG(overwrite_timer)
+{
+ bpf_loop(MAX_ENTRIES, overwrite_cb, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
index bb2a77c5b62b..370a0e1922e0 100644
--- a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
@@ -23,7 +23,7 @@ struct {
SEC("freplace/handle_kprobe")
int new_handle_kprobe(struct pt_regs *ctx)
{
- struct hmap_elem zero = {}, *val;
+ struct hmap_elem *val;
int key = 0;
val = bpf_map_lookup_elem(&hash_map, &key);
diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
index 68a5a9db928a..7e94412d47a5 100644
--- a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
@@ -7,12 +7,12 @@
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") sock_map = {
- .type = BPF_MAP_TYPE_SOCKMAP,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 2);
+} sock_map SEC(".maps");
SEC("freplace/cls_redirect")
int freplace_cls_redirect_test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
index 544e5ac90461..d09bbd8ae8a8 100644
--- a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -12,7 +12,7 @@
SEC("freplace/connect_v4_prog")
int new_connect_v4_prog(struct bpf_sock_addr *ctx)
{
- // return value thats in invalid range
+ // return value that's in invalid range
return 255;
}
diff --git a/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c
new file mode 100644
index 000000000000..e6a75f86cac6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("freplace")
+int freplace_prog(void)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_global_func.c b/tools/testing/selftests/bpf/progs/freplace_global_func.c
new file mode 100644
index 000000000000..96cb61a6ce87
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_global_func.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline
+int test_ctx_global_func(struct __sk_buff *skb)
+{
+ volatile int retval = 1;
+ return retval;
+}
+
+SEC("freplace/test_pkt_access")
+int new_test_pkt_access(struct __sk_buff *skb)
+{
+ return test_ctx_global_func(skb);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_progmap.c b/tools/testing/selftests/bpf/progs/freplace_progmap.c
new file mode 100644
index 000000000000..81b56b9aa7d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_progmap.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __type(key, __u32);
+ __type(value, struct bpf_cpumap_val);
+ __uint(max_entries, 1);
+} cpu_map SEC(".maps");
+
+SEC("xdp/cpumap")
+int xdp_drop_prog(struct xdp_md *ctx)
+{
+ return XDP_DROP;
+}
+
+SEC("freplace")
+int xdp_cpumap_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&cpu_map, 0, XDP_PASS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c
new file mode 100644
index 000000000000..624078abf3de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+SEC("freplace/btf_unreliable_kprobe")
+/* context type is what BPF verifier expects for kprobe context, but target
+ * program has `stuct whatever *ctx` argument, so freplace operation will be
+ * rejected with the following message:
+ *
+ * arg0 replace_btf_unreliable_kprobe(struct pt_regs *) doesn't match btf_unreliable_kprobe(struct whatever *)
+ */
+int replace_btf_unreliable_kprobe(bpf_user_pt_regs_t *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
new file mode 100644
index 000000000000..511ac634eef0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_hits = 0;
+__u64 address_low = 0;
+__u64 address_high = 0;
+int wasted_entries = 0;
+long total_entries = 0;
+
+#define ENTRY_CNT 32
+struct perf_branch_entry entries[ENTRY_CNT] = {};
+
+static inline bool gbs_in_range(__u64 val)
+{
+ return (val >= address_low) && (val < address_high);
+}
+
+SEC("fexit/bpf_testmod_loop_test")
+int BPF_PROG(test1, int n, int ret)
+{
+ long i;
+
+ total_entries = bpf_get_branch_snapshot(entries, sizeof(entries), 0);
+ total_entries /= sizeof(struct perf_branch_entry);
+
+ for (i = 0; i < ENTRY_CNT; i++) {
+ if (i >= total_entries)
+ break;
+ if (gbs_in_range(entries[i].from) && gbs_in_range(entries[i].to))
+ test1_hits++;
+ else if (!test1_hits)
+ wasted_entries++;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
index 6b42db2fe391..30fd504856c7 100644
--- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
@@ -4,37 +4,18 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
- __type(key, __u32);
- __type(value, __u64);
-} cg_ids SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
- __type(key, __u32);
- __type(value, __u32);
-} pidmap SEC(".maps");
+__u64 cg_id;
+__u64 expected_pid;
SEC("tracepoint/syscalls/sys_enter_nanosleep")
int trace(void *ctx)
{
__u32 pid = bpf_get_current_pid_tgid();
- __u32 key = 0, *expected_pid;
- __u64 *val;
-
- expected_pid = bpf_map_lookup_elem(&pidmap, &key);
- if (!expected_pid || *expected_pid != pid)
- return 0;
- val = bpf_map_lookup_elem(&cg_ids, &key);
- if (val)
- *val = bpf_get_current_cgroup_id();
+ if (expected_pid == pid)
+ cg_id = bpf_get_current_cgroup_id();
return 0;
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/get_func_args_test.c b/tools/testing/selftests/bpf/progs/get_func_args_test.c
new file mode 100644
index 000000000000..e0f34a55e697
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_args_test.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test1_result = cnt == 1;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+
+ /* We need to cast access to traced function argument values with
+ * proper type cast, because trampoline uses type specific instruction
+ * to save it, like for 'int a' with 32-bit mov like:
+ *
+ * mov %edi,-0x8(%rbp)
+ *
+ * so the upper 4 bytes are not zeroed.
+ */
+ test1_result &= err == 0 && ((int) a == 1);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 1, &z);
+ test1_result &= err == -EINVAL;
+
+ /* return value fails in fentry */
+ err = bpf_get_func_ret(ctx, &ret);
+ test1_result &= err == -EOPNOTSUPP;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test2_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test2_result &= err == 0 && (int) a == 2;
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test2_result &= err == 0 && b == 3;
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test2_result &= err == -EINVAL;
+
+ /* return value */
+ err = bpf_get_func_ret(ctx, &ret);
+ test2_result &= err == 0 && ret == 5;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test3_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test3_result &= err == 0 && ((int) a == 1);
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test3_result &= err == 0 && ((int *) b == _b);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test3_result &= err == -EINVAL;
+
+ /* return value */
+ err = bpf_get_func_ret(ctx, &ret);
+ test3_result &= err == 0 && ret == 0;
+
+ /* change return value, it's checked in fexit_test program */
+ return 1234;
+}
+
+__u64 test4_result = 0;
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int _a, int *_b, int _ret)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test4_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test4_result &= err == 0 && ((int) a == 1);
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test4_result &= err == 0 && ((int *) b == _b);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test4_result &= err == -EINVAL;
+
+ /* return value */
+ err = bpf_get_func_ret(ctx, &ret);
+ test4_result &= err == 0 && ret == 1234;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
new file mode 100644
index 000000000000..2011cacdeb18
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern int bpf_fentry_test1(int a) __ksym;
+extern int bpf_modify_return_test(int a, int *b) __ksym;
+
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+
+extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
+
+/* This function is here to have CONFIG_X86_KERNEL_IBT
+ * used and added to object BTF.
+ */
+int unused(void)
+{
+ return CONFIG_X86_KERNEL_IBT ? 0 : 1;
+}
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test1_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test2_result = (const void *) addr == &bpf_fentry_test2;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("kprobe/bpf_fentry_test3")
+int test3(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test3_result = (const void *) addr == &bpf_fentry_test3;
+ return 0;
+}
+
+__u64 test4_result = 0;
+SEC("kretprobe/bpf_fentry_test4")
+int BPF_KRETPROBE(test4)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test4_result = (const void *) addr == &bpf_fentry_test4;
+ return 0;
+}
+
+__u64 test5_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(test5, int a, int *b, int ret)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test5_result = (const void *) addr == &bpf_modify_return_test;
+ return ret;
+}
+
+__u64 test6_result = 0;
+SEC("?kprobe")
+int test6(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test6_result = (const void *) addr == 0;
+ return 0;
+}
+
+unsigned long uprobe_trigger;
+
+__u64 test7_result = 0;
+SEC("uprobe//proc/self/exe:uprobe_trigger")
+int BPF_UPROBE(test7)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test7_result = (const void *) addr == (const void *) uprobe_trigger;
+ return 0;
+}
+
+__u64 test8_result = 0;
+SEC("uretprobe//proc/self/exe:uprobe_trigger")
+int BPF_URETPROBE(test8, int ret)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test8_result = (const void *) addr == (const void *) uprobe_trigger;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c
new file mode 100644
index 000000000000..052f8a4345a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned long uprobe_trigger_body;
+
+__u64 test1_result = 0;
+SEC("uprobe//proc/self/exe:uprobe_trigger_body+1")
+int BPF_UPROBE(test1)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test1_result = (const void *) addr == (const void *) uprobe_trigger_body + 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/getpeername4_prog.c b/tools/testing/selftests/bpf/progs/getpeername4_prog.c
new file mode 100644
index 000000000000..4c97208cd25d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername4_prog.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP4 0xc0a801fe // 192.168.1.254
+#define REWRITE_ADDRESS_PORT4 4040
+
+SEC("cgroup/getpeername4")
+int getpeername_v4_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip4 = bpf_htonl(REWRITE_ADDRESS_IP4);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT4);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getpeername6_prog.c b/tools/testing/selftests/bpf/progs/getpeername6_prog.c
new file mode 100644
index 000000000000..070e4d7f636c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername6_prog.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP6_0 0xfaceb00c
+#define REWRITE_ADDRESS_IP6_1 0x12345678
+#define REWRITE_ADDRESS_IP6_2 0x00000000
+#define REWRITE_ADDRESS_IP6_3 0x0000abcd
+
+#define REWRITE_ADDRESS_PORT6 6060
+
+SEC("cgroup/getpeername6")
+int getpeername_v6_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip6[0] = bpf_htonl(REWRITE_ADDRESS_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(REWRITE_ADDRESS_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(REWRITE_ADDRESS_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(REWRITE_ADDRESS_IP6_3);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT6);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
new file mode 100644
index 000000000000..5a76754f846b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/getpeername_unix")
+int getpeername_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 1;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 1;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname4_prog.c b/tools/testing/selftests/bpf/progs/getsockname4_prog.c
new file mode 100644
index 000000000000..e298487c6347
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname4_prog.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP4 0xc0a801fe // 192.168.1.254
+#define REWRITE_ADDRESS_PORT4 4040
+
+SEC("cgroup/getsockname4")
+int getsockname_v4_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip4 = bpf_htonl(REWRITE_ADDRESS_IP4);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT4);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname6_prog.c b/tools/testing/selftests/bpf/progs/getsockname6_prog.c
new file mode 100644
index 000000000000..811d10cd5525
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname6_prog.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP6_0 0xfaceb00c
+#define REWRITE_ADDRESS_IP6_1 0x12345678
+#define REWRITE_ADDRESS_IP6_2 0x00000000
+#define REWRITE_ADDRESS_IP6_3 0x0000abcd
+
+#define REWRITE_ADDRESS_PORT6 6060
+
+SEC("cgroup/getsockname6")
+int getsockname_v6_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip6[0] = bpf_htonl(REWRITE_ADDRESS_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(REWRITE_ADDRESS_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(REWRITE_ADDRESS_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(REWRITE_ADDRESS_IP6_3);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT6);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
new file mode 100644
index 000000000000..7867113c696f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/getsockname_unix")
+int getsockname_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 1;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 1;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/htab_mem_bench.c b/tools/testing/selftests/bpf/progs/htab_mem_bench.c
new file mode 100644
index 000000000000..b1b721b14d67
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_mem_bench.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define OP_BATCH 64
+
+struct update_ctx {
+ unsigned int from;
+ unsigned int step;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, 4);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} htab SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+unsigned char zeroed_value[4096];
+unsigned int nr_thread = 0;
+long op_cnt = 0;
+
+static int write_htab(unsigned int i, struct update_ctx *ctx, unsigned int flags)
+{
+ bpf_map_update_elem(&htab, &ctx->from, zeroed_value, flags);
+ ctx->from += ctx->step;
+
+ return 0;
+}
+
+static int overwrite_htab(unsigned int i, struct update_ctx *ctx)
+{
+ return write_htab(i, ctx, 0);
+}
+
+static int newwrite_htab(unsigned int i, struct update_ctx *ctx)
+{
+ return write_htab(i, ctx, BPF_NOEXIST);
+}
+
+static int del_htab(unsigned int i, struct update_ctx *ctx)
+{
+ bpf_map_delete_elem(&htab, &ctx->from);
+ ctx->from += ctx->step;
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getpgid")
+int overwrite(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id();
+ update.step = nr_thread;
+ bpf_loop(OP_BATCH, overwrite_htab, &update, 0);
+ __sync_fetch_and_add(&op_cnt, 1);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getpgid")
+int batch_add_batch_del(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id();
+ update.step = nr_thread;
+ bpf_loop(OP_BATCH, overwrite_htab, &update, 0);
+
+ update.from = bpf_get_smp_processor_id();
+ bpf_loop(OP_BATCH, del_htab, &update, 0);
+
+ __sync_fetch_and_add(&op_cnt, 2);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getpgid")
+int add_only(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id() / 2;
+ update.step = nr_thread / 2;
+ bpf_loop(OP_BATCH, newwrite_htab, &update, 0);
+ __sync_fetch_and_add(&op_cnt, 1);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getppid")
+int del_only(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id() / 2;
+ update.step = nr_thread / 2;
+ bpf_loop(OP_BATCH, del_htab, &update, 0);
+ __sync_fetch_and_add(&op_cnt, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/htab_reuse.c b/tools/testing/selftests/bpf/progs/htab_reuse.c
new file mode 100644
index 000000000000..7f7368cb3095
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_reuse.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct htab_val {
+ struct bpf_spin_lock lock;
+ unsigned int data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 64);
+ __type(key, unsigned int);
+ __type(value, struct htab_val);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} htab SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c
new file mode 100644
index 000000000000..195d3b2fba00
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_update.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Map value type: has BTF-managed field (bpf_timer) */
+struct val {
+ struct bpf_timer t;
+ __u64 payload;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct val);
+} htab SEC(".maps");
+
+int pid = 0;
+int update_err = 0;
+
+SEC("?fentry/bpf_obj_free_fields")
+int bpf_obj_free_fields(void *ctx)
+{
+ __u32 key = 0;
+ struct val value = { .payload = 1 };
+
+ if ((bpf_get_current_pid_tgid() >> 32) != pid)
+ return 0;
+
+ update_err = bpf_map_update_elem(&htab, &key, &value, BPF_ANY);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
index 96060ff4ffc6..e16a2c208481 100644
--- a/tools/testing/selftests/bpf/progs/ima.c
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -18,8 +18,12 @@ struct {
char _license[] SEC("license") = "GPL";
-SEC("lsm.s/bprm_committed_creds")
-void BPF_PROG(ima, struct linux_binprm *bprm)
+bool use_ima_file_hash;
+bool enable_bprm_creds_for_exec;
+bool enable_kernel_read_file;
+bool test_deny;
+
+static void ima_test_common(struct file *file)
{
u64 ima_hash = 0;
u64 *sample;
@@ -28,8 +32,12 @@ void BPF_PROG(ima, struct linux_binprm *bprm)
pid = bpf_get_current_pid_tgid() >> 32;
if (pid == monitored_pid) {
- ret = bpf_ima_inode_hash(bprm->file->f_inode, &ima_hash,
- sizeof(ima_hash));
+ if (!use_ima_file_hash)
+ ret = bpf_ima_inode_hash(file->f_inode, &ima_hash,
+ sizeof(ima_hash));
+ else
+ ret = bpf_ima_file_hash(file, &ima_hash,
+ sizeof(ima_hash));
if (ret < 0 || ima_hash == 0)
return;
@@ -43,3 +51,53 @@ void BPF_PROG(ima, struct linux_binprm *bprm)
return;
}
+
+static int ima_test_deny(void)
+{
+ u32 pid;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid == monitored_pid && test_deny)
+ return -EPERM;
+
+ return 0;
+}
+
+SEC("lsm.s/bprm_committed_creds")
+void BPF_PROG(bprm_committed_creds, struct linux_binprm *bprm)
+{
+ ima_test_common(bprm->file);
+}
+
+SEC("lsm.s/bprm_creds_for_exec")
+int BPF_PROG(bprm_creds_for_exec, struct linux_binprm *bprm)
+{
+ if (!enable_bprm_creds_for_exec)
+ return 0;
+
+ ima_test_common(bprm->file);
+ return 0;
+}
+
+SEC("lsm.s/kernel_read_file")
+int BPF_PROG(kernel_read_file, struct file *file, enum kernel_read_file_id id,
+ bool contents)
+{
+ int ret;
+
+ if (!enable_kernel_read_file)
+ return 0;
+
+ if (!contents)
+ return 0;
+
+ if (id != READING_POLICY)
+ return 0;
+
+ ret = ima_test_deny();
+ if (ret < 0)
+ return ret;
+
+ ima_test_common(file);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/inner_array_lookup.c b/tools/testing/selftests/bpf/progs/inner_array_lookup.c
new file mode 100644
index 000000000000..c2c8f2fa451d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/inner_array_lookup.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, int);
+ __type(value, int);
+} inner_map1 SEC(".maps");
+
+struct outer_map {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 3);
+ __type(key, int);
+ __array(values, struct inner_map);
+} outer_map1 SEC(".maps") = {
+ .values = {
+ [2] = &inner_map1,
+ },
+};
+
+SEC("raw_tp/sys_enter")
+int handle__sys_enter(void *ctx)
+{
+ int outer_key = 2, inner_key = 3;
+ int *val;
+ void *map;
+
+ map = bpf_map_lookup_elem(&outer_map1, &outer_key);
+ if (!map)
+ return 1;
+
+ val = bpf_map_lookup_elem(map, &inner_key);
+ if (!val)
+ return 1;
+
+ if (*val == 1)
+ *val = 2;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
new file mode 100644
index 000000000000..0e87ad1ebcfa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
+
+#define NF_DROP 0
+#define NF_ACCEPT 1
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+#define NEXTHDR_FRAGMENT 44
+
+volatile int shootdowns = 0;
+
+static bool is_frag_v4(struct iphdr *iph)
+{
+ int offset;
+ int flags;
+
+ offset = bpf_ntohs(iph->frag_off);
+ flags = offset & ~IP_OFFSET;
+ offset &= IP_OFFSET;
+ offset <<= 3;
+
+ return (flags & IP_MF) || offset;
+}
+
+static bool is_frag_v6(struct ipv6hdr *ip6h)
+{
+ /* Simplifying assumption that there are no extension headers
+ * between fixed header and fragmentation header. This assumption
+ * is only valid in this test case. It saves us the hassle of
+ * searching all potential extension headers.
+ */
+ return ip6h->nexthdr == NEXTHDR_FRAGMENT;
+}
+
+static int handle_v4(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ u8 iph_buf[20] = {};
+ struct iphdr *iph;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr))
+ return NF_DROP;
+
+ iph = bpf_dynptr_slice(&ptr, 0, iph_buf, sizeof(iph_buf));
+ if (!iph)
+ return NF_DROP;
+
+ /* Shootdown any frags */
+ if (is_frag_v4(iph)) {
+ shootdowns++;
+ return NF_DROP;
+ }
+
+ return NF_ACCEPT;
+}
+
+static int handle_v6(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ipv6hdr *ip6h;
+ u8 ip6h_buf[40] = {};
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr))
+ return NF_DROP;
+
+ ip6h = bpf_dynptr_slice(&ptr, 0, ip6h_buf, sizeof(ip6h_buf));
+ if (!ip6h)
+ return NF_DROP;
+
+ /* Shootdown any frags */
+ if (is_frag_v6(ip6h)) {
+ shootdowns++;
+ return NF_DROP;
+ }
+
+ return NF_ACCEPT;
+}
+
+SEC("netfilter")
+int defrag(struct bpf_nf_ctx *ctx)
+{
+ struct __sk_buff *skb = (struct __sk_buff *)ctx->skb;
+
+ switch (bpf_ntohs(ctx->skb->protocol)) {
+ case ETH_P_IP:
+ return handle_v4(skb);
+ case ETH_P_IPV6:
+ return handle_v6(skb);
+ default:
+ return NF_ACCEPT;
+ }
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c
new file mode 100644
index 000000000000..74d912b22de9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/irq.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+unsigned long global_flags;
+
+extern void bpf_local_irq_save(unsigned long *) __weak __ksym;
+extern void bpf_local_irq_restore(unsigned long *) __weak __ksym;
+extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym;
+
+struct bpf_res_spin_lock lockA __hidden SEC(".data.A");
+struct bpf_res_spin_lock lockB __hidden SEC(".data.B");
+
+SEC("?tc")
+__failure __msg("arg#0 doesn't point to an irq flag on stack")
+int irq_save_bad_arg(struct __sk_buff *ctx)
+{
+ bpf_local_irq_save(&global_flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("arg#0 doesn't point to an irq flag on stack")
+int irq_restore_bad_arg(struct __sk_buff *ctx)
+{
+ bpf_local_irq_restore(&global_flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_2(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ bpf_local_irq_save(&flags3);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3_minus_2(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ bpf_local_irq_save(&flags3);
+ bpf_local_irq_restore(&flags3);
+ bpf_local_irq_restore(&flags2);
+ return 0;
+}
+
+static __noinline void local_irq_save(unsigned long *flags)
+{
+ bpf_local_irq_save(flags);
+}
+
+static __noinline void local_irq_restore(unsigned long *flags)
+{
+ bpf_local_irq_restore(flags);
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_1_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ local_irq_save(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_2_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ local_irq_save(&flags3);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3_minus_2_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ local_irq_save(&flags3);
+ local_irq_restore(&flags3);
+ local_irq_restore(&flags2);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int irq_balance(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ local_irq_save(&flags);
+ local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int irq_balance_n(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ local_irq_save(&flags3);
+ local_irq_restore(&flags3);
+ local_irq_restore(&flags2);
+ local_irq_restore(&flags1);
+ return 0;
+}
+
+static __noinline void local_irq_balance(void)
+{
+ unsigned long flags;
+
+ local_irq_save(&flags);
+ local_irq_restore(&flags);
+}
+
+static __noinline void local_irq_balance_n(void)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ local_irq_save(&flags3);
+ local_irq_restore(&flags3);
+ local_irq_restore(&flags2);
+ local_irq_restore(&flags1);
+}
+
+SEC("?tc")
+__success
+int irq_balance_subprog(struct __sk_buff *ctx)
+{
+ local_irq_balance();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("sleepable helper bpf_copy_from_user#")
+int irq_sleepable_helper(void *ctx)
+{
+ unsigned long flags;
+ u32 data;
+
+ local_irq_save(&flags);
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_copy_from_user_str is sleepable within IRQ-disabled region")
+int irq_sleepable_kfunc(void *ctx)
+{
+ unsigned long flags;
+ u32 data;
+
+ local_irq_save(&flags);
+ bpf_copy_from_user_str(&data, sizeof(data), NULL, 0);
+ local_irq_restore(&flags);
+ return 0;
+}
+
+int __noinline global_local_irq_balance(void)
+{
+ local_irq_balance_n();
+ return 0;
+}
+
+SEC("?tc")
+__success
+int irq_global_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_local_irq_balance();
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_ooo(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ bpf_local_irq_restore(&flags1);
+ bpf_local_irq_restore(&flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_ooo_3(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ bpf_local_irq_restore(&flags2);
+ bpf_local_irq_save(&flags3);
+ bpf_local_irq_restore(&flags1);
+ bpf_local_irq_restore(&flags3);
+ return 0;
+}
+
+static __noinline void local_irq_save_3(unsigned long *flags1, unsigned long *flags2,
+ unsigned long *flags3)
+{
+ local_irq_save(flags1);
+ local_irq_save(flags2);
+ local_irq_save(flags3);
+}
+
+SEC("?tc")
+__success
+int irq_restore_3_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save_3(&flags1, &flags2, &flags3);
+ bpf_local_irq_restore(&flags3);
+ bpf_local_irq_restore(&flags2);
+ bpf_local_irq_restore(&flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_4_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+ unsigned long flags4;
+
+ local_irq_save_3(&flags1, &flags2, &flags3);
+ bpf_local_irq_restore(&flags3);
+ bpf_local_irq_save(&flags4);
+ bpf_local_irq_restore(&flags4);
+ bpf_local_irq_restore(&flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_ooo_3_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save_3(&flags1, &flags2, &flags3);
+ bpf_local_irq_restore(&flags3);
+ bpf_local_irq_restore(&flags2);
+ bpf_local_irq_save(&flags3);
+ bpf_local_irq_restore(&flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_restore_invalid(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags = 0xfaceb00c;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected uninitialized")
+int irq_save_invalid(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_restore_iter(struct __sk_buff *ctx)
+{
+ struct bpf_iter_num it;
+
+ bpf_iter_num_new(&it, 0, 42);
+ bpf_local_irq_restore((unsigned long *)&it);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=1")
+int irq_save_iter(struct __sk_buff *ctx)
+{
+ struct bpf_iter_num it;
+
+ /* Ensure same sized slot has st->ref_obj_id set, so we reject based on
+ * slot_type != STACK_IRQ_FLAG...
+ */
+ _Static_assert(sizeof(it) == sizeof(unsigned long), "broken iterator size");
+
+ bpf_iter_num_new(&it, 0, 42);
+ bpf_local_irq_save((unsigned long *)&it);
+ bpf_local_irq_restore((unsigned long *)&it);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_flag_overwrite(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ flags = 0xdeadbeef;
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_flag_overwrite_partial(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ *(((char *)&flags) + 1) = 0xff;
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_ooo_refs_array(struct __sk_buff *ctx)
+{
+ unsigned long flags[4];
+ struct { int i; } *p;
+
+ /* refs=1 */
+ bpf_local_irq_save(&flags[0]);
+
+ /* refs=1,2 */
+ p = bpf_obj_new(typeof(*p));
+ if (!p) {
+ bpf_local_irq_restore(&flags[0]);
+ return 0;
+ }
+
+ /* refs=1,2,3 */
+ bpf_local_irq_save(&flags[1]);
+
+ /* refs=1,2,3,4 */
+ bpf_local_irq_save(&flags[2]);
+
+ /* Now when we remove ref=2, the verifier must not break the ordering in
+ * the refs array between 1,3,4. With an older implementation, the
+ * verifier would swap the last element with the removed element, but to
+ * maintain the stack property we need to use memmove.
+ */
+ bpf_obj_drop(p);
+
+ /* Save and restore to reset active_irq_id to 3, as the ordering is now
+ * refs=1,4,3. When restoring the linear scan will find prev_id in order
+ * as 3 instead of 4.
+ */
+ bpf_local_irq_save(&flags[3]);
+ bpf_local_irq_restore(&flags[3]);
+
+ /* With the incorrect implementation, we can release flags[1], flags[2],
+ * and flags[0], i.e. in the wrong order.
+ */
+ bpf_local_irq_restore(&flags[1]);
+ bpf_local_irq_restore(&flags[2]);
+ bpf_local_irq_restore(&flags[0]);
+ return 0;
+}
+
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__success
+int irq_non_sleepable_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_helper_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_sleepable_helper_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_global_subprog_indirect(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog_calling_sleepable_global(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_ooo_lock_cond_inv(struct __sk_buff *ctx)
+{
+ unsigned long flags1, flags2;
+
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags1))
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&lockB, &flags2)) {
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+ }
+
+ bpf_res_spin_unlock_irqrestore(&lockB, &flags1);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed")
+int irq_wrong_kfunc_class_1(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags1))
+ return 0;
+ /* For now, bpf_local_irq_restore is not allowed in critical section,
+ * but this test ensures error will be caught with kfunc_class when it's
+ * opened up. Tested by temporarily permitting this kfunc in critical
+ * section.
+ */
+ bpf_local_irq_restore(&flags1);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed")
+int irq_wrong_kfunc_class_2(struct __sk_buff *ctx)
+{
+ unsigned long flags1, flags2;
+
+ bpf_local_irq_save(&flags1);
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags2))
+ return 0;
+ bpf_local_irq_restore(&flags2);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
new file mode 100644
index 000000000000..7dd92a303bf6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -0,0 +1,1929 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_compiler.h"
+
+static volatile int zero = 0;
+
+int my_pid;
+int arr[256];
+int small_arr[16] SEC(".data.small_arr");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 10);
+ __type(key, int);
+ __type(value, int);
+} amap SEC(".maps");
+
+#ifdef REAL_TEST
+#define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0
+#else
+#define MY_PID_GUARD() ({ })
+#endif
+
+SEC("?raw_tp")
+__failure __msg("math between map_value pointer and register with unbounded min value is not allowed")
+int iter_err_unsafe_c_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i = zero; /* obscure initial value of i */
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 1000);
+ while ((v = bpf_iter_num_next(&it))) {
+ i++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ small_arr[i] = 123; /* invalid */
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("unbounded memory access")
+int iter_err_unsafe_asm_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ MY_PID_GUARD();
+
+ asm volatile (
+ "r6 = %[zero];" /* iteration counter */
+ "r1 = %[it];" /* iterator state */
+ "r2 = 0;"
+ "r3 = 1000;"
+ "r4 = 1;"
+ "call %[bpf_iter_num_new];"
+ "loop:"
+ "r1 = %[it];"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto out;"
+ "r6 += 1;"
+ "goto loop;"
+ "out:"
+ "r1 = %[it];"
+ "call %[bpf_iter_num_destroy];"
+ "r1 = %[small_arr];"
+ "r2 = r6;"
+ "r2 <<= 2;"
+ "r1 += r2;"
+ "*(u32 *)(r1 + 0) = r6;" /* invalid */
+ :
+ : [it]"r"(&it),
+ [small_arr]"r"(small_arr),
+ [zero]"r"(zero),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_common, "r6"
+ );
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_while_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 3);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_while_loop_auto_cleanup(const void *ctx)
+{
+ __attribute__((cleanup(bpf_iter_num_destroy))) struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 3);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
+ }
+ /* (!) no explicit bpf_iter_num_destroy() */
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_for_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 5, 10);
+ for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_bpf_for_each_macro(const void *ctx)
+{
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_for_each(num, v, 5, 10) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
+ }
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_bpf_for_macro(const void *ctx)
+{
+ int i;
+
+ MY_PID_GUARD();
+
+ bpf_for(i, 5, 10) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", i);
+ }
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_pragma_unroll_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 2);
+ __pragma_loop_no_unroll
+ for (i = 0; i < 3; i++) {
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
+ }
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_manual_unroll_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 100, 200);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_multiple_sequential_loops(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 3);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_iter_num_new(&it, 5, 10);
+ for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_iter_num_new(&it, 0, 2);
+ __pragma_loop_no_unroll
+ for (i = 0; i < 3; i++) {
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_iter_num_new(&it, 100, 200);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_limit_cond_break_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i = 0, sum = 0;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 10);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_SIMPLE: i=%d v=%d", i, *v);
+ sum += *v;
+
+ i++;
+ if (i > 3)
+ break;
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_printk("ITER_SIMPLE: sum=%d\n", sum);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_obfuscate_counter(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ /* Make i's initial value unknowable for verifier to prevent it from
+ * pruning if/else branch inside the loop body and marking i as precise.
+ */
+ int i = zero;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 10);
+ while ((v = bpf_iter_num_next(&it))) {
+ int x;
+
+ i += 1;
+
+ /* If we initialized i as `int i = 0;` above, verifier would
+ * track that i becomes 1 on first iteration after increment
+ * above, and here verifier would eagerly prune else branch
+ * and mark i as precise, ruining open-coded iterator logic
+ * completely, as each next iteration would have a different
+ * *precise* value of i, and thus there would be no
+ * convergence of state. This would result in reaching maximum
+ * instruction limit, no matter what the limit is.
+ */
+ if (i == 1)
+ x = 123;
+ else
+ x = i * 3 + 1;
+
+ bpf_printk("ITER_OBFUSCATE_COUNTER: i=%d v=%d x=%d", i, *v, x);
+
+ sum += x;
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_printk("ITER_OBFUSCATE_COUNTER: sum=%d\n", sum);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_search_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, *elem = NULL;
+ bool found = false;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 10);
+
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_SEARCH_LOOP: v=%d", *v);
+
+ if (*v == 2) {
+ found = true;
+ elem = v;
+ barrier_var(elem);
+ }
+ }
+
+ /* should fail to verify if bpf_iter_num_destroy() is here */
+
+ if (found)
+ /* here found element will be wrong, we should have copied
+ * value to a variable, but here we want to make sure we can
+ * access memory after the loop anyways
+ */
+ bpf_printk("ITER_SEARCH_LOOP: FOUND IT = %d!\n", *elem);
+ else
+ bpf_printk("ITER_SEARCH_LOOP: NOT FOUND IT!\n");
+
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_array_fill(const void *ctx)
+{
+ int sum, i;
+
+ MY_PID_GUARD();
+
+ bpf_for(i, 0, ARRAY_SIZE(arr)) {
+ arr[i] = i * 2;
+ }
+
+ sum = 0;
+ bpf_for(i, 0, ARRAY_SIZE(arr)) {
+ sum += arr[i];
+ }
+
+ bpf_printk("ITER_ARRAY_FILL: sum=%d (should be %d)\n", sum, 255 * 256);
+
+ return 0;
+}
+
+static int arr2d[4][5];
+static int arr2d_row_sums[4];
+static int arr2d_col_sums[5];
+
+SEC("raw_tp")
+__success
+int iter_nested_iters(const void *ctx)
+{
+ int sum, row, col;
+
+ MY_PID_GUARD();
+
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_for( col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d[row][col] = row * col;
+ }
+ }
+
+ /* zero-initialize sums */
+ sum = 0;
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ arr2d_row_sums[row] = 0;
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d_col_sums[col] = 0;
+ }
+
+ /* calculate sums */
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ sum += arr2d[row][col];
+ arr2d_row_sums[row] += arr2d[row][col];
+ arr2d_col_sums[col] += arr2d[row][col];
+ }
+ }
+
+ bpf_printk("ITER_NESTED_ITERS: total sum=%d", sum);
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_printk("ITER_NESTED_ITERS: row #%d sum=%d", row, arr2d_row_sums[row]);
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ bpf_printk("ITER_NESTED_ITERS: col #%d sum=%d%s",
+ col, arr2d_col_sums[col],
+ col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : "");
+ }
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_nested_deeply_iters(const void *ctx)
+{
+ int sum = 0;
+
+ MY_PID_GUARD();
+
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ sum += 1;
+ }
+ }
+ }
+ }
+ /* validate that we can break from inside bpf_repeat() */
+ break;
+ }
+
+ return sum;
+}
+
+static __noinline void fill_inner_dimension(int row)
+{
+ int col;
+
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d[row][col] = row * col;
+ }
+}
+
+static __noinline int sum_inner_dimension(int row)
+{
+ int sum = 0, col;
+
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ sum += arr2d[row][col];
+ arr2d_row_sums[row] += arr2d[row][col];
+ arr2d_col_sums[col] += arr2d[row][col];
+ }
+
+ return sum;
+}
+
+SEC("raw_tp")
+__success
+int iter_subprog_iters(const void *ctx)
+{
+ int sum, row, col;
+
+ MY_PID_GUARD();
+
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ fill_inner_dimension(row);
+ }
+
+ /* zero-initialize sums */
+ sum = 0;
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ arr2d_row_sums[row] = 0;
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d_col_sums[col] = 0;
+ }
+
+ /* calculate sums */
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ sum += sum_inner_dimension(row);
+ }
+
+ bpf_printk("ITER_SUBPROG_ITERS: total sum=%d", sum);
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_printk("ITER_SUBPROG_ITERS: row #%d sum=%d",
+ row, arr2d_row_sums[row]);
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ bpf_printk("ITER_SUBPROG_ITERS: col #%d sum=%d%s",
+ col, arr2d_col_sums[col],
+ col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : "");
+ }
+
+ return 0;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1000);
+} hash_map SEC(".maps");
+
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int iter_err_too_permissive1(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+
+ MY_PID_GUARD();
+
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
+ if (!map_val)
+ return 0;
+
+ bpf_repeat(1000000) {
+ map_val = NULL;
+ }
+
+ *map_val = 123;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'map_value_or_null'")
+int iter_err_too_permissive2(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+
+ MY_PID_GUARD();
+
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
+ if (!map_val)
+ return 0;
+
+ bpf_repeat(1000000) {
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
+ }
+
+ *map_val = 123;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'map_value_or_null'")
+int iter_err_too_permissive3(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+ bool found = false;
+
+ MY_PID_GUARD();
+
+ bpf_repeat(1000000) {
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
+ found = true;
+ }
+
+ if (found)
+ *map_val = 123;
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_tricky_but_fine(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+ bool found = false;
+
+ MY_PID_GUARD();
+
+ bpf_repeat(1000000) {
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
+ if (map_val) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ *map_val = 123;
+
+ return 0;
+}
+
+#define __bpf_memzero(p, sz) bpf_probe_read_kernel((p), (sz), 0)
+
+SEC("raw_tp")
+__success
+int iter_stack_array_loop(const void *ctx)
+{
+ long arr1[16], arr2[16], sum = 0;
+ int i;
+
+ MY_PID_GUARD();
+
+ /* zero-init arr1 and arr2 in such a way that verifier doesn't know
+ * it's all zeros; if we don't do that, we'll make BPF verifier track
+ * all combination of zero/non-zero stack slots for arr1/arr2, which
+ * will lead to O(2^(ARRAY_SIZE(arr1)+ARRAY_SIZE(arr2))) different
+ * states
+ */
+ __bpf_memzero(arr1, sizeof(arr1));
+ __bpf_memzero(arr2, sizeof(arr1));
+
+ /* validate that we can break and continue when using bpf_for() */
+ bpf_for(i, 0, ARRAY_SIZE(arr1)) {
+ if (i & 1) {
+ arr1[i] = i;
+ continue;
+ } else {
+ arr2[i] = i;
+ break;
+ }
+ }
+
+ bpf_for(i, 0, ARRAY_SIZE(arr1)) {
+ sum += arr1[i] + arr2[i];
+ }
+
+ return sum;
+}
+
+static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul)
+{
+ int *t, i;
+
+ while ((t = bpf_iter_num_next(it))) {
+ i = *t;
+ if (i >= n)
+ break;
+ arr[i] = i * mul;
+ }
+}
+
+static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n)
+{
+ int *t, i, sum = 0;
+
+ while ((t = bpf_iter_num_next(it))) {
+ i = *t;
+ if ((__u32)i >= n)
+ break;
+ sum += arr[i];
+ }
+
+ return sum;
+}
+
+SEC("raw_tp")
+__success
+int iter_pass_iter_ptr_to_subprog(const void *ctx)
+{
+ int arr1[16], arr2[32];
+ struct bpf_iter_num it;
+ int n, sum1, sum2;
+
+ MY_PID_GUARD();
+
+ /* fill arr1 */
+ n = ARRAY_SIZE(arr1);
+ bpf_iter_num_new(&it, 0, n);
+ fill(&it, arr1, n, 2);
+ bpf_iter_num_destroy(&it);
+
+ /* fill arr2 */
+ n = ARRAY_SIZE(arr2);
+ bpf_iter_num_new(&it, 0, n);
+ fill(&it, arr2, n, 10);
+ bpf_iter_num_destroy(&it);
+
+ /* sum arr1 */
+ n = ARRAY_SIZE(arr1);
+ bpf_iter_num_new(&it, 0, n);
+ sum1 = sum(&it, arr1, n);
+ bpf_iter_num_destroy(&it);
+
+ /* sum arr2 */
+ n = ARRAY_SIZE(arr2);
+ bpf_iter_num_new(&it, 0, n);
+ sum2 = sum(&it, arr2, n);
+ bpf_iter_num_destroy(&it);
+
+ bpf_printk("sum1=%d, sum2=%d", sum1, sum2);
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure
+__msg("R1 type=scalar expected=fp")
+__naked int delayed_read_mark(void)
+{
+ /* This is equivalent to C program below.
+ * The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead.
+ * State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch.
+ * At this point iterator next() call is reached with r7 that has no read mark.
+ * Loop body with r7=0xdead would only be visited if verifier would decide to continue
+ * with second loop iteration. Absence of read mark on r7 might affect state
+ * equivalent logic used for iterator convergence tracking.
+ *
+ * r7 = &fp[-16]
+ * fp[-16] = 0
+ * r6 = bpf_get_prandom_u32()
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * r6++
+ * if (r6 != 42) {
+ * r7 = 0xdead
+ * continue;
+ * }
+ * bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "r7 = r10;"
+ "r7 += -16;"
+ "r0 = 0;"
+ "*(u64 *)(r7 + 0) = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "r6 += 1;"
+ "if r6 != 42 goto 3f;"
+ "r7 = 0xdead;"
+ "goto 1b;"
+ "3:"
+ "r1 = r7;"
+ "r2 = 8;"
+ "r3 = 0xdeadbeef;"
+ "call %[bpf_probe_read_user];"
+ "goto 1b;"
+ "2:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_probe_read_user)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__naked int delayed_precision_mark(void)
+{
+ /* This is equivalent to C program below.
+ * The test is similar to delayed_iter_mark but verifies that incomplete
+ * precision don't fool verifier.
+ * The call to bpf_iter_num_next() is reachable with r7 values -16 and -32.
+ * State with r7=-16 is visited first and follows r6 != 42 ... continue branch.
+ * At this point iterator next() call is reached with r7 that has no read
+ * and precision marks.
+ * Loop body with r7=-32 would only be visited if verifier would decide to continue
+ * with second loop iteration. Absence of precision mark on r7 might affect state
+ * equivalent logic used for iterator convergence tracking.
+ *
+ * r8 = 0
+ * fp[-16] = 0
+ * r7 = -16
+ * r6 = bpf_get_prandom_u32()
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (r6 != 42) {
+ * r7 = -32
+ * r6 = bpf_get_prandom_u32()
+ * continue;
+ * }
+ * r0 = r10
+ * r0 += r7
+ * r8 = *(u64 *)(r0 + 0) // this is not safe
+ * r6 = bpf_get_prandom_u32()
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return r8
+ */
+ asm volatile (
+ "r8 = 0;"
+ "*(u64 *)(r10 - 16) = r8;"
+ "r7 = -16;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;\n"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "if r6 != 42 goto 3f;"
+ "r7 = -33;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "goto 1b;\n"
+ "3:"
+ "r0 = r10;"
+ "r0 += r7;"
+ "r8 = *(u64 *)(r0 + 0);"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "goto 1b;\n"
+ "2:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = r8;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_probe_read_user)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps1(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * The case turns out to be tricky in a sense that:
+ * - states with c=-25 are explored only on a second iteration
+ * of the outer loop;
+ * - states with read+precise mark on c are explored only on
+ * second iteration of the inner loop and in a state which
+ * is pushed to states stack first.
+ *
+ * Depending on the details of iterator convergence logic
+ * verifier might stop states traversal too early and miss
+ * unsafe c=-25 memory access.
+ *
+ * j = iter_new(); // fp[-16]
+ * a = 0; // r6
+ * b = 0; // r7
+ * c = -24; // r8
+ * while (iter_next(j)) {
+ * i = iter_new(); // fp[-8]
+ * a = 0; // r6
+ * b = 0; // r7
+ * while (iter_next(i)) {
+ * if (a == 1) {
+ * a = 0;
+ * b = 1;
+ * } else if (a == 0) {
+ * a = 1;
+ * if (random() == 42)
+ * continue;
+ * if (b == 1) {
+ * *(r10 + c) = 7; // this is not safe
+ * iter_destroy(i);
+ * iter_destroy(j);
+ * return;
+ * }
+ * }
+ * }
+ * iter_destroy(i);
+ * a = 0;
+ * b = 0;
+ * c = -25;
+ * }
+ * iter_destroy(j);
+ * return;
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -16;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "r8 = -24;"
+ "j_loop_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto j_loop_end_%=;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "i_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto i_loop_end_%=;"
+ "check_one_r6_%=:"
+ "if r6 != 1 goto check_zero_r6_%=;"
+ "r6 = 0;"
+ "r7 = 1;"
+ "goto i_loop_%=;"
+ "check_zero_r6_%=:"
+ "if r6 != 0 goto i_loop_%=;"
+ "r6 = 1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto check_one_r7_%=;"
+ "goto i_loop_%=;"
+ "check_one_r7_%=:"
+ "if r7 != 1 goto i_loop_%=;"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "i_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "r8 = -25;"
+ "goto j_loop_%=;"
+ "j_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps2(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * The case turns out to be tricky in a sense that:
+ * - states with read+precise mark on c are explored only on a second
+ * iteration of the first inner loop and in a state which is pushed to
+ * states stack first.
+ * - states with c=-25 are explored only on a second iteration of the
+ * second inner loop and in a state which is pushed to states stack
+ * first.
+ *
+ * Depending on the details of iterator convergence logic
+ * verifier might stop states traversal too early and miss
+ * unsafe c=-25 memory access.
+ *
+ * j = iter_new(); // fp[-16]
+ * a = 0; // r6
+ * b = 0; // r7
+ * c = -24; // r8
+ * while (iter_next(j)) {
+ * i = iter_new(); // fp[-8]
+ * a = 0; // r6
+ * b = 0; // r7
+ * while (iter_next(i)) {
+ * if (a == 1) {
+ * a = 0;
+ * b = 1;
+ * } else if (a == 0) {
+ * a = 1;
+ * if (random() == 42)
+ * continue;
+ * if (b == 1) {
+ * *(r10 + c) = 7; // this is not safe
+ * iter_destroy(i);
+ * iter_destroy(j);
+ * return;
+ * }
+ * }
+ * }
+ * iter_destroy(i);
+ * i = iter_new(); // fp[-8]
+ * a = 0; // r6
+ * b = 0; // r7
+ * while (iter_next(i)) {
+ * if (a == 1) {
+ * a = 0;
+ * b = 1;
+ * } else if (a == 0) {
+ * a = 1;
+ * if (random() == 42)
+ * continue;
+ * if (b == 1) {
+ * a = 0;
+ * c = -25;
+ * }
+ * }
+ * }
+ * iter_destroy(i);
+ * }
+ * iter_destroy(j);
+ * return;
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -16;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "r8 = -24;"
+ "j_loop_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto j_loop_end_%=;"
+
+ /* first inner loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "i_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto i_loop_end_%=;"
+ "check_one_r6_%=:"
+ "if r6 != 1 goto check_zero_r6_%=;"
+ "r6 = 0;"
+ "r7 = 1;"
+ "goto i_loop_%=;"
+ "check_zero_r6_%=:"
+ "if r6 != 0 goto i_loop_%=;"
+ "r6 = 1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto check_one_r7_%=;"
+ "goto i_loop_%=;"
+ "check_one_r7_%=:"
+ "if r7 != 1 goto i_loop_%=;"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "i_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+
+ /* second inner loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "i2_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto i2_loop_end_%=;"
+ "check2_one_r6_%=:"
+ "if r6 != 1 goto check2_zero_r6_%=;"
+ "r6 = 0;"
+ "r7 = 1;"
+ "goto i2_loop_%=;"
+ "check2_zero_r6_%=:"
+ "if r6 != 0 goto i2_loop_%=;"
+ "r6 = 1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto check2_one_r7_%=;"
+ "goto i2_loop_%=;"
+ "check2_one_r7_%=:"
+ "if r7 != 1 goto i2_loop_%=;"
+ "r6 = 0;"
+ "r8 = -25;"
+ "goto i2_loop_%=;"
+ "i2_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+
+ "r6 = 0;"
+ "r7 = 0;"
+ "goto j_loop_%=;"
+ "j_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps3(void)
+{
+ /* This is equivalent to a C program below.
+ *
+ * if (random() != 24) { // assume false branch is placed first
+ * i = iter_new(); // fp[-8]
+ * while (iter_next(i));
+ * iter_destroy(i);
+ * return;
+ * }
+ *
+ * for (i = 10; i > 0; i--); // increase dfs_depth for child states
+ *
+ * i = iter_new(); // fp[-8]
+ * b = -24; // r8
+ * for (;;) { // checkpoint (L)
+ * if (iter_next(i)) // checkpoint (N)
+ * break;
+ * if (random() == 77) { // assume false branch is placed first
+ * *(u64 *)(r10 + b) = 7; // this is not safe when b == -25
+ * iter_destroy(i);
+ * return;
+ * }
+ * if (random() == 42) { // assume false branch is placed first
+ * b = -25;
+ * }
+ * }
+ * iter_destroy(i);
+ *
+ * In case of a buggy verifier first loop might poison
+ * env->cur_state->loop_entry with a state having 0 branches
+ * and small dfs_depth. This would trigger NOT_EXACT states
+ * comparison for some states within second loop.
+ * Specifically, checkpoint (L) might be problematic if:
+ * - branch with '*(u64 *)(r10 + b) = 7' is not explored yet;
+ * - checkpoint (L) is first reached in state {b=-24};
+ * - traversal is pruned at checkpoint (N) setting checkpoint's (L)
+ * branch count to 0, thus making it eligible for use in pruning;
+ * - checkpoint (L) is next reached in state {b=-25},
+ * this would cause NOT_EXACT comparison with a state {b=-24}
+ * while 'b' is not marked precise yet.
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 24 goto 2f;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 5;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 != 0 goto 1b;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "2:"
+ /* loop to increase dfs_depth */
+ "r0 = 10;"
+ "3:"
+ "r0 -= 1;"
+ "if r0 != 0 goto 3b;"
+ /* end of loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r8 = -24;"
+ "main_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto main_loop_end_%=;"
+ /* first if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 77 goto unsafe_write_%=;"
+ /* second if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto poison_r8_%=;"
+ /* iterate */
+ "goto main_loop_%=;"
+ "main_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+
+ "unsafe_write_%=:"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto main_loop_end_%=;"
+
+ "poison_r8_%=:"
+ "r8 = -25;"
+ "goto main_loop_%=;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__success
+__naked int triple_continue(void)
+{
+ /* This is equivalent to C program below.
+ * High branching factor of the loop body turned out to be
+ * problematic for one of the iterator convergence tracking
+ * algorithms explored.
+ *
+ * r6 = bpf_get_prandom_u32()
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (bpf_get_prandom_u32() != 42)
+ * continue;
+ * if (bpf_get_prandom_u32() != 42)
+ * continue;
+ * if (bpf_get_prandom_u32() != 42)
+ * continue;
+ * r0 += 0;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto loop_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto loop_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto loop_%=;"
+ "r0 += 0;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__success
+__naked int widen_spill(void)
+{
+ /* This is equivalent to C program below.
+ * The counter is stored in fp[-16], if this counter is not widened
+ * verifier states representing loop iterations would never converge.
+ *
+ * fp[-16] = 0
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * r0 = fp[-16];
+ * r0 += 1;
+ * fp[-16] = r0;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "r0 = *(u64 *)(r10 - 16);"
+ "r0 += 1;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("raw_tp")
+__success
+__naked int checkpoint_states_deletion(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * int *a, *b, *c, *d, *e, *f;
+ * int i, sum = 0;
+ * bpf_for(i, 0, 10) {
+ * a = bpf_map_lookup_elem(&amap, &i);
+ * b = bpf_map_lookup_elem(&amap, &i);
+ * c = bpf_map_lookup_elem(&amap, &i);
+ * d = bpf_map_lookup_elem(&amap, &i);
+ * e = bpf_map_lookup_elem(&amap, &i);
+ * f = bpf_map_lookup_elem(&amap, &i);
+ * if (a) sum += 1;
+ * if (b) sum += 1;
+ * if (c) sum += 1;
+ * if (d) sum += 1;
+ * if (e) sum += 1;
+ * if (f) sum += 1;
+ * }
+ * return 0;
+ *
+ * The body of the loop spawns multiple simulation paths
+ * with different combination of NULL/non-NULL information for a/b/c/d/e/f.
+ * Each combination is unique from states_equal() point of view.
+ * Explored states checkpoint is created after each iterator next call.
+ * Iterator convergence logic expects that eventually current state
+ * would get equal to one of the explored states and thus loop
+ * exploration would be finished (at-least for a specific path).
+ * Verifier evicts explored states with high miss to hit ratio
+ * to to avoid comparing current state with too many explored
+ * states per instruction.
+ * This test is designed to "stress test" eviction policy defined using formula:
+ *
+ * sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted
+ *
+ * Currently N is set to 64, which allows for 6 variables in this test.
+ */
+ asm volatile (
+ "r6 = 0;" /* a */
+ "r7 = 0;" /* b */
+ "r8 = 0;" /* c */
+ "*(u64 *)(r10 - 24) = r6;" /* d */
+ "*(u64 *)(r10 - 32) = r6;" /* e */
+ "*(u64 *)(r10 - 40) = r6;" /* f */
+ "r9 = 0;" /* sum */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+
+ "*(u64 *)(r10 - 16) = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "r6 = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "r7 = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "r8 = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "*(u64 *)(r10 - 24) = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "*(u64 *)(r10 - 32) = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "*(u64 *)(r10 - 40) = r0;"
+
+ "if r6 == 0 goto +1;"
+ "r9 += 1;"
+ "if r7 == 0 goto +1;"
+ "r9 += 1;"
+ "if r8 == 0 goto +1;"
+ "r9 += 1;"
+ "r0 = *(u64 *)(r10 - 24);"
+ "if r0 == 0 goto +1;"
+ "r9 += 1;"
+ "r0 = *(u64 *)(r10 - 32);"
+ "if r0 == 0 goto +1;"
+ "r9 += 1;"
+ "r0 = *(u64 *)(r10 - 40);"
+ "if r0 == 0 goto +1;"
+ "r9 += 1;"
+
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm_addr(amap)
+ : __clobber_all
+ );
+}
+
+struct {
+ int data[32];
+ int n;
+} loop_data;
+
+SEC("raw_tp")
+__success
+int iter_arr_with_actual_elem_count(const void *ctx)
+{
+ int i, n = loop_data.n, sum = 0;
+
+ if (n > ARRAY_SIZE(loop_data.data))
+ return 0;
+
+ bpf_for(i, 0, n) {
+ /* no rechecking of i against ARRAY_SIZE(loop_data.n) */
+ sum += loop_data.data[i];
+ }
+
+ return sum;
+}
+
+__u32 upper, select_n, result;
+__u64 global;
+
+static __noinline bool nest_2(char *str)
+{
+ /* some insns (including branch insns) to ensure stacksafe() is triggered
+ * in nest_2(). This way, stacksafe() can compare frame associated with nest_1().
+ */
+ if (str[0] == 't')
+ return true;
+ if (str[1] == 'e')
+ return true;
+ if (str[2] == 's')
+ return true;
+ if (str[3] == 't')
+ return true;
+ return false;
+}
+
+static __noinline bool nest_1(int n)
+{
+ /* case 0: allocate stack, case 1: no allocate stack */
+ switch (n) {
+ case 0: {
+ char comm[16];
+
+ if (bpf_get_current_comm(comm, 16))
+ return false;
+ return nest_2(comm);
+ }
+ case 1:
+ return nest_2((char *)&global);
+ default:
+ return false;
+ }
+}
+
+SEC("raw_tp")
+__success
+int iter_subprog_check_stacksafe(const void *ctx)
+{
+ long i;
+
+ bpf_for(i, 0, upper) {
+ if (!nest_1(select_n)) {
+ result = 1;
+ return 0;
+ }
+ }
+
+ result = 2;
+ return 0;
+}
+
+struct bpf_iter_num global_it;
+
+SEC("raw_tp")
+__failure __msg("arg#0 expected pointer to an iterator on stack")
+int iter_new_bad_arg(const void *ctx)
+{
+ bpf_iter_num_new(&global_it, 0, 1);
+ return 0;
+}
+
+SEC("raw_tp")
+__failure __msg("arg#0 expected pointer to an iterator on stack")
+int iter_next_bad_arg(const void *ctx)
+{
+ bpf_iter_num_next(&global_it);
+ return 0;
+}
+
+SEC("raw_tp")
+__failure __msg("arg#0 expected pointer to an iterator on stack")
+int iter_destroy_bad_arg(const void *ctx)
+{
+ bpf_iter_num_destroy(&global_it);
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int clean_live_states(const void *ctx)
+{
+ char buf[1];
+ int i, j, k, l, m, n, o;
+
+ bpf_for(i, 0, 10)
+ bpf_for(j, 0, 10)
+ bpf_for(k, 0, 10)
+ bpf_for(l, 0, 10)
+ bpf_for(m, 0, 10)
+ bpf_for(n, 0, 10)
+ bpf_for(o, 0, 10) {
+ if (unlikely(bpf_get_prandom_u32()))
+ buf[0] = 42;
+ bpf_printk("%s", buf);
+ }
+ return 0;
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "call noop;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int noop(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state2(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32())) {
+ * r6 = -31;
+ * jump_into_loop:
+ * goto +0;
+ * goto loop;
+ * }
+ * if (unlikely(bpf_get_prandom_u32()))
+ * goto jump_into_loop;
+ * loop:
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto jump_into_loop_%=;"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "jump_into_loop_%=: "
+ "goto +0;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state3(void)
+{
+ /*
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * loop1(-32, &fp[-8])
+ * loop1_wrapper(&fp[-8])
+ * bpf_iter_num_destroy(&fp[-8])
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ /* call #1 */
+ "r1 = -32;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call loop1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ /* call #2 */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call loop1_wrapper;"
+ /* return */
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1(void)
+{
+ /*
+ * int loop1(num, iter) {
+ * r6 = num;
+ * r7 = iter;
+ * while (bpf_iter_num_next(r7)) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * return 0
+ * }
+ */
+ asm volatile (
+ "r6 = r1;"
+ "r7 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "loop_%=:"
+ "r1 = r7;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1_wrapper(void)
+{
+ /*
+ * int loop1_wrapper(iter) {
+ * r6 = -32;
+ * r7 = iter;
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * loop1(r6, r7);
+ * return 0;
+ * }
+ */
+ asm volatile (
+ "r6 = -32;"
+ "r7 = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "r1 = r6;"
+ "r2 = r7;"
+ "call loop1;"
+ "r0 = 0;"
+ "exit;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_css.c b/tools/testing/selftests/bpf/progs/iters_css.c
new file mode 100644
index 000000000000..ec1f6c2f590b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_css.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid;
+u64 root_cg_id, leaf_cg_id;
+u64 first_cg_id, last_cg_id;
+
+int pre_order_cnt, post_order_cnt, tree_high;
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
+int iter_css_for_each(const void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct cgroup_subsys_state *root_css, *leaf_css, *pos;
+ struct cgroup *root_cgrp, *leaf_cgrp, *cur_cgrp;
+
+ if (cur_task->pid != target_pid)
+ return 0;
+
+ root_cgrp = bpf_cgroup_from_id(root_cg_id);
+
+ if (!root_cgrp)
+ return 0;
+
+ leaf_cgrp = bpf_cgroup_from_id(leaf_cg_id);
+
+ if (!leaf_cgrp) {
+ bpf_cgroup_release(root_cgrp);
+ return 0;
+ }
+ root_css = &root_cgrp->self;
+ leaf_css = &leaf_cgrp->self;
+ pre_order_cnt = post_order_cnt = tree_high = 0;
+ first_cg_id = last_cg_id = 0;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+ cur_cgrp = pos->cgroup;
+ post_order_cnt++;
+ last_cg_id = cur_cgrp->kn->id;
+ }
+
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_PRE) {
+ cur_cgrp = pos->cgroup;
+ pre_order_cnt++;
+ if (!first_cg_id)
+ first_cg_id = cur_cgrp->kn->id;
+ }
+
+ bpf_for_each(css, pos, leaf_css, BPF_CGROUP_ITER_ANCESTORS_UP)
+ tree_high++;
+
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_ANCESTORS_UP)
+ tree_high--;
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(root_cgrp);
+ bpf_cgroup_release(leaf_cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_css_task.c b/tools/testing/selftests/bpf/progs/iters_css_task.c
new file mode 100644
index 000000000000..9ac758649cb8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_css_task.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+
+pid_t target_pid;
+int css_task_cnt;
+u64 cg_id;
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(iter_css_task_for_each, struct vm_area_struct *vma,
+ unsigned long reqprot, unsigned long prot, int ret)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ if (cur_task->pid != target_pid)
+ return ret;
+
+ cgrp = bpf_cgroup_from_id(cg_id);
+
+ if (!cgrp)
+ return -EPERM;
+
+ css = &cgrp->self;
+ css_task_cnt = 0;
+
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS)
+ if (task->pid == target_pid)
+ css_task_cnt++;
+
+ bpf_cgroup_release(cgrp);
+
+ return -EPERM;
+}
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+ return cgrp->kn->id;
+}
+
+SEC("?iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct cgroup *cgrp = ctx->cgroup;
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ /* epilogue */
+ if (cgrp == NULL) {
+ BPF_SEQ_PRINTF(seq, "epilogue\n");
+ return 0;
+ }
+
+ /* prologue */
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "prologue\n");
+
+ BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp));
+
+ css = &cgrp->self;
+ css_task_cnt = 0;
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+ if (task->pid == target_pid)
+ css_task_cnt++;
+ }
+
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int BPF_PROG(iter_css_task_for_each_sleep)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cgrp_id);
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ if (cgrp == NULL)
+ return 0;
+ css = &cgrp->self;
+
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+ }
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c
new file mode 100644
index 000000000000..d00fd570255a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_looping.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define ITER_HELPERS \
+ __imm(bpf_iter_num_new), \
+ __imm(bpf_iter_num_next), \
+ __imm(bpf_iter_num_destroy)
+
+SEC("?raw_tp")
+__success
+int force_clang_to_emit_btf_for_externs(void *ctx)
+{
+ /* we need this as a workaround to enforce compiler emitting BTF
+ * information for bpf_iter_num_{new,next,destroy}() kfuncs,
+ * as, apparently, it doesn't emit it for symbols only referenced from
+ * assembly (or cleanup attribute, for that matter, as well)
+ */
+ bpf_repeat(0);
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int consume_first_item_only(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* consume first item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ "if r0 == 0 goto +1;"
+ "r0 = *(u32 *)(r0 + 0);"
+
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R0 invalid mem access 'scalar'")
+int missing_null_check_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* consume first element */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ /* FAIL: deref with no NULL check */
+ "r1 = *(u32 *)(r0 + 0);"
+
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure
+__msg("invalid access to memory, mem_size=4 off=0 size=8")
+__msg("R0 min value is outside of the allowed memory range")
+int wrong_sized_read_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* consume first element */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ "if r0 == 0 goto +1;"
+ /* FAIL: deref more than available 4 bytes */
+ "r0 = *(u64 *)(r0 + 0);"
+
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+int simplest_loop(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "r6 = 0;" /* init sum */
+
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+
+ "1:"
+ /* consume next item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ "if r0 == 0 goto 2f;"
+ "r0 = *(u32 *)(r0 + 0);"
+ "r6 += r0;" /* accumulate sum */
+ "goto 1b;"
+
+ "2:"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common, "r6"
+ );
+
+ return 0;
+}
+
+__used
+static void iterator_with_diff_stack_depth(int x)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "if r1 == 42 goto 0f;"
+ "*(u64 *)(r10 - 128) = 0;"
+ "0:"
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ /* consume next item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "goto 1b;"
+ "2:"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("socket")
+__success
+__naked int widening_stack_size_bug(void *ctx)
+{
+ /*
+ * Depending on iterator_with_diff_stack_depth() parameter value,
+ * subprogram stack depth is either 8 or 128 bytes. Arrange values so
+ * that it is 128 on a first call and 8 on a second. This triggered a
+ * bug in verifier's widen_imprecise_scalars() logic.
+ */
+ asm volatile (
+ "r6 = 0;"
+ "r1 = 0;"
+ "1:"
+ "call iterator_with_diff_stack_depth;"
+ "r1 = 42;"
+ "r6 += 1;"
+ "if r6 < 2 goto 1b;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_num.c b/tools/testing/selftests/bpf/progs/iters_num.c
new file mode 100644
index 000000000000..7a77a8daee0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_num.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <limits.h>
+#include <linux/errno.h>
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+const volatile __s64 exp_empty_zero = 0 + 1;
+__s64 res_empty_zero;
+
+SEC("raw_tp/sys_enter")
+int num_empty_zero(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, 0, 0) sum += i;
+ res_empty_zero = 1 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_empty_int_min = 0 + 2;
+__s64 res_empty_int_min;
+
+SEC("raw_tp/sys_enter")
+int num_empty_int_min(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MIN, INT_MIN) sum += i;
+ res_empty_int_min = 2 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_empty_int_max = 0 + 3;
+__s64 res_empty_int_max;
+
+SEC("raw_tp/sys_enter")
+int num_empty_int_max(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MAX, INT_MAX) sum += i;
+ res_empty_int_max = 3 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_empty_minus_one = 0 + 4;
+__s64 res_empty_minus_one;
+
+SEC("raw_tp/sys_enter")
+int num_empty_minus_one(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, -1, -1) sum += i;
+ res_empty_minus_one = 4 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_simple_sum = 9 * 10 / 2;
+__s64 res_simple_sum;
+
+SEC("raw_tp/sys_enter")
+int num_simple_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, 0, 10) sum += i;
+ res_simple_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_neg_sum = -11 * 10 / 2;
+__s64 res_neg_sum;
+
+SEC("raw_tp/sys_enter")
+int num_neg_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, -10, 0) sum += i;
+ res_neg_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_very_neg_sum = INT_MIN + (__s64)(INT_MIN + 1);
+__s64 res_very_neg_sum;
+
+SEC("raw_tp/sys_enter")
+int num_very_neg_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MIN, INT_MIN + 2) sum += i;
+ res_very_neg_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_very_big_sum = (__s64)(INT_MAX - 1) + (__s64)(INT_MAX - 2);
+__s64 res_very_big_sum;
+
+SEC("raw_tp/sys_enter")
+int num_very_big_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MAX - 2, INT_MAX) sum += i;
+ res_very_big_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_neg_pos_sum = -3;
+__s64 res_neg_pos_sum;
+
+SEC("raw_tp/sys_enter")
+int num_neg_pos_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, -3, 3) sum += i;
+ res_neg_pos_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_invalid_range = -EINVAL;
+__s64 res_invalid_range;
+
+SEC("raw_tp/sys_enter")
+int num_invalid_range(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ res_invalid_range = bpf_iter_num_new(&it, 1, 0);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+const volatile __s64 exp_max_range = 0 + 10;
+__s64 res_max_range;
+
+SEC("raw_tp/sys_enter")
+int num_max_range(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ res_max_range = 10 + bpf_iter_num_new(&it, 0, BPF_MAX_LOOPS);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+const volatile __s64 exp_e2big_range = -E2BIG;
+__s64 res_e2big_range;
+
+SEC("raw_tp/sys_enter")
+int num_e2big_range(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ res_e2big_range = bpf_iter_num_new(&it, -1, BPF_MAX_LOOPS);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+const volatile __s64 exp_succ_elem_cnt = 10;
+__s64 res_succ_elem_cnt;
+
+SEC("raw_tp/sys_enter")
+int num_succ_elem_cnt(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int cnt = 0, *v;
+
+ bpf_iter_num_new(&it, 0, 10);
+ while ((v = bpf_iter_num_next(&it))) {
+ cnt++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ res_succ_elem_cnt = cnt;
+
+ return 0;
+}
+
+const volatile __s64 exp_overfetched_elem_cnt = 5;
+__s64 res_overfetched_elem_cnt;
+
+SEC("raw_tp/sys_enter")
+int num_overfetched_elem_cnt(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int cnt = 0, *v, i;
+
+ bpf_iter_num_new(&it, 0, 5);
+ for (i = 0; i < 10; i++) {
+ v = bpf_iter_num_next(&it);
+ if (v)
+ cnt++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ res_overfetched_elem_cnt = cnt;
+
+ return 0;
+}
+
+const volatile __s64 exp_fail_elem_cnt = 20 + 0;
+__s64 res_fail_elem_cnt;
+
+SEC("raw_tp/sys_enter")
+int num_fail_elem_cnt(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int cnt = 0, *v, i;
+
+ bpf_iter_num_new(&it, 100, 10);
+ for (i = 0; i < 10; i++) {
+ v = bpf_iter_num_next(&it);
+ if (v)
+ cnt++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ res_fail_elem_cnt = 20 + cnt;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c
new file mode 100644
index 000000000000..d273b46dfc7c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define ITER_HELPERS \
+ __imm(bpf_iter_num_new), \
+ __imm(bpf_iter_num_next), \
+ __imm(bpf_iter_num_destroy)
+
+SEC("?raw_tp")
+__success
+int force_clang_to_emit_btf_for_externs(void *ctx)
+{
+ /* we need this as a workaround to enforce compiler emitting BTF
+ * information for bpf_iter_num_{new,next,destroy}() kfuncs,
+ * as, apparently, it doesn't emit it for symbols only referenced from
+ * assembly (or cleanup attribute, for that matter, as well)
+ */
+ bpf_repeat(0);
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
+int create_and_destroy(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=1")
+int create_and_forget_to_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #0")
+int destroy_without_creating_fail(void *ctx)
+{
+ /* init with zeros to stop verifier complaining about uninit stack */
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #0")
+int compromise_iter_w_direct_write_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* directly write over first half of iter state */
+ "*(u64 *)(%[iter] + 0) = r0;"
+
+ /* (attempt to) destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=1")
+int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* directly write over first half of iter state */
+ "*(u64 *)(%[iter] + 0) = r0;"
+
+ /* don't destroy iter, leaking ref, which should fail */
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #0")
+int compromise_iter_w_helper_write_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* overwrite 8th byte with bpf_probe_read_kernel() */
+ "r1 = %[iter];"
+ "r1 += 7;"
+ "r2 = 1;"
+ "r3 = 0;" /* NULL */
+ "call %[bpf_probe_read_kernel];"
+
+ /* (attempt to) destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS, __imm(bpf_probe_read_kernel)
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+static __noinline void subprog_with_iter(void)
+{
+ struct bpf_iter_num iter;
+
+ bpf_iter_num_new(&iter, 0, 1);
+
+ return;
+}
+
+SEC("?raw_tp")
+__failure
+/* ensure there was a call to subprog, which might happen without __noinline */
+__msg("returning from callee:")
+__msg("Unreleased reference id=1")
+int leak_iter_from_subprog_fail(void *ctx)
+{
+ subprog_with_iter();
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
+int valid_stack_reuse(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+
+ /* now reuse same stack slots */
+
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected uninitialized iter_num as arg #0")
+int double_create_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* (attempt to) create iterator again */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #0")
+int double_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ /* (attempt to) destroy iterator again */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #0")
+int next_without_new_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* don't create iterator and try to iterate*/
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #0")
+int next_after_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ /* don't create iterator and try to iterate*/
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int __naked read_from_iter_slot_fail(void)
+{
+ asm volatile (
+ /* r6 points to struct bpf_iter_num on the stack */
+ "r6 = r10;"
+ "r6 += -24;"
+
+ /* create iterator */
+ "r1 = r6;"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* attempt to leak bpf_iter_num state */
+ "r7 = *(u64 *)(r6 + 0);"
+ "r8 = *(u64 *)(r6 + 8);"
+
+ /* destroy iterator */
+ "r1 = r6;"
+ "call %[bpf_iter_num_destroy];"
+
+ /* leak bpf_iter_num state */
+ "r0 = r7;"
+ "if r7 > r8 goto +1;"
+ "r0 = r8;"
+ "exit;"
+ :
+ : ITER_HELPERS
+ : __clobber_common, "r6", "r7", "r8"
+ );
+}
+
+int zero;
+
+SEC("?raw_tp")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+__msg("Unreleased reference")
+int stacksafe_should_not_conflate_stack_spill_and_iter(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* Create a fork in logic, with general setup as follows:
+ * - fallthrough (first) path is valid;
+ * - branch (second) path is invalid.
+ * Then depending on what we do in fallthrough vs branch path,
+ * we try to detect bugs in func_states_equal(), regsafe(),
+ * refsafe(), stack_safe(), and similar by tricking verifier
+ * into believing that branch state is a valid subset of
+ * a fallthrough state. Verifier should reject overall
+ * validation, unless there is a bug somewhere in verifier
+ * logic.
+ */
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+
+ "if r6 > r7 goto bad;" /* fork */
+
+ /* spill r6 into stack slot of bpf_iter_num var */
+ "*(u64 *)(%[iter] + 0) = r6;"
+
+ "goto skip_bad;"
+
+ "bad:"
+ /* create iterator in the same stack slot */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* but then forget about it and overwrite it back to r6 spill */
+ "*(u64 *)(%[iter] + 0) = r6;"
+
+ "skip_bad:"
+ "goto +0;" /* force checkpoint */
+
+ /* corrupt stack slots, if they are really dynptr */
+ "*(u64 *)(%[iter] + 0) = r6;"
+ :
+ : __imm_ptr(iter),
+ __imm_addr(zero),
+ __imm(bpf_get_prandom_u32),
+ __imm(bpf_dynptr_from_mem),
+ ITER_HELPERS
+ : __clobber_common, "r6", "r7"
+ );
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c
new file mode 100644
index 000000000000..e4d53e40ff20
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid;
+int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt;
+
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
+int iter_task_for_each_sleep(void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct task_struct *pos;
+
+ if (cur_task->pid != target_pid)
+ return 0;
+ procs_cnt = threads_cnt = proc_threads_cnt = 0;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(task, pos, NULL, ~0U) {
+ /* Below instructions shouldn't be executed for invalid flags */
+ invalid_cnt++;
+ }
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) {
+ /* Below instructions shouldn't be executed for invalid task__nullable */
+ invalid_cnt++;
+ }
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS)
+ if (pos->pid == target_pid)
+ procs_cnt++;
+
+ bpf_for_each(task, pos, cur_task, BPF_TASK_ITER_PROC_THREADS)
+ proc_threads_cnt++;
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_THREADS)
+ if (pos->tgid == target_pid)
+ threads_cnt++;
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
new file mode 100644
index 000000000000..fe3663dedbe1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_iter_task_new requires RCU critical section protection")
+int BPF_PROG(iter_tasks_without_lock)
+{
+ struct task_struct *pos;
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) {
+
+ }
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
+int BPF_PROG(iter_css_without_lock)
+{
+ u64 cg_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+ struct cgroup_subsys_state *root_css, *pos;
+
+ if (!cgrp)
+ return 0;
+ root_css = &cgrp->self;
+
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+
+ }
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+int BPF_PROG(iter_tasks_lock_and_unlock)
+{
+ struct task_struct *pos;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) {
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ }
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+int BPF_PROG(iter_css_lock_and_unlock)
+{
+ u64 cg_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+ struct cgroup_subsys_state *root_css, *pos;
+
+ if (!cgrp)
+ return 0;
+ root_css = &cgrp->self;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ }
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+__failure __msg("css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs")
+int BPF_PROG(iter_css_task_for_each)
+{
+ u64 cg_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ if (cgrp == NULL)
+ return 0;
+ css = &cgrp->self;
+
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+ }
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c
new file mode 100644
index 000000000000..dc0c3691dcc2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+pid_t target_pid = 0;
+unsigned int vmas_seen = 0;
+
+struct {
+ __u64 vm_start;
+ __u64 vm_end;
+} vm_ranges[1000];
+
+SEC("raw_tp/sys_enter")
+int iter_task_vma_for_each(const void *ctx)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct vm_area_struct *vma;
+ unsigned int seen = 0;
+
+ if (task->pid != target_pid)
+ return 0;
+
+ if (vmas_seen)
+ return 0;
+
+ bpf_for_each(task_vma, vma, task, 0) {
+ if (bpf_cmp_unlikely(seen, >=, 1000))
+ break;
+
+ vm_ranges[seen].vm_start = vma->vm_start;
+ vm_ranges[seen].vm_end = vma->vm_end;
+ seen++;
+ }
+
+ vmas_seen = seen;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c
new file mode 100644
index 000000000000..5379e9960ffd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_testmod.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tp/sys_enter")
+__success
+int iter_next_trusted(const void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct bpf_iter_task_vma vma_it;
+ struct vm_area_struct *vma_ptr;
+
+ bpf_iter_task_vma_new(&vma_it, cur_task, 0);
+
+ vma_ptr = bpf_iter_task_vma_next(&vma_it);
+ if (vma_ptr == NULL)
+ goto out;
+
+ bpf_kfunc_trusted_vma_test(vma_ptr);
+out:
+ bpf_iter_task_vma_destroy(&vma_it);
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int iter_next_trusted_or_null(const void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct bpf_iter_task_vma vma_it;
+ struct vm_area_struct *vma_ptr;
+
+ bpf_iter_task_vma_new(&vma_it, cur_task, 0);
+
+ vma_ptr = bpf_iter_task_vma_next(&vma_it);
+
+ bpf_kfunc_trusted_vma_test(vma_ptr);
+
+ bpf_iter_task_vma_destroy(&vma_it);
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+__success
+int iter_next_rcu(const void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct bpf_iter_task task_it;
+ struct task_struct *task_ptr;
+
+ bpf_iter_task_new(&task_it, cur_task, 0);
+
+ task_ptr = bpf_iter_task_next(&task_it);
+ if (task_ptr == NULL)
+ goto out;
+
+ bpf_kfunc_rcu_task_test(task_ptr);
+out:
+ bpf_iter_task_destroy(&task_it);
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int iter_next_rcu_or_null(const void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct bpf_iter_task task_it;
+ struct task_struct *task_ptr;
+
+ bpf_iter_task_new(&task_it, cur_task, 0);
+
+ task_ptr = bpf_iter_task_next(&task_it);
+
+ bpf_kfunc_rcu_task_test(task_ptr);
+
+ bpf_iter_task_destroy(&task_it);
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+__failure __msg("R1 must be referenced or trusted")
+int iter_next_rcu_not_trusted(const void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct bpf_iter_task task_it;
+ struct task_struct *task_ptr;
+
+ bpf_iter_task_new(&task_it, cur_task, 0);
+
+ task_ptr = bpf_iter_task_next(&task_it);
+ if (task_ptr == NULL)
+ goto out;
+
+ bpf_kfunc_trusted_task_test(task_ptr);
+out:
+ bpf_iter_task_destroy(&task_it);
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+__failure __msg("R1 cannot write into rdonly_mem")
+/* Message should not be 'R1 cannot write into rdonly_trusted_mem' */
+int iter_next_ptr_mem_not_trusted(const void *ctx)
+{
+ struct bpf_iter_num num_it;
+ int *num_ptr;
+
+ bpf_iter_num_new(&num_it, 0, 10);
+
+ num_ptr = bpf_iter_num_next(&num_it);
+ if (num_ptr == NULL)
+ goto out;
+
+ bpf_kfunc_trusted_num_test(num_ptr);
+out:
+ bpf_iter_num_destroy(&num_it);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test requires RCU critical section protection")
+int iter_ret_rcu_test_protected(const void *ctx)
+{
+ struct task_struct *p;
+
+ p = bpf_kfunc_ret_rcu_test();
+ return p->pid;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=")
+int iter_ret_rcu_test_type(const void *ctx)
+{
+ struct task_struct *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test();
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test_nostruct requires RCU critical section protection")
+int iter_ret_rcu_test_protected_nostruct(const void *ctx)
+{
+ void *p;
+
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ return *(int *)p;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rdonly_rcu_mem_or_null expected=")
+int iter_ret_rcu_test_type_nostruct(const void *ctx)
+{
+ void *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
new file mode 100644
index 000000000000..83791348bed5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct bpf_iter_testmod_seq {
+ u64 :64;
+ u64 :64;
+};
+
+extern int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) __ksym;
+extern s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq *it) __ksym;
+extern s64 bpf_iter_testmod_seq_value(int blah, struct bpf_iter_testmod_seq *it) __ksym;
+extern void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) __ksym;
+
+const volatile __s64 exp_empty = 0 + 1;
+__s64 res_empty;
+
+SEC("raw_tp/sys_enter")
+__success __log_level(2)
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
+__msg("call bpf_iter_testmod_seq_destroy")
+int testmod_seq_empty(const void *ctx)
+{
+ __s64 sum = 0, *i;
+
+ bpf_for_each(testmod_seq, i, 1000, 0) sum += *i;
+ res_empty = 1 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_full = 1000000;
+__s64 res_full;
+
+SEC("raw_tp/sys_enter")
+__success __log_level(2)
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
+__msg("call bpf_iter_testmod_seq_destroy")
+int testmod_seq_full(const void *ctx)
+{
+ __s64 sum = 0, *i;
+
+ bpf_for_each(testmod_seq, i, 1000, 1000) sum += *i;
+ res_full = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_truncated = 10 * 1000000;
+__s64 res_truncated;
+
+static volatile int zero = 0;
+
+SEC("raw_tp/sys_enter")
+__success __log_level(2)
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
+__msg("call bpf_iter_testmod_seq_destroy")
+int testmod_seq_truncated(const void *ctx)
+{
+ __s64 sum = 0, *i;
+ int cnt = zero;
+
+ bpf_for_each(testmod_seq, i, 10, 2000000) {
+ sum += *i;
+ cnt++;
+ if (cnt >= 1000000)
+ break;
+ }
+ res_truncated = sum;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure
+__msg("expected an initialized iter_testmod_seq as arg #1")
+int testmod_seq_getter_before_bad(const void *ctx)
+{
+ struct bpf_iter_testmod_seq it;
+
+ return bpf_iter_testmod_seq_value(0, &it);
+}
+
+SEC("?raw_tp")
+__failure
+__msg("expected an initialized iter_testmod_seq as arg #1")
+int testmod_seq_getter_after_bad(const void *ctx)
+{
+ struct bpf_iter_testmod_seq it;
+ s64 sum = 0, *v;
+
+ bpf_iter_testmod_seq_new(&it, 100, 100);
+
+ while ((v = bpf_iter_testmod_seq_next(&it))) {
+ sum += *v;
+ }
+
+ bpf_iter_testmod_seq_destroy(&it);
+
+ return sum + bpf_iter_testmod_seq_value(0, &it);
+}
+
+SEC("?socket")
+__success __retval(1000000)
+int testmod_seq_getter_good(const void *ctx)
+{
+ struct bpf_iter_testmod_seq it;
+ s64 sum = 0, *v;
+
+ bpf_iter_testmod_seq_new(&it, 100, 100);
+
+ while ((v = bpf_iter_testmod_seq_next(&it))) {
+ sum += *v;
+ }
+
+ sum *= bpf_iter_testmod_seq_value(0, &it);
+
+ bpf_iter_testmod_seq_destroy(&it);
+
+ return sum;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
new file mode 100644
index 000000000000..4d619bea9c75
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, u64);
+ __type(value, u64);
+} m_hash SEC(".maps");
+
+SEC("?raw_tp")
+__failure __msg("R8 invalid mem access 'map_value_or_null")
+int jeq_infer_not_null_ptr_to_btfid(void *ctx)
+{
+ struct bpf_map *map = (struct bpf_map *)&m_hash;
+ struct bpf_map *inner_map = map->inner_map_meta;
+ u64 key = 0, ret = 0, *val;
+
+ val = bpf_map_lookup_elem(map, &key);
+ /* Do not mark ptr as non-null if one of them is
+ * PTR_TO_BTF_ID (R9), reject because of invalid
+ * access to map value (R8).
+ *
+ * Here, we need to inline those insns to access
+ * R8 directly, since compiler may use other reg
+ * once it figures out val==inner_map.
+ */
+ asm volatile("r8 = %[val];\n"
+ "r9 = %[inner_map];\n"
+ "if r8 != r9 goto +1;\n"
+ "%[ret] = *(u64 *)(r8 +0);\n"
+ : [ret] "+r"(ret)
+ : [inner_map] "r"(inner_map), [val] "r"(val)
+ : "r8", "r9");
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
new file mode 100644
index 000000000000..82190d79de37
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+static struct prog_test_ref_kfunc __kptr *v;
+long total_sum = -1;
+
+SEC("tc")
+int test_jit_probe_mem(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long zero = 0, sum;
+
+ p = bpf_kfunc_call_test_acquire(&zero);
+ if (!p)
+ return 1;
+
+ p = bpf_kptr_xchg(&v, p);
+ if (p)
+ goto release_out;
+
+ /* Direct map value access of kptr, should be PTR_UNTRUSTED */
+ p = v;
+ if (!p)
+ return 1;
+
+ asm volatile (
+ "r9 = %[p];"
+ "%[sum] = 0;"
+
+ /* r8 = p->a */
+ "r8 = *(u32 *)(r9 + 0);"
+ "%[sum] += r8;"
+
+ /* r8 = p->b */
+ "r8 = *(u32 *)(r9 + 4);"
+ "%[sum] += r8;"
+
+ "r9 += 8;"
+ /* r9 = p->a */
+ "r9 = *(u32 *)(r9 - 8);"
+ "%[sum] += r9;"
+
+ : [sum] "=r"(sum)
+ : [p] "r"(p)
+ : "r8", "r9"
+ );
+
+ total_sum = sum;
+ return 0;
+release_out:
+ bpf_kfunc_call_test_release(p);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index a46a264ce24e..7236da72ce80 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -9,8 +9,8 @@
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} perf_buf_map SEC(".maps");
#define _(P) (__builtin_preserve_access_index(P))
@@ -109,10 +109,10 @@ int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location)
return 0;
}
-static volatile struct {
+struct {
bool fentry_test_ok;
bool fexit_test_ok;
-} result;
+} result = {};
SEC("fentry/eth_type_trans")
int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
new file mode 100644
index 000000000000..b9670e9a6e3d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("tc")
+int kfunc_destructive_test(void)
+{
+ bpf_kfunc_call_test_destructive();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
new file mode 100644
index 000000000000..a1963497f0bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+struct syscall_test_args {
+ __u8 data[16];
+ size_t size;
+};
+
+SEC("?syscall")
+int kfunc_syscall_test_fail(struct syscall_test_args *args)
+{
+ bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(*args) + 1);
+
+ return 0;
+}
+
+SEC("?syscall")
+int kfunc_syscall_test_null_fail(struct syscall_test_args *args)
+{
+ /* Must be called with args as a NULL pointer
+ * we do not check for it to have the verifier consider that
+ * the pointer might not be null, and so we can load it.
+ *
+ * So the following can not be added:
+ *
+ * if (args)
+ * return -22;
+ */
+
+ bpf_kfunc_call_test_mem_len_pass1(args, sizeof(*args));
+
+ return 0;
+}
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_rdonly(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int));
+ if (p)
+ p[0] = 42; /* this is a read-only buffer, so -EACCES */
+ else
+ ret = -1;
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_use_after_free(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdwr_mem(pt, 2 * sizeof(int));
+ if (p) {
+ p[0] = 42;
+ ret = p[1]; /* 108 */
+ } else {
+ ret = -1;
+ }
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ if (p)
+ ret = p[0]; /* p is not valid anymore */
+
+ return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_oob(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int));
+ if (p)
+ ret = p[2 * sizeof(int)]; /* oob access, so -EACCES */
+ else
+ ret = -1;
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+int not_const_size = 2 * sizeof(int);
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_not_const(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdonly_mem(pt, not_const_size); /* non const size, -EINVAL */
+ if (p)
+ ret = p[0];
+ else
+ ret = -1;
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_mem_acquire_fail(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ /* we are failing on this one, because we are not acquiring a PTR_TO_BTF_ID (a struct ptr) */
+ p = bpf_kfunc_call_test_acq_rdonly_mem(pt, 2 * sizeof(int));
+ if (p)
+ ret = p[0];
+ else
+ ret = -1;
+
+ bpf_kfunc_call_int_mem_release(p);
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_pointer_arg_type_mismatch(struct __sk_buff *skb)
+{
+ bpf_kfunc_call_test_pass_ctx((void *)10);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
new file mode 100644
index 000000000000..48f64827cd93
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("tc")
+int kfunc_call_fail(struct __sk_buff *ctx)
+{
+ bpf_testmod_test_mod_kfunc(0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
new file mode 100644
index 000000000000..8b86113a0126
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("tc")
+int kfunc_call_test4(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ long tmp;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ tmp = bpf_kfunc_call_test4(-3, -30, -200, -1000);
+ return (tmp >> 32) + tmp;
+}
+
+SEC("tc")
+int kfunc_call_test2(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ return bpf_kfunc_call_test2((struct sock *)sk, 1, 2);
+}
+
+SEC("tc")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ __u64 a = 1ULL << 32;
+ __u32 ret;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4);
+ ret = a >> 32; /* ret should be 2 */
+ ret += (__u32)a; /* ret should be 12 */
+
+ return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ if (pt->a != 42 || pt->b != 108)
+ ret = -1;
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_pass(struct __sk_buff *skb)
+{
+ struct prog_test_pass1 p1 = {};
+ struct prog_test_pass2 p2 = {};
+ short a = 0;
+ __u64 b = 0;
+ long c = 0;
+ char d = 0;
+ int e = 0;
+
+ bpf_kfunc_call_test_pass_ctx(skb);
+ bpf_kfunc_call_test_pass1(&p1);
+ bpf_kfunc_call_test_pass2(&p2);
+
+ bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a));
+ bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b));
+ bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c));
+ bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d));
+ bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e));
+ bpf_kfunc_call_test_mem_len_fail2(&b, -1);
+
+ return 0;
+}
+
+struct syscall_test_args {
+ __u8 data[16];
+ size_t size;
+};
+
+SEC("syscall")
+int kfunc_syscall_test(struct syscall_test_args *args)
+{
+ const long size = args->size;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(args->data));
+ bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(*args));
+ bpf_kfunc_call_test_mem_len_pass1(&args->data, size);
+
+ return 0;
+}
+
+SEC("syscall")
+int kfunc_syscall_test_null(struct syscall_test_args *args)
+{
+ /* Must be called with args as a NULL pointer
+ * we do not check for it to have the verifier consider that
+ * the pointer might not be null, and so we can load it.
+ *
+ * So the following can not be added:
+ *
+ * if (args)
+ * return -22;
+ */
+
+ bpf_kfunc_call_test_mem_len_pass1(args, 0);
+
+ return 0;
+}
+
+SEC("tc")
+int kfunc_call_test_get_mem(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdwr_mem(pt, 2 * sizeof(int));
+ if (p) {
+ p[0] = 42;
+ ret = p[1]; /* 108 */
+ } else {
+ ret = -1;
+ }
+
+ if (ret >= 0) {
+ p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int));
+ if (p)
+ ret = p[0]; /* 42 */
+ else
+ ret = -1;
+ }
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_static_unused_arg(struct __sk_buff *skb)
+{
+
+ u32 expected = 5, actual;
+
+ actual = bpf_kfunc_call_test_static_unused_arg(expected, 0xdeadbeef);
+ return actual != expected ? -1 : 0;
+}
+
+struct ctx_val {
+ struct bpf_testmod_ctx __kptr *ctx;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct ctx_val);
+} ctx_map SEC(".maps");
+
+SEC("tc")
+int kfunc_call_ctx(struct __sk_buff *skb)
+{
+ struct bpf_testmod_ctx *ctx;
+ int err = 0;
+
+ ctx = bpf_testmod_ctx_create(&err);
+ if (!ctx && !err)
+ err = -1;
+ if (ctx) {
+ int key = 0;
+ struct ctx_val *ctx_val = bpf_map_lookup_elem(&ctx_map, &key);
+
+ /* Transfer ctx to map to be freed via implicit dtor call
+ * on cleanup.
+ */
+ if (ctx_val)
+ ctx = bpf_kptr_xchg(&ctx_val->ctx, ctx);
+ if (ctx) {
+ bpf_testmod_ctx_release(ctx);
+ err = -1;
+ }
+ }
+ return err;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
new file mode 100644
index 000000000000..8e150e85b50d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+extern const int bpf_prog_active __ksym;
+int active_res = -1;
+int sk_state_res = -1;
+
+int __noinline f1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ int *active;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
+ bpf_get_smp_processor_id());
+ if (active)
+ active_res = *active;
+
+ sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+
+ return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
+}
+
+SEC("tc")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ return f1(skb);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_module_order.c b/tools/testing/selftests/bpf/progs/kfunc_module_order.c
new file mode 100644
index 000000000000..76003d04c95f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_module_order.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_test_modorder_retx(void) __ksym;
+extern int bpf_test_modorder_rety(void) __ksym;
+
+SEC("classifier")
+int call_kfunc_xy(struct __sk_buff *skb)
+{
+ int ret1, ret2;
+
+ ret1 = bpf_test_modorder_retx();
+ ret2 = bpf_test_modorder_rety();
+
+ return ret1 == 'x' && ret2 == 'y' ? 0 : -1;
+}
+
+SEC("classifier")
+int call_kfunc_yx(struct __sk_buff *skb)
+{
+ int ret1, ret2;
+
+ ret1 = bpf_test_modorder_rety();
+ ret2 = bpf_test_modorder_retx();
+
+ return ret1 == 'y' && ret2 == 'x' ? 0 : -1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kmem_cache_iter.c b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c
new file mode 100644
index 000000000000..b9c8f9457492
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define SLAB_NAME_MAX 32
+
+struct kmem_cache_result {
+ char name[SLAB_NAME_MAX];
+ long obj_size;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(void *));
+ __uint(value_size, SLAB_NAME_MAX);
+ __uint(max_entries, 1);
+} slab_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct kmem_cache_result));
+ __uint(max_entries, 1024);
+} slab_result SEC(".maps");
+
+extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym;
+
+/* Result, will be checked by userspace */
+int task_struct_found;
+int kmem_cache_seen;
+int open_coded_seen;
+
+SEC("iter/kmem_cache")
+int slab_info_collector(struct bpf_iter__kmem_cache *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct kmem_cache *s = ctx->s;
+ struct kmem_cache_result *r;
+ int idx;
+
+ if (s) {
+ /* To make sure if the slab_iter implements the seq interface
+ * properly and it's also useful for debugging.
+ */
+ BPF_SEQ_PRINTF(seq, "%s: %u\n", s->name, s->size);
+
+ idx = kmem_cache_seen;
+ r = bpf_map_lookup_elem(&slab_result, &idx);
+ if (r == NULL)
+ return 0;
+
+ kmem_cache_seen++;
+
+ /* Save name and size to match /proc/slabinfo */
+ bpf_probe_read_kernel_str(r->name, sizeof(r->name), s->name);
+ r->obj_size = s->size;
+
+ if (!bpf_strncmp(r->name, 11, "task_struct"))
+ bpf_map_update_elem(&slab_hash, &s, r->name, BPF_NOEXIST);
+ }
+
+ return 0;
+}
+
+SEC("raw_tp/bpf_test_finish")
+int BPF_PROG(check_task_struct)
+{
+ u64 curr = bpf_get_current_task();
+ struct kmem_cache *s;
+ char *name;
+
+ s = bpf_get_kmem_cache(curr);
+ if (s == NULL) {
+ task_struct_found = -1;
+ return 0;
+ }
+ name = bpf_map_lookup_elem(&slab_hash, &s);
+ if (name && !bpf_strncmp(name, 11, "task_struct"))
+ task_struct_found = 1;
+ else
+ task_struct_found = -2;
+ return 0;
+}
+
+SEC("syscall")
+int open_coded_iter(const void *ctx)
+{
+ struct kmem_cache *s;
+
+ bpf_for_each(kmem_cache, s) {
+ struct kmem_cache_result *r;
+
+ r = bpf_map_lookup_elem(&slab_result, &open_coded_seen);
+ if (!r)
+ break;
+
+ if (r->obj_size != s->size)
+ break;
+
+ open_coded_seen++;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
new file mode 100644
index 000000000000..9e1ca8e34913
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_fentry_test5 __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+extern const void bpf_fentry_test8 __ksym;
+
+int pid = 0;
+bool test_cookie = false;
+
+__u64 kprobe_test1_result = 0;
+__u64 kprobe_test2_result = 0;
+__u64 kprobe_test3_result = 0;
+__u64 kprobe_test4_result = 0;
+__u64 kprobe_test5_result = 0;
+__u64 kprobe_test6_result = 0;
+__u64 kprobe_test7_result = 0;
+__u64 kprobe_test8_result = 0;
+
+__u64 kretprobe_test1_result = 0;
+__u64 kretprobe_test2_result = 0;
+__u64 kretprobe_test3_result = 0;
+__u64 kretprobe_test4_result = 0;
+__u64 kretprobe_test5_result = 0;
+__u64 kretprobe_test6_result = 0;
+__u64 kretprobe_test7_result = 0;
+__u64 kretprobe_test8_result = 0;
+
+static void kprobe_multi_check(void *ctx, bool is_return)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ __u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
+ __u64 addr = bpf_get_func_ip(ctx);
+
+#define SET(__var, __addr, __cookie) ({ \
+ if (((const void *) addr == __addr) && \
+ (!test_cookie || (cookie == __cookie))) \
+ __var = 1; \
+})
+
+ if (is_return) {
+ SET(kretprobe_test1_result, &bpf_fentry_test1, 8);
+ SET(kretprobe_test2_result, &bpf_fentry_test2, 2);
+ SET(kretprobe_test3_result, &bpf_fentry_test3, 7);
+ SET(kretprobe_test4_result, &bpf_fentry_test4, 6);
+ SET(kretprobe_test5_result, &bpf_fentry_test5, 5);
+ SET(kretprobe_test6_result, &bpf_fentry_test6, 4);
+ SET(kretprobe_test7_result, &bpf_fentry_test7, 3);
+ SET(kretprobe_test8_result, &bpf_fentry_test8, 1);
+ } else {
+ SET(kprobe_test1_result, &bpf_fentry_test1, 1);
+ SET(kprobe_test2_result, &bpf_fentry_test2, 7);
+ SET(kprobe_test3_result, &bpf_fentry_test3, 2);
+ SET(kprobe_test4_result, &bpf_fentry_test4, 3);
+ SET(kprobe_test5_result, &bpf_fentry_test5, 4);
+ SET(kprobe_test6_result, &bpf_fentry_test6, 5);
+ SET(kprobe_test7_result, &bpf_fentry_test7, 6);
+ SET(kprobe_test8_result, &bpf_fentry_test8, 8);
+ }
+
+#undef SET
+}
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe.multi/bpf_fentry_tes??")
+int test_kprobe(struct pt_regs *ctx)
+{
+ kprobe_multi_check(ctx, false);
+ return 0;
+}
+
+SEC("kretprobe.multi/bpf_fentry_test*")
+int test_kretprobe(struct pt_regs *ctx)
+{
+ kprobe_multi_check(ctx, true);
+ return 0;
+}
+
+SEC("kprobe.multi")
+int test_kprobe_manual(struct pt_regs *ctx)
+{
+ kprobe_multi_check(ctx, false);
+ return 0;
+}
+
+SEC("kretprobe.multi")
+int test_kretprobe_manual(struct pt_regs *ctx)
+{
+ kprobe_multi_check(ctx, true);
+ return 0;
+}
+
+extern const void bpf_testmod_fentry_test1 __ksym;
+extern const void bpf_testmod_fentry_test2 __ksym;
+extern const void bpf_testmod_fentry_test3 __ksym;
+
+__u64 kprobe_testmod_test1_result = 0;
+__u64 kprobe_testmod_test2_result = 0;
+__u64 kprobe_testmod_test3_result = 0;
+
+__u64 kretprobe_testmod_test1_result = 0;
+__u64 kretprobe_testmod_test2_result = 0;
+__u64 kretprobe_testmod_test3_result = 0;
+
+static void kprobe_multi_testmod_check(void *ctx, bool is_return)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ if (is_return) {
+ if ((const void *) addr == &bpf_testmod_fentry_test1)
+ kretprobe_testmod_test1_result = 1;
+ if ((const void *) addr == &bpf_testmod_fentry_test2)
+ kretprobe_testmod_test2_result = 1;
+ if ((const void *) addr == &bpf_testmod_fentry_test3)
+ kretprobe_testmod_test3_result = 1;
+ } else {
+ if ((const void *) addr == &bpf_testmod_fentry_test1)
+ kprobe_testmod_test1_result = 1;
+ if ((const void *) addr == &bpf_testmod_fentry_test2)
+ kprobe_testmod_test2_result = 1;
+ if ((const void *) addr == &bpf_testmod_fentry_test3)
+ kprobe_testmod_test3_result = 1;
+ }
+}
+
+SEC("kprobe.multi")
+int test_kprobe_testmod(struct pt_regs *ctx)
+{
+ kprobe_multi_testmod_check(ctx, false);
+ return 0;
+}
+
+SEC("kretprobe.multi")
+int test_kretprobe_testmod(struct pt_regs *ctx)
+{
+ kprobe_multi_testmod_check(ctx, true);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
new file mode 100644
index 000000000000..e76e499aca39
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe.multi/")
+int test_kprobe_empty(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
new file mode 100644
index 000000000000..28f8487c9059
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe.multi")
+int test_override(struct pt_regs *ctx)
+{
+ bpf_override_return(ctx, 123);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
new file mode 100644
index 000000000000..bd8b7fb7061e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_fentry_test5 __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+extern const void bpf_fentry_test8 __ksym;
+
+int pid = 0;
+
+__u64 kprobe_session_result[8];
+
+static int session_check(void *ctx)
+{
+ unsigned int i;
+ __u64 addr;
+ const void *kfuncs[] = {
+ &bpf_fentry_test1,
+ &bpf_fentry_test2,
+ &bpf_fentry_test3,
+ &bpf_fentry_test4,
+ &bpf_fentry_test5,
+ &bpf_fentry_test6,
+ &bpf_fentry_test7,
+ &bpf_fentry_test8,
+ };
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ addr = bpf_get_func_ip(ctx);
+
+ for (i = 0; i < ARRAY_SIZE(kfuncs); i++) {
+ if (kfuncs[i] == (void *) addr) {
+ kprobe_session_result[i]++;
+ break;
+ }
+ }
+
+ /*
+ * Force probes for function bpf_fentry_test[5-8] not to
+ * install and execute the return probe
+ */
+ if (((const void *) addr == &bpf_fentry_test5) ||
+ ((const void *) addr == &bpf_fentry_test6) ||
+ ((const void *) addr == &bpf_fentry_test7) ||
+ ((const void *) addr == &bpf_fentry_test8))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe.session/bpf_fentry_test*")
+int test_kprobe(struct pt_regs *ctx)
+{
+ return session_check(ctx);
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
new file mode 100644
index 000000000000..0835b5edf685
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+
+__u64 test_kprobe_1_result = 0;
+__u64 test_kprobe_2_result = 0;
+__u64 test_kprobe_3_result = 0;
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+static int check_cookie(__u64 val, __u64 *result)
+{
+ __u64 *cookie;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ cookie = bpf_session_cookie();
+
+ if (bpf_session_is_return())
+ *result = *cookie == val ? val : 0;
+ else
+ *cookie = val;
+ return 0;
+}
+
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_1(struct pt_regs *ctx)
+{
+ return check_cookie(1, &test_kprobe_1_result);
+}
+
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_2(struct pt_regs *ctx)
+{
+ return check_cookie(2, &test_kprobe_2_result);
+}
+
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_3(struct pt_regs *ctx)
+{
+ return check_cookie(3, &test_kprobe_3_result);
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c b/tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c
new file mode 100644
index 000000000000..288577e81deb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/usdt.bpf.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+
+SEC("kprobe.session")
+__success
+int kprobe_session_return_0(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe.session")
+__success
+int kprobe_session_return_1(struct pt_regs *ctx)
+{
+ return 1;
+}
+
+SEC("kprobe.session")
+__failure
+__msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int kprobe_session_return_2(struct pt_regs *ctx)
+{
+ return 2;
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
new file mode 100644
index 000000000000..f77aef0474d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86)
+SEC("kprobe")
+int kprobe_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+
+SEC("kprobe.multi")
+int kprobe_multi_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
new file mode 100644
index 000000000000..2414ac20b6d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bin_data {
+ char blob[32];
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(kptr) struct bin_data __kptr * ptr;
+
+SEC("tc")
+__naked int kptr_xchg_inline(void)
+{
+ asm volatile (
+ "r1 = %[ptr] ll;"
+ "r2 = 0;"
+ "call %[bpf_kptr_xchg];"
+ "if r0 == 0 goto 1f;"
+ "r1 = r0;"
+ "r2 = 0;"
+ "call %[bpf_obj_drop_impl];"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(ptr),
+ __imm(bpf_kptr_xchg),
+ __imm(bpf_obj_drop_impl)
+ : __clobber_all
+ );
+}
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void __btf_root(void)
+{
+ bpf_obj_drop(NULL);
+}
diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c
new file mode 100644
index 000000000000..def97f2fed90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ksym_race.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_testmod_ksym_percpu __ksym;
+
+SEC("tc")
+int ksym_fail(struct __sk_buff *ctx)
+{
+ return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
new file mode 100644
index 000000000000..049a1f78de3f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+/* weak and shared between two files */
+const volatile __u32 my_tid __weak;
+long syscall_id __weak;
+
+int output_val1;
+int output_ctx1;
+int output_weak1;
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 1;
+}
+
+/* Global functions can't be void */
+int set_output_val1(int x)
+{
+ output_val1 = x + subprog(x);
+ return x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx1(__u64 *ctx)
+{
+ output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should win because it's the first one */
+__weak int set_output_weak(int x)
+{
+ static volatile int whatever;
+
+ /* make sure we use CO-RE relocations in a weak function, this used to
+ * cause problems for BPF static linker
+ */
+ whatever = bpf_core_type_size(struct task_struct);
+ __sink(whatever);
+
+ output_weak1 = x;
+ return x;
+}
+
+extern int set_output_val2(int x);
+
+/* here we'll force set_output_ctx2() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx2(__u64 *ctx);
+
+void *bpf_cast_to_kern_ctx(void *obj) __ksym;
+
+SEC("?raw_tp/sys_enter")
+int BPF_PROG(handler1, struct pt_regs *regs, long id)
+{
+ static volatile int whatever;
+
+ if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0;
+
+ /* make sure we have CO-RE relocations in main program */
+ whatever = bpf_core_type_size(struct task_struct);
+ __sink(whatever);
+
+ set_output_val2(1000);
+ set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+/* Generate BTF FUNC record and test linking with duplicate extern functions */
+void kfunc_gen1(void)
+{
+ bpf_cast_to_kern_ctx(0);
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
new file mode 100644
index 000000000000..96850759fd8d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+/* weak and shared between both files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val2;
+int output_ctx2;
+int output_weak2; /* should stay zero */
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 2;
+}
+
+/* Global functions can't be void */
+int set_output_val2(int x)
+{
+ output_val2 = 2 * x + 2 * subprog(x);
+ return 2 * x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx2(__u64 *ctx)
+{
+ output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should lose, because it will be processed second */
+__weak int set_output_weak(int x)
+{
+ static volatile int whatever;
+
+ /* make sure we use CO-RE relocations in a weak function, this used to
+ * cause problems for BPF static linker
+ */
+ whatever = 2 * bpf_core_type_size(struct task_struct);
+ __sink(whatever);
+
+ output_weak2 = x;
+ return 2 * x;
+}
+
+extern int set_output_val1(int x);
+
+/* here we'll force set_output_ctx1() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx1(__u64 *ctx);
+
+void *bpf_cast_to_kern_ctx(void *obj) __ksym;
+
+SEC("?raw_tp/sys_enter")
+int BPF_PROG(handler2, struct pt_regs *regs, long id)
+{
+ static volatile int whatever;
+
+ if (my_tid != (s32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0;
+
+ /* make sure we have CO-RE relocations in main program */
+ whatever = bpf_core_type_size(struct task_struct);
+ __sink(whatever);
+
+ set_output_val1(2000);
+ set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+/* Generate BTF FUNC record and test linking with duplicate extern functions */
+void kfunc_gen2(void)
+{
+ bpf_cast_to_kern_ctx(0);
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
new file mode 100644
index 000000000000..421f40835acd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+#include "linked_list.h"
+
+struct head_nested_inner {
+ struct bpf_spin_lock lock;
+ struct bpf_list_head head __contains(foo, node2);
+};
+
+struct head_nested {
+ int dummy;
+ struct head_nested_inner inner;
+};
+
+private(C) struct bpf_spin_lock glock_c;
+private(C) struct bpf_list_head ghead_array[2] __contains(foo, node2);
+private(C) struct bpf_list_head ghead_array_one[1] __contains(foo, node2);
+
+private(D) struct head_nested ghead_nested;
+
+static __always_inline
+int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
+{
+ struct bpf_list_node *n;
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 2;
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ bpf_obj_drop(f);
+ return 3;
+ }
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ bpf_obj_drop(f);
+ return 4;
+ }
+
+
+ bpf_spin_lock(lock);
+ f->data = 42;
+ bpf_list_push_front(head, &f->node2);
+ bpf_spin_unlock(lock);
+ if (leave_in_map)
+ return 0;
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 5;
+ f = container_of(n, struct foo, node2);
+ if (f->data != 42) {
+ bpf_obj_drop(f);
+ return 6;
+ }
+
+ bpf_spin_lock(lock);
+ f->data = 13;
+ bpf_list_push_front(head, &f->node2);
+ bpf_spin_unlock(lock);
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 7;
+ f = container_of(n, struct foo, node2);
+ if (f->data != 13) {
+ bpf_obj_drop(f);
+ return 8;
+ }
+ bpf_obj_drop(f);
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ return 9;
+ }
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ return 10;
+ }
+ return 0;
+}
+
+
+static __always_inline
+int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
+{
+ struct bpf_list_node *n;
+ struct foo *f[200], *pf;
+ int i;
+
+ /* Loop following this check adds nodes 2-at-a-time in order to
+ * validate multiple release_on_unlock release logic
+ */
+ if (ARRAY_SIZE(f) % 2)
+ return 10;
+
+ for (i = 0; i < ARRAY_SIZE(f); i += 2) {
+ f[i] = bpf_obj_new(typeof(**f));
+ if (!f[i])
+ return 2;
+ f[i]->data = i;
+
+ f[i + 1] = bpf_obj_new(typeof(**f));
+ if (!f[i + 1]) {
+ bpf_obj_drop(f[i]);
+ return 9;
+ }
+ f[i + 1]->data = i + 1;
+
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &f[i]->node2);
+ bpf_list_push_front(head, &f[i + 1]->node2);
+ bpf_spin_unlock(lock);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(f); i++) {
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 3;
+ pf = container_of(n, struct foo, node2);
+ if (pf->data != (ARRAY_SIZE(f) - i - 1)) {
+ bpf_obj_drop(pf);
+ return 4;
+ }
+ bpf_spin_lock(lock);
+ bpf_list_push_back(head, &pf->node2);
+ bpf_spin_unlock(lock);
+ }
+
+ if (leave_in_map)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(f); i++) {
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 5;
+ pf = container_of(n, struct foo, node2);
+ if (pf->data != i) {
+ bpf_obj_drop(pf);
+ return 6;
+ }
+ bpf_obj_drop(pf);
+ }
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ return 7;
+ }
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ return 8;
+ }
+ return 0;
+}
+
+static __always_inline
+int list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
+{
+ struct bpf_list_node *n;
+ struct bar *ba[8], *b;
+ struct foo *f;
+ int i;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 2;
+ for (i = 0; i < ARRAY_SIZE(ba); i++) {
+ b = bpf_obj_new(typeof(*b));
+ if (!b) {
+ bpf_obj_drop(f);
+ return 3;
+ }
+ b->data = i;
+ bpf_spin_lock(&f->lock);
+ bpf_list_push_back(&f->head, &b->node);
+ bpf_spin_unlock(&f->lock);
+ }
+
+ bpf_spin_lock(lock);
+ f->data = 42;
+ bpf_list_push_front(head, &f->node2);
+ bpf_spin_unlock(lock);
+
+ if (leave_in_map)
+ return 0;
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 4;
+ f = container_of(n, struct foo, node2);
+ if (f->data != 42) {
+ bpf_obj_drop(f);
+ return 5;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ba); i++) {
+ bpf_spin_lock(&f->lock);
+ n = bpf_list_pop_front(&f->head);
+ bpf_spin_unlock(&f->lock);
+ if (!n) {
+ bpf_obj_drop(f);
+ return 6;
+ }
+ b = container_of(n, struct bar, node);
+ if (b->data != i) {
+ bpf_obj_drop(f);
+ bpf_obj_drop(b);
+ return 7;
+ }
+ bpf_obj_drop(b);
+ }
+ bpf_spin_lock(&f->lock);
+ n = bpf_list_pop_front(&f->head);
+ bpf_spin_unlock(&f->lock);
+ if (n) {
+ bpf_obj_drop(f);
+ bpf_obj_drop(container_of(n, struct bar, node));
+ return 8;
+ }
+ bpf_obj_drop(f);
+ return 0;
+}
+
+static __always_inline
+int test_list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ int ret;
+
+ ret = list_push_pop(lock, head, false);
+ if (ret)
+ return ret;
+ return list_push_pop(lock, head, true);
+}
+
+static __always_inline
+int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ int ret;
+
+ ret = list_push_pop_multiple(lock, head, false);
+ if (ret)
+ return ret;
+ return list_push_pop_multiple(lock, head, true);
+}
+
+static __always_inline
+int test_list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ int ret;
+
+ ret = list_in_list(lock, head, false);
+ if (ret)
+ return ret;
+ return list_in_list(lock, head, true);
+}
+
+SEC("tc")
+int map_list_push_pop(void *ctx)
+{
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop(&v->lock, &v->head);
+}
+
+SEC("tc")
+int inner_map_list_push_pop(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop(&v->lock, &v->head);
+}
+
+SEC("tc")
+int global_list_push_pop(void *ctx)
+{
+ return test_list_push_pop(&glock, &ghead);
+}
+
+SEC("tc")
+int global_list_push_pop_nested(void *ctx)
+{
+ return test_list_push_pop(&ghead_nested.inner.lock, &ghead_nested.inner.head);
+}
+
+SEC("tc")
+int global_list_array_push_pop(void *ctx)
+{
+ int r;
+
+ r = test_list_push_pop(&glock_c, &ghead_array[0]);
+ if (r)
+ return r;
+
+ r = test_list_push_pop(&glock_c, &ghead_array[1]);
+ if (r)
+ return r;
+
+ /* Arrays with only one element is a special case, being treated
+ * just like a bpf_list_head variable by the verifier, not an
+ * array.
+ */
+ return test_list_push_pop(&glock_c, &ghead_array_one[0]);
+}
+
+SEC("tc")
+int map_list_push_pop_multiple(void *ctx)
+{
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop_multiple(&v->lock, &v->head);
+}
+
+SEC("tc")
+int inner_map_list_push_pop_multiple(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop_multiple(&v->lock, &v->head);
+}
+
+SEC("tc")
+int global_list_push_pop_multiple(void *ctx)
+{
+ int ret;
+
+ ret = list_push_pop_multiple(&glock, &ghead, false);
+ if (ret)
+ return ret;
+ return list_push_pop_multiple(&glock, &ghead, true);
+}
+
+SEC("tc")
+int map_list_in_list(void *ctx)
+{
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_in_list(&v->lock, &v->head);
+}
+
+SEC("tc")
+int inner_map_list_in_list(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_in_list(&v->lock, &v->head);
+}
+
+SEC("tc")
+int global_list_in_list(void *ctx)
+{
+ return test_list_in_list(&glock, &ghead);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h
new file mode 100644
index 000000000000..c0f3609a7ffa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list.h
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef LINKED_LIST_H
+#define LINKED_LIST_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+struct bar {
+ struct bpf_list_node node;
+ int data;
+};
+
+struct foo {
+ struct bpf_list_node node;
+ struct bpf_list_head head __contains(bar, node);
+ struct bpf_spin_lock lock;
+ int data;
+ struct bpf_list_node node2;
+};
+
+struct map_value {
+ struct bpf_spin_lock lock;
+ int data;
+ struct bpf_list_head head __contains(foo, node2);
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+};
+
+struct array_map array_map SEC(".maps");
+struct array_map inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct array_map);
+} map_of_maps SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_list_head ghead __contains(foo, node2);
+private(B) struct bpf_spin_lock glock2;
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
new file mode 100644
index 000000000000..ddd26d1a083f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -0,0 +1,611 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+#include "linked_list.h"
+
+#define INIT \
+ struct map_value *v, *v2, *iv, *iv2; \
+ struct foo *f, *f1, *f2; \
+ struct bar *b; \
+ void *map; \
+ \
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){ 0 }); \
+ if (!map) \
+ return 0; \
+ v = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \
+ if (!v) \
+ return 0; \
+ v2 = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \
+ if (!v2) \
+ return 0; \
+ iv = bpf_map_lookup_elem(map, &(int){ 0 }); \
+ if (!iv) \
+ return 0; \
+ iv2 = bpf_map_lookup_elem(map, &(int){ 0 }); \
+ if (!iv2) \
+ return 0; \
+ f = bpf_obj_new(typeof(*f)); \
+ if (!f) \
+ return 0; \
+ f1 = f; \
+ f2 = bpf_obj_new(typeof(*f2)); \
+ if (!f2) { \
+ bpf_obj_drop(f1); \
+ return 0; \
+ } \
+ b = bpf_obj_new(typeof(*b)); \
+ if (!b) { \
+ bpf_obj_drop(f2); \
+ bpf_obj_drop(f1); \
+ return 0; \
+ }
+
+#define CHECK(test, op, hexpr) \
+ SEC("?tc") \
+ int test##_missing_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ void (*p)(void *) = (void *)&bpf_list_##op; \
+ p(hexpr); \
+ return 0; \
+ }
+
+CHECK(kptr, pop_front, &f->head);
+CHECK(kptr, pop_back, &f->head);
+
+CHECK(global, pop_front, &ghead);
+CHECK(global, pop_back, &ghead);
+
+CHECK(map, pop_front, &v->head);
+CHECK(map, pop_back, &v->head);
+
+CHECK(inner_map, pop_front, &iv->head);
+CHECK(inner_map, pop_back, &iv->head);
+
+#undef CHECK
+
+#define CHECK(test, op, hexpr, nexpr) \
+ SEC("?tc") \
+ int test##_missing_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ bpf_list_##op(hexpr, nexpr); \
+ return 0; \
+ }
+
+CHECK(kptr, push_front, &f->head, &b->node);
+CHECK(kptr, push_back, &f->head, &b->node);
+
+CHECK(global, push_front, &ghead, &f->node2);
+CHECK(global, push_back, &ghead, &f->node2);
+
+CHECK(map, push_front, &v->head, &f->node2);
+CHECK(map, push_back, &v->head, &f->node2);
+
+CHECK(inner_map, push_front, &iv->head, &f->node2);
+CHECK(inner_map, push_back, &iv->head, &f->node2);
+
+#undef CHECK
+
+#define CHECK(test, op, lexpr, hexpr) \
+ SEC("?tc") \
+ int test##_incorrect_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ void (*p)(void *) = (void *)&bpf_list_##op; \
+ bpf_spin_lock(lexpr); \
+ p(hexpr); \
+ return 0; \
+ }
+
+#define CHECK_OP(op) \
+ CHECK(kptr_kptr, op, &f1->lock, &f2->head); \
+ CHECK(kptr_global, op, &f1->lock, &ghead); \
+ CHECK(kptr_map, op, &f1->lock, &v->head); \
+ CHECK(kptr_inner_map, op, &f1->lock, &iv->head); \
+ \
+ CHECK(global_global, op, &glock2, &ghead); \
+ CHECK(global_kptr, op, &glock, &f1->head); \
+ CHECK(global_map, op, &glock, &v->head); \
+ CHECK(global_inner_map, op, &glock, &iv->head); \
+ \
+ CHECK(map_map, op, &v->lock, &v2->head); \
+ CHECK(map_kptr, op, &v->lock, &f2->head); \
+ CHECK(map_global, op, &v->lock, &ghead); \
+ CHECK(map_inner_map, op, &v->lock, &iv->head); \
+ \
+ CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head); \
+ CHECK(inner_map_kptr, op, &iv->lock, &f2->head); \
+ CHECK(inner_map_global, op, &iv->lock, &ghead); \
+ CHECK(inner_map_map, op, &iv->lock, &v->head);
+
+CHECK_OP(pop_front);
+CHECK_OP(pop_back);
+
+#undef CHECK
+#undef CHECK_OP
+
+#define CHECK(test, op, lexpr, hexpr, nexpr) \
+ SEC("?tc") \
+ int test##_incorrect_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ bpf_spin_lock(lexpr); \
+ bpf_list_##op(hexpr, nexpr); \
+ return 0; \
+ }
+
+#define CHECK_OP(op) \
+ CHECK(kptr_kptr, op, &f1->lock, &f2->head, &b->node); \
+ CHECK(kptr_global, op, &f1->lock, &ghead, &f->node2); \
+ CHECK(kptr_map, op, &f1->lock, &v->head, &f->node2); \
+ CHECK(kptr_inner_map, op, &f1->lock, &iv->head, &f->node2); \
+ \
+ CHECK(global_global, op, &glock2, &ghead, &f->node2); \
+ CHECK(global_kptr, op, &glock, &f1->head, &b->node); \
+ CHECK(global_map, op, &glock, &v->head, &f->node2); \
+ CHECK(global_inner_map, op, &glock, &iv->head, &f->node2); \
+ \
+ CHECK(map_map, op, &v->lock, &v2->head, &f->node2); \
+ CHECK(map_kptr, op, &v->lock, &f2->head, &b->node); \
+ CHECK(map_global, op, &v->lock, &ghead, &f->node2); \
+ CHECK(map_inner_map, op, &v->lock, &iv->head, &f->node2); \
+ \
+ CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head, &f->node2);\
+ CHECK(inner_map_kptr, op, &iv->lock, &f2->head, &b->node); \
+ CHECK(inner_map_global, op, &iv->lock, &ghead, &f->node2); \
+ CHECK(inner_map_map, op, &iv->lock, &v->head, &f->node2);
+
+CHECK_OP(push_front);
+CHECK_OP(push_back);
+
+#undef CHECK
+#undef CHECK_OP
+#undef INIT
+
+SEC("?kprobe/xyz")
+int map_compat_kprobe(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?kretprobe/xyz")
+int map_compat_kretprobe(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?tracepoint/xyz")
+int map_compat_tp(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?perf_event")
+int map_compat_perf(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?raw_tp/xyz")
+int map_compat_raw_tp(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?raw_tp.w/xyz")
+int map_compat_raw_tp_w(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?tc")
+int obj_type_id_oor(void *ctx)
+{
+ bpf_obj_new_impl(~0UL, NULL);
+ return 0;
+}
+
+SEC("?tc")
+int obj_new_no_composite(void *ctx)
+{
+ bpf_obj_new_impl(bpf_core_type_id_local(int), (void *)42);
+ return 0;
+}
+
+SEC("?tc")
+int obj_new_no_struct(void *ctx)
+{
+ (void)bpf_obj_new(union { int data; unsigned udata; });
+ return 0;
+}
+
+SEC("?tc")
+int obj_drop_non_zero_off(void *ctx)
+{
+ void *f;
+
+ f = bpf_obj_new(struct foo);
+ if (!f)
+ return 0;
+ bpf_obj_drop(f+1);
+ return 0;
+}
+
+SEC("?tc")
+int new_null_ret(void *ctx)
+{
+ return bpf_obj_new(struct foo)->data;
+}
+
+SEC("?tc")
+int obj_new_acq(void *ctx)
+{
+ (void)bpf_obj_new(struct foo);
+ return 0;
+}
+
+SEC("?tc")
+int use_after_drop(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_obj_drop(f);
+ return f->data;
+}
+
+SEC("?tc")
+int ptr_walk_scalar(void *ctx)
+{
+ struct test1 {
+ struct test2 {
+ struct test2 *next;
+ } *ptr;
+ } *p;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 0;
+ bpf_this_cpu_ptr(p->ptr);
+ return 0;
+}
+
+SEC("?tc")
+int direct_read_lock(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ return *(int *)&f->lock;
+}
+
+SEC("?tc")
+int direct_write_lock(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ *(int *)&f->lock = 0;
+ return 0;
+}
+
+SEC("?tc")
+int direct_read_head(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ return *(int *)&f->head;
+}
+
+SEC("?tc")
+int direct_write_head(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ *(int *)&f->head = 0;
+ return 0;
+}
+
+SEC("?tc")
+int direct_read_node(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ return *(int *)&f->node2;
+}
+
+SEC("?tc")
+int direct_write_node(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ *(int *)&f->node2 = 0;
+ return 0;
+}
+
+static __always_inline
+int use_after_unlock(bool push_front)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ f->data = 42;
+ if (push_front)
+ bpf_list_push_front(&ghead, &f->node2);
+ else
+ bpf_list_push_back(&ghead, &f->node2);
+ bpf_spin_unlock(&glock);
+
+ return f->data;
+}
+
+SEC("?tc")
+int use_after_unlock_push_front(void *ctx)
+{
+ return use_after_unlock(true);
+}
+
+SEC("?tc")
+int use_after_unlock_push_back(void *ctx)
+{
+ return use_after_unlock(false);
+}
+
+static __always_inline
+int list_double_add(bool push_front)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ if (push_front) {
+ bpf_list_push_front(&ghead, &f->node2);
+ bpf_list_push_front(&ghead, &f->node2);
+ } else {
+ bpf_list_push_back(&ghead, &f->node2);
+ bpf_list_push_back(&ghead, &f->node2);
+ }
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int double_push_front(void *ctx)
+{
+ return list_double_add(true);
+}
+
+SEC("?tc")
+int double_push_back(void *ctx)
+{
+ return list_double_add(false);
+}
+
+SEC("?tc")
+int no_node_value_type(void *ctx)
+{
+ void *p;
+
+ p = bpf_obj_new(struct { int data; });
+ if (!p)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, p);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_value_type(void *ctx)
+{
+ struct bar *b;
+
+ b = bpf_obj_new(typeof(*b));
+ if (!b)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, &b->node);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_node_var_off(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, (void *)&f->node2 + ctx->protocol);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_node_off1(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, (void *)&f->node2 + 1);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_node_off2(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, &f->node);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int no_head_type(void *ctx)
+{
+ void *p;
+
+ p = bpf_obj_new(typeof(struct { int data; }));
+ if (!p)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(p, NULL);
+ bpf_spin_lock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_var_off1(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front((void *)&ghead + ctx->protocol, &f->node2);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_var_off2(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front((void *)&f->head + ctx->protocol, &f->node2);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_off1(void *ctx)
+{
+ struct foo *f;
+ struct bar *b;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ b = bpf_obj_new(typeof(*b));
+ if (!b) {
+ bpf_obj_drop(f);
+ return 0;
+ }
+
+ bpf_spin_lock(&f->lock);
+ bpf_list_push_front((void *)&f->head + 1, &b->node);
+ bpf_spin_unlock(&f->lock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_off2(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+
+ bpf_spin_lock(&glock);
+ bpf_list_push_front((void *)&ghead + 1, &f->node2);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+static __always_inline
+int pop_ptr_off(void *(*op)(void *head))
+{
+ struct {
+ struct bpf_list_head head __contains(foo, node2);
+ struct bpf_spin_lock lock;
+ } *p;
+ struct bpf_list_node *n;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 0;
+ bpf_spin_lock(&p->lock);
+ n = op(&p->head);
+ bpf_spin_unlock(&p->lock);
+
+ if (!n)
+ return 0;
+ bpf_spin_lock((void *)n);
+ return 0;
+}
+
+SEC("?tc")
+int pop_front_off(void *ctx)
+{
+ return pop_ptr_off((void *)bpf_list_pop_front);
+}
+
+SEC("?tc")
+int pop_back_off(void *ctx)
+{
+ return pop_ptr_off((void *)bpf_list_pop_back);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_list_peek.c b/tools/testing/selftests/bpf/progs/linked_list_peek.c
new file mode 100644
index 000000000000..264e81bfb287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list_peek.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_list_node l;
+ int key;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_list_head ghead __contains(node_data, l);
+
+#define list_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+SEC("syscall")
+__retval(0)
+long list_peek(void *ctx)
+{
+ struct bpf_list_node *l_n;
+ struct node_data *n;
+ int i, err = 0;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_front(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_back(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+ n->key = i;
+ bpf_spin_lock(&glock);
+ bpf_list_push_back(&ghead, &n->l);
+ bpf_spin_unlock(&glock);
+ }
+
+ bpf_spin_lock(&glock);
+
+ l_n = bpf_list_front(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != 0) {
+ err = __LINE__;
+ goto done;
+ }
+
+ l_n = bpf_list_back(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != NR_NODES - 1) {
+ err = __LINE__;
+ goto done;
+ }
+
+done:
+ bpf_spin_unlock(&glock);
+ return err;
+}
+
+#define TEST_FB(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_list_node *l_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock); \
+ l_n = bpf_list_##op(&ghead); \
+ if (l_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock); \
+ \
+ return !!jiffies; \
+}
+
+#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_FB(front, true)
+TEST_FB(back, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head"
+TEST_FB(front, false)
+TEST_FB(back, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
new file mode 100644
index 000000000000..00bf1ca95986
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct my_key { long x; };
+struct my_value { long x; };
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct my_key);
+ __type(value, struct my_value);
+ __uint(max_entries, 16);
+} map1 SEC(".maps");
+
+ /* Matches map2 definition in linked_maps2.c. Order of the attributes doesn't
+ * matter.
+ */
+typedef struct {
+ __uint(max_entries, 8);
+ __type(key, int);
+ __type(value, int);
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+} map2_t;
+
+extern map2_t map2 SEC(".maps");
+
+/* This should be the winning map definition, but we have no way of verifying,
+ * so we just make sure that it links and works without errors
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first1;
+int output_second1;
+int output_weak1;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter1)
+{
+ /* update values with key = 1 */
+ int key = 1, val = 1;
+ struct my_key key_struct = { .x = 1 };
+ struct my_value val_struct = { .x = 1000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+ bpf_map_update_elem(&map2, &key, &val, 0);
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit1)
+{
+ /* lookup values with key = 2, set in another file */
+ int key = 2, *val;
+ struct my_key key_struct = { .x = 2 };
+ struct my_value *value_struct;
+
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first1 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second1 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak1 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c
new file mode 100644
index 000000000000..0693687474ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps2.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* modifiers and typedefs are ignored when comparing key/value types */
+typedef struct my_key { long x; } key_type;
+typedef struct my_value { long x; } value_type;
+
+extern struct {
+ __uint(max_entries, 16);
+ __type(key, key_type);
+ __type(value, value_type);
+ __uint(type, BPF_MAP_TYPE_HASH);
+} map1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 8);
+} map2 SEC(".maps");
+
+/* this definition will lose, but it has to exactly match the winner */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first2;
+int output_second2;
+int output_weak2;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter2)
+{
+ /* update values with key = 2 */
+ int key = 2, val = 2;
+ key_type key_struct = { .x = 2 };
+ value_type val_struct = { .x = 2000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+ bpf_map_update_elem(&map2, &key, &val, 0);
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit2)
+{
+ /* lookup values with key = 1, set in another file */
+ int key = 1, *val;
+ key_type key_struct = { .x = 1 };
+ value_type *value_struct;
+
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first2 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second2 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak2 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c
new file mode 100644
index 000000000000..ef9e9d0bb0ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars1.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* this weak extern will be strict due to the other file's strong extern */
+extern bool CONFIG_BPF_SYSCALL __kconfig __weak;
+extern const void bpf_link_fops __ksym __weak;
+
+int input_bss1;
+int input_data1 = 1;
+const volatile int input_rodata1 = 11;
+
+int input_bss_weak __weak;
+/* these two definitions should win */
+int input_data_weak __weak = 10;
+const volatile int input_rodata_weak __weak = 100;
+
+extern int input_bss2;
+extern int input_data2;
+extern const int input_rodata2;
+
+int output_bss1;
+int output_data1;
+int output_rodata1;
+
+long output_sink1;
+
+static __noinline int get_bss_res(void)
+{
+ /* just make sure all the relocations work against .text as well */
+ return input_bss1 + input_bss2 + input_bss_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1)
+{
+ output_bss1 = get_bss_res();
+ output_data1 = input_data1 + input_data2 + input_data_weak;
+ output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* make sure we actually use above special externs, otherwise compiler
+ * will optimize them out
+ */
+ output_sink1 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&bpf_link_fops;
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c
new file mode 100644
index 000000000000..e4f5bd388a3c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars2.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* when an extern is defined as both strong and weak, resulting symbol will be strong */
+extern bool CONFIG_BPF_SYSCALL __kconfig;
+extern const void __start_BTF __ksym;
+
+int input_bss2;
+int input_data2 = 2;
+const volatile int input_rodata2 = 22;
+
+int input_bss_weak __weak;
+/* these two weak variables should lose */
+int input_data_weak __weak = 20;
+const volatile int input_rodata_weak __weak = 200;
+
+extern int input_bss1;
+extern int input_data1;
+extern const int input_rodata1;
+
+int output_bss2;
+int output_data2;
+int output_rodata2;
+
+int output_sink2;
+
+static __noinline int get_data_res(void)
+{
+ /* just make sure all the relocations work against .text as well */
+ return input_data1 + input_data2 + input_data_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2)
+{
+ output_bss2 = input_bss1 + input_bss2 + input_bss_weak;
+ output_data2 = get_data_res();
+ output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* make sure we actually use above special externs, otherwise compiler
+ * will optimize them out
+ */
+ output_sink2 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&__start_BTF;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/livepatch_trampoline.c b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
new file mode 100644
index 000000000000..15579d5bcd91
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int fentry_hit;
+int fexit_hit;
+int my_pid;
+
+SEC("fentry/cmdline_proc_show")
+int BPF_PROG(fentry_cmdline)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ fentry_hit = 1;
+ return 0;
+}
+
+SEC("fexit/cmdline_proc_show")
+int BPF_PROG(fexit_cmdline)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ fexit_hit = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
new file mode 100644
index 000000000000..d736506a4c80
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "../bpf_experimental.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+struct plain_local;
+
+struct node_data {
+ long key;
+ long data;
+ struct plain_local __kptr * stashed_in_local_kptr;
+ struct bpf_rb_node node;
+};
+
+struct refcounted_node {
+ long data;
+ struct bpf_rb_node rb_node;
+ struct bpf_refcount refcount;
+};
+
+struct stash {
+ struct bpf_spin_lock l;
+ struct refcounted_node __kptr *stashed;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct stash);
+ __uint(max_entries, 10);
+} refcounted_node_stash SEC(".maps");
+
+struct plain_local {
+ long key;
+ long data;
+};
+
+struct local_with_root {
+ long key;
+ struct bpf_spin_lock l;
+ struct bpf_rb_root r __contains(node_data, node);
+};
+
+struct map_value {
+ struct prog_test_ref_kfunc *not_kptr;
+ struct prog_test_ref_kfunc __kptr *val;
+ struct node_data __kptr *node;
+ struct plain_local __kptr *plain;
+ struct local_with_root __kptr *local_root;
+};
+
+/* This is necessary so that LLVM generates BTF for node_data struct
+ * If it's not included, a fwd reference for node_data will be generated but
+ * no struct. Example BTF of "node" field in map_value when not included:
+ *
+ * [10] PTR '(anon)' type_id=35
+ * [34] FWD 'node_data' fwd_kind=struct
+ * [35] TYPE_TAG 'kptr_ref' type_id=34
+ *
+ * (with no node_data struct defined)
+ * Had to do the same w/ bpf_kfunc_call_test_release below
+ */
+struct node_data *just_here_because_btf_bug;
+struct refcounted_node *just_here_because_btf_bug2;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2);
+} some_nodes SEC(".maps");
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+
+ return node_a->key < node_b->key;
+}
+
+static int create_and_stash(int idx, int val)
+{
+ struct plain_local *inner_local_kptr;
+ struct map_value *mapval;
+ struct node_data *res;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ inner_local_kptr = bpf_obj_new(typeof(*inner_local_kptr));
+ if (!inner_local_kptr)
+ return 2;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res) {
+ bpf_obj_drop(inner_local_kptr);
+ return 3;
+ }
+ res->key = val;
+
+ inner_local_kptr = bpf_kptr_xchg(&res->stashed_in_local_kptr, inner_local_kptr);
+ if (inner_local_kptr) {
+ /* Should never happen, we just obj_new'd res */
+ bpf_obj_drop(inner_local_kptr);
+ bpf_obj_drop(res);
+ return 4;
+ }
+
+ res = bpf_kptr_xchg(&mapval->node, res);
+ if (res)
+ bpf_obj_drop(res);
+ return 0;
+}
+
+SEC("tc")
+long stash_rb_nodes(void *ctx)
+{
+ return create_and_stash(0, 41) ?: create_and_stash(1, 42);
+}
+
+SEC("tc")
+long stash_plain(void *ctx)
+{
+ struct map_value *mapval;
+ struct plain_local *res;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 1;
+ res->key = 41;
+
+ res = bpf_kptr_xchg(&mapval->plain, res);
+ if (res)
+ bpf_obj_drop(res);
+ return 0;
+}
+
+SEC("tc")
+long stash_local_with_root(void *ctx)
+{
+ struct local_with_root *res;
+ struct map_value *mapval;
+ struct node_data *n;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 2;
+ res->key = 41;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n) {
+ bpf_obj_drop(res);
+ return 3;
+ }
+
+ bpf_spin_lock(&res->l);
+ bpf_rbtree_add(&res->r, &n->node, less);
+ bpf_spin_unlock(&res->l);
+
+ res = bpf_kptr_xchg(&mapval->local_root, res);
+ if (res) {
+ bpf_obj_drop(res);
+ return 4;
+ }
+ return 0;
+}
+
+SEC("tc")
+long unstash_rb_node(void *ctx)
+{
+ struct plain_local *inner_local_kptr = NULL;
+ struct map_value *mapval;
+ struct node_data *res;
+ long retval;
+ int key = 1;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &key);
+ if (!mapval)
+ return 1;
+
+ res = bpf_kptr_xchg(&mapval->node, NULL);
+ if (res) {
+ inner_local_kptr = bpf_kptr_xchg(&res->stashed_in_local_kptr, inner_local_kptr);
+ if (!inner_local_kptr) {
+ bpf_obj_drop(res);
+ return 1;
+ }
+ bpf_obj_drop(inner_local_kptr);
+
+ retval = res->key;
+ bpf_obj_drop(res);
+ return retval;
+ }
+ return 1;
+}
+
+SEC("tc")
+long stash_test_ref_kfunc(void *ctx)
+{
+ struct prog_test_ref_kfunc *res;
+ struct map_value *mapval;
+ int key = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &key);
+ if (!mapval)
+ return 1;
+
+ res = bpf_kptr_xchg(&mapval->val, NULL);
+ if (res)
+ bpf_kfunc_call_test_release(res);
+ return 0;
+}
+
+SEC("tc")
+long refcount_acquire_without_unstash(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int ret = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &ret);
+ if (!s)
+ return 1;
+
+ if (!s->stashed)
+ /* refcount_acquire failure is expected when no refcounted_node
+ * has been stashed before this program executes
+ */
+ return 2;
+
+ p = bpf_refcount_acquire(s->stashed);
+ if (!p)
+ return 3;
+
+ ret = s->stashed ? s->stashed->data : -1;
+ bpf_obj_drop(p);
+ return ret;
+}
+
+/* Helper for refcount_acquire_without_unstash test */
+SEC("tc")
+long stash_refcounted_node(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int key = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &key);
+ if (!s)
+ return 1;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 2;
+ p->data = 42;
+
+ p = bpf_kptr_xchg(&s->stashed, p);
+ if (p) {
+ bpf_obj_drop(p);
+ return 3;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c b/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c
new file mode 100644
index 000000000000..fcf7a7567da2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "../bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+struct map_value {
+ struct node_data __kptr *node;
+};
+
+struct node_data2 {
+ long key[4];
+};
+
+/* This is necessary so that LLVM generates BTF for node_data struct
+ * If it's not included, a fwd reference for node_data will be generated but
+ * no struct. Example BTF of "node" field in map_value when not included:
+ *
+ * [10] PTR '(anon)' type_id=35
+ * [34] FWD 'node_data' fwd_kind=struct
+ * [35] TYPE_TAG 'kptr_ref' type_id=34
+ */
+struct node_data *just_here_because_btf_bug;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2);
+} some_nodes SEC(".maps");
+
+SEC("tc")
+__failure __msg("invalid kptr access, R2 type=ptr_node_data2 expected=ptr_node_data")
+long stash_rb_nodes(void *ctx)
+{
+ struct map_value *mapval;
+ struct node_data2 *res;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 1;
+ res->key[0] = 40;
+
+ res = bpf_kptr_xchg(&mapval->node, res);
+ if (res)
+ bpf_obj_drop(res);
+ return 0;
+}
+
+SEC("tc")
+__failure __msg("R1 must have zero offset when passed to release func")
+long drop_rb_node_off(void *ctx)
+{
+ struct map_value *mapval;
+ struct node_data *res;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 1;
+ /* Try releasing with graph node offset */
+ bpf_obj_drop(&res->node);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 95868bc7ada9..637e75df2e14 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -13,14 +13,14 @@ char _license[] SEC("license") = "GPL";
#define DUMMY_STORAGE_VALUE 0xdeadbeef
-int monitored_pid = 0;
+__u32 monitored_pid = 0;
int inode_storage_result = -1;
int sk_storage_result = -1;
+int task_storage_result = -1;
struct local_storage {
struct inode *exec_inode;
__u32 value;
- struct bpf_spin_lock lock;
};
struct {
@@ -38,42 +38,77 @@ struct {
} sk_storage_map SEC(".maps");
struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct local_storage);
+} sk_storage_map2 SEC(".maps");
+
+struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, struct local_storage);
} task_storage_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct local_storage);
+} task_storage_map2 SEC(".maps");
+
SEC("lsm/inode_unlink")
int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct bpf_local_storage *local_storage;
struct local_storage *storage;
+ struct task_struct *task;
bool is_self_unlink;
if (pid != monitored_pid)
return 0;
- storage = bpf_task_storage_get(&task_storage_map,
- bpf_get_current_task_btf(), 0, 0);
- if (storage) {
- /* Don't let an executable delete itself */
- bpf_spin_lock(&storage->lock);
- is_self_unlink = storage->exec_inode == victim->d_inode;
- bpf_spin_unlock(&storage->lock);
- if (is_self_unlink)
- return -EPERM;
- }
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
- return 0;
+ task_storage_result = -1;
+
+ storage = bpf_task_storage_get(&task_storage_map, task, 0, 0);
+ if (!storage)
+ return 0;
+
+ /* Don't let an executable delete itself */
+ is_self_unlink = storage->exec_inode == victim->d_inode;
+
+ storage = bpf_task_storage_get(&task_storage_map2, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage || storage->value)
+ return 0;
+
+ if (bpf_task_storage_delete(&task_storage_map, task))
+ return 0;
+
+ /* Ensure that the task_storage_map is disconnected from the storage.
+ * The storage memory should not be freed back to the
+ * bpf_mem_alloc.
+ */
+ local_storage = task->bpf_storage;
+ if (!local_storage || local_storage->smap)
+ return 0;
+
+ task_storage_result = 0;
+
+ return is_self_unlink ? -EPERM : 0;
}
-SEC("lsm/inode_rename")
+SEC("lsm.s/inode_rename")
int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- __u32 pid = bpf_get_current_pid_tgid() >> 32;
struct local_storage *storage;
int err;
@@ -89,10 +124,8 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
if (!storage)
return 0;
- bpf_spin_lock(&storage->lock);
if (storage->value != DUMMY_STORAGE_VALUE)
inode_storage_result = -1;
- bpf_spin_unlock(&storage->lock);
err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode);
if (!err)
@@ -101,52 +134,69 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
return 0;
}
-SEC("lsm/socket_bind")
+SEC("lsm.s/socket_bind")
int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
int addrlen)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
struct local_storage *storage;
- int err;
+ struct sock *sk = sock->sk;
- if (pid != monitored_pid)
+ if (pid != monitored_pid || !sk)
return 0;
- storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
+ storage = bpf_sk_storage_get(&sk_storage_map, sk, 0, 0);
if (!storage)
return 0;
- bpf_spin_lock(&storage->lock);
+ sk_storage_result = -1;
if (storage->value != DUMMY_STORAGE_VALUE)
- sk_storage_result = -1;
- bpf_spin_unlock(&storage->lock);
+ return 0;
- err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
- if (!err)
- sk_storage_result = err;
+ /* This tests that we can associate multiple elements
+ * with the local storage.
+ */
+ storage = bpf_sk_storage_get(&sk_storage_map2, sk, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ if (bpf_sk_storage_delete(&sk_storage_map2, sk))
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map2, sk, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ if (bpf_sk_storage_delete(&sk_storage_map, sk))
+ return 0;
+ /* Ensure that the sk_storage_map is disconnected from the storage. */
+ if (!sk->sk_bpf_storage || sk->sk_bpf_storage->smap)
+ return 0;
+
+ sk_storage_result = 0;
return 0;
}
-SEC("lsm/socket_post_create")
+SEC("lsm.s/socket_post_create")
int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
int protocol, int kern)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
struct local_storage *storage;
+ struct sock *sk = sock->sk;
- if (pid != monitored_pid)
+ if (pid != monitored_pid || !sk)
return 0;
- storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+ storage = bpf_sk_storage_get(&sk_storage_map, sk, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!storage)
return 0;
- bpf_spin_lock(&storage->lock);
storage->value = DUMMY_STORAGE_VALUE;
- bpf_spin_unlock(&storage->lock);
return 0;
}
@@ -154,7 +204,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
/* This uses the local storage to remember the inode of the binary that a
* process was originally executing.
*/
-SEC("lsm/bprm_committed_creds")
+SEC("lsm.s/bprm_committed_creds")
void BPF_PROG(exec, struct linux_binprm *bprm)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
@@ -166,18 +216,13 @@ void BPF_PROG(exec, struct linux_binprm *bprm)
storage = bpf_task_storage_get(&task_storage_map,
bpf_get_current_task_btf(), 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (storage) {
- bpf_spin_lock(&storage->lock);
+ if (storage)
storage->exec_inode = bprm->file->f_inode;
- bpf_spin_unlock(&storage->lock);
- }
storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode,
0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!storage)
return;
- bpf_spin_lock(&storage->lock);
storage->value = DUMMY_STORAGE_VALUE;
- bpf_spin_unlock(&storage->lock);
}
diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c
new file mode 100644
index 000000000000..2c3234c5b73a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage_bench.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define HASHMAP_SZ 4194304
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+ });
+} array_of_local_storage_maps SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, HASHMAP_SZ);
+ __type(key, int);
+ __type(value, int);
+ });
+} array_of_hash_maps SEC(".maps");
+
+long important_hits;
+long hits;
+
+/* set from user-space */
+const volatile unsigned int use_hashmap;
+const volatile unsigned int hashmap_num_keys;
+const volatile unsigned int num_maps;
+const volatile unsigned int interleave;
+
+struct loop_ctx {
+ struct task_struct *task;
+ long loop_hits;
+ long loop_important_hits;
+};
+
+static int do_lookup(unsigned int elem, struct loop_ctx *lctx)
+{
+ void *map, *inner_map;
+ int idx = 0;
+
+ if (use_hashmap)
+ map = &array_of_hash_maps;
+ else
+ map = &array_of_local_storage_maps;
+
+ inner_map = bpf_map_lookup_elem(map, &elem);
+ if (!inner_map)
+ return -1;
+
+ if (use_hashmap) {
+ idx = bpf_get_prandom_u32() % hashmap_num_keys;
+ bpf_map_lookup_elem(inner_map, &idx);
+ } else {
+ bpf_task_storage_get(inner_map, lctx->task, &idx,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ }
+
+ lctx->loop_hits++;
+ if (!elem)
+ lctx->loop_important_hits++;
+ return 0;
+}
+
+static long loop(u32 index, void *ctx)
+{
+ struct loop_ctx *lctx = (struct loop_ctx *)ctx;
+ unsigned int map_idx = index % num_maps;
+
+ do_lookup(map_idx, lctx);
+ if (interleave && map_idx % 3 == 0)
+ do_lookup(0, lctx);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+ struct loop_ctx lctx;
+
+ lctx.task = bpf_get_current_task_btf();
+ lctx.loop_hits = 0;
+ lctx.loop_important_hits = 0;
+ bpf_loop(10000, &loop, &lctx, 0);
+ __sync_add_and_fetch(&hits, lctx.loop_hits);
+ __sync_add_and_fetch(&important_hits, lctx.loop_important_hits);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
new file mode 100644
index 000000000000..03bf69f49075
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} task_storage SEC(".maps");
+
+long hits;
+long gp_hits;
+long gp_times;
+long current_gp_start;
+long unexpected;
+bool postgp_seen;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+ struct task_struct *task;
+ int idx;
+ int *s;
+
+ idx = 0;
+ task = bpf_get_current_task_btf();
+ s = bpf_task_storage_get(&task_storage, task, &idx,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!s)
+ return 0;
+
+ *s = 3;
+ bpf_task_storage_delete(&task_storage, task);
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_pregp_step")
+int pregp_step(struct pt_regs *ctx)
+{
+ current_gp_start = bpf_ktime_get_ns();
+ return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_postgp")
+int postgp(struct pt_regs *ctx)
+{
+ if (!current_gp_start && postgp_seen) {
+ /* Will only happen if prog tracing rcu_tasks_trace_pregp_step doesn't
+ * execute before this prog
+ */
+ __sync_add_and_fetch(&unexpected, 1);
+ return 0;
+ }
+
+ __sync_add_and_fetch(&gp_times, bpf_ktime_get_ns() - current_gp_start);
+ __sync_add_and_fetch(&gp_hits, 1);
+ current_gp_start = 0;
+ postgp_seen = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
index 50e66772c046..b0fa26fb4760 100644
--- a/tools/testing/selftests/bpf/progs/loop1.c
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
index 947bb7e988c2..0227409d4b0e 100644
--- a/tools/testing/selftests/bpf/progs/loop2.c
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
index 76e93b31c14b..5d1c9a775e6b 100644
--- a/tools/testing/selftests/bpf/progs/loop3.c
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -1,20 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
SEC("raw_tracepoint/consume_skb")
-int while_true(volatile struct pt_regs* ctx)
+int while_true(struct pt_regs *ctx)
{
- __u64 i = 0, sum = 0;
+ volatile __u64 i = 0, sum = 0;
do {
i++;
sum += PT_REGS_RC(ctx);
diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c
index b35337926d66..0de0357f57cc 100644
--- a/tools/testing/selftests/bpf/progs/loop4.c
+++ b/tools/testing/selftests/bpf/progs/loop4.c
@@ -3,6 +3,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
char _license[] SEC("license") = "GPL";
SEC("socket")
@@ -10,7 +12,7 @@ int combinations(volatile struct __sk_buff* skb)
{
int ret = 0, i;
-#pragma nounroll
+ __pragma_loop_no_unroll
for (i = 0; i < 20; i++)
if (skb->len)
ret |= 1 << i;
diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c
index 913791923fa3..1b13f37f85ec 100644
--- a/tools/testing/selftests/bpf/progs/loop5.c
+++ b/tools/testing/selftests/bpf/progs/loop5.c
@@ -2,7 +2,6 @@
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#define barrier() __asm__ __volatile__("": : :"memory")
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
new file mode 100644
index 000000000000..dd36aff4fba3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* typically virtio scsi has max SGs of 6 */
+#define VIRTIO_MAX_SGS 6
+
+/* Verifier will fail with SG_MAX = 128. The failure can be
+ * workarounded with a smaller SG_MAX, e.g. 10.
+ */
+#define WORKAROUND
+#ifdef WORKAROUND
+#define SG_MAX 10
+#else
+/* typically virtio blk has max SEG of 128 */
+#define SG_MAX 128
+#endif
+
+#define SG_CHAIN 0x01UL
+#define SG_END 0x02UL
+
+#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
+#define sg_is_last(sg) ((sg)->page_link & SG_END)
+#define sg_chain_ptr(sg) \
+ ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
+
+static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
+{
+ struct scatterlist sg;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (sg_is_last(&sg))
+ return NULL;
+
+ sgp++;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (sg_is_chain(&sg))
+ sgp = sg_chain_ptr(&sg);
+
+ return sgp;
+}
+
+static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
+{
+ struct scatterlist *sgp;
+
+ bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
+ return sgp;
+}
+
+int run_once = 0;
+int result = 0;
+
+SEC("kprobe/virtqueue_add_sgs")
+int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
+ unsigned int out_sgs, unsigned int in_sgs)
+{
+ struct scatterlist *sgp = NULL;
+ __u64 length1 = 0, length2 = 0;
+ unsigned int i, n, len;
+
+ if (run_once != 0)
+ return 0;
+
+ for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
+ __sink(out_sgs);
+ for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+ sgp = __sg_next(sgp)) {
+ len = BPF_CORE_READ(sgp, length);
+ length1 += len;
+ n++;
+ }
+ }
+
+ for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) {
+ __sink(in_sgs);
+ for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+ sgp = __sg_next(sgp)) {
+ len = BPF_CORE_READ(sgp, length);
+ length2 += len;
+ n++;
+ }
+ }
+
+ run_once = 1;
+ result = length2 - length1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie.h b/tools/testing/selftests/bpf/progs/lpm_trie.h
new file mode 100644
index 000000000000..76aa5821807f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_LPM_TRIE_H
+#define __PROGS_LPM_TRIE_H
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+/* Benchmark operations */
+enum {
+ LPM_OP_NOOP = 0,
+ LPM_OP_BASELINE,
+ LPM_OP_LOOKUP,
+ LPM_OP_INSERT,
+ LPM_OP_UPDATE,
+ LPM_OP_DELETE,
+ LPM_OP_FREE
+};
+
+/*
+ * Return values from run_bench.
+ *
+ * Negative values are also allowed and represent kernel error codes.
+ */
+#define LPM_BENCH_SUCCESS 0
+#define LPM_BENCH_REINIT_MAP 1 /* Reset trie to initial state for current op */
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
new file mode 100644
index 000000000000..a0e6ebd5507a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_atomic.h"
+#include "progs/lpm_trie.h"
+
+#define BPF_OBJ_NAME_LEN 16U
+#define MAX_ENTRIES 100000000
+#define NR_LOOPS 10000
+
+char _license[] SEC("license") = "GPL";
+
+/* Filled by userspace. See fill_map() in bench_lpm_trie_map.c */
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_map SEC(".maps");
+
+long hits;
+long duration_ns;
+
+/* Configured from userspace */
+__u32 nr_entries;
+__u32 prefixlen;
+bool random;
+__u8 op;
+
+static __u64 latency_free_start;
+
+SEC("fentry/bpf_map_free_deferred")
+int BPF_PROG(trie_free_entry, struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_map, work);
+ char name[BPF_OBJ_NAME_LEN];
+ u32 map_type;
+
+ map_type = BPF_CORE_READ(map, map_type);
+ if (map_type != BPF_MAP_TYPE_LPM_TRIE)
+ return 0;
+
+ /*
+ * Ideally we'd have access to the map ID but that's already
+ * freed before we enter trie_free().
+ */
+ BPF_CORE_READ_STR_INTO(&name, map, name);
+ if (bpf_strncmp(name, BPF_OBJ_NAME_LEN, "trie_free_map"))
+ return 0;
+
+ latency_free_start = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+SEC("fexit/bpf_map_free_deferred")
+int BPF_PROG(trie_free_exit, struct work_struct *work)
+{
+ __u64 val;
+
+ if (!latency_free_start)
+ return 0;
+
+ val = bpf_ktime_get_ns() - latency_free_start;
+ latency_free_start = 0;
+
+ __sync_add_and_fetch(&duration_ns, val);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+static __u32 cur_key;
+
+static __always_inline void generate_key(struct trie_key *key)
+{
+ key->prefixlen = prefixlen;
+
+ if (random)
+ key->data = bpf_get_prandom_u32() % nr_entries;
+ else
+ key->data = cur_key++ % nr_entries;
+}
+
+static int noop(__u32 index, __u32 *unused)
+{
+ return 0;
+}
+
+static int baseline(__u32 index, __u32 *unused)
+{
+ struct trie_key key;
+ __u32 blackbox = 0;
+
+ generate_key(&key);
+ /* Avoid compiler optimizing out the modulo */
+ barrier_var(blackbox);
+ blackbox = READ_ONCE(key.data);
+
+ return 0;
+}
+
+static int lookup(__u32 index, int *retval)
+{
+ struct trie_key key;
+
+ generate_key(&key);
+ if (!bpf_map_lookup_elem(&trie_map, &key)) {
+ *retval = -ENOENT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int insert(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_NOEXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Is this the last entry? */
+ if (key.data == nr_entries - 1) {
+ /* For atomicity concerns, see the comment in delete() */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int update(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_EXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int delete(__u32 index, int *retval)
+{
+ struct trie_key key;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_delete_elem(&trie_map, &key);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Do we need to refill the map? */
+ if (key.data == nr_entries - 1) {
+ /*
+ * Atomicity isn't required because DELETE only supports
+ * one producer running concurrently. What we need is a
+ * way to track how many entries have been deleted from
+ * the trie between consecutive invocations of the BPF
+ * prog because a single bpf_loop() call might not
+ * delete all entries, e.g. when NR_LOOPS < nr_entries.
+ */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+SEC("xdp")
+int BPF_PROG(run_bench)
+{
+ int err = LPM_BENCH_SUCCESS;
+ u64 start, delta;
+ int loops;
+
+ start = bpf_ktime_get_ns();
+
+ switch (op) {
+ case LPM_OP_NOOP:
+ loops = bpf_loop(NR_LOOPS, noop, NULL, 0);
+ break;
+ case LPM_OP_BASELINE:
+ loops = bpf_loop(NR_LOOPS, baseline, NULL, 0);
+ break;
+ case LPM_OP_LOOKUP:
+ loops = bpf_loop(NR_LOOPS, lookup, &err, 0);
+ break;
+ case LPM_OP_INSERT:
+ loops = bpf_loop(NR_LOOPS, insert, &err, 0);
+ break;
+ case LPM_OP_UPDATE:
+ loops = bpf_loop(NR_LOOPS, update, &err, 0);
+ break;
+ case LPM_OP_DELETE:
+ loops = bpf_loop(NR_LOOPS, delete, &err, 0);
+ break;
+ default:
+ bpf_printk("invalid benchmark operation\n");
+ return -1;
+ }
+
+ delta = bpf_ktime_get_ns() - start;
+
+ __sync_add_and_fetch(&duration_ns, delta);
+ __sync_add_and_fetch(&hits, loops);
+
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_map.c b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
new file mode 100644
index 000000000000..6e60d686b664
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_ENTRIES 100000000
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_free_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/lru_bug.c b/tools/testing/selftests/bpf/progs/lru_bug.c
new file mode 100644
index 000000000000..ad73029cb1e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lru_bug.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct map_value {
+ struct task_struct __kptr_untrusted *ptr;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct map_value);
+} lru_map SEC(".maps");
+
+int pid = 0;
+int result = 1;
+
+SEC("fentry/bpf_ktime_get_ns")
+int printk(void *ctx)
+{
+ struct map_value v = {};
+
+ if (pid == bpf_get_current_task_btf()->pid)
+ bpf_map_update_elem(&lru_map, &(int){0}, &v, 0);
+ return 0;
+}
+
+SEC("fentry/do_nanosleep")
+int nanosleep(void *ctx)
+{
+ struct map_value val = {}, *v;
+ struct task_struct *current;
+
+ bpf_map_update_elem(&lru_map, &(int){0}, &val, 0);
+ v = bpf_map_lookup_elem(&lru_map, &(int){0});
+ if (!v)
+ return 0;
+ bpf_map_delete_elem(&lru_map, &(int){0});
+ current = bpf_get_current_task_btf();
+ v->ptr = current;
+ pid = current->pid;
+ bpf_ktime_get_ns();
+ result = !v->ptr;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index 33694ef8acfa..7de173daf27b 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -5,9 +5,11 @@
*/
#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include <errno.h>
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -87,14 +89,16 @@ SEC("lsm/file_mprotect")
int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
unsigned long reqprot, unsigned long prot, int ret)
{
- if (ret != 0)
+ struct mm_struct *mm = vma->vm_mm;
+
+ if (ret != 0 || !mm)
return ret;
- __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ __s32 pid = bpf_get_current_pid_tgid() >> 32;
int is_stack = 0;
- is_stack = (vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack);
+ is_stack = (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack);
if (is_stack && monitored_pid == pid) {
mprotect_count++;
@@ -160,11 +164,11 @@ int BPF_PROG(test_task_free, struct task_struct *task)
int copy_test = 0;
-SEC("fentry.s/__x64_sys_setdomainname")
+SEC("fentry.s/" SYS_PREFIX "sys_setdomainname")
int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
{
- void *ptr = (void *)PT_REGS_PARM1(regs);
- int len = PT_REGS_PARM2(regs);
+ void *ptr = (void *)PT_REGS_PARM1_SYSCALL(regs);
+ int len = PT_REGS_PARM2_SYSCALL(regs);
int buf = 0;
long ret;
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
new file mode 100644
index 000000000000..d7598538aa2d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern bool CONFIG_SECURITY_SELINUX __kconfig __weak;
+extern bool CONFIG_SECURITY_SMACK __kconfig __weak;
+extern bool CONFIG_SECURITY_APPARMOR __kconfig __weak;
+
+#ifndef AF_PACKET
+#define AF_PACKET 17
+#endif
+
+#ifndef AF_UNIX
+#define AF_UNIX 1
+#endif
+
+#ifndef EPERM
+#define EPERM 1
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, __u64);
+ __type(value, __u64);
+} cgroup_storage SEC(".maps");
+
+int called_socket_post_create;
+int called_socket_post_create2;
+int called_socket_bind;
+int called_socket_bind2;
+int called_socket_alloc;
+int called_socket_clone;
+
+static __always_inline int test_local_storage(void)
+{
+ __u64 *val;
+
+ val = bpf_get_local_storage(&cgroup_storage, 0);
+ if (!val)
+ return 0;
+ *val += 1;
+
+ return 1;
+}
+
+static __always_inline int real_create(struct socket *sock, int family,
+ int protocol)
+{
+ struct sock *sk;
+ int prio = 123;
+
+ /* Reject non-tx-only AF_PACKET. */
+ if (family == AF_PACKET && protocol != 0)
+ return 0; /* EPERM */
+
+ sk = sock->sk;
+ if (!sk)
+ return 1;
+
+ /* The rest of the sockets get default policy. */
+ if (bpf_setsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 0; /* EPERM */
+
+ /* Make sure bpf_getsockopt is allowed and works. */
+ prio = 0;
+ if (bpf_getsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 0; /* EPERM */
+ if (prio != 123)
+ return 0; /* EPERM */
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 0; /* EPERM */
+
+ return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ called_socket_post_create++;
+ return real_create(sock, family, protocol);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create2, struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ called_socket_post_create2++;
+ return real_create(sock, family, protocol);
+}
+
+static __always_inline int real_bind(struct socket *sock,
+ struct sockaddr *address,
+ int addrlen)
+{
+ struct sockaddr_ll sa = {};
+ struct sock *sk = sock->sk;
+
+ if (!sk)
+ return 1;
+
+ if (sk->__sk_common.skc_family != AF_PACKET)
+ return 1;
+
+ if (sk->sk_kern_sock)
+ return 1;
+
+ bpf_probe_read_kernel(&sa, sizeof(sa), address);
+ if (sa.sll_protocol)
+ return 0; /* EPERM */
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 0; /* EPERM */
+
+ return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ called_socket_bind++;
+ return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind2, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ called_socket_bind2++;
+ return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_current (via bpf_lsm_current_hooks) */
+SEC("lsm_cgroup/sk_alloc_security")
+int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority)
+{
+ called_socket_alloc++;
+ /* if already have non-bpf lsms installed, EPERM will cause memory leak of non-bpf lsms */
+ if (CONFIG_SECURITY_SELINUX || CONFIG_SECURITY_SMACK || CONFIG_SECURITY_APPARMOR)
+ return 1;
+
+ if (family == AF_UNIX)
+ return 0; /* EPERM */
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 0; /* EPERM */
+
+ return 1;
+}
+
+/* __cgroup_bpf_run_lsm_sock */
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+ int prio = 234;
+
+ if (!newsk)
+ return 1;
+
+ /* Accepted request sockets get a different priority. */
+ if (bpf_setsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 1;
+
+ /* Make sure bpf_getsockopt is allowed and works. */
+ prio = 0;
+ if (bpf_getsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 1;
+ if (prio != 234)
+ return 1;
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 1;
+
+ called_socket_clone++;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c
new file mode 100644
index 000000000000..6cb0f161f417
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(nonvoid_socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+ /* Can not return any errors from void LSM hooks. */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_tailcall.c b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
new file mode 100644
index 000000000000..6e7e58051e64
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Huawei Technologies Co., Ltd */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("lsm/file_permission")
+int lsm_file_permission_prog(void *ctx)
+{
+ return 0;
+}
+
+SEC("lsm/kernfs_init_security")
+int lsm_kernfs_init_security_prog(void *ctx)
+{
+ return 0;
+}
+
+SEC("lsm/kernfs_init_security")
+int lsm_kernfs_init_security_entry(void *ctx)
+{
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/map_excl.c b/tools/testing/selftests/bpf/progs/map_excl.c
new file mode 100644
index 000000000000..d461684728e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_excl.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} excl_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_not_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/map_in_map_btf.c b/tools/testing/selftests/bpf/progs/map_in_map_btf.c
new file mode 100644
index 000000000000..7a1336d7b16a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_in_map_btf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ __u64 data;
+ struct bpf_list_node node;
+};
+
+struct map_value {
+ struct bpf_list_head head __contains(node_data, node);
+ struct bpf_spin_lock lock;
+};
+
+struct inner_array_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} inner_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+ __array(values, struct inner_array_type);
+} outer_array SEC(".maps") = {
+ .values = {
+ [0] = &inner_array,
+ },
+};
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+bool done = false;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int add_to_list_in_inner_array(void *ctx)
+{
+ struct map_value *value;
+ struct node_data *new;
+ struct bpf_map *map;
+ int zero = 0;
+
+ if (done || (u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ map = bpf_map_lookup_elem(&outer_array, &zero);
+ if (!map)
+ return 0;
+
+ value = bpf_map_lookup_elem(map, &zero);
+ if (!value)
+ return 0;
+
+ new = bpf_obj_new(typeof(*new));
+ if (!new)
+ return 0;
+
+ bpf_spin_lock(&value->lock);
+ bpf_list_push_back(&value->head, &new->node);
+ bpf_spin_unlock(&value->lock);
+ done = true;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
new file mode 100644
index 000000000000..edaba481db9d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+struct map_value {
+ struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} array_map SEC(".maps");
+
+struct pcpu_array_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} pcpu_array_map SEC(".maps");
+
+struct hash_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} hash_map SEC(".maps");
+
+struct pcpu_hash_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} pcpu_hash_map SEC(".maps");
+
+struct hash_malloc_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} hash_malloc_map SEC(".maps");
+
+struct pcpu_hash_malloc_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} pcpu_hash_malloc_map SEC(".maps");
+
+struct lru_hash_map {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} lru_hash_map SEC(".maps");
+
+struct lru_pcpu_hash_map {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} lru_pcpu_hash_map SEC(".maps");
+
+struct cgrp_ls_map {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} cgrp_ls_map SEC(".maps");
+
+struct task_ls_map {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} task_ls_map SEC(".maps");
+
+struct inode_ls_map {
+ __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} inode_ls_map SEC(".maps");
+
+struct sk_ls_map {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} sk_ls_map SEC(".maps");
+
+#define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \
+ struct { \
+ __uint(type, map_type); \
+ __uint(max_entries, 1); \
+ __uint(key_size, sizeof(int)); \
+ __uint(value_size, sizeof(int)); \
+ __array(values, struct inner_map_type); \
+ } name SEC(".maps") = { \
+ .values = { [0] = &inner_map_type }, \
+ }
+
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, pcpu_array_map, array_of_pcpu_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, pcpu_hash_map, array_of_pcpu_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, pcpu_array_map, hash_of_pcpu_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, pcpu_hash_map, hash_of_pcpu_hash_maps);
+
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+
+static void test_kptr_unref(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p;
+
+ p = v->unref_ptr;
+ /* store untrusted_ptr_or_null_ */
+ WRITE_ONCE(v->unref_ptr, p);
+ if (!p)
+ return;
+ if (p->a + p->b > 100)
+ return;
+ /* store untrusted_ptr_ */
+ WRITE_ONCE(v->unref_ptr, p);
+ /* store NULL */
+ WRITE_ONCE(v->unref_ptr, NULL);
+}
+
+static void test_kptr_ref(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p;
+
+ p = v->ref_ptr;
+ /* store ptr_or_null_ */
+ WRITE_ONCE(v->unref_ptr, p);
+ if (!p)
+ return;
+ /*
+ * p is rcu_ptr_prog_test_ref_kfunc,
+ * because bpf prog is non-sleepable and runs in RCU CS.
+ * p can be passed to kfunc that requires KF_RCU.
+ */
+ bpf_kfunc_call_test_ref(p);
+ if (p->a + p->b > 100)
+ return;
+ /* store NULL */
+ p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+ if (!p)
+ return;
+ /*
+ * p is trusted_ptr_prog_test_ref_kfunc.
+ * p can be passed to kfunc that requires KF_RCU.
+ */
+ bpf_kfunc_call_test_ref(p);
+ if (p->a + p->b > 100) {
+ bpf_kfunc_call_test_release(p);
+ return;
+ }
+ /* store ptr_ */
+ WRITE_ONCE(v->unref_ptr, p);
+ bpf_kfunc_call_test_release(p);
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return;
+ /* store ptr_ */
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (!p)
+ return;
+ if (p->a + p->b > 100) {
+ bpf_kfunc_call_test_release(p);
+ return;
+ }
+ bpf_kfunc_call_test_release(p);
+}
+
+static void test_kptr(struct map_value *v)
+{
+ test_kptr_unref(v);
+ test_kptr_ref(v);
+}
+
+SEC("tc")
+int test_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return 0; \
+ test_kptr(v)
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+ TEST(pcpu_array_map);
+ TEST(pcpu_hash_map);
+
+#undef TEST
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_map_kptr, struct cgroup *cgrp, const char *path)
+{
+ struct map_value *v;
+
+ v = bpf_cgrp_storage_get(&cgrp_ls_map, cgrp, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(test_task_map_kptr, struct inode *inode, struct dentry *victim)
+{
+ struct task_struct *task;
+ struct map_value *v;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+ v = bpf_task_storage_get(&task_ls_map, task, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(test_inode_map_kptr, struct inode *inode, struct dentry *victim)
+{
+ struct map_value *v;
+
+ v = bpf_inode_storage_get(&inode_ls_map, inode, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("tc")
+int test_sk_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 0;
+ v = bpf_sk_storage_get(&sk_ls_map, sk, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("tc")
+int test_map_in_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+ void *map;
+
+#define TEST(map_in_map) \
+ map = bpf_map_lookup_elem(&map_in_map, &key); \
+ if (!map) \
+ return 0; \
+ v = bpf_map_lookup_elem(map, &key); \
+ if (!v) \
+ return 0; \
+ test_kptr(v)
+
+ TEST(array_of_array_maps);
+ TEST(array_of_hash_maps);
+ TEST(array_of_hash_malloc_maps);
+ TEST(array_of_lru_hash_maps);
+ TEST(array_of_pcpu_array_maps);
+ TEST(array_of_pcpu_hash_maps);
+ TEST(hash_of_array_maps);
+ TEST(hash_of_hash_maps);
+ TEST(hash_of_hash_malloc_maps);
+ TEST(hash_of_lru_hash_maps);
+ TEST(hash_of_pcpu_array_maps);
+ TEST(hash_of_pcpu_hash_maps);
+
+#undef TEST
+ return 0;
+}
+
+int ref = 1;
+
+static __always_inline
+int test_map_kptr_ref_pre(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p, *p_st;
+ unsigned long arg = 0;
+ int ret;
+
+ p = bpf_kfunc_call_test_acquire(&arg);
+ if (!p)
+ return 1;
+ ref++;
+
+ p_st = p->next;
+ if (p_st->cnt.refs.counter != ref) {
+ ret = 2;
+ goto end;
+ }
+
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (p) {
+ ret = 3;
+ goto end;
+ }
+ if (p_st->cnt.refs.counter != ref)
+ return 4;
+
+ p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+ if (!p)
+ return 5;
+ bpf_kfunc_call_test_release(p);
+ ref--;
+ if (p_st->cnt.refs.counter != ref)
+ return 6;
+
+ p = bpf_kfunc_call_test_acquire(&arg);
+ if (!p)
+ return 7;
+ ref++;
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (p) {
+ ret = 8;
+ goto end;
+ }
+ if (p_st->cnt.refs.counter != ref)
+ return 9;
+ /* Leave in map */
+
+ return 0;
+end:
+ ref--;
+ bpf_kfunc_call_test_release(p);
+ return ret;
+}
+
+static __always_inline
+int test_map_kptr_ref_post(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p, *p_st;
+
+ p_st = v->ref_ptr;
+ if (!p_st || p_st->cnt.refs.counter != ref)
+ return 1;
+
+ p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+ if (!p)
+ return 2;
+ if (p_st->cnt.refs.counter != ref) {
+ bpf_kfunc_call_test_release(p);
+ return 3;
+ }
+
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (p) {
+ bpf_kfunc_call_test_release(p);
+ return 4;
+ }
+ if (p_st->cnt.refs.counter != ref)
+ return 5;
+
+ return 0;
+}
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_pre(v); \
+ if (ret) \
+ return ret;
+
+#define TEST_PCPU(map) \
+ v = bpf_map_lookup_percpu_elem(&map, &key, 0); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_pre(v); \
+ if (ret) \
+ return ret;
+
+SEC("tc")
+int test_map_kptr_ref1(struct __sk_buff *ctx)
+{
+ struct map_value *v, val = {};
+ int key = 0, ret;
+
+ bpf_map_update_elem(&hash_map, &key, &val, 0);
+ bpf_map_update_elem(&hash_malloc_map, &key, &val, 0);
+ bpf_map_update_elem(&lru_hash_map, &key, &val, 0);
+
+ bpf_map_update_elem(&pcpu_hash_map, &key, &val, 0);
+ bpf_map_update_elem(&pcpu_hash_malloc_map, &key, &val, 0);
+ bpf_map_update_elem(&lru_pcpu_hash_map, &key, &val, 0);
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+ TEST_PCPU(pcpu_array_map);
+ TEST_PCPU(pcpu_hash_map);
+ TEST_PCPU(pcpu_hash_malloc_map);
+ TEST_PCPU(lru_pcpu_hash_map);
+
+ return 0;
+}
+
+#undef TEST
+#undef TEST_PCPU
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_post(v); \
+ if (ret) \
+ return ret;
+
+#define TEST_PCPU(map) \
+ v = bpf_map_lookup_percpu_elem(&map, &key, 0); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_post(v); \
+ if (ret) \
+ return ret;
+
+SEC("tc")
+int test_map_kptr_ref2(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0, ret;
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+ TEST_PCPU(pcpu_array_map);
+ TEST_PCPU(pcpu_hash_map);
+ TEST_PCPU(pcpu_hash_malloc_map);
+ TEST_PCPU(lru_pcpu_hash_map);
+
+ return 0;
+}
+
+#undef TEST
+#undef TEST_PCPU
+
+SEC("tc")
+int test_map_kptr_ref3(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sp = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sp);
+ if (!p)
+ return 1;
+ ref++;
+ if (p->cnt.refs.counter != ref) {
+ bpf_kfunc_call_test_release(p);
+ return 2;
+ }
+ bpf_kfunc_call_test_release(p);
+ ref--;
+ return 0;
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref1(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (v)
+ return 150;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 200;
+ return test_map_kptr_ref_pre(v);
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref2(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (!v)
+ return 200;
+ return test_map_kptr_ref_post(v);
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref_del(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (!v)
+ return 200;
+ if (!v->ref_ptr)
+ return 300;
+ return bpf_task_storage_delete(&task_ls_map, current);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
new file mode 100644
index 000000000000..4c0ff01f1a96
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+struct map_value {
+ char buf[8];
+ struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
+ struct prog_test_member __kptr *ref_memb_ptr;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} array_map SEC(".maps");
+
+SEC("?tc")
+__failure __msg("kptr access size must be BPF_DW")
+int size_not_bpf_dw(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ *(u32 *)&v->unref_ptr = 0;
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("kptr access cannot have variable offset")
+int non_const_var_off(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0, id;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ id = ctx->protocol;
+ if (id < 4 || id > 12)
+ return 0;
+ *(u64 *)((void *)v + id) = 0;
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 doesn't have constant offset. kptr has to be")
+int non_const_var_off_kptr_xchg(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0, id;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ id = ctx->protocol;
+ if (id < 4 || id > 12)
+ return 0;
+ bpf_kptr_xchg((void *)v + id, NULL);
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("kptr access misaligned expected=8 off=7")
+int misaligned_access_write(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ *(void **)((void *)v + 7) = NULL;
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("kptr access misaligned expected=8 off=1")
+int misaligned_access_read(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ return *(u64 *)((void *)v + 1);
+}
+
+SEC("?tc")
+__failure __msg("variable untrusted_ptr_ access var_off=(0x0; 0x1e0)")
+int reject_var_off_store(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *unref_ptr;
+ struct map_value *v;
+ int key = 0, id;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ unref_ptr = v->unref_ptr;
+ if (!unref_ptr)
+ return 0;
+ id = ctx->protocol;
+ if (id < 4 || id > 12)
+ return 0;
+ unref_ptr += id;
+ v->unref_ptr = unref_ptr;
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc")
+int reject_bad_type_match(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *unref_ptr;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ unref_ptr = v->unref_ptr;
+ if (!unref_ptr)
+ return 0;
+ unref_ptr = (void *)unref_ptr + 4;
+ v->unref_ptr = unref_ptr;
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_")
+int marked_as_untrusted_or_null(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ bpf_this_cpu_ptr(v->unref_ptr);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("access beyond struct prog_test_ref_kfunc at off 32 size 4")
+int correct_btf_id_check_size(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = v->unref_ptr;
+ if (!p)
+ return 0;
+ return *(int *)((void *)p + bpf_core_type_size(struct prog_test_ref_kfunc));
+}
+
+SEC("?tc")
+__failure __msg("R1 type=untrusted_ptr_ expected=percpu_ptr_")
+int inherit_untrusted_on_walk(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *unref_ptr;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ unref_ptr = v->unref_ptr;
+ if (!unref_ptr)
+ return 0;
+ unref_ptr = unref_ptr->next;
+ bpf_this_cpu_ptr(unref_ptr);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("off=8 kptr isn't referenced kptr")
+int reject_kptr_xchg_on_unref(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ bpf_kptr_xchg(&v->unref_ptr, NULL);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=percpu_ptr_")
+int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ bpf_this_cpu_ptr(v->ref_ptr);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("store to referenced kptr disallowed")
+int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = v->ref_ptr;
+ if (!p)
+ return 0;
+ /* Checkmate, clang */
+ *(struct prog_test_ref_kfunc * volatile *)&v->ref_ptr = p;
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R2 must be referenced")
+int reject_untrusted_xchg(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = v->ref_ptr;
+ if (!p)
+ return 0;
+ bpf_kptr_xchg(&v->ref_ptr, p);
+ return 0;
+}
+
+SEC("?tc")
+__failure
+__msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member")
+int reject_bad_type_xchg(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *ref_ptr;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!ref_ptr)
+ return 0;
+ bpf_kptr_xchg(&v->ref_memb_ptr, ref_ptr);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc")
+int reject_member_of_ref_xchg(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *ref_ptr;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!ref_ptr)
+ return 0;
+ bpf_kptr_xchg(&v->ref_memb_ptr, &ref_ptr->memb);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("kptr cannot be accessed indirectly by helper")
+int reject_indirect_helper_access(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ bpf_get_current_comm(v, sizeof(v->buf) + 1);
+ return 0;
+}
+
+__noinline
+int write_func(int *p)
+{
+ return p ? *p = 42 : 0;
+}
+
+SEC("?tc")
+__failure __msg("kptr cannot be accessed indirectly by helper")
+int reject_indirect_global_func_access(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ return write_func((void *)v + 5);
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=4 alloc_insn=")
+int kptr_xchg_ref_state(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 0;
+ bpf_kptr_xchg(&v->ref_ptr, p);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to helper arg2")
+int kptr_xchg_possibly_null(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+
+ /* PTR_TO_BTF_ID | PTR_MAYBE_NULL passed to bpf_kptr_xchg() */
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (p)
+ bpf_kfunc_call_test_release(p);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
new file mode 100644
index 000000000000..63245785eb69
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u32 target_id;
+
+__s64 bpf_map_sum_elem_count(const struct bpf_map *map) __ksym;
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct bpf_map *map = ctx->map;
+
+ if (map && map->id == target_id)
+ BPF_SEQ_PRINTF(seq, "%lld", bpf_map_sum_elem_count(map));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index d8850bc6a9f1..efaf622c28dd 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -12,6 +12,7 @@ _Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
__u32 g_line = 0;
+int page_size = 0; /* userspace should set it */
#define VERIFY_TYPE(type, func) ({ \
g_map_type = type; \
@@ -102,6 +103,8 @@ struct {
__type(value, __u32);
} m_hash SEC(".maps");
+__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
+
static inline int check_hash(void)
{
struct bpf_htab *hash = (struct bpf_htab *)&m_hash;
@@ -114,6 +117,8 @@ static inline int check_hash(void)
VERIFY(hash->elem_size == 64);
VERIFY(hash->count.counter == 0);
+ VERIFY(bpf_map_sum_elem_count(map) == 0);
+
for (i = 0; i < HALF_ENTRIES; ++i) {
const __u32 key = i;
const __u32 val = 1;
@@ -122,6 +127,7 @@ static inline int check_hash(void)
return 0;
}
VERIFY(hash->count.counter == HALF_ENTRIES);
+ VERIFY(bpf_map_sum_elem_count(map) == HALF_ENTRIES);
return 1;
}
@@ -310,7 +316,7 @@ struct lpm_trie {
} __attribute__((preserve_access_index));
struct lpm_key {
- struct bpf_lpm_trie_key trie_key;
+ struct bpf_lpm_trie_key_hdr trie_key;
__u32 data;
};
@@ -333,9 +339,11 @@ static inline int check_lpm_trie(void)
return 1;
}
+#define INNER_MAX_ENTRIES 1234
+
struct inner_map {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
+ __uint(max_entries, INNER_MAX_ENTRIES);
__type(key, __u32);
__type(value, __u32);
} inner_map SEC(".maps");
@@ -347,7 +355,7 @@ struct {
__type(value, __u32);
__array(values, struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
+ __uint(max_entries, INNER_MAX_ENTRIES);
__type(key, __u32);
__type(value, __u32);
});
@@ -359,8 +367,13 @@ static inline int check_array_of_maps(void)
{
struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps;
struct bpf_map *map = (struct bpf_map *)&m_array_of_maps;
+ struct bpf_array *inner_map;
+ int key = 0;
VERIFY(check_default(&array_of_maps->map, map));
+ inner_map = bpf_map_lookup_elem(array_of_maps, &key);
+ VERIFY(inner_map != NULL);
+ VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
return 1;
}
@@ -381,8 +394,13 @@ static inline int check_hash_of_maps(void)
{
struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps;
struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps;
+ struct bpf_htab *inner_map;
+ int key = 2;
VERIFY(check_default(&hash_of_maps->map, map));
+ inner_map = bpf_map_lookup_elem(hash_of_maps, &key);
+ VERIFY(inner_map != NULL);
+ VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
return 1;
}
@@ -635,7 +653,6 @@ struct bpf_ringbuf_map {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} m_ringbuf SEC(".maps");
static inline int check_ringbuf(void)
@@ -643,7 +660,7 @@ static inline int check_ringbuf(void)
struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
- VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+ VERIFY(check(&ringbuf->map, map, 0, 0, page_size));
return 1;
}
@@ -683,5 +700,4 @@ int cg_skb(void *ctx)
return 1;
}
-__u32 _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..3b984b6ae7c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r8 = *(u64 *)(r7 +0) ; R7=ptr_nameidata(off={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)")
+__msg("r9 = *(u8 *)(r8 +0) ; R8=rdonly_untrusted_mem(sz=0) R9=scalar")
+int btf_id_to_ptr_mem(void *ctx)
+{
+ struct task_struct *task;
+ struct nameidata *idata;
+ u64 ret, off;
+
+ task = bpf_get_current_task_btf();
+ idata = task->nameidata;
+ off = bpf_core_field_offset(struct nameidata, pathname);
+ /*
+ * asm block to have reliable match target for __msg, equivalent of:
+ * ret = task->nameidata->pathname[0];
+ */
+ asm volatile (
+ "r7 = %[idata];"
+ "r7 += %[off];"
+ "r8 = *(u64 *)(r7 + 0);"
+ "r9 = *(u8 *)(r8 + 0);"
+ "%[ret] = r9;"
+ : [ret]"=r"(ret)
+ : [idata]"r"(idata),
+ [off]"r"(off)
+ : "r7", "r8", "r9");
+ return ret;
+}
+
+SEC("socket")
+__success
+__retval(0)
+int ldx_is_ok_bad_addr(void *ctx)
+{
+ char *p;
+
+ if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID))
+ return 42;
+
+ p = bpf_rdonly_cast(0, 0);
+ return p[0x7fff];
+}
+
+SEC("socket")
+__success
+__retval(1)
+int ldx_is_ok_good_addr(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return *p;
+}
+
+SEC("socket")
+__success
+int offset_not_tracked(void *ctx)
+{
+ int *p, i, s;
+
+ p = bpf_rdonly_cast(0, 0);
+ s = 0;
+ bpf_for(i, 0, 1000 * 1000 * 1000) {
+ p++;
+ s += *p;
+ }
+ return s;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int stx_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ *p = 1;
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ __sync_fetch_and_add(p, 1);
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_rmw_not_ok(void *ctx)
+{
+ long v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return __sync_val_compare_and_swap(p, 0, 42);
+}
+
+SEC("socket")
+__failure
+__msg("invalid access to memory, mem_size=0 off=0 size=4")
+__msg("R1 min value is outside of the allowed memory range")
+int kfunc_param_not_ok(void *ctx)
+{
+ int *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ bpf_kfunc_trusted_num_test(p);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure
+__msg("R1 type=rdonly_untrusted_mem expected=")
+int helper_param_not_ok(void *ctx)
+{
+ char *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ /*
+ * Any helper with ARG_CONST_SIZE_OR_ZERO constraint will do,
+ * the most permissive constraint
+ */
+ bpf_copy_from_user(p, 0, (void *)42);
+ return 0;
+}
+
+static __noinline u64 *get_some_addr(void)
+{
+ if (bpf_get_prandom_u32())
+ return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock));
+ else
+ return bpf_rdonly_cast(0, 0);
+}
+
+SEC("socket")
+__success
+__retval(0)
+int mixed_mem_type(void *ctx)
+{
+ u64 *p;
+
+ /* Try to avoid compiler hoisting load to if branches by using __noinline func. */
+ p = get_some_addr();
+ return *p;
+}
+
+__attribute__((__aligned__(8)))
+u8 global[] = {
+ 0x11, 0x22, 0x33, 0x44,
+ 0x55, 0x66, 0x77, 0x88,
+ 0x99
+};
+
+__always_inline
+static u64 combine(void *p)
+{
+ u64 acc;
+
+ acc = 0;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ acc |= (*(u64 *)p >> 56) << 24;
+ acc |= (*(u32 *)p >> 24) << 16;
+ acc |= (*(u16 *)p >> 8) << 8;
+ acc |= *(u8 *)p;
+#else
+ acc |= (*(u64 *)p & 0xff) << 24;
+ acc |= (*(u32 *)p & 0xff) << 16;
+ acc |= (*(u16 *)p & 0xff) << 8;
+ acc |= *(u8 *)p;
+#endif
+ return acc;
+}
+
+SEC("socket")
+__retval(0x88442211)
+int diff_size_access(void *ctx)
+{
+ return combine(bpf_rdonly_cast(&global, 0));
+}
+
+SEC("socket")
+__retval(0x99553322)
+int misaligned_access(void *ctx)
+{
+ return combine(bpf_rdonly_cast(&global, 0) + 1);
+}
+
+__weak int return_one(void)
+{
+ return 1;
+}
+
+SEC("socket")
+__success
+__retval(1)
+int null_check(void *ctx)
+{
+ int *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ if (p == 0)
+ /* make this a function call to avoid compiler
+ * moving r0 assignment before check.
+ */
+ return return_one();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c
new file mode 100644
index 000000000000..51a4fe64c917
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+ bpf_kfunc_common_test();
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test2(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
new file mode 100644
index 000000000000..29c18d869ec1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe.multi/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+ bpf_kfunc_common_test();
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test2(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test3(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test4(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe.multi/bpf_kfunc_common_test")
+int test5(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe.session/bpf_kfunc_common_test")
+int test6(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/missed_tp_recursion.c b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c
new file mode 100644
index 000000000000..762385f827c5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+ bpf_printk("test");
+ return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test2(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test3(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test4(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mmap_inner_array.c b/tools/testing/selftests/bpf/progs/mmap_inner_array.c
new file mode 100644
index 000000000000..90aacbc2938a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mmap_inner_array.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct inner_array_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, 1);
+} inner_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+ __array(values, struct inner_array_type);
+} outer_map SEC(".maps");
+
+int pid = 0;
+__u64 match_value = 0x13572468;
+bool done = false;
+bool pid_match = false;
+bool outer_map_match = false;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int add_to_list_in_inner_array(void *ctx)
+{
+ __u32 curr_pid, zero = 0;
+ struct bpf_map *map;
+ __u64 *value;
+
+ curr_pid = (u32)bpf_get_current_pid_tgid();
+ if (done || curr_pid != pid)
+ return 0;
+
+ pid_match = true;
+ map = bpf_map_lookup_elem(&outer_map, &curr_pid);
+ if (!map)
+ return 0;
+
+ outer_map_match = true;
+ value = bpf_map_lookup_elem(map, &zero);
+ if (!value)
+ return 0;
+
+ *value = match_value;
+ done = true;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c
index 8b7466a15c6b..3376d4849f58 100644
--- a/tools/testing/selftests/bpf/progs/modify_return.c
+++ b/tools/testing/selftests/bpf/progs/modify_return.c
@@ -47,3 +47,43 @@ int BPF_PROG(fexit_test, int a, __u64 b, int ret)
return 0;
}
+
+static int sequence2;
+
+__u64 fentry_result2 = 0;
+SEC("fentry/bpf_modify_return_test2")
+int BPF_PROG(fentry_test2, int a, int *b, short c, int d, void *e, char f,
+ int g)
+{
+ sequence2++;
+ fentry_result2 = (sequence2 == 1);
+ return 0;
+}
+
+__u64 fmod_ret_result2 = 0;
+SEC("fmod_ret/bpf_modify_return_test2")
+int BPF_PROG(fmod_ret_test2, int a, int *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ sequence2++;
+ /* This is the first fmod_ret program, the ret passed should be 0 */
+ fmod_ret_result2 = (sequence2 == 2 && ret == 0);
+ return input_retval;
+}
+
+__u64 fexit_result2 = 0;
+SEC("fexit/bpf_modify_return_test2")
+int BPF_PROG(fexit_test2, int a, int *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ sequence2++;
+ /* If the input_reval is non-zero a successful modification should have
+ * occurred.
+ */
+ if (input_retval)
+ fexit_result2 = (sequence2 == 3 && ret == input_retval);
+ else
+ fexit_result2 = (sequence2 == 3 && ret == 29);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
new file mode 100644
index 000000000000..3b188ccdcc40
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __MPTCP_BPF_H__
+#define __MPTCP_BPF_H__
+
+#include "bpf_experimental.h"
+
+/* list helpers from include/linux/list.h */
+static inline int list_is_head(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list == head;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member) \
+ list_is_head(&pos->member, (head))
+
+/* small difference: 'can_loop' has been added in the conditions */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member); \
+ !list_entry_is_head(pos, head, member) && can_loop; \
+ pos = list_next_entry(pos, member))
+
+/* mptcp helpers from protocol.h */
+#define mptcp_for_each_subflow(__msk, __subflow) \
+ list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+
+static __always_inline struct sock *
+mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
+{
+ return subflow->tcp_sock;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
new file mode 100644
index 000000000000..f3acb90588c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Tessares SA. */
+/* Copyright (c) 2022, SUSE. */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+__u32 token = 0;
+
+struct mptcp_storage {
+ __u32 invoked;
+ __u32 is_mptcp;
+ struct sock *sk;
+ __u32 token;
+ struct sock *first;
+ char ca_name[TCP_CA_NAME_MAX];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct mptcp_storage);
+} socket_storage_map SEC(".maps");
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+ struct mptcp_storage *storage;
+ struct mptcp_sock *msk;
+ int op = (int)ctx->op;
+ struct tcp_sock *tsk;
+ struct bpf_sock *sk;
+ bool is_mptcp;
+
+ if (op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+ return 1;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ tsk = bpf_skc_to_tcp_sock(sk);
+ if (!tsk)
+ return 1;
+
+ is_mptcp = bpf_core_field_exists(tsk->is_mptcp) ? tsk->is_mptcp : 0;
+ if (!is_mptcp) {
+ storage = bpf_sk_storage_get(&socket_storage_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 1;
+
+ storage->token = 0;
+ __builtin_memset(storage->ca_name, 0, TCP_CA_NAME_MAX);
+ storage->first = NULL;
+ } else {
+ msk = bpf_skc_to_mptcp_sock(sk);
+ if (!msk)
+ return 1;
+
+ storage = bpf_sk_storage_get(&socket_storage_map, msk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 1;
+
+ storage->token = msk->token;
+ __builtin_memcpy(storage->ca_name, msk->ca_name, TCP_CA_NAME_MAX);
+ storage->first = msk->first;
+ }
+ storage->invoked++;
+ storage->is_mptcp = is_mptcp;
+ storage->sk = (struct sock *)sk;
+
+ return 1;
+}
+
+SEC("fentry/mptcp_pm_new_connection")
+int BPF_PROG(trace_mptcp_pm_new_connection, struct mptcp_sock *msk,
+ const struct sock *ssk, int server_side)
+{
+ if (!server_side)
+ token = msk->token;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
new file mode 100644
index 000000000000..d4eef0cbadb9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+int sk_index;
+int redirect_idx;
+int trace_port;
+int helper_ret;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, 100);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int mptcp_sockmap_inject(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *sk;
+
+ /* only accept specified connection */
+ if (skops->local_port != trace_port ||
+ skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
+ return 1;
+
+ sk = skops->sk;
+ if (!sk)
+ return 1;
+
+ /* update sk handler */
+ helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST);
+
+ return 1;
+}
+
+SEC("sk_skb/stream_verdict")
+int mptcp_sockmap_redirect(struct __sk_buff *skb)
+{
+ /* redirect skb to the sk under sock_map[redirect_idx] */
+ return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
new file mode 100644
index 000000000000..41389e579578
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Tessares SA. */
+/* Copyright (c) 2024, Kylin Software */
+
+/* vmlinux.h, bpf_helpers.h and other 'define' */
+#include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
+
+char _license[] SEC("license") = "GPL";
+
+char cc[TCP_CA_NAME_MAX] = "reno";
+int pid;
+
+/* Associate a subflow counter to each token */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, 100);
+} mptcp_sf SEC(".maps");
+
+SEC("sockops")
+int mptcp_subflow(struct bpf_sock_ops *skops)
+{
+ __u32 init = 1, key, mark, *cnt;
+ struct mptcp_sock *msk;
+ struct bpf_sock *sk;
+ int err;
+
+ if (skops->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+ return 1;
+
+ sk = skops->sk;
+ if (!sk)
+ return 1;
+
+ msk = bpf_skc_to_mptcp_sock(sk);
+ if (!msk)
+ return 1;
+
+ key = msk->token;
+ cnt = bpf_map_lookup_elem(&mptcp_sf, &key);
+ if (cnt) {
+ /* A new subflow is added to an existing MPTCP connection */
+ __sync_fetch_and_add(cnt, 1);
+ mark = *cnt;
+ } else {
+ /* A new MPTCP connection is just initiated and this is its primary subflow */
+ bpf_map_update_elem(&mptcp_sf, &key, &init, BPF_ANY);
+ mark = init;
+ }
+
+ /* Set the mark of the subflow's socket based on appearance order */
+ err = bpf_setsockopt(skops, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
+ if (err < 0)
+ return 1;
+ if (mark == 2)
+ err = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, TCP_CA_NAME_MAX);
+
+ return 1;
+}
+
+static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow;
+ int i = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+
+ if (ssk->sk_mark != ++i) {
+ ctx->retval = -2;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct inet_connection_sock *icsk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+ icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+ if (ssk->sk_mark == 2 &&
+ __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) {
+ ctx->retval = -2;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ struct mptcp_sock *msk;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ if (!sk || sk->protocol != IPPROTO_MPTCP ||
+ (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) &&
+ !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)))
+ return 1;
+
+ msk = bpf_core_cast(sk, struct mptcp_sock);
+ if (msk->pm.extra_subflows != 1) {
+ ctx->retval = -1;
+ return 1;
+ }
+
+ if (ctx->optname == SO_MARK)
+ return _check_getsockopt_subflow_mark(msk, ctx);
+ return _check_getsockopt_subflow_cc(msk, ctx);
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcpify.c b/tools/testing/selftests/bpf/progs/mptcpify.c
new file mode 100644
index 000000000000..cbdc730c3a47
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcpify.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023, SUSE. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+int pid;
+
+SEC("fmod_ret/update_socket_protocol")
+int BPF_PROG(mptcpify, int family, int type, int protocol)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return protocol;
+
+ if ((family == AF_INET || family == AF_INET6) &&
+ type == SOCK_STREAM &&
+ (!protocol || protocol == IPPROTO_TCP)) {
+ return IPPROTO_MPTCP;
+ }
+
+ return protocol;
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_acquire.c b/tools/testing/selftests/bpf/progs/nested_acquire.c
new file mode 100644
index 000000000000..49ad7b9adf56
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_acquire.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("tp_btf/tcp_probe")
+__success
+int BPF_PROG(test_nested_acquire_nonzero, struct sock *sk, struct sk_buff *skb)
+{
+ struct sk_buff *ptr;
+
+ ptr = bpf_kfunc_nested_acquire_nonzero_offset_test(&sk->sk_write_queue);
+
+ bpf_kfunc_nested_release_test(ptr);
+ return 0;
+}
+
+SEC("tp_btf/tcp_probe")
+__success
+int BPF_PROG(test_nested_acquire_zero, struct sock *sk, struct sk_buff *skb)
+{
+ struct sk_buff *ptr;
+
+ ptr = bpf_kfunc_nested_acquire_zero_offset_test(&sk->__sk_common);
+
+ bpf_kfunc_nested_release_test(ptr);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_common.h b/tools/testing/selftests/bpf/progs/nested_trust_common.h
new file mode 100644
index 000000000000..1784b496be2e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_trust_common.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _NESTED_TRUST_COMMON_H
+#define _NESTED_TRUST_COMMON_H
+
+#include <stdbool.h>
+
+bool bpf_cpumask_test_cpu(unsigned int cpu, const struct cpumask *cpumask) __ksym;
+__u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
+
+#endif /* _NESTED_TRUST_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
new file mode 100644
index 000000000000..3568ec450100
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#include "nested_trust_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, u64);
+} sk_storage_map SEC(".maps");
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R2 must be")
+int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_flags)
+{
+ bpf_cpumask_test_cpu(0, task->user_cpus_ptr);
+ return 0;
+}
+
+/* Although R2 is of type sk_buff but sock_common is expected, we will hit untrusted ptr first. */
+SEC("tp_btf/tcp_probe")
+__failure __msg("R2 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+int BPF_PROG(test_invalid_skb_field, struct sock *sk, struct sk_buff *skb)
+{
+ bpf_sk_storage_get(&sk_storage_map, skb->next, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c
new file mode 100644
index 000000000000..2b66953ca82e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#include "nested_trust_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, u64);
+} sk_storage_map SEC(".maps");
+
+SEC("tp_btf/task_newtask")
+__success
+int BPF_PROG(test_read_cpumask, struct task_struct *task, u64 clone_flags)
+{
+ bpf_cpumask_test_cpu(0, task->cpus_ptr);
+ return 0;
+}
+
+SEC("tp_btf/tcp_probe")
+__success
+int BPF_PROG(test_skb_field, struct sock *sk, struct sk_buff *skb)
+{
+ bpf_sk_storage_get(&sk_storage_map, skb->sk, 0, 0);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__success
+int BPF_PROG(test_nested_offset, struct task_struct *task, u64 clone_flags)
+{
+ bpf_cpumask_first_zero(&task->cpus_mask);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/net_timestamping.c b/tools/testing/selftests/bpf/progs/net_timestamping.c
new file mode 100644
index 000000000000..b4c2f0f2be11
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/net_timestamping.c
@@ -0,0 +1,248 @@
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include <errno.h>
+
+__u32 monitored_pid = 0;
+
+int nr_active;
+int nr_snd;
+int nr_passive;
+int nr_sched;
+int nr_txsw;
+int nr_ack;
+
+struct sk_stg {
+ __u64 sendmsg_ns; /* record ts when sendmsg is called */
+};
+
+struct sk_tskey {
+ u64 cookie;
+ u32 tskey;
+};
+
+struct delay_info {
+ u64 sendmsg_ns; /* record ts when sendmsg is called */
+ u32 sched_delay; /* SCHED_CB - sendmsg_ns */
+ u32 snd_sw_delay; /* SND_SW_CB - SCHED_CB */
+ u32 ack_delay; /* ACK_CB - SND_SW_CB */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct sk_stg);
+} sk_stg_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct sk_tskey);
+ __type(value, struct delay_info);
+ __uint(max_entries, 1024);
+} time_map SEC(".maps");
+
+static u64 delay_tolerance_nsec = 10000000000; /* 10 second as an example */
+
+extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym;
+
+static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected)
+{
+ int tmp, new = SK_BPF_CB_TX_TIMESTAMPING;
+ int opt = SK_BPF_CB_FLAGS;
+ int level = SOL_SOCKET;
+
+ if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected)
+ return 1;
+
+ if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected ||
+ (!expected && tmp != new))
+ return 1;
+
+ return 0;
+}
+
+static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk)
+{
+ if (bpf_test_sockopt(ctx, sk, -EOPNOTSUPP))
+ return true;
+ return false;
+}
+
+static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops)
+{
+ u8 opt[3] = {0};
+ int load_flags = 0;
+ int ret;
+
+ ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags);
+ if (ret != -EOPNOTSUPP)
+ return true;
+
+ return false;
+}
+
+static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops)
+{
+ int ret;
+
+ ret = bpf_sock_ops_cb_flags_set(skops, 0);
+ if (ret != -EOPNOTSUPP)
+ return true;
+
+ return false;
+}
+
+/* In the timestamping callbacks, we're not allowed to call the following
+ * BPF CALLs for the safety concern. Return false if expected.
+ */
+static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops,
+ const struct sock *sk)
+{
+ if (bpf_test_access_sockopt(skops, sk))
+ return true;
+
+ if (bpf_test_access_load_hdr_opt(skops))
+ return true;
+
+ if (bpf_test_access_cb_flags_set(skops))
+ return true;
+
+ return false;
+}
+
+static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk)
+{
+ struct bpf_sock_ops_kern *skops_kern;
+ u64 timestamp = bpf_ktime_get_ns();
+ struct skb_shared_info *shinfo;
+ struct delay_info dinfo = {0};
+ struct sk_tskey key = {0};
+ struct delay_info *val;
+ struct sk_buff *skb;
+ struct sk_stg *stg;
+ u64 prior_ts, delay;
+
+ if (bpf_test_access_bpf_calls(skops, sk))
+ return false;
+
+ skops_kern = bpf_cast_to_kern_ctx(skops);
+ skb = skops_kern->skb;
+ shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info);
+
+ key.cookie = bpf_get_socket_cookie(skops);
+ if (!key.cookie)
+ return false;
+
+ if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) {
+ stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0);
+ if (!stg)
+ return false;
+ dinfo.sendmsg_ns = stg->sendmsg_ns;
+ bpf_sock_ops_enable_tx_tstamp(skops_kern, 0);
+ key.tskey = shinfo->tskey;
+ if (!key.tskey)
+ return false;
+ bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY);
+ return true;
+ }
+
+ key.tskey = shinfo->tskey;
+ if (!key.tskey)
+ return false;
+
+ val = bpf_map_lookup_elem(&time_map, &key);
+ if (!val)
+ return false;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
+ val->sched_delay = timestamp - val->sendmsg_ns;
+ delay = val->sched_delay;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
+ prior_ts = val->sched_delay + val->sendmsg_ns;
+ val->snd_sw_delay = timestamp - prior_ts;
+ delay = val->snd_sw_delay;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_ACK_CB:
+ prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns;
+ val->ack_delay = timestamp - prior_ts;
+ delay = val->ack_delay;
+ break;
+ }
+
+ if (delay >= delay_tolerance_nsec)
+ return false;
+
+ /* Since it's the last one, remove from the map after latency check */
+ if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB)
+ bpf_map_delete_elem(&time_map, &key);
+
+ return true;
+}
+
+SEC("fentry/tcp_sendmsg_locked")
+int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg,
+ size_t size)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ u64 timestamp = bpf_ktime_get_ns();
+ u32 flag = sk->sk_bpf_cb_flags;
+ struct sk_stg *stg;
+
+ if (pid != monitored_pid || !flag)
+ return 0;
+
+ stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!stg)
+ return 0;
+
+ stg->sendmsg_ns = timestamp;
+ nr_snd += 1;
+ return 0;
+}
+
+SEC("sockops")
+int skops_sockopt(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *bpf_sk = skops->sk;
+ const struct sock *sk;
+
+ if (!bpf_sk)
+ return 1;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
+ if (!sk)
+ return 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ nr_active += !bpf_test_sockopt(skops, sk, 0);
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_snd += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_sched += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_txsw += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_ACK_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_ack += 1;
+ break;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index d071adf178bd..f9ef8aee56f1 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -13,21 +13,20 @@
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct percpu_net_cnt);
+ __type(value, union percpu_net_cnt);
} percpu_netcnt SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct net_cnt);
+ __type(value, union net_cnt);
} netcnt SEC(".maps");
SEC("cgroup/skb")
int bpf_nextcnt(struct __sk_buff *skb)
{
- struct percpu_net_cnt *percpu_cnt;
- char fmt[] = "%d %llu %llu\n";
- struct net_cnt *cnt;
+ union percpu_net_cnt *percpu_cnt;
+ union net_cnt *cnt;
__u64 ts, dt;
int ret;
@@ -68,4 +67,3 @@ int bpf_nextcnt(struct __sk_buff *skb)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
index 6b670039ea67..9e067dcbf607 100644
--- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -5,6 +5,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
#include <errno.h>
@@ -16,8 +17,11 @@ bool skip = false;
#define STRSIZE 2048
#define EXPECTED_STRSIZE 256
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#if defined(bpf_target_s390)
+/* NULL points to a readable struct lowcore on s390, so take the last page */
+#define BADPTR ((void *)0xFFFFFFFFFFFFF000ULL)
+#else
+#define BADPTR 0
#endif
struct {
@@ -46,7 +50,6 @@ static int __strncmp(const void *m1, const void *m2, size_t len)
do { \
static const char _expectedval[EXPECTED_STRSIZE] = \
_expected; \
- static const char _ptrtype[64] = #_type; \
__u64 _hflags = _flags | BTF_F_COMPACT; \
static _type _ptrdata = __VA_ARGS__; \
static struct btf_ptr _ptr = { }; \
@@ -113,11 +116,11 @@ int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
}
/* Check invalid ptr value */
- p.ptr = 0;
+ p.ptr = BADPTR;
__ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
if (__ret >= 0) {
- bpf_printk("printing NULL should generate error, got (%d)",
- __ret);
+ bpf_printk("printing %llx should generate error, got (%d)",
+ (unsigned long long)BADPTR, __ret);
ret = -ERANGE;
}
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
new file mode 100644
index 000000000000..94040714af18
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+#define AF_INET6 10
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sockops_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_msg_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+int tcx_init_netns_cookie, tcx_netns_cookie;
+int cgroup_skb_init_netns_cookie, cgroup_skb_netns_cookie;
+
+SEC("sockops")
+int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ int *cookie;
+ __u32 key = 0;
+
+ if (ctx->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ switch (ctx->op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ cookie = bpf_sk_storage_get(&sockops_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(ctx);
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ bpf_sock_map_update(ctx, &sock_map, &key, BPF_NOEXIST);
+ break;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+SEC("sk_msg")
+int get_netns_cookie_sk_msg(struct sk_msg_md *msg)
+{
+ struct bpf_sock *sk = msg->sk;
+ int *cookie;
+
+ if (msg->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ cookie = bpf_sk_storage_get(&sk_msg_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(msg);
+
+ return 1;
+}
+
+SEC("tcx/ingress")
+int get_netns_cookie_tcx(struct __sk_buff *skb)
+{
+ tcx_init_netns_cookie = bpf_get_netns_cookie(NULL);
+ tcx_netns_cookie = bpf_get_netns_cookie(skb);
+ return TCX_PASS;
+}
+
+SEC("cgroup_skb/ingress")
+int get_netns_cookie_cgroup_skb(struct __sk_buff *skb)
+{
+ cgroup_skb_init_netns_cookie = bpf_get_netns_cookie(NULL);
+ cgroup_skb_netns_cookie = bpf_get_netns_cookie(skb);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/normal_map_btf.c b/tools/testing/selftests/bpf/progs/normal_map_btf.c
new file mode 100644
index 000000000000..a45c9299552c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/normal_map_btf.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ __u64 data;
+ struct bpf_list_node node;
+};
+
+struct map_value {
+ struct bpf_list_head head __contains(node_data, node);
+ struct bpf_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} array SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+bool done = false;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int add_to_list_in_array(void *ctx)
+{
+ struct map_value *value;
+ struct node_data *new;
+ int zero = 0;
+
+ if (done || (int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ value = bpf_map_lookup_elem(&array, &zero);
+ if (!value)
+ return 0;
+
+ new = bpf_obj_new(typeof(*new));
+ if (!new)
+ return 0;
+
+ bpf_spin_lock(&value->lock);
+ bpf_list_push_back(&value->head, &new->node);
+ bpf_spin_unlock(&value->lock);
+ done = true;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
new file mode 100644
index 000000000000..37c2d2608ec0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
@@ -0,0 +1,190 @@
+#include "bpf_experimental.h"
+
+struct val_t {
+ long b, c, d;
+};
+
+struct elem {
+ long sum;
+ struct val_t __percpu_kptr *pc;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+const volatile int nr_cpus;
+
+/* Initialize the percpu object */
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test_array_map_1)
+{
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ return 0;
+}
+
+/* Update percpu data */
+SEC("?fentry/bpf_fentry_test2")
+int BPF_PROG(test_array_map_2)
+{
+ struct val_t __percpu_kptr *p;
+ struct val_t *v;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ v = bpf_per_cpu_ptr(p, 0);
+ if (!v)
+ return 0;
+ v->c = 1;
+ v->d = 2;
+
+ return 0;
+}
+
+int cpu0_field_d, sum_field_c;
+int my_pid;
+
+/* Summarize percpu data */
+SEC("?fentry/bpf_fentry_test3")
+int BPF_PROG(test_array_map_3)
+{
+ struct val_t __percpu_kptr *p;
+ int i, index = 0;
+ struct val_t *v;
+ struct elem *e;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+ return 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ bpf_for(i, 0, nr_cpus) {
+ v = bpf_per_cpu_ptr(p, i);
+ if (v) {
+ if (i == 0)
+ cpu0_field_d = v->d;
+ sum_field_c += v->c;
+ }
+ }
+
+ return 0;
+}
+
+/* Explicitly free allocated percpu data */
+SEC("?fentry/bpf_fentry_test4")
+int BPF_PROG(test_array_map_4)
+{
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ /* delete */
+ p = bpf_kptr_xchg(&e->pc, NULL);
+ if (p) {
+ bpf_percpu_obj_drop(p);
+ }
+
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+int BPF_PROG(test_array_map_10)
+{
+ struct val_t __percpu_kptr *p, *p1;
+ int i, index = 0;
+ struct val_t *v;
+ struct elem *e;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+ return 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ bpf_rcu_read_lock();
+ p = e->pc;
+ if (!p) {
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ goto out;
+
+ p1 = bpf_kptr_xchg(&e->pc, p);
+ if (p1) {
+ /* race condition */
+ bpf_percpu_obj_drop(p1);
+ }
+ }
+
+ v = bpf_this_cpu_ptr(p);
+ v->c = 3;
+ v = bpf_this_cpu_ptr(p);
+ v->c = 0;
+
+ v = bpf_per_cpu_ptr(p, 0);
+ if (!v)
+ goto out;
+ v->c = 1;
+ v->d = 2;
+
+ /* delete */
+ p1 = bpf_kptr_xchg(&e->pc, NULL);
+ if (!p1)
+ goto out;
+
+ bpf_for(i, 0, nr_cpus) {
+ v = bpf_per_cpu_ptr(p, i);
+ if (v) {
+ if (i == 0)
+ cpu0_field_d = v->d;
+ sum_field_c += v->c;
+ }
+ }
+
+ /* finally release p */
+ bpf_percpu_obj_drop(p1);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c
new file mode 100644
index 000000000000..a2acf9aa6c24
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c
@@ -0,0 +1,109 @@
+#include "bpf_experimental.h"
+
+struct val_t {
+ long b, c, d;
+};
+
+struct elem {
+ long sum;
+ struct val_t __percpu_kptr *pc;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct elem);
+} cgrp SEC(".maps");
+
+const volatile int nr_cpus;
+
+/* Initialize the percpu object */
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test_cgrp_local_storage_1)
+{
+ struct task_struct *task;
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+
+ task = bpf_get_current_task_btf();
+ e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ return 0;
+}
+
+/* Percpu data collection */
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test_cgrp_local_storage_2)
+{
+ struct task_struct *task;
+ struct val_t __percpu_kptr *p;
+ struct val_t *v;
+ struct elem *e;
+
+ task = bpf_get_current_task_btf();
+ e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ v = bpf_per_cpu_ptr(p, 0);
+ if (!v)
+ return 0;
+ v->c = 1;
+ v->d = 2;
+ return 0;
+}
+
+int cpu0_field_d, sum_field_c;
+int my_pid;
+
+/* Summarize percpu data collection */
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG(test_cgrp_local_storage_3)
+{
+ struct task_struct *task;
+ struct val_t __percpu_kptr *p;
+ struct val_t *v;
+ struct elem *e;
+ int i;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ bpf_for(i, 0, nr_cpus) {
+ v = bpf_per_cpu_ptr(p, i);
+ if (v) {
+ if (i == 0)
+ cpu0_field_d = v->d;
+ sum_field_c += v->c;
+ }
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
new file mode 100644
index 000000000000..f2b8eb2ff76f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
@@ -0,0 +1,182 @@
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct val_t {
+ long b, c, d;
+};
+
+struct val2_t {
+ long b;
+};
+
+struct val_with_ptr_t {
+ char *p;
+};
+
+struct val_with_rb_root_t {
+ struct bpf_spin_lock lock;
+};
+
+struct val_600b_t {
+ char b[600];
+};
+
+struct elem {
+ long sum;
+ struct val_t __percpu_kptr *pc;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+long ret;
+
+SEC("?fentry/bpf_fentry_test1")
+__failure __msg("store to referenced kptr disallowed")
+int BPF_PROG(test_array_map_1)
+{
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ e->pc = (struct val_t __percpu_kptr *)ret;
+ return 0;
+}
+
+SEC("?fentry/bpf_fentry_test1")
+__failure __msg("invalid kptr access, R2 type=percpu_ptr_val2_t expected=ptr_val_t")
+int BPF_PROG(test_array_map_2)
+{
+ struct val2_t __percpu_kptr *p2;
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p2 = bpf_percpu_obj_new(struct val2_t);
+ if (!p2)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p2);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("R1 type=scalar expected=percpu_ptr_, percpu_rcu_ptr_, percpu_trusted_ptr_")
+int BPF_PROG(test_array_map_3)
+{
+ struct val_t __percpu_kptr *p, *p1;
+ struct val_t *v;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p1 = bpf_kptr_xchg(&e->pc, p);
+ if (p1)
+ bpf_percpu_obj_drop(p1);
+
+ v = bpf_this_cpu_ptr(p);
+ ret = v->b;
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("arg#0 expected for bpf_percpu_obj_drop_impl()")
+int BPF_PROG(test_array_map_4)
+{
+ struct val_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ bpf_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("arg#0 expected for bpf_obj_drop_impl()")
+int BPF_PROG(test_array_map_5)
+{
+ struct val_t *p;
+
+ p = bpf_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type ID argument must be of a struct of scalars")
+int BPF_PROG(test_array_map_6)
+{
+ struct val_with_ptr_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_with_ptr_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type ID argument must not contain special fields")
+int BPF_PROG(test_array_map_7)
+{
+ struct val_with_rb_root_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_with_rb_root_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type size (600) is greater than 512")
+int BPF_PROG(test_array_map_8)
+{
+ struct val_600b_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_600b_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
index 25467d13c356..f793280a3238 100644
--- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -11,8 +11,8 @@ typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(max_entries, 16384);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(stack_trace_t));
+ __type(key, __u32);
+ __type(value, stack_trace_t);
} stackmap SEC(".maps");
struct {
@@ -35,10 +35,10 @@ int oncpu(void *ctx)
long val;
val = bpf_get_stackid(ctx, &stackmap, 0);
- if (val > 0)
+ if (val >= 0)
stackid_kernel = 2;
val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
- if (val > 0)
+ if (val >= 0)
stackid_user = 2;
trace = bpf_map_lookup_elem(&stackdata_map, &key);
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
index e5ab4836a641..29c1639fc78a 100644
--- a/tools/testing/selftests/bpf/progs/perfbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -18,10 +19,9 @@ const volatile int batch_cnt = 0;
long sample_val = 42;
long dropped __attribute__((aligned(128))) = 0;
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_perfbuf(void *ctx)
{
- __u64 *sample;
int i;
for (i = 0; i < batch_cnt; i++) {
diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c
new file mode 100644
index 000000000000..7d04254e61f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/preempt_lock.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym;
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
+int preempt_lock_missing_1(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
+int preempt_lock_missing_2(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
+int preempt_lock_missing_3(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
+int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ bpf_preempt_enable();
+ bpf_preempt_enable();
+ return 0;
+}
+
+static __noinline void preempt_disable(void)
+{
+ bpf_preempt_disable();
+}
+
+static __noinline void preempt_enable(void)
+{
+ bpf_preempt_enable();
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
+int preempt_lock_missing_1_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
+int preempt_lock_missing_2_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
+int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ preempt_disable();
+ preempt_enable();
+ return 0;
+}
+
+static __noinline void preempt_balance_subprog(void)
+{
+ preempt_disable();
+ preempt_enable();
+}
+
+SEC("?tc")
+__success int preempt_balance(struct __sk_buff *ctx)
+{
+ bpf_guard_preempt();
+ return 0;
+}
+
+SEC("?tc")
+__success int preempt_balance_subprog_test(struct __sk_buff *ctx)
+{
+ preempt_balance_subprog();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("sleepable helper bpf_copy_from_user#")
+int preempt_sleepable_helper(void *ctx)
+{
+ u32 data;
+
+ bpf_preempt_disable();
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ bpf_preempt_enable();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_copy_from_user_str is sleepable within non-preemptible region")
+int preempt_sleepable_kfunc(void *ctx)
+{
+ u32 data;
+
+ bpf_preempt_disable();
+ bpf_copy_from_user_str(&data, sizeof(data), NULL, 0);
+ bpf_preempt_enable();
+ return 0;
+}
+
+int __noinline preempt_global_subprog(void)
+{
+ preempt_balance_subprog();
+ return 0;
+}
+
+SEC("?tc")
+__success
+int preempt_global_subprog_test(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ preempt_global_subprog();
+ preempt_enable();
+ return 0;
+}
+
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_helper_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_kfunc_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_subprog_calling_sleepable_global(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c
new file mode 100644
index 000000000000..55907ef961bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+
+struct bin_data {
+ char data[256];
+ struct bpf_spin_lock lock;
+};
+
+struct map_value {
+ struct bin_data __kptr * data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2048);
+} array SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+bool nomem_err = false;
+
+static int del_array(unsigned int i, int *from)
+{
+ struct map_value *value;
+ struct bin_data *old;
+
+ value = bpf_map_lookup_elem(&array, from);
+ if (!value)
+ return 1;
+
+ old = bpf_kptr_xchg(&value->data, NULL);
+ if (old)
+ bpf_obj_drop(old);
+
+ (*from)++;
+ return 0;
+}
+
+static int add_array(unsigned int i, int *from)
+{
+ struct bin_data *old, *new;
+ struct map_value *value;
+
+ value = bpf_map_lookup_elem(&array, from);
+ if (!value)
+ return 1;
+
+ new = bpf_obj_new(typeof(*new));
+ if (!new) {
+ nomem_err = true;
+ return 1;
+ }
+
+ old = bpf_kptr_xchg(&value->data, new);
+ if (old)
+ bpf_obj_drop(old);
+
+ (*from)++;
+ return 0;
+}
+
+static void del_then_add_array(int from)
+{
+ int i;
+
+ i = from;
+ bpf_loop(512, del_array, &i, 0);
+
+ i = from;
+ bpf_loop(512, add_array, &i, 0);
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG2(test0, int, a)
+{
+ del_then_add_array(0);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG2(test1, int, a, u64, b)
+{
+ del_then_add_array(512);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG2(test2, char, a, int, b, u64, c)
+{
+ del_then_add_array(1024);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG2(test3, void *, a, char, b, int, c, u64, d)
+{
+ del_then_add_array(1536);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c
new file mode 100644
index 000000000000..cfc1f48e0d28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/prepare.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map SEC(".maps");
+
+SEC("cgroup_skb/egress")
+int program(struct __sk_buff *skb)
+{
+ err = 0;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_freplace_prog.c b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
new file mode 100644
index 000000000000..ccf1b04010ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("freplace/xdp_prog1")
+int new_xdp_prog2(struct xdp_md *xd)
+{
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c
new file mode 100644
index 000000000000..9085be50f03b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_map.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __uint(max_entries, 1);
+ __type(value, __u32);
+} priv_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c
new file mode 100644
index 000000000000..725e29595079
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("xdp")
+int xdp_prog1(struct xdp_md *xdp)
+{
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue.c b/tools/testing/selftests/bpf/progs/pro_epilogue.c
new file mode 100644
index 000000000000..d97d6e07ef5c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void __kfunc_btf_root(void)
+{
+ bpf_kfunc_st_ops_inc10(NULL);
+}
+
+static __noinline __used int subprog(struct st_ops_args *args)
+{
+ args->a += 1;
+ return args->a;
+}
+
+__success
+/* prologue */
+__xlated("0: r6 = *(u64 *)(r1 +0)")
+__xlated("1: r7 = *(u64 *)(r6 +0)")
+__xlated("2: r7 += 1000")
+__xlated("3: *(u64 *)(r6 +0) = r7")
+/* main prog */
+__xlated("4: r1 = *(u64 *)(r1 +0)")
+__xlated("5: r6 = r1")
+__xlated("6: call kernel-function")
+__xlated("7: r1 = r6")
+__xlated("8: call pc+1")
+__xlated("9: exit")
+SEC("struct_ops/test_prologue")
+__naked int test_prologue(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r1 +0);"
+ "r6 = r1;"
+ "call %[bpf_kfunc_st_ops_inc10];"
+ "r1 = r6;"
+ "call subprog;"
+ "exit;"
+ :
+ : __imm(bpf_kfunc_st_ops_inc10)
+ : __clobber_all);
+}
+
+__success
+/* save __u64 *ctx to stack */
+__xlated("0: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("1: r1 = *(u64 *)(r1 +0)")
+__xlated("2: r6 = r1")
+__xlated("3: call kernel-function")
+__xlated("4: r1 = r6")
+__xlated("5: call pc+")
+/* epilogue */
+__xlated("6: r1 = *(u64 *)(r10 -8)")
+__xlated("7: r1 = *(u64 *)(r1 +0)")
+__xlated("8: r6 = *(u64 *)(r1 +0)")
+__xlated("9: r6 += 10000")
+__xlated("10: *(u64 *)(r1 +0) = r6")
+__xlated("11: r0 = r6")
+__xlated("12: r0 *= 2")
+__xlated("13: exit")
+SEC("struct_ops/test_epilogue")
+__naked int test_epilogue(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r1 +0);"
+ "r6 = r1;"
+ "call %[bpf_kfunc_st_ops_inc10];"
+ "r1 = r6;"
+ "call subprog;"
+ "exit;"
+ :
+ : __imm(bpf_kfunc_st_ops_inc10)
+ : __clobber_all);
+}
+
+__success
+/* prologue */
+__xlated("0: r6 = *(u64 *)(r1 +0)")
+__xlated("1: r7 = *(u64 *)(r6 +0)")
+__xlated("2: r7 += 1000")
+__xlated("3: *(u64 *)(r6 +0) = r7")
+/* save __u64 *ctx to stack */
+__xlated("4: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("5: r1 = *(u64 *)(r1 +0)")
+__xlated("6: r6 = r1")
+__xlated("7: call kernel-function")
+__xlated("8: r1 = r6")
+__xlated("9: call pc+")
+/* epilogue */
+__xlated("10: r1 = *(u64 *)(r10 -8)")
+__xlated("11: r1 = *(u64 *)(r1 +0)")
+__xlated("12: r6 = *(u64 *)(r1 +0)")
+__xlated("13: r6 += 10000")
+__xlated("14: *(u64 *)(r1 +0) = r6")
+__xlated("15: r0 = r6")
+__xlated("16: r0 *= 2")
+__xlated("17: exit")
+SEC("struct_ops/test_pro_epilogue")
+__naked int test_pro_epilogue(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r1 +0);"
+ "r6 = r1;"
+ "call %[bpf_kfunc_st_ops_inc10];"
+ "r1 = r6;"
+ "call subprog;"
+ "exit;"
+ :
+ : __imm(bpf_kfunc_st_ops_inc10)
+ : __clobber_all);
+}
+
+SEC("syscall")
+__retval(1011) /* PROLOGUE_A [1000] + KFUNC_INC10 + SUBPROG_A [1] */
+int syscall_prologue(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_prologue(&args);
+}
+
+SEC("syscall")
+__retval(20022) /* (KFUNC_INC10 + SUBPROG_A [1] + EPILOGUE_A [10000]) * 2 */
+int syscall_epilogue(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_epilogue(&args);
+}
+
+SEC("syscall")
+__retval(22022) /* (PROLOGUE_A [1000] + KFUNC_INC10 + SUBPROG_A [1] + EPILOGUE_A [10000]) * 2 */
+int syscall_pro_epilogue(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_pro_epilogue(&args);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops pro_epilogue = {
+ .test_prologue = (void *)test_prologue,
+ .test_epilogue = (void *)test_epilogue,
+ .test_pro_epilogue = (void *)test_pro_epilogue,
+};
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c
new file mode 100644
index 000000000000..6048d79be48b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__success
+/* prologue */
+__xlated("0: r6 = *(u64 *)(r1 +0)")
+__xlated("1: r7 = *(u64 *)(r6 +0)")
+__xlated("2: r7 += 1000")
+__xlated("3: *(u64 *)(r6 +0) = r7")
+/* main prog */
+__xlated("4: if r1 == 0x0 goto pc+5")
+__xlated("5: if r1 == 0x1 goto pc+2")
+__xlated("6: r1 = 1")
+__xlated("7: goto pc-3")
+__xlated("8: r1 = 0")
+__xlated("9: goto pc-6")
+__xlated("10: r0 = 0")
+__xlated("11: exit")
+SEC("struct_ops/test_prologue_goto_start")
+__naked int test_prologue_goto_start(void)
+{
+ asm volatile (
+ "if r1 == 0 goto +5;"
+ "if r1 == 1 goto +2;"
+ "r1 = 1;"
+ "goto -3;"
+ "r1 = 0;"
+ "goto -6;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+__success
+/* save __u64 *ctx to stack */
+__xlated("0: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("1: if r1 == 0x0 goto pc+5")
+__xlated("2: if r1 == 0x1 goto pc+2")
+__xlated("3: r1 = 1")
+__xlated("4: goto pc-3")
+__xlated("5: r1 = 0")
+__xlated("6: goto pc-6")
+__xlated("7: r0 = 0")
+/* epilogue */
+__xlated("8: r1 = *(u64 *)(r10 -8)")
+__xlated("9: r1 = *(u64 *)(r1 +0)")
+__xlated("10: r6 = *(u64 *)(r1 +0)")
+__xlated("11: r6 += 10000")
+__xlated("12: *(u64 *)(r1 +0) = r6")
+__xlated("13: r0 = r6")
+__xlated("14: r0 *= 2")
+__xlated("15: exit")
+SEC("struct_ops/test_epilogue_goto_start")
+__naked int test_epilogue_goto_start(void)
+{
+ asm volatile (
+ "if r1 == 0 goto +5;"
+ "if r1 == 1 goto +2;"
+ "r1 = 1;"
+ "goto -3;"
+ "r1 = 0;"
+ "goto -6;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+__success
+/* prologue */
+__xlated("0: r6 = *(u64 *)(r1 +0)")
+__xlated("1: r7 = *(u64 *)(r6 +0)")
+__xlated("2: r7 += 1000")
+__xlated("3: *(u64 *)(r6 +0) = r7")
+/* save __u64 *ctx to stack */
+__xlated("4: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("5: if r1 == 0x0 goto pc+5")
+__xlated("6: if r1 == 0x1 goto pc+2")
+__xlated("7: r1 = 1")
+__xlated("8: goto pc-3")
+__xlated("9: r1 = 0")
+__xlated("10: goto pc-6")
+__xlated("11: r0 = 0")
+/* epilogue */
+__xlated("12: r1 = *(u64 *)(r10 -8)")
+__xlated("13: r1 = *(u64 *)(r1 +0)")
+__xlated("14: r6 = *(u64 *)(r1 +0)")
+__xlated("15: r6 += 10000")
+__xlated("16: *(u64 *)(r1 +0) = r6")
+__xlated("17: r0 = r6")
+__xlated("18: r0 *= 2")
+__xlated("19: exit")
+SEC("struct_ops/test_pro_epilogue_goto_start")
+__naked int test_pro_epilogue_goto_start(void)
+{
+ asm volatile (
+ "if r1 == 0 goto +5;"
+ "if r1 == 1 goto +2;"
+ "r1 = 1;"
+ "goto -3;"
+ "r1 = 0;"
+ "goto -6;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops epilogue_goto_start = {
+ .test_prologue = (void *)test_prologue_goto_start,
+ .test_epilogue = (void *)test_epilogue_goto_start,
+ .test_pro_epilogue = (void *)test_pro_epilogue_goto_start,
+};
+
+SEC("syscall")
+__retval(0)
+int syscall_prologue_goto_start(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_prologue(&args);
+}
+
+SEC("syscall")
+__retval(20000) /* (EPILOGUE_A [10000]) * 2 */
+int syscall_epilogue_goto_start(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_epilogue(&args);
+}
+
+SEC("syscall")
+__retval(22000) /* (PROLOGUE_A [1000] + EPILOGUE_A [10000]) * 2 */
+int syscall_pro_epilogue_goto_start(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_pro_epilogue(&args);
+}
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
new file mode 100644
index 000000000000..a5a8f08ac8fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void __kfunc_btf_root(void)
+{
+ bpf_kfunc_st_ops_inc10(NULL);
+}
+
+static __noinline __used int subprog(struct st_ops_args *args)
+{
+ args->a += 1;
+ return args->a;
+}
+
+__success
+/* prologue */
+__xlated("0: r8 = r1")
+__xlated("1: r1 = 0")
+__xlated("2: call kernel-function")
+__xlated("3: if r0 != 0x0 goto pc+5")
+__xlated("4: r6 = *(u64 *)(r8 +0)")
+__xlated("5: r7 = *(u64 *)(r6 +0)")
+__xlated("6: r7 += 1000")
+__xlated("7: *(u64 *)(r6 +0) = r7")
+__xlated("8: goto pc+2")
+__xlated("9: r1 = r0")
+__xlated("10: call kernel-function")
+__xlated("11: r1 = r8")
+/* save __u64 *ctx to stack */
+__xlated("12: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("13: r1 = *(u64 *)(r1 +0)")
+__xlated("14: r6 = r1")
+__xlated("15: call kernel-function")
+__xlated("16: r1 = r6")
+__xlated("17: call pc+")
+/* epilogue */
+__xlated("18: r1 = 0")
+__xlated("19: r6 = 0")
+__xlated("20: call kernel-function")
+__xlated("21: if r0 != 0x0 goto pc+6")
+__xlated("22: r1 = *(u64 *)(r10 -8)")
+__xlated("23: r1 = *(u64 *)(r1 +0)")
+__xlated("24: r6 = *(u64 *)(r1 +0)")
+__xlated("25: r6 += 10000")
+__xlated("26: *(u64 *)(r1 +0) = r6")
+__xlated("27: goto pc+2")
+__xlated("28: r1 = r0")
+__xlated("29: call kernel-function")
+__xlated("30: r0 = r6")
+__xlated("31: r0 *= 2")
+__xlated("32: exit")
+SEC("struct_ops/test_pro_epilogue")
+__naked int test_kfunc_pro_epilogue(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r1 +0);"
+ "r6 = r1;"
+ "call %[bpf_kfunc_st_ops_inc10];"
+ "r1 = r6;"
+ "call subprog;"
+ "exit;"
+ :
+ : __imm(bpf_kfunc_st_ops_inc10)
+ : __clobber_all);
+}
+
+SEC("syscall")
+__retval(22022) /* (PROLOGUE_A [1000] + KFUNC_INC10 + SUBPROG_A [1] + EPILOGUE_A [10000]) * 2 */
+int syscall_pro_epilogue(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_pro_epilogue(&args);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops pro_epilogue_with_kfunc = {
+ .test_pro_epilogue = (void *)test_kfunc_pro_epilogue,
+};
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 4896fdf816f7..813143b4985d 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -6,6 +6,10 @@
#include <bpf/bpf_tracing.h>
#include "profiler.h"
+#include "err.h"
+#include "bpf_experimental.h"
+#include "bpf_compiler.h"
+#include "bpf_misc.h"
#ifndef NULL
#define NULL 0
@@ -16,7 +20,6 @@
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
-#define MAX_ERRNO 4095
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
@@ -34,7 +37,6 @@
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
-#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO
#define KILL_DATA_ARRAY_SIZE 8
@@ -132,10 +134,6 @@ struct {
__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
-#endif
-
static INLINE bool IS_ERR(const void* ptr)
{
return IS_ERR_VALUE((unsigned long)ptr);
@@ -156,10 +154,10 @@ probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
len = len < max ? len : max;
if (len > 1) {
- if (bpf_probe_read(dst, len, src))
+ if (bpf_probe_read_kernel(dst, len, src))
return 0;
} else if (len == 1) {
- if (bpf_probe_read(dst, 1, src))
+ if (bpf_probe_read_kernel(dst, 1, src))
return 0;
}
return len;
@@ -169,7 +167,7 @@ static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
int spid)
{
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
if (arr_struct->array[i].meta.pid == spid)
@@ -185,7 +183,7 @@ static INLINE void populate_ancestors(struct task_struct* task,
ancestors_data->num_ancestors = 0;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
parent = BPF_CORE_READ(parent, real_parent);
@@ -212,20 +210,20 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
size_t filepart_length;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
filepart_length =
- bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(cgroup_node, name));
if (!cgroup_node)
return payload;
if (cgroup_node == cgroup_root_node)
*root_pos = payload - payload_start;
- if (filepart_length <= MAX_PATH) {
- barrier_var(filepart_length);
+ if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
payload += filepart_length;
}
- cgroup_node = BPF_CORE_READ(cgroup_node, parent);
+ cgroup_node = BPF_CORE_READ(cgroup_node, __parent);
}
return payload;
}
@@ -261,7 +259,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
pids_cgrp_id___local);
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys_state* subsys =
@@ -303,19 +301,17 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
cgroup_data->cgroup_full_length = 0;
size_t cgroup_root_length =
- bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
- barrier_var(cgroup_root_length);
- if (cgroup_root_length <= MAX_PATH) {
- barrier_var(cgroup_root_length);
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(root_kernfs, name));
+ if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) {
cgroup_data->cgroup_root_length = cgroup_root_length;
payload += cgroup_root_length;
}
size_t cgroup_proc_length =
- bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
- barrier_var(cgroup_proc_length);
- if (cgroup_proc_length <= MAX_PATH) {
- barrier_var(cgroup_proc_length);
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(proc_kernfs, name));
+ if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) {
cgroup_data->cgroup_proc_length = cgroup_proc_length;
payload += cgroup_proc_length;
}
@@ -345,9 +341,7 @@ static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
metadata->comm_length = 0;
size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
- barrier_var(comm_length);
- if (comm_length <= TASK_COMM_LEN) {
- barrier_var(comm_length);
+ if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
metadata->comm_length = comm_length;
payload += comm_length;
}
@@ -395,7 +389,8 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
if (arr_struct == NULL)
return 0;
- bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
+ bpf_probe_read_kernel(&arr_struct->array[0],
+ sizeof(arr_struct->array[0]), kill_data);
} else {
int index = get_var_spid_index(arr_struct, spid);
@@ -405,12 +400,13 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
if (kill_data == NULL)
return 0;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
if (arr_struct->array[i].meta.pid == 0) {
- bpf_probe_read(&arr_struct->array[i],
- sizeof(arr_struct->array[i]), kill_data);
+ bpf_probe_read_kernel(&arr_struct->array[i],
+ sizeof(arr_struct->array[i]),
+ kill_data);
bpf_map_update_elem(&var_tpid_to_data, &tpid,
arr_struct, 0);
@@ -427,17 +423,17 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
if (delta_sec < STALE_INFO) {
kill_data->kill_count++;
kill_data->last_kill_time = bpf_ktime_get_ns();
- bpf_probe_read(&arr_struct->array[index],
- sizeof(arr_struct->array[index]),
- kill_data);
+ bpf_probe_read_kernel(&arr_struct->array[index],
+ sizeof(arr_struct->array[index]),
+ kill_data);
} else {
struct var_kill_data_t* kill_data =
get_var_kill_data(ctx, spid, tpid, sig);
if (kill_data == NULL)
return 0;
- bpf_probe_read(&arr_struct->array[index],
- sizeof(arr_struct->array[index]),
- kill_data);
+ bpf_probe_read_kernel(&arr_struct->array[index],
+ sizeof(arr_struct->array[index]),
+ kill_data);
}
}
bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
@@ -484,15 +480,15 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
struct dentry* parent_dentry;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
- filepart_length = bpf_probe_read_str(payload, MAX_PATH,
- BPF_CORE_READ(filp_dentry, d_name.name));
- barrier_var(filepart_length);
- if (filepart_length > MAX_PATH)
+ filepart_length =
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(filp_dentry, d_name.name));
+ bpf_nop_mov(filepart_length);
+ if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH))
break;
- barrier_var(filepart_length);
payload += filepart_length;
length += filepart_length;
@@ -510,7 +506,7 @@ is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
struct dentry* parent_dentry;
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < MAX_PATH_DEPTH; i++) {
u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
@@ -572,19 +568,18 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write,
sysctl_data->sysctl_val_length = 0;
sysctl_data->sysctl_path_length = 0;
- size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
- barrier_var(sysctl_val_length);
- if (sysctl_val_length <= CTL_MAXNAME) {
- barrier_var(sysctl_val_length);
+ size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
+ CTL_MAXNAME, buf);
+ if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) {
sysctl_data->sysctl_val_length = sysctl_val_length;
payload += sysctl_val_length;
}
- size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
- BPF_CORE_READ(filp, f_path.dentry, d_name.name));
- barrier_var(sysctl_path_length);
- if (sysctl_path_length <= MAX_PATH) {
- barrier_var(sysctl_path_length);
+ size_t sysctl_path_length =
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(filp, f_path.dentry,
+ d_name.name));
+ if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) {
sysctl_data->sysctl_path_length = sysctl_path_length;
payload += sysctl_path_length;
}
@@ -601,7 +596,7 @@ out:
}
SEC("tracepoint/syscalls/sys_enter_kill")
-int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
+int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx)
{
struct bpf_func_stats_ctx stats_ctx;
@@ -632,13 +627,14 @@ int raw_tracepoint__sched_process_exit(void* ctx)
struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
#ifdef UNROLL
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
- if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
- bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
+ if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) {
+ bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
+ past_kill_data);
void* payload = kill_data->payload;
size_t offset = kill_data->payload_length;
if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
@@ -649,18 +645,16 @@ int raw_tracepoint__sched_process_exit(void* ctx)
kill_data->kill_target_cgroup_proc_length = 0;
size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
- barrier_var(comm_length);
- if (comm_length <= TASK_COMM_LEN) {
- barrier_var(comm_length);
+ if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
kill_data->kill_target_name_length = comm_length;
payload += comm_length;
}
- size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
- BPF_CORE_READ(proc_kernfs, name));
- barrier_var(cgroup_proc_length);
- if (cgroup_proc_length <= KILL_TARGET_LEN) {
- barrier_var(cgroup_proc_length);
+ size_t cgroup_proc_length =
+ bpf_probe_read_kernel_str(payload,
+ KILL_TARGET_LEN,
+ BPF_CORE_READ(proc_kernfs, name));
+ if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) {
kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
payload += cgroup_proc_length;
}
@@ -718,10 +712,9 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
const char* filename = BPF_CORE_READ(bprm, filename);
- size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
- barrier_var(bin_path_length);
- if (bin_path_length <= MAX_FILENAME_LEN) {
- barrier_var(bin_path_length);
+ size_t bin_path_length =
+ bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
+ if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) {
proc_exec_data->bin_path_length = bin_path_length;
payload += bin_path_length;
}
@@ -731,8 +724,7 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
unsigned int cmdline_length = probe_read_lim(payload, arg_start,
arg_end - arg_start, MAX_ARGS_LEN);
- if (cmdline_length <= MAX_ARGS_LEN) {
- barrier_var(cmdline_length);
+ if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) {
proc_exec_data->cmdline_length = cmdline_length;
payload += cmdline_length;
}
@@ -809,9 +801,7 @@ int kprobe_ret__do_filp_open(struct pt_regs* ctx)
payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
- barrier_var(len);
- if (len <= MAX_FILEPATH_LENGTH) {
- barrier_var(len);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
payload += len;
filemod_data->dst_filepath_length = len;
}
@@ -826,8 +816,9 @@ out:
SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
- struct dentry* old_dentry, struct inode* dir,
- struct dentry* new_dentry, struct inode** delegated_inode)
+ struct dentry* old_dentry, struct mnt_idmap *idmap,
+ struct inode* dir, struct dentry* new_dentry,
+ struct inode** delegated_inode)
{
struct bpf_func_stats_ctx stats_ctx;
bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
@@ -863,17 +854,13 @@ int BPF_KPROBE(kprobe__vfs_link,
payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
- barrier_var(len);
- if (len <= MAX_FILEPATH_LENGTH) {
- barrier_var(len);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
payload += len;
filemod_data->src_filepath_length = len;
}
len = read_absolute_file_path_from_dentry(new_dentry, payload);
- barrier_var(len);
- if (len <= MAX_FILEPATH_LENGTH) {
- barrier_var(len);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
payload += len;
filemod_data->dst_filepath_length = len;
}
@@ -921,17 +908,14 @@ int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
filemod_data->payload);
payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
- size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
- barrier_var(len);
- if (len <= MAX_FILEPATH_LENGTH) {
- barrier_var(len);
+ size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
+ oldname);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
payload += len;
filemod_data->src_filepath_length = len;
}
len = read_absolute_file_path_from_dentry(dentry, payload);
- barrier_var(len);
- if (len <= MAX_FILEPATH_LENGTH) {
- barrier_var(len);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
payload += len;
filemod_data->dst_filepath_length = len;
}
diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c
index 4df9088bfc00..fb6b13522949 100644
--- a/tools/testing/selftests/bpf/progs/profiler1.c
+++ b/tools/testing/selftests/bpf/progs/profiler1.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
#define UNROLL
#define INLINE __always_inline
#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 2fb7adafb6b6..86484f07e1d1 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -7,6 +7,8 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_compiler.h"
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
@@ -159,6 +161,57 @@ struct {
__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
+#ifdef USE_BPF_LOOP
+struct process_frame_ctx {
+ int cur_cpu;
+ int32_t *symbol_counter;
+ void *frame_ptr;
+ FrameData *frame;
+ PidData *pidData;
+ Symbol *sym;
+ Event *event;
+ bool done;
+};
+
+static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
+{
+ int zero = 0;
+ void *frame_ptr = ctx->frame_ptr;
+ PidData *pidData = ctx->pidData;
+ FrameData *frame = ctx->frame;
+ int32_t *symbol_counter = ctx->symbol_counter;
+ int cur_cpu = ctx->cur_cpu;
+ Event *event = ctx->event;
+ Symbol *sym = ctx->sym;
+
+ if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
+ int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
+ int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+
+ if (!symbol_id) {
+ bpf_map_update_elem(&symbolmap, sym, &zero, 0);
+ symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+ if (!symbol_id) {
+ ctx->done = true;
+ return 1;
+ }
+ }
+ if (*symbol_id == new_symbol_id)
+ (*symbol_counter)++;
+
+ barrier_var(i);
+ if (i >= STACK_MAX_LEN)
+ return 1;
+
+ event->stack[i] = *symbol_id;
+
+ event->stack_len = i + 1;
+ frame_ptr = frame->f_back;
+ }
+ return 0;
+}
+#endif /* USE_BPF_LOOP */
+
#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
@@ -228,13 +281,37 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL)
return 0;
-#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+#ifdef USE_BPF_LOOP
+ struct process_frame_ctx ctx = {
+ .cur_cpu = cur_cpu,
+ .symbol_counter = symbol_counter,
+ .frame_ptr = frame_ptr,
+ .frame = &frame,
+ .pidData = pidData,
+ .sym = &sym,
+ .event = event,
+ };
+
+ bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
+ if (ctx.done)
+ return 0;
#else
-#pragma clang loop unroll(full)
-#endif
+#if defined(USE_ITER)
+/* no for loop, no unrolling */
+#elif defined(NO_UNROLL)
+ __pragma_loop_no_unroll
+#elif defined(UNROLL_COUNT)
+ __pragma_loop_unroll_count(UNROLL_COUNT)
+#else
+ __pragma_loop_unroll_full
+#endif /* NO_UNROLL */
/* Unwind python stack */
+#ifdef USE_ITER
+ int i;
+ bpf_for(i, 0, STACK_MAX_LEN) {
+#else /* !USE_ITER */
for (int i = 0; i < STACK_MAX_LEN; ++i) {
+#endif
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
@@ -251,6 +328,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
frame_ptr = frame.f_back;
}
}
+#endif /* USE_BPF_LOOP */
event->stack_complete = frame_ptr == NULL;
} else {
event->stack_complete = 1;
@@ -268,7 +346,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
SEC("raw_tracepoint/kfree_skb")
int on_event(struct bpf_raw_tracepoint_args* ctx)
{
- int i, ret = 0;
+ int ret = 0;
ret |= __on_event(ctx);
ret |= __on_event(ctx);
ret |= __on_event(ctx);
diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c
index c39f559d3100..42c4a8b62e36 100644
--- a/tools/testing/selftests/bpf/progs/pyperf180.c
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
@@ -1,4 +1,26 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 180
+
+/* llvm upstream commit at clang18
+ * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e
+ * changed inlining behavior and caused compilation failure as some branch
+ * target distance exceeded 16bit representation which is the maximum for
+ * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18
+ * to specify which cpu version is used for compilation. So a smaller
+ * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which
+ * reduced some branch target distances and resolved the compilation failure.
+ *
+ * To capture the case where a developer/ci uses clang18 but the corresponding
+ * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count
+ * will be set as well to prevent potential compilation failures.
+ */
+#ifdef __BPF_CPU_VERSION__
+#if __BPF_CPU_VERSION__ < 4
+#define UNROLL_COUNT 90
+#endif
+#elif __clang_major__ == 18
+#define UNROLL_COUNT 90
+#endif
+
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c
index cb49b89e37cd..ce1aa5189cc4 100644
--- a/tools/testing/selftests/bpf/progs/pyperf600.c
+++ b/tools/testing/selftests/bpf/progs/pyperf600.c
@@ -1,9 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 600
-/* clang will not unroll the loop 600 times.
- * Instead it will unroll it to the amount it deemed
- * appropriate, but the loop will still execute 600 times.
- * Total program size is around 90k insns
+/* Full unroll of 600 iterations will have total
+ * program size close to 298k insns and this may
+ * cause BPF_JMP insn out of 16-bit integer range.
+ * So limit the unroll size to 150 so the
+ * total program size is around 80k insns but
+ * the loop will still execute 600 times.
*/
+#define UNROLL_COUNT 150
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
new file mode 100644
index 000000000000..5c2059dc01af
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define STACK_MAX_LEN 600
+#define USE_BPF_LOOP
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_iter.c b/tools/testing/selftests/bpf/progs/pyperf600_iter.c
new file mode 100644
index 000000000000..d62e1b200c30
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_iter.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
+#define STACK_MAX_LEN 600
+#define SUBPROGS
+#define NO_UNROLL
+#define USE_ITER
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
index 6beff7502f4d..520b58c4f8db 100644
--- a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
+++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
@@ -2,7 +2,4 @@
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 600
#define NO_UNROLL
-/* clang will not unroll at all.
- * Total program size is around 2k insns
- */
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c
new file mode 100644
index 000000000000..efa416f53968
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int tid;
+int i;
+
+SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp")
+int BPF_PROG(test_raw_tp_null, struct sk_buff *skb)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ if (task->pid != tid)
+ return 0;
+
+ /* If dead code elimination kicks in, the increment +=2 will be
+ * removed. For raw_tp programs attaching to tracepoints in kernel
+ * modules, we mark input arguments as PTR_MAYBE_NULL, so branch
+ * prediction should never kick in.
+ */
+ asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;"
+ : [i]"+r"(i)
+ : [ctx]"r"(skb)
+ : "memory");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
new file mode 100644
index 000000000000..0d58114a4955
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Ensure module parameter has PTR_MAYBE_NULL */
+SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) {
+ asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all);
+ return 0;
+}
+
+/* Check NULL marking */
+SEC("tp_btf/sched_pi_setprio")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int test_raw_tp_null_sched_pi_setprio_arg_2(void *ctx) {
+ asm volatile("r1 = *(u64 *)(r1 +8); r1 = *(u64 *)(r1 +0);" ::: __clobber_all);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
new file mode 100644
index 000000000000..49fe93d7e059
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+struct root_nested_inner {
+ struct bpf_spin_lock glock;
+ struct bpf_rb_root root __contains(node_data, node);
+};
+
+struct root_nested {
+ struct root_nested_inner inner;
+};
+
+long less_callback_ran = -1;
+long removed_key = -1;
+long first_data[2] = {-1, -1};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+private(A) struct bpf_rb_root groot_array[2] __contains(node_data, node);
+private(A) struct bpf_rb_root groot_array_one[1] __contains(node_data, node);
+private(B) struct root_nested groot_nested;
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ less_callback_ran = 1;
+
+ return node_a->key < node_b->key;
+}
+
+static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock)
+{
+ struct node_data *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ n->key = 5;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m) {
+ bpf_obj_drop(n);
+ return 2;
+ }
+ m->key = 1;
+
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_rbtree_add(root, &m->node, less);
+ bpf_spin_unlock(lock);
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 3;
+ n->key = 3;
+
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_spin_unlock(lock);
+ return 0;
+}
+
+SEC("tc")
+long rbtree_add_nodes(void *ctx)
+{
+ return __add_three(&groot, &glock);
+}
+
+SEC("tc")
+long rbtree_add_nodes_nested(void *ctx)
+{
+ return __add_three(&groot_nested.inner.root, &groot_nested.inner.glock);
+}
+
+SEC("tc")
+long rbtree_add_and_remove(void *ctx)
+{
+ struct bpf_rb_node *res = NULL;
+ struct node_data *n, *m = NULL;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ goto err_out;
+ n->key = 5;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m)
+ goto err_out;
+ m->key = 3;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_rbtree_add(&groot, &m->node, less);
+ res = bpf_rbtree_remove(&groot, &n->node);
+ bpf_spin_unlock(&glock);
+
+ if (!res)
+ return 1;
+
+ n = container_of(res, struct node_data, node);
+ removed_key = n->key;
+ bpf_obj_drop(n);
+
+ return 0;
+err_out:
+ if (n)
+ bpf_obj_drop(n);
+ if (m)
+ bpf_obj_drop(m);
+ return 1;
+}
+
+SEC("tc")
+long rbtree_add_and_remove_array(void *ctx)
+{
+ struct bpf_rb_node *res1 = NULL, *res2 = NULL, *res3 = NULL;
+ struct node_data *nodes[3][2] = {{NULL, NULL}, {NULL, NULL}, {NULL, NULL}};
+ struct node_data *n;
+ long k1 = -1, k2 = -1, k3 = -1;
+ int i, j;
+
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 2; j++) {
+ nodes[i][j] = bpf_obj_new(typeof(*nodes[i][j]));
+ if (!nodes[i][j])
+ goto err_out;
+ nodes[i][j]->key = i * 2 + j;
+ }
+ }
+
+ bpf_spin_lock(&glock);
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ bpf_rbtree_add(&groot_array[i], &nodes[i][j]->node, less);
+ for (j = 0; j < 2; j++)
+ bpf_rbtree_add(&groot_array_one[0], &nodes[2][j]->node, less);
+ res1 = bpf_rbtree_remove(&groot_array[0], &nodes[0][0]->node);
+ res2 = bpf_rbtree_remove(&groot_array[1], &nodes[1][0]->node);
+ res3 = bpf_rbtree_remove(&groot_array_one[0], &nodes[2][0]->node);
+ bpf_spin_unlock(&glock);
+
+ if (res1) {
+ n = container_of(res1, struct node_data, node);
+ k1 = n->key;
+ bpf_obj_drop(n);
+ }
+ if (res2) {
+ n = container_of(res2, struct node_data, node);
+ k2 = n->key;
+ bpf_obj_drop(n);
+ }
+ if (res3) {
+ n = container_of(res3, struct node_data, node);
+ k3 = n->key;
+ bpf_obj_drop(n);
+ }
+ if (k1 != 0 || k2 != 2 || k3 != 4)
+ return 2;
+
+ return 0;
+
+err_out:
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 2; j++) {
+ if (nodes[i][j])
+ bpf_obj_drop(nodes[i][j]);
+ }
+ }
+ return 1;
+}
+
+SEC("tc")
+long rbtree_first_and_remove(void *ctx)
+{
+ struct bpf_rb_node *res = NULL;
+ struct node_data *n, *m, *o;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ n->key = 3;
+ n->data = 4;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m)
+ goto err_out;
+ m->key = 5;
+ m->data = 6;
+
+ o = bpf_obj_new(typeof(*o));
+ if (!o)
+ goto err_out;
+ o->key = 1;
+ o->data = 2;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_rbtree_add(&groot, &m->node, less);
+ bpf_rbtree_add(&groot, &o->node, less);
+
+ res = bpf_rbtree_first(&groot);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 2;
+ }
+
+ o = container_of(res, struct node_data, node);
+ first_data[0] = o->data;
+
+ res = bpf_rbtree_remove(&groot, &o->node);
+ bpf_spin_unlock(&glock);
+
+ if (!res)
+ return 5;
+
+ o = container_of(res, struct node_data, node);
+ removed_key = o->key;
+ bpf_obj_drop(o);
+
+ bpf_spin_lock(&glock);
+ res = bpf_rbtree_first(&groot);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 3;
+ }
+
+ o = container_of(res, struct node_data, node);
+ first_data[1] = o->data;
+ bpf_spin_unlock(&glock);
+
+ return 0;
+err_out:
+ if (n)
+ bpf_obj_drop(n);
+ if (m)
+ bpf_obj_drop(m);
+ return 1;
+}
+
+SEC("tc")
+long rbtree_api_release_aliasing(void *ctx)
+{
+ struct node_data *n, *m, *o;
+ struct bpf_rb_node *res, *res2;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ n->key = 41;
+ n->data = 42;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+
+ /* m and o point to the same node,
+ * but verifier doesn't know this
+ */
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ goto err_out;
+ o = container_of(res, struct node_data, node);
+
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ goto err_out;
+ m = container_of(res, struct node_data, node);
+
+ res = bpf_rbtree_remove(&groot, &m->node);
+ /* Retval of previous remove returns an owning reference to m,
+ * which is the same node non-owning ref o is pointing at.
+ * We can safely try to remove o as the second rbtree_remove will
+ * return NULL since the node isn't in a tree.
+ *
+ * Previously we relied on the verifier type system + rbtree_remove
+ * invalidating non-owning refs to ensure that rbtree_remove couldn't
+ * fail, but now rbtree_remove does runtime checking so we no longer
+ * invalidate non-owning refs after remove.
+ */
+ res2 = bpf_rbtree_remove(&groot, &o->node);
+
+ bpf_spin_unlock(&glock);
+
+ if (res) {
+ o = container_of(res, struct node_data, node);
+ first_data[0] = o->data;
+ bpf_obj_drop(o);
+ }
+ if (res2) {
+ /* The second remove fails, so res2 is null and this doesn't
+ * execute
+ */
+ m = container_of(res2, struct node_data, node);
+ first_data[1] = m->data;
+ bpf_obj_drop(m);
+ }
+ return 0;
+
+err_out:
+ bpf_spin_unlock(&glock);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c
new file mode 100644
index 000000000000..60079b202c07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct node_data {
+ int key;
+ int data;
+ struct bpf_rb_node node;
+};
+
+struct node_data2 {
+ int key;
+ struct bpf_rb_node node;
+ int data;
+};
+
+static bool less2(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data2 *node_a;
+ struct node_data2 *node_b;
+
+ node_a = container_of(a, struct node_data2, node);
+ node_b = container_of(b, struct node_data2, node);
+
+ return node_a->key < node_b->key;
+}
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+SEC("tc")
+long rbtree_api_add__add_wrong_type(void *ctx)
+{
+ struct node_data2 *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less2);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
new file mode 100644
index 000000000000..7651843f5a80
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+/* BTF load should fail as bpf_rb_root __contains this type and points to
+ * 'node', but 'node' is not a bpf_rb_node
+ */
+struct node_data {
+ int key;
+ int data;
+ struct bpf_list_node node;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+SEC("tc")
+long rbtree_api_add__wrong_node_type(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_first(&groot);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
new file mode 100644
index 000000000000..4acb6af2dfe3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+private(A) struct bpf_rb_root groot2 __contains(node_data, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+
+ return node_a->key < node_b->key;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_add(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_rbtree_add(&groot, &n->node, less);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_remove(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ bpf_rbtree_remove(&groot, &n->node);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_first(void *ctx)
+{
+ bpf_rbtree_first(&groot);
+ return 0;
+}
+
+SEC("?tc")
+__retval(0)
+long rbtree_api_remove_unadded_node(void *ctx)
+{
+ struct node_data *n, *m;
+ struct bpf_rb_node *res_n, *res_m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m) {
+ bpf_obj_drop(n);
+ return 1;
+ }
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+
+ res_n = bpf_rbtree_remove(&groot, &n->node);
+
+ res_m = bpf_rbtree_remove(&groot, &m->node);
+ bpf_spin_unlock(&glock);
+
+ bpf_obj_drop(m);
+ if (res_n)
+ bpf_obj_drop(container_of(res_n, struct node_data, node));
+ if (res_m) {
+ bpf_obj_drop(container_of(res_m, struct node_data, node));
+ /* m was not added to the rbtree */
+ return 2;
+ }
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=3 alloc_insn={{[0-9]+}}")
+long rbtree_api_remove_no_drop(void *ctx)
+{
+ struct bpf_rb_node *res;
+ struct node_data *n;
+
+ bpf_spin_lock(&glock);
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ goto unlock_err;
+
+ res = bpf_rbtree_remove(&groot, res);
+
+ if (res) {
+ n = container_of(res, struct node_data, node);
+ __sink(n);
+ }
+ bpf_spin_unlock(&glock);
+
+ /* if (res) { bpf_obj_drop(n); } is missing here */
+ return 0;
+
+unlock_err:
+ bpf_spin_unlock(&glock);
+ return 1;
+}
+
+SEC("?tc")
+__failure __msg("arg#1 expected pointer to allocated object")
+long rbtree_api_add_to_multiple_trees(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+
+ /* This add should fail since n already in groot's tree */
+ bpf_rbtree_add(&groot2, &n->node, less);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("dereference of modified ptr_or_null_ ptr R2 off=16 disallowed")
+long rbtree_api_use_unchecked_remove_retval(void *ctx)
+{
+ struct bpf_rb_node *res;
+
+ bpf_spin_lock(&glock);
+
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ goto err_out;
+ res = bpf_rbtree_remove(&groot, res);
+
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+ /* Must check res for NULL before using in rbtree_add below */
+ bpf_rbtree_add(&groot, res, less);
+ bpf_spin_unlock(&glock);
+ return 0;
+
+err_out:
+ bpf_spin_unlock(&glock);
+ return 1;
+}
+
+SEC("?tc")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
+long rbtree_api_add_release_unlock_escape(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+ /* After add() in previous critical section, n should be
+ * release_on_unlock and released after previous spin_unlock,
+ * so should not be possible to use it here
+ */
+ bpf_rbtree_remove(&groot, &n->node);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
+long rbtree_api_first_release_unlock_escape(void *ctx)
+{
+ struct bpf_rb_node *res;
+ struct node_data *n;
+
+ bpf_spin_lock(&glock);
+ res = bpf_rbtree_first(&groot);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 1;
+ }
+ n = container_of(res, struct node_data, node);
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+ /* After first() in previous critical section, n should be
+ * release_on_unlock and released after previous spin_unlock,
+ * so should not be possible to use it here
+ */
+ bpf_rbtree_remove(&groot, &n->node);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+static bool less__bad_fn_call_add(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ bpf_rbtree_add(&groot, &node_a->node, less);
+
+ return node_a->key < node_b->key;
+}
+
+static bool less__bad_fn_call_remove(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ bpf_rbtree_remove(&groot, &node_a->node);
+
+ return node_a->key < node_b->key;
+}
+
+static bool less__bad_fn_call_first_unlock_after(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ bpf_rbtree_first(&groot);
+ bpf_spin_unlock(&glock);
+
+ return node_a->key < node_b->key;
+}
+
+static __always_inline
+long add_with_cb(bool (cb)(struct bpf_rb_node *a, const struct bpf_rb_node *b))
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, cb);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("arg#1 expected pointer to allocated object")
+long rbtree_api_add_bad_cb_bad_fn_call_add(void *ctx)
+{
+ return add_with_cb(less__bad_fn_call_add);
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove not allowed in rbtree cb")
+long rbtree_api_add_bad_cb_bad_fn_call_remove(void *ctx)
+{
+ return add_with_cb(less__bad_fn_call_remove);
+}
+
+SEC("?tc")
+__failure __msg("can't spin_{lock,unlock} in rbtree cb")
+long rbtree_api_add_bad_cb_bad_fn_call_first_unlock_after(void *ctx)
+{
+ return add_with_cb(less__bad_fn_call_first_unlock_after);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c
new file mode 100644
index 000000000000..b05565d1db0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_search.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_refcount ref;
+ struct bpf_rb_node r0;
+ struct bpf_rb_node r1;
+ int key0;
+ int key1;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock0;
+private(A) struct bpf_rb_root groot0 __contains(node_data, r0);
+
+private(B) struct bpf_spin_lock glock1;
+private(B) struct bpf_rb_root groot1 __contains(node_data, r1);
+
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+static bool less0(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r0);
+ node_b = rb_entry(b, struct node_data, r0);
+
+ return node_a->key0 < node_b->key0;
+}
+
+static bool less1(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r1);
+ node_b = rb_entry(b, struct node_data, r1);
+
+ return node_a->key1 < node_b->key1;
+}
+
+SEC("syscall")
+__retval(0)
+long rbtree_search(void *ctx)
+{
+ struct bpf_rb_node *rb_n, *rb_m, *gc_ns[NR_NODES];
+ long lookup_key = NR_NODES / 2;
+ struct node_data *n, *m;
+ int i, nr_gc = 0;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+
+ m = bpf_refcount_acquire(n);
+
+ n->key0 = i;
+ m->key1 = i;
+
+ bpf_spin_lock(&glock0);
+ bpf_rbtree_add(&groot0, &n->r0, less0);
+ bpf_spin_unlock(&glock0);
+
+ bpf_spin_lock(&glock1);
+ bpf_rbtree_add(&groot1, &m->r1, less1);
+ bpf_spin_unlock(&glock1);
+ }
+
+ n = NULL;
+ bpf_spin_lock(&glock0);
+ rb_n = bpf_rbtree_root(&groot0);
+ while (can_loop) {
+ if (!rb_n) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ n = rb_entry(rb_n, struct node_data, r0);
+ if (lookup_key == n->key0)
+ break;
+ if (nr_gc < NR_NODES)
+ gc_ns[nr_gc++] = rb_n;
+ if (lookup_key < n->key0)
+ rb_n = bpf_rbtree_left(&groot0, rb_n);
+ else
+ rb_n = bpf_rbtree_right(&groot0, rb_n);
+ }
+
+ if (!n || lookup_key != n->key0) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ gc_ns[i] = bpf_rbtree_remove(&groot0, rb_n);
+ }
+
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&glock0);
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ if (rb_n) {
+ n = rb_entry(rb_n, struct node_data, r0);
+ bpf_obj_drop(n);
+ }
+ }
+
+ if (!m)
+ return __LINE__;
+
+ bpf_spin_lock(&glock1);
+ rb_m = bpf_rbtree_remove(&groot1, &m->r1);
+ bpf_spin_unlock(&glock1);
+ bpf_obj_drop(m);
+ if (!rb_m)
+ return __LINE__;
+ bpf_obj_drop(rb_entry(rb_m, struct node_data, r1));
+
+ return 0;
+}
+
+#define TEST_ROOT(dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_root_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+#define TEST_LR(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ struct node_data *n; \
+ __u64 jiffies = 0; \
+ \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (!rb_n) { \
+ bpf_spin_unlock(&glock0); \
+ return 1; \
+ } \
+ n = rb_entry(rb_n, struct node_data, r0); \
+ n = bpf_refcount_acquire(n); \
+ bpf_spin_unlock(&glock0); \
+ if (!n) \
+ return 1; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_##op(&groot0, &n->r0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+/*
+ * Use a separate MSG macro instead of passing to TEST_XXX(..., MSG)
+ * to ensure the message itself is not in the bpf prog lineinfo
+ * which the verifier includes in its log.
+ * Otherwise, the test_loader will incorrectly match the prog lineinfo
+ * instead of the log generated by the verifier.
+ */
+#define MSG "call bpf_rbtree_root{{.+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_ROOT(true)
+#undef MSG
+#define MSG "call bpf_rbtree_{{(left|right).+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_LR(left, true)
+TEST_LR(right, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_rb_root"
+TEST_ROOT(false)
+TEST_LR(left, false)
+TEST_LR(right, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
new file mode 100644
index 000000000000..d70c28824bbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+__u32 user_data, target_pid;
+__s32 key_serial;
+__u64 flags, task_storage_val, cgroup_id;
+
+struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
+void bpf_key_put(struct bpf_key *key) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int get_cgroup_id(void *ctx)
+{
+ struct task_struct *task;
+ struct css_set *cgroups;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ /* simulate bpf_get_current_cgroup_id() helper */
+ bpf_rcu_read_lock();
+ cgroups = task->cgroups;
+ if (!cgroups)
+ goto unlock;
+ cgroup_id = cgroups->dfl_cgrp->kn->id;
+unlock:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_succ(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+ long init_val = 2;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ bpf_rcu_read_lock();
+ /* region including helper using rcu ptr real_parent */
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ ptr = bpf_task_storage_get(&map_a, real_parent, &init_val,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ goto out;
+ ptr = bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ if (!ptr)
+ goto out;
+ task_storage_val = *ptr;
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int no_lock(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* old style ptr_to_btf_id is not allowed in sleepable */
+ task = bpf_get_current_task_btf();
+ real_parent = task->real_parent;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int two_regions(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* two regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_1(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_2(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ bpf_rcu_read_lock();
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int task_acquire(void *ctx)
+{
+ struct task_struct *task, *real_parent, *gparent;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+
+ /* rcu_ptr->rcu_field */
+ gparent = real_parent->real_parent;
+ if (!gparent)
+ goto out;
+
+ /* acquire a reference which can be used outside rcu read lock region */
+ gparent = bpf_task_acquire(gparent);
+ if (!gparent)
+ goto out;
+
+ (void)bpf_task_storage_get(&map_a, gparent, 0, 0);
+ bpf_task_release(gparent);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int miss_lock(void *ctx)
+{
+ struct task_struct *task;
+
+ /* missing bpf_rcu_read_lock() */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ (void)bpf_task_storage_get(&map_a, task, 0, 0);
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int miss_unlock(void *ctx)
+{
+ struct task_struct *task;
+
+ /* missing bpf_rcu_read_unlock() */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ (void)bpf_task_storage_get(&map_a, task, 0, 0);
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_rcu_mismatch(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ /* non-sleepable: missing bpf_rcu_read_unlock() in one path */
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ if (real_parent)
+ bpf_rcu_read_unlock();
+out:
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int inproper_sleepable_helper(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+ struct pt_regs *regs;
+ __u32 value = 0;
+ void *ptr;
+
+ task = bpf_get_current_task_btf();
+ /* sleepable helper in rcu read lock region */
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ regs = (struct pt_regs *)bpf_task_pt_regs(real_parent);
+ if (!regs)
+ goto out;
+
+ ptr = (void *)PT_REGS_IP(regs);
+ (void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0);
+ user_data = value;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?lsm.s/bpf")
+int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size,
+ bool kernel)
+{
+ struct bpf_key *bkey;
+
+ /* sleepable kfunc in rcu read lock region */
+ bpf_rcu_read_lock();
+ bkey = bpf_lookup_user_key(key_serial, flags);
+ bpf_rcu_read_unlock();
+ if (!bkey)
+ return -1;
+ bpf_key_put(bkey);
+
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* nested rcu read lock regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_1(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* nested rcu read lock regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_2(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* nested rcu read lock regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_trusted_non_rcuptr(void *ctx)
+{
+ struct task_struct *task, *group_leader;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ /* the pointer group_leader is explicitly marked as trusted */
+ group_leader = task->real_parent->group_leader;
+ (void)bpf_task_storage_get(&map_a, group_leader, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_untrusted_rcuptr(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ bpf_rcu_read_unlock();
+ /* helper use of rcu ptr outside the rcu read lock region */
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int cross_rcu_region(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* rcu ptr define/use in different regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_lock();
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+__noinline
+static int static_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog(NULL);
+}
+
+__noinline
+static int static_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog_lock(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog_lock(NULL);
+}
+
+__noinline
+static int static_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_unlock();
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog_unlock(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog_unlock(NULL);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ ret += static_subprog(ctx);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ ret += global_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ ret += static_subprog_lock(ctx);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ ret += global_subprog_lock(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += static_subprog_unlock(ctx);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_subprog_unlock(ret);
+ return 0;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_helper_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_helper_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_kfunc_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_kfunc_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_global_subprog_indirect(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_subprog_calling_sleepable_global(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
new file mode 100644
index 000000000000..df4873558634
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct task_ls_map {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} task_ls_map SEC(".maps");
+
+long gp_seq;
+
+SEC("syscall")
+int do_call_rcu_tasks_trace(void *ctx)
+{
+ struct task_struct *current;
+ int *v;
+
+ current = bpf_get_current_task_btf();
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 1;
+ /* Invoke call_rcu_tasks_trace */
+ return bpf_task_storage_delete(&task_ls_map, current);
+}
+
+SEC("kprobe/rcu_tasks_trace_postgp")
+int rcu_tasks_trace_postgp(void *ctx)
+{
+ __sync_add_and_fetch(&gp_seq, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
new file mode 100644
index 000000000000..69da05bb6c63
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern bool CONFIG_PREEMPTION __kconfig __weak;
+extern const int bpf_task_storage_busy __ksym;
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+int busy = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} task SEC(".maps");
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(read_bpf_task_storage_busy)
+{
+ int *value;
+
+ if (!CONFIG_PREEMPTION)
+ return 0;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ value = bpf_this_cpu_ptr(&bpf_task_storage_busy);
+ if (value)
+ busy = *value;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c
new file mode 100644
index 000000000000..405adbe5e8b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid = 0;
+
+char xattr_value[64];
+static const char expected_value_a[] = "bpf_selftest_value_a";
+static const char expected_value_b[] = "bpf_selftest_value_b";
+bool found_value_a;
+bool found_value_b;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css, *tmp;
+ struct bpf_dynptr value_ptr;
+ struct cgroup *cgrp;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ bpf_rcu_read_lock();
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ css = &cgrp->self;
+ bpf_dynptr_from_mem(xattr_value, sizeof(xattr_value), 0, &value_ptr);
+ bpf_for_each(css, tmp, css, BPF_CGROUP_ITER_ANCESTORS_UP) {
+ int ret;
+
+ ret = bpf_cgroup_read_xattr(tmp->cgroup, "user.bpf_test",
+ &value_ptr);
+ if (ret < 0)
+ continue;
+
+ if (ret == sizeof(expected_value_a) &&
+ !bpf_strncmp(xattr_value, sizeof(expected_value_a), expected_value_a))
+ found_value_a = true;
+ if (ret == sizeof(expected_value_b) &&
+ !bpf_strncmp(xattr_value, sizeof(expected_value_b), expected_value_b))
+ found_value_b = true;
+ }
+
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(cgrp);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
new file mode 100644
index 000000000000..395591374d4f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include "vmlinux.h"
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+int target_pid = 0;
+void *user_ptr = 0;
+int read_ret[10];
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * These are the kfuncs, the others are helpers
+ */
+int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym;
+int bpf_copy_from_user_task_str(void *dst, u32, const void *,
+ struct task_struct *, u64) __weak __ksym;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int do_probe_read(void *ctx)
+{
+ char buf[8];
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr);
+ read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr);
+ read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr);
+ read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr);
+ read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr);
+ read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int do_copy_from_user(void *ctx)
+{
+ char buf[8];
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr);
+ read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
+ bpf_get_current_task_btf(), 0);
+ read_ret[8] = bpf_copy_from_user_str((char *)buf, sizeof(buf), user_ptr, 0);
+ read_ret[9] = bpf_copy_from_user_task_str((char *)buf,
+ sizeof(buf),
+ user_ptr,
+ bpf_get_current_task_btf(),
+ 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c
index 49f679375b9d..3c2423bb19e2 100644
--- a/tools/testing/selftests/bpf/progs/recursion.c
+++ b/tools/testing/selftests/bpf/progs/recursion.c
@@ -24,8 +24,8 @@ struct {
int pass1 = 0;
int pass2 = 0;
-SEC("fentry/__htab_map_lookup_elem")
-int BPF_PROG(on_lookup, struct bpf_map *map)
+SEC("fentry/htab_map_delete_elem")
+int BPF_PROG(on_delete, struct bpf_map *map)
{
int key = 0;
@@ -35,10 +35,7 @@ int BPF_PROG(on_lookup, struct bpf_map *map)
}
if (map == (void *)&hash2) {
pass2++;
- /* htab_map_gen_lookup() will inline below call
- * into direct call to __htab_map_lookup_elem()
- */
- bpf_map_lookup_elem(&hash2, &key);
+ bpf_map_delete_elem(&hash2, &key);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c
index 3d1ae8b3402f..59748c95471a 100644
--- a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c
@@ -17,8 +17,6 @@ SEC("cgroup/recvmsg4")
int recvmsg4_prog(struct bpf_sock_addr *ctx)
{
struct bpf_sock *sk;
- __u32 user_ip4;
- __u16 user_port;
sk = ctx->sk;
if (!sk)
diff --git a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c
index 27dfb21b21b4..d9a4016596d5 100644
--- a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c
+++ b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c
@@ -20,8 +20,6 @@ SEC("cgroup/recvmsg6")
int recvmsg6_prog(struct bpf_sock_addr *ctx)
{
struct bpf_sock *sk;
- __u32 user_ip4;
- __u16 user_port;
sk = ctx->sk;
if (!sk)
diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
new file mode 100644
index 000000000000..1c7ab44bccfa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_ADDRESS[] = "\0bpf_cgroup_unix_test";
+
+SEC("cgroup/recvmsg_unix")
+int recvmsg_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_ADDRESS) - 1;
+ int ret;
+
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_ADDRESS,
+ sizeof(SERVUN_ADDRESS) - 1);
+ if (ret)
+ return 1;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 1;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS,
+ sizeof(SERVUN_ADDRESS) - 1) != 0)
+ return 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
new file mode 100644
index 000000000000..1aca85d86aeb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -0,0 +1,631 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+extern void bpf_rcu_read_lock(void) __ksym;
+extern void bpf_rcu_read_unlock(void) __ksym;
+
+struct node_data {
+ long key;
+ long list_data;
+ struct bpf_rb_node r;
+ struct bpf_list_node l;
+ struct bpf_refcount ref;
+};
+
+struct map_value {
+ struct node_data __kptr *node;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2);
+} stashed_nodes SEC(".maps");
+
+struct node_acquire {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+ struct bpf_refcount refcount;
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock lock;
+private(A) struct bpf_rb_root root __contains(node_data, r);
+private(A) struct bpf_list_head head __contains(node_data, l);
+
+private(B) struct bpf_spin_lock alock;
+private(B) struct bpf_rb_root aroot __contains(node_acquire, node);
+
+private(C) struct bpf_spin_lock block;
+private(C) struct bpf_rb_root broot __contains(node_data, r);
+
+static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b)
+{
+ struct node_data *a;
+ struct node_data *b;
+
+ a = container_of(node_a, struct node_data, r);
+ b = container_of(node_b, struct node_data, r);
+
+ return a->key < b->key;
+}
+
+static bool less_a(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_acquire *node_a;
+ struct node_acquire *node_b;
+
+ node_a = container_of(a, struct node_acquire, node);
+ node_b = container_of(b, struct node_acquire, node);
+
+ return node_a->key < node_b->key;
+}
+
+static long __insert_in_tree_and_list(struct bpf_list_head *head,
+ struct bpf_rb_root *root,
+ struct bpf_spin_lock *lock)
+{
+ struct node_data *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return -1;
+
+ m = bpf_refcount_acquire(n);
+ m->key = 123;
+ m->list_data = 456;
+
+ bpf_spin_lock(lock);
+ if (bpf_rbtree_add(root, &n->r, less)) {
+ /* Failure to insert - unexpected */
+ bpf_spin_unlock(lock);
+ bpf_obj_drop(m);
+ return -2;
+ }
+ bpf_spin_unlock(lock);
+
+ bpf_spin_lock(lock);
+ if (bpf_list_push_front(head, &m->l)) {
+ /* Failure to insert - unexpected */
+ bpf_spin_unlock(lock);
+ return -3;
+ }
+ bpf_spin_unlock(lock);
+ return 0;
+}
+
+static long __stash_map_insert_tree(int idx, int val, struct bpf_rb_root *root,
+ struct bpf_spin_lock *lock)
+{
+ struct map_value *mapval;
+ struct node_data *n, *m;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return -1;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return -2;
+
+ n->key = val;
+ m = bpf_refcount_acquire(n);
+
+ n = bpf_kptr_xchg(&mapval->node, n);
+ if (n) {
+ bpf_obj_drop(n);
+ bpf_obj_drop(m);
+ return -3;
+ }
+
+ bpf_spin_lock(lock);
+ if (bpf_rbtree_add(root, &m->r, less)) {
+ /* Failure to insert - unexpected */
+ bpf_spin_unlock(lock);
+ return -4;
+ }
+ bpf_spin_unlock(lock);
+ return 0;
+}
+
+static long __read_from_tree(struct bpf_rb_root *root,
+ struct bpf_spin_lock *lock,
+ bool remove_from_tree)
+{
+ struct bpf_rb_node *rb;
+ struct node_data *n;
+ long res = -99;
+
+ bpf_spin_lock(lock);
+
+ rb = bpf_rbtree_first(root);
+ if (!rb) {
+ bpf_spin_unlock(lock);
+ return -1;
+ }
+
+ n = container_of(rb, struct node_data, r);
+ res = n->key;
+
+ if (!remove_from_tree) {
+ bpf_spin_unlock(lock);
+ return res;
+ }
+
+ rb = bpf_rbtree_remove(root, rb);
+ bpf_spin_unlock(lock);
+ if (!rb)
+ return -2;
+ n = container_of(rb, struct node_data, r);
+ bpf_obj_drop(n);
+ return res;
+}
+
+static long __read_from_list(struct bpf_list_head *head,
+ struct bpf_spin_lock *lock,
+ bool remove_from_list)
+{
+ struct bpf_list_node *l;
+ struct node_data *n;
+ long res = -99;
+
+ bpf_spin_lock(lock);
+
+ l = bpf_list_pop_front(head);
+ if (!l) {
+ bpf_spin_unlock(lock);
+ return -1;
+ }
+
+ n = container_of(l, struct node_data, l);
+ res = n->list_data;
+
+ if (!remove_from_list) {
+ if (bpf_list_push_back(head, &n->l)) {
+ bpf_spin_unlock(lock);
+ return -2;
+ }
+ }
+
+ bpf_spin_unlock(lock);
+
+ if (remove_from_list)
+ bpf_obj_drop(n);
+ return res;
+}
+
+static long __read_from_unstash(int idx)
+{
+ struct node_data *n = NULL;
+ struct map_value *mapval;
+ long val = -99;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return -1;
+
+ n = bpf_kptr_xchg(&mapval->node, n);
+ if (!n)
+ return -2;
+
+ val = n->key;
+ bpf_obj_drop(n);
+ return val;
+}
+
+#define INSERT_READ_BOTH(rem_tree, rem_list, desc) \
+SEC("tc") \
+__description(desc) \
+__success __retval(579) \
+long insert_and_remove_tree_##rem_tree##_list_##rem_list(void *ctx) \
+{ \
+ long err, tree_data, list_data; \
+ \
+ err = __insert_in_tree_and_list(&head, &root, &lock); \
+ if (err) \
+ return err; \
+ \
+ err = __read_from_tree(&root, &lock, rem_tree); \
+ if (err < 0) \
+ return err; \
+ else \
+ tree_data = err; \
+ \
+ err = __read_from_list(&head, &lock, rem_list); \
+ if (err < 0) \
+ return err; \
+ else \
+ list_data = err; \
+ \
+ return tree_data + list_data; \
+}
+
+/* After successful insert of struct node_data into both collections:
+ * - it should have refcount = 2
+ * - removing / not removing the node_data from a collection after
+ * reading should have no effect on ability to read / remove from
+ * the other collection
+ */
+INSERT_READ_BOTH(true, true, "insert_read_both: remove from tree + list");
+INSERT_READ_BOTH(false, false, "insert_read_both: remove from neither");
+INSERT_READ_BOTH(true, false, "insert_read_both: remove from tree");
+INSERT_READ_BOTH(false, true, "insert_read_both: remove from list");
+
+#undef INSERT_READ_BOTH
+#define INSERT_READ_BOTH(rem_tree, rem_list, desc) \
+SEC("tc") \
+__description(desc) \
+__success __retval(579) \
+long insert_and_remove_lf_tree_##rem_tree##_list_##rem_list(void *ctx) \
+{ \
+ long err, tree_data, list_data; \
+ \
+ err = __insert_in_tree_and_list(&head, &root, &lock); \
+ if (err) \
+ return err; \
+ \
+ err = __read_from_list(&head, &lock, rem_list); \
+ if (err < 0) \
+ return err; \
+ else \
+ list_data = err; \
+ \
+ err = __read_from_tree(&root, &lock, rem_tree); \
+ if (err < 0) \
+ return err; \
+ else \
+ tree_data = err; \
+ \
+ return tree_data + list_data; \
+}
+
+/* Similar to insert_read_both, but list data is read and possibly removed
+ * first
+ *
+ * Results should be no different than reading and possibly removing rbtree
+ * node first
+ */
+INSERT_READ_BOTH(true, true, "insert_read_both_list_first: remove from tree + list");
+INSERT_READ_BOTH(false, false, "insert_read_both_list_first: remove from neither");
+INSERT_READ_BOTH(true, false, "insert_read_both_list_first: remove from tree");
+INSERT_READ_BOTH(false, true, "insert_read_both_list_first: remove from list");
+
+#define INSERT_DOUBLE_READ_AND_DEL(read_fn, read_root, desc) \
+SEC("tc") \
+__description(desc) \
+__success __retval(-1) \
+long insert_double_##read_fn##_and_del_##read_root(void *ctx) \
+{ \
+ long err, list_data; \
+ \
+ err = __insert_in_tree_and_list(&head, &root, &lock); \
+ if (err) \
+ return err; \
+ \
+ err = read_fn(&read_root, &lock, true); \
+ if (err < 0) \
+ return err; \
+ else \
+ list_data = err; \
+ \
+ err = read_fn(&read_root, &lock, true); \
+ if (err < 0) \
+ return err; \
+ \
+ return err + list_data; \
+}
+
+/* Insert into both tree and list, then try reading-and-removing from either twice
+ *
+ * The second read-and-remove should fail on read step since the node has
+ * already been removed
+ */
+INSERT_DOUBLE_READ_AND_DEL(__read_from_tree, root, "insert_double_del: 2x read-and-del from tree");
+INSERT_DOUBLE_READ_AND_DEL(__read_from_list, head, "insert_double_del: 2x read-and-del from list");
+
+#define INSERT_STASH_READ(rem_tree, desc) \
+SEC("tc") \
+__description(desc) \
+__success __retval(84) \
+long insert_rbtree_and_stash__del_tree_##rem_tree(void *ctx) \
+{ \
+ long err, tree_data, map_data; \
+ \
+ err = __stash_map_insert_tree(0, 42, &root, &lock); \
+ if (err) \
+ return err; \
+ \
+ err = __read_from_tree(&root, &lock, rem_tree); \
+ if (err < 0) \
+ return err; \
+ else \
+ tree_data = err; \
+ \
+ err = __read_from_unstash(0); \
+ if (err < 0) \
+ return err; \
+ else \
+ map_data = err; \
+ \
+ return tree_data + map_data; \
+}
+
+/* Stash a refcounted node in map_val, insert same node into tree, then try
+ * reading data from tree then unstashed map_val, possibly removing from tree
+ *
+ * Removing from tree should have no effect on map_val kptr validity
+ */
+INSERT_STASH_READ(true, "insert_stash_read: remove from tree");
+INSERT_STASH_READ(false, "insert_stash_read: don't remove from tree");
+
+SEC("tc")
+__success
+long rbtree_refcounted_node_ref_escapes(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&alock);
+ bpf_rbtree_add(&aroot, &n->node, less_a);
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&alock);
+ if (!m)
+ return 2;
+
+ m->key = 2;
+ bpf_obj_drop(m);
+ return 0;
+}
+
+SEC("tc")
+__success
+long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ m = bpf_refcount_acquire(n);
+ m->key = 2;
+
+ bpf_spin_lock(&alock);
+ bpf_rbtree_add(&aroot, &n->node, less_a);
+ bpf_spin_unlock(&alock);
+
+ bpf_obj_drop(m);
+
+ return 0;
+}
+
+static long __stash_map_empty_xchg(struct node_data *n, int idx)
+{
+ struct map_value *mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+
+ if (!mapval) {
+ bpf_obj_drop(n);
+ return 1;
+ }
+ n = bpf_kptr_xchg(&mapval->node, n);
+ if (n) {
+ bpf_obj_drop(n);
+ return 2;
+ }
+ return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_a1(void *ctx)
+{
+ struct node_data *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ m = bpf_refcount_acquire(n);
+
+ if (__stash_map_empty_xchg(n, 0)) {
+ bpf_obj_drop(m);
+ return 2;
+ }
+
+ if (__stash_map_empty_xchg(m, 1))
+ return 3;
+
+ return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_b(void *ctx)
+{
+ struct map_value *mapval;
+ struct node_data *n;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ n = bpf_kptr_xchg(&mapval->node, NULL);
+ if (!n)
+ return 2;
+
+ bpf_spin_lock(&block);
+
+ bpf_rbtree_add(&broot, &n->r, less);
+
+ bpf_spin_unlock(&block);
+ return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_a2(void *ctx)
+{
+ struct map_value *mapval;
+ struct bpf_rb_node *res;
+ struct node_data *m;
+ int idx = 1;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ m = bpf_kptr_xchg(&mapval->node, NULL);
+ if (!m)
+ return 2;
+ bpf_spin_lock(&lock);
+
+ /* make m non-owning ref */
+ bpf_list_push_back(&head, &m->l);
+ res = bpf_rbtree_remove(&root, &m->r);
+
+ bpf_spin_unlock(&lock);
+ if (res) {
+ bpf_obj_drop(container_of(res, struct node_data, r));
+ return 3;
+ }
+ return 0;
+}
+
+SEC("?fentry.s/bpf_testmod_test_read")
+__success
+int BPF_PROG(rbtree_sleepable_rcu,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ struct bpf_rb_node *rb;
+ struct node_data *n, *m = NULL;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 0;
+
+ bpf_rcu_read_lock();
+ bpf_spin_lock(&lock);
+ bpf_rbtree_add(&root, &n->r, less);
+ rb = bpf_rbtree_first(&root);
+ if (!rb)
+ goto err_out;
+
+ rb = bpf_rbtree_remove(&root, rb);
+ if (!rb)
+ goto err_out;
+
+ m = container_of(rb, struct node_data, r);
+
+err_out:
+ bpf_spin_unlock(&lock);
+ bpf_rcu_read_unlock();
+ if (m)
+ bpf_obj_drop(m);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_testmod_test_read")
+__success
+int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ struct bpf_rb_node *rb;
+ struct node_data *n, *m = NULL;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 0;
+
+ /* No explicit bpf_rcu_read_lock */
+ bpf_spin_lock(&lock);
+ bpf_rbtree_add(&root, &n->r, less);
+ rb = bpf_rbtree_first(&root);
+ if (!rb)
+ goto err_out;
+
+ rb = bpf_rbtree_remove(&root, rb);
+ if (!rb)
+ goto err_out;
+
+ m = container_of(rb, struct node_data, r);
+
+err_out:
+ bpf_spin_unlock(&lock);
+ /* No explicit bpf_rcu_read_unlock */
+ if (m)
+ bpf_obj_drop(m);
+ return 0;
+}
+
+private(kptr_ref) u64 ref;
+
+static int probe_read_refcount(void)
+{
+ u32 refcount;
+
+ bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref);
+ return refcount;
+}
+
+static int __insert_in_list(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct node_data __kptr **node)
+{
+ struct node_data *node_new, *node_ref, *node_old;
+
+ node_new = bpf_obj_new(typeof(*node_new));
+ if (!node_new)
+ return -1;
+
+ node_ref = bpf_refcount_acquire(node_new);
+ node_old = bpf_kptr_xchg(node, node_new);
+ if (node_old) {
+ bpf_obj_drop(node_old);
+ bpf_obj_drop(node_ref);
+ return -2;
+ }
+
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &node_ref->l);
+ ref = (u64)(void *) &node_ref->ref;
+ bpf_spin_unlock(lock);
+ return probe_read_refcount();
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} percpu_hash SEC(".maps");
+
+SEC("tc")
+int percpu_hash_refcount_leak(void *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&percpu_hash, &key);
+ if (!v)
+ return 0;
+
+ return __insert_in_list(&head, &lock, &v->node);
+}
+
+SEC("tc")
+int check_percpu_hash_refcount(void *ctx)
+{
+ return probe_read_refcount();
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
new file mode 100644
index 000000000000..836c8ab7b908
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct node_acquire {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+ struct bpf_refcount refcount;
+};
+
+extern void bpf_rcu_read_lock(void) __ksym;
+extern void bpf_rcu_read_unlock(void) __ksym;
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_acquire, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_acquire *node_a;
+ struct node_acquire *node_b;
+
+ node_a = container_of(a, struct node_acquire, node);
+ node_b = container_of(b, struct node_acquire, node);
+
+ return node_a->key < node_b->key;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=4 alloc_insn={{[0-9]+}}")
+long rbtree_refcounted_node_ref_escapes(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ /* m becomes an owning ref but is never drop'd or added to a tree */
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&glock);
+ if (!m)
+ return 2;
+
+ m->key = 2;
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+long refcount_acquire_maybe_null(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ /* Intentionally not testing !n
+ * it's MAYBE_NULL for refcount_acquire
+ */
+ m = bpf_refcount_acquire(n);
+ if (m)
+ bpf_obj_drop(m);
+ if (n)
+ bpf_obj_drop(n);
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=3 alloc_insn={{[0-9]+}}")
+long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ /* m becomes an owning ref but is never drop'd or added to a tree */
+ m = bpf_refcount_acquire(n);
+ m->key = 2;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?fentry.s/bpf_testmod_test_read")
+__failure __msg("function calls are not allowed while holding a lock")
+int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ struct node_acquire *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 0;
+
+ /* spin_{lock,unlock} are in different RCU CS */
+ bpf_rcu_read_lock();
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ bpf_spin_unlock(&glock);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c
new file mode 100644
index 000000000000..22c4fb8b9266
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define EDEADLK 35
+#define ETIMEDOUT 110
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+struct bpf_res_spin_lock lockA __hidden SEC(".data.A");
+struct bpf_res_spin_lock lockB __hidden SEC(".data.B");
+
+SEC("tc")
+int res_spin_lock_test(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem1, *elem2;
+ int r;
+
+ elem1 = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem1)
+ return -1;
+ elem2 = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem2)
+ return -1;
+
+ r = bpf_res_spin_lock(&elem1->lock);
+ if (r)
+ return r;
+ r = bpf_res_spin_lock(&elem2->lock);
+ if (!r) {
+ bpf_res_spin_unlock(&elem2->lock);
+ bpf_res_spin_unlock(&elem1->lock);
+ return -1;
+ }
+ bpf_res_spin_unlock(&elem1->lock);
+ return r != -EDEADLK;
+}
+
+SEC("tc")
+int res_spin_lock_test_AB(struct __sk_buff *ctx)
+{
+ int r;
+
+ r = bpf_res_spin_lock(&lockA);
+ if (r)
+ return !r;
+ /* Only unlock if we took the lock. */
+ if (!bpf_res_spin_lock(&lockB))
+ bpf_res_spin_unlock(&lockB);
+ bpf_res_spin_unlock(&lockA);
+ return 0;
+}
+
+int err;
+
+SEC("tc")
+int res_spin_lock_test_BA(struct __sk_buff *ctx)
+{
+ int r;
+
+ r = bpf_res_spin_lock(&lockB);
+ if (r)
+ return !r;
+ if (!bpf_res_spin_lock(&lockA))
+ bpf_res_spin_unlock(&lockA);
+ else
+ err = -EDEADLK;
+ bpf_res_spin_unlock(&lockB);
+ return err ?: 0;
+}
+
+SEC("tc")
+int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx)
+{
+ struct bpf_res_spin_lock *locks[48] = {};
+ struct arr_elem *e;
+ u64 time_beg, time;
+ int ret = 0, i;
+
+ _Static_assert(ARRAY_SIZE(((struct rqspinlock_held){}).locks) == 31,
+ "RES_NR_HELD assumed to be 31");
+
+ for (i = 0; i < 34; i++) {
+ int key = i;
+
+ /* We cannot pass in i as it will get spilled/filled by the compiler and
+ * loses bounds in verifier state.
+ */
+ e = bpf_map_lookup_elem(&arrmap, &key);
+ if (!e)
+ return 1;
+ locks[i] = &e->lock;
+ }
+
+ for (; i < 48; i++) {
+ int key = i - 2;
+
+ /* We cannot pass in i as it will get spilled/filled by the compiler and
+ * loses bounds in verifier state.
+ */
+ e = bpf_map_lookup_elem(&arrmap, &key);
+ if (!e)
+ return 1;
+ locks[i] = &e->lock;
+ }
+
+ time_beg = bpf_ktime_get_ns();
+ for (i = 0; i < 34; i++) {
+ if (bpf_res_spin_lock(locks[i]))
+ goto end;
+ }
+
+ /* Trigger AA, after exhausting entries in the held lock table. This
+ * time, only the timeout can save us, as AA detection won't succeed.
+ */
+ ret = bpf_res_spin_lock(locks[34]);
+ if (!ret) {
+ bpf_res_spin_unlock(locks[34]);
+ ret = 1;
+ goto end;
+ }
+
+ ret = ret != -ETIMEDOUT ? 2 : 0;
+
+end:
+ for (i = i - 1; i >= 0; i--)
+ bpf_res_spin_unlock(locks[i]);
+ time = bpf_ktime_get_ns() - time_beg;
+ /* Time spent should be easily above our limit (1/4 s), since AA
+ * detection won't be expedited due to lack of held lock entry.
+ */
+ return ret ?: (time > 1000000000 / 4 ? 0 : 1);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c
new file mode 100644
index 000000000000..330682a88c16
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+long value;
+
+struct bpf_spin_lock lock __hidden SEC(".data.A");
+struct bpf_res_spin_lock res_lock __hidden SEC(".data.B");
+
+SEC("?tc")
+__failure __msg("point to map value or allocated object")
+int res_spin_lock_arg(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock((struct bpf_res_spin_lock *)bpf_core_cast(&elem->lock, struct __sk_buff));
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("AA deadlock detected")
+int res_spin_lock_AA(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock(&elem->lock);
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("AA deadlock detected")
+int res_spin_lock_cond_AA(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock))
+ return 0;
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_mismatch_1(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock))
+ return 0;
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_mismatch_2(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ bpf_res_spin_unlock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_irq_mismatch_1(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_local_irq_save(&f1);
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_irq_mismatch_2(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&res_lock, &f1))
+ return 0;
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int res_spin_lock_ooo(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock)) {
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+ }
+ bpf_res_spin_unlock(&elem->lock);
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int res_spin_lock_ooo_irq(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1, f2;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&res_lock, &f1))
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&elem->lock, &f2)) {
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ /* We won't have a unreleased IRQ flag error here. */
+ return 0;
+ }
+ bpf_res_spin_unlock_irqrestore(&elem->lock, &f2);
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ return 0;
+}
+
+struct bpf_res_spin_lock lock1 __hidden SEC(".data.OO1");
+struct bpf_res_spin_lock lock2 __hidden SEC(".data.OO2");
+
+SEC("?tc")
+__failure __msg("bpf_res_spin_unlock cannot be out of order")
+int res_spin_lock_ooo_unlock(struct __sk_buff *ctx)
+{
+ if (bpf_res_spin_lock(&lock1))
+ return 0;
+ if (bpf_res_spin_lock(&lock2)) {
+ bpf_res_spin_unlock(&lock1);
+ return 0;
+ }
+ bpf_res_spin_unlock(&lock1);
+ bpf_res_spin_unlock(&lock2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("off 1 doesn't point to 'struct bpf_res_spin_lock' that is at 0")
+int res_spin_lock_bad_off(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock((void *)&elem->lock + 1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 doesn't have constant offset. bpf_res_spin_lock has to be at the constant offset")
+int res_spin_lock_var_off(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ u64 val = value;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem) {
+ // FIXME: Only inline assembly use in assert macro doesn't emit
+ // BTF definition.
+ bpf_throw(0);
+ return 0;
+ }
+ bpf_assert_range(val, 0, 40);
+ bpf_res_spin_lock((void *)&value + val);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("map 'res_spin.bss' has no valid bpf_res_spin_lock")
+int res_spin_lock_no_lock_map(struct __sk_buff *ctx)
+{
+ bpf_res_spin_lock((void *)&value + 1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("local 'kptr' has no valid bpf_res_spin_lock")
+int res_spin_lock_no_lock_kptr(struct __sk_buff *ctx)
+{
+ struct { int i; } *p = bpf_obj_new(typeof(*p));
+
+ if (!p)
+ return 0;
+ bpf_res_spin_lock((void *)p);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
index 123607d314d6..d96c7d1e8fc2 100644
--- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
+#include <stdbool.h>
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -13,9 +15,11 @@ struct {
const volatile int batch_cnt = 0;
const volatile long use_output = 0;
+const volatile bool bench_producer = false;
long sample_val = 42;
long dropped __attribute__((aligned(128))) = 0;
+long hits __attribute__((aligned(128))) = 0;
const volatile long wakeup_data_size = 0;
@@ -23,6 +27,9 @@ static __always_inline long get_flags()
{
long sz;
+ if (bench_producer)
+ return BPF_RB_NO_WAKEUP;
+
if (!wakeup_data_size)
return 0;
@@ -30,7 +37,7 @@ static __always_inline long get_flags()
return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_ringbuf(void *ctx)
{
long *sample, flags;
@@ -46,6 +53,8 @@ int bench_ringbuf(void *ctx)
*sample = sample_val;
flags = get_flags();
bpf_ringbuf_submit(sample, flags);
+ if (bench_producer)
+ __sync_add_and_fetch(&hits, 1);
}
}
} else {
@@ -54,6 +63,9 @@ int bench_ringbuf(void *ctx)
if (bpf_ringbuf_output(&ringbuf, &sample_val,
sizeof(sample_val), flags))
__sync_add_and_fetch(&dropped, 1);
+ else if (bench_producer)
+ __sync_add_and_fetch(&hits, 1);
+
}
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/security_bpf_map.c b/tools/testing/selftests/bpf/progs/security_bpf_map.c
new file mode 100644
index 000000000000..7216b3450e96
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/security_bpf_map.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define EPERM 1 /* Operation not permitted */
+
+/* From include/linux/mm.h. */
+#define FMODE_WRITE 0x2
+
+struct map;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} prot_status_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} prot_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} not_prot_map SEC(".maps");
+
+SEC("fmod_ret/security_bpf_map")
+int BPF_PROG(fmod_bpf_map, struct bpf_map *map, int fmode)
+{
+ __u32 key = 0;
+ __u32 *status_ptr = bpf_map_lookup_elem(&prot_status_map, &key);
+
+ if (!status_ptr || !*status_ptr)
+ return 0;
+
+ if (map == &prot_map) {
+ /* Allow read-only access */
+ if (fmode & FMODE_WRITE)
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/*
+ * This program keeps references to maps. This is needed to prevent
+ * optimizing them out.
+ */
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_dummy1, int a)
+{
+ __u32 key = 0;
+ __u32 val1 = a;
+ __u32 val2 = a + 1;
+
+ bpf_map_update_elem(&prot_map, &key, &val1, BPF_ANY);
+ bpf_map_update_elem(&not_prot_map, &key, &val2, BPF_ANY);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
index ac5abc34cde8..edc159598a0e 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
@@ -18,13 +18,9 @@
#define DST_PORT 4040
#define DST_REWRITE_PORT4 4444
-int _version SEC("version") = 1;
-
SEC("cgroup/sendmsg4")
int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
{
- int prio;
-
if (ctx->type != SOCK_DGRAM)
return 0;
@@ -53,4 +49,10 @@ int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/sendmsg4")
+int sendmsg_v4_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
index 24694b1a8d82..36a7f960799f 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
@@ -20,9 +20,12 @@
#define DST_REWRITE_IP6_2 0
#define DST_REWRITE_IP6_3 1
-#define DST_REWRITE_PORT6 6666
+#define DST_REWRITE_IP6_V4_MAPPED_0 0
+#define DST_REWRITE_IP6_V4_MAPPED_1 0
+#define DST_REWRITE_IP6_V4_MAPPED_2 0x0000FFFF
+#define DST_REWRITE_IP6_V4_MAPPED_3 0xc0a80004 // 192.168.0.4
-int _version SEC("version") = 1;
+#define DST_REWRITE_PORT6 6666
SEC("cgroup/sendmsg6")
int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
@@ -61,4 +64,56 @@ int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_v4mapped_prog(struct bpf_sock_addr *ctx)
+{
+ /* Rewrite source. */
+ ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_wildcard_prog(struct bpf_sock_addr *ctx)
+{
+ /* Rewrite source. */
+ ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(0);
+ ctx->user_ip6[1] = bpf_htonl(0);
+ ctx->user_ip6[2] = bpf_htonl(0);
+ ctx->user_ip6[3] = bpf_htonl(0);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_preserve_dst_prog(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
new file mode 100644
index 000000000000..332d0eb1116f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/sendmsg_unix")
+int sendmsg_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ /* Rewrite destination. */
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 0;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 0;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 0;
+
+ return 1;
+}
+
+SEC("cgroup/sendmsg_unix")
+int sendmsg_unix_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c
new file mode 100644
index 000000000000..ebaef28b2cb3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/set_global_vars.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include <stdbool.h>
+
+char _license[] SEC("license") = "GPL";
+
+typedef __s32 s32;
+typedef s32 i32;
+typedef __u8 u8;
+
+enum Enum { EA1 = 0, EA2 = 11, EA3 = 10 };
+enum Enumu64 {EB1 = 0llu, EB2 = 12llu };
+enum Enums64 { EC1 = 0ll, EC2 = 13ll };
+
+const volatile __s64 var_s64 = -1;
+const volatile __u64 var_u64 = 0;
+const volatile i32 var_s32 = -1;
+const volatile __u32 var_u32 = 0;
+const volatile __s16 var_s16 = -1;
+const volatile __u16 var_u16 = 0;
+const volatile __s8 var_s8 = -1;
+const volatile u8 var_u8 = 0;
+const volatile enum Enum var_ea = EA1;
+const volatile enum Enumu64 var_eb = EB1;
+const volatile enum Enums64 var_ec = EC1;
+const volatile bool var_b = false;
+const volatile i32 arr[32];
+const volatile enum Enum enum_arr[32];
+const volatile i32 three_d[47][19][17];
+const volatile i32 *ptr_arr[32];
+
+struct Struct {
+ int:16;
+ __u16 filler;
+ struct {
+ const __u16 filler2;
+ };
+ struct Struct2 {
+ __u16 filler;
+ volatile struct {
+ const int:1;
+ union {
+ const volatile u8 var_u8[3];
+ const volatile __s16 filler3;
+ const int:1;
+ s32 mat[7][5];
+ } u;
+ };
+ } struct2[2][4];
+};
+
+const volatile __u32 stru = 0; /* same prefix as below */
+const volatile struct Struct struct1[3];
+const volatile struct Struct struct11[11][7];
+
+struct Struct3 {
+ struct {
+ u8 var_u8_l;
+ };
+ struct {
+ struct {
+ u8 var_u8_h;
+ };
+ };
+};
+
+typedef struct Struct3 Struct3_t;
+
+union Union {
+ __u16 var_u16;
+ Struct3_t struct3;
+};
+
+const volatile union Union union1 = {.var_u16 = -1};
+
+SEC("socket")
+int test_set_globals(void *ctx)
+{
+ volatile __s8 a;
+
+ a = var_s64;
+ a = var_u64;
+ a = var_s32;
+ a = var_u32;
+ a = var_s16;
+ a = var_u16;
+ a = var_s8;
+ a = var_u8;
+ a = var_ea;
+ a = var_eb;
+ a = var_ec;
+ a = var_b;
+ a = struct1[2].struct2[1][2].u.var_u8[2];
+ a = union1.var_u16;
+ a = arr[3];
+ a = arr[EA2];
+ a = enum_arr[EC2];
+ a = three_d[31][7][EA2];
+ a = struct1[2].struct2[1][2].u.mat[5][3];
+ a = struct11[7][5].struct2[0][1].u.mat[3][0];
+
+ return a;
+}
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
new file mode 100644
index 000000000000..d330b1511979
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+extern unsigned long CONFIG_HZ __kconfig;
+
+const volatile char veth[IFNAMSIZ];
+const volatile int veth_ifindex;
+
+int nr_listen;
+int nr_passive;
+int nr_active;
+int nr_connect;
+int nr_binddev;
+int nr_socket_post_create;
+int nr_fin_wait1;
+
+struct sockopt_test {
+ int opt;
+ int new;
+ int restore;
+ int expected;
+ int tcp_expected;
+ unsigned int flip:1;
+};
+
+static const char not_exist_cc[] = "not_exist";
+static const char cubic_cc[] = "cubic";
+static const char reno_cc[] = "reno";
+
+static const struct sockopt_test sol_socket_tests[] = {
+ { .opt = SO_REUSEADDR, .flip = 1, },
+ { .opt = SO_SNDBUF, .new = 8123, .expected = 8123 * 2, },
+ { .opt = SO_RCVBUF, .new = 8123, .expected = 8123 * 2, },
+ { .opt = SO_KEEPALIVE, .flip = 1, },
+ { .opt = SO_PRIORITY, .new = 0xeb9f, .expected = 0xeb9f, },
+ { .opt = SO_REUSEPORT, .flip = 1, },
+ { .opt = SO_RCVLOWAT, .new = 8123, .expected = 8123, },
+ { .opt = SO_MARK, .new = 0xeb9f, .expected = 0xeb9f, },
+ { .opt = SO_MAX_PACING_RATE, .new = 0xeb9f, .expected = 0xeb9f, },
+ { .opt = SO_TXREHASH, .flip = 1, },
+ { .opt = 0, },
+};
+
+static const struct sockopt_test sol_tcp_tests[] = {
+ { .opt = TCP_NODELAY, .flip = 1, },
+ { .opt = TCP_KEEPIDLE, .new = 123, .expected = 123, .restore = 321, },
+ { .opt = TCP_KEEPINTVL, .new = 123, .expected = 123, .restore = 321, },
+ { .opt = TCP_KEEPCNT, .new = 123, .expected = 123, .restore = 124, },
+ { .opt = TCP_SYNCNT, .new = 123, .expected = 123, .restore = 124, },
+ { .opt = TCP_WINDOW_CLAMP, .new = 8123, .expected = 8123, .restore = 8124, },
+ { .opt = TCP_CONGESTION, },
+ { .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
+ { .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
+ { .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
+ { .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
+ .expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
+ { .opt = TCP_BPF_DELACK_MAX, .new = 30000, .expected = 30000, },
+ { .opt = TCP_BPF_RTO_MIN, .new = 30000, .expected = 30000, },
+ { .opt = TCP_RTO_MAX_MS, .new = 2000, .expected = 2000, },
+ { .opt = 0, },
+};
+
+static const struct sockopt_test sol_ip_tests[] = {
+ { .opt = IP_TOS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
+ { .opt = 0, },
+};
+
+static const struct sockopt_test sol_ipv6_tests[] = {
+ { .opt = IPV6_TCLASS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
+ { .opt = IPV6_AUTOFLOWLABEL, .flip = 1, },
+ { .opt = 0, },
+};
+
+struct loop_ctx {
+ void *ctx;
+ struct sock *sk;
+};
+
+static bool sk_is_tcp(struct sock *sk)
+{
+ return (sk->__sk_common.skc_family == AF_INET ||
+ sk->__sk_common.skc_family == AF_INET6) &&
+ sk->sk_type == SOCK_STREAM &&
+ sk->sk_protocol == IPPROTO_TCP;
+}
+
+static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
+ const struct sockopt_test *t,
+ int level)
+{
+ int old, tmp, new, opt = t->opt;
+
+ opt = t->opt;
+
+ if (opt == SO_TXREHASH && !sk_is_tcp(sk))
+ return 0;
+
+ if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)))
+ return 1;
+ /* kernel initialized txrehash to 255 */
+ if (level == SOL_SOCKET && opt == SO_TXREHASH && old != 0 && old != 1)
+ old = 1;
+
+ new = !old;
+ if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
+ return 1;
+ if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) ||
+ tmp != new)
+ return 1;
+
+ if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old)))
+ return 1;
+
+ return 0;
+}
+
+static int bpf_test_sockopt_int(void *ctx, struct sock *sk,
+ const struct sockopt_test *t,
+ int level)
+{
+ int old, tmp, new, expected, opt;
+
+ opt = t->opt;
+ new = t->new;
+ if (sk->sk_type == SOCK_STREAM && t->tcp_expected)
+ expected = t->tcp_expected;
+ else
+ expected = t->expected;
+
+ if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)) ||
+ old == new)
+ return 1;
+
+ if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
+ return 1;
+ if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) ||
+ tmp != expected)
+ return 1;
+
+ if (t->restore)
+ old = t->restore;
+ if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old)))
+ return 1;
+
+ return 0;
+}
+
+static int bpf_test_socket_sockopt(__u32 i, struct loop_ctx *lc)
+{
+ const struct sockopt_test *t;
+
+ if (i >= ARRAY_SIZE(sol_socket_tests))
+ return 1;
+
+ t = &sol_socket_tests[i];
+ if (!t->opt)
+ return 1;
+
+ if (t->flip)
+ return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, SOL_SOCKET);
+
+ return bpf_test_sockopt_int(lc->ctx, lc->sk, t, SOL_SOCKET);
+}
+
+static int bpf_test_ip_sockopt(__u32 i, struct loop_ctx *lc)
+{
+ const struct sockopt_test *t;
+
+ if (i >= ARRAY_SIZE(sol_ip_tests))
+ return 1;
+
+ t = &sol_ip_tests[i];
+ if (!t->opt)
+ return 1;
+
+ if (t->flip)
+ return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IP);
+
+ return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IP);
+}
+
+static int bpf_test_ipv6_sockopt(__u32 i, struct loop_ctx *lc)
+{
+ const struct sockopt_test *t;
+
+ if (i >= ARRAY_SIZE(sol_ipv6_tests))
+ return 1;
+
+ t = &sol_ipv6_tests[i];
+ if (!t->opt)
+ return 1;
+
+ if (t->flip)
+ return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IPV6);
+
+ return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IPV6);
+}
+
+static int bpf_test_tcp_sockopt(__u32 i, struct loop_ctx *lc)
+{
+ const struct sockopt_test *t;
+ struct sock *sk;
+ void *ctx;
+
+ if (i >= ARRAY_SIZE(sol_tcp_tests))
+ return 1;
+
+ t = &sol_tcp_tests[i];
+ if (!t->opt)
+ return 1;
+
+ ctx = lc->ctx;
+ sk = lc->sk;
+
+ if (t->opt == TCP_CONGESTION) {
+ char old_cc[16], tmp_cc[16];
+ const char *new_cc;
+ int new_cc_len;
+
+ if (!bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION,
+ (void *)not_exist_cc, sizeof(not_exist_cc)))
+ return 1;
+ if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
+ return 1;
+ if (!bpf_strncmp(old_cc, sizeof(old_cc), cubic_cc)) {
+ new_cc = reno_cc;
+ new_cc_len = sizeof(reno_cc);
+ } else {
+ new_cc = cubic_cc;
+ new_cc_len = sizeof(cubic_cc);
+ }
+ if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, (void *)new_cc,
+ new_cc_len))
+ return 1;
+ if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, tmp_cc, sizeof(tmp_cc)))
+ return 1;
+ if (bpf_strncmp(tmp_cc, sizeof(tmp_cc), new_cc))
+ return 1;
+ if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
+ return 1;
+ return 0;
+ }
+
+ if (t->flip)
+ return bpf_test_sockopt_flip(ctx, sk, t, IPPROTO_TCP);
+
+ return bpf_test_sockopt_int(ctx, sk, t, IPPROTO_TCP);
+}
+
+static int bpf_test_sockopt(void *ctx, struct sock *sk)
+{
+ struct loop_ctx lc = { .ctx = ctx, .sk = sk, };
+ __u16 family, proto;
+ int n;
+
+ family = sk->sk_family;
+ proto = sk->sk_protocol;
+
+ n = bpf_loop(ARRAY_SIZE(sol_socket_tests), bpf_test_socket_sockopt, &lc, 0);
+ if (n != ARRAY_SIZE(sol_socket_tests))
+ return -1;
+
+ if (proto == IPPROTO_TCP) {
+ n = bpf_loop(ARRAY_SIZE(sol_tcp_tests), bpf_test_tcp_sockopt, &lc, 0);
+ if (n != ARRAY_SIZE(sol_tcp_tests))
+ return -1;
+ }
+
+ if (family == AF_INET) {
+ n = bpf_loop(ARRAY_SIZE(sol_ip_tests), bpf_test_ip_sockopt, &lc, 0);
+ if (n != ARRAY_SIZE(sol_ip_tests))
+ return -1;
+ } else {
+ n = bpf_loop(ARRAY_SIZE(sol_ipv6_tests), bpf_test_ipv6_sockopt, &lc, 0);
+ if (n != ARRAY_SIZE(sol_ipv6_tests))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int binddev_test(void *ctx)
+{
+ const char empty_ifname[] = "";
+ int ifindex, zero = 0;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ (void *)veth, sizeof(veth)))
+ return -1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &ifindex, sizeof(int)) ||
+ ifindex != veth_ifindex)
+ return -1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ (void *)empty_ifname, sizeof(empty_ifname)))
+ return -1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &ifindex, sizeof(int)) ||
+ ifindex != 0)
+ return -1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ (void *)&veth_ifindex, sizeof(int)))
+ return -1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &ifindex, sizeof(int)) ||
+ ifindex != veth_ifindex)
+ return -1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &zero, sizeof(int)))
+ return -1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &ifindex, sizeof(int)) ||
+ ifindex != 0)
+ return -1;
+
+ return 0;
+}
+
+static int test_tcp_maxseg(void *ctx, struct sock *sk)
+{
+ int val = 1314, tmp;
+
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG,
+ &val, sizeof(val));
+
+ if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG, &tmp, sizeof(tmp)) ||
+ tmp > val)
+ return -1;
+
+ return 0;
+}
+
+static int test_tcp_saved_syn(void *ctx, struct sock *sk)
+{
+ __u8 saved_syn[20];
+ int one = 1;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_SAVE_SYN,
+ &one, sizeof(one));
+
+ return bpf_getsockopt(ctx, IPPROTO_TCP, TCP_SAVED_SYN,
+ saved_syn, sizeof(saved_syn));
+}
+
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ struct sock *sk = sock->sk;
+
+ if (!sk)
+ return 1;
+
+ nr_socket_post_create += !bpf_test_sockopt(sk, sk);
+ nr_binddev += !binddev_test(sk);
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ int *optval = ctx->optval;
+ struct tcp_sock *tp;
+
+ if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS)
+ return 1;
+
+ tp = bpf_core_cast(sk, struct tcp_sock);
+ if (ctx->optval + sizeof(int) <= ctx->optval_end) {
+ *optval = tp->bpf_sock_ops_cb_flags;
+ ctx->retval = 0;
+ }
+ return 1;
+}
+
+SEC("sockops")
+int skops_sockopt(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *bpf_sk = skops->sk;
+ struct sock *sk;
+ int flags;
+
+ if (!bpf_sk)
+ return 1;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
+ if (!sk)
+ return 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ nr_listen += !(bpf_test_sockopt(skops, sk) ||
+ test_tcp_maxseg(skops, sk) ||
+ test_tcp_saved_syn(skops, sk));
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ nr_connect += !(bpf_test_sockopt(skops, sk) ||
+ test_tcp_maxseg(skops, sk));
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ nr_active += !(bpf_test_sockopt(skops, sk) ||
+ test_tcp_maxseg(skops, sk));
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ nr_passive += !(bpf_test_sockopt(skops, sk) ||
+ test_tcp_maxseg(skops, sk) ||
+ test_tcp_saved_syn(skops, sk));
+ flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG;
+ bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags));
+ break;
+ case BPF_SOCK_OPS_STATE_CB:
+ if (skops->args[1] == BPF_TCP_CLOSE_WAIT)
+ nr_fin_wait1 += !bpf_test_sockopt(skops, sk);
+ break;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..09a00d11ffcc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+extern int tcp_memory_per_cpu_fw_alloc __ksym;
+extern int udp_memory_per_cpu_fw_alloc __ksym;
+
+int nr_cpus;
+bool tcp_activated, udp_activated;
+long tcp_memory_allocated, udp_memory_allocated;
+
+struct sk_prot {
+ long *memory_allocated;
+ int *memory_per_cpu_fw_alloc;
+};
+
+static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx)
+{
+ int *memory_per_cpu_fw_alloc;
+
+ memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i);
+ if (memory_per_cpu_fw_alloc)
+ *sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc;
+
+ return 0;
+}
+
+static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc)
+{
+ struct sock *sk = bpf_core_cast(_sk, struct sock);
+ struct sk_prot sk_prot_ctx;
+ long memory_allocated;
+
+ /* net_aligned_data.{tcp,udp}_memory_allocated was not available. */
+ memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter;
+
+ sk_prot_ctx.memory_allocated = &memory_allocated;
+ sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc;
+
+ bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0);
+
+ return memory_allocated;
+}
+
+static void fentry_init_sock(struct sock *sk, bool *activated,
+ long *memory_allocated, int *memory_per_cpu_fw_alloc)
+{
+ if (!*activated)
+ return;
+
+ *memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc);
+ *activated = false;
+}
+
+SEC("fentry/tcp_init_sock")
+int BPF_PROG(fentry_tcp_init_sock, struct sock *sk)
+{
+ fentry_init_sock(sk, &tcp_activated,
+ &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc);
+ return 0;
+}
+
+SEC("fentry/udp_init_sock")
+int BPF_PROG(fentry_udp_init_sock, struct sock *sk)
+{
+ fentry_init_sock(sk, &udp_activated,
+ &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc);
+ return 0;
+}
+
+SEC("cgroup/sock_create")
+int sock_create(struct bpf_sock *ctx)
+{
+ int err, val = 1;
+
+ err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
+ &val, sizeof(val));
+ if (err)
+ goto err;
+
+ val = 0;
+
+ err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
+ &val, sizeof(val));
+ if (err)
+ goto err;
+
+ if (val != 1) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ return 1;
+
+err:
+ bpf_set_retval(err);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
new file mode 100644
index 000000000000..46d6eb2a3b17
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+void *local_storage_ptr = NULL;
+void *sk_ptr = NULL;
+int cookie_found = 0;
+__u64 cookie = 0;
+__u32 omem = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_storage SEC(".maps");
+
+SEC("fexit/bpf_local_storage_destroy")
+int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
+{
+ struct sock *sk;
+
+ if (local_storage_ptr != local_storage)
+ return 0;
+
+ sk = bpf_core_cast(sk_ptr, struct sock);
+ if (sk->sk_cookie.counter != cookie)
+ return 0;
+
+ cookie_found++;
+ omem = sk->sk_omem_alloc.counter;
+ local_storage_ptr = NULL;
+
+ return 0;
+}
+
+SEC("fentry/inet6_sock_destruct")
+int BPF_PROG(inet6_sock_destruct, struct sock *sk)
+{
+ int *value;
+
+ if (!cookie || sk->sk_cookie.counter != cookie)
+ return 0;
+
+ value = bpf_sk_storage_get(&sk_storage, sk, 0, 0);
+ if (value && *value == 0xdeadbeef) {
+ cookie_found++;
+ sk_ptr = sk;
+ local_storage_ptr = sk->sk_bpf_storage;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/skb_load_bytes.c b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
new file mode 100644
index 000000000000..e4252fd973be
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 load_offset = 0;
+int test_result = 0;
+
+SEC("tc")
+int skb_process(struct __sk_buff *skb)
+{
+ char buf[16];
+
+ test_result = bpf_skb_load_bytes(skb, load_offset, buf, 10);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
index cf6823f42e80..3bb4451524a1 100644
--- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
+#ifndef BPF_NO_PRESERVE_ACCESS_INDEX
#define BPF_NO_PRESERVE_ACCESS_INDEX
+#endif
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
-#define NULL 0
#define INLINE __always_inline
#define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end)
@@ -26,12 +27,14 @@ out:
return ip;
}
-SEC("classifier/cls")
+SEC("tc")
int main_prog(struct __sk_buff *skb)
{
struct iphdr *ip = NULL;
struct tcphdr *tcp;
__u8 proto = 0;
+ int urg_ptr;
+ u32 offset;
if (!(ip = get_iphdr(skb)))
goto out;
@@ -47,7 +50,14 @@ int main_prog(struct __sk_buff *skb)
if (!tcp)
goto out;
- return tcp->urg_ptr;
+ urg_ptr = tcp->urg_ptr;
+
+ /* Checksum validation part */
+ proto++;
+ offset = sizeof(struct ethhdr) + offsetof(struct iphdr, protocol);
+ bpf_skb_store_bytes(skb, offset, &proto, sizeof(proto), BPF_F_RECOMPUTE_CSUM);
+
+ return urg_ptr;
out:
return -1;
}
diff --git a/tools/testing/selftests/bpf/progs/sock_addr_kern.c b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
new file mode 100644
index 000000000000..84ad515eafd6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("syscall")
+int init_sock(struct init_sock_args *args)
+{
+ bpf_kfunc_init_sock(args);
+
+ return 0;
+}
+
+SEC("syscall")
+int close_sock(void *ctx)
+{
+ bpf_kfunc_close_sock();
+
+ return 0;
+}
+
+SEC("syscall")
+int kernel_connect(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_connect(args);
+}
+
+SEC("syscall")
+int kernel_bind(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_bind(args);
+}
+
+SEC("syscall")
+int kernel_listen(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_listen();
+}
+
+SEC("syscall")
+int kernel_sendmsg(struct sendmsg_args *args)
+{
+ return bpf_kfunc_call_kernel_sendmsg(args);
+}
+
+SEC("syscall")
+int sock_sendmsg(struct sendmsg_args *args)
+{
+ return bpf_kfunc_call_sock_sendmsg(args);
+}
+
+SEC("syscall")
+int kernel_getsockname(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_getsockname(args);
+}
+
+SEC("syscall")
+int kernel_getpeername(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_getpeername(args);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c
new file mode 100644
index 000000000000..9e0bf7a54cec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "bpf_tracing_net.h"
+
+__be16 serv_port = 0;
+
+int bpf_sock_destroy(struct sock_common *sk) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} tcp_conn_sockets SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} udp_conn_sockets SEC(".maps");
+
+SEC("cgroup/connect6")
+int sock_connect(struct bpf_sock_addr *ctx)
+{
+ __u64 sock_cookie = 0;
+ int key = 0;
+ __u32 keyc = 0;
+
+ if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+ return 1;
+
+ sock_cookie = bpf_get_socket_cookie(ctx);
+ if (ctx->protocol == IPPROTO_TCP)
+ bpf_map_update_elem(&tcp_conn_sockets, &key, &sock_cookie, 0);
+ else if (ctx->protocol == IPPROTO_UDP)
+ bpf_map_update_elem(&udp_conn_sockets, &keyc, &sock_cookie, 0);
+ else
+ return 1;
+
+ return 1;
+}
+
+SEC("iter/tcp")
+int iter_tcp6_client(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ __u64 sock_cookie = 0;
+ __u64 *val;
+ int key = 0;
+
+ if (!sk_common)
+ return 0;
+
+ if (sk_common->skc_family != AF_INET6)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk_common);
+ val = bpf_map_lookup_elem(&tcp_conn_sockets, &key);
+ if (!val)
+ return 0;
+ /* Destroy connected client sockets. */
+ if (sock_cookie == *val)
+ bpf_sock_destroy(sk_common);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int iter_tcp6_server(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ const struct inet_connection_sock *icsk;
+ const struct inet_sock *inet;
+ struct tcp6_sock *tcp_sk;
+ __be16 srcp;
+
+ if (!sk_common)
+ return 0;
+
+ if (sk_common->skc_family != AF_INET6)
+ return 0;
+
+ tcp_sk = bpf_skc_to_tcp6_sock(sk_common);
+ if (!tcp_sk)
+ return 0;
+
+ icsk = &tcp_sk->tcp.inet_conn;
+ inet = &icsk->icsk_inet;
+ srcp = inet->inet_sport;
+
+ /* Destroy server sockets. */
+ if (srcp == serv_port)
+ bpf_sock_destroy(sk_common);
+
+ return 0;
+}
+
+
+SEC("iter/udp")
+int iter_udp6_client(struct bpf_iter__udp *ctx)
+{
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct sock *sk = (struct sock *) udp_sk;
+ __u64 sock_cookie = 0, *val;
+ int key = 0;
+
+ if (!sk)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk);
+ val = bpf_map_lookup_elem(&udp_conn_sockets, &key);
+ if (!val)
+ return 0;
+ /* Destroy connected client sockets. */
+ if (sock_cookie == *val)
+ bpf_sock_destroy((struct sock_common *)sk);
+
+ return 0;
+}
+
+SEC("iter/udp")
+int iter_udp6_server(struct bpf_iter__udp *ctx)
+{
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct sock *sk = (struct sock *) udp_sk;
+ struct inet_sock *inet;
+ __be16 srcp;
+
+ if (!sk)
+ return 0;
+
+ inet = &udp_sk->inet;
+ srcp = inet->inet_sport;
+ if (srcp == serv_port)
+ bpf_sock_destroy((struct sock_common *)sk);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c
new file mode 100644
index 000000000000..dd6850b58e25
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int bpf_sock_destroy(struct sock_common *sk) __ksym;
+
+SEC("tp_btf/tcp_destroy_sock")
+__failure __msg("calling kernel function bpf_sock_destroy is not allowed")
+int BPF_PROG(trace_tcp_destroy_sock, struct sock *sk)
+{
+ /* should not load */
+ bpf_sock_destroy((struct sock_common *)sk);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
new file mode 100644
index 000000000000..77966ded5467
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Meta
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
+#include "bpf_kfuncs.h"
+
+#define ATTR __always_inline
+#include "test_jhash.h"
+
+static bool ipv6_addr_loopback(const struct in6_addr *a)
+{
+ return (a->s6_addr32[0] | a->s6_addr32[1] |
+ a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0;
+}
+
+static bool ipv4_addr_loopback(__be32 a)
+{
+ return a == bpf_ntohl(0x7f000001);
+}
+
+volatile const unsigned int sf;
+volatile const unsigned int ss;
+volatile const __u16 ports[2];
+unsigned int bucket[2];
+
+SEC("iter/tcp")
+int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
+{
+ struct sock *sk = (struct sock *)ctx->sk_common;
+ struct inet_hashinfo *hinfo;
+ unsigned int hash;
+ __u64 sock_cookie;
+ struct net *net;
+ int idx;
+
+ if (!sk)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk);
+ sk = bpf_core_cast(sk, struct sock);
+ if (sk->sk_family != sf ||
+ (ss && sk->sk_state != ss) ||
+ (sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
+ return 0;
+
+ if (sk->sk_num == ports[0])
+ idx = 0;
+ else if (sk->sk_num == ports[1])
+ idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
+ else
+ return 0;
+
+ /* bucket selection as in inet_lhash2_bucket_sk() */
+ net = sk->sk_net.net;
+ hash = jhash2(sk->sk_v6_rcv_saddr.s6_addr32, 4, net->hash_mix);
+ hash ^= sk->sk_num;
+ hinfo = net->ipv4.tcp_death_row.hashinfo;
+ bucket[idx] = hash & hinfo->lhash2_mask;
+ bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
+
+ return 0;
+}
+
+volatile const __u64 destroy_cookie;
+
+SEC("iter/tcp")
+int iter_tcp_destroy(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = (struct sock_common *)ctx->sk_common;
+ __u64 sock_cookie;
+
+ if (!sk_common)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk_common);
+ if (sock_cookie != destroy_cookie)
+ return 0;
+
+ bpf_sock_destroy(sk_common);
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
+
+ return 0;
+}
+
+#define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk)
+
+SEC("iter/udp")
+int iter_udp_soreuse(struct bpf_iter__udp *ctx)
+{
+ struct sock *sk = (struct sock *)ctx->udp_sk;
+ struct udp_table *udptable;
+ __u64 sock_cookie;
+ int idx;
+
+ if (!sk)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk);
+ sk = bpf_core_cast(sk, struct sock);
+ if (sk->sk_family != sf ||
+ (sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
+ return 0;
+
+ if (sk->sk_num == ports[0])
+ idx = 0;
+ else if (sk->sk_num == ports[1])
+ idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
+ else
+ return 0;
+
+ /* bucket selection as in udp_hashslot2() */
+ udptable = sk->sk_net.net->ipv4.udp_table;
+ bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask;
+ bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index ca283af80d4e..c9abfe3a11af 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -2,15 +2,11 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
void *data_end = (void *)(long) skb->data_end;
void *data = (void *)(long) skb->data;
- __u32 lport = skb->local_port;
- __u32 rport = skb->remote_port;
__u8 *d = data;
int err;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index fdb4bf4408fa..80632954c5a1 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -3,23 +3,9 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
SEC("sk_msg1")
int bpf_prog1(struct sk_msg_md *msg)
{
- void *data_end = (void *)(long) msg->data_end;
- void *data = (void *)(long) msg->data;
-
- char *d;
-
- if (data + 8 > data_end)
- return SK_DROP;
-
- bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
- d = (char *)data;
- bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
-
return SK_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index 4797dc985064..0660f29dca95 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -1,28 +1,27 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-
-int _version SEC("version") = 1;
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} sock_map_rx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} sock_map_tx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} sock_map_msg SEC(".maps");
struct {
@@ -42,6 +41,9 @@ int bpf_prog2(struct __sk_buff *skb)
__u8 *d = data;
__u8 sk, map;
+ __sink(lport);
+ __sink(rport);
+
if (data + 8 > data_end)
return SK_DROP;
diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
index c6d428a8d785..a3434b840928 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
@@ -3,13 +3,14 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
#define SOL_CUSTOM 0xdeadbeef
#define CUSTOM_INHERIT1 0
#define CUSTOM_INHERIT2 1
#define CUSTOM_LISTENER 2
+__s32 page_size = 0;
+
struct sockopt_inherit {
__u8 val;
};
@@ -56,7 +57,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_CUSTOM)
- return 1; /* only interested in SOL_CUSTOM */
+ goto out; /* only interested in SOL_CUSTOM */
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -71,6 +72,12 @@ int _getsockopt(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/setsockopt")
@@ -81,7 +88,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_CUSTOM)
- return 1; /* only interested in SOL_CUSTOM */
+ goto out; /* only interested in SOL_CUSTOM */
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -94,4 +101,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
ctx->optlen = -1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c
index 9d8c212dde9f..db67278e12d4 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c
@@ -4,16 +4,17 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
-SEC("cgroup/getsockopt/child")
+__s32 page_size = 0;
+
+SEC("cgroup/getsockopt")
int _getsockopt_child(struct bpf_sockopt *ctx)
{
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -27,16 +28,22 @@ int _getsockopt_child(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
-SEC("cgroup/getsockopt/parent")
+SEC("cgroup/getsockopt")
int _getsockopt_parent(struct bpf_sockopt *ctx)
{
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -50,6 +57,12 @@ int _getsockopt_parent(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/setsockopt")
@@ -59,7 +72,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -68,4 +81,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..5c3614333b01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+__s32 page_size = 0;
+
+const char cc_reno[TCP_CA_NAME_MAX] = "reno";
+const char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
+SEC("cgroup/setsockopt")
+int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
+{
+ void *optval_end = ctx->optval_end;
+ int *optval = ctx->optval;
+ char buf[TCP_CA_NAME_MAX];
+
+ if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
+ goto out;
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+ return 0;
+
+ if (bpf_strncmp(buf, sizeof(buf), cc_cubic))
+ return 0;
+
+ if (*optval == 0x2d) {
+ if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, (void *)&cc_reno,
+ sizeof(cc_reno)))
+ return 0;
+ }
+ return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index d3597f81e6e9..cb990a7d3d45 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -6,11 +6,8 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
+int page_size = 0; /* userspace should set it */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
@@ -35,14 +32,27 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ struct bpf_sock *sk;
+
+ /* Bypass AF_NETLINK. */
+ sk = ctx->sk;
+ if (sk && sk->family == AF_NETLINK)
+ goto out;
+
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
- ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
- return 1;
+ goto out;
}
if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
@@ -50,7 +60,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
- return 1;
+ goto out;
}
if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
@@ -58,7 +68,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
- return 1;
+ goto out;
}
if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
@@ -67,18 +77,19 @@ int _getsockopt(struct bpf_sockopt *ctx)
* reasons.
*/
- if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
- return 0; /* EPERM, bounds check */
+ /* Check that optval contains address (__u64) */
+ if (optval + sizeof(__u64) > optval_end)
+ return 0; /* bounds check */
if (((struct tcp_zerocopy_receive *)optval)->address != 0)
- return 0; /* EPERM, unexpected data */
+ return 0; /* unexpected data */
- return 1;
+ goto out;
}
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
ctx->retval = 0; /* Reset system call return value to zero */
@@ -90,25 +101,25 @@ int _getsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
- return 0; /* EPERM, unexpected data size */
+ if (optval_end - optval != page_size)
+ return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
- return 0; /* EPERM, deny everything except custom level */
+ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
- return 0; /* EPERM, couldn't get sk storage */
+ return 0; /* couldn't get sk storage */
if (!ctx->retval)
- return 0; /* EPERM, kernel should not have handled
+ return 0; /* kernel should not have handled
* SOL_CUSTOM, something is wrong!
*/
ctx->retval = 0; /* Reset system call return value to zero */
@@ -117,6 +128,12 @@ int _getsockopt(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/setsockopt")
@@ -125,6 +142,20 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ struct bpf_sock *sk;
+
+ /* Bypass AF_NETLINK. */
+ sk = ctx->sk;
+ if (sk && sk->family == AF_NETLINK)
+ goto out;
+
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
@@ -139,7 +170,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
/* Overwrite SO_SNDBUF value */
if (optval + sizeof(__u32) > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
*(__u32 *)optval = 0x55AA;
ctx->optlen = 4;
@@ -151,7 +182,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
/* Always use cubic */
if (optval + 5 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
memcpy(optval, "cubic", 5);
ctx->optlen = 5;
@@ -161,11 +192,11 @@ int _setsockopt(struct bpf_sockopt *ctx)
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */
- if (ctx->optlen != PAGE_SIZE * 2)
- return 0; /* EPERM, unexpected data size */
+ if (ctx->optlen != page_size * 2)
+ return 0; /* unexpected data size */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
/* Make sure we can trim the buffer. */
optval[0] = 0;
@@ -175,22 +206,22 @@ int _setsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
- return 0; /* EPERM, unexpected data size */
+ if (optval_end - optval != page_size)
+ return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
- return 0; /* EPERM, deny everything except custom level */
+ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
- return 0; /* EPERM, couldn't get sk storage */
+ return 0; /* couldn't get sk storage */
storage->val = optval[0];
ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
@@ -198,4 +229,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
*/
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
new file mode 100644
index 000000000000..a96c8150d7f5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 16384);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+extern bool CONFIG_UNWINDER_ORC __kconfig __weak;
+
+/*
+ * This function is here to have CONFIG_UNWINDER_ORC
+ * used and added to object BTF.
+ */
+int unused(void)
+{
+ return CONFIG_UNWINDER_ORC ? 0 : 1;
+}
+
+__u32 stack_key;
+
+SEC("kprobe.multi")
+int kprobe_multi_test(struct pt_regs *ctx)
+{
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+ return 0;
+}
+
+SEC("raw_tp/bpf_testmod_test_read")
+int rawtp_test(void *ctx)
+{
+ /* Skip ebpf program entry in the stack. */
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/stacktrace_map.c
index 00ed48672620..0c77df05be7f 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#ifndef PERF_MAX_STACK_DEPTH
@@ -27,8 +27,8 @@ typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(max_entries, 16384);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(stack_trace_t));
+ __type(key, __u32);
+ __type(value, stack_trace_t);
} stackmap SEC(".maps");
struct {
@@ -38,18 +38,19 @@ struct {
__type(value, stack_trace_t);
} stack_amap SEC(".maps");
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
+__u32 stack_id;
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
@@ -64,6 +65,7 @@ int oncpu(struct sched_switch_args *ctx)
/* The size of stackmap and stackid_hmap should be the same */
key = bpf_get_stackid(ctx, &stackmap, 0);
if ((int)key >= 0) {
+ stack_id = key;
bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
stack_p = bpf_map_lookup_elem(&stack_amap, &key);
if (stack_p)
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
new file mode 100644
index 000000000000..2eb297df3dd6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define TEST_STACK_DEPTH 2
+#define TEST_MAX_ENTRIES 16384
+
+typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, TEST_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, TEST_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} stackid_hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, TEST_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stack_amap SEC(".maps");
+
+int pid = 0;
+int control = 0;
+int failed = 0;
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct trace_event_raw_sched_switch *ctx)
+{
+ __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
+ __u32 key = 0, val = 0;
+ __u64 *stack_p;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ if (control)
+ return 0;
+
+ /* it should allow skipping whole buffer size entries */
+ key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
+ if ((int)key >= 0) {
+ /* The size of stackmap and stack_amap should be the same */
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+ stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+ if (stack_p) {
+ bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
+ /* it wrongly skipped all the entries and filled zero */
+ if (stack_p[0] == 0)
+ failed = 1;
+ }
+ } else {
+ /* old kernel doesn't support skipping that many entries */
+ failed = 2;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c
new file mode 100644
index 000000000000..4a5bd852f10c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stream.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1); /* number of pages */
+} arena SEC(".maps");
+
+struct elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+#define ENOSPC 28
+#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+int size;
+u64 fault_addr;
+void *arena_ptr;
+
+SEC("syscall")
+__success __retval(0)
+int stream_exhaust(void *ctx)
+{
+ /* Use global variable for loop convergence. */
+ size = 0;
+ bpf_repeat(BPF_MAX_LOOPS) {
+ if (bpf_stream_printk(BPF_STDOUT, _STR) == -ENOSPC && size == 99954)
+ return 0;
+ size += sizeof(_STR) - 1;
+ }
+ return 1;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__arch_s390x
+__success __retval(0)
+__stderr("ERROR: Timeout detected for may_goto instruction")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_cond_break(void *ctx)
+{
+ while (can_loop)
+ ;
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+__stderr("ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock")
+__stderr("{{Attempted lock = (0x[0-9a-fA-F]+)\n"
+"Total held locks = 1\n"
+"Held lock\\[ 0\\] = \\1}}")
+__stderr("...")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_deadlock(void *ctx)
+{
+ struct bpf_res_spin_lock *lock, *nlock;
+
+ lock = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!lock)
+ return 1;
+ nlock = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!nlock)
+ return 1;
+ if (bpf_res_spin_lock(lock))
+ return 1;
+ if (bpf_res_spin_lock(nlock)) {
+ bpf_res_spin_unlock(lock);
+ return 0;
+ }
+ bpf_res_spin_unlock(nlock);
+ bpf_res_spin_unlock(lock);
+ return 1;
+}
+
+SEC("syscall")
+__success __retval(0)
+int stream_syscall(void *ctx)
+{
+ bpf_stream_printk(BPF_STDOUT, "foo");
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_write_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+ * bounds analysis
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r2 = 1;"
+ "*(u32 *)(r1 + 0x7fff) = r2;"
+ :
+ : "r" (user_vm_start)
+ : "r1", "r2"
+ );
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena READ access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_read_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+ * bounds analysis
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r1 = *(u32 *)(r1 + 0x7fff);"
+ :
+ : "r" (user_vm_start)
+ : "r1"
+ );
+ return 0;
+}
+
+static __noinline void subprog(void)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_subprog_fault(void *ctx)
+{
+ subprog();
+ return 0;
+}
+
+static __noinline int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_callback_fault(void *ctx)
+{
+ struct bpf_timer *arr_timer;
+
+ arr_timer = bpf_map_lookup_elem(&array, &(int){0});
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, 1);
+ bpf_timer_set_callback(arr_timer, timer_cb);
+ bpf_timer_start(arr_timer, 0, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c
new file mode 100644
index 000000000000..3662515f0107
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stream_fail.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+SEC("syscall")
+__failure __msg("Possibly NULL pointer passed")
+int stream_vprintk_null_arg(void *ctx)
+{
+ bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL);
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("R3 type=scalar expected=")
+int stream_vprintk_scalar_arg(void *ctx)
+{
+ bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL);
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("arg#1 doesn't point to a const string")
+int stream_vprintk_string_arg(void *ctx)
+{
+ bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
new file mode 100644
index 000000000000..826e6b6aff7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char *user_ptr = (char *)1;
+char *invalid_kern_ptr = (char *)-1;
+
+/*
+ * When passing userspace pointers, the error code differs based on arch:
+ * -ERANGE on arches with non-overlapping address spaces
+ * -EFAULT on other arches
+ */
+#if defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_loongarch) || \
+ defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_x86)
+#define USER_PTR_ERR -ERANGE
+#else
+#define USER_PTR_ERR -EFAULT
+#endif
+
+/*
+ * On s390, __get_kernel_nofault (used in string kfuncs) returns 0 for NULL and
+ * user_ptr (instead of causing an exception) so the below two groups of tests
+ * are not applicable.
+ */
+#ifndef __TARGET_ARCH_s390
+
+/* Passing NULL to string kfuncs (treated as a userspace ptr) */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strrchr_null(void *ctx) { return bpf_strrchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strlen_null(void *ctx) { return bpf_strlen(NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnlen_null(void *ctx) { return bpf_strnlen(NULL, 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null1(void *ctx) { return bpf_strspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null2(void *ctx) { return bpf_strspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return bpf_strcspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasestr_null1(void *ctx) { return bpf_strcasestr(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasestr_null2(void *ctx) { return bpf_strcasestr("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasestr_null1(void *ctx) { return bpf_strncasestr(NULL, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasestr_null2(void *ctx) { return bpf_strncasestr("hello", NULL, 1); }
+
+/* Passing userspace ptr to string kfuncs */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strrchr_user_ptr(void *ctx) { return bpf_strrchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strlen_user_ptr(void *ctx) { return bpf_strlen(user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnlen_user_ptr(void *ctx) { return bpf_strnlen(user_ptr, 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr1(void *ctx) { return bpf_strspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr2(void *ctx) { return bpf_strspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { return bpf_strcspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr1(void *ctx) { return bpf_strcasestr(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr2(void *ctx) { return bpf_strcasestr("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr1(void *ctx) { return bpf_strncasestr(user_ptr, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr2(void *ctx) { return bpf_strncasestr("hello", user_ptr, 1); }
+
+#endif /* __TARGET_ARCH_s390 */
+
+/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strrchr_pagefault(void *ctx) { return bpf_strrchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strlen_pagefault(void *ctx) { return bpf_strlen(invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnlen_pagefault(void *ctx) { return bpf_strnlen(invalid_kern_ptr, 1); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault1(void *ctx) { return bpf_strspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault2(void *ctx) { return bpf_strspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return bpf_strcspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault1(void *ctx) { return bpf_strcasestr(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault2(void *ctx) { return bpf_strcasestr("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); }
+SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault1(void *ctx) { return bpf_strncasestr(invalid_kern_ptr, "hello", 1); }
+SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault2(void *ctx) { return bpf_strncasestr("hello", invalid_kern_ptr, 1); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
new file mode 100644
index 000000000000..05e1da1f250f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+
+char long_str[XATTR_SIZE_MAX + 1];
+
+SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); }
+SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); }
+SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); }
+SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); }
+SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); }
+SEC("syscall") int test_strrchr_too_long(void *ctx) { return bpf_strrchr(long_str, 'b'); }
+SEC("syscall") int test_strlen_too_long(void *ctx) { return bpf_strlen(long_str); }
+SEC("syscall") int test_strnlen_too_long(void *ctx) { return bpf_strnlen(long_str, sizeof(long_str)); }
+SEC("syscall") int test_strspn_str_too_long(void *ctx) { return bpf_strspn(long_str, "a"); }
+SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b", long_str); }
+SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); }
+SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); }
+SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); }
+SEC("syscall") int test_strcasestr_too_long(void *ctx) { return bpf_strcasestr(long_str, "hello"); }
+SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); }
+SEC("syscall") int test_strncasestr_too_long(void *ctx) { return bpf_strncasestr(long_str, "hello", sizeof(long_str)); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
new file mode 100644
index 000000000000..a8513964516b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char str[] = "hello world";
+
+#define __test(retval) SEC("syscall") __success __retval(retval)
+
+/* Functional tests */
+__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); }
+__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); }
+__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); }
+__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); }
+__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); }
+__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); }
+__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); }
+__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); }
+__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); }
+__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); }
+__test(1) int test_strchrnul_found(void *ctx) { return bpf_strchrnul(str, 'e'); }
+__test(11) int test_strchrnul_notfound(void *ctx) { return bpf_strchrnul(str, 'x'); }
+__test(1) int test_strnchr_found(void *ctx) { return bpf_strnchr(str, 5, 'e'); }
+__test(11) int test_strnchr_null(void *ctx) { return bpf_strnchr(str, 12, '\0'); }
+__test(-ENOENT) int test_strnchr_notfound(void *ctx) { return bpf_strnchr(str, 5, 'w'); }
+__test(9) int test_strrchr_found(void *ctx) { return bpf_strrchr(str, 'l'); }
+__test(11) int test_strrchr_null(void *ctx) { return bpf_strrchr(str, '\0'); }
+__test(-ENOENT) int test_strrchr_notfound(void *ctx) { return bpf_strrchr(str, 'x'); }
+__test(11) int test_strlen(void *ctx) { return bpf_strlen(str); }
+__test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); }
+__test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); }
+__test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); }
+__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); }
+__test(6) int test_strcasestr_found(void *ctx) { return bpf_strcasestr(str, "woRLD"); }
+__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); }
+__test(-ENOENT) int test_strcasestr_notfound(void *ctx) { return bpf_strcasestr(str, "hi"); }
+__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); }
+__test(0) int test_strcasestr_empty(void *ctx) { return bpf_strcasestr(str, ""); }
+__test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); }
+__test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); }
+__test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); }
+__test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, "hi", 10); }
+__test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); }
+__test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); }
+__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); }
+__test(0) int test_strncasestr_found1(void *ctx) { return bpf_strncasestr("", "", 0); }
+__test(0) int test_strncasestr_found2(void *ctx) { return bpf_strncasestr(str, "heLLO", 5); }
+__test(0) int test_strncasestr_found3(void *ctx) { return bpf_strncasestr(str, "heLLO", 6); }
+__test(-ENOENT) int test_strncasestr_notfound1(void *ctx) { return bpf_strncasestr(str, "hi", 10); }
+__test(-ENOENT) int test_strncasestr_notfound2(void *ctx) { return bpf_strncasestr(str, "hello", 4); }
+__test(-ENOENT) int test_strncasestr_notfound3(void *ctx) { return bpf_strncasestr("", "a", 0); }
+__test(0) int test_strncasestr_empty(void *ctx) { return bpf_strncasestr(str, "", 1); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
new file mode 100644
index 000000000000..f47bf88f8d2a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define STRNCMP_STR_SZ 4096
+
+/* Will be updated by benchmark before program loading */
+const volatile unsigned int cmp_str_len = 1;
+const char target[STRNCMP_STR_SZ];
+
+long hits = 0;
+char str[STRNCMP_STR_SZ];
+
+char _license[] SEC("license") = "GPL";
+
+static __always_inline int local_strncmp(const char *s1, unsigned int sz,
+ const char *s2)
+{
+ int ret = 0;
+ unsigned int i;
+
+ for (i = 0; i < sz; i++) {
+ /* E.g. 0xff > 0x31 */
+ ret = (unsigned char)s1[i] - (unsigned char)s2[i];
+ if (ret || !s1[i])
+ break;
+ }
+
+ return ret;
+}
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int strncmp_no_helper(void *ctx)
+{
+ const char *target_str = target;
+
+ barrier_var(target_str);
+ if (local_strncmp(str, cmp_str_len + 1, target_str) < 0)
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int strncmp_helper(void *ctx)
+{
+ if (bpf_strncmp(str, cmp_str_len + 1, target) < 0)
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/strncmp_test.c b/tools/testing/selftests/bpf/progs/strncmp_test.c
new file mode 100644
index 000000000000..769668feed48
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strncmp_test.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define STRNCMP_STR_SZ 8
+
+const char target[STRNCMP_STR_SZ] = "EEEEEEE";
+char str[STRNCMP_STR_SZ];
+int cmp_ret = 0;
+int target_pid = 0;
+
+const char no_str_target[STRNCMP_STR_SZ] = "12345678";
+char writable_target[STRNCMP_STR_SZ];
+unsigned int no_const_str_size = STRNCMP_STR_SZ;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int do_strncmp(void *ctx)
+{
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, target);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int strncmp_bad_not_const_str_size(void *ctx)
+{
+ /* The value of string size is not const, so will fail */
+ cmp_ret = bpf_strncmp(str, no_const_str_size, target);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int strncmp_bad_writable_target(void *ctx)
+{
+ /* Compared target is not read-only, so will fail */
+ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, writable_target);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int strncmp_bad_not_null_term_target(void *ctx)
+{
+ /* Compared target is not null-terminated, so will fail */
+ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, no_str_target);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 7de534f38c3f..6e1918deaf26 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -10,6 +10,8 @@
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
typedef uint32_t pid_t;
struct task_struct {};
@@ -24,9 +26,11 @@ struct task_struct {};
#define STACK_TABLE_EPOCH_SHIFT 20
#define STROBE_MAX_STR_LEN 1
#define STROBE_MAX_CFGS 32
+#define READ_MAP_VAR_PAYLOAD_CAP \
+ ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
#define STROBE_MAX_PAYLOAD \
(STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \
- STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+ STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
struct strobe_value_header {
/*
@@ -135,7 +139,7 @@ struct strobe_value_loc {
* tpidr_el0 for aarch64).
* TLS_IMM_EXEC: absolute address of GOT entry containing offset
* from thread pointer;
- * TLS_GENERAL_DYN: absolute addres of double GOT entry
+ * TLS_GENERAL_DYN: absolute address of double GOT entry
* containing tls_index_t struct;
*/
int64_t offset;
@@ -326,9 +330,9 @@ static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
}
bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
- return tls_ptr && tls_ptr != (void *)-1
- ? tls_ptr + tls_index.offset
- : NULL;
+ if (!tls_ptr || tls_ptr == (void *)-1)
+ return NULL;
+ return tls_ptr + tls_index.offset;
}
#ifdef SUBPROGS
@@ -355,10 +359,10 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
size_t idx, void *tls_base,
struct strobe_value_generic *value,
struct strobemeta_payload *data,
- void *payload)
+ size_t off)
{
void *location;
- uint32_t len;
+ uint64_t len;
data->str_lens[idx] = 0;
location = calc_location(&cfg->str_locs[idx], tls_base);
@@ -366,10 +370,10 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
/*
* if bpf_probe_read_user_str returns error (<0), due to casting to
- * unsinged int, it will become big number, so next check is
+ * unsigned int, it will become big number, so next check is
* sufficient to check for errors AND prove to BPF verifier, that
* bpf_probe_read_user_str won't return anything bigger than
* STROBE_MAX_STR_LEN
@@ -378,31 +382,30 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0;
data->str_lens[idx] = len;
- return len;
+ return off + len;
}
-static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
- size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data,
- void *payload)
+static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data,
+ size_t off)
{
struct strobe_map_descr* descr = &data->map_descrs[idx];
struct strobe_map_raw map;
void *location;
- uint32_t len;
- int i;
+ uint64_t len;
descr->tag_len = 0; /* presume no tag is set */
descr->cnt = -1; /* presume no value is set */
location = calc_location(&cfg->map_locs[idx], tls_base);
if (!location)
- return payload;
+ return off;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
- return payload;
+ return off;
descr->id = map.id;
descr->cnt = map.cnt;
@@ -411,40 +414,92 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
data->req_meta_valid = 1;
}
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
if (len <= STROBE_MAX_STR_LEN) {
descr->tag_len = len;
- payload += len;
+ off += len;
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
if (i >= map.cnt)
break;
descr->key_lens[i] = 0;
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].key);
if (len <= STROBE_MAX_STR_LEN) {
descr->key_lens[i] = len;
- payload += len;
+ off += len;
}
descr->val_lens[i] = 0;
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].val);
if (len <= STROBE_MAX_STR_LEN) {
descr->val_lens[i] = len;
- payload += len;
+ off += len;
}
}
- return payload;
+ return off;
}
+#ifdef USE_BPF_LOOP
+enum read_type {
+ READ_INT_VAR,
+ READ_MAP_VAR,
+ READ_STR_VAR,
+};
+
+struct read_var_ctx {
+ struct strobemeta_payload *data;
+ void *tls_base;
+ struct strobemeta_cfg *cfg;
+ size_t payload_off;
+ /* value gets mutated */
+ struct strobe_value_generic *value;
+ enum read_type type;
+};
+
+static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
+{
+ /* lose precision info for ctx->payload_off, verifier won't track
+ * double xor, barrier_var() is needed to force clang keep both xors.
+ */
+ ctx->payload_off ^= index;
+ barrier_var(ctx->payload_off);
+ ctx->payload_off ^= index;
+ switch (ctx->type) {
+ case READ_INT_VAR:
+ if (index >= STROBE_MAX_INTS)
+ return 1;
+ read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
+ break;
+ case READ_MAP_VAR:
+ if (index >= STROBE_MAX_MAPS)
+ return 1;
+ if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
+ return 1;
+ ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload_off);
+ break;
+ case READ_STR_VAR:
+ if (index >= STROBE_MAX_STRS)
+ return 1;
+ if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
+ return 1;
+ ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload_off);
+ break;
+ }
+ return 0;
+}
+#endif /* USE_BPF_LOOP */
+
/*
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
* pointer to *right after* payload ends
@@ -460,7 +515,8 @@ static void *read_strobe_meta(struct task_struct *task,
pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0};
struct strobemeta_cfg *cfg;
- void *tls_base, *payload;
+ size_t payload_off;
+ void *tls_base;
cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
if (!cfg)
@@ -468,42 +524,74 @@ static void *read_strobe_meta(struct task_struct *task,
data->int_vals_set_mask = 0;
data->req_meta_valid = 0;
- payload = data->payload;
+ payload_off = 0;
/*
* we don't have struct task_struct definition, it should be:
* tls_base = (void *)task->thread.fsbase;
*/
tls_base = (void *)task;
+#ifdef USE_BPF_LOOP
+ struct read_var_ctx ctx = {
+ .cfg = cfg,
+ .tls_base = tls_base,
+ .value = &value,
+ .data = data,
+ .payload_off = 0,
+ };
+ int err;
+
+ ctx.type = READ_INT_VAR;
+ err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_INTS)
+ return NULL;
+
+ ctx.type = READ_STR_VAR;
+ err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_STRS)
+ return NULL;
+
+ ctx.type = READ_MAP_VAR;
+ err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_MAPS)
+ return NULL;
+
+ payload_off = ctx.payload_off;
+ /* this should not really happen, here only to satisfy verifier */
+ if (payload_off > sizeof(data->payload))
+ payload_off = sizeof(data->payload);
+#else
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
-#endif
+ __pragma_loop_unroll
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_INTS; ++i) {
read_int_var(cfg, i, tls_base, &value, data);
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
-#endif
+ __pragma_loop_unroll
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
- payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+ payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
-#endif
+ __pragma_loop_unroll
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
- payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+ payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
}
+#endif /* USE_BPF_LOOP */
+
/*
* return pointer right after end of payload, so it's possible to
* calculate exact amount of useful data that needs to be sent
*/
- return payload;
+ return &data->payload[payload_off];
}
SEC("raw_tracepoint/kfree_skb")
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
new file mode 100644
index 000000000000..d18b992f0165
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 100
+#define STROBE_MAX_MAP_ENTRIES 20
+#define USE_BPF_LOOP
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c
new file mode 100644
index 000000000000..ba10c3896213
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1)
+{
+ test_1_result = 42;
+ return 0;
+}
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_2)
+{
+ return 0;
+}
+
+struct bpf_testmod_ops___v1 {
+ int (*test_1)(void);
+};
+
+struct bpf_testmod_ops___v2 {
+ int (*test_1)(void);
+ int (*does_not_exist)(void);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v1 testmod_1 = {
+ .test_1 = (void *)test_1
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v2 testmod_2 = {
+ .test_1 = (void *)test_1,
+ .does_not_exist = (void *)test_2
+};
+
+SEC("?.struct_ops")
+struct bpf_testmod_ops___v1 optional_map = {
+ .test_1 = (void *)test_1,
+};
+
+SEC("?.struct_ops.link")
+struct bpf_testmod_ops___v1 optional_map2 = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c
new file mode 100644
index 000000000000..6049d9c902d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+
+SEC("?struct_ops/test_1")
+int BPF_PROG(foo)
+{
+ test_1_result = 42;
+ return 0;
+}
+
+SEC("?struct_ops/test_1")
+int BPF_PROG(bar)
+{
+ test_1_result = 24;
+ return 0;
+}
+
+struct bpf_testmod_ops {
+ int (*test_1)(void);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)bar
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
new file mode 100644
index 000000000000..284a5b008e0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * This subprogram validates that libbpf handles the situation in which BPF
+ * object has subprograms in .text section, but has no entry BPF programs.
+ * At some point that was causing issues due to legacy logic of treating such
+ * subprogram as entry program (with unknown program type, which would fail).
+ */
+int dangling_subprog(void)
+{
+ /* do nothing, just be here */
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_do_detach;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
new file mode 100644
index 000000000000..d8cc99f5c2e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1_forgotten)
+{
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops ops = {
+ /* we forgot to reference test_1_forgotten above, oops */
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
new file mode 100644
index 000000000000..ad8bb546c9bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP1_MAGIC 1234
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP1_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
new file mode 100644
index 000000000000..cea1a2f4b62f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP2_MAGIC 4567
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP2_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
new file mode 100644
index 000000000000..2b98b7710816
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to task
+ * here is acquired automatically as the task argument is tagged with "__ref".
+ */
+SEC("struct_ops/test_return_ref_kptr")
+struct task_struct *BPF_PROG(kptr_return, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ if (dummy % 2) {
+ bpf_task_release(task);
+ return NULL;
+ }
+ return task;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
new file mode 100644
index 000000000000..caeea158ef69
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
@@ -0,0 +1,26 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a non-zero scalar value.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 has smin=1 smax=1 should have been in [0, 0]")
+struct task_struct *BPF_PROG(kptr_return_fail__invalid_scalar, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ bpf_task_release(task);
+ return (struct task_struct *)1;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__invalid_scalar,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
new file mode 100644
index 000000000000..b8b4f05c3d7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
@@ -0,0 +1,34 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a local kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__local_kptr, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *t;
+
+ bpf_task_release(task);
+
+ t = bpf_obj_new(typeof(*task));
+ if (!t)
+ return NULL;
+
+ return t;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__local_kptr,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
new file mode 100644
index 000000000000..7ddeb28c2329
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
@@ -0,0 +1,25 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a modified referenced kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("dereference of modified trusted_ptr_ ptr R0 off={{[0-9]+}} disallowed")
+struct task_struct *BPF_PROG(kptr_return_fail__nonzero_offset, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ return (struct task_struct *)&task->jobctl;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__nonzero_offset,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
new file mode 100644
index 000000000000..6a2dd5367802
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a referenced kptr of the wrong type.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__wrong_type, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *ret;
+
+ ret = (struct task_struct *)bpf_cgroup_acquire(cgrp);
+ bpf_task_release(task);
+
+ return ret;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__wrong_type,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
new file mode 100644
index 000000000000..ccab3935aa42
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t tgid = 0;
+
+/* This is a test BPF program that uses struct_ops to access an argument
+ * that may be NULL. This is a test for the verifier to ensure that it can
+ * rip PTR_MAYBE_NULL correctly.
+ */
+SEC("struct_ops/test_maybe_null")
+int BPF_PROG(test_maybe_null, int dummy,
+ struct task_struct *task)
+{
+ if (task)
+ tgid = task->tgid;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_maybe_null = (void *)test_maybe_null,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
new file mode 100644
index 000000000000..8b5515f4f724
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t tgid = 0;
+
+SEC("struct_ops/test_maybe_null_struct_ptr")
+int BPF_PROG(test_maybe_null_struct_ptr, int dummy,
+ struct task_struct *task)
+{
+ tgid = task->tgid;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_struct_ptr = {
+ .test_maybe_null = (void *)test_maybe_null_struct_ptr,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
new file mode 100644
index 000000000000..71c420c3a5a6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+int test_2_result = 0;
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1)
+{
+ test_1_result = 0xdeadbeef;
+ return 0;
+}
+
+SEC("struct_ops/test_2")
+void BPF_PROG(test_2, int a, int b)
+{
+ test_2_result = a + b;
+}
+
+SEC("?struct_ops/test_3")
+int BPF_PROG(test_3, int a, int b)
+{
+ test_2_result = a + b + 3;
+ return a + b + 3;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+ .data = 0x1,
+};
+
+SEC("struct_ops/test_2")
+void BPF_PROG(test_2_v2, int a, int b)
+{
+ test_2_result = a * b;
+}
+
+struct bpf_testmod_ops___v2 {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v2 testmod_2 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2_v2,
+};
+
+struct bpf_testmod_ops___zeroed {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+ void (*zeroed_op)(int a, int b);
+ int zeroed;
+};
+
+SEC("struct_ops/test_3")
+int BPF_PROG(zeroed_op)
+{
+ return 1;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___zeroed testmod_zeroed = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2_v2,
+ .zeroed_op = (void *)zeroed_op,
+};
+
+struct bpf_testmod_ops___incompatible {
+ int (*test_1)(void);
+ void (*test_2)(int *a);
+ int data;
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___incompatible testmod_incompatible = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+ .data = 3,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
new file mode 100644
index 000000000000..5b23ea817f1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define TRAMP(x) \
+ SEC("struct_ops/tramp_" #x) \
+ int BPF_PROG(tramp_ ## x, int a) \
+ { \
+ return a; \
+ }
+
+TRAMP(1)
+TRAMP(2)
+TRAMP(3)
+TRAMP(4)
+TRAMP(5)
+TRAMP(6)
+TRAMP(7)
+TRAMP(8)
+TRAMP(9)
+TRAMP(10)
+TRAMP(11)
+TRAMP(12)
+TRAMP(13)
+TRAMP(14)
+TRAMP(15)
+TRAMP(16)
+TRAMP(17)
+TRAMP(18)
+TRAMP(19)
+TRAMP(20)
+TRAMP(21)
+TRAMP(22)
+TRAMP(23)
+TRAMP(24)
+TRAMP(25)
+TRAMP(26)
+TRAMP(27)
+TRAMP(28)
+TRAMP(29)
+TRAMP(30)
+TRAMP(31)
+TRAMP(32)
+TRAMP(33)
+TRAMP(34)
+TRAMP(35)
+TRAMP(36)
+TRAMP(37)
+TRAMP(38)
+TRAMP(39)
+TRAMP(40)
+
+#define F_TRAMP(x) .tramp_ ## x = (void *)tramp_ ## x
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops multi_pages = {
+ F_TRAMP(1),
+ F_TRAMP(2),
+ F_TRAMP(3),
+ F_TRAMP(4),
+ F_TRAMP(5),
+ F_TRAMP(6),
+ F_TRAMP(7),
+ F_TRAMP(8),
+ F_TRAMP(9),
+ F_TRAMP(10),
+ F_TRAMP(11),
+ F_TRAMP(12),
+ F_TRAMP(13),
+ F_TRAMP(14),
+ F_TRAMP(15),
+ F_TRAMP(16),
+ F_TRAMP(17),
+ F_TRAMP(18),
+ F_TRAMP(19),
+ F_TRAMP(20),
+ F_TRAMP(21),
+ F_TRAMP(22),
+ F_TRAMP(23),
+ F_TRAMP(24),
+ F_TRAMP(25),
+ F_TRAMP(26),
+ F_TRAMP(27),
+ F_TRAMP(28),
+ F_TRAMP(29),
+ F_TRAMP(30),
+ F_TRAMP(31),
+ F_TRAMP(32),
+ F_TRAMP(33),
+ F_TRAMP(34),
+ F_TRAMP(35),
+ F_TRAMP(36),
+ F_TRAMP(37),
+ F_TRAMP(38),
+ F_TRAMP(39),
+ F_TRAMP(40),
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
new file mode 100644
index 000000000000..5d0937fa07be
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+int rand;
+int arr[1];
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1_turn_off)
+{
+ return arr[rand]; /* potentially way out of range access */
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops ops = {
+ .test_1 = (void *)test_1_turn_off,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
new file mode 100644
index 000000000000..dbe646013811
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+bool skip __attribute((__section__(".data"))) = false;
+#else
+bool skip = true;
+#endif
+
+void bpf_testmod_ops3_call_test_2(void) __ksym;
+
+int val_i, val_j;
+
+__noinline static int subprog2(int *a, int *b)
+{
+ return val_i + a[10] + b[20];
+}
+
+__noinline static int subprog1(int *a)
+{
+ /* stack size 200 bytes */
+ int b[50] = {};
+
+ b[20] = 2;
+ return subprog2(a, b);
+}
+
+
+SEC("struct_ops")
+int BPF_PROG(test_1)
+{
+ /* stack size 400 bytes */
+ int a[100] = {};
+
+ a[10] = 1;
+ val_i = subprog1(a);
+ bpf_testmod_ops3_call_test_2();
+ return 0;
+}
+
+SEC("struct_ops")
+int BPF_PROG(test_2)
+{
+ /* stack size 200 bytes */
+ int a[50] = {};
+
+ a[10] = 3;
+ val_j = subprog1(a);
+ return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_testmod_ops3 testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
new file mode 100644
index 000000000000..3d89ad7cbe2a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+bool skip __attribute((__section__(".data"))) = false;
+#else
+bool skip = true;
+#endif
+
+void bpf_testmod_ops3_call_test_2(void) __ksym;
+
+int val_i, val_j;
+
+__noinline static int subprog2(int *a, int *b)
+{
+ return val_i + a[10] + b[20];
+}
+
+__noinline static int subprog1(int *a)
+{
+ /* stack size 200 bytes */
+ int b[50] = {};
+
+ b[20] = 2;
+ return subprog2(a, b);
+}
+
+
+SEC("struct_ops")
+int BPF_PROG(test_1)
+{
+ /* stack size 100 bytes */
+ int a[25] = {};
+
+ a[10] = 1;
+ val_i = subprog1(a);
+ bpf_testmod_ops3_call_test_2();
+ return 0;
+}
+
+SEC("struct_ops")
+int BPF_PROG(test_2)
+{
+ /* stack size 400 bytes */
+ int a[100] = {};
+
+ a[10] = 3;
+ val_j = subprog1(a);
+ return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_testmod_ops3 testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
new file mode 100644
index 000000000000..b1f6d7e5a8e5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+bool skip __attribute((__section__(".data"))) = false;
+#else
+bool skip = true;
+#endif
+
+void bpf_testmod_ops3_call_test_1(void) __ksym;
+
+int val_i, val_j;
+
+__noinline static int subprog2(int *a, int *b)
+{
+ return val_i + a[1] + b[20];
+}
+
+__noinline static int subprog1(int *a)
+{
+ /* stack size 400 bytes */
+ int b[100] = {};
+
+ b[20] = 2;
+ return subprog2(a, b);
+}
+
+
+SEC("struct_ops")
+int BPF_PROG(test_1)
+{
+ /* stack size 20 bytes */
+ int a[5] = {};
+
+ a[1] = 1;
+ val_j += subprog1(a);
+ bpf_testmod_ops3_call_test_1();
+ return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_testmod_ops3 testmod_1 = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
new file mode 100644
index 000000000000..9c0a65466356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
@@ -0,0 +1,31 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This is a test BPF program that uses struct_ops to access a referenced
+ * kptr argument. This is a test for the verifier to ensure that it
+ * 1) recognizes the task as a referenced object (i.e., ref_obj_id > 0), and
+ * 2) the same reference can be acquired from multiple paths as long as it
+ * has not been released.
+ */
+SEC("struct_ops/test_refcounted")
+int BPF_PROG(refcounted, int dummy, struct task_struct *task)
+{
+ if (dummy == 1)
+ bpf_task_release(task);
+ else
+ bpf_task_release(task);
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_refcounted = {
+ .test_refcounted = (void *)refcounted,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
new file mode 100644
index 000000000000..ae074aa62852
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
@@ -0,0 +1,39 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern void bpf_task_release(struct task_struct *p) __ksym;
+
+__noinline int subprog_release(__u64 *ctx __arg_ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+ int dummy = (int)ctx[0];
+
+ bpf_task_release(task);
+
+ return dummy + 1;
+}
+
+/* Test that the verifier rejects a program that contains a global
+ * subprogram with referenced kptr arguments
+ */
+SEC("struct_ops/test_refcounted")
+__failure __log_level(2)
+__msg("Validating subprog_release() func#1...")
+__msg("invalid bpf_context access off=8. Reference may already be released")
+int refcounted_fail__global_subprog(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+
+ return subprog_release(ctx);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__global_subprog,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
new file mode 100644
index 000000000000..e945b1a04294
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
@@ -0,0 +1,22 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Test that the verifier rejects a program that acquires a referenced
+ * kptr through context without releasing the reference
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("Unreleased reference id=1 alloc_insn=0")
+int BPF_PROG(refcounted_fail__ref_leak, int dummy,
+ struct task_struct *task)
+{
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__ref_leak,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
new file mode 100644
index 000000000000..3b125025a1f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+/* Test that the verifier rejects a program with referenced kptr arguments
+ * that tail call
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("program with __ref argument cannot tail call")
+int refcounted_fail__tail_call(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+ bpf_tail_call(ctx, &prog_array, 0);
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__tail_call,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/summarization.c b/tools/testing/selftests/bpf/progs/summarization.c
new file mode 100644
index 000000000000..f89effe82c9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/summarization.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline
+long changes_pkt_data(struct __sk_buff *sk)
+{
+ return bpf_skb_pull_data(sk, 0);
+}
+
+__noinline __weak
+long does_not_change_pkt_data(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+SEC("?tc")
+int main_changes_with_subprogs(struct __sk_buff *sk)
+{
+ changes_pkt_data(sk);
+ does_not_change_pkt_data(sk);
+ return 0;
+}
+
+SEC("?tc")
+int main_changes(struct __sk_buff *sk)
+{
+ bpf_skb_pull_data(sk, 0);
+ return 0;
+}
+
+SEC("?tc")
+int main_does_not_change(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+__noinline
+long might_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+__noinline __weak
+long does_not_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep_with_subprogs(struct pt_regs *ctx)
+{
+ might_sleep(ctx);
+ does_not_sleep(ctx);
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?uprobe.s")
+int main_does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/summarization_freplace.c b/tools/testing/selftests/bpf/progs/summarization_freplace.c
new file mode 100644
index 000000000000..935f00e0e9ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/summarization_freplace.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("?freplace")
+long changes_pkt_data(struct __sk_buff *sk)
+{
+ return bpf_skb_pull_data(sk, 0);
+}
+
+SEC("?freplace")
+long does_not_change_pkt_data(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+SEC("?freplace")
+long might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?freplace")
+long does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
new file mode 100644
index 000000000000..b698cc62a371
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <../../../tools/include/linux/filter.h>
+#include <linux/btf.h>
+#include <string.h>
+#include <errno.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map {
+ int id;
+} __attribute__((preserve_access_index));
+
+struct args {
+ __u64 log_buf;
+ __u32 log_size;
+ int max_entries;
+ int map_fd;
+ int prog_fd;
+ int btf_fd;
+};
+
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, union bpf_attr);
+ __uint(max_entries, 1);
+} bpf_attr_array SEC(".maps");
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct inner_map_type);
+} outer_array_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static int btf_load(void)
+{
+ struct btf_blob {
+ struct btf_header btf_hdr;
+ __u32 types[8];
+ __u32 str;
+ } raw_btf = {
+ .btf_hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = sizeof(raw_btf.types),
+ .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types),
+ .str_len = sizeof(raw_btf.str),
+ },
+ .types = {
+ /* long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */
+ /* unsigned long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ },
+ };
+ static union bpf_attr btf_load_attr = {
+ .btf_size = sizeof(raw_btf),
+ };
+
+ btf_load_attr.btf = (long)&raw_btf;
+ return bpf_sys_bpf(BPF_BTF_LOAD, &btf_load_attr, sizeof(btf_load_attr));
+}
+
+SEC("syscall")
+int load_prog(struct args *ctx)
+{
+ static char license[] = "GPL";
+ static struct bpf_insn insns[] = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ static union bpf_attr map_create_attr = {
+ .map_type = BPF_MAP_TYPE_HASH,
+ .key_size = 8,
+ .value_size = 8,
+ .btf_key_type_id = 1,
+ .btf_value_type_id = 2,
+ };
+ static union bpf_attr map_update_attr = { .map_fd = 1, };
+ static __u64 key = 12;
+ static __u64 value = 34;
+ static union bpf_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .insn_cnt = ARRAY_SIZE(insns),
+ };
+ int ret;
+
+ ret = btf_load();
+ if (ret <= 0)
+ return ret;
+
+ ctx->btf_fd = ret;
+ map_create_attr.max_entries = ctx->max_entries;
+ map_create_attr.btf_fd = ret;
+
+ prog_load_attr.license = ptr_to_u64(license);
+ prog_load_attr.insns = ptr_to_u64(insns);
+ prog_load_attr.log_buf = ctx->log_buf;
+ prog_load_attr.log_size = ctx->log_size;
+ prog_load_attr.log_level = 1;
+
+ ret = bpf_sys_bpf(BPF_MAP_CREATE, &map_create_attr, sizeof(map_create_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->map_fd = ret;
+ insns[3].imm = ret;
+
+ map_update_attr.map_fd = ret;
+ map_update_attr.key = ptr_to_u64(&key);
+ map_update_attr.value = ptr_to_u64(&value);
+ ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr));
+ if (ret < 0)
+ return ret;
+
+ ret = bpf_sys_bpf(BPF_PROG_LOAD, &prog_load_attr, sizeof(prog_load_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->prog_fd = ret;
+ return 1;
+}
+
+SEC("syscall")
+int update_outer_map(void *ctx)
+{
+ int zero = 0, ret = 0, outer_fd = -1, inner_fd = -1, err;
+ const int attr_sz = sizeof(union bpf_attr);
+ union bpf_attr *attr;
+
+ attr = bpf_map_lookup_elem((struct bpf_map *)&bpf_attr_array, &zero);
+ if (!attr)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_id = ((struct bpf_map *)&outer_array_map)->id;
+ outer_fd = bpf_sys_bpf(BPF_MAP_GET_FD_BY_ID, attr, attr_sz);
+ if (outer_fd < 0)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_type = BPF_MAP_TYPE_ARRAY;
+ attr->key_size = 4;
+ attr->value_size = 4;
+ attr->max_entries = 1;
+ inner_fd = bpf_sys_bpf(BPF_MAP_CREATE, attr, attr_sz);
+ if (inner_fd < 0)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_fd = outer_fd;
+ attr->key = ptr_to_u64(&zero);
+ attr->value = ptr_to_u64(&inner_fd);
+ err = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, attr, attr_sz);
+ if (err)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_fd = outer_fd;
+ attr->key = ptr_to_u64(&zero);
+ err = bpf_sys_bpf(BPF_MAP_DELETE_ELEM, attr, attr_sz);
+ if (err)
+ goto out;
+ ret = 1;
+out:
+ if (inner_fd >= 0)
+ bpf_sys_close(inner_fd);
+ if (outer_fd >= 0)
+ bpf_sys_close(outer_fd);
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
index 7115bcefbe8a..8159a0b4a69a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -11,8 +11,8 @@ struct {
} jmp_table SEC(".maps");
#define TAIL_FUNC(x) \
- SEC("classifier/" #x) \
- int bpf_func_##x(struct __sk_buff *skb) \
+ SEC("tc") \
+ int classifier_##x(struct __sk_buff *skb) \
{ \
return x; \
}
@@ -20,7 +20,7 @@ TAIL_FUNC(0)
TAIL_FUNC(1)
TAIL_FUNC(2)
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
/* Multiple locations to make sure we patch
@@ -45,4 +45,3 @@ int entry(struct __sk_buff *skb)
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
index 0431e4fe7efd..a5ff53e61702 100644
--- a/tools/testing/selftests/bpf/progs/tailcall2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -10,41 +10,41 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 1);
return 0;
}
-SEC("classifier/1")
-int bpf_func_1(struct __sk_buff *skb)
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 2);
return 1;
}
-SEC("classifier/2")
-int bpf_func_2(struct __sk_buff *skb)
+SEC("tc")
+int classifier_2(struct __sk_buff *skb)
{
return 2;
}
-SEC("classifier/3")
-int bpf_func_3(struct __sk_buff *skb)
+SEC("tc")
+int classifier_3(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 4);
return 3;
}
-SEC("classifier/4")
-int bpf_func_4(struct __sk_buff *skb)
+SEC("tc")
+int classifier_4(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 3);
return 4;
}
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 0);
@@ -56,4 +56,3 @@ int entry(struct __sk_buff *skb)
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index 739dc2a51e74..f60bcd7b8d4b 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -10,17 +10,17 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int count;
+int count = 0;
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
{
count++;
bpf_tail_call_static(skb, &jmp_table, 0);
return 1;
}
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 0);
@@ -28,4 +28,3 @@ int entry(struct __sk_buff *skb)
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
index f82075b47d7d..a56bbc2313ca 100644
--- a/tools/testing/selftests/bpf/progs/tailcall4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -10,11 +10,11 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
- SEC("classifier/" #x) \
- int bpf_func_##x(struct __sk_buff *skb) \
+ SEC("tc") \
+ int classifier_##x(struct __sk_buff *skb) \
{ \
return x; \
}
@@ -22,7 +22,7 @@ TAIL_FUNC(0)
TAIL_FUNC(1)
TAIL_FUNC(2)
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
bpf_tail_call(skb, &jmp_table, selector);
@@ -30,4 +30,3 @@ int entry(struct __sk_buff *skb)
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
index ce5450744fd4..8d03496eb6ca 100644
--- a/tools/testing/selftests/bpf/progs/tailcall5.c
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -10,11 +10,11 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
- SEC("classifier/" #x) \
- int bpf_func_##x(struct __sk_buff *skb) \
+ SEC("tc") \
+ int classifier_##x(struct __sk_buff *skb) \
{ \
return x; \
}
@@ -22,7 +22,7 @@ TAIL_FUNC(0)
TAIL_FUNC(1)
TAIL_FUNC(2)
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
int idx = 0;
@@ -37,4 +37,3 @@ int entry(struct __sk_buff *skb)
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall6.c b/tools/testing/selftests/bpf/progs/tailcall6.c
new file mode 100644
index 000000000000..d77b8abd62f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall6.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count, which;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+ count++;
+ if (__builtin_constant_p(which))
+ __bpf_unreachable();
+ bpf_tail_call(skb, &jmp_table, which);
+ return 1;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ if (__builtin_constant_p(which))
+ __bpf_unreachable();
+ bpf_tail_call(skb, &jmp_table, which);
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
index 0103f3dd9f02..8c91428deb90 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
@@ -10,8 +10,8 @@ struct {
} jmp_table SEC(".maps");
#define TAIL_FUNC(x) \
- SEC("classifier/" #x) \
- int bpf_func_##x(struct __sk_buff *skb) \
+ SEC("tc") \
+ int classifier_##x(struct __sk_buff *skb) \
{ \
return x; \
}
@@ -26,7 +26,7 @@ int subprog_tail(struct __sk_buff *skb)
return skb->len * 2;
}
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 1);
@@ -35,4 +35,3 @@ int entry(struct __sk_buff *skb)
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
index 7b1c04183824..ce97d141daee 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -20,16 +20,16 @@ int subprog_tail(struct __sk_buff *skb)
return 1;
}
-static volatile int count;
+int count = 0;
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
{
count++;
return subprog_tail(skb);
}
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 0);
@@ -38,4 +38,3 @@ int entry(struct __sk_buff *skb)
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
index 0d5482bea6c9..99c8d1d8a187 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -20,6 +21,8 @@ int subprog_tail2(struct __sk_buff *skb)
else
bpf_tail_call_static(skb, &jmp_table, 1);
+ __sink(arr[sizeof(arr) - 1]);
+
return skb->len;
}
@@ -30,32 +33,39 @@ int subprog_tail(struct __sk_buff *skb)
bpf_tail_call_static(skb, &jmp_table, 0);
+ __sink(arr[sizeof(arr) - 1]);
+
return skb->len * 2;
}
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
{
volatile char arr[128] = {};
+ __sink(arr[sizeof(arr) - 1]);
+
return subprog_tail2(skb);
}
-SEC("classifier/1")
-int bpf_func_1(struct __sk_buff *skb)
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
{
volatile char arr[128] = {};
+ __sink(arr[sizeof(arr) - 1]);
+
return skb->len * 3;
}
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
volatile char arr[128] = {};
+ __sink(arr[sizeof(arr) - 1]);
+
return subprog_tail(skb);
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
index 9a1b166b7fbe..a017d6b2f1dd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -3,17 +3,35 @@
#include <bpf/bpf_helpers.h>
struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} nop_table SEC(".maps");
+
+struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
__uint(max_entries, 3);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int count;
+int count = 0;
+int noise = 0;
+
+static __always_inline int subprog_noise(void)
+{
+ __u32 key = 0;
+
+ bpf_map_lookup_elem(&nop_table, &key);
+ return 0;
+}
__noinline
int subprog_tail_2(struct __sk_buff *skb)
{
+ if (noise)
+ subprog_noise();
bpf_tail_call_static(skb, &jmp_table, 2);
return skb->len * 3;
}
@@ -32,30 +50,29 @@ int subprog_tail(struct __sk_buff *skb)
return skb->len;
}
-SEC("classifier/1")
-int bpf_func_1(struct __sk_buff *skb)
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
{
return subprog_tail_2(skb);
}
-SEC("classifier/2")
-int bpf_func_2(struct __sk_buff *skb)
+SEC("tc")
+int classifier_2(struct __sk_buff *skb)
{
count++;
return subprog_tail_2(skb);
}
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
{
return subprog_tail_1(skb);
}
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
return subprog_tail(skb);
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
new file mode 100644
index 000000000000..4a9f63bea66c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define __unused __attribute__((unused))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int done = 0;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb __unused)
+{
+ done = 1;
+ return 0;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ /* Don't propagate the constant to the caller */
+ volatile int ret = 1;
+
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return ret;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ /* Have data on stack which size is not a multiple of 8 */
+ volatile char arr[1] = {};
+
+ __sink(arr[0]);
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c
new file mode 100644
index 000000000000..8436c6729167
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int count = 0;
+
+SEC("fentry/subprog_tail")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+ count++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c
new file mode 100644
index 000000000000..fe16412c6e6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int count = 0;
+
+SEC("fexit/subprog_tail")
+int BPF_PROG(fexit, struct sk_buff *skb)
+{
+ count++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
new file mode 100644
index 000000000000..d556b19413d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+#include "bpf_test_utils.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count = 0;
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return 0;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ int ret = 1;
+
+ clobber_regs_stack();
+
+ count++;
+ subprog_tail(skb);
+ subprog_tail(skb);
+
+ return ret;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
new file mode 100644
index 000000000000..ae94c9c70ab7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_test_utils.h"
+
+int classifier_0(struct __sk_buff *skb);
+int classifier_1(struct __sk_buff *skb);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __array(values, void (void));
+} jmp_table SEC(".maps") = {
+ .values = {
+ [0] = (void *) &classifier_0,
+ [1] = (void *) &classifier_1,
+ },
+};
+
+int count0 = 0;
+int count1 = 0;
+
+static __noinline
+int subprog_tail0(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return 0;
+}
+
+__auxiliary
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+ count0++;
+ subprog_tail0(skb);
+ return 0;
+}
+
+static __noinline
+int subprog_tail1(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ return 0;
+}
+
+__auxiliary
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
+{
+ count1++;
+ subprog_tail1(skb);
+ return 0;
+}
+
+__success
+__retval(33)
+SEC("tc")
+int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb)
+{
+ int ret = 0;
+
+ clobber_regs_stack();
+
+ subprog_tail0(skb);
+ subprog_tail1(skb);
+
+ __sink(ret);
+ return (count1 << 16) | count0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
new file mode 100644
index 000000000000..56b6b0099840
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_test_utils.h"
+
+int classifier_0(struct __sk_buff *skb);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, void (void));
+} jmp_table0 SEC(".maps") = {
+ .values = {
+ [0] = (void *) &classifier_0,
+ },
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, void (void));
+} jmp_table1 SEC(".maps") = {
+ .values = {
+ [0] = (void *) &classifier_0,
+ },
+};
+
+int count = 0;
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb, void *jmp_table)
+{
+ bpf_tail_call_static(skb, jmp_table, 0);
+ return 0;
+}
+
+__auxiliary
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+ count++;
+ subprog_tail(skb, &jmp_table0);
+ subprog_tail(skb, &jmp_table1);
+ return count;
+}
+
+__success
+__retval(33)
+SEC("tc")
+int tailcall_bpf2bpf_hierarchy_3(struct __sk_buff *skb)
+{
+ int ret = 0;
+
+ clobber_regs_stack();
+
+ bpf_tail_call_static(skb, &jmp_table0, 0);
+
+ __sink(ret);
+ return ret;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
new file mode 100644
index 000000000000..5261395713cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_test_utils.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count = 0;
+
+static __noinline
+int subprog_tail(void *ctx)
+{
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+SEC("fentry/dummy")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+ clobber_regs_stack();
+
+ count++;
+ subprog_tail(ctx);
+ subprog_tail(ctx);
+
+ return 0;
+}
+
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_fail.c b/tools/testing/selftests/bpf/progs/tailcall_fail.c
new file mode 100644
index 000000000000..bc77921d2bb0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_fail.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+extern void bpf_rcu_read_lock(void) __ksym;
+extern void bpf_rcu_read_unlock(void) __ksym;
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
+private(A) struct bpf_spin_lock lock;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_tail_call_spin_lock(struct __sk_buff *ctx)
+{
+ bpf_spin_lock(&lock);
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("tail_call cannot be used inside bpf_rcu_read_lock-ed region")
+int reject_tail_call_rcu_lock(struct __sk_buff *ctx)
+{
+ bpf_rcu_read_lock();
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("tail_call cannot be used inside bpf_preempt_disable-ed region")
+int reject_tail_call_preempt_lock(struct __sk_buff *ctx)
+{
+ bpf_guard_preempt();
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("tail_call would lead to reference leak")
+int reject_tail_call_ref(struct __sk_buff *ctx)
+{
+ struct foo { int i; } *p;
+
+ p = bpf_obj_new(typeof(*p));
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_freplace.c b/tools/testing/selftests/bpf/progs/tailcall_freplace.c
new file mode 100644
index 000000000000..6713b809df44
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_freplace.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count = 0;
+
+SEC("freplace")
+int entry_freplace(struct __sk_buff *skb)
+{
+ count++;
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return count;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_poke.c b/tools/testing/selftests/bpf/progs/tailcall_poke.c
new file mode 100644
index 000000000000..c78b94b75e83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_poke.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test, int a)
+{
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call1, int a)
+{
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call2, int a)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
new file mode 100644
index 000000000000..e9c4fea7a4bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _TASK_KFUNC_COMMON_H
+#define _TASK_KFUNC_COMMON_H
+
+#include <errno.h>
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct __tasks_kfunc_map_value {
+ struct task_struct __kptr * task;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct __tasks_kfunc_map_value);
+ __uint(max_entries, 1);
+} __tasks_kfunc_map SEC(".maps");
+
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
+struct task_struct *bpf_task_from_vpid(s32 vpid) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p)
+{
+ s32 pid;
+ long status;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid);
+ if (status)
+ return NULL;
+
+ return bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+}
+
+static inline int tasks_kfunc_map_insert(struct task_struct *p)
+{
+ struct __tasks_kfunc_map_value local, *v;
+ long status;
+ struct task_struct *acquired, *old;
+ s32 pid;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid);
+ if (status)
+ return status;
+
+ local.task = NULL;
+ status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+ if (!v) {
+ bpf_map_delete_elem(&__tasks_kfunc_map, &pid);
+ return -ENOENT;
+ }
+
+ acquired = bpf_task_acquire(p);
+ if (!acquired)
+ return -ENOENT;
+
+ old = bpf_kptr_xchg(&v->task, acquired);
+ if (old) {
+ bpf_task_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _TASK_KFUNC_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
new file mode 100644
index 000000000000..4c07ea193f72
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task)
+{
+ int status;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status)
+ return NULL;
+
+ return tasks_kfunc_map_value_lookup(task);
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_task_acquire() on an untrusted pointer. */
+ acquired = bpf_task_acquire(v->task);
+ if (!acquired)
+ return 0;
+
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 pointer type STRUCT task_struct must point")
+int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags;
+
+ /* Can't invoke bpf_task_acquire() on a random frame pointer. */
+ acquired = bpf_task_acquire((struct task_struct *)&stack_task);
+ if (!acquired)
+ return 0;
+
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("kretprobe/free_task")
+__failure __msg("calling kernel function bpf_task_acquire is not allowed")
+int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ /* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("kretprobe/free_task")
+__failure __msg("calling kernel function bpf_task_acquire is not allowed")
+int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe_rcu, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ bpf_rcu_read_lock();
+ if (!task) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+ /* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
+ acquired = bpf_task_acquire(task);
+ if (acquired)
+ bpf_task_release(acquired);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ /* Can't invoke bpf_task_acquire() on a NULL pointer. */
+ acquired = bpf_task_acquire(NULL);
+ if (!acquired)
+ return 0;
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+
+ /* Acquired task is never released. */
+ __sink(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ kptr = bpf_kptr_xchg(&v->task, NULL);
+ if (!kptr)
+ return 0;
+
+ /* Kptr retrieved from map is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+ /* Can't invoke bpf_task_release() on an acquired task without a NULL check. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_task_release() on an untrusted pointer. */
+ bpf_task_release(v->task);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 pointer type STRUCT task_struct must point")
+int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired = (struct task_struct *)&clone_flags;
+
+ /* Cannot release random frame pointer. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value local, *v;
+ long status;
+ struct task_struct *acquired, *old;
+ s32 pid;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid);
+ if (status)
+ return 0;
+
+ local.task = NULL;
+ status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+ if (!v)
+ return -ENOENT;
+
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return -EEXIST;
+
+ old = bpf_kptr_xchg(&v->task, acquired);
+
+ /* old cannot be passed to bpf_task_release() without a NULL check. */
+ bpf_task_release(old);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("release kernel function bpf_task_release expects")
+int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags)
+{
+ /* Cannot release trusted task pointer which was not acquired. */
+ bpf_task_release(task);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_pid(task->pid);
+
+ /* Releasing bpf_task_from_pid() lookup without a NULL check. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_from_vpid_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_vpid(task->pid);
+
+ /* Releasing bpf_task_from_vpid() lookup without a NULL check. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("lsm/task_free")
+__failure __msg("R1 must be a rcu pointer")
+int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
+{
+ struct task_struct *acquired;
+
+ /* the argument of lsm task_free hook is untrusted. */
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+
+ bpf_task_release(acquired);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("access beyond the end of member comm")
+int BPF_PROG(task_access_comm1, struct task_struct *task, u64 clone_flags)
+{
+ bpf_strncmp(task->comm, 17, "foo");
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("access beyond the end of member comm")
+int BPF_PROG(task_access_comm2, struct task_struct *task, u64 clone_flags)
+{
+ bpf_strncmp(task->comm + 1, 16, "foo");
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("write into memory")
+int BPF_PROG(task_access_comm3, struct task_struct *task, u64 clone_flags)
+{
+ bpf_probe_read_kernel(task->comm, 16, task->comm);
+ return 0;
+}
+
+SEC("fentry/__set_task_comm")
+__failure __msg("R1 type=ptr_ expected")
+int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool exec)
+{
+ /*
+ * task->comm is a legacy ptr_to_btf_id. The verifier cannot guarantee
+ * its safety. Hence it cannot be accessed with normal load insns.
+ */
+ bpf_strncmp(task->comm, 16, "foo");
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R1 must be referenced or trusted")
+int BPF_PROG(task_kfunc_release_in_map, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *local;
+ struct __tasks_kfunc_map_value *v;
+
+ if (tasks_kfunc_map_insert(task))
+ return 0;
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v)
+ return 0;
+
+ bpf_rcu_read_lock();
+ local = v->task;
+ if (!local) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+ /* Can't release a kptr that's still stored in a map. */
+ bpf_task_release(local);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
new file mode 100644
index 000000000000..5fb4fc19d26a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "../bpf_experimental.h"
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err, pid;
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+
+struct task_struct *bpf_task_acquire___one(struct task_struct *task) __ksym __weak;
+/* The two-param bpf_task_acquire doesn't exist */
+struct task_struct *bpf_task_acquire___two(struct task_struct *p, void *ctx) __ksym __weak;
+/* Incorrect type for first param */
+struct task_struct *bpf_task_acquire___three(void *ctx) __ksym __weak;
+
+void invalid_kfunc(void) __ksym __weak;
+void bpf_testmod_test_mod_kfunc(int i) __ksym __weak;
+
+static bool is_test_kfunc_task(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ return pid == cur_pid;
+}
+
+static int test_acquire_release(struct task_struct *task)
+{
+ struct task_struct *acquired = NULL;
+
+ if (!bpf_ksym_exists(bpf_task_acquire)) {
+ err = 3;
+ return 0;
+ }
+ if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc)) {
+ err = 4;
+ return 0;
+ }
+ if (bpf_ksym_exists(invalid_kfunc)) {
+ /* the verifier's dead code elimination should remove this */
+ err = 5;
+ asm volatile ("goto -1"); /* for (;;); */
+ }
+
+ acquired = bpf_task_acquire(task);
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 6;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_kfunc_flavor_relo, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired = NULL;
+ int fake_ctx = 42;
+
+ if (bpf_ksym_exists(bpf_task_acquire___one)) {
+ acquired = bpf_task_acquire___one(task);
+ } else if (bpf_ksym_exists(bpf_task_acquire___two)) {
+ /* Here, bpf_object__resolve_ksym_func_btf_id's find_ksym_btf_id
+ * call will find vmlinux's bpf_task_acquire, but subsequent
+ * bpf_core_types_are_compat will fail
+ */
+ acquired = bpf_task_acquire___two(task, &fake_ctx);
+ err = 3;
+ return 0;
+ } else if (bpf_ksym_exists(bpf_task_acquire___three)) {
+ /* bpf_core_types_are_compat will fail similarly to above case */
+ acquired = bpf_task_acquire___three(&fake_ctx);
+ err = 4;
+ return 0;
+ }
+
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 5;
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_kfunc_flavor_relo_not_found, struct task_struct *task, u64 clone_flags)
+{
+ /* Neither symbol should successfully resolve.
+ * Success or failure of one ___flavor should not affect others
+ */
+ if (bpf_ksym_exists(bpf_task_acquire___two))
+ err = 1;
+ else if (bpf_ksym_exists(bpf_task_acquire___three))
+ err = 2;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ return test_acquire_release(task);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_release_current, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ return test_acquire_release(bpf_get_current_task_btf());
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone_flags)
+{
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status)
+ err = 1;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr, *acquired;
+ struct __tasks_kfunc_map_value *v, *local;
+ int refcnt, refcnt_after_drop;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = bpf_kptr_xchg(&v->task, NULL);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ local = bpf_obj_new(typeof(*local));
+ if (!local) {
+ err = 4;
+ bpf_task_release(kptr);
+ return 0;
+ }
+
+ kptr = bpf_kptr_xchg(&local->task, kptr);
+ if (kptr) {
+ err = 5;
+ bpf_obj_drop(local);
+ bpf_task_release(kptr);
+ return 0;
+ }
+
+ kptr = bpf_kptr_xchg(&local->task, NULL);
+ if (!kptr) {
+ err = 6;
+ bpf_obj_drop(local);
+ return 0;
+ }
+
+ /* Stash a copy into local kptr and check if it is released recursively */
+ acquired = bpf_task_acquire(kptr);
+ if (!acquired) {
+ err = 7;
+ bpf_obj_drop(local);
+ bpf_task_release(kptr);
+ return 0;
+ }
+ bpf_probe_read_kernel(&refcnt, sizeof(refcnt), &acquired->rcu_users);
+
+ acquired = bpf_kptr_xchg(&local->task, acquired);
+ if (acquired) {
+ err = 8;
+ bpf_obj_drop(local);
+ bpf_task_release(kptr);
+ bpf_task_release(acquired);
+ return 0;
+ }
+
+ bpf_obj_drop(local);
+
+ bpf_probe_read_kernel(&refcnt_after_drop, sizeof(refcnt_after_drop), &kptr->rcu_users);
+ if (refcnt != refcnt_after_drop + 1) {
+ err = 9;
+ bpf_task_release(kptr);
+ return 0;
+ }
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_map_acquire_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ kptr = v->task;
+ if (!kptr) {
+ err = 3;
+ } else {
+ kptr = bpf_task_acquire(kptr);
+ if (!kptr)
+ err = 4;
+ else
+ bpf_task_release(kptr);
+ }
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *current, *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ current = bpf_get_current_task_btf();
+ acquired = bpf_task_acquire(current);
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 1;
+
+ return 0;
+}
+
+static void lookup_compare_pid(const struct task_struct *p)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_pid(p->pid);
+ if (!acquired) {
+ err = 1;
+ return;
+ }
+
+ if (acquired->pid != p->pid)
+ err = 2;
+ bpf_task_release(acquired);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ lookup_compare_pid(task);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ lookup_compare_pid(bpf_get_current_task_btf());
+ return 0;
+}
+
+static int is_pid_lookup_valid(s32 pid)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_pid(pid);
+ if (acquired) {
+ bpf_task_release(acquired);
+ return 1;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ bpf_strncmp(task->comm, 12, "foo");
+ bpf_strncmp(task->comm, 16, "foo");
+ bpf_strncmp(&task->comm[8], 4, "foo");
+
+ if (is_pid_lookup_valid(-1)) {
+ err = 1;
+ return 0;
+ }
+
+ if (is_pid_lookup_valid(0xcafef00d)) {
+ err = 2;
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ /* task->group_leader is listed as a trusted, non-NULL field of task struct. */
+ acquired = bpf_task_acquire(task->group_leader);
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 1;
+
+
+ return 0;
+}
+
+SEC("syscall")
+int test_task_from_vpid_current(const void *ctx)
+{
+ struct task_struct *current, *v_task;
+
+ v_task = bpf_task_from_vpid(1);
+ if (!v_task) {
+ err = 1;
+ return 0;
+ }
+
+ current = bpf_get_current_task_btf();
+
+ /* The current process should be the init process (pid 1) in the new pid namespace. */
+ if (current != v_task)
+ err = 2;
+
+ bpf_task_release(v_task);
+ return 0;
+}
+
+SEC("syscall")
+int test_task_from_vpid_invalid(const void *ctx)
+{
+ struct task_struct *v_task;
+
+ v_task = bpf_task_from_vpid(-1);
+ if (v_task) {
+ err = 1;
+ goto err;
+ }
+
+ /* There should be only one process (current process) in the new pid namespace. */
+ v_task = bpf_task_from_vpid(2);
+ if (v_task) {
+ err = 2;
+ goto err;
+ }
+
+ v_task = bpf_task_from_vpid(9999);
+ if (v_task) {
+ err = 3;
+ goto err;
+ }
+
+ return 0;
+err:
+ bpf_task_release(v_task);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
new file mode 100644
index 000000000000..432fff2af844
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_BPF_H
+#define __TASK_LOCAL_DATA_BPF_H
+
+/*
+ * Task local data is a library that facilitates sharing per-task data
+ * between user space and bpf programs.
+ *
+ *
+ * USAGE
+ *
+ * A TLD, an entry of data in task local data, first needs to be created by the
+ * user space. This is done by calling user space API, TLD_DEFINE_KEY() or
+ * tld_create_key(), with the name of the TLD and the size.
+ *
+ * TLD_DEFINE_KEY(prio, "priority", sizeof(int));
+ *
+ * or
+ *
+ * void func_call(...) {
+ * tld_key_t prio, in_cs;
+ *
+ * prio = tld_create_key("priority", sizeof(int));
+ * in_cs = tld_create_key("in_critical_section", sizeof(bool));
+ * ...
+ *
+ * A key associated with the TLD, which has an opaque type tld_key_t, will be
+ * initialized or returned. It can be used to get a pointer to the TLD in the
+ * user space by calling tld_get_data().
+ *
+ * In a bpf program, tld_object_init() first needs to be called to initialized a
+ * tld_object on the stack. Then, TLDs can be accessed by calling tld_get_data().
+ * The API will try to fetch the key by the name and use it to locate the data.
+ * A pointer to the TLD will be returned. It also caches the key in a task local
+ * storage map, tld_key_map, whose value type, struct tld_keys, must be defined
+ * by the developer.
+ *
+ * struct tld_keys {
+ * tld_key_t prio;
+ * tld_key_t in_cs;
+ * };
+ *
+ * SEC("struct_ops")
+ * void prog(struct task_struct task, ...)
+ * {
+ * struct tld_object tld_obj;
+ * int err, *p;
+ *
+ * err = tld_object_init(task, &tld_obj);
+ * if (err)
+ * return;
+ *
+ * p = tld_get_data(&tld_obj, prio, "priority", sizeof(int));
+ * if (p)
+ * // do something depending on *p
+ */
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
+
+#define TLD_MAX_DATA_CNT (__PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifndef TLD_KEY_MAP_CREATE_RETRY
+#define TLD_KEY_MAP_CREATE_RETRY 10
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ __u16 size;
+};
+
+struct tld_meta_u {
+ __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[TLD_MAX_DATA_CNT];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[__PAGE_SIZE - sizeof(__u64)];
+};
+
+struct tld_map_value {
+ struct tld_data_u __uptr *data;
+ struct tld_meta_u __uptr *meta;
+};
+
+typedef struct tld_uptr_dummy {
+ struct tld_data_u data[0];
+ struct tld_meta_u meta[0];
+} *tld_uptr_dummy_t;
+
+struct tld_object {
+ struct tld_map_value *data_map;
+ struct tld_keys *key_map;
+ /*
+ * Force the compiler to generate the actual definition of tld_meta_u
+ * and tld_data_u in BTF. Without it, tld_meta_u and u_tld_data will
+ * be BTF_KIND_FWD.
+ */
+ tld_uptr_dummy_t dummy[0];
+};
+
+/*
+ * Map value of tld_key_map for caching keys. Must be defined by the developer.
+ * Members should be tld_key_t and passed to the 3rd argument of tld_fetch_key().
+ */
+struct tld_keys;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_map_value);
+} tld_data_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_keys);
+} tld_key_map SEC(".maps");
+
+/**
+ * tld_object_init() - Initialize a tld_object.
+ *
+ * @task: The task_struct of the target task
+ * @tld_obj: A pointer to a tld_object to be initialized
+ *
+ * Return 0 on success; -ENODATA if the user space did not initialize task local data
+ * for the current task through tld_get_data(); -ENOMEM if the creation of tld_key_map
+ * fails
+ */
+__attribute__((unused))
+static int tld_object_init(struct task_struct *task, struct tld_object *tld_obj)
+{
+ int i;
+
+ tld_obj->data_map = bpf_task_storage_get(&tld_data_map, task, 0, 0);
+ if (!tld_obj->data_map)
+ return -ENODATA;
+
+ bpf_for(i, 0, TLD_KEY_MAP_CREATE_RETRY) {
+ tld_obj->key_map = bpf_task_storage_get(&tld_key_map, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (likely(tld_obj->key_map))
+ break;
+ }
+ if (!tld_obj->key_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * Return the offset of TLD if @name is found. Otherwise, return the current TLD count
+ * using the nonpositive range so that the next tld_get_data() can skip fetching key if
+ * no new TLD is added or start comparing name from the first newly added TLD.
+ */
+__attribute__((unused))
+static int __tld_fetch_key(struct tld_object *tld_obj, const char *name, int i_start)
+{
+ struct tld_metadata *metadata;
+ int i, cnt, start, off = 0;
+
+ if (!tld_obj->data_map || !tld_obj->data_map->data || !tld_obj->data_map->meta)
+ return 0;
+
+ start = tld_obj->data_map->data->start;
+ cnt = tld_obj->data_map->meta->cnt;
+ metadata = tld_obj->data_map->meta->metadata;
+
+ bpf_for(i, 0, cnt) {
+ if (i >= TLD_MAX_DATA_CNT)
+ break;
+
+ if (i >= i_start && !bpf_strncmp(metadata[i].name, TLD_NAME_LEN, name))
+ return start + off;
+
+ off += TLD_ROUND_UP(metadata[i].size, 8);
+ }
+
+ return -cnt;
+}
+
+/**
+ * tld_get_data() - Retrieve a pointer to the TLD associated with the name.
+ *
+ * @tld_obj: A pointer to a valid tld_object initialized by tld_object_init()
+ * @key: The cached key of the TLD in tld_key_map
+ * @name: The name of the key associated with a TLD
+ * @size: The size of the TLD. Must be a known constant value
+ *
+ * Return a pointer to the TLD associated with @name; NULL if not found or @size is too
+ * big. @key is used to cache the key if the TLD is found to speed up subsequent calls.
+ * It should be defined as an member of tld_keys of tld_key_t type by the developer.
+ */
+#define tld_get_data(tld_obj, key, name, size) \
+ ({ \
+ void *data = NULL, *_data = (tld_obj)->data_map->data; \
+ long off = (tld_obj)->key_map->key.off; \
+ int cnt; \
+ \
+ if (likely(_data)) { \
+ if (likely(off > 0)) { \
+ barrier_var(off); \
+ if (likely(off < __PAGE_SIZE - size)) \
+ data = _data + off; \
+ } else { \
+ cnt = -(off); \
+ if (likely((tld_obj)->data_map->meta) && \
+ cnt < (tld_obj)->data_map->meta->cnt) { \
+ off = __tld_fetch_key(tld_obj, name, cnt); \
+ (tld_obj)->key_map->key.off = off; \
+ \
+ if (likely(off < __PAGE_SIZE - size)) { \
+ barrier_var(off); \
+ if (off > 0) \
+ data = _data + off; \
+ } \
+ } \
+ } \
+ } \
+ data; \
+ })
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c
new file mode 100644
index 000000000000..80a0a20db88d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} enter_id SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+pid_t target_pid = 0;
+int mismatch_cnt = 0;
+int enter_cnt = 0;
+int exit_cnt = 0;
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&enter_id, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ __sync_fetch_and_add(&enter_cnt, 1);
+ *ptr = MAGIC_VALUE + enter_cnt;
+
+ return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&enter_id, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ __sync_fetch_and_add(&exit_cnt, 1);
+ if (*ptr != MAGIC_VALUE + exit_cnt)
+ __sync_fetch_and_add(&mismatch_cnt, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
new file mode 100644
index 000000000000..41d88ed222ff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, __u64);
+} task_storage SEC(".maps");
+
+int run_count = 0;
+int valid_ptr_count = 0;
+int null_ptr_count = 0;
+
+SEC("fentry/exit_creds")
+int BPF_PROG(trace_exit_creds, struct task_struct *task)
+{
+ __u64 *ptr;
+
+ ptr = bpf_task_storage_get(&task_storage, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ __sync_fetch_and_add(&valid_ptr_count, 1);
+ else
+ __sync_fetch_and_add(&null_ptr_count, 1);
+
+ __sync_fetch_and_add(&run_count, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
new file mode 100644
index 000000000000..f1853c38aada
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef EBUSY
+#define EBUSY 16
+#endif
+
+char _license[] SEC("license") = "GPL";
+int nr_del_errs = 0;
+int test_pid = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_b SEC(".maps");
+
+SEC("fentry/bpf_local_storage_update")
+int BPF_PROG(on_update)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ long *ptr;
+
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&map_a, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ /* ptr will not be NULL when it is called from
+ * the bpf_task_storage_get(&map_b,...F_CREATE) in
+ * the BPF_PROG(on_enter) below. It is because
+ * the value can be found in map_a and the kernel
+ * does not need to acquire any spin_lock.
+ */
+ if (ptr) {
+ int err;
+
+ *ptr += 1;
+ err = bpf_task_storage_delete(&map_a, task);
+ if (err == -EBUSY)
+ nr_del_errs++;
+ }
+
+ /* This will still fail because map_b is empty and
+ * this BPF_PROG(on_update) has failed to acquire
+ * the percpu busy lock => meaning potential
+ * deadlock is detected and it will fail to create
+ * new storage.
+ */
+ ptr = bpf_task_storage_get(&map_b, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&map_a, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr && !*ptr)
+ *ptr = 200;
+
+ ptr = bpf_task_storage_get(&map_b, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr && !*ptr)
+ *ptr = 100;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_ls_uptr.c b/tools/testing/selftests/bpf/progs/task_ls_uptr.c
new file mode 100644
index 000000000000..ddbe11b46eef
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_ls_uptr.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "uptr_test_common.h"
+
+struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct value_type);
+} datamap SEC(".maps");
+
+pid_t target_pid = 0;
+pid_t parent_pid = 0;
+
+SEC("tp_btf/sys_enter")
+int on_enter(__u64 *ctx)
+{
+ struct task_struct *task, *data_task;
+ struct value_type *ptr;
+ struct user_data *udata;
+ struct cgroup *cgrp;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ data_task = bpf_task_from_pid(parent_pid);
+ if (!data_task)
+ return 0;
+
+ ptr = bpf_task_storage_get(&datamap, data_task, 0, 0);
+ bpf_task_release(data_task);
+ if (!ptr)
+ return 0;
+
+ cgrp = bpf_kptr_xchg(&ptr->cgrp, NULL);
+ if (cgrp) {
+ int lvl = cgrp->level;
+
+ bpf_cgroup_release(cgrp);
+ return lvl;
+ }
+
+ udata = ptr->udata;
+ if (!udata || udata->result)
+ return 0;
+ udata->result = MAGIC_VALUE + udata->a + udata->b;
+
+ udata = ptr->nested.udata;
+ if (udata && !udata->nested_result)
+ udata->nested_result = udata->result;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
new file mode 100644
index 000000000000..986829aaf73a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef EBUSY
+#define EBUSY 16
+#endif
+
+extern bool CONFIG_PREEMPTION __kconfig __weak;
+int nr_get_errs = 0;
+int nr_del_errs = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} task_storage SEC(".maps");
+
+SEC("lsm.s/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ struct task_struct *task;
+ int ret, zero = 0;
+ int *value;
+
+ if (!CONFIG_PREEMPTION)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ value = bpf_task_storage_get(&task_storage, task, &zero,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!value)
+ __sync_fetch_and_add(&nr_get_errs, 1);
+
+ ret = bpf_task_storage_delete(&task_storage,
+ bpf_get_current_task_btf());
+ if (ret == -EBUSY)
+ __sync_fetch_and_add(&nr_del_errs, 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c
new file mode 100644
index 000000000000..663a80990f8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} lrumap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+int oncpu_hash_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ err = bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_array_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_lru_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (work)
+ return 0;
+ err = bpf_map_update_elem(&lrumap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (!work || work->data[0])
+ return 0;
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c
new file mode 100644
index 000000000000..1270953fd092
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_fail.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+__failure __msg("doesn't match map pointer in R3")
+int mismatch_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("arg#1 doesn't point to a map value")
+int no_map_task_work(struct pt_regs *args)
+{
+ struct task_struct *task;
+ struct bpf_task_work tw;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
+int task_work_null(struct pt_regs *args)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg2")
+int map_null(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c
new file mode 100644
index 000000000000..55e555f7f41b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_stress.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+#define ENTRIES 128
+
+char _license[] SEC("license") = "GPL";
+
+__u64 callback_scheduled = 0;
+__u64 callback_success = 0;
+__u64 schedule_error = 0;
+__u64 delete_success = 0;
+
+struct elem {
+ __u32 count;
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, ENTRIES);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ __sync_fetch_and_add(&callback_success, 1);
+ return 0;
+}
+
+SEC("syscall")
+int schedule_task_work(void *ctx)
+{
+ struct elem empty_work = {.count = 0};
+ struct elem *work;
+ int key = 0, err;
+
+ key = bpf_ktime_get_ns() % ENTRIES;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work) {
+ bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+ }
+ err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap,
+ process_work, NULL);
+ if (err)
+ __sync_fetch_and_add(&schedule_error, 1);
+ else
+ __sync_fetch_and_add(&callback_scheduled, 1);
+ return 0;
+}
+
+SEC("syscall")
+int delete_task_work(void *ctx)
+{
+ int key = 0, err;
+
+ key = bpf_get_prandom_u32() % ENTRIES;
+ err = bpf_map_delete_elem(&hmap, &key);
+ if (!err)
+ __sync_fetch_and_add(&delete_success, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c
new file mode 100644
index 000000000000..fe6249d99b31
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline
+int subprog_tc(struct __sk_buff *skb)
+{
+ int ret = 1;
+
+ __sink(skb);
+ __sink(ret);
+ /* let verifier know that 'subprog_tc' can change pointers to skb->data */
+ bpf_skb_change_proto(skb, 0, 0);
+ return ret;
+}
+
+SEC("tc")
+int entry_tc(struct __sk_buff *skb)
+{
+ return subprog_tc(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tc_dummy.c b/tools/testing/selftests/bpf/progs/tc_dummy.c
new file mode 100644
index 000000000000..69a3d0dc8787
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tc_dummy.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ return 1;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
new file mode 100644
index 000000000000..0016c90e9c13
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops")
+__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops incompl_cong_ops = {
+ /* Intentionally leaving out any of the required cong_avoid() and
+ * cong_control() here.
+ */
+ .ssthresh = (void *)incompl_cong_ops_ssthresh,
+ .undo_cwnd = (void *)incompl_cong_ops_undo_cwnd,
+ .name = "bpf_incompl_ops",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c
new file mode 100644
index 000000000000..f95862f570b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+extern void bbr_init(struct sock *sk) __ksym;
+extern void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) __ksym;
+extern u32 bbr_sndbuf_expand(struct sock *sk) __ksym;
+extern u32 bbr_undo_cwnd(struct sock *sk) __ksym;
+extern void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern u32 bbr_ssthresh(struct sock *sk) __ksym;
+extern u32 bbr_min_tso_segs(struct sock *sk) __ksym;
+extern void bbr_set_state(struct sock *sk, u8 new_state) __ksym;
+
+extern void dctcp_init(struct sock *sk) __ksym;
+extern void dctcp_update_alpha(struct sock *sk, u32 flags) __ksym;
+extern void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) __ksym;
+extern u32 dctcp_ssthresh(struct sock *sk) __ksym;
+extern u32 dctcp_cwnd_undo(struct sock *sk) __ksym;
+extern void dctcp_state(struct sock *sk, u8 new_state) __ksym;
+
+extern void cubictcp_init(struct sock *sk) __ksym;
+extern u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
+extern void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) __ksym;
+extern void cubictcp_state(struct sock *sk, u8 new_state) __ksym;
+extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
+
+SEC("struct_ops")
+void BPF_PROG(init, struct sock *sk)
+{
+ bbr_init(sk);
+ dctcp_init(sk);
+ cubictcp_init(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(in_ack_event, struct sock *sk, u32 flags)
+{
+ dctcp_update_alpha(sk, flags);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cong_control, struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
+{
+ bbr_main(sk, ack, flag, rs);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cong_avoid, struct sock *sk, u32 ack, u32 acked)
+{
+ cubictcp_cong_avoid(sk, ack, acked);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(sndbuf_expand, struct sock *sk)
+{
+ return bbr_sndbuf_expand(sk);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(undo_cwnd, struct sock *sk)
+{
+ bbr_undo_cwnd(sk);
+ return dctcp_cwnd_undo(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event)
+{
+ bbr_cwnd_event(sk, event);
+ dctcp_cwnd_event(sk, event);
+ cubictcp_cwnd_event(sk, event);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(ssthresh, struct sock *sk)
+{
+ bbr_ssthresh(sk);
+ dctcp_ssthresh(sk);
+ return cubictcp_recalc_ssthresh(sk);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(min_tso_segs, struct sock *sk)
+{
+ return bbr_min_tso_segs(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(set_state, struct sock *sk, u8 new_state)
+{
+ bbr_set_state(sk, new_state);
+ dctcp_state(sk, new_state);
+ cubictcp_state(sk, new_state);
+}
+
+SEC("struct_ops")
+void BPF_PROG(pkts_acked, struct sock *sk, const struct ack_sample *sample)
+{
+ cubictcp_acked(sk, sample);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops tcp_ca_kfunc = {
+ .init = (void *)init,
+ .in_ack_event = (void *)in_ack_event,
+ .cong_control = (void *)cong_control,
+ .cong_avoid = (void *)cong_avoid,
+ .sndbuf_expand = (void *)sndbuf_expand,
+ .undo_cwnd = (void *)undo_cwnd,
+ .cwnd_event = (void *)cwnd_event,
+ .ssthresh = (void *)ssthresh,
+ .min_tso_segs = (void *)min_tso_segs,
+ .set_state = (void *)set_state,
+ .pkts_acked = (void *)pkts_acked,
+ .name = "tcp_ca_kfunc",
+};
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
new file mode 100644
index 000000000000..54f916a931c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops")
+size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
+{
+ return 0;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops unsupp_cong_op = {
+ .get_info = (void *)unsupp_cong_op_get_info,
+ .name = "bpf_unsupp_op",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
new file mode 100644
index 000000000000..e4bd82bc0d01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int ca1_cnt = 0;
+int ca2_cnt = 0;
+
+SEC("struct_ops")
+void BPF_PROG(ca_update_1_init, struct sock *sk)
+{
+ ca1_cnt++;
+}
+
+SEC("struct_ops")
+void BPF_PROG(ca_update_2_init, struct sock *sk)
+{
+ ca2_cnt++;
+}
+
+SEC("struct_ops")
+void BPF_PROG(ca_update_cong_control, struct sock *sk,
+ const struct rate_sample *rs)
+{
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_1 = {
+ .init = (void *)ca_update_1_init,
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_update",
+};
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_2 = {
+ .init = (void *)ca_update_2_init,
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_update",
+};
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_wrong = {
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_wrong",
+};
+
+SEC(".struct_ops")
+struct tcp_congestion_ops ca_no_link = {
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_no_link",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
new file mode 100644
index 000000000000..022291f21dfb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define USEC_PER_SEC 1000000UL
+
+static unsigned int tcp_left_out(const struct tcp_sock *tp)
+{
+ return tp->sacked_out + tp->lost_out;
+}
+
+static unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
+{
+ return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
+}
+
+SEC("struct_ops")
+void BPF_PROG(write_sk_pacing_init, struct sock *sk)
+{
+#ifdef ENABLE_ATOMICS_TESTS
+ __sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE,
+ SK_PACING_NEEDED);
+#else
+ sk->sk_pacing_status = SK_PACING_NEEDED;
+#endif
+}
+
+SEC("struct_ops")
+void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
+ const struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long rate =
+ ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) /
+ (tp->srtt_us ?: 1U << 3);
+ sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+ tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?: 1;
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops write_sk_pacing = {
+ .init = (void *)write_sk_pacing_init,
+ .cong_control = (void *)write_sk_pacing_cong_control,
+ .ssthresh = (void *)write_sk_pacing_ssthresh,
+ .undo_cwnd = (void *)write_sk_pacing_undo_cwnd,
+ .name = "bpf_w_sk_pacing",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
index 0cb3204ddb18..42c729f85524 100644
--- a/tools/testing/selftests/bpf/progs/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -3,7 +3,6 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
struct tcp_rtt_storage {
__u32 invoked;
@@ -11,6 +10,9 @@ struct tcp_rtt_storage {
__u32 delivered;
__u32 delivered_ce;
__u32 icsk_retransmits;
+
+ __u32 mrtt_us; /* args[0] */
+ __u32 srtt; /* args[1] */
};
struct {
@@ -56,5 +58,8 @@ int _sockops(struct bpf_sock_ops *ctx)
storage->delivered_ce = tcp_sk->delivered_ce;
storage->icsk_retransmits = tcp_sk->icsk_retransmits;
+ storage->mrtt_us = ctx->args[0];
+ storage->srtt = ctx->args[1];
+
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/test_access_variable_array.c b/tools/testing/selftests/bpf/progs/test_access_variable_array.c
new file mode 100644
index 000000000000..326b7d1f496a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_access_variable_array.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+unsigned long span = 0;
+
+SEC("fentry/sched_balance_rq")
+int BPF_PROG(fentry_fentry, int this_cpu, struct rq *this_rq,
+ struct sched_domain *sd)
+{
+ span = sd->span[0];
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_assign_reuse.c b/tools/testing/selftests/bpf/progs/test_assign_reuse.c
new file mode 100644
index 000000000000..4f2e2321ea06
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_assign_reuse.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/pkt_cls.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+__u64 sk_cookie_seen;
+__u64 reuseport_executed;
+union {
+ struct tcphdr tcp;
+ struct udphdr udp;
+} headers;
+
+const volatile __u16 dest_port;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} sk_map SEC(".maps");
+
+SEC("sk_reuseport")
+int reuse_accept(struct sk_reuseport_md *ctx)
+{
+ reuseport_executed++;
+
+ if (ctx->ip_protocol == IPPROTO_TCP) {
+ if (ctx->data + sizeof(headers.tcp) > ctx->data_end)
+ return SK_DROP;
+
+ if (__builtin_memcmp(&headers.tcp, ctx->data, sizeof(headers.tcp)) != 0)
+ return SK_DROP;
+ } else if (ctx->ip_protocol == IPPROTO_UDP) {
+ if (ctx->data + sizeof(headers.udp) > ctx->data_end)
+ return SK_DROP;
+
+ if (__builtin_memcmp(&headers.udp, ctx->data, sizeof(headers.udp)) != 0)
+ return SK_DROP;
+ } else {
+ return SK_DROP;
+ }
+
+ sk_cookie_seen = bpf_get_socket_cookie(ctx->sk);
+ return SK_PASS;
+}
+
+SEC("sk_reuseport")
+int reuse_drop(struct sk_reuseport_md *ctx)
+{
+ reuseport_executed++;
+ sk_cookie_seen = 0;
+ return SK_DROP;
+}
+
+static int
+assign_sk(struct __sk_buff *skb)
+{
+ int zero = 0, ret = 0;
+ struct bpf_sock *sk;
+
+ sk = bpf_map_lookup_elem(&sk_map, &zero);
+ if (!sk)
+ return TC_ACT_SHOT;
+ ret = bpf_sk_assign(skb, sk, 0);
+ bpf_sk_release(sk);
+ return ret ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+static bool
+maybe_assign_tcp(struct __sk_buff *skb, struct tcphdr *th)
+{
+ if (th + 1 > (void *)(long)(skb->data_end))
+ return TC_ACT_SHOT;
+
+ if (!th->syn || th->ack || th->dest != bpf_htons(dest_port))
+ return TC_ACT_OK;
+
+ __builtin_memcpy(&headers.tcp, th, sizeof(headers.tcp));
+ return assign_sk(skb);
+}
+
+static bool
+maybe_assign_udp(struct __sk_buff *skb, struct udphdr *uh)
+{
+ if (uh + 1 > (void *)(long)(skb->data_end))
+ return TC_ACT_SHOT;
+
+ if (uh->dest != bpf_htons(dest_port))
+ return TC_ACT_OK;
+
+ __builtin_memcpy(&headers.udp, uh, sizeof(headers.udp));
+ return assign_sk(skb);
+}
+
+SEC("tc")
+int tc_main(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth;
+
+ eth = (struct ethhdr *)(data);
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
+
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ if (iph->protocol == IPPROTO_TCP)
+ return maybe_assign_tcp(skb, (struct tcphdr *)(iph + 1));
+ else if (iph->protocol == IPPROTO_UDP)
+ return maybe_assign_udp(skb, (struct udphdr *)(iph + 1));
+ else
+ return TC_ACT_SHOT;
+ } else {
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
+
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ if (ip6h->nexthdr == IPPROTO_TCP)
+ return maybe_assign_tcp(skb, (struct tcphdr *)(ip6h + 1));
+ else if (ip6h->nexthdr == IPPROTO_UDP)
+ return maybe_assign_udp(skb, (struct udphdr *)(ip6h + 1));
+ else
+ return TC_ACT_SHOT;
+ }
+}
diff --git a/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c
new file mode 100644
index 000000000000..f548b7446218
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+int kprobe_res = 0;
+
+/**
+ * This program will be manually made sleepable on the userspace side
+ * and should thus be unattachable.
+ */
+SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
+int handle_kprobe_sleepable(struct pt_regs *ctx)
+{
+ kprobe_res = 1;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 8056a4c6d918..fb79e6cab932 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -1,42 +1,183 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
-#include <linux/ptrace.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <errno.h>
+#include "bpf_misc.h"
-int kprobe_res = 0;
-int kretprobe_res = 0;
-int uprobe_res = 0;
-int uretprobe_res = 0;
+u32 dynamic_sz = 1;
+int kprobe2_res = 0;
+int kretprobe2_res = 0;
+int uprobe_byname_res = 0;
+int uretprobe_byname_res = 0;
+int uprobe_byname2_res = 0;
+int uretprobe_byname2_res = 0;
+int uprobe_byname3_sleepable_res = 0;
+int uprobe_byname3_str_sleepable_res = 0;
+int uprobe_byname3_res = 0;
+int uretprobe_byname3_sleepable_res = 0;
+int uretprobe_byname3_str_sleepable_res = 0;
+int uretprobe_byname3_res = 0;
+void *user_ptr = 0;
-SEC("kprobe/sys_nanosleep")
-int handle_kprobe(struct pt_regs *ctx)
+int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym;
+
+SEC("ksyscall/nanosleep")
+int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem)
+{
+ kprobe2_res = 11;
+ return 0;
+}
+
+SEC("kretsyscall/nanosleep")
+int BPF_KRETPROBE(handle_kretprobe_auto, int ret)
+{
+ kretprobe2_res = 22;
+ return ret;
+}
+
+SEC("uprobe")
+int handle_uprobe_ref_ctr(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe_ref_ctr(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+ uprobe_byname_res = 5;
+ return 0;
+}
+
+/* use auto-attach format for section definition. */
+SEC("uretprobe//proc/self/exe:trigger_func2")
+int handle_uretprobe_byname(struct pt_regs *ctx)
+{
+ uretprobe_byname_res = 6;
+ return 0;
+}
+
+SEC("uprobe")
+int BPF_UPROBE(handle_uprobe_byname2, const char *pathname, const char *mode)
+{
+ char mode_buf[2] = {};
+
+ /* verify fopen mode */
+ bpf_probe_read_user(mode_buf, sizeof(mode_buf), mode);
+ if (mode_buf[0] == 'r' && mode_buf[1] == 0)
+ uprobe_byname2_res = 7;
+ return 0;
+}
+
+SEC("uretprobe")
+int BPF_URETPROBE(handle_uretprobe_byname2, void *ret)
+{
+ uretprobe_byname2_res = 8;
+ return 0;
+}
+
+static __always_inline bool verify_sleepable_user_copy(void)
+{
+ char data[9];
+
+ bpf_copy_from_user(data, sizeof(data), user_ptr);
+ return bpf_strncmp(data, sizeof(data), "test_data") == 0;
+}
+
+static __always_inline bool verify_sleepable_user_copy_str(void)
+{
+ int ret;
+ char data_long[20];
+ char data_long_pad[20];
+ char data_long_err[20];
+ char data_short[4];
+ char data_short_pad[4];
+
+ ret = bpf_copy_from_user_str(data_short, sizeof(data_short), user_ptr, 0);
+
+ if (bpf_strncmp(data_short, 4, "tes\0") != 0 || ret != 4)
+ return false;
+
+ ret = bpf_copy_from_user_str(data_short_pad, sizeof(data_short_pad), user_ptr, BPF_F_PAD_ZEROS);
+
+ if (bpf_strncmp(data_short, 4, "tes\0") != 0 || ret != 4)
+ return false;
+
+ /* Make sure this passes the verifier */
+ ret = bpf_copy_from_user_str(data_long, dynamic_sz & sizeof(data_long), user_ptr, 0);
+
+ if (ret != 0)
+ return false;
+
+ ret = bpf_copy_from_user_str(data_long, sizeof(data_long), user_ptr, 0);
+
+ if (bpf_strncmp(data_long, 10, "test_data\0") != 0 || ret != 10)
+ return false;
+
+ ret = bpf_copy_from_user_str(data_long_pad, sizeof(data_long_pad), user_ptr, BPF_F_PAD_ZEROS);
+
+ if (bpf_strncmp(data_long_pad, 10, "test_data\0") != 0 || ret != 10 || data_long_pad[19] != '\0')
+ return false;
+
+ ret = bpf_copy_from_user_str(data_long_err, sizeof(data_long_err), (void *)data_long, BPF_F_PAD_ZEROS);
+
+ if (ret > 0 || data_long_err[19] != '\0')
+ return false;
+
+ ret = bpf_copy_from_user_str(data_long, sizeof(data_long), user_ptr, 2);
+
+ if (ret != -EINVAL)
+ return false;
+
+ return true;
+}
+
+SEC("uprobe.s//proc/self/exe:trigger_func3")
+int handle_uprobe_byname3_sleepable(struct pt_regs *ctx)
{
- kprobe_res = 1;
+ if (verify_sleepable_user_copy())
+ uprobe_byname3_sleepable_res = 9;
+ if (verify_sleepable_user_copy_str())
+ uprobe_byname3_str_sleepable_res = 10;
return 0;
}
-SEC("kretprobe/sys_nanosleep")
-int BPF_KRETPROBE(handle_kretprobe)
+/**
+ * same target as the uprobe.s above to force sleepable and non-sleepable
+ * programs in the same bpf_prog_array
+ */
+SEC("uprobe//proc/self/exe:trigger_func3")
+int handle_uprobe_byname3(struct pt_regs *ctx)
{
- kretprobe_res = 2;
+ uprobe_byname3_res = 11;
return 0;
}
-SEC("uprobe/trigger_func")
-int handle_uprobe(struct pt_regs *ctx)
+SEC("uretprobe.s//proc/self/exe:trigger_func3")
+int handle_uretprobe_byname3_sleepable(struct pt_regs *ctx)
{
- uprobe_res = 3;
+ if (verify_sleepable_user_copy())
+ uretprobe_byname3_sleepable_res = 12;
+ if (verify_sleepable_user_copy_str())
+ uretprobe_byname3_str_sleepable_res = 13;
return 0;
}
-SEC("uretprobe/trigger_func")
-int handle_uretprobe(struct pt_regs *ctx)
+SEC("uretprobe//proc/self/exe:trigger_func3")
+int handle_uretprobe_byname3(struct pt_regs *ctx)
{
- uretprobe_res = 4;
+ uretprobe_byname3_res = 14;
return 0;
}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c
new file mode 100644
index 000000000000..7f08bce94596
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+int kprobe_res = 0;
+int kretprobe_res = 0;
+int uprobe_res = 0;
+int uretprobe_res = 0;
+int uprobe_byname_res = 0;
+void *user_ptr = 0;
+
+SEC("kprobe")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ kprobe_res = 1;
+ return 0;
+}
+
+SEC("kretprobe")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ kretprobe_res = 2;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ uprobe_res = 3;
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ uretprobe_res = 4;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+ uprobe_byname_res = 5;
+ return 0;
+}
+
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_autoattach.c b/tools/testing/selftests/bpf/progs/test_autoattach.c
new file mode 100644
index 000000000000..11a44493ebce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoattach.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+ prog1_called = true;
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+ prog2_called = true;
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
new file mode 100644
index 000000000000..c83142b55f47
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+int my_tid;
+
+__u64 kprobe_res;
+__u64 kprobe_multi_res;
+__u64 kretprobe_res;
+__u64 uprobe_res;
+__u64 uretprobe_res;
+__u64 tp_res;
+__u64 pe_res;
+__u64 raw_tp_res;
+__u64 tp_btf_res;
+__u64 fentry_res;
+__u64 fexit_res;
+__u64 fmod_ret_res;
+__u64 lsm_res;
+
+static void update(void *ctx, __u64 *res)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid())
+ return;
+
+ *res |= bpf_get_attach_cookie(ctx);
+}
+
+SEC("kprobe")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kprobe_res);
+ return 0;
+}
+
+SEC("kretprobe")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kretprobe_res);
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uprobe_res);
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uretprobe_res);
+ return 0;
+}
+
+/* bpf_prog_array, used by kernel internally to keep track of attached BPF
+ * programs to a given BPF hook (e.g., for tracepoints) doesn't allow the same
+ * BPF program to be attached multiple times. So have three identical copies
+ * ready to attach to the same tracepoint.
+ */
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp1(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp2(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp3(void *ctx)
+{
+ update(ctx, &tp_res);
+ return 1;
+}
+
+SEC("perf_event")
+int handle_pe(struct pt_regs *ctx)
+{
+ update(ctx, &pe_res);
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_raw_tp(void *ctx)
+{
+ update(ctx, &raw_tp_res);
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int handle_tp_btf(void *ctx)
+{
+ update(ctx, &tp_btf_res);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_test1, int a)
+{
+ update(ctx, &fentry_res);
+ return 0;
+}
+
+SEC("fexit/bpf_fentry_test1")
+int BPF_PROG(fexit_test1, int a, int ret)
+{
+ update(ctx, &fexit_res);
+ return 0;
+}
+
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret)
+{
+ update(ctx, &fmod_ret_res);
+ return 1234;
+}
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
+ unsigned long reqprot, unsigned long prot, int ret)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid())
+ return ret;
+ update(ctx, &lsm_res);
+ return -EPERM;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
new file mode 100644
index 000000000000..4a4e0b8d9b72
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct generic_map_value {
+ void *data;
+};
+
+char _license[] SEC("license") = "GPL";
+
+const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096};
+const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {};
+
+const unsigned int percpu_data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512};
+const volatile unsigned int percpu_data_btf_ids[ARRAY_SIZE(data_sizes)] = {};
+
+int err = 0;
+u32 pid = 0;
+
+#define DEFINE_ARRAY_WITH_KPTR(_size) \
+ struct bin_data_##_size { \
+ char data[_size - sizeof(void *)]; \
+ }; \
+ /* See Commit 5d8d6634ccc, force btf generation for type bin_data_##_size */ \
+ struct bin_data_##_size *__bin_data_##_size; \
+ struct map_value_##_size { \
+ struct bin_data_##_size __kptr * data; \
+ }; \
+ struct { \
+ __uint(type, BPF_MAP_TYPE_ARRAY); \
+ __type(key, int); \
+ __type(value, struct map_value_##_size); \
+ __uint(max_entries, 128); \
+ } array_##_size SEC(".maps")
+
+#define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \
+ struct percpu_bin_data_##_size { \
+ char data[_size]; \
+ }; \
+ struct percpu_bin_data_##_size *__percpu_bin_data_##_size; \
+ struct map_value_percpu_##_size { \
+ struct percpu_bin_data_##_size __percpu_kptr * data; \
+ }; \
+ struct { \
+ __uint(type, BPF_MAP_TYPE_ARRAY); \
+ __type(key, int); \
+ __type(value, struct map_value_percpu_##_size); \
+ __uint(max_entries, 128); \
+ } array_percpu_##_size SEC(".maps")
+
+static __always_inline void batch_alloc(struct bpf_map *map, unsigned int batch, unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old, *new;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 1;
+ return;
+ }
+ new = bpf_obj_new_impl(data_btf_ids[idx], NULL);
+ if (!new) {
+ err = 2;
+ return;
+ }
+ old = bpf_kptr_xchg(&value->data, new);
+ if (old) {
+ bpf_obj_drop(old);
+ err = 3;
+ return;
+ }
+ }
+}
+
+static __always_inline void batch_free(struct bpf_map *map, unsigned int batch, unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 4;
+ return;
+ }
+ old = bpf_kptr_xchg(&value->data, NULL);
+ if (!old) {
+ err = 5;
+ return;
+ }
+ bpf_obj_drop(old);
+ }
+}
+
+static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int batch,
+ unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old, *new;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 1;
+ return;
+ }
+ /* per-cpu allocator may not be able to refill in time */
+ new = bpf_percpu_obj_new_impl(percpu_data_btf_ids[idx], NULL);
+ if (!new)
+ continue;
+
+ old = bpf_kptr_xchg(&value->data, new);
+ if (old) {
+ bpf_percpu_obj_drop(old);
+ err = 2;
+ return;
+ }
+ }
+}
+
+static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch,
+ unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 3;
+ return;
+ }
+ old = bpf_kptr_xchg(&value->data, NULL);
+ if (!old)
+ continue;
+ bpf_percpu_obj_drop(old);
+ }
+}
+
+#define CALL_BATCH_ALLOC(size, batch, idx) \
+ batch_alloc((struct bpf_map *)(&array_##size), batch, idx)
+
+#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \
+ do { \
+ batch_alloc((struct bpf_map *)(&array_##size), batch, idx); \
+ batch_free((struct bpf_map *)(&array_##size), batch, idx); \
+ } while (0)
+
+#define CALL_BATCH_PERCPU_ALLOC(size, batch, idx) \
+ batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx)
+
+#define CALL_BATCH_PERCPU_ALLOC_FREE(size, batch, idx) \
+ do { \
+ batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+ batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+ } while (0)
+
+/* kptr doesn't support bin_data_8 which is a zero-sized array */
+DEFINE_ARRAY_WITH_KPTR(16);
+DEFINE_ARRAY_WITH_KPTR(32);
+DEFINE_ARRAY_WITH_KPTR(64);
+DEFINE_ARRAY_WITH_KPTR(96);
+DEFINE_ARRAY_WITH_KPTR(128);
+DEFINE_ARRAY_WITH_KPTR(192);
+DEFINE_ARRAY_WITH_KPTR(256);
+DEFINE_ARRAY_WITH_KPTR(512);
+DEFINE_ARRAY_WITH_KPTR(1024);
+DEFINE_ARRAY_WITH_KPTR(2048);
+DEFINE_ARRAY_WITH_KPTR(4096);
+
+DEFINE_ARRAY_WITH_PERCPU_KPTR(8);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(16);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(32);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(64);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(96);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(128);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(192);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(256);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(512);
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_alloc_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 16-bytes objects in batch to trigger refilling,
+ * then free 128 16-bytes objects in batch to trigger freeing.
+ */
+ CALL_BATCH_ALLOC_FREE(16, 128, 0);
+ CALL_BATCH_ALLOC_FREE(32, 128, 1);
+ CALL_BATCH_ALLOC_FREE(64, 128, 2);
+ CALL_BATCH_ALLOC_FREE(96, 128, 3);
+ CALL_BATCH_ALLOC_FREE(128, 128, 4);
+ CALL_BATCH_ALLOC_FREE(192, 128, 5);
+ CALL_BATCH_ALLOC_FREE(256, 128, 6);
+ CALL_BATCH_ALLOC_FREE(512, 64, 7);
+ CALL_BATCH_ALLOC_FREE(1024, 32, 8);
+ CALL_BATCH_ALLOC_FREE(2048, 16, 9);
+ CALL_BATCH_ALLOC_FREE(4096, 8, 10);
+
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_free_through_map_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 16-bytes objects in batch to trigger refilling,
+ * then free these objects through map free.
+ */
+ CALL_BATCH_ALLOC(16, 128, 0);
+ CALL_BATCH_ALLOC(32, 128, 1);
+ CALL_BATCH_ALLOC(64, 128, 2);
+ CALL_BATCH_ALLOC(96, 128, 3);
+ CALL_BATCH_ALLOC(128, 128, 4);
+ CALL_BATCH_ALLOC(192, 128, 5);
+ CALL_BATCH_ALLOC(256, 128, 6);
+ CALL_BATCH_ALLOC(512, 64, 7);
+ CALL_BATCH_ALLOC(1024, 32, 8);
+ CALL_BATCH_ALLOC(2048, 16, 9);
+ CALL_BATCH_ALLOC(4096, 8, 10);
+
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_percpu_alloc_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling,
+ * then free 128 8-bytes per-cpu objects in batch to trigger freeing.
+ */
+ CALL_BATCH_PERCPU_ALLOC_FREE(8, 128, 0);
+ CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1);
+ CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2);
+ CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3);
+ CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4);
+ CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5);
+ CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6);
+ CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7);
+ CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8);
+
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_percpu_free_through_map_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling,
+ * then free these object through map free.
+ */
+ CALL_BATCH_PERCPU_ALLOC(8, 128, 0);
+ CALL_BATCH_PERCPU_ALLOC(16, 128, 1);
+ CALL_BATCH_PERCPU_ALLOC(32, 128, 2);
+ CALL_BATCH_PERCPU_ALLOC(64, 128, 3);
+ CALL_BATCH_PERCPU_ALLOC(96, 128, 4);
+ CALL_BATCH_PERCPU_ALLOC(128, 128, 5);
+ CALL_BATCH_PERCPU_ALLOC(192, 128, 6);
+ CALL_BATCH_PERCPU_ALLOC(256, 128, 7);
+ CALL_BATCH_PERCPU_ALLOC(512, 64, 8);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
new file mode 100644
index 000000000000..f7b330ddd007
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define EAFNOSUPPORT 97
+#define EPROTO 71
+#define ENONET 64
+#define EINVAL 22
+#define ENOENT 2
+
+#define NF_CT_ZONE_DIR_ORIG (1 << IP_CT_DIR_ORIGINAL)
+#define NF_CT_ZONE_DIR_REPL (1 << IP_CT_DIR_REPLY)
+
+extern unsigned long CONFIG_HZ __kconfig;
+
+int test_einval_bpf_tuple = 0;
+int test_einval_reserved = 0;
+int test_einval_reserved_new = 0;
+int test_einval_netns_id = 0;
+int test_einval_len_opts = 0;
+int test_eproto_l4proto = 0;
+int test_enonet_netns_id = 0;
+int test_enoent_lookup = 0;
+int test_eafnosupport = 0;
+int test_alloc_entry = -EINVAL;
+int test_insert_entry = -EAFNOSUPPORT;
+int test_succ_lookup = -ENOENT;
+int test_ct_zone_id_alloc_entry = -EINVAL;
+int test_ct_zone_id_insert_entry = -EAFNOSUPPORT;
+int test_ct_zone_id_succ_lookup = -ENOENT;
+int test_ct_zone_dir_enoent_lookup = 0;
+int test_ct_zone_id_enoent_lookup = 0;
+u32 test_delta_timeout = 0;
+u32 test_status = 0;
+u32 test_insert_lookup_mark = 0;
+int test_snat_addr = -EINVAL;
+int test_dnat_addr = -EINVAL;
+__be32 saddr = 0;
+__be16 sport = 0;
+__be32 daddr = 0;
+__be16 dport = 0;
+int test_exist_lookup = -ENOENT;
+u32 test_exist_lookup_mark = 0;
+
+enum nf_nat_manip_type___local {
+ NF_NAT_MANIP_SRC___local,
+ NF_NAT_MANIP_DST___local
+};
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 dir;
+ u8 reserved[2];
+};
+
+struct bpf_ct_opts___new {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 dir;
+ u16 ct_zone_id;
+ u8 ct_zone_dir;
+ u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
+int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *,
+ int port, enum nf_nat_manip_type___local) __ksym;
+
+static __always_inline void
+nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
+ struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
+ void *ctx)
+{
+ struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+ struct bpf_sock_tuple bpf_tuple;
+ struct nf_conn *ct;
+
+ __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+ ct = lookup_fn(ctx, NULL, 0, &opts_def, sizeof(opts_def));
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_bpf_tuple = opts_def.error;
+
+ opts_def.reserved[0] = 1;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.reserved[0] = 0;
+ opts_def.l4proto = IPPROTO_TCP;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_reserved = opts_def.error;
+
+ opts_def.netns_id = -2;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.netns_id = -1;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_netns_id = opts_def.error;
+
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def) - 1);
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_len_opts = opts_def.error;
+
+ opts_def.l4proto = IPPROTO_ICMP;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.l4proto = IPPROTO_TCP;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_eproto_l4proto = opts_def.error;
+
+ opts_def.netns_id = 0xf00f;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.netns_id = -1;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_enonet_netns_id = opts_def.error;
+
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_enoent_lookup = opts_def.error;
+
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def,
+ sizeof(opts_def));
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_eafnosupport = opts_def.error;
+
+ bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
+ bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
+ bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
+ bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
+
+ ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct) {
+ __u16 sport = bpf_get_prandom_u32();
+ __u16 dport = bpf_get_prandom_u32();
+ union nf_inet_addr saddr = {};
+ union nf_inet_addr daddr = {};
+ struct nf_conn *ct_ins;
+
+ bpf_ct_set_timeout(ct, 10000);
+ ct->mark = 77;
+
+ /* snat */
+ saddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local);
+ /* dnat */
+ daddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local);
+
+ ct_ins = bpf_ct_insert_entry(ct);
+ if (ct_ins) {
+ struct nf_conn *ct_lk;
+
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ if (ct_lk) {
+ struct nf_conntrack_tuple *tuple;
+
+ /* check snat and dnat addresses */
+ tuple = &ct_lk->tuplehash[IP_CT_DIR_REPLY].tuple;
+ if (tuple->dst.u3.ip == saddr.ip &&
+ tuple->dst.u.all == bpf_htons(sport))
+ test_snat_addr = 0;
+ if (tuple->src.u3.ip == daddr.ip &&
+ tuple->src.u.all == bpf_htons(dport))
+ test_dnat_addr = 0;
+
+ /* update ct entry timeout */
+ bpf_ct_change_timeout(ct_lk, 10000);
+ test_delta_timeout = ct_lk->timeout - bpf_jiffies64();
+ test_delta_timeout /= CONFIG_HZ;
+ test_insert_lookup_mark = ct_lk->mark;
+ bpf_ct_change_status(ct_lk,
+ IPS_CONFIRMED | IPS_SEEN_REPLY);
+ test_status = ct_lk->status;
+
+ bpf_ct_release(ct_lk);
+ test_succ_lookup = 0;
+ }
+ bpf_ct_release(ct_ins);
+ test_insert_entry = 0;
+ }
+ test_alloc_entry = 0;
+ }
+
+ bpf_tuple.ipv4.saddr = saddr;
+ bpf_tuple.ipv4.daddr = daddr;
+ bpf_tuple.ipv4.sport = sport;
+ bpf_tuple.ipv4.dport = dport;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct) {
+ test_exist_lookup = 0;
+ if (ct->mark == 42) {
+ ct->mark++;
+ test_exist_lookup_mark = ct->mark;
+ }
+ bpf_ct_release(ct);
+ } else {
+ test_exist_lookup = opts_def.error;
+ }
+}
+
+static __always_inline void
+nf_ct_opts_new_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___new *, u32),
+ struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___new *, u32),
+ void *ctx)
+{
+ struct bpf_ct_opts___new opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+ struct bpf_sock_tuple bpf_tuple;
+ struct nf_conn *ct;
+
+ __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+ opts_def.reserved[0] = 1;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.reserved[0] = 0;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_reserved_new = opts_def.error;
+
+ bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
+ bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
+ bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
+ bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
+
+ /* use non-default ct zone */
+ opts_def.ct_zone_id = 10;
+ opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG;
+ ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct) {
+ __u16 sport = bpf_get_prandom_u32();
+ __u16 dport = bpf_get_prandom_u32();
+ union nf_inet_addr saddr = {};
+ union nf_inet_addr daddr = {};
+ struct nf_conn *ct_ins;
+
+ bpf_ct_set_timeout(ct, 10000);
+
+ /* snat */
+ saddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local);
+ /* dnat */
+ daddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local);
+
+ ct_ins = bpf_ct_insert_entry(ct);
+ if (ct_ins) {
+ struct nf_conn *ct_lk;
+
+ /* entry should exist in same ct zone we inserted it */
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ if (ct_lk) {
+ bpf_ct_release(ct_lk);
+ test_ct_zone_id_succ_lookup = 0;
+ }
+
+ /* entry should not exist with wrong direction */
+ opts_def.ct_zone_dir = NF_CT_ZONE_DIR_REPL;
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG;
+ if (ct_lk)
+ bpf_ct_release(ct_lk);
+ else
+ test_ct_zone_dir_enoent_lookup = opts_def.error;
+
+ /* entry should not exist in default ct zone */
+ opts_def.ct_zone_id = 0;
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ if (ct_lk)
+ bpf_ct_release(ct_lk);
+ else
+ test_ct_zone_id_enoent_lookup = opts_def.error;
+
+ bpf_ct_release(ct_ins);
+ test_ct_zone_id_insert_entry = 0;
+ }
+ test_ct_zone_id_alloc_entry = 0;
+ }
+}
+
+SEC("xdp")
+int nf_xdp_ct_test(struct xdp_md *ctx)
+{
+ nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
+ nf_ct_opts_new_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
+ return 0;
+}
+
+SEC("tc")
+int nf_skb_ct_test(struct __sk_buff *ctx)
+{
+ nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
+ nf_ct_opts_new_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
new file mode 100644
index 000000000000..a586f087ffeb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
+
+SEC("?tc")
+int alloc_release(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_release(ct);
+ return 0;
+}
+
+SEC("?tc")
+int insert_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ return 0;
+}
+
+SEC("?tc")
+int lookup_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_insert_entry(ct);
+ return 0;
+}
+
+SEC("?tc")
+int write_not_allowlisted_field(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct->status = 0xF00;
+ return 0;
+}
+
+SEC("?tc")
+int set_timeout_after_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ if (!ct)
+ return 0;
+ bpf_ct_set_timeout(ct, 0);
+ return 0;
+}
+
+SEC("?tc")
+int set_status_after_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ if (!ct)
+ return 0;
+ bpf_ct_set_status(ct, 0);
+ return 0;
+}
+
+SEC("?tc")
+int change_timeout_after_alloc(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_change_timeout(ct, 0);
+ return 0;
+}
+
+SEC("?tc")
+int change_status_after_alloc(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_change_status(ct, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
new file mode 100644
index 000000000000..c88ccc53529a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#if __has_attribute(btf_decl_tag)
+#define __tag1 __attribute__((btf_decl_tag("tag1")))
+#define __tag2 __attribute__((btf_decl_tag("tag2")))
+volatile const bool skip_tests __tag1 __tag2 = false;
+#else
+#define __tag1
+#define __tag2
+volatile const bool skip_tests = true;
+#endif
+
+struct key_t {
+ int a;
+ int b __tag1 __tag2;
+ int c;
+} __tag1 __tag2;
+
+typedef struct {
+ int a;
+ int b;
+} value_t __tag1 __tag2;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, value_t);
+} hashmap1 SEC(".maps");
+
+
+static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2
+{
+ struct key_t key;
+ value_t val = {};
+
+ key.a = key.b = key.c = x;
+ bpf_map_update_elem(&hashmap1, &key, &val, 0);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+ return foo(x);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_ext.c b/tools/testing/selftests/bpf/progs/test_btf_ext.c
new file mode 100644
index 000000000000..cdf20331db04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_ext.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms Inc. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__noinline static void f0(void)
+{
+ __u64 a = 1;
+
+ __sink(a);
+}
+
+SEC("xdp")
+__u64 global_func(struct xdp_md *xdp)
+{
+ f0();
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
deleted file mode 100644
index 31538c9ed193..000000000000
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2018 Facebook */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
-
-int _version SEC("version") = 1;
-
-struct ipv_counts {
- unsigned int v4;
- unsigned int v6;
-};
-
-struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
-};
-
-BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
-
-__attribute__((noinline))
-int test_long_fname_2(void)
-{
- struct ipv_counts *counts;
- int key = 0;
-
- counts = bpf_map_lookup_elem(&btf_map, &key);
- if (!counts)
- return 0;
-
- counts->v6++;
-
- return 0;
-}
-
-__attribute__((noinline))
-int test_long_fname_1(void)
-{
- return test_long_fname_2();
-}
-
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(void *arg)
-{
- return test_long_fname_1();
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
index c1e0c8c7c55f..c218cf8989a9 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -21,8 +21,8 @@ struct inner_map_sz2 {
struct outer_arr {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 3);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
/* it's possible to use anonymous struct as inner map definition here */
__array(values, struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -61,8 +61,8 @@ struct inner_map_sz4 {
struct outer_arr_dyn {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 3);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
__array(values, struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(map_flags, BPF_F_INNER_MAP);
@@ -81,7 +81,7 @@ struct outer_arr_dyn {
struct outer_hash {
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
__uint(max_entries, 5);
- __uint(key_size, sizeof(int));
+ __type(key, int);
/* Here everything works flawlessly due to reuse of struct inner_map
* and compiler will complain at the attempt to use non-inner_map
* references below. This is great experience.
@@ -111,8 +111,8 @@ struct sockarr_sz2 {
struct outer_sockarr_sz1 {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 1);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
__array(values, struct sockarr_sz1);
} outer_sockarr SEC(".maps") = {
.values = { (void *)&sockarr_sz1 },
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 6c5560162746..251854a041b5 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -4,23 +4,11 @@
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
-int _version SEC("version") = 1;
-
struct ipv_counts {
unsigned int v4;
unsigned int v6;
};
-/* just to validate we can handle maps in multiple sections */
-struct bpf_map_def SEC("maps") btf_map_legacy = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(long long),
- .max_entries = 4,
-};
-
-BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 4);
@@ -40,11 +28,6 @@ int test_long_fname_2(void)
counts->v6++;
- /* just verify we can reference both maps */
- counts = bpf_map_lookup_elem(&btf_map_legacy, &key);
- if (!counts)
- return 0;
-
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
index 506da7fd2da2..1dabb88f8cb4 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
@@ -3,19 +3,17 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
struct ipv_counts {
unsigned int v4;
unsigned int v6;
};
-struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct ipv_counts));
+ __uint(max_entries, 4);
+} btf_map SEC(".maps");
__attribute__((noinline))
int test_long_fname_2(void)
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
index 9a6b85dd52d2..1cd1a1b72cb5 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
@@ -1,31 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include <string.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/ipv6.h>
-#include <linux/tcp.h>
-#include <linux/if_ether.h>
-#include <linux/pkt_cls.h>
-
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
+
+#ifndef ENOENT
+#define ENOENT 2
+#endif
struct sockaddr_in6 srv_sa6 = {};
+struct sockaddr_in srv_sa4 = {};
__u16 listen_tp_sport = 0;
__u16 req_sk_sport = 0;
__u32 recv_cookie = 0;
__u32 gen_cookie = 0;
+__u32 mss = 0;
__u32 linum = 0;
#define LOG() ({ if (!linum) linum = __LINE__; })
-static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th,
- struct tcp_sock *tp,
+static void test_syncookie_helper(void *iphdr, int iphdr_size,
+ struct tcphdr *th, struct tcp_sock *tp,
struct __sk_buff *skb)
{
if (th->syn) {
@@ -44,17 +40,18 @@ static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th,
return;
}
- mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h),
+ mss_cookie = bpf_tcp_gen_syncookie(tp, iphdr, iphdr_size,
th, 40);
if (mss_cookie < 0) {
if (mss_cookie != -ENOENT)
LOG();
} else {
gen_cookie = (__u32)mss_cookie;
+ mss = mss_cookie >> 32;
}
} else if (gen_cookie) {
/* It was in cookie mode */
- int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h),
+ int ret = bpf_tcp_check_syncookie(tp, iphdr, iphdr_size,
th, sizeof(*th));
if (ret < 0) {
@@ -66,26 +63,58 @@ static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th,
}
}
-static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb)
+static int handle_ip_tcp(struct ethhdr *eth, struct __sk_buff *skb)
{
- struct bpf_sock_tuple *tuple;
+ struct bpf_sock_tuple *tuple = NULL;
+ unsigned int tuple_len = 0;
struct bpf_sock *bpf_skc;
- unsigned int tuple_len;
+ void *data_end, *iphdr;
+ struct ipv6hdr *ip6h;
+ struct iphdr *ip4h;
struct tcphdr *th;
- void *data_end;
+ int iphdr_size;
data_end = (void *)(long)(skb->data_end);
- th = (struct tcphdr *)(ip6h + 1);
- if (th + 1 > data_end)
- return TC_ACT_OK;
-
- /* Is it the testing traffic? */
- if (th->dest != srv_sa6.sin6_port)
+ switch (eth->h_proto) {
+ case bpf_htons(ETH_P_IP):
+ ip4h = (struct iphdr *)(eth + 1);
+ if (ip4h + 1 > data_end)
+ return TC_ACT_OK;
+ if (ip4h->protocol != IPPROTO_TCP)
+ return TC_ACT_OK;
+ th = (struct tcphdr *)(ip4h + 1);
+ if (th + 1 > data_end)
+ return TC_ACT_OK;
+ /* Is it the testing traffic? */
+ if (th->dest != srv_sa4.sin_port)
+ return TC_ACT_OK;
+ tuple_len = sizeof(tuple->ipv4);
+ tuple = (struct bpf_sock_tuple *)&ip4h->saddr;
+ iphdr = ip4h;
+ iphdr_size = sizeof(*ip4h);
+ break;
+ case bpf_htons(ETH_P_IPV6):
+ ip6h = (struct ipv6hdr *)(eth + 1);
+ if (ip6h + 1 > data_end)
+ return TC_ACT_OK;
+ if (ip6h->nexthdr != IPPROTO_TCP)
+ return TC_ACT_OK;
+ th = (struct tcphdr *)(ip6h + 1);
+ if (th + 1 > data_end)
+ return TC_ACT_OK;
+ /* Is it the testing traffic? */
+ if (th->dest != srv_sa6.sin6_port)
+ return TC_ACT_OK;
+ tuple_len = sizeof(tuple->ipv6);
+ tuple = (struct bpf_sock_tuple *)&ip6h->saddr;
+ iphdr = ip6h;
+ iphdr_size = sizeof(*ip6h);
+ break;
+ default:
return TC_ACT_OK;
+ }
- tuple_len = sizeof(tuple->ipv6);
- tuple = (struct bpf_sock_tuple *)&ip6h->saddr;
if ((void *)tuple + tuple_len > data_end) {
LOG();
return TC_ACT_OK;
@@ -132,7 +161,7 @@ static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb)
listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num;
- test_syncookie_helper(ip6h, th, tp, skb);
+ test_syncookie_helper(iphdr, iphdr_size, th, tp, skb);
bpf_sk_release(tp);
return TC_ACT_OK;
}
@@ -145,10 +174,9 @@ release:
return TC_ACT_OK;
}
-SEC("classifier/ingress")
+SEC("tc")
int cls_ingress(struct __sk_buff *skb)
{
- struct ipv6hdr *ip6h;
struct ethhdr *eth;
void *data_end;
@@ -158,17 +186,11 @@ int cls_ingress(struct __sk_buff *skb)
if (eth + 1 > data_end)
return TC_ACT_OK;
- if (eth->h_proto != bpf_htons(ETH_P_IPV6))
- return TC_ACT_OK;
-
- ip6h = (struct ipv6hdr *)(eth + 1);
- if (ip6h + 1 > data_end)
+ if (eth->h_proto != bpf_htons(ETH_P_IP) &&
+ eth->h_proto != bpf_htons(ETH_P_IPV6))
return TC_ACT_OK;
- if (ip6h->nexthdr == IPPROTO_TCP)
- return handle_ip6_tcp(ip6h, skb);
-
- return TC_ACT_OK;
+ return handle_ip_tcp(eth, skb);
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_build_id.c b/tools/testing/selftests/bpf/progs/test_build_id.c
new file mode 100644
index 000000000000..32ce59f9aa27
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_build_id.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct bpf_stack_build_id stack_sleepable[128];
+int res_sleepable;
+
+struct bpf_stack_build_id stack_nofault[128];
+int res_nofault;
+
+SEC("uprobe.multi/./uprobe_multi:uprobe")
+int uprobe_nofault(struct pt_regs *ctx)
+{
+ res_nofault = bpf_get_stack(ctx, stack_nofault, sizeof(stack_nofault),
+ BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+
+ return 0;
+}
+
+SEC("uprobe.multi.s/./uprobe_multi:uprobe")
+int uprobe_sleepable(struct pt_regs *ctx)
+{
+ res_sleepable = bpf_get_stack(ctx, stack_sleepable, sizeof(stack_sleepable),
+ BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
new file mode 100644
index 000000000000..4fee0fdc7607
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+__u32 target_ancestor_level;
+__u64 target_ancestor_cgid;
+int target_pid, target_hid;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static int bpf_link_create_verify(int cmd)
+{
+ struct cgroup *cgrp, *ancestor;
+ struct task_struct *task;
+ int ret = 0;
+
+ if (cmd != BPF_LINK_CREATE)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+
+ /* Then it can run in parallel with others */
+ if (task->pid != target_pid)
+ return 0;
+
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ /* Refuse it if its cgid or its ancestor's cgid is the target cgid */
+ if (cgrp->kn->id == target_ancestor_cgid)
+ ret = -1;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level);
+ if (!ancestor)
+ goto out;
+
+ if (ancestor->kn->id == target_ancestor_cgid)
+ ret = -1;
+ bpf_cgroup_release(ancestor);
+
+out:
+ bpf_cgroup_release(cgrp);
+ return ret;
+}
+
+SEC("lsm/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ return bpf_link_create_verify(cmd);
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ return bpf_link_create_verify(cmd);
+}
+
+SEC("fentry")
+int BPF_PROG(fentry_run)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
index 77e47b9e4446..4faba88e45a5 100644
--- a/tools/testing/selftests/bpf/progs/test_cgroup_link.c
+++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
@@ -6,14 +6,14 @@
int calls = 0;
int alt_calls = 0;
-SEC("cgroup_skb/egress1")
+SEC("cgroup_skb/egress")
int egress(struct __sk_buff *skb)
{
__sync_fetch_and_add(&calls, 1);
return 1;
}
-SEC("cgroup_skb/egress2")
+SEC("cgroup_skb/egress")
int egress_alt(struct __sk_buff *skb)
{
__sync_fetch_and_add(&alt_calls, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index b7787b43f9db..7b6b2b342c1d 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -7,12 +7,13 @@
#include <stddef.h>
#include <stdint.h>
+#include <errno.h>
char _license[] SEC("license") = "GPL";
/* Userspace will update with MTU it can see on device */
-static volatile const int GLOBAL_USER_MTU;
-static volatile const __u32 GLOBAL_USER_IFINDEX;
+volatile const int GLOBAL_USER_MTU;
+volatile const __u32 GLOBAL_USER_IFINDEX;
/* BPF-prog will update these with MTU values it can see */
__u32 global_bpf_mtu_xdp = 0;
@@ -105,7 +106,55 @@ int xdp_minus_delta(struct xdp_md *ctx)
return retval;
}
-SEC("classifier")
+SEC("xdp")
+int xdp_input_len(struct xdp_md *ctx)
+{
+ int retval = XDP_PASS; /* Expected retval on successful test */
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ __u32 data_len = data_end - data;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input len is L3, like MTU and iph->tot_len.
+ * Remember XDP data_len is L2.
+ */
+ __u32 mtu_len = data_len - ETH_HLEN;
+
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+ retval = XDP_ABORTED;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("xdp")
+int xdp_input_len_exceed(struct xdp_md *ctx)
+{
+ int retval = XDP_ABORTED; /* Fail */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ int err;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size like MTU.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ mtu_len += 1; /* Exceed with 1 */
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+ if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = XDP_PASS ; /* Success in exceeding MTU check */
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("tc")
int tc_use_helper(struct __sk_buff *ctx)
{
int retval = BPF_OK; /* Expected retval on successful test */
@@ -124,7 +173,7 @@ out:
return retval;
}
-SEC("classifier")
+SEC("tc")
int tc_exceed_mtu(struct __sk_buff *ctx)
{
__u32 ifindex = GLOBAL_USER_IFINDEX;
@@ -148,7 +197,7 @@ int tc_exceed_mtu(struct __sk_buff *ctx)
return retval;
}
-SEC("classifier")
+SEC("tc")
int tc_exceed_mtu_da(struct __sk_buff *ctx)
{
/* SKB Direct-Access variant */
@@ -175,7 +224,7 @@ int tc_exceed_mtu_da(struct __sk_buff *ctx)
return retval;
}
-SEC("classifier")
+SEC("tc")
int tc_minus_delta(struct __sk_buff *ctx)
{
int retval = BPF_OK; /* Expected retval on successful test */
@@ -196,3 +245,58 @@ int tc_minus_delta(struct __sk_buff *ctx)
global_bpf_mtu_xdp = mtu_len;
return retval;
}
+
+SEC("tc")
+int tc_input_len(struct __sk_buff *ctx)
+{
+ int retval = BPF_OK; /* Expected retval on successful test */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+ retval = BPF_DROP;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("tc")
+int tc_input_len_exceed(struct __sk_buff *ctx)
+{
+ int retval = BPF_DROP; /* Fail */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ int err;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size like MTU.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ mtu_len += 1; /* Exceed with 1 */
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+ if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = BPF_OK; /* Success in exceeding MTU check */
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("tc")
+int tc_chk_segs_flag(struct __sk_buff *ctx)
+{
+ __u32 mtu_len = 0;
+ int err;
+
+ err = bpf_check_mtu(ctx, GLOBAL_USER_IFINDEX, &mtu_len, 0, BPF_MTU_CHK_SEGS);
+
+ return err == -EINVAL ? BPF_OK : BPF_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 3c1e042962e6..26a53e54b8fa 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -15,12 +15,16 @@
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
#include "test_cls_redirect.h"
+#include "bpf_misc.h"
+
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
#ifdef SUBPROGS
#define INLINING __noinline
@@ -28,9 +32,6 @@
#define INLINING __always_inline
#endif
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
-
#define IP_OFFSET_MASK (0x1FFF)
#define IP_MF (0x2000)
@@ -39,8 +40,8 @@ char _license[] SEC("license") = "Dual BSD/GPL";
/**
* Destination port and IP used for UDP encapsulation.
*/
-static volatile const __be16 ENCAPSULATION_PORT;
-static volatile const __be32 ENCAPSULATION_IP;
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
typedef struct {
uint64_t processed_packets_total;
@@ -126,7 +127,7 @@ typedef uint8_t *net_ptr __attribute__((align_value(8)));
typedef struct buf {
struct __sk_buff *skb;
net_ptr head;
- /* NB: tail musn't have alignment other than 1, otherwise
+ /* NB: tail mustn't have alignment other than 1, otherwise
* LLVM will go and eliminate code, e.g. when checking packet lengths.
*/
uint8_t *const tail;
@@ -267,7 +268,7 @@ static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
uint32_t acc = 0;
uint16_t *ipw = (uint16_t *)iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
acc += ipw[i];
}
@@ -294,7 +295,7 @@ bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
};
*is_fragment = false;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 6; i++) {
switch (exthdr.next) {
case IPPROTO_FRAGMENT:
@@ -600,7 +601,7 @@ static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
return TC_ACT_SHOT;
}
- /* Skip the remainig next hops (may be zero). */
+ /* Skip the remaining next hops (may be zero). */
return skip_next_hops(pkt, encap->unigue.hop_count -
encap->unigue.next_hop - 1);
}
@@ -610,8 +611,8 @@ static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
*
* fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
*
- * clang will substitue a costant for sizeof, which allows the verifier
- * to track it's value. Based on this, it can figure out the constant
+ * clang will substitute a constant for sizeof, which allows the verifier
+ * to track its value. Based on this, it can figure out the constant
* return value, and calling code works while still being "generic" to
* IPv4 and IPv6.
*/
@@ -928,7 +929,7 @@ static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
}
}
-SEC("classifier/cls_redirect")
+SEC("tc")
int cls_redirect(struct __sk_buff *skb)
{
metrics_t *metrics = get_global_metrics();
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
index 76eab0aacba0..eb55cb8a3dbd 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.h
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
@@ -10,7 +10,16 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
+
+/* offsetof() is used in static asserts, and the libbpf-redefined CO-RE
+ * friendly version breaks compilation for older clang versions <= 15
+ * when invoked in a static assert. Restore original here.
+ */
+#ifdef offsetof
+#undef offsetof
+#define offsetof(type, member) __builtin_offsetof(type, member)
+#endif
struct gre_base_hdr {
uint16_t flags;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
new file mode 100644
index 000000000000..dfd4a2710391
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -0,0 +1,981 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2019, 2020 Cloudflare
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <netinet/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "test_cls_redirect.h"
+#include "bpf_kfuncs.h"
+
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+
+#define IP_OFFSET_MASK (0x1FFF)
+#define IP_MF (0x2000)
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+
+/**
+ * Destination port and IP used for UDP encapsulation.
+ */
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
+
+typedef struct {
+ uint64_t processed_packets_total;
+ uint64_t l3_protocol_packets_total_ipv4;
+ uint64_t l3_protocol_packets_total_ipv6;
+ uint64_t l4_protocol_packets_total_tcp;
+ uint64_t l4_protocol_packets_total_udp;
+ uint64_t accepted_packets_total_syn;
+ uint64_t accepted_packets_total_syn_cookies;
+ uint64_t accepted_packets_total_last_hop;
+ uint64_t accepted_packets_total_icmp_echo_request;
+ uint64_t accepted_packets_total_established;
+ uint64_t forwarded_packets_total_gue;
+ uint64_t forwarded_packets_total_gre;
+
+ uint64_t errors_total_unknown_l3_proto;
+ uint64_t errors_total_unknown_l4_proto;
+ uint64_t errors_total_malformed_ip;
+ uint64_t errors_total_fragmented_ip;
+ uint64_t errors_total_malformed_icmp;
+ uint64_t errors_total_unwanted_icmp;
+ uint64_t errors_total_malformed_icmp_pkt_too_big;
+ uint64_t errors_total_malformed_tcp;
+ uint64_t errors_total_malformed_udp;
+ uint64_t errors_total_icmp_echo_replies;
+ uint64_t errors_total_malformed_encapsulation;
+ uint64_t errors_total_encap_adjust_failed;
+ uint64_t errors_total_encap_buffer_too_small;
+ uint64_t errors_total_redirect_loop;
+ uint64_t errors_total_encap_mtu_violate;
+} metrics_t;
+
+typedef enum {
+ INVALID = 0,
+ UNKNOWN,
+ ECHO_REQUEST,
+ SYN,
+ SYN_COOKIE,
+ ESTABLISHED,
+} verdict_t;
+
+typedef struct {
+ uint16_t src, dst;
+} flow_ports_t;
+
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv4.dport) -
+ offsetof(struct bpf_sock_tuple, ipv4.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv6.dport) -
+ offsetof(struct bpf_sock_tuple, ipv6.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+
+struct iphdr_info {
+ void *hdr;
+ __u64 len;
+};
+
+typedef int ret_t;
+
+/* This is a bit of a hack. We need a return value which allows us to
+ * indicate that the regular flow of the program should continue,
+ * while allowing functions to use XDP_PASS and XDP_DROP, etc.
+ */
+static const ret_t CONTINUE_PROCESSING = -1;
+
+/* Convenience macro to call functions which return ret_t.
+ */
+#define MAYBE_RETURN(x) \
+ do { \
+ ret_t __ret = x; \
+ if (__ret != CONTINUE_PROCESSING) \
+ return __ret; \
+ } while (0)
+
+static bool ipv4_is_fragment(const struct iphdr *ip)
+{
+ uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
+ return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
+}
+
+static int pkt_parse_ipv4(struct bpf_dynptr *dynptr, __u64 *offset, struct iphdr *iphdr)
+{
+ if (bpf_dynptr_read(iphdr, sizeof(*iphdr), dynptr, *offset, 0))
+ return -1;
+
+ *offset += sizeof(*iphdr);
+
+ if (iphdr->ihl < 5)
+ return -1;
+
+ /* skip ipv4 options */
+ *offset += (iphdr->ihl - 5) * 4;
+
+ return 0;
+}
+
+/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
+static bool pkt_parse_icmp_l4_ports(struct bpf_dynptr *dynptr, __u64 *offset, flow_ports_t *ports)
+{
+ if (bpf_dynptr_read(ports, sizeof(*ports), dynptr, *offset, 0))
+ return false;
+
+ *offset += sizeof(*ports);
+
+ /* Ports in the L4 headers are reversed, since we are parsing an ICMP
+ * payload which is going towards the eyeball.
+ */
+ uint16_t dst = ports->src;
+ ports->src = ports->dst;
+ ports->dst = dst;
+ return true;
+}
+
+static uint16_t pkt_checksum_fold(uint32_t csum)
+{
+ /* The highest reasonable value for an IPv4 header
+ * checksum requires two folds, so we just do that always.
+ */
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (uint16_t)~csum;
+}
+
+static void pkt_ipv4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+
+ /* An IP header without options is 20 bytes. Two of those
+ * are the checksum, which we always set to zero. Hence,
+ * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7,
+ * which fits in 32 bit.
+ */
+ _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes");
+ uint32_t acc = 0;
+ uint16_t *ipw = (uint16_t *)iph;
+
+ for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++)
+ acc += ipw[i];
+
+ iph->check = pkt_checksum_fold(acc);
+}
+
+static bool pkt_skip_ipv6_extension_headers(struct bpf_dynptr *dynptr, __u64 *offset,
+ const struct ipv6hdr *ipv6, uint8_t *upper_proto,
+ bool *is_fragment)
+{
+ /* We understand five extension headers.
+ * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
+ * headers should occur once, except Destination Options, which may
+ * occur twice. Hence we give up after 6 headers.
+ */
+ struct {
+ uint8_t next;
+ uint8_t len;
+ } exthdr = {
+ .next = ipv6->nexthdr,
+ };
+ *is_fragment = false;
+
+ for (int i = 0; i < 6; i++) {
+ switch (exthdr.next) {
+ case IPPROTO_FRAGMENT:
+ *is_fragment = true;
+ /* NB: We don't check that hdrlen == 0 as per spec. */
+ /* fallthrough; */
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_MH:
+ if (bpf_dynptr_read(&exthdr, sizeof(exthdr), dynptr, *offset, 0))
+ return false;
+
+ /* hdrlen is in 8-octet units, and excludes the first 8 octets. */
+ *offset += (exthdr.len + 1) * 8;
+
+ /* Decode next header */
+ break;
+
+ default:
+ /* The next header is not one of the known extension
+ * headers, treat it as the upper layer header.
+ *
+ * This handles IPPROTO_NONE.
+ *
+ * Encapsulating Security Payload (50) and Authentication
+ * Header (51) also end up here (and will trigger an
+ * unknown proto error later). They have a custom header
+ * format and seem too esoteric to care about.
+ */
+ *upper_proto = exthdr.next;
+ return true;
+ }
+ }
+
+ /* We never found an upper layer header. */
+ return false;
+}
+
+static int pkt_parse_ipv6(struct bpf_dynptr *dynptr, __u64 *offset, struct ipv6hdr *ipv6,
+ uint8_t *proto, bool *is_fragment)
+{
+ if (bpf_dynptr_read(ipv6, sizeof(*ipv6), dynptr, *offset, 0))
+ return -1;
+
+ *offset += sizeof(*ipv6);
+
+ if (!pkt_skip_ipv6_extension_headers(dynptr, offset, ipv6, proto, is_fragment))
+ return -1;
+
+ return 0;
+}
+
+/* Global metrics, per CPU
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, unsigned int);
+ __type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static metrics_t *get_global_metrics(void)
+{
+ uint64_t key = 0;
+ return bpf_map_lookup_elem(&metrics_map, &key);
+}
+
+static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+{
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead = payload_off - sizeof(struct ethhdr);
+
+ /* Changing the ethertype if the encapsulated packet is ipv6 */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6)
+ encap->eth.h_proto = bpf_htons(ETH_P_IPV6);
+
+ if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
+ return TC_ACT_SHOT;
+
+ return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+static ret_t forward_with_gre(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ encap_headers_t *encap, struct in_addr *next_hop,
+ metrics_t *metrics)
+{
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead =
+ payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
+ int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
+ __u8 encap_buffer[sizeof(encap_gre_t)] = {};
+ uint16_t proto = ETH_P_IP;
+ uint32_t mtu_len = 0;
+ encap_gre_t *encap_gre;
+
+ metrics->forwarded_packets_total_gre++;
+
+ /* Loop protection: the inner packet's TTL is decremented as a safeguard
+ * against any forwarding loop. As the only interesting field is the TTL
+ * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes
+ * as they handle the split packets if needed (no need for the data to be
+ * in the linear section).
+ */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+ proto = ETH_P_IPV6;
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1, 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ } else {
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl,
+ 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ /* IPv4 also has a checksum to patch. While the TTL is only one byte,
+ * this function only works for 2 and 4 bytes arguments (the result is
+ * the same).
+ */
+ rc = bpf_l3_csum_replace(
+ skb, payload_off + offsetof(struct iphdr, check), ttl,
+ ttl - 1, 2);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1,
+ 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ }
+
+ if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) {
+ metrics->errors_total_encap_mtu_violate++;
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
+ metrics->errors_total_encap_adjust_failed++;
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ encap_gre = bpf_dynptr_slice_rdwr(dynptr, 0, encap_buffer, sizeof(encap_buffer));
+ if (!encap_gre) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ encap_gre->ip.protocol = IPPROTO_GRE;
+ encap_gre->ip.daddr = next_hop->s_addr;
+ encap_gre->ip.saddr = ENCAPSULATION_IP;
+ encap_gre->ip.tot_len =
+ bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta);
+ encap_gre->gre.flags = 0;
+ encap_gre->gre.protocol = bpf_htons(proto);
+ pkt_ipv4_checksum((void *)&encap_gre->ip);
+
+ if (encap_gre == encap_buffer)
+ bpf_dynptr_write(dynptr, 0, encap_buffer, sizeof(encap_buffer), 0);
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ encap_headers_t *encap, struct in_addr *next_hop,
+ metrics_t *metrics)
+{
+ /* swap L2 addresses */
+ /* This assumes that packets are received from a router.
+ * So just swapping the MAC addresses here will make the packet go back to
+ * the router, which will send it to the appropriate machine.
+ */
+ unsigned char temp[ETH_ALEN];
+ memcpy(temp, encap->eth.h_dest, sizeof(temp));
+ memcpy(encap->eth.h_dest, encap->eth.h_source,
+ sizeof(encap->eth.h_dest));
+ memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count - 1 &&
+ encap->unigue.last_hop_gre) {
+ return forward_with_gre(skb, dynptr, encap, next_hop, metrics);
+ }
+
+ metrics->forwarded_packets_total_gue++;
+ uint32_t old_saddr = encap->ip.saddr;
+ encap->ip.saddr = encap->ip.daddr;
+ encap->ip.daddr = next_hop->s_addr;
+ if (encap->unigue.next_hop < encap->unigue.hop_count) {
+ encap->unigue.next_hop++;
+ }
+
+ /* Remove ip->saddr, add next_hop->s_addr */
+ const uint64_t off = offsetof(typeof(*encap), ip.check);
+ int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4);
+ if (ret < 0) {
+ return TC_ACT_SHOT;
+ }
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t skip_next_hops(__u64 *offset, int n)
+{
+ switch (n) {
+ case 1:
+ *offset += sizeof(struct in_addr);
+ case 0:
+ return CONTINUE_PROCESSING;
+
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+/* Get the next hop from the GLB header.
+ *
+ * Sets next_hop->s_addr to 0 if there are no more hops left.
+ * pkt is positioned just after the variable length GLB header
+ * iff the call is successful.
+ */
+static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_headers_t *encap,
+ struct in_addr *next_hop)
+{
+ if (encap->unigue.next_hop > encap->unigue.hop_count)
+ return TC_ACT_SHOT;
+
+ /* Skip "used" next hops. */
+ MAYBE_RETURN(skip_next_hops(offset, encap->unigue.next_hop));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count) {
+ /* No more next hops, we are at the end of the GLB header. */
+ next_hop->s_addr = 0;
+ return CONTINUE_PROCESSING;
+ }
+
+ if (bpf_dynptr_read(next_hop, sizeof(*next_hop), dynptr, *offset, 0))
+ return TC_ACT_SHOT;
+
+ *offset += sizeof(*next_hop);
+
+ /* Skip the remaining next hops (may be zero). */
+ return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1);
+}
+
+/* Fill a bpf_sock_tuple to be used with the socket lookup functions.
+ * This is a kludge that let's us work around verifier limitations:
+ *
+ * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
+ *
+ * clang will substitute a constant for sizeof, which allows the verifier
+ * to track it's value. Based on this, it can figure out the constant
+ * return value, and calling code works while still being "generic" to
+ * IPv4 and IPv6.
+ */
+static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+ uint64_t iphlen, uint16_t sport, uint16_t dport)
+{
+ switch (iphlen) {
+ case sizeof(struct iphdr): {
+ struct iphdr *ipv4 = (struct iphdr *)iph;
+ tuple->ipv4.daddr = ipv4->daddr;
+ tuple->ipv4.saddr = ipv4->saddr;
+ tuple->ipv4.sport = sport;
+ tuple->ipv4.dport = dport;
+ return sizeof(tuple->ipv4);
+ }
+
+ case sizeof(struct ipv6hdr): {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph;
+ memcpy(&tuple->ipv6.daddr, &ipv6->daddr,
+ sizeof(tuple->ipv6.daddr));
+ memcpy(&tuple->ipv6.saddr, &ipv6->saddr,
+ sizeof(tuple->ipv6.saddr));
+ tuple->ipv6.sport = sport;
+ tuple->ipv6.dport = dport;
+ return sizeof(tuple->ipv6);
+ }
+
+ default:
+ return 0;
+ }
+}
+
+static verdict_t classify_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple,
+ uint64_t tuplen, void *iph, struct tcphdr *tcp)
+{
+ struct bpf_sock *sk =
+ bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+
+ if (sk == NULL)
+ return UNKNOWN;
+
+ if (sk->state != BPF_TCP_LISTEN) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ if (iph != NULL && tcp != NULL) {
+ /* Kludge: we've run out of arguments, but need the length of the ip header. */
+ uint64_t iphlen = sizeof(struct iphdr);
+
+ if (tuplen == sizeof(tuple->ipv6))
+ iphlen = sizeof(struct ipv6hdr);
+
+ if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp,
+ sizeof(*tcp)) == 0) {
+ bpf_sk_release(sk);
+ return SYN_COOKIE;
+ }
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static verdict_t classify_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, uint64_t tuplen)
+{
+ struct bpf_sock *sk =
+ bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+
+ if (sk == NULL)
+ return UNKNOWN;
+
+ if (sk->state == BPF_TCP_ESTABLISHED) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, struct bpf_sock_tuple *tuple,
+ uint64_t tuplen, metrics_t *metrics)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ return classify_tcp(skb, tuple, tuplen, NULL, NULL);
+
+ case IPPROTO_UDP:
+ return classify_udp(skb, tuple, tuplen);
+
+ default:
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+}
+
+static verdict_t process_icmpv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, __u64 *offset,
+ metrics_t *metrics)
+{
+ struct icmphdr icmp;
+ struct iphdr ipv4;
+
+ if (bpf_dynptr_read(&icmp, sizeof(icmp), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ *offset += sizeof(icmp);
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp.type == ICMP_ECHOREPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp.type == ICMP_ECHO)
+ return ECHO_REQUEST;
+
+ if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ if (pkt_parse_ipv4(dynptr, offset, &ipv4)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ /* The source address in the outer IP header is from the entity that
+ * originated the ICMP message. Use the original IP header to restore
+ * the correct flow tuple.
+ */
+ struct bpf_sock_tuple tuple;
+ tuple.ipv4.saddr = ipv4.daddr;
+ tuple.ipv4.daddr = ipv4.saddr;
+
+ if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv4.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(skb, ipv4.protocol, &tuple,
+ sizeof(tuple.ipv4), metrics);
+}
+
+static verdict_t process_icmpv6(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct ipv6hdr ipv6;
+ struct icmp6hdr icmp6;
+ bool is_fragment;
+ uint8_t l4_proto;
+
+ if (bpf_dynptr_read(&icmp6, sizeof(icmp6), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) {
+ return ECHO_REQUEST;
+ }
+
+ if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ /* Swap source and dest addresses. */
+ memcpy(&tuple.ipv6.saddr, &ipv6.daddr, sizeof(tuple.ipv6.saddr));
+ memcpy(&tuple.ipv6.daddr, &ipv6.saddr, sizeof(tuple.ipv6.daddr));
+
+ if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv6.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(skb, l4_proto, &tuple, sizeof(tuple.ipv6),
+ metrics);
+}
+
+static verdict_t process_tcp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ struct iphdr_info *info, metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct tcphdr tcp;
+ uint64_t tuplen;
+
+ metrics->l4_protocol_packets_total_tcp++;
+
+ if (bpf_dynptr_read(&tcp, sizeof(tcp), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_tcp++;
+ return INVALID;
+ }
+
+ *offset += sizeof(tcp);
+
+ if (tcp.syn)
+ return SYN;
+
+ tuplen = fill_tuple(&tuple, info->hdr, info->len, tcp.source, tcp.dest);
+ return classify_tcp(skb, &tuple, tuplen, info->hdr, &tcp);
+}
+
+static verdict_t process_udp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ struct iphdr_info *info, metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct udphdr udph;
+ uint64_t tuplen;
+
+ metrics->l4_protocol_packets_total_udp++;
+
+ if (bpf_dynptr_read(&udph, sizeof(udph), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_udp++;
+ return INVALID;
+ }
+ *offset += sizeof(udph);
+
+ tuplen = fill_tuple(&tuple, info->hdr, info->len, udph.source, udph.dest);
+ return classify_udp(skb, &tuple, tuplen);
+}
+
+static verdict_t process_ipv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ __u64 *offset, metrics_t *metrics)
+{
+ struct iphdr ipv4;
+ struct iphdr_info info = {
+ .hdr = &ipv4,
+ .len = sizeof(ipv4),
+ };
+
+ metrics->l3_protocol_packets_total_ipv4++;
+
+ if (pkt_parse_ipv4(dynptr, offset, &ipv4)) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4.version != 4) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4_is_fragment(&ipv4)) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (ipv4.protocol) {
+ case IPPROTO_ICMP:
+ return process_icmpv4(skb, dynptr, offset, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(dynptr, offset, skb, &info, metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(dynptr, offset, skb, &info, metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+static verdict_t process_ipv6(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ __u64 *offset, metrics_t *metrics)
+{
+ struct ipv6hdr ipv6;
+ struct iphdr_info info = {
+ .hdr = &ipv6,
+ .len = sizeof(ipv6),
+ };
+ uint8_t l4_proto;
+ bool is_fragment;
+
+ metrics->l3_protocol_packets_total_ipv6++;
+
+ if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv6.version != 6) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (l4_proto) {
+ case IPPROTO_ICMPV6:
+ return process_icmpv6(dynptr, offset, skb, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(dynptr, offset, skb, &info, metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(dynptr, offset, skb, &info, metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+SEC("tc")
+int cls_redirect(struct __sk_buff *skb)
+{
+ __u8 encap_buffer[sizeof(encap_headers_t)] = {};
+ struct bpf_dynptr dynptr;
+ struct in_addr next_hop;
+ /* Tracks offset of the dynptr. This will be unnecessary once
+ * bpf_dynptr_advance() is available.
+ */
+ __u64 off = 0;
+ ret_t ret;
+
+ bpf_dynptr_from_skb(skb, 0, &dynptr);
+
+ metrics_t *metrics = get_global_metrics();
+ if (metrics == NULL)
+ return TC_ACT_SHOT;
+
+ metrics->processed_packets_total++;
+
+ /* Pass bogus packets as long as we're not sure they're
+ * destined for us.
+ */
+ if (skb->protocol != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ encap_headers_t *encap;
+
+ /* Make sure that all encapsulation headers are available in
+ * the linear portion of the skb. This makes it easy to manipulate them.
+ */
+ if (bpf_skb_pull_data(skb, sizeof(*encap)))
+ return TC_ACT_OK;
+
+ encap = bpf_dynptr_slice_rdwr(&dynptr, 0, encap_buffer, sizeof(encap_buffer));
+ if (!encap)
+ return TC_ACT_OK;
+
+ off += sizeof(*encap);
+
+ if (encap->ip.ihl != 5)
+ /* We never have any options. */
+ return TC_ACT_OK;
+
+ if (encap->ip.daddr != ENCAPSULATION_IP ||
+ encap->ip.protocol != IPPROTO_UDP)
+ return TC_ACT_OK;
+
+ /* TODO Check UDP length? */
+ if (encap->udp.dest != ENCAPSULATION_PORT)
+ return TC_ACT_OK;
+
+ /* We now know that the packet is destined to us, we can
+ * drop bogus ones.
+ */
+ if (ipv4_is_fragment((void *)&encap->ip)) {
+ metrics->errors_total_fragmented_ip++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.variant != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.control != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.flags != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.hlen !=
+ sizeof(encap->unigue) / 4 + encap->unigue.hop_count) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.version != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.reserved != 0)
+ return TC_ACT_SHOT;
+
+ MAYBE_RETURN(get_next_hop(&dynptr, &off, encap, &next_hop));
+
+ if (next_hop.s_addr == 0) {
+ metrics->accepted_packets_total_last_hop++;
+ return accept_locally(skb, encap);
+ }
+
+ verdict_t verdict;
+ switch (encap->gue.proto_ctype) {
+ case IPPROTO_IPIP:
+ verdict = process_ipv4(skb, &dynptr, &off, metrics);
+ break;
+
+ case IPPROTO_IPV6:
+ verdict = process_ipv6(skb, &dynptr, &off, metrics);
+ break;
+
+ default:
+ metrics->errors_total_unknown_l3_proto++;
+ return TC_ACT_SHOT;
+ }
+
+ switch (verdict) {
+ case INVALID:
+ /* metrics have already been bumped */
+ return TC_ACT_SHOT;
+
+ case UNKNOWN:
+ return forward_to_next_hop(skb, &dynptr, encap, &next_hop, metrics);
+
+ case ECHO_REQUEST:
+ metrics->accepted_packets_total_icmp_echo_request++;
+ break;
+
+ case SYN:
+ if (encap->unigue.forward_syn) {
+ return forward_to_next_hop(skb, &dynptr, encap, &next_hop,
+ metrics);
+ }
+
+ metrics->accepted_packets_total_syn++;
+ break;
+
+ case SYN_COOKIE:
+ metrics->accepted_packets_total_syn_cookies++;
+ break;
+
+ case ESTABLISHED:
+ metrics->accepted_packets_total_established++;
+ break;
+ }
+
+ ret = accept_locally(skb, encap);
+
+ if (encap == encap_buffer)
+ bpf_dynptr_write(&dynptr, 0, encap_buffer, sizeof(encap_buffer), 0);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c
index 44f5aa2e8956..9a7829c5e4a7 100644
--- a/tools/testing/selftests/bpf/progs/test_core_autosize.c
+++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c
@@ -125,6 +125,16 @@ int handle_downsize(void *ctx)
return 0;
}
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_core_read_int bpf_core_read
+#else
+#define bpf_core_read_int(dst, sz, src) ({ \
+ /* Prevent "subtraction from stack pointer prohibited" */ \
+ volatile long __off = sizeof(*dst) - (sz); \
+ bpf_core_read((char *)(dst) + __off, sz, src); \
+})
+#endif
+
SEC("raw_tp/sys_enter")
int handle_probed(void *ctx)
{
@@ -132,23 +142,23 @@ int handle_probed(void *ctx)
__u64 tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
ptr_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val1), &in->val1);
val1_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val2), &in->val2);
val2_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val3), &in->val3);
val3_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val4), &in->val4);
val4_probed = tmp;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c
index 3ac3603ad53d..a3c7c1042f35 100644
--- a/tools/testing/selftests/bpf/progs/test_core_extern.c
+++ b/tools/testing/selftests/bpf/progs/test_core_extern.c
@@ -11,6 +11,7 @@
static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999;
extern int LINUX_KERNEL_VERSION __kconfig;
+extern int LINUX_UNKNOWN_VIRTUAL_EXTERN __kconfig __weak;
extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */
extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak;
extern bool CONFIG_BOOL __kconfig __weak;
@@ -22,6 +23,7 @@ extern const char CONFIG_STR[8] __kconfig __weak;
extern uint64_t CONFIG_MISSING __kconfig __weak;
uint64_t kern_ver = -1;
+uint64_t unkn_virt_val = -1;
uint64_t bpf_syscall = -1;
uint64_t tristate_val = -1;
uint64_t bool_val = -1;
@@ -38,6 +40,7 @@ int handle_sys_enter(struct pt_regs *ctx)
int i;
kern_ver = LINUX_KERNEL_VERSION;
+ unkn_virt_val = LINUX_UNKNOWN_VIRTUAL_EXTERN;
bpf_syscall = CONFIG_BPF_SYSCALL;
tristate_val = CONFIG_TRISTATE;
bool_val = CONFIG_BOOL;
diff --git a/tools/testing/selftests/bpf/progs/test_core_read_macros.c b/tools/testing/selftests/bpf/progs/test_core_read_macros.c
index fd54caa17319..873d85a4739b 100644
--- a/tools/testing/selftests/bpf/progs/test_core_read_macros.c
+++ b/tools/testing/selftests/bpf/progs/test_core_read_macros.c
@@ -36,7 +36,7 @@ int handler(void *ctx)
return 0;
/* next pointers for kernel address space have to be initialized from
- * BPF side, user-space mmaped addresses are stil user-space addresses
+ * BPF side, user-space mmaped addresses are still user-space addresses
*/
k_probe_in.next = &k_probe_in;
__builtin_preserve_access_index(({k_core_in.next = &k_core_in;}));
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
index 51b3f79df523..448403634eea 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
@@ -15,6 +15,7 @@ struct {
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -41,6 +42,7 @@ int test_core_arrays(void *ctx)
{
struct core_reloc_arrays *in = (void *)&data.in;
struct core_reloc_arrays_output *out = (void *)&data.out;
+ int *a;
if (CORE_READ(&out->a2, &in->a[2]))
return 1;
@@ -53,6 +55,9 @@ int test_core_arrays(void *ctx)
if (CORE_READ(&out->f01c, &in->f[0][1].c))
return 1;
+ a = __builtin_preserve_access_index(({ in->a; }));
+ out->a3 = a[0] + a[1] + a[2] + a[3];
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
index ab1e647aeb31..b86fdda2a6ea 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
@@ -42,7 +42,6 @@ int test_core_bitfields(void *ctx)
{
struct core_reloc_bitfields *in = (void *)&data.in;
struct core_reloc_bitfields_output *out = (void *)&data.out;
- uint64_t res;
out->ub1 = BPF_CORE_READ_BITFIELD_PROBED(in, ub1);
out->ub2 = BPF_CORE_READ_BITFIELD_PROBED(in, ub2);
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c
new file mode 100644
index 000000000000..63147fbfae6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+enum named_unsigned_enum64 {
+ UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL2 = 0x2ffffffffULL,
+ UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64 {
+ SIGNED_ENUM64_VAL1 = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL2 = -2,
+ SIGNED_ENUM64_VAL3 = 0x3ffffffffLL,
+};
+
+struct core_reloc_enum64val_output {
+ bool unsigned_val1_exists;
+ bool unsigned_val2_exists;
+ bool unsigned_val3_exists;
+ bool signed_val1_exists;
+ bool signed_val2_exists;
+ bool signed_val3_exists;
+
+ long unsigned_val1;
+ long unsigned_val2;
+ long signed_val1;
+ long signed_val2;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_enum64val(void *ctx)
+{
+#if __clang_major__ >= 15
+ struct core_reloc_enum64val_output *out = (void *)&data.out;
+ enum named_unsigned_enum64 named_unsigned = 0;
+ enum named_signed_enum64 named_signed = 0;
+
+ out->unsigned_val1_exists = bpf_core_enum_value_exists(named_unsigned, UNSIGNED_ENUM64_VAL1);
+ out->unsigned_val2_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL2);
+ out->unsigned_val3_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL3);
+ out->signed_val1_exists = bpf_core_enum_value_exists(named_signed, SIGNED_ENUM64_VAL1);
+ out->signed_val2_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL2);
+ out->signed_val3_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL3);
+
+ out->unsigned_val1 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL1);
+ out->unsigned_val2 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL2);
+ out->signed_val1 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL1);
+ out->signed_val2 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL2);
+ /* NAMED_ENUM64_VAL3 value is optional */
+
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
index 7e45e2bdf6cd..5b8a75097ea3 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
@@ -45,35 +45,34 @@ int test_core_existence(void *ctx)
struct core_reloc_existence_output *out = (void *)&data.out;
out->a_exists = bpf_core_field_exists(in->a);
- if (bpf_core_field_exists(in->a))
+ if (bpf_core_field_exists(struct core_reloc_existence, a))
out->a_value = BPF_CORE_READ(in, a);
else
out->a_value = 0xff000001u;
out->b_exists = bpf_core_field_exists(in->b);
- if (bpf_core_field_exists(in->b))
+ if (bpf_core_field_exists(struct core_reloc_existence, b))
out->b_value = BPF_CORE_READ(in, b);
else
out->b_value = 0xff000002u;
out->c_exists = bpf_core_field_exists(in->c);
- if (bpf_core_field_exists(in->c))
+ if (bpf_core_field_exists(struct core_reloc_existence, c))
out->c_value = BPF_CORE_READ(in, c);
else
out->c_value = 0xff000003u;
out->arr_exists = bpf_core_field_exists(in->arr);
- if (bpf_core_field_exists(in->arr))
+ if (bpf_core_field_exists(struct core_reloc_existence, arr))
out->arr_value = BPF_CORE_READ(in, arr[0]);
else
out->arr_value = 0xff000004u;
out->s_exists = bpf_core_field_exists(in->s);
- if (bpf_core_field_exists(in->s))
+ if (bpf_core_field_exists(struct core_reloc_existence, s))
out->s_value = BPF_CORE_READ(in, s.x);
else
out->s_value = 0xff000005u;
return 0;
}
-
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
index 145028b52ad8..ee4a601dcb06 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -21,6 +21,7 @@ struct core_reloc_kernel_output {
/* we have test_progs[-flavor], so cut flavor part */
char comm[sizeof("test_progs")];
int comm_len;
+ bool local_task_struct_matches;
};
struct task_struct {
@@ -30,15 +31,29 @@ struct task_struct {
struct task_struct *group_leader;
};
+struct mm_struct___wrong {
+ int abc_whatever_should_not_exist;
+};
+
+struct task_struct___local {
+ int pid;
+ struct mm_struct___wrong *mm;
+};
+
#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
SEC("raw_tracepoint/sys_enter")
int test_core_kernel(void *ctx)
{
+ /* Support for the BPF_TYPE_MATCHES argument to the
+ * __builtin_preserve_type_info builtin was added at some point during
+ * development of clang 15 and it's what we require for this test.
+ */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
struct task_struct *task = (void *)bpf_get_current_task();
struct core_reloc_kernel_output *out = (void *)&data.out;
uint64_t pid_tgid = bpf_get_current_pid_tgid();
- uint32_t real_tgid = (uint32_t)pid_tgid;
+ int32_t real_tgid = (int32_t)pid_tgid;
int pid, tgid;
if (data.my_pid_tgid != pid_tgid)
@@ -93,6 +108,10 @@ int test_core_kernel(void *ctx)
group_leader, group_leader, group_leader, group_leader,
comm);
+ out->local_task_struct_matches = bpf_core_type_matches(struct task_struct___local);
+#else
+ data.skip = true;
+#endif
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
index 8b533db4a7a5..b2ded497572a 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
@@ -42,7 +42,16 @@ struct core_reloc_mods {
core_reloc_mods_substruct_t h;
};
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+#else
+#define CORE_READ(dst, src) ({ \
+ int __sz = sizeof(*(dst)) < sizeof(*(src)) ? sizeof(*(dst)) : \
+ sizeof(*(src)); \
+ bpf_core_read((char *)(dst) + sizeof(*(dst)) - __sz, __sz, \
+ (const char *)(src) + sizeof(*(src)) - __sz); \
+})
+#endif
SEC("raw_tracepoint/sys_enter")
int test_core_mods(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
index f59f175c7baf..bcb31ff92dcc 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
@@ -43,8 +43,8 @@ int BPF_PROG(test_core_module_probed,
#if __has_builtin(__builtin_preserve_enum_value)
struct core_reloc_module_output *out = (void *)&data.out;
__u64 pid_tgid = bpf_get_current_pid_tgid();
- __u32 real_tgid = (__u32)(pid_tgid >> 32);
- __u32 real_pid = (__u32)pid_tgid;
+ __s32 real_tgid = (__s32)(pid_tgid >> 32);
+ __s32 real_pid = (__s32)pid_tgid;
if (data.my_pid_tgid != pid_tgid)
return 0;
@@ -77,8 +77,8 @@ int BPF_PROG(test_core_module_direct,
#if __has_builtin(__builtin_preserve_enum_value)
struct core_reloc_module_output *out = (void *)&data.out;
__u64 pid_tgid = bpf_get_current_pid_tgid();
- __u32 real_tgid = (__u32)(pid_tgid >> 32);
- __u32 real_pid = (__u32)pid_tgid;
+ __s32 real_tgid = (__s32)(pid_tgid >> 32);
+ __s32 real_pid = (__s32)pid_tgid;
if (data.my_pid_tgid != pid_tgid)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
index d7fb6cfc7891..5b686053ce42 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -15,12 +15,21 @@ struct {
struct core_reloc_size_output {
int int_sz;
+ int int_off;
int struct_sz;
+ int struct_off;
int union_sz;
+ int union_off;
int arr_sz;
+ int arr_off;
int arr_elem_sz;
+ int arr_elem_off;
int ptr_sz;
+ int ptr_off;
int enum_sz;
+ int enum_off;
+ int float_sz;
+ int float_off;
};
struct core_reloc_size {
@@ -30,6 +39,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
+ float float_field;
};
SEC("raw_tracepoint/sys_enter")
@@ -39,12 +49,28 @@ int test_core_size(void *ctx)
struct core_reloc_size_output *out = (void *)&data.out;
out->int_sz = bpf_core_field_size(in->int_field);
+ out->int_off = bpf_core_field_offset(in->int_field);
+
out->struct_sz = bpf_core_field_size(in->struct_field);
+ out->struct_off = bpf_core_field_offset(in->struct_field);
+
out->union_sz = bpf_core_field_size(in->union_field);
+ out->union_off = bpf_core_field_offset(in->union_field);
+
out->arr_sz = bpf_core_field_size(in->arr_field);
- out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
- out->ptr_sz = bpf_core_field_size(in->ptr_field);
- out->enum_sz = bpf_core_field_size(in->enum_field);
+ out->arr_off = bpf_core_field_offset(in->arr_field);
+
+ out->arr_elem_sz = bpf_core_field_size(struct core_reloc_size, arr_field[1]);
+ out->arr_elem_off = bpf_core_field_offset(struct core_reloc_size, arr_field[1]);
+
+ out->ptr_sz = bpf_core_field_size(struct core_reloc_size, ptr_field);
+ out->ptr_off = bpf_core_field_offset(struct core_reloc_size, ptr_field);
+
+ out->enum_sz = bpf_core_field_size(struct core_reloc_size, enum_field);
+ out->enum_off = bpf_core_field_offset(struct core_reloc_size, enum_field);
+
+ out->float_sz = bpf_core_field_size(struct core_reloc_size, float_field);
+ out->float_off = bpf_core_field_offset(struct core_reloc_size, float_field);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
index fb60f8195c53..2edb4df35e6e 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
@@ -19,6 +19,14 @@ struct a_struct {
int x;
};
+struct a_complex_struct {
+ union {
+ struct a_struct *a;
+ void *b;
+ } x;
+ volatile long y;
+};
+
union a_union {
int y;
int z;
@@ -43,6 +51,7 @@ typedef int int_typedef;
typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
typedef int (*func_proto_typedef)(long);
@@ -50,6 +59,7 @@ typedef char arr_typedef[20];
struct core_reloc_type_based_output {
bool struct_exists;
+ bool complex_struct_exists;
bool union_exists;
bool enum_exists;
bool typedef_named_struct_exists;
@@ -58,9 +68,24 @@ struct core_reloc_type_based_output {
bool typedef_int_exists;
bool typedef_enum_exists;
bool typedef_void_ptr_exists;
+ bool typedef_restrict_ptr_exists;
bool typedef_func_proto_exists;
bool typedef_arr_exists;
+ bool struct_matches;
+ bool complex_struct_matches;
+ bool union_matches;
+ bool enum_matches;
+ bool typedef_named_struct_matches;
+ bool typedef_anon_struct_matches;
+ bool typedef_struct_ptr_matches;
+ bool typedef_int_matches;
+ bool typedef_enum_matches;
+ bool typedef_void_ptr_matches;
+ bool typedef_restrict_ptr_matches;
+ bool typedef_func_proto_matches;
+ bool typedef_arr_matches;
+
int struct_sz;
int union_sz;
int enum_sz;
@@ -77,10 +102,17 @@ struct core_reloc_type_based_output {
SEC("raw_tracepoint/sys_enter")
int test_core_type_based(void *ctx)
{
-#if __has_builtin(__builtin_preserve_type_info)
+ /* Support for the BPF_TYPE_MATCHES argument to the
+ * __builtin_preserve_type_info builtin was added at some point during
+ * development of clang 15 and it's what we require for this test. Part of it
+ * could run with merely __builtin_preserve_type_info (which could be checked
+ * separately), but we have to find an upper bound.
+ */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
struct core_reloc_type_based_output *out = (void *)&data.out;
out->struct_exists = bpf_core_type_exists(struct a_struct);
+ out->complex_struct_exists = bpf_core_type_exists(struct a_complex_struct);
out->union_exists = bpf_core_type_exists(union a_union);
out->enum_exists = bpf_core_type_exists(enum an_enum);
out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef);
@@ -89,9 +121,24 @@ int test_core_type_based(void *ctx)
out->typedef_int_exists = bpf_core_type_exists(int_typedef);
out->typedef_enum_exists = bpf_core_type_exists(enum_typedef);
out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef);
+ out->typedef_restrict_ptr_exists = bpf_core_type_exists(restrict_ptr_typedef);
out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef);
out->typedef_arr_exists = bpf_core_type_exists(arr_typedef);
+ out->struct_matches = bpf_core_type_matches(struct a_struct);
+ out->complex_struct_matches = bpf_core_type_matches(struct a_complex_struct);
+ out->union_matches = bpf_core_type_matches(union a_union);
+ out->enum_matches = bpf_core_type_matches(enum an_enum);
+ out->typedef_named_struct_matches = bpf_core_type_matches(named_struct_typedef);
+ out->typedef_anon_struct_matches = bpf_core_type_matches(anon_struct_typedef);
+ out->typedef_struct_ptr_matches = bpf_core_type_matches(struct_ptr_typedef);
+ out->typedef_int_matches = bpf_core_type_matches(int_typedef);
+ out->typedef_enum_matches = bpf_core_type_matches(enum_typedef);
+ out->typedef_void_ptr_matches = bpf_core_type_matches(void_ptr_typedef);
+ out->typedef_restrict_ptr_matches = bpf_core_type_matches(restrict_ptr_typedef);
+ out->typedef_func_proto_matches = bpf_core_type_matches(func_proto_typedef);
+ out->typedef_arr_matches = bpf_core_type_matches(arr_typedef);
+
out->struct_sz = bpf_core_type_size(struct a_struct);
out->union_sz = bpf_core_type_size(union a_union);
out->enum_sz = bpf_core_type_size(enum an_enum);
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
index 22aba3f6e344..6fc8b9d66e34 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
@@ -80,7 +80,7 @@ int test_core_type_id(void *ctx)
* to detect whether this test has to be executed, however strange
* that might look like.
*
- * [0] https://reviews.llvm.org/D85174
+ * [0] https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99
*/
#if __has_builtin(__builtin_preserve_type_info)
struct core_reloc_type_id_output *out = (void *)&data.out;
diff --git a/tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c b/tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c
new file mode 100644
index 000000000000..4061f701ca50
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile int my_pid;
+
+bool abc1_called;
+bool abc2_called;
+bool custom1_called;
+bool custom2_called;
+bool kprobe1_called;
+bool xyz_called;
+
+SEC("abc")
+int abc1(void *ctx)
+{
+ abc1_called = true;
+ return 0;
+}
+
+SEC("abc/whatever")
+int abc2(void *ctx)
+{
+ abc2_called = true;
+ return 0;
+}
+
+SEC("custom")
+int custom1(void *ctx)
+{
+ custom1_called = true;
+ return 0;
+}
+
+SEC("custom/something")
+int custom2(void *ctx)
+{
+ custom2_called = true;
+ return 0;
+}
+
+SEC("kprobe")
+int kprobe1(void *ctx)
+{
+ kprobe1_called = true;
+ return 0;
+}
+
+SEC("xyz/blah")
+int xyz(void *ctx)
+{
+ int whatever;
+
+ /* use sleepable helper, custom handler should set sleepable flag */
+ bpf_copy_from_user(&whatever, sizeof(whatever), NULL);
+ xyz_called = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c
new file mode 100644
index 000000000000..27c27cff6a3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ void *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* FAIL here! 'active' points to readonly memory. bpf helpers
+ * that update its arguments can not write into it.
+ */
+ bpf_d_path(path, active, sizeof(int));
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
new file mode 100644
index 000000000000..7e02b7361307
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ void *active;
+ u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* FAIL here! 'active' points to 'regular' memory. It
+ * cannot be submitted to ring buffer.
+ */
+ bpf_ringbuf_submit(active, 0);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
new file mode 100644
index 000000000000..e96b901a733c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+#include <linux/capability.h>
+
+typedef struct { unsigned long long val; } kernel_cap_t;
+
+struct cred {
+ kernel_cap_t cap_effective;
+} __attribute__((preserve_access_index));
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm.s/userns_create")
+int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
+{
+ kernel_cap_t caps = cred->cap_effective;
+ __u64 cap_mask = 1ULL << CAP_SYS_ADMIN;
+
+ if (ret)
+ return 0;
+
+ ret = -EPERM;
+ if (caps.val & cap_mask)
+ return 0;
+
+ return -EPERM;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c
index 01a002ade529..1705097d01d7 100644
--- a/tools/testing/selftests/bpf/progs/test_enable_stats.c
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
@@ -13,6 +13,6 @@ __u64 count = 0;
SEC("raw_tracepoint/sys_enter")
int test_enable_stats(void *ctx)
{
- count += 1;
+ __sync_fetch_and_add(&count, 1);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
new file mode 100644
index 000000000000..fac33a14f200
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+
+extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
+extern bool CONFIG_PPC_FTRACE_OUT_OF_LINE __kconfig __weak;
+extern bool CONFIG_KPROBES_ON_FTRACE __kconfig __weak;
+extern bool CONFIG_PPC64 __kconfig __weak;
+
+/* This function is here to have CONFIG_X86_KERNEL_IBT,
+ * CONFIG_PPC_FTRACE_OUT_OF_LINE, CONFIG_KPROBES_ON_FTRACE,
+ * CONFIG_PPC6 used and added to object BTF.
+ */
+int unused(void)
+{
+ return CONFIG_X86_KERNEL_IBT ||
+ CONFIG_PPC_FTRACE_OUT_OF_LINE ||
+ CONFIG_KPROBES_ON_FTRACE ||
+ CONFIG_PPC64 ? 0 : 1;
+}
+
+SEC("kprobe")
+int BPF_PROG(kprobe_run)
+{
+ return 0;
+}
+
+SEC("uprobe")
+int BPF_PROG(uprobe_run)
+{
+ return 0;
+}
+
+SEC("tracepoint")
+int BPF_PROG(tp_run)
+{
+ return 0;
+}
+
+SEC("perf_event")
+int event_run(void *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe.multi")
+int BPF_PROG(kmulti_run)
+{
+ return 0;
+}
+
+SEC("uprobe.multi")
+int BPF_PROG(umulti_run)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_fsverity.c b/tools/testing/selftests/bpf/progs/test_fsverity.c
new file mode 100644
index 000000000000..9e0f73e8189c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_fsverity.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */
+
+char expected_digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
+char digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
+__u32 monitored_pid;
+__u32 got_fsverity;
+__u32 digest_matches;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+ struct bpf_dynptr digest_ptr;
+ __u32 pid;
+ int ret;
+ int i;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr);
+ ret = bpf_get_fsverity_digest(f, &digest_ptr);
+ if (ret < 0)
+ return 0;
+ got_fsverity = 1;
+
+ for (i = 0; i < (int)sizeof(digest); i++) {
+ if (digest[i] != expected_digest[i])
+ return 0;
+ }
+
+ digest_matches = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c
new file mode 100644
index 000000000000..54305f4c9f2d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+__u32 found_xattr_from_file;
+__u32 found_xattr_from_dentry;
+
+static const char expected_value[] = "hello";
+char value1[32];
+char value2[32];
+
+/* Matches caller of test_get_xattr() in prog_tests/fs_kfuncs.c */
+static const char xattr_names[][64] = {
+ /* The following work. */
+ "user.kfuncs",
+ "security.bpf.xxx",
+
+ /* The following do not work. */
+ "security.bpf",
+ "security.selinux"
+};
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret, i;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ bpf_dynptr_from_mem(value1, sizeof(value1), 0, &value_ptr);
+
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_file_xattr(f, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
+ if (ret != sizeof(expected_value))
+ return 0;
+ if (bpf_strncmp(value1, ret, expected_value))
+ return 0;
+ found_xattr_from_file = 1;
+ return 0;
+}
+
+SEC("lsm.s/inode_getxattr")
+int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret, i;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ bpf_dynptr_from_mem(value2, sizeof(value2), 0, &value_ptr);
+
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_dentry_xattr(dentry, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
+ if (ret != sizeof(expected_value))
+ return 0;
+ if (bpf_strncmp(value2, ret, expected_value))
+ return 0;
+ found_xattr_from_dentry = 1;
+
+ /* return non-zero to fail getxattr from user space */
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index 1319be1c54ba..719e314ef3e4 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -68,7 +68,7 @@ static struct foo struct3 = {
bpf_map_update_elem(&result_##map, &key, var, 0); \
} while (0)
-SEC("classifier/static_data_load")
+SEC("tc")
int load_static_data(struct __sk_buff *skb)
{
static const __u64 bar = ~0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
index 880260f6d536..fc69ff18880d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func1.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -3,14 +3,15 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
-#ifndef MAX_STACK
-#define MAX_STACK (512 - 3 * 32 + 8)
-#endif
+#define MAX_STACK 260
static __attribute__ ((noinline))
int f0(int var, struct __sk_buff *skb)
{
+ asm volatile ("");
+
return skb->len;
}
@@ -19,6 +20,8 @@ int f1(struct __sk_buff *skb)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return f0(0, skb) + skb->len;
}
@@ -27,6 +30,10 @@ int f3(int, struct __sk_buff *skb, int);
__attribute__ ((noinline))
int f2(int val, struct __sk_buff *skb)
{
+ volatile char buf[MAX_STACK] = {};
+
+ __sink(buf[MAX_STACK - 1]);
+
return f1(skb) + f3(val, skb, 1);
}
@@ -35,11 +42,14 @@ int f3(int val, struct __sk_buff *skb, int var)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->ifindex * val * var;
}
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("combined stack size of 3 calls is")
+int global_func1(struct __sk_buff *skb)
{
return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 61c2ae92ce41..09d027bd3ea8 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -2,26 +2,32 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
struct Small {
- int x;
+ long x;
};
struct Big {
- int x;
- int y;
+ long x;
+ long y;
};
__noinline int foo(const struct Big *big)
{
- if (big == 0)
+ if (!big)
return 0;
return bpf_get_prandom_u32() < big->y;
}
SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("invalid read from stack")
+int global_func10(struct __sk_buff *skb)
{
const struct Small small = {.x = skb->len };
diff --git a/tools/testing/selftests/bpf/progs/test_global_func11.c b/tools/testing/selftests/bpf/progs/test_global_func11.c
index 28488047c849..283e036dc401 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func11.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func11.c
@@ -2,6 +2,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct S {
int x;
@@ -13,7 +14,8 @@ __noinline int foo(const struct S *s)
}
SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("Caller passes invalid args into func#1")
+int global_func11(struct __sk_buff *skb)
{
- return foo(skb);
+ return foo((const void *)skb);
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func12.c b/tools/testing/selftests/bpf/progs/test_global_func12.c
index 62343527cc59..6e03d42519a6 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func12.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func12.c
@@ -2,6 +2,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct S {
int x;
@@ -13,9 +14,12 @@ __noinline int foo(const struct S *s)
}
SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("invalid mem access 'mem_or_null'")
+int global_func12(struct __sk_buff *skb)
{
const struct S s = {.x = skb->len };
- return foo(&s);
+ foo(&s);
+
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func13.c b/tools/testing/selftests/bpf/progs/test_global_func13.c
index ff8897c1ac22..02ea80da75b5 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func13.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func13.c
@@ -2,6 +2,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct S {
int x;
@@ -16,7 +17,8 @@ __noinline int foo(const struct S *s)
}
SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("Caller passes invalid args into func#1")
+int global_func13(struct __sk_buff *skb)
{
const struct S *s = (const struct S *)(0xbedabeda);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func14.c b/tools/testing/selftests/bpf/progs/test_global_func14.c
index 698c77199ebf..33b7d5efd7b2 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func14.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func14.c
@@ -2,6 +2,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct S;
@@ -14,7 +15,8 @@ __noinline int foo(const struct S *s)
}
SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("reference type('FWD S') size cannot be determined")
+int global_func14(struct __sk_buff *skb)
{
return foo(NULL);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c
index c19c435988d5..201cc000b3f4 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func15.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func15.c
@@ -2,6 +2,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__noinline int foo(unsigned int *v)
{
@@ -12,7 +13,8 @@ __noinline int foo(unsigned int *v)
}
SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("At program exit the register R0 has ")
+int global_func15(struct __sk_buff *skb)
{
unsigned int v = 1;
@@ -20,3 +22,35 @@ int test_cls(struct __sk_buff *skb)
return v;
}
+
+SEC("cgroup_skb/ingress")
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
+__failure
+/* check that fallthrough code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (b7) r0 = 1")
+/* check that branch code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7")
+__msg("At program exit the register R0 has ")
+__naked int global_func15_tricky_pruning(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 s> 1000 goto 1f;"
+ "r0 = 1;"
+ "1:"
+ "goto +0;" /* checkpoint */
+ /* cgroup_skb/ingress program is expected to return [0, 1]
+ * values, so branch above makes sure that in a fallthrough
+ * case we have a valid 1 stored in R0 register, but in
+ * a branch case we assign some random value to R0. So if
+ * there is something wrong with precision tracking for R0 at
+ * program exit, we might erroneously prune branch case,
+ * because R0 in fallthrough case is imprecise (and thus any
+ * value is valid from POV of verifier is_state_equal() logic)
+ */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common
+ );
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c
index 0312d1e8d8c0..e3e64bc472cd 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func16.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func16.c
@@ -2,6 +2,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__noinline int foo(int (*arr)[10])
{
@@ -12,7 +13,8 @@ __noinline int foo(int (*arr)[10])
}
SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__success
+int global_func16(struct __sk_buff *skb)
{
int array[10];
diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c
new file mode 100644
index 000000000000..5de44b09e8ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func17.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline int foo(int *p)
+{
+ barrier_var(p);
+ return p ? (*p = 42) : 0;
+}
+
+const volatile int i;
+
+SEC("tc")
+__failure __msg("Caller passes invalid args into func#1")
+int global_func17(struct __sk_buff *skb)
+{
+ return foo((int *)&i);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c
index 2c18d82923a2..2beab9c3b68a 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func2.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func2.c
@@ -1,4 +1,49 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
#define MAX_STACK (512 - 3 * 32)
-#include "test_global_func1.c"
+
+static __attribute__ ((noinline))
+int f0(int var, struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+ volatile char buf[MAX_STACK] = {};
+
+ __sink(buf[MAX_STACK - 1]);
+
+ return f0(0, skb) + skb->len;
+}
+
+int f3(int, struct __sk_buff *skb, int);
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+ return f1(skb) + f3(val, skb, 1);
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+ volatile char buf[MAX_STACK] = {};
+
+ __sink(buf[MAX_STACK - 1]);
+
+ return skb->ifindex * val * var;
+}
+
+SEC("tc")
+__success
+int global_func2(struct __sk_buff *skb)
+{
+ return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c
index 86f0ecb304fc..142b682d3c2f 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func3.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func3.c
@@ -3,6 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__attribute__ ((noinline))
int f1(struct __sk_buff *skb)
@@ -46,20 +47,15 @@ int f7(struct __sk_buff *skb)
return f6(skb);
}
-#ifndef NO_FN8
__attribute__ ((noinline))
int f8(struct __sk_buff *skb)
{
return f7(skb);
}
-#endif
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("the call stack of 8 frames")
+int global_func3(struct __sk_buff *skb)
{
-#ifndef NO_FN8
return f8(skb);
-#else
- return f7(skb);
-#endif
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func4.c b/tools/testing/selftests/bpf/progs/test_global_func4.c
index 610f75edf276..1733d87ad3f3 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func4.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func4.c
@@ -1,4 +1,55 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */
-#define NO_FN8
-#include "test_global_func3.c"
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+ return f1(skb) + val;
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+ return f2(var, skb) + val;
+}
+
+__attribute__ ((noinline))
+int f4(struct __sk_buff *skb)
+{
+ return f3(1, skb, 2);
+}
+
+__attribute__ ((noinline))
+int f5(struct __sk_buff *skb)
+{
+ return f4(skb);
+}
+
+__attribute__ ((noinline))
+int f6(struct __sk_buff *skb)
+{
+ return f5(skb);
+}
+
+__attribute__ ((noinline))
+int f7(struct __sk_buff *skb)
+{
+ return f6(skb);
+}
+
+SEC("tc")
+__success
+int global_func4(struct __sk_buff *skb)
+{
+ return f7(skb);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c
index 260c25b827ef..257c0569ff98 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func5.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func5.c
@@ -3,6 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__attribute__ ((noinline))
int f1(struct __sk_buff *skb)
@@ -24,8 +25,9 @@ int f3(int val, struct __sk_buff *skb)
return skb->ifindex * val;
}
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("expects pointer to ctx")
+int global_func5(struct __sk_buff *skb)
{
return f1(skb) + f2(2, skb) + f3(3, skb);
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c
index 69e19c64e10b..46c38c8f2cf0 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func6.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func6.c
@@ -3,6 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__attribute__ ((noinline))
int f1(struct __sk_buff *skb)
@@ -24,8 +25,9 @@ int f3(int val, struct __sk_buff *skb)
return skb->ifindex * val;
}
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("modified ctx ptr R2")
+int global_func6(struct __sk_buff *skb)
{
return f1(skb) + f2(2, skb) + f3(3, skb);
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c
index 309b3f6136bd..f182febfde3c 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func7.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func7.c
@@ -3,6 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__attribute__ ((noinline))
void foo(struct __sk_buff *skb)
@@ -10,8 +11,9 @@ void foo(struct __sk_buff *skb)
skb->tc_index = 0;
}
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("foo() doesn't return scalar")
+int global_func7(struct __sk_buff *skb)
{
foo(skb);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c
index d55a6544b1ab..9b9c57fa2dd3 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func8.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func8.c
@@ -3,6 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__noinline int foo(struct __sk_buff *skb)
{
@@ -10,7 +11,8 @@ __noinline int foo(struct __sk_buff *skb)
}
SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__success
+int global_func8(struct __sk_buff *skb)
{
if (!foo(skb))
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func9.c b/tools/testing/selftests/bpf/progs/test_global_func9.c
index bd233ddede98..1f2cb0159b8d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func9.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func9.c
@@ -2,6 +2,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct S {
int x;
@@ -74,7 +75,8 @@ __noinline int quuz(int **p)
}
SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__success
+int global_func9(struct __sk_buff *skb)
{
int result = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c
index cae309538a9e..e712bf77daae 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func_args.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c
@@ -8,7 +8,7 @@ struct S {
int v;
};
-static volatile struct S global_variable;
+struct S global_variable = {};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
new file mode 100644
index 000000000000..143c8a4852bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+static long stack[256];
+
+/*
+ * KPROBE contexts
+ */
+
+__weak int kprobe_typedef_ctx_subprog(bpf_user_pt_regs_t *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?kprobe")
+__success
+int kprobe_typedef_ctx(void *ctx)
+{
+ return kprobe_typedef_ctx_subprog(ctx);
+}
+
+/* s390x defines:
+ *
+ * typedef user_pt_regs bpf_user_pt_regs_t;
+ * typedef struct { ... } user_pt_regs;
+ *
+ * And so "canonical" underlying struct type is anonymous.
+ * So on s390x only valid ways to have PTR_TO_CTX argument in global subprogs
+ * are:
+ * - bpf_user_pt_regs_t *ctx (typedef);
+ * - struct bpf_user_pt_regs_t *ctx (backwards compatible struct hack);
+ * - void *ctx __arg_ctx (arg:ctx tag)
+ *
+ * Other architectures also allow using underlying struct types (e.g.,
+ * `struct pt_regs *ctx` for x86-64)
+ */
+#ifndef bpf_target_s390
+
+#define pt_regs_struct_t typeof(*(__PT_REGS_CAST((struct pt_regs *)NULL)))
+
+__weak int kprobe_struct_ctx_subprog(pt_regs_struct_t *ctx)
+{
+ return bpf_get_stack((void *)ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?kprobe")
+__success
+int kprobe_resolved_ctx(void *ctx)
+{
+ return kprobe_struct_ctx_subprog(ctx);
+}
+
+#endif
+
+/* this is current hack to make this work on old kernels */
+struct bpf_user_pt_regs_t {};
+
+__weak int kprobe_workaround_ctx_subprog(struct bpf_user_pt_regs_t *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?kprobe")
+__success
+int kprobe_workaround_ctx(void *ctx)
+{
+ return kprobe_workaround_ctx_subprog(ctx);
+}
+
+/*
+ * RAW_TRACEPOINT contexts
+ */
+
+__weak int raw_tp_ctx_subprog(struct bpf_raw_tracepoint_args *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?raw_tp")
+__success
+int raw_tp_ctx(void *ctx)
+{
+ return raw_tp_ctx_subprog(ctx);
+}
+
+/*
+ * RAW_TRACEPOINT_WRITABLE contexts
+ */
+
+__weak int raw_tp_writable_ctx_subprog(struct bpf_raw_tracepoint_args *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?raw_tp")
+__success
+int raw_tp_writable_ctx(void *ctx)
+{
+ return raw_tp_writable_ctx_subprog(ctx);
+}
+
+/*
+ * PERF_EVENT contexts
+ */
+
+__weak int perf_event_ctx_subprog(struct bpf_perf_event_data *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?perf_event")
+__success
+int perf_event_ctx(void *ctx)
+{
+ return perf_event_ctx_subprog(ctx);
+}
+
+/* this global subprog can be now called from many types of entry progs, each
+ * with different context type
+ */
+__weak int subprog_ctx_tag(void *ctx __arg_ctx)
+{
+ return bpf_get_stack(ctx, stack, sizeof(stack), 0);
+}
+
+struct my_struct { int x; };
+
+__weak int subprog_multi_ctx_tags(void *ctx1 __arg_ctx,
+ struct my_struct *mem,
+ void *ctx2 __arg_ctx)
+{
+ if (!mem)
+ return 0;
+
+ return bpf_get_stack(ctx1, stack, sizeof(stack), 0) +
+ mem->x +
+ bpf_get_stack(ctx2, stack, sizeof(stack), 0);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+int arg_tag_ctx_raw_tp(void *ctx)
+{
+ struct my_struct x = { .x = 123 };
+
+ return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx);
+}
+
+SEC("?perf_event")
+__success __log_level(2)
+int arg_tag_ctx_perf(void *ctx)
+{
+ struct my_struct x = { .x = 123 };
+
+ return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx);
+}
+
+SEC("?kprobe")
+__success __log_level(2)
+int arg_tag_ctx_kprobe(void *ctx)
+{
+ struct my_struct x = { .x = 123 };
+
+ return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
new file mode 100644
index 000000000000..ee65bad0436d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* rodata section */
+const volatile pid_t pid;
+const volatile size_t bss_array_len;
+const volatile size_t data_array_len;
+
+/* bss section */
+int sum = 0;
+int array[1];
+
+/* custom data section */
+int my_array[1] SEC(".data.custom");
+
+/* custom data section which should NOT be resizable,
+ * since it contains a single var which is not an array
+ */
+int my_int SEC(".data.non_array");
+
+/* custom data section which should NOT be resizable,
+ * since its last var is not an array
+ */
+int my_array_first[1] SEC(".data.array_not_last");
+int my_int_last SEC(".data.array_not_last");
+
+int percpu_arr[1] SEC(".data.percpu_arr");
+
+/* at least one extern is included, to ensure that a specific
+ * regression is tested whereby resizing resulted in a free-after-use
+ * bug after type information is invalidated by the resize operation.
+ *
+ * There isn't a particularly good API to test for this specific condition,
+ * but by having externs for the resizing tests it will cover this path.
+ */
+extern int LINUX_KERNEL_VERSION __kconfig;
+long version_sink;
+
+SEC("tp/syscalls/sys_enter_getpid")
+int bss_array_sum(void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ /* this will be zero, we just rely on verifier not rejecting this */
+ sum = percpu_arr[bpf_get_smp_processor_id()];
+
+ for (size_t i = 0; i < bss_array_len; ++i)
+ sum += array[i];
+
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int data_array_sum(void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ /* this will be zero, we just rely on verifier not rejecting this */
+ sum = percpu_arr[bpf_get_smp_processor_id()];
+
+ for (size_t i = 0; i < data_array_len; ++i)
+ sum += my_array[i];
+
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
+ return 0;
+}
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1)
+{
+ return 0;
+}
+
+struct bpf_testmod_ops {
+ int (*test_1)(void);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops st_ops_resize = {
+ .test_1 = (void *)test_1
+};
diff --git a/tools/testing/selftests/bpf/progs/test_hash_large_key.c b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
index 473a22794a62..8b438128f46b 100644
--- a/tools/testing/selftests/bpf/progs/test_hash_large_key.c
+++ b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
@@ -28,7 +28,7 @@ struct bigelement {
SEC("raw_tracepoint/sys_enter")
int bpf_hash_large_key_test(void *ctx)
{
- int zero = 0, err = 1, value = 42;
+ int zero = 0, value = 42;
struct bigelement *key;
key = bpf_map_lookup_elem(&key_map, &zero);
diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
new file mode 100644
index 000000000000..5715c569ec03
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <time.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct timer {
+ struct bpf_timer t;
+};
+
+struct lock {
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct timer);
+} timers SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct lock);
+} locks SEC(".maps");
+
+static int timer_cb(void *map, int *key, struct timer *timer)
+{
+ return 0;
+}
+
+static void timer_work(void)
+{
+ struct timer *timer;
+ const int key = 0;
+
+ timer = bpf_map_lookup_elem(&timers, &key);
+ if (timer) {
+ bpf_timer_init(&timer->t, &timers, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(&timer->t, timer_cb);
+ bpf_timer_start(&timer->t, 10E9, 0);
+ bpf_timer_cancel(&timer->t);
+ }
+}
+
+static void spin_lock_work(void)
+{
+ const int key = 0;
+ struct lock *lock;
+
+ lock = bpf_map_lookup_elem(&locks, &key);
+ if (lock) {
+ bpf_spin_lock(&lock->l);
+ bpf_spin_unlock(&lock->l);
+ }
+}
+
+SEC("?raw_tp/sys_enter")
+int raw_tp_timer(void *ctx)
+{
+ timer_work();
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int tp_timer(void *ctx)
+{
+ timer_work();
+
+ return 0;
+}
+
+SEC("?kprobe")
+int kprobe_timer(void *ctx)
+{
+ timer_work();
+
+ return 0;
+}
+
+SEC("?perf_event")
+int perf_event_timer(void *ctx)
+{
+ timer_work();
+
+ return 0;
+}
+
+SEC("?raw_tp/sys_enter")
+int raw_tp_spin_lock(void *ctx)
+{
+ spin_lock_work();
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int tp_spin_lock(void *ctx)
+{
+ spin_lock_work();
+
+ return 0;
+}
+
+SEC("?kprobe")
+int kprobe_spin_lock(void *ctx)
+{
+ spin_lock_work();
+
+ return 0;
+}
+
+SEC("?perf_event")
+int perf_event_spin_lock(void *ctx)
+{
+ spin_lock_work();
+
+ return 0;
+}
+
+const char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h
index c300734d26f6..ef53559bbbdf 100644
--- a/tools/testing/selftests/bpf/progs/test_jhash.h
+++ b/tools/testing/selftests/bpf/progs/test_jhash.h
@@ -69,3 +69,34 @@ u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
+
+static __always_inline u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+ u32 a, b, c;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+ /* Handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ __jhash_mix(a, b, c);
+ length -= 3;
+ k += 3;
+ }
+
+ /* Handle the last 3 u32's */
+ switch (length) {
+ case 3: c += k[2];
+ case 2: b += k[1];
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ break;
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kernel_flag.c b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
new file mode 100644
index 000000000000..b45fab3be352
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2025 Microsoft Corporation
+ *
+ * Author: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_tid;
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ __u32 tid;
+
+ tid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
+ if (!kernel || tid != monitored_tid)
+ return 0;
+ else
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
new file mode 100644
index 000000000000..061befb004c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
+ struct bpf_dynptr *sig_ptr,
+ struct bpf_key *trusted_keyring) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map SEC(".maps");
+
+int err, pid;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?lsm.s/bpf")
+__failure __msg("cannot pass in dynptr at an offset=-8")
+int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ unsigned long val;
+
+ return bpf_verify_pkcs7_signature((struct bpf_dynptr *)&val,
+ (struct bpf_dynptr *)&val, NULL);
+}
+
+SEC("?lsm.s/bpf")
+__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
+int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ unsigned long val = 0;
+
+ return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val,
+ (struct bpf_dynptr *)val, NULL);
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ struct bpf_key *trusted_keyring;
+ struct bpf_dynptr ptr;
+ __u32 *value;
+ int ret, zero = 0;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ value = bpf_map_lookup_elem(&array_map, &zero);
+ if (!value)
+ return 0;
+
+ /* Pass invalid flags. */
+ ret = bpf_dynptr_from_mem(value, sizeof(*value), ((__u64)~0ULL), &ptr);
+ if (ret != -EINVAL)
+ return 0;
+
+ trusted_keyring = bpf_lookup_system_key(0);
+ if (!trusted_keyring)
+ return 0;
+
+ err = bpf_verify_pkcs7_signature(&ptr, &ptr, trusted_keyring);
+
+ bpf_key_put(trusted_keyring);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
new file mode 100644
index 000000000000..0ad1bf1ede8d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("tc")
+int kfunc_dynptr_nullable_test1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_kfunc_dynptr_test(&data, NULL);
+
+ return 0;
+}
+
+SEC("tc")
+int kfunc_dynptr_nullable_test2(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_kfunc_dynptr_test(&data, &data);
+
+ return 0;
+}
+
+SEC("tc")
+__failure __msg("expected pointer to stack or const struct bpf_dynptr")
+int kfunc_dynptr_nullable_test3(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_kfunc_dynptr_test(NULL, &data);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
new file mode 100644
index 000000000000..27109b877714
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+ int *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* Kernel memory obtained from bpf_{per,this}_cpu_ptr
+ * is read-only, should _not_ pass verification.
+ */
+ /* WRITE_ONCE */
+ *(volatile int *)active = -1;
+ }
+
+ return 0;
+}
+
+__noinline int write_active(int *p)
+{
+ return p ? (*p = 42) : 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+ int *active;
+
+ active = bpf_this_cpu_ptr(&bpf_prog_active);
+ write_active(active);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
index d6a0b3086b90..0650d918c096 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_module.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
@@ -2,24 +2,48 @@
/* Copyright (c) 2021 Facebook */
#include "vmlinux.h"
-
#include <bpf/bpf_helpers.h>
+#define X_0(x)
+#define X_1(x) x X_0(x)
+#define X_2(x) x X_1(x)
+#define X_3(x) x X_2(x)
+#define X_4(x) x X_3(x)
+#define X_5(x) x X_4(x)
+#define X_6(x) x X_5(x)
+#define X_7(x) x X_6(x)
+#define X_8(x) x X_7(x)
+#define X_9(x) x X_8(x)
+#define X_10(x) x X_9(x)
+#define REPEAT_256(Y) X_2(X_10(X_10(Y))) X_5(X_10(Y)) X_6(Y)
+
extern const int bpf_testmod_ksym_percpu __ksym;
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+extern void bpf_testmod_invalid_mod_kfunc(void) __ksym __weak;
-int out_mod_ksym_global = 0;
-bool triggered = false;
+int out_bpf_testmod_ksym = 0;
+const volatile int x = 0;
-SEC("raw_tp/sys_enter")
-int handler(const void *ctx)
+SEC("tc")
+int load(struct __sk_buff *skb)
{
- int *val;
- __u32 cpu;
-
- val = (int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
- out_mod_ksym_global = *val;
- triggered = true;
+ /* This will be kept by clang, but removed by verifier. Since it is
+ * marked as __weak, libbpf and gen_loader don't error out if BTF ID
+ * is not found for it, instead imm and off is set to 0 for it.
+ */
+ if (x)
+ bpf_testmod_invalid_mod_kfunc();
+ bpf_testmod_test_mod_kfunc(42);
+ out_bpf_testmod_ksym = *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+ return 0;
+}
+SEC("tc")
+int load_256(struct __sk_buff *skb)
+{
+ /* this will fail if kfunc doesn't reuse its own btf fd index */
+ REPEAT_256(bpf_testmod_test_mod_kfunc(42););
+ bpf_testmod_test_mod_kfunc(42);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
new file mode 100644
index 000000000000..d00268c91e19
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test weak ksyms.
+ *
+ * Copyright (c) 2021 Google
+ */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+int out__existing_typed = -1;
+__u64 out__existing_typeless = -1;
+
+__u64 out__non_existent_typeless = -1;
+__u64 out__non_existent_typed = -1;
+
+/* existing weak symbols */
+
+/* test existing weak symbols can be resolved. */
+extern const struct rq runqueues __ksym __weak; /* typed */
+extern const void bpf_prog_active __ksym __weak; /* typeless */
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+void bpf_testmod_test_mod_kfunc(int i) __ksym __weak;
+
+
+/* non-existent weak symbols. */
+
+/* typeless symbols, default to zero. */
+extern const void bpf_link_fops1 __ksym __weak;
+
+/* typed symbols, default to zero. */
+extern const int bpf_link_fops2 __ksym __weak;
+void invalid_kfunc(void) __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int pass_handler(const void *ctx)
+{
+ struct rq *rq;
+
+ /* tests existing symbols. */
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+ if (rq && bpf_ksym_exists(&runqueues))
+ out__existing_typed = rq->cpu;
+ out__existing_typeless = (__u64)&bpf_prog_active;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typeless = (__u64)&bpf_link_fops1;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typed = (__u64)&bpf_link_fops2;
+
+ if (&bpf_link_fops2) /* can't happen */
+ out__non_existent_typed = (__u64)bpf_per_cpu_ptr(&bpf_link_fops2, 0);
+
+ if (!bpf_ksym_exists(bpf_task_acquire))
+ /* dead code won't be seen by the verifier */
+ bpf_task_acquire(0);
+
+ if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc))
+ /* dead code won't be seen by the verifier */
+ bpf_testmod_test_mod_kfunc(0);
+
+ if (bpf_ksym_exists(invalid_kfunc))
+ /* dead code won't be seen by the verifier */
+ invalid_kfunc();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index 33493911d87a..c26057ec46dc 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -21,8 +21,6 @@
#include "test_iptunnel_common.h"
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
static inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
@@ -450,7 +448,7 @@ static __always_inline int process_packet(void *data, __u64 off, void *data_end,
return bpf_redirect(ifindex, 0);
}
-SEC("l4lb-demo")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index b9e2753f4f91..c8bc0c6947aa 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -218,7 +218,7 @@ static __noinline bool get_packet_dst(struct real_definition **real,
if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
+ return false;
real_pos = bpf_map_lookup_elem(&ch_rings, &key);
if (!real_pos)
@@ -447,7 +447,7 @@ static __noinline int process_packet(void *data, __u64 off, void *data_end,
return bpf_redirect(ifindex, 0);
}
-SEC("l4lb-demo")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c
new file mode 100644
index 000000000000..f997f5080748
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+#include "test_iptunnel_common.h"
+#include <bpf/bpf_endian.h>
+
+#include "bpf_kfuncs.h"
+
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static __noinline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, MAX_VIPS);
+ __type(key, struct vip);
+ __type(value, struct vip_meta);
+} vip_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, CH_RINGS_SIZE);
+ __type(key, __u32);
+ __type(value, __u32);
+} ch_rings SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, MAX_REALS);
+ __type(key, __u32);
+ __type(value, struct real_definition);
+} reals SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, MAX_VIPS);
+ __type(key, __u32);
+ __type(value, struct vip_stats);
+} stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, CTL_MAP_SIZE);
+ __type(key, __u32);
+ __type(value, struct ctl_value);
+} ctl_array SEC(".maps");
+
+static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
+{
+ if (ipv6)
+ return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+ pckt->ports, CH_RINGS_SIZE);
+ else
+ return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static __noinline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6);
+ __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+ __u32 *real_pos;
+
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return false;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+
+static __noinline int parse_icmpv6(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct ipv6hdr)] = {};
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!icmp_hdr)
+ return TC_ACT_SHOT;
+
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!ip6h)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+static __noinline int parse_icmp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer_icmp[sizeof(struct iphdr)] = {};
+ __u8 buffer_ip[sizeof(struct iphdr)] = {};
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer_icmp, sizeof(buffer_icmp));
+ if (!icmp_hdr)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = bpf_dynptr_slice(skb_ptr, off, buffer_ip, sizeof(buffer_ip));
+ if (!iph || iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+static __noinline bool parse_udp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct udphdr)] = {};
+ struct udphdr *udp;
+
+ udp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!udp)
+ return false;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = udp->source;
+ pckt->port16[1] = udp->dest;
+ } else {
+ pckt->port16[0] = udp->dest;
+ pckt->port16[1] = udp->source;
+ }
+ return true;
+}
+
+static __noinline bool parse_tcp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct tcphdr)] = {};
+ struct tcphdr *tcp;
+
+ tcp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!tcp)
+ return false;
+
+ if (tcp->syn)
+ pckt->flags |= F_SYN_SET;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = tcp->source;
+ pckt->port16[1] = tcp->dest;
+ } else {
+ pckt->port16[0] = tcp->dest;
+ pckt->port16[1] = tcp->source;
+ }
+ return true;
+}
+
+static __noinline int process_packet(struct bpf_dynptr *skb_ptr,
+ struct eth_hdr *eth, __u64 off,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ struct packet_description pckt = {};
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ __u8 buffer[sizeof(struct ipv6hdr)] = {};
+
+ ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!ip6h)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(skb_ptr, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ __u8 buffer[sizeof(struct iphdr)] = {};
+
+ iph = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!iph || iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(skb_ptr, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ protocol = pckt.proto;
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(skb_ptr, off, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(skb_ptr, off, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+SEC("tc")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ __u8 buffer[sizeof(struct eth_hdr)] = {};
+ struct bpf_dynptr ptr;
+ struct eth_hdr *eth;
+ __u32 eth_proto;
+ __u32 nh_off;
+ int err;
+
+ nh_off = sizeof(struct eth_hdr);
+
+ bpf_dynptr_from_skb(ctx, 0, &ptr);
+ eth = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!eth)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ err = process_packet(&ptr, eth, nh_off, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ err = process_packet(&ptr, eth, nh_off, true, ctx);
+ else
+ return TC_ACT_SHOT;
+
+ if (eth == buffer)
+ bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
+
+ return err;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
new file mode 100644
index 000000000000..2a2a942737d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_s390) || defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+const volatile int skip = 0;
+#else
+const volatile int skip = 1;
+#endif
+
+volatile const short val1 = -1;
+volatile const int val2 = -1;
+short val3 = -1;
+int val4 = -1;
+int done1, done2, ret1, ret2;
+
+SEC("?raw_tp/sys_enter")
+int rdonly_map_prog(const void *ctx)
+{
+ if (done1)
+ return 0;
+
+ done1 = 1;
+ /* val1/val2 readonly map */
+ if (val1 == val2)
+ ret1 = 1;
+ return 0;
+
+}
+
+SEC("?raw_tp/sys_enter")
+int map_val_prog(const void *ctx)
+{
+ if (done2)
+ return 0;
+
+ done2 = 1;
+ /* val1/val2 regular read/write map */
+ if (val3 == val4)
+ ret2 = 1;
+ return 0;
+
+}
+
+struct bpf_testmod_struct_arg_1 {
+ int a;
+};
+
+long long int_member;
+
+SEC("?fentry/bpf_testmod_test_arg_ptr_to_struct")
+int BPF_PROG2(test_ptr_struct_arg, struct bpf_testmod_struct_arg_1 *, p)
+{
+ /* probed memory access */
+ int_member = p->a;
+ return 0;
+}
+
+long long set_optlen, set_retval;
+
+SEC("?cgroup/getsockopt")
+int _getsockopt(volatile struct bpf_sockopt *ctx)
+{
+ int old_optlen, old_retval;
+
+ old_optlen = ctx->optlen;
+ old_retval = ctx->retval;
+
+ ctx->optlen = -1;
+ ctx->retval = -1;
+
+ /* sign extension for ctx member */
+ set_optlen = ctx->optlen;
+ set_retval = ctx->retval;
+
+ ctx->optlen = old_optlen;
+ ctx->retval = old_retval;
+
+ return 0;
+}
+
+long long set_mark;
+
+SEC("?tc")
+int _tc(volatile struct __sk_buff *skb)
+{
+ long long tmp_mark;
+ int old_mark;
+
+ old_mark = skb->mark;
+
+ skb->mark = 0xf6fe;
+
+ /* narrowed sign extension for ctx member */
+#if __clang_major__ >= 18
+ /* force narrow one-byte signed load. Otherwise, compiler may
+ * generate a 32-bit unsigned load followed by an s8 movsx.
+ */
+ asm volatile ("r1 = *(s8 *)(%[ctx] + %[off_mark])\n\t"
+ "%[tmp_mark] = r1"
+ : [tmp_mark]"=r"(tmp_mark)
+ : [ctx]"r"(skb),
+ [off_mark]"i"(offsetof(struct __sk_buff, mark)
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ + sizeof(skb->mark) - 1
+#endif
+ )
+ : "r1");
+#else
+ tmp_mark = (char)skb->mark;
+#endif
+ set_mark = tmp_mark;
+
+ skb->mark = old_mark;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_legacy_printk.c b/tools/testing/selftests/bpf/progs/test_legacy_printk.c
new file mode 100644
index 000000000000..42718cd8e6a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_legacy_printk.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#define BPF_NO_GLOBAL_DATA
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} my_pid_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} res_map SEC(".maps");
+
+volatile int my_pid_var = 0;
+volatile int res_var = 0;
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_legacy(void *ctx)
+{
+ int zero = 0, *my_pid, cur_pid, *my_res;
+
+ my_pid = bpf_map_lookup_elem(&my_pid_map, &zero);
+ if (!my_pid)
+ return 1;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+ if (cur_pid != *my_pid)
+ return 1;
+
+ my_res = bpf_map_lookup_elem(&res_map, &zero);
+ if (!my_res)
+ return 1;
+
+ if (*my_res == 0)
+ /* use bpf_printk() in combination with BPF_NO_GLOBAL_DATA to
+ * force .rodata.str1.1 section that previously caused
+ * problems on old kernels due to libbpf always tried to
+ * create a global data map for it
+ */
+ bpf_printk("Legacy-case bpf_printk test, pid %d\n", cur_pid);
+ *my_res = 1;
+
+ return *my_res;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_modern(void *ctx)
+{
+ int cur_pid;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+ if (cur_pid != my_pid_var)
+ return 1;
+
+ if (res_var == 0)
+ /* we need bpf_printk() to validate libbpf logic around unused
+ * global maps and legacy kernels; see comment in handle_legacy()
+ */
+ bpf_printk("Modern-case bpf_printk test, pid %d\n", cur_pid);
+ res_var = 1;
+
+ return res_var;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c
new file mode 100644
index 000000000000..568816307f71
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* From include/linux/mm.h. */
+#define FMODE_WRITE 0x2
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} data_input SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm/bpf_map")
+int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
+{
+ if (map != (struct bpf_map *)&data_input)
+ return 0;
+
+ if (fmode & FMODE_WRITE)
+ return -EACCES;
+ barrier();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_log_buf.c b/tools/testing/selftests/bpf/progs/test_log_buf.c
new file mode 100644
index 000000000000..199f459bd5ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_log_buf.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int a[4];
+const volatile int off = 4000;
+
+SEC("raw_tp/sys_enter")
+int good_prog(const void *ctx)
+{
+ a[0] = (int)(long)ctx;
+ return a[1];
+}
+
+SEC("raw_tp/sys_enter")
+int bad_prog(const void *ctx)
+{
+ /* out of bounds access */
+ return a[off];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c
new file mode 100644
index 000000000000..1bd48feaaa42
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct___bad {
+ int pid;
+ int fake_field;
+ void *fake_field_subprog;
+} __attribute__((preserve_access_index));
+
+SEC("?raw_tp/sys_enter")
+int bad_relo(const void *ctx)
+{
+ static struct task_struct___bad *t;
+
+ return bpf_core_field_size(t->fake_field);
+}
+
+static __noinline int bad_subprog(void)
+{
+ static struct task_struct___bad *t;
+
+ /* ugliness below is a field offset relocation */
+ return (void *)&t->fake_field_subprog - (void *)t;
+}
+
+SEC("?raw_tp/sys_enter")
+int bad_relo_subprog(const void *ctx)
+{
+ static struct task_struct___bad *t;
+
+ return bad_subprog() + bpf_core_field_size(t->pid);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} existing_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} missing_map SEC(".maps");
+
+SEC("?raw_tp/sys_enter")
+int use_missing_map(const void *ctx)
+{
+ int zero = 0, *value;
+
+ value = bpf_map_lookup_elem(&existing_map, &zero);
+
+ value = bpf_map_lookup_elem(&missing_map, &zero);
+
+ return value != NULL;
+}
+
+extern int bpf_nonexistent_kfunc(void) __ksym __weak;
+
+SEC("?raw_tp/sys_enter")
+int use_missing_kfunc(const void *ctx)
+{
+ bpf_nonexistent_kfunc();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644
index 000000000000..3a193f42c7e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 set_pid = 0;
+__u64 set_key = 0;
+__u64 set_value = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, __u64);
+ __type(value, __u64);
+} hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+ if (set_pid == bpf_get_current_pid_tgid() >> 32)
+ bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c
new file mode 100644
index 000000000000..1f7e1e59b073
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+__s32 key_serial;
+__u32 key_id;
+__u64 flags;
+
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ struct bpf_key *bkey;
+ __u32 pid;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ if (key_serial)
+ bkey = bpf_lookup_user_key(key_serial, flags);
+ else
+ bkey = bpf_lookup_system_key(key_id);
+
+ if (!bkey)
+ return -ENOENT;
+
+ bpf_key_put(bkey);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
new file mode 100644
index 000000000000..83439b87b766
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/ip.h>
+#include <linux/if_ether.h>
+
+/* We don't care about whether the packet can be received by network stack.
+ * Just care if the packet is sent to the correct device at correct direction
+ * and not panic the kernel.
+ */
+static int prepend_dummy_mac(struct __sk_buff *skb)
+{
+ char mac[] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xf,
+ 0xe, 0xd, 0xc, 0xb, 0xa, 0x08, 0x00};
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0))
+ return -1;
+
+ if (bpf_skb_store_bytes(skb, 0, mac, sizeof(mac), 0))
+ return -1;
+
+ return 0;
+}
+
+/* Use the last byte of IP address to redirect the packet */
+static int get_redirect_target(struct __sk_buff *skb)
+{
+ struct iphdr *iph = NULL;
+ void *start = (void *)(long)skb->data;
+ void *end = (void *)(long)skb->data_end;
+
+ if (start + sizeof(*iph) > end)
+ return -1;
+
+ iph = (struct iphdr *)start;
+ return bpf_ntohl(iph->daddr) & 0xff;
+}
+
+SEC("redir_ingress")
+int test_lwt_redirect_in(struct __sk_buff *skb)
+{
+ int target = get_redirect_target(skb);
+
+ if (target < 0)
+ return BPF_OK;
+
+ if (prepend_dummy_mac(skb))
+ return BPF_DROP;
+
+ return bpf_redirect(target, BPF_F_INGRESS);
+}
+
+SEC("redir_egress")
+int test_lwt_redirect_out(struct __sk_buff *skb)
+{
+ int target = get_redirect_target(skb);
+
+ if (target < 0)
+ return BPF_OK;
+
+ if (prepend_dummy_mac(skb))
+ return BPF_DROP;
+
+ return bpf_redirect(target, 0);
+}
+
+SEC("redir_egress_nomac")
+int test_lwt_redirect_out_nomac(struct __sk_buff *skb)
+{
+ int target = get_redirect_target(skb);
+
+ if (target < 0)
+ return BPF_OK;
+
+ return bpf_redirect(target, 0);
+}
+
+SEC("redir_ingress_nomac")
+int test_lwt_redirect_in_nomac(struct __sk_buff *skb)
+{
+ int target = get_redirect_target(skb);
+
+ if (target < 0)
+ return BPF_OK;
+
+ return bpf_redirect(target, BPF_F_INGRESS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_reroute.c b/tools/testing/selftests/bpf/progs/test_lwt_reroute.c
new file mode 100644
index 000000000000..1dc64351929c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lwt_reroute.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+/* This function extracts the last byte of the daddr, and uses it
+ * as output dev index.
+ */
+SEC("lwt_xmit")
+int test_lwt_reroute(struct __sk_buff *skb)
+{
+ struct iphdr *iph = NULL;
+ void *start = (void *)(long)skb->data;
+ void *end = (void *)(long)skb->data_end;
+
+ /* set mark at most once */
+ if (skb->mark != 0)
+ return BPF_OK;
+
+ if (start + sizeof(*iph) > end)
+ return BPF_DROP;
+
+ iph = (struct iphdr *)start;
+ skb->mark = bpf_ntohl(iph->daddr) & 0xff;
+
+ /* do not reroute x.x.x.0 packets */
+ if (skb->mark == 0)
+ return BPF_OK;
+
+ return BPF_LWT_REROUTE;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index 48ff2b2ad5e7..fed66f36adb6 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
+
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@@ -131,7 +133,7 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
*pad_off = 0;
// we can only go as far as ~10 TLVs due to the BPF max stack size
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 10; i++) {
struct sr6_tlv_t tlv;
@@ -302,7 +304,7 @@ int __encap_srh(struct __sk_buff *skb)
seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (unsigned long long lo = 0; lo < 4; lo++) {
seg->lo = bpf_cpu_to_be64(4 - lo);
seg->hi = bpf_cpu_to_be64(hi);
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index 1cfeb940cf9f..b295f9b721bf 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -9,21 +9,45 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 1);
__uint(map_flags, 0);
- __uint(key_size, sizeof(__u32));
- /* must be sizeof(__u32) for map in map */
- __uint(value_size, sizeof(__u32));
+ __type(key, __u32);
+ __type(value, __u32);
} mim_array SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
__uint(max_entries, 1);
__uint(map_flags, 0);
- __uint(key_size, sizeof(int));
- /* must be sizeof(__u32) for map in map */
- __uint(value_size, sizeof(__u32));
+ __type(key, int);
+ __type(value, __u32);
} mim_hash SEC(".maps");
-SEC("xdp_mimtest")
+/* The following three maps are used to test
+ * perf_event_array map can be an inner
+ * map of hash/array_of_maps.
+ */
+struct perf_event_array {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+} inner_map0 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __array(values, struct perf_event_array);
+} mim_array_pe SEC(".maps") = {
+ .values = {&inner_map0}};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __array(values, struct perf_event_array);
+} mim_hash_pe SEC(".maps") = {
+ .values = {&inner_map0}};
+
+SEC("xdp")
int xdp_mimtest0(struct xdp_md *ctx)
{
int value = 123;
@@ -49,5 +73,4 @@ int xdp_mimtest0(struct xdp_md *ctx)
return XDP_PASS;
}
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
new file mode 100644
index 000000000000..9c7d75cf0bd6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Isovalent, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, int);
+ __uint(max_entries, 4);
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 0); /* This will make map creation to fail */
+ __type(key, __u32);
+ __array(values, struct inner);
+} mim SEC(".maps");
+
+SEC("xdp")
+int xdp_noop0(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index b5c07ae7b68f..1c02511b73cd 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -30,10 +30,10 @@ struct {
__type(value, struct array_elem);
} array_map SEC(".maps");
-SEC("map_lock_demo")
+SEC("cgroup/skb")
int bpf_map_lock_test(struct __sk_buff *skb)
{
- struct hmap_elem zero = {}, *val;
+ struct hmap_elem *val;
int rnd = bpf_get_prandom_u32();
int key = 0, err = 1, i;
struct array_elem *q;
diff --git a/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
new file mode 100644
index 000000000000..ca827b1092da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u64 percpu_array_elem_sum = 0;
+__u64 percpu_hash_elem_sum = 0;
+__u64 percpu_lru_hash_elem_sum = 0;
+const volatile int nr_cpus;
+const volatile int my_pid;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_array_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u64);
+ __type(value, __u64);
+} percpu_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u64);
+ __type(value, __u64);
+} percpu_lru_hash_map SEC(".maps");
+
+struct read_percpu_elem_ctx {
+ void *map;
+ __u64 sum;
+};
+
+static int read_percpu_elem_callback(__u32 index, struct read_percpu_elem_ctx *ctx)
+{
+ __u64 key = 0;
+ __u64 *value;
+
+ value = bpf_map_lookup_percpu_elem(ctx->map, &key, index);
+ if (value)
+ ctx->sum += *value;
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int sysenter_getuid(const void *ctx)
+{
+ struct read_percpu_elem_ctx map_ctx;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ map_ctx.map = &percpu_array_map;
+ map_ctx.sum = 0;
+ bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+ percpu_array_elem_sum = map_ctx.sum;
+
+ map_ctx.map = &percpu_hash_map;
+ map_ctx.sum = 0;
+ bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+ percpu_hash_elem_sum = map_ctx.sum;
+
+ map_ctx.map = &percpu_lru_hash_map;
+ map_ctx.sum = 0;
+ bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+ percpu_lru_hash_elem_sum = map_ctx.sum;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_ops.c b/tools/testing/selftests/bpf/progs/test_map_ops.c
new file mode 100644
index 000000000000..b53b46a090c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_ops.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK);
+ __uint(max_entries, 1);
+ __type(value, int);
+} stack_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} array_map SEC(".maps");
+
+const volatile pid_t pid;
+long err = 0;
+
+static u64 callback(u64 map, u64 key, u64 val, u64 ctx, u64 flags)
+{
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getpid")
+int map_update(void *ctx)
+{
+ const int key = 0;
+ const int val = 1;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_update_elem(&hash_map, &key, &val, BPF_NOEXIST);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getppid")
+int map_delete(void *ctx)
+{
+ const int key = 0;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_delete_elem(&hash_map, &key);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int map_push(void *ctx)
+{
+ const int val = 1;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_push_elem(&stack_map, &val, 0);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_geteuid")
+int map_pop(void *ctx)
+{
+ int val;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_pop_elem(&stack_map, &val);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getgid")
+int map_peek(void *ctx)
+{
+ int val;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_peek_elem(&stack_map, &val);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_gettid")
+int map_for_each_pass(void *ctx)
+{
+ const int key = 0;
+ const int val = 1;
+ const u64 flags = 0;
+ int callback_ctx;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ bpf_map_update_elem(&array_map, &key, &val, flags);
+
+ err = bpf_for_each_map_elem(&array_map, callback, &callback_ctx, flags);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int map_for_each_fail(void *ctx)
+{
+ const int key = 0;
+ const int val = 1;
+ const u64 flags = BPF_NOEXIST;
+ int callback_ctx;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ bpf_map_update_elem(&array_map, &key, &val, flags);
+
+ /* calling for_each with non-zero flags will return error */
+ err = bpf_for_each_map_elem(&array_map, callback, &callback_ctx, flags);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
new file mode 100644
index 000000000000..27df571abf5b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. If reuse_md->migrating_sk is NULL (SYN packet),
+ * return SK_PASS without selecting a listener.
+ * 2. If reuse_md->migrating_sk is not NULL (socket migration),
+ * select a listener (reuseport_map[migrate_map[cookie]])
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 256);
+ __type(key, int);
+ __type(value, __u64);
+} reuseport_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 256);
+ __type(key, __u64);
+ __type(value, int);
+} migrate_map SEC(".maps");
+
+int migrated_at_close = 0;
+int migrated_at_close_fastopen = 0;
+int migrated_at_send_synack = 0;
+int migrated_at_recv_ack = 0;
+__be16 server_port;
+
+SEC("xdp")
+int drop_ack(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ struct tcphdr *tcp = NULL;
+
+ if (eth + 1 > data_end)
+ goto pass;
+
+ switch (bpf_ntohs(eth->h_proto)) {
+ case ETH_P_IP: {
+ struct iphdr *ip = (struct iphdr *)(eth + 1);
+
+ if (ip + 1 > data_end)
+ goto pass;
+
+ if (ip->protocol != IPPROTO_TCP)
+ goto pass;
+
+ tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4);
+ break;
+ }
+ case ETH_P_IPV6: {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1);
+
+ if (ipv6 + 1 > data_end)
+ goto pass;
+
+ if (ipv6->nexthdr != IPPROTO_TCP)
+ goto pass;
+
+ tcp = (struct tcphdr *)(ipv6 + 1);
+ break;
+ }
+ default:
+ goto pass;
+ }
+
+ if (tcp + 1 > data_end)
+ goto pass;
+
+ if (tcp->dest != server_port)
+ goto pass;
+
+ if (!tcp->syn && tcp->ack)
+ return XDP_DROP;
+
+pass:
+ return XDP_PASS;
+}
+
+SEC("sk_reuseport/migrate")
+int migrate_reuseport(struct sk_reuseport_md *reuse_md)
+{
+ int *key, flags = 0, state, err;
+ __u64 cookie;
+
+ if (!reuse_md->migrating_sk)
+ return SK_PASS;
+
+ state = reuse_md->migrating_sk->state;
+ cookie = bpf_get_socket_cookie(reuse_md->sk);
+
+ key = bpf_map_lookup_elem(&migrate_map, &cookie);
+ if (!key)
+ return SK_DROP;
+
+ err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags);
+ if (err)
+ return SK_PASS;
+
+ switch (state) {
+ case BPF_TCP_ESTABLISHED:
+ __sync_fetch_and_add(&migrated_at_close, 1);
+ break;
+ case BPF_TCP_SYN_RECV:
+ __sync_fetch_and_add(&migrated_at_close_fastopen, 1);
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (!reuse_md->len)
+ __sync_fetch_and_add(&migrated_at_send_synack, 1);
+ else
+ __sync_fetch_and_add(&migrated_at_recv_ack, 1);
+ break;
+ }
+
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
index 6077a025092c..d487153a839d 100644
--- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -27,6 +27,7 @@ unsigned int nr_pure_ack = 0;
unsigned int nr_data = 0;
unsigned int nr_syn = 0;
unsigned int nr_fin = 0;
+unsigned int nr_hwtstamp = 0;
/* Check the header received from the active side */
static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
@@ -146,6 +147,9 @@ static int check_active_hdr_in(struct bpf_sock_ops *skops)
if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
nr_pure_ack++;
+ if (skops->skb_hwtstamp)
+ nr_hwtstamp++;
+
return CG_OK;
}
@@ -293,7 +297,7 @@ static int handle_passive_estab(struct bpf_sock_ops *skops)
return check_active_hdr_in(skops);
}
-SEC("sockops/misc_estab")
+SEC("sockops")
int misc_estab(struct bpf_sock_ops *skops)
{
int true_val = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
index 4eb42cff5fe9..5a5cc19a15bf 100644
--- a/tools/testing/selftests/bpf/progs/test_mmap.c
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -9,7 +9,6 @@ char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 4096);
__uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG);
__type(key, __u32);
__type(value, char);
@@ -17,7 +16,6 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 512 * 4); /* at least 4 pages of data */
__uint(map_flags, BPF_F_MMAPABLE);
__type(key, __u32);
__type(value, __u64);
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index bd37ceec5587..03d7f89787a1 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -5,7 +5,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
__u32 raw_tp_read_sz = 0;
@@ -19,7 +19,7 @@ int BPF_PROG(handle_raw_tp,
__u32 raw_tp_bare_write_sz = 0;
-SEC("raw_tp/bpf_testmod_test_write_bare")
+SEC("raw_tp/bpf_testmod_test_write_bare_tp")
int BPF_PROG(handle_raw_tp_bare,
struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx)
{
@@ -27,6 +27,20 @@ int BPF_PROG(handle_raw_tp_bare,
return 0;
}
+int raw_tp_writable_bare_in_val = 0;
+int raw_tp_writable_bare_early_ret = 0;
+int raw_tp_writable_bare_out_val = 0;
+
+SEC("raw_tp.w/bpf_testmod_test_writable_bare_tp")
+int BPF_PROG(handle_raw_tp_writable_bare,
+ struct bpf_testmod_test_writable_ctx *writable)
+{
+ raw_tp_writable_bare_in_val = writable->val;
+ writable->early_ret = raw_tp_writable_bare_early_ret;
+ writable->val = raw_tp_writable_bare_out_val;
+ return 0;
+}
+
__u32 tp_btf_read_sz = 0;
SEC("tp_btf/bpf_testmod_test_read")
@@ -50,7 +64,7 @@ int BPF_PROG(handle_fentry,
__u32 fentry_manual_read_sz = 0;
-SEC("fentry/placeholder")
+SEC("fentry")
int BPF_PROG(handle_fentry_manual,
struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
@@ -59,6 +73,29 @@ int BPF_PROG(handle_fentry_manual,
return 0;
}
+__u32 fentry_explicit_read_sz = 0;
+
+SEC("fentry/bpf_testmod:bpf_testmod_test_read")
+int BPF_PROG(handle_fentry_explicit,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fentry_explicit_read_sz = len;
+ return 0;
+}
+
+
+__u32 fentry_explicit_manual_read_sz = 0;
+
+SEC("fentry")
+int BPF_PROG(handle_fentry_explicit_manual,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fentry_explicit_manual_read_sz = len;
+ return 0;
+}
+
__u32 fexit_read_sz = 0;
int fexit_ret = 0;
@@ -73,6 +110,18 @@ int BPF_PROG(handle_fexit,
return 0;
}
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
+{
+ long buf = 0;
+
+ bpf_probe_read_kernel(&buf, 8, ret);
+ bpf_probe_read_kernel(&buf, 8, (char *)ret + 256);
+ *(volatile int *)ret;
+ *(volatile int *)&ret->f_mode;
+ return 0;
+}
+
__u32 fmod_ret_read_sz = 0;
SEC("fmod_ret/bpf_testmod_test_read")
@@ -84,4 +133,10 @@ int BPF_PROG(handle_fmod_ret,
return 0; /* don't override the exit code */
}
+SEC("kprobe.multi/bpf_testmod_test_read")
+int BPF_PROG(kprobe_multi)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c b/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c
new file mode 100644
index 000000000000..03a475160abe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#define NF_ACCEPT 1
+
+SEC("netfilter")
+int nf_link_attach_test(struct bpf_nf_ctx *ctx)
+{
+ return NF_ACCEPT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
index 0763d49f9c42..386315afad65 100644
--- a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
@@ -5,23 +5,48 @@
#include <stdint.h>
#include <bpf/bpf_helpers.h>
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u32);
+} sock_map SEC(".maps");
+
__u64 user_pid = 0;
__u64 user_tgid = 0;
__u64 dev = 0;
__u64 ino = 0;
-SEC("tracepoint/syscalls/sys_enter_nanosleep")
-int handler(const void *ctx)
+static void get_pid_tgid(void)
{
struct bpf_pidns_info nsdata;
if (bpf_get_ns_current_pid_tgid(dev, ino, &nsdata, sizeof(struct bpf_pidns_info)))
- return 0;
+ return;
user_pid = nsdata.pid;
user_tgid = nsdata.tgid;
+}
+SEC("?tracepoint/syscalls/sys_enter_nanosleep")
+int tp_handler(const void *ctx)
+{
+ get_pid_tgid();
return 0;
}
+SEC("?cgroup/bind4")
+int cgroup_bind4(struct bpf_sock_addr *ctx)
+{
+ get_pid_tgid();
+ return 1;
+}
+
+SEC("?sk_msg")
+int sk_msg(struct sk_msg_md *msg)
+{
+ get_pid_tgid();
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index ded71b3ff6b4..2850ae788a91 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -4,6 +4,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -19,6 +20,7 @@ int test_obj_id(void *ctx)
__u64 *value;
value = bpf_map_lookup_elem(&test_map_id, &key);
+ __sink(value);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index abb7344b531f..5edf3cdc213d 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -1,9 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-#include <stdbool.h>
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
new file mode 100644
index 000000000000..d9b2ba7ac340
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* This parsing logic is taken from the open source library katran, a layer 4
+ * load balancer.
+ *
+ * This code logic using dynptrs can be found in test_parse_tcp_hdr_opt_dynptr.c
+ *
+ * https://github.com/facebookincubator/katran/blob/main/katran/lib/bpf/pckt_parsing.h
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/tcp.h>
+#include <stdbool.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+#include "test_tcp_hdr_options.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Kind number used for experiments */
+const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
+/* Length of the tcp header option */
+const __u32 tcp_hdr_opt_len_tpr = 6;
+/* maximum number of header options to check to lookup server_id */
+const __u32 tcp_hdr_opt_max_opt_checks = 15;
+
+__u32 server_id;
+
+struct hdr_opt_state {
+ __u32 server_id;
+ __u8 byte_offset;
+ __u8 hdr_bytes_remaining;
+};
+
+static int parse_hdr_opt(const struct xdp_md *xdp, struct hdr_opt_state *state)
+{
+ const void *data = (void *)(long)xdp->data;
+ const void *data_end = (void *)(long)xdp->data_end;
+ __u8 *tcp_opt, kind, hdr_len;
+
+ tcp_opt = (__u8 *)(data + state->byte_offset);
+ if (tcp_opt + 1 > data_end)
+ return -1;
+
+ kind = tcp_opt[0];
+
+ if (kind == TCPOPT_EOL)
+ return -1;
+
+ if (kind == TCPOPT_NOP) {
+ state->hdr_bytes_remaining--;
+ state->byte_offset++;
+ return 0;
+ }
+
+ if (state->hdr_bytes_remaining < 2 ||
+ tcp_opt + sizeof(__u8) + sizeof(__u8) > data_end)
+ return -1;
+
+ hdr_len = tcp_opt[1];
+ if (hdr_len > state->hdr_bytes_remaining)
+ return -1;
+
+ if (kind == tcp_hdr_opt_kind_tpr) {
+ if (hdr_len != tcp_hdr_opt_len_tpr)
+ return -1;
+
+ if (tcp_opt + tcp_hdr_opt_len_tpr > data_end)
+ return -1;
+
+ state->server_id = *(__u32 *)&tcp_opt[2];
+ return 1;
+ }
+
+ state->hdr_bytes_remaining -= hdr_len;
+ state->byte_offset += hdr_len;
+ return 0;
+}
+
+SEC("xdp")
+int xdp_ingress_v6(struct xdp_md *xdp)
+{
+ const void *data = (void *)(long)xdp->data;
+ const void *data_end = (void *)(long)xdp->data_end;
+ struct hdr_opt_state opt_state = {};
+ __u8 tcp_hdr_opt_len = 0;
+ struct tcphdr *tcp_hdr;
+ __u64 tcp_offset = 0;
+ int err;
+
+ tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+ tcp_hdr = (struct tcphdr *)(data + tcp_offset);
+ if (tcp_hdr + 1 > data_end)
+ return XDP_DROP;
+
+ tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
+ if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
+ return XDP_DROP;
+
+ opt_state.hdr_bytes_remaining = tcp_hdr_opt_len;
+ opt_state.byte_offset = sizeof(struct tcphdr) + tcp_offset;
+
+ /* max number of bytes of options in tcp header is 40 bytes */
+ for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
+ err = parse_hdr_opt(xdp, &opt_state);
+
+ if (err || !opt_state.hdr_bytes_remaining)
+ break;
+ }
+
+ if (!opt_state.server_id)
+ return XDP_DROP;
+
+ server_id = opt_state.server_id;
+
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
new file mode 100644
index 000000000000..dc6e43bc6a62
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* This logic is lifted from a real-world use case of packet parsing, used in
+ * the open source library katran, a layer 4 load balancer.
+ *
+ * This test demonstrates how to parse packet contents using dynptrs. The
+ * original code (parsing without dynptrs) can be found in test_parse_tcp_hdr_opt.c
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/tcp.h>
+#include <stdbool.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+#include "test_tcp_hdr_options.h"
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Kind number used for experiments */
+const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
+/* Length of the tcp header option */
+const __u32 tcp_hdr_opt_len_tpr = 6;
+/* maximum number of header options to check to lookup server_id */
+const __u32 tcp_hdr_opt_max_opt_checks = 15;
+
+__u32 server_id;
+
+static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining,
+ __u32 *server_id)
+{
+ __u8 kind, hdr_len;
+ __u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)];
+ __u8 *data;
+
+ __builtin_memset(buffer, 0, sizeof(buffer));
+
+ data = bpf_dynptr_slice(ptr, *off, buffer, sizeof(buffer));
+ if (!data)
+ return -1;
+
+ kind = data[0];
+
+ if (kind == TCPOPT_EOL)
+ return -1;
+
+ if (kind == TCPOPT_NOP) {
+ *off += 1;
+ *hdr_bytes_remaining -= 1;
+ return 0;
+ }
+
+ if (*hdr_bytes_remaining < 2)
+ return -1;
+
+ hdr_len = data[1];
+ if (hdr_len > *hdr_bytes_remaining)
+ return -1;
+
+ if (kind == tcp_hdr_opt_kind_tpr) {
+ if (hdr_len != tcp_hdr_opt_len_tpr)
+ return -1;
+
+ __builtin_memcpy(server_id, (__u32 *)(data + 2), sizeof(*server_id));
+ return 1;
+ }
+
+ *off += hdr_len;
+ *hdr_bytes_remaining -= hdr_len;
+ return 0;
+}
+
+SEC("xdp")
+int xdp_ingress_v6(struct xdp_md *xdp)
+{
+ __u8 buffer[sizeof(struct tcphdr)] = {};
+ __u8 hdr_bytes_remaining;
+ struct tcphdr *tcp_hdr;
+ __u8 tcp_hdr_opt_len;
+ int err = 0;
+ __u32 off;
+
+ struct bpf_dynptr ptr;
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+ off = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+
+ tcp_hdr = bpf_dynptr_slice(&ptr, off, buffer, sizeof(buffer));
+ if (!tcp_hdr)
+ return XDP_DROP;
+
+ tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
+ if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
+ return XDP_DROP;
+
+ hdr_bytes_remaining = tcp_hdr_opt_len;
+
+ off += sizeof(struct tcphdr);
+
+ /* max number of bytes of options in tcp header is 40 bytes */
+ for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
+ err = parse_hdr_opt(&ptr, &off, &hdr_bytes_remaining, &server_id);
+
+ if (err || !hdr_bytes_remaining)
+ break;
+ }
+
+ if (!server_id)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
index fb22de7c365d..1249a945699f 100644
--- a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
+++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
@@ -7,15 +7,15 @@
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(max_entries, 1);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} array_1 SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(max_entries, 1);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
__uint(map_flags, BPF_F_PRESERVE_ELEMS);
} array_2 SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c
index a1ccc831c882..05ac9410cd68 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_branches.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c
@@ -8,6 +8,7 @@
#include <bpf/bpf_tracing.h>
int valid = 0;
+int run_cnt = 0;
int required_size_out = 0;
int written_stack_out = 0;
int written_global_out = 0;
@@ -24,6 +25,8 @@ int perf_branches(void *ctx)
__u64 entries[4 * 3] = {0};
int required_size, written_stack, written_global;
+ ++run_cnt;
+
/* write to stack */
written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
/* ignore spurious events */
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index 8207a2dc2f9d..17d5b67744d5 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -7,19 +7,35 @@
#include <bpf/bpf_tracing.h>
struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} my_pid_map SEC(".maps");
+
+struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} perf_buf_map SEC(".maps");
SEC("tp/raw_syscalls/sys_enter")
int handle_sys_enter(void *ctx)
{
+ int zero = 0, *my_pid, cur_pid;
int cpu = bpf_get_smp_processor_id();
+ my_pid = bpf_map_lookup_elem(&my_pid_map, &zero);
+ if (!my_pid)
+ return 1;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+ if (cur_pid != *my_pid)
+ return 1;
+
bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU,
&cpu, sizeof(cpu));
- return 0;
+ return 1;
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_link.c b/tools/testing/selftests/bpf/progs/test_perf_link.c
new file mode 100644
index 000000000000..c1db9fd98d0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_link.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int run_cnt = 0;
+
+SEC("perf_event")
+int handler(struct pt_regs *ctx)
+{
+ __sync_fetch_and_add(&run_cnt, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_skip.c b/tools/testing/selftests/bpf/progs/test_perf_skip.c
new file mode 100644
index 000000000000..7eb8b6de7a57
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_skip.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+uintptr_t ip;
+
+SEC("perf_event")
+int handler(struct bpf_perf_event_data *data)
+{
+ /* Skip events that have the correct ip. */
+ return ip != PT_REGS_IP(&data->regs);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c
index 4ef2630292b2..0facea6cbbae 100644
--- a/tools/testing/selftests/bpf/progs/test_pinning.c
+++ b/tools/testing/selftests/bpf/progs/test_pinning.c
@@ -3,8 +3,6 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_devmap.c b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
new file mode 100644
index 000000000000..c855f8f87eff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap2 SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_htab.c b/tools/testing/selftests/bpf/progs/test_pinning_htab.c
new file mode 100644
index 000000000000..ae227930c73c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_htab.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct timer_val {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, struct timer_val);
+ __uint(max_entries, 1);
+} timer_prealloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, struct timer_val);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} timer_no_prealloc SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
index 5412e0c732c7..2a56db1094b8 100644
--- a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
+++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
@@ -3,8 +3,6 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index 852051064507..bce7173152c6 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -13,9 +13,7 @@
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-int _version SEC("version") = 1;
+#include "bpf_misc.h"
/* llvm will optimize both subprograms into exactly the same BPF assembly
*
@@ -54,6 +52,8 @@ int get_skb_len(struct __sk_buff *skb)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->len;
}
@@ -76,6 +76,8 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->ifindex * val * var;
}
@@ -97,7 +99,7 @@ int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
return 0;
}
-SEC("classifier/test_pkt_access")
+SEC("tc")
int test_pkt_access(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
index 610c74ea9f64..d1839366f3e1 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
@@ -7,8 +7,6 @@
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define TEST_FIELD(TYPE, FIELD, MASK) \
{ \
@@ -27,7 +25,7 @@ int _version SEC("version") = 1;
}
#endif
-SEC("classifier/test_pkt_md_access")
+SEC("tc")
int test_pkt_md_access(struct __sk_buff *skb)
{
TEST_FIELD(__u8, len, 0xFF);
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index 89b3532ccc75..a8e501af9604 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -1,26 +1,47 @@
// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/ptrace.h>
-#include <linux/bpf.h>
-
-#include <netinet/in.h>
-
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
static struct sockaddr_in old;
-SEC("kprobe/__sys_connect")
-int BPF_KPROBE(handle_sys_connect)
+static int handle_sys_connect_common(struct sockaddr_in *uservaddr)
{
- void *ptr = (void *)PT_REGS_PARM2(ctx);
struct sockaddr_in new;
- bpf_probe_read_user(&old, sizeof(old), ptr);
+ bpf_probe_read_user(&old, sizeof(old), uservaddr);
__builtin_memset(&new, 0xab, sizeof(new));
- bpf_probe_write_user(ptr, &new, sizeof(new));
+ bpf_probe_write_user(uservaddr, &new, sizeof(new));
+
+ return 0;
+}
+
+SEC("ksyscall/connect")
+int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr,
+ int addrlen)
+{
+ return handle_sys_connect_common(uservaddr);
+}
+
+#if defined(bpf_target_s390)
+#ifndef SYS_CONNECT
+#define SYS_CONNECT 3
+#endif
+
+SEC("ksyscall/socketcall")
+int BPF_KSYSCALL(handle_sys_socketcall, int call, unsigned long *args)
+{
+ if (call == SYS_CONNECT) {
+ struct sockaddr_in *uservaddr;
+
+ bpf_probe_read_user(&uservaddr, sizeof(uservaddr), &args[1]);
+ return handle_sys_connect_common(uservaddr);
+ }
return 0;
}
+#endif
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_prog_array_init.c b/tools/testing/selftests/bpf/progs/test_prog_array_init.c
new file mode 100644
index 000000000000..2cd138356126
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_prog_array_init.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile pid_t my_pid = 0;
+int value = 0;
+
+SEC("raw_tp/sys_enter")
+int tailcall_1(void *ctx)
+{
+ value = 42;
+ return 0;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int (void *));
+} prog_array_init SEC(".maps") = {
+ .values = {
+ [1] = (void *)&tailcall_1,
+ },
+};
+
+SEC("raw_tp/sys_enter")
+int entry(void *ctx)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ bpf_tail_call(ctx, &prog_array_init, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
new file mode 100644
index 000000000000..89b0cd5a3e06
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+char tp_name[128];
+
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ switch (cmd) {
+ case BPF_RAW_TRACEPOINT_OPEN:
+ bpf_copy_from_user(tp_name, sizeof(tp_name) - 1,
+ (void *)attr->raw_tracepoint.name);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+SEC("raw_tracepoint")
+int BPF_PROG(raw_tp_run)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
index 4dd9806ad73b..648e8cab7a23 100644
--- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
+++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
@@ -8,8 +8,6 @@
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
struct {
__uint(type, MAP_TYPE);
__uint(max_entries, 32);
@@ -26,7 +24,7 @@ struct {
__uint(value_size, sizeof(__u32));
} map_out SEC(".maps");
-SEC("test")
+SEC("tc")
int _test(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
index ecbeea2df259..7035fb4d4165 100644
--- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
+++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
@@ -4,8 +4,9 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
-static volatile const struct {
+const struct {
unsigned a[4];
/*
* if the struct's size is multiple of 16, compiler will put it into
@@ -15,11 +16,11 @@ static volatile const struct {
char _y;
} rdonly_values = { .a = {2, 3, 4, 5} };
-static volatile struct {
+struct {
unsigned did_run;
unsigned iters;
unsigned sum;
-} res;
+} res = {};
SEC("raw_tracepoint/sys_enter:skip_loop")
int skip_loop(struct pt_regs *ctx)
@@ -64,7 +65,7 @@ int full_loop(struct pt_regs *ctx)
{
/* prevent compiler to optimize everything out */
unsigned * volatile p = (void *)&rdonly_values.a;
- int i = sizeof(rdonly_values.a) / sizeof(rdonly_values.a[0]);
+ int i = ARRAY_SIZE(rdonly_values.a);
unsigned iters = 0, sum = 0;
/* validate verifier can allow full loop as well */
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 8ba9959b036b..501cefa97633 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -15,7 +16,6 @@ struct sample {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} ringbuf SEC(".maps");
/* inputs */
@@ -36,12 +36,11 @@ long prod_pos = 0;
/* inner state */
long seq = 0;
-SEC("tp/syscalls/sys_enter_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int test_ringbuf(void *ctx)
{
int cur_pid = bpf_get_current_pid_tgid() >> 32;
struct sample *sample;
- int zero = 0;
if (cur_pid != pid)
return 0;
@@ -49,7 +48,7 @@ int test_ringbuf(void *ctx)
sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
if (!sample) {
__sync_fetch_and_add(&dropped, 1);
- return 1;
+ return 0;
}
sample->pid = pid;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c
new file mode 100644
index 000000000000..21bb7da90ea5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, struct sample);
+ __type(value, int);
+} hash_map SEC(".maps");
+
+/* inputs */
+int pid = 0;
+
+/* inner state */
+long seq = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_mem_map_key(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample, sample_copy;
+ int *lookup_val;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample)
+ return 0;
+
+ sample->pid = pid;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+ sample->seq = ++seq;
+ sample->value = 42;
+
+ /* test using 'sample' (PTR_TO_MEM | MEM_ALLOC) as map key arg
+ */
+ lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample);
+ __sink(lookup_val);
+
+ /* workaround - memcpy is necessary so that verifier doesn't
+ * complain with:
+ * verifier internal error: more than one arg with ref_obj_id R3
+ * when trying to do bpf_map_update_elem(&hash_map, sample, &sample->seq, BPF_ANY);
+ *
+ * Since bpf_map_lookup_elem above uses 'sample' as key, test using
+ * sample field as value below
+ */
+ __builtin_memcpy(&sample_copy, sample, sizeof(struct sample));
+ bpf_map_update_elem(&hash_map, &sample_copy, &sample->seq, BPF_ANY);
+
+ bpf_ringbuf_submit(sample, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index edf3b6953533..9626baa6779c 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -15,7 +15,8 @@ struct sample {
struct ringbuf_map {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
+ /* libbpf will adjust to valid page size */
+ __uint(max_entries, 1000);
} ringbuf1 SEC(".maps"),
ringbuf2 SEC(".maps");
@@ -31,6 +32,17 @@ struct {
},
};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __array(values, struct ringbuf_map);
+} ringbuf_hash SEC(".maps") = {
+ .values = {
+ [0] = &ringbuf1,
+ },
+};
+
/* inputs */
int pid = 0;
int target_ring = 0;
@@ -47,7 +59,6 @@ int test_ringbuf(void *ctx)
int cur_pid = bpf_get_current_pid_tgid() >> 32;
struct sample *sample;
void *rb;
- int zero = 0;
if (cur_pid != pid)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_n.c b/tools/testing/selftests/bpf/progs/test_ringbuf_n.c
new file mode 100644
index 000000000000..8669eb42dbe0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_n.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Andrea Righi <andrea.righi@canonical.com>
+
+#include <linux/bpf.h>
+#include <sched.h>
+#include <unistd.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define TASK_COMM_LEN 16
+
+struct sample {
+ int pid;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+int pid = 0;
+long value = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_n(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample)
+ return 0;
+
+ sample->pid = pid;
+ sample->value = value;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+
+ bpf_ringbuf_submit(sample, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
new file mode 100644
index 000000000000..ff4aa67ddacc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(map_flags, BPF_F_RB_OVERWRITE);
+} ringbuf SEC(".maps");
+
+int pid;
+
+const volatile unsigned long LEN1;
+const volatile unsigned long LEN2;
+const volatile unsigned long LEN3;
+const volatile unsigned long LEN4;
+const volatile unsigned long LEN5;
+
+long reserve1_fail = 0;
+long reserve2_fail = 0;
+long reserve3_fail = 0;
+long reserve4_fail = 0;
+long reserve5_fail = 0;
+
+unsigned long avail_data = 0;
+unsigned long ring_size = 0;
+unsigned long cons_pos = 0;
+unsigned long prod_pos = 0;
+unsigned long over_pos = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_overwrite_ringbuf(void *ctx)
+{
+ char *rec1, *rec2, *rec3, *rec4, *rec5;
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid != pid)
+ return 0;
+
+ rec1 = bpf_ringbuf_reserve(&ringbuf, LEN1, 0);
+ if (!rec1) {
+ reserve1_fail = 1;
+ return 0;
+ }
+
+ rec2 = bpf_ringbuf_reserve(&ringbuf, LEN2, 0);
+ if (!rec2) {
+ bpf_ringbuf_discard(rec1, 0);
+ reserve2_fail = 1;
+ return 0;
+ }
+
+ rec3 = bpf_ringbuf_reserve(&ringbuf, LEN3, 0);
+ /* expect failure */
+ if (!rec3) {
+ reserve3_fail = 1;
+ } else {
+ bpf_ringbuf_discard(rec1, 0);
+ bpf_ringbuf_discard(rec2, 0);
+ bpf_ringbuf_discard(rec3, 0);
+ return 0;
+ }
+
+ rec4 = bpf_ringbuf_reserve(&ringbuf, LEN4, 0);
+ if (!rec4) {
+ reserve4_fail = 1;
+ bpf_ringbuf_discard(rec1, 0);
+ bpf_ringbuf_discard(rec2, 0);
+ return 0;
+ }
+
+ bpf_ringbuf_submit(rec1, 0);
+ bpf_ringbuf_submit(rec2, 0);
+ bpf_ringbuf_submit(rec4, 0);
+
+ rec5 = bpf_ringbuf_reserve(&ringbuf, LEN5, 0);
+ if (!rec5) {
+ reserve5_fail = 1;
+ return 0;
+ }
+
+ for (int i = 0; i < LEN3; i++)
+ rec5[i] = 0xdd;
+
+ bpf_ringbuf_submit(rec5, 0);
+
+ ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE);
+ avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+ cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+ prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+ over_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_OVERWRITE_POS);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
new file mode 100644
index 000000000000..f063a0013f85
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+
+/* outputs */
+long passed = 0;
+long discarded = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_write(void *ctx)
+{
+ int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32;
+ void *sample1, *sample2;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
+ if (!sample1)
+ return 0;
+ /* first one can pass */
+ sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
+ if (!sample2) {
+ bpf_ringbuf_discard(sample1, 0);
+ __sync_fetch_and_add(&discarded, 1);
+ return 0;
+ }
+ /* second one must not */
+ __sync_fetch_and_add(&passed, 1);
+ foo = sample2 + 4084;
+ *foo = 256;
+ bpf_ringbuf_discard(sample1, 0);
+ bpf_ringbuf_discard(sample2, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
index a7278f064368..5059050f74f6 100644
--- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
+
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@@ -134,7 +136,7 @@ static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb,
// we can only go as far as ~10 TLVs due to the BPF max stack size
// workaround: define induction variable "i" as "long" instead
// of "int" to prevent alu32 sub-register spilling.
- #pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (long i = 0; i < 100; i++) {
struct sr6_tlv_t tlv;
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 26e77dcc7e91..a5be3267dbb0 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
-#include <stdlib.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -15,8 +14,6 @@
#include <bpf/bpf_helpers.h>
#include "test_select_reuseport_common.h"
-int _version SEC("version") = 1;
-
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif
@@ -24,8 +21,8 @@ int _version SEC("version") = 1;
struct {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 1);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(__u32));
+ __type(key, __u32);
+ __type(value, __u32);
} outer_map SEC(".maps");
struct {
@@ -66,7 +63,7 @@ SEC("sk_reuseport")
int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
{
__u32 linum, index = 0, flags = 0, index_zero = 0;
- __u32 *result_cnt, *linum_value;
+ __u32 *result_cnt;
struct data_check data_check = {};
struct cmd *cmd, cmd_copy;
void *data, *data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index b4233d3efac2..176a355e3062 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -1,27 +1,50 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <linux/version.h>
#include <bpf/bpf_helpers.h>
-__u32 sig = 0, pid = 0, status = 0, signal_thread = 0;
+struct task_struct *bpf_task_from_pid(int pid) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type, u64 value) __ksym;
+
+__u32 sig = 0, pid = 0, status = 0, signal_thread = 0, target_pid = 0;
static __always_inline int bpf_send_signal_test(void *ctx)
{
+ struct task_struct *target_task = NULL;
int ret;
+ u64 value;
- if (status != 0 || sig == 0 || pid == 0)
+ if (status != 0 || pid == 0)
return 0;
if ((bpf_get_current_pid_tgid() >> 32) == pid) {
- if (signal_thread)
- ret = bpf_send_signal_thread(sig);
- else
- ret = bpf_send_signal(sig);
+ if (target_pid) {
+ target_task = bpf_task_from_pid(target_pid);
+ if (!target_task)
+ return 0;
+ value = 8;
+ }
+
+ if (signal_thread) {
+ if (target_pid)
+ ret = bpf_send_signal_task(target_task, sig, PIDTYPE_PID, value);
+ else
+ ret = bpf_send_signal_thread(sig);
+ } else {
+ if (target_pid)
+ ret = bpf_send_signal_task(target_task, sig, PIDTYPE_TGID, value);
+ else
+ ret = bpf_send_signal(sig);
+ }
if (ret == 0)
status = 1;
}
+ if (target_task)
+ bpf_task_release(target_task);
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
new file mode 100644
index 000000000000..6a612cf168d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+
+const char xattr_foo[] = "security.bpf.foo";
+const char xattr_bar[] = "security.bpf.bar";
+static const char xattr_selinux[] = "security.selinux";
+char value_bar[] = "world";
+char read_value[32];
+
+bool set_security_bpf_bar_success;
+bool remove_security_bpf_bar_success;
+bool set_security_selinux_fail;
+bool remove_security_selinux_fail;
+
+char name_buf[32];
+
+static inline bool name_match_foo(const char *name)
+{
+ bpf_probe_read_kernel(name_buf, sizeof(name_buf), name);
+
+ return !bpf_strncmp(name_buf, sizeof(xattr_foo), xattr_foo);
+}
+
+/* Test bpf_set_dentry_xattr and bpf_remove_dentry_xattr */
+SEC("lsm.s/inode_getxattr")
+int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
+
+bool locked_set_security_bpf_bar_success;
+bool locked_remove_security_bpf_bar_success;
+bool locked_set_security_selinux_fail;
+bool locked_remove_security_selinux_fail;
+
+/* Test bpf_set_dentry_xattr_locked and bpf_remove_dentry_xattr_locked.
+ * It not necessary to differentiate the _locked version and the
+ * not-_locked version in the BPF program. The verifier will fix them up
+ * properly.
+ */
+SEC("lsm.s/inode_setxattr")
+int BPF_PROG(test_inode_setxattr, struct mnt_idmap *idmap,
+ struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ locked_set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ locked_set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ locked_remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ locked_remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
new file mode 100644
index 000000000000..34b30e2603f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+#include "err.h"
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+#define MAX_SIG_SIZE 1024
+
+/* By default, "fsverity sign" signs a file with fsverity_formatted_digest
+ * of the file. fsverity_formatted_digest on the kernel side is only used
+ * with CONFIG_FS_VERITY_BUILTIN_SIGNATURES. However, BPF LSM doesn't not
+ * require CONFIG_FS_VERITY_BUILTIN_SIGNATURES, so vmlinux.h may not have
+ * fsverity_formatted_digest. In this test, we intentionally avoid using
+ * fsverity_formatted_digest.
+ *
+ * Luckily, fsverity_formatted_digest is simply 8-byte magic followed by
+ * fsverity_digest. We use a char array of size fsverity_formatted_digest
+ * plus SHA256_DIGEST_SIZE. The magic part of it is filled by user space,
+ * and the rest of it is filled by bpf_get_fsverity_digest.
+ *
+ * Note that, generating signatures based on fsverity_formatted_digest is
+ * the design choice of this selftest (and "fsverity sign"). With BPF
+ * LSM, we have the flexibility to generate signature based on other data
+ * sets, for example, fsverity_digest or only the digest[] part of it.
+ */
+#define MAGIC_SIZE 8
+#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */
+char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
+
+__u32 monitored_pid;
+char sig[MAX_SIG_SIZE];
+__u32 sig_size;
+__s32 user_keyring_serial;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+ struct bpf_dynptr digest_ptr, sig_ptr;
+ struct bpf_key *trusted_keyring;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* digest_ptr points to fsverity_digest */
+ bpf_dynptr_from_mem(digest + MAGIC_SIZE, sizeof(digest) - MAGIC_SIZE, 0, &digest_ptr);
+
+ ret = bpf_get_fsverity_digest(f, &digest_ptr);
+ /* No verity, allow access */
+ if (ret < 0)
+ return 0;
+
+ /* Move digest_ptr to fsverity_formatted_digest */
+ bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr);
+
+ /* Read signature from xattr */
+ bpf_dynptr_from_mem(sig, sizeof(sig), 0, &sig_ptr);
+ ret = bpf_get_file_xattr(f, "user.sig", &sig_ptr);
+ /* No signature, reject access */
+ if (ret < 0)
+ return -EPERM;
+
+ trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0);
+ if (!trusted_keyring)
+ return -ENOENT;
+
+ /* Verify signature */
+ ret = bpf_verify_pkcs7_signature(&digest_ptr, &sig_ptr, trusted_keyring);
+
+ bpf_key_put(trusted_keyring);
+
+ set_if_not_errno_or_zero(ret, -EFAULT);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_siphash.h b/tools/testing/selftests/bpf/progs/test_siphash.h
new file mode 100644
index 000000000000..5d3a7ec36780
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_siphash.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#ifndef _TEST_SIPHASH_H
+#define _TEST_SIPHASH_H
+
+/* include/linux/bitops.h */
+static inline u64 rol64(u64 word, unsigned int shift)
+{
+ return (word << (shift & 63)) | (word >> ((-shift) & 63));
+}
+
+/* include/linux/siphash.h */
+#define SIPHASH_PERMUTATION(a, b, c, d) ( \
+ (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \
+ (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \
+ (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \
+ (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32))
+
+#define SIPHASH_CONST_0 0x736f6d6570736575ULL
+#define SIPHASH_CONST_1 0x646f72616e646f6dULL
+#define SIPHASH_CONST_2 0x6c7967656e657261ULL
+#define SIPHASH_CONST_3 0x7465646279746573ULL
+
+/* lib/siphash.c */
+#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3)
+
+#define PREAMBLE(len) \
+ u64 v0 = SIPHASH_CONST_0; \
+ u64 v1 = SIPHASH_CONST_1; \
+ u64 v2 = SIPHASH_CONST_2; \
+ u64 v3 = SIPHASH_CONST_3; \
+ u64 b = ((u64)(len)) << 56; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define POSTAMBLE \
+ v3 ^= b; \
+ SIPROUND; \
+ SIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+static inline u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
+{
+ PREAMBLE(16)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ POSTAMBLE
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 1ecd987005d2..3079244c7f96 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -15,7 +15,18 @@
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#if defined(IPROUTE2_HAVE_LIBBPF)
+/* Use a new-style map definition. */
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __type(key, int);
+ __type(value, __u64);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+ __uint(max_entries, 1);
+} server_map SEC(".maps");
+#else
/* Pin map under /sys/fs/bpf/tc/globals/<map name> */
#define PIN_GLOBAL_NS 2
@@ -35,8 +46,8 @@ struct {
.max_elem = 1,
.pinning = PIN_GLOBAL_NS,
};
+#endif
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
@@ -47,7 +58,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
void *data = (void *)(long)skb->data;
struct bpf_sock_tuple *result;
struct ethhdr *eth;
- __u64 tuple_len;
__u8 proto = 0;
__u64 ihl_len;
@@ -84,13 +94,13 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
return NULL;
*tcp = (proto == IPPROTO_TCP);
+ __sink(ihl_len);
return result;
}
static inline int
handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
- struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
const int zero = 0;
size_t tuple_len;
@@ -122,7 +132,6 @@ assign:
static inline int
handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
- struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
const int zero = 0;
size_t tuple_len;
@@ -159,13 +168,12 @@ assign:
return ret;
}
-SEC("classifier/sk_assign_test")
+SEC("tc")
int bpf_sk_assign_test(struct __sk_buff *skb)
{
- struct bpf_sock_tuple *tuple, ln = {0};
+ struct bpf_sock_tuple *tuple;
bool ipv4 = false;
bool tcp = false;
- int tuple_len;
int ret = 0;
tuple = get_tuple(skb, &ipv4, &tcp);
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c
new file mode 100644
index 000000000000..dcf46adfda04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define IPROUTE2_HAVE_LIBBPF
+#include "test_sk_assign.c"
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 1032b292af5b..71f844b9b902 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -64,36 +64,48 @@ static const int PROG_DONE = 1;
static const __u32 KEY_SERVER_A = SERVER_A;
static const __u32 KEY_SERVER_B = SERVER_B;
+static const __u16 SRC_PORT = bpf_htons(8008);
+static const __u32 SRC_IP4 = IP4(127, 0, 0, 2);
+static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002);
+
static const __u16 DST_PORT = 7007; /* Host byte order */
static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
-SEC("sk_lookup/lookup_pass")
+SEC("sk_lookup")
int lookup_pass(struct bpf_sk_lookup *ctx)
{
return SK_PASS;
}
-SEC("sk_lookup/lookup_drop")
+SEC("sk_lookup")
int lookup_drop(struct bpf_sk_lookup *ctx)
{
return SK_DROP;
}
-SEC("sk_reuseport/reuse_pass")
+SEC("sk_lookup")
+int check_ifindex(struct bpf_sk_lookup *ctx)
+{
+ if (ctx->ingress_ifindex == 1)
+ return SK_DROP;
+ return SK_PASS;
+}
+
+SEC("sk_reuseport")
int reuseport_pass(struct sk_reuseport_md *ctx)
{
return SK_PASS;
}
-SEC("sk_reuseport/reuse_drop")
+SEC("sk_reuseport")
int reuseport_drop(struct sk_reuseport_md *ctx)
{
return SK_DROP;
}
/* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
-SEC("sk_lookup/redir_port")
+SEC("sk_lookup")
int redir_port(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
@@ -112,7 +124,7 @@ int redir_port(struct bpf_sk_lookup *ctx)
}
/* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
-SEC("sk_lookup/redir_ip4")
+SEC("sk_lookup")
int redir_ip4(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
@@ -135,7 +147,7 @@ int redir_ip4(struct bpf_sk_lookup *ctx)
}
/* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
-SEC("sk_lookup/redir_ip6")
+SEC("sk_lookup")
int redir_ip6(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
@@ -160,7 +172,7 @@ int redir_ip6(struct bpf_sk_lookup *ctx)
return err ? SK_DROP : SK_PASS;
}
-SEC("sk_lookup/select_sock_a")
+SEC("sk_lookup")
int select_sock_a(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
@@ -175,7 +187,7 @@ int select_sock_a(struct bpf_sk_lookup *ctx)
return err ? SK_DROP : SK_PASS;
}
-SEC("sk_lookup/select_sock_a_no_reuseport")
+SEC("sk_lookup")
int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
@@ -190,7 +202,7 @@ int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
return err ? SK_DROP : SK_PASS;
}
-SEC("sk_reuseport/select_sock_b")
+SEC("sk_reuseport")
int select_sock_b(struct sk_reuseport_md *ctx)
{
__u32 key = KEY_SERVER_B;
@@ -201,7 +213,7 @@ int select_sock_b(struct sk_reuseport_md *ctx)
}
/* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
-SEC("sk_lookup/sk_assign_eexist")
+SEC("sk_lookup")
int sk_assign_eexist(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
@@ -234,7 +246,7 @@ out:
}
/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
-SEC("sk_lookup/sk_assign_replace_flag")
+SEC("sk_lookup")
int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
@@ -266,7 +278,7 @@ out:
}
/* Check that bpf_sk_assign(sk=NULL) is accepted. */
-SEC("sk_lookup/sk_assign_null")
+SEC("sk_lookup")
int sk_assign_null(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk = NULL;
@@ -309,7 +321,7 @@ out:
}
/* Check that selected sk is accessible through context. */
-SEC("sk_lookup/access_ctx_sk")
+SEC("sk_lookup")
int access_ctx_sk(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk1 = NULL, *sk2 = NULL;
@@ -375,11 +387,11 @@ out:
* are not covered because they give bogus results, that is the
* verifier ignores the offset.
*/
-SEC("sk_lookup/ctx_narrow_access")
+SEC("sk_lookup")
int ctx_narrow_access(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
- int err, family;
+ __u32 val_u32;
bool v4;
v4 = (ctx->family == AF_INET);
@@ -398,11 +410,22 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
return SK_DROP;
- /* Narrow loads from remote_port field. Expect non-0 value. */
- if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
- LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
+ /* Narrow loads from remote_port field. Expect SRC_PORT. */
+ if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
+ LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff))
+ return SK_DROP;
+ if (LSW(ctx->remote_port, 0) != SRC_PORT)
return SK_DROP;
- if (LSW(ctx->remote_port, 0) == 0)
+
+ /*
+ * NOTE: 4-byte load from bpf_sk_lookup at remote_port offset
+ * is quirky. It gets rewritten by the access converter to a
+ * 2-byte load for backward compatibility. Treating the load
+ * result as a be16 value makes the code portable across
+ * little- and big-endian platforms.
+ */
+ val_u32 = *(__u32 *)&ctx->remote_port;
+ if (val_u32 != SRC_PORT)
return SK_DROP;
/* Narrow loads from local_port field. Expect DST_PORT. */
@@ -415,11 +438,14 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
/* Narrow loads from IPv4 fields */
if (v4) {
- /* Expect non-0.0.0.0 in remote_ip4 */
- if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
- LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
+ /* Expect SRC_IP4 in remote_ip4 */
+ if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) ||
+ LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) ||
+ LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) ||
+ LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff))
return SK_DROP;
- if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
+ if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff))
return SK_DROP;
/* Expect DST_IP4 in local_ip4 */
@@ -448,20 +474,32 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
/* Narrow loads from IPv6 fields */
if (!v4) {
- /* Expect non-:: IP in remote_ip6 */
- if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
- LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
- LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
- LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
- LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
- LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
- LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
- LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
+ /* Expect SRC_IP6 in remote_ip6 */
+ if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff))
return SK_DROP;
- if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
- LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
- LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
- LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
+ if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff))
return SK_DROP;
/* Expect DST_IP6 in local_ip6 */
if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
@@ -533,7 +571,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
}
/* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */
-SEC("sk_lookup/sk_assign_esocknosupport")
+SEC("sk_lookup")
int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
{
struct bpf_sock *sk;
@@ -558,28 +596,28 @@ out:
return ret;
}
-SEC("sk_lookup/multi_prog_pass1")
+SEC("sk_lookup")
int multi_prog_pass1(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
-SEC("sk_lookup/multi_prog_pass2")
+SEC("sk_lookup")
int multi_prog_pass2(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
-SEC("sk_lookup/multi_prog_drop1")
+SEC("sk_lookup")
int multi_prog_drop1(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
return SK_DROP;
}
-SEC("sk_lookup/multi_prog_drop2")
+SEC("sk_lookup")
int multi_prog_drop2(struct bpf_sk_lookup *ctx)
{
bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
@@ -603,25 +641,20 @@ static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
return SK_PASS;
}
-SEC("sk_lookup/multi_prog_redir1")
+SEC("sk_lookup")
int multi_prog_redir1(struct bpf_sk_lookup *ctx)
{
- int ret;
-
- ret = select_server_a(ctx);
+ (void)select_server_a(ctx);
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
-SEC("sk_lookup/multi_prog_redir2")
+SEC("sk_lookup")
int multi_prog_redir2(struct bpf_sk_lookup *ctx)
{
- int ret;
-
- ret = select_server_a(ctx);
+ (void)select_server_a(ctx);
bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
return SK_PASS;
}
char _license[] SEC("license") = "Dual BSD/GPL";
-__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index e83d0b48d80c..e9efc3263022 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -15,7 +15,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
@@ -24,8 +23,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
bool *ipv4)
{
struct bpf_sock_tuple *result;
+ __u64 ihl_len = 0;
__u8 proto = 0;
- __u64 ihl_len;
if (eth_proto == bpf_htons(ETH_P_IP)) {
struct iphdr *iph = (struct iphdr *)(data + nh_off);
@@ -53,8 +52,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
return result;
}
-SEC("classifier/sk_lookup_success")
-int bpf_sk_lookup_test0(struct __sk_buff *skb)
+SEC("?tc")
+int sk_lookup_success(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
@@ -79,8 +78,8 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
}
-SEC("classifier/sk_lookup_success_simple")
-int bpf_sk_lookup_test1(struct __sk_buff *skb)
+SEC("?tc")
+int sk_lookup_success_simple(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
@@ -91,8 +90,8 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_use_after_free")
-int bpf_sk_lookup_uaf(struct __sk_buff *skb)
+SEC("?tc")
+int err_use_after_free(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
@@ -106,12 +105,11 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
return family;
}
-SEC("classifier/fail_modify_sk_pointer")
-int bpf_sk_lookup_modptr(struct __sk_buff *skb)
+SEC("?tc")
+int err_modify_sk_pointer(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
- __u32 family;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
if (sk) {
@@ -121,12 +119,11 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_modify_sk_or_null_pointer")
-int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
+SEC("?tc")
+int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
- __u32 family;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
sk += 1;
@@ -135,8 +132,8 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_no_release")
-int bpf_sk_lookup_test2(struct __sk_buff *skb)
+SEC("?tc")
+int err_no_release(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -144,8 +141,8 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_twice")
-int bpf_sk_lookup_test3(struct __sk_buff *skb)
+SEC("?tc")
+int err_release_twice(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
@@ -156,8 +153,8 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_unchecked")
-int bpf_sk_lookup_test4(struct __sk_buff *skb)
+SEC("?tc")
+int err_release_unchecked(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
@@ -173,8 +170,8 @@ void lookup_no_release(struct __sk_buff *skb)
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}
-SEC("classifier/fail_no_release_subcall")
-int bpf_sk_lookup_test5(struct __sk_buff *skb)
+SEC("?tc")
+int err_no_release_subcall(struct __sk_buff *skb)
{
lookup_no_release(skb);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
index 8e94e5c080aa..40531e56776e 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -68,7 +68,7 @@ static void set_task_info(struct sock *sk)
}
SEC("fentry/inet_csk_listen_start")
-int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk, int backlog)
+int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk)
{
set_task_info(sk);
@@ -84,7 +84,7 @@ int BPF_PROG(trace_tcp_connect, struct sock *sk)
}
SEC("fexit/inet_csk_accept")
-int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
+int BPF_PROG(inet_csk_accept, struct sock *sk, struct proto_accept_arg *arg,
struct sock *accepted_sk)
{
set_task_info(accepted_sk);
@@ -92,4 +92,20 @@ int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
return 0;
}
+SEC("tp_btf/tcp_retransmit_synack")
+int BPF_PROG(tcp_retransmit_synack, struct sock* sk, struct request_sock* req)
+{
+ /* load only test */
+ bpf_sk_storage_get(&sk_stg_map, sk, 0, 0);
+ bpf_sk_storage_get(&sk_stg_map, req->sk, 0, 0);
+ return 0;
+}
+
+SEC("tp_btf/tcp_bad_csum")
+int BPF_PROG(tcp_bad_csum, struct sk_buff* skb)
+{
+ bpf_sk_storage_get(&sk_stg_map, skb->sk, 0, 0);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
deleted file mode 100644
index 552f2090665c..000000000000
--- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
+++ /dev/null
@@ -1,47 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <linux/bpf.h>
-#include <linux/pkt_cls.h>
-
-#include <string.h>
-
-#include <bpf/bpf_helpers.h>
-
-#define NUM_CGROUP_LEVELS 4
-
-struct bpf_map_def SEC("maps") cgroup_ids = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = NUM_CGROUP_LEVELS,
-};
-
-static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
-{
- __u64 id;
-
- /* [1] &level passed to external function that may change it, it's
- * incompatible with loop unroll.
- */
- id = bpf_skb_ancestor_cgroup_id(skb, level);
- bpf_map_update_elem(&cgroup_ids, &level, &id, 0);
-}
-
-SEC("cgroup_id_logger")
-int log_cgroup_id(struct __sk_buff *skb)
-{
- /* Loop unroll can't be used here due to [1]. Unrolling manually.
- * Number of calls should be in sync with NUM_CGROUP_LEVELS.
- */
- log_nth_level(skb, 0);
- log_nth_level(skb, 1);
- log_nth_level(skb, 2);
- log_nth_level(skb, 3);
-
- return TC_ACT_OK;
-}
-
-int _version SEC("version") = 1;
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index b02ea589ce7e..a724a70c6700 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -3,13 +3,14 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
+#include "bpf_compiler.h"
+
char _license[] SEC("license") = "GPL";
-SEC("skb_ctx")
+SEC("tc")
int process(struct __sk_buff *skb)
{
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 5; i++) {
if (skb->cb[i] != i + 1)
return 1;
@@ -25,6 +26,12 @@ int process(struct __sk_buff *skb)
return 1;
if (skb->gso_size != 10)
return 1;
+ if (skb->ingress_ifindex != 11)
+ return 1;
+ if (skb->ifindex != 1)
+ return 1;
+ if (skb->hwtstamp != 11)
+ return 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
index bb3fbf1a29e3..507215791c5b 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
@@ -14,7 +14,7 @@ struct {
char _license[] SEC("license") = "GPL";
-SEC("classifier/test_skb_helpers")
+SEC("tc")
int test_skb_helpers(struct __sk_buff *skb)
{
struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
new file mode 100644
index 000000000000..4cfa42aa9436
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+const volatile pid_t my_pid = 0;
+char path[256] = {};
+
+SEC("fentry/unix_listen")
+int BPF_PROG(unix_listen, struct socket *sock, int backlog)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ struct unix_sock *unix_sk;
+ int i, len;
+
+ if (pid != my_pid)
+ return 0;
+
+ unix_sk = (struct unix_sock *)bpf_skc_to_unix_sock(sock->sk);
+ if (!unix_sk)
+ return 0;
+
+ if (unix_sk->addr->name->sun_path[0])
+ return 0;
+
+ len = unix_sk->addr->len - sizeof(short);
+ path[0] = '@';
+ for (i = 1; i < len; i++) {
+ if (i >= (int)sizeof(struct sockaddr_un))
+ break;
+
+ path[i] = unix_sk->addr->name->sun_path[i];
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 374ccef704e1..adece9f91f58 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -5,6 +5,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#define __read_mostly SEC(".data.read_mostly")
+
struct s {
int a;
long long b;
@@ -38,10 +40,37 @@ extern int LINUX_KERNEL_VERSION __kconfig;
bool bpf_syscall = 0;
int kern_ver = 0;
+struct s out5 = {};
+
+
+const volatile int in_dynarr_sz SEC(".rodata.dyn");
+const volatile int in_dynarr[4] SEC(".rodata.dyn") = { -1, -2, -3, -4 };
+
+int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 };
+
+int read_mostly_var __read_mostly;
+int out_mostly_var;
+
+char huge_arr[16 * 1024 * 1024];
+
+/* non-mmapable custom .data section */
+
+struct my_value { int x, y, z; };
+
+__hidden int zero_key SEC(".data.non_mmapable");
+static struct my_value zero_value SEC(".data.non_mmapable");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct my_value);
+ __uint(max_entries, 1);
+} my_map SEC(".maps");
+
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
- static volatile struct s out5;
+ int i;
out1 = in1;
out2 = in2;
@@ -53,6 +82,16 @@ int handler(const void *ctx)
bpf_syscall = CONFIG_BPF_SYSCALL;
kern_ver = LINUX_KERNEL_VERSION;
+ for (i = 0; i < in_dynarr_sz; i++)
+ out_dynarr[i] = in_dynarr[i];
+
+ out_mostly_var = read_mostly_var;
+
+ huge_arr[sizeof(huge_arr) - 1] = 123;
+
+ /* make sure zero_key and zero_value are not optimized out */
+ bpf_map_update_elem(&my_map, &zero_key, &zero_value, BPF_ANY);
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
index 45e8fc75a739..996b177324ba 100644
--- a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
@@ -24,8 +24,7 @@ struct {
__type(value, __u64);
} socket_storage SEC(".maps");
-SEC("sk_msg")
-int prog_msg_verdict(struct sk_msg_md *msg)
+static int prog_msg_verdict_common(struct sk_msg_md *msg)
{
struct task_struct *task = (struct task_struct *)bpf_get_current_task();
int verdict = SK_PASS;
@@ -44,4 +43,28 @@ int prog_msg_verdict(struct sk_msg_md *msg)
return verdict;
}
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+ return prog_msg_verdict_common(msg);
+}
+
+SEC("sk_msg")
+int prog_msg_verdict_clone(struct sk_msg_md *msg)
+{
+ return prog_msg_verdict_common(msg);
+}
+
+SEC("sk_msg")
+int prog_msg_verdict_clone2(struct sk_msg_md *msg)
+{
+ return prog_msg_verdict_common(msg);
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
new file mode 100644
index 000000000000..8fda07544023
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 pid = 0;
+
+char num_out[64] = {};
+long num_ret = 0;
+
+char ip_out[64] = {};
+long ip_ret = 0;
+
+char sym_out[64] = {};
+long sym_ret = 0;
+
+char addr_out[64] = {};
+long addr_ret = 0;
+
+char str_out[64] = {};
+long str_ret = 0;
+
+char over_out[6] = {};
+long over_ret = 0;
+
+char pad_out[10] = {};
+long pad_ret = 0;
+
+char noarg_out[64] = {};
+long noarg_ret = 0;
+
+long nobuf_ret = 0;
+
+extern const void schedule __ksym;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ /* Convenient values to pretty-print */
+ const __u8 ex_ipv4[] = {127, 0, 0, 1};
+ const __u8 ex_ipv6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static const char str1[] = "str1";
+ static const char longstr[] = "longstr";
+
+ if ((int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Integer types */
+ num_ret = BPF_SNPRINTF(num_out, sizeof(num_out),
+ "%d %u %x %li %llu %lX",
+ -8, 9, 150, -424242, 1337, 0xDABBAD00);
+ /* IP addresses */
+ ip_ret = BPF_SNPRINTF(ip_out, sizeof(ip_out), "%pi4 %pI6",
+ &ex_ipv4, &ex_ipv6);
+ /* Symbol lookup formatting */
+ sym_ret = BPF_SNPRINTF(sym_out, sizeof(sym_out), "%ps %pS %pB",
+ &schedule, &schedule, &schedule);
+ /* Kernel pointers */
+ addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
+ 0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
+ /* Strings and single-byte character embedding */
+ str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s % 9c %+2c %-3c %04c %0c %+05s",
+ str1, 'a', 'b', 'c', 'd', 'e', longstr);
+ /* Overflow */
+ over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
+ /* Padding of fixed width numbers */
+ pad_ret = BPF_SNPRINTF(pad_out, sizeof(pad_out), "%5d %0900000X", 4, 4);
+ /* No args */
+ noarg_ret = BPF_SNPRINTF(noarg_out, sizeof(noarg_out), "simple case");
+ /* No buffer */
+ nobuf_ret = BPF_SNPRINTF(NULL, 0, "only interested in length %d", 60);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
new file mode 100644
index 000000000000..3095837334d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* The format string is filled from the userspace such that loading fails */
+const char fmt[10];
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ unsigned long long arg = 42;
+
+ bpf_snprintf(NULL, 0, fmt, &arg, sizeof(arg));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 81b57b9aaaea..196844be349c 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -7,11 +7,11 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
INGRESS_LINUM_IDX,
+ READ_SK_DST_PORT_LINUM_IDX,
__NR_BPF_LINUM_ARRAY_IDX,
};
@@ -41,6 +41,10 @@ struct {
__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
+struct tcp_sock {
+ __u32 lsndtime;
+} __attribute__((preserve_access_index));
+
struct bpf_tcp_sock listen_tp = {};
struct sockaddr_in6 srv_sa6 = {};
struct bpf_tcp_sock cli_tp = {};
@@ -113,14 +117,14 @@ static void tpcpy(struct bpf_tcp_sock *dst,
#define RET_LOG() ({ \
linum = __LINE__; \
- bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \
+ bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY); \
return CG_OK; \
})
SEC("cgroup_skb/egress")
int egress_read_sock_fields(struct __sk_buff *skb)
{
- struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
+ struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F };
struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
@@ -133,11 +137,11 @@ int egress_read_sock_fields(struct __sk_buff *skb)
if (!sk)
RET_LOG();
- /* Not the testing egress traffic or
- * TCP_LISTEN (10) socket will be copied at the ingress side.
+ /* Not testing the egress traffic or the listening socket,
+ * which are covered by the cgroup_skb/ingress test program.
*/
if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
- sk->state == 10)
+ sk->state == BPF_TCP_LISTEN)
return CG_OK;
if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
@@ -231,8 +235,8 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
return CG_OK;
- /* Only interested in TCP_LISTEN */
- if (sk->state != 10)
+ /* Only interested in the listening socket */
+ if (sk->state != BPF_TCP_LISTEN)
return CG_OK;
/* It must be a fullsock for cgroup_skb/ingress prog */
@@ -250,4 +254,57 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
return CG_OK;
}
+/*
+ * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
+ * gets rewritten by the access converter to a 2-byte load for
+ * backward compatibility. Treating the load result as a be16 value
+ * makes the code portable across little- and big-endian platforms.
+ */
+static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
+{
+ __u32 *word = (__u32 *)&sk->dst_port;
+ return word[0] == bpf_htons(0xcafe);
+}
+
+static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
+{
+ __u16 *half;
+
+ asm volatile ("");
+ half = (__u16 *)&sk->dst_port;
+ return half[0] == bpf_htons(0xcafe);
+}
+
+static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
+{
+ __u8 *byte = (__u8 *)&sk->dst_port;
+ return byte[0] == 0xca && byte[1] == 0xfe;
+}
+
+SEC("cgroup_skb/egress")
+int read_sk_dst_port(struct __sk_buff *skb)
+{
+ __u32 linum, linum_idx;
+ struct bpf_sock *sk;
+
+ linum_idx = READ_SK_DST_PORT_LINUM_IDX;
+
+ sk = skb->sk;
+ if (!sk)
+ RET_LOG();
+
+ /* Ignore everything but the SYN from the client socket */
+ if (sk->state != BPF_TCP_SYN_SENT)
+ return CG_OK;
+
+ if (!sk_dst_port__load_word(sk))
+ RET_LOG();
+ if (!sk_dst_port__load_half(sk))
+ RET_LOG();
+ if (!sk_dst_port__load_byte(sk))
+ RET_LOG();
+
+ return CG_OK;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
new file mode 100644
index 000000000000..1c7941a4ad00
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 ByteDance */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+long change_tail_ret = 1;
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ char *data, *data_end;
+
+ bpf_skb_pull_data(skb, 1);
+ data = (char *)(unsigned long)skb->data;
+ data_end = (char *)(unsigned long)skb->data_end;
+
+ if (data + 1 > data_end)
+ return SK_PASS;
+
+ if (data[0] == 'T') { /* Trim the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, skb->len - 1, 0);
+ return SK_PASS;
+ } else if (data[0] == 'G') { /* Grow the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0);
+ return SK_PASS;
+ } else if (data[0] == 'E') { /* Error */
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
+ return SK_PASS;
+ }
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c
new file mode 100644
index 000000000000..29314805ce42
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c
@@ -0,0 +1,32 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 1858435de7aa..f48f85f1bd70 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -14,6 +14,7 @@
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
/* Sockmap sample program connects a client and a backend together
* using cgroups.
@@ -91,7 +92,7 @@ struct {
__uint(value_size, sizeof(int));
} tls_sock_map SEC(".maps");
-SEC("sk_skb1")
+SEC("sk_skb/stream_parser")
int bpf_prog1(struct __sk_buff *skb)
{
int *f, two = 2;
@@ -103,7 +104,7 @@ int bpf_prog1(struct __sk_buff *skb)
return skb->len;
}
-SEC("sk_skb2")
+SEC("sk_skb/stream_verdict")
int bpf_prog2(struct __sk_buff *skb)
{
__u32 lport = skb->local_port;
@@ -111,12 +112,15 @@ int bpf_prog2(struct __sk_buff *skb)
int len, *f, ret, zero = 0;
__u64 flags = 0;
+ __sink(rport);
if (lport == 10000)
ret = 10;
else
ret = 1;
len = (__u32)skb->data_end - (__u32)skb->data;
+ __sink(len);
+
f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
if (f && *f) {
ret = 3;
@@ -147,7 +151,7 @@ static inline void bpf_write_pass(struct __sk_buff *skb, int offset)
memcpy(c + offset, "PASS", 4);
}
-SEC("sk_skb3")
+SEC("sk_skb/stream_verdict")
int bpf_prog3(struct __sk_buff *skb)
{
int err, *f, ret = SK_PASS;
@@ -173,14 +177,10 @@ int bpf_prog3(struct __sk_buff *skb)
return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
#endif
}
- f = bpf_map_lookup_elem(&sock_skb_opts, &one);
- if (f && *f)
- ret = SK_DROP;
err = bpf_skb_adjust_room(skb, 4, 0, 0);
if (err)
return SK_DROP;
bpf_write_pass(skb, 13);
-tls_out:
return ret;
}
@@ -188,8 +188,7 @@ SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 lport, rport;
- int op, err = 0, index, key, ret;
-
+ int op, ret;
op = (int) skops->op;
@@ -201,10 +200,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (lport == 10000) {
ret = 1;
#ifdef SOCKMAP
- err = bpf_sock_map_update(skops, &sock_map, &ret,
+ bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
- err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@@ -216,10 +215,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (bpf_ntohl(rport) == 10001) {
ret = 10;
#ifdef SOCKMAP
- err = bpf_sock_map_update(skops, &sock_map, &ret,
+ bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
- err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@@ -231,11 +230,11 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
return 0;
}
-SEC("sk_msg1")
+SEC("sk_msg")
int bpf_prog4(struct sk_msg_md *msg)
{
int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
- int *start, *end, *start_push, *end_push, *start_pop, *pop;
+ int *start, *end, *start_push, *end_push, *start_pop, *pop, err = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
@@ -249,8 +248,11 @@ int bpf_prog4(struct sk_msg_md *msg)
bpf_msg_pull_data(msg, *start, *end, 0);
start_push = bpf_map_lookup_elem(&sock_bytes, &two);
end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push)
- bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (start_push && end_push) {
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ return SK_DROP;
+ }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
@@ -258,11 +260,12 @@ int bpf_prog4(struct sk_msg_md *msg)
return SK_PASS;
}
-SEC("sk_msg2")
+SEC("sk_msg")
int bpf_prog6(struct sk_msg_md *msg)
{
int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
+ int err = 0;
__u64 flags = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
@@ -279,8 +282,11 @@ int bpf_prog6(struct sk_msg_md *msg)
start_push = bpf_map_lookup_elem(&sock_bytes, &two);
end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push)
- bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (start_push && end_push) {
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ return SK_DROP;
+ }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
@@ -299,7 +305,7 @@ int bpf_prog6(struct sk_msg_md *msg)
#endif
}
-SEC("sk_msg3")
+SEC("sk_msg")
int bpf_prog8(struct sk_msg_md *msg)
{
void *data_end = (void *)(long) msg->data_end;
@@ -314,9 +320,14 @@ int bpf_prog8(struct sk_msg_md *msg)
} else {
return SK_DROP;
}
+
+ __sink(data_end);
+ __sink(data);
+
return SK_PASS;
}
-SEC("sk_msg4")
+
+SEC("sk_msg")
int bpf_prog9(struct sk_msg_md *msg)
{
void *data_end = (void *)(long) msg->data_end;
@@ -334,11 +345,11 @@ int bpf_prog9(struct sk_msg_md *msg)
return SK_PASS;
}
-SEC("sk_msg5")
+SEC("sk_msg")
int bpf_prog10(struct sk_msg_md *msg)
{
int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
- int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+ int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, err = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
@@ -352,8 +363,11 @@ int bpf_prog10(struct sk_msg_md *msg)
bpf_msg_pull_data(msg, *start, *end, 0);
start_push = bpf_map_lookup_elem(&sock_bytes, &two);
end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push)
- bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (start_push && end_push) {
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ return SK_PASS;
+ }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
@@ -361,5 +375,4 @@ int bpf_prog10(struct sk_msg_md *msg)
return SK_DROP;
}
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
new file mode 100644
index 000000000000..83df4919c224
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+int cork_byte;
+int push_start;
+int push_end;
+int apply_bytes;
+int pop_start;
+int pop_end;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_msg")
+int prog_sk_policy(struct sk_msg_md *msg)
+{
+ if (cork_byte > 0)
+ bpf_msg_cork_bytes(msg, cork_byte);
+ if (push_start > 0 && push_end > 0)
+ bpf_msg_push_data(msg, push_start, push_end, 0);
+ if (pop_start >= 0 && pop_end > 0)
+ bpf_msg_pop_data(msg, pop_start, pop_end, 0);
+
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_sk_policy_redir(struct sk_msg_md *msg)
+{
+ int two = 2;
+
+ bpf_msg_apply_bytes(msg, apply_bytes);
+ return bpf_msg_redirect_map(msg, &sock_map, two, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a3a366c57ce1..b7250eb9c30c 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -15,6 +15,13 @@ struct {
} sock_map SEC(".maps");
struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_map SEC(".maps");
+
+struct {
__uint(type, BPF_MAP_TYPE_SOCKHASH);
__uint(max_entries, 2);
__type(key, __u32);
@@ -28,16 +35,31 @@ struct {
__type(value, unsigned int);
} verdict_map SEC(".maps");
-static volatile bool test_sockmap; /* toggled by user-space */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} parser_map SEC(".maps");
+
+bool test_sockmap = false; /* toggled by user-space */
+bool test_ingress = false; /* toggled by user-space */
SEC("sk_skb/stream_parser")
-int prog_skb_parser(struct __sk_buff *skb)
+int prog_stream_parser(struct __sk_buff *skb)
{
+ int *value;
+ __u32 key = 0;
+
+ value = bpf_map_lookup_elem(&parser_map, &key);
+ if (value && *value)
+ return *value;
+
return skb->len;
}
SEC("sk_skb/stream_verdict")
-int prog_skb_verdict(struct __sk_buff *skb)
+int prog_stream_verdict(struct __sk_buff *skb)
{
unsigned int *count;
__u32 zero = 0;
@@ -55,6 +77,27 @@ int prog_skb_verdict(struct __sk_buff *skb)
return verdict;
}
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ unsigned int *count;
+ __u32 zero = 0;
+ int verdict;
+
+ if (test_sockmap)
+ verdict = bpf_sk_redirect_map(skb, &sock_map, zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+ else
+ verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
SEC("sk_msg")
int prog_msg_verdict(struct sk_msg_md *msg)
{
@@ -94,5 +137,4 @@ int prog_reuseport(struct sk_reuseport_md *reuse)
return verdict;
}
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
new file mode 100644
index 000000000000..69aacc96db36
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
@@ -0,0 +1,47 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+int clone_called;
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_clone(struct __sk_buff *skb)
+{
+ clone_called = 1;
+ return SK_PASS;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
new file mode 100644
index 000000000000..9d58d61c0dee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_redir.c b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
new file mode 100644
index 000000000000..34d9f4f2f0a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_map, sock_map;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_hash, sock_hash;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, unsigned int);
+} verdict_map;
+
+/* Set by user space */
+int redirect_type;
+int redirect_flags;
+
+#define redirect_map(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_map, \
+ struct sk_msg_md * : bpf_msg_redirect_map \
+ )((__data), &sock_map, (__u32){0}, redirect_flags)
+
+#define redirect_hash(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_hash, \
+ struct sk_msg_md * : bpf_msg_redirect_hash \
+ )((__data), &sock_hash, &(__u32){0}, redirect_flags)
+
+#define DEFINE_PROG(__type, __param) \
+SEC("sk_" XSTR(__type)) \
+int prog_ ## __type ## _verdict(__param data) \
+{ \
+ unsigned int *count; \
+ int verdict; \
+ \
+ if (redirect_type == BPF_MAP_TYPE_SOCKMAP) \
+ verdict = redirect_map(data); \
+ else if (redirect_type == BPF_MAP_TYPE_SOCKHASH) \
+ verdict = redirect_hash(data); \
+ else \
+ verdict = redirect_type - __MAX_BPF_MAP_TYPE; \
+ \
+ count = bpf_map_lookup_elem(&verdict_map, &verdict); \
+ if (count) \
+ (*count)++; \
+ \
+ return verdict; \
+}
+
+DEFINE_PROG(skb, struct __sk_buff *);
+DEFINE_PROG(msg, struct sk_msg_md *);
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
new file mode 100644
index 000000000000..d25b0bb30fc0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
new file mode 100644
index 000000000000..dde3d5bec515
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+int verdict_max_size = 10000;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ __u32 one = 1;
+
+ if (skb->len > verdict_max_size)
+ return SK_PASS;
+
+ return bpf_sk_redirect_map(skb, &sock_map, one, 0);
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_pass(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser_partial(struct __sk_buff *skb)
+{
+ /* agreement with the test program on a 4-byte size header
+ * and 6-byte body.
+ */
+ if (skb->len < 4) {
+ /* need more header to determine full length */
+ return 0;
+ }
+ /* return full length decoded from header.
+ * the return value may be larger than skb->len which
+ * means framework must wait body coming.
+ */
+ return 10;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
index 9d0c9f28cab2..6d64ea536e3d 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_update.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
@@ -24,7 +24,7 @@ struct {
__type(value, __u64);
} dst_sock_hash SEC(".maps");
-SEC("classifier/copy_sock_map")
+SEC("tc")
int copy_sock_map(void *ctx)
{
struct bpf_sock *sk;
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 0d31a3b3505f..d8d77bdffd3d 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <linux/version.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct hmap_elem {
volatile int cnt;
@@ -45,8 +46,8 @@ struct {
#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
-SEC("spin_lock_demo")
-int bpf_sping_lock_test(struct __sk_buff *skb)
+SEC("cgroup_skb/ingress")
+int bpf_spin_lock_test(struct __sk_buff *skb)
{
volatile int credit = 0, max_credit = 100, pkt_len = 64;
struct hmap_elem zero = {}, *val;
@@ -89,6 +90,8 @@ int bpf_sping_lock_test(struct __sk_buff *skb)
credit = q->credit;
bpf_spin_unlock(&q->lock);
+ __sink(credit);
+
/* spin_lock in cgroup local storage */
cls = bpf_get_local_storage(&cls_map, 0);
bpf_spin_lock(&cls->lock);
@@ -98,4 +101,69 @@ int bpf_sping_lock_test(struct __sk_buff *skb)
err:
return err;
}
+
+struct bpf_spin_lock lockA __hidden SEC(".data.A");
+
+__noinline
+static int static_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ return ret;
+ return ret + ctx->len;
+}
+
+__noinline
+static int static_subprog_lock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ ret = static_subprog(ctx);
+ bpf_spin_lock(&lockA);
+ return ret + ctx->len;
+}
+
+__noinline
+static int static_subprog_unlock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ ret = static_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret + ctx->len;
+}
+
+SEC("tc")
+int lock_static_subprog_call(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = static_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("tc")
+int lock_static_subprog_lock(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ ret = static_subprog_lock(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("tc")
+int lock_static_subprog_unlock(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ ret = static_subprog_unlock(ctx);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
new file mode 100644
index 000000000000..f678ee6bd7ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+struct foo {
+ struct bpf_spin_lock lock;
+ int data;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct foo);
+ __uint(max_entries, 1);
+} array_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct array_map);
+} map_of_maps SEC(".maps") = {
+ .values = {
+ [0] = &array_map,
+ },
+};
+
+static struct bpf_spin_lock lockA SEC(".data.A");
+static struct bpf_spin_lock lockB SEC(".data.B");
+
+SEC("?tc")
+int lock_id_kptr_preserve(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_this_cpu_ptr(f);
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_global_zero(void *ctx)
+{
+ bpf_this_cpu_ptr(&lockA);
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_mapval_preserve(void *ctx)
+{
+ struct foo *f;
+ int key = 0;
+
+ f = bpf_map_lookup_elem(&array_map, &key);
+ if (!f)
+ return 0;
+ bpf_this_cpu_ptr(f);
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_innermapval_preserve(void *ctx)
+{
+ struct foo *f;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f = bpf_map_lookup_elem(map, &key);
+ if (!f)
+ return 0;
+ bpf_this_cpu_ptr(f);
+ return 0;
+}
+
+#define CHECK(test, A, B) \
+ SEC("?tc") \
+ int lock_id_mismatch_##test(void *ctx) \
+ { \
+ struct foo *f1, *f2, *v, *iv; \
+ int key = 0; \
+ void *map; \
+ \
+ map = bpf_map_lookup_elem(&map_of_maps, &key); \
+ if (!map) \
+ return 0; \
+ iv = bpf_map_lookup_elem(map, &key); \
+ if (!iv) \
+ return 0; \
+ v = bpf_map_lookup_elem(&array_map, &key); \
+ if (!v) \
+ return 0; \
+ f1 = bpf_obj_new(typeof(*f1)); \
+ if (!f1) \
+ return 0; \
+ f2 = bpf_obj_new(typeof(*f2)); \
+ if (!f2) { \
+ bpf_obj_drop(f1); \
+ return 0; \
+ } \
+ bpf_spin_lock(A); \
+ bpf_spin_unlock(B); \
+ return 0; \
+ }
+
+CHECK(kptr_kptr, &f1->lock, &f2->lock);
+CHECK(kptr_global, &f1->lock, &lockA);
+CHECK(kptr_mapval, &f1->lock, &v->lock);
+CHECK(kptr_innermapval, &f1->lock, &iv->lock);
+
+CHECK(global_global, &lockA, &lockB);
+CHECK(global_kptr, &lockA, &f1->lock);
+CHECK(global_mapval, &lockA, &v->lock);
+CHECK(global_innermapval, &lockA, &iv->lock);
+
+SEC("?tc")
+int lock_id_mismatch_mapval_mapval(void *ctx)
+{
+ struct foo *f1, *f2;
+ int key = 0;
+
+ f1 = bpf_map_lookup_elem(&array_map, &key);
+ if (!f1)
+ return 0;
+ f2 = bpf_map_lookup_elem(&array_map, &key);
+ if (!f2)
+ return 0;
+
+ bpf_spin_lock(&f1->lock);
+ f1->data = 42;
+ bpf_spin_unlock(&f2->lock);
+
+ return 0;
+}
+
+CHECK(mapval_kptr, &v->lock, &f1->lock);
+CHECK(mapval_global, &v->lock, &lockB);
+CHECK(mapval_innermapval, &v->lock, &iv->lock);
+
+SEC("?tc")
+int lock_id_mismatch_innermapval_innermapval1(void *ctx)
+{
+ struct foo *f1, *f2;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f1 = bpf_map_lookup_elem(map, &key);
+ if (!f1)
+ return 0;
+ f2 = bpf_map_lookup_elem(map, &key);
+ if (!f2)
+ return 0;
+
+ bpf_spin_lock(&f1->lock);
+ f1->data = 42;
+ bpf_spin_unlock(&f2->lock);
+
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_mismatch_innermapval_innermapval2(void *ctx)
+{
+ struct foo *f1, *f2;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f1 = bpf_map_lookup_elem(map, &key);
+ if (!f1)
+ return 0;
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f2 = bpf_map_lookup_elem(map, &key);
+ if (!f2)
+ return 0;
+
+ bpf_spin_lock(&f1->lock);
+ f1->data = 42;
+ bpf_spin_unlock(&f2->lock);
+
+ return 0;
+}
+
+CHECK(innermapval_kptr, &iv->lock, &f1->lock);
+CHECK(innermapval_global, &iv->lock, &lockA);
+CHECK(innermapval_mapval, &iv->lock, &v->lock);
+
+#undef CHECK
+
+__noinline
+int global_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ ret += ctx->protocol;
+ return ret + ctx->mark;
+}
+
+__noinline
+static int static_subprog_call_global(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ return ret;
+ return ret + ctx->len + global_subprog(ctx);
+}
+
+SEC("?tc")
+int lock_global_subprog_call1(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?tc")
+int lock_global_subprog_call2(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = static_subprog_call_global(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+int __noinline
+global_subprog_int(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog_int(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_helper_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_kfunc_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_subprog_calling_sleepable_global(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index 0cf0134631b4..0c4426592a26 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -28,8 +28,8 @@ struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(max_entries, 128);
__uint(map_flags, BPF_F_STACK_BUILD_ID);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(stack_trace_t));
+ __type(key, __u32);
+ __type(value, stack_trace_t);
} stackmap SEC(".maps");
struct {
@@ -39,16 +39,8 @@ struct {
__type(value, stack_trace_t);
} stack_amap SEC(".maps");
-/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
-struct random_urandom_args {
- unsigned long long pad;
- int got_bits;
- int pool_left;
- int input_left;
-};
-
-SEC("tracepoint/random/urandom_read")
-int oncpu(struct random_urandom_args *args)
+SEC("kprobe/urandom_read_iter")
+int oncpu(struct pt_regs *args)
{
__u32 max_len = sizeof(struct bpf_stack_build_id)
* PERF_MAX_STACK_DEPTH;
@@ -73,4 +65,3 @@ int oncpu(struct random_urandom_args *args)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
new file mode 100644
index 000000000000..4f0b612e1661
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 8-byte aligned .data */
+static volatile long static_var1 = 2;
+static volatile int static_var2 = 3;
+int var1 = -1;
+/* 4-byte aligned .rodata */
+const volatile int rovar1;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 2;
+}
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+ var1 = subprog(rovar1) + static_var1 + static_var2;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
+int VERSION SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
new file mode 100644
index 000000000000..766ebd502a60
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 4-byte aligned .data */
+static volatile int static_var1 = 5;
+static volatile int static_var2 = 6;
+int var2 = -1;
+/* 8-byte aligned .rodata */
+const volatile long rovar2;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 3;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+ var2 = subprog(rovar2) + static_var1 + static_var2;
+
+ return 0;
+}
+
+/* different name and/or type of the variable doesn't matter */
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
index d3c5673c0218..a8d602d7c88a 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -4,8 +4,18 @@
const char LICENSE[] SEC("license") = "GPL";
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array SEC(".maps");
+
__noinline int sub1(int x)
{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
return x + 1;
}
@@ -23,6 +33,9 @@ static __noinline int sub3(int z)
static __noinline int sub4(int w)
{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
return w + sub3(5) + sub1(6);
}
@@ -34,7 +47,7 @@ static __noinline int sub5(int v)
return sub1(v) - 1; /* compensates sub1()'s + 1 */
}
-/* unfortunately verifier rejects `struct task_struct *t` as an unkown pointer
+/* unfortunately verifier rejects `struct task_struct *t` as an unknown pointer
* type, so we need to accept pointer as integer and then cast it inside the
* function
*/
@@ -76,6 +89,11 @@ int prog2(void *ctx)
return 0;
}
+static int empty_callback(__u32 index, void *data)
+{
+ return 0;
+}
+
/* prog3 has the same section name as prog1 */
SEC("raw_tp/sys_enter")
int prog3(void *ctx)
@@ -85,6 +103,9 @@ int prog3(void *ctx)
if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
return 1;
+ /* test that ld_imm64 with BPF_PSEUDO_FUNC doesn't get blinded */
+ bpf_loop(1, empty_callback, NULL, 0);
+
res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
new file mode 100644
index 000000000000..dcac69f5928a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 8);
+ __type(key, __u32);
+ __type(value, __u64);
+} test_array SEC(".maps");
+
+unsigned int triggered;
+
+static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+ return 1;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
+{
+ *(volatile int *)ret;
+ *(volatile int *)&ret->f_mode;
+ bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
+ triggered++;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
+{
+ *(volatile int *)ret;
+ *(volatile int *)&ret->f_mode;
+ bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
+ triggered++;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret)
+{
+ *(volatile int *)ret;
+ *(volatile int *)&ret->f_mode;
+ bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
+ triggered++;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subskeleton.c b/tools/testing/selftests/bpf/progs/test_subskeleton.c
new file mode 100644
index 000000000000..006417974372
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subskeleton.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* volatile to force a read, compiler may assume 0 otherwise */
+const volatile int rovar1;
+int out1;
+
+/* Override weak symbol in test_subskeleton_lib */
+int var5 = 5;
+
+extern volatile bool CONFIG_BPF_SYSCALL __kconfig;
+
+extern int lib_routine(void);
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+ (void) CONFIG_BPF_SYSCALL;
+
+ out1 = lib_routine() * rovar1;
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subskeleton_lib.c b/tools/testing/selftests/bpf/progs/test_subskeleton_lib.c
new file mode 100644
index 000000000000..ecfafe812c36
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subskeleton_lib.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* volatile to force a read */
+const volatile int var1;
+volatile int var2 = 1;
+struct {
+ int var3_1;
+ __s64 var3_2;
+} var3;
+int libout1;
+
+extern volatile bool CONFIG_BPF_SYSCALL __kconfig;
+
+int var4[4];
+
+__weak int var5 SEC(".data");
+
+/* Fully contained within library extern-and-definition */
+extern int var6;
+
+int var7 SEC(".data.custom");
+
+int (*fn_ptr)(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 16);
+} map1 SEC(".maps");
+
+extern struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 16);
+} map2 SEC(".maps");
+
+int lib_routine(void)
+{
+ __u32 key = 1, value = 2;
+
+ (void) CONFIG_BPF_SYSCALL;
+ bpf_map_update_elem(&map2, &key, &value, BPF_ANY);
+
+ libout1 = var1 + var2 + var3.var3_1 + var3.var3_2 + var5 + var6;
+ return libout1;
+}
+
+SEC("perf_event")
+int lib_perf_handler(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c b/tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c
new file mode 100644
index 000000000000..80238486b7ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int var6 = 6;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 16);
+} map2 SEC(".maps");
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 553a282d816a..548660e299a5 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -9,9 +9,8 @@
#include <bpf/bpf_helpers.h>
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_compiler.h"
+#include "bpf_misc.h"
/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
#define TCP_MEM_LOOPS 28 /* because 30 doesn't fit into 512 bytes of stack */
@@ -30,7 +29,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -59,7 +58,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index 2b64bc563a12..81249d119a8b 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -9,9 +9,8 @@
#include <bpf/bpf_helpers.h>
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_compiler.h"
+#include "bpf_misc.h"
/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
#define TCP_MEM_LOOPS 20 /* because 30 doesn't fit into 512 bytes of stack */
@@ -30,7 +29,7 @@ static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -57,7 +56,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 5489823c83fc..bbdd08764789 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -9,16 +9,15 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+#include "bpf_misc.h"
+
/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
#define MAX_ULONG_STR_LEN 0xF
/* Max supported length of sysctl value string (pow2). */
#define MAX_VALUE_STR_LEN 0x40
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
@@ -31,7 +30,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -57,7 +56,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_task_local_data.c b/tools/testing/selftests/bpf/progs/test_task_local_data.c
new file mode 100644
index 000000000000..fffafc013044
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_local_data.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_local_data.bpf.h"
+
+struct tld_keys {
+ tld_key_t value0;
+ tld_key_t value1;
+ tld_key_t value2;
+ tld_key_t value_not_exist;
+};
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+int test_value0;
+int test_value1;
+struct test_tld_struct test_value2;
+
+SEC("syscall")
+int task_main(void *ctx)
+{
+ struct tld_object tld_obj;
+ struct test_tld_struct *struct_p;
+ struct task_struct *task;
+ int err, *int_p;
+
+ task = bpf_get_current_task_btf();
+ err = tld_object_init(task, &tld_obj);
+ if (err)
+ return 1;
+
+ int_p = tld_get_data(&tld_obj, value0, "value0", sizeof(int));
+ if (int_p)
+ test_value0 = *int_p;
+ else
+ return 2;
+
+ int_p = tld_get_data(&tld_obj, value1, "value1", sizeof(int));
+ if (int_p)
+ test_value1 = *int_p;
+ else
+ return 3;
+
+ struct_p = tld_get_data(&tld_obj, value2, "value2", sizeof(struct test_tld_struct));
+ if (struct_p)
+ test_value2 = *struct_p;
+ else
+ return 4;
+
+ int_p = tld_get_data(&tld_obj, value_not_exist, "value_not_exist", sizeof(int));
+ if (int_p)
+ return 5;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
new file mode 100644
index 000000000000..1926facba122
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define PT_REGS_SIZE sizeof(struct pt_regs)
+
+/*
+ * The kernel struct pt_regs isn't exported in its entirety to userspace.
+ * Pass it as an array to task_pt_regs.c
+ */
+char current_regs[PT_REGS_SIZE] = {};
+char ctx_regs[PT_REGS_SIZE] = {};
+int uprobe_res = 0;
+
+SEC("uprobe")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ struct task_struct *current;
+ struct pt_regs *regs;
+
+ current = bpf_get_current_task_btf();
+ regs = (struct pt_regs *) bpf_task_pt_regs(current);
+ if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs))
+ return 0;
+ if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx))
+ return 0;
+
+ /* Prove that uprobe was run */
+ uprobe_res = 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
new file mode 100644
index 000000000000..0b74b8bd22e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+const volatile int local_pid;
+const volatile __u64 cgid;
+int remote_pid;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(tp_btf_run, struct task_struct *task, u64 clone_flags)
+{
+ struct cgroup *cgrp = NULL;
+ struct task_struct *acquired;
+
+ if (local_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+
+ if (local_pid == acquired->tgid)
+ goto out;
+
+ cgrp = bpf_cgroup_from_id(cgid);
+ if (!cgrp)
+ goto out;
+
+ if (bpf_task_under_cgroup(acquired, cgrp))
+ remote_pid = acquired->tgid;
+
+out:
+ if (cgrp)
+ bpf_cgroup_release(cgrp);
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ struct cgroup *cgrp = NULL;
+ struct task_struct *task;
+ int ret = 0;
+
+ task = bpf_get_current_task_btf();
+ if (local_pid != task->pid)
+ return 0;
+
+ if (cmd != BPF_LINK_CREATE)
+ return 0;
+
+ /* 1 is the root cgroup */
+ cgrp = bpf_cgroup_from_id(1);
+ if (!cgrp)
+ goto out;
+ if (!bpf_task_under_cgroup(task, cgrp))
+ ret = -1;
+ bpf_cgroup_release(cgrp);
+
+out:
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
new file mode 100644
index 000000000000..ef7da419632a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+/* Dummy prog to test TC-BPF API */
+
+SEC("tc")
+int cls(struct __sk_buff *skb)
+{
+ return 0;
+}
+
+/* Prog to verify tc-bpf without cap_sys_admin and cap_perfmon */
+SEC("tcx/ingress")
+int pkt_ptr(struct __sk_buff *skb)
+{
+ struct iphdr *iph = (void *)(long)skb->data + sizeof(struct ethhdr);
+
+ if ((long)(iph + 1) > (long)skb->data_end)
+ return 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
new file mode 100644
index 000000000000..fcba8299f0bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
+
+long change_tail_ret = 1;
+
+static __always_inline struct iphdr *parse_ip_header(struct __sk_buff *skb, int *ip_proto)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = data;
+ struct iphdr *iph;
+
+ /* Verify Ethernet header */
+ if ((void *)(data + sizeof(*eth)) > data_end)
+ return NULL;
+
+ /* Skip Ethernet header to get to IP header */
+ iph = (void *)(data + sizeof(struct ethhdr));
+
+ /* Verify IP header */
+ if ((void *)(data + sizeof(struct ethhdr) + sizeof(*iph)) > data_end)
+ return NULL;
+
+ /* Basic IP header validation */
+ if (iph->version != 4) /* Only support IPv4 */
+ return NULL;
+
+ if (iph->ihl < 5) /* Minimum IP header length */
+ return NULL;
+
+ *ip_proto = iph->protocol;
+ return iph;
+}
+
+static __always_inline struct udphdr *parse_udp_header(struct __sk_buff *skb, struct iphdr *iph)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *hdr = (void *)iph;
+ struct udphdr *udp;
+
+ /* Calculate UDP header position */
+ udp = hdr + (iph->ihl * 4);
+ hdr = (void *)udp;
+
+ /* Verify UDP header bounds */
+ if ((void *)(hdr + sizeof(*udp)) > data_end)
+ return NULL;
+
+ return udp;
+}
+
+SEC("tc/ingress")
+int change_tail(struct __sk_buff *skb)
+{
+ int len = skb->len;
+ struct udphdr *udp;
+ struct iphdr *iph;
+ void *data_end;
+ char *payload;
+ int ip_proto;
+
+ bpf_skb_pull_data(skb, len);
+
+ data_end = (void *)(long)skb->data_end;
+ iph = parse_ip_header(skb, &ip_proto);
+ if (!iph)
+ return TCX_PASS;
+
+ if (ip_proto != IPPROTO_UDP)
+ return TCX_PASS;
+
+ udp = parse_udp_header(skb, iph);
+ if (!udp)
+ return TCX_PASS;
+
+ payload = (char *)udp + (sizeof(struct udphdr));
+ if (payload + 1 > (char *)data_end)
+ return TCX_PASS;
+
+ if (payload[0] == 'T') { /* Trim the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, len - 1, 0);
+ if (!change_tail_ret)
+ bpf_skb_change_tail(skb, len, 0);
+ return TCX_PASS;
+ } else if (payload[0] == 'G') { /* Grow the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, len + 1, 0);
+ if (!change_tail_ret)
+ bpf_skb_change_tail(skb, len, 0);
+ return TCX_PASS;
+ } else if (payload[0] == 'E') { /* Error */
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
+ return TCX_PASS;
+ } else if (payload[0] == 'Z') { /* Zero */
+ change_tail_ret = bpf_skb_change_tail(skb, 0, 0);
+ return TCX_PASS;
+ }
+ return TCX_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
new file mode 100644
index 000000000000..ca8e8734d901
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
@@ -0,0 +1,392 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2022 Meta
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* veth_src --- veth_src_fwd --- veth_det_fwd --- veth_dst
+ * | |
+ * ns_src | ns_fwd | ns_dst
+ *
+ * ns_src and ns_dst: ENDHOST namespace
+ * ns_fwd: Fowarding namespace
+ */
+
+#define ctx_ptr(field) (void *)(long)(field)
+
+#define ip4_src __bpf_htonl(0xac100164) /* 172.16.1.100 */
+#define ip4_dst __bpf_htonl(0xac100264) /* 172.16.2.100 */
+
+#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+
+#define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \
+ a.s6_addr32[1] == b.s6_addr32[1] && \
+ a.s6_addr32[2] == b.s6_addr32[2] && \
+ a.s6_addr32[3] == b.s6_addr32[3])
+
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
+
+#define EGRESS_ENDHOST_MAGIC 0x0b9fbeef
+#define INGRESS_FWDNS_MAGIC 0x1b9fbeef
+#define EGRESS_FWDNS_MAGIC 0x2b9fbeef
+
+enum {
+ INGRESS_FWDNS_P100,
+ INGRESS_FWDNS_P101,
+ EGRESS_FWDNS_P100,
+ EGRESS_FWDNS_P101,
+ INGRESS_ENDHOST,
+ EGRESS_ENDHOST,
+ SET_DTIME,
+ __MAX_CNT,
+};
+
+enum {
+ TCP_IP6_CLEAR_DTIME,
+ TCP_IP4,
+ TCP_IP6,
+ UDP_IP4,
+ UDP_IP6,
+ TCP_IP4_RT_FWD,
+ TCP_IP6_RT_FWD,
+ UDP_IP4_RT_FWD,
+ UDP_IP6_RT_FWD,
+ UKN_TEST,
+ __NR_TESTS,
+};
+
+enum {
+ SRC_NS = 1,
+ DST_NS,
+};
+
+__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
+__u32 errs[__NR_TESTS][__MAX_CNT] = {};
+__u32 test = 0;
+
+static void inc_dtimes(__u32 idx)
+{
+ if (test < __NR_TESTS)
+ dtimes[test][idx]++;
+ else
+ dtimes[UKN_TEST][idx]++;
+}
+
+static void inc_errs(__u32 idx)
+{
+ if (test < __NR_TESTS)
+ errs[test][idx]++;
+ else
+ errs[UKN_TEST][idx]++;
+}
+
+static int skb_proto(int type)
+{
+ return type & 0xff;
+}
+
+static int skb_ns(int type)
+{
+ return (type >> 8) & 0xff;
+}
+
+static bool fwdns_clear_dtime(void)
+{
+ return test == TCP_IP6_CLEAR_DTIME;
+}
+
+static bool bpf_fwd(void)
+{
+ return test < TCP_IP4_RT_FWD;
+}
+
+static __u8 get_proto(void)
+{
+ switch (test) {
+ case UDP_IP4:
+ case UDP_IP6:
+ case UDP_IP4_RT_FWD:
+ case UDP_IP6_RT_FWD:
+ return IPPROTO_UDP;
+ default:
+ return IPPROTO_TCP;
+ }
+}
+
+/* -1: parse error: TC_ACT_SHOT
+ * 0: not testing traffic: TC_ACT_OK
+ * >0: first byte is the inet_proto, second byte has the netns
+ * of the sender
+ */
+static int skb_get_type(struct __sk_buff *skb)
+{
+ __u16 dst_ns_port = __bpf_htons(50000 + test);
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ __u8 inet_proto = 0, ns = 0;
+ struct ipv6hdr *ip6h;
+ __u16 sport, dport;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ struct udphdr *uh;
+ void *trans;
+
+ switch (skb->protocol) {
+ case __bpf_htons(ETH_P_IP):
+ iph = data + sizeof(struct ethhdr);
+ if (iph + 1 > data_end)
+ return -1;
+ if (iph->saddr == ip4_src)
+ ns = SRC_NS;
+ else if (iph->saddr == ip4_dst)
+ ns = DST_NS;
+ inet_proto = iph->protocol;
+ trans = iph + 1;
+ break;
+ case __bpf_htons(ETH_P_IPV6):
+ ip6h = data + sizeof(struct ethhdr);
+ if (ip6h + 1 > data_end)
+ return -1;
+ if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_src}}))
+ ns = SRC_NS;
+ else if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_dst}}))
+ ns = DST_NS;
+ inet_proto = ip6h->nexthdr;
+ trans = ip6h + 1;
+ break;
+ default:
+ return 0;
+ }
+
+ /* skb is not from src_ns or dst_ns.
+ * skb is not the testing IPPROTO.
+ */
+ if (!ns || inet_proto != get_proto())
+ return 0;
+
+ switch (inet_proto) {
+ case IPPROTO_TCP:
+ th = trans;
+ if (th + 1 > data_end)
+ return -1;
+ sport = th->source;
+ dport = th->dest;
+ break;
+ case IPPROTO_UDP:
+ uh = trans;
+ if (uh + 1 > data_end)
+ return -1;
+ sport = uh->source;
+ dport = uh->dest;
+ break;
+ default:
+ return 0;
+ }
+
+ /* The skb is the testing traffic */
+ if ((ns == SRC_NS && dport == dst_ns_port) ||
+ (ns == DST_NS && sport == dst_ns_port))
+ return (ns << 8 | inet_proto);
+
+ return 0;
+}
+
+/* format: direction@iface@netns
+ * egress@veth_(src|dst)@ns_(src|dst)
+ */
+SEC("tc")
+int egress_host(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1)
+ return TC_ACT_SHOT;
+ if (!skb_type)
+ return TC_ACT_OK;
+
+ if (skb_proto(skb_type) == IPPROTO_TCP) {
+ if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC &&
+ skb->tstamp)
+ inc_dtimes(EGRESS_ENDHOST);
+ else
+ inc_errs(EGRESS_ENDHOST);
+ } else if (skb_proto(skb_type) == IPPROTO_UDP) {
+ if (skb->tstamp_type == BPF_SKB_CLOCK_TAI &&
+ skb->tstamp)
+ inc_dtimes(EGRESS_ENDHOST);
+ else
+ inc_errs(EGRESS_ENDHOST);
+ } else {
+ if (skb->tstamp_type == BPF_SKB_CLOCK_REALTIME &&
+ skb->tstamp)
+ inc_errs(EGRESS_ENDHOST);
+ }
+
+ skb->tstamp = EGRESS_ENDHOST_MAGIC;
+
+ return TC_ACT_OK;
+}
+
+/* ingress@veth_(src|dst)@ns_(src|dst) */
+SEC("tc")
+int ingress_host(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1)
+ return TC_ACT_SHOT;
+ if (!skb_type)
+ return TC_ACT_OK;
+
+ if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC &&
+ skb->tstamp == EGRESS_FWDNS_MAGIC)
+ inc_dtimes(INGRESS_ENDHOST);
+ else
+ inc_errs(INGRESS_ENDHOST);
+
+ return TC_ACT_OK;
+}
+
+/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */
+SEC("tc")
+int ingress_fwdns_prio100(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1)
+ return TC_ACT_SHOT;
+ if (!skb_type)
+ return TC_ACT_OK;
+
+ /* delivery_time is only available to the ingress
+ * if the tc-bpf checks the skb->tstamp_type.
+ */
+ if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
+ inc_errs(INGRESS_FWDNS_P100);
+
+ if (fwdns_clear_dtime())
+ skb->tstamp = 0;
+
+ return TC_ACT_UNSPEC;
+}
+
+/* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */
+SEC("tc")
+int egress_fwdns_prio100(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1)
+ return TC_ACT_SHOT;
+ if (!skb_type)
+ return TC_ACT_OK;
+
+ /* delivery_time is always available to egress even
+ * the tc-bpf did not use the tstamp_type.
+ */
+ if (skb->tstamp == INGRESS_FWDNS_MAGIC)
+ inc_dtimes(EGRESS_FWDNS_P100);
+ else
+ inc_errs(EGRESS_FWDNS_P100);
+
+ if (fwdns_clear_dtime())
+ skb->tstamp = 0;
+
+ return TC_ACT_UNSPEC;
+}
+
+/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */
+SEC("tc")
+int ingress_fwdns_prio101(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1 || !skb_type)
+ /* Should have handled in prio100 */
+ return TC_ACT_SHOT;
+
+ if (skb->tstamp_type) {
+ if (fwdns_clear_dtime() ||
+ (skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC &&
+ skb->tstamp_type != BPF_SKB_CLOCK_TAI) ||
+ skb->tstamp != EGRESS_ENDHOST_MAGIC)
+ inc_errs(INGRESS_FWDNS_P101);
+ else
+ inc_dtimes(INGRESS_FWDNS_P101);
+ } else {
+ if (!fwdns_clear_dtime())
+ inc_errs(INGRESS_FWDNS_P101);
+ }
+
+ if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) {
+ skb->tstamp = INGRESS_FWDNS_MAGIC;
+ } else {
+ if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+ BPF_SKB_CLOCK_MONOTONIC))
+ inc_errs(SET_DTIME);
+ }
+
+ if (skb_ns(skb_type) == SRC_NS)
+ return bpf_fwd() ?
+ bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK;
+ else
+ return bpf_fwd() ?
+ bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK;
+}
+
+/* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */
+SEC("tc")
+int egress_fwdns_prio101(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1 || !skb_type)
+ /* Should have handled in prio100 */
+ return TC_ACT_SHOT;
+
+ if (skb->tstamp_type) {
+ if (fwdns_clear_dtime() ||
+ skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC ||
+ skb->tstamp != INGRESS_FWDNS_MAGIC)
+ inc_errs(EGRESS_FWDNS_P101);
+ else
+ inc_dtimes(EGRESS_FWDNS_P101);
+ } else {
+ if (!fwdns_clear_dtime())
+ inc_errs(EGRESS_FWDNS_P101);
+ }
+
+ if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) {
+ skb->tstamp = EGRESS_FWDNS_MAGIC;
+ } else {
+ if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
+ BPF_SKB_CLOCK_MONOTONIC))
+ inc_errs(SET_DTIME);
+ }
+
+ return TC_ACT_OK;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
index bf28814bfde5..4f6f03122d61 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_edt.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -14,22 +14,22 @@
#define TIME_HORIZON_NS (2000 * 1000 * 1000)
#define NS_PER_SEC 1000000000
#define ECN_HORIZON_NS 5000000
-#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
/* flow_key => last_tstamp timestamp used */
-struct bpf_map_def SEC("maps") flow_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(uint32_t),
- .value_size = sizeof(uint64_t),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, uint32_t);
+ __type(value, uint64_t);
+ __uint(max_entries, 1);
+} flow_map SEC(".maps");
+
+__uint64_t target_rate;
static inline int throttle_flow(struct __sk_buff *skb)
{
int key = 0;
uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key);
- uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC /
- THROTTLE_RATE_BPS;
+ uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC / target_rate;
uint64_t now = bpf_ktime_get_ns();
uint64_t tstamp, next_tstamp = 0;
@@ -70,7 +70,7 @@ static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
if ((void *)(tcp + 1) > data_end)
return TC_ACT_SHOT;
- if (tcp->dest == bpf_htons(9000))
+ if (tcp->source == bpf_htons(9000))
return throttle_flow(skb);
return TC_ACT_OK;
@@ -99,7 +99,8 @@ static inline int handle_ipv4(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("cls_test") int tc_prog(struct __sk_buff *skb)
+SEC("tc")
+int tc_prog(struct __sk_buff *skb)
{
if (skb->protocol == bpf_htons(ETH_P_IP))
return handle_ipv4(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
new file mode 100644
index 000000000000..630f12e51b07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/stddef.h>
+#include <linux/if_packet.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+bool seen_tc1;
+bool seen_tc2;
+bool seen_tc3;
+bool seen_tc4;
+bool seen_tc5;
+bool seen_tc6;
+bool seen_tc7;
+bool seen_tc8;
+
+bool set_type;
+
+bool seen_eth;
+bool seen_host;
+bool seen_mcast;
+
+int mark, prio;
+unsigned short headroom, tailroom;
+
+SEC("tc/ingress")
+int tc1(struct __sk_buff *skb)
+{
+ struct ethhdr eth = {};
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+ goto out;
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
+ goto out;
+ seen_eth = eth.h_proto == bpf_htons(ETH_P_IP);
+ seen_host = skb->pkt_type == PACKET_HOST;
+ if (seen_host && set_type) {
+ eth.h_dest[0] = 4;
+ if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0))
+ goto fail;
+ bpf_skb_change_type(skb, PACKET_MULTICAST);
+ }
+out:
+ seen_tc1 = true;
+fail:
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc2(struct __sk_buff *skb)
+{
+ seen_tc2 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc3(struct __sk_buff *skb)
+{
+ seen_tc3 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc4(struct __sk_buff *skb)
+{
+ seen_tc4 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc5(struct __sk_buff *skb)
+{
+ seen_tc5 = true;
+ return TCX_PASS;
+}
+
+SEC("tc/egress")
+int tc6(struct __sk_buff *skb)
+{
+ seen_tc6 = true;
+ return TCX_PASS;
+}
+
+SEC("tc/ingress")
+int tc7(struct __sk_buff *skb)
+{
+ struct ethhdr eth = {};
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+ goto out;
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
+ goto out;
+ if (eth.h_dest[0] == 4 && set_type) {
+ seen_mcast = skb->pkt_type == PACKET_MULTICAST;
+ bpf_skb_change_type(skb, PACKET_HOST);
+ }
+out:
+ seen_tc7 = true;
+ return TCX_PASS;
+}
+
+struct sk_buff {
+ struct net_device *dev;
+};
+
+struct net_device {
+ unsigned short needed_headroom;
+ unsigned short needed_tailroom;
+};
+
+SEC("tc/egress")
+int tc8(struct __sk_buff *skb)
+{
+ struct net_device *dev = BPF_CORE_READ((struct sk_buff *)skb, dev);
+
+ seen_tc8 = true;
+ mark = skb->mark;
+ prio = skb->priority;
+ headroom = BPF_CORE_READ(dev, needed_headroom);
+ tailroom = BPF_CORE_READ(dev, needed_tailroom);
+ return TCX_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index b985ac4e7a81..de15155f2609 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -33,17 +33,8 @@
a.s6_addr32[3] == b.s6_addr32[3])
#endif
-enum {
- dev_src,
- dev_dst,
-};
-
-struct bpf_map_def SEC("maps") ifindex_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
__be32 addr)
@@ -79,14 +70,8 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
return v6_equal(ip6h->daddr, addr);
}
-static __always_inline int get_dev_ifindex(int which)
-{
- int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
-
- return ifindex ? *ifindex : 0;
-}
-
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("tc")
+int tc_chk(struct __sk_buff *skb)
{
void *data_end = ctx_ptr(skb->data_end);
void *data = ctx_ptr(skb->data);
@@ -98,7 +83,8 @@ SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
}
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("tc")
+int tc_dst(struct __sk_buff *skb)
{
__u8 zero[ETH_ALEN * 2];
bool redirect = false;
@@ -108,7 +94,7 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src));
break;
case __bpf_constant_htons(ETH_P_IPV6):
- redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src);
+ redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_src}});
break;
}
@@ -119,10 +105,11 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
return TC_ACT_SHOT;
- return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0);
+ return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0);
}
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("tc")
+int tc_src(struct __sk_buff *skb)
{
__u8 zero[ETH_ALEN * 2];
bool redirect = false;
@@ -132,7 +119,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst));
break;
case __bpf_constant_htons(ETH_P_IPV6):
- redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst);
+ redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_dst}});
break;
}
@@ -143,7 +130,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
return TC_ACT_SHOT;
- return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0);
+ return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0);
}
char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
index d82ed3457030..ec4cce19362d 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -75,7 +75,8 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
return 0;
}
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("tc")
+int tc_chk(struct __sk_buff *skb)
{
void *data_end = ctx_ptr(skb->data_end);
void *data = ctx_ptr(skb->data);
@@ -142,12 +143,14 @@ static __always_inline int tc_redir(struct __sk_buff *skb)
/* these are identical, but keep them separate for compatibility with the
* section names expected by test_tc_redirect.sh
*/
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("tc")
+int tc_dst(struct __sk_buff *skb)
{
return tc_redir(skb);
}
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("tc")
+int tc_src(struct __sk_buff *skb)
{
return tc_redir(skb);
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index fc84a7685aa2..365eacb5dc34 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,41 +5,59 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
-enum {
- dev_src,
- dev_dst,
-};
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
-struct bpf_map_def SEC("maps") ifindex_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2,
-};
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
-static __always_inline int get_dev_ifindex(int which)
+SEC("tc")
+int tc_chk(struct __sk_buff *skb)
{
- int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+ return TC_ACT_SHOT;
+}
- return ifindex ? *ifindex : 0;
+SEC("tc")
+int tc_dst(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(IFINDEX_SRC, 0);
}
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("tc")
+int tc_src(struct __sk_buff *skb)
{
- return TC_ACT_SHOT;
+ return bpf_redirect_peer(IFINDEX_DST, 0);
}
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("tc")
+int tc_dst_l3(struct __sk_buff *skb)
{
- return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
+ return bpf_redirect(IFINDEX_SRC, 0);
}
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("tc")
+int tc_src_l3(struct __sk_buff *skb)
{
- return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(IFINDEX_DST, 0);
}
char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a7c394..7330c61b5730 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -2,36 +2,47 @@
/* In-place tunneling */
-#include <stdbool.h>
-#include <string.h>
-
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/mpls.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/pkt_cls.h>
-#include <linux/types.h>
+#include <vmlinux.h>
-#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
+#include "bpf_compiler.h"
+
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
static const int cfg_port = 8000;
static const int cfg_udp_src = 20000;
+#define ETH_P_MPLS_UC 0x8847
+#define ETH_P_TEB 0x6558
+
+#define MPLS_LS_S_MASK 0x00000100
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) \
+ (((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
+
+#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
+
#define UDP_PORT 5555
#define MPLS_OVER_UDP_PORT 6635
#define ETH_OVER_UDP_PORT 7777
+#define VXLAN_UDP_PORT 8472
+
+#define EXTPROTO_VXLAN 0x1
+
+#define VXLAN_FLAGS bpf_htonl(1<<27)
+#define VNI_ID 1
+#define VXLAN_VNI bpf_htonl(VNI_ID << 8)
+
+#ifndef NEXTHDR_DEST
+#define NEXTHDR_DEST 60
+#endif
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
MPLS_LS_S_MASK | 0xff);
-
struct gre_hdr {
__be16 flags;
__be16 protocol;
@@ -45,13 +56,13 @@ union l4hdr {
struct v4hdr {
struct iphdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
struct v6hdr {
struct ipv6hdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -62,21 +73,22 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
csum += *iph16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
}
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
- __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
{
+ struct iphdr iph_inner = {0};
__u16 udp_dst = UDP_PORT;
- struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
int tcp_off;
__u64 flags;
@@ -99,7 +111,6 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
/* Derive the IPv4 header fields from the IPv6 header */
- memset(&iph_inner, 0, sizeof(iph_inner));
iph_inner.version = 4;
iph_inner.ihl = 5;
iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
@@ -141,7 +152,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +186,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = VXLAN_VNI;
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
+
break;
}
olen += l2_len;
@@ -214,14 +241,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto)
{
+ return __encap_ipv4(skb, encap_proto, l2_proto, 0);
+}
+
+static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
+{
__u16 udp_dst = UDP_PORT;
struct ipv6hdr iph_inner;
struct v6hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
__u16 tot_len;
__u64 flags;
@@ -249,7 +283,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -267,7 +305,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
- sizeof(h_outer.l4hdr.udp);
+ sizeof(h_outer.l4hdr.udp) + l2_len;
h_outer.l4hdr.udp.check = 0;
h_outer.l4hdr.udp.len = bpf_htons(tot_len);
break;
@@ -278,13 +316,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = VXLAN_VNI;
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
break;
}
@@ -309,7 +358,67 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
-SEC("encap_ipip_none")
+static int encap_ipv6_ipip6(struct __sk_buff *skb)
+{
+ struct v6hdr h_outer = {0};
+ struct iphdr iph_inner;
+ struct tcphdr tcph;
+ struct ethhdr eth;
+ __u64 flags;
+ int olen;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+ sizeof(iph_inner)) < 0)
+ return TC_ACT_OK;
+
+ /* filter only packets we want */
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
+ &tcph, sizeof(tcph)) < 0)
+ return TC_ACT_OK;
+
+ if (tcph.dest != __bpf_constant_htons(cfg_port))
+ return TC_ACT_OK;
+
+ olen = sizeof(h_outer.ip);
+
+ flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+
+ /* add room between mac and network header */
+ if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+ return TC_ACT_SHOT;
+
+ /* prepare new outer network header */
+ h_outer.ip.version = 6;
+ h_outer.ip.hop_limit = iph_inner.ttl;
+ h_outer.ip.saddr.in6_u.u6_addr8[1] = 0xfd;
+ h_outer.ip.saddr.in6_u.u6_addr8[15] = 1;
+ h_outer.ip.daddr.in6_u.u6_addr8[1] = 0xfd;
+ h_outer.ip.daddr.in6_u.u6_addr8[15] = 2;
+ h_outer.ip.payload_len = iph_inner.tot_len;
+ h_outer.ip.nexthdr = IPPROTO_IPIP;
+
+ /* store new outer network header */
+ if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+ BPF_F_INVALIDATE_HASH) < 0)
+ return TC_ACT_SHOT;
+
+ /* update eth->h_proto */
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
+ return TC_ACT_SHOT;
+ eth.h_proto = bpf_htons(ETH_P_IPV6);
+ if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return TC_ACT_OK;
+}
+
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
+{
+ return __encap_ipv6(skb, encap_proto, l2_proto, 0);
+}
+
+SEC("tc")
int __encap_ipip_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -318,7 +427,7 @@ int __encap_ipip_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_gre_none")
+SEC("tc")
int __encap_gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -327,7 +436,7 @@ int __encap_gre_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_gre_mpls")
+SEC("tc")
int __encap_gre_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -336,7 +445,7 @@ int __encap_gre_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_gre_eth")
+SEC("tc")
int __encap_gre_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -345,7 +454,7 @@ int __encap_gre_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_udp_none")
+SEC("tc")
int __encap_udp_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -354,7 +463,7 @@ int __encap_udp_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_udp_mpls")
+SEC("tc")
int __encap_udp_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -363,7 +472,7 @@ int __encap_udp_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_udp_eth")
+SEC("tc")
int __encap_udp_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -372,7 +481,18 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_sit_none")
+SEC("tc")
+int __encap_vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return __encap_ipv4(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("tc")
int __encap_sit_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -381,7 +501,7 @@ int __encap_sit_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6tnl_none")
+SEC("tc")
int __encap_ip6tnl_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -390,7 +510,16 @@ int __encap_ip6tnl_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6gre_none")
+SEC("tc")
+int __encap_ipip6_none(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv6_ipip6(skb);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("tc")
int __encap_ip6gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -399,7 +528,7 @@ int __encap_ip6gre_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6gre_mpls")
+SEC("tc")
int __encap_ip6gre_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -408,7 +537,7 @@ int __encap_ip6gre_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6gre_eth")
+SEC("tc")
int __encap_ip6gre_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -417,7 +546,7 @@ int __encap_ip6gre_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6udp_none")
+SEC("tc")
int __encap_ip6udp_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -426,7 +555,7 @@ int __encap_ip6udp_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6udp_mpls")
+SEC("tc")
int __encap_ip6udp_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -435,7 +564,7 @@ int __encap_ip6udp_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6udp_eth")
+SEC("tc")
int __encap_ip6udp_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -444,16 +573,46 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("tc")
+int __encap_ip6vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return __encap_ipv6(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
- char buf[sizeof(struct v6hdr)];
+ __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
+ struct ipv6_opt_hdr ip6_opt_hdr;
struct gre_hdr greh;
struct udphdr udph;
int olen = len;
switch (proto) {
case IPPROTO_IPIP:
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+ break;
case IPPROTO_IPV6:
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+ break;
+ case NEXTHDR_DEST:
+ if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
+ sizeof(ip6_opt_hdr)) < 0)
+ return TC_ACT_OK;
+ switch (ip6_opt_hdr.nexthdr) {
+ case IPPROTO_IPIP:
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+ break;
+ case IPPROTO_IPV6:
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+ break;
+ default:
+ return TC_ACT_OK;
+ }
break;
case IPPROTO_GRE:
olen += sizeof(struct gre_hdr);
@@ -479,14 +638,16 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
case ETH_OVER_UDP_PORT:
olen += ETH_HLEN;
break;
+ case VXLAN_UDP_PORT:
+ olen += ETH_HLEN + sizeof(struct vxlanhdr);
+ break;
}
break;
default:
return TC_ACT_OK;
}
- if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
- BPF_F_ADJ_ROOM_FIXED_GSO))
+ if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
return TC_ACT_SHOT;
return TC_ACT_OK;
@@ -519,7 +680,7 @@ static int decap_ipv6(struct __sk_buff *skb)
iph_outer.nexthdr);
}
-SEC("decap")
+SEC("tc")
int decap_f(struct __sk_buff *skb)
{
switch (skb->protocol) {
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
deleted file mode 100644
index 47cbe2eeae43..000000000000
--- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+++ /dev/null
@@ -1,167 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-// Copyright (c) 2019 Cloudflare
-
-#include <string.h>
-
-#include <linux/bpf.h>
-#include <linux/pkt_cls.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <sys/socket.h>
-#include <linux/tcp.h>
-
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_endian.h>
-
-struct bpf_map_def SEC("maps") results = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 3,
-};
-
-static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
- void *iph, __u32 ip_size,
- struct tcphdr *tcph)
-{
- __u32 thlen = tcph->doff * 4;
-
- if (tcph->syn && !tcph->ack) {
- // packet should only have an MSS option
- if (thlen != 24)
- return 0;
-
- if ((void *)tcph + thlen > data_end)
- return 0;
-
- return bpf_tcp_gen_syncookie(sk, iph, ip_size, tcph, thlen);
- }
- return 0;
-}
-
-static __always_inline void check_syncookie(void *ctx, void *data,
- void *data_end)
-{
- struct bpf_sock_tuple tup;
- struct bpf_sock *sk;
- struct ethhdr *ethh;
- struct iphdr *ipv4h;
- struct ipv6hdr *ipv6h;
- struct tcphdr *tcph;
- int ret;
- __u32 key_mss = 2;
- __u32 key_gen = 1;
- __u32 key = 0;
- __s64 seq_mss;
-
- ethh = data;
- if (ethh + 1 > data_end)
- return;
-
- switch (bpf_ntohs(ethh->h_proto)) {
- case ETH_P_IP:
- ipv4h = data + sizeof(struct ethhdr);
- if (ipv4h + 1 > data_end)
- return;
-
- if (ipv4h->ihl != 5)
- return;
-
- tcph = data + sizeof(struct ethhdr) + sizeof(struct iphdr);
- if (tcph + 1 > data_end)
- return;
-
- tup.ipv4.saddr = ipv4h->saddr;
- tup.ipv4.daddr = ipv4h->daddr;
- tup.ipv4.sport = tcph->source;
- tup.ipv4.dport = tcph->dest;
-
- sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4),
- BPF_F_CURRENT_NETNS, 0);
- if (!sk)
- return;
-
- if (sk->state != BPF_TCP_LISTEN)
- goto release;
-
- seq_mss = gen_syncookie(data_end, sk, ipv4h, sizeof(*ipv4h),
- tcph);
-
- ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h),
- tcph, sizeof(*tcph));
- break;
-
- case ETH_P_IPV6:
- ipv6h = data + sizeof(struct ethhdr);
- if (ipv6h + 1 > data_end)
- return;
-
- if (ipv6h->nexthdr != IPPROTO_TCP)
- return;
-
- tcph = data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
- if (tcph + 1 > data_end)
- return;
-
- memcpy(tup.ipv6.saddr, &ipv6h->saddr, sizeof(tup.ipv6.saddr));
- memcpy(tup.ipv6.daddr, &ipv6h->daddr, sizeof(tup.ipv6.daddr));
- tup.ipv6.sport = tcph->source;
- tup.ipv6.dport = tcph->dest;
-
- sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv6),
- BPF_F_CURRENT_NETNS, 0);
- if (!sk)
- return;
-
- if (sk->state != BPF_TCP_LISTEN)
- goto release;
-
- seq_mss = gen_syncookie(data_end, sk, ipv6h, sizeof(*ipv6h),
- tcph);
-
- ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h),
- tcph, sizeof(*tcph));
- break;
-
- default:
- return;
- }
-
- if (seq_mss > 0) {
- __u32 cookie = (__u32)seq_mss;
- __u32 mss = seq_mss >> 32;
-
- bpf_map_update_elem(&results, &key_gen, &cookie, 0);
- bpf_map_update_elem(&results, &key_mss, &mss, 0);
- }
-
- if (ret == 0) {
- __u32 cookie = bpf_ntohl(tcph->ack_seq) - 1;
-
- bpf_map_update_elem(&results, &key, &cookie, 0);
- }
-
-release:
- bpf_sk_release(sk);
-}
-
-SEC("clsact/check_syncookie")
-int check_syncookie_clsact(struct __sk_buff *skb)
-{
- check_syncookie(skb, (void *)(long)skb->data,
- (void *)(long)skb->data_end);
- return TC_ACT_OK;
-}
-
-SEC("xdp/check_syncookie")
-int check_syncookie_xdp(struct xdp_md *ctx)
-{
- check_syncookie(ctx, (void *)(long)ctx->data,
- (void *)(long)ctx->data_end);
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
new file mode 100644
index 000000000000..7d5293de1952
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
+#include "bpf_kfuncs.h"
+#include "test_siphash.h"
+#include "test_tcp_custom_syncookie.h"
+#include "bpf_misc.h"
+
+#define MAX_PACKET_OFF 0xffff
+
+/* Hash is calculated for each client and split into ISN and TS.
+ *
+ * MSB LSB
+ * ISN: | 31 ... 8 | 7 6 | 5 | 4 | 3 2 1 0 |
+ * | Hash_1 | MSS | ECN | SACK | WScale |
+ *
+ * TS: | 31 ... 8 | 7 ... 0 |
+ * | Random | Hash_2 |
+ */
+#define COOKIE_BITS 8
+#define COOKIE_MASK (((__u32)1 << COOKIE_BITS) - 1)
+
+enum {
+ /* 0xf is invalid thus means that SYN did not have WScale. */
+ BPF_SYNCOOKIE_WSCALE_MASK = (1 << 4) - 1,
+ BPF_SYNCOOKIE_SACK = (1 << 4),
+ BPF_SYNCOOKIE_ECN = (1 << 5),
+};
+
+#define MSS_LOCAL_IPV4 65495
+#define MSS_LOCAL_IPV6 65476
+
+const __u16 msstab4[] = {
+ 536,
+ 1300,
+ 1460,
+ MSS_LOCAL_IPV4,
+};
+
+const __u16 msstab6[] = {
+ 1280 - 60, /* IPV6_MIN_MTU - 60 */
+ 1480 - 60,
+ 9000 - 60,
+ MSS_LOCAL_IPV6,
+};
+
+static siphash_key_t test_key_siphash = {
+ { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }
+};
+
+struct tcp_syncookie {
+ struct __sk_buff *skb;
+ void *data;
+ void *data_end;
+ struct ethhdr *eth;
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ struct tcphdr *tcp;
+ __be32 *ptr32;
+ struct bpf_tcp_req_attrs attrs;
+ u32 off;
+ u32 cookie;
+ u64 first;
+};
+
+bool handled_syn, handled_ack;
+
+static int tcp_load_headers(struct tcp_syncookie *ctx)
+{
+ ctx->data = (void *)(long)ctx->skb->data;
+ ctx->data_end = (void *)(long)ctx->skb->data_end;
+ ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
+
+ if (ctx->eth + 1 > ctx->data_end)
+ goto err;
+
+ switch (bpf_ntohs(ctx->eth->h_proto)) {
+ case ETH_P_IP:
+ ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
+
+ if (ctx->ipv4 + 1 > ctx->data_end)
+ goto err;
+
+ if (ctx->ipv4->ihl != sizeof(*ctx->ipv4) / 4)
+ goto err;
+
+ if (ctx->ipv4->version != 4)
+ goto err;
+
+ if (ctx->ipv4->protocol != IPPROTO_TCP)
+ goto err;
+
+ ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
+ break;
+ case ETH_P_IPV6:
+ ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
+
+ if (ctx->ipv6 + 1 > ctx->data_end)
+ goto err;
+
+ if (ctx->ipv6->version != 6)
+ goto err;
+
+ if (ctx->ipv6->nexthdr != NEXTHDR_TCP)
+ goto err;
+
+ ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
+ break;
+ default:
+ goto err;
+ }
+
+ if (ctx->tcp + 1 > ctx->data_end)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int tcp_reload_headers(struct tcp_syncookie *ctx)
+{
+ /* Without volatile,
+ * R3 32-bit pointer arithmetic prohibited
+ */
+ volatile u64 data_len = ctx->skb->data_end - ctx->skb->data;
+
+ if (ctx->tcp->doff < sizeof(*ctx->tcp) / 4)
+ goto err;
+
+ /* Needed to calculate csum and parse TCP options. */
+ if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0))
+ goto err;
+
+ ctx->data = (void *)(long)ctx->skb->data;
+ ctx->data_end = (void *)(long)ctx->skb->data_end;
+ ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
+ if (ctx->ipv4) {
+ ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
+ ctx->ipv6 = NULL;
+ ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
+ } else {
+ ctx->ipv4 = NULL;
+ ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
+ ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
+ }
+
+ if ((void *)ctx->tcp + 60 > ctx->data_end)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static __sum16 tcp_v4_csum(struct tcp_syncookie *ctx, __wsum csum)
+{
+ return csum_tcpudp_magic(ctx->ipv4->saddr, ctx->ipv4->daddr,
+ ctx->tcp->doff * 4, IPPROTO_TCP, csum);
+}
+
+static __sum16 tcp_v6_csum(struct tcp_syncookie *ctx, __wsum csum)
+{
+ return csum_ipv6_magic(&ctx->ipv6->saddr, &ctx->ipv6->daddr,
+ ctx->tcp->doff * 4, IPPROTO_TCP, csum);
+}
+
+static int tcp_validate_header(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_reload_headers(ctx))
+ goto err;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (ctx->ipv4) {
+ /* check tcp_v4_csum(csum) is 0 if not on lo. */
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (csum_fold(csum) != 0)
+ goto err;
+ } else if (ctx->ipv6) {
+ /* check tcp_v6_csum(csum) is 0 if not on lo. */
+ }
+
+ return 0;
+err:
+ return -1;
+}
+
+static __always_inline void *next(struct tcp_syncookie *ctx, __u32 sz)
+{
+ __u64 off = ctx->off;
+ __u8 *data;
+
+ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+ if (off > MAX_PACKET_OFF - sz)
+ return NULL;
+
+ data = ctx->data + off;
+ barrier_var(data);
+ if (data + sz >= ctx->data_end)
+ return NULL;
+
+ ctx->off += sz;
+ return data;
+}
+
+static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
+{
+ __u8 *opcode, *opsize, *wscale;
+ __u32 *tsval, *tsecr;
+ __u16 *mss;
+ __u32 off;
+
+ off = ctx->off;
+ opcode = next(ctx, 1);
+ if (!opcode)
+ goto stop;
+
+ if (*opcode == TCPOPT_EOL)
+ goto stop;
+
+ if (*opcode == TCPOPT_NOP)
+ goto next;
+
+ opsize = next(ctx, 1);
+ if (!opsize)
+ goto stop;
+
+ if (*opsize < 2)
+ goto stop;
+
+ switch (*opcode) {
+ case TCPOPT_MSS:
+ mss = next(ctx, 2);
+ if (*opsize == TCPOLEN_MSS && ctx->tcp->syn && mss)
+ ctx->attrs.mss = get_unaligned_be16(mss);
+ break;
+ case TCPOPT_WINDOW:
+ wscale = next(ctx, 1);
+ if (*opsize == TCPOLEN_WINDOW && ctx->tcp->syn && wscale) {
+ ctx->attrs.wscale_ok = 1;
+ ctx->attrs.snd_wscale = *wscale;
+ }
+ break;
+ case TCPOPT_TIMESTAMP:
+ tsval = next(ctx, 4);
+ tsecr = next(ctx, 4);
+ if (*opsize == TCPOLEN_TIMESTAMP && tsval && tsecr) {
+ ctx->attrs.rcv_tsval = get_unaligned_be32(tsval);
+ ctx->attrs.rcv_tsecr = get_unaligned_be32(tsecr);
+
+ if (ctx->tcp->syn && ctx->attrs.rcv_tsecr)
+ ctx->attrs.tstamp_ok = 0;
+ else
+ ctx->attrs.tstamp_ok = 1;
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (*opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn)
+ ctx->attrs.sack_ok = 1;
+ break;
+ }
+
+ ctx->off = off + *opsize;
+next:
+ return 0;
+stop:
+ return 1;
+}
+
+static void tcp_parse_options(struct tcp_syncookie *ctx)
+{
+ ctx->off = (__u8 *)(ctx->tcp + 1) - (__u8 *)ctx->data,
+
+ bpf_loop(40, tcp_parse_option, ctx, 0);
+}
+
+static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
+{
+ if ((ctx->ipv4 && ctx->attrs.mss != MSS_LOCAL_IPV4) ||
+ (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
+ goto err;
+
+ if (!ctx->attrs.wscale_ok ||
+ !ctx->attrs.snd_wscale ||
+ ctx->attrs.snd_wscale >= BPF_SYNCOOKIE_WSCALE_MASK)
+ goto err;
+
+ if (!ctx->attrs.tstamp_ok)
+ goto err;
+
+ if (!ctx->attrs.sack_ok)
+ goto err;
+
+ if (!ctx->tcp->ece || !ctx->tcp->cwr)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static void tcp_prepare_cookie(struct tcp_syncookie *ctx)
+{
+ u32 seq = bpf_ntohl(ctx->tcp->seq);
+ u64 first = 0, second;
+ int mssind = 0;
+ u32 hash;
+
+ if (ctx->ipv4) {
+ for (mssind = ARRAY_SIZE(msstab4) - 1; mssind; mssind--)
+ if (ctx->attrs.mss >= msstab4[mssind])
+ break;
+
+ ctx->attrs.mss = msstab4[mssind];
+
+ first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
+ } else if (ctx->ipv6) {
+ for (mssind = ARRAY_SIZE(msstab6) - 1; mssind; mssind--)
+ if (ctx->attrs.mss >= msstab6[mssind])
+ break;
+
+ ctx->attrs.mss = msstab6[mssind];
+
+ first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
+ ctx->ipv6->daddr.in6_u.u6_addr32[0];
+ }
+
+ second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
+ hash = siphash_2u64(first, second, &test_key_siphash);
+
+ if (ctx->attrs.tstamp_ok) {
+ ctx->attrs.rcv_tsecr = bpf_get_prandom_u32();
+ ctx->attrs.rcv_tsecr &= ~COOKIE_MASK;
+ ctx->attrs.rcv_tsecr |= hash & COOKIE_MASK;
+ }
+
+ hash &= ~COOKIE_MASK;
+ hash |= mssind << 6;
+
+ if (ctx->attrs.wscale_ok)
+ hash |= ctx->attrs.snd_wscale & BPF_SYNCOOKIE_WSCALE_MASK;
+
+ if (ctx->attrs.sack_ok)
+ hash |= BPF_SYNCOOKIE_SACK;
+
+ if (ctx->attrs.tstamp_ok && ctx->tcp->ece && ctx->tcp->cwr)
+ hash |= BPF_SYNCOOKIE_ECN;
+
+ ctx->cookie = hash;
+}
+
+static void tcp_write_options(struct tcp_syncookie *ctx)
+{
+ ctx->ptr32 = (__be32 *)(ctx->tcp + 1);
+
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_MSS << 24 | TCPOLEN_MSS << 16 |
+ ctx->attrs.mss);
+
+ if (ctx->attrs.wscale_ok)
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_WINDOW << 16 |
+ TCPOLEN_WINDOW << 8 |
+ ctx->attrs.snd_wscale);
+
+ if (ctx->attrs.tstamp_ok) {
+ if (ctx->attrs.sack_ok)
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_SACK_PERM << 24 |
+ TCPOLEN_SACK_PERM << 16 |
+ TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP);
+ else
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 |
+ TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP);
+
+ *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsecr);
+ *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsval);
+ } else if (ctx->attrs.sack_ok) {
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 |
+ TCPOPT_SACK_PERM << 8 |
+ TCPOLEN_SACK_PERM);
+ }
+}
+
+static int tcp_handle_syn(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_validate_header(ctx))
+ goto err;
+
+ tcp_parse_options(ctx);
+
+ if (tcp_validate_sysctl(ctx))
+ goto err;
+
+ tcp_prepare_cookie(ctx);
+ tcp_write_options(ctx);
+
+ swap(ctx->tcp->source, ctx->tcp->dest);
+ ctx->tcp->check = 0;
+ ctx->tcp->ack_seq = bpf_htonl(bpf_ntohl(ctx->tcp->seq) + 1);
+ ctx->tcp->seq = bpf_htonl(ctx->cookie);
+ ctx->tcp->doff = ((long)ctx->ptr32 - (long)ctx->tcp) >> 2;
+ ctx->tcp->ack = 1;
+ if (!ctx->attrs.tstamp_ok || !ctx->tcp->ece || !ctx->tcp->cwr)
+ ctx->tcp->ece = 0;
+ ctx->tcp->cwr = 0;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (ctx->ipv4) {
+ swap(ctx->ipv4->saddr, ctx->ipv4->daddr);
+ ctx->tcp->check = tcp_v4_csum(ctx, csum);
+
+ ctx->ipv4->check = 0;
+ ctx->ipv4->tos = 0;
+ ctx->ipv4->tot_len = bpf_htons((long)ctx->ptr32 - (long)ctx->ipv4);
+ ctx->ipv4->id = 0;
+ ctx->ipv4->ttl = 64;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, sizeof(*ctx->ipv4), 0);
+ if (csum < 0)
+ goto err;
+
+ ctx->ipv4->check = csum_fold(csum);
+ } else if (ctx->ipv6) {
+ swap(ctx->ipv6->saddr, ctx->ipv6->daddr);
+ ctx->tcp->check = tcp_v6_csum(ctx, csum);
+
+ *(__be32 *)ctx->ipv6 = bpf_htonl(0x60000000);
+ ctx->ipv6->payload_len = bpf_htons((long)ctx->ptr32 - (long)ctx->tcp);
+ ctx->ipv6->hop_limit = 64;
+ }
+
+ swap_array(ctx->eth->h_source, ctx->eth->h_dest);
+
+ if (bpf_skb_change_tail(ctx->skb, (long)ctx->ptr32 - (long)ctx->eth, 0))
+ goto err;
+
+ return bpf_redirect(ctx->skb->ifindex, 0);
+err:
+ return TC_ACT_SHOT;
+}
+
+static int tcp_validate_cookie(struct tcp_syncookie *ctx)
+{
+ u32 cookie = bpf_ntohl(ctx->tcp->ack_seq) - 1;
+ u32 seq = bpf_ntohl(ctx->tcp->seq) - 1;
+ u64 first = 0, second;
+ int mssind;
+ u32 hash;
+
+ if (ctx->ipv4)
+ first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
+ else if (ctx->ipv6)
+ first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
+ ctx->ipv6->daddr.in6_u.u6_addr32[0];
+
+ second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
+ hash = siphash_2u64(first, second, &test_key_siphash);
+
+ if (ctx->attrs.tstamp_ok)
+ hash -= ctx->attrs.rcv_tsecr & COOKIE_MASK;
+ else
+ hash &= ~COOKIE_MASK;
+
+ hash -= cookie & ~COOKIE_MASK;
+ if (hash)
+ goto err;
+
+ mssind = (cookie & (3 << 6)) >> 6;
+ if (ctx->ipv4)
+ ctx->attrs.mss = msstab4[mssind];
+ else
+ ctx->attrs.mss = msstab6[mssind];
+
+ ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK;
+ ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale;
+ ctx->attrs.wscale_ok = ctx->attrs.snd_wscale == BPF_SYNCOOKIE_WSCALE_MASK;
+ ctx->attrs.sack_ok = cookie & BPF_SYNCOOKIE_SACK;
+ ctx->attrs.ecn_ok = cookie & BPF_SYNCOOKIE_ECN;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int tcp_handle_ack(struct tcp_syncookie *ctx)
+{
+ struct bpf_sock_tuple tuple;
+ struct bpf_sock *skc;
+ int ret = TC_ACT_OK;
+ struct sock *sk;
+ u32 tuple_size;
+
+ if (ctx->ipv4) {
+ tuple.ipv4.saddr = ctx->ipv4->saddr;
+ tuple.ipv4.daddr = ctx->ipv4->daddr;
+ tuple.ipv4.sport = ctx->tcp->source;
+ tuple.ipv4.dport = ctx->tcp->dest;
+ tuple_size = sizeof(tuple.ipv4);
+ } else if (ctx->ipv6) {
+ __builtin_memcpy(tuple.ipv6.saddr, &ctx->ipv6->saddr, sizeof(tuple.ipv6.saddr));
+ __builtin_memcpy(tuple.ipv6.daddr, &ctx->ipv6->daddr, sizeof(tuple.ipv6.daddr));
+ tuple.ipv6.sport = ctx->tcp->source;
+ tuple.ipv6.dport = ctx->tcp->dest;
+ tuple_size = sizeof(tuple.ipv6);
+ } else {
+ goto out;
+ }
+
+ skc = bpf_skc_lookup_tcp(ctx->skb, &tuple, tuple_size, -1, 0);
+ if (!skc)
+ goto out;
+
+ if (skc->state != TCP_LISTEN)
+ goto release;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(skc);
+ if (!sk)
+ goto err;
+
+ if (tcp_validate_header(ctx))
+ goto err;
+
+ tcp_parse_options(ctx);
+
+ if (tcp_validate_cookie(ctx))
+ goto err;
+
+ ret = bpf_sk_assign_tcp_reqsk(ctx->skb, sk, &ctx->attrs, sizeof(ctx->attrs));
+ if (ret < 0)
+ goto err;
+
+release:
+ bpf_sk_release(skc);
+out:
+ return ret;
+
+err:
+ ret = TC_ACT_SHOT;
+ goto release;
+}
+
+SEC("tc")
+int tcp_custom_syncookie(struct __sk_buff *skb)
+{
+ struct tcp_syncookie ctx = {
+ .skb = skb,
+ };
+
+ if (tcp_load_headers(&ctx))
+ return TC_ACT_OK;
+
+ if (ctx.tcp->rst)
+ return TC_ACT_OK;
+
+ if (ctx.tcp->syn) {
+ if (ctx.tcp->ack)
+ return TC_ACT_OK;
+
+ handled_syn = true;
+
+ return tcp_handle_syn(&ctx);
+ }
+
+ handled_ack = true;
+
+ return tcp_handle_ack(&ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
new file mode 100644
index 000000000000..34024de6337e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#ifndef _TEST_TCP_SYNCOOKIE_H
+#define _TEST_TCP_SYNCOOKIE_H
+
+#define __packed __attribute__((__packed__))
+#define __force
+
+#define swap(a, b) \
+ do { \
+ typeof(a) __tmp = (a); \
+ (a) = (b); \
+ (b) = __tmp; \
+ } while (0)
+
+#define swap_array(a, b) \
+ do { \
+ typeof(a) __tmp[sizeof(a)]; \
+ __builtin_memcpy(__tmp, a, sizeof(a)); \
+ __builtin_memcpy(a, b, sizeof(a)); \
+ __builtin_memcpy(b, __tmp, sizeof(a)); \
+ } while (0)
+
+/* linux/unaligned.h */
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __packed * __pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return bpf_ntohs(__get_unaligned_t(__be16, p));
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return bpf_ntohl(__get_unaligned_t(__be32, p));
+}
+
+/* lib/checksum.c */
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum)
+{
+ unsigned long long s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN
+ s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
+ return (__force __wsum)from64to32(s);
+}
+
+/* asm-generic/checksum.h */
+static inline __sum16 csum_fold(__wsum csum)
+{
+ u32 sum = (__force u32)csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (__force __sum16)~sum;
+}
+
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/* net/ipv6/ip6_checksum.c */
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ int carry;
+ __u32 ulen;
+ __u32 uproto;
+ __u32 sum = (__force u32)csum;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[0];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[1];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[2];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[3];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[3]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[0];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[1];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[2];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[3];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[3]);
+ sum += carry;
+
+ ulen = (__force u32)bpf_htonl((__u32)len);
+ sum += ulen;
+ carry = (sum < ulen);
+ sum += carry;
+
+ uproto = (__force u32)bpf_htonl(proto);
+ sum += uproto;
+ carry = (sum < uproto);
+ sum += carry;
+
+ return csum_fold((__force __wsum)sum);
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
index adc83a54c352..e2ae049c2f85 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
@@ -244,7 +244,7 @@ static __always_inline void send_basic_event(struct sock *sk,
bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
}
-SEC("dummy_tracepoint")
+SEC("tp/dummy/tracepoint")
int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
{
if (!arg->sock)
@@ -255,4 +255,3 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
index 678bd0fad29e..1ecdf4c54de4 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -14,10 +14,7 @@
#include <bpf/bpf_endian.h>
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
#include "test_tcp_hdr_options.h"
-
-#ifndef sizeof_field
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-#endif
+#include "bpf_misc.h"
__u8 test_kind = TCPOPT_EXP;
__u16 test_magic = 0xeB9F;
@@ -594,7 +591,7 @@ static int handle_parse_hdr(struct bpf_sock_ops *skops)
return CG_OK;
}
-SEC("sockops/estab")
+SEC("sockops")
int estab(struct bpf_sock_ops *skops)
{
int true_val = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 94f50f7e94d6..6935f32eeb8f 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -1,22 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stddef.h>
-#include <string.h>
-#include <netinet/in.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/tcp.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
#include "test_tcpbpf.h"
struct tcpbpf_globals global = {};
-int _version SEC("version") = 1;
/**
* SOL_TCP is defined in <netinet/tcp.h> while
@@ -47,8 +35,6 @@ int bpf_testcb(struct bpf_sock_ops *skops)
struct bpf_sock_ops *reuse = skops;
struct tcphdr *thdr;
int window_clamp = 9216;
- int good_call_rv = 0;
- int bad_call_rv = 0;
int save_syn = 1;
int rv = -1;
int v = 0;
@@ -62,7 +48,7 @@ int bpf_testcb(struct bpf_sock_ops *skops)
asm volatile (
"%[op] = *(u32 *)(%[skops] +96)"
- : [op] "+r"(op)
+ : [op] "=r"(op)
: [skops] "r"(skops)
:);
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index ac63410bb541..ef00d38b0a8d 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -23,13 +23,10 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(max_entries, 2);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(__u32));
+ __type(key, int);
+ __type(value, __u32);
} perf_event_map SEC(".maps");
-int _version SEC("version") = 1;
-
SEC("sockops")
int bpf_testcb(struct bpf_sock_ops *skops)
{
diff --git a/tools/testing/selftests/bpf/progs/test_time_tai.c b/tools/testing/selftests/bpf/progs/test_time_tai.c
new file mode 100644
index 000000000000..7ea0863f3ddb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_time_tai.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022 Linutronix GmbH */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("tc")
+int time_tai(struct __sk_buff *skb)
+{
+ __u64 ts1, ts2;
+
+ /* Get TAI timestamps */
+ ts1 = bpf_ktime_get_tai_ns();
+ ts2 = bpf_ktime_get_tai_ns();
+
+ /* Save TAI timestamps (Note: skb->hwtstamp is read-only) */
+ skb->tstamp = ts1;
+ skb->cb[0] = ts2 & 0xffffffff;
+ skb->cb[1] = ts2 >> 32;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
new file mode 100644
index 000000000000..cf0547a613ff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+SEC("tp_btf/bpf_testmod_test_nullable_bare_tp")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx)
+{
+ return nullable_ctx->len;
+}
+
+SEC("tp_btf/bpf_testmod_test_nullable_bare_tp")
+int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx)
+{
+ if (nullable_ctx)
+ return nullable_ctx->len;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c
index 4b825ee122cf..4cb8bbb6a320 100644
--- a/tools/testing/selftests/bpf/progs/test_tracepoint.c
+++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c
@@ -1,17 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
@@ -23,4 +23,3 @@ int oncpu(struct sched_switch_args *ctx)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
index f030e469d05b..7765720da7d5 100644
--- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -1,20 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stdbool.h>
-#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-struct task_struct;
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(fentry_test, int a, int *b)
+{
+ return 0;
+}
-SEC("fentry/__set_task_comm")
-int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
{
return 0;
}
-SEC("fexit/__set_task_comm")
-int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int a, int *b, int ret)
{
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index a621b58ab079..32127f1cd687 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -6,44 +6,56 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <stddef.h>
-#include <string.h>
-#include <arpa/inet.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/pkt_cls.h>
-#include <linux/erspan.h>
+#include "vmlinux.h"
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_kfuncs.h"
+#include "bpf_tracing_net.h"
-#define ERROR(ret) do {\
- char fmt[] = "ERROR line:%d ret:%d\n";\
- bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
- } while (0)
-
-int _version SEC("version") = 1;
-
-struct geneve_opt {
- __be16 opt_class;
- __u8 type;
- __u8 length:5;
- __u8 r3:1;
- __u8 r2:1;
- __u8 r1:1;
- __u8 opt_data[8]; /* hard-coded to 8 byte */
-};
+#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret)
+
+#define VXLAN_UDP_PORT 4789
+#define ETH_P_IP 0x0800
+#define PACKET_HOST 0
+#define TUNNEL_CSUM bpf_htons(0x01)
+#define TUNNEL_KEY bpf_htons(0x04)
+
+/* Only IPv4 address assigned to veth1.
+ * 172.16.1.200
+ */
+#define ASSIGNED_ADDR_VETH1 0xac1001c8
-struct vxlan_metadata {
- __u32 gbp;
+struct bpf_fou_encap___local {
+ __be16 sport;
+ __be16 dport;
+} __attribute__((preserve_access_index));
+
+enum bpf_fou_encap_type___local {
+ FOU_BPF_ENCAP_FOU___local,
+ FOU_BPF_ENCAP_GUE___local,
};
-SEC("gre_set_tunnel")
-int _gre_set_tunnel(struct __sk_buff *skb)
+struct bpf_fou_encap;
+
+int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
+ struct bpf_fou_encap *encap, int type) __ksym;
+int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
+ struct bpf_fou_encap *encap) __ksym;
+struct xfrm_state *
+bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts,
+ u32 opts__sz) __ksym;
+void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} local_ip_map SEC(".maps");
+
+SEC("tc")
+int gre_set_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
@@ -57,32 +69,52 @@ int _gre_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int gre_set_tunnel_no_key(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER |
+ BPF_F_NO_TUNNEL_KEY);
+ if (ret < 0) {
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("gre_get_tunnel")
-int _gre_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int gre_get_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
- char fmt[] = "key %d remote ip 0x%x\n";
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+ bpf_printk("key %d remote ip 0x%x\n", key.tunnel_id, key.remote_ipv4);
return TC_ACT_OK;
}
-SEC("ip6gretap_set_tunnel")
-int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6gretap_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
int ret;
@@ -98,35 +130,34 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
BPF_F_SEQ_NUMBER);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip6gretap_get_tunnel")
-int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6gretap_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip6 ::%x label %x\n";
struct bpf_tunnel_key key;
int ret;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+ bpf_printk("key %d remote ip6 ::%x label %x\n",
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
return TC_ACT_OK;
}
-SEC("erspan_set_tunnel")
-int _erspan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int erspan_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
struct erspan_metadata md;
@@ -141,7 +172,7 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
@@ -154,63 +185,58 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
__u8 hwid = 7;
md.version = 2;
- md.u.md2.dir = direction;
- md.u.md2.hwid = hwid & 0xf;
- md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
#endif
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("erspan_get_tunnel")
-int _erspan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int erspan_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
struct bpf_tunnel_key key;
struct erspan_metadata md;
- __u32 index;
int ret;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.version);
+ bpf_printk("key %d remote ip 0x%x erspan version %d\n",
+ key.tunnel_id, key.remote_ipv4, md.version);
#ifdef ERSPAN_V1
- char fmt2[] = "\tindex %x\n";
-
index = bpf_ntohl(md.u.index);
- bpf_trace_printk(fmt2, sizeof(fmt2), index);
+ bpf_printk("\tindex %x\n", index);
#else
- char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
- bpf_trace_printk(fmt2, sizeof(fmt2),
- md.u.md2.dir,
- (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
- bpf_ntohl(md.u.md2.timestamp));
+ bpf_printk("\tdirection %d hwid %x timestamp %u\n",
+ BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
+ (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
+ BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
+ bpf_ntohl(md.u.md2.timestamp));
#endif
return TC_ACT_OK;
}
-SEC("ip4ip6erspan_set_tunnel")
-int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
struct erspan_metadata md;
@@ -225,7 +251,7 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
@@ -239,70 +265,113 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
__u8 hwid = 17;
md.version = 2;
- md.u.md2.dir = direction;
- md.u.md2.hwid = hwid & 0xf;
- md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
#endif
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip4ip6erspan_get_tunnel")
-int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
struct bpf_tunnel_key key;
struct erspan_metadata md;
- __u32 index;
int ret;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.version);
+ bpf_printk("ip6erspan get key %d remote ip6 ::%x erspan version %d\n",
+ key.tunnel_id, key.remote_ipv4, md.version);
#ifdef ERSPAN_V1
- char fmt2[] = "\tindex %x\n";
-
index = bpf_ntohl(md.u.index);
- bpf_trace_printk(fmt2, sizeof(fmt2), index);
+ bpf_printk("\tindex %x\n", index);
#else
- char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
- bpf_trace_printk(fmt2, sizeof(fmt2),
- md.u.md2.dir,
- (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
- bpf_ntohl(md.u.md2.timestamp));
+ bpf_printk("\tdirection %d hwid %x timestamp %u\n",
+ BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
+ (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
+ BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
+ bpf_ntohl(md.u.md2.timestamp));
#endif
return TC_ACT_OK;
}
-SEC("vxlan_set_tunnel")
-int _vxlan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int vxlan_set_tunnel_dst(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct vxlan_metadata md;
+ __u32 index = 0;
+ __u32 *local_ip = NULL;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.local_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.remote_ipv4 = *local_ip;
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int vxlan_set_tunnel_src(struct __sk_buff *skb)
{
- int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
+ __u32 index = 0;
+ __u32 *local_ip = NULL;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
__builtin_memset(&key, 0x0, sizeof(key));
+ key.local_ipv4 = *local_ip;
key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
key.tunnel_id = 2;
key.tunnel_tos = 0;
@@ -311,53 +380,154 @@ int _vxlan_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("vxlan_get_tunnel")
-int _vxlan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int vxlan_get_tunnel_src(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
- char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_FLAGS);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.gbp);
+ if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF ||
+ !(key.tunnel_flags & TUNNEL_KEY) ||
+ (key.tunnel_flags & TUNNEL_CSUM)) {
+ bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x flags 0x%x\n",
+ key.tunnel_id, key.local_ipv4,
+ key.remote_ipv4, md.gbp,
+ bpf_ntohs(key.tunnel_flags));
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
return TC_ACT_OK;
}
-SEC("ip6vxlan_set_tunnel")
-int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int veth_set_outer_dst(struct __sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)(long)skb->data;
+ __u32 assigned_ip = bpf_htonl(ASSIGNED_ADDR_VETH1);
+ void *data_end = (void *)(long)skb->data_end;
+ struct udphdr *udph;
+ struct iphdr *iph;
+ int ret = 0;
+ __s64 csum;
+
+ if ((void *)eth + sizeof(*eth) > data_end) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ iph = (struct iphdr *)(eth + 1);
+ if ((void *)iph + sizeof(*iph) > data_end) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ if (iph->protocol != IPPROTO_UDP)
+ return TC_ACT_OK;
+
+ udph = (struct udphdr *)(iph + 1);
+ if ((void *)udph + sizeof(*udph) > data_end) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ if (udph->dest != bpf_htons(VXLAN_UDP_PORT))
+ return TC_ACT_OK;
+
+ if (iph->daddr != assigned_ip) {
+ csum = bpf_csum_diff(&iph->daddr, sizeof(__u32), &assigned_ip,
+ sizeof(__u32), 0);
+ if (bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr),
+ &assigned_ip, sizeof(__u32), 0) < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ if (bpf_l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check),
+ 0, csum, 0) < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ bpf_skb_change_type(skb, PACKET_HOST);
+ }
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- int ret;
+ __u32 index = 0;
+ __u32 *local_ip;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
__builtin_memset(&key, 0x0, sizeof(key));
+ key.local_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.remote_ipv6[3] = bpf_htonl(*local_ip);
+ key.tunnel_id = 22;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ip6vxlan_set_tunnel_src(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ __u32 index = 0;
+ __u32 *local_ip;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.local_ipv6[3] = bpf_htonl(*local_ip);
key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
key.tunnel_id = 22;
key.tunnel_tos = 0;
@@ -366,39 +536,61 @@ int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip6vxlan_get_tunnel")
-int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip6 ::%x label %x\n";
struct bpf_tunnel_key key;
- int ret;
+ __u32 index = 0;
+ __u32 *local_ip;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
+ BPF_F_TUNINFO_IPV6 | BPF_F_TUNINFO_FLAGS);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+ if (bpf_ntohl(key.local_ipv6[3]) != *local_ip ||
+ !(key.tunnel_flags & TUNNEL_KEY) ||
+ !(key.tunnel_flags & TUNNEL_CSUM)) {
+ bpf_printk("ip6vxlan key %d local ip6 ::%x remote ip6 ::%x label 0x%x flags 0x%x\n",
+ key.tunnel_id, bpf_ntohl(key.local_ipv6[3]),
+ bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label,
+ bpf_ntohs(key.tunnel_flags));
+ bpf_printk("local_ip 0x%x\n", *local_ip);
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
return TC_ACT_OK;
}
-SEC("geneve_set_tunnel")
-int _geneve_set_tunnel(struct __sk_buff *skb)
+struct local_geneve_opt {
+ struct geneve_opt gopt;
+ int data;
+};
+
+SEC("tc")
+int geneve_set_tunnel(struct __sk_buff *skb)
{
- int ret, ret2;
+ int ret;
struct bpf_tunnel_key key;
- struct geneve_opt gopt;
+ struct local_geneve_opt local_gopt;
+ struct geneve_opt *gopt = (struct geneve_opt *) &local_gopt;
__builtin_memset(&key, 0x0, sizeof(key));
key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
@@ -406,61 +598,59 @@ int _geneve_set_tunnel(struct __sk_buff *skb)
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- __builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
- gopt.type = 0x08;
- gopt.r1 = 0;
- gopt.r2 = 0;
- gopt.r3 = 0;
- gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+ __builtin_memset(gopt, 0x0, sizeof(local_gopt));
+ gopt->opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt->type = 0x08;
+ gopt->r1 = 0;
+ gopt->r2 = 0;
+ gopt->r3 = 0;
+ gopt->length = 2; /* 4-byte multiple */
+ *(int *) &gopt->opt_data = bpf_htonl(0xdeadbeef);
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ ret = bpf_skb_set_tunnel_opt(skb, gopt, sizeof(local_gopt));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("geneve_get_tunnel")
-int _geneve_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int geneve_get_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
struct geneve_opt gopt;
- char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
+ if (ret < 0)
+ gopt.opt_class = 0;
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+ bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n",
+ key.tunnel_id, key.remote_ipv4, gopt.opt_class);
return TC_ACT_OK;
}
-SEC("ip6geneve_set_tunnel")
-int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6geneve_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- struct geneve_opt gopt;
+ struct local_geneve_opt local_gopt;
+ struct geneve_opt *gopt = (struct geneve_opt *) &local_gopt;
int ret;
__builtin_memset(&key, 0x0, sizeof(key));
@@ -472,32 +662,31 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- __builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
- gopt.type = 0x08;
- gopt.r1 = 0;
- gopt.r2 = 0;
- gopt.r3 = 0;
- gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+ __builtin_memset(gopt, 0x0, sizeof(local_gopt));
+ gopt->opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt->type = 0x08;
+ gopt->r1 = 0;
+ gopt->r2 = 0;
+ gopt->r3 = 0;
+ gopt->length = 2; /* 4-byte multiple */
+ *(int *) &gopt->opt_data = bpf_htonl(0xfeedbeef);
- ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ ret = bpf_skb_set_tunnel_opt(skb, gopt, sizeof(gopt));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip6geneve_get_tunnel")
-int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6geneve_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
struct bpf_tunnel_key key;
struct geneve_opt gopt;
int ret;
@@ -505,24 +694,22 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb)
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
+ if (ret < 0)
+ gopt.opt_class = 0;
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+ bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n",
+ key.tunnel_id, key.remote_ipv4, gopt.opt_class);
return TC_ACT_OK;
}
-SEC("ipip_set_tunnel")
-int _ipip_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key = {};
void *data = (void *)(long)skb->data;
@@ -532,7 +719,7 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
/* single length check */
if (data + sizeof(*iph) > data_end) {
- ERROR(1);
+ log_err(1);
return TC_ACT_SHOT;
}
@@ -543,32 +730,136 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ipip_get_tunnel")
-int _ipip_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip_get_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
- char fmt[] = "remote ip 0x%x\n";
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+ bpf_printk("remote ip 0x%x\n", key.remote_ipv4);
return TC_ACT_OK;
}
-SEC("ipip6_set_tunnel")
-int _ipip6_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip_gue_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ struct bpf_fou_encap___local encap = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ if (data + sizeof(*iph) > data_end) {
+ log_err(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.tunnel_ttl = 64;
+ if (iph->protocol == IPPROTO_ICMP)
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ encap.sport = 0;
+ encap.dport = bpf_htons(5555);
+
+ ret = bpf_skb_set_fou_encap(skb, (struct bpf_fou_encap *)&encap,
+ bpf_core_enum_value(enum bpf_fou_encap_type___local,
+ FOU_BPF_ENCAP_GUE___local));
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ipip_fou_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ struct bpf_fou_encap___local encap = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ if (data + sizeof(*iph) > data_end) {
+ log_err(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.tunnel_ttl = 64;
+ if (iph->protocol == IPPROTO_ICMP)
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ encap.sport = 0;
+ encap.dport = bpf_htons(5555);
+
+ ret = bpf_skb_set_fou_encap(skb, (struct bpf_fou_encap *)&encap,
+ FOU_BPF_ENCAP_FOU___local);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ipip_encap_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key = {};
+ struct bpf_fou_encap___local encap = {};
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_fou_encap(skb, (struct bpf_fou_encap *)&encap);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_ntohs(encap.dport) != 5555)
+ return TC_ACT_SHOT;
+
+ bpf_printk("%d remote ip 0x%x, sport %d, dport %d\n", ret,
+ key.remote_ipv4, bpf_ntohs(encap.sport),
+ bpf_ntohs(encap.dport));
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ipip6_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key = {};
void *data = (void *)(long)skb->data;
@@ -578,7 +869,7 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
/* single length check */
if (data + sizeof(*iph) > data_end) {
- ERROR(1);
+ log_err(1);
return TC_ACT_SHOT;
}
@@ -591,34 +882,33 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ipip6_get_tunnel")
-int _ipip6_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip6_get_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
- char fmt[] = "remote ip6 %x::%x\n";
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
- bpf_htonl(key.remote_ipv6[3]));
+ bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
return TC_ACT_OK;
}
-SEC("ip6ip6_set_tunnel")
-int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6ip6_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key = {};
void *data = (void *)(long)skb->data;
@@ -628,7 +918,7 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
/* single length check */
if (data + sizeof(*iph) > data_end) {
- ERROR(1);
+ log_err(1);
return TC_ACT_SHOT;
}
@@ -640,46 +930,97 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip6ip6_get_tunnel")
-int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6ip6_get_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
- char fmt[] = "remote ip6 %x::%x\n";
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
- bpf_htonl(key.remote_ipv6[3]));
+ bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
return TC_ACT_OK;
}
-SEC("xfrm_get_state")
-int _xfrm_get_state(struct __sk_buff *skb)
+volatile int xfrm_reqid = 0;
+volatile int xfrm_spi = 0;
+volatile int xfrm_remote_ip = 0;
+
+SEC("tc")
+int xfrm_get_state(struct __sk_buff *skb)
{
struct bpf_xfrm_state x;
- char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
int ret;
ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
if (ret < 0)
return TC_ACT_OK;
- bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
- bpf_ntohl(x.remote_ipv4));
+ xfrm_reqid = x.reqid;
+ xfrm_spi = bpf_ntohl(x.spi);
+ xfrm_remote_ip = bpf_ntohl(x.remote_ipv4);
+
return TC_ACT_OK;
}
+volatile int xfrm_replay_window = 0;
+
+SEC("xdp")
+int xfrm_get_state_xdp(struct xdp_md *xdp)
+{
+ struct bpf_xfrm_state_opts opts = {};
+ struct xfrm_state *x = NULL;
+ struct ip_esp_hdr *esph;
+ struct bpf_dynptr ptr;
+ u8 esph_buf[8] = {};
+ u8 iph_buf[20] = {};
+ struct iphdr *iph;
+ u32 off;
+
+ if (bpf_dynptr_from_xdp(xdp, 0, &ptr))
+ goto out;
+
+ off = sizeof(struct ethhdr);
+ iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf));
+ if (!iph || iph->protocol != IPPROTO_ESP)
+ goto out;
+
+ off += sizeof(struct iphdr);
+ esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf));
+ if (!esph)
+ goto out;
+
+ opts.netns_id = BPF_F_CURRENT_NETNS;
+ opts.daddr.a4 = iph->daddr;
+ opts.spi = esph->spi;
+ opts.proto = IPPROTO_ESP;
+ opts.family = AF_INET;
+
+ x = bpf_xdp_get_xfrm_state(xdp, &opts, sizeof(opts));
+ if (!x)
+ goto out;
+
+ if (!x->replay_esn)
+ goto out;
+
+ xfrm_replay_window = x->replay_esn->replay_window;
+out:
+ if (x)
+ bpf_xdp_xfrm_state_release(x);
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c
new file mode 100644
index 000000000000..fc423e43a3cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+__u32 perfbuf_val = 0;
+__u32 ringbuf_val = 0;
+
+int test_pid;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} percpu_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} percpu_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+} perfbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int sys_nanosleep_enter(void *ctx)
+{
+ int cur_pid;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid != test_pid)
+ return 0;
+
+ bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU, &perfbuf_val, sizeof(perfbuf_val));
+ bpf_ringbuf_output(&ringbuf, &ringbuf_val, sizeof(ringbuf_val), 0);
+
+ return 0;
+}
+
+SEC("perf_event")
+int handle_perf_event(void *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c
new file mode 100644
index 000000000000..12f4065fca20
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+pid_t my_pid = 0;
+
+int test1_result = 0;
+int test2_result = 0;
+int test3_result = 0;
+int test4_result = 0;
+
+SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset")
+int BPF_UPROBE(test1)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test1_result = 1;
+ return 0;
+}
+
+SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset@LIBURANDOM_READ_1.0.0")
+int BPF_UPROBE(test2)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test2_result = 1;
+ return 0;
+}
+
+SEC("uretprobe/./liburandom_read.so:urandlib_api_sameoffset@@LIBURANDOM_READ_2.0.0")
+int BPF_URETPROBE(test3, int ret)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test3_result = ret;
+ return 0;
+}
+
+SEC("uprobe")
+int BPF_UPROBE(test4)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test4_result = 1;
+ return 0;
+}
+
+#if defined(__TARGET_ARCH_x86)
+struct pt_regs regs;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ax = regs.ax;
+ ctx->cx = regs.cx;
+ ctx->dx = regs.dx;
+ ctx->r8 = regs.r8;
+ ctx->r9 = regs.r9;
+ ctx->r10 = regs.r10;
+ ctx->r11 = regs.r11;
+ ctx->di = regs.di;
+ ctx->si = regs.si;
+ return 0;
+}
+
+unsigned long ip;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change_ip)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ip = ip;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
new file mode 100644
index 000000000000..da4bf89d004c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+int uprobe_byname_parm1 = 0;
+int uprobe_byname_ran = 0;
+int uretprobe_byname_rc = 0;
+int uretprobe_byname_ret = 0;
+int uretprobe_byname_ran = 0;
+u64 uprobe_byname2_parm1 = 0;
+int uprobe_byname2_ran = 0;
+u64 uretprobe_byname2_rc = 0;
+int uretprobe_byname2_ran = 0;
+
+int test_pid;
+
+int a[8];
+
+/* This program cannot auto-attach, but that should not stop other
+ * programs from attaching.
+ */
+SEC("uprobe")
+int handle_uprobe_noautoattach(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("uprobe//proc/self/exe:autoattach_trigger_func")
+int BPF_UPROBE(handle_uprobe_byname
+ , int arg1
+ , int arg2
+ , int arg3
+#if FUNC_REG_ARG_CNT > 3
+ , int arg4
+#endif
+#if FUNC_REG_ARG_CNT > 4
+ , int arg5
+#endif
+#if FUNC_REG_ARG_CNT > 5
+ , int arg6
+#endif
+#if FUNC_REG_ARG_CNT > 6
+ , int arg7
+#endif
+#if FUNC_REG_ARG_CNT > 7
+ , int arg8
+#endif
+)
+{
+ uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx);
+ uprobe_byname_ran = 1;
+
+ a[0] = arg1;
+ a[1] = arg2;
+ a[2] = arg3;
+#if FUNC_REG_ARG_CNT > 3
+ a[3] = arg4;
+#endif
+#if FUNC_REG_ARG_CNT > 4
+ a[4] = arg5;
+#endif
+#if FUNC_REG_ARG_CNT > 5
+ a[5] = arg6;
+#endif
+#if FUNC_REG_ARG_CNT > 6
+ a[6] = arg7;
+#endif
+#if FUNC_REG_ARG_CNT > 7
+ a[7] = arg8;
+#endif
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:autoattach_trigger_func")
+int BPF_URETPROBE(handle_uretprobe_byname, int ret)
+{
+ uretprobe_byname_rc = PT_REGS_RC_CORE(ctx);
+ uretprobe_byname_ret = ret;
+ uretprobe_byname_ran = 2;
+
+ return 0;
+}
+
+
+SEC("uprobe/libc.so.6:fopen")
+int BPF_UPROBE(handle_uprobe_byname2, const char *pathname, const char *mode)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid)
+ return 0;
+ uprobe_byname2_parm1 = (u64)(long)pathname;
+ uprobe_byname2_ran = 3;
+ return 0;
+}
+
+SEC("uretprobe/libc.so.6:fopen")
+int BPF_URETPROBE(handle_uretprobe_byname2, void *ret)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid)
+ return 0;
+ uretprobe_byname2_rc = (u64)(long)ret;
+ uretprobe_byname2_ran = 4;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_urandom_usdt.c b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
new file mode 100644
index 000000000000..3539b02bd5f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int urand_pid;
+
+int urand_read_without_sema_call_cnt;
+int urand_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_without_sema")
+int BPF_USDT(urand_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urand_read_without_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urand_read_without_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urand_read_with_sema_call_cnt;
+int urand_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_with_sema")
+int BPF_USDT(urand_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urand_read_with_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urand_read_with_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urandlib_read_without_sema_call_cnt;
+int urandlib_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_without_sema")
+int BPF_USDT(urandlib_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urandlib_read_without_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urandlib_read_without_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urandlib_read_with_sema_call_cnt;
+int urandlib_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_with_sema")
+int BPF_USDT(urandlib_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urandlib_read_with_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urandlib_read_with_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
new file mode 100644
index 000000000000..a78c87537b07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int my_pid;
+
+int usdt0_called;
+u64 usdt0_cookie;
+int usdt0_arg_cnt;
+int usdt0_arg_ret;
+int usdt0_arg_size;
+
+SEC("usdt")
+int usdt0(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt0_called, 1);
+
+ usdt0_cookie = bpf_usdt_cookie(ctx);
+ usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
+ /* should return -ENOENT for any arg_num */
+ usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp);
+ usdt0_arg_size = bpf_usdt_arg_size(ctx, bpf_get_prandom_u32());
+ return 0;
+}
+
+int usdt3_called;
+u64 usdt3_cookie;
+int usdt3_arg_cnt;
+int usdt3_arg_rets[3];
+u64 usdt3_args[3];
+int usdt3_arg_sizes[3];
+
+SEC("usdt//proc/self/exe:test:usdt3")
+int usdt3(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt3_called, 1);
+
+ usdt3_cookie = bpf_usdt_cookie(ctx);
+ usdt3_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt3_args[0] = (int)tmp;
+ usdt3_arg_sizes[0] = bpf_usdt_arg_size(ctx, 0);
+
+ usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
+ usdt3_args[1] = (long)tmp;
+ usdt3_arg_sizes[1] = bpf_usdt_arg_size(ctx, 1);
+
+ usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
+ usdt3_args[2] = (uintptr_t)tmp;
+ usdt3_arg_sizes[2] = bpf_usdt_arg_size(ctx, 2);
+
+ return 0;
+}
+
+int usdt12_called;
+u64 usdt12_cookie;
+int usdt12_arg_cnt;
+u64 usdt12_args[12];
+int usdt12_arg_sizes[12];
+
+SEC("usdt//proc/self/exe:test:usdt12")
+int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
+ long a6, __u64 a7, uintptr_t a8, int a9, short a10,
+ short a11, signed char a12)
+{
+ int i;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt12_called, 1);
+
+ usdt12_cookie = bpf_usdt_cookie(ctx);
+ usdt12_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt12_args[0] = a1;
+ usdt12_args[1] = a2;
+ usdt12_args[2] = a3;
+ usdt12_args[3] = a4;
+ usdt12_args[4] = a5;
+ usdt12_args[5] = a6;
+ usdt12_args[6] = a7;
+ usdt12_args[7] = a8;
+ usdt12_args[8] = a9;
+ usdt12_args[9] = a10;
+ usdt12_args[10] = a11;
+ usdt12_args[11] = a12;
+
+ bpf_for(i, 0, 12) {
+ usdt12_arg_sizes[i] = bpf_usdt_arg_size(ctx, i);
+ }
+
+ return 0;
+}
+
+int usdt_sib_called;
+u64 usdt_sib_cookie;
+int usdt_sib_arg_cnt;
+int usdt_sib_arg_ret;
+short usdt_sib_arg;
+int usdt_sib_arg_size;
+
+/*
+ * usdt_sib is only tested on x86-related architectures, so it requires
+ * manual attach since auto-attach will panic tests under other architectures
+ */
+SEC("usdt")
+int usdt_sib(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_sib_called, 1);
+
+ usdt_sib_cookie = bpf_usdt_cookie(ctx);
+ usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt_sib_arg = (short)tmp;
+ usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
new file mode 100644
index 000000000000..962f3462066a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+/* this file is linked together with test_usdt.c to validate that usdt.bpf.h
+ * can be included in multiple .bpf.c files forming single final BPF object
+ * file
+ */
+
+extern int my_pid;
+
+int usdt_100_called;
+int usdt_100_sum;
+
+SEC("usdt//proc/self/exe:test:usdt_100")
+int BPF_USDT(usdt_100, int x)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_100_called, 1);
+ __sync_fetch_and_add(&usdt_100_sum, x);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_user_ringbuf.h b/tools/testing/selftests/bpf/progs/test_user_ringbuf.h
new file mode 100644
index 000000000000..1643b4d59ba7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_user_ringbuf.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _TEST_USER_RINGBUF_H
+#define _TEST_USER_RINGBUF_H
+
+#define TEST_OP_64 4
+#define TEST_OP_32 2
+
+enum test_msg_op {
+ TEST_MSG_OP_INC64,
+ TEST_MSG_OP_INC32,
+ TEST_MSG_OP_MUL64,
+ TEST_MSG_OP_MUL32,
+
+ // Must come last.
+ TEST_MSG_OP_NUM_OPS,
+};
+
+struct test_msg {
+ enum test_msg_op msg_op;
+ union {
+ __s64 operand_64;
+ __s32 operand_32;
+ };
+};
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+#endif /* _TEST_USER_RINGBUF_H */
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
index 913acdffd90f..20eb7d422c41 100644
--- a/tools/testing/selftests/bpf/progs/test_varlen.c
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -19,6 +19,7 @@ __u64 payload1_len1 = 0;
__u64 payload1_len2 = 0;
__u64 total1 = 0;
char payload1[MAX_LEN + MAX_LEN] = {};
+__u64 ret_bad_read = 0;
/* .data */
int payload2_len1 = -1;
@@ -36,31 +37,35 @@ int payload4_len2 = -1;
int total4= -1;
char payload4[MAX_LEN + MAX_LEN] = { 1 };
+char payload_bad[5] = { 0x42, 0x42, 0x42, 0x42, 0x42 };
+
SEC("raw_tp/sys_enter")
int handler64_unsigned(void *regs)
{
int pid = bpf_get_current_pid_tgid() >> 32;
void *payload = payload1;
- u64 len;
+ long len;
/* ignore irrelevant invocations */
if (test_pid != pid || !capture)
return 0;
len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
- if (len <= MAX_LEN) {
+ if (len >= 0) {
payload += len;
payload1_len1 = len;
}
len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
- if (len <= MAX_LEN) {
+ if (len >= 0) {
payload += len;
payload1_len2 = len;
}
total1 = payload - (void *)payload1;
+ ret_bad_read = bpf_probe_read_kernel_str(payload_bad + 2, 1, (void *) -1);
+
return 0;
}
@@ -123,7 +128,7 @@ int handler32_signed(void *regs)
{
int pid = bpf_get_current_pid_tgid() >> 32;
void *payload = payload4;
- int len;
+ long len;
/* ignore irrelevant invocations */
if (test_pid != pid || !capture)
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
index d38153dab3dd..323a73fb2e8c 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
@@ -5,13 +5,13 @@
#define ATTR __attribute__((noinline))
#include "test_jhash.h"
-SEC("scale90_noinline")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 14;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
index f024154c7be7..f5318f757084 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -1,17 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#define ATTR __always_inline
#include "test_jhash.h"
-SEC("scale90_inline")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 14;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
index 9beb5bf80373..2e06dbb1ad5c 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
@@ -5,13 +5,13 @@
#define ATTR __attribute__((noinline))
#include "test_jhash.h"
-SEC("scale90_noinline32")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 32;
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
new file mode 100644
index 000000000000..ff8d755548b9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+#include "err.h"
+
+#define MAX_DATA_SIZE (1024 * 1024)
+#define MAX_SIG_SIZE 1024
+
+__u32 monitored_pid;
+__s32 user_keyring_serial;
+__u64 system_keyring_id;
+
+struct data {
+ __u8 data[MAX_DATA_SIZE];
+ __u32 data_len;
+ __u8 sig[MAX_SIG_SIZE];
+ __u32 sig_len;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct data);
+} data_input SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ struct bpf_dynptr data_ptr, sig_ptr;
+ struct data *data_val;
+ struct bpf_key *trusted_keyring;
+ __u32 pid;
+ __u64 value;
+ int ret, zero = 0;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ data_val = bpf_map_lookup_elem(&data_input, &zero);
+ if (!data_val)
+ return 0;
+
+ ret = bpf_probe_read_kernel(&value, sizeof(value), &attr->value);
+ if (ret)
+ goto out;
+
+ ret = bpf_copy_from_user(data_val, sizeof(struct data),
+ (void *)(unsigned long)value);
+ if (ret)
+ goto out;
+
+ if (data_val->data_len > sizeof(data_val->data))
+ return -EINVAL;
+
+ bpf_dynptr_from_mem(data_val->data, data_val->data_len, 0, &data_ptr);
+
+ if (data_val->sig_len > sizeof(data_val->sig))
+ return -EINVAL;
+
+ bpf_dynptr_from_mem(data_val->sig, data_val->sig_len, 0, &sig_ptr);
+
+ if (user_keyring_serial)
+ trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0);
+ else
+ trusted_keyring = bpf_lookup_system_key(system_keyring_id);
+
+ if (!trusted_keyring)
+ return -ENOENT;
+
+ ret = bpf_verify_pkcs7_signature(&data_ptr, &sig_ptr, trusted_keyring);
+
+ bpf_key_put(trusted_keyring);
+
+out:
+ set_if_not_errno_or_zero(ret, -EFAULT);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index e9dfa0313d1b..78b23934d9f8 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -16,12 +16,12 @@ bool kprobe_called = false;
bool fentry_called = false;
SEC("tp/syscalls/sys_enter_nanosleep")
-int handle__tp(struct trace_event_raw_sys_enter *args)
+int handle__tp(struct syscall_trace_enter *args)
{
struct __kernel_timespec *ts;
long tv_nsec;
- if (args->id != __NR_nanosleep)
+ if (args->nr != __NR_nanosleep)
return 0;
ts = (void *)args->args[0];
@@ -42,7 +42,7 @@ int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
if (id != __NR_nanosleep)
return 0;
- ts = (void *)PT_REGS_PARM1_CORE(regs);
+ ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs);
if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
tv_nsec != MY_TV_NSEC)
return 0;
@@ -60,7 +60,7 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
if (id != __NR_nanosleep)
return 0;
- ts = (void *)PT_REGS_PARM1_CORE(regs);
+ ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs);
if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
tv_nsec != MY_TV_NSEC)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index 31f9bce37491..8caf58be5818 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -19,8 +19,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
-
-int _version SEC("version") = 1;
+#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -139,7 +138,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
@@ -210,7 +209,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 3d66599eee2e..5904f45cfbc4 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -2,25 +2,41 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_adjust_tail_grow")
+SEC("xdp")
int _xdp_adjust_tail_grow(struct xdp_md *xdp)
{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
- unsigned int data_len;
+ int data_len = bpf_xdp_get_buff_len(xdp);
int offset = 0;
+ /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
+#if defined(__TARGET_ARCH_s390)
+ int tailroom = 512;
+#elif defined(__TARGET_ARCH_powerpc)
+ int tailroom = 384;
+#else
+ int tailroom = 320;
+#endif
/* Data length determine test case */
- data_len = data_end - data;
if (data_len == 54) { /* sizeof(pkt_v4) */
- offset = 4096; /* test too large offset */
+ offset = 4096; /* test too large offset, 4k page size */
+ } else if (data_len == 53) { /* sizeof(pkt_v4) - 1 */
+ offset = 65536; /* test too large offset, 64k page size */
} else if (data_len == 74) { /* sizeof(pkt_v6) */
offset = 40;
} else if (data_len == 64) {
offset = 128;
} else if (data_len == 128) {
- offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+ /* Max tail grow 3520 */
+ offset = 4096 - 256 - tailroom - data_len;
+ } else if (data_len == 9000) {
+ offset = 10;
+ } else if (data_len == 9001) {
+ offset = 4096;
+ } else if (data_len == 90000) {
+ offset = 10; /* test a small offset, 64k page size */
+ } else if (data_len == 90001) {
+ offset = 65536; /* test too large offset, 64k page size */
} else {
return XDP_ABORTED; /* No matching test */
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index 22065a9cfb25..ca68c038357c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -9,19 +9,41 @@
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
-SEC("xdp_adjust_tail_shrink")
+SEC("xdp")
int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
+ __u8 *data_end = (void *)(long)xdp->data_end;
+ __u8 *data = (void *)(long)xdp->data;
int offset = 0;
- if (data_end - data == 54) /* sizeof(pkt_v4) */
+ switch (bpf_xdp_get_buff_len(xdp)) {
+ case 54:
+ /* sizeof(pkt_v4) */
offset = 256; /* shrink too much */
- else
+ break;
+ case 9000:
+ /* non-linear buff test cases */
+ if (data + 1 > data_end)
+ return XDP_DROP;
+
+ switch (data[0]) {
+ case 0:
+ offset = 10;
+ break;
+ case 1:
+ offset = 4100;
+ break;
+ case 2:
+ offset = 8200;
+ break;
+ default:
+ return XDP_DROP;
+ }
+ break;
+ default:
offset = 20;
+ break;
+ }
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
return XDP_DROP;
return XDP_TX;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c b/tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c
new file mode 100644
index 000000000000..2ff1b596e87e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define ERRMSG_LEN 64
+
+struct xdp_errmsg {
+ char msg[ERRMSG_LEN];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, int);
+ __type(value, int);
+} xdp_errmsg_pb SEC(".maps");
+
+struct xdp_attach_error_ctx {
+ unsigned long unused;
+
+ /*
+ * bpf does not support tracepoint __data_loc directly.
+ *
+ * Actually, this field is a 32 bit integer whose value encodes
+ * information on where to find the actual data. The first 2 bytes is
+ * the size of the data. The last 2 bytes is the offset from the start
+ * of the tracepoint struct where the data begins.
+ * -- https://github.com/iovisor/bpftrace/pull/1542
+ */
+ __u32 msg; // __data_loc char[] msg;
+};
+
+/*
+ * Catch the error message at the tracepoint.
+ */
+
+SEC("tp/xdp/bpf_xdp_link_attach_failed")
+int tp__xdp__bpf_xdp_link_attach_failed(struct xdp_attach_error_ctx *ctx)
+{
+ char *msg = (void *)(__u64) ((void *) ctx + (__u16) ctx->msg);
+ struct xdp_errmsg errmsg = {};
+
+ bpf_probe_read_kernel_str(&errmsg.msg, ERRMSG_LEN, msg);
+ bpf_perf_event_output(ctx, &xdp_errmsg_pb, BPF_F_CURRENT_CPU, &errmsg,
+ ERRMSG_LEN);
+ return 0;
+}
+
+/*
+ * Reuse the XDP program in xdp_dummy.c.
+ */
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index a038e827f850..ee48c4963971 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -36,8 +36,8 @@ struct meta {
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} perf_buf_map SEC(".maps");
__u64 test_result_fentry = 0;
@@ -45,11 +45,9 @@ SEC("fentry/FUNC")
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
{
struct meta meta;
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
meta.ifindex = xdp->rxq->dev->ifindex;
- meta.pkt_len = data_end - data;
+ meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
bpf_xdp_output(xdp, &perf_buf_map,
((__u64) meta.pkt_len << 32) |
BPF_F_CURRENT_CPU,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
new file mode 100644
index 000000000000..d7b88cd05afd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_context(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ __u32 *metadata = (void *)(long)xdp->data_meta;
+ __u32 ret;
+
+ if (metadata + 1 > data)
+ return XDP_ABORTED;
+ ret = *metadata;
+ if (bpf_xdp_adjust_meta(xdp, 4))
+ return XDP_ABORTED;
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
index b360ba2bd441..807bf895f42c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_dm_log")
+SEC("xdp")
int xdpdm_devlog(struct xdp_md *ctx)
{
char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
new file mode 100644
index 000000000000..814e2a980e97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+SEC("xdp")
+int xdp_devmap(struct xdp_md *ctx)
+{
+ return ctx->egress_ifindex;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int (void *));
+} xdp_map SEC(".maps") = {
+ .values = {
+ [0] = (void *)&xdp_devmap,
+ },
+};
+
+SEC("xdp")
+int xdp_entry(struct xdp_md *ctx)
+{
+ bpf_tail_call(ctx, &xdp_map, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
new file mode 100644
index 000000000000..5928ed0911ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define ETH_ALEN 6
+#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
+
+/**
+ * enum frame_mark - magics to distinguish page/packet paths
+ * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC.
+ * @MARK_IN: frame is being processed by the input XDP prog.
+ * @MARK_SKB: frame did hit the TC ingress hook as an skb.
+ */
+enum frame_mark {
+ MARK_XMIT = 0U,
+ MARK_IN = 0x42,
+ MARK_SKB = 0x45,
+};
+
+const volatile int ifindex_out;
+const volatile int ifindex_in;
+const volatile __u8 expect_dst[ETH_ALEN];
+volatile int pkts_seen_xdp = 0;
+volatile int pkts_seen_zero = 0;
+volatile int pkts_seen_tc = 0;
+volatile int retcode = XDP_REDIRECT;
+
+SEC("xdp")
+int xdp_redirect(struct xdp_md *xdp)
+{
+ __u32 *metadata = (void *)(long)xdp->data_meta;
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ __u8 *payload = data + HDR_SZ;
+ int ret = retcode;
+
+ if (payload + 1 > data_end)
+ return XDP_ABORTED;
+
+ if (xdp->ingress_ifindex != (__u32)ifindex_in)
+ return XDP_ABORTED;
+
+ if (metadata + 1 > data)
+ return XDP_ABORTED;
+
+ if (*metadata != 0x42)
+ return XDP_ABORTED;
+
+ if (*payload == MARK_XMIT)
+ pkts_seen_zero++;
+
+ *payload = MARK_IN;
+
+ if (bpf_xdp_adjust_meta(xdp, sizeof(__u64)))
+ return XDP_ABORTED;
+
+ if (retcode > XDP_PASS)
+ retcode--;
+
+ if (ret == XDP_REDIRECT)
+ return bpf_redirect(ifindex_out, 0);
+
+ return ret;
+}
+
+static bool check_pkt(void *data, void *data_end, const __u32 mark)
+{
+ struct ipv6hdr *iph = data + sizeof(struct ethhdr);
+ __u8 *payload = data + HDR_SZ;
+
+ if (payload + 1 > data_end)
+ return false;
+
+ if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN)
+ return false;
+
+ /* reset the payload so the same packet doesn't get counted twice when
+ * it cycles back through the kernel path and out the dst veth
+ */
+ *payload = mark;
+ return true;
+}
+
+SEC("xdp")
+int xdp_count_pkts(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ void *data_end = (void *)(long)xdp->data_end;
+
+ if (check_pkt(data, data_end, MARK_XMIT))
+ pkts_seen_xdp++;
+
+ /* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like
+ * it exited a physical NIC. Those pages will be counted in the
+ * pkts_seen_zero counter above.
+ */
+ return XDP_DROP;
+}
+
+SEC("xdp")
+int xdp_redirect_to_111(struct xdp_md *xdp)
+{
+ return bpf_redirect(111, 0);
+}
+
+SEC("xdp")
+int xdp_redirect_to_222(struct xdp_md *xdp)
+{
+ return bpf_redirect(222, 0);
+}
+
+SEC("tc")
+int tc_count_pkts(struct __sk_buff *skb)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+
+ if (check_pkt(data, data_end, MARK_SKB))
+ pkts_seen_tc++;
+
+ /* Will be either recycled or freed, %MARK_SKB makes sure it won't
+ * hit any of the counters above.
+ */
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
new file mode 100644
index 000000000000..67a77944ef29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta */
+#include <stddef.h>
+#include <string.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "test_iptunnel_common.h"
+#include "bpf_kfuncs.h"
+
+#define tcphdr_sz sizeof(struct tcphdr)
+#define udphdr_sz sizeof(struct udphdr)
+#define ethhdr_sz sizeof(struct ethhdr)
+#define iphdr_sz sizeof(struct iphdr)
+#define ipv6hdr_sz sizeof(struct ipv6hdr)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 256);
+ __type(key, __u32);
+ __type(value, __u64);
+} rxcnt SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, MAX_IPTNL_ENTRIES);
+ __type(key, struct vip);
+ __type(value, struct iptnl_info);
+} vip2tnl SEC(".maps");
+
+static __always_inline void count_tx(__u32 protocol)
+{
+ __u64 *rxcnt_count;
+
+ rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+ if (rxcnt_count)
+ *rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, __u8 protocol)
+{
+ struct tcphdr *th;
+ struct udphdr *uh;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)trans_data;
+ return th->dest;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)trans_data;
+ return uh->dest;
+ default:
+ return 0;
+ }
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+ const struct ethhdr *old_eth,
+ const struct iptnl_info *tnl,
+ __be16 h_proto)
+{
+ memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+ memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+ new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
+{
+ __u8 eth_buffer[ethhdr_sz + iphdr_sz + ethhdr_sz];
+ __u8 iph_buffer_tcp[iphdr_sz + tcphdr_sz];
+ __u8 iph_buffer_udp[iphdr_sz + udphdr_sz];
+ struct bpf_dynptr new_xdp_ptr;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct iphdr *iph;
+ __u16 *next_iph;
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+ __u32 csum = 0;
+ int i;
+
+ __builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
+ __builtin_memset(iph_buffer_tcp, 0, sizeof(iph_buffer_tcp));
+ __builtin_memset(iph_buffer_udp, 0, sizeof(iph_buffer_udp));
+
+ if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
+ iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_udp, sizeof(iph_buffer_udp));
+ else
+ iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_tcp, sizeof(iph_buffer_tcp));
+
+ if (!iph)
+ return XDP_DROP;
+
+ dport = get_dport(iph + 1, iph->protocol);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = iph->protocol;
+ vip.family = AF_INET;
+ vip.daddr.v4 = iph->daddr;
+ vip.dport = dport;
+ payload_len = bpf_ntohs(iph->tot_len);
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v4-in-v4 */
+ if (!tnl || tnl->family != AF_INET)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)iphdr_sz))
+ return XDP_DROP;
+
+ bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
+ new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
+ if (!new_eth)
+ return XDP_DROP;
+
+ iph = (struct iphdr *)(new_eth + 1);
+ old_eth = (struct ethhdr *)(iph + 1);
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+ if (new_eth == eth_buffer)
+ bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
+
+ iph->version = 4;
+ iph->ihl = iphdr_sz >> 2;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 0;
+ iph->tot_len = bpf_htons(payload_len + iphdr_sz);
+ iph->daddr = tnl->daddr.v4;
+ iph->saddr = tnl->saddr.v4;
+ iph->ttl = 8;
+
+ next_iph = (__u16 *)iph;
+ for (i = 0; i < iphdr_sz >> 1; i++)
+ csum += *next_iph++;
+
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
+{
+ __u8 eth_buffer[ethhdr_sz + ipv6hdr_sz + ethhdr_sz];
+ __u8 ip6h_buffer_tcp[ipv6hdr_sz + tcphdr_sz];
+ __u8 ip6h_buffer_udp[ipv6hdr_sz + udphdr_sz];
+ struct bpf_dynptr new_xdp_ptr;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct ipv6hdr *ip6h;
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+
+ __builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
+ __builtin_memset(ip6h_buffer_tcp, 0, sizeof(ip6h_buffer_tcp));
+ __builtin_memset(ip6h_buffer_udp, 0, sizeof(ip6h_buffer_udp));
+
+ if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
+ ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_udp, sizeof(ip6h_buffer_udp));
+ else
+ ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_tcp, sizeof(ip6h_buffer_tcp));
+
+ if (!ip6h)
+ return XDP_DROP;
+
+ dport = get_dport(ip6h + 1, ip6h->nexthdr);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = ip6h->nexthdr;
+ vip.family = AF_INET6;
+ memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+ vip.dport = dport;
+ payload_len = ip6h->payload_len;
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v6-in-v6 */
+ if (!tnl || tnl->family != AF_INET6)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)ipv6hdr_sz))
+ return XDP_DROP;
+
+ bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
+ new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
+ if (!new_eth)
+ return XDP_DROP;
+
+ ip6h = (struct ipv6hdr *)(new_eth + 1);
+ old_eth = (struct ethhdr *)(ip6h + 1);
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+ if (new_eth == eth_buffer)
+ bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
+
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+ ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + ipv6hdr_sz);
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip6h->hop_limit = 8;
+ memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+ memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+SEC("xdp")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+ __u8 buffer[ethhdr_sz];
+ struct bpf_dynptr ptr;
+ struct ethhdr *eth;
+ __u16 h_proto;
+
+ __builtin_memset(buffer, 0, sizeof(buffer));
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!eth)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return handle_ipv4(xdp, &ptr);
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+
+ return handle_ipv6(xdp, &ptr);
+ else
+ return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
index eb93ea95d1d8..64ff32eaae92 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_link.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -5,8 +5,14 @@
char LICENSE[] SEC("license") = "GPL";
-SEC("xdp/handler")
+SEC("xdp")
int xdp_handler(struct xdp_md *xdp)
{
return 0;
}
+
+SEC("tc")
+int tc_handler(struct __sk_buff *skb)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
index fcabcda30ba3..93267a68825b 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -15,8 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
-
-int _version SEC("version") = 1;
+#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -135,7 +134,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
@@ -206,7 +205,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index a7c4a7d49fe6..0a0f371a2dec 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -1,40 +1,320 @@
+#include <stdbool.h>
#include <linux/bpf.h>
+#include <linux/errno.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_kfuncs.h"
+
+#define META_SIZE 32
-#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
-#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
-SEC("t")
+/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta.
+ *
+ * The XDP program extracts a fixed-size payload following the Ethernet header
+ * and stores it as packet metadata to test the driver's metadata support. The
+ * TC program then verifies if the passed metadata is correct.
+ */
+
+bool test_pass;
+
+static const __u8 smac_want[ETH_ALEN] = {
+ 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF,
+};
+
+static const __u8 meta_want[META_SIZE] = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+};
+
+static bool check_smac(const struct ethhdr *eth)
+{
+ return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN);
+}
+
+static bool check_metadata(const char *file, int line, __u8 *meta_have)
+{
+ if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
+ return true;
+
+ bpf_stream_printk(BPF_STREAM_STDERR,
+ "FAIL:%s:%d: metadata mismatch\n"
+ " have:\n %pI6\n %pI6\n"
+ " want:\n %pI6\n %pI6\n",
+ file, line,
+ &meta_have[0x00], &meta_have[0x10],
+ &meta_want[0x00], &meta_want[0x10]);
+ return false;
+}
+
+#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have)
+
+static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb)
+{
+ __u8 *data_meta = ctx_ptr(skb, data_meta);
+ __u8 *data = ctx_ptr(skb, data);
+
+ return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta);
+}
+
+#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb)
+
+SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
- __u8 *data, *data_meta, *data_end;
- __u32 diff = 0;
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ __u8 *data = ctx_ptr(ctx, data);
- data_meta = ctx_ptr(ctx, data_meta);
- data_end = ctx_ptr(ctx, data_end);
- data = ctx_ptr(ctx, data);
+ if (meta_have + META_SIZE > data)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Read from metadata using bpf_dynptr_read helper */
+SEC("tc")
+int ing_cls_dynptr_read(struct __sk_buff *ctx)
+{
+ __u8 meta_have[META_SIZE];
+ struct bpf_dynptr meta;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using bpf_dynptr_write helper */
+SEC("tc")
+int ing_cls_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read from metadata using read-only dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ __u8 *meta_have;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
+ if (!meta_have)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using writeable dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src, *dst;
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
return TC_ACT_SHOT;
- diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
- diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(dst, src, META_SIZE);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read skb metadata in chunks from various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
+{
+ const __u32 chunk_len = META_SIZE / 4;
+ __u8 meta_have[META_SIZE];
+ struct bpf_dynptr meta;
+ __u8 *dst, *src;
+
+ dst = meta_have;
+
+ /* 1. Regular read */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 2. Read from an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 3. Read at an offset */
+ bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0);
+ dst += chunk_len;
+
+ /* 4. Read from a slice starting at an offset */
+ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!src)
+ goto out;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ if (!check_metadata(meta_have))
+ goto out;
- return diff ? TC_ACT_SHOT : TC_ACT_OK;
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
}
-SEC("x")
+/* Write skb metadata in chunks at various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
+{
+ const __u32 chunk_len = META_SIZE / 4;
+ __u8 payload[META_SIZE];
+ struct bpf_dynptr meta;
+ __u8 *dst, *src;
+
+ bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload));
+ src = payload;
+
+ /* 1. Regular write */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 2. Write to an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 3. Write at an offset */
+ bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 4. Write to a slice starting at an offset */
+ dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!dst)
+ return TC_ACT_SHOT;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Pass an OOB offset to dynptr read, write, adjust, slice. */
+SEC("tc")
+int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ __u8 md, *p;
+ int err;
+
+ err = bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (err)
+ goto fail;
+
+ /* read offset OOB */
+ err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* write offset OOB */
+ err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* adjust end offset OOB */
+ err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* adjust start offset OOB */
+ err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* slice offset OOB */
+ p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ /* slice rdwr offset OOB */
+ p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ return TC_ACT_UNSPEC;
+fail:
+ return TC_ACT_SHOT;
+}
+
+/* Reserve and clear space for metadata but don't populate it */
+SEC("xdp")
+int ing_xdp_zalloc_meta(struct xdp_md *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+ __u8 *meta;
+ int ret;
+
+ /* Drop any non-test packets */
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ return XDP_DROP;
+ if (!check_smac(eth))
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
+ if (ret < 0)
+ return XDP_DROP;
+
+ meta = ctx_ptr(ctx, data_meta);
+ if (meta + META_SIZE > ctx_ptr(ctx, data))
+ return XDP_DROP;
+
+ __builtin_memset(meta, 0, META_SIZE);
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
- __u8 *data, *data_meta, *data_end;
+ __u8 *data, *data_meta, *data_end, *payload;
+ struct ethhdr *eth;
int ret;
- ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
if (ret < 0)
return XDP_DROP;
@@ -42,12 +322,352 @@ int ing_xdp(struct xdp_md *ctx)
data_end = ctx_ptr(ctx, data_end);
data = ctx_ptr(ctx, data);
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
+ eth = (struct ethhdr *)data;
+ payload = data + sizeof(struct ethhdr);
+
+ if (payload + META_SIZE > data_end ||
+ data_meta + META_SIZE > data)
+ return XDP_DROP;
+
+ /* The Linux networking stack may send other packets on the test
+ * interface that interfere with the test. Just drop them.
+ * The test packets can be recognized by their source MAC address.
+ */
+ if (!check_smac(eth))
return XDP_DROP;
- __builtin_memcpy(data_meta, data, ETH_ALEN);
+ __builtin_memcpy(data_meta, payload, META_SIZE);
return XDP_PASS;
}
+/*
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _payload_ using packet pointers.
+ */
+SEC("tc")
+int clone_data_meta_survives_data_write(struct __sk_buff *ctx)
+{
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ if (meta_have + META_SIZE > eth)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ /* Packet write to trigger unclone in prologue */
+ eth->h_proto = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _metadata_ using packet pointers.
+ */
+SEC("tc")
+int clone_data_meta_survives_meta_write(struct __sk_buff *ctx)
+{
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ if (meta_have + META_SIZE > eth)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ /* Metadata write to trigger unclone in prologue */
+ *meta_have = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates a r/w slice to packet _payload_.
+ */
+SEC("tc")
+int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
+ struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates an r/w slice to packet _metadata_.
+ */
+SEC("tc")
+int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+ __u8 *meta_have;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!meta_have)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
+ * before prog writes to packet _payload_ using dynptr_write helper and metadata
+ * remains intact before and after the write.
+ */
+SEC("tc")
+int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
+ const struct ethhdr *eth;
+ int err;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ /* Expect read-write metadata before unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta))
+ goto out;
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ /* Helper write to payload will unclone the packet */
+ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
+ * before prog writes to packet _metadata_ using dynptr_write helper and
+ * metadata remains intact before and after the write.
+ */
+SEC("tc")
+int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
+ const struct ethhdr *eth;
+ int err;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ /* Expect read-write metadata before unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta))
+ goto out;
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ /* Helper write to metadata will unclone the packet */
+ bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0);
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_vlan_push_pop(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only
+ * secondary tag push triggers an actual MAC header modification.
+ */
+ err = bpf_skb_vlan_push(ctx, 0, 42);
+ if (err)
+ goto out;
+ err = bpf_skb_vlan_push(ctx, 0, 207);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ err = bpf_skb_vlan_pop(ctx);
+ if (err)
+ goto out;
+ err = bpf_skb_vlan_pop(ctx);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_adjust_room(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* Grow a 1 byte hole after the MAC header */
+ err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Shrink a 1 byte hole after the MAC header */
+ err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Grow a 256 byte hole to trigger head reallocation */
+ err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_head_tail(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* Reserve 1 extra in the front for packet data */
+ err = bpf_skb_change_head(ctx, 1, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Reserve 256 extra bytes in the front to trigger head reallocation */
+ err = bpf_skb_change_head(ctx, 256, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Reserve 4k extra bytes in the back to trigger head reallocation */
+ err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_proto(struct __sk_buff *ctx)
+{
+ int err;
+
+ err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 3a67921f62b5..fad94e41cef9 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -15,6 +15,7 @@
#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
@@ -239,7 +240,7 @@ bool parse_udp(void *data, void *data_end,
udp = data + off;
if (udp + 1 > data_end)
- return 0;
+ return false;
if (!is_icmp) {
pckt->flow.port16[0] = udp->source;
pckt->flow.port16[1] = udp->dest;
@@ -247,7 +248,7 @@ bool parse_udp(void *data, void *data_end,
pckt->flow.port16[0] = udp->dest;
pckt->flow.port16[1] = udp->source;
}
- return 1;
+ return true;
}
static __attribute__ ((noinline))
@@ -261,7 +262,7 @@ bool parse_tcp(void *data, void *data_end,
tcp = data + off;
if (tcp + 1 > data_end)
- return 0;
+ return false;
if (tcp->syn)
pckt->flags |= (1 << 1);
if (!is_icmp) {
@@ -271,7 +272,7 @@ bool parse_tcp(void *data, void *data_end,
pckt->flow.port16[0] = tcp->dest;
pckt->flow.port16[1] = tcp->source;
}
- return 1;
+ return true;
}
static __attribute__ ((noinline))
@@ -287,7 +288,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
void *data;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
- return 0;
+ return false;
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
@@ -295,7 +296,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
old_eth = data + sizeof(struct ipv6hdr);
if (new_eth + 1 > data_end ||
old_eth + 1 > data_end || ip6h + 1 > data_end)
- return 0;
+ return false;
memcpy(new_eth->eth_dest, cval->mac, 6);
memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
new_eth->eth_proto = 56710;
@@ -314,9 +315,17 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
ip6h->saddr.in6_u.u6_addr32[2] = 3;
ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
- return 1;
+ return true;
}
+#ifndef __clang__
+#pragma GCC push_options
+/* GCC optimization collapses functions and increases the number of arguments
+ * beyond the compatible amount supported by BPF.
+ */
+#pragma GCC optimize("-fno-ipa-sra")
+#endif
+
static __attribute__ ((noinline))
bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
struct packet_description *pckt,
@@ -335,7 +344,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
ip_suffix <<= 15;
ip_suffix ^= pckt->flow.src;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
- return 0;
+ return false;
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
@@ -343,7 +352,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
old_eth = data + sizeof(struct iphdr);
if (new_eth + 1 > data_end ||
old_eth + 1 > data_end || iph + 1 > data_end)
- return 0;
+ return false;
memcpy(new_eth->eth_dest, cval->mac, 6);
memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
new_eth->eth_proto = 8;
@@ -362,53 +371,18 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
iph->ttl = 4;
next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
- return 0;
- return 1;
+ return false;
+ return true;
}
-static __attribute__ ((noinline))
-bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
-{
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
-
- old_eth = *data;
- new_eth = *data + sizeof(struct ipv6hdr);
- memcpy(new_eth->eth_source, old_eth->eth_source, 6);
- memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
- if (inner_v4)
- new_eth->eth_proto = 8;
- else
- new_eth->eth_proto = 56710;
- if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
- return 0;
- *data = (void *)(long)xdp->data;
- *data_end = (void *)(long)xdp->data_end;
- return 1;
-}
-
-static __attribute__ ((noinline))
-bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
-{
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
-
- old_eth = *data;
- new_eth = *data + sizeof(struct iphdr);
- memcpy(new_eth->eth_source, old_eth->eth_source, 6);
- memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
- new_eth->eth_proto = 8;
- if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
- return 0;
- *data = (void *)(long)xdp->data;
- *data_end = (void *)(long)xdp->data_end;
- return 1;
-}
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
static __attribute__ ((noinline))
int swap_mac_and_send(void *data, void *data_end)
@@ -430,7 +404,6 @@ int send_icmp_reply(void *data, void *data_end)
__u16 *next_iph_u16;
__u32 tmp_addr = 0;
struct iphdr *iph;
- __u32 csum1 = 0;
__u32 csum = 0;
__u64 off = 0;
@@ -449,7 +422,7 @@ int send_icmp_reply(void *data, void *data_end)
iph->saddr = tmp_addr;
iph->check = 0;
next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
@@ -564,22 +537,22 @@ static bool get_packet_dst(struct real_definition **real,
hash = get_packet_hash(pckt, hash_16bytes);
if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
+ return false;
key = 2 * vip_info->vip_num + hash % 2;
real_pos = bpf_map_lookup_elem(&ch_rings, &key);
if (!real_pos)
- return 0;
+ return false;
key = *real_pos;
*real = bpf_map_lookup_elem(&reals, &key);
if (!(*real))
- return 0;
+ return false;
if (!(vip_info->flags & (1 << 1))) {
__u32 conn_rate_key = 512 + 2;
struct lb_stats *conn_rate_stats =
bpf_map_lookup_elem(&stats, &conn_rate_key);
if (!conn_rate_stats)
- return 1;
+ return true;
cur_time = bpf_ktime_get_ns();
if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
conn_rate_stats->v1 = 1;
@@ -587,14 +560,14 @@ static bool get_packet_dst(struct real_definition **real,
} else {
conn_rate_stats->v1 += 1;
if (conn_rate_stats->v1 >= 1)
- return 1;
+ return true;
}
if (pckt->flow.proto == IPPROTO_UDP)
new_dst_lru.atime = cur_time;
new_dst_lru.pos = key;
bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
}
- return 1;
+ return true;
}
__attribute__ ((noinline))
@@ -627,12 +600,13 @@ static void connection_table_lookup(struct real_definition **real,
__attribute__ ((noinline))
static int process_l3_headers_v6(struct packet_description *pckt,
__u8 *protocol, __u64 off,
- __u16 *pkt_bytes, void *data,
- void *data_end)
+ __u16 *pkt_bytes, void *extra_args[2])
{
struct ipv6hdr *ip6h;
__u64 iph_len;
int action;
+ void *data = extra_args[0];
+ void *data_end = extra_args[1];
ip6h = data + off;
if (ip6h + 1 > data_end)
@@ -658,12 +632,12 @@ static int process_l3_headers_v6(struct packet_description *pckt,
__attribute__ ((noinline))
static int process_l3_headers_v4(struct packet_description *pckt,
__u8 *protocol, __u64 off,
- __u16 *pkt_bytes, void *data,
- void *data_end)
+ __u16 *pkt_bytes, void *extra_args[2])
{
struct iphdr *iph;
- __u64 iph_len;
int action;
+ void *data = extra_args[0];
+ void *data_end = extra_args[1];
iph = data + off;
if (iph + 1 > data_end)
@@ -696,7 +670,6 @@ static int process_packet(void *data, __u64 off, void *data_end,
struct packet_description pckt = { };
struct vip_definition vip = { };
struct lb_stats *data_stats;
- struct eth_hdr *eth = data;
void *lru_map = &lru_cache;
struct vip_meta *vip_info;
__u32 lru_stats_key = 513;
@@ -704,17 +677,17 @@ static int process_packet(void *data, __u64 off, void *data_end,
__u32 stats_key = 512;
struct ctl_value *cval;
__u16 pkt_bytes;
- __u64 iph_len;
__u8 protocol;
__u32 vip_num;
int action;
+ void *extra_args[2] = { data, data_end };
if (is_ipv6)
action = process_l3_headers_v6(&pckt, &protocol, off,
- &pkt_bytes, data, data_end);
+ &pkt_bytes, extra_args);
else
action = process_l3_headers_v4(&pckt, &protocol, off,
- &pkt_bytes, data, data_end);
+ &pkt_bytes, extra_args);
if (action >= 0)
return action;
protocol = pckt.flow.proto;
@@ -797,7 +770,7 @@ out:
return XDP_DROP;
}
-SEC("xdp-test-v4")
+SEC("xdp")
int balancer_ingress_v4(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
@@ -816,7 +789,7 @@ int balancer_ingress_v4(struct xdp_md *ctx)
return XDP_DROP;
}
-SEC("xdp-test-v6")
+SEC("xdp")
int balancer_ingress_v6(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
new file mode 100644
index 000000000000..c41a21413eaa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+int xdpf_sz;
+int sinfo_sz;
+int data_len;
+int pull_len;
+
+#define XDP_PACKET_HEADROOM 256
+
+SEC("xdp.frags")
+int xdp_find_sizes(struct xdp_md *ctx)
+{
+ xdpf_sz = sizeof(struct xdp_frame);
+ sinfo_sz = __PAGE_SIZE - XDP_PACKET_HEADROOM -
+ (ctx->data_end - ctx->data);
+
+ return XDP_PASS;
+}
+
+SEC("xdp.frags")
+int xdp_pull_data_prog(struct xdp_md *ctx)
+{
+ __u8 *data_end = (void *)(long)ctx->data_end;
+ __u8 *data = (void *)(long)ctx->data;
+ __u8 *val_p;
+ int err;
+
+ if (data_len != data_end - data)
+ return XDP_DROP;
+
+ err = bpf_xdp_pull_data(ctx, pull_len);
+ if (err)
+ return XDP_DROP;
+
+ val_p = (void *)(long)ctx->data + 1024;
+ if (val_p + 1 > (void *)(long)ctx->data_end)
+ return XDP_DROP;
+
+ if (*val_p != 0xbb)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
deleted file mode 100644
index a5337cd9400b..000000000000
--- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright (c) 2017 VMware
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-int _version SEC("version") = 1;
-
-SEC("redirect_to_111")
-int xdp_redirect_to_111(struct xdp_md *xdp)
-{
- return bpf_redirect(111, 0);
-}
-SEC("redirect_to_222")
-int xdp_redirect_to_222(struct xdp_md *xdp)
-{
- return bpf_redirect(222, 0);
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
new file mode 100644
index 000000000000..2a3496d8e327
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp.frags")
+int xdp_adjust_frags(struct xdp_md *xdp)
+{
+ __u8 *data_end = (void *)(long)xdp->data_end;
+ __u8 *data = (void *)(long)xdp->data;
+ __u8 val[16] = {};
+ __u32 offset;
+ int err;
+
+ if (data + sizeof(__u32) > data_end)
+ return XDP_DROP;
+
+ offset = *(__u32 *)data;
+ err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val));
+ if (err < 0)
+ return XDP_DROP;
+
+ if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
+ return XDP_DROP;
+
+ val[0] = 0xbb; /* update the marker */
+ val[15] = 0xbb;
+ err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val));
+ if (err < 0)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index 134768f6b788..a80cc5f2f4f2 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -98,12 +98,12 @@ bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
return true;
}
-/* Hint, VLANs are choosen to hit network-byte-order issues */
+/* Hint, VLANs are chosen to hit network-byte-order issues */
#define TESTVLAN 4011 /* 0xFAB */
// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */
-SEC("xdp_drop_vlan_4011")
-int xdp_prognum0(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_drop_vlan_4011(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -144,8 +144,8 @@ Load prog with ip tool:
/* Changing VLAN to zero, have same practical effect as removing the VLAN. */
#define TO_VLAN 0
-SEC("xdp_vlan_change")
-int xdp_prognum1(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_change(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -160,7 +160,7 @@ int xdp_prognum1(struct xdp_md *ctx)
/* Modifying VLAN, preserve top 4 bits */
vlan_hdr->h_vlan_TCI =
- bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
+ bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000U)
| TO_VLAN);
}
@@ -178,8 +178,8 @@ int xdp_prognum1(struct xdp_md *ctx)
#endif
#define VLAN_HDR_SZ 4 /* bytes */
-SEC("xdp_vlan_remove_outer")
-int xdp_prognum2(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -195,7 +195,7 @@ int xdp_prognum2(struct xdp_md *ctx)
/* Moving Ethernet header, dest overlap with src, memmove handle this */
dest = data;
- dest+= VLAN_HDR_SZ;
+ dest += VLAN_HDR_SZ;
/*
* Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
* only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
@@ -210,19 +210,6 @@ int xdp_prognum2(struct xdp_md *ctx)
}
static __always_inline
-void shift_mac_4bytes_16bit(void *data)
-{
- __u16 *p = data;
-
- p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */
- p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */
- p[5] = p[3];
- p[4] = p[2];
- p[3] = p[1];
- p[2] = p[0];
-}
-
-static __always_inline
void shift_mac_4bytes_32bit(void *data)
{
__u32 *p = data;
@@ -237,8 +224,8 @@ void shift_mac_4bytes_32bit(void *data)
p[1] = p[0];
}
-SEC("xdp_vlan_remove_outer2")
-int xdp_prognum3(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer2(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -267,8 +254,8 @@ int xdp_prognum3(struct xdp_md *ctx)
* The TC-clsact eBPF programs (currently) need to be attach via TC commands
*/
-SEC("tc_vlan_push")
-int _tc_progA(struct __sk_buff *ctx)
+SEC("tc")
+int tc_vlan_push(struct __sk_buff *ctx)
{
bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
new file mode 100644
index 000000000000..97ed625bb70a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO 1
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp/cpumap")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
index 59ee4f182ff8..3619239b01b7 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -12,25 +12,36 @@ struct {
__uint(max_entries, 4);
} cpu_map SEC(".maps");
-SEC("xdp_redir")
+__u32 redirect_count = 0;
+
+SEC("xdp")
int xdp_redir_prog(struct xdp_md *ctx)
{
- return bpf_redirect_map(&cpu_map, 1, 0);
+ return bpf_redirect_map(&cpu_map, 0, 0);
}
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
}
-SEC("xdp_cpumap/dummy_cm")
+SEC("xdp/cpumap")
int xdp_dummy_cm(struct xdp_md *ctx)
{
+ if (bpf_get_smp_processor_id() == 0)
+ redirect_count++;
+
if (ctx->ingress_ifindex == IFINDEX_LO)
return XDP_DROP;
return XDP_PASS;
}
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
new file mode 100644
index 000000000000..cdcf7de7ec8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp/devmap")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
index 0ac086497722..92b65a485d4a 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -9,16 +9,16 @@ struct {
__uint(max_entries, 4);
} dm_ports SEC(".maps");
-SEC("xdp_redir")
+SEC("xdp")
int xdp_redir_prog(struct xdp_md *ctx)
{
- return bpf_redirect_map(&dm_ports, 1, 0);
+ return bpf_redirect_map(&dm_ports, 0, 0);
}
/* invalid program on DEVMAP entry;
* SEC name means expected attach type not set
*/
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
@@ -27,7 +27,7 @@ int xdp_dummy_prog(struct xdp_md *ctx)
/* valid program on DEVMAP entry via SEC name;
* has access to egress and ingress ifindex
*/
-SEC("xdp_devmap/map_prog")
+SEC("xdp/devmap")
int xdp_dummy_dm(struct xdp_md *ctx)
{
char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
@@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx)
return XDP_PASS;
}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
new file mode 100644
index 000000000000..4c677c001258
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int counter;
+ struct bpf_timer timer;
+ struct bpf_spin_lock lock; /* unused */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap_malloc SEC(".maps");
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"),
+ race_array SEC(".maps");
+
+__u64 bss_data;
+__u64 abs_data;
+__u64 err;
+__u64 ok;
+__u64 callback_check = 52;
+__u64 callback2_check = 52;
+__u64 pinned_callback_check;
+__s32 pinned_cpu;
+
+#define ARRAY 1
+#define HTAB 2
+#define HTAB_MALLOC 3
+#define LRU 4
+
+/* callback for array and lru timers */
+static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
+{
+ /* increment bss variable twice.
+ * Once via array timer callback and once via lru timer callback
+ */
+ bss_data += 5;
+
+ /* *key == 0 - the callback was called for array timer.
+ * *key == 4 - the callback was called from lru timer.
+ */
+ if (*key == ARRAY) {
+ struct bpf_timer *lru_timer;
+ int lru_key = LRU;
+
+ /* rearm array timer to be called again in ~35 seconds */
+ if (bpf_timer_start(timer, 1ull << 35, 0) != 0)
+ err |= 1;
+
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_set_callback(lru_timer, timer_cb1);
+ if (bpf_timer_start(lru_timer, 0, 0) != 0)
+ err |= 2;
+ } else if (*key == LRU) {
+ int lru_key, i;
+
+ for (i = LRU + 1;
+ i <= 100 /* for current LRU eviction algorithm this number
+ * should be larger than ~ lru->max_entries * 2
+ */;
+ i++) {
+ struct elem init = {};
+
+ /* lru_key cannot be used as loop induction variable
+ * otherwise the loop will be unbounded.
+ */
+ lru_key = i;
+
+ /* add more elements into lru map to push out current
+ * element and force deletion of this timer
+ */
+ bpf_map_update_elem(map, &lru_key, &init, 0);
+ /* look it up to bump it into active list */
+ bpf_map_lookup_elem(map, &lru_key);
+
+ /* keep adding until *key changes underneath,
+ * which means that key/timer memory was reused
+ */
+ if (*key != LRU)
+ break;
+ }
+
+ /* check that the timer was removed */
+ if (bpf_timer_cancel(timer) != -EINVAL)
+ err |= 4;
+ ok |= 1;
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG2(test1, int, a)
+{
+ struct bpf_timer *arr_timer, *lru_timer;
+ struct elem init = {};
+ int lru_key = LRU;
+ int array_key = ARRAY;
+
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+
+ bpf_map_update_elem(&lru, &lru_key, &init, 0);
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_init(lru_timer, &lru, CLOCK_MONOTONIC);
+
+ bpf_timer_set_callback(arr_timer, timer_cb1);
+ bpf_timer_start(arr_timer, 0 /* call timer_cb1 asap */, 0);
+
+ /* init more timers to check that array destruction
+ * doesn't leak timer memory.
+ */
+ array_key = 0;
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+ return 0;
+}
+
+/* callback for prealloc and non-prealloca hashtab timers */
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ if (*key == HTAB)
+ callback_check--;
+ else
+ callback2_check--;
+ if (val->counter > 0 && --val->counter) {
+ /* re-arm the timer again to execute after 1 usec */
+ bpf_timer_start(&val->timer, 1000, 0);
+ } else if (*key == HTAB) {
+ struct bpf_timer *arr_timer;
+ int array_key = ARRAY;
+
+ /* cancel arr_timer otherwise bpf_fentry_test1 prog
+ * will stay alive forever.
+ */
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ if (bpf_timer_cancel(arr_timer) != 1)
+ /* bpf_timer_cancel should return 1 to indicate
+ * that arr_timer was active at this time
+ */
+ err |= 8;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 16;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ /* in preallocated hashmap both 'key' and 'val' could have been
+ * reused to store another map element (like in LRU above),
+ * but in controlled test environment the below test works.
+ * It's not a use-after-free. The memory is owned by the map.
+ */
+ if (bpf_timer_start(&val->timer, 1000, 0) != -EINVAL)
+ err |= 32;
+ ok |= 2;
+ } else {
+ if (*key != HTAB_MALLOC)
+ err |= 64;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 128;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ ok |= 4;
+ }
+ return 0;
+}
+
+int bpf_timer_test(void)
+{
+ struct hmap_elem *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME) != 0)
+ err |= 512;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME) != 0)
+ err |= 1024;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG2(test2, int, a, int, b)
+{
+ struct hmap_elem init = {}, *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ init.counter = 10; /* number of times to trigger timer_cb2 */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+
+ /* init more timers to check that htab operations
+ * don't leak timer memory.
+ */
+ key = 0;
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap, &key);
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+
+ /* and with non-prealloc htab */
+ key_malloc = 0;
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap_malloc, &key_malloc);
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+
+ return bpf_timer_test();
+}
+
+/* callback for absolute timer */
+static int timer_cb3(void *map, int *key, struct bpf_timer *timer)
+{
+ abs_data += 6;
+
+ if (abs_data < 12) {
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000,
+ BPF_F_TIMER_ABS);
+ } else {
+ /* Re-arm timer ~35 seconds in future */
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + (1ull << 35),
+ BPF_F_TIMER_ABS);
+ }
+
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG2(test3, int, a)
+{
+ int key = 0;
+ struct bpf_timer *timer;
+
+ bpf_printk("test3");
+
+ timer = bpf_map_lookup_elem(&abs_timer, &key);
+ if (timer) {
+ if (bpf_timer_init(timer, &abs_timer, CLOCK_BOOTTIME) != 0)
+ err |= 2048;
+ bpf_timer_set_callback(timer, timer_cb3);
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000,
+ BPF_F_TIMER_ABS);
+ }
+
+ return 0;
+}
+
+/* callback for pinned timer */
+static int timer_cb_pinned(void *map, int *key, struct bpf_timer *timer)
+{
+ __s32 cpu = bpf_get_smp_processor_id();
+
+ if (cpu != pinned_cpu)
+ err |= 16384;
+
+ pinned_callback_check++;
+ return 0;
+}
+
+static void test_pinned_timer(bool soft)
+{
+ int key = 0;
+ void *map;
+ struct bpf_timer *timer;
+ __u64 flags = BPF_F_TIMER_CPU_PIN;
+ __u64 start_time;
+
+ if (soft) {
+ map = &soft_timer_pinned;
+ start_time = 0;
+ } else {
+ map = &abs_timer_pinned;
+ start_time = bpf_ktime_get_boot_ns();
+ flags |= BPF_F_TIMER_ABS;
+ }
+
+ timer = bpf_map_lookup_elem(map, &key);
+ if (timer) {
+ if (bpf_timer_init(timer, map, CLOCK_BOOTTIME) != 0)
+ err |= 4096;
+ bpf_timer_set_callback(timer, timer_cb_pinned);
+ pinned_cpu = bpf_get_smp_processor_id();
+ bpf_timer_start(timer, start_time + 1000, flags);
+ } else {
+ err |= 8192;
+ }
+}
+
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG2(test4, int, a)
+{
+ bpf_printk("test4");
+ test_pinned_timer(true);
+
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test5")
+int BPF_PROG2(test5, int, a)
+{
+ bpf_printk("test5");
+ test_pinned_timer(false);
+
+ return 0;
+}
+
+static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer)
+{
+ bpf_timer_start(timer, 1000000, 0);
+ return 0;
+}
+
+SEC("syscall")
+int race(void *ctx)
+{
+ struct bpf_timer *timer;
+ int err, race_key = 0;
+ struct elem init;
+
+ __builtin_memset(&init, 0, sizeof(struct elem));
+ bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY);
+
+ timer = bpf_map_lookup_elem(&race_array, &race_key);
+ if (!timer)
+ return 1;
+
+ err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
+ if (err && err != -EBUSY)
+ return 1;
+
+ bpf_timer_set_callback(timer, race_timer_callback);
+ bpf_timer_start(timer, 0, 0);
+ bpf_timer_cancel(timer);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_crash.c b/tools/testing/selftests/bpf/progs/timer_crash.c
new file mode 100644
index 000000000000..f8f7944e70da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_crash.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct map_elem {
+ struct bpf_timer timer;
+ struct bpf_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct map_elem);
+} amap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct map_elem);
+} hmap SEC(".maps");
+
+int pid = 0;
+int crash_map = 0; /* 0 for amap, 1 for hmap */
+
+SEC("fentry/do_nanosleep")
+int sys_enter(void *ctx)
+{
+ struct map_elem *e, value = {};
+ void *map = crash_map ? (void *)&hmap : (void *)&amap;
+
+ if (bpf_get_current_task_btf()->tgid != pid)
+ return 0;
+
+ *(void **)&value = (void *)0xdeadcaf3;
+
+ bpf_map_update_elem(map, &(int){0}, &value, 0);
+ /* For array map, doing bpf_map_update_elem will do a
+ * check_and_free_timer_in_array, which will trigger the crash if timer
+ * pointer was overwritten, for hmap we need to use bpf_timer_cancel.
+ */
+ if (crash_map == 1) {
+ e = bpf_map_lookup_elem(map, &(int){0});
+ if (!e)
+ return 0;
+ bpf_timer_cancel(&e->timer);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c
new file mode 100644
index 000000000000..5a2e9dabf1c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_failure.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer_map SEC(".maps");
+
+__naked __noinline __used
+static unsigned long timer_cb_ret_bad()
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 s> 1000 goto 1f;"
+ "r0 = 0;"
+ "1:"
+ "goto +0;" /* checkpoint */
+ /* async callback is expected to return 0, so branch above
+ * skipping r0 = 0; should lead to a failure, but if exit
+ * instruction doesn't enforce r0's precision, this callback
+ * will be successfully verified
+ */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common
+ );
+}
+
+SEC("fentry/bpf_fentry_test1")
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+/* check that fallthrough code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before")
+__msg(": (85) call bpf_get_prandom_u32#7") /* anchor message */
+/* check that branch code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before ") __msg(": (85) call bpf_get_prandom_u32#7")
+__msg("should have been in [0, 0]")
+long BPF_PROG2(test_bad_ret, int, a)
+{
+ int key = 0;
+ struct bpf_timer *timer;
+
+ timer = bpf_map_lookup_elem(&timer_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb_ret_bad);
+ bpf_timer_start(timer, 1000, 0);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_interrupt.c b/tools/testing/selftests/bpf/progs/timer_interrupt.c
new file mode 100644
index 000000000000..19180a455f40
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_interrupt.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define CLOCK_MONOTONIC 1
+
+int preempt_count;
+int in_interrupt;
+int in_interrupt_cb;
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+static int timer_in_interrupt(void *map, int *key, struct bpf_timer *timer)
+{
+ preempt_count = get_preempt_count();
+ in_interrupt_cb = bpf_in_interrupt();
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test_timer_interrupt)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&array, &key);
+ if (!timer)
+ return 0;
+
+ in_interrupt = bpf_in_interrupt();
+ bpf_timer_init(timer, &array, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(timer, timer_in_interrupt);
+ bpf_timer_start(timer, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c
new file mode 100644
index 000000000000..3e520133281e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_lockup.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer1_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer2_map SEC(".maps");
+
+int timer1_err;
+int timer2_err;
+
+static int timer_cb1(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer)
+ timer2_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+static int timer_cb2(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer)
+ timer1_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+SEC("tc")
+int timer1_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb1);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
+
+SEC("tc")
+int timer2_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb2);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c
new file mode 100644
index 000000000000..50ebc3f68522
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+static int timer_cb1(void *map, int *key, struct hmap_elem *val);
+
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb1);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 1;
+ ok |= 1;
+ return 0;
+}
+
+/* callback for inner hash map */
+static int timer_cb1(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 2;
+ /* Do a lookup to make sure 'map' and 'key' pointers are correct */
+ bpf_map_lookup_elem(map, key);
+ ok |= 2;
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb1))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
new file mode 100644
index 000000000000..dd3f1ed6d6e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define ARRAY_KEY2 2
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+/* callback for inner hash map */
+static int timer_cb(void *map, int *key, struct hmap_elem *val)
+{
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map, *inner_map2;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int array_key2 = ARRAY_KEY2;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ inner_map2 = bpf_map_lookup_elem(&outer_arr, &array_key2);
+ if (!inner_map2)
+ return 0;
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map2, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/token_lsm.c b/tools/testing/selftests/bpf/progs/token_lsm.c
new file mode 100644
index 000000000000..a6002d073b1b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/token_lsm.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int my_pid;
+int reject_capable;
+int reject_cmd;
+
+SEC("lsm/bpf_token_capable")
+int BPF_PROG(token_capable, struct bpf_token *token, int cap)
+{
+ if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+ if (reject_capable)
+ return -1;
+ return 0;
+}
+
+SEC("lsm/bpf_token_cmd")
+int BPF_PROG(token_cmd, struct bpf_token *token, enum bpf_cmd cmd)
+{
+ if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+ if (reject_cmd)
+ return -1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c
new file mode 100644
index 000000000000..00a4be9d3074
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int val = 0;
+
+SEC("fentry/test_1")
+int BPF_PROG(fentry_test_1, __u64 *st_ops_ctx)
+{
+ __u64 state;
+
+ /* Read the traced st_ops arg1 which is a pointer */
+ bpf_probe_read_kernel(&state, sizeof(__u64), (void *)st_ops_ctx);
+ /* Read state->val */
+ bpf_probe_read_kernel(&val, sizeof(__u32), (void *)state);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
index 8ca7f399b670..6695478c2b25 100644
--- a/tools/testing/selftests/bpf/progs/trace_printk.c
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -4,17 +4,18 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
int trace_printk_ret = 0;
int trace_printk_ran = 0;
-SEC("tp/raw_syscalls/sys_enter")
+const char fmt[] = "Testing,testing %d\n";
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int sys_enter(void *ctx)
{
- static const char fmt[] = "testing,testing %d\n";
-
trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
++trace_printk_ran);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/trace_vprintk.c b/tools/testing/selftests/bpf/progs/trace_vprintk.c
new file mode 100644
index 000000000000..969306cd4f33
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_vprintk.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int null_data_vprintk_ret = 0;
+int trace_vprintk_ret = 0;
+int trace_vprintk_ran = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int sys_enter(void *ctx)
+{
+ static const char one[] = "1";
+ static const char three[] = "3";
+ static const char five[] = "5";
+ static const char seven[] = "7";
+ static const char nine[] = "9";
+ static const char f[] = "%pS\n";
+
+ /* runner doesn't search for \t, just ensure it compiles */
+ bpf_printk("\t");
+
+ trace_vprintk_ret = __bpf_vprintk("%s,%d,%s,%d,%s,%d,%s,%d,%s,%d %d\n",
+ one, 2, three, 4, five, 6, seven, 8, nine, 10, ++trace_vprintk_ran);
+
+ /* non-NULL fmt w/ NULL data should result in error */
+ null_data_vprintk_ret = bpf_trace_vprintk(f, sizeof(f), NULL, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c
new file mode 100644
index 000000000000..65e485c4468c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_failure.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?fentry/bpf_spin_lock")
+int BPF_PROG(test_spin_lock, struct bpf_spin_lock *lock)
+{
+ return 0;
+}
+
+SEC("?fentry/bpf_spin_unlock")
+int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock)
+{
+ return 0;
+}
+
+SEC("?fentry/__rcu_read_lock")
+int BPF_PROG(tracing_deny)
+{
+ return 0;
+}
+
+SEC("?fexit/do_exit")
+int BPF_PROG(fexit_noreturns)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
new file mode 100644
index 000000000000..d460732e2023
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_testmod_struct_arg_1 {
+ int a;
+};
+struct bpf_testmod_struct_arg_2 {
+ long a;
+ long b;
+};
+
+struct bpf_testmod_struct_arg_3 {
+ int a;
+ int b[];
+};
+
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
+long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
+__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
+long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
+long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret;
+long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
+long t5_ret;
+int t6;
+
+long ut1_a_a, ut1_b, ut1_c;
+long ut2_a, ut2_b_a, ut2_b_b;
+
+SEC("fentry/bpf_testmod_test_struct_arg_1")
+int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
+{
+ t1_a_a = a.a;
+ t1_a_b = a.b;
+ t1_b = b;
+ t1_c = c;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_1")
+int BPF_PROG2(test_struct_arg_2, struct bpf_testmod_struct_arg_2, a, int, b, int, c, int, ret)
+{
+ t1_nregs = bpf_get_func_arg_cnt(ctx);
+ /* a.a */
+ bpf_get_func_arg(ctx, 0, &t1_reg0);
+ /* a.b */
+ bpf_get_func_arg(ctx, 1, &t1_reg1);
+ /* b */
+ bpf_get_func_arg(ctx, 2, &t1_reg2);
+ t1_reg2 = (int)t1_reg2;
+ /* c */
+ bpf_get_func_arg(ctx, 3, &t1_reg3);
+ t1_reg3 = (int)t1_reg3;
+
+ t1_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_2")
+int BPF_PROG2(test_struct_arg_3, int, a, struct bpf_testmod_struct_arg_2, b, int, c)
+{
+ t2_a = a;
+ t2_b_a = b.a;
+ t2_b_b = b.b;
+ t2_c = c;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_2")
+int BPF_PROG2(test_struct_arg_4, int, a, struct bpf_testmod_struct_arg_2, b, int, c, int, ret)
+{
+ t2_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_3")
+int BPF_PROG2(test_struct_arg_5, int, a, int, b, struct bpf_testmod_struct_arg_2, c)
+{
+ t3_a = a;
+ t3_b = b;
+ t3_c_a = c.a;
+ t3_c_b = c.b;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_3")
+int BPF_PROG2(test_struct_arg_6, int, a, int, b, struct bpf_testmod_struct_arg_2, c, int, ret)
+{
+ t3_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_4")
+int BPF_PROG2(test_struct_arg_7, struct bpf_testmod_struct_arg_1, a, int, b,
+ int, c, int, d, struct bpf_testmod_struct_arg_2, e)
+{
+ t4_a_a = a.a;
+ t4_b = b;
+ t4_c = c;
+ t4_d = d;
+ t4_e_a = e.a;
+ t4_e_b = e.b;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_4")
+int BPF_PROG2(test_struct_arg_8, struct bpf_testmod_struct_arg_1, a, int, b,
+ int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret)
+{
+ t4_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_5")
+int BPF_PROG2(test_struct_arg_9)
+{
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_5")
+int BPF_PROG2(test_struct_arg_10, int, ret)
+{
+ t5_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_6")
+int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a)
+{
+ t6 = a->b[0];
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_union_arg_1")
+int BPF_PROG2(test_union_arg_1, union bpf_testmod_union_arg_1, a, int, b, int, c)
+{
+ ut1_a_a = a.arg.a;
+ ut1_b = b;
+ ut1_c = c;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_union_arg_2")
+int BPF_PROG2(test_union_arg_2, int, a, union bpf_testmod_union_arg_2, b)
+{
+ ut2_a = a;
+ ut2_b_a = b.arg.a;
+ ut2_b_b = b.arg.b;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c
new file mode 100644
index 000000000000..4742012ace06
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_testmod_struct_arg_4 {
+ u64 a;
+ int b;
+};
+
+struct bpf_testmod_struct_arg_5 {
+ char a;
+ short b;
+ int c;
+ long d;
+};
+
+long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret;
+long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret;
+long t9_a, t9_b, t9_c, t9_d, t9_e, t9_f, t9_g, t9_h_a, t9_h_b, t9_h_c, t9_h_d, t9_i, t9_ret;
+
+SEC("fentry/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_many_args_1, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f)
+{
+ t7_a = a;
+ t7_b = (long)b;
+ t7_c = c;
+ t7_d = d;
+ t7_e = (long)e;
+ t7_f_a = f.a;
+ t7_f_b = f.b;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_many_args_2, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, ret)
+{
+ t7_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_many_args_3, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g)
+{
+ t8_a = a;
+ t8_b = (long)b;
+ t8_c = c;
+ t8_d = d;
+ t8_e = (long)e;
+ t8_f_a = f.a;
+ t8_f_b = f.b;
+ t8_g = g;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_many_args_4, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g,
+ int, ret)
+{
+ t8_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_9")
+int BPF_PROG2(test_struct_many_args_5, __u64, a, void *, b, short, c, int, d, void *, e,
+ char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i)
+{
+ t9_a = a;
+ t9_b = (long)b;
+ t9_c = c;
+ t9_d = d;
+ t9_e = (long)e;
+ t9_f = f;
+ t9_g = g;
+ t9_h_a = h.a;
+ t9_h_b = h.b;
+ t9_h_c = h.c;
+ t9_h_d = h.d;
+ t9_i = i;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_9")
+int BPF_PROG2(test_struct_many_args_6, __u64, a, void *, b, short, c, int, d, void *, e,
+ char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i, int, ret)
+{
+ t9_ret = ret;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 9a4d09590b3d..2898b3749d07 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -1,54 +1,154 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
-
#include <linux/bpf.h>
#include <asm/unistd.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
-long hits = 0;
+#define CPU_MASK 255
+#define MAX_CPUS (CPU_MASK + 1) /* should match MAX_BUCKETS in benchs/bench_trigger.c */
-SEC("tp/syscalls/sys_enter_getpgid")
-int bench_trigger_tp(void *ctx)
+/* matches struct counter in bench.h */
+struct counter {
+ long value;
+} __attribute__((aligned(128)));
+
+struct counter hits[MAX_CPUS];
+
+static __always_inline void inc_counter(void)
+{
+ int cpu = bpf_get_smp_processor_id();
+
+ __sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
+}
+
+SEC("?uprobe")
+int bench_trigger_uprobe(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("raw_tp/sys_enter")
-int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
+SEC("?uprobe.multi")
+int bench_trigger_uprobe_multi(void *ctx)
{
- if (id == __NR_getpgid)
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
+ return 0;
+}
+
+const volatile int batch_iters = 0;
+
+SEC("?raw_tp")
+int trigger_kernel_count(void *ctx)
+{
+ int i;
+
+ for (i = 0; i < batch_iters; i++) {
+ inc_counter();
+ bpf_get_numa_node_id();
+ }
+
return 0;
}
-SEC("kprobe/__x64_sys_getpgid")
+SEC("?raw_tp")
+int trigger_driver(void *ctx)
+{
+ int i;
+
+ for (i = 0; i < batch_iters; i++)
+ (void)bpf_get_numa_node_id(); /* attach point for benchmarking */
+
+ return 0;
+}
+
+extern int bpf_modify_return_test_tp(int nonce) __ksym __weak;
+
+SEC("?raw_tp")
+int trigger_driver_kfunc(void *ctx)
+{
+ int i;
+
+ for (i = 0; i < batch_iters; i++)
+ (void)bpf_modify_return_test_tp(0); /* attach point for benchmarking */
+
+ return 0;
+}
+
+SEC("?kprobe/bpf_get_numa_node_id")
int bench_trigger_kprobe(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("?kretprobe/bpf_get_numa_node_id")
+int bench_trigger_kretprobe(void *ctx)
+{
+ inc_counter();
+ return 0;
+}
+
+SEC("?kprobe.multi/bpf_get_numa_node_id")
+int bench_trigger_kprobe_multi(void *ctx)
+{
+ inc_counter();
+ return 0;
+}
+
+SEC("?kprobe.multi/bpf_get_numa_node_id")
+int bench_kprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
+int bench_trigger_kretprobe_multi(void *ctx)
+{
+ inc_counter();
+ return 0;
+}
+
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
+int bench_kretprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("fentry.s/__x64_sys_getpgid")
-int bench_trigger_fentry_sleep(void *ctx)
+SEC("?fexit/bpf_get_numa_node_id")
+int bench_trigger_fexit(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("fmod_ret/__x64_sys_getpgid")
+SEC("?fmod_ret/bpf_modify_return_test_tp")
int bench_trigger_fmodret(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return -22;
}
+
+SEC("?tp/bpf_test_run/bpf_trigger_tp")
+int bench_trigger_tp(void *ctx)
+{
+ inc_counter();
+ return 0;
+}
+
+SEC("?raw_tp/bpf_trigger_tp")
+int bench_trigger_rawtp(void *ctx)
+{
+ inc_counter();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/twfw.c b/tools/testing/selftests/bpf/progs/twfw.c
new file mode 100644
index 000000000000..de1b18a62b46
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/twfw.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/bpf.h>
+#include <stdint.h>
+
+#define TWFW_MAX_TIERS (64)
+/*
+ * load is successful
+ * #define TWFW_MAX_TIERS (64u)$
+ */
+
+struct twfw_tier_value {
+ unsigned long mask[1];
+};
+
+struct rule {
+ uint8_t seqnum;
+};
+
+struct rules_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, struct rule);
+ __uint(max_entries, 1);
+};
+
+struct tiers_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, struct twfw_tier_value);
+ __uint(max_entries, 1);
+};
+
+struct rules_map rules SEC(".maps");
+struct tiers_map tiers SEC(".maps");
+
+SEC("cgroup_skb/ingress")
+int twfw_verifier(struct __sk_buff* skb)
+{
+ const uint32_t key = 0;
+ const struct twfw_tier_value* tier = bpf_map_lookup_elem(&tiers, &key);
+ if (!tier)
+ return 1;
+
+ struct rule* rule = bpf_map_lookup_elem(&rules, &key);
+ if (!rule)
+ return 1;
+
+ if (rule && rule->seqnum < TWFW_MAX_TIERS) {
+ /* rule->seqnum / 64 should always be 0 */
+ unsigned long mask = tier->mask[rule->seqnum / 64];
+ if (mask)
+ return 0;
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c
new file mode 100644
index 000000000000..9d808b8f4ab0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/type_cast.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} enter_id SEC(".maps");
+
+#define IFNAMSIZ 16
+
+int ifindex, ingress_ifindex;
+char name[IFNAMSIZ];
+unsigned int inum;
+unsigned int meta_len, frag0_len, kskb_len, kskb2_len;
+
+SEC("?xdp")
+int md_xdp(struct xdp_md *ctx)
+{
+ struct xdp_buff *kctx = bpf_cast_to_kern_ctx(ctx);
+ struct net_device *dev;
+
+ dev = kctx->rxq->dev;
+ ifindex = dev->ifindex;
+ inum = dev->nd_net.net->ns.inum;
+ __builtin_memcpy(name, dev->name, IFNAMSIZ);
+ ingress_ifindex = ctx->ingress_ifindex;
+ return XDP_PASS;
+}
+
+SEC("?tc")
+int md_skb(struct __sk_buff *skb)
+{
+ struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb);
+ struct skb_shared_info *shared_info;
+ struct sk_buff *kskb2;
+
+ kskb_len = kskb->len;
+
+ /* Simulate the following kernel macro:
+ * #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
+ */
+ shared_info = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info);
+ meta_len = shared_info->meta_len;
+ frag0_len = shared_info->frag_list->len;
+
+ /* kskb2 should be equal to kskb */
+ kskb2 = bpf_core_cast(kskb, typeof(*kskb2));
+ kskb2_len = kskb2->len;
+ return 0;
+}
+
+SEC("?tp_btf/sys_enter")
+int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id)
+{
+ struct task_struct *task, *task_dup;
+
+ task = bpf_get_current_task_btf();
+ task_dup = bpf_core_cast(task, struct task_struct);
+ (void)bpf_task_storage_get(&enter_id, task_dup, 0, 0);
+ return 0;
+}
+
+SEC("?tracepoint/syscalls/sys_enter_nanosleep")
+int kctx_u64(void *ctx)
+{
+ u64 *kctx = bpf_core_cast(ctx, u64);
+
+ (void)kctx;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c
index 165e3c2dd9a3..4767451b59ac 100644
--- a/tools/testing/selftests/bpf/progs/udp_limit.c
+++ b/tools/testing/selftests/bpf/progs/udp_limit.c
@@ -17,7 +17,6 @@ SEC("cgroup/sock_create")
int sock(struct bpf_sock *ctx)
{
int *sk_storage;
- __u32 key;
if (ctx->type != SOCK_DGRAM)
return 1;
@@ -46,7 +45,6 @@ SEC("cgroup/sock_release")
int sock_release(struct bpf_sock *ctx)
{
int *sk_storage;
- __u32 key;
if (ctx->type != SOCK_DGRAM)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c
new file mode 100644
index 000000000000..046a204c8fc6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uninit_stack.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Read an uninitialized value from stack at a fixed offset */
+SEC("socket")
+__naked int read_uninit_stack_fixed_off(void *ctx)
+{
+ asm volatile (" \
+ r0 = 0; \
+ /* force stack depth to be 128 */ \
+ *(u64*)(r10 - 128) = r1; \
+ r1 = *(u8 *)(r10 - 8 ); \
+ r0 += r1; \
+ r1 = *(u8 *)(r10 - 11); \
+ r1 = *(u8 *)(r10 - 13); \
+ r1 = *(u8 *)(r10 - 15); \
+ r1 = *(u16*)(r10 - 16); \
+ r1 = *(u32*)(r10 - 32); \
+ r1 = *(u64*)(r10 - 64); \
+ /* read from a spill of a wrong size, it is a separate \
+ * branch in check_stack_read_fixed_off() \
+ */ \
+ *(u32*)(r10 - 72) = r1; \
+ r1 = *(u64*)(r10 - 72); \
+ r0 = 0; \
+ exit; \
+"
+ ::: __clobber_all);
+}
+
+/* Read an uninitialized value from stack at a variable offset */
+SEC("socket")
+__naked int read_uninit_stack_var_off(void *ctx)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ /* force stack depth to be 64 */ \
+ *(u64*)(r10 - 64) = r0; \
+ r0 = -r0; \
+ /* give r0 a range [-31, -1] */ \
+ if r0 s<= -32 goto exit_%=; \
+ if r0 s>= 0 goto exit_%=; \
+ /* access stack using r0 */ \
+ r1 = r10; \
+ r1 += r0; \
+ r2 = *(u8*)(r1 + 0); \
+exit_%=: r0 = 0; \
+ exit; \
+"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+static __noinline void dummy(void) {}
+
+/* Pass a pointer to uninitialized stack memory to a helper.
+ * Passed memory block should be marked as STACK_MISC after helper call.
+ */
+SEC("socket")
+__log_level(7) __msg("fp-104=mmmmmmmm")
+__naked int helper_uninit_to_misc(void *ctx)
+{
+ asm volatile (" \
+ /* force stack depth to be 128 */ \
+ *(u64*)(r10 - 128) = r1; \
+ r1 = r10; \
+ r1 += -128; \
+ r2 = 32; \
+ r3 = 0; \
+ call %[bpf_probe_read_user]; \
+ /* Call to dummy() forces print_verifier_state(..., true), \
+ * thus showing the stack state, matched by __msg(). \
+ */ \
+ call %[dummy]; \
+ r0 = 0; \
+ exit; \
+"
+ :
+ : __imm(bpf_probe_read_user),
+ __imm(dummy)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/unsupported_ops.c b/tools/testing/selftests/bpf/progs/unsupported_ops.c
new file mode 100644
index 000000000000..8aa2e0dd624e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/unsupported_ops.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/unsupported_ops")
+__failure
+__msg("attach to unsupported member unsupported_ops of struct bpf_testmod_ops")
+int BPF_PROG(unsupported_ops)
+{
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod = {
+ .unsupported_ops = (void *)unsupported_ops,
+};
diff --git a/tools/testing/selftests/bpf/progs/update_map_in_htab.c b/tools/testing/selftests/bpf/progs/update_map_in_htab.c
new file mode 100644
index 000000000000..c2066247cd9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/update_map_in_htab.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 2);
+ __array(values, struct inner_map_type);
+} outer_htab_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 2);
+ __array(values, struct inner_map_type);
+} outer_alloc_htab_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c
new file mode 100644
index 000000000000..44190efcdba2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/usdt.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 uprobe_multi_func_1_addr = 0;
+__u64 uprobe_multi_func_2_addr = 0;
+__u64 uprobe_multi_func_3_addr = 0;
+
+__u64 uprobe_multi_func_1_result = 0;
+__u64 uprobe_multi_func_2_result = 0;
+__u64 uprobe_multi_func_3_result = 0;
+
+__u64 uretprobe_multi_func_1_result = 0;
+__u64 uretprobe_multi_func_2_result = 0;
+__u64 uretprobe_multi_func_3_result = 0;
+
+__u64 uprobe_multi_sleep_result = 0;
+
+int pid = 0;
+int child_pid = 0;
+int child_tid = 0;
+int child_pid_usdt = 0;
+int child_tid_usdt = 0;
+
+int expect_pid = 0;
+bool bad_pid_seen = false;
+bool bad_pid_seen_usdt = false;
+
+bool test_cookie = false;
+void *user_ptr = 0;
+
+static __always_inline bool verify_sleepable_user_copy(void)
+{
+ char data[9];
+
+ bpf_copy_from_user(data, sizeof(data), user_ptr);
+ return bpf_strncmp(data, sizeof(data), "test_data") == 0;
+}
+
+static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
+{
+ __u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+ __u32 cur_pid;
+
+ cur_pid = cur_pid_tgid >> 32;
+ if (pid && cur_pid != pid)
+ return;
+
+ if (expect_pid && cur_pid != expect_pid)
+ bad_pid_seen = true;
+
+ child_pid = cur_pid_tgid >> 32;
+ child_tid = (__u32)cur_pid_tgid;
+
+ __u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
+ __u64 addr = bpf_get_func_ip(ctx);
+
+#define SET(__var, __addr, __cookie) ({ \
+ if (addr == __addr && \
+ (!test_cookie || (cookie == __cookie))) \
+ __var += 1; \
+})
+
+ if (is_return) {
+ SET(uretprobe_multi_func_1_result, uprobe_multi_func_1_addr, 2);
+ SET(uretprobe_multi_func_2_result, uprobe_multi_func_2_addr, 3);
+ SET(uretprobe_multi_func_3_result, uprobe_multi_func_3_addr, 1);
+ } else {
+ SET(uprobe_multi_func_1_result, uprobe_multi_func_1_addr, 3);
+ SET(uprobe_multi_func_2_result, uprobe_multi_func_2_addr, 1);
+ SET(uprobe_multi_func_3_result, uprobe_multi_func_3_addr, 2);
+ }
+
+#undef SET
+
+ if (is_sleep && verify_sleepable_user_copy())
+ uprobe_multi_sleep_result += 1;
+}
+
+SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
+int uprobe(struct pt_regs *ctx)
+{
+ uprobe_multi_check(ctx, false, false);
+ return 0;
+}
+
+SEC("uretprobe.multi//proc/self/exe:uprobe_multi_func_*")
+int uretprobe(struct pt_regs *ctx)
+{
+ uprobe_multi_check(ctx, true, false);
+ return 0;
+}
+
+SEC("uprobe.multi.s//proc/self/exe:uprobe_multi_func_*")
+int uprobe_sleep(struct pt_regs *ctx)
+{
+ uprobe_multi_check(ctx, false, true);
+ return 0;
+}
+
+SEC("uretprobe.multi.s//proc/self/exe:uprobe_multi_func_*")
+int uretprobe_sleep(struct pt_regs *ctx)
+{
+ uprobe_multi_check(ctx, true, true);
+ return 0;
+}
+
+SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
+int uprobe_extra(struct pt_regs *ctx)
+{
+ /* we need this one just to mix PID-filtered and global uprobes */
+ return 0;
+}
+
+SEC("usdt")
+int usdt_pid(struct pt_regs *ctx)
+{
+ __u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+ __u32 cur_pid;
+
+ cur_pid = cur_pid_tgid >> 32;
+ if (pid && cur_pid != pid)
+ return 0;
+
+ if (expect_pid && cur_pid != expect_pid)
+ bad_pid_seen_usdt = true;
+
+ child_pid_usdt = cur_pid_tgid >> 32;
+ child_tid_usdt = (__u32)cur_pid_tgid;
+
+ return 0;
+}
+
+SEC("usdt")
+int usdt_extra(struct pt_regs *ctx)
+{
+ /* we need this one just to mix PID-filtered and global USDT probes */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c b/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c
new file mode 100644
index 000000000000..5367f6105e30
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int count;
+
+SEC("uprobe.multi/./uprobe_multi:uprobe_multi_func_*")
+int uprobe_bench(struct pt_regs *ctx)
+{
+ count++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c b/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c
new file mode 100644
index 000000000000..93752bb5690b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u64 uprobe_result[4];
+
+SEC("uprobe.multi")
+int uprobe_0(struct pt_regs *ctx)
+{
+ uprobe_result[0]++;
+ return 0;
+}
+
+SEC("uprobe.multi")
+int uprobe_1(struct pt_regs *ctx)
+{
+ uprobe_result[1]++;
+ return 0;
+}
+
+SEC("uprobe.session")
+int uprobe_2(struct pt_regs *ctx)
+{
+ uprobe_result[2]++;
+ return 0;
+}
+
+SEC("uprobe.session")
+int uprobe_3(struct pt_regs *ctx)
+{
+ uprobe_result[3]++;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_pid_filter.c b/tools/testing/selftests/bpf/progs/uprobe_multi_pid_filter.c
new file mode 100644
index 000000000000..67fcbad36661
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_pid_filter.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 pids[3];
+__u32 test[3][2];
+
+static void update_pid(int idx)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid == pids[idx])
+ test[idx][0]++;
+ else
+ test[idx][1]++;
+}
+
+SEC("uprobe.multi")
+int uprobe_multi_0(struct pt_regs *ctx)
+{
+ update_pid(0);
+ return 0;
+}
+
+SEC("uprobe.multi")
+int uprobe_multi_1(struct pt_regs *ctx)
+{
+ update_pid(1);
+ return 0;
+}
+
+SEC("uprobe.multi")
+int uprobe_multi_2(struct pt_regs *ctx)
+{
+ update_pid(2);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c
new file mode 100644
index 000000000000..30bff90b68dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u64 uprobe_multi_func_1_addr = 0;
+__u64 uprobe_multi_func_2_addr = 0;
+__u64 uprobe_multi_func_3_addr = 0;
+
+__u64 uprobe_session_result[3] = {};
+__u64 uprobe_multi_sleep_result = 0;
+
+void *user_ptr = 0;
+int pid = 0;
+
+static int uprobe_multi_check(void *ctx, bool is_return)
+{
+ const __u64 funcs[] = {
+ uprobe_multi_func_1_addr,
+ uprobe_multi_func_2_addr,
+ uprobe_multi_func_3_addr,
+ };
+ unsigned int i;
+ __u64 addr;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ addr = bpf_get_func_ip(ctx);
+
+ for (i = 0; i < ARRAY_SIZE(funcs); i++) {
+ if (funcs[i] == addr) {
+ uprobe_session_result[i]++;
+ break;
+ }
+ }
+
+ /* only uprobe_multi_func_2 executes return probe */
+ if ((addr == uprobe_multi_func_1_addr) ||
+ (addr == uprobe_multi_func_3_addr))
+ return 1;
+
+ return 0;
+}
+
+SEC("uprobe.session//proc/self/exe:uprobe_multi_func_*")
+int uprobe(struct pt_regs *ctx)
+{
+ return uprobe_multi_check(ctx, bpf_session_is_return());
+}
+
+static __always_inline bool verify_sleepable_user_copy(void)
+{
+ char data[9];
+
+ bpf_copy_from_user(data, sizeof(data), user_ptr);
+ return bpf_strncmp(data, sizeof(data), "test_data") == 0;
+}
+
+SEC("uprobe.session.s//proc/self/exe:uprobe_multi_func_*")
+int uprobe_sleepable(struct pt_regs *ctx)
+{
+ if (verify_sleepable_user_copy())
+ uprobe_multi_sleep_result++;
+ return uprobe_multi_check(ctx, bpf_session_is_return());
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c
new file mode 100644
index 000000000000..5befdf944dc6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+
+__u64 test_uprobe_1_result = 0;
+__u64 test_uprobe_2_result = 0;
+__u64 test_uprobe_3_result = 0;
+
+static int check_cookie(__u64 val, __u64 *result)
+{
+ __u64 *cookie;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ cookie = bpf_session_cookie();
+
+ if (bpf_session_is_return())
+ *result = *cookie == val ? val : 0;
+ else
+ *cookie = val;
+ return 0;
+}
+
+SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1")
+int uprobe_1(struct pt_regs *ctx)
+{
+ return check_cookie(1, &test_uprobe_1_result);
+}
+
+SEC("uprobe.session//proc/self/exe:uprobe_multi_func_2")
+int uprobe_2(struct pt_regs *ctx)
+{
+ return check_cookie(2, &test_uprobe_2_result);
+}
+
+SEC("uprobe.session//proc/self/exe:uprobe_multi_func_3")
+int uprobe_3(struct pt_regs *ctx)
+{
+ return check_cookie(3, &test_uprobe_3_result);
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c
new file mode 100644
index 000000000000..8fbcd69fae22
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+
+int idx_entry = 0;
+int idx_return = 0;
+
+__u64 test_uprobe_cookie_entry[6];
+__u64 test_uprobe_cookie_return[3];
+
+static int check_cookie(void)
+{
+ __u64 *cookie = bpf_session_cookie();
+
+ if (bpf_session_is_return()) {
+ if (idx_return >= ARRAY_SIZE(test_uprobe_cookie_return))
+ return 1;
+ test_uprobe_cookie_return[idx_return++] = *cookie;
+ return 0;
+ }
+
+ if (idx_entry >= ARRAY_SIZE(test_uprobe_cookie_entry))
+ return 1;
+ *cookie = test_uprobe_cookie_entry[idx_entry];
+ return idx_entry++ % 2;
+}
+
+
+SEC("uprobe.session//proc/self/exe:uprobe_session_recursive")
+int uprobe_recursive(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ return check_cookie();
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c
new file mode 100644
index 000000000000..7c960376ae97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u64 uprobe_session_result[3] = {};
+int pid = 0;
+
+static int uprobe_multi_check(void *ctx, int idx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ uprobe_session_result[idx]++;
+
+ /* only consumer 1 executes return probe */
+ if (idx == 0 || idx == 2)
+ return 1;
+
+ return 0;
+}
+
+SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1")
+int uprobe_0(struct pt_regs *ctx)
+{
+ return uprobe_multi_check(ctx, 0);
+}
+
+SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1")
+int uprobe_1(struct pt_regs *ctx)
+{
+ return uprobe_multi_check(ctx, 1);
+}
+
+SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1")
+int uprobe_2(struct pt_regs *ctx)
+{
+ return uprobe_multi_check(ctx, 2);
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c b/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c
new file mode 100644
index 000000000000..9e1c33d0bd2f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+int count;
+
+SEC("usdt")
+int usdt0(struct pt_regs *ctx)
+{
+ count++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c b/tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c
new file mode 100644
index 000000000000..fe49f2cb5360
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/usdt.bpf.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+
+SEC("uprobe.session")
+__success
+int uprobe_sesison_return_0(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("uprobe.session")
+__success
+int uprobe_sesison_return_1(struct pt_regs *ctx)
+{
+ return 1;
+}
+
+SEC("uprobe.session")
+__failure
+__msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int uprobe_sesison_return_2(struct pt_regs *ctx)
+{
+ return 2;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall.c b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
new file mode 100644
index 000000000000..e08c31669e5a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <string.h>
+
+struct pt_regs regs;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("uprobe")
+int probe(struct pt_regs *ctx)
+{
+ __builtin_memcpy(&regs, ctx, sizeof(regs));
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
new file mode 100644
index 000000000000..915d38591bf6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/usdt.bpf.h>
+#include <string.h>
+
+struct pt_regs regs;
+
+char _license[] SEC("license") = "GPL";
+
+int executed = 0;
+int pid;
+
+SEC("uprobe")
+int BPF_UPROBE(test_uprobe)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uretprobe")
+int BPF_URETPROBE(test_uretprobe)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uprobe.multi")
+int test_uprobe_multi(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uretprobe.multi")
+int test_uretprobe_multi(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uprobe.session")
+int test_uprobe_session(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("usdt")
+int test_usdt(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uptr_failure.c b/tools/testing/selftests/bpf/progs/uptr_failure.c
new file mode 100644
index 000000000000..0cfa1fd61440
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uptr_failure.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+#include "uptr_test_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct value_type);
+} datamap SEC(".maps");
+
+SEC("?syscall")
+__failure __msg("store to uptr disallowed")
+int uptr_write(const void *ctx)
+{
+ struct task_struct *task;
+ struct value_type *v;
+
+ task = bpf_get_current_task_btf();
+ v = bpf_task_storage_get(&datamap, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 0;
+
+ v->udata = NULL;
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("store to uptr disallowed")
+int uptr_write_nested(const void *ctx)
+{
+ struct task_struct *task;
+ struct value_type *v;
+
+ task = bpf_get_current_task_btf();
+ v = bpf_task_storage_get(&datamap, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 0;
+
+ v->nested.udata = NULL;
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("R1 invalid mem access 'mem_or_null'")
+int uptr_no_null_check(const void *ctx)
+{
+ struct task_struct *task;
+ struct value_type *v;
+
+ task = bpf_get_current_task_btf();
+ v = bpf_task_storage_get(&datamap, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 0;
+
+ v->udata->result = 0;
+
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("doesn't point to kptr")
+int uptr_kptr_xchg(const void *ctx)
+{
+ struct task_struct *task;
+ struct value_type *v;
+
+ task = bpf_get_current_task_btf();
+ v = bpf_task_storage_get(&datamap, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 0;
+
+ bpf_kptr_xchg(&v->udata, NULL);
+
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("invalid mem access 'scalar'")
+int uptr_obj_new(const void *ctx)
+{
+ struct value_type *v;
+
+ v = bpf_obj_new(typeof(*v));
+ if (!v)
+ return 0;
+
+ if (v->udata)
+ v->udata->result = 0;
+
+ bpf_obj_drop(v);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/uptr_map_failure.c b/tools/testing/selftests/bpf/progs/uptr_map_failure.c
new file mode 100644
index 000000000000..417b763d76b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uptr_map_failure.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "uptr_test_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct large_uptr);
+} large_uptr_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct empty_uptr);
+} empty_uptr_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct kstruct_uptr);
+} kstruct_uptr_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/uptr_update_failure.c b/tools/testing/selftests/bpf/progs/uptr_update_failure.c
new file mode 100644
index 000000000000..86c3bb954abc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uptr_update_failure.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "uptr_test_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct value_lock_type);
+} datamap SEC(".maps");
+
+/* load test only. not used */
+SEC("syscall")
+int not_used(void *ctx)
+{
+ struct value_lock_type *ptr;
+ struct task_struct *task;
+ struct user_data *udata;
+
+ task = bpf_get_current_task_btf();
+ ptr = bpf_task_storage_get(&datamap, task, 0, 0);
+ if (!ptr)
+ return 0;
+
+ bpf_spin_lock(&ptr->lock);
+
+ udata = ptr->udata;
+ if (!udata) {
+ bpf_spin_unlock(&ptr->lock);
+ return 0;
+ }
+ udata->result = MAGIC_VALUE + udata->a + udata->b;
+
+ bpf_spin_unlock(&ptr->lock);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/uretprobe_stack.c b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
new file mode 100644
index 000000000000..a2951e2f1711
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/usdt.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 entry_stack1[32], exit_stack1[32];
+__u64 entry_stack1_recur[32], exit_stack1_recur[32];
+__u64 entry_stack2[32];
+__u64 entry_stack3[32];
+__u64 entry_stack4[32], exit_stack4[32];
+__u64 usdt_stack[32];
+
+int entry1_len, exit1_len;
+int entry1_recur_len, exit1_recur_len;
+int entry2_len, exit2_len;
+int entry3_len, exit3_len;
+int entry4_len, exit4_len;
+int usdt_len;
+
+#define SZ sizeof(usdt_stack)
+
+SEC("uprobe//proc/self/exe:target_1")
+int BPF_UPROBE(uprobe_1)
+{
+ /* target_1 is recursive with depth of 2, so we capture two separate
+ * stack traces, depending on which occurrence it is
+ */
+ static bool recur = false;
+
+ if (!recur)
+ entry1_len = bpf_get_stack(ctx, &entry_stack1, SZ, BPF_F_USER_STACK);
+ else
+ entry1_recur_len = bpf_get_stack(ctx, &entry_stack1_recur, SZ, BPF_F_USER_STACK);
+
+ recur = true;
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:target_1")
+int BPF_URETPROBE(uretprobe_1)
+{
+ /* see above, target_1 is recursive */
+ static bool recur = false;
+
+ /* NOTE: order of returns is reversed to order of entries */
+ if (!recur)
+ exit1_recur_len = bpf_get_stack(ctx, &exit_stack1_recur, SZ, BPF_F_USER_STACK);
+ else
+ exit1_len = bpf_get_stack(ctx, &exit_stack1, SZ, BPF_F_USER_STACK);
+
+ recur = true;
+ return 0;
+}
+
+SEC("uprobe//proc/self/exe:target_2")
+int BPF_UPROBE(uprobe_2)
+{
+ entry2_len = bpf_get_stack(ctx, &entry_stack2, SZ, BPF_F_USER_STACK);
+ return 0;
+}
+
+/* no uretprobe for target_2 */
+
+SEC("uprobe//proc/self/exe:target_3")
+int BPF_UPROBE(uprobe_3)
+{
+ entry3_len = bpf_get_stack(ctx, &entry_stack3, SZ, BPF_F_USER_STACK);
+ return 0;
+}
+
+/* no uretprobe for target_3 */
+
+SEC("uprobe//proc/self/exe:target_4")
+int BPF_UPROBE(uprobe_4)
+{
+ entry4_len = bpf_get_stack(ctx, &entry_stack4, SZ, BPF_F_USER_STACK);
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:target_4")
+int BPF_URETPROBE(uretprobe_4)
+{
+ exit4_len = bpf_get_stack(ctx, &exit_stack4, SZ, BPF_F_USER_STACK);
+ return 0;
+}
+
+SEC("usdt//proc/self/exe:uretprobe_stack:target")
+int BPF_USDT(usdt_probe)
+{
+ usdt_len = bpf_get_stack(ctx, &usdt_stack, SZ, BPF_F_USER_STACK);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
new file mode 100644
index 000000000000..54de0389f878
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+ __uint(max_entries, 4096);
+} user_ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 2);
+} ringbuf SEC(".maps");
+
+static int map_value;
+
+static long
+bad_access1(struct bpf_dynptr *dynptr, void *context)
+{
+ const struct sample *sample;
+
+ sample = bpf_dynptr_data(dynptr - 1, 0, sizeof(*sample));
+ bpf_printk("Was able to pass bad pointer %lx\n", (__u64)dynptr - 1);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read before the pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("negative offset dynptr_ptr ptr")
+int user_ringbuf_callback_bad_access1(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0);
+
+ return 0;
+}
+
+static long
+bad_access2(struct bpf_dynptr *dynptr, void *context)
+{
+ const struct sample *sample;
+
+ sample = bpf_dynptr_data(dynptr + 1, 0, sizeof(*sample));
+ bpf_printk("Was able to pass bad pointer %lx\n", (__u64)dynptr + 1);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read past the end of the pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("dereference of modified dynptr_ptr ptr")
+int user_ringbuf_callback_bad_access2(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0);
+
+ return 0;
+}
+
+static long
+write_forbidden(struct bpf_dynptr *dynptr, void *context)
+{
+ *((long *)dynptr) = 0;
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to write to that pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'dynptr_ptr'")
+int user_ringbuf_callback_write_forbidden(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0);
+
+ return 0;
+}
+
+static long
+null_context_write(struct bpf_dynptr *dynptr, void *context)
+{
+ *((__u64 *)context) = 0;
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to write to that pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int user_ringbuf_callback_null_context_write(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0);
+
+ return 0;
+}
+
+static long
+null_context_read(struct bpf_dynptr *dynptr, void *context)
+{
+ __u64 id = *((__u64 *)context);
+
+ bpf_printk("Read id %lu\n", id);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to write to that pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int user_ringbuf_callback_null_context_read(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0);
+
+ return 0;
+}
+
+static long
+try_discard_dynptr(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_ringbuf_discard_dynptr(dynptr, 0);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read past the end of the pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("cannot release unowned const bpf_dynptr")
+int user_ringbuf_callback_discard_dynptr(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0);
+
+ return 0;
+}
+
+static long
+try_submit_dynptr(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_ringbuf_submit_dynptr(dynptr, 0);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read past the end of the pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("cannot release unowned const bpf_dynptr")
+int user_ringbuf_callback_submit_dynptr(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0);
+
+ return 0;
+}
+
+static long
+invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context)
+{
+ return 2;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to write to that pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("At callback return the register R0 has ")
+int user_ringbuf_callback_invalid_return(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0);
+
+ return 0;
+}
+
+static long
+try_reinit_dynptr_mem(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_dynptr_from_mem(&map_value, 4, 0, dynptr);
+ return 0;
+}
+
+static long
+try_reinit_dynptr_ringbuf(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, dynptr);
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Dynptr has to be an uninitialized dynptr")
+int user_ringbuf_callback_reinit_dynptr_mem(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_mem, NULL, 0);
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Dynptr has to be an uninitialized dynptr")
+int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0);
+ return 0;
+}
+
+__noinline long global_call_bpf_dynptr_data(struct bpf_dynptr *dynptr)
+{
+ bpf_dynptr_data(dynptr, 0xA, 0xA);
+ return 0;
+}
+
+static long callback_adjust_bpf_dynptr_reg_off(struct bpf_dynptr *dynptr,
+ void *ctx)
+{
+ global_call_bpf_dynptr_data(dynptr += 1024);
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("dereference of modified dynptr_ptr ptr R1 off=16384 disallowed")
+int user_ringbuf_callback_const_ptr_to_dynptr_reg_off(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf,
+ callback_adjust_bpf_dynptr_reg_off, NULL, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
new file mode 100644
index 000000000000..dd3bdf672633
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "test_user_ringbuf.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+} user_ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} kernel_ringbuf SEC(".maps");
+
+/* inputs */
+int pid, err, val;
+
+int read = 0;
+
+/* Counter used for end-to-end protocol test */
+__u64 kern_mutated = 0;
+__u64 user_mutated = 0;
+__u64 expected_user_mutated = 0;
+
+static int
+is_test_process(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ return cur_pid == pid;
+}
+
+static long
+record_sample(struct bpf_dynptr *dynptr, void *context)
+{
+ const struct sample *sample = NULL;
+ struct sample stack_sample;
+ int status;
+ static int num_calls;
+
+ if (num_calls++ % 2 == 0) {
+ status = bpf_dynptr_read(&stack_sample, sizeof(stack_sample), dynptr, 0, 0);
+ if (status) {
+ bpf_printk("bpf_dynptr_read() failed: %d\n", status);
+ err = 1;
+ return 1;
+ }
+ } else {
+ sample = bpf_dynptr_data(dynptr, 0, sizeof(*sample));
+ if (!sample) {
+ bpf_printk("Unexpectedly failed to get sample\n");
+ err = 2;
+ return 1;
+ }
+ stack_sample = *sample;
+ }
+
+ __sync_fetch_and_add(&read, 1);
+ return 0;
+}
+
+static void
+handle_sample_msg(const struct test_msg *msg)
+{
+ switch (msg->msg_op) {
+ case TEST_MSG_OP_INC64:
+ kern_mutated += msg->operand_64;
+ break;
+ case TEST_MSG_OP_INC32:
+ kern_mutated += msg->operand_32;
+ break;
+ case TEST_MSG_OP_MUL64:
+ kern_mutated *= msg->operand_64;
+ break;
+ case TEST_MSG_OP_MUL32:
+ kern_mutated *= msg->operand_32;
+ break;
+ default:
+ bpf_printk("Unrecognized op %d\n", msg->msg_op);
+ err = 2;
+ }
+}
+
+static long
+read_protocol_msg(struct bpf_dynptr *dynptr, void *context)
+{
+ const struct test_msg *msg = NULL;
+
+ msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg));
+ if (!msg) {
+ err = 1;
+ bpf_printk("Unexpectedly failed to get msg\n");
+ return 0;
+ }
+
+ handle_sample_msg(msg);
+
+ return 0;
+}
+
+static int publish_next_kern_msg(__u32 index, void *context)
+{
+ struct test_msg *msg = NULL;
+ int operand_64 = TEST_OP_64;
+ int operand_32 = TEST_OP_32;
+
+ msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0);
+ if (!msg) {
+ err = 4;
+ return 1;
+ }
+
+ switch (index % TEST_MSG_OP_NUM_OPS) {
+ case TEST_MSG_OP_INC64:
+ msg->operand_64 = operand_64;
+ msg->msg_op = TEST_MSG_OP_INC64;
+ expected_user_mutated += operand_64;
+ break;
+ case TEST_MSG_OP_INC32:
+ msg->operand_32 = operand_32;
+ msg->msg_op = TEST_MSG_OP_INC32;
+ expected_user_mutated += operand_32;
+ break;
+ case TEST_MSG_OP_MUL64:
+ msg->operand_64 = operand_64;
+ msg->msg_op = TEST_MSG_OP_MUL64;
+ expected_user_mutated *= operand_64;
+ break;
+ case TEST_MSG_OP_MUL32:
+ msg->operand_32 = operand_32;
+ msg->msg_op = TEST_MSG_OP_MUL32;
+ expected_user_mutated *= operand_32;
+ break;
+ default:
+ bpf_ringbuf_discard(msg, 0);
+ err = 5;
+ return 1;
+ }
+
+ bpf_ringbuf_submit(msg, 0);
+
+ return 0;
+}
+
+static void
+publish_kern_messages(void)
+{
+ if (expected_user_mutated != user_mutated) {
+ bpf_printk("%lu != %lu\n", expected_user_mutated, user_mutated);
+ err = 3;
+ return;
+ }
+
+ bpf_loop(8, publish_next_kern_msg, NULL, 0);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_prctl")
+int test_user_ringbuf_protocol(void *ctx)
+{
+ long status = 0;
+
+ if (!is_test_process())
+ return 0;
+
+ status = bpf_user_ringbuf_drain(&user_ringbuf, read_protocol_msg, NULL, 0);
+ if (status < 0) {
+ bpf_printk("Drain returned: %ld\n", status);
+ err = 1;
+ return 0;
+ }
+
+ publish_kern_messages();
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_user_ringbuf(void *ctx)
+{
+ if (!is_test_process())
+ return 0;
+
+ err = bpf_user_ringbuf_drain(&user_ringbuf, record_sample, NULL, 0);
+
+ return 0;
+}
+
+static long
+do_nothing_cb(struct bpf_dynptr *dynptr, void *context)
+{
+ __sync_fetch_and_add(&read, 1);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_prlimit64")
+int test_user_ringbuf_epoll(void *ctx)
+{
+ long num_samples;
+
+ if (!is_test_process())
+ return 0;
+
+ num_samples = bpf_user_ringbuf_drain(&user_ringbuf, do_nothing_cb, NULL, 0);
+ if (num_samples <= 0)
+ err = 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c
new file mode 100644
index 000000000000..2b4fdca162be
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_and.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/and.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("invalid and of negative number")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void invalid_and_of_negative_number(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= -4; \
+ r1 <<= 2; \
+ r0 += r1; \
+l0_%=: r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid range check")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void invalid_range_check(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r9 = 1; \
+ w1 %%= 2; \
+ w1 += 1; \
+ w9 &= w1; \
+ w9 += 1; \
+ w9 >>= 1; \
+ w3 = 1; \
+ w3 -= w9; \
+ w3 *= 0x10000000; \
+ r0 += r3; \
+ *(u32*)(r0 + 0) = r3; \
+l0_%=: r0 = r0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check known subreg with unknown reg")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w0 < 0x1 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R1 !read_ok'` */
+__xlated_unpriv("goto pc-1") /* `r1 = *(u32*)(r1 + 512)`, sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
+__naked void known_subreg_with_unknown_reg(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 <<= 32; \
+ r0 += 1; \
+ r0 &= 0xFFFF1234; \
+ /* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */\
+ if w0 < 1 goto l0_%=; \
+ r1 = *(u32*)(r1 + 512); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
new file mode 100644
index 000000000000..7f4827eede3c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#else
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile int __arena *page1, *page2, *no_page, *page3;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
+ if (*page1 != 1)
+ return 6;
+ if (*page2 != 0) /* use-after-free should return 0 */
+ return 7;
+ page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page3)
+ return 8;
+ *page3 = 3;
+ if (page2 != page3)
+ return 9;
+ if (*page1 != 1)
+ return 10;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page1, *page2, *page3, *page4;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ page2 = page1 + __PAGE_SIZE;
+ page3 = page1 + __PAGE_SIZE * 2;
+ page4 = page1 - __PAGE_SIZE;
+ *page1 = 1;
+ *page2 = 2;
+ *page3 = 3;
+ *page4 = 4;
+ if (*page1 != 1)
+ return 1;
+ if (*page2 != 2)
+ return 2;
+ if (*page3 != 0)
+ return 3;
+ if (*page4 != 0)
+ return 4;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 2);
+ if (*page1 != 0)
+ return 5;
+ if (*page2 != 0)
+ return 6;
+ if (*page3 != 0)
+ return 7;
+ if (*page4 != 0)
+ return 8;
+#endif
+ return 0;
+}
+
+struct bpf_arena___l {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+SEC("syscall")
+__success __retval(0) __log_level(2)
+int basic_alloc3(void *ctx)
+{
+ struct bpf_arena___l *ar = (struct bpf_arena___l *)&arena;
+ volatile char __arena *pages;
+
+ pages = bpf_arena_alloc_pages(&ar->map, NULL, ar->map.max_entries, NUMA_NO_NODE, 0);
+ if (!pages)
+ return 1;
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_reserve1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ page += __PAGE_SIZE;
+
+ /* Reserve the second page */
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 2;
+
+ /* Try to explicitly allocate the reserved page. */
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 3;
+
+ /* Try to implicitly allocate the page (since there's only 2 of them). */
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 4;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_reserve2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if ((u64)page)
+ return 2;
+#endif
+ return 0;
+}
+
+/* Reserve the same page twice, should return -EBUSY. */
+SEC("syscall")
+__success __retval(0)
+int reserve_twice(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret != -EBUSY)
+ return 2;
+#endif
+ return 0;
+}
+
+/* Try to reserve past the end of the arena. */
+SEC("syscall")
+__success __retval(0)
+int reserve_invalid_region(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ /* Try a NULL pointer. */
+ ret = bpf_arena_reserve_pages(&arena, NULL, 3);
+ if (ret != -EINVAL)
+ return 1;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 3);
+ if (ret != -EINVAL)
+ return 2;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 4096);
+ if (ret != -EINVAL)
+ return 3;
+
+ ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
+ if (ret != -EINVAL)
+ return 4;
+#endif
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__success __log_level(2)
+int iter_maps1(struct bpf_iter__bpf_map *ctx)
+{
+ struct bpf_map *map = ctx->map;
+
+ if (!map)
+ return 0;
+ bpf_arena_alloc_pages(map, NULL, map->max_entries, 0, 0);
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__failure __msg("expected pointer to STRUCT bpf_map")
+int iter_maps2(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+
+ bpf_arena_alloc_pages((void *)seq, NULL, 1, 0, 0);
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__failure __msg("untrusted_ptr_bpf_map")
+int iter_maps3(struct bpf_iter__bpf_map *ctx)
+{
+ struct bpf_map *map = ctx->map;
+
+ if (!map)
+ return 0;
+ bpf_arena_alloc_pages(map->inner_map_meta, NULL, map->max_entries, 0, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
new file mode 100644
index 000000000000..f19e15400b3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+#define ARENA_SIZE (1ull << 32)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, ARENA_SIZE / PAGE_SIZE);
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int big_alloc1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page1, *page2, *no_page, *page3;
+ void __arena *base;
+
+ page1 = base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE * 2,
+ 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 1);
+ if (*page2 != 2)
+ return 6;
+ if (*page1 != 0) /* use-after-free should return 0 */
+ return 7;
+ page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page3)
+ return 8;
+ *page3 = 3;
+ if (page1 != page3)
+ return 9;
+ if (*page2 != 2)
+ return 10;
+ if (*(page1 + PAGE_SIZE) != 0)
+ return 11;
+ if (*(page1 - PAGE_SIZE) != 0)
+ return 12;
+ if (*(page2 + PAGE_SIZE) != 0)
+ return 13;
+ if (*(page2 - PAGE_SIZE) != 0)
+ return 14;
+#endif
+ return 0;
+}
+
+/* Try to access a reserved page. Behavior should be identical with accessing unallocated pages. */
+SEC("syscall")
+__success __retval(0)
+int access_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page;
+ char __arena *base;
+ const size_t len = 4;
+ int ret, i;
+
+ /* Get a separate region of the arena. */
+ page = base = arena_base(&arena) + 16384 * PAGE_SIZE;
+
+ ret = bpf_arena_reserve_pages(&arena, base, len);
+ if (ret)
+ return 1;
+
+ /* Try to dirty reserved memory. */
+ for (i = 0; i < len && can_loop; i++)
+ *page = 0x5a;
+
+ for (i = 0; i < len && can_loop; i++) {
+ page = (volatile char __arena *)(base + i * PAGE_SIZE);
+
+ /*
+ * Error out in case either the write went through,
+ * or the address has random garbage.
+ */
+ if (*page == 0x5a)
+ return 2 + 2 * i;
+
+ if (*page)
+ return 2 + 2 * i + 1;
+ }
+#endif
+ return 0;
+}
+
+/* Try to allocate a region overlapping with a reservation. */
+SEC("syscall")
+__success __retval(0)
+int request_partially_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page;
+ char __arena *base;
+ int ret;
+
+ /* Add an arbitrary page offset. */
+ page = base = arena_base(&arena) + 4096 * __PAGE_SIZE;
+
+ ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4);
+ if (ret)
+ return 1;
+
+ page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0);
+ if ((u64)page != 0ULL)
+ return 2;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int free_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *addr;
+ char __arena *page;
+ int ret;
+
+ /* Add an arbitrary page offset. */
+ addr = arena_base(&arena) + 32768 * __PAGE_SIZE;
+
+ page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2);
+ if (ret)
+ return 2;
+
+ /*
+ * Reserved and allocated pages should be interchangeable for
+ * bpf_arena_free_pages(). Free a reserved and an allocated
+ * page with a single call.
+ */
+ bpf_arena_free_pages(&arena, addr + __PAGE_SIZE , 2);
+
+ /* The free call above should have succeeded, so this allocation should too. */
+ page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0);
+ if (!page)
+ return 3;
+#endif
+ return 0;
+}
+
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+#define PAGE_CNT 100
+__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */
+__u8 __arena *base;
+
+/*
+ * Check that arena's range_tree algorithm allocates pages sequentially
+ * on the first pass and then fills in all gaps on the second pass.
+ */
+__noinline int alloc_pages(int page_cnt, int pages_atonce, bool first_pass,
+ int max_idx, int step)
+{
+ __u8 __arena *pg;
+ int i, pg_idx;
+
+ for (i = 0; i < page_cnt; i++) {
+ pg = bpf_arena_alloc_pages(&arena, NULL, pages_atonce,
+ NUMA_NO_NODE, 0);
+ if (!pg)
+ return step;
+ pg_idx = (unsigned long) (pg - base) / PAGE_SIZE;
+ if (first_pass) {
+ /* Pages must be allocated sequentially */
+ if (pg_idx != i)
+ return step + 100;
+ } else {
+ /* Allocator must fill into gaps */
+ if (pg_idx >= max_idx || (pg_idx & 1))
+ return step + 200;
+ }
+ *pg = pg_idx;
+ page[pg_idx] = pg;
+ cond_break;
+ }
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int big_alloc2(void *ctx)
+{
+ __u8 __arena *pg;
+ int i, err;
+
+ base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!base)
+ return 1;
+ bpf_arena_free_pages(&arena, (void __arena *)base, 1);
+
+ err = alloc_pages(PAGE_CNT, 1, true, PAGE_CNT, 2);
+ if (err)
+ return err;
+
+ /* Clear all even pages */
+ for (i = 0; i < PAGE_CNT; i += 2) {
+ pg = page[i];
+ if (*pg != i)
+ return 3;
+ bpf_arena_free_pages(&arena, (void __arena *)pg, 1);
+ page[i] = NULL;
+ cond_break;
+ }
+
+ /* Allocate into freed gaps */
+ err = alloc_pages(PAGE_CNT / 2, 1, false, PAGE_CNT, 4);
+ if (err)
+ return err;
+
+ /* Free pairs of pages */
+ for (i = 0; i < PAGE_CNT; i += 4) {
+ pg = page[i];
+ if (*pg != i)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)pg, 2);
+ page[i] = NULL;
+ barrier();
+ page[i + 1] = NULL;
+ cond_break;
+ }
+
+ /* Allocate 2 pages at a time into freed gaps */
+ err = alloc_pages(PAGE_CNT / 4, 2, false, PAGE_CNT, 6);
+ if (err)
+ return err;
+
+ /* Check pages without freeing */
+ for (i = 0; i < PAGE_CNT; i += 2) {
+ pg = page[i];
+ if (*pg != i)
+ return 7;
+ cond_break;
+ }
+
+ pg = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+
+ if (!pg)
+ return 8;
+ /*
+ * The first PAGE_CNT pages are occupied. The new page
+ * must be above.
+ */
+ if ((pg - base) / PAGE_SIZE < PAGE_CNT)
+ return 9;
+ return 0;
+}
+#endif
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c
new file mode 100644
index 000000000000..0a187ff725cc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c
@@ -0,0 +1,731 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/array_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+ __uint(map_flags, BPF_F_RDONLY_PROG);
+} map_array_ro SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+ __uint(map_flags, BPF_F_WRONLY_PROG);
+} map_array_wo SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, struct test_val);
+} map_array_pcpu SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, struct test_val);
+} map_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("valid map access into an array with a constant")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void an_array_with_a_constant_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array with a register")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_register_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array with a variable")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_variable_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 >= %[max_entries] goto l0_%=; \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array with a signed variable")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void array_with_a_signed_variable(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if w1 s> 0xffffffff goto l1_%=; \
+ w1 = 0; \
+l1_%=: w2 = %[max_entries]; \
+ if r2 s> r1 goto l2_%=; \
+ w1 = 0; \
+l2_%=: w1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a constant")
+__failure __msg("invalid access to map value, value_size=48 off=48 size=8")
+__failure_unpriv
+__naked void an_array_with_a_constant_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + %[__imm_0]) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, (MAX_ENTRIES + 1) << 2),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a register")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_register_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[__imm_0]; \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, MAX_ENTRIES + 1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a variable")
+__failure
+__msg("R0 unbounded memory access, make sure to bounds check any such access")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_variable_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with no floor check")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void array_with_no_floor_check(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r0 + 0); \
+ w2 = %[max_entries]; \
+ if r2 s> r1 goto l1_%=; \
+ w1 = 0; \
+l1_%=: w1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a invalid max check")
+__failure __msg("invalid access to map value, value_size=48 off=44 size=8")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void with_a_invalid_max_check_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ w2 = %[__imm_0]; \
+ if r2 > r1 goto l1_%=; \
+ w1 = 0; \
+l1_%=: w1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, MAX_ENTRIES + 1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a invalid max check")
+__failure __msg("R0 pointer += pointer")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void with_a_invalid_max_check_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r8 = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += r8; \
+ r0 = *(u32*)(r0 + %[test_val_foo]); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid read map access into a read-only array 1")
+__success __success_unpriv __retval(28)
+__naked void a_read_only_array_1_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u32*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("valid read map access into a read-only array 2")
+__success __retval(65507)
+__naked void a_read_only_array_2_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid write map access into a read-only array 1")
+__failure __msg("write into map forbidden")
+__failure_unpriv
+__naked void a_read_only_array_1_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("invalid write map access into a read-only array 2")
+__failure __msg("write into map forbidden")
+__naked void a_read_only_array_2_2(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r0; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_skb_load_bytes),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid write map access into a write-only array 1")
+__success __success_unpriv __retval(1)
+__naked void a_write_only_array_1_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("valid write map access into a write-only array 2")
+__success __retval(0)
+__naked void a_write_only_array_2_1(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r0; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_skb_load_bytes),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid read map access into a write-only array 1")
+__failure __msg("read from map forbidden")
+__failure_unpriv
+__naked void a_write_only_array_1_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u64*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("invalid read map access into a write-only array 2")
+__failure __msg("read from map forbidden")
+__naked void a_write_only_array_2_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array using constant without nullness")
+__success __retval(4) __log_level(2)
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}")
+unsigned int an_array_with_a_constant_no_nullness(void)
+{
+ /* Need 8-byte alignment for spill tracking */
+ __u32 __attribute__((aligned(8))) key = 1;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("valid multiple map access into an array using constant without nullness")
+__success __retval(8) __log_level(2)
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -16) = {{(0|r[0-9])}}")
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}")
+unsigned int multiple_array_with_a_constant_no_nullness(void)
+{
+ __u32 __attribute__((aligned(8))) key = 1;
+ __u32 __attribute__((aligned(8))) key2 = 0;
+ struct test_val *val, *val2;
+
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ val2 = bpf_map_lookup_elem(&map_array, &key2);
+ val2->index = offsetof(struct test_val, foo);
+
+ return val->index + val2->index;
+}
+
+SEC("socket")
+__description("valid map access into an array using natural aligned 32-bit constant 0 without nullness")
+__success __retval(4)
+unsigned int an_array_with_a_32bit_constant_0_no_nullness(void)
+{
+ /* Unlike the above tests, 32-bit zeroing is precisely tracked even
+ * if writes are not aligned to BPF_REG_SIZE. This tests that our
+ * STACK_ZERO handling functions.
+ */
+ struct test_val *val;
+ __u32 key = 0;
+
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("valid map access into a pcpu array using constant without nullness")
+__success __retval(4) __log_level(2)
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}")
+unsigned int a_pcpu_array_with_a_constant_no_nullness(void)
+{
+ __u32 __attribute__((aligned(8))) key = 1;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_array_pcpu, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("invalid map access into an array using constant without nullness")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int an_array_with_a_constant_no_nullness_out_of_bounds(void)
+{
+ /* Out of bounds */
+ __u32 __attribute__((aligned(8))) key = 3;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("invalid map access into an array using constant smaller than key_size")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int an_array_with_a_constant_too_small(void)
+{
+ __u32 __attribute__((aligned(8))) key;
+ struct test_val *val;
+
+ /* Mark entire key as STACK_MISC */
+ bpf_probe_read_user(&key, sizeof(key), NULL);
+
+ /* Spilling only the bottom byte results in a tnum const of 1.
+ * We want to check that the verifier rejects it, as the spill is < 4B.
+ */
+ *(__u8 *)&key = 1;
+ val = bpf_map_lookup_elem(&map_array, &key);
+
+ /* Should fail, as verifier cannot prove in-bound lookup */
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("invalid map access into an array using constant larger than key_size")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int an_array_with_a_constant_too_big(void)
+{
+ struct test_val *val;
+ __u64 key = 1;
+
+ /* Even if the constant value is < max_entries, if the spill size is
+ * larger than the key size, the set bits may not be where we expect them
+ * to be on different endian architectures.
+ */
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("invalid elided lookup using const and non-const key")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int mixed_const_and_non_const_key_lookup(void)
+{
+ __u32 __attribute__((aligned(8))) key;
+ struct test_val *val;
+ __u32 rand;
+
+ rand = bpf_get_prandom_u32();
+ key = rand > 42 ? 1 : rand;
+ val = bpf_map_lookup_elem(&map_array, &key);
+
+ return val->index;
+}
+
+SEC("socket")
+__failure __msg("invalid read from stack R2 off=4096 size=4")
+__naked void key_lookup_at_invalid_fp(void)
+{
+ asm volatile (" \
+ r1 = %[map_array] ll; \
+ r2 = r10; \
+ r2 += 4096; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = *(u64*)(r0 + 0); \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
+
+volatile __u32 __attribute__((aligned(8))) global_key;
+
+SEC("socket")
+__description("invalid elided lookup using non-stack key")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int non_stack_key_lookup(void)
+{
+ struct test_val *val;
+
+ global_key = 1;
+ val = bpf_map_lookup_elem(&map_array, (void *)&global_key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("doesn't reject UINT64_MAX as s64 for irrelevant maps")
+__success __retval(42)
+unsigned int doesnt_reject_irrelevant_maps(void)
+{
+ __u64 key = 0xFFFFFFFFFFFFFFFF;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_hash_48b, &key);
+ if (val)
+ return val->index;
+
+ return 42;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
new file mode 100644
index 000000000000..7efa9521105e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Timer tests */
+
+struct timer_elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct timer_elem);
+} timer_map SEC(".maps");
+
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ u32 data;
+ /* Timer callbacks are never sleepable, even from non-sleepable programs */
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__failure __msg("helper call might sleep in a non-sleepable prog")
+int timer_non_sleepable_prog(void *ctx)
+{
+ struct timer_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&timer_map, &key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->t, &timer_map, 0);
+ bpf_timer_set_callback(&val->t, timer_cb);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("helper call might sleep in a non-sleepable prog")
+int timer_sleepable_prog(void *ctx)
+{
+ struct timer_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&timer_map, &key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->t, &timer_map, 0);
+ bpf_timer_set_callback(&val->t, timer_cb);
+ return 0;
+}
+
+/* Workqueue tests */
+
+struct wq_elem {
+ struct bpf_wq w;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct wq_elem);
+} wq_map SEC(".maps");
+
+static int wq_cb(void *map, int *key, void *value)
+{
+ u32 data;
+ /* Workqueue callbacks are always sleepable, even from non-sleepable programs */
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__success
+int wq_non_sleepable_prog(void *ctx)
+{
+ struct wq_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&wq_map, &key);
+ if (!val)
+ return 0;
+
+ if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
+ return 0;
+ if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ return 0;
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__success
+int wq_sleepable_prog(void *ctx)
+{
+ struct wq_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&wq_map, &key);
+ if (!val)
+ return 0;
+
+ if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
+ return 0;
+ if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ return 0;
+ return 0;
+}
+
+/* Task work tests */
+
+struct task_work_elem {
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct task_work_elem);
+} task_work_map SEC(".maps");
+
+static int task_work_cb(struct bpf_map *map, void *key, void *value)
+{
+ u32 data;
+ /* Task work callbacks are always sleepable, even from non-sleepable programs */
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__success
+int task_work_non_sleepable_prog(void *ctx)
+{
+ struct task_work_elem *val;
+ struct task_struct *task;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&task_work_map, &key);
+ if (!val)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+
+ bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__success
+int task_work_sleepable_prog(void *ctx)
+{
+ struct task_work_elem *val;
+ struct task_struct *task;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&task_work_map, &key);
+ if (!val)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+
+ bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
new file mode 100644
index 000000000000..fb62e09f2114
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/basic_stack.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("stack out of bounds")
+__failure __msg("invalid write to stack")
+__failure_unpriv
+__naked void stack_out_of_bounds(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 + 8) = r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("uninitialized stack1")
+__success __log_level(4) __msg("stack depth 8")
+__failure_unpriv __msg_unpriv("invalid read from stack")
+__naked void uninitialized_stack1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("uninitialized stack2")
+__success __log_level(4) __msg("stack depth 8")
+__failure_unpriv __msg_unpriv("invalid read from stack")
+__naked void uninitialized_stack2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r0 = *(u64*)(r2 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("invalid fp arithmetic")
+__failure __msg("R1 subtraction from stack pointer")
+__failure_unpriv
+__naked void invalid_fp_arithmetic(void)
+{
+ /* If this gets ever changed, make sure JITs can deal with it. */
+ asm volatile (" \
+ r0 = 0; \
+ r1 = r10; \
+ r1 -= 8; \
+ *(u64*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("non-invalid fp arithmetic")
+__success __success_unpriv __retval(0)
+__naked void non_invalid_fp_arithmetic(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned read from stack")
+__failure __msg("misaligned stack access")
+__failure_unpriv
+__naked void misaligned_read_from_stack(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r0 = *(u64*)(r2 - 4); \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c
new file mode 100644
index 000000000000..623f130a3198
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <stdint.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#include "bpf_misc.h"
+
+struct core_reloc_bitfields {
+ /* unsigned bitfields */
+ uint8_t ub1: 1;
+ uint8_t ub2: 2;
+ uint32_t ub7: 7;
+ /* signed bitfields */
+ int8_t sb4: 4;
+ int32_t sb20: 20;
+ /* non-bitfields */
+ uint32_t u32;
+ int32_t s32;
+} __attribute__((preserve_access_index));
+
+SEC("tc")
+__description("single CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(3)
+int single_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3);
+ return BPF_CORE_READ_BITFIELD(&bitfields, ub2);
+}
+
+SEC("tc")
+__description("multiple CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(0x3FD)
+int multiple_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+ uint8_t ub2;
+ int8_t sb4;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 1);
+ BPF_CORE_WRITE_BITFIELD(&bitfields, sb4, -1);
+
+ ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2);
+ sb4 = BPF_CORE_READ_BITFIELD(&bitfields, sb4);
+
+ return (((uint8_t)sb4) << 2) | ub2;
+}
+
+SEC("tc")
+__description("adjacent CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(7)
+int adjacent_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+ uint8_t ub1, ub2;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1);
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3);
+
+ ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1);
+ ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2);
+
+ return (ub2 << 1) | ub1;
+}
+
+SEC("tc")
+__description("multibyte CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(0x21)
+int multibyte_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+ uint32_t ub7;
+ uint8_t ub1;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1);
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub7, 16);
+
+ ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1);
+ ub7 = BPF_CORE_READ_BITFIELD(&bitfields, ub7);
+
+ return (ub7 << 1) | ub1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
new file mode 100644
index 000000000000..8bcddadfc4da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign,
+ u32 nr_bits) __ksym __weak;
+int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak;
+void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak;
+
+u64 bits_array[511] = {};
+
+SEC("iter.s/cgroup")
+__description("bits iter without destroy")
+__failure __msg("Unreleased reference")
+int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+ struct bpf_iter_bits it;
+ u64 data = 1;
+
+ bpf_iter_bits_new(&it, &data, 1);
+ bpf_iter_bits_next(&it);
+ return 0;
+}
+
+SEC("iter/cgroup")
+__description("uninitialized iter in ->next()")
+__failure __msg("expected an initialized iter_bits as arg #0")
+int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+ struct bpf_iter_bits it = {};
+
+ bpf_iter_bits_next(&it);
+ return 0;
+}
+
+SEC("iter/cgroup")
+__description("uninitialized iter in ->destroy()")
+__failure __msg("expected an initialized iter_bits as arg #0")
+int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+ struct bpf_iter_bits it = {};
+
+ bpf_iter_bits_destroy(&it);
+ return 0;
+}
+
+SEC("syscall")
+__description("null pointer")
+__success __retval(0)
+int null_pointer(void)
+{
+ struct bpf_iter_bits iter;
+ int err, nr = 0;
+ int *bit;
+
+ err = bpf_iter_bits_new(&iter, NULL, 1);
+ bpf_iter_bits_destroy(&iter);
+ if (err != -EINVAL)
+ return 1;
+
+ bpf_for_each(bits, bit, NULL, 1)
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("bits copy")
+__success __retval(10)
+int bits_copy(void)
+{
+ u64 data = 0xf7310UL; /* 4 + 3 + 2 + 1 + 0*/
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data, 1)
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("bits memalloc")
+__success __retval(64)
+int bits_memalloc(void)
+{
+ u64 data[2];
+ int nr = 0;
+ int *bit;
+
+ __builtin_memset(&data, 0xf0, sizeof(data)); /* 4 * 16 */
+ bpf_for_each(bits, bit, &data[0], ARRAY_SIZE(data))
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("bit index")
+__success __retval(8)
+int bit_index(void)
+{
+ u64 data = 0x100;
+ int bit_idx = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data, 1) {
+ if (*bit == 0)
+ continue;
+ bit_idx = *bit;
+ }
+ return bit_idx;
+}
+
+SEC("syscall")
+__description("bits too big")
+__success __retval(0)
+int bits_too_big(void)
+{
+ u64 data[4];
+ int nr = 0;
+ int *bit;
+
+ __builtin_memset(&data, 0xff, sizeof(data));
+ bpf_for_each(bits, bit, &data[0], 512) /* Be greater than 511 */
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("fewer words")
+__success __retval(1)
+int fewer_words(void)
+{
+ u64 data[2] = {0x1, 0xff};
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data[0], 1)
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("zero words")
+__success __retval(0)
+int zero_words(void)
+{
+ u64 data[2] = {0x1, 0xff};
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data[0], 0)
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("huge words")
+__success __retval(0)
+int huge_words(void)
+{
+ u64 data[8] = {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1};
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data[0], 67108865)
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("max words")
+__success __retval(4)
+int max_words(void)
+{
+ volatile int nr = 0;
+ int *bit;
+
+ bits_array[0] = (1ULL << 63) | 1U;
+ bits_array[510] = (1ULL << 33) | (1ULL << 32);
+
+ bpf_for_each(bits, bit, bits_array, 511) {
+ if (nr == 0 && *bit != 0)
+ break;
+ if (nr == 2 && *bit != 32672)
+ break;
+ nr++;
+ }
+ return nr;
+}
+
+SEC("syscall")
+__description("bad words")
+__success __retval(0)
+int bad_words(void)
+{
+ void *bad_addr = (void *)-4095;
+ struct bpf_iter_bits iter;
+ volatile int nr;
+ int *bit;
+ int err;
+
+ err = bpf_iter_bits_new(&iter, bad_addr, 1);
+ bpf_iter_bits_destroy(&iter);
+ if (err != -EFAULT)
+ return 1;
+
+ nr = 0;
+ bpf_for_each(bits, bit, bad_addr, 1)
+ nr++;
+ if (nr != 0)
+ return 2;
+
+ err = bpf_iter_bits_new(&iter, bad_addr, 4);
+ bpf_iter_bits_destroy(&iter);
+ if (err != -EFAULT)
+ return 3;
+
+ nr = 0;
+ bpf_for_each(bits, bit, bad_addr, 4)
+ nr++;
+ if (nr != 0)
+ return 4;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
new file mode 100644
index 000000000000..411a18437d7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -0,0 +1,1866 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds.c */
+
+#include <linux/bpf.h>
+#include <../../../include/linux/filter.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("subtraction bounds (map value) variant 1")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__naked void bounds_map_value_variant_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ if r1 > 0xff goto l0_%=; \
+ r3 = *(u8*)(r0 + 1); \
+ if r3 > 0xff goto l0_%=; \
+ r1 -= r3; \
+ r1 >>= 56; \
+ r0 += r1; \
+ r0 = *(u8*)(r0 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("subtraction bounds (map value) variant 2")
+__failure
+__msg("R0 min value is negative, either use unsigned index or do a if (index >=0) check.")
+__msg_unpriv("R1 has unknown scalar with mixed signed bounds")
+__naked void bounds_map_value_variant_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ if r1 > 0xff goto l0_%=; \
+ r3 = *(u8*)(r0 + 1); \
+ if r3 > 0xff goto l0_%=; \
+ r1 -= r3; \
+ r0 += r1; \
+ r0 = *(u8*)(r0 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check subtraction on pointers for unpriv")
+__success __failure_unpriv __msg_unpriv("R9 pointer -= pointer prohibited")
+__retval(0)
+__naked void subtraction_on_pointers_for_unpriv(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 9; \
+ *(u64*)(r2 + 0) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ r9 = r10; \
+ r9 -= r0; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 0; \
+ *(u64*)(r2 + 0) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: *(u64*)(r0 + 0) = r9; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check based on zero-extended MOV")
+__success __success_unpriv __retval(0)
+__naked void based_on_zero_extended_mov(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r2 = 0x0000'0000'ffff'ffff */ \
+ w2 = 0xffffffff; \
+ /* r2 = 0 */ \
+ r2 >>= 32; \
+ /* no-op */ \
+ r0 += r2; \
+ /* access at offset 0 */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check based on sign-extended MOV. test1")
+__failure __msg("map_value pointer and 4294967295")
+__failure_unpriv
+__naked void on_sign_extended_mov_test1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r2 = 0xffff'ffff'ffff'ffff */ \
+ r2 = 0xffffffff; \
+ /* r2 = 0xffff'ffff */ \
+ r2 >>= 32; \
+ /* r0 = <oob pointer> */ \
+ r0 += r2; \
+ /* access to OOB pointer */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check based on sign-extended MOV. test2")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__failure_unpriv
+__naked void on_sign_extended_mov_test2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r2 = 0xffff'ffff'ffff'ffff */ \
+ r2 = 0xffffffff; \
+ /* r2 = 0xfff'ffff */ \
+ r2 >>= 36; \
+ /* r0 = <oob pointer> */ \
+ r0 += r2; \
+ /* access to OOB pointer */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bounds check based on reg_off + var_off + insn_off. test1")
+__failure __msg("value_size=8 off=1073741825")
+__naked void var_off_insn_off_test1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r6 &= 1; \
+ r6 += %[__imm_0]; \
+ r0 += r6; \
+ r0 += %[__imm_0]; \
+l0_%=: r0 = *(u8*)(r0 + 3); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, (1 << 29) - 1),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bounds check based on reg_off + var_off + insn_off. test2")
+__failure __msg("value 1073741823")
+__naked void var_off_insn_off_test2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r6 &= 1; \
+ r6 += %[__imm_0]; \
+ r0 += r6; \
+ r0 += %[__imm_1]; \
+l0_%=: r0 = *(u8*)(r0 + 3); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, (1 << 30) - 1),
+ __imm_const(__imm_1, (1 << 29) - 1),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after truncation of non-boundary-crossing range")
+__success __success_unpriv __retval(0)
+__naked void of_non_boundary_crossing_range(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = [0x00, 0xff] */ \
+ r1 = *(u8*)(r0 + 0); \
+ r2 = 1; \
+ /* r2 = 0x10'0000'0000 */ \
+ r2 <<= 36; \
+ /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */ \
+ r1 += r2; \
+ /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */ \
+ r1 += 0x7fffffff; \
+ /* r1 = [0x00, 0xff] */ \
+ w1 -= 0x7fffffff; \
+ /* r1 = 0 */ \
+ r1 >>= 8; \
+ /* no-op */ \
+ r0 += r1; \
+ /* access at offset 0 */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after truncation of boundary-crossing range (1)")
+__failure
+/* not actually fully unbounded, but the bound is very high */
+__msg("value -4294967168 makes map_value pointer be out of bounds")
+__failure_unpriv
+__naked void of_boundary_crossing_range_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = [0x00, 0xff] */ \
+ r1 = *(u8*)(r0 + 0); \
+ r1 += %[__imm_0]; \
+ /* r1 = [0xffff'ff80, 0x1'0000'007f] */ \
+ r1 += %[__imm_0]; \
+ /* r1 = [0xffff'ff80, 0xffff'ffff] or \
+ * [0x0000'0000, 0x0000'007f] \
+ */ \
+ w1 += 0; \
+ r1 -= %[__imm_0]; \
+ /* r1 = [0x00, 0xff] or \
+ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]\
+ */ \
+ r1 -= %[__imm_0]; \
+ /* error on OOB pointer computation */ \
+ r0 += r1; \
+ /* exit */ \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, 0xffffff80 >> 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after truncation of boundary-crossing range (2)")
+__failure __msg("value -4294967168 makes map_value pointer be out of bounds")
+__failure_unpriv
+__naked void of_boundary_crossing_range_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = [0x00, 0xff] */ \
+ r1 = *(u8*)(r0 + 0); \
+ r1 += %[__imm_0]; \
+ /* r1 = [0xffff'ff80, 0x1'0000'007f] */ \
+ r1 += %[__imm_0]; \
+ /* r1 = [0xffff'ff80, 0xffff'ffff] or \
+ * [0x0000'0000, 0x0000'007f] \
+ * difference to previous test: truncation via MOV32\
+ * instead of ALU32. \
+ */ \
+ w1 = w1; \
+ r1 -= %[__imm_0]; \
+ /* r1 = [0x00, 0xff] or \
+ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]\
+ */ \
+ r1 -= %[__imm_0]; \
+ /* error on OOB pointer computation */ \
+ r0 += r1; \
+ /* exit */ \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, 0xffffff80 >> 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after wrapping 32-bit addition")
+__success __success_unpriv __retval(0)
+__naked void after_wrapping_32_bit_addition(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = 0x7fff'ffff */ \
+ r1 = 0x7fffffff; \
+ /* r1 = 0xffff'fffe */ \
+ r1 += 0x7fffffff; \
+ /* r1 = 0 */ \
+ w1 += 2; \
+ /* no-op */ \
+ r0 += r1; \
+ /* access at offset 0 */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after shift with oversized count operand")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__naked void shift_with_oversized_count_operand(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = 32; \
+ r1 = 1; \
+ /* r1 = (u32)1 << (u32)32 = ? */ \
+ w1 <<= w2; \
+ /* r1 = [0x0000, 0xffff] */ \
+ r1 &= 0xffff; \
+ /* computes unknown pointer, potentially OOB */ \
+ r0 += r1; \
+ /* potentially OOB access */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after right shift of maybe-negative number")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv
+__naked void shift_of_maybe_negative_number(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = [0x00, 0xff] */ \
+ r1 = *(u8*)(r0 + 0); \
+ /* r1 = [-0x01, 0xfe] */ \
+ r1 -= 1; \
+ /* r1 = 0 or 0xff'ffff'ffff'ffff */ \
+ r1 >>= 8; \
+ /* r1 = 0 or 0xffff'ffff'ffff */ \
+ r1 >>= 8; \
+ /* computes unknown pointer, potentially OOB */ \
+ r0 += r1; \
+ /* potentially OOB access */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after 32-bit right shift with 64-bit input")
+__failure __msg("math between map_value pointer and 4294967294 is not allowed")
+__failure_unpriv
+__naked void shift_with_64_bit_input(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 2; \
+ /* r1 = 1<<32 */ \
+ r1 <<= 31; \
+ /* r1 = 0 (NOT 2!) */ \
+ w1 >>= 31; \
+ /* r1 = 0xffff'fffe (NOT 0!) */ \
+ w1 -= 2; \
+ /* error on computing OOB pointer */ \
+ r0 += r1; \
+ /* exit */ \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check map access with off+size signed 32bit overflow. test1")
+__failure __msg("map_value pointer and 2147483646")
+__failure_unpriv
+__naked void size_signed_32bit_overflow_test1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 += 0x7ffffffe; \
+ r0 = *(u64*)(r0 + 0); \
+ goto l1_%=; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check map access with off+size signed 32bit overflow. test2")
+__failure __msg("pointer offset 1073741822")
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__naked void size_signed_32bit_overflow_test2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 += 0x1fffffff; \
+ r0 += 0x1fffffff; \
+ r0 += 0x1fffffff; \
+ r0 = *(u64*)(r0 + 0); \
+ goto l1_%=; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check map access with off+size signed 32bit overflow. test3")
+__failure __msg("pointer offset -1073741822")
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__naked void size_signed_32bit_overflow_test3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 -= 0x1fffffff; \
+ r0 -= 0x1fffffff; \
+ r0 = *(u64*)(r0 + 2); \
+ goto l1_%=; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check map access with off+size signed 32bit overflow. test4")
+__failure __msg("map_value pointer and 1000000000000")
+__failure_unpriv
+__naked void size_signed_32bit_overflow_test4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = 1000000; \
+ r1 *= 1000000; \
+ r0 += r1; \
+ r0 = *(u64*)(r0 + 2); \
+ goto l1_%=; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check mixed 32bit and 64bit arithmetic. test1")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
+__naked void _32bit_and_64bit_arithmetic_test1(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = -1; \
+ r1 <<= 32; \
+ r1 += 1; \
+ /* r1 = 0xffffFFFF00000001 */ \
+ if w1 > 1 goto l0_%=; \
+ /* check ALU64 op keeps 32bit bounds */ \
+ r1 += 1; \
+ if w1 > 2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: /* invalid ldx if bounds are lost above */ \
+ r0 = *(u64*)(r0 - 1); \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check mixed 32bit and 64bit arithmetic. test2")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
+__naked void _32bit_and_64bit_arithmetic_test2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = -1; \
+ r1 <<= 32; \
+ r1 += 1; \
+ /* r1 = 0xffffFFFF00000001 */ \
+ r2 = 3; \
+ /* r1 = 0x2 */ \
+ w1 += 1; \
+ /* check ALU32 op zero extends 64bit bounds */ \
+ if r1 > r2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: /* invalid ldx if bounds are lost above */ \
+ r0 = *(u64*)(r0 - 1); \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("assigning 32bit bounds to 64bit for wA = 0, wB = wA")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void for_wa_0_wb_wa(void)
+{
+ asm volatile (" \
+ r8 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data]); \
+ w9 = 0; \
+ w2 = w9; \
+ r6 = r7; \
+ r6 += r2; \
+ r3 = r6; \
+ r3 += 8; \
+ if r3 > r8 goto l0_%=; \
+ r5 = *(u32*)(r6 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg = 0, reg xor 1")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
+__naked void reg_0_reg_xor_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = 0; \
+ r1 ^= 1; \
+ if r1 != 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg32 = 0, reg32 xor 1")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
+__naked void reg32_0_reg32_xor_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: w1 = 0; \
+ w1 ^= 1; \
+ if w1 != 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg = 2, reg xor 3")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 > 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
+__naked void reg_2_reg_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = 2; \
+ r1 ^= 3; \
+ if r1 > 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg = any, reg xor 3")
+__failure __msg("invalid access to map value")
+__msg_unpriv("invalid access to map value")
+__naked void reg_any_reg_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u64*)(r0 + 0); \
+ r1 ^= 3; \
+ if r1 != 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg32 = any, reg32 xor 3")
+__failure __msg("invalid access to map value")
+__msg_unpriv("invalid access to map value")
+__naked void reg32_any_reg32_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u64*)(r0 + 0); \
+ w1 ^= 3; \
+ if w1 != 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg > 0, reg xor 3")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
+__naked void reg_0_reg_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u64*)(r0 + 0); \
+ if r1 <= 0 goto l1_%=; \
+ r1 ^= 3; \
+ if r1 >= 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg32 > 0, reg32 xor 3")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
+__naked void reg32_0_reg32_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u64*)(r0 + 0); \
+ if w1 <= 0 goto l1_%=; \
+ w1 ^= 3; \
+ if w1 >= 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for non const xor src dst")
+__success __log_level(2)
+__msg("5: (af) r0 ^= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__naked void non_const_xor_src_dst(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r6 &= 0xaf; \
+ r0 &= 0x1a0; \
+ r0 ^= r6; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for non const or src dst")
+__success __log_level(2)
+__msg("5: (4f) r0 |= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__naked void non_const_or_src_dst(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r6 &= 0xaf; \
+ r0 &= 0x1a0; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for non const mul regs")
+__success __log_level(2)
+__msg("5: (2f) r0 *= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
+__naked void non_const_mul_regs(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r6 &= 0xff; \
+ r0 &= 0x0f; \
+ r0 *= r6; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks after 32-bit truncation. test 1")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void _32_bit_truncation_test_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ /* This used to reduce the max bound to 0x7fffffff */\
+ if r1 == 0 goto l1_%=; \
+ if r1 > 0x7fffffff goto l0_%=; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks after 32-bit truncation. test 2")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void _32_bit_truncation_test_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 s< 1 goto l1_%=; \
+ if w1 s< 0 goto l0_%=; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check with JMP_JLT for crossing 64-bit signed boundary")
+__success __retval(0)
+__naked void crossing_64_bit_signed_boundary_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ r0 = 0x7fffffffffffff10 ll; \
+ r1 += r0; \
+ r0 = 0x8000000000000000 ll; \
+l1_%=: r0 += 1; \
+ /* r1 unsigned range is [0x7fffffffffffff10, 0x800000000000000f] */\
+ if r0 < r1 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check with JMP_JSLT for crossing 64-bit signed boundary")
+__success __retval(0)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void crossing_64_bit_signed_boundary_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ r0 = 0x7fffffffffffff10 ll; \
+ r1 += r0; \
+ r2 = 0x8000000000000fff ll; \
+ r0 = 0x8000000000000000 ll; \
+l1_%=: r0 += 1; \
+ if r0 s> r2 goto l0_%=; \
+ /* r1 signed range is [S64_MIN, S64_MAX] */ \
+ if r0 s< r1 goto l1_%=; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check for loop upper bound greater than U32_MAX")
+__success __retval(0)
+__naked void bound_greater_than_u32_max(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ r0 = 0x100000000 ll; \
+ r1 += r0; \
+ r0 = 0x100000000 ll; \
+l1_%=: r0 += 1; \
+ if r0 < r1 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check with JMP32_JLT for crossing 32-bit signed boundary")
+__success __retval(0)
+__naked void crossing_32_bit_signed_boundary_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ w0 = 0x7fffff10; \
+ w1 += w0; \
+ w0 = 0x80000000; \
+l1_%=: w0 += 1; \
+ /* r1 unsigned range is [0, 0x8000000f] */ \
+ if w0 < w1 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check with JMP32_JSLT for crossing 32-bit signed boundary")
+__success __retval(0)
+__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
+__naked void crossing_32_bit_signed_boundary_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ w0 = 0x7fffff10; \
+ w1 += w0; \
+ w2 = 0x80000fff; \
+ w0 = 0x80000000; \
+l1_%=: w0 += 1; \
+ if w0 s> w2 goto l0_%=; \
+ /* r1 signed range is [S32_MIN, S32_MAX] */ \
+ if w0 s< w1 goto l1_%=; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bounds check with JMP_NE for reg edge")
+__success __retval(0)
+__naked void reg_not_equal_const(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r4 = r0; \
+ r4 &= 7; \
+ if r4 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r1 = r6; \
+ r2 = 0; \
+ r3 = r10; \
+ r3 += -8; \
+ r5 = 0; \
+ /* The 4th argument of bpf_skb_store_bytes is defined as \
+ * ARG_CONST_SIZE, so 0 is not allowed. The 'r4 != 0' \
+ * is providing us this exclusion of zero from initial \
+ * [0, 7] range. \
+ */ \
+ call %[bpf_skb_store_bytes]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bounds check with JMP_EQ for reg edge")
+__success __retval(0)
+__naked void reg_equal_const(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r4 = r0; \
+ r4 &= 7; \
+ if r4 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r10; \
+ r3 += -8; \
+ r5 = 0; \
+ /* Just the same as what we do in reg_not_equal_const() */ \
+ call %[bpf_skb_store_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiply mixed sign bounds. test 1")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))")
+__naked void mult_mixed0_sign(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0xf;"
+ "r6 -= 1000000000;"
+ "r7 &= 0xf;"
+ "r7 -= 2000000000;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiply mixed sign bounds. test 2")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=smin32=-100,smax=smax32=200)")
+__naked void mult_mixed1_sign(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0xf;"
+ "r6 -= 0xa;"
+ "r7 &= 0xf;"
+ "r7 -= 0x14;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiply negative bounds")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))")
+__naked void mult_sign_bounds(void)
+{
+ asm volatile (
+ "r8 = 0x7fff;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0xa;"
+ "r6 -= r8;"
+ "r7 &= 0xf;"
+ "r7 -= r8;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiply bounds that don't cross signed boundary")
+__success __log_level(2)
+__msg("r8 *= r6 {{.*}}; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))")
+__naked void mult_no_sign_crossing(void)
+{
+ asm volatile (
+ "r6 = 0xb;"
+ "r8 = 0xb3c3f8c99262687 ll;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= r7;"
+ "r8 *= r6;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiplication overflow, result in unbounded reg. test 1")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
+__naked void mult_unsign_ovf(void)
+{
+ asm volatile (
+ "r8 = 0x7ffffffffff ll;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0x7fffffff;"
+ "r7 &= r8;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiplication overflow, result in unbounded reg. test 2")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
+__naked void mult_sign_ovf(void)
+{
+ asm volatile (
+ "r8 = 0x7ffffffff ll;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0xa;"
+ "r6 -= r8;"
+ "r7 &= 0x7fffffff;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit addition, all outcomes overflow")
+__success __log_level(2)
+__msg("5: (0f) r3 += r3 {{.*}} R3=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)")
+__retval(0)
+__naked void add64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 0xa000000000000000 ll;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit addition, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("4: (0f) r3 += r3 {{.*}} R3=scalar()")
+__retval(0)
+__naked void add64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 2;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition overflow, all outcomes overflow")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 0xa0000000;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 2;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtraction, all outcomes underflow")
+__success __log_level(2)
+__msg("6: (1f) r3 -= r1 {{.*}} R3=scalar(umin=1,umax=0x8000000000000000)")
+__retval(0)
+__naked void sub64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = r0;"
+ "r2 = 0x8000000000000000 ll;"
+ "r1 |= r2;"
+ "r3 = 0;"
+ "r3 -= r1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtraction, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar()")
+__retval(0)
+__naked void sub64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r3 = r0;"
+ "r2 = 1;"
+ "r3 -= r2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtraction overflow, all outcomes underflow")
+__success __log_level(2)
+__msg("5: (1c) w3 -= w1 {{.*}} R3=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w1 = w0;"
+ "w2 = 0x80000000;"
+ "w1 |= w2;"
+ "w3 = 0;"
+ "w3 -= w1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("3: (1c) w3 -= w2 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w3 = w0;"
+ "w2 = 1;"
+ "w3 -= w2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead branch on jset, does not result in invariants violation error")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_range_analysis(void)
+{
+ asm volatile (" \
+ call %[bpf_get_netns_cookie]; \
+ if r0 == 0 goto l0_%=; \
+ if r0 & 0xffffffff goto +0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges
+ * overlap on the negative side. At instruction 7, the ranges look as follows:
+ *
+ * 0 umin=0xfffffcf1 umax=0xff..ff6e U64_MAX
+ * | [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxxxx] [xxxxxxxxxxxx|
+ * 0 smax=0xeffffeee smin=-655 -1
+ *
+ * We should therefore deduce the following new bounds:
+ *
+ * 0 u64=[0xff..ffd71;0xff..ff6e] U64_MAX
+ * | [xxx] |
+ * |----------------------------|------------------------------|
+ * | [xxx] |
+ * 0 s64=[-655;-146] -1
+ *
+ * Without the deduction cross sign boundary, we end up with an invariant
+ * violation error.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, negative overlap")
+__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=smin32=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,umin32=0xfffffd71,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))")
+__retval(0)
+__naked void bounds_deduct_negative_overlap(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w3 = w0; \
+ w6 = (s8)w0; \
+ r0 = (s8)r0; \
+ if w6 >= 0xf0000000 goto l0_%=; \
+ r0 += r6; \
+ r6 += 400; \
+ r0 -= r6; \
+ if r3 < r0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges
+ * overlap on the positive side. At instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0 umax=0xffffffffffffff00 U64_MAX
+ * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxx] [xxxxxxxx|
+ * 0 smax=127 smin=-128 -1
+ *
+ * We should therefore deduce the following new bounds:
+ *
+ * 0 u64=[0;127] U64_MAX
+ * [xxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * [xxxxxxxx] |
+ * 0 s64=[0;127] -1
+ *
+ * Without the deduction cross sign boundary, the program is rejected due to
+ * the frame pointer write.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, positive overlap")
+__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))")
+__retval(0)
+__naked void bounds_deduct_positive_overlap(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 = (s8)r0; \
+ r1 = 0xffffffffffffff00; \
+ if r0 > r1 goto l0_%=; \
+ if r0 < 128 goto l0_%=; \
+ r10 = 0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* This test is the same as above, but the s64 and u64 ranges overlap in two
+ * places. At instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0 umax=0xffffffffffffff80 U64_MAX
+ * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxx] [xxxxxxxx|
+ * 0 smax=127 smin=-128 -1
+ *
+ * 0xffffffffffffff80 = (u64)-128. We therefore can't deduce anything new and
+ * the program should fail due to the frame pointer write.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, two overlaps")
+__failure __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)")
+__msg("frame pointer is read only")
+__naked void bounds_deduct_two_overlaps(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 = (s8)r0; \
+ r1 = 0xffffffffffffff80; \
+ if r0 > r1 goto l0_%=; \
+ if r0 < 128 goto l0_%=; \
+ r10 = 0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead jne branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jne_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 != r0 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead jeq branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jeq_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 == r0 goto +1; \
+ exit; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("conditional jump on same register, branch taken")
+__not_msg("20: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void condition_jump_on_same_register(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w8 = 0x80000000; \
+ r0 &= r8; \
+ if r0 == r0 goto +1; \
+ goto l1_%=; \
+ if r0 >= r0 goto +1; \
+ goto l1_%=; \
+ if r0 s>= r0 goto +1; \
+ goto l1_%=; \
+ if r0 <= r0 goto +1; \
+ goto l1_%=; \
+ if r0 s<= r0 goto +1; \
+ goto l1_%=; \
+ if r0 != r0 goto l1_%=; \
+ if r0 > r0 goto l1_%=; \
+ if r0 s> r0 goto l1_%=; \
+ if r0 < r0 goto l1_%=; \
+ if r0 s< r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, constant value branch taken")
+__not_msg("7: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_1(void *ctx)
+{
+ asm volatile(" \
+ r0 = 0; \
+ if r0 & r0 goto l1_%=; \
+ r0 = 1; \
+ if r0 & r0 goto +1; \
+ goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value branch taken")
+__not_msg("12: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_2(void *ctx)
+{
+ asm volatile(" \
+ /* range [1;2] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 += 1; \
+ if r0 & r0 goto +1; \
+ goto l1_%=; \
+ /* range [-2;-1] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 -= 2; \
+ if r0 & r0 goto +1; \
+ goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 1")
+__msg("3: (b7) r0 = 0 {{.*}} R0=0")
+__msg("5: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_3(void *ctx)
+{
+ asm volatile(" \
+ /* range [0;1] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ if r0 & r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 2")
+__msg("4: (b7) r0 = 0 {{.*}} R0=0")
+__msg("6: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_4(void *ctx)
+{
+ asm volatile(" \
+ /* range [-1;0] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 -= 1; \
+ if r0 & r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 3")
+__msg("4: (b7) r0 = 0 {{.*}} R0=0")
+__msg("6: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_5(void *ctx)
+{
+ asm volatile(" \
+ /* range [-1;1] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x2; \
+ r0 -= 1; \
+ if r0 & r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
new file mode 100644
index 000000000000..260a6df264e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds_deduction.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("check deducing bounds from const, 1")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_1(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ if r0 s>= 1 goto l0_%=; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 2")
+__success __failure_unpriv
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__retval(1)
+__naked void deducing_bounds_from_const_2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ if r0 s>= 1 goto l0_%=; \
+ exit; \
+l0_%=: if r0 s<= 1 goto l1_%=; \
+ exit; \
+l1_%=: r1 -= r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 3")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_3(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s<= 0 goto l0_%=; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 4")
+__success __failure_unpriv
+__msg_unpriv("R6 has pointer with unsupported alu operation")
+__retval(0)
+__naked void deducing_bounds_from_const_4(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r0 = 0; \
+ if r0 s<= 0 goto l0_%=; \
+ exit; \
+l0_%=: if r0 s>= 0 goto l1_%=; \
+ exit; \
+l1_%=: r6 -= r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 5")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_5(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s>= 1 goto l0_%=; \
+ r0 -= r1; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 6")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_6(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s>= 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 7")
+__failure __msg("dereference of modified ctx ptr")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void deducing_bounds_from_const_7(void)
+{
+ asm volatile (" \
+ r0 = %[__imm_0]; \
+ if r0 s>= 0 goto l0_%=; \
+l0_%=: r1 -= r0; \
+ r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, ~0),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 8")
+__failure __msg("negative offset ctx ptr R1 off=-1 disallowed")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void deducing_bounds_from_const_8(void)
+{
+ asm volatile (" \
+ r0 = %[__imm_0]; \
+ if r0 s>= 0 goto l0_%=; \
+ r1 += r0; \
+l0_%=: r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, ~0),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 9")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_9(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s>= 0 goto l0_%=; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 10")
+__failure
+__msg("math between ctx pointer and register with unbounded min value is not allowed")
+__failure_unpriv
+__naked void deducing_bounds_from_const_10(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r0 = 0; \
+ if r0 s<= 0 goto l0_%=; \
+l0_%=: /* Marks r0 as unknown. */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 -= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c
new file mode 100644
index 000000000000..823f727cf210
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c
@@ -0,0 +1,639 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> == <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r0 == r2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> == <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_2(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 3 goto l0_%=; \
+ r2 = 4; \
+ if r0 == r2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> != <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_3(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r0 != r2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> != <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_4(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 3 goto l0_%=; \
+ r2 = 4; \
+ if r0 != r2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> == <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_5(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 3; \
+ if w0 == w2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> == <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_6(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 > 4 goto l0_%=; \
+ w2 = 5; \
+ if w0 == w2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> != <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_7(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 3 goto l0_%=; \
+ w2 = 2; \
+ if w0 != w2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> != <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_8(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 > 3 goto l0_%=; \
+ w2 = 4; \
+ if w0 != w2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> > <non_const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_9(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ r2 = 0; \
+ if r2 > r0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> > <non_const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_10(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 > r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> >= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_11(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 4 goto l0_%=; \
+ r2 = 3; \
+ if r2 >= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> < <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_12(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 < r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> <= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_13(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 >= 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 <= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> == <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_14(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r2 == r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s> <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_15(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s< 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 s> r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s>= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_16(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s< 4 goto l0_%=; \
+ r2 = 3; \
+ if r2 s>= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s< <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_17(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s> 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 s< r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s<= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_18(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s> 4 goto l0_%=; \
+ r2 = 5; \
+ if r2 s<= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> != <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_19(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r2 != r0 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> > <non_const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_20(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ w2 = 0; \
+ if w2 > w0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> > <non_const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_21(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 > w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> >= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_22(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 3; \
+ if w2 >= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> < <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_23(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 > 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 < w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> <= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_24(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 >= 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 <= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> == <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_25(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 3; \
+ if w2 == w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s> <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_26(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s< 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 s> w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s>= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_27(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s< 4 goto l0_%=; \
+ w2 = 3; \
+ if w2 s>= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s< <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_28(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s> 4 goto l0_%=; \
+ w2 = 5; \
+ if w2 s< w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s<= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_29(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s>= 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 s<= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> != <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_30(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 3 goto l0_%=; \
+ w2 = 2; \
+ if w2 != w0 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c
new file mode 100644
index 000000000000..4f40144748a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, positive bounds")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_positive_bounds(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = 2; \
+ if r2 >= r1 goto l0_%=; \
+ if r1 s> 4 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void checks_mixing_signed_and_unsigned(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 2")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_2(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ r8 = 0; \
+ r8 += r1; \
+ if r8 s> 1 goto l0_%=; \
+ r0 += r8; \
+ r0 = 0; \
+ *(u8*)(r8 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 3")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_3(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ r8 = r1; \
+ if r8 s> 1 goto l0_%=; \
+ r0 += r8; \
+ r0 = 0; \
+ *(u8*)(r8 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 4")
+__success __success_unpriv __retval(0)
+__naked void signed_and_unsigned_variant_4(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = 1; \
+ r1 &= r2; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 5")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_5(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += 4; \
+ r0 -= r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 6")
+__failure __msg("R4 min value is negative, either use unsigned")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_6(void)
+{
+ asm volatile (" \
+ r9 = r1; \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = r9; \
+ r2 = 0; \
+ r3 = r10; \
+ r3 += -512; \
+ r4 = *(u64*)(r10 - 16); \
+ r6 = -1; \
+ if r4 > r6 goto l0_%=; \
+ if r4 s> 1 goto l0_%=; \
+ r4 += 1; \
+ r5 = 0; \
+ r6 = 0; \
+ *(u16*)(r10 - 512) = r6; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 7")
+__success __success_unpriv __retval(0)
+__naked void signed_and_unsigned_variant_7(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = %[__imm_0]; \
+ if r1 > r2 goto l0_%=; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, 1024 * 1024 * 1024)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 8")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_8(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r2 > r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 9")
+__success __success_unpriv __retval(0)
+__naked void signed_and_unsigned_variant_9(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -9223372036854775808ULL ll; \
+ if r2 > r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 10")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_10(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r2 > r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 11")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_11(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r2 >= r1 goto l1_%=; \
+ /* Dead branch. */ \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 12")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_12(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -6; \
+ if r2 >= r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 13")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_13(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = 2; \
+ if r2 >= r1 goto l0_%=; \
+ r7 = 1; \
+ if r7 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r7 += r1; \
+ if r7 s> 4 goto l2_%=; \
+ r0 += r7; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 14")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_14(void)
+{
+ asm volatile (" \
+ r9 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ r8 = 2; \
+ if r9 == 42 goto l1_%=; \
+ if r8 s> r1 goto l2_%=; \
+l3_%=: if r1 s> 1 goto l2_%=; \
+ r0 += r1; \
+l0_%=: r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l2_%=: r0 = 0; \
+ exit; \
+l1_%=: if r1 > r2 goto l2_%=; \
+ goto l3_%=; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 15")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_15(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -6; \
+ if r2 >= r1 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 += r1; \
+ if r0 > 1 goto l2_%=; \
+ r0 = 0; \
+ exit; \
+l2_%=: r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
new file mode 100644
index 000000000000..fb4fa465d67c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
@@ -0,0 +1,888 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+
+SEC("raw_tp")
+__arch_x86_64
+__log_level(4) __msg("stack depth 8")
+__xlated("4: r5 = 5")
+__xlated("5: r0 = ")
+__xlated("6: r0 = &(void __percpu *)(r0)")
+__xlated("7: r0 = *(u32 *)(r0 +0)")
+__xlated("8: exit")
+__success
+__naked void simple(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = 2;"
+ "r3 = 3;"
+ "r4 = 4;"
+ "r5 = 5;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "*(u64 *)(r10 - 24) = r2;"
+ "*(u64 *)(r10 - 32) = r3;"
+ "*(u64 *)(r10 - 40) = r4;"
+ "*(u64 *)(r10 - 48) = r5;"
+ "call %[bpf_get_smp_processor_id];"
+ "r5 = *(u64 *)(r10 - 48);"
+ "r4 = *(u64 *)(r10 - 40);"
+ "r3 = *(u64 *)(r10 - 32);"
+ "r2 = *(u64 *)(r10 - 24);"
+ "r1 = *(u64 *)(r10 - 16);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+/* The logic for detecting and verifying bpf_fastcall pattern is the same for
+ * any arch, however x86 differs from arm64 or riscv64 in a way
+ * bpf_get_smp_processor_id is rewritten:
+ * - on x86 it is done by verifier
+ * - on arm64 and riscv64 it is done by jit
+ *
+ * Which leads to different xlated patterns for different archs:
+ * - on x86 the call is expanded as 3 instructions
+ * - on arm64 and riscv64 the call remains as is
+ * (but spills/fills are still removed)
+ *
+ * It is really desirable to check instruction indexes in the xlated
+ * patterns, so add this canary test to check that function rewrite by
+ * jit is correctly processed by bpf_fastcall logic, keep the rest of the
+ * tests as x86.
+ */
+SEC("raw_tp")
+__arch_arm64
+__arch_riscv64
+__xlated("0: r1 = 1")
+__xlated("1: call bpf_get_smp_processor_id")
+__xlated("2: exit")
+__success
+__naked void canary_arm64_riscv64(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("3: exit")
+__success
+__naked void canary_zero_spills(void)
+{
+ asm volatile (
+ "call %[bpf_get_smp_processor_id];"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__log_level(4) __msg("stack depth 16")
+__xlated("1: *(u64 *)(r10 -16) = r1")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r2 = *(u64 *)(r10 -16)")
+__success
+__naked void wrong_reg_in_pattern1(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r2 = *(u64 *)(r10 - 16);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: *(u64 *)(r10 -16) = r6")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r6 = *(u64 *)(r10 -16)")
+__success
+__naked void wrong_reg_in_pattern2(void)
+{
+ asm volatile (
+ "r6 = 1;"
+ "*(u64 *)(r10 - 16) = r6;"
+ "call %[bpf_get_smp_processor_id];"
+ "r6 = *(u64 *)(r10 - 16);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: *(u64 *)(r10 -16) = r0")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r0 = *(u64 *)(r10 -16)")
+__success
+__naked void wrong_reg_in_pattern3(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "call %[bpf_get_smp_processor_id];"
+ "r0 = *(u64 *)(r10 - 16);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("2: *(u64 *)(r2 -16) = r1")
+__xlated("...")
+__xlated("4: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("6: r1 = *(u64 *)(r10 -16)")
+__success
+__naked void wrong_base_in_pattern(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = r10;"
+ "*(u64 *)(r2 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: *(u64 *)(r10 -16) = r1")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r2 = 1")
+__success
+__naked void wrong_insn_in_pattern(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r2 = 1;"
+ "r1 = *(u64 *)(r10 - 16);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("2: *(u64 *)(r10 -16) = r1")
+__xlated("...")
+__xlated("4: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("6: r1 = *(u64 *)(r10 -8)")
+__success
+__naked void wrong_off_in_pattern1(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: *(u32 *)(r10 -4) = r1")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r1 = *(u32 *)(r10 -4)")
+__success
+__naked void wrong_off_in_pattern2(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u32 *)(r10 - 4) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u32 *)(r10 - 4);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: *(u32 *)(r10 -16) = r1")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r1 = *(u32 *)(r10 -16)")
+__success
+__naked void wrong_size_in_pattern(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u32 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u32 *)(r10 - 16);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("2: *(u32 *)(r10 -8) = r1")
+__xlated("...")
+__xlated("4: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("6: r1 = *(u32 *)(r10 -8)")
+__success
+__naked void partial_pattern(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = 2;"
+ "*(u32 *)(r10 - 8) = r1;"
+ "*(u64 *)(r10 - 16) = r2;"
+ "call %[bpf_get_smp_processor_id];"
+ "r2 = *(u64 *)(r10 - 16);"
+ "r1 = *(u32 *)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("0: r1 = 1")
+__xlated("1: r2 = 2")
+/* not patched, spills for -8, -16 not removed */
+__xlated("2: *(u64 *)(r10 -8) = r1")
+__xlated("3: *(u64 *)(r10 -16) = r2")
+__xlated("...")
+__xlated("5: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("7: r2 = *(u64 *)(r10 -16)")
+__xlated("8: r1 = *(u64 *)(r10 -8)")
+/* patched, spills for -24, -32 removed */
+__xlated("...")
+__xlated("10: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("12: exit")
+__success
+__naked void min_stack_offset(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = 2;"
+ /* this call won't be patched */
+ "*(u64 *)(r10 - 8) = r1;"
+ "*(u64 *)(r10 - 16) = r2;"
+ "call %[bpf_get_smp_processor_id];"
+ "r2 = *(u64 *)(r10 - 16);"
+ "r1 = *(u64 *)(r10 - 8);"
+ /* this call would be patched */
+ "*(u64 *)(r10 - 24) = r1;"
+ "*(u64 *)(r10 - 32) = r2;"
+ "call %[bpf_get_smp_processor_id];"
+ "r2 = *(u64 *)(r10 - 32);"
+ "r1 = *(u64 *)(r10 - 24);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: *(u64 *)(r10 -8) = r1")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r1 = *(u64 *)(r10 -8)")
+__success
+__naked void bad_fixed_read(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r1 = *(u64 *)(r1 - 0);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: *(u64 *)(r10 -8) = r1")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r1 = *(u64 *)(r10 -8)")
+__success
+__naked void bad_fixed_write(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r1 = r10;"
+ "r1 += -8;"
+ "*(u64 *)(r1 - 0) = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("6: *(u64 *)(r10 -16) = r1")
+__xlated("...")
+__xlated("8: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("10: r1 = *(u64 *)(r10 -16)")
+__success
+__naked void bad_varying_read(void)
+{
+ asm volatile (
+ "r6 = *(u64 *)(r1 + 0);" /* random scalar value */
+ "r6 &= 0x7;" /* r6 range [0..7] */
+ "r6 += 0x2;" /* r6 range [2..9] */
+ "r7 = 0;"
+ "r7 -= r6;" /* r7 range [-9..-2] */
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ "r1 = r10;"
+ "r1 += r7;"
+ "r1 = *(u8 *)(r1 - 0);" /* touches slot [-16..-9] where spills are stored */
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("6: *(u64 *)(r10 -16) = r1")
+__xlated("...")
+__xlated("8: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("10: r1 = *(u64 *)(r10 -16)")
+__success
+__naked void bad_varying_write(void)
+{
+ asm volatile (
+ "r6 = *(u64 *)(r1 + 0);" /* random scalar value */
+ "r6 &= 0x7;" /* r6 range [0..7] */
+ "r6 += 0x2;" /* r6 range [2..9] */
+ "r7 = 0;"
+ "r7 -= r6;" /* r7 range [-9..-2] */
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ "r1 = r10;"
+ "r1 += r7;"
+ "*(u8 *)(r1 - 0) = r7;" /* touches slot [-16..-9] where spills are stored */
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: *(u64 *)(r10 -8) = r1")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r1 = *(u64 *)(r10 -8)")
+__success
+__naked void bad_write_in_subprog(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call bad_write_in_subprog_aux;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+__used
+__naked static void bad_write_in_subprog_aux(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "*(u64 *)(r1 - 0) = r0;" /* invalidates bpf_fastcall contract for caller: */
+ "exit;" /* caller stack at -8 used outside of the pattern */
+ ::: __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__xlated("1: *(u64 *)(r10 -8) = r1")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r1 = *(u64 *)(r10 -8)")
+__success
+__naked void bad_helper_write(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ /* bpf_fastcall pattern with stack offset -8 */
+ "*(u64 *)(r10 - 8) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 1;"
+ "r3 = 42;"
+ /* read dst is fp[-8], thus bpf_fastcall rewrite not applied */
+ "call %[bpf_probe_read_kernel];"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+/* main, not patched */
+__xlated("1: *(u64 *)(r10 -8) = r1")
+__xlated("...")
+__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("5: r1 = *(u64 *)(r10 -8)")
+__xlated("...")
+__xlated("9: call pc+1")
+__xlated("...")
+__xlated("10: exit")
+/* subprogram, patched */
+__xlated("11: r1 = 1")
+__xlated("...")
+__xlated("13: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("15: exit")
+__success
+__naked void invalidate_one_subprog(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r1 = *(u64 *)(r1 - 0);"
+ "call invalidate_one_subprog_aux;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+__used
+__naked static void invalidate_one_subprog_aux(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+/* main */
+__xlated("0: r1 = 1")
+__xlated("...")
+__xlated("2: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("4: call pc+1")
+__xlated("5: exit")
+/* subprogram */
+__xlated("6: r1 = 1")
+__xlated("...")
+__xlated("8: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+__xlated("10: *(u64 *)(r10 -16) = r1")
+__xlated("11: exit")
+__success
+__naked void subprogs_use_independent_offsets(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ "call subprogs_use_independent_offsets_aux;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+__used
+__naked static void subprogs_use_independent_offsets_aux(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 24) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 24);"
+ "*(u64 *)(r10 - 16) = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__log_level(4) __msg("stack depth 8")
+__xlated("2: r0 = &(void __percpu *)(r0)")
+__success
+__naked void helper_call_does_not_prevent_bpf_fastcall(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "*(u64 *)(r10 - 8) = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
+__xlated("...")
+__xlated("4: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+/* may_goto expansion starts */
+__xlated("6: r11 = *(u64 *)(r10 -24)")
+__xlated("7: if r11 == 0x0 goto pc+6")
+__xlated("8: r11 -= 1")
+__xlated("9: if r11 != 0x0 goto pc+2")
+__xlated("10: r11 = -24")
+__xlated("11: call unknown")
+__xlated("12: *(u64 *)(r10 -24) = r11")
+/* may_goto expansion ends */
+__xlated("13: *(u64 *)(r10 -8) = r1")
+__xlated("14: exit")
+__success
+__naked void may_goto_interaction_x86_64(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ ".8byte %[may_goto];"
+ /* just touch some stack at -8 */
+ "*(u64 *)(r10 - 8) = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_arm64
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
+__xlated("3: call bpf_get_smp_processor_id")
+/* may_goto expansion starts */
+__xlated("4: r11 = *(u64 *)(r10 -24)")
+__xlated("5: if r11 == 0x0 goto pc+6")
+__xlated("6: r11 -= 1")
+__xlated("7: if r11 != 0x0 goto pc+2")
+__xlated("8: r11 = -24")
+__xlated("9: call unknown")
+__xlated("10: *(u64 *)(r10 -24) = r11")
+/* may_goto expansion ends */
+__xlated("11: *(u64 *)(r10 -8) = r1")
+__xlated("12: exit")
+__success
+__naked void may_goto_interaction_arm64(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ ".8byte %[may_goto];"
+ /* just touch some stack at -8 */
+ "*(u64 *)(r10 - 8) = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+__used
+__naked static void dummy_loop_callback(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__log_level(4) __msg("stack depth 32+0")
+__xlated("2: r1 = 1")
+__xlated("3: r0 =")
+__xlated("4: r0 = &(void __percpu *)(r0)")
+__xlated("5: r0 = *(u32 *)(r0 +0)")
+/* bpf_loop params setup */
+__xlated("6: r2 =")
+__xlated("7: r3 = 0")
+__xlated("8: r4 = 0")
+__xlated("...")
+/* ... part of the inlined bpf_loop */
+__xlated("12: *(u64 *)(r10 -32) = r6")
+__xlated("13: *(u64 *)(r10 -24) = r7")
+__xlated("14: *(u64 *)(r10 -16) = r8")
+__xlated("...")
+__xlated("21: call pc+8") /* dummy_loop_callback */
+/* ... last insns of the bpf_loop_interaction1 */
+__xlated("...")
+__xlated("28: r0 = 0")
+__xlated("29: exit")
+/* dummy_loop_callback */
+__xlated("30: r0 = 0")
+__xlated("31: exit")
+__success
+__naked int bpf_loop_interaction1(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ /* bpf_fastcall stack region at -16, but could be removed */
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ "r2 = %[dummy_loop_callback];"
+ "r3 = 0;"
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(dummy_loop_callback),
+ __imm(bpf_get_smp_processor_id),
+ __imm(bpf_loop)
+ : __clobber_common
+ );
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__log_level(4) __msg("stack depth 40+0")
+/* call bpf_get_smp_processor_id */
+__xlated("2: r1 = 42")
+__xlated("3: r0 =")
+__xlated("4: r0 = &(void __percpu *)(r0)")
+__xlated("5: r0 = *(u32 *)(r0 +0)")
+/* call bpf_get_prandom_u32 */
+__xlated("6: *(u64 *)(r10 -16) = r1")
+__xlated("7: call")
+__xlated("8: r1 = *(u64 *)(r10 -16)")
+__xlated("...")
+/* ... part of the inlined bpf_loop */
+__xlated("15: *(u64 *)(r10 -40) = r6")
+__xlated("16: *(u64 *)(r10 -32) = r7")
+__xlated("17: *(u64 *)(r10 -24) = r8")
+__success
+__naked int bpf_loop_interaction2(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ /* bpf_fastcall stack region at -16, cannot be removed */
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "r1 = *(u64 *)(r10 - 16);"
+ "r2 = %[dummy_loop_callback];"
+ "r3 = 0;"
+ "r4 = 0;"
+ "call %[bpf_loop];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(dummy_loop_callback),
+ __imm(bpf_get_smp_processor_id),
+ __imm(bpf_get_prandom_u32),
+ __imm(bpf_loop)
+ : __clobber_common
+ );
+}
+
+SEC("raw_tp")
+__arch_x86_64
+__log_level(4)
+__msg("stack depth 512+0")
+/* just to print xlated version when debugging */
+__xlated("r0 = &(void __percpu *)(r0)")
+__success
+/* cumulative_stack_depth() stack usage is MAX_BPF_STACK,
+ * called subprogram uses an additional slot for bpf_fastcall spill/fill,
+ * since bpf_fastcall spill/fill could be removed the program still fits
+ * in MAX_BPF_STACK and should be accepted.
+ */
+__naked int cumulative_stack_depth(void)
+{
+ asm volatile(
+ "r1 = 42;"
+ "*(u64 *)(r10 - %[max_bpf_stack]) = r1;"
+ "call cumulative_stack_depth_subprog;"
+ "exit;"
+ :
+ : __imm_const(max_bpf_stack, MAX_BPF_STACK)
+ : __clobber_all
+ );
+}
+
+__used
+__naked static void cumulative_stack_depth_subprog(void)
+{
+ asm volatile (
+ "*(u64 *)(r10 - 8) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 8);"
+ "exit;"
+ :: __imm(bpf_get_smp_processor_id) : __clobber_all);
+}
+
+SEC("cgroup/getsockname_unix")
+__xlated("0: r2 = 1")
+/* bpf_cast_to_kern_ctx is replaced by a single assignment */
+__xlated("1: r0 = r1")
+__xlated("2: r0 = r2")
+__xlated("3: exit")
+__success
+__naked void kfunc_bpf_cast_to_kern_ctx(void)
+{
+ asm volatile (
+ "r2 = 1;"
+ "*(u64 *)(r10 - 32) = r2;"
+ "call %[bpf_cast_to_kern_ctx];"
+ "r2 = *(u64 *)(r10 - 32);"
+ "r0 = r2;"
+ "exit;"
+ :
+ : __imm(bpf_cast_to_kern_ctx)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__xlated("3: r3 = 1")
+/* bpf_rdonly_cast is replaced by a single assignment */
+__xlated("4: r0 = r1")
+__xlated("5: r0 = r3")
+void kfunc_bpf_rdonly_cast(void)
+{
+ asm volatile (
+ "r2 = %[btf_id];"
+ "r3 = 1;"
+ "*(u64 *)(r10 - 32) = r3;"
+ "call %[bpf_rdonly_cast];"
+ "r3 = *(u64 *)(r10 - 32);"
+ "r0 = r3;"
+ :
+ : __imm(bpf_rdonly_cast),
+ [btf_id]"r"(bpf_core_type_id_kernel(union bpf_attr))
+ : __clobber_common);
+}
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void kfunc_root(void)
+{
+ bpf_cast_to_kern_ctx(0);
+ bpf_rdonly_cast(0, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_get_stack.c b/tools/testing/selftests/bpf/progs/verifier_bpf_get_stack.c
new file mode 100644
index 000000000000..325a2bab4a71
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_get_stack.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bpf_get_stack.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("tracepoint")
+__description("bpf_get_stack return R0 within range")
+__success
+__naked void stack_return_r0_within_range(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ r9 = %[__imm_0]; \
+ r1 = r6; \
+ r2 = r7; \
+ r3 = %[__imm_0]; \
+ r4 = 256; \
+ call %[bpf_get_stack]; \
+ r1 = 0; \
+ r8 = r0; \
+ r8 <<= 32; \
+ r8 s>>= 32; \
+ if r1 s> r8 goto l0_%=; \
+ r9 -= r8; \
+ r2 = r7; \
+ r2 += r8; \
+ r1 = r9; \
+ r1 <<= 32; \
+ r1 s>>= 32; \
+ r3 = r2; \
+ r3 += r1; \
+ r1 = r7; \
+ r5 = %[__imm_0]; \
+ r1 += r5; \
+ if r3 >= r1 goto l0_%=; \
+ r1 = r6; \
+ r3 = r9; \
+ r4 = 0; \
+ call %[bpf_get_stack]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_stack),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) / 2)
+ : __clobber_all);
+}
+
+SEC("iter/task")
+__description("bpf_get_task_stack return R0 range is refined")
+__success
+__naked void return_r0_range_is_refined(void)
+{
+ asm volatile (" \
+ r6 = *(u64*)(r1 + 0); \
+ r6 = *(u64*)(r6 + 0); /* ctx->meta->seq */\
+ r7 = *(u64*)(r1 + 8); /* ctx->task */\
+ r1 = %[map_array_48b] ll; /* fixup_map_array_48b */\
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r2 = r10; \
+ r2 += -8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: if r7 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r1 = r7; \
+ r2 = r0; \
+ r9 = r0; /* keep buf for seq_write */\
+ r3 = 48; \
+ r4 = 0; \
+ call %[bpf_get_task_stack]; \
+ if r0 s> 0 goto l2_%=; \
+ r0 = 0; \
+ exit; \
+l2_%=: r1 = r6; \
+ r2 = r9; \
+ r3 = r0; \
+ call %[bpf_seq_write]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_task_stack),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_seq_write),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
new file mode 100644
index 000000000000..35e2cdc00a01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if __clang_major__ >= 21 && 0
+SEC("socket")
+__description("__builtin_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_builtin_trap_with_simple_c(void)
+{
+ __builtin_trap();
+}
+#endif
+
+SEC("socket")
+__description("__bpf_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_trap_with_simple_c(void)
+{
+ __bpf_trap();
+}
+
+SEC("socket")
+__description("__bpf_trap as the second-from-last insn")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void bpf_trap_at_func_end(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "call %[__bpf_trap];"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead code __bpf_trap in the middle of code")
+__success
+__naked void dead_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 0 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("reachable __bpf_trap in the middle of code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void live_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 1 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
new file mode 100644
index 000000000000..e61755656e8d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+
+SEC("socket")
+__description("BSWAP, 16")
+__success __success_unpriv __retval(0x23ff)
+__naked void bswap_16(void)
+{
+ asm volatile (" \
+ r0 = 0xff23; \
+ r0 = bswap16 r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("BSWAP, 32")
+__success __success_unpriv __retval(0x23ff0000)
+__naked void bswap_32(void)
+{
+ asm volatile (" \
+ r0 = 0xff23; \
+ r0 = bswap32 r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("BSWAP, 64")
+__success __success_unpriv __retval(0x34ff12ff)
+__naked void bswap_64(void)
+{
+ asm volatile (" \
+ r0 = %[u64_val] ll; \
+ r0 = bswap64 r0; \
+ exit; \
+" :
+ : [u64_val]"i"(0xff12ff34ff56ff78ull)
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
new file mode 100644
index 000000000000..03942cec07e5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/btf_ctx_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("fentry/bpf_modify_return_test")
+__description("btf_ctx_access accept")
+__success __retval(0)
+__naked void btf_ctx_access_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 8); /* load 2nd argument value (int pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer accept")
+__success __retval(0)
+__naked void ctx_access_u32_pointer_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); /* load 1nd argument value (u32 pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer reject u32")
+__failure __msg("size 4 must be 8")
+__naked void ctx_access_u32_pointer_reject_32(void)
+{
+ asm volatile (" \
+ r2 = *(u32 *)(r1 + 0); /* load 1st argument with narrow load */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer reject u16")
+__failure __msg("size 2 must be 8")
+__naked void ctx_access_u32_pointer_reject_16(void)
+{
+ asm volatile (" \
+ r2 = *(u16 *)(r1 + 0); /* load 1st argument with narrow load */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer reject u8")
+__failure __msg("size 1 must be 8")
+__naked void ctx_access_u32_pointer_reject_8(void)
+{
+ asm volatile (" \
+ r2 = *(u8 *)(r1 + 0); /* load 1st argument with narrow load */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test10")
+__description("btf_ctx_access const void pointer accept")
+__success __retval(0)
+__naked void ctx_access_const_void_pointer_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); /* load 1st argument value (const void pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c
new file mode 100644
index 000000000000..36e033a2e02c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+struct whatever {};
+
+SEC("kprobe")
+__success __log_level(2)
+/* context type is wrong, making it impossible to freplace this program */
+int btf_unreliable_kprobe(struct whatever *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c
new file mode 100644
index 000000000000..c1f55e1d80a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cfg.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("unreachable")
+__failure __msg("unreachable")
+__failure_unpriv
+__naked void unreachable(void)
+{
+ asm volatile (" \
+ exit; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unreachable2")
+__failure __msg("unreachable")
+__failure_unpriv
+__naked void unreachable2(void)
+{
+ asm volatile (" \
+ goto l0_%=; \
+ goto l0_%=; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("out of range jump")
+__failure __msg("jump out of range")
+__failure_unpriv
+__naked void out_of_range_jump(void)
+{
+ asm volatile (" \
+ goto l0_%=; \
+ exit; \
+l0_%=: \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("out of range jump2")
+__failure __msg("jump out of range")
+__failure_unpriv
+__naked void out_of_range_jump2(void)
+{
+ asm volatile (" \
+ goto -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("loop (back-edge)")
+__failure __msg("unreachable insn 1")
+__msg_unpriv("back-edge")
+__naked void loop_back_edge(void)
+{
+ asm volatile (" \
+l0_%=: goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("loop2 (back-edge)")
+__failure __msg("unreachable insn 4")
+__msg_unpriv("back-edge")
+__naked void loop2_back_edge(void)
+{
+ asm volatile (" \
+l0_%=: r1 = r0; \
+ r2 = r0; \
+ r3 = r0; \
+ goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("conditional loop")
+__failure __msg("infinite loop detected")
+__msg_unpriv("back-edge")
+__naked void conditional_loop(void)
+{
+ asm volatile (" \
+ r0 = r1; \
+l0_%=: r2 = r0; \
+ r3 = r0; \
+ if r1 == 0 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("conditional loop (2)")
+__success
+__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11")
+__naked void conditional_loop2(void)
+{
+ asm volatile (" \
+ r9 = 2 ll; \
+ r3 = 0x20 ll; \
+ r4 = 0x35 ll; \
+ r8 = r4; \
+ goto l1_%=; \
+l0_%=: r9 -= r3; \
+ r9 -= r4; \
+ r9 -= r8; \
+l1_%=: r8 += r4; \
+ if r8 < 0x64 goto l0_%=; \
+ r0 = r9; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2")
+__naked void uncond_loop_after_cond_jmp(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 > 0 goto l1_%=; \
+l0_%=: r0 = 1; \
+ goto l0_%=; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+
+__naked __noinline __used
+static unsigned long never_ending_subprog()
+{
+ asm volatile (" \
+ r0 = r1; \
+ goto -1; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+/* infinite loop is detected *after* check_cfg() */
+__failure __msg("infinite loop detected")
+__naked void uncond_loop_in_subprog_after_cond_jmp(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 > 0 goto l1_%=; \
+l0_%=: r0 += 1; \
+ call never_ending_subprog; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
new file mode 100644
index 000000000000..6e0f349f8f15
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test1")
+__failure __msg("smin=0 smax=4294967295 should have been in [0, 1]")
+__naked void with_invalid_return_code_test1(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test2")
+__success
+__naked void with_invalid_return_code_test2(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r0 &= 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test3")
+__failure __msg("smin=0 smax=3 should have been in [0, 1]")
+__naked void with_invalid_return_code_test3(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r0 &= 3; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test4")
+__success
+__naked void with_invalid_return_code_test4(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test5")
+__failure __msg("smin=2 smax=2 should have been in [0, 1]")
+__naked void with_invalid_return_code_test5(void)
+{
+ asm volatile (" \
+ r0 = 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test6")
+__failure __msg("R0 is not a known value (ctx)")
+__naked void with_invalid_return_code_test6(void)
+{
+ asm volatile (" \
+ r0 = r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test7")
+__failure __msg("R0 has unknown scalar value should have been in [0, 1]")
+__naked void with_invalid_return_code_test7(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r2 = *(u32*)(r1 + 4); \
+ r0 *= r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c
new file mode 100644
index 000000000000..5ee3d349d6d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_skb.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/skb")
+__description("direct packet read test#1 for CGROUP_SKB")
+__success __failure_unpriv
+__msg_unpriv("invalid bpf_context access off=76 size=4")
+__retval(0)
+__naked void test_1_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_pkt_type]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r6; \
+ r7 = *(u32*)(r1 + %[__sk_buff_queue_mapping]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_protocol]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_vlan_present]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_pkt_type, offsetof(struct __sk_buff, pkt_type)),
+ __imm_const(__sk_buff_protocol, offsetof(struct __sk_buff, protocol)),
+ __imm_const(__sk_buff_queue_mapping, offsetof(struct __sk_buff, queue_mapping)),
+ __imm_const(__sk_buff_vlan_present, offsetof(struct __sk_buff, vlan_present))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("direct packet read test#2 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_2_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r4 = *(u32*)(r1 + %[__sk_buff_vlan_tci]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_vlan_proto]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_priority]); \
+ *(u32*)(r1 + %[__sk_buff_priority]) = r6; \
+ r7 = *(u32*)(r1 + %[__sk_buff_ingress_ifindex]);\
+ r8 = *(u32*)(r1 + %[__sk_buff_tc_index]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_hash]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_hash, offsetof(struct __sk_buff, hash)),
+ __imm_const(__sk_buff_ingress_ifindex, offsetof(struct __sk_buff, ingress_ifindex)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority)),
+ __imm_const(__sk_buff_tc_index, offsetof(struct __sk_buff, tc_index)),
+ __imm_const(__sk_buff_vlan_proto, offsetof(struct __sk_buff, vlan_proto)),
+ __imm_const(__sk_buff_vlan_tci, offsetof(struct __sk_buff, vlan_tci))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("direct packet read test#3 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_3_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r4 = *(u32*)(r1 + %[__sk_buff_cb_0]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_cb_1]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_cb_2]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_cb_3]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_cb_4]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_napi_id]); \
+ *(u32*)(r1 + %[__sk_buff_cb_0]) = r4; \
+ *(u32*)(r1 + %[__sk_buff_cb_1]) = r5; \
+ *(u32*)(r1 + %[__sk_buff_cb_2]) = r6; \
+ *(u32*)(r1 + %[__sk_buff_cb_3]) = r7; \
+ *(u32*)(r1 + %[__sk_buff_cb_4]) = r8; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0])),
+ __imm_const(__sk_buff_cb_1, offsetof(struct __sk_buff, cb[1])),
+ __imm_const(__sk_buff_cb_2, offsetof(struct __sk_buff, cb[2])),
+ __imm_const(__sk_buff_cb_3, offsetof(struct __sk_buff, cb[3])),
+ __imm_const(__sk_buff_cb_4, offsetof(struct __sk_buff, cb[4])),
+ __imm_const(__sk_buff_napi_id, offsetof(struct __sk_buff, napi_id))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("direct packet read test#4 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_4_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_family]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_remote_ip4]); \
+ r4 = *(u32*)(r1 + %[__sk_buff_local_ip4]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_0]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_1]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_2]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_3]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_0]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_1]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_2]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_3]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_remote_port]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_local_port]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_family, offsetof(struct __sk_buff, family)),
+ __imm_const(__sk_buff_local_ip4, offsetof(struct __sk_buff, local_ip4)),
+ __imm_const(__sk_buff_local_ip6_0, offsetof(struct __sk_buff, local_ip6[0])),
+ __imm_const(__sk_buff_local_ip6_1, offsetof(struct __sk_buff, local_ip6[1])),
+ __imm_const(__sk_buff_local_ip6_2, offsetof(struct __sk_buff, local_ip6[2])),
+ __imm_const(__sk_buff_local_ip6_3, offsetof(struct __sk_buff, local_ip6[3])),
+ __imm_const(__sk_buff_local_port, offsetof(struct __sk_buff, local_port)),
+ __imm_const(__sk_buff_remote_ip4, offsetof(struct __sk_buff, remote_ip4)),
+ __imm_const(__sk_buff_remote_ip6_0, offsetof(struct __sk_buff, remote_ip6[0])),
+ __imm_const(__sk_buff_remote_ip6_1, offsetof(struct __sk_buff, remote_ip6[1])),
+ __imm_const(__sk_buff_remote_ip6_2, offsetof(struct __sk_buff, remote_ip6[2])),
+ __imm_const(__sk_buff_remote_ip6_3, offsetof(struct __sk_buff, remote_ip6[3])),
+ __imm_const(__sk_buff_remote_port, offsetof(struct __sk_buff, remote_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid access of tc_classid for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void tc_classid_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid access of data_meta for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void data_meta_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_data_meta]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data_meta, offsetof(struct __sk_buff, data_meta))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid access of flow_keys for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void flow_keys_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_flow_keys]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_flow_keys, offsetof(struct __sk_buff, flow_keys))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid write access to napi_id for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void napi_id_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r9 = *(u32*)(r1 + %[__sk_buff_napi_id]); \
+ *(u32*)(r1 + %[__sk_buff_napi_id]) = r9; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_napi_id, offsetof(struct __sk_buff, napi_id))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("write tstamp from CGROUP_SKB")
+__success __failure_unpriv
+__msg_unpriv("invalid bpf_context access off=152 size=8")
+__retval(0)
+__naked void write_tstamp_from_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r1 + %[__sk_buff_tstamp]) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("read tstamp from CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void read_tstamp_from_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u64*)(r1 + %[__sk_buff_tstamp]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c
new file mode 100644
index 000000000000..9a13f5c11ac7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_storage.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, char[TEST_DATA_LEN]);
+} cgroup_storage SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, char[64]);
+} percpu_cgroup_storage SEC(".maps");
+
+SEC("cgroup/skb")
+__description("valid cgroup storage access")
+__success __success_unpriv __retval(0)
+__naked void valid_cgroup_storage_access(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 1")
+__failure __msg("cannot pass map_type 1 into func bpf_get_local_storage")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_1(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 2")
+__failure __msg("fd 1 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_2(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ .8byte %[ld_map_fd]; \
+ .8byte 0; \
+ call %[bpf_get_local_storage]; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 1))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 3")
+__failure __msg("invalid access to map value, value_size=64 off=256 size=4")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_3(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 256); \
+ r1 += 1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 4")
+__failure __msg("invalid access to map value, value_size=64 off=-2 size=4")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void invalid_cgroup_storage_access_4(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 - 2); \
+ r0 = r1; \
+ r1 += 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 5")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_5(void)
+{
+ asm volatile (" \
+ r2 = 7; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 6")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__msg_unpriv("R2 leaks addr into helper function")
+__naked void invalid_cgroup_storage_access_6(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("valid per-cpu cgroup storage access")
+__success __success_unpriv __retval(0)
+__naked void per_cpu_cgroup_storage_access(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 1")
+__failure __msg("cannot pass map_type 1 into func bpf_get_local_storage")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_1(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 2")
+__failure __msg("fd 1 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_2(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ .8byte %[ld_map_fd]; \
+ .8byte 0; \
+ call %[bpf_get_local_storage]; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 1))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 3")
+__failure __msg("invalid access to map value, value_size=64 off=256 size=4")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_3(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 256); \
+ r1 += 1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 4")
+__failure __msg("invalid access to map value, value_size=64 off=-2 size=4")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void cpu_cgroup_storage_access_4(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 - 2); \
+ r0 = r1; \
+ r1 += 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 5")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_5(void)
+{
+ asm volatile (" \
+ r2 = 7; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 6")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__msg_unpriv("R2 leaks addr into helper function")
+__naked void cpu_cgroup_storage_access_6(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_const.c b/tools/testing/selftests/bpf/progs/verifier_const.c
new file mode 100644
index 000000000000..e118dbb768bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_const.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Isovalent */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+const volatile long foo = 42;
+long bar;
+long bart = 96;
+
+SEC("tc/ingress")
+__description("rodata/strtol: write rejected")
+__failure __msg("write into map forbidden")
+int tcx1(struct __sk_buff *skb)
+{
+ char buff[] = { '8', '4', '\0' };
+ bpf_strtol(buff, sizeof(buff), 0, (long *)&foo);
+ return TCX_PASS;
+}
+
+SEC("tc/ingress")
+__description("bss/strtol: write accepted")
+__success
+int tcx2(struct __sk_buff *skb)
+{
+ char buff[] = { '8', '4', '\0' };
+ bpf_strtol(buff, sizeof(buff), 0, &bar);
+ return TCX_PASS;
+}
+
+SEC("tc/ingress")
+__description("data/strtol: write accepted")
+__success
+int tcx3(struct __sk_buff *skb)
+{
+ char buff[] = { '8', '4', '\0' };
+ bpf_strtol(buff, sizeof(buff), 0, &bart);
+ return TCX_PASS;
+}
+
+SEC("tc/ingress")
+__description("rodata/mtu: write rejected")
+__failure __msg("write into map forbidden")
+int tcx4(struct __sk_buff *skb)
+{
+ bpf_check_mtu(skb, skb->ifindex, (__u32 *)&foo, 0, 0);
+ return TCX_PASS;
+}
+
+SEC("tc/ingress")
+__description("bss/mtu: write accepted")
+__success
+int tcx5(struct __sk_buff *skb)
+{
+ bpf_check_mtu(skb, skb->ifindex, (__u32 *)&bar, 0, 0);
+ return TCX_PASS;
+}
+
+SEC("tc/ingress")
+__description("data/mtu: write accepted")
+__success
+int tcx6(struct __sk_buff *skb)
+{
+ bpf_check_mtu(skb, skb->ifindex, (__u32 *)&bart, 0, 0);
+ return TCX_PASS;
+}
+
+static inline void write_fixed(volatile void *p, __u32 val)
+{
+ *(volatile __u32 *)p = val;
+}
+
+static inline void write_dyn(void *p, void *val, int len)
+{
+ bpf_copy_from_user(p, len, val);
+}
+
+SEC("tc/ingress")
+__description("rodata/mark: write with unknown reg rejected")
+__failure __msg("write into map forbidden")
+int tcx7(struct __sk_buff *skb)
+{
+ write_fixed((void *)&foo, skb->mark);
+ return TCX_PASS;
+}
+
+SEC("lsm.s/bprm_committed_creds")
+__description("rodata/mark: write with unknown reg rejected")
+__failure __msg("write into map forbidden")
+int BPF_PROG(bprm, struct linux_binprm *bprm)
+{
+ write_dyn((void *)&foo, &bart, bpf_get_prandom_u32() & 3);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_const_or.c b/tools/testing/selftests/bpf/progs/verifier_const_or.c
new file mode 100644
index 000000000000..68c568c3c3a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_const_or.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/const_or.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("tracepoint")
+__description("constant register |= constant should keep constant type")
+__success
+__naked void constant_should_keep_constant_type(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r2 |= 13; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("constant register |= constant should not bypass stack boundary checks")
+__failure __msg("invalid write to stack R1 off=-48 size=58")
+__naked void not_bypass_stack_boundary_checks_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r2 |= 24; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("constant register |= constant register should keep constant type")
+__success
+__naked void register_should_keep_constant_type(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r4 = 13; \
+ r2 |= r4; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("constant register |= constant register should not bypass stack boundary checks")
+__failure __msg("invalid write to stack R1 off=-48 size=58")
+__naked void not_bypass_stack_boundary_checks_2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r4 = 24; \
+ r2 |= r4; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
new file mode 100644
index 000000000000..5ebf7d9bcc55
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ctx.c */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("tc")
+__description("context stores via BPF_ATOMIC")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__naked void context_stores_via_bpf_atomic(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ lock *(u32 *)(r1 + %[__sk_buff_mark]) += w0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("arithmetic ops make PTR_TO_CTX unusable")
+__failure __msg("dereference of modified ctx ptr")
+__naked void make_ptr_to_ctx_unusable(void)
+{
+ asm volatile (" \
+ r1 += %[__imm_0]; \
+ r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__imm_0,
+ offsetof(struct __sk_buff, data) - offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("pass unmodified ctx pointer to helper")
+__success __retval(0)
+__naked void unmodified_ctx_pointer_to_helper(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ call %[bpf_csum_update]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_update)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("pass modified ctx pointer to helper, 1")
+__failure __msg("negative offset ctx ptr R1 off=-612 disallowed")
+__naked void ctx_pointer_to_helper_1(void)
+{
+ asm volatile (" \
+ r1 += -612; \
+ r2 = 0; \
+ call %[bpf_csum_update]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_update)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("pass modified ctx pointer to helper, 2")
+__failure __msg("negative offset ctx ptr R1 off=-612 disallowed")
+__failure_unpriv __msg_unpriv("negative offset ctx ptr R1 off=-612 disallowed")
+__naked void ctx_pointer_to_helper_2(void)
+{
+ asm volatile (" \
+ r1 += -612; \
+ call %[bpf_get_socket_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_socket_cookie)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("pass modified ctx pointer to helper, 3")
+__failure __msg("variable ctx access var_off=(0x0; 0x4)")
+__naked void ctx_pointer_to_helper_3(void)
+{
+ asm volatile (" \
+ r3 = *(u32*)(r1 + 0); \
+ r3 &= 4; \
+ r1 += r3; \
+ r2 = 0; \
+ call %[bpf_csum_update]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_update)
+ : __clobber_all);
+}
+
+SEC("cgroup/sendmsg6")
+__description("pass ctx or null check, 1: ctx")
+__success
+__naked void or_null_check_1_ctx(void)
+{
+ asm volatile (" \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/sendmsg6")
+__description("pass ctx or null check, 2: null")
+__success
+__naked void or_null_check_2_null(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/sendmsg6")
+__description("pass ctx or null check, 3: 1")
+__failure __msg("R1 type=scalar expected=ctx")
+__naked void or_null_check_3_1(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/sendmsg6")
+__description("pass ctx or null check, 4: ctx - const")
+__failure __msg("negative offset ctx ptr R1 off=-612 disallowed")
+__naked void null_check_4_ctx_const(void)
+{
+ asm volatile (" \
+ r1 += -612; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/connect4")
+__description("pass ctx or null check, 5: null (connect)")
+__success
+__naked void null_check_5_null_connect(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/post_bind4")
+__description("pass ctx or null check, 6: null (bind)")
+__success
+__naked void null_check_6_null_bind(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/post_bind4")
+__description("pass ctx or null check, 7: ctx (bind)")
+__success
+__naked void null_check_7_ctx_bind(void)
+{
+ asm volatile (" \
+ call %[bpf_get_socket_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_socket_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/post_bind4")
+__description("pass ctx or null check, 8: null (bind)")
+__failure __msg("R1 type=scalar expected=ctx")
+__naked void null_check_8_null_bind(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_get_socket_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_socket_cookie)
+ : __clobber_all);
+}
+
+#define narrow_load(type, ctx, field) \
+ SEC(type) \
+ __description("narrow load on field " #field " of " #ctx) \
+ __failure __msg("invalid bpf_context access") \
+ __naked void invalid_narrow_load##ctx##field(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u32 *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(off, offsetof(struct ctx, field) + 4) \
+ : __clobber_all); \
+ }
+
+narrow_load("cgroup/getsockopt", bpf_sockopt, sk);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end);
+narrow_load("tc", __sk_buff, sk);
+narrow_load("cgroup/bind4", bpf_sock_addr, sk);
+narrow_load("sockops", bpf_sock_ops, sk);
+narrow_load("sockops", bpf_sock_ops, skb_data);
+narrow_load("sockops", bpf_sock_ops, skb_data_end);
+narrow_load("sockops", bpf_sock_ops, skb_hwtstamp);
+
+#define unaligned_access(type, ctx, field) \
+ SEC(type) \
+ __description("unaligned access on field " #field " of " #ctx) \
+ __failure __msg("invalid bpf_context access") \
+ __naked void unaligned_ctx_access_##ctx##field(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u%[size] *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(size, sizeof_field(struct ctx, field) * 8), \
+ __imm_const(off, offsetof(struct ctx, field) + 1) \
+ : __clobber_all); \
+ }
+
+unaligned_access("flow_dissector", __sk_buff, data);
+unaligned_access("netfilter", bpf_nf_ctx, skb);
+
+#define padding_access(type, ctx, prev_field, sz) \
+ SEC(type) \
+ __description("access on " #ctx " padding after " #prev_field) \
+ __naked void padding_ctx_access_##ctx(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u%[size] *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(size, sz * 8), \
+ __imm_const(off, offsetofend(struct ctx, prev_field)) \
+ : __clobber_all); \
+ }
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/bind4", bpf_sock_addr, msg_src_ip6[3], 4);
+
+__success
+padding_access("sk_lookup", bpf_sk_lookup, remote_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("tc", __sk_buff, tstamp_type, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/post_bind4", bpf_sock, dst_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("sk_reuseport", sk_reuseport_md, hash, 4);
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c b/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c
new file mode 100644
index 000000000000..65edc89799f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ctx_sk_msg.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("sk_msg")
+__description("valid access family in SK_MSG")
+__success
+__naked void access_family_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_family]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_family, offsetof(struct sk_msg_md, family))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access remote_ip4 in SK_MSG")
+__success
+__naked void remote_ip4_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip4]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_remote_ip4, offsetof(struct sk_msg_md, remote_ip4))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access local_ip4 in SK_MSG")
+__success
+__naked void local_ip4_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip4]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_local_ip4, offsetof(struct sk_msg_md, local_ip4))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access remote_port in SK_MSG")
+__success
+__naked void remote_port_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_port]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_remote_port, offsetof(struct sk_msg_md, remote_port))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access local_port in SK_MSG")
+__success
+__naked void local_port_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_port]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_local_port, offsetof(struct sk_msg_md, local_port))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("valid access remote_ip6 in SK_MSG")
+__success
+__naked void remote_ip6_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_0]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_1]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_2]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_3]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_remote_ip6_0, offsetof(struct sk_msg_md, remote_ip6[0])),
+ __imm_const(sk_msg_md_remote_ip6_1, offsetof(struct sk_msg_md, remote_ip6[1])),
+ __imm_const(sk_msg_md_remote_ip6_2, offsetof(struct sk_msg_md, remote_ip6[2])),
+ __imm_const(sk_msg_md_remote_ip6_3, offsetof(struct sk_msg_md, remote_ip6[3]))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("valid access local_ip6 in SK_MSG")
+__success
+__naked void local_ip6_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_0]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_1]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_2]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_3]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_local_ip6_0, offsetof(struct sk_msg_md, local_ip6[0])),
+ __imm_const(sk_msg_md_local_ip6_1, offsetof(struct sk_msg_md, local_ip6[1])),
+ __imm_const(sk_msg_md_local_ip6_2, offsetof(struct sk_msg_md, local_ip6[2])),
+ __imm_const(sk_msg_md_local_ip6_3, offsetof(struct sk_msg_md, local_ip6[3]))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access size in SK_MSG")
+__success
+__naked void access_size_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_size]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_size, offsetof(struct sk_msg_md, size))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("invalid 64B read of size in SK_MSG")
+__failure __msg("invalid bpf_context access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void of_size_in_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_size]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_size, offsetof(struct sk_msg_md, size))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("invalid read past end of SK_MSG")
+__failure __msg("invalid bpf_context access")
+__naked void past_end_of_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__imm_0]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, offsetof(struct sk_msg_md, size) + 4)
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("invalid read offset in SK_MSG")
+__failure __msg("invalid bpf_context access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void read_offset_in_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__imm_0]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, offsetof(struct sk_msg_md, family) + 1)
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("direct packet read for SK_MSG")
+__success
+__naked void packet_read_for_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_data]); \
+ r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)),
+ __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("direct packet write for SK_MSG")
+__success
+__naked void packet_write_for_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_data]); \
+ r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)),
+ __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("overlapping checks for direct packet access SK_MSG")
+__success
+__naked void direct_packet_access_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_data]); \
+ r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u16*)(r2 + 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)),
+ __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_d_path.c b/tools/testing/selftests/bpf/progs/verifier_d_path.c
new file mode 100644
index 000000000000..87e51a215558
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/d_path.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("fentry/dentry_open")
+__description("d_path accept")
+__success __retval(0)
+__naked void d_path_accept(void)
+{
+ asm volatile (" \
+ r1 = *(u64 *)(r1 + 0); \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 0; \
+ *(u64*)(r2 + 0) = r6; \
+ r3 = 8 ll; \
+ call %[bpf_d_path]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_d_path)
+ : __clobber_all);
+}
+
+SEC("fentry/d_path")
+__description("d_path reject")
+__failure __msg("helper call is not allowed in probe")
+__naked void d_path_reject(void)
+{
+ asm volatile (" \
+ r1 = *(u64 *)(r1 + 0); \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 0; \
+ *(u64*)(r2 + 0) = r6; \
+ r3 = 8 ll; \
+ call %[bpf_d_path]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_d_path)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
new file mode 100644
index 000000000000..911caa8fd1b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -0,0 +1,862 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */
+
+#include <linux/if_ether.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("tc")
+__description("pkt_end - pkt_start is allowed")
+__success __retval(TEST_DATA_LEN)
+__naked void end_pkt_start_is_allowed(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r0 -= r2; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test1")
+__success __retval(0)
+__naked void direct_packet_access_test1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test2")
+__success __retval(0)
+__naked void direct_packet_access_test2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r4 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r5 = r3; \
+ r5 += 14; \
+ if r5 > r4 goto l0_%=; \
+ r0 = *(u8*)(r3 + 7); \
+ r4 = *(u8*)(r3 + 12); \
+ r4 *= 14; \
+ r3 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 += r4; \
+ r2 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r2 <<= 49; \
+ r2 >>= 49; \
+ r3 += r2; \
+ r2 = r3; \
+ r2 += 8; \
+ r1 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ if r2 > r1 goto l1_%=; \
+ r1 = *(u8*)(r3 + 4); \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("direct packet access: test3")
+__failure __msg("invalid bpf_context access off=76")
+__failure_unpriv
+__naked void direct_packet_access_test3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test4 (write)")
+__success __retval(0)
+__naked void direct_packet_access_test4_write(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test5 (pkt_end >= reg, good access)")
+__success __retval(0)
+__naked void pkt_end_reg_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test6 (pkt_end >= reg, bad access)")
+__failure __msg("invalid access to packet")
+__naked void pkt_end_reg_bad_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test7 (pkt_end >= reg, both accesses)")
+__failure __msg("invalid access to packet")
+__naked void pkt_end_reg_both_accesses(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test8 (double test, variant 1)")
+__success __retval(0)
+__naked void test8_double_test_variant_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ if r0 > r3 goto l1_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l1_%=: r0 = 1; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test9 (double test, variant 2)")
+__success __retval(0)
+__naked void test9_double_test_variant_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ r0 = 1; \
+ exit; \
+l0_%=: if r0 > r3 goto l1_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l1_%=: r0 = *(u8*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test10 (write invalid)")
+__failure __msg("invalid access to packet")
+__naked void packet_access_test10_write_invalid(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: *(u8*)(r2 + 0) = r2; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test11 (shift, good access)")
+__success __retval(1)
+__naked void access_test11_shift_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r3 = 144; \
+ r5 = r3; \
+ r5 += 23; \
+ r5 >>= 3; \
+ r6 = r2; \
+ r6 += r5; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test12 (and, good access)")
+__success __retval(1)
+__naked void access_test12_and_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r3 = 144; \
+ r5 = r3; \
+ r5 += 23; \
+ r5 &= 15; \
+ r6 = r2; \
+ r6 += r5; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test13 (branches, good access)")
+__success __retval(1)
+__naked void access_test13_branches_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r3 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ r4 = 1; \
+ if r3 > r4 goto l1_%=; \
+ r3 = 14; \
+ goto l2_%=; \
+l1_%=: r3 = 24; \
+l2_%=: r5 = r3; \
+ r5 += 23; \
+ r5 &= 15; \
+ r6 = r2; \
+ r6 += r5; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)")
+__success __retval(1)
+__naked void _0_const_imm_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r5 = 12; \
+ r5 >>= 4; \
+ r6 = r2; \
+ r6 += r5; \
+ r0 = *(u8*)(r6 + 0); \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test15 (spill with xadd)")
+__failure __msg("R2 invalid mem access 'scalar'")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void access_test15_spill_with_xadd(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r5 = 4096; \
+ r4 = r10; \
+ r4 += -8; \
+ *(u64*)(r4 + 0) = r2; \
+ lock *(u64 *)(r4 + 0) += r5; \
+ r2 = *(u64*)(r4 + 0); \
+ *(u32*)(r2 + 0) = r5; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test16 (arith on data_end)")
+__failure __msg("R3 pointer arithmetic on pkt_end")
+__naked void test16_arith_on_data_end(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ r3 += 16; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test17 (pruning, alignment)")
+__failure __msg("misaligned packet access off 2+0+15+-4 size 4")
+__flag(BPF_F_STRICT_ALIGNMENT)
+__naked void packet_access_test17_pruning_alignment(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ r0 = r2; \
+ r0 += 14; \
+ if r7 > 1 goto l0_%=; \
+l2_%=: if r0 > r3 goto l1_%=; \
+ *(u32*)(r0 - 4) = r0; \
+l1_%=: r0 = 0; \
+ exit; \
+l0_%=: r0 += 1; \
+ goto l2_%=; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test18 (imm += pkt_ptr, 1)")
+__success __retval(0)
+__naked void test18_imm_pkt_ptr_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = 8; \
+ r0 += r2; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test19 (imm += pkt_ptr, 2)")
+__success __retval(0)
+__naked void test19_imm_pkt_ptr_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r4 = 4; \
+ r4 += r2; \
+ *(u8*)(r4 + 0) = r4; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test20 (x += pkt_ptr, 1)")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void test20_x_pkt_ptr_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = 0xffffffff; \
+ *(u64*)(r10 - 8) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ r0 &= 0x7fff; \
+ r4 = r0; \
+ r4 += r2; \
+ r5 = r4; \
+ r4 += %[__imm_0]; \
+ if r4 > r3 goto l0_%=; \
+ *(u64*)(r5 + 0) = r4; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0x7fff - 1),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test21 (x += pkt_ptr, 2)")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void test21_x_pkt_ptr_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r4 = 0xffffffff; \
+ *(u64*)(r10 - 8) = r4; \
+ r4 = *(u64*)(r10 - 8); \
+ r4 &= 0x7fff; \
+ r4 += r2; \
+ r5 = r4; \
+ r4 += %[__imm_0]; \
+ if r4 > r3 goto l0_%=; \
+ *(u64*)(r5 + 0) = r4; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0x7fff - 1),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test22 (x += pkt_ptr, 3)")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void test22_x_pkt_ptr_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ *(u64*)(r10 - 8) = r2; \
+ *(u64*)(r10 - 16) = r3; \
+ r3 = *(u64*)(r10 - 16); \
+ if r0 > r3 goto l0_%=; \
+ r2 = *(u64*)(r10 - 8); \
+ r4 = 0xffffffff; \
+ lock *(u64 *)(r10 - 8) += r4; \
+ r4 = *(u64*)(r10 - 8); \
+ r4 >>= 49; \
+ r4 += r2; \
+ r0 = r4; \
+ r0 += 2; \
+ if r0 > r3 goto l0_%=; \
+ r2 = 1; \
+ *(u16*)(r4 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test23 (x += pkt_ptr, 4)")
+__failure __msg("invalid access to packet, off=0 size=8, R5(id=3,off=0,r=0)")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void test23_x_pkt_ptr_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ *(u64*)(r10 - 8) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ r0 &= 0xffff; \
+ r4 = r0; \
+ r0 = 31; \
+ r0 += r4; \
+ r0 += r2; \
+ r5 = r0; \
+ r0 += %[__imm_0]; \
+ if r0 > r3 goto l0_%=; \
+ *(u64*)(r5 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffff - 1),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test24 (x += pkt_ptr, 5)")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void test24_x_pkt_ptr_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = 0xffffffff; \
+ *(u64*)(r10 - 8) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ r0 &= 0xff; \
+ r4 = r0; \
+ r0 = 64; \
+ r0 += r4; \
+ r0 += r2; \
+ r5 = r0; \
+ r0 += %[__imm_0]; \
+ if r0 > r3 goto l0_%=; \
+ *(u64*)(r5 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0x7fff - 1),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test25 (marking on <, good access)")
+__success __retval(0)
+__naked void test25_marking_on_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 < r3 goto l0_%=; \
+l1_%=: r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ goto l1_%=; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test26 (marking on <, bad access)")
+__failure __msg("invalid access to packet")
+__naked void test26_marking_on_bad_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 < r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l1_%=: r0 = 0; \
+ exit; \
+l0_%=: goto l1_%=; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test27 (marking on <=, good access)")
+__success __retval(1)
+__naked void test27_marking_on_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 <= r0 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test28 (marking on <=, bad access)")
+__failure __msg("invalid access to packet")
+__naked void test28_marking_on_bad_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 <= r0 goto l0_%=; \
+l1_%=: r0 = 1; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ goto l1_%=; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test29 (reg > pkt_end in subprog)")
+__success __retval(0)
+__naked void reg_pkt_end_in_subprog(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r2 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r3 = r6; \
+ r3 += 8; \
+ call reg_pkt_end_in_subprog__1; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u8*)(r6 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void reg_pkt_end_in_subprog__1(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r3 > r2 goto l0_%=; \
+ r0 = 1; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test30 (check_id() in regsafe(), bad access)")
+__failure __msg("invalid access to packet, off=0 size=1, R2")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void id_in_regsafe_bad_access(void)
+{
+ asm volatile (" \
+ /* r9 = ctx */ \
+ r9 = r1; \
+ /* r7 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r7 = r0; \
+ /* r6 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r6 = r0; \
+ /* r2 = ctx->data \
+ * r3 = ctx->data \
+ * r4 = ctx->data_end \
+ */ \
+ r2 = *(u32*)(r9 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r9 + %[__sk_buff_data]); \
+ r4 = *(u32*)(r9 + %[__sk_buff_data_end]); \
+ /* if r6 > 100 goto exit \
+ * if r7 > 100 goto exit \
+ */ \
+ if r6 > 100 goto l0_%=; \
+ if r7 > 100 goto l0_%=; \
+ /* r2 += r6 ; this forces assignment of ID to r2\
+ * r2 += 1 ; get some fixed off for r2\
+ * r3 += r7 ; this forces assignment of ID to r3\
+ * r3 += 1 ; get some fixed off for r3\
+ */ \
+ r2 += r6; \
+ r2 += 1; \
+ r3 += r7; \
+ r3 += 1; \
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from\
+ * ; this check, thus produced verifier states differ\
+ * ; only in 'insn_idx' \
+ * r2 = r3 ; optionally share ID between r2 and r3\
+ */ \
+ if r6 != r7 goto l1_%=; \
+ r2 = r3; \
+l1_%=: /* if r3 > ctx->data_end goto exit */ \
+ if r3 > r4 goto l0_%=; \
+ /* r5 = *(u8 *) (r2 - 1) ; access packet memory using r2,\
+ * ; this is not always safe\
+ */ \
+ r5 = *(u8*)(r2 - 1); \
+l0_%=: /* exit(0) */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+#define access_test_non_linear(name, type, desc, retval, linear_sz, off) \
+ SEC(type) \
+ __description("direct packet access: " #name " (non-linear, " type ", " desc ")") \
+ __success __retval(retval) \
+ __linear_size(linear_sz) \
+ __naked void access_non_linear_##name(void) \
+ { \
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[skb_data]); \
+ r3 = *(u32*)(r1 + %[skb_data_end]); \
+ r0 = r2; \
+ r0 += %[offset]; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 - 1); \
+ r0 = 0; \
+ exit; \
+ l0_%=: r0 = 1; \
+ exit; \
+ " : \
+ : __imm_const(skb_data, offsetof(struct __sk_buff, data)), \
+ __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)), \
+ __imm_const(offset, off) \
+ : __clobber_all); \
+ }
+
+access_test_non_linear(test31, "tc", "too short eth", 1, ETH_HLEN, 22);
+access_test_non_linear(test32, "tc", "too short 1", 1, 1, 22);
+access_test_non_linear(test33, "tc", "long enough", 0, 22, 22);
+access_test_non_linear(test34, "cgroup_skb/ingress", "too short eth", 1, ETH_HLEN, 8);
+access_test_non_linear(test35, "cgroup_skb/ingress", "too short 1", 1, 1, 8);
+access_test_non_linear(test36, "cgroup_skb/ingress", "long enough", 0, 22, 8);
+
+SEC("tc")
+__description("direct packet access: test37 (non-linear, linearized)")
+__success __retval(0)
+__linear_size(ETH_HLEN)
+__naked void access_non_linear_linearized(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r2 = 22; \
+ call %[bpf_skb_pull_data]; \
+ r2 = *(u32*)(r6 + %[skb_data]); \
+ r3 = *(u32*)(r6 + %[skb_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 - 1); \
+ exit; \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_skb_pull_data),
+ __imm_const(skb_data, offsetof(struct __sk_buff, data)),
+ __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c b/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c
new file mode 100644
index 000000000000..c538c6893552
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("direct stack access with 32-bit wraparound. test1")
+__failure __msg("fp pointer and 2147483647")
+__failure_unpriv
+__naked void with_32_bit_wraparound_test1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0x7fffffff; \
+ r1 += 0x7fffffff; \
+ w0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("direct stack access with 32-bit wraparound. test2")
+__failure __msg("fp pointer and 1073741823")
+__failure_unpriv
+__naked void with_32_bit_wraparound_test2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0x3fffffff; \
+ r1 += 0x3fffffff; \
+ w0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("direct stack access with 32-bit wraparound. test3")
+__failure __msg("fp pointer offset 1073741822")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void with_32_bit_wraparound_test3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0x1fffffff; \
+ r1 += 0x1fffffff; \
+ w0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div0.c b/tools/testing/selftests/bpf/progs/verifier_div0.c
new file mode 100644
index 000000000000..cca5ea18fc28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_div0.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/div0.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("DIV32 by 0, zero check 1")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_1_1(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ w2 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("DIV32 by 0, zero check 2")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_2_1(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ r1 = 0xffffffff00000000LL ll; \
+ w2 = 1; \
+ w2 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("DIV64 by 0, zero check")
+__success __success_unpriv __retval(42)
+__naked void div64_by_0_zero_check(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ r2 /= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOD32 by 0, zero check 1")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_1_2(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ w2 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOD32 by 0, zero check 2")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_2_2(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ r1 = 0xffffffff00000000LL ll; \
+ w2 = 1; \
+ w2 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOD64 by 0, zero check")
+__success __success_unpriv __retval(42)
+__naked void mod64_by_0_zero_check(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ r2 %%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 by 0, zero check ok, cls")
+__success __retval(8)
+__naked void _0_zero_check_ok_cls_1(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 2; \
+ w2 = 16; \
+ w2 /= w1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 by 0, zero check 1, cls")
+__success __retval(0)
+__naked void _0_zero_check_1_cls_1(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 1; \
+ w0 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 by 0, zero check 2, cls")
+__success __retval(0)
+__naked void _0_zero_check_2_cls_1(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff00000000LL ll; \
+ w0 = 1; \
+ w0 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV64 by 0, zero check, cls")
+__success __retval(0)
+__naked void by_0_zero_check_cls(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 1; \
+ r0 /= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 by 0, zero check ok, cls")
+__success __retval(2)
+__naked void _0_zero_check_ok_cls_2(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 3; \
+ w2 = 5; \
+ w2 %%= w1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 by 0, zero check 1, cls")
+__success __retval(1)
+__naked void _0_zero_check_1_cls_2(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 1; \
+ w0 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 by 0, zero check 2, cls")
+__success __retval(1)
+__naked void _0_zero_check_2_cls_2(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff00000000LL ll; \
+ w0 = 1; \
+ w0 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 by 0, zero check 1, cls")
+__success __retval(2)
+__naked void _0_zero_check_1_cls_3(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 2; \
+ r0 %%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 by 0, zero check 2, cls")
+__success __retval(-1)
+__naked void _0_zero_check_2_cls_3(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = -1; \
+ r0 %%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
new file mode 100644
index 000000000000..34e0c012ee76
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/div_overflow.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+#include "bpf_misc.h"
+
+/* Just make sure that JITs used udiv/umod as otherwise we get
+ * an exception from INT_MIN/-1 overflow similarly as with div
+ * by zero.
+ */
+
+SEC("tc")
+__description("DIV32 overflow, check 1")
+__success __retval(0)
+__naked void div32_overflow_check_1(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w0 = %[int_min]; \
+ w0 /= w1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 overflow, check 2")
+__success __retval(0)
+__naked void div32_overflow_check_2(void)
+{
+ asm volatile (" \
+ w0 = %[int_min]; \
+ w0 /= -1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("DIV64 overflow, check 1")
+__success __retval(0)
+__naked void div64_overflow_check_1(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ r2 = %[llong_min] ll; \
+ r2 /= r1; \
+ w0 = 0; \
+ if r0 == r2 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("DIV64 overflow, check 2")
+__success __retval(0)
+__naked void div64_overflow_check_2(void)
+{
+ asm volatile (" \
+ r1 = %[llong_min] ll; \
+ r1 /= -1; \
+ w0 = 0; \
+ if r0 == r1 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 overflow, check 1")
+__success __retval(_INT_MIN)
+__naked void mod32_overflow_check_1(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w0 = %[int_min]; \
+ w0 %%= w1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 overflow, check 2")
+__success __retval(_INT_MIN)
+__naked void mod32_overflow_check_2(void)
+{
+ asm volatile (" \
+ w0 = %[int_min]; \
+ w0 %%= -1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 overflow, check 1")
+__success __retval(1)
+__naked void mod64_overflow_check_1(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ r2 = %[llong_min] ll; \
+ r3 = r2; \
+ r2 %%= r1; \
+ w0 = 0; \
+ if r3 != r2 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 overflow, check 2")
+__success __retval(1)
+__naked void mod64_overflow_check_2(void)
+{
+ asm volatile (" \
+ r2 = %[llong_min] ll; \
+ r3 = r2; \
+ r2 %%= -1; \
+ w0 = 0; \
+ if r3 != r2 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
new file mode 100644
index 000000000000..1204fbc58178
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "xdp_metadata.h"
+#include "bpf_kfuncs.h"
+
+extern struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+extern void bpf_task_release(struct task_struct *p) __ksym __weak;
+
+__weak int subprog_trusted_task_nullable(struct task_struct *task __arg_trusted __arg_nullable)
+{
+ if (!task)
+ return 0;
+ return task->pid + task->tgid;
+}
+
+__weak int subprog_trusted_task_nullable_extra_layer(struct task_struct *task __arg_trusted __arg_nullable)
+{
+ return subprog_trusted_task_nullable(task) + subprog_trusted_task_nullable(NULL);
+}
+
+SEC("?tp_btf/task_newtask")
+__success __log_level(2)
+__msg("Validating subprog_trusted_task_nullable() func#1...")
+__msg(": R1=trusted_ptr_or_null_task_struct(")
+int trusted_task_arg_nullable(void *ctx)
+{
+ struct task_struct *t1 = bpf_get_current_task_btf();
+ struct task_struct *t2 = bpf_task_acquire(t1);
+ int res = 0;
+
+ /* known NULL */
+ res += subprog_trusted_task_nullable(NULL);
+
+ /* known non-NULL */
+ res += subprog_trusted_task_nullable(t1);
+ res += subprog_trusted_task_nullable_extra_layer(t1);
+
+ /* unknown if NULL or not */
+ res += subprog_trusted_task_nullable(t2);
+ res += subprog_trusted_task_nullable_extra_layer(t2);
+
+ if (t2) {
+ /* known non-NULL after explicit NULL check, just in case */
+ res += subprog_trusted_task_nullable(t2);
+ res += subprog_trusted_task_nullable_extra_layer(t2);
+
+ bpf_task_release(t2);
+ }
+
+ return res;
+}
+
+__weak int subprog_trusted_task_nonnull(struct task_struct *task __arg_trusted)
+{
+ return task->pid + task->tgid;
+}
+
+SEC("?kprobe")
+__failure __log_level(2)
+__msg("R1 type=scalar expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
+int trusted_task_arg_nonnull_fail1(void *ctx)
+{
+ return subprog_trusted_task_nonnull(NULL);
+}
+
+SEC("?tp_btf/task_newtask")
+__failure __log_level(2)
+__msg("R1 type=ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
+int trusted_task_arg_nonnull_fail2(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+ struct task_struct *nullable;
+ int res;
+
+ nullable = bpf_task_acquire(t);
+
+ /* should fail, PTR_TO_BTF_ID_OR_NULL */
+ res = subprog_trusted_task_nonnull(nullable);
+
+ if (nullable)
+ bpf_task_release(nullable);
+
+ return res;
+}
+
+SEC("?kprobe")
+__success __log_level(2)
+__msg("Validating subprog_trusted_task_nonnull() func#1...")
+__msg(": R1=trusted_ptr_task_struct(")
+int trusted_task_arg_nonnull(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+
+ return subprog_trusted_task_nonnull(t);
+}
+
+struct task_struct___local {} __attribute__((preserve_access_index));
+
+__weak int subprog_nullable_task_flavor(
+ struct task_struct___local *task __arg_trusted __arg_nullable)
+{
+ char buf[16];
+
+ if (!task)
+ return 0;
+
+ return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0);
+}
+
+SEC("?uprobe.s")
+__success __log_level(2)
+__msg("Validating subprog_nullable_task_flavor() func#1...")
+__msg(": R1=trusted_ptr_or_null_task_struct(")
+int flavor_ptr_nullable(void *ctx)
+{
+ struct task_struct___local *t = (void *)bpf_get_current_task_btf();
+
+ return subprog_nullable_task_flavor(t);
+}
+
+__weak int subprog_nonnull_task_flavor(struct task_struct___local *task __arg_trusted)
+{
+ char buf[16];
+
+ return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0);
+}
+
+SEC("?uprobe.s")
+__success __log_level(2)
+__msg("Validating subprog_nonnull_task_flavor() func#1...")
+__msg(": R1=trusted_ptr_task_struct(")
+int flavor_ptr_nonnull(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+
+ return subprog_nonnull_task_flavor((void *)t);
+}
+
+__weak int subprog_trusted_destroy(struct task_struct *task __arg_trusted)
+{
+ bpf_task_release(task); /* should be rejected */
+
+ return 0;
+}
+
+SEC("?tp_btf/task_newtask")
+__failure __log_level(2)
+__msg("release kernel function bpf_task_release expects refcounted PTR_TO_BTF_ID")
+int BPF_PROG(trusted_destroy_fail, struct task_struct *task, u64 clone_flags)
+{
+ return subprog_trusted_destroy(task);
+}
+
+__weak int subprog_trusted_acq_rel(struct task_struct *task __arg_trusted)
+{
+ struct task_struct *owned;
+
+ owned = bpf_task_acquire(task);
+ if (!owned)
+ return 0;
+
+ bpf_task_release(owned); /* this one is OK, we acquired it locally */
+
+ return 0;
+}
+
+SEC("?tp_btf/task_newtask")
+__success __log_level(2)
+int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags)
+{
+ return subprog_trusted_acq_rel(task);
+}
+
+__weak int subprog_untrusted_bad_tags(struct task_struct *task __arg_untrusted __arg_nullable)
+{
+ return task->pid;
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("arg#0 untrusted cannot be combined with any other tags")
+int untrusted_bad_tags(void *ctx)
+{
+ return subprog_untrusted_bad_tags(0);
+}
+
+struct local_type_wont_be_accepted {};
+
+__weak int subprog_untrusted_bad_type(struct local_type_wont_be_accepted *p __arg_untrusted)
+{
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("arg#0 reference type('STRUCT local_type_wont_be_accepted') has no matches")
+int untrusted_bad_type(void *ctx)
+{
+ return subprog_untrusted_bad_type(bpf_rdonly_cast(0, 0));
+}
+
+__weak int subprog_untrusted(const volatile struct task_struct *restrict task __arg_untrusted)
+{
+ return task->pid;
+}
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
+__msg("Func#1 ('subprog_untrusted') is global and assumed valid.")
+__msg("Validating subprog_untrusted() func#1...")
+__msg(": R1=untrusted_ptr_task_struct")
+int trusted_to_untrusted(void *ctx)
+{
+ return subprog_untrusted(bpf_get_current_task_btf());
+}
+
+char mem[16];
+u32 offset;
+
+SEC("tp_btf/sys_enter")
+__success
+int anything_to_untrusted(void *ctx)
+{
+ /* untrusted to untrusted */
+ subprog_untrusted(bpf_core_cast(0, struct task_struct));
+ /* wrong type to untrusted */
+ subprog_untrusted((void *)bpf_core_cast(0, struct bpf_verifier_env));
+ /* map value to untrusted */
+ subprog_untrusted((void *)mem);
+ /* scalar to untrusted */
+ subprog_untrusted(0);
+ /* variable offset to untrusted (map) */
+ subprog_untrusted((void *)mem + offset);
+ /* variable offset to untrusted (trusted) */
+ subprog_untrusted((void *)bpf_get_current_task_btf() + offset);
+ return 0;
+}
+
+__weak int subprog_untrusted2(struct task_struct *task __arg_untrusted)
+{
+ return subprog_trusted_task_nullable(task);
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("R1 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#{{.*}} ('subprog_trusted_task_nullable')")
+int untrusted_to_trusted(void *ctx)
+{
+ return subprog_untrusted2(bpf_get_current_task_btf());
+}
+
+__weak int subprog_void_untrusted(void *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+__weak int subprog_char_untrusted(char *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+__weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
+__msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.")
+__msg("Validating subprog_void_untrusted() func#1...")
+__msg(": R1=rdonly_untrusted_mem(sz=0)")
+int trusted_to_untrusted_mem(void *ctx)
+{
+ return subprog_void_untrusted(bpf_get_current_task_btf());
+}
+
+SEC("tp_btf/sys_enter")
+__success
+int anything_to_untrusted_mem(void *ctx)
+{
+ /* untrusted to untrusted mem */
+ subprog_void_untrusted(bpf_core_cast(0, struct task_struct));
+ /* map value to untrusted mem */
+ subprog_void_untrusted(mem);
+ /* scalar to untrusted mem */
+ subprog_void_untrusted(0);
+ /* variable offset to untrusted mem (map) */
+ subprog_void_untrusted((void *)mem + offset);
+ /* variable offset to untrusted mem (trusted) */
+ subprog_void_untrusted(bpf_get_current_task_btf() + offset);
+ /* variable offset to untrusted char/enum (map) */
+ subprog_char_untrusted(mem + offset);
+ subprog_enum_untrusted((void *)mem + offset);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
new file mode 100644
index 000000000000..20904cd2baa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "xdp_metadata.h"
+#include "bpf_kfuncs.h"
+#include "err.h"
+
+/* The compiler may be able to detect the access to uninitialized
+ memory in the routines performing out of bound memory accesses and
+ emit warnings about it. This is the case of GCC. */
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
+int arr[1];
+int unkn_idx;
+const volatile bool call_dead_subprog = false;
+
+__noinline long global_bad(void)
+{
+ return arr[unkn_idx]; /* BOOM */
+}
+
+__noinline long global_good(void)
+{
+ return arr[0];
+}
+
+__noinline long global_calls_bad(void)
+{
+ return global_good() + global_bad() /* does BOOM indirectly */;
+}
+
+__noinline long global_calls_good_only(void)
+{
+ return global_good();
+}
+
+__noinline long global_dead(void)
+{
+ return arr[0] * 2;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* main prog is validated completely first */
+__msg("('global_calls_good_only') is global and assumed valid.")
+/* eventually global_good() is transitively validated as well */
+__msg("Validating global_good() func")
+__msg("('global_good') is safe for any args that match its prototype")
+int chained_global_func_calls_success(void)
+{
+ int sum = 0;
+
+ if (call_dead_subprog)
+ sum += global_dead();
+ return global_calls_good_only() + sum;
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+/* main prog validated successfully first */
+__msg("('global_calls_bad') is global and assumed valid.")
+/* eventually we validate global_bad() and fail */
+__msg("Validating global_bad() func")
+__msg("math between map_value pointer and register") /* BOOM */
+int chained_global_func_calls_bad(void)
+{
+ return global_calls_bad();
+}
+
+/* do out of bounds access forcing verifier to fail verification if this
+ * global func is called
+ */
+__noinline int global_unsupp(const int *mem)
+{
+ if (!mem)
+ return 0;
+ return mem[100]; /* BOOM */
+}
+
+const volatile bool skip_unsupp_global = true;
+
+SEC("?raw_tp")
+__success
+int guarded_unsupp_global_called(void)
+{
+ if (!skip_unsupp_global)
+ return global_unsupp(NULL);
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__msg("Func#1 ('global_unsupp') is global and assumed valid.")
+__msg("Validating global_unsupp() func#1...")
+__msg("value is outside of the allowed memory range")
+int unguarded_unsupp_global_called(void)
+{
+ int x = 0;
+
+ return global_unsupp(&x);
+}
+
+long stack[128];
+
+__weak int subprog_nullable_ptr_bad(int *p)
+{
+ return (*p) * 2; /* bad, missing null check */
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__msg("invalid mem access 'mem_or_null'")
+int arg_tag_nullable_ptr_fail(void *ctx)
+{
+ int x = 42;
+
+ return subprog_nullable_ptr_bad(&x);
+}
+
+typedef struct {
+ int x;
+} user_struct_t;
+
+__noinline __weak int subprog_user_anon_mem(user_struct_t *t)
+{
+ return t ? t->x : 0;
+}
+
+SEC("?tracepoint")
+__failure __log_level(2)
+__msg("invalid bpf_context access")
+__msg("Caller passes invalid args into func#1 ('subprog_user_anon_mem')")
+int anon_user_mem_invalid(void *ctx)
+{
+ /* can't pass PTR_TO_CTX as user memory */
+ return subprog_user_anon_mem(ctx);
+}
+
+SEC("?tracepoint")
+__success __log_level(2)
+__msg("Func#1 ('subprog_user_anon_mem') is safe for any args that match its prototype")
+int anon_user_mem_valid(void *ctx)
+{
+ user_struct_t t = { .x = 42 };
+
+ return subprog_user_anon_mem(&t);
+}
+
+__noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull)
+{
+ return (*p1) * (*p2); /* good, no need for NULL checks */
+}
+
+int x = 47;
+
+SEC("?raw_tp")
+__success __log_level(2)
+int arg_tag_nonnull_ptr_good(void *ctx)
+{
+ int y = 74;
+
+ return subprog_nonnull_ptr_good(&x, &y);
+}
+
+/* this global subprog can be now called from many types of entry progs, each
+ * with different context type
+ */
+__weak int subprog_ctx_tag(void *ctx __arg_ctx)
+{
+ return bpf_get_stack(ctx, stack, sizeof(stack), 0);
+}
+
+__weak int raw_tp_canonical(struct bpf_raw_tracepoint_args *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int raw_tp_u64_array(u64 *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+int arg_tag_ctx_raw_tp(void *ctx)
+{
+ return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx);
+}
+
+SEC("?raw_tp.w")
+__success __log_level(2)
+int arg_tag_ctx_raw_tp_writable(void *ctx)
+{
+ return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx);
+}
+
+SEC("?tp_btf/sys_enter")
+__success __log_level(2)
+int arg_tag_ctx_raw_tp_btf(void *ctx)
+{
+ return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx);
+}
+
+struct whatever { };
+
+__weak int tp_whatever(struct whatever *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?tp")
+__success __log_level(2)
+int arg_tag_ctx_tp(void *ctx)
+{
+ return subprog_ctx_tag(ctx) + tp_whatever(ctx);
+}
+
+__weak int kprobe_subprog_pt_regs(struct pt_regs *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int kprobe_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?kprobe")
+__success __log_level(2)
+int arg_tag_ctx_kprobe(void *ctx)
+{
+ return subprog_ctx_tag(ctx) +
+ kprobe_subprog_pt_regs(ctx) +
+ kprobe_subprog_typedef(ctx);
+}
+
+__weak int perf_subprog_regs(
+#if defined(bpf_target_riscv)
+ struct user_regs_struct *ctx __arg_ctx
+#elif defined(bpf_target_s390)
+ /* user_pt_regs typedef is anonymous struct, so only `void *` works */
+ void *ctx __arg_ctx
+#elif defined(bpf_target_loongarch) || defined(bpf_target_arm64) || defined(bpf_target_powerpc)
+ struct user_pt_regs *ctx __arg_ctx
+#else
+ struct pt_regs *ctx __arg_ctx
+#endif
+)
+{
+ return 0;
+}
+
+__weak int perf_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int perf_subprog_canonical(struct bpf_perf_event_data *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?perf_event")
+__success __log_level(2)
+int arg_tag_ctx_perf(void *ctx)
+{
+ return subprog_ctx_tag(ctx) +
+ perf_subprog_regs(ctx) +
+ perf_subprog_typedef(ctx) +
+ perf_subprog_canonical(ctx);
+}
+
+__weak int iter_subprog_void(void *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int iter_subprog_typed(struct bpf_iter__task *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?iter/task")
+__success __log_level(2)
+int arg_tag_ctx_iter_task(struct bpf_iter__task *ctx)
+{
+ return (iter_subprog_void(ctx) + iter_subprog_typed(ctx)) & 1;
+}
+
+__weak int tracing_subprog_void(void *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int tracing_subprog_u64(u64 *ctx __arg_ctx)
+{
+ return 0;
+}
+
+int acc;
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_fentry)
+{
+ acc += tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+ return 0;
+}
+
+SEC("?fexit/" SYS_PREFIX "sys_nanosleep")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_fexit)
+{
+ acc += tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+ return 0;
+}
+
+SEC("?fmod_ret/" SYS_PREFIX "sys_nanosleep")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_fmod_ret)
+{
+ return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+}
+
+SEC("?lsm/bpf")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_lsm)
+{
+ int ret;
+
+ ret = tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+ set_if_not_errno_or_zero(ret, -1);
+ return ret;
+}
+
+SEC("?struct_ops/test_1")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_struct_ops)
+{
+ return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+}
+
+SEC(".struct_ops")
+struct bpf_dummy_ops dummy_1 = {
+ .test_1 = (void *)arg_tag_ctx_struct_ops,
+};
+
+SEC("?syscall")
+__success __log_level(2)
+int arg_tag_ctx_syscall(void *ctx)
+{
+ return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx) + tp_whatever(ctx);
+}
+
+__weak int subprog_dynptr(struct bpf_dynptr *dptr)
+{
+ long *d, t, buf[1] = {};
+
+ d = bpf_dynptr_data(dptr, 0, sizeof(long));
+ if (!d)
+ return 0;
+
+ t = *d + 1;
+
+ d = bpf_dynptr_slice(dptr, 0, &buf, sizeof(long));
+ if (!d)
+ return t;
+
+ t = *d + 2;
+
+ return t;
+}
+
+SEC("?xdp")
+__success __log_level(2)
+int arg_tag_dynptr(struct xdp_md *ctx)
+{
+ struct bpf_dynptr dptr;
+
+ bpf_dynptr_from_xdp(ctx, 0, &dptr);
+
+ return subprog_dynptr(&dptr);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
new file mode 100644
index 000000000000..d5d8f24df394
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#ifdef CAN_USE_GOTOL
+
+SEC("socket")
+__description("gotol, small_imm")
+__success __success_unpriv __retval(1)
+__naked void gotol_small_imm(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 == 0 goto l0_%=; \
+ gotol l1_%=; \
+l2_%=: \
+ gotol l3_%=; \
+l1_%=: \
+ r0 = 1; \
+ gotol l2_%=; \
+l0_%=: \
+ r0 = 2; \
+l3_%=: \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("gotol, large_imm")
+__success __failure_unpriv __retval(40000)
+__naked void gotol_large_imm(void)
+{
+ asm volatile (" \
+ gotol 1f; \
+0: \
+ r0 = 0; \
+ .rept 40000; \
+ r0 += 1; \
+ .endr; \
+ exit; \
+1: gotol 0b; \
+" :
+ :
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotox.c b/tools/testing/selftests/bpf/progs/verifier_gotox.c
new file mode 100644
index 000000000000..607dad058ca1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_gotox.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Isovalent */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../../../include/linux/filter.h"
+
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+
+#define DEFINE_SIMPLE_JUMP_TABLE_PROG(NAME, SRC_REG, OFF, IMM, OUTCOME) \
+ \
+ SEC("socket") \
+ OUTCOME \
+ __naked void jump_table_ ## NAME(void) \
+ { \
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+ jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+ " : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, (SRC_REG), (OFF) , (IMM))) \
+ : __clobber_all); \
+ }
+
+/*
+ * The first program which doesn't use reserved fields
+ * loads and works properly. The rest fail to load.
+ */
+DEFINE_SIMPLE_JUMP_TABLE_PROG(ok, BPF_REG_0, 0, 0, __success __retval(1))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_src_reg, BPF_REG_1, 0, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_off, BPF_REG_0, 1, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_imm, BPF_REG_0, 0, 1, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+
+/*
+ * Gotox is forbidden when there is no jump table loaded
+ * which points to the sub-function where the gotox is used
+ */
+SEC("socket")
+__failure __msg("no jump tables found for subprog starting at 0")
+__naked void jump_table_no_jump_table(void)
+{
+ asm volatile (" \
+ .8byte %[gotox_r0]; \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+/*
+ * Incorrect type of the target register, only PTR_TO_INSN allowed
+ */
+SEC("socket")
+__failure __msg("R1 has type scalar, expected PTR_TO_INSN")
+__naked void jump_table_incorrect_dst_reg_type(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ r1 = 42; \
+ .8byte %[gotox_r1]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r1, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_1, 0, 0 , 0))
+ : __clobber_all);
+}
+
+#define DEFINE_INVALID_SIZE_PROG(READ_SIZE, OUTCOME) \
+ \
+ SEC("socket") \
+ OUTCOME \
+ __naked void jump_table_invalid_read_size_ ## READ_SIZE(void) \
+ { \
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+ jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(" #READ_SIZE " *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+ " : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) \
+ : __clobber_all); \
+ }
+
+DEFINE_INVALID_SIZE_PROG(u32, __failure __msg("Invalid read of 4 bytes from insn_array"))
+DEFINE_INVALID_SIZE_PROG(u16, __failure __msg("Invalid read of 2 bytes from insn_array"))
+DEFINE_INVALID_SIZE_PROG(u8, __failure __msg("Invalid read of 1 bytes from insn_array"))
+
+SEC("socket")
+__failure __msg("misaligned value access off 0+1+0 size 8")
+__naked void jump_table_misaligned_access(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 1; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("invalid access to map value, value_size=16 off=24 size=8")
+__naked void jump_table_invalid_mem_acceess_pos(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 24; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("invalid access to map value, value_size=16 off=-24 size=8")
+__naked void jump_table_invalid_mem_acceess_neg(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 -= 24; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void jump_table_add_sub_ok(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 -= 24; \
+ r0 += 32; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("write into map forbidden, value_size=16 off=8 size=8")
+__naked void jump_table_no_writes(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r1 = 0xbeef; \
+ *(u64 *)(r0 + 0) = r1; \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+#define DEFINE_JUMP_TABLE_USE_REG(REG) \
+ SEC("socket") \
+ __success __retval(1) \
+ __naked void jump_table_use_reg_r ## REG(void) \
+ { \
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+ jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r" #REG " = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_rX]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+ " : \
+ : __imm_insn(gotox_rX, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_ ## REG, 0, 0 , 0)) \
+ : __clobber_all); \
+ }
+
+DEFINE_JUMP_TABLE_USE_REG(0)
+DEFINE_JUMP_TABLE_USE_REG(1)
+DEFINE_JUMP_TABLE_USE_REG(2)
+DEFINE_JUMP_TABLE_USE_REG(3)
+DEFINE_JUMP_TABLE_USE_REG(4)
+DEFINE_JUMP_TABLE_USE_REG(5)
+DEFINE_JUMP_TABLE_USE_REG(6)
+DEFINE_JUMP_TABLE_USE_REG(7)
+DEFINE_JUMP_TABLE_USE_REG(8)
+DEFINE_JUMP_TABLE_USE_REG(9)
+
+__used static int test_subprog(void)
+{
+ return 0;
+}
+
+SEC("socket")
+__failure __msg("jump table for insn 4 points outside of the subprog [0,10]")
+__naked void jump_table_outside_subprog(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret_out_%= - socket; \
+ .size jt0_%=, 24; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ call test_subprog; \
+ exit; \
+ ret_out_%=: \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void jump_table_contains_non_unique_values(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 80; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+#endif /* __TARGET_ARCH_x86 || __TARGET_ARCH_arm64 */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
new file mode 100644
index 000000000000..f2c54e4d89eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
@@ -0,0 +1,825 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_access_var_len.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, bitwise AND + JMP, correct bounds")
+__success
+__naked void bitwise_and_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = 16; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ r2 &= 64; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("helper access to variable memory: stack, bitwise AND, zero included")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack R2 off -64+0 size 64")
+__retval(0)
+__naked void stack_bitwise_and_zero_included(void)
+{
+ asm volatile (" \
+ /* set max stack size */ \
+ r6 = 0; \
+ *(u64*)(r10 - 128) = r6; \
+ /* set r3 to a random value */ \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ /* use bitwise AND to limit r3 range to [0, 64] */\
+ r3 &= 64; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = r10; \
+ r2 += -64; \
+ r4 = 0; \
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with\
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\
+ * For unpriv this should signal an error, because memory at &fp[-64] is\
+ * not initialized. \
+ */ \
+ call %[bpf_ringbuf_output]; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_ringbuf_output),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, bitwise AND + JMP, wrong max")
+__failure __msg("invalid write to stack R1 off=-64 size=65")
+__naked void bitwise_and_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ r2 &= 65; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, correct bounds")
+__success
+__naked void memory_stack_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = 16; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 > 64 goto l0_%=; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP (signed), correct bounds")
+__success
+__naked void stack_jmp_signed_correct_bounds(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = 16; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 s> 64 goto l0_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, bounds + offset")
+__failure __msg("invalid write to stack R1 off=-64 size=65")
+__naked void memory_stack_jmp_bounds_offset(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 > 64 goto l0_%=; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r2 += 1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, wrong max")
+__failure __msg("invalid write to stack R1 off=-64 size=65")
+__naked void memory_stack_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 > 65 goto l0_%=; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, no max check")
+__failure
+/* because max wasn't checked, signed min is negative */
+__msg("R2 min value is negative, either use unsigned or 'var &= const'")
+__naked void stack_jmp_no_max_check(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("helper access to variable memory: stack, JMP, no min check")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack R2 off -64+0 size 64")
+__retval(0)
+__naked void stack_jmp_no_min_check(void)
+{
+ asm volatile (" \
+ /* set max stack size */ \
+ r6 = 0; \
+ *(u64*)(r10 - 128) = r6; \
+ /* set r3 to a random value */ \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ /* use JMP to limit r3 range to [0, 64] */ \
+ if r3 > 64 goto l0_%=; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = r10; \
+ r2 += -64; \
+ r4 = 0; \
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with\
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\
+ * For unpriv this should signal an error, because memory at &fp[-64] is\
+ * not initialized. \
+ */ \
+ call %[bpf_ringbuf_output]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_ringbuf_output),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP (signed), no min check")
+__failure __msg("R2 min value is negative")
+__naked void jmp_signed_no_min_check(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 s> 64 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map, JMP, correct bounds")
+__success
+__naked void memory_map_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[sizeof_test_val]; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[sizeof_test_val] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map, JMP, wrong max")
+__failure __msg("invalid access to map value, value_size=48 off=0 size=49")
+__naked void memory_map_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r6 = *(u64*)(r1 + 8); \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = r6; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[__imm_0] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) + 1)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map adjusted, JMP, correct bounds")
+__success
+__naked void map_adjusted_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += 20; \
+ r2 = %[sizeof_test_val]; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[__imm_0] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - 20),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map adjusted, JMP, wrong max")
+__failure __msg("R1 min value is outside of the allowed memory range")
+__naked void map_adjusted_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r6 = *(u64*)(r1 + 8); \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += 20; \
+ r2 = r6; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[__imm_0] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - 19)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)")
+__failure __msg("R1 type=scalar expected=fp")
+__naked void ptr_to_mem_or_null_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + 0); \
+ r1 = 0; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ r2 &= 64; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r2 = 0; \
+ *(u64*)(r1 + 0) = r2; \
+ r2 &= 8; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_5(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r1 = r10; \
+ r1 += -8; \
+ *(u64*)(r1 + 0) = r2; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_6(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+/* csum_diff of 64-byte packet */
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void ptr_to_mem_or_null_7(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r6; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r1 = r6; \
+ r2 = *(u64*)(r6 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)")
+__failure __msg("R1 type=scalar expected=fp")
+__naked void ptr_to_mem_or_null_8(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)")
+__failure __msg("R1 type=scalar expected=fp")
+__naked void ptr_to_mem_or_null_9(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r2 = 1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_10(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_11(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_12(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r1 = r10; \
+ r1 += -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_13(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("helper access to variable memory: 8 bytes leak")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack R2 off -64+32 size 64")
+__retval(0)
+__naked void variable_memory_8_bytes_leak(void)
+{
+ asm volatile (" \
+ /* set max stack size */ \
+ r6 = 0; \
+ *(u64*)(r10 - 128) = r6; \
+ /* set r3 to a random value */ \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = r10; \
+ r2 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ /* Note: fp[-32] left uninitialized */ \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ /* Limit r3 range to [1, 64] */ \
+ r3 &= 63; \
+ r3 += 1; \
+ r4 = 0; \
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with\
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\
+ * For unpriv this should signal an error, because memory region [1, 64]\
+ * at &fp[-64] is not fully initialized. \
+ */ \
+ call %[bpf_ringbuf_output]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_ringbuf_output),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: 8 bytes no leak (init memory)")
+__success
+__naked void bytes_no_leak_init_memory(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r0 = 0; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r1 += -64; \
+ r2 = 0; \
+ r2 &= 32; \
+ r2 += 32; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ r1 = *(u64*)(r10 - 16); \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
new file mode 100644
index 000000000000..74f5f9cd153d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_packet_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("xdp")
+__description("helper access to packet: test1, valid packet_ptr range")
+__success __retval(0)
+__naked void test1_valid_packet_ptr_range(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r3 = r2; \
+ r4 = 0; \
+ call %[bpf_map_update_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test2, unchecked packet_ptr")
+__failure __msg("invalid access to packet")
+__naked void packet_test2_unchecked_packet_ptr(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test3, variable add")
+__success __retval(0)
+__naked void to_packet_test3_variable_add(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r5 = *(u8*)(r2 + 0); \
+ r4 = r2; \
+ r4 += r5; \
+ r5 = r4; \
+ r5 += 8; \
+ if r5 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r4; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test4, packet_ptr with bad range")
+__failure __msg("invalid access to packet")
+__naked void packet_ptr_with_bad_range_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r4 = r2; \
+ r4 += 4; \
+ if r4 > r3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test5, packet_ptr with too short range")
+__failure __msg("invalid access to packet")
+__naked void ptr_with_too_short_range_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r2 += 1; \
+ r4 = r2; \
+ r4 += 7; \
+ if r4 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test6, cls valid packet_ptr range")
+__success __retval(0)
+__naked void cls_valid_packet_ptr_range(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r3 = r2; \
+ r4 = 0; \
+ call %[bpf_map_update_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test7, cls unchecked packet_ptr")
+__failure __msg("invalid access to packet")
+__naked void test7_cls_unchecked_packet_ptr(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test8, cls variable add")
+__success __retval(0)
+__naked void packet_test8_cls_variable_add(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r5 = *(u8*)(r2 + 0); \
+ r4 = r2; \
+ r4 += r5; \
+ r5 = r4; \
+ r5 += 8; \
+ if r5 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r4; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test9, cls packet_ptr with bad range")
+__failure __msg("invalid access to packet")
+__naked void packet_ptr_with_bad_range_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = r2; \
+ r4 += 4; \
+ if r4 > r3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test10, cls packet_ptr with too short range")
+__failure __msg("invalid access to packet")
+__naked void ptr_with_too_short_range_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r2 += 1; \
+ r4 = r2; \
+ r4 += 7; \
+ if r4 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test11, cls unsuitable helper 1")
+__failure __msg("helper access to the packet")
+__naked void test11_cls_unsuitable_helper_1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r3 = r6; \
+ r3 += 7; \
+ if r3 > r7 goto l0_%=; \
+ r2 = 0; \
+ r4 = 42; \
+ r5 = 0; \
+ call %[bpf_skb_store_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_skb_store_bytes),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test12, cls unsuitable helper 2")
+__failure __msg("helper access to the packet")
+__naked void test12_cls_unsuitable_helper_2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r3 = r6; \
+ r6 += 8; \
+ if r6 > r7 goto l0_%=; \
+ r2 = 0; \
+ r4 = 4; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test13, cls helper ok")
+__success __retval(0)
+__naked void packet_test13_cls_helper_ok(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test14, cls helper ok sub")
+__success __retval(0)
+__naked void test14_cls_helper_ok_sub(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 -= 4; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test15, cls helper fail sub")
+__failure __msg("invalid access to packet")
+__naked void test15_cls_helper_fail_sub(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 -= 12; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test16, cls helper fail range 1")
+__failure __msg("invalid access to packet")
+__naked void cls_helper_fail_range_1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test17, cls helper fail range 2")
+__failure __msg("R2 min value is negative")
+__naked void cls_helper_fail_range_2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = -9; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test18, cls helper fail range 3")
+__failure __msg("R2 min value is negative")
+__naked void cls_helper_fail_range_3(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__imm_0, ~0),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test19, cls helper range zero")
+__success __retval(0)
+__naked void test19_cls_helper_range_zero(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test20, pkt end as input")
+__failure __msg("R1 type=pkt_end expected=fp")
+__naked void test20_pkt_end_as_input(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r7; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test21, wrong reg")
+__failure __msg("invalid access to packet")
+__naked void to_packet_test21_wrong_reg(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
new file mode 100644
index 000000000000..059aa716e3d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_restricted.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct val {
+ int cnt;
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct val);
+} map_spin_lock SEC(".maps");
+
+struct timer {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct timer);
+} map_timer SEC(".maps");
+
+SEC("kprobe")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
+__naked void in_bpf_prog_type_kprobe_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
+__naked void in_bpf_prog_type_tracepoint_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
+__naked void bpf_prog_type_perf_event_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
+__naked void bpf_prog_type_raw_tracepoint_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("kprobe")
+__description("bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void in_bpf_prog_type_kprobe_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void bpf_prog_type_perf_event_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void in_bpf_prog_type_tracepoint_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void bpf_prog_type_raw_tracepoint_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("kprobe")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void in_bpf_prog_type_kprobe_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void in_bpf_prog_type_tracepoint_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void bpf_prog_type_perf_event_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void bpf_prog_type_raw_tracepoint_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
new file mode 100644
index 000000000000..886498b5e6f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
@@ -0,0 +1,1282 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_value_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("tracepoint")
+__description("helper access to map: full range")
+__success
+__naked void access_to_map_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[sizeof_test_val]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: partial range")
+__success
+__naked void access_to_map_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+/* Call a function taking a pointer and a size which doesn't allow the size to
+ * be zero (i.e. bpf_trace_printk() declares the second argument to be
+ * ARG_CONST_SIZE, not ARG_CONST_SIZE_OR_ZERO). We attempt to pass zero for the
+ * size and expect to fail.
+ */
+SEC("tracepoint")
+__description("helper access to map: empty range")
+__failure __msg("R2 invalid zero-sized read: u64=[0,0]")
+__naked void access_to_map_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+/* Like the test above, but this time the size register is not known to be zero;
+ * its lower-bound is zero though, which is still unacceptable.
+ */
+SEC("tracepoint")
+__description("helper access to map: possibly-empty ange")
+__failure __msg("R2 invalid zero-sized read: u64=[0,4]")
+__naked void access_to_map_possibly_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ /* Read an unknown value */ \
+ r7 = *(u64*)(r0 + 0); \
+ /* Make it small and positive, to avoid other errors */ \
+ r7 &= 4; \
+ r2 = 0; \
+ r2 += r7; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: out-of-bound range")
+__failure __msg("invalid access to map value, value_size=48 off=0 size=56")
+__naked void map_out_of_bound_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) + 8)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: negative range")
+__failure __msg("R2 min value is negative")
+__naked void access_to_map_negative_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): full range")
+__success
+__naked void via_const_imm_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): partial range")
+__success
+__naked void via_const_imm_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): empty range")
+__failure __msg("R2 invalid zero-sized read")
+__naked void via_const_imm_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): out-of-bound range")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=52")
+__naked void imm_out_of_bound_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): negative range (> adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_imm_negative_range_adjustment_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): negative range (< adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_imm_negative_range_adjustment_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = -1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): full range")
+__success
+__naked void via_const_reg_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): partial range")
+__success
+__naked void via_const_reg_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): empty range")
+__failure __msg("R2 invalid zero-sized read")
+__naked void via_const_reg_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = 0; \
+ r1 += r3; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): out-of-bound range")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=52")
+__naked void reg_out_of_bound_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): negative range (> adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_reg_negative_range_adjustment_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): negative range (< adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_reg_negative_range_adjustment_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = -1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): full range")
+__success
+__naked void map_via_variable_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): partial range")
+__success
+__naked void map_via_variable_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): empty range")
+__failure __msg("R2 invalid zero-sized read")
+__naked void map_via_variable_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): no max check")
+__failure __msg("R1 unbounded memory access")
+__naked void via_variable_no_max_check_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ r1 += r3; \
+ r2 = 1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): wrong max check")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=45")
+__naked void via_variable_wrong_max_check_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <, good access")
+__success
+__naked void bounds_check_using_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 < 32 goto l1_%=; \
+ r0 = 0; \
+l0_%=: exit; \
+l1_%=: r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <, bad access")
+__failure __msg("R1 unbounded memory access")
+__naked void bounds_check_using_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 < 32 goto l1_%=; \
+ r1 += r3; \
+l0_%=: r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <=, good access")
+__success
+__naked void bounds_check_using_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 <= 32 goto l1_%=; \
+ r0 = 0; \
+l0_%=: exit; \
+l1_%=: r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <=, bad access")
+__failure __msg("R1 unbounded memory access")
+__naked void bounds_check_using_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 <= 32 goto l1_%=; \
+ r1 += r3; \
+l0_%=: r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<, good access")
+__success
+__naked void check_using_s_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s< 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s< 0 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<, good access 2")
+__success
+__naked void using_s_good_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s< 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s< -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<, bad access")
+__failure __msg("R1 min value is negative")
+__naked void check_using_s_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u64*)(r0 + 0); \
+ if r3 s< 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s< -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<=, good access")
+__success
+__naked void check_using_s_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s<= 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s<= 0 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<=, good access 2")
+__success
+__naked void using_s_good_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s<= 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s<= -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<=, bad access")
+__failure __msg("R1 min value is negative")
+__naked void check_using_s_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u64*)(r0 + 0); \
+ if r3 s<= 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s<= -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map lookup helper access to map")
+__success
+__naked void lookup_helper_access_to_map(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map update helper access to map")
+__success
+__naked void update_helper_access_to_map(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r4 = 0; \
+ r3 = r0; \
+ r2 = r0; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_update_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map update helper access to map: wrong size")
+__failure __msg("invalid access to map value, value_size=8 off=0 size=16")
+__naked void access_to_map_wrong_size(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r4 = 0; \
+ r3 = r0; \
+ r2 = r0; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_update_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_16b),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const imm)")
+__success
+__naked void adjusted_map_via_const_imm(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r2 += %[other_val_bar]; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(other_val_bar, offsetof(struct other_val, bar))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const imm): out-of-bound 1")
+__failure __msg("invalid access to map value, value_size=16 off=12 size=8")
+__naked void imm_out_of_bound_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r2 += %[__imm_0]; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(__imm_0, sizeof(struct other_val) - 4)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const imm): out-of-bound 2")
+__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__naked void imm_out_of_bound_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r2 += -4; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const reg)")
+__success
+__naked void adjusted_map_via_const_reg(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = %[other_val_bar]; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(other_val_bar, offsetof(struct other_val, bar))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const reg): out-of-bound 1")
+__failure __msg("invalid access to map value, value_size=16 off=12 size=8")
+__naked void reg_out_of_bound_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = %[__imm_0]; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(__imm_0, sizeof(struct other_val) - 4)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const reg): out-of-bound 2")
+__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__naked void reg_out_of_bound_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = -4; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via variable)")
+__success
+__naked void to_adjusted_map_via_variable(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[other_val_bar] goto l0_%=; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(other_val_bar, offsetof(struct other_val, bar))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via variable): no max check")
+__failure
+__msg("R2 unbounded memory access, make sure to bounds check any such access")
+__naked void via_variable_no_max_check_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via variable): wrong max check")
+__failure __msg("invalid access to map value, value_size=16 off=9 size=8")
+__naked void via_variable_wrong_max_check_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[__imm_0] goto l0_%=; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(__imm_0, offsetof(struct other_val, bar) + 1)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
new file mode 100644
index 000000000000..59e34d558654
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/int_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("arg pointer to long uninitialized")
+__success
+__naked void arg_ptr_to_long_uninitialized(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -8; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("arg pointer to long half-uninitialized")
+__success
+__retval(0)
+__naked void ptr_to_long_half_uninitialized(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -8; \
+ *(u32*)(r7 + 0) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("arg pointer to long misaligned")
+__failure __msg("misaligned stack access off 0+-20+0 size 8")
+__naked void arg_ptr_to_long_misaligned(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -12; \
+ r0 = 0; \
+ *(u32*)(r7 + 0) = r0; \
+ *(u64*)(r7 + 4) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("arg pointer to long size < sizeof(long)")
+__failure __msg("invalid write to stack R4 off=-4 size=8")
+__naked void to_long_size_sizeof_long(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -16; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += 12; \
+ *(u32*)(r7 + 0) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("arg pointer to long initialized")
+__success
+__naked void arg_ptr_to_long_initialized(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -8; \
+ *(u64*)(r7 + 0) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
new file mode 100644
index 000000000000..75dd922e4e9f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -0,0 +1,786 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 8);
+ __type(key, __u32);
+ __type(value, __u64);
+} map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+ __uint(max_entries, 8);
+} ringbuf SEC(".maps");
+
+struct vm_area_struct;
+struct bpf_map;
+
+struct buf_context {
+ char *buf;
+};
+
+struct num_context {
+ __u64 i;
+ __u64 j;
+};
+
+__u8 choice_arr[2] = { 0, 1 };
+
+static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
+{
+ if (idx == 0) {
+ ctx->buf = (char *)(0xDEAD);
+ return 0;
+ }
+
+ if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
+ return 1;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R1 type=scalar expected=fp")
+int unsafe_on_2nd_iter(void *unused)
+{
+ char buf[4];
+ struct buf_context loop_ctx = { .buf = buf };
+
+ bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
+ return 0;
+}
+
+static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i = 0;
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_on_zero_iter(void *unused)
+{
+ struct num_context loop_ctx = { .i = 32 };
+
+ bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static int widening_cb(__u32 idx, struct num_context *ctx)
+{
+ ++ctx->i;
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int widening(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0, .j = 1 };
+
+ bpf_loop(100, widening_cb, &loop_ctx, 0);
+ /* loop_ctx.j is not changed during callback iteration,
+ * verifier should not apply widening to it.
+ */
+ return choice_arr[loop_ctx.j];
+}
+
+static int loop_detection_cb(__u32 idx, struct num_context *ctx)
+{
+ for (;;) {}
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("infinite loop detected")
+int loop_detection(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_loop(100, loop_detection_cb, &loop_ctx, 0);
+ return 0;
+}
+
+static __always_inline __u64 oob_state_machine(struct num_context *ctx)
+{
+ switch (ctx->i) {
+ case 0:
+ ctx->i = 1;
+ break;
+ case 1:
+ ctx->i = 32;
+ break;
+ }
+ return 0;
+}
+
+static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_for_each_map_elem(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_ringbuf_drain(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_find_vma(void *unused)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static int iter_limit_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i++;
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int bpf_loop_iter_limit_ok(void *unused)
+{
+ struct num_context ctx = { .i = 0 };
+
+ bpf_loop(1, iter_limit_cb, &ctx, 0);
+ return choice_arr[ctx.i];
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=2 size=1")
+int bpf_loop_iter_limit_overflow(void *unused)
+{
+ struct num_context ctx = { .i = 0 };
+
+ bpf_loop(2, iter_limit_cb, &ctx, 0);
+ return choice_arr[ctx.i];
+}
+
+static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 100;
+ return 0;
+}
+
+static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 10;
+ return 0;
+}
+
+static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 1;
+ bpf_loop(1, iter_limit_level2a_cb, ctx, 0);
+ bpf_loop(1, iter_limit_level2b_cb, ctx, 0);
+ return 0;
+}
+
+/* Check that path visiting every callback function once had been
+ * reached by verifier. Variables 'ctx{1,2}i' below serve as flags,
+ * with each decimal digit corresponding to a callback visit marker.
+ */
+SEC("socket")
+__success __retval(111111)
+int bpf_loop_iter_limit_nested(void *unused)
+{
+ struct num_context ctx1 = { .i = 0 };
+ struct num_context ctx2 = { .i = 0 };
+ __u64 a, b, c;
+
+ bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
+ bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);
+ a = ctx1.i;
+ b = ctx2.i;
+ /* Force 'ctx1.i' and 'ctx2.i' precise. */
+ c = choice_arr[(a + b) % 2];
+ /* This makes 'c' zero, but neither clang nor verifier know it. */
+ c /= 10;
+ /* Make sure that verifier does not visit 'impossible' states:
+ * enumerate all possible callback visit masks.
+ */
+ if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 &&
+ b != 0 && b != 1 && b != 11 && b != 101 && b != 111)
+ asm volatile ("r0 /= 0;" ::: "r0");
+ return 1000 * a + b + c;
+}
+
+struct iter_limit_bug_ctx {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+};
+
+static __naked void iter_limit_bug_cb(void)
+{
+ /* This is the same as C code below, but written
+ * in assembly to control which branches are fall-through.
+ *
+ * switch (bpf_get_prandom_u32()) {
+ * case 1: ctx->a = 42; break;
+ * case 2: ctx->b = 42; break;
+ * default: ctx->c = 42; break;
+ * }
+ */
+ asm volatile (
+ "r9 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "r1 = r0;"
+ "r2 = 42;"
+ "r0 = 0;"
+ "if r1 == 0x1 goto 1f;"
+ "if r1 == 0x2 goto 2f;"
+ "*(u64 *)(r9 + 16) = r2;"
+ "exit;"
+ "1: *(u64 *)(r9 + 0) = r2;"
+ "exit;"
+ "2: *(u64 *)(r9 + 8) = r2;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+int tmp_var;
+SEC("socket")
+__failure __msg("infinite loop detected at insn 2")
+__naked void jgt_imm64_and_may_goto(void)
+{
+ asm volatile (" \
+ r0 = %[tmp_var] ll; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -3; /* off -3 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm_addr(tmp_var)
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("infinite loop detected at insn 1")
+__naked void may_goto_self(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -1; /* off -1 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void may_goto_neg_off(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+ goto l0_%=; \
+ goto l1_%=; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -2; /* off -2 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+l1_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+int iter_limit_bug(struct __sk_buff *skb)
+{
+ struct iter_limit_bug_ctx ctx = { 7, 7, 7 };
+
+ bpf_loop(2, iter_limit_bug_cb, &ctx, 0);
+
+ /* This is the same as C code below,
+ * written in assembly to guarantee checks order.
+ *
+ * if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7)
+ * asm volatile("r1 /= 0;":::"r1");
+ */
+ asm volatile (
+ "r1 = *(u64 *)%[ctx_a];"
+ "if r1 != 42 goto 1f;"
+ "r1 = *(u64 *)%[ctx_b];"
+ "if r1 != 42 goto 1f;"
+ "r1 = *(u64 *)%[ctx_c];"
+ "if r1 != 7 goto 1f;"
+ "r1 /= 0;"
+ "1:"
+ :
+ : [ctx_a]"m"(ctx.a),
+ [ctx_b]"m"(ctx.b),
+ [ctx_c]"m"(ctx.c)
+ : "r1"
+ );
+ return 0;
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto2(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void jlt_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: call %[bpf_jiffies64]; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ if r0 < 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm(bpf_jiffies64)
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_GOTOL
+SEC("socket")
+__success __retval(0)
+__naked void gotol_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ gotol l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+#endif
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto_subprog(void)
+{
+ asm volatile (" \
+ call subprog_with_may_goto; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __used
+void subprog_with_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+#define ARR_SZ 1000000
+int zero;
+char arr[ARR_SZ];
+
+SEC("socket")
+__success __retval(0xd495cdc0)
+int cond_break1(const void *ctx)
+{
+ unsigned long i;
+ unsigned int sum = 0;
+
+ for (i = zero; i < ARR_SZ && can_loop; i++)
+ sum += i;
+ for (i = zero; i < ARR_SZ; i++) {
+ barrier_var(i);
+ sum += i + arr[i];
+ cond_break;
+ }
+
+ return sum;
+}
+
+SEC("socket")
+__success __retval(999000000)
+int cond_break2(const void *ctx)
+{
+ int i, j;
+ int sum = 0;
+
+ for (i = zero; i < 1000 && can_loop; i++)
+ for (j = zero; j < 1000; j++) {
+ sum += i + j;
+ cond_break;
+ }
+ return sum;
+}
+
+static __noinline int loop(void)
+{
+ int i, sum = 0;
+
+ for (i = zero; i <= 1000000 && can_loop; i++)
+ sum += i;
+
+ return sum;
+}
+
+SEC("socket")
+__success __retval(0x6a5a2920)
+int cond_break3(const void *ctx)
+{
+ return loop();
+}
+
+SEC("socket")
+__success __retval(1)
+int cond_break4(const void *ctx)
+{
+ int cnt = zero;
+
+ for (;;) {
+ /* should eventually break out of the loop */
+ cond_break;
+ cnt++;
+ }
+ /* if we looped a bit, it's a success */
+ return cnt > 1 ? 1 : 0;
+}
+
+static __noinline int static_subprog(void)
+{
+ int cnt = zero;
+
+ for (;;) {
+ cond_break;
+ cnt++;
+ }
+
+ return cnt;
+}
+
+SEC("socket")
+__success __retval(1)
+int cond_break5(const void *ctx)
+{
+ int cnt1 = zero, cnt2;
+
+ for (;;) {
+ cond_break;
+ cnt1++;
+ }
+
+ cnt2 = static_subprog();
+
+ /* main and subprog have to loop a bit */
+ return cnt1 > 1 && cnt2 > 1 ? 1 : 0;
+}
+
+#define ARR2_SZ 1000
+SEC(".data.arr2")
+char arr2[ARR2_SZ];
+
+SEC("socket")
+__success __flag(BPF_F_TEST_STATE_FREQ)
+int loop_inside_iter(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ __u64 i = 0;
+
+ bpf_iter_num_new(&it, 0, ARR2_SZ);
+ while ((v = bpf_iter_num_next(&it))) {
+ if (i < ARR2_SZ)
+ sum += arr2[i++];
+ }
+ bpf_iter_num_destroy(&it);
+ return sum;
+}
+
+SEC("socket")
+__success __flag(BPF_F_TEST_STATE_FREQ)
+int loop_inside_iter_signed(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ long i = 0;
+
+ bpf_iter_num_new(&it, 0, ARR2_SZ);
+ while ((v = bpf_iter_num_next(&it))) {
+ if (i < ARR2_SZ && i >= 0)
+ sum += arr2[i++];
+ }
+ bpf_iter_num_destroy(&it);
+ return sum;
+}
+
+volatile const int limit = ARR2_SZ;
+
+SEC("socket")
+__success __flag(BPF_F_TEST_STATE_FREQ)
+int loop_inside_iter_volatile_limit(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ __u64 i = 0;
+
+ bpf_iter_num_new(&it, 0, ARR2_SZ);
+ while ((v = bpf_iter_num_next(&it))) {
+ if (i < limit)
+ sum += arr2[i++];
+ }
+ bpf_iter_num_destroy(&it);
+ return sum;
+}
+
+#define ARR_LONG_SZ 1000
+
+SEC(".data.arr_long")
+long arr_long[ARR_LONG_SZ];
+
+SEC("socket")
+__success
+int test1(const void *ctx)
+{
+ long i;
+
+ for (i = 0; i < ARR_LONG_SZ && can_loop; i++)
+ arr_long[i] = i;
+ return 0;
+}
+
+SEC("socket")
+__success
+int test2(const void *ctx)
+{
+ __u64 i;
+
+ for (i = zero; i < ARR_LONG_SZ && can_loop; i++) {
+ barrier_var(i);
+ arr_long[i] = i;
+ }
+ return 0;
+}
+
+SEC(".data.arr_foo")
+struct {
+ int a;
+ int b;
+} arr_foo[ARR_LONG_SZ];
+
+SEC("socket")
+__success
+int test3(const void *ctx)
+{
+ __u64 i;
+
+ for (i = zero; i < ARR_LONG_SZ && can_loop; i++) {
+ barrier_var(i);
+ arr_foo[i].a = i;
+ arr_foo[i].b = i;
+ }
+ return 0;
+}
+
+SEC("socket")
+__success
+int test4(const void *ctx)
+{
+ long i;
+
+ for (i = zero + ARR_LONG_SZ - 1; i < ARR_LONG_SZ && i >= 0 && can_loop; i--) {
+ barrier_var(i);
+ arr_foo[i].a = i;
+ arr_foo[i].b = i;
+ }
+ return 0;
+}
+
+char buf[10] SEC(".data.buf");
+
+SEC("socket")
+__description("check add const")
+__success
+__naked void check_add_const(void)
+{
+ /* typical LLVM generated loop with may_goto */
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 9 goto l1_%=; \
+l0_%=: r1 = %[buf]; \
+ r2 = r0; \
+ r1 += r2; \
+ r3 = *(u8 *)(r1 +0); \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 4; /* off of l1_%=: */ \
+ .long 0; /* imm */ \
+ r0 = r2; \
+ r0 += 1; \
+ if r2 < 9 goto l0_%=; \
+ exit; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+SEC("socket")
+__failure
+__msg("*(u8 *)(r7 +0) = r0")
+__msg("invalid access to map value, value_size=10 off=10 size=1")
+__naked void check_add_const_3regs(void)
+{
+ asm volatile (
+ "r6 = %[buf];"
+ "r7 = %[buf];"
+ "call %[bpf_ktime_get_ns];"
+ "r1 = r0;" /* link r0.id == r1.id == r2.id */
+ "r2 = r0;"
+ "r1 += 1;" /* r1 == r0+1 */
+ "r2 += 2;" /* r2 == r0+2 */
+ "if r0 > 8 goto 1f;" /* r0 range [0, 8] */
+ "r6 += r1;" /* r1 range [1, 9] */
+ "r7 += r2;" /* r2 range [2, 10] */
+ "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */
+ "*(u8 *)(r7 +0) = r0;" /* unsafe, out of bounds */
+ "1: exit;"
+ :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+SEC("socket")
+__failure
+__msg("*(u8 *)(r8 -1) = r0")
+__msg("invalid access to map value, value_size=10 off=10 size=1")
+__naked void check_add_const_3regs_2if(void)
+{
+ asm volatile (
+ "r6 = %[buf];"
+ "r7 = %[buf];"
+ "r8 = %[buf];"
+ "call %[bpf_ktime_get_ns];"
+ "if r0 < 2 goto 1f;"
+ "r1 = r0;" /* link r0.id == r1.id == r2.id */
+ "r2 = r0;"
+ "r1 += 1;" /* r1 == r0+1 */
+ "r2 += 2;" /* r2 == r0+2 */
+ "if r2 > 11 goto 1f;" /* r2 range [0, 11] -> r0 range [-2, 9]; r1 range [-1, 10] */
+ "if r0 s< 0 goto 1f;" /* r0 range [0, 9] -> r1 range [1, 10]; r2 range [2, 11]; */
+ "r6 += r0;" /* r0 range [0, 9] */
+ "r7 += r1;" /* r1 range [1, 10] */
+ "r8 += r2;" /* r2 range [2, 11] */
+ "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */
+ "*(u8 *)(r7 -1) = r0;" /* safe */
+ "*(u8 *)(r8 -1) = r0;" /* unsafe */
+ "1: exit;"
+ :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+SEC("socket")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_add_const_regsafe_off(void)
+{
+ asm volatile (
+ "r8 = %[buf];"
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r7 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r1 = r0;" /* same ids for r1 and r0 */
+ "if r6 > r7 goto 1f;" /* this jump can't be predicted */
+ "r1 += 1;" /* r1.off == +1 */
+ "goto 2f;"
+ "1: r1 += 100;" /* r1.off == +100 */
+ "goto +0;" /* verify r1.off in regsafe() after this insn */
+ "2: if r0 > 8 goto 3f;" /* r0 range [0,8], r1 range either [1,9] or [100,108]*/
+ "r8 += r1;"
+ "*(u8 *)(r8 +0) = r0;" /* potentially unsafe, buf size is 10 */
+ "3: exit;"
+ :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c b/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c
new file mode 100644
index 000000000000..bf16b00502f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} map_xskmap SEC(".maps");
+
+/* This is equivalent to the following program:
+ *
+ * r6 = skb->sk;
+ * r7 = sk_fullsock(r6);
+ * r0 = sk_fullsock(r6);
+ * if (r0 == 0) return 0; (a)
+ * if (r0 != r7) return 0; (b)
+ * *r7->type; (c)
+ * return 0;
+ *
+ * It is safe to dereference r7 at point (c), because of (a) and (b).
+ * The test verifies that relation r0 == r7 is propagated from (b) to (c).
+ */
+SEC("cgroup/skb")
+__description("jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JNE false branch")
+__success __failure_unpriv __msg_unpriv("R7 pointer comparison")
+__retval(0)
+__naked void socket_for_jne_false_branch(void)
+{
+ asm volatile (" \
+ /* r6 = skb->sk; */ \
+ r6 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ /* if (r6 == 0) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* r7 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ /* r0 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ /* if (r0 == null) return 0; */ \
+ if r0 == 0 goto l0_%=; \
+ /* if (r0 == r7) r0 = *(r7->type); */ \
+ if r0 != r7 goto l0_%=; /* Use ! JNE ! */\
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+l0_%=: /* return 0 */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+/* Same as above, but verify that another branch of JNE still
+ * prohibits access to PTR_MAYBE_NULL.
+ */
+SEC("cgroup/skb")
+__description("jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JNE true branch")
+__failure __msg("R7 invalid mem access 'sock_or_null'")
+__failure_unpriv __msg_unpriv("R7 pointer comparison")
+__naked void unchanged_for_jne_true_branch(void)
+{
+ asm volatile (" \
+ /* r6 = skb->sk */ \
+ r6 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ /* if (r6 == 0) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* r7 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ /* r0 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ /* if (r0 == null) return 0; */ \
+ if r0 != 0 goto l0_%=; \
+ /* if (r0 == r7) return 0; */ \
+ if r0 != r7 goto l1_%=; /* Use ! JNE ! */\
+ goto l0_%=; \
+l1_%=: /* r0 = *(r7->type); */ \
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+l0_%=: /* return 0 */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+/* Same as a first test, but not null should be inferred for JEQ branch */
+SEC("cgroup/skb")
+__description("jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JEQ true branch")
+__success __failure_unpriv __msg_unpriv("R7 pointer comparison")
+__retval(0)
+__naked void socket_for_jeq_true_branch(void)
+{
+ asm volatile (" \
+ /* r6 = skb->sk; */ \
+ r6 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ /* if (r6 == null) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* r7 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ /* r0 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ /* if (r0 == null) return 0; */ \
+ if r0 == 0 goto l0_%=; \
+ /* if (r0 != r7) return 0; */ \
+ if r0 == r7 goto l1_%=; /* Use ! JEQ ! */\
+ goto l0_%=; \
+l1_%=: /* r0 = *(r7->type); */ \
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+l0_%=: /* return 0; */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+/* Same as above, but verify that another branch of JNE still
+ * prohibits access to PTR_MAYBE_NULL.
+ */
+SEC("cgroup/skb")
+__description("jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JEQ false branch")
+__failure __msg("R7 invalid mem access 'sock_or_null'")
+__failure_unpriv __msg_unpriv("R7 pointer comparison")
+__naked void unchanged_for_jeq_false_branch(void)
+{
+ asm volatile (" \
+ /* r6 = skb->sk; */ \
+ r6 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ /* if (r6 == null) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* r7 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ /* r0 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ /* if (r0 == null) return 0; */ \
+ if r0 == 0 goto l0_%=; \
+ /* if (r0 != r7) r0 = *(r7->type); */ \
+ if r0 == r7 goto l0_%=; /* Use ! JEQ ! */\
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+l0_%=: /* return 0; */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+/* Maps are treated in a different branch of `mark_ptr_not_null_reg`,
+ * so separate test for maps case.
+ */
+SEC("xdp")
+__description("jne/jeq infer not null, PTR_TO_MAP_VALUE_OR_NULL -> PTR_TO_MAP_VALUE")
+__success __retval(0)
+__naked void null_ptr_to_map_value(void)
+{
+ asm volatile (" \
+ /* r9 = &some stack to use as key */ \
+ r1 = 0; \
+ *(u32*)(r10 - 8) = r1; \
+ r9 = r10; \
+ r9 += -8; \
+ /* r8 = process local map */ \
+ r8 = %[map_xskmap] ll; \
+ /* r6 = map_lookup_elem(r8, r9); */ \
+ r1 = r8; \
+ r2 = r9; \
+ call %[bpf_map_lookup_elem]; \
+ r6 = r0; \
+ /* r7 = map_lookup_elem(r8, r9); */ \
+ r1 = r8; \
+ r2 = r9; \
+ call %[bpf_map_lookup_elem]; \
+ r7 = r0; \
+ /* if (r6 == 0) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* if (r6 != r7) return 0; */ \
+ if r6 != r7 goto l0_%=; \
+ /* read *r7; */ \
+ r0 = *(u32*)(r7 + %[bpf_xdp_sock_queue_id]); \
+l0_%=: /* return 0; */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_xskmap),
+ __imm_const(bpf_xdp_sock_queue_id, offsetof(struct bpf_xdp_sock, queue_id))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_jit_convergence.c b/tools/testing/selftests/bpf/progs/verifier_jit_convergence.c
new file mode 100644
index 000000000000..9f3f2b7db450
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_jit_convergence.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct value_t {
+ long long a[32];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct value_t);
+} map_hash SEC(".maps");
+
+SEC("socket")
+__description("bpf_jit_convergence je <-> jmp")
+__success __retval(0)
+__arch_x86_64
+__jited(" pushq %rbp")
+__naked void btf_jit_convergence_je_jmp(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 0 goto l20_%=;"
+ "if r0 == 1 goto l21_%=;"
+ "if r0 == 2 goto l22_%=;"
+ "if r0 == 3 goto l23_%=;"
+ "if r0 == 4 goto l24_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "call %[bpf_get_prandom_u32];"
+"l20_%=:"
+"l21_%=:"
+"l22_%=:"
+"l23_%=:"
+"l24_%=:"
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[map_hash] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l1_%=;"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r5 = r6;"
+ "if r0 != 0x0 goto l12_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "r1 = r0;"
+ "r2 = r6;"
+ "if r1 == 0x0 goto l0_%=;"
+"l9_%=:"
+ "r2 = *(u64 *)(r6 + 0x0);"
+ "r2 += 0x1;"
+ "*(u64 *)(r6 + 0x0) = r2;"
+ "goto l1_%=;"
+"l12_%=:"
+ "r1 = r7;"
+ "r1 += 0x98;"
+ "r2 = r5;"
+ "r2 += 0x90;"
+ "r2 = *(u32 *)(r2 + 0x0);"
+ "r3 = r7;"
+ "r3 &= 0x1;"
+ "r2 *= 0xa8;"
+ "if r3 == 0x0 goto l2_%=;"
+ "r1 += r2;"
+ "r1 -= r7;"
+ "r1 += 0x8;"
+ "if r1 <= 0xb20 goto l3_%=;"
+ "r1 = 0x0;"
+ "goto l4_%=;"
+"l3_%=:"
+ "r1 += r7;"
+"l4_%=:"
+ "if r1 == 0x0 goto l8_%=;"
+ "goto l9_%=;"
+"l2_%=:"
+ "r1 += r2;"
+ "r1 -= r7;"
+ "r1 += 0x10;"
+ "if r1 <= 0xb20 goto l6_%=;"
+ "r1 = 0x0;"
+ "goto l7_%=;"
+"l6_%=:"
+ "r1 += r7;"
+"l7_%=:"
+ "if r1 == 0x0 goto l8_%=;"
+ "goto l9_%=;"
+"l0_%=:"
+ "r1 = 0x3;"
+ "*(u64 *)(r10 - 0x10) = r1;"
+ "r2 = r1;"
+ "goto l1_%=;"
+"l8_%=:"
+ "r1 = r5;"
+ "r1 += 0x4;"
+ "r1 = *(u32 *)(r1 + 0x0);"
+ "*(u64 *)(r10 - 0x8) = r1;"
+"l1_%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
new file mode 100644
index 000000000000..a509cad97e69
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "cgrp_kfunc_common.h"
+#include "cpumask_common.h"
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/***************
+ * Task kfuncs *
+ ***************/
+
+static void task_kfunc_load_test(void)
+{
+ struct task_struct *current, *ref_1, *ref_2;
+
+ current = bpf_get_current_task_btf();
+ ref_1 = bpf_task_from_pid(current->pid);
+ if (!ref_1)
+ return;
+
+ ref_2 = bpf_task_acquire(ref_1);
+ if (ref_2)
+ bpf_task_release(ref_2);
+ bpf_task_release(ref_1);
+}
+
+SEC("raw_tp")
+__failure __msg("calling kernel function")
+int BPF_PROG(task_kfunc_raw_tp)
+{
+ task_kfunc_load_test();
+ return 0;
+}
+
+SEC("syscall")
+__success
+int BPF_PROG(task_kfunc_syscall)
+{
+ task_kfunc_load_test();
+ return 0;
+}
+
+SEC("tracepoint")
+__success
+int BPF_PROG(task_kfunc_tracepoint)
+{
+ task_kfunc_load_test();
+ return 0;
+}
+
+SEC("perf_event")
+__success
+int BPF_PROG(task_kfunc_perf_event)
+{
+ task_kfunc_load_test();
+ return 0;
+}
+
+/*****************
+ * cgroup kfuncs *
+ *****************/
+
+static void cgrp_kfunc_load_test(void)
+{
+ struct cgroup *cgrp, *ref;
+
+ cgrp = bpf_cgroup_from_id(0);
+ if (!cgrp)
+ return;
+
+ ref = bpf_cgroup_acquire(cgrp);
+ if (!ref) {
+ bpf_cgroup_release(cgrp);
+ return;
+ }
+
+ bpf_cgroup_release(ref);
+ bpf_cgroup_release(cgrp);
+}
+
+SEC("raw_tp")
+__failure __msg("calling kernel function")
+int BPF_PROG(cgrp_kfunc_raw_tp)
+{
+ cgrp_kfunc_load_test();
+ return 0;
+}
+
+SEC("syscall")
+__success
+int BPF_PROG(cgrp_kfunc_syscall)
+{
+ cgrp_kfunc_load_test();
+ return 0;
+}
+
+SEC("tracepoint")
+__success
+int BPF_PROG(cgrp_kfunc_tracepoint)
+{
+ cgrp_kfunc_load_test();
+ return 0;
+}
+
+SEC("perf_event")
+__success
+int BPF_PROG(cgrp_kfunc_perf_event)
+{
+ cgrp_kfunc_load_test();
+ return 0;
+}
+
+/******************
+ * cpumask kfuncs *
+ ******************/
+
+static void cpumask_kfunc_load_test(void)
+{
+ struct bpf_cpumask *alloc, *ref;
+
+ alloc = bpf_cpumask_create();
+ if (!alloc)
+ return;
+
+ ref = bpf_cpumask_acquire(alloc);
+ bpf_cpumask_set_cpu(0, alloc);
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)ref);
+
+ bpf_cpumask_release(ref);
+ bpf_cpumask_release(alloc);
+}
+
+SEC("raw_tp")
+__failure __msg("calling kernel function")
+int BPF_PROG(cpumask_kfunc_raw_tp)
+{
+ cpumask_kfunc_load_test();
+ return 0;
+}
+
+SEC("syscall")
+__success
+int BPF_PROG(cpumask_kfunc_syscall)
+{
+ cpumask_kfunc_load_test();
+ return 0;
+}
+
+SEC("tracepoint")
+__success
+int BPF_PROG(cpumask_kfunc_tracepoint)
+{
+ cpumask_kfunc_load_test();
+ return 0;
+}
+
+SEC("perf_event")
+__success
+int BPF_PROG(cpumask_kfunc_perf_event)
+{
+ cpumask_kfunc_load_test();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_ld_ind.c b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
new file mode 100644
index 000000000000..c925ba9a2e74
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ld_ind.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("ld_ind: check calling conv, r1")
+__failure __msg("R1 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r1(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r2")
+__failure __msg("R2 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r2(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r2 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r2; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r3")
+__failure __msg("R3 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r3(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r3 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r3; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r4")
+__failure __msg("R4 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r4(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r4 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r4; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r5")
+__failure __msg("R5 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r5(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r5 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r5; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r7")
+__success __success_unpriv __retval(1)
+__naked void ind_check_calling_conv_r7(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r7 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r7; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
new file mode 100644
index 000000000000..c8494b682c31
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_arena_common.h"
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
+SEC("socket")
+__description("LDSX, S8")
+__success __success_unpriv __retval(-2)
+__naked void ldsx_s8(void)
+{
+ asm volatile (
+ "r1 = 0x3fe;"
+ "*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s8 *)(r10 - 8);"
+#else
+ "r0 = *(s8 *)(r10 - 1);"
+#endif
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S16")
+__success __success_unpriv __retval(-2)
+__naked void ldsx_s16(void)
+{
+ asm volatile (
+ "r1 = 0x3fffe;"
+ "*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s16 *)(r10 - 8);"
+#else
+ "r0 = *(s16 *)(r10 - 2);"
+#endif
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S32")
+__success __success_unpriv __retval(-1)
+__naked void ldsx_s32(void)
+{
+ asm volatile (
+ "r1 = 0xfffffffe;"
+ "*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s32 *)(r10 - 8);"
+#else
+ "r0 = *(s32 *)(r10 - 4);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S8 range checking, privileged")
+__log_level(2) __success __retval(1)
+__msg("R1=scalar(smin=smin32=-128,smax=smax32=127)")
+__naked void ldsx_s8_range_priv(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(s8 *)(r10 - 8);"
+#else
+ "r1 = *(s8 *)(r10 - 1);"
+#endif
+ /* r1 with s8 range */
+ "if r1 s> 0x7f goto l0_%=;"
+ "if r1 s< -0x80 goto l0_%=;"
+ "r0 = 1;"
+"l1_%=:"
+ "exit;"
+"l0_%=:"
+ "r0 = 2;"
+ "goto l1_%=;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S16 range checking")
+__success __success_unpriv __retval(1)
+__naked void ldsx_s16_range(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(s16 *)(r10 - 8);"
+#else
+ "r1 = *(s16 *)(r10 - 2);"
+#endif
+ /* r1 with s16 range */
+ "if r1 s> 0x7fff goto l0_%=;"
+ "if r1 s< -0x8000 goto l0_%=;"
+ "r0 = 1;"
+"l1_%=:"
+ "exit;"
+"l0_%=:"
+ "r0 = 2;"
+ "goto l1_%=;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S32 range checking")
+__success __success_unpriv __retval(1)
+__naked void ldsx_s32_range(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(s32 *)(r10 - 8);"
+#else
+ "r1 = *(s32 *)(r10 - 4);"
+#endif
+ /* r1 with s16 range */
+ "if r1 s> 0x7fffFFFF goto l0_%=;"
+ "if r1 s< -0x80000000 goto l0_%=;"
+ "r0 = 1;"
+"l1_%=:"
+ "exit;"
+"l0_%=:"
+ "r0 = 2;"
+ "goto l1_%=;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("LDSX, xdp s32 xdp_md->data")
+__failure __msg("invalid bpf_context access")
+__naked void ldsx_ctx_1(void)
+{
+ asm volatile (
+ "r2 = *(s32 *)(r1 + %[xdp_md_data]);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("LDSX, xdp s32 xdp_md->data_end")
+__failure __msg("invalid bpf_context access")
+__naked void ldsx_ctx_2(void)
+{
+ asm volatile (
+ "r2 = *(s32 *)(r1 + %[xdp_md_data_end]);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("LDSX, xdp s32 xdp_md->data_meta")
+__failure __msg("invalid bpf_context access")
+__naked void ldsx_ctx_3(void)
+{
+ asm volatile (
+ "r2 = *(s32 *)(r1 + %[xdp_md_data_meta]);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("tcx/ingress")
+__description("LDSX, tcx s32 __sk_buff->data")
+__failure __msg("invalid bpf_context access")
+__naked void ldsx_ctx_4(void)
+{
+ asm volatile (
+ "r2 = *(s32 *)(r1 + %[sk_buff_data]);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(sk_buff_data, offsetof(struct __sk_buff, data))
+ : __clobber_all);
+}
+
+SEC("tcx/ingress")
+__description("LDSX, tcx s32 __sk_buff->data_end")
+__failure __msg("invalid bpf_context access")
+__naked void ldsx_ctx_5(void)
+{
+ asm volatile (
+ "r2 = *(s32 *)(r1 + %[sk_buff_data_end]);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tcx/ingress")
+__description("LDSX, tcx s32 __sk_buff->data_meta")
+__failure __msg("invalid bpf_context access")
+__naked void ldsx_ctx_6(void)
+{
+ asm volatile (
+ "r2 = *(s32 *)(r1 + %[sk_buff_data_meta]);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(sk_buff_data_meta, offsetof(struct __sk_buff, data_meta))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("LDSX, flow_dissector s32 __sk_buff->data")
+__failure __msg("invalid bpf_context access")
+__naked void ldsx_ctx_7(void)
+{
+ asm volatile (
+ "r2 = *(s32 *)(r1 + %[sk_buff_data]);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(sk_buff_data, offsetof(struct __sk_buff, data))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("LDSX, flow_dissector s32 __sk_buff->data_end")
+__failure __msg("invalid bpf_context access")
+__naked void ldsx_ctx_8(void)
+{
+ asm volatile (
+ "r2 = *(s32 *)(r1 + %[sk_buff_data_end]);"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("syscall")
+__description("Arena LDSX Disasm")
+__success
+__arch_x86_64
+__jited("movslq 0x10(%rax,%r12), %r14")
+__jited("movswq 0x18(%rax,%r12), %r14")
+__jited("movsbq 0x20(%rax,%r12), %r14")
+__jited("movslq 0x10(%rdi,%r12), %r15")
+__jited("movswq 0x18(%rdi,%r12), %r15")
+__jited("movsbq 0x20(%rdi,%r12), %r15")
+__arch_arm64
+__jited("add x11, x7, x28")
+__jited("ldrsw x21, [x11, #0x10]")
+__jited("add x11, x7, x28")
+__jited("ldrsh x21, [x11, #0x18]")
+__jited("add x11, x7, x28")
+__jited("ldrsb x21, [x11, #0x20]")
+__jited("add x11, x0, x28")
+__jited("ldrsw x22, [x11, #0x10]")
+__jited("add x11, x0, x28")
+__jited("ldrsh x22, [x11, #0x18]")
+__jited("add x11, x0, x28")
+__jited("ldrsb x22, [x11, #0x20]")
+__naked void arena_ldsx_disasm(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = r0;"
+ "r8 = *(s32 *)(r0 + 16);"
+ "r8 = *(s16 *)(r0 + 24);"
+ "r8 = *(s8 *)(r0 + 32);"
+ "r9 = *(s32 *)(r1 + 16);"
+ "r9 = *(s16 *)(r1 + 24);"
+ "r9 = *(s8 *)(r1 + 32);"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX Exception")
+__success __retval(0)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_exception(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r0 = 0xdeadbeef;"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r0 = *(s8 *)(r0 + 0);"
+ "exit;"
+ :
+ : __imm_addr(arena)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S8")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s8(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s8 *)(r0 + 0);"
+#else
+ "r0 = *(s8 *)(r0 + 7);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S16")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s16(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s16 *)(r0 + 0);"
+#else
+ "r0 = *(s16 *)(r0 + 6);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S32")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s32(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0xfffffffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s32 *)(r0 + 0);"
+#else
+ "r0 = *(s32 *)(r0 + 4);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c b/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c
new file mode 100644
index 000000000000..d153fbe50055
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/leak_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("leak pointer into ctx 1")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__failure_unpriv __msg_unpriv("R2 leaks addr into mem")
+__naked void leak_pointer_into_ctx_1(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r2 = %[map_hash_8b] ll; \
+ lock *(u64 *)(r1 + %[__sk_buff_cb_0]) += r2; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into ctx 2")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__failure_unpriv __msg_unpriv("R10 leaks addr into mem")
+__naked void leak_pointer_into_ctx_2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ lock *(u64 *)(r1 + %[__sk_buff_cb_0]) += r10; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into ctx 3")
+__success __failure_unpriv __msg_unpriv("R2 leaks addr into ctx")
+__retval(0)
+__naked void leak_pointer_into_ctx_3(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r2 = %[map_hash_8b] ll; \
+ *(u64*)(r1 + %[__sk_buff_cb_0]) = r2; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into map val")
+__success __failure_unpriv __msg_unpriv("R6 leaks addr into mem")
+__retval(0)
+__naked void leak_pointer_into_map_val(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = 0; \
+ *(u64*)(r0 + 0) = r3; \
+ lock *(u64 *)(r0 + 0) += r6; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
new file mode 100644
index 000000000000..8f755d2464cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("scalars: find linked scalars")
+__failure
+__msg("math between fp pointer and 2147483647 is not allowed")
+__naked void scalars(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = 0x80000001 ll; \
+ r1 /= 1; \
+ r2 = r1; \
+ r4 = r1; \
+ w2 += 0x7FFFFFFF; \
+ w4 += 0; \
+ if r2 == 0 goto l1; \
+ exit; \
+l1: \
+ r4 >>= 63; \
+ r3 = 1; \
+ r3 -= r4; \
+ r3 *= 0x7FFFFFFF; \
+ r3 += r10; \
+ *(u8*)(r3 - 1) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
new file mode 100644
index 000000000000..2de105057bbc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, long long);
+} map SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) frame 0 insn 1 +live -24")
+__msg("(0) frame 0 insn 1 +written -8")
+__msg("(0) frame 0 insn 0 +live -8,-24")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void simple_read_simple_write(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = *(u64 *)(r10 - 24);"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 1 +live -8")
+__not_msg("(0) frame 0 insn 1 +written")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 1 +live -16")
+__msg("(0) frame 0 insn 1 +written -32")
+__msg("(0) live stack update done in 2 iterations")
+__naked void read_write_join(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > 42 goto 1f;"
+ "r0 = *(u64 *)(r10 - 8);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "*(u64 *)(r10 - 40) = r0;"
+ "exit;"
+"1:"
+ "r0 = *(u64 *)(r10 - 16);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: (25) if r0 > 0x2a goto pc+1")
+__msg("7: (95) exit")
+__msg("(0) frame 0 insn 2 +written -16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("7: (95) exit")
+__not_msg("(0) frame 0 insn 2")
+__msg("(0) live stack update done in 1 iterations")
+__naked void must_write_not_same_slot(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = -8;"
+ "if r0 > 42 goto 1f;"
+ "r1 = -16;"
+"1:"
+ "r2 = r10;"
+ "r2 += r1;"
+ "*(u64 *)(r2 + 0) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 0 +written -8,-16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void must_write_not_same_type(void)
+{
+ asm volatile (
+ "*(u64*)(r10 - 8) = 0;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 != 0 goto 1f;"
+ "r0 = r10;"
+ "r0 += -16;"
+"1:"
+ "*(u64 *)(r0 + 0) = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(2,4) frame 0 insn 4 +written -8")
+__msg("(2,4) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_write(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call write_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void write_first_param(void)
+{
+ asm volatile (
+ "*(u64 *)(r1 + 0) = 7;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+/* caller_stack_read() function */
+__msg("2: .12345.... (85) call pc+4")
+__msg("5: .12345.... (85) call pc+1")
+__msg("6: 0......... (95) exit")
+/* read_first_param() function */
+__msg("7: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("8: 0......... (95) exit")
+/* update for callsite at (2) */
+__msg("(2,7) frame 0 insn 7 +live -8")
+__msg("(2,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +live -8")
+__msg("(0) live stack update done in 2 iterations")
+/* update for callsite at (5) */
+__msg("(5,7) frame 0 insn 7 +live -16")
+__msg("(5,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 5 +live -16")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_read(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call read_first_param;"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call read_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void read_first_param(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+/* read_first_param2() function */
+__msg(" 9: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("10: .......... (b7) r0 = 0")
+__msg("11: 0......... (05) goto pc+0")
+__msg("12: 0......... (95) exit")
+/*
+ * The purpose of the test is to check that checkpoint in
+ * read_first_param2() stops path traversal. This will only happen if
+ * verifier understands that fp[0]-8 at insn (12) is not alive.
+ */
+__msg("12: safe")
+__msg("processed 20 insns")
+__naked void caller_stack_pruning(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto 1f;"
+ "r0 = %[map] ll;"
+"1:"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ /*
+ * fp[0]-8 is either pointer to map or a scalar,
+ * preventing state pruning at checkpoint created for call.
+ */
+ "call read_first_param2;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void read_first_param2(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ /*
+ * Checkpoint at goto +0 should fire,
+ * as caller stack fp[0]-8 is not alive at this point.
+ */
+ "goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg("R1 type=scalar expected=map_ptr")
+__naked void caller_stack_pruning_callback(void)
+{
+ asm volatile (
+ "r0 = %[map] ll;"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = 2;"
+ "r2 = loop_cb ll;"
+ "r3 = r10;"
+ "r3 += -8;"
+ "r4 = 0;"
+ /*
+ * fp[0]-8 is either pointer to map or a scalar,
+ * preventing state pruning at checkpoint created for call.
+ */
+ "call %[bpf_loop];"
+ "r0 = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_loop),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void loop_cb(void)
+{
+ asm volatile (
+ /*
+ * Checkpoint at function entry should not fire, as caller
+ * stack fp[0]-8 is alive at this point.
+ */
+ "r6 = r2;"
+ "r1 = *(u64 *)(r6 + 0);"
+ "*(u64*)(r10 - 8) = 7;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ /*
+ * This should stop verifier on a second loop iteration,
+ * but only if verifier correctly maintains that fp[0]-8
+ * is still alive.
+ */
+ "*(u64 *)(r6 + 0) = 0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Because of a bug in verifier.c:compute_postorder()
+ * the program below overflowed traversal queue in that function.
+ */
+SEC("socket")
+__naked void syzbot_postorder_bug1(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 != 0 goto -1;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+SEC("socket")
+__failure __msg("invalid read from stack R2 off=-1024 size=8")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked unsigned long caller_stack_write_tail_call(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "*(u64 *)(r10 - 8) = -8;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto 1f;"
+ "goto 2f;"
+ "1:"
+ "*(u64 *)(r10 - 8) = -1024;"
+ "2:"
+ "r1 = r6;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call write_tail_call;"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = r10;"
+ "r2 += r1;"
+ "r0 = *(u64 *)(r2 + 0);"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+static __used __naked unsigned long write_tail_call(void)
+{
+ asm volatile (
+ "r6 = r2;"
+ "r2 = %[map_array] ll;"
+ "r3 = 0;"
+ "call %[bpf_tail_call];"
+ "*(u64 *)(r6 + 0) = -16;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
new file mode 100644
index 000000000000..74f4f19c10b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__description("load-acquire, 8-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_8(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0xfe;"
+ "*(u8 *)(r10 - 1) = w1;"
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 16-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_16(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0xfedc;"
+ "*(u16 *)(r10 - 2) = w1;"
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 32-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_32(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0xfedcba09;"
+ "*(u32 *)(r10 - 4) = w1;"
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u32 *)(r10 - 4));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 64-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r1 = 0xfedcba0987654321 ll;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void load_acquire_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with non-pointer src_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void load_acquire_with_non_pointer_src_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned load-acquire")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void load_acquire_misaligned(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 5));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire from ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC loads from R1 ctx is not allowed")
+__naked void load_acquire_from_ctx_pointer(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("load-acquire from pkt pointer")
+__failure __msg("BPF_ATOMIC loads from R2 pkt is not allowed")
+__naked void load_acquire_from_pkt_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("load-acquire from flow_keys pointer")
+__failure __msg("BPF_ATOMIC loads from R2 flow_keys is not allowed")
+__naked void load_acquire_from_flow_keys_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("load-acquire from sock pointer")
+__failure __msg("BPF_ATOMIC loads from R2 sock is not allowed")
+__naked void load_acquire_from_sock_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ // w0 = load_acquire((u8 *)(r2 + offsetof(struct bpf_sock, family)));
+ ".8byte %[load_acquire_insn];"
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2,
+ offsetof(struct bpf_sock, family)))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void load_acquire_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r15 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, 15 /* invalid reg */, 0))
+ : __clobber_all);
+}
+
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support load-acquire, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
new file mode 100644
index 000000000000..fbdde80e7b90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/loops1.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("bounded loop, count to 4")
+__success __retval(4)
+__naked void bounded_loop_count_to_4(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l0_%=: r0 += 1; \
+ if r0 < 4 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop, count to 20")
+__success
+__naked void bounded_loop_count_to_20(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l0_%=: r0 += 3; \
+ if r0 < 20 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop, count from positive unknown to 4")
+__success
+__naked void from_positive_unknown_to_4(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ if r0 s< 0 goto l0_%=; \
+l1_%=: r0 += 1; \
+ if r0 < 4 goto l1_%=; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop, count from totally unknown to 4")
+__success
+__naked void from_totally_unknown_to_4(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+l0_%=: r0 += 1; \
+ if r0 < 4 goto l0_%=; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop, count to 4 with equality")
+__success
+__naked void count_to_4_with_equality(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l0_%=: r0 += 1; \
+ if r0 != 4 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("bounded loop, start in the middle")
+__success
+__failure_unpriv __msg_unpriv("back-edge")
+__naked void loop_start_in_the_middle(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ goto l0_%=; \
+l1_%=: r0 += 1; \
+l0_%=: if r0 < 4 goto l1_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("bounded loop containing a forward jump")
+__success __retval(4)
+__naked void loop_containing_a_forward_jump(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l1_%=: r0 += 1; \
+ if r0 == r0 goto l0_%=; \
+l0_%=: if r0 < 4 goto l1_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop that jumps out rather than in")
+__success
+__naked void jumps_out_rather_than_in(void)
+{
+ asm volatile (" \
+ r6 = 0; \
+l1_%=: r6 += 1; \
+ if r6 > 10000 goto l0_%=; \
+ call %[bpf_get_prandom_u32]; \
+ goto l1_%=; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("infinite loop after a conditional jump")
+__failure __msg("program is too large")
+__naked void loop_after_a_conditional_jump(void)
+{
+ asm volatile (" \
+ r0 = 5; \
+ if r0 < 4 goto l0_%=; \
+l1_%=: r0 += 1; \
+ goto l1_%=; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded recursion")
+__failure
+/* verifier limitation in detecting max stack depth */
+__msg("the call stack of 8 frames is too deep !")
+__naked void bounded_recursion(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call bounded_recursion__1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void bounded_recursion__1(void)
+{
+ asm volatile (" \
+ r1 += 1; \
+ r0 = r1; \
+ if r1 < 4 goto l0_%=; \
+ exit; \
+l0_%=: call bounded_recursion__1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("infinite loop in two jumps")
+__failure __msg("loop detected")
+__naked void infinite_loop_in_two_jumps(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l1_%=: goto l0_%=; \
+l0_%=: if r0 < 4 goto l1_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("infinite loop: three-jump trick")
+__failure __msg("loop detected")
+__naked void infinite_loop_three_jump_trick(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l2_%=: r0 += 1; \
+ r0 &= 1; \
+ if r0 < 2 goto l0_%=; \
+ exit; \
+l0_%=: r0 += 1; \
+ r0 &= 1; \
+ if r0 < 2 goto l1_%=; \
+ exit; \
+l1_%=: r0 += 1; \
+ r0 &= 1; \
+ if r0 < 2 goto l2_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("not-taken loop with back jump to 1st insn")
+__success __retval(123)
+__naked void back_jump_to_1st_insn_1(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 123; \
+ if r0 == 4 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("taken loop with back jump to 1st insn")
+__success __retval(55)
+__naked void back_jump_to_1st_insn_2(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ r2 = 0; \
+ call back_jump_to_1st_insn_2__1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void back_jump_to_1st_insn_2__1(void)
+{
+ asm volatile (" \
+l0_%=: r2 += r1; \
+ r1 -= 1; \
+ if r1 != 0 goto l0_%=; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("taken loop with back jump to 1st insn, 2")
+__success __retval(55)
+__naked void jump_to_1st_insn_2(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ r2 = 0; \
+ call jump_to_1st_insn_2__1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void jump_to_1st_insn_2__1(void)
+{
+ asm volatile (" \
+l0_%=: r2 += r1; \
+ r1 -= 1; \
+ if w1 != 0 goto l0_%=; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__success
+__naked void not_an_inifinite_loop(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ *(u64 *)(r10 - 8) = r0; \
+ r0 = 0; \
+loop_%=: \
+ r0 = *(u64 *)(r10 - 8); \
+ if r0 > 10 goto exit_%=; \
+ r0 += 1; \
+ *(u64 *)(r10 - 8) = r0; \
+ r0 = 0; \
+ goto loop_%=; \
+exit_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * This test case triggered a bug in verifier.c:maybe_exit_scc().
+ * Speculative execution path reaches stack access instruction,
+ * stops and triggers maybe_exit_scc() w/o accompanying maybe_enter_scc() call.
+ */
+SEC("socket")
+__arch_x86_64
+__caps_unpriv(CAP_BPF)
+__naked void maybe_exit_scc_bug1(void)
+{
+ asm volatile (
+ "r0 = 100;"
+"1:"
+ /* Speculative execution path reaches and stops here. */
+ "*(u64 *)(r10 - 512) = r0;"
+ /* Condition is always false, but verifier speculatively executes the true branch. */
+ "if r0 <= 0x0 goto 1b;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c
new file mode 100644
index 000000000000..6af9100a37ff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("lsm/file_permission")
+__description("lsm bpf prog with -4095~0 retval. test 1")
+__success
+__naked int errno_zero_retval_test1(void *ctx)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/file_permission")
+__description("lsm bpf prog with -4095~0 retval. test 2")
+__success
+__naked int errno_zero_retval_test2(void *ctx)
+{
+ asm volatile (
+ "r0 = -4095;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/file_mprotect")
+__description("lsm bpf prog with -4095~0 retval. test 4")
+__failure __msg("R0 has smin=-4096 smax=-4096 should have been in [-4095, 0]")
+__naked int errno_zero_retval_test4(void *ctx)
+{
+ asm volatile (
+ "r0 = -4096;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/file_mprotect")
+__description("lsm bpf prog with -4095~0 retval. test 5")
+__failure __msg("R0 has smin=4096 smax=4096 should have been in [-4095, 0]")
+__naked int errno_zero_retval_test5(void *ctx)
+{
+ asm volatile (
+ "r0 = 4096;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/file_mprotect")
+__description("lsm bpf prog with -4095~0 retval. test 6")
+__failure __msg("R0 has smin=1 smax=1 should have been in [-4095, 0]")
+__naked int errno_zero_retval_test6(void *ctx)
+{
+ asm volatile (
+ "r0 = 1;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_known")
+__description("lsm bpf prog with bool retval. test 1")
+__success
+__naked int bool_retval_test1(void *ctx)
+{
+ asm volatile (
+ "r0 = 1;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_known")
+__description("lsm bpf prog with bool retval. test 2")
+__success
+__success
+__naked int bool_retval_test2(void *ctx)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_known")
+__description("lsm bpf prog with bool retval. test 3")
+__failure __msg("R0 has smin=-1 smax=-1 should have been in [0, 1]")
+__naked int bool_retval_test3(void *ctx)
+{
+ asm volatile (
+ "r0 = -1;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_known")
+__description("lsm bpf prog with bool retval. test 4")
+__failure __msg("R0 has smin=2 smax=2 should have been in [0, 1]")
+__naked int bool_retval_test4(void *ctx)
+{
+ asm volatile (
+ "r0 = 2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/file_free_security")
+__success
+__description("lsm bpf prog with void retval. test 1")
+__naked int void_retval_test1(void *ctx)
+{
+ asm volatile (
+ "r0 = -4096;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/file_free_security")
+__success
+__description("lsm bpf prog with void retval. test 2")
+__naked int void_retval_test2(void *ctx)
+{
+ asm volatile (
+ "r0 = 4096;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/getprocattr")
+__description("lsm disabled hook: getprocattr")
+__failure __msg("points to disabled hook")
+__naked int disabled_hook_test1(void *ctx)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/setprocattr")
+__description("lsm disabled hook: setprocattr")
+__failure __msg("points to disabled hook")
+__naked int disabled_hook_test2(void *ctx)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm/ismaclabel")
+__description("lsm disabled hook: ismaclabel")
+__failure __msg("points to disabled hook")
+__naked int disabled_hook_test3(void *ctx)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_lwt.c b/tools/testing/selftests/bpf/progs/verifier_lwt.c
new file mode 100644
index 000000000000..5ab746307309
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_lwt.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/lwt.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("lwt_in")
+__description("invalid direct packet write for LWT_IN")
+__failure __msg("cannot write into packet")
+__naked void packet_write_for_lwt_in(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_out")
+__description("invalid direct packet write for LWT_OUT")
+__failure __msg("cannot write into packet")
+__naked void packet_write_for_lwt_out(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_xmit")
+__description("direct packet write for LWT_XMIT")
+__success __retval(0)
+__naked void packet_write_for_lwt_xmit(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("direct packet read for LWT_IN")
+__success __retval(0)
+__naked void packet_read_for_lwt_in(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_out")
+__description("direct packet read for LWT_OUT")
+__success __retval(0)
+__naked void packet_read_for_lwt_out(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_xmit")
+__description("direct packet read for LWT_XMIT")
+__success __retval(0)
+__naked void packet_read_for_lwt_xmit(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_xmit")
+__description("overlapping checks for direct packet access")
+__success __retval(0)
+__naked void checks_for_direct_packet_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u16*)(r2 + 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_xmit")
+__description("make headroom for LWT_XMIT")
+__success __retval(0)
+__naked void make_headroom_for_lwt_xmit(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r2 = 34; \
+ r3 = 0; \
+ call %[bpf_skb_change_head]; \
+ /* split for s390 to succeed */ \
+ r1 = r6; \
+ r2 = 42; \
+ r3 = 0; \
+ call %[bpf_skb_change_head]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_skb_change_head)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid access of tc_classid for LWT_IN")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void tc_classid_for_lwt_in(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid access of tc_classid for LWT_OUT")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void tc_classid_for_lwt_out(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid access of tc_classid for LWT_XMIT")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void tc_classid_for_lwt_xmit(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("check skb->tc_classid half load not permitted for lwt prog")
+__failure __msg("invalid bpf_context access")
+__naked void not_permitted_for_lwt_prog(void)
+{
+ asm volatile (
+ "r0 = 0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(u16*)(r1 + %[__sk_buff_tc_classid]);"
+#else
+ "r0 = *(u16*)(r1 + %[__imm_0]);"
+#endif
+ "exit;"
+ :
+ : __imm_const(__imm_0, offsetof(struct __sk_buff, tc_classid) + 2),
+ __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
new file mode 100644
index 000000000000..16b761e510f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_in_map.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ });
+} map_in_map SEC(".maps");
+
+SEC("socket")
+__description("map in map access")
+__success __success_unpriv __retval(0)
+__naked void map_in_map_access(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = r0; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("map in map state pruning")
+__success __msg("processed 15 insns")
+__log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ)
+__naked void map_in_map_state_pruning(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r6 = r10; \
+ r6 += -4; \
+ r2 = r6; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r2 = r6; \
+ r1 = r0; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l1_%=; \
+ r2 = r6; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l2_%=; \
+ exit; \
+l2_%=: r2 = r6; \
+ r1 = r0; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + 0); \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid inner map pointer")
+__failure __msg("R1 pointer arithmetic on map_ptr prohibited")
+__failure_unpriv
+__naked void invalid_inner_map_pointer(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = r0; \
+ r1 += 8; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("forgot null checking on the inner map pointer")
+__failure __msg("R1 type=map_value_or_null expected=map_ptr")
+__failure_unpriv
+__naked void on_the_inner_map_pointer(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = r0; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null")
+__success
+__naked void map_ptr_is_never_null(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = %[map_in_map] ll; \
+ if r1 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner")
+__success
+__naked void map_ptr_is_never_null_inner(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner spill fill")
+__success
+__naked void map_ptr_is_never_null_inner_spill_fill(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: *(u64 *)(r10 -16) = r0; \
+ r1 = *(u64 *)(r10 -16); \
+ if r1 == 0 goto l1_%=; \
+ exit; \
+l1_%=: r10 = 42; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 64 * 1024);
+ });
+} rb_in_map SEC(".maps");
+
+struct rb_ctx {
+ void *rb;
+ struct bpf_dynptr dptr;
+};
+
+static __always_inline struct rb_ctx __rb_event_reserve(__u32 sz)
+{
+ struct rb_ctx rb_ctx = {};
+ void *rb;
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u32 rb_slot = cpu & 1;
+
+ rb = bpf_map_lookup_elem(&rb_in_map, &rb_slot);
+ if (!rb)
+ return rb_ctx;
+
+ rb_ctx.rb = rb;
+ bpf_ringbuf_reserve_dynptr(rb, sz, 0, &rb_ctx.dptr);
+
+ return rb_ctx;
+}
+
+static __noinline void __rb_event_submit(struct rb_ctx *ctx)
+{
+ if (!ctx->rb)
+ return;
+
+ /* If the verifier (incorrectly) concludes that ctx->rb can be
+ * NULL at this point, we'll get "BPF_EXIT instruction in main
+ * prog would lead to reference leak" error
+ */
+ bpf_ringbuf_submit_dynptr(&ctx->dptr, 0);
+}
+
+SEC("socket")
+int map_ptr_is_never_null_rb(void *ctx)
+{
+ struct rb_ctx event_ctx = __rb_event_reserve(256);
+ __rb_event_submit(&event_ctx);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
new file mode 100644
index 000000000000..e2767d27d8aa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
+SEC("socket")
+__description("bpf_map_ptr: read with negative offset rejected")
+__failure __msg("R1 is bpf_array invalid negative access: off=-8")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__naked void read_with_negative_offset_rejected(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 = %[map_array_48b] ll; \
+ r6 = *(u64*)(r1 - 8); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: write rejected")
+__failure __msg("only read from bpf_array is supported")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__naked void bpf_map_ptr_write_rejected(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ *(u64*)(r1 + 0) = r2; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+/* The first element of struct bpf_map is a SHA256 hash of 32 bytes, accessing
+ * into this array is valid. The opts field is now at offset 33.
+ */
+SEC("socket")
+__description("bpf_map_ptr: read non-existent field rejected")
+__failure
+__msg("cannot access ptr member ops with moff 32 in struct bpf_map with off 33 size 4")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void read_non_existent_field_rejected(void)
+{
+ asm volatile (" \
+ r6 = 0; \
+ r1 = %[map_array_48b] ll; \
+ r6 = *(u32*)(r1 + 33); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: read ops field accepted")
+__success __failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__retval(1)
+__naked void ptr_read_ops_field_accepted(void)
+{
+ asm volatile (" \
+ r6 = 0; \
+ r1 = %[map_array_48b] ll; \
+ r6 = *(u64*)(r1 + 0); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: r = 0, map_ptr = map_ptr + r")
+__success __failure_unpriv
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__retval(0)
+__naked void map_ptr_map_ptr_r(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r0 = 0; \
+ r1 = %[map_hash_16b] ll; \
+ r1 += r0; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: r = 0, r = r + map_ptr")
+__success __failure_unpriv
+__msg_unpriv("R0 has pointer with unsupported alu operation")
+__retval(0)
+__naked void _0_r_r_map_ptr(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ r0 = %[map_hash_16b] ll; \
+ r1 += r0; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr_mixing.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr_mixing.c
new file mode 100644
index 000000000000..c5a7c1ddc562
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr_mixing.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ptr_mixing.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ });
+} map_in_map SEC(".maps");
+
+void dummy_prog_42_socket(void);
+void dummy_prog_24_socket(void);
+void dummy_prog_loop1_socket(void);
+void dummy_prog_loop2_socket(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog1_socket SEC(".maps") = {
+ .values = {
+ [0] = (void *)&dummy_prog_42_socket,
+ [1] = (void *)&dummy_prog_loop1_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ },
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 8);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog2_socket SEC(".maps") = {
+ .values = {
+ [1] = (void *)&dummy_prog_loop2_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ [7] = (void *)&dummy_prog_42_socket,
+ },
+};
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_42_socket(void)
+{
+ asm volatile ("r0 = 42; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_24_socket(void)
+{
+ asm volatile ("r0 = 24; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop1_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop2_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog2_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog2_socket)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("calls: two calls returning different map pointers for lookup (hash, array)")
+__success __retval(1)
+__naked void pointers_for_lookup_hash_array(void)
+{
+ asm volatile (" \
+ /* main prog */ \
+ if r1 != 0 goto l0_%=; \
+ call pointers_for_lookup_hash_array__1; \
+ goto l1_%=; \
+l0_%=: call pointers_for_lookup_hash_array__2; \
+l1_%=: r1 = r0; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r2 = r10; \
+ r2 += -8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+ r0 = 1; \
+l2_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void pointers_for_lookup_hash_array__1(void)
+{
+ asm volatile (" \
+ r0 = %[map_hash_48b] ll; \
+ exit; \
+" :
+ : __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void pointers_for_lookup_hash_array__2(void)
+{
+ asm volatile (" \
+ r0 = %[map_array_48b] ll; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("calls: two calls returning different map pointers for lookup (hash, map in map)")
+__failure __msg("only read from bpf_array is supported")
+__naked void lookup_hash_map_in_map(void)
+{
+ asm volatile (" \
+ /* main prog */ \
+ if r1 != 0 goto l0_%=; \
+ call lookup_hash_map_in_map__1; \
+ goto l1_%=; \
+l0_%=: call lookup_hash_map_in_map__2; \
+l1_%=: r1 = r0; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r2 = r10; \
+ r2 += -8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+ r0 = 1; \
+l2_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void lookup_hash_map_in_map__1(void)
+{
+ asm volatile (" \
+ r0 = %[map_array_48b] ll; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void lookup_hash_map_in_map__2(void)
+{
+ asm volatile (" \
+ r0 = %[map_in_map] ll; \
+ exit; \
+" :
+ : __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("cond: two branches returning different map pointers for lookup (tail, tail)")
+__success __failure_unpriv __msg_unpriv("tail_call abusing map_ptr")
+__retval(42)
+__naked void pointers_for_lookup_tail_tail_1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ if r6 != 0 goto l0_%=; \
+ r2 = %[map_prog2_socket] ll; \
+ goto l1_%=; \
+l0_%=: r2 = %[map_prog1_socket] ll; \
+l1_%=: r3 = 7; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_addr(map_prog2_socket),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("cond: two branches returning same map pointers for lookup (tail, tail)")
+__success __success_unpriv __retval(42)
+__naked void pointers_for_lookup_tail_tail_2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ if r6 == 0 goto l0_%=; \
+ r2 = %[map_prog2_socket] ll; \
+ goto l1_%=; \
+l0_%=: r2 = %[map_prog2_socket] ll; \
+l1_%=: r3 = 7; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog2_socket),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c b/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c
new file mode 100644
index 000000000000..1639628b832d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ret_val.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("invalid map_fd for function call")
+__failure __msg("fd 0 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void map_fd_for_function_call(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r2 = r10; \
+ r2 += -8; \
+ .8byte %[ld_map_fd]; \
+ .8byte 0; \
+ call %[bpf_map_delete_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_delete_elem),
+ __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("don't check return value before access")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__failure_unpriv
+__naked void check_return_value_before_access(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = 0; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("access memory with incorrect alignment")
+__failure __msg("misaligned value access")
+__failure_unpriv
+__flag(BPF_F_STRICT_ALIGNMENT)
+__naked void access_memory_with_incorrect_alignment_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r0 + 4) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sometimes access memory with incorrect alignment")
+__failure __msg("R0 invalid mem access")
+__msg_unpriv("R0 leaks addr")
+__flag(BPF_F_STRICT_ALIGNMENT)
+__naked void access_memory_with_incorrect_alignment_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+l0_%=: r1 = 1; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_masking.c b/tools/testing/selftests/bpf/progs/verifier_masking.c
new file mode 100644
index 000000000000..5732cc1b4c47
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_masking.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/masking.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("masking, test out of bounds 1")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_1(void)
+{
+ asm volatile (" \
+ w1 = 5; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 5 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 2")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_2(void)
+{
+ asm volatile (" \
+ w1 = 1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 3")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_3(void)
+{
+ asm volatile (" \
+ w1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 4")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_4(void)
+{
+ asm volatile (" \
+ w1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 5")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_5(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 6")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_6(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 7")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_7(void)
+{
+ asm volatile (" \
+ r1 = 5; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 5 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 8")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_8(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 9")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_9(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 10")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_10(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 11")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_11(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 12")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_12(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 1")
+__success __success_unpriv __retval(4)
+__naked void masking_test_in_bounds_1(void)
+{
+ asm volatile (" \
+ w1 = 4; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 5 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 2")
+__success __success_unpriv __retval(0)
+__naked void masking_test_in_bounds_2(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 3")
+__success __success_unpriv __retval(0xfffffffe)
+__naked void masking_test_in_bounds_3(void)
+{
+ asm volatile (" \
+ w1 = 0xfffffffe; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 4")
+__success __success_unpriv __retval(0xabcde)
+__naked void masking_test_in_bounds_4(void)
+{
+ asm volatile (" \
+ w1 = 0xabcde; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xabcdef - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 5")
+__success __success_unpriv __retval(0)
+__naked void masking_test_in_bounds_5(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 6")
+__success __success_unpriv __retval(46)
+__naked void masking_test_in_bounds_6(void)
+{
+ asm volatile (" \
+ w1 = 46; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 47 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 7")
+__success __success_unpriv __retval(46)
+__naked void masking_test_in_bounds_7(void)
+{
+ asm volatile (" \
+ r3 = -46; \
+ r3 *= -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r3; \
+ r2 |= r3; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r3 &= r2; \
+ r0 = r3; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 47 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 8")
+__success __success_unpriv __retval(0)
+__naked void masking_test_in_bounds_8(void)
+{
+ asm volatile (" \
+ r3 = -47; \
+ r3 *= -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r3; \
+ r2 |= r3; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r3 &= r2; \
+ r0 = r3; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 47 - 1)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
new file mode 100644
index 000000000000..6d1edaef9213
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("raw_tp")
+__description("may_goto 0")
+__arch_x86_64
+__arch_s390x
+__arch_arm64
+__xlated("0: r0 = 1")
+__xlated("1: exit")
+__success
+__naked void may_goto_simple(void)
+{
+ asm volatile (
+ ".8byte %[may_goto];"
+ "r0 = 1;"
+ ".8byte %[may_goto];"
+ "exit;"
+ :
+ : __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__description("batch 2 of may_goto 0")
+__arch_x86_64
+__arch_s390x
+__arch_arm64
+__xlated("0: r0 = 1")
+__xlated("1: exit")
+__success
+__naked void may_goto_batch_0(void)
+{
+ asm volatile (
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto1];"
+ "r0 = 1;"
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto1];"
+ "exit;"
+ :
+ : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__description("may_goto batch with offsets 2/1/0")
+__arch_x86_64
+__arch_s390x
+__arch_arm64
+__xlated("0: r0 = 1")
+__xlated("1: exit")
+__success
+__naked void may_goto_batch_1(void)
+{
+ asm volatile (
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto2];"
+ ".8byte %[may_goto3];"
+ "r0 = 1;"
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto2];"
+ ".8byte %[may_goto3];"
+ "exit;"
+ :
+ : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
+ __imm_insn(may_goto2, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 1 /* offset */, 0)),
+ __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__description("may_goto batch with offsets 2/0")
+__arch_x86_64
+__arch_s390x
+__arch_arm64
+__xlated("0: *(u64 *)(r10 -16) = 65535")
+__xlated("1: *(u64 *)(r10 -8) = 0")
+__xlated("2: r11 = *(u64 *)(r10 -16)")
+__xlated("3: if r11 == 0x0 goto pc+6")
+__xlated("4: r11 -= 1")
+__xlated("5: if r11 != 0x0 goto pc+2")
+__xlated("6: r11 = -16")
+__xlated("7: call unknown")
+__xlated("8: *(u64 *)(r10 -16) = r11")
+__xlated("9: r0 = 1")
+__xlated("10: r0 = 2")
+__xlated("11: exit")
+__success
+__naked void may_goto_batch_2(void)
+{
+ asm volatile (
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto3];"
+ "r0 = 1;"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
+ __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c
new file mode 100644
index 000000000000..b891faf50660
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+int gvar;
+
+SEC("raw_tp")
+__description("C code with may_goto 0")
+__success
+int may_goto_c_code(void)
+{
+ int i, tmp[3];
+
+ for (i = 0; i < 3 && can_loop; i++)
+ tmp[i] = 0;
+
+ for (i = 0; i < 3 && can_loop; i++)
+ tmp[i] = gvar - i;
+
+ for (i = 0; i < 3 && can_loop; i++)
+ gvar += tmp[i];
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_meta_access.c b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
new file mode 100644
index 000000000000..d81722fb5f19
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/meta_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("meta access, test1")
+__success __retval(0)
+__naked void meta_access_test1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test2")
+__failure __msg("invalid access to packet, off=-8")
+__naked void meta_access_test2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r2; \
+ r0 -= 8; \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test3")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test4")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r4; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test5")
+__failure __msg("R3 !read_ok")
+__naked void meta_access_test5(void)
+{
+ asm volatile (" \
+ r3 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r3; \
+ r0 += 8; \
+ if r0 > r4 goto l0_%=; \
+ r2 = -8; \
+ call %[bpf_xdp_adjust_meta]; \
+ r0 = *(u8*)(r3 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_xdp_adjust_meta),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test6")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r3; \
+ r0 += 8; \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r0 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test7")
+__success __retval(0)
+__naked void meta_access_test7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r3; \
+ r0 += 8; \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test8")
+__success __retval(0)
+__naked void meta_access_test8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = r2; \
+ r4 += 0xFFFF; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test9")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test9(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = r2; \
+ r4 += 0xFFFF; \
+ r4 += 1; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test10")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test10(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r5 = 42; \
+ r6 = 24; \
+ *(u64*)(r10 - 8) = r5; \
+ lock *(u64 *)(r10 - 8) += r6; \
+ r5 = *(u64*)(r10 - 8); \
+ if r5 > 100 goto l0_%=; \
+ r3 += r5; \
+ r5 = r3; \
+ r6 = r2; \
+ r6 += 8; \
+ if r6 > r5 goto l0_%=; \
+ r2 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test11")
+__success __retval(0)
+__naked void meta_access_test11(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r5 = 42; \
+ r6 = 24; \
+ *(u64*)(r10 - 8) = r5; \
+ lock *(u64 *)(r10 - 8) += r6; \
+ r5 = *(u64*)(r10 - 8); \
+ if r5 > 100 goto l0_%=; \
+ r2 += r5; \
+ r5 = r2; \
+ r6 = r2; \
+ r6 += 8; \
+ if r6 > r3 goto l0_%=; \
+ r5 = *(u8*)(r5 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test12")
+__success __retval(0)
+__naked void meta_access_test12(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r5 = r3; \
+ r5 += 16; \
+ if r5 > r4 goto l0_%=; \
+ r0 = *(u8*)(r3 + 0); \
+ r5 = r2; \
+ r5 += 16; \
+ if r5 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
new file mode 100644
index 000000000000..a4d8814eb5ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+
+SEC("socket")
+__description("MOV32SX, S8")
+__success __success_unpriv __retval(0x23)
+__naked void mov32sx_s8(void)
+{
+ asm volatile (" \
+ w0 = 0xff23; \
+ w0 = (s8)w0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S16")
+__success __success_unpriv __retval(0xFFFFff23)
+__naked void mov32sx_s16(void)
+{
+ asm volatile (" \
+ w0 = 0xff23; \
+ w0 = (s16)w0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S8")
+__success __success_unpriv __retval(-2)
+__naked void mov64sx_s8(void)
+{
+ asm volatile (" \
+ r0 = 0x1fe; \
+ r0 = (s8)r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S16")
+__success __success_unpriv __retval(0xf23)
+__naked void mov64sx_s16(void)
+{
+ asm volatile (" \
+ r0 = 0xf0f23; \
+ r0 = (s16)r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S32")
+__success __success_unpriv __retval(-1)
+__naked void mov64sx_s32(void)
+{
+ asm volatile (" \
+ r0 = 0xfffffffe; \
+ r0 = (s32)r0; \
+ r0 >>= 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov32sx_s8_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = (s8)w0; \
+ /* w1 with s8 range */ \
+ if w1 s> 0x7f goto l0_%=; \
+ if w1 s< -0x80 goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S16, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov32sx_s16_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = (s16)w0; \
+ /* w1 with s16 range */ \
+ if w1 s> 0x7fff goto l0_%=; \
+ if w1 s< -0x80ff goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S16, range_check 2")
+__success __success_unpriv __retval(1)
+__naked void mov32sx_s16_range_2(void)
+{
+ asm volatile (" \
+ r1 = 65535; \
+ w2 = (s16)w1; \
+ r2 >>= 1; \
+ if r2 != 0x7fffFFFF goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 0; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S8, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov64sx_s8_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = (s8)r0; \
+ /* r1 with s8 range */ \
+ if r1 s> 0x7f goto l0_%=; \
+ if r1 s< -0x80 goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S16, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov64sx_s16_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = (s16)r0; \
+ /* r1 with s16 range */ \
+ if r1 s> 0x7fff goto l0_%=; \
+ if r1 s< -0x8000 goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S32, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov64sx_s32_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = (s32)r0; \
+ /* r1 with s32 range */ \
+ if r1 s> 0x7fffffff goto l0_%=; \
+ if r1 s< -0x80000000 goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S16, R10 Sign Extension")
+__failure __msg("R1 type=scalar expected=fp, pkt, pkt_meta, map_key, map_value, mem, ringbuf_mem, buf, trusted_ptr_")
+__failure_unpriv __msg_unpriv("R10 sign-extension part of pointer")
+__naked void mov64sx_s16_r10(void)
+{
+ asm volatile (" \
+ r1 = 553656332; \
+ *(u32 *)(r10 - 8) = r1; \
+ r1 = (s16)r10; \
+ r1 += -8; \
+ r2 = 3; \
+ if r2 <= r1 goto l0_%=; \
+l0_%=: \
+ call %[bpf_trace_printk]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off u32_max")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0")
+__naked void mov64sx_s32_varoff_1(void)
+{
+ asm volatile (" \
+l0_%=: \
+ r3 = *(u8 *)(r10 -387); \
+ w7 = (s8)w3; \
+ if w7 >= 0x2533823b goto l0_%=; \
+ w0 = 0; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension")
+__success __retval(0)
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
+__naked void mov64sx_s32_varoff_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ w7 = (s8)w3; \
+ if w7 s>= 16 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension")
+__success __retval(0)
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
+__naked void mov64sx_s32_varoff_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ r3 |= 0x80; \
+ w7 = (s8)w3; \
+ if w7 s>= -5 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S8, unsigned range_check")
+__success __retval(0)
+__naked void mov64sx_s8_range_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 += 0xfe; \
+ r0 = (s8)r0; \
+ if r0 < 0xfffffffffffffffe goto label_%=; \
+ r0 = 0; \
+ exit; \
+label_%=: \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, unsigned range_check")
+__success __retval(0)
+__naked void mov32sx_s8_range_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0x1; \
+ w0 += 0xfe; \
+ w0 = (s8)w0; \
+ if w0 < 0xfffffffe goto label_%=; \
+ r0 = 0; \
+ exit; \
+label_%=: \
+ exit; \
+ " :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_mtu.c b/tools/testing/selftests/bpf/progs/verifier_mtu.c
new file mode 100644
index 000000000000..256956ea1ac5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_mtu.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("tc/ingress")
+__description("uninit/mtu: write rejected")
+__success
+__caps_unpriv(CAP_BPF|CAP_NET_ADMIN)
+__failure_unpriv __msg_unpriv("invalid read from stack")
+int tc_uninit_mtu(struct __sk_buff *ctx)
+{
+ __u32 mtu;
+
+ bpf_check_mtu(ctx, 0, &mtu, 0, 0);
+ return TCX_PASS;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_mul.c b/tools/testing/selftests/bpf/progs/verifier_mul.c
new file mode 100644
index 000000000000..7145fe3351d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_mul.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Nandakumar Edamana */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+/* Intended to test the abstract multiplication technique(s) used by
+ * the verifier. Using assembly to avoid compiler optimizations.
+ */
+SEC("fentry/bpf_fentry_test1")
+void BPF_PROG(mul_precise, int x)
+{
+ /* First, force the verifier to be uncertain about the value:
+ * unsigned int a = (bpf_get_prandom_u32() & 0x2) | 0x1;
+ *
+ * Assuming the verifier is using tnum, a must be tnum{.v=0x1, .m=0x2}.
+ * Then a * 0x3 would be m0m1 (m for uncertain). Added imprecision
+ * would cause the following to fail, because the required return value
+ * is 0:
+ * return (a * 0x3) & 0x4);
+ */
+ asm volatile ("\
+ call %[bpf_get_prandom_u32];\
+ r0 &= 0x2;\
+ r0 |= 0x1;\
+ r0 *= 0x3;\
+ r0 &= 0x4;\
+ if r0 != 0 goto l0_%=;\
+ r0 = 0;\
+ goto l1_%=;\
+l0_%=:\
+ r0 = 1;\
+l1_%=:\
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
new file mode 100644
index 000000000000..e2cbc5bda65e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include "bpf_misc.h"
+
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("netfilter")
+__description("netfilter invalid context access, size too short")
+__failure __msg("invalid bpf_context access")
+__naked void with_invalid_ctx_access_test1(void)
+{
+ asm volatile (" \
+ r2 = *(u8*)(r1 + %[__bpf_nf_ctx_state]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__bpf_nf_ctx_state, offsetof(struct bpf_nf_ctx, state))
+ : __clobber_all);
+}
+
+SEC("netfilter")
+__description("netfilter invalid context access, size too short")
+__failure __msg("invalid bpf_context access")
+__naked void with_invalid_ctx_access_test2(void)
+{
+ asm volatile (" \
+ r2 = *(u16*)(r1 + %[__bpf_nf_ctx_skb]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__bpf_nf_ctx_skb, offsetof(struct bpf_nf_ctx, skb))
+ : __clobber_all);
+}
+
+SEC("netfilter")
+__description("netfilter invalid context access, past end of ctx")
+__failure __msg("invalid bpf_context access")
+__naked void with_invalid_ctx_access_test3(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[__bpf_nf_ctx_size]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__bpf_nf_ctx_size, sizeof(struct bpf_nf_ctx))
+ : __clobber_all);
+}
+
+SEC("netfilter")
+__description("netfilter invalid context, write")
+__failure __msg("invalid bpf_context access")
+__naked void with_invalid_ctx_access_test4(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ *(u64*)(r2 + 0) = r1; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(__bpf_nf_ctx_skb, offsetof(struct bpf_nf_ctx, skb))
+ : __clobber_all);
+}
+
+#define NF_DROP 0
+#define NF_ACCEPT 1
+
+SEC("netfilter")
+__description("netfilter valid context read and invalid write")
+__failure __msg("only read is supported")
+int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx)
+{
+ struct nf_hook_state *state = (void *)ctx->state;
+
+ state->sk = NULL;
+ return NF_ACCEPT;
+}
+
+SEC("netfilter")
+__description("netfilter test prog with skb and state read access")
+__success __failure_unpriv
+__retval(0)
+int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx)
+{
+ struct __sk_buff *skb = (struct __sk_buff *)ctx->skb;
+ const struct nf_hook_state *state = ctx->state;
+ const struct iphdr *iph;
+ const struct tcphdr *th;
+ u8 buffer_iph[20] = {};
+ u8 buffer_th[40] = {};
+ struct bpf_dynptr ptr;
+ uint8_t ihl;
+
+ if (ctx->skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr))
+ return NF_ACCEPT;
+
+ iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph));
+ if (!iph)
+ return NF_ACCEPT;
+
+ if (state->pf != 2)
+ return NF_ACCEPT;
+
+ ihl = iph->ihl << 2;
+
+ th = bpf_dynptr_slice(&ptr, ihl, buffer_th, sizeof(buffer_th));
+ if (!th)
+ return NF_ACCEPT;
+
+ return th->dest == bpf_htons(22) ? NF_ACCEPT : NF_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c
new file mode 100644
index 000000000000..e1ffa5d32ff0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("netfilter")
+__description("bpf_exit with invalid return code. test1")
+__failure __msg("R0 is not a known value")
+__naked void with_invalid_return_code_test1(void)
+{
+ asm volatile (" \
+ r0 = *(u64*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("netfilter")
+__description("bpf_exit with valid return code. test2")
+__success
+__naked void with_valid_return_code_test2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("netfilter")
+__description("bpf_exit with valid return code. test3")
+__success
+__naked void with_valid_return_code_test3(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("netfilter")
+__description("bpf_exit with invalid return code. test4")
+__failure __msg("R0 has smin=2 smax=2 should have been in [0, 1]")
+__naked void with_invalid_return_code_test4(void)
+{
+ asm volatile (" \
+ r0 = 2; \
+ exit; \
+" ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c
new file mode 100644
index 000000000000..f37713a265ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("or_jmp32_k: bit ops + branch on unknown value")
+__failure
+__msg("R0 invalid mem access 'scalar'")
+__naked void or_jmp32_k(void)
+{
+ asm volatile (" \
+ r0 = 0xffffffff; \
+ r0 /= 1; \
+ r1 = 0; \
+ w1 = -1; \
+ w1 >>= 1; \
+ w0 &= w1; \
+ w0 |= 2; \
+ if w0 != 0x7ffffffd goto l1; \
+ r0 = 1; \
+ exit; \
+l3: \
+ r0 = 5; \
+ *(u64*)(r0 - 8) = r0; \
+ exit; \
+l2: \
+ w0 -= 0xe; \
+ if w0 == 1 goto l3; \
+ r0 = 4; \
+ exit; \
+l1: \
+ w0 -= 0x7ffffff0; \
+ if w0 s>= 0xe goto l2; \
+ r0 = 3; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
new file mode 100644
index 000000000000..1fe090cd6744
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 SUSE LLC */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0xfffffff8 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (87) r2 = -r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 8")
+__naked int bpf_neg(void)
+{
+ asm volatile (
+ "r2 = 8;"
+ "r2 = -r2;"
+ "if r2 != -8 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d4) r2 = le16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_le(void)
+{
+ asm volatile (
+ "r2 = 0;"
+ "r2 = le16 r2;"
+ "if r2 != 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (dc) r2 = be16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_be(void)
+{
+ asm volatile (
+ "r2 = 0;"
+ "r2 = be16 r2;"
+ "if r2 != 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ __clang_major__ >= 18
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d7) r2 = bswap16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_bswap(void)
+{
+ asm volatile (
+ "r2 = 0;"
+ "r2 = bswap16 r2;"
+ "if r2 != 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (db) r2 = load_acquire((u64 *)(r10 -8))")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_load_acquire(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" /* r2 = load_acquire((u64 *)(r10 - 8)); */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r2 = r10")
+__msg("mark_precise: frame0: regs=r1 stack= before 2: (79) r1 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (db) store_release((u64 *)(r10 -8), r1)")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_store_release(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r10 - 8), r1); */
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = r10;"
+ "r2 += r1;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+#endif /* v4 instruction */
+
+SEC("?raw_tp")
+__success __log_level(2)
+/*
+ * Without the bug fix there will be no history between "last_idx 3 first_idx 3"
+ * and "parent state regs=" lines. "R0=6" parts are here to help anchor
+ * expected log messages to the one specific mark_chain_precision operation.
+ *
+ * This is quite fragile: if verifier checkpointing heuristic changes, this
+ * might need adjusting.
+ */
+__msg("2: (07) r0 += 1 ; R0=6")
+__msg("3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: parent state regs= stack=: R0=P4")
+__msg("3: R0=6")
+__naked int state_loop_first_last_equal(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "l0_%=:"
+ "r0 += 1;"
+ "r0 += 1;"
+ /* every few iterations we'll have a checkpoint here with
+ * first_idx == last_idx, potentially confusing precision
+ * backtracking logic
+ */
+ "if r0 >= 10 goto l1_%=;" /* checkpoint + mark_precise */
+ "goto l0_%=;"
+ "l1_%=:"
+ "exit;"
+ ::: __clobber_common
+ );
+}
+
+__used __naked static void __bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r2 = 2314885393468386424 ll;"
+ "goto +0;"
+ "if r2 <= r10 goto +3;"
+ "if r1 >= -1835016 goto +0;"
+ "if r2 <= 8 goto +0;"
+ "if r3 <= 0 goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (bd) if r2 <= r10 goto pc+3")
+__msg("9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("10: (b5) if r2 <= 0x8 goto pc+0")
+__msg("mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3")
+__msg("mark_precise: frame1: regs=r2 stack= before 7: (05) goto pc+0")
+__naked void bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r3 = 0 ll;"
+ "call __bpf_cond_op_r10;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("3: (bf) r3 = r10")
+__msg("4: (bd) if r3 <= r2 goto pc+1")
+__msg("5: (b5) if r2 <= 0x8 goto pc+2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bd) if r3 <= r2 goto pc+1")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__naked void bpf_cond_op_not_r10(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 2314885393468386424 ll;"
+ "r3 = r10;"
+ "if r3 <= r2 goto +1;"
+ "if r2 <= 8 goto +2;"
+ "r0 = 2 ll;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (84) w0 = -w0 ; R0=0xffffffff")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_2(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -1 is allowed
+ */
+ asm volatile (
+ "r0 = 1;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_3(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (87) r0 = -r0 ; R0=-1")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_4(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -1 is allowed
+ */
+ asm volatile (
+ "r0 = 1;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_5(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_prevent_map_lookup.c b/tools/testing/selftests/bpf/progs/verifier_prevent_map_lookup.c
new file mode 100644
index 000000000000..8d27c780996f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_prevent_map_lookup.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/prevent_map_lookup.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_stacktrace SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 8);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog2_socket SEC(".maps");
+
+SEC("perf_event")
+__description("prevent map lookup in stack trace")
+__failure __msg("cannot pass map_type 7 into func bpf_map_lookup_elem")
+__naked void map_lookup_in_stack_trace(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_stacktrace] ll; \
+ call %[bpf_map_lookup_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_stacktrace)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("prevent map lookup in prog array")
+__failure __msg("cannot pass map_type 3 into func bpf_map_lookup_elem")
+__failure_unpriv
+__naked void map_lookup_in_prog_array(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_prog2_socket] ll; \
+ call %[bpf_map_lookup_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_prog2_socket)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
new file mode 100644
index 000000000000..1ecd34ebde19
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+/* From include/linux/filter.h */
+#define MAX_BPF_STACK 512
+
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+
+struct elem {
+ struct bpf_timer t;
+ char pad[256];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+SEC("kprobe")
+__description("Private stack, single prog")
+__success
+__arch_x86_64
+__jited(" movabsq $0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
+__jited(" movl $0x2a, %edi")
+__jited(" movq %rdi, -0x100(%r9)")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
+__naked void private_stack_single_prog(void)
+{
+ asm volatile (" \
+ r1 = 42; \
+ *(u64 *)(r10 - 256) = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("raw_tp")
+__description("No private stack")
+__success
+__arch_x86_64
+__jited(" subq $0x8, %rsp")
+__arch_arm64
+__jited(" mov x25, sp")
+__jited(" sub sp, sp, #0x10")
+__naked void no_private_stack_nested(void)
+{
+ asm volatile (" \
+ r1 = 42; \
+ *(u64 *)(r10 - 8) = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+__used
+__naked static void cumulative_stack_depth_subprog(void)
+{
+ asm volatile (" \
+ r1 = 41; \
+ *(u64 *)(r10 - 32) = r1; \
+ call %[bpf_get_smp_processor_id]; \
+ exit; \
+" :
+ : __imm(bpf_get_smp_processor_id)
+ : __clobber_all);
+}
+
+SEC("kprobe")
+__description("Private stack, subtree > MAX_BPF_STACK")
+__success
+__arch_x86_64
+/* private stack fp for the main prog */
+__jited(" movabsq $0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
+__jited(" movl $0x2a, %edi")
+__jited(" movq %rdi, -0x200(%r9)")
+__jited(" pushq %r9")
+__jited(" callq 0x{{.*}}")
+__jited(" popq %r9")
+__jited(" xorl %eax, %eax")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl {{.*}}")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
+__naked void private_stack_nested_1(void)
+{
+ asm volatile (" \
+ r1 = 42; \
+ *(u64 *)(r10 - %[max_bpf_stack]) = r1; \
+ call cumulative_stack_depth_subprog; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(max_bpf_stack, MAX_BPF_STACK)
+ : __clobber_all);
+}
+
+__naked __noinline __used
+static unsigned long loop_callback(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 42; \
+ *(u64 *)(r10 - 512) = r1; \
+ call cumulative_stack_depth_subprog; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common);
+}
+
+SEC("raw_tp")
+__description("Private stack, callback")
+__success
+__arch_x86_64
+/* for func loop_callback */
+__jited("func #1")
+__jited(" endbr64")
+__jited(" nopl (%rax,%rax)")
+__jited(" nopl (%rax)")
+__jited(" pushq %rbp")
+__jited(" movq %rsp, %rbp")
+__jited(" endbr64")
+__jited(" movabsq $0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
+__jited(" pushq %r9")
+__jited(" callq")
+__jited(" popq %r9")
+__jited(" movl $0x2a, %edi")
+__jited(" movq %rdi, -0x200(%r9)")
+__jited(" pushq %r9")
+__jited(" callq")
+__jited(" popq %r9")
+__arch_arm64
+__jited("func #1")
+__jited("...")
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x7, #0x0")
+__jited(" ldp x25, x27, [sp], {{.*}}")
+__naked void private_stack_callback(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ r2 = %[loop_callback]; \
+ r3 = 0; \
+ r4 = 0; \
+ call %[bpf_loop]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_ptr(loop_callback),
+ __imm(bpf_loop)
+ : __clobber_common);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("Private stack, exception in main prog")
+__success __retval(0)
+__arch_x86_64
+__jited(" pushq %r9")
+__jited(" callq")
+__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x29, x30, [sp, #-0x10]!")
+__jited(" mov x29, sp")
+__jited(" stp xzr, x26, [sp, #-0x10]!")
+__jited(" mov x26, sp")
+__jited(" stp x19, x20, [sp, #-0x10]!")
+__jited(" stp x21, x22, [sp, #-0x10]!")
+__jited(" stp x23, x24, [sp, #-0x10]!")
+__jited(" stp x25, x26, [sp, #-0x10]!")
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" mov x0, #0x0")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
+int private_stack_exception_main_prog(void)
+{
+ asm volatile (" \
+ r1 = 42; \
+ *(u64 *)(r10 - 512) = r1; \
+" ::: __clobber_common);
+
+ bpf_throw(0);
+ return 0;
+}
+
+__used static int subprog_exception(void)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("Private stack, exception in subprog")
+__success __retval(0)
+__arch_x86_64
+__jited(" movq %rdi, -0x200(%r9)")
+__jited(" pushq %r9")
+__jited(" callq")
+__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
+int private_stack_exception_sub_prog(void)
+{
+ asm volatile (" \
+ r1 = 42; \
+ *(u64 *)(r10 - 512) = r1; \
+ call subprog_exception; \
+" ::: __clobber_common);
+
+ return 0;
+}
+
+int glob;
+__noinline static void subprog2(int *val)
+{
+ glob += val[0] * 2;
+}
+
+__noinline static void subprog1(int *val)
+{
+ int tmp[64] = {};
+
+ tmp[0] = *val;
+ subprog2(tmp);
+}
+
+__noinline static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
+{
+ subprog1(key);
+ return 0;
+}
+
+__noinline static int timer_cb2(void *map, int *key, struct bpf_timer *timer)
+{
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("Private stack, async callback, not nested")
+__success __retval(0)
+__arch_x86_64
+__jited(" movabsq $0x{{.*}}, %r9")
+__arch_arm64
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+int private_stack_async_callback_1(void)
+{
+ struct bpf_timer *arr_timer;
+ int array_key = 0;
+
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+
+ bpf_timer_init(arr_timer, &array, 1);
+ bpf_timer_set_callback(arr_timer, timer_cb2);
+ bpf_timer_start(arr_timer, 0, 0);
+ subprog1(&array_key);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("Private stack, async callback, potential nesting")
+__success __retval(0)
+__arch_x86_64
+__jited(" subq $0x100, %rsp")
+__arch_arm64
+__jited(" sub sp, sp, #0x100")
+int private_stack_async_callback_2(void)
+{
+ struct bpf_timer *arr_timer;
+ int array_key = 0;
+
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+
+ bpf_timer_init(arr_timer, &array, 1);
+ bpf_timer_set_callback(arr_timer, timer_cb1);
+ bpf_timer_start(arr_timer, 0, 0);
+ subprog1(&array_key);
+ return 0;
+}
+
+#else
+
+SEC("kprobe")
+__description("private stack is not supported, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
new file mode 100644
index 000000000000..c689665e07b9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/raw_stack.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("raw_stack: no skb_load_bytes")
+__success
+__failure_unpriv __msg_unpriv("invalid read from stack R6 off=-8 size=8")
+__naked void stack_no_skb_load_bytes(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = 8; \
+ /* Call to skb_load_bytes() omitted. */ \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, negative len")
+__failure __msg("R4 min value is negative")
+__naked void skb_load_bytes_negative_len(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = -8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, negative len 2")
+__failure __msg("R4 min value is negative")
+__naked void load_bytes_negative_len_2(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = %[__imm_0]; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__imm_0, ~0)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, zero len")
+__failure __msg("R4 invalid zero-sized read: u64=[0,0]")
+__naked void skb_load_bytes_zero_len(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = 0; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, no init")
+__success __retval(0)
+__naked void skb_load_bytes_no_init(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, init")
+__success __retval(0)
+__naked void stack_skb_load_bytes_init(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = 0xcafe; \
+ *(u64*)(r6 + 0) = r3; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs around bounds")
+__success __retval(0)
+__naked void bytes_spilled_regs_around_bounds(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -16; \
+ *(u64*)(r6 - 8) = r1; \
+ *(u64*)(r6 + 8) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 - 8); \
+ r2 = *(u64*)(r6 + 8); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ r2 = *(u32*)(r2 + %[__sk_buff_priority]); \
+ r0 += r2; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs corruption")
+__failure __msg("R0 invalid mem access 'scalar'")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void load_bytes_spilled_regs_corruption(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs corruption 2")
+__failure __msg("R3 invalid mem access 'scalar'")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void bytes_spilled_regs_corruption_2(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -16; \
+ *(u64*)(r6 - 8) = r1; \
+ *(u64*)(r6 + 0) = r1; \
+ *(u64*)(r6 + 8) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 - 8); \
+ r2 = *(u64*)(r6 + 8); \
+ r3 = *(u64*)(r6 + 0); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ r2 = *(u32*)(r2 + %[__sk_buff_priority]); \
+ r0 += r2; \
+ r3 = *(u32*)(r3 + %[__sk_buff_pkt_type]); \
+ r0 += r3; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_pkt_type, offsetof(struct __sk_buff, pkt_type)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs + data")
+__success __retval(0)
+__naked void load_bytes_spilled_regs_data(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -16; \
+ *(u64*)(r6 - 8) = r1; \
+ *(u64*)(r6 + 0) = r1; \
+ *(u64*)(r6 + 8) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 - 8); \
+ r2 = *(u64*)(r6 + 8); \
+ r3 = *(u64*)(r6 + 0); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ r2 = *(u32*)(r2 + %[__sk_buff_priority]); \
+ r0 += r2; \
+ r0 += r3; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 1")
+__failure __msg("invalid write to stack R3 off=-513 size=8")
+__naked void load_bytes_invalid_access_1(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -513; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 2")
+__failure __msg("invalid write to stack R3 off=-1 size=8")
+__naked void load_bytes_invalid_access_2(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 3")
+__failure __msg("R4 min value is negative")
+__naked void load_bytes_invalid_access_3(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += 0xffffffff; \
+ r3 = r6; \
+ r4 = 0xffffffff; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 4")
+__failure
+__msg("R4 unbounded memory access, use 'var &= const' or 'if (var < const)'")
+__naked void load_bytes_invalid_access_4(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -1; \
+ r3 = r6; \
+ r4 = 0x7fffffff; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 5")
+__failure
+__msg("R4 unbounded memory access, use 'var &= const' or 'if (var < const)'")
+__naked void load_bytes_invalid_access_5(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -512; \
+ r3 = r6; \
+ r4 = 0x7fffffff; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 6")
+__failure __msg("invalid zero-sized read")
+__naked void load_bytes_invalid_access_6(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -512; \
+ r3 = r6; \
+ r4 = 0; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, large access")
+__success __retval(0)
+__naked void skb_load_bytes_large_access(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -512; \
+ r3 = r6; \
+ r4 = 512; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c b/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c
new file mode 100644
index 000000000000..14a0172e2141
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/raw_tp_writable.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("raw_tracepoint.w")
+__description("raw_tracepoint_writable: reject variable offset")
+__failure
+__msg("R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void tracepoint_writable_reject_variable_offset(void)
+{
+ asm volatile (" \
+ /* r6 is our tp buffer */ \
+ r6 = *(u64*)(r1 + 0); \
+ r1 = %[map_hash_8b] ll; \
+ /* move the key (== 0) to r10-8 */ \
+ w0 = 0; \
+ r2 = r10; \
+ r2 += -8; \
+ *(u64*)(r2 + 0) = r0; \
+ /* lookup in the map */ \
+ call %[bpf_map_lookup_elem]; \
+ /* exit clean if null */ \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* shift the buffer pointer to a variable location */\
+ r0 = *(u32*)(r0 + 0); \
+ r6 += r0; \
+ /* clobber whatever's there */ \
+ r7 = 4242; \
+ *(u64*)(r6 + 0) = r7; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
new file mode 100644
index 000000000000..910365201f68
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
@@ -0,0 +1,1495 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ref_tracking.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#define BPF_SK_LOOKUP(func) \
+ /* struct bpf_sock_tuple tuple = {} */ \
+ "r2 = 0;" \
+ "*(u32*)(r10 - 8) = r2;" \
+ "*(u64*)(r10 - 16) = r2;" \
+ "*(u64*)(r10 - 24) = r2;" \
+ "*(u64*)(r10 - 32) = r2;" \
+ "*(u64*)(r10 - 40) = r2;" \
+ "*(u64*)(r10 - 48) = r2;" \
+ /* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */ \
+ "r2 = r10;" \
+ "r2 += -48;" \
+ "r3 = %[sizeof_bpf_sock_tuple];"\
+ "r4 = 0;" \
+ "r5 = 0;" \
+ "call %[" #func "];"
+
+struct bpf_key {} __attribute__((preserve_access_index));
+
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void __kfunc_btf_root(void)
+{
+ bpf_key_put(0);
+ bpf_lookup_system_key(0);
+ bpf_lookup_user_key(0, 0);
+}
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+void dummy_prog_42_tc(void);
+void dummy_prog_24_tc(void);
+void dummy_prog_loop1_tc(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog1_tc SEC(".maps") = {
+ .values = {
+ [0] = (void *)&dummy_prog_42_tc,
+ [1] = (void *)&dummy_prog_loop1_tc,
+ [2] = (void *)&dummy_prog_24_tc,
+ },
+};
+
+SEC("tc")
+__auxiliary
+__naked void dummy_prog_42_tc(void)
+{
+ asm volatile ("r0 = 42; exit;");
+}
+
+SEC("tc")
+__auxiliary
+__naked void dummy_prog_24_tc(void)
+{
+ asm volatile ("r0 = 24; exit;");
+}
+
+SEC("tc")
+__auxiliary
+__naked void dummy_prog_loop1_tc(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_tc] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak potential reference")
+__failure __msg("Unreleased reference")
+__naked void reference_tracking_leak_potential_reference(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak potential reference to sock_common")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_to_sock_common_1(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_skc_lookup_tcp)
+" r6 = r0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_skc_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak potential reference on stack")
+__failure __msg("Unreleased reference")
+__naked void leak_potential_reference_on_stack(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r4 = r10; \
+ r4 += -8; \
+ *(u64*)(r4 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak potential reference on stack 2")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_on_stack_2(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r4 = r10; \
+ r4 += -8; \
+ *(u64*)(r4 + 0) = r0; \
+ r0 = 0; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: zero potential reference")
+__failure __msg("Unreleased reference")
+__naked void reference_tracking_zero_potential_reference(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r0 = 0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: zero potential reference to sock_common")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_to_sock_common_2(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_skc_lookup_tcp)
+" r0 = 0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_skc_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: copy and zero potential references")
+__failure __msg("Unreleased reference")
+__naked void copy_and_zero_potential_references(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r7 = r0; \
+ r0 = 0; \
+ r7 = 0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: acquire/release user key reference")
+__success
+__naked void acquire_release_user_key_reference(void)
+{
+ asm volatile (" \
+ r1 = -3; \
+ r2 = 0; \
+ call %[bpf_lookup_user_key]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_key_put]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put),
+ __imm(bpf_lookup_user_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: acquire/release system key reference")
+__success
+__naked void acquire_release_system_key_reference(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ call %[bpf_lookup_system_key]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_key_put]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put),
+ __imm(bpf_lookup_system_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: release user key reference without check")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+__naked void user_key_reference_without_check(void)
+{
+ asm volatile (" \
+ r1 = -3; \
+ r2 = 0; \
+ call %[bpf_lookup_user_key]; \
+ r1 = r0; \
+ call %[bpf_key_put]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put),
+ __imm(bpf_lookup_user_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: release system key reference without check")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+__naked void system_key_reference_without_check(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ call %[bpf_lookup_system_key]; \
+ r1 = r0; \
+ call %[bpf_key_put]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put),
+ __imm(bpf_lookup_system_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: release with NULL key pointer")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+__naked void release_with_null_key_pointer(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_key_put]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: leak potential reference to user key")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_to_user_key(void)
+{
+ asm volatile (" \
+ r1 = -3; \
+ r2 = 0; \
+ call %[bpf_lookup_user_key]; \
+ exit; \
+" :
+ : __imm(bpf_lookup_user_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: leak potential reference to system key")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_to_system_key(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ call %[bpf_lookup_system_key]; \
+ exit; \
+" :
+ : __imm(bpf_lookup_system_key)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference without check")
+__failure __msg("type=sock_or_null expected=sock")
+__naked void tracking_release_reference_without_check(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* reference in r0 may be NULL */ \
+ r1 = r0; \
+ r2 = 0; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference to sock_common without check")
+__failure __msg("type=sock_common_or_null expected=sock")
+__naked void to_sock_common_without_check(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_skc_lookup_tcp)
+" /* reference in r0 may be NULL */ \
+ r1 = r0; \
+ r2 = 0; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_release),
+ __imm(bpf_skc_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference")
+__success __retval(0)
+__naked void reference_tracking_release_reference(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference to sock_common")
+__success __retval(0)
+__naked void release_reference_to_sock_common(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_skc_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release),
+ __imm(bpf_skc_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference 2")
+__success __retval(0)
+__naked void reference_tracking_release_reference_2(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference twice")
+__failure __msg("type=scalar expected=sock")
+__naked void reference_tracking_release_reference_twice(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference twice inside branch")
+__failure __msg("type=scalar expected=sock")
+__naked void release_reference_twice_inside_branch(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ r6 = r0; \
+ if r0 == 0 goto l0_%=; /* goto end */ \
+ call %[bpf_sk_release]; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: alloc, check, free in one subbranch")
+__failure __msg("Unreleased reference")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void check_free_in_one_subbranch(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 16; \
+ /* if (offsetof(skb, mark) > data_len) exit; */ \
+ if r0 <= r3 goto l0_%=; \
+ exit; \
+l0_%=: r6 = *(u32*)(r2 + %[__sk_buff_mark]); \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r6 == 0 goto l1_%=; /* mark == 0? */\
+ /* Leak reference in R0 */ \
+ exit; \
+l1_%=: if r0 == 0 goto l2_%=; /* sk NULL? */ \
+ r1 = r0; \
+ call %[bpf_sk_release]; \
+l2_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: alloc, check, free in both subbranches")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void check_free_in_both_subbranches(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 16; \
+ /* if (offsetof(skb, mark) > data_len) exit; */ \
+ if r0 <= r3 goto l0_%=; \
+ exit; \
+l0_%=: r6 = *(u32*)(r2 + %[__sk_buff_mark]); \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r6 == 0 goto l1_%=; /* mark == 0? */\
+ if r0 == 0 goto l2_%=; /* sk NULL? */ \
+ r1 = r0; \
+ call %[bpf_sk_release]; \
+l2_%=: exit; \
+l1_%=: if r0 == 0 goto l3_%=; /* sk NULL? */ \
+ r1 = r0; \
+ call %[bpf_sk_release]; \
+l3_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: free reference in subprog")
+__success __retval(0)
+__naked void call_free_reference_in_subprog(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; /* unchecked reference */ \
+ call call_free_reference_in_subprog__1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void call_free_reference_in_subprog__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r2 = r1; \
+ if r2 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: free reference in subprog and outside")
+__failure __msg("type=scalar expected=sock")
+__naked void reference_in_subprog_and_outside(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; /* unchecked reference */ \
+ r6 = r0; \
+ call reference_in_subprog_and_outside__1; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void reference_in_subprog_and_outside__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r2 = r1; \
+ if r2 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: alloc & leak reference in subprog")
+__failure __msg("Unreleased reference")
+__naked void alloc_leak_reference_in_subprog(void)
+{
+ asm volatile (" \
+ r4 = r10; \
+ r4 += -8; \
+ call alloc_leak_reference_in_subprog__1; \
+ r1 = r0; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void alloc_leak_reference_in_subprog__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r6 = r4; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* spill unchecked sk_ptr into stack of caller */\
+ *(u64*)(r6 + 0) = r0; \
+ r1 = r0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: alloc in subprog, release outside")
+__success __retval(POINTER_VALUE)
+__naked void alloc_in_subprog_release_outside(void)
+{
+ asm volatile (" \
+ r4 = r10; \
+ call alloc_in_subprog_release_outside__1; \
+ r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void alloc_in_subprog_release_outside__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" exit; /* return sk */ \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: sk_ptr leak into caller stack")
+__failure __msg("Unreleased reference")
+__naked void ptr_leak_into_caller_stack(void)
+{
+ asm volatile (" \
+ r4 = r10; \
+ r4 += -8; \
+ call ptr_leak_into_caller_stack__1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void ptr_leak_into_caller_stack__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r5 = r10; \
+ r5 += -8; \
+ *(u64*)(r5 + 0) = r4; \
+ call ptr_leak_into_caller_stack__2; \
+ /* spill unchecked sk_ptr into stack of caller */\
+ r5 = r10; \
+ r5 += -8; \
+ r4 = *(u64*)(r5 + 0); \
+ *(u64*)(r4 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void ptr_leak_into_caller_stack__2(void)
+{
+ asm volatile (" \
+ /* subprog 2 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: sk_ptr spill into caller stack")
+__success __retval(0)
+__naked void ptr_spill_into_caller_stack(void)
+{
+ asm volatile (" \
+ r4 = r10; \
+ r4 += -8; \
+ call ptr_spill_into_caller_stack__1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void ptr_spill_into_caller_stack__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r5 = r10; \
+ r5 += -8; \
+ *(u64*)(r5 + 0) = r4; \
+ call ptr_spill_into_caller_stack__2; \
+ /* spill unchecked sk_ptr into stack of caller */\
+ r5 = r10; \
+ r5 += -8; \
+ r4 = *(u64*)(r5 + 0); \
+ *(u64*)(r4 + 0) = r0; \
+ if r0 == 0 goto l0_%=; \
+ /* now the sk_ptr is verified, free the reference */\
+ r1 = *(u64*)(r4 + 0); \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void ptr_spill_into_caller_stack__2(void)
+{
+ asm volatile (" \
+ /* subprog 2 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: allow LD_ABS")
+__success __retval(0)
+__naked void reference_tracking_allow_ld_abs(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: r0 = *(u8*)skb[0]; \
+ r0 = *(u16*)skb[0]; \
+ r0 = *(u32*)skb[0]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: forbid LD_ABS while holding reference")
+__failure __msg("BPF_LD_[ABS|IND] would lead to reference leak")
+__naked void ld_abs_while_holding_reference(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r0 = *(u8*)skb[0]; \
+ r0 = *(u16*)skb[0]; \
+ r0 = *(u32*)skb[0]; \
+ r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: allow LD_IND")
+__success __retval(1)
+__naked void reference_tracking_allow_ld_ind(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: r7 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r7; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple)),
+ __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: forbid LD_IND while holding reference")
+__failure __msg("BPF_LD_[ABS|IND] would lead to reference leak")
+__naked void ld_ind_while_holding_reference(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r4 = r0; \
+ r7 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r7; \
+ r1 = r4; \
+ if r1 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple)),
+ __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: check reference or tail call")
+__success __retval(0)
+__naked void check_reference_or_tail_call(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* if (sk) bpf_sk_release() */ \
+ r1 = r0; \
+ if r1 != 0 goto l0_%=; \
+ /* bpf_tail_call() */ \
+ r3 = 3; \
+ r2 = %[map_prog1_tc] ll; \
+ r1 = r7; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference then tail call")
+__success __retval(0)
+__naked void release_reference_then_tail_call(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* if (sk) bpf_sk_release() */ \
+ r1 = r0; \
+ if r1 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: /* bpf_tail_call() */ \
+ r3 = 3; \
+ r2 = %[map_prog1_tc] ll; \
+ r1 = r7; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak possible reference over tail call")
+__failure __msg("tail_call would lead to reference leak")
+__naked void possible_reference_over_tail_call(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+ /* Look up socket and store in REG_6 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* bpf_tail_call() */ \
+ r6 = r0; \
+ r3 = 3; \
+ r2 = %[map_prog1_tc] ll; \
+ r1 = r7; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ /* if (sk) bpf_sk_release() */ \
+ r1 = r6; \
+ if r1 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak checked reference over tail call")
+__failure __msg("tail_call would lead to reference leak")
+__naked void checked_reference_over_tail_call(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+ /* Look up socket and store in REG_6 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ /* if (!sk) goto end */ \
+ if r0 == 0 goto l0_%=; \
+ /* bpf_tail_call() */ \
+ r3 = 0; \
+ r2 = %[map_prog1_tc] ll; \
+ r1 = r7; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ r1 = r6; \
+l0_%=: call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: mangle and release sock_or_null")
+__failure __msg("R1 pointer arithmetic on sock_or_null prohibited")
+__naked void and_release_sock_or_null(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ r1 += 5; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: mangle and release sock")
+__failure __msg("R1 pointer arithmetic on sock prohibited")
+__naked void tracking_mangle_and_release_sock(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r1 += 5; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: access member")
+__success __retval(0)
+__naked void reference_tracking_access_member(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u32*)(r0 + 4); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: write to member")
+__failure __msg("cannot write into sock")
+__naked void reference_tracking_write_to_member(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 42 ll; \
+ *(u32*)(r1 + %[bpf_sock_mark]) = r2; \
+ r1 = r6; \
+l0_%=: call %[bpf_sk_release]; \
+ r0 = 0 ll; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_mark, offsetof(struct bpf_sock, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: invalid 64-bit access of member")
+__failure __msg("invalid sock access off=0 size=8")
+__naked void _64_bit_access_of_member(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u64*)(r0 + 0); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: access after release")
+__failure __msg("!read_ok")
+__naked void reference_tracking_access_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+ r2 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: direct access for lookup")
+__success __retval(0)
+__naked void tracking_direct_access_for_lookup(void)
+{
+ asm volatile (" \
+ /* Check that the packet is at least 64B long */\
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 64; \
+ if r0 > r3 goto l0_%=; \
+ /* sk = sk_lookup_tcp(ctx, skb->data, ...) */ \
+ r3 = %[sizeof_bpf_sock_tuple]; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_sk_lookup_tcp]; \
+ r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u32*)(r0 + 4); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_tcp_sock() after release")
+__failure __msg("invalid mem access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void bpf_tcp_sock_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r7 = r0; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r7 + %[bpf_tcp_sock_snd_cwnd]); \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_sk_fullsock() after release")
+__failure __msg("invalid mem access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void bpf_sk_fullsock_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r7 = r0; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_sk_fullsock(tp) after release")
+__failure __msg("invalid mem access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void sk_fullsock_tp_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_fullsock]; \
+ r1 = r6; \
+ r6 = r0; \
+ call %[bpf_sk_release]; \
+ if r6 != 0 goto l2_%=; \
+ exit; \
+l2_%=: r0 = *(u32*)(r6 + %[bpf_sock_type]); \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use sk after bpf_sk_release(tp)")
+__failure __msg("invalid mem access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void after_bpf_sk_release_tp(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r6 + %[bpf_sock_type]); \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)")
+__success __retval(0)
+__naked void after_bpf_sk_release_sk(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_get_listener_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r6; \
+ r6 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r6 + %[bpf_sock_src_port]); \
+ exit; \
+" :
+ : __imm(bpf_get_listener_sock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_src_port, offsetof(struct bpf_sock, src_port)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: bpf_sk_release(listen_sk)")
+__failure __msg("R1 must be referenced when passed to release function")
+__naked void bpf_sk_release_listen_sk(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_get_listener_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r6 + %[bpf_sock_type]); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_get_listener_sock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+/* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
+SEC("tc")
+__description("reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)")
+__failure __msg("invalid mem access")
+__naked void and_bpf_tcp_sock_sk(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ r1 = r6; \
+ call %[bpf_tcp_sock]; \
+ r8 = r0; \
+ if r7 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r0 = *(u32*)(r8 + %[bpf_tcp_sock_snd_cwnd]); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: branch tracking valid pointer null comparison")
+__success __retval(0)
+__naked void tracking_valid_pointer_null_comparison(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ r3 = 1; \
+ if r6 != 0 goto l0_%=; \
+ r3 = 0; \
+l0_%=: if r6 == 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: branch tracking valid pointer value comparison")
+__failure __msg("Unreleased reference")
+__naked void tracking_valid_pointer_value_comparison(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ r3 = 1; \
+ if r6 == 0 goto l0_%=; \
+ r3 = 0; \
+ if r6 == 1234 goto l0_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: bpf_sk_release(btf_tcp_sock)")
+__success
+__retval(0)
+__naked void sk_release_btf_tcp_sock(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_skc_to_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_skc_to_tcp_sock),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_skc_to_tcp_sock() after release")
+__failure __msg("invalid mem access")
+__naked void to_tcp_sock_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_skc_to_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r7 = r0; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ r0 = *(u8*)(r7 + 0); \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_skc_to_tcp_sock),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("reference tracking: try to leak released ptr reg")
+__success __failure_unpriv __msg_unpriv("R8 !read_ok")
+__retval(0)
+__naked void to_leak_released_ptr_reg(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u32*)(r10 - 4) = r0; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r9 = r0; \
+ r0 = 0; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r8 = r0; \
+ r1 = r8; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard]; \
+ r0 = 0; \
+ *(u64*)(r9 + 0) = r8; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_ringbuf_discard),
+ __imm(bpf_ringbuf_reserve),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_reg_equal.c b/tools/testing/selftests/bpf/progs/verifier_reg_equal.c
new file mode 100644
index 000000000000..dc1d8c30fb0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_reg_equal.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("check w reg equal if r reg upper32 bits 0")
+__success
+__naked void subreg_equality_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64 *)(r10 - 8) = r0; \
+ r2 = *(u32 *)(r10 - 8); \
+ /* At this point upper 4-bytes of r2 are 0, \
+ * thus insn w3 = w2 should propagate reg id, \
+ * and w2 < 9 comparison would also propagate \
+ * the range for r3. \
+ */ \
+ w3 = w2; \
+ if w2 < 9 goto l0_%=; \
+ exit; \
+l0_%=: if r3 < 9 goto l1_%=; \
+ /* r1 read is illegal at this point */ \
+ r0 -= r1; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check w reg not equal if r reg upper32 bits not 0")
+__failure __msg("R1 !read_ok")
+__naked void subreg_equality_2(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ r2 = r0; \
+ /* Upper 4-bytes of r2 may not be 0, thus insn \
+ * w3 = w2 should not propagate reg id, and \
+ * w2 < 9 comparison should not propagate \
+ * the range for r3 either. \
+ */ \
+ w3 = w2; \
+ if w2 < 9 goto l0_%=; \
+ exit; \
+l0_%=: if r3 < 9 goto l1_%=; \
+ /* r1 read is illegal at this point */ \
+ r0 -= r1; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_regalloc.c b/tools/testing/selftests/bpf/progs/verifier_regalloc.c
new file mode 100644
index 000000000000..ee5ddea87c91
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_regalloc.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/regalloc.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("tracepoint")
+__description("regalloc basic")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_basic(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 20 goto l0_%=; \
+ if r2 s< 0 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc negative")
+__failure __msg("invalid access to map value, value_size=48 off=48 size=1")
+__naked void regalloc_negative(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 24 goto l0_%=; \
+ if r2 s< 0 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r0 = *(u8*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc src_reg mark")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_src_reg_mark(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 20 goto l0_%=; \
+ r3 = 0; \
+ if r3 s>= r2 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc src_reg negative")
+__failure __msg("invalid access to map value, value_size=48 off=44 size=8")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_src_reg_negative(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 22 goto l0_%=; \
+ r3 = 0; \
+ if r3 s>= r2 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc and spill")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_and_spill(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 20 goto l0_%=; \
+ /* r0 has upper bound that should propagate into r2 */\
+ *(u64*)(r10 - 8) = r2; /* spill r2 */ \
+ r0 = 0; \
+ r2 = 0; /* clear r0 and r2 */\
+ r3 = *(u64*)(r10 - 8); /* fill r3 */ \
+ if r0 s>= r3 goto l0_%=; \
+ /* r3 has lower and upper bounds */ \
+ r7 += r3; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc and spill negative")
+__failure __msg("invalid access to map value, value_size=48 off=48 size=8")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_and_spill_negative(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 48 goto l0_%=; \
+ /* r0 has upper bound that should propagate into r2 */\
+ *(u64*)(r10 - 8) = r2; /* spill r2 */ \
+ r0 = 0; \
+ r2 = 0; /* clear r0 and r2 */\
+ r3 = *(u64*)(r10 - 8); /* fill r3 */\
+ if r0 s>= r3 goto l0_%=; \
+ /* r3 has lower and upper bounds */ \
+ r7 += r3; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc three regs")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_three_regs(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ r4 = r2; \
+ if r0 s> 12 goto l0_%=; \
+ if r2 s< 0 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r7 += r4; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc after call")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_after_call(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r8 = r0; \
+ r9 = r0; \
+ call regalloc_after_call__1; \
+ if r8 s> 20 goto l0_%=; \
+ if r9 s< 0 goto l0_%=; \
+ r7 += r8; \
+ r7 += r9; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void regalloc_after_call__1(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc in callee")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_in_callee(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r2 = r0; \
+ r3 = r7; \
+ call regalloc_in_callee__1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void regalloc_in_callee__1(void)
+{
+ asm volatile (" \
+ if r1 s> 20 goto l0_%=; \
+ if r2 s< 0 goto l0_%=; \
+ r3 += r1; \
+ r3 += r2; \
+ r0 = *(u64*)(r3 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc, spill, JEQ")
+__success
+__naked void regalloc_spill_jeq(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ *(u64*)(r10 - 8) = r0; /* spill r0 */ \
+ if r0 == 0 goto l0_%=; \
+l0_%=: /* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */\
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r2 == 20 goto l1_%=; \
+l1_%=: /* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */\
+ r3 = *(u64*)(r10 - 8); /* fill r3 with map_value */\
+ if r3 == 0 goto l2_%=; /* skip ldx if map_value == NULL */\
+ /* Buggy verifier will think that r3 == 20 here */\
+ r0 = *(u64*)(r3 + 0); /* read from map_value */\
+l2_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ringbuf.c b/tools/testing/selftests/bpf/progs/verifier_ringbuf.c
new file mode 100644
index 000000000000..ae1d521f326c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ringbuf.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ringbuf.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+SEC("socket")
+__description("ringbuf: invalid reservation offset 1")
+__failure __msg("R1 must have zero offset when passed to release func")
+__failure_unpriv
+__naked void ringbuf_invalid_reservation_offset_1(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* spill R6(mem) into the stack */ \
+ *(u64*)(r10 - 8) = r6; \
+ /* fill it back in R7 */ \
+ r7 = *(u64*)(r10 - 8); \
+ /* should be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u64*)(r7 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r7; \
+ /* add invalid offset to reserved ringbuf memory */\
+ r1 += 0xcafe; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ringbuf: invalid reservation offset 2")
+__failure __msg("R7 min value is outside of the allowed memory range")
+__failure_unpriv
+__naked void ringbuf_invalid_reservation_offset_2(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* spill R6(mem) into the stack */ \
+ *(u64*)(r10 - 8) = r6; \
+ /* fill it back in R7 */ \
+ r7 = *(u64*)(r10 - 8); \
+ /* add invalid offset to reserved ringbuf memory */\
+ r7 += 0xcafe; \
+ /* should be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u64*)(r7 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r7; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("ringbuf: check passing rb mem to helpers")
+__success __retval(0)
+__naked void passing_rb_mem_to_helpers(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ r7 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* pass allocated ring buffer memory to fib lookup */\
+ r1 = r6; \
+ r2 = r0; \
+ r3 = 8; \
+ r4 = 0; \
+ call %[bpf_fib_lookup]; \
+ /* submit the ringbuf memory */ \
+ r1 = r7; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_fib_lookup),
+ __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_runtime_jit.c b/tools/testing/selftests/bpf/progs/verifier_runtime_jit.c
new file mode 100644
index 000000000000..27ebfc1fd9ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_runtime_jit.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/runtime_jit.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+void dummy_prog_42_socket(void);
+void dummy_prog_24_socket(void);
+void dummy_prog_loop1_socket(void);
+void dummy_prog_loop2_socket(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog1_socket SEC(".maps") = {
+ .values = {
+ [0] = (void *)&dummy_prog_42_socket,
+ [1] = (void *)&dummy_prog_loop1_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ },
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 8);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog2_socket SEC(".maps") = {
+ .values = {
+ [1] = (void *)&dummy_prog_loop2_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ [7] = (void *)&dummy_prog_42_socket,
+ },
+};
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_42_socket(void)
+{
+ asm volatile ("r0 = 42; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_24_socket(void)
+{
+ asm volatile ("r0 = 24; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop1_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop2_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog2_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog2_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, prog once")
+__success __success_unpriv __retval(42)
+__naked void call_within_bounds_prog_once(void)
+{
+ asm volatile (" \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, prog loop")
+__success __success_unpriv __retval(41)
+__naked void call_within_bounds_prog_loop(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, no prog")
+__success __success_unpriv __retval(1)
+__naked void call_within_bounds_no_prog(void)
+{
+ asm volatile (" \
+ r3 = 3; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 2")
+__success __success_unpriv __retval(24)
+__naked void call_within_bounds_key_2(void)
+{
+ asm volatile (" \
+ r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 2 / key 2, first branch")
+__success __success_unpriv __retval(24)
+__naked void _2_key_2_first_branch(void)
+{
+ asm volatile (" \
+ r0 = 13; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 2 / key 2, second branch")
+__success __success_unpriv __retval(24)
+__naked void _2_key_2_second_branch(void)
+{
+ asm volatile (" \
+ r0 = 14; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 0 / key 2, first branch")
+__success __success_unpriv __retval(24)
+__naked void _0_key_2_first_branch(void)
+{
+ asm volatile (" \
+ r0 = 13; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 0 / key 2, second branch")
+__success __success_unpriv __retval(42)
+__naked void _0_key_2_second_branch(void)
+{
+ asm volatile (" \
+ r0 = 14; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, different maps, first branch")
+__success __failure_unpriv __msg_unpriv("tail_call abusing map_ptr")
+__retval(1)
+__naked void bounds_different_maps_first_branch(void)
+{
+ asm volatile (" \
+ r0 = 13; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 0; \
+ r2 = %[map_prog2_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_addr(map_prog2_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, different maps, second branch")
+__success __failure_unpriv __msg_unpriv("tail_call abusing map_ptr")
+__retval(42)
+__naked void bounds_different_maps_second_branch(void)
+{
+ asm volatile (" \
+ r0 = 14; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 0; \
+ r2 = %[map_prog2_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_addr(map_prog2_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call out of bounds")
+__success __success_unpriv __retval(2)
+__naked void tail_call_out_of_bounds(void)
+{
+ asm volatile (" \
+ r3 = 256; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 2; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: pass negative index to tail_call")
+__success __success_unpriv __retval(2)
+__naked void negative_index_to_tail_call(void)
+{
+ asm volatile (" \
+ r3 = -1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 2; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: pass > 32bit index to tail_call")
+__success __success_unpriv __retval(42)
+/* Verifier rewrite for unpriv skips tail call here. */
+__retval_unpriv(2)
+__naked void _32bit_index_to_tail_call(void)
+{
+ asm volatile (" \
+ r3 = 0x100000000 ll; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 2; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
new file mode 100644
index 000000000000..c0ce690ddb68
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -0,0 +1,830 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Check that precision marks propagate through scalar IDs.
+ * Registers r{0,1,2} have the same scalar ID.
+ * Range information is propagated for scalars sharing same ID.
+ * Check that precision mark for r0 causes precision marks for r{1,2}
+ * when range information is propagated for 'if <reg> <op> <const>' insn.
+ */
+SEC("socket")
+__success __log_level(2)
+/* first 'if' branch */
+__msg("6: (0f) r3 += r0")
+__msg("frame0: regs=r0 stack= before 4: (25) if r1 > 0x7 goto pc+0")
+__msg("frame0: parent state regs=r0,r1,r2 stack=:")
+__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0")
+/* second 'if' branch */
+__msg("from 4 to 5: ")
+__msg("6: (0f) r3 += r0")
+__msg("frame0: regs=r0 stack= before 5: (bf) r3 = r10")
+__msg("frame0: regs=r0 stack= before 4: (25) if r1 > 0x7 goto pc+0")
+/* parent state already has r{0,1,2} as precise */
+__msg("frame0: parent state regs= stack=:")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void linked_regs_bpf_k(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ "if r1 > 7 goto +0;"
+ /* force r0 to be precise, this eventually marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Registers r{0,1,2} share same ID when 'if r1 > ...' insn is processed,
+ * check that verifier marks r{1,2} as precise while backtracking
+ * 'if r1 > ...' with r0 already marked.
+ */
+SEC("socket")
+__success __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__msg("frame0: regs=r0 stack= before 5: (2d) if r1 > r3 goto pc+0")
+__msg("frame0: parent state regs=r0,r1,r2,r3 stack=:")
+__msg("frame0: regs=r0,r1,r2,r3 stack= before 4: (b7) r3 = 7")
+__naked void linked_regs_bpf_x_src(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ "r3 = 7;"
+ "if r1 > r3 goto +0;"
+ /* force r0 to be precise, this eventually marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r4 = r10;"
+ "r4 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Registers r{0,1,2} share same ID when 'if r1 > r3' insn is processed,
+ * check that verifier marks r{0,1,2} as precise while backtracking
+ * 'if r1 > r3' with r3 already marked.
+ */
+SEC("socket")
+__success __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__msg("frame0: regs=r3 stack= before 5: (2d) if r1 > r3 goto pc+0")
+__msg("frame0: parent state regs=r0,r1,r2,r3 stack=:")
+__msg("frame0: regs=r0,r1,r2,r3 stack= before 4: (b7) r3 = 7")
+__naked void linked_regs_bpf_x_dst(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ "r3 = 7;"
+ "if r1 > r3 goto +0;"
+ /* force r0 to be precise, this eventually marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r4 = r10;"
+ "r4 += r3;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as linked_regs_bpf_k, but break one of the
+ * links, note that r1 is absent from regs=... in __msg below.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("7: (0f) r3 += r0")
+__msg("frame0: regs=r0 stack= before 6: (bf) r3 = r10")
+__msg("frame0: parent state regs=r0 stack=:")
+__msg("frame0: regs=r0 stack= before 5: (25) if r0 > 0x7 goto pc+0")
+__msg("frame0: parent state regs=r0,r2 stack=:")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void linked_regs_broken_link(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ /* break link for r1, this is the only line that differs
+ * compared to the previous test
+ */
+ "r1 = 0;"
+ "if r0 > 7 goto +0;"
+ /* force r0 to be precise,
+ * this eventually marks r2 as precise because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that precision marks propagate through scalar IDs.
+ * Use the same scalar ID in multiple stack frames, check that
+ * precision information is propagated up the call stack.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("12: (0f) r2 += r1")
+/* Current state */
+__msg("frame2: last_idx 12 first_idx 11 subseq_idx -1 ")
+__msg("frame2: regs=r1 stack= before 11: (bf) r2 = r10")
+__msg("frame2: parent state regs=r1 stack=")
+__msg("frame1: parent state regs= stack=")
+__msg("frame0: parent state regs= stack=")
+/* Parent state */
+__msg("frame2: last_idx 10 first_idx 10 subseq_idx 11 ")
+__msg("frame2: regs=r1 stack= before 10: (25) if r1 > 0x7 goto pc+0")
+__msg("frame2: parent state regs=r1 stack=")
+/* frame1.r{6,7} are marked because mark_precise_scalar_ids()
+ * looks for all registers with frame2.r1.id in the current state
+ */
+__msg("frame1: parent state regs=r6,r7 stack=")
+__msg("frame0: parent state regs=r6 stack=")
+/* Parent state */
+__msg("frame2: last_idx 8 first_idx 8 subseq_idx 10")
+__msg("frame2: regs=r1 stack= before 8: (85) call pc+1")
+/* frame1.r1 is marked because of backtracking of call instruction */
+__msg("frame1: parent state regs=r1,r6,r7 stack=")
+__msg("frame0: parent state regs=r6 stack=")
+/* Parent state */
+__msg("frame1: last_idx 7 first_idx 6 subseq_idx 8")
+__msg("frame1: regs=r1,r6,r7 stack= before 7: (bf) r7 = r1")
+__msg("frame1: regs=r1,r6 stack= before 6: (bf) r6 = r1")
+__msg("frame1: parent state regs=r1 stack=")
+__msg("frame0: parent state regs=r6 stack=")
+/* Parent state */
+__msg("frame1: last_idx 4 first_idx 4 subseq_idx 6")
+__msg("frame1: regs=r1 stack= before 4: (85) call pc+1")
+__msg("frame0: parent state regs=r1,r6 stack=")
+/* Parent state */
+__msg("frame0: last_idx 3 first_idx 1 subseq_idx 4")
+__msg("frame0: regs=r1,r6 stack= before 3: (bf) r6 = r0")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_many_frames(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r6.id */
+ "r1 = r0;"
+ "r6 = r0;"
+ "call precision_many_frames__foo;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+static __naked __noinline __used
+void precision_many_frames__foo(void)
+{
+ asm volatile (
+ /* conflate one of the register numbers (r6) with outer frame,
+ * to verify that those are tracked independently
+ */
+ "r6 = r1;"
+ "r7 = r1;"
+ "call precision_many_frames__bar;"
+ "exit"
+ ::: __clobber_all);
+}
+
+static __naked __noinline __used
+void precision_many_frames__bar(void)
+{
+ asm volatile (
+ "if r1 > 7 goto +0;"
+ /* force r1 to be precise, this eventually marks:
+ * - bar frame r1
+ * - foo frame r{1,6,7}
+ * - main frame r{1,6}
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Check that scalars with the same IDs are marked precise on stack as
+ * well as in registers.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("11: (0f) r2 += r1")
+/* foo frame */
+__msg("frame1: regs=r1 stack= before 10: (bf) r2 = r10")
+__msg("frame1: regs=r1 stack= before 9: (25) if r1 > 0x7 goto pc+0")
+__msg("frame1: regs=r1 stack=-8,-16 before 8: (7b) *(u64 *)(r10 -16) = r1")
+__msg("frame1: regs=r1 stack=-8 before 7: (7b) *(u64 *)(r10 -8) = r1")
+__msg("frame1: regs=r1 stack= before 4: (85) call pc+2")
+/* main frame */
+__msg("frame0: regs=r1 stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r1")
+__msg("frame0: regs=r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_stack(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == fp[-8].id */
+ "r1 = r0;"
+ "*(u64*)(r10 - 8) = r1;"
+ "call precision_stack__foo;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+static __naked __noinline __used
+void precision_stack__foo(void)
+{
+ asm volatile (
+ /* conflate one of the register numbers (r6) with outer frame,
+ * to verify that those are tracked independently
+ */
+ "*(u64*)(r10 - 8) = r1;"
+ "*(u64*)(r10 - 16) = r1;"
+ "if r1 > 7 goto +0;"
+ /* force r1 to be precise, this eventually marks:
+ * - foo frame r1,fp{-8,-16}
+ * - main frame r1,fp{-8}
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "exit"
+ ::: __clobber_all);
+}
+
+/* Use two separate scalar IDs to check that these are propagated
+ * independently.
+ */
+SEC("socket")
+__success __log_level(2)
+/* r{6,7} */
+__msg("12: (0f) r3 += r7")
+__msg("frame0: regs=r7 stack= before 11: (bf) r3 = r10")
+__msg("frame0: regs=r7 stack= before 9: (25) if r7 > 0x7 goto pc+0")
+/* ... skip some insns ... */
+__msg("frame0: regs=r6,r7 stack= before 3: (bf) r7 = r0")
+__msg("frame0: regs=r0,r6 stack= before 2: (bf) r6 = r0")
+/* r{8,9} */
+__msg("13: (0f) r3 += r9")
+__msg("frame0: regs=r9 stack= before 12: (0f) r3 += r7")
+/* ... skip some insns ... */
+__msg("frame0: regs=r9 stack= before 10: (25) if r9 > 0x7 goto pc+0")
+__msg("frame0: regs=r8,r9 stack= before 7: (bf) r9 = r0")
+__msg("frame0: regs=r0,r8 stack= before 6: (bf) r8 = r0")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_two_ids(void)
+{
+ asm volatile (
+ /* r6 = random number up to 0xff
+ * r6.id == r7.id
+ */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r6 = r0;"
+ "r7 = r0;"
+ /* same, but for r{8,9} */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r8 = r0;"
+ "r9 = r0;"
+ /* clear r0 id */
+ "r0 = 0;"
+ /* propagate equal scalars precision */
+ "if r7 > 7 goto +0;"
+ "if r9 > 7 goto +0;"
+ "r3 = r10;"
+ /* force r7 to be precise, this also marks r6 */
+ "r3 += r7;"
+ /* force r9 to be precise, this also marks r8 */
+ "r3 += r9;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+/* check that r0 and r6 have different IDs after 'if',
+ * collect_linked_regs() can't tie more than 6 registers for a single insn.
+ */
+__msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1")
+__msg("9: (bf) r6 = r6 ; R6=scalar(id=2")
+/* check that r{0-5} are marked precise after 'if' */
+__msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0")
+__msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:")
+__naked void linked_regs_too_many_regs(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r{0-6} IDs */
+ "r1 = r0;"
+ "r2 = r0;"
+ "r3 = r0;"
+ "r4 = r0;"
+ "r5 = r0;"
+ "r6 = r0;"
+ /* propagate range for r{0-6} */
+ "if r0 > 7 goto +0;"
+ /* make r6 appear in the log */
+ "r6 = r6;"
+ /* force r0 to be precise,
+ * this would cause r{0-4} to be precise because of shared IDs
+ */
+ "r7 = r10;"
+ "r7 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__msg("regs=r7 stack= before 5: (3d) if r8 >= r0")
+__msg("parent state regs=r0,r7,r8")
+__msg("regs=r0,r7,r8 stack= before 4: (25) if r0 > 0x1")
+__msg("div by zero")
+__naked void linked_regs_broken_link_2(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r8 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > 1 goto +0;"
+ /* r7.id == r8.id,
+ * thus r7 precision implies r8 precision,
+ * which implies r0 precision because of the conditional below.
+ */
+ "if r8 >= r0 goto 1f;"
+ /* break id relation between r7 and r8 */
+ "r8 += r8;"
+ /* make r7 precise */
+ "if r7 == 0 goto 1f;"
+ "r0 /= 0;"
+"1:"
+ "r0 = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* Check that mark_chain_precision() for one of the conditional jump
+ * operands does not trigger equal scalars precision propagation.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("3: (25) if r1 > 0x100 goto pc+0")
+__msg("frame0: regs=r1 stack= before 2: (bf) r1 = r0")
+__naked void cjmp_no_linked_regs_trigger(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id */
+ "r1 = r0;"
+ /* the jump below would be predicted, thus r1 would be marked precise,
+ * this should not imply precision mark for r0
+ */
+ "if r1 > 256 goto +0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Verify that check_ids() is used by regsafe() for scalars.
+ *
+ * r9 = ... some pointer with range X ...
+ * r6 = ... unbound scalar ID=a ...
+ * r7 = ... unbound scalar ID=b ...
+ * if (r6 > r7) goto +1
+ * r7 = r6
+ * if (r7 > X) goto exit
+ * r9 += r6
+ * ... access memory using r9 ...
+ *
+ * The memory access is safe only if r7 is bounded,
+ * which is true for one branch and not true for another.
+ */
+SEC("socket")
+__failure __msg("register with unbounded min value")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_ids_in_regsafe(void)
+{
+ asm volatile (
+ /* Bump allocated stack */
+ "r1 = 0;"
+ "*(u64*)(r10 - 8) = r1;"
+ /* r9 = pointer to stack */
+ "r9 = r10;"
+ "r9 += -8;"
+ /* r7 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r7 = r0;"
+ /* r6 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ /* if r6 > r7 is an unpredictable jump */
+ "if r6 > r7 goto l1_%=;"
+ "r7 = r6;"
+"l1_%=:"
+ /* if r7 > 4 ...; transfers range to r6 on one execution path
+ * but does not transfer on another
+ */
+ "if r7 > 4 goto l2_%=;"
+ /* Access memory at r9[r6], r6 is not always bounded */
+ "r9 += r6;"
+ "r0 = *(u8*)(r9 + 0);"
+"l2_%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Similar to check_ids_in_regsafe.
+ * The l0 could be reached in two states:
+ *
+ * (1) r6{.id=A}, r7{.id=A}, r8{.id=B}
+ * (2) r6{.id=B}, r7{.id=A}, r8{.id=B}
+ *
+ * Where (2) is not safe, as "r7 > 4" check won't propagate range for it.
+ * This example would be considered safe without changes to
+ * mark_chain_precision() to track scalar values with equal IDs.
+ */
+SEC("socket")
+__failure __msg("register with unbounded min value")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_ids_in_regsafe_2(void)
+{
+ asm volatile (
+ /* Bump allocated stack */
+ "r1 = 0;"
+ "*(u64*)(r10 - 8) = r1;"
+ /* r9 = pointer to stack */
+ "r9 = r10;"
+ "r9 += -8;"
+ /* r8 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r8 = r0;"
+ /* r7 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r7 = r0;"
+ /* r6 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ /* scratch .id from r0 */
+ "r0 = 0;"
+ /* if r6 > r7 is an unpredictable jump */
+ "if r6 > r7 goto l1_%=;"
+ /* tie r6 and r7 .id */
+ "r6 = r7;"
+"l0_%=:"
+ /* if r7 > 4 exit(0) */
+ "if r7 > 4 goto l2_%=;"
+ /* Access memory at r9[r6] */
+ "r9 += r6;"
+ "r0 = *(u8*)(r9 + 0);"
+"l2_%=:"
+ "r0 = 0;"
+ "exit;"
+"l1_%=:"
+ /* tie r6 and r8 .id */
+ "r6 = r8;"
+ "goto l0_%=;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that scalar IDs *are not* generated on register to register
+ * assignments if source register is a constant.
+ *
+ * If such IDs *are* generated the 'l1' below would be reached in
+ * two states:
+ *
+ * (1) r1{.id=A}, r2{.id=A}
+ * (2) r1{.id=C}, r2{.id=C}
+ *
+ * Thus forcing 'if r1 == r2' verification twice.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("11: (1d) if r3 == r4 goto pc+0")
+__msg("frame 0: propagating r3,r4")
+__msg("11: safe")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void no_scalar_id_for_const(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ /* unpredictable jump */
+ "if r0 > 7 goto l0_%=;"
+ /* possibly generate same scalar ids for r3 and r4 */
+ "r1 = 0;"
+ "r1 = r1;"
+ "r3 = r1;"
+ "r4 = r1;"
+ "goto l1_%=;"
+"l0_%=:"
+ /* possibly generate different scalar ids for r3 and r4 */
+ "r1 = 0;"
+ "r2 = 0;"
+ "r3 = r1;"
+ "r4 = r2;"
+"l1_%=:"
+ /* predictable jump, marks r3 and r4 precise */
+ "if r3 == r4 goto +0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as no_scalar_id_for_const() but for 32-bit values */
+SEC("socket")
+__success __log_level(2)
+__msg("11: (1e) if w3 == w4 goto pc+0")
+__msg("frame 0: propagating r3,r4")
+__msg("11: safe")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void no_scalar_id_for_const32(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ /* unpredictable jump */
+ "if r0 > 7 goto l0_%=;"
+ /* possibly generate same scalar ids for r3 and r4 */
+ "w1 = 0;"
+ "w1 = w1;"
+ "w3 = w1;"
+ "w4 = w1;"
+ "goto l1_%=;"
+"l0_%=:"
+ /* possibly generate different scalar ids for r3 and r4 */
+ "w1 = 0;"
+ "w2 = 0;"
+ "w3 = w1;"
+ "w4 = w2;"
+"l1_%=:"
+ /* predictable jump, marks r1 and r2 precise */
+ "if w3 == w4 goto +0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that unique scalar IDs are ignored when new verifier state is
+ * compared to cached verifier state. For this test:
+ * - cached state has no id on r1
+ * - new state has a unique id on r1
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("6: (25) if r6 > 0x7 goto pc+1")
+__msg("7: (57) r1 &= 255")
+__msg("8: (bf) r2 = r10")
+__msg("from 6 to 8: safe")
+__msg("processed 12 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void ignore_unique_scalar_ids_cur(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* r1.id == r0.id */
+ "r1 = r0;"
+ /* make r1.id unique */
+ "r0 = 0;"
+ "if r6 > 7 goto l0_%=;"
+ /* clear r1 id, but keep the range compatible */
+ "r1 &= 0xff;"
+"l0_%=:"
+ /* get here in two states:
+ * - first: r1 has no id (cached state)
+ * - second: r1 has a unique id (should be considered equivalent)
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that unique scalar IDs are ignored when new verifier state is
+ * compared to cached verifier state. For this test:
+ * - cached state has a unique id on r1
+ * - new state has no id on r1
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("6: (25) if r6 > 0x7 goto pc+1")
+__msg("7: (05) goto pc+1")
+__msg("9: (bf) r2 = r10")
+__msg("9: safe")
+__msg("processed 13 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void ignore_unique_scalar_ids_old(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* r1.id == r0.id */
+ "r1 = r0;"
+ /* make r1.id unique */
+ "r0 = 0;"
+ "if r6 > 7 goto l1_%=;"
+ "goto l0_%=;"
+"l1_%=:"
+ /* clear r1 id, but keep the range compatible */
+ "r1 &= 0xff;"
+"l0_%=:"
+ /* get here in two states:
+ * - first: r1 has a unique id (cached state)
+ * - second: r1 has no id (should be considered equivalent)
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that two different scalar IDs in a verified state can't be
+ * mapped to the same scalar ID in current state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* The exit instruction should be reachable from two states,
+ * use two matches and "processed .. insns" to ensure this.
+ */
+__msg("13: (95) exit")
+__msg("13: (95) exit")
+__msg("processed 18 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void two_old_ids_one_cur_id(void)
+{
+ asm volatile (
+ /* Give unique scalar IDs to r{6,7} */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r7 = r0;"
+ "r0 = 0;"
+ /* Maybe make r{6,7} IDs identical */
+ "if r6 > r7 goto l0_%=;"
+ "goto l1_%=;"
+"l0_%=:"
+ "r6 = r7;"
+"l1_%=:"
+ /* Mark r{6,7} precise.
+ * Get here in two states:
+ * - first: r6{.id=A}, r7{.id=B} (cached state)
+ * - second: r6{.id=A}, r7{.id=A}
+ * Currently we don't want to consider such states equivalent.
+ * Thus "exit;" would be verified twice.
+ */
+ "r2 = r10;"
+ "r2 += r6;"
+ "r2 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+/* Note the flag, see verifier.c:opt_subreg_zext_lo32_rnd_hi32() */
+__flag(BPF_F_TEST_RND_HI32)
+__success
+/* This test was added because of a bug in verifier.c:sync_linked_regs(),
+ * upon range propagation it destroyed subreg_def marks for registers.
+ * The subreg_def mark is used to decide whether zero extension instructions
+ * are needed when register is read. When BPF_F_TEST_RND_HI32 is set it
+ * also causes generation of statements to randomize upper halves of
+ * read registers.
+ *
+ * The test is written in a way to return an upper half of a register
+ * that is affected by range propagation and must have it's subreg_def
+ * preserved. This gives a return value of 0 and leads to undefined
+ * return value if subreg_def mark is not preserved.
+ */
+__retval(0)
+/* Check that verifier believes r1/r0 are zero at exit */
+__log_level(2)
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
+__msg("6: (95) exit")
+__msg("from 3 to 4")
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
+__msg("6: (95) exit")
+/* Verify that statements to randomize upper half of r1 had not been
+ * generated.
+ */
+__xlated("call unknown")
+__xlated("r0 &= 2147483647")
+__xlated("w1 = w0")
+/* This is how disasm.c prints BPF_ZEXT_REG at the moment, x86 and arm
+ * are the only CI archs that do not need zero extension for subregs.
+ */
+#if !defined(__TARGET_ARCH_x86) && !defined(__TARGET_ARCH_arm64)
+__xlated("w1 = w1")
+#endif
+__xlated("if w0 < 0xa goto pc+0")
+__xlated("r1 >>= 32")
+__xlated("r0 = r1")
+__xlated("exit")
+__naked void linked_regs_and_subreg_def(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ /* make sure r0 is in 32-bit range, otherwise w1 = w0 won't
+ * assign same IDs to registers.
+ */
+ "r0 &= 0x7fffffff;"
+ /* link w1 and w0 via ID */
+ "w1 = w0;"
+ /* 'if' statement propagates range info from w0 to w1,
+ * but should not affect w1->subreg_def property.
+ */
+ "if w0 < 10 goto +0;"
+ /* r1 is read here, on archs that require subreg zero
+ * extension this would cause zext patch generation.
+ */
+ "r1 >>= 32;"
+ "r0 = r1;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
new file mode 100644
index 000000000000..148d2299e5b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
@@ -0,0 +1,1224 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <limits.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 1")
+__success __success_unpriv __retval(-20)
+__naked void sdiv32_non_zero_imm_1(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 2")
+__success __success_unpriv __retval(-20)
+__naked void sdiv32_non_zero_imm_2(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 3")
+__success __success_unpriv __retval(20)
+__naked void sdiv32_non_zero_imm_3(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 4")
+__success __success_unpriv __retval(-21)
+__naked void sdiv32_non_zero_imm_4(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 5")
+__success __success_unpriv __retval(-21)
+__naked void sdiv32_non_zero_imm_5(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 6")
+__success __success_unpriv __retval(21)
+__naked void sdiv32_non_zero_imm_6(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 7")
+__success __success_unpriv __retval(21)
+__naked void sdiv32_non_zero_imm_7(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 8")
+__success __success_unpriv __retval(20)
+__naked void sdiv32_non_zero_imm_8(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 1")
+__success __success_unpriv __retval(-20)
+__naked void sdiv32_non_zero_reg_1(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w1 = 2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 2")
+__success __success_unpriv __retval(-20)
+__naked void sdiv32_non_zero_reg_2(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w1 = -2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 3")
+__success __success_unpriv __retval(20)
+__naked void sdiv32_non_zero_reg_3(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w1 = -2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 4")
+__success __success_unpriv __retval(-21)
+__naked void sdiv32_non_zero_reg_4(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w1 = 2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 5")
+__success __success_unpriv __retval(-21)
+__naked void sdiv32_non_zero_reg_5(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = -2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 6")
+__success __success_unpriv __retval(21)
+__naked void sdiv32_non_zero_reg_6(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w1 = -2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 7")
+__success __success_unpriv __retval(21)
+__naked void sdiv32_non_zero_reg_7(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 8")
+__success __success_unpriv __retval(20)
+__naked void sdiv32_non_zero_reg_8(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w1 = 2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 1")
+__success __success_unpriv __retval(-20)
+__naked void sdiv64_non_zero_imm_1(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 2")
+__success __success_unpriv __retval(-20)
+__naked void sdiv64_non_zero_imm_2(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 3")
+__success __success_unpriv __retval(20)
+__naked void sdiv64_non_zero_imm_3(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 4")
+__success __success_unpriv __retval(-21)
+__naked void sdiv64_non_zero_imm_4(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 5")
+__success __success_unpriv __retval(-21)
+__naked void sdiv64_non_zero_imm_5(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 6")
+__success __success_unpriv __retval(21)
+__naked void sdiv64_non_zero_imm_6(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 1")
+__success __success_unpriv __retval(-20)
+__naked void sdiv64_non_zero_reg_1(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r1 = 2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 2")
+__success __success_unpriv __retval(-20)
+__naked void sdiv64_non_zero_reg_2(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r1 = -2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 3")
+__success __success_unpriv __retval(20)
+__naked void sdiv64_non_zero_reg_3(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r1 = -2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 4")
+__success __success_unpriv __retval(-21)
+__naked void sdiv64_non_zero_reg_4(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r1 = 2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 5")
+__success __success_unpriv __retval(-21)
+__naked void sdiv64_non_zero_reg_5(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = -2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 6")
+__success __success_unpriv __retval(21)
+__naked void sdiv64_non_zero_reg_6(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r1 = -2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 1")
+__success __success_unpriv __retval(-1)
+__naked void smod32_non_zero_imm_1(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 2")
+__success __success_unpriv __retval(1)
+__naked void smod32_non_zero_imm_2(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 3")
+__success __success_unpriv __retval(-1)
+__naked void smod32_non_zero_imm_3(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 4")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_imm_4(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 5")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_imm_5(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 6")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_imm_6(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 1")
+__success __success_unpriv __retval(-1)
+__naked void smod32_non_zero_reg_1(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w1 = 2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 2")
+__success __success_unpriv __retval(1)
+__naked void smod32_non_zero_reg_2(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w1 = -2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 3")
+__success __success_unpriv __retval(-1)
+__naked void smod32_non_zero_reg_3(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w1 = -2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 4")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_reg_4(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w1 = 2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 5")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_reg_5(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = -2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 6")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_reg_6(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w1 = -2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 1")
+__success __success_unpriv __retval(-1)
+__naked void smod64_non_zero_imm_1(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 2")
+__success __success_unpriv __retval(1)
+__naked void smod64_non_zero_imm_2(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 3")
+__success __success_unpriv __retval(-1)
+__naked void smod64_non_zero_imm_3(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 4")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_imm_4(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 5")
+__success __success_unpriv __retval(-0)
+__naked void smod64_non_zero_imm_5(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 6")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_imm_6(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 7")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_imm_7(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 8")
+__success __success_unpriv __retval(1)
+__naked void smod64_non_zero_imm_8(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 1")
+__success __success_unpriv __retval(-1)
+__naked void smod64_non_zero_reg_1(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r1 = 2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 2")
+__success __success_unpriv __retval(1)
+__naked void smod64_non_zero_reg_2(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r1 = -2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 3")
+__success __success_unpriv __retval(-1)
+__naked void smod64_non_zero_reg_3(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r1 = -2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 4")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_reg_4(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r1 = 2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 5")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_reg_5(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = -2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 6")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_reg_6(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r1 = -2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 7")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_reg_7(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = 2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 8")
+__success __success_unpriv __retval(1)
+__naked void smod64_non_zero_reg_8(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r1 = 2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, zero divisor")
+__success __success_unpriv __retval(0)
+__naked void sdiv32_zero_divisor(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = -1; \
+ w2 s/= w1; \
+ w0 = w2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, zero divisor")
+__success __success_unpriv __retval(0)
+__naked void sdiv64_zero_divisor(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = 0; \
+ r2 = -1; \
+ r2 s/= r1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, zero divisor")
+__success __success_unpriv __retval(-1)
+__naked void smod32_zero_divisor(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = -1; \
+ w2 s%%= w1; \
+ w0 = w2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, zero divisor")
+__success __success_unpriv __retval(-1)
+__naked void smod64_zero_divisor(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = 0; \
+ r2 = -1; \
+ r2 s%%= r1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, overflow r/r, LLONG_MIN/-1")
+__success __retval(1)
+__arch_x86_64
+__xlated("0: r2 = 0x8000000000000000")
+__xlated("2: r3 = -1")
+__xlated("3: r4 = r2")
+__xlated("4: r11 = r3")
+__xlated("5: r11 += 1")
+__xlated("6: if r11 > 0x1 goto pc+4")
+__xlated("7: if r11 == 0x0 goto pc+1")
+__xlated("8: r2 = 0")
+__xlated("9: r2 = -r2")
+__xlated("10: goto pc+1")
+__xlated("11: r2 s/= r3")
+__xlated("12: r0 = 0")
+__xlated("13: if r2 != r4 goto pc+1")
+__xlated("14: r0 = 1")
+__xlated("15: exit")
+__naked void sdiv64_overflow_rr(void)
+{
+ asm volatile (" \
+ r2 = %[llong_min] ll; \
+ r3 = -1; \
+ r4 = r2; \
+ r2 s/= r3; \
+ r0 = 0; \
+ if r2 != r4 goto +1; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, r/r, small_val/-1")
+__success __retval(-5)
+__arch_x86_64
+__xlated("0: r2 = 5")
+__xlated("1: r3 = -1")
+__xlated("2: r11 = r3")
+__xlated("3: r11 += 1")
+__xlated("4: if r11 > 0x1 goto pc+4")
+__xlated("5: if r11 == 0x0 goto pc+1")
+__xlated("6: r2 = 0")
+__xlated("7: r2 = -r2")
+__xlated("8: goto pc+1")
+__xlated("9: r2 s/= r3")
+__xlated("10: r0 = r2")
+__xlated("11: exit")
+__naked void sdiv64_rr_divisor_neg_1(void)
+{
+ asm volatile (" \
+ r2 = 5; \
+ r3 = -1; \
+ r2 s/= r3; \
+ r0 = r2; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, overflow r/i, LLONG_MIN/-1")
+__success __retval(1)
+__arch_x86_64
+__xlated("0: r2 = 0x8000000000000000")
+__xlated("2: r4 = r2")
+__xlated("3: r2 = -r2")
+__xlated("4: r0 = 0")
+__xlated("5: if r2 != r4 goto pc+1")
+__xlated("6: r0 = 1")
+__xlated("7: exit")
+__naked void sdiv64_overflow_ri(void)
+{
+ asm volatile (" \
+ r2 = %[llong_min] ll; \
+ r4 = r2; \
+ r2 s/= -1; \
+ r0 = 0; \
+ if r2 != r4 goto +1; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, r/i, small_val/-1")
+__success __retval(-5)
+__arch_x86_64
+__xlated("0: r2 = 5")
+__xlated("1: r4 = r2")
+__xlated("2: r2 = -r2")
+__xlated("3: r0 = r2")
+__xlated("4: exit")
+__naked void sdiv64_ri_divisor_neg_1(void)
+{
+ asm volatile (" \
+ r2 = 5; \
+ r4 = r2; \
+ r2 s/= -1; \
+ r0 = r2; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, overflow r/r, INT_MIN/-1")
+__success __retval(1)
+__arch_x86_64
+__xlated("0: w2 = -2147483648")
+__xlated("1: w3 = -1")
+__xlated("2: w4 = w2")
+__xlated("3: r11 = r3")
+__xlated("4: w11 += 1")
+__xlated("5: if w11 > 0x1 goto pc+4")
+__xlated("6: if w11 == 0x0 goto pc+1")
+__xlated("7: w2 = 0")
+__xlated("8: w2 = -w2")
+__xlated("9: goto pc+1")
+__xlated("10: w2 s/= w3")
+__xlated("11: r0 = 0")
+__xlated("12: if w2 != w4 goto pc+1")
+__xlated("13: r0 = 1")
+__xlated("14: exit")
+__naked void sdiv32_overflow_rr(void)
+{
+ asm volatile (" \
+ w2 = %[int_min]; \
+ w3 = -1; \
+ w4 = w2; \
+ w2 s/= w3; \
+ r0 = 0; \
+ if w2 != w4 goto +1; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, r/r, small_val/-1")
+__success __retval(5)
+__arch_x86_64
+__xlated("0: w2 = -5")
+__xlated("1: w3 = -1")
+__xlated("2: w4 = w2")
+__xlated("3: r11 = r3")
+__xlated("4: w11 += 1")
+__xlated("5: if w11 > 0x1 goto pc+4")
+__xlated("6: if w11 == 0x0 goto pc+1")
+__xlated("7: w2 = 0")
+__xlated("8: w2 = -w2")
+__xlated("9: goto pc+1")
+__xlated("10: w2 s/= w3")
+__xlated("11: w0 = w2")
+__xlated("12: exit")
+__naked void sdiv32_rr_divisor_neg_1(void)
+{
+ asm volatile (" \
+ w2 = -5; \
+ w3 = -1; \
+ w4 = w2; \
+ w2 s/= w3; \
+ w0 = w2; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, overflow r/i, INT_MIN/-1")
+__success __retval(1)
+__arch_x86_64
+__xlated("0: w2 = -2147483648")
+__xlated("1: w4 = w2")
+__xlated("2: w2 = -w2")
+__xlated("3: r0 = 0")
+__xlated("4: if w2 != w4 goto pc+1")
+__xlated("5: r0 = 1")
+__xlated("6: exit")
+__naked void sdiv32_overflow_ri(void)
+{
+ asm volatile (" \
+ w2 = %[int_min]; \
+ w4 = w2; \
+ w2 s/= -1; \
+ r0 = 0; \
+ if w2 != w4 goto +1; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, r/i, small_val/-1")
+__success __retval(-5)
+__arch_x86_64
+__xlated("0: w2 = 5")
+__xlated("1: w4 = w2")
+__xlated("2: w2 = -w2")
+__xlated("3: w0 = w2")
+__xlated("4: exit")
+__naked void sdiv32_ri_divisor_neg_1(void)
+{
+ asm volatile (" \
+ w2 = 5; \
+ w4 = w2; \
+ w2 s/= -1; \
+ w0 = w2; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, overflow r/r, LLONG_MIN/-1")
+__success __retval(0)
+__arch_x86_64
+__xlated("0: r2 = 0x8000000000000000")
+__xlated("2: r3 = -1")
+__xlated("3: r4 = r2")
+__xlated("4: r11 = r3")
+__xlated("5: r11 += 1")
+__xlated("6: if r11 > 0x1 goto pc+3")
+__xlated("7: if r11 == 0x1 goto pc+3")
+__xlated("8: w2 = 0")
+__xlated("9: goto pc+1")
+__xlated("10: r2 s%= r3")
+__xlated("11: r0 = r2")
+__xlated("12: exit")
+__naked void smod64_overflow_rr(void)
+{
+ asm volatile (" \
+ r2 = %[llong_min] ll; \
+ r3 = -1; \
+ r4 = r2; \
+ r2 s%%= r3; \
+ r0 = r2; \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, r/r, small_val/-1")
+__success __retval(0)
+__arch_x86_64
+__xlated("0: r2 = 5")
+__xlated("1: r3 = -1")
+__xlated("2: r4 = r2")
+__xlated("3: r11 = r3")
+__xlated("4: r11 += 1")
+__xlated("5: if r11 > 0x1 goto pc+3")
+__xlated("6: if r11 == 0x1 goto pc+3")
+__xlated("7: w2 = 0")
+__xlated("8: goto pc+1")
+__xlated("9: r2 s%= r3")
+__xlated("10: r0 = r2")
+__xlated("11: exit")
+__naked void smod64_rr_divisor_neg_1(void)
+{
+ asm volatile (" \
+ r2 = 5; \
+ r3 = -1; \
+ r4 = r2; \
+ r2 s%%= r3; \
+ r0 = r2; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, overflow r/i, LLONG_MIN/-1")
+__success __retval(0)
+__arch_x86_64
+__xlated("0: r2 = 0x8000000000000000")
+__xlated("2: r4 = r2")
+__xlated("3: w2 = 0")
+__xlated("4: r0 = r2")
+__xlated("5: exit")
+__naked void smod64_overflow_ri(void)
+{
+ asm volatile (" \
+ r2 = %[llong_min] ll; \
+ r4 = r2; \
+ r2 s%%= -1; \
+ r0 = r2; \
+ exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, r/i, small_val/-1")
+__success __retval(0)
+__arch_x86_64
+__xlated("0: r2 = 5")
+__xlated("1: r4 = r2")
+__xlated("2: w2 = 0")
+__xlated("3: r0 = r2")
+__xlated("4: exit")
+__naked void smod64_ri_divisor_neg_1(void)
+{
+ asm volatile (" \
+ r2 = 5; \
+ r4 = r2; \
+ r2 s%%= -1; \
+ r0 = r2; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, overflow r/r, INT_MIN/-1")
+__success __retval(0)
+__arch_x86_64
+__xlated("0: w2 = -2147483648")
+__xlated("1: w3 = -1")
+__xlated("2: w4 = w2")
+__xlated("3: r11 = r3")
+__xlated("4: w11 += 1")
+__xlated("5: if w11 > 0x1 goto pc+3")
+__xlated("6: if w11 == 0x1 goto pc+4")
+__xlated("7: w2 = 0")
+__xlated("8: goto pc+1")
+__xlated("9: w2 s%= w3")
+__xlated("10: goto pc+1")
+__xlated("11: w2 = w2")
+__xlated("12: r0 = r2")
+__xlated("13: exit")
+__naked void smod32_overflow_rr(void)
+{
+ asm volatile (" \
+ w2 = %[int_min]; \
+ w3 = -1; \
+ w4 = w2; \
+ w2 s%%= w3; \
+ r0 = r2; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, r/r, small_val/-1")
+__success __retval(0)
+__arch_x86_64
+__xlated("0: w2 = -5")
+__xlated("1: w3 = -1")
+__xlated("2: w4 = w2")
+__xlated("3: r11 = r3")
+__xlated("4: w11 += 1")
+__xlated("5: if w11 > 0x1 goto pc+3")
+__xlated("6: if w11 == 0x1 goto pc+4")
+__xlated("7: w2 = 0")
+__xlated("8: goto pc+1")
+__xlated("9: w2 s%= w3")
+__xlated("10: goto pc+1")
+__xlated("11: w2 = w2")
+__xlated("12: r0 = r2")
+__xlated("13: exit")
+__naked void smod32_rr_divisor_neg_1(void)
+{
+ asm volatile (" \
+ w2 = -5; \
+ w3 = -1; \
+ w4 = w2; \
+ w2 s%%= w3; \
+ r0 = r2; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, overflow r/i, INT_MIN/-1")
+__success __retval(0)
+__arch_x86_64
+__xlated("0: w2 = -2147483648")
+__xlated("1: w4 = w2")
+__xlated("2: w2 = 0")
+__xlated("3: r0 = r2")
+__xlated("4: exit")
+__naked void smod32_overflow_ri(void)
+{
+ asm volatile (" \
+ w2 = %[int_min]; \
+ w4 = w2; \
+ w2 s%%= -1; \
+ r0 = r2; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, r/i, small_val/-1")
+__success __retval(0)
+__arch_x86_64
+__xlated("0: w2 = 5")
+__xlated("1: w4 = w2")
+__xlated("2: w2 = 0")
+__xlated("3: w0 = w2")
+__xlated("4: exit")
+__naked void smod32_ri_divisor_neg_1(void)
+{
+ asm volatile (" \
+ w2 = 5; \
+ w4 = w2; \
+ w2 s%%= -1; \
+ w0 = w2; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_search_pruning.c b/tools/testing/selftests/bpf/progs/verifier_search_pruning.c
new file mode 100644
index 000000000000..f40e57251e94
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_search_pruning.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/search_pruning.c */
+
+#include <linux/bpf.h>
+#include <../../../include/linux/filter.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("pointer/scalar confusion in state equality check (way 1)")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr as return value")
+__retval(POINTER_VALUE)
+__naked void state_equality_check_way_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u64*)(r0 + 0); \
+ goto l1_%=; \
+l0_%=: r0 = r10; \
+l1_%=: goto l2_%=; \
+l2_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("pointer/scalar confusion in state equality check (way 2)")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr as return value")
+__retval(POINTER_VALUE)
+__naked void state_equality_check_way_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ r0 = r10; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r0 + 0); \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("liveness pruning and write screening")
+__failure __msg("R0 !read_ok")
+__naked void liveness_pruning_and_write_screening(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* branch conditions teach us nothing about R2 */\
+ if r2 >= 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r2 >= 0 goto l1_%=; \
+ r0 = 0; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("varlen_map_value_access pruning")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void varlen_map_value_access_pruning(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r0 + 0); \
+ w2 = %[max_entries]; \
+ if r2 s> r1 goto l1_%=; \
+ w1 = 0; \
+l1_%=: w1 <<= 2; \
+ r0 += r1; \
+ goto l2_%=; \
+l2_%=: r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("search pruning: all branches should be verified (nop operation)")
+__failure __msg("R6 invalid mem access 'scalar'")
+__naked void should_be_verified_nop_operation(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = *(u64*)(r0 + 0); \
+ if r3 == 0xbeef goto l1_%=; \
+ r4 = 0; \
+ goto l2_%=; \
+l1_%=: r4 = 1; \
+l2_%=: *(u64*)(r10 - 16) = r4; \
+ call %[bpf_ktime_get_ns]; \
+ r5 = *(u64*)(r10 - 16); \
+ if r5 == 0 goto l0_%=; \
+ r6 = 0; \
+ r1 = 0xdead; \
+ *(u64*)(r6 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("search pruning: all branches should be verified (invalid stack access)")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack off -16+0 size 8")
+__retval(0)
+__naked void be_verified_invalid_stack_access(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = *(u64*)(r0 + 0); \
+ r4 = 0; \
+ if r3 == 0xbeef goto l1_%=; \
+ *(u64*)(r10 - 16) = r4; \
+ goto l2_%=; \
+l1_%=: *(u64*)(r10 - 24) = r4; \
+l2_%=: call %[bpf_ktime_get_ns]; \
+ r5 = *(u64*)(r10 - 16); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("precision tracking for u32 spill/fill")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__naked void tracking_for_u32_spill_fill(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ w6 = 32; \
+ if r0 == 0 goto l0_%=; \
+ w6 = 4; \
+l0_%=: /* Additional insns to introduce a pruning point. */\
+ call %[bpf_get_prandom_u32]; \
+ r3 = 0; \
+ r3 = 0; \
+ if r0 == 0 goto l1_%=; \
+ r3 = 0; \
+l1_%=: /* u32 spill/fill */ \
+ *(u32*)(r10 - 8) = r6; \
+ r8 = *(u32*)(r10 - 8); \
+ /* out-of-bound map value access for r6=32 */ \
+ r1 = 0; \
+ *(u64*)(r10 - 16) = r1; \
+ r2 = r10; \
+ r2 += -16; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r0 += r8; \
+ r1 = *(u32*)(r0 + 0); \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("precision tracking for u32 spills, u64 fill")
+__failure __msg("div by zero")
+__naked void for_u32_spills_u64_fill(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ w7 = 0xffffffff; \
+ /* Additional insns to introduce a pruning point. */\
+ r3 = 1; \
+ r3 = 1; \
+ r3 = 1; \
+ r3 = 1; \
+ call %[bpf_get_prandom_u32]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = 1; \
+l0_%=: w3 /= 0; \
+ /* u32 spills, u64 fill */ \
+ *(u32*)(r10 - 4) = r6; \
+ *(u32*)(r10 - 8) = r7; \
+ r8 = *(u64*)(r10 - 8); \
+ /* if r8 != X goto pc+1 r8 known in fallthrough branch */\
+ if r8 != 0xffffffff goto l1_%=; \
+ r3 = 1; \
+l1_%=: /* if r8 == X goto pc+1 condition always true on first\
+ * traversal, so starts backtracking to mark r8 as requiring\
+ * precision. r7 marked as needing precision. r6 not marked\
+ * since it's not tracked. \
+ */ \
+ if r8 == 0xffffffff goto l2_%=; \
+ /* fails if r8 correctly marked unknown after fill. */\
+ w3 /= 0; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("allocated_stack")
+__success __msg("processed 15 insns")
+__success_unpriv __msg_unpriv("") __log_level(1) __retval(0)
+__naked void allocated_stack(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r7 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r6; \
+ r6 = *(u64*)(r10 - 8); \
+ *(u8*)(r10 - 9) = r7; \
+ r7 = *(u8*)(r10 - 9); \
+l0_%=: if r0 != 0 goto l1_%=; \
+l1_%=: if r0 != 0 goto l2_%=; \
+l2_%=: if r0 != 0 goto l3_%=; \
+l3_%=: if r0 != 0 goto l4_%=; \
+l4_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* The test performs a conditional 64-bit write to a stack location
+ * fp[-8], this is followed by an unconditional 8-bit write to fp[-8],
+ * then data is read from fp[-8]. This sequence is unsafe.
+ *
+ * The test would be mistakenly marked as safe w/o dst register parent
+ * preservation in verifier.c:copy_register_state() function.
+ *
+ * Note the usage of BPF_F_TEST_STATE_FREQ to force creation of the
+ * checkpoint state after conditional 64-bit assignment.
+ */
+
+SEC("socket")
+__description("write tracking and register parent chain bug")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack off -8+1 size 8")
+__retval(0) __flag(BPF_F_TEST_STATE_FREQ)
+__naked void and_register_parent_chain_bug(void)
+{
+ asm volatile (" \
+ /* r6 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r6 = r0; \
+ /* r0 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ /* if r0 > r6 goto +1 */ \
+ if r0 > r6 goto l0_%=; \
+ /* *(u64 *)(r10 - 8) = 0xdeadbeef */ \
+ r0 = 0xdeadbeef; \
+ *(u64*)(r10 - 8) = r0; \
+l0_%=: r1 = 42; \
+ *(u8*)(r10 - 8) = r1; \
+ r2 = *(u64*)(r10 - 8); \
+ /* exit(0) */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Without checkpoint forcibly inserted at the back-edge a loop this
+ * test would take a very long time to verify.
+ */
+SEC("kprobe")
+__failure __log_level(4)
+__msg("BPF program is too large.")
+__naked void short_loop1(void)
+{
+ asm volatile (
+ " r7 = *(u16 *)(r1 +0);"
+ "1: r7 += 0x1ab064b9;"
+ " .8byte %[jset];" /* same as 'if r7 & 0x702000 goto 1b;' */
+ " r7 &= 0x1ee60e;"
+ " r7 += r1;"
+ " if r7 s> 0x37d2 goto +0;"
+ " r0 = 0;"
+ " exit;"
+ :
+ : __imm_insn(jset, BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x702000, -2))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c
new file mode 100644
index 000000000000..a2132c72d3b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sock.c
@@ -0,0 +1,1169 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/sock.c */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_reuseport_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} map_sockhash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} map_sockmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} map_xskmap SEC(".maps");
+
+struct val {
+ int cnt;
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, int);
+ __type(value, struct val);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} sk_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("cgroup/skb")
+__description("skb->sk: no NULL check")
+__failure __msg("invalid mem access 'sock_common_or_null'")
+__failure_unpriv
+__naked void skb_sk_no_null_check(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ r0 = *(u32*)(r1 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("skb->sk: sk->family [non fullsock field]")
+__success __success_unpriv __retval(0)
+__naked void sk_family_non_fullsock_field_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u32*)(r1 + %[bpf_sock_family]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_family, offsetof(struct bpf_sock, family))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("skb->sk: sk->type [fullsock field]")
+__failure __msg("invalid sock_common access")
+__failure_unpriv
+__naked void sk_sk_type_fullsock_field_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u32*)(r1 + %[bpf_sock_type]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_sk_fullsock(skb->sk): no !skb->sk check")
+__failure __msg("type=sock_common_or_null expected=sock_common")
+__failure_unpriv
+__naked void sk_no_skb_sk_check_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ call %[bpf_sk_fullsock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): no NULL check on ret")
+__failure __msg("invalid mem access 'sock_or_null'")
+__failure_unpriv
+__naked void no_null_check_on_ret_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ r0 = *(u32*)(r0 + %[bpf_sock_type]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->type [fullsock field]")
+__success __success_unpriv __retval(0)
+__naked void sk_sk_type_fullsock_field_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_sock_type]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->family [non fullsock field]")
+__success __success_unpriv __retval(0)
+__naked void sk_family_non_fullsock_field_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_sock_family]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_family, offsetof(struct bpf_sock, family))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->state [narrow load]")
+__success __success_unpriv __retval(0)
+__naked void sk_sk_state_narrow_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[bpf_sock_state]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_state, offsetof(struct bpf_sock, state))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)")
+__success __success_unpriv __retval(0)
+__naked void port_word_load_backward_compatibility(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_sock_dst_port]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_dst_port, offsetof(struct bpf_sock, dst_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [half load]")
+__success __success_unpriv __retval(0)
+__naked void sk_dst_port_half_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u16*)(r0 + %[bpf_sock_dst_port]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_dst_port, offsetof(struct bpf_sock, dst_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)")
+__failure __msg("invalid sock access")
+__failure_unpriv
+__naked void dst_port_half_load_invalid_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u16*)(r0 + %[__imm_0]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__imm_0, offsetof(struct bpf_sock, dst_port) + 2),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [byte load]")
+__success __success_unpriv __retval(0)
+__naked void sk_dst_port_byte_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r2 = *(u8*)(r0 + %[bpf_sock_dst_port]); \
+ r2 = *(u8*)(r0 + %[__imm_0]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__imm_0, offsetof(struct bpf_sock, dst_port) + 1),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_dst_port, offsetof(struct bpf_sock, dst_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)")
+__failure __msg("invalid sock access")
+__failure_unpriv
+__naked void dst_port_byte_load_invalid(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[__imm_0]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__imm_0, offsetof(struct bpf_sock, dst_port) + 2),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)")
+__failure __msg("invalid sock access")
+__failure_unpriv
+__naked void dst_port_half_load_invalid_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u16*)(r0 + %[bpf_sock_dst_port__end]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_dst_port__end, offsetofend(struct bpf_sock, dst_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]")
+__success __success_unpriv __retval(0)
+__naked void dst_ip6_load_2nd_byte(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[__imm_0]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__imm_0, offsetof(struct bpf_sock, dst_ip6[0]) + 1),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->type [narrow load]")
+__success __success_unpriv __retval(0)
+__naked void sk_sk_type_narrow_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[bpf_sock_type]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->protocol [narrow load]")
+__success __success_unpriv __retval(0)
+__naked void sk_sk_protocol_narrow_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[bpf_sock_protocol]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_protocol, offsetof(struct bpf_sock, protocol))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): beyond last field")
+__failure __msg("invalid sock access")
+__failure_unpriv
+__naked void skb_sk_beyond_last_field_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_sock_rx_queue_mapping__end]);\
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_rx_queue_mapping__end, offsetofend(struct bpf_sock, rx_queue_mapping))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): no !skb->sk check")
+__failure __msg("type=sock_common_or_null expected=sock_common")
+__failure_unpriv
+__naked void sk_no_skb_sk_check_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ call %[bpf_tcp_sock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): no NULL check on ret")
+__failure __msg("invalid mem access 'tcp_sock_or_null'")
+__failure_unpriv
+__naked void no_null_check_on_ret_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ r0 = *(u32*)(r0 + %[bpf_tcp_sock_snd_cwnd]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): tp->snd_cwnd")
+__success __success_unpriv __retval(0)
+__naked void skb_sk_tp_snd_cwnd_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_tcp_sock_snd_cwnd]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): tp->bytes_acked")
+__success __success_unpriv __retval(0)
+__naked void skb_sk_tp_bytes_acked(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u64*)(r0 + %[bpf_tcp_sock_bytes_acked]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_bytes_acked, offsetof(struct bpf_tcp_sock, bytes_acked))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): beyond last field")
+__failure __msg("invalid tcp_sock access")
+__failure_unpriv
+__naked void skb_sk_beyond_last_field_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u64*)(r0 + %[bpf_tcp_sock_bytes_acked__end]);\
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_bytes_acked__end, offsetofend(struct bpf_tcp_sock, bytes_acked))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd")
+__success __success_unpriv __retval(0)
+__naked void skb_sk_tp_snd_cwnd_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l2_%=; \
+ exit; \
+l2_%=: r0 = *(u32*)(r0 + %[bpf_tcp_sock_snd_cwnd]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bpf_sk_release(skb->sk)")
+__failure __msg("R1 must be referenced when passed to release function")
+__naked void bpf_sk_release_skb_sk(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_release),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bpf_sk_release(bpf_sk_fullsock(skb->sk))")
+__failure __msg("R1 must be referenced when passed to release function")
+__naked void bpf_sk_fullsock_skb_sk(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bpf_sk_release(bpf_tcp_sock(skb->sk))")
+__failure __msg("R1 must be referenced when passed to release function")
+__naked void bpf_tcp_sock_skb_sk(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("sk_storage_get(map, skb->sk, NULL, 0): value == NULL")
+__success __retval(0)
+__naked void sk_null_0_value_null(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r4 = 0; \
+ r3 = 0; \
+ r2 = r0; \
+ r1 = %[sk_storage_map] ll; \
+ call %[bpf_sk_storage_get]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_storage_get),
+ __imm_addr(sk_storage_map),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("sk_storage_get(map, skb->sk, 1, 1): value == 1")
+__failure __msg("R3 type=scalar expected=fp")
+__naked void sk_1_1_value_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r4 = 1; \
+ r3 = 1; \
+ r2 = r0; \
+ r1 = %[sk_storage_map] ll; \
+ call %[bpf_sk_storage_get]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_storage_get),
+ __imm_addr(sk_storage_map),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("sk_storage_get(map, skb->sk, &stack_value, 1): stack_value")
+__success __retval(0)
+__naked void stack_value_1_stack_value(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r4 = 1; \
+ r3 = r10; \
+ r3 += -8; \
+ r2 = r0; \
+ r1 = %[sk_storage_map] ll; \
+ call %[bpf_sk_storage_get]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_storage_get),
+ __imm_addr(sk_storage_map),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bpf_map_lookup_elem(smap, &key)")
+__failure __msg("cannot pass map_type 24 into func bpf_map_lookup_elem")
+__naked void map_lookup_elem_smap_key(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[sk_storage_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(sk_storage_map)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bpf_map_lookup_elem(xskmap, &key); xs->queue_id")
+__success __retval(0)
+__naked void xskmap_key_xs_queue_id(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_xskmap] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 = *(u32*)(r0 + %[bpf_xdp_sock_queue_id]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_xskmap),
+ __imm_const(bpf_xdp_sock_queue_id, offsetof(struct bpf_xdp_sock, queue_id))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("bpf_map_lookup_elem(sockmap, &key)")
+__failure __msg("Unreleased reference id=2 alloc_insn=6")
+__naked void map_lookup_elem_sockmap_key(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_sockmap] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_sockmap)
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("bpf_map_lookup_elem(sockhash, &key)")
+__failure __msg("Unreleased reference id=2 alloc_insn=6")
+__naked void map_lookup_elem_sockhash_key(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_sockhash] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_sockhash)
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("bpf_map_lookup_elem(sockmap, &key); sk->type [fullsock field]; bpf_sk_release(sk)")
+__success
+__naked void field_bpf_sk_release_sk_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_sockmap] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = r0; \
+ r0 = *(u32*)(r0 + %[bpf_sock_type]); \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_sk_release),
+ __imm_addr(map_sockmap),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("bpf_map_lookup_elem(sockhash, &key); sk->type [fullsock field]; bpf_sk_release(sk)")
+__success
+__naked void field_bpf_sk_release_sk_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_sockhash] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = r0; \
+ r0 = *(u32*)(r0 + %[bpf_sock_type]); \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_sk_release),
+ __imm_addr(map_sockhash),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("bpf_sk_select_reuseport(ctx, reuseport_array, &key, flags)")
+__success
+__naked void ctx_reuseport_array_key_flags(void)
+{
+ asm volatile (" \
+ r4 = 0; \
+ r2 = 0; \
+ *(u32*)(r10 - 4) = r2; \
+ r3 = r10; \
+ r3 += -4; \
+ r2 = %[map_reuseport_array] ll; \
+ call %[bpf_sk_select_reuseport]; \
+ exit; \
+" :
+ : __imm(bpf_sk_select_reuseport),
+ __imm_addr(map_reuseport_array)
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("bpf_sk_select_reuseport(ctx, sockmap, &key, flags)")
+__success
+__naked void reuseport_ctx_sockmap_key_flags(void)
+{
+ asm volatile (" \
+ r4 = 0; \
+ r2 = 0; \
+ *(u32*)(r10 - 4) = r2; \
+ r3 = r10; \
+ r3 += -4; \
+ r2 = %[map_sockmap] ll; \
+ call %[bpf_sk_select_reuseport]; \
+ exit; \
+" :
+ : __imm(bpf_sk_select_reuseport),
+ __imm_addr(map_sockmap)
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("bpf_sk_select_reuseport(ctx, sockhash, &key, flags)")
+__success
+__naked void reuseport_ctx_sockhash_key_flags(void)
+{
+ asm volatile (" \
+ r4 = 0; \
+ r2 = 0; \
+ *(u32*)(r10 - 4) = r2; \
+ r3 = r10; \
+ r3 += -4; \
+ r2 = %[map_sockmap] ll; \
+ call %[bpf_sk_select_reuseport]; \
+ exit; \
+" :
+ : __imm(bpf_sk_select_reuseport),
+ __imm_addr(map_sockmap)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("mark null check on return value of bpf_skc_to helpers")
+__failure __msg("invalid mem access")
+__naked void of_bpf_skc_to_helpers(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r6 = r1; \
+ call %[bpf_skc_to_tcp_sock]; \
+ r7 = r0; \
+ r1 = r6; \
+ call %[bpf_skc_to_tcp_request_sock]; \
+ r8 = r0; \
+ if r8 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r7 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skc_to_tcp_request_sock),
+ __imm(bpf_skc_to_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/post_bind4")
+__description("sk->src_ip6[0] [load 1st byte]")
+__failure __msg("invalid bpf_context access off=28 size=2")
+__naked void post_bind4_read_src_ip6(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r7 = *(u16*)(r6 + %[bpf_sock_src_ip6_0]); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(bpf_sock_src_ip6_0, offsetof(struct bpf_sock, src_ip6[0]))
+ : __clobber_all);
+}
+
+SEC("cgroup/post_bind4")
+__description("sk->mark [load mark]")
+__failure __msg("invalid bpf_context access off=16 size=2")
+__naked void post_bind4_read_mark(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r7 = *(u16*)(r6 + %[bpf_sock_mark]); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(bpf_sock_mark, offsetof(struct bpf_sock, mark))
+ : __clobber_all);
+}
+
+SEC("cgroup/post_bind6")
+__description("sk->src_ip4 [load src_ip4]")
+__failure __msg("invalid bpf_context access off=24 size=2")
+__naked void post_bind6_read_src_ip4(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r7 = *(u16*)(r6 + %[bpf_sock_src_ip4]); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(bpf_sock_src_ip4, offsetof(struct bpf_sock, src_ip4))
+ : __clobber_all);
+}
+
+SEC("cgroup/sock_create")
+__description("sk->src_port [word load]")
+__failure __msg("invalid bpf_context access off=44 size=2")
+__naked void sock_create_read_src_port(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r7 = *(u16*)(r6 + %[bpf_sock_src_port]); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(bpf_sock_src_port, offsetof(struct bpf_sock, src_port))
+ : __clobber_all);
+}
+
+__noinline
+long skb_pull_data2(struct __sk_buff *sk, __u32 len)
+{
+ return bpf_skb_pull_data(sk, len);
+}
+
+__noinline
+long skb_pull_data1(struct __sk_buff *sk, __u32 len)
+{
+ return skb_pull_data2(sk, len);
+}
+
+/* global function calls bpf_skb_pull_data(), which invalidates packet
+ * pointers established before global function call.
+ */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ skb_pull_data1(sk, 0);
+ *p = 42; /* this is unsafe */
+ return TCX_PASS;
+}
+
+__noinline
+long xdp_pull_data2(struct xdp_md *x, __u32 len)
+{
+ return bpf_xdp_pull_data(x, len);
+}
+
+__noinline
+long xdp_pull_data1(struct xdp_md *x, __u32 len)
+{
+ return xdp_pull_data2(x, len);
+}
+
+/* global function calls bpf_xdp_pull_data(), which invalidates packet
+ * pointers established before global function call.
+ */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers_from_global_func(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ xdp_pull_data1(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+/* XDP packet changing kfunc calls invalidate packet pointers */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ bpf_xdp_pull_data(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+__noinline
+int tail_call(struct __sk_buff *sk)
+{
+ bpf_tail_call_static(sk, &jmp_table, 0);
+ return 0;
+}
+
+static __noinline
+int static_tail_call(struct __sk_buff *sk)
+{
+ bpf_tail_call_static(sk, &jmp_table, 0);
+ return 0;
+}
+
+/* Tail calls in sub-programs invalidate packet pointers. */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_by_global_tail_call(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ tail_call(sk);
+ *p = 42; /* this is unsafe */
+ return TCX_PASS;
+}
+
+/* Tail calls in static sub-programs invalidate packet pointers. */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_by_static_tail_call(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ static_tail_call(sk);
+ *p = 42; /* this is unsafe */
+ return TCX_PASS;
+}
+
+/* Direct tail calls do not invalidate packet pointers. */
+SEC("tc")
+__success
+int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ bpf_tail_call_static(sk, &jmp_table, 0);
+ *p = 42; /* this is NOT unsafe: tail calls don't return */
+ return TCX_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock_addr.c b/tools/testing/selftests/bpf/progs/verifier_sock_addr.c
new file mode 100644
index 000000000000..9c31448a0f52
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sock_addr.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf_sockopt_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/recvmsg4")
+__success
+int recvmsg4_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/recvmsg4")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int recvmsg4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/recvmsg6")
+__success
+int recvmsg6_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/recvmsg6")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int recvmsg6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/recvmsg_unix")
+__success
+int recvmsg_unix_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/recvmsg_unix")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int recvmsg_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int sendmsg4_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int sendmsg4_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg4")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int sendmsg4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/sendmsg6")
+__success
+int sendmsg6_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg6")
+__success
+int sendmsg6_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int sendmsg6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/sendmsg_unix")
+__success
+int sendmsg_unix_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg_unix")
+__success
+int sendmsg_unix_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg_unix")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int sendmsg_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/getpeername4")
+__success
+int getpeername4_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getpeername4")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getpeername4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getpeername6")
+__success
+int getpeername6_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getpeername6")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getpeername6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getpeername_unix")
+__success
+int getpeername_unix_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getpeername_unix")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getpeername_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getsockname4")
+__success
+int getsockname4_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockname4")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getsockname4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getsockname6")
+__success
+int getsockname6_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockname6")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getsockname6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getsockname_unix")
+__success
+int getsockname_unix_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockname_unix")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getsockname_unix_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_2(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_3(struct bpf_sock_addr *ctx)
+{
+ return 3;
+}
+
+SEC("cgroup/bind4")
+__failure __msg("At program exit the register R0 has smin=4 smax=4 should have been in [0, 3]")
+int bind4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 4;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_2(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_3(struct bpf_sock_addr *ctx)
+{
+ return 3;
+}
+
+SEC("cgroup/bind6")
+__failure __msg("At program exit the register R0 has smin=4 smax=4 should have been in [0, 3]")
+int bind6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 4;
+}
+
+SEC("cgroup/connect4")
+__success
+int connect4_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/connect4")
+__success
+int connect4_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/connect4")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int connect4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/connect6")
+__success
+int connect6_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/connect6")
+__success
+int connect6_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/connect6")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int connect6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/connect_unix")
+__success
+int connect_unix_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/connect_unix")
+__success
+int connect_unix_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/connect_unix")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int connect_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
new file mode 100644
index 000000000000..fe4b123187b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+
+#define __always_unused __attribute__((unused))
+
+char _license[] SEC("license") = "GPL";
+
+struct sock {
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__sockmap {
+ union {
+ struct sock *sk;
+ };
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockhash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockmap SEC(".maps");
+
+enum { CG_OK = 1 };
+
+int zero = 0;
+
+static __always_inline void test_sockmap_delete(void)
+{
+ bpf_map_delete_elem(&sockmap, &zero);
+ bpf_map_delete_elem(&sockhash, &zero);
+}
+
+static __always_inline void test_sockmap_update(void *sk)
+{
+ if (sk) {
+ bpf_map_update_elem(&sockmap, &zero, sk, BPF_ANY);
+ bpf_map_update_elem(&sockhash, &zero, sk, BPF_ANY);
+ }
+}
+
+static __always_inline void test_sockmap_lookup_and_update(void)
+{
+ struct bpf_sock *sk = bpf_map_lookup_elem(&sockmap, &zero);
+
+ if (sk) {
+ test_sockmap_update(sk);
+ bpf_sk_release(sk);
+ }
+}
+
+static __always_inline void test_sockmap_mutate(void *sk)
+{
+ test_sockmap_delete();
+ test_sockmap_update(sk);
+}
+
+static __always_inline void test_sockmap_lookup_and_mutate(void)
+{
+ test_sockmap_delete();
+ test_sockmap_lookup_and_update();
+}
+
+SEC("action")
+__success
+int test_sched_act(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("classifier")
+__success
+int test_sched_cls(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("flow_dissector")
+__success
+int test_flow_dissector_delete(struct __sk_buff *skb __always_unused)
+{
+ test_sockmap_delete();
+ return 0;
+}
+
+SEC("flow_dissector")
+__failure __msg("program of this type cannot use helper bpf_sk_release")
+int test_flow_dissector_update(struct __sk_buff *skb __always_unused)
+{
+ test_sockmap_lookup_and_update(); /* no access to skb->sk */
+ return 0;
+}
+
+SEC("iter/sockmap")
+__success
+int test_trace_iter(struct bpf_iter__sockmap *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_delete(const void *ctx __always_unused)
+{
+ test_sockmap_delete();
+ return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_update(const void *ctx __always_unused)
+{
+ test_sockmap_lookup_and_update();
+ return 0;
+}
+
+SEC("sk_lookup")
+__success
+int test_sk_lookup(struct bpf_sk_lookup *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("sk_reuseport")
+__success
+int test_sk_reuseport(struct sk_reuseport_md *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("socket")
+__success
+int test_socket_filter(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("sockops")
+__success
+int test_sockops_delete(struct bpf_sock_ops *ctx __always_unused)
+{
+ test_sockmap_delete();
+ return CG_OK;
+}
+
+SEC("sockops")
+__failure __msg("cannot update sockmap in this context")
+int test_sockops_update(struct bpf_sock_ops *ctx)
+{
+ test_sockmap_update(ctx->sk);
+ return CG_OK;
+}
+
+SEC("sockops")
+__success
+int test_sockops_update_dedicated(struct bpf_sock_ops *ctx)
+{
+ bpf_sock_map_update(ctx, &sockmap, &zero, BPF_ANY);
+ bpf_sock_hash_update(ctx, &sockhash, &zero, BPF_ANY);
+ return CG_OK;
+}
+
+SEC("xdp")
+__success
+int test_xdp(struct xdp_md *ctx __always_unused)
+{
+ test_sockmap_lookup_and_mutate();
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
new file mode 100644
index 000000000000..7a13dbd794b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -0,0 +1,1282 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/spill_fill.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include <../../../tools/include/linux/filter.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+SEC("socket")
+__description("check valid spill/fill")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(POINTER_VALUE)
+__naked void check_valid_spill_fill(void)
+{
+ asm volatile (" \
+ /* spill R1(ctx) into stack */ \
+ *(u64*)(r10 - 8) = r1; \
+ /* fill it back into R2 */ \
+ r2 = *(u64*)(r10 - 8); \
+ /* should be able to access R0 = *(R2 + 8) */ \
+ /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */\
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check valid spill/fill, skb mark")
+__success __success_unpriv __retval(0)
+__naked void valid_spill_fill_skb_mark(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ *(u64*)(r10 - 8) = r6; \
+ r0 = *(u64*)(r10 - 8); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check valid spill/fill, ptr to mem")
+__success __success_unpriv __retval(0)
+__naked void spill_fill_ptr_to_mem(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* spill R6(mem) into the stack */ \
+ *(u64*)(r10 - 8) = r6; \
+ /* fill it back in R7 */ \
+ r7 = *(u64*)(r10 - 8); \
+ /* should be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u64*)(r7 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r7; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check with invalid reg offset 0")
+__failure __msg("R0 pointer arithmetic on ringbuf_mem_or_null prohibited")
+__failure_unpriv
+__naked void with_invalid_reg_offset_0(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* add invalid offset to memory or NULL */ \
+ r0 += 1; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* should not be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u32*)(r6 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r6; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check corrupted spill/fill")
+__failure __msg("R0 invalid mem access 'scalar'")
+__msg_unpriv("attempt to corrupt spilled")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void check_corrupted_spill_fill(void)
+{
+ asm volatile (" \
+ /* spill R1(ctx) into stack */ \
+ *(u64*)(r10 - 8) = r1; \
+ /* mess up with R1 pointer on stack */ \
+ r0 = 0x23; \
+ *(u8*)(r10 - 7) = r0; \
+ /* fill back into R0 is fine for priv. \
+ * R0 now becomes SCALAR_VALUE. \
+ */ \
+ r0 = *(u64*)(r10 - 8); \
+ /* Load from R0 should fail. */ \
+ r0 = *(u64*)(r0 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check corrupted spill/fill, LSB")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(POINTER_VALUE)
+__naked void check_corrupted_spill_fill_lsb(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r1; \
+ r0 = 0xcafe; \
+ *(u16*)(r10 - 8) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check corrupted spill/fill, MSB")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(POINTER_VALUE)
+__naked void check_corrupted_spill_fill_msb(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r1; \
+ r0 = 0x12345678; \
+ *(u32*)(r10 - 4) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("Spill and refill a u32 const scalar. Offset to skb->data")
+__success __retval(0)
+__naked void scalar_offset_to_skb_data_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ r4 = *(u32*)(r10 - 8); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */ \
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("Spill a u32 const, refill from another half of the uninit u32 from the stack")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack off -4+0 size 4")
+__retval(0)
+__naked void uninit_u32_from_the_stack(void)
+{
+ asm volatile (" \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ /* r4 = *(u32 *)(r10 -4) fp-8=????rrrr*/ \
+ r4 = *(u32*)(r10 - 4); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("Spill a u32 const scalar. Refill as u16. Offset to skb->data")
+__success __retval(0)
+__naked void u16_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r4 = *(u16*)(r10 - 8);"
+#else
+ "r4 = *(u16*)(r10 - 6);"
+#endif
+ " \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill u32 const scalars. Refill as u64. Offset to skb->data")
+__failure __msg("math between pkt pointer and register with unbounded min value is not allowed")
+__naked void u64_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w6 = 0; \
+ w7 = 20; \
+ *(u32*)(r10 - 4) = r6; \
+ *(u32*)(r10 - 8) = r7; \
+ r4 = *(u64*)(r10 - 8); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4= */ \
+ r0 += r4; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill a u32 const scalar. Refill as u16 from MSB. Offset to skb->data")
+__failure __msg("invalid access to packet")
+__naked void _6_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r4 = *(u16*)(r10 - 6);"
+#else
+ "r4 = *(u16*)(r10 - 8);"
+#endif
+ " \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill and refill a u32 const scalar at non 8byte aligned stack addr. Offset to skb->data")
+__failure __msg("invalid access to packet")
+__naked void addr_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ *(u32*)(r10 - 4) = r4; \
+ r4 = *(u32*)(r10 - 4); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=U32_MAX */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill and refill a umax=40 bounded scalar. Offset to skb->data")
+__success __retval(0)
+__naked void scalar_offset_to_skb_data_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = *(u64*)(r1 + %[__sk_buff_tstamp]); \
+ if r4 <= 40 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: /* *(u32 *)(r10 -8) = r4 R4=umax=40 */ \
+ *(u32*)(r10 - 8) = r4; \
+ /* r4 = (*u32 *)(r10 - 8) */ \
+ r4 = *(u32*)(r10 - 8); \
+ /* r2 += r4 R2=pkt R4=umax=40 */ \
+ r2 += r4; \
+ /* r0 = r2 R2=pkt,umax=40 R4=umax=40 */ \
+ r0 = r2; \
+ /* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */ \
+ r2 += 20; \
+ /* if (r2 > r3) R0=pkt,umax=40 R2=pkt,off=20,umax=40 */\
+ if r2 > r3 goto l1_%=; \
+ /* r0 = *(u32 *)r0 R0=pkt,r=20,umax=40 R2=pkt,off=20,r=20,umax=40 */\
+ r0 = *(u32*)(r0 + 0); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill a u32 scalar at fp-4 and then at fp-8")
+__success __retval(0)
+__naked void and_then_at_fp_8(void)
+{
+ asm volatile (" \
+ w4 = 4321; \
+ *(u32*)(r10 - 4) = r4; \
+ *(u32*)(r10 - 8) = r4; \
+ r4 = *(u64*)(r10 - 8); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit spill of 64-bit reg should clear ID")
+__failure __msg("math between ctx pointer and 4294967295 is not allowed")
+__naked void spill_32bit_of_64bit_fail(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ r1 = 0xffffffff; \
+ r1 <<= 32; \
+ r1 += r0; \
+ /* Assign an ID to r1. */ \
+ r2 = r1; \
+ /* 32-bit spill r1 to stack - should clear the ID! */\
+ *(u32*)(r10 - 8) = r1; \
+ /* 32-bit fill r2 from stack. */ \
+ r2 = *(u32*)(r10 - 8); \
+ /* Compare r2 with another register to trigger sync_linked_regs.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on spill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 32; \
+ /* At this point, if the verifier thinks that r1 is 0, an out-of-bounds\
+ * read will happen, because it actually contains 0xffffffff.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("16-bit spill of 32-bit reg should clear ID")
+__failure __msg("dereference of modified ctx ptr R6 off=65535 disallowed")
+__naked void spill_16bit_of_32bit_fail(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ w1 = 0xffff0000; \
+ r1 += r0; \
+ /* Assign an ID to r1. */ \
+ r2 = r1; \
+ /* 16-bit spill r1 to stack - should clear the ID! */\
+ *(u16*)(r10 - 8) = r1; \
+ /* 16-bit fill r2 from stack. */ \
+ r2 = *(u16*)(r10 - 8); \
+ /* Compare r2 with another register to trigger sync_linked_regs.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on spill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 16; \
+ /* At this point, if the verifier thinks that r1 is 0, an out-of-bounds\
+ * read will happen, because it actually contains 0xffff.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__log_level(2)
+__success
+__msg("fp-8=0m??scalar()")
+__msg("fp-16=00mm??scalar()")
+__msg("fp-24=00mm???scalar()")
+__naked void spill_subregs_preserve_stack_zero(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+
+ /* 32-bit subreg spill with ZERO, MISC, and INVALID */
+ ".8byte %[fp1_u8_st_zero];" /* ZERO, LLVM-18+: *(u8 *)(r10 -1) = 0; */
+ "*(u8 *)(r10 -2) = r0;" /* MISC */
+ /* fp-3 and fp-4 stay INVALID */
+ "*(u32 *)(r10 -8) = r0;"
+
+ /* 16-bit subreg spill with ZERO, MISC, and INVALID */
+ ".8byte %[fp10_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -10) = 0; */
+ "*(u16 *)(r10 -12) = r0;" /* MISC */
+ /* fp-13 and fp-14 stay INVALID */
+ "*(u16 *)(r10 -16) = r0;"
+
+ /* 8-bit subreg spill with ZERO, MISC, and INVALID */
+ ".8byte %[fp18_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r18 -10) = 0; */
+ "*(u16 *)(r10 -20) = r0;" /* MISC */
+ /* fp-21, fp-22, and fp-23 stay INVALID */
+ "*(u8 *)(r10 -24) = r0;"
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm_insn(fp1_u8_st_zero, BPF_ST_MEM(BPF_B, BPF_REG_FP, -1, 0)),
+ __imm_insn(fp10_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -10, 0)),
+ __imm_insn(fp18_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -18, 0))
+ : __clobber_all);
+}
+
+char single_byte_buf[1] SEC(".data.single_byte_buf");
+
+SEC("raw_tp")
+__log_level(2)
+__success
+/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8=0")
+/* but fp-16 is spilled IMPRECISE zero const reg */
+__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0=0 R10=fp0 fp-16=0")
+/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register
+ * precise immediately; if necessary, it will be marked precise later
+ */
+__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0")
+/* similarly, when R2 is assigned from spilled register, it is initially
+ * imprecise, but will be marked precise later once it is used in precise context
+ */
+__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2=0 R10=fp0 fp-16=0")
+__msg("11: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 8: (73) *(u8 *)(r1 +0) = r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (0f) r1 += r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 6: (71) r2 = *(u8 *)(r10 -1)")
+__msg("mark_precise: frame0: regs= stack=-16 before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 4: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0")
+__naked void partial_stack_load_preserves_zeros(void)
+{
+ asm volatile (
+ /* fp-8 is value zero (represented by a zero value fake reg) */
+ ".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */
+
+ /* fp-16 is const zero register */
+ "r0 = 0;"
+ "*(u64 *)(r10 -16) = r0;"
+
+ /* load single U8 from non-aligned spilled value zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -1);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U8 from non-aligned ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -9);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned spilled value zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -2);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -10);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned spilled value zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -4);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -12);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* for completeness, load U64 from STACK_ZERO slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u64 *)(r10 -8);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* for completeness, load U64 from ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u64 *)(r10 -16);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(single_byte_buf),
+ __imm_insn(fp8_st_zero, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0))
+ : __clobber_common);
+}
+
+SEC("raw_tp")
+__log_level(2)
+__success
+/* fp-4 is STACK_ZERO */
+__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????")
+__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0000????")
+__msg("5: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)")
+__naked void partial_stack_load_preserves_partial_zeros(void)
+{
+ asm volatile (
+ /* fp-4 is value zero */
+ ".8byte %[fp4_st_zero];" /* LLVM-18+: *(u32 *)(r10 -4) = 0; */
+
+ /* load single U8 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -1);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -2);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -4);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(single_byte_buf),
+ __imm_insn(fp4_st_zero, BPF_ST_MEM(BPF_W, BPF_REG_FP, -4, 0))
+ : __clobber_common);
+}
+
+char two_byte_buf[2] SEC(".data.two_byte_buf");
+
+SEC("raw_tp")
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
+__success
+/* make sure fp-8 is IMPRECISE fake register spill */
+__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8=1")
+/* and fp-16 is spilled IMPRECISE const reg */
+__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=1")
+/* validate load from fp-8, which was initialized using BPF_ST_MEM */
+__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2=1 R10=fp0 fp-8=1")
+__msg("9: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
+/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+/* validate load from fp-16, which was initialized using BPF_STX_MEM */
+__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2=1 R10=fp0 fp-16=1")
+__msg("13: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
+/* now both fp-8 and fp-16 are precise, very good */
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=P1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__naked void stack_load_preserves_const_precision(void)
+{
+ asm volatile (
+ /* establish checkpoint with state that has no stack slots;
+ * if we bubble up to this state without finding desired stack
+ * slot, then it's a bug and should be caught
+ */
+ "goto +0;"
+
+ /* fp-8 is const 1 *fake* register */
+ ".8byte %[fp8_st_one];" /* LLVM-18+: *(u64 *)(r10 -8) = 1; */
+
+ /* fp-16 is const 1 register */
+ "r0 = 1;"
+ "*(u64 *)(r10 -16) = r0;"
+
+ /* force checkpoint to check precision marks preserved in parent states */
+ "goto +0;"
+
+ /* load single U64 from aligned FAKE_REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u64 *)(r10 -8);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U64 from aligned REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u64 *)(r10 -16);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(two_byte_buf),
+ __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 1))
+ : __clobber_common);
+}
+
+SEC("raw_tp")
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
+__success
+/* make sure fp-8 is 32-bit FAKE subregister spill */
+__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1")
+/* but fp-16 is spilled IMPRECISE zero const reg */
+__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=????1")
+/* validate load from fp-8, which was initialized using BPF_ST_MEM */
+__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2=1 R10=fp0 fp-8=????1")
+__msg("9: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+/* validate load from fp-16, which was initialized using BPF_STX_MEM */
+__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2=1 R10=fp0 fp-16=????1")
+__msg("13: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????P1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
+__naked void stack_load_preserves_const_precision_subreg(void)
+{
+ asm volatile (
+ /* establish checkpoint with state that has no stack slots;
+ * if we bubble up to this state without finding desired stack
+ * slot, then it's a bug and should be caught
+ */
+ "goto +0;"
+
+ /* fp-8 is const 1 *fake* SUB-register */
+ ".8byte %[fp8_st_one];" /* LLVM-18+: *(u32 *)(r10 -8) = 1; */
+
+ /* fp-16 is const 1 SUB-register */
+ "r0 = 1;"
+ "*(u32 *)(r10 -16) = r0;"
+
+ /* force checkpoint to check precision marks preserved in parent states */
+ "goto +0;"
+
+ /* load single U32 from aligned FAKE_REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u32 *)(r10 -8);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from aligned REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u32 *)(r10 -16);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(two_byte_buf),
+ __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_W, BPF_REG_FP, -8, 1)) /* 32-bit spill */
+ : __clobber_common);
+}
+
+SEC("xdp")
+__description("32-bit spilled reg range should be tracked")
+__success __retval(0)
+__naked void spill_32bit_range_track(void)
+{
+ asm volatile(" \
+ call %[bpf_ktime_get_ns]; \
+ /* Make r0 bounded. */ \
+ r0 &= 65535; \
+ /* Assign an ID to r0. */ \
+ r1 = r0; \
+ /* 32-bit spill r0 to stack. */ \
+ *(u32*)(r10 - 8) = r0; \
+ /* Boundary check on r0. */ \
+ if r0 < 1 goto l0_%=; \
+ /* 32-bit fill r1 from stack. */ \
+ r1 = *(u32*)(r10 - 8); \
+ /* r1 == r0 => r1 >= 1 always. */ \
+ if r1 >= 1 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. \
+ * Do an invalid memory access if the verifier \
+ * follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("64-bit spill of 64-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_64bit_of_64bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x80000000; \
+ r0 <<= 32; \
+ /* 64-bit spill r0 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r0; \
+ /* 64-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u64*)(r10 - 8); \
+ /* Compare r1 with another register to trigger sync_linked_regs.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit spill of 32-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_32bit_of_32bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0x80000000; \
+ /* 32-bit spill r0 to stack - should assign an ID. */\
+ *(u32*)(r10 - 8) = r0; \
+ /* 32-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u32*)(r10 - 8); \
+ /* Compare r1 with another register to trigger sync_linked_regs.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("16-bit spill of 16-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_16bit_of_16bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8000; \
+ /* 16-bit spill r0 to stack - should assign an ID. */\
+ *(u16*)(r10 - 8) = r0; \
+ /* 16-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u16*)(r10 - 8); \
+ /* Compare r1 with another register to trigger sync_linked_regs.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("8-bit spill of 8-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_8bit_of_8bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x80; \
+ /* 8-bit spill r0 to stack - should assign an ID. */\
+ *(u8*)(r10 - 8) = r0; \
+ /* 8-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u8*)(r10 - 8); \
+ /* Compare r1 with another register to trigger sync_linked_regs.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("spill unbounded reg, then range check src")
+__success __retval(0)
+__naked void spill_unbounded(void)
+{
+ asm volatile (" \
+ /* Produce an unbounded scalar. */ \
+ call %[bpf_get_prandom_u32]; \
+ /* Spill r0 to stack. */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* Boundary check on r0. */ \
+ if r0 > 16 goto l0_%=; \
+ /* Fill r0 from stack. */ \
+ r0 = *(u64*)(r10 - 8); \
+ /* Boundary check on r0 with predetermined result. */\
+ if r0 <= 16 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill")
+__success __retval(0)
+__naked void fill_32bit_after_spill_64bit(void)
+{
+ asm volatile(" \
+ /* Randomize the upper 32 bits. */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 <<= 32; \
+ /* 64-bit spill r0 to stack. */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* 32-bit fill r0 from stack. */ \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(u32*)(r10 - 8);"
+#else
+ "r0 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Boundary check on r0 with predetermined result. */\
+ if r0 == 0 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill of 32-bit value should preserve ID")
+__success __retval(0)
+__naked void fill_32bit_after_spill_64bit_preserve_id(void)
+{
+ asm volatile (" \
+ /* Randomize the lower 32 bits. */ \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xffffffff; \
+ /* 64-bit spill r0 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r0; \
+ /* 32-bit fill r1 from stack - should preserve the ID. */\
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(u32*)(r10 - 8);"
+#else
+ "r1 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Compare r1 with another register to trigger sync_linked_regs. */\
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill should clear ID")
+__failure __msg("math between ctx pointer and 4294967295 is not allowed")
+__naked void fill_32bit_after_spill_64bit_clear_id(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ r1 = 0xffffffff; \
+ r1 <<= 32; \
+ r1 += r0; \
+ /* 64-bit spill r1 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r1; \
+ /* 32-bit fill r2 from stack - should clear the ID. */\
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r2 = *(u32*)(r10 - 8);"
+#else
+ "r2 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Compare r2 with another register to trigger sync_linked_regs.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on fill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 32; \
+ /* The verifier shouldn't propagate r2's range to r1, so it should\
+ * still remember r1 = 0xffffffff and reject the below.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* stacksafe(): check if stack spill of an imprecise scalar in old state
+ * is considered equivalent to STACK_{MISC,INVALID} in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg("8: safe")
+__msg("processed 11 insns")
+/* STACK_INVALID should prevent verifier in unpriv mode from
+ * considering states equivalent and force an error on second
+ * verification path (entry - label 1 - label 2).
+ */
+__failure_unpriv
+__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg_unpriv("9: (95) exit")
+__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg_unpriv("invalid read from stack off -8+2 size 8")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_imprecise_scalar_vs_cur_stack_misc(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure STACK_{MISC,INVALID} at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u16*)(r10 - 8) = r0;"
+ "*(u16*)(r10 - 4) = r0;"
+"2:"
+ /* read fp-8, should be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check that stack spill of a precise scalar in old state
+ * is not considered equivalent to STACK_MISC in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* verifier should visit 'if r1 == 0x2a ...' two times:
+ * - once for path entry - label 2;
+ * - once for path entry - label 1 - label 2.
+ */
+__msg("if r1 == 0x2a goto pc+0")
+__msg("if r1 == 0x2a goto pc+0")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_precise_scalar_vs_cur_stack_misc(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure STACK_MISC at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u64*)(r10 - 8) = r0;"
+ "*(u32*)(r10 - 4) = r0;"
+"2:"
+ /* read fp-8, should not be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ /* use r1 in precise context */
+ "if r1 == 42 goto +0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check if STACK_MISC in old state is considered
+ * equivalent to stack spill of a scalar in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("8: (79) r0 = *(u64 *)(r10 -8)")
+__msg("8: safe")
+__msg("processed 11 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_stack_misc_vs_cur_scalar(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure STACK_{MISC,INVALID} at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u16*)(r10 - 8) = r0;"
+ "*(u16*)(r10 - 4) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+"2:"
+ /* read fp-8, should be considered safe on second visit */
+ "r0 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check that STACK_MISC in old state is not considered
+ * equivalent to stack spill of a non-scalar in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* verifier should process exit instructions twice:
+ * - once for path entry - label 2;
+ * - once for path entry - label 1 - label 2.
+ */
+__msg("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg("9: (95) exit")
+__msg("from 2 to 7")
+__msg("8: safe")
+__msg("processed 11 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_stack_misc_vs_cur_ctx_ptr(void)
+{
+ asm volatile(
+ /* remember context pointer in r9 */
+ "r9 = r1;"
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure STACK_MISC at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u64*)(r10 - 8) = r0;"
+ "*(u32*)(r10 - 4) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure context pointer in fp-8 */
+ "*(u64*)(r10 - 8) = r9;"
+"2:"
+ /* read fp-8, should not be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("stack_noperfmon: reject read of invalid slots")
+__success
+__caps_unpriv(CAP_BPF)
+__failure_unpriv __msg_unpriv("invalid read from stack off -8+1 size 8")
+__naked void stack_noperfmon_reject_invalid_read(void)
+{
+ asm volatile (" \
+ r2 = 1; \
+ r6 = r10; \
+ r6 += -8; \
+ *(u8 *)(r6 + 0) = r2; \
+ r2 = *(u64 *)(r6 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("stack_noperfmon: narrow spill onto 64-bit scalar spilled slots")
+__success
+__caps_unpriv(CAP_BPF)
+__success_unpriv
+__naked void stack_noperfmon_spill_32bit_onto_64bit_slot(void)
+{
+ asm volatile(" \
+ r0 = 0; \
+ *(u64 *)(r10 - 8) = r0; \
+ *(u32 *)(r10 - 8) = r0; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
new file mode 100644
index 000000000000..d9d7b05cf6d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/spin_lock.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct val {
+ int cnt;
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct val);
+} map_spin_lock SEC(".maps");
+
+SEC("cgroup/skb")
+__description("spin_lock: test1 success")
+__success __failure_unpriv __msg_unpriv("")
+__retval(0)
+__naked void spin_lock_test1_success(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 0); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test2 direct ld/st")
+__failure __msg("cannot be accessed directly")
+__failure_unpriv __msg_unpriv("")
+__naked void lock_test2_direct_ld_st(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r1 + 0); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test3 direct ld/st")
+__failure __msg("cannot be accessed directly")
+__failure_unpriv __msg_unpriv("")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void lock_test3_direct_ld_st(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 1); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test4 direct ld/st")
+__failure __msg("cannot be accessed directly")
+__failure_unpriv __msg_unpriv("")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void lock_test4_direct_ld_st(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u16*)(r6 + 3); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test5 call within a locked region")
+__failure __msg("calls are not allowed")
+__failure_unpriv __msg_unpriv("")
+__naked void call_within_a_locked_region(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r6; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test6 missing unlock")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_spin_lock-ed region")
+__failure_unpriv __msg_unpriv("")
+__naked void spin_lock_test6_missing_unlock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 0); \
+ if r0 != 0 goto l1_%=; \
+ call %[bpf_spin_unlock]; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test7 unlock without lock")
+__failure __msg("without taking a lock")
+__failure_unpriv __msg_unpriv("")
+__naked void lock_test7_unlock_without_lock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ if r1 != 0 goto l1_%=; \
+ call %[bpf_spin_lock]; \
+l1_%=: r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 0); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test8 double lock")
+__failure __msg("calls are not allowed")
+__failure_unpriv __msg_unpriv("")
+__naked void spin_lock_test8_double_lock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 0); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test9 different lock")
+__failure __msg("unlock of different lock")
+__failure_unpriv __msg_unpriv("")
+__naked void spin_lock_test9_different_lock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r7 = r0; \
+ r1 = r6; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r7; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test10 lock in subprog without unlock")
+__success
+__failure_unpriv __msg_unpriv("")
+__naked void lock_in_subprog_without_unlock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call lock_in_subprog_without_unlock__1; \
+ r1 = r6; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void lock_in_subprog_without_unlock__1(void)
+{
+ asm volatile (" \
+ call %[bpf_spin_lock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_spin_lock)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("spin_lock: test11 ld_abs under lock")
+__failure __msg("inside bpf_spin_lock")
+__naked void test11_ld_abs_under_lock(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r7 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r0 = *(u8*)skb[0]; \
+ r1 = r7; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("spin_lock: regsafe compare reg->id for map value")
+__failure __msg("bpf_spin_unlock of different lock")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void reg_id_for_map_value(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r6 = *(u32*)(r6 + %[__sk_buff_mark]); \
+ r1 = %[map_spin_lock] ll; \
+ r9 = r1; \
+ r2 = 0; \
+ *(u32*)(r10 - 4) = r2; \
+ r2 = r10; \
+ r2 += -4; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r7 = r0; \
+ r1 = r9; \
+ r2 = r10; \
+ r2 += -4; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r8 = r0; \
+ r1 = r7; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ if r6 == 0 goto l2_%=; \
+ goto l3_%=; \
+l2_%=: r7 = r8; \
+l3_%=: r1 = r7; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+/* Make sure that regsafe() compares ids for spin lock records using
+ * check_ids():
+ * 1: r9 = map_lookup_elem(...) ; r9.id == 1
+ * 2: r8 = map_lookup_elem(...) ; r8.id == 2
+ * 3: r7 = ktime_get_ns()
+ * 4: r6 = ktime_get_ns()
+ * 5: if r6 > r7 goto <9>
+ * 6: spin_lock(r8)
+ * 7: r9 = r8
+ * 8: goto <10>
+ * 9: spin_lock(r9)
+ * 10: spin_unlock(r9) ; r9.id == 1 || r9.id == 2 and lock is active,
+ * ; second visit to (10) should be considered safe
+ * ; if check_ids() is used.
+ * 11: exit(0)
+ */
+
+SEC("cgroup/skb")
+__description("spin_lock: regsafe() check_ids() similar id mappings")
+__success __msg("29: safe")
+__failure_unpriv __msg_unpriv("")
+__log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_ids_similar_id_mappings(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ /* r9 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r9 = r0; \
+ /* r8 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l1_%=; \
+ r8 = r0; \
+ /* r7 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r7 = r0; \
+ /* r6 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r6 = r0; \
+ /* if r6 > r7 goto +5 ; no new information about the state is derived from\
+ * ; this check, thus produced verifier states differ\
+ * ; only in 'insn_idx' \
+ * spin_lock(r8) \
+ * r9 = r8 \
+ * goto unlock \
+ */ \
+ if r6 > r7 goto l2_%=; \
+ r1 = r8; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r9 = r8; \
+ goto l3_%=; \
+l2_%=: /* spin_lock(r9) */ \
+ r1 = r9; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+l3_%=: /* spin_unlock(r9) */ \
+ r1 = r9; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+l0_%=: /* exit(0) */ \
+ r0 = 0; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("spin_lock: loop within a locked region")
+__success __failure_unpriv __msg_unpriv("")
+__retval(0)
+int bpf_loop_inside_locked_region(void)
+{
+ const int zero = 0;
+ struct val *val;
+ int i, j = 0;
+
+ val = bpf_map_lookup_elem(&map_spin_lock, &zero);
+ if (!val)
+ return -1;
+
+ bpf_spin_lock(&val->l);
+ bpf_for(i, 0, 10) {
+ j++;
+ /* Silence "unused variable" warnings. */
+ if (j == 10)
+ break;
+ }
+ bpf_spin_unlock(&val->l);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
new file mode 100644
index 000000000000..24aabc6083fd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -0,0 +1,536 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/stack_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+SEC("socket")
+__description("PTR_TO_STACK store/load")
+__success __success_unpriv __retval(0xfaceb00c)
+__naked void ptr_to_stack_store_load(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -10; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 2) = r0; \
+ r0 = *(u64*)(r1 + 2); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - bad alignment on off")
+__failure __msg("misaligned stack access off 0+-8+2 size 8")
+__failure_unpriv
+__naked void load_bad_alignment_on_off(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 2) = r0; \
+ r0 = *(u64*)(r1 + 2); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - bad alignment on reg")
+__failure __msg("misaligned stack access off 0+-10+8 size 8")
+__failure_unpriv
+__naked void load_bad_alignment_on_reg(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -10; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 8) = r0; \
+ r0 = *(u64*)(r1 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - out of bounds low")
+__failure __msg("invalid write to stack R1 off=-79992 size=8")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void load_out_of_bounds_low(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -80000; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 8) = r0; \
+ r0 = *(u64*)(r1 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - out of bounds high")
+__failure __msg("invalid write to stack R1 off=0 size=8")
+__failure_unpriv
+__naked void load_out_of_bounds_high(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 8) = r0; \
+ r0 = *(u64*)(r1 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 1")
+__success __success_unpriv __retval(42)
+__naked void to_stack_check_high_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -1; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 2")
+__success __success_unpriv __retval(42)
+__naked void to_stack_check_high_2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r0 = 42; \
+ *(u8*)(r1 - 1) = r0; \
+ r0 = *(u8*)(r1 - 1); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 3")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(42)
+__naked void to_stack_check_high_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0; \
+ r0 = 42; \
+ *(u8*)(r1 - 1) = r0; \
+ r0 = *(u8*)(r1 - 1); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 4")
+__failure __msg("invalid write to stack R1 off=0 size=1")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_4(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 5")
+__failure __msg("invalid write to stack R1")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_5(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(__imm_0, (1 << 29) - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 6")
+__failure __msg("invalid write to stack")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_6(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + %[shrt_max]) = r0; \
+ r0 = *(u8*)(r1 + %[shrt_max]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, (1 << 29) - 1),
+ __imm_const(shrt_max, SHRT_MAX)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 7")
+__failure __msg("fp pointer offset")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_7(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + %[shrt_max]) = r0; \
+ r0 = *(u8*)(r1 + %[shrt_max]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, (1 << 29) - 1),
+ __imm_const(shrt_max, SHRT_MAX)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 1")
+__success __success_unpriv __retval(42)
+__naked void to_stack_check_low_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 2")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(42)
+__naked void to_stack_check_low_2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -513; \
+ r0 = 42; \
+ *(u8*)(r1 + 1) = r0; \
+ r0 = *(u8*)(r1 + 1); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 3")
+__failure __msg("invalid write to stack R1 off=-513 size=1")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -513; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 4")
+__failure __msg("math between fp pointer")
+__failure_unpriv
+__naked void to_stack_check_low_4(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[int_min]; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 5")
+__failure __msg("invalid write to stack")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_5(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(__imm_0, -((1 << 29) - 1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 6")
+__failure __msg("invalid write to stack")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_6(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 %[shrt_min]) = r0; \
+ r0 = *(u8*)(r1 %[shrt_min]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, -((1 << 29) - 1)),
+ __imm_const(shrt_min, SHRT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 7")
+__failure __msg("fp pointer offset")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_7(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 %[shrt_min]) = r0; \
+ r0 = *(u8*)(r1 %[shrt_min]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, -((1 << 29) - 1)),
+ __imm_const(shrt_min, SHRT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK mixed reg/k, 1")
+__success __success_unpriv __retval(42)
+__naked void stack_mixed_reg_k_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -3; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK mixed reg/k, 2")
+__success __success_unpriv __retval(42)
+__naked void stack_mixed_reg_k_2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r0 = 0; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = r10; \
+ r1 += -3; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r5 = r10; \
+ r0 = *(u8*)(r5 - 6); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK mixed reg/k, 3")
+__success __success_unpriv __retval(-3)
+__naked void stack_mixed_reg_k_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -3; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK reg")
+__success __success_unpriv __retval(42)
+__naked void ptr_to_stack_reg(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("stack pointer arithmetic")
+__success __success_unpriv __retval(0)
+__naked void stack_pointer_arithmetic(void)
+{
+ asm volatile (" \
+ r1 = 4; \
+ goto l0_%=; \
+l0_%=: r7 = r10; \
+ r7 += -10; \
+ r7 += -10; \
+ r2 = r7; \
+ r2 += r1; \
+ r0 = 0; \
+ *(u32*)(r2 + 4) = r0; \
+ r2 = r7; \
+ r2 += 8; \
+ r0 = 0; \
+ *(u32*)(r2 + 4) = r0; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("store PTR_TO_STACK in R10 to array map using BPF_B")
+__success __retval(42)
+__naked void array_map_using_bpf_b(void)
+{
+ asm volatile (" \
+ /* Load pointer to map. */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ r0 = 2; \
+ exit; \
+l0_%=: r1 = r0; \
+ /* Copy R10 to R9. */ \
+ r9 = r10; \
+ /* Pollute other registers with unaligned values. */\
+ r2 = -1; \
+ r3 = -1; \
+ r4 = -1; \
+ r5 = -1; \
+ r6 = -1; \
+ r7 = -1; \
+ r8 = -1; \
+ /* Store both R9 and R10 with BPF_B and read back. */\
+ *(u8*)(r1 + 0) = r10; \
+ r2 = *(u8*)(r1 + 0); \
+ *(u8*)(r1 + 0) = r9; \
+ r3 = *(u8*)(r1 + 0); \
+ /* Should read back as same value. */ \
+ if r2 == r3 goto l1_%=; \
+ r0 = 1; \
+ exit; \
+l1_%=: r0 = 42; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK stack size > 512")
+__failure __msg("invalid write to stack R1 off=-520 size=8")
+__naked void stack_check_size_gt_512(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -520; \
+ r0 = 42; \
+ *(u64*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+#ifdef __BPF_FEATURE_MAY_GOTO
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto with jit")
+__load_if_JITed()
+__success __retval(42)
+__naked void stack_check_size_512_with_may_goto_jit(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto without jit")
+__load_if_no_JITed()
+__failure __msg("stack size 520(extra 8) is too large")
+__naked void stack_check_size_512_with_may_goto(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c
new file mode 100644
index 000000000000..72f1eb006074
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__description("store-release, 8-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_8(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0x12;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1);
+ "w2 = *(u8 *)(r10 - 1);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 16-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_16(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0x1234;"
+ ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1);
+ "w2 = *(u16 *)(r10 - 2);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 32-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_32(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0x12345678;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1);
+ "w2 = *(u32 *)(r10 - 4);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 64-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r1 = 0x1234567890abcdef ll;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r2 = *(u64 *)(r10 - 8);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r2);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized dst_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r2 - 8), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with non-pointer dst_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void store_release_with_non_pointer_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r1 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_1, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned store-release")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void store_release_misaligned(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 5), w0);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_0, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release to ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__naked void store_release_to_ctx_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ // store_release((u8 *)(r1 + offsetof(struct __sk_buff, cb[0])), w0);
+ ".8byte %[store_release_insn];"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("store-release to pkt pointer")
+__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed")
+__naked void store_release_to_pkt_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("store-release to flow_keys pointer")
+__failure __msg("BPF_ATOMIC stores into R2 flow_keys is not allowed")
+__naked void store_release_to_flow_keys_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("store-release to sock pointer")
+__failure __msg("R2 cannot write into sock")
+__naked void store_release_to_sock_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, leak pointer to stack")
+__success __success_unpriv __retval(0)
+__naked void store_release_leak_pointer_to_stack(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("store-release, leak pointer to map")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("R6 leaks addr into map")
+__naked void store_release_leak_pointer_to_map(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r1 = %[map_hash_8b] ll;"
+ "r2 = 0;"
+ "*(u64 *)(r10 - 8) = r2;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r0 + 0), r6);
+"l0_%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(map_hash_8b),
+ __imm(bpf_map_lookup_elem),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_0, BPF_REG_6, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void store_release_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r15 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, 15 /* invalid reg */, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
new file mode 100644
index 000000000000..61886ed554de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -0,0 +1,849 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include <../../../tools/include/linux/filter.h>
+
+int vals[] SEC(".data.vals") = {1, 2, 3, 4};
+
+__naked __noinline __used
+static unsigned long identity_subprog()
+{
+ /* the simplest *static* 64-bit identity function */
+ asm volatile (
+ "r0 = r1;"
+ "exit;"
+ );
+}
+
+__noinline __used
+unsigned long global_identity_subprog(__u64 x)
+{
+ /* the simplest *global* 64-bit identity function */
+ return x;
+}
+
+__naked __noinline __used
+static unsigned long callback_subprog()
+{
+ /* the simplest callback function */
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int subprog_result_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+ /* pass r6 through r1 into subprog to get it back as r0;
+ * this whole chain will have to be marked as precise later
+ */
+ "r1 = r6;"
+ "call identity_subprog;"
+ /* now use subprog's returned value (which is a
+ * r6 -> r1 -> r0 chain), as index into vals array, forcing
+ * all of that to be known precisely
+ */
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* here r0->r1->r6 chain is forced to be precise and has to be
+ * propagated back to the beginning, including through the
+ * subprog call
+ */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+__naked __noinline __used
+static unsigned long fp_leaking_subprog()
+{
+ asm volatile (
+ ".8byte %[r0_eq_r10_cast_s8];"
+ "exit;"
+ :: __imm_insn(r0_eq_r10_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_10, 8))
+ );
+}
+
+__naked __noinline __used
+static unsigned long sneaky_fp_leaking_subprog()
+{
+ asm volatile (
+ "r1 = r10;"
+ ".8byte %[r0_eq_r1_cast_s8];"
+ "exit;"
+ :: __imm_insn(r0_eq_r1_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_1, 8))
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
+__msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10")
+__msg("7: R0=scalar")
+__naked int fp_precise_subprog_result(void)
+{
+ asm volatile (
+ "call fp_leaking_subprog;"
+ /* use subprog's returned value (which is derived from r10=fp
+ * register), as index into vals array, forcing all of that to
+ * be known precisely
+ */
+ "r0 &= 3;"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* force precision marking */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
+__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1")
+/* here r1 is marked precise, even though it's fp register, but that's fine
+ * because by the time we get out of subprogram it has to be derived from r10
+ * anyways, at which point we'll break precision chain
+ */
+__msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10")
+__msg("7: R0=scalar")
+__naked int sneaky_fp_precise_subprog_result(void)
+{
+ asm volatile (
+ "call sneaky_fp_leaking_subprog;"
+ /* use subprog's returned value (which is derived from r10=fp
+ * register), as index into vals array, forcing all of that to
+ * be known precisely
+ */
+ "r0 &= 3;"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* force precision marking */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0")
+__msg("mark_precise: frame0: regs=r0 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 7: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (a5) if r0 < 0x4 goto pc+1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (85) call pc+7")
+__naked int global_subprog_result_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+ /* pass r6 through r1 into subprog to get it back as r0;
+ * given global_identity_subprog is global, precision won't
+ * propagate all the way back to r6
+ */
+ "r1 = r6;"
+ "call global_identity_subprog;"
+ /* now use subprog's returned value (which is unknown now, so
+ * we need to clamp it), as index into vals array, forcing r0
+ * to be marked precise (with no effect on r6, though)
+ */
+ "if r0 < %[vals_arr_sz] goto 1f;"
+ "r0 = %[vals_arr_sz] - 1;"
+ "1:"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* here r0 is forced to be precise and has to be
+ * propagated back to the global subprog call, but it
+ * shouldn't go all the way to mark r6 as precise
+ */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_const(vals_arr_sz, ARRAY_SIZE(vals))
+ : __clobber_common, "r6"
+ );
+}
+
+__naked __noinline __used
+static unsigned long loop_callback_bad()
+{
+ /* bpf_loop() callback that can return values outside of [0, 1] range */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 s> 1000 goto 1f;"
+ "r0 = 0;"
+ "1:"
+ "goto +0;" /* checkpoint */
+ /* bpf_loop() expects [0, 1] values, so branch above skipping
+ * r0 = 0; should lead to a failure, but if exit instruction
+ * doesn't enforce r0's precision, this callback will be
+ * successfully verified
+ */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common
+ );
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+/* check that fallthrough code path marks r0 as precise */
+__msg("mark_precise: frame1: regs=r0 stack= before 11: (b7) r0 = 0")
+/* check that we have branch code path doing its own validation */
+__msg("from 10 to 12: frame1: R0=scalar(smin=umin=1001")
+/* check that branch code path marks r0 as precise, before failing */
+__msg("mark_precise: frame1: regs=r0 stack= before 9: (85) call bpf_get_prandom_u32#7")
+__msg("At callback return the register R0 has smin=1001 should have been in [0, 1]")
+__naked int callback_precise_return_fail(void)
+{
+ asm volatile (
+ "r1 = 1;" /* nr_loops */
+ "r2 = %[loop_callback_bad];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(loop_callback_bad),
+ __imm(bpf_loop)
+ : __clobber_common
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* First simulated path does not include callback body,
+ * r1 and r4 are always precise for bpf_loop() calls.
+ */
+__msg("9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r4 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r1 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* r6 precision propagation */
+__msg("14: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 14 first_idx 9")
+__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
+__msg("mark_precise: frame0: regs=r0,r6 stack= before 10: (bf) r6 = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+__msg("frame 0: propagating r1,r4")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit")
+__msg("from 18 to 9: safe")
+__naked int callback_result_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and use result; r0 shouldn't propagate back to
+ * callback_subprog
+ */
+ "r1 = r6;" /* nr_loops */
+ "r2 = %[callback_subprog];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ "r6 = r0;"
+ "if r6 > 3 goto 1f;"
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the bpf_loop() call, but not beyond
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "1:"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_ptr(callback_subprog),
+ __imm(bpf_loop)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 7 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 10: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs= stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call identity_subprog;"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 7 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise_global(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call global_identity_subprog;"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* First simulated path does not include callback body */
+__msg("12: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 12 first_idx 9")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs=r6 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 15: frame1:")
+__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("15: (b7) r0 = 0")
+__msg("16: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+/* r1, r4 are always precise for bpf_loop(),
+ * r6 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 16 to 9: safe")
+__naked int parent_callee_saved_reg_precise_with_callback(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 1;" /* nr_loops */
+ "r2 = %[callback_subprog];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) callback call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_ptr(callback_subprog),
+ __imm(bpf_loop)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 6")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 13 first_idx 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 13: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 12: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs= stack= before 5: (85) call pc+6")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_stack_slot_precise(void)
+{
+ asm volatile (
+ /* spill reg */
+ "r6 = 3;"
+ "*(u64 *)(r10 - 8) = r6;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call identity_subprog;"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r6 = *(u64 *)(r10 - 8);"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (85) call pc+6")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_stack_slot_precise_global(void)
+{
+ asm volatile (
+ /* spill reg */
+ "r6 = 3;"
+ "*(u64 *)(r10 - 8) = r6;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call global_identity_subprog;"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r6 = *(u64 *)(r10 - 8);"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* First simulated path does not include callback body */
+__msg("14: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10")
+__msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 10 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 10:")
+/* r1, r4 are always precise for bpf_loop(),
+ * fp-8 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,fp-8")
+__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 18 to 10: safe")
+__naked int parent_stack_slot_precise_with_callback(void)
+{
+ asm volatile (
+ /* spill reg */
+ "r6 = 3;"
+ "*(u64 *)(r10 - 8) = r6;"
+
+ /* ensure we have callback frame in jump history */
+ "r1 = r6;" /* nr_loops */
+ "r2 = %[callback_subprog];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r6 = *(u64 *)(r10 - 8);"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_ptr(callback_subprog),
+ __imm(bpf_loop)
+ : __clobber_common, "r6"
+ );
+}
+
+__noinline __used
+static __u64 subprog_with_precise_arg(__u64 x)
+{
+ return vals[x]; /* x is forced to be precise */
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (0f) r2 += r1")
+__msg("mark_precise: frame1: last_idx 8 first_idx 0")
+__msg("mark_precise: frame1: regs=r1 stack= before 6: (18) r2 = ")
+__msg("mark_precise: frame1: regs=r1 stack= before 5: (67) r1 <<= 2")
+__msg("mark_precise: frame1: regs=r1 stack= before 2: (85) call pc+2")
+__msg("mark_precise: frame0: regs=r1 stack= before 1: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 0: (b7) r6 = 3")
+__naked int subprog_arg_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+ "r1 = r6;"
+ /* subprog_with_precise_arg expects its argument to be
+ * precise, so r1->r6 will be marked precise from inside the
+ * subprog
+ */
+ "call subprog_with_precise_arg;"
+ "r0 += r6;"
+ "exit;"
+ :
+ :
+ : __clobber_common, "r6"
+ );
+}
+
+/* r1 is pointer to stack slot;
+ * r2 is a register to spill into that slot
+ * subprog also spills r2 into its own stack slot
+ */
+__naked __noinline __used
+static __u64 subprog_spill_reg_precise(void)
+{
+ asm volatile (
+ /* spill to parent stack */
+ "*(u64 *)(r1 + 0) = r2;"
+ /* spill to subprog stack (we use -16 offset to avoid
+ * accidental confusion with parent's -8 stack slot in
+ * verifier log output)
+ */
+ "*(u64 *)(r10 - 16) = r2;"
+ /* use both spills as return result to propagete precision everywhere */
+ "r0 = *(u64 *)(r10 - 16);"
+ "r2 = *(u64 *)(r1 + 0);"
+ "r0 += r2;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("10: (0f) r1 += r7")
+__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
+__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
+__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=2 R6=1 R8=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8=P1")
+__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
+__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)")
+__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)")
+__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2")
+__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2")
+__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6")
+__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
+__naked int subprog_spill_into_parent_stack_slot_precise(void)
+{
+ asm volatile (
+ "r6 = 1;"
+
+ /* pass pointer to stack slot and r6 to subprog;
+ * r6 will be marked precise and spilled into fp-8 slot, which
+ * also should be marked precise
+ */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = r6;"
+ "call subprog_spill_reg_precise;"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r7 = *(u64 *)(r10 - 8);"
+
+ "r7 *= 4;"
+ "r1 = %[vals];"
+ /* here r7 is forced to be precise and has to be propagated
+ * back to the beginning, handling subprog call and logic
+ */
+ "r1 += r7;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6", "r7"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("17: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 16: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32")
+__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10")
+__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10")
+__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15")
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
+__naked int stack_slot_aliases_precision(void)
+{
+ asm volatile (
+ "r6 = 1;"
+ /* pass r6 through r1 into subprog to get it back as r0;
+ * this whole chain will have to be marked as precise later
+ */
+ "r1 = r6;"
+ "call identity_subprog;"
+ /* let's setup two registers that are aliased to r10 */
+ "r7 = r10;"
+ "r7 += -8;" /* r7 = r10 - 8 */
+ "r8 = r10;"
+ "r8 += -32;" /* r8 = r10 - 32 */
+ /* now spill subprog's return value (a r6 -> r1 -> r0 chain)
+ * a few times through different stack pointer regs, making
+ * sure to use r10, r7, and r8 both in LDX and STX insns, and
+ * *importantly* also using a combination of const var_off and
+ * insn->off to validate that we record final stack slot
+ * correctly, instead of relying on just insn->off derivation,
+ * which is only valid for r10-based stack offset
+ */
+ "*(u64 *)(r10 - 16) = r0;"
+ "r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */
+ "*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */
+ "r0 = *(u64 *)(r8 + 16);"
+ "*(u64 *)(r7 - 8) = r0;"
+ "r0 = *(u64 *)(r10 - 16);"
+ /* get ready to use r0 as an index into array to force precision */
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* here r0->r1->r6 chain is forced to be precise and has to be
+ * propagated back to the beginning, including through the
+ * subprog call and all the stack spills and loads
+ */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+__naked __noinline __used
+static unsigned long identity_tail_call(void)
+{
+ /* the simplest identity function involving a tail call */
+ asm volatile (
+ "r6 = r2;"
+ "r2 = %[map_array] ll;"
+ "r3 = 0;"
+ "call %[bpf_tail_call];"
+ "r0 = r6;"
+ "exit;"
+ :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__msg("13: (85) call bpf_tail_call#12")
+__msg("mark_precise: frame1: last_idx 13 first_idx 0 subseq_idx -1 ")
+__msg("returning from callee:")
+__msg("frame1: R0=scalar() R6=3 R10=fp0")
+__msg("to caller at 4:")
+__msg("R0=scalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0")
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: parent state regs=r0 stack=: R0=Pscalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0")
+__msg("math between map_value pointer and register with unbounded min value is not allowed")
+__naked int subprog_result_tail_call(void)
+{
+ asm volatile (
+ "r2 = 3;"
+ "call identity_tail_call;"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common
+ );
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_subreg.c b/tools/testing/selftests/bpf/progs/verifier_subreg.c
new file mode 100644
index 000000000000..8613ea160dcd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_subreg.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/subreg.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* This file contains sub-register zero extension checks for insns defining
+ * sub-registers, meaning:
+ * - All insns under BPF_ALU class. Their BPF_ALU32 variants or narrow width
+ * forms (BPF_END) could define sub-registers.
+ * - Narrow direct loads, BPF_B/H/W | BPF_LDX.
+ * - BPF_LD is not exposed to JIT back-ends, so no need for testing.
+ *
+ * "get_prandom_u32" is used to initialize low 32-bit of some registers to
+ * prevent potential optimizations done by verifier or JIT back-ends which could
+ * optimize register back into constant when range info shows one register is a
+ * constant.
+ */
+
+SEC("socket")
+__description("add32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void add32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x100000000 ll; \
+ w0 += w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("add32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void add32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ /* An insn could have no effect on the low 32-bit, for example:\
+ * a = a + 0 \
+ * a = a | 0 \
+ * a = a & -1 \
+ * But, they should still zero high 32-bit. \
+ */ \
+ w0 += 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 += -2; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sub32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void sub32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x1ffffffff ll; \
+ w0 -= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sub32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void sub32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 -= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 -= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mul32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mul32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x100000001 ll; \
+ w0 *= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mul32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mul32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 *= 1; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 *= -1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("div32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void div32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = -1; \
+ w0 /= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("div32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void div32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 /= 1; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 /= 2; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("or32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void or32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x100000001 ll; \
+ w0 |= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("or32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void or32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 |= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 |= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("and32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void and32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x100000000 ll; \
+ r1 |= r0; \
+ r0 = 0x1ffffffff ll; \
+ w0 &= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("and32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void and32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 &= -1; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 &= -2; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("lsh32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void lsh32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x100000000 ll; \
+ r0 |= r1; \
+ r1 = 1; \
+ w0 <<= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("lsh32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void lsh32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 <<= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 <<= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("rsh32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void rsh32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r1 = 1; \
+ w0 >>= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("rsh32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void rsh32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 >>= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 >>= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("neg32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void neg32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 = -w0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mod32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mod32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = -1; \
+ w0 %%= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mod32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mod32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 %%= 1; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 %%= 2; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("xor32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void xor32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x100000000 ll; \
+ w0 ^= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("xor32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void xor32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 ^= 1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mov32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mov32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x100000000 ll; \
+ r1 |= r0; \
+ r0 = 0x100000000 ll; \
+ w0 = w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mov32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mov32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 = 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 = 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("arsh32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void arsh32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r1 = 1; \
+ w0 s>>= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("arsh32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void arsh32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 s>>= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 s>>= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("end16 (to_le) reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void le_reg_zero_extend_check_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 <<= 32; \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= r6; \
+ r0 = le16 r0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("end32 (to_le) reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void le_reg_zero_extend_check_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 <<= 32; \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= r6; \
+ r0 = le32 r0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("end16 (to_be) reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void be_reg_zero_extend_check_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 <<= 32; \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= r6; \
+ r0 = be16 r0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("end32 (to_be) reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void be_reg_zero_extend_check_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 <<= 32; \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= r6; \
+ r0 = be32 r0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ldx_b zero extend check")
+__success __success_unpriv __retval(0)
+__naked void ldx_b_zero_extend_check(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -4; \
+ r7 = 0xfaceb00c; \
+ *(u32*)(r6 + 0) = r7; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r0 = *(u8*)(r6 + 0); \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ldx_h zero extend check")
+__success __success_unpriv __retval(0)
+__naked void ldx_h_zero_extend_check(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -4; \
+ r7 = 0xfaceb00c; \
+ *(u32*)(r6 + 0) = r7; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r0 = *(u16*)(r6 + 0); \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ldx_w zero extend check")
+__success __success_unpriv __retval(0)
+__naked void ldx_w_zero_extend_check(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -4; \
+ r7 = 0xfaceb00c; \
+ *(u32*)(r6 + 0) = r7; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r0 = *(u32*)(r6 + 0); \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall.c b/tools/testing/selftests/bpf/progs/verifier_tailcall.c
new file mode 100644
index 000000000000..b4acce60fb9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_tailcall.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+SEC("socket")
+__description("invalid map type for tail call")
+__failure __msg("expected prog array map for tail call")
+__failure_unpriv
+__naked void invalid_map_for_tail_call(void)
+{
+ asm volatile (" \
+ r2 = %[map_array] ll; \
+ r3 = 0; \
+ call %[bpf_tail_call]; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
new file mode 100644
index 000000000000..8d60c634a114
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+int main(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, void (void));
+} jmp_table SEC(".maps") = {
+ .values = {
+ [0] = (void *) &main,
+ },
+};
+
+__noinline __auxiliary
+static __naked int sub(void)
+{
+ asm volatile (
+ "r2 = %[jmp_table] ll;"
+ "r3 = 0;"
+ "call 12;"
+ "exit;"
+ :
+ : __imm_addr(jmp_table)
+ : __clobber_all);
+}
+
+__success
+__arch_x86_64
+/* program entry for main(), regular function prologue */
+__jited(" endbr64")
+__jited(" nopl (%rax,%rax)")
+__jited(" xorq %rax, %rax")
+__jited(" pushq %rbp")
+__jited(" movq %rsp, %rbp")
+/* tail call prologue for program:
+ * - establish memory location for tail call counter at &rbp[-8];
+ * - spill tail_call_cnt_ptr at &rbp[-16];
+ * - expect tail call counter to be passed in rax;
+ * - for entry program rax is a raw counter, value < 33;
+ * - for tail called program rax is tail_call_cnt_ptr (value > 33).
+ */
+__jited(" endbr64")
+__jited(" cmpq $0x21, %rax")
+__jited(" ja L0")
+__jited(" pushq %rax")
+__jited(" movq %rsp, %rax")
+__jited(" jmp L1")
+__jited("L0: pushq %rax") /* rbp[-8] = rax */
+__jited("L1: pushq %rax") /* rbp[-16] = rax */
+/* on subprogram call restore rax to be tail_call_cnt_ptr from rbp[-16]
+ * (cause original rax might be clobbered by this point)
+ */
+__jited(" movq -0x10(%rbp), %rax")
+__jited(" callq 0x{{.*}}") /* call to sub() */
+__jited(" xorl %eax, %eax")
+__jited(" leave")
+__jited(" {{(retq|jmp 0x)}}") /* return or jump to rethunk */
+__jited("...")
+/* subprogram entry for sub(), regular function prologue */
+__jited(" endbr64")
+__jited(" nopl (%rax,%rax)")
+__jited(" nopl (%rax)")
+__jited(" pushq %rbp")
+__jited(" movq %rsp, %rbp")
+/* tail call prologue for subprogram address of tail call counter
+ * stored at rbp[-16].
+ */
+__jited(" endbr64")
+__jited(" pushq %rax") /* rbp[-8] = rax */
+__jited(" pushq %rax") /* rbp[-16] = rax */
+__jited(" movabsq ${{.*}}, %rsi") /* r2 = &jmp_table */
+__jited(" xorl %edx, %edx") /* r3 = 0 */
+/* bpf_tail_call implementation:
+ * - load tail_call_cnt_ptr from rbp[-16];
+ * - if *tail_call_cnt_ptr < 33, increment it and jump to target;
+ * - otherwise do nothing.
+ */
+__jited(" movq -0x10(%rbp), %rax")
+__jited(" cmpq $0x21, (%rax)")
+__jited(" jae L0")
+__jited(" nopl (%rax,%rax)")
+__jited(" addq $0x1, (%rax)") /* *tail_call_cnt_ptr += 1 */
+__jited(" popq %rax")
+__jited(" popq %rax")
+__jited(" jmp {{.*}}") /* jump to tail call tgt */
+__jited("L0: leave")
+__jited(" {{(retq|jmp 0x)}}") /* return or jump to rethunk */
+SEC("tc")
+__naked int main(void)
+{
+ asm volatile (
+ "call %[sub];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(sub)
+ : __clobber_all);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_typedef.c b/tools/testing/selftests/bpf/progs/verifier_typedef.c
new file mode 100644
index 000000000000..08481cfaac4b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_typedef.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("fentry/bpf_fentry_test_sinfo")
+__description("typedef: resolve")
+__success __retval(0)
+__naked void resolve_typedef(void)
+{
+ asm volatile (" \
+ r1 = *(u64 *)(r1 +0); \
+ r2 = *(u64 *)(r1 +%[frags_offs]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(frags_offs,
+ offsetof(struct skb_shared_info, frags))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_uninit.c b/tools/testing/selftests/bpf/progs/verifier_uninit.c
new file mode 100644
index 000000000000..7718cd7d19ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_uninit.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/uninit.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("read uninitialized register")
+__failure __msg("R2 !read_ok")
+__failure_unpriv
+__naked void read_uninitialized_register(void)
+{
+ asm volatile (" \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("read invalid register")
+__failure __msg("R15 is invalid")
+__failure_unpriv
+__naked void read_invalid_register(void)
+{
+ asm volatile (" \
+ .8byte %[mov64_reg]; \
+ exit; \
+" :
+ : __imm_insn(mov64_reg, BPF_MOV64_REG(BPF_REG_0, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("program doesn't init R0 before exit")
+__failure __msg("R0 !read_ok")
+__failure_unpriv
+__naked void t_init_r0_before_exit(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("program doesn't init R0 before exit in all branches")
+__failure __msg("R0 !read_ok")
+__msg_unpriv("R1 pointer comparison")
+__naked void before_exit_in_all_branches(void)
+{
+ asm volatile (" \
+ if r1 >= 0 goto l0_%=; \
+ r0 = 1; \
+ r0 += 2; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
new file mode 100644
index 000000000000..28b4f7035ceb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -0,0 +1,953 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/unpriv.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#define BPF_SK_LOOKUP(func) \
+ /* struct bpf_sock_tuple tuple = {} */ \
+ "r2 = 0;" \
+ "*(u32*)(r10 - 8) = r2;" \
+ "*(u64*)(r10 - 16) = r2;" \
+ "*(u64*)(r10 - 24) = r2;" \
+ "*(u64*)(r10 - 32) = r2;" \
+ "*(u64*)(r10 - 40) = r2;" \
+ "*(u64*)(r10 - 48) = r2;" \
+ /* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */ \
+ "r2 = r10;" \
+ "r2 += -48;" \
+ "r3 = %[sizeof_bpf_sock_tuple];"\
+ "r4 = 0;" \
+ "r5 = 0;" \
+ "call %[" #func "];"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+void dummy_prog_42_socket(void);
+void dummy_prog_24_socket(void);
+void dummy_prog_loop1_socket(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog1_socket SEC(".maps") = {
+ .values = {
+ [0] = (void *)&dummy_prog_42_socket,
+ [1] = (void *)&dummy_prog_loop1_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ },
+};
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_42_socket(void)
+{
+ asm volatile ("r0 = 42; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_24_socket(void)
+{
+ asm volatile ("r0 = 24; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop1_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: return pointer")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(POINTER_VALUE)
+__naked void unpriv_return_pointer(void)
+{
+ asm volatile (" \
+ r0 = r10; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: add const to pointer")
+__success __success_unpriv __retval(0)
+__naked void unpriv_add_const_to_pointer(void)
+{
+ asm volatile (" \
+ r1 += 8; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: add pointer to pointer")
+__failure __msg("R1 pointer += pointer")
+__failure_unpriv
+__naked void unpriv_add_pointer_to_pointer(void)
+{
+ asm volatile (" \
+ r1 += r10; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: neg pointer")
+__success __failure_unpriv __msg_unpriv("R1 pointer arithmetic")
+__retval(0)
+__naked void unpriv_neg_pointer(void)
+{
+ asm volatile (" \
+ r1 = -r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp pointer with const")
+__success __failure_unpriv __msg_unpriv("R1 pointer comparison")
+__retval(0)
+__naked void unpriv_cmp_pointer_with_const(void)
+{
+ asm volatile (" \
+ if r1 == 0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp pointer with pointer")
+__success __failure_unpriv __msg_unpriv("R10 pointer comparison")
+__retval(0)
+__naked void unpriv_cmp_pointer_with_pointer(void)
+{
+ asm volatile (" \
+ if r1 == r10 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("unpriv: check that printk is disallowed")
+__success
+__naked void check_that_printk_is_disallowed(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = r10; \
+ r1 += -8; \
+ r2 = 8; \
+ r3 = r1; \
+ call %[bpf_trace_printk]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: pass pointer to helper function")
+__success __failure_unpriv __msg_unpriv("R4 leaks addr")
+__retval(0)
+__naked void pass_pointer_to_helper_function(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ r3 = r2; \
+ r4 = r2; \
+ call %[bpf_map_update_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: indirectly pass pointer on stack to helper function")
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack R2 off -8+0 size 8")
+__retval(0)
+__naked void on_stack_to_helper_function(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r10; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: mangle pointer on stack 1")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(0)
+__naked void mangle_pointer_on_stack_1(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r10; \
+ r0 = 0; \
+ *(u32*)(r10 - 8) = r0; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: mangle pointer on stack 2")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(0)
+__naked void mangle_pointer_on_stack_2(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r10; \
+ r0 = 0; \
+ *(u8*)(r10 - 1) = r0; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: read pointer from stack in small chunks")
+__failure __msg("invalid size")
+__failure_unpriv
+__naked void from_stack_in_small_chunks(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r10; \
+ r0 = *(u32*)(r10 - 8); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: write pointer into ctx")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv __msg_unpriv("R1 leaks addr")
+__naked void unpriv_write_pointer_into_ctx(void)
+{
+ asm volatile (" \
+ *(u64*)(r1 + 0) = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: spill/fill of ctx")
+__success __success_unpriv __retval(0)
+__naked void unpriv_spill_fill_of_ctx(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ r1 = *(u64*)(r6 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of ctx 2")
+__success __retval(0)
+__naked void spill_fill_of_ctx_2(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ r1 = *(u64*)(r6 + 0); \
+ call %[bpf_get_hash_recalc]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_hash_recalc)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of ctx 3")
+__failure __msg("R1 type=fp expected=ctx")
+__naked void spill_fill_of_ctx_3(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ *(u64*)(r6 + 0) = r10; \
+ r1 = *(u64*)(r6 + 0); \
+ call %[bpf_get_hash_recalc]; \
+ exit; \
+" :
+ : __imm(bpf_get_hash_recalc)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of ctx 4")
+__failure __msg("R1 type=scalar expected=ctx")
+__naked void spill_fill_of_ctx_4(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ r0 = 1; \
+ lock *(u64 *)(r10 - 8) += r0; \
+ r1 = *(u64*)(r6 + 0); \
+ call %[bpf_get_hash_recalc]; \
+ exit; \
+" :
+ : __imm(bpf_get_hash_recalc)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx")
+__failure __msg("same insn cannot be used with different pointers")
+__naked void fill_of_different_pointers_stx(void)
+{
+ asm volatile (" \
+ r3 = 42; \
+ r6 = r10; \
+ r6 += -8; \
+ if r1 == 0 goto l0_%=; \
+ r2 = r10; \
+ r2 += -16; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: r1 = *(u64*)(r6 + 0); \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r3; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+/* Same as above, but use BPF_ST_MEM to save 42
+ * instead of BPF_STX_MEM.
+ */
+SEC("tc")
+__description("unpriv: spill/fill of different pointers st")
+__failure __msg("same insn cannot be used with different pointers")
+__naked void fill_of_different_pointers_st(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ if r1 == 0 goto l0_%=; \
+ r2 = r10; \
+ r2 += -16; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: r1 = *(u64*)(r6 + 0); \
+ .8byte %[st_mem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_insn(st_mem,
+ BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx - ctx and sock")
+__failure __msg("type=ctx expected=sock")
+__naked void pointers_stx_ctx_and_sock(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */\
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r2 = r0; \
+ /* u64 foo; */ \
+ /* void *target = &foo; */ \
+ r6 = r10; \
+ r6 += -8; \
+ r1 = r8; \
+ /* if (skb == NULL) *target = sock; */ \
+ if r1 == 0 goto l0_%=; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: /* else *target = skb; */ \
+ if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: /* struct __sk_buff *skb = *target; */ \
+ r1 = *(u64*)(r6 + 0); \
+ /* skb->mark = 42; */ \
+ r3 = 42; \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r3; \
+ /* if (sk) bpf_sk_release(sk) */ \
+ if r1 == 0 goto l2_%=; \
+ call %[bpf_sk_release]; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx - leak sock")
+__failure
+//.errstr = "same insn cannot be used with different pointers",
+__msg("Unreleased reference")
+__naked void different_pointers_stx_leak_sock(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */\
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r2 = r0; \
+ /* u64 foo; */ \
+ /* void *target = &foo; */ \
+ r6 = r10; \
+ r6 += -8; \
+ r1 = r8; \
+ /* if (skb == NULL) *target = sock; */ \
+ if r1 == 0 goto l0_%=; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: /* else *target = skb; */ \
+ if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: /* struct __sk_buff *skb = *target; */ \
+ r1 = *(u64*)(r6 + 0); \
+ /* skb->mark = 42; */ \
+ r3 = 42; \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r3; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx - sock and ctx (read)")
+__failure __msg("same insn cannot be used with different pointers")
+__naked void stx_sock_and_ctx_read(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */\
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r2 = r0; \
+ /* u64 foo; */ \
+ /* void *target = &foo; */ \
+ r6 = r10; \
+ r6 += -8; \
+ r1 = r8; \
+ /* if (skb) *target = skb */ \
+ if r1 == 0 goto l0_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l0_%=: /* else *target = sock */ \
+ if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r2; \
+l1_%=: /* struct bpf_sock *sk = *target; */ \
+ r1 = *(u64*)(r6 + 0); \
+ /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */\
+ if r1 == 0 goto l2_%=; \
+ r3 = *(u32*)(r1 + %[bpf_sock_mark]); \
+ call %[bpf_sk_release]; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_mark, offsetof(struct bpf_sock, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx - sock and ctx (write)")
+__failure
+//.errstr = "same insn cannot be used with different pointers",
+__msg("cannot write into sock")
+__naked void stx_sock_and_ctx_write(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */\
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r2 = r0; \
+ /* u64 foo; */ \
+ /* void *target = &foo; */ \
+ r6 = r10; \
+ r6 += -8; \
+ r1 = r8; \
+ /* if (skb) *target = skb */ \
+ if r1 == 0 goto l0_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l0_%=: /* else *target = sock */ \
+ if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r2; \
+l1_%=: /* struct bpf_sock *sk = *target; */ \
+ r1 = *(u64*)(r6 + 0); \
+ /* if (sk) sk->mark = 42; bpf_sk_release(sk); */\
+ if r1 == 0 goto l2_%=; \
+ r3 = 42; \
+ *(u32*)(r1 + %[bpf_sock_mark]) = r3; \
+ call %[bpf_sk_release]; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_mark, offsetof(struct bpf_sock, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: write pointer into map elem value")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void pointer_into_map_elem_value(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ *(u64*)(r0 + 0) = r0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("alu32: mov u32 const")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 == 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R7 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
+__naked void alu32_mov_u32_const(void)
+{
+ asm volatile (" \
+ w7 = 0; \
+ w7 &= 1; \
+ w0 = w7; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: partial copy of pointer")
+__success __failure_unpriv __msg_unpriv("R10 partial copy")
+__retval(0)
+__naked void unpriv_partial_copy_of_pointer(void)
+{
+ asm volatile (" \
+ w1 = w10; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: pass pointer to tail_call")
+__success __failure_unpriv __msg_unpriv("R3 leaks addr into helper")
+__retval(0)
+__naked void pass_pointer_to_tail_call(void)
+{
+ asm volatile (" \
+ r3 = r1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp map pointer with zero")
+__success __success_unpriv
+__retval(0)
+__naked void cmp_map_pointer_with_zero(void)
+{
+ asm volatile (" \
+ r1 = %[map_hash_8b] ll; \
+ if r1 == 0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp map pointer with const")
+__success __failure_unpriv __msg_unpriv("R1 pointer comparison prohibited")
+__retval(0)
+__naked void cmp_map_pointer_with_const(void)
+{
+ asm volatile (" \
+ r1 = %[map_hash_8b] ll; \
+ if r1 == 0x0000beef goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: write into frame pointer")
+__failure __msg("frame pointer is read only")
+__failure_unpriv
+__naked void unpriv_write_into_frame_pointer(void)
+{
+ asm volatile (" \
+ r10 = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: spill/fill frame pointer")
+__failure __msg("frame pointer is read only")
+__failure_unpriv
+__naked void unpriv_spill_fill_frame_pointer(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r10; \
+ r10 = *(u64*)(r6 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp of frame pointer")
+__success __failure_unpriv __msg_unpriv("R10 pointer comparison")
+__retval(0)
+__naked void unpriv_cmp_of_frame_pointer(void)
+{
+ asm volatile (" \
+ if r10 == 0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: adding of fp, reg")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(0)
+__naked void unpriv_adding_of_fp_reg(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = 0; \
+ r1 += r10; \
+ *(u64*)(r1 - 8) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: adding of fp, imm")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(0)
+__naked void unpriv_adding_of_fp_imm(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = r10; \
+ r1 += 0; \
+ *(u64*)(r1 - 8) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp of stack pointer")
+__success __failure_unpriv __msg_unpriv("R2 pointer comparison")
+__retval(0)
+__naked void unpriv_cmp_of_stack_pointer(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ if r2 == 0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: Spectre v1 path-based type confusion of scalar as stack-ptr")
+__success __success_unpriv __retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 != 0x1 goto pc+2")
+/* This nospec prevents the exploit because it forces the mispredicted (not
+ * taken) `if r0 != 0x0 goto l0_%=` to resolve before using r6 as a pointer.
+ * This causes the CPU to realize that `r6 = r9` should have never executed. It
+ * ensures that r6 always contains a readable stack slot ptr when the insn after
+ * the nospec executes.
+ */
+__xlated_unpriv("nospec")
+__xlated_unpriv("r9 = *(u8 *)(r6 +0)")
+#endif
+__naked void unpriv_spec_v1_type_confusion(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* r0: pointer to a map array entry */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ /* r1, r2: prepared call args */ \
+ r6 = r10; \
+ r6 += -8; \
+ /* r6: pointer to readable stack slot */ \
+ r9 = 0xffffc900; \
+ r9 <<= 32; \
+ /* r9: scalar controlled by attacker */ \
+ r0 = *(u64 *)(r0 + 0); /* cache miss */ \
+ if r0 != 0x0 goto l0_%=; \
+ r6 = r9; \
+l0_%=: if r0 != 0x1 goto l1_%=; \
+ r9 = *(u8 *)(r6 + 0); \
+l1_%=: /* leak r9 */ \
+ r9 &= 1; \
+ r9 <<= 9; \
+ *(u64*)(r10 - 8) = r9; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* leak secret into is_cached(map[0|512]): */ \
+ r0 = *(u64 *)(r0 + 0); \
+l2_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: ldimm64 before Spectre v4 barrier")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V4
+__xlated_unpriv("r1 = 0x2020200005642020") /* should not matter */
+__xlated_unpriv("*(u64 *)(r10 -8) = r1")
+__xlated_unpriv("nospec")
+#endif
+__naked void unpriv_ldimm64_spectre_v4(void)
+{
+ asm volatile (" \
+ r1 = 0x2020200005642020 ll; \
+ *(u64 *)(r10 -8) = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: Spectre v1 and v4 barrier")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+/* starts with r0 == r8 == r9 == 0 */
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("if r9 == 0x0 goto pc+4")
+__xlated_unpriv("r2 = r0")
+/* Following nospec required to prevent following dangerous `*(u64 *)(NOT_FP -64)
+ * = r1` iff `if r9 == 0 goto pc+4` was mispredicted because of Spectre v1. The
+ * test therefore ensures the Spectre-v4--induced nospec does not prevent the
+ * Spectre-v1--induced speculative path from being fully analyzed.
+ */
+__xlated_unpriv("nospec") /* Spectre v1 */
+__xlated_unpriv("*(u64 *)(r2 -64) = r1") /* could be used to leak r2 */
+__xlated_unpriv("nospec") /* Spectre v4 */
+#endif
+#endif
+__naked void unpriv_spectre_v1_and_v4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r8 = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r9 = r0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: Spectre v1 and v4 barrier (simple)")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */
+__xlated_unpriv("goto pc-1") /* r2 = r0 */
+__xlated_unpriv("nospec")
+__xlated_unpriv("*(u64 *)(r2 -64) = r1")
+__xlated_unpriv("nospec")
+#endif
+#endif
+__naked void unpriv_spectre_v1_and_v4_simple(void)
+{
+ asm volatile (" \
+ r8 = 0; \
+ r9 = 0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: ldimm64 before Spectre v1 and v4 barrier (simple)")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+4")
+__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */
+__xlated_unpriv("goto pc-1") /* r2 = r0 */
+__xlated_unpriv("goto pc-1") /* r1 = 0x2020200005642020 ll */
+__xlated_unpriv("goto pc-1") /* second part of ldimm64 */
+__xlated_unpriv("nospec")
+__xlated_unpriv("*(u64 *)(r2 -64) = r1")
+__xlated_unpriv("nospec")
+#endif
+#endif
+__naked void unpriv_ldimm64_spectre_v1_and_v4_simple(void)
+{
+ asm volatile (" \
+ r8 = 0; \
+ r9 = 0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+ r1 = 0x2020200005642020 ll; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv_perf.c b/tools/testing/selftests/bpf/progs/verifier_unpriv_perf.c
new file mode 100644
index 000000000000..4d77407a0a79
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv_perf.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/unpriv.c */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("perf_event")
+__description("unpriv: spill/fill of different pointers ldx")
+__failure __msg("same insn cannot be used with different pointers")
+__naked void fill_of_different_pointers_ldx(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ if r1 == 0 goto l0_%=; \
+ r2 = r10; \
+ r2 += %[__imm_0]; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: r1 = *(u64*)(r6 + 0); \
+ r1 = *(u64*)(r1 + %[sample_period]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0,
+ -(__s32) offsetof(struct bpf_perf_event_data, sample_period) - 8),
+ __imm_const(sample_period,
+ offsetof(struct bpf_perf_event_data, sample_period))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value.c b/tools/testing/selftests/bpf/progs/verifier_value.c
new file mode 100644
index 000000000000..b5af6b6f5acd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map element value store of cleared call register")
+__failure __msg("R1 !read_ok")
+__failure_unpriv __msg_unpriv("R1 !read_ok")
+__naked void store_of_cleared_call_register(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value with unaligned store")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void element_value_with_unaligned_store(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += 3; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+ r1 = 43; \
+ *(u64*)(r0 + 2) = r1; \
+ r1 = 44; \
+ *(u64*)(r0 - 2) = r1; \
+ r8 = r0; \
+ r1 = 32; \
+ *(u64*)(r8 + 0) = r1; \
+ r1 = 33; \
+ *(u64*)(r8 + 2) = r1; \
+ r1 = 34; \
+ *(u64*)(r8 - 2) = r1; \
+ r8 += 5; \
+ r1 = 22; \
+ *(u64*)(r8 + 0) = r1; \
+ r1 = 23; \
+ *(u64*)(r8 + 4) = r1; \
+ r1 = 24; \
+ *(u64*)(r8 - 7) = r1; \
+ r7 = r8; \
+ r7 += 3; \
+ r1 = 22; \
+ *(u64*)(r7 + 0) = r1; \
+ r1 = 23; \
+ *(u64*)(r7 + 4) = r1; \
+ r1 = 24; \
+ *(u64*)(r7 - 4) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value with unaligned load")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void element_value_with_unaligned_load(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 >= %[max_entries] goto l0_%=; \
+ r0 += 3; \
+ r7 = *(u64*)(r0 + 0); \
+ r7 = *(u64*)(r0 + 2); \
+ r8 = r0; \
+ r7 = *(u64*)(r8 + 0); \
+ r7 = *(u64*)(r8 + 2); \
+ r0 += 5; \
+ r7 = *(u64*)(r0 + 0); \
+ r7 = *(u64*)(r0 + 4); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value is preserved across register spilling")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void is_preserved_across_register_spilling(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += %[test_val_foo]; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+ r1 = r10; \
+ r1 += -184; \
+ *(u64*)(r1 + 0) = r0; \
+ r3 = *(u64*)(r1 + 0); \
+ r1 = 42; \
+ *(u64*)(r3 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c b/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c
new file mode 100644
index 000000000000..d7a5ba9bbe6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_adj_spill.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map element value is preserved across register spilling")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void is_preserved_across_register_spilling(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+ r1 = r10; \
+ r1 += -184; \
+ *(u64*)(r1 + 0) = r0; \
+ r3 = *(u64*)(r1 + 0); \
+ r1 = 42; \
+ *(u64*)(r3 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value or null is marked on register spilling")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void is_marked_on_register_spilling(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = r10; \
+ r1 += -152; \
+ *(u64*)(r1 + 0) = r0; \
+ if r0 == 0 goto l0_%=; \
+ r3 = *(u64*)(r1 + 0); \
+ r1 = 42; \
+ *(u64*)(r3 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
new file mode 100644
index 000000000000..2129e4353fd9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_illegal_alu.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map element value illegal alu op, 1")
+__failure __msg("R0 bitwise operator &= on pointer")
+__failure_unpriv
+__naked void value_illegal_alu_op_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 &= 8; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value illegal alu op, 2")
+__failure __msg("R0 32-bit pointer arithmetic prohibited")
+__failure_unpriv
+__naked void value_illegal_alu_op_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ w0 += 0; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value illegal alu op, 3")
+__failure __msg("R0 pointer arithmetic with /= operator")
+__failure_unpriv
+__naked void value_illegal_alu_op_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 /= 42; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value illegal alu op, 4")
+__failure __msg("invalid mem access 'scalar'")
+__failure_unpriv __msg_unpriv("R0 pointer arithmetic prohibited")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void value_illegal_alu_op_4(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = be64 r0; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value illegal alu op, 5")
+__failure __msg("R0 invalid mem access 'scalar'")
+__msg_unpriv("leaking pointer from stack off -8")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void value_illegal_alu_op_5(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = 4096; \
+ r2 = r10; \
+ r2 += -8; \
+ *(u64*)(r2 + 0) = r0; \
+ lock *(u64 *)(r2 + 0) += r3; \
+ r0 = *(u64*)(r2 + 0); \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr illegal alu op, map_ptr = -map_ptr")
+__failure __msg("R0 invalid mem access 'scalar'")
+__failure_unpriv __msg_unpriv("R0 pointer arithmetic prohibited")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void map_ptr_illegal_alu_op(void)
+{
+ asm volatile (" \
+ r0 = %[map_hash_48b] ll; \
+ r0 = -r0; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("flow_keys illegal alu op with variable offset")
+__failure __msg("R7 pointer arithmetic on flow_keys prohibited")
+__naked void flow_keys_illegal_variable_offset_alu(void)
+{
+ asm volatile(" \
+ r6 = r1; \
+ r7 = *(u64*)(r6 + %[flow_keys_off]); \
+ r8 = 8; \
+ r8 /= 1; \
+ r8 &= 8; \
+ r7 += r8; \
+ r0 = *(u64*)(r7 + 0); \
+ exit; \
+" :
+ : __imm_const(flow_keys_off, offsetof(struct __sk_buff, flow_keys))
+ : __clobber_all);
+}
+
+#define DEFINE_BAD_OFFSET_TEST(name, op, off, imm) \
+ SEC("socket") \
+ __failure __msg("BPF_ALU uses reserved fields") \
+ __naked void name(void) \
+ { \
+ asm volatile( \
+ "r0 = 1;" \
+ ".8byte %[insn];" \
+ "r0 = 0;" \
+ "exit;" \
+ : \
+ : __imm_insn(insn, BPF_RAW_INSN((op), 0, 0, (off), (imm))) \
+ : __clobber_all); \
+ }
+
+/*
+ * Offset fields of 0 and 1 are legal for BPF_{DIV,MOD} instructions.
+ * Offset fields of 0 are legal for the rest of ALU instructions.
+ * Test that error is reported for illegal offsets, assuming that tests
+ * for legal offsets exist.
+ */
+DEFINE_BAD_OFFSET_TEST(bad_offset_divx, BPF_ALU64 | BPF_DIV | BPF_X, -1, 0)
+DEFINE_BAD_OFFSET_TEST(bad_offset_modk, BPF_ALU64 | BPF_MOD | BPF_K, -1, 1)
+DEFINE_BAD_OFFSET_TEST(bad_offset_addx, BPF_ALU64 | BPF_ADD | BPF_X, -1, 0)
+DEFINE_BAD_OFFSET_TEST(bad_offset_divx2, BPF_ALU64 | BPF_DIV | BPF_X, 2, 0)
+DEFINE_BAD_OFFSET_TEST(bad_offset_modk2, BPF_ALU64 | BPF_MOD | BPF_K, 2, 1)
+DEFINE_BAD_OFFSET_TEST(bad_offset_addx2, BPF_ALU64 | BPF_ADD | BPF_X, 1, 0)
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_or_null.c b/tools/testing/selftests/bpf/progs/verifier_value_or_null.c
new file mode 100644
index 000000000000..8ff668a242eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_or_null.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_or_null.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("tc")
+__description("multiple registers share map_lookup_elem result")
+__success __retval(0)
+__naked void share_map_lookup_elem_result(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("alu ops on ptr_to_map_value_or_null, 1")
+__failure __msg("R4 pointer arithmetic on map_value_or_null")
+__naked void map_value_or_null_1(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r4 += -2; \
+ r4 += 2; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("alu ops on ptr_to_map_value_or_null, 2")
+__failure __msg("R4 pointer arithmetic on map_value_or_null")
+__naked void map_value_or_null_2(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r4 &= -1; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("alu ops on ptr_to_map_value_or_null, 3")
+__failure __msg("R4 pointer arithmetic on map_value_or_null")
+__naked void map_value_or_null_3(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r4 <<= 1; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("invalid memory access with multiple map_lookup_elem calls")
+__failure __msg("R4 !read_ok")
+__naked void multiple_map_lookup_elem_calls(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ r8 = r1; \
+ r7 = r2; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r1 = r8; \
+ r2 = r7; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("valid indirect map_lookup_elem access with 2nd lookup in branch")
+__success __retval(0)
+__naked void with_2nd_lookup_in_branch(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ r8 = r1; \
+ r7 = r2; \
+ call %[bpf_map_lookup_elem]; \
+ r2 = 10; \
+ if r2 != 0 goto l0_%=; \
+ r1 = r8; \
+ r2 = r7; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r4 = r0; \
+ if r0 == 0 goto l1_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access from else condition")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void map_access_from_else_condition(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 >= %[__imm_0] goto l1_%=; \
+ r1 += 1; \
+l1_%=: r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, MAX_ENTRIES-1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("map lookup and null branch prediction")
+__success __retval(0)
+__naked void lookup_and_null_branch_prediction(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r6 = r0; \
+ if r6 == 0 goto l0_%=; \
+ if r6 != 0 goto l0_%=; \
+ r10 += 10; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("MAP_VALUE_OR_NULL check_ids() in regsafe()")
+__failure __msg("R8 invalid mem access 'map_value_or_null'")
+__failure_unpriv __msg_unpriv("")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void null_check_ids_in_regsafe(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ /* r9 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r9 = r0; \
+ /* r8 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r8 = r0; \
+ /* r7 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r7 = r0; \
+ /* r6 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r6 = r0; \
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from\
+ * ; this check, thus produced verifier states differ\
+ * ; only in 'insn_idx' \
+ * r9 = r8 ; optionally share ID between r9 and r8\
+ */ \
+ if r6 > r7 goto l0_%=; \
+ r9 = r8; \
+l0_%=: /* if r9 == 0 goto <exit> */ \
+ if r9 == 0 goto l1_%=; \
+ /* read map value via r8, this is not always \
+ * safe because r8 might be not equal to r9. \
+ */ \
+ r0 = *(u64*)(r8 + 0); \
+l1_%=: /* exit 0 */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
new file mode 100644
index 000000000000..af7938ce56cb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
@@ -0,0 +1,1441 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_ptr_arith.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <errno.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map access: known scalar += value_ptr unknown vs const")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void value_ptr_unknown_vs_const(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+ goto l4_%=; \
+l3_%=: r1 = 3; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr const vs unknown")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void value_ptr_const_vs_unknown(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 3; \
+ goto l4_%=; \
+l3_%=: r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr const vs const (ne)")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void ptr_const_vs_const_ne(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 3; \
+ goto l4_%=; \
+l3_%=: r1 = 5; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr const vs const (eq)")
+__success __success_unpriv __retval(1)
+__naked void ptr_const_vs_const_eq(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 5; \
+ goto l4_%=; \
+l3_%=: r1 = 5; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr unknown vs unknown (eq)")
+__success __success_unpriv __retval(1)
+__naked void ptr_unknown_vs_unknown_eq(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+ goto l4_%=; \
+l3_%=: r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr unknown vs unknown (lt)")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void ptr_unknown_vs_unknown_lt(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 6; \
+ r1 = r9; \
+ r1 &= 0x3; \
+ goto l4_%=; \
+l3_%=: r1 = 6; \
+ r1 = r9; \
+ r1 &= 0x7; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr unknown vs unknown (gt)")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void ptr_unknown_vs_unknown_gt(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 6; \
+ r1 = r9; \
+ r1 &= 0x7; \
+ goto l4_%=; \
+l3_%=: r1 = 6; \
+ r1 = r9; \
+ r1 &= 0x3; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr from different maps")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_from_different_maps(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = 4; \
+ r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar from different maps")
+__success __failure_unpriv
+__msg_unpriv("R0 min value is outside of the allowed memory range")
+__retval(1)
+__naked void known_scalar_from_different_maps(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = 4; \
+ r0 -= r1; \
+ r0 += r1; \
+ r0 = *(u8*)(r0 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr from different maps, but same value properties")
+__success __success_unpriv __retval(1)
+__naked void maps_but_same_value_properties(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_48b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = 4; \
+ r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_48b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: mixing value pointer and scalar, 1")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
+__retval(0)
+__naked void value_pointer_and_scalar_1(void)
+{
+ asm volatile (" \
+ /* load map value pointer into r0 and r2 */ \
+ r0 = 1; \
+ r1 = %[map_array_48b] ll; \
+ r2 = r10; \
+ r2 += -16; \
+ r6 = 0; \
+ *(u64*)(r10 - 16) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* load some number from the map into r1 */ \
+ r1 = *(u8*)(r0 + 0); \
+ /* depending on r1, branch: */ \
+ if r1 != 0 goto l1_%=; \
+ /* branch A */ \
+ r2 = r0; \
+ r3 = 0; \
+ goto l2_%=; \
+l1_%=: /* branch B */ \
+ r2 = 0; \
+ r3 = 0x100000; \
+l2_%=: /* common instruction */ \
+ r2 += r3; \
+ /* depending on r1, branch: */ \
+ if r1 != 0 goto l3_%=; \
+ /* branch A */ \
+ goto l4_%=; \
+l3_%=: /* branch B */ \
+ r0 = 0x13371337; \
+ /* verifier follows fall-through */ \
+ /* unpriv: nospec (inserted to prevent `R2 pointer comparison prohibited`) */\
+ if r2 != 0x100000 goto l4_%=; \
+ r0 = 0; \
+ exit; \
+l4_%=: /* fake-dead code; targeted from branch A to \
+ * prevent dead code sanitization \
+ */ \
+ r0 = *(u8*)(r0 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: mixing value pointer and scalar, 2")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
+__retval(0)
+__naked void value_pointer_and_scalar_2(void)
+{
+ asm volatile (" \
+ /* load map value pointer into r0 and r2 */ \
+ r0 = 1; \
+ r1 = %[map_array_48b] ll; \
+ r2 = r10; \
+ r2 += -16; \
+ r6 = 0; \
+ *(u64*)(r10 - 16) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* load some number from the map into r1 */ \
+ r1 = *(u8*)(r0 + 0); \
+ /* depending on r1, branch: */ \
+ if r1 == 0 goto l1_%=; \
+ /* branch A */ \
+ r2 = 0; \
+ r3 = 0x100000; \
+ goto l2_%=; \
+l1_%=: /* branch B */ \
+ r2 = r0; \
+ r3 = 0; \
+l2_%=: /* common instruction */ \
+ r2 += r3; \
+ /* depending on r1, branch: */ \
+ if r1 != 0 goto l3_%=; \
+ /* branch A */ \
+ goto l4_%=; \
+l3_%=: /* branch B */ \
+ r0 = 0x13371337; \
+ /* verifier follows fall-through */ \
+ if r2 != 0x100000 goto l4_%=; \
+ r0 = 0; \
+ exit; \
+l4_%=: /* fake-dead code; targeted from branch A to \
+ * prevent dead code sanitization, rejected \
+ * via branch B however \
+ */ \
+ /* unpriv: nospec (inserted to prevent `R0 invalid mem access 'scalar'`) */\
+ r0 = *(u8*)(r0 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sanitation: alu with different scalars 1")
+__success __success_unpriv __retval(0x100000)
+__naked void alu_with_different_scalars_1(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r1 = %[map_array_48b] ll; \
+ r2 = r10; \
+ r2 += -16; \
+ r6 = 0; \
+ *(u64*)(r10 - 16) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u32*)(r0 + 0); \
+ if r1 == 0 goto l1_%=; \
+ r2 = 0; \
+ r3 = 0x100000; \
+ goto l2_%=; \
+l1_%=: r2 = 42; \
+ r3 = 0x100001; \
+l2_%=: r2 += r3; \
+ r0 = r2; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sanitation: alu with different scalars 2")
+__success __success_unpriv __retval(0)
+__naked void alu_with_different_scalars_2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r1 = %[map_array_48b] ll; \
+ r6 = r1; \
+ r2 = r10; \
+ r2 += -16; \
+ r7 = 0; \
+ *(u64*)(r10 - 16) = r7; \
+ call %[bpf_map_delete_elem]; \
+ r7 = r0; \
+ r1 = r6; \
+ r2 = r10; \
+ r2 += -16; \
+ call %[bpf_map_delete_elem]; \
+ r6 = r0; \
+ r8 = r6; \
+ r8 += r7; \
+ r0 = r8; \
+ r0 += %[einval]; \
+ r0 += %[einval]; \
+ exit; \
+" :
+ : __imm(bpf_map_delete_elem),
+ __imm_addr(map_array_48b),
+ __imm_const(einval, EINVAL)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sanitation: alu with different scalars 3")
+__success __success_unpriv __retval(0)
+__naked void alu_with_different_scalars_3(void)
+{
+ asm volatile (" \
+ r0 = %[einval]; \
+ r0 *= -1; \
+ r7 = r0; \
+ r0 = %[einval]; \
+ r0 *= -1; \
+ r6 = r0; \
+ r8 = r6; \
+ r8 += r7; \
+ r0 = r8; \
+ r0 += %[einval]; \
+ r0 += %[einval]; \
+ exit; \
+" :
+ : __imm_const(einval, EINVAL)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, upper oob arith, test 1")
+__success __failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__retval(1)
+__naked void upper_oob_arith_test_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 48; \
+ r0 += r1; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, upper oob arith, test 2")
+__success __failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__retval(1)
+__naked void upper_oob_arith_test_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 49; \
+ r0 += r1; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, upper oob arith, test 3")
+__success __success_unpriv __retval(1)
+__naked void upper_oob_arith_test_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 47; \
+ r0 += r1; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar, lower oob arith, test 1")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__naked void lower_oob_arith_test_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 47; \
+ r0 += r1; \
+ r1 = 48; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar, lower oob arith, test 2")
+__success __failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__retval(1)
+__naked void lower_oob_arith_test_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 47; \
+ r0 += r1; \
+ r1 = 48; \
+ r0 -= r1; \
+ r1 = 1; \
+ r0 += r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar, lower oob arith, test 3")
+__success __success_unpriv __retval(1)
+__naked void lower_oob_arith_test_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 47; \
+ r0 += r1; \
+ r1 = 47; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr")
+__success __success_unpriv __retval(1)
+__naked void access_known_scalar_value_ptr_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 1")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_known_scalar_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 2")
+__failure __msg("invalid access to map value")
+__failure_unpriv
+__naked void value_ptr_known_scalar_2_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 49; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 3")
+__failure __msg("invalid access to map value")
+__failure_unpriv
+__naked void value_ptr_known_scalar_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = -1; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 4")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_known_scalar_4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 5; \
+ r0 += r1; \
+ r1 = -2; \
+ r0 += r1; \
+ r1 = -1; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 5")
+__success __success_unpriv __retval(0xabcdef12)
+__naked void value_ptr_known_scalar_5(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[__imm_0]; \
+ r1 += r0; \
+ r0 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_const(__imm_0, (6 + 1) * sizeof(int))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 6")
+__success __success_unpriv __retval(0xabcdef12)
+__naked void value_ptr_known_scalar_6(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[__imm_0]; \
+ r0 += r1; \
+ r1 = %[__imm_1]; \
+ r0 += r1; \
+ r0 = *(u32*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_const(__imm_0, (3 + 1) * sizeof(int)),
+ __imm_const(__imm_1, 3 * sizeof(int))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += N, value_ptr -= N known scalar")
+__success __success_unpriv __retval(0x12345678)
+__naked void value_ptr_n_known_scalar(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ w1 = 0x12345678; \
+ *(u32*)(r0 + 0) = r1; \
+ r0 += 2; \
+ r1 = 2; \
+ r0 -= r1; \
+ r0 = *(u32*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar += value_ptr, 1")
+__success __success_unpriv __retval(1)
+__naked void unknown_scalar_value_ptr_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar += value_ptr, 2")
+__success __success_unpriv __retval(0xabcdef12) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void unknown_scalar_value_ptr_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 &= 31; \
+ r1 += r0; \
+ r0 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar += value_ptr, 3")
+__success __failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__retval(0xabcdef12) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void unknown_scalar_value_ptr_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = -1; \
+ r0 += r1; \
+ r1 = 1; \
+ r0 += r1; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 &= 31; \
+ r1 += r0; \
+ r0 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar += value_ptr, 4")
+__failure __msg("R1 max value is outside of the allowed memory range")
+__msg_unpriv("R1 pointer arithmetic of map value goes out of range")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void unknown_scalar_value_ptr_4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 19; \
+ r0 += r1; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 &= 31; \
+ r1 += r0; \
+ r0 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += unknown scalar, 1")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_unknown_scalar_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += unknown scalar, 2")
+__success __success_unpriv __retval(0xabcdef12) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void value_ptr_unknown_scalar_2_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 &= 31; \
+ r0 += r1; \
+ r0 = *(u32*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += unknown scalar, 3")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_unknown_scalar_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r0 + 0); \
+ r2 = *(u64*)(r0 + 8); \
+ r3 = *(u64*)(r0 + 16); \
+ r1 &= 0xf; \
+ r3 &= 1; \
+ r3 |= 1; \
+ if r2 > r3 goto l0_%=; \
+ r0 += r3; \
+ r0 = *(u8*)(r0 + 0); \
+ r0 = 1; \
+l1_%=: exit; \
+l0_%=: r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += value_ptr")
+__failure __msg("R0 pointer += pointer prohibited")
+__failure_unpriv
+__naked void access_value_ptr_value_ptr_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += r0; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar -= value_ptr")
+__failure __msg("R1 tried to subtract pointer from scalar")
+__failure_unpriv
+__naked void access_known_scalar_value_ptr_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r1 -= r0; \
+ r0 = *(u8*)(r1 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__failure_unpriv
+__naked void access_value_ptr_known_scalar(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r0 -= r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar, 2")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_known_scalar_2_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 6; \
+ r2 = 4; \
+ r0 += r1; \
+ r0 -= r2; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar -= value_ptr")
+__failure __msg("R1 tried to subtract pointer from scalar")
+__failure_unpriv
+__naked void access_unknown_scalar_value_ptr(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r1 -= r0; \
+ r0 = *(u8*)(r1 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= unknown scalar")
+__failure __msg("R0 min value is negative")
+__failure_unpriv
+__naked void access_value_ptr_unknown_scalar(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r0 -= r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= unknown scalar, 2")
+__success __success_unpriv
+__retval(1)
+#ifdef SPEC_V1
+__xlated_unpriv("r1 &= 7")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 pointer arithmetic of map value goes out of range` */
+__xlated_unpriv("r0 -= r1")
+#endif
+__naked void value_ptr_unknown_scalar_2_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r1 |= 0x7; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0x7; \
+ r0 -= r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= value_ptr")
+__failure __msg("R0 invalid mem access 'scalar'")
+__msg_unpriv("R0 pointer -= pointer prohibited")
+__naked void access_value_ptr_value_ptr_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 -= r0; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: trying to leak tainted dst reg")
+__failure __msg("math between map_value pointer and 4294967295 is not allowed")
+__failure_unpriv
+__naked void to_leak_tainted_dst_reg(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r2 = r0; \
+ w1 = 0xFFFFFFFF; \
+ w1 = w1; \
+ r2 -= r1; \
+ *(u64*)(r0 + 0) = r2; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("32bit pkt_ptr -= scalar")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void _32bit_pkt_ptr_scalar(void)
+{
+ asm volatile (" \
+ r8 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r6 = r7; \
+ r6 += 40; \
+ if r6 > r8 goto l0_%=; \
+ w4 = w7; \
+ w6 -= w4; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("32bit scalar -= pkt_ptr")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void _32bit_scalar_pkt_ptr(void)
+{
+ asm volatile (" \
+ r8 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r6 = r7; \
+ r6 += 40; \
+ if r6 > r8 goto l0_%=; \
+ w4 = w6; \
+ w4 -= w7; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c
new file mode 100644
index 000000000000..f345466bca68
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/var_off.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("lwt_in")
+__description("variable-offset ctx access")
+__failure __msg("variable ctx access var_off=(0x0; 0x4)")
+__naked void variable_offset_ctx_access(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ /* add it to skb. We now have either &skb->len or\
+ * &skb->pkt_type, but we don't know which \
+ */ \
+ r1 += r2; \
+ /* dereference it */ \
+ r0 = *(u32*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("variable-offset stack read, priv vs unpriv")
+__success __failure_unpriv
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_read_priv_vs_unpriv(void)
+{
+ asm volatile (" \
+ /* Fill the top 8 bytes of the stack */ \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 8; \
+ /* add it to fp. We now have either fp-4 or fp-8, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it for a stack read */ \
+ r0 = *(u32*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("variable-offset stack read, uninitialized")
+__success
+__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
+__naked void variable_offset_stack_read_uninitialized(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 8; \
+ /* add it to fp. We now have either fp-4 or fp-8, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it for a stack read */ \
+ r0 = *(u32*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("variable-offset stack write, priv vs unpriv")
+__success
+/* Check that the maximum stack depth is correctly maintained according to the
+ * maximum possible variable offset.
+ */
+__log_level(4) __msg("stack depth 16")
+__failure_unpriv
+/* Variable stack access is rejected for unprivileged.
+ */
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_write_priv_vs_unpriv(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 8-byte aligned */ \
+ r2 &= 8; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-8 or \
+ * fp-16, but we don't know which \
+ */ \
+ r2 += r10; \
+ /* Dereference it for a stack write */ \
+ r0 = 0; \
+ *(u64*)(r2 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+/* Similar to the previous test, but this time also perform a read from the
+ * address written to with a variable offet. The read is allowed, showing that,
+ * after a variable-offset write, a privileged program can read the slots that
+ * were in the range of that write (even if the verifier doesn't actually know if
+ * the slot being read was really written to or not.
+ *
+ * Despite this test being mostly a superset, the previous test is also kept for
+ * the sake of it checking the stack depth in the case where there is no read.
+ */
+SEC("socket")
+__description("variable-offset stack write followed by read")
+__success
+/* Check that the maximum stack depth is correctly maintained according to the
+ * maximum possible variable offset.
+ */
+__log_level(4) __msg("stack depth 16")
+__failure_unpriv
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_write_followed_by_read(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 8-byte aligned */ \
+ r2 &= 8; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-8 or fp-16, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* Dereference it for a stack write */ \
+ r0 = 0; \
+ *(u64*)(r2 + 0) = r0; \
+ /* Now read from the address we just wrote. */ \
+ r3 = *(u64*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("variable-offset stack write clobbers spilled regs")
+__failure
+/* In the privileged case, dereferencing a spilled-and-then-filled
+ * register is rejected because the previous variable offset stack
+ * write might have overwritten the spilled pointer (i.e. we lose track
+ * of the spilled register when we analyze the write).
+ */
+__msg("R2 invalid mem access 'scalar'")
+__failure_unpriv
+/* The unprivileged case is not too interesting; variable
+ * stack access is rejected.
+ */
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__naked void stack_write_clobbers_spilled_regs(void)
+{
+ asm volatile (" \
+ /* Dummy instruction; needed because we need to patch the next one\
+ * and we can't patch the first instruction. \
+ */ \
+ r6 = 0; \
+ /* Make R0 a map ptr */ \
+ r0 = %[map_hash_8b] ll; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 8-byte aligned */ \
+ r2 &= 8; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-8 or fp-16, but\
+ * we don't know which. \
+ */ \
+ r2 += r10; \
+ /* Spill R0(map ptr) into stack */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* Dereference the unknown value for a stack write */\
+ r0 = 0; \
+ *(u64*)(r2 + 0) = r0; \
+ /* Fill the register back into R2 */ \
+ r2 = *(u64*)(r10 - 8); \
+ /* Try to dereference R2 for a memory load */ \
+ r0 = *(u64*)(r2 + 8); \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("sockops")
+__description("indirect variable-offset stack access, unbounded")
+__failure __msg("invalid unbounded variable-offset write to stack R4")
+__naked void variable_offset_stack_access_unbounded(void)
+{
+ asm volatile (" \
+ r2 = 6; \
+ r3 = 28; \
+ /* Fill the top 16 bytes of the stack. */ \
+ r4 = 0; \
+ *(u64*)(r10 - 16) = r4; \
+ r4 = 0; \
+ *(u64*)(r10 - 8) = r4; \
+ /* Get an unknown value. */ \
+ r4 = *(u64*)(r1 + %[bpf_sock_ops_bytes_received]);\
+ /* Check the lower bound but don't check the upper one. */\
+ if r4 s< 0 goto l0_%=; \
+ /* Point the lower bound to initialized stack. Offset is now in range\
+ * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.\
+ */ \
+ r4 -= 16; \
+ r4 += r10; \
+ r5 = 8; \
+ /* Dereference it indirectly. */ \
+ call %[bpf_getsockopt]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_getsockopt),
+ __imm_const(bpf_sock_ops_bytes_received, offsetof(struct bpf_sock_ops, bytes_received))
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, max out of bound")
+__failure __msg("invalid variable-offset read from stack R2")
+__naked void access_max_out_of_bound(void)
+{
+ asm volatile (" \
+ /* Fill the top 8 bytes of the stack */ \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 8; \
+ /* add it to fp. We now have either fp-4 or fp-8, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it indirectly */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+/* Similar to the test above, but this time check the special case of a
+ * zero-sized stack access. We used to have a bug causing crashes for zero-sized
+ * out-of-bounds accesses.
+ */
+SEC("socket")
+__description("indirect variable-offset stack access, zero-sized, max out of bound")
+__failure __msg("invalid variable-offset write to stack R1")
+__naked void zero_sized_access_max_out_of_bound(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ /* Fill some stack */ \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ /* Get an unknown value */ \
+ r1 = *(u32*)(r1 + 0); \
+ r1 &= 63; \
+ r1 += -16; \
+ /* r1 is now anywhere in [-16,48) */ \
+ r1 += r10; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, min out of bound")
+__failure __msg("invalid variable-offset read from stack R2")
+__naked void access_min_out_of_bound(void)
+{
+ asm volatile (" \
+ /* Fill the top 8 bytes of the stack */ \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 516; \
+ /* add it to fp. We now have either fp-516 or fp-512, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it indirectly */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("indirect variable-offset stack access, min_off < min_initialized")
+__success
+__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
+__naked void access_min_off_min_initialized(void)
+{
+ asm volatile (" \
+ /* Fill only the top 8 bytes of the stack. */ \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned. */ \
+ r2 &= 4; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know\
+ * which. fp-16 size 8 is partially uninitialized stack.\
+ */ \
+ r2 += r10; \
+ /* Dereference it indirectly. */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("indirect variable-offset stack access, priv vs unpriv")
+__success __failure_unpriv
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_access_priv_vs_unpriv(void)
+{
+ asm volatile (" \
+ /* Fill the top 16 bytes of the stack. */ \
+ r2 = 0; \
+ *(u64*)(r10 - 16) = r2; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value. */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned. */ \
+ r2 &= 4; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know\
+ * which, but either way it points to initialized stack.\
+ */ \
+ r2 += r10; \
+ /* Dereference it indirectly. */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, ok")
+__success __retval(0)
+__naked void variable_offset_stack_access_ok(void)
+{
+ asm volatile (" \
+ /* Fill the top 16 bytes of the stack. */ \
+ r2 = 0; \
+ *(u64*)(r10 - 16) = r2; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value. */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned. */ \
+ r2 &= 4; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know\
+ * which, but either way it points to initialized stack.\
+ */ \
+ r2 += r10; \
+ /* Dereference it indirectly. */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
new file mode 100644
index 000000000000..55398c04290a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC. */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+static char buf[64];
+
+SEC("lsm.s/file_open")
+__success
+int BPF_PROG(get_task_exe_file_and_put_kfunc_from_current_sleepable)
+{
+ struct file *acquired;
+
+ acquired = bpf_get_task_exe_file(bpf_get_current_task_btf());
+ if (!acquired)
+ return 0;
+
+ bpf_put_file(acquired);
+ return 0;
+}
+
+SEC("lsm/file_open")
+__success
+int BPF_PROG(get_task_exe_file_and_put_kfunc_from_current_non_sleepable, struct file *file)
+{
+ struct file *acquired;
+
+ acquired = bpf_get_task_exe_file(bpf_get_current_task_btf());
+ if (!acquired)
+ return 0;
+
+ bpf_put_file(acquired);
+ return 0;
+}
+
+SEC("lsm.s/task_alloc")
+__success
+int BPF_PROG(get_task_exe_file_and_put_kfunc_from_argument,
+ struct task_struct *task)
+{
+ struct file *acquired;
+
+ acquired = bpf_get_task_exe_file(task);
+ if (!acquired)
+ return 0;
+
+ bpf_put_file(acquired);
+ return 0;
+}
+
+SEC("lsm.s/inode_getattr")
+__success
+int BPF_PROG(path_d_path_from_path_argument, struct path *path)
+{
+ int ret;
+
+ ret = bpf_path_d_path(path, buf, sizeof(buf));
+ __sink(ret);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__success
+int BPF_PROG(path_d_path_from_file_argument, struct file *file)
+{
+ int ret;
+ const struct path *path;
+
+ /* The f_path member is a path which is embedded directly within a
+ * file. Therefore, a pointer to such embedded members are still
+ * recognized by the BPF verifier as being PTR_TRUSTED as it's
+ * essentially PTR_TRUSTED w/ a non-zero fixed offset.
+ */
+ path = &file->f_path;
+ ret = bpf_path_d_path(path, buf, sizeof(buf));
+ __sink(ret);
+ return 0;
+}
+
+SEC("lsm.s/inode_rename")
+__success
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ if (!inode)
+ return 0;
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
new file mode 100644
index 000000000000..4b392c6c8fc4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC. */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/limits.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+static char buf[PATH_MAX];
+
+SEC("lsm.s/file_open")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(get_task_exe_file_kfunc_null)
+{
+ struct file *acquired;
+
+ /* Can't pass a NULL pointer to bpf_get_task_exe_file(). */
+ acquired = bpf_get_task_exe_file(NULL);
+ if (!acquired)
+ return 0;
+
+ bpf_put_file(acquired);
+ return 0;
+}
+
+SEC("lsm.s/inode_getxattr")
+__failure __msg("arg#0 pointer type STRUCT task_struct must point to scalar, or struct with scalar")
+int BPF_PROG(get_task_exe_file_kfunc_fp)
+{
+ u64 x;
+ struct file *acquired;
+ struct task_struct *task;
+
+ task = (struct task_struct *)&x;
+ /* Can't pass random frame pointer to bpf_get_task_exe_file(). */
+ acquired = bpf_get_task_exe_file(task);
+ if (!acquired)
+ return 0;
+
+ bpf_put_file(acquired);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("R1 must be referenced or trusted")
+int BPF_PROG(get_task_exe_file_kfunc_untrusted)
+{
+ struct file *acquired;
+ struct task_struct *parent;
+
+ /* Walking a trusted struct task_struct returned from
+ * bpf_get_current_task_btf() yields an untrusted pointer.
+ */
+ parent = bpf_get_current_task_btf()->parent;
+ /* Can't pass untrusted pointer to bpf_get_task_exe_file(). */
+ acquired = bpf_get_task_exe_file(parent);
+ if (!acquired)
+ return 0;
+
+ bpf_put_file(acquired);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("Unreleased reference")
+int BPF_PROG(get_task_exe_file_kfunc_unreleased)
+{
+ struct file *acquired;
+
+ acquired = bpf_get_task_exe_file(bpf_get_current_task_btf());
+ if (!acquired)
+ return 0;
+
+ /* Acquired but never released. */
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("release kernel function bpf_put_file expects")
+int BPF_PROG(put_file_kfunc_unacquired, struct file *file)
+{
+ /* Can't release an unacquired pointer. */
+ bpf_put_file(file);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(path_d_path_kfunc_null)
+{
+ /* Can't pass NULL value to bpf_path_d_path() kfunc. */
+ bpf_path_d_path(NULL, buf, sizeof(buf));
+ return 0;
+}
+
+SEC("lsm.s/task_alloc")
+__failure __msg("R1 must be referenced or trusted")
+int BPF_PROG(path_d_path_kfunc_untrusted_from_argument, struct task_struct *task)
+{
+ struct path *root;
+
+ /* Walking a trusted argument typically yields an untrusted
+ * pointer. This is one example of that.
+ */
+ root = &task->fs->root;
+ bpf_path_d_path(root, buf, sizeof(buf));
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("R1 must be referenced or trusted")
+int BPF_PROG(path_d_path_kfunc_untrusted_from_current)
+{
+ struct path *pwd;
+ struct task_struct *current;
+
+ current = bpf_get_current_task_btf();
+ /* Walking a trusted pointer returned from bpf_get_current_task_btf()
+ * yields an untrusted pointer.
+ */
+ pwd = &current->fs->pwd;
+ bpf_path_d_path(pwd, buf, sizeof(buf));
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("kernel function bpf_path_d_path args#0 expected pointer to STRUCT path but R1 has a pointer to STRUCT file")
+int BPF_PROG(path_d_path_kfunc_type_mismatch, struct file *file)
+{
+ bpf_path_d_path((struct path *)&file->f_task_work, buf, sizeof(buf));
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("invalid access to map value, value_size=4096 off=0 size=8192")
+int BPF_PROG(path_d_path_kfunc_invalid_buf_sz, struct file *file)
+{
+ /* bpf_path_d_path() enforces a constraint on the buffer size supplied
+ * by the BPF LSM program via the __sz annotation. buf here is set to
+ * PATH_MAX, so let's ensure that the BPF verifier rejects BPF_PROG_LOAD
+ * attempts if the supplied size and the actual size of the buffer
+ * mismatches.
+ */
+ bpf_path_d_path(&file->f_path, buf, PATH_MAX * 2);
+ return 0;
+}
+
+SEC("fentry/vfs_open")
+__failure __msg("calling kernel function bpf_path_d_path is not allowed")
+int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f)
+{
+ /* Calling bpf_path_d_path() from a non-LSM BPF program isn't permitted.
+ */
+ bpf_path_d_path(path, buf, sizeof(buf));
+ return 0;
+}
+
+SEC("lsm.s/inode_rename")
+__failure __msg("invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_xadd.c b/tools/testing/selftests/bpf/progs/verifier_xadd.c
new file mode 100644
index 000000000000..05a0a55adb45
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xadd.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xadd.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("tc")
+__description("xadd/w check unaligned stack")
+__failure __msg("misaligned stack access off")
+__naked void xadd_w_check_unaligned_stack(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ *(u64*)(r10 - 8) = r0; \
+ lock *(u32 *)(r10 - 7) += w0; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("xadd/w check unaligned map")
+__failure __msg("misaligned value access off")
+__naked void xadd_w_check_unaligned_map(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = 1; \
+ lock *(u32 *)(r0 + 3) += w1; \
+ r0 = *(u32*)(r0 + 3); \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("xadd/w check unaligned pkt")
+__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void xadd_w_check_unaligned_pkt(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ r0 = 99; \
+ goto l1_%=; \
+l0_%=: r0 = 1; \
+ r1 = 0; \
+ *(u32*)(r2 + 0) = r1; \
+ r1 = 0; \
+ *(u32*)(r2 + 3) = r1; \
+ lock *(u32 *)(r2 + 1) += w0; \
+ lock *(u32 *)(r2 + 2) += w0; \
+ r0 = *(u32*)(r2 + 1); \
+l1_%=: exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("xadd/w check whether src/dst got mangled, 1")
+__success __retval(3)
+__naked void src_dst_got_mangled_1(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r6 = r0; \
+ r7 = r10; \
+ *(u64*)(r10 - 8) = r0; \
+ lock *(u64 *)(r10 - 8) += r0; \
+ lock *(u64 *)(r10 - 8) += r0; \
+ if r6 != r0 goto l0_%=; \
+ if r7 != r10 goto l0_%=; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+l0_%=: r0 = 42; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("xadd/w check whether src/dst got mangled, 2")
+__success __retval(3)
+__naked void src_dst_got_mangled_2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r6 = r0; \
+ r7 = r10; \
+ *(u32*)(r10 - 8) = r0; \
+ lock *(u32 *)(r10 - 8) += w0; \
+ lock *(u32 *)(r10 - 8) += w0; \
+ if r6 != r0 goto l0_%=; \
+ if r7 != r10 goto l0_%=; \
+ r0 = *(u32*)(r10 - 8); \
+ exit; \
+l0_%=: r0 = 42; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp.c b/tools/testing/selftests/bpf/progs/verifier_xdp.c
new file mode 100644
index 000000000000..50768ed179b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xdp.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("XDP, using ifindex from netdev")
+__success __retval(1)
+__naked void xdp_using_ifindex_from_netdev(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r2 = *(u32*)(r1 + %[xdp_md_ingress_ifindex]); \
+ if r2 < 1 goto l0_%=; \
+ r0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(xdp_md_ingress_ifindex, offsetof(struct xdp_md, ingress_ifindex))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
new file mode 100644
index 000000000000..df2dfd1b15d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
@@ -0,0 +1,1722 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end mangling, bad access 1")
+__failure __msg("R3 pointer arithmetic on pkt_end")
+__naked void end_mangling_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ r3 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end mangling, bad access 2")
+__failure __msg("R3 pointer arithmetic on pkt_end")
+__naked void end_mangling_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ r3 -= 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_1_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_pkt_data_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_end_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_end_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_1_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_pkt_data_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_9(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_9(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_meta_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_10(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_10(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_pkt_data_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_11(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_11(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_1_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_12(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_12(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_pkt_data_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_13(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_13(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_1_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_14(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_14(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_15(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_15(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_meta_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_16(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_16(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
new file mode 100644
index 000000000000..bcfb6feb38c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+#include <stdbool.h>
+
+int lookup_status;
+bool test_xdp;
+bool tcp_skc;
+
+#define CUR_NS BPF_F_CURRENT_NETNS
+
+static void socket_lookup(void *ctx, void *data_end, void *data)
+{
+ struct ethhdr *eth = data;
+ struct bpf_sock_tuple *tp;
+ struct bpf_sock *sk;
+ struct iphdr *iph;
+ int tplen;
+
+ if (eth + 1 > data_end)
+ return;
+
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return;
+
+ iph = (struct iphdr *)(eth + 1);
+ if (iph + 1 > data_end)
+ return;
+
+ tp = (struct bpf_sock_tuple *)&iph->saddr;
+ tplen = sizeof(tp->ipv4);
+ if ((void *)tp + tplen > data_end)
+ return;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (tcp_skc)
+ sk = bpf_skc_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
+ else
+ sk = bpf_sk_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
+ break;
+ case IPPROTO_UDP:
+ sk = bpf_sk_lookup_udp(ctx, tp, tplen, CUR_NS, 0);
+ break;
+ default:
+ return;
+ }
+
+ lookup_status = 0;
+
+ if (sk) {
+ bpf_sk_release(sk);
+ lookup_status = 1;
+ }
+}
+
+SEC("tc")
+int tc_socket_lookup(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+
+ if (test_xdp)
+ return TC_ACT_UNSPEC;
+
+ socket_lookup(skb, data_end, data);
+ return TC_ACT_UNSPEC;
+}
+
+SEC("xdp")
+int xdp_socket_lookup(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ if (!test_xdp)
+ return XDP_PASS;
+
+ socket_lookup(xdp, data_end, data);
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c
new file mode 100644
index 000000000000..25be2cd9d42c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/wq.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Benjamin Tissoires
+ */
+
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct hmap_elem {
+ int counter;
+ struct bpf_timer timer; /* unused */
+ struct bpf_spin_lock lock; /* unused */
+ struct bpf_wq work;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap_malloc SEC(".maps");
+
+struct elem {
+ int ok_offset;
+ struct bpf_wq w;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+__u32 ok;
+__u32 ok_sleepable;
+
+static int test_elem_callback(void *map, int *key,
+ int (callback_fn)(void *map, int *key, void *value))
+{
+ struct elem init = {}, *val;
+ struct bpf_wq *wq;
+
+ if ((ok & (1 << *key) ||
+ (ok_sleepable & (1 << *key))))
+ return -22;
+
+ if (map == &lru &&
+ bpf_map_update_elem(map, key, &init, 0))
+ return -1;
+
+ val = bpf_map_lookup_elem(map, key);
+ if (!val)
+ return -2;
+
+ val->ok_offset = *key;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, map, 0) != 0)
+ return -3;
+
+ if (bpf_wq_set_callback(wq, callback_fn, 0))
+ return -4;
+
+ if (bpf_wq_start(wq, 0))
+ return -5;
+
+ return 0;
+}
+
+static int test_hmap_elem_callback(void *map, int *key,
+ int (callback_fn)(void *map, int *key, void *value))
+{
+ struct hmap_elem init = {}, *val;
+ struct bpf_wq *wq;
+
+ if ((ok & (1 << *key) ||
+ (ok_sleepable & (1 << *key))))
+ return -22;
+
+ if (bpf_map_update_elem(map, key, &init, 0))
+ return -1;
+
+ val = bpf_map_lookup_elem(map, key);
+ if (!val)
+ return -2;
+
+ wq = &val->work;
+ if (bpf_wq_init(wq, map, 0) != 0)
+ return -3;
+
+ if (bpf_wq_set_callback(wq, callback_fn, 0))
+ return -4;
+
+ if (bpf_wq_start(wq, 0))
+ return -5;
+
+ return 0;
+}
+
+/* callback for non sleepable workqueue */
+static int wq_callback(void *map, int *key, void *value)
+{
+ bpf_kfunc_common_test();
+ ok |= (1 << *key);
+ return 0;
+}
+
+/* callback for sleepable workqueue */
+static int wq_cb_sleepable(void *map, int *key, void *value)
+{
+ struct elem *data = (struct elem *)value;
+ int offset = data->ok_offset;
+
+ if (*key != offset)
+ return 0;
+
+ bpf_kfunc_call_test_sleepable();
+ ok_sleepable |= (1 << offset);
+ return 0;
+}
+
+SEC("tc")
+/* test that workqueues can be used from an array */
+__retval(0)
+long test_call_array_sleepable(void *ctx)
+{
+ int key = 0;
+
+ return test_elem_callback(&array, &key, wq_cb_sleepable);
+}
+
+SEC("syscall")
+/* Same test than above but from a sleepable context. */
+__retval(0)
+long test_syscall_array_sleepable(void *ctx)
+{
+ int key = 1;
+
+ return test_elem_callback(&array, &key, wq_cb_sleepable);
+}
+
+SEC("tc")
+/* test that workqueues can be used from a hashmap */
+__retval(0)
+long test_call_hash_sleepable(void *ctx)
+{
+ int key = 2;
+
+ return test_hmap_elem_callback(&hmap, &key, wq_callback);
+}
+
+SEC("tc")
+/* test that workqueues can be used from a hashmap with NO_PREALLOC. */
+__retval(0)
+long test_call_hash_malloc_sleepable(void *ctx)
+{
+ int key = 3;
+
+ return test_hmap_elem_callback(&hmap_malloc, &key, wq_callback);
+}
+
+SEC("tc")
+/* test that workqueues can be used from a LRU map */
+__retval(0)
+long test_call_lru_sleepable(void *ctx)
+{
+ int key = 4;
+
+ return test_elem_callback(&lru, &key, wq_callback);
+}
+
+SEC("tc")
+long test_map_no_btf(void *ctx)
+{
+ struct elem *val;
+ struct bpf_wq *wq;
+ int key = 42;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -2;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, &array, 0) != 0)
+ return -3;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
new file mode 100644
index 000000000000..d06f6d40594a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Benjamin Tissoires
+ */
+
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_wq w;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+/* callback for non sleepable workqueue */
+static int wq_callback(void *map, int *key, void *value)
+{
+ bpf_kfunc_common_test();
+ return 0;
+}
+
+/* callback for sleepable workqueue */
+static int wq_cb_sleepable(void *map, int *key, void *value)
+{
+ bpf_kfunc_call_test_sleepable();
+ return 0;
+}
+
+SEC("tc")
+/* test that bpf_wq_init takes a map as a second argument
+ */
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg(": (85) call bpf_wq_init#") /* anchor message */
+__msg("pointer in R2 isn't map pointer")
+long test_wq_init_nomap(void *ctx)
+{
+ struct bpf_wq *wq;
+ struct elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -1;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, &key, 0) != 0)
+ return -3;
+
+ return 0;
+}
+
+SEC("tc")
+/* test that the workqueue is part of the map in bpf_wq_init
+ */
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg(": (85) call bpf_wq_init#") /* anchor message */
+__msg("workqueue pointer in R1 map_uid=0 doesn't match map pointer in R2 map_uid=0")
+long test_wq_init_wrong_map(void *ctx)
+{
+ struct bpf_wq *wq;
+ struct elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -1;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, &lru, 0) != 0)
+ return -3;
+
+ return 0;
+}
+
+SEC("?tc")
+__log_level(2)
+__failure
+/* check that the first argument of bpf_wq_set_callback()
+ * is a correct bpf_wq pointer.
+ */
+__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg("arg#0 doesn't point to a map value")
+long test_wrong_wq_pointer(void *ctx)
+{
+ int key = 0;
+ struct bpf_wq *wq;
+
+ wq = bpf_map_lookup_elem(&array, &key);
+ if (!wq)
+ return 1;
+
+ if (bpf_wq_init(wq, &array, 0))
+ return 2;
+
+ if (bpf_wq_set_callback((void *)&wq, wq_callback, 0))
+ return 3;
+
+ return -22;
+}
+
+SEC("?tc")
+__log_level(2)
+__failure
+/* check that the first argument of bpf_wq_set_callback()
+ * is a correct bpf_wq pointer.
+ */
+__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg("off 1 doesn't point to 'struct bpf_wq' that is at 0")
+long test_wrong_wq_pointer_offset(void *ctx)
+{
+ int key = 0;
+ struct bpf_wq *wq;
+
+ wq = bpf_map_lookup_elem(&array, &key);
+ if (!wq)
+ return 1;
+
+ if (bpf_wq_init(wq, &array, 0))
+ return 2;
+
+ if (bpf_wq_set_callback((void *)wq + 1, wq_cb_sleepable, 0))
+ return 3;
+
+ return -22;
+}
+
+SEC("tc")
+__log_level(2)
+__failure
+__msg(": (85) call bpf_wq_init#")
+__msg("R1 doesn't have constant offset. bpf_wq has to be at the constant offset")
+long test_bad_wq_off(void *ctx)
+{
+ struct elem *val;
+ struct bpf_wq *wq;
+ int key = 42;
+ u64 unknown;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -2;
+
+ unknown = bpf_get_prandom_u32();
+ wq = &val->w + unknown;
+ if (bpf_wq_init(wq, &array, 0) != 0)
+ return -3;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c
index ea25e8881992..d988b2e0cee8 100644
--- a/tools/testing/selftests/bpf/progs/xdp_dummy.c
+++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c
@@ -4,7 +4,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/xdp_features.c b/tools/testing/selftests/bpf/progs/xdp_features.c
new file mode 100644
index 000000000000..67424084a38a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_features.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/netdev.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <asm-generic/errno-base.h>
+
+#include "xdp_features.h"
+
+#define ipv6_addr_equal(a, b) ((a).s6_addr32[0] == (b).s6_addr32[0] && \
+ (a).s6_addr32[1] == (b).s6_addr32[1] && \
+ (a).s6_addr32[2] == (b).s6_addr32[2] && \
+ (a).s6_addr32[3] == (b).s6_addr32[3])
+
+struct net_device;
+struct bpf_prog;
+
+struct xdp_cpumap_stats {
+ unsigned int redirect;
+ unsigned int pass;
+ unsigned int drop;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} dut_stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 1);
+} cpu_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} dev_map SEC(".maps");
+
+const volatile struct in6_addr tester_addr;
+const volatile struct in6_addr dut_addr;
+
+static __always_inline int
+xdp_process_echo_packet(struct xdp_md *xdp, bool dut)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eh = data;
+ struct tlv_hdr *tlv;
+ struct udphdr *uh;
+ __be16 port;
+
+ if (eh + 1 > (struct ethhdr *)data_end)
+ return -EINVAL;
+
+ if (eh->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *ih = (struct iphdr *)(eh + 1);
+ __be32 saddr = dut ? tester_addr.s6_addr32[3]
+ : dut_addr.s6_addr32[3];
+ __be32 daddr = dut ? dut_addr.s6_addr32[3]
+ : tester_addr.s6_addr32[3];
+
+ ih = (struct iphdr *)(eh + 1);
+ if (ih + 1 > (struct iphdr *)data_end)
+ return -EINVAL;
+
+ if (saddr != ih->saddr)
+ return -EINVAL;
+
+ if (daddr != ih->daddr)
+ return -EINVAL;
+
+ if (ih->protocol != IPPROTO_UDP)
+ return -EINVAL;
+
+ uh = (struct udphdr *)(ih + 1);
+ } else if (eh->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct in6_addr saddr = dut ? tester_addr : dut_addr;
+ struct in6_addr daddr = dut ? dut_addr : tester_addr;
+ struct ipv6hdr *ih6 = (struct ipv6hdr *)(eh + 1);
+
+ if (ih6 + 1 > (struct ipv6hdr *)data_end)
+ return -EINVAL;
+
+ if (!ipv6_addr_equal(saddr, ih6->saddr))
+ return -EINVAL;
+
+ if (!ipv6_addr_equal(daddr, ih6->daddr))
+ return -EINVAL;
+
+ if (ih6->nexthdr != IPPROTO_UDP)
+ return -EINVAL;
+
+ uh = (struct udphdr *)(ih6 + 1);
+ } else {
+ return -EINVAL;
+ }
+
+ if (uh + 1 > (struct udphdr *)data_end)
+ return -EINVAL;
+
+ port = dut ? uh->dest : uh->source;
+ if (port != bpf_htons(DUT_ECHO_PORT))
+ return -EINVAL;
+
+ tlv = (struct tlv_hdr *)(uh + 1);
+ if (tlv + 1 > data_end)
+ return -EINVAL;
+
+ return bpf_htons(tlv->type) == CMD_ECHO ? 0 : -EINVAL;
+}
+
+static __always_inline int
+xdp_update_stats(struct xdp_md *xdp, bool tx, bool dut)
+{
+ __u32 *val, key = 0;
+
+ if (xdp_process_echo_packet(xdp, tx))
+ return -EINVAL;
+
+ if (dut)
+ val = bpf_map_lookup_elem(&dut_stats, &key);
+ else
+ val = bpf_map_lookup_elem(&stats, &key);
+
+ if (val)
+ __sync_add_and_fetch(val, 1);
+
+ return 0;
+}
+
+/* Tester */
+
+SEC("xdp")
+int xdp_tester_check_tx(struct xdp_md *xdp)
+{
+ xdp_update_stats(xdp, true, false);
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_tester_check_rx(struct xdp_md *xdp)
+{
+ xdp_update_stats(xdp, false, false);
+
+ return XDP_PASS;
+}
+
+/* DUT */
+
+SEC("xdp")
+int xdp_do_pass(struct xdp_md *xdp)
+{
+ xdp_update_stats(xdp, true, true);
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_do_drop(struct xdp_md *xdp)
+{
+ if (xdp_update_stats(xdp, true, true))
+ return XDP_PASS;
+
+ return XDP_DROP;
+}
+
+SEC("xdp")
+int xdp_do_aborted(struct xdp_md *xdp)
+{
+ if (xdp_process_echo_packet(xdp, true))
+ return XDP_PASS;
+
+ return XDP_ABORTED;
+}
+
+SEC("xdp")
+int xdp_do_tx(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eh = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ if (xdp_update_stats(xdp, true, true))
+ return XDP_PASS;
+
+ __builtin_memcpy(tmp_mac, eh->h_source, ETH_ALEN);
+ __builtin_memcpy(eh->h_source, eh->h_dest, ETH_ALEN);
+ __builtin_memcpy(eh->h_dest, tmp_mac, ETH_ALEN);
+
+ return XDP_TX;
+}
+
+SEC("xdp")
+int xdp_do_redirect(struct xdp_md *xdp)
+{
+ if (xdp_process_echo_packet(xdp, true))
+ return XDP_PASS;
+
+ return bpf_redirect_map(&cpu_map, 0, 0);
+}
+
+SEC("tp_btf/xdp_exception")
+int BPF_PROG(xdp_exception, const struct net_device *dev,
+ const struct bpf_prog *xdp, __u32 act)
+{
+ __u32 *val, key = 0;
+
+ val = bpf_map_lookup_elem(&dut_stats, &key);
+ if (val)
+ __sync_add_and_fetch(val, 1);
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+ unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+ __u32 *val, key = 0;
+
+ val = bpf_map_lookup_elem(&dut_stats, &key);
+ if (val)
+ __sync_add_and_fetch(val, 1);
+
+ return 0;
+}
+
+SEC("xdp/cpumap")
+int xdp_do_redirect_cpumap(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eh = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ if (xdp_process_echo_packet(xdp, true))
+ return XDP_PASS;
+
+ __builtin_memcpy(tmp_mac, eh->h_source, ETH_ALEN);
+ __builtin_memcpy(eh->h_source, eh->h_dest, ETH_ALEN);
+ __builtin_memcpy(eh->h_dest, tmp_mac, ETH_ALEN);
+
+ return bpf_redirect_map(&dev_map, 0, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_flowtable.c b/tools/testing/selftests/bpf/progs/xdp_flowtable.c
new file mode 100644
index 000000000000..7fdc7b23ee74
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_flowtable.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86dd
+#define IP_MF 0x2000 /* "More Fragments" */
+#define IP_OFFSET 0x1fff /* "Fragment Offset" */
+#define AF_INET 2
+#define AF_INET6 10
+
+struct bpf_flowtable_opts___local {
+ s32 error;
+};
+
+struct flow_offload_tuple_rhash *
+bpf_xdp_flow_lookup(struct xdp_md *, struct bpf_fib_lookup *,
+ struct bpf_flowtable_opts___local *, u32) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} stats SEC(".maps");
+
+static bool xdp_flowtable_offload_check_iphdr(struct iphdr *iph)
+{
+ /* ip fragmented traffic */
+ if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET))
+ return false;
+
+ /* ip options */
+ if (iph->ihl * 4 != sizeof(*iph))
+ return false;
+
+ if (iph->ttl <= 1)
+ return false;
+
+ return true;
+}
+
+static bool xdp_flowtable_offload_check_tcp_state(void *ports, void *data_end,
+ u8 proto)
+{
+ if (proto == IPPROTO_TCP) {
+ struct tcphdr *tcph = ports;
+
+ if (tcph + 1 > data_end)
+ return false;
+
+ if (tcph->fin || tcph->rst)
+ return false;
+ }
+
+ return true;
+}
+
+struct flow_ports___local {
+ __be16 source, dest;
+} __attribute__((preserve_access_index));
+
+SEC("xdp.frags")
+int xdp_flowtable_do_lookup(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ struct bpf_flowtable_opts___local opts = {};
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct bpf_fib_lookup tuple = {
+ .ifindex = ctx->ingress_ifindex,
+ };
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ struct flow_ports___local *ports;
+ __u32 *val, key = 0;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ switch (eth->h_proto) {
+ case bpf_htons(ETH_P_IP): {
+ struct iphdr *iph = data + sizeof(*eth);
+
+ ports = (struct flow_ports___local *)(iph + 1);
+ if (ports + 1 > data_end)
+ return XDP_PASS;
+
+ /* sanity check on ip header */
+ if (!xdp_flowtable_offload_check_iphdr(iph))
+ return XDP_PASS;
+
+ if (!xdp_flowtable_offload_check_tcp_state(ports, data_end,
+ iph->protocol))
+ return XDP_PASS;
+
+ tuple.family = AF_INET;
+ tuple.tos = iph->tos;
+ tuple.l4_protocol = iph->protocol;
+ tuple.tot_len = bpf_ntohs(iph->tot_len);
+ tuple.ipv4_src = iph->saddr;
+ tuple.ipv4_dst = iph->daddr;
+ tuple.sport = ports->source;
+ tuple.dport = ports->dest;
+ break;
+ }
+ case bpf_htons(ETH_P_IPV6): {
+ struct in6_addr *src = (struct in6_addr *)tuple.ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *)tuple.ipv6_dst;
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+ ports = (struct flow_ports___local *)(ip6h + 1);
+ if (ports + 1 > data_end)
+ return XDP_PASS;
+
+ if (ip6h->hop_limit <= 1)
+ return XDP_PASS;
+
+ if (!xdp_flowtable_offload_check_tcp_state(ports, data_end,
+ ip6h->nexthdr))
+ return XDP_PASS;
+
+ tuple.family = AF_INET6;
+ tuple.l4_protocol = ip6h->nexthdr;
+ tuple.tot_len = bpf_ntohs(ip6h->payload_len);
+ *src = ip6h->saddr;
+ *dst = ip6h->daddr;
+ tuple.sport = ports->source;
+ tuple.dport = ports->dest;
+ break;
+ }
+ default:
+ return XDP_PASS;
+ }
+
+ tuplehash = bpf_xdp_flow_lookup(ctx, &tuple, &opts, sizeof(opts));
+ if (!tuplehash)
+ return XDP_PASS;
+
+ val = bpf_map_lookup_elem(&stats, &key);
+ if (val)
+ __sync_add_and_fetch(val, 1);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
new file mode 100644
index 000000000000..330ece2eabdb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "xdp_metadata.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 256);
+ __type(key, __u32);
+ __type(value, __u32);
+} xsk SEC(".maps");
+
+__u64 pkts_skip = 0;
+__u64 pkts_fail = 0;
+__u64 pkts_redir = 0;
+
+extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
+ __u64 *timestamp) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+ enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
+ __be16 *vlan_proto,
+ __u16 *vlan_tci) __ksym;
+
+SEC("xdp.frags")
+int rx(struct xdp_md *ctx)
+{
+ void *data, *data_meta, *data_end;
+ struct ipv6hdr *ip6h = NULL;
+ struct udphdr *udp = NULL;
+ struct iphdr *iph = NULL;
+ struct xdp_meta *meta;
+ struct ethhdr *eth;
+ int err;
+
+ data = (void *)(long)ctx->data;
+ data_end = (void *)(long)ctx->data_end;
+ eth = data;
+
+ if (eth + 1 < data_end && (eth->h_proto == bpf_htons(ETH_P_8021AD) ||
+ eth->h_proto == bpf_htons(ETH_P_8021Q)))
+ eth = (void *)eth + sizeof(struct vlan_hdr);
+
+ if (eth + 1 < data_end && eth->h_proto == bpf_htons(ETH_P_8021Q))
+ eth = (void *)eth + sizeof(struct vlan_hdr);
+
+ if (eth + 1 < data_end) {
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ iph = (void *)(eth + 1);
+ if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP)
+ udp = (void *)(iph + 1);
+ }
+ if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ ip6h = (void *)(eth + 1);
+ if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP)
+ udp = (void *)(ip6h + 1);
+ }
+ if (udp && udp + 1 > data_end)
+ udp = NULL;
+ }
+
+ if (!udp) {
+ __sync_add_and_fetch(&pkts_skip, 1);
+ return XDP_PASS;
+ }
+
+ /* Forwarding UDP:9091 to AF_XDP */
+ if (udp->dest != bpf_htons(9091)) {
+ __sync_add_and_fetch(&pkts_skip, 1);
+ return XDP_PASS;
+ }
+
+ err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
+ if (err) {
+ __sync_add_and_fetch(&pkts_fail, 1);
+ return XDP_PASS;
+ }
+
+ data = (void *)(long)ctx->data;
+ data_meta = (void *)(long)ctx->data_meta;
+ meta = data_meta;
+
+ if (meta + 1 > data) {
+ __sync_add_and_fetch(&pkts_fail, 1);
+ return XDP_PASS;
+ }
+
+ meta->hint_valid = 0;
+
+ meta->xdp_timestamp = bpf_ktime_get_tai_ns();
+ err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp);
+ if (err)
+ meta->rx_timestamp_err = err;
+ else
+ meta->hint_valid |= XDP_META_FIELD_TS;
+
+ err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash,
+ &meta->rx_hash_type);
+ if (err)
+ meta->rx_hash_err = err;
+ else
+ meta->hint_valid |= XDP_META_FIELD_RSS;
+
+ err = bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto,
+ &meta->rx_vlan_tci);
+ if (err)
+ meta->rx_vlan_tag_err = err;
+ else
+ meta->hint_valid |= XDP_META_FIELD_VLAN_TAG;
+
+ __sync_add_and_fetch(&pkts_redir, 1);
+ return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
new file mode 100644
index 000000000000..09bb8a038d52
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "xdp_metadata.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 4);
+ __type(key, __u32);
+ __type(value, __u32);
+} xsk SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} prog_arr SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} dev_map SEC(".maps");
+
+extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
+ __u64 *timestamp) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+ enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
+ __be16 *vlan_proto,
+ __u16 *vlan_tci) __ksym;
+
+SEC("xdp")
+int rx(struct xdp_md *ctx)
+{
+ void *data, *data_meta, *data_end;
+ struct ipv6hdr *ip6h = NULL;
+ struct ethhdr *eth = NULL;
+ struct udphdr *udp = NULL;
+ struct iphdr *iph = NULL;
+ struct xdp_meta *meta;
+ u64 timestamp = -1;
+ int ret;
+
+ data = (void *)(long)ctx->data;
+ data_end = (void *)(long)ctx->data_end;
+ eth = data;
+ if (eth + 1 < data_end) {
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ iph = (void *)(eth + 1);
+ if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP)
+ udp = (void *)(iph + 1);
+ }
+ if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ ip6h = (void *)(eth + 1);
+ if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP)
+ udp = (void *)(ip6h + 1);
+ }
+ if (udp && udp + 1 > data_end)
+ udp = NULL;
+ }
+
+ if (!udp)
+ return XDP_PASS;
+
+ /* Forwarding UDP:8080 to AF_XDP */
+ if (udp->dest != bpf_htons(8080))
+ return XDP_PASS;
+
+ /* Reserve enough for all custom metadata. */
+
+ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
+ if (ret != 0)
+ return XDP_DROP;
+
+ data = (void *)(long)ctx->data;
+ data_meta = (void *)(long)ctx->data_meta;
+
+ if (data_meta + sizeof(struct xdp_meta) > data)
+ return XDP_DROP;
+
+ meta = data_meta;
+
+ /* Export metadata. */
+
+ /* We expect veth bpf_xdp_metadata_rx_timestamp to return 0 HW
+ * timestamp, so put some non-zero value into AF_XDP frame for
+ * the userspace.
+ */
+ bpf_xdp_metadata_rx_timestamp(ctx, &timestamp);
+ if (timestamp == 0)
+ meta->rx_timestamp = 1;
+
+ bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
+ bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto,
+ &meta->rx_vlan_tci);
+
+ return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
+}
+
+SEC("xdp")
+int redirect(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&dev_map, ctx->rx_queue_index, XDP_PASS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata2.c b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
new file mode 100644
index 000000000000..85f88d9d7a78
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "xdp_metadata.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+ enum xdp_rss_hash_type *rss_type) __ksym;
+
+int called;
+
+SEC("freplace/rx")
+int freplace_rx(struct xdp_md *ctx)
+{
+ enum xdp_rss_hash_type type = 0;
+ u32 hash = 0;
+ /* Call _any_ metadata function to make sure we don't crash. */
+ bpf_xdp_metadata_rx_hash(ctx, &hash, &type);
+ called++;
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
index d037262c8937..50c8958f94e5 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/if_ether.h>
+
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
@@ -10,22 +13,107 @@ struct {
__uint(value_size, sizeof(int));
} tx_port SEC(".maps");
-SEC("redirect_map_0")
+SEC("xdp")
int xdp_redirect_map_0(struct xdp_md *xdp)
{
return bpf_redirect_map(&tx_port, 0, 0);
}
-SEC("redirect_map_1")
+SEC("xdp")
int xdp_redirect_map_1(struct xdp_md *xdp)
{
return bpf_redirect_map(&tx_port, 1, 0);
}
-SEC("redirect_map_2")
+SEC("xdp")
int xdp_redirect_map_2(struct xdp_md *xdp)
{
return bpf_redirect_map(&tx_port, 2, 0);
}
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} rxcnt SEC(".maps");
+
+static int xdp_count(struct xdp_md *xdp, __u32 key)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u64 *count;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+ if (bpf_htons(eth->h_proto) == ETH_P_IP) {
+ /* We only count IPv4 packets */
+ count = bpf_map_lookup_elem(&rxcnt, &key);
+ if (count)
+ *count += 1;
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_count_0(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 0);
+}
+
+SEC("xdp")
+int xdp_count_1(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 1);
+}
+
+SEC("xdp")
+int xdp_count_2(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 2);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __be64);
+} rx_mac SEC(".maps");
+
+static int store_mac(struct xdp_md *xdp, __u32 id)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u32 key = id;
+ __be64 mac = 0;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+ /* Only store IPv4 MAC to avoid being polluted by IPv6 packets */
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ __builtin_memcpy(&mac, eth->h_source, ETH_ALEN);
+ bpf_map_update_elem(&rx_mac, &key, &mac, 0);
+ bpf_printk("%s - %x", __func__, mac);
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int store_mac_1(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 0);
+}
+
+SEC("xdp")
+int store_mac_2(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 1);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
new file mode 100644
index 000000000000..bc2945ed8a80
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <string.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* One map use devmap, another one use devmap_hash for testing */
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 1024);
+} map_all SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 128);
+} map_egress SEC(".maps");
+
+/* map to store egress interfaces mac addresses */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __be64);
+ __uint(max_entries, 128);
+} mac_map SEC(".maps");
+
+/* map to store redirect flags for each protocol*/
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u16);
+ __type(value, __u64);
+ __uint(max_entries, 16);
+} redirect_flags SEC(".maps");
+
+SEC("xdp")
+int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ int if_index = ctx->ingress_ifindex;
+ struct ethhdr *eth = data;
+ __u64 *flags_from_map;
+ __u16 h_proto;
+ __u64 nh_off;
+ __u64 flags;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ h_proto = bpf_htons(eth->h_proto);
+
+ flags_from_map = bpf_map_lookup_elem(&redirect_flags, &h_proto);
+
+ /* Default flags for IPv4 : (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) */
+ if (h_proto == ETH_P_IP) {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
+ /* Default flags for IPv6 : 0 */
+ if (h_proto == ETH_P_IPV6) {
+ flags = flags_from_map ? *flags_from_map : 0;
+ return bpf_redirect_map(&map_all, if_index, flags);
+ }
+ /* Default flags for others BPF_F_BROADCAST : 0 */
+ else {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
+}
+
+/* The following 2 progs are for 2nd devmap prog testing */
+SEC("xdp")
+int xdp_redirect_map_all_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&map_egress, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp/devmap")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = ctx->egress_ifindex;
+ struct ethhdr *eth = data;
+ __u64 nh_off;
+ __be64 *mac;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ mac = bpf_map_lookup_elem(&mac_map, &key);
+ if (mac)
+ __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
new file mode 100644
index 000000000000..62b8e29ced9f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -0,0 +1,865 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#define BPF_NO_KFUNC_PROTOTYPES
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <asm/errno.h>
+
+#include "bpf_compiler.h"
+
+#define TC_ACT_OK 0
+#define TC_ACT_SHOT 2
+
+#define NSEC_PER_SEC 1000000000L
+
+#define ETH_ALEN 6
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+
+#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3])
+
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1fff
+
+#define NEXTHDR_TCP 6
+
+#define TCPOPT_NOP 1
+#define TCPOPT_EOL 0
+#define TCPOPT_MSS 2
+#define TCPOPT_WINDOW 3
+#define TCPOPT_SACK_PERM 4
+#define TCPOPT_TIMESTAMP 8
+
+#define TCPOLEN_MSS 4
+#define TCPOLEN_WINDOW 3
+#define TCPOLEN_SACK_PERM 2
+#define TCPOLEN_TIMESTAMP 10
+
+#define TCP_TS_HZ 1000
+#define TS_OPT_WSCALE_MASK 0xf
+#define TS_OPT_SACK (1 << 4)
+#define TS_OPT_ECN (1 << 5)
+#define TSBITS 6
+#define TSMASK (((__u32)1 << TSBITS) - 1)
+#define TCP_MAX_WSCALE 14U
+
+#define IPV4_MAXLEN 60
+#define TCP_MAXLEN 60
+
+#define DEFAULT_MSS4 1460
+#define DEFAULT_MSS6 1440
+#define DEFAULT_WSCALE 7
+#define DEFAULT_TTL 64
+#define MAX_ALLOWED_PORTS 8
+
+#define MAX_PACKET_OFF 0xffff
+
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, 2);
+} values SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u16);
+ __uint(max_entries, MAX_ALLOWED_PORTS);
+} allowed_ports SEC(".maps");
+
+/* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in
+ * vmlinux.h if CONFIG_NF_CONNTRACK=m, so they are redefined locally.
+ */
+
+struct bpf_ct_opts___local {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 dir;
+ u8 reserved[2];
+} __attribute__((preserve_access_index));
+
+#define BPF_F_CURRENT_NETNS (-1)
+
+extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx,
+ struct bpf_sock_tuple *bpf_tuple,
+ __u32 len_tuple,
+ struct bpf_ct_opts___local *opts,
+ __u32 len_opts) __ksym;
+
+extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 len_tuple,
+ struct bpf_ct_opts___local *opts,
+ u32 len_opts) __ksym;
+
+extern void bpf_ct_release(struct nf_conn *ct) __ksym;
+
+static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
+{
+ __u8 tmp[ETH_ALEN];
+
+ __builtin_memcpy(tmp, a, ETH_ALEN);
+ __builtin_memcpy(a, b, ETH_ALEN);
+ __builtin_memcpy(b, tmp, ETH_ALEN);
+}
+
+static __always_inline __u16 csum_fold(__u32 csum)
+{
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (__u16)~csum;
+}
+
+static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto,
+ __u32 csum)
+{
+ __u64 s = csum;
+
+ s += (__u32)saddr;
+ s += (__u32)daddr;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ s += proto + len;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ s += (proto + len) << 8;
+#else
+#error Unknown endian
+#endif
+ s = (s & 0xffffffff) + (s >> 32);
+ s = (s & 0xffffffff) + (s >> 32);
+
+ return csum_fold((__u32)s);
+}
+
+static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __u32 csum)
+{
+ __u64 sum = csum;
+ int i;
+
+ __pragma_loop_unroll
+ for (i = 0; i < 4; i++)
+ sum += (__u32)saddr->in6_u.u6_addr32[i];
+
+ __pragma_loop_unroll
+ for (i = 0; i < 4; i++)
+ sum += (__u32)daddr->in6_u.u6_addr32[i];
+
+ /* Don't combine additions to avoid 32-bit overflow. */
+ sum += bpf_htonl(len);
+ sum += bpf_htonl(proto);
+
+ sum = (sum & 0xffffffff) + (sum >> 32);
+ sum = (sum & 0xffffffff) + (sum >> 32);
+
+ return csum_fold((__u32)sum);
+}
+
+static __always_inline __u64 tcp_clock_ns(void)
+{
+ return bpf_ktime_get_ns();
+}
+
+static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
+{
+ return ns / (NSEC_PER_SEC / TCP_TS_HZ);
+}
+
+static __always_inline __u32 tcp_clock_ms(void)
+{
+ return tcp_ns_to_ts(tcp_clock_ns());
+}
+
+struct tcpopt_context {
+ void *data;
+ void *data_end;
+ __be32 *tsecr;
+ __u8 wscale;
+ bool option_timestamp;
+ bool option_sack;
+ __u32 off;
+};
+
+static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
+{
+ __u64 off = ctx->off;
+ __u8 *data;
+
+ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+ if (off > MAX_PACKET_OFF - sz)
+ return NULL;
+
+ data = ctx->data + off;
+ barrier_var(data);
+ if (data + sz >= ctx->data_end)
+ return NULL;
+
+ ctx->off += sz;
+ return data;
+}
+
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+ __u8 *opcode, *opsize, *wscale, *tsecr;
+ __u32 off = ctx->off;
+
+ opcode = next(ctx, 1);
+ if (!opcode)
+ return 1;
+
+ if (*opcode == TCPOPT_EOL)
+ return 1;
+ if (*opcode == TCPOPT_NOP)
+ return 0;
+
+ opsize = next(ctx, 1);
+ if (!opsize || *opsize < 2)
+ return 1;
+
+ switch (*opcode) {
+ case TCPOPT_WINDOW:
+ wscale = next(ctx, 1);
+ if (!wscale)
+ return 1;
+ if (*opsize == TCPOLEN_WINDOW)
+ ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
+ break;
+ case TCPOPT_TIMESTAMP:
+ tsecr = next(ctx, 4);
+ if (!tsecr)
+ return 1;
+ if (*opsize == TCPOLEN_TIMESTAMP) {
+ ctx->option_timestamp = true;
+ /* Client's tsval becomes our tsecr. */
+ *ctx->tsecr = get_unaligned((__be32 *)tsecr);
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (*opsize == TCPOLEN_SACK_PERM)
+ ctx->option_sack = true;
+ break;
+ }
+
+ ctx->off = off + *opsize;
+
+ return 0;
+}
+
+static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
+{
+ int i;
+
+ for (i = 0; i < 7; i++)
+ if (tscookie_tcpopt_parse(context))
+ return 1;
+ return 0;
+}
+
+static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
+ __u16 tcp_len, __be32 *tsval,
+ __be32 *tsecr, void *data, void *data_end)
+{
+ struct tcpopt_context loop_ctx = {
+ .data = data,
+ .data_end = data_end,
+ .tsecr = tsecr,
+ .wscale = TS_OPT_WSCALE_MASK,
+ .option_timestamp = false,
+ .option_sack = false,
+ /* Note: currently verifier would track .off as unbound scalar.
+ * In case if verifier would at some point get smarter and
+ * compute bounded value for this var, beware that it might
+ * hinder bpf_loop() convergence validation.
+ */
+ .off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
+ };
+ u32 cookie;
+
+ bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0);
+
+ if (!loop_ctx.option_timestamp)
+ return false;
+
+ cookie = tcp_clock_ms() & ~TSMASK;
+ cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK;
+ if (loop_ctx.option_sack)
+ cookie |= TS_OPT_SACK;
+ if (tcp_header->ece && tcp_header->cwr)
+ cookie |= TS_OPT_ECN;
+ *tsval = bpf_htonl(cookie);
+
+ return true;
+}
+
+static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale,
+ __u8 *ttl, bool ipv6)
+{
+ __u32 key = 0;
+ __u64 *value;
+
+ value = bpf_map_lookup_elem(&values, &key);
+ if (value && *value != 0) {
+ if (ipv6)
+ *mss = (*value >> 32) & 0xffff;
+ else
+ *mss = *value & 0xffff;
+ *wscale = (*value >> 16) & 0xf;
+ *ttl = (*value >> 24) & 0xff;
+ return;
+ }
+
+ *mss = ipv6 ? DEFAULT_MSS6 : DEFAULT_MSS4;
+ *wscale = DEFAULT_WSCALE;
+ *ttl = DEFAULT_TTL;
+}
+
+static __always_inline void values_inc_synacks(void)
+{
+ __u32 key = 1;
+ __u64 *value;
+
+ value = bpf_map_lookup_elem(&values, &key);
+ if (value)
+ __sync_fetch_and_add(value, 1);
+}
+
+static __always_inline bool check_port_allowed(__u16 port)
+{
+ __u32 i;
+
+ for (i = 0; i < MAX_ALLOWED_PORTS; i++) {
+ __u32 key = i;
+ __u16 *value;
+
+ value = bpf_map_lookup_elem(&allowed_ports, &key);
+
+ if (!value)
+ break;
+ /* 0 is a terminator value. Check it first to avoid matching on
+ * a forbidden port == 0 and returning true.
+ */
+ if (*value == 0)
+ break;
+
+ if (*value == port)
+ return true;
+ }
+
+ return false;
+}
+
+struct header_pointers {
+ struct ethhdr *eth;
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ struct tcphdr *tcp;
+ __u16 tcp_len;
+};
+
+static __always_inline int tcp_dissect(void *data, void *data_end,
+ struct header_pointers *hdr)
+{
+ hdr->eth = data;
+ if (hdr->eth + 1 > data_end)
+ return XDP_DROP;
+
+ switch (bpf_ntohs(hdr->eth->h_proto)) {
+ case ETH_P_IP:
+ hdr->ipv6 = NULL;
+
+ hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
+ if (hdr->ipv4 + 1 > data_end)
+ return XDP_DROP;
+ if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4))
+ return XDP_DROP;
+ if (hdr->ipv4->version != 4)
+ return XDP_DROP;
+
+ if (hdr->ipv4->protocol != IPPROTO_TCP)
+ return XDP_PASS;
+
+ hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
+ break;
+ case ETH_P_IPV6:
+ hdr->ipv4 = NULL;
+
+ hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
+ if (hdr->ipv6 + 1 > data_end)
+ return XDP_DROP;
+ if (hdr->ipv6->version != 6)
+ return XDP_DROP;
+
+ /* XXX: Extension headers are not supported and could circumvent
+ * XDP SYN flood protection.
+ */
+ if (hdr->ipv6->nexthdr != NEXTHDR_TCP)
+ return XDP_PASS;
+
+ hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
+ break;
+ default:
+ /* XXX: VLANs will circumvent XDP SYN flood protection. */
+ return XDP_PASS;
+ }
+
+ if (hdr->tcp + 1 > data_end)
+ return XDP_DROP;
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ if (hdr->tcp_len < sizeof(*hdr->tcp))
+ return XDP_DROP;
+
+ return XDP_TX;
+}
+
+static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp)
+{
+ struct bpf_ct_opts___local ct_lookup_opts = {
+ .netns_id = BPF_F_CURRENT_NETNS,
+ .l4proto = IPPROTO_TCP,
+ };
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+ __u32 tup_size;
+
+ if (hdr->ipv4) {
+ /* TCP doesn't normally use fragments, and XDP can't reassemble
+ * them.
+ */
+ if ((hdr->ipv4->frag_off & bpf_htons(IP_MF | IP_OFFSET)) != 0)
+ return XDP_DROP;
+
+ tup.ipv4.saddr = hdr->ipv4->saddr;
+ tup.ipv4.daddr = hdr->ipv4->daddr;
+ tup.ipv4.sport = hdr->tcp->source;
+ tup.ipv4.dport = hdr->tcp->dest;
+ tup_size = sizeof(tup.ipv4);
+ } else if (hdr->ipv6) {
+ __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr));
+ __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr));
+ tup.ipv6.sport = hdr->tcp->source;
+ tup.ipv6.dport = hdr->tcp->dest;
+ tup_size = sizeof(tup.ipv6);
+ } else {
+ /* The verifier can't track that either ipv4 or ipv6 is not
+ * NULL.
+ */
+ return XDP_ABORTED;
+ }
+ if (xdp)
+ ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
+ else
+ ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
+ if (ct) {
+ unsigned long status = ct->status;
+
+ bpf_ct_release(ct);
+ if (status & IPS_CONFIRMED)
+ return XDP_PASS;
+ } else if (ct_lookup_opts.error != -ENOENT) {
+ return XDP_ABORTED;
+ }
+
+ /* error == -ENOENT || !(status & IPS_CONFIRMED) */
+ return XDP_TX;
+}
+
+static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss,
+ __u8 wscale)
+{
+ __be32 *start = buf;
+
+ *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
+
+ if (!tsopt)
+ return buf - start;
+
+ if (tsopt[0] & bpf_htonl(1 << 4))
+ *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) |
+ (TCPOLEN_SACK_PERM << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ else
+ *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ *buf++ = tsopt[0];
+ *buf++ = tsopt[1];
+
+ if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf))
+ *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_WINDOW << 16) |
+ (TCPOLEN_WINDOW << 8) |
+ wscale);
+
+ return buf - start;
+}
+
+static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header,
+ __u32 cookie, __be32 *tsopt,
+ __u16 mss, __u8 wscale)
+{
+ void *tcp_options;
+
+ tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (tsopt && (tsopt[0] & bpf_htonl(1 << 5)))
+ tcp_flag_word(tcp_header) |= TCP_FLAG_ECE;
+ tcp_header->doff = 5; /* doff is part of tcp_flag_word. */
+ swap(tcp_header->source, tcp_header->dest);
+ tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1);
+ tcp_header->seq = bpf_htonl(cookie);
+ tcp_header->window = 0;
+ tcp_header->urg_ptr = 0;
+ tcp_header->check = 0; /* Calculate checksum later. */
+
+ tcp_options = (void *)(tcp_header + 1);
+ tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale);
+}
+
+static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr,
+ __u32 cookie, __be32 *tsopt)
+{
+ __u8 wscale;
+ __u16 mss;
+ __u8 ttl;
+
+ values_get_tcpipopts(&mss, &wscale, &ttl, false);
+
+ swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
+
+ swap(hdr->ipv4->saddr, hdr->ipv4->daddr);
+ hdr->ipv4->check = 0; /* Calculate checksum later. */
+ hdr->ipv4->tos = 0;
+ hdr->ipv4->id = 0;
+ hdr->ipv4->ttl = ttl;
+
+ tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
+
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len);
+}
+
+static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr,
+ __u32 cookie, __be32 *tsopt)
+{
+ __u8 wscale;
+ __u16 mss;
+ __u8 ttl;
+
+ values_get_tcpipopts(&mss, &wscale, &ttl, true);
+
+ swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
+
+ swap(hdr->ipv6->saddr, hdr->ipv6->daddr);
+ *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000);
+ hdr->ipv6->hop_limit = ttl;
+
+ tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
+
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len);
+}
+
+static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
+ void *ctx,
+ void *data, void *data_end,
+ bool xdp)
+{
+ __u32 old_pkt_size, new_pkt_size;
+ /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the
+ * BPF verifier if tsopt is not volatile. Volatile forces it to store
+ * the pointer value and use it directly, otherwise tcp_mkoptions is
+ * (mis)compiled like this:
+ * if (!tsopt)
+ * return buf - start;
+ * reg = stored_return_value_of_tscookie_init;
+ * if (reg)
+ * tsopt = tsopt_buf;
+ * else
+ * tsopt = NULL;
+ * ...
+ * *buf++ = tsopt[1];
+ * It creates a dead branch where tsopt is assigned NULL, but the
+ * verifier can't prove it's dead and blocks the program.
+ */
+ __be32 * volatile tsopt = NULL;
+ __be32 tsopt_buf[2] = {};
+ __u16 ip_len;
+ __u32 cookie;
+ __s64 value;
+
+ /* Checksum is not yet verified, but both checksum failure and TCP
+ * header checks return XDP_DROP, so the order doesn't matter.
+ */
+ if (hdr->tcp->fin || hdr->tcp->rst)
+ return XDP_DROP;
+
+ /* Issue SYN cookies on allowed ports, drop SYN packets on blocked
+ * ports.
+ */
+ if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest)))
+ return XDP_DROP;
+
+ if (hdr->ipv4) {
+ /* Check the IPv4 and TCP checksums before creating a SYNACK. */
+ value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_fold(value) != 0)
+ return XDP_DROP; /* Bad IPv4 checksum. */
+
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr,
+ hdr->tcp_len, IPPROTO_TCP, value) != 0)
+ return XDP_DROP; /* Bad TCP checksum. */
+
+ ip_len = sizeof(*hdr->ipv4);
+
+ value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp,
+ hdr->tcp_len);
+ } else if (hdr->ipv6) {
+ /* Check the TCP checksum before creating a SYNACK. */
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr,
+ hdr->tcp_len, IPPROTO_TCP, value) != 0)
+ return XDP_DROP; /* Bad TCP checksum. */
+
+ ip_len = sizeof(*hdr->ipv6);
+
+ value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp,
+ hdr->tcp_len);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ if (value < 0)
+ return XDP_ABORTED;
+ cookie = (__u32)value;
+
+ if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
+ &tsopt_buf[0], &tsopt_buf[1], data, data_end))
+ tsopt = tsopt_buf;
+
+ /* Check that there is enough space for a SYNACK. It also covers
+ * the check that the destination of the __builtin_memmove below
+ * doesn't overflow.
+ */
+ if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end)
+ return XDP_ABORTED;
+
+ if (hdr->ipv4) {
+ if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) {
+ struct tcphdr *new_tcp_header;
+
+ new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4);
+ __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp));
+ hdr->tcp = new_tcp_header;
+
+ hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4;
+ }
+
+ tcpv4_gen_synack(hdr, cookie, tsopt);
+ } else if (hdr->ipv6) {
+ tcpv6_gen_synack(hdr, cookie, tsopt);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ /* Recalculate checksums. */
+ hdr->tcp->check = 0;
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (hdr->ipv4) {
+ hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr,
+ hdr->ipv4->daddr,
+ hdr->tcp_len,
+ IPPROTO_TCP,
+ value);
+
+ hdr->ipv4->check = 0;
+ value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ hdr->ipv4->check = csum_fold(value);
+ } else if (hdr->ipv6) {
+ hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr,
+ &hdr->ipv6->daddr,
+ hdr->tcp_len,
+ IPPROTO_TCP,
+ value);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ /* Set the new packet size. */
+ old_pkt_size = data_end - data;
+ new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4;
+ if (xdp) {
+ if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size))
+ return XDP_ABORTED;
+ } else {
+ if (bpf_skb_change_tail(ctx, new_pkt_size, 0))
+ return XDP_ABORTED;
+ }
+
+ values_inc_synacks();
+
+ return XDP_TX;
+}
+
+static __always_inline int syncookie_handle_ack(struct header_pointers *hdr)
+{
+ int err;
+
+ if (hdr->tcp->rst)
+ return XDP_DROP;
+
+ if (hdr->ipv4)
+ err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp);
+ else if (hdr->ipv6)
+ err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp);
+ else
+ return XDP_ABORTED;
+ if (err)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end,
+ struct header_pointers *hdr, bool xdp)
+{
+ int ret;
+
+ ret = tcp_dissect(data, data_end, hdr);
+ if (ret != XDP_TX)
+ return ret;
+
+ ret = tcp_lookup(ctx, hdr, xdp);
+ if (ret != XDP_TX)
+ return ret;
+
+ /* Packet is TCP and doesn't belong to an established connection. */
+
+ if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1)
+ return XDP_DROP;
+
+ /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len
+ * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier.
+ */
+ if (xdp) {
+ if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len))
+ return XDP_ABORTED;
+ } else {
+ /* Without volatile the verifier throws this error:
+ * R9 32-bit pointer arithmetic prohibited
+ */
+ volatile u64 old_len = data_end - data;
+
+ if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0))
+ return XDP_ABORTED;
+ }
+
+ return XDP_TX;
+}
+
+static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end,
+ struct header_pointers *hdr, bool xdp)
+{
+ if (hdr->ipv4) {
+ hdr->eth = data;
+ hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
+ /* IPV4_MAXLEN is needed when calculating checksum.
+ * At least sizeof(struct iphdr) is needed here to access ihl.
+ */
+ if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end)
+ return XDP_ABORTED;
+ hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
+ } else if (hdr->ipv6) {
+ hdr->eth = data;
+ hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
+ hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ if ((void *)hdr->tcp + TCP_MAXLEN > data_end)
+ return XDP_ABORTED;
+
+ /* We run out of registers, tcp_len gets spilled to the stack, and the
+ * verifier forgets its min and max values checked above in tcp_dissect.
+ */
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ if (hdr->tcp_len < sizeof(*hdr->tcp))
+ return XDP_ABORTED;
+
+ return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end, xdp) :
+ syncookie_handle_ack(hdr);
+}
+
+SEC("xdp")
+int syncookie_xdp(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct header_pointers hdr;
+ int ret;
+
+ ret = syncookie_part1(ctx, data, data_end, &hdr, true);
+ if (ret != XDP_TX)
+ return ret;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ return syncookie_part2(ctx, data, data_end, &hdr, true);
+}
+
+SEC("tc")
+int syncookie_tc(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct header_pointers hdr;
+ int ret;
+
+ ret = syncookie_part1(skb, data, data_end, &hdr, false);
+ if (ret != XDP_TX)
+ return ret == XDP_PASS ? TC_ACT_OK : TC_ACT_SHOT;
+
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+
+ ret = syncookie_part2(skb, data, data_end, &hdr, false);
+ switch (ret) {
+ case XDP_PASS:
+ return TC_ACT_OK;
+ case XDP_TX:
+ return bpf_redirect(skb->ifindex, 0);
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c
index 94e6c2b281cb..5f725c720e00 100644
--- a/tools/testing/selftests/bpf/progs/xdp_tx.c
+++ b/tools/testing/selftests/bpf/progs/xdp_tx.c
@@ -3,7 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("tx")
+SEC("xdp")
int xdp_tx(struct xdp_md *xdp)
{
return XDP_TX;
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 6b9ca40bd1f4..44e2b0ef23ae 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -15,6 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
#include "xdping.h"
struct {
@@ -86,10 +87,9 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type)
return XDP_TX;
}
-SEC("xdpclient")
+SEC("xdp")
int xdping_client(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct pinginfo *pinginfo = NULL;
struct ethhdr *eth = data;
@@ -117,7 +117,7 @@ int xdping_client(struct xdp_md *ctx)
return XDP_PASS;
if (pinginfo->start) {
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < XDPING_MAX_COUNT; i++) {
if (pinginfo->times[i] == 0)
break;
@@ -150,10 +150,9 @@ int xdping_client(struct xdp_md *ctx)
return XDP_TX;
}
-SEC("xdpserver")
+SEC("xdp")
int xdping_server(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
struct icmphdr *icmph;
diff --git a/tools/testing/selftests/bpf/progs/xdpwall.c b/tools/testing/selftests/bpf/progs/xdpwall.c
new file mode 100644
index 000000000000..c2dd0c28237a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdpwall.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <stdbool.h>
+#include <stdint.h>
+#include <linux/stddef.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+enum pkt_parse_err {
+ NO_ERR,
+ BAD_IP6_HDR,
+ BAD_IP4GUE_HDR,
+ BAD_IP6GUE_HDR,
+};
+
+enum pkt_flag {
+ TUNNEL = 0x1,
+ TCP_SYN = 0x2,
+ QUIC_INITIAL_FLAG = 0x4,
+ TCP_ACK = 0x8,
+ TCP_RST = 0x10
+};
+
+struct v4_lpm_key {
+ __u32 prefixlen;
+ __u32 src;
+};
+
+struct v4_lpm_val {
+ struct v4_lpm_key key;
+ __u8 val;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 16);
+ __type(key, struct in6_addr);
+ __type(value, bool);
+} v6_addr_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 16);
+ __type(key, __u32);
+ __type(value, bool);
+} v4_addr_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(max_entries, 16);
+ __uint(key_size, sizeof(struct v4_lpm_key));
+ __uint(value_size, sizeof(struct v4_lpm_val));
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} v4_lpm_val_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 16);
+ __type(key, int);
+ __type(value, __u8);
+} tcp_port_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 16);
+ __type(key, int);
+ __type(value, __u16);
+} udp_port_map SEC(".maps");
+
+enum ip_type { V4 = 1, V6 = 2 };
+
+struct fw_match_info {
+ __u8 v4_src_ip_match;
+ __u8 v6_src_ip_match;
+ __u8 v4_src_prefix_match;
+ __u8 v4_dst_prefix_match;
+ __u8 tcp_dp_match;
+ __u16 udp_sp_match;
+ __u16 udp_dp_match;
+ bool is_tcp;
+ bool is_tcp_syn;
+};
+
+struct pkt_info {
+ enum ip_type type;
+ union {
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ } ip;
+ int sport;
+ int dport;
+ __u16 trans_hdr_offset;
+ __u8 proto;
+ __u8 flags;
+};
+
+static __always_inline struct ethhdr *parse_ethhdr(void *data, void *data_end)
+{
+ struct ethhdr *eth = data;
+
+ if (eth + 1 > data_end)
+ return NULL;
+
+ return eth;
+}
+
+static __always_inline __u8 filter_ipv6_addr(const struct in6_addr *ipv6addr)
+{
+ __u8 *leaf;
+
+ leaf = bpf_map_lookup_elem(&v6_addr_map, ipv6addr);
+
+ return leaf ? *leaf : 0;
+}
+
+static __always_inline __u8 filter_ipv4_addr(const __u32 ipaddr)
+{
+ __u8 *leaf;
+
+ leaf = bpf_map_lookup_elem(&v4_addr_map, &ipaddr);
+
+ return leaf ? *leaf : 0;
+}
+
+static __always_inline __u8 filter_ipv4_lpm(const __u32 ipaddr)
+{
+ struct v4_lpm_key v4_key = {};
+ struct v4_lpm_val *lpm_val;
+
+ v4_key.src = ipaddr;
+ v4_key.prefixlen = 32;
+
+ lpm_val = bpf_map_lookup_elem(&v4_lpm_val_map, &v4_key);
+
+ return lpm_val ? lpm_val->val : 0;
+}
+
+
+static __always_inline void
+filter_src_dst_ip(struct pkt_info* info, struct fw_match_info* match_info)
+{
+ if (info->type == V6) {
+ match_info->v6_src_ip_match =
+ filter_ipv6_addr(&info->ip.ipv6->saddr);
+ } else if (info->type == V4) {
+ match_info->v4_src_ip_match =
+ filter_ipv4_addr(info->ip.ipv4->saddr);
+ match_info->v4_src_prefix_match =
+ filter_ipv4_lpm(info->ip.ipv4->saddr);
+ match_info->v4_dst_prefix_match =
+ filter_ipv4_lpm(info->ip.ipv4->daddr);
+ }
+}
+
+static __always_inline void *
+get_transport_hdr(__u16 offset, void *data, void *data_end)
+{
+ if (offset > 255 || data + offset > data_end)
+ return NULL;
+
+ return data + offset;
+}
+
+static __always_inline bool tcphdr_only_contains_flag(struct tcphdr *tcp,
+ __u32 FLAG)
+{
+ return (tcp_flag_word(tcp) &
+ (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN)) == FLAG;
+}
+
+static __always_inline void set_tcp_flags(struct pkt_info *info,
+ struct tcphdr *tcp) {
+ if (tcphdr_only_contains_flag(tcp, TCP_FLAG_SYN))
+ info->flags |= TCP_SYN;
+ else if (tcphdr_only_contains_flag(tcp, TCP_FLAG_ACK))
+ info->flags |= TCP_ACK;
+ else if (tcphdr_only_contains_flag(tcp, TCP_FLAG_RST))
+ info->flags |= TCP_RST;
+}
+
+static __always_inline bool
+parse_tcp(struct pkt_info *info, void *transport_hdr, void *data_end)
+{
+ struct tcphdr *tcp = transport_hdr;
+
+ if (tcp + 1 > data_end)
+ return false;
+
+ info->sport = bpf_ntohs(tcp->source);
+ info->dport = bpf_ntohs(tcp->dest);
+ set_tcp_flags(info, tcp);
+
+ return true;
+}
+
+static __always_inline bool
+parse_udp(struct pkt_info *info, void *transport_hdr, void *data_end)
+{
+ struct udphdr *udp = transport_hdr;
+
+ if (udp + 1 > data_end)
+ return false;
+
+ info->sport = bpf_ntohs(udp->source);
+ info->dport = bpf_ntohs(udp->dest);
+
+ return true;
+}
+
+static __always_inline __u8 filter_tcp_port(int port)
+{
+ __u8 *leaf = bpf_map_lookup_elem(&tcp_port_map, &port);
+
+ return leaf ? *leaf : 0;
+}
+
+static __always_inline __u16 filter_udp_port(int port)
+{
+ __u16 *leaf = bpf_map_lookup_elem(&udp_port_map, &port);
+
+ return leaf ? *leaf : 0;
+}
+
+static __always_inline bool
+filter_transport_hdr(void *transport_hdr, void *data_end,
+ struct pkt_info *info, struct fw_match_info *match_info)
+{
+ if (info->proto == IPPROTO_TCP) {
+ if (!parse_tcp(info, transport_hdr, data_end))
+ return false;
+
+ match_info->is_tcp = true;
+ match_info->is_tcp_syn = (info->flags & TCP_SYN) > 0;
+
+ match_info->tcp_dp_match = filter_tcp_port(info->dport);
+ } else if (info->proto == IPPROTO_UDP) {
+ if (!parse_udp(info, transport_hdr, data_end))
+ return false;
+
+ match_info->udp_dp_match = filter_udp_port(info->dport);
+ match_info->udp_sp_match = filter_udp_port(info->sport);
+ }
+
+ return true;
+}
+
+static __always_inline __u8
+parse_gue_v6(struct pkt_info *info, struct ipv6hdr *ip6h, void *data_end)
+{
+ struct udphdr *udp = (struct udphdr *)(ip6h + 1);
+ void *encap_data = udp + 1;
+
+ if (udp + 1 > data_end)
+ return BAD_IP6_HDR;
+
+ if (udp->dest != bpf_htons(6666))
+ return NO_ERR;
+
+ info->flags |= TUNNEL;
+
+ if (encap_data + 1 > data_end)
+ return BAD_IP6GUE_HDR;
+
+ if (*(__u8 *)encap_data & 0x30) {
+ struct ipv6hdr *inner_ip6h = encap_data;
+
+ if (inner_ip6h + 1 > data_end)
+ return BAD_IP6GUE_HDR;
+
+ info->type = V6;
+ info->proto = inner_ip6h->nexthdr;
+ info->ip.ipv6 = inner_ip6h;
+ info->trans_hdr_offset += sizeof(struct ipv6hdr) + sizeof(struct udphdr);
+ } else {
+ struct iphdr *inner_ip4h = encap_data;
+
+ if (inner_ip4h + 1 > data_end)
+ return BAD_IP6GUE_HDR;
+
+ info->type = V4;
+ info->proto = inner_ip4h->protocol;
+ info->ip.ipv4 = inner_ip4h;
+ info->trans_hdr_offset += sizeof(struct iphdr) + sizeof(struct udphdr);
+ }
+
+ return NO_ERR;
+}
+
+static __always_inline __u8 parse_ipv6_gue(struct pkt_info *info,
+ void *data, void *data_end)
+{
+ struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+
+ if (ip6h + 1 > data_end)
+ return BAD_IP6_HDR;
+
+ info->proto = ip6h->nexthdr;
+ info->ip.ipv6 = ip6h;
+ info->type = V6;
+ info->trans_hdr_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+
+ if (info->proto == IPPROTO_UDP)
+ return parse_gue_v6(info, ip6h, data_end);
+
+ return NO_ERR;
+}
+
+SEC("xdp")
+int edgewall(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)(ctx->data_end);
+ void *data = (void *)(long)(ctx->data);
+ struct fw_match_info match_info = {};
+ struct pkt_info info = {};
+ void *transport_hdr;
+ struct ethhdr *eth;
+ bool filter_res;
+ __u32 proto;
+
+ eth = parse_ethhdr(data, data_end);
+ if (!eth)
+ return XDP_DROP;
+
+ proto = eth->h_proto;
+ if (proto != bpf_htons(ETH_P_IPV6))
+ return XDP_DROP;
+
+ if (parse_ipv6_gue(&info, data, data_end))
+ return XDP_DROP;
+
+ if (info.proto == IPPROTO_ICMPV6)
+ return XDP_PASS;
+
+ if (info.proto != IPPROTO_TCP && info.proto != IPPROTO_UDP)
+ return XDP_DROP;
+
+ filter_src_dst_ip(&info, &match_info);
+
+ transport_hdr = get_transport_hdr(info.trans_hdr_offset, data,
+ data_end);
+ if (!transport_hdr)
+ return XDP_DROP;
+
+ filter_res = filter_transport_hdr(transport_hdr, data_end,
+ &info, &match_info);
+ if (!filter_res)
+ return XDP_DROP;
+
+ if (match_info.is_tcp && !match_info.is_tcp_syn)
+ return XDP_PASS;
+
+ return XDP_DROP;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c
new file mode 100644
index 000000000000..a1d9f106c3f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xfrm_info.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+
+struct bpf_xfrm_info___local {
+ u32 if_id;
+ int link;
+} __attribute__((preserve_access_index));
+
+__u32 req_if_id;
+__u32 resp_if_id;
+
+int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
+ const struct bpf_xfrm_info___local *from) __ksym;
+int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx,
+ struct bpf_xfrm_info___local *to) __ksym;
+
+SEC("tc")
+int set_xfrm_info(struct __sk_buff *skb)
+{
+ struct bpf_xfrm_info___local info = { .if_id = req_if_id };
+
+ return bpf_skb_set_xfrm_info(skb, &info) ? TC_ACT_SHOT : TC_ACT_UNSPEC;
+}
+
+SEC("tc")
+int get_xfrm_info(struct __sk_buff *skb)
+{
+ struct bpf_xfrm_info___local info = {};
+
+ if (bpf_skb_get_xfrm_info(skb, &info) < 0)
+ return TC_ACT_SHOT;
+
+ resp_if_id = info.if_id;
+
+ return TC_ACT_UNSPEC;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
new file mode 100644
index 000000000000..683306db8594
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Intel */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/errno.h>
+#include "xsk_xdp_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} xsk SEC(".maps");
+
+static unsigned int idx;
+int adjust_value = 0;
+int count = 0;
+
+SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
+{
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
+SEC("xdp.frags") int xsk_xdp_drop(struct xdp_md *xdp)
+{
+ /* Drop every other packet */
+ if (idx++ % 2)
+ return XDP_DROP;
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
+SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp)
+{
+ void *data, *data_meta;
+ struct xdp_info *meta;
+ int err;
+
+ /* Reserve enough for all custom metadata. */
+ err = bpf_xdp_adjust_meta(xdp, -(int)sizeof(struct xdp_info));
+ if (err)
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_meta = (void *)(long)xdp->data_meta;
+
+ if (data_meta + sizeof(struct xdp_info) > data)
+ return XDP_DROP;
+
+ meta = data_meta;
+ meta->count = count++;
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
+SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ void *data_end = (void *)(long)xdp->data_end;
+ struct ethhdr *eth = data;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ /* Redirecting packets based on the destination MAC address */
+ idx = ((unsigned int)(eth->h_dest[5])) / 2;
+ if (idx > MAX_SOCKETS)
+ return XDP_DROP;
+
+ return bpf_redirect_map(&xsk, idx, XDP_DROP);
+}
+
+SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp)
+{
+ __u32 buff_len, curr_buff_len;
+ int ret;
+
+ buff_len = bpf_xdp_get_buff_len(xdp);
+ if (buff_len == 0)
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_tail(xdp, adjust_value);
+ if (ret < 0) {
+ /* Handle unsupported cases */
+ if (ret == -EOPNOTSUPP) {
+ /* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case
+ * is unsupported
+ */
+ adjust_value = -EOPNOTSUPP;
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+ }
+
+ return XDP_DROP;
+ }
+
+ curr_buff_len = bpf_xdp_get_buff_len(xdp);
+ if (curr_buff_len != buff_len + adjust_value)
+ return XDP_DROP;
+
+ if (curr_buff_len > buff_len) {
+ __u32 *pkt_data = (void *)(long)xdp->data;
+ __u32 len, words_to_end, seq_num;
+
+ len = curr_buff_len - PKT_HDR_ALIGN;
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ seq_num = words_to_end;
+
+ /* Convert sequence number to network byte order. Store this in the last 4 bytes of
+ * the packet. Use 'adjust_value' to determine the position at the end of the
+ * packet for storing the sequence number.
+ */
+ seq_num = __constant_htonl(words_to_end);
+ bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num,
+ sizeof(seq_num));
+ }
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sdt-config.h b/tools/testing/selftests/bpf/sdt-config.h
new file mode 100644
index 000000000000..733045a52771
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt-config.h
@@ -0,0 +1,6 @@
+/* includes/sys/sdt-config.h. Generated from sdt-config.h.in by configure.
+
+ This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to
+ indicate whether the assembler supports "?" in .pushsection directives. */
+
+#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1
diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
new file mode 100644
index 000000000000..1fcfa5160231
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -0,0 +1,515 @@
+/* <sys/sdt.h> - Systemtap static probe definition macros.
+
+ This file is dedicated to the public domain, pursuant to CC0
+ (https://creativecommons.org/publicdomain/zero/1.0/)
+*/
+
+#ifndef _SYS_SDT_H
+#define _SYS_SDT_H 1
+
+/*
+ This file defines a family of macros
+
+ STAP_PROBEn(op1, ..., opn)
+
+ that emit a nop into the instruction stream, and some data into an auxiliary
+ note section. The data in the note section describes the operands, in terms
+ of size and location. Each location is encoded as assembler operand string.
+ Consumer tools such as gdb or systemtap insert breakpoints on top of
+ the nop, and decode the location operand-strings, like an assembler,
+ to find the values being passed.
+
+ The operand strings are selected by the compiler for each operand.
+ They are constrained by gcc inline-assembler codes. The default is:
+
+ #define STAP_SDT_ARG_CONSTRAINT nor
+
+ This is a good default if the operands tend to be integral and
+ moderate in number (smaller than number of registers). In other
+ cases, the compiler may report "'asm' requires impossible reload" or
+ similar. In this case, consider simplifying the macro call (fewer
+ and simpler operands), reduce optimization, or override the default
+ constraints string via:
+
+ #define STAP_SDT_ARG_CONSTRAINT g
+ #include <sys/sdt.h>
+
+ See also:
+ https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ */
+
+
+
+#ifdef __ASSEMBLER__
+# define _SDT_PROBE(provider, name, n, arglist) \
+ _SDT_ASM_BODY(provider, name, _SDT_ASM_SUBSTR_1, (_SDT_DEPAREN_##n arglist)) \
+ _SDT_ASM_BASE
+# define _SDT_ASM_1(x) x;
+# define _SDT_ASM_2(a, b) a,b;
+# define _SDT_ASM_3(a, b, c) a,b,c;
+# define _SDT_ASM_5(a, b, c, d, e) a,b,c,d,e;
+# define _SDT_ASM_STRING_1(x) .asciz #x;
+# define _SDT_ASM_SUBSTR_1(x) .ascii #x;
+# define _SDT_DEPAREN_0() /* empty */
+# define _SDT_DEPAREN_1(a) a
+# define _SDT_DEPAREN_2(a,b) a b
+# define _SDT_DEPAREN_3(a,b,c) a b c
+# define _SDT_DEPAREN_4(a,b,c,d) a b c d
+# define _SDT_DEPAREN_5(a,b,c,d,e) a b c d e
+# define _SDT_DEPAREN_6(a,b,c,d,e,f) a b c d e f
+# define _SDT_DEPAREN_7(a,b,c,d,e,f,g) a b c d e f g
+# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h) a b c d e f g h
+# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i) a b c d e f g h i
+# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j) a b c d e f g h i j
+# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k) a b c d e f g h i j k
+# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l) a b c d e f g h i j k l
+#else
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name) \
+ __asm__ __volatile__ ("" :: "m" (provider##_##name##_semaphore));
+#else
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name)
+#endif
+
+# define _SDT_PROBE(provider, name, n, arglist) \
+ do { \
+ _SDT_NOTE_SEMAPHORE_USE(provider, name); \
+ __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \
+ :: _SDT_ASM_OPERANDS_##n arglist); \
+ __asm__ __volatile__ (_SDT_ASM_BASE); \
+ } while (0)
+# define _SDT_S(x) #x
+# define _SDT_ASM_1(x) _SDT_S(x) "\n"
+# define _SDT_ASM_2(a, b) _SDT_S(a) "," _SDT_S(b) "\n"
+# define _SDT_ASM_3(a, b, c) _SDT_S(a) "," _SDT_S(b) "," \
+ _SDT_S(c) "\n"
+# define _SDT_ASM_5(a, b, c, d, e) _SDT_S(a) "," _SDT_S(b) "," \
+ _SDT_S(c) "," _SDT_S(d) "," \
+ _SDT_S(e) "\n"
+# define _SDT_ASM_ARGS(n) _SDT_ASM_TEMPLATE_##n
+# define _SDT_ASM_STRING_1(x) _SDT_ASM_1(.asciz #x)
+# define _SDT_ASM_SUBSTR_1(x) _SDT_ASM_1(.ascii #x)
+
+# define _SDT_ARGFMT(no) _SDT_ASM_1(_SDT_SIGN %n[_SDT_S##no]) \
+ _SDT_ASM_1(_SDT_SIZE %n[_SDT_S##no]) \
+ _SDT_ASM_1(_SDT_TYPE %n[_SDT_S##no]) \
+ _SDT_ASM_SUBSTR(_SDT_ARGTMPL(_SDT_A##no))
+
+
+# ifndef STAP_SDT_ARG_CONSTRAINT
+# if defined __powerpc__
+# define STAP_SDT_ARG_CONSTRAINT nZr
+# elif defined __arm__
+# define STAP_SDT_ARG_CONSTRAINT g
+# elif defined __loongarch__
+# define STAP_SDT_ARG_CONSTRAINT nmr
+# else
+# define STAP_SDT_ARG_CONSTRAINT nor
+# endif
+# endif
+
+# define _SDT_STRINGIFY(x) #x
+# define _SDT_ARG_CONSTRAINT_STRING(x) _SDT_STRINGIFY(x)
+/* _SDT_S encodes the size and type as 0xSSTT which is decoded by the assembler
+ macros _SDT_SIZE and _SDT_TYPE */
+# define _SDT_ARG(n, x) \
+ [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? (int)-1 : 1) * (-(((int) _SDT_ARGSIZE (x)) << 8) + (-(0x7f & __builtin_classify_type (x))))), \
+ [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x))
+#endif
+#define _SDT_ASM_STRING(x) _SDT_ASM_STRING_1(x)
+#define _SDT_ASM_SUBSTR(x) _SDT_ASM_SUBSTR_1(x)
+
+#define _SDT_ARGARRAY(x) (__builtin_classify_type (x) == 14 \
+ || __builtin_classify_type (x) == 5)
+
+#ifdef __cplusplus
+# define _SDT_ARGSIGNED(x) (!_SDT_ARGARRAY (x) \
+ && __sdt_type<__typeof (x)>::__sdt_signed)
+# define _SDT_ARGSIZE(x) (_SDT_ARGARRAY (x) \
+ ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x) (x)
+
+# include <cstddef>
+
+template<typename __sdt_T>
+struct __sdt_type
+{
+ static const bool __sdt_signed = false;
+};
+
+#define __SDT_ALWAYS_SIGNED(T) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = true; };
+#define __SDT_COND_SIGNED(T,CT) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = ((CT)(-1) < 1); };
+__SDT_ALWAYS_SIGNED(signed char)
+__SDT_ALWAYS_SIGNED(short)
+__SDT_ALWAYS_SIGNED(int)
+__SDT_ALWAYS_SIGNED(long)
+__SDT_ALWAYS_SIGNED(long long)
+__SDT_ALWAYS_SIGNED(volatile signed char)
+__SDT_ALWAYS_SIGNED(volatile short)
+__SDT_ALWAYS_SIGNED(volatile int)
+__SDT_ALWAYS_SIGNED(volatile long)
+__SDT_ALWAYS_SIGNED(volatile long long)
+__SDT_ALWAYS_SIGNED(const signed char)
+__SDT_ALWAYS_SIGNED(const short)
+__SDT_ALWAYS_SIGNED(const int)
+__SDT_ALWAYS_SIGNED(const long)
+__SDT_ALWAYS_SIGNED(const long long)
+__SDT_ALWAYS_SIGNED(const volatile signed char)
+__SDT_ALWAYS_SIGNED(const volatile short)
+__SDT_ALWAYS_SIGNED(const volatile int)
+__SDT_ALWAYS_SIGNED(const volatile long)
+__SDT_ALWAYS_SIGNED(const volatile long long)
+__SDT_COND_SIGNED(char, char)
+__SDT_COND_SIGNED(wchar_t, wchar_t)
+__SDT_COND_SIGNED(volatile char, char)
+__SDT_COND_SIGNED(volatile wchar_t, wchar_t)
+__SDT_COND_SIGNED(const char, char)
+__SDT_COND_SIGNED(const wchar_t, wchar_t)
+__SDT_COND_SIGNED(const volatile char, char)
+__SDT_COND_SIGNED(const volatile wchar_t, wchar_t)
+#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+/* __SDT_COND_SIGNED(char16_t) */
+/* __SDT_COND_SIGNED(char32_t) */
+#endif
+
+template<typename __sdt_E>
+struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {};
+
+template<typename __sdt_E, size_t __sdt_N>
+struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {};
+
+#elif !defined(__ASSEMBLER__)
+__extension__ extern unsigned long long __sdt_unsp;
+# define _SDT_ARGINTTYPE(x) \
+ __typeof (__builtin_choose_expr (((__builtin_classify_type (x) \
+ + 3) & -4) == 4, (x), 0U))
+# define _SDT_ARGSIGNED(x) \
+ (!__extension__ \
+ (__builtin_constant_p ((((unsigned long long) \
+ (_SDT_ARGINTTYPE (x)) __sdt_unsp) \
+ & ((unsigned long long)1 << (sizeof (unsigned long long) \
+ * __CHAR_BIT__ - 1))) == 0) \
+ || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0))
+# define _SDT_ARGSIZE(x) \
+ (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x) (x)
+#endif
+
+#if defined __powerpc__ || defined __powerpc64__
+# define _SDT_ARGTMPL(id) %I[id]%[id]
+#elif defined __i386__
+# define _SDT_ARGTMPL(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+# define _SDT_ARGTMPL(id) %[id]
+#endif
+
+/* NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+ operand note format.
+
+ The named register may be a longer or shorter (!) alias for the
+ storage where the value in question is found. For example, on
+ i386, 64-bit value may be put in register pairs, and the register
+ name stored would identify just one of them. Previously, gcc was
+ asked to emit the %w[id] (16-bit alias of some registers holding
+ operands), even when a wider 32-bit value was used.
+
+ Bottom line: the byte-width given before the @ sign governs. If
+ there is a mismatch between that width and that of the named
+ register, then a sys/sdt.h note consumer may need to employ
+ architecture-specific heuristics to figure out where the compiler
+ has actually put the complete value.
+*/
+
+#ifdef __LP64__
+# define _SDT_ASM_ADDR .8byte
+#else
+# define _SDT_ASM_ADDR .4byte
+#endif
+
+/* The ia64 and s390 nop instructions take an argument. */
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define _SDT_NOP nop 0
+#else
+#define _SDT_NOP nop
+#endif
+
+#define _SDT_NOTE_NAME "stapsdt"
+#define _SDT_NOTE_TYPE 3
+
+/* If the assembler supports the necessary feature, then we can play
+ nice with code in COMDAT sections, which comes up in C++ code.
+ Without that assembler support, some combinations of probe placements
+ in certain kinds of C++ code may produce link-time errors. */
+#include "sdt-config.h"
+#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT
+# define _SDT_ASM_AUTOGROUP "?"
+#else
+# define _SDT_ASM_AUTOGROUP ""
+#endif
+
+#define _SDT_DEF_MACROS \
+ _SDT_ASM_1(.altmacro) \
+ _SDT_ASM_1(.macro _SDT_SIGN x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_1(.iflt \\x) \
+ _SDT_ASM_1(.ascii "-") \
+ _SDT_ASM_1(.endif) \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_SIZE_ x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_1(.ascii "\x") \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_SIZE x) \
+ _SDT_ASM_1(_SDT_SIZE_ %%((-(-\\x*((-\\x>0)-(-\\x<0))))>>8)) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_TYPE_ x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_2(.ifc 8,\\x) \
+ _SDT_ASM_1(.ascii "f") \
+ _SDT_ASM_1(.endif) \
+ _SDT_ASM_1(.ascii "@") \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_TYPE x) \
+ _SDT_ASM_1(_SDT_TYPE_ %%((\\x)&(0xff))) \
+ _SDT_ASM_1(.endm)
+
+#define _SDT_UNDEF_MACROS \
+ _SDT_ASM_1(.purgem _SDT_SIGN) \
+ _SDT_ASM_1(.purgem _SDT_SIZE_) \
+ _SDT_ASM_1(.purgem _SDT_SIZE) \
+ _SDT_ASM_1(.purgem _SDT_TYPE_) \
+ _SDT_ASM_1(.purgem _SDT_TYPE)
+
+#define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \
+ _SDT_DEF_MACROS \
+ _SDT_ASM_1(990: _SDT_NOP) \
+ _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
+ _SDT_ASM_1( .balign 4) \
+ _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \
+ _SDT_ASM_1(991: .asciz _SDT_NOTE_NAME) \
+ _SDT_ASM_1(992: .balign 4) \
+ _SDT_ASM_1(993: _SDT_ASM_ADDR 990b) \
+ _SDT_ASM_1( _SDT_ASM_ADDR _.stapsdt.base) \
+ _SDT_SEMAPHORE(provider,name) \
+ _SDT_ASM_STRING(provider) \
+ _SDT_ASM_STRING(name) \
+ pack_args args \
+ _SDT_ASM_SUBSTR(\x00) \
+ _SDT_UNDEF_MACROS \
+ _SDT_ASM_1(994: .balign 4) \
+ _SDT_ASM_1( .popsection)
+
+#define _SDT_ASM_BASE \
+ _SDT_ASM_1(.ifndef _.stapsdt.base) \
+ _SDT_ASM_5( .pushsection .stapsdt.base,"aG","progbits", \
+ .stapsdt.base,comdat) \
+ _SDT_ASM_1( .weak _.stapsdt.base) \
+ _SDT_ASM_1( .hidden _.stapsdt.base) \
+ _SDT_ASM_1( _.stapsdt.base: .space 1) \
+ _SDT_ASM_2( .size _.stapsdt.base, 1) \
+ _SDT_ASM_1( .popsection) \
+ _SDT_ASM_1(.endif)
+
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_SEMAPHORE(p,n) \
+ _SDT_ASM_1( _SDT_ASM_ADDR p##_##n##_semaphore)
+#else
+#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR 0)
+#endif
+
+#define _SDT_ASM_BLANK _SDT_ASM_SUBSTR(\x20)
+#define _SDT_ASM_TEMPLATE_0 /* no arguments */
+#define _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(1)
+#define _SDT_ASM_TEMPLATE_2 _SDT_ASM_TEMPLATE_1 _SDT_ASM_BLANK _SDT_ARGFMT(2)
+#define _SDT_ASM_TEMPLATE_3 _SDT_ASM_TEMPLATE_2 _SDT_ASM_BLANK _SDT_ARGFMT(3)
+#define _SDT_ASM_TEMPLATE_4 _SDT_ASM_TEMPLATE_3 _SDT_ASM_BLANK _SDT_ARGFMT(4)
+#define _SDT_ASM_TEMPLATE_5 _SDT_ASM_TEMPLATE_4 _SDT_ASM_BLANK _SDT_ARGFMT(5)
+#define _SDT_ASM_TEMPLATE_6 _SDT_ASM_TEMPLATE_5 _SDT_ASM_BLANK _SDT_ARGFMT(6)
+#define _SDT_ASM_TEMPLATE_7 _SDT_ASM_TEMPLATE_6 _SDT_ASM_BLANK _SDT_ARGFMT(7)
+#define _SDT_ASM_TEMPLATE_8 _SDT_ASM_TEMPLATE_7 _SDT_ASM_BLANK _SDT_ARGFMT(8)
+#define _SDT_ASM_TEMPLATE_9 _SDT_ASM_TEMPLATE_8 _SDT_ASM_BLANK _SDT_ARGFMT(9)
+#define _SDT_ASM_TEMPLATE_10 _SDT_ASM_TEMPLATE_9 _SDT_ASM_BLANK _SDT_ARGFMT(10)
+#define _SDT_ASM_TEMPLATE_11 _SDT_ASM_TEMPLATE_10 _SDT_ASM_BLANK _SDT_ARGFMT(11)
+#define _SDT_ASM_TEMPLATE_12 _SDT_ASM_TEMPLATE_11 _SDT_ASM_BLANK _SDT_ARGFMT(12)
+#define _SDT_ASM_OPERANDS_0() [__sdt_dummy] "g" (0)
+#define _SDT_ASM_OPERANDS_1(arg1) _SDT_ARG(1, arg1)
+#define _SDT_ASM_OPERANDS_2(arg1, arg2) \
+ _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, arg2)
+#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \
+ _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3)
+#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \
+ _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4)
+#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \
+ _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5)
+#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
+ _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6)
+#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7)
+#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
+ _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \
+ _SDT_ARG(8, arg8)
+#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \
+ _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \
+ _SDT_ARG(9, arg9)
+#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+ _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \
+ _SDT_ARG(10, arg10)
+#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+ _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \
+ _SDT_ARG(11, arg11)
+#define _SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+ _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \
+ _SDT_ARG(12, arg12)
+
+/* These macros can be used in C, C++, or assembly code.
+ In assembly code the arguments should use normal assembly operand syntax. */
+
+#define STAP_PROBE(provider, name) \
+ _SDT_PROBE(provider, name, 0, ())
+#define STAP_PROBE1(provider, name, arg1) \
+ _SDT_PROBE(provider, name, 1, (arg1))
+#define STAP_PROBE2(provider, name, arg1, arg2) \
+ _SDT_PROBE(provider, name, 2, (arg1, arg2))
+#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \
+ _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3))
+#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \
+ _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4))
+#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \
+ _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5))
+#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6) \
+ _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6))
+#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7))
+#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \
+ _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8))
+#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\
+ _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
+#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+ _SDT_PROBE(provider, name, 10, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10))
+#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+ _SDT_PROBE(provider, name, 11, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11))
+#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+ _SDT_PROBE(provider, name, 12, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12))
+
+/* This STAP_PROBEV macro can be used in variadic scenarios, where the
+ number of probe arguments is not known until compile time. Since
+ variadic macro support may vary with compiler options, you must
+ pre-#define SDT_USE_VARIADIC to enable this type of probe.
+
+ The trick to count __VA_ARGS__ was inspired by this post by
+ Laurent Deniau <laurent.deniau@cern.ch>:
+ http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5
+
+ Note that our _SDT_NARG is called with an extra 0 arg that's not
+ counted, so we don't have to worry about the behavior of macros
+ called without any arguments. */
+
+#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0)
+#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N
+#ifdef SDT_USE_VARIADIC
+#define _SDT_PROBE_N(provider, name, N, ...) \
+ _SDT_PROBE(provider, name, N, (__VA_ARGS__))
+#define STAP_PROBEV(provider, name, ...) \
+ _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__)
+#endif
+
+/* These macros are for use in asm statements. You must compile
+ with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro.
+
+ The STAP_PROBE_ASM macro generates a quoted string to be used in the
+ template portion of the asm statement, concatenated with strings that
+ contain the actual assembly code around the probe site.
+
+ For example:
+
+ asm ("before\n"
+ STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi))
+ "after");
+
+ emits the assembly code for "before\nafter", with a probe in between.
+ The probe arguments are the %eax register, and the value of the memory
+ word located 4 bytes past the address in the %esi register. Note that
+ because this is a simple asm, not a GNU C extended asm statement, these
+ % characters do not need to be doubled to generate literal %reg names.
+
+ In a GNU C extended asm statement, the probe arguments can be specified
+ using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments. The paired
+ macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments,
+ and appears in the input operand list of the asm statement. For example:
+
+ asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand
+ STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3))
+ "otherinsn %[namedarg]"
+ : "r" (outvar)
+ : "g" (some_value), [namedarg] "i" (1234),
+ STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234));
+
+ This is just like writing:
+
+ STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234));
+
+ but the probe site is right between "someinsn" and "otherinsn".
+
+ The probe arguments in STAP_PROBE_ASM can be given as assembly
+ operands instead, even inside a GNU C extended asm statement.
+ Note that these can use operand templates like %0 or %[name],
+ and likewise they must write %%reg for a literal operand of %reg. */
+
+#define _SDT_ASM_BODY_1(p,n,...) _SDT_ASM_BODY(p,n,_SDT_ASM_SUBSTR,(__VA_ARGS__))
+#define _SDT_ASM_BODY_2(p,n,...) _SDT_ASM_BODY(p,n,/*_SDT_ASM_STRING */,__VA_ARGS__)
+#define _SDT_ASM_BODY_N2(p,n,no,...) _SDT_ASM_BODY_ ## no(p,n,__VA_ARGS__)
+#define _SDT_ASM_BODY_N1(p,n,no,...) _SDT_ASM_BODY_N2(p,n,no,__VA_ARGS__)
+#define _SDT_ASM_BODY_N(p,n,...) _SDT_ASM_BODY_N1(p,n,_SDT_NARG(0, __VA_ARGS__),__VA_ARGS__)
+
+#if __STDC_VERSION__ >= 199901L
+# define STAP_PROBE_ASM(provider, name, ...) \
+ _SDT_ASM_BODY_N(provider, name, __VA_ARGS__) \
+ _SDT_ASM_BASE
+# define STAP_PROBE_ASM_OPERANDS(n, ...) _SDT_ASM_OPERANDS_##n(__VA_ARGS__)
+#else
+# define STAP_PROBE_ASM(provider, name, args) \
+ _SDT_ASM_BODY(provider, name, /* _SDT_ASM_STRING */, (args)) \
+ _SDT_ASM_BASE
+#endif
+#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n,"use _SDT_ASM_TEMPLATE_"
+
+
+/* DTrace compatible macro names. */
+#define DTRACE_PROBE(provider,probe) \
+ STAP_PROBE(provider,probe)
+#define DTRACE_PROBE1(provider,probe,parm1) \
+ STAP_PROBE1(provider,probe,parm1)
+#define DTRACE_PROBE2(provider,probe,parm1,parm2) \
+ STAP_PROBE2(provider,probe,parm1,parm2)
+#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \
+ STAP_PROBE3(provider,probe,parm1,parm2,parm3)
+#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4) \
+ STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4)
+#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) \
+ STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5)
+#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \
+ STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6)
+#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \
+ STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7)
+#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \
+ STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8)
+#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \
+ STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9)
+#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \
+ STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10)
+#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \
+ STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11)
+#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \
+ STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12)
+
+
+#endif /* sys/sdt.h */
diff --git a/tools/testing/selftests/bpf/task_local_storage_helpers.h b/tools/testing/selftests/bpf/task_local_storage_helpers.h
new file mode 100644
index 000000000000..281f86132766
--- /dev/null
+++ b/tools/testing/selftests/bpf/task_local_storage_helpers.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_STORAGE_HELPER_H
+#define __TASK_LOCAL_STORAGE_HELPER_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#ifndef __NR_pidfd_open
+#ifdef __alpha__
+#define __NR_pidfd_open 544
+#else
+#define __NR_pidfd_open 434
+#endif
+#endif
+
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
+{
+ return syscall(__NR_pidfd_open, pid, flags);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py
index 4fed2dc25c0a..1c2408ee1f5d 100644
--- a/tools/testing/selftests/bpf/test_bpftool.py
+++ b/tools/testing/selftests/bpf/test_bpftool.py
@@ -57,6 +57,11 @@ def default_iface(f):
return f(*args, iface, **kwargs)
return wrapper
+DMESG_EMITTING_HELPERS = [
+ "bpf_probe_write_user",
+ "bpf_trace_printk",
+ "bpf_trace_vprintk",
+ ]
class TestBpftool(unittest.TestCase):
@classmethod
@@ -67,10 +72,7 @@ class TestBpftool(unittest.TestCase):
@default_iface
def test_feature_dev_json(self, iface):
- unexpected_helpers = [
- "bpf_probe_write_user",
- "bpf_trace_printk",
- ]
+ unexpected_helpers = DMESG_EMITTING_HELPERS
expected_keys = [
"syscall_config",
"program_types",
@@ -94,10 +96,7 @@ class TestBpftool(unittest.TestCase):
bpftool_json(["feature", "probe"]),
bpftool_json(["feature"]),
]
- unexpected_helpers = [
- "bpf_probe_write_user",
- "bpf_trace_printk",
- ]
+ unexpected_helpers = DMESG_EMITTING_HELPERS
expected_keys = [
"syscall_config",
"system_config",
@@ -121,10 +120,7 @@ class TestBpftool(unittest.TestCase):
bpftool_json(["feature", "probe", "kernel", "full"]),
bpftool_json(["feature", "probe", "full"]),
]
- expected_helpers = [
- "bpf_probe_write_user",
- "bpf_trace_printk",
- ]
+ expected_helpers = DMESG_EMITTING_HELPERS
for tc in test_cases:
# Check if expected helpers are included at least once in any
@@ -157,7 +153,7 @@ class TestBpftool(unittest.TestCase):
not_full_set.add(helper)
self.assertCountEqual(full_set - not_full_set,
- {"bpf_probe_write_user", "bpf_trace_printk"})
+ set(DMESG_EMITTING_HELPERS))
self.assertCountEqual(not_full_set - full_set, set())
def test_feature_macros(self):
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
index 66690778e36d..718f59692ccb 100755
--- a/tools/testing/selftests/bpf/test_bpftool.sh
+++ b/tools/testing/selftests/bpf/test_bpftool.sh
@@ -2,4 +2,10 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2020 SUSE LLC.
+# 'make -C tools/testing/selftests/bpf install' will install to SCRIPT_DIR
+SCRIPT_DIR=$(dirname $(realpath $0))
+
+# 'make -C tools/testing/selftests/bpf' will install to BPFTOOL_INSTALL_PATH
+BPFTOOL_INSTALL_PATH="$SCRIPT_DIR"/tools/sbin
+export PATH=$SCRIPT_DIR:$BPFTOOL_INSTALL_PATH:$PATH
python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index 2db3c60e1e61..b03a87571592 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -22,7 +22,7 @@ KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
cd $KDIR_ROOT_DIR
if [ ! -e tools/bpf/bpftool/Makefile ]; then
echo -e "skip: bpftool files not found!\n"
- exit 0
+ exit 4 # KSFT_SKIP=4
fi
ERROR=0
@@ -85,23 +85,6 @@ make_with_tmpdir() {
echo
}
-make_doc_and_clean() {
- echo -e "\$PWD: $PWD"
- echo -e "command: make -s $* doc >/dev/null"
- RST2MAN_OPTS="--exit-status=1" make $J -s $* doc
- if [ $? -ne 0 ] ; then
- ERROR=1
- printf "FAILURE: Errors or warnings when building documentation\n"
- fi
- (
- if [ $# -ge 1 ] ; then
- cd ${@: -1}
- fi
- make -s doc-clean
- )
- echo
-}
-
echo "Trying to build bpftool"
echo -e "... through kbuild\n"
@@ -162,7 +145,3 @@ make_and_clean
make_with_tmpdir OUTPUT
make_with_tmpdir O
-
-echo -e "Checking documentation build\n"
-# From tools/bpf/bpftool
-make_doc_and_clean
diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh
new file mode 100755
index 000000000000..515b1df0501e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_map.sh
@@ -0,0 +1,398 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME="bpftool_map"
+BPF_FILE="security_bpf_map.bpf.o"
+BPF_ITER_FILE="bpf_iter_map_elem.bpf.o"
+PROTECTED_MAP_NAME="prot_map"
+NOT_PROTECTED_MAP_NAME="not_prot_map"
+BPF_FS_TMP_PARENT="/tmp"
+BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT}
+# bpftool will mount bpf file system under BPF_DIR if it is not mounted
+# under BPF_FS_PARENT.
+BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME"
+SCRIPT_DIR=$(dirname $(realpath "$0"))
+BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE"
+BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE"
+BPFTOOL_PATH="bpftool"
+# Assume the script is located under tools/testing/selftests/bpf/
+KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../)
+
+_cleanup()
+{
+ set +eu
+
+ # If BPF_DIR is a mount point this will not remove the mount point itself.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null
+
+ # Unmount if BPF filesystem was temporarily created.
+ if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then
+ # A loop and recursive unmount are required as bpftool might
+ # create multiple mounts. For example, a bind mount of the directory
+ # to itself. The bind mount is created to change mount propagation
+ # flags on an actual mount point.
+ max_attempts=3
+ attempt=0
+ while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do
+ umount -R "$BPF_DIR" 2>/dev/null
+ attempt=$((attempt+1))
+ done
+
+ # The directory still exists. Remove it now.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null
+ fi
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+check_root_privileges() {
+ if [ $(id -u) -ne 0 ]; then
+ echo "Need root privileges"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify bpftool path.
+# Parameters:
+# $1: bpftool path
+verify_bpftool_path() {
+ local bpftool_path="$1"
+ if ! "$bpftool_path" version > /dev/null 2>&1; then
+ echo "Could not run test without bpftool"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify BTF support.
+# The test requires BTF support for fmod_ret programs.
+verify_btf_support() {
+ if [ ! -f /sys/kernel/btf/vmlinux ]; then
+ echo "Could not run test without BTF support"
+ exit $ksft_skip
+ fi
+}
+
+# Function to initialize map entries with keys [0..2] and values set to 0.
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+initialize_map_entries() {
+ local map_name="$1"
+ local bpftool_path="$2"
+
+ for key in 0 1 2; do
+ "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key
+ done
+}
+
+# Test read access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+access_for_read() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+
+ # Test read access to the map.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Read access to $key in $map_name failed"
+ exit 1
+ fi
+
+ # Test read access to map's BTF data.
+ if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then
+ echo " Read access to $map_name for BTF data failed"
+ exit 1
+ fi
+}
+
+# Test write access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_write() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test entry deletion for the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_deletion() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ # Test deletion by key for the map.
+ # Before deleting, check the key exists.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Key $key does not exist in $map_name"
+ exit 1
+ fi
+
+ # Delete by key.
+ if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Deletion for $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Deletion for $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+
+ # After deleting, check the entry existence according to the expected status.
+ if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Key $key for $map_name was not deleted but should have been deleted"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "false" ]; then
+ echo "Key $key for $map_name was deleted but should have not been deleted"
+ exit 1
+ fi
+ fi
+
+ # Test creation of map's deleted entry, if deletion was successful.
+ # Otherwise, the entry exists.
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test map elements iterator.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: BPF_DIR
+# $5: bpf iterator object file path
+iterate_map_elem() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local bpf_dir="$4"
+ local bpf_file="$5"
+ local pin_path="$bpf_dir/map_iterator"
+
+ "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name"
+ if [ ! -f "$pin_path" ]; then
+ echo " Failed to pin iterator to $pin_path"
+ exit 1
+ fi
+
+ cat "$pin_path" 1>/dev/null
+ rm "$pin_path" 2>/dev/null
+}
+
+# Function to test map access with configurable write expectations
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key for rw
+# $5: key to delete
+# $6: Whether write should succeed (true/false)
+# $7: BPF_DIR
+# $8: bpf iterator object file path
+access_map() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key_for_rw="$4"
+ local key_to_del="$5"
+ local write_should_succeed="$6"
+ local bpf_dir="$7"
+ local bpf_iter_file_path="$8"
+
+ access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw"
+ access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \
+ "$write_should_succeed"
+ access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \
+ "$write_should_succeed"
+ iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \
+ "$bpf_iter_file_path"
+}
+
+# Function to test map access with configurable write expectations
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+# $3: BPF_DIR
+# $4: Whether write should succeed (true/false)
+# $5: bpf iterator object file path
+test_map_access() {
+ local map_name="$1"
+ local bpftool_path="$2"
+ local bpf_dir="$3"
+ local pin_path="$bpf_dir/${map_name}_pinned"
+ local write_should_succeed="$4"
+ local bpf_iter_file_path="$5"
+
+ # Test access to the map by name.
+ access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+
+ # Pin the map to the BPF filesystem
+ "$bpftool_path" map pin name "$map_name" "$pin_path"
+ if [ ! -e "$pin_path" ]; then
+ echo " Failed to pin $map_name"
+ exit 1
+ fi
+
+ # Test access to the pinned map.
+ access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+}
+
+# Function to test map creation and map-of-maps
+# Parameters:
+# $1: bpftool path
+# $2: BPF_DIR
+test_map_creation_and_map_of_maps() {
+ local bpftool_path="$1"
+ local bpf_dir="$2"
+ local outer_map_name="outer_map_tt"
+ local inner_map_name="inner_map_tt"
+
+ "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \
+ value 4 entries 4 name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$inner_map_name" ]; then
+ echo " Failed to create inner map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \
+ key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$outer_map_name" ]; then
+ echo " Failed to create outer map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ # Add entries to the outer map by name and by pinned path.
+ "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \
+ value pinned "$bpf_dir/$inner_map_name"
+ "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \
+ name "$inner_map_name"
+
+ # The outer map should be full by now.
+ # The following map update command is expected to fail.
+ if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \
+ "$inner_map_name" 2>/dev/null; then
+ echo " Update for $outer_map_name succeeded but should have failed"
+ exit 1
+ fi
+}
+
+# Function to test map access with the btf list command
+# Parameters:
+# $1: bpftool path
+test_map_access_with_btf_list() {
+ local bpftool_path="$1"
+
+ # The btf list command iterates over maps for
+ # loaded BPF programs.
+ if ! "$bpftool_path" btf list 1>/dev/null; then
+ echo " Failed to access btf data"
+ exit 1
+ fi
+}
+
+set -eu
+
+trap cleanup_skip EXIT
+
+check_root_privileges
+
+verify_bpftool_path "$BPFTOOL_PATH"
+
+verify_btf_support
+
+trap cleanup EXIT
+
+# Load and attach the BPF programs to control maps access.
+"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach
+
+initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+
+# Activate the map protection mechanism. Protection status is controlled
+# by a value stored in the prot_status_map at index 0.
+"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0
+
+# Test protected map (write should fail).
+test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \
+ "$BPF_ITER_FILE_PATH"
+
+# Test not protected map (write should succeed).
+test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \
+ "$BPF_ITER_FILE_PATH"
+
+test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR"
+
+test_map_access_with_btf_list "$BPFTOOL_PATH"
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
index 1bf81b49457a..b5520692f41b 100755
--- a/tools/testing/selftests/bpf/test_bpftool_metadata.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
@@ -4,6 +4,9 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+BPF_FILE_USED="metadata_used.bpf.o"
+BPF_FILE_UNUSED="metadata_unused.bpf.o"
+
TESTNAME=bpftool_metadata
BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
BPF_DIR=$BPF_FS/test_$TESTNAME
@@ -55,7 +58,7 @@ mkdir $BPF_DIR
trap cleanup EXIT
-bpftool prog load metadata_unused.o $BPF_DIR/unused
+bpftool prog load $BPF_FILE_UNUSED $BPF_DIR/unused
METADATA_PLAIN="$(bpftool prog)"
echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null
@@ -67,7 +70,7 @@ bpftool map | grep 'metadata.rodata' > /dev/null
rm $BPF_DIR/unused
-bpftool prog load metadata_used.o $BPF_DIR/used
+bpftool prog load $BPF_FILE_USED $BPF_DIR/used
METADATA_PLAIN="$(bpftool prog)"
echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
new file mode 100755
index 000000000000..238121fda5b6
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -0,0 +1,627 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+#
+# Copyright (C) 2021 Isovalent, Inc.
+
+import argparse
+import re
+import os, sys
+
+LINUX_ROOT = os.path.abspath(os.path.join(__file__,
+ os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
+BPFTOOL_DIR = os.getenv('BPFTOOL_DIR',
+ os.path.join(LINUX_ROOT, 'tools/bpf/bpftool'))
+BPFTOOL_BASHCOMP_DIR = os.getenv('BPFTOOL_BASHCOMP_DIR',
+ os.path.join(BPFTOOL_DIR, 'bash-completion'))
+BPFTOOL_DOC_DIR = os.getenv('BPFTOOL_DOC_DIR',
+ os.path.join(BPFTOOL_DIR, 'Documentation'))
+INCLUDE_DIR = os.getenv('INCLUDE_DIR',
+ os.path.join(LINUX_ROOT, 'tools/include'))
+
+retval = 0
+
+class BlockParser(object):
+ """
+ A parser for extracting set of values from blocks such as enums.
+ @reader: a pointer to the open file to parse
+ """
+ def __init__(self, reader):
+ self.reader = reader
+
+ def search_block(self, start_marker):
+ """
+ Search for a given structure in a file.
+ @start_marker: regex marking the beginning of a structure to parse
+ """
+ offset = self.reader.tell()
+ array_start = re.search(start_marker, self.reader.read())
+ if array_start is None:
+ raise Exception('Failed to find start of block')
+ self.reader.seek(offset + array_start.start())
+
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Values to extract must be
+ on separate lines in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line or re.match(end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture and pattern.groups >= 1:
+ entries.add(capture.group(1))
+ return entries
+
+class ArrayParser(BlockParser):
+ """
+ A parser for extracting a set of values from some BPF-related arrays.
+ @reader: a pointer to the open file to parse
+ @array_name: name of the array to parse
+ """
+ end_marker = re.compile('^};')
+
+ def __init__(self, reader, array_name):
+ self.array_name = array_name
+ self.start_marker = re.compile(fr'(static )?const bool {self.array_name}\[.*\] = {{\n')
+ super().__init__(reader)
+
+ def search_block(self):
+ """
+ Search for the given array in a file.
+ """
+ super().search_block(self.start_marker);
+
+ def parse(self):
+ """
+ Parse a block and return data as a dictionary. Items to extract must be
+ on separate lines in the file.
+ """
+ pattern = re.compile(r'\[(BPF_\w*)\]\s*= (true|false),?$')
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if line == '' or re.match(self.end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture:
+ entries |= {capture.group(1)}
+ return entries
+
+class InlineListParser(BlockParser):
+ """
+ A parser for extracting set of values from inline lists.
+ """
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Multiple values to extract
+ can be on a same line in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line:
+ break
+ entries.update(pattern.findall(line))
+ if re.search(end_marker, line):
+ break
+ return entries
+
+class FileExtractor(object):
+ """
+ A generic reader for extracting data from a given file. This class contains
+ several helper methods that wrap around parser objects to extract values
+ from different structures.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def __init__(self):
+ self.reader = open(self.filename, 'r')
+
+ def close(self):
+ """
+ Close the file used by the parser.
+ """
+ self.reader.close()
+
+ def reset_read(self):
+ """
+ Reset the file position indicator for this parser. This is useful when
+ parsing several structures in the file without respecting the order in
+ which those structures appear in the file.
+ """
+ self.reader.seek(0)
+
+ def get_types_from_array(self, array_name):
+ """
+ Search for and parse a list of allowed BPF_* enum members, for example:
+
+ const bool prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = true,
+ [BPF_PROG_TYPE_SOCKET_FILTER] = true,
+ [BPF_PROG_TYPE_KPROBE] = true,
+ };
+
+ Return a set of the enum members, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
+
+ @array_name: name of the array to parse
+ """
+ array_parser = ArrayParser(self.reader, array_name)
+ array_parser.search_block()
+ return array_parser.parse()
+
+ def get_enum(self, enum_name):
+ """
+ Search for and parse an enum containing BPF_* members, for example:
+
+ enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ };
+
+ Return a set containing all member names, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
+
+ @enum_name: name of the enum to parse
+ """
+ start_marker = re.compile(f'enum {enum_name} {{\n')
+ pattern = re.compile(r'^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
+ end_marker = re.compile('^};')
+ parser = BlockParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def make_enum_map(self, names, enum_prefix):
+ """
+ Search for and parse an enum containing BPF_* members, just as get_enum
+ does. However, instead of just returning a set of the variant names,
+ also generate a textual representation from them by (assuming and)
+ removing a provided prefix and lowercasing the remainder. Then return a
+ dict mapping from name to textual representation.
+
+ @enum_values: a set of enum values; e.g., as retrieved by get_enum
+ @enum_prefix: the prefix to remove from each of the variants to infer
+ textual representation
+ """
+ mapping = {}
+ for name in names:
+ if not name.startswith(enum_prefix):
+ raise Exception(f"enum variant {name} does not start with {enum_prefix}")
+ text = name[len(enum_prefix):].lower()
+ mapping[name] = text
+
+ return mapping
+
+ def __get_description_list(self, start_marker, pattern, end_marker):
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def get_rst_list(self, block_name):
+ """
+ Search for and parse a list of type names from RST documentation, for
+ example:
+
+ | *TYPE* := {
+ | **socket** | **kprobe** |
+ | **kretprobe**
+ | }
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(fr'\*{block_name}\* := {{')
+ pattern = re.compile(r'\*\*([\w/-]+)\*\*')
+ end_marker = re.compile('}\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list(self, block_name):
+ """
+ Search for and parse a list of type names from a help message in
+ bpftool, for example:
+
+ " TYPE := { socket | kprobe |\\n"
+ " kretprobe }\\n"
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(fr'"\s*{block_name} := {{')
+ pattern = re.compile(r'([\w/]+) [|}]')
+ end_marker = re.compile('}')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list_macro(self, macro):
+ """
+ Search for and parse a list of values from a help message starting with
+ a macro in bpftool, for example:
+
+ " " HELP_SPEC_OPTIONS " |\\n"
+ " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n"
+
+ Return a set containing all item names, for example:
+
+ {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'}
+
+ @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
+ """
+ start_marker = re.compile(fr'"\s*{macro}\s*" [|}}]')
+ pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]])')
+ end_marker = re.compile('}\\\\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_bashcomp_list(self, block_name):
+ """
+ Search for and parse a list of type names from a variable in bash
+ completion file, for example:
+
+ local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\
+ kretprobe'
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(fr'local {block_name}=\'')
+ pattern = re.compile(r'(?:.*=\')?([\w/]+)')
+ end_marker = re.compile('\'$')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+class SourceFileExtractor(FileExtractor):
+ """
+ An abstract extractor for a source file with usage message.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.get_help_list_macro('HELP_SPEC_OPTIONS')
+
+class MainHeaderFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's main.h
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'main.h')
+
+ def get_common_options(self):
+ """
+ Parse the list of common options in main.h (options that apply to all
+ commands), which looks to the lists of options in other source files
+ but has different start and end markers:
+
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
+
+ Return a set containing all options, such as:
+
+ {'-p', '-d', '--pretty', '--debug', '--json', '-j'}
+ """
+ start_marker = re.compile(f'"OPTIONS :=')
+ pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]"])')
+ end_marker = re.compile('#define')
+
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+class ManSubstitutionsExtractor(SourceFileExtractor):
+ """
+ An extractor for substitutions.rst
+ """
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'substitutions.rst')
+
+ def get_common_options(self):
+ """
+ Parse the list of common options in substitutions.rst (options that
+ apply to all commands).
+
+ Return a set containing all options, such as:
+
+ {'-p', '-d', '--pretty', '--debug', '--json', '-j'}
+ """
+ start_marker = re.compile(r'\|COMMON_OPTIONS\| replace:: {')
+ pattern = re.compile(r'\*\*([\w/-]+)\*\*')
+ end_marker = re.compile('}$')
+
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+class ProgFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's prog.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'prog.c')
+
+ def get_attach_types(self):
+ types = self.get_types_from_array('attach_types')
+ return self.make_enum_map(types, 'BPF_')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class MapFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's map.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'map.c')
+
+ def get_map_help(self):
+ return self.get_help_list('TYPE')
+
+class CgroupFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's cgroup.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'cgroup.c')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class GenericSourceExtractor(SourceFileExtractor):
+ """
+ An extractor for generic source code files.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BpfHeaderExtractor(FileExtractor):
+ """
+ An extractor for the UAPI BPF header.
+ """
+ filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h')
+
+ def __init__(self):
+ super().__init__()
+ self.attach_types = {}
+
+ def get_prog_types(self):
+ return self.get_enum('bpf_prog_type')
+
+ def get_map_type_map(self):
+ names = self.get_enum('bpf_map_type')
+ return self.make_enum_map(names, 'BPF_MAP_TYPE_')
+
+ def get_attach_type_map(self):
+ if not self.attach_types:
+ names = self.get_enum('bpf_attach_type')
+ self.attach_types = self.make_enum_map(names, 'BPF_')
+ return self.attach_types
+
+ def get_cgroup_attach_type_map(self):
+ if not self.attach_types:
+ self.get_attach_type_map()
+ return {name: text for name, text in self.attach_types.items()
+ if name.startswith('BPF_CGROUP')}
+
+class ManPageExtractor(FileExtractor):
+ """
+ An abstract extractor for an RST documentation page.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.get_rst_list('OPTIONS')
+
+class ManProgExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-prog.rst.
+ """
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-prog.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManMapExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-map.rst.
+ """
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-map.rst')
+
+ def get_map_types(self):
+ return self.get_rst_list('TYPE')
+
+class ManCgroupExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-cgroup.rst.
+ """
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-cgroup.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManGenericExtractor(ManPageExtractor):
+ """
+ An extractor for generic RST documentation pages.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BashcompExtractor(FileExtractor):
+ """
+ An extractor for bpftool's bash completion file.
+ """
+ filename = os.path.join(BPFTOOL_BASHCOMP_DIR, 'bpftool')
+
+ def get_prog_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
+
+def verify(first_set, second_set, message):
+ """
+ Print all values that differ between two sets.
+ @first_set: one set to compare
+ @second_set: another set to compare
+ @message: message to print for values belonging to only one of the sets
+ """
+ global retval
+ diff = first_set.symmetric_difference(second_set)
+ if diff:
+ print(message, diff)
+ retval = 1
+
+def main():
+ # No arguments supported at this time, but print usage for -h|--help
+ argParser = argparse.ArgumentParser(description="""
+ Verify that bpftool's code, help messages, documentation and bash
+ completion are all in sync on program types, map types, attach types, and
+ options. Also check that bpftool is in sync with the UAPI BPF header.
+ """)
+ args = argParser.parse_args()
+
+ bpf_info = BpfHeaderExtractor()
+
+ # Map types (names)
+
+ map_info = MapFileExtractor()
+ source_map_types = set(bpf_info.get_map_type_map().values())
+ source_map_types.discard('unspec')
+
+ # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED and BPF_MAP_TYPE_CGROUP_STORAGE
+ # share the same enum value and source_map_types picks
+ # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED/cgroup_storage_deprecated.
+ # Replace 'cgroup_storage_deprecated' with 'cgroup_storage'
+ # so it aligns with what `bpftool map help` shows.
+ source_map_types.remove('cgroup_storage_deprecated')
+ source_map_types.add('cgroup_storage')
+
+ # The same applied to BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and
+ # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE which share the same enum value
+ # and source_map_types picks
+ # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED/percpu_cgroup_storage_deprecated.
+ # Replace 'percpu_cgroup_storage_deprecated' with 'percpu_cgroup_storage'
+ # so it aligns with what `bpftool map help` shows.
+ source_map_types.remove('percpu_cgroup_storage_deprecated')
+ source_map_types.add('percpu_cgroup_storage')
+
+ help_map_types = map_info.get_map_help()
+ help_map_options = map_info.get_options()
+ map_info.close()
+
+ man_map_info = ManMapExtractor()
+ man_map_options = man_map_info.get_options()
+ man_map_types = man_map_info.get_map_types()
+ man_map_info.close()
+
+ verify(source_map_types, help_map_types,
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):')
+ verify(source_map_types, man_map_types,
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):')
+ verify(help_map_options, man_map_options,
+ f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
+
+ # Attach types (names)
+
+ prog_info = ProgFileExtractor()
+ source_prog_attach_types = set(prog_info.get_attach_types().values())
+
+ help_prog_attach_types = prog_info.get_prog_attach_help()
+ help_prog_options = prog_info.get_options()
+ prog_info.close()
+
+ man_prog_info = ManProgExtractor()
+ man_prog_options = man_prog_info.get_options()
+ man_prog_attach_types = man_prog_info.get_attach_types()
+ man_prog_info.close()
+
+
+ bashcomp_info = BashcompExtractor()
+ bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+ bashcomp_info.close()
+
+ verify(source_prog_attach_types, help_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_prog_attach_types, man_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+ verify(help_prog_options, man_prog_options,
+ f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
+ verify(source_prog_attach_types, bashcomp_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+
+ # Cgroup attach types
+ source_cgroup_attach_types = set(bpf_info.get_cgroup_attach_type_map().values())
+ bpf_info.close()
+
+ cgroup_info = CgroupFileExtractor()
+ help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
+ help_cgroup_options = cgroup_info.get_options()
+ cgroup_info.close()
+
+ man_cgroup_info = ManCgroupExtractor()
+ man_cgroup_options = man_cgroup_info.get_options()
+ man_cgroup_attach_types = man_cgroup_info.get_attach_types()
+ man_cgroup_info.close()
+
+ verify(source_cgroup_attach_types, help_cgroup_attach_types,
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_cgroup_attach_types, man_cgroup_attach_types,
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+ verify(help_cgroup_options, man_cgroup_options,
+ f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
+
+ # Options for remaining commands
+
+ for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]:
+ source_info = GenericSourceExtractor(cmd + '.c')
+ help_cmd_options = source_info.get_options()
+ source_info.close()
+
+ man_cmd_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool-' + cmd + '.rst'))
+ man_cmd_options = man_cmd_info.get_options()
+ man_cmd_info.close()
+
+ verify(help_cmd_options, man_cmd_options,
+ f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):')
+
+ source_main_info = GenericSourceExtractor('main.c')
+ help_main_options = source_main_info.get_options()
+ source_main_info.close()
+
+ man_main_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool.rst'))
+ man_main_options = man_main_info.get_options()
+ man_main_info.close()
+
+ verify(help_main_options, man_main_options,
+ f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+
+ # Compare common options (options that apply to all commands)
+
+ main_hdr_info = MainHeaderFileExtractor()
+ source_common_options = main_hdr_info.get_common_options()
+ main_hdr_info.close()
+
+ man_substitutions = ManSubstitutionsExtractor()
+ man_common_options = man_substitutions.get_common_options()
+ man_substitutions.close()
+
+ verify(source_common_options, man_common_options,
+ f'Comparing common options from {main_hdr_info.filename} (HELP_SPEC_OPTIONS) and {man_substitutions.filename}:')
+
+ sys.exit(retval)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 2023725f1962..e65889ab4adf 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -4,6 +4,8 @@
#ifndef _TEST_BTF_H
#define _TEST_BTF_H
+#define BTF_END_RAW 0xdeadbeef
+
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
@@ -39,6 +41,7 @@
#define BTF_MEMBER_ENC(name, type, bits_offset) \
(name), (type), (bits_offset)
#define BTF_ENUM_ENC(name, val) (name), (val)
+#define BTF_ENUM64_ENC(name, val_lo32, val_hi32) (name), (val_lo32), (val_hi32)
#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \
((bitfield_size) << 24 | (bits_offset))
@@ -66,4 +69,19 @@
#define BTF_FUNC_ENC(name, func_proto) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto)
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+
+#define BTF_DECL_ATTR_ENC(value, type, component_idx) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), type), (component_idx)
+
+#define BTF_DECL_TAG_ENC(value, type, component_idx) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+
+#define BTF_TYPE_ATTR_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 1, 0), type)
+
+#define BTF_TYPE_TAG_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
+
#endif /* _TEST_BTF_H */
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
deleted file mode 100644
index 0cda61da5d39..000000000000
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ /dev/null
@@ -1,170 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <assert.h>
-#include <bpf/bpf.h>
-#include <linux/filter.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/sysinfo.h>
-
-#include "bpf_rlimit.h"
-#include "cgroup_helpers.h"
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/"
-
-int main(int argc, char **argv)
-{
- struct bpf_insn prog[] = {
- BPF_LD_MAP_FD(BPF_REG_1, 0), /* percpu map fd */
- BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
-
- BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
- BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_local_storage),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
- int error = EXIT_FAILURE;
- int map_fd, percpu_map_fd, prog_fd, cgroup_fd;
- struct bpf_cgroup_storage_key key;
- unsigned long long value;
- unsigned long long *percpu_value;
- int cpu, nproc;
-
- nproc = get_nprocs_conf();
- percpu_value = malloc(sizeof(*percpu_value) * nproc);
- if (!percpu_value) {
- printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
- goto err;
- }
-
- map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
- sizeof(value), 0, 0);
- if (map_fd < 0) {
- printf("Failed to create map: %s\n", strerror(errno));
- goto out;
- }
-
- percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- sizeof(key), sizeof(value), 0, 0);
- if (percpu_map_fd < 0) {
- printf("Failed to create map: %s\n", strerror(errno));
- goto out;
- }
-
- prog[0].imm = percpu_map_fd;
- prog[7].imm = map_fd;
- prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
- prog, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
- if (prog_fd < 0) {
- printf("Failed to load bpf program: %s\n", bpf_log_buf);
- goto out;
- }
-
- cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
-
- /* Attach the bpf program */
- if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
- printf("Failed to attach bpf program\n");
- goto err;
- }
-
- if (bpf_map_get_next_key(map_fd, NULL, &key)) {
- printf("Failed to get the first key in cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(map_fd, &key, &value)) {
- printf("Failed to lookup cgroup storage 0\n");
- goto err;
- }
-
- for (cpu = 0; cpu < nproc; cpu++)
- percpu_value[cpu] = 1000;
-
- if (bpf_map_update_elem(percpu_map_fd, &key, percpu_value, 0)) {
- printf("Failed to update the data in the cgroup storage\n");
- goto err;
- }
-
- /* Every second packet should be dropped */
- assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
- assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
- assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
-
- /* Check the counter in the cgroup local storage */
- if (bpf_map_lookup_elem(map_fd, &key, &value)) {
- printf("Failed to lookup cgroup storage\n");
- goto err;
- }
-
- if (value != 3) {
- printf("Unexpected data in the cgroup storage: %llu\n", value);
- goto err;
- }
-
- /* Bump the counter in the cgroup local storage */
- value++;
- if (bpf_map_update_elem(map_fd, &key, &value, 0)) {
- printf("Failed to update the data in the cgroup storage\n");
- goto err;
- }
-
- /* Every second packet should be dropped */
- assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
- assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
- assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
-
- /* Check the final value of the counter in the cgroup local storage */
- if (bpf_map_lookup_elem(map_fd, &key, &value)) {
- printf("Failed to lookup the cgroup storage\n");
- goto err;
- }
-
- if (value != 7) {
- printf("Unexpected data in the cgroup storage: %llu\n", value);
- goto err;
- }
-
- /* Check the final value of the counter in the percpu local storage */
-
- for (cpu = 0; cpu < nproc; cpu++)
- percpu_value[cpu] = 0;
-
- if (bpf_map_lookup_elem(percpu_map_fd, &key, percpu_value)) {
- printf("Failed to lookup the per-cpu cgroup storage\n");
- goto err;
- }
-
- value = 0;
- for (cpu = 0; cpu < nproc; cpu++)
- value += percpu_value[cpu];
-
- if (value != nproc * 1000 + 6) {
- printf("Unexpected data in the per-cpu cgroup storage\n");
- goto err;
- }
-
- error = 0;
- printf("test_cgroup_storage:PASS\n");
-
-err:
- cleanup_cgroup_environment();
- free(percpu_value);
-
-out:
- return error;
-}
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index a8d2e9a87fbf..abc2a56ab261 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -1,15 +1,113 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#include <iostream>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
+
+#ifndef _Bool
+#define _Bool bool
+#endif
#include "test_core_extern.skel.h"
+#include "struct_ops_module.skel.h"
+
+template <typename T>
+class Skeleton {
+private:
+ T *skel;
+public:
+ Skeleton(): skel(nullptr) { }
+
+ ~Skeleton() { if (skel) T::destroy(skel); }
+
+ int open(const struct bpf_object_open_opts *opts = nullptr)
+ {
+ int err;
+
+ if (skel)
+ return -EBUSY;
+
+ skel = T::open(opts);
+ err = libbpf_get_error(skel);
+ if (err) {
+ skel = nullptr;
+ return err;
+ }
+
+ return 0;
+ }
+
+ int load() { return T::load(skel); }
+
+ int attach() { return T::attach(skel); }
+
+ void detach() { return T::detach(skel); }
+
+ const T* operator->() const { return skel; }
+
+ T* operator->() { return skel; }
+
+ const T *get() const { return skel; }
+};
+
+static void dump_printf(void *ctx, const char *fmt, va_list args)
+{
+}
+
+static void try_skeleton_template()
+{
+ Skeleton<test_core_extern> skel;
+ std::string prog_name;
+ int err;
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+ err = skel.open(&opts);
+ if (err) {
+ fprintf(stderr, "Skeleton open failed: %d\n", err);
+ return;
+ }
+
+ skel->data->kern_ver = 123;
+ skel->data->int_val = skel->data->ushort_val;
+
+ err = skel.load();
+ if (err) {
+ fprintf(stderr, "Skeleton load failed: %d\n", err);
+ return;
+ }
+
+ if (!skel->kconfig->CONFIG_BPF_SYSCALL)
+ fprintf(stderr, "Seems like CONFIG_BPF_SYSCALL isn't set?!\n");
+
+ err = skel.attach();
+ if (err) {
+ fprintf(stderr, "Skeleton attach failed: %d\n", err);
+ return;
+ }
-/* do nothing, just make sure we can link successfully */
+ prog_name = bpf_program__name(skel->progs.handle_sys_enter);
+ if (prog_name != "handle_sys_enter")
+ fprintf(stderr, "Unexpected program name: %s\n", prog_name.c_str());
+
+ bpf_link__destroy(skel->links.handle_sys_enter);
+ skel->links.handle_sys_enter = bpf_program__attach(skel->progs.handle_sys_enter);
+
+ skel.detach();
+
+ /* destructor will destroy underlying skeleton */
+}
int main(int argc, char *argv[])
{
+ struct btf_dump_opts opts = { };
struct test_core_extern *skel;
+ struct struct_ops_module *skel2;
+ struct btf *btf;
+ int fd;
+
+ try_skeleton_template();
/* libbpf.h */
libbpf_set_print(NULL);
@@ -18,12 +116,23 @@ int main(int argc, char *argv[])
bpf_prog_get_fd_by_id(0);
/* btf.h */
- btf__new(NULL, 0);
+ btf = btf__new(NULL, 0);
+ if (!libbpf_get_error(btf))
+ btf_dump__new(btf, dump_printf, nullptr, &opts);
/* BPF skeleton */
skel = test_core_extern__open_and_load();
test_core_extern__destroy(skel);
+ skel2 = struct_ops_module__open_and_load();
+ struct_ops_module__destroy(skel2);
+
+ fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (fd < 0)
+ std::cout << "FAILED to enable stats: " << fd << std::endl;
+ else
+ ::close(fd);
+
std::cout << "DONE!" << std::endl;
return 0;
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
deleted file mode 100644
index 804dddd97d4c..000000000000
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ /dev/null
@@ -1,82 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2017 Facebook
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-
-#define DEV_CGROUP_PROG "./dev_cgroup.o"
-
-#define TEST_CGROUP "/test-bpf-based-device-cgroup/"
-
-int main(int argc, char **argv)
-{
- struct bpf_object *obj;
- int error = EXIT_FAILURE;
- int prog_fd, cgroup_fd;
- __u32 prog_cnt;
-
- if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
- &obj, &prog_fd)) {
- printf("Failed to load DEV_CGROUP program\n");
- goto out;
- }
-
- cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
- if (cgroup_fd < 0) {
- printf("Failed to create test cgroup\n");
- goto out;
- }
-
- /* Attach bpf program */
- if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_DEVICE, 0)) {
- printf("Failed to attach DEV_CGROUP program");
- goto err;
- }
-
- if (bpf_prog_query(cgroup_fd, BPF_CGROUP_DEVICE, 0, NULL, NULL,
- &prog_cnt)) {
- printf("Failed to query attached programs");
- goto err;
- }
-
- /* All operations with /dev/zero and and /dev/urandom are allowed,
- * everything else is forbidden.
- */
- assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
- assert(system("mknod /tmp/test_dev_cgroup_null c 1 3"));
- assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
-
- /* /dev/zero is whitelisted */
- assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
- assert(system("mknod /tmp/test_dev_cgroup_zero c 1 5") == 0);
- assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
-
- assert(system("dd if=/dev/urandom of=/dev/zero count=64") == 0);
-
- /* src is allowed, target is forbidden */
- assert(system("dd if=/dev/urandom of=/dev/full count=64"));
-
- /* src is forbidden, target is allowed */
- assert(system("dd if=/dev/random of=/dev/zero count=64"));
-
- error = 0;
- printf("test_dev_cgroup:PASS\n");
-
-err:
- cleanup_cgroup_environment();
-
-out:
- return error;
-}
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
new file mode 100755
index 000000000000..679cf968c7d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+set -e
+
+# Assume script is located under tools/testing/selftests/bpf/. We want to start
+# build attempts from the top of kernel repository.
+SCRIPT_REL_PATH=$(realpath $0)
+SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
+KDIR_ROOT_DIR=$(realpath $SCRIPT_REL_DIR/../../../../)
+SCRIPT_REL_DIR=$(dirname $(realpath --relative-to=$KDIR_ROOT_DIR $SCRIPT_REL_PATH))
+cd $KDIR_ROOT_DIR
+
+if [ ! -e $PWD/$SCRIPT_REL_DIR/Makefile ]; then
+ echo -e "skip: bpftool files not found!\n"
+ exit 4 # KSFT_SKIP=4
+fi
+
+for tgt in docs docs-clean; do
+ make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
+done
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
deleted file mode 100644
index 571cc076dd7d..000000000000
--- a/tools/testing/selftests/bpf/test_flow_dissector.c
+++ /dev/null
@@ -1,780 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Inject packets with all sorts of encapsulation into the kernel.
- *
- * IPv4/IPv6 outer layer 3
- * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
- * IPv4/IPv6 inner layer 3
- */
-
-#define _GNU_SOURCE
-
-#include <stddef.h>
-#include <arpa/inet.h>
-#include <asm/byteorder.h>
-#include <error.h>
-#include <errno.h>
-#include <linux/if_packet.h>
-#include <linux/if_ether.h>
-#include <linux/ipv6.h>
-#include <netinet/ip.h>
-#include <netinet/in.h>
-#include <netinet/udp.h>
-#include <poll.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#define CFG_PORT_INNER 8000
-
-/* Add some protocol definitions that do not exist in userspace */
-
-struct grehdr {
- uint16_t unused;
- uint16_t protocol;
-} __attribute__((packed));
-
-struct guehdr {
- union {
- struct {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 hlen:5,
- control:1,
- version:2;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- __u8 version:2,
- control:1,
- hlen:5;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
- __u8 proto_ctype;
- __be16 flags;
- };
- __be32 word;
- };
-};
-
-static uint8_t cfg_dsfield_inner;
-static uint8_t cfg_dsfield_outer;
-static uint8_t cfg_encap_proto;
-static bool cfg_expect_failure = false;
-static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */
-static int cfg_l3_inner = AF_UNSPEC;
-static int cfg_l3_outer = AF_UNSPEC;
-static int cfg_num_pkt = 10;
-static int cfg_num_secs = 0;
-static char cfg_payload_char = 'a';
-static int cfg_payload_len = 100;
-static int cfg_port_gue = 6080;
-static bool cfg_only_rx;
-static bool cfg_only_tx;
-static int cfg_src_port = 9;
-
-static char buf[ETH_DATA_LEN];
-
-#define INIT_ADDR4(name, addr4, port) \
- static struct sockaddr_in name = { \
- .sin_family = AF_INET, \
- .sin_port = __constant_htons(port), \
- .sin_addr.s_addr = __constant_htonl(addr4), \
- };
-
-#define INIT_ADDR6(name, addr6, port) \
- static struct sockaddr_in6 name = { \
- .sin6_family = AF_INET6, \
- .sin6_port = __constant_htons(port), \
- .sin6_addr = addr6, \
- };
-
-INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
-INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
-INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
-INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
-INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
-INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
-
-INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
-INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
-
-static unsigned long util_gettime(void)
-{
- struct timeval tv;
-
- gettimeofday(&tv, NULL);
- return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
-}
-
-static void util_printaddr(const char *msg, struct sockaddr *addr)
-{
- unsigned long off = 0;
- char nbuf[INET6_ADDRSTRLEN];
-
- switch (addr->sa_family) {
- case PF_INET:
- off = __builtin_offsetof(struct sockaddr_in, sin_addr);
- break;
- case PF_INET6:
- off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
- break;
- default:
- error(1, 0, "printaddr: unsupported family %u\n",
- addr->sa_family);
- }
-
- if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
- sizeof(nbuf)))
- error(1, errno, "inet_ntop");
-
- fprintf(stderr, "%s: %s\n", msg, nbuf);
-}
-
-static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
-{
- unsigned long sum = 0;
- int i;
-
- for (i = 0; i < num_u16; i++)
- sum += start[i];
-
- return sum;
-}
-
-static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
- unsigned long sum)
-{
- sum += add_csum_hword(start, num_u16);
-
- while (sum >> 16)
- sum = (sum & 0xffff) + (sum >> 16);
-
- return ~sum;
-}
-
-static void build_ipv4_header(void *header, uint8_t proto,
- uint32_t src, uint32_t dst,
- int payload_len, uint8_t tos)
-{
- struct iphdr *iph = header;
-
- iph->ihl = 5;
- iph->version = 4;
- iph->tos = tos;
- iph->ttl = 8;
- iph->tot_len = htons(sizeof(*iph) + payload_len);
- iph->id = htons(1337);
- iph->protocol = proto;
- iph->saddr = src;
- iph->daddr = dst;
- iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
-}
-
-static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
-{
- uint16_t val, *ptr = (uint16_t *)ip6h;
-
- val = ntohs(*ptr);
- val &= 0xF00F;
- val |= ((uint16_t) dsfield) << 4;
- *ptr = htons(val);
-}
-
-static void build_ipv6_header(void *header, uint8_t proto,
- struct sockaddr_in6 *src,
- struct sockaddr_in6 *dst,
- int payload_len, uint8_t dsfield)
-{
- struct ipv6hdr *ip6h = header;
-
- ip6h->version = 6;
- ip6h->payload_len = htons(payload_len);
- ip6h->nexthdr = proto;
- ip6h->hop_limit = 8;
- ipv6_set_dsfield(ip6h, dsfield);
-
- memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
- memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
-}
-
-static uint16_t build_udp_v4_csum(const struct iphdr *iph,
- const struct udphdr *udph,
- int num_words)
-{
- unsigned long pseudo_sum;
- int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */
-
- pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
- pseudo_sum += htons(IPPROTO_UDP);
- pseudo_sum += udph->len;
- return build_ip_csum((void *) udph, num_words, pseudo_sum);
-}
-
-static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
- const struct udphdr *udph,
- int num_words)
-{
- unsigned long pseudo_sum;
- int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */
-
- pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
- pseudo_sum += htons(ip6h->nexthdr);
- pseudo_sum += ip6h->payload_len;
- return build_ip_csum((void *) udph, num_words, pseudo_sum);
-}
-
-static void build_udp_header(void *header, int payload_len,
- uint16_t dport, int family)
-{
- struct udphdr *udph = header;
- int len = sizeof(*udph) + payload_len;
-
- udph->source = htons(cfg_src_port);
- udph->dest = htons(dport);
- udph->len = htons(len);
- udph->check = 0;
- if (family == AF_INET)
- udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
- udph, len >> 1);
- else
- udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
- udph, len >> 1);
-}
-
-static void build_gue_header(void *header, uint8_t proto)
-{
- struct guehdr *gueh = header;
-
- gueh->proto_ctype = proto;
-}
-
-static void build_gre_header(void *header, uint16_t proto)
-{
- struct grehdr *greh = header;
-
- greh->protocol = htons(proto);
-}
-
-static int l3_length(int family)
-{
- if (family == AF_INET)
- return sizeof(struct iphdr);
- else
- return sizeof(struct ipv6hdr);
-}
-
-static int build_packet(void)
-{
- int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
- int el3_len = 0;
-
- if (cfg_l3_extra)
- el3_len = l3_length(cfg_l3_extra);
-
- /* calculate header offsets */
- if (cfg_encap_proto) {
- ol3_len = l3_length(cfg_l3_outer);
-
- if (cfg_encap_proto == IPPROTO_GRE)
- ol4_len = sizeof(struct grehdr);
- else if (cfg_encap_proto == IPPROTO_UDP)
- ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
- }
-
- il3_len = l3_length(cfg_l3_inner);
- il4_len = sizeof(struct udphdr);
-
- if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
- sizeof(buf))
- error(1, 0, "packet too large\n");
-
- /*
- * Fill packet from inside out, to calculate correct checksums.
- * But create ip before udp headers, as udp uses ip for pseudo-sum.
- */
- memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
- cfg_payload_char, cfg_payload_len);
-
- /* add zero byte for udp csum padding */
- buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
-
- switch (cfg_l3_inner) {
- case PF_INET:
- build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
- IPPROTO_UDP,
- in_saddr4.sin_addr.s_addr,
- in_daddr4.sin_addr.s_addr,
- il4_len + cfg_payload_len,
- cfg_dsfield_inner);
- break;
- case PF_INET6:
- build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
- IPPROTO_UDP,
- &in_saddr6, &in_daddr6,
- il4_len + cfg_payload_len,
- cfg_dsfield_inner);
- break;
- }
-
- build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
- cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
-
- if (!cfg_encap_proto)
- return il3_len + il4_len + cfg_payload_len;
-
- switch (cfg_l3_outer) {
- case PF_INET:
- build_ipv4_header(buf + el3_len, cfg_encap_proto,
- out_saddr4.sin_addr.s_addr,
- out_daddr4.sin_addr.s_addr,
- ol4_len + il3_len + il4_len + cfg_payload_len,
- cfg_dsfield_outer);
- break;
- case PF_INET6:
- build_ipv6_header(buf + el3_len, cfg_encap_proto,
- &out_saddr6, &out_daddr6,
- ol4_len + il3_len + il4_len + cfg_payload_len,
- cfg_dsfield_outer);
- break;
- }
-
- switch (cfg_encap_proto) {
- case IPPROTO_UDP:
- build_gue_header(buf + el3_len + ol3_len + ol4_len -
- sizeof(struct guehdr),
- cfg_l3_inner == PF_INET ? IPPROTO_IPIP
- : IPPROTO_IPV6);
- build_udp_header(buf + el3_len + ol3_len,
- sizeof(struct guehdr) + il3_len + il4_len +
- cfg_payload_len,
- cfg_port_gue, cfg_l3_outer);
- break;
- case IPPROTO_GRE:
- build_gre_header(buf + el3_len + ol3_len,
- cfg_l3_inner == PF_INET ? ETH_P_IP
- : ETH_P_IPV6);
- break;
- }
-
- switch (cfg_l3_extra) {
- case PF_INET:
- build_ipv4_header(buf,
- cfg_l3_outer == PF_INET ? IPPROTO_IPIP
- : IPPROTO_IPV6,
- extra_saddr4.sin_addr.s_addr,
- extra_daddr4.sin_addr.s_addr,
- ol3_len + ol4_len + il3_len + il4_len +
- cfg_payload_len, 0);
- break;
- case PF_INET6:
- build_ipv6_header(buf,
- cfg_l3_outer == PF_INET ? IPPROTO_IPIP
- : IPPROTO_IPV6,
- &extra_saddr6, &extra_daddr6,
- ol3_len + ol4_len + il3_len + il4_len +
- cfg_payload_len, 0);
- break;
- }
-
- return el3_len + ol3_len + ol4_len + il3_len + il4_len +
- cfg_payload_len;
-}
-
-/* sender transmits encapsulated over RAW or unencap'd over UDP */
-static int setup_tx(void)
-{
- int family, fd, ret;
-
- if (cfg_l3_extra)
- family = cfg_l3_extra;
- else if (cfg_l3_outer)
- family = cfg_l3_outer;
- else
- family = cfg_l3_inner;
-
- fd = socket(family, SOCK_RAW, IPPROTO_RAW);
- if (fd == -1)
- error(1, errno, "socket tx");
-
- if (cfg_l3_extra) {
- if (cfg_l3_extra == PF_INET)
- ret = connect(fd, (void *) &extra_daddr4,
- sizeof(extra_daddr4));
- else
- ret = connect(fd, (void *) &extra_daddr6,
- sizeof(extra_daddr6));
- if (ret)
- error(1, errno, "connect tx");
- } else if (cfg_l3_outer) {
- /* connect to destination if not encapsulated */
- if (cfg_l3_outer == PF_INET)
- ret = connect(fd, (void *) &out_daddr4,
- sizeof(out_daddr4));
- else
- ret = connect(fd, (void *) &out_daddr6,
- sizeof(out_daddr6));
- if (ret)
- error(1, errno, "connect tx");
- } else {
- /* otherwise using loopback */
- if (cfg_l3_inner == PF_INET)
- ret = connect(fd, (void *) &in_daddr4,
- sizeof(in_daddr4));
- else
- ret = connect(fd, (void *) &in_daddr6,
- sizeof(in_daddr6));
- if (ret)
- error(1, errno, "connect tx");
- }
-
- return fd;
-}
-
-/* receiver reads unencapsulated UDP */
-static int setup_rx(void)
-{
- int fd, ret;
-
- fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
- if (fd == -1)
- error(1, errno, "socket rx");
-
- if (cfg_l3_inner == PF_INET)
- ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
- else
- ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
- if (ret)
- error(1, errno, "bind rx");
-
- return fd;
-}
-
-static int do_tx(int fd, const char *pkt, int len)
-{
- int ret;
-
- ret = write(fd, pkt, len);
- if (ret == -1)
- error(1, errno, "send");
- if (ret != len)
- error(1, errno, "send: len (%d < %d)\n", ret, len);
-
- return 1;
-}
-
-static int do_poll(int fd, short events, int timeout)
-{
- struct pollfd pfd;
- int ret;
-
- pfd.fd = fd;
- pfd.events = events;
-
- ret = poll(&pfd, 1, timeout);
- if (ret == -1)
- error(1, errno, "poll");
- if (ret && !(pfd.revents & POLLIN))
- error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
-
- return ret;
-}
-
-static int do_rx(int fd)
-{
- char rbuf;
- int ret, num = 0;
-
- while (1) {
- ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
- if (ret == -1 && errno == EAGAIN)
- break;
- if (ret == -1)
- error(1, errno, "recv");
- if (rbuf != cfg_payload_char)
- error(1, 0, "recv: payload mismatch");
- num++;
- }
-
- return num;
-}
-
-static int do_main(void)
-{
- unsigned long tstop, treport, tcur;
- int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
-
- if (!cfg_only_tx)
- fdr = setup_rx();
- if (!cfg_only_rx)
- fdt = setup_tx();
-
- len = build_packet();
-
- tcur = util_gettime();
- treport = tcur + 1000;
- tstop = tcur + (cfg_num_secs * 1000);
-
- while (1) {
- if (!cfg_only_rx)
- tx += do_tx(fdt, buf, len);
-
- if (!cfg_only_tx)
- rx += do_rx(fdr);
-
- if (cfg_num_secs) {
- tcur = util_gettime();
- if (tcur >= tstop)
- break;
- if (tcur >= treport) {
- fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
- tx = 0;
- rx = 0;
- treport = tcur + 1000;
- }
- } else {
- if (tx == cfg_num_pkt)
- break;
- }
- }
-
- /* read straggler packets, if any */
- if (rx < tx) {
- tstop = util_gettime() + 100;
- while (rx < tx) {
- tcur = util_gettime();
- if (tcur >= tstop)
- break;
-
- do_poll(fdr, POLLIN, tstop - tcur);
- rx += do_rx(fdr);
- }
- }
-
- fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
-
- if (fdr != -1 && close(fdr))
- error(1, errno, "close rx");
- if (fdt != -1 && close(fdt))
- error(1, errno, "close tx");
-
- /*
- * success (== 0) only if received all packets
- * unless failure is expected, in which case none must arrive.
- */
- if (cfg_expect_failure)
- return rx != 0;
- else
- return rx != tx;
-}
-
-
-static void __attribute__((noreturn)) usage(const char *filepath)
-{
- fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
- "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
- "[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
- "[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
- filepath);
- exit(1);
-}
-
-static void parse_addr(int family, void *addr, const char *optarg)
-{
- int ret;
-
- ret = inet_pton(family, optarg, addr);
- if (ret == -1)
- error(1, errno, "inet_pton");
- if (ret == 0)
- error(1, 0, "inet_pton: bad string");
-}
-
-static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
-{
- parse_addr(AF_INET, &addr->sin_addr, optarg);
-}
-
-static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
-{
- parse_addr(AF_INET6, &addr->sin6_addr, optarg);
-}
-
-static int parse_protocol_family(const char *filepath, const char *optarg)
-{
- if (!strcmp(optarg, "4"))
- return PF_INET;
- if (!strcmp(optarg, "6"))
- return PF_INET6;
-
- usage(filepath);
-}
-
-static void parse_opts(int argc, char **argv)
-{
- int c;
-
- while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
- switch (c) {
- case 'd':
- if (cfg_l3_outer == AF_UNSPEC)
- error(1, 0, "-d must be preceded by -o");
- if (cfg_l3_outer == AF_INET)
- parse_addr4(&out_daddr4, optarg);
- else
- parse_addr6(&out_daddr6, optarg);
- break;
- case 'D':
- if (cfg_l3_inner == AF_UNSPEC)
- error(1, 0, "-D must be preceded by -i");
- if (cfg_l3_inner == AF_INET)
- parse_addr4(&in_daddr4, optarg);
- else
- parse_addr6(&in_daddr6, optarg);
- break;
- case 'e':
- if (!strcmp(optarg, "gre"))
- cfg_encap_proto = IPPROTO_GRE;
- else if (!strcmp(optarg, "gue"))
- cfg_encap_proto = IPPROTO_UDP;
- else if (!strcmp(optarg, "bare"))
- cfg_encap_proto = IPPROTO_IPIP;
- else if (!strcmp(optarg, "none"))
- cfg_encap_proto = IPPROTO_IP; /* == 0 */
- else
- usage(argv[0]);
- break;
- case 'f':
- cfg_src_port = strtol(optarg, NULL, 0);
- break;
- case 'F':
- cfg_expect_failure = true;
- break;
- case 'h':
- usage(argv[0]);
- break;
- case 'i':
- if (!strcmp(optarg, "4"))
- cfg_l3_inner = PF_INET;
- else if (!strcmp(optarg, "6"))
- cfg_l3_inner = PF_INET6;
- else
- usage(argv[0]);
- break;
- case 'l':
- cfg_payload_len = strtol(optarg, NULL, 0);
- break;
- case 'n':
- cfg_num_pkt = strtol(optarg, NULL, 0);
- break;
- case 'o':
- cfg_l3_outer = parse_protocol_family(argv[0], optarg);
- break;
- case 'O':
- cfg_l3_extra = parse_protocol_family(argv[0], optarg);
- break;
- case 'R':
- cfg_only_rx = true;
- break;
- case 's':
- if (cfg_l3_outer == AF_INET)
- parse_addr4(&out_saddr4, optarg);
- else
- parse_addr6(&out_saddr6, optarg);
- break;
- case 'S':
- if (cfg_l3_inner == AF_INET)
- parse_addr4(&in_saddr4, optarg);
- else
- parse_addr6(&in_saddr6, optarg);
- break;
- case 't':
- cfg_num_secs = strtol(optarg, NULL, 0);
- break;
- case 'T':
- cfg_only_tx = true;
- break;
- case 'x':
- cfg_dsfield_outer = strtol(optarg, NULL, 0);
- break;
- case 'X':
- cfg_dsfield_inner = strtol(optarg, NULL, 0);
- break;
- }
- }
-
- if (cfg_only_rx && cfg_only_tx)
- error(1, 0, "options: cannot combine rx-only and tx-only");
-
- if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
- error(1, 0, "options: must specify outer with encap");
- else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
- error(1, 0, "options: cannot combine no-encap and outer");
- else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
- error(1, 0, "options: cannot combine no-encap and extra");
-
- if (cfg_l3_inner == AF_UNSPEC)
- cfg_l3_inner = AF_INET6;
- if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
- cfg_encap_proto = IPPROTO_IPV6;
-
- /* RFC 6040 4.2:
- * on decap, if outer encountered congestion (CE == 0x3),
- * but inner cannot encode ECN (NoECT == 0x0), then drop packet.
- */
- if (((cfg_dsfield_outer & 0x3) == 0x3) &&
- ((cfg_dsfield_inner & 0x3) == 0x0))
- cfg_expect_failure = true;
-}
-
-static void print_opts(void)
-{
- if (cfg_l3_inner == PF_INET6) {
- util_printaddr("inner.dest6", (void *) &in_daddr6);
- util_printaddr("inner.source6", (void *) &in_saddr6);
- } else {
- util_printaddr("inner.dest4", (void *) &in_daddr4);
- util_printaddr("inner.source4", (void *) &in_saddr4);
- }
-
- if (!cfg_l3_outer)
- return;
-
- fprintf(stderr, "encap proto: %u\n", cfg_encap_proto);
-
- if (cfg_l3_outer == PF_INET6) {
- util_printaddr("outer.dest6", (void *) &out_daddr6);
- util_printaddr("outer.source6", (void *) &out_saddr6);
- } else {
- util_printaddr("outer.dest4", (void *) &out_daddr4);
- util_printaddr("outer.source4", (void *) &out_saddr4);
- }
-
- if (!cfg_l3_extra)
- return;
-
- if (cfg_l3_outer == PF_INET6) {
- util_printaddr("extra.dest6", (void *) &extra_daddr6);
- util_printaddr("extra.source6", (void *) &extra_saddr6);
- } else {
- util_printaddr("extra.dest4", (void *) &extra_daddr4);
- util_printaddr("extra.source4", (void *) &extra_saddr4);
- }
-
-}
-
-int main(int argc, char **argv)
-{
- parse_opts(argc, argv);
- print_opts();
- return do_main();
-}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
deleted file mode 100755
index 174b72a64a4c..000000000000
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ /dev/null
@@ -1,168 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Load BPF flow dissector and verify it correctly dissects traffic
-export TESTNAME=test_flow_dissector
-unmount=0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-msg="skip all tests:"
-if [ $UID != 0 ]; then
- echo $msg please run this as root >&2
- exit $ksft_skip
-fi
-
-# This test needs to be run in a network namespace with in_netns.sh. Check if
-# this is the case and run it with in_netns.sh if it is being run in the root
-# namespace.
-if [[ -z $(ip netns identify $$) ]]; then
- err=0
- if bpftool="$(which bpftool)"; then
- echo "Testing global flow dissector..."
-
- $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \
- type flow_dissector
-
- if ! unshare --net $bpftool prog attach pinned \
- /sys/fs/bpf/flow/flow_dissector flow_dissector; then
- echo "Unexpected unsuccessful attach in namespace" >&2
- err=1
- fi
-
- $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \
- flow_dissector
-
- if unshare --net $bpftool prog attach pinned \
- /sys/fs/bpf/flow/flow_dissector flow_dissector; then
- echo "Unexpected successful attach in namespace" >&2
- err=1
- fi
-
- if ! $bpftool prog detach pinned \
- /sys/fs/bpf/flow/flow_dissector flow_dissector; then
- echo "Failed to detach flow dissector" >&2
- err=1
- fi
-
- rm -rf /sys/fs/bpf/flow
- else
- echo "Skipping root flow dissector test, bpftool not found" >&2
- fi
-
- # Run the rest of the tests in a net namespace.
- ../net/in_netns.sh "$0" "$@"
- err=$(( $err + $? ))
-
- if (( $err == 0 )); then
- echo "selftests: $TESTNAME [PASS]";
- else
- echo "selftests: $TESTNAME [FAILED]";
- fi
-
- exit $err
-fi
-
-# Determine selftest success via shell exit code
-exit_handler()
-{
- set +e
-
- # Cleanup
- tc filter del dev lo ingress pref 1337 2> /dev/null
- tc qdisc del dev lo ingress 2> /dev/null
- ./flow_dissector_load -d 2> /dev/null
- if [ $unmount -ne 0 ]; then
- umount bpffs 2> /dev/null
- fi
-}
-
-# Exit script immediately (well catched by trap handler) if any
-# program/thing exits with a non-zero status.
-set -e
-
-# (Use 'trap -l' to list meaning of numbers)
-trap exit_handler 0 2 3 6 9
-
-# Mount BPF file system
-if /bin/mount | grep /sys/fs/bpf > /dev/null; then
- echo "bpffs already mounted"
-else
- echo "bpffs not mounted. Mounting..."
- unmount=1
- /bin/mount bpffs /sys/fs/bpf -t bpf
-fi
-
-# Attach BPF program
-./flow_dissector_load -p bpf_flow.o -s flow_dissector
-
-# Setup
-tc qdisc add dev lo ingress
-echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter
-echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
-echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter
-
-echo "Testing IPv4..."
-# Drops all IP/UDP packets coming from port 9
-tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
- udp src_port 9 action drop
-
-# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
-./test_flow_dissector -i 4 -f 8
-# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 4 -f 9 -F
-# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
-./test_flow_dissector -i 4 -f 10
-
-echo "Testing IPIP..."
-# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 8
-# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
-# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 10
-
-echo "Testing IPv4 + GRE..."
-# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 8
-# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
-# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 10
-
-tc filter del dev lo ingress pref 1337
-
-echo "Testing port range..."
-# Drops all IP/UDP packets coming from port 8-10
-tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
- udp src_port 8-10 action drop
-
-# Send 10 IPv4/UDP packets from port 7. Filter should not drop any.
-./test_flow_dissector -i 4 -f 7
-# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 4 -f 9 -F
-# Send 10 IPv4/UDP packets from port 11. Filter should not drop any.
-./test_flow_dissector -i 4 -f 11
-
-tc filter del dev lo ingress pref 1337
-
-echo "Testing IPv6..."
-# Drops all IPv6/UDP packets coming from port 9
-tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
- udp src_port 9 action drop
-
-# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
-./test_flow_dissector -i 6 -f 8
-# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 6 -f 9 -F
-# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
-./test_flow_dissector -i 6 -f 10
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_ftrace.sh b/tools/testing/selftests/bpf/test_ftrace.sh
index 20de7bb873bc..f5109eb0e951 100755
--- a/tools/testing/selftests/bpf/test_ftrace.sh
+++ b/tools/testing/selftests/bpf/test_ftrace.sh
@@ -1,6 +1,11 @@
#!/bin/bash
-TR=/sys/kernel/debug/tracing/
+if [[ -e /sys/kernel/tracing/trace ]]; then
+ TR=/sys/kernel/tracing/
+else
+ TR=/sys/kernel/debug/tracing/
+fi
+
clear_trace() { # reset trace output
echo > $TR/trace
}
diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 4f6444bcd53f..50dca53ac536 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -1,6 +1,11 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+# Usage:
+# ./test_kmod.sh [module_param]...
+# Ex.: ./test_kmod.sh test_range=1,3
+# All the parameters are passed to the kernel module.
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -24,17 +29,18 @@ test_run()
sysctl -w net.core.bpf_jit_harden=$2 2>&1 > /dev/null
echo "[ JIT enabled:$1 hardened:$2 ]"
+ shift 2
dmesg -C
if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
- insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null
+ insmod ${OUTPUT}/lib/test_bpf.ko "$@" 2> /dev/null
if [ $? -ne 0 ]; then
rc=1
fi
else
# Use modprobe dry run to check for missing test_bpf module
- if ! /sbin/modprobe -q -n test_bpf; then
+ if ! /sbin/modprobe -q -n test_bpf "$@"; then
echo "test_bpf: [SKIP]"
- elif /sbin/modprobe -q test_bpf; then
+ elif /sbin/modprobe -q test_bpf "$@"; then
echo "test_bpf: ok"
else
echo "test_bpf: [FAIL]"
@@ -59,9 +65,9 @@ test_restore()
rc=0
test_save
-test_run 0 0
-test_run 1 0
-test_run 1 1
-test_run 1 2
+test_run 0 0 "$@"
+test_run 1 0 "$@"
+test_run 1 1 "$@"
+test_run 1 2 "$@"
test_restore
exit $rc
diff --git a/tools/testing/selftests/bpf/bpf_testmod/.gitignore b/tools/testing/selftests/bpf/test_kmods/.gitignore
index ded513777281..ded513777281 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/.gitignore
+++ b/tools/testing/selftests/bpf/test_kmods/.gitignore
diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile
new file mode 100644
index 000000000000..63c4d3f6a12f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/Makefile
@@ -0,0 +1,21 @@
+TEST_KMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(TEST_KMOD_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
+
+$(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o)))
+
+CFLAGS_bpf_testmod.o = -I$(src)
+
+all:
+ $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) modules
+
+clean:
+ $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) clean
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c
new file mode 100644
index 000000000000..0cc747fa912f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_test_modorder_retx(void)
+{
+ return 'x';
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(bpf_test_modorder_kfunc_x_ids)
+BTF_ID_FLAGS(func, bpf_test_modorder_retx);
+BTF_KFUNCS_END(bpf_test_modorder_kfunc_x_ids)
+
+static const struct btf_kfunc_id_set bpf_test_modorder_x_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_test_modorder_kfunc_x_ids,
+};
+
+static int __init bpf_test_modorder_x_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
+ &bpf_test_modorder_x_set);
+}
+
+static void __exit bpf_test_modorder_x_exit(void)
+{
+}
+
+module_init(bpf_test_modorder_x_init);
+module_exit(bpf_test_modorder_x_exit);
+
+MODULE_DESCRIPTION("BPF selftest ordertest module X");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c
new file mode 100644
index 000000000000..c627ee085d13
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_test_modorder_rety(void)
+{
+ return 'y';
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(bpf_test_modorder_kfunc_y_ids)
+BTF_ID_FLAGS(func, bpf_test_modorder_rety);
+BTF_KFUNCS_END(bpf_test_modorder_kfunc_y_ids)
+
+static const struct btf_kfunc_id_set bpf_test_modorder_y_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_test_modorder_kfunc_y_ids,
+};
+
+static int __init bpf_test_modorder_y_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
+ &bpf_test_modorder_y_set);
+}
+
+static void __exit bpf_test_modorder_y_exit(void)
+{
+}
+
+module_init(bpf_test_modorder_y_init);
+module_exit(bpf_test_modorder_y_exit);
+
+MODULE_DESCRIPTION("BPF selftest ordertest module Y");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c
new file mode 100644
index 000000000000..948eb3962732
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+struct bpf_test_no_cfi_ops {
+ void (*fn_1)(void);
+ void (*fn_2)(void);
+};
+
+static int dummy_init(struct btf *btf)
+{
+ return 0;
+}
+
+static int dummy_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static int dummy_reg(void *kdata, struct bpf_link *link)
+{
+ return 0;
+}
+
+static void dummy_unreg(void *kdata, struct bpf_link *link)
+{
+}
+
+static const struct bpf_verifier_ops dummy_verifier_ops;
+
+static void bpf_test_no_cfi_ops__fn_1(void)
+{
+}
+
+static void bpf_test_no_cfi_ops__fn_2(void)
+{
+}
+
+static struct bpf_test_no_cfi_ops __test_no_cif_ops = {
+ .fn_1 = bpf_test_no_cfi_ops__fn_1,
+ .fn_2 = bpf_test_no_cfi_ops__fn_2,
+};
+
+static struct bpf_struct_ops test_no_cif_ops = {
+ .verifier_ops = &dummy_verifier_ops,
+ .init = dummy_init,
+ .init_member = dummy_init_member,
+ .reg = dummy_reg,
+ .unreg = dummy_unreg,
+ .name = "bpf_test_no_cfi_ops",
+ .owner = THIS_MODULE,
+};
+
+static int bpf_test_no_cfi_init(void)
+{
+ int ret;
+
+ ret = register_bpf_struct_ops(&test_no_cif_ops,
+ bpf_test_no_cfi_ops);
+ if (!ret)
+ return -EINVAL;
+
+ test_no_cif_ops.cfi_stubs = &__test_no_cif_ops;
+ ret = register_bpf_struct_ops(&test_no_cif_ops,
+ bpf_test_no_cfi_ops);
+ return ret;
+}
+
+static void bpf_test_no_cfi_exit(void)
+{
+}
+
+module_init(bpf_test_no_cfi_init);
+module_exit(bpf_test_no_cfi_exit);
+
+MODULE_AUTHOR("Kuifeng Lee");
+MODULE_DESCRIPTION("BPF no cfi_stubs test module");
+MODULE_LICENSE("Dual BSD/GPL");
+
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
new file mode 100644
index 000000000000..7b4ae5e81d32
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/prandom.h>
+#include <linux/ktime.h>
+#include <asm/rqspinlock.h>
+#include <linux/perf_event.h>
+#include <linux/kthread.h>
+#include <linux/atomic.h>
+#include <linux/slab.h>
+
+static struct perf_event_attr hw_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 1,
+ .sample_period = 100000,
+};
+
+static rqspinlock_t lock_a;
+static rqspinlock_t lock_b;
+static rqspinlock_t lock_c;
+
+#define RQSL_SLOW_THRESHOLD_MS 10
+static const unsigned int rqsl_hist_ms[] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
+ 100, 150, 200, 250, 1000,
+};
+#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
+
+enum rqsl_context {
+ RQSL_CTX_NORMAL = 0,
+ RQSL_CTX_NMI,
+ RQSL_CTX_MAX,
+};
+
+struct rqsl_cpu_hist {
+ atomic64_t hist[RQSL_CTX_MAX][RQSL_NR_HIST_BUCKETS];
+ atomic64_t success[RQSL_CTX_MAX];
+ atomic64_t failure[RQSL_CTX_MAX];
+};
+
+static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
+
+enum rqsl_mode {
+ RQSL_MODE_AA = 0,
+ RQSL_MODE_ABBA,
+ RQSL_MODE_ABBCCA,
+};
+
+static int test_mode = RQSL_MODE_AA;
+module_param(test_mode, int, 0644);
+MODULE_PARM_DESC(test_mode,
+ "rqspinlock test mode: 0 = AA, 1 = ABBA, 2 = ABBCCA");
+
+static int normal_delay = 20;
+module_param(normal_delay, int, 0644);
+MODULE_PARM_DESC(normal_delay,
+ "rqspinlock critical section length for normal context (20ms default)");
+
+static int nmi_delay = 10;
+module_param(nmi_delay, int, 0644);
+MODULE_PARM_DESC(nmi_delay,
+ "rqspinlock critical section length for NMI context (10ms default)");
+
+static struct perf_event **rqsl_evts;
+static int rqsl_nevts;
+
+static struct task_struct **rqsl_threads;
+static int rqsl_nthreads;
+static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0);
+
+static int pause = 0;
+
+static const char *rqsl_mode_names[] = {
+ [RQSL_MODE_AA] = "AA",
+ [RQSL_MODE_ABBA] = "ABBA",
+ [RQSL_MODE_ABBCCA] = "ABBCCA",
+};
+
+struct rqsl_lock_pair {
+ rqspinlock_t *worker_lock;
+ rqspinlock_t *nmi_lock;
+};
+
+static struct rqsl_lock_pair rqsl_get_lock_pair(int cpu)
+{
+ int mode = READ_ONCE(test_mode);
+
+ switch (mode) {
+ default:
+ case RQSL_MODE_AA:
+ return (struct rqsl_lock_pair){ &lock_a, &lock_a };
+ case RQSL_MODE_ABBA:
+ if (cpu & 1)
+ return (struct rqsl_lock_pair){ &lock_b, &lock_a };
+ return (struct rqsl_lock_pair){ &lock_a, &lock_b };
+ case RQSL_MODE_ABBCCA:
+ switch (cpu % 3) {
+ case 0:
+ return (struct rqsl_lock_pair){ &lock_a, &lock_b };
+ case 1:
+ return (struct rqsl_lock_pair){ &lock_b, &lock_c };
+ default:
+ return (struct rqsl_lock_pair){ &lock_c, &lock_a };
+ }
+ }
+}
+
+static u32 rqsl_hist_bucket_idx(u32 delta_ms)
+{
+ int i;
+
+ for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+ if (delta_ms <= rqsl_hist_ms[i])
+ return i;
+ }
+
+ return RQSL_NR_HIST_BUCKETS - 1;
+}
+
+static void rqsl_record_lock_result(u64 delta_ns, enum rqsl_context ctx, int ret)
+{
+ struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists);
+ u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
+ u32 bucket = rqsl_hist_bucket_idx(delta_ms);
+ atomic64_t *buckets = hist->hist[ctx];
+
+ atomic64_inc(&buckets[bucket]);
+ if (!ret)
+ atomic64_inc(&hist->success[ctx]);
+ else
+ atomic64_inc(&hist->failure[ctx]);
+}
+
+static int rqspinlock_worker_fn(void *arg)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ u64 start_ns;
+ int ret;
+
+ if (cpu) {
+ atomic_inc(&rqsl_ready_cpus);
+
+ while (!kthread_should_stop()) {
+ struct rqsl_lock_pair locks = rqsl_get_lock_pair(cpu);
+ rqspinlock_t *worker_lock = locks.worker_lock;
+
+ if (READ_ONCE(pause)) {
+ msleep(1000);
+ continue;
+ }
+ start_ns = ktime_get_mono_fast_ns();
+ ret = raw_res_spin_lock_irqsave(worker_lock, flags);
+ rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+ RQSL_CTX_NORMAL, ret);
+ mdelay(normal_delay);
+ if (!ret)
+ raw_res_spin_unlock_irqrestore(worker_lock, flags);
+ cpu_relax();
+ }
+ return 0;
+ }
+
+ while (!kthread_should_stop()) {
+ int expected = rqsl_nthreads > 0 ? rqsl_nthreads - 1 : 0;
+ int ready = atomic_read(&rqsl_ready_cpus);
+
+ if (ready == expected && !READ_ONCE(pause)) {
+ for (int i = 0; i < rqsl_nevts; i++)
+ perf_event_enable(rqsl_evts[i]);
+ pr_err("Waiting 5 secs to pause the test\n");
+ msleep(1000 * 5);
+ WRITE_ONCE(pause, 1);
+ pr_err("Paused the test\n");
+ } else {
+ msleep(1000);
+ cpu_relax();
+ }
+ }
+ return 0;
+}
+
+static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct rqsl_lock_pair locks;
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ u64 start_ns;
+ int ret;
+
+ if (!cpu || READ_ONCE(pause))
+ return;
+
+ locks = rqsl_get_lock_pair(cpu);
+ start_ns = ktime_get_mono_fast_ns();
+ ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
+ rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+ RQSL_CTX_NMI, ret);
+
+ mdelay(nmi_delay);
+
+ if (!ret)
+ raw_res_spin_unlock_irqrestore(locks.nmi_lock, flags);
+}
+
+static void free_rqsl_threads(void)
+{
+ int i;
+
+ if (rqsl_threads) {
+ for_each_online_cpu(i) {
+ if (rqsl_threads[i])
+ kthread_stop(rqsl_threads[i]);
+ }
+ kfree(rqsl_threads);
+ }
+}
+
+static void free_rqsl_evts(void)
+{
+ int i;
+
+ if (rqsl_evts) {
+ for (i = 0; i < rqsl_nevts; i++) {
+ if (rqsl_evts[i])
+ perf_event_release_kernel(rqsl_evts[i]);
+ }
+ kfree(rqsl_evts);
+ }
+}
+
+static int bpf_test_rqspinlock_init(void)
+{
+ int i, ret;
+ int ncpus = num_online_cpus();
+
+ if (test_mode < RQSL_MODE_AA || test_mode > RQSL_MODE_ABBCCA) {
+ pr_err("Invalid mode %d\n", test_mode);
+ return -EINVAL;
+ }
+
+ pr_err("Mode = %s\n", rqsl_mode_names[test_mode]);
+
+ if (ncpus < test_mode + 2)
+ return -ENOTSUPP;
+
+ raw_res_spin_lock_init(&lock_a);
+ raw_res_spin_lock_init(&lock_b);
+ raw_res_spin_lock_init(&lock_c);
+
+ rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL);
+ if (!rqsl_evts)
+ return -ENOMEM;
+ rqsl_nevts = ncpus - 1;
+
+ for (i = 1; i < ncpus; i++) {
+ struct perf_event *e;
+
+ e = perf_event_create_kernel_counter(&hw_attr, i, NULL, nmi_cb, NULL);
+ if (IS_ERR(e)) {
+ ret = PTR_ERR(e);
+ goto err_perf_events;
+ }
+ rqsl_evts[i - 1] = e;
+ }
+
+ rqsl_threads = kcalloc(ncpus, sizeof(*rqsl_threads), GFP_KERNEL);
+ if (!rqsl_threads) {
+ ret = -ENOMEM;
+ goto err_perf_events;
+ }
+ rqsl_nthreads = ncpus;
+
+ for_each_online_cpu(i) {
+ struct task_struct *t;
+
+ t = kthread_create(rqspinlock_worker_fn, NULL, "rqsl_w/%d", i);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto err_threads_create;
+ }
+ kthread_bind(t, i);
+ rqsl_threads[i] = t;
+ wake_up_process(t);
+ }
+ return 0;
+
+err_threads_create:
+ free_rqsl_threads();
+err_perf_events:
+ free_rqsl_evts();
+ return ret;
+}
+
+module_init(bpf_test_rqspinlock_init);
+
+static void rqsl_print_histograms(void)
+{
+ int cpu, i;
+
+ pr_err("rqspinlock acquisition latency histogram (ms):\n");
+
+ for_each_online_cpu(cpu) {
+ struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu);
+ u64 norm_counts[RQSL_NR_HIST_BUCKETS];
+ u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
+ u64 total_counts[RQSL_NR_HIST_BUCKETS];
+ u64 norm_success, nmi_success, success_total;
+ u64 norm_failure, nmi_failure, failure_total;
+ u64 norm_total = 0, nmi_total = 0, total = 0;
+ bool has_slow = false;
+
+ for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+ norm_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NORMAL][i]);
+ nmi_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NMI][i]);
+ total_counts[i] = norm_counts[i] + nmi_counts[i];
+ norm_total += norm_counts[i];
+ nmi_total += nmi_counts[i];
+ total += total_counts[i];
+ if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS &&
+ total_counts[i])
+ has_slow = true;
+ }
+
+ norm_success = atomic64_read(&hist->success[RQSL_CTX_NORMAL]);
+ nmi_success = atomic64_read(&hist->success[RQSL_CTX_NMI]);
+ norm_failure = atomic64_read(&hist->failure[RQSL_CTX_NORMAL]);
+ nmi_failure = atomic64_read(&hist->failure[RQSL_CTX_NMI]);
+ success_total = norm_success + nmi_success;
+ failure_total = norm_failure + nmi_failure;
+
+ if (!total)
+ continue;
+
+ if (!has_slow) {
+ pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+ "success %llu (normal %llu, nmi %llu) | "
+ "failure %llu (normal %llu, nmi %llu), all within 0-%ums\n",
+ cpu, total, norm_total, nmi_total,
+ success_total, norm_success, nmi_success,
+ failure_total, norm_failure, nmi_failure,
+ RQSL_SLOW_THRESHOLD_MS);
+ continue;
+ }
+
+ pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+ "success %llu (normal %llu, nmi %llu) | "
+ "failure %llu (normal %llu, nmi %llu)\n",
+ cpu, total, norm_total, nmi_total,
+ success_total, norm_success, nmi_success,
+ failure_total, norm_failure, nmi_failure);
+ for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+ unsigned int start_ms;
+
+ if (!total_counts[i])
+ continue;
+
+ start_ms = i == 0 ? 0 : rqsl_hist_ms[i - 1] + 1;
+ if (i == RQSL_NR_HIST_BUCKETS - 1) {
+ pr_err(" >= %ums: total %llu (normal %llu, nmi %llu)\n",
+ start_ms, total_counts[i],
+ norm_counts[i], nmi_counts[i]);
+ } else {
+ pr_err(" %u-%ums: total %llu (normal %llu, nmi %llu)\n",
+ start_ms, rqsl_hist_ms[i],
+ total_counts[i],
+ norm_counts[i], nmi_counts[i]);
+ }
+ }
+ }
+}
+
+static void bpf_test_rqspinlock_exit(void)
+{
+ WRITE_ONCE(pause, 1);
+ free_rqsl_threads();
+ free_rqsl_evts();
+ rqsl_print_histograms();
+}
+
+module_exit(bpf_test_rqspinlock_exit);
+
+MODULE_AUTHOR("Kumar Kartikeya Dwivedi");
+MODULE_DESCRIPTION("BPF rqspinlock stress test module");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
index 89c6d58e5dd6..aeef86b3da74 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
@@ -34,6 +34,35 @@ DECLARE_TRACE(bpf_testmod_test_write_bare,
TP_ARGS(task, ctx)
);
+/* Used in bpf_testmod_test_read() to test __nullable suffix */
+DECLARE_TRACE(bpf_testmod_test_nullable_bare,
+ TP_PROTO(struct bpf_testmod_test_read_ctx *ctx__nullable),
+ TP_ARGS(ctx__nullable)
+);
+
+struct sk_buff;
+
+DECLARE_TRACE(bpf_testmod_test_raw_tp_null,
+ TP_PROTO(struct sk_buff *skb),
+ TP_ARGS(skb)
+);
+
+
+#undef BPF_TESTMOD_DECLARE_TRACE
+#ifdef DECLARE_TRACE_WRITABLE
+#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \
+ DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size)
+#else
+#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \
+ DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))
+#endif
+
+BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare,
+ TP_PROTO(struct bpf_testmod_test_writable_ctx *ctx),
+ TP_ARGS(ctx),
+ sizeof(struct bpf_testmod_test_writable_ctx)
+);
+
#endif /* _BPF_TESTMOD_EVENTS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
new file mode 100644
index 000000000000..1669a7eeda26
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -0,0 +1,1787 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
+#include <linux/delay.h>
+#include <linux/error-injection.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/percpu-defs.h>
+#include <linux/sysfs.h>
+#include <linux/tracepoint.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/nsproxy.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/un.h>
+#include <linux/filter.h>
+#include <net/sock.h>
+#include <linux/namei.h>
+#include "bpf_testmod.h"
+#include "bpf_testmod_kfunc.h"
+
+#define CREATE_TRACE_POINTS
+#include "bpf_testmod-events.h"
+
+#define CONNECT_TIMEOUT_SEC 1
+
+typedef int (*func_proto_typedef)(long);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
+typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
+
+DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
+long bpf_testmod_test_struct_arg_result;
+static DEFINE_MUTEX(sock_lock);
+static struct socket *sock;
+
+struct bpf_testmod_struct_arg_1 {
+ int a;
+};
+struct bpf_testmod_struct_arg_2 {
+ long a;
+ long b;
+};
+
+struct bpf_testmod_struct_arg_3 {
+ int a;
+ int b[];
+};
+
+struct bpf_testmod_struct_arg_4 {
+ u64 a;
+ int b;
+};
+
+struct bpf_testmod_struct_arg_5 {
+ char a;
+ short b;
+ int c;
+ long d;
+};
+
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
+__bpf_hook_start();
+
+noinline int
+bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) {
+ bpf_testmod_test_struct_arg_result = a.a + a.b + b + c;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_2(int a, struct bpf_testmod_struct_arg_2 b, int c) {
+ bpf_testmod_test_struct_arg_result = a + b.a + b.b + c;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_3(int a, int b, struct bpf_testmod_struct_arg_2 c) {
+ bpf_testmod_test_struct_arg_result = a + b + c.a + c.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_4(struct bpf_testmod_struct_arg_1 a, int b,
+ int c, int d, struct bpf_testmod_struct_arg_2 e) {
+ bpf_testmod_test_struct_arg_result = a.a + b + c + d + e.a + e.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_5(void) {
+ bpf_testmod_test_struct_arg_result = 1;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_6(struct bpf_testmod_struct_arg_3 *a) {
+ bpf_testmod_test_struct_arg_result = a->b[0];
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_7(u64 a, void *b, short c, int d, void *e,
+ struct bpf_testmod_struct_arg_4 f)
+{
+ bpf_testmod_test_struct_arg_result = a + (long)b + c + d +
+ (long)e + f.a + f.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e,
+ struct bpf_testmod_struct_arg_4 f, int g)
+{
+ bpf_testmod_test_struct_arg_result = a + (long)b + c + d +
+ (long)e + f.a + f.b + g;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f,
+ short g, struct bpf_testmod_struct_arg_5 h, long i)
+{
+ bpf_testmod_test_struct_arg_result = a + (long)b + c + d + (long)e +
+ f + g + h.a + h.b + h.c + h.d + i;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_union_arg_1(union bpf_testmod_union_arg_1 a, int b, int c)
+{
+ bpf_testmod_test_struct_arg_result = a.arg.a + b + c;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_union_arg_2(int a, union bpf_testmod_union_arg_2 b)
+{
+ bpf_testmod_test_struct_arg_result = a + b.arg.a + b.arg.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
+ bpf_testmod_test_struct_arg_result = a->a;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+__weak noinline void bpf_testmod_looooooooooooooooooooooooooooooong_name(void)
+{
+}
+
+__bpf_kfunc void
+bpf_testmod_test_mod_kfunc(int i)
+{
+ *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i;
+}
+
+__bpf_kfunc int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt)
+{
+ it->cnt = cnt;
+
+ if (cnt < 0)
+ return -EINVAL;
+
+ it->value = value;
+
+ return 0;
+}
+
+__bpf_kfunc s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq* it)
+{
+ if (it->cnt <= 0)
+ return NULL;
+
+ it->cnt--;
+
+ return &it->value;
+}
+
+__bpf_kfunc s64 bpf_iter_testmod_seq_value(int val, struct bpf_iter_testmod_seq* it__iter)
+{
+ if (it__iter->cnt < 0)
+ return 0;
+
+ return val + it__iter->value;
+}
+
+__bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it)
+{
+ it->cnt = 0;
+}
+
+__bpf_kfunc void bpf_kfunc_common_test(void)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr,
+ struct bpf_dynptr *ptr__nullable)
+{
+}
+
+__bpf_kfunc struct sk_buff *bpf_kfunc_nested_acquire_nonzero_offset_test(struct sk_buff_head *ptr)
+{
+ return NULL;
+}
+
+__bpf_kfunc struct sk_buff *bpf_kfunc_nested_acquire_zero_offset_test(struct sock_common *ptr)
+{
+ return NULL;
+}
+
+__bpf_kfunc void bpf_kfunc_nested_release_test(struct sk_buff *ptr)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_trusted_task_test(struct task_struct *ptr)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_trusted_num_test(int *ptr)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr)
+{
+}
+
+__bpf_kfunc struct task_struct *bpf_kfunc_ret_rcu_test(void)
+{
+ return NULL;
+}
+
+__bpf_kfunc int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size)
+{
+ return NULL;
+}
+
+__bpf_kfunc struct bpf_testmod_ctx *
+bpf_testmod_ctx_create(int *err)
+{
+ struct bpf_testmod_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
+ if (!ctx) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+ refcount_set(&ctx->usage, 1);
+
+ return ctx;
+}
+
+static void testmod_free_cb(struct rcu_head *head)
+{
+ struct bpf_testmod_ctx *ctx;
+
+ ctx = container_of(head, struct bpf_testmod_ctx, rcu);
+ kfree(ctx);
+}
+
+__bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx)
+{
+ if (!ctx)
+ return;
+ if (refcount_dec_and_test(&ctx->usage))
+ call_rcu(&ctx->rcu, testmod_free_cb);
+}
+
+static struct bpf_testmod_ops3 *st_ops3;
+
+static int bpf_testmod_test_3(void)
+{
+ return 0;
+}
+
+static int bpf_testmod_test_4(void)
+{
+ return 0;
+}
+
+static struct bpf_testmod_ops3 __bpf_testmod_ops3 = {
+ .test_1 = bpf_testmod_test_3,
+ .test_2 = bpf_testmod_test_4,
+};
+
+static void bpf_testmod_test_struct_ops3(void)
+{
+ if (st_ops3)
+ st_ops3->test_1();
+}
+
+__bpf_kfunc void bpf_testmod_ops3_call_test_1(void)
+{
+ st_ops3->test_1();
+}
+
+__bpf_kfunc void bpf_testmod_ops3_call_test_2(void)
+{
+ st_ops3->test_2();
+}
+
+struct bpf_testmod_btf_type_tag_1 {
+ int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+ struct bpf_testmod_btf_type_tag_1 __user *p;
+};
+
+struct bpf_testmod_btf_type_tag_3 {
+ struct bpf_testmod_btf_type_tag_1 __percpu *p;
+};
+
+noinline int
+bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) {
+ BTF_TYPE_EMIT(func_proto_typedef);
+ BTF_TYPE_EMIT(func_proto_typedef_nested1);
+ BTF_TYPE_EMIT(func_proto_typedef_nested2);
+ return arg->a;
+}
+
+noinline int
+bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) {
+ return arg->p->a;
+}
+
+noinline int
+bpf_testmod_test_btf_type_tag_percpu_1(struct bpf_testmod_btf_type_tag_1 __percpu *arg) {
+ return arg->a;
+}
+
+noinline int
+bpf_testmod_test_btf_type_tag_percpu_2(struct bpf_testmod_btf_type_tag_3 *arg) {
+ return arg->p->a;
+}
+
+noinline int bpf_testmod_loop_test(int n)
+{
+ /* Make sum volatile, so smart compilers, such as clang, will not
+ * optimize the code by removing the loop.
+ */
+ volatile int sum = 0;
+ int i;
+
+ /* the primary goal of this test is to test LBR. Create a lot of
+ * branches in the function, so we can catch it easily.
+ */
+ for (i = 0; i < n; i++)
+ sum += i;
+ return sum;
+}
+
+__weak noinline struct file *bpf_testmod_return_ptr(int arg)
+{
+ static struct file f = {};
+
+ switch (arg) {
+ case 1: return (void *)EINVAL; /* user addr */
+ case 2: return (void *)0xcafe4a11; /* user addr */
+ case 3: return (void *)-EINVAL; /* canonical, but invalid */
+ case 4: return (void *)(1ull << 60); /* non-canonical and invalid */
+ case 5: return (void *)~(1ull << 30); /* trigger extable */
+ case 6: return &f; /* valid addr */
+ case 7: return (void *)((long)&f | 1); /* kernel tricks */
+#ifdef CONFIG_X86_64
+ case 8: return (void *)VSYSCALL_ADDR; /* vsyscall page address */
+#endif
+ default: return NULL;
+ }
+}
+
+noinline int bpf_testmod_fentry_test1(int a)
+{
+ return a + 1;
+}
+
+noinline int bpf_testmod_fentry_test2(int a, u64 b)
+{
+ return a + b;
+}
+
+noinline int bpf_testmod_fentry_test3(char a, int b, u64 c)
+{
+ return a + b + c;
+}
+
+noinline int bpf_testmod_fentry_test7(u64 a, void *b, short c, int d,
+ void *e, char f, int g)
+{
+ return a + (long)b + c + d + (long)e + f + g;
+}
+
+noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d,
+ void *e, char f, int g,
+ unsigned int h, long i, __u64 j,
+ unsigned long k)
+{
+ return a + (long)b + c + d + (long)e + f + g + h + i + j + k;
+}
+
+noinline void bpf_testmod_stacktrace_test(void)
+{
+ /* used for stacktrace test as attach function */
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_3(void)
+{
+ bpf_testmod_stacktrace_test();
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_2(void)
+{
+ bpf_testmod_stacktrace_test_3();
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_1(void)
+{
+ bpf_testmod_stacktrace_test_2();
+ asm volatile ("");
+}
+
+int bpf_testmod_fentry_ok;
+
+noinline ssize_t
+bpf_testmod_test_read(struct file *file, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
+{
+ struct bpf_testmod_test_read_ctx ctx = {
+ .buf = buf,
+ .off = off,
+ .len = len,
+ };
+ struct bpf_testmod_struct_arg_1 struct_arg1 = {10}, struct_arg1_2 = {-1};
+ struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3};
+ struct bpf_testmod_struct_arg_3 *struct_arg3;
+ struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22};
+ struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26};
+ union bpf_testmod_union_arg_1 union_arg1 = { .arg = {1} };
+ union bpf_testmod_union_arg_2 union_arg2 = { .arg = {2, 3} };
+ int i = 1;
+
+ while (bpf_testmod_return_ptr(i))
+ i++;
+
+ (void)bpf_testmod_test_struct_arg_1(struct_arg2, 1, 4);
+ (void)bpf_testmod_test_struct_arg_2(1, struct_arg2, 4);
+ (void)bpf_testmod_test_struct_arg_3(1, 4, struct_arg2);
+ (void)bpf_testmod_test_struct_arg_4(struct_arg1, 1, 2, 3, struct_arg2);
+ (void)bpf_testmod_test_struct_arg_5();
+ (void)bpf_testmod_test_struct_arg_7(16, (void *)17, 18, 19,
+ (void *)20, struct_arg4);
+ (void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19,
+ (void *)20, struct_arg4, 23);
+ (void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20,
+ 21, 22, struct_arg5, 27);
+
+ (void)bpf_testmod_test_union_arg_1(union_arg1, 4, 5);
+ (void)bpf_testmod_test_union_arg_2(6, union_arg2);
+
+ (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2);
+
+ (void)trace_bpf_testmod_test_raw_tp_null_tp(NULL);
+
+ bpf_testmod_test_struct_ops3();
+
+ struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) +
+ sizeof(int)), GFP_KERNEL);
+ if (struct_arg3 != NULL) {
+ struct_arg3->b[0] = 1;
+ (void)bpf_testmod_test_struct_arg_6(struct_arg3);
+ kfree(struct_arg3);
+ }
+
+ /* This is always true. Use the check to make sure the compiler
+ * doesn't remove bpf_testmod_loop_test.
+ */
+ if (bpf_testmod_loop_test(101) > 100)
+ trace_bpf_testmod_test_read(current, &ctx);
+
+ trace_bpf_testmod_test_nullable_bare_tp(NULL);
+
+ /* Magic number to enable writable tp */
+ if (len == 64) {
+ struct bpf_testmod_test_writable_ctx writable = {
+ .val = 1024,
+ };
+ trace_bpf_testmod_test_writable_bare_tp(&writable);
+ if (writable.early_ret)
+ return snprintf(buf, len, "%d\n", writable.val);
+ }
+
+ if (bpf_testmod_fentry_test1(1) != 2 ||
+ bpf_testmod_fentry_test2(2, 3) != 5 ||
+ bpf_testmod_fentry_test3(4, 5, 6) != 15 ||
+ bpf_testmod_fentry_test7(16, (void *)17, 18, 19, (void *)20,
+ 21, 22) != 133 ||
+ bpf_testmod_fentry_test11(16, (void *)17, 18, 19, (void *)20,
+ 21, 22, 23, 24, 25, 26) != 231)
+ goto out;
+
+ bpf_testmod_stacktrace_test_1();
+
+ bpf_testmod_fentry_ok = 1;
+out:
+ return -EIO; /* always fail */
+}
+EXPORT_SYMBOL(bpf_testmod_test_read);
+ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
+
+noinline ssize_t
+bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
+{
+ struct bpf_testmod_test_write_ctx ctx = {
+ .buf = buf,
+ .off = off,
+ .len = len,
+ };
+
+ trace_bpf_testmod_test_write_bare_tp(current, &ctx);
+
+ return -EIO; /* always fail */
+}
+EXPORT_SYMBOL(bpf_testmod_test_write);
+ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO);
+
+noinline int bpf_fentry_shadow_test(int a)
+{
+ return a + 2;
+}
+EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
+
+__bpf_hook_end();
+
+static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
+ .attr = { .name = "bpf_testmod", .mode = 0666, },
+ .read = bpf_testmod_test_read,
+ .write = bpf_testmod_test_write,
+};
+
+/* bpf_testmod_uprobe sysfs attribute is so far enabled for x86_64 only,
+ * please see test_uretprobe_regs_change test
+ */
+#ifdef __x86_64__
+
+static int
+uprobe_handler(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data)
+{
+ regs->cx = 0x87654321feebdaed;
+ return 0;
+}
+
+static int
+uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
+ struct pt_regs *regs, __u64 *data)
+
+{
+ regs->ax = 0x12345678deadbeef;
+ regs->r11 = (u64) -1;
+ return 0;
+}
+
+struct testmod_uprobe {
+ struct path path;
+ struct uprobe *uprobe;
+ struct uprobe_consumer consumer;
+};
+
+static DEFINE_MUTEX(testmod_uprobe_mutex);
+
+static struct testmod_uprobe uprobe = {
+ .consumer.handler = uprobe_handler,
+ .consumer.ret_handler = uprobe_ret_handler,
+};
+
+static int testmod_register_uprobe(loff_t offset)
+{
+ int err = -EBUSY;
+
+ if (uprobe.uprobe)
+ return -EBUSY;
+
+ mutex_lock(&testmod_uprobe_mutex);
+
+ if (uprobe.uprobe)
+ goto out;
+
+ err = kern_path("/proc/self/exe", LOOKUP_FOLLOW, &uprobe.path);
+ if (err)
+ goto out;
+
+ uprobe.uprobe = uprobe_register(d_real_inode(uprobe.path.dentry),
+ offset, 0, &uprobe.consumer);
+ if (IS_ERR(uprobe.uprobe)) {
+ err = PTR_ERR(uprobe.uprobe);
+ path_put(&uprobe.path);
+ uprobe.uprobe = NULL;
+ }
+out:
+ mutex_unlock(&testmod_uprobe_mutex);
+ return err;
+}
+
+static void testmod_unregister_uprobe(void)
+{
+ mutex_lock(&testmod_uprobe_mutex);
+
+ if (uprobe.uprobe) {
+ uprobe_unregister_nosync(uprobe.uprobe, &uprobe.consumer);
+ uprobe_unregister_sync();
+ path_put(&uprobe.path);
+ uprobe.uprobe = NULL;
+ }
+
+ mutex_unlock(&testmod_uprobe_mutex);
+}
+
+static ssize_t
+bpf_testmod_uprobe_write(struct file *file, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
+{
+ unsigned long offset = 0;
+ int err = 0;
+
+ if (kstrtoul(buf, 0, &offset))
+ return -EINVAL;
+
+ if (offset)
+ err = testmod_register_uprobe(offset);
+ else
+ testmod_unregister_uprobe();
+
+ return err ?: strlen(buf);
+}
+
+static struct bin_attribute bin_attr_bpf_testmod_uprobe_file __ro_after_init = {
+ .attr = { .name = "bpf_testmod_uprobe", .mode = 0666, },
+ .write = bpf_testmod_uprobe_write,
+};
+
+static int register_bpf_testmod_uprobe(void)
+{
+ return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file);
+}
+
+static void unregister_bpf_testmod_uprobe(void)
+{
+ testmod_unregister_uprobe();
+ sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file);
+}
+
+#else
+static int register_bpf_testmod_uprobe(void)
+{
+ return 0;
+}
+
+static void unregister_bpf_testmod_uprobe(void) { }
+#endif
+
+BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_value)
+BTF_ID_FLAGS(func, bpf_kfunc_common_test)
+BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test)
+BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_nonzero_offset_test, KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_kfunc_nested_acquire_zero_offset_test, KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_kfunc_nested_release_test, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test, KF_RET_NULL | KF_RCU_PROTECTED)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test_nostruct, KF_RET_NULL | KF_RCU_PROTECTED)
+BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1)
+BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_2)
+BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
+
+BTF_ID_LIST(bpf_testmod_dtor_ids)
+BTF_ID(struct, bpf_testmod_ctx)
+BTF_ID(func, bpf_testmod_ctx_release)
+
+static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_testmod_common_kfunc_ids,
+};
+
+__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+{
+ return a + b + c + d;
+}
+
+__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+{
+ return a + b;
+}
+
+__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
+{
+ return sk;
+}
+
+__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
+{
+ /* Provoke the compiler to assume that the caller has sign-extended a,
+ * b and c on platforms where this is required (e.g. s390x).
+ */
+ return (long)a + (long)b + (long)c + d;
+}
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+ .a = 42,
+ .b = 108,
+ .next = &prog_test_struct,
+ .cnt = REFCOUNT_INIT(1),
+};
+
+__bpf_kfunc struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+ refcount_inc(&prog_test_struct.cnt);
+ return &prog_test_struct;
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p)
+{
+ WARN_ON_ONCE(1);
+}
+
+__bpf_kfunc struct prog_test_member *
+bpf_kfunc_call_memb_acquire(void)
+{
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+{
+ WARN_ON_ONCE(1);
+}
+
+static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size)
+{
+ if (size > 2 * sizeof(int))
+ return NULL;
+
+ return (int *)p;
+}
+
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p,
+ const int rdwr_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
+}
+
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+/* the next 2 ones can't be really used for testing expect to ensure
+ * that the verifier rejects the call.
+ * Acquire functions must return struct pointers, so these ones are
+ * failing.
+ */
+__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
+{
+ /* p != NULL, but p->cnt could be 0 */
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
+{
+}
+
+__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused)
+{
+ return arg;
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_sleepable(void)
+{
+}
+
+__bpf_kfunc int bpf_kfunc_init_sock(struct init_sock_args *args)
+{
+ int proto;
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (sock) {
+ pr_err("%s called without releasing old sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ switch (args->af) {
+ case AF_INET:
+ case AF_INET6:
+ proto = args->type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ break;
+ case AF_UNIX:
+ proto = PF_UNIX;
+ break;
+ default:
+ pr_err("invalid address family %d\n", args->af);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = sock_create_kern(current->nsproxy->net_ns, args->af, args->type,
+ proto, &sock);
+
+ if (!err)
+ /* Set timeout for call to kernel_connect() to prevent it from hanging,
+ * and consider the connection attempt failed if it returns
+ * -EINPROGRESS.
+ */
+ sock->sk->sk_sndtimeo = CONNECT_TIMEOUT_SEC * HZ;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc void bpf_kfunc_close_sock(void)
+{
+ mutex_lock(&sock_lock);
+
+ if (sock) {
+ sock_release(sock);
+ sock = NULL;
+ }
+
+ mutex_unlock(&sock_lock);
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args)
+{
+ int err;
+
+ if (args->addrlen > sizeof(args->addr))
+ return -EINVAL;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_connect(sock, (struct sockaddr_unsized *)&args->addr,
+ args->addrlen, 0);
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args)
+{
+ int err;
+
+ if (args->addrlen > sizeof(args->addr))
+ return -EINVAL;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_bind(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen);
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_listen(void)
+{
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_listen(sock, 128);
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args)
+{
+ struct msghdr msg = {
+ .msg_name = &args->addr.addr,
+ .msg_namelen = args->addr.addrlen,
+ };
+ struct kvec iov;
+ int err;
+
+ if (args->addr.addrlen > sizeof(args->addr.addr) ||
+ args->msglen > sizeof(args->msg))
+ return -EINVAL;
+
+ iov.iov_base = args->msg;
+ iov.iov_len = args->msglen;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_sendmsg(sock, &msg, &iov, 1, args->msglen);
+ args->addr.addrlen = msg.msg_namelen;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args)
+{
+ struct msghdr msg = {
+ .msg_name = &args->addr.addr,
+ .msg_namelen = args->addr.addrlen,
+ };
+ struct kvec iov;
+ int err;
+
+ if (args->addr.addrlen > sizeof(args->addr.addr) ||
+ args->msglen > sizeof(args->msg))
+ return -EINVAL;
+
+ iov.iov_base = args->msg;
+ iov.iov_len = args->msglen;
+
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, args->msglen);
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = sock_sendmsg(sock, &msg);
+ args->addr.addrlen = msg.msg_namelen;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_getsockname(struct addr_args *args)
+{
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_getsockname(sock, (struct sockaddr *)&args->addr);
+ if (err < 0)
+ goto out;
+
+ args->addrlen = err;
+ err = 0;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_getpeername(struct addr_args *args)
+{
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_getpeername(sock, (struct sockaddr *)&args->addr);
+ if (err < 0)
+ goto out;
+
+ args->addrlen = err;
+ err = 0;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+static DEFINE_MUTEX(st_ops_mutex);
+static struct bpf_testmod_st_ops *st_ops;
+
+__bpf_kfunc int bpf_kfunc_st_ops_test_prologue(struct st_ops_args *args)
+{
+ int ret = -1;
+
+ mutex_lock(&st_ops_mutex);
+ if (st_ops && st_ops->test_prologue)
+ ret = st_ops->test_prologue(args);
+ mutex_unlock(&st_ops_mutex);
+
+ return ret;
+}
+
+__bpf_kfunc int bpf_kfunc_st_ops_test_epilogue(struct st_ops_args *args)
+{
+ int ret = -1;
+
+ mutex_lock(&st_ops_mutex);
+ if (st_ops && st_ops->test_epilogue)
+ ret = st_ops->test_epilogue(args);
+ mutex_unlock(&st_ops_mutex);
+
+ return ret;
+}
+
+__bpf_kfunc int bpf_kfunc_st_ops_test_pro_epilogue(struct st_ops_args *args)
+{
+ int ret = -1;
+
+ mutex_lock(&st_ops_mutex);
+ if (st_ops && st_ops->test_pro_epilogue)
+ ret = st_ops->test_pro_epilogue(args);
+ mutex_unlock(&st_ops_mutex);
+
+ return ret;
+}
+
+__bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args)
+{
+ args->a += 10;
+ return args->a;
+}
+
+__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id);
+
+BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_sleepable, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_init_sock, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_close_sock, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_connect, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_bind, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_listen, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
+
+static int bpf_testmod_ops_init(struct btf *btf)
+{
+ return 0;
+}
+
+static bool bpf_testmod_ops_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
+}
+
+static int bpf_testmod_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ if (member->offset == offsetof(struct bpf_testmod_ops, data) * 8) {
+ /* For data fields, this function has to copy it and return
+ * 1 to indicate that the data has been handled by the
+ * struct_ops type, or the verifier will reject the map if
+ * the value of the data field is not zero.
+ */
+ ((struct bpf_testmod_ops *)kdata)->data = ((struct bpf_testmod_ops *)udata)->data;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_testmod_check_kfunc_ids,
+};
+
+static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
+ .get_func_proto = bpf_base_func_proto,
+ .is_valid_access = bpf_testmod_ops_is_valid_access,
+};
+
+static const struct bpf_verifier_ops bpf_testmod_verifier_ops3 = {
+ .is_valid_access = bpf_testmod_ops_is_valid_access,
+};
+
+static int bpf_dummy_reg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_ops *ops = kdata;
+
+ if (ops->test_1)
+ ops->test_1();
+ /* Some test cases (ex. struct_ops_maybe_null) may not have test_2
+ * initialized, so we need to check for NULL.
+ */
+ if (ops->test_2)
+ ops->test_2(4, ops->data);
+
+ return 0;
+}
+
+static void bpf_dummy_unreg(void *kdata, struct bpf_link *link)
+{
+}
+
+static int bpf_testmod_test_1(void)
+{
+ return 0;
+}
+
+static void bpf_testmod_test_2(int a, int b)
+{
+}
+
+static int bpf_testmod_tramp(int value)
+{
+ return 0;
+}
+
+static int bpf_testmod_ops__test_maybe_null(int dummy,
+ struct task_struct *task__nullable)
+{
+ return 0;
+}
+
+static int bpf_testmod_ops__test_refcounted(int dummy,
+ struct task_struct *task__ref)
+{
+ return 0;
+}
+
+static struct task_struct *
+bpf_testmod_ops__test_return_ref_kptr(int dummy, struct task_struct *task__ref,
+ struct cgroup *cgrp)
+{
+ return NULL;
+}
+
+static struct bpf_testmod_ops __bpf_testmod_ops = {
+ .test_1 = bpf_testmod_test_1,
+ .test_2 = bpf_testmod_test_2,
+ .test_maybe_null = bpf_testmod_ops__test_maybe_null,
+ .test_refcounted = bpf_testmod_ops__test_refcounted,
+ .test_return_ref_kptr = bpf_testmod_ops__test_return_ref_kptr,
+};
+
+struct bpf_struct_ops bpf_bpf_testmod_ops = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = bpf_testmod_ops_init,
+ .init_member = bpf_testmod_ops_init_member,
+ .reg = bpf_dummy_reg,
+ .unreg = bpf_dummy_unreg,
+ .cfi_stubs = &__bpf_testmod_ops,
+ .name = "bpf_testmod_ops",
+ .owner = THIS_MODULE,
+};
+
+static int bpf_dummy_reg2(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_ops2 *ops = kdata;
+
+ ops->test_1();
+ return 0;
+}
+
+static struct bpf_testmod_ops2 __bpf_testmod_ops2 = {
+ .test_1 = bpf_testmod_test_1,
+};
+
+struct bpf_struct_ops bpf_testmod_ops2 = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = bpf_testmod_ops_init,
+ .init_member = bpf_testmod_ops_init_member,
+ .reg = bpf_dummy_reg2,
+ .unreg = bpf_dummy_unreg,
+ .cfi_stubs = &__bpf_testmod_ops2,
+ .name = "bpf_testmod_ops2",
+ .owner = THIS_MODULE,
+};
+
+static int st_ops3_reg(void *kdata, struct bpf_link *link)
+{
+ int err = 0;
+
+ mutex_lock(&st_ops_mutex);
+ if (st_ops3) {
+ pr_err("st_ops has already been registered\n");
+ err = -EEXIST;
+ goto unlock;
+ }
+ st_ops3 = kdata;
+
+unlock:
+ mutex_unlock(&st_ops_mutex);
+ return err;
+}
+
+static void st_ops3_unreg(void *kdata, struct bpf_link *link)
+{
+ mutex_lock(&st_ops_mutex);
+ st_ops3 = NULL;
+ mutex_unlock(&st_ops_mutex);
+}
+
+static void test_1_recursion_detected(struct bpf_prog *prog)
+{
+ struct bpf_prog_stats *stats;
+
+ stats = this_cpu_ptr(prog->stats);
+ printk("bpf_testmod: oh no, recursing into test_1, recursion_misses %llu",
+ u64_stats_read(&stats->misses));
+}
+
+static int st_ops3_check_member(const struct btf_type *t,
+ const struct btf_member *member,
+ const struct bpf_prog *prog)
+{
+ u32 moff = __btf_member_bit_offset(t, member) / 8;
+
+ switch (moff) {
+ case offsetof(struct bpf_testmod_ops3, test_1):
+ prog->aux->priv_stack_requested = true;
+ prog->aux->recursion_detected = test_1_recursion_detected;
+ fallthrough;
+ default:
+ break;
+ }
+ return 0;
+}
+
+struct bpf_struct_ops bpf_testmod_ops3 = {
+ .verifier_ops = &bpf_testmod_verifier_ops3,
+ .init = bpf_testmod_ops_init,
+ .init_member = bpf_testmod_ops_init_member,
+ .reg = st_ops3_reg,
+ .unreg = st_ops3_unreg,
+ .check_member = st_ops3_check_member,
+ .cfi_stubs = &__bpf_testmod_ops3,
+ .name = "bpf_testmod_ops3",
+ .owner = THIS_MODULE,
+};
+
+static int bpf_test_mod_st_ops__test_prologue(struct st_ops_args *args)
+{
+ return 0;
+}
+
+static int bpf_test_mod_st_ops__test_epilogue(struct st_ops_args *args)
+{
+ return 0;
+}
+
+static int bpf_test_mod_st_ops__test_pro_epilogue(struct st_ops_args *args)
+{
+ return 0;
+}
+
+static int bpf_cgroup_from_id_id;
+static int bpf_cgroup_release_id;
+
+static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r8 = r1; // r8 will be "u64 *ctx".
+ * r1 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+5;
+ * r6 = r8[0]; // r6 will be "struct st_ops *args".
+ * r7 = r6->a;
+ * r7 += 1000;
+ * r6->a = r7;
+ * goto pc+2;
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r1 = r8;
+ */
+ *insn++ = BPF_MOV64_REG(BPF_REG_8, BPF_REG_1);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 5);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_8, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8);
+ *insn++ = prog->insnsi[0];
+
+ return insn - insn_buf;
+}
+
+static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
+ s16 ctx_stack_off)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r1 = 0;
+ * r6 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+6;
+ * r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
+ * r1 = r1[0]; // r1 will be "struct st_ops *args"
+ * r6 = r1->a;
+ * r6 += 10000;
+ * r1->a = r6;
+ * goto pc+2
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r0 = r6;
+ * r0 *= 2;
+ * BPF_EXIT;
+ */
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_6, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 6);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
+ *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
+ *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
+ *insn++ = BPF_EXIT_INSN();
+
+ return insn - insn_buf;
+}
+
+#define KFUNC_PRO_EPI_PREFIX "test_kfunc_"
+static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ if (strcmp(prog->aux->attach_func_name, "test_prologue") &&
+ strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
+ return 0;
+
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_prologue_with_kfunc(insn_buf, direct_write, prog);
+
+ /* r6 = r1[0]; // r6 will be "struct st_ops *args". r1 is "u64 *ctx".
+ * r7 = r6->a;
+ * r7 += 1000;
+ * r6->a = r7;
+ */
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
+ *insn++ = prog->insnsi[0];
+
+ return insn - insn_buf;
+}
+
+static int st_ops_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
+ s16 ctx_stack_off)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ if (strcmp(prog->aux->attach_func_name, "test_epilogue") &&
+ strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
+ return 0;
+
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_epilogue_with_kfunc(insn_buf, prog, ctx_stack_off);
+
+ /* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
+ * r1 = r1[0]; // r1 will be "struct st_ops *args"
+ * r6 = r1->a;
+ * r6 += 10000;
+ * r1->a = r6;
+ * r0 = r6;
+ * r0 *= 2;
+ * BPF_EXIT;
+ */
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
+ *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
+ *insn++ = BPF_EXIT_INSN();
+
+ return insn - insn_buf;
+}
+
+static int st_ops_btf_struct_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size)
+{
+ if (off < 0 || off + size > sizeof(struct st_ops_args))
+ return -EACCES;
+ return 0;
+}
+
+static const struct bpf_verifier_ops st_ops_verifier_ops = {
+ .is_valid_access = bpf_testmod_ops_is_valid_access,
+ .btf_struct_access = st_ops_btf_struct_access,
+ .gen_prologue = st_ops_gen_prologue,
+ .gen_epilogue = st_ops_gen_epilogue,
+ .get_func_proto = bpf_base_func_proto,
+};
+
+static struct bpf_testmod_st_ops st_ops_cfi_stubs = {
+ .test_prologue = bpf_test_mod_st_ops__test_prologue,
+ .test_epilogue = bpf_test_mod_st_ops__test_epilogue,
+ .test_pro_epilogue = bpf_test_mod_st_ops__test_pro_epilogue,
+};
+
+static int st_ops_reg(void *kdata, struct bpf_link *link)
+{
+ int err = 0;
+
+ mutex_lock(&st_ops_mutex);
+ if (st_ops) {
+ pr_err("st_ops has already been registered\n");
+ err = -EEXIST;
+ goto unlock;
+ }
+ st_ops = kdata;
+
+unlock:
+ mutex_unlock(&st_ops_mutex);
+ return err;
+}
+
+static void st_ops_unreg(void *kdata, struct bpf_link *link)
+{
+ mutex_lock(&st_ops_mutex);
+ st_ops = NULL;
+ mutex_unlock(&st_ops_mutex);
+}
+
+static int st_ops_init(struct btf *btf)
+{
+ struct btf *kfunc_btf;
+
+ bpf_cgroup_from_id_id = bpf_find_btf_id("bpf_cgroup_from_id", BTF_KIND_FUNC, &kfunc_btf);
+ bpf_cgroup_release_id = bpf_find_btf_id("bpf_cgroup_release", BTF_KIND_FUNC, &kfunc_btf);
+ if (bpf_cgroup_from_id_id < 0 || bpf_cgroup_release_id < 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int st_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static struct bpf_struct_ops testmod_st_ops = {
+ .verifier_ops = &st_ops_verifier_ops,
+ .init = st_ops_init,
+ .init_member = st_ops_init_member,
+ .reg = st_ops_reg,
+ .unreg = st_ops_unreg,
+ .cfi_stubs = &st_ops_cfi_stubs,
+ .name = "bpf_testmod_st_ops",
+ .owner = THIS_MODULE,
+};
+
+struct hlist_head multi_st_ops_list;
+static DEFINE_SPINLOCK(multi_st_ops_lock);
+
+static int multi_st_ops_init(struct btf *btf)
+{
+ spin_lock_init(&multi_st_ops_lock);
+ INIT_HLIST_HEAD(&multi_st_ops_list);
+
+ return 0;
+}
+
+static int multi_st_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+
+ hlist_for_each_entry(st_ops, &multi_st_ops_list, node) {
+ if (st_ops->id == id)
+ return st_ops;
+ }
+
+ return NULL;
+}
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ int ret = -1;
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ ret = st_ops->test_1(args);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return ret;
+}
+
+static int multi_st_ops_reg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops =
+ (struct bpf_testmod_multi_st_ops *)kdata;
+ unsigned long flags;
+ int err = 0;
+ u32 id;
+
+ if (!st_ops->test_1)
+ return -EINVAL;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ if (multi_st_ops_find_nolock(id)) {
+ pr_err("multi_st_ops(id:%d) has already been registered\n", id);
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ st_ops->id = id;
+ hlist_add_head(&st_ops->node, &multi_st_ops_list);
+unlock:
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return err;
+}
+
+static void multi_st_ops_unreg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ u32 id;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ hlist_del(&st_ops->node);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+}
+
+static int bpf_testmod_multi_st_ops__test_1(struct st_ops_args *args)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops multi_st_ops_cfi_stubs = {
+ .test_1 = bpf_testmod_multi_st_ops__test_1,
+};
+
+struct bpf_struct_ops testmod_multi_st_ops = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = multi_st_ops_init,
+ .init_member = multi_st_ops_init_member,
+ .reg = multi_st_ops_reg,
+ .unreg = multi_st_ops_unreg,
+ .cfi_stubs = &multi_st_ops_cfi_stubs,
+ .name = "bpf_testmod_multi_st_ops",
+ .owner = THIS_MODULE,
+};
+
+extern int bpf_fentry_test1(int a);
+
+static int bpf_testmod_init(void)
+{
+ const struct btf_id_dtor_kfunc bpf_testmod_dtors[] = {
+ {
+ .btf_id = bpf_testmod_dtor_ids[0],
+ .kfunc_btf_id = bpf_testmod_dtor_ids[1]
+ },
+ };
+ void **tramp;
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops);
+ ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
+ ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3);
+ ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops);
+ ret = ret ?: register_bpf_struct_ops(&testmod_multi_st_ops, bpf_testmod_multi_st_ops);
+ ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors,
+ ARRAY_SIZE(bpf_testmod_dtors),
+ THIS_MODULE);
+ if (ret < 0)
+ return ret;
+ if (bpf_fentry_test1(0) < 0)
+ return -EINVAL;
+ sock = NULL;
+ mutex_init(&sock_lock);
+ ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+ if (ret < 0)
+ return ret;
+ ret = register_bpf_testmod_uprobe();
+ if (ret < 0)
+ return ret;
+
+ /* Ensure nothing is between tramp_1..tramp_40 */
+ BUILD_BUG_ON(offsetof(struct bpf_testmod_ops, tramp_1) + 40 * sizeof(long) !=
+ offsetofend(struct bpf_testmod_ops, tramp_40));
+ tramp = (void **)&__bpf_testmod_ops.tramp_1;
+ while (tramp <= (void **)&__bpf_testmod_ops.tramp_40)
+ *tramp++ = bpf_testmod_tramp;
+
+ return 0;
+}
+
+static void bpf_testmod_exit(void)
+{
+ /* Need to wait for all references to be dropped because
+ * bpf_kfunc_call_test_release() which currently resides in kernel can
+ * be called after bpf_testmod is unloaded. Once release function is
+ * moved into the module this wait can be removed.
+ */
+ while (refcount_read(&prog_test_struct.cnt) > 1)
+ msleep(20);
+
+ bpf_kfunc_close_sock();
+ sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+ unregister_bpf_testmod_uprobe();
+}
+
+module_init(bpf_testmod_init);
+module_exit(bpf_testmod_exit);
+
+MODULE_AUTHOR("Andrii Nakryiko");
+MODULE_DESCRIPTION("BPF selftests module");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
new file mode 100644
index 000000000000..f6e492f9d042
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#ifndef _BPF_TESTMOD_H
+#define _BPF_TESTMOD_H
+
+#include <linux/types.h>
+
+struct task_struct;
+struct cgroup;
+
+struct bpf_testmod_test_read_ctx {
+ char *buf;
+ loff_t off;
+ size_t len;
+};
+
+struct bpf_testmod_test_write_ctx {
+ char *buf;
+ loff_t off;
+ size_t len;
+};
+
+struct bpf_testmod_test_writable_ctx {
+ bool early_ret;
+ int val;
+};
+
+/* BPF iter that returns *value* *n* times in a row */
+struct bpf_iter_testmod_seq {
+ s64 value;
+ int cnt;
+};
+
+struct bpf_testmod_ops {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ /* Used to test nullable arguments. */
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+ int (*unsupported_ops)(void);
+ /* Used to test ref_acquired arguments. */
+ int (*test_refcounted)(int dummy, struct task_struct *task);
+ /* Used to test returning referenced kptr. */
+ struct task_struct *(*test_return_ref_kptr)(int dummy, struct task_struct *task,
+ struct cgroup *cgrp);
+
+ /* The following fields are used to test shadow copies. */
+ char onebyte;
+ struct {
+ int a;
+ int b;
+ } unsupported;
+ int data;
+
+ /* The following pointers are used to test the maps having multiple
+ * pages of trampolines.
+ */
+ int (*tramp_1)(int value);
+ int (*tramp_2)(int value);
+ int (*tramp_3)(int value);
+ int (*tramp_4)(int value);
+ int (*tramp_5)(int value);
+ int (*tramp_6)(int value);
+ int (*tramp_7)(int value);
+ int (*tramp_8)(int value);
+ int (*tramp_9)(int value);
+ int (*tramp_10)(int value);
+ int (*tramp_11)(int value);
+ int (*tramp_12)(int value);
+ int (*tramp_13)(int value);
+ int (*tramp_14)(int value);
+ int (*tramp_15)(int value);
+ int (*tramp_16)(int value);
+ int (*tramp_17)(int value);
+ int (*tramp_18)(int value);
+ int (*tramp_19)(int value);
+ int (*tramp_20)(int value);
+ int (*tramp_21)(int value);
+ int (*tramp_22)(int value);
+ int (*tramp_23)(int value);
+ int (*tramp_24)(int value);
+ int (*tramp_25)(int value);
+ int (*tramp_26)(int value);
+ int (*tramp_27)(int value);
+ int (*tramp_28)(int value);
+ int (*tramp_29)(int value);
+ int (*tramp_30)(int value);
+ int (*tramp_31)(int value);
+ int (*tramp_32)(int value);
+ int (*tramp_33)(int value);
+ int (*tramp_34)(int value);
+ int (*tramp_35)(int value);
+ int (*tramp_36)(int value);
+ int (*tramp_37)(int value);
+ int (*tramp_38)(int value);
+ int (*tramp_39)(int value);
+ int (*tramp_40)(int value);
+};
+
+struct bpf_testmod_ops2 {
+ int (*test_1)(void);
+};
+
+struct bpf_testmod_ops3 {
+ int (*test_1)(void);
+ int (*test_2)(void);
+};
+
+struct st_ops_args {
+ u64 a;
+};
+
+struct bpf_testmod_st_ops {
+ int (*test_prologue)(struct st_ops_args *args);
+ int (*test_epilogue)(struct st_ops_args *args);
+ int (*test_pro_epilogue)(struct st_ops_args *args);
+ struct module *owner;
+};
+
+struct bpf_testmod_multi_st_ops {
+ int (*test_1)(struct st_ops_args *args);
+ struct hlist_node node;
+ int id;
+};
+
+#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
new file mode 100644
index 000000000000..4df6fa6a92cb
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_TESTMOD_KFUNC_H
+#define _BPF_TESTMOD_KFUNC_H
+
+#ifndef __KERNEL__
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#else
+#define __ksym
+struct prog_test_member1 {
+ int a;
+};
+
+struct prog_test_member {
+ struct prog_test_member1 m;
+ int c;
+};
+
+struct prog_test_ref_kfunc {
+ int a;
+ int b;
+ struct prog_test_member memb;
+ struct prog_test_ref_kfunc *next;
+ refcount_t cnt;
+};
+#endif
+
+struct prog_test_pass1 {
+ int x0;
+ struct {
+ int x1;
+ struct {
+ int x2;
+ struct {
+ int x3;
+ };
+ };
+ };
+};
+
+struct prog_test_pass2 {
+ int len;
+ short arr1[4];
+ struct {
+ char arr2[4];
+ unsigned long arr3[8];
+ } x;
+};
+
+struct prog_test_fail1 {
+ void *p;
+ int x;
+};
+
+struct prog_test_fail2 {
+ int x8;
+ struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+ int len;
+ char arr1[2];
+ char arr2[];
+};
+
+struct init_sock_args {
+ int af;
+ int type;
+};
+
+struct addr_args {
+ char addr[sizeof(struct __kernel_sockaddr_storage)];
+ int addrlen;
+};
+
+struct sendmsg_args {
+ struct addr_args addr;
+ char msg[10];
+ int msglen;
+};
+
+struct bpf_testmod_ctx {
+ struct callback_head rcu;
+ refcount_t usage;
+};
+
+struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
+void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym;
+
+void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
+int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+void bpf_kfunc_call_int_mem_release(int *p) __ksym;
+
+/* The bpf_kfunc_call_test_static_unused_arg is defined as static,
+ * but bpf program compilation needs to see it as global symbol.
+ */
+#ifndef __KERNEL__
+u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym;
+#endif
+
+void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+__u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+ __u32 c, __u64 d) __ksym;
+int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
+struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
+long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
+
+void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
+void bpf_kfunc_call_test_destructive(void) __ksym;
+void bpf_kfunc_call_test_sleepable(void) __ksym;
+
+void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p);
+struct prog_test_member *bpf_kfunc_call_memb_acquire(void);
+void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p);
+void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p);
+void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p);
+void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p);
+void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len);
+
+void bpf_kfunc_common_test(void) __ksym;
+
+int bpf_kfunc_init_sock(struct init_sock_args *args) __ksym;
+void bpf_kfunc_close_sock(void) __ksym;
+int bpf_kfunc_call_kernel_connect(struct addr_args *args) __ksym;
+int bpf_kfunc_call_kernel_bind(struct addr_args *args) __ksym;
+int bpf_kfunc_call_kernel_listen(void) __ksym;
+int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args) __ksym;
+int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) __ksym;
+int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) __ksym;
+int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) __ksym;
+
+void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, struct bpf_dynptr *ptr__nullable) __ksym;
+
+struct bpf_testmod_ctx *bpf_testmod_ctx_create(int *err) __ksym;
+void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) __ksym;
+
+struct sk_buff *bpf_kfunc_nested_acquire_nonzero_offset_test(struct sk_buff_head *ptr) __ksym;
+struct sk_buff *bpf_kfunc_nested_acquire_zero_offset_test(struct sock_common *ptr) __ksym;
+void bpf_kfunc_nested_release_test(struct sk_buff *ptr) __ksym;
+
+struct st_ops_args;
+int bpf_kfunc_st_ops_test_prologue(struct st_ops_args *args) __ksym;
+int bpf_kfunc_st_ops_test_epilogue(struct st_ops_args *args) __ksym;
+int bpf_kfunc_st_ops_test_pro_epilogue(struct st_ops_args *args) __ksym;
+int bpf_kfunc_st_ops_inc10(struct st_ops_args *args) __ksym;
+
+void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym;
+void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym;
+void bpf_kfunc_trusted_num_test(int *ptr) __ksym;
+void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym;
+struct task_struct *bpf_kfunc_ret_rcu_test(void) __ksym;
+int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) __ksym;
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym;
+
+#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
index ec4e15948e40..5252b91f48a1 100755
--- a/tools/testing/selftests/bpf/test_lirc_mode2.sh
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -3,6 +3,7 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+ret=$ksft_skip
msg="skip all tests:"
if [ $UID != 0 ]; then
@@ -25,7 +26,7 @@ do
fi
done
-if [ -n $LIRCDEV ];
+if [ -n "$LIRCDEV" ];
then
TYPE=lirc_mode2
./test_lirc_mode2_user $LIRCDEV $INPUTDEV
@@ -36,3 +37,5 @@ then
echo -e ${GREEN}"PASS: $TYPE"${NC}
fi
fi
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
index fb5fd6841ef3..88e4aeab21b7 100644
--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -28,7 +28,6 @@
// 5. We can read keycode from same /dev/lirc device
#include <linux/bpf.h>
-#include <linux/lirc.h>
#include <linux/input.h>
#include <errno.h>
#include <stdio.h>
@@ -45,6 +44,8 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "testing_helpers.h"
+
int main(int argc, char **argv)
{
struct bpf_object *obj;
@@ -58,8 +59,8 @@ int main(int argc, char **argv)
return 2;
}
- ret = bpf_prog_load("test_lirc_mode2_kern.o",
- BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
+ ret = bpf_prog_test_load("test_lirc_mode2_kern.bpf.o",
+ BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
if (ret) {
printf("Failed to load bpf program\n");
return 1;
@@ -73,7 +74,7 @@ int main(int argc, char **argv)
/* Let's try detach it before it was ever attached */
ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
- if (ret != -1 || errno != ENOENT) {
+ if (ret != -ENOENT) {
printf("bpf_prog_detach2 not attached should fail: %m\n");
return 1;
}
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
new file mode 100644
index 000000000000..338c035c3688
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -0,0 +1,1406 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <linux/capability.h>
+#include <stdlib.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "autoconf_helper.h"
+#include "disasm_helpers.h"
+#include "unpriv_helpers.h"
+#include "cap_helpers.h"
+#include "jit_disasm_helpers.h"
+
+#define str_has_pfx(str, pfx) \
+ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
+
+#define TEST_LOADER_LOG_BUF_SZ 2097152
+
+#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
+#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
+#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX "comment:test_expect_not_msg="
+#define TEST_TAG_EXPECT_XLATED_PFX "comment:test_expect_xlated="
+#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
+#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
+#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV "comment:test_expect_not_msg_unpriv="
+#define TEST_TAG_EXPECT_XLATED_PFX_UNPRIV "comment:test_expect_xlated_unpriv="
+#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
+#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
+#define TEST_TAG_DESCRIPTION_PFX "comment:test_description="
+#define TEST_TAG_RETVAL_PFX "comment:test_retval="
+#define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv="
+#define TEST_TAG_AUXILIARY "comment:test_auxiliary"
+#define TEST_TAG_AUXILIARY_UNPRIV "comment:test_auxiliary_unpriv"
+#define TEST_BTF_PATH "comment:test_btf_path="
+#define TEST_TAG_ARCH "comment:test_arch="
+#define TEST_TAG_JITED_PFX "comment:test_jited="
+#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
+#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
+#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
+#define TEST_TAG_EXPECT_STDERR_PFX "comment:test_expect_stderr="
+#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
+#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
+#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
+#define TEST_TAG_LINEAR_SIZE "comment:test_linear_size="
+
+/* Warning: duplicated in bpf_misc.h */
+#define POINTER_VALUE 0xbadcafe
+#define TEST_DATA_LEN 64
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#define EFFICIENT_UNALIGNED_ACCESS 1
+#else
+#define EFFICIENT_UNALIGNED_ACCESS 0
+#endif
+
+static int sysctl_unpriv_disabled = -1;
+
+enum mode {
+ PRIV = 1,
+ UNPRIV = 2
+};
+
+enum load_mode {
+ JITED = 1 << 0,
+ NO_JITED = 1 << 1,
+};
+
+struct test_subspec {
+ char *name;
+ bool expect_failure;
+ struct expected_msgs expect_msgs;
+ struct expected_msgs expect_xlated;
+ struct expected_msgs jited;
+ struct expected_msgs stderr;
+ struct expected_msgs stdout;
+ int retval;
+ bool execute;
+ __u64 caps;
+};
+
+struct test_spec {
+ const char *prog_name;
+ struct test_subspec priv;
+ struct test_subspec unpriv;
+ const char *btf_custom_path;
+ int log_level;
+ int prog_flags;
+ int mode_mask;
+ int arch_mask;
+ int load_mask;
+ int linear_sz;
+ bool auxiliary;
+ bool valid;
+};
+
+static int tester_init(struct test_loader *tester)
+{
+ if (!tester->log_buf) {
+ tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ;
+ tester->log_buf = calloc(tester->log_buf_sz, 1);
+ if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf"))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void test_loader_fini(struct test_loader *tester)
+{
+ if (!tester)
+ return;
+
+ free(tester->log_buf);
+}
+
+static void free_msgs(struct expected_msgs *msgs)
+{
+ int i;
+
+ for (i = 0; i < msgs->cnt; i++)
+ if (msgs->patterns[i].is_regex)
+ regfree(&msgs->patterns[i].regex);
+ free(msgs->patterns);
+ msgs->patterns = NULL;
+ msgs->cnt = 0;
+}
+
+static void free_test_spec(struct test_spec *spec)
+{
+ /* Deallocate expect_msgs arrays. */
+ free_msgs(&spec->priv.expect_msgs);
+ free_msgs(&spec->unpriv.expect_msgs);
+ free_msgs(&spec->priv.expect_xlated);
+ free_msgs(&spec->unpriv.expect_xlated);
+ free_msgs(&spec->priv.jited);
+ free_msgs(&spec->unpriv.jited);
+ free_msgs(&spec->unpriv.stderr);
+ free_msgs(&spec->priv.stderr);
+ free_msgs(&spec->unpriv.stdout);
+ free_msgs(&spec->priv.stdout);
+
+ free(spec->priv.name);
+ free(spec->unpriv.name);
+ spec->priv.name = NULL;
+ spec->unpriv.name = NULL;
+}
+
+/* Compiles regular expression matching pattern.
+ * Pattern has a special syntax:
+ *
+ * pattern := (<verbatim text> | regex)*
+ * regex := "{{" <posix extended regular expression> "}}"
+ *
+ * In other words, pattern is a verbatim text with inclusion
+ * of regular expressions enclosed in "{{" "}}" pairs.
+ * For example, pattern "foo{{[0-9]+}}" matches strings like
+ * "foo0", "foo007", etc.
+ */
+static int compile_regex(const char *pattern, regex_t *regex)
+{
+ char err_buf[256], buf[256] = {}, *ptr, *buf_end;
+ const char *original_pattern = pattern;
+ bool in_regex = false;
+ int err;
+
+ buf_end = buf + sizeof(buf);
+ ptr = buf;
+ while (*pattern && ptr < buf_end - 2) {
+ if (!in_regex && str_has_pfx(pattern, "{{")) {
+ in_regex = true;
+ pattern += 2;
+ continue;
+ }
+ if (in_regex && str_has_pfx(pattern, "}}")) {
+ in_regex = false;
+ pattern += 2;
+ continue;
+ }
+ if (in_regex) {
+ *ptr++ = *pattern++;
+ continue;
+ }
+ /* list of characters that need escaping for extended posix regex */
+ if (strchr(".[]\\()*+?{}|^$", *pattern)) {
+ *ptr++ = '\\';
+ *ptr++ = *pattern++;
+ continue;
+ }
+ *ptr++ = *pattern++;
+ }
+ if (*pattern) {
+ PRINT_FAIL("Regexp too long: '%s'\n", original_pattern);
+ return -EINVAL;
+ }
+ if (in_regex) {
+ PRINT_FAIL("Regexp has open '{{' but no closing '}}': '%s'\n", original_pattern);
+ return -EINVAL;
+ }
+ err = regcomp(regex, buf, REG_EXTENDED | REG_NEWLINE);
+ if (err != 0) {
+ regerror(err, regex, err_buf, sizeof(err_buf));
+ PRINT_FAIL("Regexp compilation error in '%s': '%s'\n", buf, err_buf);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int __push_msg(const char *pattern, bool on_next_line, bool negative,
+ struct expected_msgs *msgs)
+{
+ struct expect_msg *msg;
+ void *tmp;
+ int err;
+
+ tmp = realloc(msgs->patterns,
+ (1 + msgs->cnt) * sizeof(struct expect_msg));
+ if (!tmp) {
+ ASSERT_FAIL("failed to realloc memory for messages\n");
+ return -ENOMEM;
+ }
+ msgs->patterns = tmp;
+ msg = &msgs->patterns[msgs->cnt];
+ msg->on_next_line = on_next_line;
+ msg->substr = pattern;
+ msg->negative = negative;
+ msg->is_regex = false;
+ if (strstr(pattern, "{{")) {
+ err = compile_regex(pattern, &msg->regex);
+ if (err)
+ return err;
+ msg->is_regex = true;
+ }
+ msgs->cnt += 1;
+ return 0;
+}
+
+static int clone_msgs(struct expected_msgs *from, struct expected_msgs *to)
+{
+ struct expect_msg *msg;
+ int i, err;
+
+ for (i = 0; i < from->cnt; i++) {
+ msg = &from->patterns[i];
+ err = __push_msg(msg->substr, msg->on_next_line, msg->negative, to);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int push_msg(const char *substr, bool negative, struct expected_msgs *msgs)
+{
+ return __push_msg(substr, false, negative, msgs);
+}
+
+static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct expected_msgs *msgs)
+{
+ int err;
+
+ if (strcmp(regex_str, "...") == 0) {
+ *on_next_line = false;
+ return 0;
+ }
+ err = __push_msg(regex_str, *on_next_line, false, msgs);
+ if (err)
+ return err;
+ *on_next_line = true;
+ return 0;
+}
+
+static int parse_int(const char *str, int *val, const char *name)
+{
+ char *end;
+ long tmp;
+
+ errno = 0;
+ if (str_has_pfx(str, "0x"))
+ tmp = strtol(str + 2, &end, 16);
+ else
+ tmp = strtol(str, &end, 10);
+ if (errno || end[0] != '\0') {
+ PRINT_FAIL("failed to parse %s from '%s'\n", name, str);
+ return -EINVAL;
+ }
+ *val = tmp;
+ return 0;
+}
+
+static int parse_caps(const char *str, __u64 *val, const char *name)
+{
+ int cap_flag = 0;
+ char *token = NULL, *saveptr = NULL;
+
+ char *str_cpy = strdup(str);
+ if (str_cpy == NULL) {
+ PRINT_FAIL("Memory allocation failed\n");
+ return -EINVAL;
+ }
+
+ token = strtok_r(str_cpy, "|", &saveptr);
+ while (token != NULL) {
+ errno = 0;
+ if (!strncmp("CAP_", token, sizeof("CAP_") - 1)) {
+ PRINT_FAIL("define %s constant in bpf_misc.h, failed to parse caps\n", token);
+ return -EINVAL;
+ }
+ cap_flag = strtol(token, NULL, 10);
+ if (!cap_flag || errno) {
+ PRINT_FAIL("failed to parse caps %s\n", name);
+ return -EINVAL;
+ }
+ *val |= (1ULL << cap_flag);
+ token = strtok_r(NULL, "|", &saveptr);
+ }
+
+ free(str_cpy);
+ return 0;
+}
+
+static int parse_retval(const char *str, int *val, const char *name)
+{
+ /*
+ * INT_MIN is defined as (-INT_MAX -1), i.e. it doesn't expand to a
+ * single int and cannot be parsed with strtol, so we handle it
+ * separately here. In addition, it expands to different expressions in
+ * different compilers so we use a prefixed _INT_MIN instead.
+ */
+ if (strcmp(str, "_INT_MIN") == 0) {
+ *val = INT_MIN;
+ return 0;
+ }
+
+ return parse_int(str, val, name);
+}
+
+static void update_flags(int *flags, int flag, bool clear)
+{
+ if (clear)
+ *flags &= ~flag;
+ else
+ *flags |= flag;
+}
+
+/* Matches a string of form '<pfx>[^=]=.*' and returns it's suffix.
+ * Used to parse btf_decl_tag values.
+ * Such values require unique prefix because compiler does not add
+ * same __attribute__((btf_decl_tag(...))) twice.
+ * Test suite uses two-component tags for such cases:
+ *
+ * <pfx> __COUNTER__ '='
+ *
+ * For example, two consecutive __msg tags '__msg("foo") __msg("foo")'
+ * would be encoded as:
+ *
+ * [18] DECL_TAG 'comment:test_expect_msg=0=foo' type_id=15 component_idx=-1
+ * [19] DECL_TAG 'comment:test_expect_msg=1=foo' type_id=15 component_idx=-1
+ *
+ * And the purpose of this function is to extract 'foo' from the above.
+ */
+static const char *skip_dynamic_pfx(const char *s, const char *pfx)
+{
+ const char *msg;
+
+ if (strncmp(s, pfx, strlen(pfx)) != 0)
+ return NULL;
+ msg = s + strlen(pfx);
+ msg = strchr(msg, '=');
+ if (!msg)
+ return NULL;
+ return msg + 1;
+}
+
+enum arch {
+ ARCH_UNKNOWN = 0x1,
+ ARCH_X86_64 = 0x2,
+ ARCH_ARM64 = 0x4,
+ ARCH_RISCV64 = 0x8,
+ ARCH_S390X = 0x10,
+};
+
+static int get_current_arch(void)
+{
+#if defined(__x86_64__)
+ return ARCH_X86_64;
+#elif defined(__aarch64__)
+ return ARCH_ARM64;
+#elif defined(__riscv) && __riscv_xlen == 64
+ return ARCH_RISCV64;
+#elif defined(__s390x__)
+ return ARCH_S390X;
+#endif
+ return ARCH_UNKNOWN;
+}
+
+/* Uses btf_decl_tag attributes to describe the expected test
+ * behavior, see bpf_misc.h for detailed description of each attribute
+ * and attribute combinations.
+ */
+static int parse_test_spec(struct test_loader *tester,
+ struct bpf_object *obj,
+ struct bpf_program *prog,
+ struct test_spec *spec)
+{
+ const char *description = NULL;
+ bool has_unpriv_result = false;
+ bool has_unpriv_retval = false;
+ bool unpriv_xlated_on_next_line = true;
+ bool xlated_on_next_line = true;
+ bool unpriv_jit_on_next_line;
+ bool jit_on_next_line;
+ bool stderr_on_next_line = true;
+ bool unpriv_stderr_on_next_line = true;
+ bool stdout_on_next_line = true;
+ bool unpriv_stdout_on_next_line = true;
+ bool collect_jit = false;
+ int func_id, i, err = 0;
+ u32 arch_mask = 0;
+ u32 load_mask = 0;
+ struct btf *btf;
+ enum arch arch;
+
+ memset(spec, 0, sizeof(*spec));
+
+ spec->prog_name = bpf_program__name(prog);
+ spec->prog_flags = testing_prog_flags();
+
+ btf = bpf_object__btf(obj);
+ if (!btf) {
+ ASSERT_FAIL("BPF object has no BTF");
+ return -EINVAL;
+ }
+
+ func_id = btf__find_by_name_kind(btf, spec->prog_name, BTF_KIND_FUNC);
+ if (func_id < 0) {
+ ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->prog_name);
+ return -EINVAL;
+ }
+
+ for (i = 1; i < btf__type_cnt(btf); i++) {
+ const char *s, *val, *msg;
+ const struct btf_type *t;
+ bool clear;
+ int flags;
+
+ t = btf__type_by_id(btf, i);
+ if (!btf_is_decl_tag(t))
+ continue;
+
+ if (t->type != func_id || btf_decl_tag(t)->component_idx != -1)
+ continue;
+
+ s = btf__str_by_offset(btf, t->name_off);
+ if (str_has_pfx(s, TEST_TAG_DESCRIPTION_PFX)) {
+ description = s + sizeof(TEST_TAG_DESCRIPTION_PFX) - 1;
+ } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) {
+ spec->priv.expect_failure = true;
+ spec->mode_mask |= PRIV;
+ } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) {
+ spec->priv.expect_failure = false;
+ spec->mode_mask |= PRIV;
+ } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE_UNPRIV) == 0) {
+ spec->unpriv.expect_failure = true;
+ spec->mode_mask |= UNPRIV;
+ has_unpriv_result = true;
+ } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS_UNPRIV) == 0) {
+ spec->unpriv.expect_failure = false;
+ spec->mode_mask |= UNPRIV;
+ has_unpriv_result = true;
+ } else if (strcmp(s, TEST_TAG_AUXILIARY) == 0) {
+ spec->auxiliary = true;
+ spec->mode_mask |= PRIV;
+ } else if (strcmp(s, TEST_TAG_AUXILIARY_UNPRIV) == 0) {
+ spec->auxiliary = true;
+ spec->mode_mask |= UNPRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX))) {
+ err = push_msg(msg, false, &spec->priv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX))) {
+ err = push_msg(msg, true, &spec->priv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV))) {
+ err = push_msg(msg, false, &spec->unpriv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV))) {
+ err = push_msg(msg, true, &spec->unpriv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_JITED_PFX))) {
+ if (arch_mask == 0) {
+ PRINT_FAIL("__jited used before __arch_*");
+ goto cleanup;
+ }
+ if (collect_jit) {
+ err = push_disasm_msg(msg, &jit_on_next_line,
+ &spec->priv.jited);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ }
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_JITED_PFX_UNPRIV))) {
+ if (arch_mask == 0) {
+ PRINT_FAIL("__unpriv_jited used before __arch_*");
+ goto cleanup;
+ }
+ if (collect_jit) {
+ err = push_disasm_msg(msg, &unpriv_jit_on_next_line,
+ &spec->unpriv.jited);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ }
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_XLATED_PFX))) {
+ err = push_disasm_msg(msg, &xlated_on_next_line,
+ &spec->priv.expect_xlated);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_XLATED_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_xlated_on_next_line,
+ &spec->unpriv.expect_xlated);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX)) {
+ val = s + sizeof(TEST_TAG_RETVAL_PFX) - 1;
+ err = parse_retval(val, &spec->priv.retval, "__retval");
+ if (err)
+ goto cleanup;
+ spec->priv.execute = true;
+ spec->mode_mask |= PRIV;
+ } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX_UNPRIV)) {
+ val = s + sizeof(TEST_TAG_RETVAL_PFX_UNPRIV) - 1;
+ err = parse_retval(val, &spec->unpriv.retval, "__retval_unpriv");
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ spec->unpriv.execute = true;
+ has_unpriv_retval = true;
+ } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) {
+ val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1;
+ err = parse_int(val, &spec->log_level, "test log level");
+ if (err)
+ goto cleanup;
+ } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) {
+ val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1;
+
+ clear = val[0] == '!';
+ if (clear)
+ val++;
+
+ if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear);
+ } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear);
+ } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear);
+ } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear);
+ } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear);
+ } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear);
+ } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear);
+ } else /* assume numeric value */ {
+ err = parse_int(val, &flags, "test prog flags");
+ if (err)
+ goto cleanup;
+ update_flags(&spec->prog_flags, flags, clear);
+ }
+ } else if (str_has_pfx(s, TEST_TAG_ARCH)) {
+ val = s + sizeof(TEST_TAG_ARCH) - 1;
+ if (strcmp(val, "X86_64") == 0) {
+ arch = ARCH_X86_64;
+ } else if (strcmp(val, "ARM64") == 0) {
+ arch = ARCH_ARM64;
+ } else if (strcmp(val, "RISCV64") == 0) {
+ arch = ARCH_RISCV64;
+ } else if (strcmp(val, "s390x") == 0) {
+ arch = ARCH_S390X;
+ } else {
+ PRINT_FAIL("bad arch spec: '%s'\n", val);
+ err = -EINVAL;
+ goto cleanup;
+ }
+ arch_mask |= arch;
+ collect_jit = get_current_arch() == arch;
+ unpriv_jit_on_next_line = true;
+ jit_on_next_line = true;
+ } else if (str_has_pfx(s, TEST_BTF_PATH)) {
+ spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1;
+ } else if (str_has_pfx(s, TEST_TAG_CAPS_UNPRIV)) {
+ val = s + sizeof(TEST_TAG_CAPS_UNPRIV) - 1;
+ err = parse_caps(val, &spec->unpriv.caps, "test caps");
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_LOAD_MODE_PFX)) {
+ val = s + sizeof(TEST_TAG_LOAD_MODE_PFX) - 1;
+ if (strcmp(val, "jited") == 0) {
+ load_mask = JITED;
+ } else if (strcmp(val, "no_jited") == 0) {
+ load_mask = NO_JITED;
+ } else {
+ PRINT_FAIL("bad load spec: '%s'", val);
+ err = -EINVAL;
+ goto cleanup;
+ }
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX))) {
+ err = push_disasm_msg(msg, &stderr_on_next_line,
+ &spec->priv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stderr_on_next_line,
+ &spec->unpriv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX))) {
+ err = push_disasm_msg(msg, &stdout_on_next_line,
+ &spec->priv.stdout);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stdout_on_next_line,
+ &spec->unpriv.stdout);
+ if (err)
+ goto cleanup;
+ } else if (str_has_pfx(s, TEST_TAG_LINEAR_SIZE)) {
+ switch (bpf_program__type(prog)) {
+ case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ val = s + sizeof(TEST_TAG_LINEAR_SIZE) - 1;
+ err = parse_int(val, &spec->linear_sz, "test linear size");
+ if (err)
+ goto cleanup;
+ break;
+ default:
+ PRINT_FAIL("__linear_size for unsupported program type");
+ err = -EINVAL;
+ goto cleanup;
+ }
+ }
+ }
+
+ spec->arch_mask = arch_mask ?: -1;
+ spec->load_mask = load_mask ?: (JITED | NO_JITED);
+
+ if (spec->mode_mask == 0)
+ spec->mode_mask = PRIV;
+
+ if (!description)
+ description = spec->prog_name;
+
+ if (spec->mode_mask & PRIV) {
+ spec->priv.name = strdup(description);
+ if (!spec->priv.name) {
+ PRINT_FAIL("failed to allocate memory for priv.name\n");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ }
+
+ if (spec->mode_mask & UNPRIV) {
+ int descr_len = strlen(description);
+ const char *suffix = " @unpriv";
+ char *name;
+
+ name = malloc(descr_len + strlen(suffix) + 1);
+ if (!name) {
+ PRINT_FAIL("failed to allocate memory for unpriv.name\n");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ strcpy(name, description);
+ strcpy(&name[descr_len], suffix);
+ spec->unpriv.name = name;
+ }
+
+ if (spec->mode_mask & (PRIV | UNPRIV)) {
+ if (!has_unpriv_result)
+ spec->unpriv.expect_failure = spec->priv.expect_failure;
+
+ if (!has_unpriv_retval) {
+ spec->unpriv.retval = spec->priv.retval;
+ spec->unpriv.execute = spec->priv.execute;
+ }
+
+ if (spec->unpriv.expect_msgs.cnt == 0)
+ clone_msgs(&spec->priv.expect_msgs, &spec->unpriv.expect_msgs);
+ if (spec->unpriv.expect_xlated.cnt == 0)
+ clone_msgs(&spec->priv.expect_xlated, &spec->unpriv.expect_xlated);
+ if (spec->unpriv.jited.cnt == 0)
+ clone_msgs(&spec->priv.jited, &spec->unpriv.jited);
+ if (spec->unpriv.stderr.cnt == 0)
+ clone_msgs(&spec->priv.stderr, &spec->unpriv.stderr);
+ if (spec->unpriv.stdout.cnt == 0)
+ clone_msgs(&spec->priv.stdout, &spec->unpriv.stdout);
+ }
+
+ spec->valid = true;
+
+ return 0;
+
+cleanup:
+ free_test_spec(spec);
+ return err;
+}
+
+static void prepare_case(struct test_loader *tester,
+ struct test_spec *spec,
+ struct bpf_object *obj,
+ struct bpf_program *prog)
+{
+ int min_log_level = 0, prog_flags;
+
+ if (env.verbosity > VERBOSE_NONE)
+ min_log_level = 1;
+ if (env.verbosity > VERBOSE_VERY)
+ min_log_level = 2;
+
+ bpf_program__set_log_buf(prog, tester->log_buf, tester->log_buf_sz);
+
+ /* Make sure we set at least minimal log level, unless test requires
+ * even higher level already. Make sure to preserve independent log
+ * level 4 (verifier stats), though.
+ */
+ if ((spec->log_level & 3) < min_log_level)
+ bpf_program__set_log_level(prog, (spec->log_level & 4) | min_log_level);
+ else
+ bpf_program__set_log_level(prog, spec->log_level);
+
+ prog_flags = bpf_program__flags(prog);
+ bpf_program__set_flags(prog, prog_flags | spec->prog_flags);
+
+ tester->log_buf[0] = '\0';
+}
+
+static void emit_verifier_log(const char *log_buf, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log_buf);
+}
+
+static void emit_xlated(const char *xlated, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "XLATED:\n=============\n%s=============\n", xlated);
+}
+
+static void emit_jited(const char *jited, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "JITED:\n=============\n%s=============\n", jited);
+}
+
+static void emit_stderr(const char *stderr, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDERR:\n=============\n%s=============\n", stderr);
+}
+
+static void emit_stdout(const char *bpf_stdout, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDOUT:\n=============\n%s=============\n", bpf_stdout);
+}
+
+static const char *match_msg(struct expect_msg *msg, const char **log)
+{
+ const char *match = NULL;
+ regmatch_t reg_match[1];
+ int err;
+
+ if (!msg->is_regex) {
+ match = strstr(*log, msg->substr);
+ if (match)
+ *log = match + strlen(msg->substr);
+ } else {
+ err = regexec(&msg->regex, *log, 1, reg_match, 0);
+ if (err == 0) {
+ match = *log + reg_match[0].rm_so;
+ *log += reg_match[0].rm_eo;
+ }
+ }
+ return match;
+}
+
+static int count_lines(const char *start, const char *end)
+{
+ const char *tmp;
+ int n = 0;
+
+ for (tmp = start; tmp < end; ++tmp)
+ if (*tmp == '\n')
+ n++;
+ return n;
+}
+
+struct match {
+ const char *start;
+ const char *end;
+ int line;
+};
+
+/*
+ * Positive messages are matched sequentially, each next message
+ * is looked for starting from the end of a previous matched one.
+ */
+static void match_positive_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *prev_match;
+ int i, line;
+
+ prev_match = log;
+ line = 0;
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+ const char *match = NULL;
+
+ if (msg->negative)
+ continue;
+
+ match = match_msg(msg, &log);
+ if (match) {
+ line += count_lines(prev_match, match);
+ matches[i].start = match;
+ matches[i].end = log;
+ matches[i].line = line;
+ prev_match = match;
+ }
+ }
+}
+
+/*
+ * Each negative messages N located between positive messages P1 and P2
+ * is matched in the span P1.end .. P2.start. Consequently, negative messages
+ * are unordered within the span.
+ */
+static void match_negative_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *start = log, *end, *next, *match;
+ const char *log_end = log + strlen(log);
+ int i, j, next_positive;
+
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+
+ /* positive message bumps span start */
+ if (!msg->negative) {
+ start = matches[i].end ?: start;
+ continue;
+ }
+
+ /* count stride of negative patterns and adjust span end */
+ end = log_end;
+ for (next_positive = i + 1; next_positive < msgs->cnt; next_positive++) {
+ if (!msgs->patterns[next_positive].negative) {
+ end = matches[next_positive].start;
+ break;
+ }
+ }
+
+ /* try matching negative messages within identified span */
+ for (j = i; j < next_positive; j++) {
+ next = start;
+ match = match_msg(msg, &next);
+ if (match && next <= end) {
+ matches[j].start = match;
+ matches[j].end = next;
+ }
+ }
+
+ /* -1 to account for i++ */
+ i = next_positive - 1;
+ }
+}
+
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force))
+{
+ struct match matches[msgs->cnt];
+ struct match *prev_match = NULL;
+ int i, j;
+
+ memset(matches, 0, sizeof(*matches) * msgs->cnt);
+ match_positive_msgs(log_buf, msgs, matches);
+ match_negative_msgs(log_buf, msgs, matches);
+
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+ struct match *match = &matches[i];
+ const char *pat_status;
+ bool unexpected;
+ bool wrong_line;
+ bool no_match;
+
+ no_match = !msg->negative && !match->start;
+ wrong_line = !msg->negative &&
+ msg->on_next_line &&
+ prev_match && prev_match->line + 1 != match->line;
+ unexpected = msg->negative && match->start;
+ if (no_match || wrong_line || unexpected) {
+ PRINT_FAIL("expect_msg\n");
+ if (env.verbosity == VERBOSE_NONE)
+ emit_fn(log_buf, true /*force*/);
+ for (j = 0; j <= i; j++) {
+ msg = &msgs->patterns[j];
+ if (j < i)
+ pat_status = "MATCHED ";
+ else if (wrong_line)
+ pat_status = "WRONG LINE";
+ else if (no_match)
+ pat_status = "EXPECTED ";
+ else
+ pat_status = "UNEXPECTED";
+ msg = &msgs->patterns[j];
+ fprintf(stderr, "%s %s: '%s'\n",
+ pat_status,
+ msg->is_regex ? " REGEX" : "SUBSTR",
+ msg->substr);
+ }
+ if (wrong_line) {
+ fprintf(stderr,
+ "expecting match at line %d, actual match is at line %d\n",
+ prev_match->line + 1, match->line);
+ }
+ break;
+ }
+
+ if (!msg->negative)
+ prev_match = match;
+ }
+}
+
+struct cap_state {
+ __u64 old_caps;
+ bool initialized;
+};
+
+static int drop_capabilities(struct cap_state *caps)
+{
+ const __u64 caps_to_drop = (1ULL << CAP_SYS_ADMIN | 1ULL << CAP_NET_ADMIN |
+ 1ULL << CAP_PERFMON | 1ULL << CAP_BPF);
+ int err;
+
+ err = cap_disable_effective(caps_to_drop, &caps->old_caps);
+ if (err) {
+ PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(-err));
+ return err;
+ }
+
+ caps->initialized = true;
+ return 0;
+}
+
+static int restore_capabilities(struct cap_state *caps)
+{
+ int err;
+
+ if (!caps->initialized)
+ return 0;
+
+ err = cap_enable_effective(caps->old_caps, NULL);
+ if (err)
+ PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(-err));
+ caps->initialized = false;
+ return err;
+}
+
+static bool can_execute_unpriv(struct test_loader *tester, struct test_spec *spec)
+{
+ if (sysctl_unpriv_disabled < 0)
+ sysctl_unpriv_disabled = get_unpriv_disabled() ? 1 : 0;
+ if (sysctl_unpriv_disabled)
+ return false;
+ if ((spec->prog_flags & BPF_F_ANY_ALIGNMENT) && !EFFICIENT_UNALIGNED_ACCESS)
+ return false;
+ return true;
+}
+
+static bool is_unpriv_capable_map(struct bpf_map *map)
+{
+ enum bpf_map_type type;
+ __u32 flags;
+
+ type = bpf_map__type(map);
+
+ switch (type) {
+ case BPF_MAP_TYPE_HASH:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ flags = bpf_map__map_flags(map);
+ return !(flags & BPF_F_ZERO_SEED);
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_RINGBUF:
+ case BPF_MAP_TYPE_PROG_ARRAY:
+ case BPF_MAP_TYPE_CGROUP_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_USER_RINGBUF:
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts, int linear_sz)
+{
+ __u8 tmp_out[TEST_DATA_LEN << 2] = {};
+ __u8 tmp_in[TEST_DATA_LEN] = {};
+ struct __sk_buff ctx = {};
+ int err, saved_errno;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = tmp_in,
+ .data_size_in = sizeof(tmp_in),
+ .data_out = tmp_out,
+ .data_size_out = sizeof(tmp_out),
+ .repeat = 1,
+ );
+
+ if (linear_sz) {
+ ctx.data_end = linear_sz;
+ topts.ctx_in = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ }
+
+ if (empty_opts) {
+ memset(&topts, 0, sizeof(struct bpf_test_run_opts));
+ topts.sz = sizeof(struct bpf_test_run_opts);
+ }
+ err = bpf_prog_test_run_opts(fd_prog, &topts);
+ saved_errno = errno;
+
+ if (err) {
+ PRINT_FAIL("FAIL: Unexpected bpf_prog_test_run error: %d (%s) ",
+ saved_errno, strerror(saved_errno));
+ return err;
+ }
+
+ ASSERT_OK(0, "bpf_prog_test_run");
+ *retval = topts.retval;
+
+ return 0;
+}
+
+static bool should_do_test_run(struct test_spec *spec, struct test_subspec *subspec)
+{
+ if (!subspec->execute)
+ return false;
+
+ if (subspec->expect_failure)
+ return false;
+
+ if ((spec->prog_flags & BPF_F_ANY_ALIGNMENT) && !EFFICIENT_UNALIGNED_ACCESS) {
+ if (env.verbosity != VERBOSE_NONE)
+ printf("alignment prevents execution\n");
+ return false;
+ }
+
+ return true;
+}
+
+/* Get a disassembly of BPF program after verifier applies all rewrites */
+static int get_xlated_program_text(int prog_fd, char *text, size_t text_sz)
+{
+ struct bpf_insn *insn_start = NULL, *insn, *insn_end;
+ __u32 insns_cnt = 0, i;
+ char buf[64];
+ FILE *out = NULL;
+ int err;
+
+ err = get_xlated_program(prog_fd, &insn_start, &insns_cnt);
+ if (!ASSERT_OK(err, "get_xlated_program"))
+ goto out;
+ out = fmemopen(text, text_sz, "w");
+ if (!ASSERT_OK_PTR(out, "open_memstream"))
+ goto out;
+ insn_end = insn_start + insns_cnt;
+ insn = insn_start;
+ while (insn < insn_end) {
+ i = insn - insn_start;
+ insn = disasm_insn(insn, buf, sizeof(buf));
+ fprintf(out, "%d: %s\n", i, buf);
+ }
+ fflush(out);
+
+out:
+ free(insn_start);
+ if (out)
+ fclose(out);
+ return err;
+}
+
+/* Read the bpf stream corresponding to the stream_id */
+static int get_stream(int stream_id, int prog_fd, char *text, size_t text_sz)
+{
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret;
+
+ ret = bpf_prog_stream_read(prog_fd, stream_id, text, text_sz, &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ text[ret] = '\0';
+
+ return ret;
+}
+
+/* this function is forced noinline and has short generic name to look better
+ * in test_progs output (in case of a failure)
+ */
+static noinline
+void run_subtest(struct test_loader *tester,
+ struct bpf_object_open_opts *open_opts,
+ const void *obj_bytes,
+ size_t obj_byte_cnt,
+ struct test_spec *specs,
+ struct test_spec *spec,
+ bool unpriv)
+{
+ struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv;
+ int current_runtime = is_jit_enabled() ? JITED : NO_JITED;
+ struct bpf_program *tprog = NULL, *tprog_iter;
+ struct bpf_link *link, *links[32] = {};
+ struct test_spec *spec_iter;
+ struct cap_state caps = {};
+ struct bpf_object *tobj;
+ struct bpf_map *map;
+ int retval, err, i;
+ int links_cnt = 0;
+ bool should_load;
+
+ if (!test__start_subtest(subspec->name))
+ return;
+
+ if ((get_current_arch() & spec->arch_mask) == 0) {
+ test__skip();
+ return;
+ }
+
+ if ((current_runtime & spec->load_mask) == 0) {
+ test__skip();
+ return;
+ }
+
+ if (unpriv) {
+ if (!can_execute_unpriv(tester, spec)) {
+ test__skip();
+ test__end_subtest();
+ return;
+ }
+ if (drop_capabilities(&caps)) {
+ test__end_subtest();
+ return;
+ }
+ if (subspec->caps) {
+ err = cap_enable_effective(subspec->caps, NULL);
+ if (err) {
+ PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(-err));
+ goto subtest_cleanup;
+ }
+ }
+ }
+
+ /* Implicitly reset to NULL if next test case doesn't specify */
+ open_opts->btf_custom_path = spec->btf_custom_path;
+
+ tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts);
+ if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */
+ goto subtest_cleanup;
+
+ i = 0;
+ bpf_object__for_each_program(tprog_iter, tobj) {
+ spec_iter = &specs[i++];
+ should_load = false;
+
+ if (spec_iter->valid) {
+ if (strcmp(bpf_program__name(tprog_iter), spec->prog_name) == 0) {
+ tprog = tprog_iter;
+ should_load = true;
+ }
+
+ if (spec_iter->auxiliary &&
+ spec_iter->mode_mask & (unpriv ? UNPRIV : PRIV))
+ should_load = true;
+ }
+
+ bpf_program__set_autoload(tprog_iter, should_load);
+ }
+
+ prepare_case(tester, spec, tobj, tprog);
+
+ /* By default bpf_object__load() automatically creates all
+ * maps declared in the skeleton. Some map types are only
+ * allowed in priv mode. Disable autoload for such maps in
+ * unpriv mode.
+ */
+ bpf_object__for_each_map(map, tobj)
+ bpf_map__set_autocreate(map, !unpriv || is_unpriv_capable_map(map));
+
+ err = bpf_object__load(tobj);
+ if (subspec->expect_failure) {
+ if (!ASSERT_ERR(err, "unexpected_load_success")) {
+ emit_verifier_log(tester->log_buf, false /*force*/);
+ goto tobj_cleanup;
+ }
+ } else {
+ if (!ASSERT_OK(err, "unexpected_load_failure")) {
+ emit_verifier_log(tester->log_buf, true /*force*/);
+ goto tobj_cleanup;
+ }
+ }
+ emit_verifier_log(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log);
+
+ /* Restore capabilities because the kernel will silently ignore requests
+ * for program info (such as xlated program text) if we are not
+ * bpf-capable. Also, for some reason test_verifier executes programs
+ * with all capabilities restored. Do the same here.
+ */
+ if (restore_capabilities(&caps))
+ goto tobj_cleanup;
+
+ if (subspec->expect_xlated.cnt) {
+ err = get_xlated_program_text(bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err)
+ goto tobj_cleanup;
+ emit_xlated(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->expect_xlated, emit_xlated);
+ }
+
+ if (subspec->jited.cnt) {
+ err = get_jited_program_text(bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err == -EOPNOTSUPP) {
+ printf("%s:SKIP: jited programs disassembly is not supported,\n", __func__);
+ printf("%s:SKIP: tests are built w/o LLVM development libs\n", __func__);
+ test__skip();
+ goto tobj_cleanup;
+ }
+ if (!ASSERT_EQ(err, 0, "get_jited_program_text"))
+ goto tobj_cleanup;
+ emit_jited(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->jited, emit_jited);
+ }
+
+ if (should_do_test_run(spec, subspec)) {
+ /* Do bpf_map__attach_struct_ops() for each struct_ops map.
+ * This should trigger bpf_struct_ops->reg callback on kernel side.
+ */
+ bpf_object__for_each_map(map, tobj) {
+ if (!bpf_map__autocreate(map) ||
+ bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS)
+ continue;
+ if (links_cnt >= ARRAY_SIZE(links)) {
+ PRINT_FAIL("too many struct_ops maps");
+ goto tobj_cleanup;
+ }
+ link = bpf_map__attach_struct_ops(map);
+ if (!link) {
+ PRINT_FAIL("bpf_map__attach_struct_ops failed for map %s: err=%d\n",
+ bpf_map__name(map), -errno);
+ goto tobj_cleanup;
+ }
+ links[links_cnt++] = link;
+ }
+
+ if (tester->pre_execution_cb) {
+ err = tester->pre_execution_cb(tobj);
+ if (err) {
+ PRINT_FAIL("pre_execution_cb failed: %d\n", err);
+ goto tobj_cleanup;
+ }
+ }
+
+ err = do_prog_test_run(bpf_program__fd(tprog), &retval,
+ bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false,
+ spec->linear_sz);
+ if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) {
+ PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
+ goto tobj_cleanup;
+ }
+
+ if (subspec->stderr.cnt) {
+ err = get_stream(2, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stderr(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stderr, emit_stderr);
+ }
+
+ if (subspec->stdout.cnt) {
+ err = get_stream(1, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stdout(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stdout, emit_stdout);
+ }
+
+ /* redo bpf_map__attach_struct_ops for each test */
+ while (links_cnt > 0)
+ bpf_link__destroy(links[--links_cnt]);
+ }
+
+tobj_cleanup:
+ while (links_cnt > 0)
+ bpf_link__destroy(links[--links_cnt]);
+ bpf_object__close(tobj);
+subtest_cleanup:
+ test__end_subtest();
+ restore_capabilities(&caps);
+}
+
+static void process_subtest(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = skel_name);
+ struct test_spec *specs = NULL;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ const void *obj_bytes;
+ int err, i, nr_progs;
+ size_t obj_byte_cnt;
+
+ if (tester_init(tester) < 0)
+ return; /* failed to initialize tester */
+
+ obj_bytes = elf_bytes_factory(&obj_byte_cnt);
+ obj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts);
+ if (!ASSERT_OK_PTR(obj, "obj_open_mem"))
+ return;
+
+ nr_progs = 0;
+ bpf_object__for_each_program(prog, obj)
+ ++nr_progs;
+
+ specs = calloc(nr_progs, sizeof(struct test_spec));
+ if (!ASSERT_OK_PTR(specs, "specs_alloc"))
+ return;
+
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ /* ignore tests for which we can't derive test specification */
+ err = parse_test_spec(tester, obj, prog, &specs[i++]);
+ if (err)
+ PRINT_FAIL("Can't parse test spec for program '%s'\n",
+ bpf_program__name(prog));
+ }
+
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ struct test_spec *spec = &specs[i++];
+
+ if (!spec->valid || spec->auxiliary)
+ continue;
+
+ if (spec->mode_mask & PRIV)
+ run_subtest(tester, &open_opts, obj_bytes, obj_byte_cnt,
+ specs, spec, false);
+ if (spec->mode_mask & UNPRIV)
+ run_subtest(tester, &open_opts, obj_bytes, obj_byte_cnt,
+ specs, spec, true);
+
+ }
+
+ for (i = 0; i < nr_progs; ++i)
+ free_test_spec(&specs[i]);
+ free(specs);
+ bpf_object__close(obj);
+}
+
+void test_loader__run_subtests(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory)
+{
+ /* see comment in run_subtest() for why we do this function nesting */
+ process_subtest(tester, skel_name, elf_bytes_factory);
+}
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 6a5349f9eb14..0921939532c6 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -18,7 +18,6 @@
#include <bpf/libbpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "../../../include/linux/filter.h"
#define LOCAL_FREE_TARGET (128)
@@ -28,13 +27,14 @@ static int nr_cpus;
static int create_map(int map_type, int map_flags, unsigned int size)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);
int map_fd;
- map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
- sizeof(unsigned long long), size, map_flags);
+ map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
+ sizeof(unsigned long long), size, &opts);
if (map_fd == -1)
- perror("bpf_create_map");
+ perror("bpf_map_create");
return map_fd;
}
@@ -42,8 +42,6 @@ static int create_map(int map_type, int map_flags, unsigned int size)
static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
void *value)
{
- struct bpf_load_program_attr prog;
- struct bpf_create_map_attr map;
struct bpf_insn insns[] = {
BPF_LD_MAP_VALUE(BPF_REG_9, 0, 0),
BPF_LD_MAP_FD(BPF_REG_1, fd),
@@ -62,35 +60,26 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
};
__u8 data[64] = {};
int mfd, pfd, ret, zero = 0;
- __u32 retval = 0;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = data,
+ .data_size_in = sizeof(data),
+ .repeat = 1,
+ );
- memset(&map, 0, sizeof(map));
- map.map_type = BPF_MAP_TYPE_ARRAY;
- map.key_size = sizeof(int);
- map.value_size = sizeof(unsigned long long);
- map.max_entries = 1;
-
- mfd = bpf_create_map_xattr(&map);
+ mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL);
if (mfd < 0)
return -1;
insns[0].imm = mfd;
- memset(&prog, 0, sizeof(prog));
- prog.prog_type = BPF_PROG_TYPE_SCHED_CLS;
- prog.insns = insns;
- prog.insns_cnt = ARRAY_SIZE(insns);
- prog.license = "GPL";
-
- pfd = bpf_load_program_xattr(&prog, NULL, 0);
+ pfd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL", insns, ARRAY_SIZE(insns), NULL);
if (pfd < 0) {
close(mfd);
return -1;
}
- ret = bpf_prog_test_run(pfd, 1, data, sizeof(data),
- NULL, NULL, &retval, NULL);
- if (ret < 0 || retval != 42) {
+ ret = bpf_prog_test_run_opts(pfd, &topts);
+ if (ret < 0 || topts.retval != 42) {
ret = -1;
} else {
assert(!bpf_map_lookup_elem(mfd, &zero, value));
@@ -137,7 +126,8 @@ static int sched_next_online(int pid, int *next_to_try)
while (next < nr_cpus) {
CPU_ZERO(&cpuset);
- CPU_SET(next++, &cpuset);
+ CPU_SET(next, &cpuset);
+ next++;
if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) {
ret = 0;
break;
@@ -148,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try)
return ret;
}
+/* Derive target_free from map_size, same as bpf_common_lru_populate */
+static unsigned int __tgt_size(unsigned int map_size)
+{
+ return (map_size / nr_cpus) / 2;
+}
+
+/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */
+static unsigned int __map_size(unsigned int tgt_free)
+{
+ return tgt_free * nr_cpus * 2;
+}
+
/* Size of the LRU map is 2
* Add key=1 (+1 key)
* Add key=2 (+1 key)
@@ -186,24 +188,20 @@ static void test_lru_sanity0(int map_type, int map_flags)
BPF_NOEXIST));
/* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
- /* key=1 already exists */
- && errno == EEXIST);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+ /* key=1 already exists */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
- errno == EINVAL);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -EINVAL);
/* insert key=2 element */
/* check that key=2 is not found */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* BPF_EXIST means: update existing element */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+ /* key=2 is not there */
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
@@ -211,8 +209,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
/* check that key=3 is not found */
key = 3;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* check that key=1 can be found and mark the ref bit to
* stop LRU from removing key=1
@@ -228,8 +225,15 @@ static void test_lru_sanity0(int map_type, int map_flags)
/* key=2 has been removed from the LRU */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
+
+ /* lookup elem key=1 and delete it, then check it doesn't exist */
+ key = 1;
+ assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value));
+ assert(value[0] == 1234);
+
+ /* remove the same element from the expected map */
+ assert(!bpf_map_delete_elem(expected_map_fd, &key));
assert(map_equal(lru_map_fd, expected_map_fd));
@@ -239,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags)
printf("Pass\n");
}
-/* Size of the LRU map is 1.5*tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Lookup 1 to tgt_free/2
- * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU
+/* Verify that unreferenced elements are recycled before referenced ones.
+ * Insert elements.
+ * Reference a subset of these.
+ * Insert more, enough to trigger recycling.
+ * Verify that unreferenced are recycled.
*/
static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -265,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -274,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup 1 to tgt_free/2 */
+ /* Lookup 1 to batch_size */
end_key = 1 + batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -288,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
BPF_NOEXIST));
}
- /* Insert 1+tgt_free to 2*tgt_free
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be
+ /* Insert another map_size - batch_size keys
+ * Map will contain 1 to batch_size plus these latest, i.e.,
+ * => previous 1+batch_size to map_size - batch_size will have been
* removed by LRU
*/
- key = 1 + tgt_free;
- end_key = key + tgt_free;
+ key = 1 + __map_size(tgt_free);
+ end_key = key + __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -309,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map 1.5 * tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Update 1 to tgt_free/2
- * => The original 1 to tgt_free/2 will be removed due to
- * the LRU shrink process
- * Re-insert 1 to tgt_free/2 again and do a lookup immeidately
- * Insert 1+tgt_free to tgt_free*3/2
- * Insert 1+tgt_free*3/2 to tgt_free*5/2
- * => Key 1+tgt_free to tgt_free*3/2
- * will be removed from LRU because it has never
- * been lookup and ref bit is not set
+/* Verify that insertions exceeding map size will recycle the oldest.
+ * Verify that unreferenced elements are recycled before referenced.
*/
static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -342,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -351,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -365,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
* shrink the inactive list to get tgt_free
* number of free nodes.
*
- * Hence, the oldest key 1 to tgt_free/2
- * are removed from the LRU list.
+ * Hence, the oldest key is removed from the LRU list.
*/
key = 1;
if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
@@ -378,14 +373,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
BPF_EXIST));
}
- /* Re-insert 1 to tgt_free/2 again and do a lookup
- * immeidately.
+ /* Re-insert 1 to batch_size again and do a lookup immediately.
*/
end_key = 1 + batch_size;
value[0] = 4321;
for (key = 1; key < end_key; key++) {
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -396,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1+tgt_free to tgt_free*3/2 */
- end_key = 1 + tgt_free + batch_size;
- for (key = 1 + tgt_free; key < end_key; key++)
+ /* Insert batch_size new elements */
+ key = 1 + __map_size(tgt_free);
+ end_key = key + batch_size;
+ for (; key < end_key; key++)
/* These newly added but not referenced keys will be
* gone during the next LRU shrink.
*/
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */
- end_key = key + tgt_free;
+ /* Insert map_size - batch_size elements */
+ end_key += __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -422,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map is 2*tgt_free
- * It is to test the active/inactive list rotation
- * Insert 1 to 2*tgt_free (+2*tgt_free keys)
- * Lookup key 1 to tgt_free*3/2
- * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
- * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU
+/* Test the active/inactive list rotation
+ *
+ * Fill the whole map, deplete the free list.
+ * Reference all except the last lru->target_free elements.
+ * Insert lru->target_free new elements. This triggers one shrink.
+ * Verify that the non-referenced elements are replaced.
*/
static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -446,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
assert(sched_next_online(0, &next_cpu) != -1);
- batch_size = tgt_free / 2;
- assert(batch_size * 2 == tgt_free);
+ batch_size = __tgt_size(tgt_free);
map_size = tgt_free * 2;
lru_map_fd = create_map(map_type, map_flags, map_size);
@@ -458,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
- end_key = 1 + (2 * tgt_free);
+ /* Fill the map */
+ end_key = 1 + map_size;
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup key 1 to tgt_free*3/2 */
- end_key = tgt_free + batch_size;
+ /* Reference all but the last batch_size */
+ end_key = 1 + map_size - batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
assert(!bpf_map_update_elem(expected_map_fd, &key, value,
BPF_NOEXIST));
}
- /* Add 1+2*tgt_free to tgt_free*5/2
- * (+tgt_free/2 keys)
- */
+ /* Insert new batch_size: replaces the non-referenced elements */
key = 2 * tgt_free + 1;
end_key = key + batch_size;
for (; key < end_key; key++) {
@@ -509,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
lru_map_fd = create_map(map_type, map_flags,
3 * tgt_free * nr_cpus);
else
- lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+ lru_map_fd = create_map(map_type, map_flags,
+ 3 * __map_size(tgt_free));
assert(lru_map_fd != -1);
expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
@@ -565,8 +557,7 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value));
/* Cannot find the last key because it was removed by LRU */
- assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -ENOENT);
}
/* Test map with only one element */
@@ -714,21 +705,18 @@ static void test_lru_sanity7(int map_type, int map_flags)
BPF_NOEXIST));
/* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
- /* key=1 already exists */
- && errno == EEXIST);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+ /* key=1 already exists */
/* insert key=2 element */
/* check that key=2 is not found */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* BPF_EXIST means: update existing element */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+ /* key=2 is not there */
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
@@ -736,8 +724,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
/* check that key=3 is not found */
key = 3;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* check that key=1 can be found and mark the ref bit to
* stop LRU from removing key=1
@@ -760,8 +747,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
/* key=2 has been removed from the LRU */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
assert(map_equal(lru_map_fd, expected_map_fd));
@@ -808,21 +794,18 @@ static void test_lru_sanity8(int map_type, int map_flags)
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
/* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
- /* key=1 already exists */
- && errno == EEXIST);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+ /* key=1 already exists */
/* insert key=2 element */
/* check that key=2 is not found */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* BPF_EXIST means: update existing element */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+ /* key=2 is not there */
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
assert(!bpf_map_update_elem(expected_map_fd, &key, value,
@@ -832,8 +815,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
/* check that key=3 is not found */
key = 3;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* check that key=1 can be found and do _not_ mark ref bit.
* this will be evicted on next update.
@@ -856,8 +838,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
/* key=1 has been removed from the LRU */
key = 1;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
assert(map_equal(lru_map_fd, expected_map_fd));
@@ -880,11 +861,14 @@ int main(int argc, char **argv)
assert(nr_cpus != -1);
printf("nr_cpus:%d\n\n", nr_cpus);
- for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
- for (t = 0; t < sizeof(map_types) / sizeof(*map_types); t++) {
+ for (t = 0; t < ARRAY_SIZE(map_types); t++) {
test_lru_sanity0(map_types[t], map_flags[f]);
test_lru_sanity1(map_types[t], map_flags[f], tgt_free);
test_lru_sanity2(map_types[t], map_flags[f], tgt_free);
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
deleted file mode 100755
index 59ea56945e6c..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ /dev/null
@@ -1,464 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Setup/topology:
-#
-# NS1 NS2 NS3
-# veth1 <---> veth2 veth3 <---> veth4 (the top route)
-# veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
-#
-# each vethN gets IPv[4|6]_N address
-#
-# IPv*_SRC = IPv*_1
-# IPv*_DST = IPv*_4
-#
-# all tests test pings from IPv*_SRC to IPv*_DST
-#
-# by default, routes are configured to allow packets to go
-# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
-#
-# a GRE device is installed in NS3 with IPv*_GRE, and
-# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
-# (the bottom route)
-#
-# Tests:
-#
-# 1. routes NS2->IPv*_DST are brought down, so the only way a ping
-# from IP*_SRC to IP*_DST can work is via IPv*_GRE
-#
-# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth1:egress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-#
-# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-readonly NS3="ns3-$(mktemp -u XXXXXX)"
-
-readonly IPv4_1="172.16.1.100"
-readonly IPv4_2="172.16.2.100"
-readonly IPv4_3="172.16.3.100"
-readonly IPv4_4="172.16.4.100"
-readonly IPv4_5="172.16.5.100"
-readonly IPv4_6="172.16.6.100"
-readonly IPv4_7="172.16.7.100"
-readonly IPv4_8="172.16.8.100"
-readonly IPv4_GRE="172.16.16.100"
-
-readonly IPv4_SRC=$IPv4_1
-readonly IPv4_DST=$IPv4_4
-
-readonly IPv6_1="fb01::1"
-readonly IPv6_2="fb02::1"
-readonly IPv6_3="fb03::1"
-readonly IPv6_4="fb04::1"
-readonly IPv6_5="fb05::1"
-readonly IPv6_6="fb06::1"
-readonly IPv6_7="fb07::1"
-readonly IPv6_8="fb08::1"
-readonly IPv6_GRE="fb10::1"
-
-readonly IPv6_SRC=$IPv6_1
-readonly IPv6_DST=$IPv6_4
-
-TEST_STATUS=0
-TESTS_SUCCEEDED=0
-TESTS_FAILED=0
-
-TMPFILE=""
-
-process_test_results()
-{
- if [[ "${TEST_STATUS}" -eq 0 ]] ; then
- echo "PASS"
- TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1))
- else
- echo "FAIL"
- TESTS_FAILED=$((TESTS_FAILED+1))
- fi
-}
-
-print_test_summary_and_exit()
-{
- echo "passed tests: ${TESTS_SUCCEEDED}"
- echo "failed tests: ${TESTS_FAILED}"
- if [ "${TESTS_FAILED}" -eq "0" ] ; then
- exit 0
- else
- exit 1
- fi
-}
-
-setup()
-{
- set -e # exit on error
- TEST_STATUS=0
-
- # create devices and namespaces
- ip netns add "${NS1}"
- ip netns add "${NS2}"
- ip netns add "${NS3}"
-
- ip link add veth1 type veth peer name veth2
- ip link add veth3 type veth peer name veth4
- ip link add veth5 type veth peer name veth6
- ip link add veth7 type veth peer name veth8
-
- ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip link set veth1 netns ${NS1}
- ip link set veth2 netns ${NS2}
- ip link set veth3 netns ${NS2}
- ip link set veth4 netns ${NS3}
- ip link set veth5 netns ${NS1}
- ip link set veth6 netns ${NS2}
- ip link set veth7 netns ${NS2}
- ip link set veth8 netns ${NS3}
-
- if [ ! -z "${VRF}" ] ; then
- ip -netns ${NS1} link add red type vrf table 1001
- ip -netns ${NS1} link set red up
- ip -netns ${NS1} route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} link set veth1 vrf red
- ip -netns ${NS1} link set veth5 vrf red
-
- ip -netns ${NS2} link add red type vrf table 1001
- ip -netns ${NS2} link set red up
- ip -netns ${NS2} route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} link set veth2 vrf red
- ip -netns ${NS2} link set veth3 vrf red
- ip -netns ${NS2} link set veth6 vrf red
- ip -netns ${NS2} link set veth7 vrf red
- fi
-
- # configure addesses: the top route (1-2-3-4)
- ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1
- ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2
- ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3
- ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4
- ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1
- ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2
- ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3
- ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4
-
- # configure addresses: the bottom route (5-6-7-8)
- ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5
- ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6
- ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7
- ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8
- ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5
- ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6
- ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
- ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
-
- ip -netns ${NS1} link set dev veth1 up
- ip -netns ${NS2} link set dev veth2 up
- ip -netns ${NS2} link set dev veth3 up
- ip -netns ${NS3} link set dev veth4 up
- ip -netns ${NS1} link set dev veth5 up
- ip -netns ${NS2} link set dev veth6 up
- ip -netns ${NS2} link set dev veth7 up
- ip -netns ${NS3} link set dev veth8 up
-
- # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
- # the bottom route to specific bottom addresses
-
- # NS1
- # top route
- ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF}
- ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default
- ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default
- # bottom route
- ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF}
- ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF}
-
- # NS2
- # top route
- ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF}
- ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF}
- # bottom route
- ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF}
- ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF}
-
- # NS3
- # top route
- ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4
- ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4
- ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3}
- ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3}
- # bottom route
- ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8
- ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8
- ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7}
- ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7}
-
- # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
- ip -netns ${NS3} link set gre_dev up
- ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev
- ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF}
-
-
- # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
- ip -netns ${NS3} link set gre6_dev up
- ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
- ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
-
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
-
- TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
-
- sleep 1 # reduce flakiness
- set +e
-}
-
-cleanup()
-{
- if [ -f ${TMPFILE} ] ; then
- rm ${TMPFILE}
- fi
-
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
- ip netns del ${NS3} 2> /dev/null
-}
-
-trap cleanup EXIT
-
-remove_routes_to_gredev()
-{
- ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF}
- ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF}
- ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF}
-}
-
-add_unreachable_routes_to_gredev()
-{
- ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
- ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
-}
-
-test_ping()
-{
- local readonly PROTO=$1
- local readonly EXPECTED=$2
- local RET=0
-
- if [ "${PROTO}" == "IPv4" ] ; then
- ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
- RET=$?
- elif [ "${PROTO}" == "IPv6" ] ; then
- ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
- RET=$?
- else
- echo " test_ping: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
-
- if [ "0" != "${RET}" ]; then
- RET=1
- fi
-
- if [ "${EXPECTED}" != "${RET}" ] ; then
- echo " test_ping failed: expected: ${EXPECTED}; got ${RET}"
- TEST_STATUS=1
- fi
-}
-
-test_gso()
-{
- local readonly PROTO=$1
- local readonly PKT_SZ=5000
- local IP_DST=""
- : > ${TMPFILE} # trim the capture file
-
- # check that nc is present
- command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available: skipping TSO tests"; return; }
-
- # listen on port 9000, capture TCP into $TMPFILE
- if [ "${PROTO}" == "IPv4" ] ; then
- IP_DST=${IPv4_DST}
- ip netns exec ${NS3} bash -c \
- "nc -4 -l -p 9000 > ${TMPFILE} &"
- elif [ "${PROTO}" == "IPv6" ] ; then
- IP_DST=${IPv6_DST}
- ip netns exec ${NS3} bash -c \
- "nc -6 -l -p 9000 > ${TMPFILE} &"
- RET=$?
- else
- echo " test_gso: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
- sleep 1 # let nc start listening
-
- # send a packet larger than MTU
- ip netns exec ${NS1} bash -c \
- "dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
- sleep 2 # let the packet get delivered
-
- # verify we received all expected bytes
- SZ=$(stat -c %s ${TMPFILE})
- if [ "$SZ" != "$PKT_SZ" ] ; then
- echo " test_gso failed: ${PROTO}"
- TEST_STATUS=1
- fi
-}
-
-test_egress()
-{
- local readonly ENCAP=$1
- echo "starting egress ${ENCAP} encap test ${VRF}"
- setup
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, ping fails
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF}
- else
- echo " unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # skip GSO tests with VRF: VRF routing needs properly assigned
- # source IP/device, which is easy to do with ping and hard with dd/nc.
- if [ -z "${VRF}" ] ; then
- test_gso IPv4
- test_gso IPv6
- fi
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-test_ingress()
-{
- local readonly ENCAP=$1
- echo "starting ingress ${ENCAP} encap test ${VRF}"
- setup
-
- # need to wait a bit for IPv6 to autoconf, otherwise
- # ping6 sometimes fails with "unable to bind to address"
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, pings fail
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF}
- else
- echo "FAIL: unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-VRF=""
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-VRF="vrf red"
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-print_test_summary_and_exit
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
deleted file mode 100755
index 5620919fde9e..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ /dev/null
@@ -1,149 +0,0 @@
-#!/bin/bash
-# Connects 6 network namespaces through veths.
-# Each NS may have different IPv6 global scope addresses :
-# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
-# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
-# fc42::1 fd00::4
-#
-# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
-# IPv6 header with a Segment Routing Header, with segments :
-# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
-#
-# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
-# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
-# - fd00::2 : remove the TLV, change the flags, add a tag
-# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
-#
-# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
-# Each End.BPF action will validate the operations applied on the SRH by the
-# previous BPF program in the chain, otherwise the packet is dropped.
-#
-# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
-# datagram can be read on NS6 when binding to fb00::6.
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-msg="skip all tests:"
-if [ $UID != 0 ]; then
- echo $msg please run this as root >&2
- exit $ksft_skip
-fi
-
-TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
-
-cleanup()
-{
- if [ "$?" = "0" ]; then
- echo "selftests: test_lwt_seg6local [PASS]";
- else
- echo "selftests: test_lwt_seg6local [FAILED]";
- fi
-
- set +e
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
- ip netns del ns3 2> /dev/null
- ip netns del ns4 2> /dev/null
- ip netns del ns5 2> /dev/null
- ip netns del ns6 2> /dev/null
- rm -f $TMP_FILE
-}
-
-set -e
-
-ip netns add ns1
-ip netns add ns2
-ip netns add ns3
-ip netns add ns4
-ip netns add ns5
-ip netns add ns6
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-ip link add veth3 type veth peer name veth4
-ip link add veth5 type veth peer name veth6
-ip link add veth7 type veth peer name veth8
-ip link add veth9 type veth peer name veth10
-
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
-ip link set veth3 netns ns2
-ip link set veth4 netns ns3
-ip link set veth5 netns ns3
-ip link set veth6 netns ns4
-ip link set veth7 netns ns4
-ip link set veth8 netns ns5
-ip link set veth9 netns ns5
-ip link set veth10 netns ns6
-
-ip netns exec ns1 ip link set dev veth1 up
-ip netns exec ns2 ip link set dev veth2 up
-ip netns exec ns2 ip link set dev veth3 up
-ip netns exec ns3 ip link set dev veth4 up
-ip netns exec ns3 ip link set dev veth5 up
-ip netns exec ns4 ip link set dev veth6 up
-ip netns exec ns4 ip link set dev veth7 up
-ip netns exec ns5 ip link set dev veth8 up
-ip netns exec ns5 ip link set dev veth9 up
-ip netns exec ns6 ip link set dev veth10 up
-ip netns exec ns6 ip link set dev lo up
-
-# All link scope addresses and routes required between veths
-ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
-ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
-ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
-ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
-ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
-ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
-ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
-ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
-ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
-ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
-ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
-ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
-ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
-ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
-ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
-ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
-
-ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
-ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
-
-ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
-ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
-
-ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
-ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
-
-ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
-ip netns exec ns4 ip -6 addr add fc42::1 dev lo
-ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
-
-ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
-ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
-
-ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
-ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
-
-ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-
-ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
-ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
-ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
-
-ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
-ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
-sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -TERM $!
-
-if [[ $(< $TMP_FILE) != "foobar" ]]; then
- exit 1
-fi
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 51adc42b2b40..ccc5acd55ff9 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -23,24 +23,19 @@
#include <bpf/libbpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "test_maps.h"
+#include "testing_helpers.h"
-#ifndef ENOTSUPP
-#define ENOTSUPP 524
-#endif
+int skips;
-static int skips;
-
-static int map_flags;
+static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) };
static void test_hashmap(unsigned int task, void *data)
{
long long key, next_key, first_key, value;
int fd;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- 2, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), 2, &map_opts);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
@@ -53,23 +48,30 @@ static void test_hashmap(unsigned int task, void *data)
value = 0;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
key = 2;
+ value = 1234;
+ /* Insert key=2 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ /* Check that key=2 matches the value and delete it */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -80,7 +82,7 @@ static void test_hashmap(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Update existing element, though the map is full. */
@@ -89,12 +91,12 @@ static void test_hashmap(unsigned int task, void *data)
key = 2;
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
key = 3;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
key = 0;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -104,7 +106,7 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
(next_key == 1 || next_key == 2) &&
(next_key != first_key));
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete both elements. */
@@ -112,13 +114,13 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
@@ -130,8 +132,7 @@ static void test_hashmap_sizes(unsigned int task, void *data)
for (i = 1; i <= 512; i <<= 1)
for (j = 1; j <= 1 << 18; j <<= 1) {
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
- 2, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, i, j, 2, &map_opts);
if (fd < 0) {
if (errno == ENOMEM)
return;
@@ -152,8 +153,8 @@ static void test_hashmap_percpu(unsigned int task, void *data)
int expected_key_mask = 0;
int fd, i;
- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
- sizeof(bpf_percpu(value, 0)), 2, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_HASH, NULL, sizeof(key),
+ sizeof(bpf_percpu(value, 0)), 2, &map_opts);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
@@ -166,15 +167,25 @@ static void test_hashmap_percpu(unsigned int task, void *data)
/* Insert key=1 element. */
assert(!(expected_key_mask & key));
assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+
+ /* Lookup and delete elem key=1 and check value. */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 &&
+ bpf_percpu(value,0) == 100);
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value,i) = i + 100;
+
+ /* Insert key=1 element which should not exist. */
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
expected_key_mask |= key;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. Value could be 0 if the lookup
@@ -186,10 +197,10 @@ static void test_hashmap_percpu(unsigned int task, void *data)
key = 2;
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -202,11 +213,11 @@ static void test_hashmap_percpu(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -237,32 +248,33 @@ static void test_hashmap_percpu(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
}
+#define VALUE_SIZE 3
static int helper_fill_hashmap(int max_entries)
{
int i, fd, ret;
- long long key, value;
+ long long key, value[VALUE_SIZE] = {};
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- max_entries, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ max_entries, &map_opts);
CHECK(fd < 0,
"failed to create hashmap",
- "err: %s, flags: 0x%x\n", strerror(errno), map_flags);
+ "err: %s, flags: 0x%x\n", strerror(errno), map_opts.map_flags);
for (i = 0; i < max_entries; i++) {
- key = i; value = key;
- ret = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ key = i; value[0] = key;
+ ret = bpf_map_update_elem(fd, &key, value, BPF_NOEXIST);
CHECK(ret != 0,
"can't update hashmap",
"err: %s\n", strerror(ret));
@@ -273,8 +285,8 @@ static int helper_fill_hashmap(int max_entries)
static void test_hashmap_walk(unsigned int task, void *data)
{
- int fd, i, max_entries = 1000;
- long long key, value, next_key;
+ int fd, i, max_entries = 10000;
+ long long key, value[VALUE_SIZE], next_key;
bool next_key_valid = true;
fd = helper_fill_hashmap(max_entries);
@@ -282,7 +294,7 @@ static void test_hashmap_walk(unsigned int task, void *data)
for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
&next_key) == 0; i++) {
key = next_key;
- assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+ assert(bpf_map_lookup_elem(fd, &key, value) == 0);
}
assert(i == max_entries);
@@ -290,9 +302,9 @@ static void test_hashmap_walk(unsigned int task, void *data)
assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
for (i = 0; next_key_valid; i++) {
next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0;
- assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
- value++;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
+ assert(bpf_map_lookup_elem(fd, &key, value) == 0);
+ value[0]++;
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == 0);
key = next_key;
}
@@ -301,8 +313,8 @@ static void test_hashmap_walk(unsigned int task, void *data)
for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
&next_key) == 0; i++) {
key = next_key;
- assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
- assert(value - 1 == key);
+ assert(bpf_map_lookup_elem(fd, &key, value) == 0);
+ assert(value[0] - 1 == key);
}
assert(i == max_entries);
@@ -314,8 +326,8 @@ static void test_hashmap_zero_seed(void)
int i, first, second, old_flags;
long long key, next_first, next_second;
- old_flags = map_flags;
- map_flags |= BPF_F_ZERO_SEED;
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_ZERO_SEED;
first = helper_fill_hashmap(3);
second = helper_fill_hashmap(3);
@@ -337,7 +349,7 @@ static void test_hashmap_zero_seed(void)
key = next_first;
}
- map_flags = old_flags;
+ map_opts.map_flags = old_flags;
close(first);
close(second);
}
@@ -347,8 +359,7 @@ static void test_arraymap(unsigned int task, void *data)
int key, next_key, fd;
long long value;
- fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value), 2, NULL);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
@@ -360,7 +371,7 @@ static void test_arraymap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
value = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -374,11 +385,11 @@ static void test_arraymap(unsigned int task, void *data)
* due to max_entries limit.
*/
key = 2;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -387,12 +398,12 @@ static void test_arraymap(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -403,8 +414,8 @@ static void test_arraymap_percpu(unsigned int task, void *data)
BPF_DECLARE_PERCPU(long, values);
int key, next_key, fd, i;
- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(bpf_percpu(values, 0)), 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), 2, NULL);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
@@ -418,7 +429,7 @@ static void test_arraymap_percpu(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
bpf_percpu(values, 0) = 0;
- assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -433,11 +444,11 @@ static void test_arraymap_percpu(unsigned int task, void *data)
/* Check that key=2 cannot be inserted due to max_entries limit. */
key = 2;
- assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -446,12 +457,12 @@ static void test_arraymap_percpu(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -466,8 +477,8 @@ static void test_arraymap_percpu_many_keys(void)
unsigned int nr_keys = 2000;
int key, fd, i;
- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(bpf_percpu(values, 0)), nr_keys, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), nr_keys, NULL);
if (fd < 0) {
printf("Failed to create per-cpu arraymap '%s'!\n",
strerror(errno));
@@ -498,8 +509,7 @@ static void test_devmap(unsigned int task, void *data)
int fd;
__u32 key, value;
- fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP, NULL, sizeof(key), sizeof(value), 2, NULL);
if (fd < 0) {
printf("Failed to create devmap '%s'!\n", strerror(errno));
exit(1);
@@ -513,8 +523,7 @@ static void test_devmap_hash(unsigned int task, void *data)
int fd;
__u32 key, value;
- fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP_HASH, NULL, sizeof(key), sizeof(value), 2, NULL);
if (fd < 0) {
printf("Failed to create devmap_hash '%s'!\n", strerror(errno));
exit(1);
@@ -526,7 +535,7 @@ static void test_devmap_hash(unsigned int task, void *data)
static void test_queuemap(unsigned int task, void *data)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0;
int fd, i;
/* Fill test values to be used */
@@ -534,14 +543,12 @@ static void test_queuemap(unsigned int task, void *data)
vals[i] = rand();
/* Invalid key size */
- fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 4, sizeof(val), MAP_SIZE, &map_opts);
assert(fd < 0 && errno == EINVAL);
- fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 0, sizeof(val), MAP_SIZE, &map_opts);
/* Queue map does not support BPF_F_NO_PREALLOC */
- if (map_flags & BPF_F_NO_PREALLOC) {
+ if (map_opts.map_flags & BPF_F_NO_PREALLOC) {
assert(fd < 0 && errno == EINVAL);
return;
}
@@ -555,7 +562,7 @@ static void test_queuemap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -571,12 +578,12 @@ static void test_queuemap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -584,7 +591,7 @@ static void test_queuemap(unsigned int task, void *data)
static void test_stackmap(unsigned int task, void *data)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0;
int fd, i;
/* Fill test values to be used */
@@ -592,14 +599,12 @@ static void test_stackmap(unsigned int task, void *data)
vals[i] = rand();
/* Invalid key size */
- fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 4, sizeof(val), MAP_SIZE, &map_opts);
assert(fd < 0 && errno == EINVAL);
- fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 0, sizeof(val), MAP_SIZE, &map_opts);
/* Stack map does not support BPF_F_NO_PREALLOC */
- if (map_flags & BPF_F_NO_PREALLOC) {
+ if (map_opts.map_flags & BPF_F_NO_PREALLOC) {
assert(fd < 0 && errno == EINVAL);
return;
}
@@ -613,7 +618,7 @@ static void test_stackmap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -629,12 +634,12 @@ static void test_stackmap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -643,20 +648,20 @@ static void test_stackmap(unsigned int task, void *data)
#include <arpa/inet.h>
#include <sys/select.h>
#include <linux/err.h>
-#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
-#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
-#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
+#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.bpf.o"
+#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.bpf.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.bpf.o"
static void test_sockmap(unsigned int tasks, void *data)
{
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
+ struct bpf_object *parse_obj, *verdict_obj, *msg_obj;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
int parse_prog, verdict_prog, msg_prog;
struct sockaddr_in addr;
int one = 1, s, sc, rc;
- struct bpf_object *obj;
struct timeval to;
__u32 key, value;
pid_t pid[tasks];
@@ -726,11 +731,11 @@ static void test_sockmap(unsigned int tasks, void *data)
}
/* Test sockmap with connected sockets */
- fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
+ fd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL,
sizeof(key), sizeof(value),
- 6, 0);
+ 6, NULL);
if (fd < 0) {
- if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) {
+ if (!libbpf_probe_bpf_map_type(BPF_MAP_TYPE_SOCKMAP, NULL)) {
printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n",
__func__);
skips++;
@@ -747,11 +752,12 @@ static void test_sockmap(unsigned int tasks, void *data)
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
- if (!err) {
- printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+ if (err) {
+ printf("Failed socket update SOCK_DGRAM '%i:%i'\n",
i, udp);
goto out_sockmap;
}
+ close(udp);
/* Test update without programs */
for (i = 0; i < 6; i++) {
@@ -813,29 +819,29 @@ static void test_sockmap(unsigned int tasks, void *data)
}
/* Load SK_SKB program and Attach */
- err = bpf_prog_load(SOCKMAP_PARSE_PROG,
- BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
+ err = bpf_prog_test_load(SOCKMAP_PARSE_PROG,
+ BPF_PROG_TYPE_SK_SKB, &parse_obj, &parse_prog);
if (err) {
printf("Failed to load SK_SKB parse prog\n");
goto out_sockmap;
}
- err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
- BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+ err = bpf_prog_test_load(SOCKMAP_TCP_MSG_PROG,
+ BPF_PROG_TYPE_SK_MSG, &msg_obj, &msg_prog);
if (err) {
printf("Failed to load SK_SKB msg prog\n");
goto out_sockmap;
}
- err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
- BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
+ err = bpf_prog_test_load(SOCKMAP_VERDICT_PROG,
+ BPF_PROG_TYPE_SK_SKB, &verdict_obj, &verdict_prog);
if (err) {
printf("Failed to load SK_SKB verdict prog\n");
goto out_sockmap;
}
- bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
- if (IS_ERR(bpf_map_rx)) {
+ bpf_map_rx = bpf_object__find_map_by_name(verdict_obj, "sock_map_rx");
+ if (!bpf_map_rx) {
printf("Failed to load map rx from verdict prog\n");
goto out_sockmap;
}
@@ -846,8 +852,8 @@ static void test_sockmap(unsigned int tasks, void *data)
goto out_sockmap;
}
- bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
- if (IS_ERR(bpf_map_tx)) {
+ bpf_map_tx = bpf_object__find_map_by_name(verdict_obj, "sock_map_tx");
+ if (!bpf_map_tx) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -858,8 +864,8 @@ static void test_sockmap(unsigned int tasks, void *data)
goto out_sockmap;
}
- bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
- if (IS_ERR(bpf_map_msg)) {
+ bpf_map_msg = bpf_object__find_map_by_name(verdict_obj, "sock_map_msg");
+ if (!bpf_map_msg) {
printf("Failed to load map msg from msg_verdict prog\n");
goto out_sockmap;
}
@@ -870,8 +876,8 @@ static void test_sockmap(unsigned int tasks, void *data)
goto out_sockmap;
}
- bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
- if (IS_ERR(bpf_map_break)) {
+ bpf_map_break = bpf_object__find_map_by_name(verdict_obj, "sock_map_break");
+ if (!bpf_map_break) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -968,7 +974,7 @@ static void test_sockmap(unsigned int tasks, void *data)
FD_ZERO(&w);
FD_SET(sfd[3], &w);
- to.tv_sec = 1;
+ to.tv_sec = 30;
to.tv_usec = 0;
s = select(sfd[3] + 1, &w, NULL, NULL, &to);
if (s == -1) {
@@ -1116,7 +1122,9 @@ static void test_sockmap(unsigned int tasks, void *data)
}
close(fd);
close(map_fd_rx);
- bpf_object__close(obj);
+ bpf_object__close(parse_obj);
+ bpf_object__close(msg_obj);
+ bpf_object__close(verdict_obj);
return;
out:
for (i = 0; i < 6; i++)
@@ -1135,25 +1143,29 @@ out_sockmap:
exit(1);
}
-#define MAPINMAP_PROG "./test_map_in_map.o"
+#define MAPINMAP_PROG "./test_map_in_map.bpf.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.bpf.o"
static void test_map_in_map(void)
{
struct bpf_object *obj;
struct bpf_map *map;
int mim_fd, fd, err;
int pos = 0;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 id = 0;
+ libbpf_print_fn_t old_print_fn;
obj = bpf_object__open(MAPINMAP_PROG);
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), sizeof(int), 2, NULL);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
}
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1164,7 +1176,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1174,10 +1186,14 @@ static void test_map_in_map(void)
goto out_map_in_map;
}
- bpf_object__load(obj);
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("Failed to load test prog\n");
+ goto out_map_in_map;
+ }
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1194,7 +1210,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1211,11 +1227,75 @@ static void test_map_in_map(void)
}
close(fd);
+ fd = -1;
+ bpf_object__close(obj);
+
+ /* Test that failing bpf_object__create_map() destroys the inner map */
+ obj = bpf_object__open(MAPINMAP_INVALID_PROG);
+ err = libbpf_get_error(obj);
+ if (err) {
+ printf("Failed to load %s program: %d %d",
+ MAPINMAP_INVALID_PROG, err, errno);
+ goto out_map_in_map;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "mim");
+ if (!map) {
+ printf("Failed to load array of maps from test prog\n");
+ goto out_map_in_map;
+ }
+
+ old_print_fn = libbpf_set_print(NULL);
+
+ err = bpf_object__load(obj);
+ if (!err) {
+ printf("Loading obj supposed to fail\n");
+ goto out_map_in_map;
+ }
+
+ libbpf_set_print(old_print_fn);
+
+ /* Iterate over all maps to check whether the internal map
+ * ("mim.internal") has been destroyed.
+ */
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ printf("Failed to get next map: %d", errno);
+ goto out_map_in_map;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ printf("Failed to get map by id %u: %d", id, errno);
+ goto out_map_in_map;
+ }
+
+ err = bpf_map_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ printf("Failed to get map info by fd %d: %d", fd,
+ errno);
+ goto out_map_in_map;
+ }
+
+ if (!strcmp(info.name, "mim.inner")) {
+ printf("Inner map mim.inner was not destroyed\n");
+ goto out_map_in_map;
+ }
+
+ close(fd);
+ }
+
bpf_object__close(obj);
return;
out_map_in_map:
- close(fd);
+ if (fd >= 0)
+ close(fd);
exit(1);
}
@@ -1231,8 +1311,8 @@ static void test_map_large(void)
} key;
int fd, i, value;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
if (fd < 0) {
printf("Failed to create large map '%s'!\n", strerror(errno));
exit(1);
@@ -1246,7 +1326,7 @@ static void test_map_large(void)
}
key.c = -1;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Iterate through all elements. */
@@ -1254,12 +1334,12 @@ static void test_map_large(void)
key.c = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
key.c = 0;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
key.a = 1;
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1298,30 +1378,44 @@ static void __run_parallel(unsigned int tasks,
static void test_map_stress(void)
{
+ run_parallel(100, test_hashmap_walk, NULL);
run_parallel(100, test_hashmap, NULL);
run_parallel(100, test_hashmap_percpu, NULL);
run_parallel(100, test_hashmap_sizes, NULL);
- run_parallel(100, test_hashmap_walk, NULL);
run_parallel(100, test_arraymap, NULL);
run_parallel(100, test_arraymap_percpu, NULL);
}
-#define TASKS 1024
+#define TASKS 100
#define DO_UPDATE 1
#define DO_DELETE 0
#define MAP_RETRIES 20
+#define MAX_DELAY_US 50000
+#define MIN_DELAY_RANGE_US 5000
+
+static bool can_retry(int err)
+{
+ return (err == EAGAIN || err == EBUSY ||
+ ((err == ENOMEM || err == E2BIG) &&
+ map_opts.map_flags == BPF_F_NO_PREALLOC));
+}
-static int map_update_retriable(int map_fd, const void *key, const void *value,
- int flags, int attempts)
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+ retry_for_error_fn need_retry)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_update_elem(map_fd, key, value, flags)) {
- if (!attempts || (errno != EAGAIN && errno != EBUSY))
+ if (!attempts || !need_retry(errno))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
@@ -1330,11 +1424,16 @@ static int map_update_retriable(int map_fd, const void *key, const void *value,
static int map_delete_retriable(int map_fd, const void *key, int attempts)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_delete_elem(map_fd, key)) {
if (!attempts || (errno != EAGAIN && errno != EBUSY))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
@@ -1347,15 +1446,19 @@ static void test_update_delete(unsigned int fn, void *data)
int fd = ((int *)data)[0];
int i, key, value, err;
+ if (fn & 1)
+ test_hashmap_walk(fn, NULL);
for (i = fn; i < MAP_SIZE; i += TASKS) {
key = value = i;
if (do_update) {
- err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
+ err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
- err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
+ err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
@@ -1370,17 +1473,18 @@ static void test_update_delete(unsigned int fn, void *data)
static void test_map_parallel(void)
{
- int i, fd, key = 0, value = 0;
+ int i, fd, key = 0, value = 0, j = 0;
int data[2];
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
if (fd < 0) {
printf("Failed to create map for parallel test '%s'!\n",
strerror(errno));
exit(1);
}
+again:
/* Use the same fd in children to add elements to this map:
* child_0 adds key=0, key=1024, key=2048, ...
* child_1 adds key=1, key=1025, key=2049, ...
@@ -1391,7 +1495,7 @@ static void test_map_parallel(void)
run_parallel(TASKS, test_update_delete, data);
/* Check that key=0 is already there. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that all elements were inserted. */
@@ -1399,7 +1503,7 @@ static void test_map_parallel(void)
key = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
/* Another check for all elements */
for (i = 0; i < MAP_SIZE; i++) {
@@ -1409,22 +1513,32 @@ static void test_map_parallel(void)
value == key);
}
- /* Now let's delete all elemenets in parallel. */
+ /* Now let's delete all elements in parallel. */
data[1] = DO_DELETE;
run_parallel(TASKS, test_update_delete, data);
/* Nothing should be left. */
key = -1;
- assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
+
+ key = 0;
+ bpf_map_delete_elem(fd, &key);
+ if (j++ < 5)
+ goto again;
+ close(fd);
}
static void test_map_rdonly(void)
{
int fd, key = 0, value = 0;
+ __u32 old_flags;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags | BPF_F_RDONLY);
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_RDONLY;
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
+ map_opts.map_flags = old_flags;
if (fd < 0) {
printf("Failed to create map for read only test '%s'!\n",
strerror(errno));
@@ -1434,12 +1548,12 @@ static void test_map_rdonly(void)
key = 1;
value = 1234;
/* Try to insert key=1 element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 &&
errno == EPERM);
/* Check that key=1 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1447,9 +1561,13 @@ static void test_map_rdonly(void)
static void test_map_wronly_hash(void)
{
int fd, key = 0, value = 0;
+ __u32 old_flags;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags | BPF_F_WRONLY);
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_WRONLY;
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
+ map_opts.map_flags = old_flags;
if (fd < 0) {
printf("Failed to create map for write only test '%s'!\n",
strerror(errno));
@@ -1462,8 +1580,8 @@ static void test_map_wronly_hash(void)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
/* Check that reading elements and keys from the map is not allowed. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM);
close(fd);
}
@@ -1471,13 +1589,17 @@ static void test_map_wronly_hash(void)
static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
{
int fd, value = 0;
+ __u32 old_flags;
+
assert(map_type == BPF_MAP_TYPE_QUEUE ||
map_type == BPF_MAP_TYPE_STACK);
- fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE,
- map_flags | BPF_F_WRONLY);
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_WRONLY;
+ fd = bpf_map_create(map_type, NULL, 0, sizeof(value), MAP_SIZE, &map_opts);
+ map_opts.map_flags = old_flags;
/* Stack/Queue maps do not support BPF_F_NO_PREALLOC */
- if (map_flags & BPF_F_NO_PREALLOC) {
+ if (map_opts.map_flags & BPF_F_NO_PREALLOC) {
assert(fd < 0 && errno == EINVAL);
return;
}
@@ -1490,10 +1612,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
/* Peek element should fail */
- assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM);
/* Pop element should fail */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 &&
errno == EPERM);
close(fd);
@@ -1547,7 +1669,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
value = &fd32;
}
err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update unbound sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1576,7 +1698,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
*/
err = bpf_map_update_elem(map_fd, &index0, value,
BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update non-listening sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1604,33 +1726,33 @@ static void test_reuseport_array(void)
__u32 fds_idx = 0;
int fd;
- map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- sizeof(__u32), sizeof(__u64), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL,
+ sizeof(__u32), sizeof(__u64), array_size, NULL);
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
/* Test lookup/update/delete with invalid index */
err = bpf_map_delete_elem(map_fd, &bad_index);
- CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+ CHECK(err >= 0 || errno != E2BIG, "reuseport array del >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != E2BIG,
+ CHECK(err >= 0 || errno != E2BIG,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
/* Test lookup/delete non existence elem */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup not-exist elem",
"err:%d errno:%d\n", err, errno);
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array del not-exist elem",
"err:%d errno:%d\n", err, errno);
@@ -1644,7 +1766,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update empty elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1653,7 +1775,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST success case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1662,7 +1784,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST success case. */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
@@ -1670,7 +1792,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err != -1 || errno != EEXIST,
+ CHECK(err >= 0 || errno != EEXIST,
"reuseport array update non-empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1679,7 +1801,7 @@ static void test_reuseport_array(void)
/* BPF_ANY case (always succeed) */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_ANY);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same sk with BPF_ANY",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
@@ -1688,32 +1810,32 @@ static void test_reuseport_array(void)
/* The same sk cannot be added to reuseport_array twice */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with same index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with different index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Test delete elem */
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err == -1, "reuseport array delete sk",
+ CHECK(err < 0, "reuseport array delete sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Add it back with BPF_NOEXIST */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array re-add with BPF_NOEXIST after del",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
/* Test cookie */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err == -1 || sk_cookie != map_cookie,
+ CHECK(err < 0 || sk_cookie != map_cookie,
"reuseport array lookup re-added sk",
"sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
type, err, errno, sk_cookie, map_cookie);
@@ -1722,7 +1844,7 @@ static void test_reuseport_array(void)
for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
close(grpa_fds64[f]);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup after close()",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1733,7 +1855,7 @@ static void test_reuseport_array(void)
CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+ CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
"err:%d errno:%d\n", err, errno);
close(fd64);
@@ -1741,18 +1863,18 @@ static void test_reuseport_array(void)
close(map_fd);
/* Test 32 bit fd */
- map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- sizeof(__u32), sizeof(__u32), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL,
+ sizeof(__u32), sizeof(__u32), array_size, NULL);
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
&sk_cookie, 1);
fd = fd64;
err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
- CHECK(err == -1, "reuseport array update 32 bit fd",
+ CHECK(err < 0, "reuseport array update 32 bit fd",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOSPC,
+ CHECK(err >= 0 || errno != ENOSPC,
"reuseport array lookup 32 bit fd",
"err:%d errno:%d\n", err, errno);
close(fd);
@@ -1798,10 +1920,12 @@ int main(void)
{
srand(time(NULL));
- map_flags = 0;
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ map_opts.map_flags = 0;
run_all_tests();
- map_flags = BPF_F_NO_PREALLOC;
+ map_opts.map_flags = BPF_F_NO_PREALLOC;
run_all_tests();
#define DEFINE_TEST(name) test_##name();
diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h
index 77d8587ac4ed..e4ac704a536c 100644
--- a/tools/testing/selftests/bpf/test_maps.h
+++ b/tools/testing/selftests/bpf/test_maps.h
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
@@ -14,4 +15,10 @@
} \
})
+extern int skips;
+
+typedef bool (*retry_for_error_fn)(int err);
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+ retry_for_error_fn need_retry);
+
#endif
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
deleted file mode 100644
index a7b9a69f4fd5..000000000000
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "netcnt_common.h"
-
-#define BPF_PROG "./netcnt_prog.o"
-#define TEST_CGROUP "/test-network-counters/"
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
- const char *name)
-{
- struct bpf_map *map;
-
- map = bpf_object__find_map_by_name(obj, name);
- if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
- return -1;
- }
- return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
- struct percpu_net_cnt *percpu_netcnt;
- struct bpf_cgroup_storage_key key;
- int map_fd, percpu_map_fd;
- int error = EXIT_FAILURE;
- struct net_cnt netcnt;
- struct bpf_object *obj;
- int prog_fd, cgroup_fd;
- unsigned long packets;
- unsigned long bytes;
- int cpu, nproc;
- __u32 prog_cnt;
-
- nproc = get_nprocs_conf();
- percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
- if (!percpu_netcnt) {
- printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
- goto err;
- }
-
- if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
- &obj, &prog_fd)) {
- printf("Failed to load bpf program\n");
- goto out;
- }
-
- cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
- if (cgroup_fd < 0)
- goto err;
-
- /* Attach bpf program */
- if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
- printf("Failed to attach bpf program");
- goto err;
- }
-
- if (system("which ping6 &>/dev/null") == 0)
- assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
- else
- assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
-
- if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
- &prog_cnt)) {
- printf("Failed to query attached programs");
- goto err;
- }
-
- map_fd = bpf_find_map(__func__, obj, "netcnt");
- if (map_fd < 0) {
- printf("Failed to find bpf map with net counters");
- goto err;
- }
-
- percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
- if (percpu_map_fd < 0) {
- printf("Failed to find bpf map with percpu net counters");
- goto err;
- }
-
- if (bpf_map_get_next_key(map_fd, NULL, &key)) {
- printf("Failed to get key in cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
- printf("Failed to lookup cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
- printf("Failed to lookup percpu cgroup storage\n");
- goto err;
- }
-
- /* Some packets can be still in per-cpu cache, but not more than
- * MAX_PERCPU_PACKETS.
- */
- packets = netcnt.packets;
- bytes = netcnt.bytes;
- for (cpu = 0; cpu < nproc; cpu++) {
- if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
- printf("Unexpected percpu value: %llu\n",
- percpu_netcnt[cpu].packets);
- goto err;
- }
-
- packets += percpu_netcnt[cpu].packets;
- bytes += percpu_netcnt[cpu].bytes;
- }
-
- /* No packets should be lost */
- if (packets != 10000) {
- printf("Unexpected packet count: %lu\n", packets);
- goto err;
- }
-
- /* Let's check that bytes counter matches the number of packets
- * multiplied by the size of ipv6 ICMP packet.
- */
- if (bytes != packets * 104) {
- printf("Unexpected bytes count: %lu\n", bytes);
- goto err;
- }
-
- error = 0;
- printf("test_netcnt:PASS\n");
-
-err:
- cleanup_cgroup_environment();
- free(percpu_netcnt);
-
-out:
- return error;
-}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6396932b97e2..02a85dda30e6 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -3,15 +3,155 @@
*/
#define _GNU_SOURCE
#include "test_progs.h"
+#include "testing_helpers.h"
#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
#include <argp.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <string.h>
+#include <sys/sysinfo.h> /* get_nprocs */
+#include <netinet/in.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <linux/keyctl.h>
+#include <sys/un.h>
+#include <bpf/btf.h>
+#include <time.h>
+#include "json_writer.h"
+
+#include "network_helpers.h"
+#include "verification_cert.h"
+
+/* backtrace() and backtrace_symbols_fd() are glibc specific,
+ * use header file when glibc is available and provide stub
+ * implementations when another libc implementation is used.
+ */
+#ifdef __GLIBC__
#include <execinfo.h> /* backtrace */
-#include <linux/membarrier.h>
+#else
+__weak int backtrace(void **buffer, int size)
+{
+ return 0;
+}
+
+__weak void backtrace_symbols_fd(void *const *buffer, int size, int fd)
+{
+ dprintf(fd, "<backtrace not supported>\n");
+}
+#endif /*__GLIBC__ */
+
+int env_verbosity = 0;
+
+static bool verbose(void)
+{
+ return env.verbosity > VERBOSE_NONE;
+}
+
+static void stdio_hijack_init(char **log_buf, size_t *log_cnt)
+{
+#ifdef __GLIBC__
+ if (verbose() && env.worker_id == -1) {
+ /* nothing to do, output to stdout by default */
+ return;
+ }
+
+ fflush(stdout);
+ fflush(stderr);
+
+ stdout = open_memstream(log_buf, log_cnt);
+ if (!stdout) {
+ stdout = env.stdout_saved;
+ perror("open_memstream");
+ return;
+ }
+
+ if (env.subtest_state)
+ env.subtest_state->stdout_saved = stdout;
+ else
+ env.test_state->stdout_saved = stdout;
+
+ stderr = stdout;
+#endif
+}
+
+static void stdio_hijack(char **log_buf, size_t *log_cnt)
+{
+#ifdef __GLIBC__
+ if (verbose() && env.worker_id == -1) {
+ /* nothing to do, output to stdout by default */
+ return;
+ }
+
+ env.stdout_saved = stdout;
+ env.stderr_saved = stderr;
+
+ stdio_hijack_init(log_buf, log_cnt);
+#endif
+}
+
+static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void stdio_restore(void)
+{
+#ifdef __GLIBC__
+ if (verbose() && env.worker_id == -1) {
+ /* nothing to do, output to stdout by default */
+ return;
+ }
+
+ fflush(stdout);
+
+ pthread_mutex_lock(&stdout_lock);
+
+ if (env.subtest_state) {
+ if (env.subtest_state->stdout_saved)
+ fclose(env.subtest_state->stdout_saved);
+ env.subtest_state->stdout_saved = NULL;
+ stdout = env.test_state->stdout_saved;
+ stderr = env.test_state->stdout_saved;
+ } else {
+ if (env.test_state->stdout_saved)
+ fclose(env.test_state->stdout_saved);
+ env.test_state->stdout_saved = NULL;
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
+ }
+
+ pthread_mutex_unlock(&stdout_lock);
+#endif
+}
+
+static int traffic_monitor_print_fn(const char *format, va_list args)
+{
+ pthread_mutex_lock(&stdout_lock);
+ vfprintf(stdout, format, args);
+ pthread_mutex_unlock(&stdout_lock);
+
+ return 0;
+}
+
+/* Adapted from perf/util/string.c */
+static bool glob_match(const char *str, const char *pat)
+{
+ while (*str && *pat && *pat != '*') {
+ if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (glob_match(str++, pat))
+ return true;
+ }
+ return !*str && !*pat;
+}
#define EXIT_NO_TEST 2
#define EXIT_ERR_SETUP_INFRA 3
@@ -23,17 +163,10 @@ struct prog_test_def {
const char *test_name;
int test_num;
void (*run_test)(void);
- bool force_log;
- int error_cnt;
- int skip_cnt;
- bool tested;
+ void (*run_serial_test)(void);
+ bool should_run;
bool need_cgroup_cleanup;
-
- char *subtest_name;
- int subtest_num;
-
- /* store counts before subtest started */
- int old_error_cnt;
+ bool should_tmon;
};
/* Override C runtime library's usleep() implementation to ensure nanosleep()
@@ -50,17 +183,100 @@ int usleep(useconds_t usec)
return syscall(__NR_nanosleep, &ts, NULL);
}
+/* Watchdog timer is started by watchdog_start() and stopped by watchdog_stop().
+ * If timer is active for longer than env.secs_till_notify,
+ * it prints the name of the current test to the stderr.
+ * If timer is active for longer than env.secs_till_kill,
+ * it kills the thread executing the test by sending a SIGSEGV signal to it.
+ */
+static void watchdog_timer_func(union sigval sigval)
+{
+ struct itimerspec timeout = {};
+ char test_name[256];
+ int err;
+
+ if (env.subtest_state)
+ snprintf(test_name, sizeof(test_name), "%s/%s",
+ env.test->test_name, env.subtest_state->name);
+ else
+ snprintf(test_name, sizeof(test_name), "%s",
+ env.test->test_name);
+
+ switch (env.watchdog_state) {
+ case WD_NOTIFY:
+ fprintf(env.stderr_saved, "WATCHDOG: test case %s executes for %d seconds...\n",
+ test_name, env.secs_till_notify);
+ timeout.it_value.tv_sec = env.secs_till_kill - env.secs_till_notify;
+ env.watchdog_state = WD_KILL;
+ err = timer_settime(env.watchdog, 0, &timeout, NULL);
+ if (err)
+ fprintf(env.stderr_saved, "Failed to arm watchdog timer\n");
+ break;
+ case WD_KILL:
+ fprintf(env.stderr_saved,
+ "WATCHDOG: test case %s executes for %d seconds, terminating with SIGSEGV\n",
+ test_name, env.secs_till_kill);
+ pthread_kill(env.main_thread, SIGSEGV);
+ break;
+ }
+}
+
+static void watchdog_start(void)
+{
+ struct itimerspec timeout = {};
+ int err;
+
+ if (env.secs_till_kill == 0)
+ return;
+ if (env.secs_till_notify > 0) {
+ env.watchdog_state = WD_NOTIFY;
+ timeout.it_value.tv_sec = env.secs_till_notify;
+ } else {
+ env.watchdog_state = WD_KILL;
+ timeout.it_value.tv_sec = env.secs_till_kill;
+ }
+ err = timer_settime(env.watchdog, 0, &timeout, NULL);
+ if (err)
+ fprintf(env.stderr_saved, "Failed to start watchdog timer\n");
+}
+
+static void watchdog_stop(void)
+{
+ struct itimerspec timeout = {};
+ int err;
+
+ env.watchdog_state = WD_NOTIFY;
+ err = timer_settime(env.watchdog, 0, &timeout, NULL);
+ if (err)
+ fprintf(env.stderr_saved, "Failed to stop watchdog timer\n");
+}
+
+static void watchdog_init(void)
+{
+ struct sigevent watchdog_sev = {
+ .sigev_notify = SIGEV_THREAD,
+ .sigev_notify_function = watchdog_timer_func,
+ };
+ int err;
+
+ env.main_thread = pthread_self();
+ err = timer_create(CLOCK_MONOTONIC, &watchdog_sev, &env.watchdog);
+ if (err)
+ fprintf(stderr, "Failed to initialize watchdog timer\n");
+}
+
static bool should_run(struct test_selector *sel, int num, const char *name)
{
int i;
for (i = 0; i < sel->blacklist.cnt; i++) {
- if (strstr(name, sel->blacklist.strs[i]))
+ if (glob_match(name, sel->blacklist.tests[i].name) &&
+ !sel->blacklist.tests[i].subtest_cnt)
return false;
}
for (i = 0; i < sel->whitelist.cnt; i++) {
- if (strstr(name, sel->whitelist.strs[i]))
+ if (glob_match(name, sel->whitelist.tests[i].name))
return true;
}
@@ -70,40 +286,202 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
return num < sel->num_set_len && sel->num_set[num];
}
-static void dump_test_log(const struct prog_test_def *test, bool failed)
+static bool match_subtest(struct test_filter_set *filter,
+ const char *test_name,
+ const char *subtest_name)
{
- if (stdout == env.stdout)
- return;
+ int i, j;
- fflush(stdout); /* exports env.log_buf & env.log_cnt */
+ for (i = 0; i < filter->cnt; i++) {
+ if (glob_match(test_name, filter->tests[i].name)) {
+ if (!filter->tests[i].subtest_cnt)
+ return true;
- if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
- if (env.log_cnt) {
- env.log_buf[env.log_cnt] = '\0';
- fprintf(env.stdout, "%s", env.log_buf);
- if (env.log_buf[env.log_cnt - 1] != '\n')
- fprintf(env.stdout, "\n");
+ for (j = 0; j < filter->tests[i].subtest_cnt; j++) {
+ if (glob_match(subtest_name,
+ filter->tests[i].subtests[j]))
+ return true;
+ }
}
}
- fseeko(stdout, 0, SEEK_SET); /* rewind */
+ return false;
+}
+
+static bool should_run_subtest(struct test_selector *sel,
+ struct test_selector *subtest_sel,
+ int subtest_num,
+ const char *test_name,
+ const char *subtest_name)
+{
+ if (match_subtest(&sel->blacklist, test_name, subtest_name))
+ return false;
+
+ if (match_subtest(&sel->whitelist, test_name, subtest_name))
+ return true;
+
+ if (!sel->whitelist.cnt && !subtest_sel->num_set)
+ return true;
+
+ return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num];
}
-static void skip_account(void)
+static bool should_tmon(struct test_selector *sel, const char *name)
{
- if (env.test->skip_cnt) {
- env.skip_cnt++;
- env.test->skip_cnt = 0;
+ int i;
+
+ for (i = 0; i < sel->whitelist.cnt; i++) {
+ if (glob_match(name, sel->whitelist.tests[i].name) &&
+ !sel->whitelist.tests[i].subtest_cnt)
+ return true;
}
+
+ return false;
}
-static void stdio_restore(void);
+static char *test_result(bool failed, bool skipped)
+{
+ return failed ? "FAIL" : (skipped ? "SKIP" : "OK");
+}
+
+#define TEST_NUM_WIDTH 7
+
+static void print_test_result(const struct prog_test_def *test, const struct test_state *test_state)
+{
+ int skipped_cnt = test_state->skip_cnt;
+ int subtests_cnt = test_state->subtest_num;
+
+ fprintf(env.stdout_saved, "#%-*d %s:", TEST_NUM_WIDTH, test->test_num, test->test_name);
+ if (test_state->error_cnt)
+ fprintf(env.stdout_saved, "FAIL");
+ else if (!skipped_cnt)
+ fprintf(env.stdout_saved, "OK");
+ else if (skipped_cnt == subtests_cnt || !subtests_cnt)
+ fprintf(env.stdout_saved, "SKIP");
+ else
+ fprintf(env.stdout_saved, "OK (SKIP: %d/%d)", skipped_cnt, subtests_cnt);
+
+ fprintf(env.stdout_saved, "\n");
+}
+
+static void print_test_log(char *log_buf, size_t log_cnt)
+{
+ log_buf[log_cnt] = '\0';
+ fprintf(env.stdout_saved, "%s", log_buf);
+ if (log_buf[log_cnt - 1] != '\n')
+ fprintf(env.stdout_saved, "\n");
+}
+
+static void print_subtest_name(int test_num, int subtest_num,
+ const char *test_name, char *subtest_name,
+ char *result)
+{
+ char test_num_str[32];
+
+ snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num);
+
+ fprintf(env.stdout_saved, "#%-*s %s/%s",
+ TEST_NUM_WIDTH, test_num_str,
+ test_name, subtest_name);
+
+ if (result)
+ fprintf(env.stdout_saved, ":%s", result);
+
+ fprintf(env.stdout_saved, "\n");
+}
+
+static void jsonw_write_log_message(json_writer_t *w, char *log_buf, size_t log_cnt)
+{
+ /* open_memstream (from stdio_hijack_init) ensures that log_bug is terminated by a
+ * null byte. Yet in parallel mode, log_buf will be NULL if there is no message.
+ */
+ if (log_cnt) {
+ jsonw_string_field(w, "message", log_buf);
+ } else {
+ jsonw_string_field(w, "message", "");
+ }
+}
+
+static void dump_test_log(const struct prog_test_def *test,
+ const struct test_state *test_state,
+ bool skip_ok_subtests,
+ bool par_exec_result,
+ json_writer_t *w)
+{
+ bool test_failed = test_state->error_cnt > 0;
+ bool force_log = test_state->force_log;
+ bool print_test = verbose() || force_log || test_failed;
+ int i;
+ struct subtest_state *subtest_state;
+ bool subtest_failed;
+ bool subtest_filtered;
+ bool print_subtest;
+
+ /* we do not print anything in the worker thread */
+ if (env.worker_id != -1)
+ return;
+
+ /* there is nothing to print when verbose log is used and execution
+ * is not in parallel mode
+ */
+ if (verbose() && !par_exec_result)
+ return;
+
+ if (test_state->log_cnt && print_test)
+ print_test_log(test_state->log_buf, test_state->log_cnt);
+
+ if (w && print_test) {
+ jsonw_start_object(w);
+ jsonw_string_field(w, "name", test->test_name);
+ jsonw_uint_field(w, "number", test->test_num);
+ jsonw_write_log_message(w, test_state->log_buf, test_state->log_cnt);
+ jsonw_bool_field(w, "failed", test_failed);
+ jsonw_name(w, "subtests");
+ jsonw_start_array(w);
+ }
+
+ for (i = 0; i < test_state->subtest_num; i++) {
+ subtest_state = &test_state->subtest_states[i];
+ subtest_failed = subtest_state->error_cnt;
+ subtest_filtered = subtest_state->filtered;
+ print_subtest = verbose() || force_log || subtest_failed;
+
+ if ((skip_ok_subtests && !subtest_failed) || subtest_filtered)
+ continue;
+
+ if (subtest_state->log_cnt && print_subtest) {
+ print_test_log(subtest_state->log_buf,
+ subtest_state->log_cnt);
+ }
+
+ print_subtest_name(test->test_num, i + 1,
+ test->test_name, subtest_state->name,
+ test_result(subtest_state->error_cnt,
+ subtest_state->skipped));
+
+ if (w && print_subtest) {
+ jsonw_start_object(w);
+ jsonw_string_field(w, "name", subtest_state->name);
+ jsonw_uint_field(w, "number", i+1);
+ jsonw_write_log_message(w, subtest_state->log_buf, subtest_state->log_cnt);
+ jsonw_bool_field(w, "failed", subtest_failed);
+ jsonw_end_object(w);
+ }
+ }
+
+ if (w && print_test) {
+ jsonw_end_array(w);
+ jsonw_end_object(w);
+ }
+
+ print_test_result(test, test_state);
+}
/* A bunch of tests set custom affinity per-thread and/or per-process. Reset
* it after each test/sub-test.
*/
-static void reset_affinity() {
-
+static void reset_affinity(void)
+{
cpu_set_t cpuset;
int i, err;
@@ -113,13 +491,11 @@ static void reset_affinity() {
err = sched_setaffinity(0, sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
@@ -137,76 +513,114 @@ static void save_netns(void)
static void restore_netns(void)
{
if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
- stdio_restore();
perror("setns(CLONE_NEWNS)");
exit(EXIT_ERR_SETUP_INFRA);
}
}
-void test__end_subtest()
+void test__end_subtest(void)
{
struct prog_test_def *test = env.test;
- int sub_error_cnt = test->error_cnt - test->old_error_cnt;
-
- if (sub_error_cnt)
- env.fail_cnt++;
- else if (test->skip_cnt == 0)
- env.sub_succ_cnt++;
- skip_account();
+ struct test_state *test_state = env.test_state;
+ struct subtest_state *subtest_state = env.subtest_state;
+
+ if (subtest_state->error_cnt) {
+ test_state->error_cnt++;
+ } else {
+ if (!subtest_state->skipped)
+ test_state->sub_succ_cnt++;
+ else
+ test_state->skip_cnt++;
+ }
- dump_test_log(test, sub_error_cnt);
+ if (verbose() && !env.workers)
+ print_subtest_name(test->test_num, test_state->subtest_num,
+ test->test_name, subtest_state->name,
+ test_result(subtest_state->error_cnt,
+ subtest_state->skipped));
- fprintf(env.stdout, "#%d/%d %s:%s\n",
- test->test_num, test->subtest_num, test->subtest_name,
- sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+ stdio_restore();
- free(test->subtest_name);
- test->subtest_name = NULL;
+ env.subtest_state = NULL;
}
-bool test__start_subtest(const char *name)
+bool test__start_subtest(const char *subtest_name)
{
struct prog_test_def *test = env.test;
+ struct test_state *state = env.test_state;
+ struct subtest_state *subtest_state;
+ size_t sub_state_size = sizeof(*subtest_state);
- if (test->subtest_name)
+ if (env.subtest_state)
test__end_subtest();
- test->subtest_num++;
+ state->subtest_num++;
+ state->subtest_states =
+ realloc(state->subtest_states,
+ state->subtest_num * sub_state_size);
+ if (!state->subtest_states) {
+ fprintf(stderr, "Not enough memory to allocate subtest result\n");
+ return false;
+ }
+
+ subtest_state = &state->subtest_states[state->subtest_num - 1];
+
+ memset(subtest_state, 0, sub_state_size);
- if (!name || !name[0]) {
- fprintf(env.stderr,
+ if (!subtest_name || !subtest_name[0]) {
+ fprintf(env.stderr_saved,
"Subtest #%d didn't provide sub-test name!\n",
- test->subtest_num);
+ state->subtest_num);
return false;
}
- if (!should_run(&env.subtest_selector, test->subtest_num, name))
+ subtest_state->name = strdup(subtest_name);
+ if (!subtest_state->name) {
+ fprintf(env.stderr_saved,
+ "Subtest #%d: failed to copy subtest name!\n",
+ state->subtest_num);
return false;
+ }
- test->subtest_name = strdup(name);
- if (!test->subtest_name) {
- fprintf(env.stderr,
- "Subtest #%d: failed to copy subtest name!\n",
- test->subtest_num);
+ if (!should_run_subtest(&env.test_selector,
+ &env.subtest_selector,
+ state->subtest_num,
+ test->test_name,
+ subtest_name)) {
+ subtest_state->filtered = true;
return false;
}
- env.test->old_error_cnt = env.test->error_cnt;
+
+ subtest_state->should_tmon = match_subtest(&env.tmon_selector.whitelist,
+ test->test_name,
+ subtest_name);
+
+ env.subtest_state = subtest_state;
+ stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt);
+ watchdog_start();
return true;
}
-void test__force_log() {
- env.test->force_log = true;
+void test__force_log(void)
+{
+ env.test_state->force_log = true;
}
void test__skip(void)
{
- env.test->skip_cnt++;
+ if (env.subtest_state)
+ env.subtest_state->skipped = true;
+ else
+ env.test_state->skip_cnt++;
}
void test__fail(void)
{
- env.test->error_cnt++;
+ if (env.subtest_state)
+ env.subtest_state->error_cnt++;
+ else
+ env.test_state->error_cnt++;
}
int test__join_cgroup(const char *path)
@@ -255,24 +669,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
return bpf_map__fd(map);
}
-static bool is_jit_enabled(void)
-{
- const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
- bool enabled = false;
- int sysctl_fd;
-
- sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
- if (sysctl_fd != -1) {
- char tmpc;
-
- if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
- enabled = (tmpc != '0');
- close(sysctl_fd);
- }
-
- return enabled;
-}
-
int compare_map_keys(int map1_fd, int map2_fd)
{
__u32 key, next_key;
@@ -336,111 +732,122 @@ out:
return err;
}
-int extract_build_id(char *build_id, size_t size)
-{
- FILE *fp;
- char *line = NULL;
- size_t len = 0;
-
- fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
- if (fp == NULL)
- return -1;
-
- if (getline(&line, &len, fp) == -1)
- goto err;
- fclose(fp);
-
- if (len > size)
- len = size;
- memcpy(build_id, line, len);
- build_id[len] = '\0';
- free(line);
- return 0;
-err:
- fclose(fp);
- return -1;
-}
-
-static int finit_module(int fd, const char *param_values, int flags)
-{
- return syscall(__NR_finit_module, fd, param_values, flags);
-}
-
-static int delete_module(const char *name, int flags)
-{
- return syscall(__NR_delete_module, name, flags);
-}
+struct netns_obj {
+ char *nsname;
+ struct tmonitor_ctx *tmon;
+ struct nstoken *nstoken;
+};
-/*
- * Trigger synchronize_rcu() in kernel.
+/* Create a new network namespace with the given name.
+ *
+ * Create a new network namespace and set the network namespace of the
+ * current process to the new network namespace if the argument "open" is
+ * true. This function should be paired with netns_free() to release the
+ * resource and delete the network namespace.
+ *
+ * It also implements the functionality of the option "-m" by starting
+ * traffic monitor on the background to capture the packets in this network
+ * namespace if the current test or subtest matching the pattern.
+ *
+ * nsname: the name of the network namespace to create.
+ * open: open the network namespace if true.
+ *
+ * Return: the network namespace object on success, NULL on failure.
*/
-int kern_sync_rcu(void)
+struct netns_obj *netns_new(const char *nsname, bool open)
{
- return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0);
-}
-
-static void unload_bpf_testmod(void)
-{
- if (kern_sync_rcu())
- fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n");
- if (delete_module("bpf_testmod", 0)) {
- if (errno == ENOENT) {
- if (env.verbosity > VERBOSE_NONE)
- fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
- return;
+ struct netns_obj *netns_obj = malloc(sizeof(*netns_obj));
+ const char *test_name, *subtest_name;
+ int r;
+
+ if (!netns_obj)
+ return NULL;
+ memset(netns_obj, 0, sizeof(*netns_obj));
+
+ netns_obj->nsname = strdup(nsname);
+ if (!netns_obj->nsname)
+ goto fail;
+
+ /* Create the network namespace */
+ r = make_netns(nsname);
+ if (r)
+ goto fail;
+
+ /* Start traffic monitor */
+ if (env.test->should_tmon ||
+ (env.subtest_state && env.subtest_state->should_tmon)) {
+ test_name = env.test->test_name;
+ subtest_name = env.subtest_state ? env.subtest_state->name : NULL;
+ netns_obj->tmon = traffic_monitor_start(nsname, test_name, subtest_name);
+ if (!netns_obj->tmon) {
+ fprintf(stderr, "Failed to start traffic monitor for %s\n", nsname);
+ goto fail;
}
- fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
- return;
+ } else {
+ netns_obj->tmon = NULL;
}
- if (env.verbosity > VERBOSE_NONE)
- fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
-}
-static int load_bpf_testmod(void)
-{
- int fd;
-
- /* ensure previous instance of the module is unloaded */
- unload_bpf_testmod();
-
- if (env.verbosity > VERBOSE_NONE)
- fprintf(stdout, "Loading bpf_testmod.ko...\n");
-
- fd = open("bpf_testmod.ko", O_RDONLY);
- if (fd < 0) {
- fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno);
- return -ENOENT;
- }
- if (finit_module(fd, "", 0)) {
- fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno);
- close(fd);
- return -EINVAL;
+ if (open) {
+ netns_obj->nstoken = open_netns(nsname);
+ if (!netns_obj->nstoken)
+ goto fail;
}
- close(fd);
- if (env.verbosity > VERBOSE_NONE)
- fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
- return 0;
+ return netns_obj;
+fail:
+ traffic_monitor_stop(netns_obj->tmon);
+ remove_netns(nsname);
+ free(netns_obj->nsname);
+ free(netns_obj);
+ return NULL;
+}
+
+/* Delete the network namespace.
+ *
+ * This function should be paired with netns_new() to delete the namespace
+ * created by netns_new().
+ */
+void netns_free(struct netns_obj *netns_obj)
+{
+ if (!netns_obj)
+ return;
+ traffic_monitor_stop(netns_obj->tmon);
+ close_netns(netns_obj->nstoken);
+ remove_netns(netns_obj->nsname);
+ free(netns_obj->nsname);
+ free(netns_obj);
}
/* extern declarations for test funcs */
-#define DEFINE_TEST(name) extern void test_##name(void);
+#define DEFINE_TEST(name) \
+ extern void test_##name(void) __weak; \
+ extern void serial_test_##name(void) __weak;
#include <prog_tests/tests.h>
#undef DEFINE_TEST
static struct prog_test_def prog_test_defs[] = {
-#define DEFINE_TEST(name) { \
- .test_name = #name, \
- .run_test = &test_##name, \
+#define DEFINE_TEST(name) { \
+ .test_name = #name, \
+ .run_test = &test_##name, \
+ .run_serial_test = &serial_test_##name, \
},
#include <prog_tests/tests.h>
#undef DEFINE_TEST
};
-const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
+
+static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
+
+static struct test_state test_states[ARRAY_SIZE(prog_test_defs)];
const char *argp_program_version = "test_progs 0.1";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
-const char argp_program_doc[] = "BPF selftests test runner";
+static const char argp_program_doc[] =
+"BPF selftests test runner\v"
+"Options accepting the NAMES parameter take either a comma-separated list\n"
+"of test names, or a filename prefixed with @. The file contains one name\n"
+"(or wildcard pattern) per line, and comments beginning with # are ignored.\n"
+"\n"
+"These options can be passed repeatedly to read multiple files.\n";
enum ARG_KEYS {
ARG_TEST_NUM = 'n',
@@ -450,6 +857,13 @@ enum ARG_KEYS {
ARG_VERBOSE = 'v',
ARG_GET_TEST_CNT = 'c',
ARG_LIST_TEST_NAMES = 'l',
+ ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
+ ARG_TEST_NAME_GLOB_DENYLIST = 'd',
+ ARG_NUM_WORKERS = 'j',
+ ARG_DEBUG = -1,
+ ARG_JSON_SUMMARY = 'J',
+ ARG_TRAFFIC_MONITOR = 'm',
+ ARG_WATCHDOG_TIMEOUT = 'w',
};
static const struct argp_option opts[] = {
@@ -467,63 +881,115 @@ static const struct argp_option opts[] = {
"Get number of selected top-level tests " },
{ "list", ARG_LIST_TEST_NAMES, NULL, 0,
"List test names that would run (without running them) " },
+ { "allow", ARG_TEST_NAME_GLOB_ALLOWLIST, "NAMES", 0,
+ "Run tests with name matching the pattern (supports '*' wildcard)." },
+ { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
+ "Don't run tests with name matching the pattern (supports '*' wildcard)." },
+ { "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL,
+ "Number of workers to run in parallel, default to number of cpus." },
+ { "debug", ARG_DEBUG, NULL, 0,
+ "print extra debug information for test_progs." },
+ { "json-summary", ARG_JSON_SUMMARY, "FILE", 0, "Write report in json format to this file."},
+#ifdef TRAFFIC_MONITOR
+ { "traffic-monitor", ARG_TRAFFIC_MONITOR, "NAMES", 0,
+ "Monitor network traffic of tests with name matching the pattern (supports '*' wildcard)." },
+#endif
+ { "watchdog-timeout", ARG_WATCHDOG_TIMEOUT, "SECONDS", 0,
+ "Kill the process if tests are not making progress for specified number of seconds." },
{},
};
+static FILE *libbpf_capture_stream;
+
+static struct {
+ char *buf;
+ size_t buf_sz;
+} libbpf_output_capture;
+
+/* Creates a global memstream capturing INFO and WARN level output
+ * passed to libbpf_print_fn.
+ * Returns 0 on success, negative value on failure.
+ * On failure the description is printed using PRINT_FAIL and
+ * current test case is marked as fail.
+ */
+int start_libbpf_log_capture(void)
+{
+ if (libbpf_capture_stream) {
+ PRINT_FAIL("%s: libbpf_capture_stream != NULL\n", __func__);
+ return -EINVAL;
+ }
+
+ libbpf_capture_stream = open_memstream(&libbpf_output_capture.buf,
+ &libbpf_output_capture.buf_sz);
+ if (!libbpf_capture_stream) {
+ PRINT_FAIL("%s: open_memstream failed errno=%d\n", __func__, errno);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Destroys global memstream created by start_libbpf_log_capture().
+ * Returns a pointer to captured data which has to be freed.
+ * Returned buffer is null terminated.
+ */
+char *stop_libbpf_log_capture(void)
+{
+ char *buf;
+
+ if (!libbpf_capture_stream)
+ return NULL;
+
+ fputc(0, libbpf_capture_stream);
+ fclose(libbpf_capture_stream);
+ libbpf_capture_stream = NULL;
+ /* get 'buf' after fclose(), see open_memstream() documentation */
+ buf = libbpf_output_capture.buf;
+ memset(&libbpf_output_capture, 0, sizeof(libbpf_output_capture));
+ return buf;
+}
+
static int libbpf_print_fn(enum libbpf_print_level level,
const char *format, va_list args)
{
+ if (libbpf_capture_stream && level != LIBBPF_DEBUG) {
+ va_list args2;
+
+ va_copy(args2, args);
+ vfprintf(libbpf_capture_stream, format, args2);
+ va_end(args2);
+ }
+
if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG)
return 0;
+
vfprintf(stdout, format, args);
return 0;
}
-static void free_str_set(const struct str_set *set)
+static void free_test_filter_set(const struct test_filter_set *set)
{
- int i;
+ int i, j;
if (!set)
return;
- for (i = 0; i < set->cnt; i++)
- free((void *)set->strs[i]);
- free(set->strs);
-}
-
-static int parse_str_list(const char *s, struct str_set *set)
-{
- char *input, *state = NULL, *next, **tmp, **strs = NULL;
- int cnt = 0;
-
- input = strdup(s);
- if (!input)
- return -ENOMEM;
-
- set->cnt = 0;
- set->strs = NULL;
-
- while ((next = strtok_r(state ? NULL : input, ",", &state))) {
- tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
- if (!tmp)
- goto err;
- strs = tmp;
-
- strs[cnt] = strdup(next);
- if (!strs[cnt])
- goto err;
+ for (i = 0; i < set->cnt; i++) {
+ free((void *)set->tests[i].name);
+ for (j = 0; j < set->tests[i].subtest_cnt; j++)
+ free((void *)set->tests[i].subtests[j]);
- cnt++;
+ free((void *)set->tests[i].subtests);
}
- set->cnt = cnt;
- set->strs = (const char **)strs;
- free(input);
- return 0;
-err:
- free(strs);
- free(input);
- return -ENOMEM;
+ free((void *)set->tests);
+}
+
+static void free_test_selector(struct test_selector *test_selector)
+{
+ free_test_filter_set(&test_selector->blacklist);
+ free_test_filter_set(&test_selector->whitelist);
+ free(test_selector->num_set);
}
extern int extra_prog_load_log_flags;
@@ -531,6 +997,7 @@ extern int extra_prog_load_log_flags;
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
struct test_env *env = state->input;
+ int err = 0;
switch (key) {
case ARG_TEST_NUM: {
@@ -553,30 +1020,30 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
break;
}
+ case ARG_TEST_NAME_GLOB_ALLOWLIST:
case ARG_TEST_NAME: {
- char *subtest_str = strchr(arg, '/');
+ if (arg[0] == '@')
+ err = parse_test_list_file(arg + 1,
+ &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST);
+ else
+ err = parse_test_list(arg,
+ &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST);
- if (subtest_str) {
- *subtest_str = '\0';
- if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.whitelist))
- return -ENOMEM;
- }
- if (parse_str_list(arg, &env->test_selector.whitelist))
- return -ENOMEM;
break;
}
+ case ARG_TEST_NAME_GLOB_DENYLIST:
case ARG_TEST_NAME_BLACKLIST: {
- char *subtest_str = strchr(arg, '/');
+ if (arg[0] == '@')
+ err = parse_test_list_file(arg + 1,
+ &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST);
+ else
+ err = parse_test_list(arg,
+ &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST);
- if (subtest_str) {
- *subtest_str = '\0';
- if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.blacklist))
- return -ENOMEM;
- }
- if (parse_str_list(arg, &env->test_selector.blacklist))
- return -ENOMEM;
break;
}
case ARG_VERIFIER_STATS:
@@ -598,13 +1065,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
return -EINVAL;
}
}
+ env_verbosity = env->verbosity;
- if (env->verbosity > VERBOSE_NONE) {
+ if (verbose()) {
if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) {
fprintf(stderr,
"Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)",
errno);
- return -1;
+ return -EINVAL;
}
}
@@ -615,57 +1083,58 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case ARG_LIST_TEST_NAMES:
env->list_test_names = true;
break;
+ case ARG_NUM_WORKERS:
+ if (arg) {
+ env->workers = atoi(arg);
+ if (!env->workers) {
+ fprintf(stderr, "Invalid number of worker: %s.", arg);
+ return -EINVAL;
+ }
+ } else {
+ env->workers = get_nprocs();
+ }
+ break;
+ case ARG_DEBUG:
+ env->debug = true;
+ break;
+ case ARG_JSON_SUMMARY:
+ env->json = fopen(arg, "w");
+ if (env->json == NULL) {
+ perror("Failed to open json summary file");
+ return -errno;
+ }
+ break;
case ARGP_KEY_ARG:
argp_usage(state);
break;
case ARGP_KEY_END:
break;
+#ifdef TRAFFIC_MONITOR
+ case ARG_TRAFFIC_MONITOR:
+ if (arg[0] == '@')
+ err = parse_test_list_file(arg + 1,
+ &env->tmon_selector.whitelist,
+ true);
+ else
+ err = parse_test_list(arg,
+ &env->tmon_selector.whitelist,
+ true);
+ break;
+#endif
+ case ARG_WATCHDOG_TIMEOUT:
+ env->secs_till_kill = atoi(arg);
+ if (env->secs_till_kill < 0) {
+ fprintf(stderr, "Invalid watchdog timeout: %s.\n", arg);
+ return -EINVAL;
+ }
+ if (env->secs_till_kill < env->secs_till_notify) {
+ env->secs_till_notify = 0;
+ }
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
- return 0;
-}
-
-static void stdio_hijack(void)
-{
-#ifdef __GLIBC__
- env.stdout = stdout;
- env.stderr = stderr;
-
- if (env.verbosity > VERBOSE_NONE) {
- /* nothing to do, output to stdout by default */
- return;
- }
-
- /* stdout and stderr -> buffer */
- fflush(stdout);
-
- stdout = open_memstream(&env.log_buf, &env.log_cnt);
- if (!stdout) {
- stdout = env.stdout;
- perror("open_memstream");
- return;
- }
-
- stderr = stdout;
-#endif
-}
-
-static void stdio_restore(void)
-{
-#ifdef __GLIBC__
- if (stdout == env.stdout)
- return;
-
- fclose(stdout);
- free(env.log_buf);
-
- env.log_buf = NULL;
- env.log_cnt = 0;
-
- stdout = env.stdout;
- stderr = env.stderr;
-#endif
+ return err;
}
/*
@@ -685,18 +1154,113 @@ int cd_flavor_subdir(const char *exec_name)
const char *flavor = strrchr(exec_name, '/');
if (!flavor)
- return 0;
- flavor++;
+ flavor = exec_name;
+ else
+ flavor++;
+
flavor = strrchr(flavor, '-');
if (!flavor)
return 0;
flavor++;
- if (env.verbosity > VERBOSE_NONE)
+ if (verbose())
fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
return chdir(flavor);
}
+int trigger_module_test_read(int read_sz)
+{
+ int fd, err;
+
+ fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY);
+ err = -errno;
+ if (!ASSERT_GE(fd, 0, "testmod_file_open"))
+ return err;
+
+ read(fd, NULL, read_sz);
+ close(fd);
+
+ return 0;
+}
+
+int trigger_module_test_write(int write_sz)
+{
+ int fd, err;
+ char *buf = malloc(write_sz);
+
+ if (!buf)
+ return -ENOMEM;
+
+ memset(buf, 'a', write_sz);
+ buf[write_sz-1] = '\0';
+
+ fd = open(BPF_TESTMOD_TEST_FILE, O_WRONLY);
+ err = -errno;
+ if (!ASSERT_GE(fd, 0, "testmod_file_open")) {
+ free(buf);
+ return err;
+ }
+
+ write(fd, buf, write_sz);
+ close(fd);
+ free(buf);
+ return 0;
+}
+
+int write_sysctl(const char *sysctl, const char *value)
+{
+ int fd, err, len;
+
+ fd = open(sysctl, O_WRONLY);
+ if (!ASSERT_NEQ(fd, -1, "open sysctl"))
+ return -1;
+
+ len = strlen(value);
+ err = write(fd, value, len);
+ close(fd);
+ if (!ASSERT_EQ(err, len, "write sysctl"))
+ return -1;
+
+ return 0;
+}
+
+int get_bpf_max_tramp_links_from(struct btf *btf)
+{
+ const struct btf_enum *e;
+ const struct btf_type *t;
+ __u32 i, type_cnt;
+ const char *name;
+ __u16 j, vlen;
+
+ for (i = 1, type_cnt = btf__type_cnt(btf); i < type_cnt; i++) {
+ t = btf__type_by_id(btf, i);
+ if (!t || !btf_is_enum(t) || t->name_off)
+ continue;
+ e = btf_enum(t);
+ for (j = 0, vlen = btf_vlen(t); j < vlen; j++, e++) {
+ name = btf__str_by_offset(btf, e->name_off);
+ if (name && !strcmp(name, "BPF_MAX_TRAMP_LINKS"))
+ return e->val;
+ }
+ }
+
+ return -1;
+}
+
+int get_bpf_max_tramp_links(void)
+{
+ struct btf *vmlinux_btf;
+ int ret;
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux btf"))
+ return -1;
+ ret = get_bpf_max_tramp_links_from(vmlinux_btf);
+ btf__free(vmlinux_btf);
+
+ return ret;
+}
+
#define MAX_BACKTRACE_SZ 128
void crash_handler(int signum)
{
@@ -705,15 +1269,674 @@ void crash_handler(int signum)
sz = backtrace(bt, ARRAY_SIZE(bt));
- if (env.test)
- dump_test_log(env.test, true);
- if (env.stdout)
- stdio_restore();
+ fflush(stdout);
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
+ if (env.test) {
+ env.test_state->error_cnt++;
+ dump_test_log(env.test, env.test_state, true, false, NULL);
+ }
+ if (env.worker_id != -1)
+ fprintf(stderr, "[%d]: ", env.worker_id);
fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
backtrace_symbols_fd(bt, sz, STDERR_FILENO);
}
+void hexdump(const char *prefix, const void *buf, size_t len)
+{
+ for (int i = 0; i < len; i++) {
+ if (!(i % 16)) {
+ if (i)
+ fprintf(stdout, "\n");
+ fprintf(stdout, "%s", prefix);
+ }
+ if (i && !(i % 8) && (i % 16))
+ fprintf(stdout, "\t");
+ fprintf(stdout, "%02X ", ((uint8_t *)(buf))[i]);
+ }
+ fprintf(stdout, "\n");
+}
+
+static void sigint_handler(int signum)
+{
+ int i;
+
+ for (i = 0; i < env.workers; i++)
+ if (env.worker_socks[i] > 0)
+ close(env.worker_socks[i]);
+}
+
+static int current_test_idx;
+static pthread_mutex_t current_test_lock;
+static pthread_mutex_t stdout_output_lock;
+
+static inline const char *str_msg(const struct msg *msg, char *buf)
+{
+ switch (msg->type) {
+ case MSG_DO_TEST:
+ sprintf(buf, "MSG_DO_TEST %d", msg->do_test.num);
+ break;
+ case MSG_TEST_DONE:
+ sprintf(buf, "MSG_TEST_DONE %d (log: %d)",
+ msg->test_done.num,
+ msg->test_done.have_log);
+ break;
+ case MSG_SUBTEST_DONE:
+ sprintf(buf, "MSG_SUBTEST_DONE %d (log: %d)",
+ msg->subtest_done.num,
+ msg->subtest_done.have_log);
+ break;
+ case MSG_TEST_LOG:
+ sprintf(buf, "MSG_TEST_LOG (cnt: %zu, last: %d)",
+ strlen(msg->test_log.log_buf),
+ msg->test_log.is_last);
+ break;
+ case MSG_EXIT:
+ sprintf(buf, "MSG_EXIT");
+ break;
+ default:
+ sprintf(buf, "UNKNOWN");
+ break;
+ }
+
+ return buf;
+}
+
+static int send_message(int sock, const struct msg *msg)
+{
+ char buf[256];
+
+ if (env.debug)
+ fprintf(stderr, "Sending msg: %s\n", str_msg(msg, buf));
+ return send(sock, msg, sizeof(*msg), 0);
+}
+
+static int recv_message(int sock, struct msg *msg)
+{
+ int ret;
+ char buf[256];
+
+ memset(msg, 0, sizeof(*msg));
+ ret = recv(sock, msg, sizeof(*msg), 0);
+ if (ret >= 0) {
+ if (env.debug)
+ fprintf(stderr, "Received msg: %s\n", str_msg(msg, buf));
+ }
+ return ret;
+}
+
+static bool ns_is_needed(const char *test_name)
+{
+ if (strlen(test_name) < 3)
+ return false;
+
+ return !strncmp(test_name, "ns_", 3);
+}
+
+static void run_one_test(int test_num)
+{
+ struct prog_test_def *test = &prog_test_defs[test_num];
+ struct test_state *state = &test_states[test_num];
+ struct netns_obj *ns = NULL;
+
+ env.test = test;
+ env.test_state = state;
+
+ stdio_hijack(&state->log_buf, &state->log_cnt);
+
+ watchdog_start();
+ if (ns_is_needed(test->test_name))
+ ns = netns_new(test->test_name, true);
+ if (test->run_test)
+ test->run_test();
+ else if (test->run_serial_test)
+ test->run_serial_test();
+ netns_free(ns);
+ watchdog_stop();
+
+ /* ensure last sub-test is finalized properly */
+ if (env.subtest_state)
+ test__end_subtest();
+
+ state->tested = true;
+
+ stdio_restore();
+
+ if (verbose() && env.worker_id == -1)
+ print_test_result(test, state);
+
+ reset_affinity();
+ restore_netns();
+ if (test->need_cgroup_cleanup)
+ cleanup_cgroup_environment();
+
+ free(stop_libbpf_log_capture());
+
+ dump_test_log(test, state, false, false, NULL);
+}
+
+struct dispatch_data {
+ int worker_id;
+ int sock_fd;
+};
+
+static int read_prog_test_msg(int sock_fd, struct msg *msg, enum msg_type type)
+{
+ if (recv_message(sock_fd, msg) < 0)
+ return 1;
+
+ if (msg->type != type) {
+ printf("%s: unexpected message type %d. expected %d\n", __func__, msg->type, type);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int dispatch_thread_read_log(int sock_fd, char **log_buf, size_t *log_cnt)
+{
+ FILE *log_fp = NULL;
+ int result = 0;
+
+ log_fp = open_memstream(log_buf, log_cnt);
+ if (!log_fp)
+ return 1;
+
+ while (true) {
+ struct msg msg;
+
+ if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_LOG)) {
+ result = 1;
+ goto out;
+ }
+
+ fprintf(log_fp, "%s", msg.test_log.log_buf);
+ if (msg.test_log.is_last)
+ break;
+ }
+
+out:
+ fclose(log_fp);
+ log_fp = NULL;
+ return result;
+}
+
+static int dispatch_thread_send_subtests(int sock_fd, struct test_state *state)
+{
+ struct msg msg;
+ struct subtest_state *subtest_state;
+ int subtest_num = state->subtest_num;
+
+ state->subtest_states = malloc(subtest_num * sizeof(*subtest_state));
+
+ for (int i = 0; i < subtest_num; i++) {
+ subtest_state = &state->subtest_states[i];
+
+ memset(subtest_state, 0, sizeof(*subtest_state));
+
+ if (read_prog_test_msg(sock_fd, &msg, MSG_SUBTEST_DONE))
+ return 1;
+
+ subtest_state->name = strdup(msg.subtest_done.name);
+ subtest_state->error_cnt = msg.subtest_done.error_cnt;
+ subtest_state->skipped = msg.subtest_done.skipped;
+ subtest_state->filtered = msg.subtest_done.filtered;
+
+ /* collect all logs */
+ if (msg.subtest_done.have_log)
+ if (dispatch_thread_read_log(sock_fd,
+ &subtest_state->log_buf,
+ &subtest_state->log_cnt))
+ return 1;
+ }
+
+ return 0;
+}
+
+static void *dispatch_thread(void *ctx)
+{
+ struct dispatch_data *data = ctx;
+ int sock_fd;
+
+ sock_fd = data->sock_fd;
+
+ while (true) {
+ int test_to_run = -1;
+ struct prog_test_def *test;
+ struct test_state *state;
+
+ /* grab a test */
+ {
+ pthread_mutex_lock(&current_test_lock);
+
+ if (current_test_idx >= prog_test_cnt) {
+ pthread_mutex_unlock(&current_test_lock);
+ goto done;
+ }
+
+ test = &prog_test_defs[current_test_idx];
+ test_to_run = current_test_idx;
+ current_test_idx++;
+
+ pthread_mutex_unlock(&current_test_lock);
+ }
+
+ if (!test->should_run || test->run_serial_test)
+ continue;
+
+ /* run test through worker */
+ {
+ struct msg msg_do_test;
+
+ memset(&msg_do_test, 0, sizeof(msg_do_test));
+ msg_do_test.type = MSG_DO_TEST;
+ msg_do_test.do_test.num = test_to_run;
+ if (send_message(sock_fd, &msg_do_test) < 0) {
+ perror("Fail to send command");
+ goto done;
+ }
+ env.worker_current_test[data->worker_id] = test_to_run;
+ }
+
+ /* wait for test done */
+ do {
+ struct msg msg;
+
+ if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_DONE))
+ goto error;
+ if (test_to_run != msg.test_done.num)
+ goto error;
+
+ state = &test_states[test_to_run];
+ state->tested = true;
+ state->error_cnt = msg.test_done.error_cnt;
+ state->skip_cnt = msg.test_done.skip_cnt;
+ state->sub_succ_cnt = msg.test_done.sub_succ_cnt;
+ state->subtest_num = msg.test_done.subtest_num;
+
+ /* collect all logs */
+ if (msg.test_done.have_log) {
+ if (dispatch_thread_read_log(sock_fd,
+ &state->log_buf,
+ &state->log_cnt))
+ goto error;
+ }
+
+ /* collect all subtests and subtest logs */
+ if (!state->subtest_num)
+ break;
+
+ if (dispatch_thread_send_subtests(sock_fd, state))
+ goto error;
+ } while (false);
+
+ pthread_mutex_lock(&stdout_output_lock);
+ dump_test_log(test, state, false, true, NULL);
+ pthread_mutex_unlock(&stdout_output_lock);
+ } /* while (true) */
+error:
+ if (env.debug)
+ fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno));
+
+done:
+ {
+ struct msg msg_exit;
+
+ msg_exit.type = MSG_EXIT;
+ if (send_message(sock_fd, &msg_exit) < 0) {
+ if (env.debug)
+ fprintf(stderr, "[%d]: send_message msg_exit: %s.\n",
+ data->worker_id, strerror(errno));
+ }
+ }
+ return NULL;
+}
+
+static void calculate_summary_and_print_errors(struct test_env *env)
+{
+ int i;
+ int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0;
+ json_writer_t *w = NULL;
+
+ for (i = 0; i < prog_test_cnt; i++) {
+ struct test_state *state = &test_states[i];
+
+ if (!state->tested)
+ continue;
+
+ sub_succ_cnt += state->sub_succ_cnt;
+ skip_cnt += state->skip_cnt;
+
+ if (state->error_cnt)
+ fail_cnt++;
+ else
+ succ_cnt++;
+ }
+
+ if (env->json) {
+ w = jsonw_new(env->json);
+ if (!w)
+ fprintf(env->stderr_saved, "Failed to create new JSON stream.");
+ }
+
+ if (w) {
+ jsonw_start_object(w);
+ jsonw_uint_field(w, "success", succ_cnt);
+ jsonw_uint_field(w, "success_subtest", sub_succ_cnt);
+ jsonw_uint_field(w, "skipped", skip_cnt);
+ jsonw_uint_field(w, "failed", fail_cnt);
+ jsonw_name(w, "results");
+ jsonw_start_array(w);
+ }
+
+ /*
+ * We only print error logs summary when there are failed tests and
+ * verbose mode is not enabled. Otherwise, results may be inconsistent.
+ *
+ */
+ if (!verbose() && fail_cnt) {
+ printf("\nAll error logs:\n");
+
+ /* print error logs again */
+ for (i = 0; i < prog_test_cnt; i++) {
+ struct prog_test_def *test = &prog_test_defs[i];
+ struct test_state *state = &test_states[i];
+
+ if (!state->tested || !state->error_cnt)
+ continue;
+
+ dump_test_log(test, state, true, true, w);
+ }
+ }
+
+ if (w) {
+ jsonw_end_array(w);
+ jsonw_end_object(w);
+ jsonw_destroy(&w);
+ }
+
+ if (env->json)
+ fclose(env->json);
+
+ printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+ succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt);
+
+ env->succ_cnt = succ_cnt;
+ env->sub_succ_cnt = sub_succ_cnt;
+ env->fail_cnt = fail_cnt;
+ env->skip_cnt = skip_cnt;
+}
+
+static void server_main(void)
+{
+ pthread_t *dispatcher_threads;
+ struct dispatch_data *data;
+ struct sigaction sigact_int = {
+ .sa_handler = sigint_handler,
+ .sa_flags = SA_RESETHAND,
+ };
+ int i;
+
+ sigaction(SIGINT, &sigact_int, NULL);
+
+ dispatcher_threads = calloc(sizeof(pthread_t), env.workers);
+ data = calloc(sizeof(struct dispatch_data), env.workers);
+
+ env.worker_current_test = calloc(sizeof(int), env.workers);
+ for (i = 0; i < env.workers; i++) {
+ int rc;
+
+ data[i].worker_id = i;
+ data[i].sock_fd = env.worker_socks[i];
+ rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]);
+ if (rc < 0) {
+ perror("Failed to launch dispatcher thread");
+ exit(EXIT_ERR_SETUP_INFRA);
+ }
+ }
+
+ /* wait for all dispatcher to finish */
+ for (i = 0; i < env.workers; i++) {
+ while (true) {
+ int ret = pthread_tryjoin_np(dispatcher_threads[i], NULL);
+
+ if (!ret) {
+ break;
+ } else if (ret == EBUSY) {
+ if (env.debug)
+ fprintf(stderr, "Still waiting for thread %d (test %d).\n",
+ i, env.worker_current_test[i] + 1);
+ usleep(1000 * 1000);
+ continue;
+ } else {
+ fprintf(stderr, "Unexpected error joining dispatcher thread: %d", ret);
+ break;
+ }
+ }
+ }
+ free(dispatcher_threads);
+ free(env.worker_current_test);
+ free(data);
+
+ /* run serial tests */
+ save_netns();
+
+ for (int i = 0; i < prog_test_cnt; i++) {
+ struct prog_test_def *test = &prog_test_defs[i];
+
+ if (!test->should_run || !test->run_serial_test)
+ continue;
+
+ run_one_test(i);
+ }
+
+ /* generate summary */
+ fflush(stderr);
+ fflush(stdout);
+
+ calculate_summary_and_print_errors(&env);
+
+ /* reap all workers */
+ for (i = 0; i < env.workers; i++) {
+ int wstatus, pid;
+
+ pid = waitpid(env.worker_pids[i], &wstatus, 0);
+ if (pid != env.worker_pids[i])
+ perror("Unable to reap worker");
+ }
+}
+
+static void worker_main_send_log(int sock, char *log_buf, size_t log_cnt)
+{
+ char *src;
+ size_t slen;
+
+ src = log_buf;
+ slen = log_cnt;
+ while (slen) {
+ struct msg msg_log;
+ char *dest;
+ size_t len;
+
+ memset(&msg_log, 0, sizeof(msg_log));
+ msg_log.type = MSG_TEST_LOG;
+ dest = msg_log.test_log.log_buf;
+ len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
+ memcpy(dest, src, len);
+
+ src += len;
+ slen -= len;
+ if (!slen)
+ msg_log.test_log.is_last = true;
+
+ assert(send_message(sock, &msg_log) >= 0);
+ }
+}
+
+static void free_subtest_state(struct subtest_state *state)
+{
+ if (state->log_buf) {
+ free(state->log_buf);
+ state->log_buf = NULL;
+ state->log_cnt = 0;
+ }
+ free(state->name);
+ state->name = NULL;
+}
+
+static int worker_main_send_subtests(int sock, struct test_state *state)
+{
+ int i, result = 0;
+ struct msg msg;
+ struct subtest_state *subtest_state;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_SUBTEST_DONE;
+
+ for (i = 0; i < state->subtest_num; i++) {
+ subtest_state = &state->subtest_states[i];
+
+ msg.subtest_done.num = i;
+
+ strncpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME);
+
+ msg.subtest_done.error_cnt = subtest_state->error_cnt;
+ msg.subtest_done.skipped = subtest_state->skipped;
+ msg.subtest_done.filtered = subtest_state->filtered;
+ msg.subtest_done.have_log = false;
+
+ if (verbose() || state->force_log || subtest_state->error_cnt) {
+ if (subtest_state->log_cnt)
+ msg.subtest_done.have_log = true;
+ }
+
+ if (send_message(sock, &msg) < 0) {
+ perror("Fail to send message done");
+ result = 1;
+ goto out;
+ }
+
+ /* send logs */
+ if (msg.subtest_done.have_log)
+ worker_main_send_log(sock, subtest_state->log_buf, subtest_state->log_cnt);
+
+ free_subtest_state(subtest_state);
+ free(subtest_state->name);
+ }
+
+out:
+ for (; i < state->subtest_num; i++)
+ free_subtest_state(&state->subtest_states[i]);
+ free(state->subtest_states);
+ return result;
+}
+
+static int worker_main(int sock)
+{
+ save_netns();
+ watchdog_init();
+
+ while (true) {
+ /* receive command */
+ struct msg msg;
+
+ if (recv_message(sock, &msg) < 0)
+ goto out;
+
+ switch (msg.type) {
+ case MSG_EXIT:
+ if (env.debug)
+ fprintf(stderr, "[%d]: worker exit.\n",
+ env.worker_id);
+ goto out;
+ case MSG_DO_TEST: {
+ int test_to_run = msg.do_test.num;
+ struct prog_test_def *test = &prog_test_defs[test_to_run];
+ struct test_state *state = &test_states[test_to_run];
+ struct msg msg;
+
+ if (env.debug)
+ fprintf(stderr, "[%d]: #%d:%s running.\n",
+ env.worker_id,
+ test_to_run + 1,
+ test->test_name);
+
+ run_one_test(test_to_run);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_TEST_DONE;
+ msg.test_done.num = test_to_run;
+ msg.test_done.error_cnt = state->error_cnt;
+ msg.test_done.skip_cnt = state->skip_cnt;
+ msg.test_done.sub_succ_cnt = state->sub_succ_cnt;
+ msg.test_done.subtest_num = state->subtest_num;
+ msg.test_done.have_log = false;
+
+ if (verbose() || state->force_log || state->error_cnt) {
+ if (state->log_cnt)
+ msg.test_done.have_log = true;
+ }
+ if (send_message(sock, &msg) < 0) {
+ perror("Fail to send message done");
+ goto out;
+ }
+
+ /* send logs */
+ if (msg.test_done.have_log)
+ worker_main_send_log(sock, state->log_buf, state->log_cnt);
+
+ if (state->log_buf) {
+ free(state->log_buf);
+ state->log_buf = NULL;
+ state->log_cnt = 0;
+ }
+
+ if (state->subtest_num)
+ if (worker_main_send_subtests(sock, state))
+ goto out;
+
+ if (env.debug)
+ fprintf(stderr, "[%d]: #%d:%s done.\n",
+ env.worker_id,
+ test_to_run + 1,
+ test->test_name);
+ break;
+ } /* case MSG_DO_TEST */
+ default:
+ if (env.debug)
+ fprintf(stderr, "[%d]: unknown message.\n", env.worker_id);
+ return -1;
+ }
+ }
+out:
+ return 0;
+}
+
+static void free_test_states(void)
+{
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(prog_test_defs); i++) {
+ struct test_state *test_state = &test_states[i];
+
+ for (j = 0; j < test_state->subtest_num; j++)
+ free_subtest_state(&test_state->subtest_states[j]);
+
+ free(test_state->subtest_states);
+ free(test_state->log_buf);
+ test_state->subtest_states = NULL;
+ test_state->log_buf = NULL;
+ }
+}
+
+static __u32 register_session_key(const char *key_data, size_t key_data_size)
+{
+ return syscall(__NR_add_key, "asymmetric", "libbpf_session_key",
+ (const void *)key_data, key_data_size,
+ KEY_SPEC_SESSION_KEYRING);
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -724,11 +1947,16 @@ int main(int argc, char **argv)
struct sigaction sigact = {
.sa_handler = crash_handler,
.sa_flags = SA_RESETHAND,
- };
+ };
int err, i;
sigaction(SIGSEGV, &sigact, NULL);
+ env.stdout_saved = stdout;
+ env.stderr_saved = stderr;
+
+ env.secs_till_notify = 10;
+ env.secs_till_kill = 120;
err = argp_parse(&argp, argc, argv, 0, NULL, &env);
if (err)
return err;
@@ -737,7 +1965,17 @@ int main(int argc, char **argv)
if (err)
return err;
+ watchdog_init();
+
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
+ err = register_session_key((const char *)test_progs_verification_cert,
+ test_progs_verification_cert_len);
+ if (err < 0)
+ return err;
+
+ traffic_monitor_set_print(traffic_monitor_print_fn);
srand(time(NULL));
@@ -749,21 +1987,84 @@ int main(int argc, char **argv)
return -1;
}
- save_netns();
- stdio_hijack();
env.has_testmod = true;
- if (load_bpf_testmod()) {
- fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
- env.has_testmod = false;
+ if (!env.list_test_names) {
+ /* ensure previous instance of the module is unloaded */
+ unload_bpf_testmod(verbose());
+
+ if (load_bpf_testmod(verbose())) {
+ fprintf(env.stderr_saved, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
+ env.has_testmod = false;
+ }
}
+
+ /* initializing tests */
for (i = 0; i < prog_test_cnt; i++) {
struct prog_test_def *test = &prog_test_defs[i];
- env.test = test;
test->test_num = i + 1;
+ test->should_run = should_run(&env.test_selector,
+ test->test_num, test->test_name);
+
+ if ((test->run_test == NULL && test->run_serial_test == NULL) ||
+ (test->run_test != NULL && test->run_serial_test != NULL)) {
+ fprintf(stderr, "Test %d:%s must have either test_%s() or serial_test_%sl() defined.\n",
+ test->test_num, test->test_name, test->test_name, test->test_name);
+ exit(EXIT_ERR_SETUP_INFRA);
+ }
+ if (test->should_run)
+ test->should_tmon = should_tmon(&env.tmon_selector, test->test_name);
+ }
+
+ /* ignore workers if we are just listing */
+ if (env.get_test_cnt || env.list_test_names)
+ env.workers = 0;
+
+ /* launch workers if requested */
+ env.worker_id = -1; /* main process */
+ if (env.workers) {
+ env.worker_pids = calloc(sizeof(pid_t), env.workers);
+ env.worker_socks = calloc(sizeof(int), env.workers);
+ if (env.debug)
+ fprintf(stdout, "Launching %d workers.\n", env.workers);
+ for (i = 0; i < env.workers; i++) {
+ int sv[2];
+ pid_t pid;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sv) < 0) {
+ perror("Fail to create worker socket");
+ return -1;
+ }
+ pid = fork();
+ if (pid < 0) {
+ perror("Failed to fork worker");
+ return -1;
+ } else if (pid != 0) { /* main process */
+ close(sv[1]);
+ env.worker_pids[i] = pid;
+ env.worker_socks[i] = sv[0];
+ } else { /* inside each worker process */
+ close(sv[0]);
+ env.worker_id = i;
+ return worker_main(sv[1]);
+ }
+ }
- if (!should_run(&env.test_selector,
- test->test_num, test->test_name))
+ if (env.worker_id == -1) {
+ server_main();
+ goto out;
+ }
+ }
+
+ /* The rest of the main process */
+
+ /* on single mode */
+ save_netns();
+
+ for (i = 0; i < prog_test_cnt; i++) {
+ struct prog_test_def *test = &prog_test_defs[i];
+
+ if (!test->should_run)
continue;
if (env.get_test_cnt) {
@@ -772,37 +2073,13 @@ int main(int argc, char **argv)
}
if (env.list_test_names) {
- fprintf(env.stdout, "%s\n", test->test_name);
+ fprintf(env.stdout_saved, "%s\n", test->test_name);
env.succ_cnt++;
continue;
}
- test->run_test();
- /* ensure last sub-test is finalized properly */
- if (test->subtest_name)
- test__end_subtest();
-
- test->tested = true;
- if (test->error_cnt)
- env.fail_cnt++;
- else
- env.succ_cnt++;
- skip_account();
-
- dump_test_log(test, test->error_cnt);
-
- fprintf(env.stdout, "#%d %s:%s\n",
- test->test_num, test->test_name,
- test->error_cnt ? "FAIL" : "OK");
-
- reset_affinity();
- restore_netns();
- if (test->need_cgroup_cleanup)
- cleanup_cgroup_environment();
+ run_one_test(i);
}
- if (env.has_testmod)
- unload_bpf_testmod();
- stdio_restore();
if (env.get_test_cnt) {
printf("%d\n", env.succ_cnt);
@@ -812,17 +2089,17 @@ int main(int argc, char **argv)
if (env.list_test_names)
goto out;
- fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
- env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+ calculate_summary_and_print_errors(&env);
-out:
- free_str_set(&env.test_selector.blacklist);
- free_str_set(&env.test_selector.whitelist);
- free(env.test_selector.num_set);
- free_str_set(&env.subtest_selector.blacklist);
- free_str_set(&env.subtest_selector.whitelist);
- free(env.subtest_selector.num_set);
close(env.saved_netns_fd);
+out:
+ if (!env.list_test_names && env.has_testmod)
+ unload_bpf_testmod(verbose());
+
+ free_test_selector(&env.test_selector);
+ free_test_selector(&env.subtest_selector);
+ free_test_selector(&env.tmon_selector);
+ free_test_states();
if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
return EXIT_NO_TEST;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f7c2fd89d01a..eebfc18cdcd2 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -1,9 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TEST_PROGS_H
+#define __TEST_PROGS_H
+
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
+#include <regex.h>
#include <stdlib.h>
#include <stdarg.h>
#include <time.h>
@@ -25,6 +29,7 @@ typedef __u16 __sum16;
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/time.h>
+#include <sys/param.h>
#include <fcntl.h>
#include <pthread.h>
#include <linux/bpf.h>
@@ -37,7 +42,6 @@ typedef __u16 __sum16;
#include <bpf/bpf_endian.h>
#include "trace_helpers.h"
#include "testing_helpers.h"
-#include "flow_dissector_load.h"
enum verbosity {
VERBOSE_NONE,
@@ -46,22 +50,61 @@ enum verbosity {
VERBOSE_SUPER,
};
-struct str_set {
- const char **strs;
+struct test_filter {
+ char *name;
+ char **subtests;
+ int subtest_cnt;
+};
+
+struct test_filter_set {
+ struct test_filter *tests;
int cnt;
};
struct test_selector {
- struct str_set whitelist;
- struct str_set blacklist;
+ struct test_filter_set whitelist;
+ struct test_filter_set blacklist;
bool *num_set;
int num_set_len;
};
+struct subtest_state {
+ char *name;
+ size_t log_cnt;
+ char *log_buf;
+ int error_cnt;
+ bool skipped;
+ bool filtered;
+ bool should_tmon;
+
+ FILE *stdout_saved;
+};
+
+struct test_state {
+ bool tested;
+ bool force_log;
+
+ int error_cnt;
+ int skip_cnt;
+ int sub_succ_cnt;
+
+ struct subtest_state *subtest_states;
+ int subtest_num;
+
+ size_t log_cnt;
+ char *log_buf;
+
+ FILE *stdout_saved;
+};
+
+extern int env_verbosity;
+
struct test_env {
struct test_selector test_selector;
struct test_selector subtest_selector;
+ struct test_selector tmon_selector;
bool verifier_stats;
+ bool debug;
enum verbosity verbosity;
bool jit_enabled;
@@ -69,12 +112,14 @@ struct test_env {
bool get_test_cnt;
bool list_test_names;
- struct prog_test_def *test;
- FILE *stdout;
- FILE *stderr;
- char *log_buf;
- size_t log_cnt;
+ struct prog_test_def *test; /* current running test */
+ struct test_state *test_state; /* current running test state */
+ struct subtest_state *subtest_state; /* current running subtest state */
+
+ FILE *stdout_saved;
+ FILE *stderr_saved;
int nr_cpus;
+ FILE *json;
int succ_cnt; /* successful tests */
int sub_succ_cnt; /* successful sub-tests */
@@ -82,15 +127,66 @@ struct test_env {
int skip_cnt; /* skipped tests */
int saved_netns_fd;
+ int workers; /* number of worker process */
+ int worker_id; /* id number of current worker, main process is -1 */
+ pid_t *worker_pids; /* array of worker pids */
+ int *worker_socks; /* array of worker socks */
+ int *worker_current_test; /* array of current running test for each worker */
+
+ pthread_t main_thread;
+ int secs_till_notify;
+ int secs_till_kill;
+ timer_t watchdog; /* watch for stalled tests/subtests */
+ enum { WD_NOTIFY, WD_KILL } watchdog_state;
+};
+
+#define MAX_LOG_TRUNK_SIZE 8192
+#define MAX_SUBTEST_NAME 1024
+enum msg_type {
+ MSG_DO_TEST = 0,
+ MSG_TEST_DONE = 1,
+ MSG_TEST_LOG = 2,
+ MSG_SUBTEST_DONE = 3,
+ MSG_EXIT = 255,
+};
+struct msg {
+ enum msg_type type;
+ union {
+ struct {
+ int num;
+ } do_test;
+ struct {
+ int num;
+ int sub_succ_cnt;
+ int error_cnt;
+ int skip_cnt;
+ bool have_log;
+ int subtest_num;
+ } test_done;
+ struct {
+ char log_buf[MAX_LOG_TRUNK_SIZE + 1];
+ bool is_last;
+ } test_log;
+ struct {
+ int num;
+ char name[MAX_SUBTEST_NAME + 1];
+ int error_cnt;
+ bool skipped;
+ bool filtered;
+ bool have_log;
+ } subtest_done;
+ };
};
extern struct test_env env;
-extern void test__force_log();
-extern bool test__start_subtest(const char *name);
-extern void test__skip(void);
-extern void test__fail(void);
-extern int test__join_cgroup(const char *path);
+void test__force_log(void);
+bool test__start_subtest(const char *name);
+void test__end_subtest(void);
+void test__skip(void);
+void test__fail(void);
+int test__join_cgroup(const char *path);
+void hexdump(const char *prefix, const void *buf, size_t len);
#define PRINT_FAIL(format...) \
({ \
@@ -130,6 +226,26 @@ extern int test__join_cgroup(const char *path);
#define CHECK_ATTR(condition, tag, format...) \
_CHECK(condition, tag, tattr.duration, format)
+#define ASSERT_FAIL(fmt, args...) ({ \
+ static int duration = 0; \
+ CHECK(false, "", fmt"\n", ##args); \
+ false; \
+})
+
+#define ASSERT_TRUE(actual, name) ({ \
+ static int duration = 0; \
+ bool ___ok = (actual); \
+ CHECK(!___ok, (name), "unexpected %s: got FALSE\n", (name)); \
+ ___ok; \
+})
+
+#define ASSERT_FALSE(actual, name) ({ \
+ static int duration = 0; \
+ bool ___ok = !(actual); \
+ CHECK(!___ok, (name), "unexpected %s: got TRUE\n", (name)); \
+ ___ok; \
+})
+
#define ASSERT_EQ(actual, expected, name) ({ \
static int duration = 0; \
typeof(actual) ___act = (actual); \
@@ -152,6 +268,50 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
+#define ASSERT_LT(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act < ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld >= expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_LE(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act <= ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld > expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_GT(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act > ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld <= expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_GE(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act >= ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld < expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
#define ASSERT_STREQ(actual, expected, name) ({ \
static int duration = 0; \
const char *___act = actual; \
@@ -163,11 +323,49 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
+#define ASSERT_STRNEQ(actual, expected, len, name) ({ \
+ static int duration = 0; \
+ const char *___act = actual; \
+ const char *___exp = expected; \
+ int ___len = len; \
+ bool ___ok = strncmp(___act, ___exp, ___len) == 0; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual '%.*s' != expected '%.*s'\n", \
+ (name), ___len, ___act, ___len, ___exp); \
+ ___ok; \
+})
+
+#define ASSERT_HAS_SUBSTR(str, substr, name) ({ \
+ static int duration = 0; \
+ const char *___str = str; \
+ const char *___substr = substr; \
+ bool ___ok = strstr(___str, ___substr) != NULL; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: '%s' is not a substring of '%s'\n", \
+ (name), ___substr, ___str); \
+ ___ok; \
+})
+
+#define ASSERT_MEMEQ(actual, expected, len, name) ({ \
+ static int duration = 0; \
+ const void *__act = actual; \
+ const void *__exp = expected; \
+ int __len = len; \
+ bool ___ok = memcmp(__act, __exp, __len) == 0; \
+ CHECK(!___ok, (name), "unexpected memory mismatch\n"); \
+ fprintf(stdout, "actual:\n"); \
+ hexdump("\t", __act, __len); \
+ fprintf(stdout, "expected:\n"); \
+ hexdump("\t", __exp, __len); \
+ ___ok; \
+})
+
#define ASSERT_OK(res, name) ({ \
static int duration = 0; \
long long ___res = (res); \
bool ___ok = ___res == 0; \
- CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \
+ CHECK(!___ok, (name), "unexpected error: %lld (errno %d)\n", \
+ ___res, errno); \
___ok; \
})
@@ -190,20 +388,69 @@ extern int test__join_cgroup(const char *path);
#define ASSERT_OK_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = !IS_ERR_OR_NULL(___res); \
- CHECK(!___ok, (name), \
- "unexpected error: %ld\n", PTR_ERR(___res)); \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err == 0; \
+ CHECK(!___ok, (name), "unexpected error: %d\n", ___err); \
___ok; \
})
#define ASSERT_ERR_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = IS_ERR(___res) \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err != 0; \
CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
___ok; \
})
+#define ASSERT_OK_FD(fd, name) ({ \
+ static int duration = 0; \
+ int ___fd = (fd); \
+ bool ___ok = ___fd >= 0; \
+ CHECK(!___ok, (name), "unexpected fd: %d (errno %d)\n", \
+ ___fd, errno); \
+ ___ok; \
+})
+
+#define ASSERT_ERR_FD(fd, name) ({ \
+ static int duration = 0; \
+ int ___fd = (fd); \
+ bool ___ok = ___fd < 0; \
+ CHECK(!___ok, (name), "unexpected fd: %d\n", ___fd); \
+ ___ok; \
+})
+
+#define SYS(goto_label, fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ goto goto_label; \
+ })
+
+#define SYS_FAIL(goto_label, fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_NEQ(0, system(cmd), cmd)) \
+ goto goto_label; \
+ })
+
+#define ALL_TO_DEV_NULL " >/dev/null 2>&1"
+
+#define SYS_NOFAIL(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ int n; \
+ n = snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (n < sizeof(cmd) && sizeof(cmd) - n >= sizeof(ALL_TO_DEV_NULL)) \
+ strcat(cmd, ALL_TO_DEV_NULL); \
+ system(cmd); \
+ })
+
+int start_libbpf_log_capture(void);
+char *stop_libbpf_log_capture(void);
+
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
@@ -214,16 +461,106 @@ static inline void *u64_to_ptr(__u64 ptr)
return (void *) (unsigned long) ptr;
}
+static inline __u32 id_from_prog_fd(int fd)
+{
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "id_from_prog_fd"))
+ return 0;
+
+ ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
+ return prog_info.id;
+}
+
+static inline __u32 id_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ ASSERT_NEQ(link_info.id, 0, "link_info.id");
+ return link_info.id;
+}
+
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
-int extract_build_id(char *build_id, size_t size);
-int kern_sync_rcu(void);
+int trigger_module_test_read(int read_sz);
+int trigger_module_test_write(int write_sz);
+int write_sysctl(const char *sysctl, const char *value);
+int get_bpf_max_tramp_links_from(struct btf *btf);
+int get_bpf_max_tramp_links(void);
+
+struct netns_obj;
+struct netns_obj *netns_new(const char *name, bool open);
+void netns_free(struct netns_obj *netns);
#ifdef __x86_64__
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
#elif defined(__s390x__)
#define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
+#elif defined(__aarch64__)
+#define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep"
+#elif defined(__riscv)
+#define SYS_NANOSLEEP_KPROBE_NAME "__riscv_sys_nanosleep"
#else
#define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
#endif
+
+#define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod"
+
+typedef int (*pre_execution_cb)(struct bpf_object *obj);
+
+struct test_loader {
+ char *log_buf;
+ size_t log_buf_sz;
+ pre_execution_cb pre_execution_cb;
+
+ struct bpf_object *obj;
+};
+
+static inline void test_loader__set_pre_execution_cb(struct test_loader *tester,
+ pre_execution_cb cb)
+{
+ tester->pre_execution_cb = cb;
+}
+
+typedef const void *(*skel_elf_bytes_fn)(size_t *sz);
+
+extern void test_loader__run_subtests(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory);
+
+extern void test_loader_fini(struct test_loader *tester);
+
+#define RUN_TESTS(skel) ({ \
+ struct test_loader tester = {}; \
+ \
+ test_loader__run_subtests(&tester, #skel, skel##__elf_bytes); \
+ test_loader_fini(&tester); \
+})
+
+struct expect_msg {
+ const char *substr; /* substring match */
+ regex_t regex;
+ bool is_regex;
+ bool on_next_line;
+ bool negative;
+};
+
+struct expected_msgs {
+ struct expect_msg *patterns;
+ size_t cnt;
+};
+
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force));
+
+#endif /* __TEST_PROGS_H */
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
deleted file mode 100755
index a9bc6f82abc1..000000000000
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2018 Facebook
-
-set -eu
-
-wait_for_ip()
-{
- local _i
- echo -n "Wait for testing link-local IP to become available "
- for _i in $(seq ${MAX_PING_TRIES}); do
- echo -n "."
- if $PING6 -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
- echo " OK"
- return
- fi
- sleep 1
- done
- echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
- exit 1
-}
-
-setup()
-{
- # Create testing interfaces not to interfere with current environment.
- ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
- ip link set ${TEST_IF} up
- ip link set ${TEST_IF_PEER} up
-
- wait_for_ip
-
- tc qdisc add dev ${TEST_IF} clsact
- tc filter add dev ${TEST_IF} egress bpf obj ${BPF_PROG_OBJ} \
- sec ${BPF_PROG_SECTION} da
-
- BPF_PROG_ID=$(tc filter show dev ${TEST_IF} egress | \
- awk '/ id / {sub(/.* id /, "", $0); print($1)}')
-}
-
-cleanup()
-{
- ip link del ${TEST_IF} 2>/dev/null || :
- ip link del ${TEST_IF_PEER} 2>/dev/null || :
-}
-
-main()
-{
- trap cleanup EXIT 2 3 6 15
- setup
- ${PROG} ${TEST_IF} ${BPF_PROG_ID}
-}
-
-DIR=$(dirname $0)
-TEST_IF="test_cgid_1"
-TEST_IF_PEER="test_cgid_2"
-MAX_PING_TRIES=5
-BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
-BPF_PROG_SECTION="cgroup_id_logger"
-BPF_PROG_ID=0
-PROG="${DIR}/test_skb_cgroup_id_user"
-type ping6 >/dev/null 2>&1 && PING6="ping6" || PING6="ping -6"
-
-main
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
deleted file mode 100644
index 4a64306728ab..000000000000
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ /dev/null
@@ -1,181 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <arpa/inet.h>
-#include <net/if.h>
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "cgroup_helpers.h"
-
-#define CGROUP_PATH "/skb_cgroup_test"
-#define NUM_CGROUP_LEVELS 4
-
-/* RFC 4291, Section 2.7.1 */
-#define LINKLOCAL_MULTICAST "ff02::1"
-
-static int mk_dst_addr(const char *ip, const char *iface,
- struct sockaddr_in6 *dst)
-{
- memset(dst, 0, sizeof(*dst));
-
- dst->sin6_family = AF_INET6;
- dst->sin6_port = htons(1025);
-
- if (inet_pton(AF_INET6, ip, &dst->sin6_addr) != 1) {
- log_err("Invalid IPv6: %s", ip);
- return -1;
- }
-
- dst->sin6_scope_id = if_nametoindex(iface);
- if (!dst->sin6_scope_id) {
- log_err("Failed to get index of iface: %s", iface);
- return -1;
- }
-
- return 0;
-}
-
-static int send_packet(const char *iface)
-{
- struct sockaddr_in6 dst;
- char msg[] = "msg";
- int err = 0;
- int fd = -1;
-
- if (mk_dst_addr(LINKLOCAL_MULTICAST, iface, &dst))
- goto err;
-
- fd = socket(AF_INET6, SOCK_DGRAM, 0);
- if (fd == -1) {
- log_err("Failed to create UDP socket");
- goto err;
- }
-
- if (sendto(fd, &msg, sizeof(msg), 0, (const struct sockaddr *)&dst,
- sizeof(dst)) == -1) {
- log_err("Failed to send datagram");
- goto err;
- }
-
- goto out;
-err:
- err = -1;
-out:
- if (fd >= 0)
- close(fd);
- return err;
-}
-
-int get_map_fd_by_prog_id(int prog_id)
-{
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- __u32 map_ids[1];
- int prog_fd = -1;
- int map_fd = -1;
-
- prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (prog_fd < 0) {
- log_err("Failed to get fd by prog id %d", prog_id);
- goto err;
- }
-
- info.nr_map_ids = 1;
- info.map_ids = (__u64) (unsigned long) map_ids;
-
- if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
- log_err("Failed to get info by prog fd %d", prog_fd);
- goto err;
- }
-
- if (!info.nr_map_ids) {
- log_err("No maps found for prog fd %d", prog_fd);
- goto err;
- }
-
- map_fd = bpf_map_get_fd_by_id(map_ids[0]);
- if (map_fd < 0)
- log_err("Failed to get fd by map id %d", map_ids[0]);
-err:
- if (prog_fd >= 0)
- close(prog_fd);
- return map_fd;
-}
-
-int check_ancestor_cgroup_ids(int prog_id)
-{
- __u64 actual_ids[NUM_CGROUP_LEVELS], expected_ids[NUM_CGROUP_LEVELS];
- __u32 level;
- int err = 0;
- int map_fd;
-
- expected_ids[0] = get_cgroup_id("/.."); /* root cgroup */
- expected_ids[1] = get_cgroup_id("");
- expected_ids[2] = get_cgroup_id(CGROUP_PATH);
- expected_ids[3] = 0; /* non-existent cgroup */
-
- map_fd = get_map_fd_by_prog_id(prog_id);
- if (map_fd < 0)
- goto err;
-
- for (level = 0; level < NUM_CGROUP_LEVELS; ++level) {
- if (bpf_map_lookup_elem(map_fd, &level, &actual_ids[level])) {
- log_err("Failed to lookup key %d", level);
- goto err;
- }
- if (actual_ids[level] != expected_ids[level]) {
- log_err("%llx (actual) != %llx (expected), level: %u\n",
- actual_ids[level], expected_ids[level], level);
- goto err;
- }
- }
-
- goto out;
-err:
- err = -1;
-out:
- if (map_fd >= 0)
- close(map_fd);
- return err;
-}
-
-int main(int argc, char **argv)
-{
- int cgfd = -1;
- int err = 0;
-
- if (argc < 3) {
- fprintf(stderr, "Usage: %s iface prog_id\n", argv[0]);
- exit(EXIT_FAILURE);
- }
-
- cgfd = cgroup_setup_and_join(CGROUP_PATH);
- if (cgfd < 0)
- goto err;
-
- if (send_packet(argv[1]))
- goto err;
-
- if (check_ancestor_cgroup_ids(atoi(argv[2])))
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(cgfd);
- cleanup_cgroup_environment();
- printf("[%s]\n", err ? "FAIL" : "PASS");
- return err;
-}
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
deleted file mode 100644
index 9613f7538840..000000000000
--- a/tools/testing/selftests/bpf/test_sock.c
+++ /dev/null
@@ -1,481 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <stdio.h>
-#include <unistd.h>
-
-#include <arpa/inet.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-
-#include <linux/filter.h>
-
-#include <bpf/bpf.h>
-
-#include "cgroup_helpers.h"
-#include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
-
-#define CG_PATH "/foo"
-#define MAX_INSNS 512
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-static bool verbose = false;
-
-struct sock_test {
- const char *descr;
- /* BPF prog properties */
- struct bpf_insn insns[MAX_INSNS];
- enum bpf_attach_type expected_attach_type;
- enum bpf_attach_type attach_type;
- /* Socket properties */
- int domain;
- int type;
- /* Endpoint to bind() to */
- const char *ip;
- unsigned short port;
- /* Expected test result */
- enum {
- LOAD_REJECT,
- ATTACH_REJECT,
- BIND_REJECT,
- SUCCESS,
- } result;
-};
-
-static struct sock_test tests[] = {
- {
- "bind4 load with invalid access: src_ip6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock, src_ip6[0])),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
- },
- {
- "bind4 load with invalid access: mark",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock, mark)),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
- },
- {
- "bind6 load with invalid access: src_ip4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock, src_ip4)),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
- },
- {
- "sock_create load with invalid access: src_port",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock, src_port)),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
- },
- {
- "sock_create load w/o expected_attach_type (compat mode)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- 0,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
- },
- {
- "sock_create load w/ expected_attach_type",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
- },
- {
- "attach type mismatch bind4 vs bind6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
- },
- {
- "attach type mismatch bind6 vs bind4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
- },
- {
- "attach type mismatch default vs bind4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- 0,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
- },
- {
- "attach type mismatch bind6 vs sock_create",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
- },
- {
- "bind4 reject all",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- BIND_REJECT,
- },
- {
- "bind6 reject all",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- BIND_REJECT,
- },
- {
- "bind6 deny specific IP & port",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (ip == expected && port == expected) */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock, src_ip6[3])),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
- __bpf_constant_ntohl(0x00000001), 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock, src_port)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
-
- /* return DENY; */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_A(1),
-
- /* else return ALLOW; */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::1",
- 8193,
- BIND_REJECT,
- },
- {
- "bind4 allow specific IP & port",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (ip == expected && port == expected) */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock, src_ip4)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
- __bpf_constant_ntohl(0x7F000001), 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock, src_port)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
-
- /* return ALLOW; */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_JMP_A(1),
-
- /* else return DENY; */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 4098,
- SUCCESS,
- },
- {
- "bind4 allow all",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- SUCCESS,
- },
- {
- "bind6 allow all",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- SUCCESS,
- },
-};
-
-static size_t probe_prog_length(const struct bpf_insn *fp)
-{
- size_t len;
-
- for (len = MAX_INSNS - 1; len > 0; --len)
- if (fp[len].code != 0 || fp[len].imm != 0)
- break;
- return len + 1;
-}
-
-static int load_sock_prog(const struct bpf_insn *prog,
- enum bpf_attach_type attach_type)
-{
- struct bpf_load_program_attr attr;
- int ret;
-
- memset(&attr, 0, sizeof(struct bpf_load_program_attr));
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
- attr.expected_attach_type = attach_type;
- attr.insns = prog;
- attr.insns_cnt = probe_prog_length(attr.insns);
- attr.license = "GPL";
- attr.log_level = 2;
-
- ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
- if (verbose && ret < 0)
- fprintf(stderr, "%s\n", bpf_log_buf);
-
- return ret;
-}
-
-static int attach_sock_prog(int cgfd, int progfd,
- enum bpf_attach_type attach_type)
-{
- return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
-}
-
-static int bind_sock(int domain, int type, const char *ip, unsigned short port)
-{
- struct sockaddr_storage addr;
- struct sockaddr_in6 *addr6;
- struct sockaddr_in *addr4;
- int sockfd = -1;
- socklen_t len;
- int err = 0;
-
- sockfd = socket(domain, type, 0);
- if (sockfd < 0)
- goto err;
-
- memset(&addr, 0, sizeof(addr));
-
- if (domain == AF_INET) {
- len = sizeof(struct sockaddr_in);
- addr4 = (struct sockaddr_in *)&addr;
- addr4->sin_family = domain;
- addr4->sin_port = htons(port);
- if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
- goto err;
- } else if (domain == AF_INET6) {
- len = sizeof(struct sockaddr_in6);
- addr6 = (struct sockaddr_in6 *)&addr;
- addr6->sin6_family = domain;
- addr6->sin6_port = htons(port);
- if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
- goto err;
- } else {
- goto err;
- }
-
- if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(sockfd);
- return err;
-}
-
-static int run_test_case(int cgfd, const struct sock_test *test)
-{
- int progfd = -1;
- int err = 0;
-
- printf("Test case: %s .. ", test->descr);
- progfd = load_sock_prog(test->insns, test->expected_attach_type);
- if (progfd < 0) {
- if (test->result == LOAD_REJECT)
- goto out;
- else
- goto err;
- }
-
- if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
- if (test->result == ATTACH_REJECT)
- goto out;
- else
- goto err;
- }
-
- if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
- /* sys_bind() may fail for different reasons, errno has to be
- * checked to confirm that BPF program rejected it.
- */
- if (test->result == BIND_REJECT && errno == EPERM)
- goto out;
- else
- goto err;
- }
-
-
- if (test->result != SUCCESS)
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- /* Detaching w/o checking return code: best effort attempt. */
- if (progfd != -1)
- bpf_prog_detach(cgfd, test->attach_type);
- close(progfd);
- printf("[%s]\n", err ? "FAIL" : "PASS");
- return err;
-}
-
-static int run_tests(int cgfd)
-{
- int passes = 0;
- int fails = 0;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(tests); ++i) {
- if (run_test_case(cgfd, &tests[i]))
- ++fails;
- else
- ++passes;
- }
- printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
- return fails ? -1 : 0;
-}
-
-int main(int argc, char **argv)
-{
- int cgfd = -1;
- int err = 0;
-
- cgfd = cgroup_setup_and_join(CG_PATH);
- if (cgfd < 0)
- goto err;
-
- if (run_tests(cgfd))
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(cgfd);
- cleanup_cgroup_environment();
- return err;
-}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
deleted file mode 100644
index aa3f185fcb89..000000000000
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ /dev/null
@@ -1,1421 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <arpa/inet.h>
-#include <netinet/in.h>
-#include <sys/types.h>
-#include <sys/select.h>
-#include <sys/socket.h>
-
-#include <linux/filter.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
-
-#ifndef ENOTSUPP
-# define ENOTSUPP 524
-#endif
-
-#define CG_PATH "/foo"
-#define CONNECT4_PROG_PATH "./connect4_prog.o"
-#define CONNECT6_PROG_PATH "./connect6_prog.o"
-#define SENDMSG4_PROG_PATH "./sendmsg4_prog.o"
-#define SENDMSG6_PROG_PATH "./sendmsg6_prog.o"
-#define RECVMSG4_PROG_PATH "./recvmsg4_prog.o"
-#define RECVMSG6_PROG_PATH "./recvmsg6_prog.o"
-#define BIND4_PROG_PATH "./bind4_prog.o"
-#define BIND6_PROG_PATH "./bind6_prog.o"
-
-#define SERV4_IP "192.168.1.254"
-#define SERV4_REWRITE_IP "127.0.0.1"
-#define SRC4_IP "172.16.0.1"
-#define SRC4_REWRITE_IP "127.0.0.4"
-#define SERV4_PORT 4040
-#define SERV4_REWRITE_PORT 4444
-
-#define SERV6_IP "face:b00c:1234:5678::abcd"
-#define SERV6_REWRITE_IP "::1"
-#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4"
-#define SRC6_IP "::1"
-#define SRC6_REWRITE_IP "::6"
-#define WILDCARD6_IP "::"
-#define SERV6_PORT 6060
-#define SERV6_REWRITE_PORT 6666
-
-#define INET_NTOP_BUF 40
-
-struct sock_addr_test;
-
-typedef int (*load_fn)(const struct sock_addr_test *test);
-typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-struct sock_addr_test {
- const char *descr;
- /* BPF prog properties */
- load_fn loadfn;
- enum bpf_attach_type expected_attach_type;
- enum bpf_attach_type attach_type;
- /* Socket properties */
- int domain;
- int type;
- /* IP:port pairs for BPF prog to override */
- const char *requested_ip;
- unsigned short requested_port;
- const char *expected_ip;
- unsigned short expected_port;
- const char *expected_src_ip;
- /* Expected test result */
- enum {
- LOAD_REJECT,
- ATTACH_REJECT,
- ATTACH_OKAY,
- SYSCALL_EPERM,
- SYSCALL_ENOTSUPP,
- SUCCESS,
- } expected_result;
-};
-
-static int bind4_prog_load(const struct sock_addr_test *test);
-static int bind6_prog_load(const struct sock_addr_test *test);
-static int connect4_prog_load(const struct sock_addr_test *test);
-static int connect6_prog_load(const struct sock_addr_test *test);
-static int sendmsg_allow_prog_load(const struct sock_addr_test *test);
-static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
-static int recvmsg_allow_prog_load(const struct sock_addr_test *test);
-static int recvmsg_deny_prog_load(const struct sock_addr_test *test);
-static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
-static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
-static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test);
-
-static struct sock_addr_test tests[] = {
- /* bind */
- {
- "bind4: load prog with wrong expected attach type",
- bind4_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "bind4: attach prog with wrong attach type",
- bind4_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "bind4: rewrite IP & TCP port in",
- bind4_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_STREAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
- {
- "bind4: rewrite IP & UDP port in",
- bind4_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
- {
- "bind6: load prog with wrong expected attach type",
- bind6_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET6,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "bind6: attach prog with wrong attach type",
- bind6_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "bind6: rewrite IP & TCP port in",
- bind6_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET6,
- SOCK_STREAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
- {
- "bind6: rewrite IP & UDP port in",
- bind6_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
-
- /* connect */
- {
- "connect4: load prog with wrong expected attach type",
- connect4_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "connect4: attach prog with wrong attach type",
- connect4_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "connect4: rewrite IP & TCP port",
- connect4_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_STREAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "connect4: rewrite IP & UDP port",
- connect4_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "connect6: load prog with wrong expected attach type",
- connect6_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET6,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "connect6: attach prog with wrong attach type",
- connect6_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "connect6: rewrite IP & TCP port",
- connect6_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET6,
- SOCK_STREAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "connect6: rewrite IP & UDP port",
- connect6_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
-
- /* sendmsg */
- {
- "sendmsg4: load prog with wrong expected attach type",
- sendmsg4_rw_asm_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "sendmsg4: attach prog with wrong attach type",
- sendmsg4_rw_asm_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "sendmsg4: rewrite IP & port (asm)",
- sendmsg4_rw_asm_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg4: rewrite IP & port (C)",
- sendmsg4_rw_c_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg4: deny call",
- sendmsg_deny_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SYSCALL_EPERM,
- },
- {
- "sendmsg6: load prog with wrong expected attach type",
- sendmsg6_rw_asm_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "sendmsg6: attach prog with wrong attach type",
- sendmsg6_rw_asm_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "sendmsg6: rewrite IP & port (asm)",
- sendmsg6_rw_asm_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg6: rewrite IP & port (C)",
- sendmsg6_rw_c_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg6: IPv4-mapped IPv6",
- sendmsg6_rw_v4mapped_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SYSCALL_ENOTSUPP,
- },
- {
- "sendmsg6: set dst IP = [::] (BSD'ism)",
- sendmsg6_rw_wildcard_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg6: preserve dst IP = [::] (BSD'ism)",
- sendmsg_allow_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- WILDCARD6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_PORT,
- SRC6_IP,
- SUCCESS,
- },
- {
- "sendmsg6: deny call",
- sendmsg_deny_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SYSCALL_EPERM,
- },
-
- /* recvmsg */
- {
- "recvmsg4: return code ok",
- recvmsg_allow_prog_load,
- BPF_CGROUP_UDP4_RECVMSG,
- BPF_CGROUP_UDP4_RECVMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_OKAY,
- },
- {
- "recvmsg4: return code !ok",
- recvmsg_deny_prog_load,
- BPF_CGROUP_UDP4_RECVMSG,
- BPF_CGROUP_UDP4_RECVMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "recvmsg6: return code ok",
- recvmsg_allow_prog_load,
- BPF_CGROUP_UDP6_RECVMSG,
- BPF_CGROUP_UDP6_RECVMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_OKAY,
- },
- {
- "recvmsg6: return code !ok",
- recvmsg_deny_prog_load,
- BPF_CGROUP_UDP6_RECVMSG,
- BPF_CGROUP_UDP6_RECVMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "recvmsg4: rewrite IP & port (C)",
- recvmsg4_rw_c_prog_load,
- BPF_CGROUP_UDP4_RECVMSG,
- BPF_CGROUP_UDP4_RECVMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SERV4_IP,
- SUCCESS,
- },
- {
- "recvmsg6: rewrite IP & port (C)",
- recvmsg6_rw_c_prog_load,
- BPF_CGROUP_UDP6_RECVMSG,
- BPF_CGROUP_UDP6_RECVMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SERV6_IP,
- SUCCESS,
- },
-};
-
-static int mk_sockaddr(int domain, const char *ip, unsigned short port,
- struct sockaddr *addr, socklen_t addr_len)
-{
- struct sockaddr_in6 *addr6;
- struct sockaddr_in *addr4;
-
- if (domain != AF_INET && domain != AF_INET6) {
- log_err("Unsupported address family");
- return -1;
- }
-
- memset(addr, 0, addr_len);
-
- if (domain == AF_INET) {
- if (addr_len < sizeof(struct sockaddr_in))
- return -1;
- addr4 = (struct sockaddr_in *)addr;
- addr4->sin_family = domain;
- addr4->sin_port = htons(port);
- if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
- log_err("Invalid IPv4: %s", ip);
- return -1;
- }
- } else if (domain == AF_INET6) {
- if (addr_len < sizeof(struct sockaddr_in6))
- return -1;
- addr6 = (struct sockaddr_in6 *)addr;
- addr6->sin6_family = domain;
- addr6->sin6_port = htons(port);
- if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
- log_err("Invalid IPv6: %s", ip);
- return -1;
- }
- }
-
- return 0;
-}
-
-static int load_insns(const struct sock_addr_test *test,
- const struct bpf_insn *insns, size_t insns_cnt)
-{
- struct bpf_load_program_attr load_attr;
- int ret;
-
- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
- load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
- load_attr.expected_attach_type = test->expected_attach_type;
- load_attr.insns = insns;
- load_attr.insns_cnt = insns_cnt;
- load_attr.license = "GPL";
-
- ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
- if (ret < 0 && test->expected_result != LOAD_REJECT) {
- log_err(">>> Loading program error.\n"
- ">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
- }
-
- return ret;
-}
-
-static int load_path(const struct sock_addr_test *test, const char *path)
-{
- struct bpf_prog_load_attr attr;
- struct bpf_object *obj;
- int prog_fd;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = path;
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
- attr.expected_attach_type = test->expected_attach_type;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
-
- if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
- if (test->expected_result != LOAD_REJECT)
- log_err(">>> Loading program (%s) error.\n", path);
- return -1;
- }
-
- return prog_fd;
-}
-
-static int bind4_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, BIND4_PROG_PATH);
-}
-
-static int bind6_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, BIND6_PROG_PATH);
-}
-
-static int connect4_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, CONNECT4_PROG_PATH);
-}
-
-static int connect6_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, CONNECT6_PROG_PATH);
-}
-
-static int xmsg_ret_only_prog_load(const struct sock_addr_test *test,
- int32_t rc)
-{
- struct bpf_insn insns[] = {
- /* return rc */
- BPF_MOV64_IMM(BPF_REG_0, rc),
- BPF_EXIT_INSN(),
- };
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
-}
-
-static int sendmsg_allow_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 1);
-}
-
-static int sendmsg_deny_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 0);
-}
-
-static int recvmsg_allow_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 1);
-}
-
-static int recvmsg_deny_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 0);
-}
-
-static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
-{
- struct sockaddr_in dst4_rw_addr;
- struct in_addr src4_rw_ip;
-
- if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) {
- log_err("Invalid IPv4: %s", SRC4_REWRITE_IP);
- return -1;
- }
-
- if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
- (struct sockaddr *)&dst4_rw_addr,
- sizeof(dst4_rw_addr)) == -1)
- return -1;
-
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET && */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8),
-
- /* sk.type == SOCK_DGRAM) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, type)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6),
-
- /* msg_src_ip4 = src4_rw_ip */
- BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, msg_src_ip4)),
-
- /* user_ip4 = dst4_rw_addr.sin_addr */
- BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_ip4)),
-
- /* user_port = dst4_rw_addr.sin_port */
- BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
- /* } */
-
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
-
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
-}
-
-static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, RECVMSG4_PROG_PATH);
-}
-
-static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, SENDMSG4_PROG_PATH);
-}
-
-static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
- const char *rw_dst_ip)
-{
- struct sockaddr_in6 dst6_rw_addr;
- struct in6_addr src6_rw_ip;
-
- if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) {
- log_err("Invalid IPv6: %s", SRC6_REWRITE_IP);
- return -1;
- }
-
- if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT,
- (struct sockaddr *)&dst6_rw_addr,
- sizeof(dst6_rw_addr)) == -1)
- return -1;
-
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET6) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
-
-#define STORE_IPV6_WORD_N(DST, SRC, N) \
- BPF_MOV32_IMM(BPF_REG_7, SRC[N]), \
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
- offsetof(struct bpf_sock_addr, DST[N]))
-
-#define STORE_IPV6(DST, SRC) \
- STORE_IPV6_WORD_N(DST, SRC, 0), \
- STORE_IPV6_WORD_N(DST, SRC, 1), \
- STORE_IPV6_WORD_N(DST, SRC, 2), \
- STORE_IPV6_WORD_N(DST, SRC, 3)
-
- STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32),
- STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32),
-
- /* user_port = dst6_rw_addr.sin6_port */
- BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
-
- /* } */
-
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
-
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
-}
-
-static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
-{
- return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP);
-}
-
-static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, RECVMSG6_PROG_PATH);
-}
-
-static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test)
-{
- return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP);
-}
-
-static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test)
-{
- return sendmsg6_rw_dst_asm_prog_load(test, WILDCARD6_IP);
-}
-
-static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, SENDMSG6_PROG_PATH);
-}
-
-static int cmp_addr(const struct sockaddr_storage *addr1,
- const struct sockaddr_storage *addr2, int cmp_port)
-{
- const struct sockaddr_in *four1, *four2;
- const struct sockaddr_in6 *six1, *six2;
-
- if (addr1->ss_family != addr2->ss_family)
- return -1;
-
- if (addr1->ss_family == AF_INET) {
- four1 = (const struct sockaddr_in *)addr1;
- four2 = (const struct sockaddr_in *)addr2;
- return !((four1->sin_port == four2->sin_port || !cmp_port) &&
- four1->sin_addr.s_addr == four2->sin_addr.s_addr);
- } else if (addr1->ss_family == AF_INET6) {
- six1 = (const struct sockaddr_in6 *)addr1;
- six2 = (const struct sockaddr_in6 *)addr2;
- return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
- !memcmp(&six1->sin6_addr, &six2->sin6_addr,
- sizeof(struct in6_addr)));
- }
-
- return -1;
-}
-
-static int cmp_sock_addr(info_fn fn, int sock1,
- const struct sockaddr_storage *addr2, int cmp_port)
-{
- struct sockaddr_storage addr1;
- socklen_t len1 = sizeof(addr1);
-
- memset(&addr1, 0, len1);
- if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
- return -1;
-
- return cmp_addr(&addr1, addr2, cmp_port);
-}
-
-static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2)
-{
- return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0);
-}
-
-static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2)
-{
- return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1);
-}
-
-static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2)
-{
- return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1);
-}
-
-static int start_server(int type, const struct sockaddr_storage *addr,
- socklen_t addr_len)
-{
- int fd;
-
- fd = socket(addr->ss_family, type, 0);
- if (fd == -1) {
- log_err("Failed to create server socket");
- goto out;
- }
-
- if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
- log_err("Failed to bind server socket");
- goto close_out;
- }
-
- if (type == SOCK_STREAM) {
- if (listen(fd, 128) == -1) {
- log_err("Failed to listen on server socket");
- goto close_out;
- }
- }
-
- goto out;
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int connect_to_server(int type, const struct sockaddr_storage *addr,
- socklen_t addr_len)
-{
- int domain;
- int fd = -1;
-
- domain = addr->ss_family;
-
- if (domain != AF_INET && domain != AF_INET6) {
- log_err("Unsupported address family");
- goto err;
- }
-
- fd = socket(domain, type, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto err;
- }
-
- if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
- log_err("Fail to connect to server");
- goto err;
- }
-
- goto out;
-err:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-int init_pktinfo(int domain, struct cmsghdr *cmsg)
-{
- struct in6_pktinfo *pktinfo6;
- struct in_pktinfo *pktinfo4;
-
- if (domain == AF_INET) {
- cmsg->cmsg_level = SOL_IP;
- cmsg->cmsg_type = IP_PKTINFO;
- cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
- pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg);
- memset(pktinfo4, 0, sizeof(struct in_pktinfo));
- if (inet_pton(domain, SRC4_IP,
- (void *)&pktinfo4->ipi_spec_dst) != 1)
- return -1;
- } else if (domain == AF_INET6) {
- cmsg->cmsg_level = SOL_IPV6;
- cmsg->cmsg_type = IPV6_PKTINFO;
- cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
- pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
- memset(pktinfo6, 0, sizeof(struct in6_pktinfo));
- if (inet_pton(domain, SRC6_IP,
- (void *)&pktinfo6->ipi6_addr) != 1)
- return -1;
- } else {
- return -1;
- }
-
- return 0;
-}
-
-static int sendmsg_to_server(int type, const struct sockaddr_storage *addr,
- socklen_t addr_len, int set_cmsg, int flags,
- int *syscall_err)
-{
- union {
- char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
- struct cmsghdr align;
- } control6;
- union {
- char buf[CMSG_SPACE(sizeof(struct in_pktinfo))];
- struct cmsghdr align;
- } control4;
- struct msghdr hdr;
- struct iovec iov;
- char data = 'a';
- int domain;
- int fd = -1;
-
- domain = addr->ss_family;
-
- if (domain != AF_INET && domain != AF_INET6) {
- log_err("Unsupported address family");
- goto err;
- }
-
- fd = socket(domain, type, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto err;
- }
-
- memset(&iov, 0, sizeof(iov));
- iov.iov_base = &data;
- iov.iov_len = sizeof(data);
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.msg_name = (void *)addr;
- hdr.msg_namelen = addr_len;
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
-
- if (set_cmsg) {
- if (domain == AF_INET) {
- hdr.msg_control = &control4;
- hdr.msg_controllen = sizeof(control4.buf);
- } else if (domain == AF_INET6) {
- hdr.msg_control = &control6;
- hdr.msg_controllen = sizeof(control6.buf);
- }
- if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) {
- log_err("Fail to init pktinfo");
- goto err;
- }
- }
-
- if (sendmsg(fd, &hdr, flags) != sizeof(data)) {
- log_err("Fail to send message to server");
- *syscall_err = errno;
- goto err;
- }
-
- goto out;
-err:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int fastconnect_to_server(const struct sockaddr_storage *addr,
- socklen_t addr_len)
-{
- int sendmsg_err;
-
- return sendmsg_to_server(SOCK_STREAM, addr, addr_len, /*set_cmsg*/0,
- MSG_FASTOPEN, &sendmsg_err);
-}
-
-static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
-{
- struct timeval tv;
- struct msghdr hdr;
- struct iovec iov;
- char data[64];
- fd_set rfds;
-
- FD_ZERO(&rfds);
- FD_SET(sockfd, &rfds);
-
- tv.tv_sec = 2;
- tv.tv_usec = 0;
-
- if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 ||
- !FD_ISSET(sockfd, &rfds))
- return -1;
-
- memset(&iov, 0, sizeof(iov));
- iov.iov_base = data;
- iov.iov_len = sizeof(data);
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.msg_name = src_addr;
- hdr.msg_namelen = sizeof(struct sockaddr_storage);
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
-
- return recvmsg(sockfd, &hdr, 0);
-}
-
-static int init_addrs(const struct sock_addr_test *test,
- struct sockaddr_storage *requested_addr,
- struct sockaddr_storage *expected_addr,
- struct sockaddr_storage *expected_src_addr)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
-
- if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port,
- (struct sockaddr *)expected_addr, addr_len) == -1)
- goto err;
-
- if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port,
- (struct sockaddr *)requested_addr, addr_len) == -1)
- goto err;
-
- if (test->expected_src_ip &&
- mk_sockaddr(test->domain, test->expected_src_ip, 0,
- (struct sockaddr *)expected_src_addr, addr_len) == -1)
- goto err;
-
- return 0;
-err:
- return -1;
-}
-
-static int run_bind_test_case(const struct sock_addr_test *test)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
- struct sockaddr_storage requested_addr;
- struct sockaddr_storage expected_addr;
- int clientfd = -1;
- int servfd = -1;
- int err = 0;
-
- if (init_addrs(test, &requested_addr, &expected_addr, NULL))
- goto err;
-
- servfd = start_server(test->type, &requested_addr, addr_len);
- if (servfd == -1)
- goto err;
-
- if (cmp_local_addr(servfd, &expected_addr))
- goto err;
-
- /* Try to connect to server just in case */
- clientfd = connect_to_server(test->type, &expected_addr, addr_len);
- if (clientfd == -1)
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(clientfd);
- close(servfd);
- return err;
-}
-
-static int run_connect_test_case(const struct sock_addr_test *test)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
- struct sockaddr_storage expected_src_addr;
- struct sockaddr_storage requested_addr;
- struct sockaddr_storage expected_addr;
- int clientfd = -1;
- int servfd = -1;
- int err = 0;
-
- if (init_addrs(test, &requested_addr, &expected_addr,
- &expected_src_addr))
- goto err;
-
- /* Prepare server to connect to */
- servfd = start_server(test->type, &expected_addr, addr_len);
- if (servfd == -1)
- goto err;
-
- clientfd = connect_to_server(test->type, &requested_addr, addr_len);
- if (clientfd == -1)
- goto err;
-
- /* Make sure src and dst addrs were overridden properly */
- if (cmp_peer_addr(clientfd, &expected_addr))
- goto err;
-
- if (cmp_local_ip(clientfd, &expected_src_addr))
- goto err;
-
- if (test->type == SOCK_STREAM) {
- /* Test TCP Fast Open scenario */
- clientfd = fastconnect_to_server(&requested_addr, addr_len);
- if (clientfd == -1)
- goto err;
-
- /* Make sure src and dst addrs were overridden properly */
- if (cmp_peer_addr(clientfd, &expected_addr))
- goto err;
-
- if (cmp_local_ip(clientfd, &expected_src_addr))
- goto err;
- }
-
- goto out;
-err:
- err = -1;
-out:
- close(clientfd);
- close(servfd);
- return err;
-}
-
-static int run_xmsg_test_case(const struct sock_addr_test *test, int max_cmsg)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
- struct sockaddr_storage expected_addr;
- struct sockaddr_storage server_addr;
- struct sockaddr_storage sendmsg_addr;
- struct sockaddr_storage recvmsg_addr;
- int clientfd = -1;
- int servfd = -1;
- int set_cmsg;
- int err = 0;
-
- if (test->type != SOCK_DGRAM)
- goto err;
-
- if (init_addrs(test, &sendmsg_addr, &server_addr, &expected_addr))
- goto err;
-
- /* Prepare server to sendmsg to */
- servfd = start_server(test->type, &server_addr, addr_len);
- if (servfd == -1)
- goto err;
-
- for (set_cmsg = 0; set_cmsg <= max_cmsg; ++set_cmsg) {
- if (clientfd >= 0)
- close(clientfd);
-
- clientfd = sendmsg_to_server(test->type, &sendmsg_addr,
- addr_len, set_cmsg, /*flags*/0,
- &err);
- if (err)
- goto out;
- else if (clientfd == -1)
- goto err;
-
- /* Try to receive message on server instead of using
- * getpeername(2) on client socket, to check that client's
- * destination address was rewritten properly, since
- * getpeername(2) doesn't work with unconnected datagram
- * sockets.
- *
- * Get source address from recvmsg(2) as well to make sure
- * source was rewritten properly: getsockname(2) can't be used
- * since socket is unconnected and source defined for one
- * specific packet may differ from the one used by default and
- * returned by getsockname(2).
- */
- if (recvmsg_from_client(servfd, &recvmsg_addr) == -1)
- goto err;
-
- if (cmp_addr(&recvmsg_addr, &expected_addr, /*cmp_port*/0))
- goto err;
- }
-
- goto out;
-err:
- err = -1;
-out:
- close(clientfd);
- close(servfd);
- return err;
-}
-
-static int run_test_case(int cgfd, const struct sock_addr_test *test)
-{
- int progfd = -1;
- int err = 0;
-
- printf("Test case: %s .. ", test->descr);
-
- progfd = test->loadfn(test);
- if (test->expected_result == LOAD_REJECT && progfd < 0)
- goto out;
- else if (test->expected_result == LOAD_REJECT || progfd < 0)
- goto err;
-
- err = bpf_prog_attach(progfd, cgfd, test->attach_type,
- BPF_F_ALLOW_OVERRIDE);
- if (test->expected_result == ATTACH_REJECT && err) {
- err = 0; /* error was expected, reset it */
- goto out;
- } else if (test->expected_result == ATTACH_REJECT || err) {
- goto err;
- } else if (test->expected_result == ATTACH_OKAY) {
- err = 0;
- goto out;
- }
-
- switch (test->attach_type) {
- case BPF_CGROUP_INET4_BIND:
- case BPF_CGROUP_INET6_BIND:
- err = run_bind_test_case(test);
- break;
- case BPF_CGROUP_INET4_CONNECT:
- case BPF_CGROUP_INET6_CONNECT:
- err = run_connect_test_case(test);
- break;
- case BPF_CGROUP_UDP4_SENDMSG:
- case BPF_CGROUP_UDP6_SENDMSG:
- err = run_xmsg_test_case(test, 1);
- break;
- case BPF_CGROUP_UDP4_RECVMSG:
- case BPF_CGROUP_UDP6_RECVMSG:
- err = run_xmsg_test_case(test, 0);
- break;
- default:
- goto err;
- }
-
- if (test->expected_result == SYSCALL_EPERM && err == EPERM) {
- err = 0; /* error was expected, reset it */
- goto out;
- }
-
- if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) {
- err = 0; /* error was expected, reset it */
- goto out;
- }
-
- if (err || test->expected_result != SUCCESS)
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- /* Detaching w/o checking return code: best effort attempt. */
- if (progfd != -1)
- bpf_prog_detach(cgfd, test->attach_type);
- close(progfd);
- printf("[%s]\n", err ? "FAIL" : "PASS");
- return err;
-}
-
-static int run_tests(int cgfd)
-{
- int passes = 0;
- int fails = 0;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(tests); ++i) {
- if (run_test_case(cgfd, &tests[i]))
- ++fails;
- else
- ++passes;
- }
- printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
- return fails ? -1 : 0;
-}
-
-int main(int argc, char **argv)
-{
- int cgfd = -1;
- int err = 0;
-
- if (argc < 2) {
- fprintf(stderr,
- "%s has to be run via %s.sh. Skip direct run.\n",
- argv[0], argv[0]);
- exit(err);
- }
-
- cgfd = cgroup_setup_and_join(CG_PATH);
- if (cgfd < 0)
- goto err;
-
- if (run_tests(cgfd))
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(cgfd);
- cleanup_cgroup_environment();
- return err;
-}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
deleted file mode 100755
index 3b9fdb8094aa..000000000000
--- a/tools/testing/selftests/bpf/test_sock_addr.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-set -eu
-
-ping_once()
-{
- type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}"
- $PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
-}
-
-wait_for_ip()
-{
- local _i
- echo -n "Wait for testing IPv4/IPv6 to become available "
- for _i in $(seq ${MAX_PING_TRIES}); do
- echo -n "."
- if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then
- echo " OK"
- return
- fi
- done
- echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
- exit 1
-}
-
-setup()
-{
- # Create testing interfaces not to interfere with current environment.
- ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
- ip link set ${TEST_IF} up
- ip link set ${TEST_IF_PEER} up
-
- ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
- ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
- wait_for_ip
-}
-
-cleanup()
-{
- ip link del ${TEST_IF} 2>/dev/null || :
- ip link del ${TEST_IF_PEER} 2>/dev/null || :
-}
-
-main()
-{
- trap cleanup EXIT 2 3 6 15
- setup
- ./test_sock_addr setup_done
-}
-
-BASENAME=$(basename $0 .sh)
-TEST_IF="${BASENAME}1"
-TEST_IF_PEER="${BASENAME}2"
-TEST_IPv4="127.0.0.4/8"
-TEST_IPv6="::6/128"
-MAX_PING_TRIES=5
-
-main
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 427ca00a3217..76568db7a664 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -18,7 +18,6 @@
#include <sched.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <sys/types.h>
#include <sys/sendfile.h>
@@ -37,7 +36,6 @@
#include <bpf/libbpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
int running;
@@ -54,10 +52,12 @@ static void running_handler(int a);
#define S1_PORT 10000
#define S2_PORT 10001
-#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
-#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
+#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.bpf.o"
+#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.bpf.o"
#define CG_PATH "/sockmap"
+#define EDATAINTEGRITY 2001
+
/* global sockets */
int s1, s2, c1, c2, p1, p2;
int test_cnt;
@@ -65,7 +65,8 @@ int passed;
int failed;
int map_fd[9];
struct bpf_map *maps[9];
-int prog_fd[11];
+struct bpf_program *progs[9];
+struct bpf_link *links[9];
int txmsg_pass;
int txmsg_redir;
@@ -87,6 +88,10 @@ int ktls;
int peek_flag;
int skb_use_parser;
int txmsg_omit_skb_parser;
+int verify_push_start;
+int verify_push_len;
+int verify_pop_start;
+int verify_pop_len;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
@@ -139,6 +144,8 @@ struct sockmap_options {
bool sendpage;
bool data_test;
bool drop_expected;
+ bool check_recved_len;
+ bool tx_wait_mem;
int iov_count;
int iov_length;
int rate;
@@ -417,16 +424,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
{
bool drop = opt->drop_expected;
unsigned char k = 0;
+ int i, j, fp;
FILE *file;
- int i, fp;
file = tmpfile();
if (!file) {
perror("create file for sendpage");
return 1;
}
- for (i = 0; i < iov_length * cnt; i++, k++)
- fwrite(&k, sizeof(char), 1, file);
+ for (i = 0; i < cnt; i++, k = 0) {
+ for (j = 0; j < iov_length; j++, k++)
+ fwrite(&k, sizeof(char), 1, file);
+ }
fflush(file);
fseek(file, 0, SEEK_SET);
@@ -509,42 +518,111 @@ unwind_iov:
return -ENOMEM;
}
-static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
+/* In push or pop test, we need to do some calculations for msg_verify_data */
+static void msg_verify_date_prep(void)
{
- int i, j = 0, bytes_cnt = 0;
- unsigned char k = 0;
+ int push_range_end = txmsg_start_push + txmsg_end_push - 1;
+ int pop_range_end = txmsg_start_pop + txmsg_pop - 1;
+
+ if (txmsg_end_push && txmsg_pop &&
+ txmsg_start_push <= pop_range_end && txmsg_start_pop <= push_range_end) {
+ /* The push range and the pop range overlap */
+ int overlap_len;
+
+ verify_push_start = txmsg_start_push;
+ verify_pop_start = txmsg_start_pop;
+ if (txmsg_start_push < txmsg_start_pop)
+ overlap_len = min(push_range_end - txmsg_start_pop + 1, txmsg_pop);
+ else
+ overlap_len = min(pop_range_end - txmsg_start_push + 1, txmsg_end_push);
+ verify_push_len = max(txmsg_end_push - overlap_len, 0);
+ verify_pop_len = max(txmsg_pop - overlap_len, 0);
+ } else {
+ /* Otherwise */
+ verify_push_start = txmsg_start_push;
+ verify_pop_start = txmsg_start_pop;
+ verify_push_len = txmsg_end_push;
+ verify_pop_len = txmsg_pop;
+ }
+}
- for (i = 0; i < msg->msg_iovlen; i++) {
+static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz,
+ unsigned char *k_p, int *bytes_cnt_p,
+ int *check_cnt_p, int *push_p)
+{
+ int bytes_cnt = *bytes_cnt_p, check_cnt = *check_cnt_p, push = *push_p;
+ unsigned char k = *k_p;
+ int i, j;
+
+ for (i = 0, j = 0; i < msg->msg_iovlen && size; i++, j = 0) {
unsigned char *d = msg->msg_iov[i].iov_base;
/* Special case test for skb ingress + ktls */
if (i == 0 && txmsg_ktls_skb) {
if (msg->msg_iov[i].iov_len < 4)
- return -EIO;
+ return -EDATAINTEGRITY;
if (memcmp(d, "PASS", 4) != 0) {
fprintf(stderr,
"detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
i, 0, d[0], d[1], d[2], d[3]);
- return -EIO;
+ return -EDATAINTEGRITY;
}
j = 4; /* advance index past PASS header */
}
for (; j < msg->msg_iov[i].iov_len && size; j++) {
+ if (push > 0 &&
+ check_cnt == verify_push_start + verify_push_len - push) {
+ int skipped;
+revisit_push:
+ skipped = push;
+ if (j + push >= msg->msg_iov[i].iov_len)
+ skipped = msg->msg_iov[i].iov_len - j;
+ push -= skipped;
+ size -= skipped;
+ j += skipped - 1;
+ check_cnt += skipped;
+ continue;
+ }
+
+ if (verify_pop_len > 0 && check_cnt == verify_pop_start) {
+ bytes_cnt += verify_pop_len;
+ check_cnt += verify_pop_len;
+ k += verify_pop_len;
+
+ if (bytes_cnt == chunk_sz) {
+ k = 0;
+ bytes_cnt = 0;
+ check_cnt = 0;
+ push = verify_push_len;
+ }
+
+ if (push > 0 &&
+ check_cnt == verify_push_start + verify_push_len - push)
+ goto revisit_push;
+ }
+
if (d[j] != k++) {
fprintf(stderr,
"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
i, j, d[j], k - 1, d[j+1], k);
- return -EIO;
+ return -EDATAINTEGRITY;
}
bytes_cnt++;
+ check_cnt++;
if (bytes_cnt == chunk_sz) {
k = 0;
bytes_cnt = 0;
+ check_cnt = 0;
+ push = verify_push_len;
}
size--;
}
}
+ *k_p = k;
+ *bytes_cnt_p = bytes_cnt;
+ *check_cnt_p = check_cnt;
+ *push_p = push;
return 0;
}
@@ -556,8 +634,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
int err, i, flags = MSG_NOSIGNAL;
bool drop = opt->drop_expected;
bool data = opt->data_test;
+ int iov_alloc_length = iov_length;
- err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx);
+ if (!tx && opt->check_recved_len)
+ iov_alloc_length *= 2;
+
+ err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx);
if (err)
goto out_errno;
if (peek_flag) {
@@ -575,6 +657,10 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
sent = sendmsg(fd, &msg, flags);
if (!drop && sent < 0) {
+ if (opt->tx_wait_mem && errno == EACCES) {
+ errno = 0;
+ goto out_errno;
+ }
perror("sendmsg loop error");
goto out_errno;
} else if (drop && sent >= 0) {
@@ -589,10 +675,14 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
} else {
+ float total_bytes, txmsg_pop_total, txmsg_push_total;
int slct, recvp = 0, recv, max_fd = fd;
- float total_bytes, txmsg_pop_total;
int fd_flags = O_NONBLOCK;
struct timeval timeout;
+ unsigned char k = 0;
+ int bytes_cnt = 0;
+ int check_cnt = 0;
+ int push = 0;
fd_set w;
fcntl(fd, fd_flags);
@@ -606,12 +696,22 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
* This is really only useful for testing edge cases in code
* paths.
*/
- total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
- if (txmsg_apply)
+ total_bytes = (float)iov_length * (float)cnt;
+ if (!opt->sendpage)
+ total_bytes *= (float)iov_count;
+ if (txmsg_apply) {
+ txmsg_push_total = txmsg_end_push * (total_bytes / txmsg_apply);
txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
- else
+ } else {
+ txmsg_push_total = txmsg_end_push * cnt;
txmsg_pop_total = txmsg_pop * cnt;
+ }
+ total_bytes += txmsg_push_total;
total_bytes -= txmsg_pop_total;
+ if (data) {
+ msg_verify_date_prep();
+ push = verify_push_len;
+ }
err = clock_gettime(CLOCK_MONOTONIC, &s->start);
if (err < 0)
perror("recv start time");
@@ -641,6 +741,15 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
goto out_errno;
}
+ if (opt->tx_wait_mem) {
+ FD_ZERO(&w);
+ FD_SET(fd, &w);
+ slct = select(max_fd + 1, NULL, NULL, &w, &timeout);
+ errno = 0;
+ close(fd);
+ goto out_errno;
+ }
+
errno = 0;
if (peek_flag) {
flags |= MSG_PEEK;
@@ -663,14 +772,23 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
}
- s->bytes_recvd += recv;
+ if (recv > 0)
+ s->bytes_recvd += recv;
+
+ if (opt->check_recved_len && s->bytes_recvd > total_bytes) {
+ errno = EMSGSIZE;
+ fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n",
+ s->bytes_recvd, total_bytes);
+ goto out_errno;
+ }
if (data) {
int chunk_sz = opt->sendpage ?
- iov_length * cnt :
+ iov_length :
iov_length * iov_count;
- errno = msg_verify_data(&msg, recv, chunk_sz);
+ errno = msg_verify_data(&msg, recv, chunk_sz, &k, &bytes_cnt,
+ &check_cnt, &push);
if (errno) {
perror("data verify msg failed");
goto out_errno;
@@ -678,7 +796,11 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
if (recvp) {
errno = msg_verify_data(&msg_peek,
recvp,
- chunk_sz);
+ chunk_sz,
+ &k,
+ &bytes_cnt,
+ &check_cnt,
+ &push);
if (errno) {
perror("data verify msg_peek failed");
goto out_errno;
@@ -732,7 +854,7 @@ static int sendmsg_test(struct sockmap_options *opt)
* socket is not a valid test. So in this case lets not
* enable kTLS but still run the test.
*/
- if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+ if (!txmsg_redir || txmsg_ingress) {
err = sockmap_init_ktls(opt->verbose, rx_fd);
if (err)
return err;
@@ -742,9 +864,24 @@ static int sendmsg_test(struct sockmap_options *opt)
return err;
}
+ if (opt->tx_wait_mem) {
+ struct timeval timeout;
+ int rxtx_buf_len = 1024;
+
+ timeout.tv_sec = 3;
+ timeout.tv_usec = 0;
+
+ err = setsockopt(c2, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(struct timeval));
+ err |= setsockopt(c2, SOL_SOCKET, SO_SNDBUFFORCE, &rxtx_buf_len, sizeof(int));
+ err |= setsockopt(p2, SOL_SOCKET, SO_RCVBUFFORCE, &rxtx_buf_len, sizeof(int));
+ if (err) {
+ perror("setsockopt failed()");
+ return errno;
+ }
+ }
+
rxpid = fork();
if (rxpid == 0) {
- iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
if (opt->drop_expected || txmsg_ktls_skb_drop)
_exit(0);
@@ -769,7 +906,7 @@ static int sendmsg_test(struct sockmap_options *opt)
s.bytes_sent, sent_Bps, sent_Bps/giga,
s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
peek_flag ? "(peek_msg)" : "");
- if (err && txmsg_cork)
+ if (err && err != -EDATAINTEGRITY && txmsg_cork)
err = 0;
exit(err ? 1 : 0);
} else if (rxpid == -1) {
@@ -777,6 +914,9 @@ static int sendmsg_test(struct sockmap_options *opt)
return errno;
}
+ if (opt->tx_wait_mem)
+ close(c2);
+
txpid = fork();
if (txpid == 0) {
if (opt->sendpage)
@@ -908,7 +1048,8 @@ enum {
static int run_options(struct sockmap_options *options, int cg_fd, int test)
{
- int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
+ int i, key, next_key, err, zero = 0;
+ struct bpf_program *tx_prog;
/* If base test skip BPF setup */
if (test == BASE || test == BASE_SENDPAGE)
@@ -916,48 +1057,44 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
/* Attach programs to sockmap */
if (!txmsg_omit_skb_parser) {
- err = bpf_prog_attach(prog_fd[0], map_fd[0],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
+ links[0] = bpf_program__attach_sockmap(progs[0], map_fd[0]);
+ if (!links[0]) {
fprintf(stderr,
- "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
- prog_fd[0], map_fd[0], err, strerror(errno));
- return err;
+ "ERROR: bpf_program__attach_sockmap (sockmap %i->%i): (%s)\n",
+ bpf_program__fd(progs[0]), map_fd[0], strerror(errno));
+ return -1;
}
}
- err = bpf_prog_attach(prog_fd[1], map_fd[0],
- BPF_SK_SKB_STREAM_VERDICT, 0);
- if (err) {
- fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
- err, strerror(errno));
- return err;
+ links[1] = bpf_program__attach_sockmap(progs[1], map_fd[0]);
+ if (!links[1]) {
+ fprintf(stderr, "ERROR: bpf_program__attach_sockmap (sockmap): (%s)\n",
+ strerror(errno));
+ return -1;
}
/* Attach programs to TLS sockmap */
if (txmsg_ktls_skb) {
if (!txmsg_omit_skb_parser) {
- err = bpf_prog_attach(prog_fd[0], map_fd[8],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
+ links[2] = bpf_program__attach_sockmap(progs[0], map_fd[8]);
+ if (!links[2]) {
fprintf(stderr,
- "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
- prog_fd[0], map_fd[8], err, strerror(errno));
- return err;
+ "ERROR: bpf_program__attach_sockmap (TLS sockmap %i->%i): (%s)\n",
+ bpf_program__fd(progs[0]), map_fd[8], strerror(errno));
+ return -1;
}
}
- err = bpf_prog_attach(prog_fd[2], map_fd[8],
- BPF_SK_SKB_STREAM_VERDICT, 0);
- if (err) {
- fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
- err, strerror(errno));
- return err;
+ links[3] = bpf_program__attach_sockmap(progs[2], map_fd[8]);
+ if (!links[3]) {
+ fprintf(stderr, "ERROR: bpf_program__attach_sockmap (TLS sockmap): (%s)\n",
+ strerror(errno));
+ return -1;
}
}
/* Attach to cgroups */
- err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ err = bpf_prog_attach(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
err, strerror(errno));
@@ -973,30 +1110,31 @@ run:
/* Attach txmsg program to sockmap */
if (txmsg_pass)
- tx_prog_fd = prog_fd[4];
+ tx_prog = progs[4];
else if (txmsg_redir)
- tx_prog_fd = prog_fd[5];
+ tx_prog = progs[5];
else if (txmsg_apply)
- tx_prog_fd = prog_fd[6];
+ tx_prog = progs[6];
else if (txmsg_cork)
- tx_prog_fd = prog_fd[7];
+ tx_prog = progs[7];
else if (txmsg_drop)
- tx_prog_fd = prog_fd[8];
+ tx_prog = progs[8];
else
- tx_prog_fd = 0;
+ tx_prog = NULL;
- if (tx_prog_fd) {
- int redir_fd, i = 0;
+ if (tx_prog) {
+ int redir_fd;
- err = bpf_prog_attach(tx_prog_fd,
- map_fd[1], BPF_SK_MSG_VERDICT, 0);
- if (err) {
+ links[4] = bpf_program__attach_sockmap(tx_prog, map_fd[1]);
+ if (!links[4]) {
fprintf(stderr,
- "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
- err, strerror(errno));
+ "ERROR: bpf_program__attach_sockmap (txmsg): (%s)\n",
+ strerror(errno));
+ err = -1;
goto out;
}
+ i = 0;
err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
if (err) {
fprintf(stderr,
@@ -1234,17 +1372,15 @@ run:
} else
fprintf(stderr, "unknown test\n");
out:
- /* Detatch and zero all the maps */
- bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
- bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
- bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
- bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
- bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
+ /* Detach and zero all the maps */
+ bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS);
- if (tx_prog_fd >= 0)
- bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+ for (i = 0; i < ARRAY_SIZE(links); i++) {
+ if (links[i])
+ bpf_link__detach(links[i]);
+ }
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < ARRAY_SIZE(map_fd); i++) {
key = next_key = 0;
bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
@@ -1414,8 +1550,8 @@ static void test_send_many(struct sockmap_options *opt, int cgrp)
static void test_send_large(struct sockmap_options *opt, int cgrp)
{
- opt->iov_length = 256;
- opt->iov_count = 1024;
+ opt->iov_length = 8192;
+ opt->iov_count = 32;
opt->rate = 2;
test_exec(cgrp, opt);
}
@@ -1441,6 +1577,18 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
test_send(opt, cgrp);
}
+static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt)
+{
+ opt->tx_wait_mem = true;
+ txmsg_redir = 1;
+ test_send_large(opt, cgrp);
+
+ txmsg_redir = 1;
+ txmsg_apply = 4097;
+ test_send_large(opt, cgrp);
+ opt->tx_wait_mem = false;
+}
+
static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
{
txmsg_drop = 1;
@@ -1536,17 +1684,19 @@ static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
{
/* Test basic start/end */
+ txmsg_pass = 1;
txmsg_start = 1;
txmsg_end = 2;
test_send(opt, cgrp);
/* Test >4k pull */
+ txmsg_pass = 1;
txmsg_start = 4096;
txmsg_end = 9182;
test_send_large(opt, cgrp);
/* Test pull + redirect */
- txmsg_redir = 0;
+ txmsg_redir = 1;
txmsg_start = 1;
txmsg_end = 2;
test_send(opt, cgrp);
@@ -1568,12 +1718,16 @@ static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
{
+ bool data = opt->data_test;
+
/* Test basic pop */
+ txmsg_pass = 1;
txmsg_start_pop = 1;
txmsg_pop = 2;
test_send_many(opt, cgrp);
/* Test pop with >4k */
+ txmsg_pass = 1;
txmsg_start_pop = 4096;
txmsg_pop = 4096;
test_send_large(opt, cgrp);
@@ -1584,6 +1738,12 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
txmsg_pop = 2;
test_send_many(opt, cgrp);
+ /* TODO: Test for pop + cork should be different,
+ * - It makes the layout of the received data difficult
+ * - It makes it hard to calculate the total_bytes in the recvmsg
+ * Temporarily skip the data integrity test for this case now.
+ */
+ opt->data_test = false;
/* Test pop + cork */
txmsg_redir = 0;
txmsg_cork = 512;
@@ -1597,16 +1757,21 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
txmsg_start_pop = 1;
txmsg_pop = 2;
test_send_many(opt, cgrp);
+ opt->data_test = data;
}
static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
{
+ bool data = opt->data_test;
+
/* Test basic push */
+ txmsg_pass = 1;
txmsg_start_push = 1;
txmsg_end_push = 1;
test_send(opt, cgrp);
/* Test push 4kB >4k */
+ txmsg_pass = 1;
txmsg_start_push = 4096;
txmsg_end_push = 4096;
test_send_large(opt, cgrp);
@@ -1617,45 +1782,108 @@ static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
txmsg_end_push = 2;
test_send_many(opt, cgrp);
+ /* TODO: Test for push + cork should be different,
+ * - It makes the layout of the received data difficult
+ * - It makes it hard to calculate the total_bytes in the recvmsg
+ * Temporarily skip the data integrity test for this case now.
+ */
+ opt->data_test = false;
/* Test push + cork */
txmsg_redir = 0;
txmsg_cork = 512;
txmsg_start_push = 1;
txmsg_end_push = 2;
test_send_many(opt, cgrp);
+ opt->data_test = data;
}
static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
{
+ /* Test push/pop range overlapping */
+ txmsg_pass = 1;
txmsg_start_push = 1;
txmsg_end_push = 10;
txmsg_start_pop = 5;
txmsg_pop = 4;
test_send_large(opt, cgrp);
+
+ txmsg_pass = 1;
+ txmsg_start_push = 1;
+ txmsg_end_push = 10;
+ txmsg_start_pop = 5;
+ txmsg_pop = 16;
+ test_send_large(opt, cgrp);
+
+ txmsg_pass = 1;
+ txmsg_start_push = 5;
+ txmsg_end_push = 4;
+ txmsg_start_pop = 1;
+ txmsg_pop = 10;
+ test_send_large(opt, cgrp);
+
+ txmsg_pass = 1;
+ txmsg_start_push = 5;
+ txmsg_end_push = 16;
+ txmsg_start_pop = 1;
+ txmsg_pop = 10;
+ test_send_large(opt, cgrp);
+
+ /* Test push/pop range non-overlapping */
+ txmsg_pass = 1;
+ txmsg_start_push = 1;
+ txmsg_end_push = 10;
+ txmsg_start_pop = 16;
+ txmsg_pop = 4;
+ test_send_large(opt, cgrp);
+
+ txmsg_pass = 1;
+ txmsg_start_push = 16;
+ txmsg_end_push = 10;
+ txmsg_start_pop = 5;
+ txmsg_pop = 4;
+ test_send_large(opt, cgrp);
}
static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
{
txmsg_pass = 1;
txmsg_redir = 0;
+ txmsg_ingress = 0;
txmsg_apply = 1;
txmsg_cork = 0;
test_send_one(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
+ txmsg_ingress = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ test_send_one(opt, cgrp);
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_ingress = 1;
txmsg_apply = 1;
txmsg_cork = 0;
test_send_one(opt, cgrp);
txmsg_pass = 1;
txmsg_redir = 0;
+ txmsg_ingress = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ test_send_large(opt, cgrp);
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_ingress = 0;
txmsg_apply = 1024;
txmsg_cork = 0;
test_send_large(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
+ txmsg_ingress = 1;
txmsg_apply = 1024;
txmsg_cork = 0;
test_send_large(opt, cgrp);
@@ -1680,12 +1908,27 @@ static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
{
txmsg_pass = 1;
skb_use_parser = 512;
+ if (ktls == 1)
+ skb_use_parser = 570;
opt->iov_length = 256;
opt->iov_count = 1;
opt->rate = 2;
test_exec(cgrp, opt);
}
+static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt)
+{
+ if (ktls == 1)
+ return;
+ skb_use_parser = 10;
+ opt->iov_length = 20;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ opt->check_recved_len = true;
+ test_exec(cgrp, opt);
+ opt->check_recved_len = false;
+}
+
char *map_names[] = {
"sock_map",
"sock_map_txmsg",
@@ -1698,34 +1941,6 @@ char *map_names[] = {
"tls_sock_map",
};
-int prog_attach_type[] = {
- BPF_SK_SKB_STREAM_PARSER,
- BPF_SK_SKB_STREAM_VERDICT,
- BPF_SK_SKB_STREAM_VERDICT,
- BPF_CGROUP_SOCK_OPS,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
-};
-
-int prog_type[] = {
- BPF_PROG_TYPE_SK_SKB,
- BPF_PROG_TYPE_SK_SKB,
- BPF_PROG_TYPE_SK_SKB,
- BPF_PROG_TYPE_SOCK_OPS,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
-};
-
static int populate_progs(char *bpf_file)
{
struct bpf_program *prog;
@@ -1744,21 +1959,14 @@ static int populate_progs(char *bpf_file)
return -1;
}
- bpf_object__for_each_program(prog, obj) {
- bpf_program__set_type(prog, prog_type[i]);
- bpf_program__set_expected_attach_type(prog,
- prog_attach_type[i]);
- i++;
- }
-
i = bpf_object__load(obj);
i = 0;
bpf_object__for_each_program(prog, obj) {
- prog_fd[i] = bpf_program__fd(prog);
+ progs[i] = prog;
i++;
}
- for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+ for (i = 0; i < ARRAY_SIZE(map_fd); i++) {
maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
map_fd[i] = bpf_map__fd(maps[i]);
if (map_fd[i] < 0) {
@@ -1768,12 +1976,16 @@ static int populate_progs(char *bpf_file)
}
}
+ for (i = 0; i < ARRAY_SIZE(links); i++)
+ links[i] = NULL;
+
return 0;
}
struct _test test[] = {
{"txmsg test passthrough", test_txmsg_pass},
{"txmsg test redirect", test_txmsg_redir},
+ {"txmsg test redirect wait send mem", test_txmsg_redir_wait_sndmem},
{"txmsg test drop", test_txmsg_drop},
{"txmsg test ingress redirect", test_txmsg_ingress_redir},
{"txmsg test skb", test_txmsg_skb},
@@ -1784,7 +1996,8 @@ struct _test test[] = {
{"txmsg test pull-data", test_txmsg_pull},
{"txmsg test pop-data", test_txmsg_pop},
{"txmsg test push/pop data", test_txmsg_push_pop},
- {"txmsg text ingress parser", test_txmsg_ingress_parser},
+ {"txmsg test ingress parser", test_txmsg_ingress_parser},
+ {"txmsg test ingress parser2", test_txmsg_ingress_parser2},
};
static int check_whitelist(struct _test *t, struct sockmap_options *opt)
@@ -1800,10 +2013,13 @@ static int check_whitelist(struct _test *t, struct sockmap_options *opt)
while (entry) {
if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
strstr(opt->map, entry) != 0 ||
- strstr(t->title, entry) != 0)
+ strstr(t->title, entry) != 0) {
+ free(ptr);
return 0;
+ }
entry = strtok(NULL, ",");
}
+ free(ptr);
return -EINVAL;
}
@@ -1820,10 +2036,13 @@ static int check_blacklist(struct _test *t, struct sockmap_options *opt)
while (entry) {
if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
strstr(opt->map, entry) != 0 ||
- strstr(t->title, entry) != 0)
+ strstr(t->title, entry) != 0) {
+ free(ptr);
return 0;
+ }
entry = strtok(NULL, ",");
}
+ free(ptr);
return -EINVAL;
}
@@ -1838,7 +2057,7 @@ static int __test_selftests(int cg_fd, struct sockmap_options *opt)
}
/* Tests basic commands and APIs */
- for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
+ for (i = 0; i < ARRAY_SIZE(test); i++) {
struct _test t = test[i];
if (check_whitelist(&t, opt) != 0)
@@ -1877,7 +2096,6 @@ static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
static int test_selftest(int cg_fd, struct sockmap_options *opt)
{
-
test_selftests_sockmap(cg_fd, opt);
test_selftests_sockhash(cg_fd, opt);
test_selftests_ktls(cg_fd, opt);
@@ -1988,6 +2206,9 @@ int main(int argc, char **argv)
cg_created = 1;
}
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
if (test == SELFTESTS) {
err = test_selftest(cg_fd, &options);
goto out;
@@ -2014,9 +2235,9 @@ out:
free(options.whitelist);
if (options.blacklist)
free(options.blacklist);
+ close(cg_fd);
if (cg_created)
cleanup_cgroup_environment();
- close(cg_fd);
return err;
}
diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c
deleted file mode 100644
index 47e132726203..000000000000
--- a/tools/testing/selftests/bpf/test_stub.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-/* Copyright (C) 2019 Netronome Systems, Inc. */
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include <string.h>
-
-int extra_prog_load_log_flags = 0;
-
-int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd)
-{
- struct bpf_prog_load_attr attr;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = type;
- attr.expected_attach_type = 0;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
- attr.log_level = extra_prog_load_log_flags;
-
- return bpf_prog_load_xattr(&attr, pobj, prog_fd);
-}
-
-int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t insns_cnt, const char *license,
- __u32 kern_version, char *log_buf,
- size_t log_buf_sz)
-{
- struct bpf_load_program_attr load_attr;
-
- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
- load_attr.prog_type = type;
- load_attr.expected_attach_type = 0;
- load_attr.name = NULL;
- load_attr.insns = insns;
- load_attr.insns_cnt = insns_cnt;
- load_attr.license = license;
- load_attr.kern_version = kern_version;
- load_attr.prog_flags = BPF_F_TEST_RND_HI32;
- load_attr.log_level = extra_prog_load_log_flags;
-
- return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
-}
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 6272c784ca2a..f1300047c1e0 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -20,7 +20,7 @@
#include <bpf/bpf.h>
#include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
+#include "testing_helpers.h"
static struct bpf_insn prog[BPF_MAXINSNS];
@@ -57,7 +57,7 @@ static int bpf_try_load_prog(int insns, int fd_map,
int fd_prog;
bpf_filler(insns, fd_map);
- fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
+ fd_prog = bpf_test_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
NULL, 0);
assert(fd_prog > 0);
if (fd_map > 0)
@@ -116,7 +116,7 @@ static void tag_from_alg(int insns, uint8_t *tag, uint32_t len)
static const struct sockaddr_alg alg = {
.salg_family = AF_ALG,
.salg_type = "hash",
- .salg_name = "sha1",
+ .salg_name = "sha256",
};
int fd_base, fd_alg, ret;
ssize_t size;
@@ -184,11 +184,15 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map,
int main(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
uint32_t tests = 0;
int i, fd_map;
- fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
- sizeof(int), 1, BPF_F_NO_PREALLOC);
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
+ sizeof(int), 1, &opts);
assert(fd_map > 0);
for (i = 0; i < 5; i++) {
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh
deleted file mode 100755
index daa7d1b8d309..000000000000
--- a/tools/testing/selftests/bpf/test_tc_edt.sh
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test installs a TC bpf program that throttles a TCP flow
-# with dst port = 9000 down to 5MBps. Then it measures actual
-# throughput of the flow.
-
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-# check that nc, dd, and timeout are present
-command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available"; exit 1; }
-command -v dd >/dev/null 2>&1 || \
- { echo >&2 "nc is not available"; exit 1; }
-command -v timeout >/dev/null 2>&1 || \
- { echo >&2 "timeout is not available"; exit 1; }
-
-readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
-readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
-
-readonly IP_SRC="172.16.1.100"
-readonly IP_DST="172.16.2.100"
-
-cleanup()
-{
- ip netns del ${NS_SRC}
- ip netns del ${NS_DST}
-}
-
-trap cleanup EXIT
-
-set -e # exit on error
-
-ip netns add "${NS_SRC}"
-ip netns add "${NS_DST}"
-ip link add veth_src type veth peer name veth_dst
-ip link set veth_src netns ${NS_SRC}
-ip link set veth_dst netns ${NS_DST}
-
-ip -netns ${NS_SRC} addr add ${IP_SRC}/24 dev veth_src
-ip -netns ${NS_DST} addr add ${IP_DST}/24 dev veth_dst
-
-ip -netns ${NS_SRC} link set dev veth_src up
-ip -netns ${NS_DST} link set dev veth_dst up
-
-ip -netns ${NS_SRC} route add ${IP_DST}/32 dev veth_src
-ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst
-
-# set up TC on TX
-ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
-ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
-ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
- bpf da obj test_tc_edt.o sec cls_test
-
-
-# start the listener
-ip netns exec ${NS_DST} bash -c \
- "nc -4 -l -p 9000 >/dev/null &"
-declare -i NC_PID=$!
-sleep 1
-
-declare -ir TIMEOUT=20
-declare -ir EXPECTED_BPS=5000000
-
-# run the load, capture RX bytes on DST
-declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
- cat /sys/class/net/veth_dst/statistics/rx_bytes )
-
-set +e
-ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
- bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
-set -e
-
-declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
- cat /sys/class/net/veth_dst/statistics/rx_bytes )
-
-declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
-
-echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
- awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
- $1, ($2-$3)*100.0/$3}'
-
-# Pass the test if the actual bps is within 1% of the expected bps.
-# The difference is usually about 0.1% on a 20-sec test, and ==> zero
-# the longer the test runs.
-declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \
- awk 'function abs(x){return ((x < 0.0) ? -x : x)}
- {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
- else { print "0"} }' )
-if [ "${RES}" == "0" ] ; then
- echo "PASS"
-else
- echo "FAIL"
- exit 1
-fi
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
deleted file mode 100755
index 8868aa1ca902..000000000000
--- a/tools/testing/selftests/bpf/test_tc_redirect.sh
+++ /dev/null
@@ -1,216 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
-# between src and dst. The netns fwd has veth links to each src and dst. The
-# client is in src and server in dst. The test installs a TC BPF program to each
-# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
-# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
-# switch from ingress side; it also installs a checker prog on the egress side
-# to drop unexpected traffic.
-
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-# check that needed tools are present
-command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available"; exit 1; }
-command -v dd >/dev/null 2>&1 || \
- { echo >&2 "dd is not available"; exit 1; }
-command -v timeout >/dev/null 2>&1 || \
- { echo >&2 "timeout is not available"; exit 1; }
-command -v ping >/dev/null 2>&1 || \
- { echo >&2 "ping is not available"; exit 1; }
-if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi
-command -v perl >/dev/null 2>&1 || \
- { echo >&2 "perl is not available"; exit 1; }
-command -v jq >/dev/null 2>&1 || \
- { echo >&2 "jq is not available"; exit 1; }
-command -v bpftool >/dev/null 2>&1 || \
- { echo >&2 "bpftool is not available"; exit 1; }
-
-readonly GREEN='\033[0;92m'
-readonly RED='\033[0;31m'
-readonly NC='\033[0m' # No Color
-
-readonly PING_ARG="-c 3 -w 10 -q"
-
-readonly TIMEOUT=10
-
-readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
-readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
-readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
-
-readonly IP4_SRC="172.16.1.100"
-readonly IP4_DST="172.16.2.100"
-
-readonly IP6_SRC="::1:dead:beef:cafe"
-readonly IP6_DST="::2:dead:beef:cafe"
-
-readonly IP4_SLL="169.254.0.1"
-readonly IP4_DLL="169.254.0.2"
-readonly IP4_NET="169.254.0.0"
-
-netns_cleanup()
-{
- ip netns del ${NS_SRC}
- ip netns del ${NS_FWD}
- ip netns del ${NS_DST}
-}
-
-netns_setup()
-{
- ip netns add "${NS_SRC}"
- ip netns add "${NS_FWD}"
- ip netns add "${NS_DST}"
-
- ip link add veth_src type veth peer name veth_src_fwd
- ip link add veth_dst type veth peer name veth_dst_fwd
-
- ip link set veth_src netns ${NS_SRC}
- ip link set veth_src_fwd netns ${NS_FWD}
-
- ip link set veth_dst netns ${NS_DST}
- ip link set veth_dst_fwd netns ${NS_FWD}
-
- ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
- ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
-
- # The fwd netns automatically get a v6 LL address / routes, but also
- # needs v4 one in order to start ARP probing. IP4_NET route is added
- # to the endpoints so that the ARP processing will reply.
-
- ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
- ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
-
- ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
- ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
-
- ip -netns ${NS_SRC} link set dev veth_src up
- ip -netns ${NS_FWD} link set dev veth_src_fwd up
-
- ip -netns ${NS_DST} link set dev veth_dst up
- ip -netns ${NS_FWD} link set dev veth_dst_fwd up
-
- ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
- ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
- ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
-
- ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
- ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
-
- ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
- ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
- ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
-
- ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
- ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
-
- fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
- fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
-
- ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
- ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
-
- ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
- ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
-}
-
-netns_test_connectivity()
-{
- set +e
-
- ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
- ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
-
- TEST="TCPv4 connectivity test"
- ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- TEST="TCPv6 connectivity test"
- ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- TEST="ICMPv4 connectivity test"
- ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- TEST="ICMPv6 connectivity test"
- ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST}
- if [ $? -ne 0 ]; then
- echo -e "${TEST}: ${RED}FAIL${NC}"
- exit 1
- fi
- echo -e "${TEST}: ${GREEN}PASS${NC}"
-
- set -e
-}
-
-hex_mem_str()
-{
- perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
-}
-
-netns_setup_bpf()
-{
- local obj=$1
- local use_forwarding=${2:-0}
-
- ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
- ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
- ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress
-
- ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
- ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
- ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress
-
- if [ "$use_forwarding" -eq "1" ]; then
- # bpf_fib_lookup() checks if forwarding is enabled
- ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
- ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
- ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1
- return 0
- fi
-
- veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
- veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
-
- progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
- for prog in $progs; do
- map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
- if [ ! -z "$map" ]; then
- bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
- bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
- fi
- done
-}
-
-trap netns_cleanup EXIT
-set -e
-
-netns_setup
-netns_setup_bpf test_tc_neigh.o
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_neigh_fib.o 1
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_peer.o
-netns_test_connectivity
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
deleted file mode 100755
index 7c76b841b17b..000000000000
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ /dev/null
@@ -1,295 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# In-place tunneling
-
-# must match the port that the bpf program filters on
-readonly port=8000
-
-readonly ns_prefix="ns-$$-"
-readonly ns1="${ns_prefix}1"
-readonly ns2="${ns_prefix}2"
-
-readonly ns1_v4=192.168.1.1
-readonly ns2_v4=192.168.1.2
-readonly ns1_v6=fd::1
-readonly ns2_v6=fd::2
-
-# Must match port used by bpf program
-readonly udpport=5555
-# MPLSoverUDP
-readonly mplsudpport=6635
-readonly mplsproto=137
-
-readonly infile="$(mktemp)"
-readonly outfile="$(mktemp)"
-
-setup() {
- ip netns add "${ns1}"
- ip netns add "${ns2}"
-
- ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
- peer name veth2 mtu 1500 netns "${ns2}"
-
- ip netns exec "${ns1}" ethtool -K veth1 tso off
-
- ip -netns "${ns1}" link set veth1 up
- ip -netns "${ns2}" link set veth2 up
-
- ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
- ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
- ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
- ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
-
- # clamp route to reserve room for tunnel headers
- ip -netns "${ns1}" -4 route flush table main
- ip -netns "${ns1}" -6 route flush table main
- ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
- ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
-
- sleep 1
-
- dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
-}
-
-cleanup() {
- ip netns del "${ns2}"
- ip netns del "${ns1}"
-
- if [[ -f "${outfile}" ]]; then
- rm "${outfile}"
- fi
- if [[ -f "${infile}" ]]; then
- rm "${infile}"
- fi
-
- if [[ -n $server_pid ]]; then
- kill $server_pid 2> /dev/null
- fi
-}
-
-server_listen() {
- ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
- server_pid=$!
- sleep 0.2
-}
-
-client_connect() {
- ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}"
- echo $?
-}
-
-verify_data() {
- wait "${server_pid}"
- server_pid=
- # sha1sum returns two fields [sha1] [filepath]
- # convert to bash array and access first elem
- insum=($(sha1sum ${infile}))
- outsum=($(sha1sum ${outfile}))
- if [[ "${insum[0]}" != "${outsum[0]}" ]]; then
- echo "data mismatch"
- exit 1
- fi
-}
-
-set -e
-
-# no arguments: automated test, run all
-if [[ "$#" -eq "0" ]]; then
- echo "ipip"
- $0 ipv4 ipip none 100
-
- echo "ip6ip6"
- $0 ipv6 ip6tnl none 100
-
- echo "sit"
- $0 ipv6 sit none 100
-
- for mac in none mpls eth ; do
- echo "ip gre $mac"
- $0 ipv4 gre $mac 100
-
- echo "ip6 gre $mac"
- $0 ipv6 ip6gre $mac 100
-
- echo "ip gre $mac gso"
- $0 ipv4 gre $mac 2000
-
- echo "ip6 gre $mac gso"
- $0 ipv6 ip6gre $mac 2000
-
- echo "ip udp $mac"
- $0 ipv4 udp $mac 100
-
- echo "ip6 udp $mac"
- $0 ipv6 ip6udp $mac 100
-
- echo "ip udp $mac gso"
- $0 ipv4 udp $mac 2000
-
- echo "ip6 udp $mac gso"
- $0 ipv6 ip6udp $mac 2000
- done
-
- echo "OK. All tests passed"
- exit 0
-fi
-
-if [[ "$#" -ne "4" ]]; then
- echo "Usage: $0"
- echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
- exit 1
-fi
-
-case "$1" in
-"ipv4")
- readonly addr1="${ns1_v4}"
- readonly addr2="${ns2_v4}"
- readonly ipproto=4
- readonly netcat_opt=-${ipproto}
- readonly foumod=fou
- readonly foutype=ipip
- readonly fouproto=4
- readonly fouproto_mpls=${mplsproto}
- readonly gretaptype=gretap
- ;;
-"ipv6")
- readonly addr1="${ns1_v6}"
- readonly addr2="${ns2_v6}"
- readonly ipproto=6
- readonly netcat_opt=-${ipproto}
- readonly foumod=fou6
- readonly foutype=ip6tnl
- readonly fouproto="41 -6"
- readonly fouproto_mpls="${mplsproto} -6"
- readonly gretaptype=ip6gretap
- ;;
-*)
- echo "unknown arg: $1"
- exit 1
- ;;
-esac
-
-readonly tuntype=$2
-readonly mac=$3
-readonly datalen=$4
-
-echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"
-
-trap cleanup EXIT
-
-setup
-
-# basic communication works
-echo "test basic connectivity"
-server_listen
-client_connect
-verify_data
-
-# clientside, insert bpf program to encap all TCP to port ${port}
-# client can no longer connect
-ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
-ip netns exec "${ns1}" tc filter add dev veth1 egress \
- bpf direct-action object-file ./test_tc_tunnel.o \
- section "encap_${tuntype}_${mac}"
-echo "test bpf encap without decap (expect failure)"
-server_listen
-! client_connect
-
-if [[ "$tuntype" =~ "udp" ]]; then
- # Set up fou tunnel.
- ttype="${foutype}"
- targs="encap fou encap-sport auto encap-dport $udpport"
- # fou may be a module; allow this to fail.
- modprobe "${foumod}" ||true
- if [[ "$mac" == "mpls" ]]; then
- dport=${mplsudpport}
- dproto=${fouproto_mpls}
- tmode="mode any ttl 255"
- else
- dport=${udpport}
- dproto=${fouproto}
- fi
- ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
- targs="encap fou encap-sport auto encap-dport $dport"
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
- ttype=$gretaptype
-else
- ttype=$tuntype
- targs=""
-fi
-
-# tunnel address family differs from inner for SIT
-if [[ "${tuntype}" == "sit" ]]; then
- link_addr1="${ns1_v4}"
- link_addr2="${ns2_v4}"
-else
- link_addr1="${addr1}"
- link_addr2="${addr2}"
-fi
-
-# serverside, insert decap module
-# server is still running
-# client can connect again
-ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
- ${tmode} remote "${link_addr1}" local "${link_addr2}" $targs
-
-expect_tun_fail=0
-
-if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
- # No support for MPLS IPv6 fou tunnel; expect failure.
- expect_tun_fail=1
-elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
- # No support for TEB fou tunnel; expect failure.
- expect_tun_fail=1
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
- # Share ethernet address between tunnel/veth2 so L2 decap works.
- ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
- awk '/ether/ { print $2 }')
- ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
-elif [[ "$mac" == "mpls" ]]; then
- modprobe mpls_iptunnel ||true
- modprobe mpls_gso ||true
- ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
- ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
- ip netns exec "${ns2}" ip link set lo up
- ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
-fi
-
-# Because packets are decapped by the tunnel they arrive on testtun0 from
-# the IP stack perspective. Ensure reverse path filtering is disabled
-# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
-# expected veth2 (veth2 is where 192.168.1.2 is configured).
-ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
-# rp needs to be disabled for both all and testtun0 as the rp value is
-# selected as the max of the "all" and device-specific values.
-ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
-ip netns exec "${ns2}" ip link set dev testtun0 up
-if [[ "$expect_tun_fail" == 1 ]]; then
- # This tunnel mode is not supported, so we expect failure.
- echo "test bpf encap with tunnel device decap (expect failure)"
- ! client_connect
-else
- echo "test bpf encap with tunnel device decap"
- client_connect
- verify_data
- server_listen
-fi
-
-# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3.
-if [[ "${tuntype}" == "sit" ]]; then
- echo OK
- exit 0
-fi
-
-# serverside, use BPF for decap
-ip netns exec "${ns2}" ip link del dev testtun0
-ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
-ip netns exec "${ns2}" tc filter add dev veth2 ingress \
- bpf direct-action object-file ./test_tc_tunnel.o section decap
-echo "test bpf encap with bpf decap"
-client_connect
-verify_data
-
-echo OK
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
deleted file mode 100755
index 9b3617d770a5..000000000000
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2018 Facebook
-# Copyright (c) 2019 Cloudflare
-
-set -eu
-
-wait_for_ip()
-{
- local _i
- printf "Wait for IP %s to become available " "$1"
- for _i in $(seq ${MAX_PING_TRIES}); do
- printf "."
- if ns1_exec ping -c 1 -W 1 "$1" >/dev/null 2>&1; then
- echo " OK"
- return
- fi
- sleep 1
- done
- echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
- exit 1
-}
-
-get_prog_id()
-{
- awk '/ id / {sub(/.* id /, "", $0); print($1)}'
-}
-
-ns1_exec()
-{
- ip netns exec ns1 "$@"
-}
-
-setup()
-{
- ip netns add ns1
- ns1_exec ip link set lo up
-
- ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
- ns1_exec sysctl -w net.ipv4.tcp_window_scaling=0
- ns1_exec sysctl -w net.ipv4.tcp_timestamps=0
- ns1_exec sysctl -w net.ipv4.tcp_sack=0
-
- wait_for_ip 127.0.0.1
- wait_for_ip ::1
-}
-
-cleanup()
-{
- ip netns del ns1 2>/dev/null || :
-}
-
-main()
-{
- trap cleanup EXIT 2 3 6 15
- setup
-
- printf "Testing clsact..."
- ns1_exec tc qdisc add dev "${TEST_IF}" clsact
- ns1_exec tc filter add dev "${TEST_IF}" ingress \
- bpf obj "${BPF_PROG_OBJ}" sec "${CLSACT_SECTION}" da
-
- BPF_PROG_ID=$(ns1_exec tc filter show dev "${TEST_IF}" ingress | \
- get_prog_id)
- ns1_exec "${PROG}" "${BPF_PROG_ID}"
- ns1_exec tc qdisc del dev "${TEST_IF}" clsact
-
- printf "Testing XDP..."
- ns1_exec ip link set "${TEST_IF}" xdp \
- object "${BPF_PROG_OBJ}" section "${XDP_SECTION}"
- BPF_PROG_ID=$(ns1_exec ip link show "${TEST_IF}" | get_prog_id)
- ns1_exec "${PROG}" "${BPF_PROG_ID}"
-}
-
-DIR=$(dirname $0)
-TEST_IF=lo
-MAX_PING_TRIES=5
-BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
-CLSACT_SECTION="clsact/check_syncookie"
-XDP_SECTION="xdp/check_syncookie"
-BPF_PROG_ID=0
-PROG="${DIR}/test_tcp_check_syncookie_user"
-
-main
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
deleted file mode 100644
index b9e991d43155..000000000000
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-// Copyright (c) 2019 Cloudflare
-
-#include <limits.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <arpa/inet.h>
-#include <netinet/in.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "cgroup_helpers.h"
-
-static int start_server(const struct sockaddr *addr, socklen_t len)
-{
- int fd;
-
- fd = socket(addr->sa_family, SOCK_STREAM, 0);
- if (fd == -1) {
- log_err("Failed to create server socket");
- goto out;
- }
-
- if (bind(fd, addr, len) == -1) {
- log_err("Failed to bind server socket");
- goto close_out;
- }
-
- if (listen(fd, 128) == -1) {
- log_err("Failed to listen on server socket");
- goto close_out;
- }
-
- goto out;
-
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int connect_to_server(int server_fd)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd = -1;
-
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto out;
- }
-
- fd = socket(addr.ss_family, SOCK_STREAM, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto out;
- }
-
- if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
- log_err("Fail to connect to server");
- goto close_out;
- }
-
- goto out;
-
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int get_map_fd_by_prog_id(int prog_id, bool *xdp)
-{
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- __u32 map_ids[1];
- int prog_fd = -1;
- int map_fd = -1;
-
- prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (prog_fd < 0) {
- log_err("Failed to get fd by prog id %d", prog_id);
- goto err;
- }
-
- info.nr_map_ids = 1;
- info.map_ids = (__u64)(unsigned long)map_ids;
-
- if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
- log_err("Failed to get info by prog fd %d", prog_fd);
- goto err;
- }
-
- if (!info.nr_map_ids) {
- log_err("No maps found for prog fd %d", prog_fd);
- goto err;
- }
-
- *xdp = info.type == BPF_PROG_TYPE_XDP;
-
- map_fd = bpf_map_get_fd_by_id(map_ids[0]);
- if (map_fd < 0)
- log_err("Failed to get fd by map id %d", map_ids[0]);
-err:
- if (prog_fd >= 0)
- close(prog_fd);
- return map_fd;
-}
-
-static int run_test(int server_fd, int results_fd, bool xdp)
-{
- int client = -1, srv_client = -1;
- int ret = 0;
- __u32 key = 0;
- __u32 key_gen = 1;
- __u32 key_mss = 2;
- __u32 value = 0;
- __u32 value_gen = 0;
- __u32 value_mss = 0;
-
- if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) {
- log_err("Can't clear results");
- goto err;
- }
-
- if (bpf_map_update_elem(results_fd, &key_gen, &value_gen, 0) < 0) {
- log_err("Can't clear results");
- goto err;
- }
-
- if (bpf_map_update_elem(results_fd, &key_mss, &value_mss, 0) < 0) {
- log_err("Can't clear results");
- goto err;
- }
-
- client = connect_to_server(server_fd);
- if (client == -1)
- goto err;
-
- srv_client = accept(server_fd, NULL, 0);
- if (srv_client == -1) {
- log_err("Can't accept connection");
- goto err;
- }
-
- if (bpf_map_lookup_elem(results_fd, &key, &value) < 0) {
- log_err("Can't lookup result");
- goto err;
- }
-
- if (value == 0) {
- log_err("Didn't match syncookie: %u", value);
- goto err;
- }
-
- if (bpf_map_lookup_elem(results_fd, &key_gen, &value_gen) < 0) {
- log_err("Can't lookup result");
- goto err;
- }
-
- if (xdp && value_gen == 0) {
- // SYN packets do not get passed through generic XDP, skip the
- // rest of the test.
- printf("Skipping XDP cookie check\n");
- goto out;
- }
-
- if (bpf_map_lookup_elem(results_fd, &key_mss, &value_mss) < 0) {
- log_err("Can't lookup result");
- goto err;
- }
-
- if (value != value_gen) {
- log_err("BPF generated cookie does not match kernel one");
- goto err;
- }
-
- if (value_mss < 536 || value_mss > USHRT_MAX) {
- log_err("Unexpected MSS retrieved");
- goto err;
- }
-
- goto out;
-
-err:
- ret = 1;
-out:
- close(client);
- close(srv_client);
- return ret;
-}
-
-int main(int argc, char **argv)
-{
- struct sockaddr_in addr4;
- struct sockaddr_in6 addr6;
- int server = -1;
- int server_v6 = -1;
- int results = -1;
- int err = 0;
- bool xdp;
-
- if (argc < 2) {
- fprintf(stderr, "Usage: %s prog_id\n", argv[0]);
- exit(1);
- }
-
- results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp);
- if (results < 0) {
- log_err("Can't get map");
- goto err;
- }
-
- memset(&addr4, 0, sizeof(addr4));
- addr4.sin_family = AF_INET;
- addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addr4.sin_port = 0;
-
- memset(&addr6, 0, sizeof(addr6));
- addr6.sin6_family = AF_INET6;
- addr6.sin6_addr = in6addr_loopback;
- addr6.sin6_port = 0;
-
- server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
- if (server == -1)
- goto err;
-
- server_v6 = start_server((const struct sockaddr *)&addr6,
- sizeof(addr6));
- if (server_v6 == -1)
- goto err;
-
- if (run_test(server, results, xdp))
- goto err;
-
- if (run_test(server_v6, results, xdp))
- goto err;
-
- printf("ok\n");
- goto out;
-err:
- err = 1;
-out:
- close(server);
- close(server_v6);
- close(results);
- return err;
-}
diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
index 6118e3ab61fc..56c9f8a3ad3d 100644
--- a/tools/testing/selftests/bpf/test_tcp_hdr_options.h
+++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
@@ -50,6 +50,7 @@ struct linum_err {
#define TCPOPT_EOL 0
#define TCPOPT_NOP 1
+#define TCPOPT_MSS 2
#define TCPOPT_WINDOW 3
#define TCPOPT_EXP 254
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 73da7fe8c152..35b4893ccdf8 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -15,20 +15,18 @@
#include <bpf/libbpf.h>
#include <sys/ioctl.h>
#include <linux/rtnetlink.h>
-#include <signal.h>
#include <linux/perf_event.h>
-#include <linux/err.h>
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "test_tcpnotify.h"
-#include "trace_helpers.h"
+#include "testing_helpers.h"
#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L)
pthread_t tid;
+static bool exit_thread;
+
int rx_callbacks;
static void dummyfn(void *ctx, int cpu, void *data, __u32 size)
@@ -45,7 +43,7 @@ void tcp_notifier_poller(struct perf_buffer *pb)
{
int err;
- while (1) {
+ while (!exit_thread) {
err = perf_buffer__poll(pb, 100);
if (err < 0 && err != -EINTR) {
printf("failed perf_buffer__poll: %d\n", err);
@@ -69,9 +67,8 @@ int verify_result(const struct tcpnotify_globals *result)
int main(int argc, char **argv)
{
- const char *file = "test_tcpnotify_kern.o";
+ const char *file = "test_tcpnotify_kern.bpf.o";
struct bpf_map *perf_map, *global_map;
- struct perf_buffer_opts pb_opts = {};
struct tcpnotify_globals g = {0};
struct perf_buffer *pb = NULL;
const char *cg_path = "/foo";
@@ -79,18 +76,15 @@ int main(int argc, char **argv)
int error = EXIT_FAILURE;
struct bpf_object *obj;
char test_script[80];
- cpu_set_t cpuset;
__u32 key = 0;
- CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
- pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
- if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+ if (bpf_prog_test_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
printf("FAILED: load_bpf_file failed for: %s\n", file);
goto err;
}
@@ -114,9 +108,8 @@ int main(int argc, char **argv)
return -1;
}
- pb_opts.sample_cb = dummyfn;
- pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
- if (IS_ERR(pb))
+ pb = perf_buffer__new(bpf_map__fd(perf_map), 8, dummyfn, NULL, NULL, NULL);
+ if (!pb)
goto err;
pthread_create(&tid, NULL, poller_thread, pb);
@@ -151,6 +144,13 @@ int main(int argc, char **argv)
sleep(10);
+ exit_thread = true;
+ int ret = pthread_join(tid, NULL);
+ if (ret) {
+ printf("FAILED: pthread_join\n");
+ goto err;
+ }
+
if (verify_result(&g)) {
printf("FAILED: Wrong stats Expected %d calls, got %d\n",
g.ncalls, rx_callbacks);
@@ -163,7 +163,6 @@ err:
bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
close(cg_fd);
cleanup_cgroup_environment();
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ perf_buffer__free(pb);
return error;
}
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
deleted file mode 100755
index 1ccbe804e8e1..000000000000
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ /dev/null
@@ -1,798 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# End-to-end eBPF tunnel test suite
-# The script tests BPF network tunnel implementation.
-#
-# Topology:
-# ---------
-# root namespace | at_ns0 namespace
-# |
-# ----------- | -----------
-# | tnl dev | | | tnl dev | (overlay network)
-# ----------- | -----------
-# metadata-mode | native-mode
-# with bpf |
-# |
-# ---------- | ----------
-# | veth1 | --------- | veth0 | (underlay network)
-# ---------- peer ----------
-#
-#
-# Device Configuration
-# --------------------
-# Root namespace with metadata-mode tunnel + BPF
-# Device names and addresses:
-# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
-# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
-#
-# Namespace at_ns0 with native tunnel
-# Device names and addresses:
-# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
-# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
-#
-#
-# End-to-end ping packet flow
-# ---------------------------
-# Most of the tests start by namespace creation, device configuration,
-# then ping the underlay and overlay network. When doing 'ping 10.1.1.100'
-# from root namespace, the following operations happen:
-# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
-# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
-# with remote_ip=172.16.1.200 and others.
-# 3) Outer tunnel header is prepended and route the packet to veth1's egress
-# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
-# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
-# 6) Forward the packet to the overlay tnl dev
-
-PING_ARG="-c 3 -w 10 -q"
-ret=0
-GREEN='\033[0;92m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-config_device()
-{
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth1
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip link set dev veth1 up mtu 1500
- ip addr add dev veth1 172.16.1.200/24
-}
-
-add_gre_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE key 2 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6gretap_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
- local ::11 remote ::22
-
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV fc80::200/24
- ip link set dev $DEV up
-}
-
-add_erspan_tunnel()
-{
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 2 erspan_dir egress erspan_hwid 3
- fi
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6erspan_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 2 erspan_dir egress erspan_hwid 7
- fi
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_vxlan_tunnel()
-{
- # Set static ARP entry here because iptables set-mark works
- # on L3 packet, as a result not applying to ARP packets,
- # causing errors at get_tunnel_{key/opt}.
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- id 2 dstport 4789 gbp remote 172.16.1.200
- ip netns exec at_ns0 \
- ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
- ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
-
- # root namespace
- ip link add dev $DEV type $TYPE external gbp dstport 4789
- ip link set dev $DEV address 52:54:00:d9:02:00 up
- ip addr add dev $DEV 10.1.1.200/24
- arp -s 10.1.1.100 52:54:00:d9:01:00
-}
-
-add_ip6vxlan_tunnel()
-{
- #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
- ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- #ip -4 addr del 172.16.1.200 dev veth1
- ip -6 addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
- local ::11 remote ::22
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external dstport 4789
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_geneve_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- id 2 dstport 6081 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE dstport 6081 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6geneve_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 22 \
- remote ::22 # geneve has no local option
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_ipip_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6tnl_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local ::11 remote ::22
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV 1::22/96
- ip link set dev $DEV up
-}
-
-test_gre()
-{
- TYPE=gretap
- DEV_NS=gretap00
- DEV=gretap11
- ret=0
-
- check $TYPE
- config_device
- add_gre_tunnel
- attach_bpf $DEV gre_set_tunnel gre_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gre()
-{
- TYPE=ip6gre
- DEV_NS=ip6gre00
- DEV=ip6gre11
- ret=0
-
- check $TYPE
- config_device
- # reuse the ip6gretap function
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gretap()
-{
- TYPE=ip6gretap
- DEV_NS=ip6gretap00
- DEV=ip6gretap11
- ret=0
-
- check $TYPE
- config_device
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_erspan()
-{
- TYPE=erspan
- DEV_NS=erspan00
- DEV=erspan11
- ret=0
-
- check $TYPE
- config_device
- add_erspan_tunnel $1
- attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6erspan()
-{
- TYPE=ip6erspan
- DEV_NS=ip6erspan00
- DEV=ip6erspan11
- ret=0
-
- check $TYPE
- config_device
- add_ip6erspan_tunnel $1
- attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
- ping6 $PING_ARG ::11
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_vxlan()
-{
- TYPE=vxlan
- DEV_NS=vxlan00
- DEV=vxlan11
- ret=0
-
- check $TYPE
- config_device
- add_vxlan_tunnel
- attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6vxlan()
-{
- TYPE=vxlan
- DEV_NS=ip6vxlan00
- DEV=ip6vxlan11
- ret=0
-
- check $TYPE
- config_device
- add_ip6vxlan_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip4 over ip6
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-test_geneve()
-{
- TYPE=geneve
- DEV_NS=geneve00
- DEV=geneve11
- ret=0
-
- check $TYPE
- config_device
- add_geneve_tunnel
- attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6geneve()
-{
- TYPE=geneve
- DEV_NS=ip6geneve00
- DEV=ip6geneve11
- ret=0
-
- check $TYPE
- config_device
- add_ip6geneve_tunnel
- attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-test_ipip()
-{
- TYPE=ipip
- DEV_NS=ipip00
- DEV=ipip11
- ret=0
-
- check $TYPE
- config_device
- add_ipip_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ipip6()
-{
- TYPE=ip6tnl
- DEV_NS=ipip6tnl00
- DEV=ipip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip4 over ip6
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6ip6()
-{
- TYPE=ip6tnl
- DEV_NS=ip6ip6tnl00
- DEV=ip6ip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip6 over ip6
- ping6 $PING_ARG 1::11
- check_err $?
- ip netns exec at_ns0 ping6 $PING_ARG 1::22
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-setup_xfrm_tunnel()
-{
- auth=0x$(printf '1%.0s' {1..40})
- enc=0x$(printf '2%.0s' {1..32})
- spi_in_to_out=0x1
- spi_out_to_in=0x2
- # at_ns0 namespace
- # at_ns0 -> root
- ip netns exec at_ns0 \
- ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
- spi $spi_in_to_out reqid 1 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip netns exec at_ns0 \
- ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
- tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
- mode tunnel
- # root -> at_ns0
- ip netns exec at_ns0 \
- ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
- spi $spi_out_to_in reqid 2 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip netns exec at_ns0 \
- ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
- tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
- mode tunnel
- # address & route
- ip netns exec at_ns0 \
- ip addr add dev veth0 10.1.1.100/32
- ip netns exec at_ns0 \
- ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
- src 10.1.1.100
-
- # root namespace
- # at_ns0 -> root
- ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
- spi $spi_in_to_out reqid 1 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
- tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
- mode tunnel
- # root -> at_ns0
- ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
- spi $spi_out_to_in reqid 2 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
- tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
- mode tunnel
- # address & route
- ip addr add dev veth1 10.1.1.200/32
- ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
-}
-
-test_xfrm_tunnel()
-{
- config_device
- > /sys/kernel/debug/tracing/trace
- setup_xfrm_tunnel
- tc qdisc add dev veth1 clsact
- tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
- sec xfrm_get_state
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- sleep 1
- grep "reqid 1" /sys/kernel/debug/tracing/trace
- check_err $?
- grep "spi 0x1" /sys/kernel/debug/tracing/trace
- check_err $?
- grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: xfrm tunnel"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
-}
-
-attach_bpf()
-{
- DEV=$1
- SET=$2
- GET=$3
- tc qdisc add dev $DEV clsact
- tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
- tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
-}
-
-cleanup()
-{
- ip netns delete at_ns0 2> /dev/null
- ip link del veth1 2> /dev/null
- ip link del ipip11 2> /dev/null
- ip link del ipip6tnl11 2> /dev/null
- ip link del ip6ip6tnl11 2> /dev/null
- ip link del gretap11 2> /dev/null
- ip link del ip6gre11 2> /dev/null
- ip link del ip6gretap11 2> /dev/null
- ip link del vxlan11 2> /dev/null
- ip link del ip6vxlan11 2> /dev/null
- ip link del geneve11 2> /dev/null
- ip link del ip6geneve11 2> /dev/null
- ip link del erspan11 2> /dev/null
- ip link del ip6erspan11 2> /dev/null
- ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
- ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
- ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
- ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
-}
-
-cleanup_exit()
-{
- echo "CATCH SIGKILL or SIGINT, cleanup and exit"
- cleanup
- exit 0
-}
-
-check()
-{
- ip link help 2>&1 | grep -q "\s$1\s"
- if [ $? -ne 0 ];then
- echo "SKIP $1: iproute2 not support"
- cleanup
- return 1
- fi
-}
-
-enable_debug()
-{
- echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
-}
-
-check_err()
-{
- if [ $ret -eq 0 ]; then
- ret=$1
- fi
-}
-
-bpf_tunnel_test()
-{
- local errors=0
-
- echo "Testing GRE tunnel..."
- test_gre
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRE tunnel..."
- test_ip6gre
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRETAP tunnel..."
- test_ip6gretap
- errors=$(( $errors + $? ))
-
- echo "Testing ERSPAN tunnel..."
- test_erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing IP6ERSPAN tunnel..."
- test_ip6erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing VXLAN tunnel..."
- test_vxlan
- errors=$(( $errors + $? ))
-
- echo "Testing IP6VXLAN tunnel..."
- test_ip6vxlan
- errors=$(( $errors + $? ))
-
- echo "Testing GENEVE tunnel..."
- test_geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GENEVE tunnel..."
- test_ip6geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP tunnel..."
- test_ipip
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP6 tunnel..."
- test_ipip6
- errors=$(( $errors + $? ))
-
- echo "Testing IP6IP6 tunnel..."
- test_ip6ip6
- errors=$(( $errors + $? ))
-
- echo "Testing IPSec tunnel..."
- test_xfrm_tunnel
- errors=$(( $errors + $? ))
-
- return $errors
-}
-
-trap cleanup 0 3 6
-trap cleanup_exit 2 9
-
-cleanup
-bpf_tunnel_test
-
-if [ $? -ne 0 ]; then
- echo -e "$(basename $0): ${RED}FAIL${NC}"
- exit 1
-fi
-echo -e "$(basename $0): ${GREEN}PASS${NC}"
-exit 0
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 58b5a349d3ba..27db34ecf3f5 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -22,8 +22,6 @@
#include <limits.h>
#include <assert.h>
-#include <sys/capability.h>
-
#include <linux/unistd.h>
#include <linux/filter.h>
#include <linux/bpf_perf_event.h>
@@ -31,42 +29,79 @@
#include <linux/if_ether.h>
#include <linux/btf.h>
+#include <bpf/btf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#ifdef HAVE_GENHDR
-# include "autoconf.h"
-#else
-# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
-# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
-# endif
-#endif
-#include "bpf_rlimit.h"
+#include "autoconf_helper.h"
+#include "unpriv_helpers.h"
+#include "cap_helpers.h"
#include "bpf_rand.h"
#include "bpf_util.h"
#include "test_btf.h"
#include "../../../include/linux/filter.h"
+#include "testing_helpers.h"
#define MAX_INSNS BPF_MAXINSNS
+#define MAX_EXPECTED_INSNS 32
+#define MAX_UNEXPECTED_INSNS 32
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 21
+#define MAX_NR_MAPS 23
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
+#define MAX_FUNC_INFOS 8
+#define MAX_BTF_STRINGS 256
+#define MAX_BTF_TYPES 256
+
+#define INSN_OFF_MASK ((__s16)0xFFFF)
+#define INSN_IMM_MASK ((__s32)0xFFFFFFFF)
+#define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef)
+
+#define DEFAULT_LIBBPF_LOG_LEVEL 4
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+#define F_NEEDS_JIT_ENABLED (1 << 2)
+/* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
+#define ADMIN_CAPS (1ULL << CAP_NET_ADMIN | \
+ 1ULL << CAP_PERFMON | \
+ 1ULL << CAP_BPF)
#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
static bool unpriv_disabled = false;
+static bool jit_disabled;
static int skips;
static bool verbose = false;
+static int verif_log_level = 0;
+
+struct kfunc_btf_id_pair {
+ const char *kfunc;
+ int insn_idx;
+};
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
struct bpf_insn *fill_insns;
+ /* If specified, test engine looks for this sequence of
+ * instructions in the BPF program after loading. Allows to
+ * test rewrites applied by verifier. Use values
+ * INSN_OFF_MASK and INSN_IMM_MASK to mask `off` and `imm`
+ * fields if content does not matter. The test case fails if
+ * specified instructions are not found.
+ *
+ * The sequence could be split into sub-sequences by adding
+ * SKIP_INSNS instruction at the end of each sub-sequence. In
+ * such case sub-sequences are searched for one after another.
+ */
+ struct bpf_insn expected_insns[MAX_EXPECTED_INSNS];
+ /* If specified, test engine applies same pattern matching
+ * logic as for `expected_insns`. If the specified pattern is
+ * matched test case is marked as failed.
+ */
+ struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS];
int fixup_map_hash_8b[MAX_FIXUPS];
int fixup_map_hash_48b[MAX_FIXUPS];
int fixup_map_hash_16b[MAX_FIXUPS];
@@ -88,6 +123,9 @@ struct bpf_test {
int fixup_map_event_output[MAX_FIXUPS];
int fixup_map_reuseport_array[MAX_FIXUPS];
int fixup_map_ringbuf[MAX_FIXUPS];
+ int fixup_map_timer[MAX_FIXUPS];
+ int fixup_map_kptr[MAX_FIXUPS];
+ struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
* Can be a tab-separated sequence of expected strings. An empty string
* means no log verification.
@@ -105,7 +143,7 @@ struct bpf_test {
enum bpf_prog_type prog_type;
uint8_t flags;
void (*fill_helper)(struct bpf_test *self);
- uint8_t runs;
+ int runs;
#define bpf_testdata_struct_t \
struct { \
uint32_t retval, retval_unpriv; \
@@ -120,6 +158,14 @@ struct bpf_test {
};
enum bpf_attach_type expected_attach_type;
const char *kfunc;
+ struct bpf_func_info func_info[MAX_FUNC_INFOS];
+ int func_info_cnt;
+ char btf_strings[MAX_BTF_STRINGS];
+ /* A set of BTF types to load when specified,
+ * use macro definitions from test_btf.h,
+ * must end with BTF_END_RAW
+ */
+ __u32 btf_types[MAX_BTF_TYPES];
};
/* Note we want this to be 64 bit aligned so that the end of our array is
@@ -157,7 +203,7 @@ loop:
insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_vlan_push),
+ BPF_FUNC_skb_vlan_push);
insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
i++;
}
@@ -168,7 +214,7 @@ loop:
i++;
insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_vlan_pop),
+ BPF_FUNC_skb_vlan_pop);
insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
i++;
}
@@ -373,6 +419,45 @@ static void bpf_fill_torturous_jumps(struct bpf_test *self)
}
}
+static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->fill_insns;
+ /* This test was added to catch a specific use after free
+ * error, which happened upon BPF program reallocation.
+ * Reallocation is handled by core.c:bpf_prog_realloc, which
+ * reuses old memory if page boundary is not crossed. The
+ * value of `len` is chosen to cross this boundary on bpf_loop
+ * patching.
+ */
+ const int len = getpagesize() - 25;
+ int callback_load_idx;
+ int callback_idx;
+ int i = 0;
+
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1);
+ callback_load_idx = i;
+ insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW,
+ BPF_REG_2, BPF_PSEUDO_FUNC, 0,
+ 777 /* filled below */);
+ insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0);
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0);
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0);
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop);
+
+ while (i < len - 3)
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+ insn[i++] = BPF_EXIT_INSN();
+
+ callback_idx = i;
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+ insn[i++] = BPF_EXIT_INSN();
+
+ insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1;
+ self->func_info[1].insn_off = callback_idx;
+ self->prog_len = i;
+ assert(i == len);
+}
+
/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
#define BPF_SK_LOOKUP(func) \
/* struct bpf_sock_tuple tuple = {} */ \
@@ -445,7 +530,7 @@ static int probe_filter_length(const struct bpf_insn *fp)
static bool skip_unsupported_map(enum bpf_map_type map_type)
{
- if (!bpf_probe_map_type(map_type, 0)) {
+ if (!libbpf_probe_bpf_map_type(map_type, NULL)) {
printf("SKIP (unsupported map type %d)\n", map_type);
skips++;
return true;
@@ -457,11 +542,11 @@ static int __create_map(uint32_t type, uint32_t size_key,
uint32_t size_value, uint32_t max_elem,
uint32_t extra_flags)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
int fd;
- fd = bpf_create_map(type, size_key, size_value, max_elem,
- (type == BPF_MAP_TYPE_HASH ?
- BPF_F_NO_PREALLOC : 0) | extra_flags);
+ opts.map_flags = (type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0) | extra_flags;
+ fd = bpf_map_create(type, NULL, size_key, size_value, max_elem, &opts);
if (fd < 0) {
if (skip_unsupported_map(type))
return -1;
@@ -494,8 +579,7 @@ static int create_prog_dummy_simple(enum bpf_prog_type prog_type, int ret)
BPF_EXIT_INSN(),
};
- return bpf_load_program(prog_type, prog,
- ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL);
}
static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd,
@@ -510,8 +594,7 @@ static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd,
BPF_EXIT_INSN(),
};
- return bpf_load_program(prog_type, prog,
- ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL);
}
static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
@@ -519,8 +602,8 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
{
int mfd, p1fd, p2fd, p3fd;
- mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
- sizeof(int), max_elem, 0);
+ mfd = bpf_map_create(BPF_MAP_TYPE_PROG_ARRAY, NULL, sizeof(int),
+ sizeof(int), max_elem, NULL);
if (mfd < 0) {
if (skip_unsupported_map(BPF_MAP_TYPE_PROG_ARRAY))
return -1;
@@ -550,10 +633,11 @@ err:
static int create_map_in_map(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
int inner_map_fd, outer_map_fd;
- inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
- sizeof(int), 1, 0);
+ inner_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
if (inner_map_fd < 0) {
if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY))
return -1;
@@ -561,8 +645,9 @@ static int create_map_in_map(void)
return inner_map_fd;
}
- outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
- sizeof(int), inner_map_fd, 1, 0);
+ opts.inner_map_fd = inner_map_fd;
+ outer_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
+ sizeof(int), sizeof(int), 1, &opts);
if (outer_map_fd < 0) {
if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY_OF_MAPS))
return -1;
@@ -581,8 +666,8 @@ static int create_cgroup_storage(bool percpu)
BPF_MAP_TYPE_CGROUP_STORAGE;
int fd;
- fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key),
- TEST_DATA_LEN, 0, 0);
+ fd = bpf_map_create(type, NULL, sizeof(struct bpf_cgroup_storage_key),
+ TEST_DATA_LEN, 0, NULL);
if (fd < 0) {
if (skip_unsupported_map(type))
return -1;
@@ -600,8 +685,22 @@ static int create_cgroup_storage(bool percpu)
* int cnt;
* struct bpf_spin_lock l;
* };
+ * struct bpf_timer {
+ * __u64 :64;
+ * __u64 :64;
+ * } __attribute__((aligned(8)));
+ * struct timer {
+ * struct bpf_timer t;
+ * };
+ * struct btf_ptr {
+ * struct prog_test_ref_kfunc __kptr_untrusted *ptr;
+ * struct prog_test_ref_kfunc __kptr *ptr;
+ * struct prog_test_member __kptr *ptr;
+ * }
*/
-static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l";
+static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"
+ "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_untrusted"
+ "\0prog_test_member";
static __u32 btf_raw_types[] = {
/* int */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -612,56 +711,104 @@ static __u32 btf_raw_types[] = {
BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+ /* struct bpf_timer */ /* [4] */
+ BTF_TYPE_ENC(25, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0), 16),
+ /* struct timer */ /* [5] */
+ BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16),
+ BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
+ /* struct prog_test_ref_kfunc */ /* [6] */
+ BTF_STRUCT_ENC(51, 0, 0),
+ BTF_STRUCT_ENC(95, 0, 0), /* [7] */
+ /* type tag "kptr_untrusted" */
+ BTF_TYPE_TAG_ENC(80, 6), /* [8] */
+ /* type tag "kptr" */
+ BTF_TYPE_TAG_ENC(75, 6), /* [9] */
+ BTF_TYPE_TAG_ENC(75, 7), /* [10] */
+ BTF_PTR_ENC(8), /* [11] */
+ BTF_PTR_ENC(9), /* [12] */
+ BTF_PTR_ENC(10), /* [13] */
+ /* struct btf_ptr */ /* [14] */
+ BTF_STRUCT_ENC(43, 3, 24),
+ BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr_untrusted *ptr; */
+ BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr *ptr; */
+ BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
};
-static int load_btf(void)
+static char bpf_vlog[UINT_MAX >> 5];
+
+static int load_btf_spec(__u32 *types, int types_len,
+ const char *strings, int strings_len)
{
struct btf_header hdr = {
.magic = BTF_MAGIC,
.version = BTF_VERSION,
.hdr_len = sizeof(struct btf_header),
- .type_len = sizeof(btf_raw_types),
- .str_off = sizeof(btf_raw_types),
- .str_len = sizeof(btf_str_sec),
+ .type_len = types_len,
+ .str_off = types_len,
+ .str_len = strings_len,
};
void *ptr, *raw_btf;
int btf_fd;
+ LIBBPF_OPTS(bpf_btf_load_opts, opts,
+ .log_buf = bpf_vlog,
+ .log_size = sizeof(bpf_vlog),
+ .log_level = (verbose
+ ? verif_log_level
+ : DEFAULT_LIBBPF_LOG_LEVEL),
+ );
- ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) +
- sizeof(btf_str_sec));
+ raw_btf = malloc(sizeof(hdr) + types_len + strings_len);
+ ptr = raw_btf;
memcpy(ptr, &hdr, sizeof(hdr));
ptr += sizeof(hdr);
- memcpy(ptr, btf_raw_types, hdr.type_len);
+ memcpy(ptr, types, hdr.type_len);
ptr += hdr.type_len;
- memcpy(ptr, btf_str_sec, hdr.str_len);
+ memcpy(ptr, strings, hdr.str_len);
ptr += hdr.str_len;
- btf_fd = bpf_load_btf(raw_btf, ptr - raw_btf, 0, 0, 0);
- free(raw_btf);
+ btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts);
if (btf_fd < 0)
- return -1;
- return btf_fd;
+ printf("Failed to load BTF spec: '%s'\n", strerror(errno));
+
+ free(raw_btf);
+
+ return btf_fd < 0 ? -1 : btf_fd;
+}
+
+static int load_btf(void)
+{
+ return load_btf_spec(btf_raw_types, sizeof(btf_raw_types),
+ btf_str_sec, sizeof(btf_str_sec));
+}
+
+static int load_btf_for_test(struct bpf_test *test)
+{
+ int types_num = 0;
+
+ while (types_num < MAX_BTF_TYPES &&
+ test->btf_types[types_num] != BTF_END_RAW)
+ ++types_num;
+
+ int types_len = types_num * sizeof(test->btf_types[0]);
+
+ return load_btf_spec(test->btf_types, types_len,
+ test->btf_strings, sizeof(test->btf_strings));
}
static int create_map_spin_lock(void)
{
- struct bpf_create_map_attr attr = {
- .name = "test_map",
- .map_type = BPF_MAP_TYPE_ARRAY,
- .key_size = 4,
- .value_size = 8,
- .max_entries = 1,
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
.btf_key_type_id = 1,
.btf_value_type_id = 3,
- };
+ );
int fd, btf_fd;
btf_fd = load_btf();
if (btf_fd < 0)
return -1;
- attr.btf_fd = btf_fd;
- fd = bpf_create_map_xattr(&attr);
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 8, 1, &opts);
if (fd < 0)
printf("Failed to create map with spin_lock\n");
return fd;
@@ -669,33 +816,196 @@ static int create_map_spin_lock(void)
static int create_sk_storage_map(void)
{
- struct bpf_create_map_attr attr = {
- .name = "test_map",
- .map_type = BPF_MAP_TYPE_SK_STORAGE,
- .key_size = 4,
- .value_size = 8,
- .max_entries = 0,
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
.map_flags = BPF_F_NO_PREALLOC,
.btf_key_type_id = 1,
.btf_value_type_id = 3,
- };
+ );
int fd, btf_fd;
btf_fd = load_btf();
if (btf_fd < 0)
return -1;
- attr.btf_fd = btf_fd;
- fd = bpf_create_map_xattr(&attr);
- close(attr.btf_fd);
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "test_map", 4, 8, 0, &opts);
+ close(opts.btf_fd);
if (fd < 0)
printf("Failed to create sk_storage_map\n");
return fd;
}
-static char bpf_vlog[UINT_MAX >> 8];
+static int create_map_timer(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .btf_key_type_id = 1,
+ .btf_value_type_id = 5,
+ );
+ int fd, btf_fd;
+
+ btf_fd = load_btf();
+ if (btf_fd < 0)
+ return -1;
+
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 16, 1, &opts);
+ if (fd < 0)
+ printf("Failed to create map with timer\n");
+ return fd;
+}
+
+static int create_map_kptr(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .btf_key_type_id = 1,
+ .btf_value_type_id = 14,
+ );
+ int fd, btf_fd;
+
+ btf_fd = load_btf();
+ if (btf_fd < 0)
+ return -1;
+
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 24, 1, &opts);
+ if (fd < 0)
+ printf("Failed to create map with btf_id pointer\n");
+ return fd;
+}
+
+static void set_root(bool set)
+{
+ __u64 caps;
+
+ if (set) {
+ if (cap_enable_effective(1ULL << CAP_SYS_ADMIN, &caps))
+ perror("cap_disable_effective(CAP_SYS_ADMIN)");
+ } else {
+ if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps))
+ perror("cap_disable_effective(CAP_SYS_ADMIN)");
+ }
+}
+
+static __u64 ptr_to_u64(const void *ptr)
+{
+ return (uintptr_t) ptr;
+}
+
+static struct btf *btf__load_testmod_btf(struct btf *vmlinux)
+{
+ struct bpf_btf_info info;
+ __u32 len = sizeof(info);
+ struct btf *btf = NULL;
+ char name[64];
+ __u32 id = 0;
+ int err, fd;
+
+ /* Iterate all loaded BTF objects and find bpf_testmod,
+ * we need SYS_ADMIN cap for that.
+ */
+ set_root(true);
+
+ while (true) {
+ err = bpf_btf_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ perror("bpf_btf_get_next_id failed");
+ break;
+ }
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ perror("bpf_btf_get_fd_by_id failed");
+ break;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.name_len = sizeof(name);
+ info.name = ptr_to_u64(name);
+ len = sizeof(info);
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ close(fd);
+ perror("bpf_obj_get_info_by_fd failed");
+ break;
+ }
+
+ if (strcmp("bpf_testmod", name)) {
+ close(fd);
+ continue;
+ }
+
+ btf = btf__load_from_kernel_by_id_split(id, vmlinux);
+ if (!btf) {
+ close(fd);
+ break;
+ }
+
+ /* We need the fd to stay open so it can be used in fd_array.
+ * The final cleanup call to btf__free will free btf object
+ * and close the file descriptor.
+ */
+ btf__set_fd(btf, fd);
+ break;
+ }
+
+ set_root(false);
+ return btf;
+}
+
+static struct btf *testmod_btf;
+static struct btf *vmlinux_btf;
+
+static void kfuncs_cleanup(void)
+{
+ btf__free(testmod_btf);
+ btf__free(vmlinux_btf);
+}
+
+static void fixup_prog_kfuncs(struct bpf_insn *prog, int *fd_array,
+ struct kfunc_btf_id_pair *fixup_kfunc_btf_id)
+{
+ /* Patch in kfunc BTF IDs */
+ while (fixup_kfunc_btf_id->kfunc) {
+ int btf_id = 0;
+
+ /* try to find kfunc in kernel BTF */
+ vmlinux_btf = vmlinux_btf ?: btf__load_vmlinux_btf();
+ if (vmlinux_btf) {
+ btf_id = btf__find_by_name_kind(vmlinux_btf,
+ fixup_kfunc_btf_id->kfunc,
+ BTF_KIND_FUNC);
+ btf_id = btf_id < 0 ? 0 : btf_id;
+ }
+
+ /* kfunc not found in kernel BTF, try bpf_testmod BTF */
+ if (!btf_id) {
+ testmod_btf = testmod_btf ?: btf__load_testmod_btf(vmlinux_btf);
+ if (testmod_btf) {
+ btf_id = btf__find_by_name_kind(testmod_btf,
+ fixup_kfunc_btf_id->kfunc,
+ BTF_KIND_FUNC);
+ btf_id = btf_id < 0 ? 0 : btf_id;
+ if (btf_id) {
+ /* We put bpf_testmod module fd into fd_array
+ * and its index 1 into instruction 'off'.
+ */
+ *fd_array = btf__fd(testmod_btf);
+ prog[fixup_kfunc_btf_id->insn_idx].off = 1;
+ }
+ }
+ }
+
+ prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+ fixup_kfunc_btf_id++;
+ }
+}
static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
- struct bpf_insn *prog, int *map_fds)
+ struct bpf_insn *prog, int *map_fds, int *fd_array)
{
int *fixup_map_hash_8b = test->fixup_map_hash_8b;
int *fixup_map_hash_48b = test->fixup_map_hash_48b;
@@ -718,6 +1028,8 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_map_event_output = test->fixup_map_event_output;
int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
int *fixup_map_ringbuf = test->fixup_map_ringbuf;
+ int *fixup_map_timer = test->fixup_map_timer;
+ int *fixup_map_kptr = test->fixup_map_kptr;
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -897,62 +1209,45 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
}
if (*fixup_map_ringbuf) {
map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0,
- 0, 4096);
+ 0, getpagesize());
do {
prog[*fixup_map_ringbuf].imm = map_fds[20];
fixup_map_ringbuf++;
} while (*fixup_map_ringbuf);
}
-}
+ if (*fixup_map_timer) {
+ map_fds[21] = create_map_timer();
+ do {
+ prog[*fixup_map_timer].imm = map_fds[21];
+ fixup_map_timer++;
+ } while (*fixup_map_timer);
+ }
+ if (*fixup_map_kptr) {
+ map_fds[22] = create_map_kptr();
+ do {
+ prog[*fixup_map_kptr].imm = map_fds[22];
+ fixup_map_kptr++;
+ } while (*fixup_map_kptr);
+ }
-struct libcap {
- struct __user_cap_header_struct hdr;
- struct __user_cap_data_struct data[2];
-};
+ fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id);
+}
static int set_admin(bool admin)
{
- cap_t caps;
- /* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
- const cap_value_t cap_net_admin = CAP_NET_ADMIN;
- const cap_value_t cap_sys_admin = CAP_SYS_ADMIN;
- struct libcap *cap;
- int ret = -1;
-
- caps = cap_get_proc();
- if (!caps) {
- perror("cap_get_proc");
- return -1;
- }
- cap = (struct libcap *)caps;
- if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) {
- perror("cap_set_flag clear admin");
- goto out;
- }
- if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin,
- admin ? CAP_SET : CAP_CLEAR)) {
- perror("cap_set_flag set_or_clear net");
- goto out;
- }
- /* libcap is likely old and simply ignores CAP_BPF and CAP_PERFMON,
- * so update effective bits manually
- */
+ int err;
+
if (admin) {
- cap->data[1].effective |= 1 << (38 /* CAP_PERFMON */ - 32);
- cap->data[1].effective |= 1 << (39 /* CAP_BPF */ - 32);
+ err = cap_enable_effective(ADMIN_CAPS, NULL);
+ if (err)
+ perror("cap_enable_effective(ADMIN_CAPS)");
} else {
- cap->data[1].effective &= ~(1 << (38 - 32));
- cap->data[1].effective &= ~(1 << (39 - 32));
- }
- if (cap_set_proc(caps)) {
- perror("cap_set_proc");
- goto out;
+ err = cap_disable_effective(ADMIN_CAPS, NULL);
+ if (err)
+ perror("cap_disable_effective(ADMIN_CAPS)");
}
- ret = 0;
-out:
- if (cap_free(caps))
- perror("cap_free");
- return ret;
+
+ return err;
}
static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
@@ -960,13 +1255,18 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
{
__u8 tmp[TEST_DATA_LEN << 2];
__u32 size_tmp = sizeof(tmp);
- uint32_t retval;
int err, saved_errno;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = data,
+ .data_size_in = size_data,
+ .data_out = tmp,
+ .data_size_out = size_tmp,
+ .repeat = 1,
+ );
if (unpriv)
set_admin(true);
- err = bpf_prog_test_run(fd_prog, 1, data, size_data,
- tmp, &size_tmp, &retval, NULL);
+ err = bpf_prog_test_run_opts(fd_prog, &topts);
saved_errno = errno;
if (unpriv)
@@ -974,7 +1274,7 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
if (err) {
switch (saved_errno) {
- case 524/*ENOTSUPP*/:
+ case ENOTSUPP:
printf("Did not run the program (not supported) ");
return 0;
case EPERM:
@@ -990,9 +1290,8 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
}
}
- if (retval != expected_val &&
- expected_val != POINTER_VALUE) {
- printf("FAIL retval %d != %d ", retval, expected_val);
+ if (topts.retval != expected_val && expected_val != POINTER_VALUE) {
+ printf("FAIL retval %d != %d ", topts.retval, expected_val);
return 1;
}
@@ -1035,28 +1334,205 @@ static bool cmp_str_seq(const char *log, const char *exp)
return true;
}
+static bool is_null_insn(struct bpf_insn *insn)
+{
+ struct bpf_insn null_insn = {};
+
+ return memcmp(insn, &null_insn, sizeof(null_insn)) == 0;
+}
+
+static bool is_skip_insn(struct bpf_insn *insn)
+{
+ struct bpf_insn skip_insn = SKIP_INSNS();
+
+ return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0;
+}
+
+static int null_terminated_insn_len(struct bpf_insn *seq, int max_len)
+{
+ int i;
+
+ for (i = 0; i < max_len; ++i) {
+ if (is_null_insn(&seq[i]))
+ return i;
+ }
+ return max_len;
+}
+
+static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked)
+{
+ struct bpf_insn orig_masked;
+
+ memcpy(&orig_masked, orig, sizeof(orig_masked));
+ if (masked->imm == INSN_IMM_MASK)
+ orig_masked.imm = INSN_IMM_MASK;
+ if (masked->off == INSN_OFF_MASK)
+ orig_masked.off = INSN_OFF_MASK;
+
+ return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0;
+}
+
+static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
+ int seq_len, int subseq_len)
+{
+ int i, j;
+
+ if (subseq_len > seq_len)
+ return -1;
+
+ for (i = 0; i < seq_len - subseq_len + 1; ++i) {
+ bool found = true;
+
+ for (j = 0; j < subseq_len; ++j) {
+ if (!compare_masked_insn(&seq[i + j], &subseq[j])) {
+ found = false;
+ break;
+ }
+ }
+ if (found)
+ return i;
+ }
+
+ return -1;
+}
+
+static int find_skip_insn_marker(struct bpf_insn *seq, int len)
+{
+ int i;
+
+ for (i = 0; i < len; ++i)
+ if (is_skip_insn(&seq[i]))
+ return i;
+
+ return -1;
+}
+
+/* Return true if all sub-sequences in `subseqs` could be found in
+ * `seq` one after another. Sub-sequences are separated by a single
+ * nil instruction.
+ */
+static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs,
+ int seq_len, int max_subseqs_len)
+{
+ int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len);
+
+ while (subseqs_len > 0) {
+ int skip_idx = find_skip_insn_marker(subseqs, subseqs_len);
+ int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx;
+ int subseq_idx = find_insn_subseq(seq, subseqs,
+ seq_len, cur_subseq_len);
+
+ if (subseq_idx < 0)
+ return false;
+ seq += subseq_idx + cur_subseq_len;
+ seq_len -= subseq_idx + cur_subseq_len;
+ subseqs += cur_subseq_len + 1;
+ subseqs_len -= cur_subseq_len + 1;
+ }
+
+ return true;
+}
+
+static void print_insn(struct bpf_insn *buf, int cnt)
+{
+ int i;
+
+ printf(" addr op d s off imm\n");
+ for (i = 0; i < cnt; ++i) {
+ struct bpf_insn *insn = &buf[i];
+
+ if (is_null_insn(insn))
+ break;
+
+ if (is_skip_insn(insn))
+ printf(" ...\n");
+ else
+ printf(" %04x: %02x %1x %x %04hx %08x\n",
+ i, insn->code, insn->dst_reg,
+ insn->src_reg, insn->off, insn->imm);
+ }
+}
+
+static bool check_xlated_program(struct bpf_test *test, int fd_prog)
+{
+ struct bpf_insn *buf;
+ unsigned int cnt;
+ bool result = true;
+ bool check_expected = !is_null_insn(test->expected_insns);
+ bool check_unexpected = !is_null_insn(test->unexpected_insns);
+
+ if (!check_expected && !check_unexpected)
+ goto out;
+
+ if (get_xlated_program(fd_prog, &buf, &cnt)) {
+ printf("FAIL: can't get xlated program\n");
+ result = false;
+ goto out;
+ }
+
+ if (check_expected &&
+ !find_all_insn_subseqs(buf, test->expected_insns,
+ cnt, MAX_EXPECTED_INSNS)) {
+ printf("FAIL: can't find expected subsequence of instructions\n");
+ result = false;
+ if (verbose) {
+ printf("Program:\n");
+ print_insn(buf, cnt);
+ printf("Expected subsequence:\n");
+ print_insn(test->expected_insns, MAX_EXPECTED_INSNS);
+ }
+ }
+
+ if (check_unexpected &&
+ find_all_insn_subseqs(buf, test->unexpected_insns,
+ cnt, MAX_UNEXPECTED_INSNS)) {
+ printf("FAIL: found unexpected subsequence of instructions\n");
+ result = false;
+ if (verbose) {
+ printf("Program:\n");
+ print_insn(buf, cnt);
+ printf("Un-expected subsequence:\n");
+ print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS);
+ }
+ }
+
+ free(buf);
+ out:
+ return result;
+}
+
static void do_test_single(struct bpf_test *test, bool unpriv,
int *passes, int *errors)
{
- int fd_prog, expected_ret, alignment_prevented_execution;
+ int fd_prog, btf_fd, expected_ret, alignment_prevented_execution;
int prog_len, prog_type = test->prog_type;
struct bpf_insn *prog = test->insns;
- struct bpf_load_program_attr attr;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
int run_errs, run_successes;
int map_fds[MAX_NR_MAPS];
const char *expected_err;
+ int fd_array[2] = { -1, -1 };
int saved_errno;
int fixup_skips;
__u32 pflags;
int i, err;
+ if ((test->flags & F_NEEDS_JIT_ENABLED) && jit_disabled) {
+ printf("SKIP (requires BPF JIT)\n");
+ skips++;
+ sched_yield();
+ return;
+ }
+
+ fd_prog = -1;
for (i = 0; i < MAX_NR_MAPS; i++)
map_fds[i] = -1;
+ btf_fd = -1;
if (!prog_type)
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
fixup_skips = skips;
- do_test_fixup(test, prog_type, prog, map_fds);
+ do_test_fixup(test, prog_type, prog, map_fds, &fd_array[1]);
if (test->fill_insns) {
prog = test->fill_insns;
prog_len = test->prog_len;
@@ -1069,7 +1545,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (fixup_skips != skips)
return;
- pflags = BPF_F_TEST_RND_HI32;
+ pflags = testing_prog_flags();
if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
pflags |= BPF_F_STRICT_ALIGNMENT;
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
@@ -1081,44 +1557,68 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
test->result_unpriv : test->result;
expected_err = unpriv && test->errstr_unpriv ?
test->errstr_unpriv : test->errstr;
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = prog_type;
- attr.expected_attach_type = test->expected_attach_type;
- attr.insns = prog;
- attr.insns_cnt = prog_len;
- attr.license = "GPL";
- if (verbose)
- attr.log_level = 1;
- else if (expected_ret == VERBOSE_ACCEPT)
- attr.log_level = 2;
- else
- attr.log_level = 4;
- attr.prog_flags = pflags;
- if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
- attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
- attr.expected_attach_type);
- if (attr.attach_btf_id < 0) {
+ opts.expected_attach_type = test->expected_attach_type;
+ if (expected_ret == VERBOSE_ACCEPT)
+ opts.log_level = 2;
+ else if (verbose)
+ opts.log_level = verif_log_level | 4; /* force stats */
+ else
+ opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
+ opts.prog_flags = pflags;
+ if (fd_array[1] != -1)
+ opts.fd_array = &fd_array[0];
+
+ if ((prog_type == BPF_PROG_TYPE_TRACING ||
+ prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) {
+ int attach_btf_id;
+
+ attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
+ opts.expected_attach_type);
+ if (attach_btf_id < 0) {
printf("FAIL\nFailed to find BTF ID for '%s'!\n",
test->kfunc);
(*errors)++;
return;
}
+
+ opts.attach_btf_id = attach_btf_id;
+ }
+
+ if (test->btf_types[0] != 0) {
+ btf_fd = load_btf_for_test(test);
+ if (btf_fd < 0)
+ goto fail_log;
+ opts.prog_btf_fd = btf_fd;
+ }
+
+ if (test->func_info_cnt != 0) {
+ opts.func_info = test->func_info;
+ opts.func_info_cnt = test->func_info_cnt;
+ opts.func_info_rec_size = sizeof(test->func_info[0]);
}
- fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
+ opts.log_buf = bpf_vlog;
+ opts.log_size = sizeof(bpf_vlog);
+ fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts);
saved_errno = errno;
/* BPF_PROG_TYPE_TRACING requires more setup and
* bpf_probe_prog_type won't give correct answer
*/
if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING &&
- !bpf_probe_prog_type(prog_type, 0)) {
+ !libbpf_probe_bpf_prog_type(prog_type, NULL)) {
printf("SKIP (unsupported program type %d)\n", prog_type);
skips++;
goto close_fds;
}
+ if (fd_prog < 0 && saved_errno == ENOTSUPP) {
+ printf("SKIP (program uses an unsupported feature)\n");
+ skips++;
+ goto close_fds;
+ }
+
alignment_prevented_execution = 0;
if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) {
@@ -1147,7 +1647,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
}
}
- if (test->insn_processed) {
+ if (!unpriv && test->insn_processed) {
uint32_t insn_processed;
char *proc;
@@ -1163,9 +1663,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (verbose)
printf(", verifier log:\n%s", bpf_vlog);
+ if (!check_xlated_program(test, fd_prog))
+ goto fail_log;
+
run_errs = 0;
run_successes = 0;
- if (!alignment_prevented_execution && fd_prog >= 0) {
+ if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
uint32_t expected_val;
int i;
@@ -1206,6 +1709,7 @@ close_fds:
if (test->fill_insns)
free(test->fill_insns);
close(fd_prog);
+ close(btf_fd);
for (i = 0; i < MAX_NR_MAPS; i++)
close(map_fds[i]);
sched_yield();
@@ -1218,47 +1722,18 @@ fail_log:
static bool is_admin(void)
{
- cap_flag_value_t net_priv = CAP_CLEAR;
- bool perfmon_priv = false;
- bool bpf_priv = false;
- struct libcap *cap;
- cap_t caps;
-
-#ifdef CAP_IS_SUPPORTED
- if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
- perror("cap_get_flag");
- return false;
- }
-#endif
- caps = cap_get_proc();
- if (!caps) {
- perror("cap_get_proc");
+ __u64 caps;
+
+ /* The test checks for finer cap as CAP_NET_ADMIN,
+ * CAP_PERFMON, and CAP_BPF instead of CAP_SYS_ADMIN.
+ * Thus, disable CAP_SYS_ADMIN at the beginning.
+ */
+ if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps)) {
+ perror("cap_disable_effective(CAP_SYS_ADMIN)");
return false;
}
- cap = (struct libcap *)caps;
- bpf_priv = cap->data[1].effective & (1 << (39/* CAP_BPF */ - 32));
- perfmon_priv = cap->data[1].effective & (1 << (38/* CAP_PERFMON */ - 32));
- if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv))
- perror("cap_get_flag NET");
- if (cap_free(caps))
- perror("cap_free");
- return bpf_priv && perfmon_priv && net_priv == CAP_SET;
-}
-static void get_unpriv_disabled()
-{
- char buf[2];
- FILE *fd;
-
- fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
- if (!fd) {
- perror("fopen /proc/sys/"UNPRIV_SYSCTL);
- unpriv_disabled = true;
- return;
- }
- if (fgets(buf, 2, fd) == buf && atoi(buf))
- unpriv_disabled = true;
- fclose(fd);
+ return (caps & ADMIN_CAPS) == ADMIN_CAPS;
}
static bool test_as_unpriv(struct bpf_test *test)
@@ -1285,6 +1760,12 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
{
int i, passes = 0, errors = 0;
+ /* ensure previous instance of the module is unloaded */
+ unload_bpf_testmod(verbose);
+
+ if (load_bpf_testmod(verbose))
+ return EXIT_FAILURE;
+
for (i = from; i < to; i++) {
struct bpf_test *test = &tests[i];
@@ -1312,6 +1793,9 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
}
}
+ unload_bpf_testmod(verbose);
+ kfuncs_cleanup();
+
printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
skips, errors);
return errors ? EXIT_FAILURE : EXIT_SUCCESS;
@@ -1326,6 +1810,13 @@ int main(int argc, char **argv)
if (argc > 1 && strcmp(argv[1], "-v") == 0) {
arg++;
verbose = true;
+ verif_log_level = 1;
+ argc--;
+ }
+ if (argc > 1 && strcmp(argv[1], "-vv") == 0) {
+ arg++;
+ verbose = true;
+ verif_log_level = 2;
argc--;
}
@@ -1346,13 +1837,18 @@ int main(int argc, char **argv)
}
}
- get_unpriv_disabled();
+ unpriv_disabled = get_unpriv_disabled();
if (unpriv && unpriv_disabled) {
printf("Cannot run as unprivileged user with sysctl %s.\n",
UNPRIV_SYSCTL);
return EXIT_FAILURE;
}
+ jit_disabled = !is_jit_enabled();
+
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
bpf_semi_rand_init();
return do_test(unpriv, from, to);
}
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
deleted file mode 100644
index 8d6918c3b4a2..000000000000
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ /dev/null
@@ -1,174 +0,0 @@
-#include <errno.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <linux/filter.h>
-#include <linux/unistd.h>
-
-#include <bpf/bpf.h>
-
-#include "bpf_rlimit.h"
-
-#define LOG_SIZE (1 << 20)
-
-#define err(str...) printf("ERROR: " str)
-
-static const struct bpf_insn code_sample[] = {
- /* We need a few instructions to pass the min log length */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
-};
-
-static inline __u64 ptr_to_u64(const void *ptr)
-{
- return (__u64) (unsigned long) ptr;
-}
-
-static int load(char *log, size_t log_len, int log_level)
-{
- union bpf_attr attr;
-
- bzero(&attr, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.insn_cnt = (__u32)(sizeof(code_sample) / sizeof(struct bpf_insn));
- attr.insns = ptr_to_u64(code_sample);
- attr.license = ptr_to_u64("GPL");
- attr.log_buf = ptr_to_u64(log);
- attr.log_size = log_len;
- attr.log_level = log_level;
-
- return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
-}
-
-static void check_ret(int ret, int exp_errno)
-{
- if (ret > 0) {
- close(ret);
- err("broken sample loaded successfully!?\n");
- exit(1);
- }
-
- if (!ret || errno != exp_errno) {
- err("Program load returned: ret:%d/errno:%d, expected ret:%d/errno:%d\n",
- ret, errno, -1, exp_errno);
- exit(1);
- }
-}
-
-static void check_ones(const char *buf, size_t len, const char *msg)
-{
- while (len--)
- if (buf[len] != 1) {
- err("%s", msg);
- exit(1);
- }
-}
-
-static void test_log_good(char *log, size_t buf_len, size_t log_len,
- size_t exp_len, int exp_errno, const char *full_log)
-{
- size_t len;
- int ret;
-
- memset(log, 1, buf_len);
-
- ret = load(log, log_len, 1);
- check_ret(ret, exp_errno);
-
- len = strnlen(log, buf_len);
- if (len == buf_len) {
- err("verifier did not NULL terminate the log\n");
- exit(1);
- }
- if (exp_len && len != exp_len) {
- err("incorrect log length expected:%zd have:%zd\n",
- exp_len, len);
- exit(1);
- }
-
- if (strchr(log, 1)) {
- err("verifier leaked a byte through\n");
- exit(1);
- }
-
- check_ones(log + len + 1, buf_len - len - 1,
- "verifier wrote bytes past NULL termination\n");
-
- if (memcmp(full_log, log, LOG_SIZE)) {
- err("log did not match expected output\n");
- exit(1);
- }
-}
-
-static void test_log_bad(char *log, size_t log_len, int log_level)
-{
- int ret;
-
- ret = load(log, log_len, log_level);
- check_ret(ret, EINVAL);
- if (log)
- check_ones(log, LOG_SIZE,
- "verifier touched log with bad parameters\n");
-}
-
-int main(int argc, char **argv)
-{
- char full_log[LOG_SIZE];
- char log[LOG_SIZE];
- size_t want_len;
- int i;
-
- memset(log, 1, LOG_SIZE);
-
- /* Test incorrect attr */
- printf("Test log_level 0...\n");
- test_log_bad(log, LOG_SIZE, 0);
-
- printf("Test log_size < 128...\n");
- test_log_bad(log, 15, 1);
-
- printf("Test log_buff = NULL...\n");
- test_log_bad(NULL, LOG_SIZE, 1);
-
- /* Test with log big enough */
- printf("Test oversized buffer...\n");
- test_log_good(full_log, LOG_SIZE, LOG_SIZE, 0, EACCES, full_log);
-
- want_len = strlen(full_log);
-
- printf("Test exact buffer...\n");
- test_log_good(log, LOG_SIZE, want_len + 2, want_len, EACCES, full_log);
-
- printf("Test undersized buffers...\n");
- for (i = 0; i < 64; i++) {
- full_log[want_len - i + 1] = 1;
- full_log[want_len - i] = 0;
-
- test_log_good(log, LOG_SIZE, want_len + 1 - i, want_len - i,
- ENOSPC, full_log);
- }
-
- printf("test_verifier_log: OK\n");
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/test_xdp_features.sh b/tools/testing/selftests/bpf/test_xdp_features.sh
new file mode 100755
index 000000000000..0aa71c4455c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_features.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly NS="ns1-$(mktemp -u XXXXXX)"
+readonly V0_IP4=10.10.0.11
+readonly V1_IP4=10.10.0.1
+readonly V0_IP6=2001:db8::11
+readonly V1_IP6=2001:db8::1
+
+ret=1
+
+setup() {
+ {
+ ip netns add ${NS}
+
+ ip link add v1 type veth peer name v0 netns ${NS}
+
+ ip link set v1 up
+ ip addr add $V1_IP4/24 dev v1
+ ip addr add $V1_IP6/64 nodad dev v1
+ ip -n ${NS} link set dev v0 up
+ ip -n ${NS} addr add $V0_IP4/24 dev v0
+ ip -n ${NS} addr add $V0_IP6/64 nodad dev v0
+
+ # Enable XDP mode and disable checksum offload
+ ethtool -K v1 gro on
+ ethtool -K v1 tx-checksumming off
+ ip netns exec ${NS} ethtool -K v0 gro on
+ ip netns exec ${NS} ethtool -K v0 tx-checksumming off
+ } > /dev/null 2>&1
+}
+
+cleanup() {
+ ip link del v1 2> /dev/null
+ ip netns del ${NS} 2> /dev/null
+ [ "$(pidof xdp_features)" = "" ] || kill $(pidof xdp_features) 2> /dev/null
+}
+
+wait_for_dut_server() {
+ while sleep 1; do
+ ss -tlp | grep -q xdp_features
+ [ $? -eq 0 ] && break
+ done
+}
+
+test_xdp_features() {
+ setup
+
+ ## XDP_PASS
+ ./xdp_features -f XDP_PASS -D $V1_IP6 -T $V0_IP6 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_PASS \
+ -D $V1_IP6 -C $V1_IP6 \
+ -T $V0_IP6 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_DROP
+ ./xdp_features -f XDP_DROP -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_DROP \
+ -D ::ffff:$V1_IP4 \
+ -C ::ffff:$V1_IP4 \
+ -T ::ffff:$V0_IP4 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_ABORTED
+ ./xdp_features -f XDP_ABORTED -D $V1_IP6 -T $V0_IP6 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_ABORTED \
+ -D $V1_IP6 -C $V1_IP6 \
+ -T $V0_IP6 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_TX
+ ./xdp_features -f XDP_TX -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_TX \
+ -D ::ffff:$V1_IP4 \
+ -C ::ffff:$V1_IP4 \
+ -T ::ffff:$V0_IP4 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_REDIRECT
+ ./xdp_features -f XDP_REDIRECT -D $V1_IP6 -T $V0_IP6 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_REDIRECT \
+ -D $V1_IP6 -C $V1_IP6 \
+ -T $V0_IP6 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_NDO_XMIT
+ ./xdp_features -f XDP_NDO_XMIT -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_NDO_XMIT \
+ -D ::ffff:$V1_IP4 \
+ -C ::ffff:$V1_IP4 \
+ -T ::ffff:$V0_IP4 v0
+ ret=$?
+ cleanup
+}
+
+set -e
+trap cleanup 2 3 6 9
+
+test_xdp_features
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
deleted file mode 100755
index 637fcf4fe4e3..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/sh
-
-cleanup()
-{
- if [ "$?" = "0" ]; then
- echo "selftests: test_xdp_meta [PASS]";
- else
- echo "selftests: test_xdp_meta [FAILED]";
- fi
-
- set +e
- ip link del veth1 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
-}
-
-ip link set dev lo xdp off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdp support"
- exit 0
-fi
-set -e
-
-ip netns add ns1
-ip netns add ns2
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
-
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1
-ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2
-
-ip netns exec ns1 tc qdisc add dev veth1 clsact
-ip netns exec ns2 tc qdisc add dev veth2 clsact
-
-ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
-ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
-
-ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x
-ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x
-
-ip netns exec ns1 ip link set dev veth1 up
-ip netns exec ns2 ip link set dev veth2 up
-
-ip netns exec ns1 ping -c 1 10.1.1.22
-ip netns exec ns2 ping -c 1 10.1.1.11
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
deleted file mode 100755
index c033850886f4..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-# Create 2 namespaces with two veth peers, and
-# forward packets in-between using generic XDP
-#
-# NS1(veth11) NS2(veth22)
-# | |
-# | |
-# (veth1, ------ (veth2,
-# id:111) id:222)
-# | xdp forwarding |
-# ------------------
-
-ret=0
-
-setup()
-{
-
- local xdpmode=$1
-
- ip netns add ns1
- ip netns add ns2
-
- ip link add veth1 index 111 type veth peer name veth11 netns ns1
- ip link add veth2 index 222 type veth peer name veth22 netns ns2
-
- ip link set veth1 up
- ip link set veth2 up
- ip -n ns1 link set dev veth11 up
- ip -n ns2 link set dev veth22 up
-
- ip -n ns1 addr add 10.1.1.11/24 dev veth11
- ip -n ns2 addr add 10.1.1.22/24 dev veth22
-}
-
-cleanup()
-{
- ip link del veth1 2> /dev/null
- ip link del veth2 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
-}
-
-test_xdp_redirect()
-{
- local xdpmode=$1
-
- setup
-
- ip link set dev veth1 $xdpmode off &> /dev/null
- if [ $? -ne 0 ];then
- echo "selftests: test_xdp_redirect $xdpmode [SKIP]"
- return 0
- fi
-
- ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
- ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
- ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
- ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
-
- if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null &&
- ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then
- echo "selftests: test_xdp_redirect $xdpmode [PASS]";
- else
- ret=1
- echo "selftests: test_xdp_redirect $xdpmode [FAILED]";
- fi
-
- cleanup
-}
-
-set -e
-trap cleanup 2 3 6 9
-
-test_xdp_redirect xdpgeneric
-test_xdp_redirect xdpdrv
-
-exit $ret
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
deleted file mode 100755
index ba8ffcdaac30..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Create 3 namespaces with 3 veth peers, and
-# forward packets in-between using native XDP
-#
-# XDP_TX
-# NS1(veth11) NS2(veth22) NS3(veth33)
-# | | |
-# | | |
-# (veth1, (veth2, (veth3,
-# id:111) id:122) id:133)
-# ^ | ^ | ^ |
-# | | XDP_REDIRECT | | XDP_REDIRECT | |
-# | ------------------ ------------------ |
-# -----------------------------------------
-# XDP_REDIRECT
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-TESTNAME=xdp_veth
-BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
-BPF_DIR=$BPF_FS/test_$TESTNAME
-
-_cleanup()
-{
- set +e
- ip link del veth1 2> /dev/null
- ip link del veth2 2> /dev/null
- ip link del veth3 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
- ip netns del ns3 2> /dev/null
- rm -rf $BPF_DIR 2> /dev/null
-}
-
-cleanup_skip()
-{
- echo "selftests: $TESTNAME [SKIP]"
- _cleanup
-
- exit $ksft_skip
-}
-
-cleanup()
-{
- if [ "$?" = 0 ]; then
- echo "selftests: $TESTNAME [PASS]"
- else
- echo "selftests: $TESTNAME [FAILED]"
- fi
- _cleanup
-}
-
-if [ $(id -u) -ne 0 ]; then
- echo "selftests: $TESTNAME [SKIP] Need root privileges"
- exit $ksft_skip
-fi
-
-if ! ip link set dev lo xdp off > /dev/null 2>&1; then
- echo "selftests: $TESTNAME [SKIP] Could not run test without the ip xdp support"
- exit $ksft_skip
-fi
-
-if [ -z "$BPF_FS" ]; then
- echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
- exit $ksft_skip
-fi
-
-if ! bpftool version > /dev/null 2>&1; then
- echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
- exit $ksft_skip
-fi
-
-set -e
-
-trap cleanup_skip EXIT
-
-ip netns add ns1
-ip netns add ns2
-ip netns add ns3
-
-ip link add veth1 index 111 type veth peer name veth11 netns ns1
-ip link add veth2 index 122 type veth peer name veth22 netns ns2
-ip link add veth3 index 133 type veth peer name veth33 netns ns3
-
-ip link set veth1 up
-ip link set veth2 up
-ip link set veth3 up
-
-ip -n ns1 addr add 10.1.1.11/24 dev veth11
-ip -n ns3 addr add 10.1.1.33/24 dev veth33
-
-ip -n ns1 link set dev veth11 up
-ip -n ns2 link set dev veth22 up
-ip -n ns3 link set dev veth33 up
-
-mkdir $BPF_DIR
-bpftool prog loadall \
- xdp_redirect_map.o $BPF_DIR/progs type xdp \
- pinmaps $BPF_DIR/maps
-bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0
-bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0
-bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0
-ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
-ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
-ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
-
-ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
-ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
-
-trap cleanup EXIT
-
-ip netns exec ns1 ping -c 1 -W 1 10.1.1.33
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
deleted file mode 100755
index bb8b0da91686..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan.sh
+++ /dev/null
@@ -1,228 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-# Author: Jesper Dangaard Brouer <hawk@kernel.org>
-
-# Allow wrapper scripts to name test
-if [ -z "$TESTNAME" ]; then
- TESTNAME=xdp_vlan
-fi
-
-# Default XDP mode
-XDP_MODE=xdpgeneric
-
-usage() {
- echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME"
- echo ""
- echo "Usage: $0 [-vfh]"
- echo " -v | --verbose : Verbose"
- echo " --flush : Flush before starting (e.g. after --interactive)"
- echo " --interactive : Keep netns setup running after test-run"
- echo " --mode=XXX : Choose XDP mode (xdp | xdpgeneric | xdpdrv)"
- echo ""
-}
-
-valid_xdp_mode()
-{
- local mode=$1
-
- case "$mode" in
- xdpgeneric | xdpdrv | xdp)
- return 0
- ;;
- *)
- return 1
- esac
-}
-
-cleanup()
-{
- local status=$?
-
- if [ "$status" = "0" ]; then
- echo "selftests: $TESTNAME [PASS]";
- else
- echo "selftests: $TESTNAME [FAILED]";
- fi
-
- if [ -n "$INTERACTIVE" ]; then
- echo "Namespace setup still active explore with:"
- echo " ip netns exec ns1 bash"
- echo " ip netns exec ns2 bash"
- exit $status
- fi
-
- set +e
- ip link del veth1 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
-}
-
-# Using external program "getopt" to get --long-options
-OPTIONS=$(getopt -o hvfi: \
- --long verbose,flush,help,interactive,debug,mode: -- "$@")
-if (( $? != 0 )); then
- usage
- echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?"
- exit 2
-fi
-eval set -- "$OPTIONS"
-
-## --- Parse command line arguments / parameters ---
-while true; do
- case "$1" in
- -v | --verbose)
- export VERBOSE=yes
- shift
- ;;
- -i | --interactive | --debug )
- INTERACTIVE=yes
- shift
- ;;
- -f | --flush )
- cleanup
- shift
- ;;
- --mode )
- shift
- XDP_MODE=$1
- shift
- ;;
- -- )
- shift
- break
- ;;
- -h | --help )
- usage;
- echo "selftests: $TESTNAME [SKIP] usage help info requested"
- exit 0
- ;;
- * )
- shift
- break
- ;;
- esac
-done
-
-if [ "$EUID" -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] need root privileges"
- exit 1
-fi
-
-valid_xdp_mode $XDP_MODE
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] unknown XDP mode ($XDP_MODE)"
- exit 1
-fi
-
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [SKIP] need ip xdp support"
- exit 0
-fi
-
-# Interactive mode likely require us to cleanup netns
-if [ -n "$INTERACTIVE" ]; then
- ip link del veth1 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
-fi
-
-# Exit on failure
-set -e
-
-# Some shell-tools dependencies
-which ip > /dev/null
-which tc > /dev/null
-which ethtool > /dev/null
-
-# Make rest of shell verbose, showing comments as doc/info
-if [ -n "$VERBOSE" ]; then
- set -v
-fi
-
-# Create two namespaces
-ip netns add ns1
-ip netns add ns2
-
-# Run cleanup if failing or on kill
-trap cleanup 0 2 3 6 9
-
-# Create veth pair
-ip link add veth1 type veth peer name veth2
-
-# Move veth1 and veth2 into the respective namespaces
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
-
-# NOTICE: XDP require VLAN header inside packet payload
-# - Thus, disable VLAN offloading driver features
-# - For veth REMEMBER TX side VLAN-offload
-#
-# Disable rx-vlan-offload (mostly needed on ns1)
-ip netns exec ns1 ethtool -K veth1 rxvlan off
-ip netns exec ns2 ethtool -K veth2 rxvlan off
-#
-# Disable tx-vlan-offload (mostly needed on ns2)
-ip netns exec ns2 ethtool -K veth2 txvlan off
-ip netns exec ns1 ethtool -K veth1 txvlan off
-
-export IPADDR1=100.64.41.1
-export IPADDR2=100.64.41.2
-
-# In ns1/veth1 add IP-addr on plain net_device
-ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1
-ip netns exec ns1 ip link set veth1 up
-
-# In ns2/veth2 create VLAN device
-export VLAN=4011
-export DEVNS2=veth2
-ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
-ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
-ip netns exec ns2 ip link set $DEVNS2 up
-ip netns exec ns2 ip link set $DEVNS2.$VLAN up
-
-# Bringup lo in netns (to avoids confusing people using --interactive)
-ip netns exec ns1 ip link set lo up
-ip netns exec ns2 ip link set lo up
-
-# At this point, the hosts cannot reach each-other,
-# because ns2 are using VLAN tags on the packets.
-
-ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
-
-
-# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
-# ----------------------------------------------------------------------
-# In ns1: ingress use XDP to remove VLAN tags
-export DEVNS1=veth1
-export FILE=test_xdp_vlan.o
-
-# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
-export XDP_PROG=xdp_vlan_change
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
-
-# In ns1: egress use TC to add back VLAN tag 4011
-# (del cmd)
-# tc qdisc del dev $DEVNS1 clsact 2> /dev/null
-#
-ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact
-ip netns exec ns1 tc filter add dev $DEVNS1 egress \
- prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
-
-# Now the namespaces can reach each-other, test with ping:
-ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
-
-# Second test: Replace xdp prog, that fully remove vlan header
-#
-# Catch kernel bug for generic-XDP, that does didn't allow us to
-# remove a VLAN header, because skb->protocol still contain VLAN
-# ETH_P_8021Q indication, and this cause overwriting of our changes.
-#
-export XDP_PROG=xdp_vlan_remove_outer2
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE off
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
-
-# Now the namespaces should still be able reach each-other, test with ping:
-ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
deleted file mode 100755
index c515326d6d59..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test generic-XDP
-export TESTNAME=xdp_vlan_mode_generic
-./test_xdp_vlan.sh --mode=xdpgeneric
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
deleted file mode 100755
index 5cf7ce1f16c1..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test native-XDP
-export TESTNAME=xdp_vlan_mode_native
-./test_xdp_vlan.sh --mode=xdpdrv
diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh
index c2f0ddb45531..c3d82e0a7378 100755
--- a/tools/testing/selftests/bpf/test_xdping.sh
+++ b/tools/testing/selftests/bpf/test_xdping.sh
@@ -95,5 +95,9 @@ for server_args in "" "-I veth0 -s -S" ; do
test "$client_args" "$server_args"
done
+# Test drv mode
+test "-I veth1 -N" "-I veth0 -s -N"
+test "-I veth1 -N -c 10" "-I veth0 -s -N"
+
echo "OK. All tests passed"
exit 0
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 88a7483eaae4..62db060298a4 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -24,8 +24,6 @@
# ----------- | ----------
# | vethX | --------- | vethY |
# ----------- peer ----------
-# | | |
-# namespaceX | namespaceY
#
# AF_XDP is an address family optimized for high performance packet processing,
# it is XDP’s user-space interface.
@@ -39,16 +37,14 @@
# Prerequisites setup by script:
#
# Set up veth interfaces as per the topology shown ^^:
-# * setup two veth interfaces and one namespace
-# ** veth<xxxx> in root namespace
-# ** veth<yyyy> in af_xdp<xxxx> namespace
-# ** namespace af_xdp<xxxx>
-# * create a spec file veth.spec that includes this run-time configuration
+# * setup two veth interfaces
+# ** veth<xxxx>
+# ** veth<yyyy>
# *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
# conflict with any existing interface
# * tests the veth and xsk layers of the topology
#
-# See the source xdpxceiver.c for information on each test
+# See the source xskxceiver.c for information on each test
#
# Kernel configuration:
# ---------------------
@@ -63,197 +59,188 @@
# ----------------
# Must run with CAP_NET_ADMIN capability.
#
-# Run (full color-coded output):
-# sudo ./test_xsk.sh -c
+# Run:
+# sudo ./test_xsk.sh
#
# If running from kselftests:
-# sudo make colorconsole=1 run_tests
+# sudo make run_tests
#
-# Run (full output without color-coding):
-# sudo ./test_xsk.sh
+# Run with verbose output:
+# sudo ./test_xsk.sh -v
+#
+# Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger:
+# sudo ./test_xsk.sh -d
+#
+# Run test suite for physical device in loopback mode
+# sudo ./test_xsk.sh -i IFACE
+#
+# Run test suite in a specific mode only [skb,drv,zc]
+# sudo ./test_xsk.sh -m MODE
+#
+# List available tests
+# ./test_xsk.sh -l
+#
+# Run a specific test from the test suite
+# sudo ./test_xsk.sh -t TEST_NAME
+#
+# Display the available command line options
+# ./test_xsk.sh -h
. xsk_prereqs.sh
-while getopts c flag
+ETH=""
+
+while getopts "vi:dm:lt:h" flag
do
case "${flag}" in
- c) colorconsole=1;;
+ v) verbose=1;;
+ d) debug=1;;
+ i) ETH=${OPTARG};;
+ m) MODE=${OPTARG};;
+ l) list=1;;
+ t) TEST=${OPTARG};;
+ h) help=1;;
esac
done
TEST_NAME="PREREQUISITES"
URANDOM=/dev/urandom
-[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; }
+[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit $ksft_fail; }
VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
VETH0=ve${VETH0_POSTFIX}
VETH1_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
VETH1=ve${VETH1_POSTFIX}
-NS0=root
-NS1=af_xdp${VETH1_POSTFIX}
MTU=1500
+trap ctrl_c INT
+
+function ctrl_c() {
+ cleanup_exit ${VETH0} ${VETH1}
+ exit 1
+}
+
setup_vethPairs() {
- echo "setting up ${VETH0}: namespace: ${NS0}"
- ip netns add ${NS1}
- ip link add ${VETH0} type veth peer name ${VETH1}
+ if [[ $verbose -eq 1 ]]; then
+ echo "setting up ${VETH0}"
+ fi
+ ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
if [ -f /proc/net/if_inet6 ]; then
echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
+ echo 1 > /proc/sys/net/ipv6/conf/${VETH1}/disable_ipv6
+ fi
+ if [[ $verbose -eq 1 ]]; then
+ echo "setting up ${VETH1}"
fi
- echo "setting up ${VETH1}: namespace: ${NS1}"
- ip link set ${VETH1} netns ${NS1}
- ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
+
+ if [[ $busy_poll -eq 1 ]]; then
+ echo 2 > /sys/class/net/${VETH0}/napi_defer_hard_irqs
+ echo 200000 > /sys/class/net/${VETH0}/gro_flush_timeout
+ echo 2 > /sys/class/net/${VETH1}/napi_defer_hard_irqs
+ echo 200000 > /sys/class/net/${VETH1}/gro_flush_timeout
+ fi
+
+ ip link set ${VETH1} mtu ${MTU}
ip link set ${VETH0} mtu ${MTU}
- ip netns exec ${NS1} ip link set ${VETH1} up
+ ip link set ${VETH1} up
ip link set ${VETH0} up
}
-validate_root_exec
-validate_veth_support ${VETH0}
-validate_ip_utility
-setup_vethPairs
-
-retval=$?
-if [ $retval -ne 0 ]; then
- test_status $retval "${TEST_NAME}"
- cleanup_exit ${VETH0} ${VETH1} ${NS1}
- exit $retval
+if [[ $list -eq 1 ]]; then
+ ./${XSKOBJ} -l
+ exit
fi
-echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
-
-validate_veth_spec_file
-
-echo "Spec file created: ${SPECFILE}"
-
-test_status $retval "${TEST_NAME}"
-
-## START TESTS
-
-statusList=()
-
-### TEST 1
-TEST_NAME="XSK KSELFTEST FRAMEWORK"
-
-echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-retval=$?
-if [ $retval -eq 0 ]; then
- echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
- vethXDPnative ${VETH0} ${VETH1} ${NS1}
+if [[ $help -eq 1 ]]; then
+ ./${XSKOBJ}
+ exit
fi
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 2
-TEST_NAME="SKB NOPOLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 3
-TEST_NAME="SKB POLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-p")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 4
-TEST_NAME="DRV NOPOLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 5
-TEST_NAME="DRV POLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-p")
-execxdpxceiver params
+if [ ! -z $ETH ]; then
+ VETH0=${ETH}
+ VETH1=${ETH}
+else
+ validate_root_exec
+ validate_veth_support ${VETH0}
+ validate_ip_utility
+ setup_vethPairs
+
+ retval=$?
+ if [ $retval -ne 0 ]; then
+ test_status $retval "${TEST_NAME}"
+ cleanup_exit ${VETH0} ${VETH1}
+ exit $retval
+ fi
+fi
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-### TEST 6
-TEST_NAME="SKB SOCKET TEARDOWN"
+if [[ $verbose -eq 1 ]]; then
+ ARGS+="-v "
+fi
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+if [ -n "$MODE" ]; then
+ ARGS+="-m ${MODE} "
+fi
-params=("-S" "-T")
-execxdpxceiver params
+if [ -n "$TEST" ]; then
+ ARGS+="-t ${TEST} "
+fi
retval=$?
test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 7
-TEST_NAME="DRV SOCKET TEARDOWN"
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-T")
-execxdpxceiver params
+## START TESTS
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
+statusList=()
-### TEST 8
-TEST_NAME="SKB BIDIRECTIONAL SOCKETS"
+TEST_NAME="XSK_SELFTESTS_${VETH0}_SOFTIRQ"
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+if [[ $debug -eq 1 ]]; then
+ echo "-i" ${VETH0} "-i" ${VETH1}
+ exit
+fi
-params=("-S" "-B")
-execxdpxceiver params
+exec_xskxceiver
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 9
-TEST_NAME="DRV BIDIRECTIONAL SOCKETS"
+if [ -z $ETH ]; then
+ cleanup_exit ${VETH0} ${VETH1}
+else
+ cleanup_iface ${ETH} ${MTU}
+fi
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
+if [[ $list -eq 1 ]]; then
+ exit
+fi
-params=("-N" "-B")
-execxdpxceiver params
+TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL"
+busy_poll=1
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
+if [ -z $ETH ]; then
+ setup_vethPairs
+fi
+exec_xskxceiver
## END TESTS
-cleanup_exit ${VETH0} ${VETH1} ${NS1}
+if [ -z $ETH ]; then
+ cleanup_exit ${VETH0} ${VETH1}
+else
+ cleanup_iface ${ETH} ${MTU}
+fi
-for _status in "${statusList[@]}"
+failures=0
+echo -e "\nSummary:"
+for i in "${!statusList[@]}"
do
- if [ $_status -ne 0 ]; then
- test_exit $ksft_fail 0
+ if [ ${statusList[$i]} -ne 0 ]; then
+ test_status ${statusList[$i]} ${nameList[$i]}
+ failures=1
fi
done
-test_exit $ksft_pass 0
+if [ $failures -eq 0 ]; then
+ echo "All tests successful!"
+else
+ exit 1
+fi
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 800d503e5cb4..16eb37e5bad6 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -1,8 +1,16 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
/* Copyright (C) 2020 Facebook, Inc. */
+#include <ctype.h>
#include <stdlib.h>
+#include <string.h>
#include <errno.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "disasm.h"
+#include "test_progs.h"
#include "testing_helpers.h"
+#include <linux/membarrier.h>
int parse_num_list(const char *s, bool **num_set, int *num_set_len)
{
@@ -56,7 +64,7 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
set[i] = true;
}
- if (!set)
+ if (!set || parsing_end)
return -EINVAL;
*num_set = set;
@@ -65,16 +73,440 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
return 0;
}
+static int do_insert_test(struct test_filter_set *set,
+ char *test_str,
+ char *subtest_str)
+{
+ struct test_filter *tmp, *test;
+ char **ctmp;
+ int i;
+
+ for (i = 0; i < set->cnt; i++) {
+ test = &set->tests[i];
+
+ if (strcmp(test_str, test->name) == 0) {
+ free(test_str);
+ goto subtest;
+ }
+ }
+
+ tmp = realloc(set->tests, sizeof(*test) * (set->cnt + 1));
+ if (!tmp)
+ return -ENOMEM;
+
+ set->tests = tmp;
+ test = &set->tests[set->cnt];
+
+ test->name = test_str;
+ test->subtests = NULL;
+ test->subtest_cnt = 0;
+
+ set->cnt++;
+
+subtest:
+ if (!subtest_str)
+ return 0;
+
+ for (i = 0; i < test->subtest_cnt; i++) {
+ if (strcmp(subtest_str, test->subtests[i]) == 0) {
+ free(subtest_str);
+ return 0;
+ }
+ }
+
+ ctmp = realloc(test->subtests,
+ sizeof(*test->subtests) * (test->subtest_cnt + 1));
+ if (!ctmp)
+ return -ENOMEM;
+
+ test->subtests = ctmp;
+ test->subtests[test->subtest_cnt] = subtest_str;
+
+ test->subtest_cnt++;
+
+ return 0;
+}
+
+static int insert_test(struct test_filter_set *set,
+ char *test_spec,
+ bool is_glob_pattern)
+{
+ char *pattern, *subtest_str, *ext_test_str, *ext_subtest_str = NULL;
+ int glob_chars = 0;
+
+ if (is_glob_pattern) {
+ pattern = "%s";
+ } else {
+ pattern = "*%s*";
+ glob_chars = 2;
+ }
+
+ subtest_str = strchr(test_spec, '/');
+ if (subtest_str) {
+ *subtest_str = '\0';
+ subtest_str += 1;
+ }
+
+ ext_test_str = malloc(strlen(test_spec) + glob_chars + 1);
+ if (!ext_test_str)
+ goto err;
+
+ sprintf(ext_test_str, pattern, test_spec);
+
+ if (subtest_str) {
+ ext_subtest_str = malloc(strlen(subtest_str) + glob_chars + 1);
+ if (!ext_subtest_str)
+ goto err;
+
+ sprintf(ext_subtest_str, pattern, subtest_str);
+ }
+
+ return do_insert_test(set, ext_test_str, ext_subtest_str);
+
+err:
+ free(ext_test_str);
+ free(ext_subtest_str);
+
+ return -ENOMEM;
+}
+
+int parse_test_list_file(const char *path,
+ struct test_filter_set *set,
+ bool is_glob_pattern)
+{
+ char *buf = NULL, *capture_start, *capture_end, *scan_end;
+ size_t buflen = 0;
+ int err = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open '%s': %d\n", path, err);
+ return err;
+ }
+
+ while (getline(&buf, &buflen, f) != -1) {
+ capture_start = buf;
+
+ while (isspace(*capture_start))
+ ++capture_start;
+
+ capture_end = capture_start;
+ scan_end = capture_start;
+
+ while (*scan_end && *scan_end != '#') {
+ if (!isspace(*scan_end))
+ capture_end = scan_end;
+
+ ++scan_end;
+ }
+
+ if (capture_end == capture_start)
+ continue;
+
+ *(++capture_end) = '\0';
+
+ err = insert_test(set, capture_start, is_glob_pattern);
+ if (err)
+ break;
+ }
+
+ fclose(f);
+ return err;
+}
+
+int parse_test_list(const char *s,
+ struct test_filter_set *set,
+ bool is_glob_pattern)
+{
+ char *input, *state = NULL, *test_spec;
+ int err = 0, cnt = 0;
+
+ input = strdup(s);
+ if (!input)
+ return -ENOMEM;
+
+ while ((test_spec = strtok_r(cnt++ ? NULL : input, ",", &state))) {
+ err = insert_test(set, test_spec, is_glob_pattern);
+ if (err)
+ break;
+ }
+
+ free(input);
+ return err;
+}
+
__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
{
__u32 info_len = sizeof(*info);
int err;
memset(info, 0, sizeof(*info));
- err = bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &info_len);
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link), info, &info_len);
if (err) {
printf("failed to get link info: %d\n", -errno);
return 0;
}
return info->prog_id;
}
+
+int extra_prog_load_log_flags = 0;
+
+int testing_prog_flags(void)
+{
+ static int cached_flags = -1;
+ static int prog_flags[] = { BPF_F_TEST_RND_HI32, BPF_F_TEST_REG_INVARIANTS };
+ static struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int insn_cnt = ARRAY_SIZE(insns), i, fd, flags = 0;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ if (cached_flags >= 0)
+ return cached_flags;
+
+ for (i = 0; i < ARRAY_SIZE(prog_flags); i++) {
+ opts.prog_flags = prog_flags[i];
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "flag-test", "GPL",
+ insns, insn_cnt, &opts);
+ if (fd >= 0) {
+ flags |= prog_flags[i];
+ close(fd);
+ }
+ }
+
+ cached_flags = flags;
+ return cached_flags;
+}
+
+int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .kernel_log_level = extra_prog_load_log_flags,
+ );
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ __u32 flags;
+ int err;
+
+ obj = bpf_object__open_file(file, &opts);
+ if (!obj)
+ return -errno;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (!prog) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type)
+ bpf_program__set_type(prog, type);
+
+ flags = bpf_program__flags(prog) | testing_prog_flags();
+ bpf_program__set_flags(prog, flags);
+
+ err = bpf_object__load(obj);
+ if (err)
+ goto err_out;
+
+ *pobj = obj;
+ *prog_fd = bpf_program__fd(prog);
+
+ return 0;
+err_out:
+ bpf_object__close(obj);
+ return err;
+}
+
+int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .kern_version = kern_version,
+ .prog_flags = testing_prog_flags(),
+ .log_level = extra_prog_load_log_flags,
+ .log_buf = log_buf,
+ .log_size = log_buf_sz,
+ );
+
+ return bpf_prog_load(type, NULL, license, insns, insns_cnt, &opts);
+}
+
+__u64 read_perf_max_sample_freq(void)
+{
+ __u64 sample_freq = 5000; /* fallback to 5000 on error */
+ FILE *f;
+
+ f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
+ if (f == NULL) {
+ printf("Failed to open /proc/sys/kernel/perf_event_max_sample_rate: err %d\n"
+ "return default value: 5000\n", -errno);
+ return sample_freq;
+ }
+ if (fscanf(f, "%llu", &sample_freq) != 1) {
+ printf("Failed to parse /proc/sys/kernel/perf_event_max_sample_rate: err %d\n"
+ "return default value: 5000\n", -errno);
+ }
+
+ fclose(f);
+ return sample_freq;
+}
+
+int finit_module(int fd, const char *param_values, int flags)
+{
+ return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+int delete_module(const char *name, int flags)
+{
+ return syscall(__NR_delete_module, name, flags);
+}
+
+int unload_module(const char *name, bool verbose)
+{
+ int ret, cnt = 0;
+
+ if (kern_sync_rcu())
+ fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n");
+
+ for (;;) {
+ ret = delete_module(name, 0);
+ if (!ret || errno != EAGAIN)
+ break;
+ if (++cnt > 10000) {
+ fprintf(stdout, "Unload of %s timed out\n", name);
+ break;
+ }
+ usleep(100);
+ }
+
+ if (ret) {
+ if (errno == ENOENT) {
+ if (verbose)
+ fprintf(stdout, "%s.ko is already unloaded.\n", name);
+ return -1;
+ }
+ fprintf(stdout, "Failed to unload %s.ko from kernel: %d\n", name, -errno);
+ return -1;
+ }
+ if (verbose)
+ fprintf(stdout, "Successfully unloaded %s.ko.\n", name);
+ return 0;
+}
+
+static int __load_module(const char *path, const char *param_values, bool verbose)
+{
+ int fd;
+
+ if (verbose)
+ fprintf(stdout, "Loading %s...\n", path);
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno);
+ return -ENOENT;
+ }
+ if (finit_module(fd, param_values, 0)) {
+ fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno);
+ close(fd);
+ return -EINVAL;
+ }
+ close(fd);
+
+ if (verbose)
+ fprintf(stdout, "Successfully loaded %s.\n", path);
+ return 0;
+}
+
+int load_module_params(const char *path, const char *param_values, bool verbose)
+{
+ return __load_module(path, param_values, verbose);
+}
+
+int load_module(const char *path, bool verbose)
+{
+ return __load_module(path, "", verbose);
+}
+
+int unload_bpf_testmod(bool verbose)
+{
+ return unload_module("bpf_testmod", verbose);
+}
+
+int load_bpf_testmod(bool verbose)
+{
+ return load_module("bpf_testmod.ko", verbose);
+}
+
+/*
+ * Trigger synchronize_rcu() in kernel.
+ */
+int kern_sync_rcu(void)
+{
+ return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0);
+}
+
+int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt)
+{
+ __u32 buf_element_size = sizeof(struct bpf_insn);
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 xlated_prog_len;
+
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("bpf_prog_get_info_by_fd failed");
+ return -1;
+ }
+
+ xlated_prog_len = info.xlated_prog_len;
+ if (xlated_prog_len % buf_element_size) {
+ printf("Program length %u is not multiple of %u\n",
+ xlated_prog_len, buf_element_size);
+ return -1;
+ }
+
+ *cnt = xlated_prog_len / buf_element_size;
+ *buf = calloc(*cnt, buf_element_size);
+ if (!*buf) {
+ perror("can't allocate xlated program buffer");
+ return -ENOMEM;
+ }
+
+ bzero(&info, sizeof(info));
+ info.xlated_prog_len = xlated_prog_len;
+ info.xlated_prog_insns = (__u64)(unsigned long)*buf;
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("second bpf_prog_get_info_by_fd failed");
+ goto out_free_buf;
+ }
+
+ return 0;
+
+out_free_buf:
+ free(*buf);
+ *buf = NULL;
+ return -1;
+}
+
+bool is_jit_enabled(void)
+{
+ const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+ bool enabled = false;
+ int sysctl_fd;
+
+ sysctl_fd = open(jit_sysctl, O_RDONLY);
+ if (sysctl_fd != -1) {
+ char tmpc;
+
+ if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+ enabled = (tmpc != '0');
+ close(sysctl_fd);
+ }
+
+ return enabled;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index d4f8e749611b..eb20d3772218 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -1,8 +1,62 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (C) 2020 Facebook, Inc. */
+
+#ifndef __TESTING_HELPERS_H
+#define __TESTING_HELPERS_H
+
#include <stdbool.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include <time.h>
+
+#define __TO_STR(x) #x
+#define TO_STR(x) __TO_STR(x)
int parse_num_list(const char *s, bool **set, int *set_len);
__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info);
+int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd);
+int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz);
+
+/*
+ * below function is exported for testing in prog_test test
+ */
+struct test_filter_set;
+int parse_test_list(const char *s,
+ struct test_filter_set *test_set,
+ bool is_glob_pattern);
+int parse_test_list_file(const char *path,
+ struct test_filter_set *test_set,
+ bool is_glob_pattern);
+
+__u64 read_perf_max_sample_freq(void);
+int load_bpf_testmod(bool verbose);
+int unload_bpf_testmod(bool verbose);
+int kern_sync_rcu(void);
+int finit_module(int fd, const char *param_values, int flags);
+int delete_module(const char *name, int flags);
+int load_module(const char *path, bool verbose);
+int load_module_params(const char *path, const char *param_values, bool verbose);
+int unload_module(const char *name, bool verbose);
+
+static inline __u64 get_time_ns(void)
+{
+ struct timespec t;
+
+ clock_gettime(CLOCK_MONOTONIC, &t);
+
+ return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
+}
+
+struct bpf_insn;
+/* Request BPF program instructions after all rewrites are applied,
+ * e.g. verifier.c:convert_ctx_access() is done.
+ */
+int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt);
+int testing_prog_flags(void);
+bool is_jit_enabled(void);
+
+#endif /* __TESTING_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 1bbd1d9830c8..eeaab7013ca2 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -6,96 +7,215 @@
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
+#include <pthread.h>
#include <unistd.h>
#include <linux/perf_event.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
#include <sys/mman.h>
#include "trace_helpers.h"
+#include <linux/limits.h>
+#include <libelf.h>
+#include <gelf.h>
+#include "bpf/hashmap.h"
+#include "bpf/libbpf_internal.h"
+#include "bpf_util.h"
-#define DEBUGFS "/sys/kernel/debug/tracing/"
+#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
+#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
-#define MAX_SYMS 300000
-static struct ksym syms[MAX_SYMS];
-static int sym_cnt;
+struct ksyms {
+ struct ksym *syms;
+ size_t sym_cap;
+ size_t sym_cnt;
+};
-static int ksym_cmp(const void *p1, const void *p2)
+static struct ksyms *ksyms;
+static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static int ksyms__add_symbol(struct ksyms *ksyms, const char *name,
+ unsigned long addr)
{
- return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+ void *tmp;
+
+ tmp = strdup(name);
+ if (!tmp)
+ return -ENOMEM;
+ ksyms->syms[ksyms->sym_cnt].addr = addr;
+ ksyms->syms[ksyms->sym_cnt].name = tmp;
+ ksyms->sym_cnt++;
+ return 0;
}
-int load_kallsyms(void)
+void free_kallsyms_local(struct ksyms *ksyms)
+{
+ unsigned int i;
+
+ if (!ksyms)
+ return;
+
+ if (!ksyms->syms) {
+ free(ksyms);
+ return;
+ }
+
+ for (i = 0; i < ksyms->sym_cnt; i++)
+ free(ksyms->syms[i].name);
+ free(ksyms->syms);
+ free(ksyms);
+}
+
+static struct ksyms *load_kallsyms_local_common(ksym_cmp_t cmp_cb)
{
- FILE *f = fopen("/proc/kallsyms", "r");
+ FILE *f;
char func[256], buf[256];
char symbol;
void *addr;
- int i = 0;
+ int ret;
+ struct ksyms *ksyms;
+ f = fopen("/proc/kallsyms", "r");
if (!f)
- return -ENOENT;
+ return NULL;
+
+ ksyms = calloc(1, sizeof(struct ksyms));
+ if (!ksyms) {
+ fclose(f);
+ return NULL;
+ }
while (fgets(buf, sizeof(buf), f)) {
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
break;
if (!addr)
continue;
- syms[i].addr = (long) addr;
- syms[i].name = strdup(func);
- i++;
+
+ ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap,
+ sizeof(struct ksym), ksyms->sym_cnt + 1);
+ if (ret)
+ goto error;
+ ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr);
+ if (ret)
+ goto error;
}
fclose(f);
- sym_cnt = i;
- qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
- return 0;
+ qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), cmp_cb);
+ return ksyms;
+
+error:
+ fclose(f);
+ free_kallsyms_local(ksyms);
+ return NULL;
}
-struct ksym *ksym_search(long key)
+static int ksym_cmp(const void *p1, const void *p2)
+{
+ return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+struct ksyms *load_kallsyms_local(void)
+{
+ return load_kallsyms_local_common(ksym_cmp);
+}
+
+struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb)
{
- int start = 0, end = sym_cnt;
+ return load_kallsyms_local_common(cmp_cb);
+}
+
+int load_kallsyms(void)
+{
+ pthread_mutex_lock(&ksyms_mutex);
+ if (!ksyms)
+ ksyms = load_kallsyms_local();
+ pthread_mutex_unlock(&ksyms_mutex);
+ return ksyms ? 0 : 1;
+}
+
+struct ksym *ksym_search_local(struct ksyms *ksyms, long key)
+{
+ int start = 0, end = ksyms->sym_cnt;
int result;
/* kallsyms not loaded. return NULL */
- if (sym_cnt <= 0)
+ if (ksyms->sym_cnt <= 0)
return NULL;
while (start < end) {
size_t mid = start + (end - start) / 2;
- result = key - syms[mid].addr;
+ result = key - ksyms->syms[mid].addr;
if (result < 0)
end = mid;
else if (result > 0)
start = mid + 1;
else
- return &syms[mid];
+ return &ksyms->syms[mid];
}
- if (start >= 1 && syms[start - 1].addr < key &&
- key < syms[start].addr)
+ if (start >= 1 && ksyms->syms[start - 1].addr < key &&
+ key < ksyms->syms[start].addr)
/* valid ksym */
- return &syms[start - 1];
+ return &ksyms->syms[start - 1];
/* out of range. return _stext */
- return &syms[0];
+ return &ksyms->syms[0];
}
-long ksym_get_addr(const char *name)
+struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p,
+ ksym_search_cmp_t cmp_cb)
+{
+ int start = 0, mid, end = ksyms->sym_cnt;
+ struct ksym *ks;
+ int result;
+
+ while (start < end) {
+ mid = start + (end - start) / 2;
+ ks = &ksyms->syms[mid];
+ result = cmp_cb(p, ks);
+ if (result < 0)
+ end = mid;
+ else if (result > 0)
+ start = mid + 1;
+ else
+ return ks;
+ }
+
+ return NULL;
+}
+
+struct ksym *ksym_search(long key)
+{
+ if (!ksyms)
+ return NULL;
+ return ksym_search_local(ksyms, key);
+}
+
+long ksym_get_addr_local(struct ksyms *ksyms, const char *name)
{
int i;
- for (i = 0; i < sym_cnt; i++) {
- if (strcmp(syms[i].name, name) == 0)
- return syms[i].addr;
+ for (i = 0; i < ksyms->sym_cnt; i++) {
+ if (strcmp(ksyms->syms[i].name, name) == 0)
+ return ksyms->syms[i].addr;
}
return 0;
}
+long ksym_get_addr(const char *name)
+{
+ if (!ksyms)
+ return 0;
+ return ksym_get_addr_local(ksyms, name);
+}
+
/* open kallsyms and read symbol addresses on the fly. Without caching all symbols,
* this is faster than load + find.
*/
int kallsyms_find(const char *sym, unsigned long long *addr)
{
- char type, name[500];
+ char type, name[500], *match;
unsigned long long value;
int err = 0;
FILE *f;
@@ -105,6 +225,17 @@ int kallsyms_find(const char *sym, unsigned long long *addr)
return -EINVAL;
while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
+ /* If CONFIG_LTO_CLANG_THIN is enabled, static variable/function
+ * symbols could be promoted to global due to cross-file inlining.
+ * For such cases, clang compiler will add .llvm.<hash> suffix
+ * to those symbols to avoid potential naming conflict.
+ * Let us ignore .llvm.<hash> suffix during symbol comparison.
+ */
+ if (type == 'd') {
+ match = strstr(name, ".llvm.");
+ if (match)
+ *match = '\0';
+ }
if (strcmp(name, sym) == 0) {
*addr = value;
goto out;
@@ -117,22 +248,508 @@ out:
return err;
}
+#ifdef PROCMAP_QUERY
+int env_verbosity __weak = 0;
+
+static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags)
+{
+ char path_buf[PATH_MAX], build_id_buf[20];
+ struct procmap_query q;
+ int err;
+
+ memset(&q, 0, sizeof(q));
+ q.size = sizeof(q);
+ q.query_flags = query_flags;
+ q.query_addr = (__u64)addr;
+ q.vma_name_addr = (__u64)path_buf;
+ q.vma_name_size = sizeof(path_buf);
+ q.build_id_addr = (__u64)build_id_buf;
+ q.build_id_size = sizeof(build_id_buf);
+
+ err = ioctl(fd, PROCMAP_QUERY, &q);
+ if (err < 0) {
+ err = -errno;
+ if (err == -ENOTTY)
+ return -EOPNOTSUPP; /* ioctl() not implemented yet */
+ if (err == -ENOENT)
+ return -ESRCH; /* vma not found */
+ return err;
+ }
+
+ if (env_verbosity >= 1) {
+ printf("VMA FOUND (addr %08lx): %08lx-%08lx %c%c%c%c %08lx %02x:%02x %ld %s (build ID: %s, %d bytes)\n",
+ (long)addr, (long)q.vma_start, (long)q.vma_end,
+ (q.vma_flags & PROCMAP_QUERY_VMA_READABLE) ? 'r' : '-',
+ (q.vma_flags & PROCMAP_QUERY_VMA_WRITABLE) ? 'w' : '-',
+ (q.vma_flags & PROCMAP_QUERY_VMA_EXECUTABLE) ? 'x' : '-',
+ (q.vma_flags & PROCMAP_QUERY_VMA_SHARED) ? 's' : 'p',
+ (long)q.vma_offset, q.dev_major, q.dev_minor, (long)q.inode,
+ q.vma_name_size ? path_buf : "",
+ q.build_id_size ? "YES" : "NO",
+ q.build_id_size);
+ }
+
+ *start = q.vma_start;
+ *offset = q.vma_offset;
+ *flags = q.vma_flags;
+ return 0;
+}
+#else
+# ifndef PROCMAP_QUERY_VMA_EXECUTABLE
+# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04
+# endif
+
+static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+ssize_t get_uprobe_offset(const void *addr)
+{
+ size_t start, base, end;
+ FILE *f;
+ char buf[256];
+ int err, flags;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ /* requested executable VMA only */
+ err = procmap_query(fileno(f), addr, PROCMAP_QUERY_VMA_EXECUTABLE, &start, &base, &flags);
+ if (err == -EOPNOTSUPP) {
+ bool found = false;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
+ if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ fclose(f);
+ return -ESRCH;
+ }
+ } else if (err) {
+ fclose(f);
+ return err;
+ }
+ fclose(f);
+
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK 0xffff0000UL
+#define LIS_R2 0x3c400000UL
+#define ADDIS_R2_R12 0x3c4c0000UL
+#define ADDI_R2_R2 0x38420000UL
+
+ /*
+ * A PPC64 ABIv2 function may have a local and a global entry
+ * point. We need to use the local entry point when patching
+ * functions, so identify and step over the global entry point
+ * sequence.
+ *
+ * The global entry point sequence is always of the form:
+ *
+ * addis r2,r12,XXXX
+ * addi r2,r2,XXXX
+ *
+ * A linker optimisation may convert the addis to lis:
+ *
+ * lis r2,XXXX
+ * addi r2,r2,XXXX
+ */
+ {
+ const __u32 *insn = (const __u32 *)(uintptr_t)addr;
+
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (uintptr_t)(insn + 2) - start + base;
+ }
+#endif
+ return (uintptr_t)addr - start + base;
+}
+
+ssize_t get_rel_offset(uintptr_t addr)
+{
+ size_t start, end, offset;
+ char buf[256];
+ FILE *f;
+ int err, flags;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ err = procmap_query(fileno(f), (const void *)addr, 0, &start, &offset, &flags);
+ if (err == 0) {
+ fclose(f);
+ return (size_t)addr - start + offset;
+ } else if (err != -EOPNOTSUPP) {
+ fclose(f);
+ return err;
+ } else if (err) {
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) {
+ if (addr >= start && addr < end) {
+ fclose(f);
+ return (size_t)addr - start + offset;
+ }
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
+
+static int
+parse_build_id_buf(const void *note_start, Elf32_Word note_size, char *build_id)
+{
+ Elf32_Word note_offs = 0;
+
+ while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+ Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
+ if (nhdr->n_type == 3 && nhdr->n_namesz == sizeof("GNU") &&
+ !strcmp((char *)(nhdr + 1), "GNU") && nhdr->n_descsz > 0 &&
+ nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
+ memcpy(build_id, note_start + note_offs +
+ ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz);
+ memset(build_id + nhdr->n_descsz, 0, BPF_BUILD_ID_SIZE - nhdr->n_descsz);
+ return (int) nhdr->n_descsz;
+ }
+
+ note_offs = note_offs + sizeof(Elf32_Nhdr) +
+ ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+ }
+
+ return -ENOENT;
+}
+
+/* Reads binary from *path* file and returns it in the *build_id* buffer
+ * with *size* which is expected to be at least BPF_BUILD_ID_SIZE bytes.
+ * Returns size of build id on success. On error the error value is
+ * returned.
+ */
+int read_build_id(const char *path, char *build_id, size_t size)
+{
+ int fd, err = -EINVAL;
+ Elf *elf = NULL;
+ GElf_Ehdr ehdr;
+ size_t max, i;
+
+ if (size < BPF_BUILD_ID_SIZE)
+ return -EINVAL;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ (void)elf_version(EV_CURRENT);
+
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf)
+ goto out;
+ if (elf_kind(elf) != ELF_K_ELF)
+ goto out;
+ if (!gelf_getehdr(elf, &ehdr))
+ goto out;
+
+ for (i = 0; i < ehdr.e_phnum; i++) {
+ GElf_Phdr mem, *phdr;
+ char *data;
+
+ phdr = gelf_getphdr(elf, i, &mem);
+ if (!phdr)
+ goto out;
+ if (phdr->p_type != PT_NOTE)
+ continue;
+ data = elf_rawfile(elf, &max);
+ if (!data)
+ goto out;
+ if (phdr->p_offset + phdr->p_memsz > max)
+ goto out;
+ err = parse_build_id_buf(data + phdr->p_offset, phdr->p_memsz, build_id);
+ if (err > 0)
+ break;
+ }
+
+out:
+ if (elf)
+ elf_end(elf);
+ close(fd);
+ return err;
+}
+
+int read_trace_pipe_iter(void (*cb)(const char *str, void *data), void *data, int iter)
+{
+ size_t buflen, n;
+ char *buf = NULL;
+ FILE *fp = NULL;
+
+ if (access(TRACEFS_PIPE, F_OK) == 0)
+ fp = fopen(TRACEFS_PIPE, "r");
+ else
+ fp = fopen(DEBUGFS_PIPE, "r");
+ if (!fp)
+ return -1;
+
+ /* We do not want to wait forever when iter is specified. */
+ if (iter)
+ fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
+
+ while ((n = getline(&buf, &buflen, fp) >= 0) || errno == EAGAIN) {
+ if (n > 0)
+ cb(buf, data);
+ if (iter && !(--iter))
+ break;
+ }
+
+ free(buf);
+ if (fp)
+ fclose(fp);
+ return 0;
+}
+
+static void trace_pipe_cb(const char *str, void *data)
+{
+ printf("%s", str);
+}
+
void read_trace_pipe(void)
{
- int trace_fd;
+ read_trace_pipe_iter(trace_pipe_cb, NULL, 0);
+}
- trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
- if (trace_fd < 0)
- return;
+static size_t symbol_hash(long key, void *ctx __maybe_unused)
+{
+ return str_hash((const char *) key);
+}
+
+static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return strcmp((const char *) key1, (const char *) key2) == 0;
+}
+
+static bool is_invalid_entry(char *buf, bool kernel)
+{
+ if (kernel && strchr(buf, '['))
+ return true;
+ if (!kernel && !strchr(buf, '['))
+ return true;
+ return false;
+}
- while (1) {
- static char buf[4096];
- ssize_t sz;
+static const char * const trace_blacklist[] = {
+ "migrate_disable",
+ "migrate_enable",
+ "rcu_read_unlock_strict",
+ "preempt_count_add",
+ "preempt_count_sub",
+ "__rcu_read_lock",
+ "__rcu_read_unlock",
+ "bpf_get_numa_node_id",
+};
- sz = read(trace_fd, buf, sizeof(buf) - 1);
- if (sz > 0) {
- buf[sz] = 0;
- puts(buf);
+static bool skip_entry(char *name)
+{
+ int i;
+
+ /*
+ * We attach to almost all kernel functions and some of them
+ * will cause 'suspicious RCU usage' when fprobe is attached
+ * to them. Filter out the current culprits - arch_cpu_idle
+ * default_idle and rcu_* functions.
+ */
+ if (!strcmp(name, "arch_cpu_idle"))
+ return true;
+ if (!strcmp(name, "default_idle"))
+ return true;
+ if (!strncmp(name, "rcu_", 4))
+ return true;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ return true;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(trace_blacklist); i++) {
+ if (!strcmp(name, trace_blacklist[i]))
+ return true;
+ }
+
+ return false;
+}
+
+/* Do comparison by ignoring '.llvm.<hash>' suffixes. */
+static int compare_name(const char *name1, const char *name2)
+{
+ const char *res1, *res2;
+ int len1, len2;
+
+ res1 = strstr(name1, ".llvm.");
+ res2 = strstr(name2, ".llvm.");
+ len1 = res1 ? res1 - name1 : strlen(name1);
+ len2 = res2 ? res2 - name2 : strlen(name2);
+
+ if (len1 == len2)
+ return strncmp(name1, name2, len1);
+ if (len1 < len2)
+ return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
+ return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
+}
+
+static int load_kallsyms_compare(const void *p1, const void *p2)
+{
+ return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
+}
+
+static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
+{
+ return compare_name(p1, p2->name);
+}
+
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
+{
+ size_t cap = 0, cnt = 0;
+ char *name = NULL, *ksym_name, **syms = NULL;
+ struct hashmap *map;
+ struct ksyms *ksyms;
+ struct ksym *ks;
+ char buf[256];
+ FILE *f;
+ int err = 0;
+
+ ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
+ if (!ksyms)
+ return -EINVAL;
+
+ /*
+ * The available_filter_functions contains many duplicates,
+ * but other than that all symbols are usable to trace.
+ * Filtering out duplicates by using hashmap__add, which won't
+ * add existing entry.
+ */
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+
+ if (!f)
+ return -EINVAL;
+
+ map = hashmap__new(symbol_hash, symbol_equal, NULL);
+ if (IS_ERR(map)) {
+ err = libbpf_get_error(map);
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
+ if (!ks) {
+ err = -EINVAL;
+ goto error;
}
+
+ ksym_name = ks->name;
+ err = hashmap__add(map, ksym_name, 0);
+ if (err == -EEXIST) {
+ err = 0;
+ continue;
+ }
+ if (err)
+ goto error;
+
+ err = libbpf_ensure_mem((void **) &syms, &cap,
+ sizeof(*syms), cnt + 1);
+ if (err)
+ goto error;
+
+ syms[cnt++] = ksym_name;
}
+
+ *symsp = syms;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ hashmap__free(map);
+ if (err)
+ free(syms);
+ return err;
+}
+
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
+{
+ unsigned long *addr, *addrs, *tmp_addrs;
+ int err = 0, max_cnt, inc_cnt;
+ char *name = NULL;
+ size_t cnt = 0;
+ char buf[256];
+ FILE *f;
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
+
+ if (!f)
+ return -ENOENT;
+
+ /* In my local setup, the number of entries is 50k+ so Let us initially
+ * allocate space to hold 64k entries. If 64k is not enough, incrementally
+ * increase 1k each time.
+ */
+ max_cnt = 65536;
+ inc_cnt = 1024;
+ addrs = malloc(max_cnt * sizeof(long));
+ if (addrs == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ if (cnt == max_cnt) {
+ max_cnt += inc_cnt;
+ tmp_addrs = realloc(addrs, max_cnt * sizeof(long));
+ if (!tmp_addrs) {
+ err = -ENOMEM;
+ goto error;
+ }
+ addrs = tmp_addrs;
+ }
+
+ addrs[cnt++] = (unsigned long)addr;
+ }
+
+ *addrsp = addrs;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ if (err)
+ free(addrs);
+ return err;
}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index f62fdef9e589..9437bdd4afa5 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -4,18 +4,44 @@
#include <bpf/libbpf.h>
+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
+
struct ksym {
long addr;
char *name;
};
+struct ksyms;
+
+typedef int (*ksym_cmp_t)(const void *p1, const void *p2);
+typedef int (*ksym_search_cmp_t)(const void *p1, const struct ksym *p2);
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
+struct ksyms *load_kallsyms_local(void);
+struct ksym *ksym_search_local(struct ksyms *ksyms, long key);
+long ksym_get_addr_local(struct ksyms *ksyms, const char *name);
+void free_kallsyms_local(struct ksyms *ksyms);
+
+struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb);
+struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p1,
+ ksym_search_cmp_t cmp_cb);
+
/* open kallsyms and find addresses on the fly, faster than load + search. */
int kallsyms_find(const char *sym, unsigned long long *addr);
void read_trace_pipe(void);
+int read_trace_pipe_iter(void (*cb)(const char *str, void *data),
+ void *data, int iter);
+
+ssize_t get_uprobe_offset(const void *addr);
+ssize_t get_rel_offset(uintptr_t addr);
+
+int read_build_id(const char *path, char *build_id, size_t size);
+
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel);
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel);
#endif
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c
new file mode 100644
index 000000000000..f997d7ec8fd0
--- /dev/null
+++ b/tools/testing/selftests/bpf/unpriv_helpers.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <zlib.h>
+
+#include "unpriv_helpers.h"
+
+static gzFile open_config(void)
+{
+ struct utsname uts;
+ char buf[PATH_MAX];
+ gzFile config;
+
+ if (uname(&uts)) {
+ perror("uname");
+ goto config_gz;
+ }
+
+ snprintf(buf, sizeof(buf), "/boot/config-%s", uts.release);
+ config = gzopen(buf, "rb");
+ if (config)
+ return config;
+ fprintf(stderr, "gzopen %s: %s\n", buf, strerror(errno));
+
+config_gz:
+ config = gzopen("/proc/config.gz", "rb");
+ if (!config)
+ perror("gzopen /proc/config.gz");
+ return config;
+}
+
+static int config_contains(const char *pat)
+{
+ const char *msg;
+ char buf[1024];
+ gzFile config;
+ int n, err;
+
+ config = open_config();
+ if (!config)
+ return -1;
+
+ for (;;) {
+ if (!gzgets(config, buf, sizeof(buf))) {
+ msg = gzerror(config, &err);
+ if (err == Z_ERRNO)
+ perror("gzgets /proc/config.gz");
+ else if (err != Z_OK)
+ fprintf(stderr, "gzgets /proc/config.gz: %s", msg);
+ gzclose(config);
+ return -1;
+ }
+ n = strlen(buf);
+ if (buf[n - 1] == '\n')
+ buf[n - 1] = 0;
+ if (strcmp(buf, pat) == 0) {
+ gzclose(config);
+ return 1;
+ }
+ }
+ gzclose(config);
+ return 0;
+}
+
+static bool cmdline_contains(const char *pat)
+{
+ char cmdline[4096], *c;
+ int fd, ret = false;
+
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (fd < 0) {
+ perror("open /proc/cmdline");
+ return false;
+ }
+
+ if (read(fd, cmdline, sizeof(cmdline) - 1) < 0) {
+ perror("read /proc/cmdline");
+ goto out;
+ }
+
+ cmdline[sizeof(cmdline) - 1] = '\0';
+ for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) {
+ if (strncmp(c, pat, strlen(c)))
+ continue;
+ ret = true;
+ break;
+ }
+out:
+ close(fd);
+ return ret;
+}
+
+static int get_mitigations_off(void)
+{
+ int enabled_in_config;
+
+ if (cmdline_contains("mitigations=off"))
+ return 1;
+ enabled_in_config = config_contains("CONFIG_CPU_MITIGATIONS=y");
+ if (enabled_in_config < 0)
+ return -1;
+ return !enabled_in_config;
+}
+
+bool get_unpriv_disabled(void)
+{
+ int mitigations_off;
+ bool disabled;
+ char buf[2];
+ FILE *fd;
+
+ fd = fopen("/proc/sys/" UNPRIV_SYSCTL, "r");
+ if (fd) {
+ disabled = (fgets(buf, 2, fd) == buf && atoi(buf));
+ fclose(fd);
+ } else {
+ perror("fopen /proc/sys/" UNPRIV_SYSCTL);
+ disabled = true;
+ }
+
+ if (disabled)
+ return true;
+
+ /*
+ * Some unpriv tests rely on spectre mitigations being on.
+ * If mitigations are off or status can't be determined
+ * assume that unpriv tests are disabled.
+ */
+ mitigations_off = get_mitigations_off();
+ if (mitigations_off < 0) {
+ fprintf(stderr,
+ "Can't determine if mitigations are enabled, disabling unpriv tests.");
+ return true;
+ }
+ return mitigations_off;
+}
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.h b/tools/testing/selftests/bpf/unpriv_helpers.h
new file mode 100644
index 000000000000..151f67329665
--- /dev/null
+++ b/tools/testing/selftests/bpf/unpriv_helpers.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdbool.h>
+
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+
+bool get_unpriv_disabled(void);
diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c
new file mode 100644
index 000000000000..dd38dc68f635
--- /dev/null
+++ b/tools/testing/selftests/bpf/uprobe_multi.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sdt.h>
+
+#ifndef MADV_POPULATE_READ
+#define MADV_POPULATE_READ 22
+#endif
+
+#ifndef MADV_PAGEOUT
+#define MADV_PAGEOUT 21
+#endif
+
+int __attribute__((weak)) uprobe(void)
+{
+ return 0;
+}
+
+#define __PASTE(a, b) a##b
+#define PASTE(a, b) __PASTE(a, b)
+
+#define NAME(name, idx) PASTE(name, idx)
+
+#define DEF(name, idx) int __attribute__((weak)) NAME(name, idx)(void) { return 0; }
+#define CALL(name, idx) NAME(name, idx)();
+
+#define F(body, name, idx) body(name, idx)
+
+#define F10(body, name, idx) \
+ F(body, PASTE(name, idx), 0) F(body, PASTE(name, idx), 1) F(body, PASTE(name, idx), 2) \
+ F(body, PASTE(name, idx), 3) F(body, PASTE(name, idx), 4) F(body, PASTE(name, idx), 5) \
+ F(body, PASTE(name, idx), 6) F(body, PASTE(name, idx), 7) F(body, PASTE(name, idx), 8) \
+ F(body, PASTE(name, idx), 9)
+
+#define F100(body, name, idx) \
+ F10(body, PASTE(name, idx), 0) F10(body, PASTE(name, idx), 1) F10(body, PASTE(name, idx), 2) \
+ F10(body, PASTE(name, idx), 3) F10(body, PASTE(name, idx), 4) F10(body, PASTE(name, idx), 5) \
+ F10(body, PASTE(name, idx), 6) F10(body, PASTE(name, idx), 7) F10(body, PASTE(name, idx), 8) \
+ F10(body, PASTE(name, idx), 9)
+
+#define F1000(body, name, idx) \
+ F100(body, PASTE(name, idx), 0) F100(body, PASTE(name, idx), 1) F100(body, PASTE(name, idx), 2) \
+ F100(body, PASTE(name, idx), 3) F100(body, PASTE(name, idx), 4) F100(body, PASTE(name, idx), 5) \
+ F100(body, PASTE(name, idx), 6) F100(body, PASTE(name, idx), 7) F100(body, PASTE(name, idx), 8) \
+ F100(body, PASTE(name, idx), 9)
+
+#define F10000(body, name, idx) \
+ F1000(body, PASTE(name, idx), 0) F1000(body, PASTE(name, idx), 1) F1000(body, PASTE(name, idx), 2) \
+ F1000(body, PASTE(name, idx), 3) F1000(body, PASTE(name, idx), 4) F1000(body, PASTE(name, idx), 5) \
+ F1000(body, PASTE(name, idx), 6) F1000(body, PASTE(name, idx), 7) F1000(body, PASTE(name, idx), 8) \
+ F1000(body, PASTE(name, idx), 9)
+
+F10000(DEF, uprobe_multi_func_, 0)
+F10000(DEF, uprobe_multi_func_, 1)
+F10000(DEF, uprobe_multi_func_, 2)
+F10000(DEF, uprobe_multi_func_, 3)
+F10000(DEF, uprobe_multi_func_, 4)
+
+static int bench(void)
+{
+ F10000(CALL, uprobe_multi_func_, 0)
+ F10000(CALL, uprobe_multi_func_, 1)
+ F10000(CALL, uprobe_multi_func_, 2)
+ F10000(CALL, uprobe_multi_func_, 3)
+ F10000(CALL, uprobe_multi_func_, 4)
+ return 0;
+}
+
+#define PROBE STAP_PROBE(test, usdt);
+
+#define PROBE10 PROBE PROBE PROBE PROBE PROBE \
+ PROBE PROBE PROBE PROBE PROBE
+#define PROBE100 PROBE10 PROBE10 PROBE10 PROBE10 PROBE10 \
+ PROBE10 PROBE10 PROBE10 PROBE10 PROBE10
+#define PROBE1000 PROBE100 PROBE100 PROBE100 PROBE100 PROBE100 \
+ PROBE100 PROBE100 PROBE100 PROBE100 PROBE100
+#define PROBE10000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 \
+ PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000
+
+static int usdt(void)
+{
+ PROBE10000
+ PROBE10000
+ PROBE10000
+ PROBE10000
+ PROBE10000
+ return 0;
+}
+
+extern char build_id_start[];
+extern char build_id_end[];
+
+int __attribute__((weak)) trigger_uprobe(bool build_id_resident)
+{
+ int page_sz = sysconf(_SC_PAGESIZE);
+ void *addr;
+
+ /* page-align build ID start */
+ addr = (void *)((uintptr_t)&build_id_start & ~(page_sz - 1));
+
+ /* to guarantee MADV_PAGEOUT work reliably, we need to ensure that
+ * memory range is mapped into current process, so we unconditionally
+ * do MADV_POPULATE_READ, and then MADV_PAGEOUT, if necessary
+ */
+ madvise(addr, page_sz, MADV_POPULATE_READ);
+ if (!build_id_resident)
+ madvise(addr, page_sz, MADV_PAGEOUT);
+
+ (void)uprobe();
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ goto error;
+
+ if (!strcmp("bench", argv[1]))
+ return bench();
+ if (!strcmp("usdt", argv[1]))
+ return usdt();
+ if (!strcmp("uprobe-paged-out", argv[1]))
+ return trigger_uprobe(false /* page-out build ID */);
+ if (!strcmp("uprobe-paged-in", argv[1]))
+ return trigger_uprobe(true /* page-in build ID */);
+
+error:
+ fprintf(stderr, "usage: %s <bench|usdt>\n", argv[0]);
+ return -1;
+}
diff --git a/tools/testing/selftests/bpf/uprobe_multi.ld b/tools/testing/selftests/bpf/uprobe_multi.ld
new file mode 100644
index 000000000000..a2e94828bc8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/uprobe_multi.ld
@@ -0,0 +1,11 @@
+SECTIONS
+{
+ . = ALIGN(4096);
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ . = ALIGN(4096);
+}
+INSERT AFTER .text;
+
+build_id_start = ADDR(.note.gnu.build-id);
+build_id_end = ADDR(.note.gnu.build-id) + SIZEOF(.note.gnu.build-id);
+
diff --git a/tools/testing/selftests/bpf/uptr_test_common.h b/tools/testing/selftests/bpf/uptr_test_common.h
new file mode 100644
index 000000000000..f8a134ba12f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/uptr_test_common.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _UPTR_TEST_COMMON_H
+#define _UPTR_TEST_COMMON_H
+
+#define MAGIC_VALUE 0xabcd1234
+#define PAGE_SIZE 4096
+
+#ifdef __BPF__
+/* Avoid fwd btf type being generated for the following struct */
+struct large_data *dummy_large_data;
+struct empty_data *dummy_empty_data;
+struct user_data *dummy_data;
+struct cgroup *dummy_cgrp;
+#else
+#define __uptr
+#define __kptr
+#endif
+
+struct user_data {
+ int a;
+ int b;
+ int result;
+ int nested_result;
+};
+
+struct nested_udata {
+ struct user_data __uptr *udata;
+};
+
+struct value_type {
+ struct user_data __uptr *udata;
+ struct cgroup __kptr *cgrp;
+ struct nested_udata nested;
+};
+
+struct value_lock_type {
+ struct user_data __uptr *udata;
+ struct bpf_spin_lock lock;
+};
+
+struct large_data {
+ __u8 one_page[PAGE_SIZE];
+ int a;
+};
+
+struct large_uptr {
+ struct large_data __uptr *udata;
+};
+
+struct empty_data {
+};
+
+struct empty_uptr {
+ struct empty_data __uptr *udata;
+};
+
+struct kstruct_uptr {
+ struct cgroup __uptr *cgrp;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index db781052758d..4ed795655b9f 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -1,35 +1,99 @@
+#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
+#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
+#include <signal.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SHARED 1
+#include "bpf/libbpf_internal.h"
+
+#define SEC(name) __attribute__((section(name), used))
#define BUF_SIZE 256
-static __attribute__((noinline))
-void urandom_read(int fd, int count)
+/* defined in urandom_read_aux.c */
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+/* these are coming from urandom_read_lib{1,2}.c */
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz);
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+
+int urandlib_api(void);
+COMPAT_VERSION(urandlib_api_old, urandlib_api, LIBURANDOM_READ_1.0.0)
+int urandlib_api_old(void);
+int urandlib_api_sameoffset(void);
+
+unsigned short urand_read_with_sema_semaphore SEC(".probes");
+
+static noinline void urandom_read(int fd, int count)
{
- char buf[BUF_SIZE];
- int i;
+ char buf[BUF_SIZE];
+ int i;
+
+ for (i = 0; i < count; ++i) {
+ read(fd, buf, BUF_SIZE);
- for (i = 0; i < count; ++i)
- read(fd, buf, BUF_SIZE);
+ /* trigger USDTs defined in executable itself */
+ urand_read_without_sema(i, count, BUF_SIZE);
+ STAP_PROBE3(urand, read_with_sema, i, count, BUF_SIZE);
+
+ /* trigger USDTs defined in shared lib */
+ urandlib_read_without_sema(i, count, BUF_SIZE);
+ urandlib_read_with_sema(i, count, BUF_SIZE);
+ }
+}
+
+static volatile bool parent_ready;
+
+static void handle_sigpipe(int sig)
+{
+ parent_ready = true;
}
int main(int argc, char *argv[])
{
int fd = open("/dev/urandom", O_RDONLY);
int count = 4;
+ bool report_pid = false;
if (fd < 0)
return 1;
- if (argc == 2)
+ if (argc >= 2)
count = atoi(argv[1]);
+ if (argc >= 3) {
+ report_pid = true;
+ /* install SIGPIPE handler to catch when parent closes their
+ * end of the pipe (on the other side of our stdout)
+ */
+ signal(SIGPIPE, handle_sigpipe);
+ }
+
+ /* report PID and wait for parent process to send us "signal" by
+ * closing stdout
+ */
+ if (report_pid) {
+ while (!parent_ready) {
+ fprintf(stdout, "%d\n", getpid());
+ fflush(stdout);
+ }
+ /* at this point stdout is closed, parent process knows our
+ * PID and is ready to trace us
+ */
+ }
urandom_read(fd, count);
+ urandlib_api();
+ urandlib_api_old();
+ urandlib_api_sameoffset();
+
close(fd);
return 0;
}
diff --git a/tools/testing/selftests/bpf/urandom_read_aux.c b/tools/testing/selftests/bpf/urandom_read_aux.c
new file mode 100644
index 000000000000..6132edcfea74
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_aux.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ /* semaphore-less USDT */
+ STAP_PROBE3(urand, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c
new file mode 100644
index 000000000000..8c1356d8b4ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib1.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SHARED 1
+#include "bpf/libbpf_internal.h"
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short urandlib_read_with_sema_semaphore SEC(".probes");
+
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz);
+}
+
+COMPAT_VERSION(urandlib_api_v1, urandlib_api, LIBURANDOM_READ_1.0.0)
+int urandlib_api_v1(void)
+{
+ return 1;
+}
+
+DEFAULT_VERSION(urandlib_api_v2, urandlib_api, LIBURANDOM_READ_2.0.0)
+int urandlib_api_v2(void)
+{
+ return 2;
+}
+
+COMPAT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_1.0.0)
+DEFAULT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_2.0.0)
+int urandlib_api_sameoffset(void)
+{
+ return 3;
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib2.c b/tools/testing/selftests/bpf/urandom_read_lib2.c
new file mode 100644
index 000000000000..9d401ad9838f
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib2.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ STAP_PROBE3(urandlib, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h
new file mode 100644
index 000000000000..549d1f774810
--- /dev/null
+++ b/tools/testing/selftests/bpf/usdt.h
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * This single-header library defines a collection of variadic macros for
+ * defining and triggering USDTs (User Statically-Defined Tracepoints):
+ *
+ * - For USDTs without associated semaphore:
+ * USDT(group, name, args...)
+ *
+ * - For USDTs with implicit (transparent to the user) semaphore:
+ * USDT_WITH_SEMA(group, name, args...)
+ * USDT_IS_ACTIVE(group, name)
+ *
+ * - For USDTs with explicit (user-defined and provided) semaphore:
+ * USDT_WITH_EXPLICIT_SEMA(sema, group, name, args...)
+ * USDT_SEMA_IS_ACTIVE(sema)
+ *
+ * all of which emit a NOP instruction into the instruction stream, and so
+ * have *zero* overhead for the surrounding code. USDTs are identified by
+ * a combination of `group` and `name` identifiers, which is used by external
+ * tracing tooling (tracers) for identifying exact USDTs of interest.
+ *
+ * USDTs can have an associated (2-byte) activity counter (USDT semaphore),
+ * automatically maintained by Linux kernel whenever any correctly written
+ * BPF-based tracer is attached to the USDT. This USDT semaphore can be used
+ * to check whether there is a need to do any extra data collection and
+ * processing for a given USDT (if necessary), and otherwise avoid extra work
+ * for a common case of USDT not being traced ("active").
+ *
+ * See documentation for USDT_WITH_SEMA()/USDT_IS_ACTIVE() or
+ * USDT_WITH_EXPLICIT_SEMA()/USDT_SEMA_IS_ACTIVE() APIs below for details on
+ * working with USDTs with implicitly or explicitly associated
+ * USDT semaphores, respectively.
+ *
+ * There is also some additional data recorded into an auxiliary note
+ * section. The data in the note section describes the operands, in terms of
+ * size and location, used by tracing tooling to know where to find USDT
+ * arguments. Each location is encoded as an assembler operand string.
+ * Tracing tools (bpftrace and BPF-based tracers, systemtap, etc) insert
+ * breakpoints on top of the nop, and decode the location operand-strings,
+ * like an assembler, to find the values being passed.
+ *
+ * The operand strings are selected by the compiler for each operand.
+ * They are constrained by inline-assembler codes.The default is:
+ *
+ * #define USDT_ARG_CONSTRAINT nor
+ *
+ * This is a good default if the operands tend to be integral and
+ * moderate in number (smaller than number of registers). In other
+ * cases, the compiler may report "'asm' requires impossible reload" or
+ * similar. In this case, consider simplifying the macro call (fewer
+ * and simpler operands), reduce optimization, or override the default
+ * constraints string via:
+ *
+ * #define USDT_ARG_CONSTRAINT g
+ * #include <usdt.h>
+ *
+ * For some historical description of USDT v3 format (the one used by this
+ * library and generally recognized and assumed by BPF-based tracing tools)
+ * see [0]. The more formal specification can be found at [1]. Additional
+ * argument constraints information can be found at [2].
+ *
+ * Original SystemTap's sys/sdt.h implementation ([3]) was used as a base for
+ * this USDT library implementation. Current implementation differs *a lot* in
+ * terms of exposed user API and general usability, which was the main goal
+ * and focus of the reimplementation work. Nevertheless, underlying recorded
+ * USDT definitions are fully binary compatible and any USDT-based tooling
+ * should work equally well with USDTs defined by either SystemTap's or this
+ * library's USDT implementation.
+ *
+ * [0] https://ecos.sourceware.org/ml/systemtap/2010-q3/msg00145.html
+ * [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ * [2] https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ * [3] https://sourceware.org/git/?p=systemtap.git;a=blob;f=includes/sys/sdt.h
+ */
+#ifndef __USDT_H
+#define __USDT_H
+
+/*
+ * Changelog:
+ *
+ * 0.1.0
+ * -----
+ * - Initial release
+ */
+#define USDT_MAJOR_VERSION 0
+#define USDT_MINOR_VERSION 1
+#define USDT_PATCH_VERSION 0
+
+/* C++20 and C23 added __VA_OPT__ as a standard replacement for non-standard `##__VA_ARGS__` extension */
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) || (defined(__cplusplus) && __cplusplus > 201703L)
+#define __usdt_va_opt 1
+#define __usdt_va_args(...) __VA_OPT__(,) __VA_ARGS__
+#else
+#define __usdt_va_args(...) , ##__VA_ARGS__
+#endif
+
+/*
+ * Trigger USDT with `group`:`name` identifier and pass through `args` as its
+ * arguments. Zero arguments are acceptable as well. No USDT semaphore is
+ * associated with this USDT.
+ *
+ * Such "semaphoreless" USDTs are commonly used when there is no extra data
+ * collection or processing needed to collect and prepare USDT arguments and
+ * they are just available in the surrounding code. USDT() macro will just
+ * record their locations in CPU registers or in memory for tracing tooling to
+ * be able to access them, if necessary.
+ */
+#ifdef __usdt_va_opt
+#define USDT(group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_none, 0 __VA_OPT__(,) __VA_ARGS__)
+#else
+#define USDT(group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_none, 0, ##__VA_ARGS__)
+#endif
+
+/*
+ * Trigger USDT with `group`:`name` identifier and pass through `args` as its
+ * arguments. Zero arguments are acceptable as well. USDT also get an
+ * implicitly-defined associated USDT semaphore, which will be "activated" by
+ * tracing tooling and can be used to check whether USDT is being actively
+ * observed.
+ *
+ * USDTs with semaphore are commonly used when there is a need to perform
+ * additional data collection and processing to prepare USDT arguments, which
+ * otherwise might not be necessary for the rest of application logic. In such
+ * case, USDT semaphore can be used to avoid unnecessary extra work. If USDT
+ * is not traced (which is presumed to be a common situation), the associated
+ * USDT semaphore is "inactive", and so there is no need to waste resources to
+ * prepare USDT arguments. Use USDT_IS_ACTIVE(group, name) to check whether
+ * USDT is "active".
+ *
+ * N.B. There is an inherent (albeit short) gap between checking whether USDT
+ * is active and triggering corresponding USDT, in which external tracer can
+ * be attached to an USDT and activate USDT semaphore after the activity check.
+ * If such a race occurs, tracers might miss one USDT execution. Tracers are
+ * expected to accommodate such possibility and this is expected to not be
+ * a problem for applications and tracers.
+ *
+ * N.B. Implicit USDT semaphore defined by USDT_WITH_SEMA() is contained
+ * within a single executable or shared library and is not shared outside
+ * them. I.e., if you use USDT_WITH_SEMA() with the same USDT group and name
+ * identifier across executable and shared library, it will work and won't
+ * conflict, per se, but will define independent USDT semaphores, one for each
+ * shared library/executable in which USDT_WITH_SEMA(group, name) is used.
+ * That is, if you attach to this USDT in one shared library (or executable),
+ * then only USDT semaphore within that shared library (or executable) will be
+ * updated by the kernel, while other libraries (or executable) will not see
+ * activated USDT semaphore. In short, it's best to use unique USDT group:name
+ * identifiers across different shared libraries (and, equivalently, between
+ * executable and shared library). This is advanced consideration and is
+ * rarely (if ever) seen in practice, but just to avoid surprises this is
+ * called out here. (Static libraries become a part of final executable, once
+ * linked by linker, so the above considerations don't apply to them.)
+ */
+#ifdef __usdt_va_opt
+#define USDT_WITH_SEMA(group, name, ...) \
+ __usdt_probe(group, name, \
+ __usdt_sema_implicit, __usdt_sema_name(group, name) \
+ __VA_OPT__(,) __VA_ARGS__)
+#else
+#define USDT_WITH_SEMA(group, name, ...) \
+ __usdt_probe(group, name, \
+ __usdt_sema_implicit, __usdt_sema_name(group, name), \
+ ##__VA_ARGS__)
+#endif
+
+struct usdt_sema { volatile unsigned short active; };
+
+/*
+ * Check if USDT with `group`:`name` identifier is "active" (i.e., whether it
+ * is attached to by external tracing tooling and is actively observed).
+ *
+ * This macro can be used to decide whether any additional and potentially
+ * expensive data collection or processing should be done to pass extra
+ * information into the given USDT. It is assumed that USDT is triggered with
+ * USDT_WITH_SEMA() macro which will implicitly define associated USDT
+ * semaphore. (If one needs more control over USDT semaphore, see
+ * USDT_DEFINE_SEMA() and USDT_WITH_EXPLICIT_SEMA() macros below.)
+ *
+ * N.B. Such checks are necessarily racy and speculative. Between checking
+ * whether USDT is active and triggering the USDT itself, tracer can be
+ * detached with no notification. This race should be extremely rare and worst
+ * case should result in one-time wasted extra data collection and processing.
+ */
+#define USDT_IS_ACTIVE(group, name) ({ \
+ extern struct usdt_sema __usdt_sema_name(group, name) \
+ __usdt_asm_name(__usdt_sema_name(group, name)); \
+ __usdt_sema_implicit(__usdt_sema_name(group, name)); \
+ __usdt_sema_name(group, name).active > 0; \
+})
+
+/*
+ * APIs for working with user-defined explicit USDT semaphores.
+ *
+ * This is a less commonly used advanced API for use cases in which user needs
+ * an explicit control over (potentially shared across multiple USDTs) USDT
+ * semaphore instance. This can be used when there is a group of logically
+ * related USDTs that all need extra data collection and processing whenever
+ * any of a family of related USDTs are "activated" (i.e., traced). In such
+ * a case, all such related USDTs will be associated with the same shared USDT
+ * semaphore defined with USDT_DEFINE_SEMA() and the USDTs themselves will be
+ * triggered with USDT_WITH_EXPLICIT_SEMA() macros, taking an explicit extra
+ * USDT semaphore identifier as an extra parameter.
+ */
+
+/**
+ * Underlying C global variable name for user-defined USDT semaphore with
+ * `sema` identifier. Could be useful for debugging, but normally shouldn't be
+ * used explicitly.
+ */
+#define USDT_SEMA(sema) __usdt_sema_##sema
+
+/*
+ * Define storage for user-defined USDT semaphore `sema`.
+ *
+ * Should be used only once in non-header source file to let compiler allocate
+ * space for the semaphore variable. Just like with any other global variable.
+ *
+ * This macro can be used anywhere where global variable declaration is
+ * allowed. Just like with global variable definitions, there should be only
+ * one definition of user-defined USDT semaphore with given `sema` identifier,
+ * otherwise compiler or linker will complain about duplicate variable
+ * definition.
+ *
+ * For C++, it is allowed to use USDT_DEFINE_SEMA() both in global namespace
+ * and inside namespaces (including nested namespaces). Just make sure that
+ * USDT_DECLARE_SEMA() is placed within the namespace where this semaphore is
+ * referenced, or any of its parent namespaces, so the C++ language-level
+ * identifier is visible to the code that needs to reference the semaphore.
+ * At the lowest layer, USDT semaphores have global naming and visibility
+ * (they have a corresponding `__usdt_sema_<name>` symbol, which can be linked
+ * against from C or C++ code, if necessary). To keep it simple, putting
+ * USDT_DECLARE_SEMA() declarations into global namespaces is the simplest
+ * no-brainer solution. All these aspects are irrelevant for plain C, because
+ * C doesn't have namespaces and everything is always in the global namespace.
+ *
+ * N.B. Due to USDT metadata being recorded in non-allocatable ELF note
+ * section, it has limitations when it comes to relocations, which, in
+ * practice, means that it's not possible to correctly share USDT semaphores
+ * between main executable and shared libraries, or even between multiple
+ * shared libraries. USDT semaphore has to be contained to individual shared
+ * library or executable to avoid unpleasant surprises with half-working USDT
+ * semaphores. We enforce this by marking semaphore ELF symbols as having
+ * a hidden visibility. This is quite an advanced use case and consideration
+ * and for most users this should have no consequences whatsoever.
+ */
+#define USDT_DEFINE_SEMA(sema) \
+ struct usdt_sema __usdt_sema_sec USDT_SEMA(sema) \
+ __usdt_asm_name(USDT_SEMA(sema)) \
+ __attribute__((visibility("hidden"))) = { 0 }
+
+/*
+ * Declare extern reference to user-defined USDT semaphore `sema`.
+ *
+ * Refers to a variable defined in another compilation unit by
+ * USDT_DEFINE_SEMA() and allows to use the same USDT semaphore across
+ * multiple compilation units (i.e., .c and .cpp files).
+ *
+ * See USDT_DEFINE_SEMA() notes above for C++ language usage peculiarities.
+ */
+#define USDT_DECLARE_SEMA(sema) \
+ extern struct usdt_sema USDT_SEMA(sema) __usdt_asm_name(USDT_SEMA(sema))
+
+/*
+ * Check if user-defined USDT semaphore `sema` is "active" (i.e., whether it
+ * is attached to by external tracing tooling and is actively observed).
+ *
+ * This macro can be used to decide whether any additional and potentially
+ * expensive data collection or processing should be done to pass extra
+ * information into USDT(s) associated with USDT semaphore `sema`.
+ *
+ * N.B. Such checks are necessarily racy. Between checking the state of USDT
+ * semaphore and triggering associated USDT(s), the active tracer might attach
+ * or detach. This race should be extremely rare and worst case should result
+ * in one-time missed USDT event or wasted extra data collection and
+ * processing. USDT-using tracers should be written with this in mind and is
+ * not a concern of the application defining USDTs with associated semaphore.
+ */
+#define USDT_SEMA_IS_ACTIVE(sema) (USDT_SEMA(sema).active > 0)
+
+/*
+ * Invoke USDT specified by `group` and `name` identifiers and associate
+ * explicitly user-defined semaphore `sema` with it. Pass through `args` as
+ * USDT arguments. `args` are optional and zero arguments are acceptable.
+ *
+ * Semaphore is defined with the help of USDT_DEFINE_SEMA() macro and can be
+ * checked whether active with USDT_SEMA_IS_ACTIVE().
+ */
+#ifdef __usdt_va_opt
+#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema), ##__VA_ARGS__)
+#else
+#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema) __VA_OPT__(,) __VA_ARGS__)
+#endif
+
+/*
+ * Adjustable implementation aspects
+ */
+#ifndef USDT_ARG_CONSTRAINT
+#if defined __powerpc__
+#define USDT_ARG_CONSTRAINT nZr
+#elif defined __arm__
+#define USDT_ARG_CONSTRAINT g
+#elif defined __loongarch__
+#define USDT_ARG_CONSTRAINT nmr
+#else
+#define USDT_ARG_CONSTRAINT nor
+#endif
+#endif /* USDT_ARG_CONSTRAINT */
+
+#ifndef USDT_NOP
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define USDT_NOP nop 0
+#else
+#define USDT_NOP nop
+#endif
+#endif /* USDT_NOP */
+
+/*
+ * Implementation details
+ */
+/* USDT name for implicitly-defined USDT semaphore, derived from group:name */
+#define __usdt_sema_name(group, name) __usdt_sema_##group##__##name
+/* ELF section into which USDT semaphores are put */
+#define __usdt_sema_sec __attribute__((section(".probes")))
+
+#define __usdt_concat(a, b) a ## b
+#define __usdt_apply(fn, n) __usdt_concat(fn, n)
+
+#ifndef __usdt_nth
+#define __usdt_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, N, ...) N
+#endif
+
+#ifndef __usdt_narg
+#ifdef __usdt_va_opt
+#define __usdt_narg(...) __usdt_nth(_ __VA_OPT__(,) __VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#else
+#define __usdt_narg(...) __usdt_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+#endif /* __usdt_narg */
+
+#define __usdt_hash #
+#define __usdt_str_(x) #x
+#define __usdt_str(x) __usdt_str_(x)
+
+#ifndef __usdt_asm_name
+#define __usdt_asm_name(name) __asm__(__usdt_str(name))
+#endif
+
+#define __usdt_asm0() "\n"
+#define __usdt_asm1(x) __usdt_str(x) "\n"
+#define __usdt_asm2(x, ...) __usdt_str(x) "," __usdt_asm1(__VA_ARGS__)
+#define __usdt_asm3(x, ...) __usdt_str(x) "," __usdt_asm2(__VA_ARGS__)
+#define __usdt_asm4(x, ...) __usdt_str(x) "," __usdt_asm3(__VA_ARGS__)
+#define __usdt_asm5(x, ...) __usdt_str(x) "," __usdt_asm4(__VA_ARGS__)
+#define __usdt_asm6(x, ...) __usdt_str(x) "," __usdt_asm5(__VA_ARGS__)
+#define __usdt_asm7(x, ...) __usdt_str(x) "," __usdt_asm6(__VA_ARGS__)
+#define __usdt_asm8(x, ...) __usdt_str(x) "," __usdt_asm7(__VA_ARGS__)
+#define __usdt_asm9(x, ...) __usdt_str(x) "," __usdt_asm8(__VA_ARGS__)
+#define __usdt_asm10(x, ...) __usdt_str(x) "," __usdt_asm9(__VA_ARGS__)
+#define __usdt_asm11(x, ...) __usdt_str(x) "," __usdt_asm10(__VA_ARGS__)
+#define __usdt_asm12(x, ...) __usdt_str(x) "," __usdt_asm11(__VA_ARGS__)
+#define __usdt_asm(...) __usdt_apply(__usdt_asm, __usdt_narg(__VA_ARGS__))(__VA_ARGS__)
+
+#ifdef __LP64__
+#define __usdt_asm_addr .8byte
+#else
+#define __usdt_asm_addr .4byte
+#endif
+
+#define __usdt_asm_strz_(x) __usdt_asm1(.asciz #x)
+#define __usdt_asm_strz(x) __usdt_asm_strz_(x)
+#define __usdt_asm_str_(x) __usdt_asm1(.ascii #x)
+#define __usdt_asm_str(x) __usdt_asm_str_(x)
+
+/* "semaphoreless" USDT case */
+#ifndef __usdt_sema_none
+#define __usdt_sema_none(sema)
+#endif
+
+/* implicitly defined __usdt_sema__group__name semaphore (using weak symbols) */
+#ifndef __usdt_sema_implicit
+#define __usdt_sema_implicit(sema) \
+ __asm__ __volatile__ ( \
+ __usdt_asm1(.ifndef sema) \
+ __usdt_asm3( .pushsection .probes, "aw", "progbits") \
+ __usdt_asm1( .weak sema) \
+ __usdt_asm1( .hidden sema) \
+ __usdt_asm1( .align 2) \
+ __usdt_asm1(sema:) \
+ __usdt_asm1( .zero 2) \
+ __usdt_asm2( .type sema, @object) \
+ __usdt_asm2( .size sema, 2) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.endif) \
+ );
+#endif
+
+/* externally defined semaphore using USDT_DEFINE_SEMA() and passed explicitly by user */
+#ifndef __usdt_sema_explicit
+#define __usdt_sema_explicit(sema) \
+ __asm__ __volatile__ ("" :: "m" (sema));
+#endif
+
+/* main USDT definition (nop and .note.stapsdt metadata) */
+#define __usdt_probe(group, name, sema_def, sema, ...) do { \
+ sema_def(sema) \
+ __asm__ __volatile__ ( \
+ __usdt_asm( 990: USDT_NOP) \
+ __usdt_asm3( .pushsection .note.stapsdt, "", "note") \
+ __usdt_asm1( .balign 4) \
+ __usdt_asm3( .4byte 992f-991f,994f-993f,3) \
+ __usdt_asm1(991: .asciz "stapsdt") \
+ __usdt_asm1(992: .balign 4) \
+ __usdt_asm1(993: __usdt_asm_addr 990b) \
+ __usdt_asm1( __usdt_asm_addr _.stapsdt.base) \
+ __usdt_asm1( __usdt_asm_addr sema) \
+ __usdt_asm_strz(group) \
+ __usdt_asm_strz(name) \
+ __usdt_asm_args(__VA_ARGS__) \
+ __usdt_asm1( .ascii "\0") \
+ __usdt_asm1(994: .balign 4) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.ifndef _.stapsdt.base) \
+ __usdt_asm5( .pushsection .stapsdt.base,"aG","progbits",.stapsdt.base,comdat)\
+ __usdt_asm1( .weak _.stapsdt.base) \
+ __usdt_asm1( .hidden _.stapsdt.base) \
+ __usdt_asm1(_.stapsdt.base:) \
+ __usdt_asm1( .space 1) \
+ __usdt_asm2( .size _.stapsdt.base, 1) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.endif) \
+ :: __usdt_asm_ops(__VA_ARGS__) \
+ ); \
+} while (0)
+
+/*
+ * NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+ * operand note format.
+ *
+ * The named register may be a longer or shorter (!) alias for the
+ * storage where the value in question is found. For example, on
+ * i386, 64-bit value may be put in register pairs, and a register
+ * name stored would identify just one of them. Previously, gcc was
+ * asked to emit the %w[id] (16-bit alias of some registers holding
+ * operands), even when a wider 32-bit value was used.
+ *
+ * Bottom line: the byte-width given before the @ sign governs. If
+ * there is a mismatch between that width and that of the named
+ * register, then a sys/sdt.h note consumer may need to employ
+ * architecture-specific heuristics to figure out where the compiler
+ * has actually put the complete value.
+ */
+#if defined(__powerpc__) || defined(__powerpc64__)
+#define __usdt_argref(id) %I[id]%[id]
+#elif defined(__i386__)
+#define __usdt_argref(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+#define __usdt_argref(id) %[id]
+#endif
+
+#define __usdt_asm_arg(n) __usdt_asm_str(%c[__usdt_asz##n]) \
+ __usdt_asm1(.ascii "@") \
+ __usdt_asm_str(__usdt_argref(__usdt_aval##n))
+
+#define __usdt_asm_args0 /* no arguments */
+#define __usdt_asm_args1 __usdt_asm_arg(1)
+#define __usdt_asm_args2 __usdt_asm_args1 __usdt_asm1(.ascii " ") __usdt_asm_arg(2)
+#define __usdt_asm_args3 __usdt_asm_args2 __usdt_asm1(.ascii " ") __usdt_asm_arg(3)
+#define __usdt_asm_args4 __usdt_asm_args3 __usdt_asm1(.ascii " ") __usdt_asm_arg(4)
+#define __usdt_asm_args5 __usdt_asm_args4 __usdt_asm1(.ascii " ") __usdt_asm_arg(5)
+#define __usdt_asm_args6 __usdt_asm_args5 __usdt_asm1(.ascii " ") __usdt_asm_arg(6)
+#define __usdt_asm_args7 __usdt_asm_args6 __usdt_asm1(.ascii " ") __usdt_asm_arg(7)
+#define __usdt_asm_args8 __usdt_asm_args7 __usdt_asm1(.ascii " ") __usdt_asm_arg(8)
+#define __usdt_asm_args9 __usdt_asm_args8 __usdt_asm1(.ascii " ") __usdt_asm_arg(9)
+#define __usdt_asm_args10 __usdt_asm_args9 __usdt_asm1(.ascii " ") __usdt_asm_arg(10)
+#define __usdt_asm_args11 __usdt_asm_args10 __usdt_asm1(.ascii " ") __usdt_asm_arg(11)
+#define __usdt_asm_args12 __usdt_asm_args11 __usdt_asm1(.ascii " ") __usdt_asm_arg(12)
+#define __usdt_asm_args(...) __usdt_apply(__usdt_asm_args, __usdt_narg(__VA_ARGS__))
+
+#define __usdt_is_arr(x) (__builtin_classify_type(x) == 14 || __builtin_classify_type(x) == 5)
+#define __usdt_arg_size(x) (__usdt_is_arr(x) ? sizeof(void *) : sizeof(x))
+
+/*
+ * We can't use __builtin_choose_expr() in C++, so fall back to table-based
+ * signedness determination for known types, utilizing templates magic.
+ */
+#ifdef __cplusplus
+
+#define __usdt_is_signed(x) (!__usdt_is_arr(x) && __usdt_t<__typeof(x)>::is_signed)
+
+#include <cstddef>
+
+template<typename T> struct __usdt_t { static const bool is_signed = false; };
+template<typename A> struct __usdt_t<A[]> : public __usdt_t<A *> {};
+template<typename A, size_t N> struct __usdt_t<A[N]> : public __usdt_t<A *> {};
+
+#define __usdt_def_signed(T) \
+template<> struct __usdt_t<T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<const T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<volatile T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<const volatile T> { static const bool is_signed = true; }
+#define __usdt_maybe_signed(T) \
+template<> struct __usdt_t<T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<const T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<volatile T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<const volatile T> { static const bool is_signed = (T)-1 < (T)1; }
+
+__usdt_def_signed(signed char);
+__usdt_def_signed(short);
+__usdt_def_signed(int);
+__usdt_def_signed(long);
+__usdt_def_signed(long long);
+__usdt_maybe_signed(char);
+__usdt_maybe_signed(wchar_t);
+
+#else /* !__cplusplus */
+
+#define __usdt_is_inttype(x) (__builtin_classify_type(x) >= 1 && __builtin_classify_type(x) <= 4)
+#define __usdt_inttype(x) __typeof(__builtin_choose_expr(__usdt_is_inttype(x), (x), 0U))
+#define __usdt_is_signed(x) ((__usdt_inttype(x))-1 < (__usdt_inttype(x))1)
+
+#endif /* __cplusplus */
+
+#define __usdt_asm_op(n, x) \
+ [__usdt_asz##n] "n" ((__usdt_is_signed(x) ? (int)-1 : 1) * (int)__usdt_arg_size(x)), \
+ [__usdt_aval##n] __usdt_str(USDT_ARG_CONSTRAINT)(x)
+
+#define __usdt_asm_ops0() [__usdt_dummy] "g" (0)
+#define __usdt_asm_ops1(x) __usdt_asm_op(1, x)
+#define __usdt_asm_ops2(a,x) __usdt_asm_ops1(a), __usdt_asm_op(2, x)
+#define __usdt_asm_ops3(a,b,x) __usdt_asm_ops2(a,b), __usdt_asm_op(3, x)
+#define __usdt_asm_ops4(a,b,c,x) __usdt_asm_ops3(a,b,c), __usdt_asm_op(4, x)
+#define __usdt_asm_ops5(a,b,c,d,x) __usdt_asm_ops4(a,b,c,d), __usdt_asm_op(5, x)
+#define __usdt_asm_ops6(a,b,c,d,e,x) __usdt_asm_ops5(a,b,c,d,e), __usdt_asm_op(6, x)
+#define __usdt_asm_ops7(a,b,c,d,e,f,x) __usdt_asm_ops6(a,b,c,d,e,f), __usdt_asm_op(7, x)
+#define __usdt_asm_ops8(a,b,c,d,e,f,g,x) __usdt_asm_ops7(a,b,c,d,e,f,g), __usdt_asm_op(8, x)
+#define __usdt_asm_ops9(a,b,c,d,e,f,g,h,x) __usdt_asm_ops8(a,b,c,d,e,f,g,h), __usdt_asm_op(9, x)
+#define __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,x) __usdt_asm_ops9(a,b,c,d,e,f,g,h,i), __usdt_asm_op(10, x)
+#define __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,x) __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,j), __usdt_asm_op(11, x)
+#define __usdt_asm_ops12(a,b,c,d,e,f,g,h,i,j,k,x) __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,k), __usdt_asm_op(12, x)
+#define __usdt_asm_ops(...) __usdt_apply(__usdt_asm_ops, __usdt_narg(__VA_ARGS__))(__VA_ARGS__)
+
+#endif /* __USDT_H */
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
deleted file mode 100644
index ca8fdb1b3f01..000000000000
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ /dev/null
@@ -1,66 +0,0 @@
-{
- "invalid and of negative number",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid range check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_9, 1),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1),
- BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1),
- BPF_MOV32_IMM(BPF_REG_3, 1),
- BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9),
- BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
- BPF_MOV64_REG(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check known subreg with unknown reg",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFFFF1234),
- /* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */
- BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 1, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 512),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0
-},
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
deleted file mode 100644
index bed53b561e04..000000000000
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ /dev/null
@@ -1,378 +0,0 @@
-{
- "valid map access into an array with a constant",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "valid map access into an array with a register",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "valid map access into an array with a variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "valid map access into an array with a signed variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP32_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a constant",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2,
- offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=48 size=8",
- .result = REJECT,
-},
-{
- "invalid map access into an array with a register",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 min value is outside of the allowed memory range",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 unbounded memory access, make sure to bounds check any such access",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with no floor check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .errstr = "R0 unbounded memory access",
- .result_unpriv = REJECT,
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a invalid max check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .errstr = "invalid access to map value, value_size=48 off=44 size=8",
- .result_unpriv = REJECT,
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a invalid max check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3, 11 },
- .errstr = "R0 pointer += pointer",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "valid read map access into a read-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_ro = { 3 },
- .result = ACCEPT,
- .retval = 28,
-},
-{
- "valid read map access into a read-only array 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_ro = { 3 },
- .result = ACCEPT,
- .retval = -29,
-},
-{
- "invalid write map access into a read-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_ro = { 3 },
- .result = REJECT,
- .errstr = "write into map forbidden",
-},
-{
- "invalid write map access into a read-only array 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_load_bytes),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_ro = { 4 },
- .result = REJECT,
- .errstr = "write into map forbidden",
-},
-{
- "valid write map access into a write-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_wo = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "valid write map access into a write-only array 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_load_bytes),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_wo = { 4 },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "invalid read map access into a write-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_wo = { 3 },
- .result = REJECT,
- .errstr = "read from map forbidden",
-},
-{
- "invalid read map access into a write-only array 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_wo = { 3 },
- .result = REJECT,
- .errstr = "read from map forbidden",
-},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_and.c b/tools/testing/selftests/bpf/verifier/atomic_and.c
index 1bdc8e6684f7..fe4bb70eb9c5 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_and.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_and.c
@@ -75,3 +75,26 @@
},
.result = ACCEPT,
},
+{
+ "BPF_ATOMIC_AND with fetch - r0 as source reg",
+ .insns = {
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+ /* old = atomic_fetch_and(&val, 0x011); */
+ BPF_MOV64_IMM(BPF_REG_0, 0x011),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_0, -8),
+ /* if (old != 0x110) exit(3); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x110, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* if (val != 0x010) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
index 2efd8bcf57a1..9a7b1106fda8 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
@@ -84,13 +84,152 @@
.errstr = "!read_ok",
},
{
- "Can't use cmpxchg on uninit memory",
+ "BPF_W cmpxchg should zero top 32 bits",
.insns = {
- BPF_MOV64_IMM(BPF_REG_0, 3),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8),
+ /* r0 = U64_MAX; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+ /* u64 val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r0 = (u32)atomic_cmpxchg((u32 *)&val, r0, 1); */
+ BPF_MOV32_IMM(BPF_REG_1, 1),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* r1 = 0x00000000FFFFFFFFull; */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+ /* if (r0 != r1) exit(1); */
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "Dest pointer in r0 - fail",
+ .insns = {
+ /* val = 0; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, 1); */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* if (r0 != 0) exit(1); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r0 = atomic_cmpxchg(&val, r0, 0); */
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* r1 = *r0 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed, check 2",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* r1 = *r0 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed, check 3",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid size of register fill",
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed, check 4",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV32_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV32_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* r1 = *r10 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R10 partial copy of pointer",
+},
+{
+ "Dest pointer in r0 - succeed, check 5",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV32_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV32_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* r1 = *r0 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "invalid read from stack",
+ .errstr = "R0 invalid mem access",
+ .errstr_unpriv = "R10 partial copy of pointer",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c
new file mode 100644
index 000000000000..5bf03fb4fa2b
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_fetch.c
@@ -0,0 +1,151 @@
+{
+ "atomic dw/fetch and address leakage of (map ptr & -1) via stack slot",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "leaking pointer from stack off -8",
+},
+{
+ "atomic dw/fetch and address leakage of (map ptr & -1) via returned value",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "leaking pointer from stack off -8",
+},
+{
+ "atomic w/fetch and address leakage of (map ptr & -1) via stack slot",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = REJECT,
+ .errstr = "invalid size of register fill",
+},
+{
+ "atomic w/fetch and address leakage of (map ptr & -1) via returned value",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = REJECT,
+ .errstr = "invalid size of register fill",
+},
+#define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \
+ { \
+ "atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg, \
+ .insns = { \
+ /* u64 val = operan1; */ \
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, operand1), \
+ /* u64 old = atomic_fetch_add(&val, operand2); */ \
+ BPF_MOV64_REG(dst_reg, BPF_REG_10), \
+ BPF_MOV64_IMM(src_reg, operand2), \
+ BPF_ATOMIC_OP(BPF_DW, op, \
+ dst_reg, src_reg, -8), \
+ /* if (old != operand1) exit(1); */ \
+ BPF_JMP_IMM(BPF_JEQ, src_reg, operand1, 2), \
+ BPF_MOV64_IMM(BPF_REG_0, 1), \
+ BPF_EXIT_INSN(), \
+ /* if (val != result) exit (2); */ \
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), \
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, expect, 2), \
+ BPF_MOV64_IMM(BPF_REG_0, 2), \
+ BPF_EXIT_INSN(), \
+ /* exit(0); */ \
+ BPF_MOV64_IMM(BPF_REG_0, 0), \
+ BPF_EXIT_INSN(), \
+ }, \
+ .result = ACCEPT, \
+ }
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XCHG, 0x011, 0x011),
+#undef __ATOMIC_FETCH_OP_TEST
diff --git a/tools/testing/selftests/bpf/verifier/atomic_invalid.c b/tools/testing/selftests/bpf/verifier/atomic_invalid.c
new file mode 100644
index 000000000000..25f4ac1c69ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_invalid.c
@@ -0,0 +1,25 @@
+#define __INVALID_ATOMIC_ACCESS_TEST(op) \
+ { \
+ "atomic " #op " access through non-pointer ", \
+ .insns = { \
+ BPF_MOV64_IMM(BPF_REG_0, 1), \
+ BPF_MOV64_IMM(BPF_REG_1, 0), \
+ BPF_ATOMIC_OP(BPF_DW, op, BPF_REG_1, BPF_REG_0, -8), \
+ BPF_MOV64_IMM(BPF_REG_0, 0), \
+ BPF_EXIT_INSN(), \
+ }, \
+ .result = REJECT, \
+ .errstr = "R1 invalid mem access 'scalar'" \
+ }
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_AND),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_AND | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_OR),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_OR | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_XCHG),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_CMPXCHG),
diff --git a/tools/testing/selftests/bpf/verifier/atomic_or.c b/tools/testing/selftests/bpf/verifier/atomic_or.c
index 70f982e1f9f0..9d0716ac5080 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_or.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_or.c
@@ -75,3 +75,28 @@
},
.result = ACCEPT,
},
+{
+ "BPF_W atomic_fetch_or should zero top 32 bits",
+ .insns = {
+ /* r1 = U64_MAX; */
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+ /* u64 val = r1; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ /* r1 = (u32)atomic_fetch_or((u32 *)&val, 2); */
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+ /* r2 = 0x00000000FFFFFFFF; */
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 1),
+ /* if (r2 != r1) exit(1); */
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/basic_instr.c b/tools/testing/selftests/bpf/verifier/basic_instr.c
index 071dbc889e8c..bd928a72ad73 100644
--- a/tools/testing/selftests/bpf/verifier/basic_instr.c
+++ b/tools/testing/selftests/bpf/verifier/basic_instr.c
@@ -176,11 +176,11 @@
.retval = 1,
},
{
- "invalid 64-bit BPF_END",
+ "invalid 64-bit BPF_END with BPF_TO_BE",
.insns = {
BPF_MOV32_IMM(BPF_REG_0, 0),
{
- .code = BPF_ALU64 | BPF_END | BPF_TO_LE,
+ .code = BPF_ALU64 | BPF_END | BPF_TO_BE,
.dst_reg = BPF_REG_0,
.src_reg = 0,
.off = 0,
@@ -188,7 +188,7 @@
},
BPF_EXIT_INSN(),
},
- .errstr = "unknown opcode d7",
+ .errstr = "unknown opcode df",
.result = REJECT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/basic_stack.c b/tools/testing/selftests/bpf/verifier/basic_stack.c
deleted file mode 100644
index f995777dddb3..000000000000
--- a/tools/testing/selftests/bpf/verifier/basic_stack.c
+++ /dev/null
@@ -1,64 +0,0 @@
-{
- "stack out of bounds",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid write to stack",
- .result = REJECT,
-},
-{
- "uninitialized stack1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 2 },
- .errstr = "invalid indirect read from stack",
- .result = REJECT,
-},
-{
- "uninitialized stack2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid read from stack",
- .result = REJECT,
-},
-{
- "invalid fp arithmetic",
- /* If this gets ever changed, make sure JITs can deal with it. */
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 subtraction from stack pointer",
- .result = REJECT,
-},
-{
- "non-invalid fp arithmetic",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "misaligned read from stack",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
- BPF_EXIT_INSN(),
- },
- .errstr = "misaligned stack access",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
deleted file mode 100644
index 57ed67b86074..000000000000
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ /dev/null
@@ -1,746 +0,0 @@
-{
- "subtraction bounds (map value) variant 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 7),
- BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 5),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 56),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT,
-},
-{
- "subtraction bounds (map value) variant 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6),
- BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "check subtraction on pointers for unpriv",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 9),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_9, BPF_REG_FP),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1, 9 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R9 pointer -= pointer prohibited",
-},
-{
- "bounds check based on zero-extended MOV",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- /* r2 = 0x0000'0000'ffff'ffff */
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
- /* r2 = 0 */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
- /* no-op */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- /* access at offset 0 */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT
-},
-{
- "bounds check based on sign-extended MOV. test1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- /* r2 = 0xffff'ffff'ffff'ffff */
- BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
- /* r2 = 0xffff'ffff */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
- /* r0 = <oob pointer> */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- /* access to OOB pointer */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "map_value pointer and 4294967295",
- .result = REJECT
-},
-{
- "bounds check based on sign-extended MOV. test2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- /* r2 = 0xffff'ffff'ffff'ffff */
- BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
- /* r2 = 0xfff'ffff */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
- /* r0 = <oob pointer> */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- /* access to OOB pointer */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 min value is outside of the allowed memory range",
- .result = REJECT
-},
-{
- "bounds check based on reg_off + var_off + insn_off. test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "value_size=8 off=1073741825",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "bounds check based on reg_off + var_off + insn_off. test2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "value 1073741823",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "bounds check after truncation of non-boundary-crossing range",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- /* r1 = [0x00, 0xff] */
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- /* r2 = 0x10'0000'0000 */
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
- /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- /* r1 = [0x00, 0xff] */
- BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
- /* r1 = 0 */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* no-op */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* access at offset 0 */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT
-},
-{
- "bounds check after truncation of boundary-crossing range (1)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- /* r1 = [0x00, 0xff] */
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0xffff'ff80, 0x1'0000'007f] */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0xffff'ff80, 0xffff'ffff] or
- * [0x0000'0000, 0x0000'007f]
- */
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0x00, 0xff] or
- * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
- */
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* error on OOB pointer computation */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- /* not actually fully unbounded, but the bound is very high */
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
- .result_unpriv = REJECT,
- .errstr = "value -4294967168 makes map_value pointer be out of bounds",
- .result = REJECT,
-},
-{
- "bounds check after truncation of boundary-crossing range (2)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- /* r1 = [0x00, 0xff] */
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0xffff'ff80, 0x1'0000'007f] */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0xffff'ff80, 0xffff'ffff] or
- * [0x0000'0000, 0x0000'007f]
- * difference to previous test: truncation via MOV32
- * instead of ALU32.
- */
- BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0x00, 0xff] or
- * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
- */
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* error on OOB pointer computation */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- /* not actually fully unbounded, but the bound is very high */
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
- .result_unpriv = REJECT,
- .errstr = "value -4294967168 makes map_value pointer be out of bounds",
- .result = REJECT,
-},
-{
- "bounds check after wrapping 32-bit addition",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- /* r1 = 0x7fff'ffff */
- BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
- /* r1 = 0xffff'fffe */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- /* r1 = 0 */
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
- /* no-op */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* access at offset 0 */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT
-},
-{
- "bounds check after shift with oversized count operand",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_2, 32),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- /* r1 = (u32)1 << (u32)32 = ? */
- BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
- /* r1 = [0x0000, 0xffff] */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
- /* computes unknown pointer, potentially OOB */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* potentially OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT
-},
-{
- "bounds check after right shift of maybe-negative number",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- /* r1 = [0x00, 0xff] */
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- /* r1 = [-0x01, 0xfe] */
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
- /* r1 = 0 or 0xff'ffff'ffff'ffff */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* r1 = 0 or 0xffff'ffff'ffff */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* computes unknown pointer, potentially OOB */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* potentially OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 unbounded memory access",
- .result = REJECT
-},
-{
- "bounds check after 32-bit right shift with 64-bit input",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- /* r1 = 2 */
- BPF_MOV64_IMM(BPF_REG_1, 2),
- /* r1 = 1<<32 */
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 31),
- /* r1 = 0 (NOT 2!) */
- BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31),
- /* r1 = 0xffff'fffe (NOT 0!) */
- BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2),
- /* error on computing OOB pointer */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "math between map_value pointer and 4294967294 is not allowed",
- .result = REJECT,
-},
-{
- "bounds check map access with off+size signed 32bit overflow. test1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "map_value pointer and 2147483646",
- .result = REJECT
-},
-{
- "bounds check map access with off+size signed 32bit overflow. test2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "pointer offset 1073741822",
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .result = REJECT
-},
-{
- "bounds check map access with off+size signed 32bit overflow. test3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "pointer offset -1073741822",
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .result = REJECT
-},
-{
- "bounds check map access with off+size signed 32bit overflow. test4",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_1, 1000000),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "map_value pointer and 1000000000000",
- .result = REJECT
-},
-{
- "bounds check mixed 32bit and 64bit arithmetic. test1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- /* r1 = 0xffffFFFF00000001 */
- BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 1, 3),
- /* check ALU64 op keeps 32bit bounds */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 2, 1),
- BPF_JMP_A(1),
- /* invalid ldx if bounds are lost above */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT
-},
-{
- "bounds check mixed 32bit and 64bit arithmetic. test2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- /* r1 = 0xffffFFFF00000001 */
- BPF_MOV64_IMM(BPF_REG_2, 3),
- /* r1 = 0x2 */
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
- /* check ALU32 op zero extends 64bit bounds */
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 1),
- BPF_JMP_A(1),
- /* invalid ldx if bounds are lost above */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT
-},
-{
- "assigning 32bit bounds to 64bit for wA = 0, wB = wA",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV32_IMM(BPF_REG_9, 0),
- BPF_MOV32_REG(BPF_REG_2, BPF_REG_9),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_8, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "bounds check for reg = 0, reg xor 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds check for reg32 = 0, reg32 xor 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 1),
- BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds check for reg = 2, reg xor 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_1, 2),
- BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds check for reg = any, reg xor 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = REJECT,
- .errstr = "invalid access to map value",
- .errstr_unpriv = "invalid access to map value",
-},
-{
- "bounds check for reg32 = any, reg32 xor 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
- BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = REJECT,
- .errstr = "invalid access to map value",
- .errstr_unpriv = "invalid access to map value",
-},
-{
- "bounds check for reg > 0, reg xor 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLE, BPF_REG_1, 0, 3),
- BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds check for reg32 > 0, reg32 xor 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP32_IMM(BPF_JLE, BPF_REG_1, 0, 3),
- BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
- BPF_JMP32_IMM(BPF_JGE, BPF_REG_1, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds checks after 32-bit truncation. test 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- /* This used to reduce the max bound to 0x7fffffff */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0x7fffffff, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "bounds checks after 32-bit truncation. test 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_1, 1, 1),
- BPF_JMP32_IMM(BPF_JSLT, BPF_REG_1, 0, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
deleted file mode 100644
index 1fd07a4f27ac..000000000000
--- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c
+++ /dev/null
@@ -1,124 +0,0 @@
-{
- "check deducing bounds from const, 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "check deducing bounds from const, 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "check deducing bounds from const, 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 7",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, ~0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "dereference of modified ctx ptr",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check deducing bounds from const, 8",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, ~0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "dereference of modified ctx ptr",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check deducing bounds from const, 9",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 10",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
- /* Marks reg as unknown. */
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "math between ctx pointer and register with unbounded min value is not allowed",
-},
diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
deleted file mode 100644
index 9baca7a75c42..000000000000
--- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
+++ /dev/null
@@ -1,406 +0,0 @@
-{
- "bounds checks mixing signed and unsigned, positive bounds",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 2),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
- BPF_MOV64_IMM(BPF_REG_8, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 4",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 5",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_6, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R4 min value is negative, either use unsigned",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 7",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 8",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 9",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 10",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 11",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- /* Dead branch. */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 12",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -6),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 13",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 2),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 14",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_8, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
- BPF_JMP_IMM(BPF_JA, 0, 0, -7),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 15",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -6),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
- .result_unpriv = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
deleted file mode 100644
index 69b048cf46d9..000000000000
--- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
+++ /dev/null
@@ -1,44 +0,0 @@
-{
- "bpf_get_stack return R0 within range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)/2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)/2),
- BPF_MOV64_IMM(BPF_REG_4, 256),
- BPF_EMIT_CALL(BPF_FUNC_get_stack),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_1, BPF_REG_8, 16),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)/2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_EMIT_CALL(BPF_FUNC_get_stack),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
new file mode 100644
index 000000000000..59125b22ae39
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
@@ -0,0 +1,270 @@
+#define BTF_TYPES \
+ .btf_strings = "\0int\0i\0ctx\0callback\0main\0", \
+ .btf_types = { \
+ /* 1: int */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \
+ /* 2: int* */ BTF_PTR_ENC(1), \
+ /* 3: void* */ BTF_PTR_ENC(0), \
+ /* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \
+ BTF_FUNC_PROTO_ARG_ENC(7, 3), \
+ /* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \
+ BTF_FUNC_PROTO_ARG_ENC(5, 1), \
+ BTF_FUNC_PROTO_ARG_ENC(7, 2), \
+ /* 6: main */ BTF_FUNC_ENC(20, 4), \
+ /* 7: callback */ BTF_FUNC_ENC(11, 5), \
+ BTF_END_RAW \
+ }
+
+#define MAIN_TYPE 6
+#define CALLBACK_TYPE 7
+
+/* can't use BPF_CALL_REL, jit_subprogs adjusts IMM & OFF
+ * fields for pseudo calls
+ */
+#define PSEUDO_CALL_INSN() \
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \
+ INSN_OFF_MASK, INSN_IMM_MASK)
+
+/* can't use BPF_FUNC_loop constant,
+ * do_mix_fixups adjusts the IMM field
+ */
+#define HELPER_CALL_INSN() \
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK)
+
+{
+ "inline simple bpf_loop call",
+ .insns = {
+ /* main */
+ /* force verifier state branching to verify logic on first and
+ * subsequent bpf_loop insn processing steps
+ */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+{
+ "don't inline bpf_loop call, flags non-zero",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -10),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { HELPER_CALL_INSN() },
+ .unexpected_insns = { PSEUDO_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+{
+ "don't inline bpf_loop call, callback non-constant",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* callback #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { HELPER_CALL_INSN() },
+ .unexpected_insns = { PSEUDO_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 14, CALLBACK_TYPE },
+ { 16, CALLBACK_TYPE }
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "bpf_loop_inline and a dead func",
+ .insns = {
+ /* main */
+
+ /* A reference to callback #1 to make verifier count it as a func.
+ * This reference is overwritten below and callback #1 is dead.
+ */
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* callback #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 10, CALLBACK_TYPE },
+ { 12, CALLBACK_TYPE }
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "bpf_loop_inline stack locations for loop vars",
+ .insns = {
+ /* main */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+ /* bpf_loop call #1 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ /* bpf_loop call #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 16),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ /* call func and exit */
+ BPF_CALL_REL(2),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* func */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+ SKIP_INSNS(),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+ SKIP_INSNS(),
+ /* offsets are the same as in the first call */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+ SKIP_INSNS(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+ SKIP_INSNS(),
+ /* offsets differ from main because of different offset
+ * in BPF_ST_MEM instruction
+ */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40),
+ },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 16, MAIN_TYPE },
+ { 25, CALLBACK_TYPE },
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "inline bpf_loop call in a big program",
+ .insns = {},
+ .fill_helper = bpf_fill_big_prog_with_loop_1,
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+
+#undef HELPER_CALL_INSN
+#undef PSEUDO_CALL_INSN
+#undef CALLBACK_TYPE
+#undef MAIN_TYPE
+#undef BTF_TYPES
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
new file mode 100644
index 000000000000..ce13002c7a19
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -0,0 +1,99 @@
+{
+ "BPF_ST_MEM stack imm non-zero",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 42),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -42),
+ /* if value is tracked correctly R0 is zero */
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ /* Use prog type that requires return value in range [0, 1] */
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
+},
+{
+ "BPF_ST_MEM stack imm zero",
+ .insns = {
+ /* mark stack 0000 0000 */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* read and sum a few bytes */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -8),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* if value is tracked correctly R0 is zero */
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ /* Use prog type that requires return value in range [0, 1] */
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
+},
+{
+ "BPF_ST_MEM stack imm zero, variable offset",
+ .insns = {
+ /* set fp[-16], fp[-24] to zeros */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
+ /* r0 = random value in range [-32, -15] */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_0, 16, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 32),
+ /* fp[r0] = 0, make a variable offset write of zero,
+ * this should preserve zero marks on stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_10),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ /* r0 = fp[-20], if variable offset write was tracked correctly
+ * r0 would be a known zero.
+ */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_10, -20),
+ /* Would fail return code verification if r0 range is not tracked correctly. */
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ /* Use prog type that requires return value in range [0, 1] */
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
+},
+{
+ "BPF_ST_MEM stack imm sign",
+ /* Check if verifier correctly reasons about sign of an
+ * immediate spilled to stack by BPF_ST instruction.
+ *
+ * fp[-8] = -44;
+ * r0 = fp[-8];
+ * if r0 s< 0 goto ret0;
+ * r0 = -1;
+ * exit;
+ * ret0:
+ * r0 = 0;
+ * exit;
+ */
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, -44),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ /* Use prog type that requires return value in range [0, 1] */
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .result = VERBOSE_ACCEPT,
+ .runs = -1,
+ .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8=-44\
+ 2: (c5) if r0 s< 0x0 goto pc+2\
+ R0=-44",
+},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index eb888c8479c3..c8d640802cce 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1,4 +1,294 @@
{
+ "calls: invalid kfunc call not eliminated",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = REJECT,
+ .errstr = "invalid kernel function call not eliminated in verifier pass",
+},
+{
+ "calls: invalid kfunc call unreachable",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 0, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail1", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail2", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with FAM",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail3", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: reg->type != PTR_TO_CTX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 expected pointer to ctx, but got fp",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_pass_ctx", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type UNKNOWN must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_mem_len_fail1", 2 },
+ },
+},
+{
+ "calls: trigger reg2btf_ids[reg->type] for reg->type > __BPF_REG_TYPE_MAX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "Possibly NULL pointer passed to trusted arg0",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_release", 5 },
+ },
+},
+{
+ "calls: invalid kfunc call: reg->off must be zero when passed to release kfunc",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "R1 must have zero offset when passed to release func",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_memb_release", 8 },
+ },
+},
+{
+ "calls: invalid kfunc call: don't match first member type when passed to release kfunc",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "kernel function bpf_kfunc_call_memb1_release args#0 expected pointer",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_memb_acquire", 1 },
+ { "bpf_kfunc_call_memb1_release", 5 },
+ },
+},
+{
+ "calls: invalid kfunc call: PTR_TO_BTF_ID with negative offset",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_offset", 9 },
+ { "bpf_kfunc_call_test_release", 12 },
+ },
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "ptr R1 off=-4 disallowed",
+},
+{
+ "calls: invalid kfunc call: PTR_TO_BTF_ID with variable offset",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_release", 9 },
+ { "bpf_kfunc_call_test_release", 13 },
+ { "bpf_kfunc_call_test_release", 17 },
+ },
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed",
+},
+{
+ "calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_ref", 8 },
+ { "bpf_kfunc_call_test_ref", 10 },
+ },
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "R1 must be",
+},
+{
+ "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_ref", 8 },
+ { "bpf_kfunc_call_test_release", 10 },
+ },
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+},
+{
+ "calls: invalid kfunc call: must provide (attach_prog_fd, btf_id) pair when freplace",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_EXT,
+ .result = REJECT,
+ .errstr = "Tracing programs must provide btf_id",
+ .fixup_kfunc_btf_id = {
+ { "bpf_dynptr_from_skb", 0 },
+ },
+},
+{
"calls: basic sanity",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
@@ -11,7 +301,7 @@
.result = ACCEPT,
},
{
- "calls: not on unpriviledged",
+ "calls: not on unprivileged",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
@@ -19,7 +309,7 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 1,
@@ -71,7 +361,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "R0 invalid mem access 'inv'",
+ .errstr = "R0 invalid mem access 'scalar'",
},
{
"calls: multiple ret types in subprog 2",
@@ -136,7 +426,7 @@
{
"calls: wrong src reg",
.insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 3, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
@@ -165,7 +455,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge from insn 0 to 0",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@@ -374,7 +664,7 @@
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "R6 invalid mem access 'inv'",
+ .errstr = "R6 invalid mem access 'scalar'",
.prog_type = BPF_PROG_TYPE_XDP,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -397,7 +687,7 @@
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.fixup_map_hash_48b = { 3 },
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -522,7 +812,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@@ -534,7 +824,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@@ -1085,7 +1375,7 @@
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
@@ -1149,7 +1439,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -1203,7 +1493,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -1228,7 +1518,9 @@
.prog_type = BPF_PROG_TYPE_XDP,
.fixup_map_hash_8b = { 23 },
.result = REJECT,
- .errstr = "invalid read from stack R7 off=-16 size=8",
+ .errstr = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "invalid read from stack R7 off=-16 size=8",
},
{
"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
@@ -1580,7 +1872,7 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_hash_8b = { 12, 22 },
.result = REJECT,
- .errstr = "R0 invalid mem access 'inv'",
+ .errstr = "R0 invalid mem access 'scalar'",
},
{
"calls: pkt_ptr spill into caller stack",
@@ -1948,19 +2240,22 @@
* that fp-8 stack slot was unused in the fall-through
* branch and will accept the program incorrectly
*/
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 2, 2),
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_JMP_IMM(BPF_JA, 0, 0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map_hash_48b = { 6 },
- .errstr = "invalid indirect read from stack R2 off -8+0 size 8",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map_hash_48b = { 7 },
+ .errstr_unpriv = "invalid read from stack R2 off -8+0 size 8",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"calls: ctx read at start of subprog",
@@ -1977,7 +2272,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
},
@@ -2003,7 +2298,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
@@ -2028,7 +2323,113 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
+/* Make sure that verifier.c:states_equal() considers IDs from all
+ * frames when building 'idmap' for check_ids().
+ */
+{
+ "calls: check_ids() across call boundary",
+ .insns = {
+ /* Function main() */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* fp[-24] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -24),
+ /* fp[-32] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -32),
+ /* call foo(&fp[-24], &fp[-32]) ; both arguments have IDs in the current
+ * ; stack frame
+ */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -24),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+ BPF_CALL_REL(2),
+ /* exit 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* Function foo()
+ *
+ * r9 = &frame[0].fp[-24] ; save arguments in the callee saved registers,
+ * r8 = &frame[0].fp[-32] ; arguments are pointers to pointers to map value
+ */
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_2),
+ /* r7 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r6 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from
+ * ; this check, thus produced verifier states differ
+ * ; only in 'insn_idx'
+ * r9 = r8
+ */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
+ /* r9 = *r9 ; verifier gets to this point via two paths:
+ * ; (I) one including r9 = r8, verified first;
+ * ; (II) one excluding r9 = r8, verified next.
+ * ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id.
+ * ; Suppose that checkpoint is created here via path (I).
+ * ; When verifying via (II) the r9.id must be compared against
+ * ; frame[0].fp[-24].id, otherwise (I) and (II) would be
+ * ; incorrectly deemed equivalent.
+ * if r9 == 0 goto <exit>
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1),
+ /* r8 = *r8 ; read map value via r8, this is not safe
+ * r0 = *r8 ; because r8 might be not equal to r9.
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
+ /* exit 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .fixup_map_hash_8b = { 3, 9 },
+ .result = REJECT,
+ .errstr = "R8 invalid mem access 'map_value_or_null'",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "calls: several args with ref_obj_id",
+ .insns = {
+ /* Reserve at least sizeof(struct iphdr) bytes in the ring buffer.
+ * With a smaller size, the verifier would reject the call to
+ * bpf_tcp_raw_gen_syncookie_ipv4 before we can reach the
+ * ref_obj_id error.
+ */
+ BPF_MOV64_IMM(BPF_REG_2, 20),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* if r0 == 0 goto <exit> */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tcp_raw_gen_syncookie_ipv4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 2 },
+ .result = REJECT,
+ .errstr = "more than one arg with ref_obj_id",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c
deleted file mode 100644
index 4eb76ed739ce..000000000000
--- a/tools/testing/selftests/bpf/verifier/cfg.c
+++ /dev/null
@@ -1,73 +0,0 @@
-{
- "unreachable",
- .insns = {
- BPF_EXIT_INSN(),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable",
- .result = REJECT,
-},
-{
- "unreachable2",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable",
- .result = REJECT,
-},
-{
- "out of range jump",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_EXIT_INSN(),
- },
- .errstr = "jump out of range",
- .result = REJECT,
-},
-{
- "out of range jump2",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, -2),
- BPF_EXIT_INSN(),
- },
- .errstr = "jump out of range",
- .result = REJECT,
-},
-{
- "loop (back-edge)",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable insn 1",
- .errstr_unpriv = "back-edge",
- .result = REJECT,
-},
-{
- "loop2 (back-edge)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -4),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable insn 4",
- .errstr_unpriv = "back-edge",
- .result = REJECT,
-},
-{
- "conditional loop",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
- BPF_EXIT_INSN(),
- },
- .errstr = "infinite loop detected",
- .errstr_unpriv = "back-edge",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c b/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c
deleted file mode 100644
index 6d65fe3e7321..000000000000
--- a/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c
+++ /dev/null
@@ -1,72 +0,0 @@
-{
- "bpf_exit with invalid return code. test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has value (0x0; 0xffffffff)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has value (0x0; 0x3)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has value (0x2; 0x0)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 is not a known value (ctx)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test7",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has unknown scalar value",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
diff --git a/tools/testing/selftests/bpf/verifier/cgroup_skb.c b/tools/testing/selftests/bpf/verifier/cgroup_skb.c
deleted file mode 100644
index 52e4c03b076b..000000000000
--- a/tools/testing/selftests/bpf/verifier/cgroup_skb.c
+++ /dev/null
@@ -1,197 +0,0 @@
-{
- "direct packet read test#1 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, queue_mapping)),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, protocol)),
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_present)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid bpf_context access off=76 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "direct packet read test#2 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_tci)),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_proto)),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, priority)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
- offsetof(struct __sk_buff, priority)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, ingress_ifindex)),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, tc_index)),
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, hash)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "direct packet read test#3 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, cb[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, cb[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, cb[3])),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, cb[4])),
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, napi_id)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
- offsetof(struct __sk_buff, cb[0])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
- offsetof(struct __sk_buff, cb[1])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
- offsetof(struct __sk_buff, cb[2])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
- offsetof(struct __sk_buff, cb[3])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
- offsetof(struct __sk_buff, cb[4])),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "direct packet read test#4 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, family)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip4)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip4)),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[3])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[3])),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, remote_port)),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, local_port)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid access of tc_classid for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid access of data_meta for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, data_meta)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid access of flow_keys for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, flow_keys)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid write access to napi_id for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, napi_id)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
- offsetof(struct __sk_buff, napi_id)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "write tstamp from CGROUP_SKB",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, tstamp)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid bpf_context access off=152 size=8",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "read tstamp from CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tstamp)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
diff --git a/tools/testing/selftests/bpf/verifier/cgroup_storage.c b/tools/testing/selftests/bpf/verifier/cgroup_storage.c
deleted file mode 100644
index 97057c0a1b8a..000000000000
--- a/tools/testing/selftests/bpf/verifier/cgroup_storage.c
+++ /dev/null
@@ -1,220 +0,0 @@
-{
- "valid cgroup storage access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "fd 1 is not pointing to valid bpf_map",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=256 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid cgroup storage access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 7),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .errstr_unpriv = "R2 leaks addr into helper function",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "valid per-cpu cgroup storage access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "fd 1 is not pointing to valid bpf_map",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=256 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid per-cpu cgroup storage access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 7),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .errstr_unpriv = "R2 leaks addr into helper function",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c
deleted file mode 100644
index 0719b0ddec04..000000000000
--- a/tools/testing/selftests/bpf/verifier/const_or.c
+++ /dev/null
@@ -1,60 +0,0 @@
-{
- "constant register |= constant should keep constant type",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "constant register |= constant should not bypass stack boundary checks",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-48 size=58",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "constant register |= constant register should keep constant type",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_MOV64_IMM(BPF_REG_4, 13),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "constant register |= constant register should not bypass stack boundary checks",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_MOV64_IMM(BPF_REG_4, 24),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-48 size=58",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c
deleted file mode 100644
index 23080862aafd..000000000000
--- a/tools/testing/selftests/bpf/verifier/ctx.c
+++ /dev/null
@@ -1,197 +0,0 @@
-{
- "context stores via ST",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_ST stores into R1 ctx is not allowed",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "context stores via BPF_ATOMIC",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "arithmetic ops make PTR_TO_CTX unusable",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
- offsetof(struct __sk_buff, data) -
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .errstr = "dereference of modified ctx ptr",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "pass unmodified ctx pointer to helper",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_update),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "pass modified ctx pointer to helper, 1",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_update),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "dereference of modified ctx ptr",
-},
-{
- "pass modified ctx pointer to helper, 2",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_socket_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result_unpriv = REJECT,
- .result = REJECT,
- .errstr_unpriv = "dereference of modified ctx ptr",
- .errstr = "dereference of modified ctx ptr",
-},
-{
- "pass modified ctx pointer to helper, 3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_update),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "variable ctx access var_off=(0x0; 0x4)",
-},
-{
- "pass ctx or null check, 1: ctx",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 2: null",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 3: 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
- .result = REJECT,
- .errstr = "R1 type=inv expected=ctx",
-},
-{
- "pass ctx or null check, 4: ctx - const",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
- .result = REJECT,
- .errstr = "dereference of modified ctx ptr",
-},
-{
- "pass ctx or null check, 5: null (connect)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_INET4_CONNECT,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 6: null (bind)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
- .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 7: ctx (bind)",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_socket_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
- .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 8: null (bind)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_socket_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
- .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
- .result = REJECT,
- .errstr = "R1 type=inv expected=ctx",
-},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
index fb13ca2d5606..a2b006e2fd06 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -229,6 +229,24 @@
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, local_port)),
+ /* 1-byte read from ingress_ifindex field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 3),
+ /* 2-byte read from ingress_ifindex field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2),
+ /* 4-byte read from ingress_ifindex field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+
/* 8-byte read from sk field */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_sk_lookup, sk)),
@@ -239,6 +257,7 @@
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
},
/* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
{
@@ -350,6 +369,20 @@
.expected_attach_type = BPF_SK_LOOKUP,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "invalid 8-byte read from bpf_sk_lookup ingress_ifindex field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
/* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
{
"invalid 4-byte read from bpf_sk_lookup sk field",
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c
deleted file mode 100644
index c6c69220a569..000000000000
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c
+++ /dev/null
@@ -1,181 +0,0 @@
-{
- "valid access family in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, family)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access remote_ip4 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip4)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access local_ip4 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip4)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access remote_port in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_port)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access local_port in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_port)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access remote_ip6 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[3])),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_SKB,
-},
-{
- "valid access local_ip6 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[3])),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_SKB,
-},
-{
- "valid access size in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, size)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "invalid 64B read of size in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, size)),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid read past end of SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, size) + 4),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "invalid read offset in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, family) + 1),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet read for SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, data)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
- offsetof(struct sk_msg_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "direct packet write for SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, data)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
- offsetof(struct sk_msg_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "overlapping checks for direct packet access SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, data)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
- offsetof(struct sk_msg_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index 2022c0f2cd75..0b394a7f7a2d 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -502,7 +502,7 @@
"check skb->hash byte load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash)),
#else
@@ -537,7 +537,7 @@
"check skb->hash byte load permitted 3",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash) + 3),
#else
@@ -646,7 +646,7 @@
"check skb->hash half load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash)),
#else
@@ -661,7 +661,7 @@
"check skb->hash half load permitted 2",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash) + 2),
#else
@@ -676,7 +676,7 @@
"check skb->hash half load not permitted, unaligned 1",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash) + 1),
#else
@@ -693,7 +693,7 @@
"check skb->hash half load not permitted, unaligned 3",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash) + 3),
#else
@@ -951,7 +951,7 @@
"check skb->data half load not permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, data)),
#else
@@ -1058,6 +1058,66 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "padding after gso_size is not accessible",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetofend(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access off=180 size=4",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
+ "read hwtstamp from CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hwtstamp)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "read hwtstamp from CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, hwtstamp)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "write hwtstamp from CGROUP_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, hwtstamp)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access off=184 size=8",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "read hwtstamp from CLS",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hwtstamp)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
"check wire_len is not readable by sockets",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
@@ -1109,6 +1169,7 @@
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"pkt_end < pkt taken check",
@@ -1130,4 +1191,5 @@
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
diff --git a/tools/testing/selftests/bpf/verifier/d_path.c b/tools/testing/selftests/bpf/verifier/d_path.c
deleted file mode 100644
index b988396379a7..000000000000
--- a/tools/testing/selftests/bpf/verifier/d_path.c
+++ /dev/null
@@ -1,37 +0,0 @@
-{
- "d_path accept",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
- BPF_LD_IMM64(BPF_REG_3, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACING,
- .expected_attach_type = BPF_TRACE_FENTRY,
- .kfunc = "dentry_open",
-},
-{
- "d_path reject",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
- BPF_LD_IMM64(BPF_REG_3, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "helper call is not allowed in probe",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACING,
- .expected_attach_type = BPF_TRACE_FENTRY,
- .kfunc = "d_path",
-},
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 5cf361d8eb1c..77207b498c6f 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -2,6 +2,7 @@
"dead code: start",
.insns = {
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 7),
@@ -85,7 +86,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -103,7 +104,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -121,7 +122,7 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -137,7 +138,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
@@ -152,8 +153,20 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
+{
+ "dead code: zero extension",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+},
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
deleted file mode 100644
index ac1e19d0f520..000000000000
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ /dev/null
@@ -1,656 +0,0 @@
-{
- "pkt_end - pkt_start is allowed",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = TEST_DATA_LEN,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 15),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 7),
- BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_3, 12),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 14),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_3, 4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access off=76",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-},
-{
- "direct packet access: test4 (write)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test5 (pkt_end >= reg, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test6 (pkt_end >= reg, bad access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid access to packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test7 (pkt_end >= reg, both accesses)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid access to packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test8 (double test, variant 1)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test9 (double test, variant 2)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test10 (write invalid)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid access to packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test11 (shift, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
- BPF_MOV64_IMM(BPF_REG_3, 144),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 3),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test12 (and, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
- BPF_MOV64_IMM(BPF_REG_3, 144),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test13 (branches, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 13),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 2),
- BPF_MOV64_IMM(BPF_REG_3, 14),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_MOV64_IMM(BPF_REG_3, 24),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
- BPF_MOV64_IMM(BPF_REG_5, 12),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test15 (spill with xadd)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
- BPF_MOV64_IMM(BPF_REG_5, 4096),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_4, BPF_REG_5, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 invalid mem access 'inv'",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test16 (arith on data_end)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 pointer arithmetic on pkt_end",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test17 (pruning, alignment)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_A(-6),
- },
- .errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
-},
-{
- "direct packet access: test18 (imm += pkt_ptr, 1)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_IMM(BPF_REG_0, 8),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test19 (imm += pkt_ptr, 2)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
- BPF_MOV64_IMM(BPF_REG_4, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
- BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test20 (x += pkt_ptr, 1)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test21 (x += pkt_ptr, 2)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
- BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test22 (x += pkt_ptr, 3)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
- BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_4, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test23 (x += pkt_ptr, 4)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test24 (x += pkt_ptr, 5)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, 64),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test25 (marking on <, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -4),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test26 (marking on <, bad access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JA, 0, 0, -3),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test27 (marking on <=, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test28 (marking on <=, bad access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -4),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test29 (reg > pkt_end in subprog)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c b/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c
deleted file mode 100644
index 698e3779fdd2..000000000000
--- a/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c
+++ /dev/null
@@ -1,40 +0,0 @@
-{
- "direct stack access with 32-bit wraparound. test1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "fp pointer and 2147483647",
- .result = REJECT
-},
-{
- "direct stack access with 32-bit wraparound. test2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "fp pointer and 1073741823",
- .result = REJECT
-},
-{
- "direct stack access with 32-bit wraparound. test3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "fp pointer offset 1073741822",
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result = REJECT
-},
diff --git a/tools/testing/selftests/bpf/verifier/div0.c b/tools/testing/selftests/bpf/verifier/div0.c
deleted file mode 100644
index 7685edfbcf71..000000000000
--- a/tools/testing/selftests/bpf/verifier/div0.c
+++ /dev/null
@@ -1,184 +0,0 @@
-{
- "DIV32 by 0, zero check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "DIV32 by 0, zero check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "DIV64 by 0, zero check",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "MOD32 by 0, zero check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "MOD32 by 0, zero check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "MOD64 by 0, zero check",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "DIV32 by 0, zero check ok, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 2),
- BPF_MOV32_IMM(BPF_REG_2, 16),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 8,
-},
-{
- "DIV32 by 0, zero check 1, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV32 by 0, zero check 2, cls",
- .insns = {
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV64 by 0, zero check, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "MOD32 by 0, zero check ok, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 3),
- BPF_MOV32_IMM(BPF_REG_2, 5),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "MOD32 by 0, zero check 1, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "MOD32 by 0, zero check 2, cls",
- .insns = {
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "MOD64 by 0, zero check 1, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 2),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "MOD64 by 0, zero check 2, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, -1),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = -1,
-},
diff --git a/tools/testing/selftests/bpf/verifier/div_overflow.c b/tools/testing/selftests/bpf/verifier/div_overflow.c
deleted file mode 100644
index acab4f00819f..000000000000
--- a/tools/testing/selftests/bpf/verifier/div_overflow.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Just make sure that JITs used udiv/umod as otherwise we get
- * an exception from INT_MIN/-1 overflow similarly as with div
- * by zero.
- */
-{
- "DIV32 overflow, check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV32 overflow, check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV64 overflow, check 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
- BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV64 overflow, check 2",
- .insns = {
- BPF_LD_IMM64(BPF_REG_1, LLONG_MIN),
- BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "MOD32 overflow, check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = INT_MIN,
-},
-{
- "MOD32 overflow, check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = INT_MIN,
-},
-{
- "MOD64 overflow, check 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "MOD64 overflow, check 2",
- .insns = {
- BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
deleted file mode 100644
index 0ab7f1dfc97a..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ /dev/null
@@ -1,616 +0,0 @@
-{
- "helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 16),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, bitwise AND, zero included",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack R1 off -64+0 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, bitwise AND + JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-64 size=65",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 16),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP (signed), correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 16),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, bounds + offset",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-64 size=65",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect access to stack R1 off=-64 size=65",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, no max check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- /* because max wasn't checked, signed min is negative */
- .errstr = "R2 min value is negative, either use unsigned or 'var &= const'",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, no min check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack R1 off -64+0 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP (signed), no min check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map, JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val), 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map, JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=49",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map adjusted, JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 20, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map adjusted, JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .errstr = "R1 min value is outside of the allowed memory range",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 type=inv expected=fp",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 0 /* csum_diff of 64-byte packet */,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 type=inv expected=fp",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 type=inv expected=fp",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: 8 bytes leak",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack R1 off -64+32 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: 8 bytes no leak (init memory)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/helper_packet_access.c b/tools/testing/selftests/bpf/verifier/helper_packet_access.c
deleted file mode 100644
index ae54587e9829..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_packet_access.c
+++ /dev/null
@@ -1,460 +0,0 @@
-{
- "helper access to packet: test1, valid packet_ptr range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .result_unpriv = ACCEPT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test2, unchecked packet_ptr",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test3, variable add",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 11 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test4, packet_ptr with bad range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 7 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test5, packet_ptr with too short range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test6, cls valid packet_ptr range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test7, cls unchecked packet_ptr",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test8, cls variable add",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 11 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test9, cls packet_ptr with bad range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 7 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test10, cls packet_ptr with too short range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test11, cls unsuitable helper 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_4, 42),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "helper access to the packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test12, cls unsuitable helper 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_4, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "helper access to the packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test13, cls helper ok",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test14, cls helper ok sub",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test15, cls helper fail sub",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test16, cls helper fail range 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test17, cls helper fail range 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, -9),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R2 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test18, cls helper fail range 3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, ~0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R2 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test19, cls helper range zero",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test20, pkt end as input",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 type=pkt_end expected=fp",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test21, wrong reg",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c
deleted file mode 100644
index 1c7882ddfa63..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_value_access.c
+++ /dev/null
@@ -1,953 +0,0 @@
-{
- "helper access to map: full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=0",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: out-of-bound range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=56",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: negative range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=0",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): out-of-bound range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=52",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): negative range (> adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): negative range (< adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the allowed memory range",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): out-of-bound range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) -
- offsetof(struct test_val, foo) + 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=52",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): negative range (> adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): negative range (< adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the allowed memory range",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): no max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R1 unbounded memory access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): wrong max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) -
- offsetof(struct test_val, foo) + 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=45",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 unbounded memory access",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <=, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <=, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 unbounded memory access",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<, good access 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 min value is negative",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<=, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<=, good access 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<=, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 min value is negative",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map lookup helper access to map",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 8 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map update helper access to map",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map update helper access to map: wrong size",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .fixup_map_hash_16b = { 10 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=8 off=0 size=16",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const imm)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, offsetof(struct other_val, bar)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 9 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const imm): out-of-bound 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, sizeof(struct other_val) - 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 9 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=12 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const imm): out-of-bound 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 9 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const reg)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct other_val, bar)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const reg): out-of-bound 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, sizeof(struct other_val) - 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=12 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const reg): out-of-bound 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, -4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via variable)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 11 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via variable): no max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = REJECT,
- .errstr = "R2 unbounded memory access, make sure to bounds check any such access",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via variable): wrong max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar) + 1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 11 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=9 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c
deleted file mode 100644
index 070893fb2900..000000000000
--- a/tools/testing/selftests/bpf/verifier/int_ptr.c
+++ /dev/null
@@ -1,160 +0,0 @@
-{
- "ARG_PTR_TO_LONG uninitialized",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid indirect read from stack R4 off -16+0 size 8",
-},
-{
- "ARG_PTR_TO_LONG half-uninitialized",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid indirect read from stack R4 off -16+4 size 8",
-},
-{
- "ARG_PTR_TO_LONG misaligned",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -12),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "misaligned stack access off (0x0; 0x0)+-20+0 size 8",
-},
-{
- "ARG_PTR_TO_LONG size < sizeof(long)",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 12),
- BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid indirect access to stack R4 off=-4 size=8",
-},
-{
- "ARG_PTR_TO_LONG initialized",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
-},
diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c
index df215e004566..8bf37e5207f1 100644
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
@@ -21,6 +21,30 @@
.retval = 2,
},
{
+ "jit: lsh, rsh, arsh by reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 0xff),
+ BPF_ALU64_REG(BPF_LSH, BPF_REG_1, BPF_REG_0),
+ BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_RSH, BPF_REG_1, BPF_REG_4),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_1),
+ BPF_ALU32_REG(BPF_RSH, BPF_REG_4, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0xff, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ARSH, BPF_REG_4, BPF_REG_4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+},
+{
"jit: mov32 for ldimm64, 1",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 2),
@@ -62,6 +86,11 @@
BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
@@ -73,11 +102,69 @@
BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_2, 0x2ad4d4aaULL),
+ BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 0x2b),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
- BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
- BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+ BPF_LD_IMM64(BPF_REG_5, 0xeeff0d413122ULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_5, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+},
+{
+ "jit: various div tests",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL),
+ BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+ BPF_LD_IMM64(BPF_REG_3, 0xbeefULL),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+ BPF_LD_IMM64(BPF_REG_3, 0x2bULL),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_IMM(BPF_REG_0, 2),
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index bd5cae4a7f73..91d83e9cb148 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -84,6 +84,7 @@
BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
@@ -147,6 +148,7 @@
BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
@@ -210,6 +212,7 @@
BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
@@ -277,11 +280,13 @@
BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jgt32: BPF_K",
@@ -345,11 +350,13 @@
BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jle32: BPF_K",
@@ -413,11 +420,13 @@
BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jlt32: BPF_K",
@@ -481,11 +490,13 @@
BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jsge32: BPF_K",
@@ -549,11 +560,13 @@
BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jsgt32: BPF_K",
@@ -617,11 +630,13 @@
BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jsle32: BPF_K",
@@ -685,11 +700,13 @@
BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jslt32: BPF_K",
@@ -753,11 +770,13 @@
BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jgt32: range bound deduction, reg op imm",
@@ -842,3 +861,24 @@
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "jeq32/jne32: bounds checking",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 563),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
+ BPF_ALU32_REG(BPF_OR, BPF_REG_2, BPF_REG_6),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_2, 8, 5),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_2, 500, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
index 8dcd4e0383d5..e901eefd774a 100644
--- a/tools/testing/selftests/bpf/verifier/jset.c
+++ b/tools/testing/selftests/bpf/verifier/jset.c
@@ -78,12 +78,11 @@
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .retval_unpriv = 1,
- .result_unpriv = ACCEPT,
.retval = 1,
.result = ACCEPT,
},
@@ -136,12 +135,12 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .result_unpriv = ACCEPT,
.result = ACCEPT,
},
{
@@ -153,15 +152,16 @@
BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3),
BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .result_unpriv = ACCEPT,
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/jump.c b/tools/testing/selftests/bpf/verifier/jump.c
index 6f951d1ff0a4..497fe17d2eaf 100644
--- a/tools/testing/selftests/bpf/verifier/jump.c
+++ b/tools/testing/selftests/bpf/verifier/jump.c
@@ -373,3 +373,25 @@
.result = ACCEPT,
.retval = 3,
},
+{
+ "jump & dead code elimination",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 32767),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_3, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0x8000, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -32767),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 0, 1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+},
diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c
index f9297900cea6..78f19c255f20 100644
--- a/tools/testing/selftests/bpf/verifier/ld_imm64.c
+++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c
@@ -9,8 +9,8 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
+ .errstr = "jump into the middle of ldimm64 insn 1",
+ .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{
@@ -23,8 +23,8 @@
BPF_LD_IMM64(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
+ .errstr = "jump into the middle of ldimm64 insn 1",
+ .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/ld_ind.c b/tools/testing/selftests/bpf/verifier/ld_ind.c
deleted file mode 100644
index 079734227538..000000000000
--- a/tools/testing/selftests/bpf/verifier/ld_ind.c
+++ /dev/null
@@ -1,72 +0,0 @@
-{
- "ld_ind: check calling conv, r1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
- BPF_EXIT_INSN(),
- },
- .errstr = "R4 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_5, 1),
- BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
- BPF_EXIT_INSN(),
- },
- .errstr = "R5 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r7",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 1,
-},
diff --git a/tools/testing/selftests/bpf/verifier/leak_ptr.c b/tools/testing/selftests/bpf/verifier/leak_ptr.c
deleted file mode 100644
index 73f0dea95546..000000000000
--- a/tools/testing/selftests/bpf/verifier/leak_ptr.c
+++ /dev/null
@@ -1,67 +0,0 @@
-{
- "leak pointer into ctx 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_2,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 2 },
- .errstr_unpriv = "R2 leaks addr into mem",
- .result_unpriv = REJECT,
- .result = REJECT,
- .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed",
-},
-{
- "leak pointer into ctx 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_10,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R10 leaks addr into mem",
- .result_unpriv = REJECT,
- .result = REJECT,
- .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed",
-},
-{
- "leak pointer into ctx 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .errstr_unpriv = "R2 leaks addr into ctx",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "leak pointer into map val",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr_unpriv = "R6 leaks addr into mem",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c
deleted file mode 100644
index 1af37187dc12..000000000000
--- a/tools/testing/selftests/bpf/verifier/loops1.c
+++ /dev/null
@@ -1,206 +0,0 @@
-{
- "bounded loop, count to 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .retval = 4,
-},
-{
- "bounded loop, count to 20",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 20, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bounded loop, count from positive unknown to 4",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .retval = 4,
-},
-{
- "bounded loop, count from totally unknown to 4",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bounded loop, count to 4 with equality",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bounded loop, start in the middle",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_A(1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "back-edge",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .retval = 4,
-},
-{
- "bounded loop containing a forward jump",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .retval = 4,
-},
-{
- "bounded loop that jumps out rather than in",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 10000, 2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_JMP_A(-4),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "infinite loop after a conditional jump",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 5),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_A(-2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "program is too large",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bounded recursion",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 4, 1),
- BPF_EXIT_INSN(),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "back-edge",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "infinite loop in two jumps",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_A(0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "loop detected",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "infinite loop: three-jump trick",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, -11),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "loop detected",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "not-taken loop with back jump to 1st insn",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 123),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .retval = 123,
-},
-{
- "taken loop with back jump to 1st insn",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, -3),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .retval = 55,
-},
-{
- "taken loop with back jump to 1st insn, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
- BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, -3),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .retval = 55,
-},
diff --git a/tools/testing/selftests/bpf/verifier/lwt.c b/tools/testing/selftests/bpf/verifier/lwt.c
deleted file mode 100644
index 2cab6a3966bb..000000000000
--- a/tools/testing/selftests/bpf/verifier/lwt.c
+++ /dev/null
@@ -1,189 +0,0 @@
-{
- "invalid direct packet write for LWT_IN",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "cannot write into packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "invalid direct packet write for LWT_OUT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "cannot write into packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_OUT,
-},
-{
- "direct packet write for LWT_XMIT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_XMIT,
-},
-{
- "direct packet read for LWT_IN",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "direct packet read for LWT_OUT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_OUT,
-},
-{
- "direct packet read for LWT_XMIT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_XMIT,
-},
-{
- "overlapping checks for direct packet access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_XMIT,
-},
-{
- "make headroom for LWT_XMIT",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
- /* split for s390 to succeed */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 42),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_XMIT,
-},
-{
- "invalid access of tc_classid for LWT_IN",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
-},
-{
- "invalid access of tc_classid for LWT_OUT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
-},
-{
- "invalid access of tc_classid for LWT_XMIT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
-},
-{
- "check skb->tc_classid half load not permitted for lwt prog",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
-#else
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid) + 2),
-#endif
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
diff --git a/tools/testing/selftests/bpf/verifier/map_in_map.c b/tools/testing/selftests/bpf/verifier/map_in_map.c
deleted file mode 100644
index 2798927ee9ff..000000000000
--- a/tools/testing/selftests/bpf/verifier/map_in_map.c
+++ /dev/null
@@ -1,62 +0,0 @@
-{
- "map in map access",
- .insns = {
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_in_map = { 3 },
- .result = ACCEPT,
-},
-{
- "invalid inner map pointer",
- .insns = {
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_in_map = { 3 },
- .errstr = "R1 pointer arithmetic on map_ptr prohibited",
- .result = REJECT,
-},
-{
- "forgot null checking on the inner map pointer",
- .insns = {
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_in_map = { 3 },
- .errstr = "R1 type=map_value_or_null expected=map_ptr",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
new file mode 100644
index 000000000000..4b39f8472f9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -0,0 +1,444 @@
+/* Common tests */
+{
+ "map_kptr: BPF_ST imm != 0",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "BPF_ST imm must be 0 when storing to kptr at off=0",
+},
+{
+ "map_kptr: size != bpf_size_to_bytes(BPF_DW)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr access size must be BPF_DW",
+},
+{
+ "map_kptr: map_value non-const var_off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr access cannot have variable offset",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "map_kptr: bpf_kptr_xchg non-const var_off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R1 doesn't have constant offset. kptr has to be at the constant offset",
+},
+{
+ "map_kptr: unaligned boundary load/store",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr access misaligned expected=0 off=7",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "map_kptr: reject var_off != 0",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "variable untrusted_ptr_ access var_off=(0x0; 0x7) disallowed",
+},
+/* Tests for unreferenced PTR_TO_BTF_ID */
+{
+ "map_kptr: unref: reject btf_struct_ids_match == false",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test",
+},
+{
+ "map_kptr: unref: loaded pointer marked as untrusted",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'untrusted_ptr_or_null_'",
+},
+{
+ "map_kptr: unref: correct in kernel type size",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 32),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "access beyond struct prog_test_ref_kfunc at off 32 size 8",
+},
+{
+ "map_kptr: unref: inherit PTR_UNTRUSTED on struct walk",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R1 type=untrusted_ptr_ expected=percpu_ptr_",
+},
+{
+ "map_kptr: unref: no reference state created",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = ACCEPT,
+},
+{
+ "map_kptr: unref: bpf_kptr_xchg rejected",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "off=0 kptr isn't referenced kptr",
+},
+/* Tests for referenced PTR_TO_BTF_ID */
+{
+ "map_kptr: ref: loaded pointer marked as untrusted",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R1 type=rcu_ptr_or_null_ expected=percpu_ptr_",
+},
+{
+ "map_kptr: ref: reject off != 0",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member",
+},
+{
+ "map_kptr: ref: reference state created and released on xchg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "Unreleased reference id=4 alloc_insn=20",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 15 },
+ }
+},
+{
+ "map_kptr: ref: reject STX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "store to referenced kptr disallowed",
+},
+{
+ "map_kptr: ref: reject ST",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 8, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "store to referenced kptr disallowed",
+},
+{
+ "map_kptr: reject helper access to kptr",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr cannot be accessed indirectly by helper",
+},
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
deleted file mode 100644
index b117bdd3806d..000000000000
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ /dev/null
@@ -1,95 +0,0 @@
-{
- "bpf_map_ptr: read with negative offset rejected",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
- .result = REJECT,
- .errstr = "R1 is bpf_array invalid negative access: off=-8",
-},
-{
- "bpf_map_ptr: write rejected",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
- .result = REJECT,
- .errstr = "only read from bpf_array is supported",
-},
-{
- "bpf_map_ptr: read non-existent field rejected",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
- .result = REJECT,
- .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "bpf_map_ptr: read ops field accepted",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "bpf_map_ptr: r = 0, map_ptr = map_ptr + r",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 4 },
- .result = ACCEPT,
-},
-{
- "bpf_map_ptr: r = 0, r = r + map_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 4 },
- .result = ACCEPT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
deleted file mode 100644
index 1f2b8c4cb26d..000000000000
--- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
+++ /dev/null
@@ -1,100 +0,0 @@
-{
- "calls: two calls returning different map pointers for lookup (hash, array)",
- .insns = {
- /* main prog */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_CALL_REL(11),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_CALL_REL(12),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- /* subprog 1 */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- /* subprog 2 */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_hash_48b = { 13 },
- .fixup_map_array_48b = { 16 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "calls: two calls returning different map pointers for lookup (hash, map in map)",
- .insns = {
- /* main prog */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_CALL_REL(11),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_CALL_REL(12),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- /* subprog 1 */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- /* subprog 2 */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_in_map = { 16 },
- .fixup_map_array_48b = { 13 },
- .result = REJECT,
- .errstr = "only read from bpf_array is supported",
-},
-{
- "cond: two branches returning different map pointers for lookup (tail, tail)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5 },
- .fixup_prog2 = { 2 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "tail_call abusing map_ptr",
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "cond: two branches returning same map pointers for lookup (tail, tail)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog2 = { 2, 5 },
- .result_unpriv = ACCEPT,
- .result = ACCEPT,
- .retval = 42,
-},
diff --git a/tools/testing/selftests/bpf/verifier/map_ret_val.c b/tools/testing/selftests/bpf/verifier/map_ret_val.c
deleted file mode 100644
index bdd0e8d18333..000000000000
--- a/tools/testing/selftests/bpf/verifier/map_ret_val.c
+++ /dev/null
@@ -1,65 +0,0 @@
-{
- "invalid map_fd for function call",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
- BPF_EXIT_INSN(),
- },
- .errstr = "fd 0 is not pointing to valid bpf_map",
- .result = REJECT,
-},
-{
- "don't check return value before access",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 invalid mem access 'map_value_or_null'",
- .result = REJECT,
-},
-{
- "access memory with incorrect alignment",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "misaligned value access",
- .result = REJECT,
- .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
-},
-{
- "sometimes access memory with incorrect alignment",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 invalid mem access",
- .errstr_unpriv = "R0 leaks addr",
- .result = REJECT,
- .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/masking.c b/tools/testing/selftests/bpf/verifier/masking.c
deleted file mode 100644
index 6e1358c544fd..000000000000
--- a/tools/testing/selftests/bpf/verifier/masking.c
+++ /dev/null
@@ -1,322 +0,0 @@
-{
- "masking, test out of bounds 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 5),
- BPF_MOV32_IMM(BPF_REG_2, 5 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 3",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 4",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 5",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 6",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 7",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 5),
- BPF_MOV32_IMM(BPF_REG_2, 5 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 8",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 9",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 10",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 11",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 12",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test in bounds 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 4),
- BPF_MOV32_IMM(BPF_REG_2, 5 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 4,
-},
-{
- "masking, test in bounds 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test in bounds 3",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xfffffffe),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0xfffffffe,
-},
-{
- "masking, test in bounds 4",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xabcde),
- BPF_MOV32_IMM(BPF_REG_2, 0xabcdef - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0xabcde,
-},
-{
- "masking, test in bounds 5",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test in bounds 6",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 46),
- BPF_MOV32_IMM(BPF_REG_2, 47 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 46,
-},
-{
- "masking, test in bounds 7",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, -46),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1),
- BPF_MOV32_IMM(BPF_REG_2, 47 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 46,
-},
-{
- "masking, test in bounds 8",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, -47),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1),
- BPF_MOV32_IMM(BPF_REG_2, 47 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
diff --git a/tools/testing/selftests/bpf/verifier/meta_access.c b/tools/testing/selftests/bpf/verifier/meta_access.c
deleted file mode 100644
index b45e8af41420..000000000000
--- a/tools/testing/selftests/bpf/verifier/meta_access.c
+++ /dev/null
@@ -1,235 +0,0 @@
-{
- "meta access, test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet, off=-8",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test4",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test5",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_xdp_adjust_meta),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R3 !read_ok",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test6",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test7",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test8",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test9",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test10",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_IMM(BPF_REG_5, 42),
- BPF_MOV64_IMM(BPF_REG_6, 24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test11",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_IMM(BPF_REG_5, 42),
- BPF_MOV64_IMM(BPF_REG_6, 24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test12",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
diff --git a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c
index 471c1a5950d8..d8a9b1a1f9a2 100644
--- a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c
+++ b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c
@@ -2,7 +2,7 @@
"check bpf_perf_event_data->sample_period byte load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_perf_event_data, sample_period)),
#else
@@ -18,7 +18,7 @@
"check bpf_perf_event_data->sample_period half load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_perf_event_data, sample_period)),
#else
@@ -34,7 +34,7 @@
"check bpf_perf_event_data->sample_period word load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_perf_event_data, sample_period)),
#else
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 6dc8003ffc70..59a020c35647 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -27,7 +27,7 @@
BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=scalar(umin=1, umax=8) */
BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
@@ -38,25 +38,24 @@
.fixup_map_array_48b = { 1 },
.result = VERBOSE_ACCEPT,
.errstr =
- "26: (85) call bpf_probe_read_kernel#113\
- last_idx 26 first_idx 20\
- regs=4 stack=0 before 25\
- regs=4 stack=0 before 24\
- regs=4 stack=0 before 23\
- regs=4 stack=0 before 22\
- regs=4 stack=0 before 20\
- parent didn't have regs=4 stack=0 marks\
- last_idx 19 first_idx 10\
- regs=4 stack=0 before 19\
- regs=200 stack=0 before 18\
- regs=300 stack=0 before 17\
- regs=201 stack=0 before 15\
- regs=201 stack=0 before 14\
- regs=200 stack=0 before 13\
- regs=200 stack=0 before 12\
- regs=200 stack=0 before 11\
- regs=200 stack=0 before 10\
- parent already had regs=0 stack=0 marks",
+ "mark_precise: frame0: last_idx 26 first_idx 20\
+ mark_precise: frame0: regs=r2 stack= before 25\
+ mark_precise: frame0: regs=r2 stack= before 24\
+ mark_precise: frame0: regs=r2 stack= before 23\
+ mark_precise: frame0: regs=r2 stack= before 22\
+ mark_precise: frame0: regs=r2 stack= before 20\
+ mark_precise: frame0: parent state regs=r2,r9 stack=:\
+ mark_precise: frame0: last_idx 19 first_idx 10\
+ mark_precise: frame0: regs=r2,r9 stack= before 19\
+ mark_precise: frame0: regs=r9 stack= before 18\
+ mark_precise: frame0: regs=r8,r9 stack= before 17\
+ mark_precise: frame0: regs=r0,r9 stack= before 15\
+ mark_precise: frame0: regs=r0,r9 stack= before 14\
+ mark_precise: frame0: regs=r9 stack= before 13\
+ mark_precise: frame0: regs=r9 stack= before 12\
+ mark_precise: frame0: regs=r9 stack= before 11\
+ mark_precise: frame0: regs=r9 stack= before 10\
+ mark_precise: frame0: parent state regs= stack=:",
},
{
"precise: test 2",
@@ -87,7 +86,7 @@
BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=scalar(umin=1, umax=8) */
BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
@@ -100,20 +99,20 @@
.flags = BPF_F_TEST_STATE_FREQ,
.errstr =
"26: (85) call bpf_probe_read_kernel#113\
- last_idx 26 first_idx 22\
- regs=4 stack=0 before 25\
- regs=4 stack=0 before 24\
- regs=4 stack=0 before 23\
- regs=4 stack=0 before 22\
- parent didn't have regs=4 stack=0 marks\
- last_idx 20 first_idx 20\
- regs=4 stack=0 before 20\
- parent didn't have regs=4 stack=0 marks\
- last_idx 19 first_idx 17\
- regs=4 stack=0 before 19\
- regs=200 stack=0 before 18\
- regs=300 stack=0 before 17\
- parent already had regs=0 stack=0 marks",
+ mark_precise: frame0: last_idx 26 first_idx 22\
+ mark_precise: frame0: regs=r2 stack= before 25\
+ mark_precise: frame0: regs=r2 stack= before 24\
+ mark_precise: frame0: regs=r2 stack= before 23\
+ mark_precise: frame0: regs=r2 stack= before 22\
+ mark_precise: frame0: parent state regs=r2 stack=:\
+ mark_precise: frame0: last_idx 20 first_idx 20\
+ mark_precise: frame0: regs=r2 stack= before 20\
+ mark_precise: frame0: parent state regs=r2,r9 stack=:\
+ mark_precise: frame0: last_idx 19 first_idx 17\
+ mark_precise: frame0: regs=r2,r9 stack= before 19\
+ mark_precise: frame0: regs=r9 stack= before 18\
+ mark_precise: frame0: regs=r8,r9 stack= before 17\
+ mark_precise: frame0: parent state regs= stack=:",
},
{
"precise: cross frame pruning",
@@ -141,10 +140,11 @@
.result = REJECT,
},
{
- "precise: ST insn causing spi > allocated_stack",
+ "precise: ST zero to stack insn is supported",
.insns = {
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
+ /* not a register spill, so we stop precision propagation for R4 here */
BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0),
BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
BPF_MOV64_IMM(BPF_REG_0, -1),
@@ -153,15 +153,16 @@
},
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
- .errstr = "5: (2d) if r4 > r0 goto pc+0\
- last_idx 5 first_idx 5\
- parent didn't have regs=10 stack=0 marks\
- last_idx 4 first_idx 2\
- regs=10 stack=0 before 4\
- regs=10 stack=0 before 3\
- regs=0 stack=1 before 2\
- last_idx 5 first_idx 5\
- parent didn't have regs=1 stack=0 marks",
+ .errstr = "mark_precise: frame0: last_idx 5 first_idx 5\
+ mark_precise: frame0: parent state regs=r4 stack=:\
+ mark_precise: frame0: last_idx 4 first_idx 2\
+ mark_precise: frame0: regs=r4 stack= before 4\
+ mark_precise: frame0: regs=r4 stack= before 3\
+ mark_precise: frame0: last_idx 5 first_idx 5\
+ mark_precise: frame0: parent state regs=r0 stack=:\
+ mark_precise: frame0: last_idx 4 first_idx 2\
+ mark_precise: frame0: regs=r0 stack= before 4\
+ 5: R0=-1 R4=0",
.result = VERBOSE_ACCEPT,
.retval = -1,
},
@@ -169,6 +170,8 @@
"precise: STX insn causing spi > allocated_stack",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ /* make later reg spill more interesting by having somewhat known scalar */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8),
@@ -179,16 +182,83 @@
},
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
- .errstr = "last_idx 6 first_idx 6\
- parent didn't have regs=10 stack=0 marks\
- last_idx 5 first_idx 3\
- regs=10 stack=0 before 5\
- regs=10 stack=0 before 4\
- regs=0 stack=1 before 3\
- last_idx 6 first_idx 6\
- parent didn't have regs=1 stack=0 marks\
- last_idx 5 first_idx 3\
- regs=1 stack=0 before 5",
+ .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\
+ mark_precise: frame0: parent state regs=r4 stack=:\
+ mark_precise: frame0: last_idx 6 first_idx 4\
+ mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\
+ mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\
+ mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\
+ mark_precise: frame0: parent state regs=r0 stack=:\
+ mark_precise: frame0: last_idx 3 first_idx 3\
+ mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\
+ mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\
+ mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\
+ mark_precise: frame0: parent state regs=r0 stack=:\
+ mark_precise: frame0: last_idx 0 first_idx 0\
+ mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\
+ mark_precise: frame0: last_idx 7 first_idx 7\
+ mark_precise: frame0: parent state regs= stack=:",
.result = VERBOSE_ACCEPT,
.retval = -1,
},
+{
+ "precise: mark_chain_precision for ARG_CONST_ALLOC_SIZE_OR_ZERO",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, ingress_ifindex)),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_2, 0x1000),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 42),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = BPF_F_TEST_STATE_FREQ | F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ .errstr = "invalid access to memory, mem_size=1 off=42 size=8",
+ .result = REJECT,
+},
+{
+ "precise: program doesn't prematurely prune branches",
+ .insns = {
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0x400),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_8, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0x80000000),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 0x401),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 2),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
+ BPF_LD_MAP_FD(BPF_REG_4, 0),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 10),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_6, 8192),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_3, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 13 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = REJECT,
+ .errstr = "register with unbounded min value is not allowed",
+},
diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
deleted file mode 100644
index fc4e301260f6..000000000000
--- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
+++ /dev/null
@@ -1,29 +0,0 @@
-{
- "prevent map lookup in stack trace",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_stacktrace = { 3 },
- .result = REJECT,
- .errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem",
- .prog_type = BPF_PROG_TYPE_PERF_EVENT,
-},
-{
- "prevent map lookup in prog array",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_prog2 = { 3 },
- .result = REJECT,
- .errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem",
-},
diff --git a/tools/testing/selftests/bpf/verifier/raw_stack.c b/tools/testing/selftests/bpf/verifier/raw_stack.c
deleted file mode 100644
index cc8e8c3cdc03..000000000000
--- a/tools/testing/selftests/bpf/verifier/raw_stack.c
+++ /dev/null
@@ -1,305 +0,0 @@
-{
- "raw_stack: no skb_load_bytes",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- /* Call to skb_load_bytes() omitted. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid read from stack R6 off=-8 size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, negative len",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, negative len 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, ~0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, zero len",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid zero-sized read",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, no init",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, init",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs around bounds",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs corruption",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 invalid mem access 'inv'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs corruption 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R3 invalid mem access 'inv'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs + data",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid indirect access to stack R3 off=-513 size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid indirect access to stack R3 off=-1 size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid zero-sized read",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, large access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 512),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
deleted file mode 100644
index 2978fb5a769d..000000000000
--- a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
+++ /dev/null
@@ -1,35 +0,0 @@
-{
- "raw_tracepoint_writable: reject variable offset",
- .insns = {
- /* r6 is our tp buffer */
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
-
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- /* move the key (== 0) to r10-8 */
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- /* lookup in the map */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_map_lookup_elem),
-
- /* exit clean if null */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
-
- /* shift the buffer pointer to a variable location */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0),
- /* clobber whatever's there */
- BPF_MOV64_IMM(BPF_REG_7, 4242),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0),
-
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1, },
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
- .errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
deleted file mode 100644
index 3b6ee009c00b..000000000000
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ /dev/null
@@ -1,907 +0,0 @@
-{
- "reference tracking: leak potential reference",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: leak potential reference to sock_common",
- .insns = {
- BPF_SK_LOOKUP(skc_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: leak potential reference on stack",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: leak potential reference on stack 2",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: zero potential reference",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: zero potential reference to sock_common",
- .insns = {
- BPF_SK_LOOKUP(skc_lookup_tcp),
- BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: copy and zero potential references",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: release reference without check",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* reference in r0 may be NULL */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=sock_or_null expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking: release reference to sock_common without check",
- .insns = {
- BPF_SK_LOOKUP(skc_lookup_tcp),
- /* reference in r0 may be NULL */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=sock_common_or_null expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking: release reference",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: release reference to sock_common",
- .insns = {
- BPF_SK_LOOKUP(skc_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: release reference 2",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: release reference twice",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=inv expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking: release reference twice inside branch",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=inv expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking: alloc, check, free in one subbranch",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
- /* if (offsetof(skb, mark) > data_len) exit; */
- BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
- offsetof(struct __sk_buff, mark)),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */
- /* Leak reference in R0 */
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "reference tracking: alloc, check, free in both subbranches",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
- /* if (offsetof(skb, mark) > data_len) exit; */
- BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
- offsetof(struct __sk_buff, mark)),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "reference tracking in call: free reference in subprog",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking in call: free reference in subprog and outside",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=inv expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking in call: alloc & leak reference in subprog",
- .insns = {
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_4),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* spill unchecked sk_ptr into stack of caller */
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking in call: alloc in subprog, release outside",
- .insns = {
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_EXIT_INSN(), /* return sk */
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = POINTER_VALUE,
- .result = ACCEPT,
-},
-{
- "reference tracking in call: sk_ptr leak into caller stack",
- .insns = {
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
- /* spill unchecked sk_ptr into stack of caller */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 2 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking in call: sk_ptr spill into caller stack",
- .insns = {
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
- /* spill unchecked sk_ptr into stack of caller */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- /* now the sk_ptr is verified, free the reference */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
-
- /* subprog 2 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: allow LD_ABS",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LD_ABS(BPF_B, 0),
- BPF_LD_ABS(BPF_H, 0),
- BPF_LD_ABS(BPF_W, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: forbid LD_ABS while holding reference",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_LD_ABS(BPF_B, 0),
- BPF_LD_ABS(BPF_H, 0),
- BPF_LD_ABS(BPF_W, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
- .result = REJECT,
-},
-{
- "reference tracking: allow LD_IND",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "reference tracking: forbid LD_IND while holding reference",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
- .result = REJECT,
-},
-{
- "reference tracking: check reference or tail call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* if (sk) bpf_sk_release() */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
- /* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 17 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: release reference then tail call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* if (sk) bpf_sk_release() */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- /* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 18 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: leak possible reference over tail call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
- /* Look up socket and store in REG_6 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* bpf_tail_call() */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- /* if (sk) bpf_sk_release() */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 16 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "tail_call would lead to reference leak",
- .result = REJECT,
-},
-{
- "reference tracking: leak checked reference over tail call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
- /* Look up socket and store in REG_6 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- /* if (!sk) goto end */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- /* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 17 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "tail_call would lead to reference leak",
- .result = REJECT,
-},
-{
- "reference tracking: mangle and release sock_or_null",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "R1 pointer arithmetic on sock_or_null prohibited",
- .result = REJECT,
-},
-{
- "reference tracking: mangle and release sock",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "R1 pointer arithmetic on sock prohibited",
- .result = REJECT,
-},
-{
- "reference tracking: access member",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: write to member",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_LD_IMM64(BPF_REG_2, 42),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2,
- offsetof(struct bpf_sock, mark)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LD_IMM64(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "cannot write into sock",
- .result = REJECT,
-},
-{
- "reference tracking: invalid 64-bit access of member",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "invalid sock access off=0 size=8",
- .result = REJECT,
-},
-{
- "reference tracking: access after release",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "!read_ok",
- .result = REJECT,
-},
-{
- "reference tracking: direct access for lookup",
- .insns = {
- /* Check that the packet is at least 64B long */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
- /* sk = sk_lookup_tcp(ctx, skb->data, ...) */
- BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: use ptr from bpf_tcp_sock() after release",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "reference tracking: use ptr from bpf_sk_fullsock() after release",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "reference tracking: use sk after bpf_sk_release(tp)",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, src_port)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: bpf_sk_release(listen_sk)",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "reference has not been acquired before",
-},
-{
- /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
- "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_8, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
-},
-{
- "reference tracking: branch tracking valid pointer null comparison",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: branch tracking valid pointer value comparison",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 1234, 2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: bpf_sk_release(btf_tcp_sock)",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "unknown func",
-},
-{
- "reference tracking: use ptr from bpf_skc_to_tcp_sock() after release",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
- .result_unpriv = REJECT,
- .errstr_unpriv = "unknown func",
-},
diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c
deleted file mode 100644
index bb0dd89dd212..000000000000
--- a/tools/testing/selftests/bpf/verifier/regalloc.c
+++ /dev/null
@@ -1,277 +0,0 @@
-{
- "regalloc basic",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "regalloc negative",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=48 off=48 size=1",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "regalloc src_reg mark",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "regalloc src_reg negative",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=48 off=44 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "regalloc and spill",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7),
- /* r0 has upper bound that should propagate into r2 */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
- BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
- /* r3 has lower and upper bounds */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "regalloc and spill negative",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7),
- /* r0 has upper bound that should propagate into r2 */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
- BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
- /* r3 has lower and upper bounds */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=48 off=48 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "regalloc three regs",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "regalloc after call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "regalloc in callee",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "regalloc, spill, JEQ",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), /* spill r0 */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
- /* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */
- BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 20, 0),
- /* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 with map_value */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), /* skip ldx if map_value == NULL */
- /* Buggy verifier will think that r3 == 20 here */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), /* read from map_value */
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/runtime_jit.c b/tools/testing/selftests/bpf/verifier/runtime_jit.c
deleted file mode 100644
index 94c399d1faca..000000000000
--- a/tools/testing/selftests/bpf/verifier/runtime_jit.c
+++ /dev/null
@@ -1,231 +0,0 @@
-{
- "runtime/jit: tail_call within bounds, prog once",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "runtime/jit: tail_call within bounds, prog loop",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 41,
-},
-{
- "runtime/jit: tail_call within bounds, no prog",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "runtime/jit: tail_call within bounds, key 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 24,
-},
-{
- "runtime/jit: tail_call within bounds, key 2 / key 2, first branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 13),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5, 9 },
- .result = ACCEPT,
- .retval = 24,
-},
-{
- "runtime/jit: tail_call within bounds, key 2 / key 2, second branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 14),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5, 9 },
- .result = ACCEPT,
- .retval = 24,
-},
-{
- "runtime/jit: tail_call within bounds, key 0 / key 2, first branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 13),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5, 9 },
- .result = ACCEPT,
- .retval = 24,
-},
-{
- "runtime/jit: tail_call within bounds, key 0 / key 2, second branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 14),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5, 9 },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "runtime/jit: tail_call within bounds, different maps, first branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 13),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5 },
- .fixup_prog2 = { 9 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "tail_call abusing map_ptr",
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "runtime/jit: tail_call within bounds, different maps, second branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 14),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5 },
- .fixup_prog2 = { 9 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "tail_call abusing map_ptr",
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "runtime/jit: tail_call out of bounds",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 256),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "runtime/jit: pass negative index to tail_call",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, -1),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "runtime/jit: pass > 32bit index to tail_call",
- .insns = {
- BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 2 },
- .result = ACCEPT,
- .retval = 42,
- /* Verifier rewrite for unpriv skips tail call here. */
- .retval_unpriv = 2,
-},
diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c
deleted file mode 100644
index 7e50cb80873a..000000000000
--- a/tools/testing/selftests/bpf/verifier/search_pruning.c
+++ /dev/null
@@ -1,156 +0,0 @@
-{
- "pointer/scalar confusion in state equality check (way 1)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_JMP_A(1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .retval = POINTER_VALUE,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 leaks addr as return value"
-},
-{
- "pointer/scalar confusion in state equality check (way 2)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
- BPF_JMP_A(1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .retval = POINTER_VALUE,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 leaks addr as return value"
-},
-{
- "liveness pruning and write screening",
- .insns = {
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* branch conditions teach us nothing about R2 */
- BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "varlen_map_value_access pruning",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .errstr = "R0 unbounded memory access",
- .result_unpriv = REJECT,
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "search pruning: all branches should be verified (nop operation)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_A(1),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2),
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R6 invalid mem access 'inv'",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "search pruning: all branches should be verified (invalid stack access)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
- BPF_JMP_A(1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24),
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "invalid read from stack off -16+0 size 8",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "allocated_stack",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, -8),
- BPF_STX_MEM(BPF_B, BPF_REG_10, BPF_REG_7, -9),
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_10, -9),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = ACCEPT,
- .insn_processed = 15,
-},
diff --git a/tools/testing/selftests/bpf/verifier/sleepable.c b/tools/testing/selftests/bpf/verifier/sleepable.c
new file mode 100644
index 000000000000..1f0d2bdc673f
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/sleepable.c
@@ -0,0 +1,91 @@
+{
+ "sleepable fentry accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "bpf_fentry_test1",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable fexit accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "bpf_fentry_test1",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable fmod_ret accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_MODIFY_RETURN,
+ .kfunc = "bpf_fentry_test1",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable iter accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_ITER,
+ .kfunc = "task",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable lsm accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_LSM,
+ .kfunc = "bpf",
+ .expected_attach_type = BPF_LSM_MAC,
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable uprobe accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_KPROBE,
+ .kfunc = "bpf_fentry_test1",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable raw tracepoint reject",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_RAW_TP,
+ .kfunc = "sched_switch",
+ .result = REJECT,
+ .errstr = "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable",
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
deleted file mode 100644
index ce13ece08d51..000000000000
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ /dev/null
@@ -1,658 +0,0 @@
-{
- "skb->sk: no NULL check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid mem access 'sock_common_or_null'",
-},
-{
- "skb->sk: sk->family [non fullsock field]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, family)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "skb->sk: sk->type [fullsock field]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, type)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid sock_common access",
-},
-{
- "bpf_sk_fullsock(skb->sk): no !skb->sk check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "type=sock_common_or_null expected=sock_common",
-},
-{
- "sk_fullsock(skb->sk): no NULL check on ret",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid mem access 'sock_or_null'",
-},
-{
- "sk_fullsock(skb->sk): sk->type [fullsock field]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->family [non fullsock field]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, family)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->state [narrow load]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, state)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->dst_port [narrow load]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid sock access",
-},
-{
- "sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_ip6[0]) + 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->type [narrow load]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->protocol [narrow load]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, protocol)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): beyond last field",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, rx_queue_mapping)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid sock access",
-},
-{
- "bpf_tcp_sock(skb->sk): no !skb->sk check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "type=sock_common_or_null expected=sock_common",
-},
-{
- "bpf_tcp_sock(skb->sk): no NULL check on ret",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid mem access 'tcp_sock_or_null'",
-},
-{
- "bpf_tcp_sock(skb->sk): tp->snd_cwnd",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_tcp_sock(skb->sk): tp->bytes_acked",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, bytes_acked)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_tcp_sock(skb->sk): beyond last field",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_tcp_sock, bytes_acked)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid tcp_sock access",
-},
-{
- "bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_sk_release(skb->sk)",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "reference has not been acquired before",
-},
-{
- "bpf_sk_release(bpf_sk_fullsock(skb->sk))",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "reference has not been acquired before",
-},
-{
- "bpf_sk_release(bpf_tcp_sock(skb->sk))",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "reference has not been acquired before",
-},
-{
- "sk_storage_get(map, skb->sk, NULL, 0): value == NULL",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 11 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "sk_storage_get(map, skb->sk, 1, 1): value == 1",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 11 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "R3 type=inv expected=fp",
-},
-{
- "sk_storage_get(map, skb->sk, &stack_value, 1): stack_value",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 14 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 14 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid indirect read from stack",
-},
-{
- "bpf_map_lookup_elem(smap, &key)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 3 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem",
-},
-{
- "bpf_map_lookup_elem(xskmap, &key); xs->queue_id",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_xdp_sock, queue_id)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_xskmap = { 3 },
- .prog_type = BPF_PROG_TYPE_XDP,
- .result = ACCEPT,
-},
-{
- "bpf_map_lookup_elem(sockmap, &key)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 3 },
- .prog_type = BPF_PROG_TYPE_SK_SKB,
- .result = REJECT,
- .errstr = "Unreleased reference id=2 alloc_insn=5",
-},
-{
- "bpf_map_lookup_elem(sockhash, &key)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockhash = { 3 },
- .prog_type = BPF_PROG_TYPE_SK_SKB,
- .result = REJECT,
- .errstr = "Unreleased reference id=2 alloc_insn=5",
-},
-{
- "bpf_map_lookup_elem(sockmap, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 3 },
- .prog_type = BPF_PROG_TYPE_SK_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_map_lookup_elem(sockhash, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockhash = { 3 },
- .prog_type = BPF_PROG_TYPE_SK_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_sk_select_reuseport(ctx, reuseport_array, &key, flags)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
- BPF_EXIT_INSN(),
- },
- .fixup_map_reuseport_array = { 4 },
- .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
- .result = ACCEPT,
-},
-{
- "bpf_sk_select_reuseport(ctx, sockmap, &key, flags)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 4 },
- .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
- .result = ACCEPT,
-},
-{
- "bpf_sk_select_reuseport(ctx, sockhash, &key, flags)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 4 },
- .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
- .result = ACCEPT,
-},
-{
- "mark null check on return value of bpf_skc_to helpers",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_request_sock),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
- .result_unpriv = REJECT,
- .errstr_unpriv = "unknown func",
-},
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
deleted file mode 100644
index 0b943897aaf6..000000000000
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ /dev/null
@@ -1,106 +0,0 @@
-{
- "check valid spill/fill",
- .insns = {
- /* spill R1(ctx) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- /* fill it back into R2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
- /* should be able to access R0 = *(R2 + 8) */
- /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .retval = POINTER_VALUE,
-},
-{
- "check valid spill/fill, skb mark",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = ACCEPT,
-},
-{
- "check valid spill/fill, ptr to mem",
- .insns = {
- /* reserve 8 byte ringbuf memory */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
- /* store a pointer to the reserved memory in R6 */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- /* check whether the reservation was successful */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- /* spill R6(mem) into the stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- /* fill it back in R7 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
- /* should be able to access *(R7) = 0 */
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
- /* submit the reserved ringbuf memory */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_ringbuf = { 1 },
- .result = ACCEPT,
- .result_unpriv = ACCEPT,
-},
-{
- "check corrupted spill/fill",
- .insns = {
- /* spill R1(ctx) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- /* mess up with R1 pointer on stack */
- BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
- /* fill back into R0 is fine for priv.
- * R0 now becomes SCALAR_VALUE.
- */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- /* Load from R0 should fail. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .errstr = "R0 invalid mem access 'inv",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check corrupted spill/fill, LSB",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = POINTER_VALUE,
-},
-{
- "check corrupted spill/fill, MSB",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = POINTER_VALUE,
-},
diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c
deleted file mode 100644
index 781621facae4..000000000000
--- a/tools/testing/selftests/bpf/verifier/spin_lock.c
+++ /dev/null
@@ -1,333 +0,0 @@
-{
- "spin_lock: test1 success",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test2 direct ld/st",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "cannot be accessed directly",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test3 direct ld/st",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "cannot be accessed directly",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "spin_lock: test4 direct ld/st",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_6, 3),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "cannot be accessed directly",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "spin_lock: test5 call within a locked region",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "calls are not allowed",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test6 missing unlock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "unlock is missing",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test7 unlock without lock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "without taking a lock",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test8 double lock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "calls are not allowed",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test9 different lock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3, 11 },
- .result = REJECT,
- .errstr = "unlock of different lock",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test10 lock in subprog without unlock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "unlock is missing",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test11 ld_abs under lock",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_LD_ABS(BPF_B, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 4 },
- .result = REJECT,
- .errstr = "inside bpf_spin_lock",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c
deleted file mode 100644
index 07eaa04412ae..000000000000
--- a/tools/testing/selftests/bpf/verifier/stack_ptr.c
+++ /dev/null
@@ -1,361 +0,0 @@
-{
- "PTR_TO_STACK store/load",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0xfaceb00c,
-},
-{
- "PTR_TO_STACK store/load - bad alignment on off",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
-},
-{
- "PTR_TO_STACK store/load - bad alignment on reg",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
-},
-{
- "PTR_TO_STACK store/load - out of bounds low",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid write to stack R1 off=-79992 size=8",
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
-},
-{
- "PTR_TO_STACK store/load - out of bounds high",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid write to stack R1 off=0 size=8",
-},
-{
- "PTR_TO_STACK check high 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check high 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check high 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check high 4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack R1 off=0 size=1",
- .result = REJECT,
-},
-{
- "PTR_TO_STACK check high 5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack R1",
-},
-{
- "PTR_TO_STACK check high 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack",
-},
-{
- "PTR_TO_STACK check high 7",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "fp pointer offset",
-},
-{
- "PTR_TO_STACK check low 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -512),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check low 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 1, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 1),
- BPF_EXIT_INSN(),
- },
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check low 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack R1 off=-513 size=1",
- .result = REJECT,
-},
-{
- "PTR_TO_STACK check low 4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, INT_MIN),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "math between fp pointer",
-},
-{
- "PTR_TO_STACK check low 5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid write to stack",
-},
-{
- "PTR_TO_STACK check low 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid write to stack",
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
-},
-{
- "PTR_TO_STACK check low 7",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "fp pointer offset",
-},
-{
- "PTR_TO_STACK mixed reg/k, 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK mixed reg/k, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_5, -6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK mixed reg/k, 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = -3,
-},
-{
- "PTR_TO_STACK reg",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid write to stack R1 off=0 size=1",
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "stack pointer arithmetic",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
- BPF_ST_MEM(0, BPF_REG_2, 4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
- BPF_ST_MEM(0, BPF_REG_2, 4, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "store PTR_TO_STACK in R10 to array map using BPF_B",
- .insns = {
- /* Load pointer to map. */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- /* Copy R10 to R9. */
- BPF_MOV64_REG(BPF_REG_9, BPF_REG_10),
- /* Pollute other registers with unaligned values. */
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_3, -1),
- BPF_MOV64_IMM(BPF_REG_4, -1),
- BPF_MOV64_IMM(BPF_REG_5, -1),
- BPF_MOV64_IMM(BPF_REG_6, -1),
- BPF_MOV64_IMM(BPF_REG_7, -1),
- BPF_MOV64_IMM(BPF_REG_8, -1),
- /* Store both R9 and R10 with BPF_B and read back. */
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_9, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_1, 0),
- /* Should read back as same value. */
- BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 42,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/subreg.c b/tools/testing/selftests/bpf/verifier/subreg.c
deleted file mode 100644
index 4c4133c80440..000000000000
--- a/tools/testing/selftests/bpf/verifier/subreg.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/* This file contains sub-register zero extension checks for insns defining
- * sub-registers, meaning:
- * - All insns under BPF_ALU class. Their BPF_ALU32 variants or narrow width
- * forms (BPF_END) could define sub-registers.
- * - Narrow direct loads, BPF_B/H/W | BPF_LDX.
- * - BPF_LD is not exposed to JIT back-ends, so no need for testing.
- *
- * "get_prandom_u32" is used to initialize low 32-bit of some registers to
- * prevent potential optimizations done by verifier or JIT back-ends which could
- * optimize register back into constant when range info shows one register is a
- * constant.
- */
-{
- "add32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000000ULL),
- BPF_ALU32_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "add32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- /* An insn could have no effect on the low 32-bit, for example:
- * a = a + 0
- * a = a | 0
- * a = a & -1
- * But, they should still zero high 32-bit.
- */
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, -2),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "sub32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x1ffffffffULL),
- BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "sub32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_SUB, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_SUB, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mul32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000001ULL),
- BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mul32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, -1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "div32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, -1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "div32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, 2),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "or32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000001ULL),
- BPF_ALU32_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "or32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_OR, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_OR, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "and32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x100000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x1ffffffffULL),
- BPF_ALU32_REG(BPF_AND, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "and32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_AND, BPF_REG_0, -1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_AND, BPF_REG_0, -2),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "lsh32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x100000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU32_REG(BPF_LSH, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "lsh32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "rsh32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU32_REG(BPF_RSH, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "rsh32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_RSH, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_RSH, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "neg32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_NEG, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mod32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, -1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mod32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, 2),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "xor32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000000ULL),
- BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "xor32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_XOR, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mov32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x100000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000000ULL),
- BPF_MOV32_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mov32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "arsh32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "arsh32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "end16 (to_le) reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_ENDIAN(BPF_TO_LE, BPF_REG_0, 16),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "end32 (to_le) reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_ENDIAN(BPF_TO_LE, BPF_REG_0, 32),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "end16 (to_be) reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_ENDIAN(BPF_TO_BE, BPF_REG_0, 16),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "end32 (to_be) reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_ENDIAN(BPF_TO_BE, BPF_REG_0, 32),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "ldx_b zero extend check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4),
- BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0xfaceb00c),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "ldx_h zero extend check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4),
- BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0xfaceb00c),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_6, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "ldx_w zero extend check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4),
- BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0xfaceb00c),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
diff --git a/tools/testing/selftests/bpf/verifier/uninit.c b/tools/testing/selftests/bpf/verifier/uninit.c
deleted file mode 100644
index 987a5871ff1d..000000000000
--- a/tools/testing/selftests/bpf/verifier/uninit.c
+++ /dev/null
@@ -1,39 +0,0 @@
-{
- "read uninitialized register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 !read_ok",
- .result = REJECT,
-},
-{
- "read invalid register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R15 is invalid",
- .result = REJECT,
-},
-{
- "program doesn't init R0 before exit",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .result = REJECT,
-},
-{
- "program doesn't init R0 before exit in all branches",
- .insns = {
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .errstr_unpriv = "R1 pointer comparison",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
deleted file mode 100644
index b018ad71e0a8..000000000000
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ /dev/null
@@ -1,524 +0,0 @@
-{
- "unpriv: return pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 leaks addr",
- .retval = POINTER_VALUE,
-},
-{
- "unpriv: add const to pointer",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "unpriv: add pointer to pointer",
- .insns = {
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 pointer += pointer",
-},
-{
- "unpriv: neg pointer",
- .insns = {
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer arithmetic",
-},
-{
- "unpriv: cmp pointer with const",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer comparison",
-},
-{
- "unpriv: cmp pointer with pointer",
- .insns = {
- BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R10 pointer comparison",
-},
-{
- "unpriv: check that printk is disallowed",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "unknown func bpf_trace_printk#6",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "unpriv: pass pointer to helper function",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr_unpriv = "R4 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: indirectly pass pointer on stack to helper function",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: mangle pointer on stack 1",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: mangle pointer on stack 2",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: read pointer from stack in small chunks",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid size",
- .result = REJECT,
-},
-{
- "unpriv: write pointer into ctx",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 leaks addr",
- .result_unpriv = REJECT,
- .errstr = "invalid bpf_context access",
- .result = REJECT,
-},
-{
- "unpriv: spill/fill of ctx",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "unpriv: spill/fill of ctx 2",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of ctx 3",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 type=fp expected=ctx",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of ctx 4",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_RAW_INSN(BPF_STX | BPF_ATOMIC | BPF_DW,
- BPF_REG_10, BPF_REG_0, -8, BPF_ADD),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 type=inv expected=ctx",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 42),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
- offsetof(struct __sk_buff, mark)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "same insn cannot be used with different pointers",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx - ctx and sock",
- .insns = {
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- /* struct bpf_sock *sock = bpf_sock_lookup(...); */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- /* u64 foo; */
- /* void *target = &foo; */
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- /* if (skb == NULL) *target = sock; */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- /* else *target = skb; */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- /* struct __sk_buff *skb = *target; */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- /* skb->mark = 42; */
- BPF_MOV64_IMM(BPF_REG_3, 42),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
- offsetof(struct __sk_buff, mark)),
- /* if (sk) bpf_sk_release(sk) */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "type=ctx expected=sock",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx - leak sock",
- .insns = {
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- /* struct bpf_sock *sock = bpf_sock_lookup(...); */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- /* u64 foo; */
- /* void *target = &foo; */
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- /* if (skb == NULL) *target = sock; */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- /* else *target = skb; */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- /* struct __sk_buff *skb = *target; */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- /* skb->mark = 42; */
- BPF_MOV64_IMM(BPF_REG_3, 42),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- //.errstr = "same insn cannot be used with different pointers",
- .errstr = "Unreleased reference",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx - sock and ctx (read)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- /* struct bpf_sock *sock = bpf_sock_lookup(...); */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- /* u64 foo; */
- /* void *target = &foo; */
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- /* if (skb) *target = skb */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- /* else *target = sock */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- /* struct bpf_sock *sk = *target; */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct bpf_sock, mark)),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "same insn cannot be used with different pointers",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx - sock and ctx (write)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- /* struct bpf_sock *sock = bpf_sock_lookup(...); */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- /* u64 foo; */
- /* void *target = &foo; */
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- /* if (skb) *target = skb */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- /* else *target = sock */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- /* struct bpf_sock *sk = *target; */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- /* if (sk) sk->mark = 42; bpf_sk_release(sk); */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 42),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
- offsetof(struct bpf_sock, mark)),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- //.errstr = "same insn cannot be used with different pointers",
- .errstr = "cannot write into sock",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers ldx",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
- -(__s32)offsetof(struct bpf_perf_event_data,
- sample_period) - 8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1,
- offsetof(struct bpf_perf_event_data, sample_period)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "same insn cannot be used with different pointers",
- .prog_type = BPF_PROG_TYPE_PERF_EVENT,
-},
-{
- "unpriv: write pointer into map elem value",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "alu32: mov u32 const",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_7, 0),
- BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1),
- BPF_MOV32_REG(BPF_REG_0, BPF_REG_7),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "unpriv: partial copy of pointer",
- .insns = {
- BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R10 partial copy",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: pass pointer to tail_call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .errstr_unpriv = "R3 leaks addr into helper",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: cmp map pointer with zero",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .errstr_unpriv = "R1 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: write into frame pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "frame pointer is read only",
- .result = REJECT,
-},
-{
- "unpriv: spill/fill frame pointer",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "frame pointer is read only",
- .result = REJECT,
-},
-{
- "unpriv: cmp of frame pointer",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R10 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: adding of fp",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: cmp of stack pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R2 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value.c b/tools/testing/selftests/bpf/verifier/value.c
deleted file mode 100644
index 0e42592b1218..000000000000
--- a/tools/testing/selftests/bpf/verifier/value.c
+++ /dev/null
@@ -1,104 +0,0 @@
-{
- "map element value store of cleared call register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R1 !read_ok",
- .errstr = "R1 !read_ok",
- .result = REJECT,
- .result_unpriv = REJECT,
-},
-{
- "map element value with unaligned store",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22),
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23),
- BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map element value with unaligned load",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map element value is preserved across register spilling",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, offsetof(struct test_val, foo)),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value_adj_spill.c b/tools/testing/selftests/bpf/verifier/value_adj_spill.c
deleted file mode 100644
index 7135e8021b81..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_adj_spill.c
+++ /dev/null
@@ -1,43 +0,0 @@
-{
- "map element value is preserved across register spilling",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
-},
-{
- "map element value or null is marked on register spilling",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
deleted file mode 100644
index 489062867218..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
+++ /dev/null
@@ -1,95 +0,0 @@
-{
- "map element value illegal alu op, 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 bitwise operator &= on pointer",
- .result = REJECT,
-},
-{
- "map element value illegal alu op, 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 32-bit pointer arithmetic prohibited",
- .result = REJECT,
-},
-{
- "map element value illegal alu op, 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 pointer arithmetic with /= operator",
- .result = REJECT,
-},
-{
- "map element value illegal alu op, 4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "invalid mem access 'inv'",
- .result = REJECT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map element value illegal alu op, 5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_IMM(BPF_REG_3, 4096),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_2, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "leaking pointer from stack off -8",
- .errstr = "R0 invalid mem access 'inv'",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c
deleted file mode 100644
index 3ecb70a3d939..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_or_null.c
+++ /dev/null
@@ -1,171 +0,0 @@
-{
- "multiple registers share map_lookup_elem result",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "alu ops on ptr_to_map_value_or_null, 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "R4 pointer arithmetic on map_value_or_null",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "alu ops on ptr_to_map_value_or_null, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "R4 pointer arithmetic on map_value_or_null",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "alu ops on ptr_to_map_value_or_null, 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "R4 pointer arithmetic on map_value_or_null",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "invalid memory access with multiple map_lookup_elem calls",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .result = REJECT,
- .errstr = "R4 !read_ok",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "valid indirect map_lookup_elem access with 2nd lookup in branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_2, 10),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "invalid map access from else condition",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES-1, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 unbounded memory access",
- .result = REJECT,
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map lookup and null branch prediction",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_10, 10),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
deleted file mode 100644
index ed4e76b24649..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ /dev/null
@@ -1,876 +0,0 @@
-{
- "map access: known scalar += value_ptr from different maps",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 5 },
- .fixup_map_array_48b = { 8 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 tried to add from different maps",
- .retval = 1,
-},
-{
- "map access: value_ptr -= known scalar from different maps",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 5 },
- .fixup_map_array_48b = { 8 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 min value is outside of the allowed memory range",
- .retval = 1,
-},
-{
- "map access: known scalar += value_ptr from different maps, but same value properties",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 5 },
- .fixup_map_array_48b = { 8 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: mixing value pointer and scalar, 1",
- .insns = {
- // load map value pointer into r0 and r2
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- // load some number from the map into r1
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- // depending on r1, branch:
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 3),
- // branch A
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_JMP_A(2),
- // branch B
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0x100000),
- // common instruction
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- // depending on r1, branch:
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- // branch A
- BPF_JMP_A(4),
- // branch B
- BPF_MOV64_IMM(BPF_REG_0, 0x13371337),
- // verifier follows fall-through
- BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- // fake-dead code; targeted from branch A to
- // prevent dead code sanitization
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different pointers or scalars",
- .retval = 0,
-},
-{
- "map access: mixing value pointer and scalar, 2",
- .insns = {
- // load map value pointer into r0 and r2
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- // load some number from the map into r1
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- // depending on r1, branch:
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- // branch A
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0x100000),
- BPF_JMP_A(2),
- // branch B
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- // common instruction
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- // depending on r1, branch:
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- // branch A
- BPF_JMP_A(4),
- // branch B
- BPF_MOV64_IMM(BPF_REG_0, 0x13371337),
- // verifier follows fall-through
- BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- // fake-dead code; targeted from branch A to
- // prevent dead code sanitization
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different maps or paths",
- .retval = 0,
-},
-{
- "sanitation: alu with different scalars 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0x100000),
- BPF_JMP_A(2),
- BPF_MOV64_IMM(BPF_REG_2, 42),
- BPF_MOV64_IMM(BPF_REG_3, 0x100001),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result = ACCEPT,
- .retval = 0x100000,
-},
-{
- "sanitation: alu with different scalars 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_delete_elem),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
- BPF_EMIT_CALL(BPF_FUNC_map_delete_elem),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_7),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result = ACCEPT,
- .retval = -EINVAL * 2,
-},
-{
- "sanitation: alu with different scalars 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, EINVAL),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, -1),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, EINVAL),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, -1),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_7),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = -EINVAL * 2,
-},
-{
- "map access: value_ptr += known scalar, upper oob arith, test 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 48),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, upper oob arith, test 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 49),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, upper oob arith, test 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr -= known scalar, lower oob arith, test 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 48),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 min value is outside of the allowed memory range",
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
-},
-{
- "map access: value_ptr -= known scalar, lower oob arith, test 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 48),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr -= known scalar, lower oob arith, test 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: known scalar += value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 49),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "invalid access to map value",
-},
-{
- "map access: value_ptr += known scalar, 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "invalid access to map value",
-},
-{
- "map access: value_ptr += known scalar, 4",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_IMM(BPF_REG_1, 5),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, -2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, 5",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, (6 + 1) * sizeof(int)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 0xabcdef12,
-},
-{
- "map access: value_ptr += known scalar, 6",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_1, (3 + 1) * sizeof(int)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 3 * sizeof(int)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 0xabcdef12,
-},
-{
- "map access: unknown scalar += value_ptr, 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: unknown scalar += value_ptr, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 0xabcdef12,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map access: unknown scalar += value_ptr, 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 0xabcdef12,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map access: unknown scalar += value_ptr, 4",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_1, 19),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 max value is outside of the allowed memory range",
- .errstr_unpriv = "R1 pointer arithmetic of map value goes out of range",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map access: value_ptr += unknown scalar, 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: value_ptr += unknown scalar, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 0xabcdef12,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map access: value_ptr += unknown scalar, 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 16),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_JMP_IMM(BPF_JA, 0, 0, -3),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: value_ptr += value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 pointer += pointer prohibited",
-},
-{
- "map access: known scalar -= value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 tried to subtract pointer from scalar",
-},
-{
- "map access: value_ptr -= known scalar",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 min value is outside of the allowed memory range",
-},
-{
- "map access: value_ptr -= known scalar, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_1, 6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: unknown scalar -= value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 tried to subtract pointer from scalar",
-},
-{
- "map access: value_ptr -= unknown scalar",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 min value is negative",
-},
-{
- "map access: value_ptr -= unknown scalar, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr -= value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 invalid mem access 'inv'",
- .errstr_unpriv = "R0 pointer -= pointer prohibited",
-},
-{
- "32bit pkt_ptr -= scalar",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
- BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7),
- BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "32bit scalar -= pkt_ptr",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
- BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6),
- BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
deleted file mode 100644
index eab1f7f56e2f..000000000000
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ /dev/null
@@ -1,343 +0,0 @@
-{
- "variable-offset ctx access",
- .insns = {
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- /* add it to skb. We now have either &skb->len or
- * &skb->pkt_type, but we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- /* dereference it */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "variable ctx access var_off=(0x0; 0x4)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "variable-offset stack read, priv vs unpriv",
- .insns = {
- /* Fill the top 8 bytes of the stack */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
- /* add it to fp. We now have either fp-4 or fp-8, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it for a stack read */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R2 variable stack access prohibited for !root",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "variable-offset stack read, uninitialized",
- .insns = {
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
- /* add it to fp. We now have either fp-4 or fp-8, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it for a stack read */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid variable-offset read from stack R2",
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "variable-offset stack write, priv vs unpriv",
- .insns = {
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 8-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-8 or fp-16, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it for a stack write */
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- /* Now read from the address we just wrote. This shows
- * that, after a variable-offset write, a priviledged
- * program can read the slots that were in the range of
- * that write (even if the verifier doesn't actually know
- * if the slot being read was really written to or not.
- */
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- /* Variable stack access is rejected for unprivileged.
- */
- .errstr_unpriv = "R2 variable stack access prohibited for !root",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "variable-offset stack write clobbers spilled regs",
- .insns = {
- /* Dummy instruction; needed because we need to patch the next one
- * and we can't patch the first instruction.
- */
- BPF_MOV64_IMM(BPF_REG_6, 0),
- /* Make R0 a map ptr */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 8-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-8 or fp-16, but
- * we don't know which.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Spill R0(map ptr) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- /* Dereference the unknown value for a stack write */
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- /* Fill the register back into R2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
- /* Try to dereference R2 for a memory load */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- /* The unpriviledged case is not too interesting; variable
- * stack access is rejected.
- */
- .errstr_unpriv = "R2 variable stack access prohibited for !root",
- .result_unpriv = REJECT,
- /* In the priviledged case, dereferencing a spilled-and-then-filled
- * register is rejected because the previous variable offset stack
- * write might have overwritten the spilled pointer (i.e. we lose track
- * of the spilled register when we analyze the write).
- */
- .errstr = "R2 invalid mem access 'inv'",
- .result = REJECT,
-},
-{
- "indirect variable-offset stack access, unbounded",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_3, 28),
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops,
- bytes_received)),
- /* Check the lower bound but don't check the upper one. */
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_4, 0, 4),
- /* Point the lower bound to initialized stack. Offset is now in range
- * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.
- */
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_5, 8),
- /* Dereference it indirectly. */
- BPF_EMIT_CALL(BPF_FUNC_getsockopt),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid unbounded variable-offset indirect access to stack R4",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
- "indirect variable-offset stack access, max out of bound",
- .insns = {
- /* Fill the top 8 bytes of the stack */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
- /* add it to fp. We now have either fp-4 or fp-8, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it indirectly */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid variable-offset indirect access to stack R2",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, min out of bound",
- .insns = {
- /* Fill the top 8 bytes of the stack */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 516),
- /* add it to fp. We now have either fp-516 or fp-512, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it indirectly */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid variable-offset indirect access to stack R2",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, max_off+size > max_initialized",
- .insns = {
- /* Fill only the second from top 8 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
- * which. fp-12 size 8 is partially uninitialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid indirect read from stack R2 var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, min_off < min_initialized",
- .insns = {
- /* Fill only the top 8 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
- * which. fp-16 size 8 is partially uninitialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid indirect read from stack R2 var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, priv vs unpriv",
- .insns = {
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .errstr_unpriv = "R2 variable stack access prohibited for !root",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "indirect variable-offset stack access, uninitialized",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_3, 28),
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_5, 8),
- /* Dereference it indirectly. */
- BPF_EMIT_CALL(BPF_FUNC_getsockopt),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack R4 var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
- "indirect variable-offset stack access, ok",
- .insns = {
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
diff --git a/tools/testing/selftests/bpf/verifier/xadd.c b/tools/testing/selftests/bpf/verifier/xadd.c
deleted file mode 100644
index b96ef3526815..000000000000
--- a/tools/testing/selftests/bpf/verifier/xadd.c
+++ /dev/null
@@ -1,97 +0,0 @@
-{
- "xadd/w check unaligned stack",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -7),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned stack access off",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "xadd/w check unaligned map",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 3),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = REJECT,
- .errstr = "misaligned value access off",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "xadd/w check unaligned pkt",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 99),
- BPF_JMP_IMM(BPF_JA, 0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
- BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 1),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "BPF_ATOMIC stores into R2 pkt is not allowed",
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "xadd/w check whether src/dst got mangled, 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8),
- BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
- BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 42),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 3,
-},
-{
- "xadd/w check whether src/dst got mangled, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8),
- BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
- BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 42),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 3,
-},
diff --git a/tools/testing/selftests/bpf/verifier/xdp.c b/tools/testing/selftests/bpf/verifier/xdp.c
deleted file mode 100644
index 5ac390508139..000000000000
--- a/tools/testing/selftests/bpf/verifier/xdp.c
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- "XDP, using ifindex from netdev",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, ingress_ifindex)),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .retval = 1,
-},
diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
deleted file mode 100644
index bfb97383e6b5..000000000000
--- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
+++ /dev/null
@@ -1,900 +0,0 @@
-{
- "XDP pkt read, pkt_end mangling, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 pointer arithmetic on pkt_end",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "XDP pkt read, pkt_end mangling, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 pointer arithmetic on pkt_end",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh
new file mode 100755
index 000000000000..09179fb551f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/verify_sig_setup.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+set -o pipefail
+
+VERBOSE="${SELFTESTS_VERBOSE:=0}"
+LOG_FILE="$(mktemp /tmp/verify_sig_setup.log.XXXXXX)"
+
+x509_genkey_content="\
+[ req ]
+default_bits = 2048
+distinguished_name = req_distinguished_name
+prompt = no
+string_mask = utf8only
+x509_extensions = myexts
+
+[ req_distinguished_name ]
+CN = eBPF Signature Verification Testing Key
+
+[ myexts ]
+basicConstraints=critical,CA:FALSE
+keyUsage=digitalSignature
+subjectKeyIdentifier=hash
+authorityKeyIdentifier=keyid
+"
+
+usage()
+{
+ echo "Usage: $0 <setup|cleanup <existing_tmp_dir>"
+ exit 1
+}
+
+genkey()
+{
+ local tmp_dir="$1"
+
+ echo "${x509_genkey_content}" > ${tmp_dir}/x509.genkey
+
+ openssl req -new -nodes -utf8 -sha256 -days 36500 \
+ -batch -x509 -config ${tmp_dir}/x509.genkey \
+ -outform PEM -out ${tmp_dir}/signing_key.pem \
+ -keyout ${tmp_dir}/signing_key.pem 2>&1
+
+ openssl x509 -in ${tmp_dir}/signing_key.pem -out \
+ ${tmp_dir}/signing_key.der -outform der
+}
+
+setup()
+{
+ local tmp_dir="$1"
+
+ genkey "${tmp_dir}"
+ key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
+ keyring_id=$(keyctl newring ebpf_testing_keyring @s)
+ keyctl link $key_id $keyring_id
+}
+
+cleanup() {
+ local tmp_dir="$1"
+
+ keyctl unlink $(keyctl search @s asymmetric ebpf_testing_key) @s
+ keyctl unlink $(keyctl search @s keyring ebpf_testing_keyring) @s
+ rm -rf ${tmp_dir}
+}
+
+fsverity_create_sign_file() {
+ local tmp_dir="$1"
+
+ data_file=${tmp_dir}/data-file
+ sig_file=${tmp_dir}/sig-file
+ dd if=/dev/urandom of=$data_file bs=1 count=12345 2> /dev/null
+ fsverity sign --key ${tmp_dir}/signing_key.pem $data_file $sig_file
+
+ # We do not want to enable fsverity on $data_file yet. Try whether
+ # the file system support fsverity on a different file.
+ touch ${tmp_dir}/tmp-file
+ fsverity enable ${tmp_dir}/tmp-file
+}
+
+fsverity_enable_file() {
+ local tmp_dir="$1"
+
+ data_file=${tmp_dir}/data-file
+ fsverity enable $data_file
+}
+
+catch()
+{
+ local exit_code="$1"
+ local log_file="$2"
+
+ if [[ "${exit_code}" -ne 0 ]]; then
+ cat "${log_file}" >&3
+ fi
+
+ rm -f "${log_file}"
+ exit ${exit_code}
+}
+
+main()
+{
+ [[ $# -ne 2 ]] && usage
+
+ local action="$1"
+ local tmp_dir="$2"
+
+ [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
+
+ if [[ "${action}" == "setup" ]]; then
+ setup "${tmp_dir}"
+ elif [[ "${action}" == "genkey" ]]; then
+ genkey "${tmp_dir}"
+ elif [[ "${action}" == "cleanup" ]]; then
+ cleanup "${tmp_dir}"
+ elif [[ "${action}" == "fsverity-create-sign" ]]; then
+ fsverity_create_sign_file "${tmp_dir}"
+ elif [[ "${action}" == "fsverity-enable" ]]; then
+ fsverity_enable_file "${tmp_dir}"
+ else
+ echo "Unknown action: ${action}"
+ exit 1
+ fi
+}
+
+trap 'catch "$?" "${LOG_FILE}"' EXIT
+
+if [[ "${VERBOSE}" -eq 0 ]]; then
+ # Save the stderr to 3 so that we can output back to
+ # it incase of an error.
+ exec 3>&2 1>"${LOG_FILE}" 2>&1
+fi
+
+main "$@"
+rm -f "${LOG_FILE}"
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
new file mode 100644
index 000000000000..e962f133250c
--- /dev/null
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -0,0 +1,3384 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <libgen.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <pthread.h>
+#include <dirent.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/sysinfo.h>
+#include <sys/stat.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include <bpf/bpf.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <float.h>
+#include <math.h>
+#include <limits.h>
+#include <assert.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+enum stat_id {
+ VERDICT,
+ DURATION,
+ TOTAL_INSNS,
+ TOTAL_STATES,
+ PEAK_STATES,
+ MAX_STATES_PER_INSN,
+ MARK_READ_MAX_LEN,
+ SIZE,
+ JITED_SIZE,
+ STACK,
+ PROG_TYPE,
+ ATTACH_TYPE,
+ MEMORY_PEAK,
+
+ FILE_NAME,
+ PROG_NAME,
+
+ ALL_STATS_CNT,
+ NUM_STATS_CNT = FILE_NAME - VERDICT,
+};
+
+/* In comparison mode each stat can specify up to four different values:
+ * - A side value;
+ * - B side value;
+ * - absolute diff value;
+ * - relative (percentage) diff value.
+ *
+ * When specifying stat specs in comparison mode, user can use one of the
+ * following variant suffixes to specify which exact variant should be used for
+ * ordering or filtering:
+ * - `_a` for A side value;
+ * - `_b` for B side value;
+ * - `_diff` for absolute diff value;
+ * - `_pct` for relative (percentage) diff value.
+ *
+ * If no variant suffix is provided, then `_b` (control data) is assumed.
+ *
+ * As an example, let's say instructions stat has the following output:
+ *
+ * Insns (A) Insns (B) Insns (DIFF)
+ * --------- --------- --------------
+ * 21547 20920 -627 (-2.91%)
+ *
+ * Then:
+ * - 21547 is A side value (insns_a);
+ * - 20920 is B side value (insns_b);
+ * - -627 is absolute diff value (insns_diff);
+ * - -2.91% is relative diff value (insns_pct).
+ *
+ * For verdict there is no verdict_pct variant.
+ * For file and program name, _a and _b variants are equivalent and there are
+ * no _diff or _pct variants.
+ */
+enum stat_variant {
+ VARIANT_A,
+ VARIANT_B,
+ VARIANT_DIFF,
+ VARIANT_PCT,
+};
+
+struct verif_stats {
+ char *file_name;
+ char *prog_name;
+
+ long stats[NUM_STATS_CNT];
+};
+
+/* joined comparison mode stats */
+struct verif_stats_join {
+ char *file_name;
+ char *prog_name;
+
+ const struct verif_stats *stats_a;
+ const struct verif_stats *stats_b;
+};
+
+struct stat_specs {
+ int spec_cnt;
+ enum stat_id ids[ALL_STATS_CNT];
+ enum stat_variant variants[ALL_STATS_CNT];
+ bool asc[ALL_STATS_CNT];
+ bool abs[ALL_STATS_CNT];
+ int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
+};
+
+enum resfmt {
+ RESFMT_TABLE,
+ RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
+ RESFMT_CSV,
+};
+
+enum filter_kind {
+ FILTER_NAME,
+ FILTER_STAT,
+};
+
+enum operator_kind {
+ OP_EQ, /* == or = */
+ OP_NEQ, /* != or <> */
+ OP_LT, /* < */
+ OP_LE, /* <= */
+ OP_GT, /* > */
+ OP_GE, /* >= */
+};
+
+struct filter {
+ enum filter_kind kind;
+ /* FILTER_NAME */
+ char *any_glob;
+ char *file_glob;
+ char *prog_glob;
+ /* FILTER_STAT */
+ enum operator_kind op;
+ int stat_id;
+ enum stat_variant stat_var;
+ long value;
+ bool abs;
+};
+
+struct rvalue {
+ enum { INTEGRAL, ENUMERATOR } type;
+ union {
+ long long ivalue;
+ char *svalue;
+ };
+};
+
+struct field_access {
+ enum { FIELD_NAME, ARRAY_INDEX } type;
+ union {
+ char *name;
+ struct rvalue index;
+ };
+};
+
+struct var_preset {
+ struct field_access *atoms;
+ int atom_count;
+ char *full_name;
+ struct rvalue value;
+ bool applied;
+};
+
+enum dump_mode {
+ DUMP_NONE = 0,
+ DUMP_XLATED = 1,
+ DUMP_JITED = 2,
+};
+
+static struct env {
+ char **filenames;
+ int filename_cnt;
+ bool verbose;
+ bool debug;
+ bool quiet;
+ bool force_checkpoints;
+ bool force_reg_invariants;
+ enum resfmt out_fmt;
+ bool show_version;
+ bool comparison_mode;
+ bool replay_mode;
+ int top_n;
+
+ int log_level;
+ int log_size;
+ bool log_fixed;
+
+ struct verif_stats *prog_stats;
+ int prog_stat_cnt;
+
+ /* baseline_stats is allocated and used only in comparison mode */
+ struct verif_stats *baseline_stats;
+ int baseline_stat_cnt;
+
+ struct verif_stats_join *join_stats;
+ int join_stat_cnt;
+
+ struct stat_specs output_spec;
+ struct stat_specs sort_spec;
+
+ struct filter *allow_filters;
+ struct filter *deny_filters;
+ int allow_filter_cnt;
+ int deny_filter_cnt;
+
+ int files_processed;
+ int files_skipped;
+ int progs_processed;
+ int progs_skipped;
+ int top_src_lines;
+ struct var_preset *presets;
+ int npresets;
+ char orig_cgroup[PATH_MAX];
+ char stat_cgroup[PATH_MAX];
+ int memory_peak_fd;
+ __u32 dump_mode;
+} env;
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+ if (!env.verbose)
+ return 0;
+ if (level == LIBBPF_DEBUG && !env.debug)
+ return 0;
+ return vfprintf(stderr, format, args);
+}
+
+#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+__attribute__((format(printf, 3, 4)))
+static int log_errno_aux(const char *file, int line, const char *fmt, ...)
+{
+ int err = -errno;
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "%s:%d: ", file, line);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, " failed with error '%s'.\n", strerror(errno));
+ va_end(ap);
+ return err;
+}
+
+#ifndef VERISTAT_VERSION
+#define VERISTAT_VERSION "<kernel>"
+#endif
+
+const char *argp_program_version = "veristat v" VERISTAT_VERSION;
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] =
+"veristat BPF verifier stats collection and comparison tool.\n"
+"\n"
+"USAGE: veristat <obj-file> [<obj-file>...]\n"
+" OR: veristat -C <baseline.csv> <comparison.csv>\n"
+" OR: veristat -R <results.csv>\n"
+" OR: veristat -vl2 <to_analyze.bpf.o>\n";
+
+enum {
+ OPT_LOG_FIXED = 1000,
+ OPT_LOG_SIZE = 1001,
+ OPT_DUMP = 1002,
+};
+
+static const struct argp_option opts[] = {
+ { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+ { "version", 'V', NULL, 0, "Print version" },
+ { "verbose", 'v', NULL, 0, "Verbose mode" },
+ { "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
+ { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode, 2 for full verification log)" },
+ { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
+ { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
+ { "top-n", 'n', "N", 0, "Emit only up to first N results." },
+ { "quiet", 'q', NULL, 0, "Quiet mode" },
+ { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
+ { "sort", 's', "SPEC", 0, "Specify sort order" },
+ { "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
+ { "compare", 'C', NULL, 0, "Comparison mode" },
+ { "replay", 'R', NULL, 0, "Replay mode" },
+ { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
+ { "test-states", 't', NULL, 0,
+ "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
+ { "test-reg-invariants", 'r', NULL, 0,
+ "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
+ { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
+ { "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" },
+ { "dump", OPT_DUMP, "DUMP_MODE", OPTION_ARG_OPTIONAL, "Print BPF program dump (xlated, jited)" },
+ {},
+};
+
+static int parse_stats(const char *stats_str, struct stat_specs *specs);
+static int append_filter(struct filter **filters, int *cnt, const char *str);
+static int append_filter_file(const char *path);
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr);
+static int append_var_preset_file(const char *filename);
+static int append_file(const char *path);
+static int append_file_from_file(const char *path);
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ int err;
+
+ switch (key) {
+ case 'h':
+ argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+ break;
+ case 'V':
+ env.show_version = true;
+ break;
+ case 'v':
+ env.verbose = true;
+ break;
+ case 'd':
+ env.debug = true;
+ env.verbose = true;
+ break;
+ case 'q':
+ env.quiet = true;
+ break;
+ case 'e':
+ err = parse_stats(arg, &env.output_spec);
+ if (err)
+ return err;
+ break;
+ case 's':
+ err = parse_stats(arg, &env.sort_spec);
+ if (err)
+ return err;
+ break;
+ case 'o':
+ if (strcmp(arg, "table") == 0) {
+ env.out_fmt = RESFMT_TABLE;
+ } else if (strcmp(arg, "csv") == 0) {
+ env.out_fmt = RESFMT_CSV;
+ } else {
+ fprintf(stderr, "Unrecognized output format '%s'\n", arg);
+ return -EINVAL;
+ }
+ break;
+ case 'l':
+ errno = 0;
+ env.log_level = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid log level: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case OPT_LOG_FIXED:
+ env.log_fixed = true;
+ break;
+ case OPT_LOG_SIZE:
+ errno = 0;
+ env.log_size = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid log size: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 't':
+ env.force_checkpoints = true;
+ break;
+ case 'r':
+ env.force_reg_invariants = true;
+ break;
+ case 'n':
+ errno = 0;
+ env.top_n = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid top N specifier: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'C':
+ env.comparison_mode = true;
+ break;
+ case 'R':
+ env.replay_mode = true;
+ break;
+ case 'f':
+ if (arg[0] == '@')
+ err = append_filter_file(arg + 1);
+ else if (arg[0] == '!')
+ err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
+ else
+ err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
+ if (err) {
+ fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
+ return err;
+ }
+ break;
+ case 'S':
+ errno = 0;
+ env.top_src_lines = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'G': {
+ if (arg[0] == '@')
+ err = append_var_preset_file(arg + 1);
+ else
+ err = append_var_preset(&env.presets, &env.npresets, arg);
+ if (err) {
+ fprintf(stderr, "Failed to parse global variable presets: %s\n", arg);
+ return err;
+ }
+ break;
+ }
+ case ARGP_KEY_ARG:
+ if (arg[0] == '@')
+ err = append_file_from_file(arg + 1);
+ else
+ err = append_file(arg);
+ if (err) {
+ fprintf(stderr, "Failed to collect BPF object files: %d\n", err);
+ return err;
+ }
+ break;
+ case OPT_DUMP:
+ if (!arg || strcasecmp(arg, "xlated") == 0) {
+ env.dump_mode |= DUMP_XLATED;
+ } else if (strcasecmp(arg, "jited") == 0) {
+ env.dump_mode |= DUMP_JITED;
+ } else {
+ fprintf(stderr, "Unrecognized dump mode '%s'\n", arg);
+ return -EINVAL;
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+};
+
+
+/* Adapted from perf/util/string.c */
+static bool glob_matches(const char *str, const char *pat)
+{
+ while (*str && *pat && *pat != '*') {
+ if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (glob_matches(str++, pat))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
+static bool is_bpf_obj_file(const char *path) {
+ Elf64_Ehdr *ehdr;
+ int fd, err = -EINVAL;
+ Elf *elf = NULL;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return true; /* we'll fail later and propagate error */
+
+ /* ensure libelf is initialized */
+ (void)elf_version(EV_CURRENT);
+
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+ if (!elf)
+ goto cleanup;
+
+ if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
+ goto cleanup;
+
+ ehdr = elf64_getehdr(elf);
+ /* Old LLVM set e_machine to EM_NONE */
+ if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ if (elf)
+ elf_end(elf);
+ close(fd);
+ return err == 0;
+}
+
+static bool should_process_file_prog(const char *filename, const char *prog_name)
+{
+ struct filter *f;
+ int i, allow_cnt = 0;
+
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ f = &env.deny_filters[i];
+ if (f->kind != FILTER_NAME)
+ continue;
+
+ if (f->any_glob && glob_matches(filename, f->any_glob))
+ return false;
+ if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
+ return false;
+ if (f->file_glob && glob_matches(filename, f->file_glob))
+ return false;
+ if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
+ return false;
+ }
+
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ f = &env.allow_filters[i];
+ if (f->kind != FILTER_NAME)
+ continue;
+
+ allow_cnt++;
+ if (f->any_glob) {
+ if (glob_matches(filename, f->any_glob))
+ return true;
+ /* If we don't know program name yet, any_glob filter
+ * has to assume that current BPF object file might be
+ * relevant; we'll check again later on after opening
+ * BPF object file, at which point program name will
+ * be known finally.
+ */
+ if (!prog_name || glob_matches(prog_name, f->any_glob))
+ return true;
+ } else {
+ if (f->file_glob && !glob_matches(filename, f->file_glob))
+ continue;
+ if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
+ continue;
+ return true;
+ }
+ }
+
+ /* if there are no file/prog name allow filters, allow all progs,
+ * unless they are denied earlier explicitly
+ */
+ return allow_cnt == 0;
+}
+
+static struct {
+ enum operator_kind op_kind;
+ const char *op_str;
+} operators[] = {
+ /* Order of these definitions matter to avoid situations like '<'
+ * matching part of what is actually a '<>' operator. That is,
+ * substrings should go last.
+ */
+ { OP_EQ, "==" },
+ { OP_NEQ, "!=" },
+ { OP_NEQ, "<>" },
+ { OP_LE, "<=" },
+ { OP_LT, "<" },
+ { OP_GE, ">=" },
+ { OP_GT, ">" },
+ { OP_EQ, "=" },
+};
+
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+ enum stat_variant *var, bool *is_abs);
+
+static int append_filter(struct filter **filters, int *cnt, const char *str)
+{
+ struct filter *f;
+ void *tmp;
+ const char *p;
+ int i;
+
+ tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
+ if (!tmp)
+ return -ENOMEM;
+ *filters = tmp;
+
+ f = &(*filters)[*cnt];
+ memset(f, 0, sizeof(*f));
+
+ /* First, let's check if it's a stats filter of the following form:
+ * <stat><op><value, where:
+ * - <stat> is one of supported numerical stats (verdict is also
+ * considered numerical, failure == 0, success == 1);
+ * - <op> is comparison operator (see `operators` definitions);
+ * - <value> is an integer (or failure/success, or false/true as
+ * special aliases for 0 and 1, respectively).
+ * If the form doesn't match what user provided, we assume file/prog
+ * glob filter.
+ */
+ for (i = 0; i < ARRAY_SIZE(operators); i++) {
+ enum stat_variant var;
+ int id;
+ long val;
+ const char *end = str;
+ const char *op_str;
+ bool is_abs;
+
+ op_str = operators[i].op_str;
+ p = strstr(str, op_str);
+ if (!p)
+ continue;
+
+ if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
+ fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
+ return -EINVAL;
+ }
+ if (id >= FILE_NAME) {
+ fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
+ return -EINVAL;
+ }
+
+ p += strlen(op_str);
+
+ if (strcasecmp(p, "true") == 0 ||
+ strcasecmp(p, "t") == 0 ||
+ strcasecmp(p, "success") == 0 ||
+ strcasecmp(p, "succ") == 0 ||
+ strcasecmp(p, "s") == 0 ||
+ strcasecmp(p, "match") == 0 ||
+ strcasecmp(p, "m") == 0) {
+ val = 1;
+ } else if (strcasecmp(p, "false") == 0 ||
+ strcasecmp(p, "f") == 0 ||
+ strcasecmp(p, "failure") == 0 ||
+ strcasecmp(p, "fail") == 0 ||
+ strcasecmp(p, "mismatch") == 0 ||
+ strcasecmp(p, "mis") == 0) {
+ val = 0;
+ } else {
+ errno = 0;
+ val = strtol(p, (char **)&end, 10);
+ if (errno || end == p || *end != '\0' ) {
+ fprintf(stderr, "Invalid integer value in '%s'!\n", str);
+ return -EINVAL;
+ }
+ }
+
+ f->kind = FILTER_STAT;
+ f->stat_id = id;
+ f->stat_var = var;
+ f->op = operators[i].op_kind;
+ f->abs = true;
+ f->value = val;
+
+ *cnt += 1;
+ return 0;
+ }
+
+ /* File/prog filter can be specified either as '<glob>' or
+ * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
+ * both file and program names. This seems to be way more useful in
+ * practice. If user needs full control, they can use '/<prog-glob>'
+ * form to glob just program name, or '<file-glob>/' to glob only file
+ * name. But usually common <glob> seems to be the most useful and
+ * ergonomic way.
+ */
+ f->kind = FILTER_NAME;
+ p = strchr(str, '/');
+ if (!p) {
+ f->any_glob = strdup(str);
+ if (!f->any_glob)
+ return -ENOMEM;
+ } else {
+ if (str != p) {
+ /* non-empty file glob */
+ f->file_glob = strndup(str, p - str);
+ if (!f->file_glob)
+ return -ENOMEM;
+ }
+ if (strlen(p + 1) > 0) {
+ /* non-empty prog glob */
+ f->prog_glob = strdup(p + 1);
+ if (!f->prog_glob) {
+ free(f->file_glob);
+ f->file_glob = NULL;
+ return -ENOMEM;
+ }
+ }
+ }
+
+ *cnt += 1;
+ return 0;
+}
+
+static int append_filter_file(const char *path)
+{
+ char buf[1024];
+ FILE *f;
+ int err = 0;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open filters in '%s': %s\n", path, strerror(-err));
+ return err;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ /* lines starting with # are comments, skip them */
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+ /* lines starting with ! are negative match filters */
+ if (buf[0] == '!')
+ err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
+ else
+ err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+static const struct stat_specs default_output_spec = {
+ .spec_cnt = 8,
+ .ids = {
+ FILE_NAME, PROG_NAME, VERDICT, DURATION,
+ TOTAL_INSNS, TOTAL_STATES, SIZE, JITED_SIZE
+ },
+};
+
+static int append_file(const char *path)
+{
+ void *tmp;
+
+ tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
+ if (!tmp)
+ return -ENOMEM;
+ env.filenames = tmp;
+ env.filenames[env.filename_cnt] = strdup(path);
+ if (!env.filenames[env.filename_cnt])
+ return -ENOMEM;
+ env.filename_cnt++;
+ return 0;
+}
+
+static int append_file_from_file(const char *path)
+{
+ char buf[1024];
+ int err = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open object files list in '%s': %s\n",
+ path, strerror(errno));
+ return err;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ /* lines starting with # are comments, skip them */
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+ err = append_file(buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+static const struct stat_specs default_csv_output_spec = {
+ .spec_cnt = 15,
+ .ids = {
+ FILE_NAME, PROG_NAME, VERDICT, DURATION,
+ TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
+ MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
+ SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
+ STACK, MEMORY_PEAK,
+ },
+};
+
+static const struct stat_specs default_sort_spec = {
+ .spec_cnt = 2,
+ .ids = {
+ FILE_NAME, PROG_NAME,
+ },
+ .asc = { true, true, },
+};
+
+/* sorting for comparison mode to join two data sets */
+static const struct stat_specs join_sort_spec = {
+ .spec_cnt = 2,
+ .ids = {
+ FILE_NAME, PROG_NAME,
+ },
+ .asc = { true, true, },
+};
+
+static struct stat_def {
+ const char *header;
+ const char *names[4];
+ bool asc_by_default;
+ bool left_aligned;
+} stat_defs[] = {
+ [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
+ [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
+ [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
+ [DURATION] = { "Duration (us)", {"duration", "dur"}, },
+ [TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
+ [TOTAL_STATES] = { "States", {"total_states", "states"}, },
+ [PEAK_STATES] = { "Peak states", {"peak_states"}, },
+ [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
+ [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
+ [SIZE] = { "Program size", {"prog_size"}, },
+ [JITED_SIZE] = { "Jited size", {"prog_size_jited"}, },
+ [STACK] = {"Stack depth", {"stack_depth", "stack"}, },
+ [PROG_TYPE] = { "Program type", {"prog_type"}, },
+ [ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
+ [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, },
+};
+
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+ enum stat_variant *var, bool *is_abs)
+{
+ static const char *var_sfxs[] = {
+ [VARIANT_A] = "_a",
+ [VARIANT_B] = "_b",
+ [VARIANT_DIFF] = "_diff",
+ [VARIANT_PCT] = "_pct",
+ };
+ int i, j, k;
+
+ /* |<stat>| means we take absolute value of given stat */
+ *is_abs = false;
+ if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
+ *is_abs = true;
+ name += 1;
+ len -= 2;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
+ struct stat_def *def = &stat_defs[i];
+ size_t alias_len, sfx_len;
+ const char *alias;
+
+ for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
+ alias = def->names[j];
+ if (!alias)
+ continue;
+
+ alias_len = strlen(alias);
+ if (strncmp(name, alias, alias_len) != 0)
+ continue;
+
+ if (alias_len == len) {
+ /* If no variant suffix is specified, we
+ * assume control group (just in case we are
+ * in comparison mode. Variant is ignored in
+ * non-comparison mode.
+ */
+ *var = VARIANT_B;
+ *id = i;
+ return true;
+ }
+
+ for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
+ sfx_len = strlen(var_sfxs[k]);
+ if (alias_len + sfx_len != len)
+ continue;
+
+ if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
+ *var = (enum stat_variant)k;
+ *id = i;
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool is_asc_sym(char c)
+{
+ return c == '^';
+}
+
+static bool is_desc_sym(char c)
+{
+ return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
+}
+
+static char *rtrim(char *str)
+{
+ int i;
+
+ for (i = strlen(str) - 1; i > 0; --i) {
+ if (!isspace(str[i]))
+ break;
+ str[i] = '\0';
+ }
+ return str;
+}
+
+static int parse_stat(const char *stat_name, struct stat_specs *specs)
+{
+ int id;
+ bool has_order = false, is_asc = false, is_abs = false;
+ size_t len = strlen(stat_name);
+ enum stat_variant var;
+
+ if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
+ fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
+ return -E2BIG;
+ }
+
+ if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
+ has_order = true;
+ is_asc = is_asc_sym(stat_name[len - 1]);
+ len -= 1;
+ }
+
+ if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
+ fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
+ return -ESRCH;
+ }
+
+ specs->ids[specs->spec_cnt] = id;
+ specs->variants[specs->spec_cnt] = var;
+ specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
+ specs->abs[specs->spec_cnt] = is_abs;
+ specs->spec_cnt++;
+
+ return 0;
+}
+
+static int parse_stats(const char *stats_str, struct stat_specs *specs)
+{
+ char *input, *state = NULL, *next;
+ int err, cnt = 0;
+
+ input = strdup(stats_str);
+ if (!input)
+ return -ENOMEM;
+
+ while ((next = strtok_r(cnt++ ? NULL : input, ",", &state))) {
+ err = parse_stat(next, specs);
+ if (err) {
+ free(input);
+ return err;
+ }
+ }
+
+ free(input);
+ return 0;
+}
+
+static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
+{
+ int i;
+
+ if (!stats)
+ return;
+
+ for (i = 0; i < stat_cnt; i++) {
+ free(stats[i].file_name);
+ free(stats[i].prog_name);
+ }
+ free(stats);
+}
+
+static char verif_log_buf[64 * 1024];
+
+#define MAX_PARSED_LOG_LINES 100
+
+static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
+{
+ const char *cur;
+ int pos, lines, sub_stack, cnt = 0;
+ char *state = NULL, *token, stack[512];
+
+ buf[buf_sz - 1] = '\0';
+
+ for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
+ /* find previous endline or otherwise take the start of log buf */
+ for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
+ }
+ /* next time start from end of previous line (or pos goes to <0) */
+ pos--;
+ /* if we found endline, point right after endline symbol;
+ * otherwise, stay at the beginning of log buf
+ */
+ if (cur[0] == '\n')
+ cur++;
+
+ if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
+ continue;
+ if (5 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
+ &s->stats[TOTAL_INSNS],
+ &s->stats[MAX_STATES_PER_INSN],
+ &s->stats[TOTAL_STATES],
+ &s->stats[PEAK_STATES],
+ &s->stats[MARK_READ_MAX_LEN]))
+ continue;
+
+ if (1 == sscanf(cur, "stack depth %511s", stack))
+ continue;
+ }
+ while ((token = strtok_r(cnt++ ? NULL : stack, "+", &state))) {
+ if (sscanf(token, "%d", &sub_stack) == 0)
+ break;
+ s->stats[STACK] += sub_stack;
+ }
+ return 0;
+}
+
+struct line_cnt {
+ char *line;
+ int cnt;
+};
+
+static int str_cmp(const void *a, const void *b)
+{
+ const char **str1 = (const char **)a;
+ const char **str2 = (const char **)b;
+
+ return strcmp(*str1, *str2);
+}
+
+static int line_cnt_cmp(const void *a, const void *b)
+{
+ const struct line_cnt *a_cnt = (const struct line_cnt *)a;
+ const struct line_cnt *b_cnt = (const struct line_cnt *)b;
+
+ if (a_cnt->cnt != b_cnt->cnt)
+ return a_cnt->cnt > b_cnt->cnt ? -1 : 1;
+ return strcmp(a_cnt->line, b_cnt->line);
+}
+
+static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
+{
+ int lines_cap = 0;
+ int lines_size = 0;
+ char **lines = NULL;
+ char *line = NULL;
+ char *state;
+ struct line_cnt *freq = NULL;
+ struct line_cnt *cur;
+ int unique_lines;
+ int err = 0;
+ int i;
+
+ while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
+ if (strncmp(line, "; ", 2) != 0)
+ continue;
+ line += 2;
+
+ if (lines_size == lines_cap) {
+ char **tmp;
+
+ lines_cap = max(16, lines_cap * 2);
+ tmp = realloc(lines, lines_cap * sizeof(*tmp));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ lines = tmp;
+ }
+ lines[lines_size] = line;
+ lines_size++;
+ }
+
+ if (lines_size == 0)
+ goto cleanup;
+
+ qsort(lines, lines_size, sizeof(*lines), str_cmp);
+
+ freq = calloc(lines_size, sizeof(*freq));
+ if (!freq) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ cur = freq;
+ cur->line = lines[0];
+ cur->cnt = 1;
+ for (i = 1; i < lines_size; ++i) {
+ if (strcmp(lines[i], cur->line) != 0) {
+ cur++;
+ cur->line = lines[i];
+ cur->cnt = 0;
+ }
+ cur->cnt++;
+ }
+ unique_lines = cur - freq + 1;
+
+ qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
+
+ printf("Top source lines (%s):\n", prog_name);
+ for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
+ const char *src_code = freq[i].line;
+ const char *src_line = NULL;
+ char *split = strrchr(freq[i].line, '@');
+
+ if (split) {
+ src_line = split + 1;
+
+ while (*src_line && isspace(*src_line))
+ src_line++;
+
+ while (split > src_code && isspace(*split))
+ split--;
+ *split = '\0';
+ }
+
+ if (src_line)
+ printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
+ else
+ printf("%5d: %s\n", freq[i].cnt, src_code);
+ }
+ printf("\n");
+
+cleanup:
+ free(freq);
+ free(lines);
+ return err;
+}
+
+static int guess_prog_type_by_ctx_name(const char *ctx_name,
+ enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *attach_type)
+{
+ /* We need to guess program type based on its declared context type.
+ * This guess can't be perfect as many different program types might
+ * share the same context type. So we can only hope to reasonably
+ * well guess this and get lucky.
+ *
+ * Just in case, we support both UAPI-side type names and
+ * kernel-internal names.
+ */
+ static struct {
+ const char *uapi_name;
+ const char *kern_name;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type attach_type;
+ } ctx_map[] = {
+ /* __sk_buff is most ambiguous, we assume TC program */
+ { "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
+ { "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
+ { "bpf_sock_addr", "bpf_sock_addr_kern", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
+ { "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
+ { "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
+ { "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
+ { "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
+ { "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
+ { "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
+ { "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
+ { "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
+ /* tracing types with no expected attach type */
+ { "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
+ { "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
+ /* raw_tp programs use u64[] from kernel side, we don't want
+ * to match on that, probably; so NULL for kern-side type
+ */
+ { "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
+ };
+ int i;
+
+ if (!ctx_name)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
+ if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
+ (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
+ *prog_type = ctx_map[i].prog_type;
+ *attach_type = ctx_map[i].attach_type;
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
+
+/* Make sure only target program is referenced from struct_ops map,
+ * otherwise libbpf would automatically set autocreate for all
+ * referenced programs.
+ * See libbpf.c:bpf_object_adjust_struct_ops_autoload.
+ */
+static void mask_unrelated_struct_ops_progs(struct bpf_object *obj,
+ struct bpf_map *map,
+ struct bpf_program *prog)
+{
+ struct btf *btf = bpf_object__btf(obj);
+ const struct btf_type *t, *mt;
+ struct btf_member *m;
+ int i, moff;
+ size_t data_sz, ptr_sz = sizeof(void *);
+ void *data;
+
+ t = btf__type_by_id(btf, bpf_map__btf_value_type_id(map));
+ if (!btf_is_struct(t))
+ return;
+
+ data = bpf_map__initial_value(map, &data_sz);
+ for (i = 0; i < btf_vlen(t); i++) {
+ m = &btf_members(t)[i];
+ mt = btf__type_by_id(btf, m->type);
+ if (!btf_is_ptr(mt))
+ continue;
+ moff = m->offset / 8;
+ if (moff + ptr_sz > data_sz)
+ continue;
+ if (memcmp(data + moff, &prog, ptr_sz) == 0)
+ continue;
+ memset(data + moff, 0, ptr_sz);
+ }
+}
+
+static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
+{
+ struct bpf_map *map;
+
+ bpf_object__for_each_map(map, obj) {
+ /* disable pinning */
+ bpf_map__set_pin_path(map, NULL);
+
+ /* fix up map size, if necessary */
+ switch (bpf_map__type(map)) {
+ case BPF_MAP_TYPE_SK_STORAGE:
+ case BPF_MAP_TYPE_TASK_STORAGE:
+ case BPF_MAP_TYPE_INODE_STORAGE:
+ case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
+ break;
+ case BPF_MAP_TYPE_STRUCT_OPS:
+ mask_unrelated_struct_ops_progs(obj, map, prog);
+ break;
+ default:
+ if (bpf_map__max_entries(map) == 0)
+ bpf_map__set_max_entries(map, 1);
+ }
+ }
+
+ /* SEC(freplace) programs can't be loaded with veristat as is,
+ * but we can try guessing their target program's expected type by
+ * looking at the type of program's first argument and substituting
+ * corresponding program type
+ */
+ if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
+ const struct btf *btf = bpf_object__btf(obj);
+ const char *prog_name = bpf_program__name(prog);
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type attach_type;
+ const struct btf_type *t;
+ const char *ctx_name;
+ int id;
+
+ if (!btf)
+ goto skip_freplace_fixup;
+
+ id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
+ t = btf__type_by_id(btf, id);
+ t = btf__type_by_id(btf, t->type);
+ if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
+ goto skip_freplace_fixup;
+
+ /* context argument is a pointer to a struct/typedef */
+ t = btf__type_by_id(btf, btf_params(t)[0].type);
+ while (t && btf_is_mod(t))
+ t = btf__type_by_id(btf, t->type);
+ if (!t || !btf_is_ptr(t))
+ goto skip_freplace_fixup;
+ t = btf__type_by_id(btf, t->type);
+ while (t && btf_is_mod(t))
+ t = btf__type_by_id(btf, t->type);
+ if (!t)
+ goto skip_freplace_fixup;
+
+ ctx_name = btf__name_by_offset(btf, t->name_off);
+
+ if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
+ bpf_program__set_type(prog, prog_type);
+ bpf_program__set_expected_attach_type(prog, attach_type);
+
+ if (!env.quiet) {
+ fprintf(stderr, "Using guessed program type '%s' for %s/%s...\n",
+ libbpf_bpf_prog_type_str(prog_type),
+ filename, prog_name);
+ }
+ } else {
+ if (!env.quiet) {
+ fprintf(stderr, "Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
+ ctx_name, filename, prog_name);
+ }
+ }
+ }
+skip_freplace_fixup:
+ return;
+}
+
+static int max_verifier_log_size(void)
+{
+ const int SMALL_LOG_SIZE = UINT_MAX >> 8;
+ const int BIG_LOG_SIZE = UINT_MAX >> 2;
+ struct bpf_insn insns[] = {
+ { .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = BPF_REG_0, },
+ { .code = BPF_JMP | BPF_EXIT, },
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_size = BIG_LOG_SIZE,
+ .log_buf = (void *)-1,
+ .log_level = 4
+ );
+ int ret, insn_cnt = ARRAY_SIZE(insns);
+ static int log_size;
+
+ if (log_size != 0)
+ return log_size;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+
+ if (ret == -EFAULT)
+ log_size = BIG_LOG_SIZE;
+ else /* ret == -EINVAL, big log size is not supported by the verifier */
+ log_size = SMALL_LOG_SIZE;
+
+ return log_size;
+}
+
+static bool output_stat_enabled(int id)
+{
+ int i;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++)
+ if (env.output_spec.ids[i] == id)
+ return true;
+ return false;
+}
+
+__attribute__((format(printf, 2, 3)))
+static int write_one_line(const char *file, const char *fmt, ...)
+{
+ int err, saved_errno;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "w");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ errno = 0;
+ err = vfprintf(f, fmt, ap);
+ saved_errno = errno;
+ va_end(ap);
+ fclose(f);
+ errno = saved_errno;
+ return err < 0 ? -1 : 0;
+}
+
+__attribute__((format(scanf, 3, 4)))
+static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...)
+{
+ int res = 0, saved_errno = 0;
+ char *line = NULL;
+ size_t line_len;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "r");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ while (getline(&line, &line_len, f) > 0) {
+ res = vsscanf(line, fmt, ap);
+ if (res == fields_expected)
+ goto out;
+ }
+ if (ferror(f)) {
+ saved_errno = errno;
+ res = -1;
+ }
+
+out:
+ va_end(ap);
+ free(line);
+ fclose(f);
+ errno = saved_errno;
+ return res;
+}
+
+static void destroy_stat_cgroup(void)
+{
+ char buf[PATH_MAX];
+ int err;
+
+ close(env.memory_peak_fd);
+
+ if (env.orig_cgroup[0]) {
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0)
+ log_errno("moving self to original cgroup %s\n", env.orig_cgroup);
+ }
+
+ if (env.stat_cgroup[0]) {
+ err = rmdir(env.stat_cgroup);
+ if (err < 0)
+ log_errno("deletion of cgroup %s", env.stat_cgroup);
+ }
+
+ env.memory_peak_fd = -1;
+ env.orig_cgroup[0] = 0;
+ env.stat_cgroup[0] = 0;
+}
+
+/*
+ * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-<pid>,
+ * moves current process to this cgroup.
+ */
+static void create_stat_cgroup(void)
+{
+ char cgroup_fs_mount[4096];
+ char buf[4096];
+ int err;
+
+ env.memory_peak_fd = -1;
+
+ if (!output_stat_enabled(MEMORY_PEAK))
+ return;
+
+ err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s",
+ cgroup_fs_mount, buf);
+ if (err != 2) {
+ if (err < 0)
+ log_errno("reading /proc/self/mounts");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't find cgroupfs v2 mount point.\n");
+ goto err_out;
+ }
+
+ /* cgroup-v2.rst promises the line "0::<group>" for cgroups v2 */
+ err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf);
+ if (err != 1) {
+ if (err < 0)
+ log_errno("reading /proc/self/cgroup");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't infer veristat process cgroup.");
+ goto err_out;
+ }
+
+ snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf);
+
+ snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid());
+ err = mkdir(buf, 0777);
+ if (err < 0) {
+ log_errno("creation of cgroup %s", buf);
+ goto err_out;
+ }
+ strcpy(env.stat_cgroup, buf);
+
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0) {
+ log_errno("entering cgroup %s", buf);
+ goto err_out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup);
+ env.memory_peak_fd = open(buf, O_RDWR | O_APPEND);
+ if (env.memory_peak_fd < 0) {
+ log_errno("opening %s", buf);
+ goto err_out;
+ }
+
+ return;
+
+err_out:
+ if (!env.quiet)
+ fprintf(stderr, "Memory usage metric unavailable.\n");
+ destroy_stat_cgroup();
+}
+
+/* Current value of /sys/fs/cgroup/veristat-accounting-<pid>/memory.peak */
+static long cgroup_memory_peak(void)
+{
+ long err, memory_peak;
+ char buf[32];
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0);
+ if (err <= 0) {
+ log_errno("pread(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+
+ buf[err] = 0;
+ errno = 0;
+ memory_peak = strtoll(buf, NULL, 10);
+ if (errno) {
+ log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf);
+ return -1;
+ }
+
+ return memory_peak;
+}
+
+static int reset_stat_cgroup(void)
+{
+ char buf[] = "r\n";
+ int err;
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0);
+ if (err <= 0) {
+ log_errno("pwrite(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+ return 0;
+}
+
+static int parse_rvalue(const char *val, struct rvalue *rvalue)
+{
+ long long value;
+ char *val_end;
+
+ if (val[0] == '-' || isdigit(val[0])) {
+ /* must be a number */
+ errno = 0;
+ value = strtoll(val, &val_end, 0);
+ if (errno == ERANGE) {
+ errno = 0;
+ value = strtoull(val, &val_end, 0);
+ }
+ if (errno || *val_end != '\0') {
+ fprintf(stderr, "Failed to parse value '%s'\n", val);
+ return -EINVAL;
+ }
+ rvalue->ivalue = value;
+ rvalue->type = INTEGRAL;
+ } else {
+ /* if not a number, consider it enum value */
+ rvalue->svalue = strdup(val);
+ if (!rvalue->svalue)
+ return -ENOMEM;
+ rvalue->type = ENUMERATOR;
+ }
+ return 0;
+}
+
+static void dump(__u32 prog_id, enum dump_mode mode, const char *file_name, const char *prog_name)
+{
+ char command[64], buf[4096];
+ FILE *fp;
+ int status;
+
+ status = system("command -v bpftool > /dev/null 2>&1");
+ if (status != 0) {
+ fprintf(stderr, "bpftool is not available, can't print program dump\n");
+ return;
+ }
+ snprintf(command, sizeof(command), "bpftool prog dump %s id %u",
+ mode == DUMP_JITED ? "jited" : "xlated", prog_id);
+ fp = popen(command, "r");
+ if (!fp) {
+ fprintf(stderr, "bpftool failed with error: %d\n", errno);
+ return;
+ }
+
+ printf("DUMP (%s) %s/%s:\n", mode == DUMP_JITED ? "JITED" : "XLATED", file_name, prog_name);
+ while (fgets(buf, sizeof(buf), fp))
+ fputs(buf, stdout);
+ fprintf(stdout, "\n");
+
+ if (ferror(fp))
+ fprintf(stderr, "Failed to dump BPF prog with error: %d\n", errno);
+
+ pclose(fp);
+}
+
+static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
+{
+ const char *base_filename = basename(strdupa(filename));
+ const char *prog_name = bpf_program__name(prog);
+ long mem_peak_a, mem_peak_b, mem_peak = -1;
+ char *buf;
+ int buf_sz, log_level;
+ struct verif_stats *stats;
+ struct bpf_prog_info info;
+ __u32 info_len = sizeof(info);
+ int err = 0, cgroup_err;
+ void *tmp;
+ int fd;
+
+ if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
+ env.progs_skipped++;
+ return 0;
+ }
+
+ tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
+ if (!tmp)
+ return -ENOMEM;
+ env.prog_stats = tmp;
+ stats = &env.prog_stats[env.prog_stat_cnt++];
+ memset(stats, 0, sizeof(*stats));
+
+ if (env.verbose || env.top_src_lines > 0) {
+ buf_sz = env.log_size ? env.log_size : max_verifier_log_size();
+ buf = malloc(buf_sz);
+ if (!buf)
+ return -ENOMEM;
+ /* ensure we always request stats */
+ log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
+ /* --top-src-lines needs verifier log */
+ if (env.top_src_lines > 0 && env.log_level == 0)
+ log_level |= 2;
+ } else {
+ buf = verif_log_buf;
+ buf_sz = sizeof(verif_log_buf);
+ /* request only verifier stats */
+ log_level = 4 | (env.log_fixed ? 8 : 0);
+ }
+ verif_log_buf[0] = '\0';
+
+ bpf_program__set_log_buf(prog, buf, buf_sz);
+ bpf_program__set_log_level(prog, log_level);
+
+ /* increase chances of successful BPF object loading */
+ fixup_obj(obj, prog, base_filename);
+
+ if (env.force_checkpoints)
+ bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
+ if (env.force_reg_invariants)
+ bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
+
+ err = bpf_object__prepare(obj);
+ if (!err) {
+ cgroup_err = reset_stat_cgroup();
+ mem_peak_a = cgroup_memory_peak();
+ err = bpf_object__load(obj);
+ mem_peak_b = cgroup_memory_peak();
+ if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0)
+ mem_peak = mem_peak_b - mem_peak_a;
+ }
+ env.progs_processed++;
+
+ stats->file_name = strdup(base_filename);
+ stats->prog_name = strdup(bpf_program__name(prog));
+ stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
+ stats->stats[SIZE] = bpf_program__insn_cnt(prog);
+ stats->stats[PROG_TYPE] = bpf_program__type(prog);
+ stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
+ stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024);
+
+ memset(&info, 0, info_len);
+ fd = bpf_program__fd(prog);
+ if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) {
+ stats->stats[JITED_SIZE] = info.jited_prog_len;
+ if (env.dump_mode & DUMP_JITED)
+ dump(info.id, DUMP_JITED, base_filename, prog_name);
+ if (env.dump_mode & DUMP_XLATED)
+ dump(info.id, DUMP_XLATED, base_filename, prog_name);
+ }
+
+ parse_verif_log(buf, buf_sz, stats);
+
+ if (env.verbose) {
+ printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
+ filename, prog_name, stats->stats[DURATION],
+ err ? "failure" : "success", buf);
+ }
+ if (env.top_src_lines > 0)
+ print_top_src_lines(buf, buf_sz, stats->prog_name);
+
+ if (verif_log_buf != buf)
+ free(buf);
+
+ return 0;
+}
+
+static int append_preset_atom(struct var_preset *preset, char *value, bool is_index)
+{
+ struct field_access *tmp;
+ int i = preset->atom_count;
+ int err;
+
+ tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms));
+ if (!tmp)
+ return -ENOMEM;
+
+ preset->atoms = tmp;
+ preset->atom_count++;
+
+ if (is_index) {
+ preset->atoms[i].type = ARRAY_INDEX;
+ err = parse_rvalue(value, &preset->atoms[i].index);
+ if (err)
+ return err;
+ } else {
+ preset->atoms[i].type = FIELD_NAME;
+ preset->atoms[i].name = strdup(value);
+ if (!preset->atoms[i].name)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int parse_var_atoms(const char *full_var, struct var_preset *preset)
+{
+ char expr[256], var[256], *name, *saveptr;
+ int n, len, off, err;
+
+ snprintf(expr, sizeof(expr), "%s", full_var);
+ preset->atom_count = 0;
+ while ((name = strtok_r(preset->atom_count ? NULL : expr, ".", &saveptr))) {
+ len = strlen(name);
+ /* parse variable name */
+ if (sscanf(name, "%[a-zA-Z0-9_] %n", var, &off) != 1) {
+ fprintf(stderr, "Can't parse %s\n", name);
+ return -EINVAL;
+ }
+ err = append_preset_atom(preset, var, false);
+ if (err)
+ return err;
+
+ /* parse optional array indexes */
+ while (off < len) {
+ if (sscanf(name + off, " [ %[a-zA-Z0-9_] ] %n", var, &n) != 1) {
+ fprintf(stderr, "Can't parse %s as index\n", name + off);
+ return -EINVAL;
+ }
+ err = append_preset_atom(preset, var, true);
+ if (err)
+ return err;
+ off += n;
+ }
+ }
+ return 0;
+}
+
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr)
+{
+ void *tmp;
+ struct var_preset *cur;
+ char var[256], val[256];
+ int n, err;
+
+ tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets));
+ if (!tmp)
+ return -ENOMEM;
+ *presets = tmp;
+ cur = &(*presets)[*cnt];
+ memset(cur, 0, sizeof(*cur));
+ (*cnt)++;
+
+ if (sscanf(expr, " %[][a-zA-Z0-9_. ] = %s %n", var, val, &n) != 2 || n != strlen(expr)) {
+ fprintf(stderr, "Failed to parse expression '%s'\n", expr);
+ return -EINVAL;
+ }
+ /* Remove trailing spaces from var, as scanf may add those */
+ rtrim(var);
+
+ err = parse_rvalue(val, &cur->value);
+ if (err)
+ return err;
+
+ cur->full_name = strdup(var);
+ if (!cur->full_name)
+ return -ENOMEM;
+
+ err = parse_var_atoms(var, cur);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int append_var_preset_file(const char *filename)
+{
+ char buf[1024];
+ FILE *f;
+ int err = 0;
+
+ f = fopen(filename, "rt");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open presets in '%s': %s\n", filename, strerror(-err));
+ return -EINVAL;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+
+ err = append_var_preset(&env.presets, &env.npresets, buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+static bool is_signed_type(const struct btf_type *t)
+{
+ if (btf_is_int(t))
+ return btf_int_encoding(t) & BTF_INT_SIGNED;
+ if (btf_is_any_enum(t))
+ return btf_kflag(t);
+ return true;
+}
+
+static int enum_value_from_name(const struct btf *btf, const struct btf_type *t,
+ const char *evalue, long long *retval)
+{
+ if (btf_is_enum(t)) {
+ struct btf_enum *e = btf_enum(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = e->val;
+ return 0;
+ }
+ }
+ } else if (btf_is_enum64(t)) {
+ struct btf_enum64 *e = btf_enum64(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+ __u64 value = btf_enum64_value(e);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = value;
+ return 0;
+ }
+ }
+ }
+ return -EINVAL;
+}
+
+static bool is_preset_supported(const struct btf_type *t)
+{
+ return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t);
+}
+
+static int find_enum_value(const struct btf *btf, const char *name, long long *value)
+{
+ const struct btf_type *t;
+ int cnt, i;
+ long long lvalue;
+
+ cnt = btf__type_cnt(btf);
+ for (i = 1; i != cnt; ++i) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_any_enum(t))
+ continue;
+
+ if (enum_value_from_name(btf, t, name, &lvalue) == 0) {
+ *value = lvalue;
+ return 0;
+ }
+ }
+ return -ESRCH;
+}
+
+static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result)
+{
+ int err = 0;
+
+ switch (rvalue->type) {
+ case INTEGRAL:
+ *result = rvalue->ivalue;
+ return 0;
+ case ENUMERATOR:
+ err = find_enum_value(btf, rvalue->svalue, result);
+ if (err) {
+ fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue);
+ return err;
+ }
+ return 0;
+ default:
+ fprintf(stderr, "Unknown rvalue type\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int adjust_var_secinfo_array(struct btf *btf, int tid, struct field_access *atom,
+ const char *array_name, struct btf_var_secinfo *sinfo)
+{
+ const struct btf_type *t;
+ struct btf_array *barr;
+ long long idx;
+ int err;
+
+ tid = btf__resolve_type(btf, tid);
+ t = btf__type_by_id(btf, tid);
+ if (!btf_is_array(t)) {
+ fprintf(stderr, "Array index is not expected for %s\n",
+ array_name);
+ return -EINVAL;
+ }
+ barr = btf_array(t);
+ err = resolve_rvalue(btf, &atom->index, &idx);
+ if (err)
+ return err;
+ if (idx < 0 || idx >= barr->nelems) {
+ fprintf(stderr, "Array index %lld is out of bounds [0, %u): %s\n",
+ idx, barr->nelems, array_name);
+ return -EINVAL;
+ }
+ sinfo->size = btf__resolve_size(btf, barr->type);
+ sinfo->offset += sinfo->size * idx;
+ sinfo->type = btf__resolve_type(btf, barr->type);
+ return 0;
+}
+
+static int adjust_var_secinfo_member(const struct btf *btf,
+ const struct btf_type *parent_type,
+ __u32 parent_offset,
+ const char *member_name,
+ struct btf_var_secinfo *sinfo)
+{
+ int i;
+
+ if (!btf_is_composite(parent_type)) {
+ fprintf(stderr, "Can't resolve field %s for non-composite type\n", member_name);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < btf_vlen(parent_type); ++i) {
+ const struct btf_member *member;
+ const struct btf_type *member_type;
+ int tid, off;
+
+ member = btf_members(parent_type) + i;
+ tid = btf__resolve_type(btf, member->type);
+ if (tid < 0)
+ return -EINVAL;
+
+ member_type = btf__type_by_id(btf, tid);
+ off = parent_offset + member->offset;
+ if (member->name_off) {
+ const char *name = btf__name_by_offset(btf, member->name_off);
+
+ if (strcmp(member_name, name) == 0) {
+ if (btf_member_bitfield_size(parent_type, i) != 0) {
+ fprintf(stderr, "Bitfield presets are not supported %s\n",
+ name);
+ return -EINVAL;
+ }
+ sinfo->offset += off / 8;
+ sinfo->type = tid;
+ sinfo->size = member_type->size;
+ return 0;
+ }
+ } else if (btf_is_composite(member_type)) {
+ int err;
+
+ err = adjust_var_secinfo_member(btf, member_type, off,
+ member_name, sinfo);
+ if (!err)
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
+
+static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t,
+ struct btf_var_secinfo *sinfo, struct var_preset *preset)
+{
+ const struct btf_type *base_type;
+ const char *prev_name;
+ int err, i;
+ int tid;
+
+ assert(preset->atom_count > 0);
+ assert(preset->atoms[0].type == FIELD_NAME);
+
+ tid = btf__resolve_type(btf, t->type);
+ base_type = btf__type_by_id(btf, tid);
+ prev_name = preset->atoms[0].name;
+
+ for (i = 1; i < preset->atom_count; ++i) {
+ struct field_access *atom = preset->atoms + i;
+
+ switch (atom->type) {
+ case ARRAY_INDEX:
+ err = adjust_var_secinfo_array(btf, tid, atom, prev_name, sinfo);
+ break;
+ case FIELD_NAME:
+ err = adjust_var_secinfo_member(btf, base_type, 0, atom->name, sinfo);
+ if (err == -ESRCH)
+ fprintf(stderr, "Can't find '%s'\n", atom->name);
+ prev_name = atom->name;
+ break;
+ default:
+ fprintf(stderr, "Unknown field_access type\n");
+ return -EOPNOTSUPP;
+ }
+ if (err)
+ return err;
+ base_type = btf__type_by_id(btf, sinfo->type);
+ tid = sinfo->type;
+ }
+
+ return 0;
+}
+
+static int set_global_var(struct bpf_object *obj, struct btf *btf,
+ struct bpf_map *map, struct btf_var_secinfo *sinfo,
+ struct var_preset *preset)
+{
+ const struct btf_type *base_type;
+ void *ptr;
+ long long value = preset->value.ivalue;
+ size_t size;
+
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type));
+ if (!base_type) {
+ fprintf(stderr, "Failed to resolve type %d\n", sinfo->type);
+ return -EINVAL;
+ }
+ if (!is_preset_supported(base_type)) {
+ fprintf(stderr, "Can't set %s. Only ints and enums are supported\n",
+ preset->full_name);
+ return -EINVAL;
+ }
+
+ if (preset->value.type == ENUMERATOR) {
+ if (btf_is_any_enum(base_type)) {
+ if (enum_value_from_name(btf, base_type, preset->value.svalue, &value)) {
+ fprintf(stderr,
+ "Failed to find integer value for enum element %s\n",
+ preset->value.svalue);
+ return -EINVAL;
+ }
+ } else {
+ fprintf(stderr, "Value %s is not supported for type %s\n",
+ preset->value.svalue,
+ btf__name_by_offset(btf, base_type->name_off));
+ return -EINVAL;
+ }
+ }
+
+ /* Check if value fits into the target variable size */
+ if (sinfo->size < sizeof(value)) {
+ bool is_signed = is_signed_type(base_type);
+ __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0);
+ long long max_val = 1ll << unsigned_bits;
+
+ if (value >= max_val || value < -max_val) {
+ fprintf(stderr,
+ "Variable %s value %lld is out of range [%lld; %lld]\n",
+ btf__name_by_offset(btf, base_type->name_off), value,
+ is_signed ? -max_val : 0, max_val - 1);
+ return -EINVAL;
+ }
+ }
+
+ ptr = bpf_map__initial_value(map, &size);
+ if (!ptr || sinfo->offset + sinfo->size > size)
+ return -EINVAL;
+
+ if (__BYTE_ORDER == __LITTLE_ENDIAN) {
+ memcpy(ptr + sinfo->offset, &value, sinfo->size);
+ } else { /* __BYTE_ORDER == __BIG_ENDIAN */
+ __u8 src_offset = sizeof(value) - sinfo->size;
+
+ memcpy(ptr + sinfo->offset, (void *)&value + src_offset, sinfo->size);
+ }
+ return 0;
+}
+
+static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets)
+{
+ struct btf_var_secinfo *sinfo;
+ const char *sec_name;
+ const struct btf_type *t;
+ struct bpf_map *map;
+ struct btf *btf;
+ int i, j, k, n, cnt, err = 0;
+
+ if (npresets == 0)
+ return 0;
+
+ btf = bpf_object__btf(obj);
+ if (!btf)
+ return -EINVAL;
+
+ cnt = btf__type_cnt(btf);
+ for (i = 1; i != cnt; ++i) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ sinfo = btf_var_secinfos(t);
+ sec_name = btf__name_by_offset(btf, t->name_off);
+ map = bpf_object__find_map_by_name(obj, sec_name);
+ if (!map)
+ continue;
+
+ n = btf_vlen(t);
+ for (j = 0; j < n; ++j, ++sinfo) {
+ const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type);
+ const char *var_name;
+
+ if (!btf_is_var(var_type))
+ continue;
+
+ var_name = btf__name_by_offset(btf, var_type->name_off);
+
+ for (k = 0; k < npresets; ++k) {
+ struct btf_var_secinfo tmp_sinfo;
+
+ if (strcmp(var_name, presets[k].atoms[0].name) != 0)
+ continue;
+
+ if (presets[k].applied) {
+ fprintf(stderr, "Variable %s is set more than once",
+ var_name);
+ return -EINVAL;
+ }
+ tmp_sinfo = *sinfo;
+ err = adjust_var_secinfo(btf, var_type,
+ &tmp_sinfo, presets + k);
+ if (err)
+ return err;
+
+ err = set_global_var(obj, btf, map, &tmp_sinfo, presets + k);
+ if (err)
+ return err;
+
+ presets[k].applied = true;
+ }
+ }
+ }
+ for (i = 0; i < npresets; ++i) {
+ if (!presets[i].applied) {
+ fprintf(stderr, "Global variable preset %s has not been applied\n",
+ presets[i].full_name);
+ err = -EINVAL;
+ }
+ presets[i].applied = false;
+ }
+ return err;
+}
+
+static int process_obj(const char *filename)
+{
+ const char *base_filename = basename(strdupa(filename));
+ struct bpf_object *obj = NULL, *tobj;
+ struct bpf_program *prog, *tprog, *lprog;
+ libbpf_print_fn_t old_libbpf_print_fn;
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ int err = 0, prog_cnt = 0;
+
+ if (!should_process_file_prog(base_filename, NULL)) {
+ if (env.verbose)
+ printf("Skipping '%s' due to filters...\n", filename);
+ env.files_skipped++;
+ return 0;
+ }
+ if (!is_bpf_obj_file(filename)) {
+ if (env.verbose)
+ printf("Skipping '%s' as it's not a BPF object file...\n", filename);
+ env.files_skipped++;
+ return 0;
+ }
+
+ if (!env.quiet && env.out_fmt == RESFMT_TABLE)
+ printf("Processing '%s'...\n", base_filename);
+
+ old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
+ obj = bpf_object__open_file(filename, &opts);
+ if (!obj) {
+ /* if libbpf can't open BPF object file, it could be because
+ * that BPF object file is incomplete and has to be statically
+ * linked into a final BPF object file; instead of bailing
+ * out, report it into stderr, mark it as skipped, and
+ * proceed
+ */
+ fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
+ env.files_skipped++;
+ err = 0;
+ goto cleanup;
+ }
+
+ env.files_processed++;
+
+ bpf_object__for_each_program(prog, obj) {
+ prog_cnt++;
+ }
+
+ if (prog_cnt == 1) {
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_autoload(prog, true);
+ err = set_global_vars(obj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
+ process_prog(filename, obj, prog);
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *prog_name = bpf_program__name(prog);
+
+ tobj = bpf_object__open_file(filename, &opts);
+ if (!tobj) {
+ err = -errno;
+ fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
+ goto cleanup;
+ }
+
+ err = set_global_vars(tobj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
+
+ lprog = NULL;
+ bpf_object__for_each_program(tprog, tobj) {
+ const char *tprog_name = bpf_program__name(tprog);
+
+ if (strcmp(prog_name, tprog_name) == 0) {
+ bpf_program__set_autoload(tprog, true);
+ lprog = tprog;
+ } else {
+ bpf_program__set_autoload(tprog, false);
+ }
+ }
+
+ process_prog(filename, tobj, lprog);
+ bpf_object__close(tobj);
+ }
+
+cleanup:
+ bpf_object__close(obj);
+ libbpf_set_print(old_libbpf_print_fn);
+ return err;
+}
+
+static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
+ enum stat_id id, bool asc, bool abs)
+{
+ int cmp = 0;
+
+ switch (id) {
+ case FILE_NAME:
+ cmp = strcmp(s1->file_name, s2->file_name);
+ break;
+ case PROG_NAME:
+ cmp = strcmp(s1->prog_name, s2->prog_name);
+ break;
+ case ATTACH_TYPE:
+ case PROG_TYPE:
+ case SIZE:
+ case JITED_SIZE:
+ case STACK:
+ case VERDICT:
+ case DURATION:
+ case TOTAL_INSNS:
+ case TOTAL_STATES:
+ case PEAK_STATES:
+ case MAX_STATES_PER_INSN:
+ case MEMORY_PEAK:
+ case MARK_READ_MAX_LEN: {
+ long v1 = s1->stats[id];
+ long v2 = s2->stats[id];
+
+ if (abs) {
+ v1 = v1 < 0 ? -v1 : v1;
+ v2 = v2 < 0 ? -v2 : v2;
+ }
+
+ if (v1 != v2)
+ cmp = v1 < v2 ? -1 : 1;
+ break;
+ }
+ default:
+ fprintf(stderr, "Unrecognized stat #%d\n", id);
+ exit(1);
+ }
+
+ return asc ? cmp : -cmp;
+}
+
+static int cmp_prog_stats(const void *v1, const void *v2)
+{
+ const struct verif_stats *s1 = v1, *s2 = v2;
+ int i, cmp;
+
+ for (i = 0; i < env.sort_spec.spec_cnt; i++) {
+ cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
+ env.sort_spec.asc[i], env.sort_spec.abs[i]);
+ if (cmp != 0)
+ return cmp;
+ }
+
+ /* always disambiguate with file+prog, which are unique */
+ cmp = strcmp(s1->file_name, s2->file_name);
+ if (cmp != 0)
+ return cmp;
+ return strcmp(s1->prog_name, s2->prog_name);
+}
+
+static void fetch_join_stat_value(const struct verif_stats_join *s,
+ enum stat_id id, enum stat_variant var,
+ const char **str_val,
+ double *num_val)
+{
+ long v1, v2;
+
+ if (id == FILE_NAME) {
+ *str_val = s->file_name;
+ return;
+ }
+ if (id == PROG_NAME) {
+ *str_val = s->prog_name;
+ return;
+ }
+
+ v1 = s->stats_a ? s->stats_a->stats[id] : 0;
+ v2 = s->stats_b ? s->stats_b->stats[id] : 0;
+
+ switch (var) {
+ case VARIANT_A:
+ if (!s->stats_a)
+ *num_val = -DBL_MAX;
+ else
+ *num_val = s->stats_a->stats[id];
+ return;
+ case VARIANT_B:
+ if (!s->stats_b)
+ *num_val = -DBL_MAX;
+ else
+ *num_val = s->stats_b->stats[id];
+ return;
+ case VARIANT_DIFF:
+ if (!s->stats_a || !s->stats_b)
+ *num_val = -DBL_MAX;
+ else if (id == VERDICT)
+ *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
+ else
+ *num_val = (double)(v2 - v1);
+ return;
+ case VARIANT_PCT:
+ if (!s->stats_a || !s->stats_b) {
+ *num_val = -DBL_MAX;
+ } else if (v1 == 0) {
+ if (v1 == v2)
+ *num_val = 0.0;
+ else
+ *num_val = v2 < v1 ? -100.0 : 100.0;
+ } else {
+ *num_val = (v2 - v1) * 100.0 / v1;
+ }
+ return;
+ }
+}
+
+static int cmp_join_stat(const struct verif_stats_join *s1,
+ const struct verif_stats_join *s2,
+ enum stat_id id, enum stat_variant var,
+ bool asc, bool abs)
+{
+ const char *str1 = NULL, *str2 = NULL;
+ double v1 = 0.0, v2 = 0.0;
+ int cmp = 0;
+
+ fetch_join_stat_value(s1, id, var, &str1, &v1);
+ fetch_join_stat_value(s2, id, var, &str2, &v2);
+
+ if (abs) {
+ v1 = fabs(v1);
+ v2 = fabs(v2);
+ }
+
+ if (str1)
+ cmp = strcmp(str1, str2);
+ else if (v1 != v2)
+ cmp = v1 < v2 ? -1 : 1;
+
+ return asc ? cmp : -cmp;
+}
+
+static int cmp_join_stats(const void *v1, const void *v2)
+{
+ const struct verif_stats_join *s1 = v1, *s2 = v2;
+ int i, cmp;
+
+ for (i = 0; i < env.sort_spec.spec_cnt; i++) {
+ cmp = cmp_join_stat(s1, s2,
+ env.sort_spec.ids[i],
+ env.sort_spec.variants[i],
+ env.sort_spec.asc[i],
+ env.sort_spec.abs[i]);
+ if (cmp != 0)
+ return cmp;
+ }
+
+ /* always disambiguate with file+prog, which are unique */
+ cmp = strcmp(s1->file_name, s2->file_name);
+ if (cmp != 0)
+ return cmp;
+ return strcmp(s1->prog_name, s2->prog_name);
+}
+
+#define HEADER_CHAR '-'
+#define COLUMN_SEP " "
+
+static void output_header_underlines(void)
+{
+ int i, j, len;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ len = env.output_spec.lens[i];
+
+ printf("%s", i == 0 ? "" : COLUMN_SEP);
+ for (j = 0; j < len; j++)
+ printf("%c", HEADER_CHAR);
+ }
+ printf("\n");
+}
+
+static void output_headers(enum resfmt fmt)
+{
+ const char *fmt_str;
+ int i, len;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i];
+ int *max_len = &env.output_spec.lens[i];
+
+ switch (fmt) {
+ case RESFMT_TABLE_CALCLEN:
+ len = snprintf(NULL, 0, "%s", stat_defs[id].header);
+ if (len > *max_len)
+ *max_len = len;
+ break;
+ case RESFMT_TABLE:
+ fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
+ printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header);
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ case RESFMT_CSV:
+ printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ }
+ }
+
+ if (fmt == RESFMT_TABLE)
+ output_header_underlines();
+}
+
+static void prepare_value(const struct verif_stats *s, enum stat_id id,
+ const char **str, long *val)
+{
+ switch (id) {
+ case FILE_NAME:
+ *str = s ? s->file_name : "N/A";
+ break;
+ case PROG_NAME:
+ *str = s ? s->prog_name : "N/A";
+ break;
+ case VERDICT:
+ if (!s)
+ *str = "N/A";
+ else
+ *str = s->stats[VERDICT] ? "success" : "failure";
+ break;
+ case ATTACH_TYPE:
+ if (!s)
+ *str = "N/A";
+ else
+ *str = libbpf_bpf_attach_type_str(s->stats[ATTACH_TYPE]) ?: "N/A";
+ break;
+ case PROG_TYPE:
+ if (!s)
+ *str = "N/A";
+ else
+ *str = libbpf_bpf_prog_type_str(s->stats[PROG_TYPE]) ?: "N/A";
+ break;
+ case DURATION:
+ case TOTAL_INSNS:
+ case TOTAL_STATES:
+ case PEAK_STATES:
+ case MAX_STATES_PER_INSN:
+ case MARK_READ_MAX_LEN:
+ case STACK:
+ case SIZE:
+ case JITED_SIZE:
+ case MEMORY_PEAK:
+ *val = s ? s->stats[id] : 0;
+ break;
+ default:
+ fprintf(stderr, "Unrecognized stat #%d\n", id);
+ exit(1);
+ }
+}
+
+static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
+{
+ int i;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i];
+ int *max_len = &env.output_spec.lens[i], len;
+ const char *str = NULL;
+ long val = 0;
+
+ prepare_value(s, id, &str, &val);
+
+ switch (fmt) {
+ case RESFMT_TABLE_CALCLEN:
+ if (str)
+ len = snprintf(NULL, 0, "%s", str);
+ else
+ len = snprintf(NULL, 0, "%ld", val);
+ if (len > *max_len)
+ *max_len = len;
+ break;
+ case RESFMT_TABLE:
+ if (str)
+ printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
+ else
+ printf("%s%*ld", i == 0 ? "" : COLUMN_SEP, *max_len, val);
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ case RESFMT_CSV:
+ if (str)
+ printf("%s%s", i == 0 ? "" : ",", str);
+ else
+ printf("%s%ld", i == 0 ? "" : ",", val);
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ }
+ }
+
+ if (last && fmt == RESFMT_TABLE) {
+ output_header_underlines();
+ printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
+ env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
+ }
+}
+
+static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
+{
+ switch (id) {
+ case FILE_NAME:
+ st->file_name = strdup(str);
+ if (!st->file_name)
+ return -ENOMEM;
+ break;
+ case PROG_NAME:
+ st->prog_name = strdup(str);
+ if (!st->prog_name)
+ return -ENOMEM;
+ break;
+ case VERDICT:
+ if (strcmp(str, "success") == 0) {
+ st->stats[VERDICT] = true;
+ } else if (strcmp(str, "failure") == 0) {
+ st->stats[VERDICT] = false;
+ } else {
+ fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
+ return -EINVAL;
+ }
+ break;
+ case DURATION:
+ case TOTAL_INSNS:
+ case TOTAL_STATES:
+ case PEAK_STATES:
+ case MAX_STATES_PER_INSN:
+ case MARK_READ_MAX_LEN:
+ case SIZE:
+ case JITED_SIZE:
+ case MEMORY_PEAK:
+ case STACK: {
+ long val;
+ int err, n;
+
+ if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
+ err = -errno;
+ fprintf(stderr, "Failed to parse '%s' as integer\n", str);
+ return err;
+ }
+
+ st->stats[id] = val;
+ break;
+ }
+ case PROG_TYPE: {
+ enum bpf_prog_type prog_type = 0;
+ const char *type;
+
+ while ((type = libbpf_bpf_prog_type_str(prog_type))) {
+ if (strcmp(type, str) == 0) {
+ st->stats[id] = prog_type;
+ break;
+ }
+ prog_type++;
+ }
+
+ if (!type) {
+ fprintf(stderr, "Unrecognized prog type %s\n", str);
+ return -EINVAL;
+ }
+ break;
+ }
+ case ATTACH_TYPE: {
+ enum bpf_attach_type attach_type = 0;
+ const char *type;
+
+ while ((type = libbpf_bpf_attach_type_str(attach_type))) {
+ if (strcmp(type, str) == 0) {
+ st->stats[id] = attach_type;
+ break;
+ }
+ attach_type++;
+ }
+
+ if (!type) {
+ fprintf(stderr, "Unrecognized attach type %s\n", str);
+ return -EINVAL;
+ }
+ break;
+ }
+ default:
+ fprintf(stderr, "Unrecognized stat #%d\n", id);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int parse_stats_csv(const char *filename, struct stat_specs *specs,
+ struct verif_stats **statsp, int *stat_cntp)
+{
+ char line[4096];
+ FILE *f;
+ int err = 0;
+ bool header = true;
+
+ f = fopen(filename, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
+ return err;
+ }
+
+ *stat_cntp = 0;
+
+ while (fgets(line, sizeof(line), f)) {
+ char *input = line, *state = NULL, *next;
+ struct verif_stats *st = NULL;
+ int col = 0, cnt = 0;
+
+ if (!header) {
+ void *tmp;
+
+ tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ *statsp = tmp;
+
+ st = &(*statsp)[*stat_cntp];
+ memset(st, 0, sizeof(*st));
+
+ *stat_cntp += 1;
+ }
+
+ while ((next = strtok_r(cnt++ ? NULL : input, ",\n", &state))) {
+ if (header) {
+ /* for the first line, set up spec stats */
+ err = parse_stat(next, specs);
+ if (err)
+ goto cleanup;
+ continue;
+ }
+
+ /* for all other lines, parse values based on spec */
+ if (col >= specs->spec_cnt) {
+ fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
+ col, *stat_cntp, filename);
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = parse_stat_value(next, specs->ids[col], st);
+ if (err)
+ goto cleanup;
+ col++;
+ }
+
+ if (header) {
+ header = false;
+ continue;
+ }
+
+ if (col < specs->spec_cnt) {
+ fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
+ *stat_cntp, filename);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!st->file_name || !st->prog_name) {
+ fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
+ *stat_cntp, filename);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* in comparison mode we can only check filters after we
+ * parsed entire line; if row should be ignored we pretend we
+ * never parsed it
+ */
+ if (!should_process_file_prog(st->file_name, st->prog_name)) {
+ free(st->file_name);
+ free(st->prog_name);
+ *stat_cntp -= 1;
+ }
+ }
+
+ if (!feof(f)) {
+ err = -errno;
+ fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+/* empty/zero stats for mismatched rows */
+static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
+
+static bool is_key_stat(enum stat_id id)
+{
+ return id == FILE_NAME || id == PROG_NAME;
+}
+
+static void output_comp_header_underlines(void)
+{
+ int i, j, k;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i];
+ int max_j = is_key_stat(id) ? 1 : 3;
+
+ for (j = 0; j < max_j; j++) {
+ int len = env.output_spec.lens[3 * i + j];
+
+ printf("%s", i + j == 0 ? "" : COLUMN_SEP);
+
+ for (k = 0; k < len; k++)
+ printf("%c", HEADER_CHAR);
+ }
+ }
+ printf("\n");
+}
+
+static void output_comp_headers(enum resfmt fmt)
+{
+ static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
+ static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
+ int i, j, len;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i];
+ /* key stats don't have A/B/DIFF columns, they are common for both data sets */
+ int max_j = is_key_stat(id) ? 1 : 3;
+
+ for (j = 0; j < max_j; j++) {
+ int *max_len = &env.output_spec.lens[3 * i + j];
+ bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
+ const char *sfx;
+
+ switch (fmt) {
+ case RESFMT_TABLE_CALCLEN:
+ sfx = is_key_stat(id) ? "" : table_sfxs[j];
+ len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
+ if (len > *max_len)
+ *max_len = len;
+ break;
+ case RESFMT_TABLE:
+ sfx = is_key_stat(id) ? "" : table_sfxs[j];
+ printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
+ *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
+ if (last)
+ printf("\n");
+ break;
+ case RESFMT_CSV:
+ sfx = is_key_stat(id) ? "" : name_sfxs[j];
+ printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
+ if (last)
+ printf("\n");
+ break;
+ }
+ }
+ }
+
+ if (fmt == RESFMT_TABLE)
+ output_comp_header_underlines();
+}
+
+static void output_comp_stats(const struct verif_stats_join *join_stats,
+ enum resfmt fmt, bool last)
+{
+ const struct verif_stats *base = join_stats->stats_a;
+ const struct verif_stats *comp = join_stats->stats_b;
+ char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
+ int i;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i], len;
+ int *max_len_base = &env.output_spec.lens[3 * i + 0];
+ int *max_len_comp = &env.output_spec.lens[3 * i + 1];
+ int *max_len_diff = &env.output_spec.lens[3 * i + 2];
+ const char *base_str = NULL, *comp_str = NULL;
+ long base_val = 0, comp_val = 0, diff_val = 0;
+
+ prepare_value(base, id, &base_str, &base_val);
+ prepare_value(comp, id, &comp_str, &comp_val);
+
+ /* normalize all the outputs to be in string buffers for simplicity */
+ if (is_key_stat(id)) {
+ /* key stats (file and program name) are always strings */
+ if (base)
+ snprintf(base_buf, sizeof(base_buf), "%s", base_str);
+ else
+ snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
+ } else if (base_str) {
+ snprintf(base_buf, sizeof(base_buf), "%s", base_str);
+ snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
+ if (!base || !comp)
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
+ else if (strcmp(base_str, comp_str) == 0)
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
+ else
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
+ } else {
+ double p = 0.0;
+
+ if (base)
+ snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
+ else
+ snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
+ if (comp)
+ snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
+ else
+ snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
+
+ diff_val = comp_val - base_val;
+ if (!base || !comp) {
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
+ } else {
+ if (base_val == 0) {
+ if (comp_val == base_val)
+ p = 0.0; /* avoid +0 (+100%) case */
+ else
+ p = comp_val < base_val ? -100.0 : 100.0;
+ } else {
+ p = diff_val * 100.0 / base_val;
+ }
+ snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
+ }
+ }
+
+ switch (fmt) {
+ case RESFMT_TABLE_CALCLEN:
+ len = strlen(base_buf);
+ if (len > *max_len_base)
+ *max_len_base = len;
+ if (!is_key_stat(id)) {
+ len = strlen(comp_buf);
+ if (len > *max_len_comp)
+ *max_len_comp = len;
+ len = strlen(diff_buf);
+ if (len > *max_len_diff)
+ *max_len_diff = len;
+ }
+ break;
+ case RESFMT_TABLE: {
+ /* string outputs are left-aligned, number outputs are right-aligned */
+ const char *fmt = base_str ? "%s%-*s" : "%s%*s";
+
+ printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
+ if (!is_key_stat(id)) {
+ printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
+ printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
+ }
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ }
+ case RESFMT_CSV:
+ printf("%s%s", i == 0 ? "" : ",", base_buf);
+ if (!is_key_stat(id)) {
+ printf("%s%s", i == 0 ? "" : ",", comp_buf);
+ printf("%s%s", i == 0 ? "" : ",", diff_buf);
+ }
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ }
+ }
+
+ if (last && fmt == RESFMT_TABLE)
+ output_comp_header_underlines();
+}
+
+static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
+{
+ int r;
+
+ r = strcmp(base->file_name, comp->file_name);
+ if (r != 0)
+ return r;
+ return strcmp(base->prog_name, comp->prog_name);
+}
+
+static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
+{
+ static const double eps = 1e-9;
+ const char *str = NULL;
+ double value = 0.0;
+
+ fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
+
+ if (f->abs)
+ value = fabs(value);
+
+ switch (f->op) {
+ case OP_EQ: return value > f->value - eps && value < f->value + eps;
+ case OP_NEQ: return value < f->value - eps || value > f->value + eps;
+ case OP_LT: return value < f->value - eps;
+ case OP_LE: return value <= f->value + eps;
+ case OP_GT: return value > f->value + eps;
+ case OP_GE: return value >= f->value - eps;
+ }
+
+ fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
+ return false;
+}
+
+static bool should_output_join_stats(const struct verif_stats_join *stats)
+{
+ struct filter *f;
+ int i, allow_cnt = 0;
+
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ f = &env.deny_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+
+ if (is_join_stat_filter_matched(f, stats))
+ return false;
+ }
+
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ f = &env.allow_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+ allow_cnt++;
+
+ if (is_join_stat_filter_matched(f, stats))
+ return true;
+ }
+
+ /* if there are no stat allowed filters, pass everything through */
+ return allow_cnt == 0;
+}
+
+static int handle_comparison_mode(void)
+{
+ struct stat_specs base_specs = {}, comp_specs = {};
+ struct stat_specs tmp_sort_spec;
+ enum resfmt cur_fmt;
+ int err, i, j, last_idx, cnt;
+
+ if (env.filename_cnt != 2) {
+ fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return -EINVAL;
+ }
+
+ err = parse_stats_csv(env.filenames[0], &base_specs,
+ &env.baseline_stats, &env.baseline_stat_cnt);
+ if (err) {
+ fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
+ return err;
+ }
+ err = parse_stats_csv(env.filenames[1], &comp_specs,
+ &env.prog_stats, &env.prog_stat_cnt);
+ if (err) {
+ fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
+ return err;
+ }
+
+ /* To keep it simple we validate that the set and order of stats in
+ * both CSVs are exactly the same. This can be lifted with a bit more
+ * pre-processing later.
+ */
+ if (base_specs.spec_cnt != comp_specs.spec_cnt) {
+ fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
+ env.filenames[0], env.filenames[1],
+ base_specs.spec_cnt, comp_specs.spec_cnt);
+ return -EINVAL;
+ }
+ for (i = 0; i < base_specs.spec_cnt; i++) {
+ if (base_specs.ids[i] != comp_specs.ids[i]) {
+ fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
+ env.filenames[0], env.filenames[1],
+ stat_defs[base_specs.ids[i]].names[0],
+ stat_defs[comp_specs.ids[i]].names[0]);
+ return -EINVAL;
+ }
+ }
+
+ /* Replace user-specified sorting spec with file+prog sorting rule to
+ * be able to join two datasets correctly. Once we are done, we will
+ * restore the original sort spec.
+ */
+ tmp_sort_spec = env.sort_spec;
+ env.sort_spec = join_sort_spec;
+ qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+ qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
+ env.sort_spec = tmp_sort_spec;
+
+ /* Join two datasets together. If baseline and comparison datasets
+ * have different subset of rows (we match by 'object + prog' as
+ * a unique key) then assume empty/missing/zero value for rows that
+ * are missing in the opposite data set.
+ */
+ i = j = 0;
+ while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
+ const struct verif_stats *base, *comp;
+ struct verif_stats_join *join;
+ void *tmp;
+ int r;
+
+ base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
+ comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
+
+ if (!base->file_name || !base->prog_name) {
+ fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
+ i, env.filenames[0]);
+ return -EINVAL;
+ }
+ if (!comp->file_name || !comp->prog_name) {
+ fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
+ j, env.filenames[1]);
+ return -EINVAL;
+ }
+
+ tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
+ if (!tmp)
+ return -ENOMEM;
+ env.join_stats = tmp;
+
+ join = &env.join_stats[env.join_stat_cnt];
+ memset(join, 0, sizeof(*join));
+
+ r = cmp_stats_key(base, comp);
+ if (r == 0) {
+ join->file_name = base->file_name;
+ join->prog_name = base->prog_name;
+ join->stats_a = base;
+ join->stats_b = comp;
+ i++;
+ j++;
+ } else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
+ join->file_name = base->file_name;
+ join->prog_name = base->prog_name;
+ join->stats_a = base;
+ join->stats_b = NULL;
+ i++;
+ } else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
+ join->file_name = comp->file_name;
+ join->prog_name = comp->prog_name;
+ join->stats_a = NULL;
+ join->stats_b = comp;
+ j++;
+ } else {
+ fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
+ __FILE__, __LINE__, i, j);
+ return -EINVAL;
+ }
+ env.join_stat_cnt += 1;
+ }
+
+ /* now sort joined results according to sort spec */
+ qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
+
+ /* for human-readable table output we need to do extra pass to
+ * calculate column widths, so we substitute current output format
+ * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
+ * and do everything again.
+ */
+ if (env.out_fmt == RESFMT_TABLE)
+ cur_fmt = RESFMT_TABLE_CALCLEN;
+ else
+ cur_fmt = env.out_fmt;
+
+one_more_time:
+ output_comp_headers(cur_fmt);
+
+ last_idx = -1;
+ cnt = 0;
+ for (i = 0; i < env.join_stat_cnt; i++) {
+ const struct verif_stats_join *join = &env.join_stats[i];
+
+ if (!should_output_join_stats(join))
+ continue;
+
+ if (env.top_n && cnt >= env.top_n)
+ break;
+
+ if (cur_fmt == RESFMT_TABLE_CALCLEN)
+ last_idx = i;
+
+ output_comp_stats(join, cur_fmt, i == last_idx);
+
+ cnt++;
+ }
+
+ if (cur_fmt == RESFMT_TABLE_CALCLEN) {
+ cur_fmt = RESFMT_TABLE;
+ goto one_more_time; /* ... this time with feeling */
+ }
+
+ return 0;
+}
+
+static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
+{
+ long value = stats->stats[f->stat_id];
+
+ if (f->abs)
+ value = value < 0 ? -value : value;
+
+ switch (f->op) {
+ case OP_EQ: return value == f->value;
+ case OP_NEQ: return value != f->value;
+ case OP_LT: return value < f->value;
+ case OP_LE: return value <= f->value;
+ case OP_GT: return value > f->value;
+ case OP_GE: return value >= f->value;
+ }
+
+ fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
+ return false;
+}
+
+static bool should_output_stats(const struct verif_stats *stats)
+{
+ struct filter *f;
+ int i, allow_cnt = 0;
+
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ f = &env.deny_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+
+ if (is_stat_filter_matched(f, stats))
+ return false;
+ }
+
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ f = &env.allow_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+ allow_cnt++;
+
+ if (is_stat_filter_matched(f, stats))
+ return true;
+ }
+
+ /* if there are no stat allowed filters, pass everything through */
+ return allow_cnt == 0;
+}
+
+static void output_prog_stats(void)
+{
+ const struct verif_stats *stats;
+ int i, last_stat_idx = 0, cnt = 0;
+
+ if (env.out_fmt == RESFMT_TABLE) {
+ /* calculate column widths */
+ output_headers(RESFMT_TABLE_CALCLEN);
+ for (i = 0; i < env.prog_stat_cnt; i++) {
+ stats = &env.prog_stats[i];
+ if (!should_output_stats(stats))
+ continue;
+ output_stats(stats, RESFMT_TABLE_CALCLEN, false);
+ last_stat_idx = i;
+ }
+ }
+
+ /* actually output the table */
+ output_headers(env.out_fmt);
+ for (i = 0; i < env.prog_stat_cnt; i++) {
+ stats = &env.prog_stats[i];
+ if (!should_output_stats(stats))
+ continue;
+ if (env.top_n && cnt >= env.top_n)
+ break;
+ output_stats(stats, env.out_fmt, i == last_stat_idx);
+ cnt++;
+ }
+}
+
+static int handle_verif_mode(void)
+{
+ int i, err = 0;
+
+ if (env.filename_cnt == 0) {
+ fprintf(stderr, "Please provide path to BPF object file!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return -EINVAL;
+ }
+
+ create_stat_cgroup();
+ for (i = 0; i < env.filename_cnt; i++) {
+ err = process_obj(env.filenames[i]);
+ if (err) {
+ fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
+ goto out;
+ }
+ }
+
+ qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+
+ output_prog_stats();
+
+out:
+ destroy_stat_cgroup();
+ return err;
+}
+
+static int handle_replay_mode(void)
+{
+ struct stat_specs specs = {};
+ int err;
+
+ if (env.filename_cnt != 1) {
+ fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return -EINVAL;
+ }
+
+ err = parse_stats_csv(env.filenames[0], &specs,
+ &env.prog_stats, &env.prog_stat_cnt);
+ if (err) {
+ fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
+ return err;
+ }
+
+ qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+
+ output_prog_stats();
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int err = 0, i, j;
+
+ if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+ return 1;
+
+ if (env.show_version) {
+ printf("%s\n", argp_program_version);
+ return 0;
+ }
+
+ if (env.verbose && env.quiet) {
+ fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return 1;
+ }
+ if (env.verbose && env.log_level == 0)
+ env.log_level = 1;
+
+ if (env.output_spec.spec_cnt == 0) {
+ if (env.out_fmt == RESFMT_CSV)
+ env.output_spec = default_csv_output_spec;
+ else
+ env.output_spec = default_output_spec;
+ }
+ if (env.sort_spec.spec_cnt == 0)
+ env.sort_spec = default_sort_spec;
+
+ if (env.comparison_mode && env.replay_mode) {
+ fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return 1;
+ }
+
+ if (env.comparison_mode)
+ err = handle_comparison_mode();
+ else if (env.replay_mode)
+ err = handle_replay_mode();
+ else
+ err = handle_verif_mode();
+
+ free_verif_stats(env.prog_stats, env.prog_stat_cnt);
+ free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
+ free(env.join_stats);
+ for (i = 0; i < env.filename_cnt; i++)
+ free(env.filenames[i]);
+ free(env.filenames);
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ free(env.allow_filters[i].any_glob);
+ free(env.allow_filters[i].file_glob);
+ free(env.allow_filters[i].prog_glob);
+ }
+ free(env.allow_filters);
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ free(env.deny_filters[i].any_glob);
+ free(env.deny_filters[i].file_glob);
+ free(env.deny_filters[i].prog_glob);
+ }
+ free(env.deny_filters);
+ for (i = 0; i < env.npresets; ++i) {
+ free(env.presets[i].full_name);
+ for (j = 0; j < env.presets[i].atom_count; ++j) {
+ switch (env.presets[i].atoms[j].type) {
+ case FIELD_NAME:
+ free(env.presets[i].atoms[j].name);
+ break;
+ case ARRAY_INDEX:
+ if (env.presets[i].atoms[j].index.type == ENUMERATOR)
+ free(env.presets[i].atoms[j].index.svalue);
+ break;
+ }
+ }
+ free(env.presets[i].atoms);
+ }
+ free(env.presets);
+ return -err;
+}
diff --git a/tools/testing/selftests/bpf/veristat.cfg b/tools/testing/selftests/bpf/veristat.cfg
new file mode 100644
index 000000000000..e661ffdcaadf
--- /dev/null
+++ b/tools/testing/selftests/bpf/veristat.cfg
@@ -0,0 +1,18 @@
+# pre-canned list of rather complex selftests/bpf BPF object files to monitor
+# BPF verifier's performance on
+bpf_flow*
+bpf_loop_bench*
+loop*
+netif_receive_skb*
+profiler*
+pyperf*
+strobemeta*
+test_cls_redirect*
+test_l4lb
+test_sysctl*
+test_tcp_hdr_*
+test_usdt*
+test_verif_scale*
+test_xdp_noinline*
+xdp_synproxy*
+verifier_search_pruning*
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 26ae8d0b6ce3..2f869daf8a06 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -1,35 +1,92 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-set -u
set -e
-# This script currently only works for x86_64, as
-# it is based on the VM image used by the BPF CI which is
-# x86_64.
-QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}"
-X86_BZIMAGE="arch/x86/boot/bzImage"
+# This script currently only works for the following platforms,
+# as it is based on the VM image used by the BPF CI, which is
+# available only for these architectures. We can also specify
+# the local rootfs image generated by the following script:
+# https://github.com/libbpf/ci/blob/main/rootfs/mkrootfs_debian.sh
+PLATFORM="${PLATFORM:-$(uname -m)}"
+case "${PLATFORM}" in
+s390x)
+ QEMU_BINARY=qemu-system-s390x
+ QEMU_CONSOLE="ttyS1"
+ HOST_FLAGS=(-smp 2 -enable-kvm)
+ CROSS_FLAGS=(-smp 2)
+ BZIMAGE="arch/s390/boot/vmlinux"
+ ARCH="s390"
+ ;;
+x86_64)
+ QEMU_BINARY=qemu-system-x86_64
+ QEMU_CONSOLE="ttyS0,115200"
+ HOST_FLAGS=(-cpu host -enable-kvm -smp 8)
+ CROSS_FLAGS=(-smp 8)
+ BZIMAGE="arch/x86/boot/bzImage"
+ ARCH="x86"
+ ;;
+aarch64)
+ QEMU_BINARY=qemu-system-aarch64
+ QEMU_CONSOLE="ttyAMA0,115200"
+ HOST_FLAGS=(-M virt,gic-version=3 -cpu host -enable-kvm -smp 8)
+ CROSS_FLAGS=(-M virt,gic-version=3 -cpu cortex-a76 -smp 8)
+ BZIMAGE="arch/arm64/boot/Image"
+ ARCH="arm64"
+ ;;
+riscv64)
+ # required qemu version v7.2.0+
+ QEMU_BINARY=qemu-system-riscv64
+ QEMU_CONSOLE="ttyS0,115200"
+ HOST_FLAGS=(-M virt -cpu host -enable-kvm -smp 8)
+ CROSS_FLAGS=(-M virt -cpu rv64,sscofpmf=true -smp 8)
+ BZIMAGE="arch/riscv/boot/Image"
+ ARCH="riscv"
+ ;;
+ppc64el)
+ QEMU_BINARY=qemu-system-ppc64
+ QEMU_CONSOLE="hvc0"
+ # KVM could not be tested for powerpc, therefore not enabled for now.
+ HOST_FLAGS=(-machine pseries -cpu POWER9)
+ CROSS_FLAGS=(-machine pseries -cpu POWER9)
+ BZIMAGE="vmlinux"
+ ARCH="powerpc"
+ ;;
+*)
+ echo "Unsupported architecture"
+ exit 1
+ ;;
+esac
DEFAULT_COMMAND="./test_progs"
MOUNT_DIR="mnt"
+LOCAL_ROOTFS_IMAGE=""
ROOTFS_IMAGE="root.img"
OUTPUT_DIR="$HOME/.bpf_selftests"
-KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config"
-KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config"
-INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
+KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config"
+ "tools/testing/selftests/bpf/config.vm"
+ "tools/testing/selftests/bpf/config.${PLATFORM}")
+INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX"
NUM_COMPILE_JOBS="$(nproc)"
+LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
+LOG_FILE="${LOG_FILE_BASE}.log"
+EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
usage()
{
cat <<EOF
-Usage: $0 [-i] [-d <output_dir>] -- [<command>]
+Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>]
<command> is the command you would normally run when you are in
tools/testing/selftests/bpf. e.g:
$0 -- ./test_progs -t test_lsm
-If no command is specified, "${DEFAULT_COMMAND}" will be run by
-default.
+If no command is specified and a debug shell (-s) is not requested,
+"${DEFAULT_COMMAND}" will be run by default.
+
+Using PLATFORM= and CROSS_COMPILE= options will enable cross platform testing:
+
+ PLATFORM=<platform> CROSS_COMPILE=<toolchain> $0 -- ./test_progs -t test_lsm
If you build your kernel using KBUILD_OUTPUT= or O= options, these
can be passed as environment variables to the script:
@@ -42,10 +99,14 @@ or
Options:
+ -l) Specify the path to the local rootfs image.
-i) Update the rootfs image with a newer version.
-d) Update the output directory (default: ${OUTPUT_DIR})
-j) Number of jobs for compilation, similar to -j in make
(default: ${NUM_COMPILE_JOBS})
+ -s) Instead of powering off the VM, start an interactive
+ shell. If <command> is specified, the shell runs after
+ the command finishes executing
EOF
}
@@ -62,24 +123,11 @@ populate_url_map()
fi
}
-download()
-{
- local file="$1"
-
- if [[ ! -v URLS[$file] ]]; then
- echo "$file not found" >&2
- return 1
- fi
-
- echo "Downloading $file..." >&2
- curl -Lsf "${URLS[$file]}" "${@:2}"
-}
-
newest_rootfs_version()
{
{
for file in "${!URLS[@]}"; do
- if [[ $file =~ ^libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then
+ if [[ $file =~ ^"${PLATFORM}"/libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then
echo "${BASH_REMATCH[1]}"
fi
done
@@ -88,16 +136,34 @@ newest_rootfs_version()
download_rootfs()
{
- local rootfsversion="$1"
- local dir="$2"
+ populate_url_map
+
+ local rootfsversion="$(newest_rootfs_version)"
+ local file="${PLATFORM}/libbpf-vmtest-rootfs-$rootfsversion.tar.zst"
+
+ if [[ ! -v URLS[$file] ]]; then
+ echo "$file not found" >&2
+ return 1
+ fi
+
+ echo "Downloading $file..." >&2
+ curl -Lsf "${URLS[$file]}" "${@:2}"
+}
+
+load_rootfs()
+{
+ local dir="$1"
if ! which zstd &> /dev/null; then
echo 'Could not find "zstd" on the system, please install zstd'
exit 1
fi
- download "libbpf-vmtest-rootfs-$rootfsversion.tar.zst" |
- zstd -d | sudo tar -C "$dir" -x
+ if [[ -n "${LOCAL_ROOTFS_IMAGE}" ]]; then
+ cat "${LOCAL_ROOTFS_IMAGE}" | zstd -d | sudo tar -C "$dir" -x
+ else
+ download_rootfs | zstd -d | sudo tar -C "$dir" -x
+ fi
}
recompile_kernel()
@@ -146,7 +212,7 @@ update_init_script()
local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
local init_script="${init_script_dir}/S50-startup"
local command="$1"
- local log_file="$2"
+ local exit_command="$2"
mount_image
@@ -160,17 +226,26 @@ EOF
fi
- sudo bash -c "cat >${init_script}" <<EOF
-#!/bin/bash
+ sudo bash -c "echo '#!/bin/bash' > ${init_script}"
+
+ if [[ "${command}" != "" ]]; then
+ sudo bash -c "cat >>${init_script}" <<EOF
+# Have a default value in the exit status file
+# incase the VM is forcefully stopped.
+echo "130" > "/root/${EXIT_STATUS_FILE}"
{
cd /root/bpf
echo ${command}
stdbuf -oL -eL ${command}
-} 2>&1 | tee /root/${log_file}
-poweroff -f
+ echo "\$?" > "/root/${EXIT_STATUS_FILE}"
+} 2>&1 | tee "/root/${LOG_FILE}"
+# Ensure that the logs are written to disk
+sync
EOF
+ fi
+ sudo bash -c "echo ${exit_command} >> ${init_script}"
sudo chmod a+x "${init_script}"
unmount_image
}
@@ -188,7 +263,7 @@ create_vm_image()
mkfs.ext4 -q "${rootfs_img}"
mount_image
- download_rootfs "$(newest_rootfs_version)" "${mount_dir}"
+ load_rootfs "${mount_dir}"
unmount_image
}
@@ -205,26 +280,32 @@ EOF
exit 1
fi
+ if [[ "${PLATFORM}" != "$(uname -m)" ]]; then
+ QEMU_FLAGS=("${CROSS_FLAGS[@]}")
+ else
+ QEMU_FLAGS=("${HOST_FLAGS[@]}")
+ fi
+
${QEMU_BINARY} \
-nodefaults \
-display none \
-serial mon:stdio \
- -cpu kvm64 \
- -enable-kvm \
- -smp 4 \
- -m 2G \
+ "${QEMU_FLAGS[@]}" \
+ -m 4G \
-drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \
-kernel "${kernel_bzimage}" \
- -append "root=/dev/vda rw console=ttyS0,115200"
+ -append "root=/dev/vda rw console=${QEMU_CONSOLE}"
}
copy_logs()
{
local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
- local log_file="${mount_dir}/root/$1"
+ local log_file="${mount_dir}/root/${LOG_FILE}"
+ local exit_status_file="${mount_dir}/root/${EXIT_STATUS_FILE}"
mount_image
sudo cp ${log_file} "${OUTPUT_DIR}"
+ sudo cp ${exit_status_file} "${OUTPUT_DIR}"
sudo rm -f ${log_file}
unmount_image
}
@@ -236,42 +317,76 @@ is_rel_path()
[[ ${path:0:1} != "/" ]]
}
+do_update_kconfig()
+{
+ local kernel_checkout="$1"
+ local kconfig_file="$2"
+
+ rm -f "$kconfig_file" 2> /dev/null
+
+ for config in "${KCONFIG_REL_PATHS[@]}"; do
+ local kconfig_src="${kernel_checkout}/${config}"
+ cat "$kconfig_src" >> "$kconfig_file"
+ done
+}
+
update_kconfig()
{
- local kconfig_file="$1"
- local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}"
- # Github does not return the "last-modified" header when retrieving the
- # raw contents of the file. Use the API call to get the last-modified
- # time of the kernel config and only update the config if it has been
- # updated after the previously cached config was created. This avoids
- # unnecessarily compiling the kernel and selftests.
- if [[ -f "${kconfig_file}" ]]; then
- local last_modified_date="$(curl -sL -D - "${KCONFIG_API_URL}" -o /dev/null | \
- grep "last-modified" | awk -F ': ' '{print $2}')"
- local remote_modified_timestamp="$(date -d "${last_modified_date}" +"%s")"
- local local_creation_timestamp="$(stat -c %Y "${kconfig_file}")"
+ local kernel_checkout="$1"
+ local kconfig_file="$2"
- if [[ "${remote_modified_timestamp}" -gt "${local_creation_timestamp}" ]]; then
- ${update_command}
- fi
+ if [[ -f "${kconfig_file}" ]]; then
+ local local_modified="$(stat -c %Y "${kconfig_file}")"
+
+ for config in "${KCONFIG_REL_PATHS[@]}"; do
+ local kconfig_src="${kernel_checkout}/${config}"
+ local src_modified="$(stat -c %Y "${kconfig_src}")"
+ # Only update the config if it has been updated after the
+ # previously cached config was created. This avoids
+ # unnecessarily compiling the kernel and selftests.
+ if [[ "${src_modified}" -gt "${local_modified}" ]]; then
+ do_update_kconfig "$kernel_checkout" "$kconfig_file"
+ # Once we have found one outdated configuration
+ # there is no need to check other ones.
+ break
+ fi
+ done
else
- ${update_command}
+ do_update_kconfig "$kernel_checkout" "$kconfig_file"
fi
}
+catch()
+{
+ local exit_code=$1
+ local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
+ # This is just a cleanup and the directory may
+ # have already been unmounted. So, don't let this
+ # clobber the error code we intend to return.
+ unmount_image || true
+ if [[ -f "${exit_status_file}" ]]; then
+ exit_code="$(cat ${exit_status_file})"
+ fi
+ exit ${exit_code}
+}
+
main()
{
local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
local kernel_checkout=$(realpath "${script_dir}"/../../../../)
- local log_file="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S.log")"
# By default the script searches for the kernel in the checkout directory but
# it also obeys environment variables O= and KBUILD_OUTPUT=
- local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
+ local kernel_bzimage="${kernel_checkout}/${BZIMAGE}"
local command="${DEFAULT_COMMAND}"
local update_image="no"
+ local exit_command="poweroff -f"
+ local debug_shell="no"
- while getopts 'hkid:j:' opt; do
+ while getopts ':hskl:id:j:' opt; do
case ${opt} in
+ l)
+ LOCAL_ROOTFS_IMAGE="$OPTARG"
+ ;;
i)
update_image="yes"
;;
@@ -281,6 +396,11 @@ main()
j)
NUM_COMPILE_JOBS="$OPTARG"
;;
+ s)
+ command=""
+ debug_shell="yes"
+ exit_command="bash"
+ ;;
h)
usage
exit 0
@@ -299,14 +419,22 @@ main()
done
shift $((OPTIND -1))
- if [[ $# -eq 0 ]]; then
+ trap 'catch "$?"' EXIT
+
+ if [[ "${PLATFORM}" != "$(uname -m)" ]] && [[ -z "${CROSS_COMPILE}" ]]; then
+ echo "Cross-platform testing needs to specify CROSS_COMPILE"
+ exit 1
+ fi
+
+ if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then
echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
else
command="$@"
fi
local kconfig_file="${OUTPUT_DIR}/latest.config"
- local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}"
+ local make_command="make ARCH=${ARCH} CROSS_COMPILE=${CROSS_COMPILE} \
+ -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}"
# Figure out where the kernel is being built.
# O takes precedence over KBUILD_OUTPUT.
@@ -314,18 +442,16 @@ main()
if is_rel_path "${O}"; then
O="$(realpath "${PWD}/${O}")"
fi
- kernel_bzimage="${O}/${X86_BZIMAGE}"
+ kernel_bzimage="${O}/${BZIMAGE}"
make_command="${make_command} O=${O}"
elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then
if is_rel_path "${KBUILD_OUTPUT}"; then
KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")"
fi
- kernel_bzimage="${KBUILD_OUTPUT}/${X86_BZIMAGE}"
+ kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}"
make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}"
fi
- populate_url_map
-
local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
@@ -333,7 +459,7 @@ main()
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${mount_dir}"
- update_kconfig "${kconfig_file}"
+ update_kconfig "${kernel_checkout}" "${kconfig_file}"
recompile_kernel "${kernel_checkout}" "${make_command}"
@@ -347,22 +473,12 @@ main()
fi
update_selftests "${kernel_checkout}" "${make_command}"
- update_init_script "${command}" "${log_file}"
+ update_init_script "${command}" "${exit_command}"
run_vm "${kernel_bzimage}"
- copy_logs "${log_file}"
- echo "Logs saved in ${OUTPUT_DIR}/${log_file}"
-}
-
-catch()
-{
- local exit_code=$1
- # This is just a cleanup and the directory may
- # have already been unmounted. So, don't let this
- # clobber the error code we intend to return.
- unmount_image || true
- exit ${exit_code}
+ if [[ "${command}" != "" ]]; then
+ copy_logs
+ echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
+ fi
}
-trap 'catch "$?"' EXIT
-
main "$@"
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
deleted file mode 100755
index ffcd3953f94c..000000000000
--- a/tools/testing/selftests/bpf/with_addr.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# add private ipv4 and ipv6 addresses to loopback
-
-readonly V6_INNER='100::a/128'
-readonly V4_INNER='192.168.0.1/32'
-
-if getopts ":s" opt; then
- readonly SIT_DEV_NAME='sixtofourtest0'
- readonly V6_SIT='2::/64'
- readonly V4_SIT='172.17.0.1/32'
- shift
-fi
-
-fail() {
- echo "error: $*" 1>&2
- exit 1
-}
-
-setup() {
- ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
- ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
-
- if [[ -n "${V6_SIT}" ]]; then
- ip link add "${SIT_DEV_NAME}" type sit remote any local any \
- || fail 'failed to add sit'
- ip link set dev "${SIT_DEV_NAME}" up \
- || fail 'failed to bring sit device up'
- ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v6 SIT address'
- ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v4 SIT address'
- fi
-
- sleep 2 # avoid race causing bind to fail
-}
-
-cleanup() {
- if [[ -n "${V6_SIT}" ]]; then
- ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
- ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
- ip link del "${SIT_DEV_NAME}"
- fi
-
- ip -4 addr del "${V4_INNER}" dev lo
- ip -6 addr del "${V6_INNER}" dev lo
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
deleted file mode 100755
index e24949ed3a20..000000000000
--- a/tools/testing/selftests/bpf/with_tunnels.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# setup tunnels for flow dissection test
-
-readonly SUFFIX="test_$(mktemp -u XXXX)"
-CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
-
-setup() {
- ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
- ip link add "gre_${SUFFIX}" type gre ${CONFIG}
- ip link add "sit_${SUFFIX}" type sit ${CONFIG}
-
- echo "tunnels before test:"
- ip tunnel show
-
- ip link set "ipip_${SUFFIX}" up
- ip link set "gre_${SUFFIX}" up
- ip link set "sit_${SUFFIX}" up
-}
-
-
-cleanup() {
- ip tunnel del "ipip_${SUFFIX}"
- ip tunnel del "gre_${SUFFIX}"
- ip tunnel del "sit_${SUFFIX}"
-
- echo "tunnels after test:"
- ip tunnel show
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c
new file mode 100644
index 000000000000..595c79141cf3
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_features.c
@@ -0,0 +1,718 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/netdev.h>
+#include <linux/if_link.h>
+#include <signal.h>
+#include <argp.h>
+#include <net/if.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <pthread.h>
+
+#include <network_helpers.h>
+
+#include "xdp_features.skel.h"
+#include "xdp_features.h"
+
+#define RED(str) "\033[0;31m" str "\033[0m"
+#define GREEN(str) "\033[0;32m" str "\033[0m"
+#define YELLOW(str) "\033[0;33m" str "\033[0m"
+
+static struct env {
+ bool verbosity;
+ char ifname[IF_NAMESIZE];
+ int ifindex;
+ bool is_tester;
+ struct {
+ enum netdev_xdp_act drv_feature;
+ enum xdp_action action;
+ } feature;
+ struct sockaddr_storage dut_ctrl_addr;
+ struct sockaddr_storage dut_addr;
+ struct sockaddr_storage tester_addr;
+} env;
+
+#define BUFSIZE 128
+
+void test__fail(void) { /* for network_helpers.c */ }
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level == LIBBPF_DEBUG && !env.verbosity)
+ return 0;
+ return vfprintf(stderr, format, args);
+}
+
+static volatile bool exiting;
+
+static void sig_handler(int sig)
+{
+ exiting = true;
+}
+
+const char *argp_program_version = "xdp-features 0.0";
+const char argp_program_doc[] =
+"XDP features detection application.\n"
+"\n"
+"XDP features application checks the XDP advertised features match detected ones.\n"
+"\n"
+"USAGE: ./xdp-features [-vt] [-f <xdp-feature>] [-D <dut-data-ip>] [-T <tester-data-ip>] [-C <dut-ctrl-ip>] <iface-name>\n"
+"\n"
+"dut-data-ip, tester-data-ip, dut-ctrl-ip: IPv6 or IPv4-mapped-IPv6 addresses;\n"
+"\n"
+"XDP features\n:"
+"- XDP_PASS\n"
+"- XDP_DROP\n"
+"- XDP_ABORTED\n"
+"- XDP_REDIRECT\n"
+"- XDP_NDO_XMIT\n"
+"- XDP_TX\n";
+
+static const struct argp_option opts[] = {
+ { "verbose", 'v', NULL, 0, "Verbose debug output" },
+ { "tester", 't', NULL, 0, "Tester mode" },
+ { "feature", 'f', "XDP-FEATURE", 0, "XDP feature to test" },
+ { "dut_data_ip", 'D', "DUT-DATA-IP", 0, "DUT IP data channel" },
+ { "dut_ctrl_ip", 'C', "DUT-CTRL-IP", 0, "DUT IP control channel" },
+ { "tester_data_ip", 'T', "TESTER-DATA-IP", 0, "Tester IP data channel" },
+ {},
+};
+
+static int get_xdp_feature(const char *arg)
+{
+ if (!strcmp(arg, "XDP_PASS")) {
+ env.feature.action = XDP_PASS;
+ env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+ } else if (!strcmp(arg, "XDP_DROP")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+ env.feature.action = XDP_DROP;
+ } else if (!strcmp(arg, "XDP_ABORTED")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+ env.feature.action = XDP_ABORTED;
+ } else if (!strcmp(arg, "XDP_TX")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+ env.feature.action = XDP_TX;
+ } else if (!strcmp(arg, "XDP_REDIRECT")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_REDIRECT;
+ env.feature.action = XDP_REDIRECT;
+ } else if (!strcmp(arg, "XDP_NDO_XMIT")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static char *get_xdp_feature_str(void)
+{
+ switch (env.feature.action) {
+ case XDP_PASS:
+ return YELLOW("XDP_PASS");
+ case XDP_DROP:
+ return YELLOW("XDP_DROP");
+ case XDP_ABORTED:
+ return YELLOW("XDP_ABORTED");
+ case XDP_TX:
+ return YELLOW("XDP_TX");
+ case XDP_REDIRECT:
+ return YELLOW("XDP_REDIRECT");
+ default:
+ break;
+ }
+
+ if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT)
+ return YELLOW("XDP_NDO_XMIT");
+
+ return "";
+}
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case 'v':
+ env.verbosity = true;
+ break;
+ case 't':
+ env.is_tester = true;
+ break;
+ case 'f':
+ if (get_xdp_feature(arg) < 0) {
+ fprintf(stderr, "Invalid xdp feature: %s\n", arg);
+ argp_usage(state);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ case 'D':
+ if (make_sockaddr(AF_INET6, arg, DUT_ECHO_PORT,
+ &env.dut_addr, NULL)) {
+ fprintf(stderr,
+ "Invalid address assigned to the Device Under Test: %s\n",
+ arg);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ case 'C':
+ if (make_sockaddr(AF_INET6, arg, DUT_CTRL_PORT,
+ &env.dut_ctrl_addr, NULL)) {
+ fprintf(stderr,
+ "Invalid address assigned to the Device Under Test: %s\n",
+ arg);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ case 'T':
+ if (make_sockaddr(AF_INET6, arg, 0, &env.tester_addr, NULL)) {
+ fprintf(stderr,
+ "Invalid address assigned to the Tester device: %s\n",
+ arg);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ case ARGP_KEY_ARG:
+ errno = 0;
+ if (strlen(arg) >= IF_NAMESIZE) {
+ fprintf(stderr, "Invalid device name: %s\n", arg);
+ argp_usage(state);
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ env.ifindex = if_nametoindex(arg);
+ if (!env.ifindex)
+ env.ifindex = strtoul(arg, NULL, 0);
+ if (!env.ifindex || !if_indextoname(env.ifindex, env.ifname)) {
+ fprintf(stderr,
+ "Bad interface index or name (%d): %s\n",
+ errno, strerror(errno));
+ argp_usage(state);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+};
+
+static void set_env_default(void)
+{
+ env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT;
+ env.feature.action = -EINVAL;
+ env.ifindex = -ENODEV;
+ strcpy(env.ifname, "unknown");
+ make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT,
+ &env.dut_ctrl_addr, NULL);
+ make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT,
+ &env.dut_addr, NULL);
+ make_sockaddr(AF_INET6, "::ffff:127.0.0.1", 0, &env.tester_addr, NULL);
+}
+
+static void *dut_echo_thread(void *arg)
+{
+ unsigned char buf[sizeof(struct tlv_hdr)];
+ int sockfd = *(int *)arg;
+
+ while (!exiting) {
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ size_t n;
+
+ n = recvfrom(sockfd, buf, sizeof(buf), MSG_WAITALL,
+ (struct sockaddr *)&addr, &addrlen);
+ if (n != ntohs(tlv->len))
+ continue;
+
+ if (ntohs(tlv->type) != CMD_ECHO)
+ continue;
+
+ sendto(sockfd, buf, sizeof(buf), MSG_NOSIGNAL | MSG_CONFIRM,
+ (struct sockaddr *)&addr, addrlen);
+ }
+
+ pthread_exit((void *)0);
+ close(sockfd);
+
+ return NULL;
+}
+
+static int dut_run_echo_thread(pthread_t *t, int *sockfd)
+{
+ int err;
+
+ sockfd = start_reuseport_server(AF_INET6, SOCK_DGRAM, NULL,
+ DUT_ECHO_PORT, 0, 1);
+ if (!sockfd) {
+ fprintf(stderr,
+ "Failed creating data UDP socket on device %s\n",
+ env.ifname);
+ return -errno;
+ }
+
+ /* start echo channel */
+ err = pthread_create(t, NULL, dut_echo_thread, sockfd);
+ if (err) {
+ fprintf(stderr,
+ "Failed creating data UDP thread on device %s: %s\n",
+ env.ifname, strerror(-err));
+ free_fds(sockfd, 1);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dut_attach_xdp_prog(struct xdp_features *skel, int flags)
+{
+ enum xdp_action action = env.feature.action;
+ struct bpf_program *prog;
+ unsigned int key = 0;
+ int err, fd = 0;
+
+ if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT) {
+ struct bpf_devmap_val entry = {
+ .ifindex = env.ifindex,
+ };
+
+ err = bpf_map__update_elem(skel->maps.dev_map,
+ &key, sizeof(key),
+ &entry, sizeof(entry), 0);
+ if (err < 0)
+ return err;
+
+ fd = bpf_program__fd(skel->progs.xdp_do_redirect_cpumap);
+ action = XDP_REDIRECT;
+ }
+
+ switch (action) {
+ case XDP_TX:
+ prog = skel->progs.xdp_do_tx;
+ break;
+ case XDP_DROP:
+ prog = skel->progs.xdp_do_drop;
+ break;
+ case XDP_ABORTED:
+ prog = skel->progs.xdp_do_aborted;
+ break;
+ case XDP_PASS:
+ prog = skel->progs.xdp_do_pass;
+ break;
+ case XDP_REDIRECT: {
+ struct bpf_cpumap_val entry = {
+ .qsize = 2048,
+ .bpf_prog.fd = fd,
+ };
+
+ err = bpf_map__update_elem(skel->maps.cpu_map,
+ &key, sizeof(key),
+ &entry, sizeof(entry), 0);
+ if (err < 0)
+ return err;
+
+ prog = skel->progs.xdp_do_redirect;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL);
+ if (err)
+ fprintf(stderr, "Failed attaching XDP program to device %s\n",
+ env.ifname);
+ return err;
+}
+
+static int recv_msg(int sockfd, void *buf, size_t bufsize, void *val,
+ size_t val_size)
+{
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+ size_t len;
+
+ len = recv(sockfd, buf, bufsize, 0);
+ if (len != ntohs(tlv->len) || len < sizeof(*tlv))
+ return -EINVAL;
+
+ if (val) {
+ len -= sizeof(*tlv);
+ if (len > val_size)
+ return -ENOMEM;
+
+ memcpy(val, tlv->data, len);
+ }
+
+ return 0;
+}
+
+static int dut_run(struct xdp_features *skel)
+{
+ int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
+ int state, err = 0, *sockfd, ctrl_sockfd, echo_sockfd;
+ struct sockaddr_storage ctrl_addr;
+ pthread_t dut_thread = 0;
+ socklen_t addrlen;
+
+ sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL,
+ DUT_CTRL_PORT, 0, 1);
+ if (!sockfd) {
+ fprintf(stderr,
+ "Failed creating control socket on device %s\n", env.ifname);
+ return -errno;
+ }
+
+ ctrl_sockfd = accept(*sockfd, (struct sockaddr *)&ctrl_addr, &addrlen);
+ if (ctrl_sockfd < 0) {
+ fprintf(stderr,
+ "Failed accepting connections on device %s control socket\n",
+ env.ifname);
+ free_fds(sockfd, 1);
+ return -errno;
+ }
+
+ /* CTRL loop */
+ while (!exiting) {
+ unsigned char buf[BUFSIZE] = {};
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+
+ err = recv_msg(ctrl_sockfd, buf, BUFSIZE, NULL, 0);
+ if (err)
+ continue;
+
+ switch (ntohs(tlv->type)) {
+ case CMD_START: {
+ if (state == CMD_START)
+ continue;
+
+ state = CMD_START;
+ /* Load the XDP program on the DUT */
+ err = dut_attach_xdp_prog(skel, flags);
+ if (err)
+ goto out;
+
+ err = dut_run_echo_thread(&dut_thread, &echo_sockfd);
+ if (err < 0)
+ goto out;
+
+ tlv->type = htons(CMD_ACK);
+ tlv->len = htons(sizeof(*tlv));
+ err = send(ctrl_sockfd, buf, sizeof(*tlv), 0);
+ if (err < 0)
+ goto end_thread;
+ break;
+ }
+ case CMD_STOP:
+ if (state != CMD_START)
+ break;
+
+ state = CMD_STOP;
+
+ exiting = true;
+ bpf_xdp_detach(env.ifindex, flags, NULL);
+
+ tlv->type = htons(CMD_ACK);
+ tlv->len = htons(sizeof(*tlv));
+ err = send(ctrl_sockfd, buf, sizeof(*tlv), 0);
+ goto end_thread;
+ case CMD_GET_XDP_CAP: {
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ unsigned long long val;
+ size_t n;
+
+ err = bpf_xdp_query(env.ifindex, XDP_FLAGS_DRV_MODE,
+ &opts);
+ if (err) {
+ fprintf(stderr,
+ "Failed querying XDP cap for device %s\n",
+ env.ifname);
+ goto end_thread;
+ }
+
+ tlv->type = htons(CMD_ACK);
+ n = sizeof(*tlv) + sizeof(opts.feature_flags);
+ tlv->len = htons(n);
+
+ val = htobe64(opts.feature_flags);
+ memcpy(tlv->data, &val, sizeof(val));
+
+ err = send(ctrl_sockfd, buf, n, 0);
+ if (err < 0)
+ goto end_thread;
+ break;
+ }
+ case CMD_GET_STATS: {
+ unsigned int key = 0, val;
+ size_t n;
+
+ err = bpf_map__lookup_elem(skel->maps.dut_stats,
+ &key, sizeof(key),
+ &val, sizeof(val), 0);
+ if (err) {
+ fprintf(stderr,
+ "bpf_map_lookup_elem failed (%d)\n", err);
+ goto end_thread;
+ }
+
+ tlv->type = htons(CMD_ACK);
+ n = sizeof(*tlv) + sizeof(val);
+ tlv->len = htons(n);
+
+ val = htonl(val);
+ memcpy(tlv->data, &val, sizeof(val));
+
+ err = send(ctrl_sockfd, buf, n, 0);
+ if (err < 0)
+ goto end_thread;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+end_thread:
+ pthread_join(dut_thread, NULL);
+out:
+ bpf_xdp_detach(env.ifindex, flags, NULL);
+ close(ctrl_sockfd);
+ free_fds(sockfd, 1);
+
+ return err;
+}
+
+static bool tester_collect_detected_cap(struct xdp_features *skel,
+ unsigned int dut_stats)
+{
+ unsigned int err, key = 0, val;
+
+ if (!dut_stats)
+ return false;
+
+ err = bpf_map__lookup_elem(skel->maps.stats, &key, sizeof(key),
+ &val, sizeof(val), 0);
+ if (err) {
+ fprintf(stderr, "bpf_map_lookup_elem failed (%d)\n", err);
+ return false;
+ }
+
+ switch (env.feature.action) {
+ case XDP_PASS:
+ case XDP_TX:
+ case XDP_REDIRECT:
+ return val > 0;
+ case XDP_DROP:
+ case XDP_ABORTED:
+ return val == 0;
+ default:
+ break;
+ }
+
+ if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT)
+ return val > 0;
+
+ return false;
+}
+
+static int send_and_recv_msg(int sockfd, enum test_commands cmd, void *val,
+ size_t val_size)
+{
+ unsigned char buf[BUFSIZE] = {};
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+ int err;
+
+ tlv->type = htons(cmd);
+ tlv->len = htons(sizeof(*tlv));
+
+ err = send(sockfd, buf, sizeof(*tlv), 0);
+ if (err < 0)
+ return err;
+
+ err = recv_msg(sockfd, buf, BUFSIZE, val, val_size);
+ if (err < 0)
+ return err;
+
+ return ntohs(tlv->type) == CMD_ACK ? 0 : -EINVAL;
+}
+
+static int send_echo_msg(void)
+{
+ unsigned char buf[sizeof(struct tlv_hdr)];
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+ int sockfd, n;
+
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (sockfd < 0) {
+ fprintf(stderr,
+ "Failed creating data UDP socket on device %s\n",
+ env.ifname);
+ return -errno;
+ }
+
+ tlv->type = htons(CMD_ECHO);
+ tlv->len = htons(sizeof(*tlv));
+
+ n = sendto(sockfd, buf, sizeof(*tlv), MSG_NOSIGNAL | MSG_CONFIRM,
+ (struct sockaddr *)&env.dut_addr, sizeof(env.dut_addr));
+ close(sockfd);
+
+ return n == ntohs(tlv->len) ? 0 : -EINVAL;
+}
+
+static int tester_run(struct xdp_features *skel)
+{
+ int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
+ unsigned long long advertised_feature;
+ struct bpf_program *prog;
+ unsigned int stats;
+ int i, err, sockfd;
+ bool detected_cap;
+
+ sockfd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (sockfd < 0) {
+ fprintf(stderr,
+ "Failed creating tester service control socket\n");
+ return -errno;
+ }
+
+ if (settimeo(sockfd, 1000) < 0)
+ return -EINVAL;
+
+ err = connect(sockfd, (struct sockaddr *)&env.dut_ctrl_addr,
+ sizeof(env.dut_ctrl_addr));
+ if (err) {
+ fprintf(stderr,
+ "Failed connecting to the Device Under Test control socket\n");
+ return -errno;
+ }
+
+ err = send_and_recv_msg(sockfd, CMD_GET_XDP_CAP, &advertised_feature,
+ sizeof(advertised_feature));
+ if (err < 0) {
+ close(sockfd);
+ return err;
+ }
+
+ advertised_feature = be64toh(advertised_feature);
+
+ if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT ||
+ env.feature.action == XDP_TX)
+ prog = skel->progs.xdp_tester_check_tx;
+ else
+ prog = skel->progs.xdp_tester_check_rx;
+
+ err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL);
+ if (err) {
+ fprintf(stderr, "Failed attaching XDP program to device %s\n",
+ env.ifname);
+ goto out;
+ }
+
+ err = send_and_recv_msg(sockfd, CMD_START, NULL, 0);
+ if (err)
+ goto out;
+
+ for (i = 0; i < 10 && !exiting; i++) {
+ err = send_echo_msg();
+ if (err < 0)
+ goto out;
+
+ sleep(1);
+ }
+
+ err = send_and_recv_msg(sockfd, CMD_GET_STATS, &stats, sizeof(stats));
+ if (err)
+ goto out;
+
+ /* stop the test */
+ err = send_and_recv_msg(sockfd, CMD_STOP, NULL, 0);
+ /* send a new echo message to wake echo thread of the dut */
+ send_echo_msg();
+
+ detected_cap = tester_collect_detected_cap(skel, ntohl(stats));
+
+ fprintf(stdout, "Feature %s: [%s][%s]\n", get_xdp_feature_str(),
+ detected_cap ? GREEN("DETECTED") : RED("NOT DETECTED"),
+ env.feature.drv_feature & advertised_feature ? GREEN("ADVERTISED")
+ : RED("NOT ADVERTISED"));
+out:
+ bpf_xdp_detach(env.ifindex, flags, NULL);
+ close(sockfd);
+ return err < 0 ? err : 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct xdp_features *skel;
+ int err;
+
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ libbpf_set_print(libbpf_print_fn);
+
+ signal(SIGINT, sig_handler);
+ signal(SIGTERM, sig_handler);
+
+ set_env_default();
+
+ /* Parse command line arguments */
+ err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+ if (err)
+ return err;
+
+ if (env.ifindex < 0) {
+ fprintf(stderr, "Invalid device name %s\n", env.ifname);
+ return -ENODEV;
+ }
+
+ /* Load and verify BPF application */
+ skel = xdp_features__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to open and load BPF skeleton\n");
+ return -EINVAL;
+ }
+
+ skel->rodata->tester_addr =
+ ((struct sockaddr_in6 *)&env.tester_addr)->sin6_addr;
+ skel->rodata->dut_addr =
+ ((struct sockaddr_in6 *)&env.dut_addr)->sin6_addr;
+
+ /* Load & verify BPF programs */
+ err = xdp_features__load(skel);
+ if (err) {
+ fprintf(stderr, "Failed to load and verify BPF skeleton\n");
+ goto cleanup;
+ }
+
+ err = xdp_features__attach(skel);
+ if (err) {
+ fprintf(stderr, "Failed to attach BPF skeleton\n");
+ goto cleanup;
+ }
+
+ if (env.is_tester) {
+ /* Tester */
+ fprintf(stdout, "Starting tester service on device %s\n",
+ env.ifname);
+ err = tester_run(skel);
+ } else {
+ /* DUT */
+ fprintf(stdout, "Starting test on device %s\n", env.ifname);
+ err = dut_run(skel);
+ }
+
+cleanup:
+ xdp_features__destroy(skel);
+
+ return err < 0 ? -err : 0;
+}
diff --git a/tools/testing/selftests/bpf/xdp_features.h b/tools/testing/selftests/bpf/xdp_features.h
new file mode 100644
index 000000000000..2670c541713b
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_features.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* test commands */
+enum test_commands {
+ CMD_STOP, /* CMD */
+ CMD_START, /* CMD */
+ CMD_ECHO, /* CMD */
+ CMD_ACK, /* CMD + data */
+ CMD_GET_XDP_CAP, /* CMD */
+ CMD_GET_STATS, /* CMD */
+};
+
+#define DUT_CTRL_PORT 12345
+#define DUT_ECHO_PORT 12346
+
+struct tlv_hdr {
+ __be16 type;
+ __be16 len;
+ __u8 data[];
+};
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
new file mode 100644
index 000000000000..3d8de0d4c96a
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -0,0 +1,894 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Reference program for verifying XDP metadata on real HW. Functional test
+ * only, doesn't test the performance.
+ *
+ * RX:
+ * - UDP 9091 packets are diverted into AF_XDP
+ * - Metadata verified:
+ * - rx_timestamp
+ * - rx_hash
+ *
+ * TX:
+ * - UDP 9091 packets trigger TX reply
+ * - TX HW timestamp is requested and reported back upon completion
+ * - TX checksum is requested
+ * - TX launch time HW offload is requested for transmission
+ */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "xdp_hw_metadata.skel.h"
+#include "xsk.h"
+
+#include <error.h>
+#include <linux/kernel.h>
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/errqueue.h>
+#include <linux/if_link.h>
+#include <linux/net_tstamp.h>
+#include <netinet/udp.h>
+#include <linux/sockios.h>
+#include <linux/if_xdp.h>
+#include <sys/mman.h>
+#include <net/if.h>
+#include <ctype.h>
+#include <poll.h>
+#include <time.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/pkt_sched.h>
+#include <linux/pkt_cls.h>
+#include <linux/ethtool.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include "xdp_metadata.h"
+
+#define UMEM_NUM 256
+#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
+#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
+#define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE)
+
+struct xsk {
+ void *umem_area;
+ struct xsk_umem *umem;
+ struct xsk_ring_prod fill;
+ struct xsk_ring_cons comp;
+ struct xsk_ring_prod tx;
+ struct xsk_ring_cons rx;
+ struct xsk_socket *socket;
+};
+
+struct xdp_hw_metadata *bpf_obj;
+__u16 bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
+struct xsk *rx_xsk;
+const char *ifname;
+int ifindex;
+int rxq;
+bool skip_tx;
+__u64 last_hw_rx_timestamp;
+__u64 last_xdp_rx_timestamp;
+__u64 last_launch_time;
+__u64 launch_time_delta_to_hw_rx_timestamp;
+int launch_time_queue;
+
+#define run_command(cmd, ...) \
+({ \
+ char command[1024]; \
+ memset(command, 0, sizeof(command)); \
+ snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
+ fprintf(stderr, "Running: %s\n", command); \
+ system(command); \
+})
+
+void test__fail(void) { /* for network_helpers.c */ }
+
+static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
+{
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ const struct xsk_socket_config socket_config = {
+ .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .bind_flags = bind_flags,
+ };
+ const struct xsk_umem_config umem_config = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .flags = XDP_UMEM_TX_METADATA_LEN,
+ .tx_metadata_len = sizeof(struct xsk_tx_metadata),
+ };
+ __u32 idx = 0;
+ u64 addr;
+ int ret;
+ int i;
+
+ xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (xsk->umem_area == MAP_FAILED)
+ return -ENOMEM;
+
+ ret = xsk_umem__create(&xsk->umem,
+ xsk->umem_area, UMEM_SIZE,
+ &xsk->fill,
+ &xsk->comp,
+ &umem_config);
+ if (ret)
+ return ret;
+
+ ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
+ xsk->umem,
+ &xsk->rx,
+ &xsk->tx,
+ &socket_config);
+ if (ret)
+ return ret;
+
+ /* First half of umem is for TX. This way address matches 1-to-1
+ * to the completion queue index.
+ */
+
+ for (i = 0; i < UMEM_NUM / 2; i++) {
+ addr = i * UMEM_FRAME_SIZE;
+ printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
+ }
+
+ /* Second half of umem is for RX. */
+
+ ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
+ for (i = 0; i < UMEM_NUM / 2; i++) {
+ addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
+ printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
+ *xsk_ring_prod__fill_addr(&xsk->fill, idx + i) = addr;
+ }
+ xsk_ring_prod__submit(&xsk->fill, ret);
+
+ return 0;
+}
+
+static void close_xsk(struct xsk *xsk)
+{
+ if (xsk->umem)
+ xsk_umem__delete(xsk->umem);
+ if (xsk->socket)
+ xsk_socket__delete(xsk->socket);
+ munmap(xsk->umem_area, UMEM_SIZE);
+}
+
+static void refill_rx(struct xsk *xsk, __u64 addr)
+{
+ __u32 idx;
+
+ if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) {
+ printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr);
+ *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
+ xsk_ring_prod__submit(&xsk->fill, 1);
+ }
+}
+
+static int kick_tx(struct xsk *xsk)
+{
+ return sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
+}
+
+static int kick_rx(struct xsk *xsk)
+{
+ return recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+}
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+static __u64 gettime(clockid_t clock_id)
+{
+ struct timespec t;
+ int res;
+
+ /* See man clock_gettime(2) for type of clock_id's */
+ res = clock_gettime(clock_id, &t);
+
+ if (res < 0)
+ error(res, errno, "Error with clock_gettime()");
+
+ return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+static void print_tstamp_delta(const char *name, const char *refname,
+ __u64 tstamp, __u64 reference)
+{
+ __s64 delta = (__s64)reference - (__s64)tstamp;
+
+ printf("%s: %llu (sec:%0.4f) delta to %s sec:%0.4f (%0.3f usec)\n",
+ name, tstamp, (double)tstamp / NANOSEC_PER_SEC, refname,
+ (double)delta / NANOSEC_PER_SEC,
+ (double)delta / 1000);
+}
+
+#define VLAN_PRIO_MASK GENMASK(15, 13) /* Priority Code Point */
+#define VLAN_DEI_MASK GENMASK(12, 12) /* Drop Eligible Indicator */
+#define VLAN_VID_MASK GENMASK(11, 0) /* VLAN Identifier */
+static void print_vlan_tci(__u16 tag)
+{
+ __u16 vlan_id = FIELD_GET(VLAN_VID_MASK, tag);
+ __u8 pcp = FIELD_GET(VLAN_PRIO_MASK, tag);
+ bool dei = FIELD_GET(VLAN_DEI_MASK, tag);
+
+ printf("PCP=%u, DEI=%d, VID=0x%X\n", pcp, dei, vlan_id);
+}
+
+static void verify_xdp_metadata(void *data, clockid_t clock_id)
+{
+ struct xdp_meta *meta;
+
+ meta = data - sizeof(*meta);
+
+ if (meta->hint_valid & XDP_META_FIELD_RSS)
+ printf("rx_hash: 0x%X with RSS type:0x%X\n",
+ meta->rx_hash, meta->rx_hash_type);
+ else
+ printf("No rx_hash, err=%d\n", meta->rx_hash_err);
+
+ if (meta->hint_valid & XDP_META_FIELD_TS) {
+ __u64 ref_tstamp = gettime(clock_id);
+
+ /* store received timestamps to calculate a delta at tx */
+ last_hw_rx_timestamp = meta->rx_timestamp;
+ last_xdp_rx_timestamp = meta->xdp_timestamp;
+
+ print_tstamp_delta("HW RX-time", "User RX-time",
+ meta->rx_timestamp, ref_tstamp);
+ print_tstamp_delta("XDP RX-time", "User RX-time",
+ meta->xdp_timestamp, ref_tstamp);
+ } else {
+ printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err);
+ }
+
+ if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) {
+ printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto));
+ printf("rx_vlan_tci: ");
+ print_vlan_tci(meta->rx_vlan_tci);
+ } else {
+ printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n",
+ meta->rx_vlan_tag_err);
+ }
+}
+
+static void verify_skb_metadata(int fd)
+{
+ char cmsg_buf[1024];
+ char packet_buf[128];
+
+ struct scm_timestamping *ts;
+ struct iovec packet_iov;
+ struct cmsghdr *cmsg;
+ struct msghdr hdr;
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_iov = &packet_iov;
+ hdr.msg_iovlen = 1;
+ packet_iov.iov_base = packet_buf;
+ packet_iov.iov_len = sizeof(packet_buf);
+
+ hdr.msg_control = cmsg_buf;
+ hdr.msg_controllen = sizeof(cmsg_buf);
+
+ if (recvmsg(fd, &hdr, 0) < 0)
+ error(1, errno, "recvmsg");
+
+ for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
+
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case SCM_TIMESTAMPING:
+ ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
+ if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
+ printf("found skb hwtstamp = %lu.%lu\n",
+ ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ printf("skb hwtstamp is not found!\n");
+}
+
+static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
+{
+ struct xsk_tx_metadata *meta;
+ __u64 addr;
+ void *data;
+ __u32 idx;
+
+ if (!xsk_ring_cons__peek(&xsk->comp, 1, &idx))
+ return false;
+
+ addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
+ data = xsk_umem__get_data(xsk->umem_area, addr);
+ meta = data - sizeof(struct xsk_tx_metadata);
+
+ printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
+
+ if (meta->completion.tx_timestamp) {
+ __u64 ref_tstamp = gettime(clock_id);
+
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ print_tstamp_delta("HW Launch-time",
+ "HW TX-complete-time",
+ last_launch_time,
+ meta->completion.tx_timestamp);
+ }
+ print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
+ meta->completion.tx_timestamp, ref_tstamp);
+ print_tstamp_delta("XDP RX-time", "User TX-complete-time",
+ last_xdp_rx_timestamp, ref_tstamp);
+ print_tstamp_delta("HW RX-time", "HW TX-complete-time",
+ last_hw_rx_timestamp, meta->completion.tx_timestamp);
+ } else {
+ printf("No tx_timestamp\n");
+ }
+
+ xsk_ring_cons__release(&xsk->comp, 1);
+
+ return true;
+}
+
+#define swap(a, b, len) do { \
+ for (int i = 0; i < len; i++) { \
+ __u8 tmp = ((__u8 *)a)[i]; \
+ ((__u8 *)a)[i] = ((__u8 *)b)[i]; \
+ ((__u8 *)b)[i] = tmp; \
+ } \
+} while (0)
+
+static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
+{
+ struct xsk_tx_metadata *meta;
+ struct ipv6hdr *ip6h = NULL;
+ struct iphdr *iph = NULL;
+ struct xdp_desc *tx_desc;
+ struct udphdr *udph;
+ struct ethhdr *eth;
+ __sum16 want_csum;
+ void *data;
+ __u32 idx;
+ int ret;
+ int len;
+
+ ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
+ if (ret != 1) {
+ printf("%p: failed to reserve tx slot\n", xsk);
+ return;
+ }
+
+ tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
+ tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata);
+ data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
+
+ meta = data - sizeof(struct xsk_tx_metadata);
+ memset(meta, 0, sizeof(*meta));
+ meta->flags = XDP_TXMD_FLAGS_TIMESTAMP;
+
+ eth = rx_packet;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ iph = (void *)(eth + 1);
+ udph = (void *)(iph + 1);
+ } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+ ip6h = (void *)(eth + 1);
+ udph = (void *)(ip6h + 1);
+ } else {
+ printf("%p: failed to detect IP version for ping pong %04x\n", xsk, eth->h_proto);
+ xsk_ring_prod__cancel(&xsk->tx, 1);
+ return;
+ }
+
+ len = ETH_HLEN;
+ if (ip6h)
+ len += sizeof(*ip6h) + ntohs(ip6h->payload_len);
+ if (iph)
+ len += ntohs(iph->tot_len);
+
+ swap(eth->h_dest, eth->h_source, ETH_ALEN);
+ if (iph)
+ swap(&iph->saddr, &iph->daddr, 4);
+ else
+ swap(&ip6h->saddr, &ip6h->daddr, 16);
+ swap(&udph->source, &udph->dest, 2);
+
+ want_csum = udph->check;
+ if (ip6h)
+ udph->check = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ ntohs(udph->len), IPPROTO_UDP, 0);
+ else
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ ntohs(udph->len), IPPROTO_UDP, 0);
+
+ meta->flags |= XDP_TXMD_FLAGS_CHECKSUM;
+ if (iph)
+ meta->request.csum_start = sizeof(*eth) + sizeof(*iph);
+ else
+ meta->request.csum_start = sizeof(*eth) + sizeof(*ip6h);
+ meta->request.csum_offset = offsetof(struct udphdr, check);
+
+ printf("%p: ping-pong with csum=%04x (want %04x) csum_start=%d csum_offset=%d\n",
+ xsk, ntohs(udph->check), ntohs(want_csum),
+ meta->request.csum_start, meta->request.csum_offset);
+
+ /* Set the value of launch time */
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
+ meta->request.launch_time = last_hw_rx_timestamp +
+ launch_time_delta_to_hw_rx_timestamp;
+ last_launch_time = meta->request.launch_time;
+ print_tstamp_delta("HW RX-time", "HW Launch-time",
+ last_hw_rx_timestamp,
+ meta->request.launch_time);
+ }
+
+ memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
+ tx_desc->options |= XDP_TX_METADATA;
+ tx_desc->len = len;
+
+ xsk_ring_prod__submit(&xsk->tx, 1);
+}
+
+static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
+{
+ const struct xdp_desc *rx_desc;
+ struct pollfd fds[rxq + 1];
+ __u64 comp_addr;
+ __u64 deadline;
+ __u64 addr;
+ __u32 idx = 0;
+ int ret;
+ int i;
+
+ for (i = 0; i < rxq; i++) {
+ fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
+ fds[i].events = POLLIN;
+ fds[i].revents = 0;
+ }
+
+ fds[rxq].fd = server_fd;
+ fds[rxq].events = POLLIN;
+ fds[rxq].revents = 0;
+
+ while (true) {
+ errno = 0;
+
+ for (i = 0; i < rxq; i++) {
+ ret = kick_rx(&rx_xsk[i]);
+ if (ret)
+ printf("kick_rx ret=%d\n", ret);
+ }
+
+ ret = poll(fds, rxq + 1, 1000);
+ printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
+ ret, errno, bpf_obj->bss->pkts_skip,
+ bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
+ if (ret < 0)
+ break;
+ if (ret == 0)
+ continue;
+
+ if (fds[rxq].revents)
+ verify_skb_metadata(server_fd);
+
+ for (i = 0; i < rxq; i++) {
+ bool first_seg = true;
+ bool is_eop = true;
+
+ if (fds[i].revents == 0)
+ continue;
+
+ struct xsk *xsk = &rx_xsk[i];
+peek:
+ ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
+ printf("xsk_ring_cons__peek: %d\n", ret);
+ if (ret != 1)
+ continue;
+
+ rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
+ comp_addr = xsk_umem__extract_addr(rx_desc->addr);
+ addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
+ is_eop = !(rx_desc->options & XDP_PKT_CONTD);
+ printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n",
+ xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : "");
+ if (first_seg) {
+ verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
+ clock_id);
+ first_seg = false;
+
+ if (!skip_tx) {
+ /* mirror first chunk back */
+ ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr),
+ clock_id);
+
+ ret = kick_tx(xsk);
+ if (ret)
+ printf("kick_tx ret=%d\n", ret);
+
+ /* wait 1 second + cover launch time */
+ deadline = gettime(clock_id) +
+ NANOSEC_PER_SEC +
+ launch_time_delta_to_hw_rx_timestamp;
+ while (true) {
+ if (complete_tx(xsk, clock_id))
+ break;
+ if (gettime(clock_id) >= deadline)
+ break;
+ usleep(10);
+ }
+ }
+ }
+
+ xsk_ring_cons__release(&xsk->rx, 1);
+ refill_rx(xsk, comp_addr);
+ if (!is_eop)
+ goto peek;
+ }
+ }
+
+ return 0;
+}
+
+static int rxq_num(const char *ifname)
+{
+ struct ethtool_channels ch = {
+ .cmd = ETHTOOL_GCHANNELS,
+ };
+
+ struct ifreq ifr = {
+ .ifr_data = (void *)&ch,
+ };
+ strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
+ int fd, ret;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+ if (fd < 0)
+ error(1, errno, "socket");
+
+ ret = ioctl(fd, SIOCETHTOOL, &ifr);
+ if (ret < 0)
+ error(1, errno, "ioctl(SIOCETHTOOL)");
+
+ close(fd);
+
+ return ch.rx_count + ch.combined_count;
+}
+
+static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
+{
+ struct ifreq ifr = {
+ .ifr_data = (void *)cfg,
+ };
+ strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
+ int fd, ret;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+ if (fd < 0)
+ error(1, errno, "socket");
+
+ ret = ioctl(fd, op, &ifr);
+ if (ret < 0)
+ error(1, errno, "ioctl(%d)", op);
+
+ close(fd);
+}
+
+static struct hwtstamp_config saved_hwtstamp_cfg;
+static const char *saved_hwtstamp_ifname;
+
+static void hwtstamp_restore(void)
+{
+ hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
+}
+
+static void hwtstamp_enable(const char *ifname)
+{
+ struct hwtstamp_config cfg = {
+ .rx_filter = HWTSTAMP_FILTER_ALL,
+ .tx_type = HWTSTAMP_TX_ON,
+ };
+
+ hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
+ saved_hwtstamp_ifname = strdup(ifname);
+ atexit(hwtstamp_restore);
+
+ hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
+}
+
+static void cleanup(void)
+{
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ int ret;
+ int i;
+
+ if (bpf_obj) {
+ opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
+ if (opts.old_prog_fd >= 0) {
+ printf("detaching bpf program....\n");
+ ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
+ if (ret)
+ printf("failed to detach XDP program: %d\n", ret);
+ }
+ }
+
+ for (i = 0; i < rxq; i++)
+ close_xsk(&rx_xsk[i]);
+
+ if (bpf_obj)
+ xdp_hw_metadata__destroy(bpf_obj);
+
+ free((void *)saved_hwtstamp_ifname);
+}
+
+static void handle_signal(int sig)
+{
+ /* interrupting poll() is all we need */
+}
+
+static void timestamping_enable(int fd, int val)
+{
+ int ret;
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
+ if (ret < 0)
+ error(1, errno, "setsockopt(SO_TIMESTAMPING)");
+}
+
+static void print_usage(void)
+{
+ const char *usage =
+ "Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n"
+ " -c Run in copy mode (zerocopy is default)\n"
+ " -h Display this help and exit\n\n"
+ " -m Enable multi-buffer XDP for larger MTU\n"
+ " -r Don't generate AF_XDP reply (rx metadata only)\n"
+ " -l Delta of launch time relative to HW RX-time in ns\n"
+ " default: 0 ns (launch time request is disabled)\n"
+ " -L Tx Queue to be enabled with launch time offload\n"
+ " default: 0 (Tx Queue 0)\n"
+ "Generate test packets on the other machine with:\n"
+ " echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
+
+ printf("%s", usage);
+}
+
+static void read_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "chmrl:L:")) != -1) {
+ switch (opt) {
+ case 'c':
+ bind_flags &= ~XDP_USE_NEED_WAKEUP;
+ bind_flags &= ~XDP_ZEROCOPY;
+ bind_flags |= XDP_COPY;
+ break;
+ case 'h':
+ print_usage();
+ exit(0);
+ case 'm':
+ bind_flags |= XDP_USE_SG;
+ break;
+ case 'r':
+ skip_tx = true;
+ break;
+ case 'l':
+ launch_time_delta_to_hw_rx_timestamp = atoll(optarg);
+ break;
+ case 'L':
+ launch_time_queue = atoll(optarg);
+ break;
+ case '?':
+ if (isprint(optopt))
+ fprintf(stderr, "Unknown option: -%c\n", optopt);
+ fallthrough;
+ default:
+ print_usage();
+ error(-1, opterr, "Command line options error");
+ }
+ }
+
+ if (optind >= argc) {
+ fprintf(stderr, "No device name provided\n");
+ print_usage();
+ exit(-1);
+ }
+
+ ifname = argv[optind];
+ ifindex = if_nametoindex(ifname);
+
+ if (!ifname)
+ error(-1, errno, "Invalid interface name");
+}
+
+void clean_existing_configurations(void)
+{
+ /* Check and delete root qdisc if exists */
+ if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc mqprio 8001:'", ifname) == 0)
+ run_command("sudo tc qdisc del dev %s root", ifname);
+
+ /* Check and delete ingress qdisc if exists */
+ if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc ingress ffff:'", ifname) == 0)
+ run_command("sudo tc qdisc del dev %s ingress", ifname);
+
+ /* Check and delete ethtool filters if any exist */
+ if (run_command("sudo ethtool -n %s | grep -q 'Filter:'", ifname) == 0) {
+ run_command("sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 sudo ethtool -N %s delete >&2",
+ ifname, ifname);
+ }
+}
+
+#define MAX_TC 16
+
+int main(int argc, char *argv[])
+{
+ clockid_t clock_id = CLOCK_TAI;
+ struct bpf_program *prog;
+ int server_fd = -1;
+ size_t map_len = 0;
+ size_t que_len = 0;
+ char *buf = NULL;
+ char *map = NULL;
+ char *que = NULL;
+ char *tmp = NULL;
+ int tc = 0;
+ int ret;
+ int i;
+
+ read_args(argc, argv);
+
+ rxq = rxq_num(ifname);
+ printf("rxq: %d\n", rxq);
+
+ if (launch_time_queue >= rxq || launch_time_queue < 0)
+ error(1, 0, "Invalid launch_time_queue.");
+
+ clean_existing_configurations();
+ sleep(1);
+
+ /* Enable tx and rx hardware timestamping */
+ hwtstamp_enable(ifname);
+
+ /* Prepare priority to traffic class map for tc-mqprio */
+ for (i = 0; i < MAX_TC; i++) {
+ if (i < rxq)
+ tc = i;
+
+ if (asprintf(&buf, "%d ", tc) == -1) {
+ printf("Failed to malloc buf for tc map.\n");
+ goto free_mem;
+ }
+
+ map_len += strlen(buf);
+ tmp = realloc(map, map_len + 1);
+ if (!tmp) {
+ printf("Failed to realloc tc map.\n");
+ goto free_mem;
+ }
+ map = tmp;
+ strcat(map, buf);
+ free(buf);
+ buf = NULL;
+ }
+
+ /* Prepare traffic class to hardware queue map for tc-mqprio */
+ for (i = 0; i <= tc; i++) {
+ if (asprintf(&buf, "1@%d ", i) == -1) {
+ printf("Failed to malloc buf for tc queues.\n");
+ goto free_mem;
+ }
+
+ que_len += strlen(buf);
+ tmp = realloc(que, que_len + 1);
+ if (!tmp) {
+ printf("Failed to realloc tc queues.\n");
+ goto free_mem;
+ }
+ que = tmp;
+ strcat(que, buf);
+ free(buf);
+ buf = NULL;
+ }
+
+ /* Add mqprio qdisc */
+ run_command("sudo tc qdisc add dev %s handle 8001: parent root mqprio num_tc %d map %squeues %shw 0",
+ ifname, tc + 1, map, que);
+
+ /* To test launch time, send UDP packet with VLAN priority 1 to port 9091 */
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ /* Enable launch time hardware offload on launch_time_queue */
+ run_command("sudo tc qdisc replace dev %s parent 8001:%d etf offload clockid CLOCK_TAI delta 500000",
+ ifname, launch_time_queue + 1);
+ sleep(1);
+
+ /* Route incoming packet with VLAN priority 1 into launch_time_queue */
+ if (run_command("sudo ethtool -N %s flow-type ether vlan 0x2000 vlan-mask 0x1FFF action %d",
+ ifname, launch_time_queue)) {
+ run_command("sudo tc qdisc add dev %s ingress", ifname);
+ run_command("sudo tc filter add dev %s parent ffff: protocol 802.1Q flower vlan_prio 1 hw_tc %d",
+ ifname, launch_time_queue);
+ }
+
+ /* Enable VLAN tag stripping offload */
+ run_command("sudo ethtool -K %s rxvlan on", ifname);
+ }
+
+ rx_xsk = malloc(sizeof(struct xsk) * rxq);
+ if (!rx_xsk)
+ error(1, ENOMEM, "malloc");
+
+ for (i = 0; i < rxq; i++) {
+ printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
+ ret = open_xsk(ifindex, &rx_xsk[i], i);
+ if (ret)
+ error(1, -ret, "open_xsk");
+
+ printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
+ }
+
+ printf("open bpf program...\n");
+ bpf_obj = xdp_hw_metadata__open();
+ if (libbpf_get_error(bpf_obj))
+ error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
+
+ prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
+ bpf_program__set_ifindex(prog, ifindex);
+ bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
+ printf("load bpf program...\n");
+ ret = xdp_hw_metadata__load(bpf_obj);
+ if (ret)
+ error(1, -ret, "xdp_hw_metadata__load");
+
+ printf("prepare skb endpoint...\n");
+ server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
+ if (server_fd < 0)
+ error(1, errno, "start_server");
+ timestamping_enable(server_fd,
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE);
+
+ printf("prepare xsk map...\n");
+ for (i = 0; i < rxq; i++) {
+ int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
+ __u32 queue_id = i;
+
+ printf("map[%d] = %d\n", queue_id, sock_fd);
+ ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
+ if (ret)
+ error(1, -ret, "bpf_map_update_elem");
+ }
+
+ printf("attach bpf program...\n");
+ ret = bpf_xdp_attach(ifindex,
+ bpf_program__fd(bpf_obj->progs.rx),
+ XDP_FLAGS, NULL);
+ if (ret)
+ error(1, -ret, "bpf_xdp_attach");
+
+ signal(SIGINT, handle_signal);
+ ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
+ close(server_fd);
+ cleanup();
+ if (ret)
+ error(1, -ret, "verify_metadata");
+
+ clean_existing_configurations();
+
+free_mem:
+ free(buf);
+ free(map);
+ free(que);
+}
diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h
new file mode 100644
index 000000000000..87318ad1117a
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#pragma once
+
+#ifndef ETH_P_IP
+#define ETH_P_IP 0x0800
+#endif
+
+#ifndef ETH_P_IPV6
+#define ETH_P_IPV6 0x86DD
+#endif
+
+#ifndef ETH_P_8021Q
+#define ETH_P_8021Q 0x8100
+#endif
+
+#ifndef ETH_P_8021AD
+#define ETH_P_8021AD 0x88A8
+#endif
+
+#ifndef BIT
+#define BIT(nr) (1 << (nr))
+#endif
+
+/* Non-existent checksum status */
+#define XDP_CHECKSUM_MAGIC BIT(2)
+
+enum xdp_meta_field {
+ XDP_META_FIELD_TS = BIT(0),
+ XDP_META_FIELD_RSS = BIT(1),
+ XDP_META_FIELD_VLAN_TAG = BIT(2),
+};
+
+struct xdp_meta {
+ union {
+ __u64 rx_timestamp;
+ __s32 rx_timestamp_err;
+ };
+ __u64 xdp_timestamp;
+ __u32 rx_hash;
+ union {
+ __u32 rx_hash_type;
+ __s32 rx_hash_err;
+ };
+ union {
+ struct {
+ __be16 rx_vlan_proto;
+ __u16 rx_vlan_tci;
+ };
+ __s32 rx_vlan_tag_err;
+ };
+ enum xdp_meta_field hint_valid;
+};
diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c
new file mode 100644
index 000000000000..ce68c342b56f
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_synproxy.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <stdnoreturn.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include <linux/limits.h>
+
+static unsigned int ifindex;
+static __u32 attached_prog_id;
+static bool attached_tc;
+
+static void noreturn cleanup(int sig)
+{
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ int prog_fd;
+ int err;
+
+ if (attached_prog_id == 0)
+ exit(0);
+
+ if (attached_tc) {
+ LIBBPF_OPTS(bpf_tc_hook, hook,
+ .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS);
+
+ err = bpf_tc_hook_destroy(&hook);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_hook_destroy: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to destroy the TC hook\n");
+ exit(1);
+ }
+ exit(0);
+ }
+
+ prog_fd = bpf_prog_get_fd_by_id(attached_prog_id);
+ if (prog_fd < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
+ err = bpf_xdp_attach(ifindex, -1, 0, NULL);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to detach XDP program\n");
+ exit(1);
+ }
+ } else {
+ opts.old_prog_fd = prog_fd;
+ err = bpf_xdp_attach(ifindex, -1, XDP_FLAGS_REPLACE, &opts);
+ close(prog_fd);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd_opts: %s\n", strerror(-err));
+ /* Not an error if already replaced by someone else. */
+ if (err != -EEXIST) {
+ fprintf(stderr, "Failed to detach XDP program\n");
+ exit(1);
+ }
+ }
+ }
+ exit(0);
+}
+
+static noreturn void usage(const char *progname)
+{
+ fprintf(stderr, "Usage: %s [--iface <iface>|--prog <prog_id>] [--mss4 <mss ipv4> --mss6 <mss ipv6> --wscale <wscale> --ttl <ttl>] [--ports <port1>,<port2>,...] [--single] [--tc]\n",
+ progname);
+ exit(1);
+}
+
+static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit)
+{
+ unsigned long res;
+ char *endptr;
+
+ errno = 0;
+ res = strtoul(arg, &endptr, 10);
+ if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit)
+ usage(progname);
+
+ return res;
+}
+
+static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id,
+ __u64 *tcpipopts, char **ports, bool *single, bool *tc)
+{
+ static struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "iface", required_argument, NULL, 'i' },
+ { "prog", required_argument, NULL, 'x' },
+ { "mss4", required_argument, NULL, 4 },
+ { "mss6", required_argument, NULL, 6 },
+ { "wscale", required_argument, NULL, 'w' },
+ { "ttl", required_argument, NULL, 't' },
+ { "ports", required_argument, NULL, 'p' },
+ { "single", no_argument, NULL, 's' },
+ { "tc", no_argument, NULL, 'c' },
+ { NULL, 0, NULL, 0 },
+ };
+ unsigned long mss4, wscale, ttl;
+ unsigned long long mss6;
+ unsigned int tcpipopts_mask = 0;
+
+ if (argc < 2)
+ usage(argv[0]);
+
+ *ifindex = 0;
+ *prog_id = 0;
+ *tcpipopts = 0;
+ *ports = NULL;
+ *single = false;
+ *tc = false;
+
+ while (true) {
+ int opt;
+
+ opt = getopt_long(argc, argv, "", long_options, NULL);
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'h':
+ usage(argv[0]);
+ break;
+ case 'i':
+ *ifindex = if_nametoindex(optarg);
+ if (*ifindex == 0)
+ usage(argv[0]);
+ break;
+ case 'x':
+ *prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX);
+ if (*prog_id == 0)
+ usage(argv[0]);
+ break;
+ case 4:
+ mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+ tcpipopts_mask |= 1 << 0;
+ break;
+ case 6:
+ mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+ tcpipopts_mask |= 1 << 1;
+ break;
+ case 'w':
+ wscale = parse_arg_ul(argv[0], optarg, 14);
+ tcpipopts_mask |= 1 << 2;
+ break;
+ case 't':
+ ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX);
+ tcpipopts_mask |= 1 << 3;
+ break;
+ case 'p':
+ *ports = optarg;
+ break;
+ case 's':
+ *single = true;
+ break;
+ case 'c':
+ *tc = true;
+ break;
+ default:
+ usage(argv[0]);
+ }
+ }
+ if (optind < argc)
+ usage(argv[0]);
+
+ if (tcpipopts_mask == 0xf) {
+ if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0)
+ usage(argv[0]);
+ *tcpipopts = (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4;
+ } else if (tcpipopts_mask != 0) {
+ usage(argv[0]);
+ }
+
+ if (*ifindex != 0 && *prog_id != 0)
+ usage(argv[0]);
+ if (*ifindex == 0 && *prog_id == 0)
+ usage(argv[0]);
+}
+
+static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ char xdp_filename[PATH_MAX];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int prog_fd;
+ int err;
+
+ snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.bpf.o", argv0);
+ obj = bpf_object__open_file(xdp_filename, NULL);
+ err = libbpf_get_error(obj);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
+ return err;
+ }
+
+ err = bpf_object__load(obj);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
+ return err;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, tc ? "syncookie_tc" : "syncookie_xdp");
+ if (!prog) {
+ fprintf(stderr, "Error: bpf_object__find_program_by_name: program was not found\n");
+ return -ENOENT;
+ }
+
+ prog_fd = bpf_program__fd(prog);
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n",
+ strerror(-err));
+ goto out;
+ }
+ attached_tc = tc;
+ attached_prog_id = info.id;
+ signal(SIGINT, cleanup);
+ signal(SIGTERM, cleanup);
+ if (tc) {
+ LIBBPF_OPTS(bpf_tc_hook, hook,
+ .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, opts,
+ .handle = 1,
+ .priority = 1,
+ .prog_fd = prog_fd);
+
+ err = bpf_tc_hook_create(&hook);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_hook_create: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+ err = bpf_tc_attach(&hook, &opts);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_attach: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+
+ } else {
+ err = bpf_xdp_attach(ifindex, prog_fd,
+ XDP_FLAGS_UPDATE_IF_NOEXIST, NULL);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+ }
+ err = 0;
+out:
+ bpf_object__close(obj);
+ return err;
+fail:
+ signal(SIGINT, SIG_DFL);
+ signal(SIGTERM, SIG_DFL);
+ attached_prog_id = 0;
+ goto out;
+}
+
+static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd)
+{
+ struct bpf_prog_info prog_info;
+ __u32 map_ids[8];
+ __u32 info_len;
+ int prog_fd;
+ int err;
+ int i;
+
+ *values_map_fd = -1;
+ *ports_map_fd = -1;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
+ return prog_fd;
+ }
+
+ prog_info = (struct bpf_prog_info) {
+ .nr_map_ids = 8,
+ .map_ids = (__u64)(unsigned long)map_ids,
+ };
+ info_len = sizeof(prog_info);
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n",
+ strerror(-err));
+ goto out;
+ }
+
+ if (prog_info.nr_map_ids < 2) {
+ fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n",
+ prog_info.nr_map_ids);
+ err = -ENOENT;
+ goto out;
+ }
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ struct bpf_map_info map_info = {};
+ int map_fd;
+
+ err = bpf_map_get_fd_by_id(map_ids[i]);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err));
+ goto err_close_map_fds;
+ }
+ map_fd = err;
+
+ info_len = sizeof(map_info);
+ err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_get_info_by_fd: %s\n",
+ strerror(-err));
+ close(map_fd);
+ goto err_close_map_fds;
+ }
+ if (strcmp(map_info.name, "values") == 0) {
+ *values_map_fd = map_fd;
+ continue;
+ }
+ if (strcmp(map_info.name, "allowed_ports") == 0) {
+ *ports_map_fd = map_fd;
+ continue;
+ }
+ close(map_fd);
+ }
+
+ if (*values_map_fd != -1 && *ports_map_fd != -1) {
+ err = 0;
+ goto out;
+ }
+
+ err = -ENOENT;
+
+err_close_map_fds:
+ if (*values_map_fd != -1)
+ close(*values_map_fd);
+ if (*ports_map_fd != -1)
+ close(*ports_map_fd);
+ *values_map_fd = -1;
+ *ports_map_fd = -1;
+
+out:
+ close(prog_fd);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int values_map_fd, ports_map_fd;
+ __u64 tcpipopts;
+ bool firstiter;
+ __u64 prevcnt;
+ __u32 prog_id;
+ char *ports;
+ bool single;
+ int err = 0;
+ bool tc;
+
+ parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports,
+ &single, &tc);
+
+ if (prog_id == 0) {
+ if (!tc) {
+ err = bpf_xdp_query_id(ifindex, 0, &prog_id);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n",
+ strerror(-err));
+ goto out;
+ }
+ }
+ if (prog_id == 0) {
+ err = syncookie_attach(argv[0], ifindex, tc);
+ if (err < 0)
+ goto out;
+ prog_id = attached_prog_id;
+ }
+ }
+
+ err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd);
+ if (err < 0)
+ goto out;
+
+ if (ports) {
+ __u16 port_last = 0;
+ __u32 port_idx = 0;
+ char *p = ports;
+
+ fprintf(stderr, "Replacing allowed ports\n");
+
+ while (p && *p != '\0') {
+ char *token = strsep(&p, ",");
+ __u16 port;
+
+ port = parse_arg_ul(argv[0], token, UINT16_MAX);
+ err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to add port %u (index %u)\n",
+ port, port_idx);
+ goto out_close_maps;
+ }
+ fprintf(stderr, "Added port %u\n", port);
+ port_idx++;
+ }
+ err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n",
+ port_idx);
+ goto out_close_maps;
+ }
+ }
+
+ if (tcpipopts) {
+ __u32 key = 0;
+
+ fprintf(stderr, "Replacing TCP/IP options\n");
+
+ err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ goto out_close_maps;
+ }
+ }
+
+ if ((ports || tcpipopts) && attached_prog_id == 0 && !single)
+ goto out_close_maps;
+
+ prevcnt = 0;
+ firstiter = true;
+ while (true) {
+ __u32 key = 1;
+ __u64 value;
+
+ err = bpf_map_lookup_elem(values_map_fd, &key, &value);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_lookup_elem: %s\n", strerror(-err));
+ goto out_close_maps;
+ }
+ if (firstiter) {
+ prevcnt = value;
+ firstiter = false;
+ }
+ if (single) {
+ printf("Total SYNACKs generated: %llu\n", value);
+ break;
+ }
+ printf("SYNACKs generated: %llu (total %llu)\n", value - prevcnt, value);
+ prevcnt = value;
+ sleep(1);
+ }
+
+out_close_maps:
+ close(values_map_fd);
+ close(ports_map_fd);
+out:
+ return err == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 842d9155d36c..9ed8c796645d 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -12,7 +12,6 @@
#include <string.h>
#include <unistd.h>
#include <libgen.h>
-#include <sys/resource.h>
#include <net/if.h>
#include <sys/types.h>
#include <sys/socket.h>
@@ -22,13 +21,14 @@
#include "bpf/libbpf.h"
#include "xdping.h"
+#include "testing_helpers.h"
static int ifindex;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static void cleanup(int sig)
{
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
if (sig)
exit(1);
}
@@ -88,7 +88,6 @@ int main(int argc, char **argv)
{
__u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
struct addrinfo *a, hints = { .ai_family = AF_INET };
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
__u16 count = XDPING_DEFAULT_COUNT;
struct pinginfo pinginfo = { 0 };
const char *optstr = "c:I:NsS";
@@ -156,7 +155,7 @@ int main(int argc, char **argv)
}
if (!server) {
- /* Only supports IPv4; see hints initiailization above. */
+ /* Only supports IPv4; see hints initialization above. */
if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) {
fprintf(stderr, "Could not resolve %s\n", argv[optind]);
return 1;
@@ -166,21 +165,18 @@ int main(int argc, char **argv)
freeaddrinfo(a);
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]);
- if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
+ if (bpf_prog_test_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
fprintf(stderr, "load of %s failed\n", filename);
return 1;
}
- main_prog = bpf_object__find_program_by_title(obj,
- server ? "xdpserver" :
- "xdpclient");
+ main_prog = bpf_object__find_program_by_name(obj,
+ server ? "xdping_server" : "xdping_client");
if (main_prog)
prog_fd = bpf_program__fd(main_prog);
if (!main_prog || prog_fd < 0) {
@@ -188,7 +184,7 @@ int main(int argc, char **argv)
return 1;
}
- map = bpf_map__next(NULL, obj);
+ map = bpf_object__next_map(obj, NULL);
if (map)
map_fd = bpf_map__fd(map);
if (!map || map_fd < 0) {
@@ -203,7 +199,7 @@ int main(int argc, char **argv)
printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
goto done;
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
deleted file mode 100644
index f4a96d5ff524..000000000000
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ /dev/null
@@ -1,1065 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2020 Intel Corporation. */
-
-/*
- * Some functions in this program are taken from
- * Linux kernel samples/bpf/xdpsock* and modified
- * for use.
- *
- * See test_xsk.sh for detailed information on test topology
- * and prerequisite network setup.
- *
- * This test program contains two threads, each thread is single socket with
- * a unique UMEM. It validates in-order packet delivery and packet content
- * by sending packets to each other.
- *
- * Tests Information:
- * ------------------
- * These selftests test AF_XDP SKB and Native/DRV modes using veth
- * Virtual Ethernet interfaces.
- *
- * The following tests are run:
- *
- * 1. AF_XDP SKB mode
- * Generic mode XDP is driver independent, used when the driver does
- * not have support for XDP. Works on any netdevice using sockets and
- * generic XDP path. XDP hook from netif_receive_skb().
- * a. nopoll - soft-irq processing
- * b. poll - using poll() syscall
- * c. Socket Teardown
- * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
- * both sockets, then repeat multiple times. Only nopoll mode is used
- * d. Bi-directional sockets
- * Configure sockets as bi-directional tx/rx sockets, sets up fill and
- * completion rings on each socket, tx/rx in both directions. Only nopoll
- * mode is used
- *
- * 2. AF_XDP DRV/Native mode
- * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
- * packets before SKB allocation. Provides better performance than SKB. Driver
- * hook available just after DMA of buffer descriptor.
- * a. nopoll
- * b. poll
- * c. Socket Teardown
- * d. Bi-directional sockets
- * - Only copy mode is supported because veth does not currently support
- * zero-copy mode
- *
- * Total tests: 8
- *
- * Flow:
- * -----
- * - Single process spawns two threads: Tx and Rx
- * - Each of these two threads attach to a veth interface within their assigned
- * namespaces
- * - Each thread Creates one AF_XDP socket connected to a unique umem for each
- * veth interface
- * - Tx thread Transmits 10k packets from veth<xxxx> to veth<yyyy>
- * - Rx thread verifies if all 10k packets were received and delivered in-order,
- * and have the right content
- *
- * Enable/disable debug mode:
- * --------------------------
- * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
- * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
- */
-
-#define _GNU_SOURCE
-#include <fcntl.h>
-#include <errno.h>
-#include <getopt.h>
-#include <asm/barrier.h>
-typedef __u16 __sum16;
-#include <linux/if_link.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <arpa/inet.h>
-#include <net/if.h>
-#include <locale.h>
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stddef.h>
-#include <sys/mman.h>
-#include <sys/resource.h>
-#include <sys/types.h>
-#include <sys/queue.h>
-#include <time.h>
-#include <unistd.h>
-#include <stdatomic.h>
-#include <bpf/xsk.h>
-#include "xdpxceiver.h"
-#include "../kselftest.h"
-
-static void __exit_with_error(int error, const char *file, const char *func, int line)
-{
- ksft_test_result_fail
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- ksft_exit_xfail();
-}
-
-#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
-
-#define print_ksft_result(void)\
- (ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
- "NOPOLL", opt_teardown ? "Socket Teardown" : "",\
- opt_bidi ? "Bi-directional Sockets" : ""))
-
-static void pthread_init_mutex(void)
-{
- pthread_mutex_init(&sync_mutex, NULL);
- pthread_mutex_init(&sync_mutex_tx, NULL);
- pthread_cond_init(&signal_rx_condition, NULL);
- pthread_cond_init(&signal_tx_condition, NULL);
-}
-
-static void pthread_destroy_mutex(void)
-{
- pthread_mutex_destroy(&sync_mutex);
- pthread_mutex_destroy(&sync_mutex_tx);
- pthread_cond_destroy(&signal_rx_condition);
- pthread_cond_destroy(&signal_tx_condition);
-}
-
-static void *memset32_htonl(void *dest, u32 val, u32 size)
-{
- u32 *ptr = (u32 *)dest;
- int i;
-
- val = htonl(val);
-
- for (i = 0; i < (size & (~0x3)); i += 4)
- ptr[i >> 2] = val;
-
- for (; i < size; i++)
- ((char *)dest)[i] = ((char *)&val)[i & 3];
-
- return dest;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline unsigned short from32to16(unsigned int x)
-{
- /* add up 16-bit and 16-bit for 16+c bit */
- x = (x & 0xffff) + (x >> 16);
- /* add up carry.. */
- x = (x & 0xffff) + (x >> 16);
- return x;
-}
-
-/*
- * Fold a partial checksum
- * This function code has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static inline __u16 csum_fold(__u32 csum)
-{
- u32 sum = (__force u32)csum;
-
- sum = (sum & 0xffff) + (sum >> 16);
- sum = (sum & 0xffff) + (sum >> 16);
- return (__force __u16)~sum;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline u32 from64to32(u64 x)
-{
- /* add up 32-bit and 32-bit for 32+c bit */
- x = (x & 0xffffffff) + (x >> 32);
- /* add up carry.. */
- x = (x & 0xffffffff) + (x >> 32);
- return (u32)x;
-}
-
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum);
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
-{
- unsigned long long s = (__force u32)sum;
-
- s += (__force u32)saddr;
- s += (__force u32)daddr;
-#ifdef __BIG_ENDIAN__
- s += proto + len;
-#else
- s += (proto + len) << 8;
-#endif
- return (__force __u32)from64to32(s);
-}
-
-/*
- * This function has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static inline __u16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
-{
- return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
-}
-
-static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
-{
- u32 csum = 0;
- u32 cnt = 0;
-
- /* udp hdr and data */
- for (; cnt < len; cnt += 2)
- csum += udp_pkt[cnt >> 1];
-
- return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
-}
-
-static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr)
-{
- memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN);
- memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN);
- eth_hdr->h_proto = htons(ETH_P_IP);
-}
-
-static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
-{
- ip_hdr->version = IP_PKT_VER;
- ip_hdr->ihl = 0x5;
- ip_hdr->tos = IP_PKT_TOS;
- ip_hdr->tot_len = htons(IP_PKT_SIZE);
- ip_hdr->id = 0;
- ip_hdr->frag_off = 0;
- ip_hdr->ttl = IPDEFTTL;
- ip_hdr->protocol = IPPROTO_UDP;
- ip_hdr->saddr = ifobject->src_ip;
- ip_hdr->daddr = ifobject->dst_ip;
- ip_hdr->check = 0;
-}
-
-static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject,
- struct udphdr *udp_hdr)
-{
- udp_hdr->source = htons(ifobject->src_port);
- udp_hdr->dest = htons(ifobject->dst_port);
- udp_hdr->len = htons(UDP_PKT_SIZE);
- memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE);
-}
-
-static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
-{
- udp_hdr->check = 0;
- udp_hdr->check =
- udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
-}
-
-static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
-{
- memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
-}
-
-static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
-{
- int ret;
-
- data->umem = calloc(1, sizeof(struct xsk_umem_info));
- if (!data->umem)
- exit_with_error(errno);
-
- ret = xsk_umem__create(&data->umem->umem, buffer, size,
- &data->umem->fq, &data->umem->cq, NULL);
- if (ret)
- exit_with_error(ret);
-
- data->umem->buffer = buffer;
-}
-
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
-{
- int ret, i;
- u32 idx;
-
- ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
- if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
- exit_with_error(ret);
- for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
- *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
- xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
-}
-
-static int xsk_configure_socket(struct ifobject *ifobject)
-{
- struct xsk_socket_config cfg;
- struct xsk_ring_cons *rxr;
- struct xsk_ring_prod *txr;
- int ret;
-
- ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info));
- if (!ifobject->xsk)
- exit_with_error(errno);
-
- ifobject->xsk->umem = ifobject->umem;
- cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg.libbpf_flags = 0;
- cfg.xdp_flags = opt_xdp_flags;
- cfg.bind_flags = opt_xdp_bind_flags;
-
- if (!opt_bidi) {
- rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
- txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
- } else {
- rxr = &ifobject->xsk->rx;
- txr = &ifobject->xsk->tx;
- }
-
- ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
- opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
-
- if (ret)
- return 1;
-
- return 0;
-}
-
-static struct option long_options[] = {
- {"interface", required_argument, 0, 'i'},
- {"queue", optional_argument, 0, 'q'},
- {"poll", no_argument, 0, 'p'},
- {"xdp-skb", no_argument, 0, 'S'},
- {"xdp-native", no_argument, 0, 'N'},
- {"copy", no_argument, 0, 'c'},
- {"tear-down", no_argument, 0, 'T'},
- {"bidi", optional_argument, 0, 'B'},
- {"debug", optional_argument, 0, 'D'},
- {"tx-pkt-count", optional_argument, 0, 'C'},
- {0, 0, 0, 0}
-};
-
-static void usage(const char *prog)
-{
- const char *str =
- " Usage: %s [OPTIONS]\n"
- " Options:\n"
- " -i, --interface Use interface\n"
- " -q, --queue=n Use queue n (default 0)\n"
- " -p, --poll Use poll syscall\n"
- " -S, --xdp-skb=n Use XDP SKB mode\n"
- " -N, --xdp-native=n Enforce XDP DRV (native) mode\n"
- " -c, --copy Force copy mode\n"
- " -T, --tear-down Tear down sockets by repeatedly recreating them\n"
- " -B, --bidi Bi-directional sockets test\n"
- " -D, --debug Debug mode - dump packets L2 - L5\n"
- " -C, --tx-pkt-count=n Number of packets to send\n";
- ksft_print_msg(str, prog);
-}
-
-static bool switch_namespace(int idx)
-{
- char fqns[26] = "/var/run/netns/";
- int nsfd;
-
- strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1);
- nsfd = open(fqns, O_RDONLY);
-
- if (nsfd == -1)
- exit_with_error(errno);
-
- if (setns(nsfd, 0) == -1)
- exit_with_error(errno);
-
- return true;
-}
-
-static void *nsswitchthread(void *args)
-{
- struct targs *targs = args;
-
- targs->retptr = false;
-
- if (switch_namespace(targs->idx)) {
- ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname);
- if (!ifdict[targs->idx]->ifindex) {
- ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n",
- __func__, ifdict[targs->idx]->ifname);
- } else {
- ksft_print_msg("Interface found: %s\n", ifdict[targs->idx]->ifname);
- targs->retptr = true;
- }
- }
- pthread_exit(NULL);
-}
-
-static int validate_interfaces(void)
-{
- bool ret = true;
-
- for (int i = 0; i < MAX_INTERFACES; i++) {
- if (!strcmp(ifdict[i]->ifname, "")) {
- ret = false;
- ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
- }
- if (strcmp(ifdict[i]->nsname, "")) {
- struct targs *targs;
-
- targs = malloc(sizeof(*targs));
- if (!targs)
- exit_with_error(errno);
-
- targs->idx = i;
- if (pthread_create(&ns_thread, NULL, nsswitchthread, targs))
- exit_with_error(errno);
-
- pthread_join(ns_thread, NULL);
-
- if (targs->retptr)
- ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname);
-
- free(targs);
- } else {
- ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname);
- if (!ifdict[i]->ifindex) {
- ksft_test_result_fail
- ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
- ret = false;
- } else {
- ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname);
- }
- }
- }
- return ret;
-}
-
-static void parse_command_line(int argc, char **argv)
-{
- int option_index, interface_index = 0, c;
-
- opterr = 0;
-
- for (;;) {
- c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index);
-
- if (c == -1)
- break;
-
- switch (c) {
- case 'i':
- if (interface_index == MAX_INTERFACES)
- break;
- char *sptr, *token;
-
- sptr = strndupa(optarg, strlen(optarg));
- memcpy(ifdict[interface_index]->ifname,
- strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
- token = strsep(&sptr, ",");
- if (token)
- memcpy(ifdict[interface_index]->nsname, token,
- MAX_INTERFACES_NAMESPACE_CHARS);
- interface_index++;
- break;
- case 'q':
- opt_queue = atoi(optarg);
- break;
- case 'p':
- opt_poll = 1;
- break;
- case 'S':
- opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
- opt_xdp_bind_flags |= XDP_COPY;
- uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
- break;
- case 'N':
- opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
- opt_xdp_bind_flags |= XDP_COPY;
- uut = ORDER_CONTENT_VALIDATE_XDP_DRV;
- break;
- case 'c':
- opt_xdp_bind_flags |= XDP_COPY;
- break;
- case 'T':
- opt_teardown = 1;
- break;
- case 'B':
- opt_bidi = 1;
- break;
- case 'D':
- debug_pkt_dump = 1;
- break;
- case 'C':
- opt_pkt_count = atoi(optarg);
- break;
- default:
- usage(basename(argv[0]));
- ksft_exit_xfail();
- }
- }
-
- if (!validate_interfaces()) {
- usage(basename(argv[0]));
- ksft_exit_xfail();
- }
-}
-
-static void kick_tx(struct xsk_socket_info *xsk)
-{
- int ret;
-
- ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
- if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN)
- return;
- exit_with_error(errno);
-}
-
-static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
-{
- unsigned int rcvd;
- u32 idx;
-
- if (!xsk->outstanding_tx)
- return;
-
- if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
- kick_tx(xsk);
-
- rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
- if (rcvd) {
- xsk_ring_cons__release(&xsk->umem->cq, rcvd);
- xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
- }
-}
-
-static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
-{
- unsigned int rcvd, i;
- u32 idx_rx = 0, idx_fq = 0;
- int ret;
-
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
- if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret < 0)
- exit_with_error(ret);
- }
- return;
- }
-
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (ret < 0)
- exit_with_error(ret);
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret < 0)
- exit_with_error(ret);
- }
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- }
-
- for (i = 0; i < rcvd; i++) {
- u64 addr, orig;
-
- addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
- xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
- orig = xsk_umem__extract_addr(addr);
-
- addr = xsk_umem__add_offset_to_addr(addr);
- pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
- if (!pkt_node_rx)
- exit_with_error(errno);
-
- pkt_node_rx->pkt_frame = malloc(PKT_SIZE);
- if (!pkt_node_rx->pkt_frame)
- exit_with_error(errno);
-
- memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
- PKT_SIZE);
-
- TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
-
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
- }
-
- xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
- xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
-}
-
-static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
-{
- u32 idx;
- unsigned int i;
-
- while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
- complete_tx_only(xsk, batch_size);
-
- for (i = 0; i < batch_size; i++) {
- struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
-
- tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- tx_desc->len = PKT_SIZE;
- }
-
- xsk_ring_prod__submit(&xsk->tx, batch_size);
- xsk->outstanding_tx += batch_size;
- *frameptr += batch_size;
- *frameptr %= num_frames;
- complete_tx_only(xsk, batch_size);
-}
-
-static inline int get_batch_size(int pkt_cnt)
-{
- if (!opt_pkt_count)
- return BATCH_SIZE;
-
- if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
- return BATCH_SIZE;
-
- return opt_pkt_count - pkt_cnt;
-}
-
-static void complete_tx_only_all(struct ifobject *ifobject)
-{
- bool pending;
-
- do {
- pending = false;
- if (ifobject->xsk->outstanding_tx) {
- complete_tx_only(ifobject->xsk, BATCH_SIZE);
- pending = !!ifobject->xsk->outstanding_tx;
- }
- } while (pending);
-}
-
-static void tx_only_all(struct ifobject *ifobject)
-{
- struct pollfd fds[MAX_SOCKS] = { };
- u32 frame_nb = 0;
- int pkt_cnt = 0;
- int ret;
-
- fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
- fds[0].events = POLLOUT;
-
- while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
- int batch_size = get_batch_size(pkt_cnt);
-
- if (opt_poll) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret <= 0)
- continue;
-
- if (!(fds[0].revents & POLLOUT))
- continue;
- }
-
- tx_only(ifobject->xsk, &frame_nb, batch_size);
- pkt_cnt += batch_size;
- }
-
- if (opt_pkt_count)
- complete_tx_only_all(ifobject);
-}
-
-static void worker_pkt_dump(void)
-{
- struct in_addr ipaddr;
-
- fprintf(stdout, "---------------------------------------\n");
- for (int iter = 0; iter < num_frames - 1; iter++) {
- /*extract L2 frame */
- fprintf(stdout, "DEBUG>> L2: dst mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ((struct ethhdr *)
- pkt_buf[iter]->payload)->h_dest[i]);
-
- fprintf(stdout, "\nDEBUG>> L2: src mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ((struct ethhdr *)
- pkt_buf[iter]->payload)->h_source[i]);
-
- /*extract L3 frame */
- fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n",
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl);
-
- ipaddr.s_addr =
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr;
- fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr));
-
- ipaddr.s_addr =
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr;
- fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr));
-
- /*extract L4 frame */
- fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n",
- ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
- sizeof(struct ethhdr) +
- sizeof(struct iphdr)))->source));
-
- fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n",
- ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
- sizeof(struct ethhdr) +
- sizeof(struct iphdr)))->dest));
- /*extract L5 frame */
- int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
-
- if (payload == EOT) {
- ksft_print_msg("End-of-transmission frame received\n");
- fprintf(stdout, "---------------------------------------\n");
- break;
- }
- fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
- fprintf(stdout, "---------------------------------------\n");
- }
-}
-
-static void worker_pkt_validate(void)
-{
- u32 payloadseqnum = -2;
- struct iphdr *iphdr;
-
- while (1) {
- pkt_node_rx_q = TAILQ_LAST(&head, head_s);
- if (!pkt_node_rx_q)
- break;
-
- iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr));
-
- /*do not increment pktcounter if !(tos=0x9 and ipv4) */
- if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
- payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
- if (debug_pkt_dump && payloadseqnum != EOT) {
- pkt_obj = malloc(sizeof(*pkt_obj));
- pkt_obj->payload = malloc(PKT_SIZE);
- memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
- pkt_buf[payloadseqnum] = pkt_obj;
- }
-
- if (payloadseqnum == EOT) {
- ksft_print_msg("End-of-transmission frame received: PASS\n");
- sigvar = 1;
- break;
- }
-
- if (prev_pkt + 1 != payloadseqnum) {
- ksft_test_result_fail
- ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
- __func__, prev_pkt, payloadseqnum);
- ksft_exit_xfail();
- }
-
- prev_pkt = payloadseqnum;
- pkt_counter++;
- } else {
- ksft_print_msg("Invalid frame received: ");
- ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
- iphdr->tos);
- }
-
- TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
- free(pkt_node_rx_q->pkt_frame);
- free(pkt_node_rx_q);
- pkt_node_rx_q = NULL;
- }
-}
-
-static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr,
- atomic_int *spinningptr)
-{
- int ctr = 0;
- int ret;
-
- xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
- ret = xsk_configure_socket(ifobject);
-
- /* Retry Create Socket if it fails as xsk_socket__create()
- * is asynchronous
- *
- * Essential to lock Mutex here to prevent Tx thread from
- * entering before Rx and causing a deadlock
- */
- pthread_mutex_lock(mutexptr);
- while (ret && ctr < SOCK_RECONF_CTR) {
- atomic_store(spinningptr, 1);
- xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
- ret = xsk_configure_socket(ifobject);
- usleep(USLEEP_MAX);
- ctr++;
- }
- atomic_store(spinningptr, 0);
- pthread_mutex_unlock(mutexptr);
-
- if (ctr >= SOCK_RECONF_CTR)
- exit_with_error(ret);
-}
-
-static void *worker_testapp_validate(void *arg)
-{
- struct udphdr *udp_hdr =
- (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
- struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
- struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
- struct ifobject *ifobject = (struct ifobject *)arg;
- struct generic_data data;
- void *bufs = NULL;
-
- pthread_attr_setstacksize(&attr, THREAD_STACK);
-
- if (!bidi_pass) {
- bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (bufs == MAP_FAILED)
- exit_with_error(errno);
-
- if (strcmp(ifobject->nsname, ""))
- switch_namespace(ifobject->ifdict_index);
- }
-
- if (ifobject->fv.vector == tx) {
- int spinningrxctr = 0;
-
- if (!bidi_pass)
- thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx);
-
- while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
- spinningrxctr++;
- usleep(USLEEP_MAX);
- }
-
- ksft_print_msg("Interface [%s] vector [Tx]\n", ifobject->ifname);
- for (int i = 0; i < num_frames; i++) {
- /*send EOT frame */
- if (i == (num_frames - 1))
- data.seqnum = -1;
- else
- data.seqnum = i;
- gen_udp_hdr(&data, ifobject, udp_hdr);
- gen_ip_hdr(ifobject, ip_hdr);
- gen_udp_csum(udp_hdr, ip_hdr);
- gen_eth_hdr(ifobject, eth_hdr);
- gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
- }
-
- ksft_print_msg("Sending %d packets on interface %s\n",
- (opt_pkt_count - 1), ifobject->ifname);
- tx_only_all(ifobject);
- } else if (ifobject->fv.vector == rx) {
- struct pollfd fds[MAX_SOCKS] = { };
- int ret;
-
- if (!bidi_pass)
- thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
-
- ksft_print_msg("Interface [%s] vector [Rx]\n", ifobject->ifname);
- xsk_populate_fill_ring(ifobject->umem);
-
- TAILQ_INIT(&head);
- if (debug_pkt_dump) {
- pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
- if (!pkt_buf)
- exit_with_error(errno);
- }
-
- fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
- fds[0].events = POLLIN;
-
- pthread_mutex_lock(&sync_mutex);
- pthread_cond_signal(&signal_rx_condition);
- pthread_mutex_unlock(&sync_mutex);
-
- while (1) {
- if (opt_poll) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret <= 0)
- continue;
- }
- rx_pkt(ifobject->xsk, fds);
- worker_pkt_validate();
-
- if (sigvar)
- break;
- }
-
- ksft_print_msg("Received %d packets on interface %s\n",
- pkt_counter, ifobject->ifname);
-
- if (opt_teardown)
- ksft_print_msg("Destroying socket\n");
- }
-
- if (!opt_bidi || bidi_pass) {
- xsk_socket__delete(ifobject->xsk->xsk);
- (void)xsk_umem__delete(ifobject->umem->umem);
- }
- pthread_exit(NULL);
-}
-
-static void testapp_validate(void)
-{
- struct timespec max_wait = { 0, 0 };
-
- pthread_attr_init(&attr);
- pthread_attr_setstacksize(&attr, THREAD_STACK);
-
- if (opt_bidi && bidi_pass) {
- pthread_init_mutex();
- if (!switching_notify) {
- ksft_print_msg("Switching Tx/Rx vectors\n");
- switching_notify++;
- }
- }
-
- pthread_mutex_lock(&sync_mutex);
-
- /*Spawn RX thread */
- if (!opt_bidi || !bidi_pass) {
- if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1]))
- exit_with_error(errno);
- } else if (opt_bidi && bidi_pass) {
- /*switch Tx/Rx vectors */
- ifdict[0]->fv.vector = rx;
- if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0]))
- exit_with_error(errno);
- }
-
- if (clock_gettime(CLOCK_REALTIME, &max_wait))
- exit_with_error(errno);
- max_wait.tv_sec += TMOUT_SEC;
-
- if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT)
- exit_with_error(errno);
-
- pthread_mutex_unlock(&sync_mutex);
-
- /*Spawn TX thread */
- if (!opt_bidi || !bidi_pass) {
- if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0]))
- exit_with_error(errno);
- } else if (opt_bidi && bidi_pass) {
- /*switch Tx/Rx vectors */
- ifdict[1]->fv.vector = tx;
- if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1]))
- exit_with_error(errno);
- }
-
- pthread_join(t1, NULL);
- pthread_join(t0, NULL);
-
- if (debug_pkt_dump) {
- worker_pkt_dump();
- for (int iter = 0; iter < num_frames - 1; iter++) {
- free(pkt_buf[iter]->payload);
- free(pkt_buf[iter]);
- }
- free(pkt_buf);
- }
-
- if (!opt_teardown && !opt_bidi)
- print_ksft_result();
-}
-
-static void testapp_sockets(void)
-{
- for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
- ksft_print_msg("Creating socket\n");
- testapp_validate();
- opt_bidi ? bidi_pass++ : bidi_pass;
- }
-
- print_ksft_result();
-}
-
-static void init_iface_config(struct ifaceconfigobj *ifaceconfig)
-{
- /*Init interface0 */
- ifdict[0]->fv.vector = tx;
- memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN);
- memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN);
- ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr;
- ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr;
- ifdict[0]->dst_port = ifaceconfig->dst_port;
- ifdict[0]->src_port = ifaceconfig->src_port;
-
- /*Init interface1 */
- ifdict[1]->fv.vector = rx;
- memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN);
- memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN);
- ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr;
- ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr;
- ifdict[1]->dst_port = ifaceconfig->src_port;
- ifdict[1]->src_port = ifaceconfig->dst_port;
-}
-
-int main(int argc, char **argv)
-{
- struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
-
- if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
- exit_with_error(errno);
-
- const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
- const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
- const char *IP1 = "192.168.100.162";
- const char *IP2 = "192.168.100.161";
- u16 UDP_DST_PORT = 2020;
- u16 UDP_SRC_PORT = 2121;
-
- ifaceconfig = malloc(sizeof(struct ifaceconfigobj));
- memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
- memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN);
- inet_aton(IP1, &ifaceconfig->dst_ip);
- inet_aton(IP2, &ifaceconfig->src_ip);
- ifaceconfig->dst_port = UDP_DST_PORT;
- ifaceconfig->src_port = UDP_SRC_PORT;
-
- for (int i = 0; i < MAX_INTERFACES; i++) {
- ifdict[i] = malloc(sizeof(struct ifobject));
- if (!ifdict[i])
- exit_with_error(errno);
-
- ifdict[i]->ifdict_index = i;
- }
-
- setlocale(LC_ALL, "");
-
- parse_command_line(argc, argv);
-
- num_frames = ++opt_pkt_count;
-
- init_iface_config(ifaceconfig);
-
- pthread_init_mutex();
-
- ksft_set_plan(1);
-
- if (!opt_teardown && !opt_bidi) {
- testapp_validate();
- } else if (opt_teardown && opt_bidi) {
- ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n");
- ksft_exit_xfail();
- } else {
- testapp_sockets();
- }
-
- for (int i = 0; i < MAX_INTERFACES; i++)
- free(ifdict[i]);
-
- pthread_destroy_mutex();
-
- ksft_exit_pass();
-
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
deleted file mode 100644
index 0e9f9b7e61c2..000000000000
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2020 Intel Corporation.
- */
-
-#ifndef XDPXCEIVER_H_
-#define XDPXCEIVER_H_
-
-#ifndef SOL_XDP
-#define SOL_XDP 283
-#endif
-
-#ifndef AF_XDP
-#define AF_XDP 44
-#endif
-
-#ifndef PF_XDP
-#define PF_XDP AF_XDP
-#endif
-
-#define MAX_INTERFACES 2
-#define MAX_INTERFACE_NAME_CHARS 7
-#define MAX_INTERFACES_NAMESPACE_CHARS 10
-#define MAX_SOCKS 1
-#define MAX_TEARDOWN_ITER 10
-#define MAX_BIDI_ITER 2
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
- sizeof(struct udphdr))
-#define MIN_PKT_SIZE 64
-#define ETH_FCS_SIZE 4
-#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE)
-#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
-#define IP_PKT_VER 0x4
-#define IP_PKT_TOS 0x9
-#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
-#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define TMOUT_SEC (3)
-#define EOT (-1)
-#define USLEEP_MAX 200000
-#define THREAD_STACK 60000000
-#define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 64
-#define POLL_TMOUT 1000
-#define NEED_WAKEUP true
-
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8 u8;
-
-enum TESTS {
- ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
- ORDER_CONTENT_VALIDATE_XDP_DRV = 1,
-};
-
-u8 uut;
-u8 debug_pkt_dump;
-u32 num_frames;
-u8 switching_notify;
-u8 bidi_pass;
-
-static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int opt_queue;
-static int opt_pkt_count;
-static int opt_poll;
-static int opt_teardown;
-static int opt_bidi;
-static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
-static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
-static u32 pkt_counter;
-static u32 prev_pkt = -1;
-static int sigvar;
-
-struct xsk_umem_info {
- struct xsk_ring_prod fq;
- struct xsk_ring_cons cq;
- struct xsk_umem *umem;
- void *buffer;
-};
-
-struct xsk_socket_info {
- struct xsk_ring_cons rx;
- struct xsk_ring_prod tx;
- struct xsk_umem_info *umem;
- struct xsk_socket *xsk;
- unsigned long rx_npkts;
- unsigned long tx_npkts;
- unsigned long prev_rx_npkts;
- unsigned long prev_tx_npkts;
- u32 outstanding_tx;
-};
-
-struct flow_vector {
- enum fvector {
- tx,
- rx,
- } vector;
-};
-
-struct generic_data {
- u32 seqnum;
-};
-
-struct ifaceconfigobj {
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
- struct in_addr dst_ip;
- struct in_addr src_ip;
- u16 src_port;
- u16 dst_port;
-} *ifaceconfig;
-
-struct ifobject {
- int ifindex;
- int ifdict_index;
- char ifname[MAX_INTERFACE_NAME_CHARS];
- char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
- struct flow_vector fv;
- struct xsk_socket_info *xsk;
- struct xsk_umem_info *umem;
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
- u32 dst_ip;
- u32 src_ip;
- u16 src_port;
- u16 dst_port;
-};
-
-static struct ifobject *ifdict[MAX_INTERFACES];
-
-/*threads*/
-atomic_int spinning_tx;
-atomic_int spinning_rx;
-pthread_mutex_t sync_mutex;
-pthread_mutex_t sync_mutex_tx;
-pthread_cond_t signal_rx_condition;
-pthread_cond_t signal_tx_condition;
-pthread_t t0, t1, ns_thread;
-pthread_attr_t attr;
-
-struct targs {
- bool retptr;
- int idx;
-};
-
-TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
-struct head_s *head_p;
-struct pkt {
- char *pkt_frame;
-
- TAILQ_ENTRY(pkt) pkt_nodes;
-} *pkt_node_rx, *pkt_node_rx_q;
-
-struct pkt_frame {
- char *payload;
-} *pkt_obj;
-
-struct pkt_frame **pkt_buf;
-
-#endif /* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
new file mode 100644
index 000000000000..25d568abf0f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -0,0 +1,781 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <asm/barrier.h>
+#include <linux/compiler.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <linux/if_packet.h>
+#include <linux/if_xdp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "xsk.h"
+#include "bpf_util.h"
+
+#ifndef SOL_XDP
+ #define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+ #define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+ #define PF_XDP AF_XDP
+#endif
+
+#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
+
+#define XSKMAP_SIZE 1
+
+struct xsk_umem {
+ struct xsk_ring_prod *fill_save;
+ struct xsk_ring_cons *comp_save;
+ char *umem_area;
+ struct xsk_umem_config config;
+ int fd;
+ int refcount;
+ struct list_head ctx_list;
+ bool rx_ring_setup_done;
+ bool tx_ring_setup_done;
+};
+
+struct xsk_ctx {
+ struct xsk_ring_prod *fill;
+ struct xsk_ring_cons *comp;
+ __u32 queue_id;
+ struct xsk_umem *umem;
+ int refcount;
+ int ifindex;
+ struct list_head list;
+};
+
+struct xsk_socket {
+ struct xsk_ring_cons *rx;
+ struct xsk_ring_prod *tx;
+ struct xsk_ctx *ctx;
+ struct xsk_socket_config config;
+ int fd;
+};
+
+struct nl_mtu_req {
+ struct nlmsghdr nh;
+ struct ifinfomsg msg;
+ char buf[512];
+};
+
+int xsk_umem__fd(const struct xsk_umem *umem)
+{
+ return umem ? umem->fd : -EINVAL;
+}
+
+int xsk_socket__fd(const struct xsk_socket *xsk)
+{
+ return xsk ? xsk->fd : -EINVAL;
+}
+
+static bool xsk_page_aligned(void *buffer)
+{
+ unsigned long addr = (unsigned long)buffer;
+
+ return !(addr & (getpagesize() - 1));
+}
+
+static void xsk_set_umem_config(struct xsk_umem_config *cfg,
+ const struct xsk_umem_config *usr_cfg)
+{
+ if (!usr_cfg) {
+ cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+ cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+ cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
+ cfg->tx_metadata_len = 0;
+ return;
+ }
+
+ cfg->fill_size = usr_cfg->fill_size;
+ cfg->comp_size = usr_cfg->comp_size;
+ cfg->frame_size = usr_cfg->frame_size;
+ cfg->frame_headroom = usr_cfg->frame_headroom;
+ cfg->flags = usr_cfg->flags;
+ cfg->tx_metadata_len = usr_cfg->tx_metadata_len;
+}
+
+static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+ const struct xsk_socket_config *usr_cfg)
+{
+ if (!usr_cfg) {
+ cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg->bind_flags = 0;
+ return 0;
+ }
+
+ cfg->rx_size = usr_cfg->rx_size;
+ cfg->tx_size = usr_cfg->tx_size;
+ cfg->bind_flags = usr_cfg->bind_flags;
+
+ return 0;
+}
+
+static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
+{
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(*off);
+ err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
+ if (err)
+ return err;
+
+ if (optlen == sizeof(*off))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xdp_mmap_offsets off;
+ void *map;
+ int err;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
+ &umem->config.fill_size,
+ sizeof(umem->config.fill_size));
+ if (err)
+ return -errno;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+ &umem->config.comp_size,
+ sizeof(umem->config.comp_size));
+ if (err)
+ return -errno;
+
+ err = xsk_get_mmap_offsets(fd, &off);
+ if (err)
+ return -errno;
+
+ map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_FILL_RING);
+ if (map == MAP_FAILED)
+ return -errno;
+
+ fill->mask = umem->config.fill_size - 1;
+ fill->size = umem->config.fill_size;
+ fill->producer = map + off.fr.producer;
+ fill->consumer = map + off.fr.consumer;
+ fill->flags = map + off.fr.flags;
+ fill->ring = map + off.fr.desc;
+ fill->cached_cons = umem->config.fill_size;
+
+ map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_COMPLETION_RING);
+ if (map == MAP_FAILED) {
+ err = -errno;
+ goto out_mmap;
+ }
+
+ comp->mask = umem->config.comp_size - 1;
+ comp->size = umem->config.comp_size;
+ comp->producer = map + off.cr.producer;
+ comp->consumer = map + off.cr.consumer;
+ comp->flags = map + off.cr.flags;
+ comp->ring = map + off.cr.desc;
+
+ return 0;
+
+out_mmap:
+ munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ return err;
+}
+
+int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
+ __u64 size, struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *usr_config)
+{
+ struct xdp_umem_reg mr;
+ struct xsk_umem *umem;
+ int err;
+
+ if (!umem_area || !umem_ptr || !fill || !comp)
+ return -EFAULT;
+ if (!size && !xsk_page_aligned(umem_area))
+ return -EINVAL;
+
+ umem = calloc(1, sizeof(*umem));
+ if (!umem)
+ return -ENOMEM;
+
+ umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
+ if (umem->fd < 0) {
+ err = -errno;
+ goto out_umem_alloc;
+ }
+
+ umem->umem_area = umem_area;
+ INIT_LIST_HEAD(&umem->ctx_list);
+ xsk_set_umem_config(&umem->config, usr_config);
+
+ memset(&mr, 0, sizeof(mr));
+ mr.addr = (uintptr_t)umem_area;
+ mr.len = size;
+ mr.chunk_size = umem->config.frame_size;
+ mr.headroom = umem->config.frame_headroom;
+ mr.flags = umem->config.flags;
+ mr.tx_metadata_len = umem->config.tx_metadata_len;
+
+ err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
+ if (err) {
+ err = -errno;
+ goto out_socket;
+ }
+
+ err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
+ if (err)
+ goto out_socket;
+
+ umem->fill_save = fill;
+ umem->comp_save = comp;
+ *umem_ptr = umem;
+ return 0;
+
+out_socket:
+ close(umem->fd);
+out_umem_alloc:
+ free(umem);
+ return err;
+}
+
+bool xsk_is_in_mode(u32 ifindex, int mode)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ int ret;
+
+ ret = bpf_xdp_query(ifindex, mode, &opts);
+ if (ret) {
+ printf("XDP mode query returned error %s\n", strerror(errno));
+ return false;
+ }
+
+ if (mode == XDP_FLAGS_DRV_MODE)
+ return opts.attach_mode == XDP_ATTACHED_DRV;
+ else if (mode == XDP_FLAGS_SKB_MODE)
+ return opts.attach_mode == XDP_ATTACHED_SKB;
+
+ return false;
+}
+
+/* Lifted from netlink.c in tools/lib/bpf */
+static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(sock, mhdr, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (len < 0)
+ return -errno;
+ return len;
+}
+
+/* Lifted from netlink.c in tools/lib/bpf */
+static int alloc_iov(struct iovec *iov, int len)
+{
+ void *nbuf;
+
+ nbuf = realloc(iov->iov_base, len);
+ if (!nbuf)
+ return -ENOMEM;
+
+ iov->iov_base = nbuf;
+ iov->iov_len = len;
+ return 0;
+}
+
+/* Original version lifted from netlink.c in tools/lib/bpf */
+static int netlink_recv(int sock)
+{
+ struct iovec iov = {};
+ struct msghdr mhdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ bool multipart = true;
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+ int len, ret;
+
+ ret = alloc_iov(&iov, 4096);
+ if (ret)
+ goto done;
+
+ while (multipart) {
+ multipart = false;
+ len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
+ if (len < 0) {
+ ret = len;
+ goto done;
+ }
+
+ if (len > iov.iov_len) {
+ ret = alloc_iov(&iov, len);
+ if (ret)
+ goto done;
+ }
+
+ len = netlink_recvmsg(sock, &mhdr, 0);
+ if (len < 0) {
+ ret = len;
+ goto done;
+ }
+
+ if (len == 0)
+ break;
+
+ for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_flags & NLM_F_MULTI)
+ multipart = true;
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ goto done;
+ case NLMSG_DONE:
+ ret = 0;
+ goto done;
+ default:
+ break;
+ }
+ }
+ }
+ ret = 0;
+done:
+ free(iov.iov_base);
+ return ret;
+}
+
+int xsk_set_mtu(int ifindex, int mtu)
+{
+ struct nl_mtu_req req;
+ struct rtattr *rta;
+ int fd, ret;
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+ if (fd < 0)
+ return fd;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.msg.ifi_family = AF_UNSPEC;
+ req.msg.ifi_index = ifindex;
+ rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
+ rta->rta_type = IFLA_MTU;
+ rta->rta_len = RTA_LENGTH(sizeof(unsigned int));
+ req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_LENGTH(sizeof(mtu));
+ memcpy(RTA_DATA(rta), &mtu, sizeof(mtu));
+
+ ret = send(fd, &req, req.nh.nlmsg_len, 0);
+ if (ret < 0) {
+ close(fd);
+ return errno;
+ }
+
+ ret = netlink_recv(fd);
+ close(fd);
+ return ret;
+}
+
+int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
+{
+ int prog_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
+}
+
+void xsk_detach_xdp_program(int ifindex, u32 xdp_flags)
+{
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
+}
+
+void xsk_clear_xskmap(struct bpf_map *map)
+{
+ u32 index = 0;
+ int map_fd;
+
+ map_fd = bpf_map__fd(map);
+ bpf_map_delete_elem(map_fd, &index);
+}
+
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index)
+{
+ int map_fd, sock_fd;
+
+ map_fd = bpf_map__fd(map);
+ sock_fd = xsk_socket__fd(xsk);
+
+ return bpf_map_update_elem(map_fd, &index, &sock_fd, 0);
+}
+
+static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
+ __u32 queue_id)
+{
+ struct xsk_ctx *ctx;
+
+ if (list_empty(&umem->ctx_list))
+ return NULL;
+
+ list_for_each_entry(ctx, &umem->ctx_list, list) {
+ if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
+ ctx->refcount++;
+ return ctx;
+ }
+ }
+
+ return NULL;
+}
+
+static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
+{
+ struct xsk_umem *umem = ctx->umem;
+ struct xdp_mmap_offsets off;
+ int err;
+
+ if (--ctx->refcount)
+ return;
+
+ if (!unmap)
+ goto out_free;
+
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (err)
+ goto out_free;
+
+ munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
+ sizeof(__u64));
+ munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
+ sizeof(__u64));
+
+out_free:
+ list_del(&ctx->list);
+ free(ctx);
+}
+
+static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
+ struct xsk_umem *umem, int ifindex,
+ __u32 queue_id,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xsk_ctx *ctx;
+ int err;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ if (!umem->fill_save) {
+ err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
+ if (err) {
+ free(ctx);
+ return NULL;
+ }
+ } else if (umem->fill_save != fill || umem->comp_save != comp) {
+ /* Copy over rings to new structs. */
+ memcpy(fill, umem->fill_save, sizeof(*fill));
+ memcpy(comp, umem->comp_save, sizeof(*comp));
+ }
+
+ ctx->ifindex = ifindex;
+ ctx->refcount = 1;
+ ctx->umem = umem;
+ ctx->queue_id = queue_id;
+
+ ctx->fill = fill;
+ ctx->comp = comp;
+ list_add(&ctx->list, &umem->ctx_list);
+ return ctx;
+}
+
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ int ifindex,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *usr_config)
+{
+ bool unmap, rx_setup_done = false, tx_setup_done = false;
+ void *rx_map = NULL, *tx_map = NULL;
+ struct sockaddr_xdp sxdp = {};
+ struct xdp_mmap_offsets off;
+ struct xsk_socket *xsk;
+ struct xsk_ctx *ctx;
+ int err;
+
+ if (!umem || !xsk_ptr || !(rx || tx))
+ return -EFAULT;
+
+ unmap = umem->fill_save != fill;
+
+ xsk = calloc(1, sizeof(*xsk));
+ if (!xsk)
+ return -ENOMEM;
+
+ err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+ if (err)
+ goto out_xsk_alloc;
+
+ if (umem->refcount++ > 0) {
+ xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
+ if (xsk->fd < 0) {
+ err = -errno;
+ goto out_xsk_alloc;
+ }
+ } else {
+ xsk->fd = umem->fd;
+ rx_setup_done = umem->rx_ring_setup_done;
+ tx_setup_done = umem->tx_ring_setup_done;
+ }
+
+ ctx = xsk_get_ctx(umem, ifindex, queue_id);
+ if (!ctx) {
+ if (!fill || !comp) {
+ err = -EFAULT;
+ goto out_socket;
+ }
+
+ ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto out_socket;
+ }
+ }
+ xsk->ctx = ctx;
+
+ if (rx && !rx_setup_done) {
+ err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
+ &xsk->config.rx_size,
+ sizeof(xsk->config.rx_size));
+ if (err) {
+ err = -errno;
+ goto out_put_ctx;
+ }
+ if (xsk->fd == umem->fd)
+ umem->rx_ring_setup_done = true;
+ }
+ if (tx && !tx_setup_done) {
+ err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
+ &xsk->config.tx_size,
+ sizeof(xsk->config.tx_size));
+ if (err) {
+ err = -errno;
+ goto out_put_ctx;
+ }
+ if (xsk->fd == umem->fd)
+ umem->tx_ring_setup_done = true;
+ }
+
+ err = xsk_get_mmap_offsets(xsk->fd, &off);
+ if (err) {
+ err = -errno;
+ goto out_put_ctx;
+ }
+
+ if (rx) {
+ rx_map = mmap(NULL, off.rx.desc +
+ xsk->config.rx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_RX_RING);
+ if (rx_map == MAP_FAILED) {
+ err = -errno;
+ goto out_put_ctx;
+ }
+
+ rx->mask = xsk->config.rx_size - 1;
+ rx->size = xsk->config.rx_size;
+ rx->producer = rx_map + off.rx.producer;
+ rx->consumer = rx_map + off.rx.consumer;
+ rx->flags = rx_map + off.rx.flags;
+ rx->ring = rx_map + off.rx.desc;
+ rx->cached_prod = *rx->producer;
+ rx->cached_cons = *rx->consumer;
+ }
+ xsk->rx = rx;
+
+ if (tx) {
+ tx_map = mmap(NULL, off.tx.desc +
+ xsk->config.tx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_TX_RING);
+ if (tx_map == MAP_FAILED) {
+ err = -errno;
+ goto out_mmap_rx;
+ }
+
+ tx->mask = xsk->config.tx_size - 1;
+ tx->size = xsk->config.tx_size;
+ tx->producer = tx_map + off.tx.producer;
+ tx->consumer = tx_map + off.tx.consumer;
+ tx->flags = tx_map + off.tx.flags;
+ tx->ring = tx_map + off.tx.desc;
+ tx->cached_prod = *tx->producer;
+ /* cached_cons is r->size bigger than the real consumer pointer
+ * See xsk_prod_nb_free
+ */
+ tx->cached_cons = *tx->consumer + xsk->config.tx_size;
+ }
+ xsk->tx = tx;
+
+ sxdp.sxdp_family = PF_XDP;
+ sxdp.sxdp_ifindex = ctx->ifindex;
+ sxdp.sxdp_queue_id = ctx->queue_id;
+ if (umem->refcount > 1) {
+ sxdp.sxdp_flags |= XDP_SHARED_UMEM;
+ sxdp.sxdp_shared_umem_fd = umem->fd;
+ } else {
+ sxdp.sxdp_flags = xsk->config.bind_flags;
+ }
+
+ err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
+ if (err) {
+ err = -errno;
+ goto out_mmap_tx;
+ }
+
+ *xsk_ptr = xsk;
+ umem->fill_save = NULL;
+ umem->comp_save = NULL;
+ return 0;
+
+out_mmap_tx:
+ if (tx)
+ munmap(tx_map, off.tx.desc +
+ xsk->config.tx_size * sizeof(struct xdp_desc));
+out_mmap_rx:
+ if (rx)
+ munmap(rx_map, off.rx.desc +
+ xsk->config.rx_size * sizeof(struct xdp_desc));
+out_put_ctx:
+ xsk_put_ctx(ctx, unmap);
+out_socket:
+ if (--umem->refcount)
+ close(xsk->fd);
+out_xsk_alloc:
+ free(xsk);
+ return err;
+}
+
+int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *usr_config)
+{
+ if (!umem)
+ return -EFAULT;
+
+ return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem,
+ rx, tx, umem->fill_save,
+ umem->comp_save, usr_config);
+}
+
+int xsk_umem__delete(struct xsk_umem *umem)
+{
+ struct xdp_mmap_offsets off;
+ int err;
+
+ if (!umem)
+ return 0;
+
+ if (umem->refcount)
+ return -EBUSY;
+
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (!err && umem->fill_save && umem->comp_save) {
+ munmap(umem->fill_save->ring - off.fr.desc,
+ off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ munmap(umem->comp_save->ring - off.cr.desc,
+ off.cr.desc + umem->config.comp_size * sizeof(__u64));
+ }
+
+ close(umem->fd);
+ free(umem);
+
+ return 0;
+}
+
+void xsk_socket__delete(struct xsk_socket *xsk)
+{
+ size_t desc_sz = sizeof(struct xdp_desc);
+ struct xdp_mmap_offsets off;
+ struct xsk_umem *umem;
+ struct xsk_ctx *ctx;
+ int err;
+
+ if (!xsk)
+ return;
+
+ ctx = xsk->ctx;
+ umem = ctx->umem;
+
+ xsk_put_ctx(ctx, true);
+
+ err = xsk_get_mmap_offsets(xsk->fd, &off);
+ if (!err) {
+ if (xsk->rx) {
+ munmap(xsk->rx->ring - off.rx.desc,
+ off.rx.desc + xsk->config.rx_size * desc_sz);
+ }
+ if (xsk->tx) {
+ munmap(xsk->tx->ring - off.tx.desc,
+ off.tx.desc + xsk->config.tx_size * desc_sz);
+ }
+ }
+
+ umem->refcount--;
+ /* Do not close an fd that also has an associated umem connected
+ * to it.
+ */
+ if (xsk->fd != umem->fd)
+ close(xsk->fd);
+ free(xsk);
+}
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
new file mode 100644
index 000000000000..48729da142c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright (c) 2018 - 2019 Intel Corporation.
+ * Copyright (c) 2019 Facebook
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef __XSK_H
+#define __XSK_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/if_xdp.h>
+
+#include <bpf/libbpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Do not access these members directly. Use the functions below. */
+#define DEFINE_XSK_RING(name) \
+struct name { \
+ __u32 cached_prod; \
+ __u32 cached_cons; \
+ __u32 mask; \
+ __u32 size; \
+ __u32 *producer; \
+ __u32 *consumer; \
+ void *ring; \
+ __u32 *flags; \
+}
+
+DEFINE_XSK_RING(xsk_ring_prod);
+DEFINE_XSK_RING(xsk_ring_cons);
+
+/* For a detailed explanation on the memory barriers associated with the
+ * ring, please take a look at net/xdp/xsk_queue.h.
+ */
+
+struct xsk_umem;
+struct xsk_socket;
+
+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
+ __u32 idx)
+{
+ __u64 *addrs = (__u64 *)fill->ring;
+
+ return &addrs[idx & fill->mask];
+}
+
+static inline const __u64 *
+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
+{
+ const __u64 *addrs = (const __u64 *)comp->ring;
+
+ return &addrs[idx & comp->mask];
+}
+
+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
+ __u32 idx)
+{
+ struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
+
+ return &descs[idx & tx->mask];
+}
+
+static inline const struct xdp_desc *
+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
+{
+ const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
+
+ return &descs[idx & rx->mask];
+}
+
+static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
+{
+ return *r->flags & XDP_RING_NEED_WAKEUP;
+}
+
+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
+{
+ __u32 free_entries = r->cached_cons - r->cached_prod;
+
+ if (free_entries >= nb)
+ return free_entries;
+
+ /* Refresh the local tail pointer.
+ * cached_cons is r->size bigger than the real consumer pointer so
+ * that this addition can be avoided in the more frequently
+ * executed code that computes free_entries in the beginning of
+ * this function. Without this optimization it would have been
+ * free_entries = r->cached_prod - r->cached_cons + r->size.
+ */
+ r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
+ r->cached_cons += r->size;
+
+ return r->cached_cons - r->cached_prod;
+}
+
+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
+{
+ __u32 entries = r->cached_prod - r->cached_cons;
+
+ if (entries == 0) {
+ r->cached_prod = __atomic_load_n(r->producer, __ATOMIC_ACQUIRE);
+ entries = r->cached_prod - r->cached_cons;
+ }
+
+ return (entries > nb) ? nb : entries;
+}
+
+static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx)
+{
+ if (xsk_prod_nb_free(prod, nb) < nb)
+ return 0;
+
+ *idx = prod->cached_prod;
+ prod->cached_prod += nb;
+
+ return nb;
+}
+
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
+{
+ /* Make sure everything has been written to the ring before indicating
+ * this to the kernel by writing the producer pointer.
+ */
+ __atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE);
+}
+
+static inline void xsk_ring_prod__cancel(struct xsk_ring_prod *prod, __u32 nb)
+{
+ prod->cached_prod -= nb;
+}
+
+static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
+{
+ __u32 entries = xsk_cons_nb_avail(cons, nb);
+
+ if (entries > 0) {
+ *idx = cons->cached_cons;
+ cons->cached_cons += entries;
+ }
+
+ return entries;
+}
+
+static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb)
+{
+ cons->cached_cons -= nb;
+}
+
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
+{
+ /* Make sure data has been read before indicating we are done
+ * with the entries by updating the consumer pointer.
+ */
+ __atomic_store_n(cons->consumer, *cons->consumer + nb, __ATOMIC_RELEASE);
+}
+
+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
+{
+ return &((char *)umem_area)[addr];
+}
+
+static inline __u64 xsk_umem__extract_addr(__u64 addr)
+{
+ return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline __u64 xsk_umem__extract_offset(__u64 addr)
+{
+ return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
+{
+ return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
+}
+
+int xsk_umem__fd(const struct xsk_umem *umem);
+int xsk_socket__fd(const struct xsk_socket *xsk);
+
+#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
+#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
+
+struct xsk_umem_config {
+ __u32 fill_size;
+ __u32 comp_size;
+ __u32 frame_size;
+ __u32 frame_headroom;
+ __u32 flags;
+ __u32 tx_metadata_len;
+};
+
+int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags);
+void xsk_detach_xdp_program(int ifindex, u32 xdp_flags);
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index);
+void xsk_clear_xskmap(struct bpf_map *map);
+bool xsk_is_in_mode(u32 ifindex, int mode);
+
+struct xsk_socket_config {
+ __u32 rx_size;
+ __u32 tx_size;
+ __u16 bind_flags;
+};
+
+/* Set config to NULL to get the default configuration. */
+int xsk_umem__create(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+int xsk_socket__create(struct xsk_socket **xsk,
+ int ifindex, __u32 queue_id,
+ struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *config);
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ int ifindex,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *config);
+
+/* Returns 0 for success and -EBUSY if the umem is still in use. */
+int xsk_umem__delete(struct xsk_umem *umem);
+void xsk_socket__delete(struct xsk_socket *xsk);
+
+int xsk_set_mtu(int ifindex, int mtu);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __XSK_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 9d54c4645127..47c7b8064f38 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -8,21 +8,14 @@ ksft_xfail=2
ksft_xpass=3
ksft_skip=4
-GREEN='\033[0;92m'
-YELLOW='\033[0;93m'
-RED='\033[0;31m'
-NC='\033[0m'
-STACK_LIM=131072
-SPECFILE=veth.spec
-XSKOBJ=xdpxceiver
-NUMPKTS=10000
+XSKOBJ=xskxceiver
validate_root_exec()
{
msg="skip all tests:"
if [ $UID != 0 ]; then
echo $msg must be run as root >&2
- test_exit $ksft_fail 2
+ test_exit $ksft_fail
else
return $ksft_pass
fi
@@ -33,103 +26,68 @@ validate_veth_support()
msg="skip all tests:"
if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then
echo $msg veth kernel support not available >&2
- test_exit $ksft_skip 1
+ test_exit $ksft_skip
else
ip link del $1
return $ksft_pass
fi
}
-validate_veth_spec_file()
-{
- if [ ! -f ${SPECFILE} ]; then
- test_exit $ksft_skip 1
- fi
-}
-
test_status()
{
statusval=$1
- if [ -n "${colorconsole+set}" ]; then
- if [ $statusval -eq 2 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]"
- fi
- else
- if [ $statusval -eq 2 ]; then
- echo -e "$2: [ FAIL ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "$2: [ SKIPPED ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "$2: [ PASS ]"
- fi
+ if [ $statusval -eq $ksft_fail ]; then
+ echo "$2: [ FAIL ]"
+ elif [ $statusval -eq $ksft_skip ]; then
+ echo "$2: [ SKIPPED ]"
+ elif [ $statusval -eq $ksft_pass ]; then
+ echo "$2: [ PASS ]"
fi
}
test_exit()
{
- retval=$1
- if [ $2 -ne 0 ]; then
- test_status $2 $(basename $0)
+ if [ $1 -ne 0 ]; then
+ test_status $1 $(basename $0)
fi
- exit $retval
+ exit 1
+}
+
+cleanup_iface()
+{
+ ip link set $1 mtu $2
+ ip link set $1 xdp off
+ ip link set $1 xdpgeneric off
}
clear_configs()
{
- if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then
- [ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] &&
- { echo "removing link $1:$2"; ip netns exec $3 ip link del $2; }
- echo "removing ns $3"
- ip netns del $3
- fi
- #Once we delete a veth pair node, the entire veth pair is removed,
- #this is just to be cautious just incase the NS does not exist then
- #veth node inside NS won't get removed so we explicitly remove it
[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
- { echo "removing link $1"; ip link del $1; }
- if [ -f ${SPECFILE} ]; then
- echo "removing spec file:" ${SPECFILE}
- rm -f ${SPECFILE}
- fi
+ { ip link del $1; }
}
cleanup_exit()
{
- echo "cleaning up..."
- clear_configs $1 $2 $3
+ clear_configs $1 $2
}
validate_ip_utility()
{
- [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
+ [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; }
}
-vethXDPgeneric()
+exec_xskxceiver()
{
- ip link set dev $1 xdpdrv off
- ip netns exec $3 ip link set dev $2 xdpdrv off
-}
-
-vethXDPnative()
-{
- ip link set dev $1 xdpgeneric off
- ip netns exec $3 ip link set dev $2 xdpgeneric off
-}
-
-execxdpxceiver()
-{
- local -a 'paramkeys=("${!'"$1"'[@]}")' copy
- paramkeysstr=${paramkeys[*]}
+ if [[ $busy_poll -eq 1 ]]; then
+ ARGS+="-b "
+ fi
- for index in $paramkeysstr;
- do
- current=$1"[$index]"
- copy[$index]=${!current}
- done
+ ./${XSKOBJ} -i ${VETH0} -i ${VETH1} ${ARGS}
+ retval=$?
- ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS}
+ if [[ $list -ne 1 ]]; then
+ test_status $retval "${TEST_NAME}"
+ statusList+=($retval)
+ nameList+=(${TEST_NAME})
+ fi
}
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
new file mode 100644
index 000000000000..45810ff552da
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef XSK_XDP_COMMON_H_
+#define XSK_XDP_COMMON_H_
+
+#define MAX_SOCKETS 2
+#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
+
+struct xdp_info {
+ __u64 count;
+} __attribute__((aligned(32)));
+
+#endif /* XSK_XDP_COMMON_H_ */
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
new file mode 100644
index 000000000000..05b3cebc5ca9
--- /dev/null
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+/*
+ * Some functions in this program are taken from
+ * Linux kernel samples/bpf/xdpsock* and modified
+ * for use.
+ *
+ * See test_xsk.sh for detailed information on test topology
+ * and prerequisite network setup.
+ *
+ * This test program contains two threads, each thread is single socket with
+ * a unique UMEM. It validates in-order packet delivery and packet content
+ * by sending packets to each other.
+ *
+ * Tests Information:
+ * ------------------
+ * These selftests test AF_XDP SKB and Native/DRV modes using veth
+ * Virtual Ethernet interfaces.
+ *
+ * For each mode, the following tests are run:
+ * a. nopoll - soft-irq processing in run-to-completion mode
+ * b. poll - using poll() syscall
+ * c. Socket Teardown
+ * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
+ * both sockets, then repeat multiple times. Only nopoll mode is used
+ * d. Bi-directional sockets
+ * Configure sockets as bi-directional tx/rx sockets, sets up fill and
+ * completion rings on each socket, tx/rx in both directions. Only nopoll
+ * mode is used
+ * e. Statistics
+ * Trigger some error conditions and ensure that the appropriate statistics
+ * are incremented. Within this test, the following statistics are tested:
+ * i. rx dropped
+ * Increase the UMEM frame headroom to a value which results in
+ * insufficient space in the rx buffer for both the packet and the headroom.
+ * ii. tx invalid
+ * Set the 'len' field of tx descriptors to an invalid value (umem frame
+ * size + 1).
+ * iii. rx ring full
+ * Reduce the size of the RX ring to a fraction of the fill ring size.
+ * iv. fill queue empty
+ * Do not populate the fill queue and then try to receive pkts.
+ * f. bpf_link resource persistence
+ * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0,
+ * then remove xsk sockets from queue 0 on both veth interfaces and
+ * finally run a traffic on queues ids 1
+ * g. unaligned mode
+ * h. tests for invalid and corner case Tx descriptors so that the correct ones
+ * are discarded and let through, respectively.
+ * i. 2K frame size tests
+ * j. If multi-buffer is supported, send 9k packets divided into 3 frames
+ * k. If multi-buffer and huge pages are supported, send 9k packets in a single frame
+ * using unaligned mode
+ * l. If multi-buffer is supported, try various nasty combinations of descriptors to
+ * check if they pass the validation or not
+ *
+ * Flow:
+ * -----
+ * - Single process spawns two threads: Tx and Rx
+ * - Each of these two threads attach to a veth interface
+ * - Each thread creates one AF_XDP socket connected to a unique umem for each
+ * veth interface
+ * - Tx thread Transmits a number of packets from veth<xxxx> to veth<yyyy>
+ * - Rx thread verifies if all packets were received and delivered in-order,
+ * and have the right content
+ *
+ * Enable/disable packet dump mode:
+ * --------------------------
+ * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
+ * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/if_link.h>
+#include <linux/if_ether.h>
+#include <linux/mman.h>
+#include <linux/netdev.h>
+#include <linux/ethtool.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <libgen.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+
+#include "prog_tests/test_xsk.h"
+#include "xsk_xdp_progs.skel.h"
+#include "xsk.h"
+#include "xskxceiver.h"
+#include <bpf/bpf.h>
+#include <linux/filter.h>
+#include "kselftest.h"
+#include "xsk_xdp_common.h"
+
+#include <network_helpers.h>
+
+static bool opt_print_tests;
+static enum test_mode opt_mode = TEST_MODE_ALL;
+static u32 opt_run_test = RUN_ALL_TESTS;
+
+void test__fail(void) { /* for network_helpers.c */ }
+
+static void __exit_with_error(int error, const char *file, const char *func, int line)
+{
+ ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line,
+ error, strerror(error));
+ ksft_exit_xfail();
+}
+
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
+
+static bool ifobj_zc_avail(struct ifobject *ifobject)
+{
+ size_t umem_sz = DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ struct xsk_socket_info *xsk;
+ struct xsk_umem_info *umem;
+ bool zc_avail = false;
+ void *bufs;
+ int ret;
+
+ bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (bufs == MAP_FAILED)
+ exit_with_error(errno);
+
+ umem = calloc(1, sizeof(struct xsk_umem_info));
+ if (!umem) {
+ munmap(bufs, umem_sz);
+ exit_with_error(ENOMEM);
+ }
+ umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+ ret = xsk_configure_umem(ifobject, umem, bufs, umem_sz);
+ if (ret)
+ exit_with_error(-ret);
+
+ xsk = calloc(1, sizeof(struct xsk_socket_info));
+ if (!xsk)
+ goto out;
+ ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
+ ifobject->rx_on = true;
+ xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ ret = xsk_configure_socket(xsk, umem, ifobject, false);
+ if (!ret)
+ zc_avail = true;
+
+ xsk_socket__delete(xsk->xsk);
+ free(xsk);
+out:
+ munmap(umem->buffer, umem_sz);
+ xsk_umem__delete(umem->umem);
+ free(umem);
+ return zc_avail;
+}
+
+static struct option long_options[] = {
+ {"interface", required_argument, 0, 'i'},
+ {"busy-poll", no_argument, 0, 'b'},
+ {"verbose", no_argument, 0, 'v'},
+ {"mode", required_argument, 0, 'm'},
+ {"list", no_argument, 0, 'l'},
+ {"test", required_argument, 0, 't'},
+ {"help", no_argument, 0, 'h'},
+ {0, 0, 0, 0}
+};
+
+static void print_usage(char **argv)
+{
+ const char *str =
+ " Usage: xskxceiver [OPTIONS]\n"
+ " Options:\n"
+ " -i, --interface Use interface\n"
+ " -v, --verbose Verbose output\n"
+ " -b, --busy-poll Enable busy poll\n"
+ " -m, --mode Run only mode skb, drv, or zc\n"
+ " -l, --list List all available tests\n"
+ " -t, --test Run a specific test. Enter number from -l option.\n"
+ " -h, --help Display this help and exit\n";
+
+ ksft_print_msg(str, basename(argv[0]));
+ ksft_exit_xfail();
+}
+
+static bool validate_interface(struct ifobject *ifobj)
+{
+ if (!strcmp(ifobj->ifname, ""))
+ return false;
+ return true;
+}
+
+static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc,
+ char **argv)
+{
+ struct ifobject *ifobj;
+ u32 interface_nb = 0;
+ int option_index, c;
+
+ opterr = 0;
+
+ for (;;) {
+ c = getopt_long(argc, argv, "i:vbm:lt:", long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'i':
+ if (interface_nb == 0)
+ ifobj = ifobj_tx;
+ else if (interface_nb == 1)
+ ifobj = ifobj_rx;
+ else
+ break;
+
+ memcpy(ifobj->ifname, optarg,
+ min_t(size_t, MAX_INTERFACE_NAME_CHARS, strlen(optarg)));
+
+ ifobj->ifindex = if_nametoindex(ifobj->ifname);
+ if (!ifobj->ifindex)
+ exit_with_error(errno);
+
+ interface_nb++;
+ break;
+ case 'v':
+ opt_verbose = true;
+ break;
+ case 'b':
+ ifobj_tx->busy_poll = true;
+ ifobj_rx->busy_poll = true;
+ break;
+ case 'm':
+ if (!strncmp("skb", optarg, strlen(optarg)))
+ opt_mode = TEST_MODE_SKB;
+ else if (!strncmp("drv", optarg, strlen(optarg)))
+ opt_mode = TEST_MODE_DRV;
+ else if (!strncmp("zc", optarg, strlen(optarg)))
+ opt_mode = TEST_MODE_ZC;
+ else
+ print_usage(argv);
+ break;
+ case 'l':
+ opt_print_tests = true;
+ break;
+ case 't':
+ errno = 0;
+ opt_run_test = strtol(optarg, NULL, 0);
+ if (errno)
+ print_usage(argv);
+ break;
+ case 'h':
+ default:
+ print_usage(argv);
+ }
+ }
+}
+
+static void xsk_unload_xdp_programs(struct ifobject *ifobj)
+{
+ xsk_xdp_progs__destroy(ifobj->xdp_progs);
+}
+
+static void run_pkt_test(struct test_spec *test)
+{
+ int ret;
+
+ ret = test->test_func(test);
+
+ switch (ret) {
+ case TEST_PASS:
+ ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ case TEST_SKIP:
+ ksft_test_result_skip("SKIP: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ case TEST_FAILURE:
+ ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ default:
+ ksft_test_result_fail("FAIL: %s %s%s -- Unexpected returned value (%d)\n",
+ mode_string(test), busy_poll_string(test), test->name, ret);
+ }
+
+ pkt_stream_restore_default(test);
+}
+
+static bool is_xdp_supported(int ifindex)
+{
+ int flags = XDP_FLAGS_DRV_MODE;
+
+ LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = flags);
+ struct bpf_insn insns[2] = {
+ BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
+ BPF_EXIT_INSN()
+ };
+ int prog_fd, insn_cnt = ARRAY_SIZE(insns);
+ int err;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
+ if (prog_fd < 0)
+ return false;
+
+ err = bpf_xdp_attach(ifindex, prog_fd, flags, NULL);
+ if (err) {
+ close(prog_fd);
+ return false;
+ }
+
+ bpf_xdp_detach(ifindex, flags, NULL);
+ close(prog_fd);
+
+ return true;
+}
+
+static void print_tests(void)
+{
+ u32 i;
+
+ printf("Tests:\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++)
+ printf("%u: %s\n", i, tests[i].name);
+ for (i = ARRAY_SIZE(tests); i < ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests); i++)
+ printf("%u: %s\n", i, ci_skip_tests[i - ARRAY_SIZE(tests)].name);
+}
+
+int main(int argc, char **argv)
+{
+ const size_t total_tests = ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests);
+ struct pkt_stream *rx_pkt_stream_default;
+ struct pkt_stream *tx_pkt_stream_default;
+ struct ifobject *ifobj_tx, *ifobj_rx;
+ u32 i, j, failed_tests = 0, nb_tests;
+ int modes = TEST_MODE_SKB + 1;
+ struct test_spec test;
+ bool shared_netdev;
+ int ret;
+
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ ifobj_tx = ifobject_create();
+ if (!ifobj_tx)
+ exit_with_error(ENOMEM);
+ ifobj_rx = ifobject_create();
+ if (!ifobj_rx)
+ exit_with_error(ENOMEM);
+
+ setlocale(LC_ALL, "");
+
+ parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
+
+ if (opt_print_tests) {
+ print_tests();
+ ksft_exit_xpass();
+ }
+ if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= total_tests) {
+ ksft_print_msg("Error: test %u does not exist.\n", opt_run_test);
+ ksft_exit_xfail();
+ }
+
+ shared_netdev = (ifobj_tx->ifindex == ifobj_rx->ifindex);
+ ifobj_tx->shared_umem = shared_netdev;
+ ifobj_rx->shared_umem = shared_netdev;
+
+ if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx))
+ print_usage(argv);
+
+ if (is_xdp_supported(ifobj_tx->ifindex)) {
+ modes++;
+ if (ifobj_zc_avail(ifobj_tx))
+ modes++;
+ }
+
+ ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring);
+ if (!ret) {
+ ifobj_tx->hw_ring_size_supp = true;
+ ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending;
+ ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
+ }
+
+ if (init_iface(ifobj_rx, worker_testapp_validate_rx) ||
+ init_iface(ifobj_tx, worker_testapp_validate_tx)) {
+ ksft_print_msg("Error : can't initialize interfaces\n");
+ ksft_exit_xfail();
+ }
+
+ test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+ tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ if (!tx_pkt_stream_default || !rx_pkt_stream_default)
+ exit_with_error(ENOMEM);
+ test.tx_pkt_stream_default = tx_pkt_stream_default;
+ test.rx_pkt_stream_default = rx_pkt_stream_default;
+
+ if (opt_run_test == RUN_ALL_TESTS)
+ nb_tests = total_tests;
+ else
+ nb_tests = 1;
+ if (opt_mode == TEST_MODE_ALL) {
+ ksft_set_plan(modes * nb_tests);
+ } else {
+ if (opt_mode == TEST_MODE_DRV && modes <= TEST_MODE_DRV) {
+ ksft_print_msg("Error: XDP_DRV mode not supported.\n");
+ ksft_exit_xfail();
+ }
+ if (opt_mode == TEST_MODE_ZC && modes <= TEST_MODE_ZC) {
+ ksft_print_msg("Error: zero-copy mode not supported.\n");
+ ksft_exit_xfail();
+ }
+
+ ksft_set_plan(nb_tests);
+ }
+
+ for (i = 0; i < modes; i++) {
+ if (opt_mode != TEST_MODE_ALL && i != opt_mode)
+ continue;
+
+ for (j = 0; j < total_tests; j++) {
+ if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test)
+ continue;
+
+ if (j < ARRAY_SIZE(tests))
+ test_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+ else
+ test_init(&test, ifobj_tx, ifobj_rx, i,
+ &ci_skip_tests[j - ARRAY_SIZE(tests)]);
+ run_pkt_test(&test);
+ usleep(USLEEP_MAX);
+
+ if (test.fail)
+ failed_tests++;
+ }
+ }
+
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
+ pkt_stream_delete(tx_pkt_stream_default);
+ pkt_stream_delete(rx_pkt_stream_default);
+ xsk_unload_xdp_programs(ifobj_tx);
+ xsk_unload_xdp_programs(ifobj_rx);
+ ifobject_delete(ifobj_tx);
+ ifobject_delete(ifobj_rx);
+
+ if (failed_tests)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
new file mode 100644
index 000000000000..3ca518df23ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2020 Intel Corporation.
+ */
+
+#ifndef XSKXCEIVER_H_
+#define XSKXCEIVER_H_
+
+#include <limits.h>
+
+#include "xsk_xdp_progs.skel.h"
+#include "xsk_xdp_common.h"
+
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+#define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+#define PF_XDP AF_XDP
+#endif
+
+#define MAX_TEARDOWN_ITER 10
+#define MAX_ETH_JUMBO_SIZE 9000
+#define SOCK_RECONF_CTR 10
+#define RX_FULL_RXQSIZE 32
+#define UMEM_HEADROOM_TEST_SIZE 128
+#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
+#define RUN_ALL_TESTS UINT_MAX
+#define NUM_MAC_ADDRESSES 4
+
+#endif /* XSKXCEIVER_H_ */
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
index 3266cc9293fe..1159d81890c2 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -18,7 +18,7 @@
#include <errno.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define COUNT_ISN_BPS 4
#define COUNT_WPS 4
@@ -284,9 +284,9 @@ static void check_success(const char *msg)
nr_tests++;
if (ret)
- ksft_test_result_pass(msg);
+ ksft_test_result_pass("%s", msg);
else
- ksft_test_result_fail(msg);
+ ksft_test_result_fail("%s", msg);
}
static void launch_instruction_breakpoints(char *buf, int local, int global)
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
index ad41ea69001b..5fc0f37f3fd4 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -26,7 +26,7 @@
#include <errno.h>
#include <signal.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static volatile uint8_t var[96] __attribute__((__aligned__(32)));
@@ -145,7 +145,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp)
if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
ksft_print_msg(
- "ptrace(PTRACE_SINGLESTEP) failed: %s\n",
+ "ptrace(PTRACE_CONT) failed: %s\n",
strerror(errno));
return false;
}
@@ -159,7 +159,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp)
}
alarm(0);
if (WIFEXITED(status)) {
- ksft_print_msg("child did not single-step\n");
+ ksft_print_msg("child exited prematurely\n");
return false;
}
if (!WIFSTOPPED(status)) {
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index 2cf6f10ab7c4..ca2aaab9e4ca 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -19,7 +19,7 @@
#include <sys/types.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
void child(int cpu)
{
@@ -89,7 +89,7 @@ int run_test(int cpu)
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
- ksft_print_msg("waitpid() failed: $s\n", strerror(errno));
+ ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
return KSFT_FAIL;
}
if (WIFEXITED(status)) {
@@ -127,23 +127,42 @@ int run_test(int cpu)
return KSFT_PASS;
}
+/*
+ * Reads the suspend success count from sysfs.
+ * Returns the count on success or exits on failure.
+ */
+static int get_suspend_success_count_or_fail(void)
+{
+ FILE *fp;
+ int val;
+
+ fp = fopen("/sys/power/suspend_stats/success", "r");
+ if (!fp)
+ ksft_exit_fail_msg(
+ "Failed to open suspend_stats/success: %s\n",
+ strerror(errno));
+
+ if (fscanf(fp, "%d", &val) != 1) {
+ fclose(fp);
+ ksft_exit_fail_msg(
+ "Failed to read suspend success count\n");
+ }
+
+ fclose(fp);
+ return val;
+}
+
void suspend(void)
{
- int power_state_fd;
- struct sigevent event = {};
int timerfd;
int err;
+ int count_before;
+ int count_after;
struct itimerspec spec = {};
if (getuid() != 0)
ksft_exit_skip("Please run the test as root - Exiting.\n");
- power_state_fd = open("/sys/power/state", O_RDWR);
- if (power_state_fd < 0)
- ksft_exit_fail_msg(
- "open(\"/sys/power/state\") failed %s)\n",
- strerror(errno));
-
timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
if (timerfd < 0)
ksft_exit_fail_msg("timerfd_create() failed\n");
@@ -153,11 +172,15 @@ void suspend(void)
if (err < 0)
ksft_exit_fail_msg("timerfd_settime() failed\n");
- if (write(power_state_fd, "mem", strlen("mem")) != strlen("mem"))
+ count_before = get_suspend_success_count_or_fail();
+
+ system("(echo mem > /sys/power/state) 2> /dev/null");
+
+ count_after = get_suspend_success_count_or_fail();
+ if (count_after <= count_before)
ksft_exit_fail_msg("Failed to enter Suspend state\n");
close(timerfd);
- close(power_state_fd);
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/cachestat/.gitignore b/tools/testing/selftests/cachestat/.gitignore
new file mode 100644
index 000000000000..abbb13b6e96b
--- /dev/null
+++ b/tools/testing/selftests/cachestat/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_cachestat
+tmpshmcstat
diff --git a/tools/testing/selftests/cachestat/Makefile b/tools/testing/selftests/cachestat/Makefile
new file mode 100644
index 000000000000..778b54ebb036
--- /dev/null
+++ b/tools/testing/selftests/cachestat/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := test_cachestat
+
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -Wall
+LDLIBS += -lrt
+
+include ../lib.mk
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
new file mode 100644
index 000000000000..542cd09cb443
--- /dev/null
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/magic.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "kselftest.h"
+
+#define NR_TESTS 9
+
+static const char * const dev_files[] = {
+ "/dev/zero", "/dev/null", "/dev/urandom",
+ "/proc/version", "/proc"
+};
+
+void print_cachestat(struct cachestat *cs)
+{
+ ksft_print_msg(
+ "Using cachestat: Cached: %llu, Dirty: %llu, Writeback: %llu, Evicted: %llu, Recently Evicted: %llu\n",
+ cs->nr_cache, cs->nr_dirty, cs->nr_writeback,
+ cs->nr_evicted, cs->nr_recently_evicted);
+}
+
+enum file_type {
+ FILE_MMAP,
+ FILE_SHMEM
+};
+
+bool write_exactly(int fd, size_t filesize)
+{
+ int random_fd = open("/dev/urandom", O_RDONLY);
+ char *cursor, *data;
+ int remained;
+ bool ret;
+
+ if (random_fd < 0) {
+ ksft_print_msg("Unable to access urandom.\n");
+ ret = false;
+ goto out;
+ }
+
+ data = malloc(filesize);
+ if (!data) {
+ ksft_print_msg("Unable to allocate data.\n");
+ ret = false;
+ goto close_random_fd;
+ }
+
+ remained = filesize;
+ cursor = data;
+
+ while (remained) {
+ ssize_t read_len = read(random_fd, cursor, remained);
+
+ if (read_len <= 0) {
+ ksft_print_msg("Unable to read from urandom.\n");
+ ret = false;
+ goto out_free_data;
+ }
+
+ remained -= read_len;
+ cursor += read_len;
+ }
+
+ /* write random data to fd */
+ remained = filesize;
+ cursor = data;
+ while (remained) {
+ ssize_t write_len = write(fd, cursor, remained);
+
+ if (write_len <= 0) {
+ ksft_print_msg("Unable write random data to file.\n");
+ ret = false;
+ goto out_free_data;
+ }
+
+ remained -= write_len;
+ cursor += write_len;
+ }
+
+ ret = true;
+out_free_data:
+ free(data);
+close_random_fd:
+ close(random_fd);
+out:
+ return ret;
+}
+
+/*
+ * fsync() is implemented via noop_fsync() on tmpfs. This makes the fsync()
+ * test fail below, so we need to check for test file living on a tmpfs.
+ */
+static bool is_on_tmpfs(int fd)
+{
+ struct statfs statfs_buf;
+
+ if (fstatfs(fd, &statfs_buf))
+ return false;
+
+ return statfs_buf.f_type == TMPFS_MAGIC;
+}
+
+/*
+ * Open/create the file at filename, (optionally) write random data to it
+ * (exactly num_pages), then test the cachestat syscall on this file.
+ *
+ * If test_fsync == true, fsync the file, then check the number of dirty
+ * pages.
+ */
+static int test_cachestat(const char *filename, bool write_random, bool create,
+ bool test_fsync, unsigned long num_pages,
+ int open_flags, mode_t open_mode)
+{
+ size_t PS = sysconf(_SC_PAGESIZE);
+ int filesize = num_pages * PS;
+ int ret = KSFT_PASS;
+ long syscall_ret;
+ struct cachestat cs;
+ struct cachestat_range cs_range = { 0, filesize };
+
+ int fd = open(filename, open_flags, open_mode);
+
+ if (fd == -1) {
+ ksft_print_msg("Unable to create/open file.\n");
+ ret = KSFT_FAIL;
+ goto out;
+ } else {
+ ksft_print_msg("Create/open %s\n", filename);
+ }
+
+ if (write_random) {
+ if (!write_exactly(fd, filesize)) {
+ ksft_print_msg("Unable to access urandom.\n");
+ ret = KSFT_FAIL;
+ goto out1;
+ }
+ }
+
+ syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
+
+ ksft_print_msg("Cachestat call returned %ld\n", syscall_ret);
+
+ if (syscall_ret) {
+ ksft_print_msg("Cachestat returned non-zero.\n");
+ ret = KSFT_FAIL;
+ goto out1;
+
+ } else {
+ print_cachestat(&cs);
+
+ if (write_random) {
+ if (cs.nr_cache + cs.nr_evicted != num_pages) {
+ ksft_print_msg(
+ "Total number of cached and evicted pages is off.\n");
+ ret = KSFT_FAIL;
+ }
+ }
+ }
+
+ if (test_fsync) {
+ if (is_on_tmpfs(fd)) {
+ ret = KSFT_SKIP;
+ } else if (fsync(fd)) {
+ ksft_print_msg("fsync fails.\n");
+ ret = KSFT_FAIL;
+ } else {
+ syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
+
+ ksft_print_msg("Cachestat call (after fsync) returned %ld\n",
+ syscall_ret);
+
+ if (!syscall_ret) {
+ print_cachestat(&cs);
+
+ if (cs.nr_dirty) {
+ ret = KSFT_FAIL;
+ ksft_print_msg(
+ "Number of dirty should be zero after fsync.\n");
+ }
+ } else {
+ ksft_print_msg("Cachestat (after fsync) returned non-zero.\n");
+ ret = KSFT_FAIL;
+ goto out1;
+ }
+ }
+ }
+
+out1:
+ close(fd);
+
+ if (create)
+ remove(filename);
+out:
+ return ret;
+}
+const char *file_type_str(enum file_type type)
+{
+ switch (type) {
+ case FILE_SHMEM:
+ return "shmem";
+ case FILE_MMAP:
+ return "mmap";
+ default:
+ return "unknown";
+ }
+}
+
+
+bool run_cachestat_test(enum file_type type)
+{
+ size_t PS = sysconf(_SC_PAGESIZE);
+ size_t filesize = PS * 512 * 2; /* 2 2MB huge pages */
+ int syscall_ret;
+ size_t compute_len = PS * 512;
+ struct cachestat_range cs_range = { PS, compute_len };
+ char *filename = "tmpshmcstat", *map;
+ struct cachestat cs;
+ bool ret = true;
+ int fd;
+ unsigned long num_pages = compute_len / PS;
+ if (type == FILE_SHMEM)
+ fd = shm_open(filename, O_CREAT | O_RDWR, 0600);
+ else
+ fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0666);
+
+ if (fd < 0) {
+ ksft_print_msg("Unable to create %s file.\n",
+ file_type_str(type));
+ ret = false;
+ goto out;
+ }
+
+ if (ftruncate(fd, filesize)) {
+ ksft_print_msg("Unable to truncate %s file.\n",file_type_str(type));
+ ret = false;
+ goto close_fd;
+ }
+ switch (type) {
+ case FILE_SHMEM:
+ if (!write_exactly(fd, filesize)) {
+ ksft_print_msg("Unable to write to file.\n");
+ ret = false;
+ goto close_fd;
+ }
+ break;
+ case FILE_MMAP:
+ map = mmap(NULL, filesize, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+
+ if (map == MAP_FAILED) {
+ ksft_print_msg("mmap failed.\n");
+ ret = false;
+ goto close_fd;
+ }
+ for (int i = 0; i < filesize; i++)
+ map[i] = 'A';
+ break;
+ default:
+ ksft_print_msg("Unsupported file type.\n");
+ ret = false;
+ goto close_fd;
+ }
+ syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
+
+ if (syscall_ret) {
+ ksft_print_msg("Cachestat returned non-zero.\n");
+ ret = false;
+ goto close_fd;
+ } else {
+ print_cachestat(&cs);
+ if (cs.nr_cache + cs.nr_evicted != num_pages) {
+ ksft_print_msg(
+ "Total number of cached and evicted pages is off.\n");
+ ret = false;
+ }
+ }
+
+close_fd:
+ shm_unlink(filename);
+out:
+ return ret;
+}
+
+int main(void)
+{
+ int ret;
+
+ ksft_print_header();
+
+ ret = syscall(__NR_cachestat, -1, NULL, NULL, 0);
+ if (ret == -1 && errno == ENOSYS)
+ ksft_exit_skip("cachestat syscall not available\n");
+
+ ksft_set_plan(NR_TESTS);
+
+ if (ret == -1 && errno == EBADF) {
+ ksft_test_result_pass("bad file descriptor recognized\n");
+ ret = 0;
+ } else {
+ ksft_test_result_fail("bad file descriptor ignored\n");
+ ret = 1;
+ }
+
+ for (int i = 0; i < 5; i++) {
+ const char *dev_filename = dev_files[i];
+
+ if (test_cachestat(dev_filename, false, false, false,
+ 4, O_RDONLY, 0400) == KSFT_PASS)
+ ksft_test_result_pass("cachestat works with %s\n", dev_filename);
+ else {
+ ksft_test_result_fail("cachestat fails with %s\n", dev_filename);
+ ret = 1;
+ }
+ }
+
+ if (test_cachestat("tmpfilecachestat", true, true,
+ false, 4, O_CREAT | O_RDWR, 0600) == KSFT_PASS)
+ ksft_test_result_pass("cachestat works with a normal file\n");
+ else {
+ ksft_test_result_fail("cachestat fails with normal file\n");
+ ret = 1;
+ }
+
+ switch (test_cachestat("tmpfilecachestat", true, true,
+ true, 4, O_CREAT | O_RDWR, 0600)) {
+ case KSFT_FAIL:
+ ksft_test_result_fail("cachestat fsync fails with normal file\n");
+ ret = KSFT_FAIL;
+ break;
+ case KSFT_PASS:
+ ksft_test_result_pass("cachestat fsync works with a normal file\n");
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("tmpfilecachestat is on tmpfs\n");
+ break;
+ }
+
+ if (run_cachestat_test(FILE_SHMEM))
+ ksft_test_result_pass("cachestat works with a shmem file\n");
+ else {
+ ksft_test_result_fail("cachestat fails with a shmem file\n");
+ ret = 1;
+ }
+
+ if (run_cachestat_test(FILE_MMAP))
+ ksft_test_result_pass("cachestat works with a mmap file\n");
+ else {
+ ksft_test_result_fail("cachestat fails with a mmap file\n");
+ ret = 1;
+ }
+ return ret;
+}
diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile
index 6e9d98d457d5..411ac098308f 100644
--- a/tools/testing/selftests/capabilities/Makefile
+++ b/tools/testing/selftests/capabilities/Makefile
@@ -2,7 +2,7 @@
TEST_GEN_FILES := validate_cap
TEST_GEN_PROGS := test_execve
-CFLAGS += -O2 -g -std=gnu99 -Wall
+CFLAGS += -O2 -g -std=gnu99 -Wall $(KHDR_INCLUDES)
LDLIBS += -lcap-ng -lrt -ldl
include ../lib.mk
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index df0ef02b4036..46fc8d46b6e6 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -18,15 +18,7 @@
#include <sys/prctl.h>
#include <sys/stat.h>
-#include "../kselftest.h"
-
-#ifndef PR_CAP_AMBIENT
-#define PR_CAP_AMBIENT 47
-# define PR_CAP_AMBIENT_IS_SET 1
-# define PR_CAP_AMBIENT_RAISE 2
-# define PR_CAP_AMBIENT_LOWER 3
-# define PR_CAP_AMBIENT_CLEAR_ALL 4
-#endif
+#include "kselftest.h"
static int nerrs;
static pid_t mpid; /* main() pid is used to avoid duplicate test counts */
@@ -90,17 +82,13 @@ static bool create_and_enter_ns(uid_t inner_uid)
{
uid_t outer_uid;
gid_t outer_gid;
- int i;
+ int i, ret;
bool have_outer_privilege;
outer_uid = getuid();
outer_gid = getgid();
- /*
- * TODO: If we're already root, we could skip creating the userns.
- */
-
- if (unshare(CLONE_NEWNS) == 0) {
+ if (outer_uid == 0 && unshare(CLONE_NEWNS) == 0) {
ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n");
if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0)
ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n",
@@ -109,7 +97,10 @@ static bool create_and_enter_ns(uid_t inner_uid)
ksft_exit_fail_msg("setresuid - %s\n", strerror(errno));
// Re-enable effective caps
- capng_get_caps_process();
+ ret = capng_get_caps_process();
+ if (ret == -1)
+ ksft_exit_fail_msg("capng_get_caps_process failed\n");
+
for (i = 0; i < CAP_LAST_CAP; i++)
if (capng_have_capability(CAPNG_PERMITTED, i))
capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
@@ -219,6 +210,7 @@ static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
static int do_tests(int uid, const char *our_path)
{
+ int ret;
bool have_outer_privilege = create_and_enter_ns(uid);
int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
@@ -262,7 +254,9 @@ static int do_tests(int uid, const char *our_path)
ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
}
- capng_get_caps_process();
+ ret = capng_get_caps_process();
+ if (ret == -1)
+ ksft_exit_fail_msg("capng_get_caps_process failed\n");
/* Make sure that i starts out clear */
capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index cdfc94268fe6..cef1d9937b9f 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -7,15 +7,7 @@
#include <sys/prctl.h>
#include <sys/auxv.h>
-#include "../kselftest.h"
-
-#ifndef PR_CAP_AMBIENT
-#define PR_CAP_AMBIENT 47
-# define PR_CAP_AMBIENT_IS_SET 1
-# define PR_CAP_AMBIENT_RAISE 2
-# define PR_CAP_AMBIENT_LOWER 3
-# define PR_CAP_AMBIENT_CLEAR_ALL 4
-#endif
+#include "kselftest.h"
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
# define HAVE_GETAUXVAL
@@ -36,6 +28,7 @@ static bool bool_arg(char **argv, int i)
int main(int argc, char **argv)
{
const char *atsec = "";
+ int ret;
/*
* Be careful just in case a setgid or setcapped copy of this
@@ -52,7 +45,11 @@ int main(int argc, char **argv)
atsec = " (AT_SECURE is not set)";
#endif
- capng_get_caps_process();
+ ret = capng_get_caps_process();
+ if (ret == -1) {
+ ksft_print_msg("capng_get_caps_process failed\n");
+ return 1;
+ }
if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
ksft_print_msg("Wrong effective state%s\n", atsec);
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index 84cfcabea838..952e4448bf07 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -1,5 +1,12 @@
# SPDX-License-Identifier: GPL-2.0-only
-test_memcontrol
test_core
+test_cpu
+test_cpuset
test_freezer
-test_kmem \ No newline at end of file
+test_hugetlb_memcg
+test_kill
+test_kmem
+test_memcontrol
+test_pids
+test_zswap
+wait_inotify
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index f027d933595b..e01584c2189a 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -1,18 +1,35 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -Wall -pthread
-all:
+all: ${HELPER_PROGS}
TEST_FILES := with_stress.sh
-TEST_PROGS := test_stress.sh
-TEST_GEN_PROGS = test_memcontrol
-TEST_GEN_PROGS += test_kmem
-TEST_GEN_PROGS += test_core
+TEST_PROGS := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh
+TEST_GEN_FILES := wait_inotify
+# Keep the lists lexicographically sorted
+TEST_GEN_PROGS = test_core
+TEST_GEN_PROGS += test_cpu
+TEST_GEN_PROGS += test_cpuset
TEST_GEN_PROGS += test_freezer
+TEST_GEN_PROGS += test_hugetlb_memcg
+TEST_GEN_PROGS += test_kill
+TEST_GEN_PROGS += test_kmem
+TEST_GEN_PROGS += test_memcontrol
+TEST_GEN_PROGS += test_pids
+TEST_GEN_PROGS += test_zswap
+
+LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
include ../lib.mk
+include lib/libcgroup.mk
-$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
-$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
-$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
-$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_core: $(LIBCGROUP_O)
+$(OUTPUT)/test_cpu: $(LIBCGROUP_O)
+$(OUTPUT)/test_cpuset: $(LIBCGROUP_O)
+$(OUTPUT)/test_freezer: $(LIBCGROUP_O)
+$(OUTPUT)/test_hugetlb_memcg: $(LIBCGROUP_O)
+$(OUTPUT)/test_kill: $(LIBCGROUP_O)
+$(OUTPUT)/test_kmem: $(LIBCGROUP_O)
+$(OUTPUT)/test_memcontrol: $(LIBCGROUP_O)
+$(OUTPUT)/test_pids: $(LIBCGROUP_O)
+$(OUTPUT)/test_zswap: $(LIBCGROUP_O)
diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config
new file mode 100644
index 000000000000..39f979690dd3
--- /dev/null
+++ b/tools/testing/selftests/cgroup/config
@@ -0,0 +1,6 @@
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_MEMCG=y
+CONFIG_PAGE_COUNTER=y
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index 027014662fb2..44c52f620fda 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -5,54 +5,53 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/limits.h>
+#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/inotify.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "cgroup_util.h"
-#include "../clone3/clone3_selftests.h"
+#include "../../clone3/clone3_selftests.h"
-static ssize_t read_text(const char *path, char *buf, size_t max_len)
+bool cg_test_v1_named;
+
+/* Returns read len on success, or -errno on failure. */
+ssize_t read_text(const char *path, char *buf, size_t max_len)
{
ssize_t len;
int fd;
fd = open(path, O_RDONLY);
if (fd < 0)
- return fd;
+ return -errno;
len = read(fd, buf, max_len - 1);
- if (len < 0)
- goto out;
- buf[len] = 0;
-out:
+ if (len >= 0)
+ buf[len] = 0;
+
close(fd);
- return len;
+ return len < 0 ? -errno : len;
}
-static ssize_t write_text(const char *path, char *buf, ssize_t len)
+/* Returns written len on success, or -errno on failure. */
+ssize_t write_text(const char *path, char *buf, ssize_t len)
{
int fd;
fd = open(path, O_WRONLY | O_APPEND);
if (fd < 0)
- return fd;
+ return -errno;
len = write(fd, buf, len);
- if (len < 0) {
- close(fd);
- return len;
- }
-
close(fd);
-
- return len;
+ return len < 0 ? -errno : len;
}
char *cg_name(const char *root, const char *name)
@@ -85,16 +84,16 @@ char *cg_control(const char *cgroup, const char *control)
return ret;
}
+/* Returns 0 on success, or -errno on failure. */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
char path[PATH_MAX];
+ ssize_t ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
- if (read_text(path, buf, len) >= 0)
- return 0;
-
- return -1;
+ ret = read_text(path, buf, len);
+ return ret >= 0 ? 0 : ret;
}
int cg_read_strcmp(const char *cgroup, const char *control,
@@ -144,6 +143,16 @@ long cg_read_long(const char *cgroup, const char *control)
return atol(buf);
}
+long cg_read_long_fd(int fd)
+{
+ char buf[128];
+
+ if (pread(fd, buf, sizeof(buf), 0) <= 0)
+ return -1;
+
+ return atol(buf);
+}
+
long cg_read_key_long(const char *cgroup, const char *control, const char *key)
{
char buf[PAGE_SIZE];
@@ -175,23 +184,46 @@ long cg_read_lc(const char *cgroup, const char *control)
return cnt;
}
+/* Returns 0 on success, or -errno on failure. */
int cg_write(const char *cgroup, const char *control, char *buf)
{
char path[PATH_MAX];
- ssize_t len = strlen(buf);
+ ssize_t len = strlen(buf), ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+ ret = write_text(path, buf, len);
+ return ret == len ? 0 : ret;
+}
- if (write_text(path, buf, len) == len)
- return 0;
+/*
+ * Returns fd on success, or -1 on failure.
+ * (fd should be closed with close() as usual)
+ */
+int cg_open(const char *cgroup, const char *control, int flags)
+{
+ char path[PATH_MAX];
- return -1;
+ snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+ return open(path, flags);
+}
+
+int cg_write_numeric(const char *cgroup, const char *control, long value)
+{
+ char buf[64];
+ int ret;
+
+ ret = sprintf(buf, "%lu", value);
+ if (ret < 0)
+ return ret;
+
+ return cg_write(cgroup, control, buf);
}
-int cg_find_unified_root(char *root, size_t len)
+static int cg_find_root(char *root, size_t len, const char *controller,
+ bool *nsdelegate)
{
char buf[10 * PAGE_SIZE];
- char *fs, *mount, *type;
+ char *fs, *mount, *type, *options;
const char delim[] = "\n\t ";
if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
@@ -204,22 +236,43 @@ int cg_find_unified_root(char *root, size_t len)
for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
mount = strtok(NULL, delim);
type = strtok(NULL, delim);
+ options = strtok(NULL, delim);
strtok(NULL, delim);
strtok(NULL, delim);
- strtok(NULL, delim);
-
- if (strcmp(type, "cgroup2") == 0) {
- strncpy(root, mount, len);
- return 0;
+ if (strcmp(type, "cgroup") == 0) {
+ if (!controller || !strstr(options, controller))
+ continue;
+ } else if (strcmp(type, "cgroup2") == 0) {
+ if (controller &&
+ cg_read_strstr(mount, "cgroup.controllers", controller))
+ continue;
+ } else {
+ continue;
}
+ strncpy(root, mount, len);
+
+ if (nsdelegate)
+ *nsdelegate = !!strstr(options, "nsdelegate");
+ return 0;
+
}
return -1;
}
+int cg_find_controller_root(char *root, size_t len, const char *controller)
+{
+ return cg_find_root(root, len, controller, NULL);
+}
+
+int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
+{
+ return cg_find_root(root, len, NULL, nsdelegate);
+}
+
int cg_create(const char *cgroup)
{
- return mkdir(cgroup, 0644);
+ return mkdir(cgroup, 0755);
}
int cg_wait_for_proc_count(const char *cgroup, int count)
@@ -252,6 +305,10 @@ int cg_killall(const char *cgroup)
char buf[PAGE_SIZE];
char *ptr = buf;
+ /* If cgroup.kill exists use it. */
+ if (!cg_write(cgroup, "cgroup.kill", "1"))
+ return 0;
+
if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
return -1;
@@ -275,6 +332,8 @@ int cg_destroy(const char *cgroup)
{
int ret;
+ if (!cgroup)
+ return 0;
retry:
ret = rmdir(cgroup);
if (ret && errno == EBUSY) {
@@ -304,7 +363,7 @@ int cg_enter_current(const char *cgroup)
int cg_enter_current_thread(const char *cgroup)
{
- return cg_write(cgroup, "cgroup.threads", "0");
+ return cg_write(cgroup, CG_THREADS_FILE, "0");
}
int cg_run(const char *cgroup,
@@ -451,87 +510,34 @@ int cg_run_nowait(const char *cgroup,
return pid;
}
-int get_temp_fd(void)
+int proc_mount_contains(const char *option)
{
- return open(".", O_TMPFILE | O_RDWR | O_EXCL);
-}
-
-int alloc_pagecache(int fd, size_t size)
-{
- char buf[PAGE_SIZE];
- struct stat st;
- int i;
-
- if (fstat(fd, &st))
- goto cleanup;
+ char buf[4 * PAGE_SIZE];
+ ssize_t read;
- size += st.st_size;
+ read = read_text("/proc/mounts", buf, sizeof(buf));
+ if (read < 0)
+ return read;
- if (ftruncate(fd, size))
- goto cleanup;
-
- for (i = 0; i < size; i += sizeof(buf))
- read(fd, buf, sizeof(buf));
-
- return 0;
-
-cleanup:
- return -1;
+ return strstr(buf, option) != NULL;
}
-int alloc_anon(const char *cgroup, void *arg)
-{
- size_t size = (unsigned long)arg;
- char *buf, *ptr;
-
- buf = malloc(size);
- for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
- *ptr = 0;
-
- free(buf);
- return 0;
-}
-
-int is_swap_enabled(void)
+int cgroup_feature(const char *feature)
{
char buf[PAGE_SIZE];
- const char delim[] = "\n";
- int cnt = 0;
- char *line;
+ ssize_t read;
- if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
- return -1;
-
- for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
- cnt++;
+ read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf));
+ if (read < 0)
+ return read;
- return cnt > 1;
-}
-
-int set_oom_adj_score(int pid, int score)
-{
- char path[PATH_MAX];
- int fd, len;
-
- sprintf(path, "/proc/%d/oom_score_adj", pid);
-
- fd = open(path, O_WRONLY | O_APPEND);
- if (fd < 0)
- return fd;
-
- len = dprintf(fd, "%d", score);
- if (len < 0) {
- close(fd);
- return len;
- }
-
- close(fd);
- return 0;
+ return strstr(buf, feature) != NULL;
}
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
char path[PATH_MAX];
+ ssize_t ret;
if (!pid)
snprintf(path, sizeof(path), "/proc/%s/%s",
@@ -539,7 +545,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t
else
snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
- return read_text(path, buf, size);
+ ret = read_text(path, buf, size);
+ return ret < 0 ? -1 : ret;
}
int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
@@ -576,3 +583,57 @@ int clone_into_cgroup_run_wait(const char *cgroup)
(void)clone_reap(pid, WEXITED);
return 0;
}
+
+static int __prepare_for_wait(const char *cgroup, const char *filename)
+{
+ int fd, ret = -1;
+
+ fd = inotify_init1(0);
+ if (fd == -1)
+ return fd;
+
+ ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY);
+ if (ret == -1) {
+ close(fd);
+ fd = -1;
+ }
+
+ return fd;
+}
+
+int cg_prepare_for_wait(const char *cgroup)
+{
+ return __prepare_for_wait(cgroup, "cgroup.events");
+}
+
+int memcg_prepare_for_wait(const char *cgroup)
+{
+ return __prepare_for_wait(cgroup, "memory.events");
+}
+
+int cg_wait_for(int fd)
+{
+ int ret = -1;
+ struct pollfd fds = {
+ .fd = fd,
+ .events = POLLIN,
+ };
+
+ while (true) {
+ ret = poll(&fds, 1, 10000);
+
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+
+ break;
+ }
+
+ if (ret > 0 && fds.revents & POLLIN) {
+ ret = 0;
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index 5a1305dd1f0b..7ab2824ed7b5 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -2,21 +2,54 @@
#include <stdbool.h>
#include <stdlib.h>
+#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
-
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
#define MB(x) (x << 20)
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+
+#define TEST_UID 65534 /* usually nobody, any !root is fine */
+
+#define CG_THREADS_FILE (!cg_test_v1_named ? "cgroup.threads" : "tasks")
+#define CG_NAMED_NAME "selftest"
+#define CG_PATH_FORMAT (!cg_test_v1_named ? "0::%s" : (":name=" CG_NAMED_NAME ":%s"))
+
/*
* Checks if two given values differ by less than err% of their sum.
*/
static inline int values_close(long a, long b, int err)
{
- return abs(a - b) <= (a + b) / 100 * err;
+ return labs(a - b) <= (a + b) / 100 * err;
+}
+
+/*
+ * Checks if two given values differ by less than err% of their sum and assert
+ * with detailed debug info if not.
+ */
+static inline int values_close_report(long a, long b, int err)
+{
+ long diff = labs(a - b);
+ long limit = (a + b) / 100 * err;
+ double actual_err = (a + b) ? (100.0 * diff / (a + b)) : 0.0;
+ int close = diff <= limit;
+
+ if (!close)
+ fprintf(stderr,
+ "[FAIL] actual=%ld expected=%ld | diff=%ld | limit=%ld | "
+ "tolerance=%d%% | actual_error=%.2f%%\n",
+ a, b, diff, limit, err, actual_err);
+
+ return close;
}
-extern int cg_find_unified_root(char *root, size_t len);
+extern ssize_t read_text(const char *path, char *buf, size_t max_len);
+extern ssize_t write_text(const char *path, char *buf, ssize_t len);
+
+extern int cg_find_controller_root(char *root, size_t len, const char *controller);
+extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate);
extern char *cg_name(const char *root, const char *name);
extern char *cg_name_indexed(const char *root, const char *name, int index);
extern char *cg_control(const char *cgroup, const char *control);
@@ -29,9 +62,12 @@ extern int cg_read_strcmp(const char *cgroup, const char *control,
extern int cg_read_strstr(const char *cgroup, const char *control,
const char *needle);
extern long cg_read_long(const char *cgroup, const char *control);
+extern long cg_read_long_fd(int fd);
long cg_read_key_long(const char *cgroup, const char *control, const char *key);
extern long cg_read_lc(const char *cgroup, const char *control);
extern int cg_write(const char *cgroup, const char *control, char *buf);
+extern int cg_open(const char *cgroup, const char *control, int flags);
+int cg_write_numeric(const char *cgroup, const char *control, long value);
extern int cg_run(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg);
@@ -41,16 +77,17 @@ extern int cg_enter_current_thread(const char *cgroup);
extern int cg_run_nowait(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg);
-extern int get_temp_fd(void);
-extern int alloc_pagecache(int fd, size_t size);
-extern int alloc_anon(const char *cgroup, void *arg);
-extern int is_swap_enabled(void);
-extern int set_oom_adj_score(int pid, int score);
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
+int proc_mount_contains(const char *option);
+int cgroup_feature(const char *feature);
extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
extern pid_t clone_into_cgroup(int cgroup_fd);
extern int clone_reap(pid_t pid, int options);
extern int clone_into_cgroup_run_wait(const char *cgroup);
extern int dirfd_open_opath(const char *dir);
+extern int cg_prepare_for_wait(const char *cgroup);
+extern int memcg_prepare_for_wait(const char *cgroup);
+extern int cg_wait_for(int fd);
+extern bool cg_test_v1_named;
diff --git a/tools/testing/selftests/cgroup/lib/libcgroup.mk b/tools/testing/selftests/cgroup/lib/libcgroup.mk
new file mode 100644
index 000000000000..7a73007204c3
--- /dev/null
+++ b/tools/testing/selftests/cgroup/lib/libcgroup.mk
@@ -0,0 +1,19 @@
+CGROUP_DIR := $(selfdir)/cgroup
+
+LIBCGROUP_C := lib/cgroup_util.c
+
+LIBCGROUP_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBCGROUP_C))
+
+LIBCGROUP_O_DIRS := $(shell dirname $(LIBCGROUP_O) | uniq)
+
+CFLAGS += -I$(CGROUP_DIR)/lib/include
+
+EXTRA_HDRS := $(selfdir)/clone3/clone3_selftests.h
+
+$(LIBCGROUP_O_DIRS):
+ mkdir -p $@
+
+$(LIBCGROUP_O): $(OUTPUT)/%.o : $(CGROUP_DIR)/%.c $(EXTRA_HDRS) $(LIBCGROUP_O_DIRS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBCGROUP_O)
diff --git a/tools/testing/selftests/cgroup/memcg_protection.m b/tools/testing/selftests/cgroup/memcg_protection.m
new file mode 100644
index 000000000000..051daa3477b6
--- /dev/null
+++ b/tools/testing/selftests/cgroup/memcg_protection.m
@@ -0,0 +1,89 @@
+% SPDX-License-Identifier: GPL-2.0
+%
+% run as: octave-cli memcg_protection.m
+%
+% This script simulates reclaim protection behavior on a single level of memcg
+% hierarchy to illustrate how overcommitted protection spreads among siblings
+% (as it depends also on their current consumption).
+%
+% Simulation assumes siblings consumed the initial amount of memory (w/out
+% reclaim) and then the reclaim starts, all memory is reclaimable, i.e. treated
+% same. It simulates only non-low reclaim and assumes all memory.min = 0.
+%
+% Input configurations
+% --------------------
+% E number parent effective protection
+% n vector nominal protection of siblings set at the given level (memory.low)
+% c vector current consumption -,,- (memory.current)
+
+% example from testcase (values in GB)
+E = 50 / 1024;
+n = [75 25 0 500 ] / 1024;
+c = [50 50 50 0] / 1024;
+
+% Reclaim parameters
+% ------------------
+
+% Minimal reclaim amount (GB)
+cluster = 32*4 / 2**20;
+
+% Reclaim coefficient (think as 0.5^sc->priority)
+alpha = .1
+
+% Simulation parameters
+% ---------------------
+epsilon = 1e-7;
+timeout = 1000;
+
+% Simulation loop
+% ---------------
+
+ch = [];
+eh = [];
+rh = [];
+
+for t = 1:timeout
+ % low_usage
+ u = min(c, n);
+ siblings = sum(u);
+
+ % effective_protection()
+ protected = min(n, c); % start with nominal
+ e = protected * min(1, E / siblings); % normalize overcommit
+
+ % recursive protection
+ unclaimed = max(0, E - siblings);
+ parent_overuse = sum(c) - siblings;
+ if (unclaimed > 0 && parent_overuse > 0)
+ overuse = max(0, c - protected);
+ e += unclaimed * (overuse / parent_overuse);
+ endif
+
+ % get_scan_count()
+ r = alpha * c; % assume all memory is in a single LRU list
+
+ % commit 1bc63fb1272b ("mm, memcg: make scan aggression always exclude protection")
+ sz = max(e, c);
+ r .*= (1 - (e+epsilon) ./ (sz+epsilon));
+
+ % uncomment to debug prints
+ % e, c, r
+
+ % nothing to reclaim, reached equilibrium
+ if max(r) < epsilon
+ break;
+ endif
+
+ % SWAP_CLUSTER_MAX roundup
+ r = max(r, (r > epsilon) .* cluster);
+ % XXX here I do parallel reclaim of all siblings
+ % in reality reclaim is serialized and each sibling recalculates own residual
+ c = max(c - r, 0);
+
+ ch = [ch ; c];
+ eh = [eh ; e];
+ rh = [rh ; r];
+endfor
+
+t
+c, e
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 3df648c37876..102262555a59 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -1,20 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
#include <linux/limits.h>
+#include <linux/sched.h>
#include <sys/types.h>
#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <string.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
+static bool nsdelegate;
+#ifndef CLONE_NEWCGROUP
+#define CLONE_NEWCGROUP 0
+#endif
+
static int touch_anon(char *buf, size_t size)
{
int fd;
@@ -143,6 +153,9 @@ static int test_cgcore_populated(const char *root)
int cgroup_fd = -EBADF;
pid_t pid;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
cg_test_a = cg_name(root, "cg_test_a");
cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
@@ -272,6 +285,9 @@ static int test_cgcore_invalid_domain(const char *root)
int ret = KSFT_FAIL;
char *grandparent = NULL, *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
grandparent = cg_name(root, "cg_test_grandparent");
parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
@@ -334,6 +350,9 @@ static int test_cgcore_parent_becomes_threaded(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -373,7 +392,8 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
- if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
+ if (cg_test_v1_named ||
+ cg_read_strstr(root, "cgroup.controllers", "cpu") ||
cg_write(root, "cgroup.subtree_control", "+cpu")) {
ret = KSFT_SKIP;
goto cleanup;
@@ -425,6 +445,9 @@ static int test_cgcore_top_down_constraint_enable(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -460,6 +483,9 @@ static int test_cgcore_top_down_constraint_disable(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -501,6 +527,9 @@ static int test_cgcore_internal_process_constraint(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -568,7 +597,7 @@ static int test_cgcore_proc_migration(const char *root)
}
cg_enter_current(dst);
- if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1)
+ if (cg_read_lc(dst, CG_THREADS_FILE) != n_threads + 1)
goto cleanup;
ret = KSFT_PASS;
@@ -600,7 +629,7 @@ static void *migrating_thread_fn(void *arg)
char lines[3][PATH_MAX];
for (g = 1; g < 3; ++g)
- snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0]));
+ snprintf(lines[g], sizeof(lines[g]), CG_PATH_FORMAT, grps[g] + strlen(grps[0]));
for (i = 0; i < n_iterations; ++i) {
cg_enter_current_thread(grps[(i % 2) + 1]);
@@ -637,10 +666,12 @@ static int test_cgcore_thread_migration(const char *root)
if (cg_create(grps[2]))
goto cleanup;
- if (cg_write(grps[1], "cgroup.type", "threaded"))
- goto cleanup;
- if (cg_write(grps[2], "cgroup.type", "threaded"))
- goto cleanup;
+ if (!cg_test_v1_named) {
+ if (cg_write(grps[1], "cgroup.type", "threaded"))
+ goto cleanup;
+ if (cg_write(grps[2], "cgroup.type", "threaded"))
+ goto cleanup;
+ }
if (cg_enter_current(grps[1]))
goto cleanup;
@@ -654,7 +685,7 @@ static int test_cgcore_thread_migration(const char *root)
if (retval)
goto cleanup;
- snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0]));
+ snprintf(line, sizeof(line), CG_PATH_FORMAT, grps[1] + strlen(grps[0]));
if (proc_read_strstr(0, 1, "cgroup", line))
goto cleanup;
@@ -674,6 +705,201 @@ cleanup:
return ret;
}
+/*
+ * cgroup migration permission check should be performed based on the
+ * credentials at the time of open instead of write.
+ */
+static int test_cgcore_lesser_euid_open(const char *root)
+{
+ const uid_t test_euid = TEST_UID;
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ uid_t saved_uid;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ saved_uid = geteuid();
+ if (seteuid(test_euid))
+ goto cleanup;
+
+ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
+
+ if (seteuid(saved_uid))
+ goto cleanup;
+
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+struct lesser_ns_open_thread_arg {
+ const char *path;
+ int fd;
+ int err;
+};
+
+static int lesser_ns_open_thread_fn(void *arg)
+{
+ struct lesser_ns_open_thread_arg *targ = arg;
+
+ targ->fd = open(targ->path, O_RDWR);
+ targ->err = errno;
+ return 0;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the cgroup
+ * namespace at the time of open instead of write.
+ */
+static int test_cgcore_lesser_ns_open(const char *root)
+{
+ static char stack[65536];
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ struct lesser_ns_open_thread_arg targ = { .fd = -1 };
+ pid_t pid;
+ int status;
+
+ if (!nsdelegate)
+ return KSFT_SKIP;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_b))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ targ.path = cg_test_b_procs;
+ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
+ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
+ &targ);
+ if (pid < 0)
+ goto cleanup;
+
+ if (waitpid(pid, &status, 0) < 0)
+ goto cleanup;
+
+ if (!WIFEXITED(status))
+ goto cleanup;
+
+ cg_test_b_procs_fd = targ.fd;
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+static int setup_named_v1_root(char *root, size_t len, const char *name)
+{
+ char options[PATH_MAX];
+ int r;
+
+ r = snprintf(root, len, "/mnt/cg_selftest");
+ if (r < 0)
+ return r;
+
+ r = snprintf(options, sizeof(options), "none,name=%s", name);
+ if (r < 0)
+ return r;
+
+ r = mkdir(root, 0755);
+ if (r < 0 && errno != EEXIST)
+ return r;
+
+ r = mount("none", root, "cgroup", 0, options);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void cleanup_named_v1_root(char *root)
+{
+ if (!cg_test_v1_named)
+ return;
+ umount(root);
+ rmdir(root);
+}
+
#define T(x) { x, #x }
struct corecg_test {
int (*fn)(const char *root);
@@ -689,21 +915,30 @@ struct corecg_test {
T(test_cgcore_proc_migration),
T(test_cgcore_thread_migration),
T(test_cgcore_destroy),
+ T(test_cgcore_lesser_euid_open),
+ T(test_cgcore_lesser_ns_open),
};
#undef T
int main(int argc, char *argv[])
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
-
- if (cg_find_unified_root(root, sizeof(root)))
- ksft_exit_skip("cgroup v2 isn't mounted\n");
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) {
+ if (setup_named_v1_root(root, sizeof(root), CG_NAMED_NAME))
+ ksft_exit_skip("cgroup v2 isn't mounted and could not setup named v1 hierarchy\n");
+ cg_test_v1_named = true;
+ goto post_v2_setup;
+ }
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
+post_v2_setup:
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
@@ -713,11 +948,11 @@ int main(int argc, char *argv[])
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ cleanup_named_v1_root(root);
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
new file mode 100644
index 000000000000..c83f05438d7c
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -0,0 +1,825 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <linux/limits.h>
+#include <sys/param.h>
+#include <sys/sysinfo.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+#include "cgroup_util.h"
+
+enum hog_clock_type {
+ // Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock.
+ CPU_HOG_CLOCK_PROCESS,
+ // Count elapsed time using system wallclock time.
+ CPU_HOG_CLOCK_WALL,
+};
+
+struct cpu_hogger {
+ char *cgroup;
+ pid_t pid;
+ long usage;
+};
+
+struct cpu_hog_func_param {
+ int nprocs;
+ struct timespec ts;
+ enum hog_clock_type clock_type;
+};
+
+/*
+ * This test creates two nested cgroups with and without enabling
+ * the cpu controller.
+ */
+static int test_cpucg_subtree_control(const char *root)
+{
+ char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
+ int ret = KSFT_FAIL;
+
+ // Create two nested cgroups with the cpu controller enabled.
+ parent = cg_name(root, "cpucg_test_0");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ child = cg_name(parent, "cpucg_test_child");
+ if (!child)
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
+ goto cleanup;
+
+ // Create two nested cgroups without enabling the cpu controller.
+ parent2 = cg_name(root, "cpucg_test_1");
+ if (!parent2)
+ goto cleanup;
+
+ if (cg_create(parent2))
+ goto cleanup;
+
+ child2 = cg_name(parent2, "cpucg_test_child");
+ if (!child2)
+ goto cleanup;
+
+ if (cg_create(child2))
+ goto cleanup;
+
+ if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(child);
+ free(child);
+ cg_destroy(child2);
+ free(child2);
+ cg_destroy(parent);
+ free(parent);
+ cg_destroy(parent2);
+ free(parent2);
+
+ return ret;
+}
+
+static void *hog_cpu_thread_func(void *arg)
+{
+ while (1)
+ ;
+
+ return NULL;
+}
+
+static struct timespec
+timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
+{
+ struct timespec zero = {
+ .tv_sec = 0,
+ .tv_nsec = 0,
+ };
+ struct timespec ret;
+
+ if (lhs->tv_sec < rhs->tv_sec)
+ return zero;
+
+ ret.tv_sec = lhs->tv_sec - rhs->tv_sec;
+
+ if (lhs->tv_nsec < rhs->tv_nsec) {
+ if (ret.tv_sec == 0)
+ return zero;
+
+ ret.tv_sec--;
+ ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
+ } else
+ ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;
+
+ return ret;
+}
+
+static int hog_cpus_timed(const char *cgroup, void *arg)
+{
+ const struct cpu_hog_func_param *param =
+ (struct cpu_hog_func_param *)arg;
+ struct timespec ts_run = param->ts;
+ struct timespec ts_remaining = ts_run;
+ struct timespec ts_start;
+ int i, ret;
+
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ if (ret != 0)
+ return ret;
+
+ for (i = 0; i < param->nprocs; i++) {
+ pthread_t tid;
+
+ ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
+ if (ret != 0)
+ return ret;
+ }
+
+ while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
+ struct timespec ts_total;
+
+ ret = nanosleep(&ts_remaining, NULL);
+ if (ret && errno != EINTR)
+ return ret;
+
+ if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
+ ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
+ if (ret != 0)
+ return ret;
+ } else {
+ struct timespec ts_current;
+
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
+ if (ret != 0)
+ return ret;
+
+ ts_total = timespec_sub(&ts_current, &ts_start);
+ }
+
+ ts_remaining = timespec_sub(&ts_run, &ts_total);
+ }
+
+ return 0;
+}
+
+/*
+ * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
+ * cpu.stat shows the expected output.
+ */
+static int test_cpucg_stats(const char *root)
+{
+ int ret = KSFT_FAIL;
+ long usage_usec, user_usec, system_usec;
+ long usage_seconds = 2;
+ long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+ char *cpucg;
+
+ cpucg = cg_name(root, "cpucg_test");
+ if (!cpucg)
+ goto cleanup;
+
+ if (cg_create(cpucg))
+ goto cleanup;
+
+ usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
+ user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+ system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
+ if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
+ goto cleanup;
+
+ struct cpu_hog_func_param param = {
+ .nprocs = 1,
+ .ts = {
+ .tv_sec = usage_seconds,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_PROCESS,
+ };
+ if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
+ goto cleanup;
+
+ usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
+ user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+ if (user_usec <= 0)
+ goto cleanup;
+
+ if (!values_close_report(usage_usec, expected_usage_usec, 1))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(cpucg);
+ free(cpucg);
+
+ return ret;
+}
+
+/*
+ * Creates a nice process that consumes CPU and checks that the elapsed
+ * usertime in the cgroup is close to the expected time.
+ */
+static int test_cpucg_nice(const char *root)
+{
+ int ret = KSFT_FAIL;
+ int status;
+ long user_usec, nice_usec;
+ long usage_seconds = 2;
+ long expected_nice_usec = usage_seconds * USEC_PER_SEC;
+ char *cpucg;
+ pid_t pid;
+
+ cpucg = cg_name(root, "cpucg_test");
+ if (!cpucg)
+ goto cleanup;
+
+ if (cg_create(cpucg))
+ goto cleanup;
+
+ user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+ nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
+ if (nice_usec == -1)
+ ret = KSFT_SKIP;
+ if (user_usec != 0 || nice_usec != 0)
+ goto cleanup;
+
+ /*
+ * We fork here to create a new process that can be niced without
+ * polluting the nice value of other selftests
+ */
+ pid = fork();
+ if (pid < 0) {
+ goto cleanup;
+ } else if (pid == 0) {
+ struct cpu_hog_func_param param = {
+ .nprocs = 1,
+ .ts = {
+ .tv_sec = usage_seconds,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_PROCESS,
+ };
+ char buf[64];
+ snprintf(buf, sizeof(buf), "%d", getpid());
+ if (cg_write(cpucg, "cgroup.procs", buf))
+ goto cleanup;
+
+ /* Try to keep niced CPU usage as constrained to hog_cpu as possible */
+ nice(1);
+ hog_cpus_timed(cpucg, &param);
+ exit(0);
+ } else {
+ waitpid(pid, &status, 0);
+ if (!WIFEXITED(status))
+ goto cleanup;
+
+ user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+ nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
+ if (!values_close_report(nice_usec, expected_nice_usec, 1))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+ }
+
+cleanup:
+ cg_destroy(cpucg);
+ free(cpucg);
+
+ return ret;
+}
+
+static int
+run_cpucg_weight_test(
+ const char *root,
+ pid_t (*spawn_child)(const struct cpu_hogger *child),
+ int (*validate)(const struct cpu_hogger *children, int num_children))
+{
+ int ret = KSFT_FAIL, i;
+ char *parent = NULL;
+ struct cpu_hogger children[3] = {};
+
+ parent = cg_name(root, "cpucg_test_0");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
+ if (!children[i].cgroup)
+ goto cleanup;
+
+ if (cg_create(children[i].cgroup))
+ goto cleanup;
+
+ if (cg_write_numeric(children[i].cgroup, "cpu.weight",
+ 50 * (i + 1)))
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ pid_t pid = spawn_child(&children[i]);
+ if (pid <= 0)
+ goto cleanup;
+ children[i].pid = pid;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ int retcode;
+
+ waitpid(children[i].pid, &retcode, 0);
+ if (!WIFEXITED(retcode))
+ goto cleanup;
+ if (WEXITSTATUS(retcode))
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(children); i++)
+ children[i].usage = cg_read_key_long(children[i].cgroup,
+ "cpu.stat", "usage_usec");
+
+ if (validate(children, ARRAY_SIZE(children)))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+cleanup:
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ cg_destroy(children[i].cgroup);
+ free(children[i].cgroup);
+ }
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
+{
+ long usage_seconds = 10;
+ struct cpu_hog_func_param param = {
+ .nprocs = ncpus,
+ .ts = {
+ .tv_sec = usage_seconds,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_WALL,
+ };
+ return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)&param);
+}
+
+static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
+{
+ return weight_hog_ncpus(child, get_nprocs());
+}
+
+static int
+overprovision_validate(const struct cpu_hogger *children, int num_children)
+{
+ int ret = KSFT_FAIL, i;
+
+ for (i = 0; i < num_children - 1; i++) {
+ long delta;
+
+ if (children[i + 1].usage <= children[i].usage)
+ goto cleanup;
+
+ delta = children[i + 1].usage - children[i].usage;
+ if (!values_close_report(delta, children[0].usage, 35))
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+cleanup:
+ return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B cpu.weight = 50
+ * A/C cpu.weight = 100
+ * A/D cpu.weight = 150
+ *
+ * A separate process is then created for each child cgroup which spawns as
+ * many threads as there are cores, and hogs each CPU as much as possible
+ * for some time interval.
+ *
+ * Once all of the children have exited, we verify that each child cgroup
+ * was given proportional runtime as informed by their cpu.weight.
+ */
+static int test_cpucg_weight_overprovisioned(const char *root)
+{
+ return run_cpucg_weight_test(root, weight_hog_all_cpus,
+ overprovision_validate);
+}
+
+static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
+{
+ return weight_hog_ncpus(child, 1);
+}
+
+static int
+underprovision_validate(const struct cpu_hogger *children, int num_children)
+{
+ int ret = KSFT_FAIL, i;
+
+ for (i = 0; i < num_children - 1; i++) {
+ if (!values_close_report(children[i + 1].usage, children[0].usage, 15))
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+cleanup:
+ return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B cpu.weight = 50
+ * A/C cpu.weight = 100
+ * A/D cpu.weight = 150
+ *
+ * A separate process is then created for each child cgroup which spawns a
+ * single thread that hogs a CPU. The testcase is only run on systems that
+ * have at least one core per-thread in the child processes.
+ *
+ * Once all of the children have exited, we verify that each child cgroup
+ * had roughly the same runtime despite having different cpu.weight.
+ */
+static int test_cpucg_weight_underprovisioned(const char *root)
+{
+ // Only run the test if there are enough cores to avoid overprovisioning
+ // the system.
+ if (get_nprocs() < 4)
+ return KSFT_SKIP;
+
+ return run_cpucg_weight_test(root, weight_hog_one_cpu,
+ underprovision_validate);
+}
+
+static int
+run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
+{
+ int ret = KSFT_FAIL, i;
+ char *parent = NULL, *child = NULL;
+ struct cpu_hogger leaf[3] = {};
+ long nested_leaf_usage, child_usage;
+ int nprocs = get_nprocs();
+
+ if (!overprovisioned) {
+ if (nprocs < 4)
+ /*
+ * Only run the test if there are enough cores to avoid overprovisioning
+ * the system.
+ */
+ return KSFT_SKIP;
+ nprocs /= 4;
+ }
+
+ parent = cg_name(root, "cpucg_test");
+ child = cg_name(parent, "cpucg_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+ if (cg_write(child, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+ if (cg_write(child, "cpu.weight", "1000"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ const char *ancestor;
+ long weight;
+
+ if (i == 0) {
+ ancestor = parent;
+ weight = 1000;
+ } else {
+ ancestor = child;
+ weight = 5000;
+ }
+ leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
+ if (!leaf[i].cgroup)
+ goto cleanup;
+
+ if (cg_create(leaf[i].cgroup))
+ goto cleanup;
+
+ if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ pid_t pid;
+ struct cpu_hog_func_param param = {
+ .nprocs = nprocs,
+ .ts = {
+ .tv_sec = 10,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_WALL,
+ };
+
+ pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
+ (void *)&param);
+ if (pid <= 0)
+ goto cleanup;
+ leaf[i].pid = pid;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ int retcode;
+
+ waitpid(leaf[i].pid, &retcode, 0);
+ if (!WIFEXITED(retcode))
+ goto cleanup;
+ if (WEXITSTATUS(retcode))
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
+ "cpu.stat", "usage_usec");
+ if (leaf[i].usage <= 0)
+ goto cleanup;
+ }
+
+ nested_leaf_usage = leaf[1].usage + leaf[2].usage;
+ if (overprovisioned) {
+ if (!values_close_report(leaf[0].usage, nested_leaf_usage, 15))
+ goto cleanup;
+ } else if (!values_close_report(leaf[0].usage * 2, nested_leaf_usage, 15))
+ goto cleanup;
+
+
+ child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
+ if (child_usage <= 0)
+ goto cleanup;
+ if (!values_close_report(child_usage, nested_leaf_usage, 1))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+cleanup:
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ cg_destroy(leaf[i].cgroup);
+ free(leaf[i].cgroup);
+ }
+ cg_destroy(child);
+ free(child);
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B cpu.weight = 1000
+ * A/C cpu.weight = 1000
+ * A/C/D cpu.weight = 5000
+ * A/C/E cpu.weight = 5000
+ *
+ * A separate process is then created for each leaf, which spawn nproc threads
+ * that burn a CPU for a few seconds.
+ *
+ * Once all of those processes have exited, we verify that each of the leaf
+ * cgroups have roughly the same usage from cpu.stat.
+ */
+static int
+test_cpucg_nested_weight_overprovisioned(const char *root)
+{
+ return run_cpucg_nested_weight_test(root, true);
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B cpu.weight = 1000
+ * A/C cpu.weight = 1000
+ * A/C/D cpu.weight = 5000
+ * A/C/E cpu.weight = 5000
+ *
+ * A separate process is then created for each leaf, which nproc / 4 threads
+ * that burns a CPU for a few seconds.
+ *
+ * Once all of those processes have exited, we verify that each of the leaf
+ * cgroups have roughly the same usage from cpu.stat.
+ */
+static int
+test_cpucg_nested_weight_underprovisioned(const char *root)
+{
+ return run_cpucg_nested_weight_test(root, false);
+}
+
+/*
+ * This test creates a cgroup with some maximum value within a period, and
+ * verifies that a process in the cgroup is not overscheduled.
+ */
+static int test_cpucg_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ long quota_usec = 1000;
+ long default_period_usec = 100000; /* cpu.max's default period */
+ long duration_seconds = 1;
+
+ long duration_usec = duration_seconds * USEC_PER_SEC;
+ long usage_usec, n_periods, remainder_usec, expected_usage_usec;
+ char *cpucg;
+ char quota_buf[32];
+
+ snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec);
+
+ cpucg = cg_name(root, "cpucg_test");
+ if (!cpucg)
+ goto cleanup;
+
+ if (cg_create(cpucg))
+ goto cleanup;
+
+ if (cg_write(cpucg, "cpu.max", quota_buf))
+ goto cleanup;
+
+ struct cpu_hog_func_param param = {
+ .nprocs = 1,
+ .ts = {
+ .tv_sec = duration_seconds,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_WALL,
+ };
+ if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
+ goto cleanup;
+
+ usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
+ if (usage_usec <= 0)
+ goto cleanup;
+
+ /*
+ * The following calculation applies only since
+ * the cpu hog is set to run as per wall-clock time
+ */
+ n_periods = duration_usec / default_period_usec;
+ remainder_usec = duration_usec - n_periods * default_period_usec;
+ expected_usage_usec
+ = n_periods * quota_usec + MIN(remainder_usec, quota_usec);
+
+ if (!values_close_report(usage_usec, expected_usage_usec, 10))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(cpucg);
+ free(cpucg);
+
+ return ret;
+}
+
+/*
+ * This test verifies that a process inside of a nested cgroup whose parent
+ * group has a cpu.max value set, is properly throttled.
+ */
+static int test_cpucg_max_nested(const char *root)
+{
+ int ret = KSFT_FAIL;
+ long quota_usec = 1000;
+ long default_period_usec = 100000; /* cpu.max's default period */
+ long duration_seconds = 1;
+
+ long duration_usec = duration_seconds * USEC_PER_SEC;
+ long usage_usec, n_periods, remainder_usec, expected_usage_usec;
+ char *parent, *child;
+ char quota_buf[32];
+
+ snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec);
+
+ parent = cg_name(root, "cpucg_parent");
+ child = cg_name(parent, "cpucg_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cpu.max", quota_buf))
+ goto cleanup;
+
+ struct cpu_hog_func_param param = {
+ .nprocs = 1,
+ .ts = {
+ .tv_sec = duration_seconds,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_WALL,
+ };
+ if (cg_run(child, hog_cpus_timed, (void *)&param))
+ goto cleanup;
+
+ usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
+ if (usage_usec <= 0)
+ goto cleanup;
+
+ /*
+ * The following calculation applies only since
+ * the cpu hog is set to run as per wall-clock time
+ */
+ n_periods = duration_usec / default_period_usec;
+ remainder_usec = duration_usec - n_periods * default_period_usec;
+ expected_usage_usec
+ = n_periods * quota_usec + MIN(remainder_usec, quota_usec);
+
+ if (!values_close_report(usage_usec, expected_usage_usec, 10))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(child);
+ free(child);
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct cpucg_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cpucg_subtree_control),
+ T(test_cpucg_stats),
+ T(test_cpucg_nice),
+ T(test_cpucg_weight_overprovisioned),
+ T(test_cpucg_weight_underprovisioned),
+ T(test_cpucg_nested_weight_overprovisioned),
+ T(test_cpucg_nested_weight_underprovisioned),
+ T(test_cpucg_max),
+ T(test_cpucg_max_nested),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+ char root[PATH_MAX];
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), NULL))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
+ if (cg_write(root, "cgroup.subtree_control", "+cpu"))
+ ksft_exit_skip("Failed to set cpu controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c
new file mode 100644
index 000000000000..c5cf8b56ceb8
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/limits.h>
+#include <signal.h>
+
+#include "kselftest.h"
+#include "cgroup_util.h"
+
+static int idle_process_fn(const char *cgroup, void *arg)
+{
+ (void)pause();
+ return 0;
+}
+
+static int do_migration_fn(const char *cgroup, void *arg)
+{
+ int object_pid = (int)(size_t)arg;
+
+ if (setuid(TEST_UID))
+ return EXIT_FAILURE;
+
+ // XXX checking /proc/$pid/cgroup would be quicker than wait
+ if (cg_enter(cgroup, object_pid) ||
+ cg_wait_for_proc_count(cgroup, 1))
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
+
+static int do_controller_fn(const char *cgroup, void *arg)
+{
+ const char *child = cgroup;
+ const char *parent = arg;
+
+ if (setuid(TEST_UID))
+ return EXIT_FAILURE;
+
+ if (!cg_read_strstr(child, "cgroup.controllers", "cpuset"))
+ return EXIT_FAILURE;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpuset"))
+ return EXIT_FAILURE;
+
+ if (cg_read_strstr(child, "cgroup.controllers", "cpuset"))
+ return EXIT_FAILURE;
+
+ if (cg_write(parent, "cgroup.subtree_control", "-cpuset"))
+ return EXIT_FAILURE;
+
+ if (!cg_read_strstr(child, "cgroup.controllers", "cpuset"))
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
+
+/*
+ * Migrate a process between two sibling cgroups.
+ * The success should only depend on the parent cgroup permissions and not the
+ * migrated process itself (cpuset controller is in place because it uses
+ * security_task_setscheduler() in cgroup v1).
+ *
+ * Deliberately don't set cpuset.cpus in children to avoid definining migration
+ * permissions between two different cpusets.
+ */
+static int test_cpuset_perms_object(const char *root, bool allow)
+{
+ char *parent = NULL, *child_src = NULL, *child_dst = NULL;
+ char *parent_procs = NULL, *child_src_procs = NULL, *child_dst_procs = NULL;
+ const uid_t test_euid = TEST_UID;
+ int object_pid = 0;
+ int ret = KSFT_FAIL;
+
+ parent = cg_name(root, "cpuset_test_0");
+ if (!parent)
+ goto cleanup;
+ parent_procs = cg_name(parent, "cgroup.procs");
+ if (!parent_procs)
+ goto cleanup;
+ if (cg_create(parent))
+ goto cleanup;
+
+ child_src = cg_name(parent, "cpuset_test_1");
+ if (!child_src)
+ goto cleanup;
+ child_src_procs = cg_name(child_src, "cgroup.procs");
+ if (!child_src_procs)
+ goto cleanup;
+ if (cg_create(child_src))
+ goto cleanup;
+
+ child_dst = cg_name(parent, "cpuset_test_2");
+ if (!child_dst)
+ goto cleanup;
+ child_dst_procs = cg_name(child_dst, "cgroup.procs");
+ if (!child_dst_procs)
+ goto cleanup;
+ if (cg_create(child_dst))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpuset"))
+ goto cleanup;
+
+ if (cg_read_strstr(child_src, "cgroup.controllers", "cpuset") ||
+ cg_read_strstr(child_dst, "cgroup.controllers", "cpuset"))
+ goto cleanup;
+
+ /* Enable permissions along src->dst tree path */
+ if (chown(child_src_procs, test_euid, -1) ||
+ chown(child_dst_procs, test_euid, -1))
+ goto cleanup;
+
+ if (allow && chown(parent_procs, test_euid, -1))
+ goto cleanup;
+
+ /* Fork a privileged child as a test object */
+ object_pid = cg_run_nowait(child_src, idle_process_fn, NULL);
+ if (object_pid < 0)
+ goto cleanup;
+
+ /* Carry out migration in a child process that can drop all privileges
+ * (including capabilities), the main process must remain privileged for
+ * cleanup.
+ * Child process's cgroup is irrelevant but we place it into child_dst
+ * as hacky way to pass information about migration target to the child.
+ */
+ if (allow ^ (cg_run(child_dst, do_migration_fn, (void *)(size_t)object_pid) == EXIT_SUCCESS))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (object_pid > 0) {
+ (void)kill(object_pid, SIGTERM);
+ (void)clone_reap(object_pid, WEXITED);
+ }
+
+ cg_destroy(child_dst);
+ free(child_dst_procs);
+ free(child_dst);
+
+ cg_destroy(child_src);
+ free(child_src_procs);
+ free(child_src);
+
+ cg_destroy(parent);
+ free(parent_procs);
+ free(parent);
+
+ return ret;
+}
+
+static int test_cpuset_perms_object_allow(const char *root)
+{
+ return test_cpuset_perms_object(root, true);
+}
+
+static int test_cpuset_perms_object_deny(const char *root)
+{
+ return test_cpuset_perms_object(root, false);
+}
+
+/*
+ * Migrate a process between parent and child implicitely
+ * Implicit migration happens when a controller is enabled/disabled.
+ *
+ */
+static int test_cpuset_perms_subtree(const char *root)
+{
+ char *parent = NULL, *child = NULL;
+ char *parent_procs = NULL, *parent_subctl = NULL, *child_procs = NULL;
+ const uid_t test_euid = TEST_UID;
+ int object_pid = 0;
+ int ret = KSFT_FAIL;
+
+ parent = cg_name(root, "cpuset_test_0");
+ if (!parent)
+ goto cleanup;
+ parent_procs = cg_name(parent, "cgroup.procs");
+ if (!parent_procs)
+ goto cleanup;
+ parent_subctl = cg_name(parent, "cgroup.subtree_control");
+ if (!parent_subctl)
+ goto cleanup;
+ if (cg_create(parent))
+ goto cleanup;
+
+ child = cg_name(parent, "cpuset_test_1");
+ if (!child)
+ goto cleanup;
+ child_procs = cg_name(child, "cgroup.procs");
+ if (!child_procs)
+ goto cleanup;
+ if (cg_create(child))
+ goto cleanup;
+
+ /* Enable permissions as in a delegated subtree */
+ if (chown(parent_procs, test_euid, -1) ||
+ chown(parent_subctl, test_euid, -1) ||
+ chown(child_procs, test_euid, -1))
+ goto cleanup;
+
+ /* Put a privileged child in the subtree and modify controller state
+ * from an unprivileged process, the main process remains privileged
+ * for cleanup.
+ * The unprivileged child runs in subtree too to avoid parent and
+ * internal-node constraing violation.
+ */
+ object_pid = cg_run_nowait(child, idle_process_fn, NULL);
+ if (object_pid < 0)
+ goto cleanup;
+
+ if (cg_run(child, do_controller_fn, parent) != EXIT_SUCCESS)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (object_pid > 0) {
+ (void)kill(object_pid, SIGTERM);
+ (void)clone_reap(object_pid, WEXITED);
+ }
+
+ cg_destroy(child);
+ free(child_procs);
+ free(child);
+
+ cg_destroy(parent);
+ free(parent_subctl);
+ free(parent_procs);
+ free(parent);
+
+ return ret;
+}
+
+
+#define T(x) { x, #x }
+struct cpuset_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cpuset_perms_object_allow),
+ T(test_cpuset_perms_object_deny),
+ T(test_cpuset_perms_subtree),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+ char root[PATH_MAX];
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), NULL))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "cpuset"))
+ if (cg_write(root, "cgroup.subtree_control", "+cpuset"))
+ ksft_exit_skip("Failed to set cpuset controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
new file mode 100755
index 000000000000..a17256d9f88a
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -0,0 +1,1193 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test for cpuset v2 partition root state (PRS)
+#
+# The sched verbose flag can be optionally set so that the console log
+# can be examined for the correct setting of scheduling domain.
+#
+
+skip_test() {
+ echo "$1"
+ echo "Test SKIPPED"
+ exit 4 # ksft_skip
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+
+
+# Get wait_inotify location
+WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
+
+# Find cgroup v2 mount point
+CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+[[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!"
+SUBPARTS_CPUS=$CGROUP2/.__DEBUG__.cpuset.cpus.subpartitions
+CPULIST=$(cat $CGROUP2/cpuset.cpus.effective)
+
+NR_CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
+[[ $NR_CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
+
+# Check to see if /dev/console exists and is writable
+if [[ -c /dev/console && -w /dev/console ]]
+then
+ CONSOLE=/dev/console
+else
+ CONSOLE=/dev/null
+fi
+
+# Set verbose flag and delay factor
+PROG=$1
+VERBOSE=0
+DELAY_FACTOR=1
+SCHED_DEBUG=
+while [[ "$1" = -* ]]
+do
+ case "$1" in
+ -v) ((VERBOSE++))
+ # Enable sched/verbose can slow thing down
+ [[ $DELAY_FACTOR -eq 1 ]] &&
+ DELAY_FACTOR=2
+ ;;
+ -d) DELAY_FACTOR=$2
+ shift
+ ;;
+ *) echo "Usage: $PROG [-v] [-d <delay-factor>"
+ exit
+ ;;
+ esac
+ shift
+done
+
+# Set sched verbose flag if available when "-v" option is specified
+if [[ $VERBOSE -gt 0 && -d /sys/kernel/debug/sched ]]
+then
+ # Used to restore the original setting during cleanup
+ SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose)
+ echo Y > /sys/kernel/debug/sched/verbose
+fi
+
+cd $CGROUP2
+echo +cpuset > cgroup.subtree_control
+
+#
+# If cpuset has been set up and used in child cgroups, we may not be able to
+# create partition under root cgroup because of the CPU exclusivity rule.
+# So we are going to skip the test if this is the case.
+#
+[[ -d test ]] || mkdir test
+echo 0-6 > test/cpuset.cpus
+echo root > test/cpuset.cpus.partition
+cat test/cpuset.cpus.partition | grep -q invalid
+RESULT=$?
+echo member > test/cpuset.cpus.partition
+echo "" > test/cpuset.cpus
+[[ $RESULT -eq 0 ]] && skip_test "Child cgroups are using cpuset!"
+
+#
+# If isolated CPUs have been reserved at boot time (as shown in
+# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
+# that will be used by this script for testing purpose. If not, some of
+# the tests may fail incorrectly. Wait a bit and retry again just in case
+# these isolated CPUs are leftover from previous run and have just been
+# cleaned up earlier in this script.
+#
+# These pre-isolated CPUs should stay in an isolated state throughout the
+# testing process for now.
+#
+BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
+[[ -n "$BOOT_ISOLCPUS" ]] && {
+ sleep 0.5
+ BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
+}
+if [[ -n "$BOOT_ISOLCPUS" ]]
+then
+ [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
+ skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
+ echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
+fi
+
+cleanup()
+{
+ online_cpus
+ cd $CGROUP2
+ rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1
+ rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \
+ rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1
+ [[ -n "$SCHED_DEBUG" ]] &&
+ echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
+}
+
+# Pause in ms
+pause()
+{
+ DELAY=$1
+ LOOP=0
+ while [[ $LOOP -lt $DELAY_FACTOR ]]
+ do
+ sleep $DELAY
+ ((LOOP++))
+ done
+ return 0
+}
+
+console_msg()
+{
+ MSG=$1
+ echo "$MSG"
+ echo "" > $CONSOLE
+ echo "$MSG" > $CONSOLE
+ pause 0.01
+}
+
+test_partition()
+{
+ EXPECTED_VAL=$1
+ echo $EXPECTED_VAL > cpuset.cpus.partition
+ [[ $? -eq 0 ]] || exit 1
+ ACTUAL_VAL=$(cat cpuset.cpus.partition)
+ [[ $ACTUAL_VAL != $EXPECTED_VAL ]] && {
+ echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $ACTUAL_VAL"
+ echo "Test FAILED"
+ exit 1
+ }
+}
+
+test_effective_cpus()
+{
+ EXPECTED_VAL=$1
+ ACTUAL_VAL=$(cat cpuset.cpus.effective)
+ [[ "$ACTUAL_VAL" != "$EXPECTED_VAL" ]] && {
+ echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$ACTUAL_VAL'"
+ echo "Test FAILED"
+ exit 1
+ }
+}
+
+# Adding current process to cgroup.procs as a test
+test_add_proc()
+{
+ OUTSTR="$1"
+ ERRMSG=$((echo $$ > cgroup.procs) |& cat)
+ echo $ERRMSG | grep -q "$OUTSTR"
+ [[ $? -ne 0 ]] && {
+ echo "cgroup.procs: expect '$OUTSTR', got '$ERRMSG'"
+ echo "Test FAILED"
+ exit 1
+ }
+ echo $$ > $CGROUP2/cgroup.procs # Move out the task
+}
+
+#
+# Cpuset controller state transition test matrix.
+#
+# Cgroup test hierarchy
+#
+# root
+# |
+# +------+------+
+# | |
+# A1 B1
+# |
+# A2
+# |
+# A3
+#
+# P<v> = set cpus.partition (0:member, 1:root, 2:isolated)
+# C<l> = add cpu-list to cpuset.cpus
+# X<l> = add cpu-list to cpuset.cpus.exclusive
+# S<p> = use prefix in subtree_control
+# T = put a task into cgroup
+# CX<l> = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive
+# O<c>=<v> = Write <v> to CPU online file of <c>
+#
+# ECPUs - effective CPUs of cpusets
+# Pstate - partition root state
+# ISOLCPUS - isolated CPUs (<icpus>[,<icpus2>])
+#
+# Note that if there are 2 fields in ISOLCPUS, the first one is for
+# sched-debug matching which includes offline CPUs and single-CPU partitions
+# while the second one is for matching cpuset.cpus.isolated.
+#
+SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1"
+TEST_MATRIX=(
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ " C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1"
+ " C0-1 . . C2-3 P1 . . . 0 "
+ " C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 "
+ " C0-1 . . C2-3 P1:S+ C1:P1 . . 0 "
+ " C0-1:S+ . . C2-3 . . . P1 0 "
+ " C0-1:P1 . . C2-3 S+ C1 . . 0 "
+ " C0-1:P1 . . C2-3 S+ C1:P1 . . 0 "
+ " C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 "
+ " C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5"
+ " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
+ " C0-1 . . C2-3:P1 . . . C2 0 "
+ " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
+ "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3"
+ "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0"
+ "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2"
+ "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1"
+ "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+
+ # CPU offlining cases:
+ " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3"
+ "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1"
+ "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
+
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ #
+ # Remote partition and cpuset.cpus.exclusive tests
+ #
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2"
+ " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3"
+
+ # Nested remote/local partition tests
+ " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \
+ A1:P0|A2:P1|A3:P0|B1:P1"
+ " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-4|3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \
+ A1:P0|A2:P2|A3:P1 2-4|2-3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \
+ A1:P0|A2:P2|A3:P1 2"
+ " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+ . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \
+ A1:P0|A2:P-2|A3:P-1 ."
+ " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+ . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \
+ A1:P0|A2:P2|A3:P-1 2-4"
+
+ # Remote partition offline tests
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|"
+
+ # An invalidated remote partition cannot self-recover from hotplug
+ " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ."
+
+ # cpus.exclusive.effective clearing test
+ " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:"
+
+ # Invalid to valid remote partition transition test
+ " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ."
+ " C0-3:S+ C1-3:X3:P2
+ . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3"
+
+ # Invalid to valid local partition direct transition tests
+ " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3"
+ " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3"
+ " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0"
+ " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3"
+
+ # Local partition invalidation tests
+ " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3"
+ " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
+ " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
+ # Local partition CPU change tests
+ " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2"
+ " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3"
+
+ # cpus_allowed/exclusive_cpus update tests
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \
+ A1:P0|A3:P-2 ."
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \
+ A1:P0|A3:P-2 ."
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \
+ A1:P0|A3:P2 3"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \
+ A1:P0|A3:P2 3"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \
+ A1:P0|A3:P-2"
+
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ #
+ # Incorrect change to cpuset.cpus[.exclusive] invalidates partition root
+ #
+ # Adding CPUs to partition root that are not in parent's
+ # cpuset.cpus is allowed, but those extra CPUs are ignored.
+ "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1"
+
+ # Taking away all CPUs from parent or itself if there are tasks
+ # will make the partition invalid.
+ "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1"
+ " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+
+ # Changing a partition root to member makes child partitions invalid
+ "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1"
+ "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1"
+
+ # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long
+ # as they overlap.
+ "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+
+ # Deletion of CPUs distributed to child cgroup is allowed.
+ "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5"
+
+ # To become a valid partition root, cpuset.cpus must overlap parent's
+ # cpuset.cpus.
+ " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1"
+
+ # Enabling partition with child cpusets is allowed
+ " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1"
+
+ # A partition root with non-partition root parent is invalid| but it
+ # can be made valid if its parent becomes a partition root too.
+ " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2"
+ " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1"
+
+ # A non-exclusive cpuset.cpus change will invalidate partition and its siblings
+ " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0"
+ " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1"
+ " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1"
+
+ # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it
+ " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5"
+
+ # Child partition root that try to take all CPUs from parent partition
+ # with tasks will remain invalid.
+ " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
+ " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1"
+ " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
+
+ # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't
+ # affect cpuset.cpus.exclusive.effective.
+ " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3"
+
+ # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive
+ # but creating a local partition out of it is not allowed. Similarly and change
+ # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will
+ # invalidate it.
+ " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \
+ A1:P0|A2:P2:B1:P1 2-4"
+ " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \
+ A1:P0|A2:P2:B1:P-1 2-4"
+ " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \
+ A1:P0|A2:P2:B1:P-1 2-4"
+
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ # Failure cases:
+
+ # A task cannot be added to a partition with no cpu
+ "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1"
+
+ # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
+ " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5"
+
+ # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive
+ " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5"
+)
+
+#
+# Cpuset controller remote partition test matrix.
+#
+# Cgroup test hierarchy
+#
+# root
+# |
+# rtest (cpuset.cpus.exclusive=1-7)
+# |
+# +------+------+
+# | |
+# p1 p2
+# +--+--+ +--+--+
+# | | | |
+# c11 c12 c21 c22
+#
+# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX.
+# Only CPUs 1-7 should be used.
+#
+REMOTE_TEST_MATRIX=(
+ # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22
+ # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS
+ # ------ ------ ------- ------- ------- ------- ----- ------ --------
+ " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \
+ . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \
+ c11:P2|c12:P2|c21:P2|c22:P2 1-6"
+ " CX1-4:S+ . X1-2:P2 C3 . . \
+ . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \
+ p1:P0|c11:P2|c12:P0 1-2"
+ " CX1-4:S+ . X1-2:P2 . . . \
+ X2-4 . . . . . p1:1,3-4|c11:2 \
+ p1:P0|c11:P2 2"
+ " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \
+ X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \
+ p1:P0|c11:P2|c12:P1 2"
+ " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ . . X2 . . . p1:1|c11:2|c12:3-4 \
+ p1:P0|c11:P2|c12:P1 2"
+ # p1 as member, will get its effective CPUs from its parent rtest
+ " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \
+ p1:P0|c11:P2|c12:P1 1"
+ " CX1-4:S+ X5-6:P1:S+ . . . . \
+ . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \
+ p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \
+ 1-2,4-6|1-2,5-6"
+)
+
+#
+# Write to the cpu online file
+# $1 - <c>=<v> where <c> = cpu number, <v> value to be written
+#
+write_cpu_online()
+{
+ CPU=${1%=*}
+ VAL=${1#*=}
+ CPUFILE=//sys/devices/system/cpu/cpu${CPU}/online
+ if [[ $VAL -eq 0 ]]
+ then
+ OFFLINE_CPUS="$OFFLINE_CPUS $CPU"
+ else
+ [[ -n "$OFFLINE_CPUS" ]] && {
+ OFFLINE_CPUS=$(echo $CPU $CPU $OFFLINE_CPUS | fmt -1 |\
+ sort | uniq -u)
+ }
+ fi
+ echo $VAL > $CPUFILE
+ pause 0.05
+}
+
+#
+# Set controller state
+# $1 - cgroup directory
+# $2 - state
+# $3 - showerr
+#
+# The presence of ":" in state means transition from one to the next.
+#
+set_ctrl_state()
+{
+ TMPMSG=/tmp/.msg_$$
+ CGRP=$1
+ STATE=$2
+ SHOWERR=${3}
+ CTRL=${CTRL:=$CONTROLLER}
+ HASERR=0
+ REDIRECT="2> $TMPMSG"
+ [[ -z "$STATE" || "$STATE" = '.' ]] && return 0
+ [[ $VERBOSE -gt 0 ]] && SHOWERR=1
+
+ rm -f $TMPMSG
+ for CMD in $(echo $STATE | sed -e "s/:/ /g")
+ do
+ TFILE=$CGRP/cgroup.procs
+ SFILE=$CGRP/cgroup.subtree_control
+ PFILE=$CGRP/cpuset.cpus.partition
+ CFILE=$CGRP/cpuset.cpus
+ XFILE=$CGRP/cpuset.cpus.exclusive
+ case $CMD in
+ S*) PREFIX=${CMD#?}
+ COMM="echo ${PREFIX}${CTRL} > $SFILE"
+ eval $COMM $REDIRECT
+ ;;
+ X*)
+ CPUS=${CMD#?}
+ COMM="echo $CPUS > $XFILE"
+ eval $COMM $REDIRECT
+ ;;
+ CX*)
+ CPUS=${CMD#??}
+ COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE"
+ eval $COMM $REDIRECT
+ ;;
+ C*) CPUS=${CMD#?}
+ COMM="echo $CPUS > $CFILE"
+ eval $COMM $REDIRECT
+ ;;
+ P*) VAL=${CMD#?}
+ case $VAL in
+ 0) VAL=member
+ ;;
+ 1) VAL=root
+ ;;
+ 2) VAL=isolated
+ ;;
+ *)
+ echo "Invalid partition state - $VAL"
+ exit 1
+ ;;
+ esac
+ COMM="echo $VAL > $PFILE"
+ eval $COMM $REDIRECT
+ ;;
+ O*) VAL=${CMD#?}
+ write_cpu_online $VAL
+ ;;
+ T*) COMM="echo 0 > $TFILE"
+ eval $COMM $REDIRECT
+ ;;
+ *) echo "Unknown command: $CMD"
+ exit 1
+ ;;
+ esac
+ RET=$?
+ [[ $RET -ne 0 ]] && {
+ [[ -n "$SHOWERR" ]] && {
+ echo "$COMM"
+ cat $TMPMSG
+ }
+ HASERR=1
+ }
+ pause 0.01
+ rm -f $TMPMSG
+ done
+ return $HASERR
+}
+
+set_ctrl_state_noerr()
+{
+ CGRP=$1
+ STATE=$2
+ [[ -d $CGRP ]] || mkdir $CGRP
+ set_ctrl_state $CGRP $STATE 1
+ [[ $? -ne 0 ]] && {
+ echo "ERROR: Failed to set $2 to cgroup $1!"
+ exit 1
+ }
+}
+
+online_cpus()
+{
+ [[ -n "OFFLINE_CPUS" ]] && {
+ for C in $OFFLINE_CPUS
+ do
+ write_cpu_online ${C}=1
+ done
+ }
+}
+
+#
+# Remove all the test cgroup directories
+#
+reset_cgroup_states()
+{
+ echo 0 > $CGROUP2/cgroup.procs
+ online_cpus
+ rmdir $RESET_LIST > /dev/null 2>&1
+}
+
+dump_states()
+{
+ for DIR in $CGROUP_LIST
+ do
+ CPUS=$DIR/cpuset.cpus
+ ECPUS=$DIR/cpuset.cpus.effective
+ XCPUS=$DIR/cpuset.cpus.exclusive
+ XECPUS=$DIR/cpuset.cpus.exclusive.effective
+ PRS=$DIR/cpuset.cpus.partition
+ PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions
+ ISCPUS=$DIR/cpuset.cpus.isolated
+ [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)"
+ [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)"
+ [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)"
+ [[ -e $XECPUS ]] && echo "$XECPUS: $(cat $XECPUS)"
+ [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)"
+ [[ -e $PCPUS ]] && echo "$PCPUS: $(cat $PCPUS)"
+ [[ -e $ISCPUS ]] && echo "$ISCPUS: $(cat $ISCPUS)"
+ done
+}
+
+#
+# Set the actual cgroup directory into $CGRP_DIR
+# $1 - cgroup name
+#
+set_cgroup_dir()
+{
+ CGRP_DIR=$1
+ [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2
+ [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3
+ [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11
+ [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12
+ [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21
+ [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22
+}
+
+#
+# Check effective cpus
+# $1 - check string, format: <cgroup>:<cpu-list>[|<cgroup>:<cpu-list>]*
+#
+check_effective_cpus()
+{
+ CHK_STR=$1
+ for CHK in $(echo $CHK_STR | sed -e "s/|/ /g")
+ do
+ set -- $(echo $CHK | sed -e "s/:/ /g")
+ CGRP=$1
+ EXPECTED_CPUS=$2
+ ACTUAL_CPUS=
+ if [[ $CGRP = X* ]]
+ then
+ CGRP=${CGRP#X}
+ FILE=cpuset.cpus.exclusive.effective
+ else
+ FILE=cpuset.cpus.effective
+ fi
+ set_cgroup_dir $CGRP
+ [[ -e $CGRP_DIR/$FILE ]] || return 1
+ ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE)
+ [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1
+ done
+}
+
+#
+# Check cgroup states
+# $1 - check string, format: <cgroup>:<state>[|<cgroup>:<state>]*
+#
+check_cgroup_states()
+{
+ CHK_STR=$1
+ for CHK in $(echo $CHK_STR | sed -e "s/|/ /g")
+ do
+ set -- $(echo $CHK | sed -e "s/:/ /g")
+ CGRP=$1
+ EXPECTED_STATE=$2
+ FILE=
+ EVAL=$(expr substr $EXPECTED_STATE 2 2)
+
+ set_cgroup_dir $CGRP
+ case $EXPECTED_STATE in
+ P*) FILE=$CGRP_DIR/cpuset.cpus.partition
+ ;;
+ *) echo "Unknown state: $EXPECTED_STATE!"
+ exit 1
+ ;;
+ esac
+ ACTUAL_STATE=$(cat $FILE)
+
+ case "$ACTUAL_STATE" in
+ member) VAL=0
+ ;;
+ root) VAL=1
+ ;;
+ isolated)
+ VAL=2
+ ;;
+ "root invalid"*)
+ VAL=-1
+ ;;
+ "isolated invalid"*)
+ VAL=-2
+ ;;
+ esac
+ [[ $EVAL != $VAL ]] && return 1
+
+ #
+ # For root partition, dump sched-domains info to console if
+ # verbose mode set for manual comparison with sched debug info.
+ #
+ [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && {
+ DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective)
+ [[ -n "$DOMS" ]] &&
+ echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE
+ }
+ done
+ return 0
+}
+
+#
+# Get isolated (including offline) CPUs by looking at
+# /sys/kernel/debug/sched/domains and cpuset.cpus.isolated control file,
+# if available, and compare that with the expected value.
+#
+# Note that isolated CPUs from the sched/domains context include offline
+# CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may
+# not be included in the cpuset.cpus.isolated control file which contains
+# only CPUs in isolated partitions as well as those that are isolated at
+# boot time.
+#
+# $1 - expected isolated cpu list(s) <isolcpus1>{,<isolcpus2>}
+# <isolcpus1> - expected sched/domains value
+# <isolcpus2> - cpuset.cpus.isolated value = <isolcpus1> if not defined
+#
+check_isolcpus()
+{
+ EXPECTED_ISOLCPUS=$1
+ ISCPUS=${CGROUP2}/cpuset.cpus.isolated
+ ISOLCPUS=$(cat $ISCPUS)
+ LASTISOLCPU=
+ SCHED_DOMAINS=/sys/kernel/debug/sched/domains
+ if [[ $EXPECTED_ISOLCPUS = . ]]
+ then
+ EXPECTED_ISOLCPUS=
+ EXPECTED_SDOMAIN=
+ elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]]
+ then
+ set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g")
+ EXPECTED_ISOLCPUS=$2
+ EXPECTED_SDOMAIN=$1
+ else
+ EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS
+ fi
+
+ #
+ # Appending pre-isolated CPUs
+ # Even though CPU #8 isn't used for testing, it can't be pre-isolated
+ # to make appending those CPUs easier.
+ #
+ [[ -n "$BOOT_ISOLCPUS" ]] && {
+ EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS}
+ EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS}
+ }
+
+ #
+ # Check cpuset.cpus.isolated cpumask
+ #
+ [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && {
+ # Take a 50ms pause and try again
+ pause 0.05
+ ISOLCPUS=$(cat $ISCPUS)
+ }
+ [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1
+ ISOLCPUS=
+ EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN
+
+ #
+ # Use the sched domain in debugfs to check isolated CPUs, if available
+ #
+ [[ -d $SCHED_DOMAINS ]] || return 0
+
+ for ((CPU=0; CPU < $NR_CPUS; CPU++))
+ do
+ [[ -n "$(ls ${SCHED_DOMAINS}/cpu$CPU)" ]] && continue
+
+ if [[ -z "$LASTISOLCPU" ]]
+ then
+ ISOLCPUS=$CPU
+ LASTISOLCPU=$CPU
+ elif [[ "$LASTISOLCPU" -eq $((CPU - 1)) ]]
+ then
+ echo $ISOLCPUS | grep -q "\<$LASTISOLCPU\$"
+ if [[ $? -eq 0 ]]
+ then
+ ISOLCPUS=${ISOLCPUS}-
+ fi
+ LASTISOLCPU=$CPU
+ else
+ if [[ $ISOLCPUS = *- ]]
+ then
+ ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
+ fi
+ ISOLCPUS=${ISOLCPUS},$CPU
+ LASTISOLCPU=$CPU
+ fi
+ done
+ [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
+
+ [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]]
+}
+
+test_fail()
+{
+ TESTNUM=$1
+ TESTTYPE=$2
+ ADDINFO=$3
+ echo "Test $TEST[$TESTNUM] failed $TESTTYPE check!"
+ [[ -n "$ADDINFO" ]] && echo "*** $ADDINFO ***"
+ eval echo \${$TEST[$I]}
+ echo
+ dump_states
+ exit 1
+}
+
+#
+# Check to see if there are unexpected isolated CPUs left beyond the boot
+# time isolated ones.
+#
+null_isolcpus_check()
+{
+ [[ $VERBOSE -gt 0 ]] || return 0
+ # Retry a few times before printing error
+ RETRY=0
+ while [[ $RETRY -lt 8 ]]
+ do
+ pause 0.02
+ check_isolcpus "."
+ [[ $? -eq 0 ]] && return 0
+ ((RETRY++))
+ done
+ echo "Unexpected isolated CPUs: $ISOLCPUS"
+ dump_states
+ exit 1
+}
+
+#
+# Check state transition test result
+# $1 - Test number
+# $2 - Expected effective CPU values
+# $3 - Expected partition states
+# $4 - Expected isolated CPUs
+#
+check_test_results()
+{
+ _NR=$1
+ _ECPUS="$2"
+ _PSTATES="$3"
+ _ISOLCPUS="$4"
+
+ [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && {
+ check_effective_cpus $_ECPUS
+ [[ $? -ne 0 ]] && test_fail $_NR "effective CPU" \
+ "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS"
+ }
+
+ [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && {
+ check_cgroup_states $_PSTATES
+ [[ $? -ne 0 ]] && test_fail $_NR states \
+ "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE"
+ }
+
+ # Compare the expected isolated CPUs with the actual ones,
+ # if available
+ [[ -n "$_ISOLCPUS" ]] && {
+ check_isolcpus $_ISOLCPUS
+ [[ $? -ne 0 ]] && {
+ [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS}
+ test_fail $_NR "isolated CPU" \
+ "Expect $_ISOLCPUS, get $ISOLCPUS instead"
+ }
+ }
+ reset_cgroup_states
+ #
+ # Check to see if effective cpu list changes
+ #
+ _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective)
+ RETRY=0
+ while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]]
+ do
+ # Wait a bit longer & recheck a few times
+ pause 0.02
+ ((RETRY++))
+ _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective)
+ done
+ [[ $_NEWLIST != $CPULIST ]] && {
+ echo "Effective cpus changed to $_NEWLIST after test $_NR!"
+ exit 1
+ }
+ null_isolcpus_check
+ [[ $VERBOSE -gt 0 ]] && echo "Test $I done."
+}
+
+#
+# Run cpuset state transition test
+# $1 - test matrix name
+#
+# This test is somewhat fragile as delays (sleep x) are added in various
+# places to make sure state changes are fully propagated before the next
+# action. These delays may need to be adjusted if running in a slower machine.
+#
+run_state_test()
+{
+ TEST=$1
+ CONTROLLER=cpuset
+ CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1"
+ RESET_LIST="A1/A2/A3 A1/A2 A1 B1"
+ I=0
+ eval CNT="\${#$TEST[@]}"
+
+ reset_cgroup_states
+ console_msg "Running state transition test ..."
+
+ while [[ $I -lt $CNT ]]
+ do
+ echo "Running test $I ..." > $CONSOLE
+ [[ $VERBOSE -gt 1 ]] && {
+ echo ""
+ eval echo \${$TEST[$I]}
+ }
+ eval set -- "\${$TEST[$I]}"
+ OLD_A1=$1
+ OLD_A2=$2
+ OLD_A3=$3
+ OLD_B1=$4
+ NEW_A1=$5
+ NEW_A2=$6
+ NEW_A3=$7
+ NEW_B1=$8
+ RESULT=$9
+ ECPUS=${10}
+ STATES=${11}
+ ICPUS=${12}
+
+ set_ctrl_state_noerr A1 $OLD_A1
+ set_ctrl_state_noerr A1/A2 $OLD_A2
+ set_ctrl_state_noerr A1/A2/A3 $OLD_A3
+ set_ctrl_state_noerr B1 $OLD_B1
+
+ RETVAL=0
+ set_ctrl_state A1 $NEW_A1; ((RETVAL += $?))
+ set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?))
+ set_ctrl_state A1/A2/A3 $NEW_A3; ((RETVAL += $?))
+ set_ctrl_state B1 $NEW_B1; ((RETVAL += $?))
+
+ [[ $RETVAL -ne $RESULT ]] && test_fail $I result
+
+ check_test_results $I "$ECPUS" "$STATES" "$ICPUS"
+ ((I++))
+ done
+ echo "All $I tests of $TEST PASSED."
+}
+
+#
+# Run cpuset remote partition state transition test
+# $1 - test matrix name
+#
+run_remote_state_test()
+{
+ TEST=$1
+ CONTROLLER=cpuset
+ [[ -d rtest ]] || mkdir rtest
+ cd rtest
+ echo +cpuset > cgroup.subtree_control
+ echo "1-7" > cpuset.cpus
+ echo "1-7" > cpuset.cpus.exclusive
+ CGROUP_LIST=".. . p1 p2 p1/c11 p1/c12 p2/c21 p2/c22"
+ RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2"
+ I=0
+ eval CNT="\${#$TEST[@]}"
+
+ reset_cgroup_states
+ console_msg "Running remote partition state transition test ..."
+
+ while [[ $I -lt $CNT ]]
+ do
+ echo "Running test $I ..." > $CONSOLE
+ [[ $VERBOSE -gt 1 ]] && {
+ echo ""
+ eval echo \${$TEST[$I]}
+ }
+ eval set -- "\${$TEST[$I]}"
+ OLD_p1=$1
+ OLD_p2=$2
+ OLD_c11=$3
+ OLD_c12=$4
+ OLD_c21=$5
+ OLD_c22=$6
+ NEW_p1=$7
+ NEW_p2=$8
+ NEW_c11=$9
+ NEW_c12=${10}
+ NEW_c21=${11}
+ NEW_c22=${12}
+ ECPUS=${13}
+ STATES=${14}
+ ICPUS=${15}
+
+ set_ctrl_state_noerr p1 $OLD_p1
+ set_ctrl_state_noerr p2 $OLD_p2
+ set_ctrl_state_noerr p1/c11 $OLD_c11
+ set_ctrl_state_noerr p1/c12 $OLD_c12
+ set_ctrl_state_noerr p2/c21 $OLD_c21
+ set_ctrl_state_noerr p2/c22 $OLD_c22
+
+ RETVAL=0
+ set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?))
+ set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?))
+ set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?))
+ set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?))
+ set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?))
+ set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?))
+
+ [[ $RETVAL -ne 0 ]] && test_fail $I result
+
+ check_test_results $I "$ECPUS" "$STATES" "$ICPUS"
+ ((I++))
+ done
+ cd ..
+ rmdir rtest
+ echo "All $I tests of $TEST PASSED."
+}
+
+#
+# Testing the new "isolated" partition root type
+#
+test_isolated()
+{
+ cd $CGROUP2/test
+ echo 2-3 > cpuset.cpus
+ TYPE=$(cat cpuset.cpus.partition)
+ [[ $TYPE = member ]] || echo member > cpuset.cpus.partition
+
+ console_msg "Change from member to root"
+ test_partition root
+
+ console_msg "Change from root to isolated"
+ test_partition isolated
+
+ console_msg "Change from isolated to member"
+ test_partition member
+
+ console_msg "Change from member to isolated"
+ test_partition isolated
+
+ console_msg "Change from isolated to root"
+ test_partition root
+
+ console_msg "Change from root to member"
+ test_partition member
+
+ #
+ # Testing partition root with no cpu
+ #
+ console_msg "Distribute all cpus to child partition"
+ echo +cpuset > cgroup.subtree_control
+ test_partition root
+
+ mkdir A1
+ cd A1
+ echo 2-3 > cpuset.cpus
+ test_partition root
+ test_effective_cpus 2-3
+ cd ..
+ test_effective_cpus ""
+
+ console_msg "Moving task to partition test"
+ test_add_proc "No space left"
+ cd A1
+ test_add_proc ""
+ cd ..
+
+ console_msg "Shrink and expand child partition"
+ cd A1
+ echo 2 > cpuset.cpus
+ cd ..
+ test_effective_cpus 3
+ cd A1
+ echo 2-3 > cpuset.cpus
+ cd ..
+ test_effective_cpus ""
+
+ # Cleaning up
+ console_msg "Cleaning up"
+ echo $$ > $CGROUP2/cgroup.procs
+ [[ -d A1 ]] && rmdir A1
+ null_isolcpus_check
+ pause 0.05
+}
+
+#
+# Wait for inotify event for the given file and read it
+# $1: cgroup file to wait for
+# $2: file to store the read result
+#
+wait_inotify()
+{
+ CGROUP_FILE=$1
+ OUTPUT_FILE=$2
+
+ $WAIT_INOTIFY $CGROUP_FILE
+ cat $CGROUP_FILE > $OUTPUT_FILE
+}
+
+#
+# Test if inotify events are properly generated when going into and out of
+# invalid partition state.
+#
+test_inotify()
+{
+ ERR=0
+ PRS=/tmp/.prs_$$
+ cd $CGROUP2/test
+ [[ -f $WAIT_INOTIFY ]] || {
+ echo "wait_inotify not found, inotify test SKIPPED."
+ return
+ }
+
+ pause 0.01
+ echo 1 > cpuset.cpus
+ echo 0 > cgroup.procs
+ echo root > cpuset.cpus.partition
+ pause 0.01
+ rm -f $PRS
+ wait_inotify $PWD/cpuset.cpus.partition $PRS &
+ pause 0.01
+ set_ctrl_state . "O1=0"
+ pause 0.01
+ check_cgroup_states ".:P-1"
+ if [[ $? -ne 0 ]]
+ then
+ echo "FAILED: Inotify test - partition not invalid"
+ ERR=1
+ elif [[ ! -f $PRS ]]
+ then
+ echo "FAILED: Inotify test - event not generated"
+ ERR=1
+ kill %1
+ elif [[ $(cat $PRS) != "root invalid"* ]]
+ then
+ echo "FAILED: Inotify test - incorrect state"
+ cat $PRS
+ ERR=1
+ fi
+ online_cpus
+ echo member > cpuset.cpus.partition
+ echo 0 > ../cgroup.procs
+ if [[ $ERR -ne 0 ]]
+ then
+ exit 1
+ else
+ echo "Inotify test PASSED"
+ fi
+ echo member > cpuset.cpus.partition
+ echo "" > cpuset.cpus
+}
+
+trap cleanup 0 2 3 6
+run_state_test TEST_MATRIX
+run_remote_state_test REMOTE_TEST_MATRIX
+test_isolated
+test_inotify
+echo "All tests PASSED."
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh
new file mode 100755
index 000000000000..42a6628fb8bc
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Basc test for cpuset v1 interfaces write/read
+#
+
+skip_test() {
+ echo "$1"
+ echo "Test SKIPPED"
+ exit 4 # ksft_skip
+}
+
+write_test() {
+ dir=$1
+ interface=$2
+ value=$3
+ original=$(cat $dir/$interface)
+ echo "testing $interface $value"
+ echo $value > $dir/$interface
+ new=$(cat $dir/$interface)
+ [[ $value -ne $(cat $dir/$interface) ]] && {
+ echo "$interface write $value failed: new:$new"
+ exit 1
+ }
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+
+# Find cpuset v1 mount point
+CPUSET=$(mount -t cgroup | grep cpuset | head -1 | awk '{print $3}')
+[[ -n "$CPUSET" ]] || skip_test "cpuset v1 mount point not found!"
+
+#
+# Create a test cpuset, read write test
+#
+TDIR=test$$
+[[ -d $CPUSET/$TDIR ]] || mkdir $CPUSET/$TDIR
+
+ITF_MATRIX=(
+ #interface value expect root_only
+ 'cpuset.cpus 0-1 0-1 0'
+ 'cpuset.mem_exclusive 1 1 0'
+ 'cpuset.mem_exclusive 0 0 0'
+ 'cpuset.mem_hardwall 1 1 0'
+ 'cpuset.mem_hardwall 0 0 0'
+ 'cpuset.memory_migrate 1 1 0'
+ 'cpuset.memory_migrate 0 0 0'
+ 'cpuset.memory_spread_page 1 1 0'
+ 'cpuset.memory_spread_page 0 0 0'
+ 'cpuset.memory_spread_slab 1 1 0'
+ 'cpuset.memory_spread_slab 0 0 0'
+ 'cpuset.mems 0 0 0'
+ 'cpuset.sched_load_balance 1 1 0'
+ 'cpuset.sched_load_balance 0 0 0'
+ 'cpuset.sched_relax_domain_level 2 2 0'
+ 'cpuset.memory_pressure_enabled 1 1 1'
+ 'cpuset.memory_pressure_enabled 0 0 1'
+)
+
+run_test()
+{
+ cnt="${ITF_MATRIX[@]}"
+ for i in "${ITF_MATRIX[@]}" ; do
+ args=($i)
+ root_only=${args[3]}
+ [[ $root_only -eq 1 ]] && {
+ write_test "$CPUSET" "${args[0]}" "${args[1]}" "${args[2]}"
+ continue
+ }
+ write_test "$CPUSET/$TDIR" "${args[0]}" "${args[1]}" "${args[2]}"
+ done
+}
+
+run_test
+rmdir $CPUSET/$TDIR
+echo "Test PASSED"
+exit 0
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
new file mode 100755
index 000000000000..7406c24be1ac
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test the special cpuset v1 hotplug case where a cpuset become empty of
+# CPUs will force migration of tasks out to an ancestor.
+#
+
+skip_test() {
+ echo "$1"
+ echo "Test SKIPPED"
+ exit 4 # ksft_skip
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+
+# Find cpuset v1 mount point
+CPUSET=$(mount -t cgroup | grep cpuset | head -1 | awk -e '{print $3}')
+[[ -n "$CPUSET" ]] || skip_test "cpuset v1 mount point not found!"
+
+#
+# Create a test cpuset, put a CPU and a task there and offline that CPU
+#
+TDIR=test$$
+[[ -d $CPUSET/$TDIR ]] || mkdir $CPUSET/$TDIR
+echo 1 > $CPUSET/$TDIR/cpuset.cpus
+echo 0 > $CPUSET/$TDIR/cpuset.mems
+sleep 10&
+TASK=$!
+echo $TASK > $CPUSET/$TDIR/tasks
+NEWCS=$(cat /proc/$TASK/cpuset)
+[[ $NEWCS != "/$TDIR" ]] && {
+ echo "Unexpected cpuset $NEWCS, test FAILED!"
+ exit 1
+}
+
+echo 0 > /sys/devices/system/cpu/cpu1/online
+sleep 0.5
+echo 1 > /sys/devices/system/cpu/cpu1/online
+NEWCS=$(cat /proc/$TASK/cpuset)
+rmdir $CPUSET/$TDIR
+[[ $NEWCS != "/" ]] && {
+ echo "cpuset $NEWCS, test FAILED!"
+ exit 1
+}
+echo "Test PASSED"
+exit 0
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 23d8fa4a3e4e..97fae92c8387 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -7,13 +7,11 @@
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
-#include <poll.h>
#include <stdlib.h>
-#include <sys/inotify.h>
#include <string.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
#define DEBUG
@@ -55,61 +53,6 @@ static int cg_freeze_nowait(const char *cgroup, bool freeze)
}
/*
- * Prepare for waiting on cgroup.events file.
- */
-static int cg_prepare_for_wait(const char *cgroup)
-{
- int fd, ret = -1;
-
- fd = inotify_init1(0);
- if (fd == -1) {
- debug("Error: inotify_init1() failed\n");
- return fd;
- }
-
- ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
- IN_MODIFY);
- if (ret == -1) {
- debug("Error: inotify_add_watch() failed\n");
- close(fd);
- fd = -1;
- }
-
- return fd;
-}
-
-/*
- * Wait for an event. If there are no events for 10 seconds,
- * treat this an error.
- */
-static int cg_wait_for(int fd)
-{
- int ret = -1;
- struct pollfd fds = {
- .fd = fd,
- .events = POLLIN,
- };
-
- while (true) {
- ret = poll(&fds, 1, 10000);
-
- if (ret == -1) {
- if (errno == EINTR)
- continue;
- debug("Error: poll() failed\n");
- break;
- }
-
- if (ret > 0 && fds.revents & POLLIN) {
- ret = 0;
- break;
- }
- }
-
- return ret;
-}
-
-/*
* Attach a task to the given cgroup and wait for a cgroup frozen event.
* All transient events (e.g. populated) are ignored.
*/
@@ -797,7 +740,7 @@ static int test_cgfreezer_ptraced(const char *root)
/*
* cg_check_frozen(cgroup, true) will fail here,
- * because the task in in the TRACEd state.
+ * because the task is in the TRACEd state.
*/
if (cg_freeze_wait(cgroup, false))
goto cleanup;
@@ -861,6 +804,662 @@ cleanup:
return ret;
}
+/*
+ * Get the current frozen_usec for the cgroup.
+ */
+static long cg_check_freezetime(const char *cgroup)
+{
+ return cg_read_key_long(cgroup, "cgroup.stat.local",
+ "frozen_usec ");
+}
+
+/*
+ * Test that the freeze time will behave as expected for an empty cgroup.
+ */
+static int test_cgfreezer_time_empty(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ long prev, curr;
+
+ cgroup = cg_name(root, "cg_time_test_empty");
+ if (!cgroup)
+ goto cleanup;
+
+ /*
+ * 1) Create an empty cgroup and check that its freeze time
+ * is 0.
+ */
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+ if (curr > 0) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+
+ /*
+ * 2) Sleep for 1000 us. Check that the freeze time is at
+ * least 1000 us.
+ */
+ usleep(1000);
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 1000) {
+ debug("Expect time (%ld) to be at least 1000 us\n",
+ curr);
+ goto cleanup;
+ }
+
+ /*
+ * 3) Unfreeze the cgroup. Check that the freeze time is
+ * larger than at 2).
+ */
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 4) Check the freeze time again to ensure that it has not
+ * changed.
+ */
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * A simple test for cgroup freezer time accounting. This test follows
+ * the same flow as test_cgfreezer_time_empty, but with a single process
+ * in the cgroup.
+ */
+static int test_cgfreezer_time_simple(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ long prev, curr;
+
+ cgroup = cg_name(root, "cg_time_test_simple");
+ if (!cgroup)
+ goto cleanup;
+
+ /*
+ * 1) Create a cgroup and check that its freeze time is 0.
+ */
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+ if (curr > 0) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 2) Populate the cgroup with one child and check that the
+ * freeze time is still 0.
+ */
+ cg_run_nowait(cgroup, child_fn, NULL);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr > prev) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+
+ /*
+ * 3) Sleep for 1000 us. Check that the freeze time is at
+ * least 1000 us.
+ */
+ usleep(1000);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 1000) {
+ debug("Expect time (%ld) to be at least 1000 us\n",
+ curr);
+ goto cleanup;
+ }
+
+ /*
+ * 4) Unfreeze the cgroup. Check that the freeze time is
+ * larger than at 3).
+ */
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 5) Sleep for 1000 us. Check that the freeze time is the
+ * same as at 4).
+ */
+ usleep(1000);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * Test that freezer time accounting works as expected, even while we're
+ * populating a cgroup with processes.
+ */
+static int test_cgfreezer_time_populate(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ long prev, curr;
+ int i;
+
+ cgroup = cg_name(root, "cg_time_test_populate");
+ if (!cgroup)
+ goto cleanup;
+
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+ if (curr > 0) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 1) Populate the cgroup with 100 processes. Check that
+ * the freeze time is 0.
+ */
+ for (i = 0; i < 100; i++)
+ cg_run_nowait(cgroup, child_fn, NULL);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 2) Wait for the group to become fully populated. Check
+ * that the freeze time is 0.
+ */
+ if (cg_wait_for_proc_count(cgroup, 100))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 3) Freeze the cgroup and then populate it with 100 more
+ * processes. Check that the freeze time continues to grow.
+ */
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ for (i = 0; i < 100; i++)
+ cg_run_nowait(cgroup, child_fn, NULL);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 4) Wait for the group to become fully populated. Check
+ * that the freeze time is larger than at 3).
+ */
+ if (cg_wait_for_proc_count(cgroup, 200))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 5) Unfreeze the cgroup. Check that the freeze time is
+ * larger than at 4).
+ */
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 6) Kill the processes. Check that the freeze time is the
+ * same as it was at 5).
+ */
+ if (cg_killall(cgroup))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 7) Freeze and unfreeze the cgroup. Check that the freeze
+ * time is larger than it was at 6).
+ */
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * Test that frozen time for a cgroup continues to work as expected,
+ * even as processes are migrated. Frozen cgroup A's freeze time should
+ * continue to increase and running cgroup B's should stay 0.
+ */
+static int test_cgfreezer_time_migrate(const char *root)
+{
+ long prev_A, curr_A, curr_B;
+ char *cgroup[2] = {0};
+ int ret = KSFT_FAIL;
+ int pid;
+
+ cgroup[0] = cg_name(root, "cg_time_test_migrate_A");
+ if (!cgroup[0])
+ goto cleanup;
+
+ cgroup[1] = cg_name(root, "cg_time_test_migrate_B");
+ if (!cgroup[1])
+ goto cleanup;
+
+ if (cg_create(cgroup[0]))
+ goto cleanup;
+
+ if (cg_check_freezetime(cgroup[0]) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_create(cgroup[1]))
+ goto cleanup;
+
+ pid = cg_run_nowait(cgroup[0], child_fn, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ if (cg_wait_for_proc_count(cgroup[0], 1))
+ goto cleanup;
+
+ curr_A = cg_check_freezetime(cgroup[0]);
+ if (curr_A) {
+ debug("Expect time (%ld) to be 0\n", curr_A);
+ goto cleanup;
+ }
+ curr_B = cg_check_freezetime(cgroup[1]);
+ if (curr_B) {
+ debug("Expect time (%ld) to be 0\n", curr_B);
+ goto cleanup;
+ }
+
+ /*
+ * Freeze cgroup A.
+ */
+ if (cg_freeze_wait(cgroup[0], true))
+ goto cleanup;
+ prev_A = curr_A;
+ curr_A = cg_check_freezetime(cgroup[0]);
+ if (curr_A <= prev_A) {
+ debug("Expect time (%ld) to be > 0\n", curr_A);
+ goto cleanup;
+ }
+
+ /*
+ * Migrate from A (frozen) to B (running).
+ */
+ if (cg_enter(cgroup[1], pid))
+ goto cleanup;
+
+ usleep(1000);
+ curr_B = cg_check_freezetime(cgroup[1]);
+ if (curr_B) {
+ debug("Expect time (%ld) to be 0\n", curr_B);
+ goto cleanup;
+ }
+
+ prev_A = curr_A;
+ curr_A = cg_check_freezetime(cgroup[0]);
+ if (curr_A <= prev_A) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr_A, prev_A);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup[0])
+ cg_destroy(cgroup[0]);
+ free(cgroup[0]);
+ if (cgroup[1])
+ cg_destroy(cgroup[1]);
+ free(cgroup[1]);
+ return ret;
+}
+
+/*
+ * The test creates a cgroup and freezes it. Then it creates a child cgroup.
+ * After that it checks that the child cgroup has a non-zero freeze time
+ * that is less than the parent's. Next, it freezes the child, unfreezes
+ * the parent, and sleeps. Finally, it checks that the child's freeze
+ * time has grown larger than the parent's.
+ */
+static int test_cgfreezer_time_parent(const char *root)
+{
+ char *parent, *child = NULL;
+ int ret = KSFT_FAIL;
+ long ptime, ctime;
+
+ parent = cg_name(root, "cg_test_parent_A");
+ if (!parent)
+ goto cleanup;
+
+ child = cg_name(parent, "cg_test_parent_B");
+ if (!child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_check_freezetime(parent) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_freeze_wait(parent, true))
+ goto cleanup;
+
+ usleep(1000);
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_check_frozen(child, true))
+ goto cleanup;
+
+ /*
+ * Since the parent was frozen the entire time the child cgroup
+ * was being created, we expect the parent's freeze time to be
+ * larger than the child's.
+ *
+ * Ideally, we would be able to check both times simultaneously,
+ * but here we get the child's after we get the parent's.
+ */
+ ptime = cg_check_freezetime(parent);
+ ctime = cg_check_freezetime(child);
+ if (ptime <= ctime) {
+ debug("Expect ptime (%ld) > ctime (%ld)\n", ptime, ctime);
+ goto cleanup;
+ }
+
+ if (cg_freeze_nowait(child, true))
+ goto cleanup;
+
+ if (cg_freeze_wait(parent, false))
+ goto cleanup;
+
+ if (cg_check_frozen(child, true))
+ goto cleanup;
+
+ usleep(100000);
+
+ ctime = cg_check_freezetime(child);
+ ptime = cg_check_freezetime(parent);
+
+ if (ctime <= ptime) {
+ debug("Expect ctime (%ld) > ptime (%ld)\n", ctime, ptime);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ free(child);
+ if (parent)
+ cg_destroy(parent);
+ free(parent);
+ return ret;
+}
+
+/*
+ * The test creates a parent cgroup and a child cgroup. Then, it freezes
+ * the child and checks that the child's freeze time is greater than the
+ * parent's, which should be zero.
+ */
+static int test_cgfreezer_time_child(const char *root)
+{
+ char *parent, *child = NULL;
+ int ret = KSFT_FAIL;
+ long ptime, ctime;
+
+ parent = cg_name(root, "cg_test_child_A");
+ if (!parent)
+ goto cleanup;
+
+ child = cg_name(parent, "cg_test_child_B");
+ if (!child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_check_freezetime(parent) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_freeze_wait(child, true))
+ goto cleanup;
+
+ ctime = cg_check_freezetime(child);
+ ptime = cg_check_freezetime(parent);
+ if (ptime != 0) {
+ debug("Expect ptime (%ld) to be 0\n", ptime);
+ goto cleanup;
+ }
+
+ if (ctime <= ptime) {
+ debug("Expect ctime (%ld) <= ptime (%ld)\n", ctime, ptime);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ free(child);
+ if (parent)
+ cg_destroy(parent);
+ free(parent);
+ return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ * A
+ * |
+ * B
+ * |
+ * C
+ *
+ * Then it freezes the cgroups in the order C, B, A.
+ * Then it unfreezes the cgroups in the order A, B, C.
+ * Then it checks that C's freeze time is larger than B's and
+ * that B's is larger than A's.
+ */
+static int test_cgfreezer_time_nested(const char *root)
+{
+ char *cgroup[3] = {0};
+ int ret = KSFT_FAIL;
+ long time[3] = {0};
+ int i;
+
+ cgroup[0] = cg_name(root, "cg_test_time_A");
+ if (!cgroup[0])
+ goto cleanup;
+
+ cgroup[1] = cg_name(cgroup[0], "B");
+ if (!cgroup[1])
+ goto cleanup;
+
+ cgroup[2] = cg_name(cgroup[1], "C");
+ if (!cgroup[2])
+ goto cleanup;
+
+ if (cg_create(cgroup[0]))
+ goto cleanup;
+
+ if (cg_check_freezetime(cgroup[0]) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_create(cgroup[1]))
+ goto cleanup;
+
+ if (cg_create(cgroup[2]))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[2], true))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[1], true))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[0], true))
+ goto cleanup;
+
+ usleep(1000);
+
+ if (cg_freeze_nowait(cgroup[0], false))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[1], false))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[2], false))
+ goto cleanup;
+
+ time[2] = cg_check_freezetime(cgroup[2]);
+ time[1] = cg_check_freezetime(cgroup[1]);
+ time[0] = cg_check_freezetime(cgroup[0]);
+
+ if (time[2] <= time[1]) {
+ debug("Expect C's time (%ld) > B's time (%ld)", time[2], time[1]);
+ goto cleanup;
+ }
+
+ if (time[1] <= time[0]) {
+ debug("Expect B's time (%ld) > A's time (%ld)", time[1], time[0]);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = 2; i >= 0 && cgroup[i]; i--) {
+ cg_destroy(cgroup[i]);
+ free(cgroup[i]);
+ }
+
+ return ret;
+}
+
#define T(x) { x, #x }
struct cgfreezer_test {
int (*fn)(const char *root);
@@ -876,15 +1475,24 @@ struct cgfreezer_test {
T(test_cgfreezer_stopped),
T(test_cgfreezer_ptraced),
T(test_cgfreezer_vfork),
+ T(test_cgfreezer_time_empty),
+ T(test_cgfreezer_time_simple),
+ T(test_cgfreezer_time_populate),
+ T(test_cgfreezer_time_migrate),
+ T(test_cgfreezer_time_parent),
+ T(test_cgfreezer_time_child),
+ T(test_cgfreezer_time_nested),
};
#undef T
int main(int argc, char *argv[])
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i;
- if (cg_find_unified_root(root, sizeof(root)))
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
@@ -895,11 +1503,10 @@ int main(int argc, char *argv[])
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
new file mode 100644
index 000000000000..f451aa449be6
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include "kselftest.h"
+#include "cgroup_util.h"
+
+#define ADDR ((void *)(0x0UL))
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+/* mapping 8 MBs == 4 hugepages */
+#define LENGTH (8UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+/* borrowed from mm/hmm-tests.c */
+static long get_hugepage_size(void)
+{
+ int fd;
+ char buf[2048];
+ int len;
+ char *p, *q, *path = "/proc/meminfo", *tag = "Hugepagesize:";
+ long val;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ /* Error opening the file */
+ return -1;
+ }
+
+ len = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (len < 0) {
+ /* Error in reading the file */
+ return -1;
+ }
+ if (len == sizeof(buf)) {
+ /* Error file is too large */
+ return -1;
+ }
+ buf[len] = '\0';
+
+ /* Search for a tag if provided */
+ if (tag) {
+ p = strstr(buf, tag);
+ if (!p)
+ return -1; /* looks like the line we want isn't there */
+ p += strlen(tag);
+ } else
+ p = buf;
+
+ val = strtol(p, &q, 0);
+ if (*q != ' ') {
+ /* Error parsing the file */
+ return -1;
+ }
+
+ return val;
+}
+
+static int set_file(const char *path, long value)
+{
+ FILE *file;
+ int ret;
+
+ file = fopen(path, "w");
+ if (!file)
+ return -1;
+ ret = fprintf(file, "%ld\n", value);
+ fclose(file);
+ return ret;
+}
+
+static int set_nr_hugepages(long value)
+{
+ return set_file("/proc/sys/vm/nr_hugepages", value);
+}
+
+static unsigned int check_first(char *addr)
+{
+ return *(unsigned int *)addr;
+}
+
+static void write_data(char *addr)
+{
+ unsigned long i;
+
+ for (i = 0; i < LENGTH; i++)
+ *(addr + i) = (char)i;
+}
+
+static int hugetlb_test_program(const char *cgroup, void *arg)
+{
+ char *test_group = (char *)arg;
+ void *addr;
+ long old_current, expected_current, current;
+ int ret = EXIT_FAILURE;
+
+ old_current = cg_read_long(test_group, "memory.current");
+ set_nr_hugepages(20);
+ current = cg_read_long(test_group, "memory.current");
+ if (current - old_current >= MB(2)) {
+ ksft_print_msg(
+ "setting nr_hugepages should not increase hugepage usage.\n");
+ ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
+ return EXIT_FAILURE;
+ }
+
+ addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
+ if (addr == MAP_FAILED) {
+ ksft_print_msg("fail to mmap.\n");
+ return EXIT_FAILURE;
+ }
+ current = cg_read_long(test_group, "memory.current");
+ if (current - old_current >= MB(2)) {
+ ksft_print_msg("mmap should not increase hugepage usage.\n");
+ ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
+ goto out_failed_munmap;
+ }
+ old_current = current;
+
+ /* read the first page */
+ check_first(addr);
+ expected_current = old_current + MB(2);
+ current = cg_read_long(test_group, "memory.current");
+ if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should increase by around 2MB.\n");
+ ksft_print_msg(
+ "expected memory: %ld, actual memory: %ld\n",
+ expected_current, current);
+ goto out_failed_munmap;
+ }
+
+ /* write to the whole range */
+ write_data(addr);
+ current = cg_read_long(test_group, "memory.current");
+ expected_current = old_current + MB(8);
+ if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should increase by around 8MB.\n");
+ ksft_print_msg(
+ "expected memory: %ld, actual memory: %ld\n",
+ expected_current, current);
+ goto out_failed_munmap;
+ }
+
+ /* unmap the whole range */
+ munmap(addr, LENGTH);
+ current = cg_read_long(test_group, "memory.current");
+ expected_current = old_current;
+ if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should go back down.\n");
+ ksft_print_msg(
+ "expected memory: %ld, actual memory: %ld\n",
+ expected_current, current);
+ return ret;
+ }
+
+ ret = EXIT_SUCCESS;
+ return ret;
+
+out_failed_munmap:
+ munmap(addr, LENGTH);
+ return ret;
+}
+
+static int test_hugetlb_memcg(char *root)
+{
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ test_group = cg_name(root, "hugetlb_memcg_test");
+ if (!test_group || cg_create(test_group)) {
+ ksft_print_msg("fail to create cgroup.\n");
+ goto out;
+ }
+
+ if (cg_write(test_group, "memory.max", "100M")) {
+ ksft_print_msg("fail to set cgroup memory limit.\n");
+ goto out;
+ }
+
+ /* disable swap */
+ if (cg_write(test_group, "memory.swap.max", "0")) {
+ ksft_print_msg("fail to disable swap.\n");
+ goto out;
+ }
+
+ if (!cg_run(test_group, hugetlb_test_program, (void *)test_group))
+ ret = KSFT_PASS;
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int ret = EXIT_SUCCESS, has_memory_hugetlb_acc;
+
+ has_memory_hugetlb_acc = proc_mount_contains("memory_hugetlb_accounting");
+ if (has_memory_hugetlb_acc < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ else if (!has_memory_hugetlb_acc)
+ ksft_exit_skip("memory hugetlb accounting is disabled\n");
+
+ /* Unit is kB! */
+ if (get_hugepage_size() != 2048) {
+ ksft_print_msg("test_hugetlb_memcg requires 2MB hugepages\n");
+ ksft_test_result_skip("test_hugetlb_memcg\n");
+ return ret;
+ }
+
+ if (cg_find_unified_root(root, sizeof(root), NULL))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ switch (test_hugetlb_memcg(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("test_hugetlb_memcg\n");
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("test_hugetlb_memcg\n");
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("test_hugetlb_memcg\n");
+ break;
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c
new file mode 100644
index 000000000000..c8c9d306925b
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <errno.h>
+#include <linux/limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+#include "../pidfd/pidfd.h"
+#include "cgroup_util.h"
+
+/*
+ * Kill the given cgroup and wait for the inotify signal.
+ * If there are no events in 10 seconds, treat this as an error.
+ * Then check that the cgroup is in the desired state.
+ */
+static int cg_kill_wait(const char *cgroup)
+{
+ int fd, ret = -1;
+
+ fd = cg_prepare_for_wait(cgroup);
+ if (fd < 0)
+ return fd;
+
+ ret = cg_write(cgroup, "cgroup.kill", "1");
+ if (ret)
+ goto out;
+
+ ret = cg_wait_for(fd);
+ if (ret)
+ goto out;
+
+out:
+ close(fd);
+ return ret;
+}
+
+/*
+ * A simple process running in a sleep loop until being
+ * re-parented.
+ */
+static int child_fn(const char *cgroup, void *arg)
+{
+ int ppid = getppid();
+
+ while (getppid() == ppid)
+ usleep(1000);
+
+ return getppid() == ppid;
+}
+
+static int test_cgkill_simple(const char *root)
+{
+ pid_t pids[100];
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ int i;
+
+ cgroup = cg_name(root, "cg_test_simple");
+ if (!cgroup)
+ goto cleanup;
+
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ for (i = 0; i < 100; i++)
+ pids[i] = cg_run_nowait(cgroup, child_fn, NULL);
+
+ if (cg_wait_for_proc_count(cgroup, 100))
+ goto cleanup;
+
+ if (cg_read_strcmp(cgroup, "cgroup.events", "populated 1\n"))
+ goto cleanup;
+
+ if (cg_kill_wait(cgroup))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = 0; i < 100; i++)
+ wait_for_pid(pids[i]);
+
+ if (ret == KSFT_PASS &&
+ cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+ ret = KSFT_FAIL;
+
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ * A
+ * / / \ \
+ * B E I K
+ * /\ |
+ * C D F
+ * |
+ * G
+ * |
+ * H
+ *
+ * with a process in C, H and 3 processes in K.
+ * Then it tries to kill the whole tree.
+ */
+static int test_cgkill_tree(const char *root)
+{
+ pid_t pids[5];
+ char *cgroup[10] = {0};
+ int ret = KSFT_FAIL;
+ int i;
+
+ cgroup[0] = cg_name(root, "cg_test_tree_A");
+ if (!cgroup[0])
+ goto cleanup;
+
+ cgroup[1] = cg_name(cgroup[0], "B");
+ if (!cgroup[1])
+ goto cleanup;
+
+ cgroup[2] = cg_name(cgroup[1], "C");
+ if (!cgroup[2])
+ goto cleanup;
+
+ cgroup[3] = cg_name(cgroup[1], "D");
+ if (!cgroup[3])
+ goto cleanup;
+
+ cgroup[4] = cg_name(cgroup[0], "E");
+ if (!cgroup[4])
+ goto cleanup;
+
+ cgroup[5] = cg_name(cgroup[4], "F");
+ if (!cgroup[5])
+ goto cleanup;
+
+ cgroup[6] = cg_name(cgroup[5], "G");
+ if (!cgroup[6])
+ goto cleanup;
+
+ cgroup[7] = cg_name(cgroup[6], "H");
+ if (!cgroup[7])
+ goto cleanup;
+
+ cgroup[8] = cg_name(cgroup[0], "I");
+ if (!cgroup[8])
+ goto cleanup;
+
+ cgroup[9] = cg_name(cgroup[0], "K");
+ if (!cgroup[9])
+ goto cleanup;
+
+ for (i = 0; i < 10; i++)
+ if (cg_create(cgroup[i]))
+ goto cleanup;
+
+ pids[0] = cg_run_nowait(cgroup[2], child_fn, NULL);
+ pids[1] = cg_run_nowait(cgroup[7], child_fn, NULL);
+ pids[2] = cg_run_nowait(cgroup[9], child_fn, NULL);
+ pids[3] = cg_run_nowait(cgroup[9], child_fn, NULL);
+ pids[4] = cg_run_nowait(cgroup[9], child_fn, NULL);
+
+ /*
+ * Wait until all child processes will enter
+ * corresponding cgroups.
+ */
+
+ if (cg_wait_for_proc_count(cgroup[2], 1) ||
+ cg_wait_for_proc_count(cgroup[7], 1) ||
+ cg_wait_for_proc_count(cgroup[9], 3))
+ goto cleanup;
+
+ /*
+ * Kill A and check that we get an empty notification.
+ */
+ if (cg_kill_wait(cgroup[0]))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = 0; i < 5; i++)
+ wait_for_pid(pids[i]);
+
+ if (ret == KSFT_PASS &&
+ cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n"))
+ ret = KSFT_FAIL;
+
+ for (i = 9; i >= 0 && cgroup[i]; i--) {
+ cg_destroy(cgroup[i]);
+ free(cgroup[i]);
+ }
+
+ return ret;
+}
+
+static int forkbomb_fn(const char *cgroup, void *arg)
+{
+ int ppid;
+
+ fork();
+ fork();
+
+ ppid = getppid();
+
+ while (getppid() == ppid)
+ usleep(1000);
+
+ return getppid() == ppid;
+}
+
+/*
+ * The test runs a fork bomb in a cgroup and tries to kill it.
+ */
+static int test_cgkill_forkbomb(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ pid_t pid = -ESRCH;
+
+ cgroup = cg_name(root, "cg_forkbomb_test");
+ if (!cgroup)
+ goto cleanup;
+
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ pid = cg_run_nowait(cgroup, forkbomb_fn, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ usleep(100000);
+
+ if (cg_kill_wait(cgroup))
+ goto cleanup;
+
+ if (cg_wait_for_proc_count(cgroup, 0))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (pid > 0)
+ wait_for_pid(pid);
+
+ if (ret == KSFT_PASS &&
+ cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+ ret = KSFT_FAIL;
+
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct cgkill_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cgkill_simple),
+ T(test_cgkill_tree),
+ T(test_cgkill_forkbomb),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+ char root[PATH_MAX];
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), NULL))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 0941aa16157e..ca38525484e3 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -14,17 +14,17 @@
#include <sys/sysinfo.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
/*
- * Memory cgroup charging and vmstat data aggregation is performed using
- * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum
- * discrepancy between charge and vmstat entries is number of cpus multiplied
- * by 32 pages multiplied by 2.
+ * Memory cgroup charging is performed using percpu batches 64 pages
+ * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So
+ * the maximum discrepancy between charge and vmstat entries is number
+ * of cpus multiplied by 64 pages.
*/
-#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs())
+#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())
static int alloc_dcache(const char *cgroup, void *arg)
@@ -70,12 +70,16 @@ static int test_kmem_basic(const char *root)
goto cleanup;
cg_write(cg, "memory.high", "1M");
+
+ /* wait for RCU freeing */
+ sleep(1);
+
slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
- if (slab1 <= 0)
+ if (slab1 < 0)
goto cleanup;
current = cg_read_long(cg, "memory.current");
- if (current <= 0)
+ if (current < 0)
goto cleanup;
if (slab1 < slab0 / 2 && current < slab0 / 2)
@@ -158,11 +162,11 @@ static int cg_run_in_subcgroups(const char *parent,
* allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
* threads. Then it checks the sanity of numbers on the parent level:
* the total size of the cgroups should be roughly equal to
- * anon + file + slab + kernel_stack.
+ * anon + file + kernel + sock.
*/
static int test_kmem_memcg_deletion(const char *root)
{
- long current, slab, anon, file, kernel_stack, sum;
+ long current, anon, file, kernel, sock, sum;
int ret = KSFT_FAIL;
char *parent;
@@ -180,24 +184,23 @@ static int test_kmem_memcg_deletion(const char *root)
goto cleanup;
current = cg_read_long(parent, "memory.current");
- slab = cg_read_key_long(parent, "memory.stat", "slab ");
anon = cg_read_key_long(parent, "memory.stat", "anon ");
file = cg_read_key_long(parent, "memory.stat", "file ");
- kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
- if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
- kernel_stack < 0)
+ kernel = cg_read_key_long(parent, "memory.stat", "kernel ");
+ sock = cg_read_key_long(parent, "memory.stat", "sock ");
+ if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0)
goto cleanup;
- sum = slab + anon + file + kernel_stack;
- if (abs(sum - current) < MAX_VMSTAT_ERROR) {
+ sum = anon + file + kernel + sock;
+ if (labs(sum - current) < MAX_VMSTAT_ERROR) {
ret = KSFT_PASS;
} else {
printf("memory.current = %ld\n", current);
- printf("slab + anon + file + kernel_stack = %ld\n", sum);
- printf("slab = %ld\n", slab);
+ printf("anon + file + kernel + sock = %ld\n", sum);
printf("anon = %ld\n", anon);
printf("file = %ld\n", file);
- printf("kernel_stack = %ld\n", kernel_stack);
+ printf("kernel = %ld\n", kernel);
+ printf("sock = %ld\n", sock);
}
cleanup:
@@ -305,6 +308,7 @@ static int test_kmem_dead_cgroups(const char *root)
char *parent;
long dead;
int i;
+ int max_time = 20;
parent = cg_name(root, "kmem_dead_cgroups_test");
if (!parent)
@@ -319,7 +323,7 @@ static int test_kmem_dead_cgroups(const char *root)
if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
goto cleanup;
- for (i = 0; i < 5; i++) {
+ for (i = 0; i < max_time; i++) {
dead = cg_read_key_long(parent, "cgroup.stat",
"nr_dying_descendants ");
if (dead == 0) {
@@ -331,6 +335,8 @@ static int test_kmem_dead_cgroups(const char *root)
* let's wait a bit and repeat.
*/
sleep(1);
+ if (i > 5)
+ printf("Waiting time longer than 5s; wait: %ds (dead: %ld)\n", i, dead);
}
cleanup:
@@ -377,7 +383,7 @@ static int test_percpu_basic(const char *root)
current = cg_read_long(parent, "memory.current");
percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
- if (current > 0 && percpu > 0 && abs(current - percpu) <
+ if (current > 0 && percpu > 0 && labs(current - percpu) <
MAX_VMSTAT_ERROR)
ret = KSFT_PASS;
else
@@ -415,9 +421,11 @@ struct kmem_test {
int main(int argc, char **argv)
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i;
- if (cg_find_unified_root(root, sizeof(root)))
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
/*
@@ -440,11 +448,10 @@ int main(int argc, char **argv)
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index c19a97dd02d4..4e1647568c5b 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -16,10 +16,92 @@
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
+#include <sys/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
+static bool has_localevents;
+static bool has_recursiveprot;
+
+int get_temp_fd(void)
+{
+ return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+ char buf[PAGE_SIZE];
+ struct stat st;
+ int i;
+
+ if (fstat(fd, &st))
+ goto cleanup;
+
+ size += st.st_size;
+
+ if (ftruncate(fd, size))
+ goto cleanup;
+
+ for (i = 0; i < size; i += sizeof(buf))
+ read(fd, buf, sizeof(buf));
+
+ return 0;
+
+cleanup:
+ return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ free(buf);
+ return 0;
+}
+
+int is_swap_enabled(void)
+{
+ char buf[PAGE_SIZE];
+ const char delim[] = "\n";
+ int cnt = 0;
+ char *line;
+
+ if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+ cnt++;
+
+ return cnt > 1;
+}
+
+int set_oom_adj_score(int pid, int score)
+{
+ char path[PATH_MAX];
+ int fd, len;
+
+ sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = dprintf(fd, "%d", score);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+ return 0;
+}
+
/*
* This test creates two nested cgroups with and without enabling
* the memory controller.
@@ -94,6 +176,11 @@ static int alloc_anon_50M_check(const char *cgroup, void *arg)
int ret = -1;
buf = malloc(size);
+ if (buf == NULL) {
+ fprintf(stderr, "malloc() failed\n");
+ return -1;
+ }
+
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
*ptr = 0;
@@ -152,13 +239,16 @@ cleanup:
/*
* This test create a memory cgroup, allocates
* some anonymous memory and some pagecache
- * and check memory.current and some memory.stat values.
+ * and checks memory.current, memory.peak, and some memory.stat values.
*/
-static int test_memcg_current(const char *root)
+static int test_memcg_current_peak(const char *root)
{
int ret = KSFT_FAIL;
- long current;
+ long current, peak, peak_reset;
char *memcg;
+ bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
+ int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
+ struct stat ss;
memcg = cg_name(root, "memcg_test");
if (!memcg)
@@ -171,28 +261,130 @@ static int test_memcg_current(const char *root)
if (current != 0)
goto cleanup;
+ peak = cg_read_long(memcg, "memory.peak");
+ if (peak != 0)
+ goto cleanup;
+
if (cg_run(memcg, alloc_anon_50M_check, NULL))
goto cleanup;
+ peak = cg_read_long(memcg, "memory.peak");
+ if (peak < MB(50))
+ goto cleanup;
+
+ /*
+ * We'll open a few FDs for the same memory.peak file to exercise the free-path
+ * We need at least three to be closed in a different order than writes occurred to test
+ * the linked-list handling.
+ */
+ peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
+
+ if (peak_fd == -1) {
+ if (errno == ENOENT)
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ /*
+ * Before we try to use memory.peak's fd, try to figure out whether
+ * this kernel supports writing to that file in the first place. (by
+ * checking the writable bit on the file's st_mode)
+ */
+ if (fstat(peak_fd, &ss))
+ goto cleanup;
+
+ if ((ss.st_mode & S_IWUSR) == 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
+
+ if (peak_fd2 == -1)
+ goto cleanup;
+
+ peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
+
+ if (peak_fd3 == -1)
+ goto cleanup;
+
+ /* any non-empty string resets, but make it clear */
+ static const char reset_string[] = "reset\n";
+
+ peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
+ if (peak_reset != sizeof(reset_string))
+ goto cleanup;
+
+ peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
+ if (peak_reset != sizeof(reset_string))
+ goto cleanup;
+
+ peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
+ if (peak_reset != sizeof(reset_string))
+ goto cleanup;
+
+ /* Make sure a completely independent read isn't affected by our FD-local reset above*/
+ peak = cg_read_long(memcg, "memory.peak");
+ if (peak < MB(50))
+ goto cleanup;
+
+ fd2_closed = true;
+ if (close(peak_fd2))
+ goto cleanup;
+
+ peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
+
+ if (peak_fd4 == -1)
+ goto cleanup;
+
+ peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
+ if (peak_reset != sizeof(reset_string))
+ goto cleanup;
+
+ peak = cg_read_long_fd(peak_fd);
+ if (peak > MB(30) || peak < 0)
+ goto cleanup;
+
if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
goto cleanup;
+ peak = cg_read_long(memcg, "memory.peak");
+ if (peak < MB(50))
+ goto cleanup;
+
+ /* Make sure everything is back to normal */
+ peak = cg_read_long_fd(peak_fd);
+ if (peak < MB(50))
+ goto cleanup;
+
+ peak = cg_read_long_fd(peak_fd4);
+ if (peak < MB(50))
+ goto cleanup;
+
+ fd3_closed = true;
+ if (close(peak_fd3))
+ goto cleanup;
+
+ fd4_closed = true;
+ if (close(peak_fd4))
+ goto cleanup;
+
ret = KSFT_PASS;
cleanup:
+ close(peak_fd);
+ if (!fd2_closed)
+ close(peak_fd2);
+ if (!fd3_closed)
+ close(peak_fd3);
+ if (!fd4_closed)
+ close(peak_fd4);
cg_destroy(memcg);
free(memcg);
return ret;
}
-static int alloc_pagecache_50M(const char *cgroup, void *arg)
-{
- int fd = (long)arg;
-
- return alloc_pagecache(fd, MB(50));
-}
-
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
int fd = (long)arg;
@@ -210,13 +402,22 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
int ppid = getppid();
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
- if (alloc_anon(cgroup, arg))
+ buf = malloc(size);
+ if (buf == NULL) {
+ fprintf(stderr, "malloc() failed\n");
return -1;
+ }
+
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
while (getppid() == ppid)
sleep(1);
+ free(buf);
return 0;
}
@@ -237,36 +438,52 @@ static int cg_test_proc_killed(const char *cgroup)
return -1;
}
+static bool reclaim_until(const char *memcg, long goal);
+
/*
* First, this test creates the following hierarchy:
- * A memory.min = 50M, memory.max = 200M
- * A/B memory.min = 50M, memory.current = 50M
+ * A memory.min = 0, memory.max = 200M
+ * A/B memory.min = 50M
* A/B/C memory.min = 75M, memory.current = 50M
* A/B/D memory.min = 25M, memory.current = 50M
- * A/B/E memory.min = 500M, memory.current = 0
- * A/B/F memory.min = 0, memory.current = 50M
+ * A/B/E memory.min = 0, memory.current = 50M
+ * A/B/F memory.min = 500M, memory.current = 0
*
- * Usages are pagecache, but the test keeps a running
+ * (or memory.low if we test soft protection)
+ *
+ * Usages are pagecache and the test keeps a running
* process in every leaf cgroup.
* Then it creates A/G and creates a significant
- * memory pressure in it.
+ * memory pressure in A.
*
+ * Then it checks actual memory usages and expects that:
* A/B memory.current ~= 50M
- * A/B/C memory.current ~= 33M
- * A/B/D memory.current ~= 17M
- * A/B/E memory.current ~= 0
+ * A/B/C memory.current ~= 29M [memory.events:low > 0]
+ * A/B/D memory.current ~= 21M [memory.events:low > 0]
+ * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot,
+ * undefined otherwise]
+ * A/B/F memory.current = 0 [memory.events:low == 0]
+ * (for origin of the numbers, see model in memcg_protection.m.)
*
* After that it tries to allocate more than there is
- * unprotected memory in A available, and checks
- * checks that memory.min protects pagecache even
- * in this case.
+ * unprotected memory in A available, and checks that:
+ * a) memory.min protects pagecache even in this case,
+ * b) memory.low allows reclaiming page cache with low events.
+ *
+ * Then we try to reclaim from A/B/C using memory.reclaim until its
+ * usage reaches 10M.
+ * This makes sure that:
+ * (a) We ignore the protection of the reclaim target memcg.
+ * (b) The previously calculated emin value (~29M) should be dismissed.
*/
-static int test_memcg_min(const char *root)
+static int test_memcg_protection(const char *root, bool min)
{
- int ret = KSFT_FAIL;
+ int ret = KSFT_FAIL, rc;
char *parent[3] = {NULL};
char *children[4] = {NULL};
+ const char *attribute = min ? "memory.min" : "memory.low";
long c[4];
+ long current;
int i, attempts;
int fd;
@@ -289,8 +506,10 @@ static int test_memcg_min(const char *root)
if (cg_create(parent[0]))
goto cleanup;
- if (cg_read_long(parent[0], "memory.min")) {
- ret = KSFT_SKIP;
+ if (cg_read_long(parent[0], attribute)) {
+ /* No memory.min on older kernels is fine */
+ if (min)
+ ret = KSFT_SKIP;
goto cleanup;
}
@@ -320,24 +539,22 @@ static int test_memcg_min(const char *root)
if (cg_create(children[i]))
goto cleanup;
- if (i == 2)
+ if (i > 2)
continue;
cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
(void *)(long)fd);
}
- if (cg_write(parent[0], "memory.min", "50M"))
+ if (cg_write(parent[1], attribute, "50M"))
goto cleanup;
- if (cg_write(parent[1], "memory.min", "50M"))
+ if (cg_write(children[0], attribute, "75M"))
goto cleanup;
- if (cg_write(children[0], "memory.min", "75M"))
+ if (cg_write(children[1], attribute, "25M"))
goto cleanup;
- if (cg_write(children[1], "memory.min", "25M"))
+ if (cg_write(children[2], attribute, "0"))
goto cleanup;
- if (cg_write(children[2], "memory.min", "500M"))
- goto cleanup;
- if (cg_write(children[3], "memory.min", "0"))
+ if (cg_write(children[3], attribute, "500M"))
goto cleanup;
attempts = 0;
@@ -357,178 +574,59 @@ static int test_memcg_min(const char *root)
for (i = 0; i < ARRAY_SIZE(children); i++)
c[i] = cg_read_long(children[i], "memory.current");
- if (!values_close(c[0], MB(33), 10))
- goto cleanup;
-
- if (!values_close(c[1], MB(17), 10))
+ if (!values_close(c[0], MB(29), 15))
goto cleanup;
- if (!values_close(c[2], 0, 1))
+ if (!values_close(c[1], MB(21), 20))
goto cleanup;
- if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
+ if (c[3] != 0)
goto cleanup;
- if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
+ if (min && !rc)
goto cleanup;
-
- ret = KSFT_PASS;
-
-cleanup:
- for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
- if (!children[i])
- continue;
-
- cg_destroy(children[i]);
- free(children[i]);
- }
-
- for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
- if (!parent[i])
- continue;
-
- cg_destroy(parent[i]);
- free(parent[i]);
- }
- close(fd);
- return ret;
-}
-
-/*
- * First, this test creates the following hierarchy:
- * A memory.low = 50M, memory.max = 200M
- * A/B memory.low = 50M, memory.current = 50M
- * A/B/C memory.low = 75M, memory.current = 50M
- * A/B/D memory.low = 25M, memory.current = 50M
- * A/B/E memory.low = 500M, memory.current = 0
- * A/B/F memory.low = 0, memory.current = 50M
- *
- * Usages are pagecache.
- * Then it creates A/G an creates a significant
- * memory pressure in it.
- *
- * Then it checks actual memory usages and expects that:
- * A/B memory.current ~= 50M
- * A/B/ memory.current ~= 33M
- * A/B/D memory.current ~= 17M
- * A/B/E memory.current ~= 0
- *
- * After that it tries to allocate more than there is
- * unprotected memory in A available,
- * and checks low and oom events in memory.events.
- */
-static int test_memcg_low(const char *root)
-{
- int ret = KSFT_FAIL;
- char *parent[3] = {NULL};
- char *children[4] = {NULL};
- long low, oom;
- long c[4];
- int i;
- int fd;
-
- fd = get_temp_fd();
- if (fd < 0)
- goto cleanup;
-
- parent[0] = cg_name(root, "memcg_test_0");
- if (!parent[0])
- goto cleanup;
-
- parent[1] = cg_name(parent[0], "memcg_test_1");
- if (!parent[1])
- goto cleanup;
-
- parent[2] = cg_name(parent[0], "memcg_test_2");
- if (!parent[2])
- goto cleanup;
-
- if (cg_create(parent[0]))
- goto cleanup;
-
- if (cg_read_long(parent[0], "memory.low"))
- goto cleanup;
-
- if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
- goto cleanup;
-
- if (cg_write(parent[0], "memory.max", "200M"))
- goto cleanup;
-
- if (cg_write(parent[0], "memory.swap.max", "0"))
- goto cleanup;
-
- if (cg_create(parent[1]))
- goto cleanup;
-
- if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
- goto cleanup;
-
- if (cg_create(parent[2]))
+ else if (!min && rc) {
+ fprintf(stderr,
+ "memory.low prevents from allocating anon memory\n");
goto cleanup;
-
- for (i = 0; i < ARRAY_SIZE(children); i++) {
- children[i] = cg_name_indexed(parent[1], "child_memcg", i);
- if (!children[i])
- goto cleanup;
-
- if (cg_create(children[i]))
- goto cleanup;
-
- if (i == 2)
- continue;
-
- if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
- goto cleanup;
}
- if (cg_write(parent[0], "memory.low", "50M"))
- goto cleanup;
- if (cg_write(parent[1], "memory.low", "50M"))
- goto cleanup;
- if (cg_write(children[0], "memory.low", "75M"))
- goto cleanup;
- if (cg_write(children[1], "memory.low", "25M"))
- goto cleanup;
- if (cg_write(children[2], "memory.low", "500M"))
- goto cleanup;
- if (cg_write(children[3], "memory.low", "0"))
- goto cleanup;
-
- if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
- goto cleanup;
-
- if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
- goto cleanup;
-
- for (i = 0; i < ARRAY_SIZE(children); i++)
- c[i] = cg_read_long(children[i], "memory.current");
-
- if (!values_close(c[0], MB(33), 10))
+ current = min ? MB(50) : MB(30);
+ if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
goto cleanup;
- if (!values_close(c[1], MB(17), 10))
+ if (!reclaim_until(children[0], MB(10)))
goto cleanup;
- if (!values_close(c[2], 0, 1))
- goto cleanup;
-
- if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
- fprintf(stderr,
- "memory.low prevents from allocating anon memory\n");
+ if (min) {
+ ret = KSFT_PASS;
goto cleanup;
}
+ /*
+ * Child 2 has memory.low=0, but some low protection may still be
+ * distributed down from its parent with memory.low=50M if cgroup2
+ * memory_recursiveprot mount option is enabled. Ignore the low
+ * event count in this case.
+ */
for (i = 0; i < ARRAY_SIZE(children); i++) {
+ int ignore_low_events_index = has_recursiveprot ? 2 : -1;
+ int no_low_events_index = 1;
+ long low, oom;
+
oom = cg_read_key_long(children[i], "memory.events", "oom ");
low = cg_read_key_long(children[i], "memory.events", "low ");
if (oom)
goto cleanup;
- if (i < 2 && low <= 0)
+ if (i == ignore_low_events_index)
+ continue;
+ if (i <= no_low_events_index && low <= 0)
goto cleanup;
- if (i >= 2 && low)
+ if (i > no_low_events_index && low)
goto cleanup;
+
}
ret = KSFT_PASS;
@@ -553,13 +651,28 @@ cleanup:
return ret;
}
+static int test_memcg_min(const char *root)
+{
+ return test_memcg_protection(root, true);
+}
+
+static int test_memcg_low(const char *root)
+{
+ return test_memcg_protection(root, false);
+}
+
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
size_t size = MB(50);
int ret = -1;
- long current;
+ long current, high, max;
int fd;
+ high = cg_read_long(cgroup, "memory.high");
+ max = cg_read_long(cgroup, "memory.max");
+ if (high != MB(30) && max != MB(30))
+ return -1;
+
fd = get_temp_fd();
if (fd < 0)
return -1;
@@ -568,7 +681,7 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
goto cleanup;
current = cg_read_long(cgroup, "memory.current");
- if (current <= MB(29) || current > MB(30))
+ if (!values_close(current, MB(30), 5))
goto cleanup;
ret = 0;
@@ -606,7 +719,7 @@ static int test_memcg_high(const char *root)
if (cg_write(memcg, "memory.high", "30M"))
goto cleanup;
- if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+ if (cg_run(memcg, alloc_anon, (void *)MB(31)))
goto cleanup;
if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
@@ -628,6 +741,82 @@ cleanup:
return ret;
}
+static int alloc_anon_mlock(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ void *buf;
+
+ buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+ 0, 0);
+ if (buf == MAP_FAILED)
+ return -1;
+
+ mlock(buf, size);
+ munmap(buf, size);
+ return 0;
+}
+
+/*
+ * This test checks that memory.high is able to throttle big single shot
+ * allocation i.e. large allocation within one kernel entry.
+ */
+static int test_memcg_high_sync(const char *root)
+{
+ int ret = KSFT_FAIL, pid, fd = -1;
+ char *memcg;
+ long pre_high, pre_max;
+ long post_high, post_max;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ pre_high = cg_read_key_long(memcg, "memory.events", "high ");
+ pre_max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (pre_high < 0 || pre_max < 0)
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.high", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "140M"))
+ goto cleanup;
+
+ fd = memcg_prepare_for_wait(memcg);
+ if (fd < 0)
+ goto cleanup;
+
+ pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
+ if (pid < 0)
+ goto cleanup;
+
+ cg_wait_for(fd);
+
+ post_high = cg_read_key_long(memcg, "memory.events", "high ");
+ post_max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (post_high < 0 || post_max < 0)
+ goto cleanup;
+
+ if (pre_high == post_high || pre_max != post_max)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (fd >= 0)
+ close(fd);
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
/*
* This test checks that memory.max limits the amount of
* memory which can be consumed by either anonymous memory
@@ -679,6 +868,121 @@ cleanup:
return ret;
}
+/*
+ * Reclaim from @memcg until usage reaches @goal by writing to
+ * memory.reclaim.
+ *
+ * This function will return false if the usage is already below the
+ * goal.
+ *
+ * This function assumes that writing to memory.reclaim is the only
+ * source of change in memory.current (no concurrent allocations or
+ * reclaim).
+ *
+ * This function makes sure memory.reclaim is sane. It will return
+ * false if memory.reclaim's error codes do not make sense, even if
+ * the usage goal was satisfied.
+ */
+static bool reclaim_until(const char *memcg, long goal)
+{
+ char buf[64];
+ int retries, err;
+ long current, to_reclaim;
+ bool reclaimed = false;
+
+ for (retries = 5; retries > 0; retries--) {
+ current = cg_read_long(memcg, "memory.current");
+
+ if (current < goal || values_close(current, goal, 3))
+ break;
+ /* Did memory.reclaim return 0 incorrectly? */
+ else if (reclaimed)
+ return false;
+
+ to_reclaim = current - goal;
+ snprintf(buf, sizeof(buf), "%ld", to_reclaim);
+ err = cg_write(memcg, "memory.reclaim", buf);
+ if (!err)
+ reclaimed = true;
+ else if (err != -EAGAIN)
+ return false;
+ }
+ return reclaimed;
+}
+
+/*
+ * This test checks that memory.reclaim reclaims the given
+ * amount of memory (from both anon and file, if possible).
+ */
+static int test_memcg_reclaim(const char *root)
+{
+ int ret = KSFT_FAIL;
+ int fd = -1;
+ int retries;
+ char *memcg;
+ long current, expected_usage;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ current = cg_read_long(memcg, "memory.current");
+ if (current != 0)
+ goto cleanup;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ goto cleanup;
+
+ cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
+
+ /*
+ * If swap is enabled, try to reclaim from both anon and file, else try
+ * to reclaim from file only.
+ */
+ if (is_swap_enabled()) {
+ cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
+ expected_usage = MB(100);
+ } else
+ expected_usage = MB(50);
+
+ /*
+ * Wait until current usage reaches the expected usage (or we run out of
+ * retries).
+ */
+ retries = 5;
+ while (!values_close(cg_read_long(memcg, "memory.current"),
+ expected_usage, 10)) {
+ if (retries--) {
+ sleep(1);
+ continue;
+ } else {
+ fprintf(stderr,
+ "failed to allocate %ld for memcg reclaim test\n",
+ expected_usage);
+ goto cleanup;
+ }
+ }
+
+ /*
+ * Reclaim until current reaches 30M, this makes sure we hit both anon
+ * and file if swap is enabled.
+ */
+ if (!reclaim_until(memcg, MB(30)))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+ close(fd);
+
+ return ret;
+}
+
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
long mem_max = (long)arg;
@@ -688,6 +992,11 @@ static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
int ret = -1;
buf = malloc(size);
+ if (buf == NULL) {
+ fprintf(stderr, "malloc() failed\n");
+ return -1;
+ }
+
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
*ptr = 0;
@@ -708,13 +1017,19 @@ cleanup:
/*
* This test checks that memory.swap.max limits the amount of
- * anonymous memory which can be swapped out.
+ * anonymous memory which can be swapped out. Additionally, it verifies that
+ * memory.swap.peak reflects the high watermark and can be reset.
*/
-static int test_memcg_swap_max(const char *root)
+static int test_memcg_swap_max_peak(const char *root)
{
int ret = KSFT_FAIL;
char *memcg;
- long max;
+ long max, peak;
+ struct stat ss;
+ int swap_peak_fd = -1, mem_peak_fd = -1;
+
+ /* any non-empty string resets */
+ static const char reset_string[] = "foobarbaz";
if (!is_swap_enabled())
return KSFT_SKIP;
@@ -731,6 +1046,61 @@ static int test_memcg_swap_max(const char *root)
goto cleanup;
}
+ swap_peak_fd = cg_open(memcg, "memory.swap.peak",
+ O_RDWR | O_APPEND | O_CLOEXEC);
+
+ if (swap_peak_fd == -1) {
+ if (errno == ENOENT)
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ /*
+ * Before we try to use memory.swap.peak's fd, try to figure out
+ * whether this kernel supports writing to that file in the first
+ * place. (by checking the writable bit on the file's st_mode)
+ */
+ if (fstat(swap_peak_fd, &ss))
+ goto cleanup;
+
+ if ((ss.st_mode & S_IWUSR) == 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
+
+ if (mem_peak_fd == -1)
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.swap.peak"))
+ goto cleanup;
+
+ if (cg_read_long_fd(swap_peak_fd))
+ goto cleanup;
+
+ /* switch the swap and mem fds into local-peak tracking mode*/
+ int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
+
+ if (peak_reset != sizeof(reset_string))
+ goto cleanup;
+
+ if (cg_read_long_fd(swap_peak_fd))
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.peak"))
+ goto cleanup;
+
+ if (cg_read_long_fd(mem_peak_fd))
+ goto cleanup;
+
+ peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
+ if (peak_reset != sizeof(reset_string))
+ goto cleanup;
+
+ if (cg_read_long_fd(mem_peak_fd))
+ goto cleanup;
+
if (cg_read_strcmp(memcg, "memory.max", "max\n"))
goto cleanup;
@@ -753,6 +1123,61 @@ static int test_memcg_swap_max(const char *root)
if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
goto cleanup;
+ peak = cg_read_long(memcg, "memory.peak");
+ if (peak < MB(29))
+ goto cleanup;
+
+ peak = cg_read_long(memcg, "memory.swap.peak");
+ if (peak < MB(29))
+ goto cleanup;
+
+ peak = cg_read_long_fd(mem_peak_fd);
+ if (peak < MB(29))
+ goto cleanup;
+
+ peak = cg_read_long_fd(swap_peak_fd);
+ if (peak < MB(29))
+ goto cleanup;
+
+ /*
+ * open, reset and close the peak swap on another FD to make sure
+ * multiple extant fds don't corrupt the linked-list
+ */
+ peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
+ if (peak_reset)
+ goto cleanup;
+
+ peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
+ if (peak_reset)
+ goto cleanup;
+
+ /* actually reset on the fds */
+ peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
+ if (peak_reset != sizeof(reset_string))
+ goto cleanup;
+
+ peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
+ if (peak_reset != sizeof(reset_string))
+ goto cleanup;
+
+ peak = cg_read_long_fd(swap_peak_fd);
+ if (peak > MB(10))
+ goto cleanup;
+
+ /*
+ * The cgroup is now empty, but there may be a page or two associated
+ * with the open FD accounted to it.
+ */
+ peak = cg_read_long_fd(mem_peak_fd);
+ if (peak > MB(1))
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.peak") < MB(29))
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
+ goto cleanup;
+
if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
goto cleanup;
@@ -760,9 +1185,29 @@ static int test_memcg_swap_max(const char *root)
if (max <= 0)
goto cleanup;
+ peak = cg_read_long(memcg, "memory.peak");
+ if (peak < MB(29))
+ goto cleanup;
+
+ peak = cg_read_long(memcg, "memory.swap.peak");
+ if (peak < MB(29))
+ goto cleanup;
+
+ peak = cg_read_long_fd(mem_peak_fd);
+ if (peak < MB(29))
+ goto cleanup;
+
+ peak = cg_read_long_fd(swap_peak_fd);
+ if (peak < MB(19))
+ goto cleanup;
+
ret = KSFT_PASS;
cleanup:
+ if (mem_peak_fd != -1 && close(mem_peak_fd))
+ ret = KSFT_FAIL;
+ if (swap_peak_fd != -1 && close(swap_peak_fd))
+ ret = KSFT_FAIL;
cg_destroy(memcg);
free(memcg);
@@ -882,7 +1327,9 @@ static int tcp_client(const char *cgroup, unsigned short port)
char servport[6];
int retries = 0x10; /* nice round number */
int sk, ret;
+ long allocated;
+ allocated = cg_read_long(cgroup, "memory.current");
snprintf(servport, sizeof(servport), "%hd", port);
ret = getaddrinfo(server, servport, NULL, &ai);
if (ret)
@@ -910,10 +1357,8 @@ static int tcp_client(const char *cgroup, unsigned short port)
if (current < 0 || sock < 0)
goto close_sk;
- if (current < sock)
- goto close_sk;
-
- if (values_close(current, sock, 10)) {
+ /* exclude the memory not related to socket connection */
+ if (values_close(current - allocated, sock, 10)) {
ret = KSFT_PASS;
break;
}
@@ -1002,12 +1447,14 @@ cleanup:
/*
* This test disables swapping and tries to allocate anonymous memory
* up to OOM with memory.group.oom set. Then it checks that all
- * processes in the leaf (but not the parent) were killed.
+ * processes in the leaf were killed. It also checks that oom_events
+ * were propagated to the parent level.
*/
static int test_memcg_oom_group_leaf_events(const char *root)
{
int ret = KSFT_FAIL;
char *parent, *child;
+ long parent_oom_events;
parent = cg_name(root, "memcg_test_0");
child = cg_name(root, "memcg_test_0/memcg_test_1");
@@ -1045,7 +1492,16 @@ static int test_memcg_oom_group_leaf_events(const char *root)
if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
goto cleanup;
- if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+ parent_oom_events = cg_read_key_long(
+ parent, "memory.events", "oom_kill ");
+ /*
+ * If memory_localevents is not enabled (the default), the parent should
+ * count OOM events in its children groups. Otherwise, it should not
+ * have observed any events.
+ */
+ if (has_localevents && parent_oom_events != 0)
+ goto cleanup;
+ else if (!has_localevents && parent_oom_events <= 0)
goto cleanup;
ret = KSFT_PASS;
@@ -1169,20 +1625,21 @@ cleanup:
return ret;
}
-
#define T(x) { x, #x }
struct memcg_test {
int (*fn)(const char *root);
const char *name;
} tests[] = {
T(test_memcg_subtree_control),
- T(test_memcg_current),
+ T(test_memcg_current_peak),
T(test_memcg_min),
T(test_memcg_low),
T(test_memcg_high),
+ T(test_memcg_high_sync),
T(test_memcg_max),
+ T(test_memcg_reclaim),
T(test_memcg_oom_events),
- T(test_memcg_swap_max),
+ T(test_memcg_swap_max_peak),
T(test_memcg_sock),
T(test_memcg_oom_group_leaf_events),
T(test_memcg_oom_group_parent_events),
@@ -1193,9 +1650,11 @@ struct memcg_test {
int main(int argc, char **argv)
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i, proc_status;
- if (cg_find_unified_root(root, sizeof(root)))
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
/*
@@ -1209,6 +1668,16 @@ int main(int argc, char **argv)
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
+ proc_status = proc_mount_contains("memory_recursiveprot");
+ if (proc_status < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ has_recursiveprot = proc_status;
+
+ proc_status = proc_mount_contains("memory_localevents");
+ if (proc_status < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ has_localevents = proc_status;
+
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
@@ -1218,11 +1687,10 @@ int main(int argc, char **argv)
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c
new file mode 100644
index 000000000000..9a387c815d2c
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_pids.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <linux/limits.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+#include "cgroup_util.h"
+
+static int run_success(const char *cgroup, void *arg)
+{
+ return 0;
+}
+
+static int run_pause(const char *cgroup, void *arg)
+{
+ return pause();
+}
+
+/*
+ * This test checks that pids.max prevents forking new children above the
+ * specified limit in the cgroup.
+ */
+static int test_pids_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg_pids;
+ int pid;
+
+ cg_pids = cg_name(root, "pids_test");
+ if (!cg_pids)
+ goto cleanup;
+
+ if (cg_create(cg_pids))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_pids, "pids.max", "max\n"))
+ goto cleanup;
+
+ if (cg_write(cg_pids, "pids.max", "2"))
+ goto cleanup;
+
+ if (cg_enter_current(cg_pids))
+ goto cleanup;
+
+ pid = cg_run_nowait(cg_pids, run_pause, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ if (cg_run_nowait(cg_pids, run_success, NULL) != -1 || errno != EAGAIN)
+ goto cleanup;
+
+ if (kill(pid, SIGINT))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ cg_destroy(cg_pids);
+ free(cg_pids);
+
+ return ret;
+}
+
+/*
+ * This test checks that pids.events are counted in cgroup associated with pids.max
+ */
+static int test_pids_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg_parent = NULL, *cg_child = NULL;
+ int pid;
+
+ if (cgroup_feature("pids_localevents") <= 0)
+ return KSFT_SKIP;
+
+ cg_parent = cg_name(root, "pids_parent");
+ cg_child = cg_name(cg_parent, "pids_child");
+ if (!cg_parent || !cg_child)
+ goto cleanup;
+
+ if (cg_create(cg_parent))
+ goto cleanup;
+ if (cg_write(cg_parent, "cgroup.subtree_control", "+pids"))
+ goto cleanup;
+ if (cg_create(cg_child))
+ goto cleanup;
+
+ if (cg_write(cg_parent, "pids.max", "2"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_child, "pids.max", "max\n"))
+ goto cleanup;
+
+ if (cg_enter_current(cg_child))
+ goto cleanup;
+
+ pid = cg_run_nowait(cg_child, run_pause, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ if (cg_run_nowait(cg_child, run_success, NULL) != -1 || errno != EAGAIN)
+ goto cleanup;
+
+ if (kill(pid, SIGINT))
+ goto cleanup;
+
+ if (cg_read_key_long(cg_child, "pids.events", "max ") != 0)
+ goto cleanup;
+ if (cg_read_key_long(cg_parent, "pids.events", "max ") != 1)
+ goto cleanup;
+
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_child)
+ cg_destroy(cg_child);
+ if (cg_parent)
+ cg_destroy(cg_parent);
+ free(cg_child);
+ free(cg_parent);
+
+ return ret;
+}
+
+
+
+#define T(x) { x, #x }
+struct pids_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_pids_max),
+ T(test_pids_events),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), NULL))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ /*
+ * Check that pids controller is available:
+ * pids is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "pids"))
+ ksft_exit_skip("pids controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "pids"))
+ if (cg_write(root, "cgroup.subtree_control", "+pids"))
+ ksft_exit_skip("Failed to set pids controller\n");
+
+ for (int i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh
index 15d9d5896394..3c9c4554d5f6 100755
--- a/tools/testing/selftests/cgroup/test_stress.sh
+++ b/tools/testing/selftests/cgroup/test_stress.sh
@@ -1,4 +1,4 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-./with_stress.sh -s subsys -s fork ./test_core
+./with_stress.sh -s subsys -s fork ${OUTPUT:-.}/test_core
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
new file mode 100644
index 000000000000..64ebc3f3f203
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/sysinfo.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "cgroup_util.h"
+
+static int read_int(const char *path, size_t *value)
+{
+ FILE *file;
+ int ret = 0;
+
+ file = fopen(path, "r");
+ if (!file)
+ return -1;
+ if (fscanf(file, "%ld", value) != 1)
+ ret = -1;
+ fclose(file);
+ return ret;
+}
+
+static int set_min_free_kb(size_t value)
+{
+ FILE *file;
+ int ret;
+
+ file = fopen("/proc/sys/vm/min_free_kbytes", "w");
+ if (!file)
+ return -1;
+ ret = fprintf(file, "%ld\n", value);
+ fclose(file);
+ return ret;
+}
+
+static int read_min_free_kb(size_t *value)
+{
+ return read_int("/proc/sys/vm/min_free_kbytes", value);
+}
+
+static int get_zswap_stored_pages(size_t *value)
+{
+ return read_int("/sys/kernel/debug/zswap/stored_pages", value);
+}
+
+static long get_cg_wb_count(const char *cg)
+{
+ return cg_read_key_long(cg, "memory.stat", "zswpwb");
+}
+
+static long get_zswpout(const char *cgroup)
+{
+ return cg_read_key_long(cgroup, "memory.stat", "zswpout ");
+}
+
+static int allocate_and_read_bytes(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ char *mem = (char *)malloc(size);
+ int ret = 0;
+
+ if (!mem)
+ return -1;
+ for (int i = 0; i < size; i += 4095)
+ mem[i] = 'a';
+
+ /* Go through the allocated memory to (z)swap in and out pages */
+ for (int i = 0; i < size; i += 4095) {
+ if (mem[i] != 'a')
+ ret = -1;
+ }
+
+ free(mem);
+ return ret;
+}
+
+static int allocate_bytes(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ char *mem = (char *)malloc(size);
+
+ if (!mem)
+ return -1;
+ for (int i = 0; i < size; i += 4095)
+ mem[i] = 'a';
+ free(mem);
+ return 0;
+}
+
+static char *setup_test_group_1M(const char *root, const char *name)
+{
+ char *group_name = cg_name(root, name);
+
+ if (!group_name)
+ return NULL;
+ if (cg_create(group_name))
+ goto fail;
+ if (cg_write(group_name, "memory.max", "1M")) {
+ cg_destroy(group_name);
+ goto fail;
+ }
+ return group_name;
+fail:
+ free(group_name);
+ return NULL;
+}
+
+/*
+ * Sanity test to check that pages are written into zswap.
+ */
+static int test_zswap_usage(const char *root)
+{
+ long zswpout_before, zswpout_after;
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ test_group = cg_name(root, "no_shrink_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.max", "1M"))
+ goto out;
+
+ zswpout_before = get_zswpout(test_group);
+ if (zswpout_before < 0) {
+ ksft_print_msg("Failed to get zswpout\n");
+ goto out;
+ }
+
+ /* Allocate more than memory.max to push memory into zswap */
+ if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
+ goto out;
+
+ /* Verify that pages come into zswap */
+ zswpout_after = get_zswpout(test_group);
+ if (zswpout_after <= zswpout_before) {
+ ksft_print_msg("zswpout does not increase after test program\n");
+ goto out;
+ }
+ ret = KSFT_PASS;
+
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+/*
+ * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
+ * the cgroup.
+ */
+static int test_swapin_nozswap(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *test_group;
+ long swap_peak, zswpout;
+
+ test_group = cg_name(root, "no_zswap_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.max", "8M"))
+ goto out;
+ if (cg_write(test_group, "memory.zswap.max", "0"))
+ goto out;
+
+ /* Allocate and read more than memory.max to trigger swapin */
+ if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
+ goto out;
+
+ /* Verify that pages are swapped out, but no zswap happened */
+ swap_peak = cg_read_long(test_group, "memory.swap.peak");
+ if (swap_peak < 0) {
+ ksft_print_msg("failed to get cgroup's swap_peak\n");
+ goto out;
+ }
+
+ if (swap_peak < MB(24)) {
+ ksft_print_msg("at least 24MB of memory should be swapped out\n");
+ goto out;
+ }
+
+ zswpout = get_zswpout(test_group);
+ if (zswpout < 0) {
+ ksft_print_msg("failed to get zswpout\n");
+ goto out;
+ }
+
+ if (zswpout > 0) {
+ ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
+ goto out;
+ }
+
+ ret = KSFT_PASS;
+
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+/* Simple test to verify the (z)swapin code paths */
+static int test_zswapin(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *test_group;
+ long zswpin;
+
+ test_group = cg_name(root, "zswapin_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.max", "8M"))
+ goto out;
+ if (cg_write(test_group, "memory.zswap.max", "max"))
+ goto out;
+
+ /* Allocate and read more than memory.max to trigger (z)swap in */
+ if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
+ goto out;
+
+ zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
+ if (zswpin < 0) {
+ ksft_print_msg("failed to get zswpin\n");
+ goto out;
+ }
+
+ if (zswpin < MB(24) / PAGE_SIZE) {
+ ksft_print_msg("at least 24MB should be brought back from zswap\n");
+ goto out;
+ }
+
+ ret = KSFT_PASS;
+
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+/*
+ * Attempt writeback with the following steps:
+ * 1. Allocate memory.
+ * 2. Reclaim memory equal to the amount that was allocated in step 1.
+ This will move it into zswap.
+ * 3. Save current zswap usage.
+ * 4. Move the memory allocated in step 1 back in from zswap.
+ * 5. Set zswap.max to half the amount that was recorded in step 3.
+ * 6. Attempt to reclaim memory equal to the amount that was allocated,
+ this will either trigger writeback if it's enabled, or reclamation
+ will fail if writeback is disabled as there isn't enough zswap space.
+ */
+static int attempt_writeback(const char *cgroup, void *arg)
+{
+ long pagesize = sysconf(_SC_PAGESIZE);
+ size_t memsize = MB(4);
+ char buf[pagesize];
+ long zswap_usage;
+ bool wb_enabled = *(bool *) arg;
+ int ret = -1;
+ char *mem;
+
+ mem = (char *)malloc(memsize);
+ if (!mem)
+ return ret;
+
+ /*
+ * Fill half of each page with increasing data, and keep other
+ * half empty, this will result in data that is still compressible
+ * and ends up in zswap, with material zswap usage.
+ */
+ for (int i = 0; i < pagesize; i++)
+ buf[i] = i < pagesize/2 ? (char) i : 0;
+
+ for (int i = 0; i < memsize; i += pagesize)
+ memcpy(&mem[i], buf, pagesize);
+
+ /* Try and reclaim allocated memory */
+ if (cg_write_numeric(cgroup, "memory.reclaim", memsize)) {
+ ksft_print_msg("Failed to reclaim all of the requested memory\n");
+ goto out;
+ }
+
+ zswap_usage = cg_read_long(cgroup, "memory.zswap.current");
+
+ /* zswpin */
+ for (int i = 0; i < memsize; i += pagesize) {
+ if (memcmp(&mem[i], buf, pagesize)) {
+ ksft_print_msg("invalid memory\n");
+ goto out;
+ }
+ }
+
+ if (cg_write_numeric(cgroup, "memory.zswap.max", zswap_usage/2))
+ goto out;
+
+ /*
+ * If writeback is enabled, trying to reclaim memory now will trigger a
+ * writeback as zswap.max is half of what was needed when reclaim ran the first time.
+ * If writeback is disabled, memory reclaim will fail as zswap is limited and
+ * it can't writeback to swap.
+ */
+ ret = cg_write_numeric(cgroup, "memory.reclaim", memsize);
+ if (!wb_enabled)
+ ret = (ret == -EAGAIN) ? 0 : -1;
+
+out:
+ free(mem);
+ return ret;
+}
+
+static int test_zswap_writeback_one(const char *cgroup, bool wb)
+{
+ long zswpwb_before, zswpwb_after;
+
+ zswpwb_before = get_cg_wb_count(cgroup);
+ if (zswpwb_before != 0) {
+ ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before);
+ return -1;
+ }
+
+ if (cg_run(cgroup, attempt_writeback, (void *) &wb))
+ return -1;
+
+ /* Verify that zswap writeback occurred only if writeback was enabled */
+ zswpwb_after = get_cg_wb_count(cgroup);
+ if (zswpwb_after < 0)
+ return -1;
+
+ if (wb != !!zswpwb_after) {
+ ksft_print_msg("zswpwb_after is %ld while wb is %s\n",
+ zswpwb_after, wb ? "enabled" : "disabled");
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Test to verify the zswap writeback path */
+static int test_zswap_writeback(const char *root, bool wb)
+{
+ int ret = KSFT_FAIL;
+ char *test_group, *test_group_child = NULL;
+
+ if (cg_read_strcmp(root, "memory.zswap.writeback", "1"))
+ return KSFT_SKIP;
+
+ test_group = cg_name(root, "zswap_writeback_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0"))
+ goto out;
+
+ if (test_zswap_writeback_one(test_group, wb))
+ goto out;
+
+ /* Reset memory.zswap.max to max (modified by attempt_writeback), and
+ * set up child cgroup, whose memory.zswap.writeback is hardcoded to 1.
+ * Thus, the parent's setting shall be what's in effect. */
+ if (cg_write(test_group, "memory.zswap.max", "max"))
+ goto out;
+ if (cg_write(test_group, "cgroup.subtree_control", "+memory"))
+ goto out;
+
+ test_group_child = cg_name(test_group, "zswap_writeback_test_child");
+ if (!test_group_child)
+ goto out;
+ if (cg_create(test_group_child))
+ goto out;
+ if (cg_write(test_group_child, "memory.zswap.writeback", "1"))
+ goto out;
+
+ if (test_zswap_writeback_one(test_group_child, wb))
+ goto out;
+
+ ret = KSFT_PASS;
+
+out:
+ if (test_group_child) {
+ cg_destroy(test_group_child);
+ free(test_group_child);
+ }
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+static int test_zswap_writeback_enabled(const char *root)
+{
+ return test_zswap_writeback(root, true);
+}
+
+static int test_zswap_writeback_disabled(const char *root)
+{
+ return test_zswap_writeback(root, false);
+}
+
+/*
+ * When trying to store a memcg page in zswap, if the memcg hits its memory
+ * limit in zswap, writeback should affect only the zswapped pages of that
+ * memcg.
+ */
+static int test_no_invasive_cgroup_shrink(const char *root)
+{
+ int ret = KSFT_FAIL;
+ size_t control_allocation_size = MB(10);
+ char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;
+
+ wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
+ if (!wb_group)
+ return KSFT_FAIL;
+ if (cg_write(wb_group, "memory.zswap.max", "10K"))
+ goto out;
+ control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
+ if (!control_group)
+ goto out;
+
+ /* Push some test_group2 memory into zswap */
+ if (cg_enter_current(control_group))
+ goto out;
+ control_allocation = malloc(control_allocation_size);
+ for (int i = 0; i < control_allocation_size; i += 4095)
+ control_allocation[i] = 'a';
+ if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
+ goto out;
+
+ /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
+ if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
+ goto out;
+
+ /* Verify that only zswapped memory from gwb_group has been written back */
+ if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
+ ret = KSFT_PASS;
+out:
+ cg_enter_current(root);
+ if (control_group) {
+ cg_destroy(control_group);
+ free(control_group);
+ }
+ cg_destroy(wb_group);
+ free(wb_group);
+ if (control_allocation)
+ free(control_allocation);
+ return ret;
+}
+
+struct no_kmem_bypass_child_args {
+ size_t target_alloc_bytes;
+ size_t child_allocated;
+};
+
+static int no_kmem_bypass_child(const char *cgroup, void *arg)
+{
+ struct no_kmem_bypass_child_args *values = arg;
+ void *allocation;
+
+ allocation = malloc(values->target_alloc_bytes);
+ if (!allocation) {
+ values->child_allocated = true;
+ return -1;
+ }
+ for (long i = 0; i < values->target_alloc_bytes; i += 4095)
+ ((char *)allocation)[i] = 'a';
+ values->child_allocated = true;
+ pause();
+ free(allocation);
+ return 0;
+}
+
+/*
+ * When pages owned by a memcg are pushed to zswap by kswapd, they should be
+ * charged to that cgroup. This wasn't the case before commit
+ * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
+ *
+ * The test first allocates memory in a memcg, then raises min_free_kbytes to
+ * a very high value so that the allocation falls below low wm, then makes
+ * another allocation to trigger kswapd that should push the memcg-owned pages
+ * to zswap and verifies that the zswap pages are correctly charged.
+ *
+ * To be run on a VM with at most 4G of memory.
+ */
+static int test_no_kmem_bypass(const char *root)
+{
+ size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
+ struct no_kmem_bypass_child_args *values;
+ size_t trigger_allocation_size;
+ int wait_child_iteration = 0;
+ long stored_pages_threshold;
+ struct sysinfo sys_info;
+ int ret = KSFT_FAIL;
+ int child_status;
+ char *test_group = NULL;
+ pid_t child_pid;
+
+ /* Read sys info and compute test values accordingly */
+ if (sysinfo(&sys_info) != 0)
+ return KSFT_FAIL;
+ if (sys_info.totalram > 5000000000)
+ return KSFT_SKIP;
+ values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
+ PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (values == MAP_FAILED)
+ return KSFT_FAIL;
+ if (read_min_free_kb(&min_free_kb_original))
+ return KSFT_FAIL;
+ min_free_kb_high = sys_info.totalram / 2000;
+ min_free_kb_low = sys_info.totalram / 500000;
+ values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
+ sys_info.totalram * 5 / 100;
+ stored_pages_threshold = sys_info.totalram / 5 / 4096;
+ trigger_allocation_size = sys_info.totalram / 20;
+
+ /* Set up test memcg */
+ test_group = cg_name(root, "kmem_bypass_test");
+ if (!test_group)
+ goto out;
+
+ /* Spawn memcg child and wait for it to allocate */
+ set_min_free_kb(min_free_kb_low);
+ if (cg_create(test_group))
+ goto out;
+ values->child_allocated = false;
+ child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
+ if (child_pid < 0)
+ goto out;
+ while (!values->child_allocated && wait_child_iteration++ < 10000)
+ usleep(1000);
+
+ /* Try to wakeup kswapd and let it push child memory to zswap */
+ set_min_free_kb(min_free_kb_high);
+ for (int i = 0; i < 20; i++) {
+ size_t stored_pages;
+ char *trigger_allocation = malloc(trigger_allocation_size);
+
+ if (!trigger_allocation)
+ break;
+ for (int i = 0; i < trigger_allocation_size; i += 4095)
+ trigger_allocation[i] = 'b';
+ usleep(100000);
+ free(trigger_allocation);
+ if (get_zswap_stored_pages(&stored_pages))
+ break;
+ if (stored_pages < 0)
+ break;
+ /* If memory was pushed to zswap, verify it belongs to memcg */
+ if (stored_pages > stored_pages_threshold) {
+ int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
+ int delta = stored_pages * 4096 - zswapped;
+ int result_ok = delta < stored_pages * 4096 / 4;
+
+ ret = result_ok ? KSFT_PASS : KSFT_FAIL;
+ break;
+ }
+ }
+
+ kill(child_pid, SIGTERM);
+ waitpid(child_pid, &child_status, 0);
+out:
+ set_min_free_kb(min_free_kb_original);
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct zswap_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_zswap_usage),
+ T(test_swapin_nozswap),
+ T(test_zswapin),
+ T(test_zswap_writeback_enabled),
+ T(test_zswap_writeback_disabled),
+ T(test_no_kmem_bypass),
+ T(test_no_invasive_cgroup_shrink),
+};
+#undef T
+
+static bool zswap_configured(void)
+{
+ return access("/sys/module/zswap", F_OK) == 0;
+}
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), NULL))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (!zswap_configured())
+ ksft_exit_skip("zswap isn't configured\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/cgroup/wait_inotify.c b/tools/testing/selftests/cgroup/wait_inotify.c
new file mode 100644
index 000000000000..e11b431e1b62
--- /dev/null
+++ b/tools/testing/selftests/cgroup/wait_inotify.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wait until an inotify event on the given cgroup file.
+ */
+#include <linux/limits.h>
+#include <sys/inotify.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static const char usage[] = "Usage: %s [-v] <cgroup_file>\n";
+static char *file;
+static int verbose;
+
+static inline void fail_message(char *msg)
+{
+ fprintf(stderr, msg, file);
+ exit(1);
+}
+
+int main(int argc, char *argv[])
+{
+ char *cmd = argv[0];
+ int c, fd;
+ struct pollfd fds = { .events = POLLIN, };
+
+ while ((c = getopt(argc, argv, "v")) != -1) {
+ switch (c) {
+ case 'v':
+ verbose++;
+ break;
+ }
+ argv++, argc--;
+ }
+
+ if (argc != 2) {
+ fprintf(stderr, usage, cmd);
+ return -1;
+ }
+ file = argv[1];
+ fd = open(file, O_RDONLY);
+ if (fd < 0)
+ fail_message("Cgroup file %s not found!\n");
+ close(fd);
+
+ fd = inotify_init();
+ if (fd < 0)
+ fail_message("inotify_init() fails on %s!\n");
+ if (inotify_add_watch(fd, file, IN_MODIFY) < 0)
+ fail_message("inotify_add_watch() fails on %s!\n");
+ fds.fd = fd;
+
+ /*
+ * poll waiting loop
+ */
+ for (;;) {
+ int ret = poll(&fds, 1, 10000);
+
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ perror("poll");
+ exit(1);
+ }
+ if ((ret > 0) && (fds.revents & POLLIN))
+ break;
+ }
+ if (verbose) {
+ struct inotify_event events[10];
+ long len;
+
+ usleep(1000);
+ len = read(fd, events, sizeof(events));
+ printf("Number of events read = %ld\n",
+ len/sizeof(struct inotify_event));
+ }
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile
index 79b19a2863a0..84832c369a2e 100644
--- a/tools/testing/selftests/clone3/Makefile
+++ b/tools/testing/selftests/clone3/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -g -std=gnu99 -I../../../../usr/include/
+CFLAGS += -g -std=gnu99 $(KHDR_INCLUDES)
LDLIBS += -lcap
TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid \
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index 42be3b925830..289e0c7c1f09 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -7,6 +7,7 @@
#include <inttypes.h>
#include <linux/types.h>
#include <linux/sched.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -17,7 +18,7 @@
#include <unistd.h>
#include <sched.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "clone3_selftests.h"
enum test_mode {
@@ -52,6 +53,12 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
size = sizeof(struct __clone_args);
switch (test_mode) {
+ case CLONE3_ARGS_NO_TEST:
+ /*
+ * Uses default 'flags' and 'SIGCHLD'
+ * assignment.
+ */
+ break;
case CLONE3_ARGS_ALL_0:
args.flags = 0;
args.exit_signal = 0;
@@ -88,17 +95,22 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
getpid(), pid);
if (waitpid(-1, &status, __WALL) < 0) {
- ksft_print_msg("Child returned %s\n", strerror(errno));
+ ksft_print_msg("waitpid() returned %s\n", strerror(errno));
return -errno;
}
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("Child did not exit normally, status 0x%x\n",
+ status);
+ return EXIT_FAILURE;
+ }
if (WEXITSTATUS(status))
return WEXITSTATUS(status);
return 0;
}
-static void test_clone3(uint64_t flags, size_t size, int expected,
- enum test_mode test_mode)
+static bool test_clone3(uint64_t flags, size_t size, int expected,
+ enum test_mode test_mode)
{
int ret;
@@ -108,88 +120,223 @@ static void test_clone3(uint64_t flags, size_t size, int expected,
ret = call_clone3(flags, size, test_mode);
ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n",
getpid(), ret, expected);
- if (ret != expected)
- ksft_test_result_fail(
+ if (ret != expected) {
+ ksft_print_msg(
"[%d] Result (%d) is different than expected (%d)\n",
getpid(), ret, expected);
- else
- ksft_test_result_pass(
- "[%d] Result (%d) matches expectation (%d)\n",
- getpid(), ret, expected);
-}
-
-int main(int argc, char *argv[])
-{
- pid_t pid;
-
- uid_t uid = getuid();
-
- ksft_print_header();
- ksft_set_plan(17);
- test_clone3_supported();
-
- /* Just a simple clone3() should return 0.*/
- test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST);
+ return false;
+ }
- /* Do a clone3() in a new PID NS.*/
- if (uid == 0)
- test_clone3(CLONE_NEWPID, 0, 0, CLONE3_ARGS_NO_TEST);
- else
- ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+ return true;
+}
- /* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
- test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
+typedef bool (*filter_function)(void);
+typedef size_t (*size_function)(void);
- /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
- test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+static bool not_root(void)
+{
+ if (getuid() != 0) {
+ ksft_print_msg("Not running as root\n");
+ return true;
+ }
- /* Do a clone3() with sizeof(struct clone_args) + 8 */
- test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+ return false;
+}
- /* Do a clone3() with exit_signal having highest 32 bits non-zero */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
+static bool no_timenamespace(void)
+{
+ if (not_root())
+ return true;
- /* Do a clone3() with negative 32-bit exit_signal */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG);
+ if (!access("/proc/self/ns/time", F_OK))
+ return false;
- /* Do a clone3() with exit_signal not fitting into CSIGNAL mask */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG);
+ ksft_print_msg("Time namespaces are not supported\n");
+ return true;
+}
- /* Do a clone3() with NSIG < exit_signal < CSIG */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
+static size_t page_size_plus_8(void)
+{
+ return getpagesize() + 8;
+}
- test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+struct test {
+ const char *name;
+ uint64_t flags;
+ size_t size;
+ size_function size_function;
+ int expected;
+ enum test_mode test_mode;
+ filter_function filter;
+};
- test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
- CLONE3_ARGS_ALL_0);
+static const struct test tests[] = {
+ {
+ .name = "simple clone3()",
+ .flags = 0,
+ .size = 0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "clone3() in a new PID_NS",
+ .flags = CLONE_NEWPID,
+ .size = 0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = not_root,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0",
+ .flags = 0,
+ .size = CLONE_ARGS_SIZE_VER0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0 - 8",
+ .flags = 0,
+ .size = CLONE_ARGS_SIZE_VER0 - 8,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "sizeof(struct clone_args) + 8",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 8,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "exit_signal with highest 32 bits non-zero",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG,
+ },
+ {
+ .name = "negative 32-bit exit_signal",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG,
+ },
+ {
+ .name = "exit_signal not fitting into CSIGNAL mask",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG,
+ },
+ {
+ .name = "NSIG < exit_signal < CSIG",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG,
+ },
+ {
+ .name = "Arguments sizeof(struct clone_args) + 8",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 8,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_ALL_0,
+ },
+ {
+ .name = "Arguments sizeof(struct clone_args) + 16",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 16,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_ALL_0,
+ },
+ {
+ .name = "Arguments sizeof(struct clone_arg) * 2",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 16,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_ALL_0,
+ },
+ {
+ .name = "Arguments > page size",
+ .flags = 0,
+ .size_function = page_size_plus_8,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0 in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size = CLONE_ARGS_SIZE_VER0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = not_root,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size = CLONE_ARGS_SIZE_VER0 - 8,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "sizeof(struct clone_args) + 8 in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size = sizeof(struct __clone_args) + 8,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = not_root,
+ },
+ {
+ .name = "Arguments > page size in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size_function = page_size_plus_8,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "New time NS",
+ .flags = CLONE_NEWTIME,
+ .size = 0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = no_timenamespace,
+ },
+ {
+ .name = "exit signal (SIGCHLD) in flags",
+ .flags = SIGCHLD,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+};
- test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
- CLONE3_ARGS_ALL_0);
+int main(int argc, char *argv[])
+{
+ size_t size;
+ int i;
- /* Do a clone3() with > page size */
- test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ test_clone3_supported();
- /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
- if (uid == 0)
- test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
- CLONE3_ARGS_NO_TEST);
- else
- ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (tests[i].filter && tests[i].filter()) {
+ ksft_test_result_skip("%s\n", tests[i].name);
+ continue;
+ }
- /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
- test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
- CLONE3_ARGS_NO_TEST);
+ if (tests[i].size_function)
+ size = tests[i].size_function();
+ else
+ size = tests[i].size;
- /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
- if (uid == 0)
- test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
- CLONE3_ARGS_NO_TEST);
- else
- ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+ ksft_print_msg("Running test '%s'\n", tests[i].name);
- /* Do a clone3() with > page size in a new PID NS */
- test_clone3(CLONE_NEWPID, getpagesize() + 8, -E2BIG,
- CLONE3_ARGS_NO_TEST);
+ ksft_test_result(test_clone3(tests[i].flags, size,
+ tests[i].expected,
+ tests[i].test_mode),
+ "%s\n", tests[i].name);
+ }
- return !ksft_get_fail_cnt() ? ksft_exit_pass() : ksft_exit_fail();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 52d3f0364bda..e82281efa273 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -24,13 +24,9 @@
#include <unistd.h>
#include <sched.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "clone3_selftests.h"
-#ifndef MAX_PID_NS_LEVEL
-#define MAX_PID_NS_LEVEL 32
-#endif
-
static void child_exit(int ret)
{
fflush(stdout);
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index 47a8c0fc3676..de0c9d62015d 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -13,13 +13,9 @@
#include <sys/syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "clone3_selftests.h"
-#ifndef CLONE_CLEAR_SIGHAND
-#define CLONE_CLEAR_SIGHAND 0x100000000ULL
-#endif
-
static void nop_handler(int signo)
{
}
@@ -124,5 +120,5 @@ int main(int argc, char **argv)
test_clone3_clear_sighand();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index e81ffaaee02b..a0593e8950f0 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -11,16 +11,12 @@
#include <syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
-#ifndef CLONE_INTO_CGROUP
-#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
-#endif
-
#ifndef __NR_clone3
-#define __NR_clone3 -1
+#define __NR_clone3 435
#endif
struct __clone_args {
@@ -32,18 +28,9 @@ struct __clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
-#ifndef CLONE_ARGS_SIZE_VER0
-#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
-#endif
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
-#ifndef CLONE_ARGS_SIZE_VER1
-#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
-#endif
__aligned_u64 cgroup;
-#ifndef CLONE_ARGS_SIZE_VER2
-#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
-#endif
};
static pid_t sys_clone3(struct __clone_args *args, size_t size)
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 0229e9ebb995..5c944aee6b41 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -20,12 +20,10 @@
#include <unistd.h>
#include <sched.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "clone3_selftests.h"
-#ifndef MAX_PID_NS_LEVEL
#define MAX_PID_NS_LEVEL 32
-#endif
static int pipe_1[2];
static int pipe_2[2];
@@ -116,7 +114,8 @@ static int call_clone3_set_tid(pid_t *set_tid,
return WEXITSTATUS(status);
}
-static void test_clone3_set_tid(pid_t *set_tid,
+static void test_clone3_set_tid(const char *desc,
+ pid_t *set_tid,
size_t set_tid_size,
int flags,
int expected,
@@ -131,17 +130,13 @@ static void test_clone3_set_tid(pid_t *set_tid,
ret = call_clone3_set_tid(set_tid, set_tid_size, flags, expected_pid,
wait_for_it);
ksft_print_msg(
- "[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
+ "[%d] clone3() with CLONE_SET_TID %d says: %d - expected %d\n",
getpid(), set_tid[0], ret, expected);
- if (ret != expected)
- ksft_test_result_fail(
- "[%d] Result (%d) is different than expected (%d)\n",
- getpid(), ret, expected);
- else
- ksft_test_result_pass(
- "[%d] Result (%d) matches expectation (%d)\n",
- getpid(), ret, expected);
+
+ ksft_test_result(ret == expected, "%s with %zu TIDs and flags 0x%x\n",
+ desc, set_tid_size, flags);
}
+
int main(int argc, char *argv[])
{
FILE *f;
@@ -174,73 +169,91 @@ int main(int argc, char *argv[])
/* Try invalid settings */
memset(&set_tid, 0, sizeof(set_tid));
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
- -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+ -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
/*
* This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
* nested PID namespace.
*/
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
memset(&set_tid, 0xff, sizeof(set_tid));
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
- -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+ -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
/*
* This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
* nested PID namespace.
*/
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
memset(&set_tid, 0, sizeof(set_tid));
/* Try with an invalid PID */
set_tid[0] = 0;
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("valid size, 0 TID",
+ set_tid, 1, 0, -EINVAL, 0, 0);
set_tid[0] = -1;
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("valid size, -1 TID",
+ set_tid, 1, 0, -EINVAL, 0, 0);
/* Claim that the set_tid array actually contains 2 elements. */
- test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("2 TIDs, -1 and 0",
+ set_tid, 2, 0, -EINVAL, 0, 0);
/* Try it in a new PID namespace */
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("valid size, -1 TID",
+ set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
/* Try with a valid PID (1) this should return -EEXIST. */
set_tid[0] = 1;
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0);
+ test_clone3_set_tid("duplicate PID 1",
+ set_tid, 1, 0, -EEXIST, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
/* Try it in a new PID namespace */
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0);
+ test_clone3_set_tid("duplicate PID 1",
+ set_tid, 1, CLONE_NEWPID, 0, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
/* pid_max should fail everywhere */
set_tid[0] = pid_max;
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("set TID to maximum",
+ set_tid, 1, 0, -EINVAL, 0, 0);
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("set TID to maximum",
+ set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
@@ -264,10 +277,12 @@ int main(int argc, char *argv[])
/* After the child has finished, its PID should be free. */
set_tid[0] = pid;
- test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0);
+ test_clone3_set_tid("reallocate child TID",
+ set_tid, 1, 0, 0, 0, 0);
/* This should fail as there is no PID 1 in that namespace */
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("duplicate child TID",
+ set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
/*
* Creating a process with PID 1 in the newly created most nested
@@ -276,7 +291,8 @@ int main(int argc, char *argv[])
*/
set_tid[0] = 1;
set_tid[1] = pid;
- test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0);
+ test_clone3_set_tid("create PID 1 in new NS",
+ set_tid, 2, CLONE_NEWPID, 0, pid, 0);
ksft_print_msg("unshare PID namespace\n");
if (unshare(CLONE_NEWPID) == -1)
@@ -286,7 +302,8 @@ int main(int argc, char *argv[])
set_tid[0] = pid;
/* This should fail as there is no PID 1 in that namespace */
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("duplicate PID 1",
+ set_tid, 1, 0, -EINVAL, 0, 0);
/* Let's create a PID 1 */
ns_pid = fork();
@@ -297,21 +314,25 @@ int main(int argc, char *argv[])
*/
set_tid[0] = 43;
set_tid[1] = -1;
- test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("check leak on invalid TID -1",
+ set_tid, 2, 0, -EINVAL, 0, 0);
set_tid[0] = 43;
set_tid[1] = pid;
- test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0);
+ test_clone3_set_tid("check leak on invalid specific TID",
+ set_tid, 2, 0, 0, 43, 0);
ksft_print_msg("Child in PID namespace has PID %d\n", getpid());
set_tid[0] = 2;
- test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0);
+ test_clone3_set_tid("create PID 2 in child NS",
+ set_tid, 1, 0, 0, 2, 0);
set_tid[0] = 1;
set_tid[1] = -1;
set_tid[2] = pid;
/* This should fail as there is invalid PID at level '1'. */
- test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("fail due to invalid TID at level 1",
+ set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
set_tid[0] = 1;
set_tid[1] = 42;
@@ -321,13 +342,15 @@ int main(int argc, char *argv[])
* namespaces. Again assuming this is running in the host's
* PID namespace. Not yet nested.
*/
- test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("fail due to too few active PID NSs",
+ set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
/*
* This should work and from the parent we should see
* something like 'NSpid: pid 42 1'.
*/
- test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true);
+ test_clone3_set_tid("verify that we have 3 PID NSs",
+ set_tid, 3, CLONE_NEWPID, 0, 42, true);
child_exit(ksft_cnt.ksft_fail);
}
@@ -382,16 +405,14 @@ int main(int argc, char *argv[])
ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status));
ksft_cnt.ksft_fail = WEXITSTATUS(status);
- if (ns3 == pid && ns2 == 42 && ns1 == 1)
- ksft_test_result_pass(
- "PIDs in all namespaces as expected (%d,%d,%d)\n",
- ns3, ns2, ns1);
- else
- ksft_test_result_fail(
- "PIDs in all namespaces not as expected (%d,%d,%d)\n",
- ns3, ns2, ns1);
+ ksft_print_msg("Expecting PIDs %d, 42, 1\n", pid);
+ ksft_print_msg("Have PIDs in namespaces: %d, %d, %d\n", ns3, ns2, ns1);
+ ksft_test_result(ns3 == pid && ns2 == 42 && ns1 == 1,
+ "PIDs in all namespaces as expected\n");
out:
ret = 0;
- return !ret ? ksft_exit_pass() : ksft_exit_fail();
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/connector/.gitignore b/tools/testing/selftests/connector/.gitignore
new file mode 100644
index 000000000000..c90098199a44
--- /dev/null
+++ b/tools/testing/selftests/connector/.gitignore
@@ -0,0 +1 @@
+proc_filter
diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile
new file mode 100644
index 000000000000..92188b9bac5c
--- /dev/null
+++ b/tools/testing/selftests/connector/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wall $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS = proc_filter
+
+include ../lib.mk
diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
new file mode 100644
index 000000000000..36c11467a8f1
--- /dev/null
+++ b/tools/testing/selftests/connector/proc_filter.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/connector.h>
+#include <linux/cn_proc.h>
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <strings.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+
+#include "kselftest.h"
+
+#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
+ sizeof(struct proc_input))
+#define NL_MESSAGE_SIZE_NF (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
+ sizeof(int))
+
+#define MAX_EVENTS 1
+
+volatile static int interrupted;
+static int nl_sock, ret_errno, tcount;
+static struct epoll_event evn;
+
+static int filter;
+
+#ifdef ENABLE_PRINTS
+#define Printf printf
+#else
+#define Printf ksft_print_msg
+#endif
+
+int send_message(void *pinp)
+{
+ char buff[NL_MESSAGE_SIZE];
+ struct nlmsghdr *hdr;
+ struct cn_msg *msg;
+
+ hdr = (struct nlmsghdr *)buff;
+ if (filter)
+ hdr->nlmsg_len = NL_MESSAGE_SIZE;
+ else
+ hdr->nlmsg_len = NL_MESSAGE_SIZE_NF;
+ hdr->nlmsg_type = NLMSG_DONE;
+ hdr->nlmsg_flags = 0;
+ hdr->nlmsg_seq = 0;
+ hdr->nlmsg_pid = getpid();
+
+ msg = (struct cn_msg *)NLMSG_DATA(hdr);
+ msg->id.idx = CN_IDX_PROC;
+ msg->id.val = CN_VAL_PROC;
+ msg->seq = 0;
+ msg->ack = 0;
+ msg->flags = 0;
+
+ if (filter) {
+ msg->len = sizeof(struct proc_input);
+ ((struct proc_input *)msg->data)->mcast_op =
+ ((struct proc_input *)pinp)->mcast_op;
+ ((struct proc_input *)msg->data)->event_type =
+ ((struct proc_input *)pinp)->event_type;
+ } else {
+ msg->len = sizeof(int);
+ *(int *)msg->data = *(enum proc_cn_mcast_op *)pinp;
+ }
+
+ if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) {
+ ret_errno = errno;
+ perror("send failed");
+ return -3;
+ }
+ return 0;
+}
+
+int register_proc_netlink(int *efd, void *input)
+{
+ struct sockaddr_nl sa_nl;
+ int err = 0, epoll_fd;
+
+ nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+
+ if (nl_sock == -1) {
+ ret_errno = errno;
+ perror("socket failed");
+ return -1;
+ }
+
+ bzero(&sa_nl, sizeof(sa_nl));
+ sa_nl.nl_family = AF_NETLINK;
+ sa_nl.nl_groups = CN_IDX_PROC;
+ sa_nl.nl_pid = getpid();
+
+ if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) {
+ ret_errno = errno;
+ perror("bind failed");
+ return -2;
+ }
+
+ epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (epoll_fd < 0) {
+ ret_errno = errno;
+ perror("epoll_create1 failed");
+ return -2;
+ }
+
+ err = send_message(input);
+
+ if (err < 0)
+ return err;
+
+ evn.events = EPOLLIN;
+ evn.data.fd = nl_sock;
+ if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, nl_sock, &evn) < 0) {
+ ret_errno = errno;
+ perror("epoll_ctl failed");
+ return -3;
+ }
+ *efd = epoll_fd;
+ return 0;
+}
+
+static void sigint(int sig)
+{
+ interrupted = 1;
+}
+
+int handle_packet(char *buff, int fd, struct proc_event *event)
+{
+ struct nlmsghdr *hdr;
+
+ hdr = (struct nlmsghdr *)buff;
+
+ if (hdr->nlmsg_type == NLMSG_ERROR) {
+ perror("NLMSG_ERROR error\n");
+ return -3;
+ } else if (hdr->nlmsg_type == NLMSG_DONE) {
+ event = (struct proc_event *)
+ ((struct cn_msg *)NLMSG_DATA(hdr))->data;
+ tcount++;
+ switch (event->what) {
+ case PROC_EVENT_EXIT:
+ Printf("Exit process %d (tgid %d) with code %d, signal %d\n",
+ event->event_data.exit.process_pid,
+ event->event_data.exit.process_tgid,
+ event->event_data.exit.exit_code,
+ event->event_data.exit.exit_signal);
+ break;
+ case PROC_EVENT_FORK:
+ Printf("Fork process %d (tgid %d), parent %d (tgid %d)\n",
+ event->event_data.fork.child_pid,
+ event->event_data.fork.child_tgid,
+ event->event_data.fork.parent_pid,
+ event->event_data.fork.parent_tgid);
+ break;
+ case PROC_EVENT_EXEC:
+ Printf("Exec process %d (tgid %d)\n",
+ event->event_data.exec.process_pid,
+ event->event_data.exec.process_tgid);
+ break;
+ case PROC_EVENT_UID:
+ Printf("UID process %d (tgid %d) uid %d euid %d\n",
+ event->event_data.id.process_pid,
+ event->event_data.id.process_tgid,
+ event->event_data.id.r.ruid,
+ event->event_data.id.e.euid);
+ break;
+ case PROC_EVENT_GID:
+ Printf("GID process %d (tgid %d) gid %d egid %d\n",
+ event->event_data.id.process_pid,
+ event->event_data.id.process_tgid,
+ event->event_data.id.r.rgid,
+ event->event_data.id.e.egid);
+ break;
+ case PROC_EVENT_SID:
+ Printf("SID process %d (tgid %d)\n",
+ event->event_data.sid.process_pid,
+ event->event_data.sid.process_tgid);
+ break;
+ case PROC_EVENT_PTRACE:
+ Printf("Ptrace process %d (tgid %d), Tracer %d (tgid %d)\n",
+ event->event_data.ptrace.process_pid,
+ event->event_data.ptrace.process_tgid,
+ event->event_data.ptrace.tracer_pid,
+ event->event_data.ptrace.tracer_tgid);
+ break;
+ case PROC_EVENT_COMM:
+ Printf("Comm process %d (tgid %d) comm %s\n",
+ event->event_data.comm.process_pid,
+ event->event_data.comm.process_tgid,
+ event->event_data.comm.comm);
+ break;
+ case PROC_EVENT_COREDUMP:
+ Printf("Coredump process %d (tgid %d) parent %d, (tgid %d)\n",
+ event->event_data.coredump.process_pid,
+ event->event_data.coredump.process_tgid,
+ event->event_data.coredump.parent_pid,
+ event->event_data.coredump.parent_tgid);
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+int handle_events(int epoll_fd, struct proc_event *pev)
+{
+ char buff[CONNECTOR_MAX_MSG_SIZE];
+ struct epoll_event ev[MAX_EVENTS];
+ int i, event_count = 0, err = 0;
+
+ event_count = epoll_wait(epoll_fd, ev, MAX_EVENTS, -1);
+ if (event_count < 0) {
+ ret_errno = errno;
+ if (ret_errno != EINTR)
+ perror("epoll_wait failed");
+ return -3;
+ }
+ for (i = 0; i < event_count; i++) {
+ if (!(ev[i].events & EPOLLIN))
+ continue;
+ if (recv(ev[i].data.fd, buff, sizeof(buff), 0) == -1) {
+ ret_errno = errno;
+ perror("recv failed");
+ return -3;
+ }
+ err = handle_packet(buff, ev[i].data.fd, pev);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int epoll_fd, err;
+ struct proc_event proc_ev;
+ struct proc_input input;
+
+ signal(SIGINT, sigint);
+
+ if (argc > 2) {
+ printf("Expected 0(assume no-filter) or 1 argument(-f)\n");
+ exit(KSFT_SKIP);
+ }
+
+ if (argc == 2) {
+ if (strcmp(argv[1], "-f") == 0) {
+ filter = 1;
+ } else {
+ printf("Valid option : -f (for filter feature)\n");
+ exit(KSFT_SKIP);
+ }
+ }
+
+ if (filter) {
+ input.event_type = PROC_EVENT_NONZERO_EXIT;
+ input.mcast_op = PROC_CN_MCAST_LISTEN;
+ err = register_proc_netlink(&epoll_fd, (void*)&input);
+ } else {
+ enum proc_cn_mcast_op op = PROC_CN_MCAST_LISTEN;
+ err = register_proc_netlink(&epoll_fd, (void*)&op);
+ }
+
+ if (err < 0) {
+ if (err == -2)
+ close(nl_sock);
+ if (err == -3) {
+ close(nl_sock);
+ close(epoll_fd);
+ }
+ exit(1);
+ }
+
+ while (!interrupted) {
+ err = handle_events(epoll_fd, &proc_ev);
+ if (err < 0) {
+ if (ret_errno == EINTR)
+ continue;
+ if (err == -2)
+ close(nl_sock);
+ if (err == -3) {
+ close(nl_sock);
+ close(epoll_fd);
+ }
+ exit(1);
+ }
+ }
+
+ if (filter) {
+ input.mcast_op = PROC_CN_MCAST_IGNORE;
+ send_message((void*)&input);
+ } else {
+ enum proc_cn_mcast_op op = PROC_CN_MCAST_IGNORE;
+ send_message((void*)&op);
+ }
+
+ close(epoll_fd);
+ close(nl_sock);
+
+ printf("Done total count: %d\n", tcount);
+ exit(0);
+}
diff --git a/tools/testing/selftests/core/.gitignore b/tools/testing/selftests/core/.gitignore
index 6e6712ce5817..7999361992aa 100644
--- a/tools/testing/selftests/core/.gitignore
+++ b/tools/testing/selftests/core/.gitignore
@@ -1 +1,2 @@
close_range_test
+unshare_test
diff --git a/tools/testing/selftests/core/Makefile b/tools/testing/selftests/core/Makefile
index f6f2d6f473c6..8e99f87f5d7c 100644
--- a/tools/testing/selftests/core/Makefile
+++ b/tools/testing/selftests/core/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g -I../../../../usr/include/
+CFLAGS += -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := close_range_test
+TEST_GEN_PROGS := close_range_test unshare_test
include ../lib.mk
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 73eb29c916d1..f14eca63f20c 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -12,36 +12,22 @@
#include <syscall.h>
#include <unistd.h>
#include <sys/resource.h>
+#include <linux/close_range.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
-#ifndef __NR_close_range
- #if defined __alpha__
- #define __NR_close_range 546
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_close_range (436 + 4000)
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_close_range (436 + 6000)
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_close_range (436 + 5000)
- #endif
- #elif defined __ia64__
- #define __NR_close_range (436 + 1024)
- #else
- #define __NR_close_range 436
- #endif
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
#endif
-#ifndef CLOSE_RANGE_UNSHARE
-#define CLOSE_RANGE_UNSHARE (1U << 1)
+#ifndef F_DUPFD_QUERY
+#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
#endif
-#ifndef CLOSE_RANGE_CLOEXEC
-#define CLOSE_RANGE_CLOEXEC (1U << 2)
+#ifndef F_CREATED_QUERY
+#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4)
#endif
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
@@ -50,11 +36,7 @@ static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
return syscall(__NR_close_range, fd, max_fd, flags);
}
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
-TEST(close_range)
+TEST(core_close_range)
{
int i, ret;
int open_fds[101];
@@ -76,6 +58,15 @@ TEST(close_range)
SKIP(return, "close_range() syscall not supported");
}
+ for (i = 0; i < 100; i++) {
+ ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]);
+ if (ret < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(ret, 0);
+ }
+ }
+
EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
for (i = 0; i <= 50; i++)
@@ -389,7 +380,7 @@ TEST(close_range_cloexec_unshare)
*/
TEST(close_range_cloexec_syzbot)
{
- int fd1, fd2, fd3, flags, ret, status;
+ int fd1, fd2, fd3, fd4, flags, ret, status;
pid_t pid;
struct __clone_args args = {
.flags = CLONE_FILES,
@@ -403,6 +394,13 @@ TEST(close_range_cloexec_syzbot)
fd2 = dup2(fd1, 1000);
EXPECT_GT(fd2, 0);
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd2);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 1);
+ }
+
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@@ -427,6 +425,15 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 1);
+ }
+
+
+
/*
* Duplicating the file descriptor must remove the
* FD_CLOEXEC flag.
@@ -457,6 +464,24 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 1);
+ }
+
+ fd4 = open("/dev/null", O_RDWR);
+ EXPECT_GT(fd4, 0);
+
+ /* Same inode, different file pointers. */
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd4);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 0);
+ }
+
flags = fcntl(fd3, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
@@ -464,6 +489,7 @@ TEST(close_range_cloexec_syzbot)
EXPECT_EQ(close(fd1), 0);
EXPECT_EQ(close(fd2), 0);
EXPECT_EQ(close(fd3), 0);
+ EXPECT_EQ(close(fd4), 0);
}
/*
@@ -567,4 +593,74 @@ TEST(close_range_cloexec_unshare_syzbot)
EXPECT_EQ(close(fd3), 0);
}
+TEST(close_range_bitmap_corruption)
+{
+ pid_t pid;
+ int status;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ /* get the first 128 descriptors open */
+ for (int i = 2; i < 128; i++)
+ EXPECT_GE(dup2(0, i), 0);
+
+ /* get descriptor table shared */
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* unshare and truncate descriptor table down to 64 */
+ if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
+ exit(EXIT_FAILURE);
+
+ ASSERT_EQ(fcntl(64, F_GETFD), -1);
+ /* ... and verify that the range 64..127 is not
+ stuck "fully used" according to secondary bitmap */
+ EXPECT_EQ(dup(0), 64)
+ exit(EXIT_FAILURE);
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST(fcntl_created)
+{
+ for (int i = 0; i < 101; i++) {
+ int fd;
+ char path[PATH_MAX];
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ SKIP(return,
+ "Skipping test since /dev/null does not exist");
+ }
+
+ /* We didn't create "/dev/null". */
+ EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
+ close(fd);
+
+ sprintf(path, "aaaa_%d", i);
+ fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600);
+ ASSERT_GE(fd, 0);
+
+ /* We created "aaaa_%d". */
+ EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1);
+ close(fd);
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+
+ /* We're opening it again, so no positive creation check. */
+ EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
+ close(fd);
+ unlink(path);
+ }
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/core/unshare_test.c b/tools/testing/selftests/core/unshare_test.c
new file mode 100644
index 000000000000..ffce75a6c228
--- /dev/null
+++ b/tools/testing/selftests/core/unshare_test.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kernel.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <linux/close_range.h>
+
+#include "kselftest_harness.h"
+#include "../clone3/clone3_selftests.h"
+
+TEST(unshare_EMFILE)
+{
+ pid_t pid;
+ int status;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+ int fd;
+ ssize_t n, n2;
+ static char buf[512], buf2[512];
+ struct rlimit rlimit;
+ int nr_open;
+
+ fd = open("/proc/sys/fs/nr_open", O_RDWR);
+ ASSERT_GE(fd, 0);
+
+ n = read(fd, buf, sizeof(buf));
+ ASSERT_GT(n, 0);
+ ASSERT_EQ(buf[n - 1], '\n');
+
+ ASSERT_EQ(sscanf(buf, "%d", &nr_open), 1);
+
+ ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
+
+ /* bump fs.nr_open */
+ n2 = sprintf(buf2, "%d\n", nr_open + 1024);
+ lseek(fd, 0, SEEK_SET);
+ write(fd, buf2, n2);
+
+ /* bump ulimit -n */
+ rlimit.rlim_cur = nr_open + 1024;
+ rlimit.rlim_max = nr_open + 1024;
+ EXPECT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)) {
+ lseek(fd, 0, SEEK_SET);
+ write(fd, buf, n);
+ exit(EXIT_FAILURE);
+ }
+
+ /* get a descriptor past the old fs.nr_open */
+ EXPECT_GE(dup2(2, nr_open + 64), 0) {
+ lseek(fd, 0, SEEK_SET);
+ write(fd, buf, n);
+ exit(EXIT_FAILURE);
+ }
+
+ /* get descriptor table shared */
+ pid = sys_clone3(&args, sizeof(args));
+ EXPECT_GE(pid, 0) {
+ lseek(fd, 0, SEEK_SET);
+ write(fd, buf, n);
+ exit(EXIT_FAILURE);
+ }
+
+ if (pid == 0) {
+ int err;
+
+ /* restore fs.nr_open */
+ lseek(fd, 0, SEEK_SET);
+ write(fd, buf, n);
+ /* ... and now unshare(CLONE_FILES) must fail with EMFILE */
+ err = unshare(CLONE_FILES);
+ EXPECT_EQ(err, -1)
+ exit(EXIT_FAILURE);
+ EXPECT_EQ(errno, EMFILE)
+ exit(EXIT_FAILURE);
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/coredump/.gitignore b/tools/testing/selftests/coredump/.gitignore
new file mode 100644
index 000000000000..097f52db0be9
--- /dev/null
+++ b/tools/testing/selftests/coredump/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+stackdump_test
+coredump_socket_test
+coredump_socket_protocol_test
diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile
new file mode 100644
index 000000000000..dece1a31d561
--- /dev/null
+++ b/tools/testing/selftests/coredump/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+
+TEST_GEN_PROGS := stackdump_test \
+ coredump_socket_test \
+ coredump_socket_protocol_test
+TEST_FILES := stackdump
+
+include ../lib.mk
+
+$(OUTPUT)/stackdump_test: coredump_test_helpers.c
+$(OUTPUT)/coredump_socket_test: coredump_test_helpers.c
+$(OUTPUT)/coredump_socket_protocol_test: coredump_test_helpers.c
diff --git a/tools/testing/selftests/coredump/README.rst b/tools/testing/selftests/coredump/README.rst
new file mode 100644
index 000000000000..164a7aa181c8
--- /dev/null
+++ b/tools/testing/selftests/coredump/README.rst
@@ -0,0 +1,50 @@
+coredump selftest
+=================
+
+Background context
+------------------
+
+`coredump` is a feature which dumps a process's memory space when the process terminates
+unexpectedly (e.g. due to segmentation fault), which can be useful for debugging. By default,
+`coredump` dumps the memory to the file named `core`, but this behavior can be changed by writing a
+different file name to `/proc/sys/kernel/core_pattern`. Furthermore, `coredump` can be piped to a
+user-space program by writing the pipe symbol (`|`) followed by the command to be executed to
+`/proc/sys/kernel/core_pattern`. For the full description, see `man 5 core`.
+
+The piped user program may be interested in reading the stack pointers of the crashed process. The
+crashed process's stack pointers can be read from `procfs`: it is the `kstkesp` field in
+`/proc/$PID/stat`. See `man 5 proc` for all the details.
+
+The problem
+-----------
+While a thread is active, the stack pointer is unsafe to read and therefore the `kstkesp` field
+reads zero. But when the thread is dead (e.g. during a coredump), this field should have valid
+value.
+
+However, this was broken in the past and `kstkesp` was zero even during coredump:
+
+* commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in /proc/PID/stat") changed kstkesp to
+ always be zero
+
+* commit fd7d56270b52 ("fs/proc: Report eip/esp in /prod/PID/stat for coredumping") fixed it for the
+ coredumping thread. However, other threads in a coredumping process still had the problem.
+
+* commit cb8f381f1613 ("fs/proc/array.c: allow reporting eip/esp for all coredumping threads") fixed
+ for all threads in a coredumping process.
+
+* commit 92307383082d ("coredump: Don't perform any cleanups before dumping core") broke it again
+ for the other threads in a coredumping process.
+
+The problem has been fixed now, but considering the history, it may appear again in the future.
+
+The goal of this test
+---------------------
+This test detects problem with reading `kstkesp` during coredump by doing the following:
+
+#. Tell the kernel to execute the "stackdump" script when a coredump happens. This script
+ reads the stack pointers of all threads of crashed processes.
+
+#. Spawn a child process who creates some threads and then crashes.
+
+#. Read the output from the "stackdump" script, and make sure all stack pointer values are
+ non-zero.
diff --git a/tools/testing/selftests/coredump/config b/tools/testing/selftests/coredump/config
new file mode 100644
index 000000000000..a05ef112b4f9
--- /dev/null
+++ b/tools/testing/selftests/coredump/config
@@ -0,0 +1,3 @@
+CONFIG_COREDUMP=y
+CONFIG_NET=y
+CONFIG_UNIX=y
diff --git a/tools/testing/selftests/coredump/coredump_socket_protocol_test.c b/tools/testing/selftests/coredump/coredump_socket_protocol_test.c
new file mode 100644
index 000000000000..d19b6717c53e
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_socket_protocol_test.c
@@ -0,0 +1,1568 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "coredump_test.h"
+
+#define NUM_CRASHING_COREDUMPS 5
+
+FIXTURE_SETUP(coredump)
+{
+ FILE *file;
+ int ret;
+
+ self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
+ file = fopen("/proc/sys/kernel/core_pattern", "r");
+ ASSERT_NE(NULL, file);
+
+ ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+ ASSERT_TRUE(ret || feof(file));
+ ASSERT_LT(ret, sizeof(self->original_core_pattern));
+
+ self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(coredump)
+{
+ const char *reason;
+ FILE *file;
+ int ret, status;
+
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ if (!file) {
+ reason = "Unable to open core_pattern";
+ goto fail;
+ }
+
+ ret = fprintf(file, "%s", self->original_core_pattern);
+ if (ret < 0) {
+ reason = "Unable to write to core_pattern";
+ goto fail;
+ }
+
+ ret = fclose(file);
+ if (ret) {
+ reason = "Unable to close core_pattern";
+ goto fail;
+ }
+
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
+ return;
+fail:
+ /* This should never happen */
+ fprintf(stderr, "Failed to cleanup coredump test: %s\n", reason);
+}
+
+TEST_F(coredump, socket_request_kernel)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_kernel: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_kernel: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_kernel: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_kernel: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_kernel: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_kernel: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_kernel: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_request_kernel: creat coredump file failed: %m\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_kernel: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_kernel: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_kernel: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_request_kernel: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_request_kernel: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "socket_request_kernel: write to core file failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_kernel: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+ system("file /tmp/coredump.file");
+}
+
+TEST_F(coredump, socket_request_userspace)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_userspace: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_userspace: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_userspace: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_userspace: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_userspace: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_userspace: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_userspace: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_userspace: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_userspace: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_USERSPACE | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_userspace: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_request_userspace: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0) {
+ fprintf(stderr, "socket_request_userspace: unexpected data received (expected no coredump data)\n");
+ goto out;
+ }
+
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_request_userspace: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_userspace: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_reject)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_reject: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_reject: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_reject: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_reject: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_reject: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_reject: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_reject: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_reject: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_reject: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_reject: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_request_reject: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0) {
+ fprintf(stderr, "socket_request_reject: unexpected data received (expected no coredump data for REJECT)\n");
+ goto out;
+ }
+
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_request_reject: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_reject: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_flag_combination)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: read_marker COREDUMP_MARK_CONFLICTING failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_invalid_flag_combination: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_unknown_flag)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_unknown_flag: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_unknown_flag: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_unknown_flag: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_unknown_flag: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_unknown_flag: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0)) {
+ fprintf(stderr, "socket_request_unknown_flag: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED)) {
+ fprintf(stderr, "socket_request_unknown_flag: read_marker COREDUMP_MARK_UNSUPPORTED failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_unknown_flag: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_small)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_invalid_size_small: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_invalid_size_small: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_invalid_size_small: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_invalid_size_small: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_invalid_size_small: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 / 2)) {
+ fprintf(stderr, "socket_request_invalid_size_small: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE)) {
+ fprintf(stderr, "socket_request_invalid_size_small: read_marker COREDUMP_MARK_MINSIZE failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_invalid_size_small: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_large)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_invalid_size_large: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_invalid_size_large: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_invalid_size_large: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_invalid_size_large: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_invalid_size_large: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE)) {
+ fprintf(stderr, "socket_request_invalid_size_large: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE)) {
+ fprintf(stderr, "socket_request_invalid_size_large: read_marker COREDUMP_MARK_MAXSIZE failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_invalid_size_large: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via socket coredump with SIGSEGV
+ *
+ * Verify that when using socket-based coredump protocol,
+ * the coredump_signal field is correctly exposed as SIGSEGV.
+ */
+TEST_F(coredump, socket_coredump_signal_sigsegv)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGSEGV) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_signal=%d, expected SIGSEGV=%d\n",
+ info.coredump_signal, SIGSEGV);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigsegv: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGSEGV);
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGSEGV);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via socket coredump with SIGABRT
+ *
+ * Verify that when using socket-based coredump protocol,
+ * the coredump_signal field is correctly exposed as SIGABRT.
+ */
+TEST_F(coredump, socket_coredump_signal_sigabrt)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGABRT) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_signal=%d, expected SIGABRT=%d\n",
+ info.coredump_signal, SIGABRT);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigabrt: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ abort();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGABRT);
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGABRT);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+ struct coredump_req req = {};
+
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "Failed to create and listen on unix socket\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "Failed to notify parent via ipc socket\n");
+ goto out;
+ }
+ close(ipc_sockets[1]);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file);
+ goto out;
+ }
+ }
+
+ close(fd_core_file);
+ close(fd_peer_pidfd);
+ close(fd_coredump);
+ fd_peer_pidfd = -1;
+ fd_coredump = -1;
+ }
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ waitpid(pid[i], &status[i], 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0;
+ fd_server = -1;
+ exit_code = EXIT_FAILURE;
+ n_conns = 0;
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+ close(ipc_sockets[1]);
+
+ while (n_conns < NUM_CRASHING_COREDUMPS) {
+ int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ struct coredump_req req = {};
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ continue;
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: accept4 failed: %m\n");
+ goto out;
+ }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: get_peer_pidfd failed\n");
+ goto out;
+ }
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: get_pidfd_info failed\n");
+ goto out;
+ }
+ if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: missing PIDFD_INFO_COREDUMP or PIDFD_COREDUMPED\n");
+ goto out;
+ }
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: read_coredump_req failed\n");
+ goto out;
+ }
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: check_coredump_req failed\n");
+ goto out;
+ }
+ if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: send_coredump_ack failed\n");
+ goto out;
+ }
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: read_marker failed\n");
+ goto out;
+ }
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: open_coredump_tmpfile failed: %m\n");
+ goto out;
+ }
+ pid_t worker = fork();
+ if (worker == 0) {
+ close(fd_server);
+ process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file);
+ }
+ worker_pids[n_conns] = worker;
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ n_conns++;
+ }
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_server >= 0)
+ close(fd_server);
+
+ // Reap all worker processes
+ for (int i = 0; i < n_conns; i++) {
+ int wstatus;
+ if (waitpid(worker_pids[i], &wstatus, 0) < 0) {
+ fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]);
+ } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
+ fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus));
+ exit_code = EXIT_FAILURE;
+ }
+ }
+
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ ASSERT_GE(waitpid(pid[i], &status[i], 0), 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/coredump/coredump_socket_test.c b/tools/testing/selftests/coredump/coredump_socket_test.c
new file mode 100644
index 000000000000..7e26d4a6a15d
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_socket_test.c
@@ -0,0 +1,742 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "coredump_test.h"
+
+FIXTURE_SETUP(coredump)
+{
+ FILE *file;
+ int ret;
+
+ self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
+ file = fopen("/proc/sys/kernel/core_pattern", "r");
+ ASSERT_NE(NULL, file);
+
+ ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+ ASSERT_TRUE(ret || feof(file));
+ ASSERT_LT(ret, sizeof(self->original_core_pattern));
+
+ self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(coredump)
+{
+ const char *reason;
+ FILE *file;
+ int ret, status;
+
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ if (!file) {
+ reason = "Unable to open core_pattern";
+ goto fail;
+ }
+
+ ret = fprintf(file, "%s", self->original_core_pattern);
+ if (ret < 0) {
+ reason = "Unable to write to core_pattern";
+ goto fail;
+ }
+
+ ret = fclose(file);
+ if (ret) {
+ reason = "Unable to close core_pattern";
+ goto fail;
+ }
+
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
+ return;
+fail:
+ /* This should never happen */
+ fprintf(stderr, "Failed to cleanup coredump test: %s\n", reason);
+}
+
+TEST_F(coredump, socket)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket test: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket test: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket test: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket test: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket test: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket test: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket test: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket test: creat coredump file failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket test: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "socket test: write to core file failed (read=%zd, write=%zd): %m\n", bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket test: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+}
+
+TEST_F(coredump, socket_detect_userspace_client)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_COREDUMP,
+ };
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_detect_userspace_client: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_detect_userspace_client: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_mask & PIDFD_COREDUMPED) {
+ fprintf(stderr, "socket_detect_userspace_client: PIDFD_COREDUMPED incorrectly set (should be userspace client)\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_detect_userspace_client: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ int fd_socket;
+ ssize_t ret;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len =
+ offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd_socket < 0) {
+ fprintf(stderr, "socket_detect_userspace_client (client): socket failed: %m\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "socket_detect_userspace_client (client): connect failed: %m\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ close(fd_socket);
+ pause();
+ fprintf(stderr, "socket_detect_userspace_client (client): completed successfully\n");
+ _exit(EXIT_SUCCESS);
+ }
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+ ASSERT_EQ(close(pidfd), 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+
+ ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_EQ(errno, ENOENT);
+}
+
+TEST_F(coredump, socket_enoent)
+{
+ int pidfd, status;
+ pid_t pid;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+}
+
+TEST_F(coredump, socket_no_listener)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ int ipc_sockets[2];
+ char c;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_no_listener: socket failed: %m\n");
+ goto out;
+ }
+
+ ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "socket_no_listener: bind failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_no_listener: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_no_listener: completed successfully\n");
+out:
+ if (fd_server >= 0)
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via simple socket coredump
+ *
+ * Verify that when using simple socket-based coredump (@ pattern),
+ * the coredump_signal field is correctly exposed as SIGSEGV.
+ */
+TEST_F(coredump, socket_coredump_signal_sigsegv)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGSEGV) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_signal=%d, expected SIGSEGV=%d\n",
+ info.coredump_signal, SIGSEGV);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: open_coredump_tmpfile failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: write to core file failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigsegv: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGSEGV);
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGSEGV);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via simple socket coredump with SIGABRT
+ *
+ * Verify that when using simple socket-based coredump (@ pattern),
+ * the coredump_signal field is correctly exposed as SIGABRT.
+ */
+TEST_F(coredump, socket_coredump_signal_sigabrt)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGABRT) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_signal=%d, expected SIGABRT=%d\n",
+ info.coredump_signal, SIGABRT);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: open_coredump_tmpfile failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: write to core file failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigabrt: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ abort();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGABRT);
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGABRT);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_invalid_paths)
+{
+ ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/coredump/coredump_test.h b/tools/testing/selftests/coredump/coredump_test.h
new file mode 100644
index 000000000000..ed47f01fa53c
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_test.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __COREDUMP_TEST_H
+#define __COREDUMP_TEST_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+#include <linux/coredump.h>
+
+#include "../kselftest_harness.h"
+#include "../pidfd/pidfd.h"
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define NUM_THREAD_SPAWN 128
+
+/* Coredump fixture */
+FIXTURE(coredump)
+{
+ char original_core_pattern[256];
+ pid_t pid_coredump_server;
+ int fd_tmpfs_detached;
+};
+
+/* Shared helper function declarations */
+void *do_nothing(void *arg);
+void crashing_child(void);
+int create_detached_tmpfs(void);
+int create_and_listen_unix_socket(const char *path);
+bool set_core_pattern(const char *pattern);
+int get_peer_pidfd(int fd);
+bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info);
+
+/* Inline helper that uses harness types */
+static inline void wait_and_check_coredump_server(pid_t pid_coredump_server,
+ struct __test_metadata *const _metadata,
+ FIXTURE_DATA(coredump) *self)
+{
+ int status;
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
+/* Protocol helper function declarations */
+ssize_t recv_marker(int fd);
+bool read_marker(int fd, enum coredump_mark mark);
+bool read_coredump_req(int fd, struct coredump_req *req);
+bool send_coredump_ack(int fd, const struct coredump_req *req,
+ __u64 mask, size_t size_ack);
+bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+ __u64 required_mask);
+int open_coredump_tmpfile(int fd_tmpfs_detached);
+void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file);
+
+#endif /* __COREDUMP_TEST_H */
diff --git a/tools/testing/selftests/coredump/coredump_test_helpers.c b/tools/testing/selftests/coredump/coredump_test_helpers.c
new file mode 100644
index 000000000000..a6f6d5f2ae07
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_test_helpers.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../filesystems/wrappers.h"
+#include "../pidfd/pidfd.h"
+
+/* Forward declarations to avoid including harness header */
+struct __test_metadata;
+
+/* Match the fixture definition from coredump_test.h */
+struct _fixture_coredump_data {
+ char original_core_pattern[256];
+ pid_t pid_coredump_server;
+ int fd_tmpfs_detached;
+};
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define NUM_THREAD_SPAWN 128
+
+void *do_nothing(void *arg)
+{
+ (void)arg;
+ while (1)
+ pause();
+
+ return NULL;
+}
+
+void crashing_child(void)
+{
+ pthread_t thread;
+ int i;
+
+ for (i = 0; i < NUM_THREAD_SPAWN; ++i)
+ pthread_create(&thread, NULL, do_nothing, NULL);
+
+ /* crash on purpose */
+ i = *(int *)NULL;
+}
+
+int create_detached_tmpfs(void)
+{
+ int fd_context, fd_tmpfs;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ if (fd_context < 0)
+ return -1;
+
+ if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
+ return -1;
+
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ close(fd_context);
+ return fd_tmpfs;
+}
+
+int create_and_listen_unix_socket(const char *path)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ assert(strlen(path) < sizeof(addr.sun_path) - 1);
+ strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+ size_t addr_len =
+ offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1;
+ int fd, ret;
+
+ fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ goto out;
+
+ ret = bind(fd, (const struct sockaddr *)&addr, addr_len);
+ if (ret < 0)
+ goto out;
+
+ ret = listen(fd, 128);
+ if (ret < 0)
+ goto out;
+
+ return fd;
+
+out:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+bool set_core_pattern(const char *pattern)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+
+ ret = write(fd, pattern, strlen(pattern));
+ close(fd);
+ if (ret < 0)
+ return false;
+
+ fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern));
+ return ret == strlen(pattern);
+}
+
+int get_peer_pidfd(int fd)
+{
+ int fd_peer_pidfd;
+ socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd,
+ &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "get_peer_pidfd: getsockopt(SO_PEERPIDFD) failed: %m\n");
+ return -1;
+ }
+ fprintf(stderr, "get_peer_pidfd: successfully retrieved pidfd %d\n", fd_peer_pidfd);
+ return fd_peer_pidfd;
+}
+
+bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
+{
+ int ret;
+ memset(info, 0, sizeof(*info));
+ info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL;
+ ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info);
+ if (ret < 0) {
+ fprintf(stderr, "get_pidfd_info: ioctl(PIDFD_GET_INFO) failed: %m\n");
+ return false;
+ }
+ fprintf(stderr, "get_pidfd_info: mask=0x%llx, coredump_mask=0x%x, coredump_signal=%d\n",
+ (unsigned long long)info->mask, info->coredump_mask, info->coredump_signal);
+ return true;
+}
+
+/* Protocol helper functions */
+
+ssize_t recv_marker(int fd)
+{
+ enum coredump_mark mark = COREDUMP_MARK_REQACK;
+ ssize_t ret;
+
+ ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL);
+ if (ret != sizeof(mark))
+ return -1;
+
+ switch (mark) {
+ case COREDUMP_MARK_REQACK:
+ fprintf(stderr, "Received marker: ReqAck\n");
+ return COREDUMP_MARK_REQACK;
+ case COREDUMP_MARK_MINSIZE:
+ fprintf(stderr, "Received marker: MinSize\n");
+ return COREDUMP_MARK_MINSIZE;
+ case COREDUMP_MARK_MAXSIZE:
+ fprintf(stderr, "Received marker: MaxSize\n");
+ return COREDUMP_MARK_MAXSIZE;
+ case COREDUMP_MARK_UNSUPPORTED:
+ fprintf(stderr, "Received marker: Unsupported\n");
+ return COREDUMP_MARK_UNSUPPORTED;
+ case COREDUMP_MARK_CONFLICTING:
+ fprintf(stderr, "Received marker: Conflicting\n");
+ return COREDUMP_MARK_CONFLICTING;
+ default:
+ fprintf(stderr, "Received unknown marker: %u\n", mark);
+ break;
+ }
+ return -1;
+}
+
+bool read_marker(int fd, enum coredump_mark mark)
+{
+ ssize_t ret;
+
+ ret = recv_marker(fd);
+ if (ret < 0)
+ return false;
+ return ret == mark;
+}
+
+bool read_coredump_req(int fd, struct coredump_req *req)
+{
+ ssize_t ret;
+ size_t field_size, user_size, ack_size, kernel_size, remaining_size;
+
+ memset(req, 0, sizeof(*req));
+ field_size = sizeof(req->size);
+
+ /* Peek the size of the coredump request. */
+ ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL);
+ if (ret != field_size) {
+ fprintf(stderr, "read_coredump_req: peek failed (got %zd, expected %zu): %m\n",
+ ret, field_size);
+ return false;
+ }
+ kernel_size = req->size;
+
+ if (kernel_size < COREDUMP_ACK_SIZE_VER0) {
+ fprintf(stderr, "read_coredump_req: kernel_size %zu < min %d\n",
+ kernel_size, COREDUMP_ACK_SIZE_VER0);
+ return false;
+ }
+ if (kernel_size >= PAGE_SIZE) {
+ fprintf(stderr, "read_coredump_req: kernel_size %zu >= PAGE_SIZE %d\n",
+ kernel_size, PAGE_SIZE);
+ return false;
+ }
+
+ /* Use the minimum of user and kernel size to read the full request. */
+ user_size = sizeof(struct coredump_req);
+ ack_size = user_size < kernel_size ? user_size : kernel_size;
+ ret = recv(fd, req, ack_size, MSG_WAITALL);
+ if (ret != ack_size)
+ return false;
+
+ fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n",
+ req->size, (unsigned long long)req->mask);
+
+ if (user_size > kernel_size)
+ remaining_size = user_size - kernel_size;
+ else
+ remaining_size = kernel_size - user_size;
+
+ if (PAGE_SIZE <= remaining_size)
+ return false;
+
+ /*
+ * Discard any additional data if the kernel's request was larger than
+ * what we knew about or cared about.
+ */
+ if (remaining_size) {
+ char buffer[PAGE_SIZE];
+
+ ret = recv(fd, buffer, sizeof(buffer), MSG_WAITALL);
+ if (ret != remaining_size)
+ return false;
+ fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size);
+ }
+
+ return true;
+}
+
+bool send_coredump_ack(int fd, const struct coredump_req *req,
+ __u64 mask, size_t size_ack)
+{
+ ssize_t ret;
+ /*
+ * Wrap struct coredump_ack in a larger struct so we can
+ * simulate sending to much data to the kernel.
+ */
+ struct large_ack_for_size_testing {
+ struct coredump_ack ack;
+ char buffer[PAGE_SIZE];
+ } large_ack = {};
+
+ if (!size_ack)
+ size_ack = sizeof(struct coredump_ack) < req->size_ack ?
+ sizeof(struct coredump_ack) :
+ req->size_ack;
+ large_ack.ack.mask = mask;
+ large_ack.ack.size = size_ack;
+ ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL);
+ if (ret != size_ack)
+ return false;
+
+ fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n",
+ size_ack, (unsigned long long)mask);
+ return true;
+}
+
+bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+ __u64 required_mask)
+{
+ if (req->size < min_size)
+ return false;
+ if ((req->mask & required_mask) != required_mask)
+ return false;
+ if (req->mask & ~required_mask)
+ return false;
+ return true;
+}
+
+int open_coredump_tmpfile(int fd_tmpfs_detached)
+{
+ return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
+}
+
+void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file)
+{
+ int epfd = -1;
+ int exit_code = EXIT_FAILURE;
+ struct epoll_event ev;
+ int flags;
+
+ /* Set socket to non-blocking mode for edge-triggered epoll */
+ flags = fcntl(fd_coredump, F_GETFL, 0);
+ if (flags < 0) {
+ fprintf(stderr, "Worker: fcntl(F_GETFL) failed: %m\n");
+ goto out;
+ }
+ if (fcntl(fd_coredump, F_SETFL, flags | O_NONBLOCK) < 0) {
+ fprintf(stderr, "Worker: fcntl(F_SETFL, O_NONBLOCK) failed: %m\n");
+ goto out;
+ }
+
+ epfd = epoll_create1(0);
+ if (epfd < 0) {
+ fprintf(stderr, "Worker: epoll_create1() failed: %m\n");
+ goto out;
+ }
+
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
+ ev.data.fd = fd_coredump;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0) {
+ fprintf(stderr, "Worker: epoll_ctl(EPOLL_CTL_ADD) failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ struct epoll_event events[1];
+ int n = epoll_wait(epfd, events, 1, -1);
+ if (n < 0) {
+ fprintf(stderr, "Worker: epoll_wait() failed: %m\n");
+ break;
+ }
+
+ if (events[0].events & (EPOLLIN | EPOLLRDHUP)) {
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ break;
+ fprintf(stderr, "Worker: read() failed: %m\n");
+ goto out;
+ }
+ if (bytes_read == 0)
+ goto done;
+ ssize_t bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_write != bytes_read) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "Worker: write() failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+ }
+ }
+
+done:
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "Worker: completed successfully\n");
+out:
+ if (epfd >= 0)
+ close(epfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ _exit(exit_code);
+}
diff --git a/tools/testing/selftests/coredump/stackdump b/tools/testing/selftests/coredump/stackdump
new file mode 100755
index 000000000000..96714ce42d12
--- /dev/null
+++ b/tools/testing/selftests/coredump/stackdump
@@ -0,0 +1,14 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+CRASH_PROGRAM_ID=$1
+STACKDUMP_FILE=$2
+
+TMP=$(mktemp)
+
+for t in /proc/$CRASH_PROGRAM_ID/task/*; do
+ tid=$(basename $t)
+ cat /proc/$tid/stat | awk '{print $29}' >> $TMP
+done
+
+mv $TMP $STACKDUMP_FILE
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
new file mode 100644
index 000000000000..1ec88937a1c2
--- /dev/null
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <limits.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <poll.h>
+#include <sys/epoll.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "kselftest_harness.h"
+#include "../filesystems/wrappers.h"
+#include "../pidfd/pidfd.h"
+
+#include "coredump_test.h"
+
+#define STACKDUMP_FILE "stack_values"
+#define STACKDUMP_SCRIPT "stackdump"
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+FIXTURE_SETUP(coredump)
+{
+ FILE *file;
+ int ret;
+
+ self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
+ file = fopen("/proc/sys/kernel/core_pattern", "r");
+ ASSERT_NE(NULL, file);
+
+ ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+ ASSERT_TRUE(ret || feof(file));
+ ASSERT_LT(ret, sizeof(self->original_core_pattern));
+
+ self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(coredump)
+{
+ const char *reason;
+ FILE *file;
+ int ret, status;
+
+ unlink(STACKDUMP_FILE);
+
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ if (!file) {
+ reason = "Unable to open core_pattern";
+ goto fail;
+ }
+
+ ret = fprintf(file, "%s", self->original_core_pattern);
+ if (ret < 0) {
+ reason = "Unable to write to core_pattern";
+ goto fail;
+ }
+
+ ret = fclose(file);
+ if (ret) {
+ reason = "Unable to close core_pattern";
+ goto fail;
+ }
+
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
+ return;
+fail:
+ /* This should never happen */
+ fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason);
+}
+
+TEST_F_TIMEOUT(coredump, stackdump, 120)
+{
+ unsigned long long stack;
+ char *test_dir, *line;
+ size_t line_length;
+ char buf[PAGE_SIZE];
+ int ret, i, status;
+ FILE *file;
+ pid_t pid;
+
+ /*
+ * Step 1: Setup core_pattern so that the stackdump script is executed when the child
+ * process crashes
+ */
+ ret = readlink("/proc/self/exe", buf, sizeof(buf));
+ ASSERT_NE(-1, ret);
+ ASSERT_LT(ret, sizeof(buf));
+ buf[ret] = '\0';
+
+ test_dir = dirname(buf);
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(NULL, file);
+
+ ret = fprintf(file, "|%1$s/%2$s %%P %1$s/%3$s", test_dir, STACKDUMP_SCRIPT, STACKDUMP_FILE);
+ ASSERT_LT(0, ret);
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+
+ /* Step 2: Create a process who spawns some threads then crashes */
+ pid = fork();
+ ASSERT_TRUE(pid >= 0);
+ if (pid == 0)
+ crashing_child();
+
+ /*
+ * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file
+ */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ for (i = 0; i < 10; ++i) {
+ file = fopen(STACKDUMP_FILE, "r");
+ if (file)
+ break;
+ sleep(1);
+ }
+ ASSERT_NE(file, NULL);
+
+ /* Step 4: Make sure all stack pointer values are non-zero */
+ line = NULL;
+ for (i = 0; -1 != getline(&line, &line_length, file); ++i) {
+ stack = strtoull(line, NULL, 10);
+ ASSERT_NE(stack, 0);
+ }
+ free(line);
+
+ ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN);
+
+ fclose(file);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile
index d8be047ee5b6..8b66c4738344 100644
--- a/tools/testing/selftests/cpu-hotplug/Makefile
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@@ -6,6 +6,6 @@ TEST_PROGS := cpu-on-off-test.sh
include ../lib.mk
run_full_test:
- @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+ @/bin/bash ./cpu-on-off-test.sh -a && echo "cpu-hotplug selftests: [PASS]" || echo "cpu-hotplug selftests: [FAIL]"
clean:
diff --git a/tools/testing/selftests/cpu-hotplug/config b/tools/testing/selftests/cpu-hotplug/config
deleted file mode 100644
index d4aca2ad5069..000000000000
--- a/tools/testing/selftests/cpu-hotplug/config
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_NOTIFIER_ERROR_INJECTION=y
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index 0d26b5e3f966..6232a46ca6e1 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -4,6 +4,7 @@
SYSFS=
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+retval=0
prerequisite()
{
@@ -66,7 +67,7 @@ hotpluggable_cpus()
done
}
-hotplaggable_offline_cpus()
+hotpluggable_offline_cpus()
{
hotpluggable_cpus 0
}
@@ -102,10 +103,10 @@ online_cpu_expect_success()
if ! online_cpu $cpu; then
echo $FUNCNAME $cpu: unexpected fail >&2
- exit 1
+ retval=1
elif ! cpu_is_online $cpu; then
echo $FUNCNAME $cpu: unexpected offline >&2
- exit 1
+ retval=1
fi
}
@@ -115,10 +116,10 @@ online_cpu_expect_fail()
if online_cpu $cpu 2> /dev/null; then
echo $FUNCNAME $cpu: unexpected success >&2
- exit 1
+ retval=1
elif ! cpu_is_offline $cpu; then
echo $FUNCNAME $cpu: unexpected online >&2
- exit 1
+ retval=1
fi
}
@@ -128,10 +129,10 @@ offline_cpu_expect_success()
if ! offline_cpu $cpu; then
echo $FUNCNAME $cpu: unexpected fail >&2
- exit 1
+ retval=1
elif ! cpu_is_offline $cpu; then
echo $FUNCNAME $cpu: unexpected offline >&2
- exit 1
+ retval=1
fi
}
@@ -141,16 +142,33 @@ offline_cpu_expect_fail()
if offline_cpu $cpu 2> /dev/null; then
echo $FUNCNAME $cpu: unexpected success >&2
- exit 1
+ retval=1
elif ! cpu_is_online $cpu; then
echo $FUNCNAME $cpu: unexpected offline >&2
- exit 1
+ retval=1
fi
}
-error=-12
+online_all_hot_pluggable_cpus()
+{
+ for cpu in `hotpluggable_offline_cpus`; do
+ online_cpu_expect_success $cpu
+ done
+}
+
+offline_all_hot_pluggable_cpus()
+{
+ local reserve_cpu=$online_max
+ for cpu in `hotpluggable_online_cpus`; do
+ # Reserve one cpu oneline at least.
+ if [ $cpu -eq $reserve_cpu ];then
+ continue
+ fi
+ offline_cpu_expect_success $cpu
+ done
+}
+
allcpus=0
-priority=0
online_cpus=0
online_max=0
offline_cpus=0
@@ -158,31 +176,20 @@ offline_max=0
present_cpus=0
present_max=0
-while getopts e:ahp: opt; do
+while getopts ah opt; do
case $opt in
- e)
- error=$OPTARG
- ;;
a)
allcpus=1
;;
h)
- echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]"
+ echo "Usage $0 [ -a ]"
echo -e "\t default offline one cpu"
echo -e "\t run with -a option to offline all cpus"
exit
;;
- p)
- priority=$OPTARG
- ;;
esac
done
-if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
- echo "error code must be -4095 <= errno < 0" >&2
- exit 1
-fi
-
prerequisite
#
@@ -196,12 +203,12 @@ if [ $allcpus -eq 0 ]; then
online_cpu_expect_success $online_max
if [[ $offline_cpus -gt 0 ]]; then
- echo -e "\t offline to online to offline: cpu $present_max"
+ echo -e "\t online to offline to online: cpu $present_max"
online_cpu_expect_success $present_max
offline_cpu_expect_success $present_max
online_cpu $present_max
fi
- exit 0
+ exit $retval
else
echo "Full scope test: all hotplug cpus"
echo -e "\t online all offline cpus"
@@ -209,85 +216,10 @@ else
echo -e "\t online all offline cpus"
fi
-#
-# Online all hot-pluggable CPUs
-#
-for cpu in `hotplaggable_offline_cpus`; do
- online_cpu_expect_success $cpu
-done
-
-#
-# Offline all hot-pluggable CPUs
-#
-for cpu in `hotpluggable_online_cpus`; do
- offline_cpu_expect_success $cpu
-done
-
-#
-# Online all hot-pluggable CPUs again
-#
-for cpu in `hotplaggable_offline_cpus`; do
- online_cpu_expect_success $cpu
-done
-
-#
-# Test with cpu notifier error injection
-#
+online_all_hot_pluggable_cpus
-DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
-NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu
+offline_all_hot_pluggable_cpus
-prerequisite_extra()
-{
- msg="skip extra tests:"
-
- /sbin/modprobe -q -r cpu-notifier-error-inject
- /sbin/modprobe -q cpu-notifier-error-inject priority=$priority
-
- if [ ! -d "$DEBUGFS" ]; then
- echo $msg debugfs is not mounted >&2
- exit $ksft_skip
- fi
-
- if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
- echo $msg cpu-notifier-error-inject module is not available >&2
- exit $ksft_skip
- fi
-}
-
-prerequisite_extra
-
-#
-# Offline all hot-pluggable CPUs
-#
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
-for cpu in `hotpluggable_online_cpus`; do
- offline_cpu_expect_success $cpu
-done
-
-#
-# Test CPU hot-add error handling (offline => online)
-#
-echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
-for cpu in `hotplaggable_offline_cpus`; do
- online_cpu_expect_fail $cpu
-done
-
-#
-# Online all hot-pluggable CPUs
-#
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
-for cpu in `hotplaggable_offline_cpus`; do
- online_cpu_expect_success $cpu
-done
-
-#
-# Test CPU hot-remove error handling (online => offline)
-#
-echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
-for cpu in `hotpluggable_online_cpus`; do
- offline_cpu_expect_fail $cpu
-done
+online_all_hot_pluggable_cpus
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
-/sbin/modprobe -q -r cpu-notifier-error-inject
+exit $retval
diff --git a/tools/testing/selftests/cpufreq/.gitignore b/tools/testing/selftests/cpufreq/.gitignore
new file mode 100644
index 000000000000..67604e91e068
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+cpufreq_selftest.*
diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile
index c86ca8342222..9b2ccb10b0cf 100644
--- a/tools/testing/selftests/cpufreq/Makefile
+++ b/tools/testing/selftests/cpufreq/Makefile
@@ -3,6 +3,7 @@ all:
TEST_PROGS := main.sh
TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh special-tests.sh
+EXTRA_CLEAN := cpufreq_selftest.dmesg_cpufreq.txt cpufreq_selftest.dmesg_full.txt cpufreq_selftest.txt
include ../lib.mk
diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config
index 27ff72ebd0f5..ce5068f5a6a2 100644
--- a/tools/testing/selftests/cpufreq/config
+++ b/tools/testing/selftests/cpufreq/config
@@ -5,11 +5,3 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
-CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_DEBUG_PI_LIST=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
-CONFIG_LOCKDEP=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
index b583a2fb4504..9927b654fb8f 100755
--- a/tools/testing/selftests/cpufreq/cpufreq.sh
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -52,7 +52,14 @@ read_cpufreq_files_in_dir()
for file in $files; do
if [ -f $1/$file ]; then
printf "$file:"
- cat $1/$file
+ #file is readable ?
+ local rfile=$(ls -l $1/$file | awk '$1 ~ /^.*r.*/ { print $NF; }')
+
+ if [ ! -z $rfile ]; then
+ cat $1/$file
+ else
+ printf "$file is not readable\n"
+ fi
else
printf "\n"
read_cpufreq_files_in_dir "$1/$file"
@@ -83,10 +90,10 @@ update_cpufreq_files_in_dir()
for file in $files; do
if [ -f $1/$file ]; then
- # file is writable ?
- local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }')
+ # file is readable and writable ?
+ local rwfile=$(ls -l $1/$file | awk '$1 ~ /^.*rw.*/ { print $NF; }')
- if [ ! -z $wfile ]; then
+ if [ ! -z $rwfile ]; then
# scaling_setspeed is a special file and we
# should skip updating it
if [ $file != "scaling_setspeed" ]; then
@@ -178,8 +185,7 @@ cpufreq_basic_tests()
count=$(count_cpufreq_managed_cpus)
if [ $count = 0 ]; then
- printf "No cpu is managed by cpufreq core, exiting\n"
- exit;
+ ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting\n"
else
printf "CPUFreq manages: $count CPUs\n\n"
fi
@@ -232,7 +238,23 @@ do_suspend()
for i in `seq 1 $2`; do
printf "Starting $1\n"
- echo $filename > $SYSFS/power/state
+
+ if [ "$3" = "rtc" ]; then
+ if ! command -v rtcwake &> /dev/null; then
+ printf "rtcwake could not be found, please install it.\n"
+ return 1
+ fi
+
+ rtcwake -m $filename -s 15
+
+ if [ $? -ne 0 ]; then
+ printf "Failed to suspend using RTC wake alarm\n"
+ return 1
+ fi
+ else
+ echo $filename > $SYSFS/power/state
+ fi
+
printf "Came out of $1\n"
printf "Do basic tests after finishing $1 to verify cpufreq state\n\n"
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index 31f8c9a76c5f..f12ff7416e41 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -7,15 +7,15 @@ source governor.sh
source module.sh
source special-tests.sh
+DIR="$(dirname $(readlink -f "$0"))"
+source "${DIR}"/../kselftest/ktap_helpers.sh
+
FUNC=basic # do basic tests by default
OUTFILE=cpufreq_selftest
SYSFS=
CPUROOT=
CPUFREQROOT=
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
helpme()
{
printf "Usage: $0 [-h] [-todg args]
@@ -24,6 +24,8 @@ helpme()
[-t <basic: Basic cpufreq testing
suspend: suspend/resume,
hibernate: hibernate/resume,
+ suspend_rtc: suspend/resume back using the RTC wakeup alarm,
+ hibernate_rtc: hibernate/resume back using the RTC wakeup alarm,
modtest: test driver or governor modules. Only to be used with -d or -g options,
sptest1: Simple governor switch to produce lockdep.
sptest2: Concurrent governor switch to produce lockdep.
@@ -32,7 +34,7 @@ helpme()
[-d <driver's module name: only with \"-t modtest>\"]
[-g <governor's module name: only with \"-t modtest>\"]
\n"
- exit 2
+ exit "${KSFT_FAIL}"
}
prerequisite()
@@ -40,8 +42,8 @@ prerequisite()
msg="skip all tests:"
if [ $UID != 0 ]; then
- echo $msg must be run as root >&2
- exit $ksft_skip
+ ktap_skip_all "$msg must be run as root"
+ exit "${KSFT_SKIP}"
fi
taskset -p 01 $$
@@ -49,21 +51,21 @@ prerequisite()
SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
if [ ! -d "$SYSFS" ]; then
- echo $msg sysfs is not mounted >&2
- exit 2
+ ktap_skip_all "$msg sysfs is not mounted"
+ exit "${KSFT_SKIP}"
fi
CPUROOT=$SYSFS/devices/system/cpu
CPUFREQROOT="$CPUROOT/cpufreq"
if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
- echo $msg cpus not available in sysfs >&2
- exit 2
+ ktap_skip_all "$msg cpus not available in sysfs"
+ exit "${KSFT_SKIP}"
fi
if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
- echo $msg cpufreq directory not available in sysfs >&2
- exit 2
+ ktap_skip_all "$msg cpufreq directory not available in sysfs"
+ exit "${KSFT_SKIP}"
fi
}
@@ -76,7 +78,8 @@ parse_arguments()
helpme
;;
- t) # --func_type (Function to perform: basic, suspend, hibernate, modtest, sptest1/2/3/4 (default: basic))
+ t) # --func_type (Function to perform: basic, suspend, hibernate,
+ # suspend_rtc, hibernate_rtc, modtest, sptest1/2/3/4 (default: basic))
FUNC=$OPTARG
;;
@@ -105,8 +108,7 @@ do_test()
count=$(count_cpufreq_managed_cpus)
if [ $count = 0 -a $FUNC != "modtest" ]; then
- echo "No cpu is managed by cpufreq core, exiting"
- exit 2;
+ ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting"
fi
case "$FUNC" in
@@ -122,11 +124,18 @@ do_test()
do_suspend "hibernate" 1
;;
+ "suspend_rtc")
+ do_suspend "suspend" 1 rtc
+ ;;
+
+ "hibernate_rtc")
+ do_suspend "hibernate" 1 rtc
+ ;;
+
"modtest")
# Do we have modules in place?
if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then
- echo "No driver or governor module passed with -d or -g"
- exit 2;
+ ktap_exit_fail_msg "No driver or governor module passed with -d or -g"
fi
if [ $DRIVER_MOD ]; then
@@ -137,8 +146,7 @@ do_test()
fi
else
if [ $count = 0 ]; then
- echo "No cpu is managed by cpufreq core, exiting"
- exit 2;
+ ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting"
fi
module_governor_test $GOVERNOR_MOD
@@ -162,7 +170,7 @@ do_test()
;;
*)
- echo "Invalid [-f] function type"
+ ktap_print_msg "Invalid [-f] function type"
helpme
;;
esac
@@ -186,13 +194,25 @@ dmesg_dumps()
dmesg >> $1.dmesg_full.txt
}
+ktap_print_header
+
# Parse arguments
parse_arguments $@
+ktap_set_plan 1
+
# Make sure all requirements are met
prerequisite
# Run requested functions
clear_dumps $OUTFILE
-do_test >> $OUTFILE.txt
+do_test | tee -a $OUTFILE.txt
+if [ "${PIPESTATUS[0]}" -ne 0 ]; then
+ exit ${PIPESTATUS[0]};
+fi
dmesg_dumps $OUTFILE
+
+ktap_test_pass "Completed successfully"
+
+ktap_print_totals
+exit "${KSFT_PASS}"
diff --git a/tools/testing/selftests/cpufreq/module.sh b/tools/testing/selftests/cpufreq/module.sh
index 22563cd122e7..7f2667e0ae2d 100755
--- a/tools/testing/selftests/cpufreq/module.sh
+++ b/tools/testing/selftests/cpufreq/module.sh
@@ -24,16 +24,14 @@ test_basic_insmod_rmmod()
# insert module
insmod $1
if [ $? != 0 ]; then
- printf "Insmod $1 failed\n"
- exit;
+ ktap_exit_fail_msg "Insmod $1 failed\n"
fi
printf "Removing $1 module\n"
# remove module
rmmod $1
if [ $? != 0 ]; then
- printf "rmmod $1 failed\n"
- exit;
+ ktap_exit_fail_msg "rmmod $1 failed\n"
fi
printf "\n"
diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore
new file mode 100644
index 000000000000..2f0297657c81
--- /dev/null
+++ b/tools/testing/selftests/damon/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+access_memory
+access_memory_even
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
new file mode 100644
index 000000000000..2180c328a825
--- /dev/null
+++ b/tools/testing/selftests/damon/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for damon selftests
+
+TEST_GEN_FILES += access_memory access_memory_even
+
+TEST_FILES = _damon_sysfs.py
+TEST_FILES += drgn_dump_damon_status.py
+TEST_FILES += _common.sh
+
+# functionality tests
+TEST_PROGS += sysfs.sh
+TEST_PROGS += sysfs.py
+TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
+TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py
+TEST_PROGS += damos_tried_regions.py damon_nr_regions.py
+TEST_PROGS += reclaim.sh lru_sort.sh
+
+# regression tests (reproducers of previously found bugs)
+TEST_PROGS += sysfs_update_removed_scheme_dir.sh
+TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
+TEST_PROGS += sysfs_memcg_path_leak.sh
+TEST_PROGS += sysfs_no_op_commit_break.py
+
+EXTRA_CLEAN = __pycache__
+
+include ../lib.mk
diff --git a/tools/testing/selftests/damon/_common.sh b/tools/testing/selftests/damon/_common.sh
new file mode 100644
index 000000000000..0279698f733e
--- /dev/null
+++ b/tools/testing/selftests/damon/_common.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+check_dependencies()
+{
+ if [ $EUID -ne 0 ]
+ then
+ echo "Run as root"
+ exit $ksft_skip
+ fi
+}
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
new file mode 100644
index 000000000000..748778b563cd
--- /dev/null
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -0,0 +1,837 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+
+ksft_skip=4
+
+sysfs_root = None
+with open('/proc/mounts', 'r') as f:
+ for line in f:
+ dev_name, mount_point, dev_fs = line.split()[:3]
+ if dev_fs == 'sysfs':
+ sysfs_root = '%s/kernel/mm/damon/admin' % mount_point
+ break
+if sysfs_root is None:
+ print('Seems sysfs not mounted?')
+ exit(ksft_skip)
+
+if not os.path.exists(sysfs_root):
+ print('Seems DAMON disabled?')
+ exit(ksft_skip)
+
+def write_file(path, string):
+ "Returns error string if failed, or None otherwise"
+ string = '%s' % string
+ try:
+ with open(path, 'w') as f:
+ f.write(string)
+ except Exception as e:
+ return '%s' % e
+ return None
+
+def read_file(path):
+ '''Returns the read content and error string. The read content is None if
+ the reading failed'''
+ try:
+ with open(path, 'r') as f:
+ return f.read(), None
+ except Exception as e:
+ return None, '%s' % e
+
+class DamosAccessPattern:
+ size = None
+ nr_accesses = None
+ age = None
+ scheme = None
+
+ def __init__(self, size=None, nr_accesses=None, age=None):
+ self.size = size
+ self.nr_accesses = nr_accesses
+ self.age = age
+
+ if self.size is None:
+ self.size = [0, 2**64 - 1]
+ if self.nr_accesses is None:
+ self.nr_accesses = [0, 2**32 - 1]
+ if self.age is None:
+ self.age = [0, 2**32 - 1]
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), 'access_pattern')
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0])
+ if err is not None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1])
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'),
+ self.nr_accesses[0])
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'),
+ self.nr_accesses[1])
+ if err is not None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0])
+ if err is not None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1])
+ if err is not None:
+ return err
+
+qgoal_metric_user_input = 'user_input'
+qgoal_metric_some_mem_psi_us = 'some_mem_psi_us'
+qgoal_metrics = [qgoal_metric_user_input, qgoal_metric_some_mem_psi_us]
+
+class DamosQuotaGoal:
+ metric = None
+ target_value = None
+ current_value = None
+ nid = None
+ effective_bytes = None
+ quota = None # owner quota
+ idx = None
+
+ def __init__(self, metric, target_value=10000, current_value=0, nid=0):
+ self.metric = metric
+ self.target_value = target_value
+ self.current_value = current_value
+ self.nid = nid
+
+ def sysfs_dir(self):
+ return os.path.join(self.quota.sysfs_dir(), 'goals', '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'target_metric'),
+ self.metric)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'target_value'),
+ self.target_value)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'current_value'),
+ self.current_value)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'nid'), self.nid)
+ if err is not None:
+ return err
+
+ return None
+
+class DamosQuota:
+ sz = None # size quota, in bytes
+ ms = None # time quota
+ goals = None # quota goals
+ reset_interval_ms = None # quota reset interval
+ weight_sz_permil = None
+ weight_nr_accesses_permil = None
+ weight_age_permil = None
+ scheme = None # owner scheme
+
+ def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0,
+ weight_sz_permil=0, weight_nr_accesses_permil=0,
+ weight_age_permil=0):
+ self.sz = sz
+ self.ms = ms
+ self.reset_interval_ms = reset_interval_ms
+ self.weight_sz_permil = weight_sz_permil
+ self.weight_nr_accesses_permil = weight_nr_accesses_permil
+ self.weight_age_permil = weight_age_permil
+ self.goals = goals if goals is not None else []
+ for idx, goal in enumerate(self.goals):
+ goal.idx = idx
+ goal.quota = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), 'quotas')
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'bytes'), self.sz)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'ms'), self.ms)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'reset_interval_ms'),
+ self.reset_interval_ms)
+ if err is not None:
+ return err
+
+ err = write_file(os.path.join(
+ self.sysfs_dir(), 'weights', 'sz_permil'), self.weight_sz_permil)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(
+ self.sysfs_dir(), 'weights', 'nr_accesses_permil'),
+ self.weight_nr_accesses_permil)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(
+ self.sysfs_dir(), 'weights', 'age_permil'), self.weight_age_permil)
+ if err is not None:
+ return err
+
+ nr_goals_file = os.path.join(self.sysfs_dir(), 'goals', 'nr_goals')
+ content, err = read_file(nr_goals_file)
+ if err is not None:
+ return err
+ if int(content) != len(self.goals):
+ err = write_file(nr_goals_file, len(self.goals))
+ if err is not None:
+ return err
+ for goal in self.goals:
+ err = goal.stage()
+ if err is not None:
+ return err
+ return None
+
+class DamosWatermarks:
+ metric = None
+ interval = None
+ high = None
+ mid = None
+ low = None
+ scheme = None # owner scheme
+
+ def __init__(self, metric='none', interval=0, high=0, mid=0, low=0):
+ self.metric = metric
+ self.interval = interval
+ self.high = high
+ self.mid = mid
+ self.low = low
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), 'watermarks')
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'metric'), self.metric)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'interval_us'),
+ self.interval)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'high'), self.high)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'mid'), self.mid)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'low'), self.low)
+ if err is not None:
+ return err
+
+class DamosFilter:
+ type_ = None
+ matching = None
+ allow = None
+ memcg_path = None
+ addr_start = None
+ addr_end = None
+ target_idx = None
+ min_ = None
+ max_ = None
+ idx = None
+ filters = None # owner filters
+
+ def __init__(self, type_='anon', matching=False, allow=False,
+ memcg_path='', addr_start=0, addr_end=0, target_idx=0, min_=0,
+ max_=0):
+ self.type_ = type_
+ self.matching = matching
+ self.allow = allow
+ self.memcg_path = memcg_path,
+ self.addr_start = addr_start
+ self.addr_end = addr_end
+ self.target_idx = target_idx
+ self.min_ = min_
+ self.max_ = max_
+
+ def sysfs_dir(self):
+ return os.path.join(self.filters.sysfs_dir(), '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'type'), self.type_)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'matching'),
+ self.matching)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'allow'), self.allow)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'memcg_path'),
+ self.memcg_path)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'addr_start'),
+ self.addr_start)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'addr_end'),
+ self.addr_end)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'damon_target_idx'),
+ self.target_idx)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'min'), self.min_)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'max'), self.max_)
+ if err is not None:
+ return err
+ return None
+
+class DamosFilters:
+ name = None
+ filters = None
+ scheme = None # owner scheme
+
+ def __init__(self, name, filters=[]):
+ self.name = name
+ self.filters = filters
+ for idx, filter_ in enumerate(self.filters):
+ filter_.idx = idx
+ filter_.filters = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), self.name)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_filters'),
+ len(self.filters))
+ if err is not None:
+ return err
+ for filter_ in self.filters:
+ err = filter_.stage()
+ if err is not None:
+ return err
+ return None
+
+class DamosDest:
+ id = None
+ weight = None
+ idx = None
+ dests = None # owner dests
+
+ def __init__(self, id=0, weight=0):
+ self.id = id
+ self.weight = weight
+
+ def sysfs_dir(self):
+ return os.path.join(self.dests.sysfs_dir(), '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'id'), self.id)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'weight'), self.weight)
+ if err is not None:
+ return err
+ return None
+
+class DamosDests:
+ dests = None
+ scheme = None # owner scheme
+
+ def __init__(self, dests=[]):
+ self.dests = dests
+ for idx, dest in enumerate(self.dests):
+ dest.idx = idx
+ dest.dests = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), 'dests')
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_dests'),
+ len(self.dests))
+ if err is not None:
+ return err
+ for dest in self.dests:
+ err = dest.stage()
+ if err is not None:
+ return err
+ return None
+
+class DamosStats:
+ nr_tried = None
+ sz_tried = None
+ nr_applied = None
+ sz_applied = None
+ qt_exceeds = None
+
+ def __init__(self, nr_tried, sz_tried, nr_applied, sz_applied, qt_exceeds):
+ self.nr_tried = nr_tried
+ self.sz_tried = sz_tried
+ self.nr_applied = nr_applied
+ self.sz_applied = sz_applied
+ self.qt_exceeds = qt_exceeds
+
+class DamosTriedRegion:
+ def __init__(self, start, end, nr_accesses, age):
+ self.start = start
+ self.end = end
+ self.nr_accesses = nr_accesses
+ self.age = age
+
+class Damos:
+ action = None
+ access_pattern = None
+ quota = None
+ watermarks = None
+ core_filters = None
+ ops_filters = None
+ filters = None
+ apply_interval_us = None
+ target_nid = None
+ dests = None
+ idx = None
+ context = None
+ tried_bytes = None
+ stats = None
+ tried_regions = None
+
+ def __init__(self, action='stat', access_pattern=DamosAccessPattern(),
+ quota=DamosQuota(), watermarks=DamosWatermarks(),
+ core_filters=[], ops_filters=[], filters=[], target_nid=0,
+ dests=DamosDests(), apply_interval_us=0):
+ self.action = action
+ self.access_pattern = access_pattern
+ self.access_pattern.scheme = self
+ self.quota = quota
+ self.quota.scheme = self
+ self.watermarks = watermarks
+ self.watermarks.scheme = self
+
+ self.core_filters = DamosFilters(name='core_filters',
+ filters=core_filters)
+ self.core_filters.scheme = self
+ self.ops_filters = DamosFilters(name='ops_filters',
+ filters=ops_filters)
+ self.ops_filters.scheme = self
+ self.filters = DamosFilters(name='filters', filters=filters)
+ self.filters.scheme = self
+
+ self.target_nid = target_nid
+ self.dests = dests
+ self.dests.scheme = self
+
+ self.apply_interval_us = apply_interval_us
+
+ def sysfs_dir(self):
+ return os.path.join(
+ self.context.sysfs_dir(), 'schemes', '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action)
+ if err is not None:
+ return err
+ err = self.access_pattern.stage()
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'apply_interval_us'),
+ '%d' % self.apply_interval_us)
+ if err is not None:
+ return err
+
+ err = self.quota.stage()
+ if err is not None:
+ return err
+
+ err = self.watermarks.stage()
+ if err is not None:
+ return err
+
+ err = self.core_filters.stage()
+ if err is not None:
+ return err
+ err = self.ops_filters.stage()
+ if err is not None:
+ return err
+ err = self.filters.stage()
+ if err is not None:
+ return err
+
+ err = write_file(os.path.join(self.sysfs_dir(), 'target_nid'), '%d' %
+ self.target_nid)
+ if err is not None:
+ return err
+
+ err = self.dests.stage()
+ if err is not None:
+ return err
+
+class DamonTarget:
+ pid = None
+ obsolete = None
+ # todo: Support target regions if test is made
+ idx = None
+ context = None
+
+ def __init__(self, pid, obsolete=False):
+ self.pid = pid
+ self.obsolete = obsolete
+
+ def sysfs_dir(self):
+ return os.path.join(
+ self.context.sysfs_dir(), 'targets', '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0')
+ if err is not None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'pid_target'), self.pid)
+ if err is not None:
+ return err
+ return write_file(
+ os.path.join(self.sysfs_dir(), 'obsolete_target'),
+ 'Y' if self.obsolete else 'N')
+
+class IntervalsGoal:
+ access_bp = None
+ aggrs = None
+ min_sample_us = None
+ max_sample_us = None
+ attrs = None # owner DamonAttrs
+
+ def __init__(self, access_bp=0, aggrs=0, min_sample_us=0, max_sample_us=0):
+ self.access_bp = access_bp
+ self.aggrs = aggrs
+ self.min_sample_us = min_sample_us
+ self.max_sample_us = max_sample_us
+
+ def sysfs_dir(self):
+ return os.path.join(self.attrs.interval_sysfs_dir(), 'intervals_goal')
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'access_bp'), self.access_bp)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'aggrs'), self.aggrs)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'min_sample_us'),
+ self.min_sample_us)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'max_sample_us'),
+ self.max_sample_us)
+ if err is not None:
+ return err
+ return None
+
+class DamonAttrs:
+ sample_us = None
+ aggr_us = None
+ intervals_goal = None
+ update_us = None
+ min_nr_regions = None
+ max_nr_regions = None
+ context = None
+
+ def __init__(self, sample_us=5000, aggr_us=100000,
+ intervals_goal=IntervalsGoal(), update_us=1000000,
+ min_nr_regions=10, max_nr_regions=1000):
+ self.sample_us = sample_us
+ self.aggr_us = aggr_us
+ self.intervals_goal = intervals_goal
+ self.intervals_goal.attrs = self
+ self.update_us = update_us
+ self.min_nr_regions = min_nr_regions
+ self.max_nr_regions = max_nr_regions
+
+ def interval_sysfs_dir(self):
+ return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs',
+ 'intervals')
+
+ def nr_regions_range_sysfs_dir(self):
+ return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs',
+ 'nr_regions')
+
+ def stage(self):
+ err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'),
+ self.sample_us)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'),
+ self.aggr_us)
+ if err is not None:
+ return err
+ err = self.intervals_goal.stage()
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'),
+ self.update_us)
+ if err is not None:
+ return err
+
+ err = write_file(
+ os.path.join(self.nr_regions_range_sysfs_dir(), 'min'),
+ self.min_nr_regions)
+ if err is not None:
+ return err
+
+ err = write_file(
+ os.path.join(self.nr_regions_range_sysfs_dir(), 'max'),
+ self.max_nr_regions)
+ if err is not None:
+ return err
+
+class DamonCtx:
+ ops = None
+ monitoring_attrs = None
+ targets = None
+ schemes = None
+ kdamond = None
+ idx = None
+
+ def __init__(self, ops='paddr', monitoring_attrs=DamonAttrs(), targets=[],
+ schemes=[]):
+ self.ops = ops
+ self.monitoring_attrs = monitoring_attrs
+ self.monitoring_attrs.context = self
+
+ self.targets = targets
+ for idx, target in enumerate(self.targets):
+ target.idx = idx
+ target.context = self
+
+ self.schemes = schemes
+ for idx, scheme in enumerate(self.schemes):
+ scheme.idx = idx
+ scheme.context = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.kdamond.sysfs_dir(), 'contexts',
+ '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'operations'), self.ops)
+ if err is not None:
+ return err
+ err = self.monitoring_attrs.stage()
+ if err is not None:
+ return err
+
+ nr_targets_file = os.path.join(
+ self.sysfs_dir(), 'targets', 'nr_targets')
+ content, err = read_file(nr_targets_file)
+ if err is not None:
+ return err
+ if int(content) != len(self.targets):
+ err = write_file(nr_targets_file, '%d' % len(self.targets))
+ if err is not None:
+ return err
+ for target in self.targets:
+ err = target.stage()
+ if err is not None:
+ return err
+
+ nr_schemes_file = os.path.join(
+ self.sysfs_dir(), 'schemes', 'nr_schemes')
+ content, err = read_file(nr_schemes_file)
+ if err is not None:
+ return err
+ if int(content) != len(self.schemes):
+ err = write_file(nr_schemes_file, '%d' % len(self.schemes))
+ if err is not None:
+ return err
+ for scheme in self.schemes:
+ err = scheme.stage()
+ if err is not None:
+ return err
+ return None
+
+class Kdamond:
+ state = None
+ pid = None
+ contexts = None
+ idx = None # index of this kdamond between siblings
+ kdamonds = None # parent
+
+ def __init__(self, contexts=[]):
+ self.contexts = contexts
+ for idx, context in enumerate(self.contexts):
+ context.idx = idx
+ context.kdamond = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.kdamonds.sysfs_dir(), '%d' % self.idx)
+
+ def start(self):
+ nr_contexts_file = os.path.join(self.sysfs_dir(),
+ 'contexts', 'nr_contexts')
+ content, err = read_file(nr_contexts_file)
+ if err is not None:
+ return err
+ if int(content) != len(self.contexts):
+ err = write_file(nr_contexts_file, '%d' % len(self.contexts))
+ if err is not None:
+ return err
+
+ for context in self.contexts:
+ err = context.stage()
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on')
+ if err is not None:
+ return err
+ self.pid, err = read_file(os.path.join(self.sysfs_dir(), 'pid'))
+ return err
+
+ def stop(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'off')
+ return err
+
+ def update_schemes_tried_regions(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_tried_regions')
+ if err is not None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ tried_regions = []
+ tried_regions_dir = os.path.join(
+ scheme.sysfs_dir(), 'tried_regions')
+ region_indices = []
+ for filename in os.listdir(
+ os.path.join(scheme.sysfs_dir(), 'tried_regions')):
+ tried_region_dir = os.path.join(tried_regions_dir, filename)
+ if not os.path.isdir(tried_region_dir):
+ continue
+ region_indices.append(int(filename))
+ for region_idx in sorted(region_indices):
+ tried_region_dir = os.path.join(tried_regions_dir,
+ '%d' % region_idx)
+ region_values = []
+ for f in ['start', 'end', 'nr_accesses', 'age']:
+ content, err = read_file(
+ os.path.join(tried_region_dir, f))
+ if err is not None:
+ return err
+ region_values.append(int(content))
+ tried_regions.append(DamosTriedRegion(*region_values))
+ scheme.tried_regions = tried_regions
+
+ def update_schemes_tried_bytes(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_tried_bytes')
+ if err is not None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ content, err = read_file(os.path.join(scheme.sysfs_dir(),
+ 'tried_regions', 'total_bytes'))
+ if err is not None:
+ return err
+ scheme.tried_bytes = int(content)
+
+ def update_schemes_stats(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_stats')
+ if err is not None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ stat_values = []
+ for stat in ['nr_tried', 'sz_tried', 'nr_applied',
+ 'sz_applied', 'qt_exceeds']:
+ content, err = read_file(
+ os.path.join(scheme.sysfs_dir(), 'stats', stat))
+ if err is not None:
+ return err
+ stat_values.append(int(content))
+ scheme.stats = DamosStats(*stat_values)
+
+ def update_schemes_effective_quotas(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_effective_quotas')
+ if err is not None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ for goal in scheme.quota.goals:
+ content, err = read_file(
+ os.path.join(scheme.quota.sysfs_dir(),
+ 'effective_bytes'))
+ if err is not None:
+ return err
+ goal.effective_bytes = int(content)
+ return None
+
+ def commit(self):
+ nr_contexts_file = os.path.join(self.sysfs_dir(),
+ 'contexts', 'nr_contexts')
+ content, err = read_file(nr_contexts_file)
+ if err is not None:
+ return err
+ if int(content) != len(self.contexts):
+ err = write_file(nr_contexts_file, '%d' % len(self.contexts))
+ if err is not None:
+ return err
+
+ for context in self.contexts:
+ err = context.stage()
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'commit')
+ return err
+
+
+ def commit_schemes_quota_goals(self):
+ for context in self.contexts:
+ for scheme in context.schemes:
+ for goal in scheme.quota.goals:
+ err = goal.stage()
+ if err is not None:
+ print('commit_schemes_quota_goals failed stagign: %s'%
+ err)
+ exit(1)
+ return write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'commit_schemes_quota_goals')
+
+class Kdamonds:
+ kdamonds = []
+
+ def __init__(self, kdamonds=[]):
+ self.kdamonds = kdamonds
+ for idx, kdamond in enumerate(self.kdamonds):
+ kdamond.idx = idx
+ kdamond.kdamonds = self
+
+ def sysfs_dir(self):
+ return os.path.join(sysfs_root, 'kdamonds')
+
+ def start(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'),
+ '%s' % len(self.kdamonds))
+ if err is not None:
+ return err
+ for kdamond in self.kdamonds:
+ err = kdamond.start()
+ if err is not None:
+ return err
+ return None
+
+ def stop(self):
+ for kdamond in self.kdamonds:
+ err = kdamond.stop()
+ if err is not None:
+ return err
+ return None
diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c
new file mode 100644
index 000000000000..56b17e8fe1be
--- /dev/null
+++ b/tools/testing/selftests/damon/access_memory.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Artificial memory access program for testing DAMON.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+int main(int argc, char *argv[])
+{
+ char **regions;
+ clock_t start_clock;
+ int nr_regions;
+ int sz_region;
+ int access_time_ms;
+ int i;
+
+ if (argc != 4) {
+ printf("Usage: %s <number> <size (bytes)> <time (ms)>\n",
+ argv[0]);
+ return -1;
+ }
+
+ nr_regions = atoi(argv[1]);
+ sz_region = atoi(argv[2]);
+ access_time_ms = atoi(argv[3]);
+
+ regions = malloc(sizeof(*regions) * nr_regions);
+ for (i = 0; i < nr_regions; i++)
+ regions[i] = malloc(sz_region);
+
+ for (i = 0; i < nr_regions; i++) {
+ start_clock = clock();
+ while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC <
+ access_time_ms)
+ memset(regions[i], i, sz_region);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c
new file mode 100644
index 000000000000..93f3a71bcfd4
--- /dev/null
+++ b/tools/testing/selftests/damon/access_memory_even.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Artificial memory access program for testing DAMON.
+ *
+ * Receives number of regions and size of each region from user. Allocate the
+ * regions and repeatedly access even numbered (starting from zero) regions.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char *argv[])
+{
+ char **regions;
+ int nr_regions;
+ int sz_region;
+ int i;
+
+ if (argc != 3) {
+ printf("Usage: %s <number> <size (bytes)>\n", argv[0]);
+ return -1;
+ }
+
+ nr_regions = atoi(argv[1]);
+ sz_region = atoi(argv[2]);
+
+ regions = malloc(sizeof(*regions) * nr_regions);
+ for (i = 0; i < nr_regions; i++)
+ regions[i] = malloc(sz_region);
+
+ while (1) {
+ for (i = 0; i < nr_regions; i++) {
+ if (i % 2 == 0)
+ memset(regions[i], i, sz_region);
+ }
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/damon/config b/tools/testing/selftests/damon/config
new file mode 100644
index 000000000000..a68a9fead5dc
--- /dev/null
+++ b/tools/testing/selftests/damon/config
@@ -0,0 +1,6 @@
+CONFIG_DAMON=y
+CONFIG_DAMON_SYSFS=y
+CONFIG_DAMON_PADDR=y
+CONFIG_DAMON_VADDR=y
+CONFIG_DAMON_RECLAIM=y
+CONFIG_DAMON_LRU_SORT=y
diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py
new file mode 100755
index 000000000000..58f3291fed12
--- /dev/null
+++ b/tools/testing/selftests/damon/damon_nr_regions.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions):
+ '''
+ Create process of the given 'real_nr_regions' regions, monitor it using
+ DAMON with given '{min,max}_nr_regions' monitoring parameter.
+
+ Exit with non-zero return code if the given {min,max}_nr_regions is not
+ kept.
+ '''
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory_even', '%d' % real_nr_regions,
+ '%d' % sz_region])
+
+ # stat every monitored regions
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ monitoring_attrs=_damon_sysfs.DamonAttrs(
+ min_nr_regions=min_nr_regions,
+ max_nr_regions=max_nr_regions),
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(action='stat',
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err is not None:
+ proc.terminate()
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ collected_nr_regions = []
+ while proc.poll() is None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_tried_regions()
+ if err is not None:
+ proc.terminate()
+ print('tried regions update failed: %s' % err)
+ exit(1)
+
+ scheme = kdamonds.kdamonds[0].contexts[0].schemes[0]
+ if scheme.tried_regions is None:
+ proc.terminate()
+ print('tried regions is not collected')
+ exit(1)
+
+ nr_tried_regions = len(scheme.tried_regions)
+ if nr_tried_regions <= 0:
+ proc.terminate()
+ print('tried regions is not created')
+ exit(1)
+ collected_nr_regions.append(nr_tried_regions)
+ if len(collected_nr_regions) > 10:
+ break
+ proc.terminate()
+ kdamonds.stop()
+
+ test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % (
+ real_nr_regions, min_nr_regions, max_nr_regions)
+ collected_nr_regions.sort()
+ if (collected_nr_regions[0] < min_nr_regions or
+ collected_nr_regions[-1] > max_nr_regions):
+ print('fail %s' % test_name)
+ print('number of regions that collected are:')
+ for nr in collected_nr_regions:
+ print(nr)
+ exit(1)
+ print('pass %s ' % test_name)
+
+def main():
+ # test min_nr_regions larger than real nr regions
+ test_nr_regions(10, 20, 100)
+
+ # test max_nr_regions smaller than real nr regions
+ test_nr_regions(15, 3, 10)
+
+ # test online-tuned max_nr_regions that smaller than real nr regions
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory_even', '14', '%d' % sz_region])
+
+ # stat every monitored regions
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ monitoring_attrs=_damon_sysfs.DamonAttrs(
+ min_nr_regions=10, max_nr_regions=1000),
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(action='stat',
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err is not None:
+ proc.terminate()
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ # wait until the real regions are found
+ time.sleep(3)
+
+ attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs
+ attrs.min_nr_regions = 3
+ attrs.max_nr_regions = 7
+ attrs.update_us = 100000
+ err = kdamonds.kdamonds[0].commit()
+ if err is not None:
+ proc.terminate()
+ print('commit failed: %s' % err)
+ exit(1)
+ # wait for next merge operation is executed
+ time.sleep(0.3)
+
+ err = kdamonds.kdamonds[0].update_schemes_tried_regions()
+ if err is not None:
+ proc.terminate()
+ print('tried regions update failed: %s' % err)
+ exit(1)
+
+ scheme = kdamonds.kdamonds[0].contexts[0].schemes[0]
+ if scheme.tried_regions is None:
+ proc.terminate()
+ print('tried regions is not collected')
+ exit(1)
+
+ nr_tried_regions = len(scheme.tried_regions)
+ if nr_tried_regions <= 0:
+ proc.terminate()
+ print('tried regions is not created')
+ exit(1)
+ proc.terminate()
+
+ if nr_tried_regions > 7:
+ print('fail online-tuned max_nr_regions: %d > 7' % nr_tried_regions)
+ exit(1)
+ print('pass online-tuned max_nr_regions')
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/damos_apply_interval.py b/tools/testing/selftests/damon/damos_apply_interval.py
new file mode 100755
index 000000000000..f04d43702481
--- /dev/null
+++ b/tools/testing/selftests/damon/damos_apply_interval.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+
+ # Set quota up to 1 MiB per 100 ms
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[
+ _damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ # >= 25% access rate, >= 200ms age
+ nr_accesses=[5, 20], age=[2, 2**64 - 1]),
+ # aggregation interval (100 ms) is used
+ apply_interval_us=0),
+ # use 10ms apply interval
+ _damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ # >= 25% access rate, >= 200ms age
+ nr_accesses=[5, 20], age=[2, 2**64 - 1]),
+ # explicitly set 10 ms apply interval
+ apply_interval_us=10 * 1000)
+ ] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ wss_collected = []
+ nr_quota_exceeds = 0
+ while proc.poll() == None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_stats()
+ if err != None:
+ print('stats update failed: %s' % err)
+ exit(1)
+ schemes = kdamonds.kdamonds[0].contexts[0].schemes
+ nr_tried_stats = [s.stats.nr_tried for s in schemes]
+ if nr_tried_stats[0] == 0 or nr_tried_stats[1] == 0:
+ print('scheme(s) are not tried')
+ exit(1)
+
+ # Because the second scheme was having the apply interval that is ten times
+ # lower than that of the first scheme, the second scheme should be tried
+ # about ten times more frequently than the first scheme. For possible
+ # timing errors, check if it was at least nine times more freuqnetly tried.
+ ratio = nr_tried_stats[1] / nr_tried_stats[0]
+ if ratio < 9:
+ print('%d / %d = %f (< 9)' %
+ (nr_tried_stats[1], nr_tried_stats[0], ratio))
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py
new file mode 100755
index 000000000000..57c4937aaed2
--- /dev/null
+++ b/tools/testing/selftests/damon/damos_quota.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+
+ # Set quota up to 1 MiB per 100 ms
+ sz_quota = 1024 * 1024 # 1 MiB
+ quota_reset_interval = 100 # 100 ms
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ # >= 25% access rate, >= 200ms age
+ nr_accesses=[5, 20], age=[2, 2**64 - 1]),
+ quota=_damon_sysfs.DamosQuota(
+ sz=sz_quota, reset_interval_ms=quota_reset_interval)
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ wss_collected = []
+ nr_quota_exceeds = 0
+ while proc.poll() == None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
+ if err != None:
+ print('tried bytes update failed: %s' % err)
+ exit(1)
+ err = kdamonds.kdamonds[0].update_schemes_stats()
+ if err != None:
+ print('stats update failed: %s' % err)
+ exit(1)
+
+ scheme = kdamonds.kdamonds[0].contexts[0].schemes[0]
+ wss_collected.append(scheme.tried_bytes)
+ nr_quota_exceeds = scheme.stats.qt_exceeds
+
+ wss_collected.sort()
+ nr_expected_quota_exceeds = 0
+ for wss in wss_collected:
+ if wss > sz_quota:
+ print('quota is not kept: %s > %s' % (wss, sz_quota))
+ print('collected samples are as below')
+ print('\n'.join(['%d' % wss for wss in wss_collected]))
+ exit(1)
+ if wss == sz_quota:
+ nr_expected_quota_exceeds += 1
+
+ if nr_quota_exceeds < nr_expected_quota_exceeds:
+ print('quota is exceeded less than expected: %d < %d' %
+ (nr_quota_exceeds, nr_expected_quota_exceeds))
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py
new file mode 100755
index 000000000000..f76e0412b564
--- /dev/null
+++ b/tools/testing/selftests/damon/damos_quota_goal.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+
+ goal = _damon_sysfs.DamosQuotaGoal(
+ metric=_damon_sysfs.qgoal_metric_user_input, target_value=10000)
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ action='stat',
+ quota=_damon_sysfs.DamosQuota(
+ goals=[goal], reset_interval_ms=100),
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ score_values_to_test = [0, 15000, 5000, 18000]
+ while proc.poll() == None:
+ if len(score_values_to_test) == 0:
+ time.sleep(0.1)
+ continue
+
+ goal.current_value = score_values_to_test.pop(0)
+ expect_increase = goal.current_value < goal.target_value
+
+ err = kdamonds.kdamonds[0].commit_schemes_quota_goals()
+ if err is not None:
+ print('commit_schemes_quota_goals failed: %s' % err)
+ exit(1)
+
+ err = kdamonds.kdamonds[0].update_schemes_effective_quotas()
+ if err is not None:
+ print('before-update_schemes_effective_quotas failed: %s' % err)
+ exit(1)
+ last_effective_bytes = goal.effective_bytes
+
+ time.sleep(0.5)
+
+ err = kdamonds.kdamonds[0].update_schemes_effective_quotas()
+ if err is not None:
+ print('after-update_schemes_effective_quotas failed: %s' % err)
+ exit(1)
+
+ print('score: %s, effective quota: %d -> %d (%.3fx)' % (
+ goal.current_value, last_effective_bytes, goal.effective_bytes,
+ goal.effective_bytes / last_effective_bytes
+ if last_effective_bytes != 0 else -1.0))
+
+ if last_effective_bytes == goal.effective_bytes:
+ # effective quota was already minimum that cannot be more reduced
+ if expect_increase is False and last_effective_bytes == 1:
+ continue
+ print('efective bytes not changed: %d' % goal.effective_bytes)
+ exit(1)
+
+ increased = last_effective_bytes < goal.effective_bytes
+ if expect_increase != increased:
+ print('expectation of increase (%s) != increased (%s)' %
+ (expect_increase, increased))
+ exit(1)
+ last_effective_bytes = goal.effective_bytes
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/damos_tried_regions.py b/tools/testing/selftests/damon/damos_tried_regions.py
new file mode 100755
index 000000000000..3b347eb28bd2
--- /dev/null
+++ b/tools/testing/selftests/damon/damos_tried_regions.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # repeatedly access even-numbered ones in 14 regions of 10 MiB size
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory_even', '14', '%d' % sz_region])
+
+ # stat every monitored regions
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(action='stat',
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err is not None:
+ proc.terminate()
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ collected_nr_regions = []
+ while proc.poll() is None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_tried_regions()
+ if err is not None:
+ proc.terminate()
+ print('tried regions update failed: %s' % err)
+ exit(1)
+
+ scheme = kdamonds.kdamonds[0].contexts[0].schemes[0]
+ if scheme.tried_regions is None:
+ proc.terminate()
+ print('tried regions is not collected')
+ exit(1)
+
+ nr_tried_regions = len(scheme.tried_regions)
+ if nr_tried_regions <= 0:
+ proc.terminate()
+ print('tried regions is not created')
+ exit(1)
+ collected_nr_regions.append(nr_tried_regions)
+ if len(collected_nr_regions) > 10:
+ break
+ proc.terminate()
+
+ collected_nr_regions.sort()
+ sample = collected_nr_regions[4]
+ print('50-th percentile nr_regions: %d' % sample)
+ print('expectation (>= 14) is %s' % 'met' if sample >= 14 else 'not met')
+ if collected_nr_regions[4] < 14:
+ print('full nr_regions:')
+ print('\n'.join(collected_nr_regions))
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py
new file mode 100755
index 000000000000..5374d18d1fa8
--- /dev/null
+++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env drgn
+# SPDX-License-Identifier: GPL-2.0
+
+'''
+Read DAMON context data and dump as a json string.
+'''
+import drgn
+from drgn import FaultError, NULL, Object, cast, container_of, execscript, offsetof, reinterpret, sizeof
+from drgn.helpers.common import *
+from drgn.helpers.linux import *
+
+import json
+import sys
+
+if "prog" not in globals():
+ try:
+ prog = drgn.get_default_prog()
+ except drgn.NoDefaultProgramError:
+ prog = drgn.program_from_kernel()
+ drgn.set_default_prog(prog)
+
+def to_dict(object, attr_name_converter):
+ d = {}
+ for attr_name, converter in attr_name_converter:
+ d[attr_name] = converter(getattr(object, attr_name))
+ return d
+
+def ops_to_dict(ops):
+ return to_dict(ops, [
+ ['id', int],
+ ])
+
+def intervals_goal_to_dict(goal):
+ return to_dict(goal, [
+ ['access_bp', int],
+ ['aggrs', int],
+ ['min_sample_us', int],
+ ['max_sample_us', int],
+ ])
+
+def attrs_to_dict(attrs):
+ return to_dict(attrs, [
+ ['sample_interval', int],
+ ['aggr_interval', int],
+ ['ops_update_interval', int],
+ ['intervals_goal', intervals_goal_to_dict],
+ ['min_nr_regions', int],
+ ['max_nr_regions', int],
+ ])
+
+def addr_range_to_dict(addr_range):
+ return to_dict(addr_range, [
+ ['start', int],
+ ['end', int],
+ ])
+
+def region_to_dict(region):
+ return to_dict(region, [
+ ['ar', addr_range_to_dict],
+ ['sampling_addr', int],
+ ['nr_accesses', int],
+ ['nr_accesses_bp', int],
+ ['age', int],
+ ])
+
+def regions_to_list(regions):
+ return [region_to_dict(r)
+ for r in list_for_each_entry(
+ 'struct damon_region', regions.address_of_(), 'list')]
+
+def target_to_dict(target):
+ return to_dict(target, [
+ ['pid', int],
+ ['nr_regions', int],
+ ['regions_list', regions_to_list],
+ ['obsolete', bool],
+ ])
+
+def targets_to_list(targets):
+ return [target_to_dict(t)
+ for t in list_for_each_entry(
+ 'struct damon_target', targets.address_of_(), 'list')]
+
+def damos_access_pattern_to_dict(pattern):
+ return to_dict(pattern, [
+ ['min_sz_region', int],
+ ['max_sz_region', int],
+ ['min_nr_accesses', int],
+ ['max_nr_accesses', int],
+ ['min_age_region', int],
+ ['max_age_region', int],
+ ])
+
+def damos_quota_goal_to_dict(goal):
+ return to_dict(goal, [
+ ['metric', int],
+ ['target_value', int],
+ ['current_value', int],
+ ['last_psi_total', int],
+ ['nid', int],
+ ])
+
+def damos_quota_goals_to_list(goals):
+ return [damos_quota_goal_to_dict(g)
+ for g in list_for_each_entry(
+ 'struct damos_quota_goal', goals.address_of_(), 'list')]
+
+def damos_quota_to_dict(quota):
+ return to_dict(quota, [
+ ['reset_interval', int],
+ ['ms', int], ['sz', int],
+ ['goals', damos_quota_goals_to_list],
+ ['esz', int],
+ ['weight_sz', int],
+ ['weight_nr_accesses', int],
+ ['weight_age', int],
+ ])
+
+def damos_watermarks_to_dict(watermarks):
+ return to_dict(watermarks, [
+ ['metric', int],
+ ['interval', int],
+ ['high', int], ['mid', int], ['low', int],
+ ])
+
+def damos_migrate_dests_to_dict(dests):
+ nr_dests = int(dests.nr_dests)
+ node_id_arr = []
+ weight_arr = []
+ for i in range(nr_dests):
+ node_id_arr.append(int(dests.node_id_arr[i]))
+ weight_arr.append(int(dests.weight_arr[i]))
+ return {
+ 'node_id_arr': node_id_arr,
+ 'weight_arr': weight_arr,
+ 'nr_dests': nr_dests,
+ }
+
+def damos_filter_to_dict(damos_filter):
+ filter_type_keyword = {
+ 0: 'anon',
+ 1: 'active',
+ 2: 'memcg',
+ 3: 'young',
+ 4: 'hugepage_size',
+ 5: 'unmapped',
+ 6: 'addr',
+ 7: 'target'
+ }
+ dict_ = {
+ 'type': filter_type_keyword[int(damos_filter.type)],
+ 'matching': bool(damos_filter.matching),
+ 'allow': bool(damos_filter.allow),
+ }
+ type_ = dict_['type']
+ if type_ == 'memcg':
+ dict_['memcg_id'] = int(damos_filter.memcg_id)
+ elif type_ == 'addr':
+ dict_['addr_range'] = [int(damos_filter.addr_range.start),
+ int(damos_filter.addr_range.end)]
+ elif type_ == 'target':
+ dict_['target_idx'] = int(damos_filter.target_idx)
+ elif type_ == 'hugeapge_size':
+ dict_['sz_range'] = [int(damos_filter.sz_range.min),
+ int(damos_filter.sz_range.max)]
+ return dict_
+
+def scheme_to_dict(scheme):
+ dict_ = to_dict(scheme, [
+ ['pattern', damos_access_pattern_to_dict],
+ ['action', int],
+ ['apply_interval_us', int],
+ ['quota', damos_quota_to_dict],
+ ['wmarks', damos_watermarks_to_dict],
+ ['target_nid', int],
+ ['migrate_dests', damos_migrate_dests_to_dict],
+ ])
+ core_filters = []
+ for f in list_for_each_entry(
+ 'struct damos_filter', scheme.core_filters.address_of_(), 'list'):
+ core_filters.append(damos_filter_to_dict(f))
+ dict_['core_filters'] = core_filters
+ ops_filters = []
+ for f in list_for_each_entry(
+ 'struct damos_filter', scheme.ops_filters.address_of_(), 'list'):
+ ops_filters.append(damos_filter_to_dict(f))
+ dict_['ops_filters'] = ops_filters
+
+ return dict_
+
+def schemes_to_list(schemes):
+ return [scheme_to_dict(s)
+ for s in list_for_each_entry(
+ 'struct damos', schemes.address_of_(), 'list')]
+
+def damon_ctx_to_dict(ctx):
+ return to_dict(ctx, [
+ ['ops', ops_to_dict],
+ ['attrs', attrs_to_dict],
+ ['adaptive_targets', targets_to_list],
+ ['schemes', schemes_to_list],
+ ])
+
+def main():
+ if len(sys.argv) < 3:
+ print('Usage: %s <kdamond pid> <file>' % sys.argv[0])
+ exit(1)
+
+ pid = int(sys.argv[1])
+ file_to_store = sys.argv[2]
+
+ kthread_data = cast('struct kthread *',
+ find_task(prog, pid).worker_private).data
+ ctx = cast('struct damon_ctx *', kthread_data)
+ status = {'contexts': [damon_ctx_to_dict(ctx)]}
+ if file_to_store == 'stdout':
+ print(json.dumps(status, indent=4))
+ else:
+ with open(file_to_store, 'w') as f:
+ json.dump(status, f, indent=4)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh
new file mode 100755
index 000000000000..1e4849db78a9
--- /dev/null
+++ b/tools/testing/selftests/damon/lru_sort.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _common.sh
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_dependencies
+
+damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled"
+if [ ! -f "$damon_lru_sort_enabled" ]
+then
+ echo "No 'enabled' file. Maybe DAMON_LRU_SORT not built"
+ exit $ksft_skip
+fi
+
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+ echo "Another kdamond is running"
+ exit $ksft_skip
+fi
+
+echo Y > "$damon_lru_sort_enabled"
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 1 ]
+then
+ echo "kdamond is not turned on"
+ exit 1
+fi
+
+echo N > "$damon_lru_sort_enabled"
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+ echo "kdamond is not turned off"
+ exit 1
+fi
diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh
new file mode 100755
index 000000000000..e56ceb035129
--- /dev/null
+++ b/tools/testing/selftests/damon/reclaim.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _common.sh
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_dependencies
+
+damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled"
+if [ ! -f "$damon_reclaim_enabled" ]
+then
+ echo "No 'enabled' file. Maybe DAMON_RECLAIM not built"
+ exit $ksft_skip
+fi
+
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+ echo "Another kdamond is running"
+ exit $ksft_skip
+fi
+
+echo Y > "$damon_reclaim_enabled"
+
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 1 ]
+then
+ echo "kdamond is not turned on"
+ exit 1
+fi
+
+echo N > "$damon_reclaim_enabled"
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+ echo "kdamond is not turned off"
+ exit 1
+fi
diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py
new file mode 100755
index 000000000000..9cca71eb0325
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import os
+import subprocess
+
+import _damon_sysfs
+
+def dump_damon_status_dict(pid):
+ try:
+ subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL)
+ except:
+ return None, 'drgn not found'
+ file_dir = os.path.dirname(os.path.abspath(__file__))
+ dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py')
+ rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'],
+ stderr=subprocess.DEVNULL)
+ if rc != 0:
+ return None, 'drgn fail'
+ try:
+ with open('damon_dump_output', 'r') as f:
+ return json.load(f), None
+ except Exception as e:
+ return None, 'json.load fail (%s)' % e
+
+def fail(expectation, status):
+ print('unexpected %s' % expectation)
+ print(json.dumps(status, indent=4))
+ exit(1)
+
+def assert_true(condition, expectation, status):
+ if condition is not True:
+ fail(expectation, status)
+
+def assert_watermarks_committed(watermarks, dump):
+ wmark_metric_val = {
+ 'none': 0,
+ 'free_mem_rate': 1,
+ }
+ assert_true(dump['metric'] == wmark_metric_val[watermarks.metric],
+ 'metric', dump)
+ assert_true(dump['interval'] == watermarks.interval, 'interval', dump)
+ assert_true(dump['high'] == watermarks.high, 'high', dump)
+ assert_true(dump['mid'] == watermarks.mid, 'mid', dump)
+ assert_true(dump['low'] == watermarks.low, 'low', dump)
+
+def assert_quota_goal_committed(qgoal, dump):
+ metric_val = {
+ 'user_input': 0,
+ 'some_mem_psi_us': 1,
+ 'node_mem_used_bp': 2,
+ 'node_mem_free_bp': 3,
+ }
+ assert_true(dump['metric'] == metric_val[qgoal.metric], 'metric', dump)
+ assert_true(dump['target_value'] == qgoal.target_value, 'target_value',
+ dump)
+ if qgoal.metric == 'user_input':
+ assert_true(dump['current_value'] == qgoal.current_value,
+ 'current_value', dump)
+ assert_true(dump['nid'] == qgoal.nid, 'nid', dump)
+
+def assert_quota_committed(quota, dump):
+ assert_true(dump['reset_interval'] == quota.reset_interval_ms,
+ 'reset_interval', dump)
+ assert_true(dump['ms'] == quota.ms, 'ms', dump)
+ assert_true(dump['sz'] == quota.sz, 'sz', dump)
+ for idx, qgoal in enumerate(quota.goals):
+ assert_quota_goal_committed(qgoal, dump['goals'][idx])
+ assert_true(dump['weight_sz'] == quota.weight_sz_permil, 'weight_sz', dump)
+ assert_true(dump['weight_nr_accesses'] == quota.weight_nr_accesses_permil,
+ 'weight_nr_accesses', dump)
+ assert_true(
+ dump['weight_age'] == quota.weight_age_permil, 'weight_age', dump)
+
+
+def assert_migrate_dests_committed(dests, dump):
+ assert_true(dump['nr_dests'] == len(dests.dests), 'nr_dests', dump)
+ for idx, dest in enumerate(dests.dests):
+ assert_true(dump['node_id_arr'][idx] == dest.id, 'node_id', dump)
+ assert_true(dump['weight_arr'][idx] == dest.weight, 'weight', dump)
+
+def assert_filter_committed(filter_, dump):
+ assert_true(filter_.type_ == dump['type'], 'type', dump)
+ assert_true(filter_.matching == dump['matching'], 'matching', dump)
+ assert_true(filter_.allow == dump['allow'], 'allow', dump)
+ # TODO: check memcg_path and memcg_id if type is memcg
+ if filter_.type_ == 'addr':
+ assert_true([filter_.addr_start, filter_.addr_end] ==
+ dump['addr_range'], 'addr_range', dump)
+ elif filter_.type_ == 'target':
+ assert_true(filter_.target_idx == dump['target_idx'], 'target_idx',
+ dump)
+ elif filter_.type_ == 'hugepage_size':
+ assert_true([filter_.min_, filter_.max_] == dump['sz_range'],
+ 'sz_range', dump)
+
+def assert_access_pattern_committed(pattern, dump):
+ assert_true(dump['min_sz_region'] == pattern.size[0], 'min_sz_region',
+ dump)
+ assert_true(dump['max_sz_region'] == pattern.size[1], 'max_sz_region',
+ dump)
+ assert_true(dump['min_nr_accesses'] == pattern.nr_accesses[0],
+ 'min_nr_accesses', dump)
+ assert_true(dump['max_nr_accesses'] == pattern.nr_accesses[1],
+ 'max_nr_accesses', dump)
+ assert_true(dump['min_age_region'] == pattern.age[0], 'min_age_region',
+ dump)
+ assert_true(dump['max_age_region'] == pattern.age[1], 'miaxage_region',
+ dump)
+
+def assert_scheme_committed(scheme, dump):
+ assert_access_pattern_committed(scheme.access_pattern, dump['pattern'])
+ action_val = {
+ 'willneed': 0,
+ 'cold': 1,
+ 'pageout': 2,
+ 'hugepage': 3,
+ 'nohugeapge': 4,
+ 'lru_prio': 5,
+ 'lru_deprio': 6,
+ 'migrate_hot': 7,
+ 'migrate_cold': 8,
+ 'stat': 9,
+ }
+ assert_true(dump['action'] == action_val[scheme.action], 'action', dump)
+ assert_true(dump['apply_interval_us'] == scheme. apply_interval_us,
+ 'apply_interval_us', dump)
+ assert_true(dump['target_nid'] == scheme.target_nid, 'target_nid', dump)
+ assert_migrate_dests_committed(scheme.dests, dump['migrate_dests'])
+ assert_quota_committed(scheme.quota, dump['quota'])
+ assert_watermarks_committed(scheme.watermarks, dump['wmarks'])
+ # TODO: test filters directory
+ for idx, f in enumerate(scheme.core_filters.filters):
+ assert_filter_committed(f, dump['core_filters'][idx])
+ for idx, f in enumerate(scheme.ops_filters.filters):
+ assert_filter_committed(f, dump['ops_filters'][idx])
+
+def assert_schemes_committed(schemes, dump):
+ assert_true(len(schemes) == len(dump), 'len_schemes', dump)
+ for idx, scheme in enumerate(schemes):
+ assert_scheme_committed(scheme, dump[idx])
+
+def assert_monitoring_attrs_committed(attrs, dump):
+ assert_true(dump['sample_interval'] == attrs.sample_us, 'sample_interval',
+ dump)
+ assert_true(dump['aggr_interval'] == attrs.aggr_us, 'aggr_interval', dump)
+ assert_true(dump['intervals_goal']['access_bp'] ==
+ attrs.intervals_goal.access_bp, 'access_bp',
+ dump['intervals_goal'])
+ assert_true(dump['intervals_goal']['aggrs'] == attrs.intervals_goal.aggrs,
+ 'aggrs', dump['intervals_goal'])
+ assert_true(dump['intervals_goal']['min_sample_us'] ==
+ attrs.intervals_goal.min_sample_us, 'min_sample_us',
+ dump['intervals_goal'])
+ assert_true(dump['intervals_goal']['max_sample_us'] ==
+ attrs.intervals_goal.max_sample_us, 'max_sample_us',
+ dump['intervals_goal'])
+
+ assert_true(dump['ops_update_interval'] == attrs.update_us,
+ 'ops_update_interval', dump)
+ assert_true(dump['min_nr_regions'] == attrs.min_nr_regions,
+ 'min_nr_regions', dump)
+ assert_true(dump['max_nr_regions'] == attrs.max_nr_regions,
+ 'max_nr_regions', dump)
+
+def assert_monitoring_target_committed(target, dump):
+ # target.pid is the pid "number", while dump['pid'] is 'struct pid'
+ # pointer, and hence cannot be compared.
+ assert_true(dump['obsolete'] == target.obsolete, 'target obsolete', dump)
+
+def assert_monitoring_targets_committed(targets, dump):
+ assert_true(len(targets) == len(dump), 'len_targets', dump)
+ for idx, target in enumerate(targets):
+ assert_monitoring_target_committed(target, dump[idx])
+
+def assert_ctx_committed(ctx, dump):
+ ops_val = {
+ 'vaddr': 0,
+ 'fvaddr': 1,
+ 'paddr': 2,
+ }
+ assert_true(dump['ops']['id'] == ops_val[ctx.ops], 'ops_id', dump)
+ assert_monitoring_attrs_committed(ctx.monitoring_attrs, dump['attrs'])
+ assert_monitoring_targets_committed(ctx.targets, dump['adaptive_targets'])
+ assert_schemes_committed(ctx.schemes, dump['schemes'])
+
+def assert_ctxs_committed(kdamonds):
+ status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print(err)
+ kdamonds.stop()
+ exit(1)
+
+ ctxs = kdamonds.kdamonds[0].contexts
+ dump = status['contexts']
+ assert_true(len(ctxs) == len(dump), 'ctxs length', dump)
+ for idx, ctx in enumerate(ctxs):
+ assert_ctx_committed(ctx, dump[idx])
+
+def main():
+ kdamonds = _damon_sysfs.Kdamonds(
+ [_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ targets=[_damon_sysfs.DamonTarget(pid=-1)],
+ schemes=[_damon_sysfs.Damos()],
+ )])])
+ err = kdamonds.start()
+ if err is not None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ assert_ctxs_committed(kdamonds)
+
+ context = _damon_sysfs.DamonCtx(
+ monitoring_attrs=_damon_sysfs.DamonAttrs(
+ sample_us=100000, aggr_us=2000000,
+ intervals_goal=_damon_sysfs.IntervalsGoal(
+ access_bp=400, aggrs=3, min_sample_us=5000,
+ max_sample_us=10000000),
+ update_us=2000000),
+ schemes=[_damon_sysfs.Damos(
+ action='pageout',
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ size=[4096, 2**10],
+ nr_accesses=[3, 317],
+ age=[5,71]),
+ quota=_damon_sysfs.DamosQuota(
+ sz=100*1024*1024, ms=100,
+ goals=[_damon_sysfs.DamosQuotaGoal(
+ metric='node_mem_used_bp',
+ target_value=9950,
+ nid=1)],
+ reset_interval_ms=1500,
+ weight_sz_permil=20,
+ weight_nr_accesses_permil=200,
+ weight_age_permil=1000),
+ watermarks=_damon_sysfs.DamosWatermarks(
+ metric = 'free_mem_rate', interval = 500000, # 500 ms
+ high = 500, mid = 400, low = 50),
+ target_nid=1,
+ apply_interval_us=1000000,
+ dests=_damon_sysfs.DamosDests(
+ dests=[_damon_sysfs.DamosDest(id=1, weight=30),
+ _damon_sysfs.DamosDest(id=0, weight=70)]),
+ core_filters=[
+ _damon_sysfs.DamosFilter(type_='addr', matching=True,
+ allow=False, addr_start=42,
+ addr_end=4242),
+ ],
+ ops_filters=[
+ _damon_sysfs.DamosFilter(type_='anon', matching=True,
+ allow=True),
+ ],
+ )])
+ context.idx = 0
+ context.kdamond = kdamonds.kdamonds[0]
+ kdamonds.kdamonds[0].contexts = [context]
+ kdamonds.kdamonds[0].commit()
+
+ assert_ctxs_committed(kdamonds)
+
+ # test online commitment of minimum context.
+ context = _damon_sysfs.DamonCtx()
+ context.idx = 0
+ context.kdamond = kdamonds.kdamonds[0]
+ kdamonds.kdamonds[0].contexts = [context]
+ kdamonds.kdamonds[0].commit()
+
+ assert_ctxs_committed(kdamonds)
+
+ kdamonds.stop()
+
+ # test obsolete_target.
+ proc1 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ proc2 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ proc3 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ kdamonds = _damon_sysfs.Kdamonds(
+ [_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[
+ _damon_sysfs.DamonTarget(pid=proc1.pid),
+ _damon_sysfs.DamonTarget(pid=proc2.pid),
+ _damon_sysfs.DamonTarget(pid=proc3.pid),
+ ],
+ schemes=[_damon_sysfs.Damos()],
+ )])])
+ err = kdamonds.start()
+ if err is not None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+ kdamonds.kdamonds[0].contexts[0].targets[1].obsolete = True
+ kdamonds.kdamonds[0].commit()
+ del kdamonds.kdamonds[0].contexts[0].targets[1]
+ assert_ctxs_committed(kdamonds)
+ kdamonds.stop()
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
new file mode 100755
index 000000000000..83e3b7f63d81
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -0,0 +1,370 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _common.sh
+
+# Kselftest frmework requirement - SKIP code is 4.
+ksft_skip=4
+
+ensure_write_succ()
+{
+ file=$1
+ content=$2
+ reason=$3
+
+ if ! echo "$content" > "$file"
+ then
+ echo "writing $content to $file failed"
+ echo "expected success because $reason"
+ exit 1
+ fi
+}
+
+ensure_write_fail()
+{
+ file=$1
+ content=$2
+ reason=$3
+
+ if (echo "$content" > "$file") 2> /dev/null
+ then
+ echo "writing $content to $file succeed ($fail_reason)"
+ echo "expected failure because $reason"
+ exit 1
+ fi
+}
+
+ensure_dir()
+{
+ dir=$1
+ to_ensure=$2
+ if [ "$to_ensure" = "exist" ] && [ ! -d "$dir" ]
+ then
+ echo "$dir dir is expected but not found"
+ exit 1
+ elif [ "$to_ensure" = "not_exist" ] && [ -d "$dir" ]
+ then
+ echo "$dir dir is not expected but found"
+ exit 1
+ fi
+}
+
+ensure_file()
+{
+ file=$1
+ to_ensure=$2
+ permission=$3
+ if [ "$to_ensure" = "exist" ]
+ then
+ if [ ! -f "$file" ]
+ then
+ echo "$file is expected but not found"
+ exit 1
+ fi
+ perm=$(stat -c "%a" "$file")
+ if [ ! "$perm" = "$permission" ]
+ then
+ echo "$file permission: expected $permission but $perm"
+ exit 1
+ fi
+ elif [ "$to_ensure" = "not_exist" ] && [ -f "$dir" ]
+ then
+ echo "$file is not expected but found"
+ exit 1
+ fi
+}
+
+test_range()
+{
+ range_dir=$1
+ ensure_dir "$range_dir" "exist"
+ ensure_file "$range_dir/min" "exist" 600
+ ensure_file "$range_dir/max" "exist" 600
+}
+
+test_tried_regions()
+{
+ tried_regions_dir=$1
+ ensure_dir "$tried_regions_dir" "exist"
+ ensure_file "$tried_regions_dir/total_bytes" "exist" "400"
+}
+
+test_stats()
+{
+ stats_dir=$1
+ ensure_dir "$stats_dir" "exist"
+ for f in nr_tried sz_tried nr_applied sz_applied qt_exceeds
+ do
+ ensure_file "$stats_dir/$f" "exist" "400"
+ done
+}
+
+test_filter()
+{
+ filter_dir=$1
+ ensure_file "$filter_dir/type" "exist" "600"
+ ensure_write_succ "$filter_dir/type" "anon" "valid input"
+ ensure_write_succ "$filter_dir/type" "memcg" "valid input"
+ ensure_write_succ "$filter_dir/type" "addr" "valid input"
+ ensure_write_succ "$filter_dir/type" "target" "valid input"
+ ensure_write_fail "$filter_dir/type" "foo" "invalid input"
+ ensure_file "$filter_dir/matching" "exist" "600"
+ ensure_file "$filter_dir/memcg_path" "exist" "600"
+ ensure_file "$filter_dir/addr_start" "exist" "600"
+ ensure_file "$filter_dir/addr_end" "exist" "600"
+ ensure_file "$filter_dir/damon_target_idx" "exist" "600"
+}
+
+test_filters()
+{
+ filters_dir=$1
+ ensure_dir "$filters_dir" "exist"
+ ensure_file "$filters_dir/nr_filters" "exist" "600"
+ ensure_write_succ "$filters_dir/nr_filters" "1" "valid input"
+ test_filter "$filters_dir/0"
+
+ ensure_write_succ "$filters_dir/nr_filters" "2" "valid input"
+ test_filter "$filters_dir/0"
+ test_filter "$filters_dir/1"
+
+ ensure_write_succ "$filters_dir/nr_filters" "0" "valid input"
+ ensure_dir "$filters_dir/0" "not_exist"
+ ensure_dir "$filters_dir/1" "not_exist"
+}
+
+test_watermarks()
+{
+ watermarks_dir=$1
+ ensure_dir "$watermarks_dir" "exist"
+ ensure_file "$watermarks_dir/metric" "exist" "600"
+ ensure_file "$watermarks_dir/interval_us" "exist" "600"
+ ensure_file "$watermarks_dir/high" "exist" "600"
+ ensure_file "$watermarks_dir/mid" "exist" "600"
+ ensure_file "$watermarks_dir/low" "exist" "600"
+}
+
+test_weights()
+{
+ weights_dir=$1
+ ensure_dir "$weights_dir" "exist"
+ ensure_file "$weights_dir/sz_permil" "exist" "600"
+ ensure_file "$weights_dir/nr_accesses_permil" "exist" "600"
+ ensure_file "$weights_dir/age_permil" "exist" "600"
+}
+
+test_goal()
+{
+ goal_dir=$1
+ ensure_dir "$goal_dir" "exist"
+ ensure_file "$goal_dir/target_value" "exist" "600"
+ ensure_file "$goal_dir/current_value" "exist" "600"
+}
+
+test_goals()
+{
+ goals_dir=$1
+ ensure_dir "$goals_dir" "exist"
+ ensure_file "$goals_dir/nr_goals" "exist" "600"
+
+ ensure_write_succ "$goals_dir/nr_goals" "1" "valid input"
+ test_goal "$goals_dir/0"
+
+ ensure_write_succ "$goals_dir/nr_goals" "2" "valid input"
+ test_goal "$goals_dir/0"
+ test_goal "$goals_dir/1"
+
+ ensure_write_succ "$goals_dir/nr_goals" "0" "valid input"
+ ensure_dir "$goals_dir/0" "not_exist"
+ ensure_dir "$goals_dir/1" "not_exist"
+}
+
+test_quotas()
+{
+ quotas_dir=$1
+ ensure_dir "$quotas_dir" "exist"
+ ensure_file "$quotas_dir/ms" "exist" 600
+ ensure_file "$quotas_dir/bytes" "exist" 600
+ ensure_file "$quotas_dir/reset_interval_ms" "exist" 600
+ test_weights "$quotas_dir/weights"
+ test_goals "$quotas_dir/goals"
+}
+
+test_access_pattern()
+{
+ access_pattern_dir=$1
+ ensure_dir "$access_pattern_dir" "exist"
+ test_range "$access_pattern_dir/age"
+ test_range "$access_pattern_dir/nr_accesses"
+ test_range "$access_pattern_dir/sz"
+}
+
+test_scheme()
+{
+ scheme_dir=$1
+ ensure_dir "$scheme_dir" "exist"
+ ensure_file "$scheme_dir/action" "exist" "600"
+ test_access_pattern "$scheme_dir/access_pattern"
+ ensure_file "$scheme_dir/apply_interval_us" "exist" "600"
+ test_quotas "$scheme_dir/quotas"
+ test_watermarks "$scheme_dir/watermarks"
+ test_filters "$scheme_dir/filters"
+ test_stats "$scheme_dir/stats"
+ test_tried_regions "$scheme_dir/tried_regions"
+}
+
+test_schemes()
+{
+ schemes_dir=$1
+ ensure_dir "$schemes_dir" "exist"
+ ensure_file "$schemes_dir/nr_schemes" "exist" 600
+
+ ensure_write_succ "$schemes_dir/nr_schemes" "1" "valid input"
+ test_scheme "$schemes_dir/0"
+
+ ensure_write_succ "$schemes_dir/nr_schemes" "2" "valid input"
+ test_scheme "$schemes_dir/0"
+ test_scheme "$schemes_dir/1"
+
+ ensure_write_succ "$schemes_dir/nr_schemes" "0" "valid input"
+ ensure_dir "$schemes_dir/0" "not_exist"
+ ensure_dir "$schemes_dir/1" "not_exist"
+}
+
+test_region()
+{
+ region_dir=$1
+ ensure_dir "$region_dir" "exist"
+ ensure_file "$region_dir/start" "exist" 600
+ ensure_file "$region_dir/end" "exist" 600
+}
+
+test_regions()
+{
+ regions_dir=$1
+ ensure_dir "$regions_dir" "exist"
+ ensure_file "$regions_dir/nr_regions" "exist" 600
+
+ ensure_write_succ "$regions_dir/nr_regions" "1" "valid input"
+ test_region "$regions_dir/0"
+
+ ensure_write_succ "$regions_dir/nr_regions" "2" "valid input"
+ test_region "$regions_dir/0"
+ test_region "$regions_dir/1"
+
+ ensure_write_succ "$regions_dir/nr_regions" "0" "valid input"
+ ensure_dir "$regions_dir/0" "not_exist"
+ ensure_dir "$regions_dir/1" "not_exist"
+}
+
+test_target()
+{
+ target_dir=$1
+ ensure_dir "$target_dir" "exist"
+ ensure_file "$target_dir/pid_target" "exist" "600"
+ test_regions "$target_dir/regions"
+}
+
+test_targets()
+{
+ targets_dir=$1
+ ensure_dir "$targets_dir" "exist"
+ ensure_file "$targets_dir/nr_targets" "exist" 600
+
+ ensure_write_succ "$targets_dir/nr_targets" "1" "valid input"
+ test_target "$targets_dir/0"
+
+ ensure_write_succ "$targets_dir/nr_targets" "2" "valid input"
+ test_target "$targets_dir/0"
+ test_target "$targets_dir/1"
+
+ ensure_write_succ "$targets_dir/nr_targets" "0" "valid input"
+ ensure_dir "$targets_dir/0" "not_exist"
+ ensure_dir "$targets_dir/1" "not_exist"
+}
+
+test_intervals()
+{
+ intervals_dir=$1
+ ensure_dir "$intervals_dir" "exist"
+ ensure_file "$intervals_dir/aggr_us" "exist" "600"
+ ensure_file "$intervals_dir/sample_us" "exist" "600"
+ ensure_file "$intervals_dir/update_us" "exist" "600"
+}
+
+test_monitoring_attrs()
+{
+ monitoring_attrs_dir=$1
+ ensure_dir "$monitoring_attrs_dir" "exist"
+ test_intervals "$monitoring_attrs_dir/intervals"
+ test_range "$monitoring_attrs_dir/nr_regions"
+}
+
+test_context()
+{
+ context_dir=$1
+ ensure_dir "$context_dir" "exist"
+ ensure_file "$context_dir/avail_operations" "exit" 400
+ ensure_file "$context_dir/operations" "exist" 600
+ test_monitoring_attrs "$context_dir/monitoring_attrs"
+ test_targets "$context_dir/targets"
+ test_schemes "$context_dir/schemes"
+}
+
+test_contexts()
+{
+ contexts_dir=$1
+ ensure_dir "$contexts_dir" "exist"
+ ensure_file "$contexts_dir/nr_contexts" "exist" 600
+
+ ensure_write_succ "$contexts_dir/nr_contexts" "1" "valid input"
+ test_context "$contexts_dir/0"
+
+ ensure_write_fail "$contexts_dir/nr_contexts" "2" "only 0/1 are supported"
+ test_context "$contexts_dir/0"
+
+ ensure_write_succ "$contexts_dir/nr_contexts" "0" "valid input"
+ ensure_dir "$contexts_dir/0" "not_exist"
+}
+
+test_kdamond()
+{
+ kdamond_dir=$1
+ ensure_dir "$kdamond_dir" "exist"
+ ensure_file "$kdamond_dir/state" "exist" "600"
+ ensure_file "$kdamond_dir/pid" "exist" 400
+ test_contexts "$kdamond_dir/contexts"
+}
+
+test_kdamonds()
+{
+ kdamonds_dir=$1
+ ensure_dir "$kdamonds_dir" "exist"
+
+ ensure_file "$kdamonds_dir/nr_kdamonds" "exist" "600"
+
+ ensure_write_succ "$kdamonds_dir/nr_kdamonds" "1" "valid input"
+ test_kdamond "$kdamonds_dir/0"
+
+ ensure_write_succ "$kdamonds_dir/nr_kdamonds" "2" "valid input"
+ test_kdamond "$kdamonds_dir/0"
+ test_kdamond "$kdamonds_dir/1"
+
+ ensure_write_succ "$kdamonds_dir/nr_kdamonds" "0" "valid input"
+ ensure_dir "$kdamonds_dir/0" "not_exist"
+ ensure_dir "$kdamonds_dir/1" "not_exist"
+}
+
+test_damon_sysfs()
+{
+ damon_sysfs=$1
+ if [ ! -d "$damon_sysfs" ]
+ then
+ echo "$damon_sysfs not found"
+ exit $ksft_skip
+ fi
+
+ test_kdamonds "$damon_sysfs/kdamonds"
+}
+
+check_dependencies
+test_damon_sysfs "/sys/kernel/mm/damon/admin"
diff --git a/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh
new file mode 100755
index 000000000000..64c5d8c518a4
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+if [ $EUID -ne 0 ]
+then
+ echo "Run as root"
+ exit $ksft_skip
+fi
+
+damon_sysfs="/sys/kernel/mm/damon/admin"
+if [ ! -d "$damon_sysfs" ]
+then
+ echo "damon sysfs not found"
+ exit $ksft_skip
+fi
+
+# ensure filter directory
+echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/nr_filters"
+
+filter_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/0"
+
+before_kb=$(grep Slab /proc/meminfo | awk '{print $2}')
+
+# try to leak 3000 KiB
+for i in {1..102400};
+do
+ echo "012345678901234567890123456789" > "$filter_dir/memcg_path"
+done
+
+after_kb=$(grep Slab /proc/meminfo | awk '{print $2}')
+# expect up to 1500 KiB free from other tasks memory
+expected_after_kb_max=$((before_kb + 1500))
+
+if [ "$after_kb" -gt "$expected_after_kb_max" ]
+then
+ echo "maybe memcg_path are leaking: $before_kb -> $after_kb"
+ exit 1
+else
+ exit 0
+fi
diff --git a/tools/testing/selftests/damon/sysfs_no_op_commit_break.py b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py
new file mode 100755
index 000000000000..2c65cffe6b54
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import os
+import subprocess
+import sys
+
+import _damon_sysfs
+
+def dump_damon_status_dict(pid):
+ try:
+ subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL)
+ except:
+ return None, 'drgn not found'
+ file_dir = os.path.dirname(os.path.abspath(__file__))
+ dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py')
+ rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'],
+ stderr=subprocess.DEVNULL)
+
+ if rc != 0:
+ return None, f'drgn fail: return code({rc})'
+ try:
+ with open('damon_dump_output', 'r') as f:
+ return json.load(f), None
+ except Exception as e:
+ return None, 'json.load fail (%s)' % e
+
+def main():
+ kdamonds = _damon_sysfs.Kdamonds(
+ [_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ schemes=[_damon_sysfs.Damos(
+ ops_filters=[
+ _damon_sysfs.DamosFilter(
+ type_='anon',
+ matching=True,
+ allow=True,
+ )
+ ]
+ )],
+ )])]
+ )
+
+ err = kdamonds.start()
+ if err is not None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ before_commit_status, err = \
+ dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print('before-commit status dump failed: %s' % err)
+ exit(1)
+
+ kdamonds.kdamonds[0].commit()
+
+ after_commit_status, err = \
+ dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print('after-commit status dump failed: %s' % err)
+ exit(1)
+
+ if before_commit_status != after_commit_status:
+ print(f'before: {json.dumps(before_commit_status, indent=2)}')
+ print(f'after: {json.dumps(after_commit_status, indent=2)}')
+ exit(1)
+
+ kdamonds.stop()
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
new file mode 100755
index 000000000000..35fc32beeaf7
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _common.sh
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_dependencies
+
+damon_sysfs="/sys/kernel/mm/damon/admin"
+if [ ! -d "$damon_sysfs" ]
+then
+ echo "damon sysfs not found"
+ exit $ksft_skip
+fi
+
+# clear log
+dmesg -C
+
+# start DAMON with a scheme
+echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts"
+echo "vaddr" > "$damon_sysfs/kdamonds/0/contexts/0/operations"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/targets/nr_targets"
+echo $$ > "$damon_sysfs/kdamonds/0/contexts/0/targets/0/pid_target"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes"
+scheme_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0"
+echo 4096000 > "$scheme_dir/access_pattern/sz/max"
+echo 20 > "$scheme_dir/access_pattern/nr_accesses/max"
+echo 1024 > "$scheme_dir/access_pattern/age/max"
+echo "on" > "$damon_sysfs/kdamonds/0/state"
+sleep 0.3
+
+# remove scheme sysfs dir
+echo 0 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes"
+
+# try to update stat of already removed scheme sysfs dir
+echo "update_schemes_stats" > "$damon_sysfs/kdamonds/0/state"
+if dmesg | grep -q BUG
+then
+ echo "update_schemes_stats triggers a kernel bug"
+ dmesg
+ exit 1
+fi
+
+# try to update tried regions of already removed scheme sysfs dir
+echo "update_schemes_tried_regions" > "$damon_sysfs/kdamonds/0/state"
+if dmesg | grep -q BUG
+then
+ echo "update_schemes_tried_regions triggers a kernel bug"
+ dmesg
+ exit 1
+fi
+
+echo "off" > "$damon_sysfs/kdamonds/0/state"
diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py
new file mode 100755
index 000000000000..28c887a0108f
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ proc = subprocess.Popen(['sleep', '2'])
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ nr_accesses=[200, 200]))] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ while proc.poll() == None:
+ err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
+ if err != None:
+ print('tried bytes update failed: %s' % err)
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
new file mode 100755
index 000000000000..90ad7409a7a6
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ # >= 25% access rate, >= 200ms age
+ nr_accesses=[5, 20], age=[2, 2**64 - 1]))] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ wss_collected = []
+ while proc.poll() == None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
+ if err != None:
+ print('tried bytes update failed: %s' % err)
+ exit(1)
+
+ wss_collected.append(
+ kdamonds.kdamonds[0].contexts[0].schemes[0].tried_bytes)
+
+ wss_collected.sort()
+ acceptable_error_rate = 0.2
+ for percentile in [50, 75]:
+ sample = wss_collected[int(len(wss_collected) * percentile / 100)]
+ error_rate = abs(sample - sz_region) / sz_region
+ print('%d-th percentile (%d) error %f' %
+ (percentile, sample, error_rate))
+ if error_rate > acceptable_error_rate:
+ print('the error rate is not acceptable (> %f)' %
+ acceptable_error_rate)
+ print('samples are as below')
+ print('\n'.join(['%d' % wss for wss in wss_collected]))
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/devices/error_logs/Makefile b/tools/testing/selftests/devices/error_logs/Makefile
new file mode 100644
index 000000000000..d546c3fb0a7f
--- /dev/null
+++ b/tools/testing/selftests/devices/error_logs/Makefile
@@ -0,0 +1,3 @@
+TEST_PROGS := test_device_error_logs.py
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/devices/error_logs/test_device_error_logs.py b/tools/testing/selftests/devices/error_logs/test_device_error_logs.py
new file mode 100755
index 000000000000..3dd56c8ec92c
--- /dev/null
+++ b/tools/testing/selftests/devices/error_logs/test_device_error_logs.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2024 Collabora Ltd
+#
+# This test checks for the presence of error (or more critical) log messages
+# coming from devices in the kernel log.
+#
+# One failed test case is reported for each device that has outputted error
+# logs. Devices with no errors do not produce a passing test case to avoid
+# polluting the results, therefore a successful run will list 0 tests run.
+#
+
+import glob
+import os
+import re
+import sys
+
+# Allow ksft module to be imported from different directory
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(this_dir, "../../kselftest/"))
+
+import ksft
+
+kmsg = "/dev/kmsg"
+
+RE_log = re.compile(
+ r"(?P<prefix>[0-9]+),(?P<sequence>[0-9]+),(?P<timestamp>[0-9]+),(?P<flag>[^;]*)(,[^;]*)*;(?P<message>.*)"
+)
+RE_tag = re.compile(r" (?P<key>[^=]+)=(?P<value>.*)")
+
+PREFIX_ERROR = 3
+
+logs = []
+error_log_per_device = {}
+
+
+def parse_kmsg():
+ current_log = {}
+
+ with open(kmsg) as f:
+ os.set_blocking(f.fileno(), False)
+
+ for line in f:
+ tag_line = RE_tag.match(line)
+ log_line = RE_log.match(line)
+
+ if log_line:
+ if current_log:
+ logs.append(current_log) # Save last log
+
+ current_log = {
+ "prefix": int(log_line.group("prefix")),
+ "sequence": int(log_line.group("sequence")),
+ "timestamp": int(log_line.group("timestamp")),
+ "flag": log_line.group("flag"),
+ "message": log_line.group("message"),
+ }
+ elif tag_line:
+ current_log[tag_line.group("key")] = tag_line.group("value")
+
+
+def generate_per_device_error_log():
+ for log in logs:
+ if log.get("DEVICE") and log["prefix"] <= PREFIX_ERROR:
+ if not error_log_per_device.get(log["DEVICE"]):
+ error_log_per_device[log["DEVICE"]] = []
+ error_log_per_device[log["DEVICE"]].append(log)
+
+
+parse_kmsg()
+
+generate_per_device_error_log()
+num_tests = len(error_log_per_device)
+
+ksft.print_header()
+ksft.set_plan(num_tests)
+
+for device in error_log_per_device:
+ for log in error_log_per_device[device]:
+ ksft.print_msg(log["message"])
+ ksft.test_result_fail(device)
+if num_tests == 0:
+ ksft.print_msg("No device error logs found")
+ksft.finished()
diff --git a/tools/testing/selftests/devices/probe/Makefile b/tools/testing/selftests/devices/probe/Makefile
new file mode 100644
index 000000000000..f630108c3fdf
--- /dev/null
+++ b/tools/testing/selftests/devices/probe/Makefile
@@ -0,0 +1,4 @@
+TEST_PROGS := test_discoverable_devices.py
+TEST_FILES := boards
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml b/tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml
new file mode 100644
index 000000000000..ff932eb19f0b
--- /dev/null
+++ b/tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# This is the device definition for the XPS 13 9300.
+# The filename "Dell Inc.,XPS 13 9300" was chosen following the format
+# "Vendor,Product", where Vendor comes from
+# /sys/devices/virtual/dmi/id/sys_vendor, and Product comes from
+# /sys/devices/virtual/dmi/id/product_name.
+#
+# See google,spherion.yaml for more information.
+#
+- type: pci-controller
+ # This machine has a single PCI host controller so it's valid to not have any
+ # key to identify the controller. If it had more than one controller, the UID
+ # of the controller from ACPI could be used to distinguish as follows:
+ #acpi-uid: 0
+ devices:
+ - path: 14.0
+ type: usb-controller
+ usb-version: 2
+ devices:
+ - path: 9
+ name: camera
+ interfaces: [0, 1, 2, 3]
+ - path: 10
+ name: bluetooth
+ interfaces: [0, 1]
+ - path: 2.0
+ name: gpu
+ - path: 4.0
+ name: thermal
+ - path: 12.0
+ name: sensors
+ - path: 14.3
+ name: wifi
+ - path: 1d.0/0.0
+ name: ssd
+ - path: 1d.7/0.0
+ name: sdcard-reader
+ - path: 1f.3
+ name: audio
diff --git a/tools/testing/selftests/devices/probe/boards/google,spherion.yaml b/tools/testing/selftests/devices/probe/boards/google,spherion.yaml
new file mode 100644
index 000000000000..3ea843324797
--- /dev/null
+++ b/tools/testing/selftests/devices/probe/boards/google,spherion.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# This is the device definition for the Google Spherion Chromebook.
+# The filename "google,spherion" comes from the Devicetree compatible, so this
+# file will be automatically used when the test is run on that machine.
+#
+# The top-level is a list of controllers, either for USB or PCI(e).
+# Every controller needs to have a 'type' key set to either 'usb-controller' or
+# 'pci-controller'.
+# Every controller needs to be uniquely identified on the platform. To achieve
+# this, several optional keys can be used:
+# - dt-mmio: identify the MMIO address of the controller as defined in the
+# Devicetree.
+# - of-fullname-regex: regular expression to match against the OF_FULLNAME
+# property. Useful when the controller's address is not unique across other
+# sibling controllers. In this case, dt-mmio can't be used, and this property
+# allows the matching to include parent nodes as well to make it unique.
+# - usb-version: for USB controllers to differentiate between USB3 and USB2
+# buses sharing the same controller.
+# - acpi-uid: _UID property of the controller as supplied by the ACPI. Useful to
+# distinguish between multiple PCI host controllers.
+#
+# The 'devices' key defines a list of devices that are accessible under that
+# controller. A device might be a leaf device or another controller (see
+# 'Dell Inc.,XPS 13 9300.yaml').
+#
+# The 'path' key is needed for every child device (that is, not top-level) to
+# define how to reach this device from the parent controller. For USB devices it
+# follows the format \d(.\d)* and denotes the port in the hub at each level in
+# the USB topology. For PCI devices it follows the format \d.\d(/\d.\d)*
+# denoting the device (identified by device-function pair) at each level in the
+# PCI topology.
+#
+# The 'name' key is used in the leaf devices to name the device for clarity in
+# the test output.
+#
+# For USB leaf devices, the 'interfaces' key should contain a list of the
+# interfaces in that device that should be bound to a driver.
+#
+- type: usb-controller
+ dt-mmio: 11200000
+ usb-version: 2
+ devices:
+ - path: 1.4.1
+ interfaces: [0, 1]
+ name: camera
+ - path: 1.4.2
+ interfaces: [0, 1]
+ name: bluetooth
+- type: pci-controller
+ dt-mmio: 11230000
+ devices:
+ - path: 0.0/0.0
+ name: wifi
diff --git a/tools/testing/selftests/devices/probe/test_discoverable_devices.py b/tools/testing/selftests/devices/probe/test_discoverable_devices.py
new file mode 100755
index 000000000000..d7a2bb91c807
--- /dev/null
+++ b/tools/testing/selftests/devices/probe/test_discoverable_devices.py
@@ -0,0 +1,358 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# This script tests for presence and driver binding of devices from discoverable
+# buses (ie USB, PCI).
+#
+# The per-platform YAML file defining the devices to be tested is stored inside
+# the boards/ directory and chosen based on DT compatible or DMI IDs (sys_vendor
+# and product_name).
+#
+# See boards/google,spherion.yaml and boards/'Dell Inc.,XPS 13 9300.yaml' for
+# the description and examples of the file structure and vocabulary.
+#
+
+import argparse
+import glob
+import os
+import re
+import sys
+import yaml
+
+# Allow ksft module to be imported from different directory
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(this_dir, "../../kselftest/"))
+
+import ksft
+
+pci_controllers = []
+usb_controllers = []
+
+sysfs_usb_devices = "/sys/bus/usb/devices/"
+
+
+def find_pci_controller_dirs():
+ sysfs_devices = "/sys/devices"
+ pci_controller_sysfs_dir = "pci[0-9a-f]{4}:[0-9a-f]{2}"
+
+ dir_regex = re.compile(pci_controller_sysfs_dir)
+ for path, dirs, _ in os.walk(sysfs_devices):
+ for d in dirs:
+ if dir_regex.match(d):
+ pci_controllers.append(os.path.join(path, d))
+
+
+def find_usb_controller_dirs():
+ usb_controller_sysfs_dir = r"usb[\d]+"
+
+ dir_regex = re.compile(usb_controller_sysfs_dir)
+ for d in os.scandir(sysfs_usb_devices):
+ if dir_regex.match(d.name):
+ usb_controllers.append(os.path.realpath(d.path))
+
+
+def get_dt_mmio(sysfs_dev_dir):
+ re_dt_mmio = re.compile("OF_FULLNAME=.*@([0-9a-f]+)")
+ dt_mmio = None
+
+ # PCI controllers' sysfs don't have an of_node, so have to read it from the
+ # parent
+ while not dt_mmio:
+ try:
+ with open(os.path.join(sysfs_dev_dir, "uevent")) as f:
+ dt_mmio = re_dt_mmio.search(f.read()).group(1)
+ return dt_mmio
+ except:
+ pass
+ sysfs_dev_dir = os.path.dirname(sysfs_dev_dir)
+
+
+def get_of_fullname(sysfs_dev_dir):
+ re_of_fullname = re.compile("OF_FULLNAME=(.*)")
+ of_full_name = None
+
+ # PCI controllers' sysfs don't have an of_node, so have to read it from the
+ # parent
+ while not of_full_name:
+ try:
+ with open(os.path.join(sysfs_dev_dir, "uevent")) as f:
+ of_fullname = re_of_fullname.search(f.read()).group(1)
+ return of_fullname
+ except:
+ pass
+ sysfs_dev_dir = os.path.dirname(sysfs_dev_dir)
+
+
+def get_acpi_uid(sysfs_dev_dir):
+ with open(os.path.join(sysfs_dev_dir, "firmware_node", "uid")) as f:
+ return f.read()
+
+
+def get_usb_version(sysfs_dev_dir):
+ re_usb_version = re.compile(r"PRODUCT=.*/(\d)/.*")
+ with open(os.path.join(sysfs_dev_dir, "uevent")) as f:
+ return int(re_usb_version.search(f.read()).group(1))
+
+
+def get_usb_busnum(sysfs_dev_dir):
+ re_busnum = re.compile("BUSNUM=(.*)")
+ with open(os.path.join(sysfs_dev_dir, "uevent")) as f:
+ return int(re_busnum.search(f.read()).group(1))
+
+
+def find_controller_in_sysfs(controller, parent_sysfs=None):
+ if controller["type"] == "pci-controller":
+ controllers = pci_controllers
+ elif controller["type"] == "usb-controller":
+ controllers = usb_controllers
+
+ result_controllers = []
+
+ for c in controllers:
+ if parent_sysfs and parent_sysfs not in c:
+ continue
+
+ if controller.get("dt-mmio"):
+ if str(controller["dt-mmio"]) != get_dt_mmio(c):
+ continue
+
+ if controller.get("of-fullname-regex"):
+ re_of_fullname = re.compile(str(controller["of-fullname-regex"]))
+ if not re_of_fullname.match(get_of_fullname(c)):
+ continue
+
+ if controller.get("usb-version"):
+ if controller["usb-version"] != get_usb_version(c):
+ continue
+
+ if controller.get("acpi-uid"):
+ if controller["acpi-uid"] != get_acpi_uid(c):
+ continue
+
+ result_controllers.append(c)
+
+ return result_controllers
+
+
+def is_controller(device):
+ return device.get("type") and "controller" in device.get("type")
+
+
+def path_to_dir(parent_sysfs, dev_type, path):
+ if dev_type == "usb-device":
+ usb_dev_sysfs_fmt = "{}-{}"
+ busnum = get_usb_busnum(parent_sysfs)
+ dirname = os.path.join(
+ sysfs_usb_devices, usb_dev_sysfs_fmt.format(busnum, path)
+ )
+ return [os.path.realpath(dirname)]
+ else:
+ pci_dev_sysfs_fmt = "????:??:{}"
+ path_glob = ""
+ for dev_func in path.split("/"):
+ dev_func = dev_func.zfill(4)
+ path_glob = os.path.join(path_glob, pci_dev_sysfs_fmt.format(dev_func))
+
+ dir_list = glob.glob(os.path.join(parent_sysfs, path_glob))
+
+ return dir_list
+
+
+def find_in_sysfs(device, parent_sysfs=None):
+ if parent_sysfs and device.get("path"):
+ pathdirs = path_to_dir(
+ parent_sysfs, device["meta"]["type"], str(device["path"])
+ )
+ if len(pathdirs) != 1:
+ # Early return to report error
+ return pathdirs
+ pathdir = pathdirs[0]
+ sysfs_path = os.path.join(parent_sysfs, pathdir)
+ else:
+ sysfs_path = parent_sysfs
+
+ if is_controller(device):
+ return find_controller_in_sysfs(device, sysfs_path)
+ else:
+ return [sysfs_path]
+
+
+def check_driver_presence(sysfs_dir, current_node):
+ if current_node["meta"]["type"] == "usb-device":
+ usb_intf_fmt = "*-*:*.{}"
+
+ interfaces = []
+ for i in current_node["interfaces"]:
+ interfaces.append((i, usb_intf_fmt.format(i)))
+
+ for intf_num, intf_dir_fmt in interfaces:
+ test_name = f"{current_node['meta']['pathname']}.{intf_num}.driver"
+
+ intf_dirs = glob.glob(os.path.join(sysfs_dir, intf_dir_fmt))
+ if len(intf_dirs) != 1:
+ ksft.test_result_fail(test_name)
+ continue
+ intf_dir = intf_dirs[0]
+
+ driver_link = os.path.join(sysfs_dir, intf_dir, "driver")
+ ksft.test_result(os.path.isdir(driver_link), test_name)
+ else:
+ driver_link = os.path.join(sysfs_dir, "driver")
+ test_name = current_node["meta"]["pathname"] + ".driver"
+ ksft.test_result(os.path.isdir(driver_link), test_name)
+
+
+def generate_pathname(device):
+ pathname = ""
+
+ if device.get("path"):
+ pathname = str(device["path"])
+
+ if device.get("type"):
+ dev_type = device["type"]
+ if device.get("usb-version"):
+ dev_type = dev_type.replace("usb", "usb" + str(device["usb-version"]))
+ if device.get("acpi-uid") is not None:
+ dev_type = dev_type.replace("pci", "pci" + str(device["acpi-uid"]))
+ pathname = pathname + "/" + dev_type
+
+ if device.get("dt-mmio"):
+ pathname += "@" + str(device["dt-mmio"])
+
+ if device.get("of-fullname-regex"):
+ pathname += "-" + str(device["of-fullname-regex"])
+
+ if device.get("name"):
+ pathname = pathname + "/" + device["name"]
+
+ return pathname
+
+
+def fill_meta_keys(child, parent=None):
+ child["meta"] = {}
+
+ if parent:
+ child["meta"]["type"] = parent["type"].replace("controller", "device")
+
+ pathname = generate_pathname(child)
+ if parent:
+ pathname = parent["meta"]["pathname"] + "/" + pathname
+ child["meta"]["pathname"] = pathname
+
+
+def parse_device_tree_node(current_node, parent_sysfs=None):
+ if not parent_sysfs:
+ fill_meta_keys(current_node)
+
+ sysfs_dirs = find_in_sysfs(current_node, parent_sysfs)
+ if len(sysfs_dirs) != 1:
+ if len(sysfs_dirs) == 0:
+ ksft.test_result_fail(
+ f"Couldn't find in sysfs: {current_node['meta']['pathname']}"
+ )
+ else:
+ ksft.test_result_fail(
+ f"Found multiple sysfs entries for {current_node['meta']['pathname']}: {sysfs_dirs}"
+ )
+ return
+ sysfs_dir = sysfs_dirs[0]
+
+ if not is_controller(current_node):
+ ksft.test_result(
+ os.path.exists(sysfs_dir), current_node["meta"]["pathname"] + ".device"
+ )
+ check_driver_presence(sysfs_dir, current_node)
+ else:
+ for child_device in current_node["devices"]:
+ fill_meta_keys(child_device, current_node)
+ parse_device_tree_node(child_device, sysfs_dir)
+
+
+def count_tests(device_trees):
+ test_count = 0
+
+ def parse_node(device):
+ nonlocal test_count
+ if device.get("devices"):
+ for child in device["devices"]:
+ parse_node(child)
+ else:
+ if device.get("interfaces"):
+ test_count += len(device["interfaces"])
+ else:
+ test_count += 1
+ test_count += 1
+
+ for device_tree in device_trees:
+ parse_node(device_tree)
+
+ return test_count
+
+
+def get_board_filenames():
+ filenames = []
+
+ platform_compatible_file = "/proc/device-tree/compatible"
+ if os.path.exists(platform_compatible_file):
+ with open(platform_compatible_file) as f:
+ for line in f:
+ filenames.extend(line.split("\0"))
+ else:
+ dmi_id_dir = "/sys/devices/virtual/dmi/id"
+ vendor_dmi_file = os.path.join(dmi_id_dir, "sys_vendor")
+ product_dmi_file = os.path.join(dmi_id_dir, "product_name")
+
+ with open(vendor_dmi_file) as f:
+ vendor = f.read().replace("\n", "")
+ with open(product_dmi_file) as f:
+ product = f.read().replace("\n", "")
+
+ filenames = [vendor + "," + product]
+
+ return filenames
+
+
+def run_test(yaml_file):
+ ksft.print_msg(f"Using board file: {yaml_file}")
+
+ with open(yaml_file) as f:
+ device_trees = yaml.safe_load(f)
+
+ ksft.set_plan(count_tests(device_trees))
+
+ for device_tree in device_trees:
+ parse_device_tree_node(device_tree)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+ "--boards-dir", default="boards", help="Directory containing the board YAML files"
+)
+args = parser.parse_args()
+
+find_pci_controller_dirs()
+find_usb_controller_dirs()
+
+ksft.print_header()
+
+if not os.path.exists(args.boards_dir):
+ ksft.print_msg(f"Boards directory '{args.boards_dir}' doesn't exist")
+ ksft.exit_fail()
+
+board_file = ""
+for board_filename in get_board_filenames():
+ full_board_filename = os.path.join(args.boards_dir, board_filename + ".yaml")
+
+ if os.path.exists(full_board_filename):
+ board_file = full_board_filename
+ break
+
+if not board_file:
+ ksft.print_msg("No matching board file found")
+ ksft.exit_fail()
+
+run_test(board_file)
+
+ksft.finished()
diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile
deleted file mode 100644
index aa8e8b5b3864..000000000000
--- a/tools/testing/selftests/dma/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../usr/include/
-
-TEST_GEN_PROGS := dma_map_benchmark
-
-include ../lib.mk
diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config
deleted file mode 100644
index 6102ee3c43cd..000000000000
--- a/tools/testing/selftests/dma/config
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_DMA_MAP_BENCHMARK=y
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
deleted file mode 100644
index 537d65968c48..000000000000
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020 Hisilicon Limited.
- */
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <linux/types.h>
-
-#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark)
-#define DMA_MAP_MAX_THREADS 1024
-#define DMA_MAP_MAX_SECONDS 300
-
-#define DMA_MAP_BIDIRECTIONAL 0
-#define DMA_MAP_TO_DEVICE 1
-#define DMA_MAP_FROM_DEVICE 2
-
-static char *directions[] = {
- "BIDIRECTIONAL",
- "TO_DEVICE",
- "FROM_DEVICE",
-};
-
-struct map_benchmark {
- __u64 avg_map_100ns; /* average map latency in 100ns */
- __u64 map_stddev; /* standard deviation of map latency */
- __u64 avg_unmap_100ns; /* as above */
- __u64 unmap_stddev;
- __u32 threads; /* how many threads will do map/unmap in parallel */
- __u32 seconds; /* how long the test will last */
- __s32 node; /* which numa node this benchmark will run on */
- __u32 dma_bits; /* DMA addressing capability */
- __u32 dma_dir; /* DMA data direction */
- __u8 expansion[84]; /* For future use */
-};
-
-int main(int argc, char **argv)
-{
- struct map_benchmark map;
- int fd, opt;
- /* default single thread, run 20 seconds on NUMA_NO_NODE */
- int threads = 1, seconds = 20, node = -1;
- /* default dma mask 32bit, bidirectional DMA */
- int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
-
- int cmd = DMA_MAP_BENCHMARK;
- char *p;
-
- while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
- switch (opt) {
- case 't':
- threads = atoi(optarg);
- break;
- case 's':
- seconds = atoi(optarg);
- break;
- case 'n':
- node = atoi(optarg);
- break;
- case 'b':
- bits = atoi(optarg);
- break;
- case 'd':
- dir = atoi(optarg);
- break;
- default:
- return -1;
- }
- }
-
- if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
- fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
- DMA_MAP_MAX_THREADS);
- exit(1);
- }
-
- if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) {
- fprintf(stderr, "invalid number of seconds, must be in 1-%d\n",
- DMA_MAP_MAX_SECONDS);
- exit(1);
- }
-
- /* suppose the mininum DMA zone is 1MB in the world */
- if (bits < 20 || bits > 64) {
- fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
- exit(1);
- }
-
- if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE &&
- dir != DMA_MAP_FROM_DEVICE) {
- fprintf(stderr, "invalid dma direction\n");
- exit(1);
- }
-
- fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
- if (fd == -1) {
- perror("open");
- exit(1);
- }
-
- memset(&map, 0, sizeof(map));
- map.seconds = seconds;
- map.threads = threads;
- map.node = node;
- map.dma_bits = bits;
- map.dma_dir = dir;
- if (ioctl(fd, cmd, &map)) {
- perror("ioctl");
- exit(1);
- }
-
- printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n",
- threads, seconds, node, dir[directions]);
- printf("average map latency(us):%.1f standard deviation:%.1f\n",
- map.avg_map_100ns/10.0, map.map_stddev/10.0);
- printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
- map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0);
-
- return 0;
-}
diff --git a/tools/testing/selftests/dmabuf-heaps/.gitignore b/tools/testing/selftests/dmabuf-heaps/.gitignore
new file mode 100644
index 000000000000..b500e76b9045
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/.gitignore
@@ -0,0 +1 @@
+dmabuf-heap
diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile
index 607c2acd2082..9e7e158d5fa3 100644
--- a/tools/testing/selftests/dmabuf-heaps/Makefile
+++ b/tools/testing/selftests/dmabuf-heaps/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -static -O3 -Wl,-no-as-needed -Wall -I../../../../usr/include
+CFLAGS += -static -O3 -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
TEST_GEN_PROGS = dmabuf-heap
diff --git a/tools/testing/selftests/dmabuf-heaps/config b/tools/testing/selftests/dmabuf-heaps/config
new file mode 100644
index 000000000000..be091f1cdfa0
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/config
@@ -0,0 +1,3 @@
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
+CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index 909da9cdda97..fc9694fc4e89 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -13,9 +13,9 @@
#include <sys/types.h>
#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
#include <drm/drm.h>
-
-#include "../../../../include/uapi/linux/dma-heap.h"
+#include "kselftest.h"
#define DEVPATH "/dev/dma_heap"
@@ -29,9 +29,11 @@ static int check_vgem(int fd)
version.name = name;
ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
- if (ret)
+ if (ret || version.name_len != 4)
return 0;
+ name[4] = '\0';
+
return !strcmp(name, "vgem");
}
@@ -91,14 +93,13 @@ static int dmabuf_heap_open(char *name)
char buf[256];
ret = snprintf(buf, 256, "%s/%s", DEVPATH, name);
- if (ret < 0) {
- printf("snprintf failed!\n");
- return ret;
- }
+ if (ret < 0)
+ ksft_exit_fail_msg("snprintf failed! %d\n", ret);
fd = open(buf, O_RDWR);
if (fd < 0)
- printf("open %s failed!\n", buf);
+ ksft_exit_fail_msg("open %s failed: %s\n", buf, strerror(errno));
+
return fd;
}
@@ -130,53 +131,39 @@ static int dmabuf_heap_alloc(int fd, size_t len, unsigned int flags,
dmabuf_fd);
}
-static void dmabuf_sync(int fd, int start_stop)
+static int dmabuf_sync(int fd, int start_stop)
{
struct dma_buf_sync sync = {
.flags = start_stop | DMA_BUF_SYNC_RW,
};
- int ret;
- ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
- if (ret)
- printf("sync failed %d\n", errno);
+ return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
}
#define ONE_MEG (1024 * 1024)
-static int test_alloc_and_import(char *heap_name)
+static void test_alloc_and_import(char *heap_name)
{
int heap_fd = -1, dmabuf_fd = -1, importer_fd = -1;
uint32_t handle = 0;
void *p = NULL;
int ret;
- printf("Testing heap: %s\n", heap_name);
-
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
- printf("Allocating 1 MEG\n");
+ ksft_print_msg("Testing allocation and importing:\n");
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd);
if (ret) {
- printf("Allocation Failed!\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (Allocation Failed!) %d\n", ret);
+ return;
}
+
/* mmap and write a simple pattern */
- p = mmap(NULL,
- ONE_MEG,
- PROT_READ | PROT_WRITE,
- MAP_SHARED,
- dmabuf_fd,
- 0);
+ p = mmap(NULL, ONE_MEG, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd, 0);
if (p == MAP_FAILED) {
- printf("mmap() failed: %m\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (mmap() failed): %s\n", strerror(errno));
+ goto close_and_return;
}
- printf("mmap passed\n");
dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
memset(p, 1, ONE_MEG / 2);
@@ -185,37 +172,113 @@ static int test_alloc_and_import(char *heap_name)
importer_fd = open_vgem();
if (importer_fd < 0) {
- ret = importer_fd;
- printf("Failed to open vgem\n");
- goto out;
+ ksft_test_result_skip("Could not open vgem %d\n", importer_fd);
+ } else {
+ ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle);
+ ksft_test_result(ret >= 0, "Import buffer %d\n", ret);
}
- ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle);
+ ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
if (ret < 0) {
- printf("Failed to import buffer\n");
+ ksft_print_msg("FAIL (DMA_BUF_SYNC_START failed!) %d\n", ret);
goto out;
}
- printf("import passed\n");
- dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
memset(p, 0xff, ONE_MEG);
- dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
- printf("syncs passed\n");
+ ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
+ if (ret < 0) {
+ ksft_print_msg("FAIL (DMA_BUF_SYNC_END failed!) %d\n", ret);
+ goto out;
+ }
close_handle(importer_fd, handle);
- ret = 0;
+ ksft_test_result_pass("%s dmabuf sync succeeded\n", __func__);
+ return;
out:
- if (p)
- munmap(p, ONE_MEG);
- if (importer_fd >= 0)
- close(importer_fd);
- if (dmabuf_fd >= 0)
- close(dmabuf_fd);
- if (heap_fd >= 0)
- close(heap_fd);
+ ksft_test_result_fail("%s dmabuf sync failed\n", __func__);
+ munmap(p, ONE_MEG);
+ close(importer_fd);
- return ret;
+close_and_return:
+ close(dmabuf_fd);
+ close(heap_fd);
+}
+
+static void test_alloc_zeroed(char *heap_name, size_t size)
+{
+ int heap_fd = -1, dmabuf_fd[32];
+ int i, j, k, ret;
+ void *p = NULL;
+ char *c;
+
+ ksft_print_msg("Testing alloced %ldk buffers are zeroed:\n", size / 1024);
+ heap_fd = dmabuf_heap_open(heap_name);
+
+ /* Allocate and fill a bunch of buffers */
+ for (i = 0; i < 32; i++) {
+ ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
+ if (ret) {
+ ksft_test_result_fail("FAIL (Allocation (%i) failed) %d\n", i, ret);
+ goto close_and_return;
+ }
+
+ /* mmap and fill with simple pattern */
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
+ if (p == MAP_FAILED) {
+ ksft_test_result_fail("FAIL (mmap() failed!): %s\n", strerror(errno));
+ goto close_and_return;
+ }
+
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
+ memset(p, 0xff, size);
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
+ munmap(p, size);
+ }
+ /* close them all */
+ for (i = 0; i < 32; i++)
+ close(dmabuf_fd[i]);
+ ksft_test_result_pass("Allocate and fill a bunch of buffers\n");
+
+ /* Allocate and validate all buffers are zeroed */
+ for (i = 0; i < 32; i++) {
+ ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
+ if (ret < 0) {
+ ksft_test_result_fail("FAIL (Allocation (%i) failed) %d\n", i, ret);
+ goto close_and_return;
+ }
+
+ /* mmap and validate everything is zero */
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
+ if (p == MAP_FAILED) {
+ ksft_test_result_fail("FAIL (mmap() failed!): %s\n", strerror(errno));
+ goto close_and_return;
+ }
+
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
+ c = (char *)p;
+ for (j = 0; j < size; j++) {
+ if (c[j] != 0) {
+ ksft_print_msg("FAIL (Allocated buffer not zeroed @ %i)\n", j);
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
+ munmap(p, size);
+ goto out;
+ }
+ }
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
+ munmap(p, size);
+ }
+
+out:
+ ksft_test_result(i == 32, "Allocate and validate all buffers are zeroed\n");
+
+close_and_return:
+ /* close them all */
+ for (k = 0; k < i; k++)
+ close(dmabuf_fd[k]);
+
+ close(heap_fd);
+ return;
}
/* Test the ioctl version compatibility w/ a smaller structure then expected */
@@ -283,115 +346,97 @@ static int dmabuf_heap_alloc_newer(int fd, size_t len, unsigned int flags,
return ret;
}
-static int test_alloc_compat(char *heap_name)
+static void test_alloc_compat(char *heap_name)
{
- int heap_fd = -1, dmabuf_fd = -1;
- int ret;
+ int ret, heap_fd = -1, dmabuf_fd = -1;
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
- printf("Testing (theoretical)older alloc compat\n");
+ ksft_print_msg("Testing (theoretical) older alloc compat:\n");
ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd);
- if (ret) {
- printf("Older compat allocation failed!\n");
- ret = -1;
- goto out;
- }
- close(dmabuf_fd);
+ if (dmabuf_fd >= 0)
+ close(dmabuf_fd);
+ ksft_test_result(!ret, "dmabuf_heap_alloc_older\n");
- printf("Testing (theoretical)newer alloc compat\n");
+ ksft_print_msg("Testing (theoretical) newer alloc compat:\n");
ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd);
- if (ret) {
- printf("Newer compat allocation failed!\n");
- ret = -1;
- goto out;
- }
- printf("Ioctl compatibility tests passed\n");
-out:
if (dmabuf_fd >= 0)
close(dmabuf_fd);
- if (heap_fd >= 0)
- close(heap_fd);
+ ksft_test_result(!ret, "dmabuf_heap_alloc_newer\n");
- return ret;
+ close(heap_fd);
}
-static int test_alloc_errors(char *heap_name)
+static void test_alloc_errors(char *heap_name)
{
int heap_fd = -1, dmabuf_fd = -1;
int ret;
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
- printf("Testing expected error cases\n");
+ ksft_print_msg("Testing expected error cases:\n");
ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd);
- if (!ret) {
- printf("Did not see expected error (invalid fd)!\n");
- ret = -1;
- goto out;
- }
+ ksft_test_result(ret, "Error expected on invalid fd %d\n", ret);
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd);
- if (!ret) {
- printf("Did not see expected error (invalid heap flags)!\n");
- ret = -1;
- goto out;
- }
+ ksft_test_result(ret, "Error expected on invalid heap flags %d\n", ret);
ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG,
~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd);
- if (!ret) {
- printf("Did not see expected error (invalid fd flags)!\n");
- ret = -1;
- goto out;
- }
+ ksft_test_result(ret, "Error expected on invalid heap flags %d\n", ret);
- printf("Expected error checking passed\n");
- ret = 0;
-out:
if (dmabuf_fd >= 0)
close(dmabuf_fd);
- if (heap_fd >= 0)
- close(heap_fd);
+ close(heap_fd);
+}
- return ret;
+static int numer_of_heaps(void)
+{
+ DIR *d = opendir(DEVPATH);
+ struct dirent *dir;
+ int heaps = 0;
+
+ while ((dir = readdir(d))) {
+ if (!strncmp(dir->d_name, ".", 2))
+ continue;
+ if (!strncmp(dir->d_name, "..", 3))
+ continue;
+ heaps++;
+ }
+
+ return heaps;
}
int main(void)
{
- DIR *d;
struct dirent *dir;
- int ret = -1;
+ DIR *d;
+
+ ksft_print_header();
d = opendir(DEVPATH);
if (!d) {
- printf("No %s directory?\n", DEVPATH);
- return -1;
+ ksft_print_msg("No %s directory?\n", DEVPATH);
+ return KSFT_SKIP;
}
- while ((dir = readdir(d)) != NULL) {
+ ksft_set_plan(11 * numer_of_heaps());
+
+ while ((dir = readdir(d))) {
if (!strncmp(dir->d_name, ".", 2))
continue;
if (!strncmp(dir->d_name, "..", 3))
continue;
- ret = test_alloc_and_import(dir->d_name);
- if (ret)
- break;
-
- ret = test_alloc_compat(dir->d_name);
- if (ret)
- break;
-
- ret = test_alloc_errors(dir->d_name);
- if (ret)
- break;
+ ksft_print_msg("Testing heap: %s\n", dir->d_name);
+ ksft_print_msg("=======================================\n");
+ test_alloc_and_import(dir->d_name);
+ test_alloc_zeroed(dir->d_name, 4 * 1024);
+ test_alloc_zeroed(dir->d_name, ONE_MEG);
+ test_alloc_compat(dir->d_name);
+ test_alloc_errors(dir->d_name);
}
closedir(d);
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/drivers/.gitignore b/tools/testing/selftests/drivers/.gitignore
index ca74f2e1c719..09e23b5afa96 100644
--- a/tools/testing/selftests/drivers/.gitignore
+++ b/tools/testing/selftests/drivers/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
/dma-buf/udmabuf
+/s390x/uvdevice/test_uvdevice
diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile b/tools/testing/selftests/drivers/dma-buf/Makefile
index 79cb16b4e01a..441407bb0e80 100644
--- a/tools/testing/selftests/drivers/dma-buf/Makefile
+++ b/tools/testing/selftests/drivers/dma-buf/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -I../../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := udmabuf
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
index 4de902ea14d8..d78aec662586 100644
--- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -1,56 +1,170 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
#include <malloc.h>
+#include <stdbool.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
+#include <sys/mman.h>
#include <linux/memfd.h>
#include <linux/udmabuf.h>
+#include "kselftest.h"
#define TEST_PREFIX "drivers/dma-buf/udmabuf"
#define NUM_PAGES 4
+#define NUM_ENTRIES 4
+#define MEMFD_SIZE 1024 /* in pages */
+
+static unsigned int page_size;
-static int memfd_create(const char *name, unsigned int flags)
+static int create_memfd_with_seals(off64_t size, bool hpage)
{
- return syscall(__NR_memfd_create, name, flags);
+ int memfd, ret;
+ unsigned int flags = MFD_ALLOW_SEALING;
+
+ if (hpage)
+ flags |= MFD_HUGETLB;
+
+ memfd = memfd_create("udmabuf-test", flags);
+ if (memfd < 0) {
+ ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX);
+ exit(KSFT_SKIP);
+ }
+
+ ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+ if (ret < 0) {
+ ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
+ exit(KSFT_SKIP);
+ }
+
+ ret = ftruncate(memfd, size);
+ if (ret == -1) {
+ ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
+ }
+
+ return memfd;
+}
+
+static int create_udmabuf_list(int devfd, int memfd, off64_t memfd_size)
+{
+ struct udmabuf_create_list *list;
+ int ubuf_fd, i;
+
+ list = malloc(sizeof(struct udmabuf_create_list) +
+ sizeof(struct udmabuf_create_item) * NUM_ENTRIES);
+ if (!list) {
+ ksft_print_msg("%s: [FAIL, udmabuf-malloc]\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
+ }
+
+ for (i = 0; i < NUM_ENTRIES; i++) {
+ list->list[i].memfd = memfd;
+ list->list[i].offset = i * (memfd_size / NUM_ENTRIES);
+ list->list[i].size = getpagesize() * NUM_PAGES;
+ }
+
+ list->count = NUM_ENTRIES;
+ list->flags = UDMABUF_FLAGS_CLOEXEC;
+ ubuf_fd = ioctl(devfd, UDMABUF_CREATE_LIST, list);
+ free(list);
+ if (ubuf_fd < 0) {
+ ksft_print_msg("%s: [FAIL, udmabuf-create]\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
+ }
+
+ return ubuf_fd;
+}
+
+static void write_to_memfd(void *addr, off64_t size, char chr)
+{
+ int i;
+
+ for (i = 0; i < size / page_size; i++) {
+ *((char *)addr + (i * page_size)) = chr;
+ }
+}
+
+static void *mmap_fd(int fd, off64_t size)
+{
+ void *addr;
+
+ addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED) {
+ ksft_print_msg("%s: ubuf_fd mmap fail\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
+ }
+
+ return addr;
+}
+
+static int compare_chunks(void *addr1, void *addr2, off64_t memfd_size)
+{
+ off64_t off;
+ int i = 0, j, k = 0, ret = 0;
+ char char1, char2;
+
+ while (i < NUM_ENTRIES) {
+ off = i * (memfd_size / NUM_ENTRIES);
+ for (j = 0; j < NUM_PAGES; j++, k++) {
+ char1 = *((char *)addr1 + off + (j * getpagesize()));
+ char2 = *((char *)addr2 + (k * getpagesize()));
+ if (char1 != char2) {
+ ret = -1;
+ goto err;
+ }
+ }
+ i++;
+ }
+err:
+ munmap(addr1, memfd_size);
+ munmap(addr2, NUM_ENTRIES * NUM_PAGES * getpagesize());
+ return ret;
}
int main(int argc, char *argv[])
{
struct udmabuf_create create;
int devfd, memfd, buf, ret;
- off_t size;
- void *mem;
+ off64_t size;
+ void *addr1, *addr2;
+
+ ksft_print_header();
+ ksft_set_plan(7);
devfd = open("/dev/udmabuf", O_RDWR);
if (devfd < 0) {
- printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
- exit(77);
+ ksft_print_msg(
+ "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
+ TEST_PREFIX);
+ exit(KSFT_SKIP);
}
memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
if (memfd < 0) {
- printf("%s: [skip,no-memfd]\n", TEST_PREFIX);
- exit(77);
+ ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX);
+ exit(KSFT_SKIP);
}
ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
if (ret < 0) {
- printf("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
- exit(77);
+ ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
+ exit(KSFT_SKIP);
}
-
size = getpagesize() * NUM_PAGES;
ret = ftruncate(memfd, size);
if (ret == -1) {
- printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
- exit(1);
+ ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
}
memset(&create, 0, sizeof(create));
@@ -60,44 +174,104 @@ int main(int argc, char *argv[])
create.offset = getpagesize()/2;
create.size = getpagesize();
buf = ioctl(devfd, UDMABUF_CREATE, &create);
- if (buf >= 0) {
- printf("%s: [FAIL,test-1]\n", TEST_PREFIX);
- exit(1);
- }
+ if (buf >= 0)
+ ksft_test_result_fail("%s: [FAIL,test-1]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-1]\n", TEST_PREFIX);
/* should fail (size not multiple of page) */
create.memfd = memfd;
create.offset = 0;
create.size = getpagesize()/2;
buf = ioctl(devfd, UDMABUF_CREATE, &create);
- if (buf >= 0) {
- printf("%s: [FAIL,test-2]\n", TEST_PREFIX);
- exit(1);
- }
+ if (buf >= 0)
+ ksft_test_result_fail("%s: [FAIL,test-2]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-2]\n", TEST_PREFIX);
/* should fail (not memfd) */
create.memfd = 0; /* stdin */
create.offset = 0;
create.size = size;
buf = ioctl(devfd, UDMABUF_CREATE, &create);
- if (buf >= 0) {
- printf("%s: [FAIL,test-3]\n", TEST_PREFIX);
- exit(1);
- }
+ if (buf >= 0)
+ ksft_test_result_fail("%s: [FAIL,test-3]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-3]\n", TEST_PREFIX);
/* should work */
+ page_size = getpagesize();
+ addr1 = mmap_fd(memfd, size);
+ write_to_memfd(addr1, size, 'a');
create.memfd = memfd;
create.offset = 0;
create.size = size;
buf = ioctl(devfd, UDMABUF_CREATE, &create);
- if (buf < 0) {
- printf("%s: [FAIL,test-4]\n", TEST_PREFIX);
- exit(1);
- }
+ if (buf < 0)
+ ksft_test_result_fail("%s: [FAIL,test-4]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-4]\n", TEST_PREFIX);
+
+ munmap(addr1, size);
+ close(buf);
+ close(memfd);
+
+ /* should work (migration of 4k size pages)*/
+ size = MEMFD_SIZE * page_size;
+ memfd = create_memfd_with_seals(size, false);
+ addr1 = mmap_fd(memfd, size);
+ write_to_memfd(addr1, size, 'a');
+ buf = create_udmabuf_list(devfd, memfd, size);
+ addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize());
+ write_to_memfd(addr1, size, 'b');
+ ret = compare_chunks(addr1, addr2, size);
+ if (ret < 0)
+ ksft_test_result_fail("%s: [FAIL,test-5]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-5]\n", TEST_PREFIX);
+
+ close(buf);
+ close(memfd);
+
+ /* should work (migration of 2MB size huge pages)*/
+ page_size = getpagesize() * 512; /* 2 MB */
+ size = MEMFD_SIZE * page_size;
+ memfd = create_memfd_with_seals(size, true);
+ addr1 = mmap_fd(memfd, size);
+ write_to_memfd(addr1, size, 'a');
+ buf = create_udmabuf_list(devfd, memfd, size);
+ addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize());
+ write_to_memfd(addr1, size, 'b');
+ ret = compare_chunks(addr1, addr2, size);
+ if (ret < 0)
+ ksft_test_result_fail("%s: [FAIL,test-6]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-6]\n", TEST_PREFIX);
+
+ close(buf);
+ close(memfd);
+
+ /* same test as above but we pin first before writing to memfd */
+ page_size = getpagesize() * 512; /* 2 MB */
+ size = MEMFD_SIZE * page_size;
+ memfd = create_memfd_with_seals(size, true);
+ buf = create_udmabuf_list(devfd, memfd, size);
+ addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize());
+ addr1 = mmap_fd(memfd, size);
+ write_to_memfd(addr1, size, 'a');
+ write_to_memfd(addr1, size, 'b');
+ ret = compare_chunks(addr1, addr2, size);
+ if (ret < 0)
+ ksft_test_result_fail("%s: [FAIL,test-7]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-7]\n", TEST_PREFIX);
- fprintf(stderr, "%s: ok\n", TEST_PREFIX);
close(buf);
close(memfd);
close(devfd);
+
+ ksft_print_msg("%s: ok\n", TEST_PREFIX);
+ ksft_print_cnts();
+
return 0;
}
diff --git a/tools/testing/selftests/drivers/gpu/drm_mm.sh b/tools/testing/selftests/drivers/gpu/drm_mm.sh
index b789dc8257e6..09c76cd7661d 100755
--- a/tools/testing/selftests/drivers/gpu/drm_mm.sh
+++ b/tools/testing/selftests/drivers/gpu/drm_mm.sh
@@ -3,7 +3,7 @@
# Runs API tests for struct drm_mm (DRM range manager)
if ! /sbin/modprobe -n -q test-drm_mm; then
- echo "drivers/gpu/drm_mm: [skip]"
+ echo "drivers/gpu/drm_mm: module test-drm_mm is not found in /lib/modules/`uname -r` [skip]"
exit 77
fi
@@ -11,6 +11,6 @@ if /sbin/modprobe -q test-drm_mm; then
/sbin/modprobe -q -r test-drm_mm
echo "drivers/gpu/drm_mm: ok"
else
- echo "drivers/gpu/drm_mm: [FAIL]"
+ echo "drivers/gpu/drm_mm: module test-drm_mm could not be removed [FAIL]"
exit 1
fi
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
new file mode 100644
index 000000000000..3633c7a3ed65
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+gro
+napi_id_helper
+psp_responder
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
new file mode 100644
index 000000000000..f5c71d993750
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += $(KHDR_INCLUDES)
+
+TEST_INCLUDES := $(wildcard lib/py/*.py) \
+ $(wildcard lib/sh/*.sh) \
+ ../../net/lib.sh \
+
+TEST_GEN_FILES := \
+ gro \
+ napi_id_helper \
+# end of TEST_GEN_FILES
+
+TEST_PROGS := \
+ gro.py \
+ hds.py \
+ napi_id.py \
+ napi_threaded.py \
+ netcons_basic.sh \
+ netcons_cmdline.sh \
+ netcons_fragmented_msg.sh \
+ netcons_overflow.sh \
+ netcons_sysdata.sh \
+ netcons_torture.sh \
+ netpoll_basic.py \
+ ping.py \
+ psp.py \
+ queues.py \
+ ring_reconfig.py \
+ shaper.py \
+ stats.py \
+ xdp.py \
+# end of TEST_PROGS
+
+# YNL files, must be before "include ..lib.mk"
+YNL_GEN_FILES := psp_responder
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+
+include ../../lib.mk
+
+# YNL build
+YNL_GENS := psp
+
+include ../../net/ynl.mk
diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst
new file mode 100644
index 000000000000..eb838ae94844
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/README.rst
@@ -0,0 +1,136 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Running driver tests
+====================
+
+Networking driver tests are executed within kselftest framework like any
+other tests. They support testing both real device drivers and emulated /
+software drivers (latter mostly to test the core parts of the stack).
+
+SW mode
+~~~~~~~
+
+By default, when no extra parameters are set or exported, tests execute
+against software drivers such as netdevsim. No extra preparation is required
+the software devices are created and destroyed as part of the test.
+In this mode the tests are indistinguishable from other selftests and
+(for example) can be run under ``virtme-ng`` like the core networking selftests.
+
+HW mode
+~~~~~~~
+
+Executing tests against a real device requires external preparation.
+The netdevice against which tests will be run must exist, be running
+(in UP state) and be configured with an IP address.
+
+Refer to list of :ref:`Variables` later in this file to set up running
+the tests against a real device.
+
+Both modes required
+~~~~~~~~~~~~~~~~~~~
+
+All tests in drivers/net must support running both against a software device
+and a real device. SW-only tests should instead be placed in net/ or
+drivers/net/netdevsim, HW-only tests in drivers/net/hw.
+
+Variables
+=========
+
+The variables can be set in the environment or by creating a net.config
+file in the same directory as this README file. Example::
+
+ $ NETIF=eth0 ./some_test.sh
+
+or::
+
+ $ cat tools/testing/selftests/drivers/net/net.config
+ # Variable set in a file
+ NETIF=eth0
+
+Local test (which don't require endpoint for sending / receiving traffic)
+need only the ``NETIF`` variable. Remaining variables define the endpoint
+and communication method.
+
+NETIF
+~~~~~
+
+Name of the netdevice against which the test should be executed.
+When empty or not set software devices will be used.
+
+LOCAL_V4, LOCAL_V6, REMOTE_V4, REMOTE_V6
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Local and remote endpoint IP addresses.
+
+REMOTE_TYPE
+~~~~~~~~~~~
+
+Communication method used to run commands on the remote endpoint.
+Test framework has built-in support for ``netns`` and ``ssh`` channels.
+``netns`` assumes the "remote" interface is part of the same
+host, just moved to the specified netns.
+``ssh`` communicates with remote endpoint over ``ssh`` and ``scp``.
+Using persistent SSH connections is strongly encouraged to avoid
+the latency of SSH connection setup on every command.
+
+Communication methods are defined by classes in ``lib/py/remote_{name}.py``.
+It should be possible to add a new method without modifying any of
+the framework, by simply adding an appropriately named file to ``lib/py``.
+
+REMOTE_ARGS
+~~~~~~~~~~~
+
+Arguments used to construct the communication channel.
+Communication channel dependent::
+
+ for netns - name of the "remote" namespace
+ for ssh - name/address of the remote host
+
+Example
+=======
+
+Build the selftests::
+
+ # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw"
+
+"Install" the tests and copy them over to the target machine::
+
+ # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw" \
+ install INSTALL_PATH=/tmp/ksft-net-drv
+
+ # rsync -ra --delete /tmp/ksft-net-drv root@192.168.1.1:/root/
+
+On the target machine, running the tests will use netdevsim by default::
+
+ [/root] # ./ksft-net-drv/run_kselftest.sh -t drivers/net:ping.py
+ TAP version 13
+ 1..1
+ # timeout set to 45
+ # selftests: drivers/net: ping.py
+ # TAP version 13
+ # 1..3
+ # ok 1 ping.test_v4
+ # ok 2 ping.test_v6
+ # ok 3 ping.test_tcp
+ # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
+ ok 1 selftests: drivers/net: ping.py
+
+Create a config with remote info::
+
+ [/root] # cat > ./ksft-net-drv/drivers/net/net.config <<EOF
+ NETIF=eth0
+ LOCAL_V4=192.168.1.1
+ REMOTE_V4=192.168.1.2
+ REMOTE_TYPE=ssh
+ REMOTE_ARGS=root@192.168.1.2
+ EOF
+
+Run the test::
+
+ [/root] # ./ksft-net-drv/drivers/net/ping.py
+ TAP version 13
+ 1..3
+ ok 1 ping.test_v4
+ ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity
+ ok 3 ping.test_tcp
+ # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:1 error:0
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
new file mode 100644
index 000000000000..6c5c60adb5e8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+TEST_PROGS := \
+ bond-arp-interval-causes-panic.sh \
+ bond-break-lacpdu-tx.sh \
+ bond-eth-type-change.sh \
+ bond-lladdr-target.sh \
+ bond_ipsec_offload.sh \
+ bond_lacp_prio.sh \
+ bond_macvlan_ipvlan.sh \
+ bond_options.sh \
+ bond_passive_lacp.sh \
+ dev_addr_lists.sh \
+ mode-1-recovery-updelay.sh \
+ mode-2-recovery-updelay.sh \
+ netcons_over_bonding.sh \
+# end of TEST_PROGS
+
+TEST_FILES := \
+ bond_topo_2d1c.sh \
+ bond_topo_3d1c.sh \
+ lag_lib.sh \
+# end of TEST_FILES
+
+TEST_INCLUDES := \
+ ../../../net/lib.sh \
+ ../lib/sh/lib_netcons.sh \
+ ../../../net/forwarding/lib.sh \
+# end of TEST_INCLUDES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
new file mode 100755
index 000000000000..5667febee328
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# cause kernel oops in bond_rr_gen_slave_id
+DEBUG=${DEBUG:-0}
+
+set -e
+test ${DEBUG} -ne 0 && set -x
+
+finish()
+{
+ ip netns delete server || true
+ ip netns delete client || true
+}
+
+trap finish EXIT
+
+client_ip4=192.168.1.198
+server_ip4=192.168.1.254
+
+# setup kernel so it reboots after causing the panic
+echo 180 >/proc/sys/kernel/panic
+
+# build namespaces
+ip netns add "server"
+ip netns add "client"
+ip -n client link add eth0 type veth peer name eth0 netns server
+ip netns exec server ip link set dev eth0 up
+ip netns exec server ip addr add ${server_ip4}/24 dev eth0
+
+ip netns exec client ip link add dev bond0 down type bond mode 1 \
+ miimon 100 all_slaves_active 1
+ip netns exec client ip link set dev eth0 master bond0
+ip netns exec client ip link set dev bond0 up
+ip netns exec client ip addr add ${client_ip4}/24 dev bond0
+ip netns exec client ping -c 5 $server_ip4 >/dev/null
+
+ip netns exec client ip link set dev eth0 nomaster
+ip netns exec client ip link set dev bond0 down
+ip netns exec client ip link set dev bond0 type bond mode 0 \
+ arp_interval 1000 arp_ip_target "+${server_ip4}"
+ip netns exec client ip link set dev eth0 master bond0
+ip netns exec client ip link set dev bond0 up
+ip netns exec client ping -c 5 $server_ip4 >/dev/null
+
+exit 0
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
new file mode 100755
index 000000000000..1ec7f59db7f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# Verify LACPDUs get transmitted after setting the MAC address of
+# the bond.
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=2020773
+#
+# +---------+
+# | fab-br0 |
+# +---------+
+# |
+# +---------+
+# | fbond |
+# +---------+
+# | |
+# +------+ +------+
+# |veth1 | |veth2 |
+# +------+ +------+
+#
+# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+set -e
+cleanup() {
+ ip link del fab-br0 >/dev/null 2>&1 || :
+ ip link del fbond >/dev/null 2>&1 || :
+ ip link del veth1-bond >/dev/null 2>&1 || :
+ ip link del veth2-bond >/dev/null 2>&1 || :
+}
+
+trap cleanup 0 1 2
+cleanup
+
+# create the bridge
+ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \
+ forward_delay 15
+
+# create the bond
+ip link add fbond type bond mode 4 miimon 200 xmit_hash_policy 1 \
+ ad_actor_sys_prio 65535 lacp_rate fast
+
+# set bond address
+ip link set fbond address 52:54:00:3B:7C:A6
+ip link set fbond up
+
+# set again bond sysfs parameters
+ip link set fbond type bond ad_actor_sys_prio 65535
+
+# create veths
+ip link add name veth1-bond type veth peer name veth1-end
+ip link add name veth2-bond type veth peer name veth2-end
+
+# add ports
+ip link set fbond master fab-br0
+ip link set veth1-bond master fbond
+ip link set veth2-bond master fbond
+
+# bring up
+ip link set veth1-end up
+ip link set veth2-end up
+ip link set fab-br0 up
+ip link set fbond up
+ip addr add dev fab-br0 10.0.0.3
+
+rc=0
+tc qdisc add dev veth1-end clsact
+tc filter add dev veth1-end ingress protocol 0x8809 pref 1 handle 101 flower skip_hw action pass
+if slowwait_for_counter 15 2 \
+ tc_rule_handle_stats_get "dev veth1-end ingress" 101 ".packets" "" &> /dev/null; then
+ echo "PASS, captured 2"
+else
+ echo "FAIL"
+ rc=1
+fi
+exit $rc
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
new file mode 100755
index 000000000000..8293dbc7c18f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bond device ether type changing
+#
+
+ALL_TESTS="
+ bond_test_unsuccessful_enslave_type_change
+ bond_test_successful_enslave_type_change
+"
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+bond_check_flags()
+{
+ local bonddev=$1
+
+ ip -d l sh dev "$bonddev" | grep -q "MASTER"
+ check_err $? "MASTER flag is missing from the bond device"
+
+ ip -d l sh dev "$bonddev" | grep -q "SLAVE"
+ check_err $? "SLAVE flag is missing from the bond device"
+}
+
+# test enslaved bond dev type change from ARPHRD_ETHER and back
+# this allows us to test both MASTER and SLAVE flags at once
+bond_test_enslave_type_change()
+{
+ local test_success=$1
+ local devbond0="test-bond0"
+ local devbond1="test-bond1"
+ local devbond2="test-bond2"
+ local nonethdev="test-noneth0"
+
+ # create a non-ARPHRD_ETHER device for testing (e.g. nlmon type)
+ ip link add name "$nonethdev" type nlmon
+ check_err $? "could not create a non-ARPHRD_ETHER device (nlmon)"
+ ip link add name "$devbond0" type bond
+ if [ $test_success -eq 1 ]; then
+ # we need devbond0 in active-backup mode to successfully enslave nonethdev
+ ip link set dev "$devbond0" type bond mode active-backup
+ check_err $? "could not change bond mode to active-backup"
+ fi
+ ip link add name "$devbond1" type bond
+ ip link add name "$devbond2" type bond
+ ip link set dev "$devbond0" master "$devbond1"
+ check_err $? "could not enslave $devbond0 to $devbond1"
+ # change bond type to non-ARPHRD_ETHER
+ ip link set dev "$nonethdev" master "$devbond0" 1>/dev/null 2>/dev/null
+ ip link set dev "$nonethdev" nomaster 1>/dev/null 2>/dev/null
+ # restore ARPHRD_ETHER type by enslaving such device
+ ip link set dev "$devbond2" master "$devbond0"
+ check_err $? "could not enslave $devbond2 to $devbond0"
+
+ bond_check_flags "$devbond0"
+
+ # clean up
+ ip link del dev "$devbond0"
+ ip link del dev "$devbond1"
+ ip link del dev "$devbond2"
+ ip link del dev "$nonethdev"
+}
+
+bond_test_unsuccessful_enslave_type_change()
+{
+ RET=0
+
+ bond_test_enslave_type_change 0
+ log_test "Change ether type of an enslaved bond device with unsuccessful enslave"
+}
+
+bond_test_successful_enslave_type_change()
+{
+ RET=0
+
+ bond_test_enslave_type_change 1
+ log_test "Change ether type of an enslaved bond device with successful enslave"
+}
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
new file mode 100755
index 000000000000..78d3e0fe6604
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# Verify bond interface could up when set IPv6 link local address target.
+#
+# +----------------+
+# | br0 |
+# | | | sw
+# | veth0 veth1 |
+# +---+-------+----+
+# | |
+# +---+-------+----+
+# | veth0 veth1 |
+# | | | host
+# | bond0 |
+# +----------------+
+#
+# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+sw="sw-$(mktemp -u XXXXXX)"
+host="ns-$(mktemp -u XXXXXX)"
+
+cleanup()
+{
+ ip netns del $sw
+ ip netns del $host
+}
+
+wait_lladdr_dad()
+{
+ $@ | grep fe80 | grep -qv tentative
+}
+
+wait_bond_up()
+{
+ $@ | grep -q 'state UP'
+}
+
+trap cleanup 0 1 2
+
+ip netns add $sw
+ip netns add $host
+
+ip -n $host link add veth0 type veth peer name veth0 netns $sw
+ip -n $host link add veth1 type veth peer name veth1 netns $sw
+
+ip -n $sw link add br0 type bridge
+ip -n $sw link set br0 up
+sw_lladdr=$(ip -n $sw addr show br0 | awk '/fe80/{print $2}' | cut -d'/' -f1)
+# wait some time to make sure bridge lladdr pass DAD
+slowwait 2 wait_lladdr_dad ip -n $sw addr show br0
+
+ip -n $host link add bond0 type bond mode 1 ns_ip6_target ${sw_lladdr} \
+ arp_validate 3 arp_interval 1000
+# add a lladdr for bond to make sure there is a route to target
+ip -n $host addr add fe80::beef/64 dev bond0
+ip -n $host link set bond0 up
+ip -n $host link set veth0 master bond0
+ip -n $host link set veth1 master bond0
+
+ip -n $sw link set veth0 master br0
+ip -n $sw link set veth1 master br0
+ip -n $sw link set veth0 up
+ip -n $sw link set veth1 up
+
+slowwait 5 wait_bond_up ip -n $host link show bond0
+
+rc=0
+if ip -n $host link show bond0 | grep -q LOWER_UP; then
+ echo "PASS"
+else
+ echo "FAIL"
+ rc=1
+fi
+exit $rc
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh
new file mode 100755
index 000000000000..f09e100232c7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPsec over bonding offload test:
+#
+# +----------------+
+# | bond0 |
+# | | |
+# | eth0 eth1 |
+# +---+-------+----+
+#
+# We use netdevsim instead of physical interfaces
+#-------------------------------------------------------------------
+# Example commands
+# ip x s add proto esp src 192.0.2.1 dst 192.0.2.2 \
+# spi 0x07 mode transport reqid 0x07 replay-window 32 \
+# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+# sel src 192.0.2.1/24 dst 192.0.2.2/24
+# offload dev bond0 dir out
+# ip x p add dir out src 192.0.2.1/24 dst 192.0.2.2/24 \
+# tmpl proto esp src 192.0.2.1 dst 192.0.2.2 \
+# spi 0x07 mode transport reqid 0x07
+#
+#-------------------------------------------------------------------
+
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/lib.sh
+srcip=192.0.2.1
+dstip=192.0.2.2
+ipsec0=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ipsec
+ipsec1=/sys/kernel/debug/netdevsim/netdevsim0/ports/1/ipsec
+active_slave=""
+
+# shellcheck disable=SC2317
+active_slave_changed()
+{
+ local old_active_slave=$1
+ local new_active_slave
+
+ # shellcheck disable=SC2154
+ new_active_slave=$(ip -n "${ns}" -d -j link show bond0 | \
+ jq -r ".[].linkinfo.info_data.active_slave")
+ [ "$new_active_slave" != "$old_active_slave" ] && [ "$new_active_slave" != "null" ]
+}
+
+test_offload()
+{
+ # use ping to exercise the Tx path
+ ip netns exec "$ns" ping -I bond0 -c 3 -W 1 -i 0 "$dstip" >/dev/null
+
+ active_slave=$(ip -n "${ns}" -d -j link show bond0 | \
+ jq -r ".[].linkinfo.info_data.active_slave")
+
+ if [ "$active_slave" = "$nic0" ]; then
+ sysfs=$ipsec0
+ elif [ "$active_slave" = "$nic1" ]; then
+ sysfs=$ipsec1
+ else
+ check_err 1 "bond_ipsec_offload invalid active_slave $active_slave"
+ fi
+
+ # The tx/rx order in sysfs may changed after failover
+ grep -q "SA count=2 tx=3" "$sysfs" && grep -q "tx ipaddr=$dstip" "$sysfs"
+ check_err $? "incorrect tx count with link ${active_slave}"
+
+ log_test bond_ipsec_offload "active_slave ${active_slave}"
+}
+
+setup_env()
+{
+ if ! mount | grep -q debugfs; then
+ mount -t debugfs none /sys/kernel/debug/ &> /dev/null
+ defer umount /sys/kernel/debug/
+
+ fi
+
+ # setup netdevsim since dummy/veth dev doesn't have offload support
+ if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+ if ! modprobe -q netdevsim; then
+ echo "SKIP: can't load netdevsim for ipsec offload"
+ # shellcheck disable=SC2154
+ exit "$ksft_skip"
+ fi
+ defer modprobe -r netdevsim
+ fi
+
+ setup_ns ns
+ defer cleanup_ns "$ns"
+}
+
+setup_bond()
+{
+ ip -n "$ns" link add bond0 type bond mode active-backup miimon 100
+ ip -n "$ns" addr add "$srcip/24" dev bond0
+ ip -n "$ns" link set bond0 up
+
+ echo "0 2" | ip netns exec "$ns" tee /sys/bus/netdevsim/new_device >/dev/null
+ nic0=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | head -n 1)
+ nic1=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | tail -n 1)
+ ip -n "$ns" link set "$nic0" master bond0
+ ip -n "$ns" link set "$nic1" master bond0
+
+ # we didn't create a peer, make sure we can Tx by adding a permanent
+ # neighbour this need to be added after enslave
+ ip -n "$ns" neigh add "$dstip" dev bond0 lladdr 00:11:22:33:44:55
+
+ # create offloaded SAs, both in and out
+ ip -n "$ns" x p add dir out src "$srcip/24" dst "$dstip/24" \
+ tmpl proto esp src "$srcip" dst "$dstip" spi 9 \
+ mode transport reqid 42
+
+ ip -n "$ns" x p add dir in src "$dstip/24" dst "$srcip/24" \
+ tmpl proto esp src "$dstip" dst "$srcip" spi 9 \
+ mode transport reqid 42
+
+ ip -n "$ns" x s add proto esp src "$srcip" dst "$dstip" spi 9 \
+ mode transport reqid 42 aead "rfc4106(gcm(aes))" \
+ 0x3132333435363738393031323334353664636261 128 \
+ sel src "$srcip/24" dst "$dstip/24" \
+ offload dev bond0 dir out
+
+ ip -n "$ns" x s add proto esp src "$dstip" dst "$srcip" spi 9 \
+ mode transport reqid 42 aead "rfc4106(gcm(aes))" \
+ 0x3132333435363738393031323334353664636261 128 \
+ sel src "$dstip/24" dst "$srcip/24" \
+ offload dev bond0 dir in
+
+ # does offload show up in ip output
+ lines=$(ip -n "$ns" x s list | grep -c "crypto offload parameters: dev bond0 dir")
+ if [ "$lines" -ne 2 ] ; then
+ check_err 1 "bond_ipsec_offload SA offload missing from list output"
+ fi
+}
+
+trap defer_scopes_cleanup EXIT
+setup_env
+setup_bond
+
+# start Offload testing
+test_offload
+
+# do failover and re-test
+ip -n "$ns" link set "$active_slave" down
+slowwait 5 active_slave_changed "$active_slave"
+test_offload
+
+# make sure offload get removed from driver
+ip -n "$ns" x s flush
+ip -n "$ns" x p flush
+line0=$(grep -c "SA count=0" "$ipsec0")
+line1=$(grep -c "SA count=0" "$ipsec1")
+[ "$line0" -ne 1 ] || [ "$line1" -ne 1 ]
+check_fail $? "bond_ipsec_offload SA not removed from driver"
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh
new file mode 100755
index 000000000000..a483d505c6a8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing if bond lacp per port priority works
+#
+# Switch (s_ns) Backup Switch (b_ns)
+# +-------------------------+ +-------------------------+
+# | bond0 | | bond0 |
+# | + | | + |
+# | eth0 | eth1 | | eth0 | eth1 |
+# | +---+---+ | | +---+---+ |
+# | | | | | | | |
+# +-------------------------+ +-------------------------+
+# | | | |
+# +-----------------------------------------------------+
+# | | | | | |
+# | +-------+---------+---------+-------+ |
+# | eth0 eth1 | eth2 eth3 |
+# | + |
+# | bond0 |
+# +-----------------------------------------------------+
+# Client (c_ns)
+
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/lib.sh
+
+setup_links()
+{
+ # shellcheck disable=SC2154
+ ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}"
+ ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}"
+ # shellcheck disable=SC2154
+ ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}"
+ ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}"
+
+ ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
+ lacp_rate fast ad_select actor_port_prio
+ ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
+ lacp_rate fast
+ ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
+ lacp_rate fast
+
+ ip -n "${c_ns}" link set eth0 master bond0
+ ip -n "${c_ns}" link set eth1 master bond0
+ ip -n "${c_ns}" link set eth2 master bond0
+ ip -n "${c_ns}" link set eth3 master bond0
+ ip -n "${s_ns}" link set eth0 master bond0
+ ip -n "${s_ns}" link set eth1 master bond0
+ ip -n "${b_ns}" link set eth0 master bond0
+ ip -n "${b_ns}" link set eth1 master bond0
+
+ ip -n "${c_ns}" link set bond0 up
+ ip -n "${s_ns}" link set bond0 up
+ ip -n "${b_ns}" link set bond0 up
+}
+
+test_port_prio_setting()
+{
+ RET=0
+ ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000
+ prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \
+ ".[].linkinfo.info_slave_data.actor_port_prio")
+ [ "$prio" -ne 1000 ] && RET=1
+ ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10
+ prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \
+ ".[].linkinfo.info_slave_data.actor_port_prio")
+ [ "$prio" -ne 10 ] && RET=1
+}
+
+test_agg_reselect()
+{
+ local bond_agg_id slave_agg_id
+ local expect_slave="$1"
+ RET=0
+
+ # Trigger link state change to reselect the aggregator
+ ip -n "${c_ns}" link set eth1 down
+ sleep 0.5
+ ip -n "${c_ns}" link set eth1 up
+ sleep 0.5
+
+ bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \
+ ".[].linkinfo.info_data.ad_info.aggregator")
+ slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \
+ ".[].linkinfo.info_slave_data.ad_aggregator_id")
+ # shellcheck disable=SC2034
+ [ "${bond_agg_id}" -ne "${slave_agg_id}" ] && \
+ RET=1
+}
+
+trap cleanup_all_ns EXIT
+setup_ns c_ns s_ns b_ns
+setup_links
+
+test_port_prio_setting
+log_test "bond 802.3ad" "actor_port_prio setting"
+
+test_agg_reselect eth0
+log_test "bond 802.3ad" "actor_port_prio select"
+
+# Change the actor port prio and re-test
+ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10
+ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000
+test_agg_reselect eth2
+log_test "bond 802.3ad" "actor_port_prio switch"
+
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
new file mode 100755
index 000000000000..559f300f965a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan/ipvlan over bond
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+xvlan1_ns="xvlan1-$(mktemp -u XXXXXX)"
+xvlan2_ns="xvlan2-$(mktemp -u XXXXXX)"
+xvlan1_ip4="192.0.2.11"
+xvlan1_ip6="2001:db8::11"
+xvlan2_ip4="192.0.2.12"
+xvlan2_ip6="2001:db8::12"
+
+cleanup()
+{
+ client_destroy
+ server_destroy
+ gateway_destroy
+
+ ip netns del ${xvlan1_ns}
+ ip netns del ${xvlan2_ns}
+}
+
+check_connection()
+{
+ local ns=${1}
+ local target=${2}
+ local message=${3}
+ RET=0
+
+ sleep 0.25
+ ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+ check_err $? "ping failed"
+ log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}"
+}
+
+xvlan_over_bond()
+{
+ local param="$1"
+ local xvlan_type="$2"
+ local xvlan_mode="$3"
+ RET=0
+
+ # setup new bond mode
+ bond_reset "${param}"
+
+ ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+ ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan1_ns}
+ ip -n ${xvlan1_ns} link set dev ${xvlan_type}0 up
+ ip -n ${xvlan1_ns} addr add ${xvlan1_ip4}/24 dev ${xvlan_type}0
+ ip -n ${xvlan1_ns} addr add ${xvlan1_ip6}/24 dev ${xvlan_type}0
+
+ ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+ ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan2_ns}
+ ip -n ${xvlan2_ns} link set dev ${xvlan_type}0 up
+ ip -n ${xvlan2_ns} addr add ${xvlan2_ip4}/24 dev ${xvlan_type}0
+ ip -n ${xvlan2_ns} addr add ${xvlan2_ip6}/24 dev ${xvlan_type}0
+
+ sleep 2
+
+ check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+ check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+ check_connection "${c_ns}" "${xvlan1_ip4}" "IPv4: client->${xvlan_type}_1"
+ check_connection "${c_ns}" "${xvlan1_ip6}" "IPv6: client->${xvlan_type}_1"
+ check_connection "${c_ns}" "${xvlan2_ip4}" "IPv4: client->${xvlan_type}_2"
+ check_connection "${c_ns}" "${xvlan2_ip6}" "IPv6: client->${xvlan_type}_2"
+ check_connection "${xvlan1_ns}" "${xvlan2_ip4}" "IPv4: ${xvlan_type}_1->${xvlan_type}_2"
+ check_connection "${xvlan1_ns}" "${xvlan2_ip6}" "IPv6: ${xvlan_type}_1->${xvlan_type}_2"
+
+ check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+ check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+ check_connection "${xvlan1_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_1->client"
+ check_connection "${xvlan1_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_1->client"
+ check_connection "${xvlan2_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_2->client"
+ check_connection "${xvlan2_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_2->client"
+ check_connection "${xvlan2_ns}" "${xvlan1_ip4}" "IPv4: ${xvlan_type}_2->${xvlan_type}_1"
+ check_connection "${xvlan2_ns}" "${xvlan1_ip6}" "IPv6: ${xvlan_type}_2->${xvlan_type}_1"
+
+ ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${xvlan1_ns}
+ip netns add ${xvlan2_ns}
+
+bond_modes="active-backup balance-tlb balance-alb"
+
+for bond_mode in ${bond_modes}; do
+ xvlan_over_bond "mode ${bond_mode}" macvlan bridge
+ xvlan_over_bond "mode ${bond_mode}" ipvlan l2
+done
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
new file mode 100755
index 000000000000..187b478d0ddf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -0,0 +1,578 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bonding options with mode 1,5,6
+
+ALL_TESTS="
+ prio
+ arp_validate
+ num_grat_arp
+ fail_over_mac
+ vlan_over_bond
+"
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_3d1c.sh
+c_maddr="33:33:ff:00:00:10"
+g_maddr="33:33:ff:00:02:54"
+
+skip_prio()
+{
+ local skip=1
+
+ # check if iproute support prio option
+ ip -n ${s_ns} link set eth0 type bond_slave prio 10
+ [[ $? -ne 0 ]] && skip=0
+
+ # check if kernel support prio option
+ ip -n ${s_ns} -d link show eth0 | grep -q "prio 10"
+ [[ $? -ne 0 ]] && skip=0
+
+ return $skip
+}
+
+skip_ns()
+{
+ local skip=1
+
+ # check if iproute support ns_ip6_target option
+ ip -n ${s_ns} link add bond1 type bond ns_ip6_target ${g_ip6}
+ [[ $? -ne 0 ]] && skip=0
+
+ # check if kernel support ns_ip6_target option
+ ip -n ${s_ns} -d link show bond1 | grep -q "ns_ip6_target ${g_ip6}"
+ [[ $? -ne 0 ]] && skip=0
+
+ ip -n ${s_ns} link del bond1
+
+ return $skip
+}
+
+active_slave=""
+active_slave_changed()
+{
+ local old_active_slave=$1
+ local new_active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" \
+ ".[].linkinfo.info_data.active_slave")
+ [ "$new_active_slave" != "$old_active_slave" -a "$new_active_slave" != "null" ]
+}
+
+check_active_slave()
+{
+ local target_active_slave=$1
+ slowwait 5 active_slave_changed $active_slave
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ test "$active_slave" = "$target_active_slave"
+ check_err $? "Current active slave is $active_slave but not $target_active_slave"
+}
+
+# Test bonding prio option
+prio_test()
+{
+ local param="$1"
+ RET=0
+
+ # create bond
+ bond_reset "${param}"
+ # set active_slave to primary eth1 specifically
+ ip -n ${s_ns} link set bond0 type bond active_slave eth1
+
+ # check bonding member prio value
+ ip -n ${s_ns} link set eth0 type bond_slave prio 0
+ ip -n ${s_ns} link set eth1 type bond_slave prio 10
+ ip -n ${s_ns} link set eth2 type bond_slave prio 11
+ cmd_jq "ip -n ${s_ns} -d -j link show eth0" \
+ ".[].linkinfo.info_slave_data | select (.prio == 0)" "-e" &> /dev/null
+ check_err $? "eth0 prio is not 0"
+ cmd_jq "ip -n ${s_ns} -d -j link show eth1" \
+ ".[].linkinfo.info_slave_data | select (.prio == 10)" "-e" &> /dev/null
+ check_err $? "eth1 prio is not 10"
+ cmd_jq "ip -n ${s_ns} -d -j link show eth2" \
+ ".[].linkinfo.info_slave_data | select (.prio == 11)" "-e" &> /dev/null
+ check_err $? "eth2 prio is not 11"
+
+ bond_check_connection "setup"
+
+ # active slave should be the primary slave
+ check_active_slave eth1
+
+ # active slave should be the higher prio slave
+ ip -n ${s_ns} link set $active_slave down
+ check_active_slave eth2
+ bond_check_connection "fail over"
+
+ # when only 1 slave is up
+ ip -n ${s_ns} link set $active_slave down
+ check_active_slave eth0
+ bond_check_connection "only 1 slave up"
+
+ # when a higher prio slave change to up
+ ip -n ${s_ns} link set eth2 up
+ bond_check_connection "higher prio slave up"
+ case $primary_reselect in
+ "0")
+ check_active_slave "eth2"
+ ;;
+ "1")
+ check_active_slave "eth0"
+ ;;
+ "2")
+ check_active_slave "eth0"
+ ;;
+ esac
+ local pre_active_slave=$active_slave
+
+ # when the primary slave change to up
+ ip -n ${s_ns} link set eth1 up
+ bond_check_connection "primary slave up"
+ case $primary_reselect in
+ "0")
+ check_active_slave "eth1"
+ ;;
+ "1")
+ check_active_slave "$pre_active_slave"
+ ;;
+ "2")
+ check_active_slave "$pre_active_slave"
+ ip -n ${s_ns} link set $active_slave down
+ bond_check_connection "pre_active slave down"
+ check_active_slave "eth1"
+ ;;
+ esac
+
+ # Test changing bond slave prio
+ if [[ "$primary_reselect" == "0" ]];then
+ ip -n ${s_ns} link set eth0 type bond_slave prio 1000000
+ ip -n ${s_ns} link set eth1 type bond_slave prio 0
+ ip -n ${s_ns} link set eth2 type bond_slave prio -50
+ ip -n ${s_ns} -d link show eth0 | grep -q 'prio 1000000'
+ check_err $? "eth0 prio is not 1000000"
+ ip -n ${s_ns} -d link show eth1 | grep -q 'prio 0'
+ check_err $? "eth1 prio is not 0"
+ ip -n ${s_ns} -d link show eth2 | grep -q 'prio -50'
+ check_err $? "eth3 prio is not -50"
+ check_active_slave "eth1"
+
+ ip -n ${s_ns} link set $active_slave down
+ check_active_slave "eth0"
+ bond_check_connection "change slave prio"
+ fi
+}
+
+prio_miimon()
+{
+ local primary_reselect
+ local mode=$1
+
+ for primary_reselect in 0 1 2; do
+ prio_test "mode $mode miimon 100 primary eth1 primary_reselect $primary_reselect"
+ log_test "prio" "$mode miimon primary_reselect $primary_reselect"
+ done
+}
+
+prio_arp()
+{
+ local primary_reselect
+ local mode=$1
+
+ for primary_reselect in 0 1 2; do
+ prio_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect"
+ log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect"
+ done
+}
+
+prio_ns()
+{
+ local primary_reselect
+ local mode=$1
+
+ if skip_ns; then
+ log_test_skip "prio ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+ return 0
+ fi
+
+ for primary_reselect in 0 1 2; do
+ prio_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect"
+ log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect"
+ done
+}
+
+prio()
+{
+ local mode modes="active-backup balance-tlb balance-alb"
+
+ if skip_prio; then
+ log_test_skip "prio" "Current iproute or kernel doesn't support bond option 'prio'."
+ return 0
+ fi
+
+ for mode in $modes; do
+ prio_miimon $mode
+ done
+ prio_arp "active-backup"
+ prio_ns "active-backup"
+}
+
+wait_mii_up()
+{
+ for i in $(seq 0 2); do
+ mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+ [ ${mii_status} != "UP" ] && return 1
+ done
+ return 0
+}
+
+arp_validate_test()
+{
+ local param="$1"
+ RET=0
+
+ # create bond
+ bond_reset "${param}"
+
+ bond_check_connection
+ [ $RET -ne 0 ] && log_test "arp_validate" "$retmsg"
+
+ # wait for a while to make sure the mii status stable
+ slowwait 5 wait_mii_up
+ for i in $(seq 0 2); do
+ mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+ if [ ${mii_status} != "UP" ]; then
+ RET=1
+ log_test "arp_validate" "interface eth$i mii_status $mii_status"
+ fi
+ done
+}
+
+# Testing correct multicast groups are added to slaves for ns targets
+arp_validate_mcast()
+{
+ RET=0
+ local arp_valid=$(cmd_jq "ip -n ${s_ns} -j -d link show bond0" ".[].linkinfo.info_data.arp_validate")
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+
+ for i in $(seq 0 2); do
+ maddr_list=$(ip -n ${s_ns} maddr show dev eth${i})
+
+ # arp_valid == 0 or active_slave should not join any maddrs
+ if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \
+ echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then
+ RET=1
+ check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+ # arp_valid != 0 and backup_slave should join both maddrs
+ elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \
+ ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \
+ ! echo "$maddr_list" | grep -q "${m_maddr}"); then
+ RET=1
+ check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+ fi
+ done
+
+ # Do failover
+ ip -n ${s_ns} link set ${active_slave} down
+ # wait for active link change
+ slowwait 2 active_slave_changed $active_slave
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+
+ for i in $(seq 0 2); do
+ maddr_list=$(ip -n ${s_ns} maddr show dev eth${i})
+
+ # arp_valid == 0 or active_slave should not join any maddrs
+ if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \
+ echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then
+ RET=1
+ check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+ # arp_valid != 0 and backup_slave should join both maddrs
+ elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \
+ ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \
+ ! echo "$maddr_list" | grep -q "${m_maddr}"); then
+ RET=1
+ check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+ fi
+ done
+}
+
+arp_validate_arp()
+{
+ local mode=$1
+ local val
+ for val in $(seq 0 6); do
+ arp_validate_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} arp_validate $val"
+ log_test "arp_validate" "$mode arp_ip_target arp_validate $val"
+ done
+}
+
+arp_validate_ns()
+{
+ local mode=$1
+ local val
+
+ if skip_ns; then
+ log_test_skip "arp_validate ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+ return 0
+ fi
+
+ for val in $(seq 0 6); do
+ arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6},${c_ip6} arp_validate $val"
+ log_test "arp_validate" "$mode ns_ip6_target arp_validate $val"
+ arp_validate_mcast
+ log_test "arp_validate" "join mcast group"
+ done
+}
+
+arp_validate()
+{
+ arp_validate_arp "active-backup"
+ arp_validate_ns "active-backup"
+}
+
+garp_test()
+{
+ local param="$1"
+ local active_slave exp_num real_num i
+ RET=0
+
+ # create bond
+ bond_reset "${param}"
+
+ bond_check_connection
+ [ $RET -ne 0 ] && log_test "num_grat_arp" "$retmsg"
+
+
+ # Add tc rules to count GARP number
+ for i in $(seq 0 2); do
+ tc -n ${g_ns} filter add dev s$i ingress protocol arp pref 1 handle 101 \
+ flower skip_hw arp_op request arp_sip ${s_ip4} arp_tip ${s_ip4} action pass
+ done
+
+ # Do failover
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ ip -n ${s_ns} link set ${active_slave} down
+
+ # wait for active link change
+ slowwait 2 active_slave_changed $active_slave
+
+ exp_num=$(echo "${param}" | cut -f6 -d ' ')
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ slowwait_for_counter $((exp_num + 5)) $exp_num tc_rule_handle_stats_get \
+ "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" &> /dev/null
+
+ # check result
+ real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
+ if [ "${real_num}" -ne "${exp_num}" ]; then
+ echo "$real_num garp packets sent on active slave ${active_slave}"
+ RET=1
+ fi
+
+ for i in $(seq 0 2); do
+ tc -n ${g_ns} filter del dev s$i ingress
+ done
+}
+
+num_grat_arp()
+{
+ local val
+ for val in 10 20 30; do
+ garp_test "mode active-backup miimon 10 num_grat_arp $val peer_notify_delay 100"
+ log_test "num_grat_arp" "active-backup miimon num_grat_arp $val"
+ done
+}
+
+check_all_mac_same()
+{
+ RET=0
+ # all slaves should have same mac address (with the first port's mac)
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ local eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ local eth1_mac=$(ip -n "$s_ns" -j link show eth1 | jq -r '.[]["address"]')
+ local eth2_mac=$(ip -n "$s_ns" -j link show eth2 | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "${mac[0]}" ] || [ "$eth0_mac" != "$bond_mac" ] || \
+ [ "$eth1_mac" != "$bond_mac" ] || [ "$eth2_mac" != "$bond_mac" ]; then
+ RET=1
+ fi
+}
+
+check_bond_mac_same_with_first()
+{
+ RET=0
+ # bond mac address should be same with the first added slave
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "${mac[0]}" ]; then
+ RET=1
+ fi
+}
+
+check_bond_mac_same_with_active()
+{
+ RET=0
+ # bond mac address should be same with active slave
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ local active_slave_mac=$(ip -n "$s_ns" -j link show "$active_slave" | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "$active_slave_mac" ]; then
+ RET=1
+ fi
+}
+
+check_backup_slave_mac_not_change()
+{
+ RET=0
+ # backup slave's mac address is not changed
+ if ip -n "$s_ns" -d -j link show type bond_slave | jq -e '.[]
+ | select(.linkinfo.info_slave_data.state=="BACKUP")
+ | select(.address != .linkinfo.info_slave_data.perm_hwaddr)' &> /dev/null; then
+ RET=1
+ fi
+}
+
+check_backup_slave_mac_inherit()
+{
+ local backup_mac
+ RET=0
+
+ # backup slaves should use mac[1] or mac[2]
+ local backup_macs=$(ip -n "$s_ns" -d -j link show type bond_slave | \
+ jq -r '.[] | select(.linkinfo.info_slave_data.state=="BACKUP") | .address')
+ for backup_mac in $backup_macs; do
+ if [ "$backup_mac" != "${mac[1]}" ] && [ "$backup_mac" != "${mac[2]}" ]; then
+ RET=1
+ fi
+ done
+}
+
+check_first_slave_random_mac()
+{
+ RET=0
+ # remove the first added slave and added it back
+ ip -n "$s_ns" link set eth0 nomaster
+ ip -n "$s_ns" link set eth0 master bond0
+
+ # the first slave should use random mac address
+ eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ [ "$eth0_mac" = "${mac[0]}" ] && RET=1
+ log_test "bond fail_over_mac follow" "random first slave mac"
+
+ # remove the first slave, the permanent MAC address should be restored back
+ ip -n "$s_ns" link set eth0 nomaster
+ eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ [ "$eth0_mac" != "${mac[0]}" ] && RET=1
+}
+
+do_active_backup_failover()
+{
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ ip -n ${s_ns} link set ${active_slave} down
+ slowwait 2 active_slave_changed $active_slave
+ ip -n ${s_ns} link set ${active_slave} up
+}
+
+fail_over_mac()
+{
+ # Bring down the first interface on the switch to force the bond to
+ # select another active interface instead of the first one that joined.
+ ip -n "$g_ns" link set s0 down
+
+ # fail_over_mac none
+ bond_reset "mode active-backup miimon 100 fail_over_mac 0"
+ check_all_mac_same
+ log_test "fail_over_mac 0" "all slaves have same mac"
+ do_active_backup_failover
+ check_all_mac_same
+ log_test "fail_over_mac 0" "failover: all slaves have same mac"
+
+ # fail_over_mac active
+ bond_reset "mode active-backup miimon 100 fail_over_mac 1"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 1" "bond mac is same with active slave mac"
+ check_backup_slave_mac_not_change
+ log_test "fail_over_mac 1" "backup slave mac is not changed"
+ do_active_backup_failover
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 1" "failover: bond mac is same with active slave mac"
+ check_backup_slave_mac_not_change
+ log_test "fail_over_mac 1" "failover: backup slave mac is not changed"
+
+ # fail_over_mac follow
+ bond_reset "mode active-backup miimon 100 fail_over_mac 2"
+ check_bond_mac_same_with_first
+ log_test "fail_over_mac 2" "bond mac is same with first slave mac"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 2" "bond mac is same with active slave mac"
+ check_backup_slave_mac_inherit
+ log_test "fail_over_mac 2" "backup slave mac inherit"
+ do_active_backup_failover
+ check_bond_mac_same_with_first
+ log_test "fail_over_mac 2" "failover: bond mac is same with first slave mac"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 2" "failover: bond mac is same with active slave mac"
+ check_backup_slave_mac_inherit
+ log_test "fail_over_mac 2" "failover: backup slave mac inherit"
+ check_first_slave_random_mac
+ log_test "fail_over_mac 2" "first slave mac random"
+}
+
+vlan_over_bond_arp()
+{
+ local mode="$1"
+ RET=0
+
+ bond_reset "mode $mode arp_interval 100 arp_ip_target 192.0.3.10"
+ ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+ ip -n "${s_ns}" link set bond0.3 up
+ ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+ ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+ slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+ "dev eth0.3 ingress" 101 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+ log_test "vlan over bond arp" "$mode"
+}
+
+vlan_over_bond_ns()
+{
+ local mode="$1"
+ RET=0
+
+ if skip_ns; then
+ log_test_skip "vlan_over_bond ns" "$mode"
+ return 0
+ fi
+
+ bond_reset "mode $mode arp_interval 100 ns_ip6_target 2001:db8::3:10"
+ ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+ ip -n "${s_ns}" link set bond0.3 up
+ ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+ ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+ slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+ "dev eth0.3 ingress" 102 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+ log_test "vlan over bond ns" "$mode"
+}
+
+vlan_over_bond()
+{
+ # add vlan 3 for client
+ ip -n "${c_ns}" link add eth0.3 link eth0 type vlan id 3
+ ip -n "${c_ns}" link set eth0.3 up
+ ip -n "${c_ns}" addr add 192.0.3.10/24 dev eth0.3
+ ip -n "${c_ns}" addr add 2001:db8::3:10/64 dev eth0.3
+
+ # Add tc rule to check the vlan pkts
+ tc -n "${c_ns}" qdisc add dev eth0.3 clsact
+ tc -n "${c_ns}" filter add dev eth0.3 ingress protocol arp \
+ handle 101 flower skip_hw arp_op request \
+ arp_sip 192.0.3.1 arp_tip 192.0.3.10 action pass
+ tc -n "${c_ns}" filter add dev eth0.3 ingress protocol ipv6 \
+ handle 102 flower skip_hw ip_proto icmpv6 \
+ type 135 src_ip 2001:db8::3:1 action pass
+
+ vlan_over_bond_arp "active-backup"
+ vlan_over_bond_ns "active-backup"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh
new file mode 100755
index 000000000000..9c3b089813df
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test if a bond interface works with lacp_active=off.
+
+# shellcheck disable=SC2034
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+# shellcheck disable=SC2317
+check_port_state()
+{
+ local netns=$1
+ local port=$2
+ local state=$3
+
+ ip -n "${netns}" -d -j link show "$port" | \
+ jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null
+}
+
+check_pkt_count()
+{
+ RET=0
+ local ns="$1"
+ local iface="$2"
+
+ # wait 65s, one per 30s
+ slowwait_for_counter 65 2 tc_rule_handle_stats_get \
+ "dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null
+}
+
+setup() {
+ setup_ns c_ns s_ns
+
+ # shellcheck disable=SC2154
+ ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}"
+ ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}"
+
+ # Add tc filter to count the pkts
+ tc -n "${c_ns}" qdisc add dev eth0 clsact
+ tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass
+ tc -n "${s_ns}" qdisc add dev eth1 clsact
+ tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass
+
+ ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast
+ ip -n "${s_ns}" link set eth0 master bond0
+ ip -n "${s_ns}" link set eth1 master bond0
+
+ ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast
+ ip -n "${c_ns}" link set eth0 master bond0
+ ip -n "${c_ns}" link set eth1 master bond0
+
+}
+
+trap cleanup_all_ns EXIT
+setup
+
+# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s
+# after interface up
+ip -n "${c_ns}" link set bond0 up
+sleep 2
+
+# 1. The passive side shouldn't send LACPDU.
+check_pkt_count "${c_ns}" "eth0" && RET=1
+log_test "802.3ad lacp_active off" "init port"
+
+ip -n "${s_ns}" link set bond0 up
+# 2. The passive side should not have the 'active' flag.
+RET=0
+slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1
+log_test "802.3ad lacp_active off" "port state active"
+
+# 3. The active side should have the 'active' flag.
+RET=0
+slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1
+log_test "802.3ad lacp_active on" "port state active"
+
+# 4. Make sure the connection is not expired.
+RET=0
+slowwait 5 check_port_state "${s_ns}" "eth0" "distributing"
+slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1
+log_test "bond 802.3ad lacp_active off" "port connection"
+
+# After testing, disconnect one port on each side to check the state.
+ip -n "${s_ns}" link set eth0 nomaster
+ip -n "${s_ns}" link set eth0 up
+ip -n "${c_ns}" link set eth1 nomaster
+ip -n "${c_ns}" link set eth1 up
+# Due to Periodic Machine and Rx Machine state change, the bond will still
+# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure
+# negotiation finished
+sleep 5
+
+# 5. The active side should keep sending LACPDU.
+check_pkt_count "${s_ns}" "eth1" || RET=1
+log_test "bond 802.3ad lacp_active on" "port pkt after disconnect"
+
+# 6. The passive side shouldn't send LACPDU anymore.
+check_pkt_count "${c_ns}" "eth0" && RET=1
+log_test "bond 802.3ad lacp_active off" "port pkt after disconnect"
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
new file mode 100644
index 000000000000..167aa4a4a12a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+# +-------------------------+
+# | bond0 | Server
+# | + | 192.0.2.1/24
+# | eth0 | eth1 | 2001:db8::1/24
+# | +---+---+ |
+# | | | |
+# +-------------------------+
+# | |
+# +-------------------------+
+# | | | |
+# | +---+-------+---+ | Gateway
+# | | br0 | | 192.0.2.254/24
+# | +-------+-------+ | 2001:db8::254/24
+# | | |
+# +-------------------------+
+# |
+# +-------------------------+
+# | | | Client
+# | + | 192.0.2.10/24
+# | eth0 | 2001:db8::10/24
+# +-------------------------+
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+s_ns="s-$(mktemp -u XXXXXX)"
+c_ns="c-$(mktemp -u XXXXXX)"
+g_ns="g-$(mktemp -u XXXXXX)"
+s_ip4="192.0.2.1"
+c_ip4="192.0.2.10"
+g_ip4="192.0.2.254"
+s_ip6="2001:db8::1"
+c_ip6="2001:db8::10"
+g_ip6="2001:db8::254"
+mac[0]="00:0a:0b:0c:0d:01"
+mac[1]="00:0a:0b:0c:0d:02"
+
+gateway_create()
+{
+ ip netns add ${g_ns}
+ ip -n ${g_ns} link add br0 type bridge
+ ip -n ${g_ns} link set br0 up
+ ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
+ ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
+}
+
+gateway_destroy()
+{
+ ip -n ${g_ns} link del br0
+ ip netns del ${g_ns}
+}
+
+server_create()
+{
+ ip netns add ${s_ns}
+ ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
+
+ for i in $(seq 0 1); do
+ ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
+
+ ip -n ${g_ns} link set s${i} up
+ ip -n ${g_ns} link set s${i} master br0
+ ip -n ${s_ns} link set eth${i} master bond0
+
+ tc -n ${g_ns} qdisc add dev s${i} clsact
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+}
+
+# Reset bond with new mode and options
+bond_reset()
+{
+ # Count the eth link number in real-time as this function
+ # maybe called from other topologies.
+ local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+ local param="$1"
+ link_num=$((link_num -1))
+
+ ip -n ${s_ns} link set bond0 down
+ ip -n ${s_ns} link del bond0
+
+ ip -n ${s_ns} link add bond0 type bond $param
+ for i in $(seq 0 ${link_num}); do
+ ip -n ${s_ns} link set eth$i master bond0
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+ # Wait for IPv6 address ready as it needs DAD
+ slowwait 2 ip netns exec ${s_ns} ping6 ${c_ip6} -c 1 -W 0.1 &> /dev/null
+}
+
+server_destroy()
+{
+ # Count the eth link number in real-time as this function
+ # maybe called from other topologies.
+ local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+ link_num=$((link_num -1))
+ for i in $(seq 0 ${link_num}); do
+ ip -n ${s_ns} link del eth${i}
+ done
+ ip netns del ${s_ns}
+}
+
+client_create()
+{
+ ip netns add ${c_ns}
+ ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
+
+ ip -n ${g_ns} link set c0 up
+ ip -n ${g_ns} link set c0 master br0
+
+ ip -n ${c_ns} link set eth0 up
+ ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
+ ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
+}
+
+client_destroy()
+{
+ ip -n ${c_ns} link del eth0
+ ip netns del ${c_ns}
+}
+
+setup_prepare()
+{
+ gateway_create
+ server_create
+ client_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ client_destroy
+ server_destroy
+ gateway_destroy
+}
+
+bond_check_connection()
+{
+ local msg=${1:-"check connection"}
+
+ slowwait 2 ip netns exec ${s_ns} ping ${c_ip4} -c 1 -W 0.1 &> /dev/null
+ ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping failed"
+ ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping6 failed"
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
new file mode 100644
index 000000000000..23a2932301cc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+# +-------------------------------------+
+# | bond0 |
+# | + | Server
+# | eth0 | eth1 eth2 | 192.0.2.1/24
+# | +-------------------+ | 2001:db8::1/24
+# | | | | |
+# +-------------------------------------+
+# | | |
+# +-------------------------------------+
+# | | | | |
+# | +---+---------+---------+---+ | Gateway
+# | | br0 | | 192.0.2.254/24
+# | +-------------+-------------+ | 2001:db8::254/24
+# | | |
+# +-------------------------------------+
+# |
+# +-------------------------------------+
+# | | | Client
+# | + | 192.0.2.10/24
+# | eth0 | 2001:db8::10/24
+# +-------------------------------------+
+
+source bond_topo_2d1c.sh
+mac[2]="00:0a:0b:0c:0d:03"
+
+setup_prepare()
+{
+ gateway_create
+ server_create
+ client_create
+
+ # Add the extra device as we use 3 down links for bond0
+ local i=2
+ ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
+ ip -n ${g_ns} link set s${i} up
+ ip -n ${g_ns} link set s${i} master br0
+ ip -n ${s_ns} link set eth${i} master bond0
+ tc -n ${g_ns} qdisc add dev s${i} clsact
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
new file mode 100644
index 000000000000..991494376223
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -0,0 +1,21 @@
+CONFIG_BONDING=y
+CONFIG_BRIDGE=y
+CONFIG_CONFIGFS_FS=y
+CONFIG_DUMMY=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
+CONFIG_IPV6=y
+CONFIG_IPVLAN=y
+CONFIG_MACVLAN=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NLMON=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
new file mode 100755
index 000000000000..e6fa24eded5b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bond device handling of addr lists (dev->uc, mc)
+#
+
+ALL_TESTS="
+ bond_cleanup_mode1
+ bond_cleanup_mode4
+ bond_listen_lacpdu_multicast_case_down
+ bond_listen_lacpdu_multicast_case_up
+"
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+source "$lib_dir"/lag_lib.sh
+
+
+destroy()
+{
+ local ifnames=(dummy1 dummy2 bond1 mv0)
+ local ifname
+
+ for ifname in "${ifnames[@]}"; do
+ ip link del "$ifname" &>/dev/null
+ done
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ destroy
+}
+
+
+# bond driver control paths vary between modes that have a primary slave
+# (bond_uses_primary()) and others. Test both kinds of modes.
+
+bond_cleanup_mode1()
+{
+ RET=0
+
+ test_LAG_cleanup "bonding" "active-backup"
+}
+
+bond_cleanup_mode4() {
+ RET=0
+
+ test_LAG_cleanup "bonding" "802.3ad"
+}
+
+bond_listen_lacpdu_multicast()
+{
+ # Initial state of bond device, up | down
+ local init_state=$1
+ local lacpdu_mc="01:80:c2:00:00:02"
+
+ ip link add dummy1 type dummy
+ ip link add bond1 "$init_state" type bond mode 802.3ad
+ ip link set dev dummy1 master bond1
+ if [ "$init_state" = "down" ]; then
+ ip link set dev bond1 up
+ fi
+
+ grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null
+ check_err $? "LACPDU multicast address not present on slave (1)"
+
+ ip link set dev bond1 down
+
+ not grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null
+ check_err $? "LACPDU multicast address still present on slave"
+
+ ip link set dev bond1 up
+
+ grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null
+ check_err $? "LACPDU multicast address not present on slave (2)"
+
+ cleanup
+
+ log_test "bonding LACPDU multicast address to slave (from bond $init_state)"
+}
+
+# The LACPDU mc addr is added by different paths depending on the initial state
+# of the bond when enslaving a device. Test both cases.
+
+bond_listen_lacpdu_multicast_case_down()
+{
+ RET=0
+
+ bond_listen_lacpdu_multicast "down"
+}
+
+bond_listen_lacpdu_multicast_case_up()
+{
+ RET=0
+
+ bond_listen_lacpdu_multicast "up"
+}
+
+
+trap cleanup EXIT
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
new file mode 100644
index 000000000000..bf9bcd1b5ec0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
@@ -0,0 +1,177 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NAMESPACES=""
+
+# Test that a link aggregation device (bonding, team) removes the hardware
+# addresses that it adds on its underlying devices.
+test_LAG_cleanup()
+{
+ local driver=$1
+ local mode=$2
+ local ucaddr="02:00:00:12:34:56"
+ local addr6="fe80::78:9abc/64"
+ local mcaddr="33:33:ff:78:9a:bc"
+ local name
+
+ ip link add dummy1 type dummy
+ ip link add dummy2 type dummy
+ if [ "$driver" = "bonding" ]; then
+ name="bond1"
+ ip link add "$name" up type bond mode "$mode"
+ ip link set dev dummy1 master "$name"
+ ip link set dev dummy2 master "$name"
+ elif [ "$driver" = "team" ]; then
+ name="team0"
+ teamd -d -c '
+ {
+ "device": "'"$name"'",
+ "runner": {
+ "name": "'"$mode"'"
+ },
+ "ports": {
+ "dummy1":
+ {},
+ "dummy2":
+ {}
+ }
+ }
+ '
+ ip link set dev "$name" up
+ else
+ check_err 1
+ log_test test_LAG_cleanup ": unknown driver \"$driver\""
+ return
+ fi
+
+ # Used to test dev->uc handling
+ ip link add mv0 link "$name" up address "$ucaddr" type macvlan
+ # Used to test dev->mc handling
+ ip address add "$addr6" dev "$name"
+
+ # Check that addresses were added as expected
+ (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 ||
+ grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null
+ check_err $? "macvlan unicast address not found on a slave"
+
+ # mcaddr is added asynchronously by addrconf_dad_work(), use busywait
+ (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 ||
+ grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null
+ check_err $? "IPv6 solicited-node multicast mac address not found on a slave"
+
+ ip link set dev "$name" down
+ ip link del "$name"
+
+ not grep_bridge_fdb "$ucaddr" bridge fdb show >/dev/null
+ check_err $? "macvlan unicast address still present on a slave"
+
+ not grep_bridge_fdb "$mcaddr" bridge fdb show >/dev/null
+ check_err $? "IPv6 solicited-node multicast mac address still present on a slave"
+
+ cleanup
+
+ log_test "$driver cleanup mode $mode"
+}
+
+# Build a generic 2 node net namespace with 2 connections
+# between the namespaces
+#
+# +-----------+ +-----------+
+# | node1 | | node2 |
+# | | | |
+# | | | |
+# | eth0 +-------+ eth0 |
+# | | | |
+# | eth1 +-------+ eth1 |
+# | | | |
+# +-----------+ +-----------+
+lag_setup2x2()
+{
+ local state=${1:-down}
+ local namespaces="lag_node1 lag_node2"
+
+ # create namespaces
+ for n in ${namespaces}; do
+ ip netns add ${n}
+ done
+
+ # wire up namespaces
+ ip link add name lag1 type veth peer name lag1-end
+ ip link set dev lag1 netns lag_node1 $state name eth0
+ ip link set dev lag1-end netns lag_node2 $state name eth0
+
+ ip link add name lag1 type veth peer name lag1-end
+ ip link set dev lag1 netns lag_node1 $state name eth1
+ ip link set dev lag1-end netns lag_node2 $state name eth1
+
+ NAMESPACES="${namespaces}"
+}
+
+# cleanup all lag related namespaces
+lag_cleanup()
+{
+ for n in ${NAMESPACES}; do
+ ip netns delete ${n} >/dev/null 2>&1 || true
+ done
+}
+
+SWITCH="lag_node1"
+CLIENT="lag_node2"
+CLIENTIP="172.20.2.1"
+SWITCHIP="172.20.2.2"
+
+lag_setup_network()
+{
+ lag_setup2x2 "down"
+
+ # create switch
+ ip netns exec ${SWITCH} ip link add br0 up type bridge
+ ip netns exec ${SWITCH} ip link set eth0 master br0 up
+ ip netns exec ${SWITCH} ip link set eth1 master br0 up
+ ip netns exec ${SWITCH} ip addr add ${SWITCHIP}/24 dev br0
+}
+
+lag_reset_network()
+{
+ ip netns exec ${CLIENT} ip link del bond0
+ ip netns exec ${SWITCH} ip link set eth0 up
+ ip netns exec ${SWITCH} ip link set eth1 up
+}
+
+create_bond()
+{
+ # create client
+ ip netns exec ${CLIENT} ip link set eth0 down
+ ip netns exec ${CLIENT} ip link set eth1 down
+
+ ip netns exec ${CLIENT} ip link add bond0 type bond $@
+ ip netns exec ${CLIENT} ip link set eth0 master bond0
+ ip netns exec ${CLIENT} ip link set eth1 master bond0
+ ip netns exec ${CLIENT} ip link set bond0 up
+ ip netns exec ${CLIENT} ip addr add ${CLIENTIP}/24 dev bond0
+}
+
+test_bond_recovery()
+{
+ RET=0
+
+ create_bond $@
+
+ # verify connectivity
+ slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
+ check_err $? "No connectivity"
+
+ # force the links of the bond down
+ ip netns exec ${SWITCH} ip link set eth0 down
+ sleep 2
+ ip netns exec ${SWITCH} ip link set eth0 up
+ ip netns exec ${SWITCH} ip link set eth1 down
+
+ # re-verify connectivity
+ slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
+
+ local rc=$?
+ check_err $rc "Bond failed to recover"
+ log_test "$1 ($2) bond recovery"
+ lag_reset_network
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
new file mode 100755
index 000000000000..9d26ab4cad0b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# When the bond is configured with down/updelay and the link state of
+# slave members flaps if there are no remaining members up the bond
+# should immediately select a member to bring up. (from bonding.txt
+# section 13.1 paragraph 4)
+#
+# +-------------+ +-----------+
+# | client | | switch |
+# | | | |
+# | +--------| link1 |-----+ |
+# | | +-------+ | |
+# | | | | | |
+# | | +-------+ | |
+# | | bond | link2 | Br0 | |
+# +-------------+ +-----------+
+# 172.20.2.1 172.20.2.2
+
+
+REQUIRE_MZ=no
+REQUIRE_JQ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/lag_lib.sh
+
+cleanup()
+{
+ lag_cleanup
+}
+
+trap cleanup 0 1 2
+
+lag_setup_network
+test_bond_recovery mode 1 miimon 100 updelay 0
+test_bond_recovery mode 1 miimon 100 updelay 200
+test_bond_recovery mode 1 miimon 100 updelay 500
+test_bond_recovery mode 1 miimon 100 updelay 1000
+test_bond_recovery mode 1 miimon 100 updelay 2000
+test_bond_recovery mode 1 miimon 100 updelay 5000
+test_bond_recovery mode 1 miimon 100 updelay 10000
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
new file mode 100755
index 000000000000..2d275b3e47dd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# When the bond is configured with down/updelay and the link state of
+# slave members flaps if there are no remaining members up the bond
+# should immediately select a member to bring up. (from bonding.txt
+# section 13.1 paragraph 4)
+#
+# +-------------+ +-----------+
+# | client | | switch |
+# | | | |
+# | +--------| link1 |-----+ |
+# | | +-------+ | |
+# | | | | | |
+# | | +-------+ | |
+# | | bond | link2 | Br0 | |
+# +-------------+ +-----------+
+# 172.20.2.1 172.20.2.2
+
+
+REQUIRE_MZ=no
+REQUIRE_JQ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/lag_lib.sh
+
+cleanup()
+{
+ lag_cleanup
+}
+
+trap cleanup 0 1 2
+
+lag_setup_network
+test_bond_recovery mode 2 miimon 100 updelay 0
+test_bond_recovery mode 2 miimon 100 updelay 200
+test_bond_recovery mode 2 miimon 100 updelay 500
+test_bond_recovery mode 2 miimon 100 updelay 1000
+test_bond_recovery mode 2 miimon 100 updelay 2000
+test_bond_recovery mode 2 miimon 100 updelay 5000
+test_bond_recovery mode 2 miimon 100 updelay 10000
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
new file mode 100755
index 000000000000..477cc9379500
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
@@ -0,0 +1,361 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This selftest exercises trying to have multiple netpoll users at the same
+# time.
+#
+# This selftest has multiple smalls test inside, and the goal is to
+# get interfaces with bonding and netconsole in different orders in order
+# to catch any possible issue.
+#
+# The main test composes of four interfaces being created using netdevsim; two
+# of them are bonded to serve as the netconsole's transmit interface. The
+# remaining two interfaces are similarly bonded and assigned to a separate
+# network namespace, which acts as the receive interface, where socat monitors
+# for incoming messages.
+#
+# A netconsole message is then sent to ensure it is properly received across
+# this configuration.
+#
+# Later, run a few other tests, to make sure that bonding and netconsole
+# cannot coexist.
+#
+# The test's objective is to exercise netpoll usage when managed simultaneously
+# by multiple subsystems (netconsole and bonding).
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+modprobe bonding 2> /dev/null || true
+modprobe veth 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup_bond EXIT
+
+FORMAT="extended"
+IP_VERSION="ipv4"
+VETH0="veth"$(( RANDOM % 256))
+VETH1="veth"$((256 + RANDOM % 256))
+TXNS=""
+RXNS=""
+
+# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with
+# the bonding interfaces
+function setup_bonding_ifaces() {
+ local RAND=$(( RANDOM % 100 ))
+ BOND_TX_MAIN_IF="bond_tx_$RAND"
+ BOND_RX_MAIN_IF="bond_rx_$RAND"
+
+ # Setup TX
+ if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+ then
+ echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2
+ # only clean nsim ifaces and namespace. Nothing else has been
+ # initialized
+ cleanup_bond_nsim
+ trap - EXIT
+ exit "${ksft_skip}"
+ fi
+
+ # create_netdevsim() got the interface up, but it needs to be down
+ # before being enslaved.
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" down
+ ip -n "${TXNS}" \
+ link set "${BOND_TX2_SLAVE_IF}" down
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ ip -n "${TXNS}" \
+ link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ ip -n "${TXNS}" \
+ link set "${BOND_TX_MAIN_IF}" up
+
+ # Setup RX
+ ip -n "${RXNS}" \
+ link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr
+ ip -n "${RXNS}" \
+ link set "${BOND_RX1_SLAVE_IF}" down
+ ip -n "${RXNS}" \
+ link set "${BOND_RX2_SLAVE_IF}" down
+ ip -n "${RXNS}" \
+ link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+ ip -n "${RXNS}" \
+ link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+ ip -n "${RXNS}" \
+ link set "${BOND_RX_MAIN_IF}" up
+
+ export DSTIF="${BOND_RX_MAIN_IF}"
+ export SRCIF="${BOND_TX_MAIN_IF}"
+}
+
+# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface
+# and the other two will be bond to the RX interface (on the other namespace)
+function create_ifaces_bond() {
+ BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}")
+ BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}")
+ BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}")
+ BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}")
+}
+
+# netdevsim link BOND_TX to BOND_RX interfaces
+function link_ifaces_bond() {
+ local BOND_TX1_SLAVE_IFIDX
+ local BOND_TX2_SLAVE_IFIDX
+ local BOND_RX1_SLAVE_IFIDX
+ local BOND_RX2_SLAVE_IFIDX
+ local TXNS_FD
+ local RXNS_FD
+
+ BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+ cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex)
+ BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+ cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex)
+ BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+ cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex)
+ BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+ cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex)
+
+ exec {TXNS_FD}</var/run/netns/"${TXNS}"
+ exec {RXNS_FD}</var/run/netns/"${RXNS}"
+
+ # Linking TX ifaces to the RX ones (on the other namespace)
+ echo "${TXNS_FD}:$BOND_TX1_SLAVE_IFIDX $RXNS_FD:$BOND_RX1_SLAVE_IFIDX" \
+ > "$NSIM_DEV_SYS_LINK"
+ echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \
+ > "$NSIM_DEV_SYS_LINK"
+
+ exec {TXNS_FD}<&-
+ exec {RXNS_FD}<&-
+}
+
+function create_all_ifaces() {
+ # setup_ns function is coming from lib.sh
+ setup_ns TXNS RXNS
+ export NAMESPACE="${RXNS}"
+
+ # Create two interfaces for RX and two for TX
+ create_ifaces_bond
+ # Link netlink ifaces
+ link_ifaces_bond
+}
+
+# configure DSTIF and SRCIF IPs
+function configure_ifaces_ips() {
+ local IP_VERSION=${1:-"ipv4"}
+ select_ipv4_or_ipv6 "${IP_VERSION}"
+
+ ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}"
+ ip -n "${RXNS}" link set "${DSTIF}" up
+
+ ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}"
+ ip -n "${TXNS}" link set "${SRCIF}" up
+}
+
+function test_enable_netpoll_on_enslaved_iface() {
+ echo 0 > "${NETCONS_PATH}"/enabled
+
+ # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and
+ # linked to BOND_RX1_SLAVE_IF inside the namespace.
+ echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+ # This should fail with the following message in dmesg:
+ # netpoll: netconsole: ethX is a slave device, aborting
+ set +e
+ enable_netcons_ns 2> /dev/null
+ set -e
+
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+ then
+ echo "test failed: Bonding and netpoll cannot co-exists." >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+function test_delete_bond_and_reenable_target() {
+ ip -n "${TXNS}" \
+ link delete "${BOND_TX_MAIN_IF}" type bond
+
+ # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore
+ # netpoll can be plugged in there
+ echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+ # this should work, since the interface is not enslaved
+ enable_netcons_ns
+
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+ then
+ echo "test failed: Unable to start netpoll on an unbond iface." >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Send a netconsole message to the netconsole target
+function test_send_netcons_msg_through_bond_iface() {
+ # Listen for netconsole port inside the namespace and
+ # destination interface
+ listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+ # Wait for socat to start and listen to the port.
+ wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}"
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_result "${OUTPUT_FILE}" "${FORMAT}"
+ # kill socat in case it is still running
+ pkill_socat
+}
+
+# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF.
+# Given BOND_TX_MAIN_IF was deleted, recreate it first
+function test_enslave_netcons_enabled_iface {
+ # netconsole got disabled while the interface was down
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+ then
+ echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # recreate the bonding iface. it got deleted by previous
+ # test (test_delete_bond_and_reenable_target)
+ ip -n "${TXNS}" \
+ link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+
+ # sub-interface need to be down before attaching to bonding
+ # This will also disable netconsole.
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" down
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ ip -n "${TXNS}" \
+ link set "${BOND_TX_MAIN_IF}" up
+
+ # netconsole got disabled while the interface was down
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+ then
+ echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Get netconsole enabled on a bonding interface and attach a second
+# sub-interface.
+function test_enslave_iface_to_bond {
+ # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now
+ echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name
+ enable_netcons_ns
+
+ # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is
+ # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF.
+ ip -n "${TXNS}" \
+ link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+ then
+ echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+function test_enslave_iff_disabled_netpoll_iface {
+ local ret
+
+ # Create two interfaces. veth interfaces it known to have
+ # IFF_DISABLE_NETPOLL set
+ if ! ip link add "${VETH0}" type veth peer name "${VETH1}"
+ then
+ echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2
+ exit "${ksft_skip}"
+ fi
+ set +e
+ # This will print RTNETLINK answers: Device or resource busy
+ ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null
+ ret=$?
+ set -e
+ if [[ $ret -eq 0 ]]
+ then
+ echo "test failed: veth interface could not be enslaved"
+ exit "${ksft_fail}"
+ fi
+}
+
+# Given that netconsole picks the current net namespace, we need to enable it
+# from inside the TXNS namespace
+function enable_netcons_ns() {
+ ip netns exec "${TXNS}" sh -c \
+ "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled"
+}
+
+####################
+# Tests start here #
+####################
+
+# Create regular interfaces using netdevsim and link them
+create_all_ifaces
+
+# Setup the bonding interfaces
+# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF
+# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF
+setup_bonding_ifaces
+
+# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF
+configure_ifaces_ips "${IP_VERSION}"
+
+_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}"
+enable_netcons_ns
+set_user_data
+
+# Test #1 : Create an bonding interface and attach netpoll into
+# the bonding interface. Netconsole/netpoll should work on
+# the bonding interface.
+test_send_netcons_msg_through_bond_iface
+echo "test #1: netpoll on bonding interface worked. Test passed" >&2
+
+# Test #2: Attach netpoll to an enslaved interface
+# Try to attach netpoll to an enslaved sub-interface (while still being part of
+# a bonding interface), which shouldn't be allowed
+test_enable_netpoll_on_enslaved_iface
+echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2
+
+# Test #3: Unplug the sub-interface from bond and enable netconsole
+# Detach the interface from a bonding interface and attach netpoll again
+test_delete_bond_and_reenable_target
+echo "test #3: Able to attach to an unbound interface. Test passed." >&2
+
+# Test #4: Enslave a sub-interface that had netconsole enabled
+# Try to enslave an interface that has netconsole/netpoll enabled.
+# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it
+test_enslave_netcons_enabled_iface
+echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2
+
+# Test #5: Enslave a sub-interface to a bonding interface
+# Enslave an interface to a bond interface that has netpoll attached
+# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of
+# it. Netconsole is currently disabled
+test_enslave_iface_to_bond
+echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2
+
+# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface
+# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is
+# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface
+# and it should fail, with netpoll: veth0 doesn't support polling
+test_enslave_iff_disabled_netpoll_iface
+echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2
+
+cleanup_bond
+trap - EXIT
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings
new file mode 100644
index 000000000000..79b65bdf05db
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/settings
@@ -0,0 +1 @@
+timeout=1200
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
new file mode 100644
index 000000000000..77ccf83d87e0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/config
@@ -0,0 +1,10 @@
+CONFIG_CONFIGFS_FS=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_BTF_MODULES=n
+CONFIG_INET_PSP=y
+CONFIG_IPV6=y
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
+CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile
new file mode 100644
index 000000000000..7994bd0e5c44
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/Makefile
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS := \
+ bridge_locked_port.sh \
+ bridge_mdb.sh \
+ bridge_mld.sh \
+ bridge_vlan_aware.sh \
+ bridge_vlan_mcast.sh \
+ bridge_vlan_unaware.sh \
+ local_termination.sh \
+ no_forwarding.sh \
+ tc_actions.sh \
+ test_bridge_fdb_stress.sh \
+# end of TEST_PROGS
+
+TEST_FILES := \
+ forwarding.config \
+ run_net_forwarding_test.sh \
+# end of TEST_FILES
+
+TEST_INCLUDES := \
+ ../../../net/forwarding/bridge_locked_port.sh \
+ ../../../net/forwarding/bridge_mdb.sh \
+ ../../../net/forwarding/bridge_mld.sh \
+ ../../../net/forwarding/bridge_vlan_aware.sh \
+ ../../../net/forwarding/bridge_vlan_mcast.sh \
+ ../../../net/forwarding/bridge_vlan_unaware.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/forwarding/local_termination.sh \
+ ../../../net/forwarding/no_forwarding.sh \
+ ../../../net/forwarding/tc_actions.sh \
+ ../../../net/forwarding/tc_common.sh \
+ ../../../net/lib.sh \
+# end of TEST_INCLUDES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/forwarding.config b/tools/testing/selftests/drivers/net/dsa/forwarding.config
new file mode 100644
index 000000000000..7adc1396fae0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/forwarding.config
@@ -0,0 +1,2 @@
+NETIF_CREATE=no
+STABLE_MAC_ADDRS=yes
diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
new file mode 100755
index 000000000000..4106c0a102ea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+libdir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
+testname=$(basename "${BASH_SOURCE[0]}")
+
+source "$libdir"/forwarding.config
+cd "$libdir"/../../../net/forwarding/ || exit 1
+source "./$testname" "$@"
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
new file mode 100755
index 000000000000..74682151d04d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Bridge FDB entries can be offloaded to DSA switches without holding the
+# rtnl_mutex. Traditionally this mutex has conferred drivers implicit
+# serialization, which means their code paths are not well tested in the
+# presence of concurrency.
+# This test creates a background task that stresses the FDB by adding and
+# deleting an entry many times in a row without the rtnl_mutex held.
+# It then tests the driver resistance to concurrency by calling .ndo_fdb_dump
+# (with rtnl_mutex held) from a foreground task.
+# Since either the FDB dump or the additions/removals can fail, but the
+# additions and removals are performed in deferred as opposed to process
+# context, we cannot simply check for user space error codes.
+
+WAIT_TIME=1
+NUM_NETIFS=1
+REQUIRE_JQ="no"
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+cleanup() {
+ echo "Cleaning up"
+ kill $pid && wait $pid &> /dev/null
+ ip link del br0
+ echo "Please check kernel log for errors"
+}
+trap 'cleanup' EXIT
+
+eth=${NETIFS[p1]}
+
+ip link del br0 2>&1 >/dev/null || :
+ip link add br0 type bridge && ip link set $eth master br0
+
+(while :; do
+ bridge fdb add 00:01:02:03:04:05 dev $eth master static
+ bridge fdb del 00:01:02:03:04:05 dev $eth master static
+done) &
+pid=$!
+
+for i in $(seq 1 50); do
+ bridge fdb show > /dev/null
+ sleep 3
+ echo "$((${i} * 2))% complete..."
+done
diff --git a/tools/testing/selftests/drivers/net/gro.c b/tools/testing/selftests/drivers/net/gro.c
new file mode 100644
index 000000000000..e894037d2e3e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/gro.c
@@ -0,0 +1,1369 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ * Data packets of the same size and same header setup with correct
+ * sequence numbers coalesce. The one exception being the last data
+ * packet coalesced: it can be smaller than the rest and coalesced
+ * as long as it is in the same flow.
+ * 2.ack
+ * Pure ACK does not coalesce.
+ * 3.flags
+ * Specific test cases: no packets with PSH, SYN, URG, RST set will
+ * be coalesced.
+ * 4.tcp
+ * Packets with incorrect checksum, non-consecutive seqno and
+ * different TCP header options shouldn't coalesce. Nit: given that
+ * some extension headers have paddings, such as timestamp, headers
+ * that are padding differently would not be coalesced.
+ * 5.ip:
+ * Packets with different (ECN, TTL, TOS) header, ip options or
+ * ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+#include "../../net/lib/ksft.h"
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MIN_EXTHDR_SIZE 8
+#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
+#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
+
+#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+static const char *addr6_src = "fdaa::2";
+static const char *addr6_dst = "fdaa::1";
+static const char *addr4_src = "192.168.1.200";
+static const char *addr4_dst = "192.168.1.100";
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+static bool ipip;
+static const int num_flush_id_cases = 6;
+
+static void vlog(const char *fmt, ...)
+{
+ va_list args;
+
+ if (verbose) {
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+ }
+}
+
+static void setup_sock_filter(int fd)
+{
+ const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+ const int ethproto_off = offsetof(struct ethhdr, h_proto);
+ int optlen = 0;
+ int ipproto_off, opt_ipproto_off;
+ int next_off;
+
+ if (ipip)
+ next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol);
+ else if (proto == PF_INET)
+ next_off = offsetof(struct iphdr, protocol);
+ else
+ next_off = offsetof(struct ipv6hdr, nexthdr);
+ ipproto_off = ETH_HLEN + next_off;
+
+ /* Overridden later if exthdrs are used: */
+ opt_ipproto_off = ipproto_off;
+
+ if (strcmp(testname, "ip") == 0) {
+ if (proto == PF_INET)
+ optlen = sizeof(struct ip_timestamp);
+ else {
+ BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
+
+ /* same size for HBH and Fragment extension header types */
+ optlen = MIN_EXTHDR_SIZE;
+ opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
+ + offsetof(struct ip6_ext, ip6e_nxt);
+ }
+ }
+
+ /* this filter validates the following:
+ * - packet is IPv4/IPv6 according to the running test.
+ * - packet is TCP. Also handles the case of one extension header and then TCP.
+ * - checks the packet tcp dport equals to DPORT. Also handles the case of one
+ * extension header and then TCP.
+ */
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+
+ struct sock_fprog bpf = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+ error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ int i;
+
+ for (i = 0; i < len / 2; i++)
+ sum += words[i];
+ if (len & 1)
+ sum += ((char *)data)[len - 1];
+ return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum_nofold(data, len, sum);
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+ struct pseudo_header6 {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph6;
+ struct pseudo_header4 {
+ struct in_addr saddr;
+ struct in_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph4;
+ uint32_t sum = 0;
+
+ if (proto == PF_INET6) {
+ if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1)
+ error(1, errno, "inet_pton6 source ip pseudo");
+ if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1)
+ error(1, errno, "inet_pton6 dest ip pseudo");
+ ph6.protocol = htons(IPPROTO_TCP);
+ ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+ } else if (proto == PF_INET) {
+ if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1)
+ error(1, errno, "inet_pton source ip pseudo");
+ if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1)
+ error(1, errno, "inet_pton dest ip pseudo");
+ ph4.protocol = htons(IPPROTO_TCP);
+ ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+ }
+
+ return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+ if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &mac_addr[0], &mac_addr[1], &mac_addr[2],
+ &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+ error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+ struct ethhdr *eth = buf;
+
+ memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+ memcpy(eth->h_source, src_mac, ETH_ALEN);
+ eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len, int protocol)
+{
+ struct ipv6hdr *ip6h = buf;
+ struct iphdr *iph = buf;
+
+ if (proto == PF_INET6) {
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+ ip6h->nexthdr = protocol;
+ ip6h->hop_limit = 8;
+ if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
+ error(1, errno, "inet_pton source ip6");
+ if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1)
+ error(1, errno, "inet_pton dest ip6");
+ } else if (proto == PF_INET) {
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->ttl = 8;
+ iph->protocol = protocol;
+ iph->tot_len = htons(sizeof(struct tcphdr) +
+ payload_len + sizeof(struct iphdr));
+ iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+ if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1)
+ error(1, errno, "inet_pton source ip");
+ if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1)
+ error(1, errno, "inet_pton dest ip");
+ iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+ }
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ struct tcphdr *tcph = buf;
+
+ memset(tcph, 0, sizeof(*tcph));
+
+ tcph->source = htons(SPORT);
+ tcph->dest = htons(DPORT);
+ tcph->seq = ntohl(START_SEQ + seq_offset);
+ tcph->ack_seq = ntohl(START_ACK + ack_offset);
+ tcph->ack = 1;
+ tcph->fin = fin;
+ tcph->doff = 5;
+ tcph->window = htons(TCP_MAXWIN);
+ tcph->urg_ptr = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+ int ret = -1;
+
+ ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+ if (ret == -1)
+ error(1, errno, "sendto failure");
+ if (ret != len)
+ error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ memset(buf, 0, total_hdr_len);
+ memset(buf + total_hdr_len, 'a', payload_len);
+
+ fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+ payload_len, fin);
+
+ if (ipip) {
+ fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr),
+ IPPROTO_IPIP);
+ fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr),
+ payload_len, IPPROTO_TCP);
+ } else {
+ fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP);
+ }
+
+ fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+ int rst, int urg)
+{
+ static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ int payload_len, pkt_size, flag, i;
+ struct tcphdr *tcph;
+
+ payload_len = PAYLOAD_LEN * psh;
+ pkt_size = total_hdr_len + payload_len;
+ flag = NUM_PACKETS / 2;
+
+ create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+ tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+ tcph->psh = psh;
+ tcph->syn = syn;
+ tcph->rst = rst;
+ tcph->urg = urg;
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+
+ for (i = 0; i < NUM_PACKETS + 1; i++) {
+ if (i == flag) {
+ write_packet(fd, flag_buf, pkt_size, daddr);
+ continue;
+ }
+ create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+ int payload_len1, int payload_len2)
+{
+ static char buf[ETH_HLEN + IP_MAXPACKET];
+
+ create_packet(buf, 0, 0, payload_len1, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+ create_packet(buf, payload_len1, 0, payload_len2, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+ static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+ static char last[TOTAL_HDR_LEN + MSS];
+ static char new_seg[TOTAL_HDR_LEN + MSS];
+ int i;
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ create_packet(pkts[i], i * MSS, 0, MSS, 0);
+ create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+ create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+ write_packet(fd, last, total_hdr_len + remainder, daddr);
+ write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN];
+
+ create_packet(buf, 0, 0, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ create_packet(buf, 0, 1, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+ struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ memmove(buf, no_ext, total_hdr_len);
+ memmove(buf + total_hdr_len + extlen,
+ no_ext + total_hdr_len, PAYLOAD_LEN);
+
+ tcphdr->doff = tcphdr->doff + (extlen / 4);
+ tcphdr->check = 0;
+ tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+ if (proto == PF_INET) {
+ iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+
+ if (ipip) {
+ iph += 1;
+ iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ }
+ } else {
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ }
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+ struct tcp_option_ts {
+ uint8_t kind;
+ uint8_t len;
+ uint32_t tsval;
+ uint32_t tsecr;
+ } *opt_ts = (void *)buf;
+ struct tcp_option_window {
+ uint8_t kind;
+ uint8_t len;
+ uint8_t shift;
+ } *opt_window = (void *)buf;
+
+ switch (kind) {
+ case TCPOPT_NOP:
+ buf[0] = TCPOPT_NOP;
+ break;
+ case TCPOPT_WINDOW:
+ memset(opt_window, 0, sizeof(struct tcp_option_window));
+ opt_window->kind = TCPOPT_WINDOW;
+ opt_window->len = TCPOLEN_WINDOW;
+ opt_window->shift = 0;
+ break;
+ case TCPOPT_TIMESTAMP:
+ memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+ opt_ts->kind = TCPOPT_TIMESTAMP;
+ opt_ts->len = TCPOLEN_TIMESTAMP;
+ opt_ts->tsval = ts;
+ opt_ts->tsecr = 0;
+ break;
+ default:
+ error(1, 0, "unimplemented TCP option");
+ break;
+ }
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+ switch (order) {
+ case 0:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+ TCPOPT_TIMESTAMP, ts);
+ break;
+ case 1:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1,
+ TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+ TCPOPT_NOP, 0);
+ break;
+ case 2:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+ TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+ TCPOPT_NOP, 0);
+ break;
+ default:
+ error(1, 0, "unknown order");
+ break;
+ }
+ recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->check = tcph->check - 1;
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->seq = ntohl(htonl(tcph->seq) + 1);
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+ * don't coalesce.
+ */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 1);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 2);
+ write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+ int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+ int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+ recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+ write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+ struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+ int optlen = sizeof(struct ip_timestamp);
+ struct iphdr *iph;
+
+ if (optlen % 4)
+ error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+ ts->ipt_code = IPOPT_TS;
+ ts->ipt_len = optlen;
+ ts->ipt_ptr = 5;
+ ts->ipt_flg = IPOPT_TS_TSONLY;
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph = (struct iphdr *)(optpkt + ETH_HLEN);
+ iph->ihl = 5 + (optlen / 4);
+ iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
+{
+ struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
+ struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
+ char *exthdr_payload_start = (char *)(exthdr + 1);
+
+ exthdr->hdrlen = 0;
+ exthdr->nexthdr = IPPROTO_TCP;
+
+ memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr));
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph->nexthdr = exthdr_type;
+ iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
+}
+
+static void fix_ip4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+}
+
+static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
+{
+ static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
+ bool send_three = false;
+ struct iphdr *iph1;
+ struct iphdr *iph2;
+ struct iphdr *iph3;
+
+ iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
+ iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
+ iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
+
+ create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
+ create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+
+ switch (tcase) {
+ case 0: /* DF=1, Incrementing - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 1: /* DF=1, Fixed - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 2: /* DF=0, Incrementing - should coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 3: /* DF=0, Fixed - should coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 4: /* DF=1, two packets incrementing, and one fixed - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+
+ case 5: /* DF=1, two packets fixed, and one incrementing - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+ }
+
+ fix_ip4_checksum(iph1);
+ fix_ip4_checksum(iph2);
+ write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
+ write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ if (send_three) {
+ fix_ip4_checksum(iph3);
+ write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
+{
+ for (int i = 0; i < num_flush_id_cases; i++) {
+ sleep(1);
+ send_flush_id_case(fd, daddr, i);
+ sleep(1);
+ write_packet(fd, fin_pkt, total_hdr_len, daddr);
+ }
+}
+
+static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1);
+ write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2);
+ write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+ int optlen = sizeof(struct ip_timestamp);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv4_ts_option(buf, optpkt);
+ write_packet(fd, optpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/* IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[IP_MAXPACKET];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ /* Once fragmented, packet would retain the total_len.
+ * Tcp header is prepared as if rest of data is in follow-up frags,
+ * but follow up frags aren't actually sent.
+ */
+ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+ fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+ fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP);
+ fill_datalinklayer(buf);
+
+ iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ iph->ttl = 7;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ iph->tos = 1;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else if (proto == PF_INET6) {
+ ip6h->priority = 0xf;
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+ sizeof(struct ip6_frag)];
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+ int extlen = sizeof(struct ip6_frag);
+ int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+ int extpkt_len = bufpkt_len + extlen;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+ }
+ sleep(1);
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ memset(extpkt, 0, extpkt_len);
+
+ ip6h->nexthdr = IPPROTO_FRAGMENT;
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ frag->ip6f_nxt = IPPROTO_TCP;
+
+ memcpy(extpkt, buf, tcp_offset);
+ memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+ write_packet(fd, extpkt, extpkt_len, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+ struct sockaddr_ll daddr = {};
+
+ daddr.sll_family = AF_PACKET;
+ daddr.sll_protocol = ethhdr_proto;
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+
+ if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+ error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+ struct timeval timeout;
+
+ timeout.tv_sec = 3;
+ timeout.tv_usec = 0;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+ sizeof(timeout)) < 0)
+ error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+ int correct_num_pkts)
+{
+ static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+ struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+ struct tcphdr *tcph;
+ bool bad_packet = false;
+ int tcp_ext_len = 0;
+ int ip_ext_len = 0;
+ int pkt_size = -1;
+ int data_len = 0;
+ int num_pkt = 0;
+ int i;
+
+ vlog("Expected {");
+ for (i = 0; i < correct_num_pkts; i++)
+ vlog("%d ", correct_payload[i]);
+ vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+ while (1) {
+ ip_ext_len = 0;
+ pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+ if (pkt_size < 0)
+ error(1, errno, "could not receive");
+
+ if (iph->version == 4)
+ ip_ext_len = (iph->ihl - 5) * 4;
+ else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+ ip_ext_len = MIN_EXTHDR_SIZE;
+
+ tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+ if (tcph->fin)
+ break;
+
+ tcp_ext_len = (tcph->doff - 5) * 4;
+ data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+ /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+ * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+ * Packet sockets are protocol agnostic, and will not trim the padding.
+ */
+ if (pkt_size == ETH_ZLEN && iph->version == 4) {
+ data_len = ntohs(iph->tot_len)
+ - sizeof(struct tcphdr) - sizeof(struct iphdr);
+ }
+ vlog("%d ", data_len);
+ if (data_len != correct_payload[num_pkt]) {
+ vlog("[!=%d]", correct_payload[num_pkt]);
+ bad_packet = true;
+ }
+ num_pkt++;
+ }
+ vlog("}, Total %d packets.\n", num_pkt);
+ if (num_pkt != correct_num_pkts)
+ error(1, 0, "incorrect number of packets");
+ if (bad_packet)
+ error(1, 0, "incorrect packet geometry");
+
+ printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+ const int fin_delay_us = 100 * 1000;
+ static char fin_pkt[MAX_HDR_LEN];
+ struct sockaddr_ll daddr = {};
+ int txfd = -1;
+
+ txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (txfd < 0)
+ error(1, errno, "socket creation");
+
+ memset(&daddr, 0, sizeof(daddr));
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+ daddr.sll_family = AF_PACKET;
+ memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+ daddr.sll_halen = ETH_ALEN;
+ create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+ if (strcmp(testname, "data") == 0) {
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ack") == 0) {
+ send_ack(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags") == 0) {
+ send_flags(txfd, &daddr, 1, 0, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 1, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 1, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 0, 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "tcp") == 0) {
+ send_changed_checksum(txfd, &daddr);
+ /* Adding sleep before sending FIN so that it is not
+ * received prior to other packets.
+ */
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_seq(txfd, &daddr);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_ts(txfd, &daddr);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_diff_opt(txfd, &daddr);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip") == 0) {
+ send_changed_ECN(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_tos(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ if (proto == PF_INET) {
+ /* Modified packets may be received out of order.
+ * Sleep function added to enforce test boundaries
+ * so that fin pkts are not received prior to other pkts.
+ */
+ sleep(1);
+ send_changed_ttl(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_ip_options(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_fragment4(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ test_flush_id(txfd, &daddr, fin_pkt);
+ } else if (proto == PF_INET6) {
+ sleep(1);
+ send_fragment6(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ /* send IPv6 packets with ext header with same payload */
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ /* send IPv6 packets with ext header with different payload */
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ /* 20 is the difference between min iphdr size
+ * and min ipv6hdr size. Like MAX_HDR_SIZE,
+ * MAX_PAYLOAD is defined with the larger header of the two.
+ */
+ int offset = (proto == PF_INET && !ipip) ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ send_large(txfd, &daddr, remainder);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_large(txfd, &daddr, remainder + 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else {
+ error(1, 0, "Unknown testcase");
+ }
+
+ if (close(txfd))
+ error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+ static int correct_payload[NUM_PACKETS];
+ int rxfd = -1;
+
+ rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+ if (rxfd < 0)
+ error(1, 0, "socket creation");
+ setup_sock_filter(rxfd);
+ set_timeout(rxfd);
+ bind_packetsocket(rxfd);
+
+ ksft_ready();
+
+ memset(correct_payload, 0, sizeof(correct_payload));
+
+ if (strcmp(testname, "data") == 0) {
+ printf("pure data packet of same size: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("large data packets followed by a smaller one: ");
+ correct_payload[0] = PAYLOAD_LEN * 1.5;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("small data packets followed by a larger one: ");
+ correct_payload[0] = PAYLOAD_LEN / 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ack") == 0) {
+ printf("duplicate ack and pure ack: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "flags") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 3;
+ correct_payload[1] = PAYLOAD_LEN * 2;
+
+ printf("psh flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
+ printf("syn flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("rst flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("urg flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "tcp") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ correct_payload[3] = PAYLOAD_LEN;
+
+ printf("changed checksum does not coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Wrong Seq number doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Different timestamp doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 4);
+
+ printf("Different options doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+
+ printf("different ECN doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("different tos doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ if (proto == PF_INET) {
+ printf("different ttl doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("ip options doesn't coalesce: ");
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("fragmented ip4 doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* is_atomic checks */
+ printf("DF=1, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=1, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (proto == PF_INET6) {
+ /* GRO doesn't check for ipv6 hop limit when flushing.
+ * Hence no corresponding test to the ipv4 case.
+ */
+ printf("fragmented ip6 doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("ipv6 with ext header does coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("ipv6 with ext header with different payloads doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ int offset = (proto == PF_INET && !ipip) ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ correct_payload[0] = (MAX_PAYLOAD + offset);
+ correct_payload[1] = remainder;
+ printf("Shouldn't coalesce if exceed IP max pkt size: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* last segment sent individually, doesn't start new segment */
+ correct_payload[0] = correct_payload[0] - remainder;
+ correct_payload[1] = remainder + 1;
+ correct_payload[2] = remainder + 1;
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else {
+ error(1, 0, "Test case error, should never trigger");
+ }
+
+ if (close(rxfd))
+ error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+ static const struct option opts[] = {
+ { "daddr", required_argument, NULL, 'd' },
+ { "dmac", required_argument, NULL, 'D' },
+ { "iface", required_argument, NULL, 'i' },
+ { "ipv4", no_argument, NULL, '4' },
+ { "ipv6", no_argument, NULL, '6' },
+ { "ipip", no_argument, NULL, 'e' },
+ { "rx", no_argument, NULL, 'r' },
+ { "saddr", required_argument, NULL, 's' },
+ { "smac", required_argument, NULL, 'S' },
+ { "test", required_argument, NULL, 't' },
+ { "verbose", no_argument, NULL, 'v' },
+ { 0, 0, 0, 0 }
+ };
+ int c;
+
+ while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) {
+ switch (c) {
+ case '4':
+ proto = PF_INET;
+ ethhdr_proto = htons(ETH_P_IP);
+ break;
+ case '6':
+ proto = PF_INET6;
+ ethhdr_proto = htons(ETH_P_IPV6);
+ break;
+ case 'e':
+ ipip = true;
+ proto = PF_INET;
+ ethhdr_proto = htons(ETH_P_IP);
+ break;
+ case 'd':
+ addr4_dst = addr6_dst = optarg;
+ break;
+ case 'D':
+ dmac = optarg;
+ break;
+ case 'i':
+ ifname = optarg;
+ break;
+ case 'r':
+ tx_socket = false;
+ break;
+ case 's':
+ addr4_src = addr6_src = optarg;
+ break;
+ case 'S':
+ smac = optarg;
+ break;
+ case 't':
+ testname = optarg;
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ default:
+ error(1, 0, "%s invalid option %c\n", __func__, c);
+ break;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_args(argc, argv);
+
+ if (ipip) {
+ tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2;
+ total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+ } else if (proto == PF_INET) {
+ tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+ total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+ } else if (proto == PF_INET6) {
+ tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+ total_hdr_len = MAX_HDR_LEN;
+ } else {
+ error(1, 0, "Protocol family is not ipv4 or ipv6");
+ }
+
+ read_MAC(src_mac, smac);
+ read_MAC(dst_mac, dmac);
+
+ if (tx_socket) {
+ gro_sender();
+ } else {
+ /* Only the receiver exit status determines test success. */
+ gro_receiver();
+ fprintf(stderr, "Gro::%s test passed.\n", testname);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py
new file mode 100755
index 000000000000..ba83713bf7b5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/gro.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+GRO (Generic Receive Offload) conformance tests.
+
+Validates that GRO coalescing works correctly by running the gro
+binary in different configurations and checking for correct packet
+coalescing behavior.
+
+Test cases:
+ - data: Data packets with same size/headers and correct seq numbers coalesce
+ - ack: Pure ACK packets do not coalesce
+ - flags: Packets with PSH, SYN, URG, RST flags do not coalesce
+ - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce
+ - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce
+ - large: Packets larger than GRO_MAX_SIZE don't coalesce
+"""
+
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, KsftXfailEx
+from lib.py import cmd, defer, bkg, ip
+from lib.py import ksft_variants
+
+
+def _resolve_dmac(cfg, ipver):
+ """
+ Find the destination MAC address remote host should use to send packets
+ towards the local host. It may be a router / gateway address.
+ """
+
+ attr = "dmac" + ipver
+ # Cache the response across test cases
+ if hasattr(cfg, attr):
+ return getattr(cfg, attr)
+
+ route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}",
+ json=True, host=cfg.remote)[0]
+ gw = route.get("gateway")
+ # Local L2 segment, address directly
+ if not gw:
+ setattr(cfg, attr, cfg.dev['address'])
+ return getattr(cfg, attr)
+
+ # ping to make sure neighbor is resolved,
+ # bind to an interface, for v6 the GW is likely link local
+ cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote)
+
+ neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}",
+ json=True, host=cfg.remote)[0]
+ setattr(cfg, attr, neigh['lladdr'])
+ return getattr(cfg, attr)
+
+
+def _write_defer_restore(cfg, path, val, defer_undo=False):
+ with open(path, "r", encoding="utf-8") as fp:
+ orig_val = fp.read().strip()
+ if str(val) == orig_val:
+ return
+ with open(path, "w", encoding="utf-8") as fp:
+ fp.write(val)
+ if defer_undo:
+ defer(_write_defer_restore, cfg, path, orig_val)
+
+
+def _set_mtu_restore(dev, mtu, host):
+ if dev['mtu'] < mtu:
+ ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host)
+ defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host)
+
+
+def _setup(cfg, test_name):
+ """ Setup hardware loopback mode for GRO testing. """
+
+ if not hasattr(cfg, "bin_remote"):
+ cfg.bin_local = cfg.test_dir / "gro"
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ # "large" test needs at least 4k MTU
+ if test_name == "large":
+ _set_mtu_restore(cfg.dev, 4096, None)
+ _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote)
+
+ flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout"
+ irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs"
+
+ _write_defer_restore(cfg, flush_path, "200000", defer_undo=True)
+ _write_defer_restore(cfg, irq_path, "10", defer_undo=True)
+
+ try:
+ # Disable TSO for local tests
+ cfg.require_nsim() # will raise KsftXfailEx if not running on nsim
+
+ cmd(f"ethtool -K {cfg.ifname} gro on tso off")
+ cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote)
+ except KsftXfailEx:
+ pass
+
+def _gro_variants():
+ """Generator that yields all combinations of protocol and test types."""
+
+ for protocol in ["ipv4", "ipv6", "ipip"]:
+ for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]:
+ yield protocol, test_name
+
+
+@ksft_variants(_gro_variants())
+def test(cfg, protocol, test_name):
+ """Run a single GRO test with retries."""
+
+ ipver = "6" if protocol[-1] == "6" else "4"
+ cfg.require_ipver(ipver)
+
+ _setup(cfg, test_name)
+
+ base_cmd_args = [
+ f"--{protocol}",
+ f"--dmac {_resolve_dmac(cfg, ipver)}",
+ f"--smac {cfg.remote_dev['address']}",
+ f"--daddr {cfg.addr_v[ipver]}",
+ f"--saddr {cfg.remote_addr_v[ipver]}",
+ f"--test {test_name}",
+ "--verbose"
+ ]
+ base_args = " ".join(base_cmd_args)
+
+ # Each test is run 6 times to deflake, because given the receive timing,
+ # not all packets that should coalesce will be considered in the same flow
+ # on every try.
+ max_retries = 6
+ for attempt in range(max_retries):
+ rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}"
+ tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}"
+
+ fail_now = attempt >= max_retries - 1
+
+ with bkg(rx_cmd, ksft_ready=True, exit_wait=True,
+ fail=fail_now) as rx_proc:
+ cmd(tx_cmd, host=cfg.remote)
+
+ if rx_proc.ret == 0:
+ return
+
+ ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+ ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+ if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"):
+ ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment")
+ return
+
+ ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(cases=[test], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
new file mode 100755
index 000000000000..c4fe049e9baa
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -0,0 +1,329 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import errno
+import os
+import random
+from typing import Union
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
+from lib.py import CmdExitFailure, EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+from lib.py import defer, ethtool, ip
+
+
+def _get_hds_mode(cfg, netnl) -> str:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'tcp-data-split' not in rings:
+ raise KsftSkipEx('tcp-data-split not supported by device')
+ return rings['tcp-data-split']
+
+
+def _xdp_onoff(cfg):
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ ip("link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, prog))
+ ip("link set dev %s xdp off" % cfg.ifname)
+
+
+def _ioctl_ringparam_modify(cfg, netnl) -> None:
+ """
+ Helper for performing a hopefully unimportant IOCTL SET.
+ IOCTL does not support HDS, so it should not affect the HDS config.
+ """
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+
+ if 'tx' not in rings:
+ raise KsftSkipEx('setting Tx ring size not supported')
+
+ try:
+ ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}")
+ except CmdExitFailure as e:
+ ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}")
+ defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}")
+
+
+def get_hds(cfg, netnl) -> None:
+ _get_hds_mode(cfg, netnl)
+
+
+def get_hds_thresh(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+
+
+def _hds_reset(cfg, netnl, rings) -> None:
+ cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+
+ arg = {'header': {'dev-index': cfg.ifindex}}
+ if cur.get('tcp-data-split') != rings.get('tcp-data-split'):
+ # Try to reset to "unknown" first, we don't know if the setting
+ # was the default or user chose it. Default seems more likely.
+ arg['tcp-data-split'] = "unknown"
+ netnl.rings_set(arg)
+ cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if cur['tcp-data-split'] == rings['tcp-data-split']:
+ del arg['tcp-data-split']
+ else:
+ # Try the explicit setting
+ arg['tcp-data-split'] = rings['tcp-data-split']
+ if cur.get('hds-thresh') != rings.get('hds-thresh'):
+ arg['hds-thresh'] = rings['hds-thresh']
+ if len(arg) > 1:
+ netnl.rings_set(arg)
+
+
+def _defer_reset_hds(cfg, netnl) -> Union[dict, None]:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if 'hds-thresh' in rings or 'tcp-data-split' in rings:
+ defer(_hds_reset, cfg, netnl, rings)
+ except NlError as e:
+ pass
+
+
+def set_hds_enable(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("disabling of HDS not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'tcp-data-split' not in rings:
+ raise KsftSkipEx('tcp-data-split not supported by device')
+
+ ksft_eq('enabled', rings['tcp-data-split'])
+
+def set_hds_disable(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("disabling of HDS not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'tcp-data-split' not in rings:
+ raise KsftSkipEx('tcp-data-split not supported by device')
+
+ ksft_eq('disabled', rings['tcp-data-split'])
+
+def set_hds_thresh_zero(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+
+ ksft_eq(0, rings['hds-thresh'])
+
+def set_hds_thresh_random(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ if 'hds-thresh-max' not in rings:
+ raise KsftSkipEx('hds-thresh-max not defined by device')
+
+ if rings['hds-thresh-max'] < 2:
+ raise KsftSkipEx('hds-thresh-max is too small')
+ elif rings['hds-thresh-max'] == 2:
+ hds_thresh = 1
+ else:
+ while True:
+ hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1)
+ if hds_thresh != rings['hds-thresh']:
+ break
+
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ ksft_eq(hds_thresh, rings['hds-thresh'])
+
+def set_hds_thresh_max(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ ksft_eq(rings['hds-thresh'], rings['hds-thresh-max'])
+
+def set_hds_thresh_gt(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ if 'hds-thresh-max' not in rings:
+ raise KsftSkipEx('hds-thresh-max not defined by device')
+ hds_gt = rings['hds-thresh-max'] + 1
+ with ksft_raises(NlError) as e:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt})
+ ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ _defer_reset_hds(cfg, netnl)
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl)
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
+def set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl) # Trigger skip if not supported
+
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
+def ioctl(cfg, netnl) -> None:
+ mode1 = _get_hds_mode(cfg, netnl)
+ _ioctl_ringparam_modify(cfg, netnl)
+ mode2 = _get_hds_mode(cfg, netnl)
+
+ ksft_eq(mode1, mode2)
+
+
+def ioctl_set_xdp(cfg, netnl) -> None:
+ """
+ Like set_xdp(), but we perturb the settings via the legacy ioctl.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _ioctl_ringparam_modify(cfg, netnl)
+
+ _xdp_onoff(cfg)
+
+
+def ioctl_enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl) # Trigger skip if not supported
+
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
+def main() -> None:
+ with NetDrvEnv(__file__, queue_count=3) as cfg:
+ ksft_run([get_hds,
+ get_hds_thresh,
+ set_hds_disable,
+ set_hds_enable,
+ set_hds_thresh_random,
+ set_hds_thresh_zero,
+ set_hds_thresh_max,
+ set_hds_thresh_gt,
+ set_xdp,
+ enabled_set_xdp,
+ ioctl,
+ ioctl_set_xdp,
+ ioctl_enabled_set_xdp],
+ args=(cfg, EthtoolFamily()))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore
new file mode 100644
index 000000000000..46540468a775
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+iou-zcrx
+ncdevmem
+toeplitz
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
new file mode 100644
index 000000000000..9c163ba6feee
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+# Check if io_uring supports zero-copy receive
+HAS_IOURING_ZCRX := $(shell \
+ echo -e '#include <liburing.h>\n' \
+ 'void *func = (void *)io_uring_register_ifq;\n' \
+ 'int main() {return 0;}' | \
+ $(CC) -luring -x c - -o /dev/null 2>&1 && echo y)
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+COND_GEN_FILES += iou-zcrx
+else
+$(warning excluding iouring tests, liburing not installed or too old)
+endif
+
+TEST_GEN_FILES := \
+ $(COND_GEN_FILES) \
+# end of TEST_GEN_FILES
+
+TEST_PROGS = \
+ csum.py \
+ devlink_port_split.py \
+ devlink_rate_tc_bw.py \
+ devmem.py \
+ ethtool.sh \
+ ethtool_extended_state.sh \
+ ethtool_mm.sh \
+ ethtool_rmon.sh \
+ hw_stats_l3.sh \
+ hw_stats_l3_gre.sh \
+ iou-zcrx.py \
+ irq.py \
+ loopback.sh \
+ nic_timestamp.py \
+ pp_alloc_fail.py \
+ rss_api.py \
+ rss_ctx.py \
+ rss_flow_label.py \
+ rss_input_xfrm.py \
+ toeplitz.py \
+ tso.py \
+ xsk_reconfig.py \
+ #
+
+TEST_FILES := \
+ ethtool_lib.sh \
+ #
+
+TEST_INCLUDES := \
+ $(wildcard lib/py/*.py ../lib/py/*.py) \
+ ../../../net/lib.sh \
+ ../../../net/forwarding/ipip_lib.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/forwarding/tc_common.sh \
+ #
+
+# YNL files, must be before "include ..lib.mk"
+YNL_GEN_FILES := \
+ ncdevmem \
+ toeplitz \
+# end of YNL_GEN_FILES
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+
+include ../../../lib.mk
+
+# YNL build
+YNL_GENS := \
+ ethtool \
+ netdev \
+# end of YNL_GENS
+
+include ../../../net/ynl.mk
+
+include ../../../net/bpf.mk
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+$(OUTPUT)/iou-zcrx: LDLIBS += -luring
+endif
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
new file mode 100644
index 000000000000..2307aa001be1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -0,0 +1,11 @@
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
+CONFIG_IO_URING=y
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_UDMABUF=y
+CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
new file mode 100755
index 000000000000..3e3a89a34afe
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+from os import path
+
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, wait_port_listen
+
+def test_receive(cfg, ipver="6", extra_args=None):
+ """Test local nic checksum receive. Remote host sends crafted packets."""
+ if not cfg.have_rx_csum:
+ raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}")
+
+ ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}"
+
+ rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
+ tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
+
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(34000, proto="udp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_transmit(cfg, ipver="6", extra_args=None):
+ """Test local nic checksum transmit. Remote host verifies packets."""
+ if (not cfg.have_tx_csum_generic and
+ not (cfg.have_tx_csum_ipv4 and ipver == "4") and
+ not (cfg.have_tx_csum_ipv6 and ipver == "6")):
+ raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}")
+
+ ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}"
+
+ # Cannot randomize input when calculating zero checksum
+ if extra_args != "-U -Z":
+ extra_args += " -r 1"
+
+ rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
+ tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}"
+
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(34000, proto="udp", host=cfg.remote)
+ cmd(tx_cmd)
+
+
+def test_builder(name, cfg, ipver="6", tx=False, extra_args=""):
+ """Construct specific tests from the common template.
+
+ Most tests follow the same basic pattern, differing only in
+ Direction of the test and optional flags passed to csum."""
+ def f(cfg):
+ cfg.require_ipver(ipver)
+
+ if tx:
+ test_transmit(cfg, ipver, extra_args)
+ else:
+ test_receive(cfg, ipver, extra_args)
+
+ f.__name__ = f"ipv{ipver}_" + name
+ return f
+
+
+def check_nic_features(cfg) -> None:
+ """Test whether Tx and Rx checksum offload are enabled.
+
+ If the device under test has either off, then skip the relevant tests."""
+ cfg.have_tx_csum_generic = False
+ cfg.have_tx_csum_ipv4 = False
+ cfg.have_tx_csum_ipv6 = False
+ cfg.have_rx_csum = False
+
+ ethnl = EthtoolFamily()
+ features = ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ if f["name"] == "tx-checksum-ip-generic":
+ cfg.have_tx_csum_generic = True
+ elif f["name"] == "tx-checksum-ipv4":
+ cfg.have_tx_csum_ipv4 = True
+ elif f["name"] == "tx-checksum-ipv6":
+ cfg.have_tx_csum_ipv6 = True
+ elif f["name"] == "rx-checksum":
+ cfg.have_rx_csum = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ check_nic_features(cfg)
+
+ cfg.bin_local = cfg.net_lib_dir / "csum"
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ cases = []
+ for ipver in ["4", "6"]:
+ cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t"))
+ cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E"))
+
+ cases.append(test_builder("rx_udp", cfg, ipver, False, ""))
+ cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E"))
+
+ cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U"))
+ cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z"))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
index 834066d465fc..2d84c7a0be6b 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
@@ -18,6 +18,8 @@ import sys
#
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
Port = collections.namedtuple('Port', 'bus_info name')
@@ -57,6 +59,8 @@ class devlink_ports(object):
assert stderr == ""
ports = json.loads(stdout)['port']
+ validate_devlink_output(ports, 'flavour')
+
for port in ports:
if dev in port:
if ports[port]['flavour'] == 'physical':
@@ -218,6 +222,27 @@ def split_splittable_port(port, k, lanes, dev):
unsplit(port.bus_info)
+def validate_devlink_output(devlink_data, target_property=None):
+ """
+ Determine if test should be skipped by checking:
+ 1. devlink_data contains values
+ 2. The target_property exist in devlink_data
+ """
+ skip_reason = None
+ if any(devlink_data.values()):
+ if target_property:
+ skip_reason = "{} not found in devlink output, test skipped".format(target_property)
+ for key in devlink_data:
+ if target_property in devlink_data[key]:
+ skip_reason = None
+ else:
+ skip_reason = 'devlink output is empty, test skipped'
+
+ if skip_reason:
+ print(skip_reason)
+ sys.exit(KSFT_SKIP)
+
+
def make_parser():
parser = argparse.ArgumentParser(description='A test for port splitting.')
parser.add_argument('--dev',
@@ -238,6 +263,7 @@ def main(cmdline=None):
stdout, stderr = run_command(cmd)
assert stderr == ""
+ validate_devlink_output(json.loads(stdout))
devs = json.loads(stdout)['dev']
dev = list(devs.keys())[0]
@@ -249,6 +275,7 @@ def main(cmdline=None):
ports = devlink_ports(dev)
+ found_max_lanes = False
for port in ports.if_names:
max_lanes = get_max_lanes(port.name)
@@ -271,6 +298,11 @@ def main(cmdline=None):
split_splittable_port(port, lane, max_lanes, dev)
lane //= 2
+ found_max_lanes = True
+
+ if not found_max_lanes:
+ print(f"Test not started, no port of device {dev} reports max_lanes")
+ sys.exit(KSFT_SKIP)
if __name__ == "__main__":
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
new file mode 100755
index 000000000000..4e4faa9275bb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
@@ -0,0 +1,439 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Devlink Rate TC Bandwidth Test Suite
+===================================
+
+This test suite verifies the functionality of devlink-rate traffic class (TC)
+bandwidth distribution in a virtualized environment. The tests validate that
+bandwidth can be properly allocated between different traffic classes and
+that TC mapping works as expected.
+
+Test Environment:
+----------------
+- Creates 1 VF
+- Establishes a bridge connecting the VF representor and the uplink representor
+- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102)
+- Configures different traffic classes (TC3 and TC4) for each VLAN
+
+Test Cases:
+----------
+1. test_no_tc_mapping_bandwidth:
+ - Verifies that without TC mapping, bandwidth is NOT distributed according to
+ the configured 20/80 split between TC3 and TC4
+ - This test should fail if bandwidth matches the 20/80 split without TC
+ mapping
+ - Expected: Bandwidth should NOT be distributed as 20/80
+
+2. test_tc_mapping_bandwidth:
+ - Configures TC mapping using mqprio qdisc
+ - Verifies that with TC mapping, bandwidth IS distributed according to the
+ configured 20/80 split between TC3 and TC4
+ - Expected: Bandwidth should be distributed as 20/80
+
+Bandwidth Distribution:
+----------------------
+- TC3 (VLAN 101): Configured for 20% of total bandwidth
+- TC4 (VLAN 102): Configured for 80% of total bandwidth
+- Total bandwidth: 1Gbps
+- Tolerance: +-12%
+
+Hardware-Specific Behavior (mlx5):
+--------------------------
+mlx5 hardware enforces traffic class separation by ensuring that each transmit
+queue (SQ) is associated with a single TC. If a packet is sent on a queue that
+doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set
+mapping), the hardware moves the queue to the correct TC scheduler to preserve
+traffic isolation.
+
+This behavior means that even without explicit TC-to-queue mapping, bandwidth
+enforcement may still appear to work—because the hardware dynamically adjusts
+the scheduling context. However, this can lead to performance issues in high
+rates and HOL blocking if traffic from different TCs is mixed on the same queue.
+"""
+
+import json
+import os
+import subprocess
+import threading
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit
+from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+from lib.py import NetDrvEpEnv, DevlinkFamily
+from lib.py import NlError
+from lib.py import cmd, defer, ethtool, ip
+from lib.py import Iperf3Runner
+
+
+class BandwidthValidator:
+ """
+ Validates total bandwidth and individual shares with tolerance
+ relative to the overall total.
+ """
+
+ def __init__(self, shares):
+ self.tolerance_percent = 12
+ self.expected_total = sum(shares.values())
+ self.bounds = {}
+
+ for name, exp in shares.items():
+ self.bounds[name] = (self.min_expected(exp), self.max_expected(exp))
+
+ def min_expected(self, value):
+ """Calculates the minimum acceptable value based on tolerance."""
+ return value - (self.expected_total * self.tolerance_percent / 100)
+
+ def max_expected(self, value):
+ """Calculates the maximum acceptable value based on tolerance."""
+ return value + (self.expected_total * self.tolerance_percent / 100)
+
+ def bound(self, values):
+ """
+ Return True if all given values fall within tolerance.
+ """
+ for name, value in values.items():
+ low, high = self.bounds[name]
+ if not low <= value <= high:
+ return False
+ return True
+
+
+def setup_vf(cfg, set_tc_mapping=True):
+ """
+ Sets up a VF on the given network interface.
+
+ Enables SR-IOV and switchdev mode, brings the VF interface up,
+ and optionally configures TC mapping using mqprio.
+ """
+ try:
+ cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev")
+ defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy")
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
+ try:
+ cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+ defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc
+
+ time.sleep(2)
+ vf_ifc = (os.listdir(
+ f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0]
+ if vf_ifc:
+ ip(f"link set dev {vf_ifc} up")
+ else:
+ raise KsftSkipEx("VF interface not found")
+ if set_tc_mapping:
+ cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8")
+
+ return vf_ifc
+
+
+def setup_vlans_on_vf(vf_ifc):
+ """
+ Sets up two VLAN interfaces on the given VF, each mapped to a different TC.
+ """
+ vlan_configs = [
+ {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"},
+ {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"},
+ ]
+
+ for config in vlan_configs:
+ vlan_dev = f"{vf_ifc}.{config['vlan_id']}"
+ ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}")
+ ip(f"addr add {config['ip']}/29 dev {vlan_dev}")
+ ip(f"link set dev {vlan_dev} up")
+ ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}")
+ ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}")
+
+
+def get_vf_info(cfg):
+ """
+ Finds the VF representor interface and devlink port index
+ for the given PCI device used in the test environment.
+ """
+ cfg.vf_representor = None
+ cfg.vf_port_index = None
+ out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8")
+ ports = json.loads(out)["port"]
+
+ for port_name, props in ports.items():
+ netdev = props.get("netdev")
+
+ if (port_name.startswith(f"pci/{cfg.pci}/") and
+ props.get("vfnum") == 0):
+ cfg.vf_representor = netdev
+ cfg.vf_port_index = int(port_name.split("/")[-1])
+ break
+
+
+def setup_bridge(cfg):
+ """
+ Creates and configures a Linux bridge, with both the uplink
+ and VF representor interfaces attached to it.
+ """
+ bridge_name = f"br_{os.getpid()}"
+ ip(f"link add name {bridge_name} type bridge")
+ defer(cmd, f"ip link del name {bridge_name} type bridge")
+
+ ip(f"link set dev {cfg.ifname} master {bridge_name}")
+
+ rep_name = cfg.vf_representor
+ if rep_name:
+ ip(f"link set dev {rep_name} master {bridge_name}")
+ ip(f"link set dev {rep_name} up")
+ ksft_pr(f"Set representor {rep_name} up and added to bridge")
+ else:
+ raise KsftSkipEx("Could not find representor for the VF")
+
+ ip(f"link set dev {bridge_name} up")
+
+
+def setup_devlink_rate(cfg):
+ """
+ Configures devlink rate tx_max and traffic class bandwidth for the VF.
+ """
+ port_index = cfg.vf_port_index
+ if port_index is None:
+ raise KsftSkipEx("Could not find VF port index")
+ try:
+ cfg.devnl.rate_set({
+ "bus-name": "pci",
+ "dev-name": cfg.pci,
+ "port-index": port_index,
+ "rate-tx-max": 125000000,
+ "rate-tc-bws": [
+ {"index": 0, "bw": 0},
+ {"index": 1, "bw": 0},
+ {"index": 2, "bw": 0},
+ {"index": 3, "bw": 20},
+ {"index": 4, "bw": 80},
+ {"index": 5, "bw": 0},
+ {"index": 6, "bw": 0},
+ {"index": 7, "bw": 0},
+ ]
+ })
+ except NlError as exc:
+ if exc.error == 95: # EOPNOTSUPP
+ raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc
+ raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
+
+
+def setup_remote_vlans(cfg):
+ """
+ Sets up VLAN interfaces on the remote side.
+ """
+ remote_dev = cfg.remote_ifname
+ vlan_ids = [101, 102]
+ remote_ips = ["198.51.100.2", "198.51.100.10"]
+
+ for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
+ vlan_dev = f"{remote_dev}.{vlan_id}"
+ cmd(f"ip link add link {remote_dev} name {vlan_dev} "
+ f"type vlan id {vlan_id}", host=cfg.remote)
+ cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
+ cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
+ defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
+
+
+def setup_test_environment(cfg, set_tc_mapping=True):
+ """
+ Sets up the complete test environment including VF creation, VLANs,
+ bridge configuration and devlink rate setup.
+ """
+ vf_ifc = setup_vf(cfg, set_tc_mapping)
+ ksft_pr(f"Created VF interface: {vf_ifc}")
+
+ setup_vlans_on_vf(vf_ifc)
+
+ get_vf_info(cfg)
+ setup_bridge(cfg)
+
+ setup_devlink_rate(cfg)
+ setup_remote_vlans(cfg)
+
+
+def measure_bandwidth(cfg, server_ip, client_ip, barrier):
+ """
+ Synchronizes with peers and runs an iperf3-based bandwidth measurement
+ between the given endpoints. Returns average Gbps.
+ """
+ runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip)
+ try:
+ barrier.wait(timeout=10)
+ except Exception as exc:
+ raise KsftFailEx("iperf3 barrier wait timed") from exc
+
+ try:
+ bw_gbps = runner.measure_bandwidth(reverse=True)
+ except Exception as exc:
+ raise KsftFailEx("iperf3 bandwidth measurement failed") from exc
+
+ return bw_gbps
+
+
+def run_bandwidth_test(cfg):
+ """
+ Runs parallel bandwidth measurements for each VLAN/TC pair and collects results.
+ """
+ def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix):
+ results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier)
+
+ vf_vlan_data = [
+ # (local_ip, remote_ip, TC)
+ ("198.51.100.1", "198.51.100.2", 3),
+ ("198.51.100.9", "198.51.100.10", 4),
+ ]
+
+ results = {}
+ threads = []
+ start_barrier = threading.Barrier(len(vf_vlan_data))
+
+ for local_ip, remote_ip, tc_ix in vf_vlan_data:
+ thread = threading.Thread(
+ target=_run_measure_bandwidth_thread,
+ args=(local_ip, remote_ip, results, start_barrier, tc_ix)
+ )
+ thread.start()
+ threads.append(thread)
+
+ for thread in threads:
+ thread.join()
+
+ for tc_ix, tc_bw in results.items():
+ if tc_bw is None:
+ raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth")
+
+ return results
+
+
+def calculate_bandwidth_percentages(results):
+ """
+ Calculates the percentage of total bandwidth received by TC3 and TC4.
+ """
+ if 3 not in results or 4 not in results:
+ raise KsftFailEx(f"Missing expected TC results in {results}")
+
+ tc3_bw = results[3]
+ tc4_bw = results[4]
+ total_bw = tc3_bw + tc4_bw
+ tc3_percentage = (tc3_bw / total_bw) * 100
+ tc4_percentage = (tc4_bw / total_bw) * 100
+
+ return {
+ 'tc3_bw': tc3_bw,
+ 'tc4_bw': tc4_bw,
+ 'tc3_percentage': tc3_percentage,
+ 'tc4_percentage': tc4_percentage,
+ 'total_bw': total_bw
+ }
+
+
+def print_bandwidth_results(bw_data, test_name):
+ """
+ Prints bandwidth measurements and TC usage summary for a given test.
+ """
+ ksft_pr(f"Bandwidth check results {test_name}:")
+ ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec")
+ ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%")
+ ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%")
+
+
+def verify_total_bandwidth(bw_data, validator):
+ """
+ Ensures the total measured bandwidth falls within the acceptable tolerance.
+ """
+ total = bw_data['total_bw']
+
+ if validator.bound({"total": total}):
+ return
+
+ low, high = validator.bounds["total"]
+
+ if total < low:
+ raise KsftSkipEx(
+ f"Total bandwidth {total:.2f} Gbps < minimum "
+ f"{low:.2f} Gbps; "
+ f"parent tx_max ({validator.expected_total:.1f} G) "
+ f"not reached, cannot validate share"
+ )
+
+ raise KsftFailEx(
+ f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling "
+ f"{high:.2f} Gbps "
+ f"(VF tx_max set to {validator.expected_total:.1f} G)"
+ )
+
+
+def run_bandwidth_distribution_test(cfg, set_tc_mapping):
+ """
+ Runs parallel bandwidth measurements for both TCs and collects results.
+ """
+ setup_test_environment(cfg, set_tc_mapping)
+ bandwidths = run_bandwidth_test(cfg)
+ bw_data = calculate_bandwidth_percentages(bandwidths)
+ test_name = "with TC mapping" if set_tc_mapping else "without TC mapping"
+ print_bandwidth_results(bw_data, test_name)
+
+ verify_total_bandwidth(bw_data, cfg.traffic_bw_validator)
+
+ return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'],
+ "tc4": bw_data['tc4_percentage']})
+
+
+def test_no_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is not split 20/80 without traffic class mapping.
+ """
+ pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping"
+ fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping"
+ is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout
+
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
+ if is_mlx5:
+ raise KsftXfailEx(fail_bw_msg)
+ raise KsftFailEx(fail_bw_msg)
+ if is_mlx5:
+ raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg)
+ ksft_pr(pass_bw_msg)
+
+
+def test_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is correctly split 20/80 between TC3 and TC4
+ when traffic class mapping is set.
+ """
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
+ ksft_pr("Bandwidth is distributed as 20/80 with TC mapping")
+ else:
+ raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping")
+
+
+def main() -> None:
+ """
+ Main entry point for running the test cases.
+ """
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.devnl = DevlinkFamily()
+
+ cfg.pci = os.path.basename(
+ os.path.realpath(f"/sys/class/net/{cfg.ifname}/device")
+ )
+ if not cfg.pci:
+ raise KsftSkipEx("Could not get PCI address of the interface")
+
+ cfg.traffic_bw_validator = BandwidthValidator({"total": 1})
+ cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80})
+
+ cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth]
+
+ ksft_run(cases=cases, args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
new file mode 100755
index 000000000000..45c2d49d55b6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from os import path
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, KsftSkipEx
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, wait_port_listen
+from lib.py import ksft_disruptive
+
+
+def require_devmem(cfg):
+ if not hasattr(cfg, "_devmem_probed"):
+ probe_command = f"{cfg.bin_local} -f {cfg.ifname}"
+ cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0
+ cfg._devmem_probed = True
+
+ if not cfg._devmem_supported:
+ raise KsftSkipEx("Test requires devmem support")
+
+
+@ksft_disruptive
+def check_rx(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}"
+ listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7"
+
+ with bkg(listen_cmd, exit_wait=True) as ncdevmem:
+ wait_port_listen(port)
+ cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \
+ head -c 1K | {socat}", host=cfg.remote, shell=True)
+
+ ksft_eq(ncdevmem.ret, 0)
+
+
+@ksft_disruptive
+def check_tx(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+ wait_port_listen(port, host=cfg.remote)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True)
+
+ ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+@ksft_disruptive
+def check_tx_chunks(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+ wait_port_listen(port, host=cfg.remote)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True)
+
+ ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ ksft_run([check_rx, check_tx, check_tx_chunks],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/drivers/net/hw/ethtool.sh
index dbb9fcf759e0..fa6953de6b6d 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool.sh
@@ -10,7 +10,8 @@ ALL_TESTS="
different_speeds_autoneg_on
"
NUM_NETIFS=2
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
source ethtool_lib.sh
h1_create()
@@ -64,9 +65,8 @@ same_speeds_autoneg_off()
setup_wait_dev_with_timeout $h1
setup_wait_dev_with_timeout $h2
ping_do $h1 192.0.2.2
- check_err $? "speed $speed autoneg off"
- log_test "force of same speed autoneg off"
- log_info "speed = $speed"
+ check_err $? "ping with speed $speed autoneg off"
+ log_test "force speed $speed on both ends"
done
ethtool -s $h2 autoneg on
@@ -111,9 +111,8 @@ combination_of_neg_on_and_off()
setup_wait_dev_with_timeout $h1
setup_wait_dev_with_timeout $h2
ping_do $h1 192.0.2.2
- check_err $? "h1-speed=$speed autoneg off, h2 autoneg on"
- log_test "one side with autoneg off and another with autoneg on"
- log_info "force speed = $speed"
+ check_err $? "ping with h1-speed=$speed autoneg off, h2 autoneg on"
+ log_test "force speed $speed vs. autoneg"
done
ethtool -s $h1 autoneg on
@@ -206,10 +205,9 @@ advertise_subset_of_speeds()
setup_wait_dev_with_timeout $h1
setup_wait_dev_with_timeout $h2
ping_do $h1 192.0.2.2
- check_err $? "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
+ check_err $? "ping with h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
- log_test "advertise subset of speeds"
- log_info "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise"
+ log_test "advertise $speed_1_to_advertise vs. $speed_2_to_advertise"
done
ethtool -s $h2 autoneg on
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
index 4b42dfd4efd1..a7584448416e 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
@@ -8,9 +8,12 @@ ALL_TESTS="
"
NUM_NETIFS=2
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
source ethtool_lib.sh
+TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
setup_prepare()
{
swp1=${NETIFS[p1]}
@@ -18,7 +21,7 @@ setup_prepare()
swp3=$NETIF_NO_CABLE
}
-ethtool_extended_state_check()
+ethtool_ext_state()
{
local dev=$1; shift
local expected_ext_state=$1; shift
@@ -30,21 +33,27 @@ ethtool_extended_state_check()
| sed -e 's/^[[:space:]]*//')
ext_state=$(echo $ext_state | cut -d "," -f1)
- [[ $ext_state == $expected_ext_state ]]
- check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\""
-
- [[ $ext_substate == $expected_ext_substate ]]
- check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+ if [[ $ext_state != $expected_ext_state ]]; then
+ echo "Expected \"$expected_ext_state\", got \"$ext_state\""
+ return 1
+ fi
+ if [[ $ext_substate != $expected_ext_substate ]]; then
+ echo "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+ return 1
+ fi
}
autoneg()
{
+ local msg
+
RET=0
ip link set dev $swp1 up
- sleep 4
- ethtool_extended_state_check $swp1 "Autoneg" "No partner detected"
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+ "Autoneg" "No partner detected")
+ check_err $? "$msg"
log_test "Autoneg, No partner detected"
@@ -53,6 +62,8 @@ autoneg()
autoneg_force_mode()
{
+ local msg
+
RET=0
ip link set dev $swp1 up
@@ -65,12 +76,13 @@ autoneg_force_mode()
ethtool_set $swp1 speed $speed1 autoneg off
ethtool_set $swp2 speed $speed2 autoneg off
- sleep 4
- ethtool_extended_state_check $swp1 "Autoneg" \
- "No partner detected during force mode"
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+ "Autoneg" "No partner detected during force mode")
+ check_err $? "$msg"
- ethtool_extended_state_check $swp2 "Autoneg" \
- "No partner detected during force mode"
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \
+ "Autoneg" "No partner detected during force mode")
+ check_err $? "$msg"
log_test "Autoneg, No partner detected during force mode"
@@ -83,12 +95,14 @@ autoneg_force_mode()
no_cable()
{
+ local msg
+
RET=0
ip link set dev $swp3 up
- sleep 1
- ethtool_extended_state_check $swp3 "No cable"
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable")
+ check_err $? "$msg"
log_test "No cable"
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
index b9bfb45085af..b9bfb45085af 100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
new file mode 100755
index 000000000000..c301e735c8ab
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
@@ -0,0 +1,341 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ manual_with_verification_h1_to_h2
+ manual_with_verification_h2_to_h1
+ manual_without_verification_h1_to_h2
+ manual_without_verification_h2_to_h1
+ manual_failed_verification_h1_to_h2
+ manual_failed_verification_h2_to_h1
+ lldp
+"
+
+NUM_NETIFS=2
+REQUIRE_MZ=no
+PREEMPTIBLE_PRIO=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+traffic_test()
+{
+ local if=$1; shift
+ local src=$1; shift
+ local num_pkts=10000
+ local before=
+ local after=
+ local delta=
+
+ if [ ${has_pmac_stats[$if]} = false ]; then
+ src="aggregate"
+ fi
+
+ before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+ $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO
+
+ after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+ delta=$((after - before))
+
+ # Allow an extra 1% tolerance for random packets sent by the stack
+ [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ]
+}
+
+manual_with_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit
+ # Processing state diagram whether the "send_r" variable (send response
+ # to verification frame) should be taken into consideration while the
+ # MAC Merge TX direction is disabled. That being said, at least the
+ # NXP ENETC does not, and requires tx-enabled on in order to respond to
+ # the link partner's verification frames.
+ ethtool --set-mm $rx tx-enabled on
+ ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+ # Wait for verification to finish
+ sleep 1
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'SUCCEEDED'
+ check_err "$?" "Verification did not succeed"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_err "$?" "pMAC TX is not active"
+
+ traffic_test $tx "pmac"
+ check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+ ethtool --set-mm $rx tx-enabled off
+
+ log_test "Manual configuration with verification: $tx to $rx"
+}
+
+manual_with_verification_h1_to_h2()
+{
+ manual_with_verification $h1 $h2
+}
+
+manual_with_verification_h2_to_h1()
+{
+ manual_with_verification $h2 $h1
+}
+
+manual_without_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled on
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'DISABLED'
+ check_err "$?" "Verification is not disabled"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_err "$?" "pMAC TX is not active"
+
+ traffic_test $tx "pmac"
+ check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+
+ log_test "Manual configuration without verification: $tx to $rx"
+}
+
+manual_without_verification_h1_to_h2()
+{
+ manual_without_verification $h1 $h2
+}
+
+manual_without_verification_h2_to_h1()
+{
+ manual_without_verification $h2 $h1
+}
+
+manual_failed_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ ethtool --set-mm $rx pmac-enabled off
+ ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+ # Wait for verification to time out
+ sleep 1
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'SUCCEEDED'
+ check_fail "$?" "Verification succeeded when it shouldn't have"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_fail "$?" "pMAC TX is active when it shouldn't have"
+
+ traffic_test $tx "emac"
+ check_err "$?" "Traffic did not get sent through $tx's eMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+ ethtool --set-mm $rx pmac-enabled on
+
+ log_test "Manual configuration with failed verification: $tx to $rx"
+}
+
+manual_failed_verification_h1_to_h2()
+{
+ manual_failed_verification $h1 $h2
+}
+
+manual_failed_verification_h2_to_h1()
+{
+ manual_failed_verification $h2 $h1
+}
+
+smallest_supported_add_frag_size()
+{
+ local iface=$1
+ local rx_min_frag_size=
+
+ rx_min_frag_size=$(ethtool --json --show-mm $iface | \
+ jq '.[]."rx-min-frag-size"')
+
+ if [ $rx_min_frag_size -le 60 ]; then
+ echo 0
+ elif [ $rx_min_frag_size -le 124 ]; then
+ echo 1
+ elif [ $rx_min_frag_size -le 188 ]; then
+ echo 2
+ elif [ $rx_min_frag_size -le 252 ]; then
+ echo 3
+ else
+ echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP"
+ exit 1
+ fi
+}
+
+expected_add_frag_size()
+{
+ local iface=$1
+ local requested=$2
+ local min=$(smallest_supported_add_frag_size $iface)
+
+ [ $requested -le $min ] && echo $min || echo $requested
+}
+
+lldp_change_add_frag_size()
+{
+ local add_frag_size=$1
+ local pattern=
+
+ lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null
+ # Wait for TLVs to be received
+ sleep 2
+ pattern=$(printf "Additional fragment size: %d" \
+ $(expected_add_frag_size $h1 $add_frag_size))
+ lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern"
+}
+
+lldp()
+{
+ RET=0
+
+ systemctl start lldpad
+
+ # Configure the interfaces to receive and transmit LLDPDUs
+ lldptool -L -i $h1 adminStatus=rxtx >/dev/null
+ lldptool -L -i $h2 adminStatus=rxtx >/dev/null
+
+ # Enable the transmission of Additional Ethernet Capabilities TLV
+ lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null
+ lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null
+
+ # Wait for TLVs to be received
+ sleep 2
+
+ lldptool -i $h1 -t -n -V addEthCaps | \
+ grep -q "Preemption capability active"
+ check_err "$?" "$h1 pMAC TX is not active"
+
+ lldptool -i $h2 -t -n -V addEthCaps | \
+ grep -q "Preemption capability active"
+ check_err "$?" "$h2 pMAC TX is not active"
+
+ lldp_change_add_frag_size 3
+ check_err "$?" "addFragSize 3"
+
+ lldp_change_add_frag_size 2
+ check_err "$?" "addFragSize 2"
+
+ lldp_change_add_frag_size 1
+ check_err "$?" "addFragSize 1"
+
+ lldp_change_add_frag_size 0
+ check_err "$?" "addFragSize 0"
+
+ traffic_test $h1 "pmac"
+ check_err "$?" "Traffic did not get sent through $h1's pMAC"
+
+ traffic_test $h2 "pmac"
+ check_err "$?" "Traffic did not get sent through $h2's pMAC"
+
+ systemctl stop lldpad
+
+ log_test "LLDP"
+}
+
+h1_create()
+{
+ ip link set dev $h1 up
+
+ tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ fp P E E E \
+ hw 1
+
+ ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off
+}
+
+h2_create()
+{
+ ip link set dev $h2 up
+
+ ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off
+
+ tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ fp P E E E \
+ hw 1
+}
+
+h1_destroy()
+{
+ ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off
+
+ tc qdisc del dev $h1 root
+
+ ip link set dev $h1 down
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 root
+
+ ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off
+
+ ip link set dev $h2 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+}
+
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+ ethtool --show-mm $netif 2>&1 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: $netif does not support MAC Merge"
+ exit $ksft_skip
+ fi
+
+ if check_ethtool_pmac_std_stats_support $netif eth-mac; then
+ has_pmac_stats[$netif]=true
+ else
+ has_pmac_stats[$netif]=false
+ echo "$netif does not report pMAC statistics, falling back to aggregate"
+ fi
+done
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
new file mode 100755
index 000000000000..8f60c1685ad4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ rmon_rx_histogram
+ rmon_tx_histogram
+"
+
+NUM_NETIFS=2
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+ETH_FCS_LEN=4
+ETH_HLEN=$((6+6+2))
+
+declare -A netif_mtu
+
+ensure_mtu()
+{
+ local iface=$1; shift
+ local len=$1; shift
+ local current=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+ local required=$((len - ETH_HLEN - ETH_FCS_LEN))
+
+ if [ $current -lt $required ]; then
+ ip link set dev $iface mtu $required || return 1
+ fi
+}
+
+bucket_test()
+{
+ local iface=$1; shift
+ local neigh=$1; shift
+ local set=$1; shift
+ local bucket=$1; shift
+ local len=$1; shift
+ local num_rx=10000
+ local num_tx=20000
+ local expected=
+ local before=
+ local after=
+ local delta=
+
+ # Mausezahn does not include FCS bytes in its length - but the
+ # histogram counters do
+ len=$((len - ETH_FCS_LEN))
+ len=$((len > 0 ? len : 0))
+
+ before=$(ethtool --json -S $iface --groups rmon | \
+ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+ # Send 10k one way and 20k in the other, to detect counters
+ # mapped to the wrong direction
+ $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us
+ $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us
+
+ after=$(ethtool --json -S $iface --groups rmon | \
+ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+ delta=$((after - before))
+
+ expected=$([ $set = rx ] && echo $num_rx || echo $num_tx)
+
+ # Allow some extra tolerance for other packets sent by the stack
+ [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ]
+}
+
+rmon_histogram()
+{
+ local iface=$1; shift
+ local neigh=$1; shift
+ local set=$1; shift
+ local nbuckets=0
+ local step=
+
+ RET=0
+
+ while read -r -a bucket; do
+ step="$set-pkts${bucket[0]}to${bucket[1]} on $iface"
+
+ for if in $iface $neigh; do
+ if ! ensure_mtu $if ${bucket[0]}; then
+ log_test_xfail "$if does not support the required MTU for $step"
+ return
+ fi
+ done
+
+ if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then
+ check_err 1 "$step failed"
+ return 1
+ fi
+ log_test "$step"
+ nbuckets=$((nbuckets + 1))
+ done < <(ethtool --json -S $iface --groups rmon | \
+ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null)
+
+ if [ $nbuckets -eq 0 ]; then
+ log_test_xfail "$iface does not support $set histogram counters"
+ return
+ fi
+}
+
+rmon_rx_histogram()
+{
+ rmon_histogram $h1 $h2 rx
+ rmon_histogram $h2 $h1 rx
+}
+
+rmon_tx_histogram()
+{
+ rmon_histogram $h1 $h2 tx
+ rmon_histogram $h2 $h1 tx
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ for iface in $h1 $h2; do
+ netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+ ip link set dev $iface up
+ done
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ for iface in $h2 $h1; do
+ ip link set dev $iface \
+ mtu ${netif_mtu[$iface]} \
+ down
+ done
+}
+
+check_ethtool_counter_group_support
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
new file mode 100755
index 000000000000..67fafefc80be
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+ +----------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.200 + | | + $h2.200 |
+# | 192.0.2.1/28 | | | | 192.0.2.18/28 |
+# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# | | | | | |
+# +------------------|-+ +-|--------------------+
+# | |
+# +------------------|-------------------------|--------------------+
+# | SW | | |
+# | | | |
+# | $rp1 + + $rp2 |
+# | | | |
+# | $rp1.200 + + $rp2.200 |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | 2001:db8:1::2/64 2001:db8:2::2/64 |
+# | |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ respin_enablement
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ reapply_config
+ ping_ipv4
+ ping_ipv6
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ test_stats_report_rx
+ test_stats_report_tx
+ test_destroy_enabled
+ test_double_enable
+"
+NUM_NETIFS=4
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+ vlan_destroy $h1 200
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64
+ ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+ ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+ vlan_destroy $h2 200
+ simple_if_fini $h2
+}
+
+router_rp1_200_create()
+{
+ ip link add name $rp1.200 link $rp1 type vlan id 200
+ ip link set dev $rp1.200 addrgenmode eui64
+ ip link set dev $rp1.200 up
+ ip address add dev $rp1.200 192.0.2.2/28
+ ip address add dev $rp1.200 2001:db8:1::2/64
+ ip stats set dev $rp1.200 l3_stats on
+}
+
+router_rp1_200_destroy()
+{
+ ip stats set dev $rp1.200 l3_stats off
+ ip address del dev $rp1.200 2001:db8:1::2/64
+ ip address del dev $rp1.200 192.0.2.2/28
+ ip link del dev $rp1.200
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ router_rp1_200_create
+
+ ip link set dev $rp2 up
+ vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+ vlan_destroy $rp2 200
+ ip link set dev $rp2 down
+
+ router_rp1_200_destroy
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1mac=$(mac_get $rp1)
+ rp2mac=$(mac_get $rp2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.200 192.0.2.18 " IPv4"
+}
+
+ping_ipv6()
+{
+ ping_test $h1.200 2001:db8:2::1 " IPv6"
+}
+
+send_packets_rx_ipv4()
+{
+ # Send 21 packets instead of 20, because the first one might trap and go
+ # through the SW datapath, which might not bump the HW counter.
+ $MZ $h1.200 -c 21 -d 20msec -p 100 \
+ -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_rx_ipv6()
+{
+ $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \
+ -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv4()
+{
+ $MZ $h2.200 -c 21 -d 20msec -p 100 \
+ -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv6()
+{
+ $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \
+ -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+___test_stats()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ local a
+ local b
+
+ a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ send_packets_${dir}_${prot}
+ "$@"
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+}
+
+__test_stats()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ RET=0
+ ___test_stats "$dir" "$prot"
+ log_test "Test $dir packets: $prot"
+}
+
+test_stats_rx_ipv4()
+{
+ __test_stats rx ipv4
+}
+
+test_stats_tx_ipv4()
+{
+ __test_stats tx ipv4
+}
+
+test_stats_rx_ipv6()
+{
+ __test_stats rx ipv6
+}
+
+test_stats_tx_ipv6()
+{
+ __test_stats tx ipv6
+}
+
+# Make sure everything works well even after stats have been disabled and
+# reenabled on the same device without touching the L3 configuration.
+respin_enablement()
+{
+ log_info "Turning stats off and on again"
+ ip stats set dev $rp1.200 l3_stats off
+ ip stats set dev $rp1.200 l3_stats on
+}
+
+# For the initial run, l3_stats is enabled on a completely set up netdevice. Now
+# do it the other way around: enabling the L3 stats on an L2 netdevice, and only
+# then apply the L3 configuration.
+reapply_config()
+{
+ log_info "Reapplying configuration"
+
+ router_rp1_200_destroy
+
+ ip link add name $rp1.200 link $rp1 type vlan id 200
+ ip link set dev $rp1.200 addrgenmode none
+ ip stats set dev $rp1.200 l3_stats on
+ ip link set dev $rp1.200 addrgenmode eui64
+ ip link set dev $rp1.200 up
+ ip address add dev $rp1.200 192.0.2.2/28
+ ip address add dev $rp1.200 2001:db8:1::2/64
+}
+
+__test_stats_report()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ local a
+ local b
+
+ RET=0
+
+ a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ send_packets_${dir}_${prot}
+ ip address flush dev $rp1.200
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+ log_test "Test ${dir} packets: stats pushed on loss of L3"
+
+ ip stats set dev $rp1.200 l3_stats off
+ ip link del dev $rp1.200
+ router_rp1_200_create
+}
+
+test_stats_report_rx()
+{
+ __test_stats_report rx ipv4
+}
+
+test_stats_report_tx()
+{
+ __test_stats_report tx ipv4
+}
+
+test_destroy_enabled()
+{
+ RET=0
+
+ ip link del dev $rp1.200
+ router_rp1_200_create
+
+ log_test "Destroy l3_stats-enabled netdev"
+}
+
+test_double_enable()
+{
+ RET=0
+ ___test_stats rx ipv4 \
+ ip stats set dev $rp1.200 l3_stats on
+ log_test "Test stat retention across a spurious enablement"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
+ jq '.[].info.l3_stats.used')
+[[ $used = true ]]
+check_err $? "hw_stats_info.used=$used"
+log_test "l3_stats offloaded"
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
new file mode 100755
index 000000000000..a94d92e1abce
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test L3 stats on IP-in-IP GRE tunnel without key.
+
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="
+ ping_ipv4
+ test_stats_rx
+ test_stats_tx
+"
+NUM_NETIFS=6
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/ipip_lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ ol1mac=$(mac_get $ol1)
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ h2_create
+ sw1_flat_create gre $ol1 $ul1
+ sw2_flat_create gre $ol2 $ul2
+ ip stats set dev g1a l3_stats on
+ ip stats set dev g2a l3_stats on
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip stats set dev g1a l3_stats off
+ ip stats set dev g2a l3_stats off
+
+ sw2_flat_destroy $ol2 $ul2
+ sw1_flat_destroy $ol1 $ul1
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+ forwarding_restore
+}
+
+ping_ipv4()
+{
+ RET=0
+
+ ping_test $h1 192.0.2.18 " gre flat"
+}
+
+send_packets_ipv4()
+{
+ # Send 21 packets instead of 20, because the first one might trap and go
+ # through the SW datapath, which might not bump the HW counter.
+ $MZ $h1 -c 21 -d 20msec -p 100 \
+ -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \
+ -q -t udp sp=54321,dp=12345
+}
+
+test_stats()
+{
+ local dev=$1; shift
+ local dir=$1; shift
+
+ local a
+ local b
+
+ RET=0
+
+ a=$(hw_stats_get l3_stats $dev $dir packets)
+ send_packets_ipv4
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ hw_stats_get l3_stats $dev $dir packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+
+ log_test "Test $dir packets: $prot"
+}
+
+test_stats_tx()
+{
+ test_stats g1a tx
+}
+
+test_stats_rx()
+{
+ test_stats g2a rx
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
new file mode 100644
index 000000000000..62456df947bc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#include <liburing.h>
+
+static long page_size;
+#define AREA_SIZE (8192 * page_size)
+#define SEND_SIZE (512 * 4096)
+#define min(a, b) \
+ ({ \
+ typeof(a) _a = (a); \
+ typeof(b) _b = (b); \
+ _a < _b ? _a : _b; \
+ })
+#define min_t(t, a, b) \
+ ({ \
+ t _ta = (a); \
+ t _tb = (b); \
+ min(_ta, _tb); \
+ })
+
+#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1))
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static int cfg_payload_len;
+static const char *cfg_ifname;
+static int cfg_queue_id = -1;
+static bool cfg_oneshot;
+static int cfg_oneshot_recvs;
+static int cfg_send_size = SEND_SIZE;
+static struct sockaddr_in6 cfg_addr;
+
+static char *payload;
+static void *area_ptr;
+static void *ring_ptr;
+static size_t ring_size;
+static struct io_uring_zcrx_rq rq_ring;
+static unsigned long area_token;
+static int connfd;
+static bool stop;
+static size_t received;
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static inline size_t get_refill_ring_size(unsigned int rq_entries)
+{
+ size_t size;
+
+ ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe);
+ /* add space for the header (head/tail/etc.) */
+ ring_size += page_size;
+ return ALIGN_UP(ring_size, page_size);
+}
+
+static void setup_zcrx(struct io_uring *ring)
+{
+ unsigned int ifindex;
+ unsigned int rq_entries = 4096;
+ int ret;
+
+ ifindex = if_nametoindex(cfg_ifname);
+ if (!ifindex)
+ error(1, 0, "bad interface name: %s", cfg_ifname);
+
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+ if (area_ptr == MAP_FAILED)
+ error(1, 0, "mmap(): zero copy area");
+
+ ring_size = get_refill_ring_size(rq_entries);
+ ring_ptr = mmap(NULL,
+ ring_size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+
+ struct io_uring_region_desc region_reg = {
+ .size = ring_size,
+ .user_addr = (__u64)(unsigned long)ring_ptr,
+ .flags = IORING_MEM_REGION_TYPE_USER,
+ };
+
+ struct io_uring_zcrx_area_reg area_reg = {
+ .addr = (__u64)(unsigned long)area_ptr,
+ .len = AREA_SIZE,
+ .flags = 0,
+ };
+
+ struct io_uring_zcrx_ifq_reg reg = {
+ .if_idx = ifindex,
+ .if_rxq = cfg_queue_id,
+ .rq_entries = rq_entries,
+ .area_ptr = (__u64)(unsigned long)&area_reg,
+ .region_ptr = (__u64)(unsigned long)&region_reg,
+ };
+
+ ret = io_uring_register_ifq(ring, &reg);
+ if (ret)
+ error(1, 0, "io_uring_register_ifq(): %d", ret);
+
+ rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head);
+ rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail);
+ rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes);
+ rq_ring.rq_tail = 0;
+ rq_ring.ring_entries = reg.rq_entries;
+
+ area_token = area_reg.rq_area_token;
+}
+
+static void add_accept(struct io_uring *ring, int sockfd)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0);
+ sqe->user_data = 1;
+}
+
+static void add_recvzc(struct io_uring *ring, int sockfd)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+ sqe->user_data = 2;
+}
+
+static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+ sqe->user_data = 2;
+}
+
+static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+ if (cqe->res < 0)
+ error(1, 0, "accept()");
+ if (connfd)
+ error(1, 0, "Unexpected second connection");
+
+ connfd = cqe->res;
+ if (cfg_oneshot)
+ add_recvzc_oneshot(ring, connfd, page_size);
+ else
+ add_recvzc(ring, connfd);
+}
+
+static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+ unsigned rq_mask = rq_ring.ring_entries - 1;
+ struct io_uring_zcrx_cqe *rcqe;
+ struct io_uring_zcrx_rqe *rqe;
+ struct io_uring_sqe *sqe;
+ uint64_t mask;
+ char *data;
+ ssize_t n;
+ int i;
+
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) {
+ stop = true;
+ return;
+ }
+
+ if (cqe->res < 0)
+ error(1, 0, "recvzc(): %d", cqe->res);
+
+ if (cfg_oneshot) {
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
+ add_recvzc_oneshot(ring, connfd, page_size);
+ cfg_oneshot_recvs--;
+ }
+ } else if (!(cqe->flags & IORING_CQE_F_MORE)) {
+ add_recvzc(ring, connfd);
+ }
+
+ rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
+
+ n = cqe->res;
+ mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1;
+ data = (char *)area_ptr + (rcqe->off & mask);
+
+ for (i = 0; i < n; i++) {
+ if (*(data + i) != payload[(received + i)])
+ error(1, 0, "payload mismatch at %d", i);
+ }
+ received += n;
+
+ rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)];
+ rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token;
+ rqe->len = cqe->res;
+ io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail);
+}
+
+static void server_loop(struct io_uring *ring)
+{
+ struct io_uring_cqe *cqe;
+ unsigned int count = 0;
+ unsigned int head;
+ int i, ret;
+
+ io_uring_submit_and_wait(ring, 1);
+
+ io_uring_for_each_cqe(ring, head, cqe) {
+ if (cqe->user_data == 1)
+ process_accept(ring, cqe);
+ else if (cqe->user_data == 2)
+ process_recvzc(ring, cqe);
+ else
+ error(1, 0, "unknown cqe");
+ count++;
+ }
+ io_uring_cq_advance(ring, count);
+}
+
+static void run_server(void)
+{
+ unsigned int flags = 0;
+ struct io_uring ring;
+ int fd, enable, ret;
+ uint64_t tstop;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, 0, "socket()");
+
+ enable = 1;
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
+ if (ret < 0)
+ error(1, 0, "setsockopt(SO_REUSEADDR)");
+
+ ret = bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ if (ret < 0)
+ error(1, 0, "bind()");
+
+ if (listen(fd, 1024) < 0)
+ error(1, 0, "listen()");
+
+ flags |= IORING_SETUP_COOP_TASKRUN;
+ flags |= IORING_SETUP_SINGLE_ISSUER;
+ flags |= IORING_SETUP_DEFER_TASKRUN;
+ flags |= IORING_SETUP_SUBMIT_ALL;
+ flags |= IORING_SETUP_CQE32;
+
+ io_uring_queue_init(512, &ring, flags);
+
+ setup_zcrx(&ring);
+
+ add_accept(&ring, fd);
+
+ tstop = gettimeofday_ms() + 5000;
+ while (!stop && gettimeofday_ms() < tstop)
+ server_loop(&ring);
+
+ if (!stop)
+ error(1, 0, "test failed\n");
+}
+
+static void run_client(void)
+{
+ ssize_t to_send = cfg_send_size;
+ ssize_t sent = 0;
+ ssize_t chunk, res;
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, 0, "socket()");
+
+ if (connect(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)))
+ error(1, 0, "connect()");
+
+ while (to_send) {
+ void *src = &payload[sent];
+
+ chunk = min_t(ssize_t, cfg_payload_len, to_send);
+ res = send(fd, src, chunk, 0);
+ if (res < 0)
+ error(1, 0, "send(): %zd", sent);
+ sent += res;
+ to_send -= res;
+ }
+
+ close(fd);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<server_ip> -p<port> "
+ "-l<payload_size> -i<ifname> -q<rxq_id>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = SEND_SIZE -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+ char *addr = NULL;
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) {
+ switch (c) {
+ case 's':
+ if (cfg_client)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_server = 1;
+ break;
+ case 'c':
+ if (cfg_server)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_client = 1;
+ break;
+ case 'h':
+ addr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'q':
+ cfg_queue_id = strtoul(optarg, NULL, 0);
+ break;
+ case 'o': {
+ cfg_oneshot = true;
+ cfg_oneshot_recvs = strtoul(optarg, NULL, 0);
+ break;
+ }
+ case 'z':
+ cfg_send_size = strtoul(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ if (cfg_server && addr)
+ error(1, 0, "Receiver cannot have -h specified");
+
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ addr6->sin6_addr = in6addr_any;
+ if (addr) {
+ ret = parse_address(addr, cfg_port, addr6);
+ if (ret)
+ error(1, 0, "receiver address parse error: %s", addr);
+ }
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-l: payload exceeds max (%d)", max_payload_len);
+}
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test = argv[argc - 1];
+ int i;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 0)
+ return 1;
+
+ if (posix_memalign((void **)&payload, page_size, SEND_SIZE))
+ return 1;
+
+ parse_opts(argc, argv);
+
+ for (i = 0; i < SEND_SIZE; i++)
+ payload[i] = 'a' + (i % 26);
+
+ if (cfg_server)
+ run_server();
+ else if (cfg_client)
+ run_client();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
new file mode 100755
index 000000000000..712c806508b5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import re
+from os import path
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
+
+
+def _get_current_settings(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True)[0]
+ return (output['rx'], output['hds-thresh'])
+
+
+def _get_combined_channels(cfg):
+ output = ethtool(f"-l {cfg.ifname}").stdout
+ values = re.findall(r'Combined:\s+(\d+)', output)
+ return int(values[1])
+
+
+def _create_rss_ctx(cfg, chan):
+ output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout
+ values = re.search(r'New RSS context is (\d+)', output).group(1)
+ ctx_id = int(values)
+ return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}"))
+
+
+def _set_flow_rule(cfg, port, chan):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def _set_flow_rule_rss(cfg, port, ctx_id):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def test_zcrx(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_oneshot(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_rss(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
+ flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py
new file mode 100755
index 000000000000..0699d6a8b4e2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/irq.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_ge, ksft_eq
+from lib.py import KsftSkipEx
+from lib.py import ksft_disruptive
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import NetDrvEnv
+from lib.py import cmd, ip, defer
+
+
+def read_affinity(irq) -> str:
+ with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp:
+ return fp.read().lstrip("0,").strip()
+
+
+def write_affinity(irq, what) -> str:
+ if what != read_affinity(irq):
+ with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp:
+ fp.write(what)
+
+
+def check_irqs_reported(cfg) -> None:
+ """ Check that device reports IRQs for NAPI instances """
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+ irqs = sum(['irq' in x for x in napis])
+
+ ksft_ge(irqs, 1)
+ ksft_eq(irqs, len(napis))
+
+
+def _check_reconfig(cfg, reconfig_cb) -> None:
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+ for n in reversed(napis):
+ if 'irq' in n:
+ break
+ else:
+ raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}")
+
+ old = read_affinity(n['irq'])
+ # pick an affinity that's not the current one
+ new = "3" if old != "3" else "5"
+ write_affinity(n['irq'], new)
+ defer(write_affinity, n['irq'], old)
+
+ reconfig_cb(cfg)
+
+ ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig")
+
+
+def check_reconfig_queues(cfg) -> None:
+ def reconfig(cfg) -> None:
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ rx_type = 'rx'
+ else:
+ rx_type = 'combined'
+ cur_queue_cnt = channels[f'{rx_type}-count']
+ max_queue_cnt = channels[f'{rx_type}-max']
+
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} 1")
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+ _check_reconfig(cfg, reconfig)
+
+
+def check_reconfig_xdp(cfg) -> None:
+ def reconfig(cfg) -> None:
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+ _check_reconfig(cfg, reconfig)
+
+
+@ksft_disruptive
+def check_down(cfg) -> None:
+ def reconfig(cfg) -> None:
+ ip("link set dev %s down" % cfg.ifname)
+ ip("link set dev %s up" % cfg.ifname)
+
+ _check_reconfig(cfg, reconfig)
+
+
+def main() -> None:
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ ksft_run([check_irqs_reported, check_reconfig_queues,
+ check_reconfig_xdp, check_down],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
new file mode 100644
index 000000000000..766bfc4ad842
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Driver test environment (hardware-only tests).
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
+
+try:
+ sys.path.append(KSFT_DIR.as_posix())
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily, PSPFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+ fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup, ksft_variants, KsftNamedVariant
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
+ from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner
+ from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
+
+ __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+ "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+ "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+ "CmdExitFailure",
+ "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+ "fd_read_timeout", "ip", "rand_port",
+ "wait_port_listen", "wait_file",
+ "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+ "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+ "ksft_setup", "ksft_variants", "KsftNamedVariant",
+ "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+ "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+ "ksft_not_none", "ksft_not_none",
+ "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+ "Iperf3Runner"]
+except ModuleNotFoundError as e:
+ print("Failed importing `net` library from kernel sources")
+ print(str(e))
+ sys.exit(4)
diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/drivers/net/hw/loopback.sh
index 8f4057310b5b..5acc3ff820aa 100755
--- a/tools/testing/selftests/net/forwarding/loopback.sh
+++ b/tools/testing/selftests/drivers/net/hw/loopback.sh
@@ -6,8 +6,9 @@ ksft_skip=4
ALL_TESTS="loopback_test"
NUM_NETIFS=2
-source tc_common.sh
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
h1_create()
{
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
new file mode 100644
index 000000000000..3288ed04ce08
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -0,0 +1,1524 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tcpdevmem netcat. Works similarly to netcat but does device memory TCP
+ * instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
+ *
+ * Usage:
+ *
+ * On server:
+ * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
+ *
+ * On client:
+ * echo -n "hello\nworld" | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
+ *
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ *
+ * Test data validation (devmem TCP on RX only):
+ *
+ * On server:
+ * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
+ *
+ * On client:
+ * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
+ * head -c 1G | \
+ * nc <server IP> 5201 -p 5201
+ *
+ * Test data validation (devmem TCP on RX and TX, validation happens on RX):
+ *
+ * On server:
+ * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
+ *
+ * On client:
+ * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
+ * head -c 1M | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
+ */
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <linux/uio.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#define __iovec_defined
+#include <fcntl.h>
+#include <malloc.h>
+#include <error.h>
+#include <poll.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+
+#include <linux/memfd.h>
+#include <linux/dma-buf.h>
+#include <linux/errqueue.h>
+#include <linux/udmabuf.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/genetlink.h>
+#include <linux/netdev.h>
+#include <linux/ethtool_netlink.h>
+#include <time.h>
+#include <net/if.h>
+
+#include "netdev-user.h"
+#include "ethtool-user.h"
+#include <ynl.h>
+
+#define PAGE_SHIFT 12
+#define TEST_PREFIX "ncdevmem"
+#define NUM_PAGES 16000
+
+#ifndef MSG_SOCK_DEVMEM
+#define MSG_SOCK_DEVMEM 0x2000000
+#endif
+
+#define MAX_IOV 1024
+
+static size_t max_chunk;
+static char *server_ip;
+static char *client_ip;
+static char *port;
+static size_t do_validation;
+static int start_queue = -1;
+static int num_queues = -1;
+static char *ifname;
+static unsigned int ifindex;
+static unsigned int dmabuf_id;
+static uint32_t tx_dmabuf_id;
+static int waittime_ms = 500;
+
+/* System state loaded by current_config_load() */
+#define MAX_FLOWS 8
+static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, };
+
+struct memory_buffer {
+ int fd;
+ size_t size;
+
+ int devfd;
+ int memfd;
+ char *buf_mem;
+};
+
+struct memory_provider {
+ struct memory_buffer *(*alloc)(size_t size);
+ void (*free)(struct memory_buffer *ctx);
+ void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
+ void *src, int n);
+ void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
+ size_t off, int n);
+};
+
+static void pr_err(const char *fmt, ...)
+{
+ va_list args;
+
+ fprintf(stderr, "%s: ", TEST_PREFIX);
+
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ if (errno != 0)
+ fprintf(stderr, ": %s", strerror(errno));
+ fprintf(stderr, "\n");
+}
+
+static struct memory_buffer *udmabuf_alloc(size_t size)
+{
+ struct udmabuf_create create;
+ struct memory_buffer *ctx;
+ int ret;
+
+ ctx = malloc(sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ ctx->size = size;
+
+ ctx->devfd = open("/dev/udmabuf", O_RDWR);
+ if (ctx->devfd < 0) {
+ pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]");
+ goto err_free_ctx;
+ }
+
+ ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
+ if (ctx->memfd < 0) {
+ pr_err("[skip,no-memfd]");
+ goto err_close_dev;
+ }
+
+ ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+ if (ret < 0) {
+ pr_err("[skip,fcntl-add-seals]");
+ goto err_close_memfd;
+ }
+
+ ret = ftruncate(ctx->memfd, size);
+ if (ret == -1) {
+ pr_err("[FAIL,memfd-truncate]");
+ goto err_close_memfd;
+ }
+
+ memset(&create, 0, sizeof(create));
+
+ create.memfd = ctx->memfd;
+ create.offset = 0;
+ create.size = size;
+ ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create);
+ if (ctx->fd < 0) {
+ pr_err("[FAIL, create udmabuf]");
+ goto err_close_fd;
+ }
+
+ ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ ctx->fd, 0);
+ if (ctx->buf_mem == MAP_FAILED) {
+ pr_err("[FAIL, map udmabuf]");
+ goto err_close_fd;
+ }
+
+ return ctx;
+
+err_close_fd:
+ close(ctx->fd);
+err_close_memfd:
+ close(ctx->memfd);
+err_close_dev:
+ close(ctx->devfd);
+err_free_ctx:
+ free(ctx);
+ return NULL;
+}
+
+static void udmabuf_free(struct memory_buffer *ctx)
+{
+ munmap(ctx->buf_mem, ctx->size);
+ close(ctx->fd);
+ close(ctx->memfd);
+ close(ctx->devfd);
+ free(ctx);
+}
+
+static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
+ void *src, int n)
+{
+ struct dma_buf_sync sync = {};
+
+ sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+
+ memcpy(dst->buf_mem + off, src, n);
+
+ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
+static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
+ size_t off, int n)
+{
+ struct dma_buf_sync sync = {};
+
+ sync.flags = DMA_BUF_SYNC_START;
+ ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
+
+ memcpy(dst, src->buf_mem + off, n);
+
+ sync.flags = DMA_BUF_SYNC_END;
+ ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
+static struct memory_provider udmabuf_memory_provider = {
+ .alloc = udmabuf_alloc,
+ .free = udmabuf_free,
+ .memcpy_to_device = udmabuf_memcpy_to_device,
+ .memcpy_from_device = udmabuf_memcpy_from_device,
+};
+
+static struct memory_provider *provider = &udmabuf_memory_provider;
+
+static void print_nonzero_bytes(void *ptr, size_t size)
+{
+ unsigned char *p = ptr;
+ unsigned int i;
+
+ for (i = 0; i < size; i++)
+ putchar(p[i]);
+}
+
+int validate_buffer(void *line, size_t size)
+{
+ static unsigned char seed = 1;
+ unsigned char *ptr = line;
+ unsigned char expected;
+ static int errors;
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ expected = seed ? seed : '\n';
+ if (ptr[i] != expected) {
+ fprintf(stderr,
+ "Failed validation: expected=%u, actual=%u, index=%lu\n",
+ expected, ptr[i], i);
+ errors++;
+ if (errors > 20) {
+ pr_err("validation failed");
+ return -1;
+ }
+ }
+ seed++;
+ if (seed == do_validation)
+ seed = 0;
+ }
+
+ fprintf(stdout, "Validated buffer\n");
+ return 0;
+}
+
+static int
+__run_command(char *out, size_t outlen, const char *cmd, va_list args)
+{
+ char command[256];
+ FILE *fp;
+
+ vsnprintf(command, sizeof(command), cmd, args);
+
+ fprintf(stderr, "Running: %s\n", command);
+ fp = popen(command, "r");
+ if (!fp)
+ return -1;
+ if (out) {
+ size_t len;
+
+ if (!fgets(out, outlen, fp))
+ return -1;
+
+ /* Remove trailing newline if present */
+ len = strlen(out);
+ if (len && out[len - 1] == '\n')
+ out[len - 1] = '\0';
+ }
+ return pclose(fp);
+}
+
+static int run_command(const char *cmd, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, cmd);
+ ret = __run_command(NULL, 0, cmd, args);
+ va_end(args);
+
+ return ret;
+}
+
+static int ethtool_add_flow(const char *format, ...)
+{
+ char local_output[256], cmd[256];
+ const char *id_start;
+ int flow_idx, ret;
+ char *endptr;
+ long flow_id;
+ va_list args;
+
+ for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++)
+ if (ntuple_ids[flow_idx] == -1)
+ break;
+ if (flow_idx == MAX_FLOWS) {
+ fprintf(stderr, "Error: too many flows\n");
+ return -1;
+ }
+
+ snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format);
+
+ va_start(args, format);
+ ret = __run_command(local_output, sizeof(local_output), cmd, args);
+ va_end(args);
+
+ if (ret != 0)
+ return ret;
+
+ /* Extract the ID from the output */
+ id_start = strstr(local_output, "Added rule with ID ");
+ if (!id_start)
+ return -1;
+ id_start += strlen("Added rule with ID ");
+
+ flow_id = strtol(id_start, &endptr, 10);
+ if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX)
+ return -1;
+
+ fprintf(stderr, "Added flow rule with ID %ld\n", flow_id);
+ ntuple_ids[flow_idx] = flow_id;
+ return flow_id;
+}
+
+static int rxq_num(int ifindex)
+{
+ struct ethtool_channels_get_req *req;
+ struct ethtool_channels_get_rsp *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int num = -1;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = ethtool_channels_get_req_alloc();
+ ethtool_channels_get_req_set_header_dev_index(req, ifindex);
+ rsp = ethtool_channels_get(ys, req);
+ if (rsp)
+ num = rsp->rx_count + rsp->combined_count;
+ ethtool_channels_get_req_free(req);
+ ethtool_channels_get_rsp_free(rsp);
+
+ ynl_sock_destroy(ys);
+
+ return num;
+}
+
+static void reset_flow_steering(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_FLOWS; i++) {
+ if (ntuple_ids[i] == -1)
+ continue;
+ run_command("ethtool -N %s delete %d",
+ ifname, ntuple_ids[i]);
+ ntuple_ids[i] = -1;
+ }
+}
+
+static const char *tcp_data_split_str(int val)
+{
+ switch (val) {
+ case 0:
+ return "off";
+ case 1:
+ return "auto";
+ case 2:
+ return "on";
+ default:
+ return "?";
+ }
+}
+
+static struct ethtool_rings_get_rsp *get_ring_config(void)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return NULL;
+ }
+
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+
+ ynl_sock_destroy(ys);
+
+ return get_rsp;
+}
+
+static void restore_ring_config(const struct ethtool_rings_get_rsp *config)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ethtool_rings_set_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ if (!config)
+ return;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return;
+ }
+
+ req = ethtool_rings_set_req_alloc();
+ ethtool_rings_set_req_set_header_dev_index(req, ifindex);
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+ if (config->_present.hds_thresh)
+ ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh);
+
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg);
+
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+
+ /* use explicit value if UKNOWN didn't give us the previous */
+ if (get_rsp->tcp_data_split != config->tcp_data_split) {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ config->tcp_data_split);
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL restoring expl HDS cfg: %s\n",
+ ys->err.msg);
+ }
+
+ ethtool_rings_get_rsp_free(get_rsp);
+ ethtool_rings_set_req_free(req);
+
+ ynl_sock_destroy(ys);
+}
+
+static int
+configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ethtool_rings_set_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = ethtool_rings_set_req_alloc();
+ ethtool_rings_set_req_set_header_dev_index(req, ifindex);
+ if (on) {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED);
+ if (old->_present.hds_thresh)
+ ethtool_rings_set_req_set_hds_thresh(req, 0);
+ } else {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+ }
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL failed: %s\n", ys->err.msg);
+ ethtool_rings_set_req_free(req);
+
+ if (ret == 0) {
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+ if (get_rsp)
+ fprintf(stderr, "TCP header split: %s\n",
+ tcp_data_split_str(get_rsp->tcp_data_split));
+ ethtool_rings_get_rsp_free(get_rsp);
+ }
+
+ ynl_sock_destroy(ys);
+
+ return ret;
+}
+
+static int configure_rss(void)
+{
+ return run_command("ethtool -X %s equal %d >&2", ifname, start_queue);
+}
+
+static void reset_rss(void)
+{
+ run_command("ethtool -X %s default >&2", ifname, start_queue);
+}
+
+static int check_changing_channels(unsigned int rx, unsigned int tx)
+{
+ struct ethtool_channels_get_req *gchan;
+ struct ethtool_channels_set_req *schan;
+ struct ethtool_channels_get_rsp *chan;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx);
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ gchan = ethtool_channels_get_req_alloc();
+ if (!gchan) {
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ ethtool_channels_get_req_set_header_dev_index(gchan, ifindex);
+ chan = ethtool_channels_get(ys, gchan);
+ ethtool_channels_get_req_free(gchan);
+ if (!chan) {
+ fprintf(stderr, "YNL get channels: %s\n", ys->err.msg);
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ schan = ethtool_channels_set_req_alloc();
+ if (!schan) {
+ ret = -1;
+ goto exit_free_chan;
+ }
+
+ ethtool_channels_set_req_set_header_dev_index(schan, ifindex);
+
+ if (chan->_present.combined_count) {
+ if (chan->_present.rx_count || chan->_present.tx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, 0);
+ ethtool_channels_set_req_set_tx_count(schan, 0);
+ }
+
+ if (rx == tx) {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ } else if (rx > tx) {
+ ethtool_channels_set_req_set_combined_count(schan, tx);
+ ethtool_channels_set_req_set_rx_count(schan, rx - tx);
+ } else {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx - rx);
+ }
+
+ } else if (chan->_present.rx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx);
+ } else {
+ fprintf(stderr, "Error: device has neither combined nor rx channels\n");
+ ret = -1;
+ goto exit_free_schan;
+ }
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret) {
+ fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
+ } else {
+ /* We were expecting a failure, go back to previous settings */
+ ethtool_channels_set_req_set_combined_count(schan,
+ chan->combined_count);
+ ethtool_channels_set_req_set_rx_count(schan, chan->rx_count);
+ ethtool_channels_set_req_set_tx_count(schan, chan->tx_count);
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret)
+ fprintf(stderr, "YNL un-setting channels: %s\n",
+ ys->err.msg);
+ }
+
+exit_free_schan:
+ ethtool_channels_set_req_free(schan);
+exit_free_chan:
+ ethtool_channels_get_rsp_free(chan);
+exit_close_sock:
+ ynl_sock_destroy(ys);
+
+ return ret;
+}
+
+static int configure_flow_steering(struct sockaddr_in6 *server_sin)
+{
+ const char *type = "tcp6";
+ const char *server_addr;
+ char buf[40];
+ int flow_id;
+
+ inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf));
+ server_addr = buf;
+
+ if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) {
+ type = "tcp4";
+ server_addr = strrchr(server_addr, ':') + 1;
+ }
+
+ /* Try configure 5-tuple */
+ flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d",
+ type,
+ client_ip ? "src-ip" : "",
+ client_ip ?: "",
+ server_addr,
+ client_ip ? "src-port" : "",
+ client_ip ? port : "",
+ port, start_queue);
+ if (flow_id < 0) {
+ /* If that fails, try configure 3-tuple */
+ flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d",
+ type, server_addr, port, start_queue);
+ if (flow_id < 0)
+ /* If that fails, return error */
+ return -1;
+ }
+
+ return 0;
+}
+
+static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+ struct netdev_queue_id *queues,
+ unsigned int n_queue_index, struct ynl_sock **ys)
+{
+ struct netdev_bind_rx_req *req = NULL;
+ struct netdev_bind_rx_rsp *rsp = NULL;
+ struct ynl_error yerr;
+
+ *ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!*ys) {
+ netdev_queue_id_free(queues);
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_bind_rx_req_alloc();
+ netdev_bind_rx_req_set_ifindex(req, ifindex);
+ netdev_bind_rx_req_set_fd(req, dmabuf_fd);
+ __netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
+
+ rsp = netdev_bind_rx(*ys, req);
+ if (!rsp) {
+ perror("netdev_bind_rx");
+ goto err_close;
+ }
+
+ if (!rsp->_present.id) {
+ perror("id not present");
+ goto err_close;
+ }
+
+ fprintf(stderr, "got dmabuf id=%d\n", rsp->id);
+ dmabuf_id = rsp->id;
+
+ netdev_bind_rx_req_free(req);
+ netdev_bind_rx_rsp_free(rsp);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+ netdev_bind_rx_req_free(req);
+ ynl_sock_destroy(*ys);
+ return -1;
+}
+
+static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+ struct ynl_sock **ys)
+{
+ struct netdev_bind_tx_req *req = NULL;
+ struct netdev_bind_tx_rsp *rsp = NULL;
+ struct ynl_error yerr;
+
+ *ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!*ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_bind_tx_req_alloc();
+ netdev_bind_tx_req_set_ifindex(req, ifindex);
+ netdev_bind_tx_req_set_fd(req, dmabuf_fd);
+
+ rsp = netdev_bind_tx(*ys, req);
+ if (!rsp) {
+ perror("netdev_bind_tx");
+ goto err_close;
+ }
+
+ if (!rsp->_present.id) {
+ perror("id not present");
+ goto err_close;
+ }
+
+ fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
+ tx_dmabuf_id = rsp->id;
+
+ netdev_bind_tx_req_free(req);
+ netdev_bind_tx_rsp_free(rsp);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+ netdev_bind_tx_req_free(req);
+ ynl_sock_destroy(*ys);
+ return -1;
+}
+
+static int enable_reuseaddr(int fd)
+{
+ int opt = 1;
+ int ret;
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+ if (ret) {
+ pr_err("SO_REUSEPORT failed");
+ return -1;
+ }
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+ if (ret) {
+ pr_err("SO_REUSEADDR failed");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static struct netdev_queue_id *create_queues(void)
+{
+ struct netdev_queue_id *queues;
+ size_t i = 0;
+
+ queues = netdev_queue_id_alloc(num_queues);
+ for (i = 0; i < num_queues; i++) {
+ netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX);
+ netdev_queue_id_set_id(&queues[i], start_queue + i);
+ }
+
+ return queues;
+}
+
+static int do_server(struct memory_buffer *mem)
+{
+ struct ethtool_rings_get_rsp *ring_config;
+ char ctrl_data[sizeof(int) * 20000];
+ size_t non_page_aligned_frags = 0;
+ struct sockaddr_in6 client_addr;
+ struct sockaddr_in6 server_sin;
+ size_t page_aligned_frags = 0;
+ size_t total_received = 0;
+ socklen_t client_addr_len;
+ bool is_devmem = false;
+ char *tmp_mem = NULL;
+ struct ynl_sock *ys;
+ char iobuf[819200];
+ int ret, err = -1;
+ char buffer[256];
+ int socket_fd;
+ int client_fd;
+
+ ret = parse_address(server_ip, atoi(port), &server_sin);
+ if (ret < 0) {
+ pr_err("parse server address");
+ return -1;
+ }
+
+ ring_config = get_ring_config();
+ if (!ring_config) {
+ pr_err("Failed to get current ring configuration");
+ return -1;
+ }
+
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to enable TCP header split");
+ goto err_free_ring_config;
+ }
+
+ /* Configure RSS to divert all traffic from our devmem queues */
+ if (configure_rss()) {
+ pr_err("Failed to configure rss");
+ goto err_reset_headersplit;
+ }
+
+ /* Flow steer our devmem flows to start_queue */
+ if (configure_flow_steering(&server_sin)) {
+ pr_err("Failed to configure flow steering");
+ goto err_reset_rss;
+ }
+
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) {
+ pr_err("Failed to bind");
+ goto err_reset_flow_steering;
+ }
+
+ tmp_mem = malloc(mem->size);
+ if (!tmp_mem)
+ goto err_unbind;
+
+ socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (socket_fd < 0) {
+ pr_err("Failed to create socket");
+ goto err_free_tmp;
+ }
+
+ if (enable_reuseaddr(socket_fd))
+ goto err_close_socket;
+
+ fprintf(stderr, "binding to address %s:%d\n", server_ip,
+ ntohs(server_sin.sin6_port));
+
+ ret = bind(socket_fd, &server_sin, sizeof(server_sin));
+ if (ret) {
+ pr_err("Failed to bind");
+ goto err_close_socket;
+ }
+
+ ret = listen(socket_fd, 1);
+ if (ret) {
+ pr_err("Failed to listen");
+ goto err_close_socket;
+ }
+
+ client_addr_len = sizeof(client_addr);
+
+ inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer,
+ sizeof(buffer));
+ fprintf(stderr, "Waiting or connection on %s:%d\n", buffer,
+ ntohs(server_sin.sin6_port));
+ client_fd = accept(socket_fd, &client_addr, &client_addr_len);
+ if (client_fd < 0) {
+ pr_err("Failed to accept");
+ goto err_close_socket;
+ }
+
+ inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer,
+ sizeof(buffer));
+ fprintf(stderr, "Got connection from %s:%d\n", buffer,
+ ntohs(client_addr.sin6_port));
+
+ while (1) {
+ struct iovec iov = { .iov_base = iobuf,
+ .iov_len = sizeof(iobuf) };
+ struct dmabuf_cmsg *dmabuf_cmsg = NULL;
+ struct cmsghdr *cm = NULL;
+ struct msghdr msg = { 0 };
+ struct dmabuf_token token;
+ ssize_t ret;
+
+ is_devmem = false;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = ctrl_data;
+ msg.msg_controllen = sizeof(ctrl_data);
+ ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
+ fprintf(stderr, "recvmsg ret=%ld\n", ret);
+ if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
+ continue;
+ if (ret < 0) {
+ perror("recvmsg");
+ if (errno == EFAULT) {
+ pr_err("received EFAULT, won't recover");
+ goto err_close_client;
+ }
+ continue;
+ }
+ if (ret == 0) {
+ errno = 0;
+ pr_err("client exited");
+ goto cleanup;
+ }
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != SOL_SOCKET ||
+ (cm->cmsg_type != SCM_DEVMEM_DMABUF &&
+ cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
+ fprintf(stderr, "skipping non-devmem cmsg\n");
+ continue;
+ }
+
+ dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
+ is_devmem = true;
+
+ if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
+ /* TODO: process data copied from skb's linear
+ * buffer.
+ */
+ fprintf(stderr,
+ "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
+ dmabuf_cmsg->frag_size);
+
+ continue;
+ }
+
+ token.token_start = dmabuf_cmsg->frag_token;
+ token.token_count = 1;
+
+ total_received += dmabuf_cmsg->frag_size;
+ fprintf(stderr,
+ "received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n",
+ dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
+ dmabuf_cmsg->frag_offset % getpagesize(),
+ dmabuf_cmsg->frag_offset,
+ dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token,
+ total_received, dmabuf_cmsg->dmabuf_id);
+
+ if (dmabuf_cmsg->dmabuf_id != dmabuf_id) {
+ pr_err("received on wrong dmabuf_id: flow steering error");
+ goto err_close_client;
+ }
+
+ if (dmabuf_cmsg->frag_size % getpagesize())
+ non_page_aligned_frags++;
+ else
+ page_aligned_frags++;
+
+ provider->memcpy_from_device(tmp_mem, mem,
+ dmabuf_cmsg->frag_offset,
+ dmabuf_cmsg->frag_size);
+
+ if (do_validation) {
+ if (validate_buffer(tmp_mem,
+ dmabuf_cmsg->frag_size))
+ goto err_close_client;
+ } else {
+ print_nonzero_bytes(tmp_mem,
+ dmabuf_cmsg->frag_size);
+ }
+
+ ret = setsockopt(client_fd, SOL_SOCKET,
+ SO_DEVMEM_DONTNEED, &token,
+ sizeof(token));
+ if (ret != 1) {
+ pr_err("SO_DEVMEM_DONTNEED not enough tokens");
+ goto err_close_client;
+ }
+ }
+ if (!is_devmem) {
+ pr_err("flow steering error");
+ goto err_close_client;
+ }
+
+ fprintf(stderr, "total_received=%lu\n", total_received);
+ }
+
+ fprintf(stderr, "%s: ok\n", TEST_PREFIX);
+
+ fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
+ page_aligned_frags, non_page_aligned_frags);
+
+cleanup:
+ err = 0;
+
+err_close_client:
+ close(client_fd);
+err_close_socket:
+ close(socket_fd);
+err_free_tmp:
+ free(tmp_mem);
+err_unbind:
+ ynl_sock_destroy(ys);
+err_reset_flow_steering:
+ reset_flow_steering();
+err_reset_rss:
+ reset_rss();
+err_reset_headersplit:
+ restore_ring_config(ring_config);
+err_free_ring_config:
+ ethtool_rings_get_rsp_free(ring_config);
+ return err;
+}
+
+int run_devmem_tests(void)
+{
+ struct ethtool_rings_get_rsp *ring_config;
+ struct netdev_queue_id *queues;
+ struct memory_buffer *mem;
+ struct ynl_sock *ys;
+ int err = -1;
+
+ mem = provider->alloc(getpagesize() * NUM_PAGES);
+ if (!mem) {
+ pr_err("Failed to allocate memory buffer");
+ return -1;
+ }
+
+ ring_config = get_ring_config();
+ if (!ring_config) {
+ pr_err("Failed to get current ring configuration");
+ goto err_free_mem;
+ }
+
+ /* Configure RSS to divert all traffic from our devmem queues */
+ if (configure_rss()) {
+ pr_err("rss error");
+ goto err_free_ring_config;
+ }
+
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_rss;
+ }
+
+ queues = netdev_queue_id_alloc(num_queues);
+ if (!queues) {
+ pr_err("Failed to allocate empty queues array");
+ goto err_reset_headersplit;
+ }
+
+ if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Binding empty queues array should have failed");
+ goto err_unbind;
+ }
+
+ if (configure_headersplit(ring_config, 0)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_headersplit;
+ }
+
+ queues = create_queues();
+ if (!queues) {
+ pr_err("Failed to create queues");
+ goto err_reset_headersplit;
+ }
+
+ if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Configure dmabuf with header split off should have failed");
+ goto err_unbind;
+ }
+
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_headersplit;
+ }
+
+ queues = create_queues();
+ if (!queues) {
+ pr_err("Failed to create queues");
+ goto err_reset_headersplit;
+ }
+
+ if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Failed to bind");
+ goto err_reset_headersplit;
+ }
+
+ /* Deactivating a bound queue should not be legal */
+ if (!check_changing_channels(num_queues, num_queues)) {
+ pr_err("Deactivating a bound queue should be illegal");
+ goto err_unbind;
+ }
+
+ err = 0;
+ goto err_unbind;
+
+err_unbind:
+ ynl_sock_destroy(ys);
+err_reset_headersplit:
+ restore_ring_config(ring_config);
+err_reset_rss:
+ reset_rss();
+err_free_ring_config:
+ ethtool_rings_get_rsp_free(ring_config);
+err_free_mem:
+ provider->free(mem);
+ return err;
+}
+
+static uint64_t gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL);
+}
+
+static int do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ ret = poll(&pfd, 1, waittime_ms);
+ if (ret == -1) {
+ pr_err("poll");
+ return -1;
+ }
+
+ return ret && (pfd.revents & POLLERR);
+}
+
+static int wait_compl(int fd)
+{
+ int64_t tstop = gettimeofday_ms() + waittime_ms;
+ char control[CMSG_SPACE(100)] = {};
+ struct sock_extended_err *serr;
+ struct msghdr msg = {};
+ struct cmsghdr *cm;
+ __u32 hi, lo;
+ int ret;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ while (gettimeofday_ms() < tstop) {
+ ret = do_poll(fd);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ continue;
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ pr_err("recvmsg(MSG_ERRQUEUE)");
+ return -1;
+ }
+ if (msg.msg_flags & MSG_CTRUNC) {
+ pr_err("MSG_CTRUNC");
+ return -1;
+ }
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != SOL_IP &&
+ cm->cmsg_level != SOL_IPV6)
+ continue;
+ if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type != IP_RECVERR)
+ continue;
+ if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type != IPV6_RECVERR)
+ continue;
+
+ serr = (void *)CMSG_DATA(cm);
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
+ pr_err("wrong origin %u", serr->ee_origin);
+ return -1;
+ }
+ if (serr->ee_errno != 0) {
+ pr_err("wrong errno %d", serr->ee_errno);
+ return -1;
+ }
+
+ hi = serr->ee_data;
+ lo = serr->ee_info;
+
+ fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
+ return 0;
+ }
+ }
+
+ pr_err("did not receive tx completion");
+ return -1;
+}
+
+static int do_client(struct memory_buffer *mem)
+{
+ char ctrl_data[CMSG_SPACE(sizeof(__u32))];
+ struct sockaddr_in6 server_sin;
+ struct sockaddr_in6 client_sin;
+ struct ynl_sock *ys = NULL;
+ struct iovec iov[MAX_IOV];
+ struct msghdr msg = {};
+ ssize_t line_size = 0;
+ struct cmsghdr *cmsg;
+ char *line = NULL;
+ int ret, err = -1;
+ size_t len = 0;
+ int socket_fd;
+ __u32 ddmabuf;
+ int opt = 1;
+
+ ret = parse_address(server_ip, atoi(port), &server_sin);
+ if (ret < 0) {
+ pr_err("parse server address");
+ return -1;
+ }
+
+ if (client_ip) {
+ ret = parse_address(client_ip, atoi(port), &client_sin);
+ if (ret < 0) {
+ pr_err("parse client address");
+ return ret;
+ }
+ }
+
+ socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (socket_fd < 0) {
+ pr_err("create socket");
+ return -1;
+ }
+
+ if (enable_reuseaddr(socket_fd))
+ goto err_close_socket;
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
+ strlen(ifname) + 1);
+ if (ret) {
+ pr_err("bindtodevice");
+ goto err_close_socket;
+ }
+
+ if (bind_tx_queue(ifindex, mem->fd, &ys)) {
+ pr_err("Failed to bind");
+ goto err_close_socket;
+ }
+
+ if (client_ip) {
+ ret = bind(socket_fd, &client_sin, sizeof(client_sin));
+ if (ret) {
+ pr_err("bind");
+ goto err_unbind;
+ }
+ }
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
+ if (ret) {
+ pr_err("set sock opt");
+ goto err_unbind;
+ }
+
+ fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
+ ntohs(server_sin.sin6_port), ifname);
+
+ ret = connect(socket_fd, &server_sin, sizeof(server_sin));
+ if (ret) {
+ pr_err("connect");
+ goto err_unbind;
+ }
+
+ while (1) {
+ free(line);
+ line = NULL;
+ line_size = getline(&line, &len, stdin);
+
+ if (line_size < 0)
+ break;
+
+ if (max_chunk) {
+ msg.msg_iovlen =
+ (line_size + max_chunk - 1) / max_chunk;
+ if (msg.msg_iovlen > MAX_IOV) {
+ pr_err("can't partition %zd bytes into maximum of %d chunks",
+ line_size, MAX_IOV);
+ goto err_free_line;
+ }
+
+ for (int i = 0; i < msg.msg_iovlen; i++) {
+ iov[i].iov_base = (void *)(i * max_chunk);
+ iov[i].iov_len = max_chunk;
+ }
+
+ iov[msg.msg_iovlen - 1].iov_len =
+ line_size - (msg.msg_iovlen - 1) * max_chunk;
+ } else {
+ iov[0].iov_base = 0;
+ iov[0].iov_len = line_size;
+ msg.msg_iovlen = 1;
+ }
+
+ msg.msg_iov = iov;
+ provider->memcpy_to_device(mem, 0, line, line_size);
+
+ msg.msg_control = ctrl_data;
+ msg.msg_controllen = sizeof(ctrl_data);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+
+ ddmabuf = tx_dmabuf_id;
+
+ *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
+
+ ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
+ if (ret < 0) {
+ pr_err("Failed sendmsg");
+ goto err_free_line;
+ }
+
+ fprintf(stderr, "sendmsg_ret=%d\n", ret);
+
+ if (ret != line_size) {
+ pr_err("Did not send all bytes %d vs %zd", ret, line_size);
+ goto err_free_line;
+ }
+
+ if (wait_compl(socket_fd))
+ goto err_free_line;
+ }
+
+ fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
+
+ err = 0;
+
+err_free_line:
+ free(line);
+err_unbind:
+ ynl_sock_destroy(ys);
+err_close_socket:
+ close(socket_fd);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ struct memory_buffer *mem;
+ int is_server = 0, opt;
+ int ret, err = 1;
+
+ while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
+ switch (opt) {
+ case 'l':
+ is_server = 1;
+ break;
+ case 's':
+ server_ip = optarg;
+ break;
+ case 'c':
+ client_ip = optarg;
+ break;
+ case 'p':
+ port = optarg;
+ break;
+ case 'v':
+ do_validation = atoll(optarg);
+ break;
+ case 'q':
+ num_queues = atoi(optarg);
+ break;
+ case 't':
+ start_queue = atoi(optarg);
+ break;
+ case 'f':
+ ifname = optarg;
+ break;
+ case 'z':
+ max_chunk = atoi(optarg);
+ break;
+ case '?':
+ fprintf(stderr, "unknown option: %c\n", optopt);
+ break;
+ }
+ }
+
+ if (!ifname) {
+ pr_err("Missing -f argument");
+ return 1;
+ }
+
+ ifindex = if_nametoindex(ifname);
+
+ fprintf(stderr, "using ifindex=%u\n", ifindex);
+
+ if (!server_ip && !client_ip) {
+ if (start_queue < 0 && num_queues < 0) {
+ num_queues = rxq_num(ifindex);
+ if (num_queues < 0) {
+ pr_err("couldn't detect number of queues");
+ return 1;
+ }
+ if (num_queues < 2) {
+ pr_err("number of device queues is too low");
+ return 1;
+ }
+ /* make sure can bind to multiple queues */
+ start_queue = num_queues / 2;
+ num_queues /= 2;
+ }
+
+ if (start_queue < 0 || num_queues < 0) {
+ pr_err("Both -t and -q are required");
+ return 1;
+ }
+
+ return run_devmem_tests();
+ }
+
+ if (start_queue < 0 && num_queues < 0) {
+ num_queues = rxq_num(ifindex);
+ if (num_queues < 2) {
+ pr_err("number of device queues is too low");
+ return 1;
+ }
+
+ num_queues = 1;
+ start_queue = rxq_num(ifindex) - num_queues;
+
+ if (start_queue < 0) {
+ pr_err("couldn't detect number of queues");
+ return 1;
+ }
+
+ fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues);
+ }
+
+ for (; optind < argc; optind++)
+ fprintf(stderr, "extra arguments: %s\n", argv[optind]);
+
+ if (start_queue < 0) {
+ pr_err("Missing -t argument");
+ return 1;
+ }
+
+ if (num_queues < 0) {
+ pr_err("Missing -q argument");
+ return 1;
+ }
+
+ if (!server_ip) {
+ pr_err("Missing -s argument");
+ return 1;
+ }
+
+ if (!port) {
+ pr_err("Missing -p argument");
+ return 1;
+ }
+
+ mem = provider->alloc(getpagesize() * NUM_PAGES);
+ if (!mem) {
+ pr_err("Failed to allocate memory buffer");
+ return 1;
+ }
+
+ ret = is_server ? do_server(mem) : do_client(mem);
+ if (ret)
+ goto err_free_mem;
+
+ err = 0;
+
+err_free_mem:
+ provider->free(mem);
+ return err;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
new file mode 100755
index 000000000000..c1e943d53f19
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests related to configuration of HW timestamping
+"""
+
+import errno
+from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx
+from lib.py import NetDrvEnv, EthtoolFamily, NlError
+
+
+def __get_hwtimestamp_support(cfg):
+ """ Retrieve supported configuration information """
+
+ try:
+ tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported") from e
+ raise
+
+ ctx = {}
+ tx = tsinfo.get('tx-types', {})
+ rx = tsinfo.get('rx-filters', {})
+
+ bits = tx.get('bits', {})
+ ctx['tx'] = bits.get('bit', [])
+ bits = rx.get('bits', {})
+ ctx['rx'] = bits.get('bit', [])
+ return ctx
+
+
+def __get_hwtimestamp_config(cfg):
+ """ Retrieve current TS configuration information """
+
+ try:
+ tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via netlink") from e
+ raise
+ return tscfg
+
+
+def __set_hwtimestamp_config(cfg, ts):
+ """ Setup new TS configuration information """
+
+ ts['header'] = {'dev-name': cfg.ifname}
+ try:
+ res = cfg.ethnl.tsconfig_set(ts)
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via netlink") from e
+ raise
+ return res
+
+
+def test_hwtstamp_tx(cfg):
+ """
+ Test TX timestamp configuration.
+ The driver should apply provided config and report back proper state.
+ """
+
+ orig_tscfg = __get_hwtimestamp_config(cfg)
+ ts = __get_hwtimestamp_support(cfg)
+ tx = ts['tx']
+ for t in tx:
+ tscfg = orig_tscfg
+ tscfg['tx-types']['bits']['bit'] = [t]
+ res = __set_hwtimestamp_config(cfg, tscfg)
+ if res is None:
+ res = __get_hwtimestamp_config(cfg)
+ ksft_eq(res['tx-types']['bits']['bit'], [t])
+ __set_hwtimestamp_config(cfg, orig_tscfg)
+
+
+def test_hwtstamp_rx(cfg):
+ """
+ Test RX timestamp configuration.
+ The filter configuration is taken from the list of supported filters.
+ The driver should apply the config without error and report back proper state.
+ Some extension of the timestamping scope is allowed for PTP filters.
+ """
+
+ orig_tscfg = __get_hwtimestamp_config(cfg)
+ ts = __get_hwtimestamp_support(cfg)
+ rx = ts['rx']
+ for r in rx:
+ tscfg = orig_tscfg
+ tscfg['rx-filters']['bits']['bit'] = [r]
+ res = __set_hwtimestamp_config(cfg, tscfg)
+ if res is None:
+ res = __get_hwtimestamp_config(cfg)
+ if r['index'] == 0 or r['index'] == 1:
+ ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
+ else:
+ # the driver can fallback to some value which has higher coverage for timestamping
+ ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
+ __set_hwtimestamp_config(cfg, orig_tscfg)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
new file mode 100755
index 000000000000..2a51b60df8a1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test driver resilience vs page pool allocation failures.
+"""
+
+import errno
+import time
+import math
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetdevFamily, NlError
+from lib.py import NetDrvEpEnv
+from lib.py import cmd, tool, GenerateTraffic
+
+
+def _write_fail_config(config):
+ for key, value in config.items():
+ path = "/sys/kernel/debug/fail_function/"
+ with open(path + key, "w", encoding='ascii') as fp:
+ fp.write(str(value) + "\n")
+
+
+def _enable_pp_allocation_fail():
+ if not os.path.exists("/sys/kernel/debug/fail_function"):
+ raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
+
+ if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
+ _write_fail_config({"inject": "page_pool_alloc_netmems"})
+
+ _write_fail_config({
+ "verbose": 0,
+ "interval": 511,
+ "probability": 100,
+ "times": -1,
+ })
+
+
+def _disable_pp_allocation_fail():
+ if not os.path.exists("/sys/kernel/debug/fail_function"):
+ return
+
+ if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
+ _write_fail_config({"inject": ""})
+
+ _write_fail_config({
+ "probability": 0,
+ "times": 0,
+ })
+
+
+def test_pp_alloc(cfg, netdevnl):
+ """
+ Configure page pool allocation fail injection while traffic is running.
+ """
+
+ def get_stats():
+ return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ def check_traffic_flowing():
+ stat1 = get_stats()
+ time.sleep(1)
+ stat2 = get_stats()
+ if stat2['rx-packets'] - stat1['rx-packets'] < 4000:
+ raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])
+
+
+ try:
+ stats = get_stats()
+ except NlError as e:
+ if e.nl_msg.error == -errno.EOPNOTSUPP:
+ stats = {}
+ else:
+ raise
+ if 'rx-alloc-fail' not in stats:
+ raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")
+
+ set_g = False
+ traffic = None
+ try:
+ traffic = GenerateTraffic(cfg)
+
+ check_traffic_flowing()
+
+ _enable_pp_allocation_fail()
+
+ s1 = get_stats()
+ time.sleep(3)
+ s2 = get_stats()
+
+ seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
+ if seen_fails < 1:
+ raise KsftSkipEx("Allocation failures not increasing")
+ pkts = s2['rx-packets'] - s1['rx-packets']
+ # Expecting one failure per 512 buffers, 3.1x safety margin
+ want_fails = math.floor(pkts / 512 / 3.1)
+ if seen_fails < want_fails:
+ raise KsftSkipEx("Allocation increasing too slowly", seen_fails,
+ "packets:", pkts)
+ ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})")
+
+ # Basic failures are fine, try to wobble some settings to catch extra failures
+ check_traffic_flowing()
+ g = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
+ if 'rx' in g and g["rx"] * 2 <= g["rx-max"]:
+ new_g = g['rx'] * 2
+ elif 'rx' in g:
+ new_g = g['rx'] // 2
+ else:
+ new_g = None
+
+ if new_g:
+ set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
+ if set_g:
+ ksft_pr("ethtool -G change retval: success")
+ else:
+ ksft_pr("ethtool -G change retval: did not succeed", new_g)
+ else:
+ ksft_pr("ethtool -G change retval: did not try")
+
+ time.sleep(0.1)
+ check_traffic_flowing()
+ finally:
+ _disable_pp_allocation_fail()
+ if traffic:
+ traffic.stop()
+ time.sleep(0.1)
+ if set_g:
+ cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+ netdevnl = NetdevFamily()
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+
+ ksft_run([test_pp_alloc], args=(cfg, netdevnl, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py
new file mode 100755
index 000000000000..19847f3d4a00
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_api.py
@@ -0,0 +1,476 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+API level tests for RSS (mostly Netlink vs IOCTL).
+"""
+
+import errno
+import glob
+import random
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import defer, ethtool, CmdExitFailure
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+
+def _require_2qs(cfg):
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+ return qcnt
+
+
+def _ethtool_create(cfg, act, opts):
+ output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+ # Output will be something like: "New RSS context is 1" or
+ # "Added rule with ID 7", we want the integer from the end
+ return int(output.split()[-1])
+
+
+def _ethtool_get_cfg(cfg, fl_type, to_nl=False):
+ descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+ if to_nl:
+ converter = {
+ "IP SA": "ip-src",
+ "IP DA": "ip-dst",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3",
+ }
+
+ ret = set()
+ else:
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ }
+
+ ret = ""
+
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ if to_nl:
+ ret.add(converter[line])
+ else:
+ ret += converter[line]
+ return ret
+
+
+def test_rxfh_nl_set_fail(cfg):
+ """
+ Test error path of Netlink SET.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-name": "lo"},
+ "indir": None})
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [100000]})
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ ksft_is(ntf, None)
+
+
+def test_rxfh_nl_set_indir(cfg):
+ """
+ Test setting indirection table via Netlink.
+ """
+ qcnt = _require_2qs(cfg)
+
+ # Test some SETs with a value
+ reset = defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex}, "indir": None})
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+ # Test reset back to default
+ reset.exec()
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt)))
+
+
+def test_rxfh_nl_set_indir_ctx(cfg):
+ """
+ Test setting indirection table for a custom context via Netlink.
+ """
+ _require_2qs(cfg)
+
+ # Get setting for ctx 0, we'll make sure they don't get clobbered
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+
+ # Create context
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+
+def test_rxfh_indir_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1")
+ reset = defer(ethtool, f"-X {cfg.ifname} default")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after reset")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_is(ntf["msg"].get("context"), None)
+ ksft_ne(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_indir_ctx_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified on an additional RSS context.
+ """
+ _require_2qs(cfg)
+
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"].get("context"), ctx_id)
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_nl_set_key(cfg):
+ """
+ Test setting hashing key via Netlink.
+ """
+
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex},
+ "hkey": dflt["hkey"], "indir": None})
+
+ # Empty key should error out
+ with ksft_raises(NlError) as cm:
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": None})
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey')
+
+ # Set key to random
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+
+ # Set key to random and indir tbl to something at once
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1], "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+
+def test_rxfh_fields(cfg):
+ """
+ Test reading Rx Flow Hash over Netlink.
+ """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+ ethnl = EthtoolFamily()
+
+ cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ one = _ethtool_get_cfg(cfg, fl_type, to_nl=True)
+ ksft_eq(one, cfg_nl["flow-hash"][fl_type],
+ comment="Config for " + fl_type)
+
+
+def test_rxfh_fields_set(cfg):
+ """ Test configuring Rx Flow Hash over Netlink. """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+
+ # Collect current settings
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # symmetric hashing prevents some of the configs below
+ if cfg_old.get("input-xfrm"):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": {}})
+
+ for fl_type in flow_types:
+ cur = _ethtool_get_cfg(cfg, fl_type)
+ if cur == "sdfn":
+ change_nl = {"ip-src", "ip-dst"}
+ change_ic = "sd"
+ else:
+ change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+ change_ic = "sdfn"
+
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {fl_type: change_nl}
+ })
+ reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} "
+ f"rx-flow-hash {fl_type} {cur}")
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type],
+ comment=f"Config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(change_ic, cfg_ic,
+ comment=f"Config for {fl_type} over IOCTL")
+
+ reset.exec()
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type],
+ comment=f"Un-config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL")
+
+ # Try to set multiple at once, the defer was already installed at the start
+ change = {"ip-src"}
+ if change == cfg_old["flow-hash"]["tcp4"]:
+ change = {"ip-dst"}
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {x: change for x in flow_types}
+ })
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ ksft_eq(change, cfg_nl["flow-hash"][fl_type],
+ comment=f"multi-config for {fl_type} over Netlink")
+
+
+def test_rxfh_fields_set_xfrm(cfg):
+ """ Test changing Rx Flow Hash vs xfrm_input at once. """
+
+ def set_rss(cfg, xfrm, fh):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": xfrm, "flow-hash": fh})
+
+ # Install the reset handler
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # Make sure we start with input-xfrm off, and tcp4 config non-sym
+ set_rss(cfg, {}, {})
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+ # Setting sym and fixing tcp4 config not expected to pass right now
+ with ksft_raises(NlError):
+ set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}})
+ # One at a time should work, hopefully
+ set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}})
+ no_support = False
+ try:
+ set_rss(cfg, {"sym-xor"}, {})
+ except NlError:
+ try:
+ set_rss(cfg, {"sym-or-xor"}, {})
+ except NlError:
+ no_support = True
+ if no_support:
+ raise KsftSkipEx("no input-xfrm supported")
+ # Disabling two at once should not work either without kernel changes
+ with ksft_raises(NlError):
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+
+def test_rxfh_fields_ntf(cfg):
+ """ Test Rx Flow Hash notifications. """
+
+ cur = _ethtool_get_cfg(cfg, "tcp4")
+ if cur == "sdfn":
+ change = {"ip-src", "ip-dst"}
+ else:
+ change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {"tcp4": change}
+ })
+ reset = defer(ethtool,
+ f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after IOCTL change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after Netlink change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+
+def test_rss_ctx_add(cfg):
+ """ Test creating an additional RSS context via Netlink """
+
+ _require_2qs(cfg)
+
+ # Test basic creation
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_ne(ctx.get("context", 0), 0)
+ ksft_ne(set(ctx.get("indir", [0])), {0},
+ comment="Driver should init the indirection table")
+
+ # Try requesting the ID we just got allocated
+ with ksft_raises(NlError) as cm:
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx.get("context"),
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ d.exec()
+ ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY)
+
+ # Test creating with a specified RSS table, and context ID
+ ctx_id = ctx.get("context")
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx_id,
+ "indir": [1],
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_eq(ctx.get("context"), ctx_id)
+ ksft_eq(set(ctx.get("indir", [0])), {1})
+
+
+def test_rss_ctx_ntf(cfg):
+ """ Test notifications for creating additional RSS contexts """
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ # Create / delete via Netlink
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ cfg.ethnl.rss_delete_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx["context"],
+ })
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+ ksft_eq(ctx, ntf["msg"])
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+ # Create / deleve via IOCTL
+ ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new")
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete")
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
new file mode 100755
index 000000000000..ed7e405682f0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -0,0 +1,832 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import datetime
+import random
+import re
+from lib.py import ksft_run, ksft_pr, ksft_exit
+from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises
+from lib.py import NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import rand_port
+from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure
+
+
+def _rss_key_str(key):
+ return ":".join(["{:02x}".format(x) for x in key])
+
+
+def _rss_key_rand(length):
+ return [random.randint(0, 255) for _ in range(length)]
+
+
+def _rss_key_check(cfg, data=None, context=0):
+ if data is None:
+ data = get_rss(cfg, context=context)
+ if 'rss-hash-key' not in data:
+ return
+ non_zero = [x for x in data['rss-hash-key'] if x != 0]
+ ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}")
+
+
+def get_rss(cfg, context=0):
+ return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0]
+
+
+def get_drop_err_sum(cfg):
+ stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0]
+ cnt = 0
+ for key in ['errors', 'dropped', 'over_errors', 'fifo_errors',
+ 'length_errors', 'crc_errors', 'missed_errors',
+ 'frame_errors']:
+ cnt += stats["stats64"]["rx"][key]
+ return cnt, stats["stats64"]["tx"]["carrier_changes"]
+
+
+def ethtool_create(cfg, act, opts):
+ output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+ # Output will be something like: "New RSS context is 1" or
+ # "Added rule with ID 7", we want the integer from the end
+ return int(output.split()[-1])
+
+
+def require_ntuple(cfg):
+ features = ethtool(f"-k {cfg.ifname}", json=True)[0]
+ if not features["ntuple-filters"]["active"]:
+ # ntuple is more of a capability than a config knob, don't bother
+ # trying to enable it (until some driver actually needs it).
+ raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"]))
+
+
+def require_context_cnt(cfg, need_cnt):
+ # There's no good API to get the context count, so the tests
+ # which try to add a lot opportunisitically set the count they
+ # discovered. Careful with test ordering!
+ if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt:
+ raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}")
+
+
+# Get Rx packet counts for all queues, as a simple list of integers
+# if @prev is specified the prev counts will be subtracted
+def _get_rx_cnts(cfg, prev=None):
+ cfg.wait_hw_stats_settle()
+ data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True)
+ data = [x for x in data if x['queue-type'] == "rx"]
+ max_q = max([x["queue-id"] for x in data])
+ queue_stats = [0] * (max_q + 1)
+ for q in data:
+ queue_stats[q["queue-id"]] = q["rx-packets"]
+ if prev and q["queue-id"] < len(prev):
+ queue_stats[q["queue-id"]] -= prev[q["queue-id"]]
+ return queue_stats
+
+
+def _send_traffic_check(cfg, port, name, params):
+ # params is a dict with 3 possible keys:
+ # - "target": required, which queues we expect to get iperf traffic
+ # - "empty": optional, which queues should see no traffic at all
+ # - "noise": optional, which queues we expect to see low traffic;
+ # used for queues of the main context, since some background
+ # OS activity may use those queues while we're testing
+ # the value for each is a list, or some other iterable containing queue ids.
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ directed = sum(cnts[i] for i in params['target'])
+
+ ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts))
+ if params.get('noise'):
+ ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2,
+ f"traffic on other queues ({name})':" + str(cnts))
+ if params.get('empty'):
+ ksft_eq(sum(cnts[i] for i in params['empty']), 0,
+ f"traffic on inactive queues ({name}): " + str(cnts))
+
+
+def _ntuple_rule_check(cfg, rule_id, ctx_id):
+ """Check that ntuple rule references RSS context ID"""
+ text = ethtool(f"-n {cfg.ifname} rule {rule_id}").stdout
+ pattern = f"RSS Context (ID: )?{ctx_id}"
+ ksft_true(re.search(pattern, text), "RSS context not referenced in ntuple rule")
+
+
+def test_rss_key_indir(cfg):
+ """Test basics like updating the main RSS key and indirection table."""
+
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 3:
+ raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")
+
+ data = get_rss(cfg)
+ want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table']
+ for k in want_keys:
+ if k not in data:
+ raise KsftFailEx("ethtool results missing key: " + k)
+ if not data[k]:
+ raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}")
+
+ _rss_key_check(cfg, data=data)
+ key_len = len(data['rss-hash-key'])
+
+ # Set the key
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
+
+ data = get_rss(cfg)
+ ksft_eq(key, data['rss-hash-key'])
+
+ # Set the indirection table and the key together
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key))
+ reset_indir = defer(ethtool, f"-X {cfg.ifname} default")
+
+ data = get_rss(cfg)
+ _rss_key_check(cfg, data=data)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(2, max(data['rss-indirection-table']))
+
+ # Reset indirection table and set the key
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key))
+ data = get_rss(cfg)
+ _rss_key_check(cfg, data=data)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(qcnt - 1, max(data['rss-indirection-table']))
+
+ # Set the indirection table
+ ethtool(f"-X {cfg.ifname} equal 2")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(1, max(data['rss-indirection-table']))
+
+ # Check we only get traffic on the first 2 queues
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+ # 2 queues, 20k packets, must be at least 5k per queue
+ ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts))
+ ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts))
+ # The other queues should be unused
+ ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts))
+
+ # Restore, and check traffic gets spread again
+ reset_indir.exec()
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+ if qcnt > 4:
+ # First two queues get less traffic than all the rest
+ ksft_lt(sum(cnts[:2]), sum(cnts[2:]),
+ "traffic distributed: " + str(cnts))
+ else:
+ # When queue count is low make sure third queue got significant pkts
+ ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts))
+
+
+def test_rss_queue_reconfigure(cfg, main_ctx=True):
+ """Make sure queue changes can't override requested RSS config.
+
+ By default main RSS table should change to include all queues.
+ When user sets a specific RSS config the driver should preserve it,
+ even when queue count changes. Driver should refuse to deactivate
+ queues used in the user-set RSS config.
+ """
+
+ if not main_ctx:
+ require_ntuple(cfg)
+
+ # Start with 4 queues, an arbitrary known number.
+ try:
+ qcnt = len(_get_rx_cnts(cfg))
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test or qstat not supported")
+
+ if main_ctx:
+ ctx_id = 0
+ ctx_ref = ""
+ else:
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ctx_ref = f"context {ctx_id}"
+ defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete")
+
+ # Indirection table should be distributing to all queues.
+ data = get_rss(cfg, context=ctx_id)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(3, max(data['rss-indirection-table']))
+
+ # Increase queues, indirection table should be distributing to all queues.
+ # It's unclear whether tables of additional contexts should be reset, too.
+ if main_ctx:
+ ethtool(f"-L {cfg.ifname} combined 5")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(4, max(data['rss-indirection-table']))
+ ethtool(f"-L {cfg.ifname} combined 4")
+
+ # Configure the table explicitly
+ port = rand_port()
+ ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1")
+ if main_ctx:
+ other_key = 'empty'
+ defer(ethtool, f"-X {cfg.ifname} default")
+ else:
+ other_key = 'noise'
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}"
+ ntuple = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+ other_key: (1, 2) })
+
+ # We should be able to increase queues, but table should be left untouched
+ ethtool(f"-L {cfg.ifname} combined 5")
+ data = get_rss(cfg, context=ctx_id)
+ ksft_eq({0, 3}, set(data['rss-indirection-table']))
+
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+ other_key: (1, 2, 4) })
+
+ # Setting queue count to 3 should fail, queue 3 is used
+ try:
+ ethtool(f"-L {cfg.ifname} combined 3")
+ except CmdExitFailure:
+ pass
+ else:
+ raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
+
+ if not main_ctx:
+ ethtool(f"-L {cfg.ifname} combined 4")
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 1"
+ try:
+ # this targets queue 4, which doesn't exist
+ ntuple2 = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
+ except CmdExitFailure:
+ pass
+ else:
+ raise Exception(f"Driver didn't prevent us from targeting a nonexistent queue (context {ctx_id})")
+ # change the table to target queues 0 and 2
+ ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0")
+ # ntuple rule therefore targets queues 1 and 3
+ try:
+ ntuple2 = ethtool_create(cfg, "-N", flow)
+ except CmdExitFailure:
+ ksft_pr("Driver does not support rss + queue offset")
+ return
+
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
+ # should replace existing filter
+ ksft_eq(ntuple, ntuple2)
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3),
+ 'noise' : (0, 2) })
+ # Setting queue count to 3 should fail, queue 3 is used
+ try:
+ ethtool(f"-L {cfg.ifname} combined 3")
+ except CmdExitFailure:
+ pass
+ else:
+ raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
+
+
+def test_rss_resize(cfg):
+ """Test resizing of the RSS table.
+
+ Some devices dynamically increase and decrease the size of the RSS
+ indirection table based on the number of enabled queues.
+ When that happens driver must maintain the balance of entries
+ (preferably duplicating the smaller table).
+ """
+
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ ch_max = channels['combined-max']
+ qcnt = channels['combined-count']
+
+ if ch_max < 2:
+ raise KsftSkipEx(f"Not enough queues for the test: {ch_max}")
+
+ ethtool(f"-L {cfg.ifname} combined 2")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+
+ ethtool(f"-X {cfg.ifname} weight 1 7")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ ethtool(f"-L {cfg.ifname} combined {ch_max}")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(1, max(data['rss-indirection-table']))
+
+ ksft_eq(7,
+ data['rss-indirection-table'].count(1) /
+ data['rss-indirection-table'].count(0),
+ f"Table imbalance after resize: {data['rss-indirection-table']}")
+
+
+def test_hitless_key_update(cfg):
+ """Test that flows may be rehashed without impacting traffic.
+
+ Some workloads may want to rehash the flows in response to an imbalance.
+ Most effective way to do that is changing the RSS key. Check that changing
+ the key does not cause link flaps or traffic disruption.
+
+ Disrupting traffic for key update is not a bug, but makes the key
+ update unusable for rehashing under load.
+ """
+ data = get_rss(cfg)
+ key_len = len(data['rss-hash-key'])
+
+ ethnl = EthtoolFamily()
+ key = random.randbytes(key_len)
+
+ tgen = GenerateTraffic(cfg)
+ try:
+ errors0, carrier0 = get_drop_err_sum(cfg)
+ t0 = datetime.datetime.now()
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key})
+ t1 = datetime.datetime.now()
+ errors1, carrier1 = get_drop_err_sum(cfg)
+ finally:
+ tgen.wait_pkts_and_stop(5000)
+
+ ksft_lt((t1 - t0).total_seconds(), 0.15)
+ ksft_eq(errors1 - errors1, 0)
+ ksft_eq(carrier1 - carrier0, 0)
+
+
+def test_rss_context_dump(cfg):
+ """
+ Test dumping RSS contexts. This tests mostly exercises the kernel APIs.
+ """
+
+ # Get a random key of the right size
+ data = get_rss(cfg)
+ if 'rss-hash-key' in data:
+ key_data = _rss_key_rand(len(data['rss-hash-key']))
+ key = _rss_key_str(key_data)
+ else:
+ key_data = []
+ key = "ba:ad"
+
+ ids = []
+ try:
+ ids.append(ethtool_create(cfg, "-X", f"context new"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+ ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+ ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+ except CmdExitFailure:
+ if not ids:
+ raise KsftSkipEx("Unable to add any contexts")
+ ksft_pr(f"Added only {len(ids)} out of 3 contexts")
+
+ expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids])
+
+ # Dump all
+ ctxs = cfg.ethnl.rss_get({}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+ ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # Sanity-check the results
+ for data in ctxs:
+ ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero")
+ ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
+
+ # More specific checks
+ if len(ids) > 1 and data.get('context') == ids[1]:
+ ksft_eq(set(data['indir']), {0, 1},
+ "ctx1 - indir table mismatch")
+ if len(ids) > 2 and data.get('context') == ids[2]:
+ ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch")
+
+ # Ifindex filter
+ ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ctx_tuples = set(tuples)
+ ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump")
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # Skip ctx 0
+ expect_tuples.remove((cfg.ifname, -1))
+
+ ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+ ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # And finally both with ifindex and skip main
+ ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True)
+ ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+
+def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
+ """
+ Test separating traffic into RSS contexts.
+ The queues will be allocated 2 for each context:
+ ctx0 ctx1 ctx2 ctx3
+ [0 1] [2 3] [4 5] [6 7] ...
+ """
+
+ require_ntuple(cfg)
+
+ requested_ctx_cnt = ctx_cnt
+
+ # Try to allocate more queues when necessary
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 2 + 2 * ctx_cnt:
+ try:
+ ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
+ ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ ports = []
+
+ # Use queues 0 and 1 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ for i in range(ctx_cnt):
+ want_cfg = f"start {2 + i * 2} equal 2"
+ create_cfg = want_cfg if create_with_cfg else ""
+
+ try:
+ ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+ except CmdExitFailure:
+ # try to carry on and skip at the end
+ if i == 0:
+ raise
+ ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
+ ctx_cnt = i
+ if cfg.context_cnt is None:
+ cfg.context_cnt = ctx_cnt
+ break
+
+ _rss_key_check(cfg, context=ctx_id)
+
+ if not create_with_cfg:
+ ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}")
+ _rss_key_check(cfg, context=ctx_id)
+
+ # Sanity check the context we just created
+ data = get_rss(cfg, ctx_id)
+ ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data))
+ ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data))
+
+ ports.append(rand_port())
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}"
+ ntuple = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+ _ntuple_rule_check(cfg, ntuple, ctx_id)
+
+ for i in range(ctx_cnt):
+ _send_traffic_check(cfg, ports[i], f"context {i}",
+ { 'target': (2+i*2, 3+i*2),
+ 'noise': (0, 1),
+ 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) })
+
+ if requested_ctx_cnt != ctx_cnt:
+ raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
+
+
+def test_rss_context4(cfg):
+ test_rss_context(cfg, 4)
+
+
+def test_rss_context32(cfg):
+ test_rss_context(cfg, 32)
+
+
+def test_rss_context4_create_with_cfg(cfg):
+ test_rss_context(cfg, 4, create_with_cfg=True)
+
+
+def test_rss_context_queue_reconfigure(cfg):
+ test_rss_queue_reconfigure(cfg, main_ctx=False)
+
+
+def test_rss_context_out_of_order(cfg, ctx_cnt=4):
+ """
+ Test separating traffic into RSS contexts.
+ Contexts are removed in semi-random order, and steering re-tested
+ to make sure removal doesn't break steering to surviving contexts.
+ Test requires 3 contexts to work.
+ """
+
+ require_ntuple(cfg)
+ require_context_cnt(cfg, 4)
+
+ # Try to allocate more queues when necessary
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 2 + 2 * ctx_cnt:
+ try:
+ ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
+ ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ ntuple = []
+ ctx = []
+ ports = []
+
+ def remove_ctx(idx):
+ ntuple[idx].exec()
+ ntuple[idx] = None
+ ctx[idx].exec()
+ ctx[idx] = None
+
+ def check_traffic():
+ for i in range(ctx_cnt):
+ if ctx[i]:
+ expected = {
+ 'target': (2+i*2, 3+i*2),
+ 'noise': (0, 1),
+ 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt))
+ }
+ else:
+ expected = {
+ 'target': (0, 1),
+ 'empty': range(2, 2+2*ctx_cnt)
+ }
+
+ _send_traffic_check(cfg, ports[i], f"context {i}", expected)
+
+ # Use queues 0 and 1 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ for i in range(ctx_cnt):
+ ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2")
+ ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete"))
+
+ ports.append(rand_port())
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}"))
+
+ check_traffic()
+
+ # Remove middle context
+ remove_ctx(ctx_cnt // 2)
+ check_traffic()
+
+ # Remove first context
+ remove_ctx(0)
+ check_traffic()
+
+ # Remove last context
+ remove_ctx(-1)
+ check_traffic()
+
+
+def test_rss_context_overlap(cfg, other_ctx=0):
+ """
+ Test contexts overlapping with each other.
+ Use 4 queues for the main context, but only queues 2 and 3 for context 1.
+ """
+
+ require_ntuple(cfg)
+ if other_ctx:
+ require_context_cnt(cfg, 2)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ if other_ctx == 0:
+ ethtool(f"-X {cfg.ifname} equal 4")
+ defer(ethtool, f"-X {cfg.ifname} default")
+ else:
+ other_ctx = ethtool_create(cfg, "-X", "context new")
+ ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4")
+ defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete")
+
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ port = rand_port()
+ if other_ctx:
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {other_ctx}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ # Test the main context
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts))
+ ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts))
+ ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts))
+ if other_ctx == 0:
+ ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
+
+ # Now create a rule for context 1 and make sure traffic goes to a subset
+ if other_ctx:
+ ntuple.exec()
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ directed = sum(cnts[2:4])
+ ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts))
+ ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts))
+ if other_ctx == 0:
+ ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
+
+
+def test_rss_context_overlap2(cfg):
+ test_rss_context_overlap(cfg, True)
+
+
+def test_flow_add_context_missing(cfg):
+ """
+ Test that we are not allowed to add a rule pointing to an RSS context
+ which was never created.
+ """
+
+ require_ntuple(cfg)
+
+ # Find a context which doesn't exist
+ for ctx_id in range(1, 100):
+ try:
+ get_rss(cfg, context=ctx_id)
+ except CmdExitFailure:
+ break
+
+ with ksft_raises(CmdExitFailure) as cm:
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ethtool(f"-N {cfg.ifname} delete {ntuple_id}")
+ if cm.exception:
+ ksft_in('Invalid argument', cm.exception.cmd.stderr)
+
+
+def test_delete_rss_context_busy(cfg):
+ """
+ Test that deletion returns -EBUSY when an rss context is being used
+ by an ntuple filter.
+ """
+
+ require_ntuple(cfg)
+
+ # create additional rss context
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ctx_deleter = defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ # utilize context from ntuple filter
+ port = rand_port()
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ # attempt to delete in-use context
+ try:
+ ctx_deleter.exec_only()
+ ctx_deleter.cancel()
+ raise KsftFailEx(f"deleted context {ctx_id} used by rule {ntuple_id}")
+ except CmdExitFailure:
+ pass
+
+
+def test_rss_ntuple_addition(cfg):
+ """
+ Test that the queue offset (ring_cookie) of an ntuple rule is added
+ to the queue number read from the indirection table.
+ """
+
+ require_ntuple(cfg)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ # Use queue 0 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 1")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ # create additional rss context
+ ctx_id = ethtool_create(cfg, "-X", "context new equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ # utilize context from ntuple filter
+ port = rand_port()
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 2"
+ try:
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ except CmdExitFailure:
+ raise KsftSkipEx("Ntuple filter with RSS and nonzero action not supported")
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ _send_traffic_check(cfg, port, f"context {ctx_id}", { 'target': (2, 3),
+ 'empty' : (1,),
+ 'noise' : (0,) })
+
+
+def test_rss_default_context_rule(cfg):
+ """
+ Allocate a port, direct this port to context 0, then create a new RSS
+ context and steer all TCP traffic to it (context 1). Verify that:
+ * Traffic to the specific port continues to use queues of the main
+ context (0/1).
+ * Traffic to any other TCP port is redirected to the new context
+ (queues 2/3).
+ """
+
+ require_ntuple(cfg)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except Exception as exc:
+ raise KsftSkipEx("Not enough queues for the test") from exc
+
+ # Use queues 0 and 1 for the main context
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ # Create a new RSS context that uses queues 2 and 3
+ ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ # Generic low-priority rule: redirect all TCP traffic to the new context.
+ # Give it an explicit higher location number (lower priority).
+ flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1"
+ ethtool(f"-N {cfg.ifname} {flow_generic}")
+ defer(ethtool, f"-N {cfg.ifname} delete 1")
+
+ # Specific high-priority rule for a random port that should stay on context 0.
+ # Assign loc 0 so it is evaluated before the generic rule.
+ port_main = rand_port()
+ flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0"
+ ethtool(f"-N {cfg.ifname} {flow_main}")
+ defer(ethtool, f"-N {cfg.ifname} delete 0")
+
+ _ntuple_rule_check(cfg, 1, ctx_id)
+
+ # Verify that traffic matching the specific rule still goes to queues 0/1
+ _send_traffic_check(cfg, port_main, "context 0",
+ { 'target': (0, 1),
+ 'empty' : (2, 3) })
+
+ # And that traffic for any other port is steered to the new context
+ port_other = rand_port()
+ _send_traffic_check(cfg, port_other, f"context {ctx_id}",
+ { 'target': (2, 3),
+ 'noise' : (0, 1) })
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.context_cnt = None
+ cfg.ethnl = EthtoolFamily()
+ cfg.netdevnl = NetdevFamily()
+
+ ksft_run([test_rss_key_indir, test_rss_queue_reconfigure,
+ test_rss_resize, test_hitless_key_update,
+ test_rss_context, test_rss_context4, test_rss_context32,
+ test_rss_context_dump, test_rss_context_queue_reconfigure,
+ test_rss_context_overlap, test_rss_context_overlap2,
+ test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
+ test_flow_add_context_missing,
+ test_delete_rss_context_busy, test_rss_ntuple_addition,
+ test_rss_default_context_rule],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
new file mode 100755
index 000000000000..6fa95fe27c47
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests for RSS hashing on IPv6 Flow Label.
+"""
+
+import glob
+import os
+import socket
+from lib.py import CmdExitFailure
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \
+ ksft_not_in, ksft_raises, KsftSkipEx
+from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port
+from lib.py import NetDrvEpEnv
+
+
+def _check_system(cfg):
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+
+ for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt",
+ f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]:
+ try:
+ with open(f, 'r') as fp:
+ setting = fp.read().strip()
+ # CPU mask will be zeros and commas
+ if setting.replace("0", "").replace(",", ""):
+ raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}")
+ except FileNotFoundError:
+ pass
+
+ # 1 is the default, if someone changed it we probably shouldn"t mess with it
+ af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout
+ if af.strip() != "1":
+ raise KsftSkipEx("Remote does not have auto_flowlabels enabled")
+
+
+def _ethtool_get_cfg(cfg, fl_type):
+ descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ "IPv6 Flow Label": "l",
+ }
+
+ ret = ""
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ ret += converter[line]
+ return ret
+
+
+def _traffic(cfg, one_sock, one_cpu):
+ local_port = rand_port(socket.SOCK_DGRAM)
+ remote_port = rand_port(socket.SOCK_DGRAM)
+
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
+ sock.bind(("", local_port))
+ sock.connect((cfg.remote_addr_v["6"], 0))
+ if one_sock:
+ send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \
+ "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-"
+ else:
+ send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \
+ f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done"
+
+ cpus = set()
+ with bkg(send, shell=True, host=cfg.remote, exit_wait=True):
+ for _ in range(20):
+ fd_read_timeout(sock.fileno(), 1)
+ cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+ cpus.add(cpu)
+
+ if one_cpu:
+ ksft_eq(len(cpus), 1,
+ f"{one_sock=} - expected one CPU, got traffic on: {cpus=}")
+ else:
+ ksft_ge(len(cpus), 2,
+ f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}")
+
+
+def test_rss_flow_label(cfg):
+ """
+ Test hashing on IPv6 flow label. Send traffic over a single socket
+ and over multiple sockets. Depend on the remote having auto-label
+ enabled so that it randomizes the label per socket.
+ """
+
+ cfg.require_ipver("6")
+ cfg.require_cmd("socat", remote=True)
+ _check_system(cfg)
+
+ # Enable flow label hashing for UDP6
+ initial = _ethtool_get_cfg(cfg, "udp6")
+ no_lbl = initial.replace("l", "")
+ if "l" not in initial:
+ try:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}")
+ except CmdExitFailure as exc:
+ raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc
+
+ defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+ _traffic(cfg, one_sock=True, one_cpu=True)
+ _traffic(cfg, one_sock=False, one_cpu=False)
+
+ # Disable it, we should see no hashing (reset was already defer()ed)
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}")
+
+ _traffic(cfg, one_sock=False, one_cpu=True)
+
+
+def _check_v4_flow_types(cfg):
+ for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]:
+ try:
+ cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+ ksft_not_in("Flow Label", cur,
+ comment=f"{fl_type=} has Flow Label:" + cur)
+ except CmdExitFailure:
+ # Probably does not support this flow type
+ pass
+
+
+def test_rss_flow_label_6only(cfg):
+ """
+ Test interactions with IPv4 flow types. It should not be possible to set
+ IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also
+ not appear in the IPv4 "current config".
+ """
+
+ with ksft_raises(CmdExitFailure) as cm:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl")
+ ksft_in("Invalid argument", cm.exception.cmd.stderr)
+
+ _check_v4_flow_types(cfg)
+
+ # Try to enable Flow Labels and check again, in case it leaks thru
+ initial = _ethtool_get_cfg(cfg, "udp6")
+ changed = initial.replace("l", "") if "l" in initial else initial + "l"
+
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}")
+ restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+ _check_v4_flow_types(cfg)
+ restore.exec()
+ _check_v4_flow_types(cfg)
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([test_rss_flow_label,
+ test_rss_flow_label_6only],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
new file mode 100755
index 000000000000..72880e388478
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import multiprocessing
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout
+from lib.py import NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import rand_port
+
+
+def traffic(cfg, local_port, remote_port, ipver):
+ af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6
+ sock = socket.socket(af_inet, socket.SOCK_DGRAM)
+ sock.bind(("", local_port))
+ sock.connect((cfg.remote_addr_v[ipver], remote_port))
+ tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}"
+ cmd("echo a | socat - UDP" + tgt, host=cfg.remote)
+ fd_read_timeout(sock.fileno(), 5)
+ return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+
+
+def test_rss_input_xfrm(cfg, ipver):
+ """
+ Test symmetric input_xfrm.
+ If symmetric RSS hash is configured, send traffic twice, swapping the
+ src/dst UDP ports, and verify that the same queue is receiving the traffic
+ in both cases (IPs are constant).
+ """
+
+ if multiprocessing.cpu_count() < 2:
+ raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
+
+ cfg.require_cmd("socat", local=False, remote=True)
+
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
+ rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
+ input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {})))
+
+ # Check for symmetric xor/or-xor
+ if not input_xfrm:
+ raise KsftSkipEx("Symmetric RSS hash not requested")
+
+ cpus = set()
+ successful = 0
+ for _ in range(100):
+ try:
+ port1 = rand_port(socket.SOCK_DGRAM)
+ port2 = rand_port(socket.SOCK_DGRAM)
+ cpu1 = traffic(cfg, port1, port2, ipver)
+ cpu2 = traffic(cfg, port2, port1, ipver)
+ cpus.update([cpu1, cpu2])
+ ksft_eq(
+ cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured")
+
+ successful += 1
+ if successful == 10:
+ break
+ except:
+ continue
+ else:
+ raise KsftFailEx("Failed to run traffic")
+
+ ksft_ge(len(cpus), 2,
+ comment=f"Received traffic on less than two cpus {cpus = }")
+
+
+def test_rss_input_xfrm_ipv4(cfg):
+ cfg.require_ipver("4")
+ test_rss_input_xfrm(cfg, "4")
+
+
+def test_rss_input_xfrm_ipv6(cfg):
+ cfg.require_ipver("6")
+ test_rss_input_xfrm(cfg, "6")
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netdevnl = NetdevFamily()
+
+ ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/settings b/tools/testing/selftests/drivers/net/hw/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c
new file mode 100644
index 000000000000..d23b3b0c20a3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <ynl.h>
+#include "ethtool-user.h"
+
+#include "kselftest.h"
+#include "../../../net/lib/ksft.h"
+
+#define TOEPLITZ_KEY_MIN_LEN 40
+#define TOEPLITZ_KEY_MAX_LEN 60
+
+#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+#define RSS_MAX_INDIR (1 << 16)
+
+#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport = 8000;
+static int cfg_family = AF_INET6;
+static char *cfg_ifname = "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type = SOCK_STREAM;
+static int cfg_timeout_msec = 1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+ int fd;
+ char *mmap;
+ int idx;
+ int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static unsigned int rss_indir_tbl[RSS_MAX_INDIR];
+static unsigned int rss_indir_tbl_size;
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+ const unsigned char *key)
+{
+ int i, bit, ret = 0;
+ uint32_t key32;
+
+ key32 = ntohl(*((uint32_t *)key));
+ key += 4;
+
+ for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+ for (bit = 7; bit >= 0; bit--) {
+ if (four_tuple[i] & (1 << bit))
+ ret ^= key32;
+
+ key32 <<= 1;
+ key32 |= !!(key[0] & (1 << bit));
+ }
+ key++;
+ }
+
+ return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+ int queue;
+
+ if (rss_indir_tbl_size)
+ queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size];
+ else
+ queue = rx_hash % cfg_num_queues;
+
+ log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+ if (rx_irq_cpus[queue] != cpu) {
+ log_verbose(". error: rss cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+ int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+ log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+ if (rps_silo_to_cpu[silo] != cpu) {
+ log_verbose(". error: rps cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+ const char *addrs, int addr_len)
+{
+ char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+ uint16_t *ports;
+
+ if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+ !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+ error(1, 0, "address parse error");
+
+ ports = (void *)addrs + (addr_len * 2);
+ log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+ cpu, rx_hash, saddr, daddr,
+ ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+ unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+ uint32_t rx_hash_sw;
+ const char *addrs;
+ int addr_len;
+
+ if (cfg_family == AF_INET) {
+ addr_len = sizeof(struct in_addr);
+ addrs = pkt + offsetof(struct iphdr, saddr);
+ } else {
+ addr_len = sizeof(struct in6_addr);
+ addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+ }
+
+ memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+ rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+ if (cfg_verbose)
+ log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+ if (rx_hash != rx_hash_sw) {
+ log_verbose(" != expected 0x%x\n", rx_hash_sw);
+ frames_error++;
+ return;
+ }
+
+ log_verbose(" OK");
+ if (cfg_num_queues)
+ verify_rss(rx_hash, cpu);
+ else if (cfg_num_rps_cpus)
+ verify_rps(rx_hash, cpu);
+ log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+ struct tpacket3_hdr *hdr = (void *)frame;
+
+ if (hdr->hv1.tp_rxhash)
+ verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+ ring->cpu);
+ else
+ frames_nohash++;
+
+ return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static bool recv_block(struct ring_state *ring)
+{
+ struct tpacket_block_desc *block;
+ char *frame;
+ int i;
+
+ block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+ if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+ return false;
+
+ frame = (char *)block;
+ frame += block->hdr.bh1.offset_to_first_pkt;
+
+ for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+ frame = recv_frame(ring, frame);
+ frames_received++;
+ }
+
+ block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ ring->idx = (ring->idx + 1) % ring_block_nr;
+
+ return true;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+ int i;
+
+ usleep(1000 * cfg_timeout_msec);
+
+ for (i = 0; i < num_cpus; i++)
+ do {} while (recv_block(&rings[i]));
+
+ fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+ frames_received - frames_nohash - frames_error,
+ frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+ struct tpacket_req3 req3 = {0};
+ void *ring;
+
+ req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
+ req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+ req3.tp_frame_size = 2048;
+ req3.tp_frame_nr = 1 << 10;
+ req3.tp_block_nr = 16;
+
+ req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+ req3.tp_block_size /= req3.tp_block_nr;
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+ error(1, errno, "setsockopt PACKET_RX_RING");
+
+ ring_block_sz = req3.tp_block_size;
+ ring_block_nr = req3.tp_block_nr;
+
+ ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+ if (ring == MAP_FAILED)
+ error(1, 0, "mmap failed");
+
+ return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = ARRAY_SIZE(filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+ const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+ uint8_t proto;
+
+ proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ if (cfg_family == AF_INET)
+ __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+ sizeof(struct iphdr) + off_dport);
+ else
+ __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+ sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = ARRAY_SIZE(filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+ struct fanout_args args = {
+ .id = 1,
+ .type_flags = PACKET_FANOUT_CPU,
+ .max_num_members = RSS_MAX_CPUS
+ };
+ struct sockaddr_ll ll = { 0 };
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket creation failed");
+
+ val = TPACKET_V3;
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+ error(1, errno, "setsockopt PACKET_VERSION");
+ *ring = setup_ring(fd);
+
+ /* block packets until all rings are added to the fanout group:
+ * else packets can arrive during setup and get misclassified
+ */
+ set_filter_null(fd);
+
+ ll.sll_family = AF_PACKET;
+ ll.sll_ifindex = if_nametoindex(cfg_ifname);
+ ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+ htons(ETH_P_IPV6);
+ if (bind(fd, (void *)&ll, sizeof(ll)))
+ error(1, errno, "bind");
+
+ /* must come after bind: verifies all programs in group match */
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+ /* on failure, retry using old API if that is sufficient:
+ * it has a hard limit of 256 sockets, so only try if
+ * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+ * in this API, the third argument is left implicit.
+ */
+ if (cfg_num_queues || num_cpus > 256 ||
+ setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &args, sizeof(uint32_t)))
+ error(1, errno, "setsockopt PACKET_FANOUT cpu");
+ }
+
+ return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+ int fd, val;
+
+ fd = socket(cfg_family, cfg_type, 0);
+ if (fd == -1)
+ error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+ val = 1 << 20;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+
+ return fd;
+}
+
+static void setup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ rings[i].cpu = i;
+ rings[i].fd = create_ring(&rings[i].mmap);
+ }
+
+ /* accept packets once all rings in the fanout group are up */
+ for (i = 0; i < num_cpus; i++)
+ set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+ error(1, errno, "munmap");
+ if (close(rings[i].fd))
+ error(1, errno, "close");
+ }
+}
+
+static void parse_cpulist(const char *arg)
+{
+ do {
+ rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+ arg = strchr(arg, ',');
+ if (!arg)
+ break;
+ arg++; // skip ','
+ } while (1);
+}
+
+static void show_cpulist(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_queues; i++)
+ fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_rps_cpus; i++)
+ fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+ int i, ret, off;
+
+ if (slen < TOEPLITZ_STR_MIN_LEN ||
+ slen > TOEPLITZ_STR_MAX_LEN + 1)
+ error(1, 0, "invalid toeplitz key");
+
+ for (i = 0, off = 0; off < slen; i++, off += 3) {
+ ret = sscanf(str + off, "%hhx", &key[i]);
+ if (ret != 1)
+ error(1, 0, "key parse error at %d off %d len %d",
+ i, off, slen);
+ }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+ unsigned long bitmap;
+ int i;
+
+ bitmap = strtoul(arg, NULL, 0);
+
+ if (bitmap & ~(RPS_MAX_CPUS - 1))
+ error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+ bitmap, RPS_MAX_CPUS - 1);
+
+ for (i = 0; i < RPS_MAX_CPUS; i++)
+ if (bitmap & 1UL << i)
+ rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void read_rss_dev_info_ynl(void)
+{
+ struct ethtool_rss_get_req *req;
+ struct ethtool_rss_get_rsp *rsp;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, NULL);
+ if (!ys)
+ error(1, errno, "ynl_sock_create failed");
+
+ req = ethtool_rss_get_req_alloc();
+ if (!req)
+ error(1, errno, "ethtool_rss_get_req_alloc failed");
+
+ ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname);
+
+ rsp = ethtool_rss_get(ys, req);
+ if (!rsp)
+ error(1, ys->err.code, "YNL: %s", ys->err.msg);
+
+ if (!rsp->_len.hkey)
+ error(1, 0, "RSS key not available for %s", cfg_ifname);
+
+ if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN ||
+ rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN)
+ error(1, 0, "RSS key length %u out of bounds [%u, %u]",
+ rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN,
+ TOEPLITZ_KEY_MAX_LEN);
+
+ memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey);
+
+ if (rsp->_count.indir > RSS_MAX_INDIR)
+ error(1, 0, "RSS indirection table too large (%u > %u)",
+ rsp->_count.indir, RSS_MAX_INDIR);
+
+ /* If indir table not available we'll fallback to simple modulo math */
+ if (rsp->_count.indir) {
+ memcpy(rss_indir_tbl, rsp->indir,
+ rsp->_count.indir * sizeof(rss_indir_tbl[0]));
+ rss_indir_tbl_size = rsp->_count.indir;
+
+ log_verbose("RSS indirection table size: %u\n",
+ rss_indir_tbl_size);
+ }
+
+ ethtool_rss_get_rsp_free(rsp);
+ ethtool_rss_get_req_free(req);
+ ynl_sock_destroy(ys);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"dport", required_argument, 0, 'd'},
+ {"cpus", required_argument, 0, 'C'},
+ {"key", required_argument, 0, 'k'},
+ {"iface", required_argument, 0, 'i'},
+ {"ipv4", no_argument, 0, '4'},
+ {"ipv6", no_argument, 0, '6'},
+ {"sink", no_argument, 0, 's'},
+ {"tcp", no_argument, 0, 't'},
+ {"timeout", required_argument, 0, 'T'},
+ {"udp", no_argument, 0, 'u'},
+ {"verbose", no_argument, 0, 'v'},
+ {"rps", required_argument, 0, 'r'},
+ {0, 0, 0, 0}
+ };
+ bool have_toeplitz = false;
+ int index, c;
+
+ while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = AF_INET;
+ break;
+ case '6':
+ cfg_family = AF_INET6;
+ break;
+ case 'C':
+ parse_cpulist(optarg);
+ break;
+ case 'd':
+ cfg_dport = strtol(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'k':
+ parse_toeplitz_key(optarg, strlen(optarg),
+ toeplitz_key);
+ have_toeplitz = true;
+ break;
+ case 'r':
+ parse_rps_bitmap(optarg);
+ break;
+ case 's':
+ cfg_sink = true;
+ break;
+ case 't':
+ cfg_type = SOCK_STREAM;
+ break;
+ case 'T':
+ cfg_timeout_msec = strtol(optarg, NULL, 0);
+ break;
+ case 'u':
+ cfg_type = SOCK_DGRAM;
+ break;
+ case 'v':
+ cfg_verbose = true;
+ break;
+
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!have_toeplitz)
+ read_rss_dev_info_ynl();
+
+ num_cpus = get_nprocs();
+ if (num_cpus > RSS_MAX_CPUS)
+ error(1, 0, "increase RSS_MAX_CPUS");
+
+ if (cfg_num_queues && cfg_num_rps_cpus)
+ error(1, 0,
+ "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+ if (cfg_verbose) {
+ show_cpulist();
+ show_silos();
+ }
+}
+
+int main(int argc, char **argv)
+{
+ const int min_tests = 10;
+ int fd_sink = -1;
+
+ parse_opts(argc, argv);
+
+ if (cfg_sink)
+ fd_sink = setup_sink();
+
+ setup_rings();
+
+ /* Signal to test framework that we're ready to receive */
+ ksft_ready();
+
+ process_rings();
+ cleanup_rings();
+
+ if (cfg_sink && close(fd_sink))
+ error(1, errno, "close sink");
+
+ if (frames_received - frames_nohash < min_tests)
+ error(1, 0, "too few frames for verification");
+
+ return frames_error;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py
new file mode 100755
index 000000000000..d2db5ee9e358
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Toeplitz Rx hashing test:
+ - rxhash (the hash value calculation itself);
+ - RSS mapping from rxhash to rx queue;
+ - RPS mapping from rxhash to cpu.
+"""
+
+import glob
+import os
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily
+from lib.py import cmd, bkg, rand_port, defer
+from lib.py import ksft_in
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx
+
+# "define" for the ID of the Toeplitz hash function
+ETH_RSS_HASH_TOP = 1
+
+
+def _check_rps_and_rfs_not_configured(cfg):
+ """Verify that RPS is not already configured."""
+
+ for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+ with open(rps_file, "r", encoding="utf-8") as fp:
+ val = fp.read().strip()
+ if set(val) - {"0", ","}:
+ raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}")
+
+ rfs_file = "/proc/sys/net/core/rps_sock_flow_entries"
+ with open(rfs_file, "r", encoding="utf-8") as fp:
+ val = fp.read().strip()
+ if val != "0":
+ raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}")
+
+
+def _get_cpu_for_irq(irq):
+ with open(f"/proc/irq/{irq}/smp_affinity_list", "r",
+ encoding="utf-8") as fp:
+ data = fp.read().strip()
+ if "," in data or "-" in data:
+ raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}")
+ return int(data)
+
+
+def _get_irq_cpus(cfg):
+ """
+ Read the list of IRQs for the device Rx queues.
+ """
+ queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True)
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+
+ # Remap into ID-based dicts
+ napis = {n["id"]: n for n in napis}
+ queues = {f"{q['type']}{q['id']}": q for q in queues}
+
+ cpus = []
+ for rx in range(9999):
+ name = f"rx{rx}"
+ if name not in queues:
+ break
+ cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"]))
+
+ return cpus
+
+
+def _get_unused_cpus(cfg, count=2):
+ """
+ Get CPUs that are not used by Rx queues.
+ Returns a list of at least 'count' CPU numbers.
+ """
+
+ # Get CPUs used by Rx queues
+ rx_cpus = set(_get_irq_cpus(cfg))
+
+ # Get total number of CPUs
+ num_cpus = os.cpu_count()
+
+ # Find unused CPUs
+ unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus]
+
+ if len(unused_cpus) < count:
+ raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}")
+
+ return unused_cpus[:count]
+
+
+def _configure_rps(cfg, rps_cpus):
+ """Configure RPS for all Rx queues."""
+
+ mask = 0
+ for cpu in rps_cpus:
+ mask |= (1 << cpu)
+ mask = hex(mask)[2:]
+
+ # Set RPS bitmap for all rx queues
+ for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+ with open(rps_file, "w", encoding="utf-8") as fp:
+ fp.write(mask)
+
+ return mask
+
+
+def _send_traffic(cfg, proto_flag, ipver, port):
+ """Send 20 packets of requested type."""
+
+ # Determine protocol and IP version for socat
+ if proto_flag == "-u":
+ proto = "UDP"
+ else:
+ proto = "TCP"
+
+ baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"]
+
+ # Run socat in a loop to send traffic periodically
+ # Use sh -c with a loop similar to toeplitz_client.sh
+ socat_cmd = f"""
+ for i in `seq 20`; do
+ echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port};
+ sleep 0.001;
+ done
+ """
+
+ cmd(socat_cmd, shell=True, host=cfg.remote)
+
+
+def _test_variants():
+ for grp in ["", "rss", "rps"]:
+ for l4 in ["tcp", "udp"]:
+ for l3 in ["4", "6"]:
+ name = f"{l4}_ipv{l3}"
+ if grp:
+ name = f"{grp}_{name}"
+ yield KsftNamedVariant(name, "-" + l4[0], l3, grp)
+
+
+@ksft_variants(_test_variants())
+def test(cfg, proto_flag, ipver, grp):
+ """Run a single toeplitz test."""
+
+ cfg.require_ipver(ipver)
+
+ # Check that rxhash is enabled
+ ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout)
+
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # Make sure NIC is configured to use Toeplitz hash, and no key xfrm.
+ if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hfunc": ETH_RSS_HASH_TOP,
+ "input-xfrm": {}})
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},
+ "hfunc": rss.get('hfunc'),
+ "input-xfrm": rss.get('input-xfrm', {})
+ })
+
+ port = rand_port(socket.SOCK_DGRAM)
+
+ toeplitz_path = cfg.test_dir / "toeplitz"
+ rx_cmd = [
+ str(toeplitz_path),
+ "-" + ipver,
+ proto_flag,
+ "-d", str(port),
+ "-i", cfg.ifname,
+ "-T", "4000",
+ "-s",
+ "-v"
+ ]
+
+ if grp:
+ _check_rps_and_rfs_not_configured(cfg)
+ if grp == "rss":
+ irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)])
+ rx_cmd += ["-C", irq_cpus]
+ ksft_pr(f"RSS using CPUs: {irq_cpus}")
+ elif grp == "rps":
+ # Get CPUs not used by Rx queues and configure them for RPS
+ rps_cpus = _get_unused_cpus(cfg, count=2)
+ rps_mask = _configure_rps(cfg, rps_cpus)
+ defer(_configure_rps, cfg, [])
+ rx_cmd += ["-r", rps_mask]
+ ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}")
+
+ # Run rx in background, it will exit once it has seen enough packets
+ with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc:
+ while rx_proc.proc.poll() is None:
+ _send_traffic(cfg, proto_flag, ipver, port)
+
+ # Check rx result
+ ksft_pr("Receiver output:")
+ ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+ if rx_proc.stderr:
+ ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+
+def main() -> None:
+ """Ksft boilerplate main."""
+
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+ ksft_run(cases=[test], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
new file mode 100755
index 000000000000..0998e68ebaf0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_eq, ksft_ge, ksft_lt
+from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen
+
+
+def sock_wait_drain(sock, max_wait=1000):
+ """Wait for all pending write data on the socket to get ACKed."""
+ for _ in range(max_wait):
+ one = b'\0' * 4
+ outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one)
+ outq = struct.unpack("I", outq)[0]
+ if outq == 0:
+ break
+ time.sleep(0.01)
+ ksft_eq(outq, 0)
+
+
+def tcp_sock_get_retrans(sock):
+ """Get the number of retransmissions for the TCP socket."""
+ info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512)
+ return struct.unpack("I", info[100:104])[0]
+
+
+def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
+ cfg.require_cmd("socat", local=False, remote=True)
+
+ port = rand_port()
+ listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
+ wait_port_listen(port, host=cfg.remote)
+
+ if ipver == "4":
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((remote_v4, port))
+ else:
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ sock.connect((remote_v6, port))
+
+ # Small send to make sure the connection is working.
+ sock.send("ping".encode())
+ sock_wait_drain(sock)
+
+ # Send 4MB of data, record the LSO packet count.
+ qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ buf = b"0" * 1024 * 1024 * 4
+ sock.send(buf)
+ sock_wait_drain(sock)
+ qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ # Check that at least 90% of the data was sent as LSO packets.
+ # System noise may cause false negatives. Also header overheads
+ # will add up to 5% of extra packes... The check is best effort.
+ total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"]
+ total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"]
+
+ # Make sure we have order of magnitude more LSO packets than
+ # retransmits, in case TCP retransmitted all the LSO packets.
+ ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4)
+ sock.close()
+
+ if should_lso:
+ if cfg.have_stat_super_count:
+ ksft_ge(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ total_lso_super,
+ comment="Number of LSO super-packets with LSO enabled")
+ if cfg.have_stat_wire_count:
+ ksft_ge(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ total_lso_wire,
+ comment="Number of LSO wire-packets with LSO enabled")
+ else:
+ if cfg.have_stat_super_count:
+ ksft_lt(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ 15, comment="Number of LSO super-packets with LSO disabled")
+ if cfg.have_stat_wire_count:
+ ksft_lt(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ 500, comment="Number of LSO wire-packets with LSO disabled")
+
+
+def build_tunnel(cfg, outer_ipver, tun_info):
+ local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1"
+ local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1"
+ remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2"
+ remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2"
+
+ local_addr = cfg.addr_v[outer_ipver]
+ remote_addr = cfg.remote_addr_v[outer_ipver]
+
+ tun_type = tun_info[0]
+ tun_arg = tun_info[1]
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
+ defer(ip, f"link del {tun_type}-ksft")
+ ip(f"link set dev {tun_type}-ksft up")
+ ip(f"addr add {local_v4}/24 dev {tun_type}-ksft")
+ ip(f"addr add {local_v6}/64 dev {tun_type}-ksft")
+
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
+ host=cfg.remote)
+ defer(ip, f"link del {tun_type}-ksft", host=cfg.remote)
+ ip(f"link set dev {tun_type}-ksft up", host=cfg.remote)
+ ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote)
+ ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote)
+
+ return remote_v4, remote_v6
+
+
+def restore_wanted_features(cfg):
+ features_cmd = ""
+ for feature in cfg.hw_features:
+ setting = "on" if feature in cfg.wanted_features else "off"
+ features_cmd += f" {feature} {setting}"
+ try:
+ ethtool(f"-K {cfg.ifname} {features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure restoring wanted features: {e}")
+
+
+def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
+ """Construct specific tests from the common template."""
+ def f(cfg):
+ cfg.require_ipver(outer_ipver)
+ defer(restore_wanted_features, cfg)
+
+ if not cfg.have_stat_super_count and \
+ not cfg.have_stat_wire_count:
+ raise KsftSkipEx(f"Device does not support LSO queue stats")
+
+ if feature not in cfg.hw_features:
+ raise KsftSkipEx(f"Device does not support {feature}")
+
+ ipver = outer_ipver
+ if tun:
+ remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
+ ipver = inner_ipver
+ else:
+ remote_v4 = cfg.remote_addr_v["4"]
+ remote_v6 = cfg.remote_addr_v["6"]
+
+ # First test without the feature enabled.
+ ethtool(f"-K {cfg.ifname} {feature} off")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
+
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+ if feature in cfg.partial_features:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ if ipver == "4":
+ ksft_pr("Testing with mangleid enabled")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
+
+ # Full feature enabled.
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
+
+ f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
+ return f
+
+
+def query_nic_features(cfg) -> None:
+ """Query and cache the NIC features."""
+ cfg.have_stat_super_count = False
+ cfg.have_stat_wire_count = False
+
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+
+ cfg.wanted_features = set()
+ for f in features["wanted"]["bits"]["bit"]:
+ cfg.wanted_features.add(f["name"])
+
+ cfg.hw_features = set()
+ hw_all_features_cmd = ""
+ for f in features["hw"]["bits"]["bit"]:
+ if f.get("value", False):
+ feature = f["name"]
+ cfg.hw_features.add(feature)
+ hw_all_features_cmd += f" {feature} on"
+ try:
+ ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure enabling all hw features: {e}")
+ ksft_pr("partial gso feature detection may be impacted")
+
+ # Check which features are supported via GSO partial
+ cfg.partial_features = set()
+ if 'tx-gso-partial' in cfg.hw_features:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+
+ no_partial = set()
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ no_partial.add(f["name"])
+ cfg.partial_features = cfg.hw_features - no_partial
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+
+ restore_wanted_features(cfg)
+
+ stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
+ if stats:
+ if 'tx-hw-gso-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO super-packets")
+ cfg.have_stat_super_count = True
+ if 'tx-hw-gso-wire-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO wire-packets")
+ cfg.have_stat_wire_count = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ query_nic_features(cfg)
+
+ test_info = (
+ # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))),
+ ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))),
+ ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))),
+ )
+
+ cases = []
+ for outer_ipver in ["4", "6"]:
+ for info in test_info:
+ # Skip if test which only works for a specific IP version
+ if info[1] and outer_ipver != info[1]:
+ continue
+
+ if info[3]:
+ cases += [
+ test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver)
+ for inner_ipver in info[3][2]
+ ]
+ else:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
new file mode 100755
index 000000000000..d19d1d518208
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# This is intended to be run on a virtio-net guest interface.
+# The test binds the XDP socket to the interface without setting
+# the fill ring to trigger delayed refill_work. This helps to
+# make it easier to reproduce the deadlock when XDP program,
+# XDP socket bind/unbind, rx ring resize race with refill_work on
+# the buggy kernel.
+#
+# The Qemu command to setup virtio-net
+# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
+# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
+
+from lib.py import ksft_exit, ksft_run
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetDrvEnv
+from lib.py import bkg, ip, cmd, ethtool
+import time
+
+def _get_rx_ring_entries(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True)
+ return output[0]["rx"]
+
+def setup_xsk(cfg, xdp_queue_id = 0) -> bkg:
+ # Probe for support
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+ if xdp.ret == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.ret > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ try:
+ return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \
+ '{xdp_queue_id} -z', ksft_wait=3)
+ except:
+ raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \
+ 'Please consider adding iommu_platform=on ' \
+ 'when setting up virtio-net-pci')
+
+def check_xdp_bind(cfg):
+ with setup_xsk(cfg):
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+def check_rx_resize(cfg):
+ with setup_xsk(cfg):
+ rx_ring = _get_rx_ring_entries(cfg)
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2))
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring))
+
+def main():
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([check_xdp_bind, check_rx_resize],
+ args=(cfg, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
new file mode 100644
index 000000000000..8b75faa9af6d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Driver test environment.
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
+
+try:
+ sys.path.append(KSFT_DIR.as_posix())
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily, PSPFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+ fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup, ksft_variants, KsftNamedVariant
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
+
+ __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+ "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+ "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+ "CmdExitFailure",
+ "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+ "fd_read_timeout", "ip", "rand_port",
+ "wait_port_listen", "wait_file",
+ "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+ "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+ "ksft_setup", "ksft_variants", "KsftNamedVariant",
+ "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+ "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+ "ksft_not_none", "ksft_not_none"]
+
+ from .env import NetDrvEnv, NetDrvEpEnv
+ from .load import GenerateTraffic, Iperf3Runner
+ from .remote import Remote
+
+ __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+ "Iperf3Runner"]
+except ModuleNotFoundError as e:
+ print("Failed importing `net` library from kernel sources")
+ print(str(e))
+ sys.exit(4)
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
new file mode 100644
index 000000000000..8b644fd84ff2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -0,0 +1,287 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import time
+from pathlib import Path
+from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import ksft_setup, wait_file
+from lib.py import cmd, ethtool, ip, CmdExitFailure
+from lib.py import NetNS, NetdevSimDev
+from .remote import Remote
+
+
+class NetDrvEnvBase:
+ """
+ Base class for a NIC / host environments
+
+ Attributes:
+ test_dir: Path to the source directory of the test
+ net_lib_dir: Path to the net/lib directory
+ """
+ def __init__(self, src_path):
+ self.src_path = Path(src_path)
+ self.test_dir = self.src_path.parent.resolve()
+ self.net_lib_dir = (Path(__file__).parent / "../../../../net/lib").resolve()
+
+ self.env = self._load_env_file()
+
+ # Following attrs must be set be inheriting classes
+ self.dev = None
+
+ def _load_env_file(self):
+ env = os.environ.copy()
+
+ src_dir = Path(self.src_path).parent.resolve()
+ if not (src_dir / "net.config").exists():
+ return ksft_setup(env)
+
+ with open((src_dir / "net.config").as_posix(), 'r') as fp:
+ for line in fp.readlines():
+ full_file = line
+ # Strip comments
+ pos = line.find("#")
+ if pos >= 0:
+ line = line[:pos]
+ line = line.strip()
+ if not line:
+ continue
+ pair = line.split('=', maxsplit=1)
+ if len(pair) != 2:
+ raise Exception("Can't parse configuration line:", full_file)
+ env[pair[0]] = pair[1]
+ return ksft_setup(env)
+
+ def __del__(self):
+ pass
+
+ def __enter__(self):
+ ip(f"link set dev {self.dev['ifname']} up")
+ wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier",
+ lambda x: x.strip() == "1")
+
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.__del__()
+
+
+class NetDrvEnv(NetDrvEnvBase):
+ """
+ Class for a single NIC / host env, with no remote end
+ """
+ def __init__(self, src_path, nsim_test=None, **kwargs):
+ super().__init__(src_path)
+
+ self._ns = None
+
+ if 'NETIF' in self.env:
+ if nsim_test is True:
+ raise KsftXfailEx("Test only works on netdevsim")
+
+ self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
+ else:
+ if nsim_test is False:
+ raise KsftXfailEx("Test does not work on netdevsim")
+
+ self._ns = NetdevSimDev(**kwargs)
+ self.dev = self._ns.nsims[0].dev
+ self.ifname = self.dev['ifname']
+ self.ifindex = self.dev['ifindex']
+
+ def __del__(self):
+ if self._ns:
+ self._ns.remove()
+ self._ns = None
+
+
+class NetDrvEpEnv(NetDrvEnvBase):
+ """
+ Class for an environment with a local device and "remote endpoint"
+ which can be used to send traffic in.
+
+ For local testing it creates two network namespaces and a pair
+ of netdevsim devices.
+ """
+
+ # Network prefixes used for local tests
+ nsim_v4_pfx = "192.0.2."
+ nsim_v6_pfx = "2001:db8::"
+
+ def __init__(self, src_path, nsim_test=None):
+ super().__init__(src_path)
+
+ self._stats_settle_time = None
+
+ # Things we try to destroy
+ self.remote = None
+ # These are for local testing state
+ self._netns = None
+ self._ns = None
+ self._ns_peer = None
+
+ self.addr_v = { "4": None, "6": None }
+ self.remote_addr_v = { "4": None, "6": None }
+
+ if "NETIF" in self.env:
+ if nsim_test is True:
+ raise KsftXfailEx("Test only works on netdevsim")
+ self._check_env()
+
+ self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
+
+ self.addr_v["4"] = self.env.get("LOCAL_V4")
+ self.addr_v["6"] = self.env.get("LOCAL_V6")
+ self.remote_addr_v["4"] = self.env.get("REMOTE_V4")
+ self.remote_addr_v["6"] = self.env.get("REMOTE_V6")
+ kind = self.env["REMOTE_TYPE"]
+ args = self.env["REMOTE_ARGS"]
+ else:
+ if nsim_test is False:
+ raise KsftXfailEx("Test does not work on netdevsim")
+
+ self.create_local()
+
+ self.dev = self._ns.nsims[0].dev
+
+ self.addr_v["4"] = self.nsim_v4_pfx + "1"
+ self.addr_v["6"] = self.nsim_v6_pfx + "1"
+ self.remote_addr_v["4"] = self.nsim_v4_pfx + "2"
+ self.remote_addr_v["6"] = self.nsim_v6_pfx + "2"
+ kind = "netns"
+ args = self._netns.name
+
+ self.remote = Remote(kind, args, src_path)
+
+ self.addr_ipver = "6" if self.addr_v["6"] else "4"
+ self.addr = self.addr_v[self.addr_ipver]
+ self.remote_addr = self.remote_addr_v[self.addr_ipver]
+
+ # Bracketed addresses, some commands need IPv6 to be inside []
+ self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"]
+ self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"]
+
+ self.ifname = self.dev['ifname']
+ self.ifindex = self.dev['ifindex']
+
+ # resolve remote interface name
+ self.remote_ifname = self.resolve_remote_ifc()
+ self.remote_dev = ip("-d link show dev " + self.remote_ifname,
+ host=self.remote, json=True)[0]
+
+ self._required_cmd = {}
+
+ def create_local(self):
+ self._netns = NetNS()
+ self._ns = NetdevSimDev()
+ self._ns_peer = NetdevSimDev(ns=self._netns)
+
+ with open("/proc/self/ns/net") as nsfd0, \
+ open("/var/run/netns/" + self._netns.name) as nsfd1:
+ ifi0 = self._ns.nsims[0].ifindex
+ ifi1 = self._ns_peer.nsims[0].ifindex
+ NetdevSimDev.ctrl_write('link_device',
+ f'{nsfd0.fileno()}:{ifi0} {nsfd1.fileno()}:{ifi1}')
+
+ ip(f" addr add dev {self._ns.nsims[0].ifname} {self.nsim_v4_pfx}1/24")
+ ip(f"-6 addr add dev {self._ns.nsims[0].ifname} {self.nsim_v6_pfx}1/64 nodad")
+ ip(f" link set dev {self._ns.nsims[0].ifname} up")
+
+ ip(f" addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v4_pfx}2/24", ns=self._netns)
+ ip(f"-6 addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v6_pfx}2/64 nodad", ns=self._netns)
+ ip(f" link set dev {self._ns_peer.nsims[0].ifname} up", ns=self._netns)
+
+ def _check_env(self):
+ vars_needed = [
+ ["LOCAL_V4", "LOCAL_V6"],
+ ["REMOTE_V4", "REMOTE_V6"],
+ ["REMOTE_TYPE"],
+ ["REMOTE_ARGS"]
+ ]
+ missing = []
+
+ for choice in vars_needed:
+ for entry in choice:
+ if entry in self.env:
+ break
+ else:
+ missing.append(choice)
+ # Make sure v4 / v6 configs are symmetric
+ if ("LOCAL_V6" in self.env) != ("REMOTE_V6" in self.env):
+ missing.append(["LOCAL_V6", "REMOTE_V6"])
+ if ("LOCAL_V4" in self.env) != ("REMOTE_V4" in self.env):
+ missing.append(["LOCAL_V4", "REMOTE_V4"])
+ if missing:
+ raise Exception("Invalid environment, missing configuration:", missing,
+ "Please see tools/testing/selftests/drivers/net/README.rst")
+
+ def resolve_remote_ifc(self):
+ v4 = v6 = None
+ if self.remote_addr_v["4"]:
+ v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote)
+ if self.remote_addr_v["6"]:
+ v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote)
+ if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]:
+ raise Exception("Can't resolve remote interface name, v4 and v6 don't match")
+ if (v4 and len(v4) > 1) or (v6 and len(v6) > 1):
+ raise Exception("Can't resolve remote interface name, multiple interfaces match")
+ return v6[0]["ifname"] if v6 else v4[0]["ifname"]
+
+ def __del__(self):
+ if self._ns:
+ self._ns.remove()
+ self._ns = None
+ if self._ns_peer:
+ self._ns_peer.remove()
+ self._ns_peer = None
+ if self._netns:
+ del self._netns
+ self._netns = None
+ if self.remote:
+ del self.remote
+ self.remote = None
+
+ def require_ipver(self, ipver):
+ if not self.addr_v[ipver] or not self.remote_addr_v[ipver]:
+ raise KsftSkipEx(f"Test requires IPv{ipver} connectivity")
+
+ def require_nsim(self):
+ if self._ns is None:
+ raise KsftXfailEx("Test only works on netdevsim")
+
+ def _require_cmd(self, comm, key, host=None):
+ cached = self._required_cmd.get(comm, {})
+ if cached.get(key) is None:
+ cached[key] = cmd("command -v -- " + comm, fail=False,
+ shell=True, host=host).ret == 0
+ self._required_cmd[comm] = cached
+ return cached[key]
+
+ def require_cmd(self, comm, local=True, remote=False):
+ if local:
+ if not self._require_cmd(comm, "local"):
+ raise KsftSkipEx("Test requires command: " + comm)
+ if remote:
+ if not self._require_cmd(comm, "remote", host=self.remote):
+ raise KsftSkipEx("Test requires (remote) command: " + comm)
+
+ def wait_hw_stats_settle(self):
+ """
+ Wait for HW stats to become consistent, some devices DMA HW stats
+ periodically so events won't be reflected until next sync.
+ Good drivers will tell us via ethtool what their sync period is.
+ """
+ if self._stats_settle_time is None:
+ data = {}
+ try:
+ data = ethtool("-c " + self.ifname, json=True)[0]
+ except CmdExitFailure as e:
+ if "Operation not supported" not in e.cmd.stderr:
+ raise
+
+ self._stats_settle_time = 0.025 + \
+ data.get('stats-block-usecs', 0) / 1000 / 1000
+
+ time.sleep(self._stats_settle_time)
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
new file mode 100644
index 000000000000..f181fa2d38fc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -0,0 +1,139 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import re
+import time
+import json
+
+from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
+
+
+class Iperf3Runner:
+ """
+ Sets up and runs iperf3 traffic.
+ """
+ def __init__(self, env, port=None, server_ip=None, client_ip=None):
+ env.require_cmd("iperf3", local=True, remote=True)
+ self.env = env
+ self.port = rand_port() if port is None else port
+ self.server_ip = server_ip
+ self.client_ip = client_ip
+
+ def _build_server(self):
+ cmdline = f"iperf3 -s -1 -p {self.port}"
+ if self.server_ip:
+ cmdline += f" -B {self.server_ip}"
+ return cmdline
+
+ def _build_client(self, streams, duration, reverse):
+ host = self.env.addr if self.server_ip is None else self.server_ip
+ cmdline = f"iperf3 -c {host} -p {self.port} -P {streams} -t {duration} -J"
+ if self.client_ip:
+ cmdline += f" -B {self.client_ip}"
+ if reverse:
+ cmdline += " --reverse"
+ return cmdline
+
+ def start_server(self):
+ """
+ Starts an iperf3 server with optional bind IP.
+ """
+ cmdline = self._build_server()
+ proc = cmd(cmdline, background=True)
+ wait_port_listen(self.port)
+ time.sleep(0.1)
+ return proc
+
+ def start_client(self, background=False, streams=1, duration=10, reverse=False):
+ """
+ Starts the iperf3 client with the configured options.
+ """
+ cmdline = self._build_client(streams, duration, reverse)
+ return cmd(cmdline, background=background, host=self.env.remote)
+
+ def measure_bandwidth(self, reverse=False):
+ """
+ Runs an iperf3 measurement and returns the average bandwidth (Gbps).
+ Discards the first and last few reporting intervals and uses only the
+ middle part of the run where throughput is typically stable.
+ """
+ self.start_server()
+ result = self.start_client(duration=10, reverse=reverse)
+
+ if result.ret != 0:
+ raise RuntimeError("iperf3 failed to run successfully")
+ try:
+ out = json.loads(result.stdout)
+ except json.JSONDecodeError as exc:
+ raise ValueError("Failed to parse iperf3 JSON output") from exc
+
+ intervals = out.get("intervals", [])
+ samples = [i["sum"]["bits_per_second"] / 1e9 for i in intervals]
+ if len(samples) < 10:
+ raise ValueError(f"iperf3 returned too few intervals: {len(samples)}")
+ # Discard potentially unstable first and last 3 seconds.
+ stable = samples[3:-3]
+
+ avg = sum(stable) / len(stable)
+
+ return avg
+
+
+class GenerateTraffic:
+ def __init__(self, env, port=None):
+ self.env = env
+ self.runner = Iperf3Runner(env, port)
+
+ self._iperf_server = self.runner.start_server()
+ self._iperf_client = self.runner.start_client(background=True, streams=16, duration=86400)
+
+ # Wait for traffic to ramp up
+ if not self._wait_pkts(pps=1000):
+ self.stop(verbose=True)
+ raise Exception("iperf3 traffic did not ramp up")
+
+ def _wait_pkts(self, pkt_cnt=None, pps=None):
+ """
+ Wait until we've seen pkt_cnt or until traffic ramps up to pps.
+ Only one of pkt_cnt or pss can be specified.
+ """
+ pkt_start = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"]
+ for _ in range(50):
+ time.sleep(0.1)
+ pkt_now = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"]
+ if pps:
+ if pkt_now - pkt_start > pps / 10:
+ return True
+ pkt_start = pkt_now
+ elif pkt_cnt:
+ if pkt_now - pkt_start > pkt_cnt:
+ return True
+ return False
+
+ def wait_pkts_and_stop(self, pkt_cnt):
+ failed = not self._wait_pkts(pkt_cnt=pkt_cnt)
+ self.stop(verbose=failed)
+
+ def stop(self, verbose=None):
+ self._iperf_client.process(terminate=True)
+ if verbose:
+ ksft_pr(">> Client:")
+ ksft_pr(self._iperf_client.stdout)
+ ksft_pr(self._iperf_client.stderr)
+ self._iperf_server.process(terminate=True)
+ if verbose:
+ ksft_pr(">> Server:")
+ ksft_pr(self._iperf_server.stdout)
+ ksft_pr(self._iperf_server.stderr)
+ self._wait_client_stopped()
+
+ def _wait_client_stopped(self, sleep=0.005, timeout=5):
+ end = time.monotonic() + timeout
+
+ live_port_pattern = re.compile(fr":{self.runner.port:04X} 0[^6] ")
+
+ while time.monotonic() < end:
+ data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout
+ if not live_port_pattern.search(data):
+ return
+ time.sleep(sleep)
+ raise Exception(f"Waiting for client to stop timed out after {timeout}s")
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote.py b/tools/testing/selftests/drivers/net/lib/py/remote.py
new file mode 100644
index 000000000000..b1780b987722
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote.py
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import importlib
+
+_modules = {}
+
+def Remote(kind, args, src_path):
+ global _modules
+
+ if kind not in _modules:
+ _modules[kind] = importlib.import_module("..remote_" + kind, __name__)
+
+ dir_path = os.path.abspath(src_path + "/../")
+ return getattr(_modules[kind], "Remote")(args, dir_path)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_netns.py b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py
new file mode 100644
index 000000000000..7d5eeb0271bc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import subprocess
+
+from lib.py import cmd
+
+
+class Remote:
+ def __init__(self, name, dir_path):
+ self.name = name
+ self.dir_path = dir_path
+
+ def cmd(self, comm):
+ return subprocess.Popen(["ip", "netns", "exec", self.name, "bash", "-c", comm],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ def deploy(self, what):
+ if os.path.isabs(what):
+ return what
+ return os.path.abspath(self.dir_path + "/" + what)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py
new file mode 100644
index 000000000000..924addde19a3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import string
+import subprocess
+import random
+
+from lib.py import cmd
+
+
+class Remote:
+ def __init__(self, name, dir_path):
+ self.name = name
+ self.dir_path = dir_path
+ self._tmpdir = None
+
+ def __del__(self):
+ if self._tmpdir:
+ cmd("rm -rf " + self._tmpdir, host=self)
+ self._tmpdir = None
+
+ def cmd(self, comm):
+ return subprocess.Popen(["ssh", "-q", self.name, comm],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ def _mktmp(self):
+ return ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
+
+ def deploy(self, what):
+ if not self._tmpdir:
+ self._tmpdir = "/tmp/" + self._mktmp()
+ cmd("mkdir " + self._tmpdir, host=self)
+ file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what)
+
+ if not os.path.isabs(what):
+ what = os.path.abspath(self.dir_path + "/" + what)
+
+ cmd(f"scp {what} {self.name}:{file_name}")
+ return file_name
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
new file mode 100644
index 000000000000..ae8abff4be40
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -0,0 +1,419 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This file contains functions and helpers to support the netconsole
+# selftests
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+SRCIF="" # to be populated later
+SRCIP="" # to be populated later
+SRCIP4="192.0.2.1"
+SRCIP6="fc00::1"
+DSTIF="" # to be populated later
+DSTIP="" # to be populated later
+DSTIP4="192.0.2.2"
+DSTIP6="fc00::2"
+
+PORT="6666"
+MSG="netconsole selftest"
+USERDATA_KEY="key"
+USERDATA_VALUE="value"
+TARGET=$(mktemp -u netcons_XXXXX)
+DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
+NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
+NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+# NAMESPACE will be populated by setup_ns with a random value
+NAMESPACE=""
+
+# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test
+# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the
+# same time.
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_BOND_TX_1=$((768 + RANDOM % 256))
+NSIM_BOND_TX_2=$((1024 + RANDOM % 256))
+NSIM_BOND_RX_1=$((1280 + RANDOM % 256))
+NSIM_BOND_RX_2=$((1536 + RANDOM % 256))
+NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
+NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+
+# Used to create and delete namespaces
+source "${LIBDIR}"/../../../../net/lib.sh
+
+# Create netdevsim interfaces
+create_ifaces() {
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
+ udevadm settle 2> /dev/null || true
+
+ local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
+ local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
+
+ # These are global variables
+ SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
+ -path "$NSIM1"/net -exec basename {} \;)
+ DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
+ -path "$NSIM2"/net -exec basename {} \;)
+}
+
+link_ifaces() {
+ local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+ local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
+ local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
+
+ exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
+ exec {INITNS_FD}</proc/self/ns/net
+
+ # Bind the dst interface to namespace
+ ip link set "${DSTIF}" netns "${NAMESPACE}"
+
+ # Linking one device to the other one (on the other namespace}
+ if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK
+ then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup
+ exit "${ksft_skip}"
+ fi
+}
+
+function configure_ip() {
+ # Configure the IPs for both interfaces
+ ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
+ ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
+
+ ip addr add "${SRCIP}"/24 dev "${SRCIF}"
+ ip link set "${SRCIF}" up
+}
+
+function select_ipv4_or_ipv6()
+{
+ local VERSION=${1}
+
+ if [[ "$VERSION" == "ipv6" ]]
+ then
+ DSTIP="${DSTIP6}"
+ SRCIP="${SRCIP6}"
+ else
+ DSTIP="${DSTIP4}"
+ SRCIP="${SRCIP4}"
+ fi
+}
+
+function set_network() {
+ local IP_VERSION=${1:-"ipv4"}
+
+ # setup_ns function is coming from lib.sh
+ setup_ns NAMESPACE
+
+ # Create both interfaces, and assign the destination to a different
+ # namespace
+ create_ifaces
+
+ # Link both interfaces back to back
+ link_ifaces
+
+ select_ipv4_or_ipv6 "${IP_VERSION}"
+ configure_ip
+}
+
+function _create_dynamic_target() {
+ local FORMAT="${1:?FORMAT parameter required}"
+ local NCPATH="${2:?NCPATH parameter required}"
+
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+
+ # Create a dynamic target
+ mkdir "${NCPATH}"
+
+ echo "${DSTIP}" > "${NCPATH}"/remote_ip
+ echo "${SRCIP}" > "${NCPATH}"/local_ip
+ echo "${DSTMAC}" > "${NCPATH}"/remote_mac
+ echo "${SRCIF}" > "${NCPATH}"/dev_name
+
+ if [ "${FORMAT}" == "basic" ]
+ then
+ # Basic target does not support release
+ echo 0 > "${NCPATH}"/release
+ echo 0 > "${NCPATH}"/extended
+ elif [ "${FORMAT}" == "extended" ]
+ then
+ echo 1 > "${NCPATH}"/extended
+ fi
+}
+
+function create_dynamic_target() {
+ local FORMAT=${1:-"extended"}
+ local NCPATH=${2:-"$NETCONS_PATH"}
+ _create_dynamic_target "${FORMAT}" "${NCPATH}"
+
+ echo 1 > "${NCPATH}"/enabled
+
+ # This will make sure that the kernel was able to
+ # load the netconsole driver configuration. The console message
+ # gets more organized/sequential as well.
+ sleep 1
+}
+
+# Generate the command line argument for netconsole following:
+# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+function create_cmdline_str() {
+ local BINDMODE=${1:-"ifname"}
+ if [ "${BINDMODE}" == "ifname" ]
+ then
+ SRCDEV=${SRCIF}
+ else
+ SRCDEV=$(mac_get "${SRCIF}")
+ fi
+
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+ SRCPORT="1514"
+ TGTPORT="6666"
+
+ echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\""
+}
+
+# Do not append the release to the header of the message
+function disable_release_append() {
+ echo 0 > "${NETCONS_PATH}"/enabled
+ echo 0 > "${NETCONS_PATH}"/release
+ echo 1 > "${NETCONS_PATH}"/enabled
+}
+
+function do_cleanup() {
+ local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
+
+ # Delete netdevsim devices
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
+
+ # this is coming from lib.sh
+ cleanup_all_ns
+
+ # Restoring printk configurations
+ echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
+}
+
+function cleanup_netcons() {
+ # delete netconsole dynamic reconfiguration
+ # do not fail if the target is already disabled
+ if [[ ! -d "${NETCONS_PATH}" ]]
+ then
+ # in some cases this is called before netcons path is created
+ return
+ fi
+ if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]]
+ then
+ echo 0 > "${NETCONS_PATH}"/enabled || true
+ fi
+ # Remove all the keys that got created during the selftest
+ find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+ # Remove the configfs entry
+ rmdir "${NETCONS_PATH}"
+}
+
+function cleanup() {
+ cleanup_netcons
+ do_cleanup
+}
+
+function set_user_data() {
+ if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
+ then
+ echo "Userdata path not available in ${NETCONS_PATH}/userdata"
+ exit "${ksft_skip}"
+ fi
+
+ KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}"
+ mkdir -p "${KEY_PATH}"
+ VALUE_PATH="${KEY_PATH}""/value"
+ echo "${USERDATA_VALUE}" > "${VALUE_PATH}"
+}
+
+function listen_port_and_save_to() {
+ local OUTPUT=${1}
+ local IPVERSION=${2:-"ipv4"}
+
+ if [ "${IPVERSION}" == "ipv4" ]
+ then
+ SOCAT_MODE="UDP-LISTEN"
+ else
+ SOCAT_MODE="UDP6-LISTEN"
+ fi
+
+ # Just wait for 2 seconds
+ timeout 2 ip netns exec "${NAMESPACE}" \
+ socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null
+}
+
+# Only validate that the message arrived properly
+function validate_msg() {
+ local TMPFILENAME="$1"
+
+ # Check if the file exists
+ if [ ! -f "$TMPFILENAME" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${TMPFILENAME}"; then
+ echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Validate the message and userdata
+function validate_result() {
+ local TMPFILENAME="$1"
+
+ # TMPFILENAME will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # key=value
+
+ validate_msg "${TMPFILENAME}"
+
+ # userdata is not supported on basic format target,
+ # thus, do not validate it.
+ if [ "${FORMAT}" != "basic" ];
+ then
+ if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
+ echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
+ fi
+
+ # Delete the file once it is validated, otherwise keep it
+ # for debugging purposes
+ rm "${TMPFILENAME}"
+}
+
+function check_for_dependencies() {
+ if [ "$(id -u)" -ne 0 ]; then
+ echo "This test must be run as root" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which socat > /dev/null ; then
+ echo "SKIP: socat(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which ip > /dev/null ; then
+ echo "SKIP: ip(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which udevadm > /dev/null ; then
+ echo "SKIP: udevadm(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if [ ! -f /proc/net/if_inet6 ]; then
+ echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
+ echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if [ ! -d "${NETCONS_CONFIGFS}" ]; then
+ echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip link show "${DSTIF}" 2> /dev/null; then
+ echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
+ exit "${ksft_skip}"
+ fi
+
+ REGEXP4="inet.*(${SRCIP4}|${DSTIP4})"
+ REGEXP6="inet.*(${SRCIP6}|${DSTIP6})"
+ if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then
+ echo "SKIP: IPv4s already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then
+ echo "SKIP: IPv6s already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+function check_for_taskset() {
+ if ! which taskset > /dev/null ; then
+ echo "SKIP: taskset(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# This is necessary if running multiple tests in a row
+function pkill_socat() {
+ PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}"
+ # socat runs under timeout(1), kill it if it is still alive
+ # do not fail if socat doesn't exist anymore
+ set +e
+ pkill -f "${PROCESS_NAME4}"
+ pkill -f "${PROCESS_NAME6}"
+ set -e
+}
+
+# Check if netconsole was compiled as a module, otherwise exit
+function check_netconsole_module() {
+ if modinfo netconsole | grep filename: | grep -q builtin
+ then
+ echo "SKIP: netconsole should be compiled as a module" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# A wrapper to translate protocol version to udp version
+function wait_for_port() {
+ local NAMESPACE=${1}
+ local PORT=${2}
+ IP_VERSION=${3}
+
+ if [ "${IP_VERSION}" == "ipv6" ]
+ then
+ PROTOCOL="udp6"
+ else
+ PROTOCOL="udp"
+ fi
+
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}"
+ # even after the port is open, let's wait 1 second before writing
+ # otherwise the packet could be missed, and the test will fail. Happens
+ # more frequently on IPv6
+ sleep 1
+}
+
+# Clean up netdevsim ifaces created for bonding test
+function cleanup_bond_nsim() {
+ ip -n "${TXNS}" \
+ link delete "${BOND_TX_MAIN_IF}" type bond || true
+ ip -n "${RXNS}" \
+ link delete "${BOND_RX_MAIN_IF}" type bond || true
+
+ cleanup_netdevsim "$NSIM_BOND_TX_1"
+ cleanup_netdevsim "$NSIM_BOND_TX_2"
+ cleanup_netdevsim "$NSIM_BOND_RX_1"
+ cleanup_netdevsim "$NSIM_BOND_RX_2"
+}
+
+# cleanup tests that use bonding interfaces
+function cleanup_bond() {
+ cleanup_netcons
+ cleanup_bond_nsim
+ cleanup_all_ns
+ ip link delete "${VETH0}" || true
+}
diff --git a/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh
new file mode 100755
index 000000000000..82be5d013330
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh
@@ -0,0 +1,668 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+# The script is adopted to work with the Microchip KSZ switch driver.
+
+ETH_FCS_LEN=4
+
+WAIT_TIME=1
+NUM_NETIFS=4
+REQUIRE_JQ="yes"
+REQUIRE_MZ="yes"
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# On h1_ and h2_create do not set IP addresses to avoid interaction with the
+# system, to keep packet counters clean.
+h1_create()
+{
+ simple_if_init $h1
+ sysctl_set net.ipv6.conf.${h1}.disable_ipv6 1
+ # Get the MAC address of the interface to use it with mausezahn
+ h1_mac=$(ip -j link show dev ${h1} | jq -e '.[].address')
+}
+
+h1_destroy()
+{
+ sysctl_restore net.ipv6.conf.${h1}.disable_ipv6
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ sysctl_set net.ipv6.conf.${h2}.disable_ipv6 1
+ h2_mac=$(ip -j link show dev ${h2} | jq -e '.[].address')
+}
+
+h2_destroy()
+{
+ sysctl_restore net.ipv6.conf.${h2}.disable_ipv6
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ ip link set ${swp1} up
+ ip link set ${swp2} up
+ sysctl_set net.ipv6.conf.${swp1}.disable_ipv6 1
+ sysctl_set net.ipv6.conf.${swp2}.disable_ipv6 1
+
+ # Ports should trust VLAN PCP even with vlan_filtering=0
+ ip link add br0 type bridge
+ ip link set ${swp1} master br0
+ ip link set ${swp2} master br0
+ ip link set br0 up
+ sysctl_set net.ipv6.conf.br0.disable_ipv6 1
+}
+
+switch_destroy()
+{
+ sysctl_restore net.ipv6.conf.${swp2}.disable_ipv6
+ sysctl_restore net.ipv6.conf.${swp1}.disable_ipv6
+
+ ip link del br0
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+ switch_destroy
+
+ vrf_cleanup
+}
+
+set_apptrust_order()
+{
+ local if_name=$1
+ local order=$2
+
+ dcb apptrust set dev ${if_name} order ${order}
+}
+
+# Function to extract a specified field from a given JSON stats string
+extract_network_stat() {
+ local stats_json=$1
+ local field_name=$2
+
+ echo $(echo "$stats_json" | jq -r "$field_name")
+}
+
+run_test()
+{
+ local test_name=$1;
+ local apptrust_order=$2;
+ local port_prio=$3;
+ local dscp_ipv=$4;
+ local dscp=$5;
+ local have_vlan=$6;
+ local pcp_ipv=$7;
+ local vlan_pcp=$8;
+ local ip_v6=$9
+
+ local rx_ipv
+ local tx_ipv
+
+ RET=0
+
+ # Send some packet to populate the switch MAC table
+ $MZ ${h2} -a ${h2_mac} -b ${h1_mac} -p 64 -t icmp echores -c 1
+
+ # Based on the apptrust order, set the expected Internal Priority values
+ # for the RX and TX paths.
+ if [ "${apptrust_order}" == "" ]; then
+ echo "Apptrust order not set."
+ rx_ipv=${port_prio}
+ tx_ipv=${port_prio}
+ elif [ "${apptrust_order}" == "dscp" ]; then
+ echo "Apptrust order is DSCP."
+ rx_ipv=${dscp_ipv}
+ tx_ipv=${dscp_ipv}
+ elif [ "${apptrust_order}" == "pcp" ]; then
+ echo "Apptrust order is PCP."
+ rx_ipv=${pcp_ipv}
+ tx_ipv=${pcp_ipv}
+ elif [ "${apptrust_order}" == "pcp dscp" ]; then
+ echo "Apptrust order is PCP DSCP."
+ if [ ${have_vlan} -eq 1 ]; then
+ rx_ipv=$((dscp_ipv > pcp_ipv ? dscp_ipv : pcp_ipv))
+ tx_ipv=${pcp_ipv}
+ else
+ rx_ipv=${dscp_ipv}
+ tx_ipv=${dscp_ipv}
+ fi
+ else
+ RET=1
+ echo "Error: Unknown apptrust order ${apptrust_order}"
+ log_test "${test_name}"
+ return
+ fi
+
+ # Most/all? of the KSZ switches do not provide per-TC counters. There
+ # are only tx_hi and rx_hi counters, which are used to count packets
+ # which are considered as high priority and most likely not assigned
+ # to the queue 0.
+ # On the ingress path, packets seem to get high priority status
+ # independently of the DSCP or PCP global mapping. On the egress path,
+ # the high priority status is assigned based on the DSCP or PCP global
+ # map configuration.
+ # The thresholds for the high priority status are not documented, but
+ # it seems that the switch considers packets as high priority on the
+ # ingress path if detected Internal Priority is greater than 0. On the
+ # egress path, the switch considers packets as high priority if
+ # detected Internal Priority is greater than 1.
+ if [ ${rx_ipv} -ge 1 ]; then
+ local expect_rx_high_prio=1
+ else
+ local expect_rx_high_prio=0
+ fi
+
+ if [ ${tx_ipv} -ge 2 ]; then
+ local expect_tx_high_prio=1
+ else
+ local expect_tx_high_prio=0
+ fi
+
+ # Use ip tool to get the current switch packet counters. ethool stats
+ # need to be recalculated to get the correct values.
+ local swp1_stats=$(ip -s -j link show dev ${swp1})
+ local swp2_stats=$(ip -s -j link show dev ${swp2})
+ local swp1_rx_packets_before=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.packets')
+ local swp1_rx_bytes_before=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.bytes')
+ local swp2_tx_packets_before=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.packets')
+ local swp2_tx_bytes_before=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.bytes')
+ local swp1_rx_hi_before=$(ethtool_stats_get ${swp1} "rx_hi")
+ local swp2_tx_hi_before=$(ethtool_stats_get ${swp2} "tx_hi")
+
+ # Assamble the mausezahn command based on the test parameters
+ # For the testis with ipv4 or ipv6, use icmp response packets,
+ # to avoid interaction with the system, to keep packet counters
+ # clean.
+ if [ ${ip_v6} -eq 0 ]; then
+ local ip="-a ${h1_mac} -b ${h2_mac} -A ${H1_IPV4} \
+ -B ${H2_IPV4} -t icmp unreach,code=1,dscp=${dscp}"
+ else
+ local ip="-6 -a ${h1_mac} -b ${h2_mac} -A ${H1_IPV6} \
+ -B ${H2_IPV6} -t icmp6 type=1,code=0,dscp=${dscp}"
+ fi
+
+ if [ ${have_vlan} -eq 1 ]; then
+ local vlan_pcp_opt="-Q ${vlan_pcp}:0"
+ else
+ local vlan_pcp_opt=""
+ fi
+ $MZ ${h1} ${ip} -c ${PING_COUNT} -d 10msec ${vlan_pcp_opt}
+
+ # Wait until the switch packet counters are updated
+ sleep 6
+
+ local swp1_stats=$(ip -s -j link show dev ${swp1})
+ local swp2_stats=$(ip -s -j link show dev ${swp2})
+
+ local swp1_rx_packets_after=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.packets')
+ local swp1_rx_bytes_after=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.bytes')
+ local swp2_tx_packets_after=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.packets')
+ local swp2_tx_bytes_after=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.bytes')
+
+ local swp1_rx_packets_diff=$((${swp1_rx_packets_after} - \
+ ${swp1_rx_packets_before}))
+ local swp2_tx_packets_diff=$((${swp2_tx_packets_after} - \
+ ${swp2_tx_packets_before}))
+
+ local swp1_rx_hi_after=$(ethtool_stats_get ${swp1} "rx_hi")
+ local swp2_tx_hi_after=$(ethtool_stats_get ${swp2} "tx_hi")
+
+ # Test if any packets were received on swp1, we will rx before and after
+ if [ ${swp1_rx_packets_diff} -lt ${PING_COUNT} ]; then
+ echo "Not expected amount of received packets on ${swp1}"
+ echo "before ${swp1_rx_packets_before} after ${swp1_rx_packets_after}"
+ RET=1
+ fi
+
+ # Test if any packets were transmitted on swp2, we will tx before and after
+ if [ ${swp2_tx_packets_diff} -lt ${PING_COUNT} ]; then
+ echo "Not expected amount of transmitted packets on ${swp2}"
+ echo "before ${swp2_tx_packets_before} after ${swp2_tx_packets_after}"
+ RET=1
+ fi
+
+ # tx/rx_hi counted in bytes. So, we need to compare the difference in bytes
+ local swp1_rx_bytes_diff=$(($swp1_rx_bytes_after - $swp1_rx_bytes_before))
+ local swp2_tx_bytes_diff=$(($swp2_tx_bytes_after - $swp2_tx_bytes_before))
+ local swp1_rx_hi_diff=$(($swp1_rx_hi_after - $swp1_rx_hi_before))
+ local swp2_tx_hi_diff=$(($swp2_tx_hi_after - $swp2_tx_hi_before))
+
+ if [ ${expect_rx_high_prio} -eq 1 ]; then
+ swp1_rx_hi_diff=$((${swp1_rx_hi_diff} - \
+ ${swp1_rx_packets_diff} * ${ETH_FCS_LEN}))
+ if [ ${swp1_rx_hi_diff} -ne ${swp1_rx_bytes_diff} ]; then
+ echo "Not expected amount of high priority packets received on ${swp1}"
+ echo "RX hi diff: ${swp1_rx_hi_diff}, expected RX bytes diff: ${swp1_rx_bytes_diff}"
+ RET=1
+ fi
+ else
+ if [ ${swp1_rx_hi_diff} -ne 0 ]; then
+ echo "Unexpected amount of high priority packets received on ${swp1}"
+ echo "RX hi diff: ${swp1_rx_hi_diff}, expected 0"
+ RET=1
+ fi
+ fi
+
+ if [ ${expect_tx_high_prio} -eq 1 ]; then
+ swp2_tx_hi_diff=$((${swp2_tx_hi_diff} - \
+ ${swp2_tx_packets_diff} * ${ETH_FCS_LEN}))
+ if [ ${swp2_tx_hi_diff} -ne ${swp2_tx_bytes_diff} ]; then
+ echo "Not expected amount of high priority packets transmitted on ${swp2}"
+ echo "TX hi diff: ${swp2_tx_hi_diff}, expected TX bytes diff: ${swp2_tx_bytes_diff}"
+ RET=1
+ fi
+ else
+ if [ ${swp2_tx_hi_diff} -ne 0 ]; then
+ echo "Unexpected amount of high priority packets transmitted on ${swp2}"
+ echo "TX hi diff: ${swp2_tx_hi_diff}, expected 0"
+ RET=1
+ fi
+ fi
+
+ log_test "${test_name}"
+}
+
+run_test_dscp()
+{
+ # IPv4 test
+ run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 0
+ # IPv6 test
+ run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 1
+}
+
+run_test_dscp_pcp()
+{
+ # IPv4 test
+ run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 0
+ # IPv6 test
+ run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 1
+}
+
+port_default_prio_get()
+{
+ local if_name=$1
+ local prio
+
+ prio="$(dcb -j app show dev ${if_name} default-prio | \
+ jq '.default_prio[]')"
+ if [ -z "${prio}" ]; then
+ prio=0
+ fi
+
+ echo ${prio}
+}
+
+test_port_default()
+{
+ local orig_apptrust=$(port_get_default_apptrust ${swp1})
+ local orig_prio=$(port_default_prio_get ${swp1})
+ local apptrust_order=""
+
+ RET=0
+
+ # Make sure no other priority sources will interfere with the test
+ set_apptrust_order ${swp1} "${apptrust_order}"
+
+ for val in $(seq 0 7); do
+ dcb app replace dev ${swp1} default-prio ${val}
+ if [ $val -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ break
+ fi
+
+ run_test_dscp "Port-default QoS classification, prio: ${val}" \
+ "${apptrust_order}" ${val} 0 0
+ done
+
+ set_apptrust_order ${swp1} "${orig_apptrust}"
+ if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then
+ RET=1
+ fi
+
+ dcb app replace dev ${swp1} default-prio ${orig_prio}
+ if [ $orig_prio -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ fi
+
+ log_test "Port-default QoS classification"
+}
+
+port_get_default_apptrust()
+{
+ local if_name=$1
+
+ dcb -j apptrust show dev ${if_name} | jq -r '.order[]' | \
+ tr '\n' ' ' | xargs
+}
+
+test_port_apptrust()
+{
+ local original_dscp_prios_swp1=$(get_dscp_prios ${swp1})
+ local orig_apptrust=$(port_get_default_apptrust ${swp1})
+ local orig_port_prio=$(port_default_prio_get ${swp1})
+ local order_variants=("pcp dscp" "dscp" "pcp")
+ local apptrust_order
+ local port_prio
+ local dscp_prio
+ local pcp_prio
+ local dscp
+ local pcp
+
+ RET=0
+
+ # First, test if apptrust configuration as taken by the kernel
+ for order in "${order_variants[@]}"; do
+ set_apptrust_order ${swp1} "${order}"
+ if [[ "$order" != "$(port_get_default_apptrust ${swp1})" ]]; then
+ RET=1
+ break
+ fi
+ done
+
+ log_test "Apptrust, supported variants"
+
+ # To test if the apptrust configuration is working as expected, we need
+ # to set DSCP priorities for the switch port.
+ init_dscp_prios "${swp1}" "${original_dscp_prios_swp1}"
+
+ # Start with a simple test where all apptrust sources are disabled
+ # default port priority is 0, DSCP priority is mapped to 7.
+ # No high priority packets should be received or transmitted.
+ port_prio=0
+ dscp_prio=7
+ dscp=4
+
+ dcb app replace dev ${swp1} default-prio ${port_prio}
+ dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio}
+
+ apptrust_order=""
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # Test with apptrust sources disabled, Packets should get port default
+ # priority which is 0
+ run_test_dscp "Apptrust, all disabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ apptrust_order="pcp"
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # If PCP is enabled, packets should get PCP priority, which is not
+ # set in this test (no VLAN tags are present in the packet). No high
+ # priority packets should be received or transmitted.
+ run_test_dscp "Apptrust, PCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ apptrust_order="dscp"
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # If DSCP is enabled, packets should get DSCP priority which is set to 7
+ # in this test. High priority packets should be received and transmitted.
+ run_test_dscp "Apptrust, DSCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ apptrust_order="pcp dscp"
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # If PCP and DSCP are enabled, PCP would have higher apptrust priority
+ # so packets should get PCP priority. But in this test VLAN PCP is not
+ # set, so it should get DSCP priority which is set to 7. High priority
+ # packets should be received and transmitted.
+ run_test_dscp "Apptrust, PCP and DSCP are enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ # If VLAN PCP is set, it should have higher apptrust priority than DSCP
+ # so packets should get VLAN PCP priority. Send packets with VLAN PCP
+ # set to 0, DSCP set to 7. Packets should get VLAN PCP priority.
+ # No high priority packets should be transmitted. Due to nature of the
+ # switch, high priority packets will be received.
+ pcp_prio=0
+ pcp=0
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+
+ # If VLAN PCP is set to 7, it should have higher apptrust priority than
+ # DSCP so packets should get VLAN PCP priority. Send packets with VLAN
+ # PCP set to 7, DSCP set to 7. Packets should get VLAN PCP priority.
+ # High priority packets should be received and transmitted.
+ pcp_prio=7
+ pcp=7
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+ # Now make sure that the switch is able to handle the case where DSCP
+ # priority is set to 0 and PCP priority is set to 7. Packets should get
+ # PCP priority. High priority packets should be received and transmitted.
+ dscp_prio=0
+ dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio}
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+ # If both VLAN PCP and DSCP are set to 0, packets should get 0 priority.
+ # No high priority packets should be received or transmitted.
+ pcp_prio=0
+ pcp=0
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+
+ # Restore original priorities
+ if ! restore_priorities "${swp1}" "${original_dscp_prios_swp1}"; then
+ RET=1
+ fi
+
+ set_apptrust_order ${swp1} "${orig_apptrust}"
+ if [ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]; then
+ RET=1
+ fi
+
+ dcb app replace dev ${swp1} default-prio ${orig_port_prio}
+ if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ fi
+
+ log_test "Apptrust, restore original settings"
+}
+
+# Function to get current DSCP priorities
+get_dscp_prios() {
+ local if_name=$1
+ dcb -j app show dev ${if_name} | jq -c '.dscp_prio'
+}
+
+# Function to set a specific DSCP priority on a device
+replace_dscp_prio() {
+ local if_name=$1
+ local dscp=$2
+ local prio=$3
+ dcb app replace dev ${if_name} dscp-prio ${dscp}:${prio}
+}
+
+# Function to compare DSCP maps
+compare_dscp_maps() {
+ local old_json=$1
+ local new_json=$2
+ local dscp=$3
+ local prio=$4
+
+ # Create a modified old_json with the expected change for comparison
+ local modified_old_json=$(echo "$old_json" |
+ jq --argjson dscp $dscp --argjson prio $prio \
+ 'map(if .[0] == $dscp then [$dscp, $prio] else . end)' |
+ tr -d " \n")
+
+ # Compare new_json with the modified_old_json
+ if [[ "$modified_old_json" == "$new_json" ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+# Function to set DSCP priorities
+set_and_verify_dscp() {
+ local port=$1
+ local dscp=$2
+ local new_prio=$3
+
+ local old_prios=$(get_dscp_prios $port)
+
+ replace_dscp_prio "$port" $dscp $new_prio
+
+ # Fetch current settings and compare
+ local current_prios=$(get_dscp_prios $port)
+ if ! compare_dscp_maps "$old_prios" "$current_prios" $dscp $new_prio; then
+ echo "Error: Unintended changes detected in DSCP map for $port after setting DSCP $dscp to $new_prio."
+ return 1
+ fi
+ return 0
+}
+
+# Function to restore original priorities
+restore_priorities() {
+ local port=$1
+ local original_prios=$2
+
+ echo "Removing test artifacts for $port"
+ local current_prios=$(get_dscp_prios $port)
+ local prio_str=$(echo "$current_prios" |
+ jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+ dcb app del dev $port dscp-prio $prio_str
+
+ echo "Restoring original DSCP priorities for $port"
+ local restore_str=$(echo "$original_prios" |
+ jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+ dcb app add dev $port dscp-prio $restore_str
+
+ local current_prios=$(get_dscp_prios $port)
+ if [[ "$original_prios" != "$current_prios" ]]; then
+ echo "Error: Failed to restore original DSCP priorities for $port"
+ return 1
+ fi
+ return 0
+}
+
+# Initialize DSCP priorities. Set them to predictable values for testing.
+init_dscp_prios() {
+ local port=$1
+ local original_prios=$2
+
+ echo "Removing any existing DSCP priority mappins for $port"
+ local prio_str=$(echo "$original_prios" |
+ jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+ dcb app del dev $port dscp-prio $prio_str
+
+ # Initialize DSCP priorities list
+ local dscp_prios=""
+ for dscp in {0..63}; do
+ dscp_prios+=("$dscp:0")
+ done
+
+ echo "Setting initial DSCP priorities map to 0 for $port"
+ dcb app add dev $port dscp-prio ${dscp_prios[@]}
+}
+
+# Main function to test global DSCP map across specified ports
+test_global_dscp_map() {
+ local ports=("$swp1" "$swp2")
+ local original_dscp_prios_port0=$(get_dscp_prios ${ports[0]})
+ local orig_apptrust=$(port_get_default_apptrust ${swp1})
+ local orig_port_prio=$(port_default_prio_get ${swp1})
+ local apptrust_order="dscp"
+ local port_prio=0
+ local dscp_prio
+ local dscp
+
+ RET=0
+
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ dcb app replace dev ${swp1} default-prio ${port_prio}
+
+ # Initialize DSCP priorities
+ init_dscp_prios "${ports[0]}" "$original_dscp_prios_port0"
+
+ # Loop over each DSCP index
+ for dscp in {0..63}; do
+ # and test each Internal Priority value
+ for dscp_prio in {0..7}; do
+ # do it for each port. This is to test if the global DSCP map
+ # is accessible from all ports.
+ for port in "${ports[@]}"; do
+ if ! set_and_verify_dscp "$port" $dscp $dscp_prio; then
+ RET=1
+ fi
+ done
+
+ # Test if the DSCP priority is correctly applied to the packets
+ run_test_dscp "DSCP (${dscp}) QoS classification, prio: ${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+ if [ ${RET} -eq 1 ]; then
+ break
+ fi
+ done
+ done
+
+ # Restore original priorities
+ if ! restore_priorities "${ports[0]}" "${original_dscp_prios_port0}"; then
+ RET=1
+ fi
+
+ set_apptrust_order ${swp1} "${orig_apptrust}"
+ if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then
+ RET=1
+ fi
+
+ dcb app replace dev ${swp1} default-prio ${orig_port_prio}
+ if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ fi
+
+ log_test "DSCP global map"
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_port_default
+ test_port_apptrust
+ test_global_dscp_map
+"
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
new file mode 100755
index 000000000000..224ca3695c89
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# In addition to the common variables, user might use:
+# LC_SLOT - If not set, all probed line cards are going to be tested,
+# with an exception of the "activation_16x100G_test".
+# It set, only the selected line card is going to be used
+# for tests, including "activation_16x100G_test".
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ unprovision_test
+ provision_test
+ activation_16x100G_test
+"
+
+NUM_NETIFS=0
+
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+until_lc_state_is()
+{
+ local state=$1; shift
+ local current=$("$@")
+
+ echo "$current"
+ [ "$current" == "$state" ]
+}
+
+until_lc_state_is_not()
+{
+ ! until_lc_state_is "$@"
+}
+
+lc_state_get()
+{
+ local lc=$1
+
+ devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].state"
+}
+
+lc_wait_until_state_changes()
+{
+ local lc=$1
+ local state=$2
+ local timeout=$3 # ms
+
+ busywait "$timeout" until_lc_state_is_not "$state" lc_state_get "$lc"
+}
+
+lc_wait_until_state_becomes()
+{
+ local lc=$1
+ local state=$2
+ local timeout=$3 # ms
+
+ busywait "$timeout" until_lc_state_is "$state" lc_state_get "$lc"
+}
+
+until_lc_port_count_is()
+{
+ local port_count=$1; shift
+ local current=$("$@")
+
+ echo "$current"
+ [ $current == $port_count ]
+}
+
+lc_port_count_get()
+{
+ local lc=$1
+
+ devlink port -j | jq -e -r ".[][] | select(.lc==$lc) | .port" | wc -l
+}
+
+lc_wait_until_port_count_is()
+{
+ local lc=$1
+ local port_count=$2
+ local timeout=$3 # ms
+
+ busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc"
+}
+
+lc_nested_devlink_dev_get()
+{
+ local lc=$1
+
+ devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].nested_devlink"
+}
+
+PROV_UNPROV_TIMEOUT=8000 # ms
+POST_PROV_ACT_TIMEOUT=2000 # ms
+PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms
+
+unprovision_one()
+{
+ local lc=$1
+ local state
+
+ state=$(lc_state_get $lc)
+ check_err $? "Failed to get state of linecard $lc"
+ if [[ "$state" == "unprovisioned" ]]; then
+ return
+ fi
+
+ log_info "Unprovisioning linecard $lc"
+
+ devlink lc set $DEVLINK_DEV lc $lc notype
+ check_err $? "Failed to trigger linecard $lc unprovisioning"
+
+ state=$(lc_wait_until_state_changes $lc "unprovisioning" \
+ $PROV_UNPROV_TIMEOUT)
+ check_err $? "Failed to unprovision linecard $lc (timeout)"
+
+ [ "$state" == "unprovisioned" ]
+ check_err $? "Failed to unprovision linecard $lc (state=$state)"
+}
+
+provision_one()
+{
+ local lc=$1
+ local type=$2
+ local state
+
+ log_info "Provisioning linecard $lc"
+
+ devlink lc set $DEVLINK_DEV lc $lc type $type
+ check_err $? "Failed trigger linecard $lc provisioning"
+
+ state=$(lc_wait_until_state_changes $lc "provisioning" \
+ $PROV_UNPROV_TIMEOUT)
+ check_err $? "Failed to provision linecard $lc (timeout)"
+
+ [ "$state" == "provisioned" ] || [ "$state" == "active" ]
+ check_err $? "Failed to provision linecard $lc (state=$state)"
+
+ provisioned_type=$(devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].type")
+ [ "$provisioned_type" == "$type" ]
+ check_err $? "Wrong provision type returned for linecard $lc (got \"$provisioned_type\", expected \"$type\")"
+
+ # Wait for possible activation to make sure the state
+ # won't change after return from this function.
+ state=$(lc_wait_until_state_becomes $lc "active" \
+ $POST_PROV_ACT_TIMEOUT)
+}
+
+unprovision_test()
+{
+ RET=0
+ local lc
+
+ lc=$LC_SLOT
+ unprovision_one $lc
+ log_test "Unprovision"
+}
+
+LC_16X100G_TYPE="16x100G"
+LC_16X100G_PORT_COUNT=16
+
+supported_types_check()
+{
+ local lc=$1
+ local supported_types_count
+ local type_index
+ local lc_16x100_found=false
+
+ supported_types_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+ jq -e -r ".[][][].supported_types | length")
+ [ $supported_types_count != 0 ]
+ check_err $? "No supported types found for linecard $lc"
+ for (( type_index=0; type_index<$supported_types_count; type_index++ ))
+ do
+ type=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+ jq -e -r ".[][][].supported_types[$type_index]")
+ if [[ "$type" == "$LC_16X100G_TYPE" ]]; then
+ lc_16x100_found=true
+ break
+ fi
+ done
+ [ $lc_16x100_found = true ]
+ check_err $? "16X100G not found between supported types of linecard $lc"
+}
+
+ports_check()
+{
+ local lc=$1
+ local expected_port_count=$2
+ local port_count
+
+ port_count=$(lc_wait_until_port_count_is $lc $expected_port_count \
+ $PROV_PORTS_INSTANTIATION_TIMEOUT)
+ [ $port_count != 0 ]
+ check_err $? "No port associated with linecard $lc"
+ [ $port_count == $expected_port_count ]
+ check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)"
+}
+
+lc_dev_info_provisioned_check()
+{
+ local lc=$1
+ local nested_devlink_dev=$2
+ local fixed_hw_revision
+ local running_ini_version
+
+ fixed_hw_revision=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r '.[][].versions.fixed."hw.revision"')
+ check_err $? "Failed to get linecard $lc fixed.hw.revision"
+ log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\""
+ running_ini_version=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r '.[][].versions.running."ini.version"')
+ check_err $? "Failed to get linecard $lc running.ini.version"
+ log_info "Linecard $lc running.ini.version: \"$running_ini_version\""
+}
+
+provision_test()
+{
+ RET=0
+ local lc
+ local type
+ local state
+ local nested_devlink_dev
+
+ lc=$LC_SLOT
+ supported_types_check $lc
+ state=$(lc_state_get $lc)
+ check_err $? "Failed to get state of linecard $lc"
+ if [[ "$state" != "unprovisioned" ]]; then
+ unprovision_one $lc
+ fi
+ provision_one $lc $LC_16X100G_TYPE
+ ports_check $lc $LC_16X100G_PORT_COUNT
+
+ nested_devlink_dev=$(lc_nested_devlink_dev_get $lc)
+ check_err $? "Failed to get nested devlink handle of linecard $lc"
+ lc_dev_info_provisioned_check $lc $nested_devlink_dev
+
+ log_test "Provision"
+}
+
+ACTIVATION_TIMEOUT=20000 # ms
+
+interface_check()
+{
+ ip link set $h1 up
+ ip link set $h2 up
+ ifaces_upped=true
+ setup_wait
+}
+
+lc_dev_info_active_check()
+{
+ local lc=$1
+ local nested_devlink_dev=$2
+ local fixed_device_fw_psid
+ local running_device_fw
+
+ fixed_device_fw_psid=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r ".[][].versions.fixed" | \
+ jq -e -r '."fw.psid"')
+ check_err $? "Failed to get linecard $lc fixed fw PSID"
+ log_info "Linecard $lc fixed.fw.psid: \"$fixed_device_fw_psid\""
+
+ running_device_fw=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r ".[][].versions.running.fw")
+ check_err $? "Failed to get linecard $lc running.fw.version"
+ log_info "Linecard $lc running.fw: \"$running_device_fw\""
+}
+
+activation_16x100G_test()
+{
+ RET=0
+ local lc
+ local type
+ local state
+ local nested_devlink_dev
+
+ lc=$LC_SLOT
+ type=$LC_16X100G_TYPE
+
+ unprovision_one $lc
+ provision_one $lc $type
+ state=$(lc_wait_until_state_becomes $lc "active" \
+ $ACTIVATION_TIMEOUT)
+ check_err $? "Failed to get linecard $lc activated (timeout)"
+
+ interface_check
+
+ nested_devlink_dev=$(lc_nested_devlink_dev_get $lc)
+ check_err $? "Failed to get nested devlink handle of linecard $lc"
+ lc_dev_info_active_check $lc $nested_devlink_dev
+
+ log_test "Activation 16x100G"
+}
+
+setup_prepare()
+{
+ local lc_num=$(devlink lc show -j | jq -e -r ".[][\"$DEVLINK_DEV\"] |length")
+ if [[ $? -ne 0 ]] || [[ $lc_num -eq 0 ]]; then
+ echo "SKIP: No linecard support found"
+ exit $ksft_skip
+ fi
+
+ if [ -z "$LC_SLOT" ]; then
+ echo "SKIP: \"LC_SLOT\" variable not provided"
+ exit $ksft_skip
+ fi
+
+ # Interfaces are not present during the script start,
+ # that's why we define NUM_NETIFS here so dummy
+ # implicit veth pairs are not created.
+ NUM_NETIFS=2
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ ifaces_upped=false
+}
+
+cleanup()
+{
+ if [ "$ifaces_upped" = true ] ; then
+ ip link set $h1 down
+ ip link set $h2 down
+ fi
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh
index 89b55e946eed..36055279ba92 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh
@@ -116,7 +116,7 @@ dev_del_test()
log_test "Device delete"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
index a37273473c1b..64153bbf95df 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
@@ -83,10 +83,12 @@ ALL_TESTS="
ptp_general_test
flow_action_sample_test
flow_action_trap_test
+ eapol_test
"
NUM_NETIFS=4
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
h1_create()
{
@@ -626,8 +628,7 @@ ipv6_redirect_test()
ptp_event_test()
{
- # PTP is only supported on Spectrum-1, for now.
- [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+ mlxsw_only_on_spectrum 1 || return
# PTP Sync (0)
devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \
@@ -638,8 +639,7 @@ ptp_event_test()
ptp_general_test()
{
- # PTP is only supported on Spectrum-1, for now.
- [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+ mlxsw_only_on_spectrum 1 || return
# PTP Announce (b)
devlink_trap_stats_test "PTP General Message" "ptp_general" \
@@ -678,6 +678,27 @@ flow_action_trap_test()
tc qdisc del dev $rp1 clsact
}
+eapol_payload_get()
+{
+ local source_mac=$1; shift
+ local p
+
+ p=$(:
+ )"01:80:C2:00:00:03:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"88:8E:"$( : ETH type
+ )
+ echo $p
+}
+
+eapol_test()
+{
+ local h1mac=$(mac_get $h1)
+
+ devlink_trap_stats_test "EAPOL" "eapol" $MZ $h1 -c 1 \
+ $(eapol_payload_get $h1mac) -p 100 -q
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
index a4c2812e9807..8d4b2c6265b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -14,6 +14,7 @@ ALL_TESTS="
ingress_stp_filter_test
port_list_is_empty_test
port_loopback_filter_test
+ locked_port_test
"
NUM_NETIFS=4
source $lib_dir/tc_common.sh
@@ -420,6 +421,110 @@ port_loopback_filter_test()
port_loopback_filter_uc_test
}
+locked_port_miss_test()
+{
+ local trap_name="locked_port"
+ local smac=00:11:22:33:44:55
+
+ bridge link set dev $swp1 learning off
+ bridge link set dev $swp1 locked on
+
+ RET=0
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased before setting action to \"trap\""
+
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_err $? "Trap stats did not increase when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after setting action to \"drop\""
+
+ devlink_trap_action_set $trap_name "trap"
+
+ bridge fdb replace $smac dev $swp1 master static vlan 1
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after adding an FDB entry"
+
+ bridge fdb del $smac dev $swp1 master static vlan 1
+ bridge link set dev $swp1 locked off
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after unlocking port"
+
+ log_test "Locked port - FDB miss"
+
+ devlink_trap_action_set $trap_name "drop"
+ bridge link set dev $swp1 learning on
+}
+
+locked_port_mismatch_test()
+{
+ local trap_name="locked_port"
+ local smac=00:11:22:33:44:55
+
+ bridge link set dev $swp1 learning off
+ bridge link set dev $swp1 locked on
+
+ RET=0
+
+ bridge fdb replace $smac dev $swp2 master static vlan 1
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased before setting action to \"trap\""
+
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_err $? "Trap stats did not increase when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after setting action to \"drop\""
+
+ devlink_trap_action_set $trap_name "trap"
+ bridge link set dev $swp1 locked off
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after unlocking port"
+
+ bridge link set dev $swp1 locked on
+ bridge fdb replace $smac dev $swp1 master static vlan 1
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after replacing an FDB entry"
+
+ bridge fdb del $smac dev $swp1 master static vlan 1
+ devlink_trap_action_set $trap_name "drop"
+
+ log_test "Locked port - FDB mismatch"
+
+ bridge link set dev $swp1 locked off
+ bridge link set dev $swp1 learning on
+}
+
+locked_port_test()
+{
+ locked_port_miss_test
+ locked_port_mismatch_test
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index 4029833f7e27..db5806d189bb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -109,6 +109,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
@@ -592,7 +595,7 @@ irif_disabled_test()
log_test "Ingress RIF disabled"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
ip link set dev $rp1 nomaster
__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
ip link del dev br0 type bridge
@@ -642,7 +645,7 @@ erif_disabled_test()
log_test "Egress RIF disabled"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
ip link del dev br0 type bridge
devlink_trap_action_set $trap_name "drop"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 1fedfc9da434..5d6d88b600f0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -111,6 +111,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
@@ -199,7 +202,7 @@ mtu_value_is_too_small_test()
mtu_restore $rp2
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
}
@@ -232,7 +235,7 @@ __ttl_value_is_too_small_test()
log_test "TTL value is too small: TTL=$ttl_val"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
}
@@ -296,7 +299,7 @@ __mc_reverse_path_forwarding_test()
log_test "Multicast reverse path forwarding: $desc"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower
}
@@ -344,7 +347,7 @@ __reject_route_test()
log_test "Reject route: $desc"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
ip route del unreachable $unreachable
tc filter del dev $h1 ingress protocol $proto pref 1 handle 101 flower
}
@@ -446,6 +449,35 @@ __invalid_nexthop_test()
log_test "Unresolved neigh: nexthop does not exist: $desc"
}
+__invalid_nexthop_bucket_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local via_add=$1; shift
+ local trap_name="unresolved_neigh"
+
+ RET=0
+
+ # Check that route to nexthop that does not exist triggers
+ # unresolved_neigh
+ ip nexthop add id 1 via $via_add dev $rp2
+ ip nexthop add id 10 group 1 type resilient buckets 32
+ ip route add $dip nhid 10
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ ping_do $h1 $dip
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets ]]; then
+ check_err 1 "Trap counter did not increase"
+ fi
+
+ ip route del $dip nhid 10
+ ip nexthop del id 10
+ ip nexthop del id 1
+ log_test "Unresolved neigh: nexthop bucket does not exist: $desc"
+}
+
unresolved_neigh_test()
{
__host_miss_test "IPv4" 198.51.100.1
@@ -453,6 +485,8 @@ unresolved_neigh_test()
__invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
__invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
2001:db8:2::4
+ __invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4
+ __invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4
}
vrf_without_routes_create()
@@ -508,7 +542,7 @@ ipv4_lpm_miss_test()
log_test "LPM miss: IPv4"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
vrf_without_routes_destroy
}
@@ -535,7 +569,7 @@ ipv6_lpm_miss_test()
log_test "LPM miss: IPv6"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
vrf_without_routes_destroy
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
index 508a702f0021..e212ad8ccef6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -44,64 +44,51 @@ source $lib_dir/devlink_lib.sh
h1_create()
{
- simple_if_init $h1 192.0.2.1/24
+ adf_simple_if_init $h1 192.0.2.1/24
+
mtu_set $h1 10000
+ defer mtu_restore $h1
ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
-}
-
-h1_destroy()
-{
- ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
-
- mtu_restore $h1
- simple_if_fini $h1 192.0.2.1/24
+ defer ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
}
h2_create()
{
- simple_if_init $h2 198.51.100.1/24
+ adf_simple_if_init $h2 198.51.100.1/24
+
mtu_set $h2 10000
+ defer mtu_restore $h2
ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
-}
-
-h2_destroy()
-{
- ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
-
- mtu_restore $h2
- simple_if_fini $h2 198.51.100.1/24
+ defer ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
}
router_create()
{
ip link set dev $rp1 up
+ defer ip link set dev $rp1 down
+
ip link set dev $rp2 up
+ defer ip link set dev $rp2 down
__addr_add_del $rp1 add 192.0.2.2/24
+ defer __addr_add_del $rp1 del 192.0.2.2/24
+
__addr_add_del $rp2 add 198.51.100.2/24
+ defer __addr_add_del $rp2 del 198.51.100.2/24
+
mtu_set $rp1 10000
+ defer mtu_restore $rp1
+
mtu_set $rp2 10000
+ defer mtu_restore $rp2
ip -4 route add blackhole 198.51.100.100
+ defer ip -4 route del blackhole 198.51.100.100
devlink trap set $DEVLINK_DEV trap blackhole_route action trap
-}
-
-router_destroy()
-{
- devlink trap set $DEVLINK_DEV trap blackhole_route action drop
-
- ip -4 route del blackhole 198.51.100.100
-
- mtu_restore $rp2
- mtu_restore $rp1
- __addr_add_del $rp2 del 198.51.100.2/24
- __addr_add_del $rp1 del 192.0.2.2/24
-
- ip link set dev $rp2 down
- ip link set dev $rp1 down
+ defer devlink trap set $DEVLINK_DEV trap blackhole_route action drop
}
setup_prepare()
@@ -114,7 +101,10 @@ setup_prepare()
rp1_mac=$(mac_get $rp1)
- vrf_prepare
+ # Reload to ensure devlink-trap settings are back to default.
+ defer devlink_reload
+
+ adf_vrf_prepare
h1_create
h2_create
@@ -122,21 +112,6 @@ setup_prepare()
router_create
}
-cleanup()
-{
- pre_cleanup
-
- router_destroy
-
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-
- # Reload to ensure devlink-trap settings are back to default.
- devlink_reload
-}
-
rate_limits_test()
{
RET=0
@@ -214,7 +189,10 @@ __rate_test()
# by the policer. Make sure measured received rate is about 1000 pps
log_info "=== Tx rate: Highest, Policer rate: 1000 pps ==="
+ defer_scope_push
+
start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+ defer stop_traffic $!
sleep 5 # Take measurements when rate is stable
@@ -229,13 +207,16 @@ __rate_test()
check_err $? "Expected non-zero policer drop rate, got 0"
log_info "Measured policer drop rate of $drop_rate pps"
- stop_traffic
+ defer_scope_pop
# Send packets at a rate of 1000 pps and make sure they are not dropped
# by the policer
log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ==="
+ defer_scope_push
+
start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -d 1msec
+ defer stop_traffic $!
sleep 5 # Take measurements when rate is stable
@@ -244,7 +225,7 @@ __rate_test()
check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
log_info "Measured policer drop rate of $drop_rate pps"
- stop_traffic
+ defer_scope_pop
# Unbind the policer and send packets at highest possible rate. Make
# sure they are not dropped by the policer and that the measured
@@ -253,7 +234,10 @@ __rate_test()
devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+ defer_scope_push
+
start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+ defer stop_traffic $!
rate=$(trap_rate_get)
(( rate > 1000 ))
@@ -265,20 +249,24 @@ __rate_test()
check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
log_info "Measured policer drop rate of $drop_rate pps"
- stop_traffic
+ defer_scope_pop
log_test "Trap policer rate"
}
rate_test()
{
- local id
+ local last_policer=$(devlink -j -p trap policer show |
+ jq '[.[]["'$DEVLINK_DEV'"][].policer] | max')
- for id in $(devlink_trap_policer_ids_get); do
- echo
- log_info "Running rate test for policer $id"
- __rate_test $id
- done
+ log_info "Running rate test for policer 1"
+ __rate_test 1
+
+ log_info "Running rate test for policer $((last_policer / 2))"
+ __rate_test $((last_policer / 2))
+
+ log_info "Running rate test for policer $last_policer"
+ __rate_test $last_policer
}
__burst_test()
@@ -342,13 +330,17 @@ __burst_test()
burst_test()
{
- local id
+ local last_policer=$(devlink -j -p trap policer show |
+ jq '[.[]["'$DEVLINK_DEV'"][].policer] | max')
+
+ log_info "Running burst test for policer 1"
+ __burst_test 1
+
+ log_info "Running burst test for policer $((last_policer / 2))"
+ __burst_test $((last_policer / 2))
- for id in $(devlink_trap_policer_ids_get); do
- echo
- log_info "Running burst size test for policer $id"
- __burst_test $id
- done
+ log_info "Running burst test for policer $last_policer"
+ __burst_test $last_policer
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
index 8817851da7a9..4ac1dae92d0f 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -13,7 +13,7 @@
# |
# +-------------------|-----+
# | SW1 | |
-# | $swp1 + |
+# | $swp1 + |
# | 192.0.2.2/28 |
# | |
# | + g1a (gre) |
@@ -27,8 +27,8 @@
# |
# +--|----------------------+
# | | VRF2 |
-# | + $rp2 |
-# | 198.51.100.2/28 |
+# | + $rp2 |
+# | 198.51.100.2/28 |
# +-------------------------+
lib_dir=$(dirname $0)/../../../net/forwarding
@@ -116,12 +116,16 @@ cleanup()
forwarding_restore
}
-ecn_payload_get()
+ipip_payload_get()
{
+ local flags=$1; shift
+ local key=$1; shift
+
p=$(:
- )"0"$( : GRE flags
+ )"$flags"$( : GRE flags
)"0:00:"$( : Reserved + version
)"08:00:"$( : ETH protocol type
+ )"$key"$( : Key
)"4"$( : IP version
)"5:"$( : IHL
)"00:"$( : IP TOS
@@ -137,6 +141,11 @@ ecn_payload_get()
echo $p
}
+ecn_payload_get()
+{
+ echo $(ipip_payload_get "0")
+}
+
ecn_decap_test()
{
local trap_name="decap_error"
@@ -167,35 +176,10 @@ ecn_decap_test()
log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
}
-ipip_payload_get()
-{
- local flags=$1; shift
- local key=$1; shift
-
- p=$(:
- )"$flags"$( : GRE flags
- )"0:00:"$( : Reserved + version
- )"08:00:"$( : ETH protocol type
- )"$key"$( : Key
- )"4"$( : IP version
- )"5:"$( : IHL
- )"00:"$( : IP TOS
- )"00:14:"$( : IP total length
- )"00:00:"$( : IP identification
- )"20:00:"$( : IP flags + frag off
- )"30:"$( : IP TTL
- )"01:"$( : IP proto
- )"E7:E6:"$( : IP header csum
- )"C0:00:01:01:"$( : IP saddr : 192.0.1.1
- )"C0:00:02:01:"$( : IP daddr : 192.0.2.1
- )
- echo $p
-}
-
no_matching_tunnel_test()
{
local trap_name="decap_error"
@@ -223,7 +207,7 @@ no_matching_tunnel_test()
log_test "$desc"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
}
@@ -239,7 +223,8 @@ decap_error_test()
no_matching_tunnel_test "Decap error: Source IP check failed" \
192.0.2.68 "0"
no_matching_tunnel_test \
- "Decap error: Key exists but was not expected" $sip "2" ":E9:"
+ "Decap error: Key exists but was not expected" $sip "2" \
+ "00:00:00:E9:"
# Destroy the tunnel and create new one with key
__addr_add_del g1 del 192.0.2.65/32
@@ -251,7 +236,8 @@ decap_error_test()
no_matching_tunnel_test \
"Decap error: Key does not exist but was expected" $sip "0"
no_matching_tunnel_test \
- "Decap error: Packet has a wrong key field" $sip "2" "E8:"
+ "Decap error: Packet has a wrong key field" $sip "2" \
+ "00:00:00:E8:"
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh
new file mode 100755
index 000000000000..fce885184404
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh
@@ -0,0 +1,250 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 2001:db8:1::1/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|-----+
+# | SW1 | |
+# | $swp1 + |
+# | 2001:db8:1::2/64 |
+# | |
+# | + g1 (ip6gre) |
+# | loc=2001:db8:3::1 |
+# | rem=2001:db8:3::2 |
+# | tos=inherit |
+# | |
+# | + $rp1 |
+# | | 2001:db8:10::1/64 |
+# +--|----------------------+
+# |
+# +--|----------------------+
+# | | VRF2 |
+# | + $rp2 |
+# | 2001:db8:10::2/64 |
+# +-------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ decap_error_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+vrf2_create()
+{
+ simple_if_init $rp2 2001:db8:10::2/64
+}
+
+vrf2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:10::2/64
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ __addr_add_del $swp1 add 2001:db8:1::2/64
+ tc qdisc add dev $swp1 clsact
+
+ tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ ttl inherit
+ ip link set dev g1 up
+ __addr_add_del g1 add 2001:db8:3::1/128
+
+ ip link set dev $rp1 up
+ __addr_add_del $rp1 add 2001:db8:10::1/64
+}
+
+switch_destroy()
+{
+ __addr_add_del $rp1 del 2001:db8:10::1/64
+ ip link set dev $rp1 down
+
+ __addr_add_del g1 del 2001:db8:3::1/128
+ ip link set dev g1 down
+ tunnel_destroy g1
+
+ tc qdisc del dev $swp1 clsact
+ __addr_add_del $swp1 del 2001:db8:1::2/64
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ switch_create
+ vrf2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrf2_destroy
+ switch_destroy
+ h1_destroy
+ vrf_cleanup
+ forwarding_restore
+}
+
+ipip_payload_get()
+{
+ local saddr="20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ local flags=$1; shift
+ local key=$1; shift
+
+ p=$(:
+ )"$flags"$( : GRE flags
+ )"0:00:"$( : Reserved + version
+ )"86:dd:"$( : ETH protocol type
+ )"$key"$( : Key
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:00:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )
+ echo $p
+}
+
+ecn_payload_get()
+{
+ echo $(ipip_payload_get "0")
+}
+
+ecn_decap_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local ecn_desc=$1; shift
+ local outer_tos=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 skip_sw \
+ action pass
+
+ rp1_mac=$(mac_get $rp1)
+ rp2_mac=$(mac_get $rp2)
+ payload=$(ecn_payload_get)
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+ -A 2001:db8:3::2 -B 2001:db8:3::1 -t ip \
+ tos=$outer_tos,next=47,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+ kill_process $mz_pid
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+no_matching_tunnel_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local sip=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ rp2_mac=$(mac_get $rp2)
+ payload=$(ipip_payload_get "$@")
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+ -A $sip -B 2001:db8:3::1 -t ip next=47,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc"
+
+ kill_process $mz_pid
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+ # Correct source IP - the remote address
+ local sip=2001:db8:3::2
+
+ ecn_decap_test "Decap error" "ECT(1)" 01
+ ecn_decap_test "Decap error" "ECT(0)" 02
+ ecn_decap_test "Decap error" "CE" 03
+
+ no_matching_tunnel_test "Decap error: Source IP check failed" \
+ 2001:db8:4::2 "0"
+ no_matching_tunnel_test \
+ "Decap error: Key exists but was not expected" $sip "2" \
+ "00:00:00:E9:"
+
+ # Destroy the tunnel and create new one with key
+ __addr_add_del g1 del 2001:db8:3::1/128
+ tunnel_destroy g1
+
+ tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ ttl inherit key 233
+ __addr_add_del g1 add 2001:db8:3::1/128
+
+ no_matching_tunnel_test \
+ "Decap error: Key does not exist but was expected" $sip "0"
+ no_matching_tunnel_test \
+ "Decap error: Packet has a wrong key field" $sip "2" \
+ "00:00:00:E8:"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
index 10e0f3dbc930..7aca8e5922cf 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -183,7 +183,7 @@ ecn_decap_test()
log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
}
@@ -217,9 +217,11 @@ short_payload_get()
dest_mac=$(mac_get $h1)
p=$(:
)"08:"$( : VXLAN flags
- )"01:00:00:"$( : VXLAN reserved
+ )"00:00:00:"$( : VXLAN reserved
)"00:03:e8:"$( : VXLAN VNI : 1000
)"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
)
echo $p
}
@@ -251,7 +253,7 @@ corrupted_packet_test()
log_test "$desc"
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
}
@@ -263,7 +265,8 @@ decap_error_test()
corrupted_packet_test "Decap error: Reserved bits in use" \
"reserved_bits_payload_get"
- corrupted_packet_test "Decap error: No L2 header" "short_payload_get"
+ corrupted_packet_test "Decap error: Too short inner packet" \
+ "short_payload_get"
}
mc_smac_payload_get()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh
new file mode 100755
index 000000000000..4599c331240b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh
@@ -0,0 +1,342 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel drops and exceptions functionality over mlxsw.
+# Check all traps to make sure they are triggered under the right
+# conditions.
+
+# +------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 2001:db8:1::1/64 |
+# +----|-------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR1 (802.1d) | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 2001:db8:3::1 | |
+# | | id 1000 dstport $VXPORT | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | VRF2 |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ decap_error_test
+ overlay_smac_is_mc_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ tc qdisc add dev $swp1 clsact
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \
+ dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+ tos inherit ttl 100
+ ip link set dev vx1 master br1
+ ip link set dev vx1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 2001:db8:3::1/64
+}
+
+switch_destroy()
+{
+ ip address del dev $rp1 2001:db8:3::1/64
+ ip link set dev $rp1 down
+
+ ip link set dev vx1 down
+ ip link set dev vx1 nomaster
+ ip link del dev vx1
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrf2_create()
+{
+ simple_if_init $rp2 2001:db8:3::2/64
+}
+
+vrf2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+ h1_create
+ switch_create
+ vrf2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrf2_destroy
+ switch_destroy
+ h1_destroy
+ forwarding_restore
+ vrf_cleanup
+}
+
+ecn_payload_get()
+{
+ local dest_mac=$(mac_get $h1)
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+ecn_decap_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local ecn_desc=$1; shift
+ local outer_tos=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$(ecn_payload_get)
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 2001:db8:3::1 -t udp \
+ sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+ kill_process $mz_pid
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+reserved_bits_payload_get()
+{
+ local dest_mac=$(mac_get $h1)
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"01:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+short_payload_get()
+{
+ dest_mac=$(mac_get $h1)
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )
+ echo $p
+}
+
+corrupted_packet_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local payload_get=$1; shift
+ local mz_pid
+
+ RET=0
+
+ # In case of too short packet, there is no any inner packet,
+ # so the matching will always succeed
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower skip_hw src_ip 2001:db8:3::1 dst_ip 2001:db8:1::1 \
+ action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$($payload_get)
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc"
+
+ kill_process $mz_pid
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+ ecn_decap_test "Decap error" "ECT(1)" 01
+ ecn_decap_test "Decap error" "ECT(0)" 02
+ ecn_decap_test "Decap error" "CE" 03
+
+ corrupted_packet_test "Decap error: Reserved bits in use" \
+ "reserved_bits_payload_get"
+ corrupted_packet_test "Decap error: Too short inner packet" \
+ "short_payload_get"
+}
+
+mc_smac_payload_get()
+{
+ local dest_mac=$(mac_get $h1)
+ local source_mac="01:02:03:04:05:06"
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+overlay_smac_is_mc_test()
+{
+ local trap_name="overlay_smac_is_mc"
+ local mz_pid
+
+ RET=0
+
+ # The matching will be checked on devlink_trap_drop_test()
+ # and the filter will be removed on devlink_trap_drop_cleanup()
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_mac 01:02:03:04:05:06 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$(mc_smac_payload_get)
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $swp1 101
+
+ log_test "Overlay source MAC is multicast"
+
+ devlink_trap_drop_cleanup $mz_pid $swp1 "ipv6" 1 101
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
new file mode 100755
index 000000000000..a5c2aec52898
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
@@ -0,0 +1,272 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test VLAN classification after routing and verify that the order of
+# configuration does not impact switch behavior. Verify that {RIF, Port}->VID
+# mapping is added correctly for existing {Port, VID}->FID mapping and that
+# {RIF, Port}->VID mapping is added correctly for new {Port, VID}->FID mapping.
+
+# +-------------------+ +--------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.10 + | | + $h2.10 |
+# | 192.0.2.1/28 | | | | 192.0.2.3/28 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# +----------------|--+ +--|-----------------+
+# | |
+# +----------------|-------------------------|-----------------+
+# | SW $swp1 + + $swp2 |
+# | | | |
+# | +--------------|-------------------------|---------------+ |
+# | | $swp1.10 + + $swp2.10 | |
+# | | | |
+# | | br0 | |
+# | | 192.0.2.2/28 | |
+# | +--------------------------------------------------------+ |
+# | |
+# | $swp3.20 + |
+# | 192.0.2.17/28 | |
+# | | |
+# | $swp3 + |
+# +---------------|--------------------------------------------+
+# |
+# +---------------|--+
+# | $h3 + |
+# | | |
+# | $h3.20 + |
+# | 192.0.2.18/28 |
+# | |
+# | H3 |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ port_vid_map_rif
+ rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3
+ vlan_create $h3 20 v$h3 192.0.2.18/28
+
+ ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+ ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+ vlan_destroy $h3 20
+ simple_if_fini $h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ # By default, a link-local address is generated when netdevice becomes
+ # up. Adding an address to the bridge will cause creating a RIF for it.
+ # Prevent generating link-local address to be able to control when the
+ # RIF is added.
+ sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+ ip link set dev br0 up
+
+ ip link set dev $swp2 up
+ vlan_create $swp2 10
+ ip link set dev $swp2.10 master br0
+
+ ip link set dev $swp3 up
+ vlan_create $swp3 20 "" 192.0.2.17/28
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev $swp3.20 192.0.2.18 lladdr $(mac_get $h3.20)
+}
+
+switch_destroy()
+{
+ vlan_destroy $swp3 20
+ ip link set dev $swp3 down
+
+ ip link set dev $swp2.10 nomaster
+ vlan_destroy $swp2 10
+ ip link set dev $swp2 down
+
+ ip link set dev br0 down
+ sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+ ip link del dev br0
+
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+bridge_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del br0 add 192.0.2.2/28
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ sleep 1
+}
+
+bridge_rif_del()
+{
+ __addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+ RET=0
+
+ # First add {port, VID}->FID for swp1.10, then add a RIF and verify that
+ # packets get the correct VID after routing.
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+ bridge_rif_add
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing {port, VID}->FID mapping"
+
+ tc filter del dev $swp1 egress
+
+ bridge_rif_del
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+ RET=0
+
+ # First add an address to the bridge, which will create a RIF on top of
+ # it, then add a new {port, VID}->FID mapping and verify that packets
+ # get the correct VID after routing.
+ bridge_rif_add
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp1 egress
+
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+ bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
index 91891b9418d7..fe905a7f34b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
lib_dir=$(dirname $0)/../../../net/forwarding
+ethtool_lib_dir=$(dirname $0)/../hw
ALL_TESTS="
autoneg
@@ -11,7 +12,7 @@ ALL_TESTS="
NUM_NETIFS=2
: ${TIMEOUT:=30000} # ms
source $lib_dir/lib.sh
-source $lib_dir/ethtool_lib.sh
+source $ethtool_lib_dir/ethtool_lib.sh
setup_prepare()
{
@@ -24,8 +25,8 @@ setup_prepare()
busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
check_err $? "ports did not come up"
- local lanes_exist=$(ethtool $swp1 | grep 'Lanes:')
- if [[ -z $lanes_exist ]]; then
+ busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+ if [[ $? -ne 0 ]]; then
log_test "SKIP: driver does not support lanes setting"
exit 1
fi
@@ -122,8 +123,9 @@ autoneg()
ethtool_set $swp1 speed $max_speed lanes $lanes
ip link set dev $swp1 up
ip link set dev $swp2 up
- busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
- check_err $? "ports did not come up"
+
+ busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+ check_err $? "Lanes parameter is not presented on time"
check_lanes $swp1 $lanes $max_speed
log_test "$lanes lanes is autonegotiated"
@@ -160,8 +162,9 @@ autoneg_force_mode()
ethtool_set $swp2 speed $max_speed lanes $lanes autoneg off
ip link set dev $swp1 up
ip link set dev $swp2 up
- busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
- check_err $? "ports did not come up"
+
+ busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+ check_err $? "Lanes parameter is not presented on time"
check_lanes $swp1 $lanes $max_speed
log_test "Autoneg off, $lanes lanes detected during force mode"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
index 7a0a99c1d22f..6fd422d38fe8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
@@ -35,7 +35,9 @@ netdev_pre_up_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 0 mcast_snooping 0
+ ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link add name vx1 up type vxlan id 1000 \
local 192.0.2.17 remote 192.0.2.18 \
dstport 4789 nolearning noudpcsum tos inherit ttl 100
@@ -46,7 +48,9 @@ netdev_pre_up_test()
ip link set dev $swp1 master br1
check_err $?
- ip link add name br2 up type bridge vlan_filtering 0 mcast_snooping 0
+ ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+ ip link set dev br2 addrgenmode none
+ ip link set dev br2 up
ip link add name vx2 up type vxlan id 2000 \
local 192.0.2.17 remote 192.0.2.18 \
dstport 4789 nolearning noudpcsum tos inherit ttl 100
@@ -81,7 +85,9 @@ vxlan_vlan_add_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
+ ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
ip link add name vx1 up type vxlan id 1000 \
@@ -117,7 +123,9 @@ vxlan_bridge_create_test()
dstport 4789 tos inherit ttl 100
# Test with VLAN-aware bridge.
- ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
+ ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev vx1 master br1
@@ -142,8 +150,12 @@ bridge_create_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 1
- ip link add name br2 up type bridge vlan_filtering 1
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
+ ip link add name br2 type bridge vlan_filtering 1
+ ip link set dev br2 addrgenmode none
+ ip link set dev br2 up
ip link set dev $swp1 master br1
check_err $?
diff --git a/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh
new file mode 100755
index 000000000000..941ba4c485c9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ l3_monitor_test
+"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+swp=$NETIF_NO_CABLE
+
+cleanup()
+{
+ pre_cleanup
+}
+
+l3_monitor_test()
+{
+ hw_stats_monitor_test $swp l3 \
+ "ip addr add dev $swp 192.0.2.1/28" \
+ "ip addr del dev $swp 192.0.2.1/28"
+}
+
+trap cleanup EXIT
+
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
new file mode 100755
index 000000000000..7d7f862c809c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
@@ -0,0 +1,263 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over bridge and verify that the order of configuration does not
+# impact switch behavior. Verify that RIF is added correctly for existing
+# mappings and that new mappings use the correct RIF.
+
+# +-------------------+ +--------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.10 + | | + $h2.10 |
+# | 192.0.2.1/28 | | | | 192.0.2.3/28 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# +----------------|--+ +--|-----------------+
+# | |
+# +----------------|-------------------------|-----------------+
+# | SW $swp1 + + $swp2 |
+# | | | |
+# | +--------------|-------------------------|---------------+ |
+# | | $swp1.10 + + $swp2.10 | |
+# | | | |
+# | | br0 | |
+# | | 192.0.2.2/28 | |
+# | +--------------------------------------------------------+ |
+# | |
+# | $swp3.10 + |
+# | 192.0.2.17/28 | |
+# | | |
+# | $swp3 + |
+# +---------------|--------------------------------------------+
+# |
+# +---------------|--+
+# | $h3 + |
+# | | |
+# | $h3.10 + |
+# | 192.0.2.18/28 |
+# | |
+# | H3 |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ port_vid_map_rif
+ rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3
+ vlan_create $h3 10 v$h3 192.0.2.18/28
+
+ ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+ ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+ vlan_destroy $h3 10
+ simple_if_fini $h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ # By default, a link-local address is generated when netdevice becomes
+ # up. Adding an address to the bridge will cause creating a RIF for it.
+ # Prevent generating link-local address to be able to control when the
+ # RIF is added.
+ sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+ ip link set dev br0 up
+
+ ip link set dev $swp2 up
+ vlan_create $swp2 10
+ ip link set dev $swp2.10 master br0
+
+ ip link set dev $swp3 up
+ vlan_create $swp3 10 "" 192.0.2.17/28
+ tc qdisc add dev $swp3 clsact
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev $swp3.10 192.0.2.18 lladdr $(mac_get $h3.10)
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp3 clsact
+ vlan_destroy $swp3 10
+ ip link set dev $swp3 down
+
+ ip link set dev $swp2.10 nomaster
+ vlan_destroy $swp2 10
+ ip link set dev $swp2 down
+
+ ip link set dev br0 down
+ sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+ ip link del dev br0
+
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+bridge_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del br0 add 192.0.2.2/28
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ sleep 1
+}
+
+bridge_rif_del()
+{
+ __addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+ RET=0
+
+ # First add {port, VID}->FID for $swp1.10, then add a RIF and verify
+ # that packets can be routed via the existing mapping.
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+ bridge_rif_add
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing {port, VID}->FID mapping"
+
+ tc filter del dev $swp3 egress
+
+ bridge_rif_del
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+ RET=0
+
+ # First add an address to the bridge, which will create a RIF on top of
+ # it, then add a new {port, VID}->FID mapping and verify that packets
+ # can be routed via the new mapping.
+ bridge_rif_add
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp3 egress
+
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+ bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
new file mode 100755
index 000000000000..577293bab88b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
@@ -0,0 +1,264 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over bridge and verify that the order of configuration does not
+# impact switch behavior. Verify that RIF is added correctly for existing
+# mapping and that packets can be routed via port which is added after the FID
+# already has a RIF.
+
+# +-------------------+ +--------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.10 + | | + $h2.10 |
+# | 192.0.2.1/28 | | | | 192.0.2.3/28 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# +----------------|--+ +--|-----------------+
+# | |
+# +----------------|-------------------------|-----------------+
+# | SW | | |
+# | +--------------|-------------------------|---------------+ |
+# | | $swp1 + + $swp2 | |
+# | | | |
+# | | br0 | |
+# | +--------------------------------------------------------+ |
+# | | |
+# | br0.10 |
+# | 192.0.2.2/28 |
+# | |
+# | |
+# | $swp3 + |
+# | 192.0.2.17/28 | |
+# +----------------|-------------------------------------------+
+# |
+# +----------------|--+
+# | $h3 + |
+# | 192.0.2.18/28 |
+# | |
+# | H3 |
+# +-------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ vid_map_rif
+ rif_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.18/28
+ ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+ ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+ simple_if_fini $h3 192.0.2.18/28
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+
+ ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+ # By default, a link-local address is generated when netdevice becomes
+ # up. Adding an address to the bridge will cause creating a RIF for it.
+ # Prevent generating link-local address to be able to control when the
+ # RIF is added.
+ sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+ ip link set dev br0 up
+
+ ip link set dev $swp2 up
+ ip link set dev $swp2 master br0
+ bridge vlan add vid 10 dev $swp2
+
+ ip link set dev $swp3 up
+ __addr_add_del $swp3 add 192.0.2.17/28
+ tc qdisc add dev $swp3 clsact
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev $swp3 192.0.2.18 lladdr $(mac_get $h3)
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp3 clsact
+ __addr_add_del $swp3 del 192.0.2.17/28
+ ip link set dev $swp3 down
+
+ bridge vlan del vid 10 dev $swp2
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp2 down
+
+ ip link set dev br0 down
+ sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+ ip link del dev br0
+
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+bridge_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ vlan_create br0 10 "" 192.0.2.2/28
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ sleep 1
+}
+
+bridge_rif_del()
+{
+ vlan_destroy br0 10
+}
+
+vid_map_rif()
+{
+ RET=0
+
+ # First add VID->FID for vlan 10, then add a RIF and verify that
+ # packets can be routed via the existing mapping.
+ bridge vlan add vid 10 dev br0 self
+ ip link set dev $swp1 master br0
+ bridge vlan add vid 10 dev $swp1
+
+ bridge_rif_add
+
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing VID->FID mapping"
+
+ tc filter del dev $swp3 egress
+
+ bridge_rif_del
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 nomaster
+ bridge vlan del vid 10 dev br0 self
+}
+
+rif_vid_map()
+{
+ RET=0
+
+ # Using 802.1Q, there is only one VID->FID map for each VID. That means
+ # that we cannot really check adding a new map for existing FID with a
+ # RIF. Verify that packets can be routed via port which is added after
+ # the FID already has a RIF, although in practice there is no new
+ # mapping in the hardware.
+ bridge vlan add vid 10 dev br0 self
+ bridge_rif_add
+
+ ip link set dev $swp1 master br0
+ bridge vlan add vid 10 dev $swp1
+
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add port to VID->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp3 egress
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 nomaster
+
+ bridge_rif_del
+ bridge vlan del vid 10 dev br0 self
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
new file mode 100755
index 000000000000..90450216a10d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
@@ -0,0 +1,311 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing after VXLAN decapsulation and verify that the order of
+# configuration does not impact switch behavior. Verify that RIF is added
+# correctly for existing mapping and that new mapping uses the correct RIF.
+
+# +---------------------------+
+# | H1 |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|----------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 br1 | |
+# | | vid 10 pvid untagged | |
+# | | | |
+# | | | |
+# | | + vx4001 | |
+# | | local 192.0.2.17 | |
+# | | remote 192.0.2.18 | |
+# | | id 104001 | |
+# | | dstport $VXPORT | |
+# | | vid 4001 pvid untagged | |
+# | | | |
+# | +----------------------------------+------------------------------------+ |
+# | | |
+# | +----------------------------------|------------------------------------+ |
+# | | | | |
+# | | +-------------------------------+---------------------------------+ | |
+# | | | | | |
+# | | + vlan10 vlan4001 + | |
+# | | 192.0.2.2/28 | |
+# | | | |
+# | | vrf-green | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 +lo |
+# | | 198.51.100.1/24 192.0.2.17/32 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | v$rp2 |
+# | + $rp2 |
+# | 198.51.100.2/24 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ vni_fid_map_rif
+ rif_vni_fid_map
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 198.51.100.1/24
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 10 dev $swp1 pvid untagged
+
+ tc qdisc add dev $swp1 clsact
+
+ ip link add name vx4001 type vxlan id 104001 \
+ local 192.0.2.17 dstport $VXPORT \
+ nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx4001 up
+
+ ip link set dev vx4001 master br1
+
+ ip address add 192.0.2.17/32 dev lo
+
+ # Create SVIs.
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev vlan10 192.0.2.1 lladdr $(mac_get $h1)
+
+ ip address add 192.0.2.2/28 dev vlan10
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 4001 dev br1 self
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+}
+
+switch_destroy()
+{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
+ bridge vlan del vid 4001 dev br1 self
+ bridge vlan del vid 10 dev br1 self
+
+ ip link del dev vlan10
+
+ vrf_destroy "vrf-green"
+
+ ip address del 192.0.2.17/32 dev lo
+
+ tc qdisc del dev $swp1 clsact
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx4001 nomaster
+
+ ip link set dev vx4001 down
+ ip link del dev vx4001
+
+ ip address del dev $rp1 198.51.100.1/24
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 198.51.100.2/24
+
+ ip route add 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+}
+
+vrp2_destroy()
+{
+ ip route del 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+
+ simple_if_fini $rp2 198.51.100.2/24
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ switch_create
+
+ vrp2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrp2_destroy
+
+ switch_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+payload_get()
+{
+ local dest_mac=$(mac_get vlan4001)
+ local src_mac=$(mac_get $rp1)
+
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"01:96:41:"$( : VXLAN VNI : 104001
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"$src_mac:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:54:"$( : IP total length
+ )"3f:49:"$( : IP identification
+ )"00:00:"$( : IP flags + frag off
+ )"3f:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"50:21:"$( : IP header csum
+ )"c6:33:64:0a:"$( : IP saddr: 198.51.100.10
+ )"c0:00:02:01:"$( : IP daddr: 192.0.2.1
+ )
+ echo $p
+}
+
+vlan_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+ ip link add link br1 name vlan4001 up master vrf-green \
+ type vlan id 4001
+
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+}
+
+vlan_rif_del()
+{
+ ip link del dev vlan4001
+}
+
+vni_fid_map_rif()
+{
+ local rp1_mac=$(mac_get $rp1)
+
+ RET=0
+
+ # First add VNI->FID mapping to the FID of VLAN 4001
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ # Add a RIF to the FID with VNI->FID mapping
+ vlan_rif_add
+
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ payload=$(payload_get)
+ ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+ -B 192.0.2.17 -A 192.0.2.18 \
+ -t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing VNI->FID mapping"
+
+ tc filter del dev $swp1 egress
+
+ bridge vlan del vid 4001 dev vx4001 pvid untagged
+ vlan_rif_del
+}
+
+rif_vni_fid_map()
+{
+ local rp1_mac=$(mac_get $rp1)
+
+ RET=0
+
+ # First add a RIF to the FID of VLAN 4001
+ vlan_rif_add
+
+ # Add VNI->FID mapping to FID with a RIF
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ payload=$(payload_get)
+ ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+ -B 192.0.2.17 -A 192.0.2.18 \
+ -t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add VNI->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp1 egress
+
+ bridge vlan del vid 4001 dev vx4001 pvid untagged
+ vlan_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
index 76f1ab4898d9..e1ad623146d7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
@@ -15,6 +15,13 @@ source $lib_dir/mirror_lib.sh
source $lib_dir/mirror_gre_lib.sh
source $lib_dir/mirror_gre_topo_lib.sh
+ALL_TESTS="
+ test_keyful
+ test_soft
+ test_tos_fixed
+ test_ttl_inherit
+"
+
setup_keyful()
{
tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
@@ -118,15 +125,15 @@ test_span_gre_ttl_inherit()
RET=0
ip link set dev $tundev type $type ttl inherit
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev type $type ttl 100
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: no offload on TTL of inherit ($tcflags)"
+ log_test "$what: no offload on TTL of inherit"
}
test_span_gre_tos_fixed()
@@ -138,61 +145,49 @@ test_span_gre_tos_fixed()
RET=0
ip link set dev $tundev type $type tos 0x10
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev type $type tos inherit
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: no offload on a fixed TOS ($tcflags)"
+ log_test "$what: no offload on a fixed TOS"
}
test_span_failable()
{
- local should_fail=$1; shift
local tundev=$1; shift
local what=$1; shift
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- if ((should_fail)); then
- fail_test_span_gre_dir $tundev ingress
- else
- quick_test_span_gre_dir $tundev ingress
- fi
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: should_fail=$should_fail ($tcflags)"
+ log_test "fail $what"
}
-test_failable()
+test_keyful()
{
- local should_fail=$1; shift
-
- test_span_failable $should_fail gt6-key "mirror to keyful gretap"
- test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+ test_span_failable gt6-key "mirror to keyful gretap"
}
-test_sw()
+test_soft()
{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- test_failable 0
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
+ test_span_failable gt6-soft "mirror to gretap w/ soft underlay"
}
-test_hw()
+test_tos_fixed()
{
- test_failable 1
-
test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+}
+
+test_ttl_inherit()
+{
test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
}
@@ -202,16 +197,6 @@ trap cleanup EXIT
setup_prepare
setup_wait
-if ! tc_offload_check; then
- check_err 1 "Could not test offloaded functionality"
- log_test "mlxsw-specific tests for mirror to gretap"
- exit
-fi
-
-tcflags="skip_hw"
-test_sw
-
-tcflags="skip_sw"
-test_hw
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
index 6f3a70df63bc..d43093310e23 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -79,7 +79,7 @@ mirror_gre_tunnels_create()
cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
filter add dev $swp1 ingress pref 1000 \
protocol ipv6 \
- flower $tcflags dst_ip $match_dip \
+ flower skip_sw dst_ip $match_dip \
action mirred egress mirror dev $tun
EOF
done
@@ -107,7 +107,7 @@ mirror_gre_tunnels_destroy()
done
}
-__mirror_gre_test()
+mirror_gre_test()
{
local count=$1; shift
local should_fail=$1; shift
@@ -120,30 +120,17 @@ __mirror_gre_test()
sleep 5
for ((i = 0; i < count; ++i)); do
+ local sip=$(mirror_gre_ipv6_addr 1 $i)::1
local dip=$(mirror_gre_ipv6_addr 1 $i)::2
local htun=h3-gt6-$i
local message
icmp6_capture_install $htun
- mirror_test v$h1 "" $dip $htun 100 10
+ mirror_test v$h1 $sip $dip $htun 100 10
icmp6_capture_uninstall $htun
done
}
-mirror_gre_test()
-{
- local count=$1; shift
- local should_fail=$1; shift
-
- if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
- check_err 1 "Could not test offloaded functionality"
- return
- fi
-
- tcflags="skip_sw"
- __mirror_gre_test $count $should_fail
-}
-
mirror_gre_setup_prepare()
{
h1=${NETIFS[p1]}
@@ -164,6 +151,7 @@ mirror_gre_setup_prepare()
simple_if_init $h3
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
index cbe50f260a40..48395cfd4f95 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -11,3 +11,67 @@ if [[ ! -v MLXSW_CHIP ]]; then
exit 1
fi
fi
+
+MLXSW_SPECTRUM_REV=$(case $MLXSW_CHIP in
+ mlxsw_spectrum)
+ echo 1 ;;
+ mlxsw_spectrum*)
+ echo ${MLXSW_CHIP#mlxsw_spectrum} ;;
+ *)
+ echo "Couldn't determine Spectrum chip revision." \
+ > /dev/stderr ;;
+ esac)
+
+mlxsw_on_spectrum()
+{
+ local rev=$1; shift
+ local op="=="
+ local rev2=${rev%+}
+
+ if [[ $rev2 != $rev ]]; then
+ op=">="
+ fi
+
+ ((MLXSW_SPECTRUM_REV $op rev2))
+}
+
+__mlxsw_only_on_spectrum()
+{
+ local rev=$1; shift
+ local caller=$1; shift
+ local src=$1; shift
+
+ if ! mlxsw_on_spectrum "$rev"; then
+ log_test_xfail $src:$caller "(Spectrum-$rev only)"
+ return 1
+ fi
+}
+
+mlxsw_only_on_spectrum()
+{
+ local caller=${FUNCNAME[1]}
+ local src=${BASH_SOURCE[1]}
+ local rev
+
+ for rev in "$@"; do
+ if __mlxsw_only_on_spectrum "$rev" "$caller" "$src"; then
+ return 0
+ fi
+ done
+
+ return 1
+}
+
+mlxsw_max_descriptors_get()
+{
+ local spectrum_rev=$MLXSW_SPECTRUM_REV
+
+ case $spectrum_rev in
+ 1) echo 81920 ;;
+ 2) echo 136960 ;;
+ 3) echo 204800 ;;
+ 4) echo 220000 ;;
+ *) echo "Unknown max descriptors for chip revision." > /dev/stderr
+ return 1 ;;
+ esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
index f02d83e94576..fca0e1e642c6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
@@ -83,7 +83,8 @@ h2_destroy()
switch_create()
{
- ip link add name br0 type bridge mcast_snooping 0
+ ip link add name br0 address $(mac_get $swp1) \
+ type bridge mcast_snooping 0
ip link set dev br0 up
ip link set dev $swp1 master br0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
new file mode 100755
index 000000000000..fe0343b95e6c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that PCI reset works correctly by verifying that only the expected reset
+# methods are supported and that after issuing the reset the ifindex of the
+# port changes.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ pci_reset_test
+"
+NUM_NETIFS=1
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+pci_reset_test()
+{
+ RET=0
+
+ local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1)
+ local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2)
+
+ if [ $bus != "pci" ]; then
+ check_err 1 "devlink device is not a PCI device"
+ log_test "pci reset"
+ return
+ fi
+
+ if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then
+ check_err 1 "reset is not supported"
+ log_test "pci reset"
+ return
+ fi
+
+ [[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]]
+ check_err $? "only \"bus\" reset method should be supported"
+
+ local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+ echo 1 > /sys/bus/pci/devices/$bdf/reset
+ check_err $? "reset failed"
+
+ # Wait for udev to rename newly created netdev.
+ udevadm settle
+
+ local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+ [[ $ifindex_pre != $ifindex_post ]]
+ check_err $? "reset not performed"
+
+ log_test "pci reset"
+}
+
+swp1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh
new file mode 100755
index 000000000000..b1f0781f6b25
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that filters that match on the same port range, but with different
+# combination of IPv4/IPv6 and TCP/UDP all use the same port range register by
+# observing port range registers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ port_range_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+port_range_occ_get()
+{
+ devlink_resource_occ_get port_range_registers
+}
+
+port_range_occ_test()
+{
+ RET=0
+
+ local occ=$(port_range_occ_get)
+
+ # Two port range registers are used, for source and destination port
+ # ranges.
+ tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+ flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \
+ action pass
+ (( occ + 2 == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+ tc filter add dev $swp1 ingress pref 1 handle 102 proto ip \
+ flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \
+ action pass
+ tc filter add dev $swp1 ingress pref 2 handle 103 proto ipv6 \
+ flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \
+ action pass
+ tc filter add dev $swp1 ingress pref 2 handle 104 proto ipv6 \
+ flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \
+ action pass
+ (( occ + 2 == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+ tc filter del dev $swp1 ingress pref 2 handle 104 flower
+ tc filter del dev $swp1 ingress pref 2 handle 103 flower
+ tc filter del dev $swp1 ingress pref 1 handle 102 flower
+ (( occ + 2 == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+ tc filter del dev $swp1 ingress pref 1 handle 101 flower
+ (( occ == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $occ"
+
+ log_test "port range occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh
new file mode 100644
index 000000000000..2a70840ff14b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0
+
+PORT_RANGE_NUM_NETIFS=2
+
+port_range_h1_create()
+{
+ simple_if_init $h1
+}
+
+port_range_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+port_range_switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+port_range_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+port_range_rules_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+ local batch_file="$(mktemp)"
+
+ for ((i = 0; i < count; ++i)); do
+ cat >> $batch_file <<-EOF
+ filter add dev $swp1 ingress \
+ prot ipv4 \
+ pref 1000 \
+ flower skip_sw \
+ ip_proto udp dst_port 1-$((100 + i)) \
+ action pass
+ EOF
+ done
+
+ tc -b $batch_file
+ check_err_fail $should_fail $? "Rule insertion"
+
+ rm -f $batch_file
+}
+
+__port_range_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ port_range_rules_create $count $should_fail
+
+ offload_count=$(tc -j filter show dev $swp1 ingress |
+ jq "[.[] | select(.options.in_hw == true)] | length")
+ ((offload_count == count))
+ check_err_fail $should_fail $? "port range offload count"
+}
+
+port_range_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ if ! tc_offload_check $PORT_RANGE_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ __port_range_test $count $should_fail
+}
+
+port_range_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ port_range_h1_create
+ port_range_switch_create
+}
+
+port_range_cleanup()
+{
+ pre_cleanup
+
+ port_range_switch_destroy
+ port_range_h1_destroy
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
index f813ffefc07e..1e9a4aff76a2 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
@@ -7,6 +7,8 @@
PORT_NUM_NETIFS=0
+declare -a unsplit
+
port_setup_prepare()
{
:
@@ -20,12 +22,12 @@ port_cleanup()
devlink port unsplit $port
check_err $? "Did not unsplit $netdev"
done
+ unsplit=()
}
split_all_ports()
{
local should_fail=$1; shift
- local -a unsplit
# Loop over the splittable netdevs and create tuples of netdev along
# with its width. For example:
@@ -55,10 +57,6 @@ port_test()
| jq '.[][][] | select(.name=="physical_ports") |.["occ"]')
[[ $occ -eq $max_ports ]]
- if [[ $should_fail -eq 0 ]]; then
- check_err $? "Mismatch ports number: Expected $max_ports, got $occ."
- else
- check_err_fail $should_fail $? "Reached more ports than expected"
- fi
+ check_err_fail $should_fail $? "Attempt to create $max_ports ports (actual result $occ)"
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
index 7edaed8eb86a..00d55b0e98c1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
@@ -48,6 +48,7 @@ create_vlan_upper_on_top_of_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol $bridge_proto vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link set dev $swp1 master br0
@@ -88,6 +89,7 @@ create_8021ad_vlan_upper_on_top_bridge_port()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev $swp1 master br0
ip link set dev br0 up
@@ -155,6 +157,7 @@ create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link set dev $swp1 master br0
@@ -177,6 +180,7 @@ create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link add name bond1 type bond mode 802.3ad
@@ -203,6 +207,7 @@ enslave_front_panel_with_vlan_upper_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link add name $swp1.100 link $swp1 type vlan id 100
@@ -225,6 +230,7 @@ enslave_lag_with_vlan_upper_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link add name bond1 type bond mode 802.3ad
@@ -252,6 +258,7 @@ add_ip_address_to_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link set dev $swp1 master br0
@@ -273,6 +280,7 @@ switch_bridge_protocol_from_8021q_to_8021ad()
ip link add dev br0 type bridge vlan_filtering 1 \
vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link set dev $swp1 master br0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
index 71066bc4b886..5492fa5550d7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -5,18 +5,18 @@
# prioritized according to the default priority specified at the port.
# rx_octets_prio_* counters are used to verify the prioritization.
#
-# +-----------------------+
-# | H1 |
-# | + $h1 |
-# | | 192.0.2.1/28 |
-# +----|------------------+
+# +----------------------------------+
+# | H1 |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|-----------------------------+
# |
-# +----|------------------+
-# | SW | |
-# | + $swp1 |
-# | 192.0.2.2/28 |
-# | APP=<prio>,1,0 |
-# +-----------------------+
+# +----|-----------------------------+
+# | SW | |
+# | + $swp1 |
+# | 192.0.2.2/28 |
+# | dcb app default-prio <prio> |
+# +----------------------------------+
ALL_TESTS="
ping_ipv4
@@ -29,42 +29,6 @@ NUM_NETIFS=2
: ${HIT_TIMEOUT:=1000} # ms
source $lib_dir/lib.sh
-declare -a APP
-
-defprio_install()
-{
- local dev=$1; shift
- local prio=$1; shift
- local app="app=$prio,1,0"
-
- lldptool -T -i $dev -V APP $app >/dev/null
- lldpad_app_wait_set $dev
- APP[$prio]=$app
-}
-
-defprio_uninstall()
-{
- local dev=$1; shift
- local prio=$1; shift
- local app=${APP[$prio]}
-
- lldptool -T -i $dev -V APP -d $app >/dev/null
- lldpad_app_wait_del
- unset APP[$prio]
-}
-
-defprio_flush()
-{
- local dev=$1; shift
- local prio
-
- if ((${#APP[@]})); then
- lldptool -T -i $dev -V APP -d ${APP[@]} >/dev/null
- fi
- lldpad_app_wait_del
- APP=()
-}
-
h1_create()
{
simple_if_init $h1 192.0.2.1/28
@@ -83,7 +47,7 @@ switch_create()
switch_destroy()
{
- defprio_flush $swp1
+ dcb app flush dev $swp1 default-prio
ip addr del dev $swp1 192.0.2.2/28
ip link set dev $swp1 down
}
@@ -124,7 +88,7 @@ __test_defprio()
RET=0
- defprio_install $swp1 $prio_install
+ dcb app add dev $swp1 default-prio $prio_install
local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
mausezahn -q $h1 -d 100m -c 10 -t arp reply
@@ -134,7 +98,7 @@ __test_defprio()
check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))."
log_test "Default priority $prio_install/$prio_observe"
- defprio_uninstall $swp1 $prio_install
+ dcb app del dev $swp1 default-prio $prio_install
}
test_defprio()
@@ -145,7 +109,7 @@ test_defprio()
__test_defprio $prio $prio
done
- defprio_install $swp1 3
+ dcb app add dev $swp1 default-prio 3
__test_defprio 0 3
__test_defprio 1 3
__test_defprio 2 3
@@ -153,7 +117,7 @@ test_defprio()
__test_defprio 5 5
__test_defprio 6 6
__test_defprio 7 7
- defprio_uninstall $swp1 3
+ dcb app del dev $swp1 default-prio 3
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
index 5cbff8038f84..914c63d6318a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -20,7 +20,7 @@
# | SW | | |
# | +-|----------------------------------------------------------------|-+ |
# | | + $swp1 BR $swp2 + | |
-# | | APP=0,5,10 .. 7,5,17 APP=0,5,20 .. 7,5,27 | |
+# | | dcb dscp-prio 10:0...17:7 dcb dscp-prio 20:0...27:7 | |
# | +--------------------------------------------------------------------+ |
# +---------------------------------------------------------------------------+
@@ -62,38 +62,28 @@ h2_destroy()
simple_if_fini $h2 192.0.2.2/28
}
-dscp_map()
-{
- local base=$1; shift
- local prio
-
- for prio in {0..7}; do
- echo app=$prio,5,$((base + prio))
- done
-}
-
switch_create()
{
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp1 up
ip link set dev $swp2 master br1
ip link set dev $swp2 up
- lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null
- lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null
- lldpad_app_wait_set $swp1
- lldpad_app_wait_set $swp2
+ dcb app add dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7
+ dcb app add dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7
}
switch_destroy()
{
- lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
- lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
- lldpad_app_wait_del
+ dcb app del dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7
+ dcb app del dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
index 4cb2aa65278a..f6c23f84423e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -94,16 +94,6 @@ h2_destroy()
simple_if_fini $h2 192.0.2.18/28
}
-dscp_map()
-{
- local base=$1; shift
- local prio
-
- for prio in {0..7}; do
- echo app=$prio,5,$((base + prio))
- done
-}
-
switch_create()
{
simple_if_init $swp1 192.0.2.2/28
@@ -112,17 +102,14 @@ switch_create()
tc qdisc add dev $swp1 clsact
tc qdisc add dev $swp2 clsact
- lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
- lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
- lldpad_app_wait_set $swp1
- lldpad_app_wait_set $swp2
+ dcb app add dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+ dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
}
switch_destroy()
{
- lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
- lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
- lldpad_app_wait_del
+ dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+ dcb app del dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
@@ -265,13 +252,11 @@ test_dscp_leftover()
{
echo "Test that last removed DSCP rule is deconfigured correctly"
- lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
- lldpad_app_wait_del
+ dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
__test_update 0 zero
- lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
- lldpad_app_wait_set $swp2
+ dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
index e9f8718af979..9ca340c5f3a6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -57,66 +57,60 @@ source qos_lib.sh
h1_create()
{
- simple_if_init $h1
+ adf_simple_if_init $h1
+
mtu_set $h1 10000
+ defer mtu_restore $h1
vlan_create $h1 111 v$h1 192.0.2.33/28
+ defer vlan_destroy $h1 111
ip link set dev $h1.111 type vlan egress-qos-map 0:1
}
-h1_destroy()
-{
- vlan_destroy $h1 111
-
- mtu_restore $h1
- simple_if_fini $h1
-}
-
h2_create()
{
- simple_if_init $h2
+ adf_simple_if_init $h2
+
mtu_set $h2 10000
+ defer mtu_restore $h2
vlan_create $h2 222 v$h2 192.0.2.65/28
+ defer vlan_destroy $h2 222
ip link set dev $h2.222 type vlan egress-qos-map 0:2
}
-h2_destroy()
-{
- vlan_destroy $h2 222
-
- mtu_restore $h2
- simple_if_fini $h2
-}
-
h3_create()
{
- simple_if_init $h3
+ adf_simple_if_init $h3
+
mtu_set $h3 10000
+ defer mtu_restore $h3
vlan_create $h3 111 v$h3 192.0.2.34/28
- vlan_create $h3 222 v$h3 192.0.2.66/28
-}
-
-h3_destroy()
-{
- vlan_destroy $h3 222
- vlan_destroy $h3 111
+ defer vlan_destroy $h3 111
- mtu_restore $h3
- simple_if_fini $h3
+ vlan_create $h3 222 v$h3 192.0.2.66/28
+ defer vlan_destroy $h3 222
}
switch_create()
{
ip link set dev $swp1 up
+ defer ip link set dev $swp1 down
+
mtu_set $swp1 10000
+ defer mtu_restore $swp1
ip link set dev $swp2 up
+ defer ip link set dev $swp2 down
+
mtu_set $swp2 10000
+ defer mtu_restore $swp2
# prio n -> TC n, strict scheduling
lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7
+ defer lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0
+
lldptool -T -i $swp3 -V ETS-CFG tsa=$(
)"0:strict,"$(
)"1:strict,"$(
@@ -129,80 +123,90 @@ switch_create()
sleep 1
ip link set dev $swp3 up
+ defer ip link set dev $swp3 down
+
mtu_set $swp3 10000
- ethtool -s $swp3 speed 1000 autoneg off
+ defer mtu_restore $swp3
+
+ tc qdisc replace dev $swp3 root handle 101: tbf rate 1gbit \
+ burst 128K limit 1G
+ defer tc qdisc del dev $swp3 root handle 101:
vlan_create $swp1 111
+ defer vlan_destroy $swp1 111
+
vlan_create $swp2 222
+ defer vlan_destroy $swp2 222
+
vlan_create $swp3 111
+ defer vlan_destroy $swp3 111
+
vlan_create $swp3 222
+ defer vlan_destroy $swp3 222
+
+ ip link add name br111 type bridge vlan_filtering 0
+ defer ip link del dev br111
+ ip link set dev br111 addrgenmode none
+
+ ip link set dev br111 up
+ defer ip link set dev br111 down
- ip link add name br111 up type bridge vlan_filtering 0
ip link set dev $swp1.111 master br111
+ defer ip link set dev $swp1.111 nomaster
+
ip link set dev $swp3.111 master br111
+ defer ip link set dev $swp3.111 nomaster
+
+ ip link add name br222 type bridge vlan_filtering 0
+ defer ip link del dev br222
+ ip link set dev br222 addrgenmode none
+
+ ip link set dev br222 up
+ defer ip link set dev br222 down
- ip link add name br222 up type bridge vlan_filtering 0
ip link set dev $swp2.222 master br222
+ defer ip link set dev $swp2.222 nomaster
+
ip link set dev $swp3.222 master br222
+ defer ip link set dev $swp3.222 nomaster
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
devlink_pool_size_thtype_save 0
devlink_pool_size_thtype_set 0 dynamic 10000000
+ defer devlink_pool_size_thtype_restore 0
+
devlink_pool_size_thtype_save 4
devlink_pool_size_thtype_set 4 dynamic 10000000
+ defer devlink_pool_size_thtype_restore 4
devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 6
+ defer devlink_port_pool_th_restore $swp1 0
+
devlink_tc_bind_pool_th_save $swp1 1 ingress
devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
+ defer devlink_tc_bind_pool_th_restore $swp1 1 ingress
devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 6
+ defer devlink_port_pool_th_restore $swp2 0
+
devlink_tc_bind_pool_th_save $swp2 2 ingress
devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
+ defer devlink_tc_bind_pool_th_restore $swp2 2 ingress
devlink_tc_bind_pool_th_save $swp3 1 egress
devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
+ defer devlink_tc_bind_pool_th_restore $swp3 1 egress
+
devlink_tc_bind_pool_th_save $swp3 2 egress
devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
+ defer devlink_tc_bind_pool_th_restore $swp3 2 egress
+
devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 7
-}
-
-switch_destroy()
-{
- devlink_port_pool_th_restore $swp3 4
- devlink_tc_bind_pool_th_restore $swp3 2 egress
- devlink_tc_bind_pool_th_restore $swp3 1 egress
-
- devlink_tc_bind_pool_th_restore $swp2 2 ingress
- devlink_port_pool_th_restore $swp2 0
-
- devlink_tc_bind_pool_th_restore $swp1 1 ingress
- devlink_port_pool_th_restore $swp1 0
-
- devlink_pool_size_thtype_restore 4
- devlink_pool_size_thtype_restore 0
-
- ip link del dev br222
- ip link del dev br111
-
- vlan_destroy $swp3 222
- vlan_destroy $swp3 111
- vlan_destroy $swp2 222
- vlan_destroy $swp1 111
-
- ethtool -s $swp3 autoneg on
- mtu_restore $swp3
- ip link set dev $swp3 down
- lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0
-
- mtu_restore $swp2
- ip link set dev $swp2 down
-
- mtu_restore $swp1
- ip link set dev $swp1 down
+ defer devlink_port_pool_th_restore $swp3 4
}
setup_prepare()
@@ -218,7 +222,7 @@ setup_prepare()
h3mac=$(mac_get $h3)
- vrf_prepare
+ adf_vrf_prepare
h1_create
h2_create
@@ -226,18 +230,6 @@ setup_prepare()
switch_create
}
-cleanup()
-{
- pre_cleanup
-
- switch_destroy
- h3_destroy
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-}
-
ping_ipv4()
{
ping_test $h1 192.0.2.34 " from H1"
@@ -256,21 +248,38 @@ rel()
"
}
+__run_hi_measure_rate()
+{
+ local what=$1; shift
+ local -a uc_rate
+
+ start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac
+ defer stop_traffic $!
+
+ uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_2 "$what"))
+ check_err $? "Could not get high enough $what ingress rate"
+
+ echo ${uc_rate[@]}
+}
+
+run_hi_measure_rate()
+{
+ in_defer_scope __run_hi_measure_rate "$@"
+}
+
test_ets_strict()
{
RET=0
# Run high-prio traffic on its own.
- start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac
local -a rate_2
- rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2"))
- check_err $? "Could not get high enough prio-2 ingress rate"
+ rate_2=($(run_hi_measure_rate "prio 2"))
local rate_2_in=${rate_2[0]}
local rate_2_eg=${rate_2[1]}
- stop_traffic # $h2.222
# Start low-prio stream.
start_traffic $h1.111 192.0.2.33 192.0.2.34 $h3mac
+ defer stop_traffic $!
local -a rate_1
rate_1=($(measure_rate $swp1 $h3 rx_octets_prio_1 "prio 1"))
@@ -285,14 +294,9 @@ test_ets_strict()
check_err $(bc <<< "$rel21 > 105")
# Start the high-prio stream--now both streams run.
- start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac
- rate_3=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2 w/ 1"))
- check_err $? "Could not get high enough prio-2 ingress rate with prio-1"
+ rate_3=($(run_hi_measure_rate "prio 2+1"))
local rate_3_in=${rate_3[0]}
local rate_3_eg=${rate_3[1]}
- stop_traffic # $h2.222
-
- stop_traffic # $h1.111
# High-prio should have about the same throughput whether or not
# low-prio is in the system.
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
index 27de3d9ed08e..88162b4027c0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -18,7 +18,6 @@ lib_dir=$(dirname $0)/../../../net/forwarding
NUM_NETIFS=0
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
-source qos_lib.sh
swp=$NETIF_NO_CABLE
@@ -29,37 +28,38 @@ cleanup()
get_prio_pg()
{
- __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
- grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2-
+ # Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number
+ # of buffer that priority X is mapped to.
+ dcb -j buffer show dev $swp |
+ jq -r '[.prio_buffer | .[] | tostring + " "] | add'
}
get_prio_pfc()
{
- __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
- grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2-
+ # Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes
+ # whether priority X has PFC enabled (the value is 1) or disabled (0).
+ dcb -j pfc show dev $swp |
+ jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add'
}
get_prio_tc()
{
- __mlnx_qos -i $swp | sed -n '/^tc/,$p' |
- awk '/^tc/ { TC = $2 }
- /priority:/ { PRIO[$2]=TC }
- END {
- for (i in PRIO)
- printf("%d ", PRIO[i])
- }'
+ # Produces a string of numbers "<T0> <T1> ... <T7> ", where TC is number
+ # of TC that priority X is mapped to.
+ dcb -j ets show dev $swp |
+ jq -r '[.prio_tc | .[] | tostring + " "] | add'
}
get_buf_size()
{
local idx=$1; shift
- __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1))
+ dcb -j buffer show dev $swp | jq ".buffer_size[$idx]"
}
get_tot_size()
{
- __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//'
+ dcb -j buffer show dev $swp | jq '.total_size'
}
check_prio_pg()
@@ -121,18 +121,18 @@ test_dcb_ets()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
+ dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7
check_prio_pg "0 2 4 6 1 3 5 7 "
check_prio_tc "0 2 4 6 1 3 5 7 "
check_prio_pfc "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb ets set dev $swp prio-tc all:0
check_prio_pg "0 0 0 0 0 0 0 0 "
check_prio_tc "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null
check_fail $? "prio2buffer accepted in DCB mode"
log_test "Configuring headroom through ETS"
@@ -174,7 +174,7 @@ test_pfc()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null
+ dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3
local buf0size=$(get_buf_size 0)
local buf1size=$(get_buf_size 1)
@@ -193,7 +193,7 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null
+ dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0
check_prio_pg "0 0 0 0 0 1 2 3 "
check_prio_pfc "0 0 0 0 0 1 1 1 "
@@ -210,7 +210,7 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null
+ dcb pfc set dev $swp delay 1000
check_buf_size 0 "== $buf0size"
check_buf_size 1 "> $buf1size"
@@ -221,8 +221,8 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb pfc set dev $swp prio-pfc all:off delay 0
+ dcb ets set dev $swp prio-tc all:0
check_prio_pg "0 0 0 0 0 0 0 0 "
check_prio_tc "0 0 0 0 0 0 0 0 "
@@ -242,13 +242,13 @@ test_tc_priomap()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
+ dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
check_prio_pg "0 1 2 3 4 5 6 7 "
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
check_prio_pg "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6
check_prio_pg "1 3 5 7 0 2 4 6 "
tc qdisc delete dev $swp root
@@ -256,9 +256,9 @@ test_tc_priomap()
# Clean up.
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
- __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp prio-buffer all:0
tc qdisc delete dev $swp root
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb ets set dev $swp prio-tc all:0
log_test "TC: priomap"
}
@@ -270,12 +270,12 @@ test_tc_sizes()
RET=0
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
check_fail $? "buffer_size should fail before qdisc is added"
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_err $? "buffer_size should pass after qdisc is added"
check_buf_size 0 "== $size" "set size: "
@@ -283,26 +283,26 @@ test_tc_sizes()
check_buf_size 0 "== $size" "set MTU: "
mtu_restore $swp
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
# After replacing the qdisc for the same kind, buffer_size still has to
# work.
tc qdisc replace dev $swp root handle 1: bfifo limit 1M
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_buf_size 0 "== $size" "post replace, set size: "
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
# Likewise after replacing for a different kind.
tc qdisc replace dev $swp root handle 2: prio bands 8
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_buf_size 0 "== $size" "post replace different kind, set size: "
tc qdisc delete dev $swp root
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
check_fail $? "buffer_size should fail after qdisc is deleted"
log_test "TC: buffer size"
@@ -363,16 +363,16 @@ test_tc_int_buf()
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
test_int_buf "TC: "
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
test_int_buf "TC+buffsize: "
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
tc qdisc delete dev $swp root
}
-trap cleanup EXIT
+bail_on_lldpad "configure DCB" "configure Qdiscs"
-bail_on_lldpad
+trap cleanup EXIT
setup_wait
tests_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index 0bf76f13c030..5ad092b9bf10 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -54,45 +54,3 @@ measure_rate()
echo $ir $er
return $ret
}
-
-bail_on_lldpad()
-{
- if systemctl is-active --quiet lldpad; then
-
- cat >/dev/stderr <<-EOF
- WARNING: lldpad is running
-
- lldpad will likely configure DCB, and this test will
- configure Qdiscs. mlxsw does not support both at the
- same time, one of them is arbitrarily going to overwrite
- the other. That will cause spurious failures (or,
- unlikely, passes) of this test.
- EOF
-
- if [[ -z $ALLOW_LLDPAD ]]; then
- cat >/dev/stderr <<-EOF
-
- If you want to run the test anyway, please set
- an environment variable ALLOW_LLDPAD to a
- non-empty string.
- EOF
- exit 1
- else
- return
- fi
- fi
-}
-
-__mlnx_qos()
-{
- local err
-
- mlnx_qos "$@" 2>/dev/null
- err=$?
-
- if ((err)); then
- echo "Error ($err) in mlnx_qos $@" >/dev/stderr
- fi
-
- return $err
-}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
new file mode 100755
index 000000000000..a4a25637fe2a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sends many small packets (size is less than cell size) through the
+# switch. A shaper is used in $swp2, so the traffic is limited there. Packets
+# are queued till they will be sent.
+#
+# The idea is to verify that the switch can handle at least 85% of maximum
+# supported descrpitors by hardware. Then, we verify that the driver configures
+# firmware to allow infinite size of egress descriptor pool, and does not use a
+# lower limitation. Increase the size of the relevant pools such that the pool's
+# size does not limit the traffic.
+
+# +-----------------------+
+# | H1 |
+# | + $h1.111 |
+# | | 192.0.2.33/28 |
+# | | |
+# | + $h1 |
+# +---|-------------------+
+# |
+# +---|-----------------------------+
+# | + $swp1 |
+# | | iPOOL1 |
+# | | |
+# | +-|------------------------+ |
+# | | + $swp1.111 | |
+# | | | |
+# | | BR1 | |
+# | | | |
+# | | + $swp2.111 | |
+# | +-|------------------------+ |
+# | | |
+# | + $swp2 |
+# | | ePOOL6 |
+# | | 1mbit |
+# +---+-----------------------------+
+# |
+# +---|-------------------+
+# | + $h2 H2 |
+# | | |
+# | + $h2.111 |
+# | 192.0.2.34/28 |
+# +-----------------------+
+#
+
+ALL_TESTS="
+ ping_ipv4
+ max_descriptors
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+MAX_POOL_SIZE=$(devlink_pool_size_get)
+SHAPER_RATE=1mbit
+
+# The current TBF qdisc interface does not allow us to configure the shaper to
+# flat zero. The ASIC shaper is guaranteed to work with a granularity of
+# 200Mbps. On Spectrum-2, writing a value close to zero instead of zero works
+# well, but the performance on Spectrum-1 is unpredictable. Thus, do not run the
+# test on Spectrum-1.
+mlxsw_only_on_spectrum 2+ || exit
+
+h1_create()
+{
+ adf_simple_if_init $h1
+
+ vlan_create $h1 111 v$h1 192.0.2.33/28
+ defer vlan_destroy $h1 111
+ ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h2_create()
+{
+ adf_simple_if_init $h2
+
+ vlan_create $h2 111 v$h2 192.0.2.34/28
+ defer vlan_destroy $h2 111
+}
+
+switch_create()
+{
+ # pools
+ # -----
+ # devlink_pool_size_thtype_restore needs to be done first so that we can
+ # reset the various limits to values that are only valid for the
+ # original static / dynamic setting.
+
+ devlink_pool_size_thtype_save 1
+ devlink_pool_size_thtype_set 1 dynamic $MAX_POOL_SIZE
+ defer_prio devlink_pool_size_thtype_restore 1
+
+ devlink_pool_size_thtype_save 6
+ devlink_pool_size_thtype_set 6 static $MAX_POOL_SIZE
+ defer_prio devlink_pool_size_thtype_restore 6
+
+ # $swp1
+ # -----
+
+ ip link set dev $swp1 up
+ defer ip link set dev $swp1 down
+
+ vlan_create $swp1 111
+ defer vlan_destroy $swp1 111
+ ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_save $swp1 1
+ devlink_port_pool_th_set $swp1 1 16
+ defer devlink_tc_bind_pool_th_restore $swp1 1 ingress
+
+ devlink_tc_bind_pool_th_save $swp1 1 ingress
+ devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16
+ defer devlink_port_pool_th_restore $swp1 1
+
+ tc qdisc replace dev $swp1 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ defer tc qdisc del dev $swp1 root
+
+ dcb buffer set dev $swp1 prio-buffer all:0 1:1
+ defer dcb buffer set dev $swp1 prio-buffer all:0
+
+ # $swp2
+ # -----
+
+ ip link set dev $swp2 up
+ defer ip link set dev $swp2 down
+
+ vlan_create $swp2 111
+ defer vlan_destroy $swp2 111
+ ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_save $swp2 6
+ devlink_port_pool_th_set $swp2 6 $MAX_POOL_SIZE
+ defer devlink_tc_bind_pool_th_restore $swp2 1 egress
+
+ devlink_tc_bind_pool_th_save $swp2 1 egress
+ devlink_tc_bind_pool_th_set $swp2 1 egress 6 $MAX_POOL_SIZE
+ defer devlink_port_pool_th_restore $swp2 6
+
+ tc qdisc replace dev $swp2 root handle 1: tbf rate $SHAPER_RATE \
+ burst 128K limit 500M
+ defer tc qdisc del dev $swp2 root
+
+ tc qdisc replace dev $swp2 parent 1:1 handle 11: \
+ ets bands 8 strict 8 priomap 7 6
+ defer tc qdisc del dev $swp2 parent 1:1 handle 11:
+
+ # bridge
+ # ------
+
+ ip link add name br1 type bridge vlan_filtering 0
+ defer ip link del dev br1
+
+ ip link set dev $swp1.111 master br1
+ defer ip link set dev $swp1.111 nomaster
+
+ ip link set dev br1 up
+ defer ip link set dev br1 down
+
+ ip link set dev $swp2.111 master br1
+ defer ip link set dev $swp2.111 nomaster
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ adf_vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.34 " h1->h2"
+}
+
+percentage_used()
+{
+ local num_packets=$1; shift
+ local max_packets=$1; shift
+
+ bc <<< "
+ scale=2
+ 100 * $num_packets / $max_packets
+ "
+}
+
+max_descriptors()
+{
+ local cell_size=$(devlink_cell_size_get)
+ local exp_perc_used=85
+ local max_descriptors
+ local pktsize=30
+
+ RET=0
+
+ max_descriptors=$(mlxsw_max_descriptors_get) || exit 1
+
+ local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+
+ log_info "Send many small packets, packet size = $pktsize bytes"
+ start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac
+ defer stop_traffic $!
+
+ # Sleep to wait for congestion.
+ sleep 5
+
+ local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+ ((d1 == d0))
+ check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))"
+
+ # Check how many packets the switch can handle, the limitation is
+ # maximum descriptors.
+ local pkts_bytes=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+ local pkts_num=$((pkts_bytes / cell_size))
+ local perc_used=$(percentage_used $pkts_num $max_descriptors)
+
+ check_err $(bc <<< "$perc_used < $exp_perc_used") \
+ "Expected > $exp_perc_used% of descriptors, handle $perc_used%"
+
+ log_test "Maximum descriptors usage. The percentage used is $perc_used%"
+}
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 8f164c80e215..d8f8ae8533cd 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -72,120 +72,112 @@ source qos_lib.sh
h1_create()
{
- simple_if_init $h1 192.0.2.65/28
- mtu_set $h1 10000
-}
+ adf_simple_if_init $h1 192.0.2.65/28
-h1_destroy()
-{
- mtu_restore $h1
- simple_if_fini $h1 192.0.2.65/28
+ mtu_set $h1 10000
+ defer mtu_restore $h1
}
h2_create()
{
- simple_if_init $h2
+ adf_simple_if_init $h2
+
mtu_set $h2 10000
+ defer mtu_restore $h2
vlan_create $h2 111 v$h2 192.0.2.129/28
+ defer vlan_destroy $h2 111
ip link set dev $h2.111 type vlan egress-qos-map 0:1
}
-h2_destroy()
-{
- vlan_destroy $h2 111
-
- mtu_restore $h2
- simple_if_fini $h2
-}
-
h3_create()
{
- simple_if_init $h3 192.0.2.66/28
+ adf_simple_if_init $h3 192.0.2.66/28
+
mtu_set $h3 10000
+ defer mtu_restore $h3
vlan_create $h3 111 v$h3 192.0.2.130/28
-}
-
-h3_destroy()
-{
- vlan_destroy $h3 111
-
- mtu_restore $h3
- simple_if_fini $h3 192.0.2.66/28
+ defer vlan_destroy $h3 111
}
switch_create()
{
ip link set dev $swp1 up
+ defer ip link set dev $swp1 down
+
mtu_set $swp1 10000
+ defer mtu_restore $swp1
ip link set dev $swp2 up
+ defer ip link set dev $swp2 down
+
mtu_set $swp2 10000
+ defer mtu_restore $swp2
ip link set dev $swp3 up
+ defer ip link set dev $swp3 down
+
mtu_set $swp3 10000
+ defer mtu_restore $swp3
vlan_create $swp2 111
+ defer vlan_destroy $swp2 111
+
vlan_create $swp3 111
+ defer vlan_destroy $swp3 111
+
+ tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \
+ burst 128K limit 1G
+ defer tc qdisc del dev $swp3 root handle 3:
- ethtool -s $swp3 speed 1000 autoneg off
- tc qdisc replace dev $swp3 root handle 3: \
- prio bands 8 priomap 7 7 7 7 7 7 7 7
+ tc qdisc replace dev $swp3 parent 3:3 handle 33: \
+ prio bands 8 priomap 7 7 7 7 7 7 7 7
+ defer tc qdisc del dev $swp3 parent 3:3 handle 33:
ip link add name br1 type bridge vlan_filtering 0
+ defer ip link del dev br1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
+
ip link set dev $swp1 master br1
+ defer ip link set dev $swp1 nomaster
+
ip link set dev $swp3 master br1
+ defer ip link set dev $swp3 nomaster
ip link add name br111 type bridge vlan_filtering 0
+ defer ip link del dev br111
+ ip link set dev br111 addrgenmode none
ip link set dev br111 up
+
ip link set dev $swp2.111 master br111
+ defer ip link set dev $swp2.111 nomaster
+
ip link set dev $swp3.111 master br111
+ defer ip link set dev $swp3.111 nomaster
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 5
+ defer devlink_port_pool_th_restore $swp1 0
+
devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
+ defer devlink_tc_bind_pool_th_restore $swp1 0 ingress
devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 5
+ defer devlink_port_pool_th_restore $swp2 0
+
devlink_tc_bind_pool_th_save $swp2 1 ingress
devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
+ defer devlink_tc_bind_pool_th_restore $swp2 1 ingress
devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 12
-}
-
-switch_destroy()
-{
- devlink_port_pool_th_restore $swp3 4
-
- devlink_tc_bind_pool_th_restore $swp2 1 ingress
- devlink_port_pool_th_restore $swp2 0
-
- devlink_tc_bind_pool_th_restore $swp1 0 ingress
- devlink_port_pool_th_restore $swp1 0
-
- ip link del dev br111
- ip link del dev br1
-
- tc qdisc del dev $swp3 root handle 3:
- ethtool -s $swp3 autoneg on
-
- vlan_destroy $swp3 111
- vlan_destroy $swp2 111
-
- mtu_restore $swp3
- ip link set dev $swp3 down
-
- mtu_restore $swp2
- ip link set dev $swp2 down
-
- mtu_restore $swp1
- ip link set dev $swp1 down
+ defer devlink_port_pool_th_restore $swp3 4
}
setup_prepare()
@@ -201,7 +193,7 @@ setup_prepare()
h3mac=$(mac_get $h3)
- vrf_prepare
+ adf_vrf_prepare
h1_create
h2_create
@@ -209,45 +201,45 @@ setup_prepare()
switch_create
}
-cleanup()
+ping_ipv4()
{
- pre_cleanup
+ ping_test $h2 192.0.2.130
+}
- switch_destroy
- h3_destroy
- h2_destroy
- h1_destroy
+__run_uc_measure_rate()
+{
+ local what=$1; shift
+ local -a uc_rate
+
+ start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
+ defer stop_traffic $!
+
+ uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "$what"))
+ check_err $? "Could not get high enough $what ingress rate"
- vrf_cleanup
+ echo ${uc_rate[@]}
}
-ping_ipv4()
+run_uc_measure_rate()
{
- ping_test $h2 192.0.2.130
+ in_defer_scope __run_uc_measure_rate "$@"
}
test_mc_aware()
{
RET=0
- local -a uc_rate
- start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
- uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only"))
- check_err $? "Could not get high enough UC-only ingress rate"
- stop_traffic
+ local -a uc_rate=($(run_uc_measure_rate "UC-only"))
local ucth1=${uc_rate[1]}
start_traffic $h1 192.0.2.65 bc bc
+ defer stop_traffic $!
local d0=$(date +%s)
local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)
- local -a uc_rate_2
- start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
- uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC"))
- check_err $? "Could not get high enough UC+MC ingress rate"
- stop_traffic
+ local -a uc_rate_2=($(run_uc_measure_rate "UC+MC"))
local ucth2=${uc_rate_2[1]}
local d1=$(date +%s)
@@ -269,8 +261,6 @@ test_mc_aware()
local mc_ir=$(rate $u0 $u1 $interval)
local mc_er=$(rate $t0 $t1 $interval)
- stop_traffic
-
log_test "UC performance under MC overload"
echo "UC-only throughput $(humanize $ucth1)"
@@ -294,6 +284,7 @@ test_uc_aware()
RET=0
start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
+ defer stop_traffic $!
local d0=$(date +%s)
local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
@@ -323,8 +314,6 @@ test_uc_aware()
((attempts == passes))
check_err $?
- stop_traffic
-
log_test "MC performance under UC overload"
echo " ingress UC throughput $(humanize ${uc_ir})"
echo " egress UC throughput $(humanize ${uc_er})"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index 5c7700212f75..0f0f4f05807c 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -40,7 +40,6 @@
# | + $swp1 $swp3 + + $swp4 |
# | | iPOOL1 iPOOL0 | | iPOOL2 |
# | | ePOOL4 ePOOL5 | | ePOOL4 |
-# | | 1Gbps | | 1Gbps |
# | | PFC:enabled=1 | | PFC:enabled=1 |
# | +-|----------------------|-+ +-|------------------------+ |
# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | |
@@ -79,7 +78,6 @@ lib_dir=$(dirname $0)/../../../net/forwarding
NUM_NETIFS=6
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
-source qos_lib.sh
_1KB=1000
_100KB=$((100 * _1KB))
@@ -121,6 +119,9 @@ h2_destroy()
switch_create()
{
+ local lanes_swp4
+ local pg1_size
+
# pools
# -----
@@ -171,7 +172,7 @@ switch_create()
# assignment.
tc qdisc replace dev $swp1 root handle 1: \
ets bands 8 strict 8 priomap 7 6
- __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp1 prio-buffer all:0 1:1
# $swp2
# -----
@@ -209,8 +210,8 @@ switch_create()
# the lossless prio into a buffer of its own. Don't bother with buffer
# sizes though, there is not going to be any pressure in the "backward"
# direction.
- __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp3 prio-buffer all:0 1:1
+ dcb pfc set dev $swp3 prio-pfc all:off 1:on
# $swp4
# -----
@@ -226,11 +227,24 @@ switch_create()
# Configure qdisc so that we can hand-tune headroom.
tc qdisc replace dev $swp4 root handle 1: \
ets bands 8 strict 8 priomap 7 6
- __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 prio-buffer all:0 1:1
+ dcb pfc set dev $swp4 prio-pfc all:off 1:on
# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
# is (-2*MTU) about 80K of delay provision.
- __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
+ pg1_size=$_100KB
+
+ setup_wait_dev_with_timeout $swp4
+
+ lanes_swp4=$(ethtool $swp4 | grep 'Lanes:')
+ lanes_swp4=${lanes_swp4#*"Lanes: "}
+
+ # 8-lane ports use two buffers among which the configured buffer
+ # is split, so double the size to get twice (20K + 80K).
+ if [[ $lanes_swp4 -eq 8 ]]; then
+ pg1_size=$((pg1_size * 2))
+ fi
+
+ dcb buffer set dev $swp4 buffer-size all:0 1:$pg1_size
# bridges
# -------
@@ -273,9 +287,9 @@ switch_destroy()
# $swp4
# -----
- __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 buffer-size all:0
+ dcb pfc set dev $swp4 prio-pfc all:off
+ dcb buffer set dev $swp4 prio-buffer all:0
tc qdisc del dev $swp4 root
devlink_tc_bind_pool_th_restore $swp4 1 ingress
@@ -288,8 +302,8 @@ switch_destroy()
# $swp3
# -----
- __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb pfc set dev $swp3 prio-pfc all:off
+ dcb buffer set dev $swp3 prio-buffer all:0
tc qdisc del dev $swp3 root
devlink_tc_bind_pool_th_restore $swp3 1 egress
@@ -315,7 +329,7 @@ switch_destroy()
# $swp1
# -----
- __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp1 prio-buffer all:0
tc qdisc del dev $swp1 root
devlink_tc_bind_pool_th_restore $swp1 1 ingress
@@ -393,9 +407,9 @@ test_qos_pfc()
log_test "PFC"
}
-trap cleanup EXIT
+bail_on_lldpad "configure DCB" "configure Qdiscs"
-bail_on_lldpad
+trap cleanup EXIT
setup_prepare
setup_wait
tests_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
new file mode 100755
index 000000000000..4a11bf1d514a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ bridge_rif_add
+ bridge_rif_nomaster
+ bridge_rif_remaster
+ bridge_rif_nomaster_addr
+ bridge_rif_nomaster_port
+ bridge_rif_remaster_port
+"
+
+REQUIRE_TEAMD="yes"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ team_create lag1 lacp
+ ip link set dev lag1 addrgenmode none
+ ip link set dev lag1 address $(mac_get $swp1)
+
+ team_create lag2 lacp
+ ip link set dev lag2 addrgenmode none
+ ip link set dev lag2 address $(mac_get $swp2)
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 address $(mac_get lag1)
+ ip link set dev br1 up
+
+ ip link set dev lag1 master br1
+
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master lag2
+ ip link set dev $swp2 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 nomaster
+ ip link set dev $swp1 down
+
+ ip link del dev lag2
+ ip link set dev lag1 nomaster
+ ip link del dev lag1
+
+ ip link del dev br1
+}
+
+bridge_rif_add()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del br1 add 192.0.2.2/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for bridge on address addition"
+}
+
+bridge_rif_nomaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev lag1 nomaster
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Drop RIF for bridge on LAG deslavement"
+}
+
+bridge_rif_remaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev lag1 master br1
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for bridge on LAG reenslavement"
+}
+
+bridge_rif_nomaster_addr()
+{
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+ # Adding an address while the LAG is enslaved shouldn't generate a RIF.
+ __addr_add_del lag1 add 192.0.2.65/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ # Removing the LAG from the bridge should drop RIF for the bridge (as
+ # tested in bridge_rif_lag_nomaster), but since the LAG now has an
+ # address, it should gain a RIF.
+ ip link set dev lag1 nomaster
+ sleep 1
+ local rifs_occ_t2=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t2))
+ check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used"
+
+ log_test "Add RIF for LAG on deslavement from bridge"
+
+ __addr_add_del lag1 del 192.0.2.65/28
+ ip link set dev lag1 master br1
+ sleep 1
+}
+
+bridge_rif_nomaster_port()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Drop RIF for bridge on deslavement of port from LAG"
+}
+
+bridge_rif_remaster_port()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 down
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+ setup_wait_dev $swp1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for bridge on reenslavement of port to LAG"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
new file mode 100644
index 000000000000..a43a9926e690
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0
+
+RIF_COUNTER_NUM_NETIFS=2
+
+rif_counter_addr4()
+{
+ local i=$1; shift
+ local p=$1; shift
+
+ printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p))
+}
+
+rif_counter_addr4pfx()
+{
+ rif_counter_addr4 $@
+ printf /30
+}
+
+rif_counter_h1_create()
+{
+ simple_if_init $h1
+}
+
+rif_counter_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+rif_counter_h2_create()
+{
+ simple_if_init $h2
+}
+
+rif_counter_h2_destroy()
+{
+ simple_if_fini $h2
+}
+
+rif_counter_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ rif_counter_h1_create
+ rif_counter_h2_create
+}
+
+rif_counter_cleanup()
+{
+ local count=$1; shift
+
+ pre_cleanup
+
+ for ((i = 1; i <= count; i++)); do
+ vlan_destroy $h2 $i
+ done
+
+ rif_counter_h2_destroy
+ rif_counter_h1_destroy
+
+ vrf_cleanup
+
+ if [[ -v RIF_COUNTER_BATCH_FILE ]]; then
+ rm -f $RIF_COUNTER_BATCH_FILE
+ fi
+}
+
+
+rif_counter_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ RIF_COUNTER_BATCH_FILE="$(mktemp)"
+
+ for ((i = 1; i <= count; i++)); do
+ vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2)
+ done
+ for ((i = 1; i <= count; i++)); do
+ cat >> $RIF_COUNTER_BATCH_FILE <<-EOF
+ stats set dev $h2.$i l3_stats on
+ EOF
+ done
+
+ ip -b $RIF_COUNTER_BATCH_FILE
+ check_err_fail $should_fail $? "RIF counter enablement"
+}
+
+rif_counter_traffic_test()
+{
+ local count=$1; shift
+ local i;
+
+ for ((i = count; i > 0; i /= 2)); do
+ $MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+ -A $(rif_counter_addr4 $i 1) \
+ -B $(rif_counter_addr4 $i 2) \
+ -q -t udp sp=54321,dp=12345
+ done
+ for ((i = count; i > 0; i /= 2)); do
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \
+ hw_stats_get l3_stats $h2.$i rx packets > /dev/null
+ check_err $? "Traffic not seen at RIF $h2.$i"
+ done
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
new file mode 100755
index 000000000000..b8bbe94f4736
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ lag_rif_add
+ lag_rif_nomaster
+ lag_rif_remaster
+ lag_rif_nomaster_addr
+"
+
+REQUIRE_TEAMD="yes"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ team_create lag1 lacp
+ ip link set dev lag1 addrgenmode none
+ ip link set dev lag1 address $(mac_get $swp1)
+
+ team_create lag2 lacp
+ ip link set dev lag2 addrgenmode none
+ ip link set dev lag2 address $(mac_get $swp2)
+
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master lag2
+ ip link set dev $swp2 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 nomaster
+ ip link set dev $swp1 down
+
+ ip link del dev lag2
+ ip link del dev lag1
+}
+
+lag_rif_add()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del lag1 add 192.0.2.2/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for LAG on address addition"
+}
+
+lag_rif_nomaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Drop RIF for LAG on port deslavement"
+}
+
+lag_rif_remaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 down
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+ setup_wait_dev $swp1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for LAG on port reenslavement"
+}
+
+lag_rif_nomaster_addr()
+{
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+ # Adding an address while the port is LAG'd shouldn't generate a RIF.
+ __addr_add_del $swp1 add 192.0.2.65/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ # Removing the port from LAG should drop RIF for the LAG (as tested in
+ # lag_rif_nomaster), but since the port now has an address, it should
+ # gain a RIF.
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t2=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t2))
+ check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used"
+
+ __addr_add_del $swp1 del 192.0.2.65/28
+ log_test "Add RIF for port on deslavement from LAG"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
new file mode 100755
index 000000000000..d1a9d379eaf3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ lag_rif_add
+ lag_rif_nomaster
+ lag_rif_remaster
+ lag_rif_nomaster_addr
+"
+
+REQUIRE_TEAMD="yes"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ team_create lag1 lacp
+ ip link set dev lag1 addrgenmode none
+ ip link set dev lag1 address $(mac_get $swp1)
+
+ team_create lag2 lacp
+ ip link set dev lag2 addrgenmode none
+ ip link set dev lag2 address $(mac_get $swp2)
+
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master lag2
+ ip link set dev $swp2 up
+
+ vlan_create lag1 100
+ ip link set dev lag1.100 addrgenmode none
+
+ vlan_create lag1 200
+ ip link set dev lag1.200 addrgenmode none
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link del dev lag1.200
+ ip link del dev lag1.100
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 nomaster
+ ip link set dev $swp1 down
+
+ ip link del dev lag2
+ ip link del dev lag1
+}
+
+lag_rif_add()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del lag1.100 add 192.0.2.2/28
+ __addr_add_del lag1.200 add 192.0.2.18/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 2))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIFs for LAG VLANs on address addition"
+}
+
+lag_rif_nomaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 2))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Drop RIFs for LAG VLANs on port deslavement"
+}
+
+lag_rif_remaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 down
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+ setup_wait_dev $swp1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 2))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIFs for LAG VLANs on port reenslavement"
+}
+
+lag_rif_nomaster_addr()
+{
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+ # Adding an address while the port is LAG'd shouldn't generate a RIF.
+ __addr_add_del $swp1 add 192.0.2.65/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ # Removing the port from LAG should drop two RIFs for the LAG VLANs (as
+ # tested in lag_rif_nomaster), but since the port now has an address, it
+ # should gain a RIF.
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t2=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 1))
+
+ ((expected_rifs == rifs_occ_t2))
+ check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used"
+
+ __addr_add_del $swp1 del 192.0.2.65/28
+ log_test "Add RIF for port on deslavement from LAG"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..71e7681f15f6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for RIF MAC profiles resource. The test adds VLAN netdevices according to
+# the maximum number of RIF MAC profiles, sets each of them with a random
+# MAC address, and checks that eventually the number of occupied RIF MAC
+# profiles equals the maximum number of RIF MAC profiles.
+
+
+RIF_MAC_PROFILE_NUM_NETIFS=2
+
+rif_mac_profiles_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+ local batch_file="$(mktemp)"
+
+ for ((i = 1; i <= count; i++)); do
+ vlan=$(( i*10 ))
+ m=$(( i*11 ))
+
+ cat >> $batch_file <<-EOF
+ link add link $h1 name $h1.$vlan \
+ address 00:$m:$m:$m:$m:$m type vlan id $vlan
+ address add 192.0.$m.1/24 dev $h1.$vlan
+ EOF
+ done
+
+ ip -b $batch_file &> /dev/null
+ check_err_fail $should_fail $? "RIF creation"
+
+ rm -f $batch_file
+}
+
+rif_mac_profile_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ rif_mac_profiles_create $count $should_fail
+
+ occ=$(devlink -j resource show $DEVLINK_DEV \
+ | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]')
+
+ [[ $occ -eq $count ]]
+ check_err_fail $should_fail $? "Attempt to use $count profiles (actual result $occ)"
+}
+
+rif_mac_profile_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ # Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
+ # being generated and RIFs being created.
+ sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
+ sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
+
+ ip link set $h1 up
+ ip link set $h2 up
+}
+
+rif_mac_profile_cleanup()
+{
+ pre_cleanup
+
+ ip link set $h2 down
+ ip link set $h1 down
+
+ sysctl_restore net.ipv6.conf.$h2.disable_ipv6
+ sysctl_restore net.ipv6.conf.$h1.disable_ipv6
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh
new file mode 100755
index 000000000000..c18340cee55d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ mac_profile_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+ ip route add 198.51.100.0/24 vrf v$h1 nexthop via 192.0.2.2
+
+ tc qdisc add dev $h1 ingress
+}
+
+h1_destroy()
+{
+ tc qdisc del dev $h1 ingress
+
+ ip route del 198.51.100.0/24 vrf v$h1
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/24
+ ip route add 192.0.2.0/24 vrf v$h2 nexthop via 198.51.100.2
+
+ tc qdisc add dev $h2 ingress
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 ingress
+
+ ip route del 192.0.2.0/24 vrf v$h2
+ simple_if_fini $h2 198.51.100.1/24
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ tc qdisc add dev $rp1 clsact
+ tc qdisc add dev $rp2 clsact
+ ip address add 192.0.2.2/24 dev $rp1
+ ip address add 198.51.100.2/24 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 198.51.100.2/24 dev $rp2
+ ip address del 192.0.2.2/24 dev $rp1
+ tc qdisc del dev $rp2 clsact
+ tc qdisc del dev $rp1 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+h1_to_h2()
+{
+ local test_name=$@; shift
+ local smac=$(mac_get $rp2)
+
+ RET=0
+
+ # Replace neighbour to avoid first packet being forwarded in software
+ ip neigh replace dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
+
+ # Add a filter to ensure that packets are forwarded in hardware. Cannot
+ # match on source MAC because it is not set in eACL after routing
+ tc filter add dev $rp2 egress proto ip pref 1 handle 101 \
+ flower skip_sw ip_proto udp src_port 12345 dst_port 54321 \
+ action pass
+
+ # Add a filter to ensure that packets are received with the correct
+ # source MAC
+ tc filter add dev $h2 ingress proto ip pref 1 handle 101 \
+ flower skip_sw src_mac $smac ip_proto udp src_port 12345 \
+ dst_port 54321 action pass
+
+ $MZ $h1 -a own -b $(mac_get $rp1) -t udp "sp=12345,dp=54321" \
+ -A 192.0.2.1 -B 198.51.100.1 -c 10 -p 100 -d 1msec -q
+
+ tc_check_packets "dev $rp2 egress" 101 10
+ check_err $? "packets not forwarded in hardware"
+
+ tc_check_packets "dev $h2 ingress" 101 10
+ check_err $? "packets not forwarded with correct source mac"
+
+ log_test "h1->h2: $test_name"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+ ip neigh del dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
+}
+
+h2_to_h1()
+{
+ local test_name=$@; shift
+ local rp1_mac=$(mac_get $rp1)
+
+ RET=0
+
+ ip neigh replace dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
+
+ tc filter add dev $rp1 egress proto ip pref 1 handle 101 \
+ flower skip_sw ip_proto udp src_port 54321 dst_port 12345 \
+ action pass
+
+ tc filter add dev $h1 ingress proto ip pref 1 handle 101 \
+ flower skip_sw src_mac $rp1_mac ip_proto udp src_port 54321 \
+ dst_port 12345 action pass
+
+ $MZ $h2 -a own -b $(mac_get $rp2) -t udp "sp=54321,dp=12345" \
+ -A 198.51.100.1 -B 192.0.2.1 -c 10 -p 100 -d 1msec -q
+
+ tc_check_packets "dev $rp1 egress" 101 10
+ check_err $? "packets not forwarded in hardware"
+
+ tc_check_packets "dev $h1 ingress" 101 10
+ check_err $? "packets not forwarded with correct source mac"
+
+ log_test "h2->h1: $test_name"
+
+ tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $rp1 egress protocol ip pref 1 handle 101 flower
+ ip neigh del dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
+}
+
+smac_test()
+{
+ local test_name=$@; shift
+
+ # Test that packets forwarded to $h2 via $rp2 are forwarded with the
+ # current source MAC of $rp2
+ h1_to_h2 $test_name
+
+ # Test that packets forwarded to $h1 via $rp1 are forwarded with the
+ # current source MAC of $rp1. This MAC is never changed during the test,
+ # but given the shared nature of MAC profile, the point is to see that
+ # changes to the MAC of $rp2 do not affect that of $rp1
+ h2_to_h1 $test_name
+}
+
+mac_profile_test()
+{
+ local rp2_mac=$(mac_get $rp2)
+
+ # Test behavior when the RIF backing $rp2 is transitioned to use
+ # a new MAC profile
+ ip link set dev $rp2 addr 00:11:22:33:44:55
+ smac_test "new mac profile"
+
+ # Test behavior when the MAC profile used by the RIF is edited
+ ip link set dev $rp2 address 00:22:22:22:22:22
+ smac_test "edit mac profile"
+
+ # Restore original MAC
+ ip link set dev $rp2 addr $rp2_mac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+mac_profiles=$(devlink_resource_size_get rif_mac_profiles)
+if [[ $mac_profiles -ne 1 ]]; then
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
new file mode 100755
index 000000000000..026a126f584d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ rif_mac_profile_edit_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ # Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
+ # being generated and RIFs being created
+ sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
+ sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
+
+ ip link set $h1 up
+ ip link set $h2 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set $h2 down
+ ip link set $h1 down
+
+ sysctl_restore net.ipv6.conf.$h2.disable_ipv6
+ sysctl_restore net.ipv6.conf.$h1.disable_ipv6
+
+ # Reload in order to clean all the RIFs and RIF MAC profiles created
+ devlink_reload
+}
+
+create_max_rif_mac_profiles()
+{
+ local count=$1; shift
+ local batch_file="$(mktemp)"
+
+ for ((i = 1; i <= count; i++)); do
+ vlan=$(( i*10 ))
+ m=$(( i*11 ))
+
+ cat >> $batch_file <<-EOF
+ link add link $h1 name $h1.$vlan \
+ address 00:$m:$m:$m:$m:$m type vlan id $vlan
+ address add 192.0.$m.1/24 dev $h1.$vlan
+ EOF
+ done
+
+ ip -b $batch_file &> /dev/null
+ rm -f $batch_file
+}
+
+rif_mac_profile_replacement_test()
+{
+ local h1_10_mac=$(mac_get $h1.10)
+
+ RET=0
+
+ ip link set $h1.10 address 00:12:34:56:78:99
+ check_err $?
+
+ log_test "RIF MAC profile replacement"
+
+ ip link set $h1.10 address $h1_10_mac
+}
+
+rif_mac_profile_consolidation_test()
+{
+ local count=$1; shift
+ local h1_20_mac
+
+ RET=0
+
+ if [[ $count -eq 1 ]]; then
+ return
+ fi
+
+ h1_20_mac=$(mac_get $h1.20)
+
+ # Set the MAC of $h1.20 to that of $h1.10 and confirm that they are
+ # using the same MAC profile.
+ ip link set $h1.20 address 00:11:11:11:11:11
+ check_err $?
+
+ occ=$(devlink -j resource show $DEVLINK_DEV \
+ | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]')
+
+ [[ $occ -eq $((count - 1)) ]]
+ check_err $? "MAC profile occupancy did not decrease"
+
+ log_test "RIF MAC profile consolidation"
+
+ ip link set $h1.20 address $h1_20_mac
+}
+
+rif_mac_profile_shared_replacement_test()
+{
+ local count=$1; shift
+ local i=$((count + 1))
+ local vlan=$(( i*10 ))
+ local m=11
+
+ RET=0
+
+ # Create a VLAN netdevice that has the same MAC as the first one.
+ ip link add link $h1 name $h1.$vlan address 00:$m:$m:$m:$m:$m \
+ type vlan id $vlan
+ ip address add 192.0.$m.1/24 dev $h1.$vlan
+
+ # MAC replacement should fail because all the MAC profiles are in use
+ # and the profile is shared between multiple RIFs
+ m=$(( i*11 ))
+ ip link set $h1.$vlan address 00:$m:$m:$m:$m:$m &> /dev/null
+ check_fail $?
+
+ log_test "RIF MAC profile shared replacement"
+
+ ip link del dev $h1.$vlan
+}
+
+rif_mac_profile_edit_test()
+{
+ local count=$(devlink_resource_size_get rif_mac_profiles)
+
+ create_max_rif_mac_profiles $count
+
+ rif_mac_profile_replacement_test
+ rif_mac_profile_consolidation_test $count
+ rif_mac_profile_shared_replacement_test $count
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh
new file mode 100755
index 000000000000..6ce317cfaf9b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test enslavement to LAG with a clean slate.
+# See $lib_dir/router_bridge_lag.sh for further details.
+
+ALL_TESTS="
+ config_devlink_reload
+ config_enslave_h1
+ config_enslave_h2
+ config_enslave_h3
+ config_enslave_h4
+ config_enslave_swp1
+ config_enslave_swp2
+ config_enslave_swp3
+ config_enslave_swp4
+ config_wait
+ ping_ipv4
+ ping_ipv6
+"
+
+config_devlink_reload()
+{
+ log_info "Devlink reload"
+ devlink_reload
+}
+
+config_enslave_h1()
+{
+ config_enslave $h1 lag1
+}
+
+config_enslave_h2()
+{
+ config_enslave $h2 lag4
+}
+
+config_enslave_h3()
+{
+ config_enslave $h3 lag4
+}
+
+config_enslave_h4()
+{
+ config_enslave $h4 lag1
+}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+EXTRA_SOURCE="source $lib_dir/devlink_lib.sh"
+source $lib_dir/router_bridge_lag.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
index e93878d42596..683759d29199 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -68,7 +68,7 @@ wait_for_routes()
local t0=$1; shift
local route_count=$1; shift
- local t1=$(ip route | grep -o 'offload' | wc -l)
+ local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l)
local delta=$((t1 - t0))
echo $delta
[[ $delta -ge $route_count ]]
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index ed346da5d3cb..45a569618424 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -10,15 +10,12 @@
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
- rif_set_addr_test
rif_vrf_set_addr_test
- rif_inherit_bridge_addr_test
rif_non_inherit_bridge_addr_test
vlan_interface_deletion_test
bridge_deletion_test
bridge_vlan_flags_test
vlan_1_test
- lag_bridge_upper_test
duplicate_vlans_test
vlan_rif_refcount_test
subport_rif_refcount_test
@@ -33,8 +30,10 @@ ALL_TESTS="
nexthop_obj_invalid_test
nexthop_obj_offload_test
nexthop_obj_group_offload_test
+ nexthop_obj_bucket_offload_test
nexthop_obj_blackhole_offload_test
nexthop_obj_route_offload_test
+ bridge_locked_port_test
devlink_reload_test
"
NUM_NETIFS=2
@@ -59,55 +58,6 @@ cleanup()
ip link set dev $swp1 down
}
-rif_set_addr_test()
-{
- local swp1_mac=$(mac_get $swp1)
- local swp2_mac=$(mac_get $swp2)
-
- RET=0
-
- # $swp1 and $swp2 likely got their IPv6 local addresses already, but
- # here we need to test the transition to RIF.
- ip addr flush dev $swp1
- ip addr flush dev $swp2
- sleep .1
-
- ip addr add dev $swp1 192.0.2.1/28
- check_err $?
-
- ip link set dev $swp1 addr 00:11:22:33:44:55
- check_err $?
-
- # IP address enablement should be rejected if the MAC address prefix
- # doesn't match other RIFs.
- ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
- check_fail $? "IP address addition passed for a device with a wrong MAC"
- ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "no extack for IP address addition"
-
- ip link set dev $swp2 addr 00:11:22:33:44:66
- check_err $?
- ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
- check_err $?
-
- # Change of MAC address of a RIF should be forbidden if the new MAC
- # doesn't share the prefix with other MAC addresses.
- ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null
- check_fail $? "change of MAC address passed for a wrong MAC"
- ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "no extack for MAC address change"
-
- log_test "RIF - bad MAC change"
-
- ip addr del dev $swp2 192.0.2.2/28
- ip addr del dev $swp1 192.0.2.1/28
-
- ip link set dev $swp2 addr $swp2_mac
- ip link set dev $swp1 addr $swp1_mac
-}
-
rif_vrf_set_addr_test()
{
# Test that it is possible to set an IP address on a VRF upper despite
@@ -127,45 +77,6 @@ rif_vrf_set_addr_test()
ip link del dev vrf-test
}
-rif_inherit_bridge_addr_test()
-{
- RET=0
-
- # Create first RIF
- ip addr add dev $swp1 192.0.2.1/28
- check_err $?
-
- # Create a FID RIF
- ip link add name br1 up type bridge vlan_filtering 0
- ip link set dev $swp2 master br1
- ip addr add dev br1 192.0.2.17/28
- check_err $?
-
- # Prepare a device with a low MAC address
- ip link add name d up type dummy
- ip link set dev d addr 00:11:22:33:44:55
-
- # Attach the device to br1. That prompts bridge address change, which
- # should be vetoed, thus preventing the attachment.
- ip link set dev d master br1 &>/dev/null
- check_fail $? "Device with low MAC was permitted to attach a bridge with RIF"
- ip link set dev d master br1 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "no extack for bridge attach rejection"
-
- ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null
- check_fail $? "Changing swp2's MAC address permitted"
- ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "no extack for bridge port MAC address change rejection"
-
- log_test "RIF - attach port with bad MAC to bridge"
-
- ip link del dev d
- ip link del dev br1
- ip addr del dev $swp1 192.0.2.1/28
-}
-
rif_non_inherit_bridge_addr_test()
{
local swp2_mac=$(mac_get $swp2)
@@ -275,10 +186,7 @@ bridge_vlan_flags_test()
# If we did not handle references correctly, then this should produce a
# trace
- devlink dev reload "$DEVLINK_DEV"
-
- # Allow netdevices to be re-created following the reload
- sleep 20
+ devlink_reload
log_test "bridge vlan flags"
}
@@ -299,33 +207,6 @@ vlan_1_test()
ip link del dev $swp1.1
}
-lag_bridge_upper_test()
-{
- # Test that ports cannot be enslaved to LAG devices that have uppers
- # and that failure is handled gracefully. See commit b3529af6bb0d
- # ("spectrum: Reference count VLAN entries") for more details
- RET=0
-
- ip link add name bond1 type bond mode 802.3ad
-
- ip link add name br0 type bridge vlan_filtering 1
- ip link set dev bond1 master br0
-
- ip link set dev $swp1 down
- ip link set dev $swp1 master bond1 &> /dev/null
- check_fail $? "managed to enslave port to lag when should not"
-
- # This might generate a trace, if we did not handle the failure
- # correctly
- ip -6 address add 2001:db8:1::1/64 dev $swp1
- ip -6 address del 2001:db8:1::1/64 dev $swp1
-
- log_test "lag with bridge upper"
-
- ip link del dev br0
- ip link del dev bond1
-}
-
duplicate_vlans_test()
{
# Test that on a given port a VLAN is only used once. Either as VLAN
@@ -598,9 +479,6 @@ vlan_interface_uppers_test()
ip link set dev $swp1 master br0
ip link add link br0 name br0.10 type vlan id 10
- ip link add link br0.10 name macvlan0 \
- type macvlan mode private &> /dev/null
- check_fail $? "managed to create a macvlan when should not"
ip -6 address add 2001:db8:1::1/64 dev br0.10
ip link add link br0.10 name macvlan0 type macvlan mode private
@@ -739,11 +617,28 @@ nexthop_obj_invalid_test()
ip nexthop add id 1 dev $swp1
ip nexthop add id 2 dev $swp1
+ ip nexthop add id 3 via 192.0.2.3 dev $swp1
ip nexthop add id 10 group 1/2
check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
+ ip nexthop add id 10 group 3 type resilient buckets 7
+ check_fail $? "managed to configure a too small resilient nexthop group when should not"
+
+ ip nexthop add id 10 group 3 type resilient buckets 129
+ check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not"
+
+ ip nexthop add id 10 group 1/2 type resilient buckets 32
+ check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not"
+
+ ip nexthop add id 10 group 3 type resilient buckets 32
+ check_err $? "failed to configure a valid resilient nexthop group"
+ ip nexthop replace id 3 dev $swp1
+ check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not"
+
log_test "nexthop objects - invalid configurations"
+ ip nexthop del id 10
+ ip nexthop del id 3
ip nexthop del id 2
ip nexthop del id 1
@@ -761,7 +656,7 @@ nexthop_obj_offload_test()
setup_wait
ip nexthop add id 1 via 192.0.2.2 dev $swp1
- ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
dev $swp1
busywait "$TIMEOUT" wait_for_offload \
@@ -773,7 +668,7 @@ nexthop_obj_offload_test()
ip nexthop show id 1
check_err $? "nexthop marked as offloaded after setting neigh to failed state"
- ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
dev $swp1
busywait "$TIMEOUT" wait_for_offload \
ip nexthop show id 1
@@ -810,11 +705,11 @@ nexthop_obj_group_offload_test()
ip nexthop add id 1 via 192.0.2.2 dev $swp1
ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
ip nexthop add id 10 group 1/2
- ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
dev $swp1
- ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+ ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
dev $swp1
- ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
dev $swp1
busywait "$TIMEOUT" wait_for_offload \
@@ -858,6 +753,70 @@ nexthop_obj_group_offload_test()
simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
}
+nexthop_obj_bucket_offload_test()
+{
+ # Test offload indication of nexthop buckets
+ RET=0
+
+ simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+ simple_if_init $swp2
+ setup_wait
+
+ ip nexthop add id 1 via 192.0.2.2 dev $swp1
+ ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+ ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 1
+ check_err $? "IPv4 nexthop buckets not marked as offloaded when should"
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 2
+ check_err $? "IPv6 nexthop buckets not marked as offloaded when should"
+
+ # Invalidate nexthop id 1
+ ip neigh replace 192.0.2.2 nud failed dev $swp1
+ busywait "$TIMEOUT" wait_for_trap \
+ ip nexthop bucket show nhid 1
+ check_err $? "IPv4 nexthop buckets not marked with trap when should"
+
+ # Invalidate nexthop id 2
+ ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+ busywait "$TIMEOUT" wait_for_trap \
+ ip nexthop bucket show nhid 2
+ check_err $? "IPv6 nexthop buckets not marked with trap when should"
+
+ # Revalidate nexthop id 1 by changing its configuration
+ ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 1
+ check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop"
+
+ # Revalidate nexthop id 2 by changing its neighbour
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 2
+ check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour"
+
+ log_test "nexthop bucket offload indication"
+
+ ip neigh del 2001:db8:1::2 dev $swp1
+ ip neigh del 192.0.2.3 dev $swp1
+ ip neigh del 192.0.2.2 dev $swp1
+ ip nexthop del id 10
+ ip nexthop del id 2
+ ip nexthop del id 1
+
+ simple_if_fini $swp2
+ simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
nexthop_obj_blackhole_offload_test()
{
# Test offload indication of blackhole nexthop objects
@@ -889,9 +848,9 @@ nexthop_obj_route_offload_test()
setup_wait
ip nexthop add id 1 via 192.0.2.2 dev $swp1
- ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
dev $swp1
- ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+ ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
dev $swp1
ip route replace 198.51.100.0/24 nhid 1
@@ -925,18 +884,45 @@ nexthop_obj_route_offload_test()
simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
}
+bridge_locked_port_test()
+{
+ RET=0
+
+ ip link add name br1 up type bridge vlan_filtering 0
+
+ ip link add link $swp1 name $swp1.10 type vlan id 10
+ ip link set dev $swp1.10 master br1
+
+ bridge link set dev $swp1.10 locked on
+ check_fail $? "managed to set locked flag on a VLAN upper"
+
+ ip link set dev $swp1.10 nomaster
+ ip link set dev $swp1 master br1
+
+ bridge link set dev $swp1 locked on
+ check_fail $? "managed to set locked flag on a bridge port that has a VLAN upper"
+
+ ip link del dev $swp1.10
+ bridge link set dev $swp1 locked on
+
+ ip link add link $swp1 name $swp1.10 type vlan id 10
+ check_fail $? "managed to configure a VLAN upper on a locked port"
+
+ log_test "bridge locked port"
+
+ ip link del dev $swp1.10 &> /dev/null
+ ip link del dev br1
+}
+
devlink_reload_test()
{
# Test that after executing all the above configuration tests, a
# devlink reload can be performed without errors
RET=0
- devlink dev reload "$DEVLINK_DEV"
- check_err $? "devlink reload failed"
+ devlink_reload
log_test "devlink reload - last test"
-
- sleep 20
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index af64bc9ea8ab..4aaceb6b2b60 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -5,7 +5,6 @@
lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/sch_ets_core.sh
source $lib_dir/devlink_lib.sh
-source qos_lib.sh
ALL_TESTS="
ping_ipv4
@@ -15,13 +14,16 @@ ALL_TESTS="
ets_test_dwrr
"
+PARENT="parent 3:3"
+
switch_create()
{
- ets_switch_create
-
# Create a bottleneck so that the DWRR process can kick in.
- ethtool -s $h2 speed 1000 autoneg off
- ethtool -s $swp2 speed 1000 autoneg off
+ tc qdisc replace dev $swp2 root handle 3: tbf rate 1gbit \
+ burst 128K limit 1G
+ defer tc qdisc del dev $swp2 root handle 3:
+
+ ets_switch_create
# Set the ingress quota high and use the three egress TCs to limit the
# amount of traffic that is admitted to the shared buffers. This makes
@@ -29,16 +31,27 @@ switch_create()
# for the DWRR process.
devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 12
+ defer devlink_port_pool_th_restore $swp1 0
+
devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
+ defer devlink_tc_bind_pool_th_restore $swp1 0 ingress
+
devlink_port_pool_th_save $swp2 4
devlink_port_pool_th_set $swp2 4 12
+ defer devlink_port_pool_th_restore $swp2 4
+
devlink_tc_bind_pool_th_save $swp2 7 egress
devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
+ defer devlink_tc_bind_pool_th_restore $swp2 7 egress
+
devlink_tc_bind_pool_th_save $swp2 6 egress
devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
+ defer devlink_tc_bind_pool_th_restore $swp2 6 egress
+
devlink_tc_bind_pool_th_save $swp2 5 egress
devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
+ defer devlink_tc_bind_pool_th_restore $swp2 5 egress
# Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
# priorities at $swp1 based on their 802.1p headers. ingress-qos-map is
@@ -46,21 +59,6 @@ switch_create()
# 1:1, which is the mapping currently hard-coded by the driver.
}
-switch_destroy()
-{
- devlink_tc_bind_pool_th_restore $swp2 5 egress
- devlink_tc_bind_pool_th_restore $swp2 6 egress
- devlink_tc_bind_pool_th_restore $swp2 7 egress
- devlink_port_pool_th_restore $swp2 4
- devlink_tc_bind_pool_th_restore $swp1 0 ingress
- devlink_port_pool_th_restore $swp1 0
-
- ethtool -s $swp2 autoneg on
- ethtool -s $h2 autoneg on
-
- ets_switch_destroy
-}
-
# Callback from sch_ets_tests.sh
collect_stats()
{
@@ -77,5 +75,5 @@ collect_stats()
done
}
-bail_on_lldpad
+bail_on_lldpad "configure DCB" "configure Qdiscs"
ets_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh
new file mode 100755
index 000000000000..071a33d10c20
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh
@@ -0,0 +1,290 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test qdisc offload indication
+
+
+ALL_TESTS="
+ test_root
+ test_port_tbf
+ test_etsprio
+ test_etsprio_port_tbf
+"
+NUM_NETIFS=1
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+
+check_not_offloaded()
+{
+ local handle=$1; shift
+ local h
+ local offloaded
+
+ h=$(qdisc_stats_get $h1 "$handle" .handle)
+ [[ $h == '"'$handle'"' ]]
+ check_err $? "Qdisc with handle $handle does not exist"
+
+ offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)
+ [[ $offloaded == true ]]
+ check_fail $? "Qdisc with handle $handle offloaded, but should not be"
+}
+
+check_all_offloaded()
+{
+ local handle=$1; shift
+
+ if [[ ! -z $handle ]]; then
+ local offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)
+ [[ $offloaded == true ]]
+ check_err $? "Qdisc with handle $handle not offloaded"
+ fi
+
+ local unoffloaded=$(tc q sh dev $h1 invisible |
+ grep -v offloaded |
+ sed s/root/parent\ root/ |
+ cut -d' ' -f 5)
+ [[ -z $unoffloaded ]]
+ check_err $? "Qdiscs with following parents not offloaded: $unoffloaded"
+
+ pre_cleanup
+}
+
+with_ets()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle \
+ ets bands 8 priomap 7 6 5 4 3 2 1 0
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_prio()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle \
+ prio bands 8 priomap 7 6 5 4 3 2 1 0
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_red()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle \
+ red limit 1000000 min 200000 max 300000 probability 0.5 avpkt 1500
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_tbf()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle \
+ tbf rate 400Mbit burst 128K limit 1M
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_pfifo()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle pfifo limit 100K
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_bfifo()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle bfifo limit 100K
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_drr()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle drr
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_qdiscs()
+{
+ local handle=$1; shift
+ local parent=$1; shift
+ local kind=$1; shift
+ local next_handle=$((handle * 2))
+ local locus;
+
+ if [[ $kind == "--" ]]; then
+ local cmd=$1; shift
+ $cmd $(printf %x: $parent) "$@"
+ else
+ if ((parent == 0)); then
+ locus=root
+ else
+ locus=$(printf "parent %x:1" $parent)
+ fi
+
+ with_$kind $(printf %x: $handle) "$locus" \
+ with_qdiscs $next_handle $handle "$@"
+ fi
+}
+
+get_name()
+{
+ local parent=$1; shift
+ local name=$(echo "" "${@^^}" | tr ' ' -)
+
+ if ((parent != 0)); then
+ kind=$(qdisc_stats_get $h1 $parent: .kind)
+ kind=${kind%\"}
+ kind=${kind#\"}
+ name="-${kind^^}$name"
+ fi
+
+ echo root$name
+}
+
+do_test_offloaded()
+{
+ local handle=$1; shift
+ local parent=$1; shift
+
+ RET=0
+ with_qdiscs $handle $parent "$@" -- check_all_offloaded
+ log_test $(get_name $parent "$@")" offloaded"
+}
+
+do_test_nooffload()
+{
+ local handle=$1; shift
+ local parent=$1; shift
+
+ local name=$(echo "${@^^}" | tr ' ' -)
+ local kind
+
+ RET=0
+ with_qdiscs $handle $parent "$@" -- check_not_offloaded
+ log_test $(get_name $parent "$@")" not offloaded"
+}
+
+do_test_combinations()
+{
+ local handle=$1; shift
+ local parent=$1; shift
+
+ local cont
+ local leaf
+ local fifo
+
+ for cont in "" ets prio; do
+ for leaf in "" red tbf "red tbf" "tbf red"; do
+ for fifo in "" pfifo bfifo; do
+ if [[ -z "$cont$leaf$fifo" ]]; then
+ continue
+ fi
+ do_test_offloaded $handle $parent \
+ $cont $leaf $fifo
+ done
+ done
+ done
+
+ for cont in ets prio; do
+ for leaf in red tbf; do
+ do_test_nooffload $handle $parent $cont red tbf $leaf
+ do_test_nooffload $handle $parent $cont tbf red $leaf
+ done
+ for leaf in "red red" "tbf tbf"; do
+ do_test_nooffload $handle $parent $cont $leaf
+ done
+ done
+
+ do_test_nooffload $handle $parent drr
+}
+
+test_root()
+{
+ do_test_combinations 1 0
+}
+
+test_port_tbf()
+{
+ with_tbf 1: root \
+ do_test_combinations 8 1
+}
+
+do_test_etsprio()
+{
+ local parent=$1; shift
+ local tbfpfx=$1; shift
+ local cont
+
+ for cont in ets prio; do
+ RET=0
+ with_$cont 8: "$parent" \
+ with_red 11: "parent 8:1" \
+ with_red 12: "parent 8:2" \
+ with_tbf 13: "parent 8:3" \
+ with_tbf 14: "parent 8:4" \
+ check_all_offloaded
+ log_test "root$tbfpfx-ETS-{RED,TBF} offloaded"
+
+ RET=0
+ with_$cont 8: "$parent" \
+ with_red 81: "parent 8:1" \
+ with_tbf 811: "parent 81:1" \
+ with_tbf 84: "parent 8:4" \
+ with_red 841: "parent 84:1" \
+ check_all_offloaded
+ log_test "root$tbfpfx-ETS-{RED-TBF,TBF-RED} offloaded"
+
+ RET=0
+ with_$cont 8: "$parent" \
+ with_red 81: "parent 8:1" \
+ with_tbf 811: "parent 81:1" \
+ with_bfifo 8111: "parent 811:1" \
+ with_tbf 82: "parent 8:2" \
+ with_red 821: "parent 82:1" \
+ with_bfifo 8211: "parent 821:1" \
+ check_all_offloaded
+ log_test "root$tbfpfx-ETS-{RED-TBF-bFIFO,TBF-RED-bFIFO} offloaded"
+ done
+}
+
+test_etsprio()
+{
+ do_test_etsprio root ""
+}
+
+test_etsprio_port_tbf()
+{
+ with_tbf 1: root \
+ do_test_etsprio "parent 1:1" "-TBF"
+}
+
+cleanup()
+{
+ tc qdisc del dev $h1 root &>/dev/null
+}
+
+trap cleanup EXIT
+h1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index b0cb1aaffdda..47d2ffcf366e 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -73,7 +73,19 @@ CHECK_TC="yes"
lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
-source qos_lib.sh
+source mlxsw_lib.sh
+
+stop_traffic_sleep()
+{
+ local pid=$1; shift
+
+ # Issuing a kill still leaves a bunch of packets lingering in the
+ # buffers. This traffic then arrives at the point where a follow-up test
+ # is already running, and can confuse the test. Therefore sleep after
+ # stopping traffic to flush any leftover packets.
+ stop_traffic "$pid"
+ sleep 1
+}
ipaddr()
{
@@ -88,40 +100,31 @@ host_create()
local dev=$1; shift
local host=$1; shift
- simple_if_init $dev
+ adf_simple_if_init $dev
+
mtu_set $dev 10000
+ defer mtu_restore $dev
vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
+ defer vlan_destroy $dev 10
ip link set dev $dev.10 type vlan egress 0:0
vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
+ defer vlan_destroy $dev 11
ip link set dev $dev.11 type vlan egress 0:1
}
-host_destroy()
-{
- local dev=$1; shift
-
- vlan_destroy $dev 11
- vlan_destroy $dev 10
- mtu_restore $dev
- simple_if_fini $dev
-}
-
h1_create()
{
host_create $h1 1
}
-h1_destroy()
-{
- host_destroy $h1
-}
-
h2_create()
{
host_create $h2 2
+
tc qdisc add dev $h2 clsact
+ defer tc qdisc del dev $h2 clsact
# Some of the tests in this suite use multicast traffic. As this traffic
# enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
@@ -134,28 +137,17 @@ h2_create()
# cause packets to fail to queue up at $swp3 due to shared buffer
# quotas, and the test to spuriously fail.
#
- # Prevent this by setting the speed of $h2 to 1Gbps.
-
- ethtool -s $h2 speed 1000 autoneg off
-}
+ # Prevent this by adding a shaper which limits the traffic in $h2 to
+ # 1Gbps.
-h2_destroy()
-{
- ethtool -s $h2 autoneg on
- tc qdisc del dev $h2 clsact
- host_destroy $h2
+ tc qdisc replace dev $h2 root handle 10: tbf rate 200mbit \
+ burst 128K limit 1G
+ defer tc qdisc del dev $h2 root handle 10:
}
h3_create()
{
host_create $h3 3
- ethtool -s $h3 speed 1000 autoneg off
-}
-
-h3_destroy()
-{
- ethtool -s $h3 autoneg on
- host_destroy $h3
}
switch_create()
@@ -164,29 +156,48 @@ switch_create()
local vlan
ip link add dev br1_10 type bridge
+ defer ip link del dev br1_10
+
ip link add dev br1_11 type bridge
+ defer ip link del dev br1_11
ip link add dev br2_10 type bridge
+ defer ip link del dev br2_10
+
ip link add dev br2_11 type bridge
+ defer ip link del dev br2_11
for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do
ip link set dev $intf up
+ defer ip link set dev $intf down
+
mtu_set $intf 10000
+ defer mtu_restore $intf
done
for intf in $swp1 $swp4; do
for vlan in 10 11; do
vlan_create $intf $vlan
+ defer vlan_destroy $intf $vlan
+
ip link set dev $intf.$vlan master br1_$vlan
+ defer ip link set dev $intf.$vlan nomaster
+
ip link set dev $intf.$vlan up
+ defer ip link set dev $intf.$vlan up
done
done
for intf in $swp2 $swp3 $swp5; do
for vlan in 10 11; do
vlan_create $intf $vlan
+ defer vlan_destroy $intf $vlan
+
ip link set dev $intf.$vlan master br2_$vlan
+ defer ip link set dev $intf.$vlan nomaster
+
ip link set dev $intf.$vlan up
+ defer ip link set dev $intf.$vlan up
done
done
@@ -198,53 +209,28 @@ switch_create()
done
done
- for intf in $swp2 $swp3 $swp4 $swp5; do
- ethtool -s $intf speed 1000 autoneg off
+ for intf in $swp3 $swp4; do
+ tc qdisc replace dev $intf root handle 1: tbf rate 200mbit \
+ burst 128K limit 1G
+ defer tc qdisc del dev $intf root handle 1:
done
ip link set dev br1_10 up
+ defer ip link set dev br1_10 down
+
ip link set dev br1_11 up
+ defer ip link set dev br1_11 down
+
ip link set dev br2_10 up
+ defer ip link set dev br2_10 down
+
ip link set dev br2_11 up
+ defer ip link set dev br2_11 down
local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
devlink_port_pool_th_save $swp3 8
devlink_port_pool_th_set $swp3 8 $size
-}
-
-switch_destroy()
-{
- local intf
- local vlan
-
- devlink_port_pool_th_restore $swp3 8
-
- tc qdisc del dev $swp3 root 2>/dev/null
-
- ip link set dev br2_11 down
- ip link set dev br2_10 down
- ip link set dev br1_11 down
- ip link set dev br1_10 down
-
- for intf in $swp5 $swp4 $swp3 $swp2; do
- ethtool -s $intf autoneg on
- done
-
- for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
- for vlan in 11 10; do
- ip link set dev $intf.$vlan down
- ip link set dev $intf.$vlan nomaster
- vlan_destroy $intf $vlan
- done
-
- mtu_restore $intf
- ip link set dev $intf down
- done
-
- ip link del dev br2_11
- ip link del dev br2_10
- ip link del dev br1_11
- ip link del dev br1_10
+ defer devlink_port_pool_th_restore $swp3 8
}
setup_prepare()
@@ -263,7 +249,7 @@ setup_prepare()
h3_mac=$(mac_get $h3)
- vrf_prepare
+ adf_vrf_prepare
h1_create
h2_create
@@ -271,18 +257,6 @@ setup_prepare()
switch_create
}
-cleanup()
-{
- pre_cleanup
-
- switch_destroy
- h3_destroy
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-}
-
ping_ipv4()
{
ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10"
@@ -331,6 +305,14 @@ get_nmarked()
ethtool_stats_get $swp3 ecn_marked
}
+get_qdisc_nmarked()
+{
+ local vlan=$1; shift
+
+ busywait_for_counter 1100 +1 \
+ qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked
+}
+
get_qdisc_npackets()
{
local vlan=$1; shift
@@ -365,6 +347,7 @@ build_backlog()
local i=0
while :; do
+ sleep 1
local cur=$(busywait 1100 until_counter_is "> $cur" \
get_qdisc_backlog $vlan)
local diff=$((size - cur))
@@ -384,14 +367,15 @@ build_backlog()
check_marking()
{
+ local get_nmarked=$1; shift
local vlan=$1; shift
local cond=$1; shift
local npackets_0=$(get_qdisc_npackets $vlan)
- local nmarked_0=$(get_nmarked $vlan)
+ local nmarked_0=$($get_nmarked $vlan)
sleep 5
local npackets_1=$(get_qdisc_npackets $vlan)
- local nmarked_1=$(get_nmarked $vlan)
+ local nmarked_1=$($get_nmarked $vlan)
local nmarked_d=$((nmarked_1 - nmarked_0))
local npackets_d=$((npackets_1 - npackets_0))
@@ -404,6 +388,7 @@ check_marking()
ecn_test_common()
{
local name=$1; shift
+ local get_nmarked=$1; shift
local vlan=$1; shift
local limit=$1; shift
local backlog
@@ -416,7 +401,7 @@ ecn_test_common()
RET=0
backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
check_err $? "Could not build the requested backlog"
- pct=$(check_marking $vlan "== 0")
+ pct=$(check_marking "$get_nmarked" $vlan "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "TC $((vlan - 10)): $name backlog < limit"
@@ -426,22 +411,24 @@ ecn_test_common()
RET=0
backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
- pct=$(check_marking $vlan ">= 95")
+ pct=$(check_marking "$get_nmarked" $vlan ">= 95")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
log_test "TC $((vlan - 10)): $name backlog > limit"
}
-do_ecn_test()
+__do_ecn_test()
{
+ local get_nmarked=$1; shift
local vlan=$1; shift
local limit=$1; shift
- local name=ECN
+ local name=${1-ECN}; shift
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
$h3_mac tos=0x01
+ defer stop_traffic_sleep $!
sleep 1
- ecn_test_common "$name" $vlan $limit
+ ecn_test_common "$name" "$get_nmarked" $vlan $limit
# Up there we saw that UDP gets accepted when backlog is below the
# limit. Now that it is above, it should all get dropped, and backlog
@@ -450,12 +437,28 @@ do_ecn_test()
build_backlog $vlan $((2 * limit)) udp >/dev/null
check_fail $? "UDP traffic went into backlog instead of being early-dropped"
log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped"
+}
- stop_traffic
- sleep 1
+do_ecn_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+
+ in_defer_scope \
+ __do_ecn_test get_nmarked "$vlan" "$limit"
}
-do_ecn_nodrop_test()
+do_ecn_test_perband()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+
+ mlxsw_only_on_spectrum 3+ || return
+ in_defer_scope \
+ __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN"
+}
+
+__do_ecn_nodrop_test()
{
local vlan=$1; shift
local limit=$1; shift
@@ -463,9 +466,10 @@ do_ecn_nodrop_test()
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
$h3_mac tos=0x01
+ defer stop_traffic_sleep $!
sleep 1
- ecn_test_common "$name" $vlan $limit
+ ecn_test_common "$name" get_nmarked $vlan $limit
# Up there we saw that UDP gets accepted when backlog is below the
# limit. Now that it is above, in nodrop mode, make sure it goes to
@@ -474,12 +478,15 @@ do_ecn_nodrop_test()
build_backlog $vlan $((2 * limit)) udp >/dev/null
check_err $? "UDP traffic was early-dropped instead of getting into backlog"
log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped"
+}
- stop_traffic
- sleep 1
+do_ecn_nodrop_test()
+{
+ in_defer_scope \
+ __do_ecn_nodrop_test "$@"
}
-do_red_test()
+__do_red_test()
{
local vlan=$1; shift
local limit=$1; shift
@@ -490,12 +497,13 @@ do_red_test()
# is above limit.
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
$h3_mac tos=0x01
+ defer stop_traffic_sleep $!
# Pushing below the queue limit should work.
RET=0
backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
- pct=$(check_marking $vlan "== 0")
+ pct=$(check_marking get_nmarked $vlan "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "TC $((vlan - 10)): RED backlog < limit"
@@ -503,19 +511,23 @@ do_red_test()
RET=0
backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
check_fail $? "Traffic went into backlog instead of being early-dropped"
- pct=$(check_marking $vlan "== 0")
+ pct=$(check_marking get_nmarked $vlan "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ backlog=$(get_qdisc_backlog $vlan)
local diff=$((limit - backlog))
pct=$((100 * diff / limit))
- ((0 <= pct && pct <= 5))
- check_err $? "backlog $backlog / $limit expected <= 5% distance"
+ ((-15 <= pct && pct <= 15))
+ check_err $? "backlog $backlog / $limit expected <= 15% distance"
log_test "TC $((vlan - 10)): RED backlog > limit"
+}
- stop_traffic
- sleep 1
+do_red_test()
+{
+ in_defer_scope \
+ __do_red_test "$@"
}
-do_mc_backlog_test()
+__do_mc_backlog_test()
{
local vlan=$1; shift
local limit=$1; shift
@@ -525,7 +537,10 @@ do_mc_backlog_test()
RET=0
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
+ defer stop_traffic_sleep $!
+
start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc
+ defer stop_traffic_sleep $!
qbl=$(busywait 5000 until_counter_is ">= 500000" \
get_qdisc_backlog $vlan)
@@ -538,27 +553,84 @@ do_mc_backlog_test()
get_mc_transmit_queue $vlan)
check_err $? "MC backlog reported by qdisc not visible in ethtool"
- stop_traffic
- stop_traffic
-
log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
}
-do_drop_test()
+do_mc_backlog_test()
+{
+ in_defer_scope \
+ __do_mc_backlog_test "$@"
+}
+
+__do_mark_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local subtest=$1; shift
+ local fetch_counter=$1; shift
+ local should_fail=$1; shift
+ local base
+
+ mlxsw_only_on_spectrum 2+ || return
+
+ RET=0
+
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+ $h3_mac tos=0x01
+ defer stop_traffic_sleep $!
+
+ # Create a bit of a backlog and observe no mirroring due to marks.
+ qevent_rule_install_$subtest
+
+ build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01 >/dev/null
+
+ base=$($fetch_counter)
+ count=$(busywait 1100 until_counter_is ">= $((base + 1))" \
+ $fetch_counter)
+ check_fail $? "Spurious packets ($base -> $count) observed without buffer pressure"
+
+ # Above limit, everything should be mirrored, we should see lots of
+ # packets.
+ build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null
+ busywait_for_counter 1100 +2500 \
+ $fetch_counter > /dev/null
+ check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd"
+
+ # When the rule is uninstalled, there should be no mirroring.
+ qevent_rule_uninstall_$subtest
+ busywait_for_counter 1100 +10 \
+ $fetch_counter > /dev/null
+ check_fail $? "Spurious packets observed after uninstall"
+
+ if ((should_fail)); then
+ log_test "TC $((vlan - 10)): marked packets not $subtest'd"
+ else
+ log_test "TC $((vlan - 10)): marked packets $subtest'd"
+ fi
+}
+
+do_mark_test()
+{
+ in_defer_scope \
+ __do_mark_test "$@"
+}
+
+__do_drop_test()
{
local vlan=$1; shift
local limit=$1; shift
local trigger=$1; shift
local subtest=$1; shift
local fetch_counter=$1; shift
- local backlog
local base
local now
- local pct
+
+ mlxsw_only_on_spectrum 2+ || return
RET=0
start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac
+ defer stop_traffic_sleep $!
# Create a bit of a backlog and observe no mirroring due to drops.
qevent_rule_install_$subtest
@@ -575,25 +647,30 @@ do_drop_test()
build_backlog $vlan $((3 * limit / 2)) udp >/dev/null
base=$($fetch_counter)
- send_packets $vlan udp 11
+ send_packets $vlan udp 100
- now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter)
- check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen"
+ now=$(busywait 1100 until_counter_is ">= $((base + 95))" $fetch_counter)
+ check_err $? "${trigger}ped packets not observed: 100 expected, $((now - base)) seen"
# When no extra traffic is injected, there should be no mirroring.
- busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+ busywait 1100 until_counter_is ">= $((base + 110))" \
+ $fetch_counter >/dev/null
check_fail $? "Spurious packets observed"
# When the rule is uninstalled, there should be no mirroring.
qevent_rule_uninstall_$subtest
- send_packets $vlan udp 11
- busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
- check_fail $? "Spurious packets observed after uninstall"
+ send_packets $vlan udp 100
+ now=$(busywait 1100 until_counter_is ">= $((base + 110))" \
+ $fetch_counter)
+ check_fail $? "$((now - base)) spurious packets observed after uninstall"
log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd"
+}
- stop_traffic
- sleep 1
+do_drop_test()
+{
+ in_defer_scope \
+ __do_drop_test "$@"
}
qevent_rule_install_mirror()
@@ -628,6 +705,22 @@ do_drop_mirror_test()
tc filter del dev $h2 ingress pref 1 handle 101 flower
}
+do_mark_mirror_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+
+ tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+ flower skip_sw ip_proto tcp \
+ action drop
+
+ do_mark_test "$vlan" "$limit" mirror \
+ qevent_counter_fetch_mirror \
+ $(: should_fail=)0
+
+ tc filter del dev $h2 ingress pref 1 handle 101 flower
+}
+
qevent_rule_install_trap()
{
tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
@@ -655,3 +748,14 @@ do_drop_trap_test()
do_drop_test "$vlan" "$limit" "$trap_name" trap \
"qevent_counter_fetch_trap $trap_name"
}
+
+qevent_rule_install_trap_fwd()
+{
+ tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+ action trap_fwd hw_stats disabled
+}
+
+qevent_rule_uninstall_trap_fwd()
+{
+ tc filter del block 10 pref 1234 handle 102 matchall
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 3f007c5f8361..8902a115d9cd 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -4,11 +4,13 @@
ALL_TESTS="
ping_ipv4
ecn_test
+ ecn_test_perband
ecn_nodrop_test
red_test
mc_backlog_test
red_mirror_test
red_trap_test
+ ecn_mirror_test
"
: ${QDISC:=ets}
source sch_red_core.sh
@@ -18,99 +20,149 @@ source sch_red_core.sh
# $BACKLOG2 are far enough not to overlap, so that we can assume that if we do
# see (do not see) marking, it is actually due to the configuration of that one
# TC, and not due to configuration of the other TC leaking over.
-BACKLOG1=200000
-BACKLOG2=500000
+BACKLOG1=400000
+BACKLOG2=1000000
-install_qdisc()
+install_root_qdisc()
+{
+ tc qdisc add dev $swp3 parent 1: handle 10: $QDISC \
+ bands 8 priomap 7 6 5 4 3 2 1 0
+}
+
+install_qdisc_tc0()
{
local -a args=("$@")
- tc qdisc add dev $swp3 root handle 10: $QDISC \
- bands 8 priomap 7 6 5 4 3 2 1 0
tc qdisc add dev $swp3 parent 10:8 handle 108: red \
limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \
- probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+ probability 1.0 avpkt 8000 burst 51 "${args[@]}"
+}
+
+install_qdisc_tc1()
+{
+ local -a args=("$@")
+
tc qdisc add dev $swp3 parent 10:7 handle 107: red \
limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \
- probability 1.0 avpkt 8000 burst 63 "${args[@]}"
+ probability 1.0 avpkt 8000 burst 126 "${args[@]}"
+}
+
+install_qdisc()
+{
+ install_root_qdisc
+ install_qdisc_tc0 "$@"
+ install_qdisc_tc1 "$@"
sleep 1
}
-uninstall_qdisc()
+uninstall_qdisc_tc0()
{
- tc qdisc del dev $swp3 parent 10:7
tc qdisc del dev $swp3 parent 10:8
- tc qdisc del dev $swp3 root
+}
+
+uninstall_qdisc_tc1()
+{
+ tc qdisc del dev $swp3 parent 10:7
+}
+
+uninstall_root_qdisc()
+{
+ tc qdisc del dev $swp3 parent 1:
+}
+
+uninstall_qdisc()
+{
+ uninstall_qdisc_tc0
+ uninstall_qdisc_tc1
+ uninstall_root_qdisc
}
ecn_test()
{
install_qdisc ecn
+ defer uninstall_qdisc
do_ecn_test 10 $BACKLOG1
do_ecn_test 11 $BACKLOG2
+}
- uninstall_qdisc
+ecn_test_perband()
+{
+ install_qdisc ecn
+ defer uninstall_qdisc
+
+ do_ecn_test_perband 10 $BACKLOG1
+ do_ecn_test_perband 11 $BACKLOG2
}
ecn_nodrop_test()
{
install_qdisc ecn nodrop
+ defer uninstall_qdisc
do_ecn_nodrop_test 10 $BACKLOG1
do_ecn_nodrop_test 11 $BACKLOG2
-
- uninstall_qdisc
}
red_test()
{
install_qdisc
+ defer uninstall_qdisc
+
+ # Make sure that we get the non-zero value if there is any.
+ local cur=$(busywait 1100 until_counter_is "> 0" \
+ qdisc_stats_get $swp3 10: .backlog)
+ (( cur == 0 ))
+ check_err $? "backlog of $cur observed on non-busy qdisc"
+ log_test "$QDISC backlog properly cleaned"
do_red_test 10 $BACKLOG1
do_red_test 11 $BACKLOG2
-
- uninstall_qdisc
}
mc_backlog_test()
{
install_qdisc
+ defer uninstall_qdisc
# Note that the backlog numbers here do not correspond to RED
# configuration, but are arbitrary.
do_mc_backlog_test 10 $BACKLOG1
do_mc_backlog_test 11 $BACKLOG2
-
- uninstall_qdisc
}
red_mirror_test()
{
install_qdisc qevent early_drop block 10
+ defer uninstall_qdisc
do_drop_mirror_test 10 $BACKLOG1 early_drop
do_drop_mirror_test 11 $BACKLOG2 early_drop
-
- uninstall_qdisc
}
red_trap_test()
{
install_qdisc qevent early_drop block 10
+ defer uninstall_qdisc
do_drop_trap_test 10 $BACKLOG1 early_drop
do_drop_trap_test 11 $BACKLOG2 early_drop
+}
- uninstall_qdisc
+ecn_mirror_test()
+{
+ install_qdisc ecn qevent mark block 10
+ defer uninstall_qdisc
+
+ do_mark_mirror_test 10 $BACKLOG1
+ do_mark_mirror_test 11 $BACKLOG2
}
-trap cleanup EXIT
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+trap cleanup EXIT
setup_prepare
setup_wait
-
-bail_on_lldpad
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index ede9c38d3eff..e9043771787b 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -4,6 +4,7 @@
ALL_TESTS="
ping_ipv4
ecn_test
+ ecn_test_perband
ecn_nodrop_test
red_test
mc_backlog_test
@@ -17,7 +18,7 @@ install_qdisc()
{
local -a args=("$@")
- tc qdisc add dev $swp3 root handle 108: red \
+ tc qdisc add dev $swp3 parent 1: handle 108: red \
limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \
probability 1.0 avpkt 8000 burst 38 "${args[@]}"
sleep 1
@@ -25,52 +26,64 @@ install_qdisc()
uninstall_qdisc()
{
- tc qdisc del dev $swp3 root
+ tc qdisc del dev $swp3 parent 1:
}
ecn_test()
{
install_qdisc ecn
+ defer uninstall_qdisc
+
do_ecn_test 10 $BACKLOG
- uninstall_qdisc
+}
+
+ecn_test_perband()
+{
+ install_qdisc ecn
+ defer uninstall_qdisc
+
+ do_ecn_test_perband 10 $BACKLOG
}
ecn_nodrop_test()
{
install_qdisc ecn nodrop
+ defer uninstall_qdisc
+
do_ecn_nodrop_test 10 $BACKLOG
- uninstall_qdisc
}
red_test()
{
install_qdisc
+ defer uninstall_qdisc
+
do_red_test 10 $BACKLOG
- uninstall_qdisc
}
mc_backlog_test()
{
install_qdisc
+ defer uninstall_qdisc
+
# Note that the backlog value here does not correspond to RED
# configuration, but is arbitrary.
do_mc_backlog_test 10 $BACKLOG
- uninstall_qdisc
}
red_mirror_test()
{
install_qdisc qevent early_drop block 10
+ defer uninstall_qdisc
+
do_drop_mirror_test 10 $BACKLOG
- uninstall_qdisc
}
-trap cleanup EXIT
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+trap cleanup EXIT
setup_prepare
setup_wait
-
-bail_on_lldpad
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
index c6ce0b448bf3..ecc3664376b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
@@ -1,8 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source qos_lib.sh
-bail_on_lldpad
+sch_tbf_pre_hook()
+{
+ bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
index 8d245f331619..2e0a4efb1703 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
@@ -1,8 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source qos_lib.sh
-bail_on_lldpad
+sch_tbf_pre_hook()
+{
+ bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
index 013886061f15..6679a338dfc4 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
@@ -1,8 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source qos_lib.sh
-bail_on_lldpad
+sch_tbf_pre_hook()
+{
+ bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index 7d9e73a43a49..c068e6c2a580 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -22,20 +22,34 @@ SB_ITC=0
h1_create()
{
simple_if_init $h1 192.0.1.1/24
+ tc qdisc add dev $h1 clsact
+
+ # Add egress filter on $h1 that will guarantee that the packet sent,
+ # will be the only packet being passed to the device.
+ tc filter add dev $h1 egress pref 2 handle 102 matchall action drop
}
h1_destroy()
{
+ tc filter del dev $h1 egress pref 2 handle 102 matchall action drop
+ tc qdisc del dev $h1 clsact
simple_if_fini $h1 192.0.1.1/24
}
h2_create()
{
simple_if_init $h2 192.0.1.2/24
+ tc qdisc add dev $h2 clsact
+
+ # Add egress filter on $h2 that will guarantee that the packet sent,
+ # will be the only packet being passed to the device.
+ tc filter add dev $h2 egress pref 1 handle 101 matchall action drop
}
h2_destroy()
{
+ tc filter del dev $h2 egress pref 1 handle 101 matchall action drop
+ tc qdisc del dev $h2 clsact
simple_if_fini $h2 192.0.1.2/24
}
@@ -98,22 +112,22 @@ sb_occ_etc_check()
port_pool_test()
{
- local exp_max_occ=288
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
+ tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \
+ src_mac $h1mac dst_mac $h2mac \
+ src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+ action pass
+
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
-t ip -q
devlink sb occupancy snapshot $DEVLINK_DEV
RET=0
- max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ)
- check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
- log_test "physical port's($h1) ingress pool"
-
- RET=0
max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ)
check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
log_test "physical port's($h2) ingress pool"
@@ -122,16 +136,26 @@ port_pool_test()
max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ)
check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ"
log_test "CPU port's egress pool"
+
+ tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \
+ src_mac $h1mac dst_mac $h2mac \
+ src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+ action pass
}
port_tc_ip_test()
{
- local exp_max_occ=288
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
+ tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \
+ src_mac $h1mac dst_mac $h2mac \
+ src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+ action pass
+
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
-t ip -q
devlink sb occupancy snapshot $DEVLINK_DEV
@@ -139,48 +163,45 @@ port_tc_ip_test()
RET=0
max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
- log_test "physical port's($h1) ingress TC - IP packet"
-
- RET=0
- max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
- check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
log_test "physical port's($h2) ingress TC - IP packet"
RET=0
max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ)
check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ"
log_test "CPU port's egress TC - IP packet"
+
+ tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \
+ src_mac $h1mac dst_mac $h2mac \
+ src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+ action pass
}
port_tc_arp_test()
{
- local exp_max_occ=96
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
- if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
- exp_max_occ=144
- fi
+ tc filter add dev $h1 egress protocol arp pref 1 handle 101 flower \
+ src_mac $h1mac action pass
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+ $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q
devlink sb occupancy snapshot $DEVLINK_DEV
RET=0
max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
- log_test "physical port's($h1) ingress TC - ARP packet"
-
- RET=0
- max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
- check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
log_test "physical port's($h2) ingress TC - ARP packet"
RET=0
max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ)
check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ"
log_test "CPU port's egress TC - ARP packet"
+
+ tc filter del dev $h1 egress protocol arp pref 1 handle 101 flower \
+ src_mac $h1mac action pass
}
setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh
new file mode 120000
index 000000000000..bd670d9dc4e5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh
@@ -0,0 +1 @@
+../spectrum/port_range_scale.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
deleted file mode 100755
index 0231205a7147..000000000000
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-lib_dir=$(dirname $0)/../../../../net/forwarding
-
-VXPORT=4789
-
-ALL_TESTS="
- create_dot1d_and_dot1ad_vxlans
-"
-NUM_NETIFS=2
-source $lib_dir/lib.sh
-
-setup_prepare()
-{
- swp1=${NETIFS[p1]}
- swp2=${NETIFS[p2]}
-
- ip link set dev $swp1 up
- ip link set dev $swp2 up
-}
-
-cleanup()
-{
- pre_cleanup
-
- ip link set dev $swp2 down
- ip link set dev $swp1 down
-}
-
-create_dot1d_and_dot1ad_vxlans()
-{
- RET=0
-
- ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
- vlan_default_pvid 0 mcast_snooping 0
- ip link set dev br0 up
-
- ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
- "$VXPORT" nolearning noudpcsum tos inherit ttl 100
- ip link set dev vx100 up
-
- ip link set dev $swp1 master br0
- ip link set dev vx100 master br0
- bridge vlan add vid 100 dev vx100 pvid untagged
-
- ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0
- ip link set dev br1 up
-
- ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \
- "$VXPORT" nolearning noudpcsum tos inherit ttl 100
- ip link set dev vx200 up
-
- ip link set dev $swp2 master br1
- ip link set dev vx200 master br1 2>/dev/null
- check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected"
-
- ip link set dev vx200 master br1 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack"
-
- log_test "create 802.1d and 802.1ad VxLANs"
-
- ip link del dev vx200
- ip link del dev br1
- ip link del dev vx100
- ip link del dev br0
-}
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 4a1c9328555f..d7505b933aef 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -7,12 +7,9 @@ NUM_NETIFS=6
source $lib_dir/lib.sh
source $lib_dir/tc_common.sh
source $lib_dir/devlink_lib.sh
+source ../mlxsw_lib.sh
-if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \
- "$DEVLINK_VIDDID" != "15b3:cf70" ]]; then
- echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3"
- exit 1
-fi
+mlxsw_only_on_spectrum 2+ || exit 1
current_test=""
@@ -28,8 +25,19 @@ cleanup()
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre tc_police port"
+ALL_TESTS="
+ router
+ tc_flower
+ mirror_gre
+ tc_police
+ port
+ rif_mac_profile
+ rif_counter
+ port_range
+"
+
for current_test in ${TESTS:-$ALL_TESTS}; do
+ RET_FIN=0
source ${current_test}_scale.sh
num_netifs_var=${current_test^^}_NUM_NETIFS
@@ -38,18 +46,34 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
+ if ((target == 0)); then
+ continue
+ fi
+
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
+ # Update target in case occupancy of a certain resource changed
+ # following the test setup.
+ target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
- ${current_test}_cleanup
- devlink_reload
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' $target"
+
+ if ((!RET)); then
+ tt=${current_test}_traffic_test
+ if [[ $(type -t $tt) == "function" ]]; then
+ $tt "$target"
+ log_test "'$current_test' $target traffic test"
+ fi
+ fi
else
log_test "'$current_test' overflow $target"
fi
+ ${current_test}_cleanup $target
+ devlink_reload
+ RET_FIN=$(( RET_FIN || RET ))
done
done
current_test=""
-exit "$RET"
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
new file mode 120000
index 000000000000..1f5752e8ffc0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
@@ -0,0 +1 @@
+../spectrum/rif_counter_scale.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..303d7cbe3c45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_mac_profile_scale.sh
+
+rif_mac_profile_get_target()
+{
+ local should_fail=$1
+ local target
+
+ target=$(devlink_resource_size_get rif_mac_profiles)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
index fb850e0ec837..4994bea5daf8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -10,7 +10,8 @@ lib_dir=$(dirname $0)/../../../../net/forwarding
ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
multiple_masks_test ctcam_edge_cases_test delta_simple_test \
delta_two_masks_one_key_test delta_simple_rehash_test \
- bloom_simple_test bloom_complex_test bloom_delta_test"
+ bloom_simple_test bloom_complex_test bloom_delta_test \
+ max_erp_entries_test max_group_size_test collision_test"
NUM_NETIFS=2
source $lib_dir/lib.sh
source $lib_dir/tc_common.sh
@@ -456,7 +457,7 @@ delta_two_masks_one_key_test()
{
# If 2 keys are the same and only differ in mask in a way that
# they belong under the same ERP (second is delta of the first),
- # there should be no C-TCAM spill.
+ # there should be C-TCAM spill.
RET=0
@@ -473,8 +474,8 @@ delta_two_masks_one_key_test()
tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \
pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \
action drop"
- tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0
- check_err $? "incorrect C-TCAM spill while inserting the second rule"
+ tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 1
+ check_err $? "C-TCAM spill did not happen while inserting the second rule"
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip -q
@@ -868,7 +869,7 @@ bloom_simple_test()
bloom_complex_test()
{
# Bloom filter index computation is affected from region ID, eRP
- # ID and from the region key size. In order to excercise those parts
+ # ID and from the region key size. In order to exercise those parts
# of the Bloom filter code, use a series of regions, each with a
# different key size and send packet that should hit all of them.
local index
@@ -983,6 +984,156 @@ bloom_delta_test()
log_test "bloom delta test ($tcflags)"
}
+max_erp_entries_test()
+{
+ # The number of eRP entries is limited. Once the maximum number of eRPs
+ # has been reached, filters cannot be added. This test verifies that
+ # when this limit is reached, inserstion fails without crashing.
+
+ RET=0
+
+ local num_masks=32
+ local num_regions=15
+ local chain_failed
+ local mask_failed
+ local ret
+
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
+ for ((i=1; i < $num_regions; i++)); do
+ for ((j=$num_masks; j >= 0; j--)); do
+ tc filter add dev $h2 ingress chain $i protocol ip \
+ pref $i handle $j flower $tcflags \
+ dst_ip 192.1.0.0/$j &> /dev/null
+ ret=$?
+
+ if [ $ret -ne 0 ]; then
+ chain_failed=$i
+ mask_failed=$j
+ break 2
+ fi
+ done
+ done
+
+ # We expect to exceed the maximum number of eRP entries, so that
+ # insertion eventually fails. Otherwise, the test should be adjusted to
+ # add more filters.
+ check_fail $ret "expected to exceed number of eRP entries"
+
+ for ((; i >= 1; i--)); do
+ for ((j=0; j <= $num_masks; j++)); do
+ tc filter del dev $h2 ingress chain $i protocol ip \
+ pref $i handle $j flower &> /dev/null
+ done
+ done
+
+ log_test "max eRP entries test ($tcflags). " \
+ "max chain $chain_failed, mask $mask_failed"
+}
+
+max_group_size_test()
+{
+ # The number of ACLs in an ACL group is limited. Once the maximum
+ # number of ACLs has been reached, filters cannot be added. This test
+ # verifies that when this limit is reached, insertion fails without
+ # crashing.
+
+ RET=0
+
+ local num_acls=32
+ local max_size
+ local ret
+
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
+ for ((i=1; i < $num_acls; i++)); do
+ if [[ $(( i % 2 )) == 1 ]]; then
+ tc filter add dev $h2 ingress pref $i proto ipv4 \
+ flower $tcflags dst_ip 198.51.100.1/32 \
+ ip_proto tcp tcp_flags 0x01/0x01 \
+ action drop &> /dev/null
+ else
+ tc filter add dev $h2 ingress pref $i proto ipv6 \
+ flower $tcflags dst_ip 2001:db8:1::1/128 \
+ action drop &> /dev/null
+ fi
+
+ ret=$?
+ [[ $ret -ne 0 ]] && max_size=$((i - 1)) && break
+ done
+
+ # We expect to exceed the maximum number of ACLs in a group, so that
+ # insertion eventually fails. Otherwise, the test should be adjusted to
+ # add more filters.
+ check_fail $ret "expected to exceed number of ACLs in a group"
+
+ for ((; i >= 1; i--)); do
+ if [[ $(( i % 2 )) == 1 ]]; then
+ tc filter del dev $h2 ingress pref $i proto ipv4 \
+ flower $tcflags dst_ip 198.51.100.1/32 \
+ ip_proto tcp tcp_flags 0x01/0x01 \
+ action drop &> /dev/null
+ else
+ tc filter del dev $h2 ingress pref $i proto ipv6 \
+ flower $tcflags dst_ip 2001:db8:1::1/128 \
+ action drop &> /dev/null
+ fi
+ done
+
+ log_test "max ACL group size test ($tcflags). max size $max_size"
+}
+
+collision_test()
+{
+ # Filters cannot share an eRP if in the common unmasked part (i.e.,
+ # without the delta bits) they have the same values. If the driver does
+ # not prevent such configuration (by spilling into the C-TCAM), then
+ # multiple entries will be present in the device with the same key,
+ # leading to collisions and a reduced scale.
+ #
+ # Create such a scenario and make sure all the filters are successfully
+ # added.
+
+ RET=0
+
+ local ret
+
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
+ # Add a single dst_ip/24 filter and multiple dst_ip/32 filters that all
+ # have the same values in the common unmasked part (dst_ip/24).
+
+ tc filter add dev $h2 ingress pref 1 proto ipv4 handle 101 \
+ flower $tcflags dst_ip 198.51.100.0/24 \
+ action drop
+
+ for i in {0..255}; do
+ tc filter add dev $h2 ingress pref 2 proto ipv4 \
+ handle $((102 + i)) \
+ flower $tcflags dst_ip 198.51.100.${i}/32 \
+ action drop
+ ret=$?
+ [[ $ret -ne 0 ]] && break
+ done
+
+ check_err $ret "failed to add all the filters"
+
+ for i in {255..0}; do
+ tc filter del dev $h2 ingress pref 2 proto ipv4 \
+ handle $((102 + i)) flower
+ done
+
+ tc filter del dev $h2 ingress pref 1 proto ipv4 handle 101 flower
+
+ log_test "collision test ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
index efd798a85931..4444bbace1a9 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -4,17 +4,22 @@ source ../tc_flower_scale.sh
tc_flower_get_target()
{
local should_fail=$1; shift
+ local max_cnts
# The driver associates a counter with each tc filter, which means the
# number of supported filters is bounded by the number of available
# counters.
- # Currently, the driver supports 30K (30,720) flow counters and six of
- # these are used for multicast routing.
- local target=30714
+ max_cnts=$(devlink_resource_size_get counters flow)
+
+ # Remove already allocated counters.
+ ((max_cnts -= $(devlink_resource_occ_get counters flow)))
+
+ # Each rule uses two counters, for packets and bytes.
+ ((max_cnts /= 2))
if ((! should_fail)); then
- echo $target
+ echo $max_cnts
else
- echo $((target + 1))
+ echo $((max_cnts + 1))
fi
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
new file mode 100755
index 000000000000..fd23c80eba31
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
@@ -0,0 +1,339 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to four IPv6 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +-----------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 2001:db8:1::1/64 |
+# +----|------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR0 (802.1d) | |
+# | | | |
+# | | + vxlan0 (vxlan) | |
+# | | local 2001:db8:2::1 | |
+# | | remote 2001:db8:2::{2..17} | |
+# | | id 10 dstport 4789 | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:2::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | R2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ # Make sure the bridge uses the MAC address of the local port and
+ # not that of the VxLAN's device
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 address $(mac_get $swp1)
+
+ ip link add name vxlan0 type vxlan id 10 nolearning \
+ udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+ local 2001:db8:2::1 dstport 4789
+
+ ip address add 2001:db8:2::1/128 dev lo
+
+ ip link set dev $swp1 master br0
+ ip link set dev vxlan0 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev vxlan0 up
+}
+
+switch_destroy()
+{
+ ip link set dev vxlan0 down
+ ip link set dev $swp1 down
+ ip link set dev br0 down
+
+ ip link set dev vxlan0 nomaster
+ ip link set dev $swp1 nomaster
+
+ ip address del 2001:db8:2::1/128 dev lo
+
+ ip link del dev vxlan0
+
+ ip link del dev br0
+}
+
+router1_create()
+{
+ # This router is in the default VRF, where the VxLAN device is
+ # performing the L3 lookup
+ ip link set dev $rp1 up
+ ip address add 2001:db8:3::1/64 dev $rp1
+ ip route add 2001:db8:2::0/64 via 2001:db8:3::2
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::0/64 via 2001:db8:3::2
+ ip address del 2001:db8:3::1/64 dev $rp1
+ ip link set dev $rp1 down
+}
+
+router2_create()
+{
+ # This router is not in the default VRF, so use simple_if_init()
+ simple_if_init $rp2 2001:db8:3::2/64
+}
+
+router2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+
+ switch_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ switch_destroy
+
+ h1_destroy
+
+ vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ # Prevent unwanted packets from entering the bridge and interfering
+ # with the test.
+ tc qdisc add dev br0 clsact
+ tc filter add dev br0 egress protocol all pref 1 handle 1 \
+ matchall skip_hw action drop
+ tc qdisc add dev $h1 clsact
+ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+ matchall skip_hw action drop
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+ dst 2001:db8:2::$lsb
+ done
+}
+
+flooding_filters_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ tc qdisc add dev $rp2 clsact
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \
+ flower ip_proto udp dst_ip 2001:db8:2::$lsb \
+ dst_port 4789 skip_sw action drop
+ done
+}
+
+flooding_filters_del()
+{
+ local num_remotes=$1
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc filter del dev $rp2 ingress protocol ipv6 pref $i \
+ handle $i flower
+ done
+
+ tc qdisc del dev $rp2 clsact
+
+ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+ tc qdisc del dev $h1 clsact
+ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+ tc qdisc del dev br0 clsact
+}
+
+flooding_check_packets()
+{
+ local packets=("$@")
+ local num_remotes=${#packets[@]}
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+ check_err $? "remote $i - did not get expected number of packets"
+ done
+}
+
+flooding_test()
+{
+ # Use 16 remote VTEPs that will be stored in 4 records. The array
+ # 'packets' will store how many packets are expected to be received
+ # by each remote VTEP at each stage of the test
+ declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)
+ local num_remotes=16
+
+ RET=0
+
+ # Add FDB entries for remote VTEPs and corresponding tc filters on the
+ # ingress of the nexthop router. These filters will count how many
+ # packets were flooded to each remote VTEP
+ flooding_remotes_add $num_remotes
+ flooding_filters_add $num_remotes
+
+ # Send one packet and make sure it is flooded to all the remote VTEPs
+ $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 1 packet"
+
+ # Delete the third record which corresponds to VTEPs with LSB 10..13
+ # and check that packet is flooded correctly when we remove a record
+ # from the middle of the list
+ RET=0
+
+ packets=(2 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13
+
+ $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 2 packets"
+
+ # Delete the first record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 3 3 3 3 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5
+
+ $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 3 packets"
+
+ # Delete the last record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 4 4 4 4 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 4 packets"
+
+ # Delete the last record, one entry at a time and make sure single
+ # entries are correctly removed
+ RET=0
+
+ packets=(2 2 2 2 4 5 5 5 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 5 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 4 5 6 6 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 6 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 7 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 8 packets"
+
+ flooding_filters_del $num_remotes
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
index 73035e25085d..06a80f40daa4 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
@@ -2,11 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
source "../../../../net/forwarding/devlink_lib.sh"
+source ../mlxsw_lib.sh
-if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then
- echo "SKIP: test is tailored for Mellanox Spectrum"
- exit 1
-fi
+mlxsw_only_on_spectrum 1 || exit 1
# Needed for returning to default
declare -A KVD_DEFAULTS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh
new file mode 100644
index 000000000000..d0847e8ea270
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_range_scale.sh
+
+port_range_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get port_range_registers)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
index f0443b1b05b9..60753d46a2d4 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
@@ -34,6 +34,7 @@ create_vxlan_on_top_of_8021ad_bridge()
ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
ip link set dev br0 up
ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 087a884f66cd..7b98cdd0580d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -22,8 +22,19 @@ cleanup()
devlink_sp_read_kvd_defaults
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre tc_police port"
+ALL_TESTS="
+ router
+ tc_flower
+ mirror_gre
+ tc_police
+ port
+ rif_mac_profile
+ rif_counter
+ port_range
+"
+
for current_test in ${TESTS:-$ALL_TESTS}; do
+ RET_FIN=0
source ${current_test}_scale.sh
num_netifs_var=${current_test^^}_NUM_NETIFS
@@ -40,18 +51,34 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
+ if ((target == 0)); then
+ continue
+ fi
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
+ # Update target in case occupancy of a certain resource
+ # changed following the test setup.
+ target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
- ${current_test}_cleanup
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' [$profile] $target"
+
+ if ((!RET)); then
+ tt=${current_test}_traffic_test
+ if [[ $(type -t $tt) == "function" ]]
+ then
+ $tt "$target"
+ log_test "'$current_test' [$profile] $target traffic test"
+ fi
+ fi
else
log_test "'$current_test' [$profile] overflow $target"
fi
+ ${current_test}_cleanup $target
+ RET_FIN=$(( RET_FIN || RET ))
done
done
done
current_test=""
-exit "$RET"
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
new file mode 100644
index 000000000000..d44536276e8a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_counter_scale.sh
+
+rif_counter_get_target()
+{
+ local should_fail=$1; shift
+ local max_cnts
+ local max_rifs
+ local target
+
+ max_rifs=$(devlink_resource_size_get rifs)
+ max_cnts=$(devlink_resource_size_get counters rif)
+
+ # Remove already allocated RIFs.
+ ((max_rifs -= $(devlink_resource_occ_get rifs)))
+
+ # 10 KVD slots per counter, ingress+egress counters per RIF
+ ((max_cnts /= 20))
+
+ # Pointless to run the overflow test if we don't have enough RIFs to
+ # host all the counters.
+ if ((max_cnts > max_rifs && should_fail)); then
+ echo 0
+ return
+ fi
+
+ target=$((max_rifs < max_cnts ? max_rifs : max_cnts))
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..303d7cbe3c45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_mac_profile_scale.sh
+
+rif_mac_profile_get_target()
+{
+ local should_fail=$1
+ local target
+
+ target=$(devlink_resource_size_get rif_mac_profiles)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh
new file mode 100755
index 000000000000..d8fd875ad527
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to five IPv6 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +-----------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 2001:db8:1::1/64 |
+# +----|------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR0 (802.1d) | |
+# | | | |
+# | | + vxlan0 (vxlan) | |
+# | | local 2001:db8:2::1 | |
+# | | remote 2001:db8:2::{2..21} | |
+# | | id 10 dstport 4789 | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:2::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | R2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ # Make sure the bridge uses the MAC address of the local port and
+ # not that of the VxLAN's device
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 address $(mac_get $swp1)
+
+ ip link add name vxlan0 type vxlan id 10 nolearning \
+ udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+ local 2001:db8:2::1 dstport 4789
+
+ ip address add 2001:db8:2::1/128 dev lo
+
+ ip link set dev $swp1 master br0
+ ip link set dev vxlan0 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev vxlan0 up
+}
+
+switch_destroy()
+{
+ ip link set dev vxlan0 down
+ ip link set dev $swp1 down
+ ip link set dev br0 down
+
+ ip link set dev vxlan0 nomaster
+ ip link set dev $swp1 nomaster
+
+ ip address del 2001:db8:2::1/128 dev lo
+
+ ip link del dev vxlan0
+
+ ip link del dev br0
+}
+
+router1_create()
+{
+ # This router is in the default VRF, where the VxLAN device is
+ # performing the L3 lookup
+ ip link set dev $rp1 up
+ ip address add 2001:db8:3::1/64 dev $rp1
+ ip route add 2001:db8:2::0/64 via 2001:db8:3::2
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::0/64 via 2001:db8:3::2
+ ip address del 2001:db8:3::1/64 dev $rp1
+ ip link set dev $rp1 down
+}
+
+router2_create()
+{
+ # This router is not in the default VRF, so use simple_if_init()
+ simple_if_init $rp2 2001:db8:3::2/64
+}
+
+router2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+
+ switch_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ switch_destroy
+
+ h1_destroy
+
+ vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+ dst 2001:db8:2::$lsb
+ done
+}
+
+flooding_filters_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ tc qdisc add dev $rp2 clsact
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \
+ flower ip_proto udp dst_ip 2001:db8:2::$lsb \
+ dst_port 4789 skip_sw action drop
+ done
+}
+
+flooding_filters_del()
+{
+ local num_remotes=$1
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc filter del dev $rp2 ingress protocol ipv6 pref $i \
+ handle $i flower
+ done
+
+ tc qdisc del dev $rp2 clsact
+}
+
+flooding_check_packets()
+{
+ local packets=("$@")
+ local num_remotes=${#packets[@]}
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+ check_err $? "remote $i - did not get expected number of packets"
+ done
+}
+
+flooding_test()
+{
+ # Use 20 remote VTEPs that will be stored in 4 records. The array
+ # 'packets' will store how many packets are expected to be received
+ # by each remote VTEP at each stage of the test
+ declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)
+ local num_remotes=20
+
+ RET=0
+
+ # Add FDB entries for remote VTEPs and corresponding tc filters on the
+ # ingress of the nexthop router. These filters will count how many
+ # packets were flooded to each remote VTEP
+ flooding_remotes_add $num_remotes
+ flooding_filters_add $num_remotes
+
+ # Send one packet and make sure it is flooded to all the remote VTEPs
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 1 packet"
+
+ # Delete the third record which corresponds to VTEPs with LSB 12..16
+ # and check that packet is flooded correctly when we remove a record
+ # from the middle of the list
+ RET=0
+
+ packets=(2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 2 packets"
+
+ # Delete the first record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 2 3 3 3 3 3 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 3 packets"
+
+ # Delete the last record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 2 4 4 4 4 4 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::18
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::19
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::20
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::21
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 4 packets"
+
+ # Delete the last record, one entry at a time and make sure single
+ # entries are correctly removed
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 5 5 5 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 5 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 6 6 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 6 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 7 7 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 7 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 8 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 9 packets"
+
+ flooding_filters_del $num_remotes
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
index cc0f07e72cf2..d3d9e60d6ddf 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -77,6 +77,7 @@ tc_flower_rules_create()
filter add dev $h2 ingress \
prot ipv6 \
pref 1000 \
+ handle 42$i \
flower $tcflags dst_ip $(tc_flower_addr $i) \
action drop
EOF
@@ -98,11 +99,7 @@ __tc_flower_test()
jq -r '[ .[] | select(.kind == "flower") |
.options | .in_hw ]' | jq .[] | wc -l)
[[ $((offload_count - 1)) -eq $count ]]
- if [[ $should_fail -eq 0 ]]; then
- check_err $? "Offload mismatch"
- else
- check_err_fail $should_fail $? "Offload more than expacted"
- fi
+ check_err_fail $should_fail $? "Attempt to offload $count rules (actual result $((offload_count - 1)))"
}
tc_flower_test()
@@ -125,3 +122,19 @@ tc_flower_test()
tcflags="skip_sw"
__tc_flower_test $count $should_fail
}
+
+tc_flower_traffic_test()
+{
+ local count=$1; shift
+ local i;
+
+ for ((i = count - 1; i > 0; i /= 2)); do
+ $MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+ -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \
+ -q -t udp sp=54321,dp=12345
+ done
+ for ((i = count - 1; i > 0; i /= 2)); do
+ tc_check_packets "dev $h2 ingress" 42$i 1
+ check_err $? "Traffic not seen at rule #$i"
+ done
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
index 3e3e06ea5703..86e787895f78 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -60,7 +60,8 @@ __tc_police_test()
tc_police_rules_create $count $should_fail
- offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l)
+ offload_count=$(tc -j filter show dev $swp1 ingress |
+ jq "[.[] | select(.options.in_hw == true)] | length")
((offload_count == count))
check_err_fail $should_fail $? "tc police offload count"
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 553cb9fad508..0441a18f098b 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -11,6 +11,7 @@ ALL_TESTS="
matchall_mirror_behind_flower_ingress_test
matchall_sample_behind_flower_ingress_test
matchall_mirror_behind_flower_egress_test
+ matchall_proto_match_test
police_limits_test
multi_police_test
"
@@ -18,6 +19,8 @@ NUM_NETIFS=2
source $lib_dir/tc_common.sh
source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
switch_create()
{
@@ -166,7 +169,8 @@ matchall_sample_egress_test()
RET=0
# It is forbidden in mlxsw driver to have matchall with sample action
- # bound on egress
+ # bound on egress. Spectrum-1 specific restriction
+ mlxsw_only_on_spectrum 1 || return
tc qdisc add dev $swp1 clsact
@@ -289,6 +293,22 @@ matchall_mirror_behind_flower_egress_test()
matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
}
+matchall_proto_match_test()
+{
+ RET=0
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ matchall skip_sw \
+ action sample group 1 rate 100
+ check_fail $? "Incorrect success to add matchall rule with protocol match"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall protocol match"
+}
+
police_limits_test()
{
RET=0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
new file mode 100755
index 000000000000..bc7ea2df49fb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -0,0 +1,658 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that packets are sampled when tc-sample is used and that reported
+# metadata is correct. Two sets of hosts (with and without LAG) are used, since
+# metadata extraction in mlxsw is a bit different when LAG is involved.
+#
+# +---------------------------------+ +---------------------------------+
+# | H1 (vrf) | | H3 (vrf) |
+# | + $h1 | | + $h3_lag |
+# | | 192.0.2.1/28 | | | 192.0.2.17/28 |
+# | | | | | |
+# | | default via 192.0.2.2 | | | default via 192.0.2.18 |
+# +----|----------------------------+ +----|----------------------------+
+# | |
+# +----|-----------------------------------------|----------------------------+
+# | | 192.0.2.2/28 | 192.0.2.18/28 |
+# | + $rp1 + $rp3_lag |
+# | |
+# | + $rp2 + $rp4_lag |
+# | | 198.51.100.2/28 | 198.51.100.18/28 |
+# +----|-----------------------------------------|----------------------------+
+# | |
+# +----|----------------------------+ +----|----------------------------+
+# | | default via 198.51.100.2 | | | default via 198.51.100.18 |
+# | | | | | |
+# | | 198.51.100.1/28 | | | 198.51.100.17/28 |
+# | + $h2 | | + $h4_lag |
+# | H2 (vrf) | | H4 (vrf) |
+# +---------------------------------+ +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ tc_sample_rate_test
+ tc_sample_max_rate_test
+ tc_sample_conflict_test
+ tc_sample_group_conflict_test
+ tc_sample_md_iif_test
+ tc_sample_md_lag_iif_test
+ tc_sample_md_oif_test
+ tc_sample_md_lag_oif_test
+ tc_sample_md_out_tc_test
+ tc_sample_md_out_tc_occ_test
+ tc_sample_md_latency_test
+ tc_sample_acl_group_conflict_test
+ tc_sample_acl_rate_test
+ tc_sample_acl_max_rate_test
+"
+NUM_NETIFS=8
+CAPTURE_FILE=$(mktemp)
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/28
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/28
+}
+
+h3_create()
+{
+ ip link set dev $h3 down
+ ip link add name ${h3}_bond type bond mode 802.3ad
+ ip link set dev $h3 master ${h3}_bond
+
+ simple_if_init ${h3}_bond 192.0.2.17/28
+
+ ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18
+}
+
+h3_destroy()
+{
+ ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18
+
+ simple_if_fini ${h3}_bond 192.0.2.17/28
+
+ ip link set dev $h3 nomaster
+ ip link del dev ${h3}_bond
+}
+
+h4_create()
+{
+ ip link set dev $h4 down
+ ip link add name ${h4}_bond type bond mode 802.3ad
+ ip link set dev $h4 master ${h4}_bond
+
+ simple_if_init ${h4}_bond 198.51.100.17/28
+
+ ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18
+}
+
+h4_destroy()
+{
+ ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18
+
+ simple_if_fini ${h4}_bond 198.51.100.17/28
+
+ ip link set dev $h4 nomaster
+ ip link del dev ${h4}_bond
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ __addr_add_del $rp1 add 192.0.2.2/28
+ tc qdisc add dev $rp1 clsact
+
+ ip link set dev $rp2 up
+ __addr_add_del $rp2 add 198.51.100.2/28
+ tc qdisc add dev $rp2 clsact
+
+ ip link add name ${rp3}_bond type bond mode 802.3ad
+ ip link set dev $rp3 master ${rp3}_bond
+ __addr_add_del ${rp3}_bond add 192.0.2.18/28
+ tc qdisc add dev $rp3 clsact
+ ip link set dev ${rp3}_bond up
+
+ ip link add name ${rp4}_bond type bond mode 802.3ad
+ ip link set dev $rp4 master ${rp4}_bond
+ __addr_add_del ${rp4}_bond add 198.51.100.18/28
+ tc qdisc add dev $rp4 clsact
+ ip link set dev ${rp4}_bond up
+}
+
+router_destroy()
+{
+ ip link set dev ${rp4}_bond down
+ tc qdisc del dev $rp4 clsact
+ __addr_add_del ${rp4}_bond del 198.51.100.18/28
+ ip link set dev $rp4 nomaster
+ ip link del dev ${rp4}_bond
+
+ ip link set dev ${rp3}_bond down
+ tc qdisc del dev $rp3 clsact
+ __addr_add_del ${rp3}_bond del 192.0.2.18/28
+ ip link set dev $rp3 nomaster
+ ip link del dev ${rp3}_bond
+
+ tc qdisc del dev $rp2 clsact
+ __addr_add_del $rp2 del 198.51.100.2/28
+ ip link set dev $rp2 down
+
+ tc qdisc del dev $rp1 clsact
+ __addr_add_del $rp1 del 192.0.2.2/28
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+ h3=${NETIFS[p5]}
+ rp3=${NETIFS[p6]}
+ h4=${NETIFS[p7]}
+ rp4=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ h4_create
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ rm -f $CAPTURE_FILE
+
+ router_destroy
+ h4_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+psample_capture_start()
+{
+ rm -f $CAPTURE_FILE
+
+ psample &> $CAPTURE_FILE &
+
+ sleep 1
+}
+
+psample_capture_stop()
+{
+ kill_process %%
+}
+
+__tc_sample_rate_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local pkts pct
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
+ -B $dip -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+ pct=$((100 * (pkts - 10000) / 10000))
+ (( -25 <= pct && pct <= 25))
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+ log_test "tc sample rate ($desc)"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_rate_test()
+{
+ __tc_sample_rate_test "forward" 198.51.100.1
+ __tc_sample_rate_test "local receive" 192.0.2.2
+}
+
+tc_sample_max_rate_test()
+{
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate $((35 * 10 ** 8)) group 1
+ check_err $? "Failed to configure sampling rule with max rate"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate $((35 * 10 ** 8 + 1)) \
+ group 1 &> /dev/null
+ check_fail $? "Managed to configure sampling rate above maximum"
+
+ log_test "tc sample maximum rate"
+}
+
+tc_sample_conflict_test()
+{
+ RET=0
+
+ # Test that two sampling rules cannot be configured on the same port,
+ # even when they share the same parameters.
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 1 &> /dev/null
+ check_fail $? "Managed to configure second sampling rule"
+
+ # Delete the first rule and make sure the second rule can now be
+ # configured.
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule after deletion"
+
+ log_test "tc sample conflict test"
+
+ tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall
+}
+
+tc_sample_group_conflict_test()
+{
+ RET=0
+
+ # Test that two sampling rules cannot be configured on the same port
+ # with different groups.
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 2 &> /dev/null
+ check_fail $? "Managed to configure sampling rule with conflicting group"
+
+ log_test "tc sample group conflict test"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_iif_test()
+{
+ local rp1_ifindex
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]')
+ grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected in-ifindex"
+
+ log_test "tc sample iif"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_iif_test()
+{
+ local rp3_ifindex
+
+ RET=0
+
+ tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+ -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]')
+ grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected in-ifindex"
+
+ log_test "tc sample lag iif"
+
+ tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_oif_test()
+{
+ local rp2_ifindex
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]')
+ grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-ifindex"
+
+ log_test "tc sample oif"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_oif_test()
+{
+ local rp4_ifindex
+
+ RET=0
+
+ tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+ -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]')
+ grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-ifindex"
+
+ log_test "tc sample lag oif"
+
+ tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_test()
+{
+ RET=0
+
+ # Output traffic class is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ # By default, all the packets should go to the same traffic class (0).
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "out-tc 0 " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-tc (0)"
+
+ # Map all priorities to highest traffic class (7) and check reported
+ # out-tc.
+ tc qdisc replace dev $rp2 root handle 1: \
+ prio bands 3 priomap 0 0 0 0 0 0 0 0
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "out-tc 7 " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-tc (7)"
+
+ log_test "tc sample out-tc"
+
+ tc qdisc del dev $rp2 root handle 1:
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_occ_test()
+{
+ local backlog pct occ
+
+ RET=0
+
+ # Output traffic class occupancy is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ # Configure a shaper on egress to create congestion.
+ tc qdisc replace dev $rp2 root handle 1: \
+ tbf rate 1Mbit burst 256k limit 1M
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q &
+
+ # Allow congestion to reach steady state.
+ sleep 10
+
+ backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')
+
+ # Kill mausezahn.
+ kill_process %%
+
+ psample_capture_stop
+
+ # Record last congestion sample.
+ occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
+ cut -d ' ' -f 16)
+
+ pct=$((100 * (occ - backlog) / backlog))
+ (( -1 <= pct && pct <= 1))
+ check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%"
+
+ log_test "tc sample out-tc-occ"
+
+ tc qdisc del dev $rp2 root handle 1:
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_latency_test()
+{
+ RET=0
+
+ # Egress sampling not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have latency attribute"
+
+ log_test "tc sample latency"
+
+ tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_acl_group_conflict_test()
+{
+ RET=0
+
+ # Test that two flower sampling rules cannot be configured on the same
+ # port with different groups.
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule with same group"
+
+ tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \
+ skip_sw action sample rate 1024 group 2 &> /dev/null
+ check_fail $? "Managed to configure sampling rule with conflicting group"
+
+ log_test "tc sample (w/ flower) group conflict test"
+
+ tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+__tc_sample_acl_rate_test()
+{
+ local bind=$1; shift
+ local port=$1; shift
+ local pkts pct
+
+ RET=0
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+ pct=$((100 * (pkts - 10000) / 10000))
+ (( -25 <= pct && pct <= 25))
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+ # Setup a filter that should not match any packet and make sure packets
+ # are not sampled.
+ tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "group 1 " $CAPTURE_FILE
+ check_fail $? "Sampled packets when should not"
+
+ log_test "tc sample (w/ flower) rate ($bind)"
+
+ tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+}
+
+tc_sample_acl_rate_test()
+{
+ __tc_sample_acl_rate_test ingress $rp1
+ __tc_sample_acl_rate_test egress $rp2
+}
+
+tc_sample_acl_max_rate_test()
+{
+ RET=0
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate $((2 ** 24 - 1)) group 1
+ check_err $? "Failed to configure sampling rule with max rate"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate $((2 ** 24)) \
+ group 1 &> /dev/null
+ check_fail $? "Managed to configure sampling rate above maximum"
+
+ log_test "tc sample (w/ flower) maximum rate"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index 729a86cc4ede..4687b0a7dffb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -4,10 +4,35 @@
# Test various aspects of VxLAN offloading which are specific to mlxsw, such
# as sanitization of invalid configurations and offload indication.
-lib_dir=$(dirname $0)/../../../net/forwarding
+: ${ADDR_FAMILY:=ipv4}
+export ADDR_FAMILY
+
+: ${LOCAL_IP_1:=198.51.100.1}
+export LOCAL_IP_1
+
+: ${LOCAL_IP_2:=198.51.100.2}
+export LOCAL_IP_2
+
+: ${PREFIX_LEN:=32}
+export PREFIX_LEN
+
+: ${UDPCSUM_FLAFS:=noudpcsum}
+export UDPCSUM_FLAFS
+
+: ${MC_IP:=239.0.0.1}
+export MC_IP
-ALL_TESTS="sanitization_test offload_indication_test \
- sanitization_vlan_aware_test offload_indication_vlan_aware_test"
+: ${IP_FLAG:=""}
+export IP_FLAG
+
+: ${ALL_TESTS:="
+ sanitization_test
+ offload_indication_test
+ sanitization_vlan_aware_test
+ offload_indication_vlan_aware_test
+"}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
NUM_NETIFS=2
: ${TIMEOUT:=20000} # ms
source $lib_dir/lib.sh
@@ -63,8 +88,8 @@ sanitization_single_dev_valid_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_pass
@@ -80,8 +105,8 @@ sanitization_single_dev_vlan_aware_test()
ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_pass
@@ -97,8 +122,8 @@ sanitization_single_dev_mcast_enabled_test()
ip link add dev br0 type bridge
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -115,9 +140,9 @@ sanitization_single_dev_mcast_group_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add name dummy1 up type dummy
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
- dev dummy1 group 239.0.0.1
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \
+ dev dummy1 group $MC_IP
sanitization_single_dev_test_fail
@@ -134,7 +159,7 @@ sanitization_single_dev_no_local_ip_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
ttl 20 tos inherit dstport 4789
sanitization_single_dev_test_fail
@@ -145,31 +170,14 @@ sanitization_single_dev_no_local_ip_test()
log_test "vxlan device with no local ip"
}
-sanitization_single_dev_local_ipv6_test()
-{
- RET=0
-
- ip link add dev br0 type bridge mcast_snooping 0
-
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 2001:db8::1 dstport 4789
-
- sanitization_single_dev_test_fail
-
- ip link del dev vxlan0
- ip link del dev br0
-
- log_test "vxlan device with local ipv6 address"
-}
-
-sanitization_single_dev_learning_enabled_test()
+sanitization_single_dev_learning_enabled_ipv4_test()
{
RET=0
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_pass
@@ -186,8 +194,8 @@ sanitization_single_dev_local_interface_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add name dummy1 up type dummy
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 dev dummy1
sanitization_single_dev_test_fail
@@ -204,8 +212,8 @@ sanitization_single_dev_port_range_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \
srcport 4000 5000
sanitization_single_dev_test_fail
@@ -222,8 +230,8 @@ sanitization_single_dev_tos_static_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos 20 local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos 20 local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -239,8 +247,8 @@ sanitization_single_dev_ttl_inherit_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl inherit tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl inherit tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -250,14 +258,14 @@ sanitization_single_dev_ttl_inherit_test()
log_test "vxlan device with inherit ttl"
}
-sanitization_single_dev_udp_checksum_test()
+sanitization_single_dev_udp_checksum_ipv4_test()
{
RET=0
ip link add dev br0 type bridge mcast_snooping 0
ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -276,13 +284,12 @@ sanitization_single_dev_test()
sanitization_single_dev_mcast_enabled_test
sanitization_single_dev_mcast_group_test
sanitization_single_dev_no_local_ip_test
- sanitization_single_dev_local_ipv6_test
- sanitization_single_dev_learning_enabled_test
+ sanitization_single_dev_learning_enabled_"$ADDR_FAMILY"_test
sanitization_single_dev_local_interface_test
sanitization_single_dev_port_range_test
sanitization_single_dev_tos_static_test
sanitization_single_dev_ttl_inherit_test
- sanitization_single_dev_udp_checksum_test
+ sanitization_single_dev_udp_checksum_"$ADDR_FAMILY"_test
}
sanitization_multi_devs_test_pass()
@@ -334,10 +341,10 @@ sanitization_multi_devs_valid_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_multi_devs_test_pass
@@ -356,10 +363,10 @@ sanitization_multi_devs_ttl_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 40 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 40 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_multi_devs_test_fail
@@ -378,10 +385,10 @@ sanitization_multi_devs_udp_dstport_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 5789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 5789
sanitization_multi_devs_test_fail
@@ -400,10 +407,10 @@ sanitization_multi_devs_local_ip_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.2 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_2 dstport 4789
sanitization_multi_devs_test_fail
@@ -437,18 +444,22 @@ offload_indication_setup_create()
{
# Create a simple setup with two bridges, each with a VxLAN device
# and one local port
- ip link add name br0 up type bridge mcast_snooping 0
- ip link add name br1 up type bridge mcast_snooping 0
+ ip link add name br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name br1 type bridge mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
}
offload_indication_setup_destroy()
@@ -456,7 +467,7 @@ offload_indication_setup_destroy()
ip link del dev vxlan1
ip link del dev vxlan0
- ip address del 198.51.100.1/32 dev lo
+ ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link set dev $swp2 nomaster
ip link set dev $swp1 nomaster
@@ -469,7 +480,7 @@ offload_indication_fdb_flood_test()
{
RET=0
- bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2
+ bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \
bridge fdb show brport vxlan0
@@ -485,7 +496,7 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \
- dst 198.51.100.2
+ dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
@@ -536,7 +547,7 @@ offload_indication_fdb_bridge_test()
# marked as offloaded in both drivers
RET=0
- bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2
+ bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
check_err $?
@@ -560,17 +571,17 @@ offload_indication_decap_route_test()
RET=0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev vxlan0 down
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev vxlan1 down
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vxlan device down"
@@ -579,26 +590,26 @@ offload_indication_decap_route_test()
ip link set dev vxlan1 up
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev vxlan0 up
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vxlan device up"
RET=0
- ip address delete 198.51.100.1/32 dev lo
+ ip address delete $LOCAL_IP_1/$PREFIX_LEN dev lo
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - add local route"
@@ -607,18 +618,18 @@ offload_indication_decap_route_test()
ip link set dev $swp1 nomaster
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev $swp2 nomaster
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - local ports enslavement"
@@ -627,44 +638,48 @@ offload_indication_decap_route_test()
ip link del dev br0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link del dev br1
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - bridge device deletion"
RET=0
- ip link add name br0 up type bridge mcast_snooping 0
- ip link add name br1 up type bridge mcast_snooping 0
+ ip link add name br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name br1 type bridge mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
ip link set dev vxlan0 master br0
ip link set dev vxlan1 master br1
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link del dev vxlan0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link del dev vxlan1
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vxlan device deletion"
ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
}
check_fdb_offloaded()
@@ -721,10 +736,10 @@ __offload_indication_join_vxlan_first()
local mac=00:11:22:33:44:55
local zmac=00:00:00:00:00:00
- bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2
+ bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
ip link set dev vxlan0 master br0
- bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2
+ bridge fdb add dev vxlan0 $mac self master static dst $LOCAL_IP_2
RET=0
check_vxlan_fdb_not_offloaded
@@ -773,9 +788,11 @@ __offload_indication_join_vxlan_first()
offload_indication_join_vxlan_first()
{
- ip link add dev br0 up type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_first
@@ -789,7 +806,7 @@ __offload_indication_join_vxlan_last()
RET=0
- bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2
+ bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
ip link set dev $swp1 master br0
@@ -808,9 +825,11 @@ __offload_indication_join_vxlan_last()
offload_indication_join_vxlan_last()
{
- ip link add dev br0 up type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_last
@@ -835,12 +854,13 @@ sanitization_vlan_aware_test()
RET=0
ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
+ ip link set dev br0 addrgenmode none
ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
# Test that when each VNI is mapped to a different VLAN we can enslave
# a port to the bridge
@@ -884,20 +904,20 @@ sanitization_vlan_aware_test()
# Use the offload indication of the local route to ensure the VXLAN
# configuration was correctly rollbacked.
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link set dev vxlan10 type vxlan ttl 10
ip link set dev $swp1 master br0 &> /dev/null
check_fail $?
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vlan-aware - failed enslavement to bridge due to conflict"
ip link set dev vxlan10 type vxlan ttl 20
- ip address del 198.51.100.1/32 dev lo
+ ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link del dev vxlan20
ip link del dev vxlan10
@@ -908,20 +928,22 @@ offload_indication_vlan_aware_setup_create()
{
# Create a simple setup with two VxLAN devices and a single VLAN-aware
# bridge
- ip link add name br0 up type bridge mcast_snooping 0 vlan_filtering 1 \
+ ip link add name br0 type bridge mcast_snooping 0 vlan_filtering 1 \
vlan_default_pvid 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
ip link set dev $swp1 master br0
bridge vlan add vid 10 dev $swp1
bridge vlan add vid 20 dev $swp1
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
bridge vlan add vid 10 dev vxlan10 pvid untagged
bridge vlan add vid 20 dev vxlan20 pvid untagged
@@ -935,7 +957,7 @@ offload_indication_vlan_aware_setup_destroy()
ip link del dev vxlan20
ip link del dev vxlan10
- ip address del 198.51.100.1/32 dev lo
+ ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
bridge vlan del vid 20 dev $swp1
bridge vlan del vid 10 dev $swp1
@@ -952,7 +974,7 @@ offload_indication_vlan_aware_fdb_test()
log_info "vxlan entry offload indication - vlan-aware"
bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \
- dst 198.51.100.2 vlan 10
+ dst $LOCAL_IP_2 vlan 10
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
@@ -1003,7 +1025,7 @@ offload_indication_vlan_aware_fdb_test()
# marked as offloaded in both drivers
RET=0
- bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2
+ bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
check_err $?
@@ -1021,7 +1043,7 @@ offload_indication_vlan_aware_decap_route_test()
RET=0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
# Toggle PVID flag on one VxLAN device and make sure route is still
@@ -1029,7 +1051,7 @@ offload_indication_vlan_aware_decap_route_test()
bridge vlan add vid 10 dev vxlan10 untagged
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
# Toggle PVID flag on second VxLAN device and make sure route is no
@@ -1037,14 +1059,15 @@ offload_indication_vlan_aware_decap_route_test()
bridge vlan add vid 20 dev vxlan20 untagged
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
# Toggle PVID flag back and make sure route is marked as offloaded
bridge vlan add vid 10 dev vxlan10 pvid untagged
bridge vlan add vid 20 dev vxlan20 pvid untagged
- busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1
+ busywait "$TIMEOUT" wait_for_offload ip $IP_FLAG route show table local \
+ $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vni map/unmap"
@@ -1052,10 +1075,12 @@ offload_indication_vlan_aware_decap_route_test()
offload_indication_vlan_aware_join_vxlan_first()
{
- ip link add dev br0 up type bridge mcast_snooping 0 \
+ ip link add dev br0 type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 1
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_first 1
@@ -1065,10 +1090,12 @@ offload_indication_vlan_aware_join_vxlan_first()
offload_indication_vlan_aware_join_vxlan_last()
{
- ip link add dev br0 up type bridge mcast_snooping 0 \
+ ip link add dev br0 type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 1
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_last
@@ -1083,16 +1110,18 @@ offload_indication_vlan_aware_l3vni_test()
RET=0
sysctl_set net.ipv6.conf.default.disable_ipv6 1
- ip link add dev br0 up type bridge mcast_snooping 0 \
+ ip link add dev br0 type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link set dev $swp1 master br0
# The test will use the offload indication on the FDB entry to
# understand if the tunnel is offloaded or not
- bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1
+ bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
ip link set dev vxlan0 master br0
bridge vlan add dev vxlan0 vid 10 pvid untagged
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
index 749ba3cfda1d..38148f51877a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
@@ -4,6 +4,21 @@
# Test vetoing of FDB entries that mlxsw can not offload. This exercises several
# different veto vectors to test various rollback scenarios in the vxlan driver.
+: ${LOCAL_IP:=198.51.100.1}
+export LOCAL_IP
+
+: ${REMOTE_IP_1:=198.51.100.2}
+export REMOTE_IP_1
+
+: ${REMOTE_IP_2:=198.51.100.3}
+export REMOTE_IP_2
+
+: ${UDPCSUM_FLAFS:=noudpcsum}
+export UDPCSUM_FLAFS
+
+: ${MC_IP:=224.0.0.1}
+export MC_IP
+
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
@@ -26,8 +41,8 @@ setup_prepare()
ip link set dev $swp1 master br0
ip link set dev $swp2 up
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP dstport 4789
ip link set dev vxlan0 master br0
}
@@ -50,11 +65,11 @@ fdb_create_veto_test()
RET=0
bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
- dst 198.51.100.2 2>/dev/null
+ dst $REMOTE_IP_1 2>/dev/null
check_fail $? "multicast MAC not rejected"
bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
- dst 198.51.100.2 2>&1 >/dev/null | grep -q mlxsw_spectrum
+ dst $REMOTE_IP_1 2>&1 >/dev/null | grep -q mlxsw_spectrum
check_err $? "multicast MAC rejected without extack"
log_test "vxlan FDB veto - create"
@@ -65,15 +80,15 @@ fdb_replace_veto_test()
RET=0
bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \
- dst 198.51.100.2
+ dst $REMOTE_IP_1
check_err $? "valid FDB rejected"
bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
- dst 198.51.100.2 port 1234 2>/dev/null
+ dst $REMOTE_IP_1 port 1234 2>/dev/null
check_fail $? "FDB with an explicit port not rejected"
bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
- dst 198.51.100.2 port 1234 2>&1 >/dev/null \
+ dst $REMOTE_IP_1 port 1234 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "FDB with an explicit port rejected without extack"
@@ -85,15 +100,15 @@ fdb_append_veto_test()
RET=0
bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \
- dst 198.51.100.2
+ dst $REMOTE_IP_1
check_err $? "valid FDB rejected"
bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
- dst 198.51.100.3 port 1234 2>/dev/null
+ dst $REMOTE_IP_2 port 1234 2>/dev/null
check_fail $? "FDB with an explicit port not rejected"
bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
- dst 198.51.100.3 port 1234 2>&1 >/dev/null \
+ dst $REMOTE_IP_2 port 1234 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "FDB with an explicit port rejected without extack"
@@ -105,11 +120,11 @@ fdb_changelink_veto_test()
RET=0
ip link set dev vxlan0 type vxlan \
- group 224.0.0.1 dev lo 2>/dev/null
+ group $MC_IP dev lo 2>/dev/null
check_fail $? "FDB with a multicast IP not rejected"
ip link set dev vxlan0 type vxlan \
- group 224.0.0.1 dev lo 2>&1 >/dev/null \
+ group $MC_IP dev lo 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "FDB with a multicast IP rejected without extack"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh
new file mode 100755
index 000000000000..66c87aab86f6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN test for IPv6.
+
+LOCAL_IP=2001:db8:1::1
+REMOTE_IP_1=2001:db8:2::1
+REMOTE_IP_2=2001:db8:3::1
+UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx"
+MC_IP=FF02::2
+
+source vxlan_fdb_veto.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
index fedcb7b35af9..af5ea50ed5c0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
@@ -172,6 +172,17 @@ flooding_filters_add()
local lsb
local i
+ # Prevent unwanted packets from entering the bridge and interfering
+ # with the test.
+ tc qdisc add dev br0 clsact
+ tc filter add dev br0 egress protocol all pref 1 handle 1 \
+ matchall skip_hw action drop
+ tc qdisc add dev $h1 clsact
+ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+ matchall skip_hw action drop
+
tc qdisc add dev $rp2 clsact
for i in $(eval echo {1..$num_remotes}); do
@@ -194,6 +205,12 @@ flooding_filters_del()
done
tc qdisc del dev $rp2 clsact
+
+ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+ tc qdisc del dev $h1 clsact
+ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+ tc qdisc del dev br0 clsact
}
flooding_check_packets()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh
new file mode 100755
index 000000000000..f2ea0163ddea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN test for IPv6.
+
+ADDR_FAMILY=ipv6
+LOCAL_IP_1=2001:db8:1::1
+LOCAL_IP_2=2001:db8:1::2
+PREFIX_LEN=128
+UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx"
+MC_IP=FF02::2
+IP_FLAG="-6"
+
+ALL_TESTS="
+ sanitization_test
+ offload_indication_test
+ sanitization_vlan_aware_test
+ offload_indication_vlan_aware_test
+"
+
+sanitization_single_dev_learning_enabled_ipv6_test()
+{
+ RET=0
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+ sanitization_single_dev_test_fail
+
+ ip link del dev vxlan0
+ ip link del dev br0
+
+ log_test "vxlan device with learning enabled"
+}
+
+sanitization_single_dev_udp_checksum_ipv6_test()
+{
+ RET=0
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ ip link add name vxlan0 up type vxlan id 10 nolearning \
+ noudp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+ local $LOCAL_IP_1 dstport 4789
+
+ sanitization_single_dev_test_fail
+ log_test "vxlan device without zero udp checksum at RX"
+
+ ip link del dev vxlan0
+
+ ip link add name vxlan0 up type vxlan id 10 nolearning \
+ udp6zerocsumrx noudp6zerocsumtx ttl 20 tos inherit \
+ local $LOCAL_IP_1 dstport 4789
+
+ sanitization_single_dev_test_fail
+ log_test "vxlan device without zero udp checksum at TX"
+
+ ip link del dev vxlan0
+ ip link del dev br0
+
+}
+
+source vxlan.sh
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
new file mode 100755
index 000000000000..d05eddcad539
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, NetNSEnter
+
+def test_napi_id(cfg) -> None:
+ port = rand_port()
+ listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}"
+
+ with bkg(listen_cmd, ksft_wait=3) as server:
+ cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True)
+
+ ksft_eq(0, server.ret)
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run([test_napi_id], args=(cfg,))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
new file mode 100644
index 000000000000..7f49ca6c8637
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netdb.h>
+
+#include "../../net/lib/ksft.h"
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage address;
+ struct addrinfo *result;
+ struct addrinfo hints;
+ unsigned int napi_id;
+ socklen_t addr_len;
+ socklen_t optlen;
+ char buf[1024];
+ int opt = 1;
+ int family;
+ int server;
+ int client;
+ int ret;
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_PASSIVE;
+
+ ret = getaddrinfo(argv[1], argv[2], &hints, &result);
+ if (ret != 0) {
+ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret));
+ return 1;
+ }
+
+ family = result->ai_family;
+ addr_len = result->ai_addrlen;
+
+ server = socket(family, SOCK_STREAM, IPPROTO_TCP);
+ if (server < 0) {
+ perror("socket creation failed");
+ freeaddrinfo(result);
+ if (errno == EAFNOSUPPORT)
+ return -1;
+ return 1;
+ }
+
+ if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
+ perror("setsockopt");
+ freeaddrinfo(result);
+ return 1;
+ }
+
+ memcpy(&address, result->ai_addr, result->ai_addrlen);
+ freeaddrinfo(result);
+
+ if (bind(server, (struct sockaddr *)&address, addr_len) < 0) {
+ perror("bind failed");
+ return 1;
+ }
+
+ if (listen(server, 1) < 0) {
+ perror("listen");
+ return 1;
+ }
+
+ ksft_ready();
+
+ client = accept(server, NULL, 0);
+ if (client < 0) {
+ perror("accept");
+ return 1;
+ }
+
+ optlen = sizeof(napi_id);
+ ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id,
+ &optlen);
+ if (ret != 0) {
+ perror("getsockopt");
+ return 1;
+ }
+
+ read(client, buf, 1024);
+
+ ksft_wait();
+
+ if (napi_id == 0) {
+ fprintf(stderr, "napi ID is 0\n");
+ return 1;
+ }
+
+ close(client);
+ close(server);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py
new file mode 100755
index 000000000000..f4be72b2145a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_threaded.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test napi threaded states.
+"""
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, ksft_ne, ksft_ge
+from lib.py import NetDrvEnv, NetdevFamily
+from lib.py import cmd, defer, ethtool
+
+
+def _assert_napi_threaded_enabled(nl, napi_id) -> None:
+ napi = nl.napi_get({'id': napi_id})
+ ksft_eq(napi['threaded'], 'enabled')
+ ksft_ne(napi.get('pid'), None)
+
+
+def _assert_napi_threaded_disabled(nl, napi_id) -> None:
+ napi = nl.napi_get({'id': napi_id})
+ ksft_eq(napi['threaded'], 'disabled')
+ ksft_eq(napi.get('pid'), None)
+
+
+def _set_threaded_state(cfg, threaded) -> None:
+ with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp:
+ fp.write(str(threaded).encode('utf-8'))
+
+
+def _setup_deferred_cleanup(cfg) -> None:
+ combined = ethtool(f"-l {cfg.ifname}", json=True)[0].get("combined", 0)
+ ksft_ge(combined, 2)
+ defer(ethtool, f"-L {cfg.ifname} combined {combined}")
+
+ threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout
+ defer(_set_threaded_state, cfg, threaded)
+
+ return combined
+
+
+def napi_init(cfg, nl) -> None:
+ """
+ Test that threaded state (in the persistent NAPI config) gets updated
+ even when NAPI with given ID is not allocated at the time.
+ """
+
+ qcnt = _setup_deferred_cleanup(cfg)
+
+ _set_threaded_state(cfg, 1)
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ _set_threaded_state(cfg, 0)
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ for napi in napis:
+ ksft_eq(napi['threaded'], 'disabled')
+ ksft_eq(napi.get('pid'), None)
+
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ _set_threaded_state(cfg, 1)
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ for napi in napis:
+ ksft_eq(napi['threaded'], 'enabled')
+ ksft_ne(napi.get('pid'), None)
+
+
+def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None:
+ """
+ Test that when napi threaded is enabled at device level and
+ then disabled at napi level for one napi, the threaded state
+ of all napis is preserved after a change in number of queues.
+ """
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_ge(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ qcnt = _setup_deferred_cleanup(cfg)
+
+ # set threaded
+ _set_threaded_state(cfg, 1)
+
+ # check napi threaded is set for both napis
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_enabled(nl, napi1_id)
+
+ # disable threaded for napi1
+ nl.napi_set({'id': napi1_id, 'threaded': 'disabled'})
+
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_disabled(nl, napi1_id)
+
+
+def change_num_queues(cfg, nl) -> None:
+ """
+ Test that when napi threaded is enabled at device level,
+ the napi threaded state is preserved after a change in
+ number of queues.
+ """
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_ge(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ qcnt = _setup_deferred_cleanup(cfg)
+
+ # set threaded
+ _set_threaded_state(cfg, 1)
+
+ # check napi threaded is set for both napis
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_enabled(nl, napi1_id)
+
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ # check napi threaded is set for both napis
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_enabled(nl, napi1_id)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, queue_count=2) as cfg:
+ ksft_run([napi_init,
+ change_num_queues,
+ enable_dev_threaded_disable_napi_threaded],
+ args=(cfg, NetdevFamily()))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
new file mode 100755
index 000000000000..2022f3061738
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test creates two netdevsim virtual interfaces, assigns one of them (the
+# "destination interface") to a new namespace, and assigns IP addresses to both
+# interfaces.
+#
+# It listens on the destination interface using socat and configures a dynamic
+# target on netconsole, pointing to the destination IP address.
+#
+# Finally, it checks whether the message was received properly on the
+# destination interface. Note that this test may pollute the kernel log buffer
+# (dmesg) and relies on dynamic configuration and namespaces being configured.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+
+# Run the test twice, with different format modes
+for FORMAT in "basic" "extended"
+do
+ for IP_VERSION in "ipv6" "ipv4"
+ do
+ echo "Running with target mode: ${FORMAT} (${IP_VERSION})"
+ # Set current loglevel to KERN_INFO(6), and default to
+ # KERN_NOTICE(5)
+ echo "6 5" > /proc/sys/kernel/printk
+ # Create one namespace and two interfaces
+ set_network "${IP_VERSION}"
+ # Create a dynamic target for netconsole
+ create_dynamic_target "${FORMAT}"
+ # Only set userdata for extended format
+ if [ "$FORMAT" == "extended" ]
+ then
+ # Set userdata "key" with the "value" value
+ set_user_data
+ fi
+ # Listed for netconsole port inside the namespace and
+ # destination interface
+ listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+ # Wait for socat to start and listen to the port.
+ wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}"
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_result "${OUTPUT_FILE}" "${FORMAT}"
+ # kill socat in case it is still running
+ pkill_socat
+ cleanup
+ echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2
+ done
+done
+
+trap - EXIT
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
new file mode 100755
index 000000000000..d1d23dc67f99
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This is a selftest to test cmdline arguments on netconsole.
+# It exercises loading of netconsole from cmdline instead of the dynamic
+# reconfiguration. This includes parsing the long netconsole= line and all the
+# flow through init_netconsole().
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+check_netconsole_module
+
+modprobe netdevsim 2> /dev/null || true
+rmmod netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+# check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace and network interfaces
+trap do_cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+
+# Run the test twice, with different cmdline parameters
+for BINDMODE in "ifname" "mac"
+do
+ echo "Running with bind mode: ${BINDMODE}" >&2
+ # Create the command line for netconsole, with the configuration from
+ # the function above
+ CMDLINE=$(create_cmdline_str "${BINDMODE}")
+
+ # The content of kmsg will be save to the following file
+ OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}"
+
+ # Load the module, with the cmdline set
+ modprobe netconsole "${CMDLINE}"
+
+ # Listed for netconsole port inside the namespace and destination
+ # interface
+ listen_port_and_save_to "${OUTPUT_FILE}" &
+ # Wait for socat to start and listen to the port.
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_msg "${OUTPUT_FILE}"
+
+ # kill socat in case it is still running
+ pkill_socat
+ # Unload the module
+ rmmod netconsole
+ echo "${BINDMODE} : Test passed" >&2
+done
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
new file mode 100755
index 000000000000..4a71e01a230c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test netconsole's message fragmentation functionality.
+#
+# When a message exceeds the maximum packet size, netconsole splits it into
+# multiple fragments for transmission. This test verifies:
+# - Correct fragmentation of large messages
+# - Proper reassembly of fragments at the receiver
+# - Preservation of userdata across fragments
+# - Behavior with and without kernel release version appending
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# set userdata to a long value. In this case, it is "1-2-3-4...50-"
+USERDATA_VALUE=$(printf -- '%.2s-' {1..60})
+
+# Convert the header string in a regexp, so, we can remove
+# the second header as well.
+# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If
+# release is appended, you might find something like:L
+# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;"
+function header_to_regex() {
+ # header is everything before ;
+ local HEADER="${1}"
+ REGEX=$(echo "${HEADER}" | cut -d'=' -f1)
+ echo "${REGEX}=[0-9]*\/[0-9]*;"
+}
+
+# We have two headers in the message. Remove both to get the full message,
+# and extract the full message.
+function extract_msg() {
+ local MSGFILE="${1}"
+ # Extract the header, which is the very first thing that arrives in the
+ # first list.
+ HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1)
+ HEADER_REGEX=$(header_to_regex "${HEADER}")
+
+ # Remove the two headers from the received message
+ # This will return the message without any header, similarly to what
+ # was sent.
+ sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}"
+}
+
+# Validate the message, which has two messages glued together.
+# unwrap them to make sure all the characters were transmitted.
+# File will look like the following:
+# 13,468,514729715,-,ncfrag=0/1135;<message>
+# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key>
+function validate_fragmented_result() {
+ # Discard the netconsole headers, and assemble the full message
+ RCVMSG=$(extract_msg "${1}")
+
+ # check for the main message
+ if ! echo "${RCVMSG}" | grep -q "${MSG}"; then
+ echo "Message body doesn't match." >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # check userdata
+ if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then
+ echo "message userdata doesn't match" >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+ # test passed. hooray
+}
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# Set userdata "key" with the "value" value
+set_user_data
+
+
+# TEST 1: Send message and userdata. They will fragment
+# =======
+MSG=$(printf -- 'MSG%.3s=' {1..150})
+
+# Listen for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+# Check if the message was not corrupted
+validate_fragmented_result "${OUTPUT_FILE}"
+
+# TEST 2: Test with smaller message, and without release appended
+# =======
+MSG=$(printf -- 'FOOBAR%.3s=' {1..100})
+# Let's disable release and test again.
+disable_release_append
+
+listen_port_and_save_to "${OUTPUT_FILE}" &
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+validate_fragmented_result "${OUTPUT_FILE}"
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh
new file mode 100755
index 000000000000..06089643b771
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test verifies that users can successfully create up to
+# MAX_USERDATA_ITEMS userdata entries without encountering any failures.
+#
+# Additionally, it tests for expected failure when attempting to exceed this
+# maximum limit.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+# This is coming from netconsole code. Check for it in drivers/net/netconsole.c
+MAX_USERDATA_ITEMS=256
+
+# Function to create userdata entries
+function create_userdata_max_entries() {
+ # All these keys should be created without any error
+ for i in $(seq $MAX_USERDATA_ITEMS)
+ do
+ # USERDATA_KEY is used by set_user_data
+ USERDATA_KEY="key"${i}
+ set_user_data
+ done
+}
+
+# Function to verify the entry limit
+function verify_entry_limit() {
+ # Allowing the test to fail without exiting, since the next command
+ # will fail
+ set +e
+ mkdir "${NETCONS_PATH}/userdata/key_that_will_fail" 2> /dev/null
+ ret="$?"
+ set -e
+ if [ "$ret" -eq 0 ];
+ then
+ echo "Adding more than ${MAX_USERDATA_ITEMS} entries in userdata should fail, but it didn't" >&2
+ ls "${NETCONS_PATH}/userdata/" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# populate the maximum number of supported keys in userdata
+create_userdata_max_entries
+# Verify an additional entry is not allowed
+verify_entry_limit
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
new file mode 100755
index 000000000000..baf69031089e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
@@ -0,0 +1,272 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A test that makes sure that sysdata runtime CPU data is properly set
+# when a message is sent.
+#
+# There are 3 different tests, every time sent using a random CPU.
+# - Test #1
+# * Only enable cpu_nr sysdata feature.
+# - Test #2
+# * Keep cpu_nr sysdata feature enable and enable userdata.
+# - Test #3
+# * keep userdata enabled, and disable sysdata cpu_nr feature.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Enable the sysdata cpu_nr feature
+function set_cpu_nr() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]]
+ then
+ echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Enable the taskname to be appended to sysdata
+function set_taskname() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/taskname_enabled" ]]
+ then
+ echo "Not able to enable taskname sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/taskname_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
+# Enable the release to be appended to sysdata
+function set_release() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/release_enabled" ]]
+ then
+ echo "Not able to enable release sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/release_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/release_enabled"
+}
+
+# Enable the msgid to be appended to sysdata
+function set_msgid() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]]
+ then
+ echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
+# Disable the sysdata cpu_nr feature
+function unset_cpu_nr() {
+ echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Once called, taskname=<..> will not be appended anymore
+function unset_taskname() {
+ echo 0 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
+function unset_release() {
+ echo 0 > "${NETCONS_PATH}/userdata/release_enabled"
+}
+
+function unset_msgid() {
+ echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
+# Test if MSG contains sysdata
+function validate_sysdata() {
+ # OUTPUT_FILE will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # userdatakey=userdatavalue
+ # cpu=X
+ # taskname=<taskname>
+ # msgid=<id>
+
+ # Echo is what this test uses to create the message. See runtest()
+ # function
+ SENDER="echo"
+
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+ echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # Check if cpu=XX exists in the file and matches the one used
+ # in taskset(1)
+ if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "taskname=${SENDER}" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'taskname=echo' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ rm "${OUTPUT_FILE}"
+ pkill_socat
+}
+
+function validate_release() {
+ RELEASE=$(uname -r)
+
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "release=${RELEASE}" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'release=${RELEASE}' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Test if MSG content exists in OUTPUT_FILE but no `cpu=` and `taskname=`
+# strings
+function validate_no_sysdata() {
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+ echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "cpu=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'cpu= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "taskname=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'taskname= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "release=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'release= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "msgid=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ rm "${OUTPUT_FILE}"
+}
+
+# Start socat, send the message and wait for the file to show up in the file
+# system
+function runtest {
+ # Listen for netconsole port inside the namespace and destination
+ # interface
+ listen_port_and_save_to "${OUTPUT_FILE}" &
+ # Wait for socat to start and listen to the port.
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+ # Send the message
+ taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# This test also depends on taskset(1). Check for it before starting the test
+check_for_taskset
+
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+
+#====================================================
+# TEST #1
+# Send message from a random CPU
+#====================================================
+# Random CPU in the system
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_1"
+MSG="Test #1 from CPU${CPU}"
+# Enable the auto population of cpu_nr
+set_cpu_nr
+# Enable taskname to be appended to sysdata
+set_taskname
+set_release
+set_msgid
+runtest
+# Make sure the message was received in the dst part
+# and exit
+validate_release
+validate_sysdata
+
+#====================================================
+# TEST #2
+# This test now adds userdata together with sysdata
+# ===================================================
+# Get a new random CPU
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_2"
+MSG="Test #2 from CPU${CPU}"
+set_user_data
+runtest
+validate_release
+validate_sysdata
+
+# ===================================================
+# TEST #3
+# Unset all sysdata, fail if any userdata is set
+# ===================================================
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_3"
+MSG="Test #3 from CPU${CPU}"
+unset_cpu_nr
+unset_taskname
+unset_release
+unset_msgid
+runtest
+# At this time, cpu= shouldn't be present in the msg
+validate_no_sysdata
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh
new file mode 100755
index 000000000000..2ce9ee3719d1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_torture.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Repeatedly send kernel messages, toggles netconsole targets on and off,
+# creates and deletes targets in parallel, and toggles the source interface to
+# simulate stress conditions.
+#
+# This test aims to verify the robustness of netconsole under dynamic
+# configurations and concurrent operations.
+#
+# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make
+# sure no issues is reported.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Number of times the main loop run
+ITERATIONS=${1:-150}
+
+# Only test extended format
+FORMAT="extended"
+# And ipv6 only
+IP_VERSION="ipv6"
+
+# Create, enable and delete some targets.
+create_and_delete_random_target() {
+ COUNT=2
+ RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_)
+
+ if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \
+ [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then
+ echo "Function didn't finish yet, skipping it." >&2
+ return
+ fi
+
+ # enable COUNT targets
+ for i in $(seq ${COUNT})
+ do
+ RND_TARGET="${RND_PREFIX}"${i}
+ RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+
+ # Basic population so the target can come up
+ _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}"
+ done
+
+ echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg
+ # disable them all
+ for i in $(seq ${COUNT})
+ do
+ RND_TARGET="${RND_PREFIX}"${i}
+ RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+ if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]]
+ then
+ echo 0 > "${RND_TARGET_PATH}"/enabled
+ fi
+ rmdir "${RND_TARGET_PATH}"
+ done
+}
+
+# Disable and enable the target mid-air, while messages
+# are being transmitted.
+toggle_netcons_target() {
+ for i in $(seq 2)
+ do
+ if [ ! -d "${NETCONS_PATH}" ]
+ then
+ break
+ fi
+ echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+ # Try to enable a bit harder, given it might fail to enable
+ # Write to `enabled` might fail depending on the lock, which is
+ # highly contentious here
+ for _ in $(seq 5)
+ do
+ echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+ done
+ done
+}
+
+toggle_iface(){
+ ip link set "${SRCIF}" down
+ ip link set "${SRCIF}" up
+}
+
+# Start here
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network "${IP_VERSION}"
+# Create a dynamic target for netconsole
+create_dynamic_target "${FORMAT}"
+
+for i in $(seq "$ITERATIONS")
+do
+ for _ in $(seq 10)
+ do
+ echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg
+ done
+ wait
+
+ if (( i % 30 == 0 )); then
+ toggle_netcons_target &
+ fi
+
+ if (( i % 50 == 0 )); then
+ # create some targets, enable them, send msg and disable
+ # all in a parallel thread
+ create_and_delete_random_target &
+ fi
+
+ if (( i % 70 == 0 )); then
+ toggle_iface &
+ fi
+done
+wait
+
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
new file mode 100644
index 000000000000..1a228c5430f5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS := \
+ devlink.sh \
+ devlink_in_netns.sh \
+ devlink_trap.sh \
+ ethtool-coalesce.sh \
+ ethtool-features.sh \
+ ethtool-fec.sh \
+ ethtool-pause.sh \
+ fib.sh \
+ fib_notifications.sh \
+ hw_stats_l3.sh \
+ macsec-offload.sh \
+ nexthop.sh \
+ peer.sh \
+ psample.sh \
+ tc-mq-visibility.sh \
+ udp_tunnel_nic.sh \
+# end of TEST_PROGS
+
+TEST_FILES := \
+ ethtool-common.sh
+# end of TEST_FILES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/netdevsim/config b/tools/testing/selftests/drivers/net/netdevsim/config
new file mode 100644
index 000000000000..5117c78ddf0a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/config
@@ -0,0 +1,11 @@
+CONFIG_DUMMY=y
+CONFIG_GENEVE=m
+CONFIG_IPV6=y
+CONFIG_MACSEC=m
+CONFIG_NETDEVSIM=m
+CONFIG_NET_SCH_MQPRIO=y
+CONFIG_NET_SCH_MULTIQ=y
+CONFIG_NET_SCH_PRIO=y
+CONFIG_PSAMPLE=y
+CONFIG_PTP_1588_CLOCK_MOCK=y
+CONFIG_VXLAN=m
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 40909c254365..1b529ccaf050 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -3,51 +3,74 @@
lib_dir=$(dirname $0)/../../../net/forwarding
-ALL_TESTS="fw_flash_test params_test regions_test reload_test \
+ALL_TESTS="fw_flash_test params_test \
+ params_default_test regions_test reload_test \
netns_reload_test resource_test dev_info_test \
- empty_reporter_test dummy_reporter_test"
+ empty_reporter_test dummy_reporter_test rate_test"
NUM_NETIFS=0
source $lib_dir/lib.sh
BUS_ADDR=10
PORT_COUNT=4
+VF_COUNT=4
DEV_NAME=netdevsim$BUS_ADDR
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
DL_HANDLE=netdevsim/$DEV_NAME
+wait_for_devlink()
+{
+ "$@" | grep -q $DL_HANDLE
+}
+
+devlink_wait()
+{
+ local timeout=$1
+
+ busywait "$timeout" wait_for_devlink devlink dev
+}
+
fw_flash_test()
{
+ DUMMYFILE=$(find /lib/firmware -type f -printf '%P\n' | head -1)
RET=0
- devlink dev flash $DL_HANDLE file dummy
+ if [ -z "$DUMMYFILE" ]
+ then
+ echo "SKIP: unable to find suitable dummy firmware file"
+ return
+ fi
+
+ echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms
+
+ devlink dev flash $DL_HANDLE file $DUMMYFILE
check_err $? "Failed to flash with status updates on"
- devlink dev flash $DL_HANDLE file dummy component fw.mgmt
+ devlink dev flash $DL_HANDLE file $DUMMYFILE component fw.mgmt
check_err $? "Failed to flash with component attribute"
- devlink dev flash $DL_HANDLE file dummy overwrite settings
+ devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings
check_fail $? "Flash with overwrite settings should be rejected"
echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask
check_err $? "Failed to change allowed overwrite mask"
- devlink dev flash $DL_HANDLE file dummy overwrite settings
+ devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings
check_err $? "Failed to flash with settings overwrite enabled"
- devlink dev flash $DL_HANDLE file dummy overwrite identifiers
+ devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers
check_fail $? "Flash with overwrite settings should be identifiers"
echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask
check_err $? "Failed to change allowed overwrite mask"
- devlink dev flash $DL_HANDLE file dummy overwrite identifiers overwrite settings
+ devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers overwrite settings
check_err $? "Failed to flash with settings and identifiers overwrite enabled"
echo "n"> $DEBUGFS_DIR/fw_update_status
check_err $? "Failed to disable status updates"
- devlink dev flash $DL_HANDLE file dummy
+ devlink dev flash $DL_HANDLE file $DUMMYFILE
check_err $? "Failed to flash with status updates off"
log_test "fw flash test"
@@ -56,17 +79,28 @@ fw_flash_test()
param_get()
{
local name=$1
+ local attr=${2:-value}
+ local cmode=${3:-driverinit}
cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \
- '.[][][].values[] | select(.cmode == "driverinit").value'
+ '.[][][].values[] | select(.cmode == "'"$cmode"'").'"$attr"
}
param_set()
{
local name=$1
local value=$2
+ local cmode=${3:-driverinit}
- devlink dev param set $DL_HANDLE name $name cmode driverinit value $value
+ devlink dev param set $DL_HANDLE name $name cmode $cmode value $value
+}
+
+param_set_default()
+{
+ local name=$1
+ local cmode=${2:-driverinit}
+
+ devlink dev param set $DL_HANDLE name $name default cmode $cmode
}
check_value()
@@ -75,12 +109,18 @@ check_value()
local phase_name=$2
local expected_param_value=$3
local expected_debugfs_value=$4
+ local cmode=${5:-driverinit}
local value
+ local attr="value"
- value=$(param_get $name)
- check_err $? "Failed to get $name param value"
+ if [[ "$phase_name" == *"default"* ]]; then
+ attr="default"
+ fi
+
+ value=$(param_get $name $attr $cmode)
+ check_err $? "Failed to get $name param $attr"
[ "$value" == "$expected_param_value" ]
- check_err $? "Unexpected $phase_name $name param value"
+ check_err $? "Unexpected $phase_name $name param $attr"
value=$(<$DEBUGFS_DIR/$name)
check_err $? "Failed to get $name debugfs value"
[ "$value" == "$expected_debugfs_value" ]
@@ -113,6 +153,92 @@ params_test()
log_test "params test"
}
+value_to_debugfs()
+{
+ local value=$1
+
+ case "$value" in
+ true)
+ echo "Y"
+ ;;
+ false)
+ echo "N"
+ ;;
+ *)
+ echo "$value"
+ ;;
+ esac
+}
+
+test_default()
+{
+ local param_name=$1
+ local new_value=$2
+ local expected_default=$3
+ local cmode=${4:-driverinit}
+ local default_debugfs
+ local new_debugfs
+ local expected_debugfs
+
+ default_debugfs=$(value_to_debugfs $expected_default)
+ new_debugfs=$(value_to_debugfs $new_value)
+
+ expected_debugfs=$default_debugfs
+ check_value $param_name initial-default $expected_default $expected_debugfs $cmode
+
+ param_set $param_name $new_value $cmode
+ check_err $? "Failed to set $param_name to $new_value"
+
+ expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$new_debugfs" || echo "$default_debugfs")
+ check_value $param_name post-set $new_value $expected_debugfs $cmode
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload device"
+
+ expected_debugfs=$new_debugfs
+ check_value $param_name post-reload-new-value $new_value $expected_debugfs $cmode
+
+ param_set_default $param_name $cmode
+ check_err $? "Failed to set $param_name to default"
+
+ expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$default_debugfs" || echo "$new_debugfs")
+ check_value $param_name post-set-default $expected_default $expected_debugfs $cmode
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload device"
+
+ expected_debugfs=$default_debugfs
+ check_value $param_name post-reload-default $expected_default $expected_debugfs $cmode
+}
+
+params_default_test()
+{
+ RET=0
+
+ if ! devlink dev param help 2>&1 | grep -q "value VALUE | default"; then
+ echo "SKIP: devlink cli missing default feature"
+ return
+ fi
+
+ # Remove side effects of previous tests. Use plain param_set, because
+ # param_set_default is a feature under test here.
+ param_set max_macs 32 driverinit
+ check_err $? "Failed to reset max_macs to default value"
+ param_set test1 true driverinit
+ check_err $? "Failed to reset test1 to default value"
+ param_set test2 1234 runtime
+ check_err $? "Failed to reset test2 to default value"
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload device for clean state"
+
+ test_default max_macs 16 32 driverinit
+ test_default test1 false true driverinit
+ test_default test2 100 1234 runtime
+
+ log_test "params default test"
+}
+
check_region_size()
{
local name=$1
@@ -255,6 +381,9 @@ netns_reload_test()
ip netns del testns2
ip netns del testns1
+ # Wait until netns async cleanup is done.
+ devlink_wait 2000
+
log_test "netns reload test"
}
@@ -347,6 +476,9 @@ resource_test()
ip netns del testns2
ip netns del testns1
+ # Wait until netns async cleanup is done.
+ devlink_wait 2000
+
log_test "resource test"
}
@@ -495,8 +627,8 @@ dummy_reporter_test()
check_reporter_info dummy healthy 3 3 10 true
- echo 8192> $DEBUGFS_DIR/health/binary_len
- check_fail $? "Failed set dummy reporter binary len to 8192"
+ echo 8192 > $DEBUGFS_DIR/health/binary_len
+ check_err $? "Failed set dummy reporter binary len to 8192"
local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j)
check_err $? "Failed show dump of dummy reporter"
@@ -507,6 +639,223 @@ dummy_reporter_test()
log_test "dummy reporter test"
}
+rate_leafs_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))'
+}
+
+rate_nodes_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))'
+}
+
+rate_attr_set()
+{
+ local handle=$1
+ local name=$2
+ local value=$3
+ local units=$4
+
+ devlink port function rate set $handle $name $value$units
+}
+
+rate_attr_get()
+{
+ local handle=$1
+ local name=$2
+
+ cmd_jq "devlink port function rate show $handle -j" '.[][].'$name
+}
+
+rate_attr_tx_rate_check()
+{
+ local handle=$1
+ local name=$2
+ local rate=$3
+ local debug_file=$4
+
+ rate_attr_set $handle $name $rate mbit
+ check_err $? "Failed to set $name value"
+
+ local debug_value=$(cat $debug_file)
+ check_err $? "Failed to read $name value from debugfs"
+ [ "$debug_value" == "$rate" ]
+ check_err $? "Unexpected $name debug value $debug_value != $rate"
+
+ local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 ))
+ check_err $? "Failed to get $name attr value"
+ [ "$api_value" == "$rate" ]
+ check_err $? "Unexpected $name attr value $api_value != $rate"
+}
+
+rate_attr_parent_check()
+{
+ local handle=$1
+ local parent=$2
+ local debug_file=$3
+
+ rate_attr_set $handle parent $parent
+ check_err $? "Failed to set parent"
+
+ debug_value=$(cat $debug_file)
+ check_err $? "Failed to get parent debugfs value"
+ [ "$debug_value" == "$parent" ]
+ check_err $? "Unexpected parent debug value $debug_value != $parent"
+
+ api_value=$(rate_attr_get $r_obj parent)
+ check_err $? "Failed to get parent attr value"
+ [ "$api_value" == "$parent" ]
+ check_err $? "Unexpected parent attr value $api_value != $parent"
+}
+
+rate_attr_tc_bw_check()
+{
+ local handle=$1
+ local tc_bw=$2
+ local debug_file=$3
+
+ local tc_bw_str=""
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ tc_bw_str="$tc_bw_str $tc:$value"
+ done
+ tc_bw_str=${tc_bw_str# }
+
+ rate_attr_set "$handle" tc-bw "$tc_bw_str"
+ check_err $? "Failed to set tc-bw values"
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ local debug_value
+ debug_value=$(cat "$debug_file"/tc"${tc}"_bw)
+ check_err $? "Failed to read tc-bw value from debugfs for tc$tc"
+ [ "$debug_value" == "$value" ]
+ check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value"
+ done
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local expected_value=${bw##*:}
+ local api_value
+ api_value=$(rate_attr_get "$handle" tc_"$tc")
+ if [ "$api_value" = "null" ]; then
+ api_value=0
+ fi
+ [ "$api_value" == "$expected_value" ]
+ check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value"
+ done
+}
+
+rate_node_add()
+{
+ local handle=$1
+
+ devlink port function rate add $handle
+}
+
+rate_node_del()
+{
+ local handle=$1
+
+ devlink port function rate del $handle
+}
+
+rate_test()
+{
+ RET=0
+
+ echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs
+ devlink dev eswitch set $DL_HANDLE mode switchdev
+ local leafs=`rate_leafs_get $DL_HANDLE`
+ local num_leafs=`echo $leafs | wc -w`
+ [ "$num_leafs" == "$VF_COUNT" ]
+ check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs"
+
+ rate=10
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_share $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share
+ rate=$(($rate+10))
+ done
+
+ rate=100
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_max $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max
+ rate=$(($rate+100))
+ done
+
+ local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0"
+ for r_obj in $leafs
+ do
+ rate_attr_tc_bw_check "$r_obj" "$tc_bw" \
+ "$DEBUGFS_DIR"/ports/"${r_obj##*/}"
+ done
+
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 1 ]
+ check_err $? "Expected 1 rate node in output but got $num_nodes"
+
+ local node_tx_share=10
+ rate_attr_tx_rate_check $node1 tx_share $node_tx_share \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share
+
+ local node_tx_max=100
+ rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+
+ local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0"
+ rate_attr_tc_bw_check $node1 "$tc_bw" \
+ "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}"
+
+
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 0 ]
+ check_err $? "Expected 0 rate node but got $num_nodes"
+
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ rate_attr_parent_check $r_obj $node1_name \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent
+
+ local node2_name='group2'
+ local node2="$DL_HANDLE/$node2_name"
+ rate_node_add "$node2"
+ check_err $? "Failed to add node $node2"
+
+ rate_attr_parent_check $node2 $node1_name \
+ $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent
+ rate_node_del "$node2"
+ check_err $? "Failed to delete node $node2"
+ rate_attr_set "$r_obj" noparent
+ check_err $? "Failed to unset $r_obj parent node"
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+
+ log_test "rate test"
+}
+
setup_prepare()
{
modprobe netdevsim
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index da49ad2761b5..b64d98ca0df7 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -24,13 +24,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
SLEEP_TIME=1
NETDEV=""
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
require_command udevadm
@@ -45,6 +47,17 @@ if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then
exit 1
fi
+check_netdev_down()
+{
+ state=$(cat /sys/class/net/${NETDEV}/flags)
+
+ if [ $((state & 1)) -ne 0 ]; then
+ echo "WARNING: unexpected interface UP, disable NetworkManager?"
+
+ ip link set dev $NETDEV down
+ fi
+}
+
init_test()
{
RET=0
@@ -149,6 +162,7 @@ trap_stats_test()
RET=0
+ check_netdev_down
for trap_name in $(devlink_traps_get); do
devlink_trap_stats_idle_test $trap_name
check_err $? "Stats of trap $trap_name not idle when netdev down"
@@ -163,6 +177,16 @@ trap_stats_test()
devlink_trap_action_set $trap_name "drop"
devlink_trap_stats_idle_test $trap_name
check_err $? "Stats of trap $trap_name not idle when action is drop"
+
+ echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_fail $? "Managed to read trap (hard dropped) statistics when should not"
+ echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_err $? "Did not manage to read trap (hard dropped) statistics when should"
+
+ devlink_trap_drop_stats_idle_test $trap_name
+ check_fail $? "Drop stats of trap $trap_name idle when should not"
else
devlink_trap_stats_idle_test $trap_name
check_fail $? "Stats of non-drop trap $trap_name idle when should not"
@@ -242,6 +266,7 @@ trap_group_stats_test()
RET=0
+ check_netdev_down
for group_name in $(devlink_trap_groups_get); do
devlink_trap_group_stats_idle_test $group_name
check_err $? "Stats of trap group $group_name not idle when netdev down"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
index 9f64d5c7107b..80160579e0cc 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -24,8 +24,11 @@ function check {
local code=$1
local str=$2
local exp_str=$3
+ local exp_fail=$4
- if [ $code -ne 0 ]; then
+ [ -z "$exp_fail" ] && cop="-ne" || cop="-eq"
+
+ if [ $code $cop 0 ]; then
((num_errors++))
return
fi
@@ -47,7 +50,8 @@ function make_netdev {
modprobe netdevsim
fi
- echo $NSIM_ID > /sys/bus/netdevsim/new_device
+ echo $NSIM_ID $@ > /sys/bus/netdevsim/new_device
+ udevadm settle
# get new device name
ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/
}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh
new file mode 100644
index 000000000000..bc210dc6ad2d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+FEATS="
+ tx-checksum-ip-generic
+ tx-scatter-gather
+ tx-tcp-segmentation
+ generic-segmentation-offload
+ generic-receive-offload"
+
+for feat in $FEATS ; do
+ s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".active" 2>/dev/null)
+ check $? "$s" true
+
+ s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".fixed" 2>/dev/null)
+ check $? "$s" false
+done
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
new file mode 100755
index 000000000000..6c52ce1b0450
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+[ a$ETHTOOL == a ] && ETHTOOL=ethtool
+
+set -o pipefail
+
+# Since commit 2b3ddcb35357 ("ethtool: fec: Change the prompt ...")
+# in ethtool CLI the Configured lines start with Supported/Configured.
+configured=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2 | head -1 | cut -d' ' -f1)
+
+# netdevsim starts out with None/None
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: None
+Active FEC encoding: None"
+
+# Test Auto
+$ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: Auto
+Active FEC encoding: Off"
+
+# Test case in-sensitivity
+for o in off Off OFF; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "$configured FEC encodings: Off
+Active FEC encoding: Off"
+done
+
+for o in BaseR baser BAser; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "$configured FEC encodings: BaseR
+Active FEC encoding: BaseR"
+done
+
+for o in llrs rs; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "$configured FEC encodings: ${o^^}
+Active FEC encoding: ${o^^}"
+done
+
+# Test multiple bits
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: RS LLRS
+Active FEC encoding: LLRS"
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: Auto Off RS
+Active FEC encoding: RS"
+
+# Make sure other link modes are rejected
+$ETHTOOL --set-fec $NSIM_NETDEV encoding FIBRE 2>/dev/null
+check $? '' '' 1
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding bla-bla-bla 2>/dev/null
+check $? '' '' 1
+
+# Try JSON
+$ETHTOOL --json --show-fec $NSIM_NETDEV | jq empty >>/dev/null 2>&1
+if [ $? -eq 0 ]; then
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+ check $?
+
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+ check $? "$s" '"Auto"'
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+ check $? "$s" '"Off"'
+
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding auto RS
+ check $?
+
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+ check $? "$s" '"Auto"
+"RS"'
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+ check $? "$s" '"RS"'
+fi
+
+# Test error injection
+echo 11 > $NSIM_DEV_DFS/ethtool/get_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $? '' '' 1
+
+echo 0 > $NSIM_DEV_DFS/ethtool/get_err
+echo 11 > $NSIM_DEV_DFS/ethtool/set_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $?
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding RS 2>/dev/null
+check $? '' '' 1
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
deleted file mode 100755
index c969559ffa7a..000000000000
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-
-source ethtool-common.sh
-
-function get_value {
- local query="${SETTINGS_MAP[$1]}"
-
- echo $(ethtool -g $NSIM_NETDEV | \
- tail -n +$CURR_SETT_LINE | \
- awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}')
-}
-
-function update_current_settings {
- for key in ${!SETTINGS_MAP[@]}; do
- CURRENT_SETTINGS[$key]=$(get_value $key)
- done
- echo ${CURRENT_SETTINGS[@]}
-}
-
-if ! ethtool -h | grep -q set-ring >/dev/null; then
- echo "SKIP: No --set-ring support in ethtool"
- exit 4
-fi
-
-NSIM_NETDEV=$(make_netdev)
-
-set -o pipefail
-
-declare -A SETTINGS_MAP=(
- ["rx"]="RX"
- ["rx-mini"]="RX Mini"
- ["rx-jumbo"]="RX Jumbo"
- ["tx"]="TX"
-)
-
-declare -A EXPECTED_SETTINGS=(
- ["rx"]=""
- ["rx-mini"]=""
- ["rx-jumbo"]=""
- ["tx"]=""
-)
-
-declare -A CURRENT_SETTINGS=(
- ["rx"]=""
- ["rx-mini"]=""
- ["rx-jumbo"]=""
- ["tx"]=""
-)
-
-MAX_VALUE=$((RANDOM % $((2**32-1))))
-RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/)
-
-for ring_max_entry in $RING_MAX_LIST; do
- echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry
-done
-
-CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:)
-
-# populate the expected settings map
-for key in ${!SETTINGS_MAP[@]}; do
- EXPECTED_SETTINGS[$key]=$(get_value $key)
-done
-
-# test
-for key in ${!SETTINGS_MAP[@]}; do
- value=$((RANDOM % $MAX_VALUE))
-
- ethtool -G $NSIM_NETDEV "$key" "$value"
-
- EXPECTED_SETTINGS[$key]="$value"
- expected=${EXPECTED_SETTINGS[@]}
- current=$(update_current_settings)
-
- check $? "$current" "$expected"
- set +x
-done
-
-if [ $num_errors -eq 0 ]; then
- echo "PASSED all $((num_passes)) checks"
- exit 0
-else
- echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
- exit 1
-fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
index 251f228ce63e..6800de816e8b 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -16,6 +16,7 @@ ALL_TESTS="
ipv4_replay
ipv4_flush
ipv4_error_path
+ ipv4_delete_fail
ipv6_add
ipv6_metric
ipv6_append_single
@@ -29,17 +30,21 @@ ALL_TESTS="
ipv6_replay_single
ipv6_replay_multipath
ipv6_error_path
+ ipv6_delete_fail
"
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
-source $lib_dir/devlink_lib.sh
source $lib_dir/fib_offload_lib.sh
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
ipv4_identical_routes()
{
fib_ipv4_identical_routes_test "testns1"
@@ -155,6 +160,27 @@ ipv4_error_path()
ipv4_error_path_replay
}
+ipv4_delete_fail()
+{
+ RET=0
+
+ echo "y" > $DEBUGFS_DIR/fib/fail_route_delete
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ ip -n testns1 route add 192.0.2.0/24 dev dummy1
+ ip -n testns1 route del 192.0.2.0/24 dev dummy1 &> /dev/null
+
+ # We should not be able to delete the netdev if we are leaking a
+ # reference.
+ ip -n testns1 link del dev dummy1
+
+ log_test "IPv4 route delete failure"
+
+ echo "n" > $DEBUGFS_DIR/fib/fail_route_delete
+}
+
ipv6_add()
{
fib_ipv6_add_test "testns1"
@@ -302,6 +328,27 @@ ipv6_error_path()
ipv6_error_path_replay
}
+ipv6_delete_fail()
+{
+ RET=0
+
+ echo "y" > $DEBUGFS_DIR/fib/fail_route_delete
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ ip -n testns1 route add 2001:db8:1::/64 dev dummy1
+ ip -n testns1 route del 2001:db8:1::/64 dev dummy1 &> /dev/null
+
+ # We should not be able to delete the netdev if we are leaking a
+ # reference.
+ ip -n testns1 link del dev dummy1
+
+ log_test "IPv6 route delete failure"
+
+ echo "n" > $DEBUGFS_DIR/fib/fail_route_delete
+}
+
fib_notify_on_flag_change_set()
{
local notify=$1; shift
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
index 8d91191a098c..9896580c3d85 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
@@ -94,7 +94,7 @@ route_addition_check()
sleep 1
$IP route add $route dev dummy1
sleep 1
- kill %% && wait %% &> /dev/null
+ kill_process %%
route_notify_check $outfile $expected_num_notifications $offload_failed
rm -f $outfile
@@ -148,7 +148,7 @@ route_deletion_check()
sleep 1
$IP route del $route dev dummy1
sleep 1
- kill %% && wait %% &> /dev/null
+ kill_process %%
route_notify_check $outfile $expected_num_notifications
rm -f $outfile
@@ -191,7 +191,7 @@ route_replacement_check()
sleep 1
$IP route replace $route dev dummy2
sleep 1
- kill %% && wait %% &> /dev/null
+ kill_process %%
route_notify_check $outfile $expected_num_notifications
rm -f $outfile
diff --git a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
new file mode 100755
index 000000000000..cba5ac08426b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ l3_reporting_test
+ l3_fail_next_test
+ l3_counter_test
+ l3_rollback_test
+ l3_monitor_test
+"
+
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR_1=1337
+DEV_ADDR_2=1057
+DEV_ADDR_3=5417
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DUMMY_IFINDEX=
+
+DEV_ADDR()
+{
+ local n=$1; shift
+ local var=DEV_ADDR_$n
+
+ echo ${!var}
+}
+
+DEV()
+{
+ echo netdevsim$(DEV_ADDR $1)
+}
+
+DEVLINK_DEV()
+{
+ echo netdevsim/$(DEV $1)
+}
+
+SYSFS_NET_DIR()
+{
+ echo /sys/bus/netdevsim/devices/$(DEV $1)/net/
+}
+
+DEBUGFS_DIR()
+{
+ echo /sys/kernel/debug/netdevsim/$(DEV $1)/
+}
+
+nsim_add()
+{
+ local n=$1; shift
+
+ echo "$(DEV_ADDR $n) 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $(SYSFS_NET_DIR $n) ] ; do :; done
+}
+
+nsim_reload()
+{
+ local n=$1; shift
+ local ns=$1; shift
+
+ devlink dev reload $(DEVLINK_DEV $n) netns $ns
+
+ if [ $? -ne 0 ]; then
+ echo "Failed to reload $(DEV $n) into netns \"testns1\""
+ exit 1
+ fi
+
+}
+
+nsim_del()
+{
+ local n=$1; shift
+
+ echo "$(DEV_ADDR $n)" > ${NETDEVSIM_PATH}/del_device
+}
+
+nsim_hwstats_toggle()
+{
+ local action=$1; shift
+ local instance=$1; shift
+ local netdev=$1; shift
+ local type=$1; shift
+
+ local ifindex=$($IP -j link show dev $netdev | jq '.[].ifindex')
+
+ echo $ifindex > $(DEBUGFS_DIR $instance)/hwstats/$type/$action
+}
+
+nsim_hwstats_enable()
+{
+ nsim_hwstats_toggle enable_ifindex "$@"
+}
+
+nsim_hwstats_disable()
+{
+ nsim_hwstats_toggle disable_ifindex "$@"
+}
+
+nsim_hwstats_fail_next_enable()
+{
+ nsim_hwstats_toggle fail_next_enable "$@"
+}
+
+setup_prepare()
+{
+ modprobe netdevsim &> /dev/null
+ nsim_add 1
+ nsim_add 2
+ nsim_add 3
+
+ ip netns add testns1
+
+ if [ $? -ne 0 ]; then
+ echo "Failed to add netns \"testns1\""
+ exit 1
+ fi
+
+ nsim_reload 1 testns1
+ nsim_reload 2 testns1
+ nsim_reload 3 testns1
+
+ IP="ip -n testns1"
+
+ $IP link add name dummy1 type dummy
+ $IP link set dev dummy1 up
+ DUMMY_IFINDEX=$($IP -j link show dev dummy1 | jq '.[].ifindex')
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ $IP link del name dummy1
+ ip netns del testns1
+ nsim_del 3
+ nsim_del 2
+ nsim_del 1
+ modprobe -r netdevsim &> /dev/null
+}
+
+netdev_hwstats_used()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ $IP -j stats show dev "$netdev" group offload subgroup hw_stats_info |
+ jq '.[].info.l3_stats.used'
+}
+
+netdev_check_used()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ [[ $(netdev_hwstats_used $netdev $type) == "true" ]]
+}
+
+netdev_check_unused()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ [[ $(netdev_hwstats_used $netdev $type) == "false" ]]
+}
+
+netdev_hwstats_request()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ $IP -j stats show dev "$netdev" group offload subgroup hw_stats_info |
+ jq ".[].info.${type}_stats.request"
+}
+
+netdev_check_requested()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ [[ $(netdev_hwstats_request $netdev $type) == "true" ]]
+}
+
+netdev_check_unrequested()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ [[ $(netdev_hwstats_request $netdev $type) == "false" ]]
+}
+
+reporting_test()
+{
+ local type=$1; shift
+ local instance=1
+
+ RET=0
+
+ [[ -n $(netdev_hwstats_used dummy1 $type) ]]
+ check_err $? "$type stats not reported"
+
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used before either device or netdevsim request"
+
+ nsim_hwstats_enable $instance dummy1 $type
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used before device request"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested before device request"
+
+ $IP stats set dev dummy1 ${type}_stats on
+ netdev_check_used dummy1 $type
+ check_err $? "$type stats reported as not used after both device and netdevsim request"
+ netdev_check_requested dummy1 $type
+ check_err $? "$type stats reported as not requested after device request"
+
+ nsim_hwstats_disable $instance dummy1 $type
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after netdevsim request withdrawn"
+
+ nsim_hwstats_enable $instance dummy1 $type
+ netdev_check_used dummy1 $type
+ check_err $? "$type stats reported as not used after netdevsim request reenabled"
+
+ $IP stats set dev dummy1 ${type}_stats off
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after device request withdrawn"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested after device request withdrawn"
+
+ nsim_hwstats_disable $instance dummy1 $type
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after both requests withdrawn"
+
+ log_test "Reporting of $type stats usage"
+}
+
+l3_reporting_test()
+{
+ reporting_test l3
+}
+
+__fail_next_test()
+{
+ local instance=$1; shift
+ local type=$1; shift
+
+ RET=0
+
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used before either device or netdevsim request"
+
+ nsim_hwstats_enable $instance dummy1 $type
+ nsim_hwstats_fail_next_enable $instance dummy1 $type
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used before device request"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested before device request"
+
+ $IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+ check_fail $? "$type stats request not bounced as it should have been"
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after bounce"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested after bounce"
+
+ $IP stats set dev dummy1 ${type}_stats on
+ check_err $? "$type stats request failed when it shouldn't have"
+ netdev_check_used dummy1 $type
+ check_err $? "$type stats reported as not used after both device and netdevsim request"
+ netdev_check_requested dummy1 $type
+ check_err $? "$type stats reported as not requested after device request"
+
+ $IP stats set dev dummy1 ${type}_stats off
+ nsim_hwstats_disable $instance dummy1 $type
+
+ log_test "Injected failure of $type stats enablement (netdevsim #$instance)"
+}
+
+fail_next_test()
+{
+ __fail_next_test 1 "$@"
+ __fail_next_test 2 "$@"
+ __fail_next_test 3 "$@"
+}
+
+l3_fail_next_test()
+{
+ fail_next_test l3
+}
+
+get_hwstat()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+ local selector=$1; shift
+
+ $IP -j stats show dev $netdev group offload subgroup ${type}_stats |
+ jq ".[0].stats64.${selector}"
+}
+
+counter_test()
+{
+ local type=$1; shift
+ local instance=1
+
+ RET=0
+
+ nsim_hwstats_enable $instance dummy1 $type
+ $IP stats set dev dummy1 ${type}_stats on
+ netdev_check_used dummy1 $type
+ check_err $? "$type stats reported as not used after both device and netdevsim request"
+
+ # Netdevsim counts 10pps on ingress. We should see maybe a couple
+ # packets, unless things take a reeealy long time.
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts < 10))
+ check_err $? "$type stats show >= 10 packets after first enablement"
+
+ sleep 2.5
+
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts >= 20))
+ check_err $? "$type stats show < 20 packets after 2.5s passed"
+
+ $IP stats set dev dummy1 ${type}_stats off
+
+ sleep 2
+
+ $IP stats set dev dummy1 ${type}_stats on
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts < 10))
+ check_err $? "$type stats show >= 10 packets after second enablement"
+
+ $IP stats set dev dummy1 ${type}_stats off
+ nsim_hwstats_fail_next_enable $instance dummy1 $type
+ $IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+ check_fail $? "$type stats request not bounced as it should have been"
+
+ sleep 2
+
+ $IP stats set dev dummy1 ${type}_stats on
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts < 10))
+ check_err $? "$type stats show >= 10 packets after post-fail enablement"
+
+ $IP stats set dev dummy1 ${type}_stats off
+
+ log_test "Counter values in $type stats"
+}
+
+l3_counter_test()
+{
+ counter_test l3
+}
+
+rollback_test()
+{
+ local type=$1; shift
+
+ RET=0
+
+ nsim_hwstats_enable 1 dummy1 l3
+ nsim_hwstats_enable 2 dummy1 l3
+ nsim_hwstats_enable 3 dummy1 l3
+
+ # The three netdevsim instances are registered in order of their number
+ # one after another. It is reasonable to expect that whatever
+ # notifications take place hit no. 2 in between hitting nos. 1 and 3,
+ # whatever the actual order. This allows us to test that a fail caused
+ # by no. 2 does not leave the system in a partial state, and rolls
+ # everything back.
+
+ nsim_hwstats_fail_next_enable 2 dummy1 l3
+ $IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+ check_fail $? "$type stats request not bounced as it should have been"
+
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after bounce"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested after bounce"
+
+ sleep 2
+
+ $IP stats set dev dummy1 ${type}_stats on
+ check_err $? "$type stats request not upheld as it should have been"
+
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts < 10))
+ check_err $? "$type stats show $pkts packets after post-fail enablement"
+
+ $IP stats set dev dummy1 ${type}_stats off
+
+ nsim_hwstats_disable 3 dummy1 l3
+ nsim_hwstats_disable 2 dummy1 l3
+ nsim_hwstats_disable 1 dummy1 l3
+
+ log_test "Failure in $type stats enablement rolled back"
+}
+
+l3_rollback_test()
+{
+ rollback_test l3
+}
+
+l3_monitor_test()
+{
+ hw_stats_monitor_test dummy1 l3 \
+ "nsim_hwstats_enable 1 dummy1 l3" \
+ "nsim_hwstats_disable 1 dummy1 l3" \
+ "$IP"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh
new file mode 100755
index 000000000000..98033e6667d2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+MACSEC_NETDEV=macsec_nsim
+
+set -o pipefail
+
+if ! ethtool -k $NSIM_NETDEV | grep -q 'macsec-hw-offload: on'; then
+ echo "SKIP: netdevsim doesn't support MACsec offload"
+ exit 4
+fi
+
+if ! ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac 2>/dev/null; then
+ echo "SKIP: couldn't create macsec device"
+ exit 4
+fi
+ip link del $MACSEC_NETDEV
+
+#
+# test macsec offload API
+#
+
+ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}" type macsec port 4 offload mac
+check $?
+
+ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}2" type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
+check $?
+
+ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}3" type macsec sci abbacdde01020304 offload mac
+check $?
+
+ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}4" type macsec port 8 offload mac 2> /dev/null
+check $? '' '' 1
+
+ip macsec add "${MACSEC_NETDEV}" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+check $?
+
+ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef"
+check $?
+
+ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
+ key 00 0123456789abcdef0123456789abcdef
+check $?
+
+ip macsec add "${MACSEC_NETDEV}" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null
+check $? '' '' 1
+
+# can't disable macsec offload when SAs are configured
+ip link set "${MACSEC_NETDEV}" type macsec offload off 2> /dev/null
+check $? '' '' 1
+
+ip macsec offload "${MACSEC_NETDEV}" off 2> /dev/null
+check $? '' '' 1
+
+# toggle macsec offload via rtnetlink
+ip link set "${MACSEC_NETDEV}2" type macsec offload off
+check $?
+
+ip link set "${MACSEC_NETDEV}2" type macsec offload mac
+check $?
+
+# toggle macsec offload via genetlink
+ip macsec offload "${MACSEC_NETDEV}2" off
+check $?
+
+ip macsec offload "${MACSEC_NETDEV}2" mac
+check $?
+
+for dev in ${MACSEC_NETDEV}{,2,3} ; do
+ ip link del $dev
+ check $?
+done
+
+
+#
+# test ethtool features when toggling offload
+#
+
+ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac
+TMP_FEATS_ON_1="$(ethtool -k $MACSEC_NETDEV)"
+
+ip link set $MACSEC_NETDEV type macsec offload off
+TMP_FEATS_OFF_1="$(ethtool -k $MACSEC_NETDEV)"
+
+ip link set $MACSEC_NETDEV type macsec offload mac
+TMP_FEATS_ON_2="$(ethtool -k $MACSEC_NETDEV)"
+
+[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_2" ]
+check $?
+
+ip link del $MACSEC_NETDEV
+
+ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec
+check $?
+
+TMP_FEATS_OFF_2="$(ethtool -k $MACSEC_NETDEV)"
+[ "$TMP_FEATS_OFF_1" = "$TMP_FEATS_OFF_2" ]
+check $?
+
+ip link set $MACSEC_NETDEV type macsec offload mac
+check $?
+
+TMP_FEATS_ON_3="$(ethtool -k $MACSEC_NETDEV)"
+[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_3" ]
+check $?
+
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index be0c1b5ee6b8..01d0c044a5fc 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -11,25 +11,47 @@ ALL_TESTS="
nexthop_single_add_err_test
nexthop_group_add_test
nexthop_group_add_err_test
+ nexthop_res_group_add_test
+ nexthop_res_group_add_err_test
nexthop_group_replace_test
nexthop_group_replace_err_test
+ nexthop_res_group_replace_test
+ nexthop_res_group_replace_err_test
+ nexthop_res_group_idle_timer_test
+ nexthop_res_group_idle_timer_del_test
+ nexthop_res_group_increase_idle_timer_test
+ nexthop_res_group_decrease_idle_timer_test
+ nexthop_res_group_unbalanced_timer_test
+ nexthop_res_group_unbalanced_timer_del_test
+ nexthop_res_group_no_unbalanced_timer_test
+ nexthop_res_group_short_unbalanced_timer_test
+ nexthop_res_group_increase_unbalanced_timer_test
+ nexthop_res_group_decrease_unbalanced_timer_test
+ nexthop_res_group_force_migrate_busy_test
nexthop_single_replace_test
nexthop_single_replace_err_test
nexthop_single_in_group_replace_test
nexthop_single_in_group_replace_err_test
+ nexthop_single_in_res_group_replace_test
+ nexthop_single_in_res_group_replace_err_test
nexthop_single_in_group_delete_test
nexthop_single_in_group_delete_err_test
+ nexthop_single_in_res_group_delete_test
+ nexthop_single_in_res_group_delete_err_test
nexthop_replay_test
nexthop_replay_err_test
"
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
nexthop_check()
{
@@ -44,6 +66,28 @@ nexthop_check()
return 0
}
+nexthop_bucket_nhid_count_check()
+{
+ local group_id=$1; shift
+ local expected
+ local count
+ local nhid
+ local ret
+
+ while (($# > 0)); do
+ nhid=$1; shift
+ expected=$1; shift
+
+ count=$($IP nexthop bucket show id $group_id nhid $nhid |
+ grep "trap" | wc -l)
+ if ((expected != count)); then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
nexthop_resource_check()
{
local expected_occ=$1; shift
@@ -159,6 +203,71 @@ nexthop_group_add_err_test()
nexthop_resource_set 9999
}
+nexthop_res_group_add_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ $IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5
+ nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected weighted nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 7
+ check_err $? "Wrong weighted nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ log_test "Resilient nexthop group add and delete"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_add_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 2
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null
+ check_fail $? "Nexthop group addition succeeded when should fail"
+
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group add failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
nexthop_group_replace_test()
{
RET=0
@@ -206,6 +315,411 @@ nexthop_group_replace_err_test()
nexthop_resource_set 9999
}
+nexthop_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ $IP nexthop replace id 10 group 1/2/3 type resilient
+ nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group replace"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+ $IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null
+ check_fail $? "Nexthop group replacement succeeded when should fail"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 3
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy after failure"
+
+ log_test "Resilient nexthop group replace failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+}
+
+nexthop_res_mark_buckets_busy()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+ local count=$1; shift
+ local index
+
+ for index in $($IP -j nexthop bucket show id $group_id nhid $nhid |
+ jq '.[].bucket.index' | head -n ${count:--0})
+ do
+ echo $group_id $index \
+ > $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity
+ done
+}
+
+nexthop_res_num_nhid_buckets()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+
+ $IP -j nexthop bucket show id $group_id nhid $nhid | jq length
+}
+
+nexthop_res_group_idle_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 6
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_idle_timer_del_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 \
+ type resilient buckets 8 idle_timer 6
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4 3 0
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 4
+
+ # Deletion prompts group replacement. Check that the bucket timers
+ # are kept.
+ $IP nexthop delete id 3
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to still be unbalanced"
+
+ sleep 4
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_decrease_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 4
+ sleep 4
+
+ # 6 seconds, past the new timer, before the old timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer decrease"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_del_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,100/2,100/3,1 \
+ type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_idle_timer_test()
+{
+ __nexthop_res_group_increase_timer_test idle_timer
+}
+
+nexthop_res_group_decrease_idle_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test idle_timer
+}
+
+nexthop_res_group_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in 1 2; do
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_unbalanced_timer_del_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ # Check that NH delete does not reset unbalanced time.
+ sleep 4
+ $IP nexthop delete id 3
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "1: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "2: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_no_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in $(seq 3); do
+ sleep 60
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ log_test "Buckets never force-migrated without unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_short_unbalanced_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120 unbalanced_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 5
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced < idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_unbalanced_timer_test()
+{
+ __nexthop_res_group_increase_timer_test unbalanced_timer
+}
+
+nexthop_res_group_decrease_unbalanced_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test unbalanced_timer
+}
+
+nexthop_res_group_force_migrate_busy_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ $IP nexthop replace id 10 group 2 type resilient
+ nexthop_bucket_nhid_count_check 10 2 8
+ check_err $? "All buckets expected to have migrated"
+
+ log_test "Busy buckets force-migrated when NH removed"
+
+ $IP nexthop flush &> /dev/null
+}
+
nexthop_single_replace_test()
{
RET=0
@@ -299,6 +813,63 @@ nexthop_single_in_group_replace_err_test()
nexthop_resource_set 9999
}
+nexthop_single_in_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+ check_err $? "Failed to replace nexthop when should not"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop replacement succeeded when should fail"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after failure"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
nexthop_single_in_group_delete_test()
{
RET=0
@@ -346,6 +917,57 @@ nexthop_single_in_group_delete_err_test()
nexthop_resource_set 9999
}
+nexthop_single_in_res_group_delete_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop del id 1
+ nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 2 4
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 5
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_delete_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2/3 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop del id 1
+
+ # We failed to replace the two nexthop buckets that were originally
+ # assigned to nhid 1.
+ nexthop_bucket_nhid_count_check 10 2 2 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 8
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
nexthop_replay_test()
{
RET=0
@@ -431,6 +1053,6 @@ trap cleanup EXIT
setup_prepare
-tests_run
+xfail_on_slow tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
new file mode 100755
index 000000000000..7f32b5600925
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+lib_dir=$(dirname $0)/../../../net
+source $lib_dir/lib.sh
+
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+socat_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_1_SYS/net -exec basename {} \;)
+ NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_2_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_DEV_1_NAME netns nssv
+ ip link set $NSIM_DEV_2_NAME netns nscl
+
+ ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
+ ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
+ ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+socat_check || exit 4
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_DEV_1_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_1_FD}</var/run/netns/nssv
+NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex)
+
+NSIM_DEV_2_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_2_FD}</var/run/netns/nscl
+NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex)
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netdevsim should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netnsid should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with self should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+# argument error checking
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "invalid arg should fail"
+ cleanup_ns
+ exit 1
+fi
+
+# send/recv packets
+
+tmp_file=$(mktemp)
+ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file &
+pid=$!
+res=0
+
+wait_local_port_listen nssv 1234 tcp
+
+echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234
+
+count=$(cat $tmp_file | wc -c)
+if [[ $count -ne 3 ]]; then
+ echo "expected 3 bytes, got $count"
+ res=1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
+
+kill $pid
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit $res
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
new file mode 100755
index 000000000000..e689ff7a0b12
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the psample module. It makes use of netdevsim
+# which periodically generates "sampled" packets.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ psample_enable_test
+ psample_group_num_test
+ psample_md_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
+CAPTURE_FILE=$(mktemp)
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+psample_capture()
+{
+ rm -f $CAPTURE_FILE
+
+ timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE
+}
+
+psample_enable_test()
+{
+ RET=0
+
+ echo 1 > $PSAMPLE_DIR/enable
+ check_err $? "Failed to enable sampling when should not"
+
+ echo 1 > $PSAMPLE_DIR/enable 2>/dev/null
+ check_fail $? "Sampling enablement succeeded when should fail"
+
+ psample_capture
+ if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then
+ check_err 1 "Failed to capture sampled packets"
+ fi
+
+ echo 0 > $PSAMPLE_DIR/enable
+ check_err $? "Failed to disable sampling when should not"
+
+ echo 0 > $PSAMPLE_DIR/enable 2>/dev/null
+ check_fail $? "Sampling disablement succeeded when should fail"
+
+ psample_capture
+ if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then
+ check_err 1 "Captured sampled packets when should not"
+ fi
+
+ log_test "psample enable / disable"
+}
+
+psample_group_num_test()
+{
+ RET=0
+
+ echo 1234 > $PSAMPLE_DIR/group_num
+ echo 1 > $PSAMPLE_DIR/enable
+
+ psample_capture
+ grep -q -e "group 1234" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong group number"
+
+ # New group number should only be used after disable / enable.
+ echo 4321 > $PSAMPLE_DIR/group_num
+
+ psample_capture
+ grep -q -e "group 4321" $CAPTURE_FILE
+ check_fail $? "Group number changed while sampling is active"
+
+ echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable
+
+ psample_capture
+ grep -q -e "group 4321" $CAPTURE_FILE
+ check_err $? "Group number did not change after restarting sampling"
+
+ log_test "psample group number"
+
+ echo 0 > $PSAMPLE_DIR/enable
+}
+
+psample_md_test()
+{
+ RET=0
+
+ echo 1 > $PSAMPLE_DIR/enable
+
+ echo 1234 > $PSAMPLE_DIR/in_ifindex
+ echo 4321 > $PSAMPLE_DIR/out_ifindex
+ psample_capture
+
+ grep -q -e "in-ifindex 1234" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong in-ifindex"
+
+ grep -q -e "out-ifindex 4321" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong out-ifindex"
+
+ echo 5 > $PSAMPLE_DIR/out_tc
+ psample_capture
+
+ grep -q -e "out-tc 5" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong out-tc"
+
+ echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc
+ psample_capture
+
+ grep -q -e "out-tc " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with out-tc when should not"
+
+ echo 1 > $PSAMPLE_DIR/out_tc
+ echo 10000 > $PSAMPLE_DIR/out_tc_occ_max
+ psample_capture
+
+ grep -q -e "out-tc-occ " $CAPTURE_FILE
+ check_err $? "Sampled packets not reported with out-tc-occ when should"
+
+ echo 0 > $PSAMPLE_DIR/out_tc_occ_max
+ psample_capture
+
+ grep -q -e "out-tc-occ " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with out-tc-occ when should not"
+
+ echo 10000 > $PSAMPLE_DIR/latency_max
+ psample_capture
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_err $? "Sampled packets not reported with latency when should"
+
+ echo 0 > $PSAMPLE_DIR/latency_max
+ psample_capture
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with latency when should not"
+
+ log_test "psample metadata"
+
+ echo 0 > $PSAMPLE_DIR/enable
+}
+
+setup_prepare()
+{
+ modprobe netdevsim &> /dev/null
+
+ echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+ set -e
+
+ ip netns add testns1
+ devlink dev reload $DEVLINK_DEV netns testns1
+
+ set +e
+}
+
+cleanup()
+{
+ pre_cleanup
+ rm -f $CAPTURE_FILE
+ ip netns del testns1
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/settings b/tools/testing/selftests/drivers/net/netdevsim/settings
new file mode 100644
index 000000000000..a62d2fa1275c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/settings
@@ -0,0 +1 @@
+timeout=600
diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
new file mode 100755
index 000000000000..b411fe66510f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+set -o pipefail
+
+n_children() {
+ n=$(tc qdisc show dev $NDEV | grep '^qdisc' | wc -l)
+ echo $((n - 1))
+}
+
+tcq() {
+ tc qdisc $1 dev $NDEV ${@:2}
+}
+
+n_child_assert() {
+ n=$(n_children)
+ if [ $n -ne $1 ]; then
+ echo "ERROR ($root): ${@:2}, expected $1 have $n"
+ ((num_errors++))
+ else
+ ((num_passes++))
+ fi
+}
+
+
+for root in mq mqprio; do
+ NDEV=$(make_netdev 1 4)
+
+ opts=
+ [ $root == "mqprio" ] && opts='hw 0 num_tc 1 map 0 0 0 0 queues 1@0'
+
+ tcq add root handle 100: $root $opts
+ n_child_assert 4 'Init'
+
+ # All defaults
+
+ for n in 3 2 1 2 3 4 1 4; do
+ ethtool -L $NDEV combined $n
+ n_child_assert $n "Change queues to $n while down"
+ done
+
+ ip link set dev $NDEV up
+
+ for n in 3 2 1 2 3 4 1 4; do
+ ethtool -L $NDEV combined $n
+ n_child_assert $n "Change queues to $n while up"
+ done
+
+ # One real one
+ tcq replace parent 100:4 handle 204: pfifo_fast
+ n_child_assert 4 "One real queue"
+
+ ethtool -L $NDEV combined 1
+ n_child_assert 2 "One real queue, one default"
+
+ ethtool -L $NDEV combined 4
+ n_child_assert 4 "One real queue, rest default"
+
+ # Remove real one
+ tcq del parent 100:4 handle 204:
+
+ # Replace default with pfifo
+ tcq replace parent 100:1 handle 205: pfifo limit 1000
+ n_child_assert 3 "Deleting real one, replacing default one with pfifo"
+
+ ethtool -L $NDEV combined 1
+ n_child_assert 1 "Grafted, one"
+
+ cleanup_nsim
+done
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 1b08e042cf94..4c859ecdad94 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -142,7 +142,7 @@ function pre_ethtool {
}
function check_table {
- local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
local -n expected=$2
local last=$3
@@ -212,7 +212,7 @@ function check_tables {
}
function print_table {
- local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
read -a have < $path
tree $NSIM_DEV_DFS/
@@ -233,6 +233,7 @@ function print_tables {
function get_netdev_name {
local -n old=$1
+ udevadm settle
new=$(ls /sys/class/net)
for netdev in $new; do
@@ -265,10 +266,10 @@ for port in 0 1; do
echo $NSIM_ID > /sys/bus/netdevsim/new_device
else
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_SYS/new_port
fi
NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
msg="new NIC device created"
exp0=( 0 0 0 0 )
@@ -282,8 +283,8 @@ for port in 0 1; do
msg="VxLAN v4 devices go down"
exp0=( 0 0 0 0 )
- ifconfig vxlan1 down
- ifconfig vxlan0 down
+ ip link set dev vxlan1 down
+ ip link set dev vxlan0 down
check_tables
msg="VxLAN v6 devices"
@@ -291,7 +292,7 @@ for port in 0 1; do
new_vxlan vxlanA 4789 $NSIM_NETDEV 6
for ifc in vxlan0 vxlan1; do
- ifconfig $ifc up
+ ip link set dev $ifc up
done
new_vxlan vxlanB 4789 $NSIM_NETDEV 6
@@ -305,14 +306,14 @@ for port in 0 1; do
new_geneve gnv0 6081
msg="NIC device goes down"
- ifconfig $NSIM_NETDEV down
+ ip link set dev $NSIM_NETDEV down
if [ $port -eq 1 ]; then
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
fi
check_tables
msg="NIC device goes up again"
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
exp1=( `mke 6081 2` 0 0 0 )
check_tables
@@ -348,23 +349,11 @@ old_netdevs=$(ls /sys/class/net)
port=0
echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
-echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
echo 0 > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
-new_vxlan vxlan0 10000 $NSIM_NETDEV
-
-modprobe -r vxlan
-modprobe -r udp_tunnel
-
-msg="remove tunnels"
-exp0=( 0 0 0 0 )
-check_tables
-
-msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
+exp0=( `mke 10000 1` 0 0 0 )
new_vxlan vxlan0 10000 $NSIM_NETDEV
exp0=( 0 0 0 0 )
@@ -426,11 +415,11 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
overflow_table0 "overflow NIC table"
overflow_table1 "overflow NIC table"
@@ -483,11 +472,11 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
overflow_table0 "overflow NIC table"
overflow_table1 "overflow NIC table"
@@ -539,11 +528,11 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
overflow_table0 "destroy NIC"
overflow_table1 "destroy NIC"
@@ -568,11 +557,11 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
msg="create VxLANs v6"
new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
@@ -628,13 +617,13 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
- echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
msg="1 - create VxLANs v6"
exp0=( 0 0 0 0 )
@@ -656,7 +645,7 @@ for port in 0 1; do
new_geneve gnv0 20000
msg="2 - destroy GENEVE"
- echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
exp1=( `mke 20000 2` 0 0 0 )
del_dev gnv0
@@ -683,11 +672,11 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
msg="create VxLANs v6"
exp0=( `mke 10000 1` 0 0 0 )
@@ -742,11 +731,11 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
- ifconfig $NSIM_NETDEV up
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
msg="create VxLANs v6"
exp0=( `mke 10000 1` 0 0 0 )
@@ -755,22 +744,22 @@ for port in 0 1; do
msg="create VxLANs v4"
new_vxlan vxlan0 10000 $NSIM_NETDEV
- echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="NIC device goes down"
- ifconfig $NSIM_NETDEV down
+ ip link set dev $NSIM_NETDEV down
if [ $port -eq 1 ]; then
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
fi
check_tables
- echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="NIC device goes up again"
- ifconfig $NSIM_NETDEV up
+ ip link set dev $NSIM_NETDEV up
exp0=( `mke 10000 1` 0 0 0 )
check_tables
@@ -780,7 +769,7 @@ for port in 0 1; do
del_dev vxlan0
check_tables
- echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="destroy NIC"
@@ -800,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
-echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_DFS/udp_ports_shared
old_netdevs=$(ls /sys/class/net)
@@ -818,12 +806,12 @@ new_vxlan vxlan1 4789 $NSIM_NETDEV2
msg="VxLAN v4 devices go down"
exp0=( 0 0 0 0 )
-ifconfig vxlan1 down
-ifconfig vxlan0 down
+ip link set dev vxlan1 down
+ip link set dev vxlan0 down
check_tables
for ifc in vxlan0 vxlan1; do
- ifconfig $ifc up
+ ip link set dev $ifc up
done
msg="VxLAN v6 device"
@@ -835,11 +823,11 @@ exp1=( `mke 6081 2` 0 0 0 )
new_geneve gnv0 6081
msg="NIC device goes down"
-ifconfig $NSIM_NETDEV down
+ip link set dev $NSIM_NETDEV down
check_tables
msg="NIC device goes up again"
-ifconfig $NSIM_NETDEV up
+ip link set dev $NSIM_NETDEV up
check_tables
for i in `seq 2`; do
@@ -876,6 +864,7 @@ msg="re-add a port"
echo 2 > $NSIM_DEV_SYS/del_port
echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
check_tables
msg="replace VxLAN in overflow table"
@@ -886,7 +875,7 @@ msg="vacate VxLAN in overflow table"
exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
del_dev vxlan2
-echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
check_tables
msg="tunnels destroyed 2"
diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py
new file mode 100755
index 000000000000..408bd54d6779
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netpoll_basic.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Author: Breno Leitao <leitao@debian.org>
+"""
+ This test aims to evaluate the netpoll polling mechanism (as in
+ netpoll_poll_dev()). It presents a complex scenario where the network
+ attempts to send a packet but fails, prompting it to poll the NIC from within
+ the netpoll TX side.
+
+ This has been a crucial path in netpoll that was previously untested. Jakub
+ suggested using a single RX/TX queue, pushing traffic to the NIC, and then
+ sending netpoll messages (via netconsole) to trigger the poll.
+
+ In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
+ so, the test passes, otherwise it will be skipped. This test is very dependent on
+ the driver and environment, given we are trying to trigger a tricky scenario.
+"""
+
+import errno
+import logging
+import os
+import random
+import string
+import threading
+import time
+from typing import Optional
+
+from lib.py import (
+ bpftrace,
+ CmdExitFailure,
+ defer,
+ ethtool,
+ GenerateTraffic,
+ ksft_exit,
+ ksft_pr,
+ ksft_run,
+ KsftFailEx,
+ KsftSkipEx,
+ NetDrvEpEnv,
+ KsftXfailEx,
+)
+
+# Configure logging
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
+NETCONS_REMOTE_PORT: int = 6666
+NETCONS_LOCAL_PORT: int = 1514
+
+# Max number of netcons messages to send. Each iteration will setup
+# netconsole and send MAX_WRITES messages
+ITERATIONS: int = 20
+# Number of writes to /dev/kmsg per iteration
+MAX_WRITES: int = 40
+# MAPS contains the information coming from bpftrace it will have only one
+# key: "hits", which tells the number of times netpoll_poll_dev() was called
+MAPS: dict[str, int] = {}
+# Thread to run bpftrace in parallel
+BPF_THREAD: Optional[threading.Thread] = None
+# Time bpftrace will be running in parallel.
+BPFTRACE_TIMEOUT: int = 10
+
+
+def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
+ """
+ Read the ringsize using ethtool. This will be used to restore it after the test
+ """
+ try:
+ ethtool_result = ethtool(f"-g {interface_name}", json=True)[0]
+ rxs = ethtool_result["rx"]
+ txs = ethtool_result["tx"]
+ except (KeyError, IndexError) as exception:
+ raise KsftSkipEx(
+ f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxs, txs
+
+
+def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
+ """Try to the number of RX and TX ringsize."""
+ rxs = ring_size[0]
+ txs = ring_size[1]
+
+ logging.debug("Setting ring size to %d/%d", rxs, txs)
+ try:
+ ethtool(f"-G {interface_name} rx {rxs} tx {txs}")
+ except CmdExitFailure:
+ # This might fail on real device, retry with a higher value,
+ # worst case, keep it as it is.
+ return False
+
+ return True
+
+
+def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
+ """Read the number of RX, TX and combined queues using ethtool"""
+
+ try:
+ ethtool_result = ethtool(f"-l {interface_name}", json=True)[0]
+ rxq = ethtool_result.get("rx", -1)
+ txq = ethtool_result.get("tx", -1)
+ combined = ethtool_result.get("combined", -1)
+
+ except IndexError as exception:
+ raise KsftSkipEx(
+ f"Failed to read queues numbers: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxq, txq, combined
+
+
+def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
+ """Set the number of RX, TX and combined queues using ethtool"""
+ rxq, txq, combined = queues
+
+ cmdline = f"-L {interface_name}"
+
+ if rxq != -1:
+ cmdline += f" rx {rxq}"
+ if txq != -1:
+ cmdline += f" tx {txq}"
+ if combined != -1:
+ cmdline += f" combined {combined}"
+
+ logging.debug("calling: ethtool %s", cmdline)
+
+ try:
+ ethtool(cmdline)
+ except CmdExitFailure as exception:
+ raise KsftSkipEx(
+ f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
+ ) from exception
+
+
+def netcons_generate_random_target_name() -> str:
+ """Generate a random target name starting with 'netcons'"""
+ random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+ return f"netcons_{random_suffix}"
+
+
+def netcons_create_target(
+ config_data: dict[str, str],
+ target_name: str,
+) -> None:
+ """Create a netconsole dynamic target against the interfaces"""
+ logging.debug("Using netconsole name: %s", target_name)
+ try:
+ os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True)
+ logging.debug(
+ "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
+ )
+ except OSError as exception:
+ if exception.errno != errno.EEXIST:
+ raise KsftFailEx(
+ f"Failed to create netconsole target directory: {exception}"
+ ) from exception
+
+ try:
+ for key, value in config_data.items():
+ path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}"
+ logging.debug("Writing %s to %s", key, path)
+ with open(path, "w", encoding="utf-8") as file:
+ # Always convert to string to write to file
+ file.write(str(value))
+
+ # Read all configuration values for debugging purposes
+ for debug_key in config_data.keys():
+ with open(
+ f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}",
+ "r",
+ encoding="utf-8",
+ ) as file:
+ content = file.read()
+ logging.debug(
+ "%s/%s/%s : %s",
+ NETCONSOLE_CONFIGFS_PATH,
+ target_name,
+ debug_key,
+ content.strip(),
+ )
+
+ except Exception as exception:
+ raise KsftFailEx(
+ f"Failed to configure netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_configure_target(
+ cfg: NetDrvEpEnv, interface_name: str, target_name: str
+) -> None:
+ """Configure netconsole on the interface with the given target name"""
+ config_data = {
+ "extended": "1",
+ "dev_name": interface_name,
+ "local_port": NETCONS_LOCAL_PORT,
+ "remote_port": NETCONS_REMOTE_PORT,
+ "local_ip": cfg.addr,
+ "remote_ip": cfg.remote_addr,
+ "remote_mac": "00:00:00:00:00:00", # Not important for this test
+ "enabled": "1",
+ }
+
+ netcons_create_target(config_data, target_name)
+ logging.debug(
+ "Created netconsole target: %s on interface %s", target_name, interface_name
+ )
+
+
+def netcons_delete_target(name: str) -> None:
+ """Delete a netconsole dynamic target"""
+ target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
+ try:
+ if os.path.exists(target_path):
+ os.rmdir(target_path)
+ except OSError as exception:
+ raise KsftFailEx(
+ f"Failed to delete netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_load_module() -> None:
+ """Try to load the netconsole module"""
+ os.system("modprobe netconsole")
+
+
+def bpftrace_call() -> None:
+ """Call bpftrace to find how many times netpoll_poll_dev() is called.
+ Output is saved in the global variable `maps`"""
+
+ # This is going to update the global variable, that will be seen by the
+ # main function
+ global MAPS # pylint: disable=W0603
+
+ # This will be passed to bpftrace as in bpftrace -e "expr"
+ expr = "kprobe:netpoll_poll_dev { @hits = count(); }"
+
+ MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True)
+ logging.debug("BPFtrace output: %s", MAPS)
+
+
+def bpftrace_start():
+ """Start a thread to call `call_bpf` in a parallel thread"""
+ global BPF_THREAD # pylint: disable=W0603
+
+ BPF_THREAD = threading.Thread(target=bpftrace_call)
+ BPF_THREAD.start()
+ if not BPF_THREAD.is_alive():
+ raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
+
+
+def bpftrace_stop() -> None:
+ """Stop the bpftrace thread"""
+ if BPF_THREAD:
+ BPF_THREAD.join()
+
+
+def bpftrace_any_hit(join: bool) -> bool:
+ """Check if netpoll_poll_dev() was called by checking the global variable `maps`"""
+ if not BPF_THREAD:
+ raise KsftFailEx("BPFtrace didn't start")
+
+ if BPF_THREAD.is_alive():
+ if join:
+ # Wait for bpftrace to finish
+ BPF_THREAD.join()
+ else:
+ # bpftrace is still running, so, we will not check the result yet
+ return False
+
+ logging.debug("MAPS coming from bpftrace = %s", MAPS)
+ if "hits" not in MAPS.keys():
+ raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")
+
+ logging.debug("Got a total of %d hits", MAPS["hits"])
+ return MAPS["hits"] > 0
+
+
+def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ # Start bpftrace in parallel, so, it is watching
+ # netpoll_poll_dev() while we are sending netconsole messages
+ bpftrace_start()
+ defer(bpftrace_stop)
+
+ do_netpoll_flush(cfg, ifname, target_name)
+
+ if bpftrace_any_hit(join=True):
+ ksft_pr("netpoll_poll_dev() was called. Success")
+ return
+
+ raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")
+
+
+def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ netcons_configure_target(cfg, ifname, target_name)
+ retry = 0
+
+ for i in range(int(ITERATIONS)):
+ if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
+ # bpftrace is done, stop sending messages
+ break
+
+ msg = f"netcons test #{i}"
+ with open("/dev/kmsg", "w", encoding="utf-8") as kmsg:
+ for j in range(MAX_WRITES):
+ try:
+ kmsg.write(f"{msg}-{j}\n")
+ except OSError as exception:
+ # in some cases, kmsg can be busy, so, we will retry
+ time.sleep(1)
+ retry += 1
+ if retry < 5:
+ logging.info("Failed to write to kmsg. Retrying")
+ # Just retry a few times
+ continue
+ raise KsftFailEx(
+ f"Failed to write to kmsg: {exception}"
+ ) from exception
+
+ netcons_delete_target(target_name)
+ netcons_configure_target(cfg, ifname, target_name)
+ # If we sleep here, we will have a better chance of triggering
+ # This number is based on a few tests I ran while developing this test
+ time.sleep(0.4)
+
+
+def configure_network(ifname: str) -> None:
+ """Configure ring size and queue numbers"""
+
+ # Set defined queues to 1 to force congestion
+ prev_queues = ethtool_get_queues_cnt(ifname)
+ logging.debug("RX/TX/combined queues: %s", prev_queues)
+ # Only set the queues to 1 if they exists in the device. I.e, they are > 0
+ ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues))
+ defer(ethtool_set_queues_cnt, ifname, prev_queues)
+
+ # Try to set the ring size to some low value.
+ # Do not fail if the hardware do not accepted desired values
+ prev_ring_size = ethtool_get_ringsize(ifname)
+ for size in [(1, 1), (128, 128), (256, 256)]:
+ if ethtool_set_ringsize(ifname, size):
+ # hardware accepted the desired ringsize
+ logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size)
+ break
+ defer(ethtool_set_ringsize, ifname, prev_ring_size)
+
+
+def test_netpoll(cfg: NetDrvEpEnv) -> None:
+ """
+ Test netpoll by sending traffic to the interface and then sending
+ netconsole messages to trigger a poll
+ """
+
+ ifname = cfg.ifname
+ configure_network(ifname)
+ target_name = netcons_generate_random_target_name()
+ traffic = None
+
+ try:
+ traffic = GenerateTraffic(cfg)
+ do_netpoll_flush_monitored(cfg, ifname, target_name)
+ finally:
+ if traffic:
+ traffic.stop()
+
+ # Revert RX/TX queues
+ netcons_delete_target(target_name)
+
+
+def test_check_dependencies() -> None:
+ """Check if the dependencies are met"""
+ if not os.path.exists(NETCONSOLE_CONFIGFS_PATH):
+ raise KsftSkipEx(
+ f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301
+ )
+
+
+def main() -> None:
+ """Main function to run the test"""
+ netcons_load_module()
+ test_check_dependencies()
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(
+ [test_netpoll],
+ args=(cfg,),
+ )
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
new file mode 100755
index 000000000000..c51c83421c61
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 NXP
+
+# The script is mostly generic, with the exception of the
+# ethtool per-TC counter names ("rx_green_prio_${tc}")
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h1_vlan_create()
+{
+ local vid=$1
+
+ vlan_create $h1 $vid
+ simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+ ip link set $h1.$vid type vlan \
+ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+ ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h1_vlan_destroy()
+{
+ local vid=$1
+
+ simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+ vlan_destroy $h1 $vid
+}
+
+h2_vlan_create()
+{
+ local vid=$1
+
+ vlan_create $h2 $vid
+ simple_if_init $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+ ip link set $h2.$vid type vlan \
+ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+ ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h2_vlan_destroy()
+{
+ local vid=$1
+
+ simple_if_fini $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+ vlan_destroy $h2 $vid
+}
+
+vlans_prepare()
+{
+ h1_vlan_create 100
+ h2_vlan_create 100
+
+ tc qdisc add dev ${h1}.100 clsact
+ tc filter add dev ${h1}.100 egress protocol ipv4 \
+ flower ip_proto icmp action skbedit priority 3
+ tc filter add dev ${h1}.100 egress protocol ipv6 \
+ flower ip_proto icmpv6 action skbedit priority 3
+}
+
+vlans_destroy()
+{
+ tc qdisc del dev ${h1}.100 clsact
+
+ h1_vlan_destroy 100
+ h2_vlan_destroy 100
+}
+
+switch_create()
+{
+ ip link set ${swp1} up
+ ip link set ${swp2} up
+
+ # Ports should trust VLAN PCP even with vlan_filtering=0
+ ip link add br0 type bridge
+ ip link set ${swp1} master br0
+ ip link set ${swp2} master br0
+ ip link set br0 up
+}
+
+switch_destroy()
+{
+ ip link del br0
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+ switch_destroy
+
+ vrf_cleanup
+}
+
+dscp_cs_to_tos()
+{
+ local dscp_cs=$1
+
+ # https://datatracker.ietf.org/doc/html/rfc2474
+ # 4.2.2.1 The Class Selector Codepoints
+ echo $((${dscp_cs} << 5))
+}
+
+run_test()
+{
+ local test_name=$1; shift
+ local if_name=$1; shift
+ local tc=$1; shift
+ local tos=$1; shift
+ local counter_name="rx_green_prio_${tc}"
+ local ipv4_before
+ local ipv4_after
+ local ipv6_before
+ local ipv6_after
+
+ ipv4_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+ ping_do ${if_name} $H2_IPV4 "-Q ${tos}"
+ ipv4_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+ if [ $((${ipv4_after} - ${ipv4_before})) -lt ${PING_COUNT} ]; then
+ RET=1
+ else
+ RET=0
+ fi
+ log_test "IPv4 ${test_name}"
+
+ ipv6_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+ ping_do ${if_name} $H2_IPV6 "-Q ${tos}"
+ ipv6_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+ if [ $((${ipv6_after} - ${ipv6_before})) -lt ${PING_COUNT} ]; then
+ RET=1
+ else
+ RET=0
+ fi
+ log_test "IPv6 ${test_name}"
+}
+
+port_default_prio_get()
+{
+ local if_name=$1
+ local prio
+
+ prio="$(dcb -j app show dev ${if_name} default-prio | \
+ jq '.default_prio[]')"
+ if [ -z "${prio}" ]; then
+ prio=0
+ fi
+
+ echo ${prio}
+}
+
+test_port_default()
+{
+ local orig=$(port_default_prio_get ${swp1})
+ local dmac=$(mac_get ${h2})
+
+ dcb app replace dev ${swp1} default-prio 5
+
+ run_test "Port-default QoS classification" ${h1} 5 0
+
+ dcb app replace dev ${swp1} default-prio ${orig}
+}
+
+test_vlan_pcp()
+{
+ vlans_prepare
+
+ run_test "Trusted VLAN PCP QoS classification" ${h1}.100 3 0
+
+ vlans_destroy
+}
+
+test_ip_dscp()
+{
+ local port_default=$(port_default_prio_get ${swp1})
+ local tos=$(dscp_cs_to_tos 4)
+
+ dcb app add dev ${swp1} dscp-prio CS4:4
+ run_test "Trusted DSCP QoS classification" ${h1} 4 ${tos}
+ dcb app del dev ${swp1} dscp-prio CS4:4
+
+ vlans_prepare
+ run_test "Untrusted DSCP QoS classification follows VLAN PCP" \
+ ${h1}.100 3 ${tos}
+ vlans_destroy
+
+ run_test "Untrusted DSCP QoS classification follows port default" \
+ ${h1} ${port_default} ${tos}
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_port_default
+ test_vlan_pcp
+ test_ip_dscp
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
new file mode 100755
index 000000000000..8972f42dfe03
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -0,0 +1,323 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+# Note: On LS1028A, in lack of enough user ports, this setup requires patching
+# the device tree to use the second CPU port as a user port
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/tsn_lib.sh
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+# Tunables
+NUM_PKTS=1000
+STREAM_VID=100
+STREAM_PRIO=6
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((${CYCLE_TIME_NS} / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((${CYCLE_TIME_NS} / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((${GATE_DURATION_NS} / 2))
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Chain number exported by the ocelot driver for
+# Per-Stream Filtering and Policing filters
+PSFP()
+{
+ echo 30000
+}
+
+psfp_chain_create()
+{
+ local if_name=$1
+
+ tc qdisc add dev $if_name clsact
+
+ tc filter add dev $if_name ingress chain 0 pref 49152 flower \
+ skip_sw action goto chain $(PSFP)
+}
+
+psfp_chain_destroy()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name clsact
+}
+
+psfp_filter_check()
+{
+ local expected=$1
+ local packets=""
+ local drops=""
+ local stats=""
+
+ stats=$(tc -j -s filter show dev ${swp1} ingress chain $(PSFP) pref 1)
+ packets=$(echo ${stats} | jq ".[1].options.actions[].stats.packets")
+ drops=$(echo ${stats} | jq ".[1].options.actions[].stats.drops")
+
+ if ! [ "${packets}" = "${expected}" ]; then
+ printf "Expected filter to match on %d packets but matched on %d instead\n" \
+ "${expected}" "${packets}"
+ fi
+
+ echo "Hardware filter reports ${drops} drops"
+}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+ local h2_mac_addr=$(mac_get $h2)
+
+ ip link set ${swp1} up
+ ip link set ${swp2} up
+
+ ip link add br0 type bridge vlan_filtering 1
+ ip link set ${swp1} master br0
+ ip link set ${swp2} master br0
+ ip link set br0 up
+
+ bridge vlan add dev ${swp2} vid ${STREAM_VID}
+ bridge vlan add dev ${swp1} vid ${STREAM_VID}
+ # PSFP on Ocelot requires the filter to also be added to the bridge
+ # FDB, and not be removed
+ bridge fdb add dev ${swp2} \
+ ${h2_mac_addr} vlan ${STREAM_VID} static master
+
+ psfp_chain_create ${swp1}
+
+ tc filter add dev ${swp1} ingress chain $(PSFP) pref 1 \
+ protocol 802.1Q flower skip_sw \
+ dst_mac ${h2_mac_addr} vlan_id ${STREAM_VID} \
+ action gate base-time 0.000000000 \
+ sched-entry OPEN ${GATE_DURATION_NS} -1 -1 \
+ sched-entry CLOSE ${GATE_DURATION_NS} -1 -1
+}
+
+switch_destroy()
+{
+ psfp_chain_destroy ${swp1}
+ ip link del br0
+}
+
+txtime_setup()
+{
+ local if_name=$1
+
+ tc qdisc add dev ${if_name} clsact
+ # Classify PTP on TC 7 and isochron on TC 6
+ tc filter add dev ${if_name} egress protocol 0x88f7 \
+ flower action skbedit priority 7
+ tc filter add dev ${if_name} egress protocol 802.1Q \
+ flower vlan_ethtype 0xdead action skbedit priority 6
+ tc qdisc add dev ${if_name} handle 100: parent root mqprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ hw 1
+ # Set up TC 6 for SO_TXTIME. tc-mqprio queues count from 1.
+ tc qdisc replace dev ${if_name} parent 100:$((${STREAM_PRIO} + 1)) etf \
+ clockid CLOCK_TAI offload delta ${FUDGE_FACTOR}
+}
+
+txtime_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev ${if_name} root
+ tc qdisc del dev ${if_name} clsact
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+
+ txtime_setup ${h1}
+
+ # Set up swp1 as a master PHC for h1, synchronized to the local
+ # CLOCK_REALTIME.
+ phc2sys_start ${UDS_ADDRESS_SWP1}
+
+ # Assumption true for LS1028A: h1 and h2 use the same PHC. So by
+ # synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized
+ # to swp1 (and both to CLOCK_REALTIME).
+ ptp4l_start ${h1} true ${UDS_ADDRESS_H1}
+ ptp4l_start ${swp1} false ${UDS_ADDRESS_SWP1}
+
+ # Make sure there are no filter matches at the beginning of the test
+ psfp_filter_check 0
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ptp4l_stop ${swp1}
+ ptp4l_stop ${h1}
+ phc2sys_stop
+ isochron_recv_stop
+
+ txtime_cleanup ${h1}
+
+ h2_destroy
+ h1_destroy
+ switch_destroy
+
+ vrf_cleanup
+}
+
+debug_incorrectly_dropped_packets()
+{
+ local isochron_dat=$1
+ local dropped_seqids
+ local seqid
+
+ echo "Packets incorrectly dropped:"
+
+ dropped_seqids=$(isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u RX hw %T\n" \
+ --printf-args "qR" | \
+ grep 'RX hw 0.000000000' | \
+ awk '{print $1}')
+
+ for seqid in ${dropped_seqids}; do
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --start ${seqid} --stop ${seqid} \
+ --printf-format "seqid %u scheduled for %T, HW TX timestamp %T\n" \
+ --printf-args "qST"
+ done
+}
+
+debug_incorrectly_received_packets()
+{
+ local isochron_dat=$1
+
+ echo "Packets incorrectly received:"
+
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "seqid %u scheduled for %T, HW TX timestamp %T, HW RX timestamp %T\n" \
+ --printf-args "qSTR" |
+ grep -v 'HW RX timestamp 0.000000000'
+}
+
+run_test()
+{
+ local base_time=$1
+ local expected=$2
+ local test_name=$3
+ local debug=$4
+ local isochron_dat="$(mktemp)"
+ local extra_args=""
+ local received
+
+ isochron_do \
+ "${h1}" \
+ "${h2}" \
+ "${UDS_ADDRESS_H1}" \
+ "" \
+ "${base_time}" \
+ "${CYCLE_TIME_NS}" \
+ "${SHIFT_TIME_NS}" \
+ "${GATE_DURATION_NS}" \
+ "${NUM_PKTS}" \
+ "${STREAM_VID}" \
+ "${STREAM_PRIO}" \
+ "" \
+ "${isochron_dat}"
+
+ received=$(isochron_report_num_received "${isochron_dat}")
+ if [ "${received}" = "${expected}" ]; then
+ RET=0
+ else
+ RET=1
+ echo "Expected isochron to receive ${expected} packets but received ${received}"
+ fi
+
+ log_test "${test_name}"
+
+ if [ "$RET" = "1" ]; then
+ ${debug} "${isochron_dat}"
+ fi
+
+ rm ${isochron_dat} 2> /dev/null
+}
+
+test_gate_in_band()
+{
+ # Send packets in-band with the OPEN gate entry
+ run_test 0.000000000 ${NUM_PKTS} "In band" \
+ debug_incorrectly_dropped_packets
+
+ psfp_filter_check ${NUM_PKTS}
+}
+
+test_gate_out_of_band()
+{
+ # Send packets in-band with the CLOSE gate entry
+ run_test 0.005000000 0 "Out of band" \
+ debug_incorrectly_received_packets
+
+ psfp_filter_check $((2 * ${NUM_PKTS}))
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_gate_in_band
+ test_gate_out_of_band
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index beee0d5646a6..aff0a59f92d9 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -1,38 +1,20 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Copyright 2020 NXP Semiconductors
+# Copyright 2020 NXP
WAIT_TIME=1
NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/tc_common.sh
source $lib_dir/lib.sh
require_command tcpdump
-#
-# +---------------------------------------------+
-# | DUT ports Generator ports |
-# | +--------+ +--------+ +--------+ +--------+ |
-# | | | | | | | | | |
-# | | eth0 | | eth1 | | eth2 | | eth3 | |
-# | | | | | | | | | |
-# +-+--------+-+--------+-+--------+-+--------+-+
-# | | | |
-# | | | |
-# | +-----------+ |
-# | |
-# +--------------------------------+
-
-eth0=${NETIFS[p1]}
-eth1=${NETIFS[p2]}
-eth2=${NETIFS[p3]}
-eth3=${NETIFS[p4]}
-
-eth0_mac="de:ad:be:ef:00:00"
-eth1_mac="de:ad:be:ef:00:01"
-eth2_mac="de:ad:be:ef:00:02"
-eth3_mac="de:ad:be:ef:00:03"
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number
# used by the kernel driver. The numbers are:
@@ -156,147 +138,200 @@ create_tcam_skeleton()
setup_prepare()
{
- create_tcam_skeleton $eth0
+ ip link set $swp1 up
+ ip link set $swp2 up
+ ip link set $h2 up
+ ip link set $h1 up
+
+ create_tcam_skeleton $swp1
ip link add br0 type bridge
- ip link set $eth0 master br0
- ip link set $eth1 master br0
+ ip link set $swp1 master br0
+ ip link set $swp2 master br0
ip link set br0 up
- ip link add link $eth3 name $eth3.100 type vlan id 100
- ip link set $eth3.100 up
+ ip link add link $h1 name $h1.100 type vlan id 100
+ ip link set $h1.100 up
- ip link add link $eth3 name $eth3.200 type vlan id 200
- ip link set $eth3.200 up
+ ip link add link $h1 name $h1.200 type vlan id 200
+ ip link set $h1.200 up
- tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \
+ tc filter add dev $swp1 ingress chain $(IS1 1) pref 1 \
protocol 802.1Q flower skip_sw vlan_id 100 \
action vlan pop \
action goto chain $(IS1 2)
- tc filter add dev $eth0 egress chain $(ES0) pref 1 \
- flower skip_sw indev $eth1 \
+ tc filter add dev $swp1 egress chain $(ES0) pref 1 \
+ flower skip_sw indev $swp2 \
action vlan push protocol 802.1Q id 100
- tc filter add dev $eth0 ingress chain $(IS1 0) pref 2 \
+ tc filter add dev $swp1 ingress chain $(IS1 0) pref 2 \
protocol ipv4 flower skip_sw src_ip 10.1.1.2 \
action skbedit priority 7 \
action goto chain $(IS1 1)
- tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
+ tc filter add dev $swp1 ingress chain $(IS2 0 0) pref 1 \
protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
- action police rate 50mbit burst 64k \
+ action police rate 50mbit burst 64k conform-exceed drop/pipe \
action goto chain $(IS2 1 0)
}
cleanup()
{
- ip link del $eth3.200
- ip link del $eth3.100
- tc qdisc del dev $eth0 clsact
+ ip link del $h1.200
+ ip link del $h1.100
+ tc qdisc del dev $swp1 clsact
ip link del br0
}
test_vlan_pop()
{
- printf "Testing VLAN pop.. "
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ RET=0
- tcpdump_start $eth2
+ tcpdump_start $h2
# Work around Mausezahn VLAN builder bug
# (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using
# an 8021q upper
- $MZ $eth3.100 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+ $MZ $h1.100 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
sleep 1
- tcpdump_stop
+ tcpdump_stop $h2
- if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
- echo "OK"
- else
- echo "FAIL"
- fi
+ tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, ethertype IPv4"
+ check_err "$?" "untagged reception"
- tcpdump_cleanup
+ tcpdump_cleanup $h2
+
+ log_test "VLAN pop"
}
test_vlan_push()
{
- printf "Testing VLAN push.. "
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ RET=0
- tcpdump_start $eth3.100
+ tcpdump_start $h1.100
- $MZ $eth2 -q -c 1 -p 64 -a $eth2_mac -b $eth3_mac -t ip
+ $MZ $h2 -q -c 1 -p 64 -a $h2_mac -b $h1_mac -t ip
sleep 1
- tcpdump_stop
+ tcpdump_stop $h1.100
- if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then
- echo "OK"
- else
- echo "FAIL"
- fi
+ tcpdump_show $h1.100 | grep -q "$h2_mac > $h1_mac"
+ check_err "$?" "tagged reception"
- tcpdump_cleanup
+ tcpdump_cleanup $h1.100
+
+ log_test "VLAN push"
}
-test_vlan_modify()
+test_vlan_ingress_modify()
{
- printf "Testing VLAN modification.. "
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ RET=0
ip link set br0 type bridge vlan_filtering 1
- bridge vlan add dev $eth0 vid 200
- bridge vlan add dev $eth0 vid 300
- bridge vlan add dev $eth1 vid 300
+ bridge vlan add dev $swp1 vid 200
+ bridge vlan add dev $swp1 vid 300
+ bridge vlan add dev $swp2 vid 300
- tc filter add dev $eth0 ingress chain $(IS1 2) pref 3 \
- protocol 802.1Q flower skip_sw vlan_id 200 \
+ tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \
+ protocol 802.1Q flower skip_sw vlan_id 200 src_mac $h1_mac \
action vlan modify id 300 \
action goto chain $(IS2 0 0)
- tcpdump_start $eth2
+ tcpdump_start $h2
- $MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+ $MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
sleep 1
- tcpdump_stop
+ tcpdump_stop $h2
- if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
- echo "OK"
- else
- echo "FAIL"
- fi
+ tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
+ check_err "$?" "tagged reception"
- tcpdump_cleanup
+ tcpdump_cleanup $h2
- tc filter del dev $eth0 ingress chain $(IS1 2) pref 3
+ tc filter del dev $swp1 ingress chain $(IS1 2) pref 3
- bridge vlan del dev $eth0 vid 200
- bridge vlan del dev $eth0 vid 300
- bridge vlan del dev $eth1 vid 300
+ bridge vlan del dev $swp1 vid 200
+ bridge vlan del dev $swp1 vid 300
+ bridge vlan del dev $swp2 vid 300
ip link set br0 type bridge vlan_filtering 0
+
+ log_test "Ingress VLAN modification"
+}
+
+test_vlan_egress_modify()
+{
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ RET=0
+
+ tc qdisc add dev $swp2 clsact
+
+ ip link set br0 type bridge vlan_filtering 1
+ bridge vlan add dev $swp1 vid 200
+ bridge vlan add dev $swp2 vid 200
+
+ tc filter add dev $swp2 egress chain $(ES0) pref 3 \
+ protocol 802.1Q flower skip_sw vlan_id 200 vlan_prio 0 \
+ action vlan modify id 300 priority 7
+
+ tcpdump_start $h2
+
+ $MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop $h2
+
+ tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
+ check_err "$?" "tagged reception"
+
+ tcpdump_cleanup $h2
+
+ tc filter del dev $swp2 egress chain $(ES0) pref 3
+ tc qdisc del dev $swp2 clsact
+
+ bridge vlan del dev $swp1 vid 200
+ bridge vlan del dev $swp2 vid 200
+ ip link set br0 type bridge vlan_filtering 0
+
+ log_test "Egress VLAN modification"
}
test_skbedit_priority()
{
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
local num_pkts=100
- printf "Testing frame prioritization.. "
+ before=$(ethtool_stats_get $swp1 'rx_green_prio_7')
- before=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+ $MZ $h1 -q -c $num_pkts -p 64 -a $h1_mac -b $h2_mac -t ip -A 10.1.1.2
- $MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2
-
- after=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+ after=$(ethtool_stats_get $swp1 'rx_green_prio_7')
if [ $((after - before)) = $num_pkts ]; then
- echo "OK"
+ RET=0
else
- echo "FAIL"
+ RET=1
fi
+
+ log_test "Frame prioritization"
}
trap cleanup EXIT
@@ -304,7 +339,8 @@ trap cleanup EXIT
ALL_TESTS="
test_vlan_pop
test_vlan_push
- test_vlan_modify
+ test_vlan_ingress_modify
+ test_vlan_egress_modify
test_skbedit_priority
"
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
new file mode 100755
index 000000000000..da3623c5e8a9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import random, string, time
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, KsftSkipEx, KsftFailEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, wait_port_listen, rand_port
+from lib.py import defer, ethtool, ip
+
+no_sleep=False
+
+def _test_v4(cfg) -> None:
+ if not cfg.addr_v["4"]:
+ return
+
+ cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"])
+ cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["4"])
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
+
+def _test_v6(cfg) -> None:
+ if not cfg.addr_v["6"]:
+ return
+
+ cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"])
+ cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote)
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["6"])
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote)
+
+def _test_tcp(cfg) -> None:
+ cfg.require_cmd("socat", local=False, remote=True)
+
+ port = rand_port()
+ listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
+
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
+ with bkg(listen_cmd, exit_wait=True) as nc:
+ wait_port_listen(port)
+
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
+ shell=True, host=cfg.remote)
+ ksft_eq(nc.stdout.strip(), test_string)
+
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
+ wait_port_listen(port, host=cfg.remote)
+
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
+ ksft_eq(nc.stdout.strip(), test_string)
+
+def _schedule_checksum_reset(cfg, netnl) -> None:
+ features = ethtool(f"-k {cfg.ifname}", json=True)
+ setting = ""
+ for side in ["tx", "rx"]:
+ f = features[0][side + "-checksumming"]
+ if not f["fixed"]:
+ setting += " " + side
+ setting += " " + ("on" if f["requested"] or f["active"] else "off")
+ defer(ethtool, f" -K {cfg.ifname} " + setting)
+
+def _set_offload_checksum(cfg, netnl, on) -> None:
+ try:
+ ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
+ except:
+ return
+
+def _set_xdp_generic_sb_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True)
+ defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off")
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_generic_mb_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+ ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog))
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off")
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_sb_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True)
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+ xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if xdp_info['xdp']['mode'] != 1:
+ """
+ If the interface doesn't support native-mode, it falls back to generic mode.
+ The mode value 1 is native and 2 is generic.
+ So it raises an exception if mode is not 1(native mode).
+ """
+ raise KsftSkipEx('device does not support native-XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_mb_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True)
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+ except Exception as e:
+ raise KsftSkipEx('device does not support native-multi-buffer XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_offload_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True)
+ except Exception as e:
+ raise KsftSkipEx('device does not support offloaded XDP')
+ defer(ip, f"link set dev {cfg.ifname} xdpoffload off")
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def get_interface_info(cfg) -> None:
+ global no_sleep
+
+ if cfg.remote_ifname == "":
+ raise KsftFailEx('Can not get remote interface')
+ local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim":
+ no_sleep=True
+ if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth":
+ no_sleep=True
+
+def set_interface_init(cfg) -> None:
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+def test_default_v4(cfg, netnl) -> None:
+ cfg.require_ipver("4")
+
+ _schedule_checksum_reset(cfg, netnl)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_tcp(cfg)
+
+def test_default_v6(cfg, netnl) -> None:
+ cfg.require_ipver("6")
+
+ _schedule_checksum_reset(cfg, netnl)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_generic_sb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
+ _set_xdp_generic_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_generic_mb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
+ _set_xdp_generic_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_native_sb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
+ _set_xdp_native_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_native_mb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
+ _set_xdp_native_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_offload(cfg, netnl) -> None:
+ _set_xdp_offload_on(cfg)
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ get_interface_info(cfg)
+ set_interface_init(cfg)
+ ksft_run([test_default_v4,
+ test_default_v6,
+ test_xdp_generic_sb,
+ test_xdp_generic_mb,
+ test_xdp_native_sb,
+ test_xdp_native_mb,
+ test_xdp_offload],
+ args=(cfg, EthtoolFamily()))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py
new file mode 100755
index 000000000000..06559ef49b9a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/psp.py
@@ -0,0 +1,640 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Test suite for PSP capable drivers."""
+
+import errno
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import defer
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_true, ksft_eq, ksft_ne, ksft_gt, ksft_raises
+from lib.py import ksft_not_none
+from lib.py import KsftSkipEx
+from lib.py import NetDrvEpEnv, PSPFamily, NlError
+from lib.py import bkg, rand_port, wait_port_listen
+
+
+def _get_outq(s):
+ one = b'\0' * 4
+ outq = fcntl.ioctl(s.fileno(), termios.TIOCOUTQ, one)
+ return struct.unpack("I", outq)[0]
+
+
+def _send_with_ack(cfg, msg):
+ cfg.comm_sock.send(msg)
+ response = cfg.comm_sock.recv(4)
+ if response != b'ack\0':
+ raise RuntimeError("Unexpected server response", response)
+
+
+def _remote_read_len(cfg):
+ cfg.comm_sock.send(b'read len\0')
+ return int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8'))
+
+
+def _make_clr_conn(cfg, ipver=None):
+ _send_with_ack(cfg, b'conn clr\0')
+ remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr
+ s = socket.create_connection((remote_addr, cfg.comm_port), )
+ return s
+
+
+def _make_psp_conn(cfg, version=0, ipver=None):
+ _send_with_ack(cfg, b'conn psp\0' + struct.pack('BB', version, version))
+ remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr
+ s = socket.create_connection((remote_addr, cfg.comm_port), )
+ return s
+
+
+def _close_conn(cfg, s):
+ _send_with_ack(cfg, b'data close\0')
+ s.close()
+
+
+def _close_psp_conn(cfg, s):
+ _close_conn(cfg, s)
+
+
+def _spi_xchg(s, rx):
+ s.send(struct.pack('I', rx['spi']) + rx['key'])
+ tx = s.recv(4 + len(rx['key']))
+ return {
+ 'spi': struct.unpack('I', tx[:4])[0],
+ 'key': tx[4:]
+ }
+
+
+def _send_careful(cfg, s, rounds):
+ data = b'0123456789' * 200
+ for i in range(rounds):
+ n = 0
+ for _ in range(10): # allow 10 retries
+ try:
+ n += s.send(data[n:], socket.MSG_DONTWAIT)
+ if n == len(data):
+ break
+ except BlockingIOError:
+ time.sleep(0.05)
+ else:
+ rlen = _remote_read_len(cfg)
+ outq = _get_outq(s)
+ report = f'sent: {i * len(data) + n} remote len: {rlen} outq: {outq}'
+ raise RuntimeError(report)
+
+ return len(data) * rounds
+
+
+def _check_data_rx(cfg, exp_len):
+ read_len = -1
+ for _ in range(30):
+ cfg.comm_sock.send(b'read len\0')
+ read_len = int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8'))
+ if read_len == exp_len:
+ break
+ time.sleep(0.01)
+ ksft_eq(read_len, exp_len)
+
+
+def _check_data_outq(s, exp_len, force_wait=False):
+ outq = 0
+ for _ in range(10):
+ outq = _get_outq(s)
+ if not force_wait and outq == exp_len:
+ break
+ time.sleep(0.01)
+ ksft_eq(outq, exp_len)
+
+
+def _get_stat(cfg, key):
+ return cfg.pspnl.get_stats({'dev-id': cfg.psp_dev_id})[key]
+
+#
+# Test case boiler plate
+#
+
+def _init_psp_dev(cfg):
+ if not hasattr(cfg, 'psp_dev_id'):
+ # Figure out which local device we are testing against
+ for dev in cfg.pspnl.dev_get({}, dump=True):
+ if dev['ifindex'] == cfg.ifindex:
+ cfg.psp_info = dev
+ cfg.psp_dev_id = cfg.psp_info['id']
+ break
+ else:
+ raise KsftSkipEx("No PSP devices found")
+
+ # Enable PSP if necessary
+ cap = cfg.psp_info['psp-versions-cap']
+ ena = cfg.psp_info['psp-versions-ena']
+ if cap != ena:
+ cfg.pspnl.dev_set({'id': cfg.psp_dev_id, 'psp-versions-ena': cap})
+ defer(cfg.pspnl.dev_set, {'id': cfg.psp_dev_id,
+ 'psp-versions-ena': ena })
+
+#
+# Test cases
+#
+
+def dev_list_devices(cfg):
+ """ Dump all devices """
+ _init_psp_dev(cfg)
+
+ devices = cfg.pspnl.dev_get({}, dump=True)
+
+ found = False
+ for dev in devices:
+ found |= dev['id'] == cfg.psp_dev_id
+ ksft_true(found)
+
+
+def dev_get_device(cfg):
+ """ Get the device we intend to use """
+ _init_psp_dev(cfg)
+
+ dev = cfg.pspnl.dev_get({'id': cfg.psp_dev_id})
+ ksft_eq(dev['id'], cfg.psp_dev_id)
+
+
+def dev_get_device_bad(cfg):
+ """ Test getting device which doesn't exist """
+ raised = False
+ try:
+ cfg.pspnl.dev_get({'id': 1234567})
+ except NlError as e:
+ ksft_eq(e.nl_msg.error, -errno.ENODEV)
+ raised = True
+ ksft_true(raised)
+
+
+def dev_rotate(cfg):
+ """ Test key rotation """
+ _init_psp_dev(cfg)
+
+ prev_rotations = _get_stat(cfg, 'key-rotations')
+
+ rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+ ksft_eq(rot['id'], cfg.psp_dev_id)
+ rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+ ksft_eq(rot['id'], cfg.psp_dev_id)
+
+ cur_rotations = _get_stat(cfg, 'key-rotations')
+ ksft_eq(cur_rotations, prev_rotations + 2)
+
+
+def dev_rotate_spi(cfg):
+ """ Test key rotation and SPI check """
+ _init_psp_dev(cfg)
+
+ top_a = top_b = 0
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ assoc_a = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ top_a = assoc_a['rx-key']['spi'] >> 31
+ s.close()
+ rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ ksft_eq(rot['id'], cfg.psp_dev_id)
+ assoc_b = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ top_b = assoc_b['rx-key']['spi'] >> 31
+ s.close()
+ ksft_ne(top_a, top_b)
+
+
+def assoc_basic(cfg):
+ """ Test creating associations """
+ _init_psp_dev(cfg)
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ ksft_eq(assoc['dev-id'], cfg.psp_dev_id)
+ ksft_gt(assoc['rx-key']['spi'], 0)
+ ksft_eq(len(assoc['rx-key']['key']), 16)
+
+ assoc = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": assoc['rx-key'],
+ "sock-fd": s.fileno()})
+ ksft_eq(len(assoc), 0)
+ s.close()
+
+
+def assoc_bad_dev(cfg):
+ """ Test creating associations with bad device ID """
+ _init_psp_dev(cfg)
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id + 1234567,
+ "sock-fd": s.fileno()})
+ ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV)
+
+
+def assoc_sk_only_conn(cfg):
+ """ Test creating associations based on socket """
+ _init_psp_dev(cfg)
+
+ with _make_clr_conn(cfg) as s:
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "sock-fd": s.fileno()})
+ ksft_eq(assoc['dev-id'], cfg.psp_dev_id)
+ cfg.pspnl.tx_assoc({"version": 0,
+ "tx-key": assoc['rx-key'],
+ "sock-fd": s.fileno()})
+ _close_conn(cfg, s)
+
+
+def assoc_sk_only_mismatch(cfg):
+ """ Test creating associations based on socket (dev mismatch) """
+ _init_psp_dev(cfg)
+
+ with _make_clr_conn(cfg) as s:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id + 1234567,
+ "sock-fd": s.fileno()})
+ the_exception = cm.exception
+ ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
+ ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_sk_only_mismatch_tx(cfg):
+ """ Test creating associations based on socket (dev mismatch) """
+ _init_psp_dev(cfg)
+
+ with _make_clr_conn(cfg) as s:
+ with ksft_raises(NlError) as cm:
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "sock-fd": s.fileno()})
+ cfg.pspnl.tx_assoc({"version": 0,
+ "tx-key": assoc['rx-key'],
+ "dev-id": cfg.psp_dev_id + 1234567,
+ "sock-fd": s.fileno()})
+ the_exception = cm.exception
+ ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
+ ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_sk_only_unconn(cfg):
+ """ Test creating associations based on socket (unconnected, should fail) """
+ _init_psp_dev(cfg)
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.rx_assoc({"version": 0,
+ "sock-fd": s.fileno()})
+ the_exception = cm.exception
+ ksft_eq(the_exception.nl_msg.extack['miss-type'], "dev-id")
+ ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_version_mismatch(cfg):
+ """ Test creating associations where Rx and Tx PSP versions do not match """
+ _init_psp_dev(cfg)
+
+ versions = list(cfg.psp_info['psp-versions-cap'])
+ if len(versions) < 2:
+ raise KsftSkipEx("Not enough PSP versions supported by the device for the test")
+
+ # Translate versions to integers
+ versions = [cfg.pspnl.consts["version"].entries[v].value for v in versions]
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ rx = cfg.pspnl.rx_assoc({"version": versions[0],
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+
+ for version in versions[1:]:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": version,
+ "tx-key": rx['rx-key'],
+ "sock-fd": s.fileno()})
+ the_exception = cm.exception
+ ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_twice(cfg):
+ """ Test reusing Tx assoc for two sockets """
+ _init_psp_dev(cfg)
+
+ def rx_assoc_check(s):
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ ksft_eq(assoc['dev-id'], cfg.psp_dev_id)
+ ksft_gt(assoc['rx-key']['spi'], 0)
+ ksft_eq(len(assoc['rx-key']['key']), 16)
+
+ return assoc
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ assoc = rx_assoc_check(s)
+ tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": assoc['rx-key'],
+ "sock-fd": s.fileno()})
+ ksft_eq(len(tx), 0)
+
+ # Use the same Tx assoc second time
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s2:
+ rx_assoc_check(s2)
+ tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": assoc['rx-key'],
+ "sock-fd": s2.fileno()})
+ ksft_eq(len(tx), 0)
+
+ s.close()
+
+
+def _data_basic_send(cfg, version, ipver):
+ """ Test basic data send """
+ _init_psp_dev(cfg)
+
+ # Version 0 is required by spec, don't let it skip
+ if version:
+ name = cfg.pspnl.consts["version"].entries_by_val[version].name
+ if name not in cfg.psp_info['psp-versions-cap']:
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.rx_assoc({"version": version,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
+ raise KsftSkipEx("PSP version not supported", name)
+
+ s = _make_psp_conn(cfg, version, ipver)
+
+ rx_assoc = cfg.pspnl.rx_assoc({"version": version,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ rx = rx_assoc['rx-key']
+ tx = _spi_xchg(s, rx)
+
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": version,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ data_len = _send_careful(cfg, s, 100)
+ _check_data_rx(cfg, data_len)
+ _close_psp_conn(cfg, s)
+
+
+def __bad_xfer_do(cfg, s, tx, version='hdr0-aes-gcm-128'):
+ # Make sure we accept the ACK for the SPI before we seal with the bad assoc
+ _check_data_outq(s, 0)
+
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": version,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ data_len = _send_careful(cfg, s, 20)
+ _check_data_outq(s, data_len, force_wait=True)
+ _check_data_rx(cfg, 0)
+ _close_psp_conn(cfg, s)
+
+
+def data_send_bad_key(cfg):
+ """ Test send data with bad key """
+ _init_psp_dev(cfg)
+
+ s = _make_psp_conn(cfg)
+
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ rx = rx_assoc['rx-key']
+ tx = _spi_xchg(s, rx)
+ tx['key'] = (tx['key'][0] ^ 0xff).to_bytes(1, 'little') + tx['key'][1:]
+ __bad_xfer_do(cfg, s, tx)
+
+
+def data_send_disconnect(cfg):
+ """ Test socket close after sending data """
+ _init_psp_dev(cfg)
+
+ with _make_psp_conn(cfg) as s:
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "sock-fd": s.fileno()})
+ tx = _spi_xchg(s, assoc['rx-key'])
+ cfg.pspnl.tx_assoc({"version": 0,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ data_len = _send_careful(cfg, s, 100)
+ _check_data_rx(cfg, data_len)
+
+ s.shutdown(socket.SHUT_RDWR)
+ s.close()
+
+
+def _data_mss_adjust(cfg, ipver):
+ _init_psp_dev(cfg)
+
+ # First figure out what the MSS would be without any adjustments
+ s = _make_clr_conn(cfg, ipver)
+ s.send(b"0123456789abcdef" * 1024)
+ _check_data_rx(cfg, 16 * 1024)
+ mss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+ _close_conn(cfg, s)
+
+ s = _make_psp_conn(cfg, 0, ipver)
+ try:
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ rx = rx_assoc['rx-key']
+ tx = _spi_xchg(s, rx)
+
+ rxmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+ ksft_eq(mss, rxmss)
+
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+ ksft_eq(mss, txmss + 40)
+
+ data_len = _send_careful(cfg, s, 100)
+ _check_data_rx(cfg, data_len)
+ _check_data_outq(s, 0)
+
+ txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+ ksft_eq(mss, txmss + 40)
+ finally:
+ _close_psp_conn(cfg, s)
+
+
+def data_stale_key(cfg):
+ """ Test send on a double-rotated key """
+ _init_psp_dev(cfg)
+
+ prev_stale = _get_stat(cfg, 'stale-events')
+ s = _make_psp_conn(cfg)
+ try:
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ rx = rx_assoc['rx-key']
+ tx = _spi_xchg(s, rx)
+
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ data_len = _send_careful(cfg, s, 100)
+ _check_data_rx(cfg, data_len)
+ _check_data_outq(s, 0)
+
+ cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+ cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+
+ cur_stale = _get_stat(cfg, 'stale-events')
+ ksft_gt(cur_stale, prev_stale)
+
+ s.send(b'0123456789' * 200)
+ _check_data_outq(s, 2000, force_wait=True)
+ finally:
+ _close_psp_conn(cfg, s)
+
+
+def __nsim_psp_rereg(cfg):
+ # The PSP dev ID will change, remember what was there before
+ before = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)])
+
+ cfg._ns.nsims[0].dfs_write('psp_rereg', '1')
+
+ after = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)])
+
+ new_devs = list(after - before)
+ ksft_eq(len(new_devs), 1)
+ cfg.psp_dev_id = list(after - before)[0]
+
+
+def removal_device_rx(cfg):
+ """ Test removing a netdev / PSD with active Rx assoc """
+
+ # We could technically devlink reload real devices, too
+ # but that kills the control socket. So test this on
+ # netdevsim only for now
+ cfg.require_nsim()
+
+ s = _make_clr_conn(cfg)
+ try:
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ ksft_not_none(rx_assoc)
+
+ __nsim_psp_rereg(cfg)
+ finally:
+ _close_conn(cfg, s)
+
+
+def removal_device_bi(cfg):
+ """ Test removing a netdev / PSD with active Rx/Tx assoc """
+
+ # We could technically devlink reload real devices, too
+ # but that kills the control socket. So test this on
+ # netdevsim only for now
+ cfg.require_nsim()
+
+ s = _make_clr_conn(cfg)
+ try:
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": rx_assoc['rx-key'],
+ "sock-fd": s.fileno()})
+ __nsim_psp_rereg(cfg)
+ finally:
+ _close_conn(cfg, s)
+
+
+def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver):
+ """Build test cases for each combo of PSP version and IP version"""
+ def test_case(cfg):
+ cfg.require_ipver(ipver)
+ test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}"
+ test_func(cfg, psp_ver, ipver)
+ return test_case
+
+
+def ipver_test_builder(name, test_func, ipver):
+ """Build test cases for each IP version"""
+ def test_case(cfg):
+ cfg.require_ipver(ipver)
+ test_case.__name__ = f"{name}_ip{ipver}"
+ test_func(cfg, ipver)
+ return test_case
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.pspnl = PSPFamily()
+
+ # Set up responder and communication sock
+ responder = cfg.remote.deploy("psp_responder")
+
+ cfg.comm_port = rand_port()
+ srv = None
+ try:
+ with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote,
+ exit_wait=True) as srv:
+ wait_port_listen(cfg.comm_port, host=cfg.remote)
+
+ cfg.comm_sock = socket.create_connection((cfg.remote_addr,
+ cfg.comm_port),
+ timeout=1)
+
+ cases = [
+ psp_ip_ver_test_builder(
+ "data_basic_send", _data_basic_send, version, ipver
+ )
+ for version in range(0, 4)
+ for ipver in ("4", "6")
+ ]
+ cases += [
+ ipver_test_builder("data_mss_adjust", _data_mss_adjust, ipver)
+ for ipver in ("4", "6")
+ ]
+
+ ksft_run(cases=cases, globs=globals(),
+ case_pfx={"dev_", "data_", "assoc_", "removal_"},
+ args=(cfg, ))
+
+ cfg.comm_sock.send(b"exit\0")
+ cfg.comm_sock.close()
+ finally:
+ if srv and (srv.stdout or srv.stderr):
+ ksft_pr("")
+ ksft_pr(f"Responder logs ({srv.ret}):")
+ if srv and srv.stdout:
+ ksft_pr("STDOUT:\n# " + srv.stdout.strip().replace("\n", "\n# "))
+ if srv and srv.stderr:
+ ksft_pr("STDERR:\n# " + srv.stderr.strip().replace("\n", "\n# "))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c
new file mode 100644
index 000000000000..f309e0d73cbf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/psp_responder.c
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <unistd.h>
+
+#include <ynl.h>
+
+#include "psp-user.h"
+
+#define dbg(msg...) \
+do { \
+ if (opts->verbose) \
+ fprintf(stderr, "DEBUG: " msg); \
+} while (0)
+
+static bool should_quit;
+
+struct opts {
+ int port;
+ int devid;
+ bool verbose;
+};
+
+enum accept_cfg {
+ ACCEPT_CFG_NONE = 0,
+ ACCEPT_CFG_CLEAR,
+ ACCEPT_CFG_PSP,
+};
+
+static struct {
+ unsigned char tx;
+ unsigned char rx;
+} psp_vers;
+
+static int conn_setup_psp(struct ynl_sock *ys, struct opts *opts, int data_sock)
+{
+ struct psp_rx_assoc_rsp *rsp;
+ struct psp_rx_assoc_req *req;
+ struct psp_tx_assoc_rsp *tsp;
+ struct psp_tx_assoc_req *teq;
+ char info[300];
+ int key_len;
+ ssize_t sz;
+ __u32 spi;
+
+ dbg("create PSP connection\n");
+
+ // Rx assoc alloc
+ req = psp_rx_assoc_req_alloc();
+
+ psp_rx_assoc_req_set_sock_fd(req, data_sock);
+ psp_rx_assoc_req_set_version(req, psp_vers.rx);
+
+ rsp = psp_rx_assoc(ys, req);
+ psp_rx_assoc_req_free(req);
+
+ if (!rsp) {
+ perror("ERROR: failed to Rx assoc");
+ return -1;
+ }
+
+ // SPI exchange
+ key_len = rsp->rx_key._len.key;
+ memcpy(info, &rsp->rx_key.spi, sizeof(spi));
+ memcpy(&info[sizeof(spi)], rsp->rx_key.key, key_len);
+ sz = sizeof(spi) + key_len;
+
+ send(data_sock, info, sz, MSG_WAITALL);
+ psp_rx_assoc_rsp_free(rsp);
+
+ sz = recv(data_sock, info, sz, MSG_WAITALL);
+ if (sz < 0) {
+ perror("ERROR: failed to read PSP key from sock");
+ return -1;
+ }
+ memcpy(&spi, info, sizeof(spi));
+
+ // Setup Tx assoc
+ teq = psp_tx_assoc_req_alloc();
+
+ psp_tx_assoc_req_set_sock_fd(teq, data_sock);
+ psp_tx_assoc_req_set_version(teq, psp_vers.tx);
+ psp_tx_assoc_req_set_tx_key_spi(teq, spi);
+ psp_tx_assoc_req_set_tx_key_key(teq, &info[sizeof(spi)], key_len);
+
+ tsp = psp_tx_assoc(ys, teq);
+ psp_tx_assoc_req_free(teq);
+ if (!tsp) {
+ perror("ERROR: failed to Tx assoc");
+ return -1;
+ }
+ psp_tx_assoc_rsp_free(tsp);
+
+ return 0;
+}
+
+static void send_ack(int sock)
+{
+ send(sock, "ack", 4, MSG_WAITALL);
+}
+
+static void send_err(int sock)
+{
+ send(sock, "err", 4, MSG_WAITALL);
+}
+
+static void send_str(int sock, int value)
+{
+ char buf[128];
+ int ret;
+
+ ret = snprintf(buf, sizeof(buf), "%d", value);
+ send(sock, buf, ret + 1, MSG_WAITALL);
+}
+
+static void
+run_session(struct ynl_sock *ys, struct opts *opts,
+ int server_sock, int comm_sock)
+{
+ enum accept_cfg accept_cfg = ACCEPT_CFG_NONE;
+ struct pollfd pfds[3];
+ size_t data_read = 0;
+ int data_sock = -1;
+
+ while (true) {
+ bool race_close = false;
+ int nfds;
+
+ memset(pfds, 0, sizeof(pfds));
+
+ pfds[0].fd = server_sock;
+ pfds[0].events = POLLIN;
+
+ pfds[1].fd = comm_sock;
+ pfds[1].events = POLLIN;
+
+ nfds = 2;
+ if (data_sock >= 0) {
+ pfds[2].fd = data_sock;
+ pfds[2].events = POLLIN;
+ nfds++;
+ }
+
+ dbg(" ...\n");
+ if (poll(pfds, nfds, -1) < 0) {
+ perror("poll");
+ break;
+ }
+
+ /* data sock */
+ if (pfds[2].revents & POLLIN) {
+ char buf[8192];
+ ssize_t n;
+
+ n = recv(data_sock, buf, sizeof(buf), 0);
+ if (n <= 0) {
+ if (n < 0)
+ perror("data read");
+ close(data_sock);
+ data_sock = -1;
+ dbg("data sock closed\n");
+ } else {
+ data_read += n;
+ dbg("data read %zd\n", data_read);
+ }
+ }
+
+ /* comm sock */
+ if (pfds[1].revents & POLLIN) {
+ static char buf[4096];
+ static ssize_t off;
+ bool consumed;
+ ssize_t n;
+
+ n = recv(comm_sock, &buf[off], sizeof(buf) - off, 0);
+ if (n <= 0) {
+ if (n < 0)
+ perror("comm read");
+ return;
+ }
+
+ off += n;
+ n = off;
+
+#define __consume(sz) \
+ ({ \
+ if (n == (sz)) { \
+ off = 0; \
+ } else { \
+ off -= (sz); \
+ memmove(buf, &buf[(sz)], off); \
+ } \
+ })
+
+#define cmd(_name) \
+ ({ \
+ ssize_t sz = sizeof(_name); \
+ bool match = n >= sz && !memcmp(buf, _name, sz); \
+ \
+ if (match) { \
+ dbg("command: " _name "\n"); \
+ __consume(sz); \
+ } \
+ consumed |= match; \
+ match; \
+ })
+
+ do {
+ consumed = false;
+
+ if (cmd("read len"))
+ send_str(comm_sock, data_read);
+
+ if (cmd("data echo")) {
+ if (data_sock >= 0)
+ send(data_sock, "echo", 5,
+ MSG_WAITALL);
+ else
+ fprintf(stderr, "WARN: echo but no data sock\n");
+ send_ack(comm_sock);
+ }
+ if (cmd("data close")) {
+ if (data_sock >= 0) {
+ close(data_sock);
+ data_sock = -1;
+ send_ack(comm_sock);
+ } else {
+ race_close = true;
+ }
+ }
+ if (cmd("conn psp")) {
+ if (accept_cfg != ACCEPT_CFG_NONE)
+ fprintf(stderr, "WARN: old conn config still set!\n");
+ accept_cfg = ACCEPT_CFG_PSP;
+ send_ack(comm_sock);
+ /* next two bytes are versions */
+ if (off >= 2) {
+ memcpy(&psp_vers, buf, 2);
+ __consume(2);
+ } else {
+ fprintf(stderr, "WARN: short conn psp command!\n");
+ }
+ }
+ if (cmd("conn clr")) {
+ if (accept_cfg != ACCEPT_CFG_NONE)
+ fprintf(stderr, "WARN: old conn config still set!\n");
+ accept_cfg = ACCEPT_CFG_CLEAR;
+ send_ack(comm_sock);
+ }
+ if (cmd("exit"))
+ should_quit = true;
+#undef cmd
+
+ if (!consumed) {
+ fprintf(stderr, "WARN: unknown cmd: [%zd] %s\n",
+ off, buf);
+ }
+ } while (consumed && off);
+ }
+
+ /* server sock */
+ if (pfds[0].revents & POLLIN) {
+ if (data_sock >= 0) {
+ fprintf(stderr, "WARN: new data sock but old one still here\n");
+ close(data_sock);
+ data_sock = -1;
+ }
+ data_sock = accept(server_sock, NULL, NULL);
+ if (data_sock < 0) {
+ perror("accept");
+ continue;
+ }
+ data_read = 0;
+
+ if (accept_cfg == ACCEPT_CFG_CLEAR) {
+ dbg("new data sock: clear\n");
+ /* nothing to do */
+ } else if (accept_cfg == ACCEPT_CFG_PSP) {
+ dbg("new data sock: psp\n");
+ conn_setup_psp(ys, opts, data_sock);
+ } else {
+ fprintf(stderr, "WARN: new data sock but no config\n");
+ }
+ accept_cfg = ACCEPT_CFG_NONE;
+ }
+
+ if (race_close) {
+ if (data_sock >= 0) {
+ /* indeed, ordering problem, handle the close */
+ close(data_sock);
+ data_sock = -1;
+ send_ack(comm_sock);
+ } else {
+ fprintf(stderr, "WARN: close but no data sock\n");
+ send_err(comm_sock);
+ }
+ }
+ }
+ dbg("session ending\n");
+}
+
+static int spawn_server(struct opts *opts)
+{
+ struct sockaddr_in6 addr;
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("can't open socket");
+ return -1;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_addr = in6addr_any;
+ addr.sin6_port = htons(opts->port);
+
+ if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) {
+ perror("can't bind socket");
+ return -1;
+ }
+
+ if (listen(fd, 5)) {
+ perror("can't listen");
+ return -1;
+ }
+
+ return fd;
+}
+
+static int run_responder(struct ynl_sock *ys, struct opts *opts)
+{
+ int server_sock, comm;
+
+ server_sock = spawn_server(opts);
+ if (server_sock < 0)
+ return 4;
+
+ while (!should_quit) {
+ comm = accept(server_sock, NULL, NULL);
+ if (comm < 0) {
+ perror("accept failed");
+ } else {
+ run_session(ys, opts, server_sock, comm);
+ close(comm);
+ }
+ }
+
+ return 0;
+}
+
+static void usage(const char *name, const char *miss)
+{
+ if (miss)
+ fprintf(stderr, "Missing argument: %s\n", miss);
+
+ fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name);
+ exit(EXIT_FAILURE);
+}
+
+static void parse_cmd_opts(int argc, char **argv, struct opts *opts)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "vp:d:")) != -1) {
+ switch (opt) {
+ case 'v':
+ opts->verbose = 1;
+ break;
+ case 'p':
+ opts->port = atoi(optarg);
+ break;
+ case 'd':
+ opts->devid = atoi(optarg);
+ break;
+ default:
+ usage(argv[0], NULL);
+ }
+ }
+}
+
+static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions)
+{
+ struct psp_dev_set_req *sreq;
+ struct psp_dev_set_rsp *srsp;
+
+ fprintf(stderr, "Set PSP enable on device %d to 0x%x\n",
+ dev_id, versions);
+
+ sreq = psp_dev_set_req_alloc();
+
+ psp_dev_set_req_set_id(sreq, dev_id);
+ psp_dev_set_req_set_psp_versions_ena(sreq, versions);
+
+ srsp = psp_dev_set(ys, sreq);
+ psp_dev_set_req_free(sreq);
+ if (!srsp)
+ return 10;
+
+ psp_dev_set_rsp_free(srsp);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct psp_dev_get_list *dev_list;
+ bool devid_found = false;
+ __u32 ver_ena, ver_cap;
+ struct opts opts = {};
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int first_id = 0;
+ int ret;
+
+ parse_cmd_opts(argc, argv, &opts);
+ if (!opts.port)
+ usage(argv[0], "port"); // exits
+
+ ys = ynl_sock_create(&ynl_psp_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ dev_list = psp_dev_get_dump(ys);
+ if (ynl_dump_empty(dev_list)) {
+ if (ys->err.code)
+ goto err_close;
+ fprintf(stderr, "No PSP devices\n");
+ goto err_close_silent;
+ }
+
+ ynl_dump_foreach(dev_list, d) {
+ if (opts.devid) {
+ devid_found = true;
+ ver_ena = d->psp_versions_ena;
+ ver_cap = d->psp_versions_cap;
+ } else if (!first_id) {
+ first_id = d->id;
+ ver_ena = d->psp_versions_ena;
+ ver_cap = d->psp_versions_cap;
+ } else {
+ fprintf(stderr, "Multiple PSP devices found\n");
+ goto err_close_silent;
+ }
+ }
+ psp_dev_get_list_free(dev_list);
+
+ if (opts.devid && !devid_found) {
+ fprintf(stderr, "PSP device %d requested on cmdline, not found\n",
+ opts.devid);
+ goto err_close_silent;
+ } else if (!opts.devid) {
+ opts.devid = first_id;
+ }
+
+ if (ver_ena != ver_cap) {
+ ret = psp_dev_set_ena(ys, opts.devid, ver_cap);
+ if (ret)
+ goto err_close;
+ }
+
+ ret = run_responder(ys, &opts);
+
+ if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena))
+ fprintf(stderr, "WARN: failed to set the PSP versions back\n");
+
+ ynl_sock_destroy(ys);
+
+ return ret;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_close_silent:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
new file mode 100755
index 000000000000..236005290a33
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_disruptive, ksft_exit, ksft_run
+from lib.py import ksft_eq, ksft_not_in, ksft_raises, KsftSkipEx, KsftFailEx
+from lib.py import EthtoolFamily, NetdevFamily, NlError
+from lib.py import NetDrvEnv
+from lib.py import bkg, cmd, defer, ip
+import errno
+import glob
+import os
+import socket
+import struct
+
+def sys_get_queues(ifname, qtype='rx') -> int:
+ folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
+ return len(folders)
+
+
+def nl_get_queues(cfg, nl, qtype='rx'):
+ queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
+ if queues:
+ return len([q for q in queues if q['type'] == qtype])
+ return None
+
+
+def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
+ # Probe for support
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+ if xdp.ret == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.ret > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
+ ksft_wait=3):
+
+ rx = tx = False
+
+ queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
+ if not queues:
+ raise KsftSkipEx("Netlink reports no queues")
+
+ for q in queues:
+ if q['id'] == 0:
+ if q['type'] == 'rx':
+ rx = True
+ if q['type'] == 'tx':
+ tx = True
+
+ ksft_eq(q.get('xsk', None), {},
+ comment="xsk attr on queue we configured")
+ else:
+ ksft_not_in('xsk', q,
+ comment="xsk attr on queue we didn't configure")
+
+ ksft_eq(rx, True)
+ ksft_eq(tx, True)
+
+
+def get_queues(cfg, nl) -> None:
+ snl = NetdevFamily(recv_size=4096)
+
+ for qtype in ['rx', 'tx']:
+ queues = nl_get_queues(cfg, snl, qtype)
+ if not queues:
+ raise KsftSkipEx('queue-get not supported by device')
+
+ expected = sys_get_queues(cfg.dev['ifname'], qtype)
+ ksft_eq(queues, expected)
+
+
+def addremove_queues(cfg, nl) -> None:
+ queues = nl_get_queues(cfg, nl)
+ if not queues:
+ raise KsftSkipEx('queue-get not supported by device')
+
+ curr_queues = sys_get_queues(cfg.dev['ifname'])
+ if curr_queues == 1:
+ raise KsftSkipEx('cannot decrement queue: already at 1')
+
+ netnl = EthtoolFamily()
+ channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ rx_type = 'rx'
+ if channels.get('combined-count', 0) > 0:
+ rx_type = 'combined'
+
+ expected = curr_queues - 1
+ cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
+ queues = nl_get_queues(cfg, nl)
+ ksft_eq(queues, expected)
+
+ expected = curr_queues
+ cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
+ queues = nl_get_queues(cfg, nl)
+ ksft_eq(queues, expected)
+
+
+@ksft_disruptive
+def check_down(cfg, nl) -> None:
+ # Check the NAPI IDs before interface goes down and hides them
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+
+ ip(f"link set dev {cfg.dev['ifname']} down")
+ defer(ip, f"link set dev {cfg.dev['ifname']} up")
+
+ with ksft_raises(NlError) as cm:
+ nl.queue_get({'ifindex': cfg.ifindex, 'id': 0, 'type': 'rx'})
+ ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT)
+
+ if napis:
+ with ksft_raises(NlError) as cm:
+ nl.napi_get({'id': napis[0]['id']})
+ ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT)
+
+
+def main() -> None:
+ with NetDrvEnv(__file__, queue_count=100) as cfg:
+ ksft_run([get_queues, addremove_queues, check_down, check_xsk],
+ args=(cfg, NetdevFamily()))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py
new file mode 100755
index 000000000000..f9530a8b0856
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ring_reconfig.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test channel and ring size configuration via ethtool (-L / -G).
+"""
+
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_eq
+from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic
+from lib.py import defer, NlError
+
+
+def channels(cfg) -> None:
+ """
+ Twiddle channel counts in various combinations of parameters.
+ We're only looking for driver adhering to the requested config
+ if the config is accepted and crashes.
+ """
+ ehdr = {'header':{'dev-index': cfg.ifindex}}
+ chans = cfg.eth.channels_get(ehdr)
+
+ all_keys = ["rx", "tx", "combined"]
+ mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"},
+ {"rx", "tx", "combined"},]
+
+ # Get the set of keys that device actually supports
+ restore = {}
+ supported = set()
+ for key in all_keys:
+ if key + "-max" in chans:
+ supported.add(key)
+ restore |= {key + "-count": chans[key + "-count"]}
+
+ defer(cfg.eth.channels_set, ehdr | restore)
+
+ def test_config(config):
+ try:
+ cfg.eth.channels_set(ehdr | config)
+ get = cfg.eth.channels_get(ehdr)
+ for k, v in config.items():
+ ksft_eq(get.get(k, 0), v)
+ except NlError as e:
+ failed.append(mix)
+ ksft_pr("Can't set", config, e)
+ else:
+ ksft_pr("Okay", config)
+
+ failed = []
+ for mix in mixes:
+ if not mix.issubset(supported):
+ continue
+
+ # Set all the values in the mix to 1, other supported to 0
+ config = {}
+ for key in all_keys:
+ config[key + "-count"] = 1 if key in mix else 0
+ test_config(config)
+
+ for mix in mixes:
+ if not mix.issubset(supported):
+ continue
+ if mix in failed:
+ continue
+
+ # Set all the values in the mix to max, other supported to 0
+ config = {}
+ for key in all_keys:
+ config[key + "-count"] = chans[key + '-max'] if key in mix else 0
+ test_config(config)
+
+
+def _configure_min_ring_cnt(cfg) -> None:
+ """ Try to configure a single Rx/Tx ring. """
+ ehdr = {'header':{'dev-index': cfg.ifindex}}
+ chans = cfg.eth.channels_get(ehdr)
+
+ all_keys = ["rx-count", "tx-count", "combined-count"]
+ restore = {}
+ config = {}
+ for key in all_keys:
+ if key in chans:
+ restore[key] = chans[key]
+ config[key] = 0
+
+ if chans.get('combined-count', 0) > 1:
+ config['combined-count'] = 1
+ elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1:
+ config['tx-count'] = 1
+ config['rx-count'] = 1
+ else:
+ # looks like we're already on 1 channel
+ return
+
+ cfg.eth.channels_set(ehdr | config)
+ defer(cfg.eth.channels_set, ehdr | restore)
+
+
+def ringparam(cfg) -> None:
+ """
+ Tweak the ringparam configuration. Try to run some traffic over min
+ ring size to make sure it actually functions.
+ """
+ ehdr = {'header':{'dev-index': cfg.ifindex}}
+ rings = cfg.eth.rings_get(ehdr)
+
+ restore = {}
+ maxes = {}
+ params = set()
+ for key in rings.keys():
+ if 'max' in key:
+ param = key[:-4]
+ maxes[param] = rings[key]
+ params.add(param)
+ restore[param] = rings[param]
+
+ defer(cfg.eth.rings_set, ehdr | restore)
+
+ # Speed up the reconfig by configuring just one ring
+ _configure_min_ring_cnt(cfg)
+
+ # Try to reach min on all settings
+ for param in params:
+ val = rings[param]
+ while True:
+ try:
+ cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex},
+ param: val // 2})
+ if val == 0:
+ break
+ val //= 2
+ except NlError:
+ break
+
+ get = cfg.eth.rings_get(ehdr)
+ ksft_eq(get[param], val)
+
+ ksft_pr(f"Reached min for '{param}' at {val} (max {rings[param]})")
+
+ GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+ # Try max across all params, if the driver supports large rings
+ # this may OOM so we ignore errors
+ try:
+ ksft_pr("Applying max settings")
+ config = {p: maxes[p] for p in params}
+ cfg.eth.rings_set(ehdr | config)
+ except NlError as e:
+ ksft_pr("Can't set max params", config, e)
+ else:
+ GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.eth = EthtoolFamily()
+
+ ksft_run([channels,
+ ringparam],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/shaper.py b/tools/testing/selftests/drivers/net/shaper.py
new file mode 100755
index 000000000000..11310f19bfa0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/shaper.py
@@ -0,0 +1,461 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true, KsftSkipEx
+from lib.py import EthtoolFamily, NetshaperFamily
+from lib.py import NetDrvEnv
+from lib.py import NlError
+from lib.py import cmd
+
+def get_shapers(cfg, nl_shaper) -> None:
+ try:
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ except NlError as e:
+ if e.error == 95:
+ raise KsftSkipEx("shapers not supported by the device")
+ raise
+
+ # Default configuration: no shapers configured.
+ ksft_eq(len(shapers), 0)
+
+def get_caps(cfg, nl_shaper) -> None:
+ try:
+ caps = nl_shaper.cap_get({'ifindex': cfg.ifindex}, dump=True)
+ except NlError as e:
+ if e.error == 95:
+ raise KsftSkipEx("shapers not supported by the device")
+ raise
+
+ # Each device implementing shaper support must support some
+ # features in at least a scope.
+ ksft_true(len(caps)> 0)
+
+def set_qshapers(cfg, nl_shaper) -> None:
+ try:
+ caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+ 'scope':'queue'})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftSkipEx("shapers not supported by the device")
+ raise
+ if not 'support-bw-max' in caps or not 'support-metric-bps' in caps:
+ raise KsftSkipEx("device does not support queue scope shapers with bw_max and metric bps")
+
+ cfg.queues = True;
+ netnl = EthtoolFamily()
+ channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ cfg.rx_type = 'rx'
+ cfg.nr_queues = channels['rx-count']
+ else:
+ cfg.rx_type = 'combined'
+ cfg.nr_queues = channels['combined-count']
+ if cfg.nr_queues < 3:
+ raise KsftSkipEx(f"device does not support enough queues min 3 found {cfg.nr_queues}")
+
+ nl_shaper.set({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'metric': 'bps',
+ 'bw-max': 10000})
+ nl_shaper.set({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 2},
+ 'metric': 'bps',
+ 'bw-max': 20000})
+
+ # Querying a specific shaper not yet configured must fail.
+ raised = False
+ try:
+ shaper_q0 = nl_shaper.get({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 0}})
+ except (NlError):
+ raised = True
+ ksft_eq(raised, True)
+
+ shaper_q1 = nl_shaper.get({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 1}})
+ ksft_eq(shaper_q1, {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'metric': 'bps',
+ 'bw-max': 10000})
+
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'metric': 'bps',
+ 'bw-max': 10000},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 2},
+ 'metric': 'bps',
+ 'bw-max': 20000}])
+
+def del_qshapers(cfg, nl_shaper) -> None:
+ if not cfg.queues:
+ raise KsftSkipEx("queue shapers not supported by device, skipping delete")
+
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 2}})
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 1}})
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(len(shapers), 0)
+
+def set_nshapers(cfg, nl_shaper) -> None:
+ # Check required features.
+ try:
+ caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+ 'scope':'netdev'})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftSkipEx("shapers not supported by the device")
+ raise
+ if not 'support-bw-max' in caps or not 'support-metric-bps' in caps:
+ raise KsftSkipEx("device does not support nested netdev scope shapers with weight")
+
+ cfg.netdev = True;
+ nl_shaper.set({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'netdev', 'id': 0},
+ 'bw-max': 100000})
+
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'netdev'},
+ 'metric': 'bps',
+ 'bw-max': 100000}])
+
+def del_nshapers(cfg, nl_shaper) -> None:
+ if not cfg.netdev:
+ raise KsftSkipEx("netdev shaper not supported by device, skipping delete")
+
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'netdev'}})
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(len(shapers), 0)
+
+def basic_groups(cfg, nl_shaper) -> None:
+ if not cfg.netdev:
+ raise KsftSkipEx("netdev shaper not supported by the device")
+ if cfg.nr_queues < 3:
+ raise KsftSkipEx(f"netdev does not have enough queues min 3 reported {cfg.nr_queues}")
+
+ try:
+ caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+ 'scope':'queue'})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftSkipEx("shapers not supported by the device")
+ raise
+ if not 'support-weight' in caps:
+ raise KsftSkipEx("device does not support queue scope shapers with weight")
+
+ node_handle = nl_shaper.group({
+ 'ifindex': cfg.ifindex,
+ 'leaves':[{'handle': {'scope': 'queue', 'id': 1},
+ 'weight': 1},
+ {'handle': {'scope': 'queue', 'id': 2},
+ 'weight': 2}],
+ 'handle': {'scope':'netdev'},
+ 'metric': 'bps',
+ 'bw-max': 10000})
+ ksft_eq(node_handle, {'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'netdev'}})
+
+ shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 1}})
+ ksft_eq(shaper, {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'weight': 1 })
+
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 2}})
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 1}})
+
+ # Deleting all the leaves shaper does not affect the node one
+ # when the latter has 'netdev' scope.
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(len(shapers), 1)
+
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'netdev'}})
+
+def qgroups(cfg, nl_shaper) -> None:
+ if cfg.nr_queues < 4:
+ raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}")
+ try:
+ caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+ 'scope':'node'})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftSkipEx("shapers not supported by the device")
+ raise
+ if not 'support-bw-max' in caps or not 'support-metric-bps' in caps:
+ raise KsftSkipEx("device does not support node scope shapers with bw_max and metric bps")
+ try:
+ caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+ 'scope':'queue'})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftSkipEx("shapers not supported by the device")
+ raise
+ if not 'support-nesting' in caps or not 'support-weight' in caps or not 'support-metric-bps' in caps:
+ raise KsftSkipEx("device does not support nested queue scope shapers with weight")
+
+ cfg.groups = True;
+ node_handle = nl_shaper.group({
+ 'ifindex': cfg.ifindex,
+ 'leaves':[{'handle': {'scope': 'queue', 'id': 1},
+ 'weight': 3},
+ {'handle': {'scope': 'queue', 'id': 2},
+ 'weight': 2}],
+ 'handle': {'scope':'node'},
+ 'metric': 'bps',
+ 'bw-max': 10000})
+ node_id = node_handle['handle']['id']
+
+ shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 1}})
+ ksft_eq(shaper, {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'node', 'id': node_id},
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'weight': 3})
+ shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'node', 'id': node_id}})
+ ksft_eq(shaper, {'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'node', 'id': node_id},
+ 'parent': {'scope': 'netdev'},
+ 'metric': 'bps',
+ 'bw-max': 10000})
+
+ # Grouping to a specified, not existing node scope shaper must fail
+ raised = False
+ try:
+ nl_shaper.group({
+ 'ifindex': cfg.ifindex,
+ 'leaves':[{'handle': {'scope': 'queue', 'id': 3},
+ 'weight': 3}],
+ 'handle': {'scope':'node', 'id': node_id + 1},
+ 'metric': 'bps',
+ 'bw-max': 10000})
+
+ except (NlError):
+ raised = True
+ ksft_eq(raised, True)
+
+ # Add to an existing node
+ node_handle = nl_shaper.group({
+ 'ifindex': cfg.ifindex,
+ 'leaves':[{'handle': {'scope': 'queue', 'id': 3},
+ 'weight': 4}],
+ 'handle': {'scope':'node', 'id': node_id}})
+ ksft_eq(node_handle, {'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'node', 'id': node_id}})
+
+ shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 3}})
+ ksft_eq(shaper, {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'node', 'id': node_id},
+ 'handle': {'scope': 'queue', 'id': 3},
+ 'weight': 4})
+
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 2}})
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 1}})
+
+ # Deleting a non empty node will move the leaves downstream.
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'node', 'id': node_id}})
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 3},
+ 'weight': 4}])
+
+ # Finish and verify the complete cleanup.
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': 3}})
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(len(shapers), 0)
+
+def delegation(cfg, nl_shaper) -> None:
+ if not cfg.groups:
+ raise KsftSkipEx("device does not support node scope")
+ try:
+ caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+ 'scope':'node'})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftSkipEx("node scope shapers not supported by the device")
+ raise
+ if not 'support-nesting' in caps:
+ raise KsftSkipEx("device does not support node scope shapers nesting")
+
+ node_handle = nl_shaper.group({
+ 'ifindex': cfg.ifindex,
+ 'leaves':[{'handle': {'scope': 'queue', 'id': 1},
+ 'weight': 3},
+ {'handle': {'scope': 'queue', 'id': 2},
+ 'weight': 2},
+ {'handle': {'scope': 'queue', 'id': 3},
+ 'weight': 1}],
+ 'handle': {'scope':'node'},
+ 'metric': 'bps',
+ 'bw-max': 10000})
+ node_id = node_handle['handle']['id']
+
+ # Create the nested node and validate the hierarchy
+ nested_node_handle = nl_shaper.group({
+ 'ifindex': cfg.ifindex,
+ 'leaves':[{'handle': {'scope': 'queue', 'id': 1},
+ 'weight': 3},
+ {'handle': {'scope': 'queue', 'id': 2},
+ 'weight': 2}],
+ 'handle': {'scope':'node'},
+ 'metric': 'bps',
+ 'bw-max': 5000})
+ nested_node_id = nested_node_handle['handle']['id']
+ ksft_true(nested_node_id != node_id)
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'node', 'id': nested_node_id},
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'weight': 3},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'node', 'id': nested_node_id},
+ 'handle': {'scope': 'queue', 'id': 2},
+ 'weight': 2},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'node', 'id': node_id},
+ 'handle': {'scope': 'queue', 'id': 3},
+ 'weight': 1},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'node', 'id': node_id},
+ 'metric': 'bps',
+ 'bw-max': 10000},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'node', 'id': node_id},
+ 'handle': {'scope': 'node', 'id': nested_node_id},
+ 'metric': 'bps',
+ 'bw-max': 5000}])
+
+ # Deleting a non empty node will move the leaves downstream.
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'node', 'id': nested_node_id}})
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'node', 'id': node_id},
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'weight': 3},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'node', 'id': node_id},
+ 'handle': {'scope': 'queue', 'id': 2},
+ 'weight': 2},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'node', 'id': node_id},
+ 'handle': {'scope': 'queue', 'id': 3},
+ 'weight': 1},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'node', 'id': node_id},
+ 'metric': 'bps',
+ 'bw-max': 10000}])
+
+ # Final cleanup.
+ for i in range(1, 4):
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': i}})
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(len(shapers), 0)
+
+def queue_update(cfg, nl_shaper) -> None:
+ if cfg.nr_queues < 4:
+ raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}")
+ if not cfg.queues:
+ raise KsftSkipEx("device does not support queue scope")
+
+ for i in range(3):
+ nl_shaper.set({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': i},
+ 'metric': 'bps',
+ 'bw-max': (i + 1) * 1000})
+ # Delete a channel, with no shapers configured on top of the related
+ # queue: no changes expected
+ cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 3", timeout=10)
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 0},
+ 'metric': 'bps',
+ 'bw-max': 1000},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'metric': 'bps',
+ 'bw-max': 2000},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 2},
+ 'metric': 'bps',
+ 'bw-max': 3000}])
+
+ # Delete a channel, with a shaper configured on top of the related
+ # queue: the shaper must be deleted, too
+ cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 2", timeout=10)
+
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 0},
+ 'metric': 'bps',
+ 'bw-max': 1000},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'metric': 'bps',
+ 'bw-max': 2000}])
+
+ # Restore the original channels number, no expected changes
+ cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} {cfg.nr_queues}", timeout=10)
+ shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 0},
+ 'metric': 'bps',
+ 'bw-max': 1000},
+ {'ifindex': cfg.ifindex,
+ 'parent': {'scope': 'netdev'},
+ 'handle': {'scope': 'queue', 'id': 1},
+ 'metric': 'bps',
+ 'bw-max': 2000}])
+
+ # Final cleanup.
+ for i in range(0, 2):
+ nl_shaper.delete({'ifindex': cfg.ifindex,
+ 'handle': {'scope': 'queue', 'id': i}})
+
+def main() -> None:
+ with NetDrvEnv(__file__, queue_count=4) as cfg:
+ cfg.queues = False
+ cfg.netdev = False
+ cfg.groups = False
+ cfg.nr_queues = 0
+ ksft_run([get_shapers,
+ get_caps,
+ set_qshapers,
+ del_qshapers,
+ set_nshapers,
+ del_nshapers,
+ basic_groups,
+ qgroups,
+ delegation,
+ queue_update], args=(cfg, NetshaperFamily()))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
new file mode 100755
index 000000000000..b08e4d48b15c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests related to standard netdevice statistics.
+"""
+
+import errno
+import subprocess
+import time
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import ksft_disruptive
+from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
+from lib.py import NetDrvEnv
+from lib.py import cmd, ip, defer
+
+ethnl = EthtoolFamily()
+netfam = NetdevFamily()
+rtnl = RtnlFamily()
+
+
+def check_pause(cfg) -> None:
+ """
+ Check that drivers which support Pause config also report standard
+ pause stats.
+ """
+
+ try:
+ ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("pause not supported by the device") from e
+ raise
+
+ data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
+ "flags": {'stats'}}})
+ ksft_true(data['stats'], "driver does not report stats")
+
+
+def check_fec(cfg) -> None:
+ """
+ Check that drivers which support FEC config also report standard
+ FEC stats.
+ """
+
+ try:
+ ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("FEC not supported by the device") from e
+ raise
+
+ data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
+ "flags": {'stats'}}})
+ ksft_true(data['stats'], "driver does not report stats")
+
+
+def check_fec_hist(cfg) -> None:
+ """
+ Check that drivers which support FEC histogram statistics report
+ reasonable values.
+ """
+
+ try:
+ data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
+ "flags": {'stats'}}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("FEC not supported by the device") from e
+ raise
+ if 'stats' not in data:
+ raise KsftSkipEx("FEC stats not supported by the device")
+ if 'hist' not in data['stats']:
+ raise KsftSkipEx("FEC histogram not supported by the device")
+
+ hist = data['stats']['hist']
+ for fec_bin in hist:
+ for key in ['bin-low', 'bin-high', 'bin-val']:
+ ksft_in(key, fec_bin,
+ "Drivers should always report FEC bin range and value")
+ ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'],
+ "FEC bin range should be valid")
+ if 'bin-val-per-lane' in fec_bin:
+ ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'],
+ "FEC bin value should be equal to sum of per-plane values")
+
+
+def pkt_byte_sum(cfg) -> None:
+ """
+ Check that qstat and interface stats match in value.
+ """
+
+ def get_qstat(test):
+ stats = netfam.qstats_get({}, dump=True)
+ if stats:
+ for qs in stats:
+ if qs["ifindex"]== test.ifindex:
+ return qs
+ return None
+
+ qstat = get_qstat(cfg)
+ if qstat is None:
+ raise KsftSkipEx("qstats not supported by the device")
+
+ for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+ ksft_in(key, qstat, "Drivers should always report basic keys")
+
+ # Compare stats, rtnl stats and qstats must match,
+ # but the interface may be up, so do a series of dumps
+ # each time the more "recent" stats must be higher or same.
+ def stat_cmp(rstat, qstat):
+ for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+ if rstat[key] != qstat[key]:
+ return rstat[key] - qstat[key]
+ return 0
+
+ for _ in range(10):
+ rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ if stat_cmp(rtstat, qstat) < 0:
+ raise KsftFailEx("RTNL stats are lower, fetched later")
+ qstat = get_qstat(cfg)
+ if stat_cmp(rtstat, qstat) > 0:
+ raise KsftFailEx("Qstats are lower, fetched later")
+
+
+def qstat_by_ifindex(cfg) -> None:
+ """ Qstats Netlink API tests - querying by ifindex. """
+
+ # Construct a map ifindex -> [dump, by-index, dump]
+ ifindexes = {}
+ stats = netfam.qstats_get({}, dump=True)
+ for entry in stats:
+ ifindexes[entry['ifindex']] = [entry, None, None]
+
+ for ifindex in ifindexes:
+ entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
+ ksft_eq(len(entry), 1)
+ ifindexes[entry[0]['ifindex']][1] = entry[0]
+
+ stats = netfam.qstats_get({}, dump=True)
+ for entry in stats:
+ ifindexes[entry['ifindex']][2] = entry
+
+ if len(ifindexes) == 0:
+ raise KsftSkipEx("No ifindex supports qstats")
+
+ # Now make sure the stats match/make sense
+ for ifindex, triple in ifindexes.items():
+ all_keys = triple[0].keys() | triple[1].keys() | triple[2].keys()
+
+ for key in all_keys:
+ ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key)
+ ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key)
+
+ # Sanity check the dumps
+ queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True)
+ # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}}
+ parsed = {}
+ for entry in queues:
+ ifindex = entry["ifindex"]
+ if ifindex not in parsed:
+ parsed[ifindex] = {"rx":[], "tx": []}
+ parsed[ifindex][entry["queue-type"]].append(entry['queue-id'])
+ # Now, validate
+ for ifindex, queues in parsed.items():
+ for qtype in ['rx', 'tx']:
+ ksft_eq(len(queues[qtype]), len(set(queues[qtype])),
+ comment="repeated queue keys")
+ ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1,
+ comment="missing queue keys")
+
+ # Test invalid dumps
+ # 0 is invalid
+ with ksft_raises(NlError) as cm:
+ netfam.qstats_get({"ifindex": 0}, dump=True)
+ ksft_eq(cm.exception.nl_msg.error, -34)
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+ # loopback has no stats
+ with ksft_raises(NlError) as cm:
+ netfam.qstats_get({"ifindex": 1}, dump=True)
+ ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+ # Try to get stats for lowest unused ifindex but not 0
+ devs = rtnl.getlink({}, dump=True)
+ all_ifindexes = set(dev["ifi-index"] for dev in devs)
+ lowest = 2
+ while lowest in all_ifindexes:
+ lowest += 1
+
+ with ksft_raises(NlError) as cm:
+ netfam.qstats_get({"ifindex": lowest}, dump=True)
+ ksft_eq(cm.exception.nl_msg.error, -19)
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+
+@ksft_disruptive
+def check_down(cfg) -> None:
+ """ Test statistics (interface and qstat) are not impacted by ifdown """
+
+ try:
+ qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("qstats not supported by the device") from e
+ raise
+
+ ip(f"link set dev {cfg.dev['ifname']} down")
+ defer(ip, f"link set dev {cfg.dev['ifname']} up")
+
+ qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ for k in qstat:
+ ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down")
+
+ # exercise per-queue API to make sure that "device down" state
+ # is handled correctly and doesn't crash
+ netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
+
+
+def __run_inf_loop(body):
+ body = body.strip()
+ if body[-1] != ';':
+ body += ';'
+
+ return subprocess.Popen(f"while true; do {body} done", shell=True,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+
+def __stats_increase_sanely(old, new) -> None:
+ for k in old.keys():
+ ksft_ge(new[k], old[k])
+ ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error")
+
+
+def procfs_hammer(cfg) -> None:
+ """
+ Reading stats via procfs only holds the RCU lock, which is not an exclusive
+ lock, make sure drivers can handle parallel reads of stats.
+ """
+ one = __run_inf_loop("cat /proc/net/dev")
+ defer(one.kill)
+ two = __run_inf_loop("cat /proc/net/dev")
+ defer(two.kill)
+
+ time.sleep(1)
+ # Make sure the processes are running
+ ksft_is(one.poll(), None)
+ ksft_is(two.poll(), None)
+
+ rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ time.sleep(2)
+ rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ __stats_increase_sanely(rtstat1, rtstat2)
+ # defers will kill the loops
+
+
+@ksft_disruptive
+def procfs_downup_hammer(cfg) -> None:
+ """
+ Reading stats via procfs only holds the RCU lock, drivers often try
+ to sleep when reading the stats, or don't protect against races.
+ """
+ # Set a large number of queues,
+ # we'll flip between min(max_queues, 64) and 1
+ channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ rx_type = 'rx'
+ else:
+ rx_type = 'combined'
+ cur_queue_cnt = channels[f'{rx_type}-count']
+ max_queue_cnt = min(channels[f'{rx_type}-max'], 64)
+
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+ defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+ # Real test stats
+ stats = __run_inf_loop("cat /proc/net/dev")
+ defer(stats.kill)
+
+ ipset = f"ip link set dev {cfg.ifname}"
+ defer(ip, f"link set dev {cfg.ifname} up")
+ # The "echo -n 1" lets us count iterations below
+ updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \
+ f"ethtool -L {cfg.ifname} {rx_type} 1; " + \
+ f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \
+ "echo -n 1"
+ updown = __run_inf_loop(updown)
+ kill_updown = defer(updown.kill)
+
+ time.sleep(1)
+ # Make sure the processes are running
+ ksft_is(stats.poll(), None)
+ ksft_is(updown.poll(), None)
+
+ rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ # We're looking for crashes, give it extra time
+ time.sleep(9)
+ rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ __stats_increase_sanely(rtstat1, rtstat2)
+
+ kill_updown.exec()
+ stdout, _ = updown.communicate(timeout=5)
+ ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, queue_count=100) as cfg:
+ ksft_run([check_pause, check_fec, check_fec_hist, pkt_byte_sum,
+ qstat_by_ifindex, check_down, procfs_hammer,
+ procfs_downup_hammer],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
new file mode 100644
index 000000000000..1340b3df9c31
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+TEST_PROGS := \
+ dev_addr_lists.sh \
+ options.sh \
+ propagation.sh \
+# end of TEST_PROGS
+
+TEST_INCLUDES := \
+ ../bonding/lag_lib.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/in_netns.sh \
+ ../../../net/lib.sh \
+ ../../../net/lib/sh/defer.sh \
+# end of TEST_INCLUDES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
new file mode 100644
index 000000000000..558e1d0cf565
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -0,0 +1,7 @@
+CONFIG_DUMMY=y
+CONFIG_IPV6=y
+CONFIG_MACVLAN=y
+CONFIG_NETDEVSIM=m
+CONFIG_NET_TEAM=y
+CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y
+CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
new file mode 100755
index 000000000000..b1ec7755b783
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test team device handling of addr lists (dev->uc, mc)
+#
+
+ALL_TESTS="
+ team_cleanup
+"
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+source "$lib_dir"/../bonding/lag_lib.sh
+
+
+destroy()
+{
+ local ifnames=(dummy1 dummy2 team0 mv0)
+ local ifname
+
+ for ifname in "${ifnames[@]}"; do
+ ip link del "$ifname" &>/dev/null
+ done
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ destroy
+}
+
+
+team_cleanup()
+{
+ RET=0
+
+ test_LAG_cleanup "team" "lacp"
+}
+
+
+require_command teamd
+
+trap cleanup EXIT
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh
new file mode 100755
index 000000000000..44888f32b513
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/options.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# These tests verify basic set and get functionality of the team
+# driver options over netlink.
+
+# Run in private netns.
+test_dir="$(dirname "$0")"
+if [[ $# -eq 0 ]]; then
+ "${test_dir}"/../../../net/in_netns.sh "$0" __subprocess
+ exit $?
+fi
+
+ALL_TESTS="
+ team_test_options
+"
+
+source "${test_dir}/../../../net/lib.sh"
+
+TEAM_PORT="team0"
+MEMBER_PORT="dummy0"
+
+setup()
+{
+ ip link add name "${MEMBER_PORT}" type dummy
+ ip link add name "${TEAM_PORT}" type team
+}
+
+get_and_check_value()
+{
+ local option_name="$1"
+ local expected_value="$2"
+ local port_flag="$3"
+
+ local value_from_get
+
+ if ! value_from_get=$(teamnl "${TEAM_PORT}" getoption "${option_name}" \
+ "${port_flag}"); then
+ echo "Could not get option '${option_name}'" >&2
+ return 1
+ fi
+
+ if [[ "${value_from_get}" != "${expected_value}" ]]; then
+ echo "Incorrect value for option '${option_name}'" >&2
+ echo "get (${value_from_get}) != set (${expected_value})" >&2
+ return 1
+ fi
+}
+
+set_and_check_get()
+{
+ local option_name="$1"
+ local option_value="$2"
+ local port_flag="$3"
+
+ local value_from_get
+
+ if ! teamnl "${TEAM_PORT}" setoption "${option_name}" \
+ "${option_value}" "${port_flag}"; then
+ echo "'setoption ${option_name} ${option_value}' failed" >&2
+ return 1
+ fi
+
+ get_and_check_value "${option_name}" "${option_value}" "${port_flag}"
+ return $?
+}
+
+# Get a "port flag" to pass to the `teamnl` command.
+# E.g. $1="dummy0" -> "port=dummy0",
+# $1="" -> ""
+get_port_flag()
+{
+ local port_name="$1"
+
+ if [[ -n "${port_name}" ]]; then
+ echo "--port=${port_name}"
+ fi
+}
+
+attach_port_if_specified()
+{
+ local port_name="$1"
+
+ if [[ -n "${port_name}" ]]; then
+ ip link set dev "${port_name}" master "${TEAM_PORT}"
+ return $?
+ fi
+}
+
+detach_port_if_specified()
+{
+ local port_name="$1"
+
+ if [[ -n "${port_name}" ]]; then
+ ip link set dev "${port_name}" nomaster
+ return $?
+ fi
+}
+
+# Test that an option's get value matches its set value.
+# Globals:
+# RET - Used by testing infra like `check_err`.
+# EXIT_STATUS - Used by `log_test` for whole script exit value.
+# Arguments:
+# option_name - The name of the option.
+# value_1 - The first value to try setting.
+# value_2 - The second value to try setting.
+# port_name - The (optional) name of the attached port.
+team_test_option()
+{
+ local option_name="$1"
+ local value_1="$2"
+ local value_2="$3"
+ local possible_values="$2 $3 $2"
+ local port_name="$4"
+ local port_flag
+
+ RET=0
+
+ echo "Setting '${option_name}' to '${value_1}' and '${value_2}'"
+
+ attach_port_if_specified "${port_name}"
+ check_err $? "Couldn't attach ${port_name} to master"
+ port_flag=$(get_port_flag "${port_name}")
+
+ # Set and get both possible values.
+ for value in ${possible_values}; do
+ set_and_check_get "${option_name}" "${value}" "${port_flag}"
+ check_err $? "Failed to set '${option_name}' to '${value}'"
+ done
+
+ detach_port_if_specified "${port_name}"
+ check_err $? "Couldn't detach ${port_name} from its master"
+
+ log_test "Set + Get '${option_name}' test"
+}
+
+# Test that getting a non-existant option fails.
+# Globals:
+# RET - Used by testing infra like `check_err`.
+# EXIT_STATUS - Used by `log_test` for whole script exit value.
+# Arguments:
+# option_name - The name of the option.
+# port_name - The (optional) name of the attached port.
+team_test_get_option_fails()
+{
+ local option_name="$1"
+ local port_name="$2"
+ local port_flag
+
+ RET=0
+
+ attach_port_if_specified "${port_name}"
+ check_err $? "Couldn't attach ${port_name} to master"
+ port_flag=$(get_port_flag "${port_name}")
+
+ # Just confirm that getting the value fails.
+ teamnl "${TEAM_PORT}" getoption "${option_name}" "${port_flag}"
+ check_fail $? "Shouldn't be able to get option '${option_name}'"
+
+ detach_port_if_specified "${port_name}"
+
+ log_test "Get '${option_name}' fails"
+}
+
+team_test_options()
+{
+ # Wrong option name behavior.
+ team_test_get_option_fails fake_option1
+ team_test_get_option_fails fake_option2 "${MEMBER_PORT}"
+
+ # Correct set and get behavior.
+ team_test_option mode activebackup loadbalance
+ team_test_option notify_peers_count 0 5
+ team_test_option notify_peers_interval 0 5
+ team_test_option mcast_rejoin_count 0 5
+ team_test_option mcast_rejoin_interval 0 5
+ team_test_option enabled true false "${MEMBER_PORT}"
+ team_test_option user_linkup true false "${MEMBER_PORT}"
+ team_test_option user_linkup_enabled true false "${MEMBER_PORT}"
+ team_test_option priority 10 20 "${MEMBER_PORT}"
+ team_test_option queue_id 0 1 "${MEMBER_PORT}"
+}
+
+require_command teamnl
+setup
+tests_run
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/team/propagation.sh b/tools/testing/selftests/drivers/net/team/propagation.sh
new file mode 100755
index 000000000000..4bea75b79878
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/propagation.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+NSIM_LRO_ID=$((256 + RANDOM % 256))
+NSIM_LRO_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_LRO_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+
+cleanup()
+{
+ set +e
+ ip link del dummyteam &>/dev/null
+ ip link del team0 &>/dev/null
+ echo $NSIM_LRO_ID > $NSIM_DEV_SYS_DEL
+ modprobe -r netdevsim
+}
+
+# Trigger LRO propagation to the lower.
+# https://lore.kernel.org/netdev/aBvOpkIoxcr9PfDg@mini-arch/
+team_lro()
+{
+ # using netdevsim because it supports NETIF_F_LRO
+ NSIM_LRO_NAME=$(find $NSIM_LRO_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_LRO_SYS/net -exec basename {} \;)
+
+ ip link add name team0 type team
+ ip link set $NSIM_LRO_NAME down
+ ip link set dev $NSIM_LRO_NAME master team0
+ ip link set team0 up
+ ethtool -K team0 large-receive-offload off
+
+ ip link del team0
+}
+
+# Trigger promisc propagation to the lower during IFLA_MASTER.
+# https://lore.kernel.org/netdev/20250506032328.3003050-1-sdf@fomichev.me/
+team_promisc()
+{
+ ip link add name dummyteam type dummy
+ ip link add name team0 type team
+ ip link set dummyteam down
+ ip link set team0 promisc on
+ ip link set dev dummyteam master team0
+ ip link set team0 up
+
+ ip link del team0
+ ip link del dummyteam
+}
+
+# Trigger promisc propagation to the lower via netif_change_flags (aka
+# ndo_change_rx_flags).
+# https://lore.kernel.org/netdev/20250514220319.3505158-1-stfomichev@gmail.com/
+team_change_flags()
+{
+ ip link add name dummyteam type dummy
+ ip link add name team0 type team
+ ip link set dummyteam down
+ ip link set dev dummyteam master team0
+ ip link set team0 up
+ ip link set team0 promisc on
+
+ # Make sure we can add more L2 addresses without any issues.
+ ip link add link team0 address 00:00:00:00:00:01 team0.1 type macvlan
+ ip link set team0.1 up
+
+ ip link del team0.1
+ ip link del team0
+ ip link del dummyteam
+}
+
+trap cleanup EXIT
+modprobe netdevsim || :
+echo $NSIM_LRO_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+team_lro
+team_promisc
+team_change_flags
diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile
new file mode 100644
index 000000000000..868ece3fea1f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = basic_features.sh
+
+TEST_FILES = virtio_net_common.sh
+
+TEST_INCLUDES = \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh \
+# end of TEST_INCLUDES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh
new file mode 100755
index 000000000000..cf8cf816ed48
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# See virtio_net_common.sh comments for more details about assumed setup
+
+ALL_TESTS="
+ initial_ping_test
+ f_mac_test
+"
+
+source virtio_net_common.sh
+
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p2]}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+initial_ping_test()
+{
+ setup_cleanup
+ setup_prepare
+ ping_test $h1 $H2_IPV4 " simple"
+}
+
+f_mac_test()
+{
+ RET=0
+ local test_name="mac feature filtered"
+
+ virtio_feature_present $h1 $VIRTIO_NET_F_MAC
+ if [ $? -ne 0 ]; then
+ log_test_skip "$test_name" "Device $h1 is missing feature $VIRTIO_NET_F_MAC."
+ return 0
+ fi
+ virtio_feature_present $h1 $VIRTIO_NET_F_MAC
+ if [ $? -ne 0 ]; then
+ log_test_skip "$test_name" "Device $h2 is missing feature $VIRTIO_NET_F_MAC."
+ return 0
+ fi
+
+ setup_cleanup
+ setup_prepare
+
+ grep -q 0 /sys/class/net/$h1/addr_assign_type
+ check_err $? "Permanent address assign type for $h1 is not set"
+ grep -q 0 /sys/class/net/$h2/addr_assign_type
+ check_err $? "Permanent address assign type for $h2 is not set"
+
+ setup_cleanup
+ virtio_filter_feature_add $h1 $VIRTIO_NET_F_MAC
+ virtio_filter_feature_add $h2 $VIRTIO_NET_F_MAC
+ setup_prepare
+
+ grep -q 0 /sys/class/net/$h1/addr_assign_type
+ check_fail $? "Permanent address assign type for $h1 is set when F_MAC feature is filtered"
+ grep -q 0 /sys/class/net/$h2/addr_assign_type
+ check_fail $? "Permanent address assign type for $h2 is set when F_MAC feature is filtered"
+
+ ping_do $h1 $H2_IPV4
+ check_err $? "Ping failed"
+
+ log_test "$test_name"
+}
+
+setup_prepare()
+{
+ virtio_device_rebind $h1
+ virtio_device_rebind $h2
+ wait_for_dev $h1
+ wait_for_dev $h2
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+setup_cleanup()
+{
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ virtio_filter_features_clear $h1
+ virtio_filter_features_clear $h2
+ virtio_device_rebind $h1
+ virtio_device_rebind $h2
+ wait_for_dev $h1
+ wait_for_dev $h2
+}
+
+cleanup()
+{
+ pre_cleanup
+ setup_cleanup
+}
+
+check_driver $h1 "virtio_net"
+check_driver $h2 "virtio_net"
+check_virtio_debugfs $h1
+check_virtio_debugfs $h2
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config
new file mode 100644
index 000000000000..bcf7555eaffe
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/config
@@ -0,0 +1,8 @@
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=m
+CONFIG_VIRTIO_DEBUG=y
+CONFIG_VIRTIO_NET=y
diff --git a/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh
new file mode 100644
index 000000000000..57bd8055e2e5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This assumes running on a host with two virtio interfaces connected
+# back to back. Example script to do such wire-up of tap devices would
+# look like this:
+#
+# =======================================================================================================
+# #!/bin/bash
+#
+# DEV1="$1"
+# DEV2="$2"
+#
+# sudo tc qdisc add dev $DEV1 clsact
+# sudo tc qdisc add dev $DEV2 clsact
+# sudo tc filter add dev $DEV1 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV2
+# sudo tc filter add dev $DEV2 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV1
+# sudo ip link set $DEV1 up
+# sudo ip link set $DEV2 up
+# =======================================================================================================
+
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+NETIF_FIND_DRIVER="virtio_net"
+NUM_NETIFS=2
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+VIRTIO_NET_F_MAC=5
+
+virtio_device_get()
+{
+ local dev=$1; shift
+ local device_path="/sys/class/net/$dev/device/"
+
+ basename `realpath $device_path`
+}
+
+virtio_device_rebind()
+{
+ local dev=$1; shift
+ local device=`virtio_device_get $dev`
+
+ echo "$device" > /sys/bus/virtio/drivers/virtio_net/unbind
+ echo "$device" > /sys/bus/virtio/drivers/virtio_net/bind
+}
+
+virtio_debugfs_get()
+{
+ local dev=$1; shift
+ local device=`virtio_device_get $dev`
+
+ echo /sys/kernel/debug/virtio/$device/
+}
+
+check_virtio_debugfs()
+{
+ local dev=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ if [ ! -f "$debugfs/device_features" ] ||
+ [ ! -f "$debugfs/filter_feature_add" ] ||
+ [ ! -f "$debugfs/filter_feature_del" ] ||
+ [ ! -f "$debugfs/filter_features" ] ||
+ [ ! -f "$debugfs/filter_features_clear" ]; then
+ echo "SKIP: not possible to access debugfs for $dev"
+ exit $ksft_skip
+ fi
+}
+
+virtio_feature_present()
+{
+ local dev=$1; shift
+ local feature=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ cat $debugfs/device_features |grep "^$feature$" &> /dev/null
+ return $?
+}
+
+virtio_filter_features_clear()
+{
+ local dev=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ echo "1" > $debugfs/filter_features_clear
+}
+
+virtio_filter_feature_add()
+{
+ local dev=$1; shift
+ local feature=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ echo "$feature" > $debugfs/filter_feature_add
+}
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
new file mode 100755
index 000000000000..e54df158dfe9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -0,0 +1,779 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This file contains tests to verify native XDP support in network drivers.
+The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib`
+directory, with each test focusing on a specific aspect of XDP functionality.
+"""
+import random
+import string
+from dataclasses import dataclass
+from enum import Enum
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr
+from lib.py import KsftNamedVariant, ksft_variants
+from lib.py import KsftFailEx, NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily, NlError
+from lib.py import bkg, cmd, rand_port, wait_port_listen
+from lib.py import ip, bpftool, defer
+
+
+class TestConfig(Enum):
+ """Enum for XDP configuration options."""
+ MODE = 0 # Configures the BPF program for a specific test
+ PORT = 1 # Port configuration to communicate with the remote host
+ ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking
+ ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension
+
+
+class XDPAction(Enum):
+ """Enum for XDP actions."""
+ PASS = 0 # Pass the packet up to the stack
+ DROP = 1 # Drop the packet
+ TX = 2 # Route the packet to the remote host
+ TAIL_ADJST = 3 # Adjust the tail of the packet
+ HEAD_ADJST = 4 # Adjust the head of the packet
+
+
+class XDPStats(Enum):
+ """Enum for XDP statistics."""
+ RX = 0 # Count of valid packets received for testing
+ PASS = 1 # Count of packets passed up to the stack
+ DROP = 2 # Count of packets dropped
+ TX = 3 # Count of incoming packets routed to the remote host
+ ABORT = 4 # Count of packets that were aborted
+
+
+@dataclass
+class BPFProgInfo:
+ """Data class to store information about a BPF program."""
+ name: str # Name of the BPF program
+ file: str # BPF program object file
+ xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags")
+ mtu: int = 1500 # Maximum Transmission Unit, default is 1500
+
+
+def _exchg_udp(cfg, port, test_string):
+ """
+ Exchanges UDP packets between a local and remote host using the socat tool.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ test_string: String that the remote host will send.
+
+ Returns:
+ The string received by the test host.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+ tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+ with bkg(rx_udp_cmd, exit_wait=True) as nc:
+ wait_port_listen(port, proto="udp")
+ cmd(tx_udp_cmd, host=cfg.remote, shell=True)
+
+ return nc.stdout.strip()
+
+
+def _test_udp(cfg, port, size=256):
+ """
+ Tests UDP packet exchange between a local and remote host.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ size: The length of the test string to be exchanged, default is 256 characters.
+
+ Returns:
+ bool: True if the received string matches the sent string, False otherwise.
+ """
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ return recvd_str == test_str
+
+
+def _load_xdp_prog(cfg, bpf_info):
+ """
+ Loads an XDP program onto a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+
+ Returns:
+ dict: A dictionary containing the XDP program ID, name, and associated map IDs.
+ """
+ abs_path = cfg.net_lib_dir / bpf_info.file
+ prog_info = {}
+
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+
+ cmd(
+ f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}",
+ shell=True
+ )
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off")
+
+ xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0]
+ prog_info["id"] = xdp_info["xdp"]["prog"]["id"]
+ prog_info["name"] = xdp_info["xdp"]["prog"]["name"]
+ prog_id = prog_info["id"]
+
+ map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"]
+ prog_info["maps"] = {}
+ for map_id in map_ids:
+ name = bpftool(f"map show id {map_id}", json=True)["name"]
+ prog_info["maps"][name] = map_id
+
+ return prog_info
+
+
+def format_hex_bytes(value):
+ """
+ Helper function that converts an integer into a formatted hexadecimal byte string.
+
+ Args:
+ value: An integer representing the number to be converted.
+
+ Returns:
+ A string representing hexadecimal equivalent of value, with bytes separated by spaces.
+ """
+ hex_str = value.to_bytes(4, byteorder='little', signed=True)
+ return ' '.join(f'{byte:02x}' for byte in hex_str)
+
+
+def _set_xdp_map(map_name, key, value):
+ """
+ Updates an XDP map with a given key-value pair using bpftool.
+
+ Args:
+ map_name: The name of the XDP map to update.
+ key: The key to update in the map, formatted as a hexadecimal string.
+ value: The value to associate with the key, formatted as a hexadecimal string.
+ """
+ key_formatted = format_hex_bytes(key)
+ value_formatted = format_hex_bytes(value)
+ bpftool(
+ f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}"
+ )
+
+
+def _get_stats(xdp_map_id):
+ """
+ Retrieves and formats statistics from an XDP map.
+
+ Args:
+ xdp_map_id: The ID of the XDP map from which to retrieve statistics.
+
+ Returns:
+ A dictionary containing formatted packet statistics for various XDP actions.
+ The keys are based on the XDPStats Enum values.
+
+ Raises:
+ KsftFailEx: If the stats retrieval fails.
+ """
+ stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True)
+ if not stats_dump:
+ raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}")
+
+ stats_formatted = {}
+ for key in range(0, 5):
+ val = stats_dump[key]["formatted"]["value"]
+ if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value:
+ stats_formatted[XDPStats.RX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value:
+ stats_formatted[XDPStats.PASS.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value:
+ stats_formatted[XDPStats.DROP.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value:
+ stats_formatted[XDPStats.TX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value:
+ stats_formatted[XDPStats.ABORT.value] = val
+
+ return stats_formatted
+
+
+def _test_pass(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_PASS action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch")
+
+
+def test_xdp_native_pass_sb(cfg):
+ """
+ Tests the XDP_PASS action for single buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_pass(cfg, bpf_info, 256)
+
+
+def test_xdp_native_pass_mb(cfg):
+ """
+ Tests the XDP_PASS action for a multi-buff size.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_pass(cfg, bpf_info, 8000)
+
+
+def _test_drop(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_DROP action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch")
+
+
+def test_xdp_native_drop_sb(cfg):
+ """
+ Tests the XDP_DROP action for a signle-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_drop(cfg, bpf_info, 256)
+
+
+def test_xdp_native_drop_mb(cfg):
+ """
+ Tests the XDP_DROP action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_drop(cfg, bpf_info, 8000)
+
+
+def _test_xdp_native_tx(cfg, bpf_info, payload_lens):
+ """
+ Tests the XDP_TX action.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing the BPF program metadata.
+ payload_lens: Array of packet lengths to send.
+ """
+ cfg.require_cmd("socat", remote=True)
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ expected_pkts = 0
+ for payload_len in payload_lens:
+ test_string = "".join(
+ random.choice(string.ascii_lowercase) for _ in range(payload_len)
+ )
+
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \
+ f"-u UDP-RECV:{port},reuseport STDOUT"
+
+ # Writing zero bytes to stdin gets ignored by socat,
+ # but with the shut-null flag socat generates a zero sized packet
+ # when the socket is closed.
+ tx_cmd_suffix = ",shut-null" if payload_len == 0 else ""
+ tx_udp = f"echo -n {test_string} | socat -t 2 " + \
+ f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}"
+
+ with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc:
+ wait_port_listen(port, proto="udp", host=cfg.remote)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed")
+
+ expected_pkts += 1
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+ ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch")
+ ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch")
+
+
+def test_xdp_native_tx_sb(cfg):
+ """
+ Tests the XDP_TX action for a single-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ # Ensure there's enough room for an ETH / IP / UDP header
+ pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62
+
+ _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len])
+
+
+def test_xdp_native_tx_mb(cfg):
+ """
+ Tests the XDP_TX action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o",
+ "xdp.frags", 9000)
+ # The first packet ensures we exercise the fragmented code path.
+ # And the subsequent 0-sized packet ensures the driver
+ # reinitializes xdp_buff correctly.
+ _test_xdp_native_tx(cfg, bpf_info, [8000, 0])
+
+
+def _validate_res(res, offset_lst, pkt_sz_lst):
+ """
+ Validates the result of a test.
+
+ Args:
+ res: The result of the test, which should be a dictionary with a "status" key.
+
+ Raises:
+ KsftFailEx: If the test fails to pass any combination of offset and packet size.
+ """
+ if "status" not in res:
+ raise KsftFailEx("Missing 'status' key in result dictionary")
+
+ # Validate that not a single case was successful
+ if res["status"] == "fail":
+ if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]:
+ raise KsftFailEx(f"{res['reason']}")
+
+ # Get the previous offset and packet size to report the successful run
+ tmp_idx = offset_lst.index(res["offset"])
+ prev_offset = offset_lst[tmp_idx - 1]
+ if tmp_idx == 0:
+ tmp_idx = pkt_sz_lst.index(res["pkt_sz"])
+ prev_pkt_sz = pkt_sz_lst[tmp_idx - 1]
+ else:
+ prev_pkt_sz = res["pkt_sz"]
+
+ # Use these values for error reporting
+ ksft_pr(
+ f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. "
+ f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. "
+ f"Reason: {res['reason']}"
+ )
+
+
+def _check_for_failures(recvd_str, stats):
+ """
+ Checks for common failures while adjusting headroom or tailroom.
+
+ Args:
+ recvd_str: The string received from the remote host after sending a test string.
+ stats: A dictionary containing formatted packet statistics for various XDP actions.
+
+ Returns:
+ str: A string describing the failure reason if a failure is detected, otherwise None.
+ """
+
+ # Any adjustment failure result in an abort hence, we track this counter
+ if stats[XDPStats.ABORT.value] != 0:
+ return "Adjustment failed"
+
+ # Since we are using aggregate stats for a single test across all offsets and packet sizes
+ # we can't use RX stats only to track data exchange failure without taking a previous
+ # snapshot. An easier way is to simply check for non-zero length of received string.
+ if len(recvd_str) == 0:
+ return "Data exchange failed"
+
+ # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run
+ if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]:
+ return "RX stats mismatch"
+
+ return None
+
+
+def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP tail adjustment functionality.
+
+ This function loads the appropriate XDP program based on the provided
+ program name and configures the XDP map for tail adjustment. It then
+ validates the tail adjustment by sending and receiving UDP packets
+ with specified packet sizes and offsets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ prog: Name of the XDP program to load.
+ pkt_sz_lst: List of packet sizes to test.
+ offset_lst: List of offsets to validate support for tail adjustment.
+
+ Returns:
+ dict: A dictionary with test status and failure details if applicable.
+ """
+ port = rand_port()
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+
+ # Configure the XDP map for tail adjustment
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ for offset in offset_lst:
+ tag = format(random.randint(65, 90), "02x")
+
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+ if offset > 0:
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+
+ for pkt_sz in pkt_sz_lst:
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ failure = _check_for_failures(recvd_str, stats)
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset > 0:
+ expected_data = test_str + (offset * chr(int(tag, 16)))
+ else:
+ expected_data = test_str[0:pkt_sz + offset]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_tail_grow_data(cfg):
+ """
+ Tests the XDP tail adjustment by growing packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [1, 16, 32, 64, 128, 256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_tail_shrnk_data(cfg):
+ """
+ Tests the XDP tail adjustment by shrinking packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def get_hds_thresh(cfg):
+ """
+ Retrieves the header data split (HDS) threshold for a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ Returns:
+ The HDS threshold value. If the threshold is not supported or an error occurs,
+ a default value of 1500 is returned.
+ """
+ ethnl = cfg.ethnl
+ hds_thresh = 1500
+
+ try:
+ rings = ethnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if 'hds-thresh' not in rings:
+ ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}')
+ return hds_thresh
+ hds_thresh = rings['hds-thresh']
+ except NlError as e:
+ ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}")
+
+ return hds_thresh
+
+
+def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP head adjustment action for a multi-buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ ethnl: Network namespace or link object (not used in this function).
+
+ This function sets up the packet size and offset lists, then performs
+ the head adjustment test by sending and receiving UDP packets.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000))
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ hds_thresh = get_hds_thresh(cfg)
+ for offset in offset_lst:
+ for pkt_sz in pkt_sz_lst:
+ # The "head" buffer must contain at least the Ethernet header
+ # after we eat into it. We send large-enough packets, but if HDS
+ # is enabled head will only contain headers. Don't try to eat
+ # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14)
+ l2_cut_off = 28 if cfg.addr_ipver == 4 else 48
+ if pkt_sz > hds_thresh and offset > l2_cut_off:
+ ksft_pr(
+ f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and "
+ f"offset {offset} > {l2_cut_off}"
+ )
+ return {"status": "pass"}
+
+ test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ tag = format(random.randint(65, 90), '02x')
+
+ _set_xdp_map("map_xdp_setup",
+ TestConfig.ADJST_OFFSET.value,
+ offset)
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ # Check for failures around adjustment and data exchange
+ failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats']))
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset < 0:
+ expected_data = chr(int(tag, 16)) * (0 - offset) + test_str
+ else:
+ expected_data = test_str[offset:]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_head_grow_data(cfg):
+ """
+ Tests the XDP headroom growth support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst_mb function to perform the actual test. The
+ test is passed if the headroom is successfully extended for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Negative values result in headroom shrinking, resulting in growing of payload
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_head_shrnk_data(cfg):
+ """
+ Tests the XDP headroom shrinking support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst_mb function to perform the actual test. The
+ test is passed if the headroom is successfully shrunk for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Positive values result in headroom growing, resulting in shrinking of payload
+ offset_lst = [16, 32, 64, 128, 256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+@ksft_variants([
+ KsftNamedVariant("pass", XDPAction.PASS),
+ KsftNamedVariant("drop", XDPAction.DROP),
+ KsftNamedVariant("tx", XDPAction.TX),
+])
+def test_xdp_native_qstats(cfg, act):
+ """
+ Send 1000 messages. Expect XDP action specified in @act.
+ Make sure the packets were counted to interface level qstats
+ (Rx, and Tx if act is TX).
+ """
+
+ cfg.require_cmd("socat")
+
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, act.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ # Discard the input, but we need a listener to avoid ICMP errors
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport " + \
+ "/dev/null"
+ # Listener runs on "remote" in case of XDP_TX
+ rx_host = cfg.remote if act == XDPAction.TX else None
+ # We want to spew 1000 packets quickly, bash seems to do a good enough job
+ # Each reopening of the socket gives us a differenot local port (for RSS)
+ tx_udp = "for _ in `seq 20`; do " \
+ f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \
+ "for i in `seq 50`; do echo a >&5; done; " \
+ "exec 5>&-; done"
+
+ cfg.wait_hw_stats_settle()
+ # Qstats have more clearly defined semantics than rtnetlink.
+ # XDP is the "first layer of the stack" so XDP packets should be counted
+ # as received and sent as if the decision was made in the routing layer.
+ before = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ with bkg(rx_udp, host=rx_host, exit_wait=True):
+ wait_port_listen(port, proto="udp", host=rx_host)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ cfg.wait_hw_stats_settle()
+ after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ expected_pkts = 1000
+ ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts)
+ if act == XDPAction.TX:
+ ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts)
+
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+ ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch")
+ if act == XDPAction.TX:
+ ksft_eq(stats[XDPStats.TX.value], expected_pkts, "XDP TX stats mismatch")
+
+ # Flip the ring count back and forth to make sure the stats from XDP rings
+ # don't get lost.
+ chans = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if chans.get('combined-count', 0) > 1:
+ cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+ 'combined-count': 1})
+ cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+ 'combined-count': chans['combined-count']})
+ before = after
+ after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ ksft_ge(after['rx-packets'], before['rx-packets'])
+ if act == XDPAction.TX:
+ ksft_ge(after['tx-packets'], before['tx-packets'])
+
+
+def main():
+ """
+ Main function to execute the XDP tests.
+
+ This function runs a series of tests to validate the XDP support for
+ both the single and multi-buffer. It uses the NetDrvEpEnv context
+ manager to manage the network driver environment and the ksft_run
+ function to execute the tests.
+ """
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+ ksft_run(
+ [
+ test_xdp_native_pass_sb,
+ test_xdp_native_pass_mb,
+ test_xdp_native_drop_sb,
+ test_xdp_native_drop_mb,
+ test_xdp_native_tx_sb,
+ test_xdp_native_tx_mb,
+ test_xdp_native_adjst_tail_grow_data,
+ test_xdp_native_adjst_tail_shrnk_data,
+ test_xdp_native_adjst_head_grow_data,
+ test_xdp_native_adjst_head_shrnk_data,
+ test_xdp_native_qstats,
+ ],
+ args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/ntsync/.gitignore b/tools/testing/selftests/drivers/ntsync/.gitignore
new file mode 100644
index 000000000000..848573a3d3ea
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/.gitignore
@@ -0,0 +1 @@
+ntsync
diff --git a/tools/testing/selftests/drivers/ntsync/Makefile b/tools/testing/selftests/drivers/ntsync/Makefile
new file mode 100644
index 000000000000..dbf2b055c0b2
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/Makefile
@@ -0,0 +1,7 @@
+# SPDX-LICENSE-IDENTIFIER: GPL-2.0-only
+TEST_GEN_PROGS := ntsync
+
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lpthread
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/ntsync/config b/tools/testing/selftests/drivers/ntsync/config
new file mode 100644
index 000000000000..60539c826d06
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/config
@@ -0,0 +1 @@
+CONFIG_WINESYNC=y
diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c
new file mode 100644
index 000000000000..e6a37214aa46
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/ntsync.c
@@ -0,0 +1,1343 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Various unit tests for the "ntsync" synchronization primitive driver.
+ *
+ * Copyright (C) 2021-2022 Elizabeth Figura <zfigura@codeweavers.com>
+ */
+
+#define _GNU_SOURCE
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/ntsync.h>
+#include "kselftest_harness.h"
+
+static int read_sem_state(int sem, __u32 *count, __u32 *max)
+{
+ struct ntsync_sem_args args;
+ int ret;
+
+ memset(&args, 0xcc, sizeof(args));
+ ret = ioctl(sem, NTSYNC_IOC_SEM_READ, &args);
+ *count = args.count;
+ *max = args.max;
+ return ret;
+}
+
+#define check_sem_state(sem, count, max) \
+ ({ \
+ __u32 __count, __max; \
+ int ret = read_sem_state((sem), &__count, &__max); \
+ EXPECT_EQ(0, ret); \
+ EXPECT_EQ((count), __count); \
+ EXPECT_EQ((max), __max); \
+ })
+
+static int release_sem(int sem, __u32 *count)
+{
+ return ioctl(sem, NTSYNC_IOC_SEM_RELEASE, count);
+}
+
+static int read_mutex_state(int mutex, __u32 *count, __u32 *owner)
+{
+ struct ntsync_mutex_args args;
+ int ret;
+
+ memset(&args, 0xcc, sizeof(args));
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &args);
+ *count = args.count;
+ *owner = args.owner;
+ return ret;
+}
+
+#define check_mutex_state(mutex, count, owner) \
+ ({ \
+ __u32 __count, __owner; \
+ int ret = read_mutex_state((mutex), &__count, &__owner); \
+ EXPECT_EQ(0, ret); \
+ EXPECT_EQ((count), __count); \
+ EXPECT_EQ((owner), __owner); \
+ })
+
+static int unlock_mutex(int mutex, __u32 owner, __u32 *count)
+{
+ struct ntsync_mutex_args args;
+ int ret;
+
+ args.owner = owner;
+ args.count = 0xdeadbeef;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_UNLOCK, &args);
+ *count = args.count;
+ return ret;
+}
+
+static int read_event_state(int event, __u32 *signaled, __u32 *manual)
+{
+ struct ntsync_event_args args;
+ int ret;
+
+ memset(&args, 0xcc, sizeof(args));
+ ret = ioctl(event, NTSYNC_IOC_EVENT_READ, &args);
+ *signaled = args.signaled;
+ *manual = args.manual;
+ return ret;
+}
+
+#define check_event_state(event, signaled, manual) \
+ ({ \
+ __u32 __signaled, __manual; \
+ int ret = read_event_state((event), &__signaled, &__manual); \
+ EXPECT_EQ(0, ret); \
+ EXPECT_EQ((signaled), __signaled); \
+ EXPECT_EQ((manual), __manual); \
+ })
+
+static int wait_objs(int fd, unsigned long request, __u32 count,
+ const int *objs, __u32 owner, int alert, __u32 *index)
+{
+ struct ntsync_wait_args args = {0};
+ struct timespec timeout;
+ int ret;
+
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+ args.timeout = timeout.tv_sec * 1000000000 + timeout.tv_nsec;
+ args.count = count;
+ args.objs = (uintptr_t)objs;
+ args.owner = owner;
+ args.index = 0xdeadbeef;
+ args.alert = alert;
+ ret = ioctl(fd, request, &args);
+ *index = args.index;
+ return ret;
+}
+
+static int wait_any(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
+{
+ return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, count, objs, owner, 0, index);
+}
+
+static int wait_all(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
+{
+ return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, count, objs, owner, 0, index);
+}
+
+static int wait_any_alert(int fd, __u32 count, const int *objs,
+ __u32 owner, int alert, __u32 *index)
+{
+ return wait_objs(fd, NTSYNC_IOC_WAIT_ANY,
+ count, objs, owner, alert, index);
+}
+
+static int wait_all_alert(int fd, __u32 count, const int *objs,
+ __u32 owner, int alert, __u32 *index)
+{
+ return wait_objs(fd, NTSYNC_IOC_WAIT_ALL,
+ count, objs, owner, alert, index);
+}
+
+TEST(semaphore_state)
+{
+ struct ntsync_sem_args sem_args;
+ struct timespec timeout;
+ __u32 count, index;
+ int fd, ret, sem;
+
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 3;
+ sem_args.max = 2;
+ sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_EQ(-1, sem);
+ EXPECT_EQ(EINVAL, errno);
+
+ sem_args.count = 2;
+ sem_args.max = 2;
+ sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, sem);
+ check_sem_state(sem, 2, 2);
+
+ count = 0;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, count);
+ check_sem_state(sem, 2, 2);
+
+ count = 1;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOVERFLOW, errno);
+ check_sem_state(sem, 2, 2);
+
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(sem, 1, 2);
+
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(sem, 0, 2);
+
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ count = 3;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOVERFLOW, errno);
+ check_sem_state(sem, 0, 2);
+
+ count = 2;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+ check_sem_state(sem, 2, 2);
+
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(0, ret);
+ ret = wait_any(fd, 1, &sem, 123, &index);
+ EXPECT_EQ(0, ret);
+
+ count = 1;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+ check_sem_state(sem, 1, 2);
+
+ count = ~0u;
+ ret = release_sem(sem, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOVERFLOW, errno);
+ check_sem_state(sem, 1, 2);
+
+ close(sem);
+
+ close(fd);
+}
+
+TEST(mutex_state)
+{
+ struct ntsync_mutex_args mutex_args;
+ __u32 owner, count, index;
+ struct timespec timeout;
+ int fd, ret, mutex;
+
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ mutex_args.owner = 123;
+ mutex_args.count = 0;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_EQ(-1, mutex);
+ EXPECT_EQ(EINVAL, errno);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 2;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_EQ(-1, mutex);
+ EXPECT_EQ(EINVAL, errno);
+
+ mutex_args.owner = 123;
+ mutex_args.count = 2;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, mutex);
+ check_mutex_state(mutex, 2, 123);
+
+ ret = unlock_mutex(mutex, 0, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ ret = unlock_mutex(mutex, 456, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ check_mutex_state(mutex, 2, 123);
+
+ ret = unlock_mutex(mutex, 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, count);
+ check_mutex_state(mutex, 1, 123);
+
+ ret = unlock_mutex(mutex, 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, count);
+ check_mutex_state(mutex, 0, 0);
+
+ ret = unlock_mutex(mutex, 123, &count);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+
+ ret = wait_any(fd, 1, &mutex, 456, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 1, 456);
+
+ ret = wait_any(fd, 1, &mutex, 456, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 2, 456);
+
+ ret = unlock_mutex(mutex, 456, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, count);
+ check_mutex_state(mutex, 1, 456);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ owner = 0;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ owner = 123;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ check_mutex_state(mutex, 1, 456);
+
+ owner = 456;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(0, ret);
+
+ memset(&mutex_args, 0xcc, sizeof(mutex_args));
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, mutex_args.count);
+ EXPECT_EQ(0, mutex_args.owner);
+
+ memset(&mutex_args, 0xcc, sizeof(mutex_args));
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, mutex_args.count);
+ EXPECT_EQ(0, mutex_args.owner);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 1, 123);
+
+ owner = 123;
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(0, ret);
+
+ memset(&mutex_args, 0xcc, sizeof(mutex_args));
+ ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, mutex_args.count);
+ EXPECT_EQ(0, mutex_args.owner);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 1, 123);
+
+ close(mutex);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 0;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, mutex);
+ check_mutex_state(mutex, 0, 0);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_mutex_state(mutex, 1, 123);
+
+ close(mutex);
+
+ mutex_args.owner = 123;
+ mutex_args.count = ~0u;
+ mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, mutex);
+ check_mutex_state(mutex, ~0u, 123);
+
+ ret = wait_any(fd, 1, &mutex, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ close(mutex);
+
+ close(fd);
+}
+
+TEST(manual_event_state)
+{
+ struct ntsync_event_args event_args;
+ __u32 index, signaled;
+ int fd, event, ret;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ event_args.manual = 1;
+ event_args.signaled = 0;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+ check_event_state(event, 0, 1);
+
+ signaled = 0xdeadbeef;
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 1, 1);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 1, 1);
+
+ ret = wait_any(fd, 1, &event, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_event_state(event, 1, 1);
+
+ signaled = 0xdeadbeef;
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 0, 1);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 0, 1);
+
+ ret = wait_any(fd, 1, &event, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 0, 1);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 0, 1);
+
+ close(event);
+
+ close(fd);
+}
+
+TEST(auto_event_state)
+{
+ struct ntsync_event_args event_args;
+ __u32 index, signaled;
+ int fd, event, ret;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ event_args.manual = 0;
+ event_args.signaled = 1;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ check_event_state(event, 1, 0);
+
+ signaled = 0xdeadbeef;
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 1, 0);
+
+ ret = wait_any(fd, 1, &event, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_event_state(event, 0, 0);
+
+ signaled = 0xdeadbeef;
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 0, 0);
+
+ ret = wait_any(fd, 1, &event, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+ check_event_state(event, 0, 0);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(event, 0, 0);
+
+ close(event);
+
+ close(fd);
+}
+
+TEST(test_wait_any)
+{
+ int objs[NTSYNC_MAX_WAIT_COUNT + 1], fd, ret;
+ struct ntsync_mutex_args mutex_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ __u32 owner, index, count, i;
+ struct timespec timeout;
+
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 2;
+ sem_args.max = 3;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 0;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, objs[1]);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 0, 0);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 0, 0);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ count = 1;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ ret = wait_any(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 2, 123);
+
+ ret = wait_any(fd, 2, objs, 456, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ owner = 123;
+ ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_any(fd, 2, objs, 456, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ EXPECT_EQ(1, index);
+
+ ret = wait_any(fd, 2, objs, 456, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, index);
+
+ close(objs[1]);
+
+ /* test waiting on the same object twice */
+
+ count = 2;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+
+ objs[1] = objs[0];
+ ret = wait_any(fd, 2, objs, 456, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 1, 3);
+
+ ret = wait_any(fd, 0, NULL, 456, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ for (i = 1; i < NTSYNC_MAX_WAIT_COUNT + 1; ++i)
+ objs[i] = objs[0];
+
+ ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT + 1, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ ret = wait_any(fd, -1, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ close(objs[0]);
+
+ close(fd);
+}
+
+TEST(test_wait_all)
+{
+ struct ntsync_event_args event_args = {0};
+ struct ntsync_mutex_args mutex_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ __u32 owner, index, count;
+ int objs[2], fd, ret;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 2;
+ sem_args.max = 3;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 0;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, objs[1]);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ ret = wait_all(fd, 2, objs, 456, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 2, 123);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+ check_sem_state(objs[0], 0, 3);
+ check_mutex_state(objs[1], 2, 123);
+
+ count = 3;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 2, 3);
+ check_mutex_state(objs[1], 3, 123);
+
+ owner = 123;
+ ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EOWNERDEAD, errno);
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 1, 123);
+
+ close(objs[1]);
+
+ event_args.manual = true;
+ event_args.signaled = true;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, objs[1]);
+
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+ check_sem_state(objs[0], 0, 3);
+ check_event_state(objs[1], 1, 1);
+
+ close(objs[1]);
+
+ /* test waiting on the same object twice */
+ objs[1] = objs[0];
+ ret = wait_all(fd, 2, objs, 123, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ close(objs[0]);
+
+ close(fd);
+}
+
+struct wake_args {
+ int fd;
+ int obj;
+};
+
+struct wait_args {
+ int fd;
+ unsigned long request;
+ struct ntsync_wait_args *args;
+ int ret;
+ int err;
+};
+
+static void *wait_thread(void *arg)
+{
+ struct wait_args *args = arg;
+
+ args->ret = ioctl(args->fd, args->request, args->args);
+ args->err = errno;
+ return NULL;
+}
+
+static __u64 get_abs_timeout(unsigned int ms)
+{
+ struct timespec timeout;
+ clock_gettime(CLOCK_MONOTONIC, &timeout);
+ return (timeout.tv_sec * 1000000000) + timeout.tv_nsec + (ms * 1000000);
+}
+
+static int wait_for_thread(pthread_t thread, unsigned int ms)
+{
+ struct timespec timeout;
+
+ clock_gettime(CLOCK_REALTIME, &timeout);
+ timeout.tv_nsec += ms * 1000000;
+ timeout.tv_sec += (timeout.tv_nsec / 1000000000);
+ timeout.tv_nsec %= 1000000000;
+ return pthread_timedjoin_np(thread, NULL, &timeout);
+}
+
+TEST(wake_any)
+{
+ struct ntsync_event_args event_args = {0};
+ struct ntsync_mutex_args mutex_args = {0};
+ struct ntsync_wait_args wait_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ struct wait_args thread_args;
+ __u32 count, index, signaled;
+ int objs[2], fd, ret;
+ pthread_t thread;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 0;
+ sem_args.max = 3;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ mutex_args.owner = 123;
+ mutex_args.count = 1;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, objs[1]);
+
+ /* test waking the semaphore */
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.objs = (uintptr_t)objs;
+ wait_args.count = 2;
+ wait_args.owner = 456;
+ wait_args.index = 0xdeadbeef;
+ thread_args.fd = fd;
+ thread_args.args = &wait_args;
+ thread_args.request = NTSYNC_IOC_WAIT_ANY;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ count = 1;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+ check_sem_state(objs[0], 0, 3);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(0, wait_args.index);
+
+ /* test waking the mutex */
+
+ /* first grab it again for owner 123 */
+ ret = wait_any(fd, 1, &objs[1], 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.owner = 456;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = unlock_mutex(objs[1], 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, count);
+
+ ret = pthread_tryjoin_np(thread, NULL);
+ EXPECT_EQ(EBUSY, ret);
+
+ ret = unlock_mutex(objs[1], 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, mutex_args.count);
+ check_mutex_state(objs[1], 1, 456);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ close(objs[1]);
+
+ /* test waking events */
+
+ event_args.manual = false;
+ event_args.signaled = false;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, objs[1]);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(objs[1], 0, 0);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(objs[1], 0, 0);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ close(objs[1]);
+
+ event_args.manual = true;
+ event_args.signaled = false;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, objs[1]);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(objs[1], 1, 1);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+ check_event_state(objs[1], 0, 1);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(1, wait_args.index);
+
+ /* delete an object while it's being waited on */
+
+ wait_args.timeout = get_abs_timeout(200);
+ wait_args.owner = 123;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ close(objs[0]);
+ close(objs[1]);
+
+ ret = wait_for_thread(thread, 200);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(-1, thread_args.ret);
+ EXPECT_EQ(ETIMEDOUT, thread_args.err);
+
+ close(fd);
+}
+
+TEST(wake_all)
+{
+ struct ntsync_event_args manual_event_args = {0};
+ struct ntsync_event_args auto_event_args = {0};
+ struct ntsync_mutex_args mutex_args = {0};
+ struct ntsync_wait_args wait_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ struct wait_args thread_args;
+ __u32 count, index, signaled;
+ int objs[4], fd, ret;
+ pthread_t thread;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 0;
+ sem_args.max = 3;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ mutex_args.owner = 123;
+ mutex_args.count = 1;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, objs[1]);
+
+ manual_event_args.manual = true;
+ manual_event_args.signaled = true;
+ objs[2] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &manual_event_args);
+ EXPECT_LE(0, objs[2]);
+
+ auto_event_args.manual = false;
+ auto_event_args.signaled = true;
+ objs[3] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args);
+ EXPECT_EQ(0, objs[3]);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.objs = (uintptr_t)objs;
+ wait_args.count = 4;
+ wait_args.owner = 456;
+ thread_args.fd = fd;
+ thread_args.args = &wait_args;
+ thread_args.request = NTSYNC_IOC_WAIT_ALL;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ count = 1;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+
+ ret = pthread_tryjoin_np(thread, NULL);
+ EXPECT_EQ(EBUSY, ret);
+
+ check_sem_state(objs[0], 1, 3);
+
+ ret = wait_any(fd, 1, &objs[0], 123, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = unlock_mutex(objs[1], 123, &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, count);
+
+ ret = pthread_tryjoin_np(thread, NULL);
+ EXPECT_EQ(EBUSY, ret);
+
+ check_mutex_state(objs[1], 0, 0);
+
+ ret = ioctl(objs[2], NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+
+ count = 2;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, count);
+ check_sem_state(objs[0], 2, 3);
+
+ ret = ioctl(objs[3], NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, signaled);
+
+ ret = ioctl(objs[2], NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+
+ ret = ioctl(objs[3], NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, signaled);
+
+ check_sem_state(objs[0], 1, 3);
+ check_mutex_state(objs[1], 1, 456);
+ check_event_state(objs[2], 1, 1);
+ check_event_state(objs[3], 0, 0);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+
+ /* delete an object while it's being waited on */
+
+ wait_args.timeout = get_abs_timeout(200);
+ wait_args.owner = 123;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ close(objs[0]);
+ close(objs[1]);
+ close(objs[2]);
+ close(objs[3]);
+
+ ret = wait_for_thread(thread, 200);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(-1, thread_args.ret);
+ EXPECT_EQ(ETIMEDOUT, thread_args.err);
+
+ close(fd);
+}
+
+TEST(alert_any)
+{
+ struct ntsync_event_args event_args = {0};
+ struct ntsync_wait_args wait_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ __u32 index, count, signaled;
+ struct wait_args thread_args;
+ int objs[2], event, fd, ret;
+ pthread_t thread;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 0;
+ sem_args.max = 2;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ sem_args.count = 1;
+ sem_args.max = 2;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[1]);
+
+ event_args.manual = true;
+ event_args.signaled = true;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ ret = wait_any_alert(fd, 0, NULL, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_any_alert(fd, 0, NULL, 123, event, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, index);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, index);
+
+ /* test wakeup via alert */
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.objs = (uintptr_t)objs;
+ wait_args.count = 2;
+ wait_args.owner = 123;
+ wait_args.index = 0xdeadbeef;
+ wait_args.alert = event;
+ thread_args.fd = fd;
+ thread_args.args = &wait_args;
+ thread_args.request = NTSYNC_IOC_WAIT_ANY;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(2, wait_args.index);
+
+ close(event);
+
+ /* test with an auto-reset event */
+
+ event_args.manual = false;
+ event_args.signaled = true;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ count = 1;
+ ret = release_sem(objs[0], &count);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, index);
+
+ ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ close(event);
+
+ close(objs[0]);
+ close(objs[1]);
+
+ close(fd);
+}
+
+TEST(alert_all)
+{
+ struct ntsync_event_args event_args = {0};
+ struct ntsync_wait_args wait_args = {0};
+ struct ntsync_sem_args sem_args = {0};
+ struct wait_args thread_args;
+ __u32 index, count, signaled;
+ int objs[2], event, fd, ret;
+ pthread_t thread;
+
+ fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ sem_args.count = 2;
+ sem_args.max = 2;
+ objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[0]);
+
+ sem_args.count = 1;
+ sem_args.max = 2;
+ objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+ EXPECT_LE(0, objs[1]);
+
+ event_args.manual = true;
+ event_args.signaled = true;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, index);
+
+ /* test wakeup via alert */
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ wait_args.timeout = get_abs_timeout(1000);
+ wait_args.objs = (uintptr_t)objs;
+ wait_args.count = 2;
+ wait_args.owner = 123;
+ wait_args.index = 0xdeadbeef;
+ wait_args.alert = event;
+ thread_args.fd = fd;
+ thread_args.args = &wait_args;
+ thread_args.request = NTSYNC_IOC_WAIT_ALL;
+ ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(ETIMEDOUT, ret);
+
+ ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_for_thread(thread, 100);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, thread_args.ret);
+ EXPECT_EQ(2, wait_args.index);
+
+ close(event);
+
+ /* test with an auto-reset event */
+
+ event_args.manual = false;
+ event_args.signaled = true;
+ event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, event);
+
+ count = 2;
+ ret = release_sem(objs[1], &count);
+ EXPECT_EQ(0, ret);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, index);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(2, index);
+
+ ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(ETIMEDOUT, errno);
+
+ close(event);
+
+ close(objs[0]);
+ close(objs[1]);
+
+ close(fd);
+}
+
+#define STRESS_LOOPS 10000
+#define STRESS_THREADS 4
+
+static unsigned int stress_counter;
+static int stress_device, stress_start_event, stress_mutex;
+
+static void *stress_thread(void *arg)
+{
+ struct ntsync_wait_args wait_args = {0};
+ __u32 index, count, i;
+ int ret;
+
+ wait_args.timeout = UINT64_MAX;
+ wait_args.count = 1;
+ wait_args.objs = (uintptr_t)&stress_start_event;
+ wait_args.owner = gettid();
+ wait_args.index = 0xdeadbeef;
+
+ ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
+
+ wait_args.objs = (uintptr_t)&stress_mutex;
+
+ for (i = 0; i < STRESS_LOOPS; ++i) {
+ ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
+
+ ++stress_counter;
+
+ unlock_mutex(stress_mutex, wait_args.owner, &count);
+ }
+
+ return NULL;
+}
+
+TEST(stress_wait)
+{
+ struct ntsync_event_args event_args;
+ struct ntsync_mutex_args mutex_args;
+ pthread_t threads[STRESS_THREADS];
+ __u32 signaled, i;
+ int ret;
+
+ stress_device = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, stress_device);
+
+ mutex_args.owner = 0;
+ mutex_args.count = 0;
+ stress_mutex = ioctl(stress_device, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+ EXPECT_LE(0, stress_mutex);
+
+ event_args.manual = 1;
+ event_args.signaled = 0;
+ stress_start_event = ioctl(stress_device, NTSYNC_IOC_CREATE_EVENT, &event_args);
+ EXPECT_LE(0, stress_start_event);
+
+ for (i = 0; i < STRESS_THREADS; ++i)
+ pthread_create(&threads[i], NULL, stress_thread, NULL);
+
+ ret = ioctl(stress_start_event, NTSYNC_IOC_EVENT_SET, &signaled);
+ EXPECT_EQ(0, ret);
+
+ for (i = 0; i < STRESS_THREADS; ++i) {
+ ret = pthread_join(threads[i], NULL);
+ EXPECT_EQ(0, ret);
+ }
+
+ EXPECT_EQ(STRESS_LOOPS * STRESS_THREADS, stress_counter);
+
+ close(stress_start_event);
+ close(stress_mutex);
+ close(stress_device);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile
new file mode 100644
index 000000000000..03d0449d307c
--- /dev/null
+++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for ifs(In Field Scan) selftests
+
+TEST_PROGS := test_ifs.sh
+
+include ../../../../../lib.mk
diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh
new file mode 100755
index 000000000000..8b68964b29f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh
@@ -0,0 +1,494 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test the functionality of the Intel IFS(In Field Scan) driver.
+#
+
+# Matched with kselftest framework: tools/testing/selftests/kselftest.h
+readonly KSFT_PASS=0
+readonly KSFT_FAIL=1
+readonly KSFT_XFAIL=2
+readonly KSFT_SKIP=4
+
+readonly CPU_SYSFS="/sys/devices/system/cpu"
+readonly CPU_OFFLINE_SYSFS="${CPU_SYSFS}/offline"
+readonly IMG_PATH="/lib/firmware/intel/ifs_0"
+readonly IFS_SCAN_MODE="0"
+readonly IFS_ARRAY_BIST_SCAN_MODE="1"
+readonly IFS_PATH="/sys/devices/virtual/misc/intel_ifs"
+readonly IFS_SCAN_SYSFS_PATH="${IFS_PATH}_${IFS_SCAN_MODE}"
+readonly IFS_ARRAY_BIST_SYSFS_PATH="${IFS_PATH}_${IFS_ARRAY_BIST_SCAN_MODE}"
+readonly RUN_TEST="run_test"
+readonly STATUS="status"
+readonly DETAILS="details"
+readonly STATUS_PASS="pass"
+readonly PASS="PASS"
+readonly FAIL="FAIL"
+readonly INFO="INFO"
+readonly XFAIL="XFAIL"
+readonly SKIP="SKIP"
+readonly IFS_NAME="intel_ifs"
+readonly ALL="all"
+readonly SIBLINGS="siblings"
+
+# Matches arch/x86/include/asm/intel-family.h and
+# drivers/platform/x86/intel/ifs/core.c requirement as follows
+readonly SAPPHIRERAPIDS_X="8f"
+readonly EMERALDRAPIDS_X="cf"
+
+readonly INTEL_FAM6="06"
+
+LOOP_TIMES=3
+FML=""
+MODEL=""
+STEPPING=""
+CPU_FMS=""
+TRUE="true"
+FALSE="false"
+RESULT=$KSFT_PASS
+IMAGE_NAME=""
+INTERVAL_TIME=1
+OFFLINE_CPUS=""
+# For IFS cleanup tags
+ORIGIN_IFS_LOADED=""
+IFS_IMAGE_NEED_RESTORE=$FALSE
+IFS_LOG="/tmp/ifs_logs.$$"
+RANDOM_CPU=""
+DEFAULT_IMG_ID=""
+
+append_log()
+{
+ echo -e "$1" | tee -a "$IFS_LOG"
+}
+
+online_offline_cpu_list()
+{
+ local on_off=$1
+ local target_cpus=$2
+ local cpu=""
+ local cpu_start=""
+ local cpu_end=""
+ local i=""
+
+ if [[ -n "$target_cpus" ]]; then
+ for cpu in $(echo "$target_cpus" | tr ',' ' '); do
+ if [[ "$cpu" == *"-"* ]]; then
+ cpu_start=""
+ cpu_end=""
+ i=""
+ cpu_start=$(echo "$cpu" | cut -d "-" -f 1)
+ cpu_end=$(echo "$cpu" | cut -d "-" -f 2)
+ for((i=cpu_start;i<=cpu_end;i++)); do
+ append_log "[$INFO] echo $on_off > \
+${CPU_SYSFS}/cpu${i}/online"
+ echo "$on_off" > "$CPU_SYSFS"/cpu"$i"/online
+ done
+ else
+ set_target_cpu "$on_off" "$cpu"
+ fi
+ done
+ fi
+}
+
+ifs_scan_result_summary()
+{
+ local failed_info pass_num skip_num fail_num
+
+ if [[ -e "$IFS_LOG" ]]; then
+ failed_info=$(grep ^"\[${FAIL}\]" "$IFS_LOG")
+ fail_num=$(grep -c ^"\[${FAIL}\]" "$IFS_LOG")
+ skip_num=$(grep -c ^"\[${SKIP}\]" "$IFS_LOG")
+ pass_num=$(grep -c ^"\[${PASS}\]" "$IFS_LOG")
+
+ if [[ "$fail_num" -ne 0 ]]; then
+ RESULT=$KSFT_FAIL
+ echo "[$INFO] IFS test failure summary:"
+ echo "$failed_info"
+ elif [[ "$skip_num" -ne 0 ]]; then
+ RESULT=$KSFT_SKIP
+ fi
+ echo "[$INFO] IFS test pass:$pass_num, skip:$skip_num, fail:$fail_num"
+ else
+ echo "[$INFO] No file $IFS_LOG for IFS scan summary"
+ fi
+}
+
+ifs_cleanup()
+{
+ echo "[$INFO] Restore environment after IFS test"
+
+ # Restore ifs origin image if origin image backup step is needed
+ [[ "$IFS_IMAGE_NEED_RESTORE" == "$TRUE" ]] && {
+ mv -f "$IMG_PATH"/"$IMAGE_NAME"_origin "$IMG_PATH"/"$IMAGE_NAME"
+ }
+
+ # Restore the CPUs to the state before testing
+ [[ -z "$OFFLINE_CPUS" ]] || online_offline_cpu_list "0" "$OFFLINE_CPUS"
+
+ lsmod | grep -q "$IFS_NAME" && [[ "$ORIGIN_IFS_LOADED" == "$FALSE" ]] && {
+ echo "[$INFO] modprobe -r $IFS_NAME"
+ modprobe -r "$IFS_NAME"
+ }
+
+ ifs_scan_result_summary
+ [[ -e "$IFS_LOG" ]] && rm -rf "$IFS_LOG"
+
+ echo "[RESULT] IFS test exit with $RESULT"
+ exit "$RESULT"
+}
+
+do_cmd()
+{
+ local cmd=$*
+ local ret=""
+
+ append_log "[$INFO] $cmd"
+ eval "$cmd"
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ append_log "[$FAIL] $cmd failed. Return code is $ret"
+ RESULT=$KSFT_XFAIL
+ ifs_cleanup
+ fi
+}
+
+test_exit()
+{
+ local info=$1
+ RESULT=$2
+
+ declare -A EXIT_MAP
+ EXIT_MAP[$KSFT_PASS]=$PASS
+ EXIT_MAP[$KSFT_FAIL]=$FAIL
+ EXIT_MAP[$KSFT_XFAIL]=$XFAIL
+ EXIT_MAP[$KSFT_SKIP]=$SKIP
+
+ append_log "[${EXIT_MAP[$RESULT]}] $info"
+ ifs_cleanup
+}
+
+online_all_cpus()
+{
+ local off_cpus=""
+
+ OFFLINE_CPUS=$(cat "$CPU_OFFLINE_SYSFS")
+ online_offline_cpu_list "1" "$OFFLINE_CPUS"
+
+ off_cpus=$(cat "$CPU_OFFLINE_SYSFS")
+ if [[ -z "$off_cpus" ]]; then
+ append_log "[$INFO] All CPUs are online."
+ else
+ append_log "[$XFAIL] There is offline cpu:$off_cpus after online all cpu!"
+ RESULT=$KSFT_XFAIL
+ ifs_cleanup
+ fi
+}
+
+get_cpu_fms()
+{
+ FML=$(grep -m 1 "family" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}')
+ MODEL=$(grep -m 1 "model" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}')
+ STEPPING=$(grep -m 1 "stepping" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}')
+ CPU_FMS="${FML}-${MODEL}-${STEPPING}"
+}
+
+check_cpu_ifs_support_interval_time()
+{
+ get_cpu_fms
+
+ if [[ "$FML" != "$INTEL_FAM6" ]]; then
+ test_exit "CPU family:$FML does not support IFS" "$KSFT_SKIP"
+ fi
+
+ # Ucode has time interval requirement for IFS scan on same CPU as follows:
+ case $MODEL in
+ "$SAPPHIRERAPIDS_X")
+ INTERVAL_TIME=180;
+ ;;
+ "$EMERALDRAPIDS_X")
+ INTERVAL_TIME=30;
+ ;;
+ *)
+ # Set default interval time for other platforms
+ INTERVAL_TIME=1;
+ append_log "[$INFO] CPU FML:$FML model:0x$MODEL, default: 1s interval time"
+ ;;
+ esac
+}
+
+check_ifs_loaded()
+{
+ local ifs_info=""
+
+ ifs_info=$(lsmod | grep "$IFS_NAME")
+ if [[ -z "$ifs_info" ]]; then
+ append_log "[$INFO] modprobe $IFS_NAME"
+ modprobe "$IFS_NAME" || {
+ test_exit "Check if CONFIG_INTEL_IFS is set to m or \
+platform doesn't support ifs" "$KSFT_SKIP"
+ }
+ ifs_info=$(lsmod | grep "$IFS_NAME")
+ [[ -n "$ifs_info" ]] || test_exit "No ifs module listed by lsmod" "$KSFT_FAIL"
+ fi
+}
+
+test_ifs_scan_entry()
+{
+ local ifs_info=""
+
+ ifs_info=$(lsmod | grep "$IFS_NAME")
+
+ if [[ -z "$ifs_info" ]]; then
+ ORIGIN_IFS_LOADED="$FALSE"
+ check_ifs_loaded
+ else
+ ORIGIN_IFS_LOADED="$TRUE"
+ append_log "[$INFO] Module $IFS_NAME is already loaded"
+ fi
+
+ if [[ -d "$IFS_SCAN_SYSFS_PATH" ]]; then
+ append_log "[$PASS] IFS sysfs $IFS_SCAN_SYSFS_PATH entry is created\n"
+ else
+ test_exit "No sysfs entry in $IFS_SCAN_SYSFS_PATH" "$KSFT_FAIL"
+ fi
+}
+
+load_image()
+{
+ local image_id=$1
+ local image_info=""
+ local ret=""
+
+ check_ifs_loaded
+ if [[ -e "${IMG_PATH}/${IMAGE_NAME}" ]]; then
+ append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch"
+ echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null
+ ret=$?
+ [[ "$ret" -eq 0 ]] || {
+ append_log "[$FAIL] Load ifs image $image_id failed with ret:$ret\n"
+ return "$ret"
+ }
+ image_info=$(cat ${IFS_SCAN_SYSFS_PATH}/current_batch)
+ if [[ "$image_info" == 0x"$image_id" ]]; then
+ append_log "[$PASS] load IFS current_batch:$image_info"
+ else
+ append_log "[$FAIL] current_batch:$image_info is not expected:$image_id"
+ return "$KSFT_FAIL"
+ fi
+ else
+ append_log "[$FAIL] No IFS image file ${IMG_PATH}/${IMAGE_NAME}"\
+ return "$KSFT_FAIL"
+ fi
+ return 0
+}
+
+test_load_origin_ifs_image()
+{
+ local image_id=$1
+
+ IMAGE_NAME="${CPU_FMS}-${image_id}.scan"
+
+ load_image "$image_id" || return $?
+ return 0
+}
+
+test_load_bad_ifs_image()
+{
+ local image_id=$1
+
+ IMAGE_NAME="${CPU_FMS}-${image_id}.scan"
+
+ do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME} ${IMG_PATH}/${IMAGE_NAME}_origin"
+
+ # Set IFS_IMAGE_NEED_RESTORE to true before corrupt the origin ifs image file
+ IFS_IMAGE_NEED_RESTORE=$TRUE
+ do_cmd "dd if=/dev/urandom of=${IMG_PATH}/${IMAGE_NAME} bs=1K count=6 2>/dev/null"
+
+ # Use the specified judgment for negative testing
+ append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch"
+ echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null
+ ret=$?
+ if [[ "$ret" -ne 0 ]]; then
+ append_log "[$PASS] Load invalid ifs image failed with ret:$ret not 0 as expected"
+ else
+ append_log "[$FAIL] Load invalid ifs image ret:$ret unexpectedly"
+ fi
+
+ do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME}_origin ${IMG_PATH}/${IMAGE_NAME}"
+ IFS_IMAGE_NEED_RESTORE=$FALSE
+}
+
+test_bad_and_origin_ifs_image()
+{
+ local image_id=$1
+
+ append_log "[$INFO] Test loading bad and then loading original IFS image:"
+ test_load_origin_ifs_image "$image_id" || return $?
+ test_load_bad_ifs_image "$image_id"
+ # Load origin image again and make sure it's worked
+ test_load_origin_ifs_image "$image_id" || return $?
+ append_log "[$INFO] Loading invalid IFS image and then loading initial image passed.\n"
+}
+
+ifs_test_cpu()
+{
+ local ifs_mode=$1
+ local cpu_num=$2
+ local image_id status details ret result result_info
+
+ echo "$cpu_num" > "$IFS_PATH"_"$ifs_mode"/"$RUN_TEST"
+ ret=$?
+
+ status=$(cat "${IFS_PATH}_${ifs_mode}/${STATUS}")
+ details=$(cat "${IFS_PATH}_${ifs_mode}/${DETAILS}")
+
+ if [[ "$ret" -eq 0 && "$status" == "$STATUS_PASS" ]]; then
+ result="$PASS"
+ else
+ result="$FAIL"
+ fi
+
+ cpu_num=$(cat "${CPU_SYSFS}/cpu${cpu_num}/topology/thread_siblings_list")
+
+ # There is no image file for IFS ARRAY BIST scan
+ if [[ -e "${IFS_PATH}_${ifs_mode}/current_batch" ]]; then
+ image_id=$(cat "${IFS_PATH}_${ifs_mode}/current_batch")
+ result_info=$(printf "[%s] ifs_%1d cpu(s):%s, current_batch:0x%02x, \
+ret:%2d, status:%s, details:0x%016x" \
+ "$result" "$ifs_mode" "$cpu_num" "$image_id" "$ret" \
+ "$status" "$details")
+ else
+ result_info=$(printf "[%s] ifs_%1d cpu(s):%s, ret:%2d, status:%s, details:0x%016x" \
+ "$result" "$ifs_mode" "$cpu_num" "$ret" "$status" "$details")
+ fi
+
+ append_log "$result_info"
+}
+
+ifs_test_cpus()
+{
+ local cpus_type=$1
+ local ifs_mode=$2
+ local image_id=$3
+ local cpu_max_num=""
+ local cpu_num=""
+
+ case "$cpus_type" in
+ "$ALL")
+ cpu_max_num=$(($(nproc) - 1))
+ cpus=$(seq 0 $cpu_max_num)
+ ;;
+ "$SIBLINGS")
+ cpus=$(cat ${CPU_SYSFS}/cpu*/topology/thread_siblings_list \
+ | sed -e 's/,.*//' \
+ | sed -e 's/-.*//' \
+ | sort -n \
+ | uniq)
+ ;;
+ *)
+ test_exit "Invalid cpus_type:$cpus_type" "$KSFT_XFAIL"
+ ;;
+ esac
+
+ for cpu_num in $cpus; do
+ ifs_test_cpu "$ifs_mode" "$cpu_num"
+ done
+
+ if [[ -z "$image_id" ]]; then
+ append_log "[$INFO] ifs_$ifs_mode test $cpus_type cpus completed\n"
+ else
+ append_log "[$INFO] ifs_$ifs_mode $cpus_type cpus with $CPU_FMS-$image_id.scan \
+completed\n"
+ fi
+}
+
+test_ifs_same_cpu_loop()
+{
+ local ifs_mode=$1
+ local cpu_num=$2
+ local loop_times=$3
+
+ append_log "[$INFO] Test ifs mode $ifs_mode on CPU:$cpu_num for $loop_times rounds:"
+ [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]] && {
+ load_image "$DEFAULT_IMG_ID" || return $?
+ }
+ for (( i=1; i<=loop_times; i++ )); do
+ append_log "[$INFO] Loop iteration: $i in total of $loop_times"
+ # Only IFS scan needs the interval time
+ if [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]]; then
+ do_cmd "sleep $INTERVAL_TIME"
+ elif [[ "$ifs_mode" == "$IFS_ARRAY_BIST_SCAN_MODE" ]]; then
+ true
+ else
+ test_exit "Invalid ifs_mode:$ifs_mode" "$KSFT_XFAIL"
+ fi
+
+ ifs_test_cpu "$ifs_mode" "$cpu_num"
+ done
+ append_log "[$INFO] $loop_times rounds of ifs_$ifs_mode test on CPU:$cpu_num completed.\n"
+}
+
+test_ifs_scan_available_imgs()
+{
+ local image_ids=""
+ local image_id=""
+
+ append_log "[$INFO] Test ifs scan with available images:"
+ image_ids=$(find "$IMG_PATH" -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \
+ 2>/dev/null \
+ | sort \
+ | awk -F "-" '{print $NF}' \
+ | cut -d "." -f 1)
+
+ for image_id in $image_ids; do
+ load_image "$image_id" || return $?
+
+ ifs_test_cpus "$SIBLINGS" "$IFS_SCAN_MODE" "$image_id"
+ # IFS scan requires time interval for the scan on the same CPU
+ do_cmd "sleep $INTERVAL_TIME"
+ done
+}
+
+prepare_ifs_test_env()
+{
+ local max_cpu=""
+
+ check_cpu_ifs_support_interval_time
+
+ online_all_cpus
+ max_cpu=$(($(nproc) - 1))
+ RANDOM_CPU=$(shuf -i 0-$max_cpu -n 1)
+
+ DEFAULT_IMG_ID=$(find $IMG_PATH -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \
+ 2>/dev/null \
+ | sort \
+ | head -n 1 \
+ | awk -F "-" '{print $NF}' \
+ | cut -d "." -f 1)
+}
+
+test_ifs()
+{
+ prepare_ifs_test_env
+
+ test_ifs_scan_entry
+
+ if [[ -z "$DEFAULT_IMG_ID" ]]; then
+ append_log "[$SKIP] No proper ${IMG_PATH}/${CPU_FMS}-*.scan, skip ifs_0 scan"
+ else
+ test_bad_and_origin_ifs_image "$DEFAULT_IMG_ID"
+ test_ifs_scan_available_imgs
+ test_ifs_same_cpu_loop "$IFS_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES"
+ fi
+
+ if [[ -d "$IFS_ARRAY_BIST_SYSFS_PATH" ]]; then
+ ifs_test_cpus "$SIBLINGS" "$IFS_ARRAY_BIST_SCAN_MODE"
+ test_ifs_same_cpu_loop "$IFS_ARRAY_BIST_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES"
+ else
+ append_log "[$SKIP] No $IFS_ARRAY_BIST_SYSFS_PATH, skip IFS ARRAY BIST scan"
+ fi
+}
+
+trap ifs_cleanup SIGTERM SIGINT
+test_ifs
+ifs_cleanup
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile
new file mode 100644
index 000000000000..755d164384c4
--- /dev/null
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile
@@ -0,0 +1,20 @@
+include ../../../../../build/Build.include
+
+UNAME_M := $(shell uname -m)
+
+ifneq ($(UNAME_M),s390x)
+nothing:
+.PHONY: all clean run_tests install
+.SILENT:
+else
+
+TEST_GEN_PROGS := test_uvdevice
+
+top_srcdir ?= ../../../../../..
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+
+CFLAGS += -Wall -Werror -static $(KHDR_INCLUDES) -I$(LINUX_TOOL_ARCH_INCLUDE)
+
+include ../../../lib.mk
+
+endif
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/config b/tools/testing/selftests/drivers/s390x/uvdevice/config
new file mode 100644
index 000000000000..f28a04b99eff
--- /dev/null
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/config
@@ -0,0 +1 @@
+CONFIG_S390_UV_UAPI=y
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
new file mode 100644
index 000000000000..14df9aa07308
--- /dev/null
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * selftest for the Ultravisor UAPI device
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Steffen Eiden <seiden@linux.ibm.com>
+ */
+
+#include <stdint.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <asm/uvdevice.h>
+
+#include "kselftest_harness.h"
+
+#define UV_PATH "/dev/uv"
+#define BUFFER_SIZE 0x200
+FIXTURE(uvio_fixture) {
+ int uv_fd;
+ struct uvio_ioctl_cb uvio_ioctl;
+ uint8_t buffer[BUFFER_SIZE];
+ __u64 fault_page;
+};
+
+FIXTURE_VARIANT(uvio_fixture) {
+ unsigned long ioctl_cmd;
+ uint32_t arg_size;
+};
+
+FIXTURE_VARIANT_ADD(uvio_fixture, att) {
+ .ioctl_cmd = UVIO_IOCTL_ATT,
+ .arg_size = sizeof(struct uvio_attest),
+};
+
+FIXTURE_SETUP(uvio_fixture)
+{
+ self->uv_fd = open(UV_PATH, O_ACCMODE);
+
+ self->uvio_ioctl.argument_addr = (__u64)self->buffer;
+ self->uvio_ioctl.argument_len = variant->arg_size;
+ self->fault_page =
+ (__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0);
+}
+
+FIXTURE_TEARDOWN(uvio_fixture)
+{
+ if (self->uv_fd)
+ close(self->uv_fd);
+ munmap((void *)self->fault_page, (size_t)getpagesize());
+}
+
+TEST_F(uvio_fixture, fault_ioctl_arg)
+{
+ int rc, errno_cache;
+
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, NULL);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, self->fault_page);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+}
+
+TEST_F(uvio_fixture, fault_uvio_arg)
+{
+ int rc, errno_cache;
+
+ self->uvio_ioctl.argument_addr = 0;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+
+ self->uvio_ioctl.argument_addr = self->fault_page;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+}
+
+/*
+ * Test to verify that IOCTLs with invalid values in the ioctl_control block
+ * are rejected.
+ */
+TEST_F(uvio_fixture, inval_ioctl_cb)
+{
+ int rc, errno_cache;
+
+ self->uvio_ioctl.argument_len = 0;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+
+ self->uvio_ioctl.argument_len = (uint32_t)-1;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+ self->uvio_ioctl.argument_len = variant->arg_size;
+
+ self->uvio_ioctl.flags = (uint32_t)-1;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+ self->uvio_ioctl.flags = 0;
+
+ memset(self->uvio_ioctl.reserved14, 0xff, sizeof(self->uvio_ioctl.reserved14));
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+
+ memset(&self->uvio_ioctl, 0x11, sizeof(self->uvio_ioctl));
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ ASSERT_EQ(rc, -1);
+}
+
+TEST_F(uvio_fixture, inval_ioctl_cmd)
+{
+ int rc, errno_cache;
+ uint8_t nr = _IOC_NR(variant->ioctl_cmd);
+ unsigned long cmds[] = {
+ _IOWR('a', nr, struct uvio_ioctl_cb),
+ _IOWR(UVIO_TYPE_UVC, nr, int),
+ _IO(UVIO_TYPE_UVC, nr),
+ _IOR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb),
+ _IOW(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb),
+ };
+
+ for (size_t i = 0; i < ARRAY_SIZE(cmds); i++) {
+ rc = ioctl(self->uv_fd, cmds[i], &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, ENOTTY);
+ }
+}
+
+struct test_attest_buffer {
+ uint8_t arcb[0x180];
+ uint8_t meas[64];
+ uint8_t add[32];
+};
+
+FIXTURE(attest_fixture) {
+ int uv_fd;
+ struct uvio_ioctl_cb uvio_ioctl;
+ struct uvio_attest uvio_attest;
+ struct test_attest_buffer attest_buffer;
+ __u64 fault_page;
+};
+
+FIXTURE_SETUP(attest_fixture)
+{
+ self->uv_fd = open(UV_PATH, O_ACCMODE);
+
+ self->uvio_ioctl.argument_addr = (__u64)&self->uvio_attest;
+ self->uvio_ioctl.argument_len = sizeof(self->uvio_attest);
+
+ self->uvio_attest.arcb_addr = (__u64)&self->attest_buffer.arcb;
+ self->uvio_attest.arcb_len = sizeof(self->attest_buffer.arcb);
+
+ self->uvio_attest.meas_addr = (__u64)&self->attest_buffer.meas;
+ self->uvio_attest.meas_len = sizeof(self->attest_buffer.meas);
+
+ self->uvio_attest.add_data_addr = (__u64)&self->attest_buffer.add;
+ self->uvio_attest.add_data_len = sizeof(self->attest_buffer.add);
+ self->fault_page =
+ (__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0);
+}
+
+FIXTURE_TEARDOWN(attest_fixture)
+{
+ if (self->uv_fd)
+ close(self->uv_fd);
+ munmap((void *)self->fault_page, (size_t)getpagesize());
+}
+
+static void att_inval_sizes_test(uint32_t *size, uint32_t max_size, bool test_zero,
+ struct __test_metadata *_metadata,
+ FIXTURE_DATA(attest_fixture) *self)
+{
+ int rc, errno_cache;
+ uint32_t tmp = *size;
+
+ if (test_zero) {
+ *size = 0;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+ }
+ *size = max_size + 1;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+ *size = tmp;
+}
+
+/*
+ * Test to verify that attestation IOCTLs with invalid values in the UVIO
+ * attestation control block are rejected.
+ */
+TEST_F(attest_fixture, att_inval_request)
+{
+ int rc, errno_cache;
+
+ att_inval_sizes_test(&self->uvio_attest.add_data_len, UVIO_ATT_ADDITIONAL_MAX_LEN,
+ false, _metadata, self);
+ att_inval_sizes_test(&self->uvio_attest.meas_len, UVIO_ATT_MEASUREMENT_MAX_LEN,
+ true, _metadata, self);
+ att_inval_sizes_test(&self->uvio_attest.arcb_len, UVIO_ATT_ARCB_MAX_LEN,
+ true, _metadata, self);
+
+ self->uvio_attest.reserved136 = (uint16_t)-1;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+
+ memset(&self->uvio_attest, 0x11, sizeof(self->uvio_attest));
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ ASSERT_EQ(rc, -1);
+}
+
+static void att_inval_addr_test(__u64 *addr, struct __test_metadata *_metadata,
+ FIXTURE_DATA(attest_fixture) *self)
+{
+ int rc, errno_cache;
+ __u64 tmp = *addr;
+
+ *addr = 0;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+ *addr = self->fault_page;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+ *addr = tmp;
+}
+
+TEST_F(attest_fixture, att_inval_addr)
+{
+ att_inval_addr_test(&self->uvio_attest.arcb_addr, _metadata, self);
+ att_inval_addr_test(&self->uvio_attest.add_data_addr, _metadata, self);
+ att_inval_addr_test(&self->uvio_attest.meas_addr, _metadata, self);
+}
+
+int main(int argc, char **argv)
+{
+ int fd = open(UV_PATH, O_ACCMODE);
+
+ if (fd < 0)
+ ksft_exit_skip("No uv-device or cannot access " UV_PATH "\n"
+ "Enable CONFIG_S390_UV_UAPI and check the access rights on "
+ UV_PATH ".\n");
+ close(fd);
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/drivers/sdsi/sdsi.sh b/tools/testing/selftests/drivers/sdsi/sdsi.sh
new file mode 100755
index 000000000000..9b84b9b82b49
--- /dev/null
+++ b/tools/testing/selftests/drivers/sdsi/sdsi.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the intel_sdsi driver
+
+if ! command -v python3 > /dev/null 2>&1; then
+ echo "drivers/sdsi: [SKIP] python3 not installed"
+ exit 77
+fi
+
+if ! python3 -c "import pytest" > /dev/null 2>&1; then
+ echo "drivers/sdsi: [SKIP] pytest module not installed"
+ exit 77
+fi
+
+if ! /sbin/modprobe -q -r intel_sdsi; then
+ echo "drivers/sdsi: [SKIP]"
+ exit 77
+fi
+
+if /sbin/modprobe -q intel_sdsi && python3 -m pytest sdsi_test.py; then
+ echo "drivers/sdsi: [OK]"
+else
+ echo "drivers/sdsi: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/sdsi/sdsi_test.py b/tools/testing/selftests/drivers/sdsi/sdsi_test.py
new file mode 100644
index 000000000000..5efb29feee70
--- /dev/null
+++ b/tools/testing/selftests/drivers/sdsi/sdsi_test.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from struct import pack
+from time import sleep
+
+import errno
+import glob
+import os
+import subprocess
+
+try:
+ import pytest
+except ImportError:
+ print("Unable to import pytest python module.")
+ print("\nIf not already installed, you may do so with:")
+ print("\t\tpip3 install pytest")
+ exit(1)
+
+SOCKETS = glob.glob('/sys/bus/auxiliary/devices/intel_vsec.sdsi.*')
+NUM_SOCKETS = len(SOCKETS)
+
+MODULE_NAME = 'intel_sdsi'
+DEV_PREFIX = 'intel_vsec.sdsi'
+CLASS_DIR = '/sys/bus/auxiliary/devices'
+GUID = "0x6dd191"
+
+def read_bin_file(file):
+ with open(file, mode='rb') as f:
+ content = f.read()
+ return content
+
+def get_dev_file_path(socket, file):
+ return CLASS_DIR + '/' + DEV_PREFIX + '.' + str(socket) + '/' + file
+
+def kmemleak_enabled():
+ kmemleak = "/sys/kernel/debug/kmemleak"
+ return os.path.isfile(kmemleak)
+
+class TestSDSiDriver:
+ def test_driver_loaded(self):
+ lsmod_p = subprocess.Popen(('lsmod'), stdout=subprocess.PIPE)
+ result = subprocess.check_output(('grep', '-q', MODULE_NAME), stdin=lsmod_p.stdout)
+
+@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS))
+class TestSDSiFilesClass:
+
+ def read_value(self, file):
+ f = open(file, "r")
+ value = f.read().strip("\n")
+ return value
+
+ def get_dev_folder(self, socket):
+ return CLASS_DIR + '/' + DEV_PREFIX + '.' + str(socket) + '/'
+
+ def test_sysfs_files_exist(self, socket):
+ folder = self.get_dev_folder(socket)
+ print (folder)
+ assert os.path.isfile(folder + "guid") == True
+ assert os.path.isfile(folder + "provision_akc") == True
+ assert os.path.isfile(folder + "provision_cap") == True
+ assert os.path.isfile(folder + "state_certificate") == True
+ assert os.path.isfile(folder + "registers") == True
+
+ def test_sysfs_file_permissions(self, socket):
+ folder = self.get_dev_folder(socket)
+ mode = os.stat(folder + "guid").st_mode & 0o777
+ assert mode == 0o444 # Read all
+ mode = os.stat(folder + "registers").st_mode & 0o777
+ assert mode == 0o400 # Read owner
+ mode = os.stat(folder + "provision_akc").st_mode & 0o777
+ assert mode == 0o200 # Read owner
+ mode = os.stat(folder + "provision_cap").st_mode & 0o777
+ assert mode == 0o200 # Read owner
+ mode = os.stat(folder + "state_certificate").st_mode & 0o777
+ assert mode == 0o400 # Read owner
+
+ def test_sysfs_file_ownership(self, socket):
+ folder = self.get_dev_folder(socket)
+
+ st = os.stat(folder + "guid")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ st = os.stat(folder + "registers")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ st = os.stat(folder + "provision_akc")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ st = os.stat(folder + "provision_cap")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ st = os.stat(folder + "state_certificate")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ def test_sysfs_file_sizes(self, socket):
+ folder = self.get_dev_folder(socket)
+
+ if self.read_value(folder + "guid") == GUID:
+ st = os.stat(folder + "registers")
+ assert st.st_size == 72
+
+ st = os.stat(folder + "provision_akc")
+ assert st.st_size == 1024
+
+ st = os.stat(folder + "provision_cap")
+ assert st.st_size == 1024
+
+ st = os.stat(folder + "state_certificate")
+ assert st.st_size == 4096
+
+ def test_no_seek_allowed(self, socket):
+ folder = self.get_dev_folder(socket)
+ rand_file = bytes(os.urandom(8))
+
+ f = open(folder + "provision_cap", "wb", 0)
+ f.seek(1)
+ with pytest.raises(OSError) as error:
+ f.write(rand_file)
+ assert error.value.errno == errno.ESPIPE
+ f.close()
+
+ f = open(folder + "provision_akc", "wb", 0)
+ f.seek(1)
+ with pytest.raises(OSError) as error:
+ f.write(rand_file)
+ assert error.value.errno == errno.ESPIPE
+ f.close()
+
+ def test_registers_seek(self, socket):
+ folder = self.get_dev_folder(socket)
+
+ # Check that the value read from an offset of the entire
+ # file is none-zero and the same as the value read
+ # from seeking to the same location
+ f = open(folder + "registers", "rb")
+ data = f.read()
+ f.seek(64)
+ id = f.read()
+ assert id != bytes(0)
+ assert data[64:] == id
+ f.close()
+
+@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS))
+class TestSDSiMailboxCmdsClass:
+ def test_provision_akc_eoverflow_1017_bytes(self, socket):
+
+ # The buffer for writes is 1k, of with 8 bytes must be
+ # reserved for the command, leaving 1016 bytes max.
+ # Check that we get an overflow error for 1017 bytes.
+ node = get_dev_file_path(socket, "provision_akc")
+ rand_file = bytes(os.urandom(1017))
+
+ f = open(node, 'wb', 0)
+ with pytest.raises(OSError) as error:
+ f.write(rand_file)
+ assert error.value.errno == errno.EOVERFLOW
+ f.close()
+
+@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS))
+class TestSdsiDriverLocksClass:
+ def test_enodev_when_pci_device_removed(self, socket):
+ node = get_dev_file_path(socket, "provision_akc")
+ dev_name = DEV_PREFIX + '.' + str(socket)
+ driver_dir = CLASS_DIR + '/' + dev_name + "/driver/"
+ rand_file = bytes(os.urandom(8))
+
+ f = open(node, 'wb', 0)
+ g = open(node, 'wb', 0)
+
+ with open(driver_dir + 'unbind', 'w') as k:
+ print(dev_name, file = k)
+
+ with pytest.raises(OSError) as error:
+ f.write(rand_file)
+ assert error.value.errno == errno.ENODEV
+
+ with pytest.raises(OSError) as error:
+ g.write(rand_file)
+ assert error.value.errno == errno.ENODEV
+
+ f.close()
+ g.close()
+
+ # Short wait needed to allow file to close before pulling driver
+ sleep(1)
+
+ p = subprocess.Popen(('modprobe', '-r', 'intel_sdsi'))
+ p.wait()
+ p = subprocess.Popen(('modprobe', '-r', 'intel_vsec'))
+ p.wait()
+ p = subprocess.Popen(('modprobe', 'intel_vsec'))
+ p.wait()
+
+ # Short wait needed to allow driver time to get inserted
+ # before continuing tests
+ sleep(1)
+
+ def test_memory_leak(self, socket):
+ if not kmemleak_enabled():
+ pytest.skip("kmemleak not enabled in kernel")
+
+ dev_name = DEV_PREFIX + '.' + str(socket)
+ driver_dir = CLASS_DIR + '/' + dev_name + "/driver/"
+
+ with open(driver_dir + 'unbind', 'w') as k:
+ print(dev_name, file = k)
+
+ sleep(1)
+
+ subprocess.check_output(('modprobe', '-r', 'intel_sdsi'))
+ subprocess.check_output(('modprobe', '-r', 'intel_vsec'))
+
+ with open('/sys/kernel/debug/kmemleak', 'w') as f:
+ print('scan', file = f)
+ sleep(5)
+
+ assert os.stat('/sys/kernel/debug/kmemleak').st_size == 0
+
+ subprocess.check_output(('modprobe', 'intel_vsec'))
+ sleep(1)
diff --git a/tools/testing/selftests/dt/.gitignore b/tools/testing/selftests/dt/.gitignore
new file mode 100644
index 000000000000..f6476c9f2884
--- /dev/null
+++ b/tools/testing/selftests/dt/.gitignore
@@ -0,0 +1 @@
+compatible_list
diff --git a/tools/testing/selftests/dt/Makefile b/tools/testing/selftests/dt/Makefile
new file mode 100644
index 000000000000..2d33ee9e9b71
--- /dev/null
+++ b/tools/testing/selftests/dt/Makefile
@@ -0,0 +1,21 @@
+PY3 = $(shell which python3 2>/dev/null)
+
+ifneq ($(PY3),)
+
+TEST_PROGS := test_unprobed_devices.sh
+TEST_GEN_FILES := compatible_list
+TEST_FILES := compatible_ignore_list
+
+include ../lib.mk
+
+$(OUTPUT)/compatible_list:
+ $(top_srcdir)/scripts/dtc/dt-extract-compatibles -d $(top_srcdir) > $@
+
+else
+
+all: no_py3_warning
+
+no_py3_warning:
+ @echo "Missing python3. This test will be skipped."
+
+endif
diff --git a/tools/testing/selftests/dt/compatible_ignore_list b/tools/testing/selftests/dt/compatible_ignore_list
new file mode 100644
index 000000000000..1323903feca9
--- /dev/null
+++ b/tools/testing/selftests/dt/compatible_ignore_list
@@ -0,0 +1 @@
+simple-mfd
diff --git a/tools/testing/selftests/dt/test_unprobed_devices.sh b/tools/testing/selftests/dt/test_unprobed_devices.sh
new file mode 100755
index 000000000000..5e3f42ef249e
--- /dev/null
+++ b/tools/testing/selftests/dt/test_unprobed_devices.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Based on Frank Rowand's dt_stat script.
+#
+# This script tests for devices that were declared on the Devicetree and are
+# expected to bind to a driver, but didn't.
+#
+# To achieve this, two lists are used:
+# * a list of the compatibles that can be matched by a Devicetree node
+# * a list of compatibles that should be ignored
+#
+
+DIR="$(dirname $(readlink -f "$0"))"
+
+source "${DIR}"/../kselftest/ktap_helpers.sh
+
+PDT=/proc/device-tree/
+COMPAT_LIST="${DIR}"/compatible_list
+IGNORE_LIST="${DIR}"/compatible_ignore_list
+
+ktap_print_header
+
+if [[ ! -d "${PDT}" ]]; then
+ ktap_skip_all "${PDT} doesn't exist."
+ exit "${KSFT_SKIP}"
+fi
+
+nodes_compatible=$(
+ for node in $(find ${PDT} -type d); do
+ [ ! -f "${node}"/compatible ] && continue
+ # Check if node is available
+ if [[ -e "${node}"/status ]]; then
+ status=$(tr -d '\000' < "${node}"/status)
+ if [[ "${status}" != "okay" && "${status}" != "ok" ]]; then
+ if [ -n "${disabled_nodes_regex}" ]; then
+ disabled_nodes_regex="${disabled_nodes_regex}|${node}"
+ else
+ disabled_nodes_regex="${node}"
+ fi
+ continue
+ fi
+ fi
+
+ # Ignore this node if one of its ancestors was disabled
+ if [ -n "${disabled_nodes_regex}" ]; then
+ echo "${node}" | grep -q -E "${disabled_nodes_regex}" && continue
+ fi
+
+ echo "${node}" | sed -e 's|\/proc\/device-tree||'
+ done | sort
+ )
+
+nodes_dev_bound=$(
+ IFS=$'\n'
+ for dev_dir in $(find /sys/devices -type d); do
+ [ ! -f "${dev_dir}"/uevent ] && continue
+ [ ! -d "${dev_dir}"/driver ] && continue
+
+ grep '^OF_FULLNAME=' "${dev_dir}"/uevent | sed -e 's|OF_FULLNAME=||'
+ done
+ )
+
+num_tests=$(echo ${nodes_compatible} | wc -w)
+ktap_set_plan "${num_tests}"
+
+retval="${KSFT_PASS}"
+for node in ${nodes_compatible}; do
+ if ! echo "${nodes_dev_bound}" | grep -E -q "(^| )${node}( |\$)"; then
+ compatibles=$(tr '\000' '\n' < "${PDT}"/"${node}"/compatible)
+
+ for compatible in ${compatibles}; do
+ if grep -x -q "${compatible}" "${IGNORE_LIST}"; then
+ continue
+ fi
+
+ if grep -x -q "${compatible}" "${COMPAT_LIST}"; then
+ ktap_test_fail "${node}"
+ retval="${KSFT_FAIL}"
+ continue 2
+ fi
+ done
+ ktap_test_skip "${node}"
+ else
+ ktap_test_pass "${node}"
+ fi
+
+done
+
+ktap_print_totals
+exit "${retval}"
diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c
index 9674a19396a3..7bc7af4eb2c1 100644
--- a/tools/testing/selftests/efivarfs/create-read.c
+++ b/tools/testing/selftests/efivarfs/create-read.c
@@ -32,8 +32,10 @@ int main(int argc, char **argv)
rc = read(fd, buf, sizeof(buf));
if (rc != 0) {
fprintf(stderr, "Reading a new var should return EOF\n");
+ close(fd);
return EXIT_FAILURE;
}
+ close(fd);
return EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
index a90f394f9aa9..c62544b966ae 100755
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -76,18 +76,26 @@ test_create_empty()
: > $file
- if [ ! -e $file ]; then
- echo "$file can not be created without writing" >&2
+ if [ -e $file ]; then
+ echo "$file can be created without writing" >&2
+ file_cleanup $file
exit 1
fi
- file_cleanup $file
}
test_create_read()
{
local file=$efivarfs_mount/$FUNCNAME-$test_guid
./create-read $file
- file_cleanup $file
+ if [ $? -ne 0 ]; then
+ echo "create and read $file failed"
+ exit 1
+ fi
+ if [ -e $file ]; then
+ echo "file still exists and should not"
+ file_cleanup $file
+ exit 1
+ fi
}
test_delete()
@@ -197,6 +205,158 @@ test_invalid_filenames()
exit $ret
}
+test_no_set_size()
+{
+ local attrs='\x07\x00\x00\x00'
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+ local ret=0
+
+ printf "$attrs\x00" > $file
+ [ -e $file -a -s $file ] || exit 1
+ chattr -i $file
+ : > $file
+ if [ $? != 0 ]; then
+ echo "variable file failed to accept truncation"
+ ret=1
+ elif [ -e $file -a ! -s $file ]; then
+ echo "file can be truncated to zero size"
+ ret=1
+ fi
+ rm $file || exit 1
+
+ exit $ret
+}
+
+setup_test_multiple()
+{
+ ##
+ # we're going to do multi-threaded tests, so create a set of
+ # pipes for synchronization. We use pipes 1..3 to start the
+ # stalled shell job and pipes 4..6 as indicators that the job
+ # has started. If you need more than 3 jobs the two +3's below
+ # need increasing
+ ##
+
+ declare -ag p
+
+ # empty is because arrays number from 0 but jobs number from 1
+ p[0]=""
+
+ for f in 1 2 3 4 5 6; do
+ p[$f]=/tmp/efivarfs_pipe${f}
+ mknod ${p[$f]} p
+ done
+
+ declare -g var=$efivarfs_mount/test_multiple-$test_guid
+
+ cleanup() {
+ for f in ${p[@]}; do
+ rm -f ${f}
+ done
+ if [ -e $var ]; then
+ file_cleanup $var
+ fi
+ }
+ trap cleanup exit
+
+ waitstart() {
+ cat ${p[$[$1+3]]} > /dev/null
+ }
+
+ waitpipe() {
+ echo 1 > ${p[$[$1+3]]}
+ cat ${p[$1]} > /dev/null
+ }
+
+ endjob() {
+ echo 1 > ${p[$1]}
+ wait -n %$1
+ }
+}
+
+test_multiple_zero_size()
+{
+ ##
+ # check for remove on last close, set up three threads all
+ # holding the variable (one write and two reads) and then
+ # close them sequentially (waiting for completion) and check
+ # the state of the variable
+ ##
+
+ { waitpipe 1; echo 1; } > $var 2> /dev/null &
+ waitstart 1
+ # zero length file should exist
+ [ -e $var ] || exit 1
+ # second and third delayed close
+ { waitpipe 2; } < $var &
+ waitstart 2
+ { waitpipe 3; } < $var &
+ waitstart 3
+ # close first fd
+ endjob 1
+ # var should only be deleted on last close
+ [ -e $var ] || exit 1
+ # close second fd
+ endjob 2
+ [ -e $var ] || exit 1
+ # file should go on last close
+ endjob 3
+ [ ! -e $var ] || exit 1
+}
+
+test_multiple_create()
+{
+ ##
+ # set multiple threads to access the variable but delay
+ # the final write to check the close of 2 and 3. The
+ # final write should succeed in creating the variable
+ ##
+ { waitpipe 1; printf '\x07\x00\x00\x00\x54'; } > $var &
+ waitstart 1
+ [ -e $var -a ! -s $var ] || exit 1
+ { waitpipe 2; } < $var &
+ waitstart 2
+ { waitpipe 3; } < $var &
+ waitstart 3
+ # close second and third fds
+ endjob 2
+ # var should only be created (have size) on last close
+ [ -e $var -a ! -s $var ] || exit 1
+ endjob 3
+ [ -e $var -a ! -s $var ] || exit 1
+ # close first fd
+ endjob 1
+ # variable should still exist
+ [ -s $var ] || exit 1
+ file_cleanup $var
+}
+
+test_multiple_delete_on_write() {
+ ##
+ # delete the variable on final write; seqencing similar
+ # to test_multiple_create()
+ ##
+ printf '\x07\x00\x00\x00\x54' > $var
+ chattr -i $var
+ { waitpipe 1; printf '\x07\x00\x00\x00'; } > $var &
+ waitstart 1
+ [ -e $var -a -s $var ] || exit 1
+ { waitpipe 2; } < $var &
+ waitstart 2
+ { waitpipe 3; } < $var &
+ waitstart 3
+ # close first fd; write should set variable size to zero
+ endjob 1
+ # var should only be deleted on last close
+ [ -e $var -a ! -s $var ] || exit 1
+ endjob 2
+ [ -e $var ] || exit 1
+ # close last fd
+ endjob 3
+ # variable should now be removed
+ [ ! -e $var ] || exit 1
+}
+
check_prereqs
rc=0
@@ -209,5 +369,10 @@ run_test test_zero_size_delete
run_test test_open_unlink
run_test test_valid_filenames
run_test test_invalid_filenames
+run_test test_no_set_size
+setup_test_multiple
+run_test test_multiple_zero_size
+run_test test_multiple_create
+run_test test_multiple_delete_on_write
exit $rc
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index 9e2f00343f15..7f3d1ae762ec 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -7,8 +7,15 @@ execveat.moved
execveat.path.ephemeral
execveat.ephemeral
execveat.denatured
-/load_address_*
+non-regular
+null-argv
+/check-exec
+/false
+/inc
+/load_address.*
+!load_address.c
/recursion-depth
+/set-exec
xxxxxxxx*
pipe
S_I*.test
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index cf69b2fcce59..45a3cfc435cf 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,25 +1,37 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS = -Wall
CFLAGS += -Wno-nonnull
-CFLAGS += -D_GNU_SOURCE
+CFLAGS += $(KHDR_INCLUDES)
-TEST_PROGS := binfmt_script non-regular
-TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216
-TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
+LDLIBS += -lcap
+
+ALIGNS := 0x1000 0x200000 0x1000000
+ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS))
+ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS))
+ALIGNMENT_TESTS := $(ALIGN_PIES) $(ALIGN_STATIC_PIES)
+
+TEST_PROGS := binfmt_script.py check-exec-tests.sh
+TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS)
+TEST_GEN_PROGS_EXTENDED := false inc set-exec script-exec.inc script-noexec.inc
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
TEST_GEN_PROGS += recursion-depth
+TEST_GEN_PROGS += null-argv
+TEST_GEN_PROGS += check-exec
EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \
$(OUTPUT)/S_I*.test
include ../lib.mk
+CHECK_EXEC_SAMPLES := $(top_srcdir)/samples/check-exec
+
$(OUTPUT)/subdir:
mkdir -p $@
-$(OUTPUT)/script:
- echo '#!/bin/sh' > $@
+$(OUTPUT)/script: Makefile
+ echo '#!/bin/bash' > $@
echo 'exit $$*' >> $@
chmod +x $@
$(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
@@ -27,9 +39,19 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
cp $< $@
chmod -x $@
-$(OUTPUT)/load_address_4096: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@
-$(OUTPUT)/load_address_2097152: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@
-$(OUTPUT)/load_address_16777216: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@
+$(OUTPUT)/load_address.0x%: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \
+ -fPIE -pie $< -o $@
+$(OUTPUT)/load_address.static.0x%: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \
+ -fPIE -static-pie $< -o $@
+$(OUTPUT)/false: false.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -static $< -o $@
+$(OUTPUT)/inc: $(CHECK_EXEC_SAMPLES)/inc.c
+ $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+$(OUTPUT)/set-exec: $(CHECK_EXEC_SAMPLES)/set-exec.c
+ $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+$(OUTPUT)/script-exec.inc: $(CHECK_EXEC_SAMPLES)/script-exec.inc
+ cp $< $@
+$(OUTPUT)/script-noexec.inc: $(CHECK_EXEC_SAMPLES)/script-noexec.inc
+ cp $< $@
diff --git a/tools/testing/selftests/exec/binfmt_script b/tools/testing/selftests/exec/binfmt_script.py
index 05f94a741c7a..2c575a2c0eab 100755
--- a/tools/testing/selftests/exec/binfmt_script
+++ b/tools/testing/selftests/exec/binfmt_script.py
@@ -16,6 +16,8 @@ SIZE=256
NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
test_num=0
+pass_num=0
+fail_num=0
code='''#!/usr/bin/perl
print "Executed interpreter! Args:\n";
@@ -42,7 +44,7 @@ foreach my $a (@ARGV) {
# ...
def test(name, size, good=True, leading="", root="./", target="/perl",
fill="A", arg="", newline="\n", hashbang="#!"):
- global test_num, tests, NAME_MAX
+ global test_num, pass_num, fail_num, tests, NAME_MAX
test_num += 1
if test_num > tests:
raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
@@ -80,16 +82,20 @@ def test(name, size, good=True, leading="", root="./", target="/perl",
if good:
print("ok %d - binfmt_script %s (successful good exec)"
% (test_num, name))
+ pass_num += 1
else:
print("not ok %d - binfmt_script %s succeeded when it should have failed"
% (test_num, name))
+ fail_num = 1
else:
if good:
print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
% (test_num, name, proc.returncode))
+ fail_num = 1
else:
print("ok %d - binfmt_script %s (correctly failed bad exec)"
% (test_num, name))
+ pass_num += 1
# Clean up crazy binaries
os.unlink(script)
@@ -166,6 +172,8 @@ test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ")
test(name="two-under-leading", size=int(SIZE/2), leading=" ")
test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
+print("# Totals: pass:%d fail:%d xfail:0 xpass:0 skip:0 error:0" % (pass_num, fail_num))
+
if test_num != tests:
raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d"
% (test_num, tests))
diff --git a/tools/testing/selftests/exec/check-exec-tests.sh b/tools/testing/selftests/exec/check-exec-tests.sh
new file mode 100755
index 000000000000..87102906ae3c
--- /dev/null
+++ b/tools/testing/selftests/exec/check-exec-tests.sh
@@ -0,0 +1,205 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test the "inc" interpreter.
+#
+# See include/uapi/linux/securebits.h, include/uapi/linux/fcntl.h and
+# samples/check-exec/inc.c
+#
+# Copyright © 2024 Microsoft Corporation
+
+set -u -e -o pipefail
+
+EXPECTED_OUTPUT="1"
+exec 2>/dev/null
+
+DIR="$(dirname $(readlink -f "$0"))"
+source "${DIR}"/../kselftest/ktap_helpers.sh
+
+exec_direct() {
+ local expect="$1"
+ local script="$2"
+ shift 2
+ local ret=0
+ local out
+
+ # Updates PATH for `env` to execute the `inc` interpreter.
+ out="$(PATH="." "$@" "${script}")" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for direct file execution: ${ret}"
+ return 1
+ fi
+ if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+ echo "ERROR: Wrong output for direct file execution: ${out}"
+ return 1
+ fi
+}
+
+exec_indirect() {
+ local expect="$1"
+ local script="$2"
+ shift 2
+ local ret=0
+ local out
+
+ # Script passed as argument.
+ out="$("$@" ./inc "${script}")" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for indirect file execution: ${ret}"
+ return 1
+ fi
+ if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+ echo "ERROR: Wrong output for indirect file execution: ${out}"
+ return 1
+ fi
+}
+
+exec_stdin_reg() {
+ local expect="$1"
+ local script="$2"
+ shift 2
+ local ret=0
+ local out
+
+ # Executing stdin must be allowed if the related file is executable.
+ out="$("$@" ./inc -i < "${script}")" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for stdin regular file execution: ${ret}"
+ return 1
+ fi
+ if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+ echo "ERROR: Wrong output for stdin regular file execution: ${out}"
+ return 1
+ fi
+}
+
+exec_stdin_pipe() {
+ local expect="$1"
+ shift
+ local ret=0
+ local out
+
+ # A pipe is not executable.
+ out="$(cat script-exec.inc | "$@" ./inc -i)" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for stdin pipe execution: ${ret}"
+ return 1
+ fi
+}
+
+exec_argument() {
+ local expect="$1"
+ local ret=0
+ shift
+ local out
+
+ # Script not coming from a file must not be executed.
+ out="$("$@" ./inc -c "$(< script-exec.inc)")" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for arbitrary argument execution: ${ret}"
+ return 1
+ fi
+ if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+ echo "ERROR: Wrong output for arbitrary argument execution: ${out}"
+ return 1
+ fi
+}
+
+exec_interactive() {
+ exec_stdin_pipe "$@"
+ exec_argument "$@"
+}
+
+ktap_test() {
+ ktap_test_result "$*" "$@"
+}
+
+ktap_print_header
+ktap_set_plan 28
+
+# Without secbit configuration, nothing is changed.
+
+ktap_print_msg "By default, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc
+ktap_test exec_indirect 0 script-exec.inc
+
+ktap_print_msg "By default, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc
+
+ktap_print_msg "By default, non-executable scripts are allowed to be interpreted, but not directly executed."
+# We get 126 because of direct execution by Bash.
+ktap_test exec_direct 126 script-noexec.inc
+ktap_test exec_indirect 0 script-noexec.inc
+
+ktap_print_msg "By default, non-executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-noexec.inc
+
+ktap_print_msg "By default, interactive commands are allowed to be interpreted."
+ktap_test exec_interactive 0
+
+# With only file restriction: protect non-malicious users from inadvertent errors (e.g. python ~/Downloads/*.py).
+
+ktap_print_msg "With -f, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc ./set-exec -f --
+ktap_test exec_indirect 0 script-exec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, non-executable scripts are not allowed to be executed nor interpreted."
+# Direct execution of non-executable script is alwayse denied by the kernel.
+ktap_test exec_direct 1 script-noexec.inc ./set-exec -f --
+ktap_test exec_indirect 1 script-noexec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, non-executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-noexec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, interactive commands are allowed to be interpreted."
+ktap_test exec_interactive 0 ./set-exec -f --
+
+# With only denied interactive commands: check or monitor script content (e.g. with LSM).
+
+ktap_print_msg "With -i, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc ./set-exec -i --
+ktap_test exec_indirect 0 script-exec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, non-executable scripts are allowed to be interpreted, but not directly executed."
+# Direct execution of non-executable script is alwayse denied by the kernel.
+ktap_test exec_direct 1 script-noexec.inc ./set-exec -i --
+ktap_test exec_indirect 0 script-noexec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, non-executable stdin is not allowed to be interpreted."
+ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, interactive commands are not allowed to be interpreted."
+ktap_test exec_interactive 1 ./set-exec -i --
+
+# With both file restriction and denied interactive commands: only allow executable scripts.
+
+ktap_print_msg "With -fi, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc ./set-exec -fi --
+ktap_test exec_indirect 0 script-exec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, non-executable scripts are not allowed to be interpreted nor executed."
+# Direct execution of non-executable script is alwayse denied by the kernel.
+ktap_test exec_direct 1 script-noexec.inc ./set-exec -fi --
+ktap_test exec_indirect 1 script-noexec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, non-executable stdin is not allowed to be interpreted."
+ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, interactive commands are not allowed to be interpreted."
+ktap_test exec_interactive 1 ./set-exec -fi --
+
+ktap_finished
diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c
new file mode 100644
index 000000000000..f2397e75aa7c
--- /dev/null
+++ b/tools/testing/selftests/exec/check-exec.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test execveat(2) with AT_EXECVE_CHECK, and prctl(2) with
+ * SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE, and their locked
+ * counterparts.
+ *
+ * Copyright © 2018-2020 ANSSI
+ * Copyright © 2024 Microsoft Corporation
+ *
+ * Author: Mickaël Salaün <mic@digikod.net>
+ */
+
+#include <asm-generic/unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/prctl.h>
+#include <linux/securebits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+
+/* Defines AT_EXECVE_CHECK without type conflicts. */
+#define _ASM_GENERIC_FCNTL_H
+#include <linux/fcntl.h>
+
+#include "kselftest_harness.h"
+
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+ char *const envp[], int flags)
+{
+ return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
+static void drop_privileges(struct __test_metadata *const _metadata)
+{
+ const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;
+ cap_t cap_p;
+
+ if ((cap_get_secbits() & noroot) != noroot)
+ EXPECT_EQ(0, cap_set_secbits(noroot));
+
+ cap_p = cap_get_proc();
+ EXPECT_NE(NULL, cap_p);
+ EXPECT_NE(-1, cap_clear(cap_p));
+
+ /*
+ * Drops everything, especially CAP_SETPCAP, CAP_DAC_OVERRIDE, and
+ * CAP_DAC_READ_SEARCH.
+ */
+ EXPECT_NE(-1, cap_set_proc(cap_p));
+ EXPECT_NE(-1, cap_free(cap_p));
+}
+
+static int test_secbits_set(const unsigned int secbits)
+{
+ int err;
+
+ err = prctl(PR_SET_SECUREBITS, secbits);
+ if (err)
+ return errno;
+ return 0;
+}
+
+FIXTURE(access)
+{
+ int memfd, pipefd;
+ int pipe_fds[2], socket_fds[2];
+};
+
+FIXTURE_VARIANT(access)
+{
+ const bool mount_exec;
+ const bool file_exec;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_exec_file_exec) {
+ /* clang-format on */
+ .mount_exec = true,
+ .file_exec = true,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_exec_file_noexec) {
+ /* clang-format on */
+ .mount_exec = true,
+ .file_exec = false,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_noexec_file_exec) {
+ /* clang-format on */
+ .mount_exec = false,
+ .file_exec = true,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_noexec_file_noexec) {
+ /* clang-format on */
+ .mount_exec = false,
+ .file_exec = false,
+};
+
+static const char binary_path[] = "./false";
+static const char workdir_path[] = "./test-mount";
+static const char reg_file_path[] = "./test-mount/regular_file";
+static const char dir_path[] = "./test-mount/directory";
+static const char block_dev_path[] = "./test-mount/block_device";
+static const char char_dev_path[] = "./test-mount/character_device";
+static const char fifo_path[] = "./test-mount/fifo";
+
+FIXTURE_SETUP(access)
+{
+ int procfd_path_size;
+ static const char path_template[] = "/proc/self/fd/%d";
+ char procfd_path[sizeof(path_template) + 10];
+
+ /* Makes sure we are not already restricted nor locked. */
+ EXPECT_EQ(0, test_secbits_set(0));
+
+ /*
+ * Cleans previous workspace if any error previously happened (don't
+ * check errors).
+ */
+ umount(workdir_path);
+ rmdir(workdir_path);
+
+ /* Creates a clean mount point. */
+ ASSERT_EQ(0, mkdir(workdir_path, 00700));
+ ASSERT_EQ(0, mount("test", workdir_path, "tmpfs",
+ MS_MGC_VAL | (variant->mount_exec ? 0 : MS_NOEXEC),
+ "mode=0700,size=9m"));
+
+ /* Creates a regular file. */
+ ASSERT_EQ(0, mknod(reg_file_path,
+ S_IFREG | (variant->file_exec ? 0700 : 0600), 0));
+ /* Creates a directory. */
+ ASSERT_EQ(0, mkdir(dir_path, variant->file_exec ? 0700 : 0600));
+ /* Creates a character device: /dev/null. */
+ ASSERT_EQ(0, mknod(char_dev_path, S_IFCHR | 0400, makedev(1, 3)));
+ /* Creates a block device: /dev/loop0 */
+ ASSERT_EQ(0, mknod(block_dev_path, S_IFBLK | 0400, makedev(7, 0)));
+ /* Creates a fifo. */
+ ASSERT_EQ(0, mknod(fifo_path, S_IFIFO | 0600, 0));
+
+ /* Creates a regular file without user mount point. */
+ self->memfd = memfd_create("test-exec-probe", MFD_CLOEXEC);
+ ASSERT_LE(0, self->memfd);
+ /* Sets mode, which must be ignored by the exec check. */
+ ASSERT_EQ(0, fchmod(self->memfd, variant->file_exec ? 0700 : 0600));
+
+ /* Creates a pipefs file descriptor. */
+ ASSERT_EQ(0, pipe(self->pipe_fds));
+ procfd_path_size = snprintf(procfd_path, sizeof(procfd_path),
+ path_template, self->pipe_fds[0]);
+ ASSERT_LT(procfd_path_size, sizeof(procfd_path));
+ self->pipefd = open(procfd_path, O_RDWR | O_CLOEXEC);
+ ASSERT_LE(0, self->pipefd);
+ ASSERT_EQ(0, fchmod(self->pipefd, variant->file_exec ? 0700 : 0600));
+
+ /* Creates a socket file descriptor. */
+ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0,
+ self->socket_fds));
+}
+
+FIXTURE_TEARDOWN_PARENT(access)
+{
+ /* There is no need to unlink the test files. */
+ EXPECT_EQ(0, umount(workdir_path));
+ EXPECT_EQ(0, rmdir(workdir_path));
+}
+
+static void fill_exec_fd(struct __test_metadata *_metadata, const int fd_out)
+{
+ char buf[1024];
+ size_t len;
+ int fd_in;
+
+ fd_in = open(binary_path, O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd_in);
+ /* Cannot use copy_file_range(2) because of EXDEV. */
+ len = read(fd_in, buf, sizeof(buf));
+ EXPECT_LE(0, len);
+ while (len > 0) {
+ EXPECT_EQ(len, write(fd_out, buf, len))
+ {
+ TH_LOG("Failed to write: %s (%d)", strerror(errno),
+ errno);
+ }
+ len = read(fd_in, buf, sizeof(buf));
+ EXPECT_LE(0, len);
+ }
+ EXPECT_EQ(0, close(fd_in));
+}
+
+static void fill_exec_path(struct __test_metadata *_metadata,
+ const char *const path)
+{
+ int fd_out;
+
+ fd_out = open(path, O_CLOEXEC | O_WRONLY);
+ ASSERT_LE(0, fd_out)
+ {
+ TH_LOG("Failed to open %s: %s", path, strerror(errno));
+ }
+ fill_exec_fd(_metadata, fd_out);
+ EXPECT_EQ(0, close(fd_out));
+}
+
+static void test_exec_fd(struct __test_metadata *_metadata, const int fd,
+ const int err_code)
+{
+ char *const argv[] = { "", NULL };
+ int access_ret, access_errno;
+
+ /*
+ * If we really execute fd, filled with the "false" binary, the current
+ * thread will exits with an error, which will be interpreted by the
+ * test framework as an error. With AT_EXECVE_CHECK, we only check a
+ * potential successful execution.
+ */
+ access_ret = sys_execveat(fd, "", argv, NULL,
+ AT_EMPTY_PATH | AT_EXECVE_CHECK);
+ access_errno = errno;
+ if (err_code) {
+ EXPECT_EQ(-1, access_ret);
+ EXPECT_EQ(err_code, access_errno)
+ {
+ TH_LOG("Wrong error for execveat(2): %s (%d)",
+ strerror(access_errno), errno);
+ }
+ } else {
+ EXPECT_EQ(0, access_ret)
+ {
+ TH_LOG("Access denied: %s", strerror(access_errno));
+ }
+ }
+}
+
+static void test_exec_path(struct __test_metadata *_metadata,
+ const char *const path, const int err_code)
+{
+ int flags = O_CLOEXEC;
+ int fd;
+
+ /* Do not block on pipes. */
+ if (path == fifo_path)
+ flags |= O_NONBLOCK;
+
+ fd = open(path, flags | O_RDONLY);
+ ASSERT_LE(0, fd)
+ {
+ TH_LOG("Failed to open %s: %s", path, strerror(errno));
+ }
+ test_exec_fd(_metadata, fd, err_code);
+ EXPECT_EQ(0, close(fd));
+}
+
+/* Tests that we don't get ENOEXEC. */
+TEST_F(access, regular_file_empty)
+{
+ const int exec = variant->mount_exec && variant->file_exec;
+
+ test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+
+ drop_privileges(_metadata);
+ test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+}
+
+TEST_F(access, regular_file_elf)
+{
+ const int exec = variant->mount_exec && variant->file_exec;
+
+ fill_exec_path(_metadata, reg_file_path);
+
+ test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+
+ drop_privileges(_metadata);
+ test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+}
+
+/* Tests that we don't get ENOEXEC. */
+TEST_F(access, memfd_empty)
+{
+ const int exec = variant->file_exec;
+
+ test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+
+ drop_privileges(_metadata);
+ test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+}
+
+TEST_F(access, memfd_elf)
+{
+ const int exec = variant->file_exec;
+
+ fill_exec_fd(_metadata, self->memfd);
+
+ test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+
+ drop_privileges(_metadata);
+ test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+}
+
+TEST_F(access, non_regular_files)
+{
+ test_exec_path(_metadata, dir_path, EACCES);
+ test_exec_path(_metadata, block_dev_path, EACCES);
+ test_exec_path(_metadata, char_dev_path, EACCES);
+ test_exec_path(_metadata, fifo_path, EACCES);
+ test_exec_fd(_metadata, self->socket_fds[0], EACCES);
+ test_exec_fd(_metadata, self->pipefd, EACCES);
+}
+
+/* clang-format off */
+FIXTURE(secbits) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(secbits)
+{
+ const bool is_privileged;
+ const int error;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(secbits, priv) {
+ /* clang-format on */
+ .is_privileged = true,
+ .error = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(secbits, unpriv) {
+ /* clang-format on */
+ .is_privileged = false,
+ .error = EPERM,
+};
+
+FIXTURE_SETUP(secbits)
+{
+ /* Makes sure no exec bits are set. */
+ EXPECT_EQ(0, test_secbits_set(0));
+ EXPECT_EQ(0, prctl(PR_GET_SECUREBITS));
+
+ if (!variant->is_privileged)
+ drop_privileges(_metadata);
+}
+
+FIXTURE_TEARDOWN(secbits)
+{
+}
+
+TEST_F(secbits, legacy)
+{
+ EXPECT_EQ(variant->error, test_secbits_set(0));
+}
+
+#define CHILD(...) \
+ do { \
+ pid_t child = vfork(); \
+ EXPECT_LE(0, child); \
+ if (child == 0) { \
+ __VA_ARGS__; \
+ _exit(0); \
+ } \
+ } while (0)
+
+TEST_F(secbits, exec)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_RESTRICT_FILE;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS));
+ CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)));
+
+ secbits |= SECBIT_EXEC_DENY_INTERACTIVE;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS));
+ CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)));
+
+ secbits &= ~(SECBIT_EXEC_RESTRICT_FILE | SECBIT_EXEC_DENY_INTERACTIVE);
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS));
+ CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)));
+}
+
+TEST_F(secbits, check_locked_set)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_RESTRICT_FILE;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+
+ /* Checks lock set but unchanged. */
+ EXPECT_EQ(variant->error, test_secbits_set(secbits));
+ CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+ secbits &= ~SECBIT_EXEC_RESTRICT_FILE;
+ EXPECT_EQ(EPERM, test_secbits_set(0));
+ CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_F(secbits, check_locked_unset)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+
+ /* Checks lock unset but unchanged. */
+ EXPECT_EQ(variant->error, test_secbits_set(secbits));
+ CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+ secbits &= ~SECBIT_EXEC_RESTRICT_FILE;
+ EXPECT_EQ(EPERM, test_secbits_set(0));
+ CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_F(secbits, restrict_locked_set)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_DENY_INTERACTIVE;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+
+ /* Checks lock set but unchanged. */
+ EXPECT_EQ(variant->error, test_secbits_set(secbits));
+ CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+ secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE;
+ EXPECT_EQ(EPERM, test_secbits_set(0));
+ CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_F(secbits, restrict_locked_unset)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+
+ /* Checks lock unset but unchanged. */
+ EXPECT_EQ(variant->error, test_secbits_set(secbits));
+ CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+ secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE;
+ EXPECT_EQ(EPERM, test_secbits_set(0));
+ CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/exec/config b/tools/testing/selftests/exec/config
new file mode 100644
index 000000000000..c308079867b3
--- /dev/null
+++ b/tools/testing/selftests/exec/config
@@ -0,0 +1,2 @@
+CONFIG_BLK_DEV=y
+CONFIG_BLK_DEV_LOOP=y
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 67bf7254a48f..d37c068ed5fe 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -21,7 +21,12 @@
#include <string.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
+
+#define TESTS_EXPECTED 54
+#define TEST_NAME_LEN (PATH_MAX * 4)
+
+#define CHECK_COMM "CHECK_COMM"
static char longpath[2 * PATH_MAX] = "";
static char *envp[] = { "IN_TEST=yes", NULL, NULL };
@@ -43,71 +48,84 @@ static int execveat_(int fd, const char *path, char **argv, char **envp,
static int _check_execveat_fail(int fd, const char *path, int flags,
int expected_errno, const char *errno_str)
{
+ char test_name[TEST_NAME_LEN];
int rc;
errno = 0;
- printf("Check failure of execveat(%d, '%s', %d) with %s... ",
- fd, path?:"(null)", flags, errno_str);
+ snprintf(test_name, sizeof(test_name),
+ "Check failure of execveat(%d, '%s', %d) with %s",
+ fd, path?:"(null)", flags, errno_str);
rc = execveat_(fd, path, argv, envp, flags);
if (rc > 0) {
- printf("[FAIL] (unexpected success from execveat(2))\n");
+ ksft_print_msg("unexpected success from execveat(2)\n");
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if (errno != expected_errno) {
- printf("[FAIL] (expected errno %d (%s) not %d (%s)\n",
- expected_errno, strerror(expected_errno),
- errno, strerror(errno));
+ ksft_print_msg("expected errno %d (%s) not %d (%s)\n",
+ expected_errno, strerror(expected_errno),
+ errno, strerror(errno));
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
- printf("[OK]\n");
+ ksft_test_result_pass("%s\n", test_name);
return 0;
}
static int check_execveat_invoked_rc(int fd, const char *path, int flags,
int expected_rc, int expected_rc2)
{
+ char test_name[TEST_NAME_LEN];
int status;
int rc;
pid_t child;
int pathlen = path ? strlen(path) : 0;
if (pathlen > 40)
- printf("Check success of execveat(%d, '%.20s...%s', %d)... ",
- fd, path, (path + pathlen - 20), flags);
+ snprintf(test_name, sizeof(test_name),
+ "Check success of execveat(%d, '%.20s...%s', %d)... ",
+ fd, path, (path + pathlen - 20), flags);
else
- printf("Check success of execveat(%d, '%s', %d)... ",
- fd, path?:"(null)", flags);
+ snprintf(test_name, sizeof(test_name),
+ "Check success of execveat(%d, '%s', %d)... ",
+ fd, path?:"(null)", flags);
+
child = fork();
if (child < 0) {
- printf("[FAIL] (fork() failed)\n");
+ ksft_perror("fork() failed");
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if (child == 0) {
/* Child: do execveat(). */
rc = execveat_(fd, path, argv, envp, flags);
- printf("[FAIL]: execveat() failed, rc=%d errno=%d (%s)\n",
- rc, errno, strerror(errno));
- exit(1); /* should not reach here */
+ ksft_print_msg("child execveat() failed, rc=%d errno=%d (%s)\n",
+ rc, errno, strerror(errno));
+ exit(errno);
}
/* Parent: wait for & check child's exit status. */
rc = waitpid(child, &status, 0);
if (rc != child) {
- printf("[FAIL] (waitpid(%d,...) returned %d)\n", child, rc);
+ ksft_print_msg("waitpid(%d,...) returned %d\n", child, rc);
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if (!WIFEXITED(status)) {
- printf("[FAIL] (child %d did not exit cleanly, status=%08x)\n",
- child, status);
+ ksft_print_msg("child %d did not exit cleanly, status=%08x\n",
+ child, status);
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if ((WEXITSTATUS(status) != expected_rc) &&
(WEXITSTATUS(status) != expected_rc2)) {
- printf("[FAIL] (child %d exited with %d not %d nor %d)\n",
- child, WEXITSTATUS(status), expected_rc, expected_rc2);
+ ksft_print_msg("child %d exited with %d neither %d nor %d\n",
+ child, WEXITSTATUS(status), expected_rc,
+ expected_rc2);
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
- printf("[OK]\n");
+ ksft_test_result_pass("%s\n", test_name);
return 0;
}
@@ -129,11 +147,9 @@ static int open_or_die(const char *filename, int flags)
{
int fd = open(filename, flags);
- if (fd < 0) {
- printf("Failed to open '%s'; "
+ if (fd < 0)
+ ksft_exit_fail_msg("Failed to open '%s'; "
"check prerequisites are available\n", filename);
- exit(1);
- }
return fd;
}
@@ -162,8 +178,7 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
char *cwd = getcwd(NULL, 0);
if (!cwd) {
- printf("Failed to getcwd(), errno=%d (%s)\n",
- errno, strerror(errno));
+ ksft_perror("Failed to getcwd()");
return 2;
}
strcpy(longpath, cwd);
@@ -193,12 +208,12 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
*/
fd = open(longpath, O_RDONLY);
if (fd > 0) {
- printf("Invoke copy of '%s' via filename of length %zu:\n",
- src, strlen(longpath));
+ ksft_print_msg("Invoke copy of '%s' via filename of length %zu:\n",
+ src, strlen(longpath));
fail += check_execveat(fd, "", AT_EMPTY_PATH);
} else {
- printf("Failed to open length %zu filename, errno=%d (%s)\n",
- strlen(longpath), errno, strerror(errno));
+ ksft_print_msg("Failed to open length %zu filename, errno=%d (%s)\n",
+ strlen(longpath), errno, strerror(errno));
fail++;
}
@@ -212,15 +227,41 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
* "If the command name is found, but it is not an executable utility,
* the exit status shall be 126."), so allow either.
*/
- if (is_script)
+ if (is_script) {
+ ksft_print_msg("Invoke script via root_dfd and relative filename\n");
fail += check_execveat_invoked_rc(root_dfd, longpath + 1, 0,
127, 126);
- else
+ } else {
+ ksft_print_msg("Invoke exec via root_dfd and relative filename\n");
fail += check_execveat(root_dfd, longpath + 1, 0);
+ }
return fail;
}
+static int check_execveat_comm(int fd, char *argv0, char *expected)
+{
+ char buf[128], *old_env, *old_argv0;
+ int ret;
+
+ snprintf(buf, sizeof(buf), CHECK_COMM "=%s", expected);
+
+ old_env = envp[1];
+ envp[1] = buf;
+
+ old_argv0 = argv[0];
+ argv[0] = argv0;
+
+ ksft_print_msg("Check execveat(AT_EMPTY_PATH)'s comm is %s\n",
+ expected);
+ ret = check_execveat_invoked_rc(fd, "", AT_EMPTY_PATH, 0, 0);
+
+ envp[1] = old_env;
+ argv[0] = old_argv0;
+
+ return ret;
+}
+
static int run_tests(void)
{
int fail = 0;
@@ -373,13 +414,21 @@ static int run_tests(void)
fail += check_execveat_pathmax(root_dfd, "execveat", 0);
fail += check_execveat_pathmax(root_dfd, "script", 1);
+
+ /* /proc/pid/comm gives filename by default */
+ fail += check_execveat_comm(fd, "sentinel", "execveat");
+ /* /proc/pid/comm gives argv[0] when invoked via link */
+ fail += check_execveat_comm(fd_symlink, "sentinel", "execveat");
+ /* /proc/pid/comm gives filename if NULL is passed */
+ fail += check_execveat_comm(fd, NULL, "execveat");
+
return fail;
}
static void prerequisites(void)
{
int fd;
- const char *script = "#!/bin/sh\nexit $*\n";
+ const char *script = "#!/bin/bash\nexit $*\n";
/* Create ephemeral copies of files */
exe_cp("execveat", "execveat.ephemeral");
@@ -399,34 +448,73 @@ int main(int argc, char **argv)
int ii;
int rc;
const char *verbose = getenv("VERBOSE");
+ const char *check_comm = getenv(CHECK_COMM);
- if (argc >= 2) {
- /* If we are invoked with an argument, don't run tests. */
+ if (argc >= 2 || check_comm) {
+ /*
+ * If we are invoked with an argument, or no arguments but a
+ * command to check, don't run tests.
+ */
const char *in_test = getenv("IN_TEST");
if (verbose) {
- printf(" invoked with:");
+ ksft_print_msg("invoked with:\n");
for (ii = 0; ii < argc; ii++)
- printf(" [%d]='%s'", ii, argv[ii]);
- printf("\n");
+ ksft_print_msg("\t[%d]='%s\n'", ii, argv[ii]);
+ }
+
+ /* If the tests wanted us to check the command, do so. */
+ if (check_comm) {
+ /* TASK_COMM_LEN == 16 */
+ char buf[32];
+ int fd, ret;
+
+ fd = open("/proc/self/comm", O_RDONLY);
+ if (fd < 0) {
+ ksft_perror("open() comm failed");
+ exit(1);
+ }
+
+ ret = read(fd, buf, sizeof(buf));
+ if (ret < 0) {
+ ksft_perror("read() comm failed");
+ close(fd);
+ exit(1);
+ }
+ close(fd);
+
+ // trim off the \n
+ buf[ret-1] = 0;
+
+ if (strcmp(buf, check_comm)) {
+ ksft_print_msg("bad comm, got: %s expected: %s\n",
+ buf, check_comm);
+ exit(1);
+ }
+
+ exit(0);
}
/* Check expected environment transferred. */
if (!in_test || strcmp(in_test, "yes") != 0) {
- printf("[FAIL] (no IN_TEST=yes in env)\n");
+ ksft_print_msg("no IN_TEST=yes in env\n");
return 1;
}
/* Use the final argument as an exit code. */
rc = atoi(argv[argc - 1]);
- fflush(stdout);
+ exit(rc);
} else {
+ ksft_print_header();
+ ksft_set_plan(TESTS_EXPECTED);
prerequisites();
if (verbose)
envp[1] = "VERBOSE=1";
rc = run_tests();
if (rc > 0)
printf("%d tests failed\n", rc);
+ ksft_finished();
}
+
return rc;
}
diff --git a/tools/testing/selftests/exec/false.c b/tools/testing/selftests/exec/false.c
new file mode 100644
index 000000000000..104383ec3a79
--- /dev/null
+++ b/tools/testing/selftests/exec/false.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+int main(void)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
index d487c2f6a615..55fd3732f029 100644
--- a/tools/testing/selftests/exec/load_address.c
+++ b/tools/testing/selftests/exec/load_address.c
@@ -5,10 +5,13 @@
#include <link.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
+#include "kselftest.h"
struct Statistics {
unsigned long long load_address;
unsigned long long alignment;
+ bool interp;
};
int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
@@ -25,11 +28,20 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
stats->alignment = 0;
for (i = 0; i < info->dlpi_phnum; i++) {
+ unsigned long long align;
+
+ if (info->dlpi_phdr[i].p_type == PT_INTERP) {
+ stats->interp = true;
+ continue;
+ }
+
if (info->dlpi_phdr[i].p_type != PT_LOAD)
continue;
- if (info->dlpi_phdr[i].p_align > stats->alignment)
- stats->alignment = info->dlpi_phdr[i].p_align;
+ align = info->dlpi_phdr[i].p_align;
+
+ if (align > stats->alignment)
+ stats->alignment = align;
}
return 1; // Terminate dl_iterate_phdr.
@@ -37,32 +49,57 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
int main(int argc, char **argv)
{
- struct Statistics extracted;
- unsigned long long misalign;
+ struct Statistics extracted = { };
+ unsigned long long misalign, pow2;
+ bool interp_needed;
+ char buf[1024];
+ FILE *maps;
int ret;
- ret = dl_iterate_phdr(ExtractStatistics, &extracted);
- if (ret != 1) {
- fprintf(stderr, "FAILED\n");
- return 1;
- }
+ ksft_print_header();
+ ksft_set_plan(4);
- if (extracted.alignment == 0) {
- fprintf(stderr, "No alignment found\n");
- return 1;
- } else if (extracted.alignment & (extracted.alignment - 1)) {
- fprintf(stderr, "Alignment is not a power of 2\n");
- return 1;
+ /* Dump maps file for debugging reference. */
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps)
+ ksft_exit_fail_msg("FAILED: /proc/self/maps: %s\n", strerror(errno));
+ while (fgets(buf, sizeof(buf), maps)) {
+ ksft_print_msg("%s", buf);
}
+ fclose(maps);
+
+ /* Walk the program headers. */
+ ret = dl_iterate_phdr(ExtractStatistics, &extracted);
+ if (ret != 1)
+ ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n");
+
+ /* Report our findings. */
+ ksft_print_msg("load_address=%#llx alignment=%#llx\n",
+ extracted.load_address, extracted.alignment);
+
+ /* If we're named with ".static." we expect no INTERP. */
+ interp_needed = strstr(argv[0], ".static.") == NULL;
+
+ /* Were we built as expected? */
+ ksft_test_result(interp_needed == extracted.interp,
+ "%s INTERP program header %s\n",
+ interp_needed ? "Wanted" : "Unwanted",
+ extracted.interp ? "seen" : "missing");
+ /* Did we find an alignment? */
+ ksft_test_result(extracted.alignment != 0,
+ "Alignment%s found\n", extracted.alignment ? "" : " NOT");
+
+ /* Is the alignment sane? */
+ pow2 = extracted.alignment & (extracted.alignment - 1);
+ ksft_test_result(pow2 == 0,
+ "Alignment is%s a power of 2: %#llx\n",
+ pow2 == 0 ? "" : " NOT", extracted.alignment);
+
+ /* Is the load address aligned? */
misalign = extracted.load_address & (extracted.alignment - 1);
- if (misalign) {
- printf("alignment = %llu, load_address = %llu\n",
- extracted.alignment, extracted.load_address);
- fprintf(stderr, "FAILED\n");
- return 1;
- }
+ ksft_test_result(misalign == 0, "Load Address is %saligned (%#llx)\n",
+ misalign ? "MIS" : "", misalign);
- fprintf(stderr, "PASS\n");
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c
index cd3a34aca93e..14ac36487df5 100644
--- a/tools/testing/selftests/exec/non-regular.c
+++ b/tools/testing/selftests/exec/non-regular.c
@@ -9,7 +9,7 @@
#include <sys/sysmacros.h>
#include <sys/types.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
/* Remove a file, ignoring the result if it didn't exist. */
void rm(struct __test_metadata *_metadata, const char *pathname,
diff --git a/tools/testing/selftests/exec/null-argv.c b/tools/testing/selftests/exec/null-argv.c
new file mode 100644
index 000000000000..4940aee5bb38
--- /dev/null
+++ b/tools/testing/selftests/exec/null-argv.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test that empty argvs are swapped out for a single empty string. */
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+
+#define FORK(exec) \
+do { \
+ pid = fork(); \
+ if (pid == 0) { \
+ /* Child */ \
+ exec; /* Some kind of exec */ \
+ perror("# " #exec); \
+ return 1; \
+ } \
+ check_result(pid, #exec); \
+} while (0)
+
+void check_result(pid_t pid, const char *msg)
+{
+ int wstatus;
+
+ if (pid == (pid_t)-1) {
+ perror("# fork");
+ ksft_test_result_fail("fork failed: %s\n", msg);
+ return;
+ }
+ if (waitpid(pid, &wstatus, 0) < 0) {
+ perror("# waitpid");
+ ksft_test_result_fail("waitpid failed: %s\n", msg);
+ return;
+ }
+ if (!WIFEXITED(wstatus)) {
+ ksft_test_result_fail("child did not exit: %s\n", msg);
+ return;
+ }
+ if (WEXITSTATUS(wstatus) != 0) {
+ ksft_test_result_fail("non-zero exit: %s\n", msg);
+ return;
+ }
+ ksft_test_result_pass("%s\n", msg);
+}
+
+int main(int argc, char *argv[], char *envp[])
+{
+ pid_t pid;
+ static char * const args[] = { NULL };
+ static char * const str[] = { "", NULL };
+
+ /* argc counting checks */
+ if (argc < 1) {
+ fprintf(stderr, "# FAIL: saw argc == 0 (old kernel?)\n");
+ return 1;
+ }
+ if (argc != 1) {
+ fprintf(stderr, "# FAIL: unknown argc (%d)\n", argc);
+ return 1;
+ }
+ if (argv[0][0] == '\0') {
+ /* Good, we found a NULL terminated string at argv[0]! */
+ return 0;
+ }
+
+ /* Test runner. */
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ FORK(execve(argv[0], str, NULL));
+ FORK(execve(argv[0], NULL, NULL));
+ FORK(execve(argv[0], NULL, envp));
+ FORK(execve(argv[0], args, NULL));
+ FORK(execve(argv[0], args, envp));
+
+ ksft_exit(ksft_cnt.ksft_pass == ksft_plan);
+}
diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c
index 2dbd5bc45b3e..7b5c4f6d1928 100644
--- a/tools/testing/selftests/exec/recursion-depth.c
+++ b/tools/testing/selftests/exec/recursion-depth.c
@@ -23,45 +23,44 @@
#include <fcntl.h>
#include <sys/mount.h>
#include <unistd.h>
+#include "kselftest.h"
int main(void)
{
+ int fd, rv;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
if (unshare(CLONE_NEWNS) == -1) {
if (errno == ENOSYS || errno == EPERM) {
- fprintf(stderr, "error: unshare, errno %d\n", errno);
- return 4;
+ ksft_test_result_skip("error: unshare, errno %d\n", errno);
+ ksft_finished();
}
- fprintf(stderr, "error: unshare, errno %d\n", errno);
- return 1;
- }
- if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
- fprintf(stderr, "error: mount '/', errno %d\n", errno);
- return 1;
+ ksft_exit_fail_perror("error: unshare");
}
+
+ if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1)
+ ksft_exit_fail_perror("error: mount '/'");
+
/* Require "exec" filesystem. */
- if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) {
- fprintf(stderr, "error: mount ramfs, errno %d\n", errno);
- return 1;
- }
+ if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1)
+ ksft_exit_fail_perror("error: mount ramfs");
#define FILENAME "/tmp/1"
- int fd = creat(FILENAME, 0700);
- if (fd == -1) {
- fprintf(stderr, "error: creat, errno %d\n", errno);
- return 1;
- }
+ fd = creat(FILENAME, 0700);
+ if (fd == -1)
+ ksft_exit_fail_perror("error: creat");
+
#define S "#!" FILENAME "\n"
- if (write(fd, S, strlen(S)) != strlen(S)) {
- fprintf(stderr, "error: write, errno %d\n", errno);
- return 1;
- }
+ if (write(fd, S, strlen(S)) != strlen(S))
+ ksft_exit_fail_perror("error: write");
+
close(fd);
- int rv = execve(FILENAME, NULL, NULL);
- if (rv == -1 && errno == ELOOP) {
- return 0;
- }
- fprintf(stderr, "error: execve, rv %d, errno %d\n", rv, errno);
- return 1;
+ rv = execve(FILENAME, NULL, NULL);
+ ksft_test_result(rv == -1 && errno == ELOOP,
+ "execve failed as expected (ret %d, errno %d)\n", rv, errno);
+ ksft_finished();
}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/fchmodat2/.gitignore
index d65462d64816..82a4846cbc4b 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
+++ b/tools/testing/selftests/fchmodat2/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-*.out
+/*_test
diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile
new file mode 100644
index 000000000000..4373cea79b79
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES)
+
+# gcc requires -static-libasan in order to ensure that Address Sanitizer's
+# library is the first one loaded. However, clang already statically links the
+# Address Sanitizer if -fsanitize is specified. Therefore, simply omit
+# -static-libasan for clang builds.
+ifeq ($(LLVM),)
+ CFLAGS += -static-libasan
+endif
+
+TEST_GEN_PROGS := fchmodat2_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
new file mode 100644
index 000000000000..e397339495f6
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+
+int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags)
+{
+ int ret = syscall(__NR_fchmodat2, dfd, filename, mode, flags);
+
+ return ret >= 0 ? ret : -errno;
+}
+
+int setup_testdir(void)
+{
+ int dfd, ret;
+ char dirname[] = "/tmp/ksft-fchmodat2.XXXXXX";
+
+ /* Make the top-level directory. */
+ if (!mkdtemp(dirname))
+ ksft_exit_fail_msg("%s: failed to create tmpdir\n", __func__);
+
+ dfd = open(dirname, O_PATH | O_DIRECTORY);
+ if (dfd < 0)
+ ksft_exit_fail_msg("%s: failed to open tmpdir\n", __func__);
+
+ ret = openat(dfd, "regfile", O_CREAT | O_WRONLY | O_TRUNC, 0644);
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: failed to create file in tmpdir\n",
+ __func__);
+ close(ret);
+
+ ret = symlinkat("regfile", dfd, "symlink");
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: failed to create symlink in tmpdir\n",
+ __func__);
+
+ return dfd;
+}
+
+int expect_mode(int dfd, const char *filename, mode_t expect_mode)
+{
+ struct stat st;
+ int ret = fstatat(dfd, filename, &st, AT_SYMLINK_NOFOLLOW);
+
+ if (ret)
+ ksft_exit_fail_msg("%s: %s: fstatat failed\n",
+ __func__, filename);
+
+ return (st.st_mode == expect_mode);
+}
+
+void test_regfile(void)
+{
+ int dfd, ret;
+
+ dfd = setup_testdir();
+
+ ret = sys_fchmodat2(dfd, "regfile", 0640, 0);
+
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100640))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+ __func__);
+
+ ret = sys_fchmodat2(dfd, "regfile", 0600, AT_SYMLINK_NOFOLLOW);
+
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: fchmodat2(AT_SYMLINK_NOFOLLOW) failed\n",
+ __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100600))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+ __func__);
+
+ ksft_test_result_pass("fchmodat2(regfile)\n");
+}
+
+void test_symlink(void)
+{
+ int dfd, ret;
+
+ dfd = setup_testdir();
+
+ ret = sys_fchmodat2(dfd, "symlink", 0640, 0);
+
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100640))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+ __func__);
+
+ if (!expect_mode(dfd, "symlink", 0120777))
+ ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2\n",
+ __func__);
+
+ ret = sys_fchmodat2(dfd, "symlink", 0600, AT_SYMLINK_NOFOLLOW);
+
+ /*
+ * On certain filesystems (xfs or btrfs), chmod operation fails. So we
+ * first check the symlink target but if the operation fails we mark the
+ * test as skipped.
+ *
+ * https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html
+ */
+ if (ret == 0 && !expect_mode(dfd, "symlink", 0120600))
+ ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2 with nofollow\n",
+ __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100640))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+ __func__);
+
+ if (ret != 0)
+ ksft_test_result_skip("fchmodat2(symlink)\n");
+ else
+ ksft_test_result_pass("fchmodat2(symlink)\n");
+}
+
+#define NUM_TESTS 2
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ test_regfile();
+ test_symlink();
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/filelock/Makefile b/tools/testing/selftests/filelock/Makefile
new file mode 100644
index 000000000000..478e82f8b464
--- /dev/null
+++ b/tools/testing/selftests/filelock/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := ofdlocks
+
+include ../lib.mk
diff --git a/tools/testing/selftests/filelock/ofdlocks.c b/tools/testing/selftests/filelock/ofdlocks.c
new file mode 100644
index 000000000000..ff8d47fc373a
--- /dev/null
+++ b/tools/testing/selftests/filelock/ofdlocks.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include "kselftest.h"
+
+static int lock_set(int fd, struct flock *fl)
+{
+ int ret;
+
+ fl->l_pid = 0; // needed for OFD locks
+ fl->l_whence = SEEK_SET;
+ ret = fcntl(fd, F_OFD_SETLK, fl);
+ if (ret)
+ perror("fcntl()");
+ return ret;
+}
+
+static int lock_get(int fd, struct flock *fl)
+{
+ int ret;
+
+ fl->l_pid = 0; // needed for OFD locks
+ fl->l_whence = SEEK_SET;
+ ret = fcntl(fd, F_OFD_GETLK, fl);
+ if (ret)
+ perror("fcntl()");
+ return ret;
+}
+
+int main(void)
+{
+ int rc;
+ struct flock fl, fl2;
+ int fd = open("/tmp/aa", O_RDWR | O_CREAT | O_EXCL, 0600);
+ int fd2 = open("/tmp/aa", O_RDONLY);
+
+ unlink("/tmp/aa");
+ assert(fd != -1);
+ assert(fd2 != -1);
+ ksft_print_msg("[INFO] opened fds %i %i\n", fd, fd2);
+
+ /* Set some read lock */
+ fl.l_type = F_RDLCK;
+ fl.l_start = 5;
+ fl.l_len = 3;
+ rc = lock_set(fd, &fl);
+ if (rc == 0) {
+ ksft_print_msg
+ ("[SUCCESS] set OFD read lock on first fd\n");
+ } else {
+ ksft_print_msg("[FAIL] to set OFD read lock on first fd\n");
+ return -1;
+ }
+ /* Make sure read locks do not conflict on different fds. */
+ fl.l_type = F_RDLCK;
+ fl.l_start = 5;
+ fl.l_len = 1;
+ rc = lock_get(fd2, &fl);
+ if (rc != 0)
+ return -1;
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg("[FAIL] read locks conflicted\n");
+ return -1;
+ }
+ /* Make sure read/write locks do conflict on different fds. */
+ fl.l_type = F_WRLCK;
+ fl.l_start = 5;
+ fl.l_len = 1;
+ rc = lock_get(fd2, &fl);
+ if (rc != 0)
+ return -1;
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg
+ ("[SUCCESS] read and write locks conflicted\n");
+ } else {
+ ksft_print_msg
+ ("[SUCCESS] read and write locks not conflicted\n");
+ return -1;
+ }
+ /* Get info about the lock on first fd. */
+ fl.l_type = F_UNLCK;
+ fl.l_start = 5;
+ fl.l_len = 1;
+ rc = lock_get(fd, &fl);
+ if (rc != 0) {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+ return -1;
+ }
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg
+ ("[SUCCESS] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+ fl.l_type, fl.l_pid, fl.l_len);
+ } else {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK did not return lock info\n");
+ return -1;
+ }
+ /* Try the same but by locking everything by len==0. */
+ fl2.l_type = F_UNLCK;
+ fl2.l_start = 0;
+ fl2.l_len = 0;
+ rc = lock_get(fd, &fl2);
+ if (rc != 0) {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+ return -1;
+ }
+ if (memcmp(&fl, &fl2, sizeof(fl))) {
+ ksft_print_msg
+ ("[FAIL] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+ fl.l_type, fl.l_pid, fl.l_len);
+ return -1;
+ }
+ ksft_print_msg("[SUCCESS] F_UNLCK with len==0 returned the same\n");
+ /* Get info about the lock on second fd - no locks on it. */
+ fl.l_type = F_UNLCK;
+ fl.l_start = 0;
+ fl.l_len = 0;
+ lock_get(fd2, &fl);
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK return lock info from another fd\n");
+ return -1;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
index f0c0ff20d6cf..64ac0dfa46b7 100644
--- a/tools/testing/selftests/filesystems/.gitignore
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -1,3 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
dnotify_test
devpts_pts
+fclog
+file_stressor
+anon_inode_test
+kernfs_test
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 129880fb42d3..85427d7f19b9 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../usr/include/
-TEST_GEN_PROGS := devpts_pts
+CFLAGS += $(KHDR_INCLUDES)
+TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test fclog
TEST_GEN_PROGS_EXTENDED := dnotify_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c
new file mode 100644
index 000000000000..94c6c81c2301
--- /dev/null
+++ b/tools/testing/selftests/filesystems/anon_inode_test.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "kselftest_harness.h"
+#include "wrappers.h"
+
+TEST(anon_inode_no_chown)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(fchown(fd_context, 1234, 5678), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_chmod)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(fchmod(fd_context, 0777), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_exec)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(execveat(fd_context, "", NULL, NULL, AT_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, EACCES);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_open)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_GE(dup2(fd_context, 500), 0);
+ ASSERT_EQ(close(fd_context), 0);
+ fd_context = 500;
+
+ ASSERT_LT(open("/proc/self/fd/500", 0), 0);
+ ASSERT_EQ(errno, ENXIO);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile
index 8af25ae96049..eb4c3b411934 100644
--- a/tools/testing/selftests/filesystems/binderfs/Makefile
+++ b/tools/testing/selftests/filesystems/binderfs/Makefile
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include/ -pthread
+CFLAGS += $(KHDR_INCLUDES) -pthread
TEST_GEN_PROGS := binderfs_test
-binderfs_test: binderfs_test.c ../../kselftest.h ../../kselftest_harness.h
-
include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 477cbb042f5b..a1a79a6fef17 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -21,7 +21,7 @@
#include <linux/android/binder.h>
#include <linux/android/binderfs.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define DEFAULT_THREADS 4
@@ -57,11 +57,16 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
{
int fd, ret, saved_errno, result = 1;
size_t len;
- ssize_t wret;
struct binderfs_device device = { 0 };
struct binder_version version = { 0 };
char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
+ static const char * const binder_features[] = {
+ "oneway_spam_detection",
+ "extended_error",
+ "freeze_notification",
+ "transaction_report",
+ };
change_mountns(_metadata);
@@ -150,6 +155,20 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
}
/* success: binder-control device removal failed as expected */
+
+ for (int i = 0; i < ARRAY_SIZE(binder_features); i++) {
+ snprintf(device_path, sizeof(device_path), "%s/features/%s",
+ binderfs_mntpt, binder_features[i]);
+ fd = open(device_path, O_CLOEXEC | O_RDONLY);
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder feature: %s",
+ strerror(errno), binder_features[i]);
+ goto umount;
+ }
+ close(fd);
+ }
+
+ /* success: binder feature files found */
result = 0;
umount:
@@ -395,7 +414,8 @@ TEST(binderfs_stress)
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
ASSERT_EQ(ret, 0) {
- TH_LOG("%s - Failed to mount binderfs", strerror(errno));
+ TH_LOG("%s - Failed to mount binderfs, check if CONFIG_ANDROID_BINDERFS is enabled in the running kernel",
+ strerror(errno));
}
for (int i = 0; i < ARRAY_SIZE(fds); i++) {
diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config
index 02dd6cc9cf99..7b4fc6ee6205 100644
--- a/tools/testing/selftests/filesystems/binderfs/config
+++ b/tools/testing/selftests/filesystems/binderfs/config
@@ -1,3 +1,2 @@
-CONFIG_ANDROID=y
CONFIG_ANDROID_BINDERFS=y
CONFIG_ANDROID_BINDER_IPC=y
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
index b1fc9b916ace..54fea349204e 100644
--- a/tools/testing/selftests/filesystems/devpts_pts.c
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -11,7 +11,7 @@
#include <asm/ioctls.h>
#include <sys/mount.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static bool terminal_dup2(int duplicate, int original)
{
diff --git a/tools/testing/selftests/filesystems/epoll/Makefile b/tools/testing/selftests/filesystems/epoll/Makefile
index 78ae4aaf7141..0788a7dc8004 100644
--- a/tools/testing/selftests/filesystems/epoll/Makefile
+++ b/tools/testing/selftests/filesystems/epoll/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
LDLIBS += -lpthread
TEST_GEN_PROGS := epoll_wakeup_test
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
index ad7fabd575f9..8bc57a2ef966 100644
--- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -11,7 +11,7 @@
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/eventfd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
struct epoll_mtcontext
{
@@ -3449,4 +3449,48 @@ TEST(epoll63)
close(sfd[1]);
}
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll64)
+{
+ pthread_t waiter[2];
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ /*
+ * main will act as the emitter once both waiter threads are
+ * blocked and expects to both be awoken upon the ready event.
+ */
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&waiter[0], NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&waiter[1], NULL, waiter_entry1a, &ctx), 0);
+
+ usleep(100000);
+ ASSERT_EQ(write(ctx.sfd[1], "w", 1), 1);
+
+ ASSERT_EQ(pthread_join(waiter[0], NULL), 0);
+ ASSERT_EQ(pthread_join(waiter[1], NULL), 0);
+
+ EXPECT_EQ(ctx.count, 2);
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/filesystems/eventfd/.gitignore
index 57d296341304..483faf59fe4a 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
+++ b/tools/testing/selftests/filesystems/eventfd/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-srcu.h
+eventfd_test
diff --git a/tools/testing/selftests/filesystems/eventfd/Makefile b/tools/testing/selftests/filesystems/eventfd/Makefile
new file mode 100644
index 000000000000..0a8e3910df15
--- /dev/null
+++ b/tools/testing/selftests/filesystems/eventfd/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lpthread
+TEST_GEN_PROGS := eventfd_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
new file mode 100644
index 000000000000..1b48f267157d
--- /dev/null
+++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <asm/unistd.h>
+#include <linux/time_types.h>
+#include <unistd.h>
+#include <assert.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include "kselftest_harness.h"
+
+#define EVENTFD_TEST_ITERATIONS 100000UL
+
+struct error {
+ int code;
+ char msg[512];
+};
+
+static int error_set(struct error *err, int code, const char *fmt, ...)
+{
+ va_list args;
+ int r;
+
+ if (code == 0 || !err || err->code != 0)
+ return code;
+
+ err->code = code;
+ va_start(args, fmt);
+ r = vsnprintf(err->msg, sizeof(err->msg), fmt, args);
+ assert((size_t)r < sizeof(err->msg));
+ va_end(args);
+
+ return code;
+}
+
+static inline int sys_eventfd2(unsigned int count, int flags)
+{
+ return syscall(__NR_eventfd2, count, flags);
+}
+
+TEST(eventfd_check_flag_rdwr)
+{
+ int fd, flags;
+
+ fd = sys_eventfd2(0, 0);
+ ASSERT_GE(fd, 0);
+
+ flags = fcntl(fd, F_GETFL);
+ // The kernel automatically adds the O_RDWR flag.
+ EXPECT_EQ(flags, O_RDWR);
+
+ close(fd);
+}
+
+TEST(eventfd_check_flag_cloexec)
+{
+ int fd, flags;
+
+ fd = sys_eventfd2(0, EFD_CLOEXEC);
+ ASSERT_GE(fd, 0);
+
+ flags = fcntl(fd, F_GETFD);
+ ASSERT_GT(flags, -1);
+ EXPECT_EQ(flags, FD_CLOEXEC);
+
+ close(fd);
+}
+
+TEST(eventfd_check_flag_nonblock)
+{
+ int fd, flags;
+
+ fd = sys_eventfd2(0, EFD_NONBLOCK);
+ ASSERT_GE(fd, 0);
+
+ flags = fcntl(fd, F_GETFL);
+ ASSERT_GT(flags, -1);
+ EXPECT_EQ(flags & EFD_NONBLOCK, EFD_NONBLOCK);
+ EXPECT_EQ(flags & O_RDWR, O_RDWR);
+
+ close(fd);
+}
+
+TEST(eventfd_check_flag_cloexec_and_nonblock)
+{
+ int fd, flags;
+
+ fd = sys_eventfd2(0, EFD_CLOEXEC|EFD_NONBLOCK);
+ ASSERT_GE(fd, 0);
+
+ flags = fcntl(fd, F_GETFL);
+ ASSERT_GT(flags, -1);
+ EXPECT_EQ(flags & EFD_NONBLOCK, EFD_NONBLOCK);
+ EXPECT_EQ(flags & O_RDWR, O_RDWR);
+
+ flags = fcntl(fd, F_GETFD);
+ ASSERT_GT(flags, -1);
+ EXPECT_EQ(flags, FD_CLOEXEC);
+
+ close(fd);
+}
+
+static inline void trim_newline(char *str)
+{
+ char *pos = strrchr(str, '\n');
+
+ if (pos)
+ *pos = '\0';
+}
+
+static int verify_fdinfo(int fd, struct error *err, const char *prefix,
+ size_t prefix_len, const char *expect, ...)
+{
+ char buffer[512] = {0, };
+ char path[512] = {0, };
+ va_list args;
+ FILE *f;
+ char *line = NULL;
+ size_t n = 0;
+ int found = 0;
+ int r;
+
+ va_start(args, expect);
+ r = vsnprintf(buffer, sizeof(buffer), expect, args);
+ assert((size_t)r < sizeof(buffer));
+ va_end(args);
+
+ snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
+ f = fopen(path, "re");
+ if (!f)
+ return error_set(err, -1, "fdinfo open failed for %d", fd);
+
+ while (getline(&line, &n, f) != -1) {
+ char *val;
+
+ if (strncmp(line, prefix, prefix_len))
+ continue;
+
+ found = 1;
+
+ val = line + prefix_len;
+ r = strcmp(val, buffer);
+ if (r != 0) {
+ trim_newline(line);
+ trim_newline(buffer);
+ error_set(err, -1, "%s '%s' != '%s'",
+ prefix, val, buffer);
+ }
+ break;
+ }
+
+ free(line);
+ fclose(f);
+
+ if (found == 0)
+ return error_set(err, -1, "%s not found for fd %d",
+ prefix, fd);
+
+ return 0;
+}
+
+TEST(eventfd_check_flag_semaphore)
+{
+ struct error err = {0};
+ int fd, ret;
+
+ fd = sys_eventfd2(0, EFD_SEMAPHORE);
+ ASSERT_GE(fd, 0);
+
+ ret = fcntl(fd, F_GETFL);
+ ASSERT_GT(ret, -1);
+ EXPECT_EQ(ret & O_RDWR, O_RDWR);
+
+ // The semaphore could only be obtained from fdinfo.
+ ret = verify_fdinfo(fd, &err, "eventfd-semaphore: ", 19, "1\n");
+ if (ret != 0)
+ ksft_print_msg("eventfd semaphore flag check failed: %s\n", err.msg);
+ EXPECT_EQ(ret, 0);
+
+ close(fd);
+}
+
+/*
+ * A write(2) fails with the error EINVAL if the size of the supplied buffer
+ * is less than 8 bytes, or if an attempt is made to write the value
+ * 0xffffffffffffffff.
+ */
+TEST(eventfd_check_write)
+{
+ uint64_t value = 1;
+ ssize_t size;
+ int fd;
+
+ fd = sys_eventfd2(0, 0);
+ ASSERT_GE(fd, 0);
+
+ size = write(fd, &value, sizeof(int));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ size = write(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+
+ value = (uint64_t)-1;
+ size = write(fd, &value, sizeof(value));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ close(fd);
+}
+
+/*
+ * A read(2) fails with the error EINVAL if the size of the supplied buffer is
+ * less than 8 bytes.
+ */
+TEST(eventfd_check_read)
+{
+ uint64_t value;
+ ssize_t size;
+ int fd;
+
+ fd = sys_eventfd2(1, 0);
+ ASSERT_GE(fd, 0);
+
+ size = read(fd, &value, sizeof(int));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+ EXPECT_EQ(value, 1);
+
+ close(fd);
+}
+
+
+/*
+ * If EFD_SEMAPHORE was not specified and the eventfd counter has a nonzero
+ * value, then a read(2) returns 8 bytes containing that value, and the
+ * counter's value is reset to zero.
+ * If the eventfd counter is zero at the time of the call to read(2), then the
+ * call fails with the error EAGAIN if the file descriptor has been made nonblocking.
+ */
+TEST(eventfd_check_read_with_nonsemaphore)
+{
+ uint64_t value;
+ ssize_t size;
+ int fd;
+ int i;
+
+ fd = sys_eventfd2(0, EFD_NONBLOCK);
+ ASSERT_GE(fd, 0);
+
+ value = 1;
+ for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) {
+ size = write(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+ }
+
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(uint64_t));
+ EXPECT_EQ(value, EVENTFD_TEST_ITERATIONS);
+
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EAGAIN);
+
+ close(fd);
+}
+
+/*
+ * If EFD_SEMAPHORE was specified and the eventfd counter has a nonzero value,
+ * then a read(2) returns 8 bytes containing the value 1, and the counter's
+ * value is decremented by 1.
+ * If the eventfd counter is zero at the time of the call to read(2), then the
+ * call fails with the error EAGAIN if the file descriptor has been made nonblocking.
+ */
+TEST(eventfd_check_read_with_semaphore)
+{
+ uint64_t value;
+ ssize_t size;
+ int fd;
+ int i;
+
+ fd = sys_eventfd2(0, EFD_SEMAPHORE|EFD_NONBLOCK);
+ ASSERT_GE(fd, 0);
+
+ value = 1;
+ for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) {
+ size = write(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+ }
+
+ for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) {
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+ EXPECT_EQ(value, 1);
+ }
+
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EAGAIN);
+
+ close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/fat/.gitignore b/tools/testing/selftests/filesystems/fat/.gitignore
new file mode 100644
index 000000000000..b89920ed841c
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+rename_exchange
diff --git a/tools/testing/selftests/filesystems/fat/Makefile b/tools/testing/selftests/filesystems/fat/Makefile
new file mode 100644
index 000000000000..902033f6ef09
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_PROGS := run_fat_tests.sh
+TEST_GEN_PROGS_EXTENDED := rename_exchange
+CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/fat/config b/tools/testing/selftests/filesystems/fat/config
new file mode 100644
index 000000000000..6cf95e787a17
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/config
@@ -0,0 +1,2 @@
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VFAT_FS=y
diff --git a/tools/testing/selftests/filesystems/fat/rename_exchange.c b/tools/testing/selftests/filesystems/fat/rename_exchange.c
new file mode 100644
index 000000000000..e488ad354fce
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/rename_exchange.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Program that atomically exchanges two paths using
+ * the renameat2() system call RENAME_EXCHANGE flag.
+ *
+ * Copyright 2022 Red Hat Inc.
+ * Author: Javier Martinez Canillas <javierm@redhat.com>
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void print_usage(const char *program)
+{
+ printf("Usage: %s [oldpath] [newpath]\n", program);
+ printf("Atomically exchange oldpath and newpath\n");
+}
+
+int main(int argc, char *argv[])
+{
+ int ret;
+
+ if (argc != 3) {
+ print_usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = renameat2(AT_FDCWD, argv[1], AT_FDCWD, argv[2], RENAME_EXCHANGE);
+ if (ret) {
+ perror("rename exchange failed");
+ exit(EXIT_FAILURE);
+ }
+
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
new file mode 100755
index 000000000000..d61264d4795d
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run filesystem operations tests on an 1 MiB disk image that is formatted with
+# a vfat filesystem and mounted in a temporary directory using a loop device.
+#
+# Copyright 2022 Red Hat Inc.
+# Author: Javier Martinez Canillas <javierm@redhat.com>
+
+set -e
+set -u
+set -o pipefail
+
+BASE_DIR="$(dirname $0)"
+TMP_DIR="$(mktemp -d /tmp/fat_tests_tmp.XXXXXX)"
+IMG_PATH="${TMP_DIR}/fat.img"
+MNT_PATH="${TMP_DIR}/mnt"
+
+cleanup()
+{
+ mountpoint -q "${MNT_PATH}" && unmount_image
+ rm -rf "${TMP_DIR}"
+}
+trap cleanup SIGINT SIGTERM EXIT
+
+create_loopback()
+{
+ touch "${IMG_PATH}"
+ chattr +C "${IMG_PATH}" >/dev/null 2>&1 || true
+
+ truncate -s 1M "${IMG_PATH}"
+ mkfs.vfat "${IMG_PATH}" >/dev/null 2>&1
+}
+
+mount_image()
+{
+ mkdir -p "${MNT_PATH}"
+ sudo mount -o loop "${IMG_PATH}" "${MNT_PATH}"
+}
+
+rename_exchange_test()
+{
+ local rename_exchange="${BASE_DIR}/rename_exchange"
+ local old_path="${MNT_PATH}/old_file"
+ local new_path="${MNT_PATH}/new_file"
+
+ echo old | sudo tee "${old_path}" >/dev/null 2>&1
+ echo new | sudo tee "${new_path}" >/dev/null 2>&1
+ sudo "${rename_exchange}" "${old_path}" "${new_path}" >/dev/null 2>&1
+ sudo sync -f "${MNT_PATH}"
+ grep new "${old_path}" >/dev/null 2>&1
+ grep old "${new_path}" >/dev/null 2>&1
+}
+
+rename_exchange_subdir_test()
+{
+ local rename_exchange="${BASE_DIR}/rename_exchange"
+ local dir_path="${MNT_PATH}/subdir"
+ local old_path="${MNT_PATH}/old_file"
+ local new_path="${dir_path}/new_file"
+
+ sudo mkdir -p "${dir_path}"
+ echo old | sudo tee "${old_path}" >/dev/null 2>&1
+ echo new | sudo tee "${new_path}" >/dev/null 2>&1
+ sudo "${rename_exchange}" "${old_path}" "${new_path}" >/dev/null 2>&1
+ sudo sync -f "${MNT_PATH}"
+ grep new "${old_path}" >/dev/null 2>&1
+ grep old "${new_path}" >/dev/null 2>&1
+}
+
+unmount_image()
+{
+ sudo umount "${MNT_PATH}" &> /dev/null
+}
+
+create_loopback
+mount_image
+rename_exchange_test
+rename_exchange_subdir_test
+unmount_image
+
+exit 0
diff --git a/tools/testing/selftests/filesystems/fclog.c b/tools/testing/selftests/filesystems/fclog.c
new file mode 100644
index 000000000000..551c4a0f395a
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fclog.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2025 SUSE LLC.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mount.h>
+
+#include "kselftest_harness.h"
+
+#define ASSERT_ERRNO(expected, _t, seen) \
+ __EXPECT(expected, #expected, \
+ ({__typeof__(seen) _tmp_seen = (seen); \
+ _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1)
+
+#define ASSERT_ERRNO_EQ(expected, seen) \
+ ASSERT_ERRNO(expected, ==, seen)
+
+#define ASSERT_SUCCESS(seen) \
+ ASSERT_ERRNO(0, <=, seen)
+
+FIXTURE(ns)
+{
+ int host_mntns;
+};
+
+FIXTURE_SETUP(ns)
+{
+ /* Stash the old mntns. */
+ self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC);
+ ASSERT_SUCCESS(self->host_mntns);
+
+ /* Create a new mount namespace and make it private. */
+ ASSERT_SUCCESS(unshare(CLONE_NEWNS));
+ ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL));
+}
+
+FIXTURE_TEARDOWN(ns)
+{
+ ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS));
+ ASSERT_SUCCESS(close(self->host_mntns));
+}
+
+TEST_F(ns, fscontext_log_enodata)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ /* A brand new fscontext has no log entries. */
+ char buf[128] = {};
+ for (int i = 0; i < 16; i++)
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_F(ns, fscontext_log_errorfc)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0));
+
+ char buf[128] = {};
+ ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf)));
+ EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf);
+
+ /* The message has been consumed. */
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_F(ns, fscontext_log_errorfc_after_fsmount)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0));
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+ int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NOSUID);
+ ASSERT_SUCCESS(mfd);
+ ASSERT_SUCCESS(move_mount(mfd, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH));
+
+ /*
+ * The fscontext log should still contain data even after
+ * FSCONFIG_CMD_CREATE and fsmount().
+ */
+ char buf[128] = {};
+ ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf)));
+ EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf);
+
+ /* The message has been consumed. */
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_F(ns, fscontext_log_emsgsize)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0));
+
+ char buf[128] = {};
+ /*
+ * Attempting to read a message with too small a buffer should not
+ * result in the message getting consumed.
+ */
+ ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 0));
+ ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 1));
+ for (int i = 0; i < 16; i++)
+ ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 16));
+
+ ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf)));
+ EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf);
+
+ /* The message has been consumed. */
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c
new file mode 100644
index 000000000000..141badd671a9
--- /dev/null
+++ b/tools/testing/selftests/filesystems/file_stressor.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "kselftest_harness.h"
+
+#include <linux/types.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+
+static inline int sys_fsopen(const char *fsname, unsigned int flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key,
+ const char *value, int aux)
+{
+ return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
+}
+
+static inline int sys_fsmount(int fd, unsigned int flags,
+ unsigned int attr_flags)
+{
+ return syscall(__NR_fsmount, fd, flags, attr_flags);
+}
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+#endif
+
+static inline int sys_move_mount(int from_dfd, const char *from_pathname,
+ int to_dfd, const char *to_pathname,
+ unsigned int flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
+ to_pathname, flags);
+}
+
+FIXTURE(file_stressor) {
+ int fd_tmpfs;
+ int nr_procs;
+ int max_fds;
+ pid_t *pids_openers;
+ pid_t *pids_getdents;
+ int *fd_proc_pid;
+};
+
+FIXTURE_SETUP(file_stressor)
+{
+ int fd_context;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+ ASSERT_EQ(mkdir("/slab_typesafe_by_rcu", 0755), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ self->fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(self->fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(sys_move_mount(self->fd_tmpfs, "", -EBADF, "/slab_typesafe_by_rcu", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ self->nr_procs = sysconf(_SC_NPROCESSORS_ONLN);
+ self->pids_openers = malloc(sizeof(pid_t) * self->nr_procs);
+ ASSERT_NE(self->pids_openers, NULL);
+ self->pids_getdents = malloc(sizeof(pid_t) * self->nr_procs);
+ ASSERT_NE(self->pids_getdents, NULL);
+ self->fd_proc_pid = malloc(sizeof(int) * self->nr_procs);
+ ASSERT_NE(self->fd_proc_pid, NULL);
+ self->max_fds = 500;
+}
+
+FIXTURE_TEARDOWN(file_stressor)
+{
+ for (int i = 0; i < self->nr_procs; i++) {
+ int wstatus;
+ pid_t pid;
+
+ pid = waitpid(self->pids_openers[i], &wstatus, 0);
+ ASSERT_EQ(pid, self->pids_openers[i]);
+ ASSERT_TRUE(!WIFEXITED(wstatus) || !WIFSIGNALED(wstatus));
+
+ pid = waitpid(self->pids_getdents[i], &wstatus, 0);
+ ASSERT_EQ(pid, self->pids_getdents[i]);
+ ASSERT_TRUE(!WIFEXITED(wstatus) || !WIFSIGNALED(wstatus));
+ }
+ free(self->pids_openers);
+ free(self->pids_getdents);
+ ASSERT_EQ(close(self->fd_tmpfs), 0);
+
+ umount2("/slab_typesafe_by_rcu", 0);
+ ASSERT_EQ(rmdir("/slab_typesafe_by_rcu"), 0);
+}
+
+TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2)
+{
+ for (int i = 0; i < self->nr_procs; i++) {
+ pid_t pid_self;
+
+ self->pids_openers[i] = fork();
+ ASSERT_GE(self->pids_openers[i], 0);
+
+ if (self->pids_openers[i] != 0)
+ continue;
+
+ self->pids_openers[i] = getpid();
+ for (;;) {
+ for (int i = 0; i < self->max_fds; i++) {
+ char path[PATH_MAX];
+ int fd;
+
+ sprintf(path, "/slab_typesafe_by_rcu/file-%d-%d", self->pids_openers[i], i);
+ fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0644);
+ if (fd < 0)
+ continue;
+ }
+
+ close_range(3, ~0U, 0);
+ }
+
+ exit(0);
+ }
+
+ for (int i = 0; i < self->nr_procs; i++) {
+ char path[PATH_MAX];
+
+ sprintf(path, "/proc/%d/fd/", self->pids_openers[i]);
+ self->fd_proc_pid[i] = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(self->fd_proc_pid[i], 0);
+ }
+
+ for (int i = 0; i < self->nr_procs; i++) {
+ self->pids_getdents[i] = fork();
+ ASSERT_GE(self->pids_getdents[i], 0);
+
+ if (self->pids_getdents[i] != 0)
+ continue;
+
+ self->pids_getdents[i] = getpid();
+ for (;;) {
+ char ents[1024];
+ ssize_t nr_read;
+
+ /*
+ * Concurrently read /proc/<pid>/fd/ which roughly does:
+ *
+ * f = fget_task_next(p, &fd);
+ * if (!f)
+ * break;
+ * data.mode = f->f_mode;
+ * fput(f);
+ *
+ * Which means that it'll try to get a reference to a
+ * file in another task's file descriptor table.
+ *
+ * Under heavy file load it is increasingly likely that
+ * the other task will manage to close @file and @file
+ * is being recycled due to SLAB_TYPEAFE_BY_RCU
+ * concurrently. This will trigger various warnings in
+ * the file reference counting code.
+ */
+ do {
+ nr_read = syscall(SYS_getdents64, self->fd_proc_pid[i], ents, sizeof(ents));
+ } while (nr_read >= 0);
+
+ lseek(self->fd_proc_pid[i], 0, SEEK_SET);
+ }
+
+ exit(0);
+ }
+
+ ASSERT_EQ(clock_nanosleep(CLOCK_MONOTONIC, 0, &(struct timespec){ .tv_sec = 900 /* 15 min */ }, NULL), 0);
+
+ for (int i = 0; i < self->nr_procs; i++) {
+ kill(self->pids_openers[i], SIGKILL);
+ kill(self->pids_getdents[i], SIGKILL);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/fuse/.gitignore b/tools/testing/selftests/filesystems/fuse/.gitignore
new file mode 100644
index 000000000000..3e72e742d08e
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+fuse_mnt
+fusectl_test
diff --git a/tools/testing/selftests/filesystems/fuse/Makefile b/tools/testing/selftests/filesystems/fuse/Makefile
new file mode 100644
index 000000000000..612aad69a93a
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS := fusectl_test
+TEST_GEN_FILES := fuse_mnt
+
+include ../../lib.mk
+
+VAR_CFLAGS := $(shell pkg-config fuse --cflags 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS := -D_FILE_OFFSET_BITS=64 -I/usr/include/fuse
+endif
+
+VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS := -lfuse -pthread
+endif
+
+$(OUTPUT)/fuse_mnt: CFLAGS += $(VAR_CFLAGS)
+$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS)
diff --git a/tools/testing/selftests/filesystems/fuse/fuse_mnt.c b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c
new file mode 100644
index 000000000000..d12b17f30fad
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fusectl test file-system
+ * Creates a simple FUSE filesystem with a single read-write file (/test)
+ */
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+static char *content;
+static size_t content_size = 0;
+static const char test_path[] = "/test";
+
+static int test_getattr(const char *path, struct stat *st)
+{
+ memset(st, 0, sizeof(*st));
+
+ if (!strcmp(path, "/")) {
+ st->st_mode = S_IFDIR | 0755;
+ st->st_nlink = 2;
+ return 0;
+ }
+
+ if (!strcmp(path, test_path)) {
+ st->st_mode = S_IFREG | 0664;
+ st->st_nlink = 1;
+ st->st_size = content_size;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int test_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
+ off_t offset, struct fuse_file_info *fi)
+{
+ if (strcmp(path, "/"))
+ return -ENOENT;
+
+ filler(buf, ".", NULL, 0);
+ filler(buf, "..", NULL, 0);
+ filler(buf, test_path + 1, NULL, 0);
+
+ return 0;
+}
+
+static int test_open(const char *path, struct fuse_file_info *fi)
+{
+ if (strcmp(path, test_path))
+ return -ENOENT;
+
+ return 0;
+}
+
+static int test_read(const char *path, char *buf, size_t size, off_t offset,
+ struct fuse_file_info *fi)
+{
+ if (strcmp(path, test_path) != 0)
+ return -ENOENT;
+
+ if (!content || content_size == 0)
+ return 0;
+
+ if (offset >= content_size)
+ return 0;
+
+ if (offset + size > content_size)
+ size = content_size - offset;
+
+ memcpy(buf, content + offset, size);
+
+ return size;
+}
+
+static int test_write(const char *path, const char *buf, size_t size,
+ off_t offset, struct fuse_file_info *fi)
+{
+ size_t new_size;
+
+ if (strcmp(path, test_path) != 0)
+ return -ENOENT;
+
+ if(offset > content_size)
+ return -EINVAL;
+
+ new_size = MAX(offset + size, content_size);
+
+ if (new_size > content_size)
+ content = realloc(content, new_size);
+
+ content_size = new_size;
+
+ if (!content)
+ return -ENOMEM;
+
+ memcpy(content + offset, buf, size);
+
+ return size;
+}
+
+static int test_truncate(const char *path, off_t size)
+{
+ if (strcmp(path, test_path) != 0)
+ return -ENOENT;
+
+ if (size == 0) {
+ free(content);
+ content = NULL;
+ content_size = 0;
+ return 0;
+ }
+
+ content = realloc(content, size);
+
+ if (!content)
+ return -ENOMEM;
+
+ if (size > content_size)
+ memset(content + content_size, 0, size - content_size);
+
+ content_size = size;
+ return 0;
+}
+
+static struct fuse_operations memfd_ops = {
+ .getattr = test_getattr,
+ .readdir = test_readdir,
+ .open = test_open,
+ .read = test_read,
+ .write = test_write,
+ .truncate = test_truncate,
+};
+
+int main(int argc, char *argv[])
+{
+ return fuse_main(argc, argv, &memfd_ops, NULL);
+}
diff --git a/tools/testing/selftests/filesystems/fuse/fusectl_test.c b/tools/testing/selftests/filesystems/fuse/fusectl_test.c
new file mode 100644
index 000000000000..0d1d012c35ed
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/fusectl_test.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Chen Linxuan <chenlinxuan@uniontech.com>
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sched.h>
+#include <linux/limits.h>
+
+#include "kselftest_harness.h"
+
+#define FUSECTL_MOUNTPOINT "/sys/fs/fuse/connections"
+#define FUSE_MOUNTPOINT "/tmp/fuse_mnt_XXXXXX"
+#define FUSE_DEVICE "/dev/fuse"
+#define FUSECTL_TEST_VALUE "1"
+
+static void write_file(struct __test_metadata *const _metadata,
+ const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+
+ ASSERT_GE(fd, 0);
+ ASSERT_EQ(write(fd, val, len), len);
+ ASSERT_EQ(close(fd), 0);
+}
+
+FIXTURE(fusectl){
+ char fuse_mountpoint[sizeof(FUSE_MOUNTPOINT)];
+ int connection;
+};
+
+FIXTURE_SETUP(fusectl)
+{
+ const char *fuse_mnt_prog = "./fuse_mnt";
+ int status, pid;
+ struct stat statbuf;
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+ char buf[32];
+
+ /* Setup userns */
+ ASSERT_EQ(unshare(CLONE_NEWNS|CLONE_NEWUSER), 0);
+ sprintf(buf, "0 %d 1", uid);
+ write_file(_metadata, "/proc/self/uid_map", buf);
+ write_file(_metadata, "/proc/self/setgroups", "deny");
+ sprintf(buf, "0 %d 1", gid);
+ write_file(_metadata, "/proc/self/gid_map", buf);
+ ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0);
+
+ strcpy(self->fuse_mountpoint, FUSE_MOUNTPOINT);
+
+ if (!mkdtemp(self->fuse_mountpoint))
+ SKIP(return,
+ "Failed to create FUSE mountpoint %s",
+ strerror(errno));
+
+ if (access(FUSECTL_MOUNTPOINT, F_OK))
+ SKIP(return,
+ "FUSE control filesystem not mounted");
+
+ pid = fork();
+ if (pid < 0)
+ SKIP(return,
+ "Failed to fork FUSE daemon process: %s",
+ strerror(errno));
+
+ if (pid == 0) {
+ execlp(fuse_mnt_prog, fuse_mnt_prog, self->fuse_mountpoint, NULL);
+ exit(errno);
+ }
+
+ waitpid(pid, &status, 0);
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ SKIP(return,
+ "Failed to start FUSE daemon %s",
+ strerror(WEXITSTATUS(status)));
+ }
+
+ if (stat(self->fuse_mountpoint, &statbuf))
+ SKIP(return,
+ "Failed to stat FUSE mountpoint %s",
+ strerror(errno));
+
+ self->connection = statbuf.st_dev;
+}
+
+FIXTURE_TEARDOWN(fusectl)
+{
+ umount2(self->fuse_mountpoint, MNT_DETACH);
+ rmdir(self->fuse_mountpoint);
+}
+
+TEST_F(fusectl, abort)
+{
+ char path_buf[PATH_MAX];
+ int abort_fd, test_fd, ret;
+
+ sprintf(path_buf, "/sys/fs/fuse/connections/%d/abort", self->connection);
+
+ ASSERT_EQ(0, access(path_buf, F_OK));
+
+ abort_fd = open(path_buf, O_WRONLY);
+ ASSERT_GE(abort_fd, 0);
+
+ sprintf(path_buf, "%s/test", self->fuse_mountpoint);
+
+ test_fd = open(path_buf, O_RDWR);
+ ASSERT_GE(test_fd, 0);
+
+ ret = read(test_fd, path_buf, sizeof(path_buf));
+ ASSERT_EQ(ret, 0);
+
+ ret = write(test_fd, "test", sizeof("test"));
+ ASSERT_EQ(ret, sizeof("test"));
+
+ ret = lseek(test_fd, 0, SEEK_SET);
+ ASSERT_GE(ret, 0);
+
+ ret = write(abort_fd, FUSECTL_TEST_VALUE, sizeof(FUSECTL_TEST_VALUE));
+ ASSERT_GT(ret, 0);
+
+ close(abort_fd);
+
+ ret = read(test_fd, path_buf, sizeof(path_buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENOTCONN);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/kernfs_test.c b/tools/testing/selftests/filesystems/kernfs_test.c
new file mode 100644
index 000000000000..84c2b910a60d
--- /dev/null
+++ b/tools/testing/selftests/filesystems/kernfs_test.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+
+#include "kselftest_harness.h"
+#include "wrappers.h"
+
+TEST(kernfs_listxattr)
+{
+ int fd;
+
+ /* Read-only file that can never have any extended attributes set. */
+ fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_EQ(flistxattr(fd, NULL, 0), 0);
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST(kernfs_getxattr)
+{
+ int fd;
+ char buf[1];
+
+ /* Read-only file that can never have any extended attributes set. */
+ fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_LT(fgetxattr(fd, "user.foo", buf, sizeof(buf)), 0);
+ ASSERT_EQ(errno, ENODATA);
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore
new file mode 100644
index 000000000000..124339ea7845
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/*_test
+/*_test_ns
diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile
new file mode 100644
index 000000000000..836a4eb7be06
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
+
+TEST_GEN_PROGS := mount-notify_test mount-notify_test_ns
+
+include ../../lib.mk
+
+$(OUTPUT)/mount-notify_test: ../utils.c
+$(OUTPUT)/mount-notify_test_ns: ../utils.c
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
new file mode 100644
index 000000000000..6381af6a40e3
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -0,0 +1,528 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
+
+#define _GNU_SOURCE
+
+// Needed for linux/fanotify.h
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#define __kernel_fsid_t __kernel_fsid_t
+
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/fanotify.h>
+
+#include "kselftest_harness.h"
+#include "../statmount/statmount.h"
+#include "../utils.h"
+
+static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+
+static const int mark_cmds[] = {
+ FAN_MARK_ADD,
+ FAN_MARK_REMOVE,
+ FAN_MARK_FLUSH
+};
+
+#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
+
+FIXTURE(fanotify) {
+ int fan_fd[NUM_FAN_FDS];
+ char buf[256];
+ unsigned int rem;
+ void *next;
+ char root_mntpoint[sizeof(root_mntpoint_templ)];
+ int orig_root;
+ int ns_fd;
+ uint64_t root_id;
+};
+
+FIXTURE_SETUP(fanotify)
+{
+ int i, ret;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->ns_fd, 0);
+
+ ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0);
+
+ strcpy(self->root_mntpoint, root_mntpoint_templ);
+ ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
+
+ self->orig_root = open("/", O_PATH | O_CLOEXEC);
+ ASSERT_GE(self->orig_root, 0);
+
+ ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
+
+ ASSERT_EQ(chroot(self->root_mntpoint), 0);
+
+ ASSERT_EQ(chdir("/"), 0);
+
+ ASSERT_EQ(mkdir("a", 0700), 0);
+
+ ASSERT_EQ(mkdir("b", 0700), 0);
+
+ self->root_id = get_unique_mnt_id("/");
+ ASSERT_NE(self->root_id, 0);
+
+ for (i = 0; i < NUM_FAN_FDS; i++) {
+ self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
+ 0);
+ ASSERT_GE(self->fan_fd[i], 0);
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ // On fd[0] we do an extra ADD that changes nothing.
+ // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+
+ self->rem = 0;
+}
+
+FIXTURE_TEARDOWN(fanotify)
+{
+ int i;
+
+ ASSERT_EQ(self->rem, 0);
+ for (i = 0; i < NUM_FAN_FDS; i++)
+ close(self->fan_fd[i]);
+
+ ASSERT_EQ(fchdir(self->orig_root), 0);
+
+ ASSERT_EQ(chroot("."), 0);
+
+ EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
+ EXPECT_EQ(chdir(self->root_mntpoint), 0);
+ EXPECT_EQ(chdir("/"), 0);
+ EXPECT_EQ(rmdir(self->root_mntpoint), 0);
+}
+
+static uint64_t expect_notify(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t *mask)
+{
+ struct fanotify_event_metadata *meta;
+ struct fanotify_event_info_mnt *mnt;
+ unsigned int thislen;
+
+ if (!self->rem) {
+ ssize_t len;
+ int i;
+
+ for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
+ len = read(self->fan_fd[i], self->buf,
+ sizeof(self->buf));
+ if (i > 0) {
+ // Groups 1,2 should get EAGAIN
+ ASSERT_EQ(len, -1);
+ ASSERT_EQ(errno, EAGAIN);
+ } else {
+ // Group 0 should get events
+ ASSERT_GT(len, 0);
+ }
+ }
+
+ self->rem = len;
+ self->next = (void *) self->buf;
+ }
+
+ meta = self->next;
+ ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
+
+ thislen = meta->event_len;
+ self->rem -= thislen;
+ self->next += thislen;
+
+ *mask = meta->mask;
+ thislen -= sizeof(*meta);
+
+ mnt = ((void *) meta) + meta->event_len - thislen;
+
+ ASSERT_EQ(thislen, sizeof(*mnt));
+
+ return mnt->mnt_id;
+}
+
+static void expect_notify_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ unsigned int n, uint64_t mask[], uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify(_metadata, self, &mask[i]);
+}
+
+static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t expect_mask)
+{
+ uint64_t mntid, mask;
+
+ mntid = expect_notify(_metadata, self, &mask);
+ ASSERT_EQ(expect_mask, mask);
+
+ return mntid;
+}
+
+
+static void expect_notify_mask_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t mask, unsigned int n, uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify_mask(_metadata, self, mask);
+}
+
+static void verify_mount_ids(struct __test_metadata *const _metadata,
+ const uint64_t list1[], const uint64_t list2[],
+ size_t num)
+{
+ unsigned int i, j;
+
+ // Check that neither list has any duplicates
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (i != j) {
+ ASSERT_NE(list1[i], list1[j]);
+ ASSERT_NE(list2[i], list2[j]);
+ }
+ }
+ }
+ // Check that all list1 memebers can be found in list2. Together with
+ // the above it means that the list1 and list2 represent the same sets.
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (list1[i] == list2[j])
+ break;
+ }
+ ASSERT_NE(j, num);
+ }
+}
+
+static void check_mounted(struct __test_metadata *const _metadata,
+ const uint64_t mnts[], size_t num)
+{
+ ssize_t ret;
+ uint64_t *list;
+
+ list = malloc((num + 1) * sizeof(list[0]));
+ ASSERT_NE(list, NULL);
+
+ ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
+ ASSERT_EQ(ret, num);
+
+ verify_mount_ids(_metadata, mnts, list, num);
+
+ free(list);
+}
+
+static void setup_mount_tree(struct __test_metadata *const _metadata,
+ int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+TEST_F(fanotify, bind)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, move)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+ uint64_t move_id;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
+ ASSERT_EQ(ret, 0);
+
+ move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ ASSERT_EQ(move_id, mnts[1]);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, propagate)
+{
+ const unsigned int log2_num = 4;
+ const unsigned int num = (1 << log2_num);
+ uint64_t mnts[num];
+
+ setup_mount_tree(_metadata, log2_num);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
+
+ mnts[0] = self->root_id;
+ check_mounted(_metadata, mnts, num);
+
+ // Cleanup
+ int ret;
+ uint64_t mnts2[num];
+ ret = umount2("/", MNT_DETACH);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/", NULL, MS_PRIVATE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts2[0] = self->root_id;
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
+ verify_mount_ids(_metadata, mnts, mnts2, num);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, fsmount)
+{
+ int ret, fs, mnt;
+ uint64_t mnts[2] = { self->root_id };
+
+ fs = fsopen("tmpfs", 0);
+ ASSERT_GE(fs, 0);
+
+ ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ mnt = fsmount(fs, 0, 0);
+ ASSERT_GE(mnt, 0);
+
+ close(fs);
+
+ ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
+ ASSERT_EQ(ret, 0);
+
+ close(mnt);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, reparent)
+{
+ uint64_t mnts[6] = { self->root_id };
+ uint64_t dmnts[3];
+ uint64_t masks[3];
+ unsigned int i;
+ int ret;
+
+ // Create setup with a[1] -> b[2] propagation
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/a", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/b", NULL, MS_SLAVE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ // Mount on a[3], which is propagated to b[4]
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
+
+ check_mounted(_metadata, mnts, 5);
+
+ // Mount on b[5], not propagated
+ ret = mount("/", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ check_mounted(_metadata, mnts, 6);
+
+ // Umount a[3], which is propagated to b[4], but not b[5]
+ // This will result in b[5] "falling" on b[2]
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_n(_metadata, self, 3, masks, dmnts);
+ verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
+
+ for (i = 0; i < 3; i++) {
+ if (dmnts[i] == mnts[5]) {
+ ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ } else {
+ ASSERT_EQ(masks[i], FAN_MNT_DETACH);
+ }
+ }
+
+ mnts[3] = mnts[5];
+ check_mounted(_metadata, mnts, 4);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
+ verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, rmdir)
+{
+ uint64_t mnts[3] = { self->root_id };
+ int ret;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ ret = chdir("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret == 0) {
+ chdir("/");
+ unshare(CLONE_NEWNS);
+ mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ umount2("/a", MNT_DETACH);
+ // This triggers a detach in the other namespace
+ rmdir("/a");
+ exit(0);
+ }
+ wait(NULL);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
+ check_mounted(_metadata, mnts, 1);
+
+ // Cleanup
+ ret = chdir("/");
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(fanotify, pivot_root)
+{
+ uint64_t mnts[3] = { self->root_id };
+ uint64_t mnts2[3];
+ int ret;
+
+ ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ ret = mkdir("/a/new", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mkdir("/a/old", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ check_mounted(_metadata, mnts, 3);
+
+ ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
+ verify_mount_ids(_metadata, mnts, mnts2, 2);
+ check_mounted(_metadata, mnts, 3);
+
+ // Cleanup
+ ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
new file mode 100644
index 000000000000..320ee25dc8a5
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
+
+#define _GNU_SOURCE
+
+// Needed for linux/fanotify.h
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#define __kernel_fsid_t __kernel_fsid_t
+
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/fanotify.h>
+
+#include "kselftest_harness.h"
+#include "../statmount/statmount.h"
+#include "../utils.h"
+
+static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+
+static const int mark_types[] = {
+ FAN_MARK_FILESYSTEM,
+ FAN_MARK_MOUNT,
+ FAN_MARK_INODE
+};
+
+static const int mark_cmds[] = {
+ FAN_MARK_ADD,
+ FAN_MARK_REMOVE,
+ FAN_MARK_FLUSH
+};
+
+#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
+
+FIXTURE(fanotify) {
+ int fan_fd[NUM_FAN_FDS];
+ char buf[256];
+ unsigned int rem;
+ void *next;
+ char root_mntpoint[sizeof(root_mntpoint_templ)];
+ int orig_root;
+ int orig_ns_fd;
+ int ns_fd;
+ uint64_t root_id;
+};
+
+FIXTURE_SETUP(fanotify)
+{
+ int i, ret;
+
+ self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->orig_ns_fd, 0);
+
+ ret = setup_userns();
+ ASSERT_EQ(ret, 0);
+
+ self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->ns_fd, 0);
+
+ strcpy(self->root_mntpoint, root_mntpoint_templ);
+ ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
+
+ self->orig_root = open("/", O_PATH | O_CLOEXEC);
+ ASSERT_GE(self->orig_root, 0);
+
+ ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
+
+ ASSERT_EQ(chroot(self->root_mntpoint), 0);
+
+ ASSERT_EQ(chdir("/"), 0);
+
+ ASSERT_EQ(mkdir("a", 0700), 0);
+
+ ASSERT_EQ(mkdir("b", 0700), 0);
+
+ self->root_id = get_unique_mnt_id("/");
+ ASSERT_NE(self->root_id, 0);
+
+ for (i = 0; i < NUM_FAN_FDS; i++) {
+ int fan_fd = fanotify_init(FAN_REPORT_FID, 0);
+ // Verify that watching tmpfs mounted inside userns is allowed
+ ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i],
+ FAN_OPEN, AT_FDCWD, "/");
+ ASSERT_EQ(ret, 0);
+ // ...but watching entire orig root filesystem is not allowed
+ ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
+ FAN_OPEN, self->orig_root, ".");
+ ASSERT_NE(ret, 0);
+ close(fan_fd);
+
+ self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
+ 0);
+ ASSERT_GE(self->fan_fd[i], 0);
+ // Verify that watching mntns where group was created is allowed
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ // ...but watching orig mntns is not allowed
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->orig_ns_fd, NULL);
+ ASSERT_NE(ret, 0);
+ // On fd[0] we do an extra ADD that changes nothing.
+ // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+
+ self->rem = 0;
+}
+
+FIXTURE_TEARDOWN(fanotify)
+{
+ int i;
+
+ ASSERT_EQ(self->rem, 0);
+ for (i = 0; i < NUM_FAN_FDS; i++)
+ close(self->fan_fd[i]);
+
+ ASSERT_EQ(fchdir(self->orig_root), 0);
+
+ ASSERT_EQ(chroot("."), 0);
+
+ EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
+ EXPECT_EQ(chdir(self->root_mntpoint), 0);
+ EXPECT_EQ(chdir("/"), 0);
+ EXPECT_EQ(rmdir(self->root_mntpoint), 0);
+}
+
+static uint64_t expect_notify(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t *mask)
+{
+ struct fanotify_event_metadata *meta;
+ struct fanotify_event_info_mnt *mnt;
+ unsigned int thislen;
+
+ if (!self->rem) {
+ ssize_t len;
+ int i;
+
+ for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
+ len = read(self->fan_fd[i], self->buf,
+ sizeof(self->buf));
+ if (i > 0) {
+ // Groups 1,2 should get EAGAIN
+ ASSERT_EQ(len, -1);
+ ASSERT_EQ(errno, EAGAIN);
+ } else {
+ // Group 0 should get events
+ ASSERT_GT(len, 0);
+ }
+ }
+
+ self->rem = len;
+ self->next = (void *) self->buf;
+ }
+
+ meta = self->next;
+ ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
+
+ thislen = meta->event_len;
+ self->rem -= thislen;
+ self->next += thislen;
+
+ *mask = meta->mask;
+ thislen -= sizeof(*meta);
+
+ mnt = ((void *) meta) + meta->event_len - thislen;
+
+ ASSERT_EQ(thislen, sizeof(*mnt));
+
+ return mnt->mnt_id;
+}
+
+static void expect_notify_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ unsigned int n, uint64_t mask[], uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify(_metadata, self, &mask[i]);
+}
+
+static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t expect_mask)
+{
+ uint64_t mntid, mask;
+
+ mntid = expect_notify(_metadata, self, &mask);
+ ASSERT_EQ(expect_mask, mask);
+
+ return mntid;
+}
+
+
+static void expect_notify_mask_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t mask, unsigned int n, uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify_mask(_metadata, self, mask);
+}
+
+static void verify_mount_ids(struct __test_metadata *const _metadata,
+ const uint64_t list1[], const uint64_t list2[],
+ size_t num)
+{
+ unsigned int i, j;
+
+ // Check that neither list has any duplicates
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (i != j) {
+ ASSERT_NE(list1[i], list1[j]);
+ ASSERT_NE(list2[i], list2[j]);
+ }
+ }
+ }
+ // Check that all list1 memebers can be found in list2. Together with
+ // the above it means that the list1 and list2 represent the same sets.
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (list1[i] == list2[j])
+ break;
+ }
+ ASSERT_NE(j, num);
+ }
+}
+
+static void check_mounted(struct __test_metadata *const _metadata,
+ const uint64_t mnts[], size_t num)
+{
+ ssize_t ret;
+ uint64_t *list;
+
+ list = malloc((num + 1) * sizeof(list[0]));
+ ASSERT_NE(list, NULL);
+
+ ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
+ ASSERT_EQ(ret, num);
+
+ verify_mount_ids(_metadata, mnts, list, num);
+
+ free(list);
+}
+
+static void setup_mount_tree(struct __test_metadata *const _metadata,
+ int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+TEST_F(fanotify, bind)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, move)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+ uint64_t move_id;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
+ ASSERT_EQ(ret, 0);
+
+ move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ ASSERT_EQ(move_id, mnts[1]);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, propagate)
+{
+ const unsigned int log2_num = 4;
+ const unsigned int num = (1 << log2_num);
+ uint64_t mnts[num];
+
+ setup_mount_tree(_metadata, log2_num);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
+
+ mnts[0] = self->root_id;
+ check_mounted(_metadata, mnts, num);
+
+ // Cleanup
+ int ret;
+ uint64_t mnts2[num];
+ ret = umount2("/", MNT_DETACH);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/", NULL, MS_PRIVATE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts2[0] = self->root_id;
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
+ verify_mount_ids(_metadata, mnts, mnts2, num);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, fsmount)
+{
+ int ret, fs, mnt;
+ uint64_t mnts[2] = { self->root_id };
+
+ fs = fsopen("tmpfs", 0);
+ ASSERT_GE(fs, 0);
+
+ ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ mnt = fsmount(fs, 0, 0);
+ ASSERT_GE(mnt, 0);
+
+ close(fs);
+
+ ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
+ ASSERT_EQ(ret, 0);
+
+ close(mnt);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, reparent)
+{
+ uint64_t mnts[6] = { self->root_id };
+ uint64_t dmnts[3];
+ uint64_t masks[3];
+ unsigned int i;
+ int ret;
+
+ // Create setup with a[1] -> b[2] propagation
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/a", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/b", NULL, MS_SLAVE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ // Mount on a[3], which is propagated to b[4]
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
+
+ check_mounted(_metadata, mnts, 5);
+
+ // Mount on b[5], not propagated
+ ret = mount("/", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ check_mounted(_metadata, mnts, 6);
+
+ // Umount a[3], which is propagated to b[4], but not b[5]
+ // This will result in b[5] "falling" on b[2]
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_n(_metadata, self, 3, masks, dmnts);
+ verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
+
+ for (i = 0; i < 3; i++) {
+ if (dmnts[i] == mnts[5]) {
+ ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ } else {
+ ASSERT_EQ(masks[i], FAN_MNT_DETACH);
+ }
+ }
+
+ mnts[3] = mnts[5];
+ check_mounted(_metadata, mnts, 4);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
+ verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, rmdir)
+{
+ uint64_t mnts[3] = { self->root_id };
+ int ret;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ ret = chdir("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret == 0) {
+ chdir("/");
+ unshare(CLONE_NEWNS);
+ mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ umount2("/a", MNT_DETACH);
+ // This triggers a detach in the other namespace
+ rmdir("/a");
+ exit(0);
+ }
+ wait(NULL);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
+ check_mounted(_metadata, mnts, 1);
+
+ // Cleanup
+ ret = chdir("/");
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(fanotify, pivot_root)
+{
+ uint64_t mnts[3] = { self->root_id };
+ uint64_t mnts2[3];
+ int ret;
+
+ ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ ret = mkdir("/a/new", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mkdir("/a/old", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ check_mounted(_metadata, mnts, 3);
+
+ ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
+ verify_mount_ids(_metadata, mnts, mnts2, 2);
+ check_mounted(_metadata, mnts, 3);
+
+ // Cleanup
+ ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/filesystems/nsfs/.gitignore
index ed79ebdf286e..92a8249006d1 100644
--- a/tools/testing/selftests/nsfs/.gitignore
+++ b/tools/testing/selftests/filesystems/nsfs/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
owner
pidns
+iterate_mntns
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/filesystems/nsfs/Makefile
index dd9bd50b7b93..231aaa7dfd95 100644
--- a/tools/testing/selftests/nsfs/Makefile
+++ b/tools/testing/selftests/filesystems/nsfs/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-TEST_GEN_PROGS := owner pidns
+TEST_GEN_PROGS := owner pidns iterate_mntns
CFLAGS := -Wall -Werror
-include ../lib.mk
+include ../../lib.mk
diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/filesystems/nsfs/config
index 598d0a225fc9..598d0a225fc9 100644
--- a/tools/testing/selftests/nsfs/config
+++ b/tools/testing/selftests/filesystems/nsfs/config
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
new file mode 100644
index 000000000000..61e55dfbf121
--- /dev/null
+++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <linux/auto_dev-ioctl.h>
+#include <linux/errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "kselftest_harness.h"
+
+#define MNT_NS_COUNT 11
+#define MNT_NS_LAST_INDEX 10
+
+struct mnt_ns_info {
+ __u32 size;
+ __u32 nr_mounts;
+ __u64 mnt_ns_id;
+};
+
+#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
+
+/* Get information about namespace. */
+#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info)
+/* Get next namespace. */
+#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info)
+/* Get previous namespace. */
+#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info)
+
+FIXTURE(iterate_mount_namespaces) {
+ int fd_mnt_ns[MNT_NS_COUNT];
+ __u64 mnt_ns_id[MNT_NS_COUNT];
+};
+
+FIXTURE_SETUP(iterate_mount_namespaces)
+{
+ for (int i = 0; i < MNT_NS_COUNT; i++)
+ self->fd_mnt_ns[i] = -EBADF;
+
+ /*
+ * Creating a new user namespace let's us guarantee that we only see
+ * mount namespaces that we did actually create.
+ */
+ ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
+
+ for (int i = 0; i < MNT_NS_COUNT; i++) {
+ struct mnt_ns_info info = {};
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ self->fd_mnt_ns[i] = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(self->fd_mnt_ns[i], 0);
+ ASSERT_EQ(ioctl(self->fd_mnt_ns[i], NS_MNT_GET_INFO, &info), 0);
+ self->mnt_ns_id[i] = info.mnt_ns_id;
+ }
+}
+
+FIXTURE_TEARDOWN(iterate_mount_namespaces)
+{
+ for (int i = 0; i < MNT_NS_COUNT; i++) {
+ if (self->fd_mnt_ns[i] < 0)
+ continue;
+ ASSERT_EQ(close(self->fd_mnt_ns[i]), 0);
+ }
+}
+
+TEST_F(iterate_mount_namespaces, iterate_all_forward)
+{
+ int fd_mnt_ns_cur, count = 0;
+
+ fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC);
+ ASSERT_GE(fd_mnt_ns_cur, 0);
+
+ for (;; count++) {
+ struct mnt_ns_info info = {};
+ int fd_mnt_ns_next;
+
+ fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
+ if (fd_mnt_ns_next < 0 && errno == ENOENT)
+ break;
+ ASSERT_GE(fd_mnt_ns_next, 0);
+ ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+ fd_mnt_ns_cur = fd_mnt_ns_next;
+ }
+ ASSERT_EQ(count, MNT_NS_LAST_INDEX);
+}
+
+TEST_F(iterate_mount_namespaces, iterate_all_backwards)
+{
+ int fd_mnt_ns_cur, count = 0;
+
+ fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC);
+ ASSERT_GE(fd_mnt_ns_cur, 0);
+
+ for (;; count++) {
+ struct mnt_ns_info info = {};
+ int fd_mnt_ns_prev;
+
+ fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
+ if (fd_mnt_ns_prev < 0 && errno == ENOENT)
+ break;
+ ASSERT_GE(fd_mnt_ns_prev, 0);
+ ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+ fd_mnt_ns_cur = fd_mnt_ns_prev;
+ }
+ ASSERT_EQ(count, MNT_NS_LAST_INDEX);
+}
+
+TEST_F(iterate_mount_namespaces, iterate_forward)
+{
+ int fd_mnt_ns_cur;
+
+ ASSERT_EQ(setns(self->fd_mnt_ns[0], CLONE_NEWNS), 0);
+
+ fd_mnt_ns_cur = self->fd_mnt_ns[0];
+ for (int i = 1; i < MNT_NS_COUNT; i++) {
+ struct mnt_ns_info info = {};
+ int fd_mnt_ns_next;
+
+ fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
+ ASSERT_GE(fd_mnt_ns_next, 0);
+ ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+ fd_mnt_ns_cur = fd_mnt_ns_next;
+ ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
+ }
+}
+
+TEST_F(iterate_mount_namespaces, iterate_backward)
+{
+ int fd_mnt_ns_cur;
+
+ ASSERT_EQ(setns(self->fd_mnt_ns[MNT_NS_LAST_INDEX], CLONE_NEWNS), 0);
+
+ fd_mnt_ns_cur = self->fd_mnt_ns[MNT_NS_LAST_INDEX];
+ for (int i = MNT_NS_LAST_INDEX - 1; i >= 0; i--) {
+ struct mnt_ns_info info = {};
+ int fd_mnt_ns_prev;
+
+ fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
+ ASSERT_GE(fd_mnt_ns_prev, 0);
+ ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+ fd_mnt_ns_cur = fd_mnt_ns_prev;
+ ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
+ }
+}
+
+TEST_F(iterate_mount_namespaces, nfs_valid_ioctl)
+{
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_OPENMOUNT, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_CLOSEMOUNT, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_READY, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/filesystems/nsfs/owner.c
index 96a976c74550..96a976c74550 100644
--- a/tools/testing/selftests/nsfs/owner.c
+++ b/tools/testing/selftests/filesystems/nsfs/owner.c
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/filesystems/nsfs/pidns.c
index e3c772c6a7c7..e3c772c6a7c7 100644
--- a/tools/testing/selftests/nsfs/pidns.c
+++ b/tools/testing/selftests/filesystems/nsfs/pidns.c
diff --git a/tools/testing/selftests/filesystems/overlayfs/.gitignore b/tools/testing/selftests/filesystems/overlayfs/.gitignore
new file mode 100644
index 000000000000..e23a18c8b37f
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+dev_in_maps
+set_layers_via_fds
diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile
new file mode 100644
index 000000000000..d3ad4a77db9b
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -Wall
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lcap
+
+LOCAL_HDRS += ../wrappers.h log.h
+
+TEST_GEN_PROGS := dev_in_maps
+TEST_GEN_PROGS += set_layers_via_fds
+
+include ../../lib.mk
+
+$(OUTPUT)/set_layers_via_fds: ../utils.c
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
new file mode 100644
index 000000000000..8924cea6aa4b
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
+
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sched.h>
+#include <fcntl.h>
+
+#include "kselftest.h"
+#include "log.h"
+#include "../wrappers.h"
+
+static long get_file_dev_and_inode(void *addr, struct statx *stx)
+{
+ char buf[4096];
+ FILE *mapf;
+
+ mapf = fopen("/proc/self/maps", "r");
+ if (mapf == NULL)
+ return pr_perror("fopen(/proc/self/maps)");
+
+ while (fgets(buf, sizeof(buf), mapf)) {
+ unsigned long start, end;
+ uint32_t maj, min;
+ __u64 ino;
+
+ if (sscanf(buf, "%lx-%lx %*s %*s %x:%x %llu",
+ &start, &end, &maj, &min, &ino) != 5)
+ return pr_perror("unable to parse: %s", buf);
+ if (start == (unsigned long)addr) {
+ stx->stx_dev_major = maj;
+ stx->stx_dev_minor = min;
+ stx->stx_ino = ino;
+ return 0;
+ }
+ }
+
+ return pr_err("unable to find the mapping");
+}
+
+static int ovl_mount(void)
+{
+ int tmpfs, fsfd, ovl;
+
+ fsfd = sys_fsopen("tmpfs", 0);
+ if (fsfd == -1)
+ return pr_perror("fsopen(tmpfs)");
+
+ if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1)
+ return pr_perror("FSCONFIG_CMD_CREATE");
+
+ tmpfs = sys_fsmount(fsfd, 0, 0);
+ if (tmpfs == -1)
+ return pr_perror("fsmount");
+
+ close(fsfd);
+
+ /* overlayfs can't be constructed on top of a detached mount. */
+ if (sys_move_mount(tmpfs, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH))
+ return pr_perror("move_mount");
+ close(tmpfs);
+
+ if (mkdir("/tmp/w", 0755) == -1 ||
+ mkdir("/tmp/u", 0755) == -1 ||
+ mkdir("/tmp/l", 0755) == -1)
+ return pr_perror("mkdir");
+
+ fsfd = sys_fsopen("overlay", 0);
+ if (fsfd == -1)
+ return pr_perror("fsopen(overlay)");
+ if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "test", 0) == -1 ||
+ sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir", "/tmp/l", 0) == -1 ||
+ sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "upperdir", "/tmp/u", 0) == -1 ||
+ sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "workdir", "/tmp/w", 0) == -1)
+ return pr_perror("fsconfig");
+ if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1)
+ return pr_perror("fsconfig");
+ ovl = sys_fsmount(fsfd, 0, 0);
+ if (ovl == -1)
+ return pr_perror("fsmount");
+
+ return ovl;
+}
+
+/*
+ * Check that the file device and inode shown in /proc/pid/maps match values
+ * returned by stat(2).
+ */
+static int test(void)
+{
+ struct statx stx, mstx;
+ int ovl, fd;
+ void *addr;
+
+ ovl = ovl_mount();
+ if (ovl == -1)
+ return -1;
+
+ fd = openat(ovl, "test", O_RDWR | O_CREAT, 0644);
+ if (fd == -1)
+ return pr_perror("openat");
+
+ addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED)
+ return pr_perror("mmap");
+
+ if (get_file_dev_and_inode(addr, &mstx))
+ return -1;
+ if (statx(fd, "", AT_EMPTY_PATH | AT_STATX_SYNC_AS_STAT, STATX_INO, &stx))
+ return pr_perror("statx");
+
+ if (stx.stx_dev_major != mstx.stx_dev_major ||
+ stx.stx_dev_minor != mstx.stx_dev_minor ||
+ stx.stx_ino != mstx.stx_ino)
+ return pr_fail("unmatched dev:ino %x:%x:%llx (expected %x:%x:%llx)\n",
+ mstx.stx_dev_major, mstx.stx_dev_minor, mstx.stx_ino,
+ stx.stx_dev_major, stx.stx_dev_minor, stx.stx_ino);
+
+ ksft_test_result_pass("devices are matched\n");
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int fsfd;
+
+ fsfd = sys_fsopen("overlay", 0);
+ if (fsfd == -1) {
+ ksft_test_result_skip("unable to create overlay mount\n");
+ return 1;
+ }
+ close(fsfd);
+
+ /* Create a new mount namespace to not care about cleaning test mounts. */
+ if (unshare(CLONE_NEWNS) == -1) {
+ ksft_test_result_skip("unable to create a new mount namespace\n");
+ return 1;
+ }
+ if (sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) {
+ pr_perror("mount");
+ return 1;
+ }
+
+ ksft_set_plan(1);
+
+ if (test())
+ return 1;
+
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/filesystems/overlayfs/log.h b/tools/testing/selftests/filesystems/overlayfs/log.h
new file mode 100644
index 000000000000..db64df2a8483
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/log.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTEST_TIMENS_LOG_H__
+#define __SELFTEST_TIMENS_LOG_H__
+
+#define pr_msg(fmt, lvl, ...) \
+ ksft_print_msg("[%s] (%s:%d)\t" fmt "\n", \
+ lvl, __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define pr_p(func, fmt, ...) func(fmt ": %m", ##__VA_ARGS__)
+
+#define pr_err(fmt, ...) \
+ ({ \
+ ksft_test_result_error(fmt "\n", ##__VA_ARGS__); \
+ -1; \
+ })
+
+#define pr_fail(fmt, ...) \
+ ({ \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__); \
+ -1; \
+ })
+
+#define pr_perror(fmt, ...) pr_p(pr_err, fmt, ##__VA_ARGS__)
+
+#endif
diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
new file mode 100644
index 000000000000..3c0b93183348
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
+
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
+#include "log.h"
+#include "../utils.h"
+#include "../wrappers.h"
+
+FIXTURE(set_layers_via_fds) {
+ int pidfd;
+};
+
+FIXTURE_SETUP(set_layers_via_fds)
+{
+ self->pidfd = -EBADF;
+ EXPECT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+ EXPECT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0);
+}
+
+FIXTURE_TEARDOWN(set_layers_via_fds)
+{
+ if (self->pidfd >= 0) {
+ EXPECT_EQ(sys_pidfd_send_signal(self->pidfd, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(close(self->pidfd), 0);
+ }
+ umount2("/set_layers_via_fds", 0);
+ EXPECT_EQ(rmdir("/set_layers_via_fds"), 0);
+
+ umount2("/set_layers_via_fds_tmpfs", 0);
+ EXPECT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0);
+}
+
+TEST_F(set_layers_via_fds, set_layers_via_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay;
+ int layer_fds[] = { [0 ... 8] = -EBADF };
+ bool layers_found[] = { [0 ... 8] = false };
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f_mountinfo;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ layer_fds[4] = openat(fd_tmpfs, "l3", O_DIRECTORY);
+ ASSERT_GE(layer_fds[4], 0);
+
+ layer_fds[5] = openat(fd_tmpfs, "l4", O_DIRECTORY);
+ ASSERT_GE(layer_fds[5], 0);
+
+ layer_fds[6] = openat(fd_tmpfs, "d1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[6], 0);
+
+ layer_fds[7] = openat(fd_tmpfs, "d2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[7], 0);
+
+ layer_fds[8] = openat(fd_tmpfs, "d3", O_DIRECTORY);
+ ASSERT_GE(layer_fds[8], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[6]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[7]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[8]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ f_mountinfo = fopen("/proc/self/mountinfo", "r");
+ ASSERT_NE(f_mountinfo, NULL);
+
+ while (getline(&line, &len, f_mountinfo) != -1) {
+ char *haystack = line;
+
+ if (strstr(haystack, "workdir=/tmp/w"))
+ layers_found[0] = true;
+ if (strstr(haystack, "upperdir=/tmp/u"))
+ layers_found[1] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l1"))
+ layers_found[2] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l2"))
+ layers_found[3] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l3"))
+ layers_found[4] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l4"))
+ layers_found[5] = true;
+ if (strstr(haystack, "datadir+=/tmp/d1"))
+ layers_found[6] = true;
+ if (strstr(haystack, "datadir+=/tmp/d2"))
+ layers_found[7] = true;
+ if (strstr(haystack, "datadir+=/tmp/d3"))
+ layers_found[8] = true;
+ }
+ free(line);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(layers_found[i], true);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+ ASSERT_EQ(fclose(f_mountinfo), 0);
+}
+
+TEST_F(set_layers_via_fds, set_500_layers_via_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower;
+ int layer_fds[500] = { [0 ... 499] = -EBADF };
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ char path[100];
+
+ sprintf(path, "l%d", i);
+ ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0);
+ layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY);
+ ASSERT_GE(layer_fds[i], 0);
+ }
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ fd_work = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(fd_work, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(fd_upper, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0);
+ fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY);
+ ASSERT_GE(fd_lower, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, fd_work), 0);
+ ASSERT_EQ(close(fd_work), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, fd_upper), 0);
+ ASSERT_EQ(close(fd_upper), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0);
+ ASSERT_EQ(close(fd_lower), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds)
+{
+ int fd_context, fd_tmpfs, fd_overlay;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int pidfd;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "nooverride_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_invalid)
+{
+ int fd_context, fd_tmpfs, fd_overlay, ret;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int fd_userns1, fd_userns2;
+ int ipc_sockets[2];
+ char c;
+ const unsigned int predictable_fd_context_nr = 123;
+
+ fd_userns1 = get_userns_fd(0, 0, 10000);
+ ASSERT_GE(fd_userns1, 0);
+
+ fd_userns2 = get_userns_fd(0, 1234, 10000);
+ ASSERT_GE(fd_userns2, 0);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_GE(ret, 0);
+
+ pid = create_child(&self->pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (close(ipc_sockets[0])) {
+ TH_LOG("close should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!switch_userns(fd_userns2, 0, 0, false)) {
+ TH_LOG("switch_userns should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (read_nointr(ipc_sockets[1], &c, 1) != 1) {
+ TH_LOG("read_nointr should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (close(ipc_sockets[1])) {
+ TH_LOG("close should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!sys_fsconfig(predictable_fd_context_nr, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have failed");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ ASSERT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(switch_userns(fd_userns1, 0, 0, false), true);
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+ ASSERT_EQ(dup3(fd_context, predictable_fd_context_nr, 0), predictable_fd_context_nr);
+ ASSERT_EQ(close(fd_context), 0);
+ fd_context = predictable_fd_context_nr;
+ ASSERT_EQ(write_nointr(ipc_sockets[0], "1", 1), 1);
+ ASSERT_EQ(close(ipc_sockets[0]), 0);
+
+ ASSERT_EQ(wait_for_pid(pid), 0);
+ ASSERT_EQ(close(self->pidfd), 0);
+ self->pidfd = -EBADF;
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++)
+ ASSERT_EQ(close(layer_fds[i]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+ ASSERT_EQ(close(fd_userns1), 0);
+ ASSERT_EQ(close(fd_userns2), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_nomknod)
+{
+ int fd_context, fd_tmpfs, fd_overlay;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int pidfd;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (!cap_down(CAP_MKNOD))
+ _exit(EXIT_FAILURE);
+
+ if (!cap_down(CAP_SYS_ADMIN))
+ _exit(EXIT_FAILURE);
+
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0))
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_EQ(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(mknodat(fd_overlay, "dev-zero", S_IFCHR | 0644, makedev(1, 5)), -1);
+ ASSERT_EQ(errno, EPERM);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower;
+ int layer_fds[500] = { [0 ... 499] = -EBADF };
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ char path[100];
+
+ sprintf(path, "l%d", i);
+ ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0);
+ layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[i], 0);
+ }
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ fd_work = openat(fd_tmpfs, "w", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_work, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_upper, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0);
+ fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_lower, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, fd_work), 0);
+ ASSERT_EQ(close(fd_work), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, fd_upper), 0);
+ ASSERT_EQ(close(fd_upper), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0);
+ ASSERT_EQ(close(fd_lower), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_layers_via_detached_mount_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay, fd_tmp;
+ int layer_fds[] = { [0 ... 8] = -EBADF };
+ bool layers_found[] = { [0 ... 8] = false };
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f_mountinfo;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u/upper", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u/work", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/set_layers_via_fds_tmpfs", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ fd_tmp = open_tree(fd_tmpfs, "u", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tmp, 0);
+
+ layer_fds[0] = openat(fd_tmp, "upper", O_CLOEXEC | O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmp, "work", O_CLOEXEC | O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = open_tree(fd_tmpfs, "l1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = open_tree(fd_tmpfs, "l2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[3], 0);
+
+ layer_fds[4] = open_tree(fd_tmpfs, "l3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[4], 0);
+
+ layer_fds[5] = open_tree(fd_tmpfs, "l4", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[5], 0);
+
+ layer_fds[6] = open_tree(fd_tmpfs, "d1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[6], 0);
+
+ layer_fds[7] = open_tree(fd_tmpfs, "d2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[7], 0);
+
+ layer_fds[8] = open_tree(fd_tmpfs, "d3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[8], 0);
+
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[6]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[7]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[8]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ f_mountinfo = fopen("/proc/self/mountinfo", "r");
+ ASSERT_NE(f_mountinfo, NULL);
+
+ while (getline(&line, &len, f_mountinfo) != -1) {
+ char *haystack = line;
+
+ if (strstr(haystack, "workdir=/tmp/w"))
+ layers_found[0] = true;
+ if (strstr(haystack, "upperdir=/tmp/u"))
+ layers_found[1] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l1"))
+ layers_found[2] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l2"))
+ layers_found[3] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l3"))
+ layers_found[4] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l4"))
+ layers_found[5] = true;
+ if (strstr(haystack, "datadir+=/tmp/d1"))
+ layers_found[6] = true;
+ if (strstr(haystack, "datadir+=/tmp/d2"))
+ layers_found[7] = true;
+ if (strstr(haystack, "datadir+=/tmp/d3"))
+ layers_found[8] = true;
+ }
+ free(line);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(layers_found[i], true);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+ ASSERT_EQ(fclose(f_mountinfo), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/statmount/.gitignore b/tools/testing/selftests/filesystems/statmount/.gitignore
new file mode 100644
index 000000000000..973363ad66a2
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+statmount_test_ns
+/*_test
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
new file mode 100644
index 000000000000..8e354fe99b44
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
+
+TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test
+
+include ../../lib.mk
+
+$(OUTPUT)/statmount_test_ns: ../utils.c
diff --git a/tools/testing/selftests/filesystems/statmount/listmount_test.c b/tools/testing/selftests/filesystems/statmount/listmount_test.c
new file mode 100644
index 000000000000..8bc82f38c42f
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/listmount_test.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "statmount.h"
+#include "kselftest_harness.h"
+
+#ifndef LISTMOUNT_REVERSE
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
+#endif
+
+#define LISTMNT_BUFFER 10
+
+/* Check that all mount ids are in increasing order. */
+TEST(listmount_forward)
+{
+ uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0;
+
+ for (;;) {
+ ssize_t nr_mounts;
+
+ nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id,
+ list, LISTMNT_BUFFER, 0);
+ ASSERT_GE(nr_mounts, 0);
+ if (nr_mounts == 0)
+ break;
+
+ for (size_t cur = 0; cur < nr_mounts; cur++) {
+ if (cur < nr_mounts - 1)
+ ASSERT_LT(list[cur], list[cur + 1]);
+ last_mnt_id = list[cur];
+ }
+ }
+}
+
+/* Check that all mount ids are in decreasing order. */
+TEST(listmount_backward)
+{
+ uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0;
+
+ for (;;) {
+ ssize_t nr_mounts;
+
+ nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id,
+ list, LISTMNT_BUFFER, LISTMOUNT_REVERSE);
+ ASSERT_GE(nr_mounts, 0);
+ if (nr_mounts == 0)
+ break;
+
+ for (size_t cur = 0; cur < nr_mounts; cur++) {
+ if (cur < nr_mounts - 1)
+ ASSERT_GT(list[cur], list[cur + 1]);
+ last_mnt_id = list[cur];
+ }
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
new file mode 100644
index 000000000000..99e5ad082fb1
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __STATMOUNT_H
+#define __STATMOUNT_H
+
+#include <stdint.h>
+#include <linux/mount.h>
+#include <asm/unistd.h>
+
+#ifndef __NR_statmount
+ #if defined __alpha__
+ #define __NR_statmount 567
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_statmount 4457
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_statmount 6457
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_statmount 5457
+ #endif
+ #else
+ #define __NR_statmount 457
+ #endif
+#endif
+
+#ifndef __NR_listmount
+ #if defined __alpha__
+ #define __NR_listmount 568
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_listmount 4458
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_listmount 6458
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_listmount 5458
+ #endif
+ #else
+ #define __NR_listmount 458
+ #endif
+#endif
+
+static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
+ struct statmount *buf, size_t bufsize,
+ unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = mask,
+ };
+
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
+
+ return syscall(__NR_statmount, &req, buf, bufsize, flags);
+}
+
+static inline ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
+ uint64_t last_mnt_id, uint64_t list[], size_t num,
+ unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = last_mnt_id,
+ };
+
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
+
+ return syscall(__NR_listmount, &req, list, num, flags);
+}
+
+#endif /* __STATMOUNT_H */
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
new file mode 100644
index 000000000000..6e53430423d2
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -0,0 +1,702 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <stddef.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <linux/stat.h>
+
+#include "statmount.h"
+#include "kselftest.h"
+
+static const char *const known_fs[] = {
+ "9p", "adfs", "affs", "afs", "aio", "anon_inodefs", "apparmorfs",
+ "autofs", "bcachefs", "bdev", "befs", "bfs", "binder", "binfmt_misc",
+ "bpf", "btrfs", "btrfs_test_fs", "ceph", "cgroup", "cgroup2", "cifs",
+ "coda", "configfs", "cpuset", "cramfs", "cxl", "dax", "debugfs",
+ "devpts", "devtmpfs", "dmabuf", "drm", "ecryptfs", "efivarfs", "efs",
+ "erofs", "exfat", "ext2", "ext3", "ext4", "f2fs", "functionfs",
+ "fuse", "fuseblk", "fusectl", "gadgetfs", "gfs2", "gfs2meta", "hfs",
+ "hfsplus", "hostfs", "hpfs", "hugetlbfs", "ibmasmfs", "iomem",
+ "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos",
+ "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2",
+ "ocfs2_dlmfs", "omfs", "openpromfs", "overlay", "pipefs", "proc",
+ "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs",
+ "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs",
+ "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs",
+ "sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf",
+ "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL };
+
+static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
+{
+ size_t bufsize = 1 << 15;
+ struct statmount *buf = NULL, *tmp = alloca(bufsize);
+ int tofree = 0;
+ int ret;
+
+ for (;;) {
+ ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags);
+ if (ret != -1)
+ break;
+ if (tofree)
+ free(tmp);
+ if (errno != EOVERFLOW)
+ return NULL;
+ bufsize <<= 1;
+ tofree = 1;
+ tmp = malloc(bufsize);
+ if (!tmp)
+ return NULL;
+ }
+ buf = malloc(tmp->size);
+ if (buf)
+ memcpy(buf, tmp, tmp->size);
+ if (tofree)
+ free(tmp);
+
+ return buf;
+}
+
+static void write_file(const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+ int ret;
+
+ if (fd == -1)
+ ksft_exit_fail_msg("opening %s for write: %s\n", path, strerror(errno));
+
+ ret = write(fd, val, len);
+ if (ret == -1)
+ ksft_exit_fail_msg("writing to %s: %s\n", path, strerror(errno));
+ if (ret != len)
+ ksft_exit_fail_msg("short write to %s\n", path);
+
+ ret = close(fd);
+ if (ret == -1)
+ ksft_exit_fail_msg("closing %s\n", path);
+}
+
+static uint64_t get_mnt_id(const char *name, const char *path, uint64_t mask)
+{
+ struct statx sx;
+ int ret;
+
+ ret = statx(AT_FDCWD, path, 0, mask, &sx);
+ if (ret == -1)
+ ksft_exit_fail_msg("retrieving %s mount ID for %s: %s\n",
+ mask & STATX_MNT_ID_UNIQUE ? "unique" : "old",
+ name, strerror(errno));
+ if (!(sx.stx_mask & mask))
+ ksft_exit_fail_msg("no %s mount ID available for %s\n",
+ mask & STATX_MNT_ID_UNIQUE ? "unique" : "old",
+ name);
+
+ return sx.stx_mnt_id;
+}
+
+
+static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX";
+static int orig_root;
+static uint64_t root_id, parent_id;
+static uint32_t old_root_id, old_parent_id;
+static FILE *f_mountinfo;
+
+static void cleanup_namespace(void)
+{
+ int ret;
+
+ ret = fchdir(orig_root);
+ if (ret == -1)
+ ksft_perror("fchdir to original root");
+
+ ret = chroot(".");
+ if (ret == -1)
+ ksft_perror("chroot to original root");
+
+ umount2(root_mntpoint, MNT_DETACH);
+ rmdir(root_mntpoint);
+}
+
+static void setup_namespace(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
+ if (ret == -1)
+ ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
+ strerror(errno));
+
+ sprintf(buf, "0 %d 1", uid);
+ write_file("/proc/self/uid_map", buf);
+ write_file("/proc/self/setgroups", "deny");
+ sprintf(buf, "0 %d 1", gid);
+ write_file("/proc/self/gid_map", buf);
+
+ f_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!f_mountinfo)
+ ksft_exit_fail_msg("failed to open mountinfo: %s\n",
+ strerror(errno));
+
+ ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ if (ret == -1)
+ ksft_exit_fail_msg("making mount tree private: %s\n",
+ strerror(errno));
+
+ if (!mkdtemp(root_mntpoint))
+ ksft_exit_fail_msg("creating temporary directory %s: %s\n",
+ root_mntpoint, strerror(errno));
+
+ old_parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID);
+ parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID_UNIQUE);
+
+ orig_root = open("/", O_PATH);
+ if (orig_root == -1)
+ ksft_exit_fail_msg("opening root directory: %s",
+ strerror(errno));
+
+ atexit(cleanup_namespace);
+
+ ret = mount(root_mntpoint, root_mntpoint, NULL, MS_BIND, NULL);
+ if (ret == -1)
+ ksft_exit_fail_msg("mounting temp root %s: %s\n",
+ root_mntpoint, strerror(errno));
+
+ ret = chroot(root_mntpoint);
+ if (ret == -1)
+ ksft_exit_fail_msg("chroot to temp root %s: %s\n",
+ root_mntpoint, strerror(errno));
+
+ ret = chdir("/");
+ if (ret == -1)
+ ksft_exit_fail_msg("chdir to root: %s\n", strerror(errno));
+
+ old_root_id = get_mnt_id("root", "/", STATX_MNT_ID);
+ root_id = get_mnt_id("root", "/", STATX_MNT_ID_UNIQUE);
+}
+
+static int setup_mount_tree(int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_REC|MS_SHARED, NULL);
+ if (ret == -1) {
+ ksft_test_result_fail("making mount tree shared: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ if (ret == -1) {
+ ksft_test_result_fail("mounting submount %s: %s\n",
+ root_mntpoint, strerror(errno));
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static void test_listmount_empty_root(void)
+{
+ ssize_t res;
+ const unsigned int size = 32;
+ uint64_t list[size];
+
+ res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
+ if (res == -1) {
+ ksft_test_result_fail("listmount: %s\n", strerror(errno));
+ return;
+ }
+ if (res != 1) {
+ ksft_test_result_fail("listmount result is %zi != 1\n", res);
+ return;
+ }
+
+ if (list[0] != root_id) {
+ ksft_test_result_fail("listmount ID doesn't match 0x%llx != 0x%llx\n",
+ (unsigned long long) list[0],
+ (unsigned long long) root_id);
+ return;
+ }
+
+ ksft_test_result_pass("listmount empty root\n");
+}
+
+static void test_statmount_zero_mask(void)
+{
+ struct statmount sm;
+ int ret;
+
+ ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_test_result_fail("statmount zero mask: %s\n",
+ strerror(errno));
+ return;
+ }
+ if (sm.size != sizeof(sm)) {
+ ksft_test_result_fail("unexpected size: %u != %u\n",
+ sm.size, (uint32_t) sizeof(sm));
+ return;
+ }
+ if (sm.mask != 0) {
+ ksft_test_result_fail("unexpected mask: 0x%llx != 0x0\n",
+ (unsigned long long) sm.mask);
+ return;
+ }
+
+ ksft_test_result_pass("statmount zero mask\n");
+}
+
+static void test_statmount_mnt_basic(void)
+{
+ struct statmount sm;
+ int ret;
+ uint64_t mask = STATMOUNT_MNT_BASIC;
+
+ ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_test_result_fail("statmount mnt basic: %s\n",
+ strerror(errno));
+ return;
+ }
+ if (sm.size != sizeof(sm)) {
+ ksft_test_result_fail("unexpected size: %u != %u\n",
+ sm.size, (uint32_t) sizeof(sm));
+ return;
+ }
+ if (sm.mask != mask) {
+ ksft_test_result_skip("statmount mnt basic unavailable\n");
+ return;
+ }
+
+ if (sm.mnt_id != root_id) {
+ ksft_test_result_fail("unexpected root ID: 0x%llx != 0x%llx\n",
+ (unsigned long long) sm.mnt_id,
+ (unsigned long long) root_id);
+ return;
+ }
+
+ if (sm.mnt_id_old != old_root_id) {
+ ksft_test_result_fail("unexpected old root ID: %u != %u\n",
+ sm.mnt_id_old, old_root_id);
+ return;
+ }
+
+ if (sm.mnt_parent_id != parent_id) {
+ ksft_test_result_fail("unexpected parent ID: 0x%llx != 0x%llx\n",
+ (unsigned long long) sm.mnt_parent_id,
+ (unsigned long long) parent_id);
+ return;
+ }
+
+ if (sm.mnt_parent_id_old != old_parent_id) {
+ ksft_test_result_fail("unexpected old parent ID: %u != %u\n",
+ sm.mnt_parent_id_old, old_parent_id);
+ return;
+ }
+
+ if (sm.mnt_propagation != MS_PRIVATE) {
+ ksft_test_result_fail("unexpected propagation: 0x%llx\n",
+ (unsigned long long) sm.mnt_propagation);
+ return;
+ }
+
+ ksft_test_result_pass("statmount mnt basic\n");
+}
+
+
+static void test_statmount_sb_basic(void)
+{
+ struct statmount sm;
+ int ret;
+ uint64_t mask = STATMOUNT_SB_BASIC;
+ struct statx sx;
+ struct statfs sf;
+
+ ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_test_result_fail("statmount sb basic: %s\n",
+ strerror(errno));
+ return;
+ }
+ if (sm.size != sizeof(sm)) {
+ ksft_test_result_fail("unexpected size: %u != %u\n",
+ sm.size, (uint32_t) sizeof(sm));
+ return;
+ }
+ if (sm.mask != mask) {
+ ksft_test_result_skip("statmount sb basic unavailable\n");
+ return;
+ }
+
+ ret = statx(AT_FDCWD, "/", 0, 0, &sx);
+ if (ret == -1) {
+ ksft_test_result_fail("stat root failed: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ if (sm.sb_dev_major != sx.stx_dev_major ||
+ sm.sb_dev_minor != sx.stx_dev_minor) {
+ ksft_test_result_fail("unexpected sb dev %u:%u != %u:%u\n",
+ sm.sb_dev_major, sm.sb_dev_minor,
+ sx.stx_dev_major, sx.stx_dev_minor);
+ return;
+ }
+
+ ret = statfs("/", &sf);
+ if (ret == -1) {
+ ksft_test_result_fail("statfs root failed: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ if (sm.sb_magic != sf.f_type) {
+ ksft_test_result_fail("unexpected sb magic: 0x%llx != 0x%lx\n",
+ (unsigned long long) sm.sb_magic,
+ sf.f_type);
+ return;
+ }
+
+ ksft_test_result_pass("statmount sb basic\n");
+}
+
+static void test_statmount_mnt_point(void)
+{
+ struct statmount *sm;
+
+ sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount mount point: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ if (!(sm->mask & STATMOUNT_MNT_POINT)) {
+ ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n");
+ return;
+ }
+ if (strcmp(sm->str + sm->mnt_point, "/") != 0) {
+ ksft_test_result_fail("unexpected mount point: '%s' != '/'\n",
+ sm->str + sm->mnt_point);
+ goto out;
+ }
+ ksft_test_result_pass("statmount mount point\n");
+out:
+ free(sm);
+}
+
+static void test_statmount_mnt_root(void)
+{
+ struct statmount *sm;
+ const char *mnt_root, *last_dir, *last_root;
+
+ last_dir = strrchr(root_mntpoint, '/');
+ assert(last_dir);
+ last_dir++;
+
+ sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount mount root: %s\n",
+ strerror(errno));
+ return;
+ }
+ if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+ ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n");
+ return;
+ }
+ mnt_root = sm->str + sm->mnt_root;
+ last_root = strrchr(mnt_root, '/');
+ if (last_root)
+ last_root++;
+ else
+ last_root = mnt_root;
+
+ if (strcmp(last_dir, last_root) != 0) {
+ ksft_test_result_fail("unexpected mount root last component: '%s' != '%s'\n",
+ last_root, last_dir);
+ goto out;
+ }
+ ksft_test_result_pass("statmount mount root\n");
+out:
+ free(sm);
+}
+
+static void test_statmount_fs_type(void)
+{
+ struct statmount *sm;
+ const char *fs_type;
+ const char *const *s;
+
+ sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount fs type: %s\n",
+ strerror(errno));
+ return;
+ }
+ if (!(sm->mask & STATMOUNT_FS_TYPE)) {
+ ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n");
+ return;
+ }
+ fs_type = sm->str + sm->fs_type;
+ for (s = known_fs; s != NULL; s++) {
+ if (strcmp(fs_type, *s) == 0)
+ break;
+ }
+ if (!s)
+ ksft_print_msg("unknown filesystem type: %s\n", fs_type);
+
+ ksft_test_result_pass("statmount fs type\n");
+ free(sm);
+}
+
+static void test_statmount_mnt_opts(void)
+{
+ struct statmount *sm;
+ const char *statmount_opts;
+ char *line = NULL;
+ size_t len = 0;
+
+ sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
+ 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount mnt opts: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ if (!(sm->mask & STATMOUNT_MNT_BASIC)) {
+ ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n");
+ return;
+ }
+
+ while (getline(&line, &len, f_mountinfo) != -1) {
+ int i;
+ char *p, *p2;
+ unsigned int old_mnt_id;
+
+ old_mnt_id = atoi(line);
+ if (old_mnt_id != sm->mnt_id_old)
+ continue;
+
+ for (p = line, i = 0; p && i < 5; i++)
+ p = strchr(p + 1, ' ');
+ if (!p)
+ continue;
+
+ p2 = strchr(p + 1, ' ');
+ if (!p2)
+ continue;
+ *p2 = '\0';
+ p = strchr(p2 + 1, '-');
+ if (!p)
+ continue;
+ for (p++, i = 0; p && i < 2; i++)
+ p = strchr(p + 1, ' ');
+ if (!p)
+ continue;
+ p++;
+
+ /* skip generic superblock options */
+ if (strncmp(p, "ro", 2) == 0)
+ p += 2;
+ else if (strncmp(p, "rw", 2) == 0)
+ p += 2;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "sync", 4) == 0)
+ p += 4;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "dirsync", 7) == 0)
+ p += 7;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "lazytime", 8) == 0)
+ p += 8;
+ if (*p == ',')
+ p++;
+ p2 = strrchr(p, '\n');
+ if (p2)
+ *p2 = '\0';
+
+ if (sm->mask & STATMOUNT_MNT_OPTS)
+ statmount_opts = sm->str + sm->mnt_opts;
+ else
+ statmount_opts = "";
+ if (strcmp(statmount_opts, p) != 0)
+ ksft_test_result_fail(
+ "unexpected mount options: '%s' != '%s'\n",
+ statmount_opts, p);
+ else
+ ksft_test_result_pass("statmount mount options\n");
+ free(sm);
+ free(line);
+ return;
+ }
+
+ ksft_test_result_fail("didnt't find mount entry\n");
+ free(sm);
+ free(line);
+}
+
+static void test_statmount_string(uint64_t mask, size_t off, const char *name)
+{
+ struct statmount *sm;
+ size_t len, shortsize, exactsize;
+ uint32_t start, i;
+ int ret;
+
+ sm = statmount_alloc(root_id, mask, 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount %s: %s\n", name,
+ strerror(errno));
+ goto out;
+ }
+ if (sm->size < sizeof(*sm)) {
+ ksft_test_result_fail("unexpected size: %u < %u\n",
+ sm->size, (uint32_t) sizeof(*sm));
+ goto out;
+ }
+ if (sm->mask != mask) {
+ ksft_test_result_skip("statmount %s unavailable\n", name);
+ goto out;
+ }
+ len = sm->size - sizeof(*sm);
+ start = ((uint32_t *) sm)[off];
+
+ for (i = start;; i++) {
+ if (i >= len) {
+ ksft_test_result_fail("string out of bounds\n");
+ goto out;
+ }
+ if (!sm->str[i])
+ break;
+ }
+ exactsize = sm->size;
+ shortsize = sizeof(*sm) + i;
+
+ ret = statmount(root_id, 0, mask, sm, exactsize, 0);
+ if (ret == -1) {
+ ksft_test_result_fail("statmount exact size: %s\n",
+ strerror(errno));
+ goto out;
+ }
+ errno = 0;
+ ret = statmount(root_id, 0, mask, sm, shortsize, 0);
+ if (ret != -1 || errno != EOVERFLOW) {
+ ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ ksft_test_result_pass("statmount string %s\n", name);
+out:
+ free(sm);
+}
+
+static void test_listmount_tree(void)
+{
+ ssize_t res;
+ const unsigned int log2_num = 4;
+ const unsigned int step = 3;
+ const unsigned int size = (1 << log2_num) + step + 1;
+ size_t num, expect = 1 << log2_num;
+ uint64_t list[size];
+ uint64_t list2[size];
+ size_t i;
+
+
+ res = setup_mount_tree(log2_num);
+ if (res == -1)
+ return;
+
+ num = res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
+ if (res == -1) {
+ ksft_test_result_fail("listmount: %s\n", strerror(errno));
+ return;
+ }
+ if (num != expect) {
+ ksft_test_result_fail("listmount result is %zi != %zi\n",
+ res, expect);
+ return;
+ }
+
+ for (i = 0; i < size - step;) {
+ res = listmount(LSMT_ROOT, 0, i ? list2[i - 1] : 0, list2 + i, step, 0);
+ if (res == -1)
+ ksft_test_result_fail("short listmount: %s\n",
+ strerror(errno));
+ i += res;
+ if (res < step)
+ break;
+ }
+ if (i != num) {
+ ksft_test_result_fail("different number of entries: %zu != %zu\n",
+ i, num);
+ return;
+ }
+ for (i = 0; i < num; i++) {
+ if (list2[i] != list[i]) {
+ ksft_test_result_fail("different value for entry %zu: 0x%llx != 0x%llx\n",
+ i,
+ (unsigned long long) list2[i],
+ (unsigned long long) list[i]);
+ }
+ }
+
+ ksft_test_result_pass("listmount tree\n");
+}
+
+#define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t))
+
+int main(void)
+{
+ int ret;
+ uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
+ STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT |
+ STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE | STATMOUNT_MNT_NS_ID;
+
+ ksft_print_header();
+
+ ret = statmount(0, 0, 0, NULL, 0, 0);
+ assert(ret == -1);
+ if (errno == ENOSYS)
+ ksft_exit_skip("statmount() syscall not supported\n");
+
+ setup_namespace();
+
+ ksft_set_plan(15);
+ test_listmount_empty_root();
+ test_statmount_zero_mask();
+ test_statmount_mnt_basic();
+ test_statmount_sb_basic();
+ test_statmount_mnt_root();
+ test_statmount_mnt_point();
+ test_statmount_fs_type();
+ test_statmount_mnt_opts();
+ test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root");
+ test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point");
+ test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type");
+ test_statmount_string(all_mask, str_off(mnt_root), "mount root & all");
+ test_statmount_string(all_mask, str_off(mnt_point), "mount point & all");
+ test_statmount_string(all_mask, str_off(fs_type), "fs type & all");
+
+ test_listmount_tree();
+
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
new file mode 100644
index 000000000000..d56d4103182f
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <linux/nsfs.h>
+#include <linux/stat.h>
+
+#include "statmount.h"
+#include "../utils.h"
+#include "kselftest.h"
+
+#define NSID_PASS 0
+#define NSID_FAIL 1
+#define NSID_SKIP 2
+#define NSID_ERROR 3
+
+static void handle_result(int ret, const char *testname)
+{
+ if (ret == NSID_PASS)
+ ksft_test_result_pass("%s\n", testname);
+ else if (ret == NSID_FAIL)
+ ksft_test_result_fail("%s\n", testname);
+ else if (ret == NSID_ERROR)
+ ksft_exit_fail_msg("%s\n", testname);
+ else
+ ksft_test_result_skip("%s\n", testname);
+}
+
+static inline int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
+ return -1;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg(
+ "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
+ WIFSIGNALED(status), WTERMSIG(status));
+ return -1;
+ }
+
+ ret = WEXITSTATUS(status);
+ return ret;
+}
+
+static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id)
+{
+ int fd = open(mnt_ns, O_RDONLY);
+
+ if (fd < 0) {
+ ksft_print_msg("failed to open for ns %s: %s\n",
+ mnt_ns, strerror(errno));
+ sleep(60);
+ return NSID_ERROR;
+ }
+
+ if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) {
+ ksft_print_msg("failed to get the nsid for ns %s: %s\n",
+ mnt_ns, strerror(errno));
+ return NSID_ERROR;
+ }
+ close(fd);
+ return NSID_PASS;
+}
+
+static int setup_namespace(void)
+{
+ if (setup_userns() != 0)
+ return NSID_ERROR;
+
+ return NSID_PASS;
+}
+
+static int _test_statmount_mnt_ns_id(void)
+{
+ struct statmount sm;
+ uint64_t mnt_ns_id;
+ uint64_t root_id;
+ int ret;
+
+ ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id);
+ if (ret != NSID_PASS)
+ return ret;
+
+ root_id = get_unique_mnt_id("/");
+ if (!root_id)
+ return NSID_ERROR;
+
+ ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (sm.size != sizeof(sm)) {
+ ksft_print_msg("unexpected size: %u != %u\n", sm.size,
+ (uint32_t)sizeof(sm));
+ return NSID_FAIL;
+ }
+ if (sm.mask != STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("statmount mnt ns id unavailable\n");
+ return NSID_SKIP;
+ }
+
+ if (sm.mnt_ns_id != mnt_ns_id) {
+ ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n",
+ (unsigned long long)sm.mnt_ns_id,
+ (unsigned long long)mnt_ns_id);
+ return NSID_FAIL;
+ }
+
+ return NSID_PASS;
+}
+
+static void test_statmount_mnt_ns_id(void)
+{
+ pid_t pid;
+ int ret;
+
+ pid = fork();
+ if (pid < 0)
+ ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
+
+ /* We're the original pid, wait for the result. */
+ if (pid != 0) {
+ ret = wait_for_pid(pid);
+ handle_result(ret, "test statmount ns id");
+ return;
+ }
+
+ ret = setup_namespace();
+ if (ret != NSID_PASS)
+ exit(ret);
+ ret = _test_statmount_mnt_ns_id();
+ exit(ret);
+}
+
+static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts)
+{
+ uint64_t list[256];
+ uint64_t mnt_ns_id;
+ uint64_t nr_mounts;
+ char buf[256];
+ int ret;
+
+ /* Get the mount ns id for our child. */
+ snprintf(buf, sizeof(buf), "/proc/%lu/ns/mnt", (unsigned long)pid);
+ ret = get_mnt_ns_id(buf, &mnt_ns_id);
+
+ nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0);
+ if (nr_mounts == (uint64_t)-1) {
+ ksft_print_msg("listmount: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (nr_mounts != child_nr_mounts) {
+ ksft_print_msg("listmount results is %zi != %zi\n", nr_mounts,
+ child_nr_mounts);
+ return NSID_FAIL;
+ }
+
+ /* Validate that all of our entries match our mnt_ns_id. */
+ for (int i = 0; i < nr_mounts; i++) {
+ struct statmount sm;
+
+ ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm,
+ sizeof(sm), 0);
+ if (ret < 0) {
+ ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (sm.mask != STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("statmount mnt ns id unavailable\n");
+ return NSID_SKIP;
+ }
+
+ if (sm.mnt_ns_id != mnt_ns_id) {
+ ksft_print_msg("listmount gave us the wrong ns id: 0x%llx != 0x%llx\n",
+ (unsigned long long)sm.mnt_ns_id,
+ (unsigned long long)mnt_ns_id);
+ return NSID_FAIL;
+ }
+ }
+
+ return NSID_PASS;
+}
+
+static void test_listmount_ns(void)
+{
+ uint64_t nr_mounts;
+ char pval;
+ int child_ready_pipe[2];
+ int parent_ready_pipe[2];
+ pid_t pid;
+ int ret, child_ret;
+
+ if (pipe(child_ready_pipe) < 0)
+ ksft_exit_fail_msg("failed to create the child pipe: %s\n",
+ strerror(errno));
+ if (pipe(parent_ready_pipe) < 0)
+ ksft_exit_fail_msg("failed to create the parent pipe: %s\n",
+ strerror(errno));
+
+ pid = fork();
+ if (pid < 0)
+ ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
+
+ if (pid == 0) {
+ char cval;
+ uint64_t list[256];
+
+ close(child_ready_pipe[0]);
+ close(parent_ready_pipe[1]);
+
+ ret = setup_namespace();
+ if (ret != NSID_PASS)
+ exit(ret);
+
+ nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 256, 0);
+ if (nr_mounts == (uint64_t)-1) {
+ ksft_print_msg("listmount: %s\n", strerror(errno));
+ exit(NSID_FAIL);
+ }
+
+ /*
+ * Tell our parent how many mounts we have, and then wait for it
+ * to tell us we're done.
+ */
+ if (write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)) !=
+ sizeof(nr_mounts))
+ ret = NSID_ERROR;
+ if (read(parent_ready_pipe[0], &cval, sizeof(cval)) != sizeof(cval))
+ ret = NSID_ERROR;
+ exit(NSID_PASS);
+ }
+
+ close(child_ready_pipe[1]);
+ close(parent_ready_pipe[0]);
+
+ /* Wait until the child has created everything. */
+ if (read(child_ready_pipe[0], &nr_mounts, sizeof(nr_mounts)) !=
+ sizeof(nr_mounts))
+ ret = NSID_ERROR;
+
+ ret = validate_external_listmount(pid, nr_mounts);
+
+ if (write(parent_ready_pipe[1], &pval, sizeof(pval)) != sizeof(pval))
+ ret = NSID_ERROR;
+
+ child_ret = wait_for_pid(pid);
+ if (child_ret != NSID_PASS)
+ ret = child_ret;
+ handle_result(ret, "test listmount ns id");
+}
+
+int main(void)
+{
+ int ret;
+
+ ksft_print_header();
+ ret = statmount(0, 0, 0, NULL, 0, 0);
+ assert(ret == -1);
+ if (errno == ENOSYS)
+ ksft_exit_skip("statmount() syscall not supported\n");
+
+ ksft_set_plan(2);
+ test_statmount_mnt_ns_id();
+ test_listmount_ns();
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
new file mode 100644
index 000000000000..c9dd5412b37b
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <fcntl.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <grp.h>
+#include <linux/limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/fsuid.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/xattr.h>
+#include <sys/mount.h>
+
+#include "kselftest.h"
+#include "wrappers.h"
+#include "utils.h"
+
+#define MAX_USERNS_LEVEL 32
+
+#define syserror(format, ...) \
+ ({ \
+ fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
+ (-errno); \
+ })
+
+#define syserror_set(__ret__, format, ...) \
+ ({ \
+ typeof(__ret__) __internal_ret__ = (__ret__); \
+ errno = labs(__ret__); \
+ fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
+ __internal_ret__; \
+ })
+
+#define STRLITERALLEN(x) (sizeof(""x"") - 1)
+
+#define INTTYPE_TO_STRLEN(type) \
+ (2 + (sizeof(type) <= 1 \
+ ? 3 \
+ : sizeof(type) <= 2 \
+ ? 5 \
+ : sizeof(type) <= 4 \
+ ? 10 \
+ : sizeof(type) <= 8 ? 20 : sizeof(int[-2 * (sizeof(type) > 8)])))
+
+#define list_for_each(__iterator, __list) \
+ for (__iterator = (__list)->next; __iterator != __list; __iterator = __iterator->next)
+
+typedef enum idmap_type_t {
+ ID_TYPE_UID,
+ ID_TYPE_GID
+} idmap_type_t;
+
+struct id_map {
+ idmap_type_t map_type;
+ __u32 nsid;
+ __u32 hostid;
+ __u32 range;
+};
+
+struct list {
+ void *elem;
+ struct list *next;
+ struct list *prev;
+};
+
+struct userns_hierarchy {
+ int fd_userns;
+ int fd_event;
+ unsigned int level;
+ struct list id_map;
+};
+
+static inline void list_init(struct list *list)
+{
+ list->elem = NULL;
+ list->next = list->prev = list;
+}
+
+static inline int list_empty(const struct list *list)
+{
+ return list == list->next;
+}
+
+static inline void __list_add(struct list *new, struct list *prev, struct list *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+static inline void list_add_tail(struct list *head, struct list *list)
+{
+ __list_add(list, head->prev, head);
+}
+
+static inline void list_del(struct list *list)
+{
+ struct list *next, *prev;
+
+ next = list->next;
+ prev = list->prev;
+ next->prev = prev;
+ prev->next = next;
+}
+
+static ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = read(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int get_userns_fd_cb(void *data)
+{
+ for (;;)
+ pause();
+ _exit(0);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
+{
+ int fd = -EBADF, setgroups_fd = -EBADF;
+ int fret = -1;
+ int ret;
+ char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+ STRLITERALLEN("/setgroups") + 1];
+
+ if (geteuid() != 0 && map_type == ID_TYPE_GID) {
+ ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
+ if (ret < 0 || ret >= sizeof(path))
+ goto out;
+
+ setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
+ if (setgroups_fd < 0 && errno != ENOENT) {
+ syserror("Failed to open \"%s\"", path);
+ goto out;
+ }
+
+ if (setgroups_fd >= 0) {
+ ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
+ if (ret != STRLITERALLEN("deny\n")) {
+ syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
+ goto out;
+ }
+ }
+ }
+
+ ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
+ if (ret < 0 || ret >= sizeof(path))
+ goto out;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC);
+ if (fd < 0) {
+ syserror("Failed to open \"%s\"", path);
+ goto out;
+ }
+
+ ret = write_nointr(fd, buf, buf_size);
+ if (ret != buf_size) {
+ syserror("Failed to write %cid mapping to \"%s\"",
+ map_type == ID_TYPE_UID ? 'u' : 'g', path);
+ goto out;
+ }
+
+ fret = 0;
+out:
+ close(fd);
+ close(setgroups_fd);
+
+ return fret;
+}
+
+static int map_ids_from_idmap(struct list *idmap, pid_t pid)
+{
+ int fill, left;
+ char mapbuf[4096] = {};
+ bool had_entry = false;
+ idmap_type_t map_type, u_or_g;
+
+ if (list_empty(idmap))
+ return 0;
+
+ for (map_type = ID_TYPE_UID, u_or_g = 'u';
+ map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
+ char *pos = mapbuf;
+ int ret;
+ struct list *iterator;
+
+
+ list_for_each(iterator, idmap) {
+ struct id_map *map = iterator->elem;
+ if (map->map_type != map_type)
+ continue;
+
+ had_entry = true;
+
+ left = 4096 - (pos - mapbuf);
+ fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
+ /*
+ * The kernel only takes <= 4k for writes to
+ * /proc/<pid>/{g,u}id_map
+ */
+ if (fill <= 0 || fill >= left)
+ return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
+
+ pos += fill;
+ }
+ if (!had_entry)
+ continue;
+
+ ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
+ if (ret < 0)
+ return syserror("Failed to write mapping: %s", mapbuf);
+
+ memset(mapbuf, 0, sizeof(mapbuf));
+ }
+
+ return 0;
+}
+
+static int get_userns_fd_from_idmap(struct list *idmap)
+{
+ int ret;
+ pid_t pid;
+ char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+ STRLITERALLEN("/ns/user") + 1];
+
+ pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
+ if (pid < 0)
+ return -errno;
+
+ ret = map_ids_from_idmap(idmap, pid);
+ if (ret < 0)
+ return ret;
+
+ ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
+ if (ret < 0 || (size_t)ret >= sizeof(path_ns))
+ ret = -EIO;
+ else
+ ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
+
+ (void)kill(pid, SIGKILL);
+ (void)wait_for_pid(pid);
+ return ret;
+}
+
+int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+ struct list head, uid_mapl, gid_mapl;
+ struct id_map uid_map = {
+ .map_type = ID_TYPE_UID,
+ .nsid = nsid,
+ .hostid = hostid,
+ .range = range,
+ };
+ struct id_map gid_map = {
+ .map_type = ID_TYPE_GID,
+ .nsid = nsid,
+ .hostid = hostid,
+ .range = range,
+ };
+
+ list_init(&head);
+ uid_mapl.elem = &uid_map;
+ gid_mapl.elem = &gid_map;
+ list_add_tail(&head, &uid_mapl);
+ list_add_tail(&head, &gid_mapl);
+
+ return get_userns_fd_from_idmap(&head);
+}
+
+bool switch_ids(uid_t uid, gid_t gid)
+{
+ if (setgroups(0, NULL))
+ return syserror("failure: setgroups");
+
+ if (setresgid(gid, gid, gid))
+ return syserror("failure: setresgid");
+
+ if (setresuid(uid, uid, uid))
+ return syserror("failure: setresuid");
+
+ /* Ensure we can access proc files from processes we can ptrace. */
+ if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0))
+ return syserror("failure: make dumpable");
+
+ return true;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h);
+
+static int userns_fd_cb(void *data)
+{
+ struct userns_hierarchy *h = data;
+ char c;
+ int ret;
+
+ ret = read_nointr(h->fd_event, &c, 1);
+ if (ret < 0)
+ return syserror("failure: read from socketpair");
+
+ /* Only switch ids if someone actually wrote a mapping for us. */
+ if (c == '1') {
+ if (!switch_ids(0, 0))
+ return syserror("failure: switch ids to 0");
+ }
+
+ ret = write_nointr(h->fd_event, "1", 1);
+ if (ret < 0)
+ return syserror("failure: write to socketpair");
+
+ ret = create_userns_hierarchy(++h);
+ if (ret < 0)
+ return syserror("failure: userns level %d", h->level);
+
+ return 0;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h)
+{
+ int fret = -1;
+ char c;
+ int fd_socket[2];
+ int fd_userns = -EBADF, ret = -1;
+ ssize_t bytes;
+ pid_t pid;
+ char path[256];
+
+ if (h->level == MAX_USERNS_LEVEL)
+ return 0;
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fd_socket);
+ if (ret < 0)
+ return syserror("failure: create socketpair");
+
+ /* Note the CLONE_FILES | CLONE_VM when mucking with fds and memory. */
+ h->fd_event = fd_socket[1];
+ pid = do_clone(userns_fd_cb, h, CLONE_NEWUSER | CLONE_FILES | CLONE_VM);
+ if (pid < 0) {
+ syserror("failure: userns level %d", h->level);
+ goto out_close;
+ }
+
+ ret = map_ids_from_idmap(&h->id_map, pid);
+ if (ret < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: writing id mapping for userns level %d for %d", h->level, pid);
+ goto out_wait;
+ }
+
+ if (!list_empty(&h->id_map))
+ bytes = write_nointr(fd_socket[0], "1", 1); /* Inform the child we wrote a mapping. */
+ else
+ bytes = write_nointr(fd_socket[0], "0", 1); /* Inform the child we didn't write a mapping. */
+ if (bytes < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: write to socketpair");
+ goto out_wait;
+ }
+
+ /* Wait for child to set*id() and become dumpable. */
+ bytes = read_nointr(fd_socket[0], &c, 1);
+ if (bytes < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: read from socketpair");
+ goto out_wait;
+ }
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ fd_userns = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd_userns < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: open userns level %d for %d", h->level, pid);
+ goto out_wait;
+ }
+
+ fret = 0;
+
+out_wait:
+ if (!wait_for_pid(pid) && !fret) {
+ h->fd_userns = fd_userns;
+ fd_userns = -EBADF;
+ }
+
+out_close:
+ if (fd_userns >= 0)
+ close(fd_userns);
+ close(fd_socket[0]);
+ close(fd_socket[1]);
+ return fret;
+}
+
+static int write_file(const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+ int ret;
+
+ if (fd == -1) {
+ ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
+ return -1;
+ }
+
+ ret = write(fd, val, len);
+ if (ret == -1) {
+ ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
+ return -1;
+ }
+ if (ret != len) {
+ ksft_print_msg("short write to %s\n", path);
+ return -1;
+ }
+
+ ret = close(fd);
+ if (ret == -1) {
+ ksft_print_msg("closing %s\n", path);
+ return -1;
+ }
+
+ return 0;
+}
+
+int setup_userns(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER);
+ if (ret) {
+ ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
+ strerror(errno));
+ return ret;
+ }
+
+ sprintf(buf, "0 %d 1", uid);
+ ret = write_file("/proc/self/uid_map", buf);
+ if (ret)
+ return ret;
+ ret = write_file("/proc/self/setgroups", "deny");
+ if (ret)
+ return ret;
+ sprintf(buf, "0 %d 1", gid);
+ ret = write_file("/proc/self/gid_map", buf);
+ if (ret)
+ return ret;
+
+ ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ if (ret) {
+ ksft_print_msg("making mount tree private: %s\n", strerror(errno));
+ return ret;
+ }
+
+ return 0;
+}
+
+/* caps_down - lower all effective caps */
+int caps_down(void)
+{
+ bool fret = false;
+ cap_t caps = NULL;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps)
+ goto out;
+
+ ret = cap_clear_flag(caps, CAP_EFFECTIVE);
+ if (ret)
+ goto out;
+
+ ret = cap_set_proc(caps);
+ if (ret)
+ goto out;
+
+ fret = true;
+
+out:
+ cap_free(caps);
+ return fret;
+}
+
+/* cap_down - lower an effective cap */
+int cap_down(cap_value_t down)
+{
+ bool fret = false;
+ cap_t caps = NULL;
+ cap_value_t cap = down;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps)
+ goto out;
+
+ ret = cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, 0);
+ if (ret)
+ goto out;
+
+ ret = cap_set_proc(caps);
+ if (ret)
+ goto out;
+
+ fret = true;
+
+out:
+ cap_free(caps);
+ return fret;
+}
+
+uint64_t get_unique_mnt_id(const char *path)
+{
+ struct statx sx;
+ int ret;
+
+ ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
+ if (ret == -1) {
+ ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
+ strerror(errno));
+ return 0;
+ }
+
+ if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
+ ksft_print_msg("no unique mount ID available for %s\n", path);
+ return 0;
+ }
+
+ return sx.stx_mnt_id;
+}
diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h
new file mode 100644
index 000000000000..70f7ccc607f4
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __IDMAP_UTILS_H
+#define __IDMAP_UTILS_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/capability.h>
+#include <sys/fsuid.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+extern int get_userns_fd(unsigned long nsid, unsigned long hostid,
+ unsigned long range);
+
+extern int caps_down(void);
+extern int cap_down(cap_value_t down);
+
+extern bool switch_ids(uid_t uid, gid_t gid);
+extern int setup_userns(void);
+
+static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
+{
+ if (setns(fd, CLONE_NEWUSER))
+ return false;
+
+ if (!switch_ids(uid, gid))
+ return false;
+
+ if (drop_caps && !caps_down())
+ return false;
+
+ return true;
+}
+
+extern uint64_t get_unique_mnt_id(const char *path);
+
+#endif /* __IDMAP_UTILS_H */
diff --git a/tools/testing/selftests/filesystems/wrappers.h b/tools/testing/selftests/filesystems/wrappers.h
new file mode 100644
index 000000000000..420ae4f908cf
--- /dev/null
+++ b/tools/testing/selftests/filesystems/wrappers.h
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+#ifndef __SELFTEST_OVERLAYFS_WRAPPERS_H__
+#define __SELFTEST_OVERLAYFS_WRAPPERS_H__
+
+#define _GNU_SOURCE
+
+#include <linux/types.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+
+#ifndef STATX_MNT_ID_UNIQUE
+#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+#endif
+
+static inline int sys_fsopen(const char *fsname, unsigned int flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key,
+ const char *value, int aux)
+{
+ return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
+}
+
+static inline int sys_fsmount(int fd, unsigned int flags,
+ unsigned int attr_flags)
+{
+ return syscall(__NR_fsmount, fd, flags, attr_flags);
+}
+
+static inline int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return syscall(__NR_mount, src, tgt, fst, flags, data);
+}
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+#endif
+
+#ifndef MOVE_MOUNT_T_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
+#endif
+
+#ifndef __NR_move_mount
+ #if defined __alpha__
+ #define __NR_move_mount 539
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_move_mount 4429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_move_mount 6429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_move_mount 5429
+ #endif
+ #else
+ #define __NR_move_mount 429
+ #endif
+#endif
+
+static inline int sys_move_mount(int from_dfd, const char *from_pathname,
+ int to_dfd, const char *to_pathname,
+ unsigned int flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
+ to_pathname, flags);
+}
+
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#endif
+
+#ifndef __NR_open_tree
+ #if defined __alpha__
+ #define __NR_open_tree 538
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree 4428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree 6428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree 5428
+ #endif
+ #else
+ #define __NR_open_tree 428
+ #endif
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
+#endif
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 40211cd8f0e6..7992969deaa2 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -4,7 +4,7 @@ CFLAGS = -Wall \
-O2
TEST_PROGS := fw_run_tests.sh
-TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_upload.sh fw_lib.sh
TEST_GEN_FILES := fw_namespace
include ../lib.mk
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
index bf634dda0720..6e402519b117 100644
--- a/tools/testing/selftests/firmware/config
+++ b/tools/testing/selftests/firmware/config
@@ -3,3 +3,4 @@ CONFIG_FW_LOADER=y
CONFIG_FW_LOADER_USER_HELPER=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_FW_UPLOAD=y
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index c2a2a100114b..1a99aea0549e 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -11,6 +11,9 @@ TEST_REQS_FW_SET_CUSTOM_PATH="yes"
TEST_DIR=$(dirname $0)
source $TEST_DIR/fw_lib.sh
+RUN_XZ="xz -C crc32 --lzma2=dict=2MiB"
+RUN_ZSTD="zstd -q"
+
check_mods
check_setup
verify_reqs
@@ -211,7 +214,7 @@ read_firmwares()
else
fwfile="$FW"
fi
- if [ "$1" = "xzonly" ]; then
+ if [ "$1" = "componly" ]; then
fwfile="${fwfile}-orig"
fi
for i in $(seq 0 3); do
@@ -235,7 +238,7 @@ read_partial_firmwares()
fwfile="${FW}"
fi
- if [ "$1" = "xzonly" ]; then
+ if [ "$1" = "componly" ]; then
fwfile="${fwfile}-orig"
fi
@@ -409,10 +412,8 @@ test_request_firmware_nowait_custom()
config_unset_uevent
RANDOM_FILE_PATH=$(setup_random_file)
RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
- if [ "$2" = "both" ]; then
- xz -9 -C crc32 -k $RANDOM_FILE_PATH
- elif [ "$2" = "xzonly" ]; then
- xz -9 -C crc32 $RANDOM_FILE_PATH
+ if [ -n "$2" -a "$2" != "normal" ]; then
+ compress_"$2"_"$COMPRESS_FORMAT" $RANDOM_FILE_PATH
fi
config_set_name $RANDOM_FILE
config_trigger_async
@@ -435,6 +436,32 @@ test_request_partial_firmware_into_buf()
echo "OK"
}
+do_tests ()
+{
+ mode="$1"
+ suffix="$2"
+
+ for i in $(seq 1 5); do
+ test_batched_request_firmware$suffix $i $mode
+ done
+
+ for i in $(seq 1 5); do
+ test_batched_request_firmware_into_buf$suffix $i $mode
+ done
+
+ for i in $(seq 1 5); do
+ test_batched_request_firmware_direct$suffix $i $mode
+ done
+
+ for i in $(seq 1 5); do
+ test_request_firmware_nowait_uevent$suffix $i $mode
+ done
+
+ for i in $(seq 1 5); do
+ test_request_firmware_nowait_custom$suffix $i $mode
+ done
+}
+
# Only continue if batched request triggers are present on the
# test-firmware driver
test_config_present
@@ -442,25 +469,7 @@ test_config_present
# test with the file present
echo
echo "Testing with the file present..."
-for i in $(seq 1 5); do
- test_batched_request_firmware $i normal
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_into_buf $i normal
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_direct $i normal
-done
-
-for i in $(seq 1 5); do
- test_request_firmware_nowait_uevent $i normal
-done
-
-for i in $(seq 1 5); do
- test_request_firmware_nowait_custom $i normal
-done
+do_tests normal
# Partial loads cannot use fallback, so do not repeat tests.
test_request_partial_firmware_into_buf 0 10
@@ -472,25 +481,7 @@ test_request_partial_firmware_into_buf 2 10
# a hung task, which would require a hard reset.
echo
echo "Testing with the file missing..."
-for i in $(seq 1 5); do
- test_batched_request_firmware_nofile $i
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_into_buf_nofile $i
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_direct_nofile $i
-done
-
-for i in $(seq 1 5); do
- test_request_firmware_nowait_uevent_nofile $i
-done
-
-for i in $(seq 1 5); do
- test_request_firmware_nowait_custom_nofile $i
-done
+do_tests nofile _nofile
# Partial loads cannot use fallback, so do not repeat tests.
test_request_partial_firmware_into_buf_nofile 0 10
@@ -498,55 +489,58 @@ test_request_partial_firmware_into_buf_nofile 0 5
test_request_partial_firmware_into_buf_nofile 1 6
test_request_partial_firmware_into_buf_nofile 2 10
-test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
+test_request_firmware_compressed ()
+{
+ export COMPRESS_FORMAT="$1"
-# test with both files present
-xz -9 -C crc32 -k $FW
-config_set_name $NAME
-echo
-echo "Testing with both plain and xz files present..."
-for i in $(seq 1 5); do
- test_batched_request_firmware $i both
-done
+ # test with both files present
+ compress_both_"$COMPRESS_FORMAT" $FW
+ compress_both_"$COMPRESS_FORMAT" $FW_INTO_BUF
-for i in $(seq 1 5); do
- test_batched_request_firmware_into_buf $i both
-done
+ config_set_name $NAME
+ echo
+ echo "Testing with both plain and $COMPRESS_FORMAT files present..."
+ do_tests both
-for i in $(seq 1 5); do
- test_batched_request_firmware_direct $i both
-done
+ # test with only compressed file present
+ mv "$FW" "${FW}-orig"
+ mv "$FW_INTO_BUF" "${FW_INTO_BUF}-orig"
-for i in $(seq 1 5); do
- test_request_firmware_nowait_uevent $i both
-done
+ config_set_name $NAME
+ echo
+ echo "Testing with only $COMPRESS_FORMAT file present..."
+ do_tests componly
-for i in $(seq 1 5); do
- test_request_firmware_nowait_custom $i both
-done
+ mv "${FW}-orig" "$FW"
+ mv "${FW_INTO_BUF}-orig" "$FW_INTO_BUF"
+}
-# test with only xz file present
-mv "$FW" "${FW}-orig"
-echo
-echo "Testing with only xz file present..."
-for i in $(seq 1 5); do
- test_batched_request_firmware $i xzonly
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_into_buf $i xzonly
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_direct $i xzonly
-done
-
-for i in $(seq 1 5); do
- test_request_firmware_nowait_uevent $i xzonly
-done
-
-for i in $(seq 1 5); do
- test_request_firmware_nowait_custom $i xzonly
-done
+compress_both_XZ ()
+{
+ $RUN_XZ -k "$@"
+}
+
+compress_componly_XZ ()
+{
+ $RUN_XZ "$@"
+}
+
+compress_both_ZSTD ()
+{
+ $RUN_ZSTD -k "$@"
+}
+
+compress_componly_ZSTD ()
+{
+ $RUN_ZSTD --rm "$@"
+}
+
+if test "$HAS_FW_LOADER_COMPRESS_XZ" = "yes"; then
+ test_request_firmware_compressed XZ
+fi
+
+if test "$HAS_FW_LOADER_COMPRESS_ZSTD" = "yes"; then
+ test_request_firmware_compressed ZSTD
+fi
exit 0
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 5b8c0fedee76..7bffd67800bf 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -62,7 +62,9 @@ check_setup()
{
HAS_FW_LOADER_USER_HELPER="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER=y)"
HAS_FW_LOADER_USER_HELPER_FALLBACK="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y)"
- HAS_FW_LOADER_COMPRESS="$(kconfig_has CONFIG_FW_LOADER_COMPRESS=y)"
+ HAS_FW_LOADER_COMPRESS_XZ="$(kconfig_has CONFIG_FW_LOADER_COMPRESS_XZ=y)"
+ HAS_FW_LOADER_COMPRESS_ZSTD="$(kconfig_has CONFIG_FW_LOADER_COMPRESS_ZSTD=y)"
+ HAS_FW_UPLOAD="$(kconfig_has CONFIG_FW_UPLOAD=y)"
PROC_FW_IGNORE_SYSFS_FALLBACK="0"
PROC_FW_FORCE_SYSFS_FALLBACK="0"
@@ -98,9 +100,14 @@ check_setup()
OLD_FWPATH="$(cat /sys/module/firmware_class/parameters/path)"
- if [ "$HAS_FW_LOADER_COMPRESS" = "yes" ]; then
+ if [ "$HAS_FW_LOADER_COMPRESS_XZ" = "yes" ]; then
if ! which xz 2> /dev/null > /dev/null; then
- HAS_FW_LOADER_COMPRESS=""
+ HAS_FW_LOADER_COMPRESS_XZ=""
+ fi
+ fi
+ if [ "$HAS_FW_LOADER_COMPRESS_ZSTD" = "yes" ]; then
+ if ! which zstd 2> /dev/null > /dev/null; then
+ HAS_FW_LOADER_COMPRESS_ZSTD=""
fi
fi
}
@@ -113,6 +120,12 @@ verify_reqs()
exit 0
fi
fi
+ if [ "$TEST_REQS_FW_UPLOAD" = "yes" ]; then
+ if [ ! "$HAS_FW_UPLOAD" = "yes" ]; then
+ echo "firmware upload disabled so ignoring test"
+ exit 0
+ fi
+ fi
}
setup_tmp_file()
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
index 5ebc1aec7923..04757dc7e546 100644
--- a/tools/testing/selftests/firmware/fw_namespace.c
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -17,10 +17,6 @@
#include <sys/wait.h>
#include <unistd.h>
-#ifndef CLONE_NEWNS
-# define CLONE_NEWNS 0x00020000
-#endif
-
static char *fw_path = NULL;
static void die(char *fmt, ...)
@@ -95,7 +91,7 @@ static bool test_fw_in_ns(const char *fw_name, const char *sys_path, bool block_
}
if (block_fw_in_parent_ns)
umount("/lib/firmware");
- return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
}
if (unshare(CLONE_NEWNS) != 0) {
@@ -129,7 +125,8 @@ int main(int argc, char **argv)
die("mounting tmpfs to /lib/firmware failed\n");
sys_path = argv[1];
- asprintf(&fw_path, "/lib/firmware/%s", fw_name);
+ if (asprintf(&fw_path, "/lib/firmware/%s", fw_name) < 0)
+ die("error: failed to build full fw_path\n");
setup_fw(fw_path);
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
index 777377078d5e..f6d95a2d5124 100755
--- a/tools/testing/selftests/firmware/fw_run_tests.sh
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -22,6 +22,10 @@ run_tests()
proc_set_force_sysfs_fallback $1
proc_set_ignore_sysfs_fallback $2
$TEST_DIR/fw_fallback.sh
+
+ proc_set_force_sysfs_fallback $1
+ proc_set_ignore_sysfs_fallback $2
+ $TEST_DIR/fw_upload.sh
}
run_test_config_0001()
diff --git a/tools/testing/selftests/firmware/fw_upload.sh b/tools/testing/selftests/firmware/fw_upload.sh
new file mode 100755
index 000000000000..c7a6f06c9adb
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_upload.sh
@@ -0,0 +1,214 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# This validates the user-initiated fw upload mechanism of the firmware
+# loader. It verifies that one or more firmware devices can be created
+# for a device driver. It also verifies the data transfer, the
+# cancellation support, and the error flows.
+set -e
+
+TEST_REQS_FW_UPLOAD="yes"
+TEST_DIR=$(dirname $0)
+
+progress_states="preparing transferring programming"
+errors="hw-error
+ timeout
+ device-busy
+ invalid-file-size
+ read-write-error
+ flash-wearout"
+error_abort="user-abort"
+fwname1=fw1
+fwname2=fw2
+fwname3=fw3
+
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+
+trap "upload_finish" EXIT
+
+upload_finish() {
+ local fwdevs="$fwname1 $fwname2 $fwname3"
+
+ for name in $fwdevs; do
+ if [ -e "$DIR/$name" ]; then
+ echo -n "$name" > "$DIR"/upload_unregister
+ fi
+ done
+}
+
+upload_fw() {
+ local name="$1"
+ local file="$2"
+
+ echo 1 > "$DIR"/"$name"/loading
+ cat "$file" > "$DIR"/"$name"/data
+ echo 0 > "$DIR"/"$name"/loading
+}
+
+verify_fw() {
+ local name="$1"
+ local file="$2"
+
+ echo -n "$name" > "$DIR"/config_upload_name
+ if ! cmp "$file" "$DIR"/upload_read > /dev/null 2>&1; then
+ echo "$0: firmware compare for $name did not match" >&2
+ exit 1
+ fi
+
+ echo "$0: firmware upload for $name works" >&2
+ return 0
+}
+
+inject_error() {
+ local name="$1"
+ local status="$2"
+ local error="$3"
+
+ echo 1 > "$DIR"/"$name"/loading
+ echo -n "inject":"$status":"$error" > "$DIR"/"$name"/data
+ echo 0 > "$DIR"/"$name"/loading
+}
+
+await_status() {
+ local name="$1"
+ local expected="$2"
+ local status
+ local i
+
+ let i=0
+ while [ $i -lt 50 ]; do
+ status=$(cat "$DIR"/"$name"/status)
+ if [ "$status" = "$expected" ]; then
+ return 0;
+ fi
+ sleep 1e-03
+ let i=$i+1
+ done
+
+ echo "$0: Invalid status: Expected $expected, Actual $status" >&2
+ return 1;
+}
+
+await_idle() {
+ local name="$1"
+
+ await_status "$name" "idle"
+ return $?
+}
+
+expect_error() {
+ local name="$1"
+ local expected="$2"
+ local error=$(cat "$DIR"/"$name"/error)
+
+ if [ "$error" != "$expected" ]; then
+ echo "Invalid error: Expected $expected, Actual $error" >&2
+ return 1
+ fi
+
+ return 0
+}
+
+random_firmware() {
+ local bs="$1"
+ local count="$2"
+ local file=$(mktemp -p /tmp uploadfwXXX.bin)
+
+ dd if=/dev/urandom of="$file" bs="$bs" count="$count" > /dev/null 2>&1
+ echo "$file"
+}
+
+test_upload_cancel() {
+ local name="$1"
+ local status
+
+ for status in $progress_states; do
+ inject_error $name $status $error_abort
+ if ! await_status $name $status; then
+ exit 1
+ fi
+
+ echo 1 > "$DIR"/"$name"/cancel
+
+ if ! await_idle $name; then
+ exit 1
+ fi
+
+ if ! expect_error $name "$status":"$error_abort"; then
+ exit 1
+ fi
+ done
+
+ echo "$0: firmware upload cancellation works"
+ return 0
+}
+
+test_error_handling() {
+ local name=$1
+ local status
+ local error
+
+ for status in $progress_states; do
+ for error in $errors; do
+ inject_error $name $status $error
+
+ if ! await_idle $name; then
+ exit 1
+ fi
+
+ if ! expect_error $name "$status":"$error"; then
+ exit 1
+ fi
+
+ done
+ done
+ echo "$0: firmware upload error handling works"
+}
+
+test_fw_too_big() {
+ local name=$1
+ local fw_too_big=`random_firmware 512 5`
+ local expected="preparing:invalid-file-size"
+
+ upload_fw $name $fw_too_big
+ rm -f $fw_too_big
+
+ if ! await_idle $name; then
+ exit 1
+ fi
+
+ if ! expect_error $name $expected; then
+ exit 1
+ fi
+
+ echo "$0: oversized firmware error handling works"
+}
+
+echo -n "$fwname1" > "$DIR"/upload_register
+echo -n "$fwname2" > "$DIR"/upload_register
+echo -n "$fwname3" > "$DIR"/upload_register
+
+test_upload_cancel $fwname1
+test_error_handling $fwname1
+test_fw_too_big $fwname1
+
+fw_file1=`random_firmware 512 4`
+fw_file2=`random_firmware 512 3`
+fw_file3=`random_firmware 512 2`
+
+upload_fw $fwname1 $fw_file1
+upload_fw $fwname2 $fw_file2
+upload_fw $fwname3 $fw_file3
+
+verify_fw ${fwname1} ${fw_file1}
+verify_fw ${fwname2} ${fw_file2}
+verify_fw ${fwname3} ${fw_file3}
+
+echo -n "$fwname1" > "$DIR"/upload_unregister
+echo -n "$fwname2" > "$DIR"/upload_unregister
+echo -n "$fwname3" > "$DIR"/upload_unregister
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore
index 2659417cb2c7..4d7fcb828850 100644
--- a/tools/testing/selftests/ftrace/.gitignore
+++ b/tools/testing/selftests/ftrace/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
logs
+poll
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index d6e106fbce11..7c12263f8260 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -1,8 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
all:
-TEST_PROGS := ftracetest
+TEST_PROGS_EXTENDED := ftracetest
+TEST_PROGS := ftracetest-ktap
TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
+TEST_GEN_FILES := poll
+
include ../lib.mk
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
index e59d985eeff0..544de0db5f58 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -1,16 +1,29 @@
-CONFIG_KPROBES=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_EPROBE_EVENTS=y
+CONFIG_FPROBE=y
+CONFIG_FPROBE_EVENTS=y
CONFIG_FTRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
CONFIG_FUNCTION_PROFILER=y
-CONFIG_TRACER_SNAPSHOT=y
-CONFIG_STACK_TRACER=y
CONFIG_HIST_TRIGGERS=y
-CONFIG_SCHED_TRACER=y
-CONFIG_PREEMPT_TRACER=y
CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_PREEMPT_TRACER=y
+CONFIG_PROBE_EVENTS_BTF_ARGS=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_TRACE_EVENTS=m
CONFIG_SAMPLE_TRACE_PRINTK=m
-CONFIG_KALLSYMS_ALL=y
+CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_UPROBES=y
+CONFIG_UPROBE_EVENTS=y
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 8ec1922e974e..3230bd54dba8 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -13,6 +13,7 @@ echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
echo " Options:"
echo " -h|--help Show help message"
echo " -k|--keep Keep passed test logs"
+echo " -K|--ktap Output in KTAP format"
echo " -v|--verbose Increase verbosity of test messages"
echo " -vv Alias of -v -v (Show all results in stdout)"
echo " -vvv Alias of -v -v -v (Show all commands immediately)"
@@ -21,6 +22,7 @@ echo " --fail-unresolved Treat UNRESOLVED as a failure"
echo " -d|--debug Debug mode (trace all shell commands)"
echo " -l|--logdir <dir> Save logs on the <dir>"
echo " If <dir> is -, all logs output in console only"
+echo " --rv Run RV selftests instead of ftrace ones"
exit $1
}
@@ -30,6 +32,9 @@ err_ret=1
# kselftest skip code is 4
err_skip=4
+# umount required
+UMOUNT_DIR=""
+
# cgroup RT scheduling prevents chrt commands from succeeding, which
# induces failures in test wakeup tests. Disable for the duration of
# the tests.
@@ -44,6 +49,9 @@ setup() {
cleanup() {
echo $sched_rt_runtime_orig > $sched_rt_runtime
+ if [ -n "${UMOUNT_DIR}" ]; then
+ umount ${UMOUNT_DIR} ||:
+ fi
}
errexit() { # message
@@ -85,6 +93,10 @@ parse_opts() { # opts
KEEP_LOG=1
shift 1
;;
+ --ktap|-K)
+ KTAP=1
+ shift 1
+ ;;
--verbose|-v|-vv|-vvv)
if [ $VERBOSE -eq -1 ]; then
usage "--console can not use with --verbose"
@@ -119,8 +131,13 @@ parse_opts() { # opts
;;
--logdir|-l)
LOG_DIR=$2
+ LINK_PTR=
shift 2
;;
+ --rv)
+ RV_TEST=1
+ shift 1
+ ;;
*.tc)
if [ -f "$1" ]; then
OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`"
@@ -140,9 +157,13 @@ parse_opts() { # opts
;;
esac
done
- if [ ! -z "$OPT_TEST_CASES" ]; then
+ if [ -n "$OPT_TEST_CASES" ]; then
TEST_CASES=$OPT_TEST_CASES
fi
+ if [ -n "$OPT_TEST_DIR" -a -f "$OPT_TEST_DIR"/test.d/functions ]; then
+ TOP_DIR=$OPT_TEST_DIR
+ TEST_DIR=$TOP_DIR/test.d
+ fi
}
# Parameters
@@ -155,11 +176,13 @@ if [ -z "$TRACING_DIR" ]; then
mount -t tracefs nodev /sys/kernel/tracing ||
errexit "Failed to mount /sys/kernel/tracing"
TRACING_DIR="/sys/kernel/tracing"
+ UMOUNT_DIR=${TRACING_DIR}
# If debugfs exists, then so does /sys/kernel/debug
elif [ -d "/sys/kernel/debug" ]; then
mount -t debugfs nodev /sys/kernel/debug ||
errexit "Failed to mount /sys/kernel/debug"
TRACING_DIR="/sys/kernel/debug/tracing"
+ UMOUNT_DIR=${TRACING_DIR}
else
err_ret=$err_skip
errexit "debugfs and tracefs are not configured in this kernel"
@@ -176,21 +199,30 @@ fi
TOP_DIR=`absdir $0`
TEST_DIR=$TOP_DIR/test.d
TEST_CASES=`find_testcases $TEST_DIR`
-LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/
KEEP_LOG=0
+KTAP=0
DEBUG=0
VERBOSE=0
UNSUPPORTED_RESULT=0
UNRESOLVED_RESULT=0
STOP_FAILURE=0
+RV_TEST=0
# Parse command-line options
parse_opts $*
+LOG_TOP_DIR=$TOP_DIR/logs
+LOG_DATE=`date +%Y%m%d-%H%M%S`
+LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/
+LINK_PTR=$LOG_TOP_DIR/latest
+
[ $DEBUG -ne 0 ] && set -x
-# Verify parameters
-if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then
- errexit "No ftrace directory found"
+if [ $RV_TEST -ne 0 ]; then
+ TRACING_DIR=$TRACING_DIR/rv
+ if [ ! -d "$TRACING_DIR" ]; then
+ err_ret=$err_skip
+ errexit "rv is not configured in this kernel"
+ fi
fi
# Preparing logs
@@ -201,6 +233,10 @@ else
LOG_FILE=$LOG_DIR/ftracetest.log
mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
date > $LOG_FILE
+ if [ "x-$LINK_PTR" != "x-" ]; then
+ unlink $LINK_PTR
+ ln -fs $LOG_DATE $LINK_PTR
+ fi
fi
# Define text colors
@@ -229,11 +265,17 @@ prlog() { # messages
newline=
shift
fi
- printf "$*$newline"
+ [ "$KTAP" != "1" ] && printf "$*$newline"
[ "$LOG_FILE" ] && printf "$*$newline" | strip_esc >> $LOG_FILE
}
catlog() { #file
- cat $1
+ if [ "${KTAP}" = "1" ]; then
+ cat $1 | while read line ; do
+ echo "# $line"
+ done
+ else
+ cat $1
+ fi
[ "$LOG_FILE" ] && cat $1 | strip_esc >> $LOG_FILE
}
prlog "=== Ftrace unit tests ==="
@@ -260,11 +302,11 @@ TOTAL_RESULT=0
INSTANCE=
CASENO=0
+CASENAME=
testcase() { # testfile
CASENO=$((CASENO+1))
- desc=`grep "^#[ \t]*description:" $1 | cut -f2- -d:`
- prlog -n "[$CASENO]$INSTANCE$desc"
+ CASENAME=`grep "^#[ \t]*description:" $1 | cut -f2- -d:`
}
checkreq() { # testfile
@@ -277,40 +319,68 @@ test_on_instance() { # testfile
grep -q "^#[ \t]*flags:.*instance" $1
}
+ktaptest() { # result comment
+ if [ "$KTAP" != "1" ]; then
+ return
+ fi
+
+ local result=
+ if [ "$1" = "1" ]; then
+ result="ok"
+ else
+ result="not ok"
+ fi
+ shift
+
+ local comment=$*
+ if [ "$comment" != "" ]; then
+ comment="# $comment"
+ fi
+
+ echo $result $CASENO $INSTANCE$CASENAME $comment
+}
+
eval_result() { # sigval
case $1 in
$PASS)
prlog " [${color_green}PASS${color_reset}]"
+ ktaptest 1
PASSED_CASES="$PASSED_CASES $CASENO"
return 0
;;
$FAIL)
prlog " [${color_red}FAIL${color_reset}]"
+ ktaptest 0
FAILED_CASES="$FAILED_CASES $CASENO"
return 1 # this is a bug.
;;
$UNRESOLVED)
prlog " [${color_blue}UNRESOLVED${color_reset}]"
+ ktaptest 0 UNRESOLVED
UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
return $UNRESOLVED_RESULT # depends on use case
;;
$UNTESTED)
prlog " [${color_blue}UNTESTED${color_reset}]"
+ ktaptest 1 SKIP
UNTESTED_CASES="$UNTESTED_CASES $CASENO"
return 0
;;
$UNSUPPORTED)
prlog " [${color_blue}UNSUPPORTED${color_reset}]"
+ ktaptest 1 SKIP
UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
return $UNSUPPORTED_RESULT # depends on use case
;;
$XFAIL)
prlog " [${color_green}XFAIL${color_reset}]"
+ ktaptest 1 XFAIL
XFAILED_CASES="$XFAILED_CASES $CASENO"
return 0
;;
*)
prlog " [${color_blue}UNDEFINED${color_reset}]"
+ ktaptest 0 error
UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
return 1 # this must be a test bug
;;
@@ -363,7 +433,7 @@ trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
__run_test() { # testfile
# setup PID and PPID, $$ is not updated.
(cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x;
- checkreq $1; initialize_ftrace; . $1)
+ checkreq $1; initialize_system; . $1)
[ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID
}
@@ -371,6 +441,7 @@ __run_test() { # testfile
run_test() { # testfile
local testname=`basename $1`
testcase $1
+ prlog -n "[$CASENO]$INSTANCE$CASENAME"
if [ ! -z "$LOG_FILE" ] ; then
local testlog=`mktemp $LOG_DIR/${CASENO}-${testname}-log.XXXXXX`
else
@@ -405,6 +476,17 @@ run_test() { # testfile
# load in the helper functions
. $TEST_DIR/functions
+if [ "$KTAP" = "1" ]; then
+ echo "TAP version 13"
+
+ casecount=`echo $TEST_CASES | wc -w`
+ for t in $TEST_CASES; do
+ test_on_instance $t || continue
+ casecount=$((casecount+1))
+ done
+ echo "1..${casecount}"
+fi
+
# Main loop
for t in $TEST_CASES; do
run_test $t
@@ -428,7 +510,7 @@ for t in $TEST_CASES; do
exit 1
fi
done
-(cd $TRACING_DIR; initialize_ftrace) # for cleanup
+(cd $TRACING_DIR; finish_system) # for cleanup
prlog ""
prlog "# of passed: " `echo $PASSED_CASES | wc -w`
@@ -439,6 +521,17 @@ prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
+if [ "$KTAP" = "1" ]; then
+ echo -n "# Totals:"
+ echo -n " pass:"`echo $PASSED_CASES | wc -w`
+ echo -n " fail:"`echo $FAILED_CASES | wc -w`
+ echo -n " xfail:"`echo $XFAILED_CASES | wc -w`
+ echo -n " xpass:0"
+ echo -n " skip:"`echo $UNTESTED_CASES $UNSUPPORTED_CASES | wc -w`
+ echo -n " error:"`echo $UNRESOLVED_CASES $UNDEFINED_CASES | wc -w`
+ echo
+fi
+
cleanup
# if no error, return 0
diff --git a/tools/testing/selftests/ftrace/ftracetest-ktap b/tools/testing/selftests/ftrace/ftracetest-ktap
new file mode 100755
index 000000000000..14e62ef3f3b9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/ftracetest-ktap
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# ftracetest-ktap: Wrapper to integrate ftracetest with the kselftest runner
+#
+# Copyright (C) Arm Ltd., 2023
+
+./ftracetest -K -v
diff --git a/tools/testing/selftests/ftrace/poll.c b/tools/testing/selftests/ftrace/poll.c
new file mode 100644
index 000000000000..53258f7515e7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/poll.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple poll on a file.
+ *
+ * Copyright (c) 2024 Google LLC.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define BUFSIZE 4096
+
+/*
+ * Usage:
+ * poll [-I|-P] [-t timeout] FILE
+ */
+int main(int argc, char *argv[])
+{
+ struct pollfd pfd = {.events = POLLIN};
+ char buf[BUFSIZE];
+ int timeout = -1;
+ int ret, opt;
+
+ while ((opt = getopt(argc, argv, "IPt:")) != -1) {
+ switch (opt) {
+ case 'I':
+ pfd.events = POLLIN;
+ break;
+ case 'P':
+ pfd.events = POLLPRI;
+ break;
+ case 't':
+ timeout = atoi(optarg);
+ break;
+ default:
+ fprintf(stderr, "Usage: %s [-I|-P] [-t timeout] FILE\n",
+ argv[0]);
+ return -1;
+ }
+ }
+ if (optind >= argc) {
+ fprintf(stderr, "Error: Polling file is not specified\n");
+ return -1;
+ }
+
+ pfd.fd = open(argv[optind], O_RDONLY);
+ if (pfd.fd < 0) {
+ fprintf(stderr, "failed to open %s", argv[optind]);
+ perror("open");
+ return -1;
+ }
+
+ /* Reset poll by read if POLLIN is specified. */
+ if (pfd.events & POLLIN)
+ do {} while (read(pfd.fd, buf, BUFSIZE) == BUFSIZE);
+
+ ret = poll(&pfd, 1, timeout);
+ if (ret < 0 && errno != EINTR) {
+ perror("poll");
+ return -1;
+ }
+ close(pfd.fd);
+
+ /* If timeout happned (ret == 0), exit code is 1 */
+ if (ret == 0)
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
new file mode 100644
index 000000000000..318939451caf
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
@@ -0,0 +1,101 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test tracefs GID mount option
+# requires: "[gid=<gid>]":README
+
+fail() {
+ local msg="$1"
+
+ echo "FAILED: $msg"
+ exit_fail
+}
+
+find_alternate_gid() {
+ local original_gid="$1"
+ tac /etc/group | grep -v ":$original_gid:" | head -1 | cut -d: -f3
+}
+
+remount_tracefs_with_options() {
+ local mount_point="$1"
+ local options="$2"
+
+ mount -t tracefs -o "remount,$options" nodev "$mount_point"
+
+ setup
+}
+
+unmount_tracefs() {
+ local mount_point="$1"
+
+ # Need to make sure the mount isn't busy so that we can umount it
+ (cd $mount_point; finish_system;)
+
+ cleanup
+}
+
+create_instance() {
+ local mount_point="$1"
+ local instance="$mount_point/instances/$(mktemp -u test-XXXXXX)"
+
+ mkdir "$instance"
+ echo "$instance"
+}
+
+remove_instance() {
+ local instance="$1"
+
+ rmdir "$instance"
+}
+
+check_gid() {
+ local mount_point="$1"
+ local expected_gid="$2"
+
+ echo "Checking permission group ..."
+
+ cd "$mount_point"
+
+ for file in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable"; do
+ local gid=`stat -c "%g" $file`
+ if [ "$gid" -ne "$expected_gid" ]; then
+ cd - # Return to the previous working directory (tracefs root)
+ fail "$(realpath $file): Expected group $expected_gid; Got group $gid"
+ fi
+ done
+
+ cd - # Return to the previous working directory (tracefs root)
+}
+
+test_gid_mount_option() {
+ local mount_point=$(get_mount_point)
+ local mount_options=$(get_mnt_options "$mount_point")
+ local original_group=$(stat -c "%g" .)
+ local other_group=$(find_alternate_gid "$original_group")
+
+ # Set up mount options with new GID for testing
+ local new_options=`echo "$mount_options" | sed -e "s/gid=[0-9]*/gid=$other_group/"`
+ if [ "$new_options" = "$mount_options" ]; then
+ new_options="$mount_options,gid=$other_group"
+ mount_options="$mount_options,gid=$original_group"
+ fi
+
+ # Unmount existing tracefs instance and mount with new GID
+ unmount_tracefs "$mount_point"
+ remount_tracefs_with_options "$mount_point" "$new_options"
+
+ check_gid "$mount_point" "$other_group"
+
+ # Check that files created after the mount inherit the GID
+ local instance=$(create_instance "$mount_point")
+ check_gid "$instance" "$other_group"
+ remove_instance "$instance"
+
+ # Unmount and remount with the original GID
+ unmount_tracefs "$mount_point"
+ remount_tracefs_with_options "$mount_point" "$mount_options"
+ check_gid "$mount_point" "$original_group"
+}
+
+test_gid_mount_option
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc
new file mode 100644
index 000000000000..d44d09a33a74
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc
@@ -0,0 +1,95 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Change the ringbuffer sub-buffer size
+# requires: buffer_subbuf_size_kb
+# flags: instance
+
+get_buffer_data_size() {
+ sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_buffer_data_offset() {
+ sed -ne 's/^.*data.*offset:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_event_header_size() {
+ type_len=`sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ time_len=`sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ array_len=`sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ total_bits=$((type_len+time_len+array_len))
+ total_bits=$((total_bits+7))
+ echo $((total_bits/8))
+}
+
+get_print_event_buf_offset() {
+ sed -ne 's/^.*buf.*offset:\([0-9][0-9]*\).*/\1/p' events/ftrace/print/format
+}
+
+event_header_size=`get_event_header_size`
+print_header_size=`get_print_event_buf_offset`
+
+data_offset=`get_buffer_data_offset`
+
+marker_meta=$((event_header_size+print_header_size))
+
+make_str() {
+ cnt=$1
+ printf -- 'X%.0s' $(seq $cnt)
+}
+
+write_buffer() {
+ size=$1
+
+ str=`make_str $size`
+
+ # clear the buffer
+ echo > trace
+
+ # write the string into the marker
+ echo $str > trace_marker
+
+ echo $str
+}
+
+test_buffer() {
+ size_kb=$1
+ page_size=$((size_kb*1024))
+
+ size=`get_buffer_data_size`
+
+ # the size must be greater than or equal to page_size - data_offset
+ page_size=$((page_size-data_offset))
+ if [ $size -lt $page_size ]; then
+ exit fail
+ fi
+
+ # Now add a little more the meta data overhead will overflow
+
+ str=`write_buffer $size`
+
+ # Make sure the line was broken
+ new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace`
+
+ if [ "$new_str" = "$str" ]; then
+ exit fail;
+ fi
+
+ # Make sure the entire line can be found
+ new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace`
+
+ if [ "$new_str" != "$str" ]; then
+ exit fail;
+ fi
+}
+
+ORIG=`cat buffer_subbuf_size_kb`
+
+# Could test bigger sizes than 32K, but then creating the string
+# to write into the ring buffer takes too long
+for a in 4 8 16 32 ; do
+ echo $a > buffer_subbuf_size_kb
+ test_buffer $a
+done
+
+echo $ORIG > buffer_subbuf_size_kb
+
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
new file mode 100644
index 000000000000..63b76cf2a360
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Snapshot and tracing_cpumask
+# requires: trace_marker tracing_cpumask snapshot
+# flags: instance
+
+# This testcase is constrived to reproduce a problem that the cpu buffers
+# become unavailable which is due to 'record_disabled' of array_buffer and
+# max_buffer being messed up.
+
+# Store origin cpumask
+ORIG_CPUMASK=`cat tracing_cpumask`
+
+# Stop tracing all cpu
+echo 0 > tracing_cpumask
+
+# Take a snapshot of the main buffer
+echo 1 > snapshot
+
+# Restore origin cpumask, note that there should be some cpus being traced
+echo ${ORIG_CPUMASK} > tracing_cpumask
+
+# Set tracing on
+echo 1 > tracing_on
+
+# Write a log into buffer
+echo "test input 1" > trace_marker
+
+# Ensure the log writed so that cpu buffers are still available
+grep -q "test input 1" trace
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
new file mode 100644
index 000000000000..e71cc3ad0bdf
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
@@ -0,0 +1,122 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test file and directory ownership changes for eventfs
+# requires: "[gid=<gid>]":README
+
+original_group=`stat -c "%g" .`
+original_owner=`stat -c "%u" .`
+
+local mount_point=$(get_mount_point)
+
+mount_options=$(get_mnt_options "$mount_point")
+
+# find another owner and group that is not the original
+other_group=`tac /etc/group | grep -v ":$original_group:" | head -1 | cut -d: -f3`
+other_owner=`tac /etc/passwd | grep -v ":$original_owner:" | head -1 | cut -d: -f3`
+
+# Remove any group ownership already
+new_options=`echo "$mount_options" | sed -e "s/gid=[0-9]*/gid=$other_group/"`
+
+if [ "$new_options" = "$mount_options" ]; then
+ new_options="$mount_options,gid=$other_group"
+ mount_options="$mount_options,gid=$original_group"
+fi
+
+canary="events/timer events/timer/timer_cancel events/timer/timer_cancel/format"
+
+test() {
+ file=$1
+ test_group=$2
+
+ owner=`stat -c "%u" $file`
+ group=`stat -c "%g" $file`
+
+ echo "testing $file $owner=$original_owner and $group=$test_group"
+ if [ $owner -ne $original_owner ]; then
+ exit_fail
+ fi
+ if [ $group -ne $test_group ]; then
+ exit_fail
+ fi
+
+ # Note, the remount does not update ownership so test going to and from owner
+ echo "test owner $file to $other_owner"
+ chown $other_owner $file
+ owner=`stat -c "%u" $file`
+ if [ $owner -ne $other_owner ]; then
+ exit_fail
+ fi
+
+ chown $original_owner $file
+ owner=`stat -c "%u" $file`
+ if [ $owner -ne $original_owner ]; then
+ exit_fail
+ fi
+
+}
+
+run_tests() {
+ for d in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
+ test "$d" $other_group
+ done
+
+ chgrp $original_group events
+ test "events" $original_group
+ for d in "." "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
+ test "$d" $other_group
+ done
+
+ chgrp $original_group events/sched
+ test "events/sched" $original_group
+ for d in "." "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
+ test "$d" $other_group
+ done
+
+ chgrp $original_group events/sched/sched_switch
+ test "events/sched/sched_switch" $original_group
+ for d in "." "events/sched/sched_switch/enable" $canary; do
+ test "$d" $other_group
+ done
+
+ chgrp $original_group events/sched/sched_switch/enable
+ test "events/sched/sched_switch/enable" $original_group
+ for d in "." $canary; do
+ test "$d" $other_group
+ done
+}
+
+# Run the tests twice as leftovers can cause issues
+for loop in 1 2 ; do
+
+ echo "Running iteration $loop"
+
+ mount -o remount,"$new_options" .
+
+ run_tests
+
+ mount -o remount,"$mount_options" .
+
+ for d in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
+ test "$d" $original_group
+ done
+
+# check instances as well
+
+ chgrp $other_group instances
+
+ instance="$(mktemp -u test-XXXXXX)"
+
+ mkdir instances/$instance
+
+ cd instances/$instance
+
+ run_tests
+
+ cd ../..
+
+ rmdir instances/$instance
+
+ chgrp $original_group instances
+done
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc
new file mode 100644
index 000000000000..9aa0db2b84fc
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic tests on writing to trace_marker
+# requires: trace_marker
+# flags: instance
+
+get_buffer_data_size() {
+ sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_buffer_data_offset() {
+ sed -ne 's/^.*data.*offset:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_event_header_size() {
+ type_len=`sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ time_len=`sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ array_len=`sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ total_bits=$((type_len+time_len+array_len))
+ total_bits=$((total_bits+7))
+ echo $((total_bits/8))
+}
+
+get_print_event_buf_offset() {
+ sed -ne 's/^.*buf.*offset:\([0-9][0-9]*\).*/\1/p' events/ftrace/print/format
+}
+
+event_header_size=`get_event_header_size`
+print_header_size=`get_print_event_buf_offset`
+
+data_offset=`get_buffer_data_offset`
+
+marker_meta=$((event_header_size+print_header_size))
+
+make_str() {
+ cnt=$1
+ # subtract two for \n\0 as marker adds these
+ cnt=$((cnt-2))
+ printf -- 'X%.0s' $(seq $cnt)
+}
+
+write_buffer() {
+ size=$1
+
+ str=`make_str $size`
+
+ # clear the buffer
+ echo > trace
+
+ # write the string into the marker
+ echo -n $str > trace_marker
+
+ echo $str
+}
+
+test_buffer() {
+
+ size=`get_buffer_data_size`
+ oneline_size=$((size-marker_meta))
+ echo size = $size
+ echo meta size = $marker_meta
+
+ # Now add a little more the meta data overhead will overflow
+
+ str=`write_buffer $size`
+
+ # Make sure the line was broken
+ new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace`
+
+ if [ "$new_str" = "$str" ]; then
+ exit fail;
+ fi
+
+ # Make sure the entire line can be found
+ new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace`
+
+ if [ "$new_str" != "$str" ]; then
+ exit fail;
+ fi
+}
+
+test_buffer
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
new file mode 100644
index 000000000000..7daf7292209e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
@@ -0,0 +1,107 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic tests on writing to trace_marker_raw
+# requires: trace_marker_raw
+# flags: instance
+
+is_little_endian() {
+ if lscpu | grep -q 'Little Endian'; then
+ echo 1;
+ else
+ echo 0;
+ fi
+}
+
+little=`is_little_endian`
+
+make_str() {
+ id=$1
+ cnt=$2
+
+ if [ $little -eq 1 ]; then
+ val=`printf "\\%03o\\%03o\\%03o\\%03o" \
+ $(($id & 0xff)) \
+ $((($id >> 8) & 0xff)) \
+ $((($id >> 16) & 0xff)) \
+ $((($id >> 24) & 0xff))`
+ else
+ val=`printf "\\%03o\\%03o\\%03o\\%03o" \
+ $((($id >> 24) & 0xff)) \
+ $((($id >> 16) & 0xff)) \
+ $((($id >> 8) & 0xff)) \
+ $(($id & 0xff))`
+ fi
+
+ data=`printf -- 'X%.0s' $(seq $cnt)`
+
+ printf "${val}${data}"
+}
+
+write_buffer() {
+ id=$1
+ size=$2
+
+ # write the string into the raw marker
+ make_str $id $size > trace_marker_raw
+}
+
+
+test_multiple_writes() {
+
+ # Write a bunch of data where the id is the count of
+ # data to write
+ for i in `seq 1 10` `seq 101 110` `seq 1001 1010`; do
+ write_buffer $i $i
+ done
+
+ # add a little buffer
+ echo stop > trace_marker
+
+ # Check to make sure the number of entries is the id (rounded up by 4)
+ awk '/.*: # [0-9a-f]* / {
+ print;
+ cnt = -1;
+ for (i = 0; i < NF; i++) {
+ # The counter is after the "#" marker
+ if ( $i == "#" ) {
+ i++;
+ cnt = strtonum("0x" $i);
+ num = NF - (i + 1);
+ # The number of items is always rounded up by 4
+ cnt2 = int((cnt + 3) / 4) * 4;
+ if (cnt2 != num) {
+ exit 1;
+ }
+ break;
+ }
+ }
+ }
+ // { if (NR > 30) { exit 0; } } ' trace_pipe;
+}
+
+
+get_buffer_data_size() {
+ sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+test_buffer() {
+
+ # The id must be four bytes, test that 3 bytes fails a write
+ if echo -n abc > ./trace_marker_raw ; then
+ echo "Too small of write expected to fail but did not"
+ exit_fail
+ fi
+
+ size=`get_buffer_data_size`
+ echo size = $size
+
+ # Now add a little more than what it can handle
+
+ if write_buffer 0xdeadbeef $size ; then
+ echo "Too big of write expected to fail but did not"
+ exit_fail
+ fi
+}
+
+test_buffer
+test_multiple_writes
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
new file mode 100644
index 000000000000..c0cdad4c400e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
@@ -0,0 +1,78 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove probes with BTF arguments
+# requires: dynamic_events "<argname>":README
+
+KPROBES=
+FPROBES=
+FIELDS=
+
+if grep -qF "p[:[<group>/][<event>]] <place> [<args>]" README ; then
+ KPROBES=yes
+fi
+if grep -qF "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README ; then
+ FPROBES=yes
+fi
+if grep -qF "<argname>[->field[->field|.field...]]" README ; then
+ FIELDS=yes
+fi
+
+if [ -z "$KPROBES" -a -z "$FPROBES" ] ; then
+ exit_unsupported
+fi
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TP=kfree
+TP2=kmem_cache_alloc
+TP3=getname_flags
+TP4=sched_wakeup
+
+if [ "$FPROBES" ] ; then
+echo "f:fpevent $TP object" >> dynamic_events
+echo "t:tpevent $TP ptr" >> dynamic_events
+
+grep -q "fpevent.*object=object" dynamic_events
+grep -q "tpevent.*ptr=ptr" dynamic_events
+
+echo > dynamic_events
+
+echo "f:fpevent $TP "'$arg1' >> dynamic_events
+grep -q "fpevent.*object=object" dynamic_events
+
+echo > dynamic_events
+
+echo "f:fpevent $TP "'$arg*' >> dynamic_events
+echo "t:tpevent $TP "'$arg*' >> dynamic_events
+
+grep -q "fpevent.*object=object" dynamic_events
+grep -q "tpevent.*ptr=ptr" dynamic_events
+! grep -q "tpevent.*_data" dynamic_events
+fi
+
+echo > dynamic_events
+
+if [ "$FIELDS" -a "$FPROBES" ] ; then
+echo "t:tpevent ${TP2} obj_size=s->object_size" >> dynamic_events
+echo "f:fpevent ${TP3}%return path=\$retval->name:string" >> dynamic_events
+echo "t:tpevent2 ${TP4} p->se.group_node.next->prev" >> dynamic_events
+
+grep -q "tpevent .*obj_size=s->object_size" dynamic_events
+grep -q "fpevent.*path=\$retval->name:string" dynamic_events
+grep -q 'tpevent2 .*p->se.group_node.next->prev' dynamic_events
+
+echo > dynamic_events
+fi
+
+if [ "$KPROBES" ] ; then
+echo "p:kpevent $TP object" >> dynamic_events
+grep -q "kpevent.*object=object" dynamic_events
+
+echo > dynamic_events
+
+echo "p:kpevent $TP "'$arg*' >> dynamic_events
+grep -q "kpevent.*object=object" dynamic_events
+fi
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
new file mode 100644
index 000000000000..c300eb020262
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
@@ -0,0 +1,97 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove eprobe events
+# requires: dynamic_events events/syscalls/sys_enter_openat "<attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+EVENT="sys_enter_openat"
+FIELD="filename"
+EPROBE="eprobe_open"
+OPTIONS="file=+0(\$filename):ustring"
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."' -e '(fault)' ` || true
+
+if [ -z "$content" ]; then
+ exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+ exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# test various ways to remove the probe (already tested with just event name)
+
+# With group name
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With group name and system/event
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With just event name and system/event
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:$EPROBE $SYSTEM/$EVENT" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With just event name and system/event and options
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With group name and system/event and options
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# Finally make sure what is in the dynamic_events file clears it too
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+LINE=`sed -e '/$EPROBE/s/^e/-/' < dynamic_events`
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+if grep -q "e\[:\[<group>/]\[<event>]]" README; then
+ echo "e:mygroup/ $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+ test -d events/mygroup
+ echo "-:mygroup/" >> dynamic_events
+ ! test -d events/mygroup
+fi
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
new file mode 100644
index 000000000000..47067a5e3cb0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove fprobe events
+# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+PLACE2="kmem_cache_free"
+PLACE3="schedule_timeout"
+
+# Some functions may have BPF programs attached, therefore
+# count already enabled_functions before tests start
+ocnt=`cat enabled_functions | wc -l`
+
+echo "f:myevent1 $PLACE" >> dynamic_events
+
+echo "f:myevent2 $PLACE%return" >> dynamic_events
+
+# add another event
+echo "f:myevent3 $PLACE2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+grep -q myevent3 dynamic_events
+test -d events/fprobes/myevent1
+test -d events/fprobes/myevent2
+
+echo 1 > events/fprobes/myevent1/enable
+# Make sure the event is attached.
+grep -q $PLACE enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -eq $ocnt ]; then
+ exit_fail
+fi
+
+echo 1 > events/fprobes/myevent2/enable
+cnt2=`cat enabled_functions | wc -l`
+
+echo 1 > events/fprobes/myevent3/enable
+# If the function is different, the attached function should be increased
+grep -q $PLACE2 enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -eq $cnt2 ]; then
+ exit_fail
+fi
+
+echo 0 > events/fprobes/myevent2/enable
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo 0 > events/fprobes/enable
+echo > dynamic_events
+
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+ exit_fail
+fi
+
+echo "f:myevent4 $PLACE" >> dynamic_events
+
+echo 1 > events/fprobes/myevent4/enable
+# Should only have one enabled
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+ exit_fail
+fi
+
+echo 0 > events/fprobes/enable
+echo > dynamic_events
+
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+ exit_fail
+fi
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
new file mode 100644
index 000000000000..b4ad09237e2a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - Repeating add/remove fprobe events
+# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+REPEAT_TIMES=64
+
+for i in `seq 1 $REPEAT_TIMES`; do
+ echo "f:myevent $PLACE" >> dynamic_events
+ grep -q myevent dynamic_events
+ test -d events/fprobes/myevent
+ echo > dynamic_events
+done
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
index b4da41d126d5..13d43f40a6fc 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
@@ -23,4 +23,11 @@ grep -q myevent1 dynamic_events
echo > dynamic_events
+if grep -q "p\[:\[<group>/]\[<event>]]" README; then
+ echo "p:mygroup/ $PLACE" >> dynamic_events
+ test -d events/mygroup
+ echo "-:mygroup/" >> dynamic_events
+ ! test -d events/mygroup
+fi
+
clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
new file mode 100644
index 000000000000..f271c4238b72
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
@@ -0,0 +1,41 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove tracepoint probe events
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+SUBSYSTEM=kmem
+TRACEPOINT1=kmem_cache_alloc
+TRACEPOINT2=kmem_cache_free
+
+echo "t:myevent1 $TRACEPOINT1" >> dynamic_events
+echo "t:myevent2 $TRACEPOINT2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/tracepoints/myevent1
+test -d events/tracepoints/myevent2
+
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo > dynamic_events
+
+# auto naming check
+echo "t $TRACEPOINT1" >> dynamic_events
+
+test -d events/tracepoints/$TRACEPOINT1
+
+echo > dynamic_events
+
+# SUBSYSTEM is not supported
+echo "t $SUBSYSTEM/$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+echo "t $SUBSYSTEM:$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+echo "t:myevent3 $SUBSYSTEM/$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+echo "t:myevent3 $SUBSYSTEM:$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
new file mode 100644
index 000000000000..d319d5ed4226
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
@@ -0,0 +1,61 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove tracepoint probe events on module
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+rmmod trace-events-sample ||:
+if ! modprobe trace-events-sample ; then
+ echo "No trace-events sample module - please make CONFIG_SAMPLE_TRACE_EVENTS=m"
+ exit_unresolved;
+fi
+trap "rmmod trace-events-sample" EXIT
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TRACEPOINT1=foo_bar
+TRACEPOINT2=foo_bar_with_cond
+
+echo "t:myevent1 $TRACEPOINT1" >> dynamic_events
+echo "t:myevent2 $TRACEPOINT2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/tracepoints/myevent1
+test -d events/tracepoints/myevent2
+
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo > dynamic_events
+
+clear_trace
+
+:;: "Try to put a probe on a tracepoint in non-loaded module" ;:
+rmmod trace-events-sample
+
+echo "t:myevent1 $TRACEPOINT1" >> dynamic_events
+echo "t:myevent2 $TRACEPOINT2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/tracepoints/myevent1
+test -d events/tracepoints/myevent2
+
+echo 1 > events/tracepoints/enable
+
+modprobe trace-events-sample
+
+sleep 2
+
+grep -q "myevent1" trace
+grep -q "myevent2" trace
+
+rmmod trace-events-sample
+trap "" EXIT
+
+echo 0 > events/tracepoints/enable
+echo > dynamic_events
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
new file mode 100644
index 000000000000..f2048c244526
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove/test uprobe events
+# requires: uprobe_events
+
+if ! which readelf > /dev/null 2>&1 ; then
+ echo "No readelf found. skipped."
+ exit_unresolved
+fi
+
+echo 0 > events/enable
+echo > dynamic_events
+
+REALBIN=`readlink -f /bin/sh`
+ENTRYPOINT=`readelf -h ${REALBIN} | grep Entry | sed -e 's/[^0]*//'`
+
+echo "p:myevent ${REALBIN}:${ENTRYPOINT}" >> uprobe_events
+
+grep -q myevent uprobe_events
+test -d events/uprobes/myevent
+
+echo 1 > events/uprobes/myevent/enable
+echo 'ls' | /bin/sh > /dev/null
+echo 0 > events/uprobes/myevent/enable
+grep -q myevent trace
+
+echo "-:myevent" >> uprobe_events
+! grep -q myevent uprobe_events
+
+echo > uprobe_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
new file mode 100644
index 000000000000..f656bccb1a14
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
@@ -0,0 +1,63 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Checking dynamic events limitations
+# requires: dynamic_events "imm-value":README
+
+# Max arguments limitation
+MAX_ARGS=128
+EXCEED_ARGS=$((MAX_ARGS + 1))
+
+# bash and dash evaluate variables differently.
+# dash will evaluate '\\' every time it is read whereas bash does not.
+#
+# TEST_STRING="$TEST_STRING \\$i"
+# echo $TEST_STRING
+#
+# With i=123
+# On bash, that will print "\123"
+# but on dash, that will print the escape sequence of \123 as the \ will
+# be interpreted again in the echo.
+#
+# Set a variable "bs" to save a double backslash, then echo that
+# to "ts" to see if $ts changed or not. If it changed, it's dash,
+# if not, it's bash, and then bs can equal a single backslash.
+bs='\\'
+ts=`echo $bs`
+if [ "$ts" = '\\' ]; then
+ # this is bash
+ bs='\'
+fi
+
+check_max_args() { # event_header
+ TEST_STRING=$1
+ # Acceptable
+ for i in `seq 1 $MAX_ARGS`; do
+ TEST_STRING="$TEST_STRING $bs$i"
+ done
+ echo "$TEST_STRING" >> dynamic_events
+ echo > dynamic_events
+ # Error
+ TEST_STRING="$TEST_STRING \\$EXCEED_ARGS"
+ ! echo "$TEST_STRING" >> dynamic_events
+ return 0
+}
+
+# Kprobe max args limitation
+if grep -q "kprobe_events" README; then
+ check_max_args "p vfs_read"
+fi
+
+# Fprobe max args limitation
+if grep -q "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README; then
+ check_max_args "f vfs_read"
+fi
+
+# Tprobe max args limitation
+if grep -q "t[:[<group>/][<event>]] <tracepoint> [<args>]" README; then
+ check_max_args "t kfree"
+fi
+
+# Uprobe max args limitation
+if grep -q "uprobe_events" README; then
+ check_max_args "p /bin/sh:10"
+fi
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc
new file mode 100644
index 000000000000..c1f1cafa30f3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - enable/disable tracepoint probe events
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TRACEPOINT=sched_switch
+ENABLEFILE=events/tracepoints/myprobe/enable
+
+:;: "Add tracepoint event on $TRACEPOINT" ;:
+
+echo "t:myprobe ${TRACEPOINT}" >> dynamic_events
+
+:;: "Check enable/disable to ensure it works" ;:
+
+echo 1 > $ENABLEFILE
+
+grep -q $TRACEPOINT trace
+
+echo 0 > $ENABLEFILE
+
+echo > trace
+
+! grep -q $TRACEPOINT trace
+
+:;: "Repeat enable/disable to ensure it works" ;:
+
+echo 1 > $ENABLEFILE
+
+grep -q $TRACEPOINT trace
+
+echo 0 > $ENABLEFILE
+
+echo > trace
+
+! grep -q $TRACEPOINT trace
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
new file mode 100644
index 000000000000..4f5e8c665156
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Event probe event parser error log check
+# requires: dynamic_events events/syscalls/sys_enter_openat "<attached-group>.<attached-event> [<args>]":README error_log
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'event_probe' "$1" 'dynamic_events'
+}
+
+check_error 'e ^a.' # NO_EVENT_INFO
+check_error 'e ^.b' # NO_EVENT_INFO
+check_error 'e ^a.b' # BAD_ATTACH_EVENT
+check_error 'e syscalls/sys_enter_openat ^foo' # BAD_ATTACH_ARG
+check_error 'e:^/bar syscalls/sys_enter_openat' # NO_GROUP_NAME
+check_error 'e:^12345678901234567890123456789012345678901234567890123456789012345/bar syscalls/sys_enter_openat' # GROUP_TOO_LONG
+
+check_error 'e:^foo.1/bar syscalls/sys_enter_openat' # BAD_GROUP_NAME
+check_error 'e:^ syscalls/sys_enter_openat' # NO_EVENT_NAME
+check_error 'e:foo/^12345678901234567890123456789012345678901234567890123456789012345 syscalls/sys_enter_openat' # EVENT_TOO_LONG
+check_error 'e:foo/^bar.1 syscalls/sys_enter_openat' # BAD_EVENT_NAME
+
+check_error 'e:foo/bar syscalls/sys_enter_openat arg=^dfd' # BAD_FETCH_ARG
+check_error 'e:foo/bar syscalls/sys_enter_openat ^arg=$foo' # BAD_ATTACH_ARG
+
+if grep -q '<attached-group>\.<attached-event>.*\[if <filter>\]' README; then
+ check_error 'e:foo/bar syscalls/sys_enter_openat if ^' # NO_EP_FILTER
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc
new file mode 100644
index 000000000000..c6a9d2466a71
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc
@@ -0,0 +1,41 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Fprobe event VFS type argument
+# requires: dynamic_events "%pd/%pD":README "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+
+: "Test argument %pd with name for fprobe"
+echo 'f:testprobe dput name=$arg1:%pd' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
+
+: "Test argument %pd without name for fprobe"
+echo 'f:testprobe dput $arg1:%pd' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
+
+: "Test argument %pD with name for fprobe"
+echo 'f:testprobe vfs_read name=$arg1:%pD' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
+
+: "Test argument %pD without name for fprobe"
+echo 'f:testprobe vfs_read $arg1:%pD' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc
new file mode 100644
index 000000000000..1e251ce2998e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Function return probe entry argument access
+# requires: dynamic_events 'f[:[<group>/][<event>]] <func-name>':README 'kernel return probes support:':README
+
+echo 'f:tests/myevent1 vfs_open arg=$arg1' >> dynamic_events
+echo 'f:tests/myevent2 vfs_open%return arg=$arg1' >> dynamic_events
+
+echo 1 > events/tests/enable
+
+echo > trace
+cat trace > /dev/null
+
+streq() {
+ test $1 = $2
+}
+
+streq `grep -A 1 -m 1 myevent1 trace | sed -r 's/^.*(arg=.*)/\1/' `
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
new file mode 100644
index 000000000000..fee479295e2f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -0,0 +1,122 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Fprobe event parser error log check
+# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'trace_fprobe' "$1" 'dynamic_events'
+}
+
+case `uname -m` in
+x86_64|i[3456]86)
+ REG=%ax ;;
+aarch64)
+ REG=%x0 ;;
+*)
+ REG=%r0 ;;
+esac
+
+check_error 'f^100 vfs_read' # BAD_MAXACT
+
+check_error 'f ^non_exist_func' # BAD_PROBE_ADDR (enoent)
+check_error 'f ^vfs_read+10' # BAD_PROBE_ADDR
+check_error 'f:^/bar vfs_read' # NO_GROUP_NAME
+check_error 'f:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG
+
+check_error 'f:^foo.1/bar vfs_read' # BAD_GROUP_NAME
+check_error 'f:^ vfs_read' # NO_EVENT_NAME
+check_error 'f:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG
+check_error 'f:foo/^bar.1 vfs_read' # BAD_EVENT_NAME
+check_error 't kmem^/kfree' # BAD_TP_NAME
+
+check_error 'f vfs_read ^$stack10000' # BAD_STACK_NUM
+
+check_error 'f vfs_read ^$arg10000' # BAD_ARG_NUM
+
+if !grep -q 'kernel return probes support:' README; then
+check_error 'f vfs_read $retval ^$arg1' # BAD_VAR
+fi
+check_error 'f vfs_read ^$none_var' # BAD_VAR
+check_error 'f vfs_read ^'$REG # BAD_VAR
+
+check_error 'f vfs_read ^@12345678abcde' # BAD_MEM_ADDR
+check_error 'f vfs_read ^@+10' # FILE_ON_KPROBE
+
+grep -q "imm-value" README && \
+check_error 'f vfs_read arg1=\^x' # BAD_IMM
+grep -q "imm-string" README && \
+check_error 'f vfs_read arg1=\"abcd^' # IMMSTR_NO_CLOSE
+
+check_error 'f vfs_read ^+0@0)' # DEREF_NEED_BRACE
+check_error 'f vfs_read ^+0ab1(@0)' # BAD_DEREF_OFFS
+check_error 'f vfs_read +0(+0(@0^)' # DEREF_OPEN_BRACE
+
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
+check_error 'f vfs_read +0(^$comm)' # COMM_CANT_DEREF
+fi
+
+check_error 'f vfs_read ^&1' # BAD_FETCH_ARG
+
+
+# We've introduced this limitation with array support
+if grep -q ' <type>\\\[<array-size>\\\]' README; then
+check_error 'f vfs_read +0(^+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(@0))))))))))))))' # TOO_MANY_OPS?
+check_error 'f vfs_read +0(@11):u8[10^' # ARRAY_NO_CLOSE
+check_error 'f vfs_read +0(@11):u8[10]^a' # BAD_ARRAY_SUFFIX
+check_error 'f vfs_read +0(@11):u8[^10a]' # BAD_ARRAY_NUM
+check_error 'f vfs_read +0(@11):u8[^256]' # ARRAY_TOO_BIG
+fi
+
+check_error 'f vfs_read @11:^unknown_type' # BAD_TYPE
+check_error 'f vfs_read $stack0:^string' # BAD_STRING
+check_error 'f vfs_read @11:^b10@a/16' # BAD_BITFIELD
+
+check_error 'f vfs_read ^arg123456789012345678901234567890=@11' # ARG_NAME_TOO_LOG
+check_error 'f vfs_read ^=@11' # NO_ARG_NAME
+check_error 'f vfs_read ^var.1=@11' # BAD_ARG_NAME
+check_error 'f vfs_read var1=@11 ^var1=@12' # USED_ARG_NAME
+check_error 'f vfs_read ^+1234567(+1234567(+1234567(+1234567(+1234567(+1234567(@1234))))))' # ARG_TOO_LONG
+check_error 'f vfs_read arg1=^' # NO_ARG_BODY
+
+
+# multiprobe errors
+if grep -q "Create/append/" README && grep -q "imm-value" README; then
+echo "f:fprobes/testevent $FUNCTION_FORK" > dynamic_events
+check_error '^f:fprobes/testevent do_exit%return' # DIFF_PROBE_TYPE
+
+# Explicitly use printf "%s" to not interpret \1
+printf "%s" "f:fprobes/testevent $FUNCTION_FORK abcd=\\1" > dynamic_events
+check_error "f:fprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE
+check_error "f:fprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE
+check_error "f:fprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE
+check_error "^f:fprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE
+fi
+
+# %return suffix errors
+check_error 'f vfs_read^%hoge' # BAD_ADDR_SUFFIX
+
+# BTF arguments errors
+if grep -q "<argname>" README; then
+check_error 'f vfs_read args=^$arg*' # BAD_VAR_ARGS
+check_error 'f vfs_read +0(^$arg*)' # BAD_VAR_ARGS
+check_error 'f vfs_read $arg* ^$arg*' # DOUBLE_ARGS
+if !grep -q 'kernel return probes support:' README; then
+check_error 'f vfs_read%return ^$arg*' # NOFENTRY_ARGS
+fi
+check_error 'f vfs_read ^hoge' # NO_BTFARG
+check_error 'f kfree ^$arg10' # NO_BTFARG (exceed the number of parameters)
+check_error 'f kfree%return ^$retval' # NO_RETVAL
+
+if grep -qF "<argname>[->field[->field|.field...]]" README ; then
+check_error 'f vfs_read%return $retval->^foo' # NO_PTR_STRCT
+check_error 'f vfs_read file->^foo' # NO_BTF_FIELD
+check_error 'f vfs_read file^-.foo' # BAD_HYPHEN
+check_error 'f vfs_read ^file:string' # BAD_TYPE4STR
+fi
+
+else
+check_error 'f vfs_read ^$arg*' # NOSUP_BTFARG
+check_error 't kfree ^$arg*' # NOSUP_BTFARG
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
new file mode 100644
index 000000000000..5f72abe6fa79
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - check if duplicate events are caught
+# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README events/syscalls/sys_enter_openat
+
+echo 0 > events/enable
+
+HAVE_KPROBES=0
+
+if [ -f kprobe_events ]; then
+ HAVE_KPROBES=1
+fi
+
+clear_dynamic_events
+
+# first create dynamic events for eprobes and kprobes.
+
+echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+# Test eprobe for same eprobe, existing kprobe and existing event
+! echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+! echo 'e:syscalls/sys_enter_open syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+if [ $HAVE_KPROBES -eq 1 ]; then
+ echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events
+ ! echo 'e:kgroup/kevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+# Test kprobe for same kprobe, existing eprobe and existing event
+ ! echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events
+ ! echo 'p:egroup/eevent vfs_open file=+0($arg2)' >> dynamic_events
+ ! echo 'p:syscalls/sys_enter_open vfs_open file=+0($arg2)' >> dynamic_events
+
+ echo '-:kgroup/kevent' >> dynamic_events
+fi
+
+echo '-:egroup/eevent' >> dynamic_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc
new file mode 100644
index 000000000000..ffe8ffef4027
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Tracepoint probe event parser error log check
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'trace_fprobe' "$1" 'dynamic_events'
+}
+
+check_error 't^100 kfree' # BAD_MAXACT_TYPE
+
+check_error 't:^/bar kfree' # NO_GROUP_NAME
+check_error 't:^12345678901234567890123456789012345678901234567890123456789012345/bar kfree' # GROUP_TOO_LONG
+
+check_error 't:^foo.1/bar kfree' # BAD_GROUP_NAME
+check_error 't:^ kfree' # NO_EVENT_NAME
+check_error 't:foo/^12345678901234567890123456789012345678901234567890123456789012345 kfree' # EVENT_TOO_LONG
+check_error 't:foo/^bar.1 kfree' # BAD_EVENT_NAME
+
+check_error 't kfree ^$retval' # RETVAL_ON_PROBE
+check_error 't kfree ^$stack10000' # BAD_STACK_NUM
+
+check_error 't kfree ^$arg10000' # BAD_ARG_NUM
+
+check_error 't kfree ^$none_var' # BAD_VAR
+check_error 't kfree ^%rax' # BAD_VAR
+
+check_error 't kfree ^@12345678abcde' # BAD_MEM_ADDR
+check_error 't kfree ^@+10' # FILE_ON_KPROBE
+
+grep -q "imm-value" README && \
+check_error 't kfree arg1=\^x' # BAD_IMM
+grep -q "imm-string" README && \
+check_error 't kfree arg1=\"abcd^' # IMMSTR_NO_CLOSE
+
+check_error 't kfree ^+0@0)' # DEREF_NEED_BRACE
+check_error 't kfree ^+0ab1(@0)' # BAD_DEREF_OFFS
+check_error 't kfree +0(+0(@0^)' # DEREF_OPEN_BRACE
+
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
+check_error 't kfree +0(^$comm)' # COMM_CANT_DEREF
+fi
+
+check_error 't kfree ^&1' # BAD_FETCH_ARG
+
+
+# We've introduced this limitation with array support
+if grep -q ' <type>\\\[<array-size>\\\]' README; then
+check_error 't kfree +0(^+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(@0))))))))))))))' # TOO_MANY_OPS?
+check_error 't kfree +0(@11):u8[10^' # ARRAY_NO_CLOSE
+check_error 't kfree +0(@11):u8[10]^a' # BAD_ARRAY_SUFFIX
+check_error 't kfree +0(@11):u8[^10a]' # BAD_ARRAY_NUM
+check_error 't kfree +0(@11):u8[^256]' # ARRAY_TOO_BIG
+fi
+
+check_error 't kfree @11:^unknown_type' # BAD_TYPE
+check_error 't kfree $stack0:^string' # BAD_STRING
+check_error 't kfree @11:^b10@a/16' # BAD_BITFIELD
+
+check_error 't kfree ^arg123456789012345678901234567890=@11' # ARG_NAME_TOO_LOG
+check_error 't kfree ^=@11' # NO_ARG_NAME
+check_error 't kfree ^var.1=@11' # BAD_ARG_NAME
+check_error 't kfree var1=@11 ^var1=@12' # USED_ARG_NAME
+check_error 't kfree ^+1234567(+1234567(+1234567(+1234567(+1234567(+1234567(@1234))))))' # ARG_TOO_LONG
+check_error 't kfree arg1=^' # NO_ARG_BODY
+
+
+# multiprobe errors
+if grep -q "Create/append/" README && grep -q "imm-value" README; then
+echo "t:tracepoint/testevent kfree" > dynamic_events
+check_error '^f:tracepoint/testevent kfree' # DIFF_PROBE_TYPE
+
+# Explicitly use printf "%s" to not interpret \1
+printf "%s" "t:tracepoints/testevent kfree abcd=\\1" > dynamic_events
+check_error "t:tracepoints/testevent kfree ^bcd=\\1" # DIFF_ARG_TYPE
+check_error "t:tracepoints/testevent kfree ^abcd=\\1:u8" # DIFF_ARG_TYPE
+check_error "t:tracepoints/testevent kfree ^abcd=\\\"foo\"" # DIFF_ARG_TYPE
+check_error "^t:tracepoints/testevent kfree abcd=\\1" # SAME_PROBE
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-mod.tc b/tools/testing/selftests/ftrace/test.d/event/event-mod.tc
new file mode 100644
index 000000000000..175243cd9ab7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/event-mod.tc
@@ -0,0 +1,191 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with module event
+# requires: set_event "Can enable module events via: :mod:":README
+# flags: instance
+
+rmmod trace-events-sample ||:
+if ! modprobe trace-events-sample ; then
+ echo "No trace-events sample module - please make CONFIG_SAMPLE_TRACE_EVENTS=m"
+ exit_unresolved;
+fi
+trap "rmmod trace-events-sample" EXIT
+
+# Set events for the module
+echo ":mod:trace-events-sample" > set_event
+
+test_all_enabled() {
+
+ # Check if more than one is enabled
+ grep -q sample-trace:foo_bar set_event
+ grep -q sample-trace:foo_bar_with_cond set_event
+ grep -q sample-trace:foo_bar_with_fn set_event
+
+ # All of them should be enabled. Check via the enable file
+ val=`cat events/sample-trace/enable`
+ if [ $val -ne 1 ]; then
+ exit_fail
+ fi
+}
+
+clear_events() {
+ echo > set_event
+ val=`cat events/enable`
+ if [ "$val" != "0" ]; then
+ exit_fail
+ fi
+ count=`cat set_event | wc -l`
+ if [ $count -ne 0 ]; then
+ exit_fail
+ fi
+}
+
+test_all_enabled
+
+echo clear all events
+echo 0 > events/enable
+
+echo Confirm the events are disabled
+val=`cat events/sample-trace/enable`
+if [ $val -ne 0 ]; then
+ exit_fail
+fi
+
+echo And the set_event file is empty
+
+cnt=`wc -l set_event`
+if [ $cnt -ne 0 ]; then
+ exit_fail
+fi
+
+echo now enable all events
+echo 1 > events/enable
+
+echo Confirm the events are enabled again
+val=`cat events/sample-trace/enable`
+if [ $val -ne 1 ]; then
+ exit_fail
+fi
+
+echo disable just the module events
+echo '!:mod:trace-events-sample' >> set_event
+
+echo Should have mix of events enabled
+val=`cat events/enable`
+if [ "$val" != "X" ]; then
+ exit_fail
+fi
+
+echo Confirm the module events are disabled
+val=`cat events/sample-trace/enable`
+if [ $val -ne 0 ]; then
+ exit_fail
+fi
+
+echo 0 > events/enable
+
+echo now enable the system events
+echo 'sample-trace:mod:trace-events-sample' > set_event
+
+test_all_enabled
+
+echo clear all events
+echo 0 > events/enable
+
+echo Confirm the events are disabled
+val=`cat events/sample-trace/enable`
+if [ $val -ne 0 ]; then
+ exit_fail
+fi
+
+echo Test enabling foo_bar only
+echo 'foo_bar:mod:trace-events-sample' > set_event
+
+grep -q sample-trace:foo_bar set_event
+
+echo make sure nothing is found besides foo_bar
+if grep -q -v sample-trace:foo_bar set_event ; then
+ exit_fail
+fi
+
+echo Append another using the system and event name
+echo 'sample-trace:foo_bar_with_cond:mod:trace-events-sample' >> set_event
+
+grep -q sample-trace:foo_bar set_event
+grep -q sample-trace:foo_bar_with_cond set_event
+
+count=`cat set_event | wc -l`
+
+if [ $count -ne 2 ]; then
+ exit_fail
+fi
+
+clear_events
+
+rmmod trace-events-sample
+
+echo ':mod:trace-events-sample' > set_event
+
+echo make sure that the module shows up, and '-' is converted to '_'
+grep -q '\*:\*:mod:trace_events_sample' set_event
+
+modprobe trace-events-sample
+
+test_all_enabled
+
+clear_events
+
+rmmod trace-events-sample
+
+echo Enable just the system events
+echo 'sample-trace:mod:trace-events-sample' > set_event
+grep -q 'sample-trace:mod:trace_events_sample' set_event
+
+modprobe trace-events-sample
+
+test_all_enabled
+
+clear_events
+
+rmmod trace-events-sample
+
+echo Enable event with just event name
+echo 'foo_bar:mod:trace-events-sample' > set_event
+grep -q 'foo_bar:mod:trace_events_sample' set_event
+
+echo Enable another event with both system and event name
+echo 'sample-trace:foo_bar_with_cond:mod:trace-events-sample' >> set_event
+grep -q 'sample-trace:foo_bar_with_cond:mod:trace_events_sample' set_event
+echo Make sure the other event was still there
+grep -q 'foo_bar:mod:trace_events_sample' set_event
+
+modprobe trace-events-sample
+
+echo There should be no :mod: cached events
+if grep -q ':mod:' set_event; then
+ exit_fail
+fi
+
+echo two events should be enabled
+count=`cat set_event | wc -l`
+if [ $count -ne 2 ]; then
+ exit_fail
+fi
+
+echo only two events should be enabled
+val=`cat events/sample-trace/enable`
+if [ "$val" != "X" ]; then
+ exit_fail
+fi
+
+val=`cat events/sample-trace/foo_bar/enable`
+if [ "$val" != "1" ]; then
+ exit_fail
+fi
+
+val=`cat events/sample-trace/foo_bar_with_cond/enable`
+if [ "$val" != "1" ]; then
+ exit_fail
+fi
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
index e6eb78f0b954..9933ed24f901 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
@@ -57,6 +57,10 @@ enable_events() {
echo 1 > tracing_on
}
+other_task() {
+ sleep .001 || usleep 1 || sleep 1
+}
+
echo 0 > options/event-fork
do_reset
@@ -94,6 +98,9 @@ child=$!
echo "child = $child"
wait $child
+# Be sure some other events will happen for small systems (e.g. 1 core)
+other_task
+
echo 0 > tracing_on
cnt=`count_pid $mypid`
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index b1ede6249866..65916bb55dfb 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -14,11 +14,35 @@ fail() { #msg
exit_fail
}
+# As reading trace can last forever, simply look for 3 different
+# events then exit out of reading the file. If there's not 3 different
+# events, then the test has failed.
+check_unique() {
+ cat trace | grep -v '^#' | awk '
+ BEGIN { cnt = 0; }
+ {
+ for (i = 0; i < cnt; i++) {
+ if (event[i] == $5) {
+ break;
+ }
+ }
+ if (i == cnt) {
+ event[cnt++] = $5;
+ if (cnt > 2) {
+ exit;
+ }
+ }
+ }
+ END {
+ printf "%d", cnt;
+ }'
+}
+
echo 'sched:*' > set_event
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`check_unique`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
@@ -29,7 +53,7 @@ echo 1 > events/sched/enable
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`check_unique`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
@@ -40,7 +64,7 @@ echo 0 > events/sched/enable
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -ne 0 ]; then
fail "any of scheduler events should not be recorded"
fi
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
new file mode 100644
index 000000000000..cfa16aa1f39a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -0,0 +1,109 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event filter function - test event filtering on functions
+# requires: set_event events/kmem/kmem_cache_free/filter
+# flags: instance
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+sample_events() {
+ echo 1 > events/kmem/kmem_cache_free/enable
+ echo 1 > tracing_on
+ ls > /dev/null
+ echo 0 > tracing_on
+ echo 0 > events/kmem/kmem_cache_free/enable
+}
+
+echo 0 > tracing_on
+echo 0 > events/enable
+
+# Clear functions caused by page cache; run sample_events twice
+sample_events
+sample_events
+
+echo "Get the most frequently calling function"
+echo > trace
+sample_events
+
+target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
+if [ -z "$target_func" ]; then
+ exit_fail
+fi
+echo > trace
+
+echo "Test event filter function name"
+echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter
+
+sample_events
+max_retry=10
+while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do
+sample_events
+max_retry=$((max_retry - 1))
+if [ $max_retry -eq 0 ]; then
+ exit_fail
+fi
+done
+
+hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
+misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
+
+if [ $hitcnt -eq 0 ]; then
+ exit_fail
+fi
+
+if [ $misscnt -gt 0 ]; then
+ exit_fail
+fi
+
+address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1`
+
+echo "Test event filter function address"
+echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter
+echo > trace
+sample_events
+max_retry=10
+while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do
+sample_events
+max_retry=$((max_retry - 1))
+if [ $max_retry -eq 0 ]; then
+ exit_fail
+fi
+done
+
+hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
+misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
+
+if [ $hitcnt -eq 0 ]; then
+ exit_fail
+fi
+
+if [ $misscnt -gt 0 ]; then
+ exit_fail
+fi
+
+# Check strings too
+if [ -f events/syscalls/sys_enter_openat/filter ]; then
+ DIRNAME=`basename $TMPDIR`
+ echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter
+ echo 1 > events/syscalls/sys_enter_openat/enable
+ echo 1 > tracing_on
+ ls /bin/sh
+ nocnt=`grep openat trace | wc -l`
+ ls $TMPDIR
+ echo 0 > tracing_on
+ hitcnt=`grep openat trace | wc -l`;
+ echo 0 > events/syscalls/sys_enter_openat/enable
+ if [ $nocnt -gt 0 ]; then
+ exit_fail
+ fi
+ if [ $hitcnt -eq 0 ]; then
+ exit_fail
+ fi
+fi
+
+reset_events_filter
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc
new file mode 100644
index 000000000000..b6d6a312ead5
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc
@@ -0,0 +1,177 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters
+# requires: set_ftrace_filter function_graph:tracer
+
+# Make sure that function graph filtering works
+
+INSTANCE1="instances/test1_$$"
+INSTANCE2="instances/test2_$$"
+
+WD=`pwd`
+
+do_reset() {
+ cd $WD
+ if [ -d $INSTANCE1 ]; then
+ echo nop > $INSTANCE1/current_tracer
+ rmdir $INSTANCE1
+ fi
+ if [ -d $INSTANCE2 ]; then
+ echo nop > $INSTANCE2/current_tracer
+ rmdir $INSTANCE2
+ fi
+}
+
+mkdir $INSTANCE1
+if ! grep -q function_graph $INSTANCE1/available_tracers; then
+ echo "function_graph not allowed with instances"
+ rmdir $INSTANCE1
+ exit_unsupported
+fi
+
+mkdir $INSTANCE2
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+function_count() {
+ search=$1
+ vsearch=$2
+
+ if [ -z "$search" ]; then
+ cat enabled_functions | wc -l
+ elif [ -z "$vsearch" ]; then
+ grep $search enabled_functions | wc -l
+ else
+ grep $search enabled_functions | grep $vsearch| wc -l
+ fi
+}
+
+set_fgraph() {
+ instance=$1
+ filter="$2"
+ notrace="$3"
+
+ echo "$filter" > $instance/set_ftrace_filter
+ echo "$notrace" > $instance/set_ftrace_notrace
+ echo function_graph > $instance/current_tracer
+}
+
+check_functions() {
+ orig_cnt=$1
+ test=$2
+
+ cnt=`function_count $test`
+ if [ $cnt -gt $orig_cnt ]; then
+ fail
+ fi
+}
+
+check_cnt() {
+ orig_cnt=$1
+ search=$2
+ vsearch=$3
+
+ cnt=`function_count $search $vsearch`
+ if [ $cnt -gt $orig_cnt ]; then
+ fail
+ fi
+}
+
+reset_graph() {
+ instance=$1
+ echo nop > $instance/current_tracer
+}
+
+# get any functions that were enabled before the test
+total_cnt=`function_count`
+sched_cnt=`function_count sched`
+lock_cnt=`function_count lock`
+time_cnt=`function_count time`
+clock_cnt=`function_count clock`
+locks_clock_cnt=`function_count locks clock`
+clock_locks_cnt=`function_count clock locks`
+
+# Trace functions with "sched" but not "time"
+set_fgraph $INSTANCE1 '*sched*' '*time*'
+
+# Make sure "time" isn't listed
+check_functions $time_cnt 'time'
+instance1_cnt=`function_count`
+
+# Trace functions with "lock" but not "clock"
+set_fgraph $INSTANCE2 '*lock*' '*clock*'
+instance1_2_cnt=`function_count`
+
+# Turn off the first instance
+reset_graph $INSTANCE1
+
+# The second instance doesn't trace "clock" functions
+check_functions $clock_cnt 'clock'
+instance2_cnt=`function_count`
+
+# Start from a clean slate
+reset_graph $INSTANCE2
+check_functions $total_cnt
+
+# Trace functions with "lock" but not "clock"
+set_fgraph $INSTANCE2 '*lock*' '*clock*'
+
+# This should match the last time instance 2 was by itself
+cnt=`function_count`
+if [ $instance2_cnt -ne $cnt ]; then
+ fail
+fi
+
+# And it should not be tracing "clock" functions
+check_functions $clock_cnt 'clock'
+
+# Trace functions with "sched" but not "time"
+set_fgraph $INSTANCE1 '*sched*' '*time*'
+
+# This should match the last time both instances were enabled
+cnt=`function_count`
+if [ $instance1_2_cnt -ne $cnt ]; then
+ fail
+fi
+
+# Turn off the second instance
+reset_graph $INSTANCE2
+
+# This should match the last time instance 1 was by itself
+cnt=`function_count`
+if [ $instance1_cnt -ne $cnt ]; then
+ fail
+fi
+
+# And it should not be tracing "time" functions
+check_functions $time_cnt 'time'
+
+# Start from a clean slate
+reset_graph $INSTANCE1
+check_functions $total_cnt
+
+# Enable all functions but those that have "locks"
+set_fgraph $INSTANCE1 '' '*locks*'
+
+# Enable all functions but those that have "clock"
+set_fgraph $INSTANCE2 '' '*clock*'
+
+# If a function has "locks" it should not have "clock"
+check_cnt $locks_clock_cnt locks clock
+
+# If a function has "clock" it should not have "locks"
+check_cnt $clock_locks_cnt clock locks
+
+reset_graph $INSTANCE1
+reset_graph $INSTANCE2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc
new file mode 100644
index 000000000000..ff88f97e41fb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc
@@ -0,0 +1,103 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters
+# requires: set_ftrace_filter function_graph:tracer
+
+# Make sure that function graph filtering works
+
+INSTANCE1="instances/test1_$$"
+INSTANCE2="instances/test2_$$"
+INSTANCE3="instances/test3_$$"
+
+WD=`pwd`
+
+do_reset() {
+ cd $WD
+ if [ -d $INSTANCE1 ]; then
+ echo nop > $INSTANCE1/current_tracer
+ rmdir $INSTANCE1
+ fi
+ if [ -d $INSTANCE2 ]; then
+ echo nop > $INSTANCE2/current_tracer
+ rmdir $INSTANCE2
+ fi
+ if [ -d $INSTANCE3 ]; then
+ echo nop > $INSTANCE3/current_tracer
+ rmdir $INSTANCE3
+ fi
+}
+
+mkdir $INSTANCE1
+if ! grep -q function_graph $INSTANCE1/available_tracers; then
+ echo "function_graph not allowed with instances"
+ rmdir $INSTANCE1
+ exit_unsupported
+fi
+
+mkdir $INSTANCE2
+mkdir $INSTANCE3
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+do_test() {
+ REGEX=$1
+ TEST=$2
+
+ # filter something, schedule is always good
+ if ! echo "$REGEX" > set_ftrace_filter; then
+ fail "can not enable filter $REGEX"
+ fi
+
+ echo > trace
+ echo function_graph > current_tracer
+ enable_tracing
+ sleep 1
+ # search for functions (has "{" or ";" on the line)
+ echo 0 > tracing_on
+ count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep -v "$TEST" | wc -l`
+ echo 1 > tracing_on
+ if [ $count -ne 0 ]; then
+ fail "Graph filtering not working by itself against $TEST?"
+ fi
+
+ # Make sure we did find something
+ echo 0 > tracing_on
+ count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l`
+ echo 1 > tracing_on
+ if [ $count -eq 0 ]; then
+ fail "No traces found with $TEST?"
+ fi
+}
+
+do_test '*sched*' 'sched'
+cd $INSTANCE1
+do_test '*lock*' 'lock'
+cd $WD
+cd $INSTANCE2
+do_test '*rcu*' 'rcu'
+cd $WD
+cd $INSTANCE3
+echo function_graph > current_tracer
+
+sleep 1
+count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l`
+if [ $count -eq 0 ]; then
+ fail "No traces found with all tracing?"
+fi
+
+cd $WD
+echo nop > current_tracer
+echo nop > $INSTANCE1/current_tracer
+echo nop > $INSTANCE2/current_tracer
+echo nop > $INSTANCE3/current_tracer
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc
new file mode 100644
index 000000000000..ffff8646733c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function profiler with function graph tracing
+# requires: function_profile_enabled set_ftrace_filter function_graph:tracer
+
+# The function graph tracer can now be run along side of the function
+# profiler. But there was a bug that caused the combination of the two
+# to crash. It also required the function graph tracer to be started
+# first.
+#
+# This test triggers that bug
+#
+# We need both function_graph and profiling to run this test
+
+fail() { # mesg
+ echo $1
+ exit_fail
+}
+
+echo "Enabling function graph tracer:"
+echo function_graph > current_tracer
+echo "enable profiler"
+
+# Older kernels do not allow function_profile to be enabled with
+# function graph tracer. If the below fails, mark it as unsupported
+echo 1 > function_profile_enabled || exit_unsupported
+
+# Let it run for a bit to make sure nothing explodes
+sleep 1
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc
new file mode 100644
index 000000000000..4307d4eef417
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc
@@ -0,0 +1,44 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph print function return value
+# requires: options/funcgraph-retval options/funcgraph-retval-hex function_graph:tracer
+
+# Make sure that funcgraph-retval works
+
+fail() { # msg
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+# get self PID, can not use $$, because it is PPID
+read PID _ < /proc/self/stat
+
+[ -f set_ftrace_filter ] && echo proc_reg_write > set_ftrace_filter
+[ -f set_ftrace_pid ] && echo ${PID} > set_ftrace_pid
+echo function_graph > current_tracer
+echo 1 > options/funcgraph-retval
+
+set +e
+enable_tracing
+echo > /proc/interrupts
+disable_tracing
+set -e
+
+: "Test printing the error code in signed decimal format"
+echo 0 > options/funcgraph-retval-hex
+count=`cat trace | grep 'proc_reg_write' | grep -e '=-5 ' -e '= -5 ' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "Return value can not be printed in signed decimal format"
+fi
+
+: "Test printing the error code in hexadecimal format"
+echo 1 > options/funcgraph-retval-hex
+count=`cat trace | grep 'proc_reg_write' | grep 'fffffffb' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "Return value can not be printed in hexadecimal format"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
index 4b994b6df5ac..ed81eaf2afd6 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -29,7 +29,7 @@ ftrace_filter_check 'schedule*' '^schedule.*$'
ftrace_filter_check '*pin*lock' '.*pin.*lock$'
# filter by start*mid*
-ftrace_filter_check 'mutex*try*' '^mutex.*try.*'
+ftrace_filter_check 'mutex*unl*' '^mutex.*unl.*'
# Advanced full-glob matching feature is recently supported.
# Skip the tests if we are sure the kernel does not support it.
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index 2f7211254529..8dcce001881d 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -8,12 +8,18 @@
# Also test it on an instance directory
do_function_fork=1
+do_funcgraph_proc=1
if [ ! -f options/function-fork ]; then
do_function_fork=0
echo "no option for function-fork found. Option will not be tested."
fi
+if [ ! -f options/funcgraph-proc ]; then
+ do_funcgraph_proc=0
+ echo "no option for function-fork found. Option will not be tested."
+fi
+
read PID _ < /proc/self/stat
if [ $do_function_fork -eq 1 ]; then
@@ -21,12 +27,19 @@ if [ $do_function_fork -eq 1 ]; then
orig_value=`grep function-fork trace_options`
fi
+if [ $do_funcgraph_proc -eq 1 ]; then
+ orig_value2=`cat options/funcgraph-proc`
+ echo 1 > options/funcgraph-proc
+fi
+
do_reset() {
- if [ $do_function_fork -eq 0 ]; then
- return
+ if [ $do_function_fork -eq 1 ]; then
+ echo $orig_value > trace_options
fi
- echo $orig_value > trace_options
+ if [ $do_funcgraph_proc -eq 1 ]; then
+ echo $orig_value2 > options/funcgraph-proc
+ fi
}
fail() { # msg
@@ -36,13 +49,15 @@ fail() { # msg
}
do_test() {
+ TRACER=$1
+
disable_tracing
echo do_execve* > set_ftrace_filter
echo $FUNCTION_FORK >> set_ftrace_filter
echo $PID > set_ftrace_pid
- echo function > current_tracer
+ echo $TRACER > current_tracer
if [ $do_function_fork -eq 1 ]; then
# don't allow children to be traced
@@ -82,7 +97,11 @@ do_test() {
fi
}
-do_test
+do_test function
+if grep -s function_graph available_tracers; then
+ do_test function_graph
+fi
+
do_reset
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
index 3145b0f1835c..2ad7d4b501cc 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -38,11 +38,18 @@ cnt_trace() {
test_event_enabled() {
val=$1
+ check_times=10 # wait for 10 * SLEEP_TIME at most
- e=`cat $EVENT_ENABLE`
- if [ "$e" != $val ]; then
- fail "Expected $val but found $e"
- fi
+ while [ $check_times -ne 0 ]; do
+ e=`cat $EVENT_ENABLE`
+ if [ "$e" = $val ]; then
+ return 0
+ fi
+ sleep $SLEEP_TIME
+ check_times=$((check_times - 1))
+ done
+
+ fail "Expected $val but found $e"
}
run_enable_disable() {
@@ -85,7 +92,7 @@ run_enable_disable() {
echo $check_disable > $EVENT_ENABLE
done
sleep $SLEEP_TIME
- echo " make sure it's still works"
+ echo " make sure it still works"
test_event_enabled $check_enable_star
reset_ftrace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc
new file mode 100644
index 000000000000..ccfbfde3d942
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: ftrace - function trace across cpu hotplug
+# requires: function:tracer
+
+if ! which nproc ; then
+ nproc() {
+ ls -d /sys/devices/system/cpu/cpu[0-9]* | wc -l
+ }
+fi
+
+NP=`nproc`
+
+if [ $NP -eq 1 ] ;then
+ echo "We cannot test cpu hotplug in UP environment"
+ exit_unresolved
+fi
+
+# Find online cpu
+for i in /sys/devices/system/cpu/cpu[1-9]*; do
+ if [ -f $i/online ] && [ "$(cat $i/online)" = "1" ]; then
+ cpu=$i
+ break
+ fi
+done
+
+if [ -z "$cpu" ]; then
+ echo "We cannot test cpu hotplug with a single cpu online"
+ exit_unresolved
+fi
+
+echo 0 > tracing_on
+echo > trace
+
+: "Set $(basename $cpu) offline/online with function tracer enabled"
+echo function > current_tracer
+echo 1 > tracing_on
+(echo 0 > $cpu/online)
+(echo "forked"; sleep 1)
+(echo 1 > $cpu/online)
+echo 0 > tracing_on
+echo nop > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index e96e279e0533..263f6b798c85 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -19,7 +19,14 @@ fail() { # mesg
FILTER=set_ftrace_filter
FUNC1="schedule"
-FUNC2="do_softirq"
+if grep '^sched_tick\b' available_filter_functions; then
+ FUNC2="sched_tick"
+elif grep '^scheduler_tick\b' available_filter_functions; then
+ FUNC2="scheduler_tick"
+else
+ exit_unresolved
+fi
+
ALL_FUNCS="#### all functions enabled ####"
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index a6fac927ee82..e8e718139294 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -83,7 +83,28 @@ clear_synthetic_events() { # reset all current synthetic events
done
}
-initialize_ftrace() { # Reset ftrace to initial-state
+clear_dynamic_events() { # reset all current dynamic events
+ again=1
+ stop=1
+ # loop mulitple times as some events require other to be removed first
+ while [ $again -eq 1 ]; do
+ stop=$((stop+1))
+ # Prevent infinite loops
+ if [ $stop -gt 10 ]; then
+ break;
+ fi
+ again=2
+ grep -v '^#' dynamic_events|
+ while read line; do
+ del=`echo $line | sed -e 's/^.\([^ ]*\).*/-\1/'`
+ if ! echo "$del" >> dynamic_events; then
+ again=1
+ fi
+ done
+ done
+}
+
+initialize_system() { # Reset ftrace to initial-state
# As the initial state, ftrace will be set to nop tracer,
# no events, no triggers, no filters, no function filters,
# no probes, and tracing on.
@@ -93,6 +114,7 @@ initialize_ftrace() { # Reset ftrace to initial-state
reset_events_filter
reset_ftrace_filter
disable_events
+ clear_dynamic_events
[ -f set_event_pid ] && echo > set_event_pid
[ -f set_ftrace_pid ] && echo > set_ftrace_pid
[ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
@@ -102,21 +124,45 @@ initialize_ftrace() { # Reset ftrace to initial-state
[ -f uprobe_events ] && echo > uprobe_events
[ -f synthetic_events ] && echo > synthetic_events
[ -f snapshot ] && echo 0 > snapshot
+
+# Stop tracing while reading the trace file by default, to prevent
+# the test results while checking it and to avoid taking a long time
+# to check the result.
+ [ -f options/pause-on-trace ] && echo 1 > options/pause-on-trace
+
clear_trace
enable_tracing
}
+finish_system() {
+ initialize_system
+# And recover it to default.
+ [ -f options/pause-on-trace ] && echo 0 > options/pause-on-trace
+}
+
check_requires() { # Check required files and tracers
for i in "$@" ; do
+ p=${i%:program}
r=${i%:README}
t=${i%:tracer}
- if [ $t != $i ]; then
+ if [ $p != $i ]; then
+ if ! which $p ; then
+ echo "Required program $p is not found."
+ exit_unresolved
+ fi
+ elif [ $t != $i ]; then
if ! grep -wq $t available_tracers ; then
echo "Required tracer $t is not configured."
exit_unsupported
fi
- elif [ $r != $i ]; then
- if ! grep -Fq "$r" README ; then
+ elif [ "$r" != "$i" ]; then
+ # If this is an instance, check the top directory
+ if echo $TRACING_DIR | grep -q "/instances/"; then
+ test="$TRACING_DIR/../.."
+ else
+ test=$TRACING_DIR
+ fi
+ if ! grep -Fq "$r" $test/README ; then
echo "Required feature pattern \"$r\" is not in README."
exit_unsupported
fi
@@ -153,3 +199,28 @@ ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file
# " Command: " and "^\n" => 13
test $(expr 13 + $pos) -eq $N
}
+
+# Helper to get the tracefs mount point
+get_mount_point() {
+ local mount_point=`stat -c '%m' .`
+
+ # If stat -c '%m' does not work (e.g. busybox) or failed, try to use the
+ # current working directory (which should be a tracefs) as the mount point.
+ if [ ! -d "$mount_point" ]; then
+ if mount | grep -qw "$PWD"; then
+ mount_point=$PWD
+ else
+ # If PWD doesn't work, that is an environmental problem.
+ exit_unresolved
+ fi
+ fi
+ echo "$mount_point"
+}
+
+# Helper function to retrieve mount options for a given mount point
+get_mnt_options() {
+ local mnt_point="$1"
+ local opts=$(mount | grep -m1 "$mnt_point" | sed -e 's/.*(\(.*\)).*/\1/')
+
+ echo "$opts"
+} \ No newline at end of file
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
index 0eb47fbb3f44..42422e425107 100644
--- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
@@ -39,7 +39,7 @@ instance_read() {
instance_set() {
while :; do
- echo 1 > foo/events/sched/sched_switch
+ echo 1 > foo/events/sched/sched_switch/enable
done 2> /dev/null
}
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
new file mode 100644
index 000000000000..77f4c07cdcb8
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
@@ -0,0 +1,60 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event char type argument
+# requires: kprobe_events available_filter_functions
+
+case `uname -m` in
+x86_64)
+ ARG1=%di
+;;
+i[3456]86)
+ ARG1=%ax
+;;
+aarch64)
+ ARG1=%x0
+;;
+arm*)
+ ARG1=%r0
+;;
+ppc64*)
+ ARG1=%r3
+;;
+ppc*)
+ ARG1=%r3
+;;
+s390*)
+ ARG1=%r2
+;;
+mips*)
+ ARG1=%r4
+;;
+loongarch*)
+ ARG1=%r4
+;;
+riscv*)
+ ARG1=%a0
+;;
+*)
+ echo "Please implement other architecture here"
+ exit_untested
+esac
+
+: "Test get argument (1)"
+if grep -q eventfs_create_dir available_filter_functions; then
+ DIR_NAME="eventfs_create_dir"
+elif grep -q eventfs_add_dir available_filter_functions; then
+ DIR_NAME="eventfs_add_dir"
+else
+ DIR_NAME="tracefs_create_dir"
+fi
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test $FUNCTION_FORK" >> kprobe_events
+grep -qe "testprobe.* arg1='t'" trace
+
+echo 0 > events/kprobes/testprobe/enable
+: "Test get argument (2)"
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char arg2=+0(${ARG1}):char[4]" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test $FUNCTION_FORK" >> kprobe_events
+grep -qe "testprobe.* arg1='t' arg2={'t','e','s','t'}" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index 84285a6f60b0..39001073f7ed 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event string type argument
-# requires: kprobe_events
+# requires: kprobe_events available_filter_functions
case `uname -m` in
x86_64)
@@ -22,20 +22,39 @@ ppc64*)
ppc*)
ARG1=%r3
;;
+s390*)
+ ARG1=%r2
+;;
+mips*)
+ ARG1=%r4
+;;
+loongarch*)
+ ARG1=%r4
+;;
+riscv*)
+ ARG1=%a0
+;;
*)
echo "Please implement other architecture here"
exit_untested
esac
: "Test get argument (1)"
-echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
+if grep -q eventfs_create_dir available_filter_functions; then
+ DIR_NAME="eventfs_create_dir"
+elif grep -q eventfs_add_dir available_filter_functions; then
+ DIR_NAME="eventfs_add_dir"
+else
+ DIR_NAME="tracefs_create_dir"
+fi
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\"" trace
echo 0 > events/kprobes/testprobe/enable
: "Test get argument (2)"
-echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index 474ca1a9a088..8f1292ad80ff 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -32,6 +32,22 @@ ppc*)
GOODREG=%r3
BADREG=%msr
;;
+s390*)
+ GOODREG=%r2
+ BADREG=%s2
+;;
+mips*)
+ GOODREG=%r4
+ BADREG=%r12
+;;
+loongarch*)
+ GOODREG=%r4
+ BADREG=%r12
+;;
+riscv*)
+ GOODREG=%a0
+ BADREG=%a8
+;;
*)
echo "Please implement other architecture here"
exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
new file mode 100644
index 000000000000..21a54be6894c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event VFS type argument
+# requires: kprobe_events "%pd/%pD":README
+
+: "Test argument %pd with name"
+echo 'p:testprobe dput name=$arg1:%pd' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pd without name"
+echo 'p:testprobe dput $arg1:%pd' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD with name"
+echo 'p:testprobe vfs_read name=$arg1:%pD' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD without name"
+echo 'p:testprobe vfs_read $arg1:%pD' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
index 1f6981ef7afa..ba19b81cef39 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -30,7 +30,8 @@ find_dot_func() {
fi
grep " [tT] .*\.isra\..*" /proc/kallsyms | cut -f 3 -d " " | while read f; do
- if grep -s $f available_filter_functions; then
+ cnt=`grep -s $f available_filter_functions | wc -l`;
+ if [ $cnt -eq 1 ]; then
echo $f
break
fi
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc
new file mode 100644
index 000000000000..4f7cc318f331
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 Akanksha J N, IBM corporation
+# description: Register multiple kprobe events in a function
+# requires: kprobe_events
+
+for i in `seq 0 255`; do
+ echo p $FUNCTION_FORK+${i} >> kprobe_events || continue
+done
+
+cat kprobe_events >> $testlog
+
+echo 1 > events/kprobes/enable
+( echo "forked" )
+echo 0 > events/kprobes/enable
+echo > kprobe_events
+echo "Waiting for unoptimizing & freeing"
+sleep 5
+echo "Done"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
new file mode 100644
index 000000000000..bc9514428dba
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test failure of registering kprobe on non unique symbol
+# requires: kprobe_events
+
+SYMBOL='name_show'
+
+# We skip this test on kernel where SYMBOL is unique or does not exist.
+if [ "$(grep -c -E "[[:alnum:]]+ t ${SYMBOL}" /proc/kallsyms)" -le '1' ]; then
+ exit_unsupported
+fi
+
+! echo "p:test_non_unique ${SYMBOL}" > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc
new file mode 100644
index 000000000000..9f5d99328086
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc
@@ -0,0 +1,34 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 Akanksha J N, IBM corporation
+# description: Register/unregister optimized probe
+# requires: kprobe_events
+
+case `uname -m` in
+x86_64)
+;;
+arm*)
+;;
+ppc*)
+;;
+*)
+ echo "Please implement other architecture here"
+ exit_unsupported
+esac
+
+DEFAULT=$(cat /proc/sys/debug/kprobes-optimization)
+echo 1 > /proc/sys/debug/kprobes-optimization
+for i in `seq 0 255`; do
+ echo "p:testprobe $FUNCTION_FORK+${i}" > kprobe_events || continue
+ echo 1 > events/kprobes/enable || continue
+ (echo "forked")
+ PROBE=$(grep $FUNCTION_FORK /sys/kernel/debug/kprobes/list)
+ echo 0 > events/kprobes/enable
+ echo > kprobe_events
+ if echo $PROBE | grep -q OPTIMIZED; then
+ echo "$DEFAULT" > /proc/sys/debug/kprobes-optimization
+ exit_pass
+ fi
+done
+echo "$DEFAULT" > /proc/sys/debug/kprobes-optimization
+exit_unresolved
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index fa928b431555..8f1c58f0c239 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -8,7 +8,7 @@ check_error() { # command-with-error-pos-by-^
}
if grep -q 'r\[maxactive\]' README; then
-check_error 'p^100 vfs_read' # MAXACT_NO_KPROBE
+check_error 'p^100 vfs_read' # BAD_MAXACT_TYPE
check_error 'r^1a111 vfs_read' # BAD_MAXACT
check_error 'r^100000 vfs_read' # MAXACT_TOO_BIG
fi
@@ -21,7 +21,7 @@ check_error 'p:^/bar vfs_read' # NO_GROUP_NAME
check_error 'p:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG
check_error 'p:^foo.1/bar vfs_read' # BAD_GROUP_NAME
-check_error 'p:foo/^ vfs_read' # NO_EVENT_NAME
+check_error 'p:^ vfs_read' # NO_EVENT_NAME
check_error 'p:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG
check_error 'p:foo/^bar.1 vfs_read' # BAD_EVENT_NAME
@@ -103,4 +103,20 @@ check_error 'p vfs_read^%hoge' # BAD_ADDR_SUFFIX
check_error 'p ^vfs_read+10%return' # BAD_RETPROBE
fi
+# BTF arguments errors
+if grep -q "<argname>" README; then
+check_error 'p vfs_read args=^$arg*' # BAD_VAR_ARGS
+check_error 'p vfs_read +0(^$arg*)' # BAD_VAR_ARGS
+check_error 'p vfs_read $arg* ^$arg*' # DOUBLE_ARGS
+if !grep -q 'kernel return probes support:' README; then
+check_error 'r vfs_read ^$arg*' # NOFENTRY_ARGS
+fi
+check_error 'p vfs_read+20 ^$arg*' # NOFENTRY_ARGS
+check_error 'p vfs_read ^hoge' # NO_BTFARG
+check_error 'p kfree ^$arg10' # NO_BTFARG (exceed the number of parameters)
+check_error 'r kfree ^$retval' # NO_RETVAL
+else
+check_error 'p vfs_read ^$arg*' # NOSUP_BTFARG
+fi
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc
new file mode 100644
index 000000000000..e50470b53164
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe entry argument access
+# requires: kprobe_events 'kernel return probes support:':README
+
+echo 'p:myevent1 vfs_open arg=$arg1' >> kprobe_events
+echo 'r:myevent2 vfs_open arg=$arg1' >> kprobe_events
+
+echo 1 > events/kprobes/enable
+
+echo > trace
+cat trace > /dev/null
+
+streq() {
+ test $1 = $2
+}
+
+streq `grep -A 1 -m 1 myevent1 trace | sed -r 's/^.*(arg=.*)/\1/' `
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 312d23780096..be754f5bcf79 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -25,6 +25,8 @@ if [ $L -ne 256 ]; then
exit_fail
fi
+cat kprobe_events >> $testlog
+
echo 1 > events/kprobes/enable
echo 0 > events/kprobes/enable
echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
index 624269c8d534..68425987a5dd 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
@@ -21,7 +21,7 @@ set_offs() { # prev target next
# We have to decode symbol addresses to get correct offsets.
# If the offset is not an instruction boundary, it cause -EILSEQ.
-set_offs `grep -A1 -B1 ${TARGET_FUNC} /proc/kallsyms | cut -f 1 -d " " | xargs`
+set_offs `grep -v __pfx_ /proc/kallsyms | grep -A1 -B1 ${TARGET_FUNC} | cut -f 1 -d " " | xargs`
UINT_TEST=no
# printf "%x" -1 returns (unsigned long)-1.
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index 98166fa3eb91..34fb89b0c61f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -1,6 +1,6 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-# description: Kprobe dynamic event - adding and removing
+# description: Kprobe profile
# requires: kprobe_events
! grep -q 'myevent' kprobe_profile
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
index f5e3f9e4a01f..c817158b99db 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
@@ -23,4 +23,9 @@ check_error 'p /bin/sh:10^%hoge' # BAD_ADDR_SUFFIX
check_error 'p /bin/sh:10(10)^%return' # BAD_REFCNT_SUFFIX
fi
+# symstr is not supported by uprobe
+if grep -q ".*symstr.*" README; then
+check_error 'p /bin/sh:10 $stack0:^symstr' # BAD_TYPE
+fi
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
index 22bff122b933..ba1038953873 100644
--- a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
+++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
@@ -46,10 +46,10 @@ cat trace
grep -q "tracer: preemptoff" trace || fail
# Check the end of the section
-egrep -q "5.....us : <stack trace>" trace || fail
+grep -E -q "5.....us : <stack trace>" trace || fail
# Check for 500ms of latency
-egrep -q "latency: 5..... us" trace || fail
+grep -E -q "latency: 5..... us" trace || fail
reset_tracer
@@ -69,10 +69,10 @@ cat trace
grep -q "tracer: irqsoff" trace || fail
# Check the end of the section
-egrep -q "5.....us : <stack trace>" trace || fail
+grep -E -q "5.....us : <stack trace>" trace || fail
# Check for 500ms of latency
-egrep -q "latency: 5..... us" trace || fail
+grep -E -q "latency: 5..... us" trace || fail
reset_tracer
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
index 11be10e1bf96..e8f0fac9a110 100644
--- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
@@ -1,12 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Test wakeup tracer
-# requires: wakeup:tracer
-
-if ! which chrt ; then
- echo "chrt is not found. This test requires nice command."
- exit_unresolved
-fi
+# requires: wakeup:tracer chrt:program
echo wakeup > current_tracer
echo 1 > tracing_on
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
index 3a77198b3c69..79807656785b 100644
--- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
@@ -1,12 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Test wakeup RT tracer
-# requires: wakeup_rt:tracer
-
-if ! which chrt ; then
- echo "chrt is not found. This test requires chrt command."
- exit_unresolved
-fi
+# requires: wakeup_rt:tracer chrt:program
echo wakeup_rt > current_tracer
echo 1 > tracing_on
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
index 1590d6bfb857..20a35fea13f8 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger expected fail actions
# requires: set_event snapshot "snapshot()":README
+# flags: instance
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
index 41119e0440e9..04c5dd7d0acc 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test field variable support
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
index 9098f1e7433f..f7447d800899 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event combined histogram trigger
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
index adaabb873ed4..55ab0270e5f7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
@@ -1,7 +1,8 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onchange action
-# requires: set_event "onchange(var)":README
+# requires: set_event "onchange(var)":README ping:program
+# flags: instance
fail() { #msg
echo $1
@@ -19,4 +20,6 @@ if ! grep -q "changed:" events/sched/sched_waking/hist; then
fail "Failed to create onchange action inter-event histogram"
fi
+echo '!hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> events/sched/sched_waking/trigger
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
index 20e39471052e..d645abcf11c4 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onmatch action
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
index f4b03ab7c287..c369247efb35 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onmatch-onmax action
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
index 71c9b5911c70..e28dc5f11b2b 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onmax action
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
index 67fa328b830f..9eb37c2fa417 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
@@ -1,7 +1,8 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger snapshot action
-# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README
+# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README ping:program
+# flags: instance
fail() { #msg
echo $1
@@ -27,4 +28,6 @@ if ! grep -q "comm=ping" snapshot; then
fail "Failed to create snapshot action inter-event histogram"
fi
+echo '!hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> events/sched/sched_waking/trigger
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc
new file mode 100644
index 000000000000..c2a8ab01e13b
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc
@@ -0,0 +1,53 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger eprobe on synthetic event
+# requires: dynamic_events synthetic_events events/syscalls/sys_enter_openat/hist "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+START="sys_enter_openat"
+END="sys_exit_openat"
+FIELD="filename"
+SYNTH="synth_open"
+EPROBE="eprobe_open"
+
+echo "$SYNTH unsigned long filename; long ret;" > synthetic_events
+echo "hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo "hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+
+echo "e:$EPROBE synthetic/$SYNTH file=+0(\$filename):ustring ret=\$ret:s64" >> dynamic_events
+
+grep -q "$SYNTH" dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/synthetic/$SYNTH
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."'` || true
+
+if [ -z "$content" ]; then
+ exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+ exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+echo '!'"hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+echo '!'"hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo '!'"$SYNTH u64 filename; s64 ret;" >> synthetic_events
+
+! grep -q "$SYNTH" dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/synthetic/$SYNTH
+! test -d events/eprobes/$EPROBE
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
index 3d65c856eca3..174376ddbc6c 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
-# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "' >> synthetic_events":README ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc
new file mode 100644
index 000000000000..d0cd91a93069
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param (legacy stack)
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "long[] stack' >> synthetic_events":README
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+echo "Test create synthetic event with stack"
+
+# Test the old stacktrace keyword (for backward compatibility)
+echo 's:wake_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
+echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger
+echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(wake_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
+echo 1 > events/synthetic/wake_lat/enable
+sleep 1
+
+if ! grep -q "=>.*sched" trace; then
+ fail "Failed to create synthetic event with stack"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc
new file mode 100644
index 000000000000..8f1cc9a86a06
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "can be any field, or the special string 'common_stacktrace'":README
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+echo "Test create synthetic event with stack"
+
+echo 's:wake_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
+echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger
+echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(wake_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
+echo 1 > events/synthetic/wake_lat/enable
+sleep 1
+
+if ! grep -q "=>.*sched" trace; then
+ fail "Failed to create synthetic event with stack"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
index 59216f3cfb12..366f1f3ad906 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
@@ -32,6 +32,10 @@ grep "myevent[[:space:]]u64 var1" synthetic_events
# it is not possible to add same name event
! echo "myevent u64 var2" >> synthetic_events
+# make sure !synthetic event doesn't require a field
+echo "!myevent" >> synthetic_events
+echo "myevent u64 var1" >> synthetic_events
+
# Non-append open will cleanup all events and add new one
echo "myevent u64 var2" > synthetic_events
@@ -66,6 +70,12 @@ grep "myevent[[:space:]]unsigned long var" synthetic_events
echo "myevent char var[10]" > synthetic_events
grep "myevent[[:space:]]char\[10\] var" synthetic_events
+if grep -q 'long\[\]' README; then
+ # test stacktrace type
+ echo "myevent unsigned long[] var" > synthetic_events
+ grep "myevent[[:space:]]unsigned long\[\] var" synthetic_events
+fi
+
do_reset
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
index ada594fe16cb..b927ee54c02d 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
@@ -1,19 +1,38 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test synthetic_events syntax parser errors
-# requires: synthetic_events error_log
+# requires: synthetic_events error_log "' >> synthetic_events":README
check_error() { # command-with-error-pos-by-^
ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events'
}
+check_dyn_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'synthetic_events' "$1" 'dynamic_events'
+}
+
check_error 'myevent ^chr arg' # INVALID_TYPE
-check_error 'myevent ^char str[];; int v' # INVALID_TYPE
-check_error 'myevent char ^str]; int v' # INVALID_NAME
-check_error 'myevent char ^str;[]' # INVALID_NAME
-check_error 'myevent ^char str[; int v' # INVALID_TYPE
-check_error '^mye;vent char str[]' # BAD_NAME
-check_error 'myevent char str[]; ^int' # INVALID_FIELD
-check_error '^myevent' # INCOMPLETE_CMD
+check_error 'myevent ^unsigned arg' # INCOMPLETE_TYPE
+
+check_error 'myevent char ^str]; int v' # BAD_NAME
+check_error '^mye-vent char str[]' # BAD_NAME
+check_error 'myevent char ^st-r[]' # BAD_NAME
+
+check_error 'myevent char str;^[]' # INVALID_FIELD
+check_error 'myevent char str; ^int' # INVALID_FIELD
+
+check_error 'myevent char ^str[; int v' # INVALID_ARRAY_SPEC
+check_error 'myevent char ^str[kdjdk]' # INVALID_ARRAY_SPEC
+check_error 'myevent char ^str[257]' # INVALID_ARRAY_SPEC
+
+check_error '^mye;vent char str[]' # INVALID_CMD
+check_error '^myevent ; char str[]' # INVALID_CMD
+check_error '^myevent; char str[]' # INVALID_CMD
+check_error '^myevent ;char str[]' # INVALID_CMD
+check_error '^; char str[]' # INVALID_CMD
+check_error '^;myevent char str[]' # INVALID_CMD
+check_error '^myevent' # INVALID_CMD
+
+check_dyn_error '^s:junk/myevent char str[' # INVALID_DYN_CMD
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
index c126d2350a6d..d7312047ce28 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger trace action
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(<synthetic_event>":README
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(<synthetic_event>":README ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
new file mode 100644
index 000000000000..0ebda2068a00
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
@@ -0,0 +1,64 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram expression parsing
+# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README
+# flags: instance
+
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+test_hist_expr() { # test_name expression expected_val
+ trigger="events/sched/sched_process_fork/trigger"
+
+ reset_trigger_file $trigger
+
+ echo "Test hist trigger expressions - $1"
+
+ echo "hist:keys=common_pid:x=$2" > $trigger
+
+ for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+
+ actual=`grep -o 'x=[[:digit:]]*' $trigger | awk -F= '{ print $2 }'`
+
+ if [ $actual != $3 ]; then
+ fail "Failed hist trigger expression evaluation: Expression: $2 Expected: $3, Actual: $actual"
+ fi
+
+ reset_trigger_file $trigger
+}
+
+check_error() { # test_name command-with-error-pos-by-^
+ trigger="events/sched/sched_process_fork/trigger"
+
+ echo "Test hist trigger expressions - $1"
+ ftrace_errlog_check 'hist:sched:sched_process_fork' "$2" $trigger
+}
+
+test_hist_expr "Variable assignment" "123" "123"
+
+test_hist_expr "Subtraction not associative" "16-8-4-2" "2"
+
+test_hist_expr "Division not associative" "64/8/4/2" "1"
+
+test_hist_expr "Same precedence operators (+,-) evaluated left to right" "16-8+4+2" "14"
+
+test_hist_expr "Same precedence operators (*,/) evaluated left to right" "4*3/2*2" "12"
+
+test_hist_expr "Multiplication evaluated before addition/subtraction" "4+3*2-2" "8"
+
+test_hist_expr "Division evaluated before addition/subtraction" "4+6/2-2" "5"
+
+# err pos for "too many subexpressions" is dependent on where
+# the last subexpression was detected. This can vary depending
+# on how the expression tree was generated.
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=32+^10*3/20-4'
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=^1+2+3+4+5'
+
+check_error "Unary minus not supported in subexpression" 'hist:keys=common_pid:x=-(^1)+2'
+
+check_error "Division by zero" 'hist:keys=common_pid:x=3/^0'
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
index 4562e13cb26b..717898894ef7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -40,7 +40,7 @@ grep "id: \(unknown_\|sys_\)" events/raw_syscalls/sys_exit/hist > /dev/null || \
reset_trigger
-echo "Test histgram with log2 modifier"
+echo "Test histogram with log2 modifier"
echo 'hist:keys=bytes_req.log2' > events/kmem/kmalloc/trigger
for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc
new file mode 100644
index 000000000000..8d275e3238d9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test poll wait on histogram
+# requires: set_event events/sched/sched_process_free/trigger events/sched/sched_process_free/hist
+# flags: instance
+
+POLL=${FTRACETEST_ROOT}/poll
+
+if [ ! -x ${POLL} ]; then
+ echo "poll program is not compiled!"
+ exit_unresolved
+fi
+
+EVENT=events/sched/sched_process_free/
+
+# Check poll ops is supported. Before implementing poll on hist file, it
+# returns soon with POLLIN | POLLOUT, but not POLLPRI.
+
+# This must wait >1 sec and return 1 (timeout).
+set +e
+${POLL} -I -t 1000 ${EVENT}/hist
+ret=$?
+set -e
+if [ ${ret} != 1 ]; then
+ echo "poll on hist file is not supported"
+ exit_unsupported
+fi
+
+# Test POLLIN
+echo > trace
+echo 'hist:key=comm if comm =="sleep"' > ${EVENT}/trigger
+echo 1 > ${EVENT}/enable
+
+# This sleep command will exit after 2 seconds.
+sleep 2 &
+BGPID=$!
+# if timeout happens, poll returns 1.
+${POLL} -I -t 4000 ${EVENT}/hist
+echo 0 > tracing_on
+
+if [ -d /proc/${BGPID} ]; then
+ echo "poll exits too soon"
+ kill -KILL ${BGPID} ||:
+ exit_fail
+fi
+
+if ! grep -qw "sleep" trace; then
+ echo "poll exits before event happens"
+ exit_fail
+fi
+
+# Test POLLPRI
+echo > trace
+echo 1 > tracing_on
+
+# This sleep command will exit after 2 seconds.
+sleep 2 &
+BGPID=$!
+# if timeout happens, poll returns 1.
+${POLL} -P -t 4000 ${EVENT}/hist
+echo 0 > tracing_on
+
+if [ -d /proc/${BGPID} ]; then
+ echo "poll exits too soon"
+ kill -KILL ${BGPID} ||:
+ exit_fail
+fi
+
+if ! grep -qw "sleep" trace; then
+ echo "poll exits before event happens"
+ exit_fail
+fi
+
+exit_pass
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
index 2950bfbc6fce..adae72665500 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -39,6 +39,24 @@ grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
reset_trigger
+echo "Test histogram with sym modifier"
+
+echo 'hist:keys=call_site.sym' > events/kmem/kmalloc/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep '{ call_site: \[[0-9a-f][0-9a-f]*\] [_a-zA-Z][_a-zA-Z]* *}' events/kmem/kmalloc/hist > /dev/null || \
+ fail "sym modifier on kmalloc call_site did not work"
+
+reset_trigger
+
+echo "Test histogram with sym-offset modifier"
+
+echo 'hist:keys=call_site.sym-offset' > events/kmem/kmalloc/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep '{ call_site: \[[0-9a-f][0-9a-f]*\] [_a-zA-Z][_a-zA-Z]*+0x[0-9a-f][0-9a-f]*' events/kmem/kmalloc/hist > /dev/null || \
+ fail "sym-offset modifier on kmalloc call_site did not work"
+
+reset_trigger
+
echo "Test histogram with sort key"
echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 12631f0076a1..78ab2cd111f6 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -3,15 +3,13 @@ SUBDIRS := functional
TEST_PROGS := run.sh
-.PHONY: all clean
-
include ../lib.mk
all:
@for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
if [ -e $$DIR/$(TEST_PROGS) ]; then \
rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
fi \
@@ -32,6 +30,6 @@ override define CLEAN
@for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
done
endef
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 0efcd494daab..776ad658f75e 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -1,8 +1,14 @@
# SPDX-License-Identifier: GPL-2.0-only
+futex_numa_mpol
+futex_priv_hash
+futex_requeue
futex_requeue_pi
futex_requeue_pi_mismatched_ops
futex_requeue_pi_signal_restart
+futex_wait
futex_wait_private_mapped_file
futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
+futex_waitv
+futex_numa
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 23207829ec75..490ace1f017e 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,25 +1,31 @@
# SPDX-License-Identifier: GPL-2.0
-INCLUDES := -I../include -I../../
-CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
-LDLIBS := -lpthread -lrt
+PKG_CONFIG ?= pkg-config
+LIBNUMA_TEST = $(shell sh -c "$(PKG_CONFIG) numa --atleast-version 2.0.16 > /dev/null 2>&1 && echo SUFFICIENT || echo NO")
-HEADERS := \
+INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
+CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 $(INCLUDES) $(KHDR_INCLUDES) -DLIBNUMA_VER_$(LIBNUMA_TEST)=1
+LDLIBS := -lpthread -lrt -lnuma
+
+LOCAL_HDRS := \
../include/futextest.h \
- ../include/atomic.h \
- ../include/logging.h
-TEST_GEN_FILES := \
+ ../include/atomic.h
+TEST_GEN_PROGS := \
futex_wait_timeout \
futex_wait_wouldblock \
futex_requeue_pi \
futex_requeue_pi_signal_restart \
futex_requeue_pi_mismatched_ops \
futex_wait_uninitialized_heap \
- futex_wait_private_mapped_file
+ futex_wait_private_mapped_file \
+ futex_wait \
+ futex_requeue \
+ futex_priv_hash \
+ futex_numa_mpol \
+ futex_waitv \
+ futex_numa
TEST_PROGS := run.sh
top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
+DEFAULT_INSTALL_HDR_PATH := 1
include ../../lib.mk
-
-$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c
new file mode 100644
index 000000000000..e0a33510ccb6
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_numa.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <assert.h>
+#include "futextest.h"
+#include "futex2test.h"
+
+typedef u_int32_t u32;
+typedef int32_t s32;
+typedef u_int64_t u64;
+
+static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
+static int fnode = FUTEX_NO_NODE;
+
+/* fairly stupid test-and-set lock with a waiter flag */
+
+#define N_LOCK 0x0000001
+#define N_WAITERS 0x0001000
+
+struct futex_numa_32 {
+ union {
+ u64 full;
+ struct {
+ u32 val;
+ u32 node;
+ };
+ };
+};
+
+void futex_numa_32_lock(struct futex_numa_32 *lock)
+{
+ for (;;) {
+ struct futex_numa_32 new, old = {
+ .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
+ };
+
+ for (;;) {
+ new = old;
+ if (old.val == 0) {
+ /* no waiter, no lock -> first lock, set no-node */
+ new.node = fnode;
+ }
+ if (old.val & N_LOCK) {
+ /* contention, set waiter */
+ new.val |= N_WAITERS;
+ }
+ new.val |= N_LOCK;
+
+ /* nothing changed, ready to block */
+ if (old.full == new.full)
+ break;
+
+ /*
+ * Use u64 cmpxchg to set the futex value and node in a
+ * consistent manner.
+ */
+ if (__atomic_compare_exchange_n(&lock->full,
+ &old.full, new.full,
+ /* .weak */ false,
+ __ATOMIC_ACQUIRE,
+ __ATOMIC_RELAXED)) {
+
+ /* if we just set N_LOCK, we own it */
+ if (!(old.val & N_LOCK))
+ return;
+
+ /* go block */
+ break;
+ }
+ }
+
+ futex2_wait(lock, new.val, fflags, NULL, 0);
+ }
+}
+
+void futex_numa_32_unlock(struct futex_numa_32 *lock)
+{
+ u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
+ assert((s32)val >= 0);
+ if (val & N_WAITERS) {
+ int woken = futex2_wake(lock, 1, fflags);
+ assert(val == N_WAITERS);
+ if (!woken) {
+ __atomic_compare_exchange_n(&lock->val, &val, 0U,
+ false, __ATOMIC_RELAXED,
+ __ATOMIC_RELAXED);
+ }
+ }
+}
+
+static long nanos = 50000;
+
+struct thread_args {
+ pthread_t tid;
+ volatile int * done;
+ struct futex_numa_32 *lock;
+ int val;
+ int *val1, *val2;
+ int node;
+};
+
+static void *threadfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+ struct timespec ts = {
+ .tv_nsec = nanos,
+ };
+ int node;
+
+ while (!*args->done) {
+
+ futex_numa_32_lock(args->lock);
+ args->val++;
+
+ assert(*args->val1 == *args->val2);
+ (*args->val1)++;
+ nanosleep(&ts, NULL);
+ (*args->val2)++;
+
+ node = args->lock->node;
+ futex_numa_32_unlock(args->lock);
+
+ if (node != args->node) {
+ args->node = node;
+ printf("node: %d\n", node);
+ }
+
+ nanosleep(&ts, NULL);
+ }
+
+ return NULL;
+}
+
+static void *contendfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+
+ while (!*args->done) {
+ /*
+ * futex2_wait() will take hb-lock, verify *var == val and
+ * queue/abort. By knowingly setting val 'wrong' this will
+ * abort and thereby generate hb-lock contention.
+ */
+ futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
+ args->val++;
+ }
+
+ return NULL;
+}
+
+static volatile int done = 0;
+static struct futex_numa_32 lock = { .val = 0, };
+static int val1, val2;
+
+int main(int argc, char *argv[])
+{
+ struct thread_args *tas[512], *cas[512];
+ int c, t, threads = 2, contenders = 0;
+ int sleeps = 10;
+ int total = 0;
+
+ while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
+ switch (c) {
+ case 'c':
+ contenders = atoi(optarg);
+ break;
+ case 't':
+ threads = atoi(optarg);
+ break;
+ case 's':
+ sleeps = atoi(optarg);
+ break;
+ case 'n':
+ nanos = atoi(optarg);
+ break;
+ case 'N':
+ fflags |= FUTEX2_NUMA;
+ if (optarg)
+ fnode = atoi(optarg);
+ break;
+ default:
+ exit(1);
+ break;
+ }
+ }
+
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, contendfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ cas[t] = args;
+ }
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, threadfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ tas[t] = args;
+ }
+
+ sleep(sleeps);
+
+ done = true;
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = tas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+// printf("tval: %d\n", args->val);
+ }
+ printf("total: %d\n", total);
+
+ if (contenders) {
+ total = 0;
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = cas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+ // printf("tval: %d\n", args->val);
+ }
+ printf("contenders: %d\n", total);
+ }
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
new file mode 100644
index 000000000000..ab8555752137
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <numa.h>
+#include <numaif.h>
+
+#include <linux/futex.h>
+#include <sys/mman.h>
+
+#include "futextest.h"
+#include "futex2test.h"
+#include "kselftest_harness.h"
+
+#define MAX_THREADS 64
+
+static pthread_barrier_t barrier_main;
+static pthread_t threads[MAX_THREADS];
+
+struct thread_args {
+ void *futex_ptr;
+ unsigned int flags;
+ int result;
+};
+
+static struct thread_args thread_args[MAX_THREADS];
+
+#ifndef FUTEX_NO_NODE
+#define FUTEX_NO_NODE (-1)
+#endif
+
+#ifndef FUTEX2_MPOL
+#define FUTEX2_MPOL 0x08
+#endif
+
+static void *thread_lock_fn(void *arg)
+{
+ struct thread_args *args = arg;
+ int ret;
+
+ pthread_barrier_wait(&barrier_main);
+ ret = futex2_wait(args->futex_ptr, 0, args->flags, NULL, 0);
+ args->result = ret;
+ return NULL;
+}
+
+static void create_max_threads(void *futex_ptr)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ thread_args[i].futex_ptr = futex_ptr;
+ thread_args[i].flags = FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA;
+ thread_args[i].result = 0;
+ ret = pthread_create(&threads[i], NULL, thread_lock_fn, &thread_args[i]);
+ if (ret)
+ ksft_exit_fail_msg("pthread_create failed\n");
+ }
+}
+
+static void join_max_threads(void)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_join(threads[i], NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
+ }
+}
+
+static void __test_futex(void *futex_ptr, int err_value, unsigned int futex_flags)
+{
+ int to_wake, ret, i, need_exit = 0;
+
+ pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
+ create_max_threads(futex_ptr);
+ pthread_barrier_wait(&barrier_main);
+ to_wake = MAX_THREADS;
+
+ do {
+ ret = futex2_wake(futex_ptr, to_wake, futex_flags);
+
+ if (err_value) {
+ if (ret >= 0)
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n",
+ to_wake, futex_flags);
+
+ if (errno != err_value)
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) expected error was %d, but returned %d (%s)\n",
+ to_wake, futex_flags, err_value, errno, strerror(errno));
+
+ break;
+ }
+ if (ret < 0) {
+ ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n",
+ to_wake, futex_flags);
+ }
+ if (!ret)
+ usleep(50);
+ to_wake -= ret;
+
+ } while (to_wake);
+ join_max_threads();
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ if (err_value && thread_args[i].result != -1) {
+ ksft_print_msg("Thread %d should fail but succeeded (%d)\n",
+ i, thread_args[i].result);
+ need_exit = 1;
+ }
+ if (!err_value && thread_args[i].result != 0) {
+ ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result);
+ need_exit = 1;
+ }
+ }
+ if (need_exit)
+ ksft_exit_fail_msg("Aborting due to earlier errors.\n");
+}
+
+static void test_futex(void *futex_ptr, int err_value)
+{
+ __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
+}
+
+static void test_futex_mpol(void *futex_ptr, int err_value)
+{
+ __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+}
+
+TEST(futex_numa_mpol)
+{
+ struct futex32_numa *futex_numa;
+ void *futex_ptr;
+ int mem_size;
+
+ mem_size = sysconf(_SC_PAGE_SIZE);
+ futex_ptr = mmap(NULL, mem_size * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ if (futex_ptr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size);
+
+ /* Create an invalid memory region for the "Memory out of range" test */
+ mprotect(futex_ptr + mem_size, mem_size, PROT_NONE);
+
+ futex_numa = futex_ptr;
+
+ ksft_print_msg("Regular test\n");
+ futex_numa->futex = 0;
+ futex_numa->numa = FUTEX_NO_NODE;
+ test_futex(futex_ptr, 0);
+
+ if (futex_numa->numa == FUTEX_NO_NODE)
+ ksft_exit_fail_msg("NUMA node is left uninitialized\n");
+
+ /* FUTEX2_NUMA futex must be 8-byte aligned */
+ ksft_print_msg("Mis-aligned futex\n");
+ test_futex(futex_ptr + mem_size - 4, EINVAL);
+
+ ksft_print_msg("Memory out of range\n");
+ test_futex(futex_ptr + mem_size, EFAULT);
+
+ futex_numa->numa = FUTEX_NO_NODE;
+ mprotect(futex_ptr, mem_size, PROT_READ);
+ ksft_print_msg("Memory, RO\n");
+ test_futex(futex_ptr, EFAULT);
+
+ mprotect(futex_ptr, mem_size, PROT_NONE);
+ ksft_print_msg("Memory, no access\n");
+ test_futex(futex_ptr, EFAULT);
+
+ mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE);
+ ksft_print_msg("Memory back to RW\n");
+ test_futex(futex_ptr, 0);
+
+ ksft_test_result_pass("futex2 memory boundary tests passed\n");
+
+ /* MPOL test. Does not work as expected */
+#ifdef LIBNUMA_VER_SUFFICIENT
+ for (int i = 0; i < 4; i++) {
+ unsigned long nodemask;
+ int ret;
+
+ nodemask = 1 << i;
+ ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask,
+ sizeof(nodemask) * 8, 0);
+ if (ret == 0) {
+ ret = numa_set_mempolicy_home_node(futex_ptr, mem_size, i, 0);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to set home node: %m, %d\n", errno);
+
+ ksft_print_msg("Node %d test\n", i);
+ futex_numa->futex = 0;
+ futex_numa->numa = FUTEX_NO_NODE;
+
+ ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+ if (ret < 0)
+ ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n");
+ if (futex_numa->numa != i) {
+ ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n",
+ futex_numa->numa, i);
+ }
+ }
+ }
+ ksft_test_result_pass("futex2 MPOL hints test passed\n");
+#else
+ ksft_test_result_skip("futex2 MPOL hints test requires libnuma 2.0.16+\n");
+#endif
+ munmap(futex_ptr, mem_size * 2);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
new file mode 100644
index 000000000000..e8079d7c65e8
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/prctl.h>
+#include <sys/prctl.h>
+
+#include "kselftest_harness.h"
+
+#define MAX_THREADS 64
+
+static pthread_barrier_t barrier_main;
+static pthread_mutex_t global_lock;
+static pthread_t threads[MAX_THREADS];
+static int counter;
+
+#ifndef PR_FUTEX_HASH
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define PR_FUTEX_HASH_GET_SLOTS 2
+#endif
+
+static int futex_hash_slots_set(unsigned int slots)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, 0);
+}
+
+static int futex_hash_slots_get(void)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
+}
+
+static void futex_hash_slots_set_verify(int slots)
+{
+ int ret;
+
+ ret = futex_hash_slots_set(slots);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set slots to %d: %m\n", slots);
+ ksft_finished();
+ }
+ ret = futex_hash_slots_get();
+ if (ret != slots) {
+ ksft_test_result_fail("Set %d slots but PR_FUTEX_HASH_GET_SLOTS returns: %d, %m\n",
+ slots, ret);
+ ksft_finished();
+ }
+ ksft_test_result_pass("SET and GET slots %d passed\n", slots);
+}
+
+static void futex_hash_slots_set_must_fail(int slots)
+{
+ int ret;
+
+ ret = futex_hash_slots_set(slots);
+ ksft_test_result(ret < 0, "futex_hash_slots_set(%d)\n",
+ slots);
+}
+
+static void *thread_return_fn(void *arg)
+{
+ return NULL;
+}
+
+static void *thread_lock_fn(void *arg)
+{
+ pthread_barrier_wait(&barrier_main);
+
+ pthread_mutex_lock(&global_lock);
+ counter++;
+ usleep(20);
+ pthread_mutex_unlock(&global_lock);
+ return NULL;
+}
+
+static void create_max_threads(void *(*thread_fn)(void *))
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_create(&threads[i], NULL, thread_fn, NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_create failed: %m\n");
+ }
+}
+
+static void join_max_threads(void)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_join(threads[i], NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
+ }
+}
+
+#define SEC_IN_NSEC 1000000000
+#define MSEC_IN_NSEC 1000000
+
+static void futex_dummy_op(void)
+{
+ pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+ struct timespec timeout;
+ int ret;
+
+ pthread_mutex_lock(&lock);
+ clock_gettime(CLOCK_REALTIME, &timeout);
+ timeout.tv_nsec += 100 * MSEC_IN_NSEC;
+ if (timeout.tv_nsec >= SEC_IN_NSEC) {
+ timeout.tv_nsec -= SEC_IN_NSEC;
+ timeout.tv_sec++;
+ }
+ ret = pthread_mutex_timedlock(&lock, &timeout);
+ if (ret == 0)
+ ksft_exit_fail_msg("Successfully locked an already locked mutex.\n");
+
+ if (ret != ETIMEDOUT)
+ ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret);
+}
+
+static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n";
+static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n";
+
+TEST(priv_hash)
+{
+ int futex_slots1, futex_slotsn, online_cpus;
+ pthread_mutexattr_t mutex_attr_pi;
+ int ret, retry = 20;
+
+ ret = pthread_mutexattr_init(&mutex_attr_pi);
+ ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT);
+ ret |= pthread_mutex_init(&global_lock, &mutex_attr_pi);
+ if (ret != 0) {
+ ksft_exit_fail_msg("Failed to initialize pthread mutex.\n");
+ }
+ /* First thread, expect to be 0, not yet initialized */
+ ret = futex_hash_slots_get();
+ if (ret != 0)
+ ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret);
+
+ ksft_test_result_pass("Basic get slots and immutable status.\n");
+ ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_create() failed: %d, %m\n", ret);
+
+ ret = pthread_join(threads[0], NULL);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret);
+
+ /* First thread, has to initialize private hash */
+ futex_slots1 = futex_hash_slots_get();
+ if (futex_slots1 <= 0) {
+ ksft_print_msg("Current hash buckets: %d\n", futex_slots1);
+ ksft_exit_fail_msg("%s", test_msg_auto_create);
+ }
+
+ ksft_test_result_pass("%s", test_msg_auto_create);
+
+ online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_barrier_init failed: %m.\n");
+
+ ret = pthread_mutex_lock(&global_lock);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_mutex_lock failed: %m.\n");
+
+ counter = 0;
+ create_max_threads(thread_lock_fn);
+ pthread_barrier_wait(&barrier_main);
+
+ /*
+ * The current default size of hash buckets is 16. The auto increase
+ * works only if more than 16 CPUs are available.
+ */
+ ksft_print_msg("Online CPUs: %d\n", online_cpus);
+ if (online_cpus > 16) {
+retry_getslots:
+ futex_slotsn = futex_hash_slots_get();
+ if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) {
+ retry--;
+ /*
+ * Auto scaling on thread creation can be slightly delayed
+ * because it waits for a RCU grace period twice. The new
+ * private hash is assigned upon the first futex operation
+ * after grace period.
+ * To cover all this for testing purposes the function
+ * below will acquire a lock and acquire it again with a
+ * 100ms timeout which must timeout. This ensures we
+ * sleep for 100ms and issue a futex operation.
+ */
+ if (retry > 0) {
+ futex_dummy_op();
+ goto retry_getslots;
+ }
+ ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n",
+ futex_slots1, futex_slotsn);
+ ksft_exit_fail_msg("%s", test_msg_auto_inc);
+ }
+ ksft_test_result_pass("%s", test_msg_auto_inc);
+ } else {
+ ksft_test_result_skip("%s", test_msg_auto_inc);
+ }
+ ret = pthread_mutex_unlock(&global_lock);
+
+ /* Once the user changes it, it has to be what is set */
+ futex_hash_slots_set_verify(2);
+ futex_hash_slots_set_verify(4);
+ futex_hash_slots_set_verify(8);
+ futex_hash_slots_set_verify(32);
+ futex_hash_slots_set_verify(16);
+
+ ret = futex_hash_slots_set(15);
+ ksft_test_result(ret < 0, "Use 15 slots\n");
+
+ futex_hash_slots_set_verify(2);
+ join_max_threads();
+ ksft_test_result(counter == MAX_THREADS, "Created and waited for %d of %d threads\n",
+ counter, MAX_THREADS);
+ counter = 0;
+ /* Once the user set something, auto resize must be disabled */
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
+
+ create_max_threads(thread_lock_fn);
+ join_max_threads();
+
+ ret = futex_hash_slots_get();
+ ksft_test_result(ret == 2, "No more auto-resize after manual setting, got %d\n",
+ ret);
+
+ futex_hash_slots_set_must_fail(1 << 29);
+ futex_hash_slots_set_verify(4);
+
+ /*
+ * Once the global hash has been requested, then this requested can not
+ * be undone.
+ */
+ ret = futex_hash_slots_set(0);
+ ksft_test_result(ret == 0, "Global hash request\n");
+ if (ret != 0)
+ return;
+
+ futex_hash_slots_set_must_fail(4);
+ futex_hash_slots_set_must_fail(8);
+ futex_hash_slots_set_must_fail(8);
+ futex_hash_slots_set_must_fail(0);
+ futex_hash_slots_set_must_fail(6);
+
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_barrier_init failed: %m\n");
+
+ create_max_threads(thread_lock_fn);
+ join_max_threads();
+
+ ret = futex_hash_slots_get();
+ ksft_test_result(ret == 0, "Continue to use global hash\n");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
new file mode 100644
index 000000000000..35d4be23db5d
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <limits.h>
+
+#include "futextest.h"
+#include "kselftest_harness.h"
+
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+
+volatile futex_t *f1;
+
+void *waiterfn(void *arg)
+{
+ struct timespec to;
+
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ if (futex_wait(f1, *f1, &to, 0))
+ printf("waiter failed errno %d\n", errno);
+
+ return NULL;
+}
+
+TEST(requeue_single)
+{
+ volatile futex_t _f1 = 0;
+ volatile futex_t f2 = 0;
+ pthread_t waiter[10];
+ int res;
+
+ f1 = &_f1;
+
+ /*
+ * Requeue a waiter from f1 to f2, and wake f2.
+ */
+ if (pthread_create(&waiter[0], NULL, waiterfn, NULL))
+ ksft_exit_fail_msg("pthread_create failed\n");
+
+ usleep(WAKE_WAIT_US);
+
+ ksft_print_dbg_msg("Requeuing 1 futex from f1 to f2\n");
+ res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0);
+ if (res != 1)
+ ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+
+ ksft_print_dbg_msg("Waking 1 futex at f2\n");
+ res = futex_wake(&f2, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_requeue simple succeeds\n");
+ }
+}
+
+TEST(requeue_multiple)
+{
+ volatile futex_t _f1 = 0;
+ volatile futex_t f2 = 0;
+ pthread_t waiter[10];
+ int res, i;
+
+ f1 = &_f1;
+
+ /*
+ * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7.
+ * At futex_wake, wake INT_MAX (should be exactly 7).
+ */
+ for (i = 0; i < 10; i++) {
+ if (pthread_create(&waiter[i], NULL, waiterfn, NULL))
+ ksft_exit_fail_msg("pthread_create failed\n");
+ }
+
+ usleep(WAKE_WAIT_US);
+
+ ksft_print_dbg_msg("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
+ res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0);
+ if (res != 10) {
+ ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ }
+
+ ksft_print_dbg_msg("Waking INT_MAX futexes at f2\n");
+ res = futex_wake(&f2, INT_MAX, 0);
+ if (res != 7) {
+ ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_requeue many succeeds\n");
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index 1ee5518ee6b7..46d2858e15a8 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -17,6 +17,8 @@
*
*****************************************************************************/
+#define _GNU_SOURCE
+
#include <errno.h>
#include <limits.h>
#include <pthread.h>
@@ -24,11 +26,11 @@
#include <stdlib.h>
#include <signal.h>
#include <string.h>
+
#include "atomic.h"
#include "futextest.h"
-#include "logging.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-requeue-pi"
#define MAX_WAKE_ITERS 1000
#define THREAD_MAX 10
#define SIGNAL_PERIOD_US 100
@@ -40,12 +42,6 @@ futex_t f1 = FUTEX_INITIALIZER;
futex_t f2 = FUTEX_INITIALIZER;
futex_t wake_complete = FUTEX_INITIALIZER;
-/* Test option defaults */
-static long timeout_ns;
-static int broadcast;
-static int owner;
-static int locked;
-
struct thread_arg {
long id;
struct timespec *timeout;
@@ -54,18 +50,73 @@ struct thread_arg {
};
#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
-void usage(char *prog)
+FIXTURE(args)
{
- printf("Usage: %s\n", prog);
- printf(" -b Broadcast wakeup (all waiters)\n");
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -l Lock the pi futex across requeue\n");
- printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n");
- printf(" -t N Timeout in nanoseconds (default: 0)\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
+};
+
+FIXTURE_SETUP(args)
+{
+};
+
+FIXTURE_TEARDOWN(args)
+{
+};
+
+FIXTURE_VARIANT(args)
+{
+ long timeout_ns;
+ bool broadcast;
+ bool owner;
+ bool locked;
+};
+
+/*
+ * For a given timeout value, this macro creates a test input with all the
+ * possible combinations of valid arguments
+ */
+#define FIXTURE_VARIANT_ADD_TIMEOUT(timeout) \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout) \
+{ \
+ .timeout_ns = timeout, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_locked) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+ .locked = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_owner) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+ .owner = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_locked) \
+{ \
+ .timeout_ns = timeout, \
+ .locked = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_owner) \
+{ \
+ .timeout_ns = timeout, \
+ .owner = true, \
+}; \
+
+FIXTURE_VARIANT_ADD_TIMEOUT(0);
+FIXTURE_VARIANT_ADD_TIMEOUT(5000);
+FIXTURE_VARIANT_ADD_TIMEOUT(500000);
+FIXTURE_VARIANT_ADD_TIMEOUT(2000000000);
int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
int policy, int prio)
@@ -79,26 +130,26 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
if (ret) {
- error("pthread_attr_setinheritsched\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setinheritsched\n");
return -1;
}
ret = pthread_attr_setschedpolicy(&attr, policy);
if (ret) {
- error("pthread_attr_setschedpolicy\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setschedpolicy\n");
return -1;
}
schedp.sched_priority = prio;
ret = pthread_attr_setschedparam(&attr, &schedp);
if (ret) {
- error("pthread_attr_setschedparam\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setschedparam\n");
return -1;
}
ret = pthread_create(pth, &attr, func, arg);
if (ret) {
- error("pthread_create\n", ret);
+ ksft_exit_fail_msg("pthread_create\n");
return -1;
}
return 0;
@@ -110,7 +161,7 @@ void *waiterfn(void *arg)
struct thread_arg *args = (struct thread_arg *)arg;
futex_t old_val;
- info("Waiter %ld: running\n", args->id);
+ ksft_print_dbg_msg("Waiter %ld: running\n", args->id);
/* Each thread sleeps for a different amount of time
* This is to avoid races, because we don't lock the
* external mutex here */
@@ -118,26 +169,25 @@ void *waiterfn(void *arg)
old_val = f1;
atomic_inc(&waiters_blocked);
- info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+ ksft_print_dbg_msg("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
&f1, f1, &f2);
args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
FUTEX_PRIVATE_FLAG);
- info("waiter %ld woke with %d %s\n", args->id, args->ret,
+ ksft_print_dbg_msg("waiter %ld woke with %d %s\n", args->id, args->ret,
args->ret < 0 ? strerror(errno) : "");
atomic_inc(&waiters_woken);
if (args->ret < 0) {
if (args->timeout && errno == ETIMEDOUT)
args->ret = 0;
else {
- args->ret = RET_ERROR;
- error("futex_wait_requeue_pi\n", errno);
+ ksft_exit_fail_msg("futex_wait_requeue_pi\n");
}
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+ ksft_print_dbg_msg("Waiter %ld: exiting with %d\n", args->id, args->ret);
pthread_exit((void *)&args->ret);
}
@@ -150,14 +200,14 @@ void *broadcast_wakerfn(void *arg)
int nr_wake = 1;
int i = 0;
- info("Waker: waiting for waiters to block\n");
+ ksft_print_dbg_msg("Waker: waiting for waiters to block\n");
while (waiters_blocked.val < THREAD_MAX)
usleep(1000);
usleep(1000);
- info("Waker: Calling broadcast\n");
+ ksft_print_dbg_msg("Waker: Calling broadcast\n");
if (args->lock) {
- info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
continue_requeue:
@@ -165,16 +215,14 @@ void *broadcast_wakerfn(void *arg)
args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
FUTEX_PRIVATE_FLAG);
if (args->ret < 0) {
- args->ret = RET_ERROR;
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
} else if (++i < MAX_WAKE_ITERS) {
task_count += args->ret;
if (task_count < THREAD_MAX - waiters_woken.val)
goto continue_requeue;
} else {
- error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
- 0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
- args->ret = RET_ERROR;
+ ksft_exit_fail_msg("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+ MAX_WAKE_ITERS, task_count, THREAD_MAX);
}
futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
@@ -185,7 +233,7 @@ void *broadcast_wakerfn(void *arg)
if (args->ret > 0)
args->ret = task_count;
- info("Waker: exiting with %d\n", args->ret);
+ ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret);
pthread_exit((void *)&args->ret);
}
@@ -198,20 +246,20 @@ void *signal_wakerfn(void *arg)
int nr_wake = 1;
int i = 0;
- info("Waker: waiting for waiters to block\n");
+ ksft_print_dbg_msg("Waker: waiting for waiters to block\n");
while (waiters_blocked.val < THREAD_MAX)
usleep(1000);
usleep(1000);
while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
- info("task_count: %d, waiters_woken: %d\n",
+ ksft_print_dbg_msg("task_count: %d, waiters_woken: %d\n",
task_count, waiters_woken.val);
if (args->lock) {
- info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
- f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
- info("Waker: Calling signal\n");
+ ksft_print_dbg_msg("Waker: Calling signal\n");
/* cond_signal */
old_val = f1;
args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
@@ -219,28 +267,23 @@ void *signal_wakerfn(void *arg)
FUTEX_PRIVATE_FLAG);
if (args->ret < 0)
args->ret = -errno;
- info("futex: %x\n", f2);
+ ksft_print_dbg_msg("futex: %x\n", f2);
if (args->lock) {
- info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
- f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
}
- info("futex: %x\n", f2);
- if (args->ret < 0) {
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
- args->ret = RET_ERROR;
- break;
- }
+ ksft_print_dbg_msg("futex: %x\n", f2);
+ if (args->ret < 0)
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
task_count += args->ret;
usleep(SIGNAL_PERIOD_US);
i++;
/* we have to loop at least THREAD_MAX times */
if (i > MAX_WAKE_ITERS + THREAD_MAX) {
- error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
- 0, MAX_WAKE_ITERS + THREAD_MAX);
- args->ret = RET_ERROR;
- break;
+ ksft_exit_fail_msg("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+ MAX_WAKE_ITERS + THREAD_MAX);
}
}
@@ -249,8 +292,8 @@ void *signal_wakerfn(void *arg)
if (args->ret >= 0)
args->ret = task_count;
- info("Waker: exiting with %d\n", args->ret);
- info("Waker: waiters_woken: %d\n", waiters_woken.val);
+ ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret);
+ ksft_print_dbg_msg("Waker: waiters_woken: %d\n", waiters_woken.val);
pthread_exit((void *)&args->ret);
}
@@ -267,35 +310,40 @@ void *third_party_blocker(void *arg)
ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
out:
- if (args->ret || ret2) {
- error("third_party_blocker() futex error", 0);
- args->ret = RET_ERROR;
- }
+ if (args->ret || ret2)
+ ksft_exit_fail_msg("third_party_blocker() futex error");
pthread_exit((void *)&args->ret);
}
-int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+TEST_F(args, futex_requeue_pi)
{
- void *(*wakerfn)(void *) = signal_wakerfn;
struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
pthread_t waiter[THREAD_MAX], waker, blocker;
- struct timespec ts, *tsp = NULL;
+ void *(*wakerfn)(void *) = signal_wakerfn;
+ bool third_party_owner = variant->owner;
+ long timeout_ns = variant->timeout_ns;
+ bool broadcast = variant->broadcast;
struct thread_arg args[THREAD_MAX];
- int *waiter_ret;
- int i, ret = RET_PASS;
+ struct timespec ts, *tsp = NULL;
+ bool lock = variant->locked;
+ int *waiter_ret, i, ret = 0;
+
+ ksft_print_msg(
+ "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+ broadcast, lock, third_party_owner, timeout_ns);
if (timeout_ns) {
time_t secs;
- info("timeout_ns = %ld\n", timeout_ns);
+ ksft_print_dbg_msg("timeout_ns = %ld\n", timeout_ns);
ret = clock_gettime(CLOCK_MONOTONIC, &ts);
secs = (ts.tv_nsec + timeout_ns) / 1000000000;
ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
ts.tv_sec += secs;
- info("ts.tv_sec = %ld\n", ts.tv_sec);
- info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+ ksft_print_dbg_msg("ts.tv_sec = %ld\n", ts.tv_sec);
+ ksft_print_dbg_msg("ts.tv_nsec = %ld\n", ts.tv_nsec);
tsp = &ts;
}
@@ -305,10 +353,7 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
if (third_party_owner) {
if (create_rt_thread(&blocker, third_party_blocker,
(void *)&blocker_arg, SCHED_FIFO, 1)) {
- error("Creating third party blocker thread failed\n",
- errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating third party blocker thread failed\n");
}
}
@@ -316,20 +361,16 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
for (i = 0; i < THREAD_MAX; i++) {
args[i].id = i;
args[i].timeout = tsp;
- info("Starting thread %d\n", i);
+ ksft_print_dbg_msg("Starting thread %d\n", i);
if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
SCHED_FIFO, 1)) {
- error("Creating waiting thread failed\n", errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating waiting thread failed\n");
}
}
waker_arg.lock = lock;
if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
SCHED_FIFO, 1)) {
- error("Creating waker thread failed\n", errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating waker thread failed\n");
}
/* Wait for threads to finish */
@@ -343,7 +384,6 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
pthread_join(blocker, NULL);
pthread_join(waker, NULL);
-out:
if (!ret) {
if (*waiter_ret)
ret = *waiter_ret;
@@ -353,57 +393,8 @@ out:
ret = blocker_arg.ret;
}
- return ret;
+ if (ret)
+ ksft_test_result_fail("fail");
}
-int main(int argc, char *argv[])
-{
- int c, ret;
-
- while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
- switch (c) {
- case 'b':
- broadcast = 1;
- break;
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'l':
- locked = 1;
- break;
- case 'o':
- owner = 1;
- locked = 0;
- break;
- case 't':
- timeout_ns = atoi(optarg);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
- ksft_print_msg(
- "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
- broadcast, locked, owner, timeout_ns);
-
- /*
- * FIXME: unit_test is obsolete now that we parse options and the
- * various style of runs are done by run.sh - simplify the code and move
- * unit_test into main()
- */
- ret = unit_test(broadcast, locked, owner, timeout_ns);
-
- print_result(TEST_NAME, ret);
- return ret;
-}
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
index d0a4d332ea44..f686e605359c 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -23,67 +23,32 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
-#include "futextest.h"
-#include "logging.h"
-#define TEST_NAME "futex-requeue-pi-mismatched-ops"
+#include "futextest.h"
+#include "kselftest_harness.h"
futex_t f1 = FUTEX_INITIALIZER;
futex_t f2 = FUTEX_INITIALIZER;
int child_ret = 0;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *blocking_child(void *arg)
{
child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
if (child_ret < 0) {
child_ret = -errno;
- error("futex_wait\n", errno);
+ ksft_exit_fail_msg("futex_wait\n");
}
return (void *)&child_ret;
}
-int main(int argc, char *argv[])
+TEST(requeue_pi_mismatched_ops)
{
- int ret = RET_PASS;
pthread_t child;
- int c;
+ int ret;
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
- basename(argv[0]));
+ if (pthread_create(&child, NULL, blocking_child, NULL))
+ ksft_exit_fail_msg("pthread_create\n");
- if (pthread_create(&child, NULL, blocking_child, NULL)) {
- error("pthread_create\n", errno);
- ret = RET_ERROR;
- goto out;
- }
/* Allow the child to block in the kernel. */
sleep(1);
@@ -102,34 +67,27 @@ int main(int argc, char *argv[])
* FUTEX_WAKE.
*/
ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
- if (ret == 1) {
- ret = RET_PASS;
- } else if (ret < 0) {
- error("futex_wake\n", errno);
- ret = RET_ERROR;
- } else {
- error("futex_wake did not wake the child\n", 0);
- ret = RET_ERROR;
- }
+ if (ret == 1)
+ ret = 0;
+ else if (ret < 0)
+ ksft_exit_fail_msg("futex_wake\n");
+ else
+ ksft_exit_fail_msg("futex_wake did not wake the child\n");
} else {
- error("futex_cmp_requeue_pi\n", errno);
- ret = RET_ERROR;
+ ksft_exit_fail_msg("futex_cmp_requeue_pi\n");
}
} else if (ret > 0) {
- fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
- ret = RET_FAIL;
+ ksft_test_result_fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
} else {
- error("futex_cmp_requeue_pi found no waiters\n", 0);
- ret = RET_ERROR;
+ ksft_exit_fail_msg("futex_cmp_requeue_pi found no waiters\n");
}
pthread_join(child, NULL);
- if (!ret)
- ret = child_ret;
-
- out:
- /* If the kernel crashes, we shouldn't return at all. */
- print_result(TEST_NAME, ret);
- return ret;
+ if (!ret && !child_ret)
+ ksft_test_result_pass("futex_requeue_pi_mismatched_ops passed\n");
+ else
+ ksft_test_result_pass("futex_requeue_pi_mismatched_ops failed\n");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
index f8c43ce8fe66..a18ccae73eb1 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -24,11 +24,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
#include "atomic.h"
#include "futextest.h"
-#include "logging.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-requeue-pi-signal-restart"
#define DELAY_US 100
futex_t f1 = FUTEX_INITIALIZER;
@@ -37,15 +37,6 @@ atomic_t requeued = ATOMIC_INITIALIZER;
int waiter_ret = 0;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
int policy, int prio)
{
@@ -57,35 +48,28 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
memset(&schedp, 0, sizeof(schedp));
ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
- if (ret) {
- error("pthread_attr_setinheritsched\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setinheritsched\n");
ret = pthread_attr_setschedpolicy(&attr, policy);
- if (ret) {
- error("pthread_attr_setschedpolicy\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setschedpolicy\n");
schedp.sched_priority = prio;
ret = pthread_attr_setschedparam(&attr, &schedp);
- if (ret) {
- error("pthread_attr_setschedparam\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setschedparam\n");
ret = pthread_create(pth, &attr, func, arg);
- if (ret) {
- error("pthread_create\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_create\n");
+
return 0;
}
void handle_signal(int signo)
{
- info("signal received %s requeue\n",
+ ksft_print_dbg_msg("signal received %s requeue\n",
requeued.val ? "after" : "prior to");
}
@@ -94,78 +78,46 @@ void *waiterfn(void *arg)
unsigned int old_val;
int res;
- waiter_ret = RET_PASS;
-
- info("Waiter running\n");
- info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Waiter running\n");
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
old_val = f1;
res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
FUTEX_PRIVATE_FLAG);
if (!requeued.val || errno != EWOULDBLOCK) {
- fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+ ksft_test_result_fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
res, strerror(errno));
- info("w2:futex: %x\n", f2);
+ ksft_print_dbg_msg("w2:futex: %x\n", f2);
if (!res)
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- waiter_ret = RET_FAIL;
}
- info("Waiter exiting with %d\n", waiter_ret);
pthread_exit(NULL);
}
-int main(int argc, char *argv[])
+TEST(futex_requeue_pi_signal_restart)
{
unsigned int old_val;
struct sigaction sa;
pthread_t waiter;
- int c, res, ret = RET_PASS;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test signal handling during requeue_pi\n",
- basename(argv[0]));
- ksft_print_msg("\tArguments: <none>\n");
+ int res;
sa.sa_handler = handle_signal;
sigemptyset(&sa.sa_mask);
sa.sa_flags = 0;
- if (sigaction(SIGUSR1, &sa, NULL)) {
- error("sigaction\n", errno);
- exit(1);
- }
+ if (sigaction(SIGUSR1, &sa, NULL))
+ ksft_exit_fail_msg("sigaction\n");
- info("m1:f2: %x\n", f2);
- info("Creating waiter\n");
+ ksft_print_dbg_msg("m1:f2: %x\n", f2);
+ ksft_print_dbg_msg("Creating waiter\n");
res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
- if (res) {
- error("Creating waiting thread failed", res);
- ret = RET_ERROR;
- goto out;
- }
+ if (res)
+ ksft_exit_fail_msg("Creating waiting thread failed");
- info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
- info("m2:f2: %x\n", f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("m2:f2: %x\n", f2);
futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
- info("m3:f2: %x\n", f2);
+ ksft_print_dbg_msg("m3:f2: %x\n", f2);
while (1) {
/*
@@ -173,30 +125,28 @@ int main(int argc, char *argv[])
* restart futex_wait_requeue_pi() in the kernel. Wait for the
* waiter to block on f1 again.
*/
- info("Issuing SIGUSR1 to waiter\n");
+ ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n");
pthread_kill(waiter, SIGUSR1);
usleep(DELAY_US);
- info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+ ksft_print_dbg_msg("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
old_val = f1;
res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
FUTEX_PRIVATE_FLAG);
/*
* If res is non-zero, we either requeued the waiter or hit an
* error, break out and handle it. If it is zero, then the
- * signal may have hit before the the waiter was blocked on f1.
+ * signal may have hit before the waiter was blocked on f1.
* Try again.
*/
if (res > 0) {
atomic_set(&requeued, 1);
break;
} else if (res < 0) {
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
- ret = RET_ERROR;
- break;
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
}
}
- info("m4:f2: %x\n", f2);
+ ksft_print_dbg_msg("m4:f2: %x\n", f2);
/*
* Signal the waiter after requeue, waiter should return from
@@ -204,19 +154,14 @@ int main(int argc, char *argv[])
* futex_unlock_pi() can't happen before the signal wakeup is detected
* in the kernel.
*/
- info("Issuing SIGUSR1 to waiter\n");
+ ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n");
pthread_kill(waiter, SIGUSR1);
- info("Waiting for waiter to return\n");
+ ksft_print_dbg_msg("Waiting for waiter to return\n");
pthread_join(waiter, NULL);
- info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- info("m5:f2: %x\n", f2);
-
- out:
- if (ret == RET_PASS && waiter_ret)
- ret = waiter_ret;
-
- print_result(TEST_NAME, ret);
- return ret;
+ ksft_print_dbg_msg("m5:f2: %x\n", f2);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
new file mode 100644
index 000000000000..0e69c53524c1
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#include "futextest.h"
+#include "kselftest_harness.h"
+
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+#define SHM_PATH "futex_shm_file"
+
+void *futex;
+
+static void *waiterfn(void *arg)
+{
+ struct timespec to;
+ unsigned int flags = 0;
+
+ if (arg)
+ flags = *((unsigned int *) arg);
+
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ if (futex_wait(futex, 0, &to, flags))
+ printf("waiter failed errno %d\n", errno);
+
+ return NULL;
+}
+
+TEST(private_futex)
+{
+ unsigned int flags = FUTEX_PRIVATE_FLAG;
+ u_int32_t f_private = 0;
+ pthread_t waiter;
+ int res;
+
+ futex = &f_private;
+
+ /* Testing a private futex */
+ ksft_print_dbg_msg("Calling private futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags))
+ ksft_exit_fail_msg("pthread_create failed\n");
+
+ usleep(WAKE_WAIT_US);
+
+ ksft_print_dbg_msg("Calling private futex_wake on futex: %p\n", futex);
+ res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
+ errno, strerror(errno));
+ } else {
+ ksft_test_result_pass("futex_wake private succeeds\n");
+ }
+}
+
+TEST(anon_page)
+{
+ u_int32_t *shared_data;
+ pthread_t waiter;
+ int res, shm_id;
+
+ /* Testing an anon page shared memory */
+ shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
+ if (shm_id < 0) {
+ perror("shmget");
+ exit(1);
+ }
+
+ shared_data = shmat(shm_id, NULL, 0);
+
+ *shared_data = 0;
+ futex = shared_data;
+
+ ksft_print_dbg_msg("Calling shared (page anon) futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ ksft_exit_fail_msg("pthread_create failed\n");
+
+ usleep(WAKE_WAIT_US);
+
+ ksft_print_dbg_msg("Calling shared (page anon) futex_wake on futex: %p\n", futex);
+ res = futex_wake(futex, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n",
+ errno, strerror(errno));
+ } else {
+ ksft_test_result_pass("futex_wake shared (page anon) succeeds\n");
+ }
+
+ shmdt(shared_data);
+}
+
+TEST(file_backed)
+{
+ u_int32_t f_private = 0;
+ pthread_t waiter;
+ int res, fd;
+ void *shm;
+
+ /* Testing a file backed shared memory */
+ fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ ksft_exit_fail_msg("open");
+
+ if (ftruncate(fd, sizeof(f_private)))
+ ksft_exit_fail_msg("ftruncate");
+
+ shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (shm == MAP_FAILED)
+ ksft_exit_fail_msg("mmap");
+
+ memcpy(shm, &f_private, sizeof(f_private));
+
+ futex = shm;
+
+ ksft_print_dbg_msg("Calling shared (file backed) futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ ksft_exit_fail_msg("pthread_create failed\n");
+
+ usleep(WAKE_WAIT_US);
+
+ ksft_print_dbg_msg("Calling shared (file backed) futex_wake on futex: %p\n", futex);
+ res = futex_wake(shm, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n",
+ errno, strerror(errno));
+ } else {
+ ksft_test_result_pass("futex_wake shared (file backed) succeeds\n");
+ }
+
+ munmap(shm, sizeof(f_private));
+ remove(SHM_PATH);
+ close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
index fb4148f23fa3..2a749f9b14eb 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -27,10 +27,9 @@
#include <libgen.h>
#include <signal.h>
-#include "logging.h"
#include "futextest.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-wait-private-mapped-file"
#define PAGE_SZ 4096
char pad[PAGE_SZ] = {1};
@@ -40,86 +39,44 @@ char pad2[PAGE_SZ] = {1};
#define WAKE_WAIT_US 3000000
struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *thr_futex_wait(void *arg)
{
int ret;
- info("futex wait\n");
+ ksft_print_dbg_msg("futex wait\n");
ret = futex_wait(&val, 1, &wait_timeout, 0);
- if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
- error("futex error.\n", errno);
- print_result(TEST_NAME, RET_ERROR);
- exit(RET_ERROR);
- }
+ if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT)
+ ksft_exit_fail_msg("futex error.\n");
if (ret && errno == ETIMEDOUT)
- fail("waiter timedout\n");
+ ksft_exit_fail_msg("waiter timedout\n");
- info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+ ksft_print_dbg_msg("futex_wait: ret = %d, errno = %d\n", ret, errno);
return NULL;
}
-int main(int argc, char **argv)
+TEST(wait_private_mapped_file)
{
pthread_t thr;
- int ret = RET_PASS;
int res;
- int c;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg(
- "%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
- basename(argv[0]));
-
- ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
- if (ret < 0) {
- fprintf(stderr, "pthread_create error\n");
- ret = RET_ERROR;
- goto out;
- }
-
- info("wait a while\n");
+
+ res = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+ if (res < 0)
+ ksft_exit_fail_msg("pthread_create error\n");
+
+ ksft_print_dbg_msg("wait a while\n");
usleep(WAKE_WAIT_US);
val = 2;
res = futex_wake(&val, 1, 0);
- info("futex_wake %d\n", res);
- if (res != 1) {
- fail("FUTEX_WAKE didn't find the waiting thread.\n");
- ret = RET_FAIL;
- }
+ ksft_print_dbg_msg("futex_wake %d\n", res);
+ if (res != 1)
+ ksft_exit_fail_msg("FUTEX_WAKE didn't find the waiting thread.\n");
- info("join\n");
+ ksft_print_dbg_msg("join\n");
pthread_join(thr, NULL);
- out:
- print_result(TEST_NAME, ret);
- return ret;
+ ksft_test_result_pass("wait_private_mapped_file");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index ee55e6d389a3..674dd13af421 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -11,76 +11,177 @@
*
* HISTORY
* 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ * 2021-Apr-26: More test cases by André Almeida <andrealmeid@collabora.com>
*
*****************************************************************************/
-#include <errno.h>
-#include <getopt.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include "futextest.h"
-#include "logging.h"
+#include <pthread.h>
-#define TEST_NAME "futex-wait-timeout"
+#include "futextest.h"
+#include "futex2test.h"
+#include "kselftest_harness.h"
static long timeout_ns = 100000; /* 100us default timeout */
+static futex_t futex_pi;
+static pthread_barrier_t barrier;
-void usage(char *prog)
+/*
+ * Get a PI lock and hold it forever, so the main thread lock_pi will block
+ * and we can test the timeout
+ */
+void *get_pi_lock(void *arg)
{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -t N Timeout in nanoseconds (default: 100,000)\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
+ int ret;
+ volatile futex_t lock = 0;
+
+ ret = futex_lock_pi(&futex_pi, NULL, 0, 0);
+ if (ret != 0)
+ ksft_exit_fail_msg("futex_lock_pi failed\n");
+
+ pthread_barrier_wait(&barrier);
+
+ /* Blocks forever */
+ ret = futex_wait(&lock, 0, NULL, 0);
+ ksft_exit_fail_msg("futex_wait failed\n");
+
+ return NULL;
}
-int main(int argc, char *argv[])
+/*
+ * Check if the function returned the expected error
+ */
+static void test_timeout(int res, char *test_name, int err)
{
- futex_t f1 = FUTEX_INITIALIZER;
- struct timespec to;
- int res, ret = RET_PASS;
- int c;
-
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 't':
- timeout_ns = atoi(optarg);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
+ if (!res || errno != err) {
+ ksft_test_result_fail("%s returned %d\n", test_name,
+ res < 0 ? errno : res);
+ } else {
+ ksft_test_result_pass("%s succeeds\n", test_name);
}
+}
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Block on a futex and wait for timeout\n",
- basename(argv[0]));
- ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
+/*
+ * Calculate absolute timeout and correct overflow
+ */
+static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
+ long timeout_ns)
+{
+ if (clock_gettime(clockid, to))
+ ksft_exit_fail_msg("clock_gettime failed\n");
+
+ to->tv_nsec += timeout_ns;
- /* initialize timeout */
+ if (to->tv_nsec >= 1000000000) {
+ to->tv_sec++;
+ to->tv_nsec -= 1000000000;
+ }
+
+ return 0;
+}
+
+TEST(wait_bitset)
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct timespec to;
+ int res;
+
+ /* initialize relative timeout */
to.tv_sec = 0;
to.tv_nsec = timeout_ns;
- info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
- res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
- if (!res || errno != ETIMEDOUT) {
- fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
- ret = RET_FAIL;
- }
+ res = futex_wait(&f1, f1, &to, 0);
+ test_timeout(res, "futex_wait relative", ETIMEDOUT);
+
+ /* FUTEX_WAIT_BITSET with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ ksft_test_result_error("get_time error");
+ res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, "futex_wait_bitset realtime", ETIMEDOUT);
- print_result(TEST_NAME, ret);
- return ret;
+ /* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ ksft_test_result_error("get_time error");
+ res = futex_wait_bitset(&f1, f1, &to, 1, 0);
+ test_timeout(res, "futex_wait_bitset monotonic", ETIMEDOUT);
}
+
+TEST(requeue_pi)
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct timespec to;
+ int res;
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ ksft_test_result_error("get_time error");
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, "futex_wait_requeue_pi realtime", ETIMEDOUT);
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ ksft_test_result_error("get_time error");
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
+ test_timeout(res, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
+}
+
+TEST(lock_pi)
+{
+ struct timespec to;
+ pthread_t thread;
+ int res;
+
+ /* Create a thread that will lock forever so any waiter will timeout */
+ pthread_barrier_init(&barrier, NULL, 2);
+ pthread_create(&thread, NULL, get_pi_lock, NULL);
+
+ /* Wait until the other thread calls futex_lock_pi() */
+ pthread_barrier_wait(&barrier);
+ pthread_barrier_destroy(&barrier);
+
+ /*
+ * FUTEX_LOCK_PI with CLOCK_REALTIME
+ * Due to historical reasons, FUTEX_LOCK_PI supports only realtime
+ * clock, but requires the caller to not set CLOCK_REALTIME flag.
+ *
+ * If you call FUTEX_LOCK_PI with a monotonic clock, it'll be
+ * interpreted as a realtime clock, and (unless you mess your machine's
+ * time or your time machine) the monotonic clock value is always
+ * smaller than realtime and the syscall will timeout immediately.
+ */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ ksft_test_result_error("get_time error");
+ res = futex_lock_pi(&futex_pi, &to, 0, 0);
+ test_timeout(res, "futex_lock_pi realtime", ETIMEDOUT);
+
+ /* Test operations that don't support FUTEX_CLOCK_REALTIME */
+ res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, "futex_lock_pi invalid timeout flag", ENOSYS);
+}
+
+TEST(waitv)
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1,
+ .flags = FUTEX_32,
+ .__reserved = 0,
+ };
+ struct timespec to;
+ int res;
+
+ /* futex_waitv with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ ksft_test_result_error("get_time error");
+ res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
+ test_timeout(res, "futex_waitv monotonic", ETIMEDOUT);
+
+ /* futex_waitv with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ ksft_test_result_error("get_time error");
+ res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME);
+ test_timeout(res, "futex_waitv realtime", ETIMEDOUT);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
index ed9cd07e31c1..b07d68a67f31 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -29,95 +29,55 @@
#include <linux/futex.h>
#include <libgen.h>
-#include "logging.h"
#include "futextest.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-wait-uninitialized-heap"
#define WAIT_US 5000000
static int child_blocked = 1;
-static int child_ret;
+static bool child_ret;
void *buf;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *wait_thread(void *arg)
{
int res;
- child_ret = RET_PASS;
+ child_ret = true;
res = futex_wait(buf, 1, NULL, 0);
child_blocked = 0;
if (res != 0 && errno != EWOULDBLOCK) {
- error("futex failure\n", errno);
- child_ret = RET_ERROR;
+ ksft_exit_fail_msg("futex failure\n");
+ child_ret = false;
}
pthread_exit(NULL);
}
-int main(int argc, char **argv)
+TEST(futex_wait_uninitialized_heap)
{
- int c, ret = RET_PASS;
long page_size;
pthread_t thr;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
+ int ret;
page_size = sysconf(_SC_PAGESIZE);
buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
- if (buf == (void *)-1) {
- error("mmap\n", errno);
- exit(1);
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
- basename(argv[0]));
-
+ if (buf == (void *)-1)
+ ksft_exit_fail_msg("mmap\n");
ret = pthread_create(&thr, NULL, wait_thread, NULL);
- if (ret) {
- error("pthread_create\n", errno);
- ret = RET_ERROR;
- goto out;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_create\n");
- info("waiting %dus for child to return\n", WAIT_US);
+ ksft_print_dbg_msg("waiting %dus for child to return\n", WAIT_US);
usleep(WAIT_US);
- ret = child_ret;
- if (child_blocked) {
- fail("child blocked in kernel\n");
- ret = RET_FAIL;
- }
+ if (child_blocked)
+ ksft_test_result_fail("child blocked in kernel\n");
- out:
- print_result(TEST_NAME, ret);
- return ret;
+ if (!child_ret)
+ ksft_test_result_fail("child error\n");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 0ae390ff8164..9ff936ecf164 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -21,58 +21,61 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
+
#include "futextest.h"
-#include "logging.h"
+#include "futex2test.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-wait-wouldblock"
#define timeout_ns 100000
-void usage(char *prog)
+TEST(futex_wait_wouldblock)
{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
+ struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+ futex_t f1 = FUTEX_INITIALIZER;
+ int res;
+
+ ksft_print_dbg_msg("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
+ if (!res || errno != EWOULDBLOCK) {
+ ksft_test_result_fail("futex_wait returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_wait\n");
+ }
}
-int main(int argc, char *argv[])
+TEST(futex_waitv_wouldblock)
{
struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
futex_t f1 = FUTEX_INITIALIZER;
- int res, ret = RET_PASS;
- int c;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1 + 1,
+ .flags = FUTEX_32,
+ .__reserved = 0,
+ };
+ int res;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("clock_gettime failed %d\n", errno);
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
- basename(argv[0]));
+ to.tv_nsec += timeout_ns;
- info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
- res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
- if (!res || errno != EWOULDBLOCK) {
- fail("futex_wait returned: %d %s\n",
- res ? errno : res, res ? strerror(errno) : "");
- ret = RET_FAIL;
+ if (to.tv_nsec >= 1000000000) {
+ to.tv_sec++;
+ to.tv_nsec -= 1000000000;
}
- print_result(TEST_NAME, ret);
- return ret;
+ ksft_print_dbg_msg("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
+ if (!res || errno != EWOULDBLOCK) {
+ ksft_test_result_fail("futex_waitv returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_waitv\n");
+ }
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
new file mode 100644
index 000000000000..d60876164d4b
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * futex_waitv() test by André Almeida <andrealmeid@collabora.com>
+ *
+ * Copyright 2021 Collabora Ltd.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <sys/shm.h>
+
+#include "futextest.h"
+#include "futex2test.h"
+#include "kselftest_harness.h"
+
+#define WAKE_WAIT_US 10000
+#define NR_FUTEXES 30
+static struct futex_waitv waitv[NR_FUTEXES];
+u_int32_t futexes[NR_FUTEXES] = {0};
+
+void *waiterfn(void *arg)
+{
+ struct timespec to;
+ int res;
+
+ /* setting absolute timeout for futex2 */
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("gettime64 failed\n");
+
+ to.tv_sec++;
+
+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res < 0) {
+ ksft_test_result_fail("futex_waitv returned: %d %s\n",
+ errno, strerror(errno));
+ } else if (res != NR_FUTEXES - 1) {
+ ksft_test_result_fail("futex_waitv returned: %d, expecting %d\n",
+ res, NR_FUTEXES - 1);
+ }
+
+ return NULL;
+}
+
+TEST(private_waitv)
+{
+ pthread_t waiter;
+ int res, i;
+
+ for (i = 0; i < NR_FUTEXES; i++) {
+ waitv[i].uaddr = (uintptr_t)&futexes[i];
+ waitv[i].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
+ waitv[i].val = 0;
+ waitv[i].__reserved = 0;
+ }
+
+ /* Private waitv */
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ ksft_exit_fail_msg("pthread_create failed\n");
+
+ usleep(WAKE_WAIT_US);
+
+ res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, FUTEX_PRIVATE_FLAG);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_waitv private\n");
+ }
+}
+
+TEST(shared_waitv)
+{
+ pthread_t waiter;
+ int res, i;
+
+ /* Shared waitv */
+ for (i = 0; i < NR_FUTEXES; i++) {
+ int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
+
+ if (shm_id < 0) {
+ perror("shmget");
+ exit(1);
+ }
+
+ unsigned int *shared_data = shmat(shm_id, NULL, 0);
+
+ *shared_data = 0;
+ waitv[i].uaddr = (uintptr_t)shared_data;
+ waitv[i].flags = FUTEX_32;
+ waitv[i].val = 0;
+ waitv[i].__reserved = 0;
+ }
+
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ ksft_exit_fail_msg("pthread_create failed\n");
+
+ usleep(WAKE_WAIT_US);
+
+ res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_waitv shared\n");
+ }
+
+ for (i = 0; i < NR_FUTEXES; i++)
+ shmdt(u64_to_ptr(waitv[i].uaddr));
+}
+
+TEST(invalid_flag)
+{
+ struct timespec to;
+ int res;
+
+ /* Testing a waiter without FUTEX_32 flag */
+ waitv[0].flags = FUTEX_PRIVATE_FLAG;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("gettime64 failed\n");
+
+ to.tv_sec++;
+
+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_waitv without FUTEX_32\n");
+ }
+}
+
+TEST(unaligned_address)
+{
+ struct timespec to;
+ int res;
+
+ /* Testing a waiter with an unaligned address */
+ waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32;
+ waitv[0].uaddr = 1;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("gettime64 failed\n");
+
+ to.tv_sec++;
+
+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_waitv with an unaligned address\n");
+ }
+}
+
+TEST(null_address)
+{
+ struct timespec to;
+ int res;
+
+ /* Testing a NULL address for waiters.uaddr */
+ waitv[0].uaddr = 0x00000000;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("gettime64 failed\n");
+
+ to.tv_sec++;
+
+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n");
+ }
+
+ /* Testing a NULL address for *waiters */
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("gettime64 failed\n");
+
+ to.tv_sec++;
+
+ res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_waitv NULL address in *waiters\n");
+ }
+}
+
+TEST(invalid_clockid)
+{
+ struct timespec to;
+ int res;
+
+ /* Testing an invalid clockid */
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("gettime64 failed\n");
+
+ to.tv_sec++;
+
+ res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_TAI);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ } else {
+ ksft_test_result_pass("futex_waitv invalid clockid\n");
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 1acb6ace1680..e88545c06d57 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -18,58 +18,36 @@
#
###############################################################################
-# Test for a color capable console
-if [ -z "$USE_COLOR" ]; then
- tput setf 7 || tput setaf 7
- if [ $? -eq 0 ]; then
- USE_COLOR=1
- tput sgr0
- fi
-fi
-if [ "$USE_COLOR" -eq 1 ]; then
- COLOR="-c"
-fi
+echo
+./futex_requeue_pi
+
+echo
+./futex_requeue_pi_mismatched_ops
+echo
+./futex_requeue_pi_signal_restart
echo
-# requeue pi testing
-# without timeouts
-./futex_requeue_pi $COLOR
-./futex_requeue_pi $COLOR -b
-./futex_requeue_pi $COLOR -b -l
-./futex_requeue_pi $COLOR -b -o
-./futex_requeue_pi $COLOR -l
-./futex_requeue_pi $COLOR -o
-# with timeouts
-./futex_requeue_pi $COLOR -b -l -t 5000
-./futex_requeue_pi $COLOR -l -t 5000
-./futex_requeue_pi $COLOR -b -l -t 500000
-./futex_requeue_pi $COLOR -l -t 500000
-./futex_requeue_pi $COLOR -b -t 5000
-./futex_requeue_pi $COLOR -t 5000
-./futex_requeue_pi $COLOR -b -t 500000
-./futex_requeue_pi $COLOR -t 500000
-./futex_requeue_pi $COLOR -b -o -t 5000
-./futex_requeue_pi $COLOR -l -t 5000
-./futex_requeue_pi $COLOR -b -o -t 500000
-./futex_requeue_pi $COLOR -l -t 500000
-# with long timeout
-./futex_requeue_pi $COLOR -b -l -t 2000000000
-./futex_requeue_pi $COLOR -l -t 2000000000
+./futex_wait_timeout
+echo
+./futex_wait_wouldblock
+
+echo
+./futex_wait_uninitialized_heap
+./futex_wait_private_mapped_file
echo
-./futex_requeue_pi_mismatched_ops $COLOR
+./futex_wait
echo
-./futex_requeue_pi_signal_restart $COLOR
+./futex_requeue
echo
-./futex_wait_timeout $COLOR
+./futex_waitv
echo
-./futex_wait_wouldblock $COLOR
+./futex_priv_hash
echo
-./futex_wait_uninitialized_heap $COLOR
-./futex_wait_private_mapped_file $COLOR
+./futex_numa_mpol
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
new file mode 100644
index 000000000000..1f625b39948a
--- /dev/null
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Futex2 library addons for futex tests
+ *
+ * Copyright 2021 Collabora Ltd.
+ */
+#include <linux/time_types.h>
+#include <stdint.h>
+
+#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
+
+#ifndef __NR_futex_waitv
+#define __NR_futex_waitv 449
+struct futex_waitv {
+ __u64 val;
+ __u64 uaddr;
+ __u32 flags;
+ __u32 __reserved;
+};
+#endif
+
+#ifndef __NR_futex_wake
+#define __NR_futex_wake 454
+#endif
+
+#ifndef __NR_futex_wait
+#define __NR_futex_wait 455
+#endif
+
+#ifndef FUTEX2_SIZE_U32
+#define FUTEX2_SIZE_U32 0x02
+#endif
+
+#ifndef FUTEX2_NUMA
+#define FUTEX2_NUMA 0x04
+#endif
+
+#ifndef FUTEX2_MPOL
+#define FUTEX2_MPOL 0x08
+#endif
+
+#ifndef FUTEX2_PRIVATE
+#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG
+#endif
+
+#ifndef FUTEX2_NO_NODE
+#define FUTEX_NO_NODE (-1)
+#endif
+
+#ifndef FUTEX_32
+#define FUTEX_32 FUTEX2_SIZE_U32
+#endif
+
+struct futex32_numa {
+ futex_t futex;
+ futex_t numa;
+};
+
+/**
+ * futex_waitv - Wait at multiple futexes, wake on any
+ * @waiters: Array of waiters
+ * @nr_waiters: Length of waiters array
+ * @flags: Operation flags
+ * @timo: Optional timeout for operation
+ */
+static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
+ unsigned long flags, struct timespec *timo, clockid_t clockid)
+{
+ struct __kernel_timespec ts = {
+ .tv_sec = timo->tv_sec,
+ .tv_nsec = timo->tv_nsec,
+ };
+
+ return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid);
+}
+
+/*
+ * futex_wait() - block on uaddr with optional timeout
+ * @val: Expected value
+ * @flags: FUTEX2 flags
+ * @timeout: Relative timeout
+ * @clockid: Clock id for the timeout
+ */
+static inline int futex2_wait(void *uaddr, long val, unsigned int flags,
+ struct timespec *timeout, clockid_t clockid)
+{
+ return syscall(__NR_futex_wait, uaddr, val, ~0U, flags, timeout, clockid);
+}
+
+/*
+ * futex2_wake() - Wake a number of futexes
+ * @nr: Number of threads to wake at most
+ * @flags: FUTEX2 flags
+ */
+static inline int futex2_wake(void *uaddr, int nr, unsigned int flags)
+{
+ return syscall(__NR_futex_wake, uaddr, ~0U, nr, flags);
+}
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
index ddbcfc9b7bac..3d48e9789d9f 100644
--- a/tools/testing/selftests/futex/include/futextest.h
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -47,6 +47,28 @@ typedef volatile u_int32_t futex_t;
FUTEX_PRIVATE_FLAG)
#endif
+/*
+ * SYS_futex is expected from system C library, in glibc some 32-bit
+ * architectures (e.g. RV32) are using 64-bit time_t, therefore it doesn't have
+ * SYS_futex defined but just SYS_futex_time64. Define SYS_futex as
+ * SYS_futex_time64 in this situation to ensure the compilation and the
+ * compatibility.
+ */
+#if !defined(SYS_futex) && defined(SYS_futex_time64)
+#define SYS_futex SYS_futex_time64
+#endif
+
+/*
+ * On 32bit systems if we use "-D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64" or if
+ * we are using a newer compiler then the size of the timestamps will be 64bit,
+ * however, the SYS_futex will still point to the 32bit futex system call.
+ */
+#if __SIZEOF_POINTER__ == 4 && defined(SYS_futex_time64) && \
+ defined(_TIME_BITS) && _TIME_BITS == 64
+# undef SYS_futex
+# define SYS_futex SYS_futex_time64
+#endif
+
/**
* futex() - SYS_futex syscall wrapper
* @uaddr: address of first futex
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
deleted file mode 100644
index 874c69ce5cce..000000000000
--- a/tools/testing/selftests/futex/include/logging.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/******************************************************************************
- *
- * Copyright © International Business Machines Corp., 2009
- *
- * DESCRIPTION
- * Glibc independent futex library for testing kernel functionality.
- *
- * AUTHOR
- * Darren Hart <dvhart@linux.intel.com>
- *
- * HISTORY
- * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
- *
- *****************************************************************************/
-
-#ifndef _LOGGING_H
-#define _LOGGING_H
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <linux/futex.h>
-#include "kselftest.h"
-
-/*
- * Define PASS, ERROR, and FAIL strings with and without color escape
- * sequences, default to no color.
- */
-#define ESC 0x1B, '['
-#define BRIGHT '1'
-#define GREEN '3', '2'
-#define YELLOW '3', '3'
-#define RED '3', '1'
-#define ESCEND 'm'
-#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
-#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
-#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
-#define RESET_COLOR ESC, '0', 'm'
-static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
- RESET_COLOR, 0};
-static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
- RESET_COLOR, 0};
-static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
- RESET_COLOR, 0};
-static const char INFO_NORMAL[] = " INFO";
-static const char PASS_NORMAL[] = " PASS";
-static const char ERROR_NORMAL[] = "ERROR";
-static const char FAIL_NORMAL[] = " FAIL";
-const char *INFO = INFO_NORMAL;
-const char *PASS = PASS_NORMAL;
-const char *ERROR = ERROR_NORMAL;
-const char *FAIL = FAIL_NORMAL;
-
-/* Verbosity setting for INFO messages */
-#define VQUIET 0
-#define VCRITICAL 1
-#define VINFO 2
-#define VMAX VINFO
-int _verbose = VCRITICAL;
-
-/* Functional test return codes */
-#define RET_PASS 0
-#define RET_ERROR -1
-#define RET_FAIL -2
-
-/**
- * log_color() - Use colored output for PASS, ERROR, and FAIL strings
- * @use_color: use color (1) or not (0)
- */
-void log_color(int use_color)
-{
- if (use_color) {
- PASS = PASS_COLOR;
- ERROR = ERROR_COLOR;
- FAIL = FAIL_COLOR;
- } else {
- PASS = PASS_NORMAL;
- ERROR = ERROR_NORMAL;
- FAIL = FAIL_NORMAL;
- }
-}
-
-/**
- * log_verbosity() - Set verbosity of test output
- * @verbose: Enable (1) verbose output or not (0)
- *
- * Currently setting verbose=1 will enable INFO messages and 0 will disable
- * them. FAIL and ERROR messages are always displayed.
- */
-void log_verbosity(int level)
-{
- if (level > VMAX)
- level = VMAX;
- else if (level < 0)
- level = 0;
- _verbose = level;
-}
-
-/**
- * print_result() - Print standard PASS | ERROR | FAIL results
- * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL
- *
- * print_result() is primarily intended for functional tests.
- */
-void print_result(const char *test_name, int ret)
-{
- switch (ret) {
- case RET_PASS:
- ksft_test_result_pass("%s\n", test_name);
- ksft_print_cnts();
- return;
- case RET_ERROR:
- ksft_test_result_error("%s\n", test_name);
- ksft_print_cnts();
- return;
- case RET_FAIL:
- ksft_test_result_fail("%s\n", test_name);
- ksft_print_cnts();
- return;
- }
-}
-
-/* log level macros */
-#define info(message, vargs...) \
-do { \
- if (_verbose >= VINFO) \
- fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
-} while (0)
-
-#define error(message, err, args...) \
-do { \
- if (_verbose >= VCRITICAL) {\
- if (err) \
- fprintf(stderr, "\t%s: %s: "message, \
- ERROR, strerror(err), ##args); \
- else \
- fprintf(stderr, "\t%s: "message, ERROR, ##args); \
- } \
-} while (0)
-
-#define fail(message, args...) \
-do { \
- if (_verbose >= VCRITICAL) \
- fprintf(stderr, "\t%s: "message, FAIL, ##args); \
-} while (0)
-
-#endif
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
index 4c69408f3e84..ededb077a3a6 100644
--- a/tools/testing/selftests/gpio/.gitignore
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-gpio-mockup-chardev
+gpio-mockup-cdev
+gpio-chip-info
+gpio-line-name
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 41582fe485ee..7bfe315f7001 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,31 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-VAR_CFLAGS := $(shell pkg-config --cflags mount 2>/dev/null)
-VAR_LDLIBS := $(shell pkg-config --libs mount 2>/dev/null)
-ifeq ($(VAR_LDLIBS),)
-VAR_LDLIBS := -lmount -I/usr/include/libmount
-endif
-
-CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ $(VAR_CFLAGS)
-LDLIBS += $(VAR_LDLIBS)
-
-TEST_PROGS := gpio-mockup.sh
+TEST_PROGS := gpio-mockup.sh gpio-sim.sh gpio-aggregator.sh
TEST_FILES := gpio-mockup-sysfs.sh
-TEST_GEN_PROGS_EXTENDED := gpio-mockup-chardev
+TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
+CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES)
-KSFT_KHDR_INSTALL := 1
include ../lib.mk
-
-GPIODIR := $(realpath ../../../gpio)
-GPIOOUT := $(OUTPUT)/tools-gpio/
-GPIOOBJ := $(GPIOOUT)/gpio-utils.o
-
-CLEAN += ; $(RM) -rf $(GPIOOUT)
-
-$(TEST_GEN_PROGS_EXTENDED): $(GPIOOBJ)
-
-$(GPIOOUT):
- mkdir -p $@
-
-$(GPIOOBJ): $(GPIOOUT)
- $(MAKE) OUTPUT=$(GPIOOUT) -C $(GPIODIR)
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
index abaa6902b7b6..1287abeaac7e 100644
--- a/tools/testing/selftests/gpio/config
+++ b/tools/testing/selftests/gpio/config
@@ -1,2 +1,5 @@
CONFIG_GPIOLIB=y
+CONFIG_GPIO_CDEV=y
CONFIG_GPIO_MOCKUP=m
+CONFIG_GPIO_SIM=m
+CONFIG_GPIO_AGGREGATOR=m
diff --git a/tools/testing/selftests/gpio/gpio-aggregator.sh b/tools/testing/selftests/gpio/gpio-aggregator.sh
new file mode 100755
index 000000000000..9b6f80ad9f8a
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-aggregator.sh
@@ -0,0 +1,727 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 Bartosz Golaszewski <brgl@bgdev.pl>
+# Copyright (C) 2025 Koichiro Den <koichiro.den@canonical.com>
+
+BASE_DIR=$(dirname "$0")
+CONFIGFS_SIM_DIR="/sys/kernel/config/gpio-sim"
+CONFIGFS_AGG_DIR="/sys/kernel/config/gpio-aggregator"
+SYSFS_AGG_DIR="/sys/bus/platform/drivers/gpio-aggregator"
+MODULE="gpio-aggregator"
+
+fail() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test FAIL"
+ exit 1
+}
+
+skip() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test SKIP"
+ exit 4
+}
+
+# gpio-sim
+sim_enable_chip() {
+ local CHIP=$1
+
+ echo 1 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to enable the chip"
+}
+
+sim_disable_chip() {
+ local CHIP=$1
+
+ echo 0 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to disable the chip"
+}
+
+sim_configfs_cleanup() {
+ local NOCHECK=${1:-0}
+
+ for CHIP_DIR in "$CONFIGFS_SIM_DIR"/*; do
+ [ -d "$CHIP_DIR" ] || continue
+ echo 0 > "$CHIP_DIR/live"
+ find "$CHIP_DIR" -depth -type d -exec rmdir {} \;
+ done
+ [ "$NOCHECK" -eq 1 ] && return;
+ remaining=$(find "$CONFIGFS_SIM_DIR" -mindepth 1 -type d 2> /dev/null)
+ if [ -n "$remaining" ]; then
+ fail "Directories remain in $CONFIGFS_SIM_DIR: $remaining"
+ fi
+}
+
+sim_get_chip_label() {
+ local CHIP=$1
+ local BANK=$2
+ local CHIP_NAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name" 2> /dev/null) || \
+ fail "Unable to read the chip name from configfs"
+
+ $BASE_DIR/gpio-chip-info "/dev/$CHIP_NAME" label || \
+ fail "Unable to read the chip label from the character device"
+}
+
+# gpio-aggregator
+agg_create_chip() {
+ local CHIP=$1
+
+ mkdir "$CONFIGFS_AGG_DIR/$CHIP"
+}
+
+agg_remove_chip() {
+ local CHIP=$1
+
+ find "$CONFIGFS_AGG_DIR/$CHIP/" -depth -type d -exec rmdir {} \; || \
+ fail "Unable to remove $CONFIGFS_AGG_DIR/$CHIP"
+}
+
+agg_create_line() {
+ local CHIP=$1
+ local LINE=$2
+
+ mkdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE"
+}
+
+agg_remove_line() {
+ local CHIP=$1
+ local LINE=$2
+
+ rmdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE"
+}
+
+agg_set_key() {
+ local CHIP=$1
+ local LINE=$2
+ local KEY=$3
+
+ echo "$KEY" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/key" || fail "Unable to set the lookup key"
+}
+
+agg_set_offset() {
+ local CHIP=$1
+ local LINE=$2
+ local OFFSET=$3
+
+ echo "$OFFSET" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/offset" || \
+ fail "Unable to set the lookup offset"
+}
+
+agg_set_line_name() {
+ local CHIP=$1
+ local LINE=$2
+ local NAME=$3
+
+ echo "$NAME" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/name" || fail "Unable to set the line name"
+}
+
+agg_enable_chip() {
+ local CHIP=$1
+
+ echo 1 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to enable the chip"
+}
+
+agg_disable_chip() {
+ local CHIP=$1
+
+ echo 0 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to disable the chip"
+}
+
+agg_configfs_cleanup() {
+ local NOCHECK=${1:-0}
+
+ for CHIP_DIR in "$CONFIGFS_AGG_DIR"/*; do
+ [ -d "$CHIP_DIR" ] || continue
+ echo 0 > "$CHIP_DIR/live" 2> /dev/null
+ find "$CHIP_DIR" -depth -type d -exec rmdir {} \;
+ done
+ [ "$NOCHECK" -eq 1 ] && return;
+ remaining=$(find "$CONFIGFS_AGG_DIR" -mindepth 1 -type d 2> /dev/null)
+ if [ -n "$remaining" ]; then
+ fail "Directories remain in $CONFIGFS_AGG_DIR: $remaining"
+ fi
+}
+
+agg_configfs_dev_name() {
+ local CHIP=$1
+
+ cat "$CONFIGFS_AGG_DIR/$CHIP/dev_name" 2> /dev/null || \
+ fail "Unable to read the device name from configfs"
+}
+
+agg_configfs_chip_name() {
+ local CHIP=$1
+ local DEV_NAME=$(agg_configfs_dev_name "$CHIP")
+ local CHIP_LIST=$(find "/sys/devices/platform/$DEV_NAME" \
+ -maxdepth 1 -type d -name "gpiochip[0-9]*" 2> /dev/null)
+ local CHIP_COUNT=$(echo "$CHIP_LIST" | wc -l)
+
+ if [ -z "$CHIP_LIST" ]; then
+ fail "No gpiochip in /sys/devices/platform/$DEV_NAME/"
+ elif [ "$CHIP_COUNT" -ne 1 ]; then
+ fail "Multiple gpiochips unexpectedly found: $CHIP_LIST"
+ fi
+ basename "$CHIP_LIST"
+}
+
+agg_get_chip_num_lines() {
+ local CHIP=$1
+ local N_DIR=$(ls -d $CONFIGFS_AGG_DIR/$CHIP/line[0-9]* 2> /dev/null | wc -l)
+ local N_LINES
+
+ if [ "$(cat $CONFIGFS_AGG_DIR/$CHIP/live)" = 0 ]; then
+ echo "$N_DIR"
+ else
+ N_LINES=$(
+ $BASE_DIR/gpio-chip-info \
+ "/dev/$(agg_configfs_chip_name "$CHIP")" num-lines
+ ) || fail "Unable to read the number of lines from the character device"
+ if [ $N_DIR != $N_LINES ]; then
+ fail "Discrepancy between two sources for the number of lines"
+ fi
+ echo "$N_LINES"
+ fi
+}
+
+agg_get_chip_label() {
+ local CHIP=$1
+
+ $BASE_DIR/gpio-chip-info "/dev/$(agg_configfs_chip_name "$CHIP")" label || \
+ fail "Unable to read the chip label from the character device"
+}
+
+agg_get_line_name() {
+ local CHIP=$1
+ local OFFSET=$2
+ local NAME_CONFIGFS=$(cat "$CONFIGFS_AGG_DIR/$CHIP/line${OFFSET}/name")
+ local NAME_CDEV
+
+ if [ "$(cat "$CONFIGFS_AGG_DIR/$CHIP/live")" = 0 ]; then
+ echo "$NAME_CONFIGFS"
+ else
+ NAME_CDEV=$(
+ $BASE_DIR/gpio-line-name \
+ "/dev/$(agg_configfs_chip_name "$CHIP")" "$OFFSET"
+ ) || fail "Unable to read the line name from the character device"
+ if [ "$NAME_CONFIGFS" != "$NAME_CDEV" ]; then
+ fail "Discrepancy between two sources for the name of line"
+ fi
+ echo "$NAME_CDEV"
+ fi
+}
+
+
+# Load the modules. This will pull in configfs if needed too.
+modprobe gpio-sim || skip "unable to load the gpio-sim module"
+modprobe gpio-aggregator || skip "unable to load the gpio-aggregator module"
+
+# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed.
+for IDX in $(seq 5); do
+ if [ "$IDX" -eq "5" ]; then
+ skip "configfs not mounted at /sys/kernel/config"
+ fi
+
+ mountpoint -q /sys/kernel/config && break
+ sleep 0.1
+done
+
+# If the module was already loaded: remove all previous chips
+agg_configfs_cleanup
+sim_configfs_cleanup
+
+trap "exit 1" SIGTERM SIGINT
+trap "agg_configfs_cleanup 1; sim_configfs_cleanup 1" EXIT
+
+# Use gpio-sim chips as the test backend
+for CHIP in $(seq -f "chip%g" 0 1); do
+ mkdir $CONFIGFS_SIM_DIR/$CHIP
+ for BANK in $(seq -f "bank%g" 0 1); do
+ mkdir -p "$CONFIGFS_SIM_DIR/$CHIP/$BANK"
+ echo "${CHIP}_${BANK}" > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/label" || \
+ fail "unable to set the chip label"
+ echo 16 > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/num_lines" || \
+ fail "unable to set the number of lines"
+ for IDX in $(seq 0 15); do
+ LINE_NAME="${CHIP}${BANK}_${IDX}"
+ LINE_DIR="$CONFIGFS_SIM_DIR/$CHIP/$BANK/line$IDX"
+ mkdir -p $LINE_DIR
+ echo "$LINE_NAME" > "$LINE_DIR/name" || fail "unable to set the line name"
+ done
+ done
+ sim_enable_chip "$CHIP"
+done
+
+echo "1. GPIO aggregator creation/deletion"
+
+echo "1.1. Creation/deletion via configfs"
+
+echo "1.1.1. Minimum creation/deletion"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+agg_set_offset agg0 line0 5
+agg_set_line_name agg0 line0 test0
+agg_enable_chip agg0
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines agg0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset"
+agg_disable_chip agg0
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.2. Complex creation/deletion"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_create_line agg0 line1
+agg_create_line agg0 line2
+agg_create_line agg0 line3
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key agg0 line2 "$(sim_get_chip_label chip1 bank0)"
+agg_set_key agg0 line3 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset agg0 line0 1
+agg_set_offset agg0 line1 3
+agg_set_offset agg0 line2 5
+agg_set_offset agg0 line3 7
+agg_set_line_name agg0 line0 test0
+agg_set_line_name agg0 line1 test1
+agg_set_line_name agg0 line2 test2
+agg_set_line_name agg0 line3 test3
+agg_enable_chip agg0
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines agg0)" = "4" || fail "number of lines is not 1"
+test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset"
+test "$(agg_get_line_name agg0 1)" = "test1" || fail "line name is unset"
+test "$(agg_get_line_name agg0 2)" = "test2" || fail "line name is unset"
+test "$(agg_get_line_name agg0 3)" = "test3" || fail "line name is unset"
+agg_disable_chip agg0
+agg_remove_line agg0 line0
+agg_remove_line agg0 line1
+agg_remove_line agg0 line2
+agg_remove_line agg0 line3
+agg_remove_chip agg0
+
+echo "1.1.3. Can't instantiate a chip without any line"
+agg_create_chip agg0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+agg_remove_chip agg0
+
+echo "1.1.4. Can't instantiate a chip with invalid configuration"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "chipX_bankX"
+agg_set_offset agg0 line0 99
+agg_set_line_name agg0 line0 test0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.5. Can't instantiate a chip asynchronously via deferred probe"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "chip0_bank0"
+agg_set_offset agg0 line0 5
+agg_set_line_name agg0 line0 test0
+sim_disable_chip chip0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+sim_enable_chip chip0
+sleep 1
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || \
+ fail "chip unexpectedly transitioned to 'live' state"
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.6. Can't instantiate a chip with _sysfs prefix"
+mkdir "$CONFIGFS_AGG_DIR/_sysfs" 2> /dev/null && fail "chip _sysfs unexpectedly created"
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.foo" 2> /dev/null && fail "chip _sysfs.foo unexpectedly created"
+
+echo "1.2. Creation/deletion via sysfs"
+
+echo "1.2.1. Minimum creation/deletion"
+echo "chip0_bank0 0" > "$SYSFS_AGG_DIR/new_device"
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.2. Complex creation/deletion"
+echo "chip0bank0_0 chip1_bank1 10-11" > "$SYSFS_AGG_DIR/new_device"
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "3" || fail "number of lines is not 3"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 1)" = "" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 2)" = "" || fail "line name is unset"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.3. Asynchronous creation with deferred probe"
+sim_disable_chip chip0
+echo 'chip0_bank0 0' > $SYSFS_AGG_DIR/new_device
+sleep 1
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive"
+sim_enable_chip chip0
+sleep 1
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly remains dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name unexpectedly set"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.4. Can't instantiate a chip with invalid configuration"
+echo "xyz 0" > "$SYSFS_AGG_DIR/new_device"
+test "$(cat $CONFIGFS_AGG_DIR/_sysfs.0/live)" = 0 || fail "chip unexpectedly alive"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+
+echo "2. GPIO aggregator configuration"
+
+echo "2.1. Configuring aggregators instantiated via configfs"
+setup_2_1() {
+ agg_create_chip agg0
+ agg_create_line agg0 line0
+ agg_create_line agg0 line1
+ agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+ agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank0)"
+ agg_set_offset agg0 line0 1
+ agg_set_offset agg0 line1 3
+ agg_set_line_name agg0 line0 test0
+ agg_set_line_name agg0 line1 test1
+ agg_enable_chip agg0
+}
+teardown_2_1() {
+ agg_configfs_cleanup
+}
+
+echo "2.1.1. While offline"
+
+echo "2.1.1.1. Line can be added/removed"
+setup_2_1
+agg_disable_chip agg0
+agg_create_line agg0 line2
+agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line2 5
+agg_enable_chip agg0
+test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1"
+teardown_2_1
+
+echo "2.1.1.2. Line key can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank1)"
+agg_enable_chip agg0
+teardown_2_1
+
+echo "2.1.1.3. Line name can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_line_name agg0 line0 new0
+agg_set_line_name agg0 line1 new1
+agg_enable_chip agg0
+test "$(agg_get_line_name agg0 0)" = "new0" || fail "line name is unset"
+test "$(agg_get_line_name agg0 1)" = "new1" || fail "line name is unset"
+teardown_2_1
+
+echo "2.1.1.4. Line offset can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_offset agg0 line0 5
+agg_set_offset agg0 line1 7
+agg_enable_chip agg0
+teardown_2_1
+
+echo "2.1.1.5. Can re-enable a chip after valid reconfiguration"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset agg0 line0 15
+agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line0 14
+agg_create_line agg0 line2
+agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line2 13
+agg_enable_chip agg0
+test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1"
+teardown_2_1
+
+echo "2.1.1.7. Can't re-enable a chip with invalid reconfiguration"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 invalidkey
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_1
+setup_2_1
+agg_disable_chip agg0
+agg_set_offset agg0 line0 99
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_1
+
+echo "2.1.2. While online"
+
+echo "2.1.2.1. Can't add/remove line"
+setup_2_1
+mkdir "$CONFIGFS_AGG_DIR/agg0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/agg0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_1
+
+echo "2.1.2.2. Can't modify line key"
+setup_2_1
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/agg0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_1
+
+echo "2.1.2.3. Can't modify line name"
+setup_2_1
+echo "new0" > "$CONFIGFS_AGG_DIR/agg0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_1
+
+echo "2.1.2.4. Can't modify line offset"
+setup_2_1
+echo "5" > "$CONFIGFS_AGG_DIR/agg0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_1
+
+echo "2.2. Configuring aggregators instantiated via sysfs"
+setup_2_2() {
+ echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device"
+}
+teardown_2_2() {
+ echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+}
+
+echo "2.2.1. While online"
+
+echo "2.2.1.1. Can toggle live"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.1.2. Can't add/remove line"
+setup_2_2
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+
+echo "2.2.1.3. Can't modify line key"
+setup_2_2
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_2
+
+echo "2.2.1.4. Can't modify line name"
+setup_2_2
+echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_2
+
+echo "2.2.1.5. Can't modify line offset"
+setup_2_2
+echo "5" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_2
+
+echo "2.2.2. While waiting for deferred probe"
+
+echo "2.2.2.1. Can't add/remove line despite live = 0"
+sim_disable_chip chip0
+setup_2_2
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.2. Can't modify line key"
+sim_disable_chip chip0
+setup_2_2
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.3. Can't modify line name"
+sim_disable_chip chip0
+setup_2_2
+echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.4. Can't modify line offset"
+sim_disable_chip chip0
+setup_2_2
+echo 5 > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.5. Can't toggle live"
+sim_disable_chip chip0
+setup_2_2
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive"
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.3. While offline"
+
+echo "2.2.3.1. Can't add/remove line despite live = 0"
+setup_2_2
+agg_disable_chip _sysfs.0
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+
+echo "2.2.3.2. Line key can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip1 bank1)"
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.3. Line name can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_line_name _sysfs.0 line0 new0
+agg_set_line_name _sysfs.0 line1 new1
+agg_enable_chip _sysfs.0
+test "$(agg_get_line_name _sysfs.0 0)" = "new0" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 1)" = "new1" || fail "line name is unset"
+teardown_2_2
+
+echo "2.2.3.4. Line offset can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_offset _sysfs.0 line0 5
+agg_set_offset _sysfs.0 line1 7
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.5. Can re-enable a chip with valid reconfiguration"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset _sysfs.0 line0 15
+agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset _sysfs.0 line0 14
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.6. Can't re-enable a chip with invalid reconfiguration"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 invalidkey
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_offset _sysfs.0 line0 99
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+
+echo "3. Module unload"
+
+echo "3.1. Can't unload module if there is at least one device created via configfs"
+agg_create_chip agg0
+modprobe -r gpio-aggregator 2> /dev/null
+test -d /sys/module/gpio_aggregator || fail "module unexpectedly unloaded"
+agg_remove_chip agg0
+
+echo "3.2. Can unload module if there is no device created via configfs"
+echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device"
+modprobe -r gpio-aggregator 2> /dev/null
+test -d /sys/module/gpio_aggregator && fail "module unexpectedly remains to be loaded"
+modprobe gpio-aggregator 2> /dev/null
+
+echo "4. GPIO forwarder functional"
+SETTINGS="chip0:bank0:2 chip0:bank1:4 chip1:bank0:6 chip1:bank1:8"
+setup_4() {
+ local OFFSET=0
+ agg_create_chip agg0
+ for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ agg_create_line agg0 "line${OFFSET}"
+ agg_set_key agg0 "line${OFFSET}" "$(sim_get_chip_label "$CHIP" "$BANK")"
+ agg_set_offset agg0 "line${OFFSET}" "$LINE"
+ OFFSET=$(expr $OFFSET + 1)
+ done
+ agg_enable_chip agg0
+}
+teardown_4() {
+ agg_configfs_cleanup
+}
+
+echo "4.1. Forwarding set values"
+setup_4
+OFFSET=0
+for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name")
+ CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name")
+ VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value"
+ test $(cat $VAL_PATH) = "0" || fail "incorrect value read from sysfs"
+ $BASE_DIR/gpio-mockup-cdev -s 1 "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET" &
+ mock_pid=$!
+ sleep 0.1 # FIXME Any better way?
+ test "$(cat $VAL_PATH)" = "1" || fail "incorrect value read from sysfs"
+ kill "$mock_pid"
+ OFFSET=$(expr $OFFSET + 1)
+done
+teardown_4
+
+echo "4.2. Forwarding set config"
+setup_4
+OFFSET=0
+for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name")
+ CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name")
+ VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value"
+ $BASE_DIR/gpio-mockup-cdev -b pull-up "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET"
+ test $(cat "$VAL_PATH") = "1" || fail "incorrect value read from sysfs"
+ OFFSET=$(expr $OFFSET + 1)
+done
+teardown_4
+
+echo "5. Race condition verification"
+
+echo "5.1. Stress test of new_device/delete_device and module load/unload"
+for _ in $(seq 1000); do
+ {
+ echo "dummy 0" > "$SYSFS_AGG_DIR/new_device"
+ cat "$CONFIGFS_AGG_DIR/_sysfs.0/dev_name" > "$SYSFS_AGG_DIR/delete_device"
+ } 2> /dev/null
+done &
+writer_pid=$!
+while kill -0 "$writer_pid" 2> /dev/null; do
+ {
+ modprobe gpio-aggregator
+ modprobe -r gpio-aggregator
+ } 2> /dev/null
+done
+
+echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/gpio/gpio-chip-info.c b/tools/testing/selftests/gpio/gpio-chip-info.c
new file mode 100644
index 000000000000..fdc07e742fba
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-chip-info.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GPIO character device helper for reading chip information.
+ *
+ * Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl>
+ */
+
+#include <fcntl.h>
+#include <linux/gpio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+static void print_usage(void)
+{
+ printf("usage:\n");
+ printf(" gpio-chip-info <chip path> [name|label|num-lines]\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct gpiochip_info info;
+ int fd, ret;
+
+ if (argc != 3) {
+ print_usage();
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDWR);
+ if (fd < 0) {
+ perror("unable to open the GPIO chip");
+ return EXIT_FAILURE;
+ }
+
+ memset(&info, 0, sizeof(info));
+ ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, &info);
+ if (ret) {
+ perror("chip info ioctl failed");
+ return EXIT_FAILURE;
+ }
+
+ if (strcmp(argv[2], "name") == 0) {
+ printf("%s\n", info.name);
+ } else if (strcmp(argv[2], "label") == 0) {
+ printf("%s\n", info.label);
+ } else if (strcmp(argv[2], "num-lines") == 0) {
+ printf("%u\n", info.lines);
+ } else {
+ fprintf(stderr, "unknown command: %s\n", argv[2]);
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/gpio/gpio-line-name.c b/tools/testing/selftests/gpio/gpio-line-name.c
new file mode 100644
index 000000000000..e635cfadbded
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-line-name.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GPIO character device helper for reading line names.
+ *
+ * Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl>
+ */
+
+#include <fcntl.h>
+#include <linux/gpio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+static void print_usage(void)
+{
+ printf("usage:\n");
+ printf(" gpio-line-name <chip path> <line offset>\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct gpio_v2_line_info info;
+ int fd, ret;
+ char *endp;
+
+ if (argc != 3) {
+ print_usage();
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDWR);
+ if (fd < 0) {
+ perror("unable to open the GPIO chip");
+ return EXIT_FAILURE;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.offset = strtoul(argv[2], &endp, 10);
+ if (*endp != '\0') {
+ print_usage();
+ return EXIT_FAILURE;
+ }
+
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &info);
+ if (ret) {
+ perror("line info ioctl failed");
+ return EXIT_FAILURE;
+ }
+
+ printf("%s\n", info.name);
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-cdev.c b/tools/testing/selftests/gpio/gpio-mockup-cdev.c
new file mode 100644
index 000000000000..d1640f44f8ac
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup-cdev.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GPIO mockup cdev test helper
+ *
+ * Copyright (C) 2020 Kent Gibson
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/gpio.h>
+
+#define CONSUMER "gpio-mockup-cdev"
+
+static int request_line_v2(int cfd, unsigned int offset,
+ uint64_t flags, unsigned int val)
+{
+ struct gpio_v2_line_request req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ req.num_lines = 1;
+ req.offsets[0] = offset;
+ req.config.flags = flags;
+ strcpy(req.consumer, CONSUMER);
+ if (flags & GPIO_V2_LINE_FLAG_OUTPUT) {
+ req.config.num_attrs = 1;
+ req.config.attrs[0].mask = 1;
+ req.config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES;
+ if (val)
+ req.config.attrs[0].attr.values = 1;
+ }
+ ret = ioctl(cfd, GPIO_V2_GET_LINE_IOCTL, &req);
+ if (ret == -1)
+ return -errno;
+ return req.fd;
+}
+
+
+static int get_value_v2(int lfd)
+{
+ struct gpio_v2_line_values vals;
+ int ret;
+
+ memset(&vals, 0, sizeof(vals));
+ vals.mask = 1;
+ ret = ioctl(lfd, GPIO_V2_LINE_GET_VALUES_IOCTL, &vals);
+ if (ret == -1)
+ return -errno;
+ return vals.bits & 0x1;
+}
+
+static int request_line_v1(int cfd, unsigned int offset,
+ uint32_t flags, unsigned int val)
+{
+ struct gpiohandle_request req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ req.lines = 1;
+ req.lineoffsets[0] = offset;
+ req.flags = flags;
+ strcpy(req.consumer_label, CONSUMER);
+ if (flags & GPIOHANDLE_REQUEST_OUTPUT)
+ req.default_values[0] = val;
+
+ ret = ioctl(cfd, GPIO_GET_LINEHANDLE_IOCTL, &req);
+ if (ret == -1)
+ return -errno;
+ return req.fd;
+}
+
+static int get_value_v1(int lfd)
+{
+ struct gpiohandle_data vals;
+ int ret;
+
+ memset(&vals, 0, sizeof(vals));
+ ret = ioctl(lfd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &vals);
+ if (ret == -1)
+ return -errno;
+ return vals.values[0];
+}
+
+static void usage(char *prog)
+{
+ printf("Usage: %s [-l] [-b <bias>] [-s <value>] [-u <uAPI>] <gpiochip> <offset>\n", prog);
+ printf(" -b: set line bias to one of pull-down, pull-up, disabled\n");
+ printf(" (default is to leave bias unchanged):\n");
+ printf(" -l: set line active low (default is active high)\n");
+ printf(" -s: set line value (default is to get line value)\n");
+ printf(" -u: uAPI version to use (default is 2)\n");
+ exit(-1);
+}
+
+static int wait_signal(void)
+{
+ int sig;
+ sigset_t wset;
+
+ sigemptyset(&wset);
+ sigaddset(&wset, SIGHUP);
+ sigaddset(&wset, SIGINT);
+ sigaddset(&wset, SIGTERM);
+ sigwait(&wset, &sig);
+
+ return sig;
+}
+
+int main(int argc, char *argv[])
+{
+ char *chip;
+ int opt, ret, cfd, lfd;
+ unsigned int offset, val = 0, abiv;
+ uint32_t flags_v1;
+ uint64_t flags_v2;
+
+ abiv = 2;
+ ret = 0;
+ flags_v1 = GPIOHANDLE_REQUEST_INPUT;
+ flags_v2 = GPIO_V2_LINE_FLAG_INPUT;
+
+ while ((opt = getopt(argc, argv, "lb:s:u:")) != -1) {
+ switch (opt) {
+ case 'l':
+ flags_v1 |= GPIOHANDLE_REQUEST_ACTIVE_LOW;
+ flags_v2 |= GPIO_V2_LINE_FLAG_ACTIVE_LOW;
+ break;
+ case 'b':
+ if (strcmp("pull-up", optarg) == 0) {
+ flags_v1 |= GPIOHANDLE_REQUEST_BIAS_PULL_UP;
+ flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_PULL_UP;
+ } else if (strcmp("pull-down", optarg) == 0) {
+ flags_v1 |= GPIOHANDLE_REQUEST_BIAS_PULL_DOWN;
+ flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN;
+ } else if (strcmp("disabled", optarg) == 0) {
+ flags_v1 |= GPIOHANDLE_REQUEST_BIAS_DISABLE;
+ flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_DISABLED;
+ }
+ break;
+ case 's':
+ val = atoi(optarg);
+ flags_v1 &= ~GPIOHANDLE_REQUEST_INPUT;
+ flags_v1 |= GPIOHANDLE_REQUEST_OUTPUT;
+ flags_v2 &= ~GPIO_V2_LINE_FLAG_INPUT;
+ flags_v2 |= GPIO_V2_LINE_FLAG_OUTPUT;
+ break;
+ case 'u':
+ abiv = atoi(optarg);
+ break;
+ default:
+ usage(argv[0]);
+ }
+ }
+
+ if (argc < optind + 2)
+ usage(argv[0]);
+
+ chip = argv[optind];
+ offset = atoi(argv[optind + 1]);
+
+ cfd = open(chip, 0);
+ if (cfd == -1) {
+ fprintf(stderr, "Failed to open %s: %s\n", chip, strerror(errno));
+ return -errno;
+ }
+
+ if (abiv == 1)
+ lfd = request_line_v1(cfd, offset, flags_v1, val);
+ else
+ lfd = request_line_v2(cfd, offset, flags_v2, val);
+
+ close(cfd);
+
+ if (lfd < 0) {
+ fprintf(stderr, "Failed to request %s:%d: %s\n", chip, offset, strerror(-lfd));
+ return lfd;
+ }
+
+ if (flags_v2 & GPIO_V2_LINE_FLAG_OUTPUT) {
+ wait_signal();
+ } else {
+ if (abiv == 1)
+ ret = get_value_v1(lfd);
+ else
+ ret = get_value_v2(lfd);
+ }
+
+ close(lfd);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
deleted file mode 100644
index 73ead8828d3a..000000000000
--- a/tools/testing/selftests/gpio/gpio-mockup-chardev.c
+++ /dev/null
@@ -1,323 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * GPIO chardev test helper
- *
- * Copyright (C) 2016 Bamvor Jian Zhang
- */
-
-#define _GNU_SOURCE
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <sys/ioctl.h>
-#include <libmount.h>
-#include <err.h>
-#include <dirent.h>
-#include <linux/gpio.h>
-#include "../../../gpio/gpio-utils.h"
-
-#define CONSUMER "gpio-selftest"
-#define GC_NUM 10
-enum direction {
- OUT,
- IN
-};
-
-static int get_debugfs(char **path)
-{
- struct libmnt_context *cxt;
- struct libmnt_table *tb;
- struct libmnt_iter *itr = NULL;
- struct libmnt_fs *fs;
- int found = 0, ret;
-
- cxt = mnt_new_context();
- if (!cxt)
- err(EXIT_FAILURE, "libmount context allocation failed");
-
- itr = mnt_new_iter(MNT_ITER_FORWARD);
- if (!itr)
- err(EXIT_FAILURE, "failed to initialize libmount iterator");
-
- if (mnt_context_get_mtab(cxt, &tb))
- err(EXIT_FAILURE, "failed to read mtab");
-
- while (mnt_table_next_fs(tb, itr, &fs) == 0) {
- const char *type = mnt_fs_get_fstype(fs);
-
- if (!strcmp(type, "debugfs")) {
- found = 1;
- break;
- }
- }
- if (found) {
- ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs));
- if (ret < 0)
- err(EXIT_FAILURE, "failed to format string");
- }
-
- mnt_free_iter(itr);
- mnt_free_context(cxt);
-
- if (!found)
- return -1;
-
- return 0;
-}
-
-static int gpio_debugfs_get(const char *consumer, int *dir, int *value)
-{
- char *debugfs;
- FILE *f;
- char *line = NULL;
- size_t len = 0;
- char *cur;
- int found = 0;
-
- if (get_debugfs(&debugfs) != 0)
- err(EXIT_FAILURE, "debugfs is not mounted");
-
- f = fopen(debugfs, "r");
- if (!f)
- err(EXIT_FAILURE, "read from gpio debugfs failed");
-
- /*
- * gpio-2 ( |gpio-selftest ) in lo
- */
- while (getline(&line, &len, f) != -1) {
- cur = strstr(line, consumer);
- if (cur == NULL)
- continue;
-
- cur = strchr(line, ')');
- if (!cur)
- continue;
-
- cur += 2;
- if (!strncmp(cur, "out", 3)) {
- *dir = OUT;
- cur += 4;
- } else if (!strncmp(cur, "in", 2)) {
- *dir = IN;
- cur += 4;
- }
-
- if (!strncmp(cur, "hi", 2))
- *value = 1;
- else if (!strncmp(cur, "lo", 2))
- *value = 0;
-
- found = 1;
- break;
- }
- free(debugfs);
- fclose(f);
- free(line);
-
- if (!found)
- return -1;
-
- return 0;
-}
-
-static struct gpiochip_info *list_gpiochip(const char *gpiochip_name, int *ret)
-{
- struct gpiochip_info *cinfo;
- struct gpiochip_info *current;
- const struct dirent *ent;
- DIR *dp;
- char *chrdev_name;
- int fd;
- int i = 0;
-
- cinfo = calloc(sizeof(struct gpiochip_info) * 4, GC_NUM + 1);
- if (!cinfo)
- err(EXIT_FAILURE, "gpiochip_info allocation failed");
-
- current = cinfo;
- dp = opendir("/dev");
- if (!dp) {
- *ret = -errno;
- goto error_out;
- } else {
- *ret = 0;
- }
-
- while (ent = readdir(dp), ent) {
- if (check_prefix(ent->d_name, "gpiochip")) {
- *ret = asprintf(&chrdev_name, "/dev/%s", ent->d_name);
- if (*ret < 0)
- goto error_out;
-
- fd = open(chrdev_name, 0);
- if (fd == -1) {
- *ret = -errno;
- fprintf(stderr, "Failed to open %s\n",
- chrdev_name);
- goto error_close_dir;
- }
- *ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, current);
- if (*ret == -1) {
- perror("Failed to issue CHIPINFO IOCTL\n");
- goto error_close_dir;
- }
- close(fd);
- if (strcmp(current->label, gpiochip_name) == 0
- || check_prefix(current->label, gpiochip_name)) {
- *ret = 0;
- current++;
- i++;
- }
- }
- }
-
- if ((!*ret && i == 0) || *ret < 0) {
- free(cinfo);
- cinfo = NULL;
- }
- if (!*ret && i > 0) {
- cinfo = realloc(cinfo, sizeof(struct gpiochip_info) * 4 * i);
- *ret = i;
- }
-
-error_close_dir:
- closedir(dp);
-error_out:
- if (*ret < 0)
- err(EXIT_FAILURE, "list gpiochip failed: %s", strerror(*ret));
-
- return cinfo;
-}
-
-int gpio_pin_test(struct gpiochip_info *cinfo, int line, int flag, int value)
-{
- struct gpiohandle_data data;
- unsigned int lines[] = {line};
- int fd;
- int debugfs_dir = IN;
- int debugfs_value = 0;
- int ret;
-
- data.values[0] = value;
- ret = gpiotools_request_linehandle(cinfo->name, lines, 1, flag, &data,
- CONSUMER);
- if (ret < 0)
- goto fail_out;
- else
- fd = ret;
-
- ret = gpio_debugfs_get(CONSUMER, &debugfs_dir, &debugfs_value);
- if (ret) {
- ret = -EINVAL;
- goto fail_out;
- }
- if (flag & GPIOHANDLE_REQUEST_INPUT) {
- if (debugfs_dir != IN) {
- errno = -EINVAL;
- ret = -errno;
- }
- } else if (flag & GPIOHANDLE_REQUEST_OUTPUT) {
- if (flag & GPIOHANDLE_REQUEST_ACTIVE_LOW)
- debugfs_value = !debugfs_value;
-
- if (!(debugfs_dir == OUT && value == debugfs_value)) {
- errno = -EINVAL;
- ret = -errno;
- }
- }
- gpiotools_release_linehandle(fd);
-
-fail_out:
- if (ret)
- err(EXIT_FAILURE, "gpio<%s> line<%d> test flag<0x%x> value<%d>",
- cinfo->name, line, flag, value);
-
- return ret;
-}
-
-void gpio_pin_tests(struct gpiochip_info *cinfo, unsigned int line)
-{
- printf("line<%d>", line);
- gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 0);
- printf(".");
- gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 1);
- printf(".");
- gpio_pin_test(cinfo, line,
- GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
- 0);
- printf(".");
- gpio_pin_test(cinfo, line,
- GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
- 1);
- printf(".");
- gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_INPUT, 0);
- printf(".");
-}
-
-/*
- * ./gpio-mockup-chardev gpio_chip_name_prefix is_valid_gpio_chip
- * Return 0 if successful or exit with EXIT_FAILURE if test failed.
- * gpio_chip_name_prefix: The prefix of gpiochip you want to test. E.g.
- * gpio-mockup
- * is_valid_gpio_chip: Whether the gpio_chip is valid. 1 means valid,
- * 0 means invalid which could not be found by
- * list_gpiochip.
- */
-int main(int argc, char *argv[])
-{
- char *prefix;
- int valid;
- struct gpiochip_info *cinfo;
- struct gpiochip_info *current;
- int i;
- int ret;
-
- if (argc < 3) {
- printf("Usage: %s prefix is_valid", argv[0]);
- exit(EXIT_FAILURE);
- }
-
- prefix = argv[1];
- valid = strcmp(argv[2], "true") == 0 ? 1 : 0;
-
- printf("Test gpiochip %s: ", prefix);
- cinfo = list_gpiochip(prefix, &ret);
- if (!cinfo) {
- if (!valid && ret == 0) {
- printf("Invalid test successful\n");
- ret = 0;
- goto out;
- } else {
- ret = -EINVAL;
- goto out;
- }
- } else if (cinfo && !valid) {
- ret = -EINVAL;
- goto out;
- }
- current = cinfo;
- for (i = 0; i < ret; i++) {
- gpio_pin_tests(current, 0);
- gpio_pin_tests(current, current->lines - 1);
- gpio_pin_tests(current, random() % current->lines);
- current++;
- }
- ret = 0;
- printf("successful\n");
-
-out:
- if (ret)
- fprintf(stderr, "gpio<%s> test failed\n", prefix);
-
- if (cinfo)
- free(cinfo);
-
- if (ret)
- exit(EXIT_FAILURE);
-
- return ret;
-}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
index dd269d877562..2d2e5d8763b6 100755
--- a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
@@ -1,135 +1,77 @@
# SPDX-License-Identifier: GPL-2.0
-is_consistent()
-{
- val=
-
- active_low_sysfs=`cat $GPIO_SYSFS/gpio$nr/active_low`
- val_sysfs=`cat $GPIO_SYSFS/gpio$nr/value`
- dir_sysfs=`cat $GPIO_SYSFS/gpio$nr/direction`
- gpio_this_debugfs=`cat $GPIO_DEBUGFS |grep "gpio-$nr" | sed "s/(.*)//g"`
- dir_debugfs=`echo $gpio_this_debugfs | awk '{print $2}'`
- val_debugfs=`echo $gpio_this_debugfs | awk '{print $3}'`
- if [ $val_debugfs = "lo" ]; then
- val=0
- elif [ $val_debugfs = "hi" ]; then
- val=1
- fi
+# Overrides functions in gpio-mockup.sh to test using the GPIO SYSFS uAPI
- if [ $active_low_sysfs = "1" ]; then
- if [ $val = "0" ]; then
- val="1"
- else
- val="0"
- fi
- fi
+SYSFS=`grep -w sysfs /proc/mounts | cut -f2 -d' '`
+[ -d "$SYSFS" ] || skip "sysfs is not mounted"
- if [ $val_sysfs = $val ] && [ $dir_sysfs = $dir_debugfs ]; then
- echo -n "."
- else
- echo "test fail, exit"
- die
- fi
-}
+GPIO_SYSFS="${SYSFS}/class/gpio"
+[ -d "$GPIO_SYSFS" ] || skip "CONFIG_GPIO_SYSFS is not selected"
-test_pin_logic()
-{
- nr=$1
- direction=$2
- active_low=$3
- value=$4
+PLATFORM_SYSFS=$SYSFS/devices/platform
- echo $direction > $GPIO_SYSFS/gpio$nr/direction
- echo $active_low > $GPIO_SYSFS/gpio$nr/active_low
- if [ $direction = "out" ]; then
- echo $value > $GPIO_SYSFS/gpio$nr/value
- fi
- is_consistent $nr
-}
+sysfs_nr=
+sysfs_ldir=
-test_one_pin()
+# determine the sysfs GPIO number given the $chip and $offset
+# e.g. gpiochip1:32
+find_sysfs_nr()
{
- nr=$1
-
- echo -n "test pin<$nr>"
-
- echo $nr > $GPIO_SYSFS/export 2>/dev/null
-
- if [ X$? != X0 ]; then
- echo "test GPIO pin $nr failed"
- die
- fi
-
- #"Checking if the sysfs is consistent with debugfs: "
- is_consistent $nr
-
- #"Checking the logic of active_low: "
- test_pin_logic $nr out 1 1
- test_pin_logic $nr out 1 0
- test_pin_logic $nr out 0 1
- test_pin_logic $nr out 0 0
-
- #"Checking the logic of direction: "
- test_pin_logic $nr in 1 1
- test_pin_logic $nr out 1 0
- test_pin_logic $nr low 0 1
- test_pin_logic $nr high 0 0
-
- echo $nr > $GPIO_SYSFS/unexport
-
- echo "successful"
+ # e.g. /sys/devices/platform/gpio-mockup.1/gpiochip1
+ local platform=$(find $PLATFORM_SYSFS -mindepth 2 -maxdepth 2 -type d -name $chip)
+ [ "$platform" ] || fail "can't find platform of $chip"
+ # e.g. /sys/devices/platform/gpio-mockup.1/gpio/gpiochip508/base
+ local base=$(find ${platform%/*}/gpio/ -mindepth 2 -maxdepth 2 -type f -name base)
+ [ "$base" ] || fail "can't find base of $chip"
+ sysfs_nr=$(($(< "$base") + $offset))
+ sysfs_ldir="$GPIO_SYSFS/gpio$sysfs_nr"
}
-test_one_pin_fail()
+acquire_line()
{
- nr=$1
-
- echo $nr > $GPIO_SYSFS/export 2>/dev/null
-
- if [ X$? != X0 ]; then
- echo "test invalid pin $nr successful"
- else
- echo "test invalid pin $nr failed"
- echo $nr > $GPIO_SYSFS/unexport 2>/dev/null
- die
- fi
+ [ "$sysfs_nr" ] && return
+ find_sysfs_nr
+ echo "$sysfs_nr" > "$GPIO_SYSFS/export"
}
-list_chip()
+# The helpers being overridden...
+get_line()
{
- echo `ls -d $GPIO_DRV_SYSFS/gpiochip* 2>/dev/null`
+ [ -e "$sysfs_ldir/value" ] && echo $(< "$sysfs_ldir/value")
}
-test_chip()
+set_line()
{
- chip=$1
- name=`basename $chip`
- base=`cat $chip/base`
- ngpio=`cat $chip/ngpio`
- printf "%-10s %-5s %-5s\n" $name $base $ngpio
- if [ $ngpio = "0" ]; then
- echo "number of gpio is zero is not allowed".
- fi
- test_one_pin $base
- test_one_pin $(($base + $ngpio - 1))
- test_one_pin $((( RANDOM % $ngpio ) + $base ))
+ acquire_line
+
+ for option in $*; do
+ case $option in
+ active-high)
+ echo 0 > "$sysfs_ldir/active_low"
+ ;;
+ active-low)
+ echo 1 > "$sysfs_ldir/active_low"
+ ;;
+ input)
+ echo "in" > "$sysfs_ldir/direction"
+ ;;
+ 0)
+ echo "out" > "$sysfs_ldir/direction"
+ echo 0 > "$sysfs_ldir/value"
+ ;;
+ 1)
+ echo "out" > "$sysfs_ldir/direction"
+ echo 1 > "$sysfs_ldir/value"
+ ;;
+ esac
+ done
}
-test_chips_sysfs()
+release_line()
{
- gpiochip=`list_chip $module`
- if [ X"$gpiochip" = X ]; then
- if [ X"$valid" = Xfalse ]; then
- echo "successful"
- else
- echo "fail"
- die
- fi
- else
- for chip in $gpiochip; do
- test_chip $chip
- done
- fi
+ [ "$sysfs_nr" ] || return 0
+ echo "$sysfs_nr" > "$GPIO_SYSFS/unexport"
+ sysfs_nr=
+ sysfs_ldir=
}
-
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
index 7f35b9880485..fc2dd4c24d06 100755
--- a/tools/testing/selftests/gpio/gpio-mockup.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -1,72 +1,57 @@
-#!/bin/bash
+#!/bin/bash -efu
# SPDX-License-Identifier: GPL-2.0
#exit status
-#1: Internal error
-#2: sysfs/debugfs not mount
-#3: insert module fail when gpio-mockup is a module.
-#4: Skip test including run as non-root user.
-#5: other reason.
-
-SYSFS=
-GPIO_SYSFS=
-GPIO_DRV_SYSFS=
+#0: success
+#1: fail
+#4: skip test - including run as non-root user
+
+BASE=${0%/*}
DEBUGFS=
GPIO_DEBUGFS=
-dev_type=
-module=
+dev_type="cdev"
+module="gpio-mockup"
+verbose=
+full_test=
+random=
+uapi_opt=
+active_opt=
+bias_opt=
+line_set_pid=
-# Kselftest framework requirement - SKIP code is 4.
+# Kselftest return codes
+ksft_fail=1
ksft_skip=4
usage()
{
echo "Usage:"
- echo "$0 [-f] [-m name] [-t type]"
- echo "-f: full test. It maybe conflict with existence gpio device."
- echo "-m: module name, default name is gpio-mockup. It could also test"
- echo " other gpio device."
- echo "-t: interface type: chardev(char device) and sysfs(being"
- echo " deprecated). The first one is default"
- echo ""
- echo "$0 -h"
- echo "This usage"
+ echo "$0 [-frv] [-t type]"
+ echo "-f: full test (minimal set run by default)"
+ echo "-r: test random lines as well as fence posts"
+ echo "-t: interface type:"
+ echo " cdev (character device ABI) - default"
+ echo " cdev_v1 (deprecated character device ABI)"
+ echo " sysfs (deprecated SYSFS ABI)"
+ echo "-v: verbose progress reporting"
+ exit $ksft_fail
}
-prerequisite()
+skip()
{
- msg="skip all tests:"
- if [ $UID != 0 ]; then
- echo $msg must be run as root >&2
- exit $ksft_skip
- fi
- SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
- if [ ! -d "$SYSFS" ]; then
- echo $msg sysfs is not mounted >&2
- exit 2
- fi
- GPIO_SYSFS=`echo $SYSFS/class/gpio`
- GPIO_DRV_SYSFS=`echo $SYSFS/devices/platform/$module/gpio`
- DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
- if [ ! -d "$DEBUGFS" ]; then
- echo $msg debugfs is not mounted >&2
- exit 2
- fi
- GPIO_DEBUGFS=`echo $DEBUGFS/gpio`
- source gpio-mockup-sysfs.sh
+ echo "$*" >&2
+ echo "GPIO $module test SKIP"
+ exit $ksft_skip
}
-try_insert_module()
+prerequisite()
{
- if [ -d "$GPIO_DRV_SYSFS" ]; then
- echo "$GPIO_DRV_SYSFS exist. Skip insert module"
- else
- modprobe -q $module $1
- if [ X$? != X0 ]; then
- echo $msg insmod $module failed >&2
- exit 3
- fi
- fi
+ [ $(id -u) -eq 0 ] || skip "must be run as root"
+
+ DEBUGFS=$(grep -w debugfs /proc/mounts | cut -f2 -d' ')
+ [ -d "$DEBUGFS" ] || skip "debugfs is not mounted"
+
+ GPIO_DEBUGFS=$DEBUGFS/$module
}
remove_module()
@@ -74,133 +59,342 @@ remove_module()
modprobe -r -q $module
}
-die()
+cleanup()
{
+ set +e
+ release_line
remove_module
- exit 5
+ jobs -p | xargs -r kill > /dev/null 2>&1
}
-test_chips()
+fail()
{
- if [ X$dev_type = Xsysfs ]; then
- echo "WARNING: sysfs ABI of gpio is going to deprecated."
- test_chips_sysfs $*
- else
- $BASE/gpio-mockup-chardev $*
- fi
+ echo "test failed: $*" >&2
+ echo "GPIO $module test FAIL"
+ exit $ksft_fail
+}
+
+try_insert_module()
+{
+ modprobe -q $module "$1" || fail "insert $module failed with error $?"
+}
+
+log()
+{
+ [ -z "$verbose" ] || echo "$*"
}
-gpio_test()
+# The following line helpers, release_Line, get_line and set_line, all
+# make use of the global $chip and $offset variables.
+#
+# This implementation drives the GPIO character device (cdev) uAPI.
+# Other implementations may override these to test different uAPIs.
+
+# Release any resources related to the line
+release_line()
{
- param=$1
- valid=$2
+ [ "$line_set_pid" ] && kill $line_set_pid && wait $line_set_pid || true
+ line_set_pid=
+}
- if [ X"$param" = X ]; then
- die
+# Read the current value of the line
+get_line()
+{
+ release_line
+
+ local cdev_opts=${uapi_opt}${active_opt}
+ $BASE/gpio-mockup-cdev $cdev_opts /dev/$chip $offset
+ echo $?
+}
+
+# Set the state of the line
+#
+# Changes to line configuration are provided as parameters.
+# The line is assumed to be an output if the line value 0 or 1 is
+# specified, else an input.
+set_line()
+{
+ local val=
+
+ release_line
+
+ # parse config options...
+ for option in $*; do
+ case $option in
+ active-low)
+ active_opt="-l "
+ ;;
+ active-high)
+ active_opt=
+ ;;
+ bias-none)
+ bias_opt=
+ ;;
+ pull-down)
+ bias_opt="-bpull-down "
+ ;;
+ pull-up)
+ bias_opt="-bpull-up "
+ ;;
+ 0)
+ val=0
+ ;;
+ 1)
+ val=1
+ ;;
+ esac
+ done
+
+ local cdev_opts=${uapi_opt}${active_opt}
+ if [ "$val" ]; then
+ $BASE/gpio-mockup-cdev $cdev_opts -s$val /dev/$chip $offset &
+ # failure to set is detected by reading mockup and toggling values
+ line_set_pid=$!
+ # allow for gpio-mockup-cdev to launch and request line
+ # (there is limited value in checking if line has been requested)
+ sleep 0.01
+ elif [ "$bias_opt" ]; then
+ cdev_opts=${cdev_opts}${bias_opt}
+ $BASE/gpio-mockup-cdev $cdev_opts /dev/$chip $offset || true
fi
- try_insert_module "gpio_mockup_ranges=$param"
- echo -n "GPIO $module test with ranges: <"
- echo "$param>: "
- printf "%-10s %s\n" $param
- test_chips $module $valid
- remove_module
}
-BASE=`dirname $0`
+assert_line()
+{
+ local val
+ # don't need any retry here as set_mock allows for propagation
+ val=$(get_line)
+ [ "$val" = "$1" ] || fail "line value is ${val:-empty} when $1 was expected"
+}
+
+# The following mockup helpers all make use of the $mock_line
+assert_mock()
+{
+ local backoff_wait=10
+ local retry=0
+ local val
+ # retry allows for set propagation from uAPI to mockup
+ while true; do
+ val=$(< $mock_line)
+ [ "$val" = "$1" ] && break
+ retry=$((retry + 1))
+ [ $retry -lt 5 ] || fail "mockup $mock_line value ${val:-empty} when $1 expected"
+ sleep $(printf "%0.2f" $((backoff_wait))e-3)
+ backoff_wait=$((backoff_wait * 2))
+ done
+}
+
+set_mock()
+{
+ echo "$1" > $mock_line
+ # allow for set propagation - so we won't be in a race with set_line
+ assert_mock "$1"
+}
-dev_type=
-TEMP=`getopt -o fhm:t: -n '$0' -- "$@"`
+# test the functionality of a line
+#
+# The line is set from the mockup side and is read from the userspace side
+# (input), and is set from the userspace side and is read from the mockup side
+# (output).
+#
+# Setting the mockup pull using the userspace interface bias settings is
+# tested where supported by the userspace interface (cdev).
+test_line()
+{
+ chip=$1
+ offset=$2
+ log "test_line $chip $offset"
+ mock_line=$GPIO_DEBUGFS/$chip/$offset
+ [ -e "$mock_line" ] || fail "missing line $chip:$offset"
-if [ "$?" != "0" ]; then
- echo "Parameter process failed, Terminating..." >&2
- exit 1
-fi
+ # test input active-high
+ set_mock 1
+ set_line input active-high
+ assert_line 1
+ set_mock 0
+ assert_line 0
+ set_mock 1
+ assert_line 1
+
+ if [ "$full_test" ]; then
+ if [ "$dev_type" != "sysfs" ]; then
+ # test pulls
+ set_mock 0
+ set_line input pull-up
+ assert_line 1
+ set_mock 0
+ assert_line 0
+
+ set_mock 1
+ set_line input pull-down
+ assert_line 0
+ set_mock 1
+ assert_line 1
+
+ set_line bias-none
+ fi
+
+ # test input active-low
+ set_mock 0
+ set_line active-low
+ assert_line 1
+ set_mock 1
+ assert_line 0
+ set_mock 0
+ assert_line 1
+
+ # test output active-high
+ set_mock 1
+ set_line active-high 0
+ assert_mock 0
+ set_line 1
+ assert_mock 1
+ set_line 0
+ assert_mock 0
+ fi
+
+ # test output active-low
+ set_mock 0
+ set_line active-low 0
+ assert_mock 1
+ set_line 1
+ assert_mock 0
+ set_line 0
+ assert_mock 1
+
+ release_line
+}
+
+test_no_line()
+{
+ log test_no_line "$*"
+ [ ! -e "$GPIO_DEBUGFS/$1/$2" ] || fail "unexpected line $1:$2"
+}
-# Note the quotes around `$TEMP': they are essential!
-eval set -- "$TEMP"
+# Load the module and check that the expected number of gpiochips, with the
+# expected number of lines, are created and are functional.
+#
+# $1 is the gpio_mockup_ranges parameter for the module
+# The remaining parameters are the number of lines, n, expected for each of
+# the gpiochips expected to be created.
+#
+# For each gpiochip the fence post lines, 0 and n-1, are tested, and the
+# line on the far side of the fence post, n, is tested to not exist.
+#
+# If the $random flag is set then a random line in the middle of the
+# gpiochip is tested as well.
+insmod_test()
+{
+ local ranges=
+ local gc=
+ local width=
-while true; do
- case $1 in
- -f)
+ [ "${1:-}" ] || fail "missing ranges"
+ ranges=$1 ; shift
+ try_insert_module "gpio_mockup_ranges=$ranges"
+ log "GPIO $module test with ranges: <$ranges>:"
+ # e.g. /sys/kernel/debug/gpio-mockup/gpiochip1
+ gpiochip=$(find "$DEBUGFS/$module/" -name gpiochip* -type d | sort)
+ for chip in $gpiochip; do
+ gc=${chip##*/}
+ [ "${1:-}" ] || fail "unexpected chip - $gc"
+ width=$1 ; shift
+ test_line $gc 0
+ if [ "$random" -a $width -gt 2 ]; then
+ test_line $gc $((RANDOM % ($width - 2) + 1))
+ fi
+ test_line $gc $(($width - 1))
+ test_no_line $gc $width
+ done
+ [ "${1:-}" ] && fail "missing expected chip of width $1"
+ remove_module || fail "failed to remove module with error $?"
+}
+
+while getopts ":frvt:" opt; do
+ case $opt in
+ f)
full_test=true
- shift
- ;;
- -h)
- usage
- exit
;;
- -m)
- module=$2
- shift 2
+ r)
+ random=true
;;
- -t)
- dev_type=$2
- shift 2
+ t)
+ dev_type=$OPTARG
;;
- --)
- shift
- break
+ v)
+ verbose=true
;;
*)
- echo "Internal error!"
- exit 1
+ usage
;;
esac
done
+shift $((OPTIND - 1))
-if [ X"$module" = X ]; then
- module="gpio-mockup"
-fi
-
-if [ X$dev_type != Xsysfs ]; then
- dev_type="chardev"
-fi
+[ "${1:-}" ] && fail "unknown argument '$1'"
prerequisite
-echo "1. Test dynamic allocation of gpio successful means insert gpiochip and"
-echo " manipulate gpio pin successful"
-gpio_test "-1,32" true
-gpio_test "-1,32,-1,32" true
-gpio_test "-1,32,-1,32,-1,32" true
-if [ X$full_test = Xtrue ]; then
- gpio_test "-1,32,32,64" true
- gpio_test "-1,32,40,64,-1,5" true
- gpio_test "-1,32,32,64,-1,32" true
- gpio_test "0,32,32,64,-1,32,-1,32" true
- gpio_test "-1,32,-1,32,0,32,32,64" true
- echo "2. Do basic test: successful means insert gpiochip and"
- echo " manipulate gpio pin successful"
- gpio_test "0,32" true
- gpio_test "0,32,32,64" true
- gpio_test "0,32,40,64,64,96" true
+trap 'exit $ksft_fail' SIGTERM SIGINT
+trap cleanup EXIT
+
+case "$dev_type" in
+sysfs)
+ source $BASE/gpio-mockup-sysfs.sh
+ echo "WARNING: gpio sysfs ABI is deprecated."
+ ;;
+cdev_v1)
+ echo "WARNING: gpio cdev ABI v1 is deprecated."
+ uapi_opt="-u1 "
+ ;;
+cdev)
+ ;;
+*)
+ fail "unknown interface type: $dev_type"
+ ;;
+esac
+
+remove_module || fail "can't remove existing $module module"
+
+# manual gpio allocation tests fail if a physical chip already exists
+[ "$full_test" -a -e "/dev/gpiochip0" ] && skip "full tests conflict with gpiochip0"
+
+echo "1. Module load tests"
+echo "1.1. dynamic allocation of gpio"
+insmod_test "-1,32" 32
+insmod_test "-1,23,-1,32" 23 32
+insmod_test "-1,23,-1,26,-1,32" 23 26 32
+if [ "$full_test" ]; then
+ echo "1.2. manual allocation of gpio"
+ insmod_test "0,32" 32
+ insmod_test "0,32,32,60" 32 28
+ insmod_test "0,32,40,64,64,96" 32 24 32
+ echo "1.3. dynamic and manual allocation of gpio"
+ insmod_test "-1,32,32,62" 32 30
+ insmod_test "-1,22,-1,23,0,24,32,64" 22 23 24 32
+ insmod_test "-1,32,32,60,-1,29" 32 28 29
+ insmod_test "-1,32,40,64,-1,5" 32 24 5
+ insmod_test "0,32,32,44,-1,22,-1,31" 32 12 22 31
fi
-echo "3. Error test: successful means insert gpiochip failed"
-echo "3.1 Test number of gpio overflow"
-#Currently: The max number of gpio(1024) is defined in arm architecture.
-gpio_test "-1,32,-1,1024" false
-if [ X$full_test = Xtrue ]; then
- echo "3.2 Test zero line of gpio"
- gpio_test "0,0" false
- echo "3.3 Test range overlap"
- echo "3.3.1 Test corner case"
- gpio_test "0,32,0,1" false
- gpio_test "0,32,32,64,32,40" false
- gpio_test "0,32,35,64,35,45" false
- gpio_test "0,32,31,32" false
- gpio_test "0,32,32,64,36,37" false
- gpio_test "0,32,35,64,34,36" false
- echo "3.3.2 Test inserting invalid second gpiochip"
- gpio_test "0,32,30,35" false
- gpio_test "0,32,1,5" false
- gpio_test "10,32,9,14" false
- gpio_test "10,32,30,35" false
- echo "3.3.3 Test others"
- gpio_test "0,32,40,56,39,45" false
- gpio_test "0,32,40,56,30,33" false
- gpio_test "0,32,40,56,30,41" false
- gpio_test "0,32,40,56,20,21" false
+echo "2. Module load error tests"
+echo "2.1 no lines defined"
+insmod_test "0,0"
+if [ "$full_test" ]; then
+ echo "2.2 ignore range overlap"
+ insmod_test "0,32,0,1" 32
+ insmod_test "0,32,1,5" 32
+ insmod_test "0,32,30,35" 32
+ insmod_test "0,32,31,32" 32
+ insmod_test "10,32,30,35" 22
+ insmod_test "10,32,9,14" 22
+ insmod_test "0,32,20,21,40,56" 32 16
+ insmod_test "0,32,32,64,32,40" 32 32
+ insmod_test "0,32,32,64,36,37" 32 32
+ insmod_test "0,32,35,64,34,36" 32 29
+ insmod_test "0,30,35,64,35,45" 30 29
+ insmod_test "0,32,40,56,30,33" 32 16
+ insmod_test "0,32,40,56,30,41" 32 16
+ insmod_test "0,32,40,56,39,45" 32 16
fi
-echo GPIO test PASS
-
+echo "GPIO $module test PASS"
diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh
new file mode 100755
index 000000000000..bbc29ed9c60a
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-sim.sh
@@ -0,0 +1,418 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl>
+
+BASE_DIR=`dirname $0`
+CONFIGFS_DIR="/sys/kernel/config/gpio-sim"
+MODULE="gpio-sim"
+
+fail() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test FAIL"
+ exit 1
+}
+
+skip() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test SKIP"
+ exit 4
+}
+
+remove_chip() {
+ local CHIP=$1
+
+ for FILE in $CONFIGFS_DIR/$CHIP/*; do
+ BANK=`basename $FILE`
+ if [ "$BANK" = "live" -o "$BANK" = "dev_name" ]; then
+ continue
+ fi
+
+ LINES=`ls $CONFIGFS_DIR/$CHIP/$BANK/ | grep -E ^line`
+ if [ "$?" = 0 ]; then
+ for LINE in $LINES; do
+ if [ -e $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog ]; then
+ rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog || \
+ fail "Unable to remove the hog"
+ fi
+
+ rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE || \
+ fail "Unable to remove the line"
+ done
+ fi
+
+ rmdir $CONFIGFS_DIR/$CHIP/$BANK
+ done
+
+ rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip"
+}
+
+create_chip() {
+ local CHIP=$1
+
+ mkdir $CONFIGFS_DIR/$CHIP
+}
+
+create_bank() {
+ local CHIP=$1
+ local BANK=$2
+
+ mkdir $CONFIGFS_DIR/$CHIP/$BANK
+}
+
+set_label() {
+ local CHIP=$1
+ local BANK=$2
+ local LABEL=$3
+
+ echo $LABEL > $CONFIGFS_DIR/$CHIP/$BANK/label || fail "Unable to set the chip label"
+}
+
+set_num_lines() {
+ local CHIP=$1
+ local BANK=$2
+ local NUM_LINES=$3
+
+ echo $NUM_LINES > $CONFIGFS_DIR/$CHIP/$BANK/num_lines || \
+ fail "Unable to set the number of lines"
+}
+
+set_line_name() {
+ local CHIP=$1
+ local BANK=$2
+ local OFFSET=$3
+ local NAME=$4
+ local LINE_DIR=$CONFIGFS_DIR/$CHIP/$BANK/line$OFFSET
+
+ test -d $LINE_DIR || mkdir $LINE_DIR
+ echo $NAME > $LINE_DIR/name || fail "Unable to set the line name"
+}
+
+enable_chip() {
+ local CHIP=$1
+
+ echo 1 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to enable the chip"
+}
+
+disable_chip() {
+ local CHIP=$1
+
+ echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip"
+}
+
+configfs_cleanup() {
+ for CHIP in `ls $CONFIGFS_DIR/`; do
+ disable_chip $CHIP
+ remove_chip $CHIP
+ done
+}
+
+configfs_chip_name() {
+ local CHIP=$1
+ local BANK=$2
+
+ cat $CONFIGFS_DIR/$CHIP/$BANK/chip_name 2> /dev/null || \
+ fail "unable to read the chip name from configfs"
+}
+
+configfs_dev_name() {
+ local CHIP=$1
+
+ cat $CONFIGFS_DIR/$CHIP/dev_name 2> /dev/null || \
+ fail "unable to read the device name from configfs"
+}
+
+get_chip_num_lines() {
+ local CHIP=$1
+ local BANK=$2
+
+ $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` num-lines || \
+ fail "unable to read the number of lines from the character device"
+}
+
+get_chip_label() {
+ local CHIP=$1
+ local BANK=$2
+
+ $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` label || \
+ fail "unable to read the chip label from the character device"
+}
+
+get_line_name() {
+ local CHIP=$1
+ local BANK=$2
+ local OFFSET=$3
+
+ $BASE_DIR/gpio-line-name /dev/`configfs_chip_name $CHIP $BANK` $OFFSET || \
+ fail "unable to read the line name from the character device"
+}
+
+sysfs_set_pull() {
+ local DEV=$1
+ local BANK=$2
+ local OFFSET=$3
+ local PULL=$4
+ local DEVNAME=`configfs_dev_name $DEV`
+ local CHIPNAME=`configfs_chip_name $DEV $BANK`
+ local SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio$OFFSET/pull"
+
+ echo $PULL > $SYSFS_PATH || fail "Unable to set line pull in sysfs"
+}
+
+# Load the gpio-sim module. This will pull in configfs if needed too.
+modprobe gpio-sim || skip "unable to load the gpio-sim module"
+# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed.
+for IDX in `seq 5`; do
+ if [ "$IDX" -eq "5" ]; then
+ skip "configfs not mounted at /sys/kernel/config"
+ fi
+
+ mountpoint -q /sys/kernel/config && break
+ sleep 0.1
+done
+# If the module was already loaded: remove all previous chips
+configfs_cleanup
+
+trap "exit 1" SIGTERM SIGINT
+trap configfs_cleanup EXIT
+
+echo "1. chip_name and dev_name attributes"
+
+echo "1.1. Chip name is communicated to user"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work"
+disable_chip chip
+remove_chip chip
+
+echo "1.2. chip_name returns 'none' if the chip is still pending"
+create_chip chip
+create_bank chip bank
+test "`cat $CONFIGFS_DIR/chip/bank/chip_name`" = "none" || \
+ fail "chip_name doesn't return 'none' for a pending chip"
+remove_chip chip
+
+echo "1.3. Device name is communicated to user"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work"
+disable_chip chip
+remove_chip chip
+
+echo "2. Creating and configuring simulated chips"
+
+echo "2.1. Default number of lines is 1"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1"
+disable_chip chip
+remove_chip chip
+
+echo "2.2. Number of lines can be specified"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 16
+enable_chip chip
+test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16"
+disable_chip chip
+remove_chip chip
+
+echo "2.3. Label can be set"
+create_chip chip
+create_bank chip bank
+set_label chip bank foobar
+enable_chip chip
+test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect"
+disable_chip chip
+remove_chip chip
+
+echo "2.4. Label can be left empty"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty"
+disable_chip chip
+remove_chip chip
+
+echo "2.5. Line names can be configured"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 16
+set_line_name chip bank 0 foo
+set_line_name chip bank 2 bar
+enable_chip chip
+test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect"
+test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect"
+disable_chip chip
+remove_chip chip
+
+echo "2.6. Line config can remain unused if offset is greater than number of lines"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 2
+set_line_name chip bank 5 foobar
+enable_chip chip
+test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect"
+test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect"
+disable_chip chip
+remove_chip chip
+
+echo "2.7. Line configfs directory names are sanitized"
+create_chip chip
+create_bank chip bank
+mkdir $CONFIGFS_DIR/chip/bank/line12foobar 2> /dev/null && \
+ fail "invalid configfs line name accepted"
+mkdir $CONFIGFS_DIR/chip/bank/line_no_offset 2> /dev/null && \
+ fail "invalid configfs line name accepted"
+remove_chip chip
+
+echo "2.8. Multiple chips can be created"
+CHIPS="chip0 chip1 chip2"
+for CHIP in $CHIPS; do
+ create_chip $CHIP
+ create_bank $CHIP bank
+ enable_chip $CHIP
+done
+for CHIP in $CHIPS; do
+ disable_chip $CHIP
+ remove_chip $CHIP
+done
+
+echo "2.9. Can't modify settings when chip is live"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \
+ fail "Setting label of a live chip should fail"
+echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \
+ fail "Setting number of lines of a live chip should fail"
+disable_chip chip
+remove_chip chip
+
+echo "2.10. Can't create line items when chip is live"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail"
+disable_chip chip
+remove_chip chip
+
+echo "2.11. Probe errors are propagated to user-space"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 99999
+echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Probe error was not propagated"
+remove_chip chip
+
+echo "2.12. Cannot enable a chip without any GPIO banks"
+create_chip chip
+echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Chip enabled without any GPIO banks"
+remove_chip chip
+
+echo "2.13. Duplicate chip labels are not allowed"
+create_chip chip
+create_bank chip bank0
+set_label chip bank0 foobar
+create_bank chip bank1
+set_label chip bank1 foobar
+echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Duplicate chip labels were not rejected"
+remove_chip chip
+
+echo "2.14. Lines can be hogged"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog
+enable_chip chip
+$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \
+ fail "Setting the value of a hogged line shouldn't succeed"
+disable_chip chip
+remove_chip chip
+
+echo "3. Controlling simulated chips"
+
+echo "3.1. Pull can be set over sysfs"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+sysfs_set_pull chip bank 0 pull-up
+$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 0
+test "$?" = "1" || fail "pull set incorrectly"
+sysfs_set_pull chip bank 0 pull-down
+$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1
+test "$?" = "0" || fail "pull set incorrectly"
+disable_chip chip
+remove_chip chip
+
+echo "3.2. Pull can be read from sysfs"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull
+test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed"
+sysfs_set_pull chip bank 0 pull-up
+test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed"
+disable_chip chip
+remove_chip chip
+
+echo "3.3. Incorrect input in sysfs is rejected"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull"
+echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected"
+disable_chip chip
+remove_chip chip
+
+echo "3.4. Can't write to value"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
+echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly"
+disable_chip chip
+remove_chip chip
+
+echo "4. Simulated GPIO chips are functional"
+
+echo "4.1. Values can be read from sysfs"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
+test `cat $SYSFS_PATH` = "0" || fail "incorrect value read from sysfs"
+$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 &
+sleep 0.1 # FIXME Any better way?
+test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs"
+kill $!
+disable_chip chip
+remove_chip chip
+
+echo "4.2. Bias settings work correctly"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
+$BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0
+test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work"
+disable_chip chip
+remove_chip chip
+
+echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/hid/.gitignore b/tools/testing/selftests/hid/.gitignore
new file mode 100644
index 000000000000..933f483815b2
--- /dev/null
+++ b/tools/testing/selftests/hid/.gitignore
@@ -0,0 +1,7 @@
+bpftool
+*.skel.h
+/host-tools
+/tools
+hid_bpf
+hidraw
+results
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
new file mode 100644
index 000000000000..2839d2612ce3
--- /dev/null
+++ b/tools/testing/selftests/hid/Makefile
@@ -0,0 +1,242 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# based on tools/testing/selftest/bpf/Makefile
+include ../../../build/Build.include
+include ../../../scripts/Makefile.arch
+include ../../../scripts/Makefile.include
+
+TEST_PROGS := hid-core.sh
+TEST_PROGS += hid-apple.sh
+TEST_PROGS += hid-gamepad.sh
+TEST_PROGS += hid-ite.sh
+TEST_PROGS += hid-keyboard.sh
+TEST_PROGS += hid-mouse.sh
+TEST_PROGS += hid-multitouch.sh
+TEST_PROGS += hid-sony.sh
+TEST_PROGS += hid-tablet.sh
+TEST_PROGS += hid-usb_crash.sh
+TEST_PROGS += hid-wacom.sh
+
+TEST_FILES := run-hid-tools-tests.sh
+TEST_FILES += tests
+
+CXX ?= $(CROSS_COMPILE)g++
+
+HOSTPKG_CONFIG := pkg-config
+
+CFLAGS += -g -O0 -rdynamic -Wall -Werror -I$(OUTPUT)
+CFLAGS += -I$(OUTPUT)/tools/include
+
+LDLIBS += -lelf -lz -lrt -lpthread
+
+# Silence some warnings when compiled with clang
+ifneq ($(LLVM),)
+CFLAGS += -Wno-unused-command-line-argument
+endif
+
+# Order correspond to 'make run_tests' order
+TEST_GEN_PROGS = hid_bpf hidraw
+
+# Emit succinct information message describing current building step
+# $1 - generic step name (e.g., CC, LINK, etc);
+# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
+# $3 - target (assumed to be file); only file name will be emitted;
+# $4 - optional extra arg, emitted as-is, if provided.
+ifeq ($(V),1)
+msg =
+else
+msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
+MAKEFLAGS += --no-print-directory
+submake_extras := feature_display=0
+endif
+
+# override lib.mk's default rules
+OVERRIDE_TARGETS := 1
+override define CLEAN
+ $(call msg,CLEAN)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS)
+ $(Q)$(RM) -r $(EXTRA_CLEAN)
+endef
+
+include ../lib.mk
+
+TOOLSDIR := $(top_srcdir)/tools
+LIBDIR := $(TOOLSDIR)/lib
+BPFDIR := $(LIBDIR)/bpf
+TOOLSINCDIR := $(TOOLSDIR)/include
+BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+INCLUDE_DIR := $(SCRATCH_DIR)/include
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+ifneq ($(CROSS_COMPILE),)
+HOST_BUILD_DIR := $(BUILD_DIR)/host
+HOST_SCRATCH_DIR := $(OUTPUT)/host-tools
+HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include
+else
+HOST_BUILD_DIR := $(BUILD_DIR)
+HOST_SCRATCH_DIR := $(SCRATCH_DIR)
+HOST_INCLUDE_DIR := $(INCLUDE_DIR)
+endif
+HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
+RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
+
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+ifeq ($(VMLINUX_BTF),)
+$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
+endif
+
+# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
+# to build individual tests.
+# NOTE: Semicolon at the end is critical to override lib.mk's default static
+# rule for binaries.
+$(notdir $(TEST_GEN_PROGS)): %: $(OUTPUT)/% ;
+
+# sort removes libbpf duplicates when not cross-building
+MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
+ $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \
+ $(INCLUDE_DIR))
+$(MAKE_DIRS):
+ $(call msg,MKDIR,,$@)
+ $(Q)mkdir -p $@
+
+# LLVM's ld.lld doesn't support all the architectures, so use it only on x86
+ifeq ($(SRCARCH),x86)
+LLD := lld
+else
+LLD := ld
+endif
+
+DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
+
+TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
+
+BPFTOOL ?= $(DEFAULT_BPFTOOL)
+$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
+ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
+ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \
+ EXTRA_CFLAGS='-g -O0' \
+ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
+ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
+ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \
+ prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ | $(BUILD_DIR)/libbpf
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g -O0' \
+ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
+$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ | $(HOST_BUILD_DIR)/libbpf
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
+ EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \
+ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
+ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
+endif
+
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
+ifeq ($(VMLINUX_H),)
+ $(call msg,GEN,,$@)
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(call msg,CP,,$@)
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+$(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
+ $(TOOLSDIR)/bpf/resolve_btfids/main.c \
+ $(TOOLSDIR)/lib/rbtree.c \
+ $(TOOLSDIR)/lib/zalloc.c \
+ $(TOOLSDIR)/lib/string.c \
+ $(TOOLSDIR)/lib/ctype.c \
+ $(TOOLSDIR)/lib/str_error_r.c
+ $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \
+ CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \
+ LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \
+ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ)
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+# Determine target endianness.
+IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
+ grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
+MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
+ -I$(INCLUDE_DIR)
+
+CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
+ -Wno-compare-distinct-pointer-types
+
+# Build BPF object using Clang
+# $1 - input .c file
+# $2 - output .o file
+# $3 - CFLAGS
+define CLANG_BPF_BUILD_RULE
+ $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2
+endef
+# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
+define CLANG_NOALU32_BPF_BUILD_RULE
+ $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2
+endef
+# Build BPF object using GCC
+define GCC_BPF_BUILD_RULE
+ $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
+ $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2
+endef
+
+BPF_PROGS_DIR := progs
+BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
+BPF_SRCS := $(notdir $(wildcard $(BPF_PROGS_DIR)/*.c))
+BPF_OBJS := $(patsubst %.c,$(OUTPUT)/%.bpf.o, $(BPF_SRCS))
+BPF_SKELS := $(patsubst %.c,$(OUTPUT)/%.skel.h, $(BPF_SRCS))
+TEST_GEN_FILES += $(BPF_OBJS)
+
+$(BPF_PROGS_DIR)-bpfobjs := y
+$(BPF_OBJS): $(OUTPUT)/%.bpf.o: \
+ $(BPF_PROGS_DIR)/%.c \
+ $(wildcard $(BPF_PROGS_DIR)/*.h) \
+ $(INCLUDE_DIR)/vmlinux.h \
+ $(wildcard $(BPFDIR)/hid_bpf_*.h) \
+ $(wildcard $(BPFDIR)/*.bpf.h) \
+ | $(OUTPUT) $(BPFOBJ)
+ $(call $(BPF_BUILD_RULE),$<,$@, $(BPF_CFLAGS))
+
+$(BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(OUTPUT)
+ $(call msg,GEN-SKEL,$(BINARY),$@)
+ $(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $<
+ $(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked1.o) name $(notdir $(<:.bpf.o=)) > $@
+
+$(OUTPUT)/%.o: %.c $(BPF_SKELS) hid_common.h
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+
+$(OUTPUT)/%: $(OUTPUT)/%.o
+ $(call msg,BINARY,,$@)
+ $(Q)$(LINK.c) $^ $(LDLIBS) -o $@
+
+EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) feature bpftool \
+ $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32)
diff --git a/tools/testing/selftests/hid/config b/tools/testing/selftests/hid/config
new file mode 100644
index 000000000000..1758b055f295
--- /dev/null
+++ b/tools/testing/selftests/hid/config
@@ -0,0 +1,32 @@
+CONFIG_BPF_EVENTS=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_BPF_LSM=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BPF_STREAM_PARSER=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
+CONFIG_FPROBE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_HIDRAW=y
+CONFIG_HID=y
+CONFIG_HID_BPF=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_UHID=y
+CONFIG_LEDS_CLASS_MULTICOLOR=y
+CONFIG_USB=y
+CONFIG_USB_HID=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_ITE=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_PLAYSTATION=y
+CONFIG_PLAYSTATION_FF=y
+CONFIG_HID_SONY=y
+CONFIG_SONY_FF=y
+CONFIG_HID_WACOM=y
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
new file mode 100644
index 000000000000..38c51158adf8
--- /dev/null
+++ b/tools/testing/selftests/hid/config.common
@@ -0,0 +1,241 @@
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_9P_FS=y
+CONFIG_AUDIT=y
+CONFIG_BINFMT_MISC=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BONDING=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_WRITEBACK=y
+CONFIG_CMA_AREAS=7
+CONFIG_CMA=y
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPUSETS=y
+CONFIG_CRYPTO_BLAKE2B=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_XXHASH=y
+CONFIG_DCB=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEFAULT_FQ_CODEL=y
+CONFIG_DEFAULT_RENO=y
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_DMA_CMA=y
+CONFIG_DNS_RESOLVER=y
+CONFIG_EFI_STUB=y
+CONFIG_EFI=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_VESA=y
+CONFIG_FB=y
+CONFIG_FONT_8x16=y
+CONFIG_FONT_MINI_4x6=y
+CONFIG_FONTS=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FUSE_FS=y
+CONFIG_FW_LOADER_USER_HELPER=y
+CONFIG_GART_IOMMU=y
+CONFIG_GENERIC_PHY=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HPET=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_HWPOISON_INJECT=y
+CONFIG_HZ_1000=y
+CONFIG_INET=y
+CONFIG_INTEL_POWERCLAMP=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_NAT=y
+CONFIG_IP6_NF_TARGET_MASQUERADE=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IRQ_POLL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_KEXEC=y
+CONFIG_KPROBES=y
+CONFIG_KSM=y
+CONFIG_LEGACY_VSYSCALL_NONE=y
+CONFIG_LOG_BUF_SHIFT=21
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=0
+CONFIG_LOGO=y
+CONFIG_LSM="selinux,bpf,integrity"
+CONFIG_MAC_PARTITION=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MCORE2=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_MODULES=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_NET_9P=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NETDEVICES=y
+CONFIG_NET_EMATCH=y
+CONFIG_NETFILTER_NETLINK_LOG=y
+CONFIG_NETFILTER_NETLINK_QUEUE=y
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_NAT=y
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NETLABEL=y
+CONFIG_NET_SCH_DEFAULT=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_FQ_CODEL=y
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_NAT=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NO_HZ=y
+CONFIG_NR_CPUS=128
+CONFIG_NUMA_BALANCING=y
+CONFIG_NUMA=y
+CONFIG_NVMEM=y
+CONFIG_OSF_PARTITION=y
+CONFIG_OVERLAY_FS_INDEX=y
+CONFIG_OVERLAY_FS_METACOPY=y
+CONFIG_OVERLAY_FS_XINO_AUTO=y
+CONFIG_OVERLAY_FS=y
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_MSI=y
+CONFIG_PCI=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_PREEMPT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTP_1588_CLOCK=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SMP=y
+CONFIG_SOCK_CGROUP_DATA=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_SYNC_FILE=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_XACCT=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TUN=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_UNIX=y
+CONFIG_USER_NS=y
+CONFIG_VALIDATE_FS_PARSER=y
+CONFIG_VETH=y
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_FS=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VLAN_8021Q=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_USER=y
+CONFIG_ZEROPLUS_FF=y
+CONFIG_KASAN=y
diff --git a/tools/testing/selftests/hid/config.x86_64 b/tools/testing/selftests/hid/config.x86_64
new file mode 100644
index 000000000000..a8721f403c21
--- /dev/null
+++ b/tools/testing/selftests/hid/config.x86_64
@@ -0,0 +1,4 @@
+CONFIG_X86_ACPI_CPUFREQ=y
+CONFIG_X86_CPUID=y
+CONFIG_X86_MSR=y
+CONFIG_X86_POWERNOW_K8=y
diff --git a/tools/testing/selftests/hid/hid-apple.sh b/tools/testing/selftests/hid/hid-apple.sh
new file mode 100755
index 000000000000..656f2d5ae5a9
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-apple.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_apple_keyboard.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-core.sh b/tools/testing/selftests/hid/hid-core.sh
new file mode 100755
index 000000000000..5bbabc12c34f
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-core.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_hid_core.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-gamepad.sh b/tools/testing/selftests/hid/hid-gamepad.sh
new file mode 100755
index 000000000000..1ba00c0ca95f
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-gamepad.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_gamepad.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-ite.sh b/tools/testing/selftests/hid/hid-ite.sh
new file mode 100755
index 000000000000..52c5ccf42292
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-ite.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_ite_keyboard.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-keyboard.sh b/tools/testing/selftests/hid/hid-keyboard.sh
new file mode 100755
index 000000000000..55368f17d1d5
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-keyboard.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_keyboard.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-mouse.sh b/tools/testing/selftests/hid/hid-mouse.sh
new file mode 100755
index 000000000000..7b4ad4f646f7
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-mouse.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_mouse.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-multitouch.sh b/tools/testing/selftests/hid/hid-multitouch.sh
new file mode 100755
index 000000000000..d03a1ddbfb1f
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-multitouch.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_multitouch.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-sony.sh b/tools/testing/selftests/hid/hid-sony.sh
new file mode 100755
index 000000000000..c863c442686e
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-sony.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_sony.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-tablet.sh b/tools/testing/selftests/hid/hid-tablet.sh
new file mode 100755
index 000000000000..e86b3fedafd9
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-tablet.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_tablet.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-usb_crash.sh b/tools/testing/selftests/hid/hid-usb_crash.sh
new file mode 100755
index 000000000000..3f0debe7e8fd
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-usb_crash.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_usb_crash.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-wacom.sh b/tools/testing/selftests/hid/hid-wacom.sh
new file mode 100755
index 000000000000..1630c22726d2
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-wacom.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_wacom_generic.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c
new file mode 100644
index 000000000000..1e979fb3542b
--- /dev/null
+++ b/tools/testing/selftests/hid/hid_bpf.c
@@ -0,0 +1,911 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022-2024 Red Hat */
+#include "hid.skel.h"
+#include "hid_common.h"
+#include <bpf/bpf.h>
+
+struct hid_hw_request_syscall_args {
+ __u8 data[10];
+ unsigned int hid;
+ int retval;
+ size_t size;
+ enum hid_report_type type;
+ __u8 request_type;
+};
+
+FIXTURE(hid_bpf) {
+ struct uhid_device hid;
+ int hidraw_fd;
+ struct hid *skel;
+ struct bpf_link *hid_links[3]; /* max number of programs loaded in a single test */
+};
+static void detach_bpf(FIXTURE_DATA(hid_bpf) * self)
+{
+ int i;
+
+ if (self->hidraw_fd)
+ close(self->hidraw_fd);
+ self->hidraw_fd = 0;
+
+ if (!self->skel)
+ return;
+
+ hid__detach(self->skel);
+
+ for (i = 0; i < ARRAY_SIZE(self->hid_links); i++) {
+ if (self->hid_links[i])
+ bpf_link__destroy(self->hid_links[i]);
+ }
+
+ hid__destroy(self->skel);
+ self->skel = NULL;
+}
+
+FIXTURE_TEARDOWN(hid_bpf) {
+ void *uhid_err;
+
+ uhid_destroy(_metadata, &self->hid);
+
+ detach_bpf(self);
+ pthread_join(self->hid.tid, &uhid_err);
+}
+#define TEARDOWN_LOG(fmt, ...) do { \
+ TH_LOG(fmt, ##__VA_ARGS__); \
+ hid_bpf_teardown(_metadata, self, variant); \
+} while (0)
+
+FIXTURE_SETUP(hid_bpf)
+{
+ int err;
+
+ err = setup_uhid(_metadata, &self->hid, BUS_USB, 0x0001, 0x0a36, rdesc, sizeof(rdesc));
+ ASSERT_OK(err);
+}
+
+struct test_program {
+ const char *name;
+ int insert_head;
+};
+#define LOAD_PROGRAMS(progs) \
+ load_programs(progs, ARRAY_SIZE(progs), _metadata, self, variant)
+#define LOAD_BPF \
+ load_programs(NULL, 0, _metadata, self, variant)
+static void load_programs(const struct test_program programs[],
+ const size_t progs_count,
+ struct __test_metadata *_metadata,
+ FIXTURE_DATA(hid_bpf) * self,
+ const FIXTURE_VARIANT(hid_bpf) * variant)
+{
+ struct bpf_map *iter_map;
+ int err = -EINVAL;
+
+ ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links))
+ TH_LOG("too many programs are to be loaded");
+
+ /* open the bpf file */
+ self->skel = hid__open();
+ ASSERT_OK_PTR(self->skel) TEARDOWN_LOG("Error while calling hid__open");
+
+ for (int i = 0; i < progs_count; i++) {
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int *ops_hid_id;
+
+ prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj,
+ programs[i].name);
+ ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name);
+
+ bpf_program__set_autoload(prog, true);
+
+ map = bpf_object__find_map_by_name(*self->skel->skeleton->obj,
+ programs[i].name + 4);
+ ASSERT_OK_PTR(map) TH_LOG("can not find struct_ops by name '%s'",
+ programs[i].name + 4);
+
+ /* hid_id is the first field of struct hid_bpf_ops */
+ ops_hid_id = bpf_map__initial_value(map, NULL);
+ ASSERT_OK_PTR(ops_hid_id) TH_LOG("unable to retrieve struct_ops data");
+
+ *ops_hid_id = self->hid.hid_id;
+ }
+
+ /* we disable the auto-attach feature of all maps because we
+ * only want the tested one to be manually attached in the next
+ * call to bpf_map__attach_struct_ops()
+ */
+ bpf_object__for_each_map(iter_map, *self->skel->skeleton->obj)
+ bpf_map__set_autoattach(iter_map, false);
+
+ err = hid__load(self->skel);
+ ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err);
+
+ for (int i = 0; i < progs_count; i++) {
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(*self->skel->skeleton->obj,
+ programs[i].name + 4);
+ ASSERT_OK_PTR(map) TH_LOG("can not find struct_ops by name '%s'",
+ programs[i].name + 4);
+
+ self->hid_links[i] = bpf_map__attach_struct_ops(map);
+ ASSERT_OK_PTR(self->hid_links[i]) TH_LOG("failed to attach struct ops '%s'",
+ programs[i].name + 4);
+ }
+
+ hid__attach(self->skel);
+
+ self->hidraw_fd = open_hidraw(&self->hid);
+ ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
+}
+
+/*
+ * A simple test to see if the fixture is working fine.
+ * If this fails, none of the other tests will pass.
+ */
+TEST_F(hid_bpf, test_create_uhid)
+{
+}
+
+/*
+ * Attach hid_first_event to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can change the data
+ */
+TEST_F(hid_bpf, raw_event)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_first_event" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* check that the program is correctly loaded */
+ ASSERT_EQ(self->skel->data->callback_check, 52) TH_LOG("callback_check1");
+ ASSERT_EQ(self->skel->data->callback2_check, 52) TH_LOG("callback2_check1");
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* check that hid_first_event() was executed */
+ ASSERT_EQ(self->skel->data->callback_check, 42) TH_LOG("callback_check1");
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[2], 47);
+
+ /* inject another event */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 1;
+ buf[1] = 47;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* check that hid_first_event() was executed */
+ ASSERT_EQ(self->skel->data->callback_check, 47) TH_LOG("callback_check1");
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[2], 52);
+}
+
+/*
+ * Attach hid_first_event to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can change the data
+ */
+TEST_F(hid_bpf, subprog_raw_event)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_subprog_first_event" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[2], 47);
+
+ /* inject another event */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 1;
+ buf[1] = 47;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[2], 52);
+}
+
+/*
+ * Attach hid_first_event to the given uhid device,
+ * attempt at re-attaching it, we should not lock and
+ * return an invalid struct bpf_link
+ */
+TEST_F(hid_bpf, multiple_attach)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_first_event" },
+ };
+ struct bpf_link *link;
+
+ LOAD_PROGRAMS(progs);
+
+ link = bpf_map__attach_struct_ops(self->skel->maps.first_event);
+ ASSERT_NULL(link) TH_LOG("unexpected return value when re-attaching the struct_ops");
+}
+
+/*
+ * Ensures that we can attach/detach programs
+ */
+TEST_F(hid_bpf, test_attach_detach)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_first_event" },
+ { .name = "hid_second_event" },
+ };
+ struct bpf_link *link;
+ __u8 buf[10] = {0};
+ int err, link_fd;
+
+ LOAD_PROGRAMS(progs);
+
+ link = self->hid_links[0];
+ ASSERT_OK_PTR(link) TH_LOG("HID-BPF link not created");
+
+ link_fd = bpf_link__fd(link);
+ ASSERT_GE(link_fd, 0) TH_LOG("HID-BPF link FD not valid");
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[2], 47);
+
+ /* make sure both programs are run */
+ ASSERT_EQ(buf[3], 52);
+
+ /* pin the first program and immediately unpin it */
+#define PIN_PATH "/sys/fs/bpf/hid_first_event"
+ err = bpf_obj_pin(link_fd, PIN_PATH);
+ ASSERT_OK(err) TH_LOG("error while calling bpf_obj_pin");
+ remove(PIN_PATH);
+#undef PIN_PATH
+ usleep(100000);
+
+ /* detach the program */
+ detach_bpf(self);
+
+ self->hidraw_fd = open_hidraw(&self->hid);
+ ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
+
+ /* inject another event */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 1;
+ buf[1] = 47;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw_no_bpf");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 47);
+ ASSERT_EQ(buf[2], 0);
+ ASSERT_EQ(buf[3], 0);
+
+ /* re-attach our program */
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[2], 47);
+ ASSERT_EQ(buf[3], 52);
+}
+
+/*
+ * Attach hid_change_report_id to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can change the data
+ */
+TEST_F(hid_bpf, test_hid_change_report)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_change_report_id" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 9) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 42);
+ ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test");
+}
+
+/*
+ * Call hid_bpf_input_report against the given uhid device,
+ * check that the program is called and does the expected.
+ */
+TEST_F(hid_bpf, test_hid_user_input_report_call)
+{
+ struct hid_hw_request_syscall_args args = {
+ .retval = -1,
+ .size = 10,
+ };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs,
+ .ctx_in = &args,
+ .ctx_size_in = sizeof(args),
+ );
+ __u8 buf[10] = {0};
+ int err, prog_fd;
+
+ LOAD_BPF;
+
+ args.hid = self->hid.hid_id;
+ args.data[0] = 1; /* report ID */
+ args.data[1] = 2; /* report ID */
+ args.data[2] = 42; /* report ID */
+
+ prog_fd = bpf_program__fd(self->skel->progs.hid_user_input_report);
+
+ /* check that there is no data to read from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, -1) TH_LOG("read_hidraw");
+
+ err = bpf_prog_test_run_opts(prog_fd, &tattrs);
+
+ ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts");
+
+ ASSERT_EQ(args.retval, 0);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 2);
+ ASSERT_EQ(buf[2], 42);
+}
+
+/*
+ * Call hid_bpf_hw_output_report against the given uhid device,
+ * check that the program is called and does the expected.
+ */
+TEST_F(hid_bpf, test_hid_user_output_report_call)
+{
+ struct hid_hw_request_syscall_args args = {
+ .retval = -1,
+ .size = 10,
+ };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs,
+ .ctx_in = &args,
+ .ctx_size_in = sizeof(args),
+ );
+ int err, cond_err, prog_fd;
+ struct timespec time_to_wait;
+
+ LOAD_BPF;
+
+ args.hid = self->hid.hid_id;
+ args.data[0] = 1; /* report ID */
+ args.data[1] = 2; /* report ID */
+ args.data[2] = 42; /* report ID */
+
+ prog_fd = bpf_program__fd(self->skel->progs.hid_user_output_report);
+
+ pthread_mutex_lock(&uhid_output_mtx);
+
+ memset(output_report, 0, sizeof(output_report));
+ clock_gettime(CLOCK_REALTIME, &time_to_wait);
+ time_to_wait.tv_sec += 2;
+
+ err = bpf_prog_test_run_opts(prog_fd, &tattrs);
+ cond_err = pthread_cond_timedwait(&uhid_output_cond, &uhid_output_mtx, &time_to_wait);
+
+ ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts");
+ ASSERT_OK(cond_err) TH_LOG("error while calling waiting for the condition");
+
+ ASSERT_EQ(args.retval, 3);
+
+ ASSERT_EQ(output_report[0], 1);
+ ASSERT_EQ(output_report[1], 2);
+ ASSERT_EQ(output_report[2], 42);
+
+ pthread_mutex_unlock(&uhid_output_mtx);
+}
+
+/*
+ * Call hid_hw_raw_request against the given uhid device,
+ * check that the program is called and does the expected.
+ */
+TEST_F(hid_bpf, test_hid_user_raw_request_call)
+{
+ struct hid_hw_request_syscall_args args = {
+ .retval = -1,
+ .type = HID_FEATURE_REPORT,
+ .request_type = HID_REQ_GET_REPORT,
+ .size = 10,
+ };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs,
+ .ctx_in = &args,
+ .ctx_size_in = sizeof(args),
+ );
+ int err, prog_fd;
+
+ LOAD_BPF;
+
+ args.hid = self->hid.hid_id;
+ args.data[0] = 1; /* report ID */
+
+ prog_fd = bpf_program__fd(self->skel->progs.hid_user_raw_request);
+
+ err = bpf_prog_test_run_opts(prog_fd, &tattrs);
+ ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts");
+
+ ASSERT_EQ(args.retval, 2);
+
+ ASSERT_EQ(args.data[1], 2);
+}
+
+/*
+ * Call hid_hw_raw_request against the given uhid device,
+ * check that the program is called and prevents the
+ * call to uhid.
+ */
+TEST_F(hid_bpf, test_hid_filter_raw_request_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_filter_raw_request" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* first check that we did not attach to device_event */
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 42);
+ ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test");
+
+ /* now check that our program is preventing hid_hw_raw_request() */
+
+ /* emit hid_hw_raw_request from hidraw */
+ /* Get Feature */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 0x1; /* Report Number */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("unexpected success while reading HIDIOCGFEATURE: %d", err);
+ ASSERT_EQ(errno, 20) TH_LOG("unexpected error code while reading HIDIOCGFEATURE: %d",
+ errno);
+
+ /* remove our bpf program and check that we can now emit commands */
+
+ /* detach the program */
+ detach_bpf(self);
+
+ self->hidraw_fd = open_hidraw(&self->hid);
+ ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
+
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGFEATURE: %d", err);
+}
+
+/*
+ * Call hid_hw_raw_request against the given uhid device,
+ * check that the program is called and can issue the call
+ * to uhid and transform the answer.
+ */
+TEST_F(hid_bpf, test_hid_change_raw_request_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_hidraw_raw_request" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_raw_request from hidraw */
+ /* Get Feature */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 0x1; /* Report Number */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_EQ(err, 3) TH_LOG("unexpected returned size while reading HIDIOCGFEATURE: %d", err);
+
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 3);
+ ASSERT_EQ(buf[2], 4);
+}
+
+/*
+ * Call hid_hw_raw_request against the given uhid device,
+ * check that the program is not making infinite loops.
+ */
+TEST_F(hid_bpf, test_hid_infinite_loop_raw_request_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_infinite_loop_raw_request" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_raw_request from hidraw */
+ /* Get Feature */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 0x1; /* Report Number */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_EQ(err, 3) TH_LOG("unexpected returned size while reading HIDIOCGFEATURE: %d", err);
+}
+
+/*
+ * Call hid_hw_output_report against the given uhid device,
+ * check that the program is called and prevents the
+ * call to uhid.
+ */
+TEST_F(hid_bpf, test_hid_filter_output_report_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_filter_output_report" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* first check that we did not attach to device_event */
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 42);
+ ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test");
+
+ /* now check that our program is preventing hid_hw_output_report() */
+
+ buf[0] = 1; /* report ID */
+ buf[1] = 2;
+ buf[2] = 42;
+
+ err = write(self->hidraw_fd, buf, 3);
+ ASSERT_LT(err, 0) TH_LOG("unexpected success while sending hid_hw_output_report: %d", err);
+ ASSERT_EQ(errno, 25) TH_LOG("unexpected error code while sending hid_hw_output_report: %d",
+ errno);
+
+ /* remove our bpf program and check that we can now emit commands */
+
+ /* detach the program */
+ detach_bpf(self);
+
+ self->hidraw_fd = open_hidraw(&self->hid);
+ ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
+
+ err = write(self->hidraw_fd, buf, 3);
+ ASSERT_GE(err, 0) TH_LOG("error while sending hid_hw_output_report: %d", err);
+}
+
+/*
+ * Call hid_hw_output_report against the given uhid device,
+ * check that the program is called and can issue the call
+ * to uhid and transform the answer.
+ */
+TEST_F(hid_bpf, test_hid_change_output_report_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_hidraw_output_report" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_output_report from hidraw */
+ buf[0] = 1; /* report ID */
+ buf[1] = 2;
+ buf[2] = 42;
+
+ err = write(self->hidraw_fd, buf, 10);
+ ASSERT_EQ(err, 2) TH_LOG("unexpected returned size while sending hid_hw_output_report: %d",
+ err);
+}
+
+/*
+ * Call hid_hw_output_report against the given uhid device,
+ * check that the program is not making infinite loops.
+ */
+TEST_F(hid_bpf, test_hid_infinite_loop_output_report_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_infinite_loop_output_report" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_output_report from hidraw */
+ buf[0] = 1; /* report ID */
+ buf[1] = 2;
+ buf[2] = 42;
+
+ err = write(self->hidraw_fd, buf, 8);
+ ASSERT_EQ(err, 2) TH_LOG("unexpected returned size while sending hid_hw_output_report: %d",
+ err);
+}
+
+/*
+ * Attach hid_multiply_event_wq to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can add extra data
+ */
+TEST_F(hid_bpf, test_multiply_events_wq)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_multiply_events_wq" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 47);
+
+ usleep(100000);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 9) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 3);
+}
+
+/*
+ * Attach hid_multiply_event to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can add extra data
+ */
+TEST_F(hid_bpf, test_multiply_events)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_multiply_events" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 9) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 47);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 9) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 52);
+}
+
+/*
+ * Call hid_bpf_input_report against the given uhid device,
+ * check that the program is not making infinite loops.
+ */
+TEST_F(hid_bpf, test_hid_infinite_loop_input_report_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_infinite_loop_input_report" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_output_report from hidraw */
+ buf[0] = 1; /* report ID */
+ buf[1] = 2;
+ buf[2] = 42;
+
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 3);
+
+ /* read the data from hidraw: hid_bpf_try_input_report should work exactly one time */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 4);
+
+ /* read the data from hidraw: there should be none */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, -1) TH_LOG("read_hidraw");
+}
+
+/*
+ * Attach hid_insert{0,1,2} to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the programs have been inserted in the correct order.
+ */
+TEST_F(hid_bpf, test_hid_attach_flags)
+{
+ const struct test_program progs[] = {
+ {
+ .name = "hid_test_insert2",
+ .insert_head = 0,
+ },
+ {
+ .name = "hid_test_insert1",
+ .insert_head = 1,
+ },
+ {
+ .name = "hid_test_insert3",
+ .insert_head = 0,
+ },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[1], 1);
+ ASSERT_EQ(buf[2], 2);
+ ASSERT_EQ(buf[3], 3);
+}
+
+/*
+ * Attach hid_rdesc_fixup to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * check that the hidraw report descriptor has been updated.
+ */
+TEST_F(hid_bpf, test_rdesc_fixup)
+{
+ struct hidraw_report_descriptor rpt_desc = {0};
+ const struct test_program progs[] = {
+ { .name = "hid_rdesc_fixup" },
+ };
+ int err, desc_size;
+
+ LOAD_PROGRAMS(progs);
+
+ /* check that hid_rdesc_fixup() was executed */
+ ASSERT_EQ(self->skel->data->callback2_check, 0x21);
+
+ /* read the exposed report descriptor from hidraw */
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESCSIZE, &desc_size);
+ ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGRDESCSIZE: %d", err);
+
+ /* ensure the new size of the rdesc is bigger than the old one */
+ ASSERT_GT(desc_size, sizeof(rdesc));
+
+ rpt_desc.size = desc_size;
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &rpt_desc);
+ ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGRDESC: %d", err);
+
+ ASSERT_EQ(rpt_desc.value[4], 0x42);
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ char buf[1024];
+
+ if (level == LIBBPF_DEBUG)
+ return 0;
+
+ snprintf(buf, sizeof(buf), "# %s", format);
+
+ vfprintf(stdout, buf, args);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ libbpf_set_print(libbpf_print_fn);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/hid/hid_common.h b/tools/testing/selftests/hid/hid_common.h
new file mode 100644
index 000000000000..e3b267446fa0
--- /dev/null
+++ b/tools/testing/selftests/hid/hid_common.h
@@ -0,0 +1,480 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022-2024 Red Hat */
+
+#include "kselftest_harness.h"
+
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <dirent.h>
+#include <poll.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <linux/hidraw.h>
+#include <linux/uhid.h>
+
+#define SHOW_UHID_DEBUG 0
+
+#define min(a, b) \
+ ({ __typeof__(a) _a = (a); \
+ __typeof__(b) _b = (b); \
+ _a < _b ? _a : _b; })
+
+struct uhid_device {
+ int dev_id; /* uniq (random) number to identify the device */
+ int uhid_fd;
+ int hid_id; /* HID device id in the system */
+ __u16 bus;
+ __u32 vid;
+ __u32 pid;
+ pthread_t tid; /* thread for reading uhid events */
+};
+
+static unsigned char rdesc[] = {
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x09, 0x21, /* Usage (Vendor Usage 0x21) */
+ 0xa1, 0x01, /* COLLECTION (Application) */
+ 0x09, 0x01, /* Usage (Vendor Usage 0x01) */
+ 0xa1, 0x00, /* COLLECTION (Physical) */
+ 0x85, 0x02, /* REPORT_ID (2) */
+ 0x19, 0x01, /* USAGE_MINIMUM (1) */
+ 0x29, 0x08, /* USAGE_MAXIMUM (3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0xff, /* LOGICAL_MAXIMUM (255) */
+ 0x95, 0x08, /* REPORT_COUNT (8) */
+ 0x75, 0x08, /* REPORT_SIZE (8) */
+ 0x81, 0x02, /* INPUT (Data,Var,Abs) */
+ 0xc0, /* END_COLLECTION */
+ 0x09, 0x01, /* Usage (Vendor Usage 0x01) */
+ 0xa1, 0x00, /* COLLECTION (Physical) */
+ 0x85, 0x01, /* REPORT_ID (1) */
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x01, /* USAGE_MINIMUM (1) */
+ 0x29, 0x03, /* USAGE_MAXIMUM (3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x81, 0x02, /* INPUT (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x81, 0x01, /* INPUT (Cnst,Var,Abs) */
+ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
+ 0x09, 0x30, /* USAGE (X) */
+ 0x09, 0x31, /* USAGE (Y) */
+ 0x15, 0x81, /* LOGICAL_MINIMUM (-127) */
+ 0x25, 0x7f, /* LOGICAL_MAXIMUM (127) */
+ 0x75, 0x10, /* REPORT_SIZE (16) */
+ 0x95, 0x02, /* REPORT_COUNT (2) */
+ 0x81, 0x06, /* INPUT (Data,Var,Rel) */
+
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x01, /* USAGE_MINIMUM (1) */
+ 0x29, 0x03, /* USAGE_MAXIMUM (3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x91, 0x02, /* Output (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x91, 0x01, /* Output (Cnst,Var,Abs) */
+
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x06, /* USAGE_MINIMUM (6) */
+ 0x29, 0x08, /* USAGE_MAXIMUM (8) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0xb1, 0x02, /* Feature (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x91, 0x01, /* Output (Cnst,Var,Abs) */
+
+ 0xc0, /* END_COLLECTION */
+ 0xc0, /* END_COLLECTION */
+};
+
+static __u8 feature_data[] = { 1, 2 };
+
+#define ASSERT_OK(data) ASSERT_FALSE(data)
+#define ASSERT_OK_PTR(ptr) ASSERT_NE(NULL, ptr)
+
+#define UHID_LOG(fmt, ...) do { \
+ if (SHOW_UHID_DEBUG) \
+ TH_LOG(fmt, ##__VA_ARGS__); \
+} while (0)
+
+static pthread_mutex_t uhid_started_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t uhid_started = PTHREAD_COND_INITIALIZER;
+
+static pthread_mutex_t uhid_output_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t uhid_output_cond = PTHREAD_COND_INITIALIZER;
+static unsigned char output_report[10];
+
+/* no need to protect uhid_stopped, only one thread accesses it */
+static bool uhid_stopped;
+
+static int uhid_write(struct __test_metadata *_metadata, int fd, const struct uhid_event *ev)
+{
+ ssize_t ret;
+
+ ret = write(fd, ev, sizeof(*ev));
+ if (ret < 0) {
+ TH_LOG("Cannot write to uhid: %m");
+ return -errno;
+ } else if (ret != sizeof(*ev)) {
+ TH_LOG("Wrong size written to uhid: %zd != %zu",
+ ret, sizeof(ev));
+ return -EFAULT;
+ } else {
+ return 0;
+ }
+}
+
+static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb,
+ __u16 bus, __u32 vid, __u32 pid, __u8 *rdesc,
+ size_t rdesc_size)
+{
+ struct uhid_event ev;
+ char buf[25];
+
+ sprintf(buf, "test-uhid-device-%d", rand_nb);
+
+ memset(&ev, 0, sizeof(ev));
+ ev.type = UHID_CREATE;
+ strcpy((char *)ev.u.create.name, buf);
+ ev.u.create.rd_data = rdesc;
+ ev.u.create.rd_size = rdesc_size;
+ ev.u.create.bus = bus;
+ ev.u.create.vendor = vid;
+ ev.u.create.product = pid;
+ ev.u.create.version = 0;
+ ev.u.create.country = 0;
+
+ sprintf(buf, "%d", rand_nb);
+ strcpy((char *)ev.u.create.phys, buf);
+
+ return uhid_write(_metadata, fd, &ev);
+}
+
+static void uhid_destroy(struct __test_metadata *_metadata, struct uhid_device *hid)
+{
+ struct uhid_event ev;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.type = UHID_DESTROY;
+
+ uhid_write(_metadata, hid->uhid_fd, &ev);
+}
+
+static int uhid_event(struct __test_metadata *_metadata, int fd)
+{
+ struct uhid_event ev, answer;
+ ssize_t ret;
+
+ memset(&ev, 0, sizeof(ev));
+ ret = read(fd, &ev, sizeof(ev));
+ if (ret == 0) {
+ UHID_LOG("Read HUP on uhid-cdev");
+ return -EFAULT;
+ } else if (ret < 0) {
+ UHID_LOG("Cannot read uhid-cdev: %m");
+ return -errno;
+ } else if (ret != sizeof(ev)) {
+ UHID_LOG("Invalid size read from uhid-dev: %zd != %zu",
+ ret, sizeof(ev));
+ return -EFAULT;
+ }
+
+ switch (ev.type) {
+ case UHID_START:
+ pthread_mutex_lock(&uhid_started_mtx);
+ pthread_cond_signal(&uhid_started);
+ pthread_mutex_unlock(&uhid_started_mtx);
+
+ UHID_LOG("UHID_START from uhid-dev");
+ break;
+ case UHID_STOP:
+ uhid_stopped = true;
+
+ UHID_LOG("UHID_STOP from uhid-dev");
+ break;
+ case UHID_OPEN:
+ UHID_LOG("UHID_OPEN from uhid-dev");
+ break;
+ case UHID_CLOSE:
+ UHID_LOG("UHID_CLOSE from uhid-dev");
+ break;
+ case UHID_OUTPUT:
+ UHID_LOG("UHID_OUTPUT from uhid-dev");
+
+ pthread_mutex_lock(&uhid_output_mtx);
+ memcpy(output_report,
+ ev.u.output.data,
+ min(ev.u.output.size, sizeof(output_report)));
+ pthread_cond_signal(&uhid_output_cond);
+ pthread_mutex_unlock(&uhid_output_mtx);
+ break;
+ case UHID_GET_REPORT:
+ UHID_LOG("UHID_GET_REPORT from uhid-dev");
+
+ answer.type = UHID_GET_REPORT_REPLY;
+ answer.u.get_report_reply.id = ev.u.get_report.id;
+ answer.u.get_report_reply.err = ev.u.get_report.rnum == 1 ? 0 : -EIO;
+ answer.u.get_report_reply.size = sizeof(feature_data);
+ memcpy(answer.u.get_report_reply.data, feature_data, sizeof(feature_data));
+
+ uhid_write(_metadata, fd, &answer);
+
+ break;
+ case UHID_SET_REPORT:
+ UHID_LOG("UHID_SET_REPORT from uhid-dev");
+
+ answer.type = UHID_SET_REPORT_REPLY;
+ answer.u.set_report_reply.id = ev.u.set_report.id;
+ answer.u.set_report_reply.err = 0; /* success */
+
+ uhid_write(_metadata, fd, &answer);
+ break;
+ default:
+ TH_LOG("Invalid event from uhid-dev: %u", ev.type);
+ }
+
+ return 0;
+}
+
+struct uhid_thread_args {
+ int fd;
+ struct __test_metadata *_metadata;
+};
+static void *uhid_read_events_thread(void *arg)
+{
+ struct uhid_thread_args *args = (struct uhid_thread_args *)arg;
+ struct __test_metadata *_metadata = args->_metadata;
+ struct pollfd pfds[1];
+ int fd = args->fd;
+ int ret = 0;
+
+ pfds[0].fd = fd;
+ pfds[0].events = POLLIN;
+
+ uhid_stopped = false;
+
+ while (!uhid_stopped) {
+ ret = poll(pfds, 1, 100);
+ if (ret < 0) {
+ TH_LOG("Cannot poll for fds: %m");
+ break;
+ }
+ if (pfds[0].revents & POLLIN) {
+ ret = uhid_event(_metadata, fd);
+ if (ret)
+ break;
+ }
+ }
+
+ return (void *)(long)ret;
+}
+
+static int uhid_start_listener(struct __test_metadata *_metadata, pthread_t *tid, int uhid_fd)
+{
+ struct uhid_thread_args args = {
+ .fd = uhid_fd,
+ ._metadata = _metadata,
+ };
+ int err;
+
+ pthread_mutex_lock(&uhid_started_mtx);
+ err = pthread_create(tid, NULL, uhid_read_events_thread, (void *)&args);
+ ASSERT_EQ(0, err) {
+ TH_LOG("Could not start the uhid thread: %d", err);
+ pthread_mutex_unlock(&uhid_started_mtx);
+ close(uhid_fd);
+ return -EIO;
+ }
+ pthread_cond_wait(&uhid_started, &uhid_started_mtx);
+ pthread_mutex_unlock(&uhid_started_mtx);
+
+ return 0;
+}
+
+static int uhid_send_event(struct __test_metadata *_metadata, struct uhid_device *hid,
+ __u8 *buf, size_t size)
+{
+ struct uhid_event ev;
+
+ if (size > sizeof(ev.u.input.data))
+ return -E2BIG;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.type = UHID_INPUT2;
+ ev.u.input2.size = size;
+
+ memcpy(ev.u.input2.data, buf, size);
+
+ return uhid_write(_metadata, hid->uhid_fd, &ev);
+}
+
+static bool match_sysfs_device(struct uhid_device *hid, const char *workdir, struct dirent *dir)
+{
+ char target[20] = "";
+ char phys[512];
+ char uevent[1024];
+ char temp[512];
+ int fd, nread;
+ bool found = false;
+
+ snprintf(target, sizeof(target), "%04X:%04X:%04X.*", hid->bus, hid->vid, hid->pid);
+
+ if (fnmatch(target, dir->d_name, 0))
+ return false;
+
+ /* we found the correct VID/PID, now check for phys */
+ sprintf(uevent, "%s/%s/uevent", workdir, dir->d_name);
+
+ fd = open(uevent, O_RDONLY | O_NONBLOCK);
+ if (fd < 0)
+ return false;
+
+ sprintf(phys, "PHYS=%d", hid->dev_id);
+
+ nread = read(fd, temp, ARRAY_SIZE(temp));
+ if (nread > 0 && (strstr(temp, phys)) != NULL)
+ found = true;
+
+ close(fd);
+
+ return found;
+}
+
+static int get_hid_id(struct uhid_device *hid)
+{
+ const char *workdir = "/sys/devices/virtual/misc/uhid";
+ const char *str_id;
+ DIR *d;
+ struct dirent *dir;
+ int found = -1, attempts = 3;
+
+ /* it would be nice to be able to use nftw, but the no_alu32 target doesn't support it */
+
+ while (found < 0 && attempts > 0) {
+ attempts--;
+ d = opendir(workdir);
+ if (d) {
+ while ((dir = readdir(d)) != NULL) {
+ if (!match_sysfs_device(hid, workdir, dir))
+ continue;
+
+ str_id = dir->d_name + sizeof("0000:0000:0000.");
+ found = (int)strtol(str_id, NULL, 16);
+
+ break;
+ }
+ closedir(d);
+ }
+ if (found < 0)
+ usleep(100000);
+ }
+
+ return found;
+}
+
+static int get_hidraw(struct uhid_device *hid)
+{
+ const char *workdir = "/sys/devices/virtual/misc/uhid";
+ char sysfs[1024];
+ DIR *d, *subd;
+ struct dirent *dir, *subdir;
+ int i, found = -1;
+
+ /* retry 5 times in case the system is loaded */
+ for (i = 5; i > 0; i--) {
+ usleep(10);
+ d = opendir(workdir);
+
+ if (!d)
+ continue;
+
+ while ((dir = readdir(d)) != NULL) {
+ if (!match_sysfs_device(hid, workdir, dir))
+ continue;
+
+ sprintf(sysfs, "%s/%s/hidraw", workdir, dir->d_name);
+
+ subd = opendir(sysfs);
+ if (!subd)
+ continue;
+
+ while ((subdir = readdir(subd)) != NULL) {
+ if (fnmatch("hidraw*", subdir->d_name, 0))
+ continue;
+
+ found = atoi(subdir->d_name + strlen("hidraw"));
+ }
+
+ closedir(subd);
+
+ if (found > 0)
+ break;
+ }
+ closedir(d);
+ }
+
+ return found;
+}
+
+static int open_hidraw(struct uhid_device *hid)
+{
+ int hidraw_number;
+ char hidraw_path[64] = { 0 };
+
+ hidraw_number = get_hidraw(hid);
+ if (hidraw_number < 0)
+ return hidraw_number;
+
+ /* open hidraw node to check the other side of the pipe */
+ sprintf(hidraw_path, "/dev/hidraw%d", hidraw_number);
+ return open(hidraw_path, O_RDWR | O_NONBLOCK);
+}
+
+static int setup_uhid(struct __test_metadata *_metadata, struct uhid_device *hid,
+ __u16 bus, __u32 vid, __u32 pid, const __u8 *rdesc, size_t rdesc_size)
+{
+ const char *path = "/dev/uhid";
+ time_t t;
+ int ret;
+
+ /* initialize random number generator */
+ srand((unsigned int)time(&t));
+
+ hid->dev_id = rand() % 1024;
+ hid->bus = bus;
+ hid->vid = vid;
+ hid->pid = pid;
+
+ hid->uhid_fd = open(path, O_RDWR | O_CLOEXEC);
+ ASSERT_GE(hid->uhid_fd, 0) TH_LOG("open uhid-cdev failed; %d", hid->uhid_fd);
+
+ ret = uhid_create(_metadata, hid->uhid_fd, hid->dev_id, bus, vid, pid,
+ (__u8 *)rdesc, rdesc_size);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("create uhid device failed: %d", ret);
+ close(hid->uhid_fd);
+ return ret;
+ }
+
+ /* locate the uevent file of the created device */
+ hid->hid_id = get_hid_id(hid);
+ ASSERT_GT(hid->hid_id, 0)
+ TH_LOG("Could not locate uhid device id: %d", hid->hid_id);
+
+ ret = uhid_start_listener(_metadata, &hid->tid, hid->uhid_fd);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("could not start udev listener: %d", ret);
+ close(hid->uhid_fd);
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/hid/hidraw.c b/tools/testing/selftests/hid/hidraw.c
new file mode 100644
index 000000000000..d625772f8b7c
--- /dev/null
+++ b/tools/testing/selftests/hid/hidraw.c
@@ -0,0 +1,694 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022-2024 Red Hat */
+
+#include "hid_common.h"
+#include <linux/input.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+/* for older kernels */
+#ifndef HIDIOCREVOKE
+#define HIDIOCREVOKE _IOW('H', 0x0D, int) /* Revoke device access */
+#endif /* HIDIOCREVOKE */
+
+FIXTURE(hidraw) {
+ struct uhid_device hid;
+ int hidraw_fd;
+};
+static void close_hidraw(FIXTURE_DATA(hidraw) * self)
+{
+ if (self->hidraw_fd)
+ close(self->hidraw_fd);
+ self->hidraw_fd = 0;
+}
+
+FIXTURE_TEARDOWN(hidraw) {
+ void *uhid_err;
+
+ uhid_destroy(_metadata, &self->hid);
+
+ close_hidraw(self);
+ pthread_join(self->hid.tid, &uhid_err);
+}
+#define TEARDOWN_LOG(fmt, ...) do { \
+ TH_LOG(fmt, ##__VA_ARGS__); \
+ hidraw_teardown(_metadata, self, variant); \
+} while (0)
+
+FIXTURE_SETUP(hidraw)
+{
+ int err;
+
+ err = setup_uhid(_metadata, &self->hid, BUS_USB, 0x0001, 0x0a37, rdesc, sizeof(rdesc));
+ ASSERT_OK(err);
+
+ self->hidraw_fd = open_hidraw(&self->hid);
+ ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
+}
+
+/*
+ * A simple test to see if the fixture is working fine.
+ * If this fails, none of the other tests will pass.
+ */
+TEST_F(hidraw, test_create_uhid)
+{
+}
+
+/*
+ * Inject one event in the uhid device,
+ * check that we get the same data through hidraw
+ */
+TEST_F(hidraw, raw_event)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 42);
+}
+
+/*
+ * After initial opening/checks of hidraw, revoke the hidraw
+ * node and check that we can not read any more data.
+ */
+TEST_F(hidraw, raw_event_revoked)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 42);
+
+ /* call the revoke ioctl */
+ err = ioctl(self->hidraw_fd, HIDIOCREVOKE, NULL);
+ ASSERT_OK(err) TH_LOG("couldn't revoke the hidraw fd");
+
+ /* inject one other event */
+ buf[0] = 1;
+ buf[1] = 43;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, -1) TH_LOG("read_hidraw");
+ ASSERT_EQ(errno, ENODEV) TH_LOG("unexpected error code while reading the hidraw node: %d",
+ errno);
+}
+
+/*
+ * Revoke the hidraw node and check that we can not do any ioctl.
+ */
+TEST_F(hidraw, ioctl_revoked)
+{
+ int err, desc_size = 0;
+
+ /* call the revoke ioctl */
+ err = ioctl(self->hidraw_fd, HIDIOCREVOKE, NULL);
+ ASSERT_OK(err) TH_LOG("couldn't revoke the hidraw fd");
+
+ /* do an ioctl */
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESCSIZE, &desc_size);
+ ASSERT_EQ(err, -1) TH_LOG("ioctl_hidraw");
+ ASSERT_EQ(errno, ENODEV) TH_LOG("unexpected error code while doing an ioctl: %d",
+ errno);
+}
+
+/*
+ * Setup polling of the fd, and check that revoke works properly.
+ */
+TEST_F(hidraw, poll_revoked)
+{
+ struct pollfd pfds[1];
+ __u8 buf[10] = {0};
+ int err, ready;
+
+ /* setup polling */
+ pfds[0].fd = self->hidraw_fd;
+ pfds[0].events = POLLIN;
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, &self->hid, buf, 6);
+
+ while (true) {
+ ready = poll(pfds, 1, 5000);
+ ASSERT_EQ(ready, 1) TH_LOG("poll return value");
+
+ if (pfds[0].revents & POLLIN) {
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 42);
+
+ /* call the revoke ioctl */
+ err = ioctl(self->hidraw_fd, HIDIOCREVOKE, NULL);
+ ASSERT_OK(err) TH_LOG("couldn't revoke the hidraw fd");
+ } else {
+ break;
+ }
+ }
+
+ ASSERT_TRUE(pfds[0].revents & POLLHUP);
+}
+
+/*
+ * After initial opening/checks of hidraw, revoke the hidraw
+ * node and check that we can not read any more data.
+ */
+TEST_F(hidraw, write_event_revoked)
+{
+ struct timespec time_to_wait;
+ __u8 buf[10] = {0};
+ int err;
+
+ /* inject one event from hidraw */
+ buf[0] = 1; /* report ID */
+ buf[1] = 2;
+ buf[2] = 42;
+
+ pthread_mutex_lock(&uhid_output_mtx);
+
+ memset(output_report, 0, sizeof(output_report));
+ clock_gettime(CLOCK_REALTIME, &time_to_wait);
+ time_to_wait.tv_sec += 2;
+
+ err = write(self->hidraw_fd, buf, 3);
+ ASSERT_EQ(err, 3) TH_LOG("unexpected error while writing to hidraw node: %d", err);
+
+ err = pthread_cond_timedwait(&uhid_output_cond, &uhid_output_mtx, &time_to_wait);
+ ASSERT_OK(err) TH_LOG("error while calling waiting for the condition");
+
+ ASSERT_EQ(output_report[0], 1);
+ ASSERT_EQ(output_report[1], 2);
+ ASSERT_EQ(output_report[2], 42);
+
+ /* call the revoke ioctl */
+ err = ioctl(self->hidraw_fd, HIDIOCREVOKE, NULL);
+ ASSERT_OK(err) TH_LOG("couldn't revoke the hidraw fd");
+
+ /* inject one other event */
+ buf[0] = 1;
+ buf[1] = 43;
+ err = write(self->hidraw_fd, buf, 3);
+ ASSERT_LT(err, 0) TH_LOG("unexpected success while writing to hidraw node: %d", err);
+ ASSERT_EQ(errno, ENODEV) TH_LOG("unexpected error code while writing to hidraw node: %d",
+ errno);
+
+ pthread_mutex_unlock(&uhid_output_mtx);
+}
+
+/*
+ * Test HIDIOCGRDESCSIZE ioctl to get report descriptor size
+ */
+TEST_F(hidraw, ioctl_rdescsize)
+{
+ int desc_size = 0;
+ int err;
+
+ /* call HIDIOCGRDESCSIZE ioctl */
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESCSIZE, &desc_size);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESCSIZE ioctl failed");
+
+ /* verify the size matches our test report descriptor */
+ ASSERT_EQ(desc_size, sizeof(rdesc))
+ TH_LOG("expected size %zu, got %d", sizeof(rdesc), desc_size);
+}
+
+/*
+ * Test HIDIOCGRDESC ioctl to get report descriptor data
+ */
+TEST_F(hidraw, ioctl_rdesc)
+{
+ struct hidraw_report_descriptor desc;
+ int err;
+
+ /* get the full report descriptor */
+ desc.size = sizeof(rdesc);
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &desc);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESC ioctl failed");
+
+ /* verify the descriptor data matches our test descriptor */
+ ASSERT_EQ(memcmp(desc.value, rdesc, sizeof(rdesc)), 0)
+ TH_LOG("report descriptor data mismatch");
+}
+
+/*
+ * Test HIDIOCGRDESC ioctl with smaller buffer size
+ */
+TEST_F(hidraw, ioctl_rdesc_small_buffer)
+{
+ struct hidraw_report_descriptor desc;
+ int err;
+ size_t small_size = sizeof(rdesc) / 2; /* request half the descriptor size */
+
+ /* get partial report descriptor */
+ desc.size = small_size;
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &desc);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESC ioctl failed with small buffer");
+
+ /* verify we got the first part of the descriptor */
+ ASSERT_EQ(memcmp(desc.value, rdesc, small_size), 0)
+ TH_LOG("partial report descriptor data mismatch");
+}
+
+/*
+ * Test HIDIOCGRAWINFO ioctl to get device information
+ */
+TEST_F(hidraw, ioctl_rawinfo)
+{
+ struct hidraw_devinfo devinfo;
+ int err;
+
+ /* get device info */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWINFO, &devinfo);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRAWINFO ioctl failed");
+
+ /* verify device info matches our test setup */
+ ASSERT_EQ(devinfo.bustype, BUS_USB)
+ TH_LOG("expected bustype 0x03, got 0x%x", devinfo.bustype);
+ ASSERT_EQ(devinfo.vendor, 0x0001)
+ TH_LOG("expected vendor 0x0001, got 0x%x", devinfo.vendor);
+ ASSERT_EQ(devinfo.product, 0x0a37)
+ TH_LOG("expected product 0x0a37, got 0x%x", devinfo.product);
+}
+
+/*
+ * Test HIDIOCGFEATURE ioctl to get feature report
+ */
+TEST_F(hidraw, ioctl_gfeature)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /* get feature report */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGFEATURE ioctl failed, got %d", err);
+
+ /* verify we got the expected feature data */
+ ASSERT_EQ(buf[0], feature_data[0])
+ TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]);
+ ASSERT_EQ(buf[1], feature_data[1])
+ TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]);
+}
+
+/*
+ * Test HIDIOCGFEATURE ioctl with invalid report ID
+ */
+TEST_F(hidraw, ioctl_gfeature_invalid)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set invalid report ID (not 1) */
+ buf[0] = 2;
+
+ /* try to get feature report */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE should have failed with invalid report ID");
+ ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno);
+}
+
+/*
+ * Test ioctl with incorrect nr bits
+ */
+TEST_F(hidraw, ioctl_invalid_nr)
+{
+ char buf[256] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /*
+ * craft an ioctl command with wrong _IOC_NR bits
+ */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x00, sizeof(buf)); /* 0 is not valid */
+
+ /* test the ioctl */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-write with wrong _IOC_NR (0) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-write _IOC_NR (0), got errno %d", errno);
+
+ /*
+ * craft an ioctl command with wrong _IOC_NR bits
+ */
+ bad_cmd = _IOC(_IOC_READ, 'H', 0x00, sizeof(buf)); /* 0 is not valid */
+
+ /* test the ioctl */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-only with wrong _IOC_NR (0) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-only _IOC_NR (0), got errno %d", errno);
+
+ /* also test with bigger number */
+ bad_cmd = _IOC(_IOC_READ, 'H', 0x42, sizeof(buf)); /* 0x42 is not valid as well */
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-only with wrong _IOC_NR (0x42) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-only _IOC_NR (0x42), got errno %d", errno);
+
+ /* also test with bigger number: 0x42 is not valid as well */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x42, sizeof(buf));
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-write with wrong _IOC_NR (0x42) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-write _IOC_NR (0x42), got errno %d", errno);
+}
+
+/*
+ * Test ioctl with incorrect type bits
+ */
+TEST_F(hidraw, ioctl_invalid_type)
+{
+ char buf[256] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /*
+ * craft an ioctl command with wrong _IOC_TYPE bits
+ */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'I', 0x01, sizeof(buf)); /* 'I' should be 'H' */
+
+ /* test the ioctl */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl with wrong _IOC_TYPE (I) should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_NR, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCGFEATURE ioctl with incorrect _IOC_DIR bits
+ */
+TEST_F(hidraw, ioctl_gfeature_invalid_dir)
+{
+ __u8 buf[10] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /*
+ * craft an ioctl command with wrong _IOC_DIR bits
+ * HIDIOCGFEATURE should have _IOC_WRITE|_IOC_READ, let's use only _IOC_WRITE
+ */
+ bad_cmd = _IOC(_IOC_WRITE, 'H', 0x07, sizeof(buf)); /* should be _IOC_WRITE|_IOC_READ */
+
+ /* try to get feature report with wrong direction bits */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+
+ /* also test with only _IOC_READ */
+ bad_cmd = _IOC(_IOC_READ, 'H', 0x07, sizeof(buf)); /* should be _IOC_WRITE|_IOC_READ */
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+}
+
+/*
+ * Test read-only ioctl with incorrect _IOC_DIR bits
+ */
+TEST_F(hidraw, ioctl_readonly_invalid_dir)
+{
+ char buf[256] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /*
+ * craft an ioctl command with wrong _IOC_DIR bits
+ * HIDIOCGRAWNAME should have _IOC_READ, let's use _IOC_WRITE
+ */
+ bad_cmd = _IOC(_IOC_WRITE, 'H', 0x04, sizeof(buf)); /* should be _IOC_READ */
+
+ /* try to get device name with wrong direction bits */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGRAWNAME with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+
+ /* also test with _IOC_WRITE|_IOC_READ */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x04, sizeof(buf)); /* should be only _IOC_READ */
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGRAWNAME with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCSFEATURE ioctl to set feature report
+ */
+TEST_F(hidraw, ioctl_sfeature)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* prepare feature report data */
+ buf[0] = 1; /* report ID */
+ buf[1] = 0x42;
+ buf[2] = 0x24;
+
+ /* set feature report */
+ err = ioctl(self->hidraw_fd, HIDIOCSFEATURE(3), buf);
+ ASSERT_EQ(err, 3) TH_LOG("HIDIOCSFEATURE ioctl failed, got %d", err);
+
+ /*
+ * Note: The uhid mock doesn't validate the set report data,
+ * so we just verify the ioctl succeeds
+ */
+}
+
+/*
+ * Test HIDIOCGINPUT ioctl to get input report
+ */
+TEST_F(hidraw, ioctl_ginput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /* get input report */
+ err = ioctl(self->hidraw_fd, HIDIOCGINPUT(sizeof(buf)), buf);
+ ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGINPUT ioctl failed, got %d", err);
+
+ /* verify we got the expected input data */
+ ASSERT_EQ(buf[0], feature_data[0])
+ TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]);
+ ASSERT_EQ(buf[1], feature_data[1])
+ TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]);
+}
+
+/*
+ * Test HIDIOCGINPUT ioctl with invalid report ID
+ */
+TEST_F(hidraw, ioctl_ginput_invalid)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set invalid report ID (not 1) */
+ buf[0] = 2;
+
+ /* try to get input report */
+ err = ioctl(self->hidraw_fd, HIDIOCGINPUT(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGINPUT should have failed with invalid report ID");
+ ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCSINPUT ioctl to set input report
+ */
+TEST_F(hidraw, ioctl_sinput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* prepare input report data */
+ buf[0] = 1; /* report ID */
+ buf[1] = 0x55;
+ buf[2] = 0xAA;
+
+ /* set input report */
+ err = ioctl(self->hidraw_fd, HIDIOCSINPUT(3), buf);
+ ASSERT_EQ(err, 3) TH_LOG("HIDIOCSINPUT ioctl failed, got %d", err);
+
+ /*
+ * Note: The uhid mock doesn't validate the set report data,
+ * so we just verify the ioctl succeeds
+ */
+}
+
+/*
+ * Test HIDIOCGOUTPUT ioctl to get output report
+ */
+TEST_F(hidraw, ioctl_goutput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /* get output report */
+ err = ioctl(self->hidraw_fd, HIDIOCGOUTPUT(sizeof(buf)), buf);
+ ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGOUTPUT ioctl failed, got %d", err);
+
+ /* verify we got the expected output data */
+ ASSERT_EQ(buf[0], feature_data[0])
+ TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]);
+ ASSERT_EQ(buf[1], feature_data[1])
+ TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]);
+}
+
+/*
+ * Test HIDIOCGOUTPUT ioctl with invalid report ID
+ */
+TEST_F(hidraw, ioctl_goutput_invalid)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set invalid report ID (not 1) */
+ buf[0] = 2;
+
+ /* try to get output report */
+ err = ioctl(self->hidraw_fd, HIDIOCGOUTPUT(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGOUTPUT should have failed with invalid report ID");
+ ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCSOUTPUT ioctl to set output report
+ */
+TEST_F(hidraw, ioctl_soutput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* prepare output report data */
+ buf[0] = 1; /* report ID */
+ buf[1] = 0x33;
+ buf[2] = 0xCC;
+
+ /* set output report */
+ err = ioctl(self->hidraw_fd, HIDIOCSOUTPUT(3), buf);
+ ASSERT_EQ(err, 3) TH_LOG("HIDIOCSOUTPUT ioctl failed, got %d", err);
+
+ /*
+ * Note: The uhid mock doesn't validate the set report data,
+ * so we just verify the ioctl succeeds
+ */
+}
+
+/*
+ * Test HIDIOCGRAWNAME ioctl to get device name string
+ */
+TEST_F(hidraw, ioctl_rawname)
+{
+ char name[256] = {0};
+ char expected_name[64];
+ int err;
+
+ /* get device name */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWNAME(sizeof(name)), name);
+ ASSERT_GT(err, 0) TH_LOG("HIDIOCGRAWNAME ioctl failed, got %d", err);
+
+ /* construct expected name based on device id */
+ snprintf(expected_name, sizeof(expected_name), "test-uhid-device-%d", self->hid.dev_id);
+
+ /* verify the name matches expected pattern */
+ ASSERT_EQ(strcmp(name, expected_name), 0)
+ TH_LOG("expected name '%s', got '%s'", expected_name, name);
+}
+
+/*
+ * Test HIDIOCGRAWPHYS ioctl to get device physical address string
+ */
+TEST_F(hidraw, ioctl_rawphys)
+{
+ char phys[256] = {0};
+ char expected_phys[64];
+ int err;
+
+ /* get device physical address */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWPHYS(sizeof(phys)), phys);
+ ASSERT_GT(err, 0) TH_LOG("HIDIOCGRAWPHYS ioctl failed, got %d", err);
+
+ /* construct expected phys based on device id */
+ snprintf(expected_phys, sizeof(expected_phys), "%d", self->hid.dev_id);
+
+ /* verify the phys matches expected value */
+ ASSERT_EQ(strcmp(phys, expected_phys), 0)
+ TH_LOG("expected phys '%s', got '%s'", expected_phys, phys);
+}
+
+/*
+ * Test HIDIOCGRAWUNIQ ioctl to get device unique identifier string
+ */
+TEST_F(hidraw, ioctl_rawuniq)
+{
+ char uniq[256] = {0};
+ int err;
+
+ /* get device unique identifier */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWUNIQ(sizeof(uniq)), uniq);
+ ASSERT_GE(err, 0) TH_LOG("HIDIOCGRAWUNIQ ioctl failed, got %d", err);
+
+ /* uniq is typically empty in our test setup */
+ ASSERT_EQ(strlen(uniq), 0) TH_LOG("expected empty uniq, got '%s'", uniq);
+}
+
+/*
+ * Test device string ioctls with small buffer sizes
+ */
+TEST_F(hidraw, ioctl_strings_small_buffer)
+{
+ char small_buf[8] = {0};
+ char expected_name[64];
+ int err;
+
+ /* test HIDIOCGRAWNAME with small buffer */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWNAME(sizeof(small_buf)), small_buf);
+ ASSERT_EQ(err, sizeof(small_buf))
+ TH_LOG("HIDIOCGRAWNAME with small buffer failed, got %d", err);
+
+ /* construct expected truncated name */
+ snprintf(expected_name, sizeof(expected_name), "test-uhid-device-%d", self->hid.dev_id);
+
+ /* verify we got truncated name (first 8 chars, no null terminator guaranteed) */
+ ASSERT_EQ(strncmp(small_buf, expected_name, sizeof(small_buf)), 0)
+ TH_LOG("expected truncated name to match first %zu chars", sizeof(small_buf));
+
+ /* Note: hidraw driver doesn't guarantee null termination when buffer is too small */
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c
new file mode 100644
index 000000000000..5ecc845ef792
--- /dev/null
+++ b/tools/testing/selftests/hid/progs/hid.c
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red hat */
+#include "hid_bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct attach_prog_args {
+ int prog_fd;
+ unsigned int hid;
+ int retval;
+ int insert_head;
+};
+
+__u64 callback_check = 52;
+__u64 callback2_check = 52;
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */);
+
+ if (!rw_data)
+ return 0; /* EPERM check */
+
+ callback_check = rw_data[1];
+
+ rw_data[2] = rw_data[1] + 5;
+
+ return hid_ctx->size;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops first_event = {
+ .hid_device_event = (void *)hid_first_event,
+ .hid_id = 2,
+};
+
+int __hid_subprog_first_event(struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */);
+
+ if (!rw_data)
+ return 0; /* EPERM check */
+
+ rw_data[2] = rw_data[1] + 5;
+
+ return hid_ctx->size;
+}
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_subprog_first_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ return __hid_subprog_first_event(hid_ctx, type);
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops subprog_first_event = {
+ .hid_device_event = (void *)hid_subprog_first_event,
+ .hid_id = 2,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
+
+ if (!rw_data)
+ return 0; /* EPERM check */
+
+ rw_data[3] = rw_data[2] + 5;
+
+ return hid_ctx->size;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops second_event = {
+ .hid_device_event = (void *)hid_second_event,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */);
+
+ if (!rw_data)
+ return 0; /* EPERM check */
+
+ rw_data[0] = 2;
+
+ return 9;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops change_report_id = {
+ .hid_device_event = (void *)hid_change_report_id,
+};
+
+struct hid_hw_request_syscall_args {
+ /* data needs to come at offset 0 so we can use it in calls */
+ __u8 data[10];
+ unsigned int hid;
+ int retval;
+ size_t size;
+ enum hid_report_type type;
+ __u8 request_type;
+};
+
+SEC("syscall")
+int hid_user_raw_request(struct hid_hw_request_syscall_args *args)
+{
+ struct hid_bpf_ctx *ctx;
+ const size_t size = args->size;
+ int i, ret = 0;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ ctx = hid_bpf_allocate_context(args->hid);
+ if (!ctx)
+ return -1; /* EPERM check */
+
+ ret = hid_bpf_hw_request(ctx,
+ args->data,
+ size,
+ args->type,
+ args->request_type);
+ args->retval = ret;
+
+ hid_bpf_release_context(ctx);
+
+ return 0;
+}
+
+SEC("syscall")
+int hid_user_output_report(struct hid_hw_request_syscall_args *args)
+{
+ struct hid_bpf_ctx *ctx;
+ const size_t size = args->size;
+ int i, ret = 0;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ ctx = hid_bpf_allocate_context(args->hid);
+ if (!ctx)
+ return -1; /* EPERM check */
+
+ ret = hid_bpf_hw_output_report(ctx,
+ args->data,
+ size);
+ args->retval = ret;
+
+ hid_bpf_release_context(ctx);
+
+ return 0;
+}
+
+SEC("syscall")
+int hid_user_input_report(struct hid_hw_request_syscall_args *args)
+{
+ struct hid_bpf_ctx *ctx;
+ const size_t size = args->size;
+ int i, ret = 0;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ ctx = hid_bpf_allocate_context(args->hid);
+ if (!ctx)
+ return -1; /* EPERM check */
+
+ ret = hid_bpf_input_report(ctx, HID_INPUT_REPORT, args->data, size);
+ args->retval = ret;
+
+ hid_bpf_release_context(ctx);
+
+ return 0;
+}
+
+static const __u8 rdesc[] = {
+ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
+ 0x09, 0x32, /* USAGE (Z) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x81, 0x06, /* INPUT (Data,Var,Rel) */
+
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x01, /* USAGE_MINIMUM (1) */
+ 0x29, 0x03, /* USAGE_MAXIMUM (3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x91, 0x02, /* Output (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x91, 0x01, /* Output (Cnst,Var,Abs) */
+
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x06, /* USAGE_MINIMUM (6) */
+ 0x29, 0x08, /* USAGE_MAXIMUM (8) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0xb1, 0x02, /* Feature (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x91, 0x01, /* Output (Cnst,Var,Abs) */
+
+ 0xc0, /* END_COLLECTION */
+ 0xc0, /* END_COLLECTION */
+};
+
+/*
+ * the following program is marked as sleepable (struct_ops.s).
+ * This is not strictly mandatory but is a nice test for
+ * sleepable struct_ops
+ */
+SEC("?struct_ops.s/hid_rdesc_fixup")
+int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hid_ctx)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4096 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ callback2_check = data[4];
+
+ /* insert rdesc at offset 73 */
+ __builtin_memcpy(&data[73], rdesc, sizeof(rdesc));
+
+ /* Change Usage Vendor globally */
+ data[4] = 0x42;
+
+ return sizeof(rdesc) + 73;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops rdesc_fixup = {
+ .hid_rdesc_fixup = (void *)hid_rdesc_fixup,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* we need to be run first */
+ if (data[2] || data[3])
+ return -1;
+
+ data[1] = 1;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_insert1 = {
+ .hid_device_event = (void *)hid_test_insert1,
+ .flags = BPF_F_BEFORE,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* after insert0 and before insert2 */
+ if (!data[1] || data[3])
+ return -1;
+
+ data[2] = 2;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_insert2 = {
+ .hid_device_event = (void *)hid_test_insert2,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* at the end */
+ if (!data[1] || !data[2])
+ return -1;
+
+ data[3] = 3;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_insert3 = {
+ .hid_device_event = (void *)hid_test_insert3,
+};
+
+SEC("?struct_ops/hid_hw_request")
+int BPF_PROG(hid_test_filter_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source)
+{
+ return -20;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_filter_raw_request = {
+ .hid_hw_request = (void *)hid_test_filter_raw_request,
+};
+
+static struct file *current_file;
+
+SEC("fentry/hidraw_open")
+int BPF_PROG(hidraw_open, struct inode *inode, struct file *file)
+{
+ current_file = file;
+ return 0;
+}
+
+SEC("?struct_ops.s/hid_hw_request")
+int BPF_PROG(hid_test_hidraw_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */);
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* check if the incoming request comes from our hidraw operation */
+ if (source == (__u64)current_file) {
+ data[0] = reportnum;
+
+ ret = hid_bpf_hw_request(hctx, data, 2, rtype, reqtype);
+ if (ret != 2)
+ return -1;
+ data[0] = reportnum + 1;
+ data[1] = reportnum + 2;
+ data[2] = reportnum + 3;
+ return 3;
+ }
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_hidraw_raw_request = {
+ .hid_hw_request = (void *)hid_test_hidraw_raw_request,
+};
+
+SEC("?struct_ops.s/hid_hw_request")
+int BPF_PROG(hid_test_infinite_loop_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */);
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* always forward the request as-is to the device, hid-bpf should prevent
+ * infinite loops.
+ */
+ data[0] = reportnum;
+
+ ret = hid_bpf_hw_request(hctx, data, 2, rtype, reqtype);
+ if (ret == 2)
+ return 3;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_infinite_loop_raw_request = {
+ .hid_hw_request = (void *)hid_test_infinite_loop_raw_request,
+};
+
+SEC("?struct_ops/hid_hw_output_report")
+int BPF_PROG(hid_test_filter_output_report, struct hid_bpf_ctx *hctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source)
+{
+ return -25;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_filter_output_report = {
+ .hid_hw_output_report = (void *)hid_test_filter_output_report,
+};
+
+SEC("?struct_ops.s/hid_hw_output_report")
+int BPF_PROG(hid_test_hidraw_output_report, struct hid_bpf_ctx *hctx, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */);
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* check if the incoming request comes from our hidraw operation */
+ if (source == (__u64)current_file)
+ return hid_bpf_hw_output_report(hctx, data, 2);
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_hidraw_output_report = {
+ .hid_hw_output_report = (void *)hid_test_hidraw_output_report,
+};
+
+SEC("?struct_ops.s/hid_hw_output_report")
+int BPF_PROG(hid_test_infinite_loop_output_report, struct hid_bpf_ctx *hctx, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */);
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* always forward the request as-is to the device, hid-bpf should prevent
+ * infinite loops.
+ */
+
+ ret = hid_bpf_hw_output_report(hctx, data, 2);
+ if (ret == 2)
+ return 2;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_infinite_loop_output_report = {
+ .hid_hw_output_report = (void *)hid_test_infinite_loop_output_report,
+};
+
+struct elem {
+ struct bpf_wq work;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+static int wq_cb_sleepable(void *map, int *key, void *work)
+{
+ __u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10};
+ struct hid_bpf_ctx *hid_ctx;
+
+ hid_ctx = hid_bpf_allocate_context(*key);
+ if (!hid_ctx)
+ return 0; /* EPERM check */
+
+ hid_bpf_input_report(hid_ctx, HID_INPUT_REPORT, buf, sizeof(buf));
+
+ hid_bpf_release_context(hid_ctx);
+
+ return 0;
+}
+
+static int test_inject_input_report_callback(int *key)
+{
+ struct elem init = {}, *val;
+ struct bpf_wq *wq;
+
+ if (bpf_map_update_elem(&hmap, key, &init, 0))
+ return -1;
+
+ val = bpf_map_lookup_elem(&hmap, key);
+ if (!val)
+ return -2;
+
+ wq = &val->work;
+ if (bpf_wq_init(wq, &hmap, 0) != 0)
+ return -3;
+
+ if (bpf_wq_set_callback(wq, wq_cb_sleepable, 0))
+ return -4;
+
+ if (bpf_wq_start(wq, 0))
+ return -5;
+
+ return 0;
+}
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_multiply_events_wq, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 9 /* size */);
+ int hid = hid_ctx->hid->id;
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ if (data[0] != 1)
+ return 0;
+
+ ret = test_inject_input_report_callback(&hid);
+ if (ret)
+ return ret;
+
+ data[1] += 5;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_multiply_events_wq = {
+ .hid_device_event = (void *)hid_test_multiply_events_wq,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_multiply_events, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 9 /* size */);
+ __u8 buf[9];
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ if (data[0] != 1)
+ return 0;
+
+ /*
+ * we have to use an intermediate buffer as hid_bpf_input_report
+ * will memset data to \0
+ */
+ __builtin_memcpy(buf, data, sizeof(buf));
+
+ buf[0] = 2;
+ buf[1] += 5;
+ ret = hid_bpf_try_input_report(hid_ctx, HID_INPUT_REPORT, buf, sizeof(buf));
+ if (ret < 0)
+ return ret;
+
+ /*
+ * In real world we should reset the original buffer as data might be garbage now,
+ * but it actually now has the content of 'buf'
+ */
+ data[1] += 5;
+
+ return 9;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_multiply_events = {
+ .hid_device_event = (void *)hid_test_multiply_events,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_infinite_loop_input_report, struct hid_bpf_ctx *hctx,
+ enum hid_report_type report_type, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 6 /* size */);
+ __u8 buf[6];
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /*
+ * we have to use an intermediate buffer as hid_bpf_input_report
+ * will memset data to \0
+ */
+ __builtin_memcpy(buf, data, sizeof(buf));
+
+ /* always forward the request as-is to the device, hid-bpf should prevent
+ * infinite loops.
+ * the return value is ignored so the event is passing to userspace.
+ */
+
+ hid_bpf_try_input_report(hctx, report_type, buf, sizeof(buf));
+
+ /* each time we process the event, we increment by one data[1]:
+ * after each successful call to hid_bpf_try_input_report, buf
+ * has been memcopied into data by the kernel.
+ */
+ data[1] += 1;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_infinite_loop_input_report = {
+ .hid_device_event = (void *)hid_test_infinite_loop_input_report,
+};
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
new file mode 100644
index 000000000000..531228b849da
--- /dev/null
+++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2022 Benjamin Tissoires
+ */
+
+#ifndef __HID_BPF_HELPERS_H
+#define __HID_BPF_HELPERS_H
+
+/* "undefine" structs and enums in vmlinux.h, because we "override" them below */
+#define hid_bpf_ctx hid_bpf_ctx___not_used
+#define hid_bpf_ops hid_bpf_ops___not_used
+#define hid_report_type hid_report_type___not_used
+#define hid_class_request hid_class_request___not_used
+#define hid_bpf_attach_flags hid_bpf_attach_flags___not_used
+#define HID_INPUT_REPORT HID_INPUT_REPORT___not_used
+#define HID_OUTPUT_REPORT HID_OUTPUT_REPORT___not_used
+#define HID_FEATURE_REPORT HID_FEATURE_REPORT___not_used
+#define HID_REPORT_TYPES HID_REPORT_TYPES___not_used
+#define HID_REQ_GET_REPORT HID_REQ_GET_REPORT___not_used
+#define HID_REQ_GET_IDLE HID_REQ_GET_IDLE___not_used
+#define HID_REQ_GET_PROTOCOL HID_REQ_GET_PROTOCOL___not_used
+#define HID_REQ_SET_REPORT HID_REQ_SET_REPORT___not_used
+#define HID_REQ_SET_IDLE HID_REQ_SET_IDLE___not_used
+#define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used
+
+/* do not define kfunc through vmlinux.h as this messes up our custom hack */
+#define BPF_NO_KFUNC_PROTOTYPES
+
+#include "vmlinux.h"
+
+#undef hid_bpf_ctx
+#undef hid_bpf_ops
+#undef hid_report_type
+#undef hid_class_request
+#undef hid_bpf_attach_flags
+#undef HID_INPUT_REPORT
+#undef HID_OUTPUT_REPORT
+#undef HID_FEATURE_REPORT
+#undef HID_REPORT_TYPES
+#undef HID_REQ_GET_REPORT
+#undef HID_REQ_GET_IDLE
+#undef HID_REQ_GET_PROTOCOL
+#undef HID_REQ_SET_REPORT
+#undef HID_REQ_SET_IDLE
+#undef HID_REQ_SET_PROTOCOL
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/const.h>
+
+enum hid_report_type {
+ HID_INPUT_REPORT = 0,
+ HID_OUTPUT_REPORT = 1,
+ HID_FEATURE_REPORT = 2,
+
+ HID_REPORT_TYPES,
+};
+
+struct hid_bpf_ctx {
+ struct hid_device *hid;
+ __u32 allocated_size;
+ union {
+ __s32 retval;
+ __s32 size;
+ };
+} __attribute__((preserve_access_index));
+
+enum hid_class_request {
+ HID_REQ_GET_REPORT = 0x01,
+ HID_REQ_GET_IDLE = 0x02,
+ HID_REQ_GET_PROTOCOL = 0x03,
+ HID_REQ_SET_REPORT = 0x09,
+ HID_REQ_SET_IDLE = 0x0A,
+ HID_REQ_SET_PROTOCOL = 0x0B,
+};
+
+struct hid_bpf_ops {
+ int hid_id;
+ u32 flags;
+ struct list_head list;
+ int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type,
+ u64 source);
+ int (*hid_rdesc_fixup)(struct hid_bpf_ctx *ctx);
+ int (*hid_hw_request)(struct hid_bpf_ctx *ctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype,
+ u64 source);
+ int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, u64 source);
+ struct hid_device *hdev;
+};
+
+#ifndef BPF_F_BEFORE
+#define BPF_F_BEFORE (1U << 3)
+#endif
+
+/* following are kfuncs exported by HID for HID-BPF */
+extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx,
+ unsigned int offset,
+ const size_t __sz) __weak __ksym;
+extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __weak __ksym;
+extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __weak __ksym;
+extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx,
+ __u8 *data,
+ size_t buf__sz,
+ enum hid_report_type type,
+ enum hid_class_request reqtype) __weak __ksym;
+extern int hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx,
+ __u8 *buf, size_t buf__sz) __weak __ksym;
+extern int hid_bpf_input_report(struct hid_bpf_ctx *ctx,
+ enum hid_report_type type,
+ __u8 *data,
+ size_t buf__sz) __weak __ksym;
+extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx,
+ enum hid_report_type type,
+ __u8 *data,
+ size_t buf__sz) __weak __ksym;
+
+/* bpf_wq implementation */
+extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
+extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
+extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, void *wq),
+ unsigned int flags__k, void *aux__ign) __weak __ksym;
+#define bpf_wq_set_callback(timer, cb, flags) \
+ bpf_wq_set_callback_impl(timer, cb, flags, NULL)
+
+#endif /* __HID_BPF_HELPERS_H */
diff --git a/tools/testing/selftests/hid/run-hid-tools-tests.sh b/tools/testing/selftests/hid/run-hid-tools-tests.sh
new file mode 100755
index 000000000000..af1682a53c27
--- /dev/null
+++ b/tools/testing/selftests/hid/run-hid-tools-tests.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+KSELFTEST_SKIP_TEST=4
+
+if ! command -v python3 > /dev/null 2>&1; then
+ echo "hid-tools: [SKIP] python3 not installed"
+ exit $KSELFTEST_SKIP_TEST
+fi
+
+if ! python3 -c "import pytest" > /dev/null 2>&1; then
+ echo "hid: [SKIP] pytest module not installed"
+ exit $KSELFTEST_SKIP_TEST
+fi
+
+if ! python3 -c "import pytest_tap" > /dev/null 2>&1; then
+ echo "hid: [SKIP] pytest_tap module not installed"
+ exit $KSELFTEST_SKIP_TEST
+fi
+
+if ! python3 -c "import hidtools" > /dev/null 2>&1; then
+ echo "hid: [SKIP] hid-tools module not installed"
+ exit $KSELFTEST_SKIP_TEST
+fi
+
+TARGET=${TARGET:=.}
+
+echo TAP version 13
+python3 -u -m pytest $PYTEST_XDIST ./tests/$TARGET --tap-stream --udevd
diff --git a/tools/testing/selftests/hid/settings b/tools/testing/selftests/hid/settings
new file mode 100644
index 000000000000..b3cbfc521b10
--- /dev/null
+++ b/tools/testing/selftests/hid/settings
@@ -0,0 +1,3 @@
+# HID tests can be long, so give a little bit more time
+# to them
+timeout=200
diff --git a/tools/testing/selftests/hid/tests/__init__.py b/tools/testing/selftests/hid/tests/__init__.py
new file mode 100644
index 000000000000..c940e9275252
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# Just to make sphinx-apidoc document this directory
diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py
new file mode 100644
index 000000000000..5175cf235b2f
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/base.py
@@ -0,0 +1,432 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+
+import dataclasses
+import libevdev
+import os
+import pytest
+import shutil
+import subprocess
+import time
+
+import logging
+
+from .base_device import BaseDevice, EvdevMatch, SysfsFile
+from pathlib import Path
+from typing import Final, List, Tuple
+
+logger = logging.getLogger("hidtools.test.base")
+
+# application to matches
+application_matches: Final = {
+ # pyright: ignore
+ "Accelerometer": EvdevMatch(
+ req_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ]
+ ),
+ "Game Pad": EvdevMatch( # in systemd, this is a lot more complex, but that will do
+ requires=[
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ libevdev.EV_ABS.ABS_RX,
+ libevdev.EV_ABS.ABS_RY,
+ libevdev.EV_KEY.BTN_START,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Joystick": EvdevMatch( # in systemd, this is a lot more complex, but that will do
+ requires=[
+ libevdev.EV_ABS.ABS_RX,
+ libevdev.EV_ABS.ABS_RY,
+ libevdev.EV_KEY.BTN_START,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Key": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.KEY_A,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ libevdev.INPUT_PROP_DIRECT,
+ libevdev.INPUT_PROP_POINTER,
+ ],
+ ),
+ "Mouse": EvdevMatch(
+ requires=[
+ libevdev.EV_REL.REL_X,
+ libevdev.EV_REL.REL_Y,
+ libevdev.EV_KEY.BTN_LEFT,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Pad": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_0,
+ ],
+ excludes=[
+ libevdev.EV_KEY.BTN_TOOL_PEN,
+ libevdev.EV_KEY.BTN_TOUCH,
+ libevdev.EV_ABS.ABS_DISTANCE,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Pen": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_STYLUS,
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Stylus": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_STYLUS,
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Touch Pad": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_LEFT,
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ ],
+ excludes=[libevdev.EV_KEY.BTN_TOOL_PEN, libevdev.EV_KEY.BTN_STYLUS],
+ req_properties=[
+ libevdev.INPUT_PROP_POINTER,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Touch Screen": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_TOUCH,
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ ],
+ excludes=[libevdev.EV_KEY.BTN_TOOL_PEN, libevdev.EV_KEY.BTN_STYLUS],
+ req_properties=[
+ libevdev.INPUT_PROP_DIRECT,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+}
+
+
+class UHIDTestDevice(BaseDevice):
+ def __init__(self, name, application, rdesc_str=None, rdesc=None, input_info=None):
+ super().__init__(name, application, rdesc_str, rdesc, input_info)
+ self.application_matches = application_matches
+ if name is None:
+ name = f"uhid test {self.__class__.__name__}"
+ if not name.startswith("uhid test "):
+ name = "uhid test " + self.name
+ self.name = name
+
+
+@dataclasses.dataclass
+class HidBpf:
+ object_name: str
+ has_rdesc_fixup: bool
+
+
+@dataclasses.dataclass
+class KernelModule:
+ driver_name: str
+ module_name: str
+
+
+class BaseTestCase:
+ class TestUhid(object):
+ syn_event = libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) # type: ignore
+ key_event = libevdev.InputEvent(libevdev.EV_KEY) # type: ignore
+ abs_event = libevdev.InputEvent(libevdev.EV_ABS) # type: ignore
+ rel_event = libevdev.InputEvent(libevdev.EV_REL) # type: ignore
+ msc_event = libevdev.InputEvent(libevdev.EV_MSC.MSC_SCAN) # type: ignore
+
+ # List of kernel modules to load before starting the test
+ # if any module is not available (not compiled), the test will skip.
+ # Each element is a KernelModule object, for example
+ # KernelModule("playstation", "hid-playstation")
+ kernel_modules: List[KernelModule] = []
+
+ # List of in kernel HID-BPF object files to load
+ # before starting the test
+ # Any existing pre-loaded HID-BPF module will be removed
+ # before the ones in this list will be manually loaded.
+ # Each Element is a HidBpf object, for example
+ # 'HidBpf("xppen-ArtistPro16Gen2.bpf.o", True)'
+ # If 'has_rdesc_fixup' is True, the test needs to wait
+ # for one unbind and rebind before it can be sure the kernel is
+ # ready
+ hid_bpfs: List[HidBpf] = []
+
+ def assertInputEventsIn(self, expected_events, effective_events):
+ effective_events = effective_events.copy()
+ for ev in expected_events:
+ assert ev in effective_events
+ effective_events.remove(ev)
+ return effective_events
+
+ def assertInputEvents(self, expected_events, effective_events):
+ remaining = self.assertInputEventsIn(expected_events, effective_events)
+ assert remaining == []
+
+ @classmethod
+ def debug_reports(cls, reports, uhdev=None, events=None):
+ data = [" ".join([f"{v:02x}" for v in r]) for r in reports]
+
+ if uhdev is not None:
+ human_data = [
+ uhdev.parsed_rdesc.format_report(r, split_lines=True)
+ for r in reports
+ ]
+ try:
+ human_data = [
+ f'\n\t {" " * h.index("/")}'.join(h.split("\n"))
+ for h in human_data
+ ]
+ except ValueError:
+ # '/' not found: not a numbered report
+ human_data = ["\n\t ".join(h.split("\n")) for h in human_data]
+ data = [f"{d}\n\t ====> {h}" for d, h in zip(data, human_data)]
+
+ reports = data
+
+ if len(reports) == 1:
+ print("sending 1 report:")
+ else:
+ print(f"sending {len(reports)} reports:")
+ for report in reports:
+ print("\t", report)
+
+ if events is not None:
+ print("events received:", events)
+
+ def create_device(self):
+ raise Exception("please reimplement me in subclasses")
+
+ def _load_kernel_module(self, kernel_driver, kernel_module):
+ sysfs_path = Path("/sys/bus/hid/drivers")
+ if kernel_driver is not None:
+ sysfs_path /= kernel_driver
+ else:
+ # special case for when testing all available modules:
+ # we don't know beforehand the name of the module from modinfo
+ sysfs_path = Path("/sys/module") / kernel_module.replace("-", "_")
+ if not sysfs_path.exists():
+ ret = subprocess.run(["/usr/sbin/modprobe", kernel_module])
+ if ret.returncode != 0:
+ pytest.skip(
+ f"module {kernel_module} could not be loaded, skipping the test"
+ )
+
+ @pytest.fixture()
+ def load_kernel_module(self):
+ for k in self.kernel_modules:
+ self._load_kernel_module(k.driver_name, k.module_name)
+ yield
+
+ def load_hid_bpfs(self):
+ # this function will only work when run in the kernel tree
+ script_dir = Path(os.path.dirname(os.path.realpath(__file__)))
+ root_dir = (script_dir / "../../../../..").resolve()
+ bpf_dir = root_dir / "drivers/hid/bpf/progs"
+
+ if not bpf_dir.exists():
+ pytest.skip("looks like we are not in the kernel tree, skipping")
+
+ udev_hid_bpf = shutil.which("udev-hid-bpf")
+ if not udev_hid_bpf:
+ pytest.skip("udev-hid-bpf not found in $PATH, skipping")
+
+ wait = any(b.has_rdesc_fixup for b in self.hid_bpfs)
+
+ for hid_bpf in self.hid_bpfs:
+ # We need to start `udev-hid-bpf` in the background
+ # and dispatch uhid events in case the kernel needs
+ # to fetch features on the device
+ process = subprocess.Popen(
+ [
+ "udev-hid-bpf",
+ "--verbose",
+ "add",
+ str(self.uhdev.sys_path),
+ str(bpf_dir / hid_bpf.object_name),
+ ],
+ )
+ while process.poll() is None:
+ self.uhdev.dispatch(1)
+
+ if process.returncode != 0:
+ pytest.fail(
+ f"Couldn't insert hid-bpf program '{hid_bpf}', marking the test as failed"
+ )
+
+ if wait:
+ # the HID-BPF program exports a rdesc fixup, so it needs to be
+ # unbound by the kernel and then rebound.
+ # Ensure we get the bound event exactly 2 times (one for the normal
+ # uhid loading, and then the reload from HID-BPF)
+ now = time.time()
+ while self.uhdev.kernel_ready_count < 2 and time.time() - now < 2:
+ self.uhdev.dispatch(1)
+
+ if self.uhdev.kernel_ready_count < 2:
+ pytest.fail(
+ f"Couldn't insert hid-bpf programs, marking the test as failed"
+ )
+
+ def unload_hid_bpfs(self):
+ ret = subprocess.run(
+ ["udev-hid-bpf", "--verbose", "remove", str(self.uhdev.sys_path)],
+ )
+ if ret.returncode != 0:
+ pytest.fail(
+ f"Couldn't unload hid-bpf programs, marking the test as failed"
+ )
+
+ @pytest.fixture()
+ def new_uhdev(self, load_kernel_module):
+ return self.create_device()
+
+ def assertName(self, uhdev):
+ evdev = uhdev.get_evdev()
+ assert uhdev.name in evdev.name
+
+ @pytest.fixture(autouse=True)
+ def context(self, new_uhdev, request):
+ try:
+ with HIDTestUdevRule.instance():
+ with new_uhdev as self.uhdev:
+ for skip_cond in request.node.iter_markers("skip_if_uhdev"):
+ test, message, *rest = skip_cond.args
+
+ if test(self.uhdev):
+ pytest.skip(message)
+
+ self.uhdev.create_kernel_device()
+ now = time.time()
+ while not self.uhdev.is_ready() and time.time() - now < 5:
+ self.uhdev.dispatch(1)
+
+ if self.hid_bpfs:
+ self.load_hid_bpfs()
+
+ if self.uhdev.get_evdev() is None:
+ logger.warning(
+ f"available list of input nodes: (default application is '{self.uhdev.application}')"
+ )
+ logger.warning(self.uhdev.input_nodes)
+ yield
+ if self.hid_bpfs:
+ self.unload_hid_bpfs()
+ self.uhdev = None
+ except PermissionError:
+ pytest.skip("Insufficient permissions, run me as root")
+
+ @pytest.fixture(autouse=True)
+ def check_taint(self):
+ # we are abusing SysfsFile here, it's in /proc, but meh
+ taint_file = SysfsFile("/proc/sys/kernel/tainted")
+ taint = taint_file.int_value
+
+ yield
+
+ assert taint_file.int_value == taint
+
+ def test_creation(self):
+ """Make sure the device gets processed by the kernel and creates
+ the expected application input node.
+
+ If this fail, there is something wrong in the device report
+ descriptors."""
+ uhdev = self.uhdev
+ assert uhdev is not None
+ assert uhdev.get_evdev() is not None
+ self.assertName(uhdev)
+ assert len(uhdev.next_sync_events()) == 0
+ assert uhdev.get_evdev() is not None
+
+
+class HIDTestUdevRule(object):
+ _instance = None
+ """
+ A context-manager compatible class that sets up our udev rules file and
+ deletes it on context exit.
+
+ This class is tailored to our test setup: it only sets up the udev rule
+ on the **second** context and it cleans it up again on the last context
+ removed. This matches the expected pytest setup: we enter a context for
+ the session once, then once for each test (the first of which will
+ trigger the udev rule) and once the last test exited and the session
+ exited, we clean up after ourselves.
+ """
+
+ def __init__(self):
+ self.refs = 0
+ self.rulesfile = None
+
+ def __enter__(self):
+ self.refs += 1
+ if self.refs == 2 and self.rulesfile is None:
+ self.create_udev_rule()
+ self.reload_udev_rules()
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.refs -= 1
+ if self.refs == 0 and self.rulesfile:
+ os.remove(self.rulesfile.name)
+ self.reload_udev_rules()
+
+ def reload_udev_rules(self):
+ subprocess.run("udevadm control --reload-rules".split())
+ subprocess.run("systemd-hwdb update".split())
+
+ def create_udev_rule(self):
+ import tempfile
+
+ os.makedirs("/run/udev/rules.d", exist_ok=True)
+ with tempfile.NamedTemporaryFile(
+ prefix="91-uhid-test-device-REMOVEME-",
+ suffix=".rules",
+ mode="w+",
+ dir="/run/udev/rules.d",
+ delete=False,
+ ) as f:
+ f.write(
+ """
+KERNELS=="*input*", ATTRS{name}=="*uhid test *", ENV{LIBINPUT_IGNORE_DEVICE}="1"
+KERNELS=="*hid*", ENV{HID_NAME}=="*uhid test *", ENV{HID_BPF_IGNORE_DEVICE}="1"
+KERNELS=="*input*", ATTRS{name}=="*uhid test * System Multi Axis", ENV{ID_INPUT_TOUCHSCREEN}="", ENV{ID_INPUT_SYSTEM_MULTIAXIS}="1"
+"""
+ )
+ self.rulesfile = f
+
+ @classmethod
+ def instance(cls):
+ if not cls._instance:
+ cls._instance = HIDTestUdevRule()
+ return cls._instance
diff --git a/tools/testing/selftests/hid/tests/base_device.py b/tools/testing/selftests/hid/tests/base_device.py
new file mode 100644
index 000000000000..59465c58d94d
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/base_device.py
@@ -0,0 +1,448 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import dataclasses
+import fcntl
+import functools
+import libevdev
+import os
+import threading
+
+try:
+ import pyudev
+except ImportError:
+ raise ImportError("UHID is not supported due to missing pyudev dependency")
+
+import logging
+
+import hidtools.hid as hid
+from hidtools.uhid import UHIDDevice
+from hidtools.util import BusType
+
+from pathlib import Path
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type, Union
+
+logger = logging.getLogger("hidtools.device.base_device")
+
+
+class SysfsFile(object):
+ def __init__(self, path):
+ self.path = path
+
+ def __set_value(self, value):
+ with open(self.path, "w") as f:
+ return f.write(f"{value}\n")
+
+ def __get_value(self):
+ with open(self.path) as f:
+ return f.read().strip()
+
+ @property
+ def int_value(self) -> int:
+ return int(self.__get_value())
+
+ @int_value.setter
+ def int_value(self, v: int) -> None:
+ self.__set_value(v)
+
+ @property
+ def str_value(self) -> str:
+ return self.__get_value()
+
+ @str_value.setter
+ def str_value(self, v: str) -> None:
+ self.__set_value(v)
+
+
+class LED(object):
+ def __init__(self, sys_path):
+ self.max_brightness = SysfsFile(sys_path / "max_brightness").int_value
+ self.__brightness = SysfsFile(sys_path / "brightness")
+
+ @property
+ def brightness(self) -> int:
+ return self.__brightness.int_value
+
+ @brightness.setter
+ def brightness(self, value: int) -> None:
+ self.__brightness.int_value = value
+
+
+class PowerSupply(object):
+ """Represents Linux power_supply_class sysfs nodes."""
+
+ def __init__(self, sys_path):
+ self._capacity = SysfsFile(sys_path / "capacity")
+ self._status = SysfsFile(sys_path / "status")
+ self._type = SysfsFile(sys_path / "type")
+
+ @property
+ def capacity(self) -> int:
+ return self._capacity.int_value
+
+ @property
+ def status(self) -> str:
+ return self._status.str_value
+
+ @property
+ def type(self) -> str:
+ return self._type.str_value
+
+
+@dataclasses.dataclass
+class HidReadiness:
+ is_ready: bool = False
+ count: int = 0
+
+
+class HIDIsReady(object):
+ """
+ Companion class that binds to a kernel mechanism
+ and that allows to know when a uhid device is ready or not.
+
+ See :meth:`is_ready` for details.
+ """
+
+ def __init__(self: "HIDIsReady", uhid: UHIDDevice) -> None:
+ self.uhid = uhid
+
+ def is_ready(self: "HIDIsReady") -> HidReadiness:
+ """
+ Overwrite in subclasses: should return True or False whether
+ the attached uhid device is ready or not.
+ """
+ return HidReadiness()
+
+
+class UdevHIDIsReady(HIDIsReady):
+ _pyudev_context: ClassVar[Optional[pyudev.Context]] = None
+ _pyudev_monitor: ClassVar[Optional[pyudev.Monitor]] = None
+ _uhid_devices: ClassVar[Dict[int, HidReadiness]] = {}
+
+ def __init__(self: "UdevHIDIsReady", uhid: UHIDDevice) -> None:
+ super().__init__(uhid)
+ self._init_pyudev()
+
+ @classmethod
+ def _init_pyudev(cls: Type["UdevHIDIsReady"]) -> None:
+ if cls._pyudev_context is None:
+ cls._pyudev_context = pyudev.Context()
+ cls._pyudev_monitor = pyudev.Monitor.from_netlink(cls._pyudev_context)
+ cls._pyudev_monitor.filter_by("hid")
+ cls._pyudev_monitor.start()
+
+ UHIDDevice._append_fd_to_poll(
+ cls._pyudev_monitor.fileno(), cls._cls_udev_event_callback
+ )
+
+ @classmethod
+ def _cls_udev_event_callback(cls: Type["UdevHIDIsReady"]) -> None:
+ if cls._pyudev_monitor is None:
+ return
+ event: pyudev.Device
+ for event in iter(functools.partial(cls._pyudev_monitor.poll, 0.02), None):
+ if event.action not in ["bind", "remove", "unbind"]:
+ return
+
+ logger.debug(f"udev event: {event.action} -> {event}")
+
+ id = int(event.sys_path.strip().split(".")[-1], 16)
+
+ readiness = cls._uhid_devices.setdefault(id, HidReadiness())
+
+ ready = event.action == "bind"
+ if not readiness.is_ready and ready:
+ readiness.count += 1
+
+ readiness.is_ready = ready
+
+ def is_ready(self: "UdevHIDIsReady") -> HidReadiness:
+ try:
+ return self._uhid_devices[self.uhid.hid_id]
+ except KeyError:
+ return HidReadiness()
+
+
+class EvdevMatch(object):
+ def __init__(
+ self: "EvdevMatch",
+ *,
+ requires: List[Any] = [],
+ excludes: List[Any] = [],
+ req_properties: List[Any] = [],
+ excl_properties: List[Any] = [],
+ ) -> None:
+ self.requires = requires
+ self.excludes = excludes
+ self.req_properties = req_properties
+ self.excl_properties = excl_properties
+
+ def is_a_match(self: "EvdevMatch", evdev: libevdev.Device) -> bool:
+ for m in self.requires:
+ if not evdev.has(m):
+ return False
+ for m in self.excludes:
+ if evdev.has(m):
+ return False
+ for p in self.req_properties:
+ if not evdev.has_property(p):
+ return False
+ for p in self.excl_properties:
+ if evdev.has_property(p):
+ return False
+ return True
+
+
+class EvdevDevice(object):
+ """
+ Represents an Evdev node and its properties.
+ This is a stub for the libevdev devices, as they are relying on
+ uevent to get the data, saving us some ioctls to fetch the names
+ and properties.
+ """
+
+ def __init__(self: "EvdevDevice", sysfs: Path) -> None:
+ self.sysfs = sysfs
+ self.event_node: Any = None
+ self.libevdev: Optional[libevdev.Device] = None
+
+ self.uevents = {}
+ # all of the interesting properties are stored in the input uevent, so in the parent
+ # so convert the uevent file of the parent input node into a dict
+ with open(sysfs.parent / "uevent") as f:
+ for line in f.readlines():
+ key, value = line.strip().split("=")
+ self.uevents[key] = value.strip('"')
+
+ # we open all evdev nodes in order to not miss any event
+ self.open()
+
+ @property
+ def name(self: "EvdevDevice") -> str:
+ assert "NAME" in self.uevents
+
+ return self.uevents["NAME"]
+
+ @property
+ def evdev(self: "EvdevDevice") -> Path:
+ return Path("/dev/input") / self.sysfs.name
+
+ def matches_application(
+ self: "EvdevDevice", application: str, matches: Dict[str, EvdevMatch]
+ ) -> bool:
+ if self.libevdev is None:
+ return False
+
+ if application in matches:
+ return matches[application].is_a_match(self.libevdev)
+
+ logger.error(
+ f"application '{application}' is unknown, please update/fix hid-tools"
+ )
+ assert False # hid-tools likely needs an update
+
+ def open(self: "EvdevDevice") -> libevdev.Device:
+ self.event_node = open(self.evdev, "rb")
+ self.libevdev = libevdev.Device(self.event_node)
+
+ assert self.libevdev.fd is not None
+
+ fd = self.libevdev.fd.fileno()
+ flag = fcntl.fcntl(fd, fcntl.F_GETFD)
+ fcntl.fcntl(fd, fcntl.F_SETFL, flag | os.O_NONBLOCK)
+
+ return self.libevdev
+
+ def close(self: "EvdevDevice") -> None:
+ if self.libevdev is not None and self.libevdev.fd is not None:
+ self.libevdev.fd.close()
+ self.libevdev = None
+ if self.event_node is not None:
+ self.event_node.close()
+ self.event_node = None
+
+
+class BaseDevice(UHIDDevice):
+ # default _application_matches that matches nothing. This needs
+ # to be set in the subclasses to have get_evdev() working
+ _application_matches: Dict[str, EvdevMatch] = {}
+
+ def __init__(
+ self,
+ name,
+ application,
+ rdesc_str: Optional[str] = None,
+ rdesc: Optional[Union[hid.ReportDescriptor, str, bytes]] = None,
+ input_info=None,
+ ) -> None:
+ self._kernel_is_ready: HIDIsReady = UdevHIDIsReady(self)
+ if rdesc_str is None and rdesc is None:
+ raise Exception("Please provide at least a rdesc or rdesc_str")
+ super().__init__()
+ if name is None:
+ name = f"uhid gamepad test {self.__class__.__name__}"
+ if input_info is None:
+ input_info = (BusType.USB, 1, 2)
+ self.name = name
+ self.info = input_info
+ self.default_reportID = None
+ self.opened = False
+ self.started = False
+ self.application = application
+ self._input_nodes: Optional[list[EvdevDevice]] = None
+ if rdesc is None:
+ assert rdesc_str is not None
+ self.rdesc = hid.ReportDescriptor.from_human_descr(rdesc_str) # type: ignore
+ else:
+ self.rdesc = rdesc # type: ignore
+
+ @property
+ def power_supply_class(self: "BaseDevice") -> Optional[PowerSupply]:
+ ps = self.walk_sysfs("power_supply", "power_supply/*")
+ if ps is None or len(ps) < 1:
+ return None
+
+ return PowerSupply(ps[0])
+
+ @property
+ def led_classes(self: "BaseDevice") -> List[LED]:
+ leds = self.walk_sysfs("led", "**/max_brightness")
+ if leds is None:
+ return []
+
+ return [LED(led.parent) for led in leds]
+
+ @property
+ def kernel_is_ready(self: "BaseDevice") -> bool:
+ return self._kernel_is_ready.is_ready().is_ready and self.started
+
+ @property
+ def kernel_ready_count(self: "BaseDevice") -> int:
+ return self._kernel_is_ready.is_ready().count
+
+ @property
+ def input_nodes(self: "BaseDevice") -> List[EvdevDevice]:
+ if self._input_nodes is not None:
+ return self._input_nodes
+
+ if not self.kernel_is_ready or not self.started:
+ return []
+
+ # Starting with kernel v6.16, an event is emitted when
+ # userspace opens a kernel device, and for some devices
+ # this translates into a SET_REPORT.
+ # Because EvdevDevice(path) opens every single evdev node
+ # we need to have a separate thread to process the incoming
+ # SET_REPORT or we end up having to wait for the kernel
+ # timeout of 5 seconds.
+ done = False
+
+ def dispatch():
+ while not done:
+ self.dispatch(1)
+
+ t = threading.Thread(target=dispatch)
+ t.start()
+
+ self._input_nodes = [
+ EvdevDevice(path)
+ for path in self.walk_sysfs("input", "input/input*/event*")
+ ]
+ done = True
+ t.join()
+ return self._input_nodes
+
+ def match_evdev_rule(self, application, evdev):
+ """Replace this in subclasses if the device has multiple reports
+ of the same type and we need to filter based on the actual evdev
+ node.
+
+ returning True will append the corresponding report to
+ `self.input_nodes[type]`
+ returning False will ignore this report / type combination
+ for the device.
+ """
+ return True
+
+ def open(self):
+ self.opened = True
+
+ def _close_all_opened_evdev(self):
+ if self._input_nodes is not None:
+ for e in self._input_nodes:
+ e.close()
+
+ def __del__(self):
+ self._close_all_opened_evdev()
+
+ def close(self):
+ self.opened = False
+
+ def start(self, flags):
+ self.started = True
+
+ def stop(self):
+ self.started = False
+ self._close_all_opened_evdev()
+
+ def next_sync_events(self, application=None):
+ evdev = self.get_evdev(application)
+ if evdev is not None:
+ return list(evdev.events())
+ return []
+
+ @property
+ def application_matches(self: "BaseDevice") -> Dict[str, EvdevMatch]:
+ return self._application_matches
+
+ @application_matches.setter
+ def application_matches(self: "BaseDevice", data: Dict[str, EvdevMatch]) -> None:
+ self._application_matches = data
+
+ def get_evdev(self, application=None):
+ if application is None:
+ application = self.application
+
+ if len(self.input_nodes) == 0:
+ return None
+
+ assert self._input_nodes is not None
+
+ if len(self._input_nodes) == 1:
+ evdev = self._input_nodes[0]
+ if self.match_evdev_rule(application, evdev.libevdev):
+ return evdev.libevdev
+ else:
+ for _evdev in self._input_nodes:
+ if _evdev.matches_application(application, self.application_matches):
+ if self.match_evdev_rule(application, _evdev.libevdev):
+ return _evdev.libevdev
+
+ def is_ready(self):
+ """Returns whether a UHID device is ready. Can be overwritten in
+ subclasses to add extra conditions on when to consider a UHID
+ device ready. This can be:
+
+ - we need to wait on different types of input devices to be ready
+ (Touch Screen and Pen for example)
+ - we need to have at least 4 LEDs present
+ (len(self.uhdev.leds_classes) == 4)
+ - or any other combinations"""
+ return self.kernel_is_ready
diff --git a/tools/testing/selftests/hid/tests/base_gamepad.py b/tools/testing/selftests/hid/tests/base_gamepad.py
new file mode 100644
index 000000000000..ec74d75767a2
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/base_gamepad.py
@@ -0,0 +1,238 @@
+# SPDX-License-Identifier: GPL-2.0
+import libevdev
+
+from .base_device import BaseDevice
+from hidtools.util import BusType
+
+
+class InvalidHIDCommunication(Exception):
+ pass
+
+
+class GamepadData(object):
+ pass
+
+
+class AxisMapping(object):
+ """Represents a mapping between a HID type
+ and an evdev event"""
+
+ def __init__(self, hid, evdev=None):
+ self.hid = hid.lower()
+
+ if evdev is None:
+ evdev = f"ABS_{hid.upper()}"
+
+ self.evdev = libevdev.evbit("EV_ABS", evdev)
+
+
+class BaseGamepad(BaseDevice):
+ buttons_map = {
+ 1: "BTN_SOUTH",
+ 2: "BTN_EAST",
+ 3: "BTN_C",
+ 4: "BTN_NORTH",
+ 5: "BTN_WEST",
+ 6: "BTN_Z",
+ 7: "BTN_TL",
+ 8: "BTN_TR",
+ 9: "BTN_TL2",
+ 10: "BTN_TR2",
+ 11: "BTN_SELECT",
+ 12: "BTN_START",
+ 13: "BTN_MODE",
+ 14: "BTN_THUMBL",
+ 15: "BTN_THUMBR",
+ }
+
+ axes_map = {
+ "left_stick": {
+ "x": AxisMapping("x"),
+ "y": AxisMapping("y"),
+ },
+ "right_stick": {
+ "x": AxisMapping("z"),
+ "y": AxisMapping("Rz"),
+ },
+ }
+
+ def __init__(self, rdesc, application="Game Pad", name=None, input_info=None):
+ assert rdesc is not None
+ super().__init__(name, application, input_info=input_info, rdesc=rdesc)
+ self.buttons = (1, 2, 3)
+ self._buttons = {}
+ self.left = (127, 127)
+ self.right = (127, 127)
+ self.hat_switch = 15
+ assert self.parsed_rdesc is not None
+
+ self.fields = []
+ for r in self.parsed_rdesc.input_reports.values():
+ if r.application_name == self.application:
+ self.fields.extend([f.usage_name for f in r])
+
+ def store_axes(self, which, gamepad, data):
+ amap = self.axes_map[which]
+ x, y = data
+ setattr(gamepad, amap["x"].hid, x)
+ setattr(gamepad, amap["y"].hid, y)
+
+ def create_report(
+ self,
+ *,
+ left=(None, None),
+ right=(None, None),
+ hat_switch=None,
+ buttons=None,
+ reportID=None,
+ application="Game Pad",
+ ):
+ """
+ Return an input report for this device.
+
+ :param left: a tuple of absolute (x, y) value of the left joypad
+ where ``None`` is "leave unchanged"
+ :param right: a tuple of absolute (x, y) value of the right joypad
+ where ``None`` is "leave unchanged"
+ :param hat_switch: an absolute angular value of the hat switch
+ (expressed in 1/8 of circle, 0 being North, 2 East)
+ where ``None`` is "leave unchanged"
+ :param buttons: a dict of index/bool for the button states,
+ where ``None`` is "leave unchanged"
+ :param reportID: the numeric report ID for this report, if needed
+ :param application: the application used to report the values
+ """
+ if buttons is not None:
+ for i, b in buttons.items():
+ if i not in self.buttons:
+ raise InvalidHIDCommunication(
+ f"button {i} is not part of this {self.application}"
+ )
+ if b is not None:
+ self._buttons[i] = b
+
+ def replace_none_in_tuple(item, default):
+ if item is None:
+ item = (None, None)
+
+ if None in item:
+ if item[0] is None:
+ item = (default[0], item[1])
+ if item[1] is None:
+ item = (item[0], default[1])
+
+ return item
+
+ right = replace_none_in_tuple(right, self.right)
+ self.right = right
+ left = replace_none_in_tuple(left, self.left)
+ self.left = left
+
+ if hat_switch is None:
+ hat_switch = self.hat_switch
+ else:
+ self.hat_switch = hat_switch
+
+ reportID = reportID or self.default_reportID
+
+ gamepad = GamepadData()
+ for i, b in self._buttons.items():
+ gamepad.__setattr__(f"b{i}", int(b) if b is not None else 0)
+
+ self.store_axes("left_stick", gamepad, left)
+ self.store_axes("right_stick", gamepad, right)
+ gamepad.hatswitch = hat_switch # type: ignore ### gamepad is by default empty
+ return super().create_report(
+ gamepad, reportID=reportID, application=application
+ )
+
+ def event(
+ self, *, left=(None, None), right=(None, None), hat_switch=None, buttons=None
+ ):
+ """
+ Send an input event on the default report ID.
+
+ :param left: a tuple of absolute (x, y) value of the left joypad
+ where ``None`` is "leave unchanged"
+ :param right: a tuple of absolute (x, y) value of the right joypad
+ where ``None`` is "leave unchanged"
+ :param hat_switch: an absolute angular value of the hat switch
+ where ``None`` is "leave unchanged"
+ :param buttons: a dict of index/bool for the button states,
+ where ``None`` is "leave unchanged"
+ """
+ r = self.create_report(
+ left=left, right=right, hat_switch=hat_switch, buttons=buttons
+ )
+ self.call_input_event(r)
+ return [r]
+
+
+class JoystickGamepad(BaseGamepad):
+ buttons_map = {
+ 1: "BTN_TRIGGER",
+ 2: "BTN_THUMB",
+ 3: "BTN_THUMB2",
+ 4: "BTN_TOP",
+ 5: "BTN_TOP2",
+ 6: "BTN_PINKIE",
+ 7: "BTN_BASE",
+ 8: "BTN_BASE2",
+ 9: "BTN_BASE3",
+ 10: "BTN_BASE4",
+ 11: "BTN_BASE5",
+ 12: "BTN_BASE6",
+ 13: "BTN_DEAD",
+ }
+
+ axes_map = {
+ "left_stick": {
+ "x": AxisMapping("x"),
+ "y": AxisMapping("y"),
+ },
+ "right_stick": {
+ "x": AxisMapping("rudder"),
+ "y": AxisMapping("throttle"),
+ },
+ }
+
+ def __init__(self, rdesc, application="Joystick", name=None, input_info=None):
+ super().__init__(rdesc, application, name, input_info)
+
+ def create_report(
+ self,
+ *,
+ left=(None, None),
+ right=(None, None),
+ hat_switch=None,
+ buttons=None,
+ reportID=None,
+ application=None,
+ ):
+ """
+ Return an input report for this device.
+
+ :param left: a tuple of absolute (x, y) value of the left joypad
+ where ``None`` is "leave unchanged"
+ :param right: a tuple of absolute (x, y) value of the right joypad
+ where ``None`` is "leave unchanged"
+ :param hat_switch: an absolute angular value of the hat switch
+ where ``None`` is "leave unchanged"
+ :param buttons: a dict of index/bool for the button states,
+ where ``None`` is "leave unchanged"
+ :param reportID: the numeric report ID for this report, if needed
+ :param application: the application for this report, if needed
+ """
+ if application is None:
+ application = "Joystick"
+ return super().create_report(
+ left=left,
+ right=right,
+ hat_switch=hat_switch,
+ buttons=buttons,
+ reportID=reportID,
+ application=application,
+ )
+
+ def store_right_joystick(self, gamepad, data):
+ gamepad.rudder, gamepad.throttle = data
diff --git a/tools/testing/selftests/hid/tests/conftest.py b/tools/testing/selftests/hid/tests/conftest.py
new file mode 100644
index 000000000000..1361ec981db6
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/conftest.py
@@ -0,0 +1,81 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+
+import platform
+import pytest
+import re
+import resource
+import subprocess
+from .base import HIDTestUdevRule
+from pathlib import Path
+
+
+# See the comment in HIDTestUdevRule, this doesn't set up but it will clean
+# up once the last test exited.
+@pytest.fixture(autouse=True, scope="session")
+def udev_rules_session_setup():
+ with HIDTestUdevRule.instance():
+ yield
+
+
+@pytest.fixture(autouse=True, scope="session")
+def setup_rlimit():
+ resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
+
+
+@pytest.fixture(autouse=True, scope="session")
+def start_udevd(pytestconfig):
+ if pytestconfig.getoption("udevd"):
+ import subprocess
+
+ with subprocess.Popen("/usr/lib/systemd/systemd-udevd") as proc:
+ yield
+ proc.kill()
+ else:
+ yield
+
+
+def pytest_configure(config):
+ config.addinivalue_line(
+ "markers",
+ "skip_if_uhdev(condition, message): mark test to skip if the condition on the uhdev device is met",
+ )
+
+
+# Generate the list of modules and modaliases
+# for the tests that need to be parametrized with those
+def pytest_generate_tests(metafunc):
+ if "usbVidPid" in metafunc.fixturenames:
+ modules = (
+ Path("/lib/modules/")
+ / platform.uname().release
+ / "kernel"
+ / "drivers"
+ / "hid"
+ )
+
+ modalias_re = re.compile(r"alias:\s+hid:b0003g.*v([0-9a-fA-F]+)p([0-9a-fA-F]+)")
+
+ params = []
+ ids = []
+ for module in modules.glob("*.ko"):
+ p = subprocess.run(
+ ["modinfo", module], capture_output=True, check=True, encoding="utf-8"
+ )
+ for line in p.stdout.split("\n"):
+ m = modalias_re.match(line)
+ if m is not None:
+ vid, pid = m.groups()
+ vid = int(vid, 16)
+ pid = int(pid, 16)
+ params.append([module.name.replace(".ko", ""), vid, pid])
+ ids.append(f"{module.name} {vid:04x}:{pid:04x}")
+ metafunc.parametrize("usbVidPid", params, ids=ids)
+
+
+def pytest_addoption(parser):
+ parser.addoption("--udevd", action="store_true", default=False)
diff --git a/tools/testing/selftests/hid/tests/descriptors_wacom.py b/tools/testing/selftests/hid/tests/descriptors_wacom.py
new file mode 100644
index 000000000000..91c16e005c12
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/descriptors_wacom.py
@@ -0,0 +1,1360 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# fmt: off
+wacom_pth660_v145 = [
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x02, # . Usage (Mouse),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x01, # . Report ID (1),
+ 0x09, 0x01, # . Usage (Pointer),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x05, 0x09, # . Usage Page (Button),
+ 0x19, 0x01, # . Usage Minimum (01h),
+ 0x29, 0x03, # . Usage Maximum (03h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x03, # . Report Count (3),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x05, # . Report Count (5),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x30, # . Usage (X),
+ 0x09, 0x31, # . Usage (Y),
+ 0x15, 0x81, # . Logical Minimum (-127),
+ 0x25, 0x7F, # . Logical Maximum (127),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x06, # . Input (Variable, Relative),
+ 0xC0, # . End Collection,
+ 0xC0, # . End Collection,
+ 0x06, 0x0D, 0xFF, # . Usage Page (FF0Dh),
+ 0x09, 0x01, # . Usage (01h),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x10, # . Report ID (16),
+ 0x09, 0x20, # . Usage (20h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (42h),
+ 0x09, 0x44, # . Usage (44h),
+ 0x09, 0x5A, # . Usage (5Ah),
+ 0x09, 0x45, # . Usage (45h),
+ 0x09, 0x3C, # . Usage (3Ch),
+ 0x09, 0x32, # . Usage (32h),
+ 0x09, 0x36, # . Usage (36h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x30, 0x01, # . Usage (0130h),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x47, 0x80, 0x57, 0x00, 0x00, # . Physical Maximum (22400),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x27, 0x00, 0xAF, 0x00, 0x00, # . Logical Maximum (44800),
+ 0x75, 0x18, # . Report Size (24),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x31, 0x01, # . Usage (0131h),
+ 0x47, 0xD0, 0x39, 0x00, 0x00, # . Physical Maximum (14800),
+ 0x27, 0xA0, 0x73, 0x00, 0x00, # . Logical Maximum (29600),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x30, # . Usage (30h),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x1F, # . Logical Maximum (8191), # !!! Errata: Missing Physical Max = 0
+ 0x75, 0x10, # . Report Size (16),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x3D, # . Usage (3Dh),
+ 0x09, 0x3E, # . Usage (3Eh),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0xC0, # . Physical Minimum (-64),
+ 0x45, 0x3F, # . Physical Maximum (63),
+ 0x15, 0xC0, # . Logical Minimum (-64),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x41, # . Usage (41h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x36, 0x4C, 0xFF, # . Physical Minimum (-180),
+ 0x46, 0xB3, 0x00, # . Physical Maximum (179),
+ 0x16, 0x7C, 0xFC, # . Logical Minimum (-900),
+ 0x26, 0x83, 0x03, # . Logical Maximum (899),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x03, 0x0D, # . Usage (0D03h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x07, # . Logical Maximum (2047), # !!! Errata: Missing Physical Min/Max = 0
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x32, 0x01, # . Usage (0132h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x5B, # . Usage (5Bh),
+ 0x09, 0x5C, # . Usage (5Ch),
+ 0x17, 0x00, 0x00, 0x00, 0x80, # . Logical Minimum (-2147483648),
+ 0x27, 0xFF, 0xFF, 0xFF, 0x7F, # . Logical Maximum (2147483647),
+ 0x75, 0x20, # . Report Size (32),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x77, # . Usage (77h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x11, # . Report ID (17),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x1A, 0x10, 0x09, # . Usage Minimum (0910h),
+ 0x2A, 0x17, 0x09, # . Usage Maximum (0917h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x08, # . Report Count (8),
+ 0x81, 0x02, # . Input (Variable),
+ 0x1A, 0x40, 0x09, # . Usage Minimum (0940h),
+ 0x2A, 0x47, 0x09, # . Usage Maximum (0947h),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x95, 0x09, # . Usage (0995h),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x38, 0x01, # . Usage (0138h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x46, 0x67, 0x01, # . Physical Maximum (359),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x47, # . Logical Maximum (71),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x39, 0x01, # . Usage (0139h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x25, 0x01, # . Logical Maximum (1), # !!! Errata: Missing Physical Max = 0
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x04, # . Report Count (4),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x13, # . Report ID (19),
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x0A, 0x3B, 0x04, # . Usage (043Bh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x04, 0x04, # . Usage (0404h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x52, 0x04, # . Usage (0452h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x54, 0x04, # . Usage (0454h),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x0E, # . Usage (0Eh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x85, 0x02, # . Report ID (2),
+ 0x09, 0x01, # . Usage (01h),
+ 0x75, 0x08, # . Report Size (8),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x03, # . Report ID (3),
+ 0x0A, 0x03, 0x10, # . Usage (1003h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x04, # . Report ID (4),
+ 0x0A, 0x04, 0x10, # . Usage (1004h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x07, # . Report ID (7),
+ 0x0A, 0x09, 0x10, # . Usage (1009h),
+ 0x25, 0x02, # . Logical Maximum (2),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x0A, 0x07, 0x10, # . Usage (1007h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x0A, 0x08, 0x10, # . Usage (1008h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x27, 0xFF, 0xFF, 0x00, 0x00, # . Logical Maximum (65535),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x06, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0C, # . Report ID (12),
+ 0x0A, 0x30, 0x0D, # . Usage (0D30h),
+ 0x0A, 0x31, 0x0D, # . Usage (0D31h),
+ 0x0A, 0x32, 0x0D, # . Usage (0D32h),
+ 0x0A, 0x33, 0x0D, # . Usage (0D33h), # !!! Errata: Missing Non-zero Physical Max
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0D, # . Report ID (13),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x0A, 0x0D, 0x10, # . Usage (100Dh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x14, # . Report ID (20),
+ 0x0A, 0x14, 0x10, # . Usage (1014h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x31, # . Report ID (49),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x05, # . Report Count (5),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x32, # . Report ID (50),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x0A, 0x32, 0x10, # . Usage (1032h),
+ 0x25, 0x03, # . Logical Maximum (3),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x34, # . Report ID (52),
+ 0x0A, 0x34, 0x10, # . Usage (1034h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x35, # . Report ID (53),
+ 0x0A, 0x35, 0x10, # . Usage (1035h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0A, # . Report Count (10),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x36, # . Report ID (54),
+ 0x0A, 0x35, 0x10, # . Usage (1035h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x96, 0x01, 0x01, # . Report Count (257),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCC, # . Report ID (204),
+ 0x0A, 0xCC, 0x10, # . Usage (10CCh),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0xAC, 0x10, # . Usage (10ACh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x75, 0x08, # . Report Size (8),
+ 0x85, 0xAC, # . Report ID (172),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0xBF, # . Report Count (191),
+ 0x81, 0x02, # . Input (Variable),
+ 0x85, 0x33, # . Report ID (51),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x12, # . Report Count (18),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x64, # . Report ID (100),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0C, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x15, # . Report ID (21),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x12, # . Report ID (18),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x16, # . Report ID (22),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x40, # . Report ID (64),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x41, # . Report ID (65),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x42, # . Report ID (66),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x43, # . Report ID (67),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x44, # . Report ID (68),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x45, # . Report ID (69),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x20, # . Report Count (32),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x60, # . Report ID (96),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x61, # . Report ID (97),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x62, # . Report ID (98),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0xD0, # . Report ID (208),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD1, # . Report ID (209),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD2, # . Report ID (210),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD3, # . Report ID (211),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD4, # . Report ID (212),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD5, # . Report ID (213),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD6, # . Report ID (214),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD7, # . Report ID (215),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD8, # . Report ID (216),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x0C, 0x00, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD9, # . Report ID (217),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x00, 0x0A, # . Report Count (2560),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDA, # . Report ID (218),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x04, # . Report Count (1028),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDB, # . Report ID (219),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x06, 0x00, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDC, # . Report ID (220),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDD, # . Report ID (221),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDE, # . Report ID (222),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDF, # . Report ID (223),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x22, 0x00, # . Report Count (34),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE0, # . Report ID (224),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x01, 0x00, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE1, # . Report ID (225),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE2, # . Report ID (226),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE3, # . Report ID (227),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE4, # . Report ID (228),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0xFF, 0x01, # . Report Count (511),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0 # . End Collection
+]
+# fmt: on
+
+# Report ID (20), Usage (1014h), Report Count (13) -> 15
+wacom_pth660_v150 = wacom_pth660_v145.copy()
+wacom_pth660_v150[0x2CB] = 0x0F
+
+# fmt: off
+wacom_pth860_v145 = [
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x02, # . Usage (Mouse),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x01, # . Report ID (1),
+ 0x09, 0x01, # . Usage (Pointer),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x05, 0x09, # . Usage Page (Button),
+ 0x19, 0x01, # . Usage Minimum (01h),
+ 0x29, 0x03, # . Usage Maximum (03h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x03, # . Report Count (3),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x05, # . Report Count (5),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x30, # . Usage (X),
+ 0x09, 0x31, # . Usage (Y),
+ 0x15, 0x80, # . Logical Minimum (-128),
+ 0x25, 0x7F, # . Logical Maximum (127),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x06, # . Input (Variable, Relative),
+ 0xC0, # . End Collection,
+ 0xC0, # . End Collection,
+ 0x06, 0x0D, 0xFF, # . Usage Page (FF0Dh),
+ 0x09, 0x01, # . Usage (01h),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x10, # . Report ID (16),
+ 0x09, 0x20, # . Usage (20h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (42h),
+ 0x09, 0x44, # . Usage (44h),
+ 0x09, 0x5A, # . Usage (5Ah),
+ 0x09, 0x45, # . Usage (45h),
+ 0x09, 0x3C, # . Usage (3Ch),
+ 0x09, 0x32, # . Usage (32h),
+ 0x09, 0x36, # . Usage (36h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x30, 0x01, # . Usage (0130h),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x47, 0x7C, 0x79, 0x00, 0x00, # . Physical Maximum (31100),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x27, 0xF8, 0xF2, 0x00, 0x00, # . Logical Maximum (62200),
+ 0x75, 0x18, # . Report Size (24),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x31, 0x01, # . Usage (0131h),
+ 0x47, 0x60, 0x54, 0x00, 0x00, # . Physical Maximum (21600),
+ 0x27, 0xC0, 0xA8, 0x00, 0x00, # . Logical Maximum (43200),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x30, # . Usage (30h), # !!! Errata: Missing Physical Max = 0
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x1F, # . Logical Maximum (8191),
+ 0x75, 0x10, # . Report Size (16),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x3D, # . Usage (3Dh),
+ 0x09, 0x3E, # . Usage (3Eh),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0xC0, # . Physical Minimum (-64),
+ 0x45, 0x3F, # . Physical Maximum (63),
+ 0x15, 0xC0, # . Logical Minimum (-64),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x41, # . Usage (41h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x36, 0x4C, 0xFF, # . Physical Minimum (-180),
+ 0x46, 0xB3, 0x00, # . Physical Maximum (179),
+ 0x16, 0x7C, 0xFC, # . Logical Minimum (-900),
+ 0x26, 0x83, 0x03, # . Logical Maximum (899),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x03, 0x0D, # . Usage (0D03h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x07, # . Logical Maximum (2047), # !!! Errata: Missing Physical Min/Max = 0
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x32, 0x01, # . Usage (0132h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x5B, # . Usage (5Bh),
+ 0x09, 0x5C, # . Usage (5Ch),
+ 0x17, 0x00, 0x00, 0x00, 0x80, # . Logical Minimum (-2147483648),
+ 0x27, 0xFF, 0xFF, 0xFF, 0x7F, # . Logical Maximum (2147483647),
+ 0x75, 0x20, # . Report Size (32),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x77, # . Usage (77h),
+ 0x16, 0x00, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x11, # . Report ID (17),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x1A, 0x10, 0x09, # . Usage Minimum (0910h),
+ 0x2A, 0x17, 0x09, # . Usage Maximum (0917h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x08, # . Report Count (8),
+ 0x81, 0x02, # . Input (Variable),
+ 0x1A, 0x40, 0x09, # . Usage Minimum (0940h),
+ 0x2A, 0x47, 0x09, # . Usage Maximum (0947h),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x95, 0x09, # . Usage (0995h),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x38, 0x01, # . Usage (0138h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x46, 0x67, 0x01, # . Physical Maximum (359),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x47, # . Logical Maximum (71),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x39, 0x01, # . Usage (0139h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x25, 0x01, # . Logical Maximum (1), # !!! Errata: Missing Physical Max = 0
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x04, # . Report Count (4),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x13, # . Report ID (19),
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x0A, 0x3B, 0x04, # . Usage (043Bh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x04, 0x04, # . Usage (0404h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x52, 0x04, # . Usage (0452h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x54, 0x04, # . Usage (0454h),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x0E, # . Usage (0Eh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x85, 0x02, # . Report ID (2),
+ 0x09, 0x01, # . Usage (01h),
+ 0x75, 0x08, # . Report Size (8),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x03, # . Report ID (3),
+ 0x0A, 0x03, 0x10, # . Usage (1003h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x04, # . Report ID (4),
+ 0x0A, 0x04, 0x10, # . Usage (1004h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x07, # . Report ID (7),
+ 0x0A, 0x09, 0x10, # . Usage (1009h),
+ 0x25, 0x02, # . Logical Maximum (2),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x0A, 0x07, 0x10, # . Usage (1007h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x0A, 0x08, 0x10, # . Usage (1008h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x27, 0xFF, 0xFF, 0x00, 0x00, # . Logical Maximum (65535),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x06, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0C, # . Report ID (12),
+ 0x0A, 0x30, 0x0D, # . Usage (0D30h),
+ 0x0A, 0x31, 0x0D, # . Usage (0D31h),
+ 0x0A, 0x32, 0x0D, # . Usage (0D32h),
+ 0x0A, 0x33, 0x0D, # . Usage (0D33h), # !!! Errata: Missing Non-zero Physical Max
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0D, # . Report ID (13),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x0A, 0x0D, 0x10, # . Usage (100Dh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x14, # . Report ID (20),
+ 0x0A, 0x14, 0x10, # . Usage (1014h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x31, # . Report ID (49),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x05, # . Report Count (5),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x32, # . Report ID (50),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x0A, 0x32, 0x10, # . Usage (1032h),
+ 0x25, 0x03, # . Logical Maximum (3),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x34, # . Report ID (52),
+ 0x0A, 0x34, 0x10, # . Usage (1034h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x35, # . Report ID (53),
+ 0x0A, 0x35, 0x10, # . Usage (1035h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0A, # . Report Count (10),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x36, # . Report ID (54),
+ 0x0A, 0x35, 0x10, # . Usage (1035h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x96, 0x01, 0x01, # . Report Count (257),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCC, # . Report ID (204),
+ 0x0A, 0xCC, 0x10, # . Usage (10CCh),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0xAC, 0x10, # . Usage (10ACh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x75, 0x08, # . Report Size (8),
+ 0x85, 0xAC, # . Report ID (172),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0xBF, # . Report Count (191),
+ 0x81, 0x02, # . Input (Variable),
+ 0x85, 0x33, # . Report ID (51),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x12, # . Report Count (18),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x64, # . Report ID (100),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0C, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x15, # . Report ID (21),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x12, # . Report ID (18),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x16, # . Report ID (22),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x40, # . Report ID (64),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x41, # . Report ID (65),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x42, # . Report ID (66),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x43, # . Report ID (67),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x44, # . Report ID (68),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x45, # . Report ID (69),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x20, # . Report Count (32),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x60, # . Report ID (96),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x61, # . Report ID (97),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x62, # . Report ID (98),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0xD0, # . Report ID (208),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD1, # . Report ID (209),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD2, # . Report ID (210),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD3, # . Report ID (211),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD4, # . Report ID (212),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD5, # . Report ID (213),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD6, # . Report ID (214),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD7, # . Report ID (215),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD8, # . Report ID (216),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x0C, 0x00, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD9, # . Report ID (217),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x00, 0x0A, # . Report Count (2560),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDA, # . Report ID (218),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x04, # . Report Count (1028),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDB, # . Report ID (219),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x06, 0x00, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDC, # . Report ID (220),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDD, # . Report ID (221),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDE, # . Report ID (222),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDF, # . Report ID (223),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x22, 0x00, # . Report Count (34),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE0, # . Report ID (224),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x01, 0x00, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE1, # . Report ID (225),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE2, # . Report ID (226),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE3, # . Report ID (227),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE4, # . Report ID (228),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0xFF, 0x01, # . Report Count (511),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0 # . End Collection
+]
+# fmt: on
+
+# Report ID (20), Usage (1014h), Report Count (13) -> 15
+wacom_pth860_v150 = wacom_pth860_v145.copy()
+wacom_pth860_v150[0x2CA] = 0x0F
+
+# fmt: off
+wacom_pth460_v105 = [
+ 0x06, 0x0D, 0xFF, # . Usage Page (FF0Dh),
+ 0x09, 0x01, # . Usage (01h),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x10, # . Report ID (16),
+ 0x09, 0x20, # . Usage (20h),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (42h),
+ 0x09, 0x44, # . Usage (44h),
+ 0x09, 0x5A, # . Usage (5Ah),
+ 0x09, 0x45, # . Usage (45h),
+ 0x09, 0x3C, # . Usage (3Ch),
+ 0x09, 0x32, # . Usage (32h),
+ 0x09, 0x36, # . Usage (36h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x30, 0x01, # . Usage (0130h),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x47, 0x58, 0x3E, 0x00, 0x00, # . Physical Maximum (15960),
+ 0x27, 0xB0, 0x7C, 0x00, 0x00, # . Logical Maximum (31920),
+ 0x75, 0x18, # . Report Size (24),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x31, 0x01, # . Usage (0131h),
+ 0x47, 0xF7, 0x26, 0x00, 0x00, # . Physical Maximum (9975),
+ 0x27, 0xEE, 0x4D, 0x00, 0x00, # . Logical Maximum (19950),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x30, # . Usage (30h),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x26, 0xFF, 0x1F, # . Logical Maximum (8191), # !!! Errata: Missing Physical Max = 0
+ 0x75, 0x10, # . Report Size (16),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x3D, # . Usage (3Dh),
+ 0x09, 0x3E, # . Usage (3Eh),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0xC0, # . Physical Minimum (-64),
+ 0x45, 0x3F, # . Physical Maximum (63),
+ 0x15, 0xC0, # . Logical Minimum (-64),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x41, # . Usage (41h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x36, 0x4C, 0xFF, # . Physical Minimum (-180),
+ 0x46, 0xB3, 0x00, # . Physical Maximum (179),
+ 0x16, 0x7C, 0xFC, # . Logical Minimum (-900),
+ 0x26, 0x83, 0x03, # . Logical Maximum (899),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x03, 0x0D, # . Usage (0D03h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x07, # . Logical Maximum (2047),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x32, 0x01, # . Usage (0132h),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x5B, # . Usage (5Bh),
+ 0x09, 0x5C, # . Usage (5Ch),
+ 0x17, 0x00, 0x00, 0x00, 0x80, # . Logical Minimum (-2147483648),
+ 0x27, 0xFF, 0xFF, 0xFF, 0x7F, # . Logical Maximum (2147483647),
+ 0x75, 0x20, # . Report Size (32),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x77, # . Usage (77h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x11, # . Report ID (17),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x1A, 0x10, 0x09, # . Usage Minimum (0910h),
+ 0x2A, 0x15, 0x09, # . Usage Maximum (0915h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x0A, 0x95, 0x09, # . Usage (0995h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x0A, 0x38, 0x01, # . Usage (0138h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x46, 0x67, 0x01, # . Physical Maximum (359),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x47, # . Logical Maximum (71),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x4A, # . Input (Variable, Wrap, Null State),
+ 0x0A, 0x39, 0x01, # . Usage (0139h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x04, # . Report Count (4),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x13, # . Report ID (19),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x0A, 0x3B, 0x04, # . Usage (043Bh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x04, 0x04, # . Usage (0404h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x0A, 0x52, 0x04, # . Usage (0452h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x41, 0x04, # . Usage (0441h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x07, # . Logical Maximum (7),
+ 0x75, 0x03, # . Report Size (3),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x54, 0x04, # . Usage (0454h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x0A, 0x3C, 0x04, # . Usage (043Ch),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x15, 0xFB, # . Logical Minimum (-5),
+ 0x25, 0x32, # . Logical Maximum (50),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x0A, 0x3D, 0x04, # . Usage (043Dh),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x03, # . Report Count (3),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x0E, # . Usage (0Eh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x85, 0x02, # . Report ID (2),
+ 0x0A, 0x02, 0x10, # . Usage (1002h),
+ 0x15, 0x02, # . Logical Minimum (2),
+ 0x25, 0x02, # . Logical Maximum (2),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x03, # . Report ID (3),
+ 0x0A, 0x03, 0x10, # . Usage (1003h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x04, # . Report ID (4),
+ 0x0A, 0x04, 0x10, # . Usage (1004h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x07, # . Report ID (7),
+ 0x0A, 0x09, 0x10, # . Usage (1009h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x0A, 0x07, 0x10, # . Usage (1007h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x0A, 0x08, 0x10, # . Usage (1008h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x27, 0xFF, 0xFF, 0x00, 0x00, # . Logical Maximum (65535),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x06, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x09, 0x00, # . Usage (00h),
+ 0x25, 0x00, # . Logical Maximum (0),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0C, # . Report ID (12),
+ 0x0A, 0x30, 0x0D, # . Usage (0D30h),
+ 0x0A, 0x31, 0x0D, # . Usage (0D31h),
+ 0x0A, 0x32, 0x0D, # . Usage (0D32h),
+ 0x0A, 0x33, 0x0D, # . Usage (0D33h),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x46, 0xC8, 0x00, # . Physical Maximum (200),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0x90, 0x01, # . Logical Maximum (400),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x0D, # . Report ID (13),
+ 0x0A, 0x0D, 0x10, # . Usage (100Dh),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x14, # . Report ID (20),
+ 0x0A, 0x14, 0x10, # . Usage (1014h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCC, # . Report ID (204),
+ 0x0A, 0xCC, 0x10, # . Usage (10CCh),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x0E, # . Usage (0Eh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x85, 0x31, # . Report ID (49),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x03, # . Report Count (3),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0xAC, 0x10, # . Usage (10ACh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x75, 0x08, # . Report Size (8),
+ 0x85, 0xAC, # . Report ID (172),
+ 0x09, 0x00, # . Usage (00h),
+ 0x96, 0xBF, 0x00, # . Report Count (191),
+ 0x81, 0x02, # . Input (Variable),
+ 0x85, 0x15, # . Report ID (21),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x33, # . Report ID (51),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x12, # . Report Count (18),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x44, # . Report ID (68),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x45, # . Report ID (69),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x20, # . Report Count (32),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x60, # . Report ID (96),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x61, # . Report ID (97),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x62, # . Report ID (98),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x65, # . Report ID (101),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x66, # . Report ID (102),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x67, # . Report ID (103),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x68, # . Report ID (104),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x11, # . Report Count (17),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x6F, # . Report ID (111),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCD, # . Report ID (205),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x16, # . Report ID (22),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x35, # . Report ID (53),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0A, # . Report Count (10),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0xD1, # . Report ID (209),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD2, # . Report ID (210),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD3, # . Report ID (211),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD4, # . Report ID (212),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD5, # . Report ID (213),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD6, # . Report ID (214),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD7, # . Report ID (215),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD8, # . Report ID (216),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x0C, 0x00, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD9, # . Report ID (217),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x00, 0x0A, # . Report Count (2560),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDA, # . Report ID (218),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x04, # . Report Count (1028),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDB, # . Report ID (219),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x06, 0x00, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDC, # . Report ID (220),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDD, # . Report ID (221),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDE, # . Report ID (222),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDF, # . Report ID (223),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x22, 0x00, # . Report Count (34),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE0, # . Report ID (224),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x01, 0x00, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE1, # . Report ID (225),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE2, # . Report ID (226),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE3, # . Report ID (227),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE4, # . Report ID (228),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0xFF, 0x01, # . Report Count (511),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCB, # . Report ID (203),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x1F, 0x00, # . Report Count (31),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0 # . End Collection
+]
+# fmt: on
diff --git a/tools/testing/selftests/hid/tests/test_apple_keyboard.py b/tools/testing/selftests/hid/tests/test_apple_keyboard.py
new file mode 100644
index 000000000000..0e17588b945c
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_apple_keyboard.py
@@ -0,0 +1,441 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2019 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2019 Red Hat, Inc.
+#
+
+from .test_keyboard import ArrayKeyboard, TestArrayKeyboard
+from hidtools.util import BusType
+from . import base
+
+import libevdev
+import logging
+
+logger = logging.getLogger("hidtools.test.apple-keyboard")
+
+KERNEL_MODULE = base.KernelModule("apple", "hid-apple")
+
+
+class KbdData(object):
+ pass
+
+
+class AppleKeyboard(ArrayKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x01, # .Report ID (1)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xe0, # .Usage Minimum (224)
+ 0x29, 0xe7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x01, # .Report Count (1)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x05, # .Report Count (5)
+ 0x05, 0x08, # .Usage Page (LEDs)
+ 0x19, 0x01, # .Usage Minimum (1)
+ 0x29, 0x05, # .Usage Maximum (5)
+ 0x91, 0x02, # .Output (Data,Var,Abs)
+ 0x75, 0x03, # .Report Size (3)
+ 0x95, 0x01, # .Report Count (1)
+ 0x91, 0x01, # .Output (Cnst,Arr,Abs)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x06, # .Report Count (6)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x26, 0xff, 0x00, # .Logical Maximum (255)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x2a, 0xff, 0x00, # .Usage Maximum (255)
+ 0x81, 0x00, # .Input (Data,Arr,Abs)
+ 0xc0, # End Collection
+ 0x05, 0x0c, # Usage Page (Consumer Devices)
+ 0x09, 0x01, # Usage (Consumer Control)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x47, # .Report ID (71)
+ 0x05, 0x01, # .Usage Page (Generic Desktop)
+ 0x09, 0x06, # .Usage (Keyboard)
+ 0xa1, 0x02, # .Collection (Logical)
+ 0x05, 0x06, # ..Usage Page (Generic Device Controls)
+ 0x09, 0x20, # ..Usage (Battery Strength)
+ 0x15, 0x00, # ..Logical Minimum (0)
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255)
+ 0x75, 0x08, # ..Report Size (8)
+ 0x95, 0x01, # ..Report Count (1)
+ 0x81, 0x02, # ..Input (Data,Var,Abs)
+ 0xc0, # .End Collection
+ 0xc0, # End Collection
+ 0x05, 0x0c, # Usage Page (Consumer Devices)
+ 0x09, 0x01, # Usage (Consumer Control)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x11, # .Report ID (17)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x03, # .Report Count (3)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x01, # .Report Count (1)
+ 0x05, 0x0c, # .Usage Page (Consumer Devices)
+ 0x09, 0xb8, # .Usage (Eject)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x06, 0xff, 0x00, # .Usage Page (Vendor Usage Page 0xff)
+ 0x09, 0x03, # .Usage (Vendor Usage 0x03)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x03, # .Report Count (3)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x05, 0x0c, # .Usage Page (Consumer Devices)
+ 0x85, 0x12, # .Report ID (18)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x01, # .Report Count (1)
+ 0x09, 0xcd, # .Usage (Play/Pause)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x09, 0xb3, # .Usage (Fast Forward)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x09, 0xb4, # .Usage (Rewind)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x09, 0xb5, # .Usage (Scan Next Track)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x09, 0xb6, # .Usage (Scan Previous Track)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x85, 0x13, # .Report ID (19)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x01, # .Report Count (1)
+ 0x06, 0x01, 0xff, # .Usage Page (Vendor Usage Page 0xff01)
+ 0x09, 0x0a, # .Usage (Vendor Usage 0x0a)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x06, 0x01, 0xff, # .Usage Page (Vendor Usage Page 0xff01)
+ 0x09, 0x0c, # .Usage (Vendor Usage 0x0c)
+ 0x81, 0x22, # .Input (Data,Var,Abs,NoPref)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x06, # .Report Count (6)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x85, 0x09, # .Report ID (9)
+ 0x09, 0x0b, # .Usage (Vendor Usage 0x0b)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x01, # .Report Count (1)
+ 0xb1, 0x02, # .Feature (Data,Var,Abs)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x02, # .Report Count (2)
+ 0xb1, 0x01, # .Feature (Cnst,Arr,Abs)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(
+ self,
+ rdesc=report_descriptor,
+ name="Apple Wireless Keyboard",
+ input_info=(BusType.BLUETOOTH, 0x05AC, 0x0256),
+ ):
+ super().__init__(rdesc, name, input_info)
+ self.default_reportID = 1
+
+ def send_fn_state(self, state):
+ data = KbdData()
+ setattr(data, "0xff0003", state)
+ r = self.create_report(data, reportID=17)
+ self.call_input_event(r)
+ return [r]
+
+
+class TestAppleKeyboard(TestArrayKeyboard):
+ kernel_modules = [KERNEL_MODULE]
+
+ def create_device(self):
+ return AppleKeyboard()
+
+ def test_single_function_key(self):
+ """check for function key reliability."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(["F4"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 0
+
+ def test_single_fn_function_key(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.send_fn_state(1)
+ r.extend(uhdev.event(["F4"]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ def test_single_fn_function_key_release_first(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.send_fn_state(1)
+ r.extend(uhdev.event(["F4"]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+
+ def test_single_fn_function_key_inverted(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(["F4"])
+ r.extend(uhdev.send_fn_state(1))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ def test_multiple_fn_function_key_release_first(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.send_fn_state(1)
+ r.extend(uhdev.event(["F4"]))
+ r.extend(uhdev.event(["F4", "F6"]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F6, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.event(["F6"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F6, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ def test_multiple_fn_function_key_release_between(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ # press F4
+ r = uhdev.event(["F4"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ # press Fn key
+ r = uhdev.send_fn_state(1)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ # keep F4 and press F6
+ r = uhdev.event(["F4", "F6"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F6, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ # keep F4 and F6
+ r = uhdev.event(["F4", "F6"])
+ expected = []
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ # release Fn key and all keys
+ r = uhdev.send_fn_state(0)
+ r.extend(uhdev.event([]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F6, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ def test_single_pageup_key_release_first(self):
+ """check for function key reliability with the [page] up key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.send_fn_state(1)
+ r.extend(uhdev.event(["UpArrow"]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_PAGEUP, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_PAGEUP] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_UP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_PAGEUP] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_UP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_PAGEUP, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_PAGEUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_UP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
diff --git a/tools/testing/selftests/hid/tests/test_gamepad.py b/tools/testing/selftests/hid/tests/test_gamepad.py
new file mode 100644
index 000000000000..612197805931
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_gamepad.py
@@ -0,0 +1,665 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2019 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2019 Red Hat, Inc.
+#
+
+from . import base
+import libevdev
+import pytest
+
+from .base_gamepad import BaseGamepad, JoystickGamepad, AxisMapping
+from hidtools.util import BusType
+from .base import HidBpf
+
+import logging
+
+logger = logging.getLogger("hidtools.test.gamepad")
+
+
+class BaseTest:
+ class TestGamepad(base.BaseTestCase.TestUhid):
+ @pytest.fixture(autouse=True)
+ def send_initial_state(self):
+ """send an empty report to initialize the axes"""
+ uhdev = self.uhdev
+
+ r = uhdev.event()
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ def assert_button(self, button):
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ buttons = {}
+ key = libevdev.evbit(uhdev.buttons_map[button])
+
+ buttons[button] = True
+ r = uhdev.event(buttons=buttons)
+ expected_event = libevdev.InputEvent(key, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[key] == 1
+
+ buttons[button] = False
+ r = uhdev.event(buttons=buttons)
+ expected_event = libevdev.InputEvent(key, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[key] == 0
+
+ def test_buttons(self):
+ """check for button reliability."""
+ uhdev = self.uhdev
+
+ for b in uhdev.buttons:
+ self.assert_button(b)
+
+ def test_dual_buttons(self):
+ """check for button reliability when pressing 2 buttons"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ # can change intended b1 b2 values
+ b1 = uhdev.buttons[0]
+ key1 = libevdev.evbit(uhdev.buttons_map[b1])
+ b2 = uhdev.buttons[1]
+ key2 = libevdev.evbit(uhdev.buttons_map[b2])
+
+ buttons = {b1: True, b2: True}
+ r = uhdev.event(buttons=buttons)
+ expected_event0 = libevdev.InputEvent(key1, 1)
+ expected_event1 = libevdev.InputEvent(key2, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(
+ (syn_event, expected_event0, expected_event1), events
+ )
+ assert evdev.value[key1] == 1
+ assert evdev.value[key2] == 1
+
+ buttons = {b1: False, b2: None}
+ r = uhdev.event(buttons=buttons)
+ expected_event = libevdev.InputEvent(key1, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[key1] == 0
+ assert evdev.value[key2] == 1
+
+ buttons = {b1: None, b2: False}
+ r = uhdev.event(buttons=buttons)
+ expected_event = libevdev.InputEvent(key2, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[key1] == 0
+ assert evdev.value[key2] == 0
+
+ def _get_libevdev_abs_events(self, which):
+ """Returns which ABS_* evdev axes are expected for the given stick"""
+ abs_map = self.uhdev.axes_map[which]
+
+ x = abs_map["x"].evdev
+ y = abs_map["y"].evdev
+
+ assert x
+ assert y
+
+ return x, y
+
+ def _test_joystick_press(self, which, data):
+ uhdev = self.uhdev
+
+ libevdev_axes = self._get_libevdev_abs_events(which)
+
+ r = None
+ if which == "left_stick":
+ r = uhdev.event(left=data)
+ else:
+ r = uhdev.event(right=data)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ for i, d in enumerate(data):
+ if d is not None and d != 127:
+ assert libevdev.InputEvent(libevdev_axes[i], d) in events
+ else:
+ assert libevdev.InputEvent(libevdev_axes[i]) not in events
+
+ def test_left_joystick_press_left(self):
+ """check for the left joystick reliability"""
+ self._test_joystick_press("left_stick", (63, None))
+ self._test_joystick_press("left_stick", (0, 127))
+
+ def test_left_joystick_press_right(self):
+ """check for the left joystick reliability"""
+ self._test_joystick_press("left_stick", (191, 127))
+ self._test_joystick_press("left_stick", (255, None))
+
+ def test_left_joystick_press_up(self):
+ """check for the left joystick reliability"""
+ self._test_joystick_press("left_stick", (None, 63))
+ self._test_joystick_press("left_stick", (127, 0))
+
+ def test_left_joystick_press_down(self):
+ """check for the left joystick reliability"""
+ self._test_joystick_press("left_stick", (127, 191))
+ self._test_joystick_press("left_stick", (None, 255))
+
+ def test_right_joystick_press_left(self):
+ """check for the right joystick reliability"""
+ self._test_joystick_press("right_stick", (63, None))
+ self._test_joystick_press("right_stick", (0, 127))
+
+ def test_right_joystick_press_right(self):
+ """check for the right joystick reliability"""
+ self._test_joystick_press("right_stick", (191, 127))
+ self._test_joystick_press("right_stick", (255, None))
+
+ def test_right_joystick_press_up(self):
+ """check for the right joystick reliability"""
+ self._test_joystick_press("right_stick", (None, 63))
+ self._test_joystick_press("right_stick", (127, 0))
+
+ def test_right_joystick_press_down(self):
+ """check for the right joystick reliability"""
+ self._test_joystick_press("right_stick", (127, 191))
+ self._test_joystick_press("right_stick", (None, 255))
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Hat switch" not in uhdev.fields,
+ "Device not compatible, missing Hat switch usage",
+ )
+ @pytest.mark.parametrize(
+ "hat_value,expected_evdev,evdev_value",
+ [
+ (0, "ABS_HAT0Y", -1),
+ (2, "ABS_HAT0X", 1),
+ (4, "ABS_HAT0Y", 1),
+ (6, "ABS_HAT0X", -1),
+ ],
+ )
+ def test_hat_switch(self, hat_value, expected_evdev, evdev_value):
+ uhdev = self.uhdev
+
+ r = uhdev.event(hat_switch=hat_value)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert (
+ libevdev.InputEvent(
+ libevdev.evbit("EV_ABS", expected_evdev), evdev_value
+ )
+ in events
+ )
+
+
+class SaitekGamepad(JoystickGamepad):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x04, # Usage (Joystick) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x09, 0x01, # .Usage (Pointer) 6
+ 0xa1, 0x00, # .Collection (Physical) 8
+ 0x85, 0x01, # ..Report ID (1) 10
+ 0x09, 0x30, # ..Usage (X) 12
+ 0x15, 0x00, # ..Logical Minimum (0) 14
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 16
+ 0x35, 0x00, # ..Physical Minimum (0) 19
+ 0x46, 0xff, 0x00, # ..Physical Maximum (255) 21
+ 0x75, 0x08, # ..Report Size (8) 24
+ 0x95, 0x01, # ..Report Count (1) 26
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 28
+ 0x09, 0x31, # ..Usage (Y) 30
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 32
+ 0x05, 0x02, # ..Usage Page (Simulation Controls) 34
+ 0x09, 0xba, # ..Usage (Rudder) 36
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 38
+ 0x09, 0xbb, # ..Usage (Throttle) 40
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 42
+ 0x05, 0x09, # ..Usage Page (Button) 44
+ 0x19, 0x01, # ..Usage Minimum (1) 46
+ 0x29, 0x0c, # ..Usage Maximum (12) 48
+ 0x25, 0x01, # ..Logical Maximum (1) 50
+ 0x45, 0x01, # ..Physical Maximum (1) 52
+ 0x75, 0x01, # ..Report Size (1) 54
+ 0x95, 0x0c, # ..Report Count (12) 56
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 58
+ 0x95, 0x01, # ..Report Count (1) 60
+ 0x75, 0x00, # ..Report Size (0) 62
+ 0x81, 0x03, # ..Input (Cnst,Var,Abs) 64
+ 0x05, 0x01, # ..Usage Page (Generic Desktop) 66
+ 0x09, 0x39, # ..Usage (Hat switch) 68
+ 0x25, 0x07, # ..Logical Maximum (7) 70
+ 0x46, 0x3b, 0x01, # ..Physical Maximum (315) 72
+ 0x55, 0x00, # ..Unit Exponent (0) 75
+ 0x65, 0x44, # ..Unit (Degrees^4,EngRotation) 77
+ 0x75, 0x04, # ..Report Size (4) 79
+ 0x81, 0x42, # ..Input (Data,Var,Abs,Null) 81
+ 0x65, 0x00, # ..Unit (None) 83
+ 0xc0, # .End Collection 85
+ 0x05, 0x0f, # .Usage Page (Vendor Usage Page 0x0f) 86
+ 0x09, 0x92, # .Usage (Vendor Usage 0x92) 88
+ 0xa1, 0x02, # .Collection (Logical) 90
+ 0x85, 0x02, # ..Report ID (2) 92
+ 0x09, 0xa0, # ..Usage (Vendor Usage 0xa0) 94
+ 0x09, 0x9f, # ..Usage (Vendor Usage 0x9f) 96
+ 0x25, 0x01, # ..Logical Maximum (1) 98
+ 0x45, 0x00, # ..Physical Maximum (0) 100
+ 0x75, 0x01, # ..Report Size (1) 102
+ 0x95, 0x02, # ..Report Count (2) 104
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 106
+ 0x75, 0x06, # ..Report Size (6) 108
+ 0x95, 0x01, # ..Report Count (1) 110
+ 0x81, 0x03, # ..Input (Cnst,Var,Abs) 112
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 114
+ 0x75, 0x07, # ..Report Size (7) 116
+ 0x25, 0x7f, # ..Logical Maximum (127) 118
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 120
+ 0x09, 0x94, # ..Usage (Vendor Usage 0x94) 122
+ 0x75, 0x01, # ..Report Size (1) 124
+ 0x25, 0x01, # ..Logical Maximum (1) 126
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 128
+ 0xc0, # .End Collection 130
+ 0x09, 0x21, # .Usage (Vendor Usage 0x21) 131
+ 0xa1, 0x02, # .Collection (Logical) 133
+ 0x85, 0x0b, # ..Report ID (11) 135
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 137
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 139
+ 0x75, 0x08, # ..Report Size (8) 142
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 144
+ 0x09, 0x53, # ..Usage (Vendor Usage 0x53) 146
+ 0x25, 0x0a, # ..Logical Maximum (10) 148
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 150
+ 0x09, 0x50, # ..Usage (Vendor Usage 0x50) 152
+ 0x27, 0xfe, 0xff, 0x00, 0x00, # ..Logical Maximum (65534) 154
+ 0x47, 0xfe, 0xff, 0x00, 0x00, # ..Physical Maximum (65534) 159
+ 0x75, 0x10, # ..Report Size (16) 164
+ 0x55, 0xfd, # ..Unit Exponent (237) 166
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 168
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 171
+ 0x55, 0x00, # ..Unit Exponent (0) 173
+ 0x65, 0x00, # ..Unit (None) 175
+ 0x09, 0x54, # ..Usage (Vendor Usage 0x54) 177
+ 0x55, 0xfd, # ..Unit Exponent (237) 179
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 181
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 184
+ 0x55, 0x00, # ..Unit Exponent (0) 186
+ 0x65, 0x00, # ..Unit (None) 188
+ 0x09, 0xa7, # ..Usage (Vendor Usage 0xa7) 190
+ 0x55, 0xfd, # ..Unit Exponent (237) 192
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 194
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 197
+ 0x55, 0x00, # ..Unit Exponent (0) 199
+ 0x65, 0x00, # ..Unit (None) 201
+ 0xc0, # .End Collection 203
+ 0x09, 0x5a, # .Usage (Vendor Usage 0x5a) 204
+ 0xa1, 0x02, # .Collection (Logical) 206
+ 0x85, 0x0c, # ..Report ID (12) 208
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 210
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 212
+ 0x45, 0x00, # ..Physical Maximum (0) 215
+ 0x75, 0x08, # ..Report Size (8) 217
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 219
+ 0x09, 0x5c, # ..Usage (Vendor Usage 0x5c) 221
+ 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 223
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 226
+ 0x75, 0x10, # ..Report Size (16) 229
+ 0x55, 0xfd, # ..Unit Exponent (237) 231
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 233
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 236
+ 0x55, 0x00, # ..Unit Exponent (0) 238
+ 0x65, 0x00, # ..Unit (None) 240
+ 0x09, 0x5b, # ..Usage (Vendor Usage 0x5b) 242
+ 0x25, 0x7f, # ..Logical Maximum (127) 244
+ 0x75, 0x08, # ..Report Size (8) 246
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 248
+ 0x09, 0x5e, # ..Usage (Vendor Usage 0x5e) 250
+ 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 252
+ 0x75, 0x10, # ..Report Size (16) 255
+ 0x55, 0xfd, # ..Unit Exponent (237) 257
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 259
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 262
+ 0x55, 0x00, # ..Unit Exponent (0) 264
+ 0x65, 0x00, # ..Unit (None) 266
+ 0x09, 0x5d, # ..Usage (Vendor Usage 0x5d) 268
+ 0x25, 0x7f, # ..Logical Maximum (127) 270
+ 0x75, 0x08, # ..Report Size (8) 272
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 274
+ 0xc0, # .End Collection 276
+ 0x09, 0x73, # .Usage (Vendor Usage 0x73) 277
+ 0xa1, 0x02, # .Collection (Logical) 279
+ 0x85, 0x0d, # ..Report ID (13) 281
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 283
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 285
+ 0x45, 0x00, # ..Physical Maximum (0) 288
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 290
+ 0x09, 0x70, # ..Usage (Vendor Usage 0x70) 292
+ 0x15, 0x81, # ..Logical Minimum (-127) 294
+ 0x25, 0x7f, # ..Logical Maximum (127) 296
+ 0x36, 0xf0, 0xd8, # ..Physical Minimum (-10000) 298
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 301
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 304
+ 0xc0, # .End Collection 306
+ 0x09, 0x6e, # .Usage (Vendor Usage 0x6e) 307
+ 0xa1, 0x02, # .Collection (Logical) 309
+ 0x85, 0x0e, # ..Report ID (14) 311
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 313
+ 0x15, 0x00, # ..Logical Minimum (0) 315
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 317
+ 0x35, 0x00, # ..Physical Minimum (0) 320
+ 0x45, 0x00, # ..Physical Maximum (0) 322
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 324
+ 0x09, 0x70, # ..Usage (Vendor Usage 0x70) 326
+ 0x25, 0x7f, # ..Logical Maximum (127) 328
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 330
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 333
+ 0x09, 0x6f, # ..Usage (Vendor Usage 0x6f) 335
+ 0x15, 0x81, # ..Logical Minimum (-127) 337
+ 0x36, 0xf0, 0xd8, # ..Physical Minimum (-10000) 339
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 342
+ 0x09, 0x71, # ..Usage (Vendor Usage 0x71) 344
+ 0x15, 0x00, # ..Logical Minimum (0) 346
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 348
+ 0x35, 0x00, # ..Physical Minimum (0) 351
+ 0x46, 0x68, 0x01, # ..Physical Maximum (360) 353
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 356
+ 0x09, 0x72, # ..Usage (Vendor Usage 0x72) 358
+ 0x75, 0x10, # ..Report Size (16) 360
+ 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 362
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 365
+ 0x55, 0xfd, # ..Unit Exponent (237) 368
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 370
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 373
+ 0x55, 0x00, # ..Unit Exponent (0) 375
+ 0x65, 0x00, # ..Unit (None) 377
+ 0xc0, # .End Collection 379
+ 0x09, 0x77, # .Usage (Vendor Usage 0x77) 380
+ 0xa1, 0x02, # .Collection (Logical) 382
+ 0x85, 0x51, # ..Report ID (81) 384
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 386
+ 0x25, 0x7f, # ..Logical Maximum (127) 388
+ 0x45, 0x00, # ..Physical Maximum (0) 390
+ 0x75, 0x08, # ..Report Size (8) 392
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 394
+ 0x09, 0x78, # ..Usage (Vendor Usage 0x78) 396
+ 0xa1, 0x02, # ..Collection (Logical) 398
+ 0x09, 0x7b, # ...Usage (Vendor Usage 0x7b) 400
+ 0x09, 0x79, # ...Usage (Vendor Usage 0x79) 402
+ 0x09, 0x7a, # ...Usage (Vendor Usage 0x7a) 404
+ 0x15, 0x01, # ...Logical Minimum (1) 406
+ 0x25, 0x03, # ...Logical Maximum (3) 408
+ 0x91, 0x00, # ...Output (Data,Arr,Abs) 410
+ 0xc0, # ..End Collection 412
+ 0x09, 0x7c, # ..Usage (Vendor Usage 0x7c) 413
+ 0x15, 0x00, # ..Logical Minimum (0) 415
+ 0x26, 0xfe, 0x00, # ..Logical Maximum (254) 417
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 420
+ 0xc0, # .End Collection 422
+ 0x09, 0x92, # .Usage (Vendor Usage 0x92) 423
+ 0xa1, 0x02, # .Collection (Logical) 425
+ 0x85, 0x52, # ..Report ID (82) 427
+ 0x09, 0x96, # ..Usage (Vendor Usage 0x96) 429
+ 0xa1, 0x02, # ..Collection (Logical) 431
+ 0x09, 0x9a, # ...Usage (Vendor Usage 0x9a) 433
+ 0x09, 0x99, # ...Usage (Vendor Usage 0x99) 435
+ 0x09, 0x97, # ...Usage (Vendor Usage 0x97) 437
+ 0x09, 0x98, # ...Usage (Vendor Usage 0x98) 439
+ 0x09, 0x9b, # ...Usage (Vendor Usage 0x9b) 441
+ 0x09, 0x9c, # ...Usage (Vendor Usage 0x9c) 443
+ 0x15, 0x01, # ...Logical Minimum (1) 445
+ 0x25, 0x06, # ...Logical Maximum (6) 447
+ 0x91, 0x00, # ...Output (Data,Arr,Abs) 449
+ 0xc0, # ..End Collection 451
+ 0xc0, # .End Collection 452
+ 0x05, 0xff, # .Usage Page (Vendor Usage Page 0xff) 453
+ 0x0a, 0x01, 0x03, # .Usage (Vendor Usage 0x301) 455
+ 0xa1, 0x02, # .Collection (Logical) 458
+ 0x85, 0x40, # ..Report ID (64) 460
+ 0x0a, 0x02, 0x03, # ..Usage (Vendor Usage 0x302) 462
+ 0xa1, 0x02, # ..Collection (Logical) 465
+ 0x1a, 0x11, 0x03, # ...Usage Minimum (785) 467
+ 0x2a, 0x20, 0x03, # ...Usage Maximum (800) 470
+ 0x25, 0x10, # ...Logical Maximum (16) 473
+ 0x91, 0x00, # ...Output (Data,Arr,Abs) 475
+ 0xc0, # ..End Collection 477
+ 0x0a, 0x03, 0x03, # ..Usage (Vendor Usage 0x303) 478
+ 0x15, 0x00, # ..Logical Minimum (0) 481
+ 0x27, 0xff, 0xff, 0x00, 0x00, # ..Logical Maximum (65535) 483
+ 0x75, 0x10, # ..Report Size (16) 488
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 490
+ 0xc0, # .End Collection 492
+ 0x05, 0x0f, # .Usage Page (Vendor Usage Page 0x0f) 493
+ 0x09, 0x7d, # .Usage (Vendor Usage 0x7d) 495
+ 0xa1, 0x02, # .Collection (Logical) 497
+ 0x85, 0x43, # ..Report ID (67) 499
+ 0x09, 0x7e, # ..Usage (Vendor Usage 0x7e) 501
+ 0x26, 0x80, 0x00, # ..Logical Maximum (128) 503
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 506
+ 0x75, 0x08, # ..Report Size (8) 509
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 511
+ 0xc0, # .End Collection 513
+ 0x09, 0x7f, # .Usage (Vendor Usage 0x7f) 514
+ 0xa1, 0x02, # .Collection (Logical) 516
+ 0x85, 0x0b, # ..Report ID (11) 518
+ 0x09, 0x80, # ..Usage (Vendor Usage 0x80) 520
+ 0x26, 0xff, 0x7f, # ..Logical Maximum (32767) 522
+ 0x45, 0x00, # ..Physical Maximum (0) 525
+ 0x75, 0x0f, # ..Report Size (15) 527
+ 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 529
+ 0x09, 0xa9, # ..Usage (Vendor Usage 0xa9) 531
+ 0x25, 0x01, # ..Logical Maximum (1) 533
+ 0x75, 0x01, # ..Report Size (1) 535
+ 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 537
+ 0x09, 0x83, # ..Usage (Vendor Usage 0x83) 539
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 541
+ 0x75, 0x08, # ..Report Size (8) 544
+ 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 546
+ 0xc0, # .End Collection 548
+ 0x09, 0xab, # .Usage (Vendor Usage 0xab) 549
+ 0xa1, 0x03, # .Collection (Report) 551
+ 0x85, 0x15, # ..Report ID (21) 553
+ 0x09, 0x25, # ..Usage (Vendor Usage 0x25) 555
+ 0xa1, 0x02, # ..Collection (Logical) 557
+ 0x09, 0x26, # ...Usage (Vendor Usage 0x26) 559
+ 0x09, 0x30, # ...Usage (Vendor Usage 0x30) 561
+ 0x09, 0x32, # ...Usage (Vendor Usage 0x32) 563
+ 0x09, 0x31, # ...Usage (Vendor Usage 0x31) 565
+ 0x09, 0x33, # ...Usage (Vendor Usage 0x33) 567
+ 0x09, 0x34, # ...Usage (Vendor Usage 0x34) 569
+ 0x15, 0x01, # ...Logical Minimum (1) 571
+ 0x25, 0x06, # ...Logical Maximum (6) 573
+ 0xb1, 0x00, # ...Feature (Data,Arr,Abs) 575
+ 0xc0, # ..End Collection 577
+ 0xc0, # .End Collection 578
+ 0x09, 0x89, # .Usage (Vendor Usage 0x89) 579
+ 0xa1, 0x03, # .Collection (Report) 581
+ 0x85, 0x16, # ..Report ID (22) 583
+ 0x09, 0x8b, # ..Usage (Vendor Usage 0x8b) 585
+ 0xa1, 0x02, # ..Collection (Logical) 587
+ 0x09, 0x8c, # ...Usage (Vendor Usage 0x8c) 589
+ 0x09, 0x8d, # ...Usage (Vendor Usage 0x8d) 591
+ 0x09, 0x8e, # ...Usage (Vendor Usage 0x8e) 593
+ 0x25, 0x03, # ...Logical Maximum (3) 595
+ 0xb1, 0x00, # ...Feature (Data,Arr,Abs) 597
+ 0xc0, # ..End Collection 599
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 600
+ 0x15, 0x00, # ..Logical Minimum (0) 602
+ 0x26, 0xfe, 0x00, # ..Logical Maximum (254) 604
+ 0xb1, 0x02, # ..Feature (Data,Var,Abs) 607
+ 0xc0, # .End Collection 609
+ 0x09, 0x90, # .Usage (Vendor Usage 0x90) 610
+ 0xa1, 0x03, # .Collection (Report) 612
+ 0x85, 0x50, # ..Report ID (80) 614
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 616
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 618
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 621
+ 0xc0, # .End Collection 623
+ 0xc0, # End Collection 624
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None):
+ super().__init__(rdesc, name=name, input_info=(BusType.USB, 0x06A3, 0xFF0D))
+ self.buttons = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
+
+
+class AsusGamepad(BaseGamepad):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x05, # Usage (Game Pad) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x85, 0x01, # .Report ID (1) 6
+ 0x05, 0x09, # .Usage Page (Button) 8
+ 0x0a, 0x01, 0x00, # .Usage (Vendor Usage 0x01) 10
+ 0x0a, 0x02, 0x00, # .Usage (Vendor Usage 0x02) 13
+ 0x0a, 0x04, 0x00, # .Usage (Vendor Usage 0x04) 16
+ 0x0a, 0x05, 0x00, # .Usage (Vendor Usage 0x05) 19
+ 0x0a, 0x07, 0x00, # .Usage (Vendor Usage 0x07) 22
+ 0x0a, 0x08, 0x00, # .Usage (Vendor Usage 0x08) 25
+ 0x0a, 0x0e, 0x00, # .Usage (Vendor Usage 0x0e) 28
+ 0x0a, 0x0f, 0x00, # .Usage (Vendor Usage 0x0f) 31
+ 0x0a, 0x0d, 0x00, # .Usage (Vendor Usage 0x0d) 34
+ 0x05, 0x0c, # .Usage Page (Consumer Devices) 37
+ 0x0a, 0x24, 0x02, # .Usage (AC Back) 39
+ 0x0a, 0x23, 0x02, # .Usage (AC Home) 42
+ 0x15, 0x00, # .Logical Minimum (0) 45
+ 0x25, 0x01, # .Logical Maximum (1) 47
+ 0x75, 0x01, # .Report Size (1) 49
+ 0x95, 0x0b, # .Report Count (11) 51
+ 0x81, 0x02, # .Input (Data,Var,Abs) 53
+ 0x75, 0x01, # .Report Size (1) 55
+ 0x95, 0x01, # .Report Count (1) 57
+ 0x81, 0x03, # .Input (Cnst,Var,Abs) 59
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 61
+ 0x75, 0x04, # .Report Size (4) 63
+ 0x95, 0x01, # .Report Count (1) 65
+ 0x25, 0x07, # .Logical Maximum (7) 67
+ 0x46, 0x3b, 0x01, # .Physical Maximum (315) 69
+ 0x66, 0x14, 0x00, # .Unit (Degrees,EngRotation) 72
+ 0x09, 0x39, # .Usage (Hat switch) 75
+ 0x81, 0x42, # .Input (Data,Var,Abs,Null) 77
+ 0x66, 0x00, 0x00, # .Unit (None) 79
+ 0x09, 0x01, # .Usage (Pointer) 82
+ 0xa1, 0x00, # .Collection (Physical) 84
+ 0x09, 0x30, # ..Usage (X) 86
+ 0x09, 0x31, # ..Usage (Y) 88
+ 0x09, 0x32, # ..Usage (Z) 90
+ 0x09, 0x35, # ..Usage (Rz) 92
+ 0x05, 0x02, # ..Usage Page (Simulation Controls) 94
+ 0x09, 0xc5, # ..Usage (Brake) 96
+ 0x09, 0xc4, # ..Usage (Accelerator) 98
+ 0x15, 0x00, # ..Logical Minimum (0) 100
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 102
+ 0x35, 0x00, # ..Physical Minimum (0) 105
+ 0x46, 0xff, 0x00, # ..Physical Maximum (255) 107
+ 0x75, 0x08, # ..Report Size (8) 110
+ 0x95, 0x06, # ..Report Count (6) 112
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 114
+ 0xc0, # .End Collection 116
+ 0x85, 0x02, # .Report ID (2) 117
+ 0x05, 0x08, # .Usage Page (LEDs) 119
+ 0x0a, 0x01, 0x00, # .Usage (Num Lock) 121
+ 0x0a, 0x02, 0x00, # .Usage (Caps Lock) 124
+ 0x0a, 0x03, 0x00, # .Usage (Scroll Lock) 127
+ 0x0a, 0x04, 0x00, # .Usage (Compose) 130
+ 0x15, 0x00, # .Logical Minimum (0) 133
+ 0x25, 0x01, # .Logical Maximum (1) 135
+ 0x75, 0x01, # .Report Size (1) 137
+ 0x95, 0x04, # .Report Count (4) 139
+ 0x91, 0x02, # .Output (Data,Var,Abs) 141
+ 0x75, 0x04, # .Report Size (4) 143
+ 0x95, 0x01, # .Report Count (1) 145
+ 0x91, 0x03, # .Output (Cnst,Var,Abs) 147
+ 0xc0, # End Collection 149
+ 0x05, 0x0c, # Usage Page (Consumer Devices) 150
+ 0x09, 0x01, # Usage (Consumer Control) 152
+ 0xa1, 0x01, # Collection (Application) 154
+ 0x85, 0x03, # .Report ID (3) 156
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 158
+ 0x09, 0x06, # .Usage (Keyboard) 160
+ 0xa1, 0x02, # .Collection (Logical) 162
+ 0x05, 0x06, # ..Usage Page (Generic Device Controls) 164
+ 0x09, 0x20, # ..Usage (Battery Strength) 166
+ 0x15, 0x00, # ..Logical Minimum (0) 168
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 170
+ 0x75, 0x08, # ..Report Size (8) 173
+ 0x95, 0x01, # ..Report Count (1) 175
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 177
+ 0x06, 0xbc, 0xff, # ..Usage Page (Vendor Usage Page 0xffbc) 179
+ 0x0a, 0xad, 0xbd, # ..Usage (Vendor Usage 0xbdad) 182
+ 0x75, 0x08, # ..Report Size (8) 185
+ 0x95, 0x06, # ..Report Count (6) 187
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 189
+ 0xc0, # .End Collection 191
+ 0xc0, # End Collection 192
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None):
+ super().__init__(rdesc, name=name, input_info=(BusType.USB, 0x18D1, 0x2C40))
+ self.buttons = (1, 2, 4, 5, 7, 8, 14, 15, 13)
+
+
+class RaptorMach2Joystick(JoystickGamepad):
+ axes_map = {
+ "left_stick": {
+ "x": AxisMapping("x"),
+ "y": AxisMapping("y"),
+ },
+ "right_stick": {
+ "x": AxisMapping("z"),
+ "y": AxisMapping("Rz"),
+ },
+ }
+
+ def __init__(
+ self,
+ name,
+ rdesc=None,
+ application="Joystick",
+ input_info=(BusType.USB, 0x11C0, 0x5606),
+ ):
+ super().__init__(rdesc, application, name, input_info)
+ self.buttons = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
+ self.hat_switch = 240 # null value is 240 as max is 239
+
+ def event(
+ self, *, left=(None, None), right=(None, None), hat_switch=None, buttons=None
+ ):
+ if hat_switch is not None:
+ hat_switch *= 30
+
+ return super().event(
+ left=left, right=right, hat_switch=hat_switch, buttons=buttons
+ )
+
+
+class TestSaitekGamepad(BaseTest.TestGamepad):
+ def create_device(self):
+ return SaitekGamepad()
+
+
+class TestAsusGamepad(BaseTest.TestGamepad):
+ def create_device(self):
+ return AsusGamepad()
+
+
+class TestRaptorMach2Joystick(BaseTest.TestGamepad):
+ hid_bpfs = [HidBpf("FR-TEC__Raptor-Mach-2.bpf.o", True)]
+
+ def create_device(self):
+ return RaptorMach2Joystick(
+ "uhid test Sanmos Group FR-TEC Raptor MACH 2",
+ rdesc="05 01 09 04 a1 01 05 01 85 01 05 01 09 30 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 31 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 33 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 00 09 00 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 32 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 35 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 34 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 36 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 09 19 01 2a 1d 00 15 00 25 01 75 01 96 80 00 81 02 05 01 09 39 26 ef 00 46 68 01 65 14 75 10 95 01 81 42 05 01 09 00 75 08 95 1d 81 01 15 00 26 ef 00 85 58 26 ff 00 46 ff 00 75 08 95 3f 09 00 91 02 85 59 75 08 95 80 09 00 b1 02 c0",
+ input_info=(BusType.USB, 0x11C0, 0x5606),
+ )
diff --git a/tools/testing/selftests/hid/tests/test_hid_core.py b/tools/testing/selftests/hid/tests/test_hid_core.py
new file mode 100644
index 000000000000..9a7fe40020d2
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_hid_core.py
@@ -0,0 +1,154 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# This is for generic devices
+
+from . import base
+import logging
+
+logger = logging.getLogger("hidtools.test.hid")
+
+
+class TestCollectionOverflow(base.BaseTestCase.TestUhid):
+ """
+ Test class to test re-allocation of the HID collection stack in
+ hid-core.c.
+ """
+
+ def create_device(self):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # .Usage Page (Generic Desktop)
+ 0x09, 0x02, # .Usage (Mouse)
+ 0xa1, 0x01, # .Collection (Application)
+ 0x09, 0x02, # ..Usage (Mouse)
+ 0xa1, 0x02, # ..Collection (Logical)
+ 0x09, 0x01, # ...Usage (Pointer)
+ 0xa1, 0x00, # ...Collection (Physical)
+ 0x05, 0x09, # ....Usage Page (Button)
+ 0x19, 0x01, # ....Usage Minimum (1)
+ 0x29, 0x03, # ....Usage Maximum (3)
+ 0x15, 0x00, # ....Logical Minimum (0)
+ 0x25, 0x01, # ....Logical Maximum (1)
+ 0x75, 0x01, # ....Report Size (1)
+ 0x95, 0x03, # ....Report Count (3)
+ 0x81, 0x02, # ....Input (Data,Var,Abs)
+ 0x75, 0x05, # ....Report Size (5)
+ 0x95, 0x01, # ....Report Count (1)
+ 0x81, 0x03, # ....Input (Cnst,Var,Abs)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0x05, 0x01, # .....Usage Page (Generic Desktop)
+ 0x09, 0x30, # .....Usage (X)
+ 0x09, 0x31, # .....Usage (Y)
+ 0x15, 0x81, # .....Logical Minimum (-127)
+ 0x25, 0x7f, # .....Logical Maximum (127)
+ 0x75, 0x08, # .....Report Size (8)
+ 0x95, 0x02, # .....Report Count (2)
+ 0x81, 0x06, # .....Input (Data,Var,Rel)
+ 0xa1, 0x02, # ...Collection (Logical)
+ 0x85, 0x12, # ....Report ID (18)
+ 0x09, 0x48, # ....Usage (Resolution Multiplier)
+ 0x95, 0x01, # ....Report Count (1)
+ 0x75, 0x02, # ....Report Size (2)
+ 0x15, 0x00, # ....Logical Minimum (0)
+ 0x25, 0x01, # ....Logical Maximum (1)
+ 0x35, 0x01, # ....Physical Minimum (1)
+ 0x45, 0x0c, # ....Physical Maximum (12)
+ 0xb1, 0x02, # ....Feature (Data,Var,Abs)
+ 0x85, 0x1a, # ....Report ID (26)
+ 0x09, 0x38, # ....Usage (Wheel)
+ 0x35, 0x00, # ....Physical Minimum (0)
+ 0x45, 0x00, # ....Physical Maximum (0)
+ 0x95, 0x01, # ....Report Count (1)
+ 0x75, 0x10, # ....Report Size (16)
+ 0x16, 0x01, 0x80, # ....Logical Minimum (-32767)
+ 0x26, 0xff, 0x7f, # ....Logical Maximum (32767)
+ 0x81, 0x06, # ....Input (Data,Var,Rel)
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ..End Collection
+ 0xc0, # .End Collection
+ ]
+ # fmt: on
+ return base.UHIDTestDevice(
+ name=None, rdesc=report_descriptor, application="Mouse"
+ )
+
+ def test_rdesc(self):
+ """
+ This test can only check for negatives. If the kernel crashes, you
+ know why. If this test passes, either the bug isn't present or just
+ didn't get triggered. No way to know.
+
+ For an explanation, see kernel patch
+ HID: core: replace the collection tree pointers with indices
+ """
+ pass
diff --git a/tools/testing/selftests/hid/tests/test_ite_keyboard.py b/tools/testing/selftests/hid/tests/test_ite_keyboard.py
new file mode 100644
index 000000000000..f695eaad1648
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_ite_keyboard.py
@@ -0,0 +1,167 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2020 Red Hat, Inc.
+#
+
+from .test_keyboard import ArrayKeyboard, TestArrayKeyboard
+from hidtools.util import BusType
+
+import libevdev
+import logging
+from . import base
+
+logger = logging.getLogger("hidtools.test.ite-keyboard")
+
+KERNEL_MODULE = base.KernelModule("itetech", "hid_ite")
+
+
+class KbdData(object):
+ pass
+
+
+# The ITE keyboards have an issue regarding the Wifi key:
+# nothing comes in when pressing the key, but we get a null
+# event on the key release.
+# This test covers this case.
+class ITEKeyboard(ArrayKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x06, 0x85, 0xff, # Usage Page (Vendor Usage Page 0xff85)
+ 0x09, 0x95, # Usage (Vendor Usage 0x95) 3
+ 0xa1, 0x01, # Collection (Application) 5
+ 0x85, 0x5a, # .Report ID (90) 7
+ 0x09, 0x01, # .Usage (Vendor Usage 0x01) 9
+ 0x15, 0x00, # .Logical Minimum (0) 11
+ 0x26, 0xff, 0x00, # .Logical Maximum (255) 13
+ 0x75, 0x08, # .Report Size (8) 16
+ 0x95, 0x10, # .Report Count (16) 18
+ 0xb1, 0x00, # .Feature (Data,Arr,Abs) 20
+ 0xc0, # End Collection 22
+ 0x05, 0x01, # Usage Page (Generic Desktop) 23
+ 0x09, 0x06, # Usage (Keyboard) 25
+ 0xa1, 0x01, # Collection (Application) 27
+ 0x85, 0x01, # .Report ID (1) 29
+ 0x75, 0x01, # .Report Size (1) 31
+ 0x95, 0x08, # .Report Count (8) 33
+ 0x05, 0x07, # .Usage Page (Keyboard) 35
+ 0x19, 0xe0, # .Usage Minimum (224) 37
+ 0x29, 0xe7, # .Usage Maximum (231) 39
+ 0x15, 0x00, # .Logical Minimum (0) 41
+ 0x25, 0x01, # .Logical Maximum (1) 43
+ 0x81, 0x02, # .Input (Data,Var,Abs) 45
+ 0x95, 0x01, # .Report Count (1) 47
+ 0x75, 0x08, # .Report Size (8) 49
+ 0x81, 0x03, # .Input (Cnst,Var,Abs) 51
+ 0x95, 0x05, # .Report Count (5) 53
+ 0x75, 0x01, # .Report Size (1) 55
+ 0x05, 0x08, # .Usage Page (LEDs) 57
+ 0x19, 0x01, # .Usage Minimum (1) 59
+ 0x29, 0x05, # .Usage Maximum (5) 61
+ 0x91, 0x02, # .Output (Data,Var,Abs) 63
+ 0x95, 0x01, # .Report Count (1) 65
+ 0x75, 0x03, # .Report Size (3) 67
+ 0x91, 0x03, # .Output (Cnst,Var,Abs) 69
+ 0x95, 0x06, # .Report Count (6) 71
+ 0x75, 0x08, # .Report Size (8) 73
+ 0x15, 0x00, # .Logical Minimum (0) 75
+ 0x26, 0xff, 0x00, # .Logical Maximum (255) 77
+ 0x05, 0x07, # .Usage Page (Keyboard) 80
+ 0x19, 0x00, # .Usage Minimum (0) 82
+ 0x2a, 0xff, 0x00, # .Usage Maximum (255) 84
+ 0x81, 0x00, # .Input (Data,Arr,Abs) 87
+ 0xc0, # End Collection 89
+ 0x05, 0x0c, # Usage Page (Consumer Devices) 90
+ 0x09, 0x01, # Usage (Consumer Control) 92
+ 0xa1, 0x01, # Collection (Application) 94
+ 0x85, 0x02, # .Report ID (2) 96
+ 0x19, 0x00, # .Usage Minimum (0) 98
+ 0x2a, 0x3c, 0x02, # .Usage Maximum (572) 100
+ 0x15, 0x00, # .Logical Minimum (0) 103
+ 0x26, 0x3c, 0x02, # .Logical Maximum (572) 105
+ 0x75, 0x10, # .Report Size (16) 108
+ 0x95, 0x01, # .Report Count (1) 110
+ 0x81, 0x00, # .Input (Data,Arr,Abs) 112
+ 0xc0, # End Collection 114
+ 0x05, 0x01, # Usage Page (Generic Desktop) 115
+ 0x09, 0x0c, # Usage (Wireless Radio Controls) 117
+ 0xa1, 0x01, # Collection (Application) 119
+ 0x85, 0x03, # .Report ID (3) 121
+ 0x15, 0x00, # .Logical Minimum (0) 123
+ 0x25, 0x01, # .Logical Maximum (1) 125
+ 0x09, 0xc6, # .Usage (Wireless Radio Button) 127
+ 0x95, 0x01, # .Report Count (1) 129
+ 0x75, 0x01, # .Report Size (1) 131
+ 0x81, 0x06, # .Input (Data,Var,Rel) 133
+ 0x75, 0x07, # .Report Size (7) 135
+ 0x81, 0x03, # .Input (Cnst,Var,Abs) 137
+ 0xc0, # End Collection 139
+ 0x05, 0x88, # Usage Page (Vendor Usage Page 0x88) 140
+ 0x09, 0x01, # Usage (Vendor Usage 0x01) 142
+ 0xa1, 0x01, # Collection (Application) 144
+ 0x85, 0x04, # .Report ID (4) 146
+ 0x19, 0x00, # .Usage Minimum (0) 148
+ 0x2a, 0xff, 0xff, # .Usage Maximum (65535) 150
+ 0x15, 0x00, # .Logical Minimum (0) 153
+ 0x26, 0xff, 0xff, # .Logical Maximum (65535) 155
+ 0x75, 0x08, # .Report Size (8) 158
+ 0x95, 0x02, # .Report Count (2) 160
+ 0x81, 0x02, # .Input (Data,Var,Abs) 162
+ 0xc0, # End Collection 164
+ 0x05, 0x01, # Usage Page (Generic Desktop) 165
+ 0x09, 0x80, # Usage (System Control) 167
+ 0xa1, 0x01, # Collection (Application) 169
+ 0x85, 0x05, # .Report ID (5) 171
+ 0x19, 0x81, # .Usage Minimum (129) 173
+ 0x29, 0x83, # .Usage Maximum (131) 175
+ 0x15, 0x00, # .Logical Minimum (0) 177
+ 0x25, 0x01, # .Logical Maximum (1) 179
+ 0x95, 0x08, # .Report Count (8) 181
+ 0x75, 0x01, # .Report Size (1) 183
+ 0x81, 0x02, # .Input (Data,Var,Abs) 185
+ 0xc0, # End Collection 187
+ ]
+ # fmt: on
+
+ def __init__(
+ self,
+ rdesc=report_descriptor,
+ name=None,
+ input_info=(BusType.USB, 0x06CB, 0x2968),
+ ):
+ super().__init__(rdesc, name, input_info)
+
+ def event(self, keys, reportID=None, application=None):
+ application = application or "Keyboard"
+ return super().event(keys, reportID, application)
+
+
+class TestITEKeyboard(TestArrayKeyboard):
+ kernel_modules = [KERNEL_MODULE]
+
+ def create_device(self):
+ return ITEKeyboard()
+
+ def test_wifi_key(self):
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+
+ # the following sends a 'release' event on the Wifi key.
+ # the kernel is supposed to translate this into Wifi key
+ # down and up
+ r = [0x03, 0x00]
+ uhdev.call_input_event(r)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_RFKILL, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports([r], uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_RFKILL, 0))
+ # the kernel sends the two down/up key events in a batch, no need to
+ # call events = uhdev.next_sync_events()
+ self.debug_reports([], uhdev, events)
+ self.assertInputEventsIn(expected, events)
diff --git a/tools/testing/selftests/hid/tests/test_keyboard.py b/tools/testing/selftests/hid/tests/test_keyboard.py
new file mode 100644
index 000000000000..b3b2bdbf63b7
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_keyboard.py
@@ -0,0 +1,485 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2018 Red Hat, Inc.
+#
+
+from . import base
+import hidtools.hid
+import libevdev
+import logging
+
+logger = logging.getLogger("hidtools.test.keyboard")
+
+
+class InvalidHIDCommunication(Exception):
+ pass
+
+
+class KeyboardData(object):
+ pass
+
+
+class BaseKeyboard(base.UHIDTestDevice):
+ def __init__(self, rdesc, name=None, input_info=None):
+ assert rdesc is not None
+ super().__init__(name, "Key", input_info=input_info, rdesc=rdesc)
+ self.keystates = {}
+
+ def _update_key_state(self, keys):
+ """
+ Update the internal state of keys with the new state given.
+
+ :param key: a tuple of chars for the currently pressed keys.
+ """
+ # First remove the already released keys
+ unused_keys = [k for k, v in self.keystates.items() if not v]
+ for key in unused_keys:
+ del self.keystates[key]
+
+ # self.keystates contains now the list of currently pressed keys,
+ # release them...
+ for key in self.keystates.keys():
+ self.keystates[key] = False
+
+ # ...and press those that are in parameter
+ for key in keys:
+ self.keystates[key] = True
+
+ def _create_report_data(self):
+ keyboard = KeyboardData()
+ for key, value in self.keystates.items():
+ key = key.replace(" ", "").lower()
+ setattr(keyboard, key, value)
+ return keyboard
+
+ def create_array_report(self, keys, reportID=None, application=None):
+ """
+ Return an input report for this device.
+
+ :param keys: a tuple of chars for the pressed keys. The class maintains
+ the list of currently pressed keys, so to release a key, the caller
+ needs to call again this function without the key in this tuple.
+ :param reportID: the numeric report ID for this report, if needed
+ """
+ self._update_key_state(keys)
+ reportID = reportID or self.default_reportID
+
+ keyboard = self._create_report_data()
+ return self.create_report(keyboard, reportID=reportID, application=application)
+
+ def event(self, keys, reportID=None, application=None):
+ """
+ Send an input event on the default report ID.
+
+ :param keys: a tuple of chars for the pressed keys. The class maintains
+ the list of currently pressed keys, so to release a key, the caller
+ needs to call again this function without the key in this tuple.
+ """
+ r = self.create_array_report(keys, reportID, application)
+ self.call_input_event(r)
+ return [r]
+
+
+class PlainKeyboard(BaseKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x01, # .Report ID (1)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xe0, # .Usage Minimum (224)
+ 0x29, 0xe7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x29, 0x97, # .Usage Maximum (151)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x98, # .Report Count (152)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.default_reportID = 1
+
+
+class ArrayKeyboard(BaseKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xa1, 0x01, # Collection (Application)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xe0, # .Usage Minimum (224)
+ 0x29, 0xe7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x95, 0x06, # .Report Count (6)
+ 0x75, 0x08, # .Report Size (8)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x26, 0xa4, 0x00, # .Logical Maximum (164)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x29, 0xa4, # .Usage Maximum (164)
+ 0x81, 0x00, # .Input (Data,Arr,Abs)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+
+ def _create_report_data(self):
+ data = KeyboardData()
+ array = []
+
+ hut = hidtools.hut.HUT
+
+ # strip modifiers from the array
+ for k, v in self.keystates.items():
+ # we ignore depressed keys
+ if not v:
+ continue
+
+ usage = hut[0x07].from_name[k].usage
+ if usage >= 224 and usage <= 231:
+ # modifier
+ setattr(data, k.lower(), 1)
+ else:
+ array.append(k)
+
+ # if array length is bigger than 6, report ErrorRollOver
+ if len(array) > 6:
+ array = ["ErrorRollOver"] * 6
+
+ data.keyboard = array
+ return data
+
+
+class LEDKeyboard(ArrayKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xa1, 0x01, # Collection (Application)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xe0, # .Usage Minimum (224)
+ 0x29, 0xe7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x95, 0x01, # .Report Count (1)
+ 0x75, 0x08, # .Report Size (8)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x95, 0x05, # .Report Count (5)
+ 0x75, 0x01, # .Report Size (1)
+ 0x05, 0x08, # .Usage Page (LEDs)
+ 0x19, 0x01, # .Usage Minimum (1)
+ 0x29, 0x05, # .Usage Maximum (5)
+ 0x91, 0x02, # .Output (Data,Var,Abs)
+ 0x95, 0x01, # .Report Count (1)
+ 0x75, 0x03, # .Report Size (3)
+ 0x91, 0x01, # .Output (Cnst,Arr,Abs)
+ 0x95, 0x06, # .Report Count (6)
+ 0x75, 0x08, # .Report Size (8)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x26, 0xa4, 0x00, # .Logical Maximum (164)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x29, 0xa4, # .Usage Maximum (164)
+ 0x81, 0x00, # .Input (Data,Arr,Abs)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+
+
+# Some Primax manufactured keyboards set the Usage Page after having defined
+# some local Usages. It relies on the fact that the specification states that
+# Usages are to be concatenated with Usage Pages upon finding a Main item (see
+# 6.2.2.8). This test covers this case.
+class PrimaxKeyboard(ArrayKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xA1, 0x01, # Collection (Application)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xE0, # .Usage Minimum (224)
+ 0x29, 0xE7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x01, # .Report Count (1)
+ 0x81, 0x01, # .Input (Data,Var,Abs)
+ 0x05, 0x08, # .Usage Page (LEDs)
+ 0x19, 0x01, # .Usage Minimum (1)
+ 0x29, 0x03, # .Usage Maximum (3)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x03, # .Report Count (3)
+ 0x91, 0x02, # .Output (Data,Var,Abs)
+ 0x95, 0x01, # .Report Count (1)
+ 0x75, 0x05, # .Report Size (5)
+ 0x91, 0x01, # .Output (Constant)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x26, 0xFF, 0x00, # .Logical Maximum (255)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x2A, 0xFF, 0x00, # .Usage Maximum (255)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x06, # .Report Count (6)
+ 0x81, 0x00, # .Input (Data,Arr,Abs)
+ 0xC0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+
+
+class BaseTest:
+ class TestKeyboard(base.BaseTestCase.TestUhid):
+ def test_single_key(self):
+ """check for key reliability."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(["a and A"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_A, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_A] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_A, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_A] == 0
+
+ def test_two_keys(self):
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(["a and A", "q and Q"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_A, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_Q, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_A] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_A, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_Q, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_A] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_Q] == 0
+
+ r = uhdev.event(["c and C"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_C, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_C] == 1
+
+ r = uhdev.event(["c and C", "Spacebar"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_SPACE, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.KEY_C) not in events
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_C] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_SPACE] == 1
+
+ r = uhdev.event(["Spacebar"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_C, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.KEY_SPACE) not in events
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_C] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_SPACE] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_SPACE, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_SPACE] == 0
+
+ def test_modifiers(self):
+ # ctrl-alt-del would be very nice :)
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+
+ r = uhdev.event(["LeftControl", "LeftShift", "= and +"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_LEFTCTRL, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_LEFTSHIFT, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_EQUAL, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+
+class TestPlainKeyboard(BaseTest.TestKeyboard):
+ def create_device(self):
+ return PlainKeyboard()
+
+ def test_10_keys(self):
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+
+ r = uhdev.event(
+ [
+ "1 and !",
+ "2 and @",
+ "3 and #",
+ "4 and $",
+ "5 and %",
+ "6 and ^",
+ "7 and &",
+ "8 and *",
+ "9 and (",
+ "0 and )",
+ ]
+ )
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_0, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_1, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_2, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_3, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_5, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_6, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_7, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_8, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_9, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_0, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_1, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_2, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_3, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_4, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_5, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_6, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_7, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_8, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_9, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+
+class TestArrayKeyboard(BaseTest.TestKeyboard):
+ def create_device(self):
+ return ArrayKeyboard()
+
+ def test_10_keys(self):
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+
+ r = uhdev.event(
+ [
+ "1 and !",
+ "2 and @",
+ "3 and #",
+ "4 and $",
+ "5 and %",
+ "6 and ^",
+ ]
+ )
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_1, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_2, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_3, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_5, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_6, 1))
+ events = uhdev.next_sync_events()
+
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ # ErrRollOver
+ r = uhdev.event(
+ [
+ "1 and !",
+ "2 and @",
+ "3 and #",
+ "4 and $",
+ "5 and %",
+ "6 and ^",
+ "7 and &",
+ "8 and *",
+ "9 and (",
+ "0 and )",
+ ]
+ )
+ events = uhdev.next_sync_events()
+
+ self.debug_reports(r, uhdev, events)
+
+ assert len(events) == 0
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_1, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_2, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_3, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_4, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_5, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_6, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+
+class TestLEDKeyboard(BaseTest.TestKeyboard):
+ def create_device(self):
+ return LEDKeyboard()
+
+
+class TestPrimaxKeyboard(BaseTest.TestKeyboard):
+ def create_device(self):
+ return PrimaxKeyboard()
diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py
new file mode 100644
index 000000000000..eb4e15a0e53b
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_mouse.py
@@ -0,0 +1,1047 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+#
+
+from . import base
+import hidtools.hid
+from hidtools.util import BusType
+import libevdev
+import logging
+import pytest
+
+logger = logging.getLogger("hidtools.test.mouse")
+
+# workaround https://gitlab.freedesktop.org/libevdev/python-libevdev/issues/6
+try:
+ libevdev.EV_REL.REL_WHEEL_HI_RES
+except AttributeError:
+ libevdev.EV_REL.REL_WHEEL_HI_RES = libevdev.EV_REL.REL_0B
+ libevdev.EV_REL.REL_HWHEEL_HI_RES = libevdev.EV_REL.REL_0C
+
+
+class InvalidHIDCommunication(Exception):
+ pass
+
+
+class MouseData(object):
+ pass
+
+
+class BaseMouse(base.UHIDTestDevice):
+ def __init__(self, rdesc, name=None, input_info=None):
+ assert rdesc is not None
+ super().__init__(name, "Mouse", input_info=input_info, rdesc=rdesc)
+ self.left = False
+ self.right = False
+ self.middle = False
+
+ def create_report(self, x, y, buttons=None, wheels=None, reportID=None):
+ """
+ Return an input report for this device.
+
+ :param x: relative x
+ :param y: relative y
+ :param buttons: a (l, r, m) tuple of bools for the button states,
+ where ``None`` is "leave unchanged"
+ :param wheels: a single value for the vertical wheel or a (vertical, horizontal) tuple for
+ the two wheels
+ :param reportID: the numeric report ID for this report, if needed
+ """
+ if buttons is not None:
+ left, right, middle = buttons
+ if left is not None:
+ self.left = left
+ if right is not None:
+ self.right = right
+ if middle is not None:
+ self.middle = middle
+ left = self.left
+ right = self.right
+ middle = self.middle
+ # Note: the BaseMouse doesn't actually have a wheel but the
+ # create_report magic only fills in those fields exist, so let's
+ # make this generic here.
+ wheel, acpan = 0, 0
+ if wheels is not None:
+ if isinstance(wheels, tuple):
+ wheel = wheels[0]
+ acpan = wheels[1]
+ else:
+ wheel = wheels
+
+ reportID = reportID or self.default_reportID
+
+ mouse = MouseData()
+ mouse.b1 = int(left)
+ mouse.b2 = int(right)
+ mouse.b3 = int(middle)
+ mouse.x = x
+ mouse.y = y
+ mouse.wheel = wheel
+ mouse.acpan = acpan
+ return super().create_report(mouse, reportID=reportID)
+
+ def event(self, x, y, buttons=None, wheels=None):
+ """
+ Send an input event on the default report ID.
+
+ :param x: relative x
+ :param y: relative y
+ :param buttons: a (l, r, m) tuple of bools for the button states,
+ where ``None`` is "leave unchanged"
+ :param wheels: a single value for the vertical wheel or a (vertical, horizontal) tuple for
+ the two wheels
+ """
+ r = self.create_report(x, y, buttons, wheels)
+ self.call_input_event(r)
+ return [r]
+
+
+class ButtonMouse(BaseMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # .Usage (Mouse) 2
+ 0xa1, 0x01, # .Collection (Application) 4
+ 0x09, 0x02, # ..Usage (Mouse) 6
+ 0xa1, 0x02, # ..Collection (Logical) 8
+ 0x09, 0x01, # ...Usage (Pointer) 10
+ 0xa1, 0x00, # ...Collection (Physical) 12
+ 0x05, 0x09, # ....Usage Page (Button) 14
+ 0x19, 0x01, # ....Usage Minimum (1) 16
+ 0x29, 0x03, # ....Usage Maximum (3) 18
+ 0x15, 0x00, # ....Logical Minimum (0) 20
+ 0x25, 0x01, # ....Logical Maximum (1) 22
+ 0x75, 0x01, # ....Report Size (1) 24
+ 0x95, 0x03, # ....Report Count (3) 26
+ 0x81, 0x02, # ....Input (Data,Var,Abs) 28
+ 0x75, 0x05, # ....Report Size (5) 30
+ 0x95, 0x01, # ....Report Count (1) 32
+ 0x81, 0x03, # ....Input (Cnst,Var,Abs) 34
+ 0x05, 0x01, # ....Usage Page (Generic Desktop) 36
+ 0x09, 0x30, # ....Usage (X) 38
+ 0x09, 0x31, # ....Usage (Y) 40
+ 0x15, 0x81, # ....Logical Minimum (-127) 42
+ 0x25, 0x7f, # ....Logical Maximum (127) 44
+ 0x75, 0x08, # ....Report Size (8) 46
+ 0x95, 0x02, # ....Report Count (2) 48
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 50
+ 0xc0, # ...End Collection 52
+ 0xc0, # ..End Collection 53
+ 0xc0, # .End Collection 54
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+
+ def fake_report(self, x, y, buttons):
+ if buttons is not None:
+ left, right, middle = buttons
+ if left is None:
+ left = self.left
+ if right is None:
+ right = self.right
+ if middle is None:
+ middle = self.middle
+ else:
+ left = self.left
+ right = self.right
+ middle = self.middle
+
+ button_mask = sum(1 << i for i, b in enumerate([left, right, middle]) if b)
+ x = max(-127, min(127, x))
+ y = max(-127, min(127, y))
+ x = hidtools.util.to_twos_comp(x, 8)
+ y = hidtools.util.to_twos_comp(y, 8)
+ return [button_mask, x, y]
+
+
+class WheelMouse(ButtonMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # Usage (Mouse) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x05, 0x09, # .Usage Page (Button) 6
+ 0x19, 0x01, # .Usage Minimum (1) 8
+ 0x29, 0x03, # .Usage Maximum (3) 10
+ 0x15, 0x00, # .Logical Minimum (0) 12
+ 0x25, 0x01, # .Logical Maximum (1) 14
+ 0x95, 0x03, # .Report Count (3) 16
+ 0x75, 0x01, # .Report Size (1) 18
+ 0x81, 0x02, # .Input (Data,Var,Abs) 20
+ 0x95, 0x01, # .Report Count (1) 22
+ 0x75, 0x05, # .Report Size (5) 24
+ 0x81, 0x03, # .Input (Cnst,Var,Abs) 26
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 28
+ 0x09, 0x01, # .Usage (Pointer) 30
+ 0xa1, 0x00, # .Collection (Physical) 32
+ 0x09, 0x30, # ..Usage (X) 34
+ 0x09, 0x31, # ..Usage (Y) 36
+ 0x15, 0x81, # ..Logical Minimum (-127) 38
+ 0x25, 0x7f, # ..Logical Maximum (127) 40
+ 0x75, 0x08, # ..Report Size (8) 42
+ 0x95, 0x02, # ..Report Count (2) 44
+ 0x81, 0x06, # ..Input (Data,Var,Rel) 46
+ 0xc0, # .End Collection 48
+ 0x09, 0x38, # .Usage (Wheel) 49
+ 0x15, 0x81, # .Logical Minimum (-127) 51
+ 0x25, 0x7f, # .Logical Maximum (127) 53
+ 0x75, 0x08, # .Report Size (8) 55
+ 0x95, 0x01, # .Report Count (1) 57
+ 0x81, 0x06, # .Input (Data,Var,Rel) 59
+ 0xc0, # End Collection 61
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.wheel_multiplier = 1
+
+
+class TwoWheelMouse(WheelMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # Usage (Mouse) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x09, 0x01, # .Usage (Pointer) 6
+ 0xa1, 0x00, # .Collection (Physical) 8
+ 0x05, 0x09, # ..Usage Page (Button) 10
+ 0x19, 0x01, # ..Usage Minimum (1) 12
+ 0x29, 0x10, # ..Usage Maximum (16) 14
+ 0x15, 0x00, # ..Logical Minimum (0) 16
+ 0x25, 0x01, # ..Logical Maximum (1) 18
+ 0x95, 0x10, # ..Report Count (16) 20
+ 0x75, 0x01, # ..Report Size (1) 22
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 24
+ 0x05, 0x01, # ..Usage Page (Generic Desktop) 26
+ 0x16, 0x01, 0x80, # ..Logical Minimum (-32767) 28
+ 0x26, 0xff, 0x7f, # ..Logical Maximum (32767) 31
+ 0x75, 0x10, # ..Report Size (16) 34
+ 0x95, 0x02, # ..Report Count (2) 36
+ 0x09, 0x30, # ..Usage (X) 38
+ 0x09, 0x31, # ..Usage (Y) 40
+ 0x81, 0x06, # ..Input (Data,Var,Rel) 42
+ 0x15, 0x81, # ..Logical Minimum (-127) 44
+ 0x25, 0x7f, # ..Logical Maximum (127) 46
+ 0x75, 0x08, # ..Report Size (8) 48
+ 0x95, 0x01, # ..Report Count (1) 50
+ 0x09, 0x38, # ..Usage (Wheel) 52
+ 0x81, 0x06, # ..Input (Data,Var,Rel) 54
+ 0x05, 0x0c, # ..Usage Page (Consumer Devices) 56
+ 0x0a, 0x38, 0x02, # ..Usage (AC Pan) 58
+ 0x95, 0x01, # ..Report Count (1) 61
+ 0x81, 0x06, # ..Input (Data,Var,Rel) 63
+ 0xc0, # .End Collection 65
+ 0xc0, # End Collection 66
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.hwheel_multiplier = 1
+
+
+class MIDongleMIWirelessMouse(TwoWheelMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x02, # Usage (Mouse)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x01, # .Report ID (1)
+ 0x09, 0x01, # .Usage (Pointer)
+ 0xa1, 0x00, # .Collection (Physical)
+ 0x95, 0x05, # ..Report Count (5)
+ 0x75, 0x01, # ..Report Size (1)
+ 0x05, 0x09, # ..Usage Page (Button)
+ 0x19, 0x01, # ..Usage Minimum (1)
+ 0x29, 0x05, # ..Usage Maximum (5)
+ 0x15, 0x00, # ..Logical Minimum (0)
+ 0x25, 0x01, # ..Logical Maximum (1)
+ 0x81, 0x02, # ..Input (Data,Var,Abs)
+ 0x95, 0x01, # ..Report Count (1)
+ 0x75, 0x03, # ..Report Size (3)
+ 0x81, 0x01, # ..Input (Cnst,Arr,Abs)
+ 0x75, 0x08, # ..Report Size (8)
+ 0x95, 0x01, # ..Report Count (1)
+ 0x05, 0x01, # ..Usage Page (Generic Desktop)
+ 0x09, 0x38, # ..Usage (Wheel)
+ 0x15, 0x81, # ..Logical Minimum (-127)
+ 0x25, 0x7f, # ..Logical Maximum (127)
+ 0x81, 0x06, # ..Input (Data,Var,Rel)
+ 0x05, 0x0c, # ..Usage Page (Consumer Devices)
+ 0x0a, 0x38, 0x02, # ..Usage (AC Pan)
+ 0x95, 0x01, # ..Report Count (1)
+ 0x81, 0x06, # ..Input (Data,Var,Rel)
+ 0xc0, # .End Collection
+ 0x85, 0x02, # .Report ID (2)
+ 0x09, 0x01, # .Usage (Consumer Control)
+ 0xa1, 0x00, # .Collection (Physical)
+ 0x75, 0x0c, # ..Report Size (12)
+ 0x95, 0x02, # ..Report Count (2)
+ 0x05, 0x01, # ..Usage Page (Generic Desktop)
+ 0x09, 0x30, # ..Usage (X)
+ 0x09, 0x31, # ..Usage (Y)
+ 0x16, 0x01, 0xf8, # ..Logical Minimum (-2047)
+ 0x26, 0xff, 0x07, # ..Logical Maximum (2047)
+ 0x81, 0x06, # ..Input (Data,Var,Rel)
+ 0xc0, # .End Collection
+ 0xc0, # End Collection
+ 0x05, 0x0c, # Usage Page (Consumer Devices)
+ 0x09, 0x01, # Usage (Consumer Control)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x03, # .Report ID (3)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x01, # .Report Count (1)
+ 0x09, 0xcd, # .Usage (Play/Pause)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x0a, 0x83, 0x01, # .Usage (AL Consumer Control Config)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x09, 0xb5, # .Usage (Scan Next Track)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x09, 0xb6, # .Usage (Scan Previous Track)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x09, 0xea, # .Usage (Volume Down)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x09, 0xe9, # .Usage (Volume Up)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x0a, 0x25, 0x02, # .Usage (AC Forward)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x0a, 0x24, 0x02, # .Usage (AC Back)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+ device_input_info = (BusType.USB, 0x2717, 0x003B)
+ device_name = "uhid test MI Dongle MI Wireless Mouse"
+
+ def __init__(
+ self, rdesc=report_descriptor, name=device_name, input_info=device_input_info
+ ):
+ super().__init__(rdesc, name, input_info)
+
+ def event(self, x, y, buttons=None, wheels=None):
+ # this mouse spreads the relative pointer and the mouse buttons
+ # onto 2 distinct reports
+ rs = []
+ r = self.create_report(x, y, buttons, wheels, reportID=1)
+ self.call_input_event(r)
+ rs.append(r)
+ r = self.create_report(x, y, buttons, reportID=2)
+ self.call_input_event(r)
+ rs.append(r)
+ return rs
+
+
+class ResolutionMultiplierMouse(TwoWheelMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 83
+ 0x09, 0x02, # Usage (Mouse) 85
+ 0xa1, 0x01, # Collection (Application) 87
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 89
+ 0x09, 0x02, # .Usage (Mouse) 91
+ 0xa1, 0x02, # .Collection (Logical) 93
+ 0x85, 0x11, # ..Report ID (17) 95
+ 0x09, 0x01, # ..Usage (Pointer) 97
+ 0xa1, 0x00, # ..Collection (Physical) 99
+ 0x05, 0x09, # ...Usage Page (Button) 101
+ 0x19, 0x01, # ...Usage Minimum (1) 103
+ 0x29, 0x03, # ...Usage Maximum (3) 105
+ 0x95, 0x03, # ...Report Count (3) 107
+ 0x75, 0x01, # ...Report Size (1) 109
+ 0x25, 0x01, # ...Logical Maximum (1) 111
+ 0x81, 0x02, # ...Input (Data,Var,Abs) 113
+ 0x95, 0x01, # ...Report Count (1) 115
+ 0x81, 0x01, # ...Input (Cnst,Arr,Abs) 117
+ 0x09, 0x05, # ...Usage (Vendor Usage 0x05) 119
+ 0x81, 0x02, # ...Input (Data,Var,Abs) 121
+ 0x95, 0x03, # ...Report Count (3) 123
+ 0x81, 0x01, # ...Input (Cnst,Arr,Abs) 125
+ 0x05, 0x01, # ...Usage Page (Generic Desktop) 127
+ 0x09, 0x30, # ...Usage (X) 129
+ 0x09, 0x31, # ...Usage (Y) 131
+ 0x95, 0x02, # ...Report Count (2) 133
+ 0x75, 0x08, # ...Report Size (8) 135
+ 0x15, 0x81, # ...Logical Minimum (-127) 137
+ 0x25, 0x7f, # ...Logical Maximum (127) 139
+ 0x81, 0x06, # ...Input (Data,Var,Rel) 141
+ 0xa1, 0x02, # ...Collection (Logical) 143
+ 0x85, 0x12, # ....Report ID (18) 145
+ 0x09, 0x48, # ....Usage (Resolution Multiplier) 147
+ 0x95, 0x01, # ....Report Count (1) 149
+ 0x75, 0x02, # ....Report Size (2) 151
+ 0x15, 0x00, # ....Logical Minimum (0) 153
+ 0x25, 0x01, # ....Logical Maximum (1) 155
+ 0x35, 0x01, # ....Physical Minimum (1) 157
+ 0x45, 0x04, # ....Physical Maximum (4) 159
+ 0xb1, 0x02, # ....Feature (Data,Var,Abs) 161
+ 0x35, 0x00, # ....Physical Minimum (0) 163
+ 0x45, 0x00, # ....Physical Maximum (0) 165
+ 0x75, 0x06, # ....Report Size (6) 167
+ 0xb1, 0x01, # ....Feature (Cnst,Arr,Abs) 169
+ 0x85, 0x11, # ....Report ID (17) 171
+ 0x09, 0x38, # ....Usage (Wheel) 173
+ 0x15, 0x81, # ....Logical Minimum (-127) 175
+ 0x25, 0x7f, # ....Logical Maximum (127) 177
+ 0x75, 0x08, # ....Report Size (8) 179
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 181
+ 0xc0, # ...End Collection 183
+ 0x05, 0x0c, # ...Usage Page (Consumer Devices) 184
+ 0x75, 0x08, # ...Report Size (8) 186
+ 0x0a, 0x38, 0x02, # ...Usage (AC Pan) 188
+ 0x81, 0x06, # ...Input (Data,Var,Rel) 191
+ 0xc0, # ..End Collection 193
+ 0xc0, # .End Collection 194
+ 0xc0, # End Collection 195
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.default_reportID = 0x11
+
+ # Feature Report 12, multiplier Feature value must be set to 0b01,
+ # i.e. 1. We should extract that from the descriptor instead
+ # of hardcoding it here, but meanwhile this will do.
+ self.set_feature_report = [0x12, 0x1]
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ raise InvalidHIDCommunication(f"Unexpected report type: {rtype}")
+ if rnum != 0x12:
+ raise InvalidHIDCommunication(f"Unexpected report number: {rnum}")
+
+ if data != self.set_feature_report:
+ raise InvalidHIDCommunication(
+ f"Unexpected data: {data}, expected {self.set_feature_report}"
+ )
+
+ self.wheel_multiplier = 4
+
+ return 0
+
+
+class BadResolutionMultiplierMouse(ResolutionMultiplierMouse):
+ def set_report(self, req, rnum, rtype, data):
+ super().set_report(req, rnum, rtype, data)
+
+ self.wheel_multiplier = 1
+ self.hwheel_multiplier = 1
+ return 32 # EPIPE
+
+
+class BadReportDescriptorMouse(BaseMouse):
+ """
+ This "device" was one autogenerated by syzbot. There are a lot of issues in
+ it, and the most problematic is that it declares features that have no
+ size.
+
+ This leads to report->size being set to 0 and can mess up with usbhid
+ internals. Fortunately, uhid merely passes the incoming buffer, without
+ touching it so a buffer of size 0 will be translated to [] without
+ triggering a kernel oops.
+
+ Because the report descriptor is wrong, no input are created, and we need
+ to tweak a little bit the parameters to make it look correct.
+ """
+
+ # fmt: off
+ report_descriptor = [
+ 0x96, 0x01, 0x00, # Report Count (1) 0
+ 0x06, 0x01, 0x00, # Usage Page (Generic Desktop) 3
+ # 0x03, 0x00, 0x00, 0x00, 0x00, # Ignored by the kernel somehow
+ 0x2a, 0x90, 0xa0, # Usage Maximum (41104) 6
+ 0x27, 0x00, 0x00, 0x00, 0x00, # Logical Maximum (0) 9
+ 0xb3, 0x81, 0x3e, 0x25, 0x03, # Feature (Cnst,Arr,Abs,Vol) 14
+ 0x1b, 0xdd, 0xe8, 0x40, 0x50, # Usage Minimum (1346431197) 19
+ 0x3b, 0x5d, 0x8c, 0x3d, 0xda, # Designator Index 24
+ ]
+ # fmt: on
+
+ def __init__(
+ self, rdesc=report_descriptor, name=None, input_info=(3, 0x045E, 0x07DA)
+ ):
+ super().__init__(rdesc, name, input_info)
+ self.high_resolution_report_called = False
+
+ def get_evdev(self, application=None):
+ assert self._input_nodes is None
+ return (
+ "Ok" # should be a list or None, but both would fail, so abusing the system
+ )
+
+ def next_sync_events(self, application=None):
+ # there are no evdev nodes, so no events
+ return []
+
+ def is_ready(self):
+ # we wait for the SET_REPORT command to come
+ return self.high_resolution_report_called
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ raise InvalidHIDCommunication(f"Unexpected report type: {rtype}")
+ if rnum != 0x0:
+ raise InvalidHIDCommunication(f"Unexpected report number: {rnum}")
+
+ if len(data) != 1:
+ raise InvalidHIDCommunication(f"Unexpected data: {data}, expected '[0]'")
+
+ self.high_resolution_report_called = True
+
+ return 0
+
+
+class ResolutionMultiplierHWheelMouse(TwoWheelMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # Usage (Mouse) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 6
+ 0x09, 0x02, # .Usage (Mouse) 8
+ 0xa1, 0x02, # .Collection (Logical) 10
+ 0x85, 0x1a, # ..Report ID (26) 12
+ 0x09, 0x01, # ..Usage (Pointer) 14
+ 0xa1, 0x00, # ..Collection (Physical) 16
+ 0x05, 0x09, # ...Usage Page (Button) 18
+ 0x19, 0x01, # ...Usage Minimum (1) 20
+ 0x29, 0x05, # ...Usage Maximum (5) 22
+ 0x95, 0x05, # ...Report Count (5) 24
+ 0x75, 0x01, # ...Report Size (1) 26
+ 0x15, 0x00, # ...Logical Minimum (0) 28
+ 0x25, 0x01, # ...Logical Maximum (1) 30
+ 0x81, 0x02, # ...Input (Data,Var,Abs) 32
+ 0x75, 0x03, # ...Report Size (3) 34
+ 0x95, 0x01, # ...Report Count (1) 36
+ 0x81, 0x01, # ...Input (Cnst,Arr,Abs) 38
+ 0x05, 0x01, # ...Usage Page (Generic Desktop) 40
+ 0x09, 0x30, # ...Usage (X) 42
+ 0x09, 0x31, # ...Usage (Y) 44
+ 0x95, 0x02, # ...Report Count (2) 46
+ 0x75, 0x10, # ...Report Size (16) 48
+ 0x16, 0x01, 0x80, # ...Logical Minimum (-32767) 50
+ 0x26, 0xff, 0x7f, # ...Logical Maximum (32767) 53
+ 0x81, 0x06, # ...Input (Data,Var,Rel) 56
+ 0xa1, 0x02, # ...Collection (Logical) 58
+ 0x85, 0x12, # ....Report ID (18) 60
+ 0x09, 0x48, # ....Usage (Resolution Multiplier) 62
+ 0x95, 0x01, # ....Report Count (1) 64
+ 0x75, 0x02, # ....Report Size (2) 66
+ 0x15, 0x00, # ....Logical Minimum (0) 68
+ 0x25, 0x01, # ....Logical Maximum (1) 70
+ 0x35, 0x01, # ....Physical Minimum (1) 72
+ 0x45, 0x0c, # ....Physical Maximum (12) 74
+ 0xb1, 0x02, # ....Feature (Data,Var,Abs) 76
+ 0x85, 0x1a, # ....Report ID (26) 78
+ 0x09, 0x38, # ....Usage (Wheel) 80
+ 0x35, 0x00, # ....Physical Minimum (0) 82
+ 0x45, 0x00, # ....Physical Maximum (0) 84
+ 0x95, 0x01, # ....Report Count (1) 86
+ 0x75, 0x10, # ....Report Size (16) 88
+ 0x16, 0x01, 0x80, # ....Logical Minimum (-32767) 90
+ 0x26, 0xff, 0x7f, # ....Logical Maximum (32767) 93
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 96
+ 0xc0, # ...End Collection 98
+ 0xa1, 0x02, # ...Collection (Logical) 99
+ 0x85, 0x12, # ....Report ID (18) 101
+ 0x09, 0x48, # ....Usage (Resolution Multiplier) 103
+ 0x75, 0x02, # ....Report Size (2) 105
+ 0x15, 0x00, # ....Logical Minimum (0) 107
+ 0x25, 0x01, # ....Logical Maximum (1) 109
+ 0x35, 0x01, # ....Physical Minimum (1) 111
+ 0x45, 0x0c, # ....Physical Maximum (12) 113
+ 0xb1, 0x02, # ....Feature (Data,Var,Abs) 115
+ 0x35, 0x00, # ....Physical Minimum (0) 117
+ 0x45, 0x00, # ....Physical Maximum (0) 119
+ 0x75, 0x04, # ....Report Size (4) 121
+ 0xb1, 0x01, # ....Feature (Cnst,Arr,Abs) 123
+ 0x85, 0x1a, # ....Report ID (26) 125
+ 0x05, 0x0c, # ....Usage Page (Consumer Devices) 127
+ 0x95, 0x01, # ....Report Count (1) 129
+ 0x75, 0x10, # ....Report Size (16) 131
+ 0x16, 0x01, 0x80, # ....Logical Minimum (-32767) 133
+ 0x26, 0xff, 0x7f, # ....Logical Maximum (32767) 136
+ 0x0a, 0x38, 0x02, # ....Usage (AC Pan) 139
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 142
+ 0xc0, # ...End Collection 144
+ 0xc0, # ..End Collection 145
+ 0xc0, # .End Collection 146
+ 0xc0, # End Collection 147
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.default_reportID = 0x1A
+
+ # Feature Report 12, multiplier Feature value must be set to 0b0101,
+ # i.e. 5. We should extract that from the descriptor instead
+ # of hardcoding it here, but meanwhile this will do.
+ self.set_feature_report = [0x12, 0x5]
+
+ def set_report(self, req, rnum, rtype, data):
+ super().set_report(req, rnum, rtype, data)
+
+ self.wheel_multiplier = 12
+ self.hwheel_multiplier = 12
+
+ return 0
+
+
+class BaseTest:
+ class TestMouse(base.BaseTestCase.TestUhid):
+ def test_buttons(self):
+ """check for button reliability."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(0, 0, (None, True, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 1
+
+ r = uhdev.event(0, 0, (None, False, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 0
+
+ r = uhdev.event(0, 0, (None, None, True))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_MIDDLE, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_MIDDLE] == 1
+
+ r = uhdev.event(0, 0, (None, None, False))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_MIDDLE, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_MIDDLE] == 0
+
+ r = uhdev.event(0, 0, (True, None, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+ r = uhdev.event(0, 0, (False, None, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+
+ r = uhdev.event(0, 0, (True, True, None))
+ expected_event0 = libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1)
+ expected_event1 = libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(
+ (syn_event, expected_event0, expected_event1), events
+ )
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 1
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+ r = uhdev.event(0, 0, (False, None, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 1
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+
+ r = uhdev.event(0, 0, (None, False, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 0
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+
+ def test_relative(self):
+ """Check for relative events."""
+ uhdev = self.uhdev
+
+ syn_event = self.syn_event
+
+ r = uhdev.event(0, -1)
+ expected_event = libevdev.InputEvent(libevdev.EV_REL.REL_Y, -1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents((syn_event, expected_event), events)
+
+ r = uhdev.event(1, 0)
+ expected_event = libevdev.InputEvent(libevdev.EV_REL.REL_X, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents((syn_event, expected_event), events)
+
+ r = uhdev.event(-1, 2)
+ expected_event0 = libevdev.InputEvent(libevdev.EV_REL.REL_X, -1)
+ expected_event1 = libevdev.InputEvent(libevdev.EV_REL.REL_Y, 2)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(
+ (syn_event, expected_event0, expected_event1), events
+ )
+
+
+class TestSimpleMouse(BaseTest.TestMouse):
+ def create_device(self):
+ return ButtonMouse()
+
+ def test_rdesc(self):
+ """Check that the testsuite actually manages to format the
+ reports according to the report descriptors.
+ No kernel device is used here"""
+ uhdev = self.uhdev
+
+ event = (0, 0, (None, None, None))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (0, 0, (None, True, None))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (0, 0, (True, True, None))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (0, 0, (False, False, False))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (1, 0, (True, False, True))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (-1, 0, (True, False, True))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (-5, 5, (True, False, True))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (-127, 127, (True, False, True))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (0, -128, (True, False, True))
+ with pytest.raises(hidtools.hid.RangeError):
+ uhdev.create_report(*event)
+
+
+class TestWheelMouse(BaseTest.TestMouse):
+ def create_device(self):
+ return WheelMouse()
+
+ def is_wheel_highres(self, uhdev):
+ evdev = uhdev.get_evdev()
+ assert evdev.has(libevdev.EV_REL.REL_WHEEL)
+ return evdev.has(libevdev.EV_REL.REL_WHEEL_HI_RES)
+
+ def test_wheel(self):
+ uhdev = self.uhdev
+
+ # check if the kernel is high res wheel compatible
+ high_res_wheel = self.is_wheel_highres(uhdev)
+
+ syn_event = self.syn_event
+ # The Resolution Multiplier is applied to the HID reports, so we
+ # need to pre-multiply too.
+ mult = uhdev.wheel_multiplier
+
+ r = uhdev.event(0, 0, wheels=1 * mult)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, 1))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, 120))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(0, 0, wheels=-1 * mult)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, -1))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, -120))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(-1, 2, wheels=3 * mult)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, -1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, 2))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, 3))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, 360))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+
+class TestTwoWheelMouse(TestWheelMouse):
+ def create_device(self):
+ return TwoWheelMouse()
+
+ def is_hwheel_highres(self, uhdev):
+ evdev = uhdev.get_evdev()
+ assert evdev.has(libevdev.EV_REL.REL_HWHEEL)
+ return evdev.has(libevdev.EV_REL.REL_HWHEEL_HI_RES)
+
+ def test_ac_pan(self):
+ uhdev = self.uhdev
+
+ # check if the kernel is high res wheel compatible
+ high_res_wheel = self.is_wheel_highres(uhdev)
+ high_res_hwheel = self.is_hwheel_highres(uhdev)
+ assert high_res_wheel == high_res_hwheel
+
+ syn_event = self.syn_event
+ # The Resolution Multiplier is applied to the HID reports, so we
+ # need to pre-multiply too.
+ hmult = uhdev.hwheel_multiplier
+ vmult = uhdev.wheel_multiplier
+
+ r = uhdev.event(0, 0, wheels=(0, 1 * hmult))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, 1))
+ if high_res_hwheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 120))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(0, 0, wheels=(0, -1 * hmult))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, -1))
+ if high_res_hwheel:
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, -120)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(-1, 2, wheels=(0, 3 * hmult))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, -1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, 2))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, 3))
+ if high_res_hwheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 360))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(-1, 2, wheels=(-3 * vmult, 4 * hmult))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, -1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, 2))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, -3))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, -360))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, 4))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 480))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+
+class TestResolutionMultiplierMouse(TestTwoWheelMouse):
+ def create_device(self):
+ return ResolutionMultiplierMouse()
+
+ def is_wheel_highres(self, uhdev):
+ high_res = super().is_wheel_highres(uhdev)
+
+ if not high_res:
+ # the kernel doesn't seem to support the high res wheel mice,
+ # make sure we haven't triggered the feature
+ assert uhdev.wheel_multiplier == 1
+
+ return high_res
+
+ def test_resolution_multiplier_wheel(self):
+ uhdev = self.uhdev
+
+ if not self.is_wheel_highres(uhdev):
+ pytest.skip("Kernel not compatible, we can not trigger the conditions")
+
+ assert uhdev.wheel_multiplier > 1
+ assert 120 % uhdev.wheel_multiplier == 0
+
+ def test_wheel_with_multiplier(self):
+ uhdev = self.uhdev
+
+ if not self.is_wheel_highres(uhdev):
+ pytest.skip("Kernel not compatible, we can not trigger the conditions")
+
+ assert uhdev.wheel_multiplier > 1
+
+ syn_event = self.syn_event
+ mult = uhdev.wheel_multiplier
+
+ r = uhdev.event(0, 0, wheels=1)
+ expected = [syn_event]
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, 120 / mult)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(0, 0, wheels=-1)
+ expected = [syn_event]
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, -120 / mult)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, -2))
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, 120 / mult)
+ )
+
+ for _ in range(mult - 1):
+ r = uhdev.event(1, -2, wheels=1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(1, -2, wheels=1)
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+
+class TestBadResolutionMultiplierMouse(TestTwoWheelMouse):
+ def create_device(self):
+ return BadResolutionMultiplierMouse()
+
+ def is_wheel_highres(self, uhdev):
+ high_res = super().is_wheel_highres(uhdev)
+
+ assert uhdev.wheel_multiplier == 1
+
+ return high_res
+
+ def test_resolution_multiplier_wheel(self):
+ uhdev = self.uhdev
+
+ assert uhdev.wheel_multiplier == 1
+
+
+class TestResolutionMultiplierHWheelMouse(TestResolutionMultiplierMouse):
+ def create_device(self):
+ return ResolutionMultiplierHWheelMouse()
+
+ def is_hwheel_highres(self, uhdev):
+ high_res = super().is_hwheel_highres(uhdev)
+
+ if not high_res:
+ # the kernel doesn't seem to support the high res wheel mice,
+ # make sure we haven't triggered the feature
+ assert uhdev.hwheel_multiplier == 1
+
+ return high_res
+
+ def test_resolution_multiplier_ac_pan(self):
+ uhdev = self.uhdev
+
+ if not self.is_hwheel_highres(uhdev):
+ pytest.skip("Kernel not compatible, we can not trigger the conditions")
+
+ assert uhdev.hwheel_multiplier > 1
+ assert 120 % uhdev.hwheel_multiplier == 0
+
+ def test_ac_pan_with_multiplier(self):
+ uhdev = self.uhdev
+
+ if not self.is_hwheel_highres(uhdev):
+ pytest.skip("Kernel not compatible, we can not trigger the conditions")
+
+ assert uhdev.hwheel_multiplier > 1
+
+ syn_event = self.syn_event
+ hmult = uhdev.hwheel_multiplier
+
+ r = uhdev.event(0, 0, wheels=(0, 1))
+ expected = [syn_event]
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 120 / hmult)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(0, 0, wheels=(0, -1))
+ expected = [syn_event]
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, -120 / hmult)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, -2))
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 120 / hmult)
+ )
+
+ for _ in range(hmult - 1):
+ r = uhdev.event(1, -2, wheels=(0, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(1, -2, wheels=(0, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+
+class TestMiMouse(TestWheelMouse):
+ def create_device(self):
+ return MIDongleMIWirelessMouse()
+
+ def assertInputEvents(self, expected_events, effective_events):
+ # Buttons and x/y are spread over two HID reports, so we can get two
+ # event frames for this device.
+ remaining = self.assertInputEventsIn(expected_events, effective_events)
+ try:
+ remaining.remove(libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT, 0))
+ except ValueError:
+ # If there's no SYN_REPORT in the list, continue and let the
+ # assert below print out the real error
+ pass
+ assert remaining == []
+
+
+class TestBadReportDescriptorMouse(base.BaseTestCase.TestUhid):
+ def create_device(self):
+ return BadReportDescriptorMouse()
+
+ def assertName(self, uhdev):
+ pass
diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py
new file mode 100644
index 000000000000..ece0ba8e7d34
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_multitouch.py
@@ -0,0 +1,2143 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+#
+
+from . import base
+from hidtools.hut import HUT
+from hidtools.util import BusType
+import libevdev
+import logging
+import pytest
+import sys
+import time
+
+logger = logging.getLogger("hidtools.test.multitouch")
+
+KERNEL_MODULE = base.KernelModule("hid-multitouch", "hid_multitouch")
+
+
+def BIT(x):
+ return 1 << x
+
+
+mt_quirks = {
+ "NOT_SEEN_MEANS_UP": BIT(0),
+ "SLOT_IS_CONTACTID": BIT(1),
+ "CYPRESS": BIT(2),
+ "SLOT_IS_CONTACTNUMBER": BIT(3),
+ "ALWAYS_VALID": BIT(4),
+ "VALID_IS_INRANGE": BIT(5),
+ "VALID_IS_CONFIDENCE": BIT(6),
+ "CONFIDENCE": BIT(7),
+ "SLOT_IS_CONTACTID_MINUS_ONE": BIT(8),
+ "NO_AREA": BIT(9),
+ "IGNORE_DUPLICATES": BIT(10),
+ "HOVERING": BIT(11),
+ "CONTACT_CNT_ACCURATE": BIT(12),
+ "FORCE_GET_FEATURE": BIT(13),
+ "FIX_CONST_CONTACT_ID": BIT(14),
+ "TOUCH_SIZE_SCALING": BIT(15),
+ "STICKY_FINGERS": BIT(16),
+ "ASUS_CUSTOM_UP": BIT(17),
+ "WIN8_PTP_BUTTONS": BIT(18),
+ "SEPARATE_APP_REPORT": BIT(19),
+ "MT_QUIRK_FORCE_MULTI_INPUT": BIT(20),
+}
+
+
+class Data(object):
+ pass
+
+
+class Touch(object):
+ def __init__(self, id, x, y):
+ self.contactid = id
+ self.x = x
+ self.y = y
+ self.cx = x
+ self.cy = y
+ self.tipswitch = True
+ self.confidence = True
+ self.tippressure = 15
+ self.azimuth = 0
+ self.inrange = True
+ self.width = 10
+ self.height = 10
+
+
+class Pen(Touch):
+ def __init__(self, x, y):
+ super().__init__(0, x, y)
+ self.barrel = False
+ self.invert = False
+ self.eraser = False
+ self.x_tilt = False
+ self.y_tilt = False
+ self.twist = 0
+
+
+class Digitizer(base.UHIDTestDevice):
+ @classmethod
+ def msCertificationBlob(cls, reportID):
+ return f"""
+ Usage Page (Digitizers)
+ Usage (Touch Screen)
+ Collection (Application)
+ Report ID ({reportID})
+ Usage Page (0xff00)
+ Usage (0xc5)
+ Logical Minimum (0)
+ Logical Maximum (255)
+ Report Size (8)
+ Report Count (256)
+ Feature (Data,Var,Abs)
+ End Collection
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Touch Screen",
+ physical="Finger",
+ max_contacts=None,
+ input_info=(BusType.USB, 1, 2),
+ quirks=None,
+ ):
+ super().__init__(name, application, rdesc_str, rdesc, input_info)
+ self.scantime = 0
+ self.quirks = quirks
+ if max_contacts is None:
+ self.max_contacts = sys.maxsize
+ for features in self.parsed_rdesc.feature_reports.values():
+ for feature in features:
+ if feature.usage_name in ["Contact Max"]:
+ self.max_contacts = feature.logical_max
+ for inputs in self.parsed_rdesc.input_reports.values():
+ for i in inputs:
+ if (
+ i.usage_name in ["Contact Count"]
+ and i.logical_max > 0
+ and self.max_contacts > i.logical_max
+ ):
+ self.max_contacts = i.logical_max
+ if self.max_contacts == sys.maxsize:
+ self.max_contacts = 1
+ else:
+ self.max_contacts = max_contacts
+ self.physical = physical
+ self.cur_application = application
+
+ for features in self.parsed_rdesc.feature_reports.values():
+ for feature in features:
+ if feature.usage_name == "Inputmode":
+ self.cur_application = "Mouse"
+
+ self.fields = []
+ for r in self.parsed_rdesc.input_reports.values():
+ if r.application_name == self.application:
+ physicals = [f.physical_name for f in r]
+ if self.physical not in physicals and None not in physicals:
+ continue
+ self.fields = [f.usage_name for f in r]
+
+ @property
+ def touches_in_a_report(self):
+ return self.fields.count("Contact Id")
+
+ def event(self, slots, global_data=None, contact_count=None, incr_scantime=True):
+ if incr_scantime:
+ self.scantime += 1
+ rs = []
+ # make sure we have only the required number of available slots
+ slots = slots[: self.max_contacts]
+
+ if global_data is None:
+ global_data = Data()
+ if contact_count is None:
+ global_data.contactcount = len(slots)
+ else:
+ global_data.contactcount = contact_count
+ global_data.scantime = self.scantime
+
+ while len(slots):
+ r = self.create_report(
+ application=self.cur_application, data=slots, global_data=global_data
+ )
+ self.call_input_event(r)
+ rs.append(r)
+ global_data.contactcount = 0
+ return rs
+
+ def get_report(self, req, rnum, rtype):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return (1, [])
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return (1, [])
+
+ if "Contact Max" not in [f.usage_name for f in rdesc]:
+ return (1, [])
+
+ self.contactmax = self.max_contacts
+ r = rdesc.create_report([self], None)
+ return (0, r)
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return 1
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return 1
+
+ if "Inputmode" not in [f.usage_name for f in rdesc]:
+ return 0
+
+ Inputmode_seen = False
+ for f in rdesc:
+ if "Inputmode" == f.usage_name:
+ values = f.get_values(data)
+ assert len(values) == 1
+ value = values[0]
+
+ if not Inputmode_seen:
+ Inputmode_seen = True
+ if value == 0:
+ self.cur_application = "Mouse"
+ elif value == 2:
+ self.cur_application = "Touch Screen"
+ elif value == 3:
+ self.cur_application = "Touch Pad"
+ else:
+ if value != 0:
+ # Elan bug where the device doesn't work properly
+ # if we set twice an Input Mode in the same Feature
+ self.cur_application = "Mouse"
+
+ return 0
+
+
+class PTP(Digitizer):
+ def __init__(
+ self,
+ name,
+ type="Click Pad",
+ rdesc_str=None,
+ rdesc=None,
+ application="Touch Pad",
+ physical="Pointer",
+ max_contacts=None,
+ input_info=None,
+ ):
+ self.type = type.lower().replace(" ", "")
+ if self.type == "clickpad":
+ self.buttontype = 0
+ else: # pressurepad
+ self.buttontype = 1
+ self.clickpad_state = False
+ self.left_state = False
+ self.right_state = False
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, max_contacts, input_info
+ )
+
+ def event(
+ self,
+ slots=None,
+ click=None,
+ left=None,
+ right=None,
+ contact_count=None,
+ incr_scantime=True,
+ ):
+ # update our internal state
+ if click is not None:
+ self.clickpad_state = click
+ if left is not None:
+ self.left_state = left
+ if right is not None:
+ self.right_state = right
+
+ # now create the global data
+ global_data = Data()
+ global_data.b1 = 1 if self.clickpad_state else 0
+ global_data.b2 = 1 if self.left_state else 0
+ global_data.b3 = 1 if self.right_state else 0
+
+ if slots is None:
+ slots = [Data()]
+
+ return super().event(slots, global_data, contact_count, incr_scantime)
+
+
+class MinWin8TSParallel(Digitizer):
+ def __init__(self, max_slots):
+ self.max_slots = max_slots
+ self.phys_max = 120, 90
+ rdesc_finger_str = f"""
+ Usage Page (Digitizers)
+ Usage (Finger)
+ Collection (Logical)
+ Report Size (1)
+ Report Count (1)
+ Logical Minimum (0)
+ Logical Maximum (1)
+ Usage (Tip Switch)
+ Input (Data,Var,Abs)
+ Report Size (7)
+ Logical Maximum (127)
+ Input (Cnst,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Id)
+ Input (Data,Var,Abs)
+ Report Size (16)
+ Unit Exponent (-1)
+ Unit (SILinear: cm)
+ Logical Maximum (4095)
+ Physical Minimum (0)
+ Physical Maximum ({self.phys_max[0]})
+ Usage Page (Generic Desktop)
+ Usage (X)
+ Input (Data,Var,Abs)
+ Physical Maximum ({self.phys_max[1]})
+ Usage (Y)
+ Input (Data,Var,Abs)
+ Usage Page (Digitizers)
+ Usage (Azimuth)
+ Logical Maximum (360)
+ Unit (SILinear: deg)
+ Report Size (16)
+ Input (Data,Var,Abs)
+ End Collection
+"""
+ rdesc_str = f"""
+ Usage Page (Digitizers)
+ Usage (Touch Screen)
+ Collection (Application)
+ Report ID (1)
+ {rdesc_finger_str * self.max_slots}
+ Unit Exponent (-4)
+ Unit (SILinear: s)
+ Logical Maximum (65535)
+ Physical Maximum (65535)
+ Usage Page (Digitizers)
+ Usage (Scan Time)
+ Input (Data,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Count)
+ Input (Data,Var,Abs)
+ Report ID (2)
+ Logical Maximum ({self.max_slots})
+ Usage (Contact Max)
+ Feature (Data,Var,Abs)
+ End Collection
+ {Digitizer.msCertificationBlob(68)}
+"""
+ super().__init__(f"uhid test parallel {self.max_slots}", rdesc_str)
+
+
+class MinWin8TSHybrid(Digitizer):
+ def __init__(self):
+ self.max_slots = 10
+ self.phys_max = 120, 90
+ rdesc_finger_str = f"""
+ Usage Page (Digitizers)
+ Usage (Finger)
+ Collection (Logical)
+ Report Size (1)
+ Report Count (1)
+ Logical Minimum (0)
+ Logical Maximum (1)
+ Usage (Tip Switch)
+ Input (Data,Var,Abs)
+ Report Size (7)
+ Logical Maximum (127)
+ Input (Cnst,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Id)
+ Input (Data,Var,Abs)
+ Report Size (16)
+ Unit Exponent (-1)
+ Unit (SILinear: cm)
+ Logical Maximum (4095)
+ Physical Minimum (0)
+ Physical Maximum ({self.phys_max[0]})
+ Usage Page (Generic Desktop)
+ Usage (X)
+ Input (Data,Var,Abs)
+ Physical Maximum ({self.phys_max[1]})
+ Usage (Y)
+ Input (Data,Var,Abs)
+ End Collection
+"""
+ rdesc_str = f"""
+ Usage Page (Digitizers)
+ Usage (Touch Screen)
+ Collection (Application)
+ Report ID (1)
+ {rdesc_finger_str * 2}
+ Unit Exponent (-4)
+ Unit (SILinear: s)
+ Logical Maximum (65535)
+ Physical Maximum (65535)
+ Usage Page (Digitizers)
+ Usage (Scan Time)
+ Input (Data,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Count)
+ Input (Data,Var,Abs)
+ Report ID (2)
+ Logical Maximum ({self.max_slots})
+ Usage (Contact Max)
+ Feature (Data,Var,Abs)
+ End Collection
+ {Digitizer.msCertificationBlob(68)}
+"""
+ super().__init__("uhid test hybrid", rdesc_str)
+
+
+class Win8TSConfidence(Digitizer):
+ def __init__(self, max_slots):
+ self.max_slots = max_slots
+ self.phys_max = 120, 90
+ rdesc_finger_str = f"""
+ Usage Page (Digitizers)
+ Usage (Finger)
+ Collection (Logical)
+ Report Size (1)
+ Report Count (1)
+ Logical Minimum (0)
+ Logical Maximum (1)
+ Usage (Tip Switch)
+ Input (Data,Var,Abs)
+ Usage (Confidence)
+ Input (Data,Var,Abs)
+ Report Size (6)
+ Logical Maximum (127)
+ Input (Cnst,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Id)
+ Input (Data,Var,Abs)
+ Report Size (16)
+ Unit Exponent (-1)
+ Unit (SILinear: cm)
+ Logical Maximum (4095)
+ Physical Minimum (0)
+ Physical Maximum ({self.phys_max[0]})
+ Usage Page (Generic Desktop)
+ Usage (X)
+ Input (Data,Var,Abs)
+ Physical Maximum ({self.phys_max[1]})
+ Usage (Y)
+ Input (Data,Var,Abs)
+ Usage Page (Digitizers)
+ Usage (Azimuth)
+ Logical Maximum (360)
+ Unit (SILinear: deg)
+ Report Size (16)
+ Input (Data,Var,Abs)
+ End Collection
+"""
+ rdesc_str = f"""
+ Usage Page (Digitizers)
+ Usage (Touch Screen)
+ Collection (Application)
+ Report ID (1)
+ {rdesc_finger_str * self.max_slots}
+ Unit Exponent (-4)
+ Unit (SILinear: s)
+ Logical Maximum (65535)
+ Physical Maximum (65535)
+ Usage Page (Digitizers)
+ Usage (Scan Time)
+ Input (Data,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Count)
+ Input (Data,Var,Abs)
+ Report ID (2)
+ Logical Maximum ({self.max_slots})
+ Usage (Contact Max)
+ Feature (Data,Var,Abs)
+ End Collection
+ {Digitizer.msCertificationBlob(68)}
+"""
+ super().__init__(f"uhid test confidence {self.max_slots}", rdesc_str)
+
+
+class SmartTechDigitizer(Digitizer):
+ def __init__(self, name, input_info):
+ super().__init__(
+ name,
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 05 81 03 05 01 15 00 26 ff 0f 55 0e 65 11 75 10 95 01 35 00 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 06 15 00 26 ff 00 a1 01 85 02 75 08 95 3f 09 00 82 02 01 95 3f 09 00 92 02 01 c0 05 0d 09 04 a1 01 85 05 05 0d 09 20 a1 00 25 01 75 01 95 02 09 42 09 45 81 02 75 06 95 01 09 30 81 02 26 ff 00 75 08 09 51 81 02 75 10 09 38 81 02 95 02 26 ff 0f 09 48 09 49 81 02 05 01 09 30 09 31 81 02 c0 05 0d 09 20 a1 00 25 01 75 01 95 02 09 42 09 45 81 02 75 06 95 01 09 30 81 02 26 ff 00 75 08 09 51 81 02 75 10 09 38 81 02 95 02 26 ff 0f 09 48 09 49 81 02 05 01 09 30 09 31 81 02 c0 05 0d 09 20 a1 00 25 01 75 01 95 02 09 42 09 45 81 02 75 06 95 01 09 30 81 02 26 ff 00 75 08 09 51 81 02 75 10 09 38 81 02 95 02 26 ff 0f 09 48 09 49 81 02 05 01 09 30 09 31 81 02 c0 05 0d 09 20 a1 00 25 01 75 01 95 02 09 42 09 45 81 02 75 06 95 01 09 30 81 02 26 ff 00 75 08 09 51 81 02 75 10 09 38 81 02 95 02 26 ff 0f 09 48 09 49 81 02 05 01 09 30 09 31 81 02 c0 05 0d 75 08 95 01 15 00 25 0a 09 54 81 02 09 55 b1 02 c0 05 0d 09 0e a1 01 85 04 09 23 a1 02 15 00 25 02 75 08 95 02 09 52 09 53 b1 02 c0 c0 05 0d 09 04 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 02 81 03 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 95 01 09 51 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 02 81 03 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 95 01 09 51 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 02 81 03 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 95 01 09 51 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 02 81 03 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 95 01 09 51 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 05 0d 75 08 95 01 15 00 25 0a 09 54 81 02 09 55 b1 02 c0 05 0d 09 04 a1 01 85 06 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 06 81 03 95 01 75 10 65 11 55 0e 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 02 a1 01 85 07 09 20 a1 02 25 01 75 01 95 04 09 42 09 44 09 3c 09 45 81 02 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 09 38 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 02 a1 01 85 08 09 20 a1 02 25 01 75 01 95 04 09 42 09 44 09 3c 09 45 81 02 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 09 38 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 02 a1 01 85 09 09 20 a1 02 25 01 75 01 95 04 09 42 09 44 09 3c 09 45 81 02 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 09 38 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 02 a1 01 85 0a 09 20 a1 02 25 01 75 01 95 04 09 42 09 44 09 3c 09 45 81 02 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 09 38 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0",
+ input_info=input_info,
+ )
+
+ def create_report(self, data, global_data=None, reportID=None, application=None):
+ # this device has *a lot* of different reports, and most of them
+ # have the Touch Screen application. But the first one is a stylus
+ # report (as stated in the physical type), so we simply override
+ # the report ID to use what the device sends
+ return super().create_report(data, global_data=global_data, reportID=3)
+
+ def match_evdev_rule(self, application, evdev):
+ # we need to select the correct evdev node, as the device has multiple
+ # Touch Screen application collections
+ if application != "Touch Screen":
+ return True
+ absinfo = evdev.absinfo[libevdev.EV_ABS.ABS_MT_POSITION_X]
+ return absinfo is not None and absinfo.resolution == 3
+
+
+class BaseTest:
+ class TestMultitouch(base.BaseTestCase.TestUhid):
+ kernel_modules = [KERNEL_MODULE]
+
+ def create_device(self):
+ raise Exception("please reimplement me in subclasses")
+
+ def get_slot(self, uhdev, t, default):
+ if uhdev.quirks is None:
+ return default
+
+ if "SLOT_IS_CONTACTID" in uhdev.quirks:
+ return t.contactid
+
+ if "SLOT_IS_CONTACTID_MINUS_ONE" in uhdev.quirks:
+ return t.contactid - 1
+
+ return default
+
+ def test_creation(self):
+ """Make sure the device gets processed by the kernel and creates
+ the expected application input node.
+
+ If this fail, there is something wrong in the device report
+ descriptors."""
+ super().test_creation()
+
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ # some sanity checking for the quirks
+ if uhdev.quirks is not None:
+ for q in uhdev.quirks:
+ assert q in mt_quirks
+
+ assert evdev.num_slots == uhdev.max_contacts
+
+ if uhdev.max_contacts > 1:
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ if uhdev.max_contacts > 2:
+ assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_required_usages(self):
+ """Make sure the device exports the correct required features and
+ inputs."""
+ uhdev = self.uhdev
+ rdesc = uhdev.parsed_rdesc
+ for feature in rdesc.feature_reports.values():
+ for field in feature:
+ page_id = field.usage >> 16
+ value = field.usage & 0xFF
+ try:
+ if HUT[page_id][value] == "Contact Max":
+ assert HUT[page_id][field.application] in [
+ "Touch Screen",
+ "Touch Pad",
+ "System Multi-Axis Controller",
+ ]
+ except KeyError:
+ pass
+
+ try:
+ if HUT[page_id][value] == "Inputmode":
+ assert HUT[page_id][field.application] in [
+ "Touch Screen",
+ "Touch Pad",
+ "Device Configuration",
+ ]
+ except KeyError:
+ pass
+
+ def test_mt_single_touch(self):
+ """send a single touch in the first slot of the device,
+ and release it."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 50, 100)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ slot = self.get_slot(uhdev, t0, 0)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+
+ t0.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t0.inrange = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_mt_dual_touch(self):
+ """Send 2 touches in the first 2 slots.
+ Make sure the kernel sees this as a dual touch.
+ Release and check
+
+ Note: PTP will send here BTN_DOUBLETAP emulation"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 50, 100)
+ t1 = Touch(2, 150, 200)
+
+ if uhdev.quirks is not None and (
+ "SLOT_IS_CONTACTID" in uhdev.quirks
+ or "SLOT_IS_CONTACTNUMBER" in uhdev.quirks
+ ):
+ t1.contactid = 0
+
+ slot0 = self.get_slot(uhdev, t0, 0)
+ slot1 = self.get_slot(uhdev, t1, 1)
+
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ r = uhdev.event([t0, t1])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH) not in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_X, 5) not in events
+ )
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_Y, 10) not in events
+ )
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_POSITION_X] == 150
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 200
+
+ t0.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t0.inrange = False
+ r = uhdev.event([t0, t1])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_X) not in events
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_Y) not in events
+
+ t1.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t1.inrange = False
+
+ if uhdev.quirks is not None and "SLOT_IS_CONTACTNUMBER" in uhdev.quirks:
+ r = uhdev.event([t0, t1])
+ else:
+ r = uhdev.event([t1])
+
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: uhdev.max_contacts <= 2, "Device not compatible"
+ )
+ def test_mt_triple_tap(self):
+ """Send 3 touches in the first 3 slots.
+ Make sure the kernel sees this as a triple touch.
+ Release and check
+
+ Note: PTP will send here BTN_TRIPLETAP emulation"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 50, 100)
+ t1 = Touch(2, 150, 200)
+ t2 = Touch(3, 250, 300)
+ r = uhdev.event([t0, t1, t2])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ slot0 = self.get_slot(uhdev, t0, 0)
+ slot1 = self.get_slot(uhdev, t1, 1)
+ slot2 = self.get_slot(uhdev, t2, 2)
+
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_POSITION_X] == 150
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 200
+ assert evdev.slots[slot2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 2
+ assert evdev.slots[slot2][libevdev.EV_ABS.ABS_MT_POSITION_X] == 250
+ assert evdev.slots[slot2][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 300
+
+ t0.tipswitch = False
+ t1.tipswitch = False
+ t2.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t0.inrange = False
+ t1.inrange = False
+ t2.inrange = False
+ r = uhdev.event([t0, t1, t2])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[slot2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: uhdev.max_contacts <= 2, "Device not compatible"
+ )
+ def test_mt_max_contact(self):
+ """send the maximum number of contact as reported by the device.
+ Make sure all contacts are forwarded and that there is no miss.
+ Release and check."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ touches = [
+ Touch(i, (i + 3) * 20, (i + 3) * 20 + 5)
+ for i in range(uhdev.max_contacts)
+ ]
+ if (
+ uhdev.quirks is not None
+ and "SLOT_IS_CONTACTID_MINUS_ONE" in uhdev.quirks
+ ):
+ for t in touches:
+ t.contactid += 1
+ r = uhdev.event(touches)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ for i, t in enumerate(touches):
+ slot = self.get_slot(uhdev, t, i)
+
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == i
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_X] == t.x
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_Y] == t.y
+
+ for t in touches:
+ t.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t.inrange = False
+
+ r = uhdev.event(touches)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ for i, t in enumerate(touches):
+ slot = self.get_slot(uhdev, t, i)
+
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: (
+ uhdev.touches_in_a_report == 1
+ or uhdev.quirks is not None
+ and "CONTACT_CNT_ACCURATE" not in uhdev.quirks
+ ),
+ "Device not compatible, we can not trigger the conditions",
+ )
+ def test_mt_contact_count_accurate(self):
+ """Test the MT_QUIRK_CONTACT_CNT_ACCURATE from the kernel.
+ A report should forward an accurate contact count and the kernel
+ should ignore any data provided after we have reached this
+ contact count."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 50, 100)
+ t1 = Touch(2, 150, 200)
+
+ slot0 = self.get_slot(uhdev, t0, 0)
+ slot1 = self.get_slot(uhdev, t1, 1)
+
+ r = uhdev.event([t0, t1], contact_count=1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TRACKING_ID, 0) in events
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ class TestWin8Multitouch(TestMultitouch):
+ def test_required_usages8(self):
+ """Make sure the device exports the correct required features and
+ inputs."""
+ uhdev = self.uhdev
+ rdesc = uhdev.parsed_rdesc
+ for feature in rdesc.feature_reports.values():
+ for field in feature:
+ page_id = field.usage >> 16
+ value = field.usage & 0xFF
+ try:
+ if HUT[page_id][value] == "Inputmode":
+ assert HUT[field.application] not in ["Touch Screen"]
+ except KeyError:
+ pass
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: uhdev.fields.count("X") == uhdev.touches_in_a_report,
+ "Device not compatible, we can not trigger the conditions",
+ )
+ def test_mt_tx_cx(self):
+ """send a single touch in the first slot of the device, with
+ different values of Tx and Cx. Make sure the kernel reports Tx."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 5, 10)
+ t0.cx = 50
+ t0.cy = 100
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 5
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TOOL_X] == 50
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 10
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TOOL_Y] == 100
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "In Range" not in uhdev.fields,
+ "Device not compatible, missing In Range usage",
+ )
+ def test_mt_inrange(self):
+ """Send one contact that has the InRange bit set before/after
+ tipswitch.
+ Kernel is supposed to mark the contact with a distance > 0
+ when inrange is set but not tipswitch.
+
+ This tests the hovering capability of devices (MT_QUIRK_HOVERING).
+
+ Make sure the contact is only released from the kernel POV
+ when the inrange bit is set to 0."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 150, 200)
+ t0.tipswitch = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TRACKING_ID, 0) in events
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_DISTANCE) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_DISTANCE] > 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 150
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 200
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ t0.tipswitch = True
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_DISTANCE, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_DISTANCE] == 0
+
+ t0.tipswitch = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_DISTANCE) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_DISTANCE] > 0
+
+ t0.inrange = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_mt_duplicates(self):
+ """Test the MT_QUIRK_IGNORE_DUPLICATES from the kernel.
+ If a touch is reported more than once with the same Contact ID,
+ we should only handle the first touch.
+
+ Note: this is not in MS spec, but the current kernel behaves
+ like that"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 5, 10)
+ t1 = Touch(1, 15, 20)
+ t2 = Touch(2, 50, 100)
+
+ r = uhdev.event([t0, t1, t2], contact_count=2)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TRACKING_ID, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 5
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 10
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+
+ def test_mt_release_miss(self):
+ """send a single touch in the first slot of the device, and
+ forget to release it. The kernel is supposed to release by itself
+ the touch in 100ms.
+ Make sure that we are dealing with a new touch by resending the
+ same touch after the timeout expired, and check that the kernel
+ considers it as a separate touch (different tracking ID)"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 5, 10)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+
+ time.sleep(0.2)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Azimuth" not in uhdev.fields,
+ "Device not compatible, missing Azimuth usage",
+ )
+ def test_mt_azimuth(self):
+ """Check for the azimtuh information bit.
+ When azimuth is presented by the device, it should be exported
+ as ABS_MT_ORIENTATION and the exported value should report a quarter
+ of circle."""
+ uhdev = self.uhdev
+
+ t0 = Touch(1, 5, 10)
+ t0.azimuth = 270
+
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ # orientation is clockwise, while Azimuth is counter clockwise
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_ORIENTATION, 90) in events
+
+ class TestPTP(TestWin8Multitouch):
+ def test_ptp_buttons(self):
+ """check for button reliability.
+ There are 2 types of touchpads: the click pads and the pressure pads.
+ Each should reliably report the BTN_LEFT events.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ if uhdev.type == "clickpad":
+ r = uhdev.event(click=True)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+ r = uhdev.event(click=False)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+ else:
+ r = uhdev.event(left=True)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+ r = uhdev.event(left=False)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+
+ r = uhdev.event(right=True)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 1
+
+ r = uhdev.event(right=False)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 0) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 0
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Confidence" not in uhdev.fields,
+ "Device not compatible, missing Confidence usage",
+ )
+ def test_ptp_confidence(self):
+ """Check for the validity of the confidence bit.
+ When a contact is marked as not confident, it should be detected
+ as a palm from the kernel POV and released.
+
+ Note: if the kernel exports ABS_MT_TOOL_TYPE, it shouldn't release
+ the touch but instead convert it to ABS_MT_TOOL_PALM."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 150, 200)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ t0.confidence = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ if evdev.absinfo[libevdev.EV_ABS.ABS_MT_TOOL_TYPE] is not None:
+ # the kernel exports MT_TOOL_PALM
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TOOL_TYPE, 2) in events
+ )
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+
+ t0.tipswitch = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: uhdev.touches_in_a_report >= uhdev.max_contacts,
+ "Device not compatible, we can not trigger the conditions",
+ )
+ def test_ptp_non_touch_data(self):
+ """Some single finger hybrid touchpads might not provide the
+ button information in subsequent reports (only in the first report).
+
+ Emulate this and make sure we do not release the buttons in the
+ middle of the event."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ touches = [Touch(i, i * 10, i * 10 + 5) for i in range(uhdev.max_contacts)]
+ contact_count = uhdev.max_contacts
+ incr_scantime = True
+ btn_state = True
+ events = None
+ while touches:
+ t = touches[: uhdev.touches_in_a_report]
+ touches = touches[uhdev.touches_in_a_report :]
+ r = uhdev.event(
+ t,
+ click=btn_state,
+ left=btn_state,
+ contact_count=contact_count,
+ incr_scantime=incr_scantime,
+ )
+ contact_count = 0
+ incr_scantime = False
+ btn_state = False
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ if touches:
+ assert len(events) == 0
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1) in events
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0) not in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+
+################################################################################
+#
+# Windows 7 compatible devices
+#
+################################################################################
+class Test3m_0596_0500(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test 3m_0596_0500",
+ rdesc="05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 09 01 95 01 75 01 15 00 25 01 81 02 95 07 75 01 81 03 95 01 75 08 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 00 00 95 02 75 10 81 02 c0 a1 02 15 00 26 ff 00 09 01 95 39 75 08 81 01 c0 c0 05 0d 09 0e a1 01 85 11 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 09 04 a1 01 85 10 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 85 12 09 55 95 01 75 08 15 00 25 0a b1 02 06 00 ff 15 00 26 ff 00 85 03 09 01 75 08 95 07 b1 02 85 04 09 01 75 08 95 17 b1 02 85 05 09 01 75 08 95 47 b1 02 85 06 09 01 75 08 95 07 b1 02 85 07 09 01 75 08 95 07 b1 02 85 08 09 01 75 08 95 07 b1 02 85 09 09 01 75 08 95 3f b1 02 c0",
+ input_info=(BusType.USB, 0x0596, 0x0500),
+ max_contacts=60,
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID", "TOUCH_SIZE_SCALING"),
+ )
+
+
+class Test3m_0596_0506(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test 3m_0596_0506",
+ rdesc="05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 09 01 95 01 75 01 15 00 25 01 81 02 95 07 75 01 81 03 95 01 75 08 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 00 00 95 02 75 10 81 02 c0 a1 02 15 00 26 ff 00 09 01 95 39 75 08 81 03 c0 c0 05 0d 09 0e a1 01 85 11 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 09 04 a1 01 85 13 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 3c 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 02 81 03 05 0d 85 12 09 55 95 01 75 08 15 00 25 3c b1 02 06 00 ff 15 00 26 ff 00 85 03 09 01 75 08 95 07 b1 02 85 04 09 01 75 08 95 17 b1 02 85 05 09 01 75 08 95 47 b1 02 85 06 09 01 75 08 95 07 b1 02 85 73 09 01 75 08 95 07 b1 02 85 08 09 01 75 08 95 07 b1 02 85 09 09 01 75 08 95 3f b1 02 85 0f 09 01 75 08 96 07 02 b1 02 c0",
+ input_info=(BusType.USB, 0x0596, 0x0506),
+ max_contacts=60,
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID", "TOUCH_SIZE_SCALING"),
+ )
+
+
+class TestActionStar_2101_1011(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ActionStar_2101_1011",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4d 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4d 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 02 09 55 25 02 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x2101, 0x1011),
+ )
+
+ def test_mt_actionstar_inrange(self):
+ """Special sequence that might not be handled properly"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ # fmt: off
+ sequence = [
+ # t0 = Touch(1, 6999, 2441) | t1 = Touch(2, 15227, 2026)
+ '01 ff 01 57 1b 89 09 ff 02 7b 3b ea 07 02',
+ # t0.xy = (6996, 2450) | t1.y = 2028
+ '01 ff 01 54 1b 92 09 ff 02 7b 3b ec 07 02',
+ # t1.xy = (15233, 2040) | t0.tipswitch = False
+ '01 ff 02 81 3b f8 07 fe 01 54 1b 92 09 02',
+ # t1 | t0.inrange = False
+ '01 ff 02 81 3b f8 07 fc 01 54 1b 92 09 02',
+ ]
+ # fmt: on
+
+ for num, r_str in enumerate(sequence):
+ r = [int(i, 16) for i in r_str.split()]
+ uhdev.call_input_event(r)
+ events = uhdev.next_sync_events()
+ self.debug_reports([r], uhdev)
+ for e in events:
+ print(e)
+ if num == 2:
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
+class TestAsus_computers_0486_0185(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test asus-computers_0486_0185",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 95 01 75 01 81 02 09 32 81 02 09 47 81 02 75 05 81 03 09 30 26 ff 00 75 08 81 02 09 51 25 02 81 02 26 96 0d 05 01 75 10 55 0d 65 33 09 30 35 00 46 fd 1d 81 02 09 31 46 60 11 81 02 c0 09 22 a1 02 05 0d 35 00 45 00 55 00 65 00 09 42 25 01 75 01 81 02 09 32 81 02 09 47 81 02 75 05 81 03 09 30 26 ff 00 75 08 81 02 09 51 25 02 81 02 26 96 0d 05 01 75 10 55 0d 65 33 09 30 46 fd 1d 81 02 09 31 46 60 11 81 02 c0 35 00 45 00 55 00 65 00 05 0d 09 54 75 08 25 02 81 02 85 08 09 55 b1 02 c0 09 0e a1 01 85 07 09 22 a1 00 09 52 25 0a b1 02 c0 05 0c 09 01 a1 01 85 06 09 01 26 ff 00 95 08 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 02 25 01 75 01 95 02 81 02 95 06 81 03 26 96 0d 05 01 75 10 95 01 55 0d 65 33 09 30 46 fd 1d 81 02 09 31 46 60 11 81 02 c0 c0 06 ff 01 09 01 a1 01 26 ff 00 35 00 45 00 55 00 65 00 85 05 75 08 95 3f 09 00 81 02 c0",
+ input_info=(BusType.USB, 0x0486, 0x0185),
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID_MINUS_ONE"),
+ )
+
+
+class TestAtmel_03eb_201c(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test atmel_03eb_201c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4b 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4b 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 02 09 55 25 02 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x03EB, 0x201C),
+ )
+
+
+class TestAtmel_03eb_211c(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test atmel_03eb_211c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 37 81 02 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 46 56 0a 26 ff 0f 09 30 81 02 46 b2 05 26 ff 0f 09 31 81 02 05 0d 75 08 85 02 09 55 25 10 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x03EB, 0x211C),
+ )
+
+
+class TestCando_2087_0a02(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cando_2087_0a02",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 0f 75 10 55 0e 65 33 09 30 35 00 46 6d 03 81 02 46 ec 01 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 0f 75 10 55 0e 65 33 09 30 35 00 46 6d 03 81 02 46 ec 01 09 31 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 02 81 02 85 02 09 55 b1 02 c0 06 00 ff 09 01 a1 01 85 a6 95 22 75 08 26 ff 00 15 00 09 01 81 02 85 a5 95 06 75 08 26 ff 00 15 00 09 01 91 02 c0",
+ input_info=(BusType.USB, 0x2087, 0x0A02),
+ )
+
+
+class TestCando_2087_0b03(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cando_2087_0b03",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 49 46 f2 03 81 02 09 31 26 ff 29 46 39 02 81 02 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 49 46 f2 03 81 02 09 31 26 ff 29 46 39 02 81 02 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 02 09 55 25 02 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x2087, 0x0B03),
+ )
+
+
+class TestCVTouch_1ff7_0013(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cvtouch_1ff7_0013",
+ rdesc="06 00 ff 09 00 a1 01 85 fd 06 00 ff 09 01 09 02 09 03 09 04 09 05 09 06 15 00 26 ff 00 75 08 95 06 81 02 85 fe 06 00 ff 09 01 09 02 09 03 09 04 15 00 26 ff 00 75 08 95 04 b1 02 c0 05 01 09 02 a1 01 09 01 a1 00 85 01 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 ff 7f 75 10 95 02 81 02 05 0d 09 33 15 00 26 ff 00 35 00 46 ff 00 75 08 95 01 81 02 05 01 09 38 15 81 25 7f 35 81 45 7f 95 01 81 06 c0 c0 06 00 ff 09 00 a1 01 85 fc 15 00 26 ff 00 19 01 29 3f 75 08 95 3f 81 02 19 01 29 3f 91 02 c0 06 00 ff 09 00 a1 01 85 fb 15 00 26 ff 00 19 01 29 3f 75 08 95 3f 81 02 19 01 29 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 85 03 09 55 15 00 25 02 b1 02 c0 09 0e a1 01 85 04 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x1FF7, 0x0013),
+ quirks=("NOT_SEEN_MEANS_UP",),
+ )
+
+
+class TestCvtouch_1ff7_0017(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cvtouch_1ff7_0017",
+ rdesc="06 00 ff 09 00 a1 01 85 fd 06 00 ff 09 01 09 02 09 03 09 04 09 05 09 06 15 00 26 ff 00 75 08 95 06 81 02 85 fe 06 00 ff 09 01 09 02 09 03 09 04 15 00 26 ff 00 75 08 95 04 b1 02 c0 05 01 09 02 a1 01 09 01 a1 00 85 01 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 03 05 01 09 30 09 31 15 00 26 ff 0f 35 00 46 ff 0f 75 10 95 02 81 02 09 00 15 00 25 ff 35 00 45 ff 75 08 95 01 81 02 09 38 15 81 25 7f 95 01 81 06 c0 c0 06 00 ff 09 00 a1 01 85 fc 15 00 25 ff 19 01 29 3f 75 08 95 3f 81 02 19 01 29 3f 91 02 c0 06 00 ff 09 00 a1 01 85 fb 15 00 25 ff 19 01 29 3f 75 08 95 3f 81 02 19 01 29 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 09 31 81 02 c0 05 0d 09 54 95 01 75 08 81 02 85 03 09 55 25 02 b1 02 c0 09 0e a1 01 85 04 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x1FF7, 0x0017),
+ )
+
+
+class TestCypress_04b4_c001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cypress_04b4_c001",
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 04 a1 01 85 02 09 22 09 53 95 01 75 08 81 02 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 09 55 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x04B4, 0xC001),
+ )
+
+
+class TestData_modul_7374_1232(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test data-modul_7374_1232",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 37 81 02 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 46 d0 07 26 ff 0f 09 30 81 02 46 40 06 09 31 81 02 05 0d 75 08 85 02 09 55 25 10 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x7374, 0x1232),
+ )
+
+
+class TestData_modul_7374_1252(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test data-modul_7374_1252",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 37 81 02 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 46 d0 07 26 ff 0f 09 30 81 02 46 40 06 09 31 81 02 05 0d 75 08 85 02 09 55 25 10 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x7374, 0x1252),
+ )
+
+
+class TestE4_2219_044c(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test e4_2219_044c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 09 55 b1 02 c0 09 0e a1 01 85 02 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 15 00 26 ff 7f 75 10 95 02 81 02 05 01 09 38 15 81 25 7f 75 08 95 01 81 06 c0 c0",
+ input_info=(BusType.USB, 0x2219, 0x044C),
+ )
+
+
+class TestEgalax_capacitive_0eef_7224(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_7224",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 34 49 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 37 29 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 34 49 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 37 29 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x7224),
+ quirks=("SLOT_IS_CONTACTID", "ALWAYS_VALID"),
+ )
+
+
+class TestEgalax_capacitive_0eef_72fa(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_72fa",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 72 22 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 87 13 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 72 22 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 87 13 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x72FA),
+ quirks=("SLOT_IS_CONTACTID", "VALID_IS_INRANGE"),
+ )
+
+
+class TestEgalax_capacitive_0eef_7336(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_7336",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 c1 20 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 c2 18 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 c1 20 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 c2 18 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x7336),
+ )
+
+
+class TestEgalax_capacitive_0eef_7337(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_7337",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 ae 17 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 c3 0e 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 ae 17 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 c3 0e 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x7337),
+ )
+
+
+class TestEgalax_capacitive_0eef_7349(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_7349",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 34 49 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 37 29 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 34 49 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 37 29 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x7349),
+ quirks=("SLOT_IS_CONTACTID", "ALWAYS_VALID"),
+ )
+
+
+class TestEgalax_capacitive_0eef_73f4(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_73f4",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 96 4e 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 23 2c 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 96 4e 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 23 2c 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x73F4),
+ )
+
+
+class TestEgalax_capacitive_0eef_a001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_a001",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 23 28 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 11 19 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0xA001),
+ quirks=("SLOT_IS_CONTACTID", "VALID_IS_INRANGE"),
+ )
+
+
+class TestElo_touchsystems_04e7_0022(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elo-touchsystems_04e7_0022",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 0f 75 10 55 0e 65 33 09 30 35 00 46 ff 0f 81 02 46 ff 0f 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 46 ff 0f 09 31 81 02 c0 05 0d 09 54 25 10 95 01 75 08 81 02 85 08 09 55 25 02 b1 02 c0 09 0e a1 01 85 07 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 06 00 ff 09 55 85 80 15 00 26 ff 00 75 08 95 01 b1 82 c0 05 01 09 02 a1 01 85 54 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 15 00 26 ff 0f 75 10 95 01 81 02 09 31 75 10 95 01 81 02 09 3b 16 00 00 26 00 01 36 00 00 46 00 01 66 00 00 75 10 95 01 81 62 c0 c0",
+ input_info=(BusType.USB, 0x04E7, 0x0022),
+ )
+
+
+class TestElo_touchsystems_04e7_0080(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elo-touchsystems_04e7_0080",
+ rdesc="05 0d 09 04 a1 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 7c 24 75 10 95 01 09 30 81 02 09 31 46 96 14 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 7c 24 75 10 95 01 09 30 81 02 09 31 46 96 14 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 7c 24 75 10 95 01 09 30 81 02 09 31 46 96 14 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 7c 24 75 10 95 01 09 30 81 02 09 31 46 96 14 81 02 c0 05 0d 09 54 75 08 95 01 15 00 25 08 81 02 09 55 b1 02 c0",
+ input_info=(BusType.USB, 0x04E7, 0x0080),
+ )
+
+
+class TestFlatfrog_25b5_0002(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test flatfrog_25b5_0002",
+ rdesc="05 0d 09 04 a1 01 85 05 09 22 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 65 00 55 00 05 0d 55 0c 66 01 10 75 20 95 01 27 ff ff ff 7f 45 00 09 56 81 02 75 08 95 01 15 00 25 28 09 54 81 02 09 55 85 06 25 28 b1 02 c0 65 00 55 00 45 00 09 0e a1 01 85 03 09 23 a1 02 09 52 15 02 25 02 75 08 95 01 b1 02 09 53 15 00 25 0a 75 08 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x25B5, 0x0002),
+ quirks=("NOT_SEEN_MEANS_UP", "NO_AREA"),
+ max_contacts=40,
+ )
+
+
+class TestFocaltech_10c4_81b9(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test focaltech_10c4_81b9",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 85 02 09 55 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x10C4, 0x81B9),
+ quirks=("ALWAYS_VALID",),
+ max_contacts=5,
+ )
+
+
+class TestHanvon_20b3_0a18(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test hanvon_20b3_0a18",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4b 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4b 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 02 09 55 25 02 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x20B3, 0x0A18),
+ )
+
+
+class TestHuitoo_03f7_0003(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test huitoo_03f7_0003",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 09 55 b1 02 c0 09 0e a1 01 85 02 09 23 a1 02 09 52 09 53 15 00 25 10 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 15 00 26 ff 0f 35 00 46 ff 0f 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 3f 09 02 81 02 95 3f 09 02 91 02 c0",
+ input_info=(BusType.USB, 0x03F7, 0x0003),
+ )
+
+
+class TestIdeacom_1cb6_6650(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ideacom_1cb6_6650",
+ rdesc="05 0d 09 04 a1 01 85 0a 09 22 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 26 ff 1f 75 10 95 01 55 0d 65 33 09 31 35 00 46 61 13 81 02 09 30 46 73 22 81 02 05 0d 75 08 95 01 09 30 26 ff 00 81 02 09 51 81 02 85 0c 09 55 25 02 95 01 b1 02 c0 06 00 ff 85 02 09 01 75 08 95 07 b1 02 85 03 09 02 75 08 95 07 b1 02 85 04 09 03 75 08 95 07 b1 02 85 05 09 04 75 08 95 07 b1 02 85 06 09 05 75 08 96 27 00 b1 02 85 07 09 06 75 08 96 27 00 b1 02 85 08 09 07 75 08 95 07 b1 02 85 09 09 08 75 08 95 07 b1 02 85 0b 09 09 75 08 96 07 00 b1 02 85 0d 09 0a 75 08 96 27 00 b1 02 c0 09 0e a1 01 85 0e 09 52 09 53 95 07 b1 02 c0 05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 75 06 95 01 81 01 05 01 09 31 09 30 15 00 27 ff 1f 00 00 75 10 95 02 81 02 c0 09 01 a1 02 15 00 26 ff 00 95 02 75 08 81 03 c0 c0",
+ input_info=(BusType.USB, 0x1CB6, 0x6650),
+ )
+
+
+class TestIdeacom_1cb6_6651(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ideacom_1cb6_6651",
+ rdesc="05 0d 09 04 a1 01 85 0a 09 22 a1 02 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 26 ff 1f 75 10 95 01 55 0d 65 33 09 31 35 00 46 39 13 81 02 09 30 46 24 22 81 02 05 0d 75 08 95 01 09 30 26 ff 00 81 02 09 51 81 02 85 0c 09 55 25 02 95 01 b1 02 c0 06 00 ff 85 02 09 01 75 08 95 07 b1 02 85 03 09 02 75 08 95 07 b1 02 85 04 09 03 75 08 95 07 b1 02 85 05 09 04 75 08 95 07 b1 02 85 06 09 05 75 08 95 1f b1 02 85 07 09 06 75 08 96 1f 00 b1 02 85 08 09 07 75 08 95 07 b1 02 85 09 09 08 75 08 95 07 b1 02 85 0b 09 09 75 08 95 07 b1 02 85 0d 09 0a 75 08 96 1f 00 b1 02 c0 09 0e a1 01 85 0e 09 52 09 53 95 07 b1 02 c0 05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 75 06 95 01 81 01 05 01 09 31 09 30 15 00 27 ff 1f 00 00 75 10 95 02 81 02 c0 09 01 a1 02 15 00 26 ff 00 95 02 75 08 81 03 c0 c0",
+ input_info=(BusType.USB, 0x1CB6, 0x6651),
+ )
+
+
+class TestIkaist_2793_0001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ikaist_2793_0001",
+ rdesc="05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 09 01 95 01 75 01 15 00 25 01 81 02 95 07 75 01 81 03 95 01 75 08 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 00 00 95 02 75 10 81 02 c0 a1 02 15 00 26 ff 00 09 01 95 39 75 08 81 03 c0 c0 05 0d 09 0e a1 01 85 11 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 09 04 a1 01 85 13 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 3c 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 02 81 03 05 0d 85 12 09 55 95 01 75 08 15 00 25 3c b1 02 06 00 ff 15 00 26 ff 00 85 1e 09 01 75 08 95 80 b1 02 85 1f 09 01 75 08 96 3f 01 b1 02 c0",
+ input_info=(BusType.USB, 0x2793, 0x0001),
+ )
+
+
+class TestIrmtouch_23c9_5666(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test irmtouch_23c9_5666",
+ rdesc="05 0d 09 04 a1 01 85 0a 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 0c 09 23 a1 02 09 52 15 00 25 06 75 08 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x23C9, 0x5666),
+ )
+
+
+class TestIrtouch_6615_0070(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test irtouch_6615_0070",
+ rdesc="05 01 09 02 a1 01 85 10 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 09 30 09 31 15 00 26 ff 7f 75 10 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 30 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 c0 05 0d 09 54 15 00 26 02 00 75 08 95 01 81 02 85 03 09 55 15 00 26 ff 00 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 02 09 52 09 53 15 00 26 ff 00 75 08 95 02 b1 02 c0 05 0d 09 02 a1 01 85 20 09 20 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 85 01 06 00 ff 09 01 75 08 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x6615, 0x0070),
+ )
+
+
+class TestIrtouch_6615_0081(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test irtouch_6615_0081",
+ rdesc="05 0d 09 04 a1 01 85 30 09 22 09 00 15 00 26 ff 00 75 08 95 05 81 02 a1 00 05 0d 09 51 15 00 26 ff 00 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0e 65 13 35 00 46 b5 04 75 10 95 01 81 02 09 31 35 00 46 8a 03 81 02 09 32 35 00 46 8a 03 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 00 15 00 26 ff 00 75 08 95 01 81 02 c0 a1 00 05 0d 09 51 15 00 26 ff 00 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0e 65 13 35 00 46 b5 04 75 10 95 01 81 02 09 31 35 00 46 8a 03 81 02 09 32 35 00 46 8a 03 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 00 15 00 26 ff 00 75 08 95 01 81 02 c0 a1 00 05 0d 09 51 15 00 26 ff 00 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0e 65 13 35 00 46 b5 04 75 10 95 01 81 02 09 31 35 00 46 8a 03 81 02 09 32 35 00 46 8a 03 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 00 15 00 26 ff 00 75 08 95 01 81 02 c0 a1 00 05 0d 09 54 15 00 25 1f 75 05 95 01 81 02 09 00 15 00 25 07 75 03 95 01 81 02 09 00 15 00 26 ff 00 75 08 95 01 81 02 c0 09 55 85 03 15 00 26 ff 00 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 02 09 52 09 53 15 00 26 ff 00 75 08 95 02 b1 02 c0 06 00 ff 09 00 a1 01 09 02 a1 00 85 aa 09 06 15 00 26 ff 00 35 00 46 ff 00 75 08 95 3f b1 02 c0 c0 05 01 09 02 a1 01 85 10 09 01 a1 00 05 01 09 00 15 00 26 ff 00 75 08 95 05 81 02 09 30 09 31 09 32 15 00 26 ff 7f 75 10 95 03 81 02 05 09 19 01 29 08 15 00 25 01 95 08 75 01 81 02 09 00 15 00 26 ff 00 75 08 95 02 81 02 c0 c0 06 00 ff 09 00 a1 01 85 40 09 00 15 00 26 ff 00 75 08 95 2e 81 02 c0",
+ input_info=(BusType.USB, 0x6615, 0x0081),
+ )
+
+
+class TestLG_043e_9aa1(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lg_043e_9aa1",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 09 31 46 78 0a 26 38 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 25 0a 09 55 b1 02 c0 09 0e a1 01 85 03 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 04 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 75 10 95 01 15 00 26 7f 07 81 02 09 31 26 37 04 81 02 c0 c0 06 00 ff 09 01 a1 01 85 05 15 00 26 ff 00 75 08 95 19 09 01 b1 02 c0 05 14 09 2b a1 02 85 07 09 2b 15 00 25 0a 75 08 95 40 b1 02 09 4b 15 00 25 0a 75 08 95 02 91 02 c0 05 14 09 2c a1 02 85 08 09 2b 15 00 25 0a 75 08 95 05 81 02 09 4b 15 00 25 0a 75 08 95 47 91 02 c0",
+ input_info=(BusType.USB, 0x043E, 0x9AA1),
+ )
+
+
+class TestLG_043e_9aa3(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lg_043e_9aa3",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 09 31 46 78 0a 26 38 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 25 0a 09 55 b1 02 c0 09 0e a1 01 85 03 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 04 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 75 10 95 01 15 00 26 7f 07 81 02 09 31 26 37 04 81 02 c0 c0 06 00 ff 09 01 a1 01 85 05 15 00 26 ff 00 75 08 95 19 09 01 b1 02 c0 05 14 09 2b a1 02 85 07 09 2b 15 00 25 0a 75 08 95 40 b1 02 09 4b 15 00 25 0a 75 08 95 02 91 02 c0 05 14 09 2c a1 02 85 08 09 2b 15 00 25 0a 75 08 95 05 81 02 09 4b 15 00 25 0a 75 08 95 47 91 02 c0",
+ input_info=(BusType.USB, 0x043E, 0x9AA3),
+ )
+
+
+class TestLG_1fd2_0064(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lg_1fd2_0064",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 a1 00 05 01 26 80 07 75 10 55 0e 65 33 09 30 35 00 46 53 07 81 02 26 38 04 46 20 04 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 a1 00 05 01 26 80 07 75 10 55 0e 65 33 09 30 35 00 46 53 07 81 02 26 38 04 46 20 04 09 31 81 02 45 00 c0 c0 05 0d 09 54 95 01 75 08 81 02 85 08 09 55 95 01 25 02 b1 02 c0 09 0e a1 01 85 07 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 75 10 95 02 15 00 26 ff 7f 81 02 c0 c0",
+ input_info=(BusType.USB, 0x1FD2, 0x0064),
+ )
+
+
+class TestLumio_202e_0006(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lumio_202e_0006",
+ rdesc="05 0d 09 04 a1 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 b0 0e 75 10 95 01 09 30 81 02 09 31 46 c2 0b 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 b0 0e 75 10 95 01 09 30 81 02 09 31 46 c2 0b 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 b0 0e 75 10 95 01 09 30 81 02 09 31 46 c2 0b 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 b0 0e 75 10 95 01 09 30 81 02 09 31 46 c2 0b 81 02 c0 05 0d 09 54 75 08 95 01 15 00 25 08 81 02 09 55 b1 02 c0",
+ input_info=(BusType.USB, 0x202E, 0x0006),
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID_MINUS_ONE"),
+ )
+
+
+class TestLumio_202e_0007(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lumio_202e_0007",
+ rdesc="05 0d 09 04 a1 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 0a 81 03 05 01 26 ff 7f 65 11 55 0e 46 ba 0e 75 10 95 01 09 30 81 02 09 31 46 ea 0b 81 02 05 0d 09 51 75 10 95 01 81 02 09 55 15 00 25 08 75 08 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x202E, 0x0007),
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID_MINUS_ONE"),
+ )
+
+
+class TestNexio_1870_0100(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test nexio_1870_0100",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 02 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 40 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x0100),
+ )
+
+
+class TestNexio_1870_010d(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test nexio_1870_010d",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 3e 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x010D),
+ )
+
+
+class TestNexio_1870_0119(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test nexio_1870_0119",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 3e 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x0119),
+ )
+
+
+class TestPenmount_14e1_3500(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test penmount_14e1_3500",
+ rdesc="05 0d 09 04 a1 01 09 22 a1 00 09 51 15 00 25 0f 75 04 95 01 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 81 01 05 01 75 10 95 01 09 30 26 ff 07 81 02 09 31 26 ff 07 81 02 05 0d 09 55 75 08 95 05 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x14E1, 0x3500),
+ quirks=("VALID_IS_CONFIDENCE",),
+ max_contacts=10,
+ )
+
+
+class TestPixart_093a_8002(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test pixart_093a_8002",
+ rdesc="05 01 09 02 a1 01 85 0d 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 03 05 01 55 0e 65 11 75 10 95 01 35 00 46 5a 14 26 ff 7f 09 30 81 22 46 72 0b 26 ff 7f 09 31 81 22 95 08 75 08 81 03 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 5a 14 26 ff 7f 81 02 09 31 46 72 0b 26 ff 7f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 5a 14 26 ff 7f 81 02 46 72 0b 26 ff 7f 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 c0 05 0d 09 0e a1 01 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 3f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 09 01 96 c3 03 85 0e b1 02 09 01 96 ff 03 85 0f b1 02 09 01 96 83 03 85 10 b1 02 09 01 96 93 00 85 11 b1 02 09 01 96 ff 03 85 12 b1 02 05 0d 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 85 0c b1 02 c0 c0",
+ input_info=(BusType.USB, 0x093A, 0x8002),
+ quirks=("VALID_IS_INRANGE", "SLOT_IS_CONTACTNUMBER"),
+ )
+
+
+class TestPqlabs_1ef1_0001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test pqlabs_1ef1_0001",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 02 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 3f 81 02 c0 c0 05 8c 09 07 a1 01 85 11 09 02 15 00 26 ff 00 75 08 95 3f 81 02 85 10 09 10 91 02 c0",
+ input_info=(BusType.USB, 0x1EF1, 0x0001),
+ )
+
+
+class TestQuanta_0408_3000(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test quanta_0408_3000",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 e3 13 26 7f 07 81 02 09 31 46 2f 0b 26 37 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 e3 13 26 7f 07 81 02 46 2f 0b 26 37 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 06 00 ff 09 01 26 ff 00 75 08 95 2f 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 c0",
+ input_info=(BusType.USB, 0x0408, 0x3000),
+ )
+
+
+class TestQuanta_0408_3001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test quanta_0408_3001",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 09 31 46 78 0a 26 38 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 0f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 c0",
+ input_info=(BusType.USB, 0x0408, 0x3001),
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID"),
+ )
+
+
+class TestQuanta_0408_3008_1(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test quanta_0408_3008_1",
+ rdesc="05 01 09 02 a1 01 85 0d 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 03 05 01 55 0e 65 11 75 10 95 01 35 00 46 4c 11 26 7f 07 09 30 81 22 46 bb 09 26 37 04 09 31 81 22 95 08 75 08 81 03 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 4c 11 26 7f 07 81 02 09 31 46 bb 09 26 37 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 4c 11 26 7f 07 81 02 46 bb 09 26 37 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 c0 05 0d 09 0e a1 01 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 3f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 09 01 96 c3 03 85 0e b1 02 09 01 96 ff 03 85 0f b1 02 09 01 96 83 03 85 10 b1 02 09 01 96 93 00 85 11 b1 02 09 01 96 ff 03 85 12 b1 02 05 0d 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 85 0c b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0408, 0x3008),
+ )
+
+
+class TestQuanta_0408_3008(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test quanta_0408_3008",
+ rdesc="05 01 09 02 a1 01 85 0d 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 03 05 01 55 0e 65 11 75 10 95 01 35 00 46 98 12 26 7f 07 09 30 81 22 46 78 0a 26 37 04 09 31 81 22 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 7f 07 81 02 09 31 46 78 0a 26 37 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 7f 07 81 02 46 78 0a 26 37 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 c0 05 0d 09 0e a1 01 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 3f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 09 01 96 c3 03 85 0e b1 02 09 01 96 ff 03 85 0f b1 02 09 01 96 83 03 85 10 b1 02 09 01 96 93 00 85 11 b1 02 09 01 96 ff 03 85 12 b1 02 05 0d 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 85 0c b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0408, 0x3008),
+ )
+
+
+class TestRafi_05bd_0107(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test rafi_05bd_0107",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 65 00 55 00 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 09 81 02 05 0d 85 02 95 01 75 08 09 55 25 0a b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 05 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 65 11 55 0f 09 30 26 ff 03 35 00 46 9c 01 75 10 95 01 81 02 09 31 26 ff 03 35 00 46 e7 00 81 02 c0 c0",
+ input_info=(BusType.USB, 0x05BD, 0x0107),
+ )
+
+
+class TestRndplus_2512_5003(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test rndplus_2512_5003",
+ rdesc="05 0d 09 04 a1 01 85 02 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 85 08 09 55 b1 02 c0 09 0e a1 01 85 07 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 16 00 00 26 ff 3f 36 00 00 46 ff 3f 66 00 00 75 10 95 02 81 62 c0 c0",
+ input_info=(BusType.USB, 0x2512, 0x5003),
+ )
+
+
+class TestRndplus_2512_5004(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test rndplus_2512_5004",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 85 05 09 55 b1 02 c0 09 0e a1 01 85 06 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 16 00 00 26 ff 3f 36 00 00 46 ff 3f 66 00 00 75 10 95 02 81 62 c0 c0 06 00 ff 09 01 a1 01 85 01 09 01 15 00 26 ff 00 75 08 95 3f 82 00 01 85 02 09 01 15 00 26 ff 00 75 08 95 3f 92 00 01 c0",
+ input_info=(BusType.USB, 0x2512, 0x5004),
+ )
+
+
+class TestSitronix_1403_5001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test sitronix_1403_5001",
+ rdesc="05 0d 09 04 a1 01 85 01 09 54 95 01 75 08 81 02 09 22 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 05 01 26 90 04 75 0c 95 01 55 0f 65 11 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 04 09 48 09 49 81 02 c0 85 02 09 55 26 ff 00 75 08 95 01 b1 02 09 04 15 00 25 ff 75 08 95 07 91 02 c0 09 0e a1 01 85 03 09 23 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x1403, 0x5001),
+ max_contacts=10,
+ )
+
+
+class TestSmart_0b8c_0092(BaseTest.TestMultitouch):
+ def create_device(self):
+ return SmartTechDigitizer(
+ "uhid test smart_0b8c_0092", input_info=(BusType.USB, 0x0B8C, 0x0092)
+ )
+
+
+class TestStantum_1f87_0002(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test stantum_1f87_0002",
+ rdesc="05 0d 09 04 a1 01 85 03 05 0d 09 54 95 01 75 08 81 02 06 00 ff 75 02 09 01 81 01 75 0e 09 02 81 02 05 0d 09 22 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 85 08 05 0d 09 55 95 01 75 08 25 0a b1 02 c0",
+ input_info=(BusType.USB, 0x1F87, 0x0002),
+ )
+
+
+class TestTopseed_1784_0016(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test topseed_1784_0016",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 04 75 10 55 00 65 00 09 30 35 00 46 ff 04 81 02 09 31 46 ff 04 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 09 55 b1 02 c0 05 0c 09 01 a1 01 85 03 a1 02 09 b5 15 00 25 01 75 01 95 01 81 02 09 b6 81 02 09 b7 81 02 09 cd 81 02 09 e2 81 02 09 e9 81 02 09 ea 81 02 05 01 09 82 81 02 c0 c0",
+ input_info=(BusType.USB, 0x1784, 0x0016),
+ max_contacts=2,
+ )
+
+
+class TestTpv_25aa_8883(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test tpv_25aa_8883",
+ rdesc="05 01 09 02 a1 01 85 0d 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 05 0d 09 32 95 01 75 01 81 02 95 01 75 05 81 03 05 01 55 0e 65 11 75 10 95 01 35 00 46 98 12 26 7f 07 09 30 81 22 46 78 0a 26 37 04 09 31 81 22 35 00 45 00 15 81 25 7f 75 08 95 01 09 38 81 06 09 00 75 08 95 07 81 03 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 7f 07 81 02 09 31 46 78 0a 26 37 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 7f 07 81 02 46 78 0a 26 37 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 c0 05 0d 09 0e a1 01 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 3f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 09 01 96 c3 03 85 0e b1 02 09 01 96 ff 03 85 0f b1 02 09 01 96 83 03 85 10 b1 02 09 01 96 93 00 85 11 b1 02 09 01 96 ff 03 85 12 b1 02 05 0d 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 85 0c b1 02 c0 c0",
+ input_info=(BusType.USB, 0x25AA, 0x8883),
+ )
+
+
+class TestTrs_star_238f_0001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test trs-star_238f_0001",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 95 01 81 03 09 37 95 01 81 03 95 01 81 03 15 00 25 0f 75 04 09 51 95 01 81 02 09 54 95 01 81 02 09 55 95 01 81 02 05 01 26 ff 03 15 00 75 10 65 00 09 30 95 01 81 02 09 31 81 02 c0 05 0d 09 0e 85 02 09 23 a1 02 15 00 25 0a 09 52 75 08 95 01 b1 02 09 53 95 01 b1 02 09 55 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x238F, 0x0001),
+ )
+
+
+class TestUnitec_227d_0103(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test unitec_227d_0103",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 03 09 55 25 05 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 53 15 00 25 05 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x227D, 0x0103),
+ )
+
+
+class TestZytronic_14c8_0005(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test zytronic_14c8_0005",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 95 01 81 02 95 06 81 01 05 01 26 00 10 75 10 95 01 65 00 09 30 81 02 09 31 46 00 10 81 02 05 0d 09 51 26 ff 00 75 08 95 01 81 02 c0 85 02 09 55 15 00 25 08 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 03 a1 02 09 23 09 52 09 53 15 00 25 08 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 15 00 26 00 10 35 00 46 00 10 65 00 75 10 95 02 81 62 c0 c0 06 00 ff 09 01 a1 01 85 05 09 00 15 00 26 ff 00 75 08 95 3f b1 02 c0 06 00 ff 09 01 a1 01 85 06 09 00 15 00 26 ff 00 75 08 95 3f 81 02 c0",
+ input_info=(BusType.USB, 0x14C8, 0x0005),
+ )
+
+
+class TestZytronic_14c8_0006(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test zytronic_14c8_0006",
+ rdesc="05 0d 09 04 a1 01 85 01 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 54 95 01 75 08 15 00 25 3c 81 02 05 0d 85 02 09 55 95 01 75 08 15 00 25 3c b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 15 00 26 00 10 35 00 46 00 10 65 00 75 10 95 02 81 62 c0 c0 06 00 ff 09 01 a1 01 85 05 09 00 15 00 26 ff 00 75 08 95 3f b1 02 c0 06 00 ff 09 01 a1 01 85 06 09 00 15 00 26 ff 00 75 08 95 3f 81 02 c0",
+ input_info=(BusType.USB, 0x14C8, 0x0006),
+ )
+
+
+################################################################################
+#
+# Windows 8 compatible devices
+#
+################################################################################
+
+
+class TestMinWin8TSParallelTriple(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return MinWin8TSParallel(3)
+
+
+class TestMinWin8TSParallel(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return MinWin8TSParallel(10)
+
+
+class TestMinWin8TSHybrid(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return MinWin8TSHybrid()
+
+
+class TestWin8TSConfidence(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Win8TSConfidence(5)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Confidence" not in uhdev.fields,
+ "Device not compatible, missing Confidence usage",
+ )
+ def test_mt_confidence_bad_release(self):
+ """Check for the validity of the confidence bit.
+ When a contact is marked as not confident, it should be detected
+ as a palm from the kernel POV and released.
+
+ Note: if the kernel exports ABS_MT_TOOL_TYPE, it shouldn't release
+ the touch but instead convert it to ABS_MT_TOOL_PALM."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 150, 200)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ t0.confidence = False
+ t0.tipswitch = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ if evdev.absinfo[libevdev.EV_ABS.ABS_MT_TOOL_TYPE] is not None:
+ # the kernel exports MT_TOOL_PALM
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TOOL_TYPE, 2) in events
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Confidence" not in uhdev.fields,
+ "Device not compatible, missing Confidence usage",
+ )
+ def test_mt_confidence_bad_multi_release(self):
+ """Check for the sticky finger being properly detected.
+
+ We first inject 3 fingers, then release only the second.
+ After 100 ms, we should receive a generated event about the
+ 2 missing fingers being released.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ # send 3 touches
+ t0 = Touch(1, 50, 10)
+ t1 = Touch(2, 150, 100)
+ t2 = Touch(3, 250, 200)
+ r = uhdev.event([t0, t1, t2])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ # release the second
+ t1.tipswitch = False
+ r = uhdev.event([t1])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ # only the second is released
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+
+ # wait for the timer to kick in
+ time.sleep(0.2)
+
+ events = uhdev.next_sync_events()
+ self.debug_reports([], uhdev, events)
+
+ # now all 3 fingers are released
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
+class TestElanXPS9360(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ElanXPS9360",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class TestTouchpadXPS9360(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test TouchpadXPS9360",
+ max_contacts=5,
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 3d 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
+
+
+class TestSurfaceBook2(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test SurfaceBook2",
+ max_contacts=5,
+ rdesc="05 01 09 06 A1 01 85 01 14 25 01 75 01 95 08 05 07 19 E0 29 E7 81 02 75 08 95 0A 18 29 91 26 FF 00 80 05 0C 0A C0 02 A1 02 1A C1 02 2A C6 02 95 06 B1 03 C0 05 08 19 01 29 03 75 01 95 03 25 01 91 02 95 05 91 01 C0 05 01 09 02 A1 01 85 02 05 09 19 01 29 05 81 02 95 01 75 03 81 03 15 81 25 7F 75 08 95 02 05 01 09 30 09 31 81 06 A1 02 09 48 14 25 01 35 01 45 10 75 02 95 01 A4 B1 02 09 38 15 81 25 7F 34 44 75 08 81 06 C0 A1 02 09 48 B4 B1 02 34 44 75 04 B1 03 05 0C 0A 38 02 15 81 25 7F 75 08 81 06 C0 C0 05 0C 09 01 A1 01 85 03 75 10 14 26 FF 03 18 2A FF 03 80 C0 06 05 FF 09 01 A1 01 85 0D 25 FF 95 02 75 08 09 20 81 02 09 22 91 02 15 81 25 7F 95 20 75 08 09 21 81 02 09 23 91 02 C0 09 02 A1 01 85 0C 14 25 FF 95 01 08 91 02 C0 05 0D 09 05 A1 01 85 04 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 44 54 64 C0 05 0D 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 44 54 64 C0 05 0D 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 C0 05 0D 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 44 54 64 C0 05 0D 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 C0 05 0D 55 0C 66 01 10 47 FF FF 00 00 27 FF FF 00 00 09 56 81 02 09 54 25 7F 75 08 81 02 05 09 09 01 25 01 75 01 81 02 95 07 81 03 05 0D 85 04 09 55 09 59 75 04 95 02 25 0F B1 02 06 00 FF 09 C6 85 05 14 25 08 75 08 95 01 B1 02 09 C7 26 FF 00 75 08 95 20 B1 02 C0 05 0D 09 0E A1 01 85 07 09 22 A1 02 09 52 14 25 0A 75 08 95 01 B1 02 C0 09 22 A0 85 08 09 57 09 58 75 01 95 02 25 01 B1 02 95 06 B1 03 C0 C0 06 07 FF 09 01 A1 01 85 0A 09 02 26 FF 00 75 08 95 14 91 02 85 09 09 03 91 02 85 0A 09 04 95 26 81 02 85 09 09 05 81 02 85 09 09 06 95 01 B1 02 85 0B 09 07 B1 02 C0 06 05 FF 09 04 A1 01 85 0E 09 31 91 02 09 31 81 03 09 30 91 02 09 30 81 02 95 39 09 32 92 02 01 09 32 82 02 01 C0 06 05 FF 09 50 A1 01 85 20 14 25 FF 75 08 95 3C 09 60 82 02 01 09 61 92 02 01 09 62 B2 02 01 85 21 09 63 82 02 01 09 64 92 02 01 09 65 B2 02 01 85 22 25 FF 75 20 95 04 19 66 29 69 81 02 19 6A 29 6D 91 02 19 6E 29 71 B1 02 85 23 19 72 29 75 81 02 19 76 29 79 91 02 19 7A 29 7D B1 02 85 24 19 7E 29 81 81 02 19 82 29 85 91 02 19 86 29 89 B1 02 85 25 19 8A 29 8D 81 02 19 8E 29 91 91 02 19 92 29 95 B1 02 85 26 19 96 29 99 81 02 19 9A 29 9D 91 02 19 9E 29 A1 B1 02 85 27 19 A2 29 A5 81 02 19 A6 29 A9 91 02 19 AA 29 AD B1 02 85 28 19 AE 29 B1 81 02 19 B2 29 B5 91 02 19 B6 29 B9 B1 02 85 29 19 BA 29 BD 81 02 19 BE 29 C1 91 02 19 C2 29 C5 B1 02 C0 06 00 FF 0A 00 F9 A1 01 85 32 75 10 95 02 14 27 FF FF 00 00 0A 01 F9 B1 02 75 20 95 01 25 FF 0A 02 F9 B1 02 75 08 95 08 26 FF 00 0A 03 F9 B2 02 01 95 10 0A 04 F9 B2 02 01 0A 05 F9 B2 02 01 95 01 75 10 27 FF FF 00 00 0A 06 F9 81 02 C0",
+ )
+
+
+class Test3m_0596_051c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test 3m_0596_051c",
+ rdesc="05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 09 01 95 01 75 01 15 00 25 01 81 02 95 07 75 01 81 03 95 01 75 08 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 ff 7f 95 02 75 10 81 02 c0 a1 02 15 00 26 ff 00 09 01 95 39 75 08 81 03 c0 c0 05 0d 09 0e a1 01 85 11 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 09 04 a1 01 85 13 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 14 81 02 05 0d 55 0c 66 01 10 35 00 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 05 0d 09 55 85 12 15 00 25 14 75 08 95 01 b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 06 00 ff 15 00 26 ff 00 85 03 09 01 75 08 95 07 b1 02 85 04 09 01 75 08 95 17 b1 02 85 05 09 01 75 08 95 47 b1 02 85 06 09 01 75 08 95 07 b1 02 85 73 09 01 75 08 95 07 b1 02 85 08 09 01 75 08 95 07 b1 02 85 09 09 01 75 08 95 3f b1 02 85 0f 09 01 75 08 96 07 02 b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_04e8_2084(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_04e8_2084",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 b1 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_2306(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_2306",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 81 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_230a(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_230a",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 81 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_231c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_231c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 b1 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_2703(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_2703",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 81 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_270b(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_270b",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 b1 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2575_0204(BaseTest.TestWin8Multitouch):
+ """found on the Dell Canvas 27"""
+
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2575_0204",
+ rdesc="05 0d 09 04 a1 01 85 01 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 42 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 c0 05 01 09 0e a1 01 85 05 05 01 09 08 a1 00 09 30 55 0e 65 11 15 00 26 ff 7f 35 00 46 4f 17 75 10 95 01 81 42 09 31 46 1d 0d 81 42 06 00 ff 09 01 75 20 81 03 05 01 09 37 55 00 65 14 16 98 fe 26 68 01 36 98 fe 46 68 01 75 0f 81 06 05 09 09 01 65 00 15 00 25 01 35 00 45 00 75 01 81 02 05 0d 09 42 81 02 09 51 75 07 25 7f 81 02 05 0d 09 48 55 0e 65 11 15 00 26 ff 7f 35 00 46 ff 7f 75 10 81 02 09 49 81 02 09 3f 55 00 65 14 15 00 26 67 01 35 00 46 67 01 81 0a c0 65 00 35 00 45 00 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 05 75 08 09 54 81 02 85 47 09 55 25 05 b1 02 c0 06 00 ff 09 04 a1 01 85 f0 09 01 75 08 95 04 b1 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 85 c0 09 01 95 03 b1 02 85 c2 09 01 95 0f b1 02 85 c4 09 01 95 3e b1 02 85 c5 09 01 95 7e b1 02 85 c6 09 01 95 fe b1 02 85 c8 09 01 96 fe 03 b1 02 85 0a 09 01 95 3f b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2619_5610(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2619_5610",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 81 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 c0",
+ )
+
+
+class Testatmel_03eb_8409(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test atmel_03eb_8409",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 15 00 25 1f 75 05 09 54 95 01 81 02 75 03 25 01 95 01 81 03 75 08 85 02 09 55 25 10 b1 02 06 00 ff 85 05 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 00 a1 01 85 03 09 20 a1 00 15 00 25 01 75 01 95 01 09 42 81 02 09 44 81 02 09 45 81 02 81 03 09 32 81 02 95 03 81 03 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 46 18 06 26 77 0f 09 31 81 02 05 0d 09 30 15 01 26 ff 00 75 08 95 01 81 02 c0 c0",
+ )
+
+
+class Testatmel_03eb_840b(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test atmel_03eb_840b",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 15 00 25 1f 75 05 09 54 95 01 81 02 75 03 25 01 95 01 81 03 75 08 85 02 09 55 25 10 b1 02 06 00 ff 85 05 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 85 03 09 20 a1 00 15 00 25 01 75 01 95 01 09 42 81 02 09 44 81 02 09 45 81 02 81 03 09 32 81 02 95 03 81 03 05 01 55 0e 65 11 35 00 75 10 95 02 46 00 0a 26 ff 0f 09 30 81 02 46 a0 05 26 ff 0f 09 31 81 02 05 0d 09 30 15 01 26 ff 00 75 08 95 01 81 02 c0 c0",
+ )
+
+
+class Testdell_044e_1220(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test dell_044e_1220",
+ type="pressurepad",
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 09 38 95 01 81 06 05 0c 0a 38 02 81 06 c0 c0 05 0d 09 05 a1 01 85 08 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 af 04 75 10 55 0e 65 11 09 30 35 00 46 e8 03 95 01 81 02 26 7b 02 46 12 02 09 31 81 02 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 05 0d 09 56 81 02 09 54 25 05 95 01 75 08 81 02 05 09 19 01 29 03 25 01 75 01 95 03 81 02 95 05 81 03 05 0d 85 09 09 55 75 08 95 01 25 05 b1 02 06 00 ff 85 0a 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 01 ff 09 01 a1 01 85 03 09 01 15 00 26 ff 00 95 1b 81 02 85 04 09 02 95 50 81 02 85 05 09 03 95 07 b1 02 85 06 09 04 81 02 c0 06 02 ff 09 01 a1 01 85 07 09 02 95 86 75 08 b1 02 c0 05 0d 09 0e a1 01 85 0b 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 0c 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0",
+ )
+
+
+class Testdell_06cb_75db(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test dell_06cb_75db",
+ max_contacts=3,
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c8 04 75 10 55 0e 65 11 09 30 35 00 46 fb 03 95 01 81 02 46 6c 02 26 e8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c8 04 75 10 55 0e 65 11 09 30 35 00 46 fb 03 95 01 81 02 46 6c 02 26 e8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c8 04 75 10 55 0e 65 11 09 30 35 00 46 fb 03 95 01 81 02 46 6c 02 26 e8 02 09 31 81 02 05 0d c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 1a 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 1a 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 01 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
+
+
+class Testegalax_capacitive_0eef_790a(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax_capacitive_0eef_790a",
+ max_contacts=10,
+ rdesc="05 0d 09 04 a1 01 85 06 05 0d 09 54 75 08 15 00 25 0c 95 01 81 02 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 17 00 00 00 00 27 ff ff ff 7f 75 20 95 01 55 00 65 00 09 56 81 02 09 55 09 53 75 08 95 02 26 ff 00 b1 02 06 00 ff 09 c5 85 07 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 01 a1 01 85 01 09 01 a1 02 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 0e a1 01 85 05 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ )
+
+
+class Testelan_04f3_000a(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_000a",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_000c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_000c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_010c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_010c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_0125(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_0125",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_016f(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_016f",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_0732(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_0732",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff 00 00 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 25 ff 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0",
+ )
+
+
+class Testelan_04f3_200a(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_200a",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff 00 00 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 0e 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0",
+ )
+
+
+class Testelan_04f3_300b(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test elan_04f3_300b",
+ max_contacts=3,
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 09 38 15 81 25 7f 75 08 95 03 81 06 05 0c 0a 38 02 95 01 81 06 75 08 95 03 81 03 c0 06 00 ff 85 0d 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0c 09 c6 96 76 02 75 08 b1 02 85 0b 09 c7 95 42 75 08 b1 02 09 01 85 5d 95 1f 75 08 81 06 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 02 25 02 09 51 81 02 75 01 95 04 81 03 05 01 15 00 26 a7 0c 75 10 55 0e 65 13 09 30 35 00 46 9d 01 95 01 81 02 46 25 01 26 2b 09 26 2b 09 09 31 81 02 05 0d 15 00 25 64 95 03 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 02 09 55 09 59 75 04 95 02 25 0f b1 02 85 07 09 60 75 01 95 01 15 00 25 01 b1 02 95 0f b1 03 06 00 ff 06 00 ff 85 06 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 03 09 22 a1 00 09 52 15 00 25 0a 75 08 95 02 b1 02 c0 09 22 a1 00 85 05 09 57 09 58 15 00 75 01 95 02 25 03 b1 02 95 0e b1 03 c0 c0",
+ )
+
+
+class Testelan_04f3_3045(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test elan_04f3_3045",
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 09 38 15 81 25 7f 75 08 95 03 81 06 05 0c 0a 38 02 95 01 81 06 75 08 95 03 81 03 c0 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 75 01 95 02 81 03 95 01 75 04 25 0f 09 51 81 02 05 01 15 00 26 80 0c 75 10 55 0e 65 13 09 30 35 00 46 90 01 95 01 81 02 46 13 01 26 96 08 26 96 08 09 31 81 02 05 0d 15 00 25 64 95 03 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 09 c5 75 08 95 04 81 03 05 0d 85 02 09 55 09 59 75 04 95 02 25 0f b1 02 85 07 09 60 75 01 95 01 15 00 25 01 b1 02 95 0f b1 03 06 00 ff 06 00 ff 85 06 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 85 0d 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0c 09 c6 96 8a 02 75 08 b1 02 85 0b 09 c7 95 80 75 08 b1 02 c0 05 0d 09 0e a1 01 85 03 09 22 a1 00 09 52 15 00 25 0a 75 08 95 02 b1 02 c0 09 22 a1 00 85 05 09 57 09 58 15 00 75 01 95 02 25 03 b1 02 95 0e b1 03 c0 c0",
+ )
+
+
+class Testelan_04f3_313a(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test elan_04f3_313a",
+ type="touchpad",
+ input_info=(BusType.I2C, 0x04F3, 0x313A),
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 75 01 95 03 81 02 95 05 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 75 08 95 05 81 03 c0 06 00 ff 09 01 85 0e 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0a 09 c6 15 00 26 ff 00 75 08 95 04 b1 02 c0 06 00 ff 09 01 a1 01 85 5c 09 01 95 0b 75 08 81 06 85 0d 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0c 09 c6 96 80 03 75 08 b1 02 85 0b 09 c7 95 82 75 08 b1 02 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 05 09 09 02 09 03 15 00 25 01 75 01 95 02 81 02 05 0d 95 01 75 04 25 0f 09 51 81 02 05 01 15 00 26 d7 0e 75 10 55 0d 65 11 09 30 35 00 46 44 2f 95 01 81 02 46 12 16 26 eb 06 26 eb 06 09 31 81 02 05 0d 15 00 25 64 95 03 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 25 01 75 01 95 08 81 03 09 c5 75 08 95 02 81 03 05 0d 85 02 09 55 09 59 75 04 95 02 25 0f b1 02 85 07 09 60 75 01 95 01 15 00 25 01 b1 02 95 0f b1 03 06 00 ff 06 00 ff 85 06 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 03 09 22 a1 00 09 52 15 00 25 0a 75 10 95 01 b1 02 c0 09 22 a1 00 85 05 09 57 09 58 75 01 95 02 25 01 b1 02 95 0e b1 03 c0 c0 05 01 09 02 a1 01 85 2a 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 75 01 95 03 81 02 95 05 81 03 05 01 09 30 09 31 15 81 25 7f 35 81 45 7f 55 00 65 13 75 08 95 02 81 06 75 08 95 05 81 03 c0 c0",
+ )
+
+
+class Testelo_04e7_0080(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elo_04e7_0080",
+ rdesc="05 0d 09 04 a1 01 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 75 10 09 30 46 7c 24 81 02 09 31 46 96 14 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 75 10 09 30 46 7c 24 81 02 09 31 46 96 14 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 75 10 09 30 46 7c 24 81 02 09 31 46 96 14 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 75 10 09 30 46 7c 24 81 02 09 31 46 96 14 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 75 08 95 01 15 00 25 08 81 02 09 55 b1 02 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0",
+ )
+
+
+class Testilitek_222a_0015(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ilitek_222a_0015",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 02 09 55 25 0a b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 09 01 85 03 15 00 26 ff 00 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0",
+ )
+
+
+class Testilitek_222a_001c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ilitek_222a_001c",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 02 09 55 25 0a b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 09 01 85 03 15 00 26 ff 00 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0",
+ )
+
+
+class Testite_06cb_2968(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test ite_06cb_2968",
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 1b 04 75 10 55 0e 65 11 09 30 35 00 46 6c 03 95 01 81 02 46 db 01 26 3b 02 09 31 81 02 05 0d c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 1a 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 1a 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 01 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ max_contacts=5,
+ input_info=(0x3, 0x06CB, 0x2968),
+ )
+
+
+class Testn_trig_1b96_0c01(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_0c01",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0c03(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_0c03",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0f00(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_0f00",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0f04(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_0f04",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_1000(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_1000",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testsharp_04dd_9681(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test sharp_04dd_9681",
+ rdesc="06 00 ff 09 01 a1 01 75 08 26 ff 00 15 00 85 06 95 3f 09 01 91 02 85 05 95 3f 09 01 81 02 c0 05 0d 09 04 a1 01 85 81 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 56 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 81 02 09 54 95 01 75 08 15 00 25 0a 81 02 85 84 09 55 b1 02 85 87 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 09 0e a1 01 85 83 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 80 05 09 19 01 29 01 15 00 25 01 95 01 75 01 81 02 95 01 75 07 81 01 05 01 65 11 55 0f 09 30 26 80 07 35 00 46 66 00 75 10 95 01 81 02 09 31 26 38 04 35 00 46 4d 00 81 02 c0 c0",
+ )
+
+
+class Testsipodev_0603_0002(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test sipodev_0603_0002",
+ type="clickpad",
+ rdesc="05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 02 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 80 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 75 01 95 02 81 03 95 01 75 04 25 05 09 51 81 02 05 01 15 00 26 44 0a 75 0c 55 0e 65 11 09 30 35 00 46 ac 03 95 01 81 02 46 fe 01 26 34 05 75 0c 09 31 81 02 05 0d c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 0a 95 01 75 04 81 02 75 01 95 03 81 03 05 09 09 01 25 01 75 01 95 01 81 02 05 0d 85 0a 09 55 09 59 75 04 95 02 25 0f b1 02 85 0b 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 09 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 06 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 07 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 05 01 09 0c a1 01 85 08 15 00 25 01 09 c6 75 01 95 01 81 06 75 07 81 03 c0 05 01 09 80 a1 01 85 01 15 00 25 01 75 01 0a 81 00 0a 82 00 0a 83 00 95 03 81 06 95 05 81 01 c0 06 0c 00 09 01 a1 01 85 02 25 01 15 00 75 01 0a b5 00 0a b6 00 0a b7 00 0a cd 00 0a e2 00 0a a2 00 0a e9 00 0a ea 00 95 08 81 02 0a 83 01 0a 6f 00 0a 70 00 0a 88 01 0a 8a 01 0a 92 01 0a a8 02 0a 24 02 95 08 81 02 0a 21 02 0a 23 02 0a 96 01 0a 25 02 0a 26 02 0a 27 02 0a 23 02 0a b1 02 95 08 81 02 c0 06 00 ff 09 01 a1 01 85 05 15 00 26 ff 00 19 01 29 02 75 08 95 05 b1 02 c0",
+ )
+
+
+class Testsynaptics_06cb_1d10(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test synaptics_06cb_1d10",
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 75 08 95 02 15 81 25 7f 35 81 45 7f 55 0e 65 11 81 06 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 95 01 75 08 15 00 25 0f 81 02 85 08 09 55 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 3f 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 05 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 01 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
+
+
+class Testsynaptics_06cb_ce08(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test synaptics_06cb_ce08",
+ max_contacts=5,
+ physical="Vendor Usage 1",
+ input_info=(BusType.I2C, 0x06CB, 0xCE08),
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 01 09 02 a1 01 85 18 09 01 a1 00 05 09 19 01 29 03 46 00 00 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 06 00 ff 09 02 a1 01 85 20 09 01 a1 00 09 03 15 00 26 ff 00 35 00 46 ff 00 75 08 95 05 81 02 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 45 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 45 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
+
+class Testsynaptics_06cb_ce26(TestWin8TSConfidence):
+ def create_device(self):
+ return PTP(
+ "uhid test synaptics_06cb_ce26",
+ max_contacts=5,
+ input_info=(BusType.I2C, 0x06CB, 0xCE26),
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 3d 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
diff --git a/tools/testing/selftests/hid/tests/test_sony.py b/tools/testing/selftests/hid/tests/test_sony.py
new file mode 100644
index 000000000000..7fd3a8e6137d
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_sony.py
@@ -0,0 +1,343 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2020 Red Hat, Inc.
+#
+
+from .base import application_matches
+from .base import KernelModule
+from .test_gamepad import BaseTest
+from hidtools.device.sony_gamepad import (
+ PS3Controller,
+ PS4ControllerBluetooth,
+ PS4ControllerUSB,
+ PS5ControllerBluetooth,
+ PS5ControllerUSB,
+ PSTouchPoint,
+)
+from hidtools.util import BusType
+
+import libevdev
+import logging
+import pytest
+
+logger = logging.getLogger("hidtools.test.sony")
+
+PS3_MODULE = KernelModule("sony", "hid_sony")
+PS4_MODULE = KernelModule("playstation", "hid_playstation")
+PS5_MODULE = KernelModule("playstation", "hid_playstation")
+
+
+class SonyBaseTest:
+ class SonyTest(BaseTest.TestGamepad):
+ pass
+
+ class SonyPS4ControllerTest(SonyTest):
+ kernel_modules = [PS4_MODULE]
+
+ def test_accelerometer(self):
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev("Accelerometer")
+
+ for x in range(-32000, 32000, 4000):
+ r = uhdev.event(accel=(x, None, None))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_X) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_X]
+ # Check against range due to small loss in precision due
+ # to inverse calibration, followed by calibration by hid-sony.
+ assert x - 1 <= value <= x + 1
+
+ for y in range(-32000, 32000, 4000):
+ r = uhdev.event(accel=(None, y, None))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_Y) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_Y]
+ assert y - 1 <= value <= y + 1
+
+ for z in range(-32000, 32000, 4000):
+ r = uhdev.event(accel=(None, None, z))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_Z) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_Z]
+ assert z - 1 <= value <= z + 1
+
+ def test_gyroscope(self):
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev("Accelerometer")
+
+ for rx in range(-2000000, 2000000, 200000):
+ r = uhdev.event(gyro=(rx, None, None))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_RX) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_RX]
+ # Sensor internal value is 16-bit, but calibrated is 22-bit, so
+ # 6-bit (64) difference, so allow a range of +/- 64.
+ assert rx - 64 <= value <= rx + 64
+
+ for ry in range(-2000000, 2000000, 200000):
+ r = uhdev.event(gyro=(None, ry, None))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_RY) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_RY]
+ assert ry - 64 <= value <= ry + 64
+
+ for rz in range(-2000000, 2000000, 200000):
+ r = uhdev.event(gyro=(None, None, rz))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_RZ) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_RZ]
+ assert rz - 64 <= value <= rz + 64
+
+ def test_battery(self):
+ uhdev = self.uhdev
+
+ assert uhdev.power_supply_class is not None
+
+ # DS4 capacity levels are in increments of 10.
+ # Battery is never below 5%.
+ for i in range(5, 105, 10):
+ uhdev.battery.capacity = i
+ uhdev.event()
+ assert uhdev.power_supply_class.capacity == i
+
+ # Discharging tests only make sense for BlueTooth.
+ if uhdev.bus == BusType.BLUETOOTH:
+ uhdev.battery.cable_connected = False
+ uhdev.battery.capacity = 45
+ uhdev.event()
+ assert uhdev.power_supply_class.status == "Discharging"
+
+ uhdev.battery.cable_connected = True
+ uhdev.battery.capacity = 5
+ uhdev.event()
+ assert uhdev.power_supply_class.status == "Charging"
+
+ uhdev.battery.capacity = 100
+ uhdev.event()
+ assert uhdev.power_supply_class.status == "Charging"
+
+ uhdev.battery.full = True
+ uhdev.event()
+ assert uhdev.power_supply_class.status == "Full"
+
+ def test_mt_single_touch(self):
+ """send a single touch in the first slot of the device,
+ and release it."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev("Touch Pad")
+
+ t0 = PSTouchPoint(1, 50, 100)
+ r = uhdev.event(touch=[t0])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+
+ t0.tipswitch = False
+ r = uhdev.event(touch=[t0])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_mt_dual_touch(self):
+ """Send 2 touches in the first 2 slots.
+ Make sure the kernel sees this as a dual touch.
+ Release and check
+
+ Note: PTP will send here BTN_DOUBLETAP emulation"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev("Touch Pad")
+
+ t0 = PSTouchPoint(1, 50, 100)
+ t1 = PSTouchPoint(2, 150, 200)
+
+ r = uhdev.event(touch=[t0])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ r = uhdev.event(touch=[t0, t1])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH) not in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_X, 5) not in events
+ )
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_Y, 10) not in events
+ )
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_POSITION_X] == 150
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 200
+
+ t0.tipswitch = False
+ r = uhdev.event(touch=[t0, t1])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_X) not in events
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_Y) not in events
+
+ t1.tipswitch = False
+ r = uhdev.event(touch=[t1])
+
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
+class TestPS3Controller(SonyBaseTest.SonyTest):
+ kernel_modules = [PS3_MODULE]
+
+ def create_device(self):
+ controller = PS3Controller()
+ controller.application_matches = application_matches
+ return controller
+
+ @pytest.fixture(autouse=True)
+ def start_controller(self):
+ # emulate a 'PS' button press to tell the kernel we are ready to accept events
+ self.assert_button(17)
+
+ # drain any remaining udev events
+ while self.uhdev.dispatch(10):
+ pass
+
+ def test_led(self):
+ for k, v in self.uhdev.led_classes.items():
+ # the kernel might have set a LED for us
+ logger.info(f"{k}: {v.brightness}")
+
+ idx = int(k[-1]) - 1
+ assert self.uhdev.hw_leds.get_led(idx)[0] == bool(v.brightness)
+
+ v.brightness = 0
+ self.uhdev.dispatch(10)
+ assert self.uhdev.hw_leds.get_led(idx)[0] is False
+
+ v.brightness = v.max_brightness
+ self.uhdev.dispatch(10)
+ assert self.uhdev.hw_leds.get_led(idx)[0]
+
+
+class CalibratedPS4Controller(object):
+ # DS4 reports uncalibrated sensor data. Calibration coefficients
+ # can be retrieved using a feature report (0x2 USB / 0x5 BT).
+ # The values below are the processed calibration values for the
+ # DS4s matching the feature reports of PS4ControllerBluetooth/USB
+ # as dumped from hid-sony 'ds4_get_calibration_data'.
+ #
+ # Note we duplicate those values here in case the kernel changes them
+ # so we can have tests passing even if hid-tools doesn't have the
+ # correct values.
+ accelerometer_calibration_data = {
+ "x": {"bias": -73, "numer": 16384, "denom": 16472},
+ "y": {"bias": -352, "numer": 16384, "denom": 16344},
+ "z": {"bias": 81, "numer": 16384, "denom": 16319},
+ }
+ gyroscope_calibration_data = {
+ "x": {"bias": 0, "numer": 1105920, "denom": 17827},
+ "y": {"bias": 0, "numer": 1105920, "denom": 17777},
+ "z": {"bias": 0, "numer": 1105920, "denom": 17748},
+ }
+
+
+class CalibratedPS4ControllerBluetooth(CalibratedPS4Controller, PS4ControllerBluetooth):
+ pass
+
+
+class TestPS4ControllerBluetooth(SonyBaseTest.SonyPS4ControllerTest):
+ def create_device(self):
+ controller = CalibratedPS4ControllerBluetooth()
+ controller.application_matches = application_matches
+ return controller
+
+
+class CalibratedPS4ControllerUSB(CalibratedPS4Controller, PS4ControllerUSB):
+ pass
+
+
+class TestPS4ControllerUSB(SonyBaseTest.SonyPS4ControllerTest):
+ def create_device(self):
+ controller = CalibratedPS4ControllerUSB()
+ controller.application_matches = application_matches
+ return controller
+
+
+class CalibratedPS5Controller(object):
+ # DualSense reports uncalibrated sensor data. Calibration coefficients
+ # can be retrieved using feature report 0x09.
+ # The values below are the processed calibration values for the
+ # DualSene matching the feature reports of PS5ControllerBluetooth/USB
+ # as dumped from hid-playstation 'dualsense_get_calibration_data'.
+ #
+ # Note we duplicate those values here in case the kernel changes them
+ # so we can have tests passing even if hid-tools doesn't have the
+ # correct values.
+ accelerometer_calibration_data = {
+ "x": {"bias": 0, "numer": 16384, "denom": 16374},
+ "y": {"bias": -114, "numer": 16384, "denom": 16362},
+ "z": {"bias": 2, "numer": 16384, "denom": 16395},
+ }
+ gyroscope_calibration_data = {
+ "x": {"bias": 0, "numer": 1105920, "denom": 17727},
+ "y": {"bias": 0, "numer": 1105920, "denom": 17728},
+ "z": {"bias": 0, "numer": 1105920, "denom": 17769},
+ }
+
+
+class CalibratedPS5ControllerBluetooth(CalibratedPS5Controller, PS5ControllerBluetooth):
+ pass
+
+
+class TestPS5ControllerBluetooth(SonyBaseTest.SonyPS4ControllerTest):
+ kernel_modules = [PS5_MODULE]
+
+ def create_device(self):
+ controller = CalibratedPS5ControllerBluetooth()
+ controller.application_matches = application_matches
+ return controller
+
+
+class CalibratedPS5ControllerUSB(CalibratedPS5Controller, PS5ControllerUSB):
+ pass
+
+
+class TestPS5ControllerUSB(SonyBaseTest.SonyPS4ControllerTest):
+ kernel_modules = [PS5_MODULE]
+
+ def create_device(self):
+ controller = CalibratedPS5ControllerUSB()
+ controller.application_matches = application_matches
+ return controller
diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py
new file mode 100644
index 000000000000..5b9abb616db4
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_tablet.py
@@ -0,0 +1,1577 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2021 Red Hat, Inc.
+#
+
+from . import base
+import copy
+from enum import Enum
+from hidtools.util import BusType
+from .base import HidBpf
+import libevdev
+import logging
+import pytest
+from typing import Dict, List, Optional, Tuple
+
+logger = logging.getLogger("hidtools.test.tablet")
+
+
+class BtnTouch(Enum):
+ """Represents whether the BTN_TOUCH event is set to True or False"""
+
+ DOWN = True
+ UP = False
+
+
+class ToolType(Enum):
+ PEN = libevdev.EV_KEY.BTN_TOOL_PEN
+ RUBBER = libevdev.EV_KEY.BTN_TOOL_RUBBER
+
+
+class BtnPressed(Enum):
+ """Represents whether a button is pressed on the stylus"""
+
+ PRIMARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS
+ SECONDARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS2
+ THIRD_PRESSED = libevdev.EV_KEY.BTN_STYLUS3
+
+
+class PenState(Enum):
+ """Pen states according to Microsoft reference:
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+
+ We extend it with the various buttons when we need to check them.
+ """
+
+ PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, False
+ PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, False
+ PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, True
+ PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, False
+ PEN_IS_IN_CONTACT_WITH_BUTTON = BtnTouch.DOWN, ToolType.PEN, True
+ PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, False
+ PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = BtnTouch.UP, ToolType.RUBBER, True
+ PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, False
+ PEN_IS_ERASING_WITH_BUTTON = BtnTouch.DOWN, ToolType.RUBBER, True
+
+ def __init__(
+ self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[bool]
+ ):
+ self.touch = touch # type: ignore
+ self.tool = tool # type: ignore
+ self.button = button # type: ignore
+
+ @classmethod
+ def from_evdev(cls, evdev, test_button) -> "PenState":
+ touch = BtnTouch(evdev.value[libevdev.EV_KEY.BTN_TOUCH])
+ tool = None
+ button = False
+ if (
+ evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER]
+ and not evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN]
+ ):
+ tool = ToolType(libevdev.EV_KEY.BTN_TOOL_RUBBER)
+ elif (
+ evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN]
+ and not evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER]
+ ):
+ tool = ToolType(libevdev.EV_KEY.BTN_TOOL_PEN)
+ elif (
+ evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN]
+ or evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER]
+ ):
+ raise ValueError("2 tools are not allowed")
+
+ # we take only the provided button into account
+ if test_button is not None:
+ button = bool(evdev.value[test_button.value])
+
+ # the kernel tends to insert an EV_SYN once removing the tool, so
+ # the button will be released after
+ if tool is None:
+ button = False
+
+ return cls((touch, tool, button)) # type: ignore
+
+ def apply(
+ self, events: List[libevdev.InputEvent], strict: bool, test_button: BtnPressed
+ ) -> "PenState":
+ if libevdev.EV_SYN.SYN_REPORT in events:
+ raise ValueError("EV_SYN is in the event sequence")
+ touch = self.touch
+ touch_found = False
+ tool = self.tool
+ tool_found = False
+ button = self.button
+ button_found = False
+
+ for ev in events:
+ if ev == libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH):
+ if touch_found:
+ raise ValueError(f"duplicated BTN_TOUCH in {events}")
+ touch_found = True
+ touch = BtnTouch(ev.value)
+ elif ev in (
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_RUBBER),
+ ):
+ if tool_found:
+ raise ValueError(f"duplicated BTN_TOOL_* in {events}")
+ tool_found = True
+ tool = ToolType(ev.code) if ev.value else None
+ elif test_button is not None and ev in (test_button.value,):
+ if button_found:
+ raise ValueError(f"duplicated BTN_STYLUS* in {events}")
+ button_found = True
+ button = bool(ev.value)
+
+ # the kernel tends to insert an EV_SYN once removing the tool, so
+ # the button will be released after
+ if tool is None:
+ button = False
+
+ new_state = PenState((touch, tool, button)) # type: ignore
+ if strict:
+ assert (
+ new_state in self.valid_transitions()
+ ), f"moving from {self} to {new_state} is forbidden"
+ else:
+ assert (
+ new_state in self.historically_tolerated_transitions()
+ ), f"moving from {self} to {new_state} is forbidden"
+
+ return new_state
+
+ def valid_transitions(self) -> Tuple["PenState", ...]:
+ """Following the state machine in the URL above.
+
+ Note that those transitions are from the evdev point of view, not HID"""
+ if self == PenState.PEN_IS_OUT_OF_RANGE:
+ return (
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_ERASING,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE:
+ return (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT:
+ return (
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_ERASING,
+ )
+
+ if self == PenState.PEN_IS_ERASING:
+ return (
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ return (
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ )
+
+ return tuple()
+
+ def historically_tolerated_transitions(self) -> Tuple["PenState", ...]:
+ """Following the state machine in the URL above, with a couple of addition
+ for skipping the in-range state, due to historical reasons.
+
+ Note that those transitions are from the evdev point of view, not HID"""
+ if self == PenState.PEN_IS_OUT_OF_RANGE:
+ return (
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_ERASING,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE:
+ return (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT:
+ return (
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_ERASING,
+ )
+
+ if self == PenState.PEN_IS_ERASING:
+ return (
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ return (
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ )
+
+ return tuple()
+
+ @staticmethod
+ def legal_transitions() -> Dict[str, Tuple["PenState", ...]]:
+ """This is the first half of the Windows Pen Implementation state machine:
+ we don't have Invert nor Erase bits, so just move in/out-of-range or proximity.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ return {
+ "in-range": (PenState.PEN_IS_IN_RANGE,),
+ "in-range -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ "in-range -> touch": (PenState.PEN_IS_IN_RANGE, PenState.PEN_IS_IN_CONTACT),
+ "in-range -> touch -> release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> touch -> release -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ }
+
+ @staticmethod
+ def legal_transitions_with_invert() -> Dict[str, Tuple["PenState", ...]]:
+ """This is the second half of the Windows Pen Implementation state machine:
+ we now have Invert and Erase bits, so move in/out or proximity with the intend
+ to erase.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ return {
+ "hover-erasing": (PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,),
+ "hover-erasing -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ "hover-erasing -> erase": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_ERASING,
+ ),
+ "hover-erasing -> erase -> release": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ ),
+ "hover-erasing -> erase -> release -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ "hover-erasing -> in-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> hover-erasing": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ ),
+ }
+
+ @staticmethod
+ def legal_transitions_with_button() -> Dict[str, Tuple["PenState", ...]]:
+ """We revisit the Windows Pen Implementation state machine:
+ we now have a button.
+ """
+ return {
+ "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_BUTTON,),
+ "hover-button -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ "in-range -> button-press": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ ),
+ "in-range -> button-press -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> touch -> button-press -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ ),
+ "in-range -> touch -> button-press -> release -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> button-press -> touch -> release -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> button-press -> touch -> button-release -> release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ }
+
+ @staticmethod
+ def tolerated_transitions() -> Dict[str, Tuple["PenState", ...]]:
+ """This is not adhering to the Windows Pen Implementation state machine
+ but we should expect the kernel to behave properly, mostly for historical
+ reasons."""
+ return {
+ "direct-in-contact": (PenState.PEN_IS_IN_CONTACT,),
+ "direct-in-contact -> out-of-range": (
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ }
+
+ @staticmethod
+ def tolerated_transitions_with_invert() -> Dict[str, Tuple["PenState", ...]]:
+ """This is the second half of the Windows Pen Implementation state machine:
+ we now have Invert and Erase bits, so move in/out or proximity with the intend
+ to erase.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ return {
+ "direct-erase": (PenState.PEN_IS_ERASING,),
+ "direct-erase -> out-of-range": (
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ }
+
+ @staticmethod
+ def broken_transitions() -> Dict[str, Tuple["PenState", ...]]:
+ """Those tests are definitely not part of the Windows specification.
+ However, a half broken device might export those transitions.
+ For example, a pen that has the eraser button might wobble between
+ touching and erasing if the tablet doesn't enforce the Windows
+ state machine."""
+ return {
+ "in-range -> touch -> erase -> hover-erase": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ ),
+ "in-range -> erase -> hover-erase": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ ),
+ "hover-erase -> erase -> touch -> in-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "hover-erase -> touch -> in-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "touch -> erase -> touch -> erase": (
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_ERASING,
+ ),
+ }
+
+
+class Pen(object):
+ def __init__(self, x, y):
+ self.x = x
+ self.y = y
+ self.distance = -10
+ self.tipswitch = False
+ self.tippressure = 15
+ self.azimuth = 0
+ self.inrange = False
+ self.width = 10
+ self.height = 10
+ self.barrelswitch = False
+ self.secondarybarrelswitch = False
+ self.invert = False
+ self.eraser = False
+ self.xtilt = 1
+ self.ytilt = 1
+ self.twist = 1
+ self._old_values = None
+ self.current_state = None
+
+ def restore(self):
+ if self._old_values is not None:
+ for i in [
+ "x",
+ "y",
+ "distance",
+ "tippressure",
+ "azimuth",
+ "width",
+ "height",
+ "twist",
+ "xtilt",
+ "ytilt",
+ ]:
+ setattr(self, i, getattr(self._old_values, i))
+
+ def backup(self):
+ self._old_values = copy.copy(self)
+
+ def __assert_axis(self, evdev, axis, value):
+ if (
+ axis == libevdev.EV_KEY.BTN_TOOL_RUBBER
+ and evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] is None
+ ):
+ return
+
+ assert (
+ evdev.value[axis] == value
+ ), f"assert evdev.value[{axis}] ({evdev.value[axis]}) != {value}"
+
+ def assert_expected_input_events(self, evdev, button):
+ assert evdev.value[libevdev.EV_ABS.ABS_X] == self.x
+ assert evdev.value[libevdev.EV_ABS.ABS_Y] == self.y
+
+ # assert no other buttons than the tested ones are set
+ buttons = [
+ BtnPressed.PRIMARY_PRESSED,
+ BtnPressed.SECONDARY_PRESSED,
+ BtnPressed.THIRD_PRESSED,
+ ]
+ if button is not None:
+ buttons.remove(button)
+ for b in buttons:
+ assert evdev.value[b.value] is None or evdev.value[b.value] == False
+
+ assert self.current_state == PenState.from_evdev(evdev, button)
+
+
+class PenDigitizer(base.UHIDTestDevice):
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 1, 2),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(name, application, rdesc_str, rdesc, input_info)
+ self.physical = physical
+ self.cur_application = application
+ if evdev_name_suffix is not None:
+ self.name += evdev_name_suffix
+
+ self.fields = []
+ for r in self.parsed_rdesc.input_reports.values():
+ if r.application_name == self.application:
+ physicals = [f.physical_name for f in r]
+ if self.physical not in physicals and None not in physicals:
+ continue
+ self.fields = [f.usage_name for f in r]
+
+ def move_to(self, pen, state, button):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.x = 0
+ pen.y = 0
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.distance = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarybarrelswitch = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarybarrelswitch = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_ERASING:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = True
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+
+ pen.current_state = state
+
+ def event(self, pen, button):
+ rs = []
+ r = self.create_report(application=self.cur_application, data=pen)
+ self.call_input_event(r)
+ rs.append(r)
+ return rs
+
+ def get_report(self, req, rnum, rtype):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return (1, [])
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return (1, [])
+
+ return (1, [])
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return 1
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return 1
+
+ return 1
+
+
+class BaseTest:
+ class TestTablet(base.BaseTestCase.TestUhid):
+ def create_device(self):
+ raise Exception("please reimplement me in subclasses")
+
+ def post(self, uhdev, pen, test_button):
+ r = uhdev.event(pen, test_button)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ return events
+
+ def validate_transitions(
+ self, from_state, pen, evdev, events, allow_intermediate_states, button
+ ):
+ # check that the final state is correct
+ pen.assert_expected_input_events(evdev, button)
+
+ state = from_state
+
+ # check that the transitions are valid
+ sync_events = []
+ while libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) in events:
+ # split the first EV_SYN from the list
+ idx = events.index(libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT))
+ sync_events = events[:idx]
+ events = events[idx + 1 :]
+
+ # now check for a valid transition
+ state = state.apply(sync_events, not allow_intermediate_states, button)
+
+ if events:
+ state = state.apply(sync_events, not allow_intermediate_states, button)
+
+ def _test_states(
+ self, state_list, scribble, allow_intermediate_states, button=None
+ ):
+ """Internal method to test against a list of
+ transition between states.
+ state_list is a list of PenState objects
+ scribble is a boolean which tells if we need
+ to wobble a little the X,Y coordinates of the pen
+ between each state transition."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ cur_state = PenState.PEN_IS_OUT_OF_RANGE
+
+ p = Pen(50, 60)
+ uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE, button)
+ events = self.post(uhdev, p, button)
+ self.validate_transitions(
+ cur_state, p, evdev, events, allow_intermediate_states, button
+ )
+
+ cur_state = p.current_state
+
+ for state in state_list:
+ if scribble and cur_state != PenState.PEN_IS_OUT_OF_RANGE:
+ p.x += 1
+ p.y -= 1
+ events = self.post(uhdev, p, button)
+ self.validate_transitions(
+ cur_state, p, evdev, events, allow_intermediate_states, button
+ )
+ assert len(events) >= 3 # X, Y, SYN
+ uhdev.move_to(p, state, button)
+ if scribble and state != PenState.PEN_IS_OUT_OF_RANGE:
+ p.x += 1
+ p.y -= 1
+ events = self.post(uhdev, p, button)
+ self.validate_transitions(
+ cur_state, p, evdev, events, allow_intermediate_states, button
+ )
+ cur_state = p.current_state
+
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [pytest.param(v, id=k) for k, v in PenState.legal_transitions().items()],
+ )
+ def test_valid_pen_states(self, state_list, scribble):
+ """This is the first half of the Windows Pen Implementation state machine:
+ we don't have Invert nor Erase bits, so just move in/out-of-range or proximity.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ self._test_states(state_list, scribble, allow_intermediate_states=False)
+
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.tolerated_transitions().items()
+ ],
+ )
+ def test_tolerated_pen_states(self, state_list, scribble):
+ """This is not adhering to the Windows Pen Implementation state machine
+ but we should expect the kernel to behave properly, mostly for historical
+ reasons."""
+ self._test_states(state_list, scribble, allow_intermediate_states=True)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Barrel Switch" not in uhdev.fields,
+ "Device not compatible, missing Barrel Switch usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.legal_transitions_with_button().items()
+ ],
+ )
+ def test_valid_primary_button_pen_states(self, state_list, scribble):
+ """Rework the transition state machine by adding the primary button."""
+ self._test_states(
+ state_list,
+ scribble,
+ allow_intermediate_states=False,
+ button=BtnPressed.PRIMARY_PRESSED,
+ )
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Secondary Barrel Switch" not in uhdev.fields,
+ "Device not compatible, missing Secondary Barrel Switch usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.legal_transitions_with_button().items()
+ ],
+ )
+ def test_valid_secondary_button_pen_states(self, state_list, scribble):
+ """Rework the transition state machine by adding the secondary button."""
+ self._test_states(
+ state_list,
+ scribble,
+ allow_intermediate_states=False,
+ button=BtnPressed.SECONDARY_PRESSED,
+ )
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Third Barrel Switch" not in uhdev.fields,
+ "Device not compatible, missing Third Barrel Switch usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.legal_transitions_with_button().items()
+ ],
+ )
+ def test_valid_third_button_pen_states(self, state_list, scribble):
+ """Rework the transition state machine by adding the secondary button."""
+ self._test_states(
+ state_list,
+ scribble,
+ allow_intermediate_states=False,
+ button=BtnPressed.THIRD_PRESSED,
+ )
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Invert" not in uhdev.fields,
+ "Device not compatible, missing Invert usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.legal_transitions_with_invert().items()
+ ],
+ )
+ def test_valid_invert_pen_states(self, state_list, scribble):
+ """This is the second half of the Windows Pen Implementation state machine:
+ we now have Invert and Erase bits, so move in/out or proximity with the intend
+ to erase.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ self._test_states(state_list, scribble, allow_intermediate_states=False)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Invert" not in uhdev.fields,
+ "Device not compatible, missing Invert usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.tolerated_transitions_with_invert().items()
+ ],
+ )
+ def test_tolerated_invert_pen_states(self, state_list, scribble):
+ """This is the second half of the Windows Pen Implementation state machine:
+ we now have Invert and Erase bits, so move in/out or proximity with the intend
+ to erase.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ self._test_states(state_list, scribble, allow_intermediate_states=True)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Invert" not in uhdev.fields,
+ "Device not compatible, missing Invert usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [pytest.param(v, id=k) for k, v in PenState.broken_transitions().items()],
+ )
+ def test_tolerated_broken_pen_states(self, state_list, scribble):
+ """Those tests are definitely not part of the Windows specification.
+ However, a half broken device might export those transitions.
+ For example, a pen that has the eraser button might wobble between
+ touching and erasing if the tablet doesn't enforce the Windows
+ state machine."""
+ self._test_states(state_list, scribble, allow_intermediate_states=True)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Z" not in uhdev.fields,
+ "Device not compatible, missing Z usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [pytest.param(v, id=k) for k, v in PenState.legal_transitions().items()],
+ )
+ def test_z_reported_as_distance(self, state_list, scribble):
+ """Verify stylus Z values are reported as ABS_DISTANCE."""
+ self._test_states(state_list, scribble, allow_intermediate_states=False)
+
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ p = Pen(0, 0)
+ uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE, None)
+ p = Pen(100, 200)
+ uhdev.move_to(p, PenState.PEN_IS_IN_RANGE, None)
+ p.distance = -1
+ events = self.post(uhdev, p, None)
+ assert evdev.value[libevdev.EV_ABS.ABS_DISTANCE] == -1
+
+
+class GXTP_pen(PenDigitizer):
+ def event(self, pen, test_button):
+ if not hasattr(self, "prev_tip_state"):
+ self.prev_tip_state = False
+
+ internal_pen = copy.copy(pen)
+
+ # bug in the controller: when the pen touches the
+ # surface, in-range stays to 1, but when
+ # the pen moves in-range gets reverted to 0
+ if pen.tipswitch and self.prev_tip_state:
+ internal_pen.inrange = False
+
+ self.prev_tip_state = pen.tipswitch
+
+ # another bug in the controller: when the pen is
+ # inverted, invert is set to 1, but as soon as
+ # the pen touches the surface, eraser is correctly
+ # set to 1 but invert is released
+ if pen.eraser:
+ internal_pen.invert = False
+
+ return super().event(internal_pen, test_button)
+
+
+class USIPen(PenDigitizer):
+ pass
+
+
+class XPPen_ArtistPro16Gen2_28bd_095b(PenDigitizer):
+ """
+ Pen with two buttons and a rubber end, but which reports
+ the second button as an eraser
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 0x28BD, 0x095B),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+ self.fields.append("Secondary Barrel Switch")
+
+ def move_to(self, pen, state, button):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.x = 0
+ pen.y = 0
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_ERASING:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+
+ pen.current_state = state
+
+ def event(self, pen, test_button):
+ import math
+
+ pen_copy = copy.copy(pen)
+ width = 13.567
+ height = 8.480
+ tip_height = 0.055677699
+ hx = tip_height * (32767 / width)
+ hy = tip_height * (32767 / height)
+ if pen_copy.xtilt != 0:
+ pen_copy.x += round(hx * math.sin(math.radians(pen_copy.xtilt)))
+ if pen_copy.ytilt != 0:
+ pen_copy.y += round(hy * math.sin(math.radians(pen_copy.ytilt)))
+
+ return super().event(pen_copy, test_button)
+
+
+class XPPen_Artist24_28bd_093a(PenDigitizer):
+ """
+ Pen that reports secondary barrel switch through eraser
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 0x28BD, 0x093A),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+ self.fields.append("Secondary Barrel Switch")
+ self.previous_state = PenState.PEN_IS_OUT_OF_RANGE
+
+ def move_to(self, pen, state, button, debug=True):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ if debug:
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+
+ pen.current_state = state
+
+ def send_intermediate_state(self, pen, state, button):
+ intermediate_pen = copy.copy(pen)
+ self.move_to(intermediate_pen, state, button, debug=False)
+ return super().event(intermediate_pen, button)
+
+ def event(self, pen, button):
+ rs = []
+
+ # the pen reliably sends in-range events in a normal case (non emulation of eraser mode)
+ if self.previous_state == PenState.PEN_IS_IN_CONTACT:
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_IN_RANGE, button)
+ )
+
+ if button == BtnPressed.SECONDARY_PRESSED:
+ if self.previous_state == PenState.PEN_IS_IN_RANGE:
+ if pen.current_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+
+ if self.previous_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ if pen.current_state == PenState.PEN_IS_IN_RANGE:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+
+ if self.previous_state == PenState.PEN_IS_IN_CONTACT:
+ if pen.current_state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_IN_RANGE_WITH_BUTTON, button
+ )
+ )
+
+ if self.previous_state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ if pen.current_state == PenState.PEN_IS_IN_CONTACT:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_IN_RANGE, button
+ )
+ )
+
+ rs.extend(super().event(pen, button))
+ self.previous_state = pen.current_state
+ return rs
+
+
+class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer):
+ """
+ Pen that reports secondary barrel switch through secondary TipSwtich
+ and 3rd button through Invert
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Stylus",
+ physical=None,
+ input_info=(BusType.USB, 0x256C, 0x006B),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+ self.fields.append("Secondary Barrel Switch")
+ self.fields.append("Third Barrel Switch")
+ self.previous_state = PenState.PEN_IS_OUT_OF_RANGE
+
+ def move_to(self, pen, state, button, debug=True):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ if debug:
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.eraser = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarytipswitch = button == BtnPressed.SECONDARY_PRESSED
+ pen.invert = button == BtnPressed.THIRD_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.eraser = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarytipswitch = button == BtnPressed.SECONDARY_PRESSED
+ pen.invert = button == BtnPressed.THIRD_PRESSED
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_ERASING:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = True
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+
+ pen.current_state = state
+
+ def call_input_event(self, report):
+ if report[0] == 0x0A:
+ # ensures the original second Eraser usage is null
+ report[1] &= 0xDF
+
+ # ensures the original last bit is equal to bit 6 (In Range)
+ if report[1] & 0x40:
+ report[1] |= 0x80
+
+ super().call_input_event(report)
+
+ def send_intermediate_state(self, pen, state, test_button):
+ intermediate_pen = copy.copy(pen)
+ self.move_to(intermediate_pen, state, test_button, debug=False)
+ return super().event(intermediate_pen, test_button)
+
+ def event(self, pen, button):
+ rs = []
+
+ # it's not possible to go between eraser mode or not without
+ # going out-of-prox: the eraser mode is activated by presenting
+ # the tail of the pen
+ if self.previous_state in (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ ) and pen.current_state in (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_ERASING_WITH_BUTTON,
+ ):
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_OUT_OF_RANGE, button)
+ )
+
+ # same than above except from eraser to normal
+ if self.previous_state in (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_ERASING_WITH_BUTTON,
+ ) and pen.current_state in (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ ):
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_OUT_OF_RANGE, button)
+ )
+
+ if self.previous_state == PenState.PEN_IS_OUT_OF_RANGE:
+ if pen.current_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_IN_RANGE, button)
+ )
+
+ rs.extend(super().event(pen, button))
+ self.previous_state = pen.current_state
+ return rs
+
+
+class Wacom_2d1f_014b(PenDigitizer):
+ """
+ Pen that reports distance values with HID_GD_Z usage.
+ """
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 0x2D1F, 0x014B),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+
+ def match_evdev_rule(self, application, evdev):
+ # there are 2 nodes created by the device, only one matters
+ return evdev.name.endswith("Stylus")
+
+ def event(self, pen, test_button):
+ # this device reports the distance through Z
+ pen.z = pen.distance
+
+ return super().event(pen, test_button)
+
+
+################################################################################
+#
+# Windows 7 compatible devices
+#
+################################################################################
+# class TestEgalax_capacitive_0eef_7224(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_7224',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 34 49 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 37 29 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 34 49 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 37 29 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x7224),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_72fa(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_72fa',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 72 22 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 87 13 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 72 22 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 87 13 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x72fa),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_7336(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_7336',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 c1 20 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 c2 18 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 c1 20 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 c2 18 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x7336),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_7337(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_7337',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 ae 17 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 c3 0e 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 ae 17 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 c3 0e 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x7337),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_7349(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_7349',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 34 49 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 37 29 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 34 49 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 37 29 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x7349),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_73f4(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_73f4',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 96 4e 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 23 2c 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 96 4e 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 23 2c 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x73f4),
+# evdev_name_suffix=' Touchscreen')
+#
+# bogus: BTN_TOOL_PEN is not emitted
+# class TestIrtouch_6615_0070(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test irtouch_6615_0070',
+# rdesc='05 01 09 02 a1 01 85 10 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 09 30 09 31 15 00 26 ff 7f 75 10 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 30 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 c0 05 0d 09 54 15 00 26 02 00 75 08 95 01 81 02 85 03 09 55 15 00 26 ff 00 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 02 09 52 09 53 15 00 26 ff 00 75 08 95 02 b1 02 c0 05 0d 09 02 a1 01 85 20 09 20 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 85 01 06 00 ff 09 01 75 08 95 01 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x6615, 0x0070))
+
+
+class TestNexio_1870_0100(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test nexio_1870_0100",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 02 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 40 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x0100),
+ )
+
+
+class TestNexio_1870_010d(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test nexio_1870_010d",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 3e 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x010D),
+ )
+
+
+class TestNexio_1870_0119(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test nexio_1870_0119",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 3e 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x0119),
+ )
+
+
+################################################################################
+#
+# Windows 8 compatible devices
+#
+################################################################################
+
+# bogus: application is 'undefined'
+# class Testatmel_03eb_8409(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test atmel_03eb_8409', rdesc='05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 15 00 25 1f 75 05 09 54 95 01 81 02 75 03 25 01 95 01 81 03 75 08 85 02 09 55 25 10 b1 02 06 00 ff 85 05 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 00 a1 01 85 03 09 20 a1 00 15 00 25 01 75 01 95 01 09 42 81 02 09 44 81 02 09 45 81 02 81 03 09 32 81 02 95 03 81 03 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 46 18 06 26 77 0f 09 31 81 02 05 0d 09 30 15 01 26 ff 00 75 08 95 01 81 02 c0 c0')
+
+
+class Testatmel_03eb_840b(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test atmel_03eb_840b",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 15 00 25 1f 75 05 09 54 95 01 81 02 75 03 25 01 95 01 81 03 75 08 85 02 09 55 25 10 b1 02 06 00 ff 85 05 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 85 03 09 20 a1 00 15 00 25 01 75 01 95 01 09 42 81 02 09 44 81 02 09 45 81 02 81 03 09 32 81 02 95 03 81 03 05 01 55 0e 65 11 35 00 75 10 95 02 46 00 0a 26 ff 0f 09 30 81 02 46 a0 05 26 ff 0f 09 31 81 02 05 0d 09 30 15 01 26 ff 00 75 08 95 01 81 02 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0c01(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_0c01",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0c03(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_0c03",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0f00(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_0f00",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0f04(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_0f04",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_1000(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_1000",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class TestGXTP_27c6_0113(BaseTest.TestTablet):
+ def create_device(self):
+ return GXTP_pen(
+ "uhid test GXTP_27c6_0113",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 55 0e 65 11 35 00 15 00 09 42 25 01 75 01 95 01 81 02 95 07 81 01 95 01 75 08 09 51 81 02 75 10 05 01 26 00 14 46 1f 07 09 30 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 95 07 81 01 95 01 75 08 09 51 81 02 75 10 05 01 26 00 14 46 1f 07 09 30 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 95 07 81 01 95 01 75 08 09 51 81 02 75 10 05 01 26 00 14 46 1f 07 09 30 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 01 75 08 09 51 95 01 81 02 05 01 26 00 14 75 10 55 0e 65 11 09 30 35 00 46 1f 07 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 01 75 08 09 51 95 01 81 02 05 01 26 00 14 75 10 55 0e 65 11 09 30 35 00 46 1f 07 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 54 15 00 25 7f 75 08 95 01 81 02 85 02 09 55 95 01 25 0a b1 02 85 03 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 85 08 09 20 a1 00 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 04 81 02 95 01 81 03 09 32 81 02 95 02 81 03 95 01 75 08 09 51 81 02 05 01 09 30 75 10 95 01 a4 55 0e 65 11 35 00 26 00 14 46 1f 07 81 42 09 31 26 80 0c 46 77 04 81 42 b4 05 0d 09 30 26 ff 0f 81 02 09 3d 65 14 55 0e 36 d8 dc 46 28 23 16 d8 dc 26 28 23 81 02 09 3e 81 02 c0 c0 06 f0 ff 09 01 a1 01 85 0e 09 01 15 00 25 ff 75 08 95 40 91 02 09 01 15 00 25 ff 75 08 95 40 81 02 c0 05 01 09 06 a1 01 85 04 05 07 09 e3 15 00 25 01 75 01 95 01 81 02 95 07 81 03 c0",
+ )
+
+
+################################################################################
+#
+# Windows 8 compatible devices with USI Pen
+#
+################################################################################
+
+
+class TestElan_04f3_2A49(BaseTest.TestTablet):
+ def create_device(self):
+ return USIPen(
+ "uhid test Elan_04f3_2A49",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 54 25 7f 96 01 00 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 16 00 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 16 00 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 20 09 01 91 02 c0 06 00 ff 09 01 a1 01 85 06 09 03 75 08 95 12 91 02 09 04 75 08 95 03 b1 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0 05 0d 09 02 a1 01 85 07 35 00 09 20 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0f 65 11 46 26 01 26 1c 48 81 42 09 31 46 a6 00 26 bc 2f 81 42 b4 05 0d 09 30 26 00 10 81 02 75 08 95 01 09 3b 25 64 81 42 09 38 15 00 25 02 81 02 09 5c 26 ff 00 81 02 09 5e 81 02 09 70 a1 02 15 01 25 06 09 72 09 73 09 74 09 75 09 76 09 77 81 20 09 5b 25 ff 75 40 81 02 c0 06 00 ff 75 08 95 02 09 01 81 02 c0 05 0d 85 60 09 81 a1 02 09 38 75 08 95 01 15 00 25 02 81 02 09 81 15 01 25 04 09 82 09 83 09 84 09 85 81 20 c0 85 61 09 5c a1 02 15 00 26 ff 00 75 08 95 01 09 38 b1 02 09 5c 26 ff 00 b1 02 09 5d 75 01 95 01 25 01 b1 02 95 07 b1 03 c0 85 62 09 5e a1 02 09 38 15 00 25 02 75 08 95 01 b1 02 09 5e 26 ff 00 b1 02 09 5f 75 01 25 01 b1 02 75 07 b1 03 c0 85 63 09 70 a1 02 75 08 95 01 15 00 25 02 09 38 b1 02 09 70 a1 02 25 06 09 72 09 73 09 74 09 75 09 76 09 77 b1 20 c0 09 71 75 01 25 01 b1 02 75 07 b1 03 c0 85 64 09 80 15 00 25 ff 75 40 95 01 b1 02 85 65 09 44 a1 02 09 38 75 08 95 01 25 02 b1 02 15 01 25 03 09 44 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 5a a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 45 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 c0 85 66 75 08 95 01 05 0d 09 90 a1 02 09 38 25 02 b1 02 09 91 75 10 26 ff 0f b1 02 09 92 75 40 25 ff b1 02 05 06 09 2a 75 08 26 ff 00 a1 02 09 2d b1 02 09 2e b1 02 c0 c0 85 67 05 06 09 2b a1 02 05 0d 25 02 09 38 b1 02 05 06 09 2b a1 02 09 2d 26 ff 00 b1 02 09 2e b1 02 c0 c0 85 68 06 00 ff 09 01 a1 02 05 0d 09 38 75 08 95 01 25 02 b1 02 06 00 ff 09 01 75 10 27 ff ff 00 00 b1 02 c0 85 69 05 0d 09 38 75 08 95 01 15 00 25 02 b1 02 c0 06 00 ff 09 81 a1 01 85 17 75 08 95 1f 09 05 81 02 c0",
+ input_info=(BusType.I2C, 0x04F3, 0x2A49),
+ )
+
+
+class TestGoodix_27c6_0e00(BaseTest.TestTablet):
+ def create_device(self):
+ return USIPen(
+ "uhid test Elan_04f3_2A49",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 55 0e 65 11 35 00 15 00 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 54 15 00 25 7f 75 08 95 01 81 02 85 02 09 55 95 01 25 0a b1 02 85 03 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 09 20 a1 00 85 08 05 01 a4 09 30 35 00 46 e6 09 15 00 26 04 20 55 0d 65 13 75 10 95 01 81 02 09 31 46 9a 06 26 60 15 81 02 b4 05 0d 09 38 95 01 75 08 15 00 25 01 81 02 09 30 75 10 26 ff 0f 81 02 09 31 81 02 09 42 09 44 09 5a 09 3c 09 45 09 32 75 01 95 06 25 01 81 02 95 02 81 03 09 3d 55 0e 65 14 36 d8 dc 46 28 23 16 d8 dc 26 28 23 95 01 75 10 81 02 09 3e 81 02 09 41 15 00 27 a0 8c 00 00 35 00 47 a0 8c 00 00 81 02 05 20 0a 53 04 65 00 16 01 f8 26 ff 07 75 10 95 01 81 02 0a 54 04 81 02 0a 55 04 81 02 0a 57 04 81 02 0a 58 04 81 02 0a 59 04 81 02 0a 72 04 81 02 0a 73 04 81 02 0a 74 04 81 02 05 0d 09 3b 15 00 25 64 75 08 81 02 09 5b 25 ff 75 40 81 02 06 00 ff 09 5b 75 20 81 02 05 0d 09 5c 26 ff 00 75 08 81 02 09 5e 81 02 09 70 a1 02 15 01 25 06 09 72 09 73 09 74 09 75 09 76 09 77 81 20 c0 06 00 ff 09 01 15 00 27 ff ff 00 00 75 10 95 01 81 02 85 09 09 81 a1 02 09 81 15 01 25 04 09 82 09 83 09 84 09 85 81 20 c0 85 10 09 5c a1 02 15 00 25 01 75 08 95 01 09 38 b1 02 09 5c 26 ff 00 b1 02 09 5d 75 01 95 01 25 01 b1 02 95 07 b1 03 c0 85 11 09 5e a1 02 09 38 15 00 25 01 75 08 95 01 b1 02 09 5e 26 ff 00 b1 02 09 5f 75 01 25 01 b1 02 75 07 b1 03 c0 85 12 09 70 a1 02 75 08 95 01 15 00 25 01 09 38 b1 02 09 70 a1 02 25 06 09 72 09 73 09 74 09 75 09 76 09 77 b1 20 c0 09 71 75 01 25 01 b1 02 75 07 b1 03 c0 85 13 09 80 15 00 25 ff 75 40 95 01 b1 02 85 14 09 44 a1 02 09 38 75 08 95 01 25 01 b1 02 15 01 25 03 09 44 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 5a a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 45 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 c0 85 15 75 08 95 01 05 0d 09 90 a1 02 09 38 25 01 b1 02 09 91 75 10 26 ff 0f b1 02 09 92 75 40 25 ff b1 02 05 06 09 2a 75 08 26 ff 00 a1 02 09 2d b1 02 09 2e b1 02 c0 c0 85 16 05 06 09 2b a1 02 05 0d 25 01 09 38 b1 02 05 06 09 2b a1 02 09 2d 26 ff 00 b1 02 09 2e b1 02 c0 c0 85 17 06 00 ff 09 01 a1 02 05 0d 09 38 75 08 95 01 25 01 b1 02 06 00 ff 09 01 75 10 27 ff ff 00 00 b1 02 c0 85 18 05 0d 09 38 75 08 95 01 15 00 25 01 b1 02 c0 c0 06 f0 ff 09 01 a1 01 85 0e 09 01 15 00 25 ff 75 08 95 40 91 02 09 01 15 00 25 ff 75 08 95 40 81 02 c0",
+ input_info=(BusType.I2C, 0x27C6, 0x0E00),
+ )
+
+
+class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet):
+ hid_bpfs = [HidBpf("XPPen__ArtistPro16Gen2.bpf.o", True)]
+
+ def create_device(self):
+ dev = XPPen_ArtistPro16Gen2_28bd_095b(
+ "uhid test XPPen Artist Pro 16 Gen2 28bd 095b",
+ rdesc="05 0d 09 02 a1 01 85 07 09 20 a1 00 09 42 09 44 09 45 09 3c 15 00 25 01 75 01 95 04 81 02 95 01 81 03 09 32 15 00 25 01 95 01 81 02 95 02 81 03 75 10 95 01 35 00 a4 05 01 09 30 65 13 55 0d 46 ff 34 26 ff 7f 81 02 09 31 46 20 21 26 ff 7f 81 02 b4 09 30 45 00 26 ff 3f 81 42 09 3d 15 81 25 7f 75 08 95 01 81 02 09 3e 15 81 25 7f 81 02 c0 c0",
+ input_info=(BusType.USB, 0x28BD, 0x095B),
+ )
+ return dev
+
+
+class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet):
+ hid_bpfs = [HidBpf("XPPen__Artist24.bpf.o", True)]
+
+ def create_device(self):
+ return XPPen_Artist24_28bd_093a(
+ "uhid test XPPen Artist 24 28bd 093a",
+ rdesc="05 0d 09 02 a1 01 85 07 09 20 a1 00 09 42 09 44 09 45 15 00 25 01 75 01 95 03 81 02 95 02 81 03 09 32 95 01 81 02 95 02 81 03 75 10 95 01 35 00 a4 05 01 09 30 65 13 55 0d 46 f0 50 26 ff 7f 81 02 09 31 46 91 2d 26 ff 7f 81 02 b4 09 30 45 00 26 ff 1f 81 42 09 3d 15 81 25 7f 75 08 95 01 81 02 09 3e 15 81 25 7f 81 02 c0 c0",
+ input_info=(BusType.USB, 0x28BD, 0x093A),
+ )
+
+
+class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet):
+ hid_bpfs = [HidBpf("Huion__Kamvas-Pro-19.bpf.o", True)]
+
+ def create_device(self):
+ return Huion_Kamvas_Pro_19_256c_006b(
+ "uhid test HUION Huion Tablet_GT1902",
+ rdesc="05 0d 09 02 a1 01 85 0a 09 20 a1 01 09 42 09 44 09 43 09 3c 09 45 15 00 25 01 75 01 95 06 81 02 09 32 75 01 95 01 81 02 81 03 05 01 09 30 09 31 55 0d 65 33 26 ff 7f 35 00 46 00 08 75 10 95 02 81 02 05 0d 09 30 26 ff 3f 75 10 95 01 81 02 09 3d 09 3e 15 a6 25 5a 75 08 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 75 08 95 08 81 03 85 05 09 55 25 0a 75 08 95 01 b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0",
+ input_info=(BusType.USB, 0x256C, 0x006B),
+ )
+
+
+################################################################################
+#
+# Devices Reporting Distance
+#
+################################################################################
+
+
+class TestWacom_2d1f_014b(BaseTest.TestTablet):
+ def create_device(self):
+ return Wacom_2d1f_014b(
+ "uhid test Wacom 2d1f_014b",
+ rdesc="05 0d 09 02 a1 01 85 02 09 20 a1 00 09 42 09 44 09 45 09 3c 09 5a 09 32 15 00 25 01 75 01 95 06 81 02 95 02 81 03 05 01 09 30 26 88 3e 46 88 3e 65 11 55 0d 75 10 95 01 81 02 09 31 26 60 53 46 60 53 81 02 05 0d 09 30 26 ff 0f 45 00 65 00 55 00 81 02 06 00 ff 09 04 75 08 26 ff 00 46 ff 00 65 11 55 0e 81 02 05 0d 09 3d 75 10 16 d8 dc 26 28 23 36 d8 dc 46 28 23 65 14 81 02 09 3e 81 02 05 01 09 32 16 01 ff 25 00 36 01 ff 45 00 65 11 81 02 05 0d 09 56 15 00 27 ff ff 00 00 35 00 47 ff ff 00 00 66 01 10 55 0c 81 02 45 00 65 00 55 00 c0 09 00 75 08 26 ff 00 b1 12 85 03 09 00 95 12 b1 12 85 05 09 00 95 04 b1 02 85 06 09 00 95 24 b1 02 85 16 09 00 15 00 26 ff 00 95 06 b1 02 85 17 09 00 95 0c b1 02 85 19 09 00 95 01 b1 02 85 0a 09 00 75 08 95 01 15 10 26 ff 00 b1 02 85 1e 09 00 95 10 b1 02 c0 06 00 ff 09 00 a1 01 85 09 05 0d 09 20 a1 00 09 00 15 00 26 ff 00 75 08 95 10 81 02 c0 09 00 95 03 b1 12 c0 06 00 ff 09 02 a1 01 85 07 09 00 96 09 01 b1 02 85 08 09 00 95 03 81 02 09 00 b1 02 85 0e 09 00 96 0a 01 b1 02 c0 05 0d 09 02 a1 01 85 1a 09 20 a1 00 09 42 09 44 09 45 09 3c 09 5a 09 32 15 00 25 01 75 01 95 06 81 02 09 38 25 03 75 02 95 01 81 02 05 01 09 30 26 88 3e 46 88 3e 65 11 55 0d 75 10 95 01 81 02 09 31 26 60 53 46 60 53 81 02 05 0d 09 30 26 ff 0f 46 b0 0f 66 11 e1 55 02 81 02 06 00 ff 09 04 75 08 26 ff 00 46 ff 00 65 11 55 0e 81 02 05 0d 09 3d 75 10 16 d8 dc 26 28 23 36 d8 dc 46 28 23 65 14 81 02 09 3e 81 02 05 01 09 32 16 01 ff 25 00 36 01 ff 45 00 65 11 81 02 05 0d 09 56 15 00 27 ff ff 00 00 35 00 47 ff ff 00 00 66 01 10 55 0c 81 02 45 00 65 00 55 00 c0 c0 06 00 ff 09 00 a1 01 85 1b 05 0d 09 20 a1 00 09 00 26 ff 00 75 08 95 10 81 02 c0 c0",
+ input_info=(BusType.USB, 0x2D1F, 0x014B),
+ )
diff --git a/tools/testing/selftests/hid/tests/test_usb_crash.py b/tools/testing/selftests/hid/tests/test_usb_crash.py
new file mode 100644
index 000000000000..e98bff9197c7
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_usb_crash.py
@@ -0,0 +1,103 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2021 Red Hat, Inc.
+#
+
+# This is to ensure we don't crash when emulating USB devices
+
+from . import base
+import pytest
+import logging
+
+logger = logging.getLogger("hidtools.test.usb")
+
+
+class USBDev(base.UHIDTestDevice):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # .Usage (Mouse) 2
+ 0xa1, 0x01, # .Collection (Application) 4
+ 0x09, 0x02, # ..Usage (Mouse) 6
+ 0xa1, 0x02, # ..Collection (Logical) 8
+ 0x09, 0x01, # ...Usage (Pointer) 10
+ 0xa1, 0x00, # ...Collection (Physical) 12
+ 0x05, 0x09, # ....Usage Page (Button) 14
+ 0x19, 0x01, # ....Usage Minimum (1) 16
+ 0x29, 0x03, # ....Usage Maximum (3) 18
+ 0x15, 0x00, # ....Logical Minimum (0) 20
+ 0x25, 0x01, # ....Logical Maximum (1) 22
+ 0x75, 0x01, # ....Report Size (1) 24
+ 0x95, 0x03, # ....Report Count (3) 26
+ 0x81, 0x02, # ....Input (Data,Var,Abs) 28
+ 0x75, 0x05, # ....Report Size (5) 30
+ 0x95, 0x01, # ....Report Count (1) 32
+ 0x81, 0x03, # ....Input (Cnst,Var,Abs) 34
+ 0x05, 0x01, # ....Usage Page (Generic Desktop) 36
+ 0x09, 0x30, # ....Usage (X) 38
+ 0x09, 0x31, # ....Usage (Y) 40
+ 0x15, 0x81, # ....Logical Minimum (-127) 42
+ 0x25, 0x7f, # ....Logical Maximum (127) 44
+ 0x75, 0x08, # ....Report Size (8) 46
+ 0x95, 0x02, # ....Report Count (2) 48
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 50
+ 0xc0, # ...End Collection 52
+ 0xc0, # ..End Collection 53
+ 0xc0, # .End Collection 54
+ ]
+ # fmt: on
+
+ def __init__(self, name=None, input_info=None):
+ super().__init__(
+ name, "Mouse", input_info=input_info, rdesc=USBDev.report_descriptor
+ )
+
+ # skip witing for udev events, it's likely that the report
+ # descriptor is wrong
+ def is_ready(self):
+ return True
+
+ # we don't have an evdev node here, so paper over
+ # the checks
+ def get_evdev(self, application=None):
+ return "OK"
+
+
+class TestUSBDevice(base.BaseTestCase.TestUhid):
+ """
+ Test class to test if an emulated USB device crashes
+ the kernel.
+ """
+
+ # conftest.py is generating the following fixture:
+ #
+ # @pytest.fixture(params=[('modulename', 1, 2)])
+ # def usbVidPid(self, request):
+ # return request.param
+
+ @pytest.fixture()
+ def new_uhdev(self, usbVidPid, request):
+ self.module, self.vid, self.pid = usbVidPid
+ self._load_kernel_module(None, self.module)
+ return USBDev(input_info=(3, self.vid, self.pid))
+
+ def test_creation(self):
+ """
+ inject the USB dev through uhid and immediately see if there is a crash:
+
+ uhid can create a USB device with the BUS_USB bus, and some
+ drivers assume that they can then access USB related structures
+ when they are actually provided a uhid device. This leads to
+ a crash because those access result in a segmentation fault.
+
+ The kernel should not crash on any (random) user space correct
+ use of its API. So run through all available modules and declared
+ devices to see if we can generate a uhid device without a crash.
+
+ The test is empty as the fixture `check_taint` is doing the job (and
+ honestly, when the kernel crashes, the whole machine freezes).
+ """
+ assert True
diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py
new file mode 100644
index 000000000000..2d6d04f0ff80
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py
@@ -0,0 +1,1407 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+# Copyright (c) 2020 Wacom Technology Corp.
+#
+# Authors:
+# Jason Gerecke <jason.gerecke@wacom.com>
+
+"""
+Tests for the Wacom driver generic codepath.
+
+This module tests the function of the Wacom driver's generic codepath.
+The generic codepath is used by devices which are not explicitly listed
+in the driver's device table. It uses the device's HID descriptor to
+decode reports sent by the device.
+"""
+
+from .descriptors_wacom import (
+ wacom_pth660_v145,
+ wacom_pth660_v150,
+ wacom_pth860_v145,
+ wacom_pth860_v150,
+ wacom_pth460_v105,
+)
+
+import attr
+from collections import namedtuple
+from enum import Enum
+from hidtools.hut import HUT
+from hidtools.hid import HidUnit
+from . import base
+from . import test_multitouch
+import libevdev
+import pytest
+
+import logging
+
+logger = logging.getLogger("hidtools.test.wacom")
+
+KERNEL_MODULE = base.KernelModule("wacom", "wacom")
+
+
+class ProximityState(Enum):
+ """
+ Enumeration of allowed proximity states.
+ """
+
+ # Tool is not able to be sensed by the device
+ OUT = 0
+
+ # Tool is close enough to be sensed, but some data may be invalid
+ # or inaccurate
+ IN_PROXIMITY = 1
+
+ # Tool is close enough to be sensed with high accuracy. All data
+ # valid.
+ IN_RANGE = 2
+
+ def fill(self, reportdata):
+ """Fill a report with approrpiate HID properties/values."""
+ reportdata.inrange = self in [ProximityState.IN_RANGE]
+ reportdata.wacomsense = self in [
+ ProximityState.IN_PROXIMITY,
+ ProximityState.IN_RANGE,
+ ]
+
+
+class ReportData:
+ """
+ Placeholder for HID report values.
+ """
+
+ pass
+
+
+@attr.s
+class Buttons:
+ """
+ Stylus button state.
+
+ Describes the state of each of the buttons / "side switches" that
+ may be present on a stylus. Buttons set to 'None' indicate the
+ state is "unchanged" since the previous event.
+ """
+
+ primary = attr.ib(default=None)
+ secondary = attr.ib(default=None)
+ tertiary = attr.ib(default=None)
+
+ @staticmethod
+ def clear():
+ """Button object with all states cleared."""
+ return Buttons(False, False, False)
+
+ def fill(self, reportdata):
+ """Fill a report with approrpiate HID properties/values."""
+ reportdata.barrelswitch = int(self.primary or 0)
+ reportdata.secondarybarrelswitch = int(self.secondary or 0)
+ reportdata.b3 = int(self.tertiary or 0)
+
+
+@attr.s
+class ToolID:
+ """
+ Stylus tool identifiers.
+
+ Contains values used to identify a specific stylus, e.g. its serial
+ number and tool-type identifier. Values of ``0`` may sometimes be
+ used for the out-of-range condition.
+ """
+
+ serial = attr.ib()
+ tooltype = attr.ib()
+
+ @staticmethod
+ def clear():
+ """ToolID object with all fields cleared."""
+ return ToolID(0, 0)
+
+ def fill(self, reportdata):
+ """Fill a report with approrpiate HID properties/values."""
+ reportdata.transducerserialnumber = self.serial & 0xFFFFFFFF
+ reportdata.serialhi = (self.serial >> 32) & 0xFFFFFFFF
+ reportdata.tooltype = self.tooltype
+
+
+@attr.s
+class PhysRange:
+ """
+ Range of HID physical values, with units.
+ """
+
+ unit = attr.ib()
+ min_size = attr.ib()
+ max_size = attr.ib()
+
+ CENTIMETER = HidUnit.from_string("SILinear: cm")
+ DEGREE = HidUnit.from_string("EnglishRotation: deg")
+
+ def contains(self, field):
+ """
+ Check if the physical size of the provided field is in range.
+
+ Compare the physical size described by the provided HID field
+ against the range of sizes described by this object. This is
+ an exclusive range comparison (e.g. 0 cm is not within the
+ range 0 cm - 5 cm) and exact unit comparison (e.g. 1 inch is
+ not within the range 0 cm - 5 cm).
+ """
+ phys_size = (field.physical_max - field.physical_min) * 10 ** (field.unit_exp)
+ return (
+ field.unit == self.unit.value
+ and phys_size > self.min_size
+ and phys_size < self.max_size
+ )
+
+
+class BaseTablet(base.UHIDTestDevice):
+ """
+ Skeleton object for all kinds of tablet devices.
+ """
+
+ def __init__(self, rdesc, name=None, info=None):
+ assert rdesc is not None
+ super().__init__(name, "Pen", input_info=info, rdesc=rdesc)
+ self.buttons = Buttons.clear()
+ self.toolid = ToolID.clear()
+ self.proximity = ProximityState.OUT
+ self.offset = 0
+ self.ring = -1
+ self.ek0 = False
+
+ def match_evdev_rule(self, application, evdev):
+ """
+ Filter out evdev nodes based on the requested application.
+
+ The Wacom driver may create several device nodes for each USB
+ interface device. It is crucial that we run tests with the
+ expected device node or things will obviously go off the rails.
+ Use the Wacom driver's usual naming conventions to apply a
+ sensible default filter.
+ """
+ if application in ["Pen", "Pad"]:
+ return evdev.name.endswith(application)
+ else:
+ return True
+
+ def create_report(
+ self, x, y, pressure, buttons=None, toolid=None, proximity=None, reportID=None
+ ):
+ """
+ Return an input report for this device.
+
+ :param x: absolute x
+ :param y: absolute y
+ :param pressure: pressure
+ :param buttons: stylus button state. Use ``None`` for unchanged.
+ :param toolid: tool identifiers. Use ``None`` for unchanged.
+ :param proximity: a ProximityState indicating the sensor's ability
+ to detect and report attributes of this tool. Use ``None``
+ for unchanged.
+ :param reportID: the numeric report ID for this report, if needed
+ """
+ if buttons is not None:
+ self.buttons = buttons
+ buttons = self.buttons
+
+ if toolid is not None:
+ self.toolid = toolid
+ toolid = self.toolid
+
+ if proximity is not None:
+ self.proximity = proximity
+ proximity = self.proximity
+
+ reportID = reportID or self.default_reportID
+
+ report = ReportData()
+ report.x = x
+ report.y = y
+ report.tippressure = pressure
+ report.tipswitch = pressure > 0
+ buttons.fill(report)
+ proximity.fill(report)
+ toolid.fill(report)
+
+ return super().create_report(report, reportID=reportID)
+
+ def create_report_heartbeat(self, reportID):
+ """
+ Return a heartbeat input report for this device.
+
+ Heartbeat reports generally contain battery status information,
+ among other things.
+ """
+ report = ReportData()
+ report.wacombatterycharging = 1
+ return super().create_report(report, reportID=reportID)
+
+ def create_report_pad(self, reportID, ring, ek0):
+ report = ReportData()
+
+ if ring is not None:
+ self.ring = ring
+ ring = self.ring
+
+ if ek0 is not None:
+ self.ek0 = ek0
+ ek0 = self.ek0
+
+ if ring >= 0:
+ report.wacomtouchring = ring
+ report.wacomtouchringstatus = 1
+ else:
+ report.wacomtouchring = 0x7F
+ report.wacomtouchringstatus = 0
+
+ report.wacomexpresskey00 = ek0
+ return super().create_report(report, reportID=reportID)
+
+ def event(self, x, y, pressure, buttons=None, toolid=None, proximity=None):
+ """
+ Send an input event on the default report ID.
+
+ :param x: absolute x
+ :param y: absolute y
+ :param buttons: stylus button state. Use ``None`` for unchanged.
+ :param toolid: tool identifiers. Use ``None`` for unchanged.
+ :param proximity: a ProximityState indicating the sensor's ability
+ to detect and report attributes of this tool. Use ``None``
+ for unchanged.
+ """
+ r = self.create_report(x, y, pressure, buttons, toolid, proximity)
+ self.call_input_event(r)
+ return [r]
+
+ def event_heartbeat(self, reportID):
+ """
+ Send a heartbeat event on the requested report ID.
+ """
+ r = self.create_report_heartbeat(reportID)
+ self.call_input_event(r)
+ return [r]
+
+ def event_pad(self, reportID, ring=None, ek0=None):
+ """
+ Send a pad event on the requested report ID.
+ """
+ r = self.create_report_pad(reportID, ring, ek0)
+ self.call_input_event(r)
+ return [r]
+
+ def get_report(self, req, rnum, rtype):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return (1, [])
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return (1, [])
+
+ result = (1, [])
+ result = self.create_report_offset(rdesc) or result
+ return result
+
+ def create_report_offset(self, rdesc):
+ require = [
+ "Wacom Offset Left",
+ "Wacom Offset Top",
+ "Wacom Offset Right",
+ "Wacom Offset Bottom",
+ ]
+ if not set(require).issubset(set([f.usage_name for f in rdesc])):
+ return None
+
+ report = ReportData()
+ report.wacomoffsetleft = self.offset
+ report.wacomoffsettop = self.offset
+ report.wacomoffsetright = self.offset
+ report.wacomoffsetbottom = self.offset
+ r = rdesc.create_report([report], None)
+ return (0, r)
+
+
+class OpaqueTablet(BaseTablet):
+ """
+ Bare-bones opaque tablet with a minimum of features.
+
+ A tablet stripped down to its absolute core. It is capable of
+ reporting X/Y position and if the pen is in contact. No pressure,
+ no barrel switches, no eraser. Notably it *does* report an "In
+ Range" flag, but this is only because the Wacom driver expects
+ one to function properly. The device uses only standard HID usages,
+ not any of Wacom's vendor-defined pages.
+ """
+
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x0D, # . Usage Page (Digitizer),
+ 0x09, 0x01, # . Usage (Digitizer),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x01, # . Report ID (1),
+ 0x09, 0x20, # . Usage (Stylus),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (Tip Switch),
+ 0x09, 0x32, # . Usage (In Range),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x30, # . Usage (X),
+ 0x27, 0x80, 0x3E, 0x00, 0x00, # . Logical Maximum (16000),
+ 0x47, 0x80, 0x3E, 0x00, 0x00, # . Physical Maximum (16000),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x31, # . Usage (Y),
+ 0x27, 0x28, 0x23, 0x00, 0x00, # . Logical Maximum (9000),
+ 0x47, 0x28, 0x23, 0x00, 0x00, # . Physical Maximum (9000),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0xC0, # . End Collection,
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, info=(0x3, 0x056A, 0x9999)):
+ super().__init__(rdesc, name, info)
+ self.default_reportID = 1
+
+
+class OpaqueCTLTablet(BaseTablet):
+ """
+ Opaque tablet similar to something in the CTL product line.
+
+ A pen-only tablet with most basic features you would expect from
+ an actual device. Position, eraser, pressure, barrel buttons.
+ Uses the Wacom vendor-defined usage page.
+ """
+
+ # fmt: off
+ report_descriptor = [
+ 0x06, 0x0D, 0xFF, # . Usage Page (Vnd Wacom Emr),
+ 0x09, 0x01, # . Usage (Digitizer),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x10, # . Report ID (16),
+ 0x09, 0x20, # . Usage (Stylus),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (Tip Switch),
+ 0x09, 0x44, # . Usage (Barrel Switch),
+ 0x09, 0x5A, # . Usage (Secondary Barrel Switch),
+ 0x09, 0x45, # . Usage (Eraser),
+ 0x09, 0x3C, # . Usage (Invert),
+ 0x09, 0x32, # . Usage (In Range),
+ 0x09, 0x36, # . Usage (In Proximity),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x30, 0x01, # . Usage (X),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x47, 0x80, 0x3E, 0x00, 0x00, # . Physical Maximum (16000),
+ 0x27, 0x80, 0x3E, 0x00, 0x00, # . Logical Maximum (16000),
+ 0x75, 0x18, # . Report Size (24),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x31, 0x01, # . Usage (Y),
+ 0x47, 0x28, 0x23, 0x00, 0x00, # . Physical Maximum (9000),
+ 0x27, 0x28, 0x23, 0x00, 0x00, # . Logical Maximum (9000),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x30, # . Usage (Tip Pressure),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x47, 0x00, 0x00, 0x00, 0x00, # . Physical Maximum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x32, 0x01, # . Usage (Z),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x5B, # . Usage (Transducer Serial Number),
+ 0x09, 0x5C, # . Usage (Transducer Serial Number Hi),
+ 0x17, 0x00, 0x00, 0x00, 0x80, # . Logical Minimum (-2147483648),
+ 0x27, 0xFF, 0xFF, 0xFF, 0x7F, # . Logical Maximum (2147483647),
+ 0x75, 0x20, # . Report Size (32),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x77, # . Usage (Tool Type),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0xC0 # . End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, info=(0x3, 0x056A, 0x9999)):
+ super().__init__(rdesc, name, info)
+ self.default_reportID = 16
+
+
+class PTHX60_Pen(BaseTablet):
+ """
+ Pen interface of a PTH-660 / PTH-860 / PTH-460 tablet.
+
+ This generation of devices are nearly identical to each other, though
+ the PTH-460 uses a slightly different descriptor construction (splits
+ the pad among several physical collections)
+ """
+
+ def __init__(self, rdesc=None, name=None, info=None):
+ super().__init__(rdesc, name, info)
+ self.default_reportID = 16
+
+
+class BaseTest:
+ class TestTablet(base.BaseTestCase.TestUhid):
+ kernel_modules = [KERNEL_MODULE]
+
+ def sync_and_assert_events(
+ self, report, expected_events, auto_syn=True, strict=False
+ ):
+ """
+ Assert we see the expected events in response to a report.
+ """
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+ if auto_syn:
+ expected_events.append(syn_event)
+ actual_events = uhdev.next_sync_events()
+ self.debug_reports(report, uhdev, actual_events)
+ if strict:
+ self.assertInputEvents(expected_events, actual_events)
+ else:
+ self.assertInputEventsIn(expected_events, actual_events)
+
+ def get_usages(self, uhdev):
+ def get_report_usages(report):
+ application = report.application
+ for field in report.fields:
+ if field.usages is not None:
+ for usage in field.usages:
+ yield (field, usage, application)
+ else:
+ yield (field, field.usage, application)
+
+ desc = uhdev.parsed_rdesc
+ reports = [
+ *desc.input_reports.values(),
+ *desc.feature_reports.values(),
+ *desc.output_reports.values(),
+ ]
+ for report in reports:
+ for usage in get_report_usages(report):
+ yield usage
+
+ def assertName(self, uhdev, type):
+ """
+ Assert that the name is as we expect.
+
+ The Wacom driver applies a number of decorations to the name
+ provided by the hardware. We cannot rely on the definition of
+ this assertion from the base class to work properly.
+ """
+ evdev = uhdev.get_evdev()
+ expected_name = uhdev.name + type
+ if "wacom" not in expected_name.lower():
+ expected_name = "Wacom " + expected_name
+ assert evdev.name == expected_name
+
+ def test_descriptor_physicals(self):
+ """
+ Verify that all HID usages which should have a physical range
+ actually do, and those which shouldn't don't. Also verify that
+ the associated unit is correct and within a sensible range.
+ """
+
+ def usage_id(page_name, usage_name):
+ page = HUT.usage_page_from_name(page_name)
+ return (page.page_id << 16) | page[usage_name].usage
+
+ required = {
+ usage_id("Generic Desktop", "X"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
+ usage_id("Generic Desktop", "Y"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
+ usage_id("Digitizers", "Width"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
+ usage_id("Digitizers", "Height"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
+ usage_id("Digitizers", "X Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
+ usage_id("Digitizers", "Y Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
+ usage_id("Digitizers", "Twist"): PhysRange(PhysRange.DEGREE, 358, 360),
+ usage_id("Wacom", "X Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
+ usage_id("Wacom", "Y Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
+ usage_id("Wacom", "Twist"): PhysRange(PhysRange.DEGREE, 358, 360),
+ usage_id("Wacom", "X"): PhysRange(PhysRange.CENTIMETER, 5, 150),
+ usage_id("Wacom", "Y"): PhysRange(PhysRange.CENTIMETER, 5, 150),
+ usage_id("Wacom", "Wacom TouchRing"): PhysRange(
+ PhysRange.DEGREE, 358, 360
+ ),
+ usage_id("Wacom", "Wacom Offset Left"): PhysRange(
+ PhysRange.CENTIMETER, 0, 0.5
+ ),
+ usage_id("Wacom", "Wacom Offset Top"): PhysRange(
+ PhysRange.CENTIMETER, 0, 0.5
+ ),
+ usage_id("Wacom", "Wacom Offset Right"): PhysRange(
+ PhysRange.CENTIMETER, 0, 0.5
+ ),
+ usage_id("Wacom", "Wacom Offset Bottom"): PhysRange(
+ PhysRange.CENTIMETER, 0, 0.5
+ ),
+ }
+ for field, usage, application in self.get_usages(self.uhdev):
+ if application == usage_id("Generic Desktop", "Mouse"):
+ # Ignore the vestigial Mouse collection which exists
+ # on Wacom tablets only for backwards compatibility.
+ continue
+
+ expect_physical = usage in required
+
+ phys_set = field.physical_min != 0 or field.physical_max != 0
+ assert phys_set == expect_physical
+
+ unit_set = field.unit != 0
+ assert unit_set == expect_physical
+
+ if unit_set:
+ assert required[usage].contains(field)
+
+ def test_prop_direct(self):
+ """
+ Todo: Verify that INPUT_PROP_DIRECT is set on display devices.
+ """
+ pass
+
+ def test_prop_pointer(self):
+ """
+ Todo: Verify that INPUT_PROP_POINTER is set on opaque devices.
+ """
+ pass
+
+
+class PenTabletTest(BaseTest.TestTablet):
+ def assertName(self, uhdev):
+ super().assertName(uhdev, " Pen")
+
+
+class TouchTabletTest(BaseTest.TestTablet):
+ def assertName(self, uhdev):
+ super().assertName(uhdev, " Finger")
+
+
+class TestOpaqueTablet(PenTabletTest):
+ def create_device(self):
+ return OpaqueTablet()
+
+ def test_sanity(self):
+ """
+ Bring a pen into contact with the tablet, then remove it.
+
+ Ensure that we get the basic tool/touch/motion events that should
+ be sent by the driver.
+ """
+ uhdev = self.uhdev
+
+ self.sync_and_assert_events(
+ uhdev.event(
+ 100,
+ 200,
+ pressure=300,
+ buttons=Buttons.clear(),
+ toolid=ToolID(serial=1, tooltype=1),
+ proximity=ProximityState.IN_RANGE,
+ ),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 100),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 200),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(110, 220, pressure=0),
+ [
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 110),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 220),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(
+ 120,
+ 230,
+ pressure=0,
+ toolid=ToolID.clear(),
+ proximity=ProximityState.OUT,
+ ),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 0),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(130, 240, pressure=0), [], auto_syn=False, strict=True
+ )
+
+
+class TestOpaqueCTLTablet(TestOpaqueTablet):
+ def create_device(self):
+ return OpaqueCTLTablet()
+
+ def test_buttons(self):
+ """
+ Test that the barrel buttons (side switches) work as expected.
+
+ Press and release each button individually to verify that we get
+ the expected events.
+ """
+ uhdev = self.uhdev
+
+ self.sync_and_assert_events(
+ uhdev.event(
+ 100,
+ 200,
+ pressure=0,
+ buttons=Buttons.clear(),
+ toolid=ToolID(serial=1, tooltype=1),
+ proximity=ProximityState.IN_RANGE,
+ ),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 100),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 200),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(100, 200, pressure=0, buttons=Buttons(primary=True)),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 1),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(100, 200, pressure=0, buttons=Buttons(primary=False)),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 0),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(100, 200, pressure=0, buttons=Buttons(secondary=True)),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2, 1),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(100, 200, pressure=0, buttons=Buttons(secondary=False)),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2, 0),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+
+PTHX60_Devices = [
+ {"rdesc": wacom_pth660_v145, "info": (0x3, 0x056A, 0x0357)},
+ {"rdesc": wacom_pth660_v150, "info": (0x3, 0x056A, 0x0357)},
+ {"rdesc": wacom_pth860_v145, "info": (0x3, 0x056A, 0x0358)},
+ {"rdesc": wacom_pth860_v150, "info": (0x3, 0x056A, 0x0358)},
+ {"rdesc": wacom_pth460_v105, "info": (0x3, 0x056A, 0x0392)},
+]
+
+PTHX60_Names = [
+ "PTH-660/v145",
+ "PTH-660/v150",
+ "PTH-860/v145",
+ "PTH-860/v150",
+ "PTH-460/v105",
+]
+
+
+class TestPTHX60_Pen(TestOpaqueCTLTablet):
+ @pytest.fixture(
+ autouse=True, scope="class", params=PTHX60_Devices, ids=PTHX60_Names
+ )
+ def set_device_params(self, request):
+ request.cls.device_params = request.param
+
+ def create_device(self):
+ return PTHX60_Pen(**self.device_params)
+
+ @pytest.mark.xfail
+ def test_descriptor_physicals(self):
+ # XFAIL: Various documented errata
+ super().test_descriptor_physicals()
+
+ def test_heartbeat_spurious(self):
+ """
+ Test that the heartbeat report does not send spurious events.
+ """
+ uhdev = self.uhdev
+
+ self.sync_and_assert_events(
+ uhdev.event(
+ 100,
+ 200,
+ pressure=300,
+ buttons=Buttons.clear(),
+ toolid=ToolID(serial=1, tooltype=0x822),
+ proximity=ProximityState.IN_RANGE,
+ ),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 100),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 200),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1),
+ ],
+ )
+
+ # Exactly zero events: not even a SYN
+ self.sync_and_assert_events(
+ uhdev.event_heartbeat(19), [], auto_syn=False, strict=True
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(110, 200, pressure=300),
+ [
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 110),
+ ],
+ )
+
+ def test_empty_pad_sync(self):
+ self.empty_pad_sync(num=3, denom=16, reverse=True)
+
+ def empty_pad_sync(self, num, denom, reverse):
+ """
+ Test that multiple pad collections do not trigger empty syncs.
+ """
+
+ def offset_rotation(value):
+ """
+ Offset touchring rotation values by the same factor as the
+ Linux kernel. Tablets historically don't use the same origin
+ as HID, and it sometimes changes from tablet to tablet...
+ """
+ evdev = self.uhdev.get_evdev()
+ info = evdev.absinfo[libevdev.EV_ABS.ABS_WHEEL]
+ delta = info.maximum - info.minimum + 1
+ if reverse:
+ value = info.maximum - value
+ value += num * delta // denom
+ if value > info.maximum:
+ value -= delta
+ elif value < info.minimum:
+ value += delta
+ return value
+
+ uhdev = self.uhdev
+ uhdev.application = "Pad"
+ evdev = uhdev.get_evdev()
+
+ print(evdev.name)
+ self.sync_and_assert_events(
+ uhdev.event_pad(reportID=17, ring=0, ek0=1),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_0, 1),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_WHEEL, offset_rotation(0)),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MISC, 15),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event_pad(reportID=17, ring=1, ek0=1),
+ [libevdev.InputEvent(libevdev.EV_ABS.ABS_WHEEL, offset_rotation(1))],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event_pad(reportID=17, ring=2, ek0=0),
+ [
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_WHEEL, offset_rotation(2)),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_0, 0),
+ ],
+ )
+
+
+class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest):
+ ContactIds = namedtuple("ContactIds", "contact_id, tracking_id, slot_num")
+
+ def create_device(self):
+ return test_multitouch.Digitizer(
+ "DTH 2452",
+ rdesc="05 0d 09 04 a1 01 85 0c 95 01 75 08 15 00 26 ff 00 81 03 09 54 81 02 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 75 08 95 0e 81 03 09 55 26 ff 00 75 08 b1 02 85 0a 06 00 ff 09 c5 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 09 01 85 13 15 00 26 ff 00 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0",
+ input_info=(0x3, 0x056A, 0x0383),
+ )
+
+ def make_contact(self, contact_id=0, t=0):
+ """
+ Make a single touch contact that can move over time.
+
+ Creates a touch object that has a well-known position in space that
+ does not overlap with other contacts. The value of `t` may be
+ incremented over time to move the point along a linear path.
+ """
+ x = 50 + 10 * contact_id + t * 11
+ y = 100 + 100 * contact_id + t * 11
+ return test_multitouch.Touch(contact_id, x, y)
+
+ def make_contacts(self, n, t=0):
+ """
+ Make multiple touch contacts that can move over time.
+
+ Returns a list of `n` touch objects that are positioned at well-known
+ locations. The value of `t` may be incremented over time to move the
+ points along a linear path.
+ """
+ return [self.make_contact(id, t) for id in range(0, n)]
+
+ def assert_contact(self, evdev, contact_ids, t=0):
+ """
+ Assert properties of a contact generated by make_contact.
+ """
+ contact_id = contact_ids.contact_id
+ tracking_id = contact_ids.tracking_id
+ slot_num = contact_ids.slot_num
+
+ x = 50 + 10 * contact_id + t * 11
+ y = 100 + 100 * contact_id + t * 11
+
+ # If the data isn't supposed to be stored in any slots, there is
+ # nothing we can check for in the evdev stream.
+ if slot_num is None:
+ assert tracking_id == -1
+ return
+
+ assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == tracking_id
+ if tracking_id != -1:
+ assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_X] == x
+ assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_Y] == y
+
+ def assert_contacts(self, evdev, data, t=0):
+ """
+ Assert properties of a list of contacts generated by make_contacts.
+ """
+ for contact_ids in data:
+ self.assert_contact(evdev, contact_ids, t)
+
+ def test_contact_id_0(self):
+ """
+ Bring a finger in contact with the tablet, then hold it down and remove it.
+
+ Ensure that even with contact ID = 0 which is usually given as an invalid
+ touch event by most tablets with the exception of a few, that given the
+ confidence bit is set to 1 it should process it as a valid touch to cover
+ the few tablets using contact ID = 0 as a valid touch value.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = test_multitouch.Touch(0, 50, 100)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ slot = self.get_slot(uhdev, t0, 0)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+
+ t0.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t0.inrange = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_confidence_false(self):
+ """
+ Bring a finger in contact with the tablet with confidence set to false.
+
+ Ensure that the confidence bit being set to false should not result in a touch event.
+ """
+ uhdev = self.uhdev
+ _evdev = uhdev.get_evdev()
+
+ t0 = test_multitouch.Touch(1, 50, 100)
+ t0.confidence = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ _slot = self.get_slot(uhdev, t0, 0)
+
+ assert not events
+
+ def test_confidence_multitouch(self):
+ """
+ Bring multiple fingers in contact with the tablet, some with the
+ confidence bit set, and some without.
+
+ Ensure that all confident touches are reported and that all non-
+ confident touches are ignored.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ touches = self.make_contacts(5)
+ touches[0].confidence = False
+ touches[2].confidence = False
+ touches[4].confidence = False
+
+ r = uhdev.event(touches)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+
+ self.assert_contacts(
+ evdev,
+ [
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None),
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ self.ContactIds(contact_id=2, tracking_id=-1, slot_num=None),
+ self.ContactIds(contact_id=3, tracking_id=1, slot_num=1),
+ self.ContactIds(contact_id=4, tracking_id=-1, slot_num=None),
+ ],
+ )
+
+ def confidence_change_assert_playback(self, uhdev, evdev, timeline):
+ """
+ Assert proper behavior of contacts that move and change tipswitch /
+ confidence status over time.
+
+ Given a `timeline` list of touch states to iterate over, verify
+ that the contacts move and are reported as up/down as expected
+ by the state of the tipswitch and confidence bits.
+ """
+ t = 0
+
+ for state in timeline:
+ touches = self.make_contacts(len(state), t)
+
+ for item in zip(touches, state):
+ item[0].tipswitch = item[1][1]
+ item[0].confidence = item[1][2]
+
+ r = uhdev.event(touches)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ ids = [x[0] for x in state]
+ self.assert_contacts(evdev, ids, t)
+
+ t += 1
+
+ def test_confidence_loss_a(self):
+ """
+ Transition a confident contact to a non-confident contact by
+ first clearing the tipswitch.
+
+ Ensure that the driver reports the transitioned contact as
+ being removed and that other contacts continue to report
+ normally. This mode of confidence loss is used by the
+ DTH-2452.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # Both fingers confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident
+ # First finger looses confidence and clears only the tipswitch flag
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
+
+ def test_confidence_loss_b(self):
+ """
+ Transition a confident contact to a non-confident contact by
+ cleraing both tipswitch and confidence bits simultaneously.
+
+ Ensure that the driver reports the transitioned contact as
+ being removed and that other contacts continue to report
+ normally. This mode of confidence loss is used by some
+ AES devices.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # Both fingers confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger looses confidence and has both flags cleared simultaneously
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
+
+ def test_confidence_loss_c(self):
+ """
+ Transition a confident contact to a non-confident contact by
+ clearing only the confidence bit.
+
+ Ensure that the driver reports the transitioned contact as
+ being removed and that other contacts continue to report
+ normally.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # Both fingers confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # First finger looses confidence and clears only the confidence flag
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ True,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
+
+ def test_confidence_gain_a(self):
+ """
+ Transition a contact that was always non-confident to confident.
+
+ Ensure that the confident contact is reported normally.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # Only second finger is confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None),
+ True,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # First finger gains confidence
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None),
+ True,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First finger remains confident
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First finger remains confident
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
+
+ def test_confidence_gain_b(self):
+ """
+ Transition a contact from non-confident to confident.
+
+ Ensure that the confident contact is reported normally.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First and second finger confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # Firtst finger looses confidence
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ True,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First finger gains confidence
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=2, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident
+ # First finger goes up
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh
new file mode 100755
index 000000000000..ecbd57f775a0
--- /dev/null
+++ b/tools/testing/selftests/hid/vmtest.sh
@@ -0,0 +1,474 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Red Hat
+# Copyright (c) 2025 Meta Platforms, Inc. and affiliates
+#
+# Dependencies:
+# * virtme-ng
+# * busybox-static (used by virtme-ng)
+# * qemu (used by virtme-ng)
+
+readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../)
+
+source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh
+
+readonly HID_BPF_TEST="${SCRIPT_DIR}"/hid_bpf
+readonly HIDRAW_TEST="${SCRIPT_DIR}"/hidraw
+readonly HID_BPF_PROGS="${KERNEL_CHECKOUT}/drivers/hid/bpf/progs"
+readonly SSH_GUEST_PORT=22
+readonly WAIT_PERIOD=3
+readonly WAIT_PERIOD_MAX=60
+readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX ))
+readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_hid_vmtest_XXXX.pid)
+
+readonly QEMU_OPTS="\
+ --pidfile ${QEMU_PIDFILE} \
+"
+readonly KERNEL_CMDLINE=""
+readonly LOG=$(mktemp /tmp/hid_vmtest_XXXX.log)
+readonly TEST_NAMES=(vm_hid_bpf vm_hidraw vm_pytest)
+readonly TEST_DESCS=(
+ "Run hid_bpf tests in the VM."
+ "Run hidraw tests in the VM."
+ "Run the hid-tools test-suite in the VM."
+)
+
+VERBOSE=0
+SHELL_MODE=0
+BUILD_HOST=""
+BUILD_HOST_PODMAN_CONTAINER_NAME=""
+
+usage() {
+ local name
+ local desc
+ local i
+
+ echo
+ echo "$0 [OPTIONS] [TEST]... [-- tests-args]"
+ echo "If no TEST argument is given, all tests will be run."
+ echo
+ echo "Options"
+ echo " -b: build the kernel from the current source tree and use it for guest VMs"
+ echo " -H: hostname for remote build host (used with -b)"
+ echo " -p: podman container name for remote build host (used with -b)"
+ echo " Example: -H beefyserver -p vng"
+ echo " -q: set the path to or name of qemu binary"
+ echo " -s: start a shell in the VM instead of running tests"
+ echo " -v: more verbose output (can be repeated multiple times)"
+ echo
+ echo "Available tests"
+
+ for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do
+ name=${TEST_NAMES[${i}]}
+ desc=${TEST_DESCS[${i}]}
+ printf "\t%-35s%-35s\n" "${name}" "${desc}"
+ done
+ echo
+
+ exit 1
+}
+
+die() {
+ echo "$*" >&2
+ exit "${KSFT_FAIL}"
+}
+
+vm_ssh() {
+ # vng --ssh-client keeps shouting "Warning: Permanently added 'virtme-ng%22'
+ # (ED25519) to the list of known hosts.",
+ # So replace the command with what's actually called and add the "-q" option
+ stdbuf -oL ssh -q \
+ -F ${HOME}/.cache/virtme-ng/.ssh/virtme-ng-ssh.conf \
+ -l root virtme-ng%${SSH_GUEST_PORT} \
+ "$@"
+ return $?
+}
+
+cleanup() {
+ if [[ -s "${QEMU_PIDFILE}" ]]; then
+ pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1
+ fi
+
+ # If failure occurred during or before qemu start up, then we need
+ # to clean this up ourselves.
+ if [[ -e "${QEMU_PIDFILE}" ]]; then
+ rm "${QEMU_PIDFILE}"
+ fi
+}
+
+check_args() {
+ local found
+
+ for arg in "$@"; do
+ found=0
+ for name in "${TEST_NAMES[@]}"; do
+ if [[ "${name}" = "${arg}" ]]; then
+ found=1
+ break
+ fi
+ done
+
+ if [[ "${found}" -eq 0 ]]; then
+ echo "${arg} is not an available test" >&2
+ usage
+ fi
+ done
+
+ for arg in "$@"; do
+ if ! command -v > /dev/null "test_${arg}"; then
+ echo "Test ${arg} not found" >&2
+ usage
+ fi
+ done
+}
+
+check_deps() {
+ for dep in vng ${QEMU} busybox pkill ssh pytest; do
+ if [[ ! -x $(command -v "${dep}") ]]; then
+ echo -e "skip: dependency ${dep} not found!\n"
+ exit "${KSFT_SKIP}"
+ fi
+ done
+
+ if [[ ! -x $(command -v "${HID_BPF_TEST}") ]]; then
+ printf "skip: %s not found!" "${HID_BPF_TEST}"
+ printf " Please build the kselftest hid_bpf target.\n"
+ exit "${KSFT_SKIP}"
+ fi
+
+ if [[ ! -x $(command -v "${HIDRAW_TEST}") ]]; then
+ printf "skip: %s not found!" "${HIDRAW_TEST}"
+ printf " Please build the kselftest hidraw target.\n"
+ exit "${KSFT_SKIP}"
+ fi
+}
+
+check_vng() {
+ local tested_versions
+ local version
+ local ok
+
+ tested_versions=("1.36" "1.37")
+ version="$(vng --version)"
+
+ ok=0
+ for tv in "${tested_versions[@]}"; do
+ if [[ "${version}" == *"${tv}"* ]]; then
+ ok=1
+ break
+ fi
+ done
+
+ if [[ ! "${ok}" -eq 1 ]]; then
+ printf "warning: vng version '%s' has not been tested and may " "${version}" >&2
+ printf "not function properly.\n\tThe following versions have been tested: " >&2
+ echo "${tested_versions[@]}" >&2
+ fi
+}
+
+handle_build() {
+ if [[ ! "${BUILD}" -eq 1 ]]; then
+ return
+ fi
+
+ if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then
+ echo "-b requires vmtest.sh called from the kernel source tree" >&2
+ exit 1
+ fi
+
+ pushd "${KERNEL_CHECKOUT}" &>/dev/null
+
+ if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then
+ die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})"
+ fi
+
+ local vng_args=("-v" "--config" "${SCRIPT_DIR}/config" "--build")
+
+ if [[ -n "${BUILD_HOST}" ]]; then
+ vng_args+=("--build-host" "${BUILD_HOST}")
+ fi
+
+ if [[ -n "${BUILD_HOST_PODMAN_CONTAINER_NAME}" ]]; then
+ vng_args+=("--build-host-exec-prefix" \
+ "podman exec -ti ${BUILD_HOST_PODMAN_CONTAINER_NAME}")
+ fi
+
+ if ! vng "${vng_args[@]}"; then
+ die "failed to build kernel from source tree (${KERNEL_CHECKOUT})"
+ fi
+
+ if ! make -j$(nproc) -C "${HID_BPF_PROGS}"; then
+ die "failed to build HID bpf objects from source tree (${HID_BPF_PROGS})"
+ fi
+
+ if ! make -j$(nproc) -C "${SCRIPT_DIR}"; then
+ die "failed to build HID selftests from source tree (${SCRIPT_DIR})"
+ fi
+
+ popd &>/dev/null
+}
+
+vm_start() {
+ local logfile=/dev/null
+ local verbose_opt=""
+ local kernel_opt=""
+ local qemu
+
+ qemu=$(command -v "${QEMU}")
+
+ if [[ "${VERBOSE}" -eq 2 ]]; then
+ verbose_opt="--verbose"
+ logfile=/dev/stdout
+ fi
+
+ # If we are running from within the kernel source tree, use the kernel source tree
+ # as the kernel to boot, otherwise use the currently running kernel.
+ if [[ "$(realpath "$(pwd)")" == "${KERNEL_CHECKOUT}"* ]]; then
+ kernel_opt="${KERNEL_CHECKOUT}"
+ fi
+
+ vng \
+ --run \
+ ${kernel_opt} \
+ ${verbose_opt} \
+ --qemu-opts="${QEMU_OPTS}" \
+ --qemu="${qemu}" \
+ --user root \
+ --append "${KERNEL_CMDLINE}" \
+ --ssh "${SSH_GUEST_PORT}" \
+ --rw &> ${logfile} &
+
+ local vng_pid=$!
+ local elapsed=0
+
+ while [[ ! -s "${QEMU_PIDFILE}" ]]; do
+ if ! kill -0 "${vng_pid}" 2>/dev/null; then
+ echo "vng process (PID ${vng_pid}) exited early, check logs for details" >&2
+ die "failed to boot VM"
+ fi
+
+ if [[ ${elapsed} -ge ${WAIT_TOTAL} ]]; then
+ echo "Timed out after ${WAIT_TOTAL} seconds waiting for VM to boot" >&2
+ die "failed to boot VM"
+ fi
+
+ sleep 1
+ elapsed=$((elapsed + 1))
+ done
+}
+
+vm_wait_for_ssh() {
+ local i
+
+ i=0
+ while true; do
+ if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then
+ die "Timed out waiting for guest ssh"
+ fi
+ if vm_ssh -- true; then
+ break
+ fi
+ i=$(( i + 1 ))
+ sleep ${WAIT_PERIOD}
+ done
+}
+
+vm_mount_bpffs() {
+ vm_ssh -- mount bpffs -t bpf /sys/fs/bpf
+}
+
+__log_stdin() {
+ stdbuf -oL awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0; fflush() }'
+}
+
+__log_args() {
+ echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+}
+
+log() {
+ local verbose="$1"
+ shift
+
+ local prefix="$1"
+
+ shift
+ local redirect=
+ if [[ ${verbose} -le 0 ]]; then
+ redirect=/dev/null
+ else
+ redirect=/dev/stdout
+ fi
+
+ if [[ "$#" -eq 0 ]]; then
+ __log_stdin | tee -a "${LOG}" > ${redirect}
+ else
+ __log_args "$@" | tee -a "${LOG}" > ${redirect}
+ fi
+}
+
+log_setup() {
+ log $((VERBOSE-1)) "setup" "$@"
+}
+
+log_host() {
+ local testname=$1
+
+ shift
+ log $((VERBOSE-1)) "test:${testname}:host" "$@"
+}
+
+log_guest() {
+ local testname=$1
+
+ shift
+ log ${VERBOSE} "# test:${testname}" "$@"
+}
+
+test_vm_hid_bpf() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ vm_ssh -- "${HID_BPF_TEST}" \
+ 2>&1 | log_guest "${testname}"
+
+ return ${PIPESTATUS[0]}
+}
+
+test_vm_hidraw() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ vm_ssh -- "${HIDRAW_TEST}" \
+ 2>&1 | log_guest "${testname}"
+
+ return ${PIPESTATUS[0]}
+}
+
+test_vm_pytest() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ shift
+
+ vm_ssh -- pytest ${SCRIPT_DIR}/tests --color=yes "$@" \
+ 2>&1 | log_guest "${testname}"
+
+ return ${PIPESTATUS[0]}
+}
+
+run_test() {
+ local vm_oops_cnt_before
+ local vm_warn_cnt_before
+ local vm_oops_cnt_after
+ local vm_warn_cnt_after
+ local name
+ local rc
+
+ vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
+ vm_error_cnt_before=$(vm_ssh -- dmesg --level=err | wc -l)
+
+ name=$(echo "${1}" | awk '{ print $1 }')
+ eval test_"${name}" "$@"
+ rc=$?
+
+ vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l)
+ if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
+ echo "FAIL: kernel oops detected on vm" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ vm_error_cnt_after=$(vm_ssh -- dmesg --level=err | wc -l)
+ if [[ ${vm_error_cnt_after} -gt ${vm_error_cnt_before} ]]; then
+ echo "FAIL: kernel error detected on vm" | log_host "${name}"
+ vm_ssh -- dmesg --level=err | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ return "${rc}"
+}
+
+QEMU="qemu-system-$(uname -m)"
+
+while getopts :hvsbq:H:p: o
+do
+ case $o in
+ v) VERBOSE=$((VERBOSE+1));;
+ s) SHELL_MODE=1;;
+ b) BUILD=1;;
+ q) QEMU=$OPTARG;;
+ H) BUILD_HOST=$OPTARG;;
+ p) BUILD_HOST_PODMAN_CONTAINER_NAME=$OPTARG;;
+ h|*) usage;;
+ esac
+done
+shift $((OPTIND-1))
+
+trap cleanup EXIT
+
+PARAMS=""
+
+if [[ ${#} -eq 0 ]]; then
+ ARGS=("${TEST_NAMES[@]}")
+else
+ ARGS=()
+ COUNT=0
+ for arg in $@; do
+ COUNT=$((COUNT+1))
+ if [[ x"$arg" == x"--" ]]; then
+ break
+ fi
+ ARGS+=($arg)
+ done
+ shift $COUNT
+ PARAMS="$@"
+fi
+
+if [[ "${SHELL_MODE}" -eq 0 ]]; then
+ check_args "${ARGS[@]}"
+ echo "1..${#ARGS[@]}"
+fi
+check_deps
+check_vng
+handle_build
+
+log_setup "Booting up VM"
+vm_start
+vm_wait_for_ssh
+vm_mount_bpffs
+log_setup "VM booted up"
+
+if [[ "${SHELL_MODE}" -eq 1 ]]; then
+ log_setup "Starting interactive shell in VM"
+ echo "Starting shell in VM. Use 'exit' to quit and shutdown the VM."
+ CURRENT_DIR="$(pwd)"
+ vm_ssh -t -- "cd '${CURRENT_DIR}' && exec bash -l"
+ exit "$KSFT_PASS"
+fi
+
+cnt_pass=0
+cnt_fail=0
+cnt_skip=0
+cnt_total=0
+for arg in "${ARGS[@]}"; do
+ run_test "${arg}" "${PARAMS}"
+ rc=$?
+ if [[ ${rc} -eq $KSFT_PASS ]]; then
+ cnt_pass=$(( cnt_pass + 1 ))
+ echo "ok ${cnt_total} ${arg}"
+ elif [[ ${rc} -eq $KSFT_SKIP ]]; then
+ cnt_skip=$(( cnt_skip + 1 ))
+ echo "ok ${cnt_total} ${arg} # SKIP"
+ elif [[ ${rc} -eq $KSFT_FAIL ]]; then
+ cnt_fail=$(( cnt_fail + 1 ))
+ echo "not ok ${cnt_total} ${arg} # exit=$rc"
+ fi
+ cnt_total=$(( cnt_total + 1 ))
+done
+
+echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
+echo "Log: ${LOG}"
+
+if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then
+ exit "$KSFT_PASS"
+else
+ exit "$KSFT_FAIL"
+fi
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 39f0fa2a8fd6..f45372cb00fe 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
+CFLAGS := $(CFLAGS) -Wall
LDLIBS += -lm
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+ARCH_PROCESSED := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-ifeq (x86,$(ARCH))
+ifeq (x86,$(ARCH_PROCESSED))
TEST_GEN_FILES := msr aperf
endif
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index a8acf3996973..953b63e5aa6a 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -11,7 +11,7 @@
#include <errno.h>
#include <string.h>
#include <time.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MSEC_PER_SEC 1000L
#define NSEC_PER_MSEC 1000000L
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index e7008f614ad7..6a3b8503264e 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -44,6 +44,11 @@ if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then
exit $ksft_skip
fi
+if ! command -v cpupower &> /dev/null; then
+ echo $msg cpupower could not be found, please install it >&2
+ exit $ksft_skip
+fi
+
max_cpus=$(($(nproc)-1))
function run_test () {
@@ -87,9 +92,9 @@ mkt_freq=${_mkt_freq}0
# Get the ranges from cpupower
_min_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $1 } ')
-min_freq=$(($_min_freq / 1000))
+min_freq=$((_min_freq / 1000))
_max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ')
-max_freq=$(($_max_freq / 1000))
+max_freq=$((_max_freq / 1000))
[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq`
diff --git a/tools/testing/selftests/iommu/.gitignore b/tools/testing/selftests/iommu/.gitignore
new file mode 100644
index 000000000000..7d0703049eba
--- /dev/null
+++ b/tools/testing/selftests/iommu/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/iommufd
+/iommufd_fail_nth
diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile
new file mode 100644
index 000000000000..84abeb2f0949
--- /dev/null
+++ b/tools/testing/selftests/iommu/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lcap
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += iommufd
+TEST_GEN_PROGS += iommufd_fail_nth
+
+include ../lib.mk
diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config
new file mode 100644
index 000000000000..02a2a1b267c1
--- /dev/null
+++ b/tools/testing/selftests/iommu/config
@@ -0,0 +1,5 @@
+CONFIG_IOMMUFD=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_IOMMUFD_TEST=y
+CONFIG_FAILSLAB=y
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
new file mode 100644
index 000000000000..10e051b6f592
--- /dev/null
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -0,0 +1,3544 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */
+#include <asm/unistd.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/mman.h>
+#include <sys/eventfd.h>
+
+#define __EXPORTED_HEADERS__
+#include <linux/vfio.h>
+
+#include "iommufd_utils.h"
+
+static unsigned long HUGEPAGE_SIZE;
+
+static unsigned long get_huge_page_size(void)
+{
+ char buf[80];
+ int ret;
+ int fd;
+
+ fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size",
+ O_RDONLY);
+ if (fd < 0)
+ return 2 * 1024 * 1024;
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (ret <= 0 || ret == sizeof(buf))
+ return 2 * 1024 * 1024;
+ buf[ret] = 0;
+ return strtoul(buf, NULL, 10);
+}
+
+static __attribute__((constructor)) void setup_sizes(void)
+{
+ void *vrc;
+ int rc;
+
+ PAGE_SIZE = sysconf(_SC_PAGE_SIZE);
+ HUGEPAGE_SIZE = get_huge_page_size();
+
+ BUFFER_SIZE = PAGE_SIZE * 16;
+ rc = posix_memalign(&buffer, HUGEPAGE_SIZE, BUFFER_SIZE);
+ assert(!rc);
+ assert(buffer);
+ assert((uintptr_t)buffer % HUGEPAGE_SIZE == 0);
+ vrc = mmap(buffer, BUFFER_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ assert(vrc == buffer);
+
+ mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+ &mfd);
+ assert(mfd_buffer != MAP_FAILED);
+ assert(mfd > 0);
+}
+
+FIXTURE(iommufd)
+{
+ int fd;
+};
+
+FIXTURE_SETUP(iommufd)
+{
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+}
+
+FIXTURE_TEARDOWN(iommufd)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+TEST_F(iommufd, simple_close)
+{
+}
+
+TEST_F(iommufd, cmd_fail)
+{
+ struct iommu_destroy cmd = { .size = sizeof(cmd), .id = 0 };
+
+ /* object id is invalid */
+ EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, 0));
+ /* Bad pointer */
+ EXPECT_ERRNO(EFAULT, ioctl(self->fd, IOMMU_DESTROY, NULL));
+ /* Unknown ioctl */
+ EXPECT_ERRNO(ENOTTY,
+ ioctl(self->fd, _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE - 1),
+ &cmd));
+}
+
+TEST_F(iommufd, cmd_length)
+{
+#define TEST_LENGTH(_struct, _ioctl, _last) \
+ { \
+ size_t min_size = offsetofend(struct _struct, _last); \
+ struct { \
+ struct _struct cmd; \
+ uint8_t extra; \
+ } cmd = { .cmd = { .size = min_size - 1 }, \
+ .extra = UINT8_MAX }; \
+ int old_errno; \
+ int rc; \
+ \
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, _ioctl, &cmd)); \
+ cmd.cmd.size = sizeof(struct _struct) + 1; \
+ EXPECT_ERRNO(E2BIG, ioctl(self->fd, _ioctl, &cmd)); \
+ cmd.cmd.size = sizeof(struct _struct); \
+ rc = ioctl(self->fd, _ioctl, &cmd); \
+ old_errno = errno; \
+ cmd.cmd.size = sizeof(struct _struct) + 1; \
+ cmd.extra = 0; \
+ if (rc) { \
+ EXPECT_ERRNO(old_errno, \
+ ioctl(self->fd, _ioctl, &cmd)); \
+ } else { \
+ ASSERT_EQ(0, ioctl(self->fd, _ioctl, &cmd)); \
+ } \
+ }
+
+ TEST_LENGTH(iommu_destroy, IOMMU_DESTROY, id);
+ TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO, __reserved);
+ TEST_LENGTH(iommu_hwpt_alloc, IOMMU_HWPT_ALLOC, __reserved);
+ TEST_LENGTH(iommu_hwpt_invalidate, IOMMU_HWPT_INVALIDATE, __reserved);
+ TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC, out_ioas_id);
+ TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES,
+ out_iova_alignment);
+ TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS,
+ allowed_iovas);
+ TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP, iova);
+ TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY, src_iova);
+ TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP, length);
+ TEST_LENGTH(iommu_option, IOMMU_OPTION, val64);
+ TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS, __reserved);
+ TEST_LENGTH(iommu_ioas_map_file, IOMMU_IOAS_MAP_FILE, iova);
+ TEST_LENGTH(iommu_viommu_alloc, IOMMU_VIOMMU_ALLOC, out_viommu_id);
+ TEST_LENGTH(iommu_vdevice_alloc, IOMMU_VDEVICE_ALLOC, virt_id);
+ TEST_LENGTH(iommu_ioas_change_process, IOMMU_IOAS_CHANGE_PROCESS,
+ __reserved);
+#undef TEST_LENGTH
+}
+
+TEST_F(iommufd, cmd_ex_fail)
+{
+ struct {
+ struct iommu_destroy cmd;
+ __u64 future;
+ } cmd = { .cmd = { .size = sizeof(cmd), .id = 0 } };
+
+ /* object id is invalid and command is longer */
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+ /* future area is non-zero */
+ cmd.future = 1;
+ EXPECT_ERRNO(E2BIG, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+ /* Original command "works" */
+ cmd.cmd.size = sizeof(cmd.cmd);
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+ /* Short command fails */
+ cmd.cmd.size = sizeof(cmd.cmd) - 1;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+}
+
+TEST_F(iommufd, global_options)
+{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_RLIMIT_MODE,
+ .op = IOMMU_OPTION_OP_GET,
+ .val64 = 1,
+ };
+
+ cmd.option_id = IOMMU_OPTION_RLIMIT_MODE;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ ASSERT_EQ(0, cmd.val64);
+
+ /* This requires root */
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 1;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ cmd.val64 = 2;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ cmd.op = IOMMU_OPTION_OP_GET;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ ASSERT_EQ(1, cmd.val64);
+
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 0;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ cmd.op = IOMMU_OPTION_OP_GET;
+ cmd.option_id = IOMMU_OPTION_HUGE_PAGES;
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ cmd.op = IOMMU_OPTION_OP_SET;
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd));
+}
+
+static void drop_cap_ipc_lock(struct __test_metadata *_metadata)
+{
+ cap_t caps;
+ cap_value_t cap_list[1] = { CAP_IPC_LOCK };
+
+ caps = cap_get_proc();
+ ASSERT_NE(caps, NULL);
+ ASSERT_NE(-1,
+ cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
+ ASSERT_NE(-1, cap_set_proc(caps));
+ cap_free(caps);
+}
+
+static long get_proc_status_value(pid_t pid, const char *var)
+{
+ FILE *fp;
+ char buf[80], tag[80];
+ long val = -1;
+
+ snprintf(buf, sizeof(buf), "/proc/%d/status", pid);
+ fp = fopen(buf, "r");
+ if (!fp)
+ return val;
+
+ while (fgets(buf, sizeof(buf), fp))
+ if (fscanf(fp, "%s %ld\n", tag, &val) == 2 && !strcmp(tag, var))
+ break;
+
+ fclose(fp);
+ return val;
+}
+
+static long get_vm_pinned(pid_t pid)
+{
+ return get_proc_status_value(pid, "VmPin:");
+}
+
+static long get_vm_locked(pid_t pid)
+{
+ return get_proc_status_value(pid, "VmLck:");
+}
+
+FIXTURE(change_process)
+{
+ int fd;
+ uint32_t ioas_id;
+};
+
+FIXTURE_VARIANT(change_process)
+{
+ int accounting;
+};
+
+FIXTURE_SETUP(change_process)
+{
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+
+ drop_cap_ipc_lock(_metadata);
+ if (variant->accounting != IOPT_PAGES_ACCOUNT_NONE) {
+ struct iommu_option set_limit_cmd = {
+ .size = sizeof(set_limit_cmd),
+ .option_id = IOMMU_OPTION_RLIMIT_MODE,
+ .op = IOMMU_OPTION_OP_SET,
+ .val64 = (variant->accounting == IOPT_PAGES_ACCOUNT_MM),
+ };
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &set_limit_cmd));
+ }
+
+ test_ioctl_ioas_alloc(&self->ioas_id);
+ test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL);
+}
+
+FIXTURE_TEARDOWN(change_process)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(change_process, account_none)
+{
+ .accounting = IOPT_PAGES_ACCOUNT_NONE,
+};
+
+FIXTURE_VARIANT_ADD(change_process, account_user)
+{
+ .accounting = IOPT_PAGES_ACCOUNT_USER,
+};
+
+FIXTURE_VARIANT_ADD(change_process, account_mm)
+{
+ .accounting = IOPT_PAGES_ACCOUNT_MM,
+};
+
+TEST_F(change_process, basic)
+{
+ pid_t parent = getpid();
+ pid_t child;
+ __u64 iova;
+ struct iommu_ioas_change_process cmd = {
+ .size = sizeof(cmd),
+ };
+
+ /* Expect failure if non-file maps exist */
+ test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova);
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd));
+ test_ioctl_ioas_unmap(iova, PAGE_SIZE);
+
+ /* Change process works in current process. */
+ test_ioctl_ioas_map_file(mfd, 0, PAGE_SIZE, &iova);
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd));
+
+ /* Change process works in another process */
+ child = fork();
+ if (!child) {
+ int nlock = PAGE_SIZE / 1024;
+
+ /* Parent accounts for locked memory before */
+ ASSERT_EQ(nlock, get_vm_pinned(parent));
+ if (variant->accounting == IOPT_PAGES_ACCOUNT_MM)
+ ASSERT_EQ(nlock, get_vm_locked(parent));
+ ASSERT_EQ(0, get_vm_pinned(getpid()));
+ ASSERT_EQ(0, get_vm_locked(getpid()));
+
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd));
+
+ /* Child accounts for locked memory after */
+ ASSERT_EQ(0, get_vm_pinned(parent));
+ ASSERT_EQ(0, get_vm_locked(parent));
+ ASSERT_EQ(nlock, get_vm_pinned(getpid()));
+ if (variant->accounting == IOPT_PAGES_ACCOUNT_MM)
+ ASSERT_EQ(nlock, get_vm_locked(getpid()));
+
+ exit(0);
+ }
+ ASSERT_NE(-1, child);
+ ASSERT_EQ(child, waitpid(child, NULL, 0));
+}
+
+FIXTURE(iommufd_ioas)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t stdev_id;
+ uint32_t hwpt_id;
+ uint32_t device_id;
+ uint64_t base_iova;
+ uint32_t device_pasid_id;
+};
+
+FIXTURE_VARIANT(iommufd_ioas)
+{
+ unsigned int mock_domains;
+ unsigned int memory_limit;
+ bool pasid_capable;
+};
+
+FIXTURE_SETUP(iommufd_ioas)
+{
+ unsigned int i;
+
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+ test_ioctl_ioas_alloc(&self->ioas_id);
+
+ if (!variant->memory_limit) {
+ test_ioctl_set_default_memory_limit();
+ } else {
+ test_ioctl_set_temp_memory_limit(variant->memory_limit);
+ }
+
+ for (i = 0; i != variant->mock_domains; i++) {
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_id,
+ &self->hwpt_id, &self->device_id);
+ test_cmd_dev_check_cache_all(self->device_id,
+ IOMMU_TEST_DEV_CACHE_DEFAULT);
+ self->base_iova = MOCK_APERTURE_START;
+ }
+
+ if (variant->pasid_capable)
+ test_cmd_mock_domain_flags(self->ioas_id,
+ MOCK_FLAGS_DEVICE_PASID,
+ NULL, NULL,
+ &self->device_pasid_id);
+}
+
+FIXTURE_TEARDOWN(iommufd_ioas)
+{
+ test_ioctl_set_default_memory_limit();
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain)
+{
+};
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain)
+{
+ .mock_domains = 1,
+ .pasid_capable = true,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain)
+{
+ .mock_domains = 2,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain_limit)
+{
+ .mock_domains = 1,
+ .memory_limit = 16,
+};
+
+TEST_F(iommufd_ioas, ioas_auto_destroy)
+{
+}
+
+TEST_F(iommufd_ioas, ioas_destroy)
+{
+ if (self->stdev_id) {
+ /* IOAS cannot be freed while a device has a HWPT using it */
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, self->ioas_id));
+ } else {
+ /* Can allocate and manually free an IOAS table */
+ test_ioctl_destroy(self->ioas_id);
+ }
+}
+
+TEST_F(iommufd_ioas, alloc_hwpt_nested)
+{
+ const uint32_t min_data_len =
+ offsetofend(struct iommu_hwpt_selftest, iotlb);
+ struct iommu_hwpt_selftest data = {
+ .iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+ };
+ struct iommu_hwpt_invalidate_selftest inv_reqs[2] = {};
+ uint32_t nested_hwpt_id[2] = {};
+ uint32_t num_inv;
+ uint32_t parent_hwpt_id = 0;
+ uint32_t parent_hwpt_id_not_work = 0;
+ uint32_t test_hwpt_id = 0;
+ uint32_t iopf_hwpt_id;
+ uint32_t fault_id;
+ uint32_t fault_fd;
+
+ if (self->device_id) {
+ /* Negative tests */
+ test_err_hwpt_alloc(ENOENT, self->ioas_id, self->device_id, 0,
+ &test_hwpt_id);
+ test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0,
+ &test_hwpt_id);
+ test_err_hwpt_alloc(EOPNOTSUPP, self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_FAULT_ID_VALID,
+ &test_hwpt_id);
+
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT,
+ &parent_hwpt_id);
+
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id, 0,
+ &parent_hwpt_id_not_work);
+
+ /* Negative nested tests */
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_NONE, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(EOPNOTSUPP, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST + 1, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ min_data_len - 1);
+ test_err_hwpt_alloc_nested(EFAULT, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, NULL,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(
+ EOPNOTSUPP, self->device_id, parent_hwpt_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT, &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ parent_hwpt_id_not_work, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ /* Allocate two nested hwpts sharing one common parent hwpt */
+ test_ioctl_fault_alloc(&fault_id, &fault_fd);
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
+ &nested_hwpt_id[1],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_iopf(ENOENT, self->device_id, parent_hwpt_id,
+ UINT32_MAX, IOMMU_HWPT_FAULT_ID_VALID,
+ &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST,
+ &data, sizeof(data));
+ test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0],
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1],
+ IOMMU_TEST_IOTLB_DEFAULT);
+
+ /* Negative test: a nested hwpt on top of a nested hwpt */
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ nested_hwpt_id[0], 0, &test_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ /* Negative test: parent hwpt now cannot be freed */
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, parent_hwpt_id));
+
+ /* hwpt_invalidate does not support a parent hwpt */
+ num_inv = 1;
+ test_err_hwpt_invalidate(EINVAL, parent_hwpt_id, inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Check data_type by passing zero-length array */
+ num_inv = 0;
+ test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: Invalid data_type */
+ num_inv = 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST_INVALID,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: structure size sanity */
+ num_inv = 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs) + 1, &num_inv);
+ assert(!num_inv);
+
+ num_inv = 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ 1, &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid flag is passed */
+ num_inv = 1;
+ inv_reqs[0].flags = 0xffffffff;
+ test_err_hwpt_invalidate(EOPNOTSUPP, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid data_uptr when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], NULL,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid entry_len when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ 0, &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid iotlb_id */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = MOCK_NESTED_DOMAIN_IOTLB_ID_MAX + 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /*
+ * Invalidate the 1st iotlb entry but fail the 2nd request
+ * due to invalid flags configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = 0;
+ inv_reqs[1].flags = 0xffffffff;
+ inv_reqs[1].iotlb_id = 1;
+ test_err_hwpt_invalidate(EOPNOTSUPP, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3,
+ IOMMU_TEST_IOTLB_DEFAULT);
+
+ /*
+ * Invalidate the 1st iotlb entry but fail the 2nd request
+ * due to invalid iotlb_id configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = 0;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].iotlb_id = MOCK_NESTED_DOMAIN_IOTLB_ID_MAX + 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3,
+ IOMMU_TEST_IOTLB_DEFAULT);
+
+ /* Invalidate the 2nd iotlb entry and verify */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = 1;
+ test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1, 0);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3,
+ IOMMU_TEST_IOTLB_DEFAULT);
+
+ /* Invalidate the 3rd and 4th iotlb entries and verify */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = 2;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].iotlb_id = 3;
+ test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 2);
+ test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], 0);
+
+ /* Invalidate all iotlb entries for nested_hwpt_id[1] and verify */
+ num_inv = 1;
+ inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL;
+ test_cmd_hwpt_invalidate(nested_hwpt_id[1], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], 0);
+
+ /* Attach device to nested_hwpt_id[0] that then will be busy */
+ test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[0]);
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, nested_hwpt_id[0]));
+
+ /* Switch from nested_hwpt_id[0] to nested_hwpt_id[1] */
+ test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[1]);
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, nested_hwpt_id[1]));
+ test_ioctl_destroy(nested_hwpt_id[0]);
+
+ /* Switch from nested_hwpt_id[1] to iopf_hwpt_id */
+ test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, iopf_hwpt_id));
+ /* Trigger an IOPF on the device */
+ test_cmd_trigger_iopf(self->device_id, fault_fd);
+
+ /* Detach from nested_hwpt_id[1] and destroy it */
+ test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
+ test_ioctl_destroy(nested_hwpt_id[1]);
+ test_ioctl_destroy(iopf_hwpt_id);
+
+ /* Detach from the parent hw_pagetable and destroy it */
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+ test_ioctl_destroy(parent_hwpt_id);
+ test_ioctl_destroy(parent_hwpt_id_not_work);
+ close(fault_fd);
+ test_ioctl_destroy(fault_id);
+ } else {
+ test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0,
+ &parent_hwpt_id);
+ test_err_hwpt_alloc_nested(ENOENT, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(ENOENT, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[1],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ nested_hwpt_id[0]);
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ nested_hwpt_id[1]);
+ }
+}
+
+TEST_F(iommufd_ioas, hwpt_attach)
+{
+ /* Create a device attached directly to a hwpt */
+ if (self->stdev_id) {
+ test_cmd_mock_domain(self->hwpt_id, NULL, NULL, NULL);
+ } else {
+ test_err_mock_domain(ENOENT, self->hwpt_id, NULL, NULL);
+ }
+}
+
+TEST_F(iommufd_ioas, ioas_area_destroy)
+{
+ /* Adding an area does not change ability to destroy */
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova);
+ if (self->stdev_id)
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, self->ioas_id));
+ else
+ test_ioctl_destroy(self->ioas_id);
+}
+
+TEST_F(iommufd_ioas, ioas_area_auto_destroy)
+{
+ int i;
+
+ /* Can allocate and automatically free an IOAS table with many areas */
+ for (i = 0; i != 10; i++) {
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE,
+ self->base_iova + i * PAGE_SIZE);
+ }
+}
+
+TEST_F(iommufd_ioas, get_hw_info)
+{
+ struct iommu_test_hw_info buffer_exact;
+ struct iommu_test_hw_info_buffer_larger {
+ struct iommu_test_hw_info info;
+ uint64_t trailing_bytes;
+ } buffer_larger;
+ struct iommu_test_hw_info_buffer_smaller {
+ __u32 flags;
+ } buffer_smaller;
+
+ if (self->device_id) {
+ uint8_t max_pasid = 0;
+
+ /* Provide a zero-size user_buffer */
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, NULL, 0);
+ /* Provide a user_buffer with exact size */
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact,
+ sizeof(buffer_exact));
+
+ /* Request for a wrong data_type, and a correct one */
+ test_err_get_hw_info(EOPNOTSUPP, self->device_id,
+ IOMMU_HW_INFO_TYPE_SELFTEST + 1,
+ &buffer_exact, sizeof(buffer_exact));
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_SELFTEST, &buffer_exact,
+ sizeof(buffer_exact));
+ /*
+ * Provide a user_buffer with size larger than the exact size to check if
+ * kernel zero the trailing bytes.
+ */
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger,
+ sizeof(buffer_larger));
+ /*
+ * Provide a user_buffer with size smaller than the exact size to check if
+ * the fields within the size range still gets updated.
+ */
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT,
+ &buffer_smaller, sizeof(buffer_smaller));
+ test_cmd_get_hw_info_pasid(self->device_id, &max_pasid);
+ ASSERT_EQ(0, max_pasid);
+ if (variant->pasid_capable) {
+ test_cmd_get_hw_info_pasid(self->device_pasid_id,
+ &max_pasid);
+ ASSERT_EQ(MOCK_PASID_WIDTH, max_pasid);
+ }
+ } else {
+ test_err_get_hw_info(ENOENT, self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact,
+ sizeof(buffer_exact));
+ test_err_get_hw_info(ENOENT, self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger,
+ sizeof(buffer_larger));
+ }
+}
+
+TEST_F(iommufd_ioas, area)
+{
+ int i;
+
+ /* Unmap fails if nothing is mapped */
+ for (i = 0; i != 10; i++)
+ test_err_ioctl_ioas_unmap(ENOENT, i * PAGE_SIZE, PAGE_SIZE);
+
+ /* Unmap works */
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE,
+ self->base_iova + i * PAGE_SIZE);
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(self->base_iova + i * PAGE_SIZE,
+ PAGE_SIZE);
+
+ /* Split fails */
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE * 2,
+ self->base_iova + 16 * PAGE_SIZE);
+ test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 16 * PAGE_SIZE,
+ PAGE_SIZE);
+ test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 17 * PAGE_SIZE,
+ PAGE_SIZE);
+
+ /* Over map fails */
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2,
+ self->base_iova + 16 * PAGE_SIZE);
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE,
+ self->base_iova + 16 * PAGE_SIZE);
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE,
+ self->base_iova + 17 * PAGE_SIZE);
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2,
+ self->base_iova + 15 * PAGE_SIZE);
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 3,
+ self->base_iova + 15 * PAGE_SIZE);
+
+ /* unmap all works */
+ test_ioctl_ioas_unmap(0, UINT64_MAX);
+
+ /* Unmap all succeeds on an empty IOAS */
+ test_ioctl_ioas_unmap(0, UINT64_MAX);
+}
+
+TEST_F(iommufd_ioas, unmap_fully_contained_areas)
+{
+ uint64_t unmap_len;
+ int i;
+
+ /* Give no_domain some space to rewind base_iova */
+ self->base_iova += 4 * PAGE_SIZE;
+
+ for (i = 0; i != 4; i++)
+ test_ioctl_ioas_map_fixed(buffer, 8 * PAGE_SIZE,
+ self->base_iova + i * 16 * PAGE_SIZE);
+
+ /* Unmap not fully contained area doesn't work */
+ test_err_ioctl_ioas_unmap(ENOENT, self->base_iova - 4 * PAGE_SIZE,
+ 8 * PAGE_SIZE);
+ test_err_ioctl_ioas_unmap(ENOENT,
+ self->base_iova + 3 * 16 * PAGE_SIZE +
+ 8 * PAGE_SIZE - 4 * PAGE_SIZE,
+ 8 * PAGE_SIZE);
+
+ /* Unmap fully contained areas works */
+ ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id,
+ self->base_iova - 4 * PAGE_SIZE,
+ 3 * 16 * PAGE_SIZE + 8 * PAGE_SIZE +
+ 4 * PAGE_SIZE,
+ &unmap_len));
+ ASSERT_EQ(32 * PAGE_SIZE, unmap_len);
+}
+
+TEST_F(iommufd_ioas, area_auto_iova)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_ADD_RESERVED,
+ .id = self->ioas_id,
+ .add_reserved = { .start = PAGE_SIZE * 4,
+ .length = PAGE_SIZE * 100 },
+ };
+ struct iommu_iova_range ranges[1] = {};
+ struct iommu_ioas_allow_iovas allow_cmd = {
+ .size = sizeof(allow_cmd),
+ .ioas_id = self->ioas_id,
+ .num_iovas = 1,
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+ __u64 iovas[10];
+ int i;
+
+ /* Simple 4k pages */
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_map(buffer, PAGE_SIZE, &iovas[i]);
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE);
+
+ /* Kernel automatically aligns IOVAs properly */
+ for (i = 0; i != 10; i++) {
+ size_t length = PAGE_SIZE * (i + 1);
+
+ if (self->stdev_id) {
+ test_ioctl_ioas_map(buffer, length, &iovas[i]);
+ } else {
+ test_ioctl_ioas_map((void *)(1UL << 31), length,
+ &iovas[i]);
+ }
+ EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1)));
+ }
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
+
+ /* Avoids a reserved region */
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ for (i = 0; i != 10; i++) {
+ size_t length = PAGE_SIZE * (i + 1);
+
+ test_ioctl_ioas_map(buffer, length, &iovas[i]);
+ EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1)));
+ EXPECT_EQ(false,
+ iovas[i] > test_cmd.add_reserved.start &&
+ iovas[i] <
+ test_cmd.add_reserved.start +
+ test_cmd.add_reserved.length);
+ }
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
+
+ /* Allowed region intersects with a reserved region */
+ ranges[0].start = PAGE_SIZE;
+ ranges[0].last = PAGE_SIZE * 600;
+ EXPECT_ERRNO(EADDRINUSE,
+ ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+
+ /* Allocate from an allowed region */
+ if (self->stdev_id) {
+ ranges[0].start = MOCK_APERTURE_START + PAGE_SIZE;
+ ranges[0].last = MOCK_APERTURE_START + PAGE_SIZE * 600 - 1;
+ } else {
+ ranges[0].start = PAGE_SIZE * 200;
+ ranges[0].last = PAGE_SIZE * 600 - 1;
+ }
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+ for (i = 0; i != 10; i++) {
+ size_t length = PAGE_SIZE * (i + 1);
+
+ test_ioctl_ioas_map(buffer, length, &iovas[i]);
+ EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1)));
+ EXPECT_EQ(true, iovas[i] >= ranges[0].start);
+ EXPECT_EQ(true, iovas[i] <= ranges[0].last);
+ EXPECT_EQ(true, iovas[i] + length > ranges[0].start);
+ EXPECT_EQ(true, iovas[i] + length <= ranges[0].last + 1);
+ }
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
+}
+
+/* https://lore.kernel.org/r/685af644.a00a0220.2e5631.0094.GAE@google.com */
+TEST_F(iommufd_ioas, reserved_overflow)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_ADD_RESERVED,
+ .id = self->ioas_id,
+ .add_reserved.start = 6,
+ };
+ unsigned int map_len;
+ __u64 iova;
+
+ if (PAGE_SIZE == 4096) {
+ test_cmd.add_reserved.length = 0xffffffffffff8001;
+ map_len = 0x5000;
+ } else {
+ test_cmd.add_reserved.length =
+ 0xffffffffffffffff - MOCK_PAGE_SIZE * 16;
+ map_len = MOCK_PAGE_SIZE * 10;
+ }
+
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ test_err_ioctl_ioas_map(ENOSPC, buffer, map_len, &iova);
+}
+
+TEST_F(iommufd_ioas, area_allowed)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_ADD_RESERVED,
+ .id = self->ioas_id,
+ .add_reserved = { .start = PAGE_SIZE * 4,
+ .length = PAGE_SIZE * 100 },
+ };
+ struct iommu_iova_range ranges[1] = {};
+ struct iommu_ioas_allow_iovas allow_cmd = {
+ .size = sizeof(allow_cmd),
+ .ioas_id = self->ioas_id,
+ .num_iovas = 1,
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+
+ /* Reserved intersects an allowed */
+ allow_cmd.num_iovas = 1;
+ ranges[0].start = self->base_iova;
+ ranges[0].last = ranges[0].start + PAGE_SIZE * 600;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+ test_cmd.add_reserved.start = ranges[0].start + PAGE_SIZE;
+ test_cmd.add_reserved.length = PAGE_SIZE;
+ EXPECT_ERRNO(EADDRINUSE,
+ ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ allow_cmd.num_iovas = 0;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+
+ /* Allowed intersects a reserved */
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ allow_cmd.num_iovas = 1;
+ ranges[0].start = self->base_iova;
+ ranges[0].last = ranges[0].start + PAGE_SIZE * 600;
+ EXPECT_ERRNO(EADDRINUSE,
+ ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+}
+
+TEST_F(iommufd_ioas, copy_area)
+{
+ struct iommu_ioas_copy copy_cmd = {
+ .size = sizeof(copy_cmd),
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
+ .dst_ioas_id = self->ioas_id,
+ .src_ioas_id = self->ioas_id,
+ .length = PAGE_SIZE,
+ };
+
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova);
+
+ /* Copy inside a single IOAS */
+ copy_cmd.src_iova = self->base_iova;
+ copy_cmd.dst_iova = self->base_iova + PAGE_SIZE;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+
+ /* Copy between IOAS's */
+ copy_cmd.src_iova = self->base_iova;
+ copy_cmd.dst_iova = 0;
+ test_ioctl_ioas_alloc(&copy_cmd.dst_ioas_id);
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+}
+
+TEST_F(iommufd_ioas, iova_ranges)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_ADD_RESERVED,
+ .id = self->ioas_id,
+ .add_reserved = { .start = PAGE_SIZE, .length = PAGE_SIZE },
+ };
+ struct iommu_iova_range *ranges = buffer;
+ struct iommu_ioas_iova_ranges ranges_cmd = {
+ .size = sizeof(ranges_cmd),
+ .ioas_id = self->ioas_id,
+ .num_iovas = BUFFER_SIZE / sizeof(*ranges),
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+
+ /* Range can be read */
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+ EXPECT_EQ(1, ranges_cmd.num_iovas);
+ if (!self->stdev_id) {
+ EXPECT_EQ(0, ranges[0].start);
+ EXPECT_EQ(SIZE_MAX, ranges[0].last);
+ EXPECT_EQ(1, ranges_cmd.out_iova_alignment);
+ } else {
+ EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start);
+ EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last);
+ EXPECT_EQ(MOCK_PAGE_SIZE, ranges_cmd.out_iova_alignment);
+ }
+
+ /* Buffer too small */
+ memset(ranges, 0, BUFFER_SIZE);
+ ranges_cmd.num_iovas = 0;
+ EXPECT_ERRNO(EMSGSIZE,
+ ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+ EXPECT_EQ(1, ranges_cmd.num_iovas);
+ EXPECT_EQ(0, ranges[0].start);
+ EXPECT_EQ(0, ranges[0].last);
+
+ /* 2 ranges */
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ ranges_cmd.num_iovas = BUFFER_SIZE / sizeof(*ranges);
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+ if (!self->stdev_id) {
+ EXPECT_EQ(2, ranges_cmd.num_iovas);
+ EXPECT_EQ(0, ranges[0].start);
+ EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last);
+ EXPECT_EQ(PAGE_SIZE * 2, ranges[1].start);
+ EXPECT_EQ(SIZE_MAX, ranges[1].last);
+ } else {
+ EXPECT_EQ(1, ranges_cmd.num_iovas);
+ EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start);
+ EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last);
+ }
+
+ /* Buffer too small */
+ memset(ranges, 0, BUFFER_SIZE);
+ ranges_cmd.num_iovas = 1;
+ if (!self->stdev_id) {
+ EXPECT_ERRNO(EMSGSIZE, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES,
+ &ranges_cmd));
+ EXPECT_EQ(2, ranges_cmd.num_iovas);
+ EXPECT_EQ(0, ranges[0].start);
+ EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last);
+ } else {
+ ASSERT_EQ(0,
+ ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+ EXPECT_EQ(1, ranges_cmd.num_iovas);
+ EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start);
+ EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last);
+ }
+ EXPECT_EQ(0, ranges[1].start);
+ EXPECT_EQ(0, ranges[1].last);
+}
+
+TEST_F(iommufd_ioas, access_domain_destory)
+{
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .access_pages = { .iova = self->base_iova + PAGE_SIZE,
+ .length = PAGE_SIZE},
+ };
+ size_t buf_size = 2 * HUGEPAGE_SIZE;
+ uint8_t *buf;
+
+ buf = mmap(0, buf_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1,
+ 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ test_ioctl_ioas_map_fixed(buf, buf_size, self->base_iova);
+
+ test_cmd_create_access(self->ioas_id, &access_cmd.id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+ access_cmd.access_pages.uptr = (uintptr_t)buf + PAGE_SIZE;
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+
+ /* Causes a complicated unpin across a huge page boundary */
+ if (self->stdev_id)
+ test_ioctl_destroy(self->stdev_id);
+
+ test_cmd_destroy_access_pages(
+ access_cmd.id, access_cmd.access_pages.out_access_pages_id);
+ test_cmd_destroy_access(access_cmd.id);
+ ASSERT_EQ(0, munmap(buf, buf_size));
+}
+
+TEST_F(iommufd_ioas, access_pin)
+{
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .access_pages = { .iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+ struct iommu_test_cmd check_map_cmd = {
+ .size = sizeof(check_map_cmd),
+ .op = IOMMU_TEST_OP_MD_CHECK_MAP,
+ .check_map = { .iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+ uint32_t access_pages_id;
+ unsigned int npages;
+
+ test_cmd_create_access(self->ioas_id, &access_cmd.id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+
+ for (npages = 1; npages < BUFFER_SIZE / PAGE_SIZE; npages++) {
+ uint32_t mock_stdev_id;
+ uint32_t mock_hwpt_id;
+
+ access_cmd.access_pages.length = npages * PAGE_SIZE;
+
+ /* Single map/unmap */
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE,
+ MOCK_APERTURE_START);
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ test_cmd_destroy_access_pages(
+ access_cmd.id,
+ access_cmd.access_pages.out_access_pages_id);
+
+ /* Double user */
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ access_pages_id = access_cmd.access_pages.out_access_pages_id;
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ test_cmd_destroy_access_pages(
+ access_cmd.id,
+ access_cmd.access_pages.out_access_pages_id);
+ test_cmd_destroy_access_pages(access_cmd.id, access_pages_id);
+
+ /* Add/remove a domain with a user */
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ test_cmd_mock_domain(self->ioas_id, &mock_stdev_id,
+ &mock_hwpt_id, NULL);
+ check_map_cmd.id = mock_hwpt_id;
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP),
+ &check_map_cmd));
+
+ test_ioctl_destroy(mock_stdev_id);
+ test_cmd_destroy_access_pages(
+ access_cmd.id,
+ access_cmd.access_pages.out_access_pages_id);
+
+ test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE);
+ }
+ test_cmd_destroy_access(access_cmd.id);
+}
+
+TEST_F(iommufd_ioas, access_pin_unmap)
+{
+ struct iommu_test_cmd access_pages_cmd = {
+ .size = sizeof(access_pages_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .access_pages = { .iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+
+ test_cmd_create_access(self->ioas_id, &access_pages_cmd.id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, MOCK_APERTURE_START);
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_pages_cmd));
+
+ /* Trigger the unmap op */
+ test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE);
+
+ /* kernel removed the item for us */
+ test_err_destroy_access_pages(
+ ENOENT, access_pages_cmd.id,
+ access_pages_cmd.access_pages.out_access_pages_id);
+}
+
+static void check_access_rw(struct __test_metadata *_metadata, int fd,
+ unsigned int access_id, uint64_t iova,
+ unsigned int def_flags)
+{
+ uint16_t tmp[32];
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_RW,
+ .id = access_id,
+ .access_rw = { .uptr = (uintptr_t)tmp },
+ };
+ uint16_t *buffer16 = buffer;
+ unsigned int i;
+ void *tmp2;
+
+ for (i = 0; i != BUFFER_SIZE / sizeof(*buffer16); i++)
+ buffer16[i] = rand();
+
+ for (access_cmd.access_rw.iova = iova + PAGE_SIZE - 50;
+ access_cmd.access_rw.iova < iova + PAGE_SIZE + 50;
+ access_cmd.access_rw.iova++) {
+ for (access_cmd.access_rw.length = 1;
+ access_cmd.access_rw.length < sizeof(tmp);
+ access_cmd.access_rw.length++) {
+ access_cmd.access_rw.flags = def_flags;
+ ASSERT_EQ(0, ioctl(fd,
+ _IOMMU_TEST_CMD(
+ IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd));
+ ASSERT_EQ(0,
+ memcmp(buffer + (access_cmd.access_rw.iova -
+ iova),
+ tmp, access_cmd.access_rw.length));
+
+ for (i = 0; i != ARRAY_SIZE(tmp); i++)
+ tmp[i] = rand();
+ access_cmd.access_rw.flags = def_flags |
+ MOCK_ACCESS_RW_WRITE;
+ ASSERT_EQ(0, ioctl(fd,
+ _IOMMU_TEST_CMD(
+ IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd));
+ ASSERT_EQ(0,
+ memcmp(buffer + (access_cmd.access_rw.iova -
+ iova),
+ tmp, access_cmd.access_rw.length));
+ }
+ }
+
+ /* Multi-page test */
+ tmp2 = malloc(BUFFER_SIZE);
+ ASSERT_NE(NULL, tmp2);
+ access_cmd.access_rw.iova = iova;
+ access_cmd.access_rw.length = BUFFER_SIZE;
+ access_cmd.access_rw.flags = def_flags;
+ access_cmd.access_rw.uptr = (uintptr_t)tmp2;
+ ASSERT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd));
+ ASSERT_EQ(0, memcmp(buffer, tmp2, access_cmd.access_rw.length));
+ free(tmp2);
+}
+
+TEST_F(iommufd_ioas, access_rw)
+{
+ __u32 access_id;
+ __u64 iova;
+
+ test_cmd_create_access(self->ioas_id, &access_id, 0);
+ test_ioctl_ioas_map(buffer, BUFFER_SIZE, &iova);
+ check_access_rw(_metadata, self->fd, access_id, iova, 0);
+ check_access_rw(_metadata, self->fd, access_id, iova,
+ MOCK_ACCESS_RW_SLOW_PATH);
+ test_ioctl_ioas_unmap(iova, BUFFER_SIZE);
+ test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, access_rw_unaligned)
+{
+ __u32 access_id;
+ __u64 iova;
+
+ test_cmd_create_access(self->ioas_id, &access_id, 0);
+
+ /* Unaligned pages */
+ iova = self->base_iova + MOCK_PAGE_SIZE;
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, iova);
+ check_access_rw(_metadata, self->fd, access_id, iova, 0);
+ test_ioctl_ioas_unmap(iova, BUFFER_SIZE);
+ test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, fork_gone)
+{
+ __u32 access_id;
+ pid_t child;
+
+ test_cmd_create_access(self->ioas_id, &access_id, 0);
+
+ /* Create a mapping with a different mm */
+ child = fork();
+ if (!child) {
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE,
+ MOCK_APERTURE_START);
+ exit(0);
+ }
+ ASSERT_NE(-1, child);
+ ASSERT_EQ(child, waitpid(child, NULL, 0));
+
+ if (self->stdev_id) {
+ /*
+ * If a domain already existed then everything was pinned within
+ * the fork, so this copies from one domain to another.
+ */
+ test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL);
+ check_access_rw(_metadata, self->fd, access_id,
+ MOCK_APERTURE_START, 0);
+
+ } else {
+ /*
+ * Otherwise we need to actually pin pages which can't happen
+ * since the fork is gone.
+ */
+ test_err_mock_domain(EFAULT, self->ioas_id, NULL, NULL);
+ }
+
+ test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, fork_present)
+{
+ __u32 access_id;
+ int pipefds[2];
+ uint64_t tmp;
+ pid_t child;
+ int efd;
+
+ test_cmd_create_access(self->ioas_id, &access_id, 0);
+
+ ASSERT_EQ(0, pipe2(pipefds, O_CLOEXEC));
+ efd = eventfd(0, EFD_CLOEXEC);
+ ASSERT_NE(-1, efd);
+
+ /* Create a mapping with a different mm */
+ child = fork();
+ if (!child) {
+ __u64 iova;
+ uint64_t one = 1;
+
+ close(pipefds[1]);
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE,
+ MOCK_APERTURE_START);
+ if (write(efd, &one, sizeof(one)) != sizeof(one))
+ exit(100);
+ if (read(pipefds[0], &iova, 1) != 1)
+ exit(100);
+ exit(0);
+ }
+ close(pipefds[0]);
+ ASSERT_NE(-1, child);
+ ASSERT_EQ(8, read(efd, &tmp, sizeof(tmp)));
+
+ /* Read pages from the remote process */
+ test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL);
+ check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0);
+
+ ASSERT_EQ(0, close(pipefds[1]));
+ ASSERT_EQ(child, waitpid(child, NULL, 0));
+
+ test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, ioas_option_huge_pages)
+{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_HUGE_PAGES,
+ .op = IOMMU_OPTION_OP_GET,
+ .val64 = 3,
+ .object_id = self->ioas_id,
+ };
+
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ ASSERT_EQ(1, cmd.val64);
+
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 0;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ cmd.op = IOMMU_OPTION_OP_GET;
+ cmd.val64 = 3;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ ASSERT_EQ(0, cmd.val64);
+
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 2;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 1;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+}
+
+TEST_F(iommufd_ioas, ioas_iova_alloc)
+{
+ unsigned int length;
+ __u64 iova;
+
+ for (length = 1; length != PAGE_SIZE * 2; length++) {
+ if (variant->mock_domains && (length % MOCK_PAGE_SIZE)) {
+ test_err_ioctl_ioas_map(EINVAL, buffer, length, &iova);
+ } else {
+ test_ioctl_ioas_map(buffer, length, &iova);
+ test_ioctl_ioas_unmap(iova, length);
+ }
+ }
+}
+
+TEST_F(iommufd_ioas, ioas_align_change)
+{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_HUGE_PAGES,
+ .op = IOMMU_OPTION_OP_SET,
+ .object_id = self->ioas_id,
+ /* 0 means everything must be aligned to PAGE_SIZE */
+ .val64 = 0,
+ };
+
+ /*
+ * We cannot upgrade the alignment using OPTION_HUGE_PAGES when a domain
+ * and map are present.
+ */
+ if (variant->mock_domains)
+ return;
+
+ /*
+ * We can upgrade to PAGE_SIZE alignment when things are aligned right
+ */
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, MOCK_APERTURE_START);
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ /* Misalignment is rejected at map time */
+ test_err_ioctl_ioas_map_fixed(EINVAL, buffer + MOCK_PAGE_SIZE,
+ PAGE_SIZE,
+ MOCK_APERTURE_START + PAGE_SIZE);
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ /* Reduce alignment */
+ cmd.val64 = 1;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ /* Confirm misalignment is rejected during alignment upgrade */
+ test_ioctl_ioas_map_fixed(buffer + MOCK_PAGE_SIZE, PAGE_SIZE,
+ MOCK_APERTURE_START + PAGE_SIZE);
+ cmd.val64 = 0;
+ EXPECT_ERRNO(EADDRINUSE, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ test_ioctl_ioas_unmap(MOCK_APERTURE_START + PAGE_SIZE, PAGE_SIZE);
+ test_ioctl_ioas_unmap(MOCK_APERTURE_START, PAGE_SIZE);
+}
+
+TEST_F(iommufd_ioas, copy_sweep)
+{
+ struct iommu_ioas_copy copy_cmd = {
+ .size = sizeof(copy_cmd),
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
+ .src_ioas_id = self->ioas_id,
+ .dst_iova = MOCK_APERTURE_START,
+ .length = MOCK_PAGE_SIZE,
+ };
+ unsigned int dst_ioas_id;
+ uint64_t last_iova;
+ uint64_t iova;
+
+ test_ioctl_ioas_alloc(&dst_ioas_id);
+ copy_cmd.dst_ioas_id = dst_ioas_id;
+
+ if (variant->mock_domains)
+ last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 1;
+ else
+ last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 2;
+
+ test_ioctl_ioas_map_fixed(buffer, last_iova - MOCK_APERTURE_START + 1,
+ MOCK_APERTURE_START);
+
+ for (iova = MOCK_APERTURE_START - PAGE_SIZE; iova <= last_iova;
+ iova += 511) {
+ copy_cmd.src_iova = iova;
+ if (iova < MOCK_APERTURE_START ||
+ iova + copy_cmd.length - 1 > last_iova) {
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_IOAS_COPY,
+ &copy_cmd));
+ } else {
+ ASSERT_EQ(0,
+ ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+ test_ioctl_ioas_unmap_id(dst_ioas_id, copy_cmd.dst_iova,
+ copy_cmd.length);
+ }
+ }
+
+ test_ioctl_destroy(dst_ioas_id);
+}
+
+TEST_F(iommufd_ioas, dmabuf_simple)
+{
+ size_t buf_size = PAGE_SIZE*4;
+ __u64 iova;
+ int dfd;
+
+ test_cmd_get_dmabuf(buf_size, &dfd);
+ test_err_ioctl_ioas_map_file(EINVAL, dfd, 0, 0, &iova);
+ test_err_ioctl_ioas_map_file(EINVAL, dfd, buf_size, buf_size, &iova);
+ test_err_ioctl_ioas_map_file(EINVAL, dfd, 0, buf_size + 1, &iova);
+ test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova);
+
+ close(dfd);
+}
+
+TEST_F(iommufd_ioas, dmabuf_revoke)
+{
+ size_t buf_size = PAGE_SIZE*4;
+ __u32 hwpt_id;
+ __u64 iova;
+ __u64 iova2;
+ int dfd;
+
+ test_cmd_get_dmabuf(buf_size, &dfd);
+ test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova);
+ test_cmd_revoke_dmabuf(dfd, true);
+
+ if (variant->mock_domains)
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id, 0,
+ &hwpt_id);
+
+ test_err_ioctl_ioas_map_file(ENODEV, dfd, 0, buf_size, &iova2);
+
+ test_cmd_revoke_dmabuf(dfd, false);
+ test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova2);
+
+ /* Restore the iova back */
+ test_ioctl_ioas_unmap(iova, buf_size);
+ test_ioctl_ioas_map_fixed_file(dfd, 0, buf_size, iova);
+
+ close(dfd);
+}
+
+FIXTURE(iommufd_mock_domain)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t hwpt_id;
+ uint32_t hwpt_ids[2];
+ uint32_t stdev_ids[2];
+ uint32_t idev_ids[2];
+ int mmap_flags;
+ size_t mmap_buf_size;
+};
+
+FIXTURE_VARIANT(iommufd_mock_domain)
+{
+ unsigned int mock_domains;
+ bool hugepages;
+ bool file;
+};
+
+FIXTURE_SETUP(iommufd_mock_domain)
+{
+ unsigned int i;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+ test_ioctl_ioas_alloc(&self->ioas_id);
+
+ ASSERT_GE(ARRAY_SIZE(self->hwpt_ids), variant->mock_domains);
+
+ for (i = 0; i != variant->mock_domains; i++) {
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_ids[i],
+ &self->hwpt_ids[i], &self->idev_ids[i]);
+ test_cmd_dev_check_cache_all(self->idev_ids[0],
+ IOMMU_TEST_DEV_CACHE_DEFAULT);
+ }
+ self->hwpt_id = self->hwpt_ids[0];
+
+ self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS;
+ self->mmap_buf_size = PAGE_SIZE * 8;
+ if (variant->hugepages) {
+ /*
+ * MAP_POPULATE will cause the kernel to fail mmap if THPs are
+ * not available.
+ */
+ self->mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+ self->mmap_buf_size = HUGEPAGE_SIZE * 2;
+ }
+}
+
+FIXTURE_TEARDOWN(iommufd_mock_domain)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain)
+{
+ .mock_domains = 1,
+ .hugepages = false,
+ .file = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains)
+{
+ .mock_domains = 2,
+ .hugepages = false,
+ .file = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_hugepage)
+{
+ .mock_domains = 1,
+ .hugepages = true,
+ .file = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage)
+{
+ .mock_domains = 2,
+ .hugepages = true,
+ .file = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_file)
+{
+ .mock_domains = 1,
+ .hugepages = false,
+ .file = true,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_file_hugepage)
+{
+ .mock_domains = 1,
+ .hugepages = true,
+ .file = true,
+};
+
+
+/* Have the kernel check that the user pages made it to the iommu_domain */
+#define check_mock_iova(_ptr, _iova, _length) \
+ ({ \
+ struct iommu_test_cmd check_map_cmd = { \
+ .size = sizeof(check_map_cmd), \
+ .op = IOMMU_TEST_OP_MD_CHECK_MAP, \
+ .id = self->hwpt_id, \
+ .check_map = { .iova = _iova, \
+ .length = _length, \
+ .uptr = (uintptr_t)(_ptr) }, \
+ }; \
+ ASSERT_EQ(0, \
+ ioctl(self->fd, \
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), \
+ &check_map_cmd)); \
+ if (self->hwpt_ids[1]) { \
+ check_map_cmd.id = self->hwpt_ids[1]; \
+ ASSERT_EQ(0, \
+ ioctl(self->fd, \
+ _IOMMU_TEST_CMD( \
+ IOMMU_TEST_OP_MD_CHECK_MAP), \
+ &check_map_cmd)); \
+ } \
+ })
+
+static void
+test_basic_mmap(struct __test_metadata *_metadata,
+ struct _test_data_iommufd_mock_domain *self,
+ const struct _fixture_variant_iommufd_mock_domain *variant)
+{
+ size_t buf_size = self->mmap_buf_size;
+ uint8_t *buf;
+ __u64 iova;
+
+ /* Simple one page map */
+ test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova);
+ check_mock_iova(buffer, iova, PAGE_SIZE);
+
+ buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1,
+ 0);
+ ASSERT_NE(MAP_FAILED, buf);
+
+ /* EFAULT half way through mapping */
+ ASSERT_EQ(0, munmap(buf + buf_size / 2, buf_size / 2));
+ test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova);
+
+ /* EFAULT on first page */
+ ASSERT_EQ(0, munmap(buf, buf_size / 2));
+ test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova);
+}
+
+static void
+test_basic_file(struct __test_metadata *_metadata,
+ struct _test_data_iommufd_mock_domain *self,
+ const struct _fixture_variant_iommufd_mock_domain *variant)
+{
+ size_t buf_size = self->mmap_buf_size;
+ uint8_t *buf;
+ __u64 iova;
+ int mfd_tmp;
+ int prot = PROT_READ | PROT_WRITE;
+
+ /* Simple one page map */
+ test_ioctl_ioas_map_file(mfd, 0, PAGE_SIZE, &iova);
+ check_mock_iova(mfd_buffer, iova, PAGE_SIZE);
+
+ buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd_tmp);
+ ASSERT_NE(MAP_FAILED, buf);
+
+ test_err_ioctl_ioas_map_file(EINVAL, mfd_tmp, 0, buf_size + 1, &iova);
+
+ ASSERT_EQ(0, ftruncate(mfd_tmp, 0));
+ test_err_ioctl_ioas_map_file(EINVAL, mfd_tmp, 0, buf_size, &iova);
+
+ close(mfd_tmp);
+}
+
+TEST_F(iommufd_mock_domain, basic)
+{
+ if (variant->file)
+ test_basic_file(_metadata, self, variant);
+ else
+ test_basic_mmap(_metadata, self, variant);
+}
+
+TEST_F(iommufd_mock_domain, ro_unshare)
+{
+ uint8_t *buf;
+ __u64 iova;
+ int fd;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ ASSERT_NE(-1, fd);
+
+ buf = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ close(fd);
+
+ /*
+ * There have been lots of changes to the "unshare" mechanism in
+ * get_user_pages(), make sure it works right. The write to the page
+ * after we map it for reading should not change the assigned PFN.
+ */
+ ASSERT_EQ(0,
+ _test_ioctl_ioas_map(self->fd, self->ioas_id, buf, PAGE_SIZE,
+ &iova, IOMMU_IOAS_MAP_READABLE));
+ check_mock_iova(buf, iova, PAGE_SIZE);
+ memset(buf, 1, PAGE_SIZE);
+ check_mock_iova(buf, iova, PAGE_SIZE);
+ ASSERT_EQ(0, munmap(buf, PAGE_SIZE));
+}
+
+TEST_F(iommufd_mock_domain, all_aligns)
+{
+ size_t test_step = variant->hugepages ? (self->mmap_buf_size / 16) :
+ MOCK_PAGE_SIZE;
+ size_t buf_size = self->mmap_buf_size;
+ unsigned int start;
+ unsigned int end;
+ uint8_t *buf;
+ int prot = PROT_READ | PROT_WRITE;
+ int mfd = -1;
+
+ if (variant->file)
+ buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
+ else
+ buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
+ check_refs(buf, buf_size, 0);
+
+ /*
+ * Map every combination of page size and alignment within a big region,
+ * less for hugepage case as it takes so long to finish.
+ */
+ for (start = 0; start < buf_size; start += test_step) {
+ if (variant->hugepages)
+ end = buf_size;
+ else
+ end = start + MOCK_PAGE_SIZE;
+ for (; end < buf_size; end += MOCK_PAGE_SIZE) {
+ size_t length = end - start;
+ __u64 iova;
+
+ if (variant->file) {
+ test_ioctl_ioas_map_file(mfd, start, length,
+ &iova);
+ } else {
+ test_ioctl_ioas_map(buf + start, length, &iova);
+ }
+ check_mock_iova(buf + start, iova, length);
+ check_refs(buf + start / PAGE_SIZE * PAGE_SIZE,
+ end / PAGE_SIZE * PAGE_SIZE -
+ start / PAGE_SIZE * PAGE_SIZE,
+ 1);
+
+ test_ioctl_ioas_unmap(iova, length);
+ }
+ }
+ check_refs(buf, buf_size, 0);
+ ASSERT_EQ(0, munmap(buf, buf_size));
+ if (variant->file)
+ close(mfd);
+}
+
+TEST_F(iommufd_mock_domain, all_aligns_copy)
+{
+ size_t test_step = variant->hugepages ? self->mmap_buf_size / 16 :
+ MOCK_PAGE_SIZE;
+ size_t buf_size = self->mmap_buf_size;
+ unsigned int start;
+ unsigned int end;
+ uint8_t *buf;
+ int prot = PROT_READ | PROT_WRITE;
+ int mfd = -1;
+
+ if (variant->file)
+ buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
+ else
+ buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
+ check_refs(buf, buf_size, 0);
+
+ /*
+ * Map every combination of page size and alignment within a big region,
+ * less for hugepage case as it takes so long to finish.
+ */
+ for (start = 0; start < buf_size; start += test_step) {
+ if (variant->hugepages)
+ end = buf_size;
+ else
+ end = start + MOCK_PAGE_SIZE;
+ for (; end < buf_size; end += MOCK_PAGE_SIZE) {
+ size_t length = end - start;
+ unsigned int old_id;
+ uint32_t mock_stdev_id;
+ __u64 iova;
+
+ if (variant->file) {
+ test_ioctl_ioas_map_file(mfd, start, length,
+ &iova);
+ } else {
+ test_ioctl_ioas_map(buf + start, length, &iova);
+ }
+
+ /* Add and destroy a domain while the area exists */
+ old_id = self->hwpt_ids[1];
+ test_cmd_mock_domain(self->ioas_id, &mock_stdev_id,
+ &self->hwpt_ids[1], NULL);
+
+ check_mock_iova(buf + start, iova, length);
+ check_refs(buf + start / PAGE_SIZE * PAGE_SIZE,
+ end / PAGE_SIZE * PAGE_SIZE -
+ start / PAGE_SIZE * PAGE_SIZE,
+ 1);
+
+ test_ioctl_destroy(mock_stdev_id);
+ self->hwpt_ids[1] = old_id;
+
+ test_ioctl_ioas_unmap(iova, length);
+ }
+ }
+ check_refs(buf, buf_size, 0);
+ ASSERT_EQ(0, munmap(buf, buf_size));
+ if (variant->file)
+ close(mfd);
+}
+
+TEST_F(iommufd_mock_domain, user_copy)
+{
+ void *buf = variant->file ? mfd_buffer : buffer;
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .access_pages = { .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buf },
+ };
+ struct iommu_ioas_copy copy_cmd = {
+ .size = sizeof(copy_cmd),
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
+ .dst_ioas_id = self->ioas_id,
+ .dst_iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ };
+ struct iommu_ioas_unmap unmap_cmd = {
+ .size = sizeof(unmap_cmd),
+ .ioas_id = self->ioas_id,
+ .iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ };
+ unsigned int new_ioas_id, ioas_id;
+
+ /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */
+ test_ioctl_ioas_alloc(&ioas_id);
+ if (variant->file) {
+ test_ioctl_ioas_map_id_file(ioas_id, mfd, 0, BUFFER_SIZE,
+ &copy_cmd.src_iova);
+ } else {
+ test_ioctl_ioas_map_id(ioas_id, buf, BUFFER_SIZE,
+ &copy_cmd.src_iova);
+ }
+ test_cmd_create_access(ioas_id, &access_cmd.id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+
+ access_cmd.access_pages.iova = copy_cmd.src_iova;
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ copy_cmd.src_ioas_id = ioas_id;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+ check_mock_iova(buf, MOCK_APERTURE_START, BUFFER_SIZE);
+
+ /* Now replace the ioas with a new one */
+ test_ioctl_ioas_alloc(&new_ioas_id);
+ if (variant->file) {
+ test_ioctl_ioas_map_id_file(new_ioas_id, mfd, 0, BUFFER_SIZE,
+ &copy_cmd.src_iova);
+ } else {
+ test_ioctl_ioas_map_id(new_ioas_id, buf, BUFFER_SIZE,
+ &copy_cmd.src_iova);
+ }
+ test_cmd_access_replace_ioas(access_cmd.id, new_ioas_id);
+
+ /* Destroy the old ioas and cleanup copied mapping */
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_UNMAP, &unmap_cmd));
+ test_ioctl_destroy(ioas_id);
+
+ /* Then run the same test again with the new ioas */
+ access_cmd.access_pages.iova = copy_cmd.src_iova;
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ copy_cmd.src_ioas_id = new_ioas_id;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+ check_mock_iova(buf, MOCK_APERTURE_START, BUFFER_SIZE);
+
+ test_cmd_destroy_access_pages(
+ access_cmd.id, access_cmd.access_pages.out_access_pages_id);
+ test_cmd_destroy_access(access_cmd.id);
+
+ test_ioctl_destroy(new_ioas_id);
+}
+
+TEST_F(iommufd_mock_domain, replace)
+{
+ uint32_t ioas_id;
+
+ test_ioctl_ioas_alloc(&ioas_id);
+
+ test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id);
+
+ /*
+ * Replacing the IOAS causes the prior HWPT to be deallocated, thus we
+ * should get enoent when we try to use it.
+ */
+ if (variant->mock_domains == 1)
+ test_err_mock_domain_replace(ENOENT, self->stdev_ids[0],
+ self->hwpt_ids[0]);
+
+ test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id);
+ if (variant->mock_domains >= 2) {
+ test_cmd_mock_domain_replace(self->stdev_ids[0],
+ self->hwpt_ids[1]);
+ test_cmd_mock_domain_replace(self->stdev_ids[0],
+ self->hwpt_ids[1]);
+ test_cmd_mock_domain_replace(self->stdev_ids[0],
+ self->hwpt_ids[0]);
+ }
+
+ test_cmd_mock_domain_replace(self->stdev_ids[0], self->ioas_id);
+ test_ioctl_destroy(ioas_id);
+}
+
+TEST_F(iommufd_mock_domain, alloc_hwpt)
+{
+ int i;
+
+ for (i = 0; i != variant->mock_domains; i++) {
+ uint32_t hwpt_id[2];
+ uint32_t stddev_id;
+
+ test_err_hwpt_alloc(EOPNOTSUPP,
+ self->idev_ids[i], self->ioas_id,
+ ~IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[0]);
+ test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id,
+ 0, &hwpt_id[0]);
+ test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[1]);
+
+ /* Do a hw_pagetable rotation test */
+ test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[0]);
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[0]));
+ test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[1]);
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[1]));
+ test_cmd_mock_domain_replace(self->stdev_ids[i], self->ioas_id);
+ test_ioctl_destroy(hwpt_id[1]);
+
+ test_cmd_mock_domain(hwpt_id[0], &stddev_id, NULL, NULL);
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id[0]);
+ }
+}
+
+FIXTURE(iommufd_dirty_tracking)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t hwpt_id;
+ uint32_t stdev_id;
+ uint32_t idev_id;
+ unsigned long page_size;
+ unsigned long bitmap_size;
+ void *bitmap;
+ void *buffer;
+};
+
+FIXTURE_VARIANT(iommufd_dirty_tracking)
+{
+ unsigned long buffer_size;
+ bool hugepages;
+};
+
+FIXTURE_SETUP(iommufd_dirty_tracking)
+{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_HUGE_PAGES,
+ .op = IOMMU_OPTION_OP_SET,
+ .val64 = 0,
+ };
+ size_t mmap_buffer_size;
+ unsigned long size;
+ int mmap_flags;
+ void *vrc;
+ int rc;
+
+ if (variant->buffer_size < MOCK_PAGE_SIZE) {
+ SKIP(return,
+ "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%u",
+ variant->buffer_size, MOCK_PAGE_SIZE);
+ }
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+
+ mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED;
+ mmap_buffer_size = variant->buffer_size;
+ if (variant->hugepages) {
+ /*
+ * MAP_POPULATE will cause the kernel to fail mmap if THPs are
+ * not available.
+ */
+ mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+
+ /*
+ * Allocation must be aligned to the HUGEPAGE_SIZE, because the
+ * following mmap() will automatically align the length to be a
+ * multiple of the underlying huge page size. Failing to do the
+ * same at this allocation will result in a memory overwrite by
+ * the mmap().
+ */
+ if (mmap_buffer_size < HUGEPAGE_SIZE)
+ mmap_buffer_size = HUGEPAGE_SIZE;
+ }
+
+ rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size);
+ if (rc || !self->buffer) {
+ SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
+ mmap_buffer_size, rc);
+ }
+ assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
+ vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE,
+ mmap_flags, -1, 0);
+ assert(vrc == self->buffer);
+
+ self->page_size = MOCK_PAGE_SIZE;
+ self->bitmap_size = variant->buffer_size / self->page_size;
+
+ /* Provision with an extra (PAGE_SIZE) for the unaligned case */
+ size = DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE);
+ rc = posix_memalign(&self->bitmap, PAGE_SIZE, size + PAGE_SIZE);
+ assert(!rc);
+ assert(self->bitmap);
+ assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
+
+ test_ioctl_ioas_alloc(&self->ioas_id);
+
+ /*
+ * For dirty testing it is important that the page size fed into
+ * the iommu page tables matches the size the dirty logic
+ * expects, or set_dirty can touch too much stuff.
+ */
+ cmd.object_id = self->ioas_id;
+ if (!variant->hugepages)
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id,
+ &self->idev_id);
+}
+
+FIXTURE_TEARDOWN(iommufd_dirty_tracking)
+{
+ free(self->buffer);
+ free(self->bitmap);
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty8k)
+{
+ /* half of an u8 index bitmap */
+ .buffer_size = 8UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty16k)
+{
+ /* one u8 index bitmap */
+ .buffer_size = 16UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64k)
+{
+ /* one u32 index bitmap */
+ .buffer_size = 64UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
+{
+ /* one u64 index bitmap */
+ .buffer_size = 128UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty320k)
+{
+ /* two u64 index and trailing end bitmap */
+ .buffer_size = 320UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M)
+{
+ /* 4K bitmap (64M IOVA range) */
+ .buffer_size = 64UL * 1024UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M_huge)
+{
+ /* 4K bitmap (64M IOVA range) */
+ .buffer_size = 64UL * 1024UL * 1024UL,
+ .hugepages = true,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
+{
+ /* 8K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
+{
+ /* 8K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
+ .hugepages = true,
+};
+
+TEST_F(iommufd_dirty_tracking, enforce_dirty)
+{
+ uint32_t ioas_id, stddev_id, idev_id;
+ uint32_t hwpt_id, _hwpt_id;
+ uint32_t dev_flags;
+
+ /* Regular case */
+ dev_flags = MOCK_FLAGS_DEVICE_NO_DIRTY;
+ test_cmd_hwpt_alloc(self->idev_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_err_mock_domain_flags(EINVAL, hwpt_id, dev_flags, &stddev_id,
+ NULL);
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+
+ /* IOMMU device does not support dirty tracking */
+ test_ioctl_ioas_alloc(&ioas_id);
+ test_cmd_mock_domain_flags(ioas_id, dev_flags, &stddev_id, &_hwpt_id,
+ &idev_id);
+ test_err_hwpt_alloc(EOPNOTSUPP, idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_ioctl_destroy(stddev_id);
+}
+
+TEST_F(iommufd_dirty_tracking, set_dirty_tracking)
+{
+ uint32_t stddev_id;
+ uint32_t hwpt_id;
+
+ test_cmd_hwpt_alloc(self->idev_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_cmd_set_dirty_tracking(hwpt_id, true);
+ test_cmd_set_dirty_tracking(hwpt_id, false);
+
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, device_dirty_capability)
+{
+ uint32_t caps = 0;
+ uint32_t stddev_id;
+ uint32_t hwpt_id;
+
+ test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_cmd_get_hw_capabilities(self->idev_id, caps);
+ ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING,
+ caps & IOMMU_HW_CAP_DIRTY_TRACKING);
+
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
+{
+ uint32_t page_size = MOCK_PAGE_SIZE;
+ uint32_t ioas_id = self->ioas_id;
+ uint32_t hwpt_id;
+
+ if (variant->hugepages)
+ page_size = MOCK_HUGE_PAGE_SIZE;
+
+ test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
+ variant->buffer_size, MOCK_APERTURE_START);
+
+ if (variant->hugepages)
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_HUGE, &hwpt_id);
+ else
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_DEFAULT, &hwpt_id);
+
+ test_cmd_set_dirty_tracking(hwpt_id, true);
+
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap, self->bitmap_size, 0, _metadata);
+
+ /* PAGE_SIZE unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + MOCK_PAGE_SIZE,
+ self->bitmap_size, 0, _metadata);
+
+ /* u64 unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + 0xff1, self->bitmap_size, 0,
+ _metadata);
+
+ test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear)
+{
+ uint32_t page_size = MOCK_PAGE_SIZE;
+ uint32_t ioas_id = self->ioas_id;
+ uint32_t hwpt_id;
+
+ if (variant->hugepages)
+ page_size = MOCK_HUGE_PAGE_SIZE;
+
+ test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
+ variant->buffer_size, MOCK_APERTURE_START);
+
+
+ if (variant->hugepages)
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_HUGE, &hwpt_id);
+ else
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_DEFAULT, &hwpt_id);
+
+ test_cmd_set_dirty_tracking(hwpt_id, true);
+
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap, self->bitmap_size,
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+ _metadata);
+
+ /* Unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + MOCK_PAGE_SIZE,
+ self->bitmap_size,
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+ _metadata);
+
+ /* u64 unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + 0xff1, self->bitmap_size,
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+ _metadata);
+
+ test_ioctl_destroy(hwpt_id);
+}
+
+/* VFIO compatibility IOCTLs */
+
+TEST_F(iommufd, simple_ioctls)
+{
+ ASSERT_EQ(VFIO_API_VERSION, ioctl(self->fd, VFIO_GET_API_VERSION));
+ ASSERT_EQ(1, ioctl(self->fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU));
+}
+
+TEST_F(iommufd, unmap_cmd)
+{
+ struct vfio_iommu_type1_dma_unmap unmap_cmd = {
+ .iova = MOCK_APERTURE_START,
+ .size = PAGE_SIZE,
+ };
+
+ unmap_cmd.argsz = 1;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+ unmap_cmd.argsz = sizeof(unmap_cmd);
+ unmap_cmd.flags = 1 << 31;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+ unmap_cmd.flags = 0;
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+}
+
+TEST_F(iommufd, map_cmd)
+{
+ struct vfio_iommu_type1_dma_map map_cmd = {
+ .iova = MOCK_APERTURE_START,
+ .size = PAGE_SIZE,
+ .vaddr = (__u64)buffer,
+ };
+
+ map_cmd.argsz = 1;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+
+ map_cmd.argsz = sizeof(map_cmd);
+ map_cmd.flags = 1 << 31;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+
+ /* Requires a domain to be attached */
+ map_cmd.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+}
+
+TEST_F(iommufd, info_cmd)
+{
+ struct vfio_iommu_type1_info info_cmd = {};
+
+ /* Invalid argsz */
+ info_cmd.argsz = 1;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd));
+
+ info_cmd.argsz = sizeof(info_cmd);
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd));
+}
+
+TEST_F(iommufd, set_iommu_cmd)
+{
+ /* Requires a domain to be attached */
+ EXPECT_ERRNO(ENODEV,
+ ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU));
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU));
+}
+
+TEST_F(iommufd, vfio_ioas)
+{
+ struct iommu_vfio_ioas vfio_ioas_cmd = {
+ .size = sizeof(vfio_ioas_cmd),
+ .op = IOMMU_VFIO_IOAS_GET,
+ };
+ __u32 ioas_id;
+
+ /* ENODEV if there is no compat ioas */
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+
+ /* Invalid id for set */
+ vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_SET;
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+
+ /* Valid id for set*/
+ test_ioctl_ioas_alloc(&ioas_id);
+ vfio_ioas_cmd.ioas_id = ioas_id;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+
+ /* Same id comes back from get */
+ vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+ ASSERT_EQ(ioas_id, vfio_ioas_cmd.ioas_id);
+
+ /* Clear works */
+ vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_CLEAR;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+ vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET;
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+}
+
+FIXTURE(vfio_compat_mock_domain)
+{
+ int fd;
+ uint32_t ioas_id;
+};
+
+FIXTURE_VARIANT(vfio_compat_mock_domain)
+{
+ unsigned int version;
+};
+
+FIXTURE_SETUP(vfio_compat_mock_domain)
+{
+ struct iommu_vfio_ioas vfio_ioas_cmd = {
+ .size = sizeof(vfio_ioas_cmd),
+ .op = IOMMU_VFIO_IOAS_SET,
+ };
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+
+ /* Create what VFIO would consider a group */
+ test_ioctl_ioas_alloc(&self->ioas_id);
+ test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL);
+
+ /* Attach it to the vfio compat */
+ vfio_ioas_cmd.ioas_id = self->ioas_id;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_SET_IOMMU, variant->version));
+}
+
+FIXTURE_TEARDOWN(vfio_compat_mock_domain)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v2)
+{
+ .version = VFIO_TYPE1v2_IOMMU,
+};
+
+FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v0)
+{
+ .version = VFIO_TYPE1_IOMMU,
+};
+
+TEST_F(vfio_compat_mock_domain, simple_close)
+{
+}
+
+TEST_F(vfio_compat_mock_domain, option_huge_pages)
+{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_HUGE_PAGES,
+ .op = IOMMU_OPTION_OP_GET,
+ .val64 = 3,
+ .object_id = self->ioas_id,
+ };
+
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ if (variant->version == VFIO_TYPE1_IOMMU) {
+ ASSERT_EQ(0, cmd.val64);
+ } else {
+ ASSERT_EQ(1, cmd.val64);
+ }
+}
+
+/*
+ * Execute an ioctl command stored in buffer and check that the result does not
+ * overflow memory.
+ */
+static bool is_filled(const void *buf, uint8_t c, size_t len)
+{
+ const uint8_t *cbuf = buf;
+
+ for (; len; cbuf++, len--)
+ if (*cbuf != c)
+ return false;
+ return true;
+}
+
+#define ioctl_check_buf(fd, cmd) \
+ ({ \
+ size_t _cmd_len = *(__u32 *)buffer; \
+ \
+ memset(buffer + _cmd_len, 0xAA, BUFFER_SIZE - _cmd_len); \
+ ASSERT_EQ(0, ioctl(fd, cmd, buffer)); \
+ ASSERT_EQ(true, is_filled(buffer + _cmd_len, 0xAA, \
+ BUFFER_SIZE - _cmd_len)); \
+ })
+
+static void check_vfio_info_cap_chain(struct __test_metadata *_metadata,
+ struct vfio_iommu_type1_info *info_cmd)
+{
+ const struct vfio_info_cap_header *cap;
+
+ ASSERT_GE(info_cmd->argsz, info_cmd->cap_offset + sizeof(*cap));
+ cap = buffer + info_cmd->cap_offset;
+ while (true) {
+ size_t cap_size;
+
+ if (cap->next)
+ cap_size = (buffer + cap->next) - (void *)cap;
+ else
+ cap_size = (buffer + info_cmd->argsz) - (void *)cap;
+
+ switch (cap->id) {
+ case VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE: {
+ struct vfio_iommu_type1_info_cap_iova_range *data =
+ (void *)cap;
+
+ ASSERT_EQ(1, data->header.version);
+ ASSERT_EQ(1, data->nr_iovas);
+ EXPECT_EQ(MOCK_APERTURE_START,
+ data->iova_ranges[0].start);
+ EXPECT_EQ(MOCK_APERTURE_LAST, data->iova_ranges[0].end);
+ break;
+ }
+ case VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL: {
+ struct vfio_iommu_type1_info_dma_avail *data =
+ (void *)cap;
+
+ ASSERT_EQ(1, data->header.version);
+ ASSERT_EQ(sizeof(*data), cap_size);
+ break;
+ }
+ default:
+ ASSERT_EQ(false, true);
+ break;
+ }
+ if (!cap->next)
+ break;
+
+ ASSERT_GE(info_cmd->argsz, cap->next + sizeof(*cap));
+ ASSERT_GE(buffer + cap->next, (void *)cap);
+ cap = buffer + cap->next;
+ }
+}
+
+TEST_F(vfio_compat_mock_domain, get_info)
+{
+ struct vfio_iommu_type1_info *info_cmd = buffer;
+ unsigned int i;
+ size_t caplen;
+
+ /* Pre-cap ABI */
+ *info_cmd = (struct vfio_iommu_type1_info){
+ .argsz = offsetof(struct vfio_iommu_type1_info, cap_offset),
+ };
+ ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO);
+ ASSERT_NE(0, info_cmd->iova_pgsizes);
+ ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS,
+ info_cmd->flags);
+
+ /* Read the cap chain size */
+ *info_cmd = (struct vfio_iommu_type1_info){
+ .argsz = sizeof(*info_cmd),
+ };
+ ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO);
+ ASSERT_NE(0, info_cmd->iova_pgsizes);
+ ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS,
+ info_cmd->flags);
+ ASSERT_EQ(0, info_cmd->cap_offset);
+ ASSERT_LT(sizeof(*info_cmd), info_cmd->argsz);
+
+ /* Read the caps, kernel should never create a corrupted caps */
+ caplen = info_cmd->argsz;
+ for (i = sizeof(*info_cmd); i < caplen; i++) {
+ *info_cmd = (struct vfio_iommu_type1_info){
+ .argsz = i,
+ };
+ ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO);
+ ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS,
+ info_cmd->flags);
+ if (!info_cmd->cap_offset)
+ continue;
+ check_vfio_info_cap_chain(_metadata, info_cmd);
+ }
+}
+
+static void shuffle_array(unsigned long *array, size_t nelms)
+{
+ unsigned int i;
+
+ /* Shuffle */
+ for (i = 0; i != nelms; i++) {
+ unsigned long tmp = array[i];
+ unsigned int other = rand() % (nelms - i);
+
+ array[i] = array[other];
+ array[other] = tmp;
+ }
+}
+
+TEST_F(vfio_compat_mock_domain, map)
+{
+ struct vfio_iommu_type1_dma_map map_cmd = {
+ .argsz = sizeof(map_cmd),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .vaddr = (uintptr_t)buffer,
+ .size = BUFFER_SIZE,
+ .iova = MOCK_APERTURE_START,
+ };
+ struct vfio_iommu_type1_dma_unmap unmap_cmd = {
+ .argsz = sizeof(unmap_cmd),
+ .size = BUFFER_SIZE,
+ .iova = MOCK_APERTURE_START,
+ };
+ unsigned long pages_iova[BUFFER_SIZE / PAGE_SIZE];
+ unsigned int i;
+
+ /* Simple map/unmap */
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+ ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
+ /* Unmap of empty is success */
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+ /* UNMAP_FLAG_ALL requires 0 iova/size */
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+ unmap_cmd.flags = VFIO_DMA_UNMAP_FLAG_ALL;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+ unmap_cmd.iova = 0;
+ unmap_cmd.size = 0;
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+ ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
+
+ /* Small pages */
+ for (i = 0; i != ARRAY_SIZE(pages_iova); i++) {
+ map_cmd.iova = pages_iova[i] =
+ MOCK_APERTURE_START + i * PAGE_SIZE;
+ map_cmd.vaddr = (uintptr_t)buffer + i * PAGE_SIZE;
+ map_cmd.size = PAGE_SIZE;
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+ }
+ shuffle_array(pages_iova, ARRAY_SIZE(pages_iova));
+
+ unmap_cmd.flags = 0;
+ unmap_cmd.size = PAGE_SIZE;
+ for (i = 0; i != ARRAY_SIZE(pages_iova); i++) {
+ unmap_cmd.iova = pages_iova[i];
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+ }
+}
+
+TEST_F(vfio_compat_mock_domain, huge_map)
+{
+ size_t buf_size = HUGEPAGE_SIZE * 2;
+ struct vfio_iommu_type1_dma_map map_cmd = {
+ .argsz = sizeof(map_cmd),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .size = buf_size,
+ .iova = MOCK_APERTURE_START,
+ };
+ struct vfio_iommu_type1_dma_unmap unmap_cmd = {
+ .argsz = sizeof(unmap_cmd),
+ };
+ unsigned long pages_iova[16];
+ unsigned int i;
+ void *buf;
+
+ /* Test huge pages and splitting */
+ buf = mmap(0, buf_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1,
+ 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ map_cmd.vaddr = (uintptr_t)buf;
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+
+ unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova);
+ for (i = 0; i != ARRAY_SIZE(pages_iova); i++)
+ pages_iova[i] = MOCK_APERTURE_START + (i * unmap_cmd.size);
+ shuffle_array(pages_iova, ARRAY_SIZE(pages_iova));
+
+ /* type1 mode can cut up larger mappings, type1v2 always fails */
+ for (i = 0; i != ARRAY_SIZE(pages_iova); i++) {
+ unmap_cmd.iova = pages_iova[i];
+ unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova);
+ if (variant->version == VFIO_TYPE1_IOMMU) {
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA,
+ &unmap_cmd));
+ } else {
+ EXPECT_ERRNO(ENOENT,
+ ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA,
+ &unmap_cmd));
+ }
+ }
+}
+
+FIXTURE(iommufd_viommu)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t stdev_id;
+ uint32_t hwpt_id;
+ uint32_t nested_hwpt_id;
+ uint32_t device_id;
+ uint32_t viommu_id;
+};
+
+FIXTURE_VARIANT(iommufd_viommu)
+{
+ unsigned int viommu;
+};
+
+FIXTURE_SETUP(iommufd_viommu)
+{
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+ test_ioctl_ioas_alloc(&self->ioas_id);
+ test_ioctl_set_default_memory_limit();
+
+ if (variant->viommu) {
+ struct iommu_hwpt_selftest data = {
+ .iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+ };
+
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_id, NULL,
+ &self->device_id);
+
+ /* Allocate a nesting parent hwpt */
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT,
+ &self->hwpt_id);
+
+ /* Allocate a vIOMMU taking refcount of the parent hwpt */
+ test_cmd_viommu_alloc(self->device_id, self->hwpt_id,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ &self->viommu_id);
+
+ /* Allocate a regular nested hwpt */
+ test_cmd_hwpt_alloc_nested(self->device_id, self->viommu_id, 0,
+ &self->nested_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ }
+}
+
+FIXTURE_TEARDOWN(iommufd_viommu)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_viommu, no_viommu)
+{
+ .viommu = 0,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_viommu, mock_viommu)
+{
+ .viommu = 1,
+};
+
+TEST_F(iommufd_viommu, viommu_auto_destroy)
+{
+}
+
+TEST_F(iommufd_viommu, viommu_negative_tests)
+{
+ uint32_t device_id = self->device_id;
+ uint32_t ioas_id = self->ioas_id;
+ uint32_t hwpt_id;
+
+ if (self->device_id) {
+ /* Negative test -- invalid hwpt (hwpt_id=0) */
+ test_err_viommu_alloc(ENOENT, device_id, 0,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ NULL);
+
+ /* Negative test -- not a nesting parent hwpt */
+ test_cmd_hwpt_alloc(device_id, ioas_id, 0, &hwpt_id);
+ test_err_viommu_alloc(EINVAL, device_id, hwpt_id,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ NULL);
+ test_ioctl_destroy(hwpt_id);
+
+ /* Negative test -- unsupported viommu type */
+ test_err_viommu_alloc(EOPNOTSUPP, device_id, self->hwpt_id,
+ 0xdead, NULL, 0, NULL);
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, self->hwpt_id));
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, self->viommu_id));
+ } else {
+ test_err_viommu_alloc(ENOENT, self->device_id, self->hwpt_id,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ NULL);
+ }
+}
+
+TEST_F(iommufd_viommu, viommu_alloc_nested_iopf)
+{
+ struct iommu_hwpt_selftest data = {
+ .iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+ };
+ uint32_t viommu_id = self->viommu_id;
+ uint32_t dev_id = self->device_id;
+ uint32_t iopf_hwpt_id;
+ uint32_t fault_id;
+ uint32_t fault_fd;
+ uint32_t vdev_id;
+
+ if (!dev_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ test_ioctl_fault_alloc(&fault_id, &fault_fd);
+ test_err_hwpt_alloc_iopf(ENOENT, dev_id, viommu_id, UINT32_MAX,
+ IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
+ test_err_hwpt_alloc_iopf(EOPNOTSUPP, dev_id, viommu_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID | (1 << 31),
+ &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_cmd_hwpt_alloc_iopf(dev_id, viommu_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
+
+ /* Must allocate vdevice before attaching to a nested hwpt */
+ test_err_mock_domain_replace(ENOENT, self->stdev_id, iopf_hwpt_id);
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, iopf_hwpt_id));
+ test_cmd_trigger_iopf(dev_id, fault_fd);
+
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+ test_ioctl_destroy(iopf_hwpt_id);
+ close(fault_fd);
+ test_ioctl_destroy(fault_id);
+}
+
+TEST_F(iommufd_viommu, viommu_alloc_with_data)
+{
+ struct iommu_viommu_selftest data = {
+ .in_data = 0xbeef,
+ };
+ uint32_t *test;
+
+ if (!self->device_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ test_cmd_viommu_alloc(self->device_id, self->hwpt_id,
+ IOMMU_VIOMMU_TYPE_SELFTEST, &data, sizeof(data),
+ &self->viommu_id);
+ ASSERT_EQ(data.out_data, data.in_data);
+
+ /* Negative mmap tests -- offset and length cannot be changed */
+ test_err_mmap(ENXIO, data.out_mmap_length,
+ data.out_mmap_offset + PAGE_SIZE);
+ test_err_mmap(ENXIO, data.out_mmap_length,
+ data.out_mmap_offset + PAGE_SIZE * 2);
+ test_err_mmap(ENXIO, data.out_mmap_length / 2, data.out_mmap_offset);
+ test_err_mmap(ENXIO, data.out_mmap_length * 2, data.out_mmap_offset);
+
+ /* Now do a correct mmap for a loopback test */
+ test = mmap(NULL, data.out_mmap_length, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->fd, data.out_mmap_offset);
+ ASSERT_NE(MAP_FAILED, test);
+ ASSERT_EQ(data.in_data, *test);
+
+ /* The owner of the mmap region should be blocked */
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->viommu_id));
+ munmap(test, data.out_mmap_length);
+}
+
+TEST_F(iommufd_viommu, vdevice_alloc)
+{
+ uint32_t viommu_id = self->viommu_id;
+ uint32_t dev_id = self->device_id;
+ uint32_t vdev_id = 0;
+ uint32_t veventq_id;
+ uint32_t veventq_fd;
+ int prev_seq = -1;
+
+ if (dev_id) {
+ /* Must allocate vdevice before attaching to a nested hwpt */
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ self->nested_hwpt_id);
+
+ /* Allocate a vEVENTQ with veventq_depth=2 */
+ test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST,
+ &veventq_id, &veventq_fd);
+ test_err_veventq_alloc(EEXIST, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, NULL, NULL);
+ /* Set vdev_id to 0x99, unset it, and set to 0x88 */
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id,
+ self->nested_hwpt_id);
+ test_cmd_trigger_vevents(dev_id, 1);
+ test_cmd_read_vevents(veventq_fd, 1, 0x99, &prev_seq);
+ test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99,
+ &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+ test_ioctl_destroy(vdev_id);
+
+ /* Try again with 0x88 */
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x88, &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id,
+ self->nested_hwpt_id);
+ /* Trigger an overflow with three events */
+ test_cmd_trigger_vevents(dev_id, 3);
+ test_err_read_vevents(EOVERFLOW, veventq_fd, 3, 0x88,
+ &prev_seq);
+ /* Overflow must be gone after the previous reads */
+ test_cmd_trigger_vevents(dev_id, 1);
+ test_cmd_read_vevents(veventq_fd, 1, 0x88, &prev_seq);
+ close(veventq_fd);
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+ test_ioctl_destroy(vdev_id);
+ test_ioctl_destroy(veventq_id);
+ } else {
+ test_err_vdevice_alloc(ENOENT, viommu_id, dev_id, 0x99, NULL);
+ }
+}
+
+TEST_F(iommufd_viommu, vdevice_cache)
+{
+ struct iommu_viommu_invalidate_selftest inv_reqs[2] = {};
+ uint32_t viommu_id = self->viommu_id;
+ uint32_t dev_id = self->device_id;
+ uint32_t vdev_id = 0;
+ uint32_t num_inv;
+
+ if (!dev_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+
+ test_cmd_dev_check_cache_all(dev_id, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /* Check data_type by passing zero-length array */
+ num_inv = 0;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: Invalid data_type */
+ num_inv = 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: structure size sanity */
+ num_inv = 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs) + 1, &num_inv);
+ assert(!num_inv);
+
+ num_inv = 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 1,
+ &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid flag is passed */
+ num_inv = 1;
+ inv_reqs[0].flags = 0xffffffff;
+ inv_reqs[0].vdev_id = 0x99;
+ test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid data_uptr when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ test_err_viommu_invalidate(EINVAL, viommu_id, NULL,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid entry_len when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 0,
+ &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid cache_id */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid vdev_id */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x9;
+ inv_reqs[0].cache_id = 0;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /*
+ * Invalidate the 1st cache entry but fail the 2nd request
+ * due to invalid flags configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 0;
+ inv_reqs[1].flags = 0xffffffff;
+ inv_reqs[1].vdev_id = 0x99;
+ inv_reqs[1].cache_id = 1;
+ test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache(dev_id, 0, 0);
+ test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /*
+ * Invalidate the 1st cache entry but fail the 2nd request
+ * due to invalid cache_id configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 0;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].vdev_id = 0x99;
+ inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache(dev_id, 0, 0);
+ test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /* Invalidate the 2nd cache entry and verify */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 1;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache(dev_id, 0, 0);
+ test_cmd_dev_check_cache(dev_id, 1, 0);
+ test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /* Invalidate the 3rd and 4th cache entries and verify */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 2;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].vdev_id = 0x99;
+ inv_reqs[1].cache_id = 3;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(num_inv == 2);
+ test_cmd_dev_check_cache_all(dev_id, 0);
+
+ /* Invalidate all cache entries for nested_dev_id[1] and verify */
+ num_inv = 1;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache_all(dev_id, 0);
+ test_ioctl_destroy(vdev_id);
+}
+
+TEST_F(iommufd_viommu, hw_queue)
+{
+ __u64 iova = MOCK_APERTURE_START, iova2;
+ uint32_t viommu_id = self->viommu_id;
+ uint32_t hw_queue_id[2];
+
+ if (!viommu_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ /* Fail IOMMU_HW_QUEUE_TYPE_DEFAULT */
+ test_err_hw_queue_alloc(EOPNOTSUPP, viommu_id,
+ IOMMU_HW_QUEUE_TYPE_DEFAULT, 0, iova, PAGE_SIZE,
+ &hw_queue_id[0]);
+ /* Fail queue addr and length */
+ test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ 0, iova, 0, &hw_queue_id[0]);
+ test_err_hw_queue_alloc(EOVERFLOW, viommu_id,
+ IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, ~(uint64_t)0,
+ PAGE_SIZE, &hw_queue_id[0]);
+ /* Fail missing iova */
+ test_err_hw_queue_alloc(ENOENT, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ 0, iova, PAGE_SIZE, &hw_queue_id[0]);
+
+ /* Map iova */
+ test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova);
+ test_ioctl_ioas_map(buffer + PAGE_SIZE, PAGE_SIZE, &iova2);
+
+ /* Fail index=1 and =MAX; must start from index=0 */
+ test_err_hw_queue_alloc(EIO, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1,
+ iova, PAGE_SIZE, &hw_queue_id[0]);
+ test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ IOMMU_TEST_HW_QUEUE_MAX, iova, PAGE_SIZE,
+ &hw_queue_id[0]);
+
+ /* Allocate index=0, declare ownership of the iova */
+ test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 0,
+ iova, PAGE_SIZE, &hw_queue_id[0]);
+ /* Fail duplicated index */
+ test_err_hw_queue_alloc(EEXIST, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ 0, iova, PAGE_SIZE, &hw_queue_id[0]);
+ /* Fail unmap, due to iova ownership */
+ test_err_ioctl_ioas_unmap(EBUSY, iova, PAGE_SIZE);
+ /* The 2nd page is not pinned, so it can be unmmap */
+ test_ioctl_ioas_unmap(iova2, PAGE_SIZE);
+
+ /* Allocate index=1, with an unaligned case */
+ test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1,
+ iova + PAGE_SIZE / 2, PAGE_SIZE / 2,
+ &hw_queue_id[1]);
+ /* Fail to destroy, due to dependency */
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hw_queue_id[0]));
+
+ /* Destroy in descending order */
+ test_ioctl_destroy(hw_queue_id[1]);
+ test_ioctl_destroy(hw_queue_id[0]);
+ /* Now it can unmap the first page */
+ test_ioctl_ioas_unmap(iova, PAGE_SIZE);
+}
+
+TEST_F(iommufd_viommu, vdevice_tombstone)
+{
+ uint32_t viommu_id = self->viommu_id;
+ uint32_t dev_id = self->device_id;
+ uint32_t vdev_id = 0;
+
+ if (!dev_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ test_ioctl_destroy(self->stdev_id);
+ EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, vdev_id));
+}
+
+FIXTURE(iommufd_device_pasid)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t hwpt_id;
+ uint32_t stdev_id;
+ uint32_t device_id;
+ uint32_t no_pasid_stdev_id;
+ uint32_t no_pasid_device_id;
+};
+
+FIXTURE_VARIANT(iommufd_device_pasid)
+{
+ bool pasid_capable;
+};
+
+FIXTURE_SETUP(iommufd_device_pasid)
+{
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+ test_ioctl_ioas_alloc(&self->ioas_id);
+
+ test_cmd_mock_domain_flags(self->ioas_id,
+ MOCK_FLAGS_DEVICE_PASID,
+ &self->stdev_id, &self->hwpt_id,
+ &self->device_id);
+ if (!variant->pasid_capable)
+ test_cmd_mock_domain_flags(self->ioas_id, 0,
+ &self->no_pasid_stdev_id, NULL,
+ &self->no_pasid_device_id);
+}
+
+FIXTURE_TEARDOWN(iommufd_device_pasid)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_device_pasid, no_pasid)
+{
+ .pasid_capable = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_device_pasid, has_pasid)
+{
+ .pasid_capable = true,
+};
+
+TEST_F(iommufd_device_pasid, pasid_attach)
+{
+ struct iommu_hwpt_selftest data = {
+ .iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+ };
+ uint32_t nested_hwpt_id[3] = {};
+ uint32_t parent_hwpt_id = 0;
+ uint32_t fault_id, fault_fd;
+ uint32_t s2_hwpt_id = 0;
+ uint32_t iopf_hwpt_id;
+ uint32_t pasid = 100;
+ uint32_t viommu_id;
+
+ /*
+ * Negative, detach pasid without attaching, this is not expected.
+ * But it should not result in failure anyway.
+ */
+ test_cmd_pasid_detach(pasid);
+
+ /* Allocate two nested hwpts sharing one common parent hwpt */
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT,
+ &parent_hwpt_id);
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST,
+ &data, sizeof(data));
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &nested_hwpt_id[1],
+ IOMMU_HWPT_DATA_SELFTEST,
+ &data, sizeof(data));
+
+ /* Fault related preparation */
+ test_ioctl_fault_alloc(&fault_id, &fault_fd);
+ test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID,
+ &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ /* Allocate a regular nested hwpt based on viommu */
+ test_cmd_viommu_alloc(self->device_id, parent_hwpt_id,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, &viommu_id);
+ test_cmd_hwpt_alloc_nested(self->device_id, viommu_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &nested_hwpt_id[2],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &s2_hwpt_id);
+
+ /* Attach RID to non-pasid compat domain, */
+ test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
+ /* then attach to pasid should fail */
+ test_err_pasid_attach(EINVAL, pasid, s2_hwpt_id);
+
+ /* Attach RID to pasid compat domain, */
+ test_cmd_mock_domain_replace(self->stdev_id, s2_hwpt_id);
+ /* then attach to pasid should succeed, */
+ test_cmd_pasid_attach(pasid, nested_hwpt_id[0]);
+ /* but attach RID to non-pasid compat domain should fail now. */
+ test_err_mock_domain_replace(EINVAL, self->stdev_id, parent_hwpt_id);
+ /*
+ * Detach hwpt from pasid 100, and check if the pasid 100
+ * has null domain.
+ */
+ test_cmd_pasid_detach(pasid);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+ /* RID is attached to pasid-comapt domain, pasid path is not used */
+
+ if (!variant->pasid_capable) {
+ /*
+ * PASID-compatible domain can be used by non-PASID-capable
+ * device.
+ */
+ test_cmd_mock_domain_replace(self->no_pasid_stdev_id, nested_hwpt_id[0]);
+ test_cmd_mock_domain_replace(self->no_pasid_stdev_id, self->ioas_id);
+ /*
+ * Attach hwpt to pasid 100 of non-PASID-capable device,
+ * should fail, no matter domain is pasid-comapt or not.
+ */
+ EXPECT_ERRNO(EINVAL,
+ _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id,
+ pasid, parent_hwpt_id));
+ EXPECT_ERRNO(EINVAL,
+ _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id,
+ pasid, s2_hwpt_id));
+ }
+
+ /*
+ * Attach non pasid compat hwpt to pasid-capable device, should
+ * fail, and have null domain.
+ */
+ test_err_pasid_attach(EINVAL, pasid, parent_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+
+ /*
+ * Attach ioas to pasid 100, should fail, domain should
+ * be null.
+ */
+ test_err_pasid_attach(EINVAL, pasid, self->ioas_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+
+ /*
+ * Attach the s2_hwpt to pasid 100, should succeed, domain should
+ * be valid.
+ */
+ test_cmd_pasid_attach(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Try attach pasid 100 with another hwpt, should FAIL
+ * as attach does not allow overwrite, use REPLACE instead.
+ */
+ test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]);
+
+ /*
+ * Detach hwpt from pasid 100 for next test, should succeed,
+ * and have null domain.
+ */
+ test_cmd_pasid_detach(pasid);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+
+ /*
+ * Attach nested hwpt to pasid 100, should succeed, domain
+ * should be valid.
+ */
+ test_cmd_pasid_attach(pasid, nested_hwpt_id[0]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, nested_hwpt_id[0]));
+
+ /* Attach to pasid 100 which has been attached, should fail. */
+ test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]);
+
+ /* cleanup pasid 100 */
+ test_cmd_pasid_detach(pasid);
+
+ /* Replace tests */
+
+ pasid = 200;
+ /*
+ * Replace pasid 200 without attaching it, should fail
+ * with -EINVAL.
+ */
+ test_err_pasid_replace(EINVAL, pasid, s2_hwpt_id);
+
+ /*
+ * Attach the s2 hwpt to pasid 200, should succeed, domain should
+ * be valid.
+ */
+ test_cmd_pasid_attach(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Replace pasid 200 with self->ioas_id, should fail
+ * and domain should be the prior s2 hwpt.
+ */
+ test_err_pasid_replace(EINVAL, pasid, self->ioas_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Replace a nested hwpt for pasid 200, should succeed,
+ * and have valid domain.
+ */
+ test_cmd_pasid_replace(pasid, nested_hwpt_id[0]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, nested_hwpt_id[0]));
+
+ /*
+ * Replace with another nested hwpt for pasid 200, should
+ * succeed, and have valid domain.
+ */
+ test_cmd_pasid_replace(pasid, nested_hwpt_id[1]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, nested_hwpt_id[1]));
+
+ /* cleanup pasid 200 */
+ test_cmd_pasid_detach(pasid);
+
+ /* Negative Tests for pasid replace, use pasid 1024 */
+
+ /*
+ * Attach the s2 hwpt to pasid 1024, should succeed, domain should
+ * be valid.
+ */
+ pasid = 1024;
+ test_cmd_pasid_attach(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Replace pasid 1024 with nested_hwpt_id[0], should fail,
+ * but have the old valid domain. This is a designed
+ * negative case. Normally, this shall succeed.
+ */
+ test_err_pasid_replace(ENOMEM, pasid, nested_hwpt_id[0]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /* cleanup pasid 1024 */
+ test_cmd_pasid_detach(pasid);
+
+ /* Attach to iopf-capable hwpt */
+
+ /*
+ * Attach an iopf hwpt to pasid 2048, should succeed, domain should
+ * be valid.
+ */
+ pasid = 2048;
+ test_cmd_pasid_attach(pasid, iopf_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, iopf_hwpt_id));
+
+ test_cmd_trigger_iopf_pasid(self->device_id, pasid, fault_fd);
+
+ /*
+ * Replace with s2_hwpt_id for pasid 2048, should
+ * succeed, and have valid domain.
+ */
+ test_cmd_pasid_replace(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /* cleanup pasid 2048 */
+ test_cmd_pasid_detach(pasid);
+
+ test_ioctl_destroy(iopf_hwpt_id);
+ close(fault_fd);
+ test_ioctl_destroy(fault_id);
+
+ /* Detach the s2_hwpt_id from RID */
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
new file mode 100644
index 000000000000..45c14323a618
--- /dev/null
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -0,0 +1,748 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ *
+ * These tests are "kernel integrity" tests. They are looking for kernel
+ * WARN/OOPS/kasn/etc splats triggered by kernel sanitizers & debugging
+ * features. It does not attempt to verify that the system calls are doing what
+ * they are supposed to do.
+ *
+ * The basic philosophy is to run a sequence of calls that will succeed and then
+ * sweep every failure injection point on that call chain to look for
+ * interesting things in error handling.
+ *
+ * This test is best run with:
+ * echo 1 > /proc/sys/kernel/panic_on_warn
+ * If something is actually going wrong.
+ */
+#include <fcntl.h>
+#include <dirent.h>
+
+#define __EXPORTED_HEADERS__
+#include <linux/vfio.h>
+
+#include "iommufd_utils.h"
+
+static bool have_fault_injection;
+
+static int writeat(int dfd, const char *fn, const char *val)
+{
+ size_t val_len = strlen(val);
+ ssize_t res;
+ int fd;
+
+ fd = openat(dfd, fn, O_WRONLY);
+ if (fd == -1)
+ return -1;
+ res = write(fd, val, val_len);
+ assert(res == val_len);
+ close(fd);
+ return 0;
+}
+
+static __attribute__((constructor)) void setup_buffer(void)
+{
+ PAGE_SIZE = sysconf(_SC_PAGE_SIZE);
+
+ BUFFER_SIZE = 2*1024*1024;
+
+ buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+ mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+ &mfd);
+}
+
+/*
+ * This sets up fail_injection in a way that is useful for this test.
+ * It does not attempt to restore things back to how they were.
+ */
+static __attribute__((constructor)) void setup_fault_injection(void)
+{
+ DIR *debugfs = opendir("/sys/kernel/debug/");
+ struct dirent *dent;
+
+ if (!debugfs)
+ return;
+
+ /* Allow any allocation call to be fault injected */
+ if (writeat(dirfd(debugfs), "failslab/ignore-gfp-wait", "N"))
+ return;
+ writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-wait", "N");
+ writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-highmem", "N");
+
+ while ((dent = readdir(debugfs))) {
+ char fn[300];
+
+ if (strncmp(dent->d_name, "fail", 4) != 0)
+ continue;
+
+ /* We are looking for kernel splats, quiet down the log */
+ snprintf(fn, sizeof(fn), "%s/verbose", dent->d_name);
+ writeat(dirfd(debugfs), fn, "0");
+ }
+ closedir(debugfs);
+ have_fault_injection = true;
+}
+
+struct fail_nth_state {
+ int proc_fd;
+ unsigned int iteration;
+};
+
+static void fail_nth_first(struct __test_metadata *_metadata,
+ struct fail_nth_state *nth_state)
+{
+ char buf[300];
+
+ snprintf(buf, sizeof(buf), "/proc/self/task/%u/fail-nth", getpid());
+ nth_state->proc_fd = open(buf, O_RDWR);
+ ASSERT_NE(-1, nth_state->proc_fd);
+}
+
+static bool fail_nth_next(struct __test_metadata *_metadata,
+ struct fail_nth_state *nth_state,
+ int test_result)
+{
+ static const char disable_nth[] = "0";
+ char buf[300];
+
+ /*
+ * This is just an arbitrary limit based on the current kernel
+ * situation. Changes in the kernel can dramatically change the number of
+ * required fault injection sites, so if this hits it doesn't
+ * necessarily mean a test failure, just that the limit has to be made
+ * bigger.
+ */
+ ASSERT_GT(1000, nth_state->iteration);
+ if (nth_state->iteration != 0) {
+ ssize_t res;
+ ssize_t res2;
+
+ buf[0] = 0;
+ /*
+ * Annoyingly disabling the nth can also fail. This means
+ * the test passed without triggering failure
+ */
+ res = pread(nth_state->proc_fd, buf, sizeof(buf), 0);
+ if (res == -1 && errno == EFAULT) {
+ buf[0] = '1';
+ buf[1] = '\n';
+ res = 2;
+ }
+
+ res2 = pwrite(nth_state->proc_fd, disable_nth,
+ ARRAY_SIZE(disable_nth) - 1, 0);
+ if (res2 == -1 && errno == EFAULT) {
+ res2 = pwrite(nth_state->proc_fd, disable_nth,
+ ARRAY_SIZE(disable_nth) - 1, 0);
+ buf[0] = '1';
+ buf[1] = '\n';
+ }
+ ASSERT_EQ(ARRAY_SIZE(disable_nth) - 1, res2);
+
+ /* printf(" nth %u result=%d nth=%u\n", nth_state->iteration,
+ test_result, atoi(buf)); */
+ fflush(stdout);
+ ASSERT_LT(1, res);
+ if (res != 2 || buf[0] != '0' || buf[1] != '\n')
+ return false;
+ } else {
+ /* printf(" nth %u result=%d\n", nth_state->iteration,
+ test_result); */
+ }
+ nth_state->iteration++;
+ return true;
+}
+
+/*
+ * This is called during the test to start failure injection. It allows the test
+ * to do some setup that has already been swept and thus reduce the required
+ * iterations.
+ */
+void __fail_nth_enable(struct __test_metadata *_metadata,
+ struct fail_nth_state *nth_state)
+{
+ char buf[300];
+ size_t len;
+
+ if (!nth_state->iteration)
+ return;
+
+ len = snprintf(buf, sizeof(buf), "%u", nth_state->iteration);
+ ASSERT_EQ(len, pwrite(nth_state->proc_fd, buf, len, 0));
+}
+#define fail_nth_enable() __fail_nth_enable(_metadata, _nth_state)
+
+#define TEST_FAIL_NTH(fixture_name, name) \
+ static int test_nth_##name(struct __test_metadata *_metadata, \
+ FIXTURE_DATA(fixture_name) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ *variant, \
+ struct fail_nth_state *_nth_state); \
+ TEST_F(fixture_name, name) \
+ { \
+ struct fail_nth_state nth_state = {}; \
+ int test_result = 0; \
+ \
+ if (!have_fault_injection) \
+ SKIP(return, \
+ "fault injection is not enabled in the kernel"); \
+ fail_nth_first(_metadata, &nth_state); \
+ ASSERT_EQ(0, test_nth_##name(_metadata, self, variant, \
+ &nth_state)); \
+ while (fail_nth_next(_metadata, &nth_state, test_result)) { \
+ fixture_name##_teardown(_metadata, self, variant); \
+ fixture_name##_setup(_metadata, self, variant); \
+ test_result = test_nth_##name(_metadata, self, \
+ variant, &nth_state); \
+ }; \
+ ASSERT_EQ(0, test_result); \
+ } \
+ static int test_nth_##name( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) __attribute__((unused)) \
+ *variant, \
+ struct fail_nth_state *_nth_state)
+
+FIXTURE(basic_fail_nth)
+{
+ int fd;
+ uint32_t access_id;
+ uint32_t stdev_id;
+ uint32_t pasid;
+};
+
+FIXTURE_SETUP(basic_fail_nth)
+{
+ self->fd = -1;
+ self->access_id = 0;
+ self->stdev_id = 0;
+ self->pasid = 0; //test should use a non-zero value
+}
+
+FIXTURE_TEARDOWN(basic_fail_nth)
+{
+ int rc;
+
+ if (self->access_id) {
+ /* The access FD holds the iommufd open until it closes */
+ rc = _test_cmd_destroy_access(self->access_id);
+ assert(rc == 0);
+ }
+ if (self->pasid && self->stdev_id)
+ _test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid);
+ teardown_iommufd(self->fd, _metadata);
+}
+
+/* Cover ioas.c */
+TEST_FAIL_NTH(basic_fail_nth, basic)
+{
+ struct iommu_iova_range ranges[10];
+ uint32_t ioas_id;
+ __u64 iova;
+
+ fail_nth_enable();
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ {
+ struct iommu_ioas_iova_ranges ranges_cmd = {
+ .size = sizeof(ranges_cmd),
+ .num_iovas = ARRAY_SIZE(ranges),
+ .ioas_id = ioas_id,
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+ if (ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd))
+ return -1;
+ }
+
+ {
+ struct iommu_ioas_allow_iovas allow_cmd = {
+ .size = sizeof(allow_cmd),
+ .ioas_id = ioas_id,
+ .num_iovas = 1,
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+
+ ranges[0].start = 16*1024;
+ ranges[0].last = BUFFER_SIZE + 16 * 1024 * 600 - 1;
+ if (ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd))
+ return -1;
+ }
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ {
+ struct iommu_ioas_copy copy_cmd = {
+ .size = sizeof(copy_cmd),
+ .flags = IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE,
+ .dst_ioas_id = ioas_id,
+ .src_ioas_id = ioas_id,
+ .src_iova = iova,
+ .length = sizeof(ranges),
+ };
+
+ if (ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd))
+ return -1;
+ }
+
+ if (_test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE,
+ NULL))
+ return -1;
+ /* Failure path of no IOVA to unmap */
+ _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, NULL);
+ return 0;
+}
+
+/* iopt_area_fill_domains() and iopt_area_fill_domain() */
+TEST_FAIL_NTH(basic_fail_nth, map_domain)
+{
+ uint32_t ioas_id;
+ __u32 stdev_id;
+ __u32 hwpt_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ fail_nth_enable();
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ if (_test_ioctl_destroy(self->fd, stdev_id))
+ return -1;
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+ return 0;
+}
+
+/* iopt_area_fill_domains() and iopt_area_fill_domain() */
+TEST_FAIL_NTH(basic_fail_nth, map_file_domain)
+{
+ uint32_t ioas_id;
+ __u32 stdev_id;
+ __u32 hwpt_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ fail_nth_enable();
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+
+ if (_test_ioctl_ioas_map_file(self->fd, ioas_id, mfd, 0, 262144, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ if (_test_ioctl_destroy(self->fd, stdev_id))
+ return -1;
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+ return 0;
+}
+
+TEST_FAIL_NTH(basic_fail_nth, map_two_domains)
+{
+ uint32_t ioas_id;
+ __u32 stdev_id2;
+ __u32 stdev_id;
+ __u32 hwpt_id2;
+ __u32 hwpt_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+
+ fail_nth_enable();
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2,
+ NULL))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ if (_test_ioctl_destroy(self->fd, stdev_id))
+ return -1;
+
+ if (_test_ioctl_destroy(self->fd, stdev_id2))
+ return -1;
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2,
+ NULL))
+ return -1;
+ return 0;
+}
+
+TEST_FAIL_NTH(basic_fail_nth, access_rw)
+{
+ uint64_t tmp_big[4096];
+ uint32_t ioas_id;
+ uint16_t tmp[32];
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ fail_nth_enable();
+
+ if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, 0))
+ return -1;
+
+ {
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_RW,
+ .id = self->access_id,
+ .access_rw = { .iova = iova,
+ .length = sizeof(tmp),
+ .uptr = (uintptr_t)tmp },
+ };
+
+ // READ
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+
+ access_cmd.access_rw.flags = MOCK_ACCESS_RW_WRITE;
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+
+ access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH;
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH |
+ MOCK_ACCESS_RW_WRITE;
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ }
+
+ {
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_RW,
+ .id = self->access_id,
+ .access_rw = { .iova = iova,
+ .flags = MOCK_ACCESS_RW_SLOW_PATH,
+ .length = sizeof(tmp_big),
+ .uptr = (uintptr_t)tmp_big },
+ };
+
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ }
+ if (_test_cmd_destroy_access(self->access_id))
+ return -1;
+ self->access_id = 0;
+ return 0;
+}
+
+/* pages.c access functions */
+TEST_FAIL_NTH(basic_fail_nth, access_pin)
+{
+ uint32_t access_pages_id;
+ uint32_t ioas_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES))
+ return -1;
+
+ fail_nth_enable();
+
+ {
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .id = self->access_id,
+ .access_pages = { .iova = iova,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ access_pages_id = access_cmd.access_pages.out_access_pages_id;
+ }
+
+ if (_test_cmd_destroy_access_pages(self->fd, self->access_id,
+ access_pages_id))
+ return -1;
+
+ if (_test_cmd_destroy_access(self->access_id))
+ return -1;
+ self->access_id = 0;
+ return 0;
+}
+
+/* iopt_pages_fill_xarray() */
+TEST_FAIL_NTH(basic_fail_nth, access_pin_domain)
+{
+ uint32_t access_pages_id;
+ uint32_t ioas_id;
+ __u32 stdev_id;
+ __u32 hwpt_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES))
+ return -1;
+
+ fail_nth_enable();
+
+ {
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .id = self->access_id,
+ .access_pages = { .iova = iova,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ access_pages_id = access_cmd.access_pages.out_access_pages_id;
+ }
+
+ if (_test_cmd_destroy_access_pages(self->fd, self->access_id,
+ access_pages_id))
+ return -1;
+
+ if (_test_cmd_destroy_access(self->access_id))
+ return -1;
+ self->access_id = 0;
+
+ if (_test_ioctl_destroy(self->fd, stdev_id))
+ return -1;
+ return 0;
+}
+
+/* device.c */
+TEST_FAIL_NTH(basic_fail_nth, device)
+{
+ struct iommu_hwpt_selftest data = {
+ .iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+ };
+ struct iommu_test_hw_info info;
+ uint32_t fault_id, fault_fd;
+ uint32_t veventq_id, veventq_fd;
+ uint32_t fault_hwpt_id;
+ uint32_t test_hwpt_id;
+ uint32_t ioas_id;
+ uint32_t ioas_id2;
+ uint32_t idev_id;
+ uint32_t hwpt_id;
+ uint32_t viommu_id;
+ uint32_t hw_queue_id;
+ uint32_t vdev_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id2))
+ return -1;
+
+ iova = MOCK_APERTURE_START;
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, PAGE_SIZE, &iova,
+ IOMMU_IOAS_MAP_FIXED_IOVA |
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+ if (_test_ioctl_ioas_map(self->fd, ioas_id2, buffer, PAGE_SIZE, &iova,
+ IOMMU_IOAS_MAP_FIXED_IOVA |
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ fail_nth_enable();
+
+ if (_test_cmd_mock_domain_flags(self->fd, ioas_id,
+ MOCK_FLAGS_DEVICE_PASID,
+ &self->stdev_id, NULL, &idev_id))
+ return -1;
+
+ if (_test_cmd_get_hw_info(self->fd, idev_id, IOMMU_HW_INFO_TYPE_DEFAULT,
+ &info, sizeof(info), NULL, NULL))
+ return -1;
+
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
+ IOMMU_HWPT_ALLOC_PASID, &hwpt_id,
+ IOMMU_HWPT_DATA_NONE, 0, 0))
+ return -1;
+
+ if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, ioas_id2, NULL))
+ return -1;
+
+ if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, hwpt_id, NULL))
+ return -1;
+
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
+ IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_ALLOC_PASID,
+ &hwpt_id,
+ IOMMU_HWPT_DATA_NONE, 0, 0))
+ return -1;
+
+ if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, 0,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ &viommu_id))
+ return -1;
+
+ if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id))
+ return -1;
+
+ if (_test_cmd_hw_queue_alloc(self->fd, viommu_id,
+ IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, iova,
+ PAGE_SIZE, &hw_queue_id))
+ return -1;
+
+ if (_test_ioctl_fault_alloc(self->fd, &fault_id, &fault_fd))
+ return -1;
+ close(fault_fd);
+
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, hwpt_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID, &fault_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)))
+ return -1;
+
+ if (_test_cmd_veventq_alloc(self->fd, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, &veventq_id,
+ &veventq_fd))
+ return -1;
+ close(veventq_fd);
+
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
+ IOMMU_HWPT_ALLOC_PASID,
+ &test_hwpt_id,
+ IOMMU_HWPT_DATA_NONE, 0, 0))
+ return -1;
+
+ /* Tests for pasid attach/replace/detach */
+
+ self->pasid = 200;
+
+ if (_test_cmd_pasid_attach(self->fd, self->stdev_id,
+ self->pasid, hwpt_id)) {
+ self->pasid = 0;
+ return -1;
+ }
+
+ if (_test_cmd_pasid_replace(self->fd, self->stdev_id,
+ self->pasid, test_hwpt_id))
+ return -1;
+
+ if (_test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid))
+ return -1;
+
+ self->pasid = 0;
+
+ return 0;
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
new file mode 100644
index 000000000000..5502751d500c
--- /dev/null
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -0,0 +1,1259 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */
+#ifndef __SELFTEST_IOMMUFD_UTILS
+#define __SELFTEST_IOMMUFD_UTILS
+
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/fcntl.h>
+#include <sys/ioctl.h>
+#include <stdint.h>
+#include <assert.h>
+#include <poll.h>
+
+#include "kselftest_harness.h"
+#include "../../../../drivers/iommu/iommufd/iommufd_test.h"
+
+/* Hack to make assertions more readable */
+#define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD
+
+/* Imported from include/asm-generic/bitops/generic-non-atomic.h */
+#define BITS_PER_BYTE 8
+#define BITS_PER_LONG __BITS_PER_LONG
+#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG))
+#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG)
+
+enum {
+ IOPT_PAGES_ACCOUNT_NONE = 0,
+ IOPT_PAGES_ACCOUNT_USER = 1,
+ IOPT_PAGES_ACCOUNT_MM = 2,
+};
+
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
+static inline void set_bit(unsigned int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+ *p |= mask;
+}
+
+static inline bool test_bit(unsigned int nr, unsigned long *addr)
+{
+ return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1)));
+}
+
+static void *buffer;
+static unsigned long BUFFER_SIZE;
+
+static void *mfd_buffer;
+static int mfd;
+
+static unsigned long PAGE_SIZE;
+
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
+#define test_err_mmap(_errno, length, offset) \
+ EXPECT_ERRNO(_errno, (long)mmap(NULL, length, PROT_READ | PROT_WRITE, \
+ MAP_SHARED, self->fd, offset))
+
+static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p)
+{
+ int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0;
+ int mfd = memfd_create("buffer", mfd_flags);
+ void *buf = MAP_FAILED;
+
+ if (mfd <= 0)
+ return MAP_FAILED;
+ if (ftruncate(mfd, length))
+ goto out;
+ *mfd_p = mfd;
+ buf = mmap(0, length, prot, flags, mfd, 0);
+out:
+ if (buf == MAP_FAILED)
+ close(mfd);
+ return buf;
+}
+
+/*
+ * Have the kernel check the refcount on pages. I don't know why a freshly
+ * mmap'd anon non-compound page starts out with a ref of 3
+ */
+#define check_refs(_ptr, _length, _refs) \
+ ({ \
+ struct iommu_test_cmd test_cmd = { \
+ .size = sizeof(test_cmd), \
+ .op = IOMMU_TEST_OP_MD_CHECK_REFS, \
+ .check_refs = { .length = _length, \
+ .uptr = (uintptr_t)(_ptr), \
+ .refs = _refs }, \
+ }; \
+ ASSERT_EQ(0, \
+ ioctl(self->fd, \
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), \
+ &test_cmd)); \
+ })
+
+static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id,
+ __u32 *hwpt_id, __u32 *idev_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_MOCK_DOMAIN,
+ .id = ioas_id,
+ .mock_domain = {},
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ if (stdev_id)
+ *stdev_id = cmd.mock_domain.out_stdev_id;
+ assert(cmd.id != 0);
+ if (hwpt_id)
+ *hwpt_id = cmd.mock_domain.out_hwpt_id;
+ if (idev_id)
+ *idev_id = cmd.mock_domain.out_idev_id;
+ return 0;
+}
+#define test_cmd_mock_domain(ioas_id, stdev_id, hwpt_id, idev_id) \
+ ASSERT_EQ(0, _test_cmd_mock_domain(self->fd, ioas_id, stdev_id, \
+ hwpt_id, idev_id))
+#define test_err_mock_domain(_errno, ioas_id, stdev_id, hwpt_id) \
+ EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \
+ stdev_id, hwpt_id, NULL))
+
+static int _test_cmd_mock_domain_flags(int fd, unsigned int ioas_id,
+ __u32 stdev_flags, __u32 *stdev_id,
+ __u32 *hwpt_id, __u32 *idev_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS,
+ .id = ioas_id,
+ .mock_domain_flags = { .dev_flags = stdev_flags },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ if (stdev_id)
+ *stdev_id = cmd.mock_domain_flags.out_stdev_id;
+ assert(cmd.id != 0);
+ if (hwpt_id)
+ *hwpt_id = cmd.mock_domain_flags.out_hwpt_id;
+ if (idev_id)
+ *idev_id = cmd.mock_domain_flags.out_idev_id;
+ return 0;
+}
+#define test_cmd_mock_domain_flags(ioas_id, flags, stdev_id, hwpt_id, idev_id) \
+ ASSERT_EQ(0, _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \
+ stdev_id, hwpt_id, idev_id))
+#define test_err_mock_domain_flags(_errno, ioas_id, flags, stdev_id, hwpt_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \
+ stdev_id, hwpt_id, NULL))
+
+static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id,
+ __u32 *hwpt_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE,
+ .id = stdev_id,
+ .mock_domain_replace = {
+ .pt_id = pt_id,
+ },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ if (hwpt_id)
+ *hwpt_id = cmd.mock_domain_replace.pt_id;
+ return 0;
+}
+
+#define test_cmd_mock_domain_replace(stdev_id, pt_id) \
+ ASSERT_EQ(0, _test_cmd_mock_domain_replace(self->fd, stdev_id, pt_id, \
+ NULL))
+#define test_err_mock_domain_replace(_errno, stdev_id, pt_id) \
+ EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \
+ pt_id, NULL))
+
+static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_id,
+ __u32 flags, __u32 *hwpt_id, __u32 data_type,
+ void *data, size_t data_len)
+{
+ struct iommu_hwpt_alloc cmd = {
+ .size = sizeof(cmd),
+ .flags = flags,
+ .dev_id = device_id,
+ .pt_id = pt_id,
+ .data_type = data_type,
+ .data_len = data_len,
+ .data_uptr = (uint64_t)data,
+ .fault_id = ft_id,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HWPT_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (hwpt_id)
+ *hwpt_id = cmd.out_hwpt_id;
+ return 0;
+}
+
+#define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
+ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
+ 0))
+#define test_cmd_hwpt_alloc_iommupt(device_id, pt_id, flags, iommupt_type, \
+ hwpt_id) \
+ ({ \
+ struct iommu_hwpt_selftest user_cfg = { \
+ .pagetable_type = iommupt_type \
+ }; \
+ \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc( \
+ self->fd, device_id, pt_id, 0, flags, \
+ hwpt_id, IOMMU_HWPT_DATA_SELFTEST, \
+ &user_cfg, sizeof(user_cfg))); \
+ })
+#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \
+ EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \
+ self->fd, device_id, pt_id, 0, flags, \
+ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0))
+
+#define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \
+ data_type, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
+ hwpt_id, data_type, data, data_len))
+#define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \
+ data_type, data, data_len) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
+ hwpt_id, data_type, data, data_len))
+
+#define test_cmd_hwpt_alloc_iopf(device_id, pt_id, fault_id, flags, hwpt_id, \
+ data_type, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \
+ flags, hwpt_id, data_type, data, \
+ data_len))
+#define test_err_hwpt_alloc_iopf(_errno, device_id, pt_id, fault_id, flags, \
+ hwpt_id, data_type, data, data_len) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \
+ flags, hwpt_id, data_type, data, \
+ data_len))
+
+#define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \
+ ({ \
+ struct iommu_test_cmd test_cmd = { \
+ .size = sizeof(test_cmd), \
+ .op = IOMMU_TEST_OP_MD_CHECK_IOTLB, \
+ .id = hwpt_id, \
+ .check_iotlb = { \
+ .id = iotlb_id, \
+ .iotlb = expected, \
+ }, \
+ }; \
+ ASSERT_EQ(0, \
+ ioctl(self->fd, \
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_IOTLB), \
+ &test_cmd)); \
+ })
+
+#define test_cmd_hwpt_check_iotlb_all(hwpt_id, expected) \
+ ({ \
+ int i; \
+ for (i = 0; i < MOCK_NESTED_DOMAIN_IOTLB_NUM; i++) \
+ test_cmd_hwpt_check_iotlb(hwpt_id, i, expected); \
+ })
+
+#define test_cmd_dev_check_cache(device_id, cache_id, expected) \
+ ({ \
+ struct iommu_test_cmd test_cmd = { \
+ .size = sizeof(test_cmd), \
+ .op = IOMMU_TEST_OP_DEV_CHECK_CACHE, \
+ .id = device_id, \
+ .check_dev_cache = { \
+ .id = cache_id, \
+ .cache = expected, \
+ }, \
+ }; \
+ ASSERT_EQ(0, ioctl(self->fd, \
+ _IOMMU_TEST_CMD( \
+ IOMMU_TEST_OP_DEV_CHECK_CACHE), \
+ &test_cmd)); \
+ })
+
+#define test_cmd_dev_check_cache_all(device_id, expected) \
+ ({ \
+ int c; \
+ for (c = 0; c < MOCK_DEV_CACHE_NUM; c++) \
+ test_cmd_dev_check_cache(device_id, c, expected); \
+ })
+
+static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs,
+ uint32_t data_type, uint32_t lreq,
+ uint32_t *nreqs)
+{
+ struct iommu_hwpt_invalidate cmd = {
+ .size = sizeof(cmd),
+ .hwpt_id = hwpt_id,
+ .data_type = data_type,
+ .data_uptr = (uint64_t)reqs,
+ .entry_len = lreq,
+ .entry_num = *nreqs,
+ };
+ int rc = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cmd);
+ *nreqs = cmd.entry_num;
+ return rc;
+}
+
+#define test_cmd_hwpt_invalidate(hwpt_id, reqs, data_type, lreq, nreqs) \
+ ({ \
+ ASSERT_EQ(0, \
+ _test_cmd_hwpt_invalidate(self->fd, hwpt_id, reqs, \
+ data_type, lreq, nreqs)); \
+ })
+#define test_err_hwpt_invalidate(_errno, hwpt_id, reqs, data_type, lreq, \
+ nreqs) \
+ ({ \
+ EXPECT_ERRNO(_errno, _test_cmd_hwpt_invalidate( \
+ self->fd, hwpt_id, reqs, \
+ data_type, lreq, nreqs)); \
+ })
+
+static int _test_cmd_viommu_invalidate(int fd, __u32 viommu_id, void *reqs,
+ uint32_t data_type, uint32_t lreq,
+ uint32_t *nreqs)
+{
+ struct iommu_hwpt_invalidate cmd = {
+ .size = sizeof(cmd),
+ .hwpt_id = viommu_id,
+ .data_type = data_type,
+ .data_uptr = (uint64_t)reqs,
+ .entry_len = lreq,
+ .entry_num = *nreqs,
+ };
+ int rc = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cmd);
+ *nreqs = cmd.entry_num;
+ return rc;
+}
+
+#define test_cmd_viommu_invalidate(viommu, reqs, lreq, nreqs) \
+ ({ \
+ ASSERT_EQ(0, \
+ _test_cmd_viommu_invalidate(self->fd, viommu, reqs, \
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, \
+ lreq, nreqs)); \
+ })
+#define test_err_viommu_invalidate(_errno, viommu_id, reqs, data_type, lreq, \
+ nreqs) \
+ ({ \
+ EXPECT_ERRNO(_errno, _test_cmd_viommu_invalidate( \
+ self->fd, viommu_id, reqs, \
+ data_type, lreq, nreqs)); \
+ })
+
+static int _test_cmd_access_replace_ioas(int fd, __u32 access_id,
+ unsigned int ioas_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_ACCESS_REPLACE_IOAS,
+ .id = access_id,
+ .access_replace_ioas = { .ioas_id = ioas_id },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ return 0;
+}
+#define test_cmd_access_replace_ioas(access_id, ioas_id) \
+ ASSERT_EQ(0, _test_cmd_access_replace_ioas(self->fd, access_id, ioas_id))
+
+static int _test_cmd_set_dirty_tracking(int fd, __u32 hwpt_id, bool enabled)
+{
+ struct iommu_hwpt_set_dirty_tracking cmd = {
+ .size = sizeof(cmd),
+ .flags = enabled ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
+ .hwpt_id = hwpt_id,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &cmd);
+ if (ret)
+ return -errno;
+ return 0;
+}
+#define test_cmd_set_dirty_tracking(hwpt_id, enabled) \
+ ASSERT_EQ(0, _test_cmd_set_dirty_tracking(self->fd, hwpt_id, enabled))
+
+static int _test_cmd_get_dirty_bitmap(int fd, __u32 hwpt_id, size_t length,
+ __u64 iova, size_t page_size,
+ __u64 *bitmap, __u32 flags)
+{
+ struct iommu_hwpt_get_dirty_bitmap cmd = {
+ .size = sizeof(cmd),
+ .hwpt_id = hwpt_id,
+ .flags = flags,
+ .iova = iova,
+ .length = length,
+ .page_size = page_size,
+ .data = (uintptr_t)bitmap,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &cmd);
+ if (ret)
+ return ret;
+ return 0;
+}
+
+#define test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, \
+ bitmap, flags) \
+ ASSERT_EQ(0, _test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, \
+ page_size, bitmap, flags))
+
+static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
+ __u64 iova, size_t page_size,
+ __u64 *bitmap, __u64 *dirty)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DIRTY,
+ .id = hwpt_id,
+ .dirty = {
+ .iova = iova,
+ .length = length,
+ .page_size = page_size,
+ .uptr = (uintptr_t)bitmap,
+ }
+ };
+ int ret;
+
+ ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_DIRTY), &cmd);
+ if (ret)
+ return -ret;
+ if (dirty)
+ *dirty = cmd.dirty.out_nr_dirty;
+ return 0;
+}
+
+#define test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, \
+ bitmap, nr) \
+ ASSERT_EQ(0, \
+ _test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, \
+ page_size, bitmap, nr))
+
+static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
+ __u64 iova, size_t page_size,
+ size_t pte_page_size, __u64 *bitmap,
+ __u64 nbits, __u32 flags,
+ struct __test_metadata *_metadata)
+{
+ unsigned long npte = pte_page_size / page_size, pteset = 2 * npte;
+ unsigned long j, i, nr = nbits / pteset ?: 1;
+ unsigned long bitmap_size = DIV_ROUND_UP(nbits, BITS_PER_BYTE);
+ __u64 out_dirty = 0;
+
+ /* Mark all even bits as dirty in the mock domain */
+ memset(bitmap, 0, bitmap_size);
+ for (i = 0; i < nbits; i += pteset)
+ set_bit(i, (unsigned long *)bitmap);
+
+ test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size,
+ bitmap, &out_dirty);
+ ASSERT_EQ(nr, out_dirty);
+
+ /* Expect all even bits as dirty in the user bitmap */
+ memset(bitmap, 0, bitmap_size);
+ test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
+ flags);
+ /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */
+ for (i = 0; i < nbits; i += pteset) {
+ for (j = 0; j < pteset; j++) {
+ ASSERT_EQ(j < npte,
+ test_bit(i + j, (unsigned long *)bitmap));
+ }
+ ASSERT_EQ(!(i % pteset), test_bit(i, (unsigned long *)bitmap));
+ }
+
+ memset(bitmap, 0, bitmap_size);
+ test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
+ flags);
+
+ /* It as read already -- expect all zeroes */
+ for (i = 0; i < nbits; i += pteset) {
+ for (j = 0; j < pteset; j++) {
+ ASSERT_EQ(
+ (j < npte) &&
+ (flags &
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR),
+ test_bit(i + j, (unsigned long *)bitmap));
+ }
+ }
+
+ return 0;
+}
+#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, pte_size,\
+ bitmap, bitmap_size, flags, _metadata) \
+ ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \
+ page_size, pte_size, bitmap, \
+ bitmap_size, flags, _metadata))
+
+static int _test_cmd_create_access(int fd, unsigned int ioas_id,
+ __u32 *access_id, unsigned int flags)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_CREATE_ACCESS,
+ .id = ioas_id,
+ .create_access = { .flags = flags },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ *access_id = cmd.create_access.out_access_fd;
+ return 0;
+}
+#define test_cmd_create_access(ioas_id, access_id, flags) \
+ ASSERT_EQ(0, _test_cmd_create_access(self->fd, ioas_id, access_id, \
+ flags))
+
+static int _test_cmd_destroy_access(unsigned int access_id)
+{
+ return close(access_id);
+}
+#define test_cmd_destroy_access(access_id) \
+ ASSERT_EQ(0, _test_cmd_destroy_access(access_id))
+
+static int _test_cmd_destroy_access_pages(int fd, unsigned int access_id,
+ unsigned int access_pages_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DESTROY_ACCESS_PAGES,
+ .id = access_id,
+ .destroy_access_pages = { .access_pages_id = access_pages_id },
+ };
+ return ioctl(fd, IOMMU_TEST_CMD, &cmd);
+}
+#define test_cmd_destroy_access_pages(access_id, access_pages_id) \
+ ASSERT_EQ(0, _test_cmd_destroy_access_pages(self->fd, access_id, \
+ access_pages_id))
+#define test_err_destroy_access_pages(_errno, access_id, access_pages_id) \
+ EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages( \
+ self->fd, access_id, access_pages_id))
+
+static int _test_cmd_get_dmabuf(int fd, size_t len, int *out_fd)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DMABUF_GET,
+ .dmabuf_get = { .length = len, .open_flags = O_CLOEXEC },
+ };
+
+ *out_fd = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (*out_fd < 0)
+ return -1;
+ return 0;
+}
+#define test_cmd_get_dmabuf(len, out_fd) \
+ ASSERT_EQ(0, _test_cmd_get_dmabuf(self->fd, len, out_fd))
+
+static int _test_cmd_revoke_dmabuf(int fd, int dmabuf_fd, bool revoked)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DMABUF_REVOKE,
+ .dmabuf_revoke = { .dmabuf_fd = dmabuf_fd, .revoked = revoked },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret < 0)
+ return -1;
+ return 0;
+}
+#define test_cmd_revoke_dmabuf(dmabuf_fd, revoke) \
+ ASSERT_EQ(0, _test_cmd_revoke_dmabuf(self->fd, dmabuf_fd, revoke))
+
+static int _test_ioctl_destroy(int fd, unsigned int id)
+{
+ struct iommu_destroy cmd = {
+ .size = sizeof(cmd),
+ .id = id,
+ };
+ return ioctl(fd, IOMMU_DESTROY, &cmd);
+}
+#define test_ioctl_destroy(id) ASSERT_EQ(0, _test_ioctl_destroy(self->fd, id))
+
+static int _test_ioctl_ioas_alloc(int fd, __u32 *id)
+{
+ struct iommu_ioas_alloc cmd = {
+ .size = sizeof(cmd),
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ *id = cmd.out_ioas_id;
+ return 0;
+}
+#define test_ioctl_ioas_alloc(id) \
+ ({ \
+ ASSERT_EQ(0, _test_ioctl_ioas_alloc(self->fd, id)); \
+ ASSERT_NE(0, *(id)); \
+ })
+
+static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer,
+ size_t length, __u64 *iova, unsigned int flags)
+{
+ struct iommu_ioas_map cmd = {
+ .size = sizeof(cmd),
+ .flags = flags,
+ .ioas_id = ioas_id,
+ .user_va = (uintptr_t)buffer,
+ .length = length,
+ };
+ int ret;
+
+ if (flags & IOMMU_IOAS_MAP_FIXED_IOVA)
+ cmd.iova = *iova;
+
+ ret = ioctl(fd, IOMMU_IOAS_MAP, &cmd);
+ *iova = cmd.iova;
+ return ret;
+}
+#define test_ioctl_ioas_map(buffer, length, iova_p) \
+ ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \
+ length, iova_p, \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE))
+
+#define test_err_ioctl_ioas_map(_errno, buffer, length, iova_p) \
+ EXPECT_ERRNO(_errno, \
+ _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \
+ length, iova_p, \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE))
+
+#define test_ioctl_ioas_map_id(ioas_id, buffer, length, iova_p) \
+ ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, ioas_id, buffer, length, \
+ iova_p, \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE))
+
+#define test_ioctl_ioas_map_fixed(buffer, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ ASSERT_EQ(0, _test_ioctl_ioas_map( \
+ self->fd, self->ioas_id, buffer, length, \
+ &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
+#define test_ioctl_ioas_map_fixed_id(ioas_id, buffer, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ ASSERT_EQ(0, \
+ _test_ioctl_ioas_map( \
+ self->fd, ioas_id, buffer, length, &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
+#define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ EXPECT_ERRNO(_errno, \
+ _test_ioctl_ioas_map( \
+ self->fd, self->ioas_id, buffer, length, \
+ &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
+static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t iova,
+ size_t length, uint64_t *out_len)
+{
+ struct iommu_ioas_unmap cmd = {
+ .size = sizeof(cmd),
+ .ioas_id = ioas_id,
+ .iova = iova,
+ .length = length,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &cmd);
+ if (out_len)
+ *out_len = cmd.length;
+ return ret;
+}
+#define test_ioctl_ioas_unmap(iova, length) \
+ ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, iova, \
+ length, NULL))
+
+#define test_ioctl_ioas_unmap_id(ioas_id, iova, length) \
+ ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, length, \
+ NULL))
+
+#define test_err_ioctl_ioas_unmap(_errno, iova, length) \
+ EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \
+ iova, length, NULL))
+
+static int _test_ioctl_ioas_map_file(int fd, unsigned int ioas_id, int mfd,
+ size_t start, size_t length, __u64 *iova,
+ unsigned int flags)
+{
+ struct iommu_ioas_map_file cmd = {
+ .size = sizeof(cmd),
+ .flags = flags,
+ .ioas_id = ioas_id,
+ .fd = mfd,
+ .start = start,
+ .length = length,
+ };
+ int ret;
+
+ if (flags & IOMMU_IOAS_MAP_FIXED_IOVA)
+ cmd.iova = *iova;
+
+ ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &cmd);
+ *iova = cmd.iova;
+ return ret;
+}
+
+#define test_ioctl_ioas_map_file(mfd, start, length, iova_p) \
+ ASSERT_EQ(0, \
+ _test_ioctl_ioas_map_file( \
+ self->fd, self->ioas_id, mfd, start, length, iova_p, \
+ IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))
+
+#define test_err_ioctl_ioas_map_file(_errno, mfd, start, length, iova_p) \
+ EXPECT_ERRNO( \
+ _errno, \
+ _test_ioctl_ioas_map_file( \
+ self->fd, self->ioas_id, mfd, start, length, iova_p, \
+ IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))
+
+#define test_ioctl_ioas_map_id_file(ioas_id, mfd, start, length, iova_p) \
+ ASSERT_EQ(0, \
+ _test_ioctl_ioas_map_file( \
+ self->fd, ioas_id, mfd, start, length, iova_p, \
+ IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))
+
+#define test_ioctl_ioas_map_fixed_file(mfd, start, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ ASSERT_EQ(0, _test_ioctl_ioas_map_file( \
+ self->fd, self->ioas_id, mfd, start, \
+ length, &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
+static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit)
+{
+ struct iommu_test_cmd memlimit_cmd = {
+ .size = sizeof(memlimit_cmd),
+ .op = IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT,
+ .memory_limit = { .limit = limit },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT),
+ &memlimit_cmd);
+}
+
+#define test_ioctl_set_temp_memory_limit(limit) \
+ ASSERT_EQ(0, _test_ioctl_set_temp_memory_limit(self->fd, limit))
+
+#define test_ioctl_set_default_memory_limit() \
+ test_ioctl_set_temp_memory_limit(65536)
+
+static void teardown_iommufd(int fd, struct __test_metadata *_metadata)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_MD_CHECK_REFS,
+ .check_refs = { .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+
+ if (fd == -1)
+ return;
+
+ EXPECT_EQ(0, close(fd));
+
+ fd = open("/dev/iommu", O_RDWR);
+ EXPECT_NE(-1, fd);
+ EXPECT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS),
+ &test_cmd));
+ EXPECT_EQ(0, close(fd));
+}
+
+#define EXPECT_ERRNO(expected_errno, cmd) \
+ ({ \
+ ASSERT_EQ(-1, cmd); \
+ EXPECT_EQ(expected_errno, errno); \
+ })
+
+#endif
+
+/* @data can be NULL */
+static int _test_cmd_get_hw_info(int fd, __u32 device_id, __u32 data_type,
+ void *data, size_t data_len,
+ uint32_t *capabilities, uint8_t *max_pasid)
+{
+ struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data;
+ struct iommu_hw_info cmd = {
+ .size = sizeof(cmd),
+ .dev_id = device_id,
+ .data_len = data_len,
+ .in_data_type = data_type,
+ .data_uptr = (uint64_t)data,
+ .out_capabilities = 0,
+ };
+ int ret;
+
+ if (data_type != IOMMU_HW_INFO_TYPE_DEFAULT)
+ cmd.flags |= IOMMU_HW_INFO_FLAG_INPUT_TYPE;
+
+ ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd);
+ if (ret)
+ return ret;
+
+ assert(cmd.out_data_type == IOMMU_HW_INFO_TYPE_SELFTEST);
+
+ /*
+ * The struct iommu_test_hw_info should be the one defined
+ * by the current kernel.
+ */
+ assert(cmd.data_len == sizeof(struct iommu_test_hw_info));
+
+ /*
+ * Trailing bytes should be 0 if user buffer is larger than
+ * the data that kernel reports.
+ */
+ if (data_len > cmd.data_len) {
+ char *ptr = (char *)(data + cmd.data_len);
+ int idx = 0;
+
+ while (idx < data_len - cmd.data_len) {
+ assert(!*(ptr + idx));
+ idx++;
+ }
+ }
+
+ if (info) {
+ if (data_len >= offsetofend(struct iommu_test_hw_info, test_reg))
+ assert(info->test_reg == IOMMU_HW_INFO_SELFTEST_REGVAL);
+ if (data_len >= offsetofend(struct iommu_test_hw_info, flags))
+ assert(!info->flags);
+ }
+
+ if (max_pasid)
+ *max_pasid = cmd.out_max_pasid_log2;
+
+ if (capabilities)
+ *capabilities = cmd.out_capabilities;
+
+ return 0;
+}
+
+#define test_cmd_get_hw_info(device_id, data_type, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data_type, \
+ data, data_len, NULL, NULL))
+
+#define test_err_get_hw_info(_errno, device_id, data_type, data, data_len) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_get_hw_info(self->fd, device_id, data_type, \
+ data, data_len, NULL, NULL))
+
+#define test_cmd_get_hw_capabilities(device_id, caps) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \
+ IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \
+ 0, &caps, NULL))
+
+#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \
+ IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \
+ 0, NULL, max_pasid))
+
+static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd)
+{
+ struct iommu_fault_alloc cmd = {
+ .size = sizeof(cmd),
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_FAULT_QUEUE_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ *fault_id = cmd.out_fault_id;
+ *fault_fd = cmd.out_fault_fd;
+ return 0;
+}
+
+#define test_ioctl_fault_alloc(fault_id, fault_fd) \
+ ({ \
+ ASSERT_EQ(0, _test_ioctl_fault_alloc(self->fd, fault_id, \
+ fault_fd)); \
+ ASSERT_NE(0, *(fault_id)); \
+ ASSERT_NE(0, *(fault_fd)); \
+ })
+
+static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 pasid,
+ __u32 fault_fd)
+{
+ struct iommu_test_cmd trigger_iopf_cmd = {
+ .size = sizeof(trigger_iopf_cmd),
+ .op = IOMMU_TEST_OP_TRIGGER_IOPF,
+ .trigger_iopf = {
+ .dev_id = device_id,
+ .pasid = pasid,
+ .grpid = 0x2,
+ .perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE,
+ .addr = 0xdeadbeaf,
+ },
+ };
+ struct iommu_hwpt_page_response response = {
+ .code = IOMMUFD_PAGE_RESP_SUCCESS,
+ };
+ struct iommu_hwpt_pgfault fault = {};
+ ssize_t bytes;
+ int ret;
+
+ ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_IOPF), &trigger_iopf_cmd);
+ if (ret)
+ return ret;
+
+ bytes = read(fault_fd, &fault, sizeof(fault));
+ if (bytes <= 0)
+ return -EIO;
+
+ response.cookie = fault.cookie;
+
+ bytes = write(fault_fd, &response, sizeof(response));
+ if (bytes <= 0)
+ return -EIO;
+
+ return 0;
+}
+
+#define test_cmd_trigger_iopf(device_id, fault_fd) \
+ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, 0x1, fault_fd))
+#define test_cmd_trigger_iopf_pasid(device_id, pasid, fault_fd) \
+ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, \
+ pasid, fault_fd))
+
+static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id,
+ __u32 flags, __u32 type, void *data,
+ __u32 data_len, __u32 *viommu_id)
+{
+ struct iommu_viommu_alloc cmd = {
+ .size = sizeof(cmd),
+ .flags = flags,
+ .type = type,
+ .dev_id = device_id,
+ .hwpt_id = hwpt_id,
+ .data_uptr = (uint64_t)data,
+ .data_len = data_len,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_VIOMMU_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (viommu_id)
+ *viommu_id = cmd.out_viommu_id;
+ return 0;
+}
+
+#define test_cmd_viommu_alloc(device_id, hwpt_id, type, data, data_len, \
+ viommu_id) \
+ ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \
+ type, data, data_len, viommu_id))
+#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, data, \
+ data_len, viommu_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \
+ type, data, data_len, viommu_id))
+
+static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id,
+ __u64 virt_id, __u32 *vdev_id)
+{
+ struct iommu_vdevice_alloc cmd = {
+ .size = sizeof(cmd),
+ .dev_id = idev_id,
+ .viommu_id = viommu_id,
+ .virt_id = virt_id,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_VDEVICE_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (vdev_id)
+ *vdev_id = cmd.out_vdevice_id;
+ return 0;
+}
+
+#define test_cmd_vdevice_alloc(viommu_id, idev_id, virt_id, vdev_id) \
+ ASSERT_EQ(0, _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \
+ virt_id, vdev_id))
+#define test_err_vdevice_alloc(_errno, viommu_id, idev_id, virt_id, vdev_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \
+ virt_id, vdev_id))
+
+static int _test_cmd_hw_queue_alloc(int fd, __u32 viommu_id, __u32 type,
+ __u32 idx, __u64 base_addr, __u64 length,
+ __u32 *hw_queue_id)
+{
+ struct iommu_hw_queue_alloc cmd = {
+ .size = sizeof(cmd),
+ .viommu_id = viommu_id,
+ .type = type,
+ .index = idx,
+ .nesting_parent_iova = base_addr,
+ .length = length,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HW_QUEUE_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (hw_queue_id)
+ *hw_queue_id = cmd.out_hw_queue_id;
+ return 0;
+}
+
+#define test_cmd_hw_queue_alloc(viommu_id, type, idx, base_addr, len, out_qid) \
+ ASSERT_EQ(0, _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \
+ base_addr, len, out_qid))
+#define test_err_hw_queue_alloc(_errno, viommu_id, type, idx, base_addr, len, \
+ out_qid) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \
+ base_addr, len, out_qid))
+
+static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type,
+ __u32 *veventq_id, __u32 *veventq_fd)
+{
+ struct iommu_veventq_alloc cmd = {
+ .size = sizeof(cmd),
+ .type = type,
+ .veventq_depth = 2,
+ .viommu_id = viommu_id,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_VEVENTQ_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (veventq_id)
+ *veventq_id = cmd.out_veventq_id;
+ if (veventq_fd)
+ *veventq_fd = cmd.out_veventq_fd;
+ return 0;
+}
+
+#define test_cmd_veventq_alloc(viommu_id, type, veventq_id, veventq_fd) \
+ ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+ veventq_id, veventq_fd))
+#define test_err_veventq_alloc(_errno, viommu_id, type, veventq_id, \
+ veventq_fd) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+ veventq_id, veventq_fd))
+
+static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents)
+{
+ struct iommu_test_cmd trigger_vevent_cmd = {
+ .size = sizeof(trigger_vevent_cmd),
+ .op = IOMMU_TEST_OP_TRIGGER_VEVENT,
+ .trigger_vevent = {
+ .dev_id = dev_id,
+ },
+ };
+
+ while (nvevents--) {
+ if (ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT),
+ &trigger_vevent_cmd))
+ return -1;
+ }
+ return 0;
+}
+
+#define test_cmd_trigger_vevents(dev_id, nvevents) \
+ ASSERT_EQ(0, _test_cmd_trigger_vevents(self->fd, dev_id, nvevents))
+
+static int _test_cmd_read_vevents(int fd, __u32 event_fd, __u32 nvevents,
+ __u32 virt_id, int *prev_seq)
+{
+ struct pollfd pollfd = { .fd = event_fd, .events = POLLIN };
+ struct iommu_viommu_event_selftest *event;
+ struct iommufd_vevent_header *hdr;
+ ssize_t bytes;
+ void *data;
+ int ret, i;
+
+ ret = poll(&pollfd, 1, 1000);
+ if (ret < 0)
+ return -1;
+
+ data = calloc(nvevents, sizeof(*hdr) + sizeof(*event));
+ if (!data) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ bytes = read(event_fd, data,
+ nvevents * (sizeof(*hdr) + sizeof(*event)));
+ if (bytes <= 0) {
+ errno = EFAULT;
+ ret = -1;
+ goto out_free;
+ }
+
+ for (i = 0; i < nvevents; i++) {
+ hdr = data + i * (sizeof(*hdr) + sizeof(*event));
+
+ if (hdr->flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS ||
+ hdr->sequence - *prev_seq > 1) {
+ *prev_seq = hdr->sequence;
+ errno = EOVERFLOW;
+ ret = -1;
+ goto out_free;
+ }
+ *prev_seq = hdr->sequence;
+ event = data + sizeof(*hdr);
+ if (event->virt_id != virt_id) {
+ errno = EINVAL;
+ ret = -1;
+ goto out_free;
+ }
+ }
+
+ ret = 0;
+out_free:
+ free(data);
+ return ret;
+}
+
+#define test_cmd_read_vevents(event_fd, nvevents, virt_id, prev_seq) \
+ ASSERT_EQ(0, _test_cmd_read_vevents(self->fd, event_fd, nvevents, \
+ virt_id, prev_seq))
+#define test_err_read_vevents(_errno, event_fd, nvevents, virt_id, prev_seq) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_read_vevents(self->fd, event_fd, nvevents, \
+ virt_id, prev_seq))
+
+static int _test_cmd_pasid_attach(int fd, __u32 stdev_id, __u32 pasid,
+ __u32 pt_id)
+{
+ struct iommu_test_cmd test_attach = {
+ .size = sizeof(test_attach),
+ .op = IOMMU_TEST_OP_PASID_ATTACH,
+ .id = stdev_id,
+ .pasid_attach = {
+ .pasid = pasid,
+ .pt_id = pt_id,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_ATTACH),
+ &test_attach);
+}
+
+#define test_cmd_pasid_attach(pasid, hwpt_id) \
+ ASSERT_EQ(0, _test_cmd_pasid_attach(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+#define test_err_pasid_attach(_errno, pasid, hwpt_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_pasid_attach(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+static int _test_cmd_pasid_replace(int fd, __u32 stdev_id, __u32 pasid,
+ __u32 pt_id)
+{
+ struct iommu_test_cmd test_replace = {
+ .size = sizeof(test_replace),
+ .op = IOMMU_TEST_OP_PASID_REPLACE,
+ .id = stdev_id,
+ .pasid_replace = {
+ .pasid = pasid,
+ .pt_id = pt_id,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_REPLACE),
+ &test_replace);
+}
+
+#define test_cmd_pasid_replace(pasid, hwpt_id) \
+ ASSERT_EQ(0, _test_cmd_pasid_replace(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+#define test_err_pasid_replace(_errno, pasid, hwpt_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_pasid_replace(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+static int _test_cmd_pasid_detach(int fd, __u32 stdev_id, __u32 pasid)
+{
+ struct iommu_test_cmd test_detach = {
+ .size = sizeof(test_detach),
+ .op = IOMMU_TEST_OP_PASID_DETACH,
+ .id = stdev_id,
+ .pasid_detach = {
+ .pasid = pasid,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_DETACH),
+ &test_detach);
+}
+
+#define test_cmd_pasid_detach(pasid) \
+ ASSERT_EQ(0, _test_cmd_pasid_detach(self->fd, self->stdev_id, pasid))
+
+static int test_cmd_pasid_check_hwpt(int fd, __u32 stdev_id, __u32 pasid,
+ __u32 hwpt_id)
+{
+ struct iommu_test_cmd test_pasid_check = {
+ .size = sizeof(test_pasid_check),
+ .op = IOMMU_TEST_OP_PASID_CHECK_HWPT,
+ .id = stdev_id,
+ .pasid_check = {
+ .pasid = pasid,
+ .hwpt_id = hwpt_id,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_CHECK_HWPT),
+ &test_pasid_check);
+}
diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
index 1c4448a843a4..50e9c299fc4a 100644
--- a/tools/testing/selftests/ipc/Makefile
+++ b/tools/testing/selftests/ipc/Makefile
@@ -10,7 +10,7 @@ ifeq ($(ARCH),x86_64)
CFLAGS := -DCONFIG_X86_64 -D__x86_64__
endif
-CFLAGS += -I../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := msgque
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index 5ec4d9e18806..e107379d185c 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -7,7 +7,7 @@
#include <sys/msg.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MAX_MSG_SIZE 32
@@ -39,26 +39,26 @@ int restore_queue(struct msgque_data *msgque)
fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
if (fd == -1) {
- printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+ ksft_test_result_fail("Failed to open /proc/sys/kernel/msg_next_id\n");
return -errno;
}
sprintf(buf, "%d", msgque->msq_id);
ret = write(fd, buf, strlen(buf));
if (ret != strlen(buf)) {
- printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+ ksft_test_result_fail("Failed to write to /proc/sys/kernel/msg_next_id\n");
return -errno;
}
id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
if (id == -1) {
- printf("Failed to create queue\n");
+ ksft_test_result_fail("Failed to create queue\n");
return -errno;
}
if (id != msgque->msq_id) {
- printf("Restored queue has wrong id (%d instead of %d)\n",
- id, msgque->msq_id);
+ ksft_test_result_fail("Restored queue has wrong id (%d instead of %d)\n"
+ , id, msgque->msq_id);
ret = -EFAULT;
goto destroy;
}
@@ -66,10 +66,10 @@ int restore_queue(struct msgque_data *msgque)
for (i = 0; i < msgque->qnum; i++) {
if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
msgque->messages[i].msize, IPC_NOWAIT) != 0) {
- printf("msgsnd failed (%m)\n");
+ ksft_test_result_fail("msgsnd failed (%m)\n");
ret = -errno;
goto destroy;
- };
+ }
}
return 0;
@@ -90,23 +90,22 @@ int check_and_destroy_queue(struct msgque_data *msgque)
if (ret < 0) {
if (errno == ENOMSG)
break;
- printf("Failed to read IPC message: %m\n");
+ ksft_test_result_fail("Failed to read IPC message: %m\n");
ret = -errno;
goto err;
}
if (ret != msgque->messages[cnt].msize) {
- printf("Wrong message size: %d (expected %d)\n", ret,
- msgque->messages[cnt].msize);
+ ksft_test_result_fail("Wrong message size: %d (expected %d)\n", ret, msgque->messages[cnt].msize);
ret = -EINVAL;
goto err;
}
if (message.mtype != msgque->messages[cnt].mtype) {
- printf("Wrong message type\n");
+ ksft_test_result_fail("Wrong message type\n");
ret = -EINVAL;
goto err;
}
if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
- printf("Wrong message content\n");
+ ksft_test_result_fail("Wrong message content\n");
ret = -EINVAL;
goto err;
}
@@ -114,7 +113,7 @@ int check_and_destroy_queue(struct msgque_data *msgque)
}
if (cnt != msgque->qnum) {
- printf("Wrong message number\n");
+ ksft_test_result_fail("Wrong message number\n");
ret = -EINVAL;
goto err;
}
@@ -139,7 +138,7 @@ int dump_queue(struct msgque_data *msgque)
if (ret < 0) {
if (errno == EINVAL)
continue;
- printf("Failed to get stats for IPC queue with id %d\n",
+ ksft_test_result_fail("Failed to get stats for IPC queue with id %d\n",
kern_id);
return -errno;
}
@@ -150,7 +149,7 @@ int dump_queue(struct msgque_data *msgque)
msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
if (msgque->messages == NULL) {
- printf("Failed to get stats for IPC queue\n");
+ ksft_test_result_fail("Failed to get stats for IPC queue\n");
return -ENOMEM;
}
@@ -162,7 +161,7 @@ int dump_queue(struct msgque_data *msgque)
ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
if (ret < 0) {
- printf("Failed to copy IPC message: %m (%d)\n", errno);
+ ksft_test_result_fail("Failed to copy IPC message: %m (%d)\n", errno);
return -errno;
}
msgque->messages[i].msize = ret;
@@ -178,78 +177,77 @@ int fill_msgque(struct msgque_data *msgque)
memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
IPC_NOWAIT) != 0) {
- printf("First message send failed (%m)\n");
+ ksft_test_result_fail("First message send failed (%m)\n");
return -errno;
- };
+ }
msgbuf.mtype = ANOTHER_MSG_TYPE;
memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
IPC_NOWAIT) != 0) {
- printf("Second message send failed (%m)\n");
+ ksft_test_result_fail("Second message send failed (%m)\n");
return -errno;
- };
+ }
return 0;
}
int main(int argc, char **argv)
{
- int msg, pid, err;
+ int err;
struct msgque_data msgque;
if (getuid() != 0)
- return ksft_exit_skip(
- "Please run the test as root - Exiting.\n");
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
msgque.key = ftok(argv[0], 822155650);
if (msgque.key == -1) {
- printf("Can't make key: %d\n", -errno);
- return ksft_exit_fail();
+ ksft_test_result_fail("Can't make key: %d\n", -errno);
+ ksft_exit_fail();
}
msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
if (msgque.msq_id == -1) {
err = -errno;
- printf("Can't create queue: %d\n", err);
+ ksft_test_result_fail("Can't create queue: %d\n", err);
goto err_out;
}
err = fill_msgque(&msgque);
if (err) {
- printf("Failed to fill queue: %d\n", err);
+ ksft_test_result_fail("Failed to fill queue: %d\n", err);
goto err_destroy;
}
err = dump_queue(&msgque);
if (err) {
- printf("Failed to dump queue: %d\n", err);
+ ksft_test_result_fail("Failed to dump queue: %d\n", err);
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
- printf("Failed to check and destroy queue: %d\n", err);
+ ksft_test_result_fail("Failed to check and destroy queue: %d\n", err);
goto err_out;
}
err = restore_queue(&msgque);
if (err) {
- printf("Failed to restore queue: %d\n", err);
+ ksft_test_result_fail("Failed to restore queue: %d\n", err);
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
- printf("Failed to test queue: %d\n", err);
+ ksft_test_result_fail("Failed to test queue: %d\n", err);
goto err_out;
}
- return ksft_exit_pass();
+ ksft_exit_pass();
err_destroy:
if (msgctl(msgque.msq_id, IPC_RMID, NULL)) {
printf("Failed to destroy queue: %d\n", -errno);
- return ksft_exit_fail();
+ ksft_exit_fail();
}
err_out:
- return ksft_exit_fail();
+ ksft_exit_fail();
}
diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c
index af7f9c7d59bc..adfcf50b1264 100644
--- a/tools/testing/selftests/ir/ir_loopback.c
+++ b/tools/testing/selftests/ir/ir_loopback.c
@@ -23,13 +23,22 @@
#include <dirent.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define TEST_SCANCODES 10
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define SYSFS_PATH_MAX 256
#define DNAME_PATH_MAX 256
+/*
+ * Support ancient lirc.h which does not have these values. Can be removed
+ * once RHEL 8 is no longer a relevant testing platform.
+ */
+#if RC_PROTO_MAX < 26
+#define RC_PROTO_RCMM12 24
+#define RC_PROTO_RCMM24 25
+#define RC_PROTO_RCMM32 26
+#endif
+
static const struct {
enum rc_proto proto;
const char *name;
diff --git a/tools/testing/selftests/ir/ir_loopback.sh b/tools/testing/selftests/ir/ir_loopback.sh
index b90dc9939f45..aff9299c9416 100755
--- a/tools/testing/selftests/ir/ir_loopback.sh
+++ b/tools/testing/selftests/ir/ir_loopback.sh
@@ -10,7 +10,7 @@ if [ $UID != 0 ]; then
fi
if ! /sbin/modprobe -q -n rc-loopback; then
- echo "ir_loopback: module rc-loopback is not found [SKIP]"
+ echo "ir_loopback: module rc-loopback is not found in /lib/modules/`uname -r` [SKIP]"
exit $ksft_skip
fi
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
index b4d39f6b5124..59a1e5379018 100644
--- a/tools/testing/selftests/kcmp/Makefile
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -I../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := kcmp_test
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
index 6ea7b9f37a41..79aa438b7479 100644
--- a/tools/testing/selftests/kcmp/kcmp_test.c
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -18,7 +18,7 @@
#include <sys/wait.h>
#include <sys/epoll.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static long sys_kcmp(int pid1, int pid2, int type, unsigned long fd1, unsigned long fd2)
{
@@ -88,7 +88,10 @@ int main(int argc, char **argv)
int pid2 = getpid();
int ret;
- fd2 = open(kpath, O_RDWR, 0644);
+ ksft_print_header();
+ ksft_set_plan(3);
+
+ fd2 = open(kpath, O_RDWR);
if (fd2 < 0) {
perror("Can't open file");
ksft_exit_fail();
@@ -152,7 +155,6 @@ int main(int argc, char **argv)
ksft_inc_pass_cnt();
}
- ksft_print_cnts();
if (ret)
ksft_exit_fail();
@@ -162,5 +164,5 @@ int main(int argc, char **argv)
waitpid(pid2, &status, P_ALL);
- return ksft_exit_pass();
+ return 0;
}
diff --git a/tools/testing/selftests/kexec/.gitignore b/tools/testing/selftests/kexec/.gitignore
new file mode 100644
index 000000000000..5f3d9e089ae8
--- /dev/null
+++ b/tools/testing/selftests/kexec/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_kexec_jump
diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile
index aa91d2063249..874cfdd3b75b 100644
--- a/tools/testing/selftests/kexec/Makefile
+++ b/tools/testing/selftests/kexec/Makefile
@@ -1,13 +1,20 @@
# SPDX-License-Identifier: GPL-2.0-only
# Makefile for kexec tests
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+ARCH_PROCESSED := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-ifeq ($(ARCH),x86)
+ifeq ($(ARCH_PROCESSED),$(filter $(ARCH_PROCESSED),x86 ppc64le))
TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh
TEST_FILES := kexec_common_lib.sh
+include ../../../scripts/Makefile.arch
+
+ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86)
+TEST_PROGS += test_kexec_jump.sh
+TEST_GEN_PROGS := test_kexec_jump
+endif
+
include ../lib.mk
endif
diff --git a/tools/testing/selftests/kexec/kexec_common_lib.sh b/tools/testing/selftests/kexec/kexec_common_lib.sh
index 43017cfe88f7..641ef05863b2 100755
--- a/tools/testing/selftests/kexec/kexec_common_lib.sh
+++ b/tools/testing/selftests/kexec/kexec_common_lib.sh
@@ -65,48 +65,42 @@ get_efivarfs_secureboot_mode()
return 0;
}
-get_efi_var_secureboot_mode()
+# On powerpc platform, check device-tree property
+# /proc/device-tree/ibm,secureboot/os-secureboot-enforcing
+# to detect secureboot state.
+get_ppc64_secureboot_mode()
{
- local efi_vars
- local secure_boot_file
- local setup_mode_file
- local secureboot_mode
- local setup_mode
-
- if [ ! -d "$efi_vars" ]; then
- log_skip "efi_vars is not enabled\n"
- fi
- secure_boot_file=$(find "$efi_vars" -name SecureBoot-* 2>/dev/null)
- setup_mode_file=$(find "$efi_vars" -name SetupMode-* 2>/dev/null)
- if [ -f "$secure_boot_file/data" ] && \
- [ -f "$setup_mode_file/data" ]; then
- secureboot_mode=`od -An -t u1 "$secure_boot_file/data"`
- setup_mode=`od -An -t u1 "$setup_mode_file/data"`
-
- if [ $secureboot_mode -eq 1 ] && [ $setup_mode -eq 0 ]; then
- log_info "secure boot mode enabled (CONFIG_EFI_VARS)"
- return 1;
- fi
+ local secure_boot_file="/proc/device-tree/ibm,secureboot/os-secureboot-enforcing"
+ # Check for secure boot file existence
+ if [ -f $secure_boot_file ]; then
+ log_info "Secureboot is enabled (Device tree)"
+ return 1;
fi
+ log_info "Secureboot is not enabled (Device tree)"
return 0;
}
+# Return the architecture of the system
+get_arch()
+{
+ echo $(arch)
+}
+
# Check efivar SecureBoot-$(the UUID) and SetupMode-$(the UUID).
-# The secure boot mode can be accessed either as the last integer
-# of "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*" or from
-# "od -An -t u1 /sys/firmware/efi/vars/SecureBoot-*/data". The efi
+# The secure boot mode can be accessed as the last integer of
+# "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*". The efi
# SetupMode can be similarly accessed.
# Return 1 for SecureBoot mode enabled and SetupMode mode disabled.
get_secureboot_mode()
{
local secureboot_mode=0
+ local system_arch=$(get_arch)
- get_efivarfs_secureboot_mode
- secureboot_mode=$?
-
- # fallback to using the efi_var files
- if [ $secureboot_mode -eq 0 ]; then
- get_efi_var_secureboot_mode
+ if [ "$system_arch" == "ppc64le" ]; then
+ get_ppc64_secureboot_mode
+ secureboot_mode=$?
+ else
+ get_efivarfs_secureboot_mode
secureboot_mode=$?
fi
@@ -138,15 +132,20 @@ kconfig_enabled()
return 0
}
-# Attempt to get the kernel config first via proc, and then by
-# extracting it from the kernel image or the configs.ko using
-# scripts/extract-ikconfig.
+# Attempt to get the kernel config first by checking the modules directory
+# then via proc, and finally by extracting it from the kernel image or the
+# configs.ko using scripts/extract-ikconfig.
# Return 1 for found.
get_kconfig()
{
local proc_config="/proc/config.gz"
local module_dir="/lib/modules/`uname -r`"
- local configs_module="$module_dir/kernel/kernel/configs.ko"
+ local configs_module="$module_dir/kernel/kernel/configs.ko*"
+
+ if [ -f $module_dir/config ]; then
+ IKCONFIG=$module_dir/config
+ return 1
+ fi
if [ ! -f $proc_config ]; then
modprobe configs > /dev/null 2>&1
diff --git a/tools/testing/selftests/kexec/test_kexec_file_load.sh b/tools/testing/selftests/kexec/test_kexec_file_load.sh
index 2ff600388c30..c9ccb3c93d72 100755
--- a/tools/testing/selftests/kexec/test_kexec_file_load.sh
+++ b/tools/testing/selftests/kexec/test_kexec_file_load.sh
@@ -97,10 +97,11 @@ check_for_imasig()
check_for_modsig()
{
local module_sig_string="~Module signature appended~"
- local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)"
local ret=0
- if [ "$sig" == "$module_sig_string" ]; then
+ tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE | \
+ grep -q "$module_sig_string"
+ if [ $? -eq 0 ]; then
ret=1
log_info "kexec kernel image modsig signed"
else
@@ -225,8 +226,12 @@ get_secureboot_mode
secureboot=$?
# Are there pe and ima signatures
-check_for_pesig
-pe_signed=$?
+if [ "$(get_arch)" == 'ppc64le' ]; then
+ pe_signed=0
+else
+ check_for_pesig
+ pe_signed=$?
+fi
check_for_imasig
ima_signed=$?
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.c b/tools/testing/selftests/kexec/test_kexec_jump.c
new file mode 100644
index 000000000000..fbce287866f5
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.c
@@ -0,0 +1,72 @@
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <sys/reboot.h>
+#include <sys/syscall.h>
+
+asm(
+ " .code64\n"
+ " .data\n"
+ "purgatory_start:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start_b(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ // Same again
+ "purgatory_start_b:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ "purgatory_end:\n"
+ ".previous"
+);
+extern char purgatory_start[], purgatory_end[];
+
+int main (void)
+{
+ struct kexec_segment segment = {};
+ int ret;
+
+ segment.buf = purgatory_start;
+ segment.bufsz = purgatory_end - purgatory_start;
+ segment.mem = (void *)0x400000;
+ segment.memsz = 0x1000;
+ ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT);
+ if (ret) {
+ perror("kexec_load");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+ printf("Success\n");
+ return 0;
+}
+
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.sh b/tools/testing/selftests/kexec/test_kexec_jump.sh
new file mode 100755
index 000000000000..6ae977054ba2
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Prevent loading a kernel image via the kexec_load syscall when
+# signatures are required. (Dependent on CONFIG_IMA_ARCH_POLICY.)
+
+TEST="$0"
+. ./kexec_common_lib.sh
+
+# kexec requires root privileges
+require_root_privileges
+
+# get the kernel config
+get_kconfig
+
+kconfig_enabled "CONFIG_KEXEC_JUMP=y" "kexec_jump is enabled"
+if [ $? -eq 0 ]; then
+ log_skip "kexec_jump is not enabled"
+fi
+
+kconfig_enabled "CONFIG_IMA_APPRAISE=y" "IMA enabled"
+ima_appraise=$?
+
+kconfig_enabled "CONFIG_IMA_ARCH_POLICY=y" \
+ "IMA architecture specific policy enabled"
+arch_policy=$?
+
+get_secureboot_mode
+secureboot=$?
+
+if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ]; then
+ log_skip "Secure boot and CONFIG_IMA_ARCH_POLICY are enabled"
+fi
+
+./test_kexec_jump
+if [ $? -eq 0 ]; then
+ log_pass "kexec_jump succeeded"
+else
+ # The more likely failure mode if anything went wrong is that the
+ # kernel just crashes. But if we get back here, sure, whine anyway.
+ log_fail "kexec_jump failed"
+fi
diff --git a/tools/testing/selftests/kho/arm64.conf b/tools/testing/selftests/kho/arm64.conf
new file mode 100644
index 000000000000..ee696807cd35
--- /dev/null
+++ b/tools/testing/selftests/kho/arm64.conf
@@ -0,0 +1,9 @@
+QEMU_CMD="qemu-system-aarch64 -M virt -cpu max"
+QEMU_KCONFIG="
+CONFIG_SERIAL_AMBA_PL010=y
+CONFIG_SERIAL_AMBA_PL010_CONSOLE=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+"
+KERNEL_IMAGE="Image"
+KERNEL_CMDLINE="console=ttyAMA0"
diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c
new file mode 100644
index 000000000000..6d9e91d55d68
--- /dev/null
+++ b/tools/testing/selftests/kho/init.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/mount.h>
+#include <sys/reboot.h>
+#include <linux/kexec.h>
+
+/* from arch/x86/include/asm/setup.h */
+#define COMMAND_LINE_SIZE 2048
+
+#define KHO_FINALIZE "/debugfs/kho/out/finalize"
+#define KERNEL_IMAGE "/kernel"
+
+static int mount_filesystems(void)
+{
+ if (mount("debugfs", "/debugfs", "debugfs", 0, NULL) < 0)
+ return -1;
+
+ return mount("proc", "/proc", "proc", 0, NULL);
+}
+
+static int kho_enable(void)
+{
+ const char enable[] = "1";
+ int fd;
+
+ fd = open(KHO_FINALIZE, O_RDWR);
+ if (fd < 0)
+ return -1;
+
+ if (write(fd, enable, sizeof(enable)) != sizeof(enable))
+ return 1;
+
+ close(fd);
+ return 0;
+}
+
+static long kexec_file_load(int kernel_fd, int initrd_fd,
+ unsigned long cmdline_len, const char *cmdline,
+ unsigned long flags)
+{
+ return syscall(__NR_kexec_file_load, kernel_fd, initrd_fd, cmdline_len,
+ cmdline, flags);
+}
+
+static int kexec_load(void)
+{
+ char cmdline[COMMAND_LINE_SIZE];
+ ssize_t len;
+ int fd, err;
+
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ len = read(fd, cmdline, sizeof(cmdline));
+ close(fd);
+ if (len < 0)
+ return -1;
+
+ /* replace \n with \0 */
+ cmdline[len - 1] = 0;
+ fd = open(KERNEL_IMAGE, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ err = kexec_file_load(fd, -1, len, cmdline, KEXEC_FILE_NO_INITRAMFS);
+ close(fd);
+
+ return err ? : 0;
+}
+
+int main(int argc, char *argv[])
+{
+ if (mount_filesystems())
+ goto err_reboot;
+
+ if (kho_enable())
+ goto err_reboot;
+
+ if (kexec_load())
+ goto err_reboot;
+
+ if (reboot(RB_KEXEC))
+ goto err_reboot;
+
+ return 0;
+
+err_reboot:
+ reboot(RB_AUTOBOOT);
+ return -1;
+}
diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh
new file mode 100755
index 000000000000..49fdac8e8b15
--- /dev/null
+++ b/tools/testing/selftests/kho/vmtest.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -ue
+
+CROSS_COMPILE="${CROSS_COMPILE:-""}"
+
+test_dir=$(realpath "$(dirname "$0")")
+kernel_dir=$(realpath "$test_dir/../../../..")
+
+tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX)
+headers_dir="$tmp_dir/usr"
+initrd="$tmp_dir/initrd.cpio"
+
+source "$test_dir/../kselftest/ktap_helpers.sh"
+
+function usage() {
+ cat <<EOF
+$0 [-d build_dir] [-j jobs] [-t target_arch] [-h]
+Options:
+ -d) path to the kernel build directory
+ -j) number of jobs for compilation, similar to -j in make
+ -t) run test for target_arch, requires CROSS_COMPILE set
+ supported targets: aarch64, x86_64
+ -h) display this help
+EOF
+}
+
+function cleanup() {
+ rm -fr "$tmp_dir"
+ ktap_finished
+}
+trap cleanup EXIT
+
+function skip() {
+ local msg=${1:-""}
+
+ ktap_test_skip "$msg"
+ exit "$KSFT_SKIP"
+}
+
+function fail() {
+ local msg=${1:-""}
+
+ ktap_test_fail "$msg"
+ exit "$KSFT_FAIL"
+}
+
+function build_kernel() {
+ local build_dir=$1
+ local make_cmd=$2
+ local arch_kconfig=$3
+ local kimage=$4
+
+ local kho_config="$tmp_dir/kho.config"
+ local kconfig="$build_dir/.config"
+
+ # enable initrd, KHO and KHO test in kernel configuration
+ tee "$kconfig" > "$kho_config" <<EOF
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KEXEC_HANDOVER=y
+CONFIG_KEXEC_HANDOVER_DEBUGFS=y
+CONFIG_TEST_KEXEC_HANDOVER=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_VM=y
+$arch_kconfig
+EOF
+
+ make_cmd="$make_cmd -C $kernel_dir O=$build_dir"
+ $make_cmd olddefconfig
+
+ # verify that kernel confiration has all necessary options
+ while read -r opt ; do
+ grep "$opt" "$kconfig" &>/dev/null || skip "$opt is missing"
+ done < "$kho_config"
+
+ $make_cmd "$kimage"
+ $make_cmd headers_install INSTALL_HDR_PATH="$headers_dir"
+}
+
+function mkinitrd() {
+ local kernel=$1
+
+ "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -nostdlib \
+ -fno-asynchronous-unwind-tables -fno-ident \
+ -I "$headers_dir/include" \
+ -I "$kernel_dir/tools/include/nolibc" \
+ -o "$tmp_dir/init" "$test_dir/init.c"
+
+ cat > "$tmp_dir/cpio_list" <<EOF
+dir /dev 0755 0 0
+dir /proc 0755 0 0
+dir /debugfs 0755 0 0
+nod /dev/console 0600 0 0 c 5 1
+file /init $tmp_dir/init 0755 0 0
+file /kernel $kernel 0644 0 0
+EOF
+
+ "$build_dir/usr/gen_init_cpio" "$tmp_dir/cpio_list" > "$initrd"
+}
+
+function run_qemu() {
+ local qemu_cmd=$1
+ local cmdline=$2
+ local kernel=$3
+ local serial="$tmp_dir/qemu.serial"
+
+ cmdline="$cmdline kho=on panic=-1"
+
+ $qemu_cmd -m 1G -smp 2 -no-reboot -nographic -nodefaults \
+ -accel kvm -accel hvf -accel tcg \
+ -serial file:"$serial" \
+ -append "$cmdline" \
+ -kernel "$kernel" \
+ -initrd "$initrd"
+
+ grep "KHO restore succeeded" "$serial" &> /dev/null || fail "KHO failed"
+}
+
+function target_to_arch() {
+ local target=$1
+
+ case $target in
+ aarch64) echo "arm64" ;;
+ x86_64) echo "x86" ;;
+ *) skip "architecture $target is not supported"
+ esac
+}
+
+function main() {
+ local build_dir="$kernel_dir/.kho"
+ local jobs=$(($(nproc) * 2))
+ local target="$(uname -m)"
+
+ # skip the test if any of the preparation steps fails
+ set -o errtrace
+ trap skip ERR
+
+ while getopts 'hd:j:t:' opt; do
+ case $opt in
+ d)
+ build_dir="$OPTARG"
+ ;;
+ j)
+ jobs="$OPTARG"
+ ;;
+ t)
+ target="$OPTARG"
+ ;;
+ h)
+ usage
+ exit 0
+ ;;
+ *)
+ echo Unknown argument "$opt"
+ usage
+ exit 1
+ ;;
+ esac
+ done
+
+ ktap_print_header
+ ktap_set_plan 1
+
+ if [[ "$target" != "$(uname -m)" ]] && [[ -z "$CROSS_COMPILE" ]]; then
+ skip "Cross-platform testing needs to specify CROSS_COMPILE"
+ fi
+
+ mkdir -p "$build_dir"
+ local arch=$(target_to_arch "$target")
+ source "$test_dir/$arch.conf"
+
+ # build the kernel and create initrd
+ # initrd includes the kernel image that will be kexec'ed
+ local make_cmd="make ARCH=$arch CROSS_COMPILE=$CROSS_COMPILE -j$jobs"
+ build_kernel "$build_dir" "$make_cmd" "$QEMU_KCONFIG" "$KERNEL_IMAGE"
+
+ local kernel="$build_dir/arch/$arch/boot/$KERNEL_IMAGE"
+ mkinitrd "$kernel"
+
+ run_qemu "$QEMU_CMD" "$KERNEL_CMDLINE" "$kernel"
+
+ ktap_test_pass "KHO succeeded"
+}
+
+main "$@"
diff --git a/tools/testing/selftests/kho/x86.conf b/tools/testing/selftests/kho/x86.conf
new file mode 100644
index 000000000000..b419e610ca22
--- /dev/null
+++ b/tools/testing/selftests/kho/x86.conf
@@ -0,0 +1,7 @@
+QEMU_CMD=qemu-system-x86_64
+QEMU_KCONFIG="
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+"
+KERNEL_IMAGE="bzImage"
+KERNEL_CMDLINE="console=ttyS0"
diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config
index 259f4fd6b5e2..1f1e63494af9 100644
--- a/tools/testing/selftests/kmod/config
+++ b/tools/testing/selftests/kmod/config
@@ -1,7 +1,2 @@
CONFIG_TEST_KMOD=m
CONFIG_TEST_LKM=m
-CONFIG_XFS_FS=m
-
-# For the module parameter force_init_test is used
-CONFIG_TUN=m
-CONFIG_BTRFS_FS=m
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index afd42387e8b2..7189715d7960 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -1,18 +1,7 @@
#!/bin/bash
-#
+# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the Free
-# Software Foundation; either version 2 of the License, or at your option any
-# later version; or, when distributed separately from the Linux kernel or
-# when incorporated into other software packages, subject to the following
-# license:
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of copyleft-next (version 0.3.1 or later) as published
-# at http://copyleft-next.org/.
-
# This is a stress test script for kmod, the kernel module loader. It uses
# test_kmod which exposes a series of knobs for the API for us so we can
# tweak each test in userspace rather than in kernelspace.
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 8d50483fe204..afbcf8412ae5 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -16,18 +16,24 @@
* For each test, report any progress, debugging, etc with:
*
* ksft_print_msg(fmt, ...);
+ * ksft_perror(msg);
*
- * and finally report the pass/fail/skip/xfail state of the test with one of:
+ * and finally report the pass/fail/skip/xfail/xpass state of the test
+ * with one of:
*
* ksft_test_result(condition, fmt, ...);
+ * ksft_test_result_report(result, fmt, ...);
* ksft_test_result_pass(fmt, ...);
* ksft_test_result_fail(fmt, ...);
* ksft_test_result_skip(fmt, ...);
* ksft_test_result_xfail(fmt, ...);
+ * ksft_test_result_xpass(fmt, ...);
* ksft_test_result_error(fmt, ...);
+ * ksft_test_result_code(exit_code, test_name, fmt, ...);
*
* When all tests are finished, clean up and exit the program with one of:
*
+ * ksft_finished();
* ksft_exit(condition);
* ksft_exit_pass();
* ksft_exit_fail();
@@ -37,16 +43,43 @@
* the program is aborting before finishing all tests):
*
* ksft_exit_fail_msg(fmt, ...);
+ * ksft_exit_fail_perror(msg);
*
*/
#ifndef __KSELFTEST_H
#define __KSELFTEST_H
+#ifndef NOLIBC
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
#include <stdio.h>
+#include <sys/utsname.h>
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+/*
+ * gcc cpuid.h provides __cpuid_count() since v4.4.
+ * Clang/LLVM cpuid.h provides __cpuid_count() since v3.4.0.
+ *
+ * Provide local define for tests needing __cpuid_count() because
+ * selftests need to work in older environments that do not yet
+ * have __cpuid_count().
+ */
+#ifndef __cpuid_count
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ __volatile__ ("cpuid\n\t" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#endif
+#endif /* end arch */
/* define kselftest exit codes */
#define KSFT_PASS 0
@@ -55,6 +88,19 @@
#define KSFT_XPASS 3
#define KSFT_SKIP 4
+#ifndef __noreturn
+#define __noreturn __attribute__((__noreturn__))
+#endif
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+#ifndef __always_unused
+#define __always_unused __attribute__((__unused__))
+#endif
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
/* counters */
struct ksft_count {
unsigned int ksft_pass;
@@ -67,6 +113,7 @@ struct ksft_count {
static struct ksft_count ksft_cnt;
static unsigned int ksft_plan;
+static bool ksft_debug_enabled;
static inline unsigned int ksft_test_num(void)
{
@@ -91,6 +138,15 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
static inline void ksft_print_header(void)
{
+ /*
+ * Force line buffering; If stdout is not connected to a terminal, it
+ * will otherwise default to fully buffered, which can cause output
+ * duplication if there is content in the buffer when fork()ing. If
+ * there is a crash, line buffering also means the most recent output
+ * line will be visible.
+ */
+ setvbuf(stdout, NULL, _IOLBF, 0);
+
if (!(getenv("KSFT_TAP_LEVEL")))
printf("TAP version 13\n");
}
@@ -98,21 +154,26 @@ static inline void ksft_print_header(void)
static inline void ksft_set_plan(unsigned int plan)
{
ksft_plan = plan;
- printf("1..%d\n", ksft_plan);
+ printf("1..%u\n", ksft_plan);
}
static inline void ksft_print_cnts(void)
{
+ if (ksft_cnt.ksft_xskip > 0)
+ printf(
+ "# %u skipped test(s) detected. Consider enabling relevant config options to improve coverage.\n",
+ ksft_cnt.ksft_xskip
+ );
if (ksft_plan != ksft_test_num())
printf("# Planned tests != run tests (%u != %u)\n",
ksft_plan, ksft_test_num());
- printf("# Totals: pass:%d fail:%d xfail:%d xpass:%d skip:%d error:%d\n",
+ printf("# Totals: pass:%u fail:%u xfail:%u xpass:%u skip:%u error:%u\n",
ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
}
-static inline void ksft_print_msg(const char *msg, ...)
+static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -124,7 +185,24 @@ static inline void ksft_print_msg(const char *msg, ...)
va_end(args);
}
-static inline void ksft_test_result_pass(const char *msg, ...)
+static inline void ksft_print_dbg_msg(const char *msg, ...)
+{
+ va_list args;
+
+ if (!ksft_debug_enabled)
+ return;
+
+ va_start(args, msg);
+ ksft_print_msg(msg, args);
+ va_end(args);
+}
+
+static inline void ksft_perror(const char *msg)
+{
+ ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
+}
+
+static inline __printf(1, 2) void ksft_test_result_pass(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -132,13 +210,13 @@ static inline void ksft_test_result_pass(const char *msg, ...)
ksft_cnt.ksft_pass++;
va_start(args, msg);
- printf("ok %d ", ksft_test_num());
+ printf("ok %u ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
-static inline void ksft_test_result_fail(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_fail(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -146,7 +224,7 @@ static inline void ksft_test_result_fail(const char *msg, ...)
ksft_cnt.ksft_fail++;
va_start(args, msg);
- printf("not ok %d ", ksft_test_num());
+ printf("not ok %u ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
@@ -164,7 +242,7 @@ static inline void ksft_test_result_fail(const char *msg, ...)
ksft_test_result_fail(fmt, ##__VA_ARGS__);\
} while (0)
-static inline void ksft_test_result_xfail(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_xfail(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -172,13 +250,27 @@ static inline void ksft_test_result_xfail(const char *msg, ...)
ksft_cnt.ksft_xfail++;
va_start(args, msg);
- printf("ok %d # XFAIL ", ksft_test_num());
+ printf("ok %u # XFAIL ", ksft_test_num());
+ errno = saved_errno;
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline __printf(1, 2) void ksft_test_result_xpass(const char *msg, ...)
+{
+ int saved_errno = errno;
+ va_list args;
+
+ ksft_cnt.ksft_xpass++;
+
+ va_start(args, msg);
+ printf("ok %u # XPASS ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
-static inline void ksft_test_result_skip(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -186,14 +278,14 @@ static inline void ksft_test_result_skip(const char *msg, ...)
ksft_cnt.ksft_xskip++;
va_start(args, msg);
- printf("ok %d # SKIP ", ksft_test_num());
+ printf("ok %u # SKIP ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
/* TODO: how does "error" differ from "fail" or "skip"? */
-static inline void ksft_test_result_error(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -201,19 +293,89 @@ static inline void ksft_test_result_error(const char *msg, ...)
ksft_cnt.ksft_error++;
va_start(args, msg);
- printf("not ok %d # error ", ksft_test_num());
+ printf("not ok %u # error ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
-static inline int ksft_exit_pass(void)
+static inline __printf(3, 4)
+void ksft_test_result_code(int exit_code, const char *test_name,
+ const char *msg, ...)
+{
+ const char *tap_code = "ok";
+ const char *directive = "";
+ int saved_errno = errno;
+ va_list args;
+
+ switch (exit_code) {
+ case KSFT_PASS:
+ ksft_cnt.ksft_pass++;
+ break;
+ case KSFT_XFAIL:
+ directive = " # XFAIL ";
+ ksft_cnt.ksft_xfail++;
+ break;
+ case KSFT_XPASS:
+ directive = " # XPASS ";
+ ksft_cnt.ksft_xpass++;
+ break;
+ case KSFT_SKIP:
+ directive = " # SKIP ";
+ ksft_cnt.ksft_xskip++;
+ break;
+ case KSFT_FAIL:
+ default:
+ tap_code = "not ok";
+ ksft_cnt.ksft_fail++;
+ break;
+ }
+
+ /* Docs seem to call for double space if directive is absent */
+ if (!directive[0] && msg)
+ directive = " # ";
+
+ printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive);
+ errno = saved_errno;
+ if (msg) {
+ va_start(args, msg);
+ vprintf(msg, args);
+ va_end(args);
+ }
+ printf("\n");
+}
+
+/**
+ * ksft_test_result() - Report test success based on truth of condition
+ *
+ * @condition: if true, report test success, otherwise failure.
+ */
+#define ksft_test_result_report(result, fmt, ...) do { \
+ switch (result) { \
+ case KSFT_PASS: \
+ ksft_test_result_pass(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_FAIL: \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_XFAIL: \
+ ksft_test_result_xfail(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_XPASS: \
+ ksft_test_result_xpass(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_SKIP: \
+ ksft_test_result_skip(fmt, ##__VA_ARGS__); \
+ break; \
+ } } while (0)
+
+static inline __noreturn void ksft_exit_pass(void)
{
ksft_print_cnts();
exit(KSFT_PASS);
}
-static inline int ksft_exit_fail(void)
+static inline __noreturn void ksft_exit_fail(void)
{
ksft_print_cnts();
exit(KSFT_FAIL);
@@ -231,7 +393,16 @@ static inline int ksft_exit_fail(void)
ksft_exit_fail(); \
} while (0)
-static inline int ksft_exit_fail_msg(const char *msg, ...)
+/**
+ * ksft_finished() - Exit selftest with success if all tests passed
+ */
+#define ksft_finished() \
+ ksft_exit(ksft_plan == \
+ ksft_cnt.ksft_pass + \
+ ksft_cnt.ksft_xfail + \
+ ksft_cnt.ksft_xskip)
+
+static inline __noreturn __printf(1, 2) void ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -246,19 +417,24 @@ static inline int ksft_exit_fail_msg(const char *msg, ...)
exit(KSFT_FAIL);
}
-static inline int ksft_exit_xfail(void)
+static inline __noreturn void ksft_exit_fail_perror(const char *msg)
+{
+ ksft_exit_fail_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
+}
+
+static inline __noreturn void ksft_exit_xfail(void)
{
ksft_print_cnts();
exit(KSFT_XFAIL);
}
-static inline int ksft_exit_xpass(void)
+static inline __noreturn void ksft_exit_xpass(void)
{
ksft_print_cnts();
exit(KSFT_XPASS);
}
-static inline int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -273,7 +449,7 @@ static inline int ksft_exit_skip(const char *msg, ...)
*/
if (ksft_plan || ksft_test_num()) {
ksft_cnt.ksft_xskip++;
- printf("ok %d # SKIP ", 1 + ksft_test_num());
+ printf("ok %u # SKIP ", 1 + ksft_test_num());
} else {
printf("1..0 # SKIP ");
}
@@ -287,4 +463,16 @@ static inline int ksft_exit_skip(const char *msg, ...)
exit(KSFT_SKIP);
}
+static inline int ksft_min_kernel_version(unsigned int min_major,
+ unsigned int min_minor)
+{
+ unsigned int major, minor;
+ struct utsname info;
+
+ if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
+ ksft_exit_fail_msg("Can't parse kernel version\n");
+
+ return major > min_major || (major == min_major && minor >= min_minor);
+}
+
#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest/ksft.py b/tools/testing/selftests/kselftest/ksft.py
new file mode 100644
index 000000000000..0e030837fc17
--- /dev/null
+++ b/tools/testing/selftests/kselftest/ksft.py
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Kselftest helpers for outputting in KTAP format. Based on kselftest.h.
+#
+
+import sys
+
+ksft_cnt = {"pass": 0, "fail": 0, "skip": 0}
+ksft_num_tests = 0
+ksft_test_number = 1
+
+KSFT_PASS = 0
+KSFT_FAIL = 1
+KSFT_SKIP = 4
+
+
+def print_header():
+ print("TAP version 13")
+
+
+def set_plan(num_tests):
+ global ksft_num_tests
+ ksft_num_tests = num_tests
+ print("1..{}".format(num_tests))
+
+
+def print_cnts():
+ if ksft_cnt['skip'] > 0:
+ print(f"# {ksft_cnt['skip']} skipped test(s) detected. Consider enabling relevant config options to improve coverage.")
+
+ print(
+ f"# Totals: pass:{ksft_cnt['pass']} fail:{ksft_cnt['fail']} xfail:0 xpass:0 skip:{ksft_cnt['skip']} error:0"
+ )
+
+
+def print_msg(msg):
+ print(f"# {msg}")
+
+
+def _test_print(result, description, directive=None):
+ if directive:
+ directive_str = f"# {directive}"
+ else:
+ directive_str = ""
+
+ global ksft_test_number
+ print(f"{result} {ksft_test_number} {description} {directive_str}")
+ ksft_test_number += 1
+
+
+def test_result_pass(description):
+ _test_print("ok", description)
+ ksft_cnt["pass"] += 1
+
+
+def test_result_fail(description):
+ _test_print("not ok", description)
+ ksft_cnt["fail"] += 1
+
+
+def test_result_skip(description):
+ _test_print("ok", description, "SKIP")
+ ksft_cnt["skip"] += 1
+
+
+def test_result(condition, description=""):
+ if condition:
+ test_result_pass(description)
+ else:
+ test_result_fail(description)
+
+
+def finished():
+ if ksft_cnt["pass"] + ksft_cnt["skip"] == ksft_num_tests:
+ exit_code = KSFT_PASS
+ else:
+ exit_code = KSFT_FAIL
+
+ print_cnts()
+
+ sys.exit(exit_code)
+
+
+def exit_fail():
+ print_cnts()
+ sys.exit(KSFT_FAIL)
+
+
+def exit_pass():
+ print_cnts()
+ sys.exit(KSFT_PASS)
diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh
new file mode 100644
index 000000000000..32dbfe9da2c4
--- /dev/null
+++ b/tools/testing/selftests/kselftest/ktap_helpers.sh
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Helpers for outputting in KTAP format
+#
+KTAP_TESTNO=1
+KTAP_CNT_PASS=0
+KTAP_CNT_FAIL=0
+KTAP_CNT_XFAIL=0
+KTAP_CNT_SKIP=0
+
+KSFT_PASS=0
+KSFT_FAIL=1
+KSFT_XFAIL=2
+KSFT_XPASS=3
+KSFT_SKIP=4
+
+KSFT_NUM_TESTS=0
+
+ktap_print_header() {
+ echo "TAP version 13"
+}
+
+ktap_print_msg()
+{
+ echo "#" $@
+}
+
+ktap_set_plan() {
+ KSFT_NUM_TESTS="$1"
+
+ echo "1..$KSFT_NUM_TESTS"
+}
+
+ktap_skip_all() {
+ echo -n "1..0 # SKIP "
+ echo $@
+}
+
+__ktap_test() {
+ result="$1"
+ description="$2"
+ directive="${3:-}" # optional
+
+ local directive_str=
+ [ ! -z "$directive" ] && directive_str="# $directive"
+
+ echo $result $KTAP_TESTNO $description $directive_str
+
+ KTAP_TESTNO=$((KTAP_TESTNO+1))
+}
+
+ktap_test_pass() {
+ description="$1"
+
+ result="ok"
+ __ktap_test "$result" "$description"
+
+ KTAP_CNT_PASS=$((KTAP_CNT_PASS+1))
+}
+
+ktap_test_skip() {
+ description="$1"
+
+ result="ok"
+ directive="SKIP"
+ __ktap_test "$result" "$description" "$directive"
+
+ KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1))
+}
+
+ktap_test_xfail() {
+ description="$1"
+
+ result="ok"
+ directive="XFAIL"
+ __ktap_test "$result" "$description" "$directive"
+
+ KTAP_CNT_XFAIL=$((KTAP_CNT_XFAIL+1))
+}
+
+ktap_test_fail() {
+ description="$1"
+
+ result="not ok"
+ __ktap_test "$result" "$description"
+
+ KTAP_CNT_FAIL=$((KTAP_CNT_FAIL+1))
+}
+
+ktap_test_result() {
+ description="$1"
+ shift
+
+ if $@; then
+ ktap_test_pass "$description"
+ else
+ ktap_test_fail "$description"
+ fi
+}
+
+ktap_exit_fail_msg() {
+ echo "Bail out! " $@
+ ktap_print_totals
+
+ exit "$KSFT_FAIL"
+}
+
+ktap_finished() {
+ ktap_print_totals
+
+ if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP + KTAP_CNT_XFAIL)) -eq "$KSFT_NUM_TESTS" ]; then
+ exit "$KSFT_PASS"
+ else
+ exit "$KSFT_FAIL"
+ fi
+}
+
+ktap_print_totals() {
+ if [ "$KTAP_CNT_SKIP" -gt 0 ]; then
+ echo "# $KTAP_CNT_SKIP skipped test(s) detected. " \
+ "Consider enabling relevant config options to improve coverage."
+ fi
+ echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:$KTAP_CNT_XFAIL xpass:0 skip:$KTAP_CNT_SKIP error:0"
+}
diff --git a/tools/testing/selftests/kselftest/module.sh b/tools/testing/selftests/kselftest/module.sh
index fb4733faff12..51fb65159932 100755
--- a/tools/testing/selftests/kselftest/module.sh
+++ b/tools/testing/selftests/kselftest/module.sh
@@ -11,7 +11,7 @@
# SPDX-License-Identifier: GPL-2.0+
# $(dirname $0)/../kselftest/module.sh "description" module_name
#
-# Example: tools/testing/selftests/lib/printf.sh
+# Example: tools/testing/selftests/lib/bitmap.sh
desc="" # Output prefix.
module="" # Filename (without the .ko).
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index cc9c846585f0..3a62039fa621 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -6,9 +6,11 @@ export skip_rc=4
export timeout_rc=124
export logfile=/dev/stdout
export per_test_logging=
+export RUN_IN_NETNS=
# Defaults for "settings" file fields:
-# "timeout" how many seconds to let each test run before failing.
+# "timeout" how many seconds to let each test run before running
+# over our soft timeout limit.
export kselftest_default_timeout=45
# There isn't a shell-agnostic way to find the path of a sourced file,
@@ -18,6 +20,8 @@ if [ -z "$BASE_DIR" ]; then
exit 1
fi
+TR_CMD=$(command -v tr)
+
# If Perl is unavailable, we must fall back to line-at-a-time prefixing
# with sed instead of unbuffered output.
tap_prefix()
@@ -33,22 +37,54 @@ tap_timeout()
{
# Make sure tests will time out if utility is available.
if [ -x /usr/bin/timeout ] ; then
- /usr/bin/timeout --foreground "$kselftest_timeout" "$1"
+ /usr/bin/timeout --foreground "$kselftest_timeout" \
+ /usr/bin/timeout "$kselftest_timeout" $1
else
- "$1"
+ $1
fi
}
+report_failure()
+{
+ echo "not ok $*"
+ echo "$*" >> "$kselftest_failures_file"
+}
+
run_one()
{
DIR="$1"
TEST="$2"
- NUM="$3"
+ local test_num="$3"
BASENAME_TEST=$(basename $TEST)
# Reset any "settings"-file variables.
export kselftest_timeout="$kselftest_default_timeout"
+
+ # Safe default if tr not available
+ kselftest_cmd_args_ref="KSELFTEST_ARGS"
+
+ # Optional arguments for this command, possibly defined as an
+ # environment variable built using the test executable in all
+ # uppercase and sanitized substituting non acceptable shell
+ # variable name characters with "_" as in:
+ #
+ # KSELFTEST_<UPPERCASE_SANITIZED_TESTNAME>_ARGS="<options>"
+ #
+ # e.g.
+ #
+ # rtctest --> KSELFTEST_RTCTEST_ARGS="/dev/rtc1"
+ #
+ # cpu-on-off-test.sh --> KSELFTEST_CPU_ON_OFF_TEST_SH_ARGS="-a -p 10"
+ #
+ if [ -n "$TR_CMD" ]; then
+ BASENAME_SANITIZED=$(echo "$BASENAME_TEST" | \
+ $TR_CMD -d "[:blank:][:cntrl:]" | \
+ $TR_CMD -c "[:alnum:]_" "_" | \
+ $TR_CMD [:lower:] [:upper:])
+ kselftest_cmd_args_ref="KSELFTEST_${BASENAME_SANITIZED}_ARGS"
+ fi
+
# Load per-test-directory kselftest "settings" file.
settings="$BASE_DIR/$DIR/settings"
if [ -r "$settings" ] ; then
@@ -63,19 +99,42 @@ run_one()
done < "$settings"
fi
+ # Command line timeout overrides the settings file
+ if [ -n "$kselftest_override_timeout" ]; then
+ kselftest_timeout="$kselftest_override_timeout"
+ echo "# overriding timeout to $kselftest_timeout" >> "$logfile"
+ else
+ echo "# timeout set to $kselftest_timeout" >> "$logfile"
+ fi
+
TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST"
echo "# $TEST_HDR_MSG"
- if [ ! -x "$TEST" ]; then
- echo -n "# Warning: file $TEST is "
- if [ ! -e "$TEST" ]; then
- echo "missing!"
+ if [ ! -e "$TEST" ]; then
+ echo "# Warning: file $TEST is missing!"
+ report_failure "$test_num $TEST_HDR_MSG"
+ else
+ if [ -x /usr/bin/stdbuf ]; then
+ stdbuf="/usr/bin/stdbuf --output=L "
+ fi
+ eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}"
+ if [ -x "$TEST" ]; then
+ cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args"
+ elif [ -x "./ksft_runner.sh" ]; then
+ cmd="$stdbuf ./ksft_runner.sh ./$BASENAME_TEST"
else
- echo "not executable, correct this."
+ echo "# Warning: file $TEST is not executable"
+
+ if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ]
+ then
+ interpreter=$(head -n 1 "$TEST" | cut -c 3-)
+ cmd="$stdbuf $interpreter ./$BASENAME_TEST"
+ else
+ report_failure "$test_num $TEST_HDR_MSG"
+ return
+ fi
fi
- echo "not ok $test_num $TEST_HDR_MSG"
- else
cd `dirname $TEST` > /dev/null
- ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) |
+ ((((( tap_timeout "$cmd" 2>&1; echo $? >&3) |
tap_prefix >&4) 3>&1) |
(read xs; exit $xs)) 4>>"$logfile" &&
echo "ok $test_num $TEST_HDR_MSG") ||
@@ -84,14 +143,41 @@ run_one()
echo "ok $test_num $TEST_HDR_MSG # SKIP"
elif [ $rc -eq $timeout_rc ]; then \
echo "#"
- echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds"
+ report_failure "$test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds"
else
- echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
+ report_failure "$test_num $TEST_HDR_MSG # exit=$rc"
fi)
cd - >/dev/null
fi
}
+in_netns()
+{
+ local name=$1
+ ip netns exec $name bash <<-EOF
+ BASE_DIR=$BASE_DIR
+ source $BASE_DIR/kselftest/runner.sh
+ logfile=$logfile
+ run_one $DIR $TEST $test_num
+ EOF
+}
+
+run_in_netns()
+{
+ local netns=$(mktemp -u ${BASENAME_TEST}-XXXXXX)
+ local tmplog="/tmp/$(mktemp -u ${BASENAME_TEST}-XXXXXX)"
+ ip netns add $netns
+ if [ $? -ne 0 ]; then
+ echo "# Warning: Create namespace failed for $BASENAME_TEST"
+ echo "not ok $test_num selftests: $DIR: $BASENAME_TEST # Create NS failed"
+ fi
+ ip -n $netns link set lo up
+ in_netns $netns &> $tmplog
+ ip netns del $netns &> /dev/null
+ cat $tmplog
+ rm -f $tmplog
+}
+
run_many()
{
echo "TAP version 13"
@@ -106,6 +192,12 @@ run_many()
logfile="/tmp/$BASENAME_TEST"
cat /dev/null > "$logfile"
fi
- run_one "$DIR" "$TEST" "$test_num"
+ if [ -n "$RUN_IN_NETNS" ]; then
+ run_in_netns &
+ else
+ run_one "$DIR" "$TEST" "$test_num"
+ fi
done
+
+ wait
}
diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh
index bbc04646346b..487e49fdf2a6 100755
--- a/tools/testing/selftests/kselftest_deps.sh
+++ b/tools/testing/selftests/kselftest_deps.sh
@@ -12,9 +12,9 @@ usage()
echo -e "Usage: $0 -[p] <compiler> [test_name]\n"
echo -e "\tkselftest_deps.sh [-p] gcc"
-echo -e "\tkselftest_deps.sh [-p] gcc vm"
+echo -e "\tkselftest_deps.sh [-p] gcc mm"
echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc"
-echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc vm\n"
+echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc mm\n"
echo "- Should be run in selftests directory in the kernel repo."
echo "- Checks if Kselftests can be built/cross-built on a system."
echo "- Parses all test/sub-test Makefile to find library dependencies."
@@ -26,7 +26,7 @@ echo " main Makefile when optional -p is specified."
echo "- Prints pass/fail dependency check for each tests/sub-test."
echo "- Prints pass/fail targets and libraries."
echo "- Default: runs dependency checks on all tests."
-echo "- Optional test name can be specified to check dependencies for it."
+echo "- Optional: test name can be specified to check dependencies for it."
exit 1
}
@@ -46,11 +46,11 @@ fi
print_targets=0
while getopts "p" arg; do
- case $arg in
- p)
+ case $arg in
+ p)
print_targets=1
shift;;
- esac
+ esac
done
if [ $# -eq 0 ]
@@ -90,7 +90,11 @@ pass_libs=()
pass_cnt=0
# Get all TARGETS from selftests Makefile
-targets=$(egrep "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2)
+targets=$(grep -E "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2)
+
+# Initially, in LDLIBS related lines, the dep checker needs
+# to ignore lines containing the following strings:
+filter="\$(VAR_LDLIBS)\|pkg-config\|PKG_CONFIG\|IOURING_EXTRA_LIBS"
# Single test case
if [ $# -eq 2 ]
@@ -100,6 +104,8 @@ then
l1_test $test
l2_test $test
l3_test $test
+ l4_test $test
+ l5_test $test
print_results $1 $2
exit $?
@@ -113,38 +119,57 @@ fi
# Append space at the end of the list to append more tests.
l1_tests=$(grep -r --include=Makefile "^LDLIBS" | \
- grep -v "VAR_LDLIBS" | awk -F: '{print $1}')
+ grep -v "$filter" | awk -F: '{print $1}' | uniq)
# Level 2: LDLIBS set dynamically.
#
# Level 2
# Some tests have multiple valid LDLIBS lines for individual sub-tests
# that need dependency checks. Find them and append them to the tests
-# e.g: vm/Makefile:$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
+# e.g: mm/Makefile:$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
# Filter out VAR_LDLIBS to discard the following:
# memfd/Makefile:$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS)
# Append space at the end of the list to append more tests.
l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \
- grep -v "VAR_LDLIBS" | awk -F: '{print $1}')
+ grep -v "$filter" | awk -F: '{print $1}' | uniq)
# Level 3
-# gpio, memfd and others use pkg-config to find mount and fuse libs
+# memfd and others use pkg-config to find mount and fuse libs
# respectively and save it in VAR_LDLIBS. If pkg-config doesn't find
# any, VAR_LDLIBS set to default.
# Use the default value and filter out pkg-config for dependency check.
# e.g:
-# gpio/Makefile
-# VAR_LDLIBS := $(shell pkg-config --libs mount) 2>/dev/null)
# memfd/Makefile
# VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null)
l3_tests=$(grep -r --include=Makefile "^VAR_LDLIBS" | \
- grep -v "pkg-config" | awk -F: '{print $1}')
+ grep -v "pkg-config\|PKG_CONFIG" | awk -F: '{print $1}' | uniq)
-#echo $l1_tests
-#echo $l2_1_tests
-#echo $l3_tests
+# Level 4
+# some tests may fall back to default using `|| echo -l<libname>`
+# if pkg-config doesn't find the libs, instead of using VAR_LDLIBS
+# as per level 3 checks.
+# e.g:
+# netfilter/Makefile
+# LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+l4_tests=$(grep -r --include=Makefile "^LDLIBS" | \
+ grep "pkg-config\|PKG_CONFIG" | awk -F: '{print $1}' | uniq)
+
+# Level 5
+# some tests may use IOURING_EXTRA_LIBS to add extra libs to LDLIBS,
+# which in turn may be defined in a sub-Makefile
+# e.g.:
+# mm/Makefile
+# $(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS)
+l5_tests=$(grep -r --include=Makefile "LDLIBS +=.*\$(IOURING_EXTRA_LIBS)" | \
+ awk -F: '{print $1}' | uniq)
+
+#echo l1_tests $l1_tests
+#echo l2_tests $l2_tests
+#echo l3_tests $l3_tests
+#echo l4_tests $l4_tests
+#echo l5_tests $l5_tests
all_tests
print_results $1 $2
@@ -166,24 +191,32 @@ all_tests()
for test in $l3_tests; do
l3_test $test
done
+
+ for test in $l4_tests; do
+ l4_test $test
+ done
+
+ for test in $l5_tests; do
+ l5_test $test
+ done
}
# Use same parsing used for l1_tests and pick libraries this time.
l1_test()
{
test_libs=$(grep --include=Makefile "^LDLIBS" $test | \
- grep -v "VAR_LDLIBS" | \
+ grep -v "$filter" | \
sed -e 's/\:/ /' | \
sed -e 's/+/ /' | cut -d "=" -f 2)
check_libs $test $test_libs
}
-# Use same parsing used for l2__tests and pick libraries this time.
+# Use same parsing used for l2_tests and pick libraries this time.
l2_test()
{
test_libs=$(grep --include=Makefile ": LDLIBS" $test | \
- grep -v "VAR_LDLIBS" | \
+ grep -v "$filter" | \
sed -e 's/\:/ /' | sed -e 's/+/ /' | \
cut -d "=" -f 2)
@@ -199,6 +232,25 @@ l3_test()
check_libs $test $test_libs
}
+l4_test()
+{
+ test_libs=$(grep --include=Makefile "^VAR_LDLIBS\|^LDLIBS" $test | \
+ grep "\(pkg-config\|PKG_CONFIG\).*|| echo " | \
+ sed -e 's/.*|| echo //' | sed -e 's/)$//')
+
+ check_libs $test $test_libs
+}
+
+l5_test()
+{
+ tests=$(find $(dirname "$test") -type f -name "*.mk")
+ [[ -z "${tests// }" ]] && return
+ test_libs=$(grep "^IOURING_EXTRA_LIBS +\?=" $tests | \
+ cut -d "=" -f 2)
+
+ check_libs $test $test_libs
+}
+
check_libs()
{
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index edce85420d19..baae6b7ded41 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -14,7 +14,7 @@
*
* .. code-block:: c
*
- * #include "../kselftest_harness.h"
+ * #include "kselftest_harness.h"
*
* TEST(standalone_test) {
* do_some_stuff;
@@ -54,7 +54,10 @@
#define _GNU_SOURCE
#endif
#include <asm/types.h>
+#include <ctype.h>
#include <errno.h>
+#include <linux/unistd.h>
+#include <poll.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -79,7 +82,7 @@
#endif
/**
- * TH_LOG(fmt, ...)
+ * TH_LOG()
*
* @fmt: format string
* @...: optional arguments
@@ -93,14 +96,6 @@
* E.g., #define TH_LOG_ENABLED 1
*
* If no definition is provided, logging is enabled by default.
- *
- * If there is no way to print an error message for the process running the
- * test (e.g. not allowed to write to stderr), it is still possible to get the
- * ASSERT_* number for which the test failed. This behavior can be enabled by
- * writing `_metadata->no_print = true;` before the check sequence that is
- * unable to print. When an error occur, instead of printing an error message
- * and calling `abort(3)`, the test process call `_exit(2)` with the assert
- * number as argument, which is then printed by the parent process.
*/
#define TH_LOG(fmt, ...) do { \
if (TH_LOG_ENABLED) \
@@ -113,12 +108,16 @@
__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
/**
- * SKIP(statement, fmt, ...)
+ * SKIP()
*
* @statement: statement to run after reporting SKIP
* @fmt: format string
* @...: optional arguments
*
+ * .. code-block:: c
+ *
+ * SKIP(statement, fmt, ...);
+ *
* This forces a "pass" after reporting why something is being skipped
* and runs "statement", which is usually "return" or "goto skip".
*/
@@ -129,14 +128,13 @@
fprintf(TH_LOG_STREAM, "# SKIP %s\n", \
_metadata->results->reason); \
} \
- _metadata->passed = 1; \
- _metadata->skip = 1; \
+ _metadata->exit_code = KSFT_SKIP; \
_metadata->trigger = 0; \
statement; \
} while (0)
/**
- * TEST(test_name) - Defines the test function and creates the registration
+ * TEST() - Defines the test function and creates the registration
* stub
*
* @test_name: test name
@@ -155,7 +153,7 @@
#define TEST(test_name) __TEST_IMPL(test_name, -1)
/**
- * TEST_SIGNAL(test_name, signal)
+ * TEST_SIGNAL()
*
* @test_name: test name
* @signal: signal number
@@ -175,9 +173,9 @@
#define __TEST_IMPL(test_name, _signal) \
static void test_name(struct __test_metadata *_metadata); \
- static inline void wrapper_##test_name( \
+ static void wrapper_##test_name( \
struct __test_metadata *_metadata, \
- struct __fixture_variant_metadata *variant) \
+ struct __fixture_variant_metadata __attribute__((unused)) *variant) \
{ \
test_name(_metadata); \
} \
@@ -195,7 +193,7 @@
struct __test_metadata __attribute__((unused)) *_metadata)
/**
- * FIXTURE_DATA(datatype_name) - Wraps the struct name so we have one less
+ * FIXTURE_DATA() - Wraps the struct name so we have one less
* argument to pass around
*
* @datatype_name: datatype name
@@ -212,7 +210,7 @@
#define FIXTURE_DATA(datatype_name) struct _test_data_##datatype_name
/**
- * FIXTURE(fixture_name) - Called once per fixture to setup the data and
+ * FIXTURE() - Called once per fixture to setup the data and
* register
*
* @fixture_name: fixture name
@@ -239,8 +237,8 @@
FIXTURE_DATA(fixture_name)
/**
- * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture.
- * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ * FIXTURE_SETUP() - Prepares the setup function for the fixture.
+ * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly.
*
* @fixture_name: fixture name
*
@@ -258,15 +256,15 @@
* A bare "return;" statement may be used to return early.
*/
#define FIXTURE_SETUP(fixture_name) \
- void fixture_name##_setup( \
+ static void fixture_name##_setup( \
struct __test_metadata __attribute__((unused)) *_metadata, \
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
const FIXTURE_VARIANT(fixture_name) \
__attribute__((unused)) *variant)
/**
- * FIXTURE_TEARDOWN(fixture_name)
- * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ * FIXTURE_TEARDOWN()
+ * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly.
*
* @fixture_name: fixture name
*
@@ -281,12 +279,40 @@
* A bare "return;" statement may be used to return early.
*/
#define FIXTURE_TEARDOWN(fixture_name) \
- void fixture_name##_teardown( \
+ static const bool fixture_name##_teardown_parent; \
+ __FIXTURE_TEARDOWN(fixture_name)
+
+/**
+ * FIXTURE_TEARDOWN_PARENT()
+ * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly.
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_TEARDOWN_PARENT(fixture_name) { implementation }
+ *
+ * Same as FIXTURE_TEARDOWN() but run this code in a parent process. This
+ * enables the test process to drop its privileges without impacting the
+ * related FIXTURE_TEARDOWN_PARENT() (e.g. to remove files from a directory
+ * where write access was dropped).
+ *
+ * To make it possible for the parent process to use *self*, share (MAP_SHARED)
+ * the fixture data between all forked processes.
+ */
+#define FIXTURE_TEARDOWN_PARENT(fixture_name) \
+ static const bool fixture_name##_teardown_parent = true; \
+ __FIXTURE_TEARDOWN(fixture_name)
+
+#define __FIXTURE_TEARDOWN(fixture_name) \
+ static void fixture_name##_teardown( \
struct __test_metadata __attribute__((unused)) *_metadata, \
- FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ __attribute__((unused)) *variant)
/**
- * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture
+ * FIXTURE_VARIANT() - Optionally called once per fixture
* to declare fixture variant
*
* @fixture_name: fixture name
@@ -298,14 +324,14 @@
* ...
* };
*
- * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F()
- * as *variant*. Variants allow the same tests to be run with different
- * arguments.
+ * Defines type of constant parameters provided to FIXTURE_SETUP(), TEST_F() and
+ * FIXTURE_TEARDOWN as *variant*. Variants allow the same tests to be run with
+ * different arguments.
*/
#define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
/**
- * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture
+ * FIXTURE_VARIANT_ADD() - Called once per fixture
* variant to setup and register the data
*
* @fixture_name: fixture name
@@ -323,7 +349,7 @@
* variant.
*/
#define FIXTURE_VARIANT_ADD(fixture_name, variant_name) \
- extern FIXTURE_VARIANT(fixture_name) \
+ extern const FIXTURE_VARIANT(fixture_name) \
_##fixture_name##_##variant_name##_variant; \
static struct __fixture_variant_metadata \
_##fixture_name##_##variant_name##_object = \
@@ -335,11 +361,11 @@
__register_fixture_variant(&_##fixture_name##_fixture_object, \
&_##fixture_name##_##variant_name##_object); \
} \
- FIXTURE_VARIANT(fixture_name) \
+ const FIXTURE_VARIANT(fixture_name) \
_##fixture_name##_##variant_name##_variant =
/**
- * TEST_F(fixture_name, test_name) - Emits test registration and helpers for
+ * TEST_F() - Emits test registration and helpers for
* fixture-based test cases
*
* @fixture_name: fixture name
@@ -353,9 +379,12 @@
* Very similar to TEST() except that *self* is the setup instance of fixture's
* datatype exposed for use by the implementation.
*
- * Warning: use of ASSERT_* here will skip TEARDOWN.
+ * The _metadata object is shared (MAP_SHARED) with all the potential forked
+ * processes, which enables them to use EXCEPT_*() and ASSERT_*().
+ *
+ * The *self* object is only shared with the potential forked processes if
+ * FIXTURE_TEARDOWN_PARENT() is used instead of FIXTURE_TEARDOWN().
*/
-/* TODO(wad) register fixtures on dedicated test lists. */
#define TEST_F(fixture_name, test_name) \
__TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT)
@@ -370,32 +399,79 @@
struct __test_metadata *_metadata, \
FIXTURE_DATA(fixture_name) *self, \
const FIXTURE_VARIANT(fixture_name) *variant); \
- static inline void wrapper_##fixture_name##_##test_name( \
+ static void wrapper_##fixture_name##_##test_name( \
struct __test_metadata *_metadata, \
struct __fixture_variant_metadata *variant) \
{ \
/* fixture data is alloced, setup, and torn down per call. */ \
- FIXTURE_DATA(fixture_name) self; \
- memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
- fixture_name##_setup(_metadata, &self, variant->data); \
- /* Let setup failure terminate early. */ \
- if (!_metadata->passed) \
- return; \
- fixture_name##_##test_name(_metadata, &self, variant->data); \
- fixture_name##_teardown(_metadata, &self); \
+ FIXTURE_DATA(fixture_name) self_private, *self = NULL; \
+ pid_t child = 1; \
+ int status = 0; \
+ /* Makes sure there is only one teardown, even when child forks again. */ \
+ _metadata->no_teardown = mmap(NULL, sizeof(*_metadata->no_teardown), \
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
+ *_metadata->no_teardown = true; \
+ if (sizeof(*self) > 0) { \
+ if (fixture_name##_teardown_parent) { \
+ self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
+ } else { \
+ memset(&self_private, 0, sizeof(self_private)); \
+ self = &self_private; \
+ } \
+ } \
+ _metadata->variant = variant->data; \
+ _metadata->self = self; \
+ /* _metadata and potentially self are shared with all forks. */ \
+ child = fork(); \
+ if (child == 0) { \
+ fixture_name##_setup(_metadata, self, variant->data); \
+ /* Let setup failure terminate early. */ \
+ if (_metadata->exit_code) \
+ _exit(0); \
+ *_metadata->no_teardown = false; \
+ fixture_name##_##test_name(_metadata, self, variant->data); \
+ _metadata->teardown_fn(false, _metadata, self, variant->data); \
+ _exit(0); \
+ } else if (child < 0 || child != waitpid(child, &status, 0)) { \
+ ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
+ _metadata->exit_code = KSFT_FAIL; \
+ } \
+ _metadata->teardown_fn(true, _metadata, self, variant->data); \
+ munmap(_metadata->no_teardown, sizeof(*_metadata->no_teardown)); \
+ _metadata->no_teardown = NULL; \
+ if (self && fixture_name##_teardown_parent) \
+ munmap(self, sizeof(*self)); \
+ if (WIFEXITED(status)) { \
+ if (WEXITSTATUS(status)) \
+ _metadata->exit_code = WEXITSTATUS(status); \
+ } else if (WIFSIGNALED(status)) { \
+ /* Forward signal to __wait_for_test(). */ \
+ kill(getpid(), WTERMSIG(status)); \
+ } \
} \
- static struct __test_metadata \
- _##fixture_name##_##test_name##_object = { \
- .name = #test_name, \
- .fn = &wrapper_##fixture_name##_##test_name, \
- .fixture = &_##fixture_name##_fixture_object, \
- .termsig = signal, \
- .timeout = tmout, \
- }; \
+ static void wrapper_##fixture_name##_##test_name##_teardown( \
+ bool in_parent, struct __test_metadata *_metadata, \
+ void *self, const void *variant) \
+ { \
+ if (fixture_name##_teardown_parent == in_parent && \
+ !__atomic_test_and_set(_metadata->no_teardown, __ATOMIC_RELAXED)) \
+ fixture_name##_teardown(_metadata, self, variant); \
+ } \
+ static struct __test_metadata *_##fixture_name##_##test_name##_object; \
static void __attribute__((constructor)) \
_register_##fixture_name##_##test_name(void) \
{ \
- __register_test(&_##fixture_name##_##test_name##_object); \
+ struct __test_metadata *object = mmap(NULL, sizeof(*object), \
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
+ object->name = #test_name; \
+ object->fn = &wrapper_##fixture_name##_##test_name; \
+ object->fixture = &_##fixture_name##_fixture_object; \
+ object->teardown_fn = &wrapper_##fixture_name##_##test_name##_teardown; \
+ object->termsig = signal; \
+ object->timeout = tmout; \
+ _##fixture_name##_##test_name##_object = object; \
+ __register_test(object); \
} \
static void fixture_name##_##test_name( \
struct __test_metadata __attribute__((unused)) *_metadata, \
@@ -413,12 +489,6 @@
* Use once to append a main() to the test file.
*/
#define TEST_HARNESS_MAIN \
- static void __attribute__((constructor)) \
- __constructor_order_last(void) \
- { \
- if (!__constructor_order) \
- __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \
- } \
int main(int argc, char **argv) { \
return test_harness_run(argc, argv); \
}
@@ -667,7 +737,9 @@
#define EXPECT_STRNE(expected, seen) \
__EXPECT_STR(expected, seen, !=, 0)
+#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+#endif
/* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is
* not thread-safe, but it should be fine in most sane test scenarios.
@@ -677,57 +749,51 @@
*/
#define OPTIONAL_HANDLER(_assert) \
for (; _metadata->trigger; _metadata->trigger = \
- __bail(_assert, _metadata->no_print, _metadata->step))
-
-#define __INC_STEP(_metadata) \
- /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \
- if (_metadata->passed && _metadata->step < 253) \
- _metadata->step++;
+ __bail(_assert, _metadata))
-#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
+#define is_signed_var(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
/* Avoid multiple evaluation of the cases */ \
__typeof__(_expected) __exp = (_expected); \
__typeof__(_seen) __seen = (_seen); \
- if (_assert) __INC_STEP(_metadata); \
if (!(__exp _t __seen)) { \
/* Report with actual signedness to avoid weird output. */ \
- switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \
+ switch (is_signed_var(__exp) * 2 + is_signed_var(__seen)) { \
case 0: { \
- unsigned long long __exp_print = (uintptr_t)__exp; \
- unsigned long long __seen_print = (uintptr_t)__seen; \
- __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+ uintmax_t __exp_print = (uintmax_t)__exp; \
+ uintmax_t __seen_print = (uintmax_t)__seen; \
+ __TH_LOG("Expected %s (%ju) %s %s (%ju)", \
_expected_str, __exp_print, #_t, \
_seen_str, __seen_print); \
break; \
} \
case 1: { \
- unsigned long long __exp_print = (uintptr_t)__exp; \
- long long __seen_print = (intptr_t)__seen; \
- __TH_LOG("Expected %s (%llu) %s %s (%lld)", \
+ uintmax_t __exp_print = (uintmax_t)__exp; \
+ intmax_t __seen_print = (intmax_t)__seen; \
+ __TH_LOG("Expected %s (%ju) %s %s (%jd)", \
_expected_str, __exp_print, #_t, \
_seen_str, __seen_print); \
break; \
} \
case 2: { \
- long long __exp_print = (intptr_t)__exp; \
- unsigned long long __seen_print = (uintptr_t)__seen; \
- __TH_LOG("Expected %s (%lld) %s %s (%llu)", \
+ intmax_t __exp_print = (intmax_t)__exp; \
+ uintmax_t __seen_print = (uintmax_t)__seen; \
+ __TH_LOG("Expected %s (%jd) %s %s (%ju)", \
_expected_str, __exp_print, #_t, \
_seen_str, __seen_print); \
break; \
} \
case 3: { \
- long long __exp_print = (intptr_t)__exp; \
- long long __seen_print = (intptr_t)__seen; \
- __TH_LOG("Expected %s (%lld) %s %s (%lld)", \
+ intmax_t __exp_print = (intmax_t)__exp; \
+ intmax_t __seen_print = (intmax_t)__seen; \
+ __TH_LOG("Expected %s (%jd) %s %s (%jd)", \
_expected_str, __exp_print, #_t, \
_seen_str, __seen_print); \
break; \
} \
} \
- _metadata->passed = 0; \
+ _metadata->exit_code = KSFT_FAIL; \
/* Ensure the optional handler is triggered */ \
_metadata->trigger = 1; \
} \
@@ -736,10 +802,9 @@
#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
const char *__exp = (_expected); \
const char *__seen = (_seen); \
- if (_assert) __INC_STEP(_metadata); \
if (!(strcmp(__exp, __seen) _t 0)) { \
__TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
- _metadata->passed = 0; \
+ _metadata->exit_code = KSFT_FAIL; \
_metadata->trigger = 1; \
} \
} while (0); OPTIONAL_HANDLER(_assert)
@@ -754,7 +819,7 @@
item->prev = item; \
return; \
} \
- if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { \
+ if (__constructor_order_forward) { \
item->next = NULL; \
item->prev = head->prev; \
item->prev->next = item; \
@@ -785,11 +850,40 @@ struct __fixture_metadata {
.prev = &_fixture_global,
};
-static struct __fixture_metadata *__fixture_list = &_fixture_global;
-static int __constructor_order;
+struct __test_xfail {
+ struct __fixture_metadata *fixture;
+ struct __fixture_variant_metadata *variant;
+ struct __test_metadata *test;
+ struct __test_xfail *prev, *next;
+};
-#define _CONSTRUCTOR_ORDER_FORWARD 1
-#define _CONSTRUCTOR_ORDER_BACKWARD -1
+/**
+ * XFAIL_ADD() - mark variant + test case combination as expected to fail
+ * @fixture_name: name of the fixture
+ * @variant_name: name of the variant
+ * @test_name: name of the test case
+ *
+ * Mark a combination of variant + test case for a given fixture as expected
+ * to fail. Tests marked this way will report XPASS / XFAIL return codes,
+ * instead of PASS / FAIL,and use respective counters.
+ */
+#define XFAIL_ADD(fixture_name, variant_name, test_name) \
+ static struct __test_xfail \
+ _##fixture_name##_##variant_name##_##test_name##_xfail = \
+ { \
+ .fixture = &_##fixture_name##_fixture_object, \
+ .variant = &_##fixture_name##_##variant_name##_object, \
+ }; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \
+ { \
+ _##fixture_name##_##variant_name##_##test_name##_xfail.test = \
+ _##fixture_name##_##test_name##_object; \
+ __register_xfail(&_##fixture_name##_##variant_name##_##test_name##_xfail); \
+ }
+
+static struct __fixture_metadata *__fixture_list = &_fixture_global;
+static bool __constructor_order_forward;
static inline void __register_fixture(struct __fixture_metadata *f)
{
@@ -799,6 +893,7 @@ static inline void __register_fixture(struct __fixture_metadata *f)
struct __fixture_variant_metadata {
const char *name;
const void *data;
+ struct __test_xfail *xfails;
struct __fixture_variant_metadata *prev, *next;
};
@@ -816,24 +911,32 @@ struct __test_metadata {
struct __fixture_variant_metadata *);
pid_t pid; /* pid of test when being run */
struct __fixture_metadata *fixture;
+ void (*teardown_fn)(bool in_parent, struct __test_metadata *_metadata,
+ void *self, const void *variant);
int termsig;
- int passed;
- int skip; /* did SKIP get used? */
+ int exit_code;
int trigger; /* extra handler after the evaluation */
int timeout; /* seconds to wait for test timeout */
- bool timed_out; /* did this test timeout instead of exiting? */
- __u8 step;
- bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+ bool aborted; /* stopped test due to failed ASSERT */
+ bool *no_teardown; /* fixture needs teardown */
+ void *self;
+ const void *variant;
struct __test_results *results;
struct __test_metadata *prev, *next;
};
+static inline bool __test_passed(struct __test_metadata *metadata)
+{
+ return metadata->exit_code != KSFT_FAIL &&
+ metadata->exit_code <= KSFT_SKIP;
+}
+
/*
* Since constructors are called in reverse order, reverse the test
* list so tests are run in source declaration order.
* https://gcc.gnu.org/onlinedocs/gccint/Initialization.html
* However, it seems not all toolchains do this correctly, so use
- * __constructor_order to detect which direction is called first
+ * __constructor_order_foward to detect which direction is called first
* and adjust list building logic to get things running in the right
* direction.
*/
@@ -842,75 +945,79 @@ static inline void __register_test(struct __test_metadata *t)
__LIST_APPEND(t->fixture->tests, t);
}
-static inline int __bail(int for_realz, bool no_print, __u8 step)
+static inline void __register_xfail(struct __test_xfail *xf)
+{
+ __LIST_APPEND(xf->variant->xfails, xf);
+}
+
+static inline int __bail(int for_realz, struct __test_metadata *t)
{
+ /* if this is ASSERT, return immediately. */
if (for_realz) {
- if (no_print)
- _exit(step);
+ if (t->teardown_fn)
+ t->teardown_fn(false, t, t->self, t->variant);
abort();
}
+ /* otherwise, end the for loop and continue. */
return 0;
}
-struct __test_metadata *__active_test;
-static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
+static void __wait_for_test(struct __test_metadata *t)
{
- struct __test_metadata *t = __active_test;
-
- /* Sanity check handler execution environment. */
- if (!t) {
+ /*
+ * Sets status so that WIFEXITED(status) returns true and
+ * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value
+ * should never be evaluated because of the waitpid(2) check and
+ * timeout handling.
+ */
+ int status = KSFT_FAIL << 8;
+ struct pollfd poll_child;
+ int ret, child, childfd;
+ bool timed_out = false;
+
+ childfd = syscall(__NR_pidfd_open, t->pid, 0);
+ if (childfd == -1) {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# no active test in SIGALRM handler!?\n");
- abort();
- }
- if (sig != SIGALRM || sig != info->si_signo) {
- fprintf(TH_LOG_STREAM,
- "# %s: SIGALRM handler caught signal %d!?\n",
- t->name, sig != SIGALRM ? sig : info->si_signo);
- abort();
+ "# %s: unable to open pidfd\n",
+ t->name);
+ return;
}
- t->timed_out = true;
- kill(t->pid, SIGKILL);
-}
-
-void __wait_for_test(struct __test_metadata *t)
-{
- struct sigaction action = {
- .sa_sigaction = __timeout_handler,
- .sa_flags = SA_SIGINFO,
- };
- struct sigaction saved_action;
- int status;
-
- if (sigaction(SIGALRM, &action, &saved_action)) {
- t->passed = 0;
+ poll_child.fd = childfd;
+ poll_child.events = POLLIN;
+ ret = poll(&poll_child, 1, t->timeout * 1000);
+ if (ret == -1) {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: unable to install SIGALRM handler\n",
+ "# %s: unable to wait on child pidfd\n",
t->name);
return;
+ } else if (ret == 0) {
+ timed_out = true;
+ /* signal process group */
+ kill(-(t->pid), SIGKILL);
}
- __active_test = t;
- t->timed_out = false;
- alarm(t->timeout);
- waitpid(t->pid, &status, 0);
- alarm(0);
- if (sigaction(SIGALRM, &saved_action, NULL)) {
- t->passed = 0;
+ child = waitpid(t->pid, &status, WNOHANG);
+ if (child == -1 && errno != EINTR) {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: unable to uninstall SIGALRM handler\n",
- t->name);
+ "# %s: Failed to wait for PID %d (errno: %d)\n",
+ t->name, t->pid, errno);
return;
}
- __active_test = NULL;
- if (t->timed_out) {
- t->passed = 0;
+ if (timed_out) {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by timeout\n", t->name);
} else if (WIFEXITED(status)) {
- if (t->termsig != -1) {
- t->passed = 0;
+ if (WEXITSTATUS(status) == KSFT_SKIP ||
+ WEXITSTATUS(status) == KSFT_XPASS ||
+ WEXITSTATUS(status) == KSFT_XFAIL) {
+ t->exit_code = WEXITSTATUS(status);
+ } else if (t->termsig != -1) {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test exited normally instead of by signal (code: %d)\n",
t->name,
@@ -918,31 +1025,25 @@ void __wait_for_test(struct __test_metadata *t)
} else {
switch (WEXITSTATUS(status)) {
/* Success */
- case 0:
- t->passed = 1;
- break;
- /* SKIP */
- case 255:
- t->passed = 1;
- t->skip = 1;
+ case KSFT_PASS:
+ t->exit_code = KSFT_PASS;
break;
- /* Other failure, assume step report. */
+ /* Failure */
default:
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: Test failed at step #%d\n",
- t->name,
- WEXITSTATUS(status));
+ "# %s: Test failed\n",
+ t->name);
}
}
} else if (WIFSIGNALED(status)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
if (WTERMSIG(status) == SIGABRT) {
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by assertion\n",
t->name);
} else if (WTERMSIG(status) == t->termsig) {
- t->passed = 1;
+ t->exit_code = KSFT_PASS;
} else {
fprintf(TH_LOG_STREAM,
"# %s: Test terminated unexpectedly by signal %d\n",
@@ -950,6 +1051,7 @@ void __wait_for_test(struct __test_metadata *t)
WTERMSIG(status));
}
} else {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test ended in some other way [%u]\n",
t->name,
@@ -957,70 +1059,216 @@ void __wait_for_test(struct __test_metadata *t)
}
}
-void __run_test(struct __fixture_metadata *f,
- struct __fixture_variant_metadata *variant,
- struct __test_metadata *t)
+static void test_harness_list_tests(void)
+{
+ struct __fixture_variant_metadata *v;
+ struct __fixture_metadata *f;
+ struct __test_metadata *t;
+
+ for (f = __fixture_list; f; f = f->next) {
+ v = f->variant;
+ t = f->tests;
+
+ if (f == __fixture_list)
+ fprintf(stderr, "%-20s %-25s %s\n",
+ "# FIXTURE", "VARIANT", "TEST");
+ else
+ fprintf(stderr, "--------------------------------------------------------------------------------\n");
+
+ do {
+ fprintf(stderr, "%-20s %-25s %s\n",
+ t == f->tests ? f->name : "",
+ v ? v->name : "",
+ t ? t->name : "");
+
+ v = v ? v->next : NULL;
+ t = t ? t->next : NULL;
+ } while (v || t);
+ }
+}
+
+static int test_harness_argv_check(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "dhlF:f:V:v:t:T:r:")) != -1) {
+ switch (opt) {
+ case 'f':
+ case 'F':
+ case 'v':
+ case 'V':
+ case 't':
+ case 'T':
+ case 'r':
+ break;
+ case 'l':
+ test_harness_list_tests();
+ return KSFT_SKIP;
+ case 'd':
+ ksft_debug_enabled = true;
+ break;
+ case 'h':
+ default:
+ fprintf(stderr,
+ "Usage: %s [-h|-l|-d] [-t|-T|-v|-V|-f|-F|-r name]\n"
+ "\t-h print help\n"
+ "\t-l list all tests\n"
+ "\t-d enable debug prints\n"
+ "\n"
+ "\t-t name include test\n"
+ "\t-T name exclude test\n"
+ "\t-v name include variant\n"
+ "\t-V name exclude variant\n"
+ "\t-f name include fixture\n"
+ "\t-F name exclude fixture\n"
+ "\t-r name run specified test\n"
+ "\n"
+ "Test filter options can be specified "
+ "multiple times. The filtering stops\n"
+ "at the first match. For example to "
+ "include all tests from variant 'bla'\n"
+ "but not test 'foo' specify '-T foo -v bla'.\n"
+ "", argv[0]);
+ return opt == 'h' ? KSFT_SKIP : KSFT_FAIL;
+ }
+ }
+
+ return KSFT_PASS;
+}
+
+static bool test_enabled(int argc, char **argv,
+ struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *v,
+ struct __test_metadata *t)
{
+ unsigned int flen = 0, vlen = 0, tlen = 0;
+ bool has_positive = false;
+ int opt;
+
+ optind = 1;
+ while ((opt = getopt(argc, argv, "dF:f:V:v:t:T:r:")) != -1) {
+ if (opt != 'd')
+ has_positive |= islower(opt);
+
+ switch (tolower(opt)) {
+ case 't':
+ if (!strcmp(t->name, optarg))
+ return islower(opt);
+ break;
+ case 'f':
+ if (!strcmp(f->name, optarg))
+ return islower(opt);
+ break;
+ case 'v':
+ if (!strcmp(v->name, optarg))
+ return islower(opt);
+ break;
+ case 'r':
+ if (!tlen) {
+ flen = strlen(f->name);
+ vlen = strlen(v->name);
+ tlen = strlen(t->name);
+ }
+ if (strlen(optarg) == flen + 1 + vlen + !!vlen + tlen &&
+ !strncmp(f->name, &optarg[0], flen) &&
+ !strncmp(v->name, &optarg[flen + 1], vlen) &&
+ !strncmp(t->name, &optarg[flen + 1 + vlen + !!vlen], tlen))
+ return true;
+ break;
+ }
+ }
+
+ /*
+ * If there are no positive tests then we assume user just wants
+ * exclusions and everything else is a pass.
+ */
+ return !has_positive;
+}
+
+static void __run_test(struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *variant,
+ struct __test_metadata *t)
+{
+ struct __test_xfail *xfail;
+ char test_name[1024];
+ const char *diagnostic;
+ int child;
+
/* reset test struct */
- t->passed = 1;
- t->skip = 0;
+ t->exit_code = KSFT_PASS;
t->trigger = 0;
- t->step = 0;
- t->no_print = 0;
+ t->aborted = false;
+ t->no_teardown = NULL;
memset(t->results->reason, 0, sizeof(t->results->reason));
- ksft_print_msg(" RUN %s%s%s.%s ...\n",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+ ksft_print_msg(" RUN %s ...\n", test_name);
/* Make sure output buffers are flushed before fork */
fflush(stdout);
fflush(stderr);
- t->pid = fork();
- if (t->pid < 0) {
+ child = fork();
+ if (child < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
- t->passed = 0;
- } else if (t->pid == 0) {
+ t->exit_code = KSFT_FAIL;
+ } else if (child == 0) {
+ setpgrp();
t->fn(t, variant);
- if (t->skip)
- _exit(255);
- /* Pass is exit 0 */
- if (t->passed)
- _exit(0);
- /* Something else happened, report the step. */
- _exit(t->step);
+ _exit(t->exit_code);
} else {
+ t->pid = child;
__wait_for_test(t);
}
- ksft_print_msg(" %4s %s%s%s.%s\n", t->passed ? "OK" : "FAIL",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
-
- if (t->skip)
- ksft_test_result_skip("%s\n", t->results->reason[0] ?
- t->results->reason : "unknown");
+ ksft_print_msg(" %4s %s\n",
+ __test_passed(t) ? "OK" : "FAIL", test_name);
+
+ /* Check if we're expecting this test to fail */
+ for (xfail = variant->xfails; xfail; xfail = xfail->next)
+ if (xfail->test == t)
+ break;
+ if (xfail)
+ t->exit_code = __test_passed(t) ? KSFT_XPASS : KSFT_XFAIL;
+
+ if (t->results->reason[0])
+ diagnostic = t->results->reason;
+ else if (t->exit_code == KSFT_PASS || t->exit_code == KSFT_FAIL)
+ diagnostic = NULL;
else
- ksft_test_result(t->passed, "%s%s%s.%s\n",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ diagnostic = "unknown";
+
+ ksft_test_result_code(t->exit_code, test_name,
+ diagnostic ? "%s" : NULL, diagnostic);
}
-static int test_harness_run(int __attribute__((unused)) argc,
- char __attribute__((unused)) **argv)
+static int test_harness_run(int argc, char **argv)
{
struct __fixture_variant_metadata no_variant = { .name = "", };
struct __fixture_variant_metadata *v;
struct __fixture_metadata *f;
struct __test_results *results;
struct __test_metadata *t;
- int ret = 0;
+ int ret;
unsigned int case_count = 0, test_count = 0;
unsigned int count = 0;
unsigned int pass_count = 0;
+ ret = test_harness_argv_check(argc, argv);
+ if (ret != KSFT_PASS)
+ return ret;
+
for (f = __fixture_list; f; f = f->next) {
for (v = f->variant ?: &no_variant; v; v = v->next) {
- case_count++;
+ unsigned int old_tests = test_count;
+
for (t = f->tests; t; t = t->next)
- test_count++;
+ if (test_enabled(argc, argv, f, v, t))
+ test_count++;
+
+ if (old_tests != test_count)
+ case_count++;
}
}
@@ -1034,11 +1282,13 @@ static int test_harness_run(int __attribute__((unused)) argc,
for (f = __fixture_list; f; f = f->next) {
for (v = f->variant ?: &no_variant; v; v = v->next) {
for (t = f->tests; t; t = t->next) {
+ if (!test_enabled(argc, argv, f, v, t))
+ continue;
count++;
t->results = results;
__run_test(f, v, t);
t->results = NULL;
- if (t->passed)
+ if (__test_passed(t))
pass_count++;
else
ret = 1;
@@ -1057,8 +1307,7 @@ static int test_harness_run(int __attribute__((unused)) argc,
static void __attribute__((constructor)) __constructor_order_first(void)
{
- if (!__constructor_order)
- __constructor_order = _CONSTRUCTOR_ORDER_FORWARD;
+ __constructor_order_forward = true;
}
#endif /* __KSELFTEST_HARNESS_H */
diff --git a/tools/testing/selftests/kselftest_harness/.gitignore b/tools/testing/selftests/kselftest_harness/.gitignore
new file mode 100644
index 000000000000..e4e476a333c9
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/.gitignore
@@ -0,0 +1,2 @@
+/harness-selftest
+/harness-selftest.seen
diff --git a/tools/testing/selftests/kselftest_harness/Makefile b/tools/testing/selftests/kselftest_harness/Makefile
new file mode 100644
index 000000000000..d2369c01701a
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS_EXTENDED := harness-selftest
+TEST_PROGS := harness-selftest.sh
+TEST_FILES := harness-selftest.expected
+EXTRA_CLEAN := harness-selftest.seen
+
+include ../lib.mk
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.c b/tools/testing/selftests/kselftest_harness/harness-selftest.c
new file mode 100644
index 000000000000..7820bb5d0e6d
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+
+#include <sys/resource.h>
+#include <sys/prctl.h>
+
+/* Avoid any inconsistencies */
+#define TH_LOG_STREAM stdout
+
+#include "kselftest_harness.h"
+
+static void test_helper(struct __test_metadata *_metadata)
+{
+ ASSERT_EQ(0, 0);
+}
+
+TEST(standalone_pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ EXPECT_EQ(0, 0);
+ test_helper(_metadata);
+ TH_LOG("after");
+}
+
+TEST(standalone_fail) {
+ TH_LOG("before");
+ EXPECT_EQ(0, 0);
+ EXPECT_EQ(0, 1);
+ ASSERT_EQ(0, 1);
+ TH_LOG("after");
+}
+
+TEST_SIGNAL(signal_pass, SIGUSR1) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+ kill(getpid(), SIGUSR1);
+}
+
+TEST_SIGNAL(signal_fail, SIGUSR1) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 1);
+ TH_LOG("after");
+ kill(getpid(), SIGUSR1);
+}
+
+FIXTURE(fixture) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+}
+
+FIXTURE_TEARDOWN(fixture) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ test_helper(_metadata);
+ standalone_pass(_metadata);
+ TH_LOG("after");
+}
+
+TEST_F(fixture, fail) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 1);
+ fixture_pass(_metadata, self, variant);
+ TH_LOG("after");
+}
+
+TEST_F_TIMEOUT(fixture, timeout, 1) {
+ TH_LOG("before");
+ sleep(2);
+ TH_LOG("after");
+}
+
+FIXTURE(fixture_parent) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture_parent) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+}
+
+FIXTURE_TEARDOWN_PARENT(fixture_parent) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture_parent, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+}
+
+FIXTURE(fixture_setup_failure) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture_setup_failure) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+ ASSERT_EQ(0, 1);
+}
+
+FIXTURE_TEARDOWN(fixture_setup_failure) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture_setup_failure, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+}
+
+int main(int argc, char **argv)
+{
+ /*
+ * The harness uses abort() to signal assertion failures, which triggers coredumps.
+ * This may be useful to debug real failures but not for this selftest, disable them.
+ */
+ struct rlimit rlimit = {
+ .rlim_cur = 0,
+ .rlim_max = 0,
+ };
+
+ prctl(PR_SET_DUMPABLE, 0, 0, 0, 0);
+ setrlimit(RLIMIT_CORE, &rlimit);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.expected b/tools/testing/selftests/kselftest_harness/harness-selftest.expected
new file mode 100644
index 000000000000..97e1418c1c7e
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.expected
@@ -0,0 +1,64 @@
+TAP version 13
+1..9
+# Starting 9 tests from 4 test cases.
+# RUN global.standalone_pass ...
+# harness-selftest.c:19:standalone_pass:before
+# harness-selftest.c:23:standalone_pass:after
+# OK global.standalone_pass
+ok 1 global.standalone_pass
+# RUN global.standalone_fail ...
+# harness-selftest.c:27:standalone_fail:before
+# harness-selftest.c:29:standalone_fail:Expected 0 (0) == 1 (1)
+# harness-selftest.c:30:standalone_fail:Expected 0 (0) == 1 (1)
+# standalone_fail: Test terminated by assertion
+# FAIL global.standalone_fail
+not ok 2 global.standalone_fail
+# RUN global.signal_pass ...
+# harness-selftest.c:35:signal_pass:before
+# harness-selftest.c:37:signal_pass:after
+# OK global.signal_pass
+ok 3 global.signal_pass
+# RUN global.signal_fail ...
+# harness-selftest.c:42:signal_fail:before
+# harness-selftest.c:43:signal_fail:Expected 0 (0) == 1 (1)
+# signal_fail: Test terminated by assertion
+# FAIL global.signal_fail
+not ok 4 global.signal_fail
+# RUN fixture.pass ...
+# harness-selftest.c:53:pass:setup
+# harness-selftest.c:62:pass:before
+# harness-selftest.c:19:pass:before
+# harness-selftest.c:23:pass:after
+# harness-selftest.c:66:pass:after
+# harness-selftest.c:58:pass:teardown same-process=1
+# OK fixture.pass
+ok 5 fixture.pass
+# RUN fixture.fail ...
+# harness-selftest.c:53:fail:setup
+# harness-selftest.c:70:fail:before
+# harness-selftest.c:71:fail:Expected 0 (0) == 1 (1)
+# harness-selftest.c:58:fail:teardown same-process=1
+# fail: Test terminated by assertion
+# FAIL fixture.fail
+not ok 6 fixture.fail
+# RUN fixture.timeout ...
+# harness-selftest.c:53:timeout:setup
+# harness-selftest.c:77:timeout:before
+# timeout: Test terminated by timeout
+# FAIL fixture.timeout
+not ok 7 fixture.timeout
+# RUN fixture_parent.pass ...
+# harness-selftest.c:87:pass:setup
+# harness-selftest.c:96:pass:before
+# harness-selftest.c:98:pass:after
+# harness-selftest.c:92:pass:teardown same-process=0
+# OK fixture_parent.pass
+ok 8 fixture_parent.pass
+# RUN fixture_setup_failure.pass ...
+# harness-selftest.c:106:pass:setup
+# harness-selftest.c:108:pass:Expected 0 (0) == 1 (1)
+# pass: Test terminated by assertion
+# FAIL fixture_setup_failure.pass
+not ok 9 fixture_setup_failure.pass
+# FAILED: 4 / 9 tests passed.
+# Totals: pass:4 fail:5 xfail:0 xpass:0 skip:0 error:0
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.sh b/tools/testing/selftests/kselftest_harness/harness-selftest.sh
new file mode 100755
index 000000000000..fe72d16370fe
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Selftest for kselftest_harness.h
+#
+
+set -e
+
+DIR="$(dirname $(readlink -f "$0"))"
+
+"$DIR"/harness-selftest > harness-selftest.seen || true
+
+diff -u "$DIR"/harness-selftest.expected harness-selftest.seen
diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h
index e8eafaf0941a..63cd7487373f 100644
--- a/tools/testing/selftests/kselftest_module.h
+++ b/tools/testing/selftests/kselftest_module.h
@@ -3,6 +3,7 @@
#define __KSELFTEST_MODULE_H
#include <linux/module.h>
+#include <linux/panic.h>
/*
* Test framework for writing test modules to be loaded by kselftest.
@@ -11,7 +12,8 @@
#define KSTM_MODULE_GLOBALS() \
static unsigned int total_tests __initdata; \
-static unsigned int failed_tests __initdata
+static unsigned int failed_tests __initdata; \
+static unsigned int skipped_tests __initdata
#define KSTM_CHECK_ZERO(x) do { \
total_tests++; \
@@ -21,11 +23,16 @@ static unsigned int failed_tests __initdata
} \
} while (0)
-static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests)
+static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests,
+ unsigned int skipped_tests)
{
- if (failed_tests == 0)
- pr_info("all %u tests passed\n", total_tests);
- else
+ if (failed_tests == 0) {
+ if (skipped_tests) {
+ pr_info("skipped %u tests\n", skipped_tests);
+ pr_info("remaining %u tests passed\n", total_tests);
+ } else
+ pr_info("all %u tests passed\n", total_tests);
+ } else
pr_warn("failed %u out of %u tests\n", failed_tests, total_tests);
return failed_tests ? -EINVAL : 0;
@@ -35,8 +42,9 @@ static inline int kstm_report(unsigned int total_tests, unsigned int failed_test
static int __init __module##_init(void) \
{ \
pr_info("loaded.\n"); \
+ add_taint(TAINT_TEST, LOCKDEP_STILL_OK); \
selftest(); \
- return kstm_report(total_tests, failed_tests); \
+ return kstm_report(total_tests, failed_tests, skipped_tests); \
} \
static void __exit __module##_exit(void) \
{ \
@@ -45,4 +53,6 @@ static void __exit __module##_exit(void) \
module_init(__module##_init); \
module_exit(__module##_exit)
+MODULE_INFO(test, "Y");
+
#endif /* __KSELFTEST_MODULE_H */
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 3a84394829ea..1d41a046a7bf 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,39 +1,12 @@
# SPDX-License-Identifier: GPL-2.0-only
-/aarch64/get-reg-list
-/aarch64/get-reg-list-sve
-/s390x/memop
-/s390x/resets
-/s390x/sync_regs_test
-/x86_64/cr4_cpuid_sync_test
-/x86_64/debug_regs
-/x86_64/evmcs_test
-/x86_64/get_cpuid_test
-/x86_64/kvm_pv_test
-/x86_64/hyperv_cpuid
-/x86_64/mmio_warning_test
-/x86_64/platform_info_test
-/x86_64/set_sregs_test
-/x86_64/smm_test
-/x86_64/state_test
-/x86_64/svm_vmcall_test
-/x86_64/sync_regs_test
-/x86_64/tsc_msrs_test
-/x86_64/userspace_msr_exit_test
-/x86_64/vmx_apic_access_test
-/x86_64/vmx_close_while_nested_test
-/x86_64/vmx_dirty_log_test
-/x86_64/vmx_preemption_timer_test
-/x86_64/vmx_set_nested_state_test
-/x86_64/vmx_tsc_adjust_test
-/x86_64/xapic_ipi_test
-/x86_64/xen_shinfo_test
-/x86_64/xen_vmcall_test
-/x86_64/xss_msr_test
-/x86_64/vmx_pmu_msrs_test
-/demand_paging_test
-/dirty_log_test
-/dirty_log_perf_test
-/kvm_create_max_vcpus
-/memslot_modification_stress_test
-/set_memory_region_test
-/steal_time
+*
+!/**/
+!*.c
+!*.h
+!*.S
+!*.sh
+!.gitignore
+!config
+!settings
+!Makefile
+!Makefile.kvm \ No newline at end of file
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 8c8eda429576..f2b223072b62 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,151 +1,16 @@
# SPDX-License-Identifier: GPL-2.0-only
-include ../../../../scripts/Kbuild.include
-
-all:
-
top_srcdir = ../../../..
-KSFT_KHDR_INSTALL := 1
-
-# For cross-builds to work, UNAME_M has to map to ARCH and arch specific
-# directories and targets in this Makefile. "uname -m" doesn't map to
-# arch specific sub-directory names.
-#
-# UNAME_M variable to used to run the compiles pointing to the right arch
-# directories and build the right targets for these supported architectures.
-#
-# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable.
-# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable.
-#
-# x86_64 targets are named to include x86_64 as a suffix and directories
-# for includes are in x86_64 sub-directory. s390x and aarch64 follow the
-# same convention. "uname -m" doesn't result in the correct mapping for
-# s390x and aarch64.
-#
-# No change necessary for x86_64
-UNAME_M := $(shell uname -m)
-
-# Set UNAME_M for arm64 compile/install to work
-ifeq ($(ARCH),arm64)
- UNAME_M := aarch64
-endif
-# Set UNAME_M s390x compile/install to work
-ifeq ($(ARCH),s390)
- UNAME_M := s390x
-endif
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
-
-TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
-TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
-TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
-TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
-TEST_GEN_PROGS_x86_64 += x86_64/smm_test
-TEST_GEN_PROGS_x86_64 += x86_64/state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
-TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
-TEST_GEN_PROGS_x86_64 += demand_paging_test
-TEST_GEN_PROGS_x86_64 += dirty_log_test
-TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
-TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
-TEST_GEN_PROGS_x86_64 += set_memory_region_test
-TEST_GEN_PROGS_x86_64 += steal_time
-
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
-TEST_GEN_PROGS_aarch64 += demand_paging_test
-TEST_GEN_PROGS_aarch64 += dirty_log_test
-TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
-TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_aarch64 += set_memory_region_test
-TEST_GEN_PROGS_aarch64 += steal_time
-
-TEST_GEN_PROGS_s390x = s390x/memop
-TEST_GEN_PROGS_s390x += s390x/resets
-TEST_GEN_PROGS_s390x += s390x/sync_regs_test
-TEST_GEN_PROGS_s390x += demand_paging_test
-TEST_GEN_PROGS_s390x += dirty_log_test
-TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390x += set_memory_region_test
-
-TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
-LIBKVM += $(LIBKVM_$(UNAME_M))
-
-INSTALL_HDR_PATH = $(top_srcdir)/usr
-LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch))
+# Top-level selftests allows ARCH=x86_64 :-(
ifeq ($(ARCH),x86_64)
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
+ override ARCH := x86
+endif
+include Makefile.kvm
else
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+# Empty targets for unsupported architectures
+all:
+clean:
endif
-CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
- -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
- -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
- -I$(<D) -Iinclude/$(UNAME_M) -I..
-
-no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
- $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
-
-# On s390, build the testcases KVM-enabled
-pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
- $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
-
-
-LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
-
-# After inclusion, $(OUTPUT) is defined and
-# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
-include ../lib.mk
-
-STATIC_LIBS := $(OUTPUT)/libkvm.a
-LIBKVM_C := $(filter %.c,$(LIBKVM))
-LIBKVM_S := $(filter %.S,$(LIBKVM))
-LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
-LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
-
-x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
-$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
- $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
- $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
-$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
- $(AR) crs $@ $^
-
-x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-all: $(STATIC_LIBS)
-$(TEST_GEN_PROGS): $(STATIC_LIBS)
-
-cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
-cscope:
- $(RM) cscope.*
- (find $(include_paths) -name '*.h' \
- -exec realpath --relative-base=$(PWD) {} \;; \
- find . -name '*.c' \
- -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
- cscope -b
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
new file mode 100644
index 000000000000..ba5c2b643efa
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -0,0 +1,356 @@
+# SPDX-License-Identifier: GPL-2.0-only
+include ../../../build/Build.include
+
+all:
+
+LIBKVM += lib/assert.c
+LIBKVM += lib/elf.c
+LIBKVM += lib/guest_modes.c
+LIBKVM += lib/io.c
+LIBKVM += lib/kvm_util.c
+LIBKVM += lib/lru_gen_util.c
+LIBKVM += lib/memstress.c
+LIBKVM += lib/guest_sprintf.c
+LIBKVM += lib/rbtree.c
+LIBKVM += lib/sparsebit.c
+LIBKVM += lib/test_util.c
+LIBKVM += lib/ucall_common.c
+LIBKVM += lib/userfaultfd_util.c
+
+LIBKVM_STRING += lib/string_override.c
+
+LIBKVM_x86 += lib/x86/apic.c
+LIBKVM_x86 += lib/x86/handlers.S
+LIBKVM_x86 += lib/x86/hyperv.c
+LIBKVM_x86 += lib/x86/memstress.c
+LIBKVM_x86 += lib/x86/pmu.c
+LIBKVM_x86 += lib/x86/processor.c
+LIBKVM_x86 += lib/x86/sev.c
+LIBKVM_x86 += lib/x86/svm.c
+LIBKVM_x86 += lib/x86/ucall.c
+LIBKVM_x86 += lib/x86/vmx.c
+
+LIBKVM_arm64 += lib/arm64/gic.c
+LIBKVM_arm64 += lib/arm64/gic_v3.c
+LIBKVM_arm64 += lib/arm64/gic_v3_its.c
+LIBKVM_arm64 += lib/arm64/handlers.S
+LIBKVM_arm64 += lib/arm64/processor.c
+LIBKVM_arm64 += lib/arm64/spinlock.c
+LIBKVM_arm64 += lib/arm64/ucall.c
+LIBKVM_arm64 += lib/arm64/vgic.c
+
+LIBKVM_s390 += lib/s390/diag318_test_handler.c
+LIBKVM_s390 += lib/s390/processor.c
+LIBKVM_s390 += lib/s390/ucall.c
+LIBKVM_s390 += lib/s390/facility.c
+
+LIBKVM_riscv += lib/riscv/handlers.S
+LIBKVM_riscv += lib/riscv/processor.c
+LIBKVM_riscv += lib/riscv/ucall.c
+
+LIBKVM_loongarch += lib/loongarch/processor.c
+LIBKVM_loongarch += lib/loongarch/ucall.c
+LIBKVM_loongarch += lib/loongarch/exception.S
+
+# Non-compiled test targets
+TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+
+# Compiled test targets valid on all architectures with libkvm support
+TEST_GEN_PROGS_COMMON = demand_paging_test
+TEST_GEN_PROGS_COMMON += dirty_log_test
+TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += irqfd_test
+TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
+TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
+TEST_GEN_PROGS_COMMON += kvm_page_table_test
+TEST_GEN_PROGS_COMMON += set_memory_region_test
+
+# Compiled test targets
+TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_x86 += x86/cpuid_test
+TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
+TEST_GEN_PROGS_x86 += x86/feature_msrs_test
+TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fastops_test
+TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
+TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
+TEST_GEN_PROGS_x86 += x86/hyperv_clock
+TEST_GEN_PROGS_x86 += x86/hyperv_cpuid
+TEST_GEN_PROGS_x86 += x86/hyperv_evmcs
+TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls
+TEST_GEN_PROGS_x86 += x86/hyperv_features
+TEST_GEN_PROGS_x86 += x86/hyperv_ipi
+TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
+TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
+TEST_GEN_PROGS_x86 += x86/kvm_clock_test
+TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
+TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/msrs_test
+TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test
+TEST_GEN_PROGS_x86 += x86/nested_emulation_test
+TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/platform_info_test
+TEST_GEN_PROGS_x86 += x86/pmu_counters_test
+TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
+TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test
+TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id
+TEST_GEN_PROGS_x86 += x86/set_sregs_test
+TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test
+TEST_GEN_PROGS_x86 += x86/smm_test
+TEST_GEN_PROGS_x86 += x86/state_test
+TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
+TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
+TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
+TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync
+TEST_GEN_PROGS_x86 += x86/sync_regs_test
+TEST_GEN_PROGS_x86 += x86/ucna_injection_test
+TEST_GEN_PROGS_x86 += x86/userspace_io_test
+TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
+TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
+TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test
+TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
+TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
+TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
+TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
+TEST_GEN_PROGS_x86 += x86/xss_msr_test
+TEST_GEN_PROGS_x86 += x86/debug_regs
+TEST_GEN_PROGS_x86 += x86/tsc_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test
+TEST_GEN_PROGS_x86 += x86/xen_shinfo_test
+TEST_GEN_PROGS_x86 += x86/xen_vmcall_test
+TEST_GEN_PROGS_x86 += x86/sev_init2_tests
+TEST_GEN_PROGS_x86 += x86/sev_migrate_tests
+TEST_GEN_PROGS_x86 += x86/sev_smoke_test
+TEST_GEN_PROGS_x86 += x86/amx_test
+TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
+TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
+TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += x86/aperfmperf_test
+TEST_GEN_PROGS_x86 += access_tracking_perf_test
+TEST_GEN_PROGS_x86 += coalesced_io_test
+TEST_GEN_PROGS_x86 += dirty_log_perf_test
+TEST_GEN_PROGS_x86 += guest_memfd_test
+TEST_GEN_PROGS_x86 += hardware_disable_test
+TEST_GEN_PROGS_x86 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86 += memslot_perf_test
+TEST_GEN_PROGS_x86 += mmu_stress_test
+TEST_GEN_PROGS_x86 += rseq_test
+TEST_GEN_PROGS_x86 += steal_time
+TEST_GEN_PROGS_x86 += system_counter_offset_test
+TEST_GEN_PROGS_x86 += pre_fault_memory_test
+
+# Compiled outputs used by test targets
+TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+
+TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
+TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/at
+TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/hello_el2
+TEST_GEN_PROGS_arm64 += arm64/host_sve
+TEST_GEN_PROGS_arm64 += arm64/hypercalls
+TEST_GEN_PROGS_arm64 += arm64/external_aborts
+TEST_GEN_PROGS_arm64 += arm64/page_fault_test
+TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/sea_to_user
+TEST_GEN_PROGS_arm64 += arm64/set_id_regs
+TEST_GEN_PROGS_arm64 += arm64/smccc_filter
+TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
+TEST_GEN_PROGS_arm64 += arm64/vgic_init
+TEST_GEN_PROGS_arm64 += arm64/vgic_irq
+TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
+TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
+TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += arm64/kvm-uuid
+TEST_GEN_PROGS_arm64 += access_tracking_perf_test
+TEST_GEN_PROGS_arm64 += arch_timer
+TEST_GEN_PROGS_arm64 += coalesced_io_test
+TEST_GEN_PROGS_arm64 += dirty_log_perf_test
+TEST_GEN_PROGS_arm64 += get-reg-list
+TEST_GEN_PROGS_arm64 += guest_memfd_test
+TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
+TEST_GEN_PROGS_arm64 += memslot_perf_test
+TEST_GEN_PROGS_arm64 += mmu_stress_test
+TEST_GEN_PROGS_arm64 += rseq_test
+TEST_GEN_PROGS_arm64 += steal_time
+
+TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_s390 += s390/memop
+TEST_GEN_PROGS_s390 += s390/resets
+TEST_GEN_PROGS_s390 += s390/sync_regs_test
+TEST_GEN_PROGS_s390 += s390/tprot
+TEST_GEN_PROGS_s390 += s390/cmma_test
+TEST_GEN_PROGS_s390 += s390/debug_test
+TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
+TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
+TEST_GEN_PROGS_s390 += s390/ucontrol_test
+TEST_GEN_PROGS_s390 += s390/user_operexec
+TEST_GEN_PROGS_s390 += rseq_test
+
+TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
+TEST_GEN_PROGS_riscv += riscv/ebreak_test
+TEST_GEN_PROGS_riscv += access_tracking_perf_test
+TEST_GEN_PROGS_riscv += arch_timer
+TEST_GEN_PROGS_riscv += coalesced_io_test
+TEST_GEN_PROGS_riscv += dirty_log_perf_test
+TEST_GEN_PROGS_riscv += get-reg-list
+TEST_GEN_PROGS_riscv += memslot_modification_stress_test
+TEST_GEN_PROGS_riscv += memslot_perf_test
+TEST_GEN_PROGS_riscv += mmu_stress_test
+TEST_GEN_PROGS_riscv += rseq_test
+TEST_GEN_PROGS_riscv += steal_time
+
+TEST_GEN_PROGS_loongarch = arch_timer
+TEST_GEN_PROGS_loongarch += coalesced_io_test
+TEST_GEN_PROGS_loongarch += demand_paging_test
+TEST_GEN_PROGS_loongarch += dirty_log_perf_test
+TEST_GEN_PROGS_loongarch += dirty_log_test
+TEST_GEN_PROGS_loongarch += guest_print_test
+TEST_GEN_PROGS_loongarch += hardware_disable_test
+TEST_GEN_PROGS_loongarch += kvm_binary_stats_test
+TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus
+TEST_GEN_PROGS_loongarch += kvm_page_table_test
+TEST_GEN_PROGS_loongarch += memslot_modification_stress_test
+TEST_GEN_PROGS_loongarch += memslot_perf_test
+TEST_GEN_PROGS_loongarch += set_memory_region_test
+
+SPLIT_TESTS += arch_timer
+SPLIT_TESTS += get-reg-list
+
+TEST_PROGS += $(TEST_PROGS_$(ARCH))
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH))
+TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH))
+LIBKVM += $(LIBKVM_$(ARCH))
+
+OVERRIDE_TARGETS = 1
+
+# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
+# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
+# which causes the environment variable to override the makefile).
+include ../lib.mk
+include ../cgroup/lib/libcgroup.mk
+
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
+ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
+ -fno-builtin-memcmp -fno-builtin-memcpy \
+ -fno-builtin-memset -fno-builtin-strnlen \
+ -fno-stack-protector -fno-PIE -fno-strict-aliasing \
+ -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \
+ -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH) \
+ -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
+ifeq ($(ARCH),s390)
+ CFLAGS += -march=z10
+endif
+ifeq ($(ARCH),x86)
+ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0)
+ CFLAGS += -march=x86-64-v2
+endif
+endif
+ifeq ($(ARCH),arm64)
+tools_dir := $(top_srcdir)/tools
+arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
+
+ifneq ($(abs_objdir),)
+arm64_hdr_outdir := $(abs_objdir)/tools/
+else
+arm64_hdr_outdir := $(tools_dir)/
+endif
+
+GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/
+CFLAGS += -I$(GEN_HDRS)
+
+$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
+ $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir)
+endif
+
+no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \
+ $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+# On s390, build the testcases KVM-enabled
+pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \
+ $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
+
+LDLIBS += -ldl
+LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
+
+LIBKVM_C := $(filter %.c,$(LIBKVM))
+LIBKVM_S := $(filter %.S,$(LIBKVM))
+LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
+LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
+LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O)
+SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
+SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS))
+
+TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
+TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ))
+-include $(TEST_DEP_FILES)
+
+$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+
+$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \
+$(TEST_GEN_PROGS_EXTENDED): %: %.o
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
+$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH)/%.o
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
+$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH)/%.o: $(ARCH)/%.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(GEN_HDRS) \
+ $(LIBKVM_OBJS) \
+ $(SPLIT_TEST_GEN_OBJ) \
+ $(TEST_DEP_FILES) \
+ $(TEST_GEN_OBJ) \
+ cscope.*
+
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+# Compile the string overrides as freestanding to prevent the compiler from
+# generating self-referential code, e.g. without "freestanding" the compiler may
+# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion.
+$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
+
+$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
+$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS)
+$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
+$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
+$(TEST_GEN_OBJ): $(GEN_HDRS)
+
+cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
+cscope:
+ $(RM) cscope.*
+ (find $(include_paths) -name '*.h' \
+ -exec realpath --relative-base=$(PWD) {} \;; \
+ find . -name '*.c' \
+ -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
+ cscope -b
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
deleted file mode 100644
index efba76682b4b..000000000000
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
+++ /dev/null
@@ -1,3 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define REG_LIST_SVE
-#include "get-reg-list.c"
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
new file mode 100644
index 000000000000..b058f27b2141
--- /dev/null
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * access_tracking_perf_test
+ *
+ * Copyright (C) 2021, Google, Inc.
+ *
+ * This test measures the performance effects of KVM's access tracking.
+ * Access tracking is driven by the MMU notifiers test_young, clear_young, and
+ * clear_flush_young. These notifiers do not have a direct userspace API,
+ * however the clear_young notifier can be triggered either by
+ * 1. marking a pages as idle in /sys/kernel/mm/page_idle/bitmap OR
+ * 2. adding a new MGLRU generation using the lru_gen debugfs file.
+ * This test leverages page_idle to enable access tracking on guest memory
+ * unless MGLRU is enabled, in which case MGLRU is used.
+ *
+ * To measure performance this test runs a VM with a configurable number of
+ * vCPUs that each touch every page in disjoint regions of memory. Performance
+ * is measured in the time it takes all vCPUs to finish touching their
+ * predefined region.
+ *
+ * Note that a deterministic correctness test of access tracking is not possible
+ * by using page_idle or MGLRU aging as it exists today. This is for a few
+ * reasons:
+ *
+ * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush.
+ * This means subsequent guest accesses are not guaranteed to see page table
+ * updates made by KVM until some time in the future.
+ *
+ * 2. page_idle only operates on LRU pages. Newly allocated pages are not
+ * immediately allocated to LRU lists. Instead they are held in a "pagevec",
+ * which is drained to LRU lists some time in the future. There is no
+ * userspace API to force this drain to occur.
+ *
+ * These limitations are worked around in this test by using a large enough
+ * region of memory for each vCPU such that the number of translations cached in
+ * the TLB and the number of pages held in pagevecs are a small fraction of the
+ * overall workload. And if either of those conditions are not true (for example
+ * in nesting, where TLB size is unlimited) this test will print a warning
+ * rather than silently passing.
+ */
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#include "cgroup_util.h"
+#include "lru_gen_util.h"
+
+static const char *TEST_MEMCG_NAME = "access_tracking_perf_test";
+
+/* Global variable used to synchronize all of the vCPU threads. */
+static int iteration;
+
+/* The cgroup memory controller root. Needed for lru_gen-based aging. */
+char cgroup_root[PATH_MAX];
+
+/* Defines what vCPU threads should do during a given iteration. */
+static enum {
+ /* Run the vCPU to access all its memory. */
+ ITERATION_ACCESS_MEMORY,
+ /* Mark the vCPU's memory idle in page_idle. */
+ ITERATION_MARK_IDLE,
+} iteration_work;
+
+/* The iteration that was last completed by each vCPU. */
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+/* Whether to overlap the regions of memory vCPUs access. */
+static bool overlap_memory_access;
+
+/*
+ * If the test should only warn if there are too many idle pages (i.e., it is
+ * expected).
+ * -1: Not yet set.
+ * 0: We do not expect too many idle pages, so FAIL if too many idle pages.
+ * 1: Having too many idle pages is expected, so merely print a warning if
+ * too many idle pages are found.
+ */
+static int idle_pages_warn_only = -1;
+
+/* Whether or not to use MGLRU instead of page_idle for access tracking */
+static bool use_lru_gen;
+
+/* Total number of pages to expect in the memcg after touching everything */
+static long test_pages;
+
+/* Last generation we found the pages in */
+static int lru_gen_last_gen = -1;
+
+struct test_params {
+ /* The backing source for the region of memory. */
+ enum vm_mem_backing_src_type backing_src;
+
+ /* The amount of memory to allocate for each vCPU. */
+ uint64_t vcpu_memory_bytes;
+
+ /* The number of vCPUs to create in the VM. */
+ int nr_vcpus;
+};
+
+static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
+{
+ uint64_t value;
+ off_t offset = index * sizeof(value);
+
+ TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value),
+ "pread from %s offset 0x%" PRIx64 " failed!",
+ filename, offset);
+
+ return value;
+
+}
+
+#define PAGEMAP_PRESENT (1ULL << 63)
+#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1)
+
+static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
+{
+ uint64_t hva = (uint64_t) addr_gva2hva(vm, gva);
+ uint64_t entry;
+ uint64_t pfn;
+
+ entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize());
+ if (!(entry & PAGEMAP_PRESENT))
+ return 0;
+
+ pfn = entry & PAGEMAP_PFN_MASK;
+ __TEST_REQUIRE(pfn, "Looking up PFNs requires CAP_SYS_ADMIN");
+
+ return pfn;
+}
+
+static bool is_page_idle(int page_idle_fd, uint64_t pfn)
+{
+ uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64);
+
+ return !!((bits >> (pfn % 64)) & 1);
+}
+
+static void mark_page_idle(int page_idle_fd, uint64_t pfn)
+{
+ uint64_t bits = 1ULL << (pfn % 64);
+
+ TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8,
+ "Set page_idle bits for PFN 0x%" PRIx64, pfn);
+}
+
+static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx)
+{
+ char prefix[18] = {};
+
+ if (vcpu_idx >= 0)
+ snprintf(prefix, 18, "vCPU%d: ", vcpu_idx);
+
+ TEST_ASSERT(idle_pages_warn_only,
+ "%sToo many pages still idle (%lu out of %lu)",
+ prefix, idle_pages, total_pages);
+
+ printf("WARNING: %sToo many pages still idle (%lu out of %lu), "
+ "this will affect performance results.\n",
+ prefix, idle_pages, total_pages);
+}
+
+static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm,
+ struct memstress_vcpu_args *vcpu_args)
+{
+ int vcpu_idx = vcpu_args->vcpu_idx;
+ uint64_t base_gva = vcpu_args->gva;
+ uint64_t pages = vcpu_args->pages;
+ uint64_t page;
+ uint64_t still_idle = 0;
+ uint64_t no_pfn = 0;
+ int page_idle_fd;
+ int pagemap_fd;
+
+ /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */
+ if (overlap_memory_access && vcpu_idx)
+ return;
+
+ page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+ TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle.");
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
+
+ for (page = 0; page < pages; page++) {
+ uint64_t gva = base_gva + page * memstress_args.guest_page_size;
+ uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
+
+ if (!pfn) {
+ no_pfn++;
+ continue;
+ }
+
+ if (is_page_idle(page_idle_fd, pfn)) {
+ still_idle++;
+ continue;
+ }
+
+ mark_page_idle(page_idle_fd, pfn);
+ }
+
+ /*
+ * Assumption: Less than 1% of pages are going to be swapped out from
+ * under us during this test.
+ */
+ TEST_ASSERT(no_pfn < pages / 100,
+ "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.",
+ vcpu_idx, no_pfn, pages);
+
+ /*
+ * Check that at least 90% of memory has been marked idle (the rest
+ * might not be marked idle because the pages have not yet made it to an
+ * LRU list or the translations are still cached in the TLB). 90% is
+ * arbitrary; high enough that we ensure most memory access went through
+ * access tracking but low enough as to not make the test too brittle
+ * over time and across architectures.
+ */
+ if (still_idle >= pages / 10)
+ too_many_idle_pages(still_idle, pages,
+ overlap_memory_access ? -1 : vcpu_idx);
+
+ close(page_idle_fd);
+ close(pagemap_fd);
+}
+
+int find_generation(struct memcg_stats *stats, long total_pages)
+{
+ /*
+ * For finding the generation that contains our pages, use the same
+ * 90% threshold that page_idle uses.
+ */
+ int gen = lru_gen_find_generation(stats, total_pages * 9 / 10);
+
+ if (gen >= 0)
+ return gen;
+
+ if (!idle_pages_warn_only) {
+ TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).",
+ total_pages * 9 / 10);
+ return gen;
+ }
+
+ /*
+ * We couldn't find a generation with 90% of guest memory, which can
+ * happen if access tracking is unreliable. Simply look for a majority
+ * of pages.
+ */
+ puts("WARNING: Couldn't find a generation with 90% of guest memory. "
+ "Performance results may not be accurate.");
+ gen = lru_gen_find_generation(stats, total_pages / 2);
+ TEST_ASSERT(gen >= 0,
+ "Could not find a generation with 50%% of guest memory (%ld pages).",
+ total_pages / 2);
+ return gen;
+}
+
+static void lru_gen_mark_memory_idle(struct kvm_vm *vm)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ struct memcg_stats stats;
+ int new_gen;
+
+ /* Make a new generation */
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ ts_elapsed = timespec_elapsed(ts_start);
+
+ /* Check the generation again */
+ new_gen = find_generation(&stats, test_pages);
+
+ /*
+ * This function should only be invoked with newly-accessed pages,
+ * so pages should always move to a newer generation.
+ */
+ if (new_gen <= lru_gen_last_gen) {
+ /* We did not move to a newer generation. */
+ long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen,
+ &stats);
+
+ too_many_idle_pages(min_t(long, idle_pages, test_pages),
+ test_pages, -1);
+ }
+ pr_info("%-30s: %ld.%09lds\n",
+ "Mark memory idle (lru_gen)", ts_elapsed.tv_sec,
+ ts_elapsed.tv_nsec);
+ lru_gen_last_gen = new_gen;
+}
+
+static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall)
+{
+ struct ucall uc;
+ uint64_t actual_ucall = get_ucall(vcpu, &uc);
+
+ TEST_ASSERT(expected_ucall == actual_ucall,
+ "Guest exited unexpectedly (expected ucall %" PRIu64
+ ", got %" PRIu64 ")",
+ expected_ucall, actual_ucall);
+}
+
+static bool spin_wait_for_next_iteration(int *current_iteration)
+{
+ int last_iteration = *current_iteration;
+
+ do {
+ if (READ_ONCE(memstress_args.stop_vcpus))
+ return false;
+
+ *current_iteration = READ_ONCE(iteration);
+ } while (last_iteration == *current_iteration);
+
+ return true;
+}
+
+static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
+{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ struct kvm_vm *vm = memstress_args.vm;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+ int current_iteration = 0;
+
+ while (spin_wait_for_next_iteration(&current_iteration)) {
+ switch (READ_ONCE(iteration_work)) {
+ case ITERATION_ACCESS_MEMORY:
+ vcpu_run(vcpu);
+ assert_ucall(vcpu, UCALL_SYNC);
+ break;
+ case ITERATION_MARK_IDLE:
+ pageidle_mark_vcpu_memory_idle(vm, vcpu_args);
+ break;
+ }
+
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+ }
+}
+
+static void spin_wait_for_vcpu(int vcpu_idx, int target_iteration)
+{
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_idx]) !=
+ target_iteration) {
+ continue;
+ }
+}
+
+/* The type of memory accesses to perform in the VM. */
+enum access_type {
+ ACCESS_READ,
+ ACCESS_WRITE,
+};
+
+static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *description)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ int next_iteration, i;
+
+ /* Kick off the vCPUs by incrementing iteration. */
+ next_iteration = ++iteration;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ /* Wait for all vCPUs to finish the iteration. */
+ for (i = 0; i < nr_vcpus; i++)
+ spin_wait_for_vcpu(i, next_iteration);
+
+ ts_elapsed = timespec_elapsed(ts_start);
+ pr_info("%-30s: %ld.%09lds\n",
+ description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
+}
+
+static void access_memory(struct kvm_vm *vm, int nr_vcpus,
+ enum access_type access, const char *description)
+{
+ memstress_set_write_percent(vm, (access == ACCESS_READ) ? 0 : 100);
+ iteration_work = ITERATION_ACCESS_MEMORY;
+ run_iteration(vm, nr_vcpus, description);
+}
+
+static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus)
+{
+ if (use_lru_gen)
+ return lru_gen_mark_memory_idle(vm);
+
+ /*
+ * Even though this parallelizes the work across vCPUs, this is still a
+ * very slow operation because page_idle forces the test to mark one pfn
+ * at a time and the clear_young notifier may serialize on the KVM MMU
+ * lock.
+ */
+ pr_debug("Marking VM memory idle (slow)...\n");
+ iteration_work = ITERATION_MARK_IDLE;
+ run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)");
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *params = arg;
+ struct kvm_vm *vm;
+ int nr_vcpus = params->nr_vcpus;
+
+ vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
+ params->backing_src, !overlap_memory_access);
+
+ /*
+ * If guest_page_size is larger than the host's page size, the
+ * guest (memstress) will only fault in a subset of the host's pages.
+ */
+ test_pages = params->nr_vcpus * params->vcpu_memory_bytes /
+ max(memstress_args.guest_page_size,
+ (uint64_t)getpagesize());
+
+ memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
+
+ pr_info("\n");
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
+
+ if (use_lru_gen) {
+ struct memcg_stats stats;
+
+ /*
+ * Do a page table scan now. Following initial population, aging
+ * may not cause the pages to move to a newer generation. Do
+ * an aging pass now so that future aging passes always move
+ * pages to a newer generation.
+ */
+ printf("Initial aging pass (lru_gen)\n");
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages,
+ "Not all pages accounted for (looking for %ld). "
+ "Was the memcg set up correctly?", test_pages);
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory");
+ lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME);
+ lru_gen_last_gen = find_generation(&stats, test_pages);
+ }
+
+ /* As a control, read and write to the populated memory first. */
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory");
+ access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory");
+
+ /* Repeat on memory that has been marked as idle. */
+ mark_memory_idle(vm, nr_vcpus);
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to idle memory");
+ mark_memory_idle(vm, nr_vcpus);
+ access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory");
+
+ memstress_join_vcpu_threads(nr_vcpus);
+ memstress_destroy_vm(vm);
+}
+
+static int access_tracking_unreliable(void)
+{
+#ifdef __x86_64__
+ /*
+ * When running nested, the TLB size may be effectively unlimited (for
+ * example, this is the case when running on KVM L0), and KVM doesn't
+ * explicitly flush the TLB when aging SPTEs. As a result, more pages
+ * are cached and the guest won't see the "idle" bit cleared.
+ */
+ if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ puts("Skipping idle page count sanity check, because the test is run nested");
+ return 1;
+ }
+#endif
+ /*
+ * When NUMA balancing is enabled, guest memory will be unmapped to get
+ * NUMA faults, dropping the Accessed bits.
+ */
+ if (is_numa_balancing_enabled()) {
+ puts("Skipping idle page count sanity check, because NUMA balancing is enabled");
+ return 1;
+ }
+ return 0;
+}
+
+static int run_test_for_each_guest_mode(const char *cgroup, void *arg)
+{
+ for_each_guest_mode(run_test, arg);
+ return 0;
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n",
+ name);
+ puts("");
+ printf(" -h: Display this help message.");
+ guest_modes_help();
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
+ printf(" -w: Control whether the test warns or fails if more than 10%%\n"
+ " of pages are still seen as idle/old after accessing guest\n"
+ " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
+ " mode, the test fails by default, but switches to warn only\n"
+ " if NUMA balancing is enabled or the test detects it's running\n"
+ " in a VM.\n");
+ backing_src_help("-s");
+ puts("");
+ exit(0);
+}
+
+void destroy_cgroup(char *cg)
+{
+ printf("Destroying cgroup: %s\n", cg);
+}
+
+int main(int argc, char *argv[])
+{
+ struct test_params params = {
+ .backing_src = DEFAULT_VM_MEM_SRC,
+ .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
+ .nr_vcpus = 1,
+ };
+ char *new_cg = NULL;
+ int page_idle_fd;
+ int opt;
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'b':
+ params.vcpu_memory_bytes = parse_size(optarg);
+ break;
+ case 'v':
+ params.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ break;
+ case 'o':
+ overlap_memory_access = true;
+ break;
+ case 's':
+ params.backing_src = parse_backing_src_type(optarg);
+ break;
+ case 'w':
+ idle_pages_warn_only =
+ atoi_non_negative("Idle pages warning",
+ optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ if (idle_pages_warn_only == -1)
+ idle_pages_warn_only = access_tracking_unreliable();
+
+ if (lru_gen_usable()) {
+ bool cg_created = true;
+ int ret;
+
+ puts("Using lru_gen for aging");
+ use_lru_gen = true;
+
+ if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory"))
+ ksft_exit_skip("Cannot find memory cgroup controller\n");
+
+ new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME);
+ printf("Creating cgroup: %s\n", new_cg);
+ if (cg_create(new_cg)) {
+ if (errno == EEXIST) {
+ printf("Found existing cgroup");
+ cg_created = false;
+ } else {
+ ksft_exit_skip("could not create new cgroup: %s\n", new_cg);
+ }
+ }
+
+ /*
+ * This will fork off a new process to run the test within
+ * a new memcg, so we need to properly propagate the return
+ * value up.
+ */
+ ret = cg_run(new_cg, &run_test_for_each_guest_mode, &params);
+ if (cg_created)
+ cg_destroy(new_cg);
+ if (ret < 0)
+ TEST_FAIL("child did not spawn or was abnormally killed");
+ if (ret)
+ return ret;
+ } else {
+ page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR,
+ "Is CONFIG_IDLE_PAGE_TRACKING enabled?");
+ close(page_idle_fd);
+
+ puts("Using page_idle for aging");
+ run_test_for_each_guest_mode(NULL, &params);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
new file mode 100644
index 000000000000..cf8fb67104f1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the arch timer IRQ functionality
+ *
+ * The guest's main thread configures the timer interrupt and waits
+ * for it to fire, with a timeout equal to the timer period.
+ * It asserts that the timeout doesn't exceed the timer period plus
+ * a user configurable error margin(default to 100us)
+ *
+ * On the other hand, upon receipt of an interrupt, the guest's interrupt
+ * handler validates the interrupt by checking if the architectural state
+ * is in compliance with the specifications.
+ *
+ * The test provides command-line options to configure the timer's
+ * period (-p), number of vCPUs (-n), iterations per stage (-i) and timer
+ * interrupt arrival error margin (-e). To stress-test the timer stack
+ * even more, an option to migrate the vCPUs across pCPUs (-m), at a
+ * particular rate, is also provided.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <sys/sysinfo.h>
+
+#include "timer_test.h"
+#include "ucall_common.h"
+
+struct test_args test_args = {
+ .nr_vcpus = NR_VCPUS_DEF,
+ .nr_iter = NR_TEST_ITERS_DEF,
+ .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
+ .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
+ .timer_err_margin_us = TIMER_TEST_ERR_MARGIN_US,
+ .reserved = 1,
+};
+
+struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS];
+
+static pthread_t pt_vcpu_run[KVM_MAX_VCPUS];
+static unsigned long *vcpu_done_map;
+static pthread_mutex_t vcpu_done_map_lock;
+
+static void *test_vcpu_run(void *arg)
+{
+ unsigned int vcpu_idx = (unsigned long)arg;
+ struct ucall uc;
+ struct kvm_vcpu *vcpu = vcpus[vcpu_idx];
+ struct kvm_vm *vm = vcpu->vm;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx];
+
+ vcpu_run(vcpu);
+
+ /* Currently, any exit from guest is an indication of completion */
+ pthread_mutex_lock(&vcpu_done_map_lock);
+ __set_bit(vcpu_idx, vcpu_done_map);
+ pthread_mutex_unlock(&vcpu_done_map_lock);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ sync_global_from_guest(vm, *shared_data);
+ fprintf(stderr, "Guest assert failed, vcpu %u; stage; %u; iter: %u\n",
+ vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+
+ pr_info("PASS(vCPU-%d).\n", vcpu_idx);
+
+ return NULL;
+}
+
+static uint32_t test_get_pcpu(void)
+{
+ uint32_t pcpu;
+ unsigned int nproc_conf;
+ cpu_set_t online_cpuset;
+
+ nproc_conf = get_nprocs_conf();
+ sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset);
+
+ /* Randomly find an available pCPU to place a vCPU on */
+ do {
+ pcpu = rand() % nproc_conf;
+ } while (!CPU_ISSET(pcpu, &online_cpuset));
+
+ return pcpu;
+}
+
+static int test_migrate_vcpu(unsigned int vcpu_idx)
+{
+ int ret;
+ uint32_t new_pcpu = test_get_pcpu();
+
+ pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
+
+ ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu);
+
+ /* Allow the error where the vCPU thread is already finished */
+ TEST_ASSERT(ret == 0 || ret == ESRCH,
+ "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d",
+ vcpu_idx, new_pcpu, ret);
+
+ return ret;
+}
+
+static void *test_vcpu_migration(void *arg)
+{
+ unsigned int i, n_done;
+ bool vcpu_done;
+
+ do {
+ usleep(msecs_to_usecs(test_args.migration_freq_ms));
+
+ for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) {
+ pthread_mutex_lock(&vcpu_done_map_lock);
+ vcpu_done = test_bit(i, vcpu_done_map);
+ pthread_mutex_unlock(&vcpu_done_map_lock);
+
+ if (vcpu_done) {
+ n_done++;
+ continue;
+ }
+
+ test_migrate_vcpu(i);
+ }
+ } while (test_args.nr_vcpus != n_done);
+
+ return NULL;
+}
+
+static void test_run(struct kvm_vm *vm)
+{
+ pthread_t pt_vcpu_migration;
+ unsigned int i;
+ int ret;
+
+ pthread_mutex_init(&vcpu_done_map_lock, NULL);
+ vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
+ TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap");
+
+ for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) {
+ ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run,
+ (void *)(unsigned long)i);
+ TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i);
+ }
+
+ /* Spawn a thread to control the vCPU migrations */
+ if (test_args.migration_freq_ms) {
+ srand(time(NULL));
+
+ ret = pthread_create(&pt_vcpu_migration, NULL,
+ test_vcpu_migration, NULL);
+ TEST_ASSERT(!ret, "Failed to create the migration pthread");
+ }
+
+
+ for (i = 0; i < test_args.nr_vcpus; i++)
+ pthread_join(pt_vcpu_run[i], NULL);
+
+ if (test_args.migration_freq_ms)
+ pthread_join(pt_vcpu_migration, NULL);
+
+ bitmap_free(vcpu_done_map);
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n"
+ "\t\t [-m migration_freq_ms] [-o counter_offset]\n"
+ "\t\t [-e timer_err_margin_us]\n", name);
+ pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n",
+ NR_VCPUS_DEF, KVM_MAX_VCPUS);
+ pr_info("\t-i: Number of iterations per stage (default: %u)\n",
+ NR_TEST_ITERS_DEF);
+ pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n",
+ TIMER_TEST_PERIOD_MS_DEF);
+ pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
+ TIMER_TEST_MIGRATION_FREQ_MS);
+ pr_info("\t-o: Counter offset (in counter cycles, default: 0) [aarch64-only]\n");
+ pr_info("\t-e: Interrupt arrival error margin (in us) of the guest timer (default: %u)\n",
+ TIMER_TEST_ERR_MARGIN_US);
+ pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hn:i:p:m:o:e:")) != -1) {
+ switch (opt) {
+ case 'n':
+ test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
+ pr_info("Max allowed vCPUs: %u\n",
+ KVM_MAX_VCPUS);
+ goto err;
+ }
+ break;
+ case 'i':
+ test_args.nr_iter = atoi_positive("Number of iterations", optarg);
+ break;
+ case 'p':
+ test_args.timer_period_ms = atoi_positive("Periodicity", optarg);
+ break;
+ case 'm':
+ test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
+ break;
+ case 'e':
+ test_args.timer_err_margin_us = atoi_non_negative("Error Margin", optarg);
+ break;
+ case 'o':
+ test_args.counter_offset = strtol(optarg, NULL, 0);
+ test_args.reserved = 0;
+ break;
+ case 'h':
+ default:
+ goto err;
+ }
+ }
+
+ return true;
+
+err:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2,
+ "At least two physical CPUs needed for vCPU migration");
+
+ vm = test_vm_create();
+ test_run(vm);
+ test_vm_cleanup(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
new file mode 100644
index 000000000000..713005b6f508
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * aarch32_id_regs - Test for ID register behavior on AArch64-only systems
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ
+ * and WI from userspace.
+ */
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+#define BAD_ID_REG_VAL 0x1badc0deul
+
+#define GUEST_ASSERT_REG_RAZ(reg) GUEST_ASSERT_EQ(read_sysreg_s(reg), 0)
+
+static void guest_main(void)
+{
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3));
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1);
+ GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7));
+
+ GUEST_DONE();
+}
+
+static void test_guest_raz(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static uint64_t raz_wi_reg_ids[] = {
+ KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1),
+};
+
+static void test_user_raz_wi(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
+ uint64_t reg_id = raz_wi_reg_ids[i];
+ uint64_t val;
+
+ val = vcpu_get_reg(vcpu, reg_id);
+ TEST_ASSERT_EQ(val, 0);
+
+ /*
+ * Expect the ioctl to succeed with no effect on the register
+ * value.
+ */
+ vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+
+ val = vcpu_get_reg(vcpu, reg_id);
+ TEST_ASSERT_EQ(val, 0);
+ }
+}
+
+static uint64_t raz_invariant_reg_ids[] = {
+ KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
+ KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
+ KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
+ KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)),
+};
+
+static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
+{
+ int i, r;
+
+ for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
+ uint64_t reg_id = raz_invariant_reg_ids[i];
+ uint64_t val;
+
+ val = vcpu_get_reg(vcpu, reg_id);
+ TEST_ASSERT_EQ(val, 0);
+
+ r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+ val = vcpu_get_reg(vcpu, reg_id);
+ TEST_ASSERT_EQ(val, 0);
+ }
+}
+
+
+
+static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
+{
+ uint64_t val, el0;
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
+ return el0 == ID_AA64PFR0_EL1_EL0_IMP;
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ TEST_REQUIRE(vcpu_aarch64_only(vcpu));
+
+ test_user_raz_wi(vcpu);
+ test_user_raz_invariant(vcpu);
+ test_guest_raz(vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c
new file mode 100644
index 000000000000..d592a4515399
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/arch_timer.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates both the virtual and physical timer IRQs using
+ * CVAL and TVAL registers.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+#include "arch_timer.h"
+#include "delay.h"
+#include "gic.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+#include "vgic.h"
+
+enum guest_stage {
+ GUEST_STAGE_VTIMER_CVAL = 1,
+ GUEST_STAGE_VTIMER_TVAL,
+ GUEST_STAGE_PTIMER_CVAL,
+ GUEST_STAGE_PTIMER_TVAL,
+ GUEST_STAGE_MAX,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+static void
+guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
+{
+ switch (shared_data->guest_stage) {
+ case GUEST_STAGE_VTIMER_CVAL:
+ timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(VIRTUAL);
+ timer_set_ctl(VIRTUAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_VTIMER_TVAL:
+ timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(VIRTUAL);
+ timer_set_ctl(VIRTUAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_PTIMER_CVAL:
+ timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(PHYSICAL);
+ timer_set_ctl(PHYSICAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_PTIMER_TVAL:
+ timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(PHYSICAL);
+ timer_set_ctl(PHYSICAL, CTL_ENABLE);
+ break;
+ default:
+ GUEST_ASSERT(0);
+ }
+}
+
+static void guest_validate_irq(unsigned int intid,
+ struct test_vcpu_shared_data *shared_data)
+{
+ enum guest_stage stage = shared_data->guest_stage;
+ uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
+ unsigned long xctl = 0;
+ unsigned int timer_irq = 0;
+ unsigned int accessor;
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ switch (stage) {
+ case GUEST_STAGE_VTIMER_CVAL:
+ case GUEST_STAGE_VTIMER_TVAL:
+ accessor = VIRTUAL;
+ timer_irq = vtimer_irq;
+ break;
+ case GUEST_STAGE_PTIMER_CVAL:
+ case GUEST_STAGE_PTIMER_TVAL:
+ accessor = PHYSICAL;
+ timer_irq = ptimer_irq;
+ break;
+ default:
+ GUEST_ASSERT(0);
+ return;
+ }
+
+ xctl = timer_get_ctl(accessor);
+ if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
+ return;
+
+ timer_set_ctl(accessor, CTL_IMASK);
+ xcnt = timer_get_cntct(accessor);
+ cval = timer_get_cval(accessor);
+
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, timer_irq);
+
+ /* Basic 'timer condition met' check */
+ __GUEST_ASSERT(xcnt >= cval,
+ "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
+ xcnt, cval, xcnt_diff_us);
+ __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
+
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid = gic_get_and_ack_irq();
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ guest_validate_irq(intid, shared_data);
+
+ gic_set_eoi(intid);
+}
+
+static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
+ enum guest_stage stage)
+{
+ uint32_t irq_iter, config_iter;
+
+ shared_data->guest_stage = stage;
+ shared_data->nr_iter = 0;
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup the next interrupt */
+ guest_configure_timer_action(shared_data);
+
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(msecs_to_usecs(test_args.timer_period_ms) +
+ test_args.timer_err_margin_us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+
+ gic_init(GIC_V3, test_args.nr_vcpus);
+
+ timer_set_ctl(VIRTUAL, CTL_IMASK);
+ timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+ gic_irq_enable(vtimer_irq);
+ gic_irq_enable(ptimer_irq);
+ local_irq_enable();
+
+ guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
+
+ GUEST_DONE();
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm)
+{
+ /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
+ ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]);
+ vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]);
+
+ sync_global_to_guest(vm, ptimer_irq);
+ sync_global_to_guest(vm, vtimer_irq);
+
+ pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ unsigned int i;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ if (!test_args.reserved) {
+ if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
+ struct kvm_arm_counter_offset offset = {
+ .counter_offset = test_args.counter_offset,
+ .reserved = 0,
+ };
+ vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
+ } else
+ TEST_FAIL("no support for global offset");
+ }
+
+ for (i = 0; i < nr_vcpus; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ test_init_timer_irq(vm);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
new file mode 100644
index 000000000000..993c9e38e729
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -0,0 +1,1059 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
+ *
+ * The test validates some edge cases related to the arch-timer:
+ * - timers above the max TVAL value.
+ * - timers in the past
+ * - moving counters ahead and behind pending timers.
+ * - reprograming timers.
+ * - timers fired multiple times.
+ * - masking/unmasking using the timer control mask.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <pthread.h>
+#include <sys/sysinfo.h>
+
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+/* Depends on counter width. */
+static uint64_t CVAL_MAX;
+/* tval is a signed 32-bit int. */
+static const int32_t TVAL_MAX = INT32_MAX;
+static const int32_t TVAL_MIN = INT32_MIN;
+
+/* After how much time we say there is no IRQ. */
+static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
+
+/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */
+static uint64_t DEF_CNT;
+
+/* Number of runs. */
+static const uint32_t NR_TEST_ITERS_DEF = 5;
+
+/* Default wait test time in ms. */
+static const uint32_t WAIT_TEST_MS = 10;
+
+/* Default "long" wait test time in ms. */
+static const uint32_t LONG_WAIT_TEST_MS = 100;
+
+/* Shared with IRQ handler. */
+struct test_vcpu_shared_data {
+ atomic_t handled;
+ atomic_t spurious;
+} shared_data;
+
+struct test_args {
+ /* Virtual or physical timer and counter tests. */
+ enum arch_timer timer;
+ /* Delay used for most timer tests. */
+ uint64_t wait_ms;
+ /* Delay used in the test_long_timer_delays test. */
+ uint64_t long_wait_ms;
+ /* Number of iterations. */
+ int iterations;
+ /* Whether to test the physical timer. */
+ bool test_physical;
+ /* Whether to test the virtual timer. */
+ bool test_virtual;
+};
+
+struct test_args test_args = {
+ .wait_ms = WAIT_TEST_MS,
+ .long_wait_ms = LONG_WAIT_TEST_MS,
+ .iterations = NR_TEST_ITERS_DEF,
+ .test_physical = true,
+ .test_virtual = true,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+enum sync_cmd {
+ SET_COUNTER_VALUE,
+ USERSPACE_USLEEP,
+ USERSPACE_SCHED_YIELD,
+ USERSPACE_MIGRATE_SELF,
+ NO_USERSPACE_CMD,
+};
+
+typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
+static void sleep_migrate(enum arch_timer timer, uint64_t usec);
+
+sleep_method_t sleep_method[] = {
+ sleep_poll,
+ sleep_sched_poll,
+ sleep_migrate,
+ sleep_in_userspace,
+};
+
+typedef void (*irq_wait_method_t)(void);
+
+static void wait_for_non_spurious_irq(void);
+static void wait_poll_for_irq(void);
+static void wait_sched_poll_for_irq(void);
+static void wait_migrate_poll_for_irq(void);
+
+irq_wait_method_t irq_wait_method[] = {
+ wait_for_non_spurious_irq,
+ wait_poll_for_irq,
+ wait_sched_poll_for_irq,
+ wait_migrate_poll_for_irq,
+};
+
+enum timer_view {
+ TIMER_CVAL,
+ TIMER_TVAL,
+};
+
+static void assert_irqs_handled(uint32_t n)
+{
+ int h = atomic_read(&shared_data.handled);
+
+ __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n);
+}
+
+static void userspace_cmd(uint64_t cmd)
+{
+ GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
+}
+
+static void userspace_migrate_vcpu(void)
+{
+ userspace_cmd(USERSPACE_MIGRATE_SELF);
+}
+
+static void userspace_sleep(uint64_t usecs)
+{
+ GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
+}
+
+static void set_counter(enum arch_timer timer, uint64_t counter)
+{
+ GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid = gic_get_and_ack_irq();
+ enum arch_timer timer;
+ uint64_t cnt, cval;
+ uint32_t ctl;
+ bool timer_condition, istatus;
+
+ if (intid == IAR_SPURIOUS) {
+ atomic_inc(&shared_data.spurious);
+ goto out;
+ }
+
+ if (intid == ptimer_irq)
+ timer = PHYSICAL;
+ else if (intid == vtimer_irq)
+ timer = VIRTUAL;
+ else
+ goto out;
+
+ ctl = timer_get_ctl(timer);
+ cval = timer_get_cval(timer);
+ cnt = timer_get_cntct(timer);
+ timer_condition = cnt >= cval;
+ istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
+ GUEST_ASSERT_EQ(timer_condition, istatus);
+
+ /* Disable and mask the timer. */
+ timer_set_ctl(timer, CTL_IMASK);
+
+ atomic_inc(&shared_data.handled);
+
+out:
+ gic_set_eoi(intid);
+}
+
+static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_cval(timer, cval_cycles);
+ timer_set_ctl(timer, ctl);
+}
+
+static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_tval(timer, tval_cycles);
+ timer_set_ctl(timer, ctl);
+}
+
+static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
+ enum timer_view tv)
+{
+ switch (tv) {
+ case TIMER_CVAL:
+ set_cval_irq(timer, xval, ctl);
+ break;
+ case TIMER_TVAL:
+ set_tval_irq(timer, xval, ctl);
+ break;
+ default:
+ GUEST_FAIL("Could not get timer %d", timer);
+ }
+}
+
+/*
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void wait_for_non_spurious_irq(void)
+{
+ int h;
+
+ local_irq_disable();
+
+ for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
+ wfi();
+ local_irq_enable();
+ isb(); /* handle IRQ */
+ local_irq_disable();
+ }
+}
+
+/*
+ * Wait for an non-spurious IRQ by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
+{
+ int h;
+
+ local_irq_disable();
+
+ h = atomic_read(&shared_data.handled);
+
+ local_irq_enable();
+ while (h == atomic_read(&shared_data.handled)) {
+ if (usp_cmd == NO_USERSPACE_CMD)
+ cpu_relax();
+ else
+ userspace_cmd(usp_cmd);
+ }
+ local_irq_disable();
+}
+
+static void wait_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(NO_USERSPACE_CMD);
+}
+
+static void wait_sched_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
+}
+
+static void wait_migrate_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
+}
+
+/*
+ * Sleep for usec microseconds by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE).
+ */
+static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+ enum sync_cmd usp_cmd)
+{
+ uint64_t cycles = usec_to_cycles(usec);
+ /* Whichever timer we are testing with, sleep with the other. */
+ enum arch_timer sleep_timer = 1 - test_timer;
+ uint64_t start = timer_get_cntct(sleep_timer);
+
+ while ((timer_get_cntct(sleep_timer) - start) < cycles) {
+ if (usp_cmd == NO_USERSPACE_CMD)
+ cpu_relax();
+ else
+ userspace_cmd(usp_cmd);
+ }
+}
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, NO_USERSPACE_CMD);
+}
+
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
+}
+
+static void sleep_migrate(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
+}
+
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
+{
+ userspace_sleep(usec);
+}
+
+/*
+ * Reset the timer state to some nice values like the counter not being close
+ * to the edge, and the control register masked and disabled.
+ */
+static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
+{
+ set_counter(timer, cnt);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timer_xval(enum arch_timer timer, uint64_t xval,
+ enum timer_view tv, irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ local_irq_disable();
+
+ if (reset_state)
+ reset_timer_state(timer, reset_cnt);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/*
+ * The test_timer_* functions will program the timer, wait for it, and assert
+ * the firing of the correct IRQ.
+ *
+ * These functions don't have a timeout and return as soon as they receive an
+ * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
+ * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
+ */
+
+static void test_timer_cval(enum arch_timer timer, uint64_t cval,
+ irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
+}
+
+static void test_timer_tval(enum arch_timer timer, int32_t tval,
+ irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
+ reset_cnt);
+}
+
+static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
+ uint64_t usec, enum timer_view timer_view,
+ sleep_method_t guest_sleep)
+{
+ local_irq_disable();
+
+ set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
+ guest_sleep(timer, usec);
+
+ local_irq_enable();
+ isb();
+
+ /* Assume success (no IRQ) after waiting usec microseconds */
+ assert_irqs_handled(0);
+}
+
+static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
+ uint64_t usec, sleep_method_t wm)
+{
+ test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
+}
+
+static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
+ sleep_method_t wm)
+{
+ /* tval will be cast to an int32_t in test_xval_check_no_irq */
+ test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
+}
+
+/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
+static void test_timer_control_mask_then_unmask(enum arch_timer timer)
+{
+ reset_timer_state(timer, DEF_CNT);
+ set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+ /* Unmask the timer, and then get an IRQ. */
+ local_irq_disable();
+ timer_set_ctl(timer, CTL_ENABLE);
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wait_for_non_spurious_irq();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/* Check that timer control masks actually mask a timer being fired. */
+static void test_timer_control_masks(enum arch_timer timer)
+{
+ reset_timer_state(timer, DEF_CNT);
+
+ /* Local IRQs are not masked at this point. */
+
+ set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+ /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+ sleep_poll(timer, TIMEOUT_NO_IRQ_US);
+
+ assert_irqs_handled(0);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_fire_a_timer_multiple_times(enum arch_timer timer,
+ irq_wait_method_t wm, int num)
+{
+ int i;
+
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ set_tval_irq(timer, 0, CTL_ENABLE);
+
+ for (i = 1; i <= num; i++) {
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ /* The IRQ handler masked and disabled the timer.
+ * Enable and unmmask it again.
+ */
+ timer_set_ctl(timer, CTL_ENABLE);
+
+ assert_irqs_handled(i);
+ }
+
+ local_irq_enable();
+}
+
+static void test_timers_fired_multiple_times(enum arch_timer timer)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
+ test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
+}
+
+/*
+ * Set a timer for tval=delta_1_ms then reprogram it to
+ * tval=delta_2_ms. Check that we get the timer fired. There is no
+ * timeout for the wait: we use the wfi instruction.
+ */
+static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
+ int32_t delta_1_ms, int32_t delta_2_ms)
+{
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ /* Program the timer to DEF_CNT + delta_1_ms. */
+ set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
+
+ /* Reprogram the timer to DEF_CNT + delta_2_ms. */
+ timer_set_tval(timer, msec_to_cycles(delta_2_ms));
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
+ GUEST_ASSERT(timer_get_cntct(timer) >=
+ DEF_CNT + msec_to_cycles(delta_2_ms));
+
+ local_irq_enable();
+ assert_irqs_handled(1);
+};
+
+static void test_reprogram_timers(enum arch_timer timer)
+{
+ int i;
+ uint64_t base_wait = test_args.wait_ms;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ /*
+ * Ensure reprogramming works whether going from a
+ * longer time to a shorter or vice versa.
+ */
+ test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
+ base_wait);
+ test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
+ 2 * base_wait);
+ }
+}
+
+static void test_basic_functionality(enum arch_timer timer)
+{
+ int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
+ uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+ }
+}
+
+/*
+ * This test checks basic timer behavior without actually firing timers, things
+ * like: the relationship between cval and tval, tval down-counting.
+ */
+static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
+{
+ reset_timer_state(timer, DEF_CNT);
+
+ local_irq_disable();
+
+ /* cval in the past */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) -
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+ /* tval in the past */
+ timer_set_tval(timer, -1);
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
+
+ /* tval larger than TVAL_MAX. This requires programming with
+ * timer_set_cval instead so the value is expressible
+ */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) + TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) <= 0);
+
+ /*
+ * tval larger than 2 * TVAL_MAX.
+ * Twice the TVAL_MAX completely loops around the TVAL.
+ */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) + 2ULL * TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) <=
+ msec_to_cycles(test_args.wait_ms));
+
+ /* negative tval that rollovers from 0. */
+ set_counter(timer, msec_to_cycles(1));
+ timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
+
+ /* tval should keep down-counting from 0 to -1. */
+ timer_set_tval(timer, 0);
+ sleep_poll(timer, 1);
+ GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+ local_irq_enable();
+
+ /* Mask and disable any pending timer. */
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timers_sanity_checks(enum arch_timer timer)
+{
+ timers_sanity_checks(timer, false);
+ /* Check how KVM saves/restores these edge-case values. */
+ timers_sanity_checks(timer, true);
+}
+
+static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
+{
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ set_cval_irq(timer,
+ (uint64_t) TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
+
+ set_counter(timer, TVAL_MAX);
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
+static void test_timers_above_tval_max(enum arch_timer timer)
+{
+ uint64_t cval;
+ int i;
+
+ /*
+ * Test that the system is not implementing cval in terms of
+ * tval. If that was the case, setting a cval to "cval = now
+ * + TVAL_MAX + wait_ms" would wrap to "cval = now +
+ * wait_ms", and the timer would fire immediately. Test that it
+ * doesn't.
+ */
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ reset_timer_state(timer, DEF_CNT);
+ cval = timer_get_cntct(timer) + TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms);
+ test_cval_no_irq(timer, cval,
+ msecs_to_usecs(test_args.wait_ms) +
+ TIMEOUT_NO_IRQ_US, sleep_method[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ /* Get the IRQ by moving the counter forward. */
+ test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
+ }
+}
+
+/*
+ * Template function to be used by the test_move_counter_ahead_* tests. It
+ * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
+ * then waits for an IRQ.
+ */
+static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
+ uint64_t xval, uint64_t cnt_2,
+ irq_wait_method_t wm, enum timer_view tv)
+{
+ local_irq_disable();
+
+ set_counter(timer, cnt_1);
+ timer_set_ctl(timer, CTL_IMASK);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+ set_counter(timer, cnt_2);
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/*
+ * Template function to be used by the test_move_counter_ahead_* tests. It
+ * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
+ * then waits for an IRQ.
+ */
+static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, uint64_t xval,
+ uint64_t cnt_2,
+ sleep_method_t guest_sleep,
+ enum timer_view tv)
+{
+ local_irq_disable();
+
+ set_counter(timer, cnt_1);
+ timer_set_ctl(timer, CTL_IMASK);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+ set_counter(timer, cnt_2);
+ guest_sleep(timer, TIMEOUT_NO_IRQ_US);
+
+ local_irq_enable();
+ isb();
+
+ /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+ assert_irqs_handled(0);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
+ int32_t tval, uint64_t cnt_2,
+ irq_wait_method_t wm)
+{
+ test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
+ uint64_t cval, uint64_t cnt_2,
+ irq_wait_method_t wm)
+{
+ test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
+}
+
+static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, int32_t tval,
+ uint64_t cnt_2, sleep_method_t wm)
+{
+ test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
+ TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, uint64_t cval,
+ uint64_t cnt_2, sleep_method_t wm)
+{
+ test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
+ TIMER_CVAL);
+}
+
+/* Set a timer and then move the counter ahead of it. */
+static void test_move_counters_ahead_of_timers(enum arch_timer timer)
+{
+ int i;
+ int32_t tval;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
+ test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
+
+ /* Move counter ahead of negative tval. */
+ test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
+ test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
+ tval = TVAL_MAX;
+ test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
+ wm);
+ }
+}
+
+/*
+ * Program a timer, mask it, and then change the tval or counter to cancel it.
+ * Unmask it and check that nothing fires.
+ */
+static void test_move_counters_behind_timers(enum arch_timer timer)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
+ sm);
+ test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
+ }
+}
+
+static void test_timers_in_the_past(enum arch_timer timer)
+{
+ int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
+ uint64_t cval;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ /* set a timer wait_ms the past. */
+ cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+
+ /* Set a timer to counter=0 (in the past) */
+ test_timer_cval(timer, 0, wm, true, DEF_CNT);
+
+ /* Set a time for tval=0 (now) */
+ test_timer_tval(timer, 0, wm, true, DEF_CNT);
+
+ /* Set a timer to as far in the past as possible */
+ test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
+ }
+
+ /*
+ * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
+ * IRQ as that tval means cval=CVAL_MAX-wait_ms.
+ */
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ set_counter(timer, msec_to_cycles(test_args.wait_ms));
+ test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
+ }
+}
+
+static void test_long_timer_delays(enum arch_timer timer)
+{
+ int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
+ uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+ }
+}
+
+static void guest_run_iteration(enum arch_timer timer)
+{
+ test_basic_functionality(timer);
+ test_timers_sanity_checks(timer);
+
+ test_timers_above_tval_max(timer);
+ test_timers_in_the_past(timer);
+
+ test_move_counters_ahead_of_timers(timer);
+ test_move_counters_behind_timers(timer);
+ test_reprogram_timers(timer);
+
+ test_timers_fired_multiple_times(timer);
+
+ test_timer_control_mask_then_unmask(timer);
+ test_timer_control_masks(timer);
+}
+
+static void guest_code(enum arch_timer timer)
+{
+ int i;
+
+ local_irq_disable();
+
+ gic_init(GIC_V3, 1);
+
+ timer_set_ctl(VIRTUAL, CTL_IMASK);
+ timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+ gic_irq_enable(vtimer_irq);
+ gic_irq_enable(ptimer_irq);
+ local_irq_enable();
+
+ for (i = 0; i < test_args.iterations; i++) {
+ GUEST_SYNC(i);
+ guest_run_iteration(timer);
+ }
+
+ test_long_timer_delays(timer);
+ GUEST_DONE();
+}
+
+static cpu_set_t default_cpuset;
+
+static uint32_t next_pcpu(void)
+{
+ uint32_t max = get_nprocs();
+ uint32_t cur = sched_getcpu();
+ uint32_t next = cur;
+ cpu_set_t cpuset = default_cpuset;
+
+ TEST_ASSERT(max > 1, "Need at least two physical cpus");
+
+ do {
+ next = (next + 1) % CPU_SETSIZE;
+ } while (!CPU_ISSET(next, &cpuset));
+
+ return next;
+}
+
+static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
+ enum arch_timer timer)
+{
+ if (timer == PHYSICAL)
+ vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
+ else
+ vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ enum sync_cmd cmd = uc->args[1];
+ uint64_t val = uc->args[2];
+ enum arch_timer timer = uc->args[3];
+
+ switch (cmd) {
+ case SET_COUNTER_VALUE:
+ kvm_set_cntxct(vcpu, val, timer);
+ break;
+ case USERSPACE_USLEEP:
+ usleep(val);
+ break;
+ case USERSPACE_SCHED_YIELD:
+ sched_yield();
+ break;
+ case USERSPACE_MIGRATE_SELF:
+ pin_self_to_cpu(next_pcpu());
+ break;
+ default:
+ break;
+ }
+}
+
+static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ /* Start on CPU 0 */
+ pin_self_to_cpu(0);
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ break;
+ case UCALL_DONE:
+ goto out;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto out;
+ default:
+ TEST_FAIL("Unexpected guest exit\n");
+ }
+ }
+
+ out:
+ return;
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ ptimer_irq = vcpu_get_ptimer_irq(vcpu);
+ vtimer_irq = vcpu_get_vtimer_irq(vcpu);
+
+ sync_global_to_guest(vm, ptimer_irq);
+ sync_global_to_guest(vm, vtimer_irq);
+
+ pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
+ enum arch_timer timer)
+{
+ *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ TEST_ASSERT(*vm, "Failed to create the test VM\n");
+
+ vm_init_descriptor_tables(*vm);
+ vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handler);
+
+ vcpu_init_descriptor_tables(*vcpu);
+ vcpu_args_set(*vcpu, 1, timer);
+
+ test_init_timer_irq(*vm, *vcpu);
+
+ sync_global_to_guest(*vm, test_args);
+ sync_global_to_guest(*vm, CVAL_MAX);
+ sync_global_to_guest(*vm, DEF_CNT);
+}
+
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n"
+ , name);
+ pr_info("\t-i: Number of iterations (default: %u)\n",
+ NR_TEST_ITERS_DEF);
+ pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
+ pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
+ LONG_WAIT_TEST_MS);
+ pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
+ WAIT_TEST_MS);
+ pr_info("\t-p: Test physical timer (default: true)\n");
+ pr_info("\t-v: Test virtual timer (default: true)\n");
+ pr_info("\t-h: Print this help message\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
+ switch (opt) {
+ case 'b':
+ test_args.test_physical = true;
+ test_args.test_virtual = true;
+ break;
+ case 'i':
+ test_args.iterations =
+ atoi_positive("Number of iterations", optarg);
+ break;
+ case 'l':
+ test_args.long_wait_ms =
+ atoi_positive("Long wait time", optarg);
+ break;
+ case 'p':
+ test_args.test_physical = true;
+ test_args.test_virtual = false;
+ break;
+ case 'v':
+ test_args.test_virtual = true;
+ test_args.test_physical = false;
+ break;
+ case 'w':
+ test_args.wait_ms = atoi_positive("Wait time", optarg);
+ break;
+ case 'h':
+ default:
+ goto err;
+ }
+ }
+
+ return true;
+
+ err:
+ test_print_help(argv[0]);
+ return false;
+}
+
+static void set_counter_defaults(void)
+{
+ const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600;
+ uint64_t freq = read_sysreg(CNTFRQ_EL0);
+ int width = ilog2(MIN_ROLLOVER_SECS * freq);
+
+ width = clamp(width, 56, 64);
+ CVAL_MAX = GENMASK_ULL(width - 1, 0);
+ DEF_CNT = CVAL_MAX / 2;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset);
+ set_counter_defaults();
+
+ if (test_args.test_virtual) {
+ test_vm_create(&vm, &vcpu, VIRTUAL);
+ test_run(vm, vcpu);
+ test_vm_cleanup(vm);
+ }
+
+ if (test_args.test_physical) {
+ test_vm_create(&vm, &vcpu, PHYSICAL);
+ test_run(vm, vcpu);
+ test_vm_cleanup(vm);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c
new file mode 100644
index 000000000000..c8ee6f520734
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/at.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+#define TEST_ADDR 0x80000000
+
+enum {
+ CLEAR_ACCESS_FLAG,
+ TEST_ACCESS_FLAG,
+};
+
+static u64 *ptep_hva;
+
+#define copy_el2_to_el1(reg) \
+ write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12)
+
+/* Yes, this is an ugly hack */
+#define __at(op, addr) write_sysreg_s(addr, op)
+
+#define test_at_insn(op, expect_fault) \
+do { \
+ u64 par, fsc; \
+ bool fault; \
+ \
+ GUEST_SYNC(CLEAR_ACCESS_FLAG); \
+ \
+ __at(OP_AT_##op, TEST_ADDR); \
+ isb(); \
+ par = read_sysreg(par_el1); \
+ \
+ fault = par & SYS_PAR_EL1_F; \
+ fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \
+ \
+ __GUEST_ASSERT((expect_fault) == fault, \
+ "AT "#op": %sexpected fault (par: %lx)1", \
+ (expect_fault) ? "" : "un", par); \
+ if ((expect_fault)) { \
+ __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \
+ "AT "#op": expected access flag fault (par: %lx)", \
+ par); \
+ } else { \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \
+ GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \
+ GUEST_SYNC(TEST_ACCESS_FLAG); \
+ } \
+} while (0)
+
+static void test_at(bool expect_fault)
+{
+ test_at_insn(S1E2R, expect_fault);
+ test_at_insn(S1E2W, expect_fault);
+
+ /* Reuse the stage-1 MMU context from EL2 at EL1 */
+ copy_el2_to_el1(SCTLR);
+ copy_el2_to_el1(MAIR);
+ copy_el2_to_el1(TCR);
+ copy_el2_to_el1(TTBR0);
+ copy_el2_to_el1(TTBR1);
+
+ /* Disable stage-2 translation and enter a non-host context */
+ write_sysreg(0, vtcr_el2);
+ write_sysreg(0, vttbr_el2);
+ sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0);
+ isb();
+
+ test_at_insn(S1E1R, expect_fault);
+ test_at_insn(S1E1W, expect_fault);
+}
+
+static void guest_code(void)
+{
+ sysreg_clear_set(tcr_el1, TCR_HA, 0);
+ isb();
+
+ test_at(true);
+
+ if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1)))
+ GUEST_DONE();
+
+ /*
+ * KVM's software PTW makes the implementation choice that the AT
+ * instruction sets the access flag.
+ */
+ sysreg_clear_set(tcr_el1, 0, TCR_HA);
+ isb();
+ test_at(false);
+
+ GUEST_DONE();
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ switch (uc->args[1]) {
+ case CLEAR_ACCESS_FLAG:
+ /*
+ * Delete + reinstall the memslot to invalidate stage-2
+ * mappings of the stage-1 page tables, forcing KVM to
+ * use the 'slow' AT emulation path.
+ *
+ * This and clearing the access flag from host userspace
+ * ensures that the access flag cannot be set speculatively
+ * and is reliably cleared at the time of the AT instruction.
+ */
+ clear_bit(__ffs(PTE_AF), ptep_hva);
+ vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]);
+ break;
+ case TEST_ACCESS_FLAG:
+ TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva),
+ "Expected access flag to be set (desc: %lu)", *ptep_hva);
+ break;
+ default:
+ TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]);
+ }
+}
+
+static void run_test(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ return;
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ continue;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ virt_map(vm, TEST_ADDR, TEST_ADDR, 1);
+ ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3);
+ run_test(vcpu);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
new file mode 100644
index 000000000000..1d431de8729c
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <linux/bitfield.h>
+
+#define MDSCR_KDE (1 << 13)
+#define MDSCR_MDE (1 << 15)
+#define MDSCR_SS (1 << 0)
+
+#define DBGBCR_LEN8 (0xff << 5)
+#define DBGBCR_EXEC (0x0 << 3)
+#define DBGBCR_EL1 (0x1 << 1)
+#define DBGBCR_E (0x1 << 0)
+#define DBGBCR_LBN_SHIFT 16
+#define DBGBCR_BT_SHIFT 20
+#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT)
+#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT)
+
+#define DBGWCR_LEN8 (0xff << 5)
+#define DBGWCR_RD (0x1 << 3)
+#define DBGWCR_WR (0x2 << 3)
+#define DBGWCR_EL1 (0x1 << 1)
+#define DBGWCR_E (0x1 << 0)
+#define DBGWCR_LBN_SHIFT 16
+#define DBGWCR_WT_SHIFT 20
+#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT)
+
+#define SPSR_D (1 << 9)
+#define SPSR_SS (1 << 21)
+
+extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
+extern unsigned char iter_ss_begin, iter_ss_end;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define PC(v) ((uint64_t)&(v))
+
+#define GEN_DEBUG_WRITE_REG(reg_name) \
+static void write_##reg_name(int num, uint64_t val) \
+{ \
+ switch (num) { \
+ case 0: \
+ write_sysreg(val, reg_name##0_el1); \
+ break; \
+ case 1: \
+ write_sysreg(val, reg_name##1_el1); \
+ break; \
+ case 2: \
+ write_sysreg(val, reg_name##2_el1); \
+ break; \
+ case 3: \
+ write_sysreg(val, reg_name##3_el1); \
+ break; \
+ case 4: \
+ write_sysreg(val, reg_name##4_el1); \
+ break; \
+ case 5: \
+ write_sysreg(val, reg_name##5_el1); \
+ break; \
+ case 6: \
+ write_sysreg(val, reg_name##6_el1); \
+ break; \
+ case 7: \
+ write_sysreg(val, reg_name##7_el1); \
+ break; \
+ case 8: \
+ write_sysreg(val, reg_name##8_el1); \
+ break; \
+ case 9: \
+ write_sysreg(val, reg_name##9_el1); \
+ break; \
+ case 10: \
+ write_sysreg(val, reg_name##10_el1); \
+ break; \
+ case 11: \
+ write_sysreg(val, reg_name##11_el1); \
+ break; \
+ case 12: \
+ write_sysreg(val, reg_name##12_el1); \
+ break; \
+ case 13: \
+ write_sysreg(val, reg_name##13_el1); \
+ break; \
+ case 14: \
+ write_sysreg(val, reg_name##14_el1); \
+ break; \
+ case 15: \
+ write_sysreg(val, reg_name##15_el1); \
+ break; \
+ default: \
+ GUEST_ASSERT(0); \
+ } \
+}
+
+/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
+GEN_DEBUG_WRITE_REG(dbgbcr)
+GEN_DEBUG_WRITE_REG(dbgbvr)
+GEN_DEBUG_WRITE_REG(dbgwcr)
+GEN_DEBUG_WRITE_REG(dbgwvr)
+
+static void reset_debug_state(void)
+{
+ uint8_t brps, wrps, i;
+ uint64_t dfr0;
+
+ asm volatile("msr daifset, #8");
+
+ write_sysreg(0, osdlr_el1);
+ write_sysreg(0, oslar_el1);
+ isb();
+
+ write_sysreg(0, mdscr_el1);
+ write_sysreg(0, contextidr_el1);
+
+ /* Reset all bcr/bvr/wcr/wvr registers */
+ dfr0 = read_sysreg(id_aa64dfr0_el1);
+ brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0);
+ for (i = 0; i <= brps; i++) {
+ write_dbgbcr(i, 0);
+ write_dbgbvr(i, 0);
+ }
+ wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0);
+ for (i = 0; i <= wrps; i++) {
+ write_dbgwcr(i, 0);
+ write_dbgwvr(i, 0);
+ }
+
+ isb();
+}
+
+static void enable_os_lock(void)
+{
+ write_sysreg(1, oslar_el1);
+ isb();
+
+ GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
+}
+
+static void enable_monitor_debug_exceptions(void)
+{
+ uint64_t mdscr;
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+ write_sysreg(mdscr, mdscr_el1);
+ isb();
+}
+
+static void install_wp(uint8_t wpn, uint64_t addr)
+{
+ uint32_t wcr;
+
+ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+ write_dbgwcr(wpn, wcr);
+ write_dbgwvr(wpn, addr);
+
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+static void install_hw_bp(uint8_t bpn, uint64_t addr)
+{
+ uint32_t bcr;
+
+ bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+ write_dbgbcr(bpn, bcr);
+ write_dbgbvr(bpn, addr);
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
+ uint64_t ctx)
+{
+ uint32_t wcr;
+ uint64_t ctx_bcr;
+
+ /* Setup a context-aware breakpoint for Linked Context ID Match */
+ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+ DBGBCR_BT_CTX_LINK;
+ write_dbgbcr(ctx_bp, ctx_bcr);
+ write_dbgbvr(ctx_bp, ctx);
+
+ /* Setup a linked watchpoint (linked to the context-aware breakpoint) */
+ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
+ DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
+ write_dbgwcr(addr_wp, wcr);
+ write_dbgwvr(addr_wp, addr);
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
+ uint64_t ctx)
+{
+ uint32_t addr_bcr, ctx_bcr;
+
+ /* Setup a context-aware breakpoint for Linked Context ID Match */
+ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+ DBGBCR_BT_CTX_LINK;
+ write_dbgbcr(ctx_bp, ctx_bcr);
+ write_dbgbvr(ctx_bp, ctx);
+
+ /*
+ * Setup a normal breakpoint for Linked Address Match, and link it
+ * to the context-aware breakpoint.
+ */
+ addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+ DBGBCR_BT_ADDR_LINK_CTX |
+ ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
+ write_dbgbcr(addr_bp, addr_bcr);
+ write_dbgbvr(addr_bp, addr);
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+static void install_ss(void)
+{
+ uint64_t mdscr;
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+ write_sysreg(mdscr, mdscr_el1);
+ isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+ uint64_t ctx = 0xabcdef; /* a random context number */
+
+ /* Software-breakpoint */
+ reset_debug_state();
+ asm volatile("sw_bp: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+ /* Hardware-breakpoint */
+ reset_debug_state();
+ install_hw_bp(bpn, PC(hw_bp));
+ asm volatile("hw_bp: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+ /* Hardware-breakpoint + svc */
+ reset_debug_state();
+ install_hw_bp(bpn, PC(bp_svc));
+ asm volatile("bp_svc: svc #0");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+ GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+ /* Hardware-breakpoint + software-breakpoint */
+ reset_debug_state();
+ install_hw_bp(bpn, PC(bp_brk));
+ asm volatile("bp_brk: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+ /* Watchpoint */
+ reset_debug_state();
+ install_wp(wpn, PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+ /* Single-step */
+ reset_debug_state();
+ install_ss();
+ ss_idx = 0;
+ asm volatile("ss_start:\n"
+ "mrs x0, esr_el1\n"
+ "add x0, x0, #1\n"
+ "msr daifset, #8\n"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+ GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+ GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+ /* OS Lock does not block software-breakpoint */
+ reset_debug_state();
+ enable_os_lock();
+ sw_bp_addr = 0;
+ asm volatile("sw_bp2: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
+
+ /* OS Lock blocking hardware-breakpoint */
+ reset_debug_state();
+ enable_os_lock();
+ install_hw_bp(bpn, PC(hw_bp2));
+ hw_bp_addr = 0;
+ asm volatile("hw_bp2: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, 0);
+
+ /* OS Lock blocking watchpoint */
+ reset_debug_state();
+ enable_os_lock();
+ write_data = '\0';
+ wp_data_addr = 0;
+ install_wp(wpn, PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, 0);
+
+ /* OS Lock blocking single-step */
+ reset_debug_state();
+ enable_os_lock();
+ ss_addr[0] = 0;
+ install_ss();
+ ss_idx = 0;
+ asm volatile("mrs x0, esr_el1\n\t"
+ "add x0, x0, #1\n\t"
+ "msr daifset, #8\n\t"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], 0);
+
+ /* Linked hardware-breakpoint */
+ hw_bp_addr = 0;
+ reset_debug_state();
+ install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
+ /* Set context id */
+ write_sysreg(ctx, contextidr_el1);
+ isb();
+ asm volatile("hw_bp_ctx: nop");
+ write_sysreg(0, contextidr_el1);
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
+
+ /* Linked watchpoint */
+ reset_debug_state();
+ install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
+ /* Set context id */
+ write_sysreg(ctx, contextidr_el1);
+ isb();
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+ GUEST_DONE();
+}
+
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+ sw_bp_addr = regs->pc;
+ regs->pc += 4;
+}
+
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+ hw_bp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_wp_handler(struct ex_regs *regs)
+{
+ wp_data_addr = read_sysreg(far_el1);
+ wp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_ss_handler(struct ex_regs *regs)
+{
+ __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
+ ss_addr[ss_idx++] = regs->pc;
+ regs->pstate |= SPSR_SS;
+}
+
+static void guest_svc_handler(struct ex_regs *regs)
+{
+ svc_addr = regs->pc;
+}
+
+static void guest_code_ss(int test_cnt)
+{
+ uint64_t i;
+ uint64_t bvr, wvr, w_bvr, w_wvr;
+
+ for (i = 0; i < test_cnt; i++) {
+ /* Bits [1:0] of dbg{b,w}vr are RES0 */
+ w_bvr = i << 2;
+ w_wvr = i << 2;
+
+ /*
+ * Enable Single Step execution. Note! This _must_ be a bare
+ * ucall as the ucall() path uses atomic operations to manage
+ * the ucall structures, and the built-in "atomics" are usually
+ * implemented via exclusive access instructions. The exlusive
+ * monitor is cleared on ERET, and so taking debug exceptions
+ * during a LDREX=>STREX sequence will prevent forward progress
+ * and hang the guest/test.
+ */
+ GUEST_UCALL_NONE();
+
+ /*
+ * The userspace will verify that the pc is as expected during
+ * single step execution between iter_ss_begin and iter_ss_end.
+ */
+ asm volatile("iter_ss_begin:nop\n");
+
+ write_sysreg(w_bvr, dbgbvr0_el1);
+ write_sysreg(w_wvr, dbgwvr0_el1);
+ bvr = read_sysreg(dbgbvr0_el1);
+ wvr = read_sysreg(dbgwvr0_el1);
+
+ /* Userspace disables Single Step when the end is nigh. */
+ asm volatile("iter_ss_end:\n");
+
+ GUEST_ASSERT_EQ(bvr, w_bvr);
+ GUEST_ASSERT_EQ(wvr, w_wvr);
+ }
+ GUEST_DONE();
+}
+
+static int debug_version(uint64_t id_aa64dfr0)
+{
+ return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0);
+}
+
+static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_BRK64, guest_sw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_SVC64, guest_svc_handler);
+
+ /* Specify bpn/wpn/ctx_bpn to be tested */
+ vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
+ pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
+
+void test_single_step_from_userspace(int test_cnt)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ struct kvm_run *run;
+ uint64_t pc, cmd;
+ uint64_t test_pc = 0;
+ bool ss_enable = false;
+ struct kvm_guest_debug debug = {};
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
+ run = vcpu->run;
+ vcpu_args_set(vcpu, 1, test_cnt);
+
+ while (1) {
+ vcpu_run(vcpu);
+ if (run->exit_reason != KVM_EXIT_DEBUG) {
+ cmd = get_ucall(vcpu, &uc);
+ if (cmd == UCALL_ABORT) {
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ } else if (cmd == UCALL_DONE) {
+ break;
+ }
+
+ TEST_ASSERT(cmd == UCALL_NONE,
+ "Unexpected ucall cmd 0x%lx", cmd);
+
+ debug.control = KVM_GUESTDBG_ENABLE |
+ KVM_GUESTDBG_SINGLESTEP;
+ ss_enable = true;
+ vcpu_guest_debug_set(vcpu, &debug);
+ continue;
+ }
+
+ TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
+
+ /* Check if the current pc is expected. */
+ pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+ TEST_ASSERT(!test_pc || pc == test_pc,
+ "Unexpected pc 0x%lx (expected 0x%lx)",
+ pc, test_pc);
+
+ if ((pc + 4) == (uint64_t)&iter_ss_end) {
+ test_pc = 0;
+ debug.control = KVM_GUESTDBG_ENABLE;
+ ss_enable = false;
+ vcpu_guest_debug_set(vcpu, &debug);
+ continue;
+ }
+
+ /*
+ * If the current pc is between iter_ss_bgin and
+ * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
+ * be the current pc + 4.
+ */
+ if ((pc >= (uint64_t)&iter_ss_begin) &&
+ (pc < (uint64_t)&iter_ss_end))
+ test_pc = pc + 4;
+ else
+ test_pc = 0;
+ }
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Run debug testing using the various breakpoint#, watchpoint# and
+ * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
+ */
+void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
+{
+ uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
+ int b, w, c;
+
+ /* Number of breakpoints */
+ brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1;
+ __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
+
+ /* Number of watchpoints */
+ wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1;
+
+ /* Number of context aware breakpoints */
+ ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1;
+
+ pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
+ brp_num, wrp_num, ctx_brp_num);
+
+ /* Number of normal (non-context aware) breakpoints */
+ normal_brp_num = brp_num - ctx_brp_num;
+
+ /* Lowest context aware breakpoint number */
+ ctx_brp_base = normal_brp_num;
+
+ /* Run tests with all supported breakpoints/watchpoints */
+ for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
+ for (b = 0; b < normal_brp_num; b++) {
+ for (w = 0; w < wrp_num; w++)
+ test_guest_debug_exceptions(b, w, c);
+ }
+ }
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("Usage: %s [-h] [-i iterations of the single step test]\n", name);
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int opt;
+ int ss_iteration = 10000;
+ uint64_t aa64dfr0;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
+ __TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
+ "Armv8 debug architecture not supported.");
+ kvm_vm_free(vm);
+
+ while ((opt = getopt(argc, argv, "i:")) != -1) {
+ switch (opt) {
+ case 'i':
+ ss_iteration = atoi_positive("Number of iterations", optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ test_guest_debug_exceptions_all(aa64dfr0);
+ test_single_step_from_userspace(ss_iteration);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
new file mode 100644
index 000000000000..d8fe17a6cc59
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * external_abort - Tests for userspace external abort injection
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+#include "processor.h"
+#include "test_util.h"
+
+#define MMIO_ADDR 0x8000000ULL
+#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed)
+
+static u64 expected_abort_pc;
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ GUEST_DONE();
+}
+
+static void unexpected_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
+}
+
+static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
+ handler_fn dabt_handler)
+{
+ struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
+
+ virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+ return vm;
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static bool vcpu_has_ras(struct kvm_vcpu *vcpu)
+{
+ u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0);
+}
+
+static bool guest_has_ras(void)
+{
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1));
+}
+
+static void vcpu_inject_serror(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.serror_pending = true;
+ if (vcpu_has_ras(vcpu)) {
+ events.exception.serror_has_esr = true;
+ events.exception.serror_esr = EXPECTED_SERROR_ISS;
+ }
+
+ vcpu_events_set(vcpu, &events);
+}
+
+static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ if (uc.cmd == cmd)
+ return;
+
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_DONE);
+}
+
+static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_SYNC);
+}
+
+extern char test_mmio_abort_insn;
+
+static noinline void test_mmio_abort_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
+
+ asm volatile("test_mmio_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+extern char test_mmio_nisv_insn;
+
+static void test_mmio_nisv_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
+
+ asm volatile("test_mmio_nisv_insn:\n\t"
+ "ldr x0, [%0], #8\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
+ * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
+ */
+static void test_mmio_nisv(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ unexpected_dabt_handler);
+
+ TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
+ TEST_ASSERT_EQ(errno, ENOSYS);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
+ * reaches the guest.
+ */
+static void test_mmio_nisv_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
+ TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void unexpected_serror_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Took unexpected SError exception");
+}
+
+static void test_serror_masked_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ isb();
+
+ GUEST_DONE();
+}
+
+static void test_serror_masked(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_serror_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR);
+ if (guest_has_ras())
+ GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS);
+
+ GUEST_DONE();
+}
+
+static void test_serror_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_sea_s1ptw_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3));
+
+ GUEST_DONE();
+}
+
+static noinline void test_s1ptw_abort_guest(void)
+{
+ extern char test_s1ptw_abort_insn;
+
+ WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn);
+
+ asm volatile("test_s1ptw_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("Load on S1PTW abort should not retire");
+}
+
+static void test_s1ptw_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ u64 *ptep, bad_pa;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest,
+ expect_sea_s1ptw_handler);
+
+ ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2);
+ bad_pa = BIT(vm->pa_bits) - vm->page_size;
+
+ *ptep &= ~GENMASK(47, 12);
+ *ptep |= bad_pa;
+
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_emulated_guest(void)
+{
+ GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
+
+ local_serror_enable();
+ GUEST_SYNC(0);
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken unmasked SError exception");
+}
+
+static void test_serror_emulated(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_mmio_ease_guest(void)
+{
+ sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE);
+ isb();
+
+ test_mmio_abort_guest();
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_ease(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest,
+ unexpected_dabt_handler);
+ struct kvm_run *run = vcpu->run;
+ u64 pfr1;
+
+ pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) {
+ pr_debug("Skipping %s\n", __func__);
+ return;
+ }
+
+ /*
+ * SCTLR2_ELx.EASE changes the exception vector to the SError vector but
+ * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx).
+ */
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_amo_guest(void)
+{
+ /*
+ * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked)
+ * since the write is redirected to memory. But don't write (intentionally)
+ * broken code!
+ */
+ sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0);
+ isb();
+
+ GUEST_SYNC(0);
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ /*
+ * KVM treats the effective value of AMO as 1 when
+ * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when
+ * unmasked.
+ */
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror_amo(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_mmio_abort();
+ test_mmio_nisv();
+ test_mmio_nisv_abort();
+ test_serror();
+ test_serror_masked();
+ test_serror_emulated();
+ test_mmio_ease();
+ test_s1ptw_abort();
+
+ if (!test_supports_el2())
+ return 0;
+
+ test_serror_amo();
+}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index 486932164cf2..0a3a94c4cca1 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -4,64 +4,75 @@
*
* Copyright (C) 2020, Red Hat, Inc.
*
- * When attempting to migrate from a host with an older kernel to a host
- * with a newer kernel we allow the newer kernel on the destination to
- * list new registers with get-reg-list. We assume they'll be unused, at
- * least until the guest reboots, and so they're relatively harmless.
- * However, if the destination host with the newer kernel is missing
- * registers which the source host with the older kernel has, then that's
- * a regression in get-reg-list. This test checks for that regression by
- * checking the current list against a blessed list. We should never have
- * missing registers, but if new ones appear then they can probably be
- * added to the blessed list. A completely new blessed list can be created
- * by running the test with the --list command line argument.
- *
- * Note, the blessed list should be created from the oldest possible
- * kernel. We can't go older than v4.15, though, because that's the first
- * release to expose the ID system registers in KVM_GET_REG_LIST, see
- * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features
- * from guests"). Also, one must use the --core-reg-fixup command line
- * option when running on an older kernel that doesn't include df205b5c6328
- * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST")
+ * While the blessed list should be created from the oldest possible
+ * kernel, we can't go older than v5.2, though, because that's the first
+ * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
+ * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
+ * registers won't match expectations.
*/
#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include "kvm_util.h"
#include "test_util.h"
#include "processor.h"
-#ifdef REG_LIST_SVE
-#define reg_list_sve() (true)
-#else
-#define reg_list_sve() (false)
-#endif
-
-#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
-
-#define for_each_reg(i) \
- for ((i) = 0; (i) < reg_list->n; ++(i))
-
-#define for_each_reg_filtered(i) \
- for_each_reg(i) \
- if (!filter_reg(reg_list->reg[i]))
-
-#define for_each_missing_reg(i) \
- for ((i) = 0; (i) < blessed_n; ++(i)) \
- if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
-
-#define for_each_new_reg(i) \
- for_each_reg_filtered(i) \
- if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \
+ sys_reg_Op1(SYS_ ## r), \
+ sys_reg_CRn(SYS_ ## r), \
+ sys_reg_CRm(SYS_ ## r), \
+ sys_reg_Op2(SYS_ ## r))
+
+struct feature_id_reg {
+ __u64 reg;
+ __u64 id_reg;
+ __u64 feat_shift;
+ __u64 feat_min;
+};
+#define FEAT(id, f, v) \
+ .id_reg = SYS_REG(id), \
+ .feat_shift = id ## _ ## f ## _SHIFT, \
+ .feat_min = id ## _ ## f ## _ ## v
-static struct kvm_reg_list *reg_list;
+#define REG_FEAT(r, id, f, v) \
+ { \
+ .reg = SYS_REG(r), \
+ FEAT(id, f, v) \
+ }
-static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
-static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
-static __u64 *blessed_reg, blessed_n;
+static struct feature_id_reg feat_id_regs[] = {
+ REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP),
+ REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
+ REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY),
+ REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
+};
-static bool filter_reg(__u64 reg)
+bool filter_reg(__u64 reg)
{
/*
* DEMUX register presence depends on the host's CLIDR_EL1.
@@ -73,34 +84,55 @@ static bool filter_reg(__u64 reg)
return false;
}
-static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
{
- int i;
+ int i, ret;
+ __u64 data, feat_val;
- for (i = 0; i < nr_regs; ++i)
- if (reg == regs[i])
- return true;
- return false;
+ for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
+ if (feat_id_regs[i].reg == reg) {
+ ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
+ if (ret < 0)
+ return false;
+
+ feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
+ return feat_val >= feat_id_regs[i].feat_min;
+ }
+ }
+
+ return true;
}
-static const char *str_with_index(const char *template, __u64 index)
+bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
{
- char *str, *p;
- int n;
+ return check_supported_feat_reg(vcpu, reg);
+}
+
+bool check_reject_set(int err)
+{
+ return err == EPERM;
+}
- str = strdup(template);
- p = strstr(str, "##");
- n = sprintf(p, "%lld", index);
- strcat(p + n, strstr(template, "##") + 2);
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+ int feature;
- return (const char *)str;
+ for_each_sublist(c, s) {
+ if (s->finalize) {
+ feature = s->feature;
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
+ }
+ }
}
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
#define CORE_REGS_XX_NR_WORDS 2
#define CORE_SPSR_XX_NR_WORDS 2
#define CORE_FPREGS_XX_NR_WORDS 4
-static const char *core_id_to_str(__u64 id)
+static const char *core_id_to_str(const char *prefix, __u64 id)
{
__u64 core_off = id & ~REG_MASK, idx;
@@ -111,8 +143,8 @@ static const char *core_id_to_str(__u64 id)
case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
KVM_REG_ARM_CORE_REG(regs.regs[30]):
idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
- return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
+ TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
case KVM_REG_ARM_CORE_REG(regs.sp):
return "KVM_REG_ARM_CORE_REG(regs.sp)";
case KVM_REG_ARM_CORE_REG(regs.pc):
@@ -126,24 +158,24 @@ static const char *core_id_to_str(__u64 id)
case KVM_REG_ARM_CORE_REG(spsr[0]) ...
KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
- TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
- return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
+ TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
- return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
+ TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
}
- TEST_FAIL("Unknown core reg id: 0x%llx", id);
+ TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
return NULL;
}
-static const char *sve_id_to_str(__u64 id)
+static const char *sve_id_to_str(const char *prefix, __u64 id)
{
__u64 sve_off, n, i;
@@ -153,37 +185,37 @@ static const char *sve_id_to_str(__u64 id)
sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
- TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+ TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
switch (sve_off) {
case KVM_REG_ARM64_SVE_ZREG_BASE ...
KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
- "Unexpected bits set in SVE ZREG id: 0x%llx", id);
- return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
+ "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
+ return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
case KVM_REG_ARM64_SVE_PREG_BASE ...
KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
- "Unexpected bits set in SVE PREG id: 0x%llx", id);
- return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
+ "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
+ return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
case KVM_REG_ARM64_SVE_FFR_BASE:
TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
- "Unexpected bits set in SVE FFR id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
return "KVM_REG_ARM64_SVE_FFR(0)";
}
return NULL;
}
-static void print_reg(__u64 id)
+void print_reg(const char *prefix, __u64 id)
{
unsigned op0, op1, crn, crm, op2;
const char *reg_size = NULL;
TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
- "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+ "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
switch (id & KVM_REG_SIZE_MASK) {
case KVM_REG_SIZE_U8:
@@ -214,17 +246,17 @@ static void print_reg(__u64 id)
reg_size = "KVM_REG_SIZE_U2048";
break;
default:
- TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
- (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+ prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
}
switch (id & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE:
- printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
break;
case KVM_REG_ARM_DEMUX:
TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
- "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+ "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
break;
@@ -235,253 +267,36 @@ static void print_reg(__u64 id)
crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
- "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+ "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
break;
case KVM_REG_ARM_FW:
TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
- "Unexpected bits set in FW reg id: 0x%llx", id);
+ "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
break;
+ case KVM_REG_ARM_FW_FEAT_BMAP:
+ TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
+ "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
+ printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
+ break;
case KVM_REG_ARM64_SVE:
- if (reg_list_sve())
- printf("\t%s,\n", sve_id_to_str(id));
- else
- TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+ printf("\t%s,\n", sve_id_to_str(prefix, id));
break;
default:
- TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
- (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+ TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+ prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
}
}
/*
- * Older kernels listed each 32-bit word of CORE registers separately.
- * For 64 and 128-bit registers we need to ignore the extra words. We
- * also need to fixup the sizes, because the older kernels stated all
- * registers were 64-bit, even when they weren't.
- */
-static void core_reg_fixup(void)
-{
- struct kvm_reg_list *tmp;
- __u64 id, core_off;
- int i;
-
- tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64));
-
- for (i = 0; i < reg_list->n; ++i) {
- id = reg_list->reg[i];
-
- if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) {
- tmp->reg[tmp->n++] = id;
- continue;
- }
-
- core_off = id & ~REG_MASK;
-
- switch (core_off) {
- case 0x52: case 0xd2: case 0xd6:
- /*
- * These offsets are pointing at padding.
- * We need to ignore them too.
- */
- continue;
- case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
- KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
- if (core_off & 3)
- continue;
- id &= ~KVM_REG_SIZE_MASK;
- id |= KVM_REG_SIZE_U128;
- tmp->reg[tmp->n++] = id;
- continue;
- case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
- case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
- id &= ~KVM_REG_SIZE_MASK;
- id |= KVM_REG_SIZE_U32;
- tmp->reg[tmp->n++] = id;
- continue;
- default:
- if (core_off & 1)
- continue;
- tmp->reg[tmp->n++] = id;
- break;
- }
- }
-
- free(reg_list);
- reg_list = tmp;
-}
-
-static void prepare_vcpu_init(struct kvm_vcpu_init *init)
-{
- if (reg_list_sve())
- init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
-}
-
-static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
-{
- int feature;
-
- if (reg_list_sve()) {
- feature = KVM_ARM_VCPU_SVE;
- vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
- }
-}
-
-static void check_supported(void)
-{
- if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
- fprintf(stderr, "SVE not available, skipping tests\n");
- exit(KSFT_SKIP);
- }
-}
-
-int main(int ac, char **av)
-{
- struct kvm_vcpu_init init = { .target = -1, };
- int new_regs = 0, missing_regs = 0, i;
- int failed_get = 0, failed_set = 0, failed_reject = 0;
- bool print_list = false, print_filtered = false, fixup_core_regs = false;
- struct kvm_vm *vm;
- __u64 *vec_regs;
-
- check_supported();
-
- for (i = 1; i < ac; ++i) {
- if (strcmp(av[i], "--core-reg-fixup") == 0)
- fixup_core_regs = true;
- else if (strcmp(av[i], "--list") == 0)
- print_list = true;
- else if (strcmp(av[i], "--list-filtered") == 0)
- print_filtered = true;
- else
- TEST_FAIL("Unknown option: %s\n", av[i]);
- }
-
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- prepare_vcpu_init(&init);
- aarch64_vcpu_add_default(vm, 0, &init, NULL);
- finalize_vcpu(vm, 0);
-
- reg_list = vcpu_get_reg_list(vm, 0);
-
- if (fixup_core_regs)
- core_reg_fixup();
-
- if (print_list || print_filtered) {
- putchar('\n');
- for_each_reg(i) {
- __u64 id = reg_list->reg[i];
- if ((print_list && !filter_reg(id)) ||
- (print_filtered && filter_reg(id)))
- print_reg(id);
- }
- putchar('\n');
- return 0;
- }
-
- /*
- * We only test that we can get the register and then write back the
- * same value. Some registers may allow other values to be written
- * back, but others only allow some bits to be changed, and at least
- * for ID registers set will fail if the value does not exactly match
- * what was returned by get. If registers that allow other values to
- * be written need to have the other values tested, then we should
- * create a new set of tests for those in a new independent test
- * executable.
- */
- for_each_reg(i) {
- uint8_t addr[2048 / 8];
- struct kvm_one_reg reg = {
- .id = reg_list->reg[i],
- .addr = (__u64)&addr,
- };
- int ret;
-
- ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
- if (ret) {
- puts("Failed to get ");
- print_reg(reg.id);
- putchar('\n');
- ++failed_get;
- }
-
- /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
- if (find_reg(rejects_set, rejects_set_n, reg.id)) {
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
- if (ret != -1 || errno != EPERM) {
- printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
- print_reg(reg.id);
- putchar('\n');
- ++failed_reject;
- }
- continue;
- }
-
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
- if (ret) {
- puts("Failed to set ");
- print_reg(reg.id);
- putchar('\n');
- ++failed_set;
- }
- }
-
- if (reg_list_sve()) {
- blessed_n = base_regs_n + sve_regs_n;
- vec_regs = sve_regs;
- } else {
- blessed_n = base_regs_n + vregs_n;
- vec_regs = vregs;
- }
-
- blessed_reg = calloc(blessed_n, sizeof(__u64));
- for (i = 0; i < base_regs_n; ++i)
- blessed_reg[i] = base_regs[i];
- for (i = 0; i < blessed_n - base_regs_n; ++i)
- blessed_reg[base_regs_n + i] = vec_regs[i];
-
- for_each_new_reg(i)
- ++new_regs;
-
- for_each_missing_reg(i)
- ++missing_regs;
-
- if (new_regs || missing_regs) {
- printf("Number blessed registers: %5lld\n", blessed_n);
- printf("Number registers: %5lld\n", reg_list->n);
- }
-
- if (new_regs) {
- printf("\nThere are %d new registers.\n"
- "Consider adding them to the blessed reg "
- "list with the following lines:\n\n", new_regs);
- for_each_new_reg(i)
- print_reg(reg_list->reg[i]);
- putchar('\n');
- }
-
- if (missing_regs) {
- printf("\nThere are %d missing registers.\n"
- "The following lines are missing registers:\n\n", missing_regs);
- for_each_missing_reg(i)
- print_reg(blessed_reg[i]);
- putchar('\n');
- }
-
- TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
- "There are %d missing registers; "
- "%d registers failed get; %d registers failed set; %d registers failed reject",
- missing_regs, failed_get, failed_set, failed_reject);
-
- return 0;
-}
-
-/*
- * The current blessed list was primed with the output of kernel version
+ * The original blessed list was primed with the output of kernel version
* v4.15 with --core-reg-fixup and then later updated with new registers.
+ * (The --core-reg-fixup option and it's fixup function have been removed
+ * from the test, as it's unlikely to use this type of test on a kernel
+ * older than v5.2.)
*
- * The blessed list is up to date with kernel version v5.10-rc5
+ * The blessed list is up to date with kernel version v6.4 (or so we hope)
*/
static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -527,12 +342,28 @@ static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
- KVM_REG_ARM_FW_REG(0),
- KVM_REG_ARM_FW_REG(1),
- KVM_REG_ARM_FW_REG(2),
- ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 0, 2),
+ KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */
+ KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
+ KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
+ KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
+
+ /*
+ * EL0 Virtual Timer Registers
+ *
+ * WARNING:
+ * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+ * with the appropriate register encodings. Their values have been
+ * accidentally swapped. As this is set API, the definitions here
+ * must be used, rather than ones derived from the encodings.
+ */
+ KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0),
+ KVM_REG_ARM_TIMER_CVAL,
+ KVM_REG_ARM_TIMER_CNT,
+
ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
@@ -604,6 +435,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(2, 0, 0, 15, 5),
ARM64_SYS_REG(2, 0, 0, 15, 6),
ARM64_SYS_REG(2, 0, 0, 15, 7),
+ ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */
ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */
ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */
ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */
@@ -632,10 +464,10 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 0, 3, 7),
ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 2),
+ ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */
ARM64_SYS_REG(3, 0, 0, 4, 3),
ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 5),
+ ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 4, 6),
ARM64_SYS_REG(3, 0, 0, 4, 7),
ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */
@@ -648,7 +480,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 0, 5, 7),
ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 6, 2),
+ ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */
ARM64_SYS_REG(3, 0, 0, 6, 3),
ARM64_SYS_REG(3, 0, 0, 6, 4),
ARM64_SYS_REG(3, 0, 0, 6, 5),
@@ -657,25 +489,28 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3),
- ARM64_SYS_REG(3, 0, 0, 7, 4),
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 4), /* ID_AA64MMFR4_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 5),
ARM64_SYS_REG(3, 0, 0, 7, 6),
ARM64_SYS_REG(3, 0, 0, 7, 7),
ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
+ KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1),
ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */
ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */
ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */
ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */
ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */
- ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
- ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */
@@ -683,6 +518,20 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
+ ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */
+ ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+ ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
+ ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */
ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */
ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */
@@ -692,8 +541,6 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */
ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */
ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
ARM64_SYS_REG(3, 3, 14, 8, 0),
ARM64_SYS_REG(3, 3, 14, 8, 1),
ARM64_SYS_REG(3, 3, 14, 8, 2),
@@ -757,11 +604,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 3, 14, 15, 5),
ARM64_SYS_REG(3, 3, 14, 15, 6),
ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
- ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
};
-static __u64 base_regs_n = ARRAY_SIZE(base_regs);
static __u64 vregs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
@@ -797,7 +640,6 @@ static __u64 vregs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
};
-static __u64 vregs_n = ARRAY_SIZE(vregs);
static __u64 sve_regs[] = {
KVM_REG_ARM64_SVE_VLS,
@@ -852,11 +694,318 @@ static __u64 sve_regs[] = {
KVM_REG_ARM64_SVE_FFR(0),
ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */
};
-static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
-static __u64 rejects_set[] = {
-#ifdef REG_LIST_SVE
+static __u64 sve_rejects_set[] = {
KVM_REG_ARM64_SVE_VLS,
-#endif
};
-static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
+
+static __u64 pauth_addr_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */
+};
+
+static __u64 pauth_generic_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
+};
+
+static __u64 el2_regs[] = {
+ SYS_REG(VPIDR_EL2),
+ SYS_REG(VMPIDR_EL2),
+ SYS_REG(SCTLR_EL2),
+ SYS_REG(ACTLR_EL2),
+ SYS_REG(SCTLR2_EL2),
+ SYS_REG(HCR_EL2),
+ SYS_REG(MDCR_EL2),
+ SYS_REG(CPTR_EL2),
+ SYS_REG(HSTR_EL2),
+ SYS_REG(HFGRTR_EL2),
+ SYS_REG(HFGWTR_EL2),
+ SYS_REG(HFGITR_EL2),
+ SYS_REG(HACR_EL2),
+ SYS_REG(ZCR_EL2),
+ SYS_REG(HCRX_EL2),
+ SYS_REG(TTBR0_EL2),
+ SYS_REG(TTBR1_EL2),
+ SYS_REG(TCR_EL2),
+ SYS_REG(TCR2_EL2),
+ SYS_REG(VTTBR_EL2),
+ SYS_REG(VTCR_EL2),
+ SYS_REG(VNCR_EL2),
+ SYS_REG(HDFGRTR2_EL2),
+ SYS_REG(HDFGWTR2_EL2),
+ SYS_REG(HFGRTR2_EL2),
+ SYS_REG(HFGWTR2_EL2),
+ SYS_REG(HDFGRTR_EL2),
+ SYS_REG(HDFGWTR_EL2),
+ SYS_REG(HAFGRTR_EL2),
+ SYS_REG(HFGITR2_EL2),
+ SYS_REG(SPSR_EL2),
+ SYS_REG(ELR_EL2),
+ SYS_REG(AFSR0_EL2),
+ SYS_REG(AFSR1_EL2),
+ SYS_REG(ESR_EL2),
+ SYS_REG(FAR_EL2),
+ SYS_REG(HPFAR_EL2),
+ SYS_REG(MAIR_EL2),
+ SYS_REG(PIRE0_EL2),
+ SYS_REG(PIR_EL2),
+ SYS_REG(POR_EL2),
+ SYS_REG(AMAIR_EL2),
+ SYS_REG(VBAR_EL2),
+ SYS_REG(CONTEXTIDR_EL2),
+ SYS_REG(TPIDR_EL2),
+ SYS_REG(CNTVOFF_EL2),
+ SYS_REG(CNTHCTL_EL2),
+ SYS_REG(CNTHP_CTL_EL2),
+ SYS_REG(CNTHP_CVAL_EL2),
+ SYS_REG(CNTHV_CTL_EL2),
+ SYS_REG(CNTHV_CVAL_EL2),
+ SYS_REG(SP_EL2),
+ SYS_REG(VDISR_EL2),
+ SYS_REG(VSESR_EL2),
+};
+
+static __u64 el2_e2h0_regs[] = {
+ /* Empty */
+};
+
+#define BASE_SUBLIST \
+ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+ { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+ .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+#define SVE_SUBLIST \
+ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+ .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+#define PAUTH_SUBLIST \
+ { \
+ .name = "pauth_address", \
+ .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \
+ .regs = pauth_addr_regs, \
+ .regs_n = ARRAY_SIZE(pauth_addr_regs), \
+ }, \
+ { \
+ .name = "pauth_generic", \
+ .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \
+ .regs = pauth_generic_regs, \
+ .regs_n = ARRAY_SIZE(pauth_generic_regs), \
+ }
+#define EL2_SUBLIST \
+ { \
+ .name = "EL2", \
+ .capability = KVM_CAP_ARM_EL2, \
+ .feature = KVM_ARM_VCPU_HAS_EL2, \
+ .regs = el2_regs, \
+ .regs_n = ARRAY_SIZE(el2_regs), \
+ }
+#define EL2_E2H0_SUBLIST \
+ EL2_SUBLIST, \
+ { \
+ .name = "EL2 E2H0", \
+ .capability = KVM_CAP_ARM_EL2_E2H0, \
+ .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \
+ .regs = el2_e2h0_regs, \
+ .regs_n = ARRAY_SIZE(el2_e2h0_regs), \
+ }
+
+static struct vcpu_reg_list vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+struct vcpu_reg_list *vcpu_configs[] = {
+ &vregs_config,
+ &vregs_pmu_config,
+ &sve_config,
+ &sve_pmu_config,
+ &pauth_config,
+ &pauth_pmu_config,
+
+ &el2_vregs_config,
+ &el2_vregs_pmu_config,
+ &el2_sve_config,
+ &el2_sve_pmu_config,
+ &el2_pauth_config,
+ &el2_pauth_pmu_config,
+
+ &el2_e2h0_vregs_config,
+ &el2_e2h0_vregs_pmu_config,
+ &el2_e2h0_sve_config,
+ &el2_e2h0_sve_pmu_config,
+ &el2_e2h0_pauth_config,
+ &el2_e2h0_pauth_pmu_config,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/arm64/hello_el2.c b/tools/testing/selftests/kvm/arm64/hello_el2.c
new file mode 100644
index 000000000000..bbe6862c6ab1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/hello_el2.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1
+ *
+ * Copyright 2025 Google LLC
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+static void guest_code(void)
+{
+ u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1);
+ u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
+ u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1);
+ u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4);
+
+ GUEST_ASSERT_EQ(get_current_el(), 2);
+ GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H);
+ GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1),
+ ID_AA64MMFR1_EL1_VH_IMP);
+
+ /*
+ * Traps of the complete ID register space are IMPDEF without FEAT_FGT,
+ * which is really annoying to deal with in KVM describing E2H as RES1.
+ *
+ * If the implementation doesn't honor the trap then expect the register
+ * to return all zeros.
+ */
+ if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP)
+ GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0),
+ ID_AA64MMFR0_EL1_FGT_NI);
+ else
+ GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c
new file mode 100644
index 000000000000..3826772fd470
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/host_sve.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls.
+ *
+ * Copyright 2025 Arm, Ltd
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/auxv.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "ucall_common.h"
+
+static void guest_code(void)
+{
+ for (int i = 0; i < 10; i++) {
+ GUEST_UCALL_NONE();
+ }
+
+ GUEST_DONE();
+}
+
+void handle_sigill(int sig, siginfo_t *info, void *ctx)
+{
+ ucontext_t *uctx = ctx;
+
+ printf(" < host signal %d >\n", sig);
+
+ /*
+ * Skip the UDF
+ */
+ uctx->uc_mcontext.pc += 4;
+}
+
+void register_sigill_handler(void)
+{
+ struct sigaction sa = {
+ .sa_sigaction = handle_sigill,
+ .sa_flags = SA_SIGINFO,
+ };
+ sigaction(SIGILL, &sa, NULL);
+}
+
+static void do_sve_roundtrip(void)
+{
+ unsigned long before, after;
+
+ /*
+ * Set all bits in a predicate register, force a save/restore via a
+ * SIGILL (which handle_sigill() will recover from), then report
+ * whether the value has changed.
+ */
+ asm volatile(
+ " .arch_extension sve\n"
+ " ptrue p0.B\n"
+ " cntp %[before], p0, p0.B\n"
+ " udf #0\n"
+ " cntp %[after], p0, p0.B\n"
+ : [before] "=r" (before),
+ [after] "=r" (after)
+ :
+ : "p0"
+ );
+
+ if (before != after) {
+ TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n",
+ before, after);
+ } else {
+ printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n",
+ before, after);
+ }
+}
+
+static void test_run(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ register_sigill_handler();
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ do_sve_roundtrip();
+
+ while (!guest_done) {
+
+ printf("Running VCPU...\n");
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_NONE:
+ do_sve_roundtrip();
+ do_sve_roundtrip();
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ /*
+ * This is testing the host environment, we don't care about
+ * guest SVE support.
+ */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ printf("SVE not supported\n");
+ return KSFT_SKIP;
+ }
+
+ test_run();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c
new file mode 100644
index 000000000000..bf038a0371f4
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/hypercalls.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* hypercalls: Check the ARM64's psuedo-firmware bitmap register interface.
+ *
+ * The test validates the basic hypercall functionalities that are exposed
+ * via the psuedo-firmware bitmap register. This includes the registers'
+ * read/write behavior before and after the VM has started, and if the
+ * hypercalls are properly masked or unmasked to the guest when disabled or
+ * enabled from the KVM userspace, respectively.
+ */
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0))
+
+/* Last valid bits of the bitmapped firmware registers */
+#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0
+#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX 1
+
+#define KVM_REG_ARM_STD_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX)
+#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0
+
+struct kvm_fw_reg_info {
+ uint64_t reg; /* Register definition */
+ uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */
+ uint64_t reset_val; /* Reset value for the register */
+};
+
+#define FW_REG_INFO(r) \
+ { \
+ .reg = r, \
+ .max_feat_bit = r##_BIT_MAX, \
+ .reset_val = r##_RESET_VAL \
+ }
+
+static const struct kvm_fw_reg_info fw_reg_info[] = {
+ FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2),
+};
+
+enum test_stage {
+ TEST_STAGE_REG_IFACE,
+ TEST_STAGE_HVC_IFACE_FEAT_DISABLED,
+ TEST_STAGE_HVC_IFACE_FEAT_ENABLED,
+ TEST_STAGE_HVC_IFACE_FALSE_INFO,
+ TEST_STAGE_END,
+};
+
+static int stage = TEST_STAGE_REG_IFACE;
+
+struct test_hvc_info {
+ uint32_t func_id;
+ uint64_t arg1;
+};
+
+#define TEST_HVC_INFO(f, a1) \
+ { \
+ .func_id = f, \
+ .arg1 = a1, \
+ }
+
+static const struct test_hvc_info hvc_info[] = {
+ /* KVM_REG_ARM_STD_BMAP */
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0),
+
+ /* KVM_REG_ARM_STD_HYP_BMAP */
+ TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES),
+ TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST),
+ TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0),
+
+ /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+ TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID,
+ ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+ TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0),
+ TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER),
+};
+
+/* Feed false hypercall info to test the KVM behavior */
+static const struct test_hvc_info false_hvc_info[] = {
+ /* Feature support check against a different family of hypercalls */
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+ TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64),
+ TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64),
+};
+
+static void guest_test_hvc(const struct test_hvc_info *hc_info)
+{
+ unsigned int i;
+ struct arm_smccc_res res;
+ unsigned int hvc_info_arr_sz;
+
+ hvc_info_arr_sz =
+ hc_info == hvc_info ? ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info);
+
+ for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
+ memset(&res, 0, sizeof(res));
+ do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
+
+ switch (stage) {
+ case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+ case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+ __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+ "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+ res.a0, hc_info->func_id, hc_info->arg1, stage);
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+ __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+ "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+ res.a0, hc_info->func_id, hc_info->arg1, stage);
+ break;
+ default:
+ GUEST_FAIL("Unexpected stage = %u", stage);
+ }
+ }
+}
+
+static void guest_code(void)
+{
+ while (stage != TEST_STAGE_END) {
+ switch (stage) {
+ case TEST_STAGE_REG_IFACE:
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+ case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+ guest_test_hvc(hvc_info);
+ break;
+ case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+ guest_test_hvc(false_hvc_info);
+ break;
+ default:
+ GUEST_FAIL("Unexpected stage = %u", stage);
+ }
+
+ GUEST_SYNC(stage);
+ }
+
+ GUEST_DONE();
+}
+
+struct st_time {
+ uint32_t rev;
+ uint32_t attr;
+ uint64_t st_time;
+};
+
+#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63)
+#define ST_GPA_BASE (1 << 30)
+
+static void steal_time_init(struct kvm_vcpu *vcpu)
+{
+ uint64_t st_ipa = (ulong)ST_GPA_BASE;
+ unsigned int gpages;
+
+ gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
+ vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+
+ vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL,
+ KVM_ARM_VCPU_PVTIME_IPA, &st_ipa);
+}
+
+static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
+{
+ uint64_t val;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+ const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+ uint64_t set_val;
+
+ /* First 'read' should be the reset value for the reg */
+ val = vcpu_get_reg(vcpu, reg_info->reg);
+ TEST_ASSERT(val == reg_info->reset_val,
+ "Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+ reg_info->reg, reg_info->reset_val, val);
+
+ if (reg_info->reset_val)
+ set_val = 0;
+ else
+ set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit);
+
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val);
+ TEST_ASSERT(ret == 0,
+ "Failed to %s all the features of reg: 0x%lx; ret: %d",
+ (set_val ? "set" : "clear"), reg_info->reg, errno);
+
+ val = vcpu_get_reg(vcpu, reg_info->reg);
+ TEST_ASSERT(val == set_val,
+ "Expected all the features to be %s for reg: 0x%lx",
+ (set_val ? "set" : "cleared"), reg_info->reg);
+
+ /*
+ * If the reg has been set, clear it as test_fw_regs_after_vm_start()
+ * expects it to be cleared.
+ */
+ if (set_val) {
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ TEST_ASSERT(ret == 0,
+ "Failed to clear all the features of reg: 0x%lx; ret: %d",
+ reg_info->reg, errno);
+ }
+
+ /*
+ * Test enabling a feature that's not supported.
+ * Avoid this check if all the bits are occupied.
+ */
+ if (reg_info->max_feat_bit < 63) {
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
+ errno, reg_info->reg);
+ }
+ }
+}
+
+static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
+{
+ uint64_t val;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+ const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+ /*
+ * Before starting the VM, the test clears all the bits.
+ * Check if that's still the case.
+ */
+ val = vcpu_get_reg(vcpu, reg_info->reg);
+ TEST_ASSERT(val == 0,
+ "Expected all the features to be cleared for reg: 0x%lx",
+ reg_info->reg);
+
+ /*
+ * Since the VM has run at least once, KVM shouldn't allow modification of
+ * the registers and should return EBUSY. Set the registers and check for
+ * the expected errno.
+ */
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
+ TEST_ASSERT(ret != 0 && errno == EBUSY,
+ "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
+ errno, reg_info->reg);
+ }
+}
+
+static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ steal_time_init(*vcpu);
+
+ return vm;
+}
+
+static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
+{
+ int prev_stage = stage;
+
+ pr_debug("Stage: %d\n", prev_stage);
+
+ /* Sync the stage early, the VM might be freed below. */
+ stage++;
+ sync_global_to_guest(*vm, stage);
+
+ switch (prev_stage) {
+ case TEST_STAGE_REG_IFACE:
+ test_fw_regs_after_vm_start(*vcpu);
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+ /* Start a new VM so that all the features are now enabled by default */
+ kvm_vm_free(*vm);
+ *vm = test_vm_create(vcpu);
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+ case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+ break;
+ default:
+ TEST_FAIL("Unknown test stage: %d", prev_stage);
+ }
+}
+
+static void test_run(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ vm = test_vm_create(&vcpu);
+
+ test_fw_regs_before_vm_start(vcpu);
+
+ while (!guest_done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ test_guest_stage(&vm, &vcpu);
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_run();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
new file mode 100644
index 000000000000..b5be9133535a
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that nobody has tampered with KVM's UID
+
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+/*
+ * Do NOT redefine these constants, or try to replace them with some
+ * "common" version. They are hardcoded here to detect any potential
+ * breakage happening in the rest of the kernel.
+ *
+ * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74
+ */
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU
+
+static void guest_code(void)
+{
+ struct arm_smccc_res res = {};
+
+ do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 &&
+ res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 &&
+ res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 &&
+ res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3,
+ "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3);
+ GUEST_DONE();
+}
+
+int main (int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ while (!guest_done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
new file mode 100644
index 000000000000..152c34776981
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3 system, not configuring GICv3 correctly
+// results in all of the sysregs generating an UNDEF exception.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+static volatile bool handled;
+
+#define __check_sr_read(r) \
+ ({ \
+ uint64_t val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+#define __check_sr_write(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, SYS_ ## r); \
+ isb(); \
+ } while(0)
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while(0)
+
+#define check_sr_write(r) \
+ do { \
+ __check_sr_write(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while(0)
+
+#define check_sr_rw(r) \
+ do { \
+ check_sr_read(r); \
+ check_sr_write(r); \
+ } while(0)
+
+static void guest_code(void)
+{
+ uint64_t val;
+
+ /*
+ * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC,
+ read_sysreg(id_aa64pfr0_el1)) == 0,
+ "GICv3 wrongly advertised");
+
+ /*
+ * Access all GICv3 registers, and fail if we don't get an UNDEF.
+ * Note that we happily access all the APxRn registers without
+ * checking their existance, as all we want to see is a failure.
+ */
+ check_sr_rw(ICC_PMR_EL1);
+ check_sr_read(ICC_IAR0_EL1);
+ check_sr_write(ICC_EOIR0_EL1);
+ check_sr_rw(ICC_HPPIR0_EL1);
+ check_sr_rw(ICC_BPR0_EL1);
+ check_sr_rw(ICC_AP0R0_EL1);
+ check_sr_rw(ICC_AP0R1_EL1);
+ check_sr_rw(ICC_AP0R2_EL1);
+ check_sr_rw(ICC_AP0R3_EL1);
+ check_sr_rw(ICC_AP1R0_EL1);
+ check_sr_rw(ICC_AP1R1_EL1);
+ check_sr_rw(ICC_AP1R2_EL1);
+ check_sr_rw(ICC_AP1R3_EL1);
+ check_sr_write(ICC_DIR_EL1);
+ check_sr_read(ICC_RPR_EL1);
+ check_sr_write(ICC_SGI1R_EL1);
+ check_sr_write(ICC_ASGI1R_EL1);
+ check_sr_write(ICC_SGI0R_EL1);
+ check_sr_read(ICC_IAR1_EL1);
+ check_sr_write(ICC_EOIR1_EL1);
+ check_sr_rw(ICC_HPPIR1_EL1);
+ check_sr_rw(ICC_BPR1_EL1);
+ check_sr_rw(ICC_CTLR_EL1);
+ check_sr_rw(ICC_IGRPEN0_EL1);
+ check_sr_rw(ICC_IGRPEN1_EL1);
+
+ /*
+ * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+ * be RAO/WI. Engage in non-fatal accesses, starting with a
+ * write of 0 to try and disable SRE, and let's see if it
+ * sticks.
+ */
+ __check_sr_write(ICC_SRE_EL1);
+ if (!handled)
+ GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+ val = __check_sr_read(ICC_SRE_EL1);
+ if (!handled) {
+ __GUEST_ASSERT((val & BIT(0)),
+ "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+ GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ /* Success, we've gracefully exploded! */
+ handled = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_no_gicv3(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Create a VM without a GICv3 */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t pfr0;
+
+ test_disable_default_vgic();
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+ __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0),
+ "GICv3 not supported.");
+ kvm_vm_free(vm);
+
+ test_guest_no_gicv3();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
new file mode 100644
index 000000000000..4ccbd389d133
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
@@ -0,0 +1,1135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * page_fault_test.c - Test stage 2 faults.
+ *
+ * This test tries different combinations of guest accesses (e.g., write,
+ * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
+ * hugetlbfs with a hole). It checks that the expected handling method is
+ * called (e.g., uffd faults with the right address and write/read flag).
+ */
+#include <linux/bitmap.h>
+#include <fcntl.h>
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <asm/sysreg.h>
+#include <linux/bitfield.h>
+#include "guest_modes.h"
+#include "userfaultfd_util.h"
+
+/* Guest virtual addresses that point to the test page and its PTE. */
+#define TEST_GVA 0xc0000000
+#define TEST_EXEC_GVA (TEST_GVA + 0x8)
+#define TEST_PTE_GVA 0xb0000000
+#define TEST_DATA 0x0123456789ABCDEF
+
+static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
+
+#define CMD_NONE (0)
+#define CMD_SKIP_TEST (1ULL << 1)
+#define CMD_HOLE_PT (1ULL << 2)
+#define CMD_HOLE_DATA (1ULL << 3)
+#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4)
+#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5)
+#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6)
+#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7)
+#define CMD_SET_PTE_AF (1ULL << 8)
+
+#define PREPARE_FN_NR 10
+#define CHECK_FN_NR 10
+
+static struct event_cnt {
+ int mmio_exits;
+ int fail_vcpu_runs;
+ int uffd_faults;
+ /* uffd_faults is incremented from multiple threads. */
+ pthread_mutex_t uffd_faults_mutex;
+} events;
+
+struct test_desc {
+ const char *name;
+ uint64_t mem_mark_cmd;
+ /* Skip the test if any prepare function returns false */
+ bool (*guest_prepare[PREPARE_FN_NR])(void);
+ void (*guest_test)(void);
+ void (*guest_test_check[CHECK_FN_NR])(void);
+ uffd_handler_t uffd_pt_handler;
+ uffd_handler_t uffd_data_handler;
+ void (*dabt_handler)(struct ex_regs *regs);
+ void (*iabt_handler)(struct ex_regs *regs);
+ void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
+ void (*fail_vcpu_run_handler)(int ret);
+ uint32_t pt_memslot_flags;
+ uint32_t data_memslot_flags;
+ bool skip;
+ struct event_cnt expected_events;
+};
+
+struct test_params {
+ enum vm_mem_backing_src_type src_type;
+ struct test_desc *test_desc;
+};
+
+static inline void flush_tlb_page(uint64_t vaddr)
+{
+ uint64_t page = vaddr >> 12;
+
+ dsb(ishst);
+ asm volatile("tlbi vaae1is, %0" :: "r" (page));
+ dsb(ish);
+ isb();
+}
+
+static void guest_write64(void)
+{
+ uint64_t val;
+
+ WRITE_ONCE(*guest_test_memory, TEST_DATA);
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+/* Check the system for atomic instructions. */
+static bool guest_check_lse(void)
+{
+ uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
+ uint64_t atomic;
+
+ atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0);
+ return atomic >= 2;
+}
+
+static bool guest_check_dc_zva(void)
+{
+ uint64_t dczid = read_sysreg(dczid_el0);
+ uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid);
+
+ return dzp == 0;
+}
+
+/* Compare and swap instruction. */
+static void guest_cas(void)
+{
+ uint64_t val;
+
+ GUEST_ASSERT(guest_check_lse());
+ asm volatile(".arch_extension lse\n"
+ "casal %0, %1, [%2]\n"
+ :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+static void guest_read64(void)
+{
+ uint64_t val;
+
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, 0);
+}
+
+/* Address translation instruction */
+static void guest_at(void)
+{
+ uint64_t par;
+
+ asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
+ isb();
+ par = read_sysreg(par_el1);
+
+ /* Bit 1 indicates whether the AT was successful */
+ GUEST_ASSERT_EQ(par & 1, 0);
+}
+
+/*
+ * The size of the block written by "dc zva" is guaranteed to be between (2 <<
+ * 0) and (2 << 9), which is safe in our case as we need the write to happen
+ * for at least a word, and not more than a page.
+ */
+static void guest_dc_zva(void)
+{
+ uint16_t val;
+
+ asm volatile("dc zva, %0" :: "r" (guest_test_memory));
+ dsb(ish);
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, 0);
+}
+
+/*
+ * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
+ * And that's special because KVM must take special care with those: they
+ * should still count as accesses for dirty logging or user-faulting, but
+ * should be handled differently on mmio.
+ */
+static void guest_ld_preidx(void)
+{
+ uint64_t val;
+ uint64_t addr = TEST_GVA - 8;
+
+ /*
+ * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
+ * in a gap between memslots not backing by anything.
+ */
+ asm volatile("ldr %0, [%1, #8]!"
+ : "=r" (val), "+r" (addr));
+ GUEST_ASSERT_EQ(val, 0);
+ GUEST_ASSERT_EQ(addr, TEST_GVA);
+}
+
+static void guest_st_preidx(void)
+{
+ uint64_t val = TEST_DATA;
+ uint64_t addr = TEST_GVA - 8;
+
+ asm volatile("str %0, [%1, #8]!"
+ : "+r" (val), "+r" (addr));
+
+ GUEST_ASSERT_EQ(addr, TEST_GVA);
+ val = READ_ONCE(*guest_test_memory);
+}
+
+static bool guest_set_ha(void)
+{
+ uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+ uint64_t hadbs, tcr;
+
+ /* Skip if HA is not supported. */
+ hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1);
+ if (hadbs == 0)
+ return false;
+
+ tcr = read_sysreg(tcr_el1) | TCR_HA;
+ write_sysreg(tcr, tcr_el1);
+ isb();
+
+ return true;
+}
+
+static bool guest_clear_pte_af(void)
+{
+ *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
+ flush_tlb_page(TEST_GVA);
+
+ return true;
+}
+
+static void guest_check_pte_af(void)
+{
+ dsb(ish);
+ GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
+}
+
+static void guest_check_write_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_write_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_s1ptw_wr_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_s1ptw_wr_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_exec(void)
+{
+ int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
+ int ret;
+
+ ret = code();
+ GUEST_ASSERT_EQ(ret, 0x77);
+}
+
+static bool guest_prepare(struct test_desc *test)
+{
+ bool (*prepare_fn)(void);
+ int i;
+
+ for (i = 0; i < PREPARE_FN_NR; i++) {
+ prepare_fn = test->guest_prepare[i];
+ if (prepare_fn && !prepare_fn())
+ return false;
+ }
+
+ return true;
+}
+
+static void guest_test_check(struct test_desc *test)
+{
+ void (*check_fn)(void);
+ int i;
+
+ for (i = 0; i < CHECK_FN_NR; i++) {
+ check_fn = test->guest_test_check[i];
+ if (check_fn)
+ check_fn();
+ }
+}
+
+static void guest_code(struct test_desc *test)
+{
+ if (!guest_prepare(test))
+ GUEST_SYNC(CMD_SKIP_TEST);
+
+ GUEST_SYNC(test->mem_mark_cmd);
+
+ if (test->guest_test)
+ test->guest_test();
+
+ guest_test_check(test);
+ GUEST_DONE();
+}
+
+static void no_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
+}
+
+static void no_iabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
+}
+
+static struct uffd_args {
+ char *copy;
+ void *hva;
+ uint64_t paging_size;
+} pt_args, data_args;
+
+/* Returns true to continue the test, and false if it should be skipped. */
+static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ uint64_t addr = msg->arg.pagefault.address;
+ uint64_t flags = msg->arg.pagefault.flags;
+ struct uffdio_copy copy;
+ int ret;
+
+ TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
+ "The only expected UFFD mode is MISSING");
+ TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
+
+ pr_debug("uffd fault: addr=%p write=%d\n",
+ (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
+
+ copy.src = (uint64_t)args->copy;
+ copy.dst = addr;
+ copy.len = args->paging_size;
+ copy.mode = 0;
+
+ ret = ioctl(uffd, UFFDIO_COPY, &copy);
+ if (ret == -1) {
+ pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
+ addr, errno);
+ return ret;
+ }
+
+ pthread_mutex_lock(&events.uffd_faults_mutex);
+ events.uffd_faults += 1;
+ pthread_mutex_unlock(&events.uffd_faults_mutex);
+ return 0;
+}
+
+static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+ return uffd_generic_handler(mode, uffd, msg, &pt_args);
+}
+
+static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+ return uffd_generic_handler(mode, uffd, msg, &data_args);
+}
+
+static void setup_uffd_args(struct userspace_mem_region *region,
+ struct uffd_args *args)
+{
+ args->hva = (void *)region->region.userspace_addr;
+ args->paging_size = region->region.memory_size;
+
+ args->copy = malloc(args->paging_size);
+ TEST_ASSERT(args->copy, "Failed to allocate data copy.");
+ memcpy(args->copy, args->hva, args->paging_size);
+}
+
+static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
+ struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
+{
+ struct test_desc *test = p->test_desc;
+ int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+
+ setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
+ setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
+
+ *pt_uffd = NULL;
+ if (test->uffd_pt_handler)
+ *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+ pt_args.hva,
+ pt_args.paging_size,
+ 1, test->uffd_pt_handler);
+
+ *data_uffd = NULL;
+ if (test->uffd_data_handler)
+ *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+ data_args.hva,
+ data_args.paging_size,
+ 1, test->uffd_data_handler);
+}
+
+static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
+ struct uffd_desc *data_uffd)
+{
+ if (test->uffd_pt_handler)
+ uffd_stop_demand_paging(pt_uffd);
+ if (test->uffd_data_handler)
+ uffd_stop_demand_paging(data_uffd);
+
+ free(pt_args.copy);
+ free(data_args.copy);
+}
+
+static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+ TEST_FAIL("There was no UFFD fault expected.");
+ return -1;
+}
+
+/* Returns false if the test should be skipped. */
+static bool punch_hole_in_backing_store(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ void *hva = (void *)region->region.userspace_addr;
+ uint64_t paging_size = region->region.memory_size;
+ int ret, fd = region->fd;
+
+ if (fd != -1) {
+ ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0, paging_size);
+ TEST_ASSERT(ret == 0, "fallocate failed");
+ } else {
+ ret = madvise(hva, paging_size, MADV_DONTNEED);
+ TEST_ASSERT(ret == 0, "madvise failed");
+ }
+
+ return true;
+}
+
+static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+ struct userspace_mem_region *region;
+ void *hva;
+
+ region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ hva = (void *)region->region.userspace_addr;
+
+ TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+
+ memcpy(hva, run->mmio.data, run->mmio.len);
+ events.mmio_exits += 1;
+}
+
+static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+ uint64_t data;
+
+ memcpy(&data, run->mmio.data, sizeof(data));
+ pr_debug("addr=%lld len=%d w=%d data=%lx\n",
+ run->mmio.phys_addr, run->mmio.len,
+ run->mmio.is_write, data);
+ TEST_FAIL("There was no MMIO exit expected.");
+}
+
+static bool check_write_in_dirty_log(struct kvm_vm *vm,
+ struct userspace_mem_region *region,
+ uint64_t host_pg_nr)
+{
+ unsigned long *bmap;
+ bool first_page_dirty;
+ uint64_t size = region->region.memory_size;
+
+ /* getpage_size() is not always equal to vm->page_size */
+ bmap = bitmap_zalloc(size / getpagesize());
+ kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
+ first_page_dirty = test_bit(host_pg_nr, bmap);
+ free(bmap);
+ return first_page_dirty;
+}
+
+/* Returns true to continue the test, and false if it should be skipped. */
+static bool handle_cmd(struct kvm_vm *vm, int cmd)
+{
+ struct userspace_mem_region *data_region, *pt_region;
+ bool continue_test = true;
+ uint64_t pte_gpa, pte_pg;
+
+ data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
+ pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+ pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
+
+ if (cmd == CMD_SKIP_TEST)
+ continue_test = false;
+
+ if (cmd & CMD_HOLE_PT)
+ continue_test = punch_hole_in_backing_store(vm, pt_region);
+ if (cmd & CMD_HOLE_DATA)
+ continue_test = punch_hole_in_backing_store(vm, data_region);
+ if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
+ TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
+ "Missing write in dirty log");
+ if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
+ TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
+ "Missing s1ptw write in dirty log");
+ if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
+ TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
+ "Unexpected write in dirty log");
+ if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
+ TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
+ "Unexpected s1ptw write in dirty log");
+
+ return continue_test;
+}
+
+void fail_vcpu_run_no_handler(int ret)
+{
+ TEST_FAIL("Unexpected vcpu run failure");
+}
+
+void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
+{
+ TEST_ASSERT(errno == ENOSYS,
+ "The mmio handler should have returned not implemented.");
+ events.fail_vcpu_runs += 1;
+}
+
+typedef uint32_t aarch64_insn_t;
+extern aarch64_insn_t __exec_test[2];
+
+noinline void __return_0x77(void)
+{
+ asm volatile("__exec_test: mov x0, #0x77\n"
+ "ret\n");
+}
+
+/*
+ * Note that this function runs on the host before the test VM starts: there's
+ * no need to sync the D$ and I$ caches.
+ */
+static void load_exec_code_for_test(struct kvm_vm *vm)
+{
+ uint64_t *code;
+ struct userspace_mem_region *region;
+ void *hva;
+
+ region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ hva = (void *)region->region.userspace_addr;
+
+ assert(TEST_EXEC_GVA > TEST_GVA);
+ code = hva + TEST_EXEC_GVA - TEST_GVA;
+ memcpy(code, __exec_test, sizeof(__exec_test));
+}
+
+static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ struct test_desc *test)
+{
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_DABT_CUR, no_dabt_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_IABT_CUR, no_iabt_handler);
+}
+
+static void setup_gva_maps(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ uint64_t pte_gpa;
+
+ region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ /* Map TEST_GVA first. This will install a new PTE. */
+ virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
+ /* Then map TEST_PTE_GVA to the above PTE. */
+ pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+ virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
+}
+
+enum pf_test_memslots {
+ CODE_AND_DATA_MEMSLOT,
+ PAGE_TABLE_MEMSLOT,
+ TEST_DATA_MEMSLOT,
+};
+
+/*
+ * Create a memslot for code and data at pfn=0, and test-data and PT ones
+ * at max_gfn.
+ */
+static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
+{
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
+ uint64_t guest_page_size = vm->page_size;
+ uint64_t max_gfn = vm_compute_max_gfn(vm);
+ /* Enough for 2M of code when using 4K guest pages. */
+ uint64_t code_npages = 512;
+ uint64_t pt_size, data_size, data_gpa;
+
+ /*
+ * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
+ * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13
+ * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
+ * twice that just in case.
+ */
+ pt_size = 26 * guest_page_size;
+
+ /* memslot sizes and gpa's must be aligned to the backing page size */
+ pt_size = align_up(pt_size, backing_src_pagesz);
+ data_size = align_up(guest_page_size, backing_src_pagesz);
+ data_gpa = (max_gfn * guest_page_size) - data_size;
+ data_gpa = align_down(data_gpa, backing_src_pagesz);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
+ CODE_AND_DATA_MEMSLOT, code_npages, 0);
+ vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
+ vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
+
+ vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
+ PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
+ p->test_desc->pt_memslot_flags);
+ vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
+
+ vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
+ data_size / guest_page_size,
+ p->test_desc->data_memslot_flags);
+ vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void setup_ucall(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+
+ ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
+}
+
+static void setup_default_handlers(struct test_desc *test)
+{
+ if (!test->mmio_handler)
+ test->mmio_handler = mmio_no_handler;
+
+ if (!test->fail_vcpu_run_handler)
+ test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
+}
+
+static void check_event_counts(struct test_desc *test)
+{
+ TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+ TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+ TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
+}
+
+static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
+{
+ struct test_desc *test = p->test_desc;
+
+ pr_debug("Test: %s\n", test->name);
+ pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+ pr_debug("Testing memory backing src type: %s\n",
+ vm_mem_backing_src_alias(p->src_type)->name);
+}
+
+static void reset_event_counts(void)
+{
+ memset(&events, 0, sizeof(events));
+}
+
+/*
+ * This function either succeeds, skips the test (after setting test->skip), or
+ * fails with a TEST_FAIL that aborts all tests.
+ */
+static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ struct test_desc *test)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int ret;
+
+ run = vcpu->run;
+
+ for (;;) {
+ ret = _vcpu_run(vcpu);
+ if (ret) {
+ test->fail_vcpu_run_handler(ret);
+ goto done;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ if (!handle_cmd(vm, uc.args[1])) {
+ test->skip = true;
+ goto done;
+ }
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_NONE:
+ if (run->exit_reason == KVM_EXIT_MMIO)
+ test->mmio_handler(vm, run);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *p = (struct test_params *)arg;
+ struct test_desc *test = p->test_desc;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct uffd_desc *pt_uffd, *data_uffd;
+
+ print_test_banner(mode, p);
+
+ vm = ____vm_create(VM_SHAPE(mode));
+ setup_memslots(vm, p);
+ kvm_vm_elf_load(vm, program_invocation_name);
+ setup_ucall(vm);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ setup_gva_maps(vm);
+
+ reset_event_counts();
+
+ /*
+ * Set some code in the data memslot for the guest to execute (only
+ * applicable to the EXEC tests). This has to be done before
+ * setup_uffd() as that function copies the memslot data for the uffd
+ * handler.
+ */
+ load_exec_code_for_test(vm);
+ setup_uffd(vm, p, &pt_uffd, &data_uffd);
+ setup_abort_handlers(vm, vcpu, test);
+ setup_default_handlers(test);
+ vcpu_args_set(vcpu, 1, test);
+
+ vcpu_run_loop(vm, vcpu, test);
+
+ kvm_vm_free(vm);
+ free_uffd(test, pt_uffd, data_uffd);
+
+ /*
+ * Make sure we check the events after the uffd threads have exited,
+ * which means they updated their respective event counters.
+ */
+ if (!test->skip)
+ check_event_counts(test);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-s mem-type]\n", name);
+ puts("");
+ guest_modes_help();
+ backing_src_help("-s");
+ puts("");
+}
+
+#define SNAME(s) #s
+#define SCAT2(a, b) SNAME(a ## _ ## b)
+#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c))
+#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d))
+
+#define _CHECK(_test) _CHECK_##_test
+#define _PREPARE(_test) _PREPARE_##_test
+#define _PREPARE_guest_read64 NULL
+#define _PREPARE_guest_ld_preidx NULL
+#define _PREPARE_guest_write64 NULL
+#define _PREPARE_guest_st_preidx NULL
+#define _PREPARE_guest_exec NULL
+#define _PREPARE_guest_at NULL
+#define _PREPARE_guest_dc_zva guest_check_dc_zva
+#define _PREPARE_guest_cas guest_check_lse
+
+/* With or without access flag checks */
+#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af
+#define _PREPARE_no_af NULL
+#define _CHECK_with_af guest_check_pte_af
+#define _CHECK_no_af NULL
+
+/* Performs an access and checks that no faults were triggered. */
+#define TEST_ACCESS(_access, _with_af, _mark_cmd) \
+{ \
+ .name = SCAT3(_access, _with_af, #_mark_cmd), \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .mem_mark_cmd = _mark_cmd, \
+ .guest_test = _access, \
+ .guest_test_check = { _CHECK(_with_af) }, \
+ .expected_events = { 0 }, \
+}
+
+#define TEST_UFFD(_access, _with_af, _mark_cmd, \
+ _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \
+{ \
+ .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .mem_mark_cmd = _mark_cmd, \
+ .guest_test_check = { _CHECK(_with_af) }, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = _uffd_pt_handler, \
+ .expected_events = { .uffd_faults = _uffd_faults, }, \
+}
+
+#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \
+{ \
+ .name = SCAT3(dirty_log, _access, _with_af), \
+ .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
+ .expected_events = { 0 }, \
+}
+
+#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \
+ _uffd_faults, _test_check, _pt_check) \
+{ \
+ .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \
+ .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
+ .expected_events = { .uffd_faults = _uffd_faults, }, \
+}
+
+#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \
+{ \
+ .name = SCAT2(ro_memslot, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .mmio_handler = _mmio_handler, \
+ .expected_events = { .mmio_exits = _mmio_exits }, \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \
+{ \
+ .name = SCAT2(ro_memslot_no_syndrome, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
+ .expected_events = { .fail_vcpu_runs = 1 }, \
+}
+
+#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \
+ _test_check) \
+{ \
+ .name = SCAT2(ro_memslot, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .guest_test_check = { _test_check }, \
+ .mmio_handler = _mmio_handler, \
+ .expected_events = { .mmio_exits = _mmio_exits}, \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \
+{ \
+ .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .guest_test_check = { _test_check }, \
+ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
+ .expected_events = { .fail_vcpu_runs = 1 }, \
+}
+
+#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \
+ _uffd_data_handler, _uffd_faults) \
+{ \
+ .name = SCAT2(ro_memslot_uffd, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
+ .mmio_handler = _mmio_handler, \
+ .expected_events = { .mmio_exits = _mmio_exits, \
+ .uffd_faults = _uffd_faults }, \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \
+ _uffd_faults) \
+{ \
+ .name = SCAT2(ro_memslot_no_syndrome, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
+ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
+ .expected_events = { .fail_vcpu_runs = 1, \
+ .uffd_faults = _uffd_faults }, \
+}
+
+static struct test_desc tests[] = {
+
+ /* Check that HW is setting the Access Flag (AF) (sanity checks). */
+ TEST_ACCESS(guest_read64, with_af, CMD_NONE),
+ TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
+ TEST_ACCESS(guest_cas, with_af, CMD_NONE),
+ TEST_ACCESS(guest_write64, with_af, CMD_NONE),
+ TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
+ TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
+ TEST_ACCESS(guest_exec, with_af, CMD_NONE),
+
+ /*
+ * Punch a hole in the data backing store, and then try multiple
+ * accesses: reads should rturn zeroes, and writes should
+ * re-populate the page. Moreover, the test also check that no
+ * exception was generated in the guest. Note that this
+ * reading/writing behavior is the same as reading/writing a
+ * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
+ * userspace.
+ */
+ TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
+
+ /*
+ * Punch holes in the data and PT backing stores and mark them for
+ * userfaultfd handling. This should result in 2 faults: the access
+ * on the data backing store, and its respective S1 page table walk
+ * (S1PTW).
+ */
+ TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ /*
+ * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
+ * The S1PTW fault should still be marked as a write.
+ */
+ TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_no_handler, uffd_pt_handler, 1),
+ TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+
+ /*
+ * Try accesses when the data and PT memory regions are both
+ * tracked for dirty logging.
+ */
+ TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_ld_preidx, with_af,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+
+ /*
+ * Access when the data and PT memory regions are both marked for
+ * dirty logging and UFFD at the same time. The expected result is
+ * that writes should mark the dirty log and trigger a userfaultfd
+ * write fault. Reads/execs should result in a read userfaultfd
+ * fault, and nothing in the dirty log. Any S1PTW should result in
+ * a write in the dirty log and a userfaultfd write.
+ */
+ TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
+ uffd_data_handler,
+ 2, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
+ uffd_data_handler,
+ 2, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
+ uffd_data_handler, 2,
+ guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
+ uffd_data_handler,
+ 2, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
+ uffd_data_handler, 2,
+ guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ /*
+ * Access when both the PT and data regions are marked read-only
+ * (with KVM_MEM_READONLY). Writes with a syndrome result in an
+ * MMIO exit, writes with no syndrome (e.g., CAS) result in a
+ * failed vcpu run, and reads/execs with and without syndroms do
+ * not fault.
+ */
+ TEST_RO_MEMSLOT(guest_read64, 0, 0),
+ TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
+ TEST_RO_MEMSLOT(guest_at, 0, 0),
+ TEST_RO_MEMSLOT(guest_exec, 0, 0),
+ TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
+ TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
+ TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
+ TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
+
+ /*
+ * The PT and data regions are both read-only and marked
+ * for dirty logging at the same time. The expected result is that
+ * for writes there should be no write in the dirty log. The
+ * readonly handling is the same as if the memslot was not marked
+ * for dirty logging: writes with a syndrome result in an MMIO
+ * exit, and writes with no syndrome result in a failed vcpu run.
+ */
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
+ 1, guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
+ guest_check_no_write_in_dirty_log),
+
+ /*
+ * The PT and data regions are both read-only and punched with
+ * holes tracked with userfaultfd. The expected result is the
+ * union of both userfaultfd and read-only behaviors. For example,
+ * write accesses result in a userfaultfd write fault and an MMIO
+ * exit. Writes with no syndrome result in a failed vcpu run and
+ * no userfaultfd write fault. Reads result in userfaultfd getting
+ * triggered.
+ */
+ TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
+ uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
+
+ { 0 }
+};
+
+static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
+{
+ struct test_desc *t;
+
+ for (t = &tests[0]; t->name; t++) {
+ if (t->skip)
+ continue;
+
+ struct test_params p = {
+ .src_type = src_type,
+ .test_desc = t,
+ };
+
+ for_each_guest_mode(run_test, &p);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ enum vm_mem_backing_src_type src_type;
+ int opt;
+
+ src_type = DEFAULT_VM_MEM_SRC;
+
+ while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 's':
+ src_type = parse_backing_src_type(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ exit(0);
+ }
+ }
+
+ for_each_test_and_guest_mode(src_type);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c
new file mode 100644
index 000000000000..98e49f710aef
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/psci_test.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * psci_test - Tests relating to KVM's PSCI implementation.
+ *
+ * Copyright (c) 2021 Google LLC.
+ *
+ * This test includes:
+ * - A regression test for a race between KVM servicing the PSCI CPU_ON call
+ * and userspace reading the targeted vCPU's registers.
+ * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
+ * KVM_SYSTEM_EVENT_SUSPEND UAPI.
+ */
+
+#include <linux/kernel.h>
+#include <linux/psci.h>
+#include <asm/cputype.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
+#define CPU_ON_CONTEXT_ID 0xdeadc0deul
+
+static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
+ uint64_t context_id)
+{
+ struct arm_smccc_res res;
+
+ do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+ 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_affinity_info(uint64_t target_affinity,
+ uint64_t lowest_affinity_level)
+{
+ struct arm_smccc_res res;
+
+ do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+ 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
+{
+ struct arm_smccc_res res;
+
+ do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
+ 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
+{
+ struct arm_smccc_res res;
+
+ do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_features(uint32_t func_id)
+{
+ struct arm_smccc_res res;
+
+ do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static void vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mp_state mp_state = {
+ .mp_state = KVM_MP_STATE_STOPPED,
+ };
+
+ vcpu_mp_state_set(vcpu, &mp_state);
+}
+
+static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
+ struct kvm_vcpu **target)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vm *vm;
+
+ vm = vm_create(2);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+ *source = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ *target = aarch64_vcpu_add(vm, 1, &init, guest_code);
+
+ kvm_arch_vm_finalize_vcpus(vm);
+ return vm;
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+ REPORT_GUEST_ASSERT(uc);
+}
+
+static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+ uint64_t obs_pc, obs_x0;
+
+ obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+ obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
+
+ TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
+ "unexpected target cpu pc: %lx (expected: %lx)",
+ obs_pc, CPU_ON_ENTRY_ADDR);
+ TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
+ "unexpected target context id: %lx (expected: %lx)",
+ obs_x0, CPU_ON_CONTEXT_ID);
+}
+
+static void guest_test_cpu_on(uint64_t target_cpu)
+{
+ uint64_t target_state;
+
+ GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+
+ do {
+ target_state = psci_affinity_info(target_cpu, 0);
+
+ GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+ (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+ } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+ GUEST_DONE();
+}
+
+static void host_test_cpu_on(void)
+{
+ struct kvm_vcpu *source, *target;
+ uint64_t target_mpidr;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = setup_vm(guest_test_cpu_on, &source, &target);
+
+ /*
+ * make sure the target is already off when executing the test.
+ */
+ vcpu_power_off(target);
+
+ target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
+ vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
+ enter_guest(source);
+
+ if (get_ucall(source, &uc) != UCALL_DONE)
+ TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+
+ assert_vcpu_reset(target);
+ kvm_vm_free(vm);
+}
+
+static void guest_test_system_suspend(void)
+{
+ uint64_t ret;
+
+ /* assert that SYSTEM_SUSPEND is discoverable */
+ GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
+ GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND));
+
+ ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID);
+ GUEST_SYNC(ret);
+}
+
+static void host_test_system_suspend(void)
+{
+ struct kvm_vcpu *source, *target;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+
+ vm = setup_vm(guest_test_system_suspend, &source, &target);
+ vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0);
+
+ vcpu_power_off(target);
+ run = source->run;
+
+ enter_guest(source);
+
+ TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT);
+ TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
+ "Unhandled system event: %u (expected: %u)",
+ run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_test_system_off2(void)
+{
+ uint64_t ret;
+
+ /* assert that SYSTEM_OFF2 is discoverable */
+ GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) &
+ PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
+ GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) &
+ PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
+
+ /* With non-zero 'cookie' field, it should fail */
+ ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1);
+ GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
+
+ /*
+ * This would normally never return, so KVM sets the return value
+ * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so
+ * that it can test both values for HIBERNATE_OFF.
+ */
+ ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0);
+ GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
+
+ /*
+ * Revision F.b of the PSCI v1.3 specification documents zero as an
+ * alias for HIBERNATE_OFF, since that's the value used in earlier
+ * revisions of the spec and some implementations in the field.
+ */
+ ret = psci_system_off2(0, 1);
+ GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
+
+ ret = psci_system_off2(0, 0);
+ GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
+
+ GUEST_DONE();
+}
+
+static void host_test_system_off2(void)
+{
+ struct kvm_vcpu *source, *target;
+ struct kvm_mp_state mps;
+ uint64_t psci_version = 0;
+ int nr_shutdowns = 0;
+ struct kvm_run *run;
+ struct ucall uc;
+
+ setup_vm(guest_test_system_off2, &source, &target);
+
+ psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION);
+
+ TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3),
+ "Unexpected PSCI version %lu.%lu",
+ PSCI_VERSION_MAJOR(psci_version),
+ PSCI_VERSION_MINOR(psci_version));
+
+ vcpu_power_off(target);
+ run = source->run;
+
+ enter_guest(source);
+ while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) {
+ TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN,
+ "Unhandled system event: %u (expected: %u)",
+ run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN);
+ TEST_ASSERT(run->system_event.ndata >= 1,
+ "Unexpected amount of system event data: %u (expected, >= 1)",
+ run->system_event.ndata);
+ TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2,
+ "PSCI_OFF2 flag not set. Flags %llu (expected %llu)",
+ run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2);
+
+ nr_shutdowns++;
+
+ /* Restart the vCPU */
+ mps.mp_state = KVM_MP_STATE_RUNNABLE;
+ vcpu_mp_state_set(source, &mps);
+
+ enter_guest(source);
+ }
+
+ TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly");
+ TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns);
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND));
+
+ host_test_cpu_on();
+ host_test_system_suspend();
+ host_test_system_off2();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c
new file mode 100644
index 000000000000..573dd790aeb8
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails
+ * to handle SEA and userspace has opt-ed in KVM_CAP_ARM_SEA_TO_USER.
+ *
+ * After reaching userspace with expected arm_sea info, also test userspace
+ * injecting a synchronous external data abort into the guest.
+ *
+ * This test utilizes EINJ to generate a REAL synchronous external data
+ * abort by consuming a recoverable uncorrectable memory error. Therefore
+ * the device under test must support EINJ in both firmware and host kernel,
+ * including the notrigger feature. Otherwise the test will be skipped.
+ * The under-test platform's APEI should be unable to claim SEA. Otherwise
+ * the test will also be skipped.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "guest_modes.h"
+
+#define PAGE_PRESENT (1ULL << 63)
+#define PAGE_PHYSICAL 0x007fffffffffffffULL
+#define PAGE_ADDR_MASK (~(0xfffULL))
+
+/* Group ISV and ISS[23:14]. */
+#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \
+ (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \
+ (ESR_ELx_SF) | (ESR_ELx_AR))
+
+#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type"
+#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1"
+#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2"
+#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags"
+#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger"
+#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject"
+/* Memory Uncorrectable non-fatal. */
+#define ERROR_TYPE_MEMORY_UER 0x10
+/* Memory address and mask valid (param1 and param2). */
+#define MASK_MEMORY_UER 0b10
+
+/* Guest virtual address region = [2G, 3G). */
+#define START_GVA 0x80000000UL
+#define VM_MEM_SIZE 0x40000000UL
+/* Note: EINJ_OFFSET must < VM_MEM_SIZE. */
+#define EINJ_OFFSET 0x01234badUL
+#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET))
+
+static vm_paddr_t einj_gpa;
+static void *einj_hva;
+static uint64_t einj_hpa;
+static bool far_invalid;
+
+static uint64_t translate_to_host_paddr(unsigned long vaddr)
+{
+ uint64_t pinfo;
+ int64_t offset = vaddr / getpagesize() * sizeof(pinfo);
+ int fd;
+ uint64_t page_addr;
+ uint64_t paddr;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ ksft_exit_fail_perror("Failed to open /proc/self/pagemap");
+ if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
+ close(fd);
+ ksft_exit_fail_perror("Failed to read /proc/self/pagemap");
+ }
+
+ close(fd);
+
+ if ((pinfo & PAGE_PRESENT) == 0)
+ ksft_exit_fail_perror("Page not present");
+
+ page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT;
+ paddr = page_addr + (vaddr & (getpagesize() - 1));
+ return paddr;
+}
+
+static void write_einj_entry(const char *einj_path, uint64_t val)
+{
+ char cmd[256] = {0};
+ FILE *cmdfile = NULL;
+
+ sprintf(cmd, "echo %#lx > %s", val, einj_path);
+ cmdfile = popen(cmd, "r");
+
+ if (pclose(cmdfile) == 0)
+ ksft_print_msg("echo %#lx > %s - done\n", val, einj_path);
+ else
+ ksft_exit_fail_perror("Failed to write EINJ entry");
+}
+
+static void inject_uer(uint64_t paddr)
+{
+ if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1)
+ ksft_test_result_skip("EINJ table no available in firmware");
+
+ if (access(EINJ_ETYPE, R_OK | W_OK) == -1)
+ ksft_test_result_skip("EINJ module probably not loaded?");
+
+ write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER);
+ write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER);
+ write_einj_entry(EINJ_ADDR, paddr);
+ write_einj_entry(EINJ_MASK, ~0x0UL);
+ write_einj_entry(EINJ_NOTRIGGER, 1);
+ write_einj_entry(EINJ_DOIT, 1);
+}
+
+/*
+ * When host APEI successfully claims the SEA caused by guest_code, kernel
+ * will send SIGBUS signal with BUS_MCEERR_AR to test thread.
+ *
+ * We set up this SIGBUS handler to skip the test for that case.
+ */
+static void sigbus_signal_handler(int sig, siginfo_t *si, void *v)
+{
+ ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig);
+ ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n",
+ si->si_signo, si->si_errno, si->si_code, si->si_addr);
+ if (si->si_code == BUS_MCEERR_AR)
+ ksft_test_result_skip("SEA is claimed by host APEI\n");
+ else
+ ksft_test_result_fail("Exit with signal unhandled\n");
+
+ exit(0);
+}
+
+static void setup_sigbus_handler(void)
+{
+ struct sigaction act;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&act.sa_mask);
+ act.sa_sigaction = sigbus_signal_handler;
+ act.sa_flags = SA_SIGINFO;
+ TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0,
+ "Failed to setup SIGBUS handler");
+}
+
+static void guest_code(void)
+{
+ uint64_t guest_data;
+
+ /* Consumes error will cause a SEA. */
+ guest_data = *(uint64_t *)EINJ_GVA;
+
+ GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n",
+ EINJ_GVA, guest_data);
+}
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+ u64 far = read_sysreg(far_el1);
+ bool expect_far_invalid = far_invalid;
+
+ GUEST_PRINTF("Handling Guest SEA\n");
+ GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ if (expect_far_invalid) {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV);
+ GUEST_PRINTF("Guest observed garbage value in FAR\n");
+ } else {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0);
+ GUEST_ASSERT_EQ(far, EINJ_GVA);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ bool guest_done = false;
+ struct kvm_run *run = vcpu->run;
+ u64 esr;
+
+ /* Resume the vCPU after error injection to consume the error. */
+ vcpu_run(vcpu);
+
+ ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n",
+ exit_reason_str(run->exit_reason));
+ ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n",
+ run->arm_sea.esr, run->arm_sea.flags);
+ ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n",
+ run->arm_sea.gva, run->arm_sea.gpa);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA);
+
+ esr = run->arm_sea.esr;
+ TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW);
+ TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+ TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0);
+ TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0);
+ TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0);
+
+ if (!(esr & ESR_ELx_FnV)) {
+ ksft_print_msg("Expect gva to match given FnV bit is 0\n");
+ TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA);
+ }
+
+ if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) {
+ ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n");
+ TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK);
+ }
+
+ far_invalid = esr & ESR_ELx_FnV;
+
+ /* Inject a SEA into guest and expect handled in SEA handler. */
+ vcpu_inject_sea(vcpu);
+
+ /* Expect the guest to reach GUEST_DONE gracefully. */
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PRINTF:
+ ksft_print_msg("From guest: %s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ ksft_print_msg("Guest done gracefully!\n");
+ guest_done = 1;
+ break;
+ case UCALL_ABORT:
+ ksft_print_msg("Guest aborted!\n");
+ guest_done = 1;
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
+ }
+ } while (!guest_done);
+}
+
+static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu)
+{
+ size_t backing_page_size;
+ size_t guest_page_size;
+ size_t alignment;
+ uint64_t num_guest_pages;
+ vm_paddr_t start_gpa;
+ enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB;
+ struct kvm_vm *vm;
+
+ backing_page_size = get_backing_src_pagesz(src_type);
+ guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ alignment = max(backing_page_size, guest_page_size);
+ num_guest_pages = VM_MEM_SIZE / guest_page_size;
+
+ vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+
+ vm_install_sync_handler(vm,
+ /*vector=*/VECTOR_SYNC_CURRENT,
+ /*ec=*/ESR_ELx_EC_DABT_CUR,
+ /*handler=*/expect_sea_handler);
+
+ start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size;
+ start_gpa = align_down(start_gpa, alignment);
+
+ vm_userspace_mem_region_add(
+ /*vm=*/vm,
+ /*src_type=*/src_type,
+ /*guest_paddr=*/start_gpa,
+ /*slot=*/1,
+ /*npages=*/num_guest_pages,
+ /*flags=*/0);
+
+ virt_map(vm, START_GVA, start_gpa, num_guest_pages);
+
+ ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n",
+ num_guest_pages, START_GVA, start_gpa);
+ return vm;
+}
+
+static void vm_inject_memory_uer(struct kvm_vm *vm)
+{
+ uint64_t guest_data;
+
+ einj_gpa = addr_gva2gpa(vm, EINJ_GVA);
+ einj_hva = addr_gva2hva(vm, EINJ_GVA);
+
+ /* Populate certain data before injecting UER. */
+ *(uint64_t *)einj_hva = 0xBAADCAFE;
+ guest_data = *(uint64_t *)einj_hva;
+ ksft_print_msg("Before EINJect: data=%#lx\n",
+ guest_data);
+
+ einj_hpa = translate_to_host_paddr((unsigned long)einj_hva);
+
+ ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n",
+ EINJ_GVA, einj_gpa, einj_hva, einj_hpa);
+
+ inject_uer(einj_hpa);
+ ksft_print_msg("Memory UER EINJected\n");
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER));
+
+ setup_sigbus_handler();
+
+ vm = vm_create_with_sea_handler(&vcpu);
+ vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0);
+ vm_inject_memory_uer(vm);
+ run_vm(vm, vcpu);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
new file mode 100644
index 000000000000..c4815d365816
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -0,0 +1,813 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * set_id_regs - Test for setting ID register from usersapce.
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ *
+ * Test that KVM supports setting ID registers from userspace and handles the
+ * feature set correctly.
+ */
+
+#include <stdint.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+enum ftr_type {
+ FTR_EXACT, /* Use a predefined safe value */
+ FTR_LOWER_SAFE, /* Smaller value is safe */
+ FTR_HIGHER_SAFE, /* Bigger value is safe */
+ FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */
+ FTR_END, /* Mark the last ftr bits */
+};
+
+#define FTR_SIGNED true /* Value should be treated as signed */
+#define FTR_UNSIGNED false /* Value should be treated as unsigned */
+
+struct reg_ftr_bits {
+ char *name;
+ bool sign;
+ enum ftr_type type;
+ uint8_t shift;
+ uint64_t mask;
+ /*
+ * For FTR_EXACT, safe_val is used as the exact safe value.
+ * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
+ */
+ int64_t safe_val;
+};
+
+struct test_feature_reg {
+ uint32_t reg;
+ const struct reg_ftr_bits *ftr_bits;
+};
+
+#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \
+ { \
+ .name = #NAME, \
+ .sign = SIGNED, \
+ .type = TYPE, \
+ .shift = SHIFT, \
+ .mask = MASK, \
+ .safe_val = SAFE_VAL, \
+ }
+
+#define REG_FTR_BITS(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val)
+
+#define S_REG_FTR_BITS(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val)
+
+#define REG_FTR_END \
+ { \
+ .type = FTR_END, \
+ }
+
+static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0),
+ REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
+ REG_FTR_END,
+};
+
+#define TEST_REG(id, table) \
+ { \
+ .reg = id, \
+ .ftr_bits = &((table)[0]), \
+ }
+
+static struct test_feature_reg test_regs[] = {
+ TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
+ TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
+ TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
+ TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
+ TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+ TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1),
+ TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
+ TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
+ TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
+ TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
+ TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+ TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1),
+ TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
+};
+
+#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
+
+static void guest_code(void)
+{
+ GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+ GUEST_REG_SYNC(SYS_MPIDR_EL1);
+ GUEST_REG_SYNC(SYS_CLIDR_EL1);
+ GUEST_REG_SYNC(SYS_CTR_EL0);
+ GUEST_REG_SYNC(SYS_MIDR_EL1);
+ GUEST_REG_SYNC(SYS_REVIDR_EL1);
+ GUEST_REG_SYNC(SYS_AIDR_EL1);
+
+ GUEST_DONE();
+}
+
+/* Return a safe value to a given ftr_bits an ftr value */
+uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+ uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
+
+ if (ftr_bits->sign == FTR_UNSIGNED) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = ftr_bits->safe_val;
+ break;
+ case FTR_LOWER_SAFE:
+ if (ftr > ftr_bits->safe_val)
+ ftr--;
+ break;
+ case FTR_HIGHER_SAFE:
+ if (ftr < ftr_max)
+ ftr++;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == ftr_max)
+ ftr = 0;
+ else if (ftr != 0)
+ ftr++;
+ break;
+ default:
+ break;
+ }
+ } else if (ftr != ftr_max) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = ftr_bits->safe_val;
+ break;
+ case FTR_LOWER_SAFE:
+ if (ftr > ftr_bits->safe_val)
+ ftr--;
+ break;
+ case FTR_HIGHER_SAFE:
+ if (ftr < ftr_max - 1)
+ ftr++;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr != 0 && ftr != ftr_max - 1)
+ ftr++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return ftr;
+}
+
+/* Return an invalid value to a given ftr_bits an ftr value */
+uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+ uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
+
+ if (ftr_bits->sign == FTR_UNSIGNED) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ break;
+ case FTR_LOWER_SAFE:
+ ftr++;
+ break;
+ case FTR_HIGHER_SAFE:
+ ftr--;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == 0)
+ ftr = ftr_max;
+ else
+ ftr--;
+ break;
+ default:
+ break;
+ }
+ } else if (ftr != ftr_max) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ break;
+ case FTR_LOWER_SAFE:
+ ftr++;
+ break;
+ case FTR_HIGHER_SAFE:
+ ftr--;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == 0)
+ ftr = ftr_max - 1;
+ else
+ ftr--;
+ break;
+ default:
+ break;
+ }
+ } else {
+ ftr = 0;
+ }
+
+ return ftr;
+}
+
+static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
+{
+ uint8_t shift = ftr_bits->shift;
+ uint64_t mask = ftr_bits->mask;
+ uint64_t val, new_val, ftr;
+
+ val = vcpu_get_reg(vcpu, reg);
+ ftr = (val & mask) >> shift;
+
+ ftr = get_safe_value(ftr_bits, ftr);
+
+ ftr <<= shift;
+ val &= ~mask;
+ val |= ftr;
+
+ vcpu_set_reg(vcpu, reg, val);
+ new_val = vcpu_get_reg(vcpu, reg);
+ TEST_ASSERT_EQ(new_val, val);
+
+ return new_val;
+}
+
+static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
+{
+ uint8_t shift = ftr_bits->shift;
+ uint64_t mask = ftr_bits->mask;
+ uint64_t val, old_val, ftr;
+ int r;
+
+ val = vcpu_get_reg(vcpu, reg);
+ ftr = (val & mask) >> shift;
+
+ ftr = get_invalid_value(ftr_bits, ftr);
+
+ old_val = val;
+ ftr <<= shift;
+ val &= ~mask;
+ val |= ftr;
+
+ r = __vcpu_set_reg(vcpu, reg, val);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+ val = vcpu_get_reg(vcpu, reg);
+ TEST_ASSERT_EQ(val, old_val);
+}
+
+static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+
+#define encoding_to_range_idx(encoding) \
+ KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \
+ sys_reg_CRn(encoding), sys_reg_CRm(encoding), \
+ sys_reg_Op2(encoding))
+
+
+static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ int ret;
+
+ /* KVM should return error when reserved field is not zero */
+ range.reserved[0] = 1;
+ ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+ TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
+ const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
+ uint32_t reg_id = test_regs[i].reg;
+ uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
+ int idx;
+
+ /* Get the index to masks array for the idreg */
+ idx = encoding_to_range_idx(reg_id);
+
+ for (int j = 0; ftr_bits[j].type != FTR_END; j++) {
+ /* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */
+ if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
+ ksft_test_result_skip("%s on AARCH64 only system\n",
+ ftr_bits[j].name);
+ continue;
+ }
+
+ /* Make sure the feature field is writable */
+ TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
+
+ test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
+
+ test_reg_vals[idx] = test_reg_set_success(vcpu, reg,
+ &ftr_bits[j]);
+
+ ksft_test_result_pass("%s\n", ftr_bits[j].name);
+ }
+ }
+}
+
+#define MPAM_IDREG_TEST 6
+static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ uint64_t val;
+ int idx, err;
+
+ /*
+ * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero,
+ * check that if it can be set to 1, (i.e. it is supported by the
+ * hardware), that it can't be set to other values.
+ */
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ /* Writeable? Nothing to test! */
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1);
+ if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) {
+ ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n");
+ return;
+ }
+
+ /* Get the id register value */
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ /* Try to set MPAM=0. This should always be possible. */
+ val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+ val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+ if (err)
+ ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n");
+
+ /* Try to set MPAM=1 */
+ val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+ val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+ if (err)
+ ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n");
+
+ /* Try to set MPAM=2 */
+ val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+ val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+ if (err)
+ ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n");
+ else
+ ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n");
+
+ /* And again for ID_AA64PFR1_EL1.MPAM_frac */
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+ if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) {
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n");
+ return;
+ }
+
+ /* Get the id register value */
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+
+ /* Try to set MPAM_frac=0. This should always be possible. */
+ val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err)
+ ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n");
+
+ /* Try to set MPAM_frac=1 */
+ val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err)
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n");
+
+ /* Try to set MPAM_frac=2 */
+ val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err)
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n");
+ else
+ ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
+}
+
+#define MTE_IDREG_TEST 1
+static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ uint64_t val;
+ uint64_t mte;
+ uint64_t mte_frac;
+ int idx, err;
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val);
+ if (!mte) {
+ ksft_test_result_skip("MTE capability not supported, nothing to test\n");
+ return;
+ }
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+ if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) {
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n");
+ return;
+ }
+
+ /*
+ * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2)
+ * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported
+ * and MTE_frac == 0 indicates it is supported.
+ *
+ * As MTE_frac was previously unconditionally read as 0, check
+ * that the set to 0 succeeds but does not change MTE_frac
+ * from unsupported (0xF) to supported (0).
+ *
+ */
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
+ if (mte != ID_AA64PFR1_EL1_MTE_MTE2 ||
+ mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) {
+ ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n");
+ return;
+ }
+
+ /* Try to set MTE_frac=0. */
+ val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err) {
+ ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n");
+ return;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
+ if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI)
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n");
+}
+
+static void test_guest_reg_read(struct kvm_vcpu *vcpu)
+{
+ bool done = false;
+ struct ucall uc;
+
+ while (!done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_SYNC:
+ /* Make sure the written values are seen by guest */
+ TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
+ uc.args[3]);
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+/* Politely lifted from arch/arm64/include/asm/cache.h */
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level) \
+ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+static void test_clidr(struct kvm_vcpu *vcpu)
+{
+ uint64_t clidr;
+ int level;
+
+ clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
+
+ /* find the first empty level in the cache hierarchy */
+ for (level = 1; level <= 7; level++) {
+ if (!CLIDR_CTYPE(clidr, level))
+ break;
+ }
+
+ /*
+ * If you have a mind-boggling 7 levels of cache, congratulations, you
+ * get to fix this.
+ */
+ TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy");
+
+ /* stick in a unified cache level */
+ clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level);
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr);
+ test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr;
+}
+
+static void test_ctr(struct kvm_vcpu *vcpu)
+{
+ u64 ctr;
+
+ ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0));
+ ctr &= ~CTR_EL0_DIC_MASK;
+ if (ctr & CTR_EL0_IminLine_MASK)
+ ctr--;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr);
+ test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
+}
+
+static void test_id_reg(struct kvm_vcpu *vcpu, u32 id)
+{
+ u64 val;
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id));
+ val++;
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val);
+ test_reg_vals[encoding_to_range_idx(id)] = val;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+ test_clidr(vcpu);
+ test_ctr(vcpu);
+
+ test_id_reg(vcpu, SYS_MPIDR_EL1);
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+ test_id_reg(vcpu, SYS_MIDR_EL1);
+ test_id_reg(vcpu, SYS_REVIDR_EL1);
+ test_id_reg(vcpu, SYS_AIDR_EL1);
+
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
+{
+ size_t idx = encoding_to_range_idx(encoding);
+ uint64_t observed;
+
+ observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
+ TEST_ASSERT_EQ(test_reg_vals[idx], observed);
+}
+
+static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an
+ * architectural reset of the vCPU.
+ */
+ aarch64_vcpu_setup(vcpu, NULL);
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++)
+ test_assert_id_reg_unchanged(vcpu, test_regs[i].reg);
+
+ test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+ test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1);
+
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ bool aarch64_only;
+ uint64_t val, el0;
+ int test_cnt, i, j;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS));
+
+ test_wants_mte();
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ /* Check for AARCH64 only system */
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
+ aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP);
+
+ ksft_print_header();
+
+ test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST;
+ for (i = 0; i < ARRAY_SIZE(test_regs); i++)
+ for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++)
+ test_cnt++;
+
+ ksft_set_plan(test_cnt);
+
+ test_vm_ftr_id_regs(vcpu, aarch64_only);
+ test_vcpu_ftr_id_regs(vcpu);
+ test_vcpu_non_ftr_id_regs(vcpu);
+ test_user_set_mpam_reg(vcpu);
+ test_user_set_mte_reg(vcpu);
+
+ test_guest_reg_read(vcpu);
+
+ test_reset_preserves_id_regs(vcpu);
+
+ kvm_vm_free(vm);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c
new file mode 100644
index 000000000000..1763b9d45400
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * smccc_filter - Tests for the SMCCC filter UAPI.
+ *
+ * Copyright (c) 2023 Google LLC
+ *
+ * This test includes:
+ * - Tests that the UAPI constraints are upheld by KVM. For example, userspace
+ * is prevented from filtering the architecture range of SMCCC calls.
+ * - Test that the filter actions (DENIED, FWD_TO_USER) work as intended.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/psci.h>
+#include <stdint.h>
+
+#include "processor.h"
+#include "test_util.h"
+
+enum smccc_conduit {
+ HVC_INSN,
+ SMC_INSN,
+};
+
+static bool test_runs_at_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ struct kvm_vcpu_init init;
+
+ kvm_get_default_vcpu_target(vm, &init);
+ kvm_vm_free(vm);
+
+ return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
+}
+
+#define for_each_conduit(conduit) \
+ for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \
+ conduit <= SMC_INSN; conduit++)
+
+static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
+{
+ struct arm_smccc_res res;
+
+ if (conduit == SMC_INSN)
+ smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+ else
+ smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ GUEST_SYNC(res.a0);
+}
+
+static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ struct kvm_smccc_filter filter = {
+ .base = start,
+ .nr_functions = nr_functions,
+ .action = action,
+ };
+
+ return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+}
+
+static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ int ret = __set_smccc_filter(vm, start, nr_functions, action);
+
+ TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
+}
+
+static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+ kvm_get_default_vcpu_target(vm, &init);
+
+ /*
+ * Enable in-kernel emulation of PSCI to ensure that calls are denied
+ * due to the SMCCC filter, not because of KVM.
+ */
+ init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+ *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+ kvm_arch_vm_finalize_vcpus(vm);
+ return vm;
+}
+
+static void test_pad_must_be_zero(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ struct kvm_smccc_filter filter = {
+ .base = PSCI_0_2_FN_PSCI_VERSION,
+ .nr_functions = 1,
+ .action = KVM_SMCCC_FILTER_DENY,
+ .pad = { -1 },
+ };
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Setting filter with nonzero padding should return EINVAL");
+}
+
+/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
+static void test_filter_reserved_range(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ uint32_t smc64_fn;
+ int r;
+
+ r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
+ 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
+ smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
+ 0, 0);
+
+ r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void test_invalid_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to filter 0 functions should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_overflow_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to overflow filter range should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_reserved_action(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to use reserved filter action should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+
+/* Test that overlapping configurations of the SMCCC filter are rejected */
+static void test_filter_overlap(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter already configured range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void expect_call_denied(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_SYNC)
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+
+ TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
+ "Unexpected SMCCC return code: %lu", uc.args[1]);
+}
+
+/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
+static void test_filter_denied(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_denied(vcpu);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
+ enum smccc_conduit conduit)
+{
+ struct kvm_run *run = vcpu->run;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
+ "Unexpected exit reason: %u", run->exit_reason);
+ TEST_ASSERT(run->hypercall.nr == func_id,
+ "Unexpected SMCCC function: %llu", run->hypercall.nr);
+
+ if (conduit == SMC_INSN)
+ TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
+ "KVM_HYPERCALL_EXIT_SMC is not set");
+ else
+ TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
+ "KVM_HYPERCALL_EXIT_SMC is set");
+}
+
+/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
+static void test_filter_fwd_to_user(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static bool kvm_supports_smccc_filter(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
+
+ kvm_vm_free(vm);
+ return !r;
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_supports_smccc_filter());
+
+ test_pad_must_be_zero();
+ test_invalid_nr_functions();
+ test_overflow_nr_functions();
+ test_reserved_action();
+ test_filter_reserved_range();
+ test_filter_overlap();
+ test_filter_denied();
+ test_filter_fwd_to_user();
+}
diff --git a/tools/testing/selftests/kvm/arm64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c
new file mode 100644
index 000000000000..80b74c6f152b
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
+ * or all 32bit vcPUs) can be configured and mixed-width vCPUs cannot be
+ * configured.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+
+/*
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1.
+ */
+static int add_init_2vcpus(struct kvm_vcpu_init *init0,
+ struct kvm_vcpu_init *init1)
+{
+ struct kvm_vcpu *vcpu0, *vcpu1;
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ vcpu0 = __vm_vcpu_add(vm, 0);
+ ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+ if (ret)
+ goto free_exit;
+
+ vcpu1 = __vm_vcpu_add(vm, 1);
+ ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+ kvm_vm_free(vm);
+ return ret;
+}
+
+/*
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init1.
+ */
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0,
+ struct kvm_vcpu_init *init1)
+{
+ struct kvm_vcpu *vcpu0, *vcpu1;
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ vcpu0 = __vm_vcpu_add(vm, 0);
+ vcpu1 = __vm_vcpu_add(vm, 1);
+
+ ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+ if (ret)
+ goto free_exit;
+
+ ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+ kvm_vm_free(vm);
+ return ret;
+}
+
+/*
+ * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be
+ * configured, and two mixed-width vCPUs cannot be configured.
+ * Each of those three cases, configure vCPUs in two different orders.
+ * The one is running KVM_CREATE_VCPU for 2 vCPUs, and then running
+ * KVM_ARM_VCPU_INIT for them.
+ * The other is running KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for a vCPU,
+ * and then run those commands for another vCPU.
+ */
+int main(void)
+{
+ struct kvm_vcpu_init init0, init1;
+ struct kvm_vm *vm;
+ int ret;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT));
+
+ /* Get the preferred target type and copy that to init1 for later use */
+ vm = vm_create_barebones();
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0);
+ kvm_vm_free(vm);
+ init1 = init0;
+
+ /* Test with 64bit vCPUs */
+ ret = add_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 64bit EL1 vCPUs failed unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 64bit EL1 vCPUs failed unexpectedly");
+
+ /* Test with 32bit vCPUs */
+ init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 32bit EL1 vCPUs failed unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 32bit EL1 vCPUs failed unexpectedly");
+
+ /* Test with mixed-width vCPUs */
+ init0.features[0] = 0;
+ init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init0, &init1);
+ TEST_ASSERT(ret != 0,
+ "Configuring mixed-width vCPUs worked unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init0, &init1);
+ TEST_ASSERT(ret != 0,
+ "Configuring mixed-width vCPUs worked unexpectedly");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c
new file mode 100644
index 000000000000..8d6d3a4ae4db
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/vgic_init.c
@@ -0,0 +1,1021 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic init sequence tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include <arm64/gic_v3.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vgic.h"
+#include "gic_v3.h"
+
+#define NR_VCPUS 4
+
+#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
+
+#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
+#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
+
+struct vm_gic {
+ struct kvm_vm *vm;
+ int gic_fd;
+ uint32_t gic_dev_type;
+};
+
+static uint64_t max_phys_size;
+
+/*
+ * Helpers to access a redistributor register and verify the ioctl() failed or
+ * succeeded as expected, and provided the correct value on success.
+ */
+static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
+ int want, const char *msg)
+{
+ uint32_t ignored_val;
+ int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+ REG_OFFSET(vcpu, offset), &ignored_val);
+
+ TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
+}
+
+static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
+ const char *msg)
+{
+ uint32_t val;
+
+ kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+ REG_OFFSET(vcpu, offset), &val);
+ TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val);
+}
+
+/* dummy guest code */
+static void guest_code(void)
+{
+ GUEST_SYNC(0);
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+ GUEST_DONE();
+}
+
+/* we don't want to assert on run execution, hence that helper */
+static int run_vcpu(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_run(vcpu) ? -errno : 0;
+}
+
+static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
+ uint32_t nr_vcpus,
+ struct kvm_vcpu *vcpus[])
+{
+ struct vm_gic v;
+
+ v.gic_dev_type = gic_dev_type;
+ v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ return v;
+}
+
+static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type)
+{
+ struct vm_gic v;
+
+ v.gic_dev_type = gic_dev_type;
+ v.vm = vm_create_barebones();
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ return v;
+}
+
+
+static void vm_gic_destroy(struct vm_gic *v)
+{
+ close(v->gic_fd);
+ kvm_vm_free(v->vm);
+}
+
+struct vgic_region_attr {
+ uint64_t attr;
+ uint64_t size;
+ uint64_t alignment;
+};
+
+struct vgic_region_attr gic_v3_dist_region = {
+ .attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
+ .size = 0x10000,
+ .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v3_redist_region = {
+ .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
+ .size = NR_VCPUS * 0x20000,
+ .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v2_dist_region = {
+ .attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
+ .size = 0x1000,
+ .alignment = 0x1000,
+};
+
+struct vgic_region_attr gic_v2_cpu_region = {
+ .attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
+ .size = 0x2000,
+ .alignment = 0x1000,
+};
+
+/**
+ * Helper routine that performs KVM device tests in general. Eventually the
+ * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping
+ * DIST/REDIST (or DIST/CPUIF for GICv2). Assumption is 4 vcpus are going to be
+ * used hence the overlap. In the case of GICv3, A RDIST region is set at @0x0
+ * and a DIST region is set @0x70000. The GICv2 case sets a CPUIF @0x0 and a
+ * DIST region @0x1000.
+ */
+static void subtest_dist_rdist(struct vm_gic *v)
+{
+ int ret;
+ uint64_t addr;
+ struct vgic_region_attr rdist; /* CPU interface in GICv2*/
+ struct vgic_region_attr dist;
+
+ rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
+ : gic_v2_cpu_region;
+ dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
+ : gic_v2_dist_region;
+
+ /* Check existing group/attributes */
+ kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr);
+
+ kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr);
+
+ /* check non existing attribute */
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
+ TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
+
+ /* misaligned DIST and REDIST address settings */
+ addr = dist.alignment / 0x10;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
+
+ addr = rdist.alignment / 0x10;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
+
+ /* out of range address */
+ addr = max_phys_size;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
+ TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
+
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
+
+ /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
+ addr = max_phys_size - dist.alignment;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "half of the redist is beyond IPA limit");
+
+ /* set REDIST base address @0x0*/
+ addr = 0x00000;
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+
+ /* Attempt to create a second legacy redistributor region */
+ addr = 0xE0000;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
+
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST);
+ if (!ret) {
+ /* Attempt to mix legacy and new redistributor regions */
+ addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "attempt to mix GICv3 REDIST and REDIST_REGION");
+ }
+
+ /*
+ * Set overlapping DIST / REDIST, cannot be detected here. Will be detected
+ * on first vcpu run instead.
+ */
+ addr = rdist.size - rdist.alignment;
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
+}
+
+/* Test the new REDIST region API */
+static void subtest_v3_redist_regions(struct vm_gic *v)
+{
+ uint64_t addr, expected_addr;
+ int ret;
+
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST);
+ TEST_ASSERT(!ret, "Multiple redist regions advertised");
+
+ addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0");
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count== 0");
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "attempt to register the first rdist region with index != 0");
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address");
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "register an rdist region overlapping with another one");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register redist region with base address beyond IPA range");
+
+ /* The last redist is above the pa range. */
+ addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register redist region with top address beyond IPA range");
+
+ addr = 0x260000;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION");
+
+ /*
+ * Now there are 2 redist regions:
+ * region 0 @ 0x200000 2 redists
+ * region 1 @ 0x240000 1 redist
+ * Attempt to read their characteristics
+ */
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0);
+ expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0");
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1);
+ expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1");
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region");
+
+ addr = 0x260000;
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist");
+}
+
+/*
+ * VGIC KVM device is created and initialized before the secondary CPUs
+ * get created
+ */
+static void test_vgic_then_vcpus(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret, i;
+
+ v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus);
+
+ subtest_dist_rdist(&v);
+
+ /* Add the rest of the VCPUs */
+ for (i = 1; i < NR_VCPUS; ++i)
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+/* All the VCPUs are created before the VGIC KVM device gets initialized */
+static void test_vcpus_then_vgic(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret;
+
+ v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus);
+
+ subtest_dist_rdist(&v);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+#define KVM_VGIC_V2_ATTR(offset, cpu) \
+ (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \
+ FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu))
+
+#define GIC_CPU_CTRL 0x00
+
+static void test_v2_uaccess_cpuif_no_vcpus(void)
+{
+ struct vm_gic v;
+ u64 val = 0;
+ int ret;
+
+ v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2);
+ subtest_dist_rdist(&v);
+
+ ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+ KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0));
+ TEST_ASSERT(ret && errno == EINVAL,
+ "accessed non-existent CPU interface, want errno: %i",
+ EINVAL);
+ ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+ KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "accessed non-existent CPU interface, want errno: %i",
+ EINVAL);
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+ KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "accessed non-existent CPU interface, want errno: %i",
+ EINVAL);
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_new_redist_regions(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ void *dummy = NULL;
+ struct vm_gic v;
+ uint64_t addr;
+ int ret;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ subtest_v3_redist_regions(&v);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists");
+ vm_gic_destroy(&v);
+
+ /* step2 */
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ subtest_v3_redist_regions(&v);
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init");
+
+ vm_gic_destroy(&v);
+
+ /* step 3 */
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ subtest_v3_redist_regions(&v);
+
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy);
+ TEST_ASSERT(ret && errno == EFAULT,
+ "register a third region allowing to cover the 4 vcpus");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(!ret, "vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_typer_accesses(void)
+{
+ struct vm_gic v;
+ uint64_t addr;
+ int ret, i;
+
+ v.vm = vm_create(NR_VCPUS);
+ (void)vm_vcpu_add(v.vm, 0, guest_code);
+
+ v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+ (void)vm_vcpu_add(v.vm, 3, guest_code);
+
+ v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL,
+ "attempting to read GICR_TYPER of non created vcpu");
+
+ (void)vm_vcpu_add(v.vm, 1, guest_code);
+
+ v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY,
+ "read GICR_TYPER before GIC initialized");
+
+ (void)vm_vcpu_add(v.vm, 2, guest_code);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ for (i = 0; i < NR_VCPUS ; i++) {
+ v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100,
+ "read GICR_TYPER before rdist region setting");
+ }
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ /* The 2 first rdists should be put there (vcpu 0 and 3) */
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1");
+
+ addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
+
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100,
+ "no redist region attached to vcpu #1 yet, last cannot be returned");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200,
+ "no redist region attached to vcpu #2, last cannot be returned");
+
+ addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210,
+ "read typer of rdist #1, last properly returned");
+
+ vm_gic_destroy(&v);
+}
+
+static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
+ uint32_t vcpuids[])
+{
+ struct vm_gic v;
+ int i;
+
+ v.vm = vm_create(nr_vcpus);
+ for (i = 0; i < nr_vcpus; i++)
+ vm_vcpu_add(v.vm, vcpuids[i], guest_code);
+
+ v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+ return v;
+}
+
+/**
+ * Test GICR_TYPER last bit with new redist regions
+ * rdist regions #1 and #2 are contiguous
+ * rdist region #0 @0x100000 2 rdist capacity
+ * rdists: 0, 3 (Last)
+ * rdist region #1 @0x240000 2 rdist capacity
+ * rdists: 5, 4 (Last)
+ * rdist region #2 @0x200000 2 rdist capacity
+ * rdists: 1, 2
+ */
+static void test_v3_last_bit_redist_regions(void)
+{
+ uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+ struct vm_gic v;
+ uint64_t addr;
+
+ v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3");
+ v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
+ v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4");
+
+ vm_gic_destroy(&v);
+}
+
+/* Test last bit with legacy region */
+static void test_v3_last_bit_single_rdist(void)
+{
+ uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+ struct vm_gic v;
+ uint64_t addr;
+
+ v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ addr = 0x10000;
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #3");
+
+ vm_gic_destroy(&v);
+}
+
+/* Uses the legacy REDIST region API. */
+static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret, i;
+ uint64_t addr;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
+
+ /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */
+ addr = max_phys_size - (3 * 2 * 0x10000);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+ addr = 0x00000;
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+ /* Add the rest of the VCPUs */
+ for (i = 1; i < NR_VCPUS; ++i)
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ /* Attempt to run a vcpu without enough redist space. */
+ ret = run_vcpu(vcpus[2]);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "redist base+size above PA range detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_its_region(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ uint64_t addr;
+ int its_fd, ret;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+
+ addr = 0x401000;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "ITS region with misaligned address");
+
+ addr = max_phys_size;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register ITS region with base address beyond IPA range");
+
+ addr = max_phys_size - 0x10000;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "Half of ITS region is beyond IPA range");
+
+ /* This one succeeds setting the ITS base */
+ addr = 0x400000;
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+
+ addr = 0x300000;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
+
+ close(its_fd);
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_nassgicap(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ bool has_nassgicap;
+ struct vm_gic vm;
+ u32 typer2;
+ int ret;
+
+ vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap;
+
+ typer2 |= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ if (has_nassgicap)
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret));
+ else
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Enabled nASSGIcap even though it's unavailable");
+
+ typer2 &= ~GICD_TYPER2_nASSGIcap;
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ typer2 ^= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ TEST_ASSERT(ret && errno == EBUSY,
+ "Changed nASSGIcap after initializing the VGIC");
+
+ vm_gic_destroy(&vm);
+}
+
+/*
+ * Returns 0 if it's possible to create GIC device of a given type (V2 or V3).
+ */
+int test_kvm_device(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ uint32_t other;
+ int ret;
+
+ v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
+
+ /* try to create a non existing KVM device */
+ ret = __kvm_test_create_device(v.vm, 0);
+ TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
+
+ /* trial mode */
+ ret = __kvm_test_create_device(v.vm, gic_dev_type);
+ if (ret)
+ return ret;
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ ret = __kvm_create_device(v.vm, gic_dev_type);
+ TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
+
+ /* try to create the other gic_dev_type */
+ other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
+ : KVM_DEV_TYPE_ARM_VGIC_V2;
+
+ if (!__kvm_test_create_device(v.vm, other)) {
+ ret = __kvm_create_device(v.vm, other);
+ TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST),
+ "create GIC device while other version exists");
+ }
+
+ vm_gic_destroy(&v);
+
+ return 0;
+}
+
+struct sr_def {
+ const char *name;
+ u32 encoding;
+};
+
+#define PACK_SR(r) \
+ ((sys_reg_Op0(r) << 14) | \
+ (sys_reg_Op1(r) << 11) | \
+ (sys_reg_CRn(r) << 7) | \
+ (sys_reg_CRm(r) << 3) | \
+ (sys_reg_Op2(r)))
+
+#define SR(r) \
+ { \
+ .name = #r, \
+ .encoding = r, \
+ }
+
+static const struct sr_def sysregs_el1[] = {
+ SR(SYS_ICC_PMR_EL1),
+ SR(SYS_ICC_BPR0_EL1),
+ SR(SYS_ICC_AP0R0_EL1),
+ SR(SYS_ICC_AP0R1_EL1),
+ SR(SYS_ICC_AP0R2_EL1),
+ SR(SYS_ICC_AP0R3_EL1),
+ SR(SYS_ICC_AP1R0_EL1),
+ SR(SYS_ICC_AP1R1_EL1),
+ SR(SYS_ICC_AP1R2_EL1),
+ SR(SYS_ICC_AP1R3_EL1),
+ SR(SYS_ICC_BPR1_EL1),
+ SR(SYS_ICC_CTLR_EL1),
+ SR(SYS_ICC_SRE_EL1),
+ SR(SYS_ICC_IGRPEN0_EL1),
+ SR(SYS_ICC_IGRPEN1_EL1),
+};
+
+static const struct sr_def sysregs_el2[] = {
+ SR(SYS_ICH_AP0R0_EL2),
+ SR(SYS_ICH_AP0R1_EL2),
+ SR(SYS_ICH_AP0R2_EL2),
+ SR(SYS_ICH_AP0R3_EL2),
+ SR(SYS_ICH_AP1R0_EL2),
+ SR(SYS_ICH_AP1R1_EL2),
+ SR(SYS_ICH_AP1R2_EL2),
+ SR(SYS_ICH_AP1R3_EL2),
+ SR(SYS_ICH_HCR_EL2),
+ SR(SYS_ICC_SRE_EL2),
+ SR(SYS_ICH_VTR_EL2),
+ SR(SYS_ICH_VMCR_EL2),
+ SR(SYS_ICH_LR0_EL2),
+ SR(SYS_ICH_LR1_EL2),
+ SR(SYS_ICH_LR2_EL2),
+ SR(SYS_ICH_LR3_EL2),
+ SR(SYS_ICH_LR4_EL2),
+ SR(SYS_ICH_LR5_EL2),
+ SR(SYS_ICH_LR6_EL2),
+ SR(SYS_ICH_LR7_EL2),
+ SR(SYS_ICH_LR8_EL2),
+ SR(SYS_ICH_LR9_EL2),
+ SR(SYS_ICH_LR10_EL2),
+ SR(SYS_ICH_LR11_EL2),
+ SR(SYS_ICH_LR12_EL2),
+ SR(SYS_ICH_LR13_EL2),
+ SR(SYS_ICH_LR14_EL2),
+ SR(SYS_ICH_LR15_EL2),
+};
+
+static void test_sysreg_array(int gic, const struct sr_def *sr, int nr,
+ int (*check)(int, const struct sr_def *, const char *))
+{
+ for (int i = 0; i < nr; i++) {
+ u64 val;
+ u64 attr;
+ int ret;
+
+ /* Assume MPIDR_EL1.Aff*=0 */
+ attr = PACK_SR(sr[i].encoding);
+
+ /*
+ * The API is braindead. A register can be advertised as
+ * available, and yet not be readable or writable.
+ * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and
+ * ICH_APnR{1,2,3}_EL2 do follow suit for consistency.
+ *
+ * On the bright side, no known HW is implementing more than
+ * 5 bits of priority, so we're safe. Sort of...
+ */
+ ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr);
+ TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name);
+
+ /* Check that we can write back what we read */
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name);
+ ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name);
+ }
+}
+
+static u8 get_ctlr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICC_CTLR_EL1), &val);
+ TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable");
+
+ pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICC_AP0R1_EL1:
+ case SYS_ICC_AP1R1_EL1:
+ if (get_ctlr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICC_AP0R2_EL1:
+ case SYS_ICC_AP0R3_EL1:
+ case SYS_ICC_AP1R2_EL1:
+ case SYS_ICC_AP1R3_EL1:
+ if (get_ctlr_pribits(gic) == 7)
+ return 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static u8 get_vtr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICH_VTR_EL2), &val);
+ TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable");
+
+ pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICH_AP0R1_EL2:
+ case SYS_ICH_AP1R1_EL2:
+ if (get_vtr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICH_AP0R2_EL2:
+ case SYS_ICH_AP0R3_EL2:
+ case SYS_ICH_AP1R2_EL2:
+ case SYS_ICH_AP1R3_EL2:
+ if (get_vtr_pribits(gic) == 7)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static void test_v3_sysregs(void)
+{
+ struct kvm_vcpu_init init = {};
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ u32 feat = 0;
+ int gic;
+
+ if (kvm_check_cap(KVM_CAP_ARM_EL2))
+ feat |= BIT(KVM_ARM_VCPU_HAS_EL2);
+
+ vm = vm_create(1);
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= feat;
+
+ vcpu = aarch64_vcpu_add(vm, 0, &init, NULL);
+ TEST_ASSERT(vcpu, "Can't create a vcpu?");
+
+ gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ TEST_ASSERT(gic >= 0, "No GIC???");
+
+ kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs);
+ if (feat)
+ test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs);
+ else
+ pr_info("SKIP EL2 registers, not available\n");
+
+ close(gic);
+ kvm_vm_free(vm);
+}
+
+void run_tests(uint32_t gic_dev_type)
+{
+ test_vcpus_then_vgic(gic_dev_type);
+ test_vgic_then_vcpus(gic_dev_type);
+
+ if (VGIC_DEV_IS_V2(gic_dev_type))
+ test_v2_uaccess_cpuif_no_vcpus();
+
+ if (VGIC_DEV_IS_V3(gic_dev_type)) {
+ test_v3_new_redist_regions();
+ test_v3_typer_accesses();
+ test_v3_last_bit_redist_regions();
+ test_v3_last_bit_single_rdist();
+ test_v3_redist_ipa_range_check_at_vcpu_run();
+ test_v3_its_region();
+ test_v3_sysregs();
+ test_v3_nassgicap();
+ }
+}
+
+int main(int ac, char **av)
+{
+ int ret;
+ int pa_bits;
+ int cnt_impl = 0;
+
+ test_disable_default_vgic();
+
+ pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+ max_phys_size = 1ULL << pa_bits;
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (!ret) {
+ pr_info("Running GIC_v3 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
+ cnt_impl++;
+ }
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
+ if (!ret) {
+ pr_info("Running GIC_v2 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
+ cnt_impl++;
+ }
+
+ if (!cnt_impl) {
+ print_skip("No GICv2 nor GICv3 support");
+ exit(KSFT_SKIP);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
new file mode 100644
index 000000000000..2fb2c7939fe9
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
@@ -0,0 +1,1084 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_irq.c - Test userspace injection of IRQs
+ *
+ * This test validates the injection of IRQs from userspace using various
+ * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
+ * host to inject a specific intid via a GUEST_SYNC call, and then checks that
+ * it received it.
+ */
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+#include <sys/eventfd.h>
+#include <linux/sizes.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "vgic.h"
+
+/*
+ * Stores the user specified args; it's passed to the guest and to every test
+ * function.
+ */
+struct test_args {
+ uint32_t nr_irqs; /* number of KVM supported IRQs. */
+ bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
+ bool level_sensitive; /* 1 is level, 0 is edge */
+ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
+ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+ uint32_t shared_data;
+};
+
+/*
+ * KVM implements 32 priority levels:
+ * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
+ *
+ * Note that these macros will still be correct in the case that KVM implements
+ * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
+ */
+#define KVM_NUM_PRIOS 32
+#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */
+#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */
+#define LOWEST_PRIO (KVM_NUM_PRIOS - 1)
+#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
+#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1)
+#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
+
+/*
+ * The kvm_inject_* utilities are used by the guest to ask the host to inject
+ * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
+ */
+
+typedef enum {
+ KVM_INJECT_EDGE_IRQ_LINE = 1,
+ KVM_SET_IRQ_LINE,
+ KVM_SET_IRQ_LINE_HIGH,
+ KVM_SET_LEVEL_INFO_HIGH,
+ KVM_INJECT_IRQFD,
+ KVM_WRITE_ISPENDR,
+ KVM_WRITE_ISACTIVER,
+} kvm_inject_cmd;
+
+struct kvm_inject_args {
+ kvm_inject_cmd cmd;
+ uint32_t first_intid;
+ uint32_t num;
+ int level;
+ bool expect_failure;
+};
+
+/* Used on the guest side to perform the hypercall. */
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure);
+
+/* Used on the host side to get the hypercall info. */
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args);
+
+#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \
+ kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
+
+#define KVM_INJECT_MULTI(cmd, intid, num) \
+ _KVM_INJECT_MULTI(cmd, intid, num, false)
+
+#define _KVM_INJECT(cmd, intid, expect_failure) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
+
+#define KVM_INJECT(cmd, intid) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, false)
+
+#define KVM_ACTIVATE(cmd, intid) \
+ kvm_inject_call(cmd, intid, 1, 1, false);
+
+struct kvm_inject_desc {
+ kvm_inject_cmd cmd;
+ /* can inject PPIs, PPIs, and/or SPIs. */
+ bool sgi, ppi, spi;
+};
+
+static struct kvm_inject_desc inject_edge_fns[] = {
+ /* sgi ppi spi */
+ { KVM_INJECT_EDGE_IRQ_LINE, false, false, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, true, false, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc inject_level_fns[] = {
+ /* sgi ppi spi */
+ { KVM_SET_IRQ_LINE_HIGH, false, true, true },
+ { KVM_SET_LEVEL_INFO_HIGH, false, true, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, false, true, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc set_active_fns[] = {
+ /* sgi ppi spi */
+ { KVM_WRITE_ISACTIVER, true, true, true },
+ { 0, },
+};
+
+#define for_each_inject_fn(t, f) \
+ for ((f) = (t); (f)->cmd; (f)++)
+
+#define for_each_supported_inject_fn(args, t, f) \
+ for_each_inject_fn(t, f) \
+ if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
+
+#define for_each_supported_activate_fn(args, t, f) \
+ for_each_supported_inject_fn((args), (t), (f))
+
+/* Shared between the guest main thread and the IRQ handlers. */
+volatile uint64_t irq_handled;
+volatile uint32_t irqnr_received[MAX_SPI + 1];
+
+static void reset_stats(void)
+{
+ int i;
+
+ irq_handled = 0;
+ for (i = 0; i <= MAX_SPI; i++)
+ irqnr_received[i] = 0;
+}
+
+static uint64_t gic_read_ap1r0(void)
+{
+ uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
+
+ dsb(sy);
+ return reg;
+}
+
+static void gic_write_ap1r0(uint64_t val)
+{
+ write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
+ isb();
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level);
+
+static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
+{
+ uint32_t intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(gic_irq_get_active(intid));
+
+ if (!level_sensitive)
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+
+ if (level_sensitive)
+ guest_set_irq_line(intid, 0);
+
+ GUEST_ASSERT(intid < MAX_SPI);
+ irqnr_received[intid] += 1;
+ irq_handled += 1;
+
+ gic_set_eoi(intid);
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ if (eoi_split)
+ gic_set_dir(intid);
+
+ GUEST_ASSERT(!gic_irq_get_active(intid));
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+}
+
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure)
+{
+ struct kvm_inject_args args = {
+ .cmd = cmd,
+ .first_intid = first_intid,
+ .num = num,
+ .level = level,
+ .expect_failure = expect_failure,
+ };
+ GUEST_SYNC(&args);
+}
+
+#define GUEST_ASSERT_IAR_EMPTY() \
+do { \
+ uint32_t _intid; \
+ _intid = gic_get_and_ack_irq(); \
+ GUEST_ASSERT(_intid == IAR_SPURIOUS); \
+} while (0)
+
+#define CAT_HELPER(a, b) a ## b
+#define CAT(a, b) CAT_HELPER(a, b)
+#define PREFIX guest_irq_handler_
+#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
+#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \
+static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \
+{ \
+ guest_irq_generic_handler(split, lev); \
+}
+
+GENERATE_GUEST_IRQ_HANDLER(0, 0);
+GENERATE_GUEST_IRQ_HANDLER(0, 1);
+GENERATE_GUEST_IRQ_HANDLER(1, 0);
+GENERATE_GUEST_IRQ_HANDLER(1, 1);
+
+static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
+ {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
+ {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
+};
+
+static void reset_priorities(struct test_args *args)
+{
+ int i;
+
+ for (i = 0; i < args->nr_irqs; i++)
+ gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level)
+{
+ kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
+}
+
+static void test_inject_fail(struct test_args *args,
+ uint32_t intid, kvm_inject_cmd cmd)
+{
+ reset_stats();
+
+ _KVM_INJECT(cmd, intid, true);
+ /* no IRQ to handle on entry */
+
+ GUEST_ASSERT_EQ(irq_handled, 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+static void guest_inject(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t i;
+
+ reset_stats();
+
+ /* Cycle over all priorities to make things more interesting. */
+ for (i = first_intid; i < num + first_intid; i++)
+ gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
+
+ asm volatile("msr daifset, #2" : : : "memory");
+ KVM_INJECT_MULTI(cmd, first_intid, num);
+
+ while (irq_handled < num) {
+ wfi();
+ local_irq_enable();
+ isb(); /* handle IRQ */
+ local_irq_disable();
+ }
+ local_irq_enable();
+
+ GUEST_ASSERT_EQ(irq_handled, num);
+ for (i = first_intid; i < num + first_intid; i++)
+ GUEST_ASSERT_EQ(irqnr_received[i], 1);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+/*
+ * Restore the active state of multiple concurrent IRQs (given by
+ * concurrent_irqs). This does what a live-migration would do on the
+ * destination side assuming there are some active IRQs that were not
+ * deactivated yet.
+ */
+static void guest_restore_active(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t prio, intid, ap1r;
+ int i;
+
+ /*
+ * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+ * in descending order, so intid+1 can preempt intid.
+ */
+ for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ /*
+ * In a real migration, KVM would restore all GIC state before running
+ * guest code.
+ */
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+ KVM_ACTIVATE(cmd, intid);
+ ap1r = gic_read_ap1r0();
+ ap1r |= 1U << i;
+ gic_write_ap1r0(ap1r);
+ }
+
+ /* This is where the "migration" would occur. */
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+/*
+ * Polls the IAR until it's not a spurious interrupt.
+ *
+ * This function should only be used in test_inject_preemption (with IRQs
+ * masked).
+ */
+static uint32_t wait_for_and_activate_irq(void)
+{
+ uint32_t intid;
+
+ do {
+ asm volatile("wfi" : : : "memory");
+ intid = gic_get_and_ack_irq();
+ } while (intid == IAR_SPURIOUS);
+
+ return intid;
+}
+
+/*
+ * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
+ * handle them without handling the actual exceptions. This is done by masking
+ * interrupts for the whole test.
+ */
+static void test_inject_preemption(struct test_args *args,
+ uint32_t first_intid, int num,
+ const unsigned long *exclude,
+ kvm_inject_cmd cmd)
+{
+ uint32_t intid, prio, step = KVM_PRIO_STEPS;
+ int i;
+
+ /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+ * in descending order, so intid+1 can preempt intid.
+ */
+ for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ local_irq_disable();
+
+ for (i = 0; i < num; i++) {
+ uint32_t tmp;
+ intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
+ KVM_INJECT(cmd, intid);
+ /* Each successive IRQ will preempt the previous one. */
+ tmp = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(tmp, intid);
+ if (args->level_sensitive)
+ guest_set_irq_line(intid, 0);
+ }
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+
+ if (exclude && test_bit(intid - first_intid, exclude))
+ continue;
+
+ gic_set_eoi(intid);
+ }
+
+ if (args->eoi_split) {
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+ }
+
+ local_irq_enable();
+
+ for (i = 0; i < num; i++) {
+ if (exclude && test_bit(i, exclude))
+ continue;
+
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ }
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
+{
+ uint32_t nr_irqs = args->nr_irqs;
+
+ if (f->sgi) {
+ guest_inject(args, MIN_SGI, 1, f->cmd);
+ guest_inject(args, 0, 16, f->cmd);
+ }
+
+ if (f->ppi)
+ guest_inject(args, MIN_PPI, 1, f->cmd);
+
+ if (f->spi) {
+ guest_inject(args, MIN_SPI, 1, f->cmd);
+ guest_inject(args, nr_irqs - 1, 1, f->cmd);
+ guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
+ }
+}
+
+static void test_injection_failure(struct test_args *args,
+ struct kvm_inject_desc *f)
+{
+ uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
+ test_inject_fail(args, bad_intid[i], f->cmd);
+}
+
+static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /* Timer PPIs cannot be injected from userspace */
+ static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) |
+ BIT(30 - MIN_PPI) |
+ BIT(28 - MIN_PPI) |
+ BIT(26 - MIN_PPI));
+
+ if (f->sgi)
+ test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd);
+
+ if (f->ppi)
+ test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd);
+
+ if (f->spi)
+ test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd);
+}
+
+static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
+{
+ if (f->sgi)
+ guest_restore_active(args, MIN_SGI, 16, f->cmd);
+
+ if (f->ppi)
+ guest_restore_active(args, MIN_PPI, 16, f->cmd);
+
+ if (f->spi)
+ guest_restore_active(args, MIN_SPI, 31, f->cmd);
+}
+
+static void guest_code(struct test_args *args)
+{
+ uint32_t i, nr_irqs = args->nr_irqs;
+ bool level_sensitive = args->level_sensitive;
+ struct kvm_inject_desc *f, *inject_fns;
+
+ gic_init(GIC_V3, 1);
+
+ for (i = MIN_SPI; i < nr_irqs; i++)
+ gic_irq_set_config(i, !level_sensitive);
+
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
+ gic_set_eoi_split(args->eoi_split);
+
+ reset_priorities(args);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ inject_fns = level_sensitive ? inject_level_fns
+ : inject_edge_fns;
+
+ local_irq_enable();
+
+ /* Start the tests. */
+ for_each_supported_inject_fn(args, inject_fns, f) {
+ test_injection(args, f);
+ test_preemption(args, f);
+ test_injection_failure(args, f);
+ }
+
+ /*
+ * Restore the active state of IRQs. This would happen when live
+ * migrating IRQs in the middle of being handled.
+ */
+ for_each_supported_activate_fn(args, set_active_fns, f)
+ test_restore_active(args, f);
+
+ GUEST_DONE();
+}
+
+static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
+ struct test_args *test_args, bool expect_failure)
+{
+ int ret;
+
+ if (!expect_failure) {
+ kvm_arm_irq_line(vm, intid, level);
+ } else {
+ /* The interface doesn't allow larger intid's. */
+ if (intid > KVM_ARM_IRQ_NUM_MASK)
+ return;
+
+ ret = _kvm_arm_irq_line(vm, intid, level);
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause KVM_IRQ_LINE "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ }
+}
+
+void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
+ bool expect_failure)
+{
+ if (!expect_failure) {
+ kvm_irq_set_level_info(gic_fd, intid, level);
+ } else {
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+ /*
+ * The kernel silently fails for invalid SPIs and SGIs (which
+ * are not level-sensitive). It only checks for intid to not
+ * spill over 1U << 10 (the max reserved SPI). Also, callers
+ * are supposed to mask the intid with 0x3ff (1023).
+ */
+ if (intid > VGIC_MAX_RESERVED)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ struct kvm_irq_routing *routing;
+ int ret;
+ uint64_t i;
+
+ assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
+
+ routing = kvm_gsi_routing_create();
+ for (i = intid; i < (uint64_t)intid + num; i++)
+ kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
+
+ if (!expect_failure) {
+ kvm_gsi_routing_write(vm, routing);
+ } else {
+ ret = _kvm_gsi_routing_write(vm, routing);
+ /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
+ if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
+ struct kvm_vcpu *vcpu,
+ bool expect_failure)
+{
+ /*
+ * Ignore this when expecting failure as invalid intids will lead to
+ * either trying to inject SGIs when we configured the test to be
+ * level_sensitive (or the reverse), or inject large intids which
+ * will lead to writing above the ISPENDR register space (and we
+ * don't want to do that either).
+ */
+ if (!expect_failure)
+ kvm_irq_write_ispendr(gic_fd, intid, vcpu);
+}
+
+static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ int fd[MAX_SPI];
+ uint64_t val;
+ int ret, f;
+ uint64_t i;
+
+ /*
+ * There is no way to try injecting an SGI or PPI as the interface
+ * starts counting from the first SPI (above the private ones), so just
+ * exit.
+ */
+ if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
+ return;
+
+ kvm_set_gsi_routing_irqchip_check(vm, intid, num,
+ kvm_max_routes, expect_failure);
+
+ /*
+ * If expect_failure, then just to inject anyway. These
+ * will silently fail. And in any case, the guest will check
+ * that no actual interrupt was injected for those cases.
+ */
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ fd[f] = kvm_new_eventfd();
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ assert(i <= (uint64_t)UINT_MAX);
+ kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ val = 1;
+ ret = write(fd[f], &val, sizeof(uint64_t));
+ TEST_ASSERT(ret == sizeof(uint64_t),
+ __KVM_SYSCALL_ERROR("write()", ret));
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ kvm_close(fd[f]);
+}
+
+/* handles the valid case: intid=0xffffffff num=1 */
+#define for_each_intid(first, num, tmp, i) \
+ for ((tmp) = (i) = (first); \
+ (tmp) < (uint64_t)(first) + (uint64_t)(num); \
+ (tmp)++, (i)++)
+
+static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
+ struct kvm_inject_args *inject_args,
+ struct test_args *test_args)
+{
+ kvm_inject_cmd cmd = inject_args->cmd;
+ uint32_t intid = inject_args->first_intid;
+ uint32_t num = inject_args->num;
+ int level = inject_args->level;
+ bool expect_failure = inject_args->expect_failure;
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t tmp;
+ uint32_t i;
+
+ /* handles the valid case: intid=0xffffffff num=1 */
+ assert(intid < UINT_MAX - num || num == 1);
+
+ switch (cmd) {
+ case KVM_INJECT_EDGE_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 0, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, level, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_LEVEL_INFO_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_set_level_info_check(gic_fd, i, 1,
+ expect_failure);
+ break;
+ case KVM_INJECT_IRQFD:
+ kvm_routing_and_irqfd_check(vm, intid, num,
+ test_args->kvm_max_routes,
+ expect_failure);
+ break;
+ case KVM_WRITE_ISPENDR:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_ispendr_check(gic_fd, i, vcpu,
+ expect_failure);
+ break;
+ case KVM_WRITE_ISACTIVER:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_isactiver(gic_fd, i, vcpu);
+ break;
+ default:
+ break;
+ }
+}
+
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args)
+{
+ struct kvm_inject_args *kvm_args_hva;
+ vm_vaddr_t kvm_args_gva;
+
+ kvm_args_gva = uc->args[1];
+ kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
+ memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
+}
+
+static void print_args(struct test_args *args)
+{
+ printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
+ args->nr_irqs, args->level_sensitive,
+ args->eoi_split);
+}
+
+static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
+{
+ struct ucall uc;
+ int gic_fd;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_inject_args inject_args;
+ vm_vaddr_t args_gva;
+
+ struct test_args args = {
+ .nr_irqs = nr_irqs,
+ .level_sensitive = level_sensitive,
+ .eoi_split = eoi_split,
+ .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
+ .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
+ };
+
+ print_args(&args);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ /* Setup the guest args page (so it gets the args). */
+ args_gva = vm_vaddr_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vcpu, 1, args_gva);
+
+ gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handlers[args.eoi_split][args.level_sensitive]);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ kvm_inject_get_call(vm, &uc, &inject_args);
+ run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
+static void guest_code_asym_dir(struct test_args *args, int cpuid)
+{
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ if (cpuid == 0) {
+ uint32_t intid;
+
+ local_irq_disable();
+
+ gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO);
+ gic_irq_enable(MIN_SPI);
+ gic_irq_set_pending(MIN_SPI);
+
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(intid, MIN_SPI);
+
+ gic_set_eoi(intid);
+ isb();
+
+ WRITE_ONCE(args->shared_data, MIN_SPI);
+ dsb(ishst);
+
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) == MIN_SPI);
+ GUEST_ASSERT(!gic_irq_get_active(MIN_SPI));
+ } else {
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) != MIN_SPI);
+
+ gic_set_dir(MIN_SPI);
+ isb();
+
+ WRITE_ONCE(args->shared_data, 0);
+ dsb(ishst);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_group_en(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(0);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+ /* SGI0 is G0, which is disabled */
+ gic_irq_set_group(0, 0);
+
+ /* Configure all SGIs with decreasing priority */
+ for (intid = 0; intid < MIN_PPI; intid++) {
+ gic_set_priority(intid, (intid + 1) * 8);
+ gic_irq_enable(intid);
+ gic_irq_set_pending(intid);
+ }
+
+ /* Ack and EOI all G1 interrupts */
+ for (int i = 1; i < MIN_PPI; i++) {
+ intid = wait_for_and_activate_irq();
+
+ GUEST_ASSERT(intid < MIN_PPI);
+ gic_set_eoi(intid);
+ isb();
+ }
+
+ /*
+ * Check that SGI0 is still pending, inactive, and that we cannot
+ * ack anything.
+ */
+ GUEST_ASSERT(gic_irq_get_pending(0));
+ GUEST_ASSERT(!gic_irq_get_active(0));
+ GUEST_ASSERT_IAR_EMPTY();
+ GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS);
+
+ /* Open the G0 gates, and verify we can ack SGI0 */
+ write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1);
+ isb();
+
+ do {
+ intid = read_sysreg_s(SYS_ICC_IAR0_EL1);
+ } while (intid == IAR_SPURIOUS);
+
+ GUEST_ASSERT(intid == 0);
+ GUEST_DONE();
+}
+
+static void guest_code_timer_spi(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+ u64 val;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ /* Add a pending SPI so that KVM starts trapping DIR */
+ gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO);
+ gic_irq_set_pending(MIN_SPI + cpuid);
+
+ /* Configure the timer with a higher priority, make it pending */
+ gic_set_priority(27, IRQ_DEFAULT_PRIO - 8);
+
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* Enable both interrupts */
+ gic_irq_enable(MIN_SPI + cpuid);
+ gic_irq_enable(27);
+
+ /* The timer must fire */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ /* Check that we can deassert it */
+ write_sysreg(0, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(!gic_irq_get_pending(27));
+
+ /*
+ * Priority drop, deactivation -- we expect that the host
+ * deactivation will have been effective
+ */
+ gic_set_eoi(27);
+ gic_set_dir(27);
+
+ GUEST_ASSERT(!gic_irq_get_active(27));
+
+ /* Do it one more time */
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* The timer must fire again */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ GUEST_DONE();
+}
+
+static void *test_vcpu_run(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void test_vgic_two_cpus(void *gcode)
+{
+ pthread_t thr[2];
+ struct kvm_vcpu *vcpus[2];
+ struct test_args args = {};
+ struct kvm_vm *vm;
+ vm_vaddr_t args_gva;
+ int gic_fd, ret;
+
+ vm = vm_create_with_vcpus(2, gcode, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpus[0]);
+ vcpu_init_descriptor_tables(vcpus[1]);
+
+ /* Setup the guest args page (so it gets the args). */
+ args_gva = vm_vaddr_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vcpus[0], 2, args_gva, 0);
+ vcpu_args_set(vcpus[1], 2, args_gva, 1);
+
+ gic_fd = vgic_v3_setup(vm, 2, 64);
+
+ ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret);
+ ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret);
+
+ pthread_join(thr[0], NULL);
+ pthread_join(thr[1], NULL);
+
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
+static void help(const char *name)
+{
+ printf(
+ "\n"
+ "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
+ printf(" -n: specify number of IRQs to setup the vgic with. "
+ "It has to be a multiple of 32 and between 64 and 1024.\n");
+ printf(" -e: if 1 then EOI is split into a write to DIR on top "
+ "of writing EOI.\n");
+ printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
+ puts("");
+ exit(1);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t nr_irqs = 64;
+ bool default_args = true;
+ bool level_sensitive = false;
+ int opt;
+ bool eoi_split = false;
+
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+ test_disable_default_vgic();
+
+ while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
+ switch (opt) {
+ case 'n':
+ nr_irqs = atoi_non_negative("Number of IRQs", optarg);
+ if (nr_irqs > 1024 || nr_irqs % 32)
+ help(argv[0]);
+ break;
+ case 'e':
+ eoi_split = (bool)atoi_paranoid(optarg);
+ default_args = false;
+ break;
+ case 'l':
+ level_sensitive = (bool)atoi_paranoid(optarg);
+ default_args = false;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ /*
+ * If the user just specified nr_irqs and/or gic_version, then run all
+ * combinations.
+ */
+ if (default_args) {
+ test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ test_vgic_two_cpus(guest_code_asym_dir);
+ test_vgic_two_cpus(guest_code_group_en);
+ test_vgic_two_cpus(guest_code_timer_spi);
+ } else {
+ test_vgic(nr_irqs, level_sensitive, eoi_split);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
new file mode 100644
index 000000000000..e857a605f577
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_lpi_stress - Stress test for KVM's ITS emulation
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+
+#include <linux/sizes.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_v3_its.h"
+#include "processor.h"
+#include "ucall.h"
+#include "vgic.h"
+
+#define TEST_MEMSLOT_INDEX 1
+
+#define GIC_LPI_OFFSET 8192
+
+static size_t nr_iterations = 1000;
+static vm_paddr_t gpa_base;
+
+static struct kvm_vm *vm;
+static struct kvm_vcpu **vcpus;
+static int its_fd;
+
+static struct test_data {
+ bool request_vcpus_stop;
+ u32 nr_cpus;
+ u32 nr_devices;
+ u32 nr_event_ids;
+
+ vm_paddr_t device_table;
+ vm_paddr_t collection_table;
+ vm_paddr_t cmdq_base;
+ void *cmdq_base_va;
+ vm_paddr_t itt_tables;
+
+ vm_paddr_t lpi_prop_table;
+ vm_paddr_t lpi_pend_tables;
+} test_data = {
+ .nr_cpus = 1,
+ .nr_devices = 1,
+ .nr_event_ids = 16,
+};
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ u32 intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
+ gic_set_eoi(intid);
+}
+
+static void guest_setup_its_mappings(void)
+{
+ u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
+ u32 nr_events = test_data.nr_event_ids;
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_cpus = test_data.nr_cpus;
+
+ for (coll_id = 0; coll_id < nr_cpus; coll_id++)
+ its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
+
+ /* Round-robin the LPIs to all of the vCPUs in the VM */
+ coll_id = 0;
+ for (device_id = 0; device_id < nr_devices; device_id++) {
+ vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
+
+ its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
+ itt_base, SZ_64K, true);
+
+ for (event_id = 0; event_id < nr_events; event_id++) {
+ its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
+ event_id, coll_id, intid++);
+
+ coll_id = (coll_id + 1) % test_data.nr_cpus;
+ }
+ }
+}
+
+static void guest_invalidate_all_rdists(void)
+{
+ int i;
+
+ for (i = 0; i < test_data.nr_cpus; i++)
+ its_send_invall_cmd(test_data.cmdq_base_va, i);
+}
+
+static void guest_setup_gic(void)
+{
+ static atomic_int nr_cpus_ready = 0;
+ u32 cpuid = guest_get_vcpuid();
+
+ gic_init(GIC_V3, test_data.nr_cpus);
+ gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
+ test_data.lpi_pend_tables + (cpuid * SZ_64K));
+
+ atomic_fetch_add(&nr_cpus_ready, 1);
+
+ if (cpuid > 0)
+ return;
+
+ while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
+ cpu_relax();
+
+ its_init(test_data.collection_table, SZ_64K,
+ test_data.device_table, SZ_64K,
+ test_data.cmdq_base, SZ_64K);
+
+ guest_setup_its_mappings();
+ guest_invalidate_all_rdists();
+
+ /* SYNC to ensure ITS setup is complete */
+ for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
+ its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
+}
+
+static void guest_code(size_t nr_lpis)
+{
+ guest_setup_gic();
+ local_irq_enable();
+
+ GUEST_SYNC(0);
+
+ /*
+ * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
+ * never getting the stop signal.
+ */
+ while (!READ_ONCE(test_data.request_vcpus_stop))
+ cpu_relax();
+
+ GUEST_DONE();
+}
+
+static void setup_memslot(void)
+{
+ size_t pages;
+ size_t sz;
+
+ /*
+ * For the ITS:
+ * - A single level device table
+ * - A single level collection table
+ * - The command queue
+ * - An ITT for each device
+ */
+ sz = (3 + test_data.nr_devices) * SZ_64K;
+
+ /*
+ * For the redistributors:
+ * - A shared LPI configuration table
+ * - An LPI pending table for each vCPU
+ */
+ sz += (1 + test_data.nr_cpus) * SZ_64K;
+
+ pages = sz / vm->page_size;
+ gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
+ TEST_MEMSLOT_INDEX, pages, 0);
+}
+
+#define LPI_PROP_DEFAULT_PRIO 0xa0
+
+static void configure_lpis(void)
+{
+ size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
+ u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
+ size_t i;
+
+ for (i = 0; i < nr_lpis; i++) {
+ tbl[i] = LPI_PROP_DEFAULT_PRIO |
+ LPI_PROP_GROUP1 |
+ LPI_PROP_ENABLED;
+ }
+}
+
+static void setup_test_data(void)
+{
+ size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_cpus = test_data.nr_cpus;
+ vm_paddr_t cmdq_base;
+
+ test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base,
+ TEST_MEMSLOT_INDEX);
+
+ test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base,
+ TEST_MEMSLOT_INDEX);
+
+ cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
+ TEST_MEMSLOT_INDEX);
+ virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
+ test_data.cmdq_base = cmdq_base;
+ test_data.cmdq_base_va = (void *)cmdq_base;
+
+ test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
+ gpa_base, TEST_MEMSLOT_INDEX);
+
+ test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base, TEST_MEMSLOT_INDEX);
+ configure_lpis();
+
+ test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
+ gpa_base, TEST_MEMSLOT_INDEX);
+
+ sync_global_to_guest(vm, test_data);
+}
+
+static void setup_gic(void)
+{
+ its_fd = vgic_its_setup(vm);
+}
+
+static void signal_lpi(u32 device_id, u32 event_id)
+{
+ vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
+
+ struct kvm_msi msi = {
+ .address_lo = db_addr,
+ .address_hi = db_addr >> 32,
+ .data = event_id,
+ .devid = device_id,
+ .flags = KVM_MSI_VALID_DEVID,
+ };
+
+ /*
+ * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
+ * which for arm64 implies having a valid translation in the ITS.
+ */
+ TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
+ "KVM_SIGNAL_MSI ioctl failed");
+}
+
+static pthread_barrier_t test_setup_barrier;
+
+static void *lpi_worker_thread(void *data)
+{
+ u32 device_id = (size_t)data;
+ u32 event_id;
+ size_t i;
+
+ pthread_barrier_wait(&test_setup_barrier);
+
+ for (i = 0; i < nr_iterations; i++)
+ for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
+ signal_lpi(device_id, event_id);
+
+ return NULL;
+}
+
+static void *vcpu_worker_thread(void *data)
+{
+ struct kvm_vcpu *vcpu = data;
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ pthread_barrier_wait(&test_setup_barrier);
+ continue;
+ case UCALL_DONE:
+ return NULL;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall: %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void report_stats(struct timespec delta)
+{
+ double nr_lpis;
+ double time;
+
+ nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
+
+ time = delta.tv_sec;
+ time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
+
+ pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
+}
+
+static void run_test(void)
+{
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_vcpus = test_data.nr_cpus;
+ pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
+ pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
+ struct timespec start, delta;
+ size_t i;
+
+ TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
+
+ pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
+
+ for (i = 0; i < nr_devices; i++)
+ pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
+
+ pthread_barrier_wait(&test_setup_barrier);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (i = 0; i < nr_devices; i++)
+ pthread_join(lpi_threads[i], NULL);
+
+ delta = timespec_elapsed(start);
+ write_guest_global(vm, test_data.request_vcpus_stop, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i], NULL);
+
+ report_stats(delta);
+}
+
+static void setup_vm(void)
+{
+ int i;
+
+ vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
+ TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
+
+ vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ for (i = 0; i < test_data.nr_cpus; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ setup_memslot();
+
+ setup_gic();
+
+ setup_test_data();
+}
+
+static void destroy_vm(void)
+{
+ close(its_fd);
+ kvm_vm_free(vm);
+ free(vcpus);
+}
+
+static void pr_usage(const char *name)
+{
+ pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name);
+ pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
+ pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
+ pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
+ pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
+}
+
+int main(int argc, char **argv)
+{
+ u32 nr_threads;
+ int c;
+
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
+ while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
+ switch (c) {
+ case 'v':
+ test_data.nr_cpus = atoi(optarg);
+ break;
+ case 'd':
+ test_data.nr_devices = atoi(optarg);
+ break;
+ case 'e':
+ test_data.nr_event_ids = atoi(optarg);
+ break;
+ case 'i':
+ nr_iterations = strtoul(optarg, NULL, 0);
+ break;
+ case 'h':
+ default:
+ pr_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ nr_threads = test_data.nr_cpus + test_data.nr_devices;
+ if (nr_threads > get_nprocs())
+ pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
+ nr_threads, get_nprocs());
+
+ setup_vm();
+
+ run_test();
+
+ destroy_vm();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
new file mode 100644
index 000000000000..ae36325c022f
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
@@ -0,0 +1,643 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vpmu_counter_access - Test vPMU event counter access
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * This test checks if the guest can see the same number of the PMU event
+ * counters (PMCR_EL0.N) that userspace sets, if the guest can access
+ * those counters, and if the guest is prevented from accessing any
+ * other counters.
+ * It also checks if the userspace accesses to the PMU regsisters honor the
+ * PMCR.N value that's set for the guest.
+ * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
+ */
+#include <kvm_util.h>
+#include <processor.h>
+#include <test_util.h>
+#include <vgic.h>
+#include <perf/arm_pmuv3.h>
+#include <linux/bitfield.h>
+
+/* The max number of the PMU event counters (excluding the cycle counter) */
+#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1)
+
+/* The cycle counter bit position that's common among the PMU registers */
+#define ARMV8_PMU_CYCLE_IDX 31
+
+struct vpmu_vm {
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+};
+
+static struct vpmu_vm vpmu_vm;
+
+struct pmreg_sets {
+ uint64_t set_reg_id;
+ uint64_t clr_reg_id;
+};
+
+#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
+
+static uint64_t get_pmcr_n(uint64_t pmcr)
+{
+ return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
+}
+
+static uint64_t get_counters_mask(uint64_t n)
+{
+ uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+ if (n)
+ mask |= GENMASK(n - 1, 0);
+ return mask;
+}
+
+/* Read PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline unsigned long read_sel_evcntr(int sel)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ return read_sysreg(pmxevcntr_el0);
+}
+
+/* Write PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline void write_sel_evcntr(int sel, unsigned long val)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ write_sysreg(val, pmxevcntr_el0);
+ isb();
+}
+
+/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline unsigned long read_sel_evtyper(int sel)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ return read_sysreg(pmxevtyper_el0);
+}
+
+/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline void write_sel_evtyper(int sel, unsigned long val)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ write_sysreg(val, pmxevtyper_el0);
+ isb();
+}
+
+static void pmu_disable_reset(void)
+{
+ uint64_t pmcr = read_sysreg(pmcr_el0);
+
+ /* Reset all counters, disabling them */
+ pmcr &= ~ARMV8_PMU_PMCR_E;
+ write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
+ isb();
+}
+
+#define RETURN_READ_PMEVCNTRN(n) \
+ return read_sysreg(pmevcntr##n##_el0)
+static unsigned long read_pmevcntrn(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+ return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+ write_sysreg(val, pmevcntr##n##_el0)
+static void write_pmevcntrn(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+ isb();
+}
+
+#define READ_PMEVTYPERN(n) \
+ return read_sysreg(pmevtyper##n##_el0)
+static unsigned long read_pmevtypern(int n)
+{
+ PMEVN_SWITCH(n, READ_PMEVTYPERN);
+ return 0;
+}
+
+#define WRITE_PMEVTYPERN(n) \
+ write_sysreg(val, pmevtyper##n##_el0)
+static void write_pmevtypern(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+ isb();
+}
+
+/*
+ * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
+ * accessors that test cases will use. Each of the accessors will
+ * either directly reads/writes PMEV{CNTR,TYPER}<n>_EL0
+ * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through
+ * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()).
+ *
+ * This is used to test that combinations of those accessors provide
+ * the consistent behavior.
+ */
+struct pmc_accessor {
+ /* A function to be used to read PMEVTCNTR<n>_EL0 */
+ unsigned long (*read_cntr)(int idx);
+ /* A function to be used to write PMEVTCNTR<n>_EL0 */
+ void (*write_cntr)(int idx, unsigned long val);
+ /* A function to be used to read PMEVTYPER<n>_EL0 */
+ unsigned long (*read_typer)(int idx);
+ /* A function to be used to write PMEVTYPER<n>_EL0 */
+ void (*write_typer)(int idx, unsigned long val);
+};
+
+struct pmc_accessor pmc_accessors[] = {
+ /* test with all direct accesses */
+ { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
+ /* test with all indirect accesses */
+ { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
+ /* read with direct accesses, and write with indirect accesses */
+ { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
+ /* read with indirect accesses, and write with direct accesses */
+ { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
+};
+
+/*
+ * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
+ * assuming that the pointer is one of the entries in pmc_accessors[].
+ */
+#define PMC_ACC_TO_IDX(acc) (acc - &pmc_accessors[0])
+
+#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \
+{ \
+ uint64_t _tval = read_sysreg(regname); \
+ \
+ if (set_expected) \
+ __GUEST_ASSERT((_tval & mask), \
+ "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \
+ _tval, mask, set_expected); \
+ else \
+ __GUEST_ASSERT(!(_tval & mask), \
+ "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \
+ _tval, mask, set_expected); \
+}
+
+/*
+ * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
+ * are set or cleared as specified in @set_expected.
+ */
+static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
+{
+ GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
+}
+
+/*
+ * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
+ * to the specified counter (@pmc_idx) can be read/written as expected.
+ * When @set_op is true, it tries to set the bit for the counter in
+ * those registers by writing the SET registers (the bit won't be set
+ * if the counter is not implemented though).
+ * Otherwise, it tries to clear the bits in the registers by writing
+ * the CLR registers.
+ * Then, it checks if the values indicated in the registers are as expected.
+ */
+static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
+{
+ uint64_t pmcr_n, test_bit = BIT(pmc_idx);
+ bool set_expected = false;
+
+ if (set_op) {
+ write_sysreg(test_bit, pmcntenset_el0);
+ write_sysreg(test_bit, pmintenset_el1);
+ write_sysreg(test_bit, pmovsset_el0);
+
+ /* The bit will be set only if the counter is implemented */
+ pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
+ set_expected = (pmc_idx < pmcr_n) ? true : false;
+ } else {
+ write_sysreg(test_bit, pmcntenclr_el0);
+ write_sysreg(test_bit, pmintenclr_el1);
+ write_sysreg(test_bit, pmovsclr_el0);
+ }
+ check_bitmap_pmu_regs(test_bit, set_expected);
+}
+
+/*
+ * Tests for reading/writing registers for the (implemented) event counter
+ * specified by @pmc_idx.
+ */
+static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+ uint64_t write_data, read_data;
+
+ /* Disable all PMCs and reset all PMCs to zero. */
+ pmu_disable_reset();
+
+ /*
+ * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1.
+ */
+
+ /* Make sure that the bit in those registers are set to 0 */
+ test_bitmap_pmu_regs(pmc_idx, false);
+ /* Test if setting the bit in those registers works */
+ test_bitmap_pmu_regs(pmc_idx, true);
+ /* Test if clearing the bit in those registers works */
+ test_bitmap_pmu_regs(pmc_idx, false);
+
+ /*
+ * Tests for reading/writing the event type register.
+ */
+
+ /*
+ * Set the event type register to an arbitrary value just for testing
+ * of reading/writing the register.
+ * Arm ARM says that for the event from 0x0000 to 0x003F,
+ * the value indicated in the PMEVTYPER<n>_EL0.evtCount field is
+ * the value written to the field even when the specified event
+ * is not supported.
+ */
+ write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
+ acc->write_typer(pmc_idx, write_data);
+ read_data = acc->read_typer(pmc_idx);
+ __GUEST_ASSERT(read_data == write_data,
+ "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+
+ /*
+ * Tests for reading/writing the event count register.
+ */
+
+ read_data = acc->read_cntr(pmc_idx);
+
+ /* The count value must be 0, as it is disabled and reset */
+ __GUEST_ASSERT(read_data == 0,
+ "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
+
+ write_data = read_data + pmc_idx + 0x12345;
+ acc->write_cntr(pmc_idx, write_data);
+ read_data = acc->read_cntr(pmc_idx);
+ __GUEST_ASSERT(read_data == write_data,
+ "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+}
+
+#define INVALID_EC (-1ul)
+uint64_t expected_ec = INVALID_EC;
+
+static void guest_sync_handler(struct ex_regs *regs)
+{
+ uint64_t esr, ec;
+
+ esr = read_sysreg(esr_el1);
+ ec = ESR_ELx_EC(esr);
+
+ __GUEST_ASSERT(expected_ec == ec,
+ "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
+ regs->pc, esr, ec, expected_ec);
+
+ /* skip the trapping instruction */
+ regs->pc += 4;
+
+ /* Use INVALID_EC to indicate an exception occurred */
+ expected_ec = INVALID_EC;
+}
+
+/*
+ * Run the given operation that should trigger an exception with the
+ * given exception class. The exception handler (guest_sync_handler)
+ * will reset op_end_addr to 0, expected_ec to INVALID_EC, and skip
+ * the instruction that trapped.
+ */
+#define TEST_EXCEPTION(ec, ops) \
+({ \
+ GUEST_ASSERT(ec != INVALID_EC); \
+ WRITE_ONCE(expected_ec, ec); \
+ dsb(ish); \
+ ops; \
+ GUEST_ASSERT(expected_ec == INVALID_EC); \
+})
+
+/*
+ * Tests for reading/writing registers for the unimplemented event counter
+ * specified by @pmc_idx (>= PMCR_EL0.N).
+ */
+static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+ /*
+ * Reading/writing the event count/type registers should cause
+ * an UNDEFINED exception.
+ */
+ TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx));
+ TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
+ TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx));
+ TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
+ /*
+ * The bit corresponding to the (unimplemented) counter in
+ * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
+ */
+ test_bitmap_pmu_regs(pmc_idx, 1);
+ test_bitmap_pmu_regs(pmc_idx, 0);
+}
+
+/*
+ * The guest is configured with PMUv3 with @expected_pmcr_n number of
+ * event counters.
+ * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
+ * if reading/writing PMU registers for implemented or unimplemented
+ * counters works as expected.
+ */
+static void guest_code(uint64_t expected_pmcr_n)
+{
+ uint64_t pmcr, pmcr_n, unimp_mask;
+ int i, pmc;
+
+ __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
+ "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
+ expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
+
+ pmcr = read_sysreg(pmcr_el0);
+ pmcr_n = get_pmcr_n(pmcr);
+
+ /* Make sure that PMCR_EL0.N indicates the value userspace set */
+ __GUEST_ASSERT(pmcr_n == expected_pmcr_n,
+ "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
+ expected_pmcr_n, pmcr_n);
+
+ /*
+ * Make sure that (RAZ) bits corresponding to unimplemented event
+ * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
+ * to zero.
+ * (NOTE: bits for implemented event counters are reset to UNKNOWN)
+ */
+ unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
+ check_bitmap_pmu_regs(unimp_mask, false);
+
+ /*
+ * Tests for reading/writing PMU registers for implemented counters.
+ * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+ */
+ for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+ for (pmc = 0; pmc < pmcr_n; pmc++)
+ test_access_pmc_regs(&pmc_accessors[i], pmc);
+ }
+
+ /*
+ * Tests for reading/writing PMU registers for unimplemented counters.
+ * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+ */
+ for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+ for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
+ test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
+ }
+
+ GUEST_DONE();
+}
+
+/* Create a VM that has one vCPU with PMUv3 configured. */
+static void create_vpmu_vm(void *guest_code)
+{
+ struct kvm_vcpu_init init;
+ uint8_t pmuver, ec;
+ uint64_t dfr0, irq = 23;
+ struct kvm_device_attr irq_attr = {
+ .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+ .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+ .addr = (uint64_t)&irq,
+ };
+
+ /* The test creates the vpmu_vm multiple times. Ensure a clean state */
+ memset(&vpmu_vm, 0, sizeof(vpmu_vm));
+
+ vpmu_vm.vm = vm_create(1);
+ vm_init_descriptor_tables(vpmu_vm.vm);
+ for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) {
+ vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
+ guest_sync_handler);
+ }
+
+ /* Create vCPU with PMUv3 */
+ kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+ vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
+ vcpu_init_descriptor_tables(vpmu_vm.vcpu);
+
+ kvm_arch_vm_finalize_vcpus(vpmu_vm.vm);
+
+ /* Make sure that PMUv3 support is indicated in the ID register */
+ dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
+ pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0);
+ TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
+ pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
+ "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
+
+ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
+}
+
+static void destroy_vpmu_vm(void)
+{
+ kvm_vm_free(vpmu_vm.vm);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
+{
+ struct ucall uc;
+
+ vcpu_args_set(vcpu, 1, pmcr_n);
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+}
+
+static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail)
+{
+ struct kvm_vcpu *vcpu;
+ unsigned int prev;
+ int ret;
+
+ create_vpmu_vm(guest_code);
+ vcpu = vpmu_vm.vcpu;
+
+ prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)));
+
+ ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
+ KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters);
+
+ if (expect_fail)
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded",
+ nr_counters, prev);
+ else
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
+
+ vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL);
+}
+
+/*
+ * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
+ * and run the test.
+ */
+static void run_access_test(uint64_t pmcr_n)
+{
+ uint64_t sp;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_init init;
+
+ pr_debug("Test with pmcr_n %lu\n", pmcr_n);
+
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
+ vcpu = vpmu_vm.vcpu;
+
+ /* Save the initial sp to restore them later to run the guest again */
+ sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1));
+
+ run_vcpu(vcpu, pmcr_n);
+
+ /*
+ * Reset and re-initialize the vCPU, and run the guest code again to
+ * check if PMCR_EL0.N is preserved.
+ */
+ kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+ aarch64_vcpu_setup(vcpu, &init);
+ vcpu_init_descriptor_tables(vcpu);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+
+ run_vcpu(vcpu, pmcr_n);
+
+ destroy_vpmu_vm();
+}
+
+static struct pmreg_sets validity_check_reg_sets[] = {
+ PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
+ PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
+ PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
+};
+
+/*
+ * Create a VM, and check if KVM handles the userspace accesses of
+ * the PMU register sets in @validity_check_reg_sets[] correctly.
+ */
+static void run_pmregs_validity_test(uint64_t pmcr_n)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+ uint64_t set_reg_id, clr_reg_id, reg_val;
+ uint64_t valid_counters_mask, max_counters_mask;
+
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
+ vcpu = vpmu_vm.vcpu;
+
+ valid_counters_mask = get_counters_mask(pmcr_n);
+ max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
+
+ for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
+ set_reg_id = validity_check_reg_sets[i].set_reg_id;
+ clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
+
+ /*
+ * Test if the 'set' and 'clr' variants of the registers
+ * are initialized based on the number of valid counters.
+ */
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+
+ /*
+ * Using the 'set' variant, force-set the register to the
+ * max number of possible counters and test if KVM discards
+ * the bits for unimplemented counters as it should.
+ */
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
+
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+ reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+ }
+
+ destroy_vpmu_vm();
+}
+
+/*
+ * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
+ * the vCPU to @pmcr_n, which is larger than the host value.
+ * The attempt should fail as @pmcr_n is too big to set for the vCPU.
+ */
+static void run_error_test(uint64_t pmcr_n)
+{
+ pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
+
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, true);
+ destroy_vpmu_vm();
+}
+
+/*
+ * Return the default number of implemented PMU event counters excluding
+ * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
+ */
+static uint64_t get_pmcr_n_limit(void)
+{
+ uint64_t pmcr;
+
+ create_vpmu_vm(guest_code);
+ pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+ destroy_vpmu_vm();
+ return get_pmcr_n(pmcr);
+}
+
+static bool kvm_supports_nr_counters_attr(void)
+{
+ bool supported;
+
+ create_vpmu_vm(NULL);
+ supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
+ KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS);
+ destroy_vpmu_vm();
+
+ return supported;
+}
+
+int main(void)
+{
+ uint64_t i, pmcr_n;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+ TEST_REQUIRE(kvm_supports_nr_counters_attr());
+
+ pmcr_n = get_pmcr_n_limit();
+ for (i = 0; i <= pmcr_n; i++) {
+ run_access_test(i);
+ run_pmregs_validity_test(i);
+ }
+
+ for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ run_error_test(i);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c
new file mode 100644
index 000000000000..60cb25454899
--- /dev/null
+++ b/tools/testing/selftests/kvm/coalesced_io_test.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/sizes.h>
+
+#include <kvm_util.h>
+#include <processor.h>
+
+#include "ucall_common.h"
+
+struct kvm_coalesced_io {
+ struct kvm_coalesced_mmio_ring *ring;
+ uint32_t ring_size;
+ uint64_t mmio_gpa;
+ uint64_t *mmio;
+
+ /*
+ * x86-only, but define pio_port for all architectures to minimize the
+ * amount of #ifdeffery and complexity, without having to sacrifice
+ * verbose error messages.
+ */
+ uint8_t pio_port;
+};
+
+static struct kvm_coalesced_io kvm_builtin_io_ring;
+
+#ifdef __x86_64__
+static const int has_pio = 1;
+#else
+static const int has_pio = 0;
+#endif
+
+static void guest_code(struct kvm_coalesced_io *io)
+{
+ int i, j;
+
+ for (;;) {
+ for (j = 0; j < 1 + has_pio; j++) {
+ /*
+ * KVM always leaves one free entry, i.e. exits to
+ * userspace before the last entry is filled.
+ */
+ for (i = 0; i < io->ring_size - 1; i++) {
+#ifdef __x86_64__
+ if (i & 1)
+ outl(io->pio_port, io->pio_port + i);
+ else
+#endif
+ WRITE_ONCE(*io->mmio, io->mmio_gpa + i);
+ }
+#ifdef __x86_64__
+ if (j & 1)
+ outl(io->pio_port, io->pio_port + i);
+ else
+#endif
+ WRITE_ONCE(*io->mmio, io->mmio_gpa + i);
+ }
+ GUEST_SYNC(0);
+
+ WRITE_ONCE(*io->mmio, io->mmio_gpa + i);
+#ifdef __x86_64__
+ outl(io->pio_port, io->pio_port + i);
+#endif
+ }
+}
+
+static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu,
+ struct kvm_coalesced_io *io,
+ uint32_t ring_start,
+ uint32_t expected_exit)
+{
+ const bool want_pio = expected_exit == KVM_EXIT_IO;
+ struct kvm_coalesced_mmio_ring *ring = io->ring;
+ struct kvm_run *run = vcpu->run;
+ uint32_t pio_value;
+
+ WRITE_ONCE(ring->first, ring_start);
+ WRITE_ONCE(ring->last, ring_start);
+
+ vcpu_run(vcpu);
+
+ /*
+ * Annoyingly, reading PIO data is safe only for PIO exits, otherwise
+ * data_offset is garbage, e.g. an MMIO gpa.
+ */
+ if (run->exit_reason == KVM_EXIT_IO)
+ pio_value = *(uint32_t *)((void *)run + run->io.data_offset);
+ else
+ pio_value = 0;
+
+ TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write &&
+ run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 &&
+ *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) ||
+ (want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port &&
+ run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 &&
+ pio_value == io->pio_port + io->ring_size - 1)),
+ "For start = %u, expected exit on %u-byte %s write 0x%llx = %lx, got exit_reason = %u (%s)\n "
+ "(MMIO addr = 0x%llx, write = %u, len = %u, data = %lx)\n "
+ "(PIO port = 0x%x, write = %u, len = %u, count = %u, data = %x",
+ ring_start, want_pio ? 4 : 8, want_pio ? "PIO" : "MMIO",
+ want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa,
+ (want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason,
+ run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other",
+ run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data,
+ run->io.port, run->io.direction, run->io.size, run->io.count, pio_value);
+}
+
+static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu,
+ struct kvm_coalesced_io *io,
+ uint32_t ring_start,
+ uint32_t expected_exit)
+{
+ struct kvm_coalesced_mmio_ring *ring = io->ring;
+ int i;
+
+ vcpu_run_and_verify_io_exit(vcpu, io, ring_start, expected_exit);
+
+ TEST_ASSERT((ring->last + 1) % io->ring_size == ring->first,
+ "Expected ring to be full (minus 1), first = %u, last = %u, max = %u, start = %u",
+ ring->first, ring->last, io->ring_size, ring_start);
+
+ for (i = 0; i < io->ring_size - 1; i++) {
+ uint32_t idx = (ring->first + i) % io->ring_size;
+ struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx];
+
+#ifdef __x86_64__
+ if (i & 1)
+ TEST_ASSERT(entry->phys_addr == io->pio_port &&
+ entry->len == 4 && entry->pio &&
+ *(uint32_t *)entry->data == io->pio_port + i,
+ "Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x",
+ io->pio_port, io->pio_port + i, i,
+ entry->len, entry->pio ? "PIO" : "MMIO",
+ entry->phys_addr, *(uint32_t *)entry->data);
+ else
+#endif
+ TEST_ASSERT(entry->phys_addr == io->mmio_gpa &&
+ entry->len == 8 && !entry->pio,
+ "Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx",
+ io->mmio_gpa, io->mmio_gpa + i, i,
+ entry->len, entry->pio ? "PIO" : "MMIO",
+ entry->phys_addr, *(uint64_t *)entry->data);
+ }
+}
+
+static void test_coalesced_io(struct kvm_vcpu *vcpu,
+ struct kvm_coalesced_io *io, uint32_t ring_start)
+{
+ struct kvm_coalesced_mmio_ring *ring = io->ring;
+
+ kvm_vm_register_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */);
+#ifdef __x86_64__
+ kvm_vm_register_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */);
+#endif
+
+ vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_MMIO);
+#ifdef __x86_64__
+ vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_IO);
+#endif
+
+ /*
+ * Verify ucall, which may use non-coalesced MMIO or PIO, generates an
+ * immediate exit.
+ */
+ WRITE_ONCE(ring->first, ring_start);
+ WRITE_ONCE(ring->last, ring_start);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+ TEST_ASSERT_EQ(ring->first, ring_start);
+ TEST_ASSERT_EQ(ring->last, ring_start);
+
+ /* Verify that non-coalesced MMIO/PIO generates an exit to userspace. */
+ kvm_vm_unregister_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */);
+ vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_MMIO);
+
+#ifdef __x86_64__
+ kvm_vm_unregister_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */);
+ vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_IO);
+#endif
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int i;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_MMIO));
+
+#ifdef __x86_64__
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_PIO));
+#endif
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ kvm_builtin_io_ring = (struct kvm_coalesced_io) {
+ /*
+ * The I/O ring is a kernel-allocated page whose address is
+ * relative to each vCPU's run page, with the page offset
+ * provided by KVM in the return of KVM_CAP_COALESCED_MMIO.
+ */
+ .ring = (void *)vcpu->run +
+ (kvm_check_cap(KVM_CAP_COALESCED_MMIO) * getpagesize()),
+
+ /*
+ * The size of the I/O ring is fixed, but KVM defines the sized
+ * based on the kernel's PAGE_SIZE. Thus, userspace must query
+ * the host's page size at runtime to compute the ring size.
+ */
+ .ring_size = (getpagesize() - sizeof(struct kvm_coalesced_mmio_ring)) /
+ sizeof(struct kvm_coalesced_mmio),
+
+ /*
+ * Arbitrary address+port (MMIO mustn't overlap memslots), with
+ * the MMIO GPA identity mapped in the guest.
+ */
+ .mmio_gpa = 4ull * SZ_1G,
+ .mmio = (uint64_t *)(4ull * SZ_1G),
+ .pio_port = 0x80,
+ };
+
+ virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1);
+
+ sync_global_to_guest(vm, kvm_builtin_io_ring);
+ vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring);
+
+ for (i = 0; i < kvm_builtin_io_ring.ring_size; i++)
+ test_coalesced_io(vcpu, &kvm_builtin_io_ring, i);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
index 63ed533f73d6..96d874b239eb 100644
--- a/tools/testing/selftests/kvm/config
+++ b/tools/testing/selftests/kvm/config
@@ -1,3 +1,6 @@
CONFIG_KVM=y
CONFIG_KVM_INTEL=y
CONFIG_KVM_AMD=y
+CONFIG_EVENTFD=y
+CONFIG_USERFAULTFD=y
+CONFIG_IDLE_PAGE_TRACKING=y
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 5f7a229c3af1..0202b78f8680 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -6,398 +6,260 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2019, Google, Inc.
*/
-
-#define _GNU_SOURCE /* for pipe2 */
-
+#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
-#include <poll.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include <sys/syscall.h>
#include "kvm_util.h"
#include "test_util.h"
-#include "perf_test_util.h"
+#include "memstress.h"
#include "guest_modes.h"
+#include "ucall_common.h"
+#include "userfaultfd_util.h"
#ifdef __NR_userfaultfd
-#ifdef PRINT_PER_PAGE_UPDATES
-#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
-#endif
-
-#ifdef PRINT_PER_VCPU_UPDATES
-#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
-#endif
-
static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static size_t demand_paging_size;
static char *guest_data_prototype;
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
- int ret;
- struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
- int vcpu_id = vcpu_args->vcpu_id;
- struct kvm_vm *vm = perf_test_args.vm;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+ struct kvm_run *run = vcpu->run;
struct timespec start;
struct timespec ts_diff;
-
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
- run = vcpu_state(vm, vcpu_id);
+ int ret;
clock_gettime(CLOCK_MONOTONIC, &start);
/* Let the guest access its memory */
- ret = _vcpu_run(vm, vcpu_id);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) {
+ ret = _vcpu_run(vcpu);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
+ if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
TEST_ASSERT(false,
- "Invalid guest sync status: exit_reason=%s\n",
+ "Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
}
ts_diff = timespec_elapsed(start);
- PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
+ PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx,
ts_diff.tv_sec, ts_diff.tv_nsec);
-
- return NULL;
}
-static int handle_uffd_page_request(int uffd, uint64_t addr)
+static int handle_uffd_page_request(int uffd_mode, int uffd,
+ struct uffd_msg *msg)
{
- pid_t tid;
+ pid_t tid = syscall(__NR_gettid);
+ uint64_t addr = msg->arg.pagefault.address;
struct timespec start;
struct timespec ts_diff;
- struct uffdio_copy copy;
int r;
- tid = syscall(__NR_gettid);
-
- copy.src = (uint64_t)guest_data_prototype;
- copy.dst = addr;
- copy.len = perf_test_args.host_page_size;
- copy.mode = 0;
-
clock_gettime(CLOCK_MONOTONIC, &start);
- r = ioctl(uffd, UFFDIO_COPY, &copy);
- if (r == -1) {
- pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
- addr, tid, errno);
- return r;
+ if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
+ struct uffdio_copy copy;
+
+ copy.src = (uint64_t)guest_data_prototype;
+ copy.dst = addr;
+ copy.len = demand_paging_size;
+ copy.mode = 0;
+
+ r = ioctl(uffd, UFFDIO_COPY, &copy);
+ /*
+ * With multiple vCPU threads fault on a single page and there are
+ * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
+ * will fail with EEXIST: handle that case without signaling an
+ * error.
+ *
+ * Note that this also suppress any EEXISTs occurring from,
+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
+ * happens here, but a realistic VMM might potentially maintain
+ * some external state to correctly surface EEXISTs to userspace
+ * (or prevent duplicate COPY/CONTINUEs in the first place).
+ */
+ if (r == -1 && errno != EEXIST) {
+ pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n",
+ addr, tid, errno);
+ return r;
+ }
+ } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
+ struct uffdio_continue cont = {0};
+
+ cont.range.start = addr;
+ cont.range.len = demand_paging_size;
+
+ r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
+ /*
+ * With multiple vCPU threads fault on a single page and there are
+ * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
+ * will fail with EEXIST: handle that case without signaling an
+ * error.
+ *
+ * Note that this also suppress any EEXISTs occurring from,
+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
+ * happens here, but a realistic VMM might potentially maintain
+ * some external state to correctly surface EEXISTs to userspace
+ * (or prevent duplicate COPY/CONTINUEs in the first place).
+ */
+ if (r == -1 && errno != EEXIST) {
+ pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n",
+ addr, tid, errno);
+ return r;
+ }
+ } else {
+ TEST_FAIL("Invalid uffd mode %d", uffd_mode);
}
ts_diff = timespec_elapsed(start);
- PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
+ PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
timespec_to_ns(ts_diff));
PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
- perf_test_args.host_page_size, addr, tid);
+ demand_paging_size, addr, tid);
return 0;
}
-bool quit_uffd_thread;
-
-struct uffd_handler_args {
- int uffd;
- int pipefd;
- useconds_t delay;
+struct test_params {
+ int uffd_mode;
+ bool single_uffd;
+ useconds_t uffd_delay;
+ int readers_per_uffd;
+ enum vm_mem_backing_src_type src_type;
+ bool partition_vcpu_memory_access;
};
-static void *uffd_handler_thread_fn(void *arg)
+static void prefault_mem(void *alias, uint64_t len)
{
- struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
- int uffd = uffd_args->uffd;
- int pipefd = uffd_args->pipefd;
- useconds_t delay = uffd_args->delay;
- int64_t pages = 0;
- struct timespec start;
- struct timespec ts_diff;
+ size_t p;
- clock_gettime(CLOCK_MONOTONIC, &start);
- while (!quit_uffd_thread) {
- struct uffd_msg msg;
- struct pollfd pollfd[2];
- char tmp_chr;
- int r;
- uint64_t addr;
-
- pollfd[0].fd = uffd;
- pollfd[0].events = POLLIN;
- pollfd[1].fd = pipefd;
- pollfd[1].events = POLLIN;
-
- r = poll(pollfd, 2, -1);
- switch (r) {
- case -1:
- pr_info("poll err");
- continue;
- case 0:
- continue;
- case 1:
- break;
- default:
- pr_info("Polling uffd returned %d", r);
- return NULL;
- }
-
- if (pollfd[0].revents & POLLERR) {
- pr_info("uffd revents has POLLERR");
- return NULL;
- }
-
- if (pollfd[1].revents & POLLIN) {
- r = read(pollfd[1].fd, &tmp_chr, 1);
- TEST_ASSERT(r == 1,
- "Error reading pipefd in UFFD thread\n");
- return NULL;
- }
-
- if (!pollfd[0].revents & POLLIN)
- continue;
-
- r = read(uffd, &msg, sizeof(msg));
- if (r == -1) {
- if (errno == EAGAIN)
- continue;
- pr_info("Read of uffd gor errno %d", errno);
- return NULL;
- }
-
- if (r != sizeof(msg)) {
- pr_info("Read on uffd returned unexpected size: %d bytes", r);
- return NULL;
- }
-
- if (!(msg.event & UFFD_EVENT_PAGEFAULT))
- continue;
-
- if (delay)
- usleep(delay);
- addr = msg.arg.pagefault.address;
- r = handle_uffd_page_request(uffd, addr);
- if (r < 0)
- return NULL;
- pages++;
+ TEST_ASSERT(alias != NULL, "Alias required for minor faults");
+ for (p = 0; p < (len / demand_paging_size); ++p) {
+ memcpy(alias + (p * demand_paging_size),
+ guest_data_prototype, demand_paging_size);
}
-
- ts_diff = timespec_elapsed(start);
- PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
- pages, ts_diff.tv_sec, ts_diff.tv_nsec,
- pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
-
- return NULL;
}
-static int setup_demand_paging(struct kvm_vm *vm,
- pthread_t *uffd_handler_thread, int pipefd,
- useconds_t uffd_delay,
- struct uffd_handler_args *uffd_args,
- void *hva, uint64_t len)
-{
- int uffd;
- struct uffdio_api uffdio_api;
- struct uffdio_register uffdio_register;
-
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd == -1) {
- pr_info("uffd creation failed\n");
- return -1;
- }
-
- uffdio_api.api = UFFD_API;
- uffdio_api.features = 0;
- if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
- pr_info("ioctl uffdio_api failed\n");
- return -1;
- }
-
- uffdio_register.range.start = (uint64_t)hva;
- uffdio_register.range.len = len;
- uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
- pr_info("ioctl uffdio_register failed\n");
- return -1;
- }
-
- if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
- UFFD_API_RANGE_IOCTLS) {
- pr_info("unexpected userfaultfd ioctl set\n");
- return -1;
- }
-
- uffd_args->uffd = uffd;
- uffd_args->pipefd = pipefd;
- uffd_args->delay = uffd_delay;
- pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
- uffd_args);
-
- PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
- hva, hva + len);
-
- return 0;
-}
-
-struct test_params {
- bool use_uffd;
- useconds_t uffd_delay;
- bool partition_vcpu_memory_access;
-};
-
static void run_test(enum vm_guest_mode mode, void *arg)
{
+ struct memstress_vcpu_args *vcpu_args;
struct test_params *p = arg;
- pthread_t *vcpu_threads;
- pthread_t *uffd_handler_threads = NULL;
- struct uffd_handler_args *uffd_args = NULL;
+ struct uffd_desc **uffd_descs = NULL;
+ uint64_t uffd_region_size;
struct timespec start;
struct timespec ts_diff;
- int *pipefds = NULL;
+ double vcpu_paging_rate;
struct kvm_vm *vm;
- int vcpu_id;
- int r;
+ int i, num_uffds = 0;
- vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
- VM_MEM_SRC_ANONYMOUS);
+ vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
+ p->src_type, p->partition_vcpu_memory_access);
- perf_test_args.wr_fract = 1;
+ demand_paging_size = get_backing_src_pagesz(p->src_type);
- guest_data_prototype = malloc(perf_test_args.host_page_size);
+ guest_data_prototype = malloc(demand_paging_size);
TEST_ASSERT(guest_data_prototype,
"Failed to allocate buffer for guest data pattern");
- memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
-
- vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
- TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
- perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
- p->partition_vcpu_memory_access);
-
- if (p->use_uffd) {
- uffd_handler_threads =
- malloc(nr_vcpus * sizeof(*uffd_handler_threads));
- TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
-
- uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
- TEST_ASSERT(uffd_args, "Memory allocation failed");
+ memset(guest_data_prototype, 0xAB, demand_paging_size);
+
+ if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
+ num_uffds = p->single_uffd ? 1 : nr_vcpus;
+ for (i = 0; i < num_uffds; i++) {
+ vcpu_args = &memstress_args.vcpu_args[i];
+ prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa),
+ vcpu_args->pages * memstress_args.guest_page_size);
+ }
+ }
- pipefds = malloc(sizeof(int) * nr_vcpus * 2);
- TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
+ if (p->uffd_mode) {
+ num_uffds = p->single_uffd ? 1 : nr_vcpus;
+ uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds;
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- vm_paddr_t vcpu_gpa;
+ uffd_descs = malloc(num_uffds * sizeof(struct uffd_desc *));
+ TEST_ASSERT(uffd_descs, "Memory allocation failed");
+ for (i = 0; i < num_uffds; i++) {
+ struct memstress_vcpu_args *vcpu_args;
void *vcpu_hva;
- uint64_t vcpu_mem_size;
-
- if (p->partition_vcpu_memory_access) {
- vcpu_gpa = guest_test_phys_mem +
- (vcpu_id * guest_percpu_mem_size);
- vcpu_mem_size = guest_percpu_mem_size;
- } else {
- vcpu_gpa = guest_test_phys_mem;
- vcpu_mem_size = guest_percpu_mem_size * nr_vcpus;
- }
- PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
- vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
-
- /* Cache the HVA pointer of the region */
- vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+ vcpu_args = &memstress_args.vcpu_args[i];
+ /* Cache the host addresses of the region */
+ vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
/*
* Set up user fault fd to handle demand paging
* requests.
*/
- r = pipe2(&pipefds[vcpu_id * 2],
- O_CLOEXEC | O_NONBLOCK);
- TEST_ASSERT(!r, "Failed to set up pipefd");
-
- r = setup_demand_paging(vm,
- &uffd_handler_threads[vcpu_id],
- pipefds[vcpu_id * 2],
- p->uffd_delay, &uffd_args[vcpu_id],
- vcpu_hva, vcpu_mem_size);
- if (r < 0)
- exit(-r);
+ uffd_descs[i] = uffd_setup_demand_paging(
+ p->uffd_mode, p->uffd_delay, vcpu_hva,
+ uffd_region_size,
+ p->readers_per_uffd,
+ &handle_uffd_page_request);
}
}
- /* Export the shared variables to the guest */
- sync_global_to_guest(vm, perf_test_args);
-
pr_info("Finished creating vCPUs and starting uffd threads\n");
clock_gettime(CLOCK_MONOTONIC, &start);
-
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
- &perf_test_args.vcpu_args[vcpu_id]);
- }
-
+ memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
pr_info("Started all vCPUs\n");
- /* Wait for the vcpu threads to quit */
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- pthread_join(vcpu_threads[vcpu_id], NULL);
- PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
- }
-
+ memstress_join_vcpu_threads(nr_vcpus);
ts_diff = timespec_elapsed(start);
-
pr_info("All vCPU threads joined\n");
- if (p->use_uffd) {
- char c;
-
+ if (p->uffd_mode) {
/* Tell the user fault fd handler threads to quit */
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
- TEST_ASSERT(r == 1, "Unable to write to pipefd");
-
- pthread_join(uffd_handler_threads[vcpu_id], NULL);
- }
+ for (i = 0; i < num_uffds; i++)
+ uffd_stop_demand_paging(uffd_descs[i]);
}
- pr_info("Total guest execution time: %ld.%.9lds\n",
+ pr_info("Total guest execution time:\t%ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
- pr_info("Overall demand paging rate: %f pgs/sec\n",
- perf_test_args.vcpu_args[0].pages * nr_vcpus /
- ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
- perf_test_destroy_vm(vm);
+ vcpu_paging_rate = memstress_args.vcpu_args[0].pages /
+ ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC);
+ pr_info("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n",
+ vcpu_paging_rate);
+ pr_info("Overall demand paging rate:\t%f pgs/sec\n",
+ vcpu_paging_rate * nr_vcpus);
+
+ memstress_destroy_vm(vm);
free(guest_data_prototype);
- free(vcpu_threads);
- if (p->use_uffd) {
- free(uffd_handler_threads);
- free(uffd_args);
- free(pipefds);
- }
+ if (p->uffd_mode)
+ free(uffd_descs);
}
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
- " [-b memory] [-v vcpus] [-o]\n", name);
+ printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n"
+ " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n"
+ " [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
guest_modes_help();
- printf(" -u: use User Fault FD to handle vCPU page\n"
- " faults.\n");
+ printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
+ " UFFD registration mode: 'MISSING' or 'MINOR'.\n");
+ kvm_print_vcpu_pinning_help();
+ printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n"
+ " creating one for each region paged by a unique vCPU\n"
+ " Set implicitly with -o, and no effect without -u.\n");
printf(" -d: add a delay in usec to the User Fault\n"
" FD handler to simulate demand paging\n"
" overheads. Ignored without -u.\n");
+ printf(" -r: Set the number of reader threads per uffd.\n");
printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
+ backing_src_help("-s");
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
@@ -408,20 +270,31 @@ static void help(char *name)
int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ const char *cpulist = NULL;
struct test_params p = {
+ .src_type = DEFAULT_VM_MEM_SRC,
.partition_vcpu_memory_access = true,
+ .readers_per_uffd = 1,
+ .single_uffd = false,
};
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) {
+ while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
break;
case 'u':
- p.use_uffd = true;
+ if (!strcmp("MISSING", optarg))
+ p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+ else if (!strcmp("MINOR", optarg))
+ p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
+ TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
+ break;
+ case 'a':
+ p.single_uffd = true;
break;
case 'd':
p.uffd_delay = strtoul(optarg, NULL, 0);
@@ -430,13 +303,26 @@ int main(int argc, char *argv[])
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
+ case 's':
+ p.src_type = parse_backing_src_type(optarg);
+ break;
case 'v':
- nr_vcpus = atoi(optarg);
- TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ TEST_ASSERT(nr_vcpus <= max_vcpus,
"Invalid number of vcpus, must be between 1 and %d", max_vcpus);
break;
+ case 'c':
+ cpulist = optarg;
+ break;
case 'o':
p.partition_vcpu_memory_access = false;
+ p.single_uffd = true;
+ break;
+ case 'r':
+ p.readers_per_uffd = atoi(optarg);
+ TEST_ASSERT(p.readers_per_uffd >= 1,
+ "Invalid number of readers per uffd %d: must be >=1",
+ p.readers_per_uffd);
break;
case 'h':
default:
@@ -445,6 +331,17 @@ int main(int argc, char *argv[])
}
}
+ if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
+ !backing_src_is_shared(p.src_type)) {
+ TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
+ }
+
+ if (cpulist) {
+ kvm_parse_vcpu_pinning(cpulist, memstress_args.vcpu_to_pcpu,
+ nr_vcpus);
+ memstress_args.pin_vcpus = true;
+ }
+
for_each_guest_mode(run_test, &p);
return 0;
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 04a2641261be..0a1ea1d1e2d8 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -16,14 +16,16 @@
#include "kvm_util.h"
#include "test_util.h"
-#include "perf_test_util.h"
+#include "memstress.h"
#include "guest_modes.h"
+#include "ucall_common.h"
/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
#define TEST_HOST_LOOP_N 2UL
static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static bool run_vcpus_while_disabling_dirty_logging;
/* Host variables */
static u64 dirty_log_manual_caps;
@@ -31,128 +33,130 @@ static bool host_quit;
static int iteration;
static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
- int ret;
- struct kvm_vm *vm = perf_test_args.vm;
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
uint64_t pages_count = 0;
struct kvm_run *run;
struct timespec start;
struct timespec ts_diff;
struct timespec total = (struct timespec){0};
struct timespec avg;
- struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
- int vcpu_id = vcpu_args->vcpu_id;
+ int ret;
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
- run = vcpu_state(vm, vcpu_id);
+ run = vcpu->run;
while (!READ_ONCE(host_quit)) {
int current_iteration = READ_ONCE(iteration);
clock_gettime(CLOCK_MONOTONIC, &start);
- ret = _vcpu_run(vm, vcpu_id);
+ ret = _vcpu_run(vcpu);
ts_diff = timespec_elapsed(start);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
- "Invalid guest sync status: exit_reason=%s\n",
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
+ "Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
- pr_debug("Got sync event from vCPU %d\n", vcpu_id);
- vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+ pr_debug("Got sync event from vCPU %d\n", vcpu_idx);
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
pr_debug("vCPU %d updated last completed iteration to %d\n",
- vcpu_id, vcpu_last_completed_iteration[vcpu_id]);
+ vcpu_idx, vcpu_last_completed_iteration[vcpu_idx]);
if (current_iteration) {
pages_count += vcpu_args->pages;
total = timespec_add(total, ts_diff);
pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n",
- vcpu_id, current_iteration, ts_diff.tv_sec,
+ vcpu_idx, current_iteration, ts_diff.tv_sec,
ts_diff.tv_nsec);
} else {
pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n",
- vcpu_id, current_iteration, ts_diff.tv_sec,
+ vcpu_idx, current_iteration, ts_diff.tv_sec,
ts_diff.tv_nsec);
}
+ /*
+ * Keep running the guest while dirty logging is being disabled
+ * (iteration is negative) so that vCPUs are accessing memory
+ * for the entire duration of zapping collapsible SPTEs.
+ */
while (current_iteration == READ_ONCE(iteration) &&
- !READ_ONCE(host_quit)) {}
+ READ_ONCE(iteration) >= 0 && !READ_ONCE(host_quit)) {}
}
- avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]);
+ avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_idx]);
pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
- vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
+ vcpu_idx, pages_count, vcpu_last_completed_iteration[vcpu_idx],
total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
-
- return NULL;
}
struct test_params {
unsigned long iterations;
uint64_t phys_offset;
- int wr_fract;
bool partition_vcpu_memory_access;
enum vm_mem_backing_src_type backing_src;
+ int slots;
+ uint32_t write_percent;
+ bool random_access;
};
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
- pthread_t *vcpu_threads;
struct kvm_vm *vm;
- unsigned long *bmap;
+ unsigned long **bitmaps;
uint64_t guest_num_pages;
uint64_t host_num_pages;
- int vcpu_id;
+ uint64_t pages_per_slot;
struct timespec start;
struct timespec ts_diff;
struct timespec get_dirty_log_total = (struct timespec){0};
struct timespec vcpu_dirty_total = (struct timespec){0};
struct timespec avg;
- struct kvm_enable_cap cap = {};
struct timespec clear_dirty_log_total = (struct timespec){0};
+ int i;
- vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
- p->backing_src);
+ vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+ p->slots, p->backing_src,
+ p->partition_vcpu_memory_access);
- perf_test_args.wr_fract = p->wr_fract;
+ memstress_set_write_percent(vm, p->write_percent);
- guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
+ guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
- bmap = bitmap_alloc(host_num_pages);
-
- if (dirty_log_manual_caps) {
- cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
- cap.args[0] = dirty_log_manual_caps;
- vm_enable_cap(vm, &cap);
- }
-
- vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
- TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+ pages_per_slot = host_num_pages / p->slots;
- perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
- p->partition_vcpu_memory_access);
+ bitmaps = memstress_alloc_bitmaps(p->slots, pages_per_slot);
- sync_global_to_guest(vm, perf_test_args);
+ if (dirty_log_manual_caps)
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+ dirty_log_manual_caps);
/* Start the iterations */
iteration = 0;
host_quit = false;
clock_gettime(CLOCK_MONOTONIC, &start);
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- vcpu_last_completed_iteration[vcpu_id] = -1;
-
- pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
- &perf_test_args.vcpu_args[vcpu_id]);
- }
+ for (i = 0; i < nr_vcpus; i++)
+ vcpu_last_completed_iteration[i] = -1;
+
+ /*
+ * Use 100% writes during the population phase to ensure all
+ * memory is actually populated and not just mapped to the zero
+ * page. The prevents expensive copy-on-write faults from
+ * occurring during the dirty memory iterations below, which
+ * would pollute the performance results.
+ */
+ memstress_set_write_percent(vm, 100);
+ memstress_set_random_access(vm, false);
+ memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
/* Allow the vCPUs to populate memory */
pr_debug("Starting iteration %d - Populating\n", iteration);
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+ for (i = 0; i < nr_vcpus; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
iteration)
;
}
@@ -163,12 +167,14 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Enable dirty logging */
clock_gettime(CLOCK_MONOTONIC, &start);
- vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX,
- KVM_MEM_LOG_DIRTY_PAGES);
+ memstress_enable_dirty_logging(vm, p->slots);
ts_diff = timespec_elapsed(start);
pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
+ memstress_set_write_percent(vm, p->write_percent);
+ memstress_set_random_access(vm, p->random_access);
+
while (iteration < p->iterations) {
/*
* Incrementing the iteration number will start the vCPUs
@@ -178,8 +184,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration++;
pr_debug("Starting iteration %d\n", iteration);
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id])
+ for (i = 0; i < nr_vcpus; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i])
!= iteration)
;
}
@@ -190,8 +196,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
clock_gettime(CLOCK_MONOTONIC, &start);
- kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap);
-
+ memstress_get_dirty_log(vm, bitmaps, p->slots);
ts_diff = timespec_elapsed(start);
get_dirty_log_total = timespec_add(get_dirty_log_total,
ts_diff);
@@ -200,9 +205,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (dirty_log_manual_caps) {
clock_gettime(CLOCK_MONOTONIC, &start);
- kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0,
- host_num_pages);
-
+ memstress_clear_dirty_log(vm, bitmaps, p->slots,
+ pages_per_slot);
ts_diff = timespec_elapsed(start);
clear_dirty_log_total = timespec_add(clear_dirty_log_total,
ts_diff);
@@ -211,17 +215,28 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
}
+ /*
+ * Run vCPUs while dirty logging is being disabled to stress disabling
+ * in terms of both performance and correctness. Opt-in via command
+ * line as this significantly increases time to disable dirty logging.
+ */
+ if (run_vcpus_while_disabling_dirty_logging)
+ WRITE_ONCE(iteration, -1);
+
/* Disable dirty logging */
clock_gettime(CLOCK_MONOTONIC, &start);
- vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0);
+ memstress_disable_dirty_logging(vm, p->slots);
ts_diff = timespec_elapsed(start);
pr_info("Disabling dirty logging time: %ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
- /* Tell the vcpu thread to quit */
+ /*
+ * Tell the vCPU threads to quit. No need to manually check that vCPUs
+ * have stopped running after disabling dirty logging, the join will
+ * wait for them to exit.
+ */
host_quit = true;
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
- pthread_join(vcpu_threads[vcpu_id], NULL);
+ memstress_join_vcpu_threads(nr_vcpus);
avg = timespec_div(get_dirty_log_total, p->iterations);
pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
@@ -235,35 +250,49 @@ static void run_test(enum vm_guest_mode mode, void *arg)
clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
}
- free(bmap);
- free(vcpu_threads);
- perf_test_destroy_vm(vm);
+ memstress_free_bitmaps(bitmaps, p->slots);
+ memstress_destroy_vm(vm);
}
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-i iterations] [-p offset] "
- "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]\n", name);
+ printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] "
+ "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]"
+ "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name);
puts("");
+ printf(" -a: access memory randomly rather than in order.\n");
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
TEST_HOST_LOOP_N);
+ printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n"
+ " makes KVM_GET_DIRTY_LOG clear the dirty log (i.e.\n"
+ " KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is not enabled)\n"
+ " and writes will be tracked as soon as dirty logging is\n"
+ " enabled on the memslot (i.e. KVM_DIRTY_LOG_INITIALLY_SET\n"
+ " is not enabled).\n");
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
guest_modes_help();
+ printf(" -n: Run the vCPUs in nested mode (L2)\n");
+ printf(" -e: Run vCPUs while dirty logging is being disabled. This\n"
+ " can significantly increase runtime, especially if there\n"
+ " isn't a dedicated pCPU for the main thread.\n");
printf(" -b: specify the size of the memory region which should be\n"
" dirtied by each vCPU. e.g. 10M or 3G.\n"
" (default: 1G)\n");
- printf(" -f: specify the fraction of pages which should be written to\n"
- " as opposed to simply read, in the form\n"
- " 1/<fraction of pages to write>.\n"
- " (default: 1 i.e. all pages are written to.)\n");
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
- printf(" -s: specify the type of memory that should be used to\n"
- " back the guest data region.\n\n");
- backing_src_help();
+ printf(" -r: specify the starting random seed.\n");
+ backing_src_help("-s");
+ printf(" -x: Split the memory region into this number of memslots.\n"
+ " (default: 1)\n");
+ printf(" -w: specify the percentage of pages which should be written to\n"
+ " as an integer from 0-100 inclusive. This is probabilistic,\n"
+ " so -w X means each page has an X%% chance of writing\n"
+ " and a (100-X)%% chance of reading.\n"
+ " (default: 100 i.e. all pages are written to.)\n");
+ kvm_print_vcpu_pinning_help();
puts("");
exit(0);
}
@@ -271,14 +300,19 @@ static void help(char *name)
int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ const char *pcpu_list = NULL;
struct test_params p = {
.iterations = TEST_HOST_LOOP_N,
- .wr_fract = 1,
.partition_vcpu_memory_access = true,
- .backing_src = VM_MEM_SRC_ANONYMOUS,
+ .backing_src = DEFAULT_VM_MEM_SRC,
+ .slots = 1,
+ .write_percent = 100,
};
int opt;
+ /* Override the seed to be deterministic by default. */
+ guest_random_seed = 1;
+
dirty_log_manual_caps =
kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
@@ -286,42 +320,73 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:")) != -1) {
+ while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) {
switch (opt) {
- case 'i':
- p.iterations = atoi(optarg);
- break;
- case 'p':
- p.phys_offset = strtoull(optarg, NULL, 0);
- break;
- case 'm':
- guest_modes_cmdline(optarg);
+ case 'a':
+ p.random_access = true;
break;
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
- case 'f':
- p.wr_fract = atoi(optarg);
- TEST_ASSERT(p.wr_fract >= 1,
- "Write fraction cannot be less than one");
+ case 'c':
+ pcpu_list = optarg;
break;
- case 'v':
- nr_vcpus = atoi(optarg);
- TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
- "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
+ case 'e':
+ /* 'e' is for evil. */
+ run_vcpus_while_disabling_dirty_logging = true;
+ break;
+ case 'g':
+ dirty_log_manual_caps = 0;
+ break;
+ case 'h':
+ help(argv[0]);
+ break;
+ case 'i':
+ p.iterations = atoi_positive("Number of iterations", optarg);
+ break;
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'n':
+ memstress_args.nested = true;
break;
case 'o':
p.partition_vcpu_memory_access = false;
+ break;
+ case 'p':
+ p.phys_offset = strtoull(optarg, NULL, 0);
+ break;
+ case 'r':
+ guest_random_seed = atoi_positive("Random seed", optarg);
+ break;
case 's':
p.backing_src = parse_backing_src_type(optarg);
break;
- case 'h':
+ case 'v':
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ TEST_ASSERT(nr_vcpus <= max_vcpus,
+ "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
+ break;
+ case 'w':
+ p.write_percent = atoi_non_negative("Write percentage", optarg);
+ TEST_ASSERT(p.write_percent <= 100,
+ "Write percentage must be between 0 and 100");
+ break;
+ case 'x':
+ p.slots = atoi_positive("Number of slots", optarg);
+ break;
default:
help(argv[0]);
break;
}
}
+ if (pcpu_list) {
+ kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu,
+ nr_vcpus);
+ memstress_args.pin_vcpus = true;
+ }
+
TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");
pr_info("Test iterations: %"PRIu64"\n", p.iterations);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index bb2752d78fe3..d58a641b0e6a 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -4,9 +4,6 @@
*
* Copyright (C) 2018, Red Hat, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
@@ -16,14 +13,17 @@
#include <errno.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
+#include <linux/atomic.h>
#include <asm/barrier.h>
#include "kvm_util.h"
#include "test_util.h"
#include "guest_modes.h"
#include "processor.h"
+#include "ucall_common.h"
-#define VCPU_ID 1
+#define DIRTY_MEM_BITS 30 /* 1G */
+#define PAGE_SHIFT_4K 12
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
@@ -31,34 +31,37 @@
/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM 0xc0000000
-/* How many pages to dirty for each guest loop */
-#define TEST_PAGES_PER_LOOP 1024
-
/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N 32UL
/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL 10UL
+/*
+ * Ensure the vCPU is able to perform a reasonable number of writes in each
+ * iteration to provide a lower bound on coverage.
+ */
+#define TEST_MIN_WRITES_PER_ITERATION 0x100
+
/* Dirty bitmaps are always little endian, so we need to swap on big endian */
#if defined(__s390x__)
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
# define test_bit_le(nr, addr) \
test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define set_bit_le(nr, addr) \
- set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define clear_bit_le(nr, addr) \
- clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define test_and_set_bit_le(nr, addr) \
- test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define test_and_clear_bit_le(nr, addr) \
- test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __set_bit_le(nr, addr) \
+ __set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __clear_bit_le(nr, addr) \
+ __clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __test_and_set_bit_le(nr, addr) \
+ __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __test_and_clear_bit_le(nr, addr) \
+ __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
#else
-# define test_bit_le test_bit
-# define set_bit_le set_bit
-# define clear_bit_le clear_bit
-# define test_and_set_bit_le test_and_set_bit
-# define test_and_clear_bit_le test_and_clear_bit
+# define test_bit_le test_bit
+# define __set_bit_le __set_bit
+# define __clear_bit_le __clear_bit
+# define __test_and_set_bit_le __test_and_set_bit
+# define __test_and_clear_bit_le __test_and_clear_bit
#endif
#define TEST_DIRTY_RING_COUNT 65536
@@ -74,8 +77,9 @@
static uint64_t host_page_size;
static uint64_t guest_page_size;
static uint64_t guest_num_pages;
-static uint64_t random_array[TEST_PAGES_PER_LOOP];
static uint64_t iteration;
+static uint64_t nr_writes;
+static bool vcpu_stop;
/*
* Guest physical memory offset of the testing memory slot.
@@ -97,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
static void guest_code(void)
{
uint64_t addr;
- int i;
+
+#ifdef __s390x__
+ uint64_t i;
/*
* On s390x, all pages of a 1M segment are initially marked as dirty
@@ -107,19 +113,22 @@ static void guest_code(void)
*/
for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
- *(uint64_t *)addr = READ_ONCE(iteration);
+ vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
+#endif
while (true) {
- for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+ while (!READ_ONCE(vcpu_stop)) {
addr = guest_test_virt_mem;
- addr += (READ_ONCE(random_array[i]) % guest_num_pages)
+ addr += (guest_random_u64(&guest_rng) % guest_num_pages)
* guest_page_size;
- addr &= ~(host_page_size - 1);
- *(uint64_t *)addr = READ_ONCE(iteration);
+ addr = align_down(addr, host_page_size);
+
+ vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
- /* Tell the host that we need more random numbers */
GUEST_SYNC(1);
}
}
@@ -134,17 +143,18 @@ static uint64_t host_num_pages;
/* For statistics only */
static uint64_t host_dirty_count;
static uint64_t host_clear_count;
-static uint64_t host_track_next_count;
/* Whether dirty ring reset is requested, or finished */
-static sem_t dirty_ring_vcpu_stop;
-static sem_t dirty_ring_vcpu_cont;
+static sem_t sem_vcpu_stop;
+static sem_t sem_vcpu_cont;
+
/*
* This is updated by the vcpu thread to tell the host whether it's a
* ring-full event. It should only be read until a sem_wait() of
- * dirty_ring_vcpu_stop and before vcpu continues to run.
+ * sem_vcpu_stop and before vcpu continues to run.
*/
static bool dirty_ring_vcpu_ring_full;
+
/*
* This is only used for verifying the dirty pages. Dirty ring has a very
* tricky case when the ring just got full, kvm will do userspace exit due to
@@ -159,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full;
* dirty gfn we've collected, so that if a mismatch of data found later in the
* verifying process, we let it pass.
*/
-static uint64_t dirty_ring_last_page;
+static uint64_t dirty_ring_last_page = -1ULL;
+
+/*
+ * In addition to the above, it is possible (especially if this
+ * test is run nested) for the above scenario to repeat multiple times:
+ *
+ * The following can happen:
+ *
+ * - L1 vCPU: Memory write is logged to PML but not committed.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT
+ * - Issues tlb flush (invept), which is intercepted by L0
+ *
+ * - L0: frees the whole nested ept mmu root as the response to invept,
+ * and thus ensures that when memory write is retried, it will fault again
+ *
+ * - L1 vCPU: Same memory write is logged to the PML but not committed again.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry (again)
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT (again)
+ * - Issues tlb flush (again) which is intercepted by L0
+ *
+ * ...
+ *
+ * N times
+ *
+ * - L1 vCPU: Memory write is logged in the PML and then committed.
+ * Lots of other memory writes are logged and committed.
+ * ...
+ *
+ * - L1 test thread: Sees the memory write along with other memory writes
+ * in the dirty ring, and since the write is usually not
+ * the last entry in the dirty-ring and has a very outdated
+ * iteration, the test fails.
+ *
+ *
+ * Note that this is only possible when the write was the last log entry
+ * write during iteration N-1, thus remember last iteration last log entry
+ * and also don't fail when it is reported in the next iteration, together with
+ * an outdated iteration count.
+ */
+static uint64_t dirty_ring_prev_iteration_last_page;
enum log_mode_t {
/* Only use KVM_GET_DIRTY_LOG for logging */
@@ -184,75 +238,83 @@ static enum log_mode_t host_log_mode;
static pthread_t vcpu_thread;
static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
-static void vcpu_kick(void)
-{
- pthread_kill(vcpu_thread, SIG_IPI);
-}
-
-/*
- * In our test we do signal tricks, let's use a better version of
- * sem_wait to avoid signal interrupts
- */
-static void sem_wait_until(sem_t *sem)
-{
- int ret;
-
- do
- ret = sem_wait(sem);
- while (ret == -1 && errno == EINTR);
-}
-
static bool clear_log_supported(void)
{
- return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+ return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
}
static void clear_log_create_vm_done(struct kvm_vm *vm)
{
- struct kvm_enable_cap cap = {};
u64 manual_caps;
manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
KVM_DIRTY_LOG_INITIALLY_SET);
- cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
- cap.args[0] = manual_caps;
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, manual_caps);
}
-static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
- void *bitmap, uint32_t num_pages)
+static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *unused)
{
- kvm_vm_get_dirty_log(vm, slot, bitmap);
+ kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
}
-static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
- void *bitmap, uint32_t num_pages)
+static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *unused)
{
- kvm_vm_get_dirty_log(vm, slot, bitmap);
- kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
+ kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
+ kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages);
}
-static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+/* Should only be called after a GUEST_SYNC */
+static void vcpu_handle_sync_stop(void)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ if (READ_ONCE(vcpu_stop)) {
+ sem_post(&sem_vcpu_stop);
+ sem_wait(&sem_vcpu_cont);
+ }
+}
- TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
- "vcpu run failed: errno=%d", err);
+static void default_after_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
- TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
- "Invalid guest sync status: exit_reason=%s\n",
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
+ "Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
+
+ vcpu_handle_sync_stop();
}
static bool dirty_ring_supported(void)
{
- return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
+ return (kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) ||
+ kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL));
}
static void dirty_ring_create_vm_done(struct kvm_vm *vm)
{
+ uint64_t pages;
+ uint32_t limit;
+
+ /*
+ * We rely on vcpu exit due to full dirty ring state. Adjust
+ * the ring buffer size to ensure we're able to reach the
+ * full dirty ring state.
+ */
+ pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
+ pages = vm_adjust_num_guest_pages(vm->mode, pages);
+ if (vm->page_size < getpagesize())
+ pages = vm_num_host_pages(vm->mode, pages);
+
+ limit = 1 << (31 - __builtin_clz(pages));
+ test_dirty_ring_count = 1 << (31 - __builtin_clz(test_dirty_ring_count));
+ test_dirty_ring_count = min(limit, test_dirty_ring_count);
+ pr_info("dirty ring count: 0x%x\n", test_dirty_ring_count);
+
/*
* Switch to dirty ring mode after VM creation but before any
* of the vcpu creation.
@@ -263,12 +325,12 @@ static void dirty_ring_create_vm_done(struct kvm_vm *vm)
static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
{
- return gfn->flags == KVM_DIRTY_GFN_F_DIRTY;
+ return smp_load_acquire(&gfn->flags) == KVM_DIRTY_GFN_F_DIRTY;
}
static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
{
- gfn->flags = KVM_DIRTY_GFN_F_RESET;
+ smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET);
}
static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
@@ -286,8 +348,7 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
"%u != %u", cur->slot, slot);
TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
"0x%llx >= 0x%x", cur->offset, num_pages);
- //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
- set_bit_le(cur->offset, bitmap);
+ __set_bit_le(cur->offset, bitmap);
dirty_ring_last_page = cur->offset;
dirty_gfn_set_collected(cur);
(*fetch_index)++;
@@ -297,89 +358,44 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
return count;
}
-static void dirty_ring_wait_vcpu(void)
+static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *ring_buf_idx)
{
- /* This makes sure that hardware PML cache flushed */
- vcpu_kick();
- sem_wait_until(&dirty_ring_vcpu_stop);
-}
-
-static void dirty_ring_continue_vcpu(void)
-{
- pr_info("Notifying vcpu to continue\n");
- sem_post(&dirty_ring_vcpu_cont);
-}
-
-static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
- void *bitmap, uint32_t num_pages)
-{
- /* We only have one vcpu */
- static uint32_t fetch_index = 0;
- uint32_t count = 0, cleared;
- bool continued_vcpu = false;
-
- dirty_ring_wait_vcpu();
-
- if (!dirty_ring_vcpu_ring_full) {
- /*
- * This is not a ring-full event, it's safe to allow
- * vcpu to continue
- */
- dirty_ring_continue_vcpu();
- continued_vcpu = true;
- }
+ uint32_t count, cleared;
/* Only have one vcpu */
- count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
- slot, bitmap, num_pages, &fetch_index);
+ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
+ slot, bitmap, num_pages,
+ ring_buf_idx);
- cleared = kvm_vm_reset_dirty_ring(vm);
+ cleared = kvm_vm_reset_dirty_ring(vcpu->vm);
- /* Cleared pages should be the same as collected */
+ /*
+ * Cleared pages should be the same as collected, as KVM is supposed to
+ * clear only the entries that have been harvested.
+ */
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
"with collected (%u)", cleared, count);
-
- if (!continued_vcpu) {
- TEST_ASSERT(dirty_ring_vcpu_ring_full,
- "Didn't continue vcpu even without ring full");
- dirty_ring_continue_vcpu();
- }
-
- pr_info("Iteration %ld collected %u pages\n", iteration, count);
}
-static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
/* A ucall-sync or ring-full event is allowed */
- if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
- /* We should allow this to continue */
- ;
- } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
- (ret == -1 && err == EINTR)) {
- /* Update the flag first before pause */
- WRITE_ONCE(dirty_ring_vcpu_ring_full,
- run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
- sem_post(&dirty_ring_vcpu_stop);
- pr_info("vcpu stops because %s...\n",
- dirty_ring_vcpu_ring_full ?
- "dirty ring is full" : "vcpu is kicked out");
- sem_wait_until(&dirty_ring_vcpu_cont);
- pr_info("vcpu continues now.\n");
+ if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
+ vcpu_handle_sync_stop();
+ } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, true);
+ vcpu_handle_sync_stop();
} else {
TEST_ASSERT(false, "Invalid guest sync status: "
- "exit_reason=%s\n",
+ "exit_reason=%s",
exit_reason_str(run->exit_reason));
}
}
-static void dirty_ring_before_vcpu_join(void)
-{
- /* Kick another round of vcpu just to make sure it will quit */
- sem_post(&dirty_ring_vcpu_cont);
-}
-
struct log_mode {
const char *name;
/* Return true if this mode is supported, otherwise false */
@@ -387,11 +403,11 @@ struct log_mode {
/* Hook when the vm creation is done (before vcpu creation) */
void (*create_vm_done)(struct kvm_vm *vm);
/* Hook to collect the dirty pages into the bitmap provided */
- void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
- void *bitmap, uint32_t num_pages);
+ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *ring_buf_idx);
/* Hook to call when after each vcpu run */
- void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err);
- void (*before_vcpu_join) (void);
+ void (*after_vcpu_run)(struct kvm_vcpu *vcpu);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
@@ -410,20 +426,10 @@ struct log_mode {
.supported = dirty_ring_supported,
.create_vm_done = dirty_ring_create_vm_done,
.collect_dirty_pages = dirty_ring_collect_dirty_pages,
- .before_vcpu_join = dirty_ring_before_vcpu_join,
.after_vcpu_run = dirty_ring_after_vcpu_run,
},
};
-/*
- * We use this bitmap to track some pages that should have its dirty
- * bit set in the _next_ iteration. For example, if we detected the
- * page value changed to current iteration but at the same time the
- * page bit is cleared in the latest bitmap, then the system must
- * report that write in the next get dirty log call.
- */
-static unsigned long *host_bmap_track;
-
static void log_modes_dump(void)
{
int i;
@@ -452,226 +458,137 @@ static void log_mode_create_vm_done(struct kvm_vm *vm)
mode->create_vm_done(vm);
}
-static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
- void *bitmap, uint32_t num_pages)
+static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *ring_buf_idx)
{
struct log_mode *mode = &log_modes[host_log_mode];
TEST_ASSERT(mode->collect_dirty_pages != NULL,
"collect_dirty_pages() is required for any log mode!");
- mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
+ mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
}
-static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->after_vcpu_run)
- mode->after_vcpu_run(vm, ret, err);
-}
-
-static void log_mode_before_vcpu_join(void)
-{
- struct log_mode *mode = &log_modes[host_log_mode];
-
- if (mode->before_vcpu_join)
- mode->before_vcpu_join();
-}
-
-static void generate_random_array(uint64_t *guest_array, uint64_t size)
-{
- uint64_t i;
-
- for (i = 0; i < size; i++)
- guest_array[i] = random();
+ mode->after_vcpu_run(vcpu);
}
static void *vcpu_worker(void *data)
{
- int ret, vcpu_fd;
- struct kvm_vm *vm = data;
- uint64_t *guest_array;
- uint64_t pages_count = 0;
- struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
- + sizeof(sigset_t));
- sigset_t *sigset = (sigset_t *) &sigmask->sigset;
-
- vcpu_fd = vcpu_get_fd(vm, VCPU_ID);
-
- /*
- * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the
- * ioctl to return with -EINTR, but it is still pending and we need
- * to accept it with the sigwait.
- */
- sigmask->len = 8;
- pthread_sigmask(0, NULL, sigset);
- vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask);
- sigaddset(sigset, SIG_IPI);
- pthread_sigmask(SIG_BLOCK, sigset, NULL);
-
- sigemptyset(sigset);
- sigaddset(sigset, SIG_IPI);
+ struct kvm_vcpu *vcpu = data;
- guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
+ sem_wait(&sem_vcpu_cont);
while (!READ_ONCE(host_quit)) {
- /* Clear any existing kick signals */
- generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
- pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = ioctl(vcpu_fd, KVM_RUN, NULL);
- if (ret == -1 && errno == EINTR) {
- int sig = -1;
- sigwait(sigset, &sig);
- assert(sig == SIG_IPI);
- }
- log_mode_after_vcpu_run(vm, ret, errno);
+ vcpu_run(vcpu);
+ log_mode_after_vcpu_run(vcpu);
}
- pr_info("Dirtied %"PRIu64" pages\n", pages_count);
-
return NULL;
}
-static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap)
{
+ uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0;
uint64_t step = vm_num_host_pages(mode, 1);
- uint64_t page;
- uint64_t *value_ptr;
- uint64_t min_iter = 0;
for (page = 0; page < host_num_pages; page += step) {
- value_ptr = host_test_mem + page * host_page_size;
-
- /* If this is a special page that we were tracking... */
- if (test_and_clear_bit_le(page, host_bmap_track)) {
- host_track_next_count++;
- TEST_ASSERT(test_bit_le(page, bmap),
- "Page %"PRIu64" should have its dirty bit "
- "set in this iteration but it is missing",
- page);
- }
-
- if (test_and_clear_bit_le(page, bmap)) {
- bool matched;
+ uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size);
+ bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]);
- host_dirty_count++;
+ /*
+ * Ensure both bitmaps are cleared, as a page can be written
+ * multiple times per iteration, i.e. can show up in both
+ * bitmaps, and the dirty ring is additive, i.e. doesn't purge
+ * bitmap entries from previous collections.
+ */
+ if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) {
+ nr_dirty_pages++;
/*
- * If the bit is set, the value written onto
- * the corresponding page should be either the
- * previous iteration number or the current one.
+ * If the page is dirty, the value written to memory
+ * should be the current iteration number.
*/
- matched = (*value_ptr == iteration ||
- *value_ptr == iteration - 1);
-
- if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
- if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
- /*
- * Short answer: this case is special
- * only for dirty ring test where the
- * page is the last page before a kvm
- * dirty ring full in iteration N-2.
- *
- * Long answer: Assuming ring size R,
- * one possible condition is:
- *
- * main thr vcpu thr
- * -------- --------
- * iter=1
- * write 1 to page 0~(R-1)
- * full, vmexit
- * collect 0~(R-1)
- * kick vcpu
- * write 1 to (R-1)~(2R-2)
- * full, vmexit
- * iter=2
- * collect (R-1)~(2R-2)
- * kick vcpu
- * write 1 to (2R-2)
- * (NOTE!!! "1" cached in cpu reg)
- * write 2 to (2R-1)~(3R-3)
- * full, vmexit
- * iter=3
- * collect (2R-2)~(3R-3)
- * (here if we read value on page
- * "2R-2" is 1, while iter=3!!!)
- *
- * This however can only happen once per iteration.
- */
- min_iter = iteration - 1;
+ if (val == iteration)
+ continue;
+
+ if (host_log_mode == LOG_MODE_DIRTY_RING) {
+ /*
+ * The last page in the ring from previous
+ * iteration can be written with the value
+ * from the previous iteration, as the value to
+ * be written may be cached in a CPU register.
+ */
+ if (page == dirty_ring_prev_iteration_last_page &&
+ val == iteration - 1)
continue;
- } else if (page == dirty_ring_last_page) {
- /*
- * Please refer to comments in
- * dirty_ring_last_page.
- */
+
+ /*
+ * Any value from a previous iteration is legal
+ * for the last entry, as the write may not yet
+ * have retired, i.e. the page may hold whatever
+ * it had before this iteration started.
+ */
+ if (page == dirty_ring_last_page &&
+ val < iteration)
continue;
- }
+ } else if (!val && iteration == 1 && bmap0_dirty) {
+ /*
+ * When testing get+clear, the dirty bitmap
+ * starts with all bits set, and so the first
+ * iteration can observe a "dirty" page that
+ * was never written, but only in the first
+ * bitmap (collecting the bitmap also clears
+ * all dirty pages).
+ */
+ continue;
}
- TEST_ASSERT(matched,
- "Set page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
+ TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
} else {
- host_clear_count++;
+ nr_clean_pages++;
/*
* If cleared, the value written can be any
- * value smaller or equals to the iteration
- * number. Note that the value can be exactly
- * (iteration-1) if that write can happen
- * like this:
- *
- * (1) increase loop count to "iteration-1"
- * (2) write to page P happens (with value
- * "iteration-1")
- * (3) get dirty log for "iteration-1"; we'll
- * see that page P bit is set (dirtied),
- * and not set the bit in host_bmap_track
- * (4) increase loop count to "iteration"
- * (which is current iteration)
- * (5) get dirty log for current iteration,
- * we'll see that page P is cleared, with
- * value "iteration-1".
+ * value smaller than the iteration number.
*/
- TEST_ASSERT(*value_ptr <= iteration,
- "Clear page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
- if (*value_ptr == iteration) {
- /*
- * This page is _just_ modified; it
- * should report its dirtyness in the
- * next run
- */
- set_bit_le(page, host_bmap_track);
- }
+ TEST_ASSERT(val < iteration,
+ "Clear page %lu value (%lu) >= iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
}
}
+
+ pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n",
+ iteration, nr_dirty_pages, nr_clean_pages, nr_writes);
+
+ host_dirty_count += nr_dirty_pages;
+ host_clear_count += nr_clean_pages;
}
-static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
uint64_t extra_mem_pages, void *guest_code)
{
struct kvm_vm *vm;
- uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
- vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-#ifdef __x86_64__
- vm_create_irqchip(vm);
-#endif
+ vm = __vm_create(VM_SHAPE(mode), 1, extra_mem_pages);
+
log_mode_create_vm_done(vm);
- vm_vcpu_add_default(vm, vcpuid, guest_code);
+ *vcpu = vm_vcpu_add(vm, 0, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
-#define DIRTY_MEM_BITS 30 /* 1G */
-#define PAGE_SHIFT_4K 12
-
struct test_params {
unsigned long iterations;
unsigned long interval;
@@ -681,8 +598,11 @@ struct test_params {
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- unsigned long *bmap;
+ unsigned long *bmap[2];
+ uint32_t ring_buf_idx = 0;
+ int sem_val;
if (!log_mode_supported()) {
print_skip("Log mode '%s' not supported",
@@ -698,39 +618,46 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* (e.g., 64K page size guest will need even less memory for
* page tables).
*/
- vm = create_vm(mode, VCPU_ID,
- 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K),
- guest_code);
+ vm = create_vm(mode, &vcpu,
+ 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), guest_code);
- guest_page_size = vm_get_page_size(vm);
+ guest_page_size = vm->page_size;
/*
* A little more than 1G of guest page sized pages. Cover the
* case where the size is not aligned to 64 pages.
*/
- guest_num_pages = (1ul << (DIRTY_MEM_BITS -
- vm_get_page_shift(vm))) + 3;
+ guest_num_pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_page_size = getpagesize();
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
if (!p->phys_offset) {
- guest_test_phys_mem = (vm_get_max_gfn(vm) -
- guest_num_pages) * guest_page_size;
- guest_test_phys_mem &= ~(host_page_size - 1);
+ guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *
+ guest_page_size;
+ guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size);
} else {
guest_test_phys_mem = p->phys_offset;
}
#ifdef __s390x__
/* Align to 1M (segment size) */
- guest_test_phys_mem &= ~((1 << 20) - 1);
+ guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
+
+ /*
+ * The workaround in guest_code() to write all pages prior to the first
+ * iteration isn't compatible with the dirty ring, as the dirty ring
+ * support relies on the vCPU to actually stop when vcpu_stop is set so
+ * that the vCPU doesn't hang waiting for the dirty ring to be emptied.
+ */
+ TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING,
+ "Test needs to be updated to support s390 dirty ring");
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
- bmap = bitmap_alloc(host_num_pages);
- host_bmap_track = bitmap_alloc(host_num_pages);
+ bmap[0] = bitmap_zalloc(host_num_pages);
+ bmap[1] = bitmap_zalloc(host_num_pages);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -740,51 +667,143 @@ static void run_test(enum vm_guest_mode mode, void *arg)
KVM_MEM_LOG_DIRTY_PAGES);
/* Do mapping for the dirty track memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
- ucall_init(vm, NULL);
-
/* Export the shared variables to the guest */
sync_global_to_guest(vm, host_page_size);
sync_global_to_guest(vm, guest_page_size);
sync_global_to_guest(vm, guest_test_virt_mem);
sync_global_to_guest(vm, guest_num_pages);
- /* Start the iterations */
- iteration = 1;
- sync_global_to_guest(vm, iteration);
- host_quit = false;
host_dirty_count = 0;
host_clear_count = 0;
- host_track_next_count = 0;
+ WRITE_ONCE(host_quit, false);
- pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+ /*
+ * Ensure the previous iteration didn't leave a dangling semaphore, i.e.
+ * that the main task and vCPU worker were synchronized and completed
+ * verification of all iterations.
+ */
+ sem_getvalue(&sem_vcpu_stop, &sem_val);
+ TEST_ASSERT_EQ(sem_val, 0);
+ sem_getvalue(&sem_vcpu_cont, &sem_val);
+ TEST_ASSERT_EQ(sem_val, 0);
+
+ TEST_ASSERT_EQ(vcpu_stop, false);
+
+ pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
+
+ for (iteration = 1; iteration <= p->iterations; iteration++) {
+ unsigned long i;
- while (iteration < p->iterations) {
- /* Give the vcpu thread some time to dirty some pages */
- usleep(p->interval * 1000);
- log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
- bmap, host_num_pages);
- vm_dirty_log_verify(mode, bmap);
- iteration++;
sync_global_to_guest(vm, iteration);
+
+ WRITE_ONCE(nr_writes, 0);
+ sync_global_to_guest(vm, nr_writes);
+
+ dirty_ring_prev_iteration_last_page = dirty_ring_last_page;
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+
+ sem_post(&sem_vcpu_cont);
+
+ /*
+ * Let the vCPU run beyond the configured interval until it has
+ * performed the minimum number of writes. This verifies the
+ * guest is making forward progress, e.g. isn't stuck because
+ * of a KVM bug, and puts a firm floor on test coverage.
+ */
+ for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) {
+ /*
+ * Sleep in 1ms chunks to keep the interval math simple
+ * and so that the test doesn't run too far beyond the
+ * specified interval.
+ */
+ usleep(1000);
+
+ sync_global_from_guest(vm, nr_writes);
+
+ /*
+ * Reap dirty pages while the guest is running so that
+ * dirty ring full events are resolved, i.e. so that a
+ * larger interval doesn't always end up with a vCPU
+ * that's effectively blocked. Collecting while the
+ * guest is running also verifies KVM doesn't lose any
+ * state.
+ *
+ * For bitmap modes, KVM overwrites the entire bitmap,
+ * i.e. collecting the bitmaps is destructive. Collect
+ * the bitmap only on the first pass, otherwise this
+ * test would lose track of dirty pages.
+ */
+ if (i && host_log_mode != LOG_MODE_DIRTY_RING)
+ continue;
+
+ /*
+ * For the dirty ring, empty the ring on subsequent
+ * passes only if the ring was filled at least once,
+ * to verify KVM's handling of a full ring (emptying
+ * the ring on every pass would make it unlikely the
+ * vCPU would ever fill the fing).
+ */
+ if (i && !READ_ONCE(dirty_ring_vcpu_ring_full))
+ continue;
+
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[0], host_num_pages,
+ &ring_buf_idx);
+ }
+
+ /*
+ * Stop the vCPU prior to collecting and verifying the dirty
+ * log. If the vCPU is allowed to run during collection, then
+ * pages that are written during this iteration may be missed,
+ * i.e. collected in the next iteration. And if the vCPU is
+ * writing memory during verification, pages that this thread
+ * sees as clean may be written with this iteration's value.
+ */
+ WRITE_ONCE(vcpu_stop, true);
+ sync_global_to_guest(vm, vcpu_stop);
+ sem_wait(&sem_vcpu_stop);
+
+ /*
+ * Clear vcpu_stop after the vCPU thread has acknowledge the
+ * stop request and is waiting, i.e. is definitely not running!
+ */
+ WRITE_ONCE(vcpu_stop, false);
+ sync_global_to_guest(vm, vcpu_stop);
+
+ /*
+ * Sync the number of writes performed before verification, the
+ * info will be printed along with the dirty/clean page counts.
+ */
+ sync_global_from_guest(vm, nr_writes);
+
+ /*
+ * NOTE: for dirty ring, it's possible that we didn't stop at
+ * GUEST_SYNC but instead we stopped because ring is full;
+ * that's okay too because ring full means we're only missing
+ * the flush of the last page, and since we handle the last
+ * page specially verification will succeed anyway.
+ */
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[1], host_num_pages,
+ &ring_buf_idx);
+ vm_dirty_log_verify(mode, bmap);
}
- /* Tell the vcpu thread to quit */
- host_quit = true;
- log_mode_before_vcpu_join();
+ WRITE_ONCE(host_quit, true);
+ sem_post(&sem_vcpu_cont);
+
pthread_join(vcpu_thread, NULL);
- pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
- "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
- host_track_next_count);
+ pr_info("Total bits checked: dirty (%lu), clear (%lu)\n",
+ host_dirty_count, host_clear_count);
- free(bmap);
- free(host_bmap_track);
- ucall_uninit(vm);
+ free(bmap[0]);
+ free(bmap[1]);
kvm_vm_free(vm);
}
@@ -794,7 +813,7 @@ static void help(char *name)
printf("usage: %s [-h] [-i iterations] [-I interval] "
"[-p offset] [-m mode]\n", name);
puts("");
- printf(" -c: specify dirty ring size, in number of entries\n");
+ printf(" -c: hint to dirty ring size, in number of entries\n");
printf(" (only useful for dirty-ring test; default: %"PRIu32")\n",
TEST_DIRTY_RING_COUNT);
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
@@ -819,8 +838,8 @@ int main(int argc, char *argv[])
};
int opt, i;
- sem_init(&dirty_ring_vcpu_stop, 0, 0);
- sem_init(&dirty_ring_vcpu_cont, 0, 0);
+ sem_init(&sem_vcpu_stop, 0, 0);
+ sem_init(&sem_vcpu_cont, 0, 0);
guest_modes_append_default();
@@ -868,14 +887,12 @@ int main(int argc, char *argv[])
}
}
- TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+ TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero");
TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
p.iterations, p.interval);
- srandom(time(0));
-
if (host_log_mode_option == LOG_MODE_ALL) {
/* Run each log mode */
for (i = 0; i < LOG_MODE_NUM; i++) {
diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c
new file mode 100644
index 000000000000..f4644c9d2d3b
--- /dev/null
+++ b/tools/testing/selftests/kvm/get-reg-list.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * When attempting to migrate from a host with an older kernel to a host
+ * with a newer kernel we allow the newer kernel on the destination to
+ * list new registers with get-reg-list. We assume they'll be unused, at
+ * least until the guest reboots, and so they're relatively harmless.
+ * However, if the destination host with the newer kernel is missing
+ * registers which the source host with the older kernel has, then that's
+ * a regression in get-reg-list. This test checks for that regression by
+ * checking the current list against a blessed list. We should never have
+ * missing registers, but if new ones appear then they can probably be
+ * added to the blessed list. A completely new blessed list can be created
+ * by running the test with the --list command line argument.
+ *
+ * The blessed list should be created from the oldest possible kernel.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
+
+extern struct vcpu_reg_list *vcpu_configs[];
+extern int vcpu_configs_n;
+
+#define for_each_reg(i) \
+ for ((i) = 0; (i) < reg_list->n; ++(i))
+
+#define for_each_reg_filtered(i) \
+ for_each_reg(i) \
+ if (!filter_reg(reg_list->reg[i]))
+
+#define for_each_missing_reg(i) \
+ for ((i) = 0; (i) < blessed_n; ++(i)) \
+ if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \
+ if (check_supported_reg(vcpu, blessed_reg[i]))
+
+#define for_each_new_reg(i) \
+ for_each_reg_filtered(i) \
+ if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+#define for_each_present_blessed_reg(i) \
+ for_each_reg(i) \
+ if (find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+static const char *config_name(struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+ int len = 0;
+
+ if (c->name)
+ return c->name;
+
+ for_each_sublist(c, s)
+ len += strlen(s->name) + 1;
+
+ c->name = malloc(len);
+
+ len = 0;
+ for_each_sublist(c, s) {
+ if (!strcmp(s->name, "base"))
+ continue;
+ if (len)
+ c->name[len++] = '+';
+ strcpy(c->name + len, s->name);
+ len += strlen(s->name);
+ }
+ c->name[len] = '\0';
+
+ return c->name;
+}
+
+bool __weak check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+ return true;
+}
+
+bool __weak filter_reg(__u64 reg)
+{
+ return false;
+}
+
+static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+{
+ int i;
+
+ for (i = 0; i < nr_regs; ++i)
+ if (reg == regs[i])
+ return true;
+ return false;
+}
+
+void __weak print_reg(const char *prefix, __u64 id)
+{
+ printf("\t0x%llx,\n", id);
+}
+
+bool __weak check_reject_set(int err)
+{
+ return true;
+}
+
+void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+}
+
+#ifdef __aarch64__
+static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c,
+ struct kvm_vcpu_init *init)
+{
+ struct vcpu_reg_sublist *s;
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init);
+
+ for_each_sublist(c, s)
+ if (s->capability)
+ init->features[s->feature / 32] |= 1 << (s->feature % 32);
+}
+
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+
+ prepare_vcpu_init(vm, c, &init);
+ vcpu = __vm_vcpu_add(vm, 0);
+ aarch64_vcpu_setup(vcpu, &init);
+
+ return vcpu;
+}
+#else
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+ return __vm_vcpu_add(vm, 0);
+}
+#endif
+
+static void check_supported(struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+
+ for_each_sublist(c, s) {
+ if (!s->capability)
+ continue;
+
+ __TEST_REQUIRE(kvm_has_cap(s->capability),
+ "%s: %s not available, skipping tests",
+ config_name(c), s->name);
+ }
+}
+
+static bool print_list;
+static bool print_filtered;
+
+static void run_test(struct vcpu_reg_list *c)
+{
+ int new_regs = 0, missing_regs = 0, i, n;
+ int failed_get = 0, failed_set = 0, failed_reject = 0;
+ int skipped_set = 0;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct vcpu_reg_sublist *s;
+
+ check_supported(c);
+
+ vm = vm_create_barebones();
+ vcpu = vcpu_config_get_vcpu(c, vm);
+ finalize_vcpu(vcpu, c);
+
+ reg_list = vcpu_get_reg_list(vcpu);
+
+ if (print_list || print_filtered) {
+ putchar('\n');
+ for_each_reg(i) {
+ __u64 id = reg_list->reg[i];
+ if ((print_list && !filter_reg(id)) ||
+ (print_filtered && filter_reg(id)))
+ print_reg(config_name(c), id);
+ }
+ putchar('\n');
+ return;
+ }
+
+ for_each_sublist(c, s)
+ blessed_n += s->regs_n;
+ blessed_reg = calloc(blessed_n, sizeof(__u64));
+
+ n = 0;
+ for_each_sublist(c, s) {
+ for (i = 0; i < s->regs_n; ++i)
+ blessed_reg[n++] = s->regs[i];
+ }
+
+ /*
+ * We only test that we can get the register and then write back the
+ * same value. Some registers may allow other values to be written
+ * back, but others only allow some bits to be changed, and at least
+ * for ID registers set will fail if the value does not exactly match
+ * what was returned by get. If registers that allow other values to
+ * be written need to have the other values tested, then we should
+ * create a new set of tests for those in a new independent test
+ * executable.
+ *
+ * Only do the get/set tests on present, blessed list registers,
+ * since we don't know the capabilities of any new registers.
+ */
+ for_each_present_blessed_reg(i) {
+ uint8_t addr[2048 / 8];
+ struct kvm_one_reg reg = {
+ .id = reg_list->reg[i],
+ .addr = (__u64)&addr,
+ };
+ bool reject_reg = false, skip_reg = false;
+ int ret;
+
+ ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr);
+ if (ret) {
+ printf("%s: Failed to get ", config_name(c));
+ print_reg(config_name(c), reg.id);
+ putchar('\n');
+ ++failed_get;
+ }
+
+ for_each_sublist(c, s) {
+ /* rejects_set registers are rejected for set operation */
+ if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+ reject_reg = true;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ if (ret != -1 || !check_reject_set(errno)) {
+ printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+ print_reg(config_name(c), reg.id);
+ putchar('\n');
+ ++failed_reject;
+ }
+ break;
+ }
+
+ /* skips_set registers are skipped for set operation */
+ if (s->skips_set && find_reg(s->skips_set, s->skips_set_n, reg.id)) {
+ skip_reg = true;
+ ++skipped_set;
+ break;
+ }
+ }
+
+ if (!reject_reg && !skip_reg) {
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ if (ret) {
+ printf("%s: Failed to set ", config_name(c));
+ print_reg(config_name(c), reg.id);
+ putchar('\n');
+ ++failed_set;
+ }
+ }
+ }
+
+ for_each_new_reg(i)
+ ++new_regs;
+
+ for_each_missing_reg(i)
+ ++missing_regs;
+
+ if (new_regs || missing_regs) {
+ n = 0;
+ for_each_reg_filtered(i)
+ ++n;
+
+ printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+ printf("%s: Number registers: %5lld (includes %lld filtered registers)\n",
+ config_name(c), reg_list->n, reg_list->n - n);
+ }
+
+ if (new_regs) {
+ printf("\n%s: There are %d new registers.\n"
+ "Consider adding them to the blessed reg "
+ "list with the following lines:\n\n", config_name(c), new_regs);
+ for_each_new_reg(i)
+ print_reg(config_name(c), reg_list->reg[i]);
+ putchar('\n');
+ }
+
+ if (missing_regs) {
+ printf("\n%s: There are %d missing registers.\n"
+ "The following lines are missing registers:\n\n", config_name(c), missing_regs);
+ for_each_missing_reg(i)
+ print_reg(config_name(c), blessed_reg[i]);
+ putchar('\n');
+ }
+
+ TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
+ "%s: There are %d missing registers; %d registers failed get; "
+ "%d registers failed set; %d registers failed reject; %d registers skipped set",
+ config_name(c), missing_regs, failed_get, failed_set, failed_reject, skipped_set);
+
+ pr_info("%s: PASS\n", config_name(c));
+ blessed_n = 0;
+ free(blessed_reg);
+ free(reg_list);
+ kvm_vm_free(vm);
+}
+
+static void help(void)
+{
+ struct vcpu_reg_list *c;
+ int i;
+
+ printf(
+ "\n"
+ "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered]\n\n"
+ " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n"
+ " '<selection>' may be\n");
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ printf(
+ " '%s'\n", config_name(c));
+ }
+
+ printf(
+ "\n"
+ " --list Print the register list rather than test it (requires --config)\n"
+ " --list-filtered Print registers that would normally be filtered out (requires --config)\n"
+ "\n"
+ );
+}
+
+static struct vcpu_reg_list *parse_config(const char *config)
+{
+ struct vcpu_reg_list *c = NULL;
+ int i;
+
+ if (config[8] != '=')
+ help(), exit(1);
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (strcmp(config_name(c), &config[9]) == 0)
+ break;
+ }
+
+ if (i == vcpu_configs_n)
+ help(), exit(1);
+
+ return c;
+}
+
+int main(int ac, char **av)
+{
+ struct vcpu_reg_list *c, *sel = NULL;
+ int i, ret = 0;
+ pid_t pid;
+
+ for (i = 1; i < ac; ++i) {
+ if (strncmp(av[i], "--config", 8) == 0)
+ sel = parse_config(av[i]);
+ else if (strcmp(av[i], "--list") == 0)
+ print_list = true;
+ else if (strcmp(av[i], "--list-filtered") == 0)
+ print_filtered = true;
+ else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0)
+ help(), exit(0);
+ else
+ help(), exit(1);
+ }
+
+ if (print_list || print_filtered) {
+ /*
+ * We only want to print the register list of a single config.
+ */
+ if (!sel)
+ help(), exit(1);
+ }
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (sel && c != sel)
+ continue;
+
+ pid = fork();
+
+ if (!pid) {
+ run_test(c);
+ exit(0);
+ } else {
+ int wstatus;
+ pid_t wpid = wait(&wstatus);
+ TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+ if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+ ret = KSFT_FAIL;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
new file mode 100644
index 000000000000..618c937f3c90
--- /dev/null
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright Intel Corporation, 2023
+ *
+ * Author: Chao Peng <chao.p.peng@linux.intel.com>
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <linux/bitmap.h>
+#include <linux/falloc.h>
+#include <linux/sizes.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "kvm_util.h"
+#include "numaif.h"
+#include "test_util.h"
+#include "ucall_common.h"
+
+static size_t page_size;
+
+static void test_file_read_write(int fd, size_t total_size)
+{
+ char buf[64];
+
+ TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
+ "read on a guest_mem fd should fail");
+ TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
+ "write on a guest_mem fd should fail");
+ TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
+ "pread on a guest_mem fd should fail");
+ TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
+ "pwrite on a guest_mem fd should fail");
+}
+
+static void test_mmap_cow(int fd, size_t size)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
+}
+
+static void test_mmap_supported(int fd, size_t total_size)
+{
+ const char val = 0xaa;
+ char *mem;
+ size_t i;
+ int ret;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ memset(mem, val, total_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,
+ page_size);
+ TEST_ASSERT(!ret, "fallocate the first page should succeed.");
+
+ for (i = 0; i < page_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00);
+ for (; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ memset(mem, val, page_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_mbind(int fd, size_t total_size)
+{
+ const unsigned long nodemask_0 = 1; /* nid: 0 */
+ unsigned long nodemask = 0;
+ unsigned long maxnode = 8;
+ int policy;
+ char *mem;
+ int ret;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ /* Test MPOL_INTERLEAVE policy */
+ kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
+ "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_INTERLEAVE, nodemask_0, policy, nodemask);
+
+ /* Test basic MPOL_BIND policy */
+ kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
+ "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_BIND, nodemask_0, policy, nodemask);
+
+ /* Test MPOL_DEFAULT policy */
+ kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
+ "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
+ MPOL_DEFAULT, policy, nodemask);
+
+ /* Test with invalid policy */
+ ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "mbind with invalid policy should fail with EINVAL");
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_numa_allocation(int fd, size_t total_size)
+{
+ unsigned long node0_mask = 1; /* Node 0 */
+ unsigned long node1_mask = 2; /* Node 1 */
+ unsigned long maxnode = 8;
+ void *pages[4];
+ int status[4];
+ char *mem;
+ int i;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ for (i = 0; i < 4; i++)
+ pages[i] = (char *)mem + page_size * i;
+
+ /* Set NUMA policy after allocation */
+ memset(mem, 0xaa, page_size);
+ kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);
+
+ /* Set NUMA policy before allocation */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ /* Validate if pages are allocated on specified NUMA nodes */
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);
+
+ /* Punch hole for all pages */
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);
+
+ /* Change NUMA policy nodes and reallocate */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
+{
+ const char val = 0xaa;
+ char *mem;
+ size_t i;
+
+ mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
+ TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));
+
+ for (i = 0; i < accessible_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ kvm_munmap(mem, map_size);
+}
+
+static void test_fault_overflow(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, total_size, total_size * 4);
+}
+
+static void test_fault_private(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, 0, total_size);
+}
+
+static void test_mmap_not_supported(int fd, size_t total_size)
+{
+ char *mem;
+
+ mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ TEST_ASSERT_EQ(mem, MAP_FAILED);
+
+ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ TEST_ASSERT_EQ(mem, MAP_FAILED);
+}
+
+static void test_file_size(int fd, size_t total_size)
+{
+ struct stat sb;
+ int ret;
+
+ ret = fstat(fd, &sb);
+ TEST_ASSERT(!ret, "fstat should succeed");
+ TEST_ASSERT_EQ(sb.st_size, total_size);
+ TEST_ASSERT_EQ(sb.st_blksize, page_size);
+}
+
+static void test_fallocate(int fd, size_t total_size)
+{
+ int ret;
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size);
+ TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ page_size - 1, page_size);
+ TEST_ASSERT(ret, "fallocate with unaligned offset should fail");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size);
+ TEST_ASSERT(ret, "fallocate beginning at total_size should fail");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size);
+ TEST_ASSERT(ret, "fallocate beginning after total_size should fail");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ total_size, page_size);
+ TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ total_size + page_size, page_size);
+ TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ page_size, page_size - 1);
+ TEST_ASSERT(ret, "fallocate with unaligned size should fail");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ page_size, page_size);
+ TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size);
+ TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
+}
+
+static void test_invalid_punch_hole(int fd, size_t total_size)
+{
+ struct {
+ off_t offset;
+ off_t len;
+ } testcases[] = {
+ {0, 1},
+ {0, page_size - 1},
+ {0, page_size + 1},
+
+ {1, 1},
+ {1, page_size - 1},
+ {1, page_size},
+ {1, page_size + 1},
+
+ {page_size, 1},
+ {page_size, page_size - 1},
+ {page_size, page_size + 1},
+ };
+ int ret, i;
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ testcases[i].offset, testcases[i].len);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail",
+ testcases[i].offset, testcases[i].len);
+ }
+}
+
+static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
+ uint64_t guest_memfd_flags)
+{
+ size_t size;
+ int fd;
+
+ for (size = 1; size < page_size; size++) {
+ fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags);
+ TEST_ASSERT(fd < 0 && errno == EINVAL,
+ "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",
+ size);
+ }
+}
+
+static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
+{
+ int fd1, fd2, ret;
+ struct stat st1, st2;
+
+ fd1 = __vm_create_guest_memfd(vm, page_size, 0);
+ TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
+
+ ret = fstat(fd1, &st1);
+ TEST_ASSERT(ret != -1, "memfd fstat should succeed");
+ TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");
+
+ fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);
+ TEST_ASSERT(fd2 != -1, "memfd creation should succeed");
+
+ ret = fstat(fd2, &st2);
+ TEST_ASSERT(ret != -1, "memfd fstat should succeed");
+ TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");
+
+ ret = fstat(fd1, &st1);
+ TEST_ASSERT(ret != -1, "memfd fstat should succeed");
+ TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");
+ TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
+
+ close(fd2);
+ close(fd1);
+}
+
+static void test_guest_memfd_flags(struct kvm_vm *vm)
+{
+ uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ uint64_t flag;
+ int fd;
+
+ for (flag = BIT(0); flag; flag <<= 1) {
+ fd = __vm_create_guest_memfd(vm, page_size, flag);
+ if (flag & valid_flags) {
+ TEST_ASSERT(fd >= 0,
+ "guest_memfd() with flag '0x%lx' should succeed",
+ flag);
+ close(fd);
+ } else {
+ TEST_ASSERT(fd < 0 && errno == EINVAL,
+ "guest_memfd() with flag '0x%lx' should fail with EINVAL",
+ flag);
+ }
+ }
+}
+
+#define gmem_test(__test, __vm, __flags) \
+do { \
+ int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \
+ \
+ test_##__test(fd, page_size * 4); \
+ close(fd); \
+} while (0)
+
+static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
+{
+ test_create_guest_memfd_multiple(vm);
+ test_create_guest_memfd_invalid_sizes(vm, flags);
+
+ gmem_test(file_read_write, vm, flags);
+
+ if (flags & GUEST_MEMFD_FLAG_MMAP) {
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
+ gmem_test(mmap_supported, vm, flags);
+ gmem_test(fault_overflow, vm, flags);
+ gmem_test(numa_allocation, vm, flags);
+ } else {
+ gmem_test(fault_private, vm, flags);
+ }
+
+ gmem_test(mmap_cow, vm, flags);
+ gmem_test(mbind, vm, flags);
+ } else {
+ gmem_test(mmap_not_supported, vm, flags);
+ }
+
+ gmem_test(file_size, vm, flags);
+ gmem_test(fallocate, vm, flags);
+ gmem_test(invalid_punch_hole, vm, flags);
+}
+
+static void test_guest_memfd(unsigned long vm_type)
+{
+ struct kvm_vm *vm = vm_create_barebones_type(vm_type);
+ uint64_t flags;
+
+ test_guest_memfd_flags(vm);
+
+ __test_guest_memfd(vm, 0);
+
+ flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ if (flags & GUEST_MEMFD_FLAG_MMAP)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);
+
+ /* MMAP should always be supported if INIT_SHARED is supported. */
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code(uint8_t *mem, uint64_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ __GUEST_ASSERT(mem[i] == 0xaa,
+ "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]);
+
+ memset(mem, 0xff, size);
+ GUEST_DONE();
+}
+
+static void test_guest_memfd_guest(void)
+{
+ /*
+ * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back
+ * the guest's code, stack, and page tables, and low memory contains
+ * the PCI hole and other MMIO regions that need to be avoided.
+ */
+ const uint64_t gpa = SZ_4G;
+ const int slot = 1;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint8_t *mem;
+ size_t size;
+ int fd, i;
+
+ if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
+ return;
+
+ vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);
+
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
+ "Default VM type should support MMAP, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
+ "Default VM type should support INIT_SHARED, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+
+ size = vm->page_size;
+ fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
+ vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
+
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+ memset(mem, 0xaa, size);
+ kvm_munmap(mem, size);
+
+ virt_pg_map(vm, gpa, gpa);
+ vcpu_args_set(vcpu, 2, gpa, size);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+ for (i = 0; i < size; i++)
+ TEST_ASSERT_EQ(mem[i], 0xff);
+
+ close(fd);
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long vm_types, vm_type;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+
+ page_size = getpagesize();
+
+ /*
+ * Not all architectures support KVM_CAP_VM_TYPES. However, those that
+ * support guest_memfd have that support for the default VM type.
+ */
+ vm_types = kvm_check_cap(KVM_CAP_VM_TYPES);
+ if (!vm_types)
+ vm_types = BIT(VM_TYPE_DEFAULT);
+
+ for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types))
+ test_guest_memfd(vm_type);
+
+ test_guest_memfd_guest();
+}
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
new file mode 100644
index 000000000000..bcf582852db9
--- /dev/null
+++ b/tools/testing/selftests/kvm/guest_print_test.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test for GUEST_PRINTF
+ *
+ * Copyright 2022, Google, Inc. and/or its affiliates.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+struct guest_vals {
+ uint64_t a;
+ uint64_t b;
+ uint64_t type;
+};
+
+static struct guest_vals vals;
+
+/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */
+#define TYPE_LIST \
+TYPE(test_type_i64, I64, "%ld", int64_t) \
+TYPE(test_type_u64, U64u, "%lu", uint64_t) \
+TYPE(test_type_x64, U64x, "0x%lx", uint64_t) \
+TYPE(test_type_X64, U64X, "0x%lX", uint64_t) \
+TYPE(test_type_u32, U32u, "%u", uint32_t) \
+TYPE(test_type_x32, U32x, "0x%x", uint32_t) \
+TYPE(test_type_X32, U32X, "0x%X", uint32_t) \
+TYPE(test_type_int, INT, "%d", int) \
+TYPE(test_type_char, CHAR, "%c", char) \
+TYPE(test_type_str, STR, "'%s'", const char *) \
+TYPE(test_type_ptr, PTR, "%p", uintptr_t)
+
+enum args_type {
+#define TYPE(fn, ext, fmt_t, T) TYPE_##ext,
+ TYPE_LIST
+#undef TYPE
+};
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+ const char *expected_assert);
+
+#define BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T) \
+const char *PRINTF_FMT_##ext = "Got params a = " fmt_t " and b = " fmt_t; \
+const char *ASSERT_FMT_##ext = "Expected " fmt_t ", got " fmt_t " instead"; \
+static void fn(struct kvm_vcpu *vcpu, T a, T b) \
+{ \
+ char expected_printf[UCALL_BUFFER_LEN]; \
+ char expected_assert[UCALL_BUFFER_LEN]; \
+ \
+ snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \
+ snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \
+ vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext }; \
+ sync_global_to_guest(vcpu->vm, vals); \
+ run_test(vcpu, expected_printf, expected_assert); \
+}
+
+#define TYPE(fn, ext, fmt_t, T) \
+ BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T)
+ TYPE_LIST
+#undef TYPE
+
+static void guest_code(void)
+{
+ while (1) {
+ switch (vals.type) {
+#define TYPE(fn, ext, fmt_t, T) \
+ case TYPE_##ext: \
+ GUEST_PRINTF(PRINTF_FMT_##ext, vals.a, vals.b); \
+ __GUEST_ASSERT(vals.a == vals.b, \
+ ASSERT_FMT_##ext, vals.a, vals.b); \
+ break;
+ TYPE_LIST
+#undef TYPE
+ default:
+ GUEST_SYNC(vals.type);
+ }
+
+ GUEST_DONE();
+ }
+}
+
+/*
+ * Unfortunately this gets a little messy because 'assert_msg' doesn't
+ * just contains the matching string, it also contains additional assert
+ * info. Fortunately the part that matches should be at the very end of
+ * 'assert_msg'.
+ */
+static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
+{
+ int len_str = strlen(assert_msg);
+ int len_substr = strlen(expected_assert_msg);
+ int offset = len_str - len_substr;
+
+ TEST_ASSERT(len_substr <= len_str,
+ "Expected '%s' to be a substring of '%s'",
+ assert_msg, expected_assert_msg);
+
+ TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0,
+ "Unexpected mismatch. Expected: '%s', got: '%s'",
+ expected_assert_msg, &assert_msg[offset]);
+}
+
+/*
+ * Open code vcpu_run(), sans the UCALL_ABORT handling, so that intentional
+ * guest asserts guest can be verified instead of being reported as failures.
+ */
+static void do_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ do {
+ r = __vcpu_run(vcpu);
+ } while (r == -1 && errno == EINTR);
+
+ TEST_ASSERT(!r, KVM_IOCTL_ERROR(KVM_RUN, r));
+}
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+ const char *expected_assert)
+{
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ while (1) {
+ do_vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+ "Unexpected exit reason: %u (%s),",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_FAIL("Unknown 'args_type' = %lu", uc.args[1]);
+ break;
+ case UCALL_PRINTF:
+ TEST_ASSERT(strcmp(uc.buffer, expected_printf) == 0,
+ "Unexpected mismatch. Expected: '%s', got: '%s'",
+ expected_printf, uc.buffer);
+ break;
+ case UCALL_ABORT:
+ ucall_abort(uc.buffer, expected_assert);
+ break;
+ case UCALL_DONE:
+ return;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
+
+static void guest_code_limits(void)
+{
+ char test_str[UCALL_BUFFER_LEN + 10];
+
+ memset(test_str, 'a', sizeof(test_str));
+ test_str[sizeof(test_str) - 1] = 0;
+
+ GUEST_PRINTF("%s", test_str);
+}
+
+static void test_limits(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits);
+ run = vcpu->run;
+ do_vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+ "Unexpected exit reason: %u (%s),",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT,
+ "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)",
+ uc.cmd, UCALL_ABORT);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ test_type_i64(vcpu, -1, -1);
+ test_type_i64(vcpu, -1, 1);
+ test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+ test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+ test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+ test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+ test_type_u32(vcpu, 0x90abcdef, 0x90abcdef);
+ test_type_u32(vcpu, 0x90abcdef, 0x90abcdee);
+ test_type_x32(vcpu, 0x90abcdef, 0x90abcdef);
+ test_type_x32(vcpu, 0x90abcdef, 0x90abcdee);
+ test_type_X32(vcpu, 0x90abcdef, 0x90abcdef);
+ test_type_X32(vcpu, 0x90abcdef, 0x90abcdee);
+
+ test_type_int(vcpu, -1, -1);
+ test_type_int(vcpu, -1, 1);
+ test_type_int(vcpu, 1, 1);
+
+ test_type_char(vcpu, 'a', 'a');
+ test_type_char(vcpu, 'a', 'A');
+ test_type_char(vcpu, 'a', 'b');
+
+ test_type_str(vcpu, "foo", "foo");
+ test_type_str(vcpu, "foo", "bar");
+
+ test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+ kvm_vm_free(vm);
+
+ test_limits();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
new file mode 100644
index 000000000000..94bd6ed24cf3
--- /dev/null
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This test is intended to reproduce a crash that happens when
+ * kvm_arch_hardware_disable is called and it attempts to unregister the user
+ * return notifiers.
+ */
+#include <fcntl.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include <test_util.h>
+
+#include "kvm_util.h"
+
+#define VCPU_NUM 4
+#define SLEEPING_THREAD_NUM (1 << 4)
+#define FORK_NUM (1ULL << 9)
+#define DELAY_US_MAX 2000
+
+sem_t *sem;
+
+static void guest_code(void)
+{
+ for (;;)
+ ; /* Some busy work */
+ printf("Should not be reached.\n");
+}
+
+static void *run_vcpu(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(false, "%s: exited with reason %d: %s",
+ __func__, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ pthread_exit(NULL);
+}
+
+static void *sleeping_thread(void *arg)
+{
+ int fd;
+
+ while (true) {
+ fd = open("/dev/null", O_RDWR);
+ close(fd);
+ }
+ TEST_ASSERT(false, "%s: exited", __func__);
+ pthread_exit(NULL);
+}
+
+static inline void check_create_thread(pthread_t *thread, pthread_attr_t *attr,
+ void *(*f)(void *), void *arg)
+{
+ int r;
+
+ r = pthread_create(thread, attr, f, arg);
+ TEST_ASSERT(r == 0, "%s: failed to create thread", __func__);
+}
+
+static inline void check_set_affinity(pthread_t thread, cpu_set_t *cpu_set)
+{
+ int r;
+
+ r = pthread_setaffinity_np(thread, sizeof(cpu_set_t), cpu_set);
+ TEST_ASSERT(r == 0, "%s: failed set affinity", __func__);
+}
+
+static inline void check_join(pthread_t thread, void **retval)
+{
+ int r;
+
+ r = pthread_join(thread, retval);
+ TEST_ASSERT(r == 0, "%s: failed to join thread", __func__);
+}
+
+static void run_test(uint32_t run)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ cpu_set_t cpu_set;
+ pthread_t threads[VCPU_NUM];
+ pthread_t throw_away;
+ void *b;
+ uint32_t i, j;
+
+ CPU_ZERO(&cpu_set);
+ for (i = 0; i < VCPU_NUM; i++)
+ CPU_SET(i, &cpu_set);
+
+ vm = vm_create(VCPU_NUM);
+
+ pr_debug("%s: [%d] start vcpus\n", __func__, run);
+ for (i = 0; i < VCPU_NUM; ++i) {
+ vcpu = vm_vcpu_add(vm, i, guest_code);
+
+ check_create_thread(&threads[i], NULL, run_vcpu, vcpu);
+ check_set_affinity(threads[i], &cpu_set);
+
+ for (j = 0; j < SLEEPING_THREAD_NUM; ++j) {
+ check_create_thread(&throw_away, NULL, sleeping_thread,
+ (void *)NULL);
+ check_set_affinity(throw_away, &cpu_set);
+ }
+ }
+ pr_debug("%s: [%d] all threads launched\n", __func__, run);
+ sem_post(sem);
+ for (i = 0; i < VCPU_NUM; ++i)
+ check_join(threads[i], &b);
+ /* Should not be reached */
+ TEST_ASSERT(false, "%s: [%d] child escaped the ninja", __func__, run);
+}
+
+void wait_for_child_setup(pid_t pid)
+{
+ /*
+ * Wait for the child to post to the semaphore, but wake up periodically
+ * to check if the child exited prematurely.
+ */
+ for (;;) {
+ const struct timespec wait_period = { .tv_sec = 1 };
+ int status;
+
+ if (!sem_timedwait(sem, &wait_period))
+ return;
+
+ /* Child is still running, keep waiting. */
+ if (pid != waitpid(pid, &status, WNOHANG))
+ continue;
+
+ /*
+ * Child is no longer running, which is not expected.
+ *
+ * If it exited with a non-zero status, we explicitly forward
+ * the child's status in case it exited with KSFT_SKIP.
+ */
+ if (WIFEXITED(status))
+ exit(WEXITSTATUS(status));
+ else
+ TEST_ASSERT(false, "Child exited unexpectedly");
+ }
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t i;
+ int s, r;
+ pid_t pid;
+
+ sem = sem_open("vm_sem", O_CREAT | O_EXCL, 0644, 0);
+ sem_unlink("vm_sem");
+
+ for (i = 0; i < FORK_NUM; ++i) {
+ pid = fork();
+ TEST_ASSERT(pid >= 0, "%s: unable to fork", __func__);
+ if (pid == 0)
+ run_test(i); /* This function always exits */
+
+ pr_debug("%s: [%d] waiting semaphore\n", __func__, i);
+ wait_for_child_setup(pid);
+ r = (rand() % DELAY_US_MAX) + 1;
+ pr_debug("%s: [%d] waiting %dus\n", __func__, i, r);
+ usleep(r);
+ r = waitpid(pid, &s, WNOHANG);
+ TEST_ASSERT(r != pid,
+ "%s: [%d] child exited unexpectedly status: [%d]",
+ __func__, i, s);
+ pr_debug("%s: [%d] killing child\n", __func__, i);
+ kill(pid, SIGKILL);
+ }
+
+ sem_destroy(sem);
+ exit(0);
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
deleted file mode 100644
index b7fa0c8551db..000000000000
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * AArch64 processor specific defines
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include "kvm_util.h"
-
-
-#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
- KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-#define CPACR_EL1 3, 0, 1, 0, 2
-#define TCR_EL1 3, 0, 2, 0, 2
-#define MAIR_EL1 3, 0, 10, 2, 0
-#define TTBR0_EL1 3, 0, 2, 0, 0
-#define SCTLR_EL1 3, 0, 1, 0, 0
-
-/*
- * Default MAIR
- * index attribute
- * DEVICE_nGnRnE 0 0000:0000
- * DEVICE_nGnRE 1 0000:0100
- * DEVICE_GRE 2 0000:1100
- * NORMAL_NC 3 0100:0100
- * NORMAL 4 1111:1111
- * NORMAL_WT 5 1011:1011
- */
-#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
- (0x04ul << (1 * 8)) | \
- (0x0cul << (2 * 8)) | \
- (0x44ul << (3 * 8)) | \
- (0xfful << (4 * 8)) | \
- (0xbbul << (5 * 8)))
-
-static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
-{
- struct kvm_one_reg reg;
- reg.id = id;
- reg.addr = (uint64_t)addr;
- vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, &reg);
-}
-
-static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val)
-{
- struct kvm_one_reg reg;
- reg.id = id;
- reg.addr = (uint64_t)&val;
- vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
-}
-
-void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init);
-void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_init *init, void *guest_code);
-
-#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
new file mode 100644
index 000000000000..e2c4e9f0010f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Timer specific interface
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+enum arch_timer {
+ VIRTUAL,
+ PHYSICAL,
+};
+
+#define CTL_ENABLE (1 << 0)
+#define CTL_IMASK (1 << 1)
+#define CTL_ISTATUS (1 << 2)
+
+#define msec_to_cycles(msec) \
+ (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec) \
+ (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+ ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
+
+static inline uint32_t timer_get_cntfrq(void)
+{
+ return read_sysreg(cntfrq_el0);
+}
+
+static inline uint64_t timer_get_cntct(enum arch_timer timer)
+{
+ isb();
+
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntvct_el0);
+ case PHYSICAL:
+ return read_sysreg(cntpct_el0);
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(cval, cntv_cval_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(cval, cntp_cval_el0);
+ break;
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ isb();
+}
+
+static inline uint64_t timer_get_cval(enum arch_timer timer)
+{
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_cval_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_cval_el0);
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(tval, cntv_tval_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(tval, cntp_tval_el0);
+ break;
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ isb();
+}
+
+static inline int32_t timer_get_tval(enum arch_timer timer)
+{
+ isb();
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_tval_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_tval_el0);
+ default:
+ GUEST_FAIL("Could not get timer %d\n", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(ctl, cntv_ctl_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(ctl, cntp_ctl_el0);
+ break;
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ isb();
+}
+
+static inline uint32_t timer_get_ctl(enum arch_timer timer)
+{
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_ctl_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_ctl_el0);
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
+{
+ uint64_t now_ct = timer_get_cntct(timer);
+ uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+ timer_set_cval(timer, next_ct);
+}
+
+static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
+{
+ timer_set_tval(timer, msec_to_cycles(msec));
+}
+
+static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+ u32 intid;
+ u64 attr;
+
+ attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER :
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER;
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
+
+ return intid;
+}
+
+static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu)
+{
+ u32 intid;
+ u64 attr;
+
+ attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER :
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER;
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
+
+ return intid;
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h
new file mode 100644
index 000000000000..329e4f5079ea
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/delay.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM simple delay routines
+ */
+
+#ifndef SELFTEST_KVM_ARM_DELAY_H
+#define SELFTEST_KVM_ARM_DELAY_H
+
+#include "arch_timer.h"
+
+static inline void __delay(uint64_t cycles)
+{
+ enum arch_timer timer = VIRTUAL;
+ uint64_t start = timer_get_cntct(timer);
+
+ while ((timer_get_cntct(timer) - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARM_DELAY_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
new file mode 100644
index 000000000000..cc7a7f34ed37
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/gic.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) specific defines
+ */
+
+#ifndef SELFTEST_KVM_GIC_H
+#define SELFTEST_KVM_GIC_H
+
+#include <asm/kvm.h>
+
+enum gic_type {
+ GIC_V3,
+ GIC_TYPE_MAX,
+};
+
+/*
+ * Note that the redistributor frames are at the end, as the range scales
+ * with the number of vCPUs in the VM.
+ */
+#define GITS_BASE_GPA 0x8000000ULL
+#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE)
+#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE)
+
+/* The GIC is identity-mapped into the guest at the time of setup. */
+#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA)
+#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA)
+#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA)
+
+#define MIN_SGI 0
+#define MIN_PPI 16
+#define MIN_SPI 32
+#define MAX_SPI 1019
+#define IAR_SPURIOUS 1023
+
+#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI)
+#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI)
+#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
+
+void gic_init(enum gic_type type, unsigned int nr_cpus);
+void gic_irq_enable(unsigned int intid);
+void gic_irq_disable(unsigned int intid);
+unsigned int gic_get_and_ack_irq(void);
+void gic_set_eoi(unsigned int intid);
+void gic_set_dir(unsigned int intid);
+
+/*
+ * Sets the EOI mode. When split is false, EOI just drops the priority. When
+ * split is true, EOI drops the priority and deactivates the interrupt.
+ */
+void gic_set_eoi_split(bool split);
+void gic_set_priority_mask(uint64_t mask);
+void gic_set_priority(uint32_t intid, uint32_t prio);
+void gic_irq_set_active(unsigned int intid);
+void gic_irq_clear_active(unsigned int intid);
+bool gic_irq_get_active(unsigned int intid);
+void gic_irq_set_pending(unsigned int intid);
+void gic_irq_clear_pending(unsigned int intid);
+bool gic_irq_get_pending(unsigned int intid);
+void gic_irq_set_config(unsigned int intid, bool is_edge);
+void gic_irq_set_group(unsigned int intid, bool group);
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+ vm_paddr_t pend_table);
+
+#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h
new file mode 100644
index 000000000000..a76615fa39a1
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3.h
@@ -0,0 +1,604 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+#ifndef __SELFTESTS_GIC_V3_H
+#define __SELFTESTS_GIC_V3_H
+
+/*
+ * Distributor registers. We assume we're running non-secure, with ARE
+ * being set. Secure-only and non-ARE registers are not described.
+ */
+#define GICD_CTLR 0x0000
+#define GICD_TYPER 0x0004
+#define GICD_IIDR 0x0008
+#define GICD_TYPER2 0x000C
+#define GICD_STATUSR 0x0010
+#define GICD_SETSPI_NSR 0x0040
+#define GICD_CLRSPI_NSR 0x0048
+#define GICD_SETSPI_SR 0x0050
+#define GICD_CLRSPI_SR 0x0058
+#define GICD_IGROUPR 0x0080
+#define GICD_ISENABLER 0x0100
+#define GICD_ICENABLER 0x0180
+#define GICD_ISPENDR 0x0200
+#define GICD_ICPENDR 0x0280
+#define GICD_ISACTIVER 0x0300
+#define GICD_ICACTIVER 0x0380
+#define GICD_IPRIORITYR 0x0400
+#define GICD_ICFGR 0x0C00
+#define GICD_IGRPMODR 0x0D00
+#define GICD_NSACR 0x0E00
+#define GICD_IGROUPRnE 0x1000
+#define GICD_ISENABLERnE 0x1200
+#define GICD_ICENABLERnE 0x1400
+#define GICD_ISPENDRnE 0x1600
+#define GICD_ICPENDRnE 0x1800
+#define GICD_ISACTIVERnE 0x1A00
+#define GICD_ICACTIVERnE 0x1C00
+#define GICD_IPRIORITYRnE 0x2000
+#define GICD_ICFGRnE 0x3000
+#define GICD_IROUTER 0x6000
+#define GICD_IROUTERnE 0x8000
+#define GICD_IDREGS 0xFFD0
+#define GICD_PIDR2 0xFFE8
+
+#define ESPI_BASE_INTID 4096
+
+/*
+ * Those registers are actually from GICv2, but the spec demands that they
+ * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
+ */
+#define GICD_ITARGETSR 0x0800
+#define GICD_SGIR 0x0F00
+#define GICD_CPENDSGIR 0x0F10
+#define GICD_SPENDSGIR 0x0F20
+
+#define GICD_CTLR_RWP (1U << 31)
+#define GICD_CTLR_nASSGIreq (1U << 8)
+#define GICD_CTLR_DS (1U << 6)
+#define GICD_CTLR_ARE_NS (1U << 4)
+#define GICD_CTLR_ENABLE_G1A (1U << 1)
+#define GICD_CTLR_ENABLE_G1 (1U << 0)
+
+#define GICD_IIDR_IMPLEMENTER_SHIFT 0
+#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT 12
+#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT 16
+#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT 24
+#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
+/*
+ * In systems with a single security state (what we emulate in KVM)
+ * the meaning of the interrupt group enable bits is slightly different
+ */
+#define GICD_CTLR_ENABLE_SS_G1 (1U << 1)
+#define GICD_CTLR_ENABLE_SS_G0 (1U << 0)
+
+#define GICD_TYPER_RSS (1U << 26)
+#define GICD_TYPER_LPIS (1U << 17)
+#define GICD_TYPER_MBIS (1U << 16)
+#define GICD_TYPER_ESPI (1U << 8)
+
+#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1)
+#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1)
+#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32)
+#define GICD_TYPER_ESPIS(typer) \
+ (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
+
+#define GICD_TYPER2_nASSGIcap (1U << 8)
+#define GICD_TYPER2_VIL (1U << 7)
+#define GICD_TYPER2_VID GENMASK(4, 0)
+
+#define GICD_IROUTER_SPI_MODE_ONE (0U << 31)
+#define GICD_IROUTER_SPI_MODE_ANY (1U << 31)
+
+#define GIC_PIDR2_ARCH_MASK 0xf0
+#define GIC_PIDR2_ARCH_GICv3 0x30
+#define GIC_PIDR2_ARCH_GICv4 0x40
+
+#define GIC_V3_DIST_SIZE 0x10000
+
+#define GIC_PAGE_SIZE_4K 0ULL
+#define GIC_PAGE_SIZE_16K 1ULL
+#define GIC_PAGE_SIZE_64K 2ULL
+#define GIC_PAGE_SIZE_MASK 3ULL
+
+/*
+ * Re-Distributor registers, offsets from RD_base
+ */
+#define GICR_CTLR GICD_CTLR
+#define GICR_IIDR 0x0004
+#define GICR_TYPER 0x0008
+#define GICR_STATUSR GICD_STATUSR
+#define GICR_WAKER 0x0014
+#define GICR_SETLPIR 0x0040
+#define GICR_CLRLPIR 0x0048
+#define GICR_PROPBASER 0x0070
+#define GICR_PENDBASER 0x0078
+#define GICR_INVLPIR 0x00A0
+#define GICR_INVALLR 0x00B0
+#define GICR_SYNCR 0x00C0
+#define GICR_IDREGS GICD_IDREGS
+#define GICR_PIDR2 GICD_PIDR2
+
+#define GICR_CTLR_ENABLE_LPIS (1UL << 0)
+#define GICR_CTLR_CES (1UL << 1)
+#define GICR_CTLR_IR (1UL << 2)
+#define GICR_CTLR_RWP (1UL << 3)
+
+#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff)
+
+#define EPPI_BASE_INTID 1056
+
+#define GICR_TYPER_NR_PPIS(r) \
+ ({ \
+ unsigned int __ppinum = ((r) >> 27) & 0x1f; \
+ unsigned int __nr_ppis = 16; \
+ if (__ppinum == 1 || __ppinum == 2) \
+ __nr_ppis += __ppinum * 32; \
+ \
+ __nr_ppis; \
+ })
+
+#define GICR_WAKER_ProcessorSleep (1U << 1)
+#define GICR_WAKER_ChildrenAsleep (1U << 2)
+
+#define GIC_BASER_CACHE_nCnB 0ULL
+#define GIC_BASER_CACHE_SameAsInner 0ULL
+#define GIC_BASER_CACHE_nC 1ULL
+#define GIC_BASER_CACHE_RaWt 2ULL
+#define GIC_BASER_CACHE_RaWb 3ULL
+#define GIC_BASER_CACHE_WaWt 4ULL
+#define GIC_BASER_CACHE_WaWb 5ULL
+#define GIC_BASER_CACHE_RaWaWt 6ULL
+#define GIC_BASER_CACHE_RaWaWb 7ULL
+#define GIC_BASER_CACHE_MASK 7ULL
+#define GIC_BASER_NonShareable 0ULL
+#define GIC_BASER_InnerShareable 1ULL
+#define GIC_BASER_OuterShareable 2ULL
+#define GIC_BASER_SHAREABILITY_MASK 3ULL
+
+#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \
+ (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT)
+
+#define GIC_BASER_SHAREABILITY(reg, type) \
+ (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
+
+/* encode a size field of width @w containing @n - 1 units */
+#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
+
+#define GICR_PROPBASER_SHAREABILITY_SHIFT (10)
+#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_PROPBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK)
+#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK)
+#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PROPBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)
+
+#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB)
+#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC)
+#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt)
+#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)
+#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt)
+#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb)
+#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt)
+#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
+
+#define GICR_PROPBASER_IDBITS_MASK (0x1f)
+#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12))
+#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16))
+
+#define GICR_PENDBASER_SHAREABILITY_SHIFT (10)
+#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_PENDBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK)
+#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK)
+#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PENDBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)
+
+#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB)
+#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC)
+#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt)
+#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)
+#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt)
+#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb)
+#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt)
+#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb)
+
+#define GICR_PENDBASER_PTZ BIT_ULL(62)
+
+/*
+ * Re-Distributor registers, offsets from SGI_base
+ */
+#define GICR_IGROUPR0 GICD_IGROUPR
+#define GICR_ISENABLER0 GICD_ISENABLER
+#define GICR_ICENABLER0 GICD_ICENABLER
+#define GICR_ISPENDR0 GICD_ISPENDR
+#define GICR_ICPENDR0 GICD_ICPENDR
+#define GICR_ISACTIVER0 GICD_ISACTIVER
+#define GICR_ICACTIVER0 GICD_ICACTIVER
+#define GICR_IPRIORITYR0 GICD_IPRIORITYR
+#define GICR_ICFGR0 GICD_ICFGR
+#define GICR_IGRPMODR0 GICD_IGRPMODR
+#define GICR_NSACR GICD_NSACR
+
+#define GICR_TYPER_PLPIS (1U << 0)
+#define GICR_TYPER_VLPIS (1U << 1)
+#define GICR_TYPER_DIRTY (1U << 2)
+#define GICR_TYPER_DirectLPIS (1U << 3)
+#define GICR_TYPER_LAST (1U << 4)
+#define GICR_TYPER_RVPEID (1U << 7)
+#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24)
+#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32)
+
+#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0)
+#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32)
+#define GICR_INVLPIR_V GENMASK_ULL(63, 63)
+
+#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID
+#define GICR_INVALLR_V GICR_INVLPIR_V
+
+#define GIC_V3_REDIST_SIZE 0x20000
+
+#define LPI_PROP_GROUP1 (1 << 1)
+#define LPI_PROP_ENABLED (1 << 0)
+
+/*
+ * Re-Distributor registers, offsets from VLPI_base
+ */
+#define GICR_VPROPBASER 0x0070
+
+#define GICR_VPROPBASER_IDBITS_MASK 0x1f
+
+#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56)
+
+#define GICR_VPROPBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
+#define GICR_VPROPBASER_CACHEABILITY_MASK \
+ GICR_VPROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPROPBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
+
+#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
+#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
+#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
+#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb)
+#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
+#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
+#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
+#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
+
+/*
+ * GICv4.1 VPROPBASER reinvention. A subtle mix between the old
+ * VPROPBASER and ITS_BASER. Just not quite any of the two.
+ */
+#define GICR_VPROPBASER_4_1_VALID (1ULL << 63)
+#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59)
+#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55)
+#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53)
+#define GICR_VPROPBASER_4_1_Z (1ULL << 52)
+#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12)
+#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0)
+
+#define GICR_VPENDBASER 0x0078
+
+#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_VPENDBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
+#define GICR_VPENDBASER_CACHEABILITY_MASK \
+ GICR_VPENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPENDBASER_NonShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
+
+#define GICR_VPENDBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable)
+
+#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
+#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
+#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
+#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb)
+#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
+#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
+#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
+#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
+
+#define GICR_VPENDBASER_Dirty (1ULL << 60)
+#define GICR_VPENDBASER_PendingLast (1ULL << 61)
+#define GICR_VPENDBASER_IDAI (1ULL << 62)
+#define GICR_VPENDBASER_Valid (1ULL << 63)
+
+/*
+ * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields,
+ * also use the above Valid, PendingLast and Dirty.
+ */
+#define GICR_VPENDBASER_4_1_DB (1ULL << 62)
+#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59)
+#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58)
+#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0)
+
+#define GICR_VSGIR 0x0080
+
+#define GICR_VSGIR_VPEID GENMASK(15, 0)
+
+#define GICR_VSGIPENDR 0x0088
+
+#define GICR_VSGIPENDR_BUSY (1U << 31)
+#define GICR_VSGIPENDR_PENDING GENMASK(15, 0)
+
+/*
+ * ITS registers, offsets from ITS_base
+ */
+#define GITS_CTLR 0x0000
+#define GITS_IIDR 0x0004
+#define GITS_TYPER 0x0008
+#define GITS_MPIDR 0x0018
+#define GITS_CBASER 0x0080
+#define GITS_CWRITER 0x0088
+#define GITS_CREADR 0x0090
+#define GITS_BASER 0x0100
+#define GITS_IDREGS_BASE 0xffd0
+#define GITS_PIDR0 0xffe0
+#define GITS_PIDR1 0xffe4
+#define GITS_PIDR2 GICR_PIDR2
+#define GITS_PIDR4 0xffd0
+#define GITS_CIDR0 0xfff0
+#define GITS_CIDR1 0xfff4
+#define GITS_CIDR2 0xfff8
+#define GITS_CIDR3 0xfffc
+
+#define GITS_TRANSLATER 0x10040
+
+#define GITS_SGIR 0x20020
+
+#define GITS_SGIR_VPEID GENMASK_ULL(47, 32)
+#define GITS_SGIR_VINTID GENMASK_ULL(3, 0)
+
+#define GITS_CTLR_ENABLE (1U << 0)
+#define GITS_CTLR_ImDe (1U << 1)
+#define GITS_CTLR_ITS_NUMBER_SHIFT 4
+#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
+#define GITS_CTLR_QUIESCENT (1U << 31)
+
+#define GITS_TYPER_PLPIS (1UL << 0)
+#define GITS_TYPER_VLPIS (1UL << 1)
+#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4
+#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4)
+#define GITS_TYPER_IDBITS_SHIFT 8
+#define GITS_TYPER_DEVBITS_SHIFT 13
+#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13)
+#define GITS_TYPER_PTA (1UL << 19)
+#define GITS_TYPER_HCC_SHIFT 24
+#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
+#define GITS_TYPER_VMOVP (1ULL << 37)
+#define GITS_TYPER_VMAPP (1ULL << 40)
+#define GITS_TYPER_SVPET GENMASK_ULL(42, 41)
+
+#define GITS_IIDR_REV_SHIFT 12
+#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT)
+#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
+#define GITS_IIDR_PRODUCTID_SHIFT 24
+
+#define GITS_CBASER_VALID (1ULL << 63)
+#define GITS_CBASER_SHAREABILITY_SHIFT (10)
+#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59)
+#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53)
+#define GITS_CBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK)
+#define GITS_CBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK)
+#define GITS_CBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK)
+#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK
+
+#define GITS_CBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable)
+
+#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB)
+#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC)
+#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt)
+#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb)
+#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt)
+#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb)
+#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
+#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
+
+#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12))
+
+#define GITS_BASER_NR_REGS 8
+
+#define GITS_BASER_VALID (1ULL << 63)
+#define GITS_BASER_INDIRECT (1ULL << 62)
+
+#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59)
+#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53)
+#define GITS_BASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK)
+#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK
+#define GITS_BASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK)
+#define GITS_BASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK)
+
+#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB)
+#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC)
+#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt)
+#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)
+#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt)
+#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb)
+#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt)
+#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb)
+
+#define GITS_BASER_TYPE_SHIFT (56)
+#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
+#define GITS_BASER_ENTRY_SIZE_SHIFT (48)
+#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
+#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48)
+#define GITS_BASER_PHYS_52_to_48(phys) \
+ (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
+#define GITS_BASER_ADDR_48_to_52(baser) \
+ (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
+
+#define GITS_BASER_SHAREABILITY_SHIFT (10)
+#define GITS_BASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
+#define GITS_BASER_PAGE_SIZE_SHIFT (8)
+#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K)
+#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K)
+#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K)
+#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK)
+#define GITS_BASER_PAGES_MAX 256
+#define GITS_BASER_PAGES_SHIFT (0)
+#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1)
+
+#define GITS_BASER_TYPE_NONE 0
+#define GITS_BASER_TYPE_DEVICE 1
+#define GITS_BASER_TYPE_VCPU 2
+#define GITS_BASER_TYPE_RESERVED3 3
+#define GITS_BASER_TYPE_COLLECTION 4
+#define GITS_BASER_TYPE_RESERVED5 5
+#define GITS_BASER_TYPE_RESERVED6 6
+#define GITS_BASER_TYPE_RESERVED7 7
+
+#define GITS_LVL1_ENTRY_SIZE (8UL)
+
+/*
+ * ITS commands
+ */
+#define GITS_CMD_MAPD 0x08
+#define GITS_CMD_MAPC 0x09
+#define GITS_CMD_MAPTI 0x0a
+#define GITS_CMD_MAPI 0x0b
+#define GITS_CMD_MOVI 0x01
+#define GITS_CMD_DISCARD 0x0f
+#define GITS_CMD_INV 0x0c
+#define GITS_CMD_MOVALL 0x0e
+#define GITS_CMD_INVALL 0x0d
+#define GITS_CMD_INT 0x03
+#define GITS_CMD_CLEAR 0x04
+#define GITS_CMD_SYNC 0x05
+
+/*
+ * GICv4 ITS specific commands
+ */
+#define GITS_CMD_GICv4(x) ((x) | 0x20)
+#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL)
+#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC)
+#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI)
+#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI)
+#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC)
+/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */
+#define GITS_CMD_VMOVP GITS_CMD_GICv4(2)
+#define GITS_CMD_VSGI GITS_CMD_GICv4(3)
+#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe)
+
+/*
+ * ITS error numbers
+ */
+#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107
+#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307
+#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507
+#define E_ITS_MAPD_DEVICE_OOR 0x010801
+#define E_ITS_MAPD_ITTSIZE_OOR 0x010802
+#define E_ITS_MAPC_PROCNUM_OOR 0x010902
+#define E_ITS_MAPC_COLLECTION_OOR 0x010903
+#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04
+#define E_ITS_MAPTI_ID_OOR 0x010a05
+#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06
+#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07
+#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09
+#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01
+#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07
+
+/*
+ * CPU interface registers
+ */
+#define ICC_CTLR_EL1_EOImode_SHIFT (1)
+#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_CBPR_SHIFT 0
+#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT)
+#define ICC_CTLR_EL1_PMHE_SHIFT 6
+#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT)
+#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8
+#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
+#define ICC_CTLR_EL1_ID_BITS_SHIFT 11
+#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
+#define ICC_CTLR_EL1_SEIS_SHIFT 14
+#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
+#define ICC_CTLR_EL1_A3V_SHIFT 15
+#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
+#define ICC_CTLR_EL1_RSS (0x1 << 18)
+#define ICC_CTLR_EL1_ExtRange (0x1 << 19)
+#define ICC_PMR_EL1_SHIFT 0
+#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT)
+#define ICC_BPR0_EL1_SHIFT 0
+#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT)
+#define ICC_BPR1_EL1_SHIFT 0
+#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT)
+#define ICC_IGRPEN0_EL1_SHIFT 0
+#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT)
+#define ICC_IGRPEN1_EL1_SHIFT 0
+#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB (1U << 2)
+#define ICC_SRE_EL1_DFB (1U << 1)
+#define ICC_SRE_EL1_SRE (1U << 0)
+
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID (0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT (10)
+#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+#define ICC_IAR1_EL1_SPURIOUS 0x3ff
+
+#define ICC_SRE_EL2_SRE (1 << 0)
+#define ICC_SRE_EL2_ENABLE (1 << 3)
+
+#define ICC_SGI1R_TARGET_LIST_SHIFT 0
+#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
+#define ICC_SGI1R_AFFINITY_1_SHIFT 16
+#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
+#define ICC_SGI1R_SGI_ID_SHIFT 24
+#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
+#define ICC_SGI1R_AFFINITY_2_SHIFT 32
+#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
+#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
+#define ICC_SGI1R_RS_SHIFT 44
+#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT)
+#define ICC_SGI1R_AFFINITY_3_SHIFT 48
+#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
new file mode 100644
index 000000000000..58feef3eb386
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTESTS_GIC_V3_ITS_H__
+#define __SELFTESTS_GIC_V3_ITS_H__
+
+#include <linux/sizes.h>
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+ vm_paddr_t device_tbl, size_t device_tbl_sz,
+ vm_paddr_t cmdq, size_t cmdq_size);
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+ size_t itt_size, bool valid);
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+ u32 collection_id, u32 intid);
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id);
+
+#endif // __SELFTESTS_GIC_V3_ITS_H__
diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
new file mode 100644
index 000000000000..b973bb2c64a6
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {
+ bool has_gic;
+ int gic_fd;
+};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
new file mode 100644
index 000000000000..ff928716574d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -0,0 +1,387 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch64 processor specific defines
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <asm/brk-imm.h>
+#include <asm/esr.h>
+#include <asm/sysreg.h>
+
+
+#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+/*
+ * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
+ * SYS_* register definitions in asm/sysreg.h to use in KVM
+ * calls such as vcpu_get_reg() and vcpu_set_reg().
+ */
+#define KVM_ARM64_SYS_REG(sys_reg_id) \
+ ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \
+ sys_reg_Op1(sys_reg_id), \
+ sys_reg_CRn(sys_reg_id), \
+ sys_reg_CRm(sys_reg_id), \
+ sys_reg_Op2(sys_reg_id))
+
+/*
+ * Default MAIR
+ * index attribute
+ * DEVICE_nGnRnE 0 0000:0000
+ * DEVICE_nGnRE 1 0000:0100
+ * DEVICE_GRE 2 0000:1100
+ * NORMAL_NC 3 0100:0100
+ * NORMAL 4 1111:1111
+ * NORMAL_WT 5 1011:1011
+ */
+
+/* Linux doesn't use these memory types, so let's define them. */
+#define MAIR_ATTR_DEVICE_GRE UL(0x0c)
+#define MAIR_ATTR_NORMAL_WT UL(0xbb)
+
+#define MT_DEVICE_nGnRnE 0
+#define MT_DEVICE_nGnRE 1
+#define MT_DEVICE_GRE 2
+#define MT_NORMAL_NC 3
+#define MT_NORMAL 4
+#define MT_NORMAL_WT 5
+
+#define DEFAULT_MAIR_EL1 \
+ (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \
+ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \
+ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+
+/* TCR_EL1 specific flags */
+#define TCR_T0SZ_OFFSET 0
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+
+#define TCR_IRGN0_SHIFT 8
+#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_ORGN0_SHIFT 10
+#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_SH0_SHIFT 12
+#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_TG0_SHIFT 14
+#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_IPS_SHIFT 32
+#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
+#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT)
+#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT)
+#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT)
+#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT)
+
+#define TCR_HA (UL(1) << 39)
+#define TCR_DS (UL(1) << 59)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PTE_ATTRINDX(t) ((t) << 2)
+#define PTE_ATTRINDX_MASK GENMASK(4, 2)
+#define PTE_ATTRINDX_SHIFT 2
+
+#define PTE_VALID BIT(0)
+#define PGD_TYPE_TABLE BIT(1)
+#define PUD_TYPE_TABLE BIT(1)
+#define PMD_TYPE_TABLE BIT(1)
+#define PTE_TYPE_PAGE BIT(1)
+
+#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
+#define PTE_AF BIT(10)
+
+#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift))
+#define PTE_ADDR_51_48 GENMASK(15, 12)
+#define PTE_ADDR_51_48_SHIFT 12
+#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift))
+#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+#define PTE_ADDR_51_50_LPA2_SHIFT 8
+
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init, void *guest_code);
+
+struct ex_regs {
+ u64 regs[31];
+ u64 sp;
+ u64 pc;
+ u64 pstate;
+};
+
+#define VECTOR_NUM 16
+
+enum {
+ VECTOR_SYNC_CURRENT_SP0,
+ VECTOR_IRQ_CURRENT_SP0,
+ VECTOR_FIQ_CURRENT_SP0,
+ VECTOR_ERROR_CURRENT_SP0,
+
+ VECTOR_SYNC_CURRENT,
+ VECTOR_IRQ_CURRENT,
+ VECTOR_FIQ_CURRENT,
+ VECTOR_ERROR_CURRENT,
+
+ VECTOR_SYNC_LOWER_64,
+ VECTOR_IRQ_LOWER_64,
+ VECTOR_FIQ_LOWER_64,
+ VECTOR_ERROR_LOWER_64,
+
+ VECTOR_SYNC_LOWER_32,
+ VECTOR_IRQ_LOWER_32,
+ VECTOR_FIQ_LOWER_32,
+ VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+ (v) == VECTOR_SYNC_CURRENT || \
+ (v) == VECTOR_SYNC_LOWER_64 || \
+ (v) == VECTOR_SYNC_LOWER_32)
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+ uint32_t *ipa16k, uint32_t *ipa64k);
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+ int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+ int vector, int ec, handler_fn handler);
+
+uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level);
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline void cpu_relax(void)
+{
+ asm volatile("yield" ::: "memory");
+}
+
+#define isb() asm volatile("isb" : : : "memory")
+#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+
+#define dma_wmb() dmb(oshst)
+#define __iowmb() dma_wmb()
+
+#define dma_rmb() dmb(oshld)
+
+#define __iormb(v) \
+({ \
+ unsigned long tmp; \
+ \
+ dma_rmb(); \
+ \
+ /* \
+ * Courtesy of arch/arm64/include/asm/io.h: \
+ * Create a dummy control dependency from the IO read to any \
+ * later instructions. This ensures that a subsequent call \
+ * to udelay() will be ordered due to the ISB in __delay(). \
+ */ \
+ asm volatile("eor %0, %1, %1\n" \
+ "cbnz %0, ." \
+ : "=r" (tmp) : "r" ((unsigned long)(v)) \
+ : "memory"); \
+})
+
+static __always_inline void __raw_writel(u32 val, volatile void *addr)
+{
+ asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u32 __raw_readl(const volatile void *addr)
+{
+ u32 val;
+ asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
+ return val;
+}
+
+static __always_inline void __raw_writeq(u64 val, volatile void *addr)
+{
+ asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u64 __raw_readq(const volatile void *addr)
+{
+ u64 val;
+ asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
+ return val;
+}
+
+#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
+#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
+#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
+
+#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));})
+#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));})
+#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
+
+
+static inline void local_irq_enable(void)
+{
+ asm volatile("msr daifclr, #3" : : : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+ asm volatile("msr daifset, #3" : : : "memory");
+}
+
+static inline void local_serror_enable(void)
+{
+ asm volatile("msr daifclr, #4" : : : "memory");
+}
+
+static inline void local_serror_disable(void)
+{
+ asm volatile("msr daifset, #4" : : : "memory");
+}
+
+/**
+ * struct arm_smccc_res - Result from SMC/HVC call
+ * @a0-a3 result values from registers 0 to 3
+ */
+struct arm_smccc_res {
+ unsigned long a0;
+ unsigned long a1;
+ unsigned long a2;
+ unsigned long a3;
+};
+
+/**
+ * smccc_hvc - Invoke a SMCCC function using the hvc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res);
+
+/**
+ * smccc_smc - Invoke a SMCCC function using the smc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res);
+
+/* Execute a Wait For Interrupt instruction. */
+void wfi(void);
+
+void test_wants_mte(void);
+void test_disable_default_vgic(void);
+
+bool vm_supports_el2(struct kvm_vm *vm);
+
+static inline bool test_supports_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ bool supported = vm_supports_el2(vm);
+
+ kvm_vm_free(vm);
+ return supported;
+}
+
+static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu)
+{
+ return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
+}
+
+#define MAPPED_EL2_SYSREG(el2, el1) \
+ case SYS_##el1: \
+ if (vcpu_has_el2(vcpu)) \
+ alias = SYS_##el2; \
+ break
+
+
+static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding)
+{
+ u32 alias = encoding;
+
+ BUILD_BUG_ON(!__builtin_constant_p(encoding));
+
+ switch (encoding) {
+ MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1);
+ MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1);
+ MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1);
+ MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1);
+ MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1);
+ MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1);
+ MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1);
+ MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1);
+ MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1);
+ MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1);
+ MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1);
+ MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1);
+ MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1);
+ MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1);
+ MAPPED_EL2_SYSREG(POR_EL2, POR_EL1);
+ MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1);
+ MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1);
+ MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1);
+ MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1);
+ MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1);
+ MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1);
+ MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1);
+ case SYS_SP_EL1:
+ if (!vcpu_has_el2(vcpu))
+ return ARM64_CORE_REG(sp_el1);
+
+ alias = SYS_SP_EL2;
+ break;
+ default:
+ BUILD_BUG();
+ }
+
+ return KVM_ARM64_SYS_REG(alias);
+}
+
+void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init);
+
+static inline unsigned int get_current_el(void)
+{
+ return (read_sysreg(CurrentEL) >> 2) & 0x3;
+}
+
+#define do_smccc(...) \
+do { \
+ if (get_current_el() == 2) \
+ smccc_smc(__VA_ARGS__); \
+ else \
+ smccc_hvc(__VA_ARGS__); \
+} while (0)
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h
new file mode 100644
index 000000000000..cf0984106d14
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/spinlock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
+#define SELFTEST_KVM_ARM64_SPINLOCK_H
+
+struct spinlock {
+ int v;
+};
+
+extern void spin_lock(struct spinlock *lock);
+extern void spin_unlock(struct spinlock *lock);
+
+#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h
new file mode 100644
index 000000000000..4ec801f37f00
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/ucall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h
new file mode 100644
index 000000000000..688beccc9436
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/vgic.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) host specific defines
+ */
+
+#ifndef SELFTEST_KVM_VGIC_H
+#define SELFTEST_KVM_VGIC_H
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+
+#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
+ (((uint64_t)(count) << 52) | \
+ ((uint64_t)((base) >> 16) << 16) | \
+ ((uint64_t)(flags) << 12) | \
+ index)
+
+bool kvm_supports_vgic_v3(void);
+int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
+void __vgic_v3_init(int fd);
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
+
+#define VGIC_MAX_RESERVED 1023
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+
+/* The vcpu arg only applies to private interrupts. */
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+
+#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
+
+int vgic_its_setup(struct kvm_vm *vm);
+
+#endif // SELFTEST_KVM_VGIC_H
diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h
index b691df33e64e..63f5167397cc 100644
--- a/tools/testing/selftests/kvm/include/guest_modes.h
+++ b/tools/testing/selftests/kvm/include/guest_modes.h
@@ -11,8 +11,8 @@ struct guest_mode {
extern struct guest_mode guest_modes[NUM_VM_MODES];
-#define guest_mode_append(mode, supported, enabled) ({ \
- guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+#define guest_mode_append(mode, enabled) ({ \
+ guest_modes[mode] = (struct guest_mode){ (enabled), (enabled) }; \
})
void guest_modes_append_default(void);
diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h
new file mode 100644
index 000000000000..d4e613162bba
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_SYSCALLS_H
+#define SELFTEST_KVM_SYSCALLS_H
+
+#include <sys/syscall.h>
+
+#define MAP_ARGS0(m,...)
+#define MAP_ARGS1(m,t,a,...) m(t,a)
+#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__)
+#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__)
+#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__)
+#define MAP_ARGS5(m,t,a,...) m(t,a), MAP_ARGS4(m,__VA_ARGS__)
+#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__)
+#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__)
+
+#define __DECLARE_ARGS(t, a) t a
+#define __UNPACK_ARGS(t, a) a
+
+#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args)
+#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args)
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+/* Define a kvm_<syscall>() API to assert success. */
+#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ int r; \
+ \
+ r = name(UNPACK_ARGS(nr_args, args)); \
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \
+}
+
+/*
+ * Macro to define syscall APIs, either because KVM selftests doesn't link to
+ * the standard library, e.g. libnuma, or because there is no library that yet
+ * provides the syscall. These
+ */
+#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline long name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \
+} \
+__KVM_SYSCALL_DEFINE(name, nr_args, args)
+
+/*
+ * Special case mmap(), as KVM selftest rarely/never specific an address,
+ * rarely specify an offset, and because the unique return code requires
+ * special handling anyways.
+ */
+static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
+ off_t offset)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, prot, flags, fd, offset);
+ TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
+ (int)(unsigned long)MAP_FAILED));
+ return mem;
+}
+
+static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
+{
+ return __kvm_mmap(size, prot, flags, fd, 0);
+}
+
+static inline int kvm_dup(int fd)
+{
+ int new_fd = dup(fd);
+
+ TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd));
+ return new_fd;
+}
+
+__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size);
+__KVM_SYSCALL_DEFINE(close, 1, int, fd);
+__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len);
+__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length);
+
+#endif /* SELFTEST_KVM_SYSCALLS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_test_harness.h b/tools/testing/selftests/kvm/include/kvm_test_harness.h
new file mode 100644
index 000000000000..8f7c6858e8e2
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_test_harness.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Macros for defining a KVM test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_TEST_HARNESS_H
+#define SELFTEST_KVM_TEST_HARNESS_H
+
+#include "kselftest_harness.h"
+
+#define KVM_ONE_VCPU_TEST_SUITE(name) \
+ FIXTURE(name) { \
+ struct kvm_vcpu *vcpu; \
+ }; \
+ \
+ FIXTURE_SETUP(name) { \
+ (void)vm_create_with_one_vcpu(&self->vcpu, NULL); \
+ } \
+ \
+ FIXTURE_TEARDOWN(name) { \
+ kvm_vm_free(self->vcpu->vm); \
+ }
+
+#define KVM_ONE_VCPU_TEST(suite, test, guestcode) \
+static void __suite##_##test(struct kvm_vcpu *vcpu); \
+ \
+TEST_F(suite, test) \
+{ \
+ vcpu_arch_set_entry_point(self->vcpu, guestcode); \
+ __suite##_##test(self->vcpu); \
+} \
+static void __suite##_##test(struct kvm_vcpu *vcpu)
+
+#endif /* SELFTEST_KVM_TEST_HARNESS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 2d7eb6989e83..81f4355ff28a 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/kvm_util.h
- *
* Copyright (C) 2018, Google LLC.
*/
#ifndef SELFTEST_KVM_UTIL_H
@@ -9,68 +7,251 @@
#include "test_util.h"
-#include "asm/kvm.h"
+#include <linux/compiler.h>
+#include "linux/hashtable.h"
#include "linux/list.h"
-#include "linux/kvm.h"
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include "linux/rbtree.h"
+#include <linux/types.h>
+
+#include <asm/atomic.h>
+#include <asm/kvm.h>
+
+#include <sys/eventfd.h>
#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include "kvm_syscalls.h"
+#include "kvm_util_arch.h"
+#include "kvm_util_types.h"
#include "sparsebit.h"
+#define KVM_DEV_PATH "/dev/kvm"
#define KVM_MAX_VCPUS 512
-/*
- * Callers of kvm_util only have an incomplete/opaque description of the
- * structure kvm_util is using to maintain the state of a VM.
- */
-struct kvm_vm;
+#define NSEC_PER_SEC 1000000000L
+
+struct userspace_mem_region {
+ struct kvm_userspace_memory_region2 region;
+ struct sparsebit *unused_phy_pages;
+ struct sparsebit *protected_phy_pages;
+ int fd;
+ off_t offset;
+ enum vm_mem_backing_src_type backing_src_type;
+ void *host_mem;
+ void *host_alias;
+ void *mmap_start;
+ void *mmap_alias;
+ size_t mmap_size;
+ struct rb_node gpa_node;
+ struct rb_node hva_node;
+ struct hlist_node slot_node;
+};
+
+struct kvm_binary_stats {
+ int fd;
+ struct kvm_stats_header header;
+ struct kvm_stats_desc *desc;
+};
+
+struct kvm_vcpu {
+ struct list_head list;
+ uint32_t id;
+ int fd;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+#ifdef __x86_64__
+ struct kvm_cpuid2 *cpuid;
+#endif
+#ifdef __aarch64__
+ struct kvm_vcpu_init init;
+#endif
+ struct kvm_binary_stats stats;
+ struct kvm_dirty_gfn *dirty_gfns;
+ uint32_t fetch_index;
+ uint32_t dirty_gfns_count;
+};
+
+struct userspace_mem_regions {
+ struct rb_root gpa_tree;
+ struct rb_root hva_tree;
+ DECLARE_HASHTABLE(slot_hash, 9);
+};
+
+enum kvm_mem_region_type {
+ MEM_REGION_CODE,
+ MEM_REGION_DATA,
+ MEM_REGION_PT,
+ MEM_REGION_TEST_DATA,
+ NR_MEM_REGIONS,
+};
+
+struct kvm_vm {
+ int mode;
+ unsigned long type;
+ int kvm_fd;
+ int fd;
+ unsigned int pgtable_levels;
+ unsigned int page_size;
+ unsigned int page_shift;
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ uint64_t max_gfn;
+ struct list_head vcpus;
+ struct userspace_mem_regions regions;
+ struct sparsebit *vpages_valid;
+ struct sparsebit *vpages_mapped;
+ bool has_irqchip;
+ bool pgd_created;
+ vm_paddr_t ucall_mmio_addr;
+ vm_paddr_t pgd;
+ vm_vaddr_t handlers;
+ uint32_t dirty_ring_size;
+ uint64_t gpa_tag_mask;
+
+ struct kvm_vm_arch arch;
+
+ struct kvm_binary_stats stats;
+
+ /*
+ * KVM region slots. These are the default memslots used by page
+ * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
+ * memslot.
+ */
+ uint32_t memslots[NR_MEM_REGIONS];
+};
+
+struct vcpu_reg_sublist {
+ const char *name;
+ long capability;
+ int feature;
+ int feature_type;
+ bool finalize;
+ __u64 *regs;
+ __u64 regs_n;
+ __u64 *rejects_set;
+ __u64 rejects_set_n;
+ __u64 *skips_set;
+ __u64 skips_set_n;
+};
+
+struct vcpu_reg_list {
+ char *name;
+ struct vcpu_reg_sublist sublists[];
+};
+
+#define for_each_sublist(c, s) \
+ for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
-typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
-typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+#define kvm_for_each_vcpu(vm, i, vcpu) \
+ for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
+ if (!((vcpu) = vm->vcpus[i])) \
+ continue; \
+ else
+
+struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot);
+
+static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
+ enum kvm_mem_region_type type)
+{
+ assert(type < NR_MEM_REGIONS);
+ return memslot2region(vm, vm->memslots[type]);
+}
/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-#define DEFAULT_GUEST_PHY_PAGES 512
#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
#define DEFAULT_STACK_PGS 5
enum vm_guest_mode {
VM_MODE_P52V48_4K,
+ VM_MODE_P52V48_16K,
VM_MODE_P52V48_64K,
VM_MODE_P48V48_4K,
+ VM_MODE_P48V48_16K,
VM_MODE_P48V48_64K,
VM_MODE_P40V48_4K,
+ VM_MODE_P40V48_16K,
VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */
+ VM_MODE_P47V64_4K,
+ VM_MODE_P44V64_4K,
+ VM_MODE_P36V48_4K,
+ VM_MODE_P36V48_16K,
+ VM_MODE_P36V48_64K,
+ VM_MODE_P47V47_16K,
+ VM_MODE_P36V47_16K,
NUM_VM_MODES,
};
+struct vm_shape {
+ uint32_t type;
+ uint8_t mode;
+ uint8_t pad0;
+ uint16_t pad1;
+};
+
+kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
+
+#define VM_TYPE_DEFAULT 0
+
+#define VM_SHAPE(__mode) \
+({ \
+ struct vm_shape shape = { \
+ .mode = (__mode), \
+ .type = VM_TYPE_DEFAULT \
+ }; \
+ \
+ shape; \
+})
+
#if defined(__aarch64__)
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+extern enum vm_guest_mode vm_mode_default;
+
+#define VM_MODE_DEFAULT vm_mode_default
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
#elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
#elif defined(__s390x__)
-#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 16)
+#elif defined(__riscv)
+
+#if __riscv_xlen == 32
+#error "RISC-V 32-bit kvm selftests not supported"
+#endif
+
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__loongarch__)
+#define VM_MODE_DEFAULT VM_MODE_P47V47_16K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
#endif
+#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
+
#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
-#define vm_guest_mode_string(m) vm_guest_mode_string[m]
-extern const char * const vm_guest_mode_string[];
-
struct vm_guest_mode_params {
unsigned int pa_bits;
unsigned int va_bits;
@@ -79,223 +260,842 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
-int kvm_check_cap(long cap);
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
-int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_enable_cap *cap);
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help);
+int open_path_or_exit(const char *path, int flags);
+int open_kvm_dev_path_or_exit(void);
+
+int kvm_get_module_param_integer(const char *module_name, const char *param);
+bool kvm_get_module_param_bool(const char *module_name, const char *param);
+
+static inline bool get_kvm_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm", param);
+}
+
+static inline int get_kvm_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm", param);
+}
+
+unsigned int kvm_check_cap(long cap);
+
+static inline bool kvm_has_cap(long cap)
+{
+ return kvm_check_cap(cap);
+}
+
+/*
+ * Use the "inner", double-underscore macro when reporting errors from within
+ * other macros so that the name of ioctl() and not its literal numeric value
+ * is printed on error. The "outer" macro is strongly preferred when reporting
+ * errors "directly", i.e. without an additional layer of macros, as it reduces
+ * the probability of passing in the wrong string.
+ */
+#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret)
+#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret)
+
+#define kvm_do_ioctl(fd, cmd, arg) \
+({ \
+ kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \
+ ioctl(fd, cmd, arg); \
+})
+
+#define __kvm_ioctl(kvm_fd, cmd, arg) \
+ kvm_do_ioctl(kvm_fd, cmd, arg)
+
+#define kvm_ioctl(kvm_fd, cmd, arg) \
+({ \
+ int ret = __kvm_ioctl(kvm_fd, cmd, arg); \
+ \
+ TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \
+})
+
+static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
+
+#define __vm_ioctl(vm, cmd, arg) \
+({ \
+ static_assert_is_vm(vm); \
+ kvm_do_ioctl((vm)->fd, cmd, arg); \
+})
+
+/*
+ * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if
+ * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM,
+ * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before
+ * selftests existed and (b) should never outright fail, i.e. is supposed to
+ * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the
+ * VM and its vCPUs, including KVM_CHECK_EXTENSION.
+ */
+#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \
+do { \
+ int __errno = errno; \
+ \
+ static_assert_is_vm(vm); \
+ \
+ if (cond) \
+ break; \
+ \
+ if (errno == EIO && \
+ __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \
+ TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \
+ TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \
+ } \
+ errno = __errno; \
+ TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \
+} while (0)
+
+#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \
+ __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm)
+
+#define vm_ioctl(vm, cmd, arg) \
+({ \
+ int ret = __vm_ioctl(vm, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
+})
+
+static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
+
+#define __vcpu_ioctl(vcpu, cmd, arg) \
+({ \
+ static_assert_is_vcpu(vcpu); \
+ kvm_do_ioctl((vcpu)->fd, cmd, arg); \
+})
+
+#define vcpu_ioctl(vcpu, cmd, arg) \
+({ \
+ int ret = __vcpu_ioctl(vcpu, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
+})
+
+/*
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+static inline int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+ int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm);
+ return ret;
+}
+
+static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+
+static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size, uint64_t attributes)
+{
+ struct kvm_memory_attributes attr = {
+ .attributes = attributes,
+ .address = gpa,
+ .size = size,
+ .flags = 0,
+ };
+
+ /*
+ * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows
+ * need significant enhancements to support multiple attributes.
+ */
+ TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE,
+ "Update me to support multiple attributes!");
+
+ vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
+}
+
+
+static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
+}
+
+static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_set_memory_attributes(vm, gpa, size, 0);
+}
+
+void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size,
+ bool punch_hole);
+
+static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_guest_mem_fallocate(vm, gpa, size, true);
+}
+
+static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_guest_mem_fallocate(vm, gpa, size, false);
+}
+
void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+const char *vm_guest_mode_string(uint32_t i);
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);
-void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_restart(struct kvm_vm *vmp);
void kvm_vm_release(struct kvm_vm *vmp);
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages);
-uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
+int kvm_memfd_alloc(size_t size, bool hugepages);
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
- size_t len);
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot);
+static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+ struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
-void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+ vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args);
+}
-/*
- * VM VCPU Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by @vcpuid, within the VM
- * given by @vm, to the FILE stream given by @stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
- uint8_t indent);
+static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages)
+{
+ struct kvm_clear_dirty_log args = {
+ .dirty_bitmap = log,
+ .slot = slot,
+ .first_page = first_page,
+ .num_pages = num_pages
+ };
+
+ vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args);
+}
+
+static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+{
+ return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
+}
+
+static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm,
+ uint64_t address,
+ uint64_t size, bool pio)
+{
+ struct kvm_coalesced_mmio_zone zone = {
+ .addr = address,
+ .size = size,
+ .pio = pio,
+ };
+
+ vm_ioctl(vm, KVM_REGISTER_COALESCED_MMIO, &zone);
+}
+
+static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm,
+ uint64_t address,
+ uint64_t size, bool pio)
+{
+ struct kvm_coalesced_mmio_zone zone = {
+ .addr = address,
+ .size = size,
+ .pio = pio,
+ };
+
+ vm_ioctl(vm, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+}
+
+static inline int vm_get_stats_fd(struct kvm_vm *vm)
+{
+ int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm);
+ return fd;
+}
+
+static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+ uint32_t flags)
+{
+ struct kvm_irqfd irqfd = {
+ .fd = eventfd,
+ .gsi = gsi,
+ .flags = flags,
+ .resamplefd = -1,
+ };
+
+ return __vm_ioctl(vm, KVM_IRQFD, &irqfd);
+}
+
+static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+ uint32_t flags)
+{
+ int ret = __kvm_irqfd(vm, gsi, eventfd, flags);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm);
+}
+
+static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, 0);
+}
+
+static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+static inline int kvm_new_eventfd(void)
+{
+ int fd = eventfd(0, 0);
+
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd));
+ return fd;
+}
+
+static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
+{
+ ssize_t ret;
+
+ ret = pread(stats_fd, header, sizeof(*header), 0);
+ TEST_ASSERT(ret == sizeof(*header),
+ "Failed to read '%lu' header bytes, ret = '%ld'",
+ sizeof(*header), ret);
+}
+
+struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
+ struct kvm_stats_header *header);
+
+static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header)
+{
+ /*
+ * The base size of the descriptor is defined by KVM's ABI, but the
+ * size of the name field is variable, as far as KVM's ABI is
+ * concerned. For a given instance of KVM, the name field is the same
+ * size for all stats and is provided in the overall stats header.
+ */
+ return sizeof(struct kvm_stats_desc) + header->name_size;
+}
+
+static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats,
+ int index,
+ struct kvm_stats_header *header)
+{
+ /*
+ * Note, size_desc includes the size of the name field, which is
+ * variable. i.e. this is NOT equivalent to &stats_desc[i].
+ */
+ return (void *)stats + index * get_stats_descriptor_size(header);
+}
+
+void read_stat_data(int stats_fd, struct kvm_stats_header *header,
+ struct kvm_stats_desc *desc, uint64_t *data,
+ size_t max_elements);
+
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements);
+
+#define __get_stat(stats, stat) \
+({ \
+ uint64_t data; \
+ \
+ kvm_get_stat(stats, #stat, &data, 1); \
+ data; \
+})
+
+#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
+#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
+
+static inline bool read_smt_control(char *buf, size_t buf_size)
+{
+ FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r");
+ bool ret;
+
+ if (!f)
+ return false;
+
+ ret = fread(buf, sizeof(*buf), buf_size, f) > 0;
+ fclose(f);
+
+ return ret;
+}
+
+static inline bool is_smt_possible(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) &&
+ (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12)))
+ return false;
+
+ return true;
+}
+
+static inline bool is_smt_on(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2))
+ return true;
+
+ return false;
+}
void vm_create_irqchip(struct kvm_vm *vm);
+static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ struct kvm_create_guest_memfd guest_memfd = {
+ .size = size,
+ .flags = flags,
+ };
+
+ return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
+}
+
+static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ int fd = __vm_create_guest_memfd(vm, size, flags);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
+ return fd;
+}
+
+void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva);
+int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva);
+void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset);
+int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset);
+
void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
-
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+ enum vm_mem_backing_src_type src_type,
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd_fd, uint64_t guest_memfd_offset);
+
+#ifndef vm_arch_has_protected_memory
+static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
+{
+ return false;
+}
+#endif
+
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot);
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vm_populate_vaddr_bitmap(struct kvm_vm *vm);
+vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot);
+ unsigned int npages);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gva - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Returns the VM physical address of the translated VM virtual
- * address given by @gva.
- */
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
-
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug);
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state);
-struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+#ifndef vcpu_arch_put_guest
+#define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0)
+#endif
+
+static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ return gpa & ~vm->gpa_tag_mask;
+}
+
+void vcpu_run(struct kvm_vcpu *vcpu);
+int _vcpu_run(struct kvm_vcpu *vcpu);
+
+static inline int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_ioctl(vcpu, KVM_RUN, NULL);
+}
+
+void vcpu_run_complete_io(struct kvm_vcpu *vcpu);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu);
+
+static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap,
+ uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap);
+}
+
+static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *debug)
+{
+ vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug);
+}
+
+static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state);
+}
+static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state);
+}
+
+static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_REGS, regs);
+}
+
+static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_REGS, regs);
+}
+static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs);
+
+}
+static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_GET_FPU, fpu);
+}
+static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_SET_FPU, fpu);
+}
+
+static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+ return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+}
+static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
+static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id)
+{
+ uint64_t val;
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id);
+
+ vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+ return val;
+}
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id);
+
+ vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
-/*
- * VM VCPU Args Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * num - number of arguments
- * ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first @num function input registers of the VCPU with @vcpuid,
- * per the C calling convention of the architecture, to the values given
- * as variable args. Each of the variable args is expected to be of type
- * uint64_t. The maximum @num can be is specific to the architecture.
- */
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
+static inline void vcpu_events_get(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events);
+}
+static inline void vcpu_events_set(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events);
+}
#endif
#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state);
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error);
+static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state);
+}
+static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
+
+static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
#endif
-void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
+static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
+{
+ int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
-const char *exit_reason_str(unsigned int exit_reason);
+ TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm);
+ return fd;
+}
+
+int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr);
+
+static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
+{
+ int ret = __kvm_has_device_attr(dev_fd, group, attr);
+
+ TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
+}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_get(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_get(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret));
+}
+
+int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_set(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_set(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
+}
+
+static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ return __kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type);
+int __kvm_create_device(struct kvm_vm *vm, uint64_t type);
+
+static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type)
+{
+ int fd = __kvm_create_device(vm, type);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd));
+ return fd;
+}
+
+void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
/*
- * VM Virtual Page Map
+ * VM VCPU Args Set
*
* Input Args:
* vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
*
* Output Args: None
*
* Return: None
*
- * Within @vm, creates a virtual translation for the page starting
- * at @vaddr to the page starting at @paddr.
+ * Sets the first @num input parameters for the function at @vcpu's entry point,
+ * per the C calling convention of the architecture, to the values given as
+ * variable args. Each of the variable args is expected to be of type uint64_t.
+ * The maximum @num can be is specific to the architecture.
*/
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t memslot);
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...);
+
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+
+#define KVM_MAX_IRQ_ROUTES 4096
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void);
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin);
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+
+const char *exit_reason_str(unsigned int exit_reason);
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
uint32_t memslot);
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot,
+ bool protected);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
+
+static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot)
+{
+ /*
+ * By default, allocate memory as protected for VMs that support
+ * protected memory, as the majority of memory for such VMs is
+ * protected, i.e. using shared memory is effectively opt-in.
+ */
+ return __vm_phy_pages_alloc(vm, num, paddr_min, memslot,
+ vm_arch_has_protected_memory(vm));
+}
/*
- * Create a VM with reasonable defaults
- *
- * Input Args:
- * vcpuid - The id of the single VCPU to add to the VM.
- * extra_mem_pages - The number of extra pages to add (this will
- * decide how much extra space we will need to
- * setup the page tables using memslot 0)
- * guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- * Pointer to opaque structure that describes the created VM.
+ * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also
+ * loads the test binary into guest memory and creates an IRQ chip (x86 only).
+ * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
+ * calculate the amount of memory needed for per-vCPU data, e.g. stacks.
*/
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code);
+struct kvm_vm *____vm_create(struct vm_shape shape);
+struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
+ uint64_t nr_extra_pages);
-/* Same as vm_create_default, but can be used for more than one vcpu */
-struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[]);
+static inline struct kvm_vm *vm_create_barebones(void)
+{
+ return ____vm_create(VM_SHAPE_DEFAULT);
+}
+
+static inline struct kvm_vm *vm_create_barebones_type(unsigned long type)
+{
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = type,
+ };
-/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */
-struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t extra_mem_pages, uint32_t num_percpu_pages,
- void *guest_code, uint32_t vcpuids[]);
+ return ____vm_create(shape);
+}
+
+static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
+{
+ return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0);
+}
+
+struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
+ uint64_t extra_mem_pages,
+ void *guest_code, struct kvm_vcpu *vcpus[]);
+
+static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
+ void *guest_code,
+ struct kvm_vcpu *vcpus[])
+{
+ return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0,
+ guest_code, vcpus);
+}
+
+
+struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code);
/*
- * Adds a vCPU with reasonable defaults (e.g. a stack)
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - The id of the VCPU to add to the VM.
- * guest_code - The vCPU's entry point
+ * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages
+ * additional pages of guest memory. Returns the VM and vCPU (via out param).
*/
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code)
+{
+ return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu,
+ extra_mem_pages, guest_code);
+}
+
+static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ void *guest_code)
+{
+ return __vm_create_with_one_vcpu(vcpu, 0, guest_code);
+}
+
+static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ void *guest_code)
+{
+ return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code);
+}
+
+struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
+
+void kvm_set_files_rlimit(uint32_t nr_vcpus);
+
+int __pin_task_to_cpu(pthread_t task, int cpu);
+
+static inline void pin_task_to_cpu(pthread_t task, int cpu)
+{
+ int r;
+
+ r = __pin_task_to_cpu(task, cpu);
+ TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu);
+}
+
+static inline int pin_task_to_any_cpu(pthread_t task)
+{
+ int cpu = sched_getcpu();
+
+ pin_task_to_cpu(task, cpu);
+ return cpu;
+}
+
+static inline void pin_self_to_cpu(int cpu)
+{
+ pin_task_to_cpu(pthread_self(), cpu);
+}
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+static inline int pin_self_to_any_cpu(void)
+{
+ return pin_task_to_any_cpu(pthread_self());
+}
-unsigned int vm_get_page_size(struct kvm_vm *vm);
-unsigned int vm_get_page_shift(struct kvm_vm *vm);
-unsigned int vm_get_max_gfn(struct kvm_vm *vm);
-int vm_get_fd(struct kvm_vm *vm);
+void kvm_print_vcpu_pinning_help(void);
+void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+ int nr_vcpus);
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
@@ -311,15 +1111,6 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
return n;
}
-struct kvm_userspace_memory_region *
-kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
- uint64_t end);
-
-struct kvm_dirty_log *
-allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
-
-int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
-
#define sync_global_to_guest(vm, g) ({ \
typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
memcpy(_p, &(g), sizeof(g)); \
@@ -330,52 +1121,158 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
memcpy(&(g), _p, sizeof(g)); \
})
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
+/*
+ * Write a global value, but only in the VM's (guest's) domain. Primarily used
+ * for "globals" that hold per-VM values (VMs always duplicate code and global
+ * data into their own region of physical memory), but can be used anytime it's
+ * undesirable to change the host's copy of the global.
+ */
+#define write_guest_global(vm, g, val) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ typeof(g) _val = val; \
+ \
+ memcpy(_p, &(_val), sizeof(g)); \
+})
-/* Common ucalls */
-enum {
- UCALL_NONE,
- UCALL_SYNC,
- UCALL_ABORT,
- UCALL_DONE,
-};
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu);
-#define UCALL_MAX_ARGS 6
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent);
-struct ucall {
- uint64_t cmd;
- uint64_t args[UCALL_MAX_ARGS];
-};
+static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent)
+{
+ vcpu_arch_dump(stream, vcpu, indent);
+}
-void ucall_init(struct kvm_vm *vm, void *arg);
-void ucall_uninit(struct kvm_vm *vm);
-void ucall(uint64_t cmd, int nargs, ...);
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
-
-#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
- ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
-#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
-#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2 + _nargs, \
- "Failed guest assert: " \
- #_condition, __LINE__, _args); \
-} while (0)
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpu_id - The id of the VCPU to add to the VM.
+ */
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
+
+static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code)
+{
+ struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id);
+
+ vcpu_arch_set_entry_point(vcpu, guest_code);
+
+ return vcpu;
+}
+
+/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id);
+
+static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm,
+ uint32_t vcpu_id)
+{
+ return vm_arch_vcpu_recreate(vm, vcpu_id);
+}
+
+void vcpu_arch_free(struct kvm_vcpu *vcpu);
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm);
+
+static inline void virt_pgd_alloc(struct kvm_vm *vm)
+{
+ virt_arch_pgd_alloc(vm);
+}
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
+
+static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ virt_arch_pg_map(vm, vaddr, paddr);
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+}
+
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return addr_arch_gva2gpa(vm, gva);
+}
+
+/*
+ * Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by @stream, the contents of all the
+ * virtual translation tables for the VM given by @vm.
+ */
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ virt_arch_dump(stream, vm, indent);
+}
+
+
+static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
+{
+ return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
+}
-#define GUEST_ASSERT(_condition) \
- __GUEST_ASSERT((_condition), 0, 0)
+/*
+ * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
+ * to allow for arch-specific setup that is common to all tests, e.g. computing
+ * the default guest "mode".
+ */
+void kvm_selftest_arch_init(void);
-#define GUEST_ASSERT_1(_condition, arg1) \
- __GUEST_ASSERT((_condition), 1, (arg1))
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus);
+void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm);
+void kvm_arch_vm_release(struct kvm_vm *vm);
-#define GUEST_ASSERT_2(_condition, arg1, arg2) \
- __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
-#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+uint32_t guest_get_vcpuid(void);
-#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+bool kvm_arch_has_default_irqchip(void);
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h
new file mode 100644
index 000000000000..ec787b97cf18
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_TYPES_H
+#define SELFTEST_KVM_UTIL_TYPES_H
+
+/*
+ * Provide a version of static_assert() that is guaranteed to have an optional
+ * message param. _GNU_SOURCE is defined for all KVM selftests, _GNU_SOURCE
+ * implies _ISOC11_SOURCE, and if _ISOC11_SOURCE is defined, glibc #undefs and
+ * #defines static_assert() as a direct alias to _Static_assert() (see
+ * usr/include/assert.h). Define a custom macro instead of redefining
+ * static_assert() to avoid creating non-deterministic behavior that is
+ * dependent on include order.
+ */
+#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg)
+#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr)
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+#endif /* SELFTEST_KVM_UTIL_TYPES_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
new file mode 100644
index 000000000000..2ed106b32c81
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * LoongArch Constant Timer specific interface
+ */
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+/* LoongArch timer frequency is constant 100MHZ */
+#define TIMER_FREQ (100UL << 20)
+#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000)
+#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000)
+#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ)
+
+static inline unsigned long timer_get_cycles(void)
+{
+ unsigned long val = 0;
+
+ __asm__ __volatile__(
+ "rdtime.d %0, $zero\n\t"
+ : "=r"(val)
+ :
+ );
+
+ return val;
+}
+
+static inline unsigned long timer_get_cfg(void)
+{
+ return csr_read(LOONGARCH_CSR_TCFG);
+}
+
+static inline unsigned long timer_get_val(void)
+{
+ return csr_read(LOONGARCH_CSR_TVAL);
+}
+
+static inline void disable_timer(void)
+{
+ csr_write(0, LOONGARCH_CSR_TCFG);
+}
+
+static inline void timer_irq_enable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val |= ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_irq_disable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val &= ~ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_set_next_cmp_ms(unsigned int msec, bool period)
+{
+ unsigned long val;
+
+ val = msec_to_cycles(msec) & CSR_TCFG_VAL;
+ val |= CSR_TCFG_EN;
+ if (period)
+ val |= CSR_TCFG_PERIOD;
+ csr_write(val, LOONGARCH_CSR_TCFG);
+}
+
+static inline void __delay(uint64_t cycles)
+{
+ uint64_t start = timer_get_cycles();
+
+ while ((timer_get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
new file mode 100644
index 000000000000..e43a57d99b56
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h
new file mode 100644
index 000000000000..76840ddda57d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/processor.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#ifndef __ASSEMBLER__
+#include "ucall_common.h"
+
+#else
+/* general registers */
+#define zero $r0
+#define ra $r1
+#define tp $r2
+#define sp $r3
+#define a0 $r4
+#define a1 $r5
+#define a2 $r6
+#define a3 $r7
+#define a4 $r8
+#define a5 $r9
+#define a6 $r10
+#define a7 $r11
+#define t0 $r12
+#define t1 $r13
+#define t2 $r14
+#define t3 $r15
+#define t4 $r16
+#define t5 $r17
+#define t6 $r18
+#define t7 $r19
+#define t8 $r20
+#define u0 $r21
+#define fp $r22
+#define s0 $r23
+#define s1 $r24
+#define s2 $r25
+#define s3 $r26
+#define s4 $r27
+#define s5 $r28
+#define s6 $r29
+#define s7 $r30
+#define s8 $r31
+#endif
+
+/*
+ * LoongArch page table entry definition
+ * Original header file arch/loongarch/include/asm/loongarch.h
+ */
+#define _PAGE_VALID_SHIFT 0
+#define _PAGE_DIRTY_SHIFT 1
+#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */
+#define PLV_KERN 0
+#define PLV_USER 3
+#define PLV_MASK 0x3
+#define _CACHE_SHIFT 4 /* 4~5, two bits */
+#define _PAGE_PRESENT_SHIFT 7
+#define _PAGE_WRITE_SHIFT 8
+
+#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT)
+#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT)
+#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT)
+#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT)
+#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT)
+#define __READABLE (_PAGE_VALID)
+#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE)
+/* Coherent Cached */
+#define _CACHE_CC BIT_ULL(_CACHE_SHIFT)
+#define PS_4K 0x0000000c
+#define PS_16K 0x0000000e
+#define PS_64K 0x00000010
+#define PS_DEFAULT_SIZE PS_16K
+
+/* LoongArch Basic CSR registers */
+#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */
+#define CSR_CRMD_PG_SHIFT 4
+#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT)
+#define CSR_CRMD_IE_SHIFT 2
+#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT)
+#define CSR_CRMD_PLV_SHIFT 0
+#define CSR_CRMD_PLV_WIDTH 2
+#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT)
+#define PLV_MASK 0x3
+#define LOONGARCH_CSR_PRMD 0x1
+#define LOONGARCH_CSR_EUEN 0x2
+#define LOONGARCH_CSR_ECFG 0x4
+#define ECFGB_TIMER 11
+#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER))
+#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
+#define CSR_ESTAT_EXC_SHIFT 16
+#define CSR_ESTAT_EXC_WIDTH 6
+#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT)
+#define EXCCODE_INT 0 /* Interrupt */
+#define INT_TI 11 /* Timer interrupt*/
+#define LOONGARCH_CSR_ERA 0x6 /* ERA */
+#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */
+#define LOONGARCH_CSR_EENTRY 0xc
+#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */
+#define CSR_TLBIDX_PS_SHIFT 24
+#define CSR_TLBIDX_PS_WIDTH 6
+#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT)
+#define CSR_TLBIDX_SIZEM 0x3f000000
+#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT
+#define LOONGARCH_CSR_ASID 0x18 /* ASID */
+#define LOONGARCH_CSR_PGDL 0x19
+#define LOONGARCH_CSR_PGDH 0x1a
+/* Page table base */
+#define LOONGARCH_CSR_PGD 0x1b
+#define LOONGARCH_CSR_PWCTL0 0x1c
+#define LOONGARCH_CSR_PWCTL1 0x1d
+#define LOONGARCH_CSR_STLBPGSIZE 0x1e
+#define LOONGARCH_CSR_CPUID 0x20
+#define LOONGARCH_CSR_KS0 0x30
+#define LOONGARCH_CSR_KS1 0x31
+#define LOONGARCH_CSR_TMID 0x40
+#define LOONGARCH_CSR_TCFG 0x41
+#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2))
+#define CSR_TCFG_PERIOD_SHIFT 1
+#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT)
+#define CSR_TCFG_EN (0x1UL)
+#define LOONGARCH_CSR_TVAL 0x42
+#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */
+#define CSR_TINTCLR_TI_SHIFT 0
+#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT)
+/* TLB refill exception entry */
+#define LOONGARCH_CSR_TLBRENTRY 0x88
+#define LOONGARCH_CSR_TLBRSAVE 0x8b
+#define LOONGARCH_CSR_TLBREHI 0x8e
+#define CSR_TLBREHI_PS_SHIFT 0
+#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT)
+
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__( \
+ "csrrd %[val], %[reg]\n\t" \
+ : [val] "=r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define csr_write(v, csr) \
+({ \
+ register unsigned long __v = v; \
+ __asm__ __volatile__ ( \
+ "csrwr %[val], %[reg]\n\t" \
+ : [val] "+r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define EXREGS_GPRS (32)
+
+#ifndef __ASSEMBLER__
+void handle_tlb_refill(void);
+void handle_exception(void);
+
+struct ex_regs {
+ unsigned long regs[EXREGS_GPRS];
+ unsigned long pc;
+ unsigned long estat;
+ unsigned long badv;
+ unsigned long prmd;
+};
+
+#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc)
+#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat)
+#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv)
+#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd)
+#define EXREGS_SIZE sizeof(struct ex_regs)
+
+#define VECTOR_NUM 64
+
+typedef void(*handler_fn)(struct ex_regs *);
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM];
+};
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler);
+
+static inline void cpu_relax(void)
+{
+ asm volatile("nop" ::: "memory");
+}
+
+static inline void local_irq_enable(void)
+{
+ unsigned int flags = CSR_CRMD_IE;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+ unsigned int flags = 0;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+#else
+#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8)
+#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8)
+#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8)
+#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8)
+#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8)
+#endif
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h
new file mode 100644
index 000000000000..4ec801f37f00
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h
new file mode 100644
index 000000000000..d32ff5d8ffd0
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/lru_gen_util.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output.
+ *
+ * Copyright (C) 2025, Google LLC.
+ */
+#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H
+#define SELFTEST_KVM_LRU_GEN_UTIL_H
+
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "test_util.h"
+
+#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */
+#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */
+
+#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen"
+#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled"
+#define LRU_GEN_ENABLED 1
+#define LRU_GEN_MM_WALK 2
+
+struct generation_stats {
+ int gen;
+ long age_ms;
+ long nr_anon;
+ long nr_file;
+};
+
+struct node_stats {
+ int node;
+ int nr_gens; /* Number of populated gens entries. */
+ struct generation_stats gens[MAX_NR_GENS];
+};
+
+struct memcg_stats {
+ unsigned long memcg_id;
+ int nr_nodes; /* Number of populated nodes entries. */
+ struct node_stats nodes[MAX_NR_NODES];
+};
+
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg);
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats);
+long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats);
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg);
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long total_pages);
+bool lru_gen_usable(void);
+
+#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h
new file mode 100644
index 000000000000..9071eb6dea60
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/memstress.h
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tools/testing/selftests/kvm/include/memstress.h
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_MEMSTRESS_H
+#define SELFTEST_KVM_MEMSTRESS_H
+
+#include <pthread.h>
+
+#include "kvm_util.h"
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
+
+#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */
+
+#define MEMSTRESS_MEM_SLOT_INDEX 1
+
+struct memstress_vcpu_args {
+ uint64_t gpa;
+ uint64_t gva;
+ uint64_t pages;
+
+ /* Only used by the host userspace part of the vCPU thread */
+ struct kvm_vcpu *vcpu;
+ int vcpu_idx;
+};
+
+struct memstress_args {
+ struct kvm_vm *vm;
+ /* The starting address and size of the guest test region. */
+ uint64_t gpa;
+ uint64_t size;
+ uint64_t guest_page_size;
+ uint32_t random_seed;
+ uint32_t write_percent;
+
+ /* Run vCPUs in L2 instead of L1, if the architecture supports it. */
+ bool nested;
+ /* Randomize which pages are accessed by the guest. */
+ bool random_access;
+ /* True if all vCPUs are pinned to pCPUs */
+ bool pin_vcpus;
+ /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */
+ uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS];
+
+ /* Test is done, stop running vCPUs. */
+ bool stop_vcpus;
+
+ struct memstress_vcpu_args vcpu_args[KVM_MAX_VCPUS];
+};
+
+extern struct memstress_args memstress_args;
+
+struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
+ uint64_t vcpu_memory_bytes, int slots,
+ enum vm_mem_backing_src_type backing_src,
+ bool partition_vcpu_memory_access);
+void memstress_destroy_vm(struct kvm_vm *vm);
+
+void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
+void memstress_set_random_access(struct kvm_vm *vm, bool random_access);
+
+void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *));
+void memstress_join_vcpu_threads(int vcpus);
+void memstress_guest_code(uint32_t vcpu_id);
+
+uint64_t memstress_nested_pages(int nr_vcpus);
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
+
+void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots);
+void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots);
+void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots);
+void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot);
+unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot);
+void memstress_free_bitmaps(unsigned long *bitmaps[], int slots);
+
+#endif /* SELFTEST_KVM_MEMSTRESS_H */
diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h
index b020547403fd..29572a6d789c 100644
--- a/tools/testing/selftests/kvm/include/numaif.h
+++ b/tools/testing/selftests/kvm/include/numaif.h
@@ -1,55 +1,83 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/numaif.h
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Header file that provides access to NUMA API functions not explicitly
- * exported to user space.
- */
+/* Copyright (C) 2020, Google LLC. */
#ifndef SELFTEST_KVM_NUMAIF_H
#define SELFTEST_KVM_NUMAIF_H
-#define __NR_get_mempolicy 239
-#define __NR_migrate_pages 256
+#include <dirent.h>
-/* System calls */
-long get_mempolicy(int *policy, const unsigned long *nmask,
- unsigned long maxnode, void *addr, int flags)
+#include <linux/mempolicy.h>
+
+#include "kvm_syscalls.h"
+
+KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask,
+ unsigned long, maxnode, void *, addr, int, flags);
+
+KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask,
+ unsigned long, maxnode);
+
+KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start,
+ unsigned long, len, unsigned long, home_node,
+ unsigned long, flags);
+
+KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode,
+ const unsigned long *, frommask, const unsigned long *, tomask);
+
+KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages,
+ const int *, nodes, int *, status, int, flags);
+
+KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode,
+ const unsigned long *, nodemask, unsigned long, maxnode,
+ unsigned int, flags);
+
+static inline int get_max_numa_node(void)
{
- return syscall(__NR_get_mempolicy, policy, nmask,
- maxnode, addr, flags);
+ struct dirent *de;
+ int max_node = 0;
+ DIR *d;
+
+ /*
+ * Assume there's a single node if the kernel doesn't support NUMA,
+ * or if no nodes are found.
+ */
+ d = opendir("/sys/devices/system/node");
+ if (!d)
+ return 0;
+
+ while ((de = readdir(d)) != NULL) {
+ int node_id;
+ char *endptr;
+
+ if (strncmp(de->d_name, "node", 4) != 0)
+ continue;
+
+ node_id = strtol(de->d_name + 4, &endptr, 10);
+ if (*endptr != '\0')
+ continue;
+
+ if (node_id > max_node)
+ max_node = node_id;
+ }
+ closedir(d);
+
+ return max_node;
}
-long migrate_pages(int pid, unsigned long maxnode,
- const unsigned long *frommask,
- const unsigned long *tomask)
+static bool is_numa_available(void)
{
- return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
+ /*
+ * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall
+ * fails with ENOSYS, then the kernel was built without NUMA support.
+ * if the syscall fails with EPERM, then the process/user lacks the
+ * necessary capabilities (CAP_SYS_NICE).
+ */
+ return !get_mempolicy(NULL, NULL, 0, NULL, 0) ||
+ (errno != ENOSYS && errno != EPERM);
}
-/* Policies */
-#define MPOL_DEFAULT 0
-#define MPOL_PREFERRED 1
-#define MPOL_BIND 2
-#define MPOL_INTERLEAVE 3
-
-#define MPOL_MAX MPOL_INTERLEAVE
-
-/* Flags for get_mem_policy */
-#define MPOL_F_NODE (1<<0) /* return next il node or node of address */
- /* Warning: MPOL_F_NODE is unsupported and
- * subject to change. Don't use.
- */
-#define MPOL_F_ADDR (1<<1) /* look up vma using address */
-#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */
-
-/* Flags for mbind */
-#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
+static inline bool is_multi_numa_node_system(void)
+{
+ return is_numa_available() && get_max_numa_node() >= 1;
+}
#endif /* SELFTEST_KVM_NUMAIF_H */
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
deleted file mode 100644
index 005f2143adeb..000000000000
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * tools/testing/selftests/kvm/include/perf_test_util.h
- *
- * Copyright (C) 2020, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
-#define SELFTEST_KVM_PERF_TEST_UTIL_H
-
-#include "kvm_util.h"
-
-/* Default guest test virtual memory offset */
-#define DEFAULT_GUEST_TEST_MEM 0xc0000000
-
-#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */
-
-#define PERF_TEST_MEM_SLOT_INDEX 1
-
-struct perf_test_vcpu_args {
- uint64_t gva;
- uint64_t pages;
-
- /* Only used by the host userspace part of the vCPU thread */
- int vcpu_id;
-};
-
-struct perf_test_args {
- struct kvm_vm *vm;
- uint64_t host_page_size;
- uint64_t guest_page_size;
- int wr_fract;
-
- struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
-};
-
-extern struct perf_test_args perf_test_args;
-
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-extern uint64_t guest_test_phys_mem;
-
-struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
- uint64_t vcpu_memory_bytes,
- enum vm_mem_backing_src_type backing_src);
-void perf_test_destroy_vm(struct kvm_vm *vm);
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
- uint64_t vcpu_memory_bytes,
- bool partition_vcpu_memory_access);
-
-#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
new file mode 100644
index 000000000000..225d81dad064
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V Arch Timer(sstc) specific interface
+ *
+ * Copyright (c) 2024 Intel Corporation
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include <asm/csr.h>
+#include <asm/vdso/processor.h>
+
+static unsigned long timer_freq;
+
+#define msec_to_cycles(msec) \
+ ((timer_freq) * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec) \
+ ((timer_freq) * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+ ((uint64_t)(cycles) * 1000000 / (timer_freq))
+
+static inline uint64_t timer_get_cycles(void)
+{
+ return csr_read(CSR_TIME);
+}
+
+static inline void timer_set_cmp(uint64_t cval)
+{
+ csr_write(CSR_STIMECMP, cval);
+}
+
+static inline uint64_t timer_get_cmp(void)
+{
+ return csr_read(CSR_STIMECMP);
+}
+
+static inline void timer_irq_enable(void)
+{
+ csr_set(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_irq_disable(void)
+{
+ csr_clear(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_set_next_cmp_ms(uint32_t msec)
+{
+ uint64_t now_ct = timer_get_cycles();
+ uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+ timer_set_cmp(next_ct);
+}
+
+static inline void __delay(uint64_t cycles)
+{
+ uint64_t start = timer_get_cycles();
+
+ while ((timer_get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
new file mode 100644
index 000000000000..e43a57d99b56
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
new file mode 100644
index 000000000000..e58282488beb
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V processor specific defines
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <linux/stringify.h>
+#include <asm/csr.h>
+#include <asm/vdso/processor.h>
+#include "kvm_util.h"
+
+#define INSN_OPCODE_MASK 0x007c
+#define INSN_OPCODE_SHIFT 2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_FUNCT3 0x7000
+#define INSN_SHIFT_FUNCT3 12
+
+#define INSN_CSR_MASK 0xfff00000
+#define INSN_CSR_SHIFT 20
+
+#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3)
+#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT)
+
+static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
+ uint64_t idx, uint64_t size)
+{
+ return KVM_REG_RISCV | type | subtype | idx | size;
+}
+
+#if __riscv_xlen == 64
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U64
+#else
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U32
+#endif
+
+#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, 0, \
+ KVM_REG_RISCV_CONFIG_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, 0, \
+ KVM_REG_RISCV_CORE_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_GENERAL_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \
+ KVM_REG_RISCV_CSR_GENERAL, \
+ KVM_REG_RISCV_CSR_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, 0, \
+ KVM_REG_RISCV_TIMER_REG(name), \
+ KVM_REG_SIZE_U64)
+
+#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \
+ KVM_REG_RISCV_ISA_SINGLE, \
+ idx, KVM_REG_SIZE_ULONG)
+
+#define RISCV_SBI_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_SBI_EXT, \
+ KVM_REG_RISCV_SBI_SINGLE, \
+ idx, KVM_REG_SIZE_ULONG)
+
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
+
+static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext)
+{
+ return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext));
+}
+
+static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
+{
+ return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
+}
+
+struct pt_regs {
+ unsigned long epc;
+ unsigned long ra;
+ unsigned long sp;
+ unsigned long gp;
+ unsigned long tp;
+ unsigned long t0;
+ unsigned long t1;
+ unsigned long t2;
+ unsigned long s0;
+ unsigned long s1;
+ unsigned long a0;
+ unsigned long a1;
+ unsigned long a2;
+ unsigned long a3;
+ unsigned long a4;
+ unsigned long a5;
+ unsigned long a6;
+ unsigned long a7;
+ unsigned long s2;
+ unsigned long s3;
+ unsigned long s4;
+ unsigned long s5;
+ unsigned long s6;
+ unsigned long s7;
+ unsigned long s8;
+ unsigned long s9;
+ unsigned long s10;
+ unsigned long s11;
+ unsigned long t3;
+ unsigned long t4;
+ unsigned long t5;
+ unsigned long t6;
+ /* Supervisor/Machine CSRs */
+ unsigned long status;
+ unsigned long badaddr;
+ unsigned long cause;
+ /* a0 value before the syscall */
+ unsigned long orig_a0;
+};
+
+#define NR_VECTORS 2
+#define NR_EXCEPTIONS 32
+#define EC_MASK (NR_EXCEPTIONS - 1)
+
+typedef void(*exception_handler_fn)(struct pt_regs *);
+
+void vm_init_vector_tables(struct kvm_vm *vm);
+void vcpu_init_vector_tables(struct kvm_vcpu *vcpu);
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler);
+
+void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler);
+
+/* L3 index Bit[47:39] */
+#define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL
+#define PGTBL_L3_INDEX_SHIFT 39
+#define PGTBL_L3_BLOCK_SHIFT 39
+#define PGTBL_L3_BLOCK_SIZE 0x0000008000000000ULL
+#define PGTBL_L3_MAP_MASK (~(PGTBL_L3_BLOCK_SIZE - 1))
+/* L2 index Bit[38:30] */
+#define PGTBL_L2_INDEX_MASK 0x0000007FC0000000ULL
+#define PGTBL_L2_INDEX_SHIFT 30
+#define PGTBL_L2_BLOCK_SHIFT 30
+#define PGTBL_L2_BLOCK_SIZE 0x0000000040000000ULL
+#define PGTBL_L2_MAP_MASK (~(PGTBL_L2_BLOCK_SIZE - 1))
+/* L1 index Bit[29:21] */
+#define PGTBL_L1_INDEX_MASK 0x000000003FE00000ULL
+#define PGTBL_L1_INDEX_SHIFT 21
+#define PGTBL_L1_BLOCK_SHIFT 21
+#define PGTBL_L1_BLOCK_SIZE 0x0000000000200000ULL
+#define PGTBL_L1_MAP_MASK (~(PGTBL_L1_BLOCK_SIZE - 1))
+/* L0 index Bit[20:12] */
+#define PGTBL_L0_INDEX_MASK 0x00000000001FF000ULL
+#define PGTBL_L0_INDEX_SHIFT 12
+#define PGTBL_L0_BLOCK_SHIFT 12
+#define PGTBL_L0_BLOCK_SIZE 0x0000000000001000ULL
+#define PGTBL_L0_MAP_MASK (~(PGTBL_L0_BLOCK_SIZE - 1))
+
+#define PGTBL_PTE_ADDR_MASK 0x003FFFFFFFFFFC00ULL
+#define PGTBL_PTE_ADDR_SHIFT 10
+#define PGTBL_PTE_RSW_MASK 0x0000000000000300ULL
+#define PGTBL_PTE_RSW_SHIFT 8
+#define PGTBL_PTE_DIRTY_MASK 0x0000000000000080ULL
+#define PGTBL_PTE_DIRTY_SHIFT 7
+#define PGTBL_PTE_ACCESSED_MASK 0x0000000000000040ULL
+#define PGTBL_PTE_ACCESSED_SHIFT 6
+#define PGTBL_PTE_GLOBAL_MASK 0x0000000000000020ULL
+#define PGTBL_PTE_GLOBAL_SHIFT 5
+#define PGTBL_PTE_USER_MASK 0x0000000000000010ULL
+#define PGTBL_PTE_USER_SHIFT 4
+#define PGTBL_PTE_EXECUTE_MASK 0x0000000000000008ULL
+#define PGTBL_PTE_EXECUTE_SHIFT 3
+#define PGTBL_PTE_WRITE_MASK 0x0000000000000004ULL
+#define PGTBL_PTE_WRITE_SHIFT 2
+#define PGTBL_PTE_READ_MASK 0x0000000000000002ULL
+#define PGTBL_PTE_READ_SHIFT 1
+#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_ACCESSED_MASK | \
+ PGTBL_PTE_DIRTY_MASK | \
+ PGTBL_PTE_EXECUTE_MASK | \
+ PGTBL_PTE_WRITE_MASK | \
+ PGTBL_PTE_READ_MASK)
+#define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL
+#define PGTBL_PTE_VALID_SHIFT 0
+
+#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
+#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
+
+static inline void local_irq_enable(void)
+{
+ csr_set(CSR_SSTATUS, SR_SIE);
+}
+
+static inline void local_irq_disable(void)
+{
+ csr_clear(CSR_SSTATUS, SR_SIE);
+}
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h
new file mode 100644
index 000000000000..046b432ae896
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/sbi.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V SBI specific definitions
+ *
+ * Copyright (C) 2024 Rivos Inc.
+ */
+
+#ifndef SELFTEST_KVM_SBI_H
+#define SELFTEST_KVM_SBI_H
+
+/* SBI spec version fields */
+#define SBI_SPEC_VERSION_DEFAULT 0x1
+#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
+#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
+#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff
+
+/* SBI return error codes */
+#define SBI_SUCCESS 0
+#define SBI_ERR_FAILURE -1
+#define SBI_ERR_NOT_SUPPORTED -2
+#define SBI_ERR_INVALID_PARAM -3
+#define SBI_ERR_DENIED -4
+#define SBI_ERR_INVALID_ADDRESS -5
+#define SBI_ERR_ALREADY_AVAILABLE -6
+#define SBI_ERR_ALREADY_STARTED -7
+#define SBI_ERR_ALREADY_STOPPED -8
+
+#define SBI_EXT_EXPERIMENTAL_START 0x08000000
+#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
+
+#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
+#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
+#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
+
+enum sbi_ext_id {
+ SBI_EXT_BASE = 0x10,
+ SBI_EXT_STA = 0x535441,
+ SBI_EXT_PMU = 0x504D55,
+};
+
+enum sbi_ext_base_fid {
+ SBI_EXT_BASE_GET_SPEC_VERSION = 0,
+ SBI_EXT_BASE_GET_IMP_ID,
+ SBI_EXT_BASE_GET_IMP_VERSION,
+ SBI_EXT_BASE_PROBE_EXT = 3,
+};
+enum sbi_ext_pmu_fid {
+ SBI_EXT_PMU_NUM_COUNTERS = 0,
+ SBI_EXT_PMU_COUNTER_GET_INFO,
+ SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ SBI_EXT_PMU_COUNTER_START,
+ SBI_EXT_PMU_COUNTER_STOP,
+ SBI_EXT_PMU_COUNTER_FW_READ,
+ SBI_EXT_PMU_COUNTER_FW_READ_HI,
+ SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+};
+
+union sbi_pmu_ctr_info {
+ unsigned long value;
+ struct {
+ unsigned long csr:12;
+ unsigned long width:6;
+#if __riscv_xlen == 32
+ unsigned long reserved:13;
+#else
+ unsigned long reserved:45;
+#endif
+ unsigned long type:1;
+ };
+};
+
+struct riscv_pmu_snapshot_data {
+ u64 ctr_overflow_mask;
+ u64 ctr_values[64];
+ u64 reserved[447];
+};
+
+struct sbiret {
+ long error;
+ long value;
+};
+
+/** General pmu event codes specified in SBI PMU extension */
+enum sbi_pmu_hw_generic_events_t {
+ SBI_PMU_HW_NO_EVENT = 0,
+ SBI_PMU_HW_CPU_CYCLES = 1,
+ SBI_PMU_HW_INSTRUCTIONS = 2,
+ SBI_PMU_HW_CACHE_REFERENCES = 3,
+ SBI_PMU_HW_CACHE_MISSES = 4,
+ SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
+ SBI_PMU_HW_BRANCH_MISSES = 6,
+ SBI_PMU_HW_BUS_CYCLES = 7,
+ SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
+ SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
+ SBI_PMU_HW_REF_CPU_CYCLES = 10,
+
+ SBI_PMU_HW_GENERAL_MAX,
+};
+
+/* SBI PMU counter types */
+enum sbi_pmu_ctr_type {
+ SBI_PMU_CTR_TYPE_HW = 0x0,
+ SBI_PMU_CTR_TYPE_FW,
+};
+
+/* Flags defined for config matching function */
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1)
+#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3)
+#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4)
+#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5)
+#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6)
+#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7)
+
+/* Flags defined for counter start function */
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
+#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)
+
+/* Flags defined for counter stop function */
+#define SBI_PMU_STOP_FLAG_RESET BIT(0)
+#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5);
+
+bool guest_sbi_probe_extension(int extid, long *out_val);
+
+/* Make SBI version */
+static inline unsigned long sbi_mk_version(unsigned long major,
+ unsigned long minor)
+{
+ return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
+ | (minor & SBI_SPEC_VERSION_MINOR_MASK);
+}
+
+unsigned long get_host_sbi_spec_version(void);
+
+#endif /* SELFTEST_KVM_SBI_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h
new file mode 100644
index 000000000000..a695ae36f3e0
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/ucall.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "processor.h"
+#include "sbi.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+ KVM_RISCV_SELFTESTS_SBI_UCALL,
+ uc, 0, 0, 0, 0, 0);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h
new file mode 100644
index 000000000000..1bf275631cc6
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390/debug_print.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Definition for kernel virtual machines on s390x
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_DEBUG_PRINT_H
+#define SELFTEST_KVM_DEBUG_PRINT_H
+
+#include "asm/ptrace.h"
+#include "kvm_util.h"
+#include "sie.h"
+
+static inline void print_hex_bytes(const char *name, u64 addr, size_t len)
+{
+ u64 pos;
+
+ pr_debug("%s (%p)\n", name, (void *)addr);
+ pr_debug(" 0/0x00---------|");
+ if (len > 8)
+ pr_debug(" 8/0x08---------|");
+ if (len > 16)
+ pr_debug(" 16/0x10--------|");
+ if (len > 24)
+ pr_debug(" 24/0x18--------|");
+ for (pos = 0; pos < len; pos += 8) {
+ if ((pos % 32) == 0)
+ pr_debug("\n %3lu 0x%.3lx ", pos, pos);
+ pr_debug(" %16lx", *((u64 *)(addr + pos)));
+ }
+ pr_debug("\n");
+}
+
+static inline void print_hex(const char *name, u64 addr)
+{
+ print_hex_bytes(name, addr, 512);
+}
+
+static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+ pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n",
+ run->flags,
+ run->psw_mask, run->psw_addr,
+ run->exit_reason, exit_reason_str(run->exit_reason));
+ pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n",
+ sie_block->psw_mask, sie_block->psw_addr);
+}
+
+static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+ print_hex_bytes("run", (u64)run, 0x150);
+ print_hex("sie_block", (u64)sie_block);
+ print_psw(run, sie_block);
+}
+
+static inline void print_regs(struct kvm_run *run)
+{
+ struct kvm_sync_regs *sync_regs = &run->s.regs;
+
+ print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS);
+ print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS);
+ print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS);
+}
+
+#endif /* SELFTEST_KVM_DEBUG_PRINT_H */
diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
index b0ed71302722..b0ed71302722 100644
--- a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
+++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h
new file mode 100644
index 000000000000..00a1ced6538b
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390/facility.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * Get the facility bits with the STFLE instruction
+ */
+
+#ifndef SELFTEST_KVM_FACILITY_H
+#define SELFTEST_KVM_FACILITY_H
+
+#include <linux/bitops.h>
+
+/* alt_stfle_fac_list[16] + stfle_fac_list[16] */
+#define NB_STFL_DOUBLEWORDS 32
+
+extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+extern bool stfle_flag;
+
+static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
+{
+ return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void stfle(uint64_t *fac, unsigned int nb_doublewords)
+{
+ register unsigned long r0 asm("0") = nb_doublewords - 1;
+
+ asm volatile(" .insn s,0xb2b00000,0(%1)\n"
+ : "+d" (r0)
+ : "a" (fac)
+ : "memory", "cc");
+}
+
+static inline void setup_facilities(void)
+{
+ stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS);
+ stfle_flag = true;
+}
+
+static inline bool test_facility(int nr)
+{
+ if (!stfle_flag)
+ setup_facilities();
+ return test_bit_inv(nr, stfl_doublewords);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
new file mode 100644
index 000000000000..e43a57d99b56
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h
index e0e96a5f608c..33fef6fd9617 100644
--- a/tools/testing/selftests/kvm/include/s390x/processor.h
+++ b/tools/testing/selftests/kvm/include/s390/processor.h
@@ -5,6 +5,8 @@
#ifndef SELFTEST_KVM_PROCESSOR_H
#define SELFTEST_KVM_PROCESSOR_H
+#include <linux/compiler.h>
+
/* Bits in the region/segment table entry */
#define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */
#define REGION_ENTRY_PROTECT 0x200 /* region protection bit */
@@ -19,4 +21,21 @@
#define PAGE_PROTECT 0x200 /* HW read-only bit */
#define PAGE_NOEXEC 0x100 /* HW no-execute bit */
+/* Page size definitions */
+#define PAGE_SHIFT 12
+#define PAGE_SIZE BIT_ULL(PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+/* Is there a portable way to do this? */
+static inline void cpu_relax(void)
+{
+ barrier();
+}
+
+/* Get the instruction length */
+static inline int insn_length(unsigned char code)
+{
+ return ((((int)code + 64) >> 7) + 1) << 1;
+}
+
#endif
diff --git a/tools/testing/selftests/kvm/include/s390/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h
new file mode 100644
index 000000000000..160acd4a1db9
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390/sie.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definition for kernel virtual machines on s390.
+ *
+ * Adapted copy of struct definition kvm_s390_sie_block from
+ * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs.
+ *
+ * Copyright IBM Corp. 2008, 2024
+ *
+ * Authors:
+ * Christoph Schlameuss <schlameuss@linux.ibm.com>
+ * Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_SIE_H
+#define SELFTEST_KVM_SIE_H
+
+#include <linux/types.h>
+
+struct kvm_s390_sie_block {
+#define CPUSTAT_STOPPED 0x80000000
+#define CPUSTAT_WAIT 0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT 0x04000000
+#define CPUSTAT_IO_INT 0x02000000
+#define CPUSTAT_EXT_INT 0x01000000
+#define CPUSTAT_RUNNING 0x00800000
+#define CPUSTAT_RETAINED 0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB 0x00010000
+#define CPUSTAT_RRF 0x00008000
+#define CPUSTAT_SLSV 0x00004000
+#define CPUSTAT_SLSR 0x00002000
+#define CPUSTAT_ZARCH 0x00000800
+#define CPUSTAT_MCDS 0x00000100
+#define CPUSTAT_KSS 0x00000200
+#define CPUSTAT_SM 0x00000080
+#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_GED2 0x00000010
+#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
+#define CPUSTAT_J 0x00000002
+#define CPUSTAT_P 0x00000001
+ __u32 cpuflags; /* 0x0000 */
+ __u32: 1; /* 0x0004 */
+ __u32 prefix : 18;
+ __u32: 1;
+ __u32 ibc : 12;
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE BIT(0)
+ __u32 prog0c; /* 0x000c */
+ union {
+ __u8 reserved10[16]; /* 0x0010 */
+ struct {
+ __u64 pv_handle_cpu;
+ __u64 pv_handle_config;
+ };
+ };
+#define PROG_BLOCK_SIE BIT(0)
+#define PROG_REQUEST BIT(1)
+ __u32 prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
+ __u64 cputm; /* 0x0028 */
+ __u64 ckc; /* 0x0030 */
+ __u64 epoch; /* 0x0038 */
+ __u32 svcc; /* 0x0040 */
+#define LCTL_CR0 0x8000
+#define LCTL_CR6 0x0200
+#define LCTL_CR9 0x0040
+#define LCTL_CR10 0x0020
+#define LCTL_CR11 0x0010
+#define LCTL_CR14 0x0002
+ __u16 lctl; /* 0x0044 */
+ __s16 icpua; /* 0x0046 */
+#define ICTL_OPEREXC 0x80000000
+#define ICTL_PINT 0x20000000
+#define ICTL_LPSW 0x00400000
+#define ICTL_STCTL 0x00040000
+#define ICTL_ISKE 0x00004000
+#define ICTL_SSKE 0x00002000
+#define ICTL_RRBE 0x00001000
+#define ICTL_TPROT 0x00000200
+ __u32 ictl; /* 0x0048 */
+#define ECA_CEI 0x80000000
+#define ECA_IB 0x40000000
+#define ECA_SIGPI 0x10000000
+#define ECA_MVPGI 0x01000000
+#define ECA_AIV 0x00200000
+#define ECA_VX 0x00020000
+#define ECA_PROTEXCI 0x00002000
+#define ECA_APIE 0x00000008
+#define ECA_SII 0x00000001
+ __u32 eca; /* 0x004c */
+#define ICPT_INST 0x04
+#define ICPT_PROGI 0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ 0x10
+#define ICPT_EXTINT 0x14
+#define ICPT_IOREQ 0x18
+#define ICPT_WAIT 0x1c
+#define ICPT_VALIDITY 0x20
+#define ICPT_STOP 0x28
+#define ICPT_OPEREXC 0x2C
+#define ICPT_PARTEXEC 0x38
+#define ICPT_IOINST 0x40
+#define ICPT_KSS 0x5c
+#define ICPT_MCHKREQ 0x60
+#define ICPT_INT_ENABLE 0x64
+#define ICPT_PV_INSTR 0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF 0x70
+ __u8 icptcode; /* 0x0050 */
+ __u8 icptstatus; /* 0x0051 */
+ __u16 ihcpu; /* 0x0052 */
+ __u8 reserved54; /* 0x0054 */
+#define IICTL_CODE_NONE 0x00
+#define IICTL_CODE_MCHK 0x01
+#define IICTL_CODE_EXT 0x02
+#define IICTL_CODE_IO 0x03
+#define IICTL_CODE_RESTART 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND 0x11
+ __u8 iictl; /* 0x0055 */
+ __u16 ipa; /* 0x0056 */
+ __u32 ipb; /* 0x0058 */
+ __u32 scaoh; /* 0x005c */
+#define FPF_BPBC 0x20
+ __u8 fpf; /* 0x0060 */
+#define ECB_GS 0x40
+#define ECB_TE 0x10
+#define ECB_SPECI 0x08
+#define ECB_SRSI 0x04
+#define ECB_HOSTPROTINT 0x02
+#define ECB_PTF 0x01
+ __u8 ecb; /* 0x0061 */
+#define ECB2_CMMA 0x80
+#define ECB2_IEP 0x20
+#define ECB2_PFMFI 0x08
+#define ECB2_ESCA 0x04
+#define ECB2_ZPCI_LSI 0x02
+ __u8 ecb2; /* 0x0062 */
+#define ECB3_AISI 0x20
+#define ECB3_AISII 0x10
+#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI 0x01
+ __u8 ecb3; /* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
+ __u32 scaol; /* 0x0064 */
+ __u8 sdf; /* 0x0068 */
+ __u8 epdx; /* 0x0069 */
+ __u8 cpnc; /* 0x006a */
+ __u8 reserved6b; /* 0x006b */
+ __u32 todpr; /* 0x006c */
+#define GISA_FORMAT1 0x00000001
+ __u32 gd; /* 0x0070 */
+ __u8 reserved74[12]; /* 0x0074 */
+ __u64 mso; /* 0x0080 */
+ __u64 msl; /* 0x0088 */
+ __u64 psw_mask; /* 0x0090 */
+ __u64 psw_addr; /* 0x0098 */
+ __u64 gg14; /* 0x00a0 */
+ __u64 gg15; /* 0x00a8 */
+ __u8 reservedb0[8]; /* 0x00b0 */
+#define HPID_KVM 0x4
+#define HPID_VSIE 0x5
+ __u8 hpid; /* 0x00b8 */
+ __u8 reservedb9[7]; /* 0x00b9 */
+ union {
+ struct {
+ __u32 eiparams; /* 0x00c0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ };
+ __u64 mcic; /* 0x00c0 */
+ } __packed;
+ __u32 reservedc8; /* 0x00c8 */
+ union {
+ struct {
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ };
+ __u32 edc; /* 0x00cc */
+ } __packed;
+ union {
+ struct {
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ };
+ __u64 faddr; /* 0x00d0 */
+ } __packed;
+ __u64 peraddr; /* 0x00d8 */
+ __u8 eai; /* 0x00e0 */
+ __u8 peraid; /* 0x00e1 */
+ __u8 oai; /* 0x00e2 */
+ __u8 armid; /* 0x00e3 */
+ __u8 reservede4[4]; /* 0x00e4 */
+ union {
+ __u64 tecmc; /* 0x00e8 */
+ struct {
+ __u16 subchannel_id; /* 0x00e8 */
+ __u16 subchannel_nr; /* 0x00ea */
+ __u32 io_int_parm; /* 0x00ec */
+ __u32 io_int_word; /* 0x00f0 */
+ };
+ } __packed;
+ __u8 reservedf4[8]; /* 0x00f4 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
+#define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
+ __u32 crycbd; /* 0x00fc */
+ __u64 gcr[16]; /* 0x0100 */
+ union {
+ __u64 gbea; /* 0x0180 */
+ __u64 sidad;
+ };
+ __u8 reserved188[8]; /* 0x0188 */
+ __u64 sdnxo; /* 0x0190 */
+ __u8 reserved198[8]; /* 0x0198 */
+ __u32 fac; /* 0x01a0 */
+ __u8 reserved1a4[20]; /* 0x01a4 */
+ __u64 cbrlo; /* 0x01b8 */
+ __u8 reserved1c0[8]; /* 0x01c0 */
+#define ECD_HOSTREGMGMT 0x20000000
+#define ECD_MEF 0x08000000
+#define ECD_ETOKENF 0x02000000
+#define ECD_ECC 0x00200000
+ __u32 ecd; /* 0x01c8 */
+ __u8 reserved1cc[18]; /* 0x01cc */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
+ __u64 itdba; /* 0x01e8 */
+ __u64 riccbd; /* 0x01f0 */
+ __u64 gvrd; /* 0x01f8 */
+} __packed __aligned(512);
+
+#endif /* SELFTEST_KVM_SIE_H */
diff --git a/tools/testing/selftests/kvm/include/s390/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h
new file mode 100644
index 000000000000..8035a872a351
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390/ucall.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+ asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
index 12a9a4b9cead..bc760761e1a3 100644
--- a/tools/testing/selftests/kvm/include/sparsebit.h
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -30,26 +30,26 @@ typedef uint64_t sparsebit_num_t;
struct sparsebit *sparsebit_alloc(void);
void sparsebit_free(struct sparsebit **sbitp);
-void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+void sparsebit_copy(struct sparsebit *dstp, const struct sparsebit *src);
-bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
-bool sparsebit_is_set_num(struct sparsebit *sbit,
+bool sparsebit_is_set(const struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(const struct sparsebit *sbit,
sparsebit_idx_t idx, sparsebit_num_t num);
-bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
-bool sparsebit_is_clear_num(struct sparsebit *sbit,
+bool sparsebit_is_clear(const struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(const struct sparsebit *sbit,
sparsebit_idx_t idx, sparsebit_num_t num);
-sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
-bool sparsebit_any_set(struct sparsebit *sbit);
-bool sparsebit_any_clear(struct sparsebit *sbit);
-bool sparsebit_all_set(struct sparsebit *sbit);
-bool sparsebit_all_clear(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
-sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
-sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+sparsebit_num_t sparsebit_num_set(const struct sparsebit *sbit);
+bool sparsebit_any_set(const struct sparsebit *sbit);
+bool sparsebit_any_clear(const struct sparsebit *sbit);
+bool sparsebit_all_set(const struct sparsebit *sbit);
+bool sparsebit_all_clear(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(const struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *sbit,
sparsebit_idx_t start, sparsebit_num_t num);
-sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *sbit,
sparsebit_idx_t start, sparsebit_num_t num);
void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
@@ -62,9 +62,29 @@ void sparsebit_clear_num(struct sparsebit *sbitp,
sparsebit_idx_t start, sparsebit_num_t num);
void sparsebit_clear_all(struct sparsebit *sbitp);
-void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+void sparsebit_dump(FILE *stream, const struct sparsebit *sbit,
unsigned int indent);
-void sparsebit_validate_internal(struct sparsebit *sbit);
+void sparsebit_validate_internal(const struct sparsebit *sbit);
+
+/*
+ * Iterate over an inclusive ranges within sparsebit @s. In each iteration,
+ * @range_begin and @range_end will take the beginning and end of the set
+ * range, which are of type sparsebit_idx_t.
+ *
+ * For example, if the range [3, 7] (inclusive) is set, within the
+ * iteration,@range_begin will take the value 3 and @range_end will take
+ * the value 7.
+ *
+ * Ensure that there is at least one bit set before using this macro with
+ * sparsebit_any_set(), because sparsebit_first_set() will abort if none
+ * are set.
+ */
+#define sparsebit_for_each_set_range(s, range_begin, range_end) \
+ for (range_begin = sparsebit_first_set(s), \
+ range_end = sparsebit_next_clear(s, range_begin) - 1; \
+ range_begin && range_end; \
+ range_begin = sparsebit_next_set(s, range_end), \
+ range_end = sparsebit_next_clear(s, range_begin) - 1)
#ifdef __cplusplus
}
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index b7f41399f22c..b4872ba8ed12 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -8,6 +8,8 @@
#ifndef SELFTEST_KVM_TEST_UTIL_H
#define SELFTEST_KVM_TEST_UTIL_H
+#include <setjmp.h>
+#include <signal.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
@@ -17,9 +19,12 @@
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sys/mman.h>
#include "kselftest.h"
-static inline int _no_printf(const char *format, ...) { return 0; }
+#define msecs_to_usecs(msec) ((msec) * 1000ULL)
+
+static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; }
#ifdef DEBUG
#define pr_debug(...) printf(__VA_ARGS__)
@@ -32,31 +37,65 @@ static inline int _no_printf(const char *format, ...) { return 0; }
#define pr_info(...) _no_printf(__VA_ARGS__)
#endif
-void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+void __printf(1, 2) print_skip(const char *fmt, ...);
+#define __TEST_REQUIRE(f, fmt, ...) \
+do { \
+ if (!(f)) \
+ ksft_exit_skip("- " fmt "\n", ##__VA_ARGS__); \
+} while (0)
+
+#define TEST_REQUIRE(f) __TEST_REQUIRE(f, "Requirement not met: %s", #f)
ssize_t test_write(int fd, const void *buf, size_t count);
ssize_t test_read(int fd, void *buf, size_t count);
int test_seq_read(const char *path, char **bufp, size_t *sizep);
-void test_assert(bool exp, const char *exp_str,
- const char *file, unsigned int line, const char *fmt, ...)
- __attribute__((format(printf, 5, 6)));
+void __printf(5, 6) test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line,
+ const char *fmt, ...);
#define TEST_ASSERT(e, fmt, ...) \
test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
-#define ASSERT_EQ(a, b) do { \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- TEST_ASSERT(__a == __b, \
- "ASSERT_EQ(%s, %s) failed.\n" \
- "\t%s is %#lx\n" \
- "\t%s is %#lx", \
- #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+#define TEST_ASSERT_EQ(a, b) \
+do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ test_assert(__a == __b, #a " == " #b, __FILE__, __LINE__, \
+ "%#lx != %#lx (%s != %s)", \
+ (unsigned long)(__a), (unsigned long)(__b), #a, #b);\
} while (0)
-#define TEST_FAIL(fmt, ...) \
- TEST_ASSERT(false, fmt, ##__VA_ARGS__)
+#define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do { \
+ __u32 exit_reason = (vcpu)->run->exit_reason; \
+ \
+ TEST_ASSERT(exit_reason == (expected), \
+ "Wanted KVM exit reason: %u (%s), got: %u (%s)", \
+ (expected), exit_reason_str((expected)), \
+ exit_reason, exit_reason_str(exit_reason)); \
+} while (0)
+
+#define TEST_FAIL(fmt, ...) do { \
+ TEST_ASSERT(false, fmt, ##__VA_ARGS__); \
+ __builtin_unreachable(); \
+} while (0)
+
+extern sigjmp_buf expect_sigbus_jmpbuf;
+void expect_sigbus_handler(int signum);
+
+#define TEST_EXPECT_SIGBUS(action) \
+do { \
+ struct sigaction sa_old, sa_new = { \
+ .sa_handler = expect_sigbus_handler, \
+ }; \
+ \
+ sigaction(SIGBUS, &sa_new, &sa_old); \
+ if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \
+ action; \
+ TEST_FAIL("'%s' should have triggered SIGBUS", #action); \
+ } \
+ sigaction(SIGBUS, &sa_old, NULL); \
+} while (0)
size_t parse_size(const char *size);
@@ -67,18 +106,136 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
struct timespec timespec_elapsed(struct timespec start);
struct timespec timespec_div(struct timespec ts, int divisor);
+struct guest_random_state {
+ uint32_t seed;
+};
+
+extern uint32_t guest_random_seed;
+extern struct guest_random_state guest_rng;
+
+struct guest_random_state new_guest_random_state(uint32_t seed);
+uint32_t guest_random_u32(struct guest_random_state *state);
+
+static inline bool __guest_random_bool(struct guest_random_state *state,
+ uint8_t percent)
+{
+ return (guest_random_u32(state) % 100) < percent;
+}
+
+static inline bool guest_random_bool(struct guest_random_state *state)
+{
+ return __guest_random_bool(state, 50);
+}
+
+static inline uint64_t guest_random_u64(struct guest_random_state *state)
+{
+ return ((uint64_t)guest_random_u32(state) << 32) | guest_random_u32(state);
+}
+
enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS,
VM_MEM_SRC_ANONYMOUS_THP,
VM_MEM_SRC_ANONYMOUS_HUGETLB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
+ VM_MEM_SRC_SHMEM,
+ VM_MEM_SRC_SHARED_HUGETLB,
+ NUM_SRC_TYPES,
};
+#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS
+
struct vm_mem_backing_src_alias {
const char *name;
- enum vm_mem_backing_src_type type;
+ uint32_t flag;
};
-void backing_src_help(void);
+#define MIN_RUN_DELAY_NS 200000UL
+
+bool thp_configured(void);
+size_t get_trans_hugepagesz(void);
+size_t get_def_hugetlb_pagesz(void);
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
+size_t get_backing_src_pagesz(uint32_t i);
+bool is_backing_src_hugetlb(uint32_t i);
+void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
+long get_run_delay(void);
+bool is_numa_balancing_enabled(void);
+
+/*
+ * Whether or not the given source type is shared memory (as opposed to
+ * anonymous).
+ */
+static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
+{
+ return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
+}
+
+static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t)
+{
+ return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM;
+}
+
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static inline uint64_t align_up(uint64_t x, uint64_t size)
+{
+ uint64_t mask = size - 1;
+
+ TEST_ASSERT(size != 0 && !(size & (size - 1)),
+ "size not a power of 2: %lu", size);
+ return ((x + mask) & ~mask);
+}
+
+static inline uint64_t align_down(uint64_t x, uint64_t size)
+{
+ uint64_t x_aligned_up = align_up(x, size);
+
+ if (x == x_aligned_up)
+ return x;
+ else
+ return x_aligned_up - size;
+}
+
+static inline void *align_ptr_up(void *x, size_t size)
+{
+ return (void *)align_up((unsigned long)x, size);
+}
+
+int atoi_paranoid(const char *num_str);
+
+static inline uint32_t atoi_positive(const char *name, const char *num_str)
+{
+ int num = atoi_paranoid(num_str);
+
+ TEST_ASSERT(num > 0, "%s must be greater than 0, got '%s'", name, num_str);
+ return num;
+}
+
+static inline uint32_t atoi_non_negative(const char *name, const char *num_str)
+{
+ int num = atoi_paranoid(num_str);
+
+ TEST_ASSERT(num >= 0, "%s must be non-negative, got '%s'", name, num_str);
+ return num;
+}
+
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args);
+__printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...);
+
+char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1)));
+
+char *sys_get_cur_clocksource(void);
#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h
new file mode 100644
index 000000000000..9b6edaafe6d4
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/timer_test.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * timer test specific header
+ *
+ * Copyright (C) 2018, Google LLC
+ */
+
+#ifndef SELFTEST_KVM_TIMER_TEST_H
+#define SELFTEST_KVM_TIMER_TEST_H
+
+#include "kvm_util.h"
+
+#define NR_VCPUS_DEF 4
+#define NR_TEST_ITERS_DEF 5
+#define TIMER_TEST_PERIOD_MS_DEF 10
+#define TIMER_TEST_ERR_MARGIN_US 100
+#define TIMER_TEST_MIGRATION_FREQ_MS 2
+
+/* Timer test cmdline parameters */
+struct test_args {
+ uint32_t nr_vcpus;
+ uint32_t nr_iter;
+ uint32_t timer_period_ms;
+ uint32_t migration_freq_ms;
+ uint32_t timer_err_margin_us;
+ /* Members of struct kvm_arm_counter_offset */
+ uint64_t counter_offset;
+ uint64_t reserved;
+};
+
+/* Shared variables between host and guest */
+struct test_vcpu_shared_data {
+ uint32_t nr_iter;
+ int guest_stage;
+ uint64_t xcnt;
+};
+
+extern struct test_args test_args;
+extern struct kvm_vcpu *vcpus[];
+extern struct test_vcpu_shared_data vcpu_shared_data[];
+
+struct kvm_vm *test_vm_create(void);
+void test_vm_cleanup(struct kvm_vm *vm);
+
+#endif /* SELFTEST_KVM_TIMER_TEST_H */
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
new file mode 100644
index 000000000000..d9d6581b8d4f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UCALL_COMMON_H
+#define SELFTEST_KVM_UCALL_COMMON_H
+#include "test_util.h"
+#include "ucall.h"
+
+/* Common ucalls */
+enum {
+ UCALL_NONE,
+ UCALL_SYNC,
+ UCALL_ABORT,
+ UCALL_PRINTF,
+ UCALL_DONE,
+ UCALL_UNHANDLED,
+};
+
+#define UCALL_MAX_ARGS 7
+#define UCALL_BUFFER_LEN 1024
+
+struct ucall {
+ uint64_t cmd;
+ uint64_t args[UCALL_MAX_ARGS];
+ char buffer[UCALL_BUFFER_LEN];
+
+ /* Host virtual address of this struct. */
+ struct ucall *hva;
+};
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
+void ucall_arch_do_ucall(vm_vaddr_t uc);
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
+
+void ucall(uint64_t cmd, int nargs, ...);
+__printf(2, 3) void ucall_fmt(uint64_t cmd, const char *fmt, ...);
+__printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp,
+ const char *file, unsigned int line,
+ const char *fmt, ...);
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
+void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
+int ucall_nr_pages_required(uint64_t page_size);
+
+/*
+ * Perform userspace call without any associated data. This bare call avoids
+ * allocating a ucall struct, which can be useful if the atomic operations in
+ * the full ucall() are problematic and/or unwanted. Note, this will come out
+ * as UCALL_NONE on the backend.
+ */
+#define GUEST_UCALL_NONE() ucall_arch_do_ucall((vm_vaddr_t)NULL)
+
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
+#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_SYNC1(arg0) ucall(UCALL_SYNC, 1, arg0)
+#define GUEST_SYNC2(arg0, arg1) ucall(UCALL_SYNC, 2, arg0, arg1)
+#define GUEST_SYNC3(arg0, arg1, arg2) \
+ ucall(UCALL_SYNC, 3, arg0, arg1, arg2)
+#define GUEST_SYNC4(arg0, arg1, arg2, arg3) \
+ ucall(UCALL_SYNC, 4, arg0, arg1, arg2, arg3)
+#define GUEST_SYNC5(arg0, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 5, arg0, arg1, arg2, arg3, arg4)
+#define GUEST_SYNC6(arg0, arg1, arg2, arg3, arg4, arg5) \
+ ucall(UCALL_SYNC, 6, arg0, arg1, arg2, arg3, arg4, arg5)
+
+#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args)
+#define GUEST_DONE() ucall(UCALL_DONE, 0)
+
+#define REPORT_GUEST_PRINTF(ucall) pr_info("%s", (ucall).buffer)
+
+enum guest_assert_builtin_args {
+ GUEST_ERROR_STRING,
+ GUEST_FILE,
+ GUEST_LINE,
+ GUEST_ASSERT_BUILTIN_NARGS
+};
+
+#define ____GUEST_ASSERT(_condition, _exp, _fmt, _args...) \
+do { \
+ if (!(_condition)) \
+ ucall_assert(UCALL_ABORT, _exp, __FILE__, __LINE__, _fmt, ##_args); \
+} while (0)
+
+#define __GUEST_ASSERT(_condition, _fmt, _args...) \
+ ____GUEST_ASSERT(_condition, #_condition, _fmt, ##_args)
+
+#define GUEST_ASSERT(_condition) \
+ __GUEST_ASSERT(_condition, #_condition)
+
+#define GUEST_FAIL(_fmt, _args...) \
+ ucall_assert(UCALL_ABORT, "Unconditional guest failure", \
+ __FILE__, __LINE__, _fmt, ##_args)
+
+#define GUEST_ASSERT_EQ(a, b) \
+do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ ____GUEST_ASSERT(__a == __b, #a " == " #b, "%#lx != %#lx (%s != %s)", \
+ (unsigned long)(__a), (unsigned long)(__b), #a, #b); \
+} while (0)
+
+#define GUEST_ASSERT_NE(a, b) \
+do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ ____GUEST_ASSERT(__a != __b, #a " != " #b, "%#lx == %#lx (%s == %s)", \
+ (unsigned long)(__a), (unsigned long)(__b), #a, #b); \
+} while (0)
+
+#define REPORT_GUEST_ASSERT(ucall) \
+ test_assert(false, (const char *)(ucall).args[GUEST_ERROR_STRING], \
+ (const char *)(ucall).args[GUEST_FILE], \
+ (ucall).args[GUEST_LINE], "%s", (ucall).buffer)
+
+#endif /* SELFTEST_KVM_UCALL_COMMON_H */
diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h
new file mode 100644
index 000000000000..60f7f9d435dc
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KVM userfaultfd util
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019-2022 Google LLC
+ */
+#include <inttypes.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+
+#include "test_util.h"
+
+typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg);
+
+struct uffd_reader_args {
+ int uffd_mode;
+ int uffd;
+ useconds_t delay;
+ uffd_handler_t handler;
+ /* Holds the read end of the pipe for killing the reader. */
+ int pipe;
+};
+
+struct uffd_desc {
+ int uffd;
+ uint64_t num_readers;
+ /* Holds the write ends of the pipes for killing the readers. */
+ int *pipefds;
+ pthread_t *readers;
+ struct uffd_reader_args *reader_args;
+};
+
+struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
+ void *hva, uint64_t len,
+ uint64_t num_readers,
+ uffd_handler_t handler);
+
+void uffd_stop_demand_paging(struct uffd_desc *uffd);
+
+#ifdef PRINT_PER_PAGE_UPDATES
+#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
+
+#ifdef PRINT_PER_VCPU_UPDATES
+#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
new file mode 100644
index 000000000000..80fe9f69b38d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/apic.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#include <stdint.h>
+
+#include "processor.h"
+#include "ucall_common.h"
+
+#define APIC_DEFAULT_GPA 0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_EXTD (1<<10)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+#define GET_APIC_BASE(x) (((x) >> 12) << 12)
+
+#define APIC_BASE_MSR 0x800
+#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ID 0x20
+#define APIC_LVR 0x30
+#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
+#define APIC_TASKPRI 0x80
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
+#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_IRR 0x200
+#define APIC_ICR 0x300
+#define APIC_LVTCMCI 0x2f0
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define SET_APIC_DEST_FIELD(x) ((x) << 24)
+#define APIC_LVTT 0x320
+#define APIC_LVT_TIMER_ONESHOT (0 << 17)
+#define APIC_LVT_TIMER_PERIODIC (1 << 17)
+#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
+#define APIC_LVT_MASKED (1 << 16)
+#define APIC_TMICT 0x380
+#define APIC_TMCCT 0x390
+#define APIC_TDCR 0x3E0
+
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+static inline uint32_t get_bsp_flag(void)
+{
+ return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+ return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
+}
+
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+ ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
+}
+
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+ return rdmsr(APIC_BASE_MSR + (reg >> 4));
+}
+
+static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
+{
+ return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value);
+}
+
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+ uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+ __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
+ fault, APIC_BASE_MSR + (reg >> 4), value);
+}
+
+static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
+{
+ uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+ __GUEST_ASSERT(fault == GP_VECTOR,
+ "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
+ APIC_BASE_MSR + (reg >> 4), value, fault);
+}
+
+
+#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h
index a034438b6266..5a74bb30e2f8 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86/evmcs.h
@@ -1,15 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * tools/testing/selftests/kvm/include/vmx.h
- *
* Copyright (C) 2018, Red Hat, Inc.
- *
*/
#ifndef SELFTEST_KVM_EVMCS_H
#define SELFTEST_KVM_EVMCS_H
#include <stdint.h>
+#include "hyperv.h"
#include "vmx.h"
#define u16 uint16_t
@@ -20,15 +18,6 @@
extern bool enable_evmcs;
-struct hv_vp_assist_page {
- __u32 apic_assist;
- __u32 reserved;
- __u64 vtl_control[2];
- __u64 nested_enlightenments_control[2];
- __u32 enlighten_vmentry;
- __u64 current_nested_vmcs;
-};
-
struct hv_enlightened_vmcs {
u32 revision_id;
u32 abort;
@@ -41,6 +30,8 @@ struct hv_enlightened_vmcs {
u16 host_gs_selector;
u16 host_tr_selector;
+ u16 padding16_1;
+
u64 host_ia32_pat;
u64 host_ia32_efer;
@@ -159,7 +150,7 @@ struct hv_enlightened_vmcs {
u64 ept_pointer;
u16 virtual_processor_id;
- u16 padding16[3];
+ u16 padding16_2[3];
u64 padding64_2[5];
u64 guest_physical_address;
@@ -195,47 +186,63 @@ struct hv_enlightened_vmcs {
u64 guest_rip;
u32 hv_clean_fields;
- u32 hv_padding_32;
+ u32 padding32_1;
u32 hv_synthetic_controls;
struct {
u32 nested_flush_hypercall:1;
u32 msr_bitmap:1;
u32 reserved:30;
- } hv_enlightenments_control;
+ } __packed hv_enlightenments_control;
u32 hv_vp_id;
-
+ u32 padding32_2;
u64 hv_vm_id;
u64 partition_assist_page;
u64 padding64_4[4];
u64 guest_bndcfgs;
- u64 padding64_5[7];
+ u64 guest_ia32_perf_global_ctrl;
+ u64 guest_ia32_s_cet;
+ u64 guest_ssp;
+ u64 guest_ia32_int_ssp_table_addr;
+ u64 guest_ia32_lbr_ctl;
+ u64 padding64_5[2];
u64 xss_exit_bitmap;
- u64 padding64_6[7];
-};
+ u64 encls_exiting_bitmap;
+ u64 host_ia32_perf_global_ctrl;
+ u64 tsc_multiplier;
+ u64 host_ia32_s_cet;
+ u64 host_ssp;
+ u64 host_ia32_int_ssp_table_addr;
+ u64 padding64_6;
+} __packed;
-#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
+
+#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
extern struct hv_enlightened_vmcs *current_evmcs;
-extern struct hv_vp_assist_page *current_vp_assist;
-int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id);
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
-static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+static inline void evmcs_enable(void)
{
- u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
- HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
- wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
-
- current_vp_assist = vp_assist;
-
enable_evmcs = true;
-
- return 0;
}
static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
@@ -248,6 +255,16 @@ static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
return 0;
}
+static inline bool load_evmcs(struct hyperv_test_pages *hv)
+{
+ if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs))
+ return false;
+
+ current_evmcs->revision_id = EVMCS_VERSION;
+
+ return true;
+}
+
static inline int evmcs_vmptrst(uint64_t *value)
{
*value = current_vp_assist->current_nested_vmcs &
@@ -637,6 +654,18 @@ static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
case VIRTUAL_PROCESSOR_ID:
*value = current_evmcs->virtual_processor_id;
break;
+ case HOST_IA32_PERF_GLOBAL_CTRL:
+ *value = current_evmcs->host_ia32_perf_global_ctrl;
+ break;
+ case GUEST_IA32_PERF_GLOBAL_CTRL:
+ *value = current_evmcs->guest_ia32_perf_global_ctrl;
+ break;
+ case ENCLS_EXITING_BITMAP:
+ *value = current_evmcs->encls_exiting_bitmap;
+ break;
+ case TSC_MULTIPLIER:
+ *value = current_evmcs->tsc_multiplier;
+ break;
default: return 1;
}
@@ -648,381 +677,523 @@ static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
switch (encoding) {
case GUEST_RIP:
current_evmcs->guest_rip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case GUEST_RSP:
current_evmcs->guest_rsp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
break;
case GUEST_RFLAGS:
current_evmcs->guest_rflags = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
break;
case HOST_IA32_PAT:
current_evmcs->host_ia32_pat = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_IA32_EFER:
current_evmcs->host_ia32_efer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_CR0:
current_evmcs->host_cr0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_CR3:
current_evmcs->host_cr3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_CR4:
current_evmcs->host_cr4 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_IA32_SYSENTER_ESP:
current_evmcs->host_ia32_sysenter_esp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_IA32_SYSENTER_EIP:
current_evmcs->host_ia32_sysenter_eip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_RIP:
current_evmcs->host_rip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case IO_BITMAP_A:
current_evmcs->io_bitmap_a = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
break;
case IO_BITMAP_B:
current_evmcs->io_bitmap_b = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
break;
case MSR_BITMAP:
current_evmcs->msr_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
break;
case GUEST_ES_BASE:
current_evmcs->guest_es_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_CS_BASE:
current_evmcs->guest_cs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_SS_BASE:
current_evmcs->guest_ss_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_DS_BASE:
current_evmcs->guest_ds_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_FS_BASE:
current_evmcs->guest_fs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GS_BASE:
current_evmcs->guest_gs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_LDTR_BASE:
current_evmcs->guest_ldtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_TR_BASE:
current_evmcs->guest_tr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GDTR_BASE:
current_evmcs->guest_gdtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_IDTR_BASE:
current_evmcs->guest_idtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case TSC_OFFSET:
current_evmcs->tsc_offset = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
break;
case VIRTUAL_APIC_PAGE_ADDR:
current_evmcs->virtual_apic_page_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
break;
case VMCS_LINK_POINTER:
current_evmcs->vmcs_link_pointer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_IA32_DEBUGCTL:
current_evmcs->guest_ia32_debugctl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_IA32_PAT:
current_evmcs->guest_ia32_pat = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_IA32_EFER:
current_evmcs->guest_ia32_efer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PDPTR0:
current_evmcs->guest_pdptr0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PDPTR1:
current_evmcs->guest_pdptr1 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PDPTR2:
current_evmcs->guest_pdptr2 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PDPTR3:
current_evmcs->guest_pdptr3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PENDING_DBG_EXCEPTIONS:
current_evmcs->guest_pending_dbg_exceptions = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_SYSENTER_ESP:
current_evmcs->guest_sysenter_esp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_SYSENTER_EIP:
current_evmcs->guest_sysenter_eip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case CR0_GUEST_HOST_MASK:
current_evmcs->cr0_guest_host_mask = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case CR4_GUEST_HOST_MASK:
current_evmcs->cr4_guest_host_mask = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case CR0_READ_SHADOW:
current_evmcs->cr0_read_shadow = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case CR4_READ_SHADOW:
current_evmcs->cr4_read_shadow = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case GUEST_CR0:
current_evmcs->guest_cr0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case GUEST_CR3:
current_evmcs->guest_cr3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case GUEST_CR4:
current_evmcs->guest_cr4 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case GUEST_DR7:
current_evmcs->guest_dr7 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case HOST_FS_BASE:
current_evmcs->host_fs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_GS_BASE:
current_evmcs->host_gs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_TR_BASE:
current_evmcs->host_tr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_GDTR_BASE:
current_evmcs->host_gdtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_IDTR_BASE:
current_evmcs->host_idtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_RSP:
current_evmcs->host_rsp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case EPT_POINTER:
current_evmcs->ept_pointer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
break;
case GUEST_BNDCFGS:
current_evmcs->guest_bndcfgs = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case XSS_EXIT_BITMAP:
current_evmcs->xss_exit_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
break;
case GUEST_PHYSICAL_ADDRESS:
current_evmcs->guest_physical_address = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case EXIT_QUALIFICATION:
current_evmcs->exit_qualification = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case GUEST_LINEAR_ADDRESS:
current_evmcs->guest_linear_address = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_MSR_STORE_ADDR:
current_evmcs->vm_exit_msr_store_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_EXIT_MSR_LOAD_ADDR:
current_evmcs->vm_exit_msr_load_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_ENTRY_MSR_LOAD_ADDR:
current_evmcs->vm_entry_msr_load_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_VALUE0:
current_evmcs->cr3_target_value0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_VALUE1:
current_evmcs->cr3_target_value1 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_VALUE2:
current_evmcs->cr3_target_value2 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_VALUE3:
current_evmcs->cr3_target_value3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case TPR_THRESHOLD:
current_evmcs->tpr_threshold = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case GUEST_INTERRUPTIBILITY_INFO:
current_evmcs->guest_interruptibility_info = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
break;
case CPU_BASED_VM_EXEC_CONTROL:
current_evmcs->cpu_based_vm_exec_control = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC;
break;
case EXCEPTION_BITMAP:
current_evmcs->exception_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN;
break;
case VM_ENTRY_CONTROLS:
current_evmcs->vm_entry_controls = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY;
break;
case VM_ENTRY_INTR_INFO_FIELD:
current_evmcs->vm_entry_intr_info_field = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
break;
case VM_ENTRY_EXCEPTION_ERROR_CODE:
current_evmcs->vm_entry_exception_error_code = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
break;
case VM_ENTRY_INSTRUCTION_LEN:
current_evmcs->vm_entry_instruction_len = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
break;
case HOST_IA32_SYSENTER_CS:
current_evmcs->host_ia32_sysenter_cs = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case PIN_BASED_VM_EXEC_CONTROL:
current_evmcs->pin_based_vm_exec_control = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
break;
case VM_EXIT_CONTROLS:
current_evmcs->vm_exit_controls = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
break;
case SECONDARY_VM_EXEC_CONTROL:
current_evmcs->secondary_vm_exec_control = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
break;
case GUEST_ES_LIMIT:
current_evmcs->guest_es_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_CS_LIMIT:
current_evmcs->guest_cs_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_SS_LIMIT:
current_evmcs->guest_ss_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_DS_LIMIT:
current_evmcs->guest_ds_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_FS_LIMIT:
current_evmcs->guest_fs_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GS_LIMIT:
current_evmcs->guest_gs_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_LDTR_LIMIT:
current_evmcs->guest_ldtr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_TR_LIMIT:
current_evmcs->guest_tr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GDTR_LIMIT:
current_evmcs->guest_gdtr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_IDTR_LIMIT:
current_evmcs->guest_idtr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_ES_AR_BYTES:
current_evmcs->guest_es_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_CS_AR_BYTES:
current_evmcs->guest_cs_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_SS_AR_BYTES:
current_evmcs->guest_ss_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_DS_AR_BYTES:
current_evmcs->guest_ds_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_FS_AR_BYTES:
current_evmcs->guest_fs_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GS_AR_BYTES:
current_evmcs->guest_gs_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_LDTR_AR_BYTES:
current_evmcs->guest_ldtr_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_TR_AR_BYTES:
current_evmcs->guest_tr_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_ACTIVITY_STATE:
current_evmcs->guest_activity_state = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_SYSENTER_CS:
current_evmcs->guest_sysenter_cs = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case VM_INSTRUCTION_ERROR:
current_evmcs->vm_instruction_error = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_REASON:
current_evmcs->vm_exit_reason = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_INTR_INFO:
current_evmcs->vm_exit_intr_info = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_INTR_ERROR_CODE:
current_evmcs->vm_exit_intr_error_code = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case IDT_VECTORING_INFO_FIELD:
current_evmcs->idt_vectoring_info_field = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case IDT_VECTORING_ERROR_CODE:
current_evmcs->idt_vectoring_error_code = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_INSTRUCTION_LEN:
current_evmcs->vm_exit_instruction_len = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VMX_INSTRUCTION_INFO:
current_evmcs->vmx_instruction_info = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case PAGE_FAULT_ERROR_CODE_MASK:
current_evmcs->page_fault_error_code_mask = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case PAGE_FAULT_ERROR_CODE_MATCH:
current_evmcs->page_fault_error_code_match = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_COUNT:
current_evmcs->cr3_target_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_EXIT_MSR_STORE_COUNT:
current_evmcs->vm_exit_msr_store_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_EXIT_MSR_LOAD_COUNT:
current_evmcs->vm_exit_msr_load_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_ENTRY_MSR_LOAD_COUNT:
current_evmcs->vm_entry_msr_load_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case HOST_ES_SELECTOR:
current_evmcs->host_es_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_CS_SELECTOR:
current_evmcs->host_cs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_SS_SELECTOR:
current_evmcs->host_ss_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_DS_SELECTOR:
current_evmcs->host_ds_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_FS_SELECTOR:
current_evmcs->host_fs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_GS_SELECTOR:
current_evmcs->host_gs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_TR_SELECTOR:
current_evmcs->host_tr_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case GUEST_ES_SELECTOR:
current_evmcs->guest_es_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_CS_SELECTOR:
current_evmcs->guest_cs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_SS_SELECTOR:
current_evmcs->guest_ss_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_DS_SELECTOR:
current_evmcs->guest_ds_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_FS_SELECTOR:
current_evmcs->guest_fs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GS_SELECTOR:
current_evmcs->guest_gs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_LDTR_SELECTOR:
current_evmcs->guest_ldtr_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_TR_SELECTOR:
current_evmcs->guest_tr_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case VIRTUAL_PROCESSOR_ID:
current_evmcs->virtual_processor_id = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
+ break;
+ case HOST_IA32_PERF_GLOBAL_CTRL:
+ current_evmcs->host_ia32_perf_global_ctrl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case GUEST_IA32_PERF_GLOBAL_CTRL:
+ current_evmcs->guest_ia32_perf_global_ctrl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case ENCLS_EXITING_BITMAP:
+ current_evmcs->encls_exiting_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+ break;
+ case TSC_MULTIPLIER:
+ current_evmcs->tsc_multiplier = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
break;
default: return 1;
}
@@ -1070,7 +1241,10 @@ static inline int evmcs_vmresume(void)
{
int ret;
- current_evmcs->hv_clean_fields = 0;
+ /* HOST_RIP */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ /* HOST_RSP */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
__asm__ __volatile__("push %%rbp;"
"push %%rcx;"
diff --git a/tools/testing/selftests/kvm/include/x86/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h
new file mode 100644
index 000000000000..f13e532be240
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/hyperv.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_HYPERV_H
+#define SELFTEST_KVM_HYPERV_H
+
+#include "processor.h"
+
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
+#define HYPERV_CPUID_INTERFACE 0x40000001
+#define HYPERV_CPUID_VERSION 0x40000002
+#define HYPERV_CPUID_FEATURES 0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007
+#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082
+
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+#define HV_X64_MSR_HYPERCALL 0x40000001
+#define HV_X64_MSR_VP_INDEX 0x40000002
+#define HV_X64_MSR_RESET 0x40000003
+#define HV_X64_MSR_VP_RUNTIME 0x40000010
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
+#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
+#define HV_X64_MSR_EOI 0x40000070
+#define HV_X64_MSR_ICR 0x40000071
+#define HV_X64_MSR_TPR 0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_SCONTROL 0x40000080
+#define HV_X64_MSR_SVERSION 0x40000081
+#define HV_X64_MSR_SIEFP 0x40000082
+#define HV_X64_MSR_SIMP 0x40000083
+#define HV_X64_MSR_EOM 0x40000084
+#define HV_X64_MSR_SINT0 0x40000090
+#define HV_X64_MSR_SINT1 0x40000091
+#define HV_X64_MSR_SINT2 0x40000092
+#define HV_X64_MSR_SINT3 0x40000093
+#define HV_X64_MSR_SINT4 0x40000094
+#define HV_X64_MSR_SINT5 0x40000095
+#define HV_X64_MSR_SINT6 0x40000096
+#define HV_X64_MSR_SINT7 0x40000097
+#define HV_X64_MSR_SINT8 0x40000098
+#define HV_X64_MSR_SINT9 0x40000099
+#define HV_X64_MSR_SINT10 0x4000009A
+#define HV_X64_MSR_SINT11 0x4000009B
+#define HV_X64_MSR_SINT12 0x4000009C
+#define HV_X64_MSR_SINT13 0x4000009D
+#define HV_X64_MSR_SINT14 0x4000009E
+#define HV_X64_MSR_SINT15 0x4000009F
+#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
+#define HV_X64_MSR_GUEST_IDLE 0x400000F0
+#define HV_X64_MSR_CRASH_P0 0x40000100
+#define HV_X64_MSR_CRASH_P1 0x40000101
+#define HV_X64_MSR_CRASH_P2 0x40000102
+#define HV_X64_MSR_CRASH_P3 0x40000103
+#define HV_X64_MSR_CRASH_P4 0x40000104
+#define HV_X64_MSR_CRASH_CTL 0x40000105
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
+#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF
+
+/* HYPERV_CPUID_FEATURES.EAX */
+#define HV_MSR_VP_RUNTIME_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1)
+#define HV_MSR_SYNIC_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2)
+#define HV_MSR_SYNTIMER_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3)
+#define HV_MSR_APIC_ACCESS_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4)
+#define HV_MSR_HYPERCALL_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5)
+#define HV_MSR_VP_INDEX_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6)
+#define HV_MSR_RESET_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7)
+#define HV_MSR_STAT_PAGES_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9)
+#define HV_MSR_GUEST_IDLE_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10)
+#define HV_ACCESS_FREQUENCY_MSRS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11)
+#define HV_ACCESS_REENLIGHTENMENT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13)
+#define HV_ACCESS_TSC_INVARIANT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15)
+
+/* HYPERV_CPUID_FEATURES.EBX */
+#define HV_CREATE_PARTITIONS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0)
+#define HV_ACCESS_PARTITION_ID \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1)
+#define HV_ACCESS_MEMORY_POOL \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2)
+#define HV_ADJUST_MESSAGE_BUFFERS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3)
+#define HV_POST_MESSAGES \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4)
+#define HV_SIGNAL_EVENTS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5)
+#define HV_CREATE_PORT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6)
+#define HV_CONNECT_PORT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7)
+#define HV_ACCESS_STATS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8)
+#define HV_DEBUGGING \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
+#define HV_CPU_MANAGEMENT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
+#define HV_ENABLE_EXTENDED_HYPERCALLS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
+#define HV_ISOLATION \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
+
+/* HYPERV_CPUID_FEATURES.EDX */
+#define HV_X64_MWAIT_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0)
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1)
+#define HV_X64_PERF_MONITOR_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2)
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4)
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5)
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8)
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19)
+
+/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+#define HV_X64_AS_SWITCH_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0)
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1)
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2)
+#define HV_X64_APIC_ACCESS_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3)
+#define HV_X64_SYSTEM_RESET_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4)
+#define HV_X64_RELAXED_TIMING_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5)
+#define HV_DEPRECATING_AEOI_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9)
+#define HV_X64_CLUSTER_IPI_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10)
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11)
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
+
+/* HYPERV_CPUID_NESTED_FEATURES.EAX */
+#define HV_X64_NESTED_DIRECT_FLUSH \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17)
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18)
+#define HV_X64_NESTED_MSR_BITMAP \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19)
+
+/* HYPERV_CPUID_NESTED_FEATURES.EBX */
+#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0)
+
+/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
+
+/* Hypercalls */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
+#define HVCALL_GET_PARTITION_ID 0x0046
+#define HVCALL_DEPOSIT_MEMORY 0x0048
+#define HVCALL_CREATE_VP 0x004e
+#define HVCALL_GET_VP_REGISTERS 0x0050
+#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_POST_MESSAGE 0x005c
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_POST_DEBUG_DATA 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
+#define HVCALL_RESET_DEBUG_SESSION 0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
+#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
+#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+/* Extended hypercalls */
+#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS 0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
+#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
+#define HV_STATUS_ACCESS_DENIED 6
+#define HV_STATUS_OPERATION_DENIED 8
+#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
+#define HV_STATUS_INVALID_CONNECTION_ID 18
+#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+
+/*
+ * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status'
+ * is set to the hypercall status (if no exception occurred).
+ */
+static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address,
+ uint64_t *hv_status)
+{
+ uint64_t error_code;
+ uint8_t vector;
+
+ /* Note both the hypercall and the "asm safe" clobber r9-r11. */
+ asm volatile("mov %[output_address], %%r8\n\t"
+ KVM_ASM_SAFE("vmcall")
+ : "=a" (*hv_status),
+ "+c" (control), "+d" (input_address),
+ KVM_ASM_SAFE_OUTPUTS(vector, error_code)
+ : [output_address] "r"(output_address),
+ "a" (-EFAULT)
+ : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
+ return vector;
+}
+
+/* Issue a Hyper-V hypercall and assert that it succeeded. */
+static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address)
+{
+ uint64_t hv_status;
+ uint8_t vector;
+
+ vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
+
+ GUEST_ASSERT(!vector);
+ GUEST_ASSERT((hv_status & 0xffff) == 0);
+}
+
+/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */
+static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
+{
+ int i;
+
+ for (i = 0; i < n_sse_regs; i++)
+ write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
+}
+
+/* Proper HV_X64_MSR_GUEST_OS_ID value */
+#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
+
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_nested_enlightenments_control {
+ struct {
+ __u32 directhypercall:1;
+ __u32 reserved:31;
+ } features;
+ struct {
+ __u32 reserved;
+ } hypercallControls;
+} __packed;
+
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+ __u32 apic_assist;
+ __u32 reserved1;
+ __u64 vtl_control[3];
+ struct hv_nested_enlightenments_control nested_control;
+ __u8 enlighten_vmentry;
+ __u8 reserved2[7];
+ __u64 current_nested_vmcs;
+} __packed;
+
+extern struct hv_vp_assist_page *current_vp_assist;
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
+
+struct hyperv_test_pages {
+ /* VP assist page */
+ void *vp_assist_hva;
+ uint64_t vp_assist_gpa;
+ void *vp_assist;
+
+ /* Partition assist page */
+ void *partition_assist_hva;
+ uint64_t partition_assist_gpa;
+ void *partition_assist;
+
+ /* Enlightened VMCS */
+ void *enlightened_vmcs_hva;
+ uint64_t enlightened_vmcs_gpa;
+ void *enlightened_vmcs;
+};
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+ vm_vaddr_t *p_hv_pages_gva);
+
+/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
+#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0)
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature);
+
+#endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
new file mode 100644
index 000000000000..972bb1c4ab4c
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "kvm_util_types.h"
+#include "test_util.h"
+
+extern bool is_forced_emulation_enabled;
+
+struct kvm_vm_arch {
+ vm_vaddr_t gdt;
+ vm_vaddr_t tss;
+ vm_vaddr_t idt;
+
+ uint64_t c_bit;
+ uint64_t s_bit;
+ int sev_fd;
+ bool is_pt_protected;
+};
+
+static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
+{
+ return arch->c_bit || arch->s_bit;
+}
+
+#define vm_arch_has_protected_memory(vm) \
+ __vm_arch_has_protected_memory(&(vm)->arch)
+
+#define vcpu_arch_put_guest(mem, __val) \
+do { \
+ const typeof(mem) val = (__val); \
+ \
+ if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \
+ (mem) = val; \
+ } else if (guest_random_bool(&guest_rng)) { \
+ __asm__ __volatile__(KVM_FEP "mov %1, %0" \
+ : "+m" (mem) \
+ : "r" (val) : "memory"); \
+ } else { \
+ uint64_t __old = READ_ONCE(mem); \
+ \
+ __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \
+ : [ptr] "+m" (mem), [old] "+a" (__old) \
+ : [new]"r" (val) : "memory", "cc"); \
+ } \
+} while (0)
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/x86/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h
new file mode 100644
index 000000000000..295f2d554754
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/mce.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_MCE_H
+#define SELFTEST_KVM_MCE_H
+
+#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
+#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */
+#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */
+#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
+#define KVM_MAX_MCE_BANKS 32
+#define MCG_CAP_BANKS_MASK 0xff /* Bit 0-7 of the MCG_CAP register are #banks */
+#define MCI_STATUS_VAL (1ULL << 63) /* valid error */
+#define MCI_STATUS_UC (1ULL << 61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL << 60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
+#define MCM_ADDR_PHYS 2 /* physical address */
+#define MCI_CTL2_CMCI_EN BIT_ULL(30)
+
+#endif /* SELFTEST_KVM_MCE_H */
diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
new file mode 100644
index 000000000000..72575eadb63a
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/pmu.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <linux/bits.h>
+
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
+
+/*
+ * Encode an eventsel+umask pair into event-select MSR format. Note, this is
+ * technically AMD's format, as Intel's format only supports 8 bits for the
+ * event selector, i.e. doesn't use bits 24:16 for the selector. But, OR-ing
+ * in '0' is a nop and won't clobber the CMASK.
+ */
+#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) | \
+ ((eventsel) & 0xff) | \
+ ((umask) & 0xff) << 8)
+
+/*
+ * These are technically Intel's definitions, but except for CMASK (see above),
+ * AMD's layout is compatible with Intel's.
+ */
+#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0)
+#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8)
+#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16)
+#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17)
+#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18)
+#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19)
+#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20)
+#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22)
+#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23)
+#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24)
+
+/* RDPMC control flags, Intel only. */
+#define INTEL_RDPMC_METRICS BIT_ULL(29)
+#define INTEL_RDPMC_FIXED BIT_ULL(30)
+#define INTEL_RDPMC_FAST BIT_ULL(31)
+
+/* Fixed PMC controls, Intel only. */
+#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx)))
+
+#define FIXED_PMC_KERNEL BIT_ULL(0)
+#define FIXED_PMC_USER BIT_ULL(1)
+#define FIXED_PMC_ANYTHREAD BIT_ULL(2)
+#define FIXED_PMC_ENABLE_PMI BIT_ULL(3)
+#define FIXED_PMC_NR_BITS 4
+#define FIXED_PMC_CTRL(_idx, _val) ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
+
+#define PMU_CAP_FW_WRITES BIT_ULL(13)
+#define PMU_CAP_LBR_FMT 0x3f
+
+#define INTEL_ARCH_CPU_CYCLES RAW_EVENT(0x3c, 0x00)
+#define INTEL_ARCH_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
+#define INTEL_ARCH_REFERENCE_CYCLES RAW_EVENT(0x3c, 0x01)
+#define INTEL_ARCH_LLC_REFERENCES RAW_EVENT(0x2e, 0x4f)
+#define INTEL_ARCH_LLC_MISSES RAW_EVENT(0x2e, 0x41)
+#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00)
+#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00)
+#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01)
+#define INTEL_ARCH_TOPDOWN_BE_BOUND RAW_EVENT(0xa4, 0x02)
+#define INTEL_ARCH_TOPDOWN_BAD_SPEC RAW_EVENT(0x73, 0x00)
+#define INTEL_ARCH_TOPDOWN_FE_BOUND RAW_EVENT(0x9c, 0x01)
+#define INTEL_ARCH_TOPDOWN_RETIRING RAW_EVENT(0xc2, 0x02)
+#define INTEL_ARCH_LBR_INSERTS RAW_EVENT(0xe4, 0x01)
+
+#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00)
+#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
+#define AMD_ZEN_BRANCHES_RETIRED RAW_EVENT(0xc2, 0x00)
+#define AMD_ZEN_BRANCHES_MISPREDICTED RAW_EVENT(0xc3, 0x00)
+
+/*
+ * Note! The order and thus the index of the architectural events matters as
+ * support for each event is enumerated via CPUID using the index of the event.
+ */
+enum intel_pmu_architectural_events {
+ INTEL_ARCH_CPU_CYCLES_INDEX,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
+ INTEL_ARCH_REFERENCE_CYCLES_INDEX,
+ INTEL_ARCH_LLC_REFERENCES_INDEX,
+ INTEL_ARCH_LLC_MISSES_INDEX,
+ INTEL_ARCH_BRANCHES_RETIRED_INDEX,
+ INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
+ INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+ INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX,
+ INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX,
+ INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX,
+ INTEL_ARCH_TOPDOWN_RETIRING_INDEX,
+ INTEL_ARCH_LBR_INSERTS_INDEX,
+ NR_INTEL_ARCH_EVENTS,
+};
+
+enum amd_pmu_zen_events {
+ AMD_ZEN_CORE_CYCLES_INDEX,
+ AMD_ZEN_INSTRUCTIONS_INDEX,
+ AMD_ZEN_BRANCHES_INDEX,
+ AMD_ZEN_BRANCH_MISSES_INDEX,
+ NR_AMD_ZEN_EVENTS,
+};
+
+extern const uint64_t intel_pmu_arch_events[];
+extern const uint64_t amd_pmu_zen_events[];
+
+enum pmu_errata {
+ INSTRUCTIONS_RETIRED_OVERCOUNT,
+ BRANCHES_RETIRED_OVERCOUNT,
+};
+extern uint64_t pmu_errata_mask;
+
+void kvm_init_pmu_errata(void);
+
+static inline bool this_pmu_has_errata(enum pmu_errata errata)
+{
+ return pmu_errata_mask & BIT_ULL(errata);
+}
+
+#endif /* SELFTEST_KVM_PMU_H */
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
new file mode 100644
index 000000000000..57d62a425109
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -0,0 +1,1497 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <assert.h>
+#include <stdint.h>
+#include <syscall.h>
+
+#include <asm/msr-index.h>
+#include <asm/prctl.h>
+
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+extern bool host_cpu_is_intel;
+extern bool host_cpu_is_amd;
+extern uint64_t guest_tsc_khz;
+
+#ifndef MAX_NR_CPUID_ENTRIES
+#define MAX_NR_CPUID_ENTRIES 100
+#endif
+
+#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+
+#define NMI_VECTOR 0x02
+
+const char *ex_str(int vector);
+
+#define X86_EFLAGS_FIXED (1u << 1)
+
+#define X86_CR4_VME (1ul << 0)
+#define X86_CR4_PVI (1ul << 1)
+#define X86_CR4_TSD (1ul << 2)
+#define X86_CR4_DE (1ul << 3)
+#define X86_CR4_PSE (1ul << 4)
+#define X86_CR4_PAE (1ul << 5)
+#define X86_CR4_MCE (1ul << 6)
+#define X86_CR4_PGE (1ul << 7)
+#define X86_CR4_PCE (1ul << 8)
+#define X86_CR4_OSFXSR (1ul << 9)
+#define X86_CR4_OSXMMEXCPT (1ul << 10)
+#define X86_CR4_UMIP (1ul << 11)
+#define X86_CR4_LA57 (1ul << 12)
+#define X86_CR4_VMXE (1ul << 13)
+#define X86_CR4_SMXE (1ul << 14)
+#define X86_CR4_FSGSBASE (1ul << 16)
+#define X86_CR4_PCIDE (1ul << 17)
+#define X86_CR4_OSXSAVE (1ul << 18)
+#define X86_CR4_SMEP (1ul << 20)
+#define X86_CR4_SMAP (1ul << 21)
+#define X86_CR4_PKE (1ul << 22)
+
+struct xstate_header {
+ u64 xstate_bv;
+ u64 xcomp_bv;
+ u64 reserved[6];
+} __attribute__((packed));
+
+struct xstate {
+ u8 i387[512];
+ struct xstate_header header;
+ u8 extended_state_area[0];
+} __attribute__ ((packed, aligned (64)));
+
+#define XFEATURE_MASK_FP BIT_ULL(0)
+#define XFEATURE_MASK_SSE BIT_ULL(1)
+#define XFEATURE_MASK_YMM BIT_ULL(2)
+#define XFEATURE_MASK_BNDREGS BIT_ULL(3)
+#define XFEATURE_MASK_BNDCSR BIT_ULL(4)
+#define XFEATURE_MASK_OPMASK BIT_ULL(5)
+#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6)
+#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7)
+#define XFEATURE_MASK_PT BIT_ULL(8)
+#define XFEATURE_MASK_PKRU BIT_ULL(9)
+#define XFEATURE_MASK_PASID BIT_ULL(10)
+#define XFEATURE_MASK_CET_USER BIT_ULL(11)
+#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12)
+#define XFEATURE_MASK_LBR BIT_ULL(15)
+#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17)
+#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18)
+
+#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM)
+#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA | \
+ XFEATURE_MASK_XTILE_CFG)
+
+/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */
+enum cpuid_output_regs {
+ KVM_CPUID_EAX,
+ KVM_CPUID_EBX,
+ KVM_CPUID_ECX,
+ KVM_CPUID_EDX
+};
+
+/*
+ * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
+ * passed by value with no overhead.
+ */
+struct kvm_x86_cpu_feature {
+ u32 function;
+ u16 index;
+ u8 reg;
+ u8 bit;
+};
+#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \
+({ \
+ struct kvm_x86_cpu_feature feature = { \
+ .function = fn, \
+ .index = idx, \
+ .reg = KVM_CPUID_##gpr, \
+ .bit = __bit, \
+ }; \
+ \
+ kvm_static_assert((fn & 0xc0000000) == 0 || \
+ (fn & 0xc0000000) == 0x40000000 || \
+ (fn & 0xc0000000) == 0x80000000 || \
+ (fn & 0xc0000000) == 0xc0000000); \
+ kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \
+ feature; \
+})
+
+/*
+ * Basic Leafs, a.k.a. Intel defined
+ */
+#define X86_FEATURE_MWAIT KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
+#define X86_FEATURE_VMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
+#define X86_FEATURE_SMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
+#define X86_FEATURE_PDCM KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
+#define X86_FEATURE_PCID KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
+#define X86_FEATURE_X2APIC KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
+#define X86_FEATURE_MOVBE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
+#define X86_FEATURE_TSC_DEADLINE_TIMER KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
+#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
+#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
+#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
+#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
+#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
+#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
+#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
+#define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
+#define X86_FEATURE_XMM KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
+#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
+#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
+#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
+#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
+#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
+#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
+#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
+#define X86_FEATURE_RTM KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
+#define X86_FEATURE_MPX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
+#define X86_FEATURE_SMAP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
+#define X86_FEATURE_PCOMMIT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
+#define X86_FEATURE_CLFLUSHOPT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
+#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
+#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
+#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
+#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
+#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
+#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
+#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
+#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
+#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
+#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
+#define X86_FEATURE_SPEC_CTRL KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
+#define X86_FEATURE_ARCH_CAPABILITIES KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
+#define X86_FEATURE_PKS KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
+#define X86_FEATURE_XTILECFG KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
+#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
+#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
+#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
+#define X86_FEATURE_XTILEDATA_XFD KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
+
+/*
+ * Extended Leafs, a.k.a. AMD defined
+ */
+#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23)
+#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24)
+#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28)
+#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
+#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
+#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
+#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
+#define X86_FEATURE_INVTSC KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
+#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
+#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
+#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
+#define X86_FEATURE_LBRV KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
+#define X86_FEATURE_NRIPS KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
+#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
+#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
+#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
+#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
+#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
+#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4)
+#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
+#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
+
+/*
+ * KVM defined paravirt features.
+ */
+#define X86_FEATURE_KVM_CLOCKSOURCE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
+#define X86_FEATURE_KVM_NOP_IO_DELAY KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
+#define X86_FEATURE_KVM_MMU_OP KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
+#define X86_FEATURE_KVM_CLOCKSOURCE2 KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
+#define X86_FEATURE_KVM_ASYNC_PF KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
+#define X86_FEATURE_KVM_STEAL_TIME KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
+#define X86_FEATURE_KVM_PV_EOI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
+#define X86_FEATURE_KVM_PV_UNHALT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
+/* Bit 8 apparently isn't used?!?! */
+#define X86_FEATURE_KVM_PV_TLB_FLUSH KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
+#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
+#define X86_FEATURE_KVM_PV_SEND_IPI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
+#define X86_FEATURE_KVM_POLL_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
+#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
+#define X86_FEATURE_KVM_ASYNC_PF_INT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
+#define X86_FEATURE_KVM_MSI_EXT_DEST_ID KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
+#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
+#define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
+
+/*
+ * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
+ * value/property as opposed to a single-bit feature. Again, pack the info
+ * into a 64-bit value to pass by value with no overhead.
+ */
+struct kvm_x86_cpu_property {
+ u32 function;
+ u8 index;
+ u8 reg;
+ u8 lo_bit;
+ u8 hi_bit;
+};
+#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \
+({ \
+ struct kvm_x86_cpu_property property = { \
+ .function = fn, \
+ .index = idx, \
+ .reg = KVM_CPUID_##gpr, \
+ .lo_bit = low_bit, \
+ .hi_bit = high_bit, \
+ }; \
+ \
+ kvm_static_assert(low_bit < high_bit); \
+ kvm_static_assert((fn & 0xc0000000) == 0 || \
+ (fn & 0xc0000000) == 0x40000000 || \
+ (fn & 0xc0000000) == 0x80000000 || \
+ (fn & 0xc0000000) == 0xc0000000); \
+ kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \
+ property; \
+})
+
+#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
+#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
+#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
+#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
+#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
+
+#define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31)
+#define X86_PROPERTY_SUPPORTED_XCR0_HI KVM_X86_CPU_PROPERTY(0xd, 0, EDX, 0, 31)
+
+#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31)
+#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31)
+#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES KVM_X86_CPU_PROPERTY(0x1d, 0, EAX, 0, 31)
+#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15)
+#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
+#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15)
+#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
+#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15)
+
+#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
+
+#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
+#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
+#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
+#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
+#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3)
+#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15)
+
+#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
+
+/*
+ * Intel's architectural PMU events are bizarre. They have a "feature" bit
+ * that indicates the feature is _not_ supported, and a property that states
+ * the length of the bit mask of unsupported features. A feature is supported
+ * if the size of the bit mask is larger than the "unavailable" bit, and said
+ * bit is not set. Fixed counters also bizarre enumeration, but inverted from
+ * arch events for general purpose counters. Fixed counters are supported if a
+ * feature flag is set **OR** the total number of fixed counters is greater
+ * than index of the counter.
+ *
+ * Wrap the events for general purpose and fixed counters to simplify checking
+ * whether or not a given architectural event is supported.
+ */
+struct kvm_x86_pmu_feature {
+ struct kvm_x86_cpu_feature f;
+};
+#define KVM_X86_PMU_FEATURE(__reg, __bit) \
+({ \
+ struct kvm_x86_pmu_feature feature = { \
+ .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \
+ }; \
+ \
+ kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX || \
+ KVM_CPUID_##__reg == KVM_CPUID_ECX); \
+ feature; \
+})
+
+#define X86_PMU_FEATURE_CPU_CYCLES KVM_X86_PMU_FEATURE(EBX, 0)
+#define X86_PMU_FEATURE_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 1)
+#define X86_PMU_FEATURE_REFERENCE_CYCLES KVM_X86_PMU_FEATURE(EBX, 2)
+#define X86_PMU_FEATURE_LLC_REFERENCES KVM_X86_PMU_FEATURE(EBX, 3)
+#define X86_PMU_FEATURE_LLC_MISSES KVM_X86_PMU_FEATURE(EBX, 4)
+#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5)
+#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7)
+#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8)
+#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9)
+#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10)
+#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11)
+#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12)
+
+#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0)
+#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1)
+#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 2)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED KVM_X86_PMU_FEATURE(ECX, 3)
+
+static inline unsigned int x86_family(unsigned int eax)
+{
+ unsigned int x86;
+
+ x86 = (eax >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (eax >> 20) & 0xff;
+
+ return x86;
+}
+
+static inline unsigned int x86_model(unsigned int eax)
+{
+ return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
+}
+
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK BIT_ULL(0)
+#define PTE_WRITABLE_MASK BIT_ULL(1)
+#define PTE_USER_MASK BIT_ULL(2)
+#define PTE_ACCESSED_MASK BIT_ULL(5)
+#define PTE_DIRTY_MASK BIT_ULL(6)
+#define PTE_LARGE_MASK BIT_ULL(7)
+#define PTE_GLOBAL_MASK BIT_ULL(8)
+#define PTE_NX_MASK BIT_ULL(63)
+
+#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1ULL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)
+
+#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9))
+#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x))
+#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
+
+#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK)
+#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT)
+
+/* General Registers in 64-Bit Mode */
+struct gpr64_regs {
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 rsp;
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+};
+
+struct desc64 {
+ uint16_t limit0;
+ uint16_t base0;
+ unsigned base1:8, type:4, s:1, dpl:2, p:1;
+ unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
+ uint32_t base3;
+ uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+ uint16_t size;
+ uint64_t address;
+} __attribute__((packed));
+
+struct kvm_x86_state {
+ struct kvm_xsave *xsave;
+ struct kvm_vcpu_events events;
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ struct kvm_xcrs xcrs;
+ struct kvm_sregs sregs;
+ struct kvm_debugregs debugregs;
+ union {
+ struct kvm_nested_state nested;
+ char nested_[16384];
+ };
+ struct kvm_msrs msrs;
+};
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+ return ((uint64_t)desc->base3 << 32) |
+ (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+ uint32_t eax, edx;
+ uint64_t tsc_val;
+ /*
+ * The lfence is to wait (on Intel CPUs) until all previous
+ * instructions have been executed. If software requires RDTSC to be
+ * executed prior to execution of any subsequent instruction, it can
+ * execute LFENCE immediately after RDTSC
+ */
+ __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+ tsc_val = ((uint64_t)edx) << 32 | eax;
+ return tsc_val;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+ uint32_t eax, edx;
+
+ __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+ uint16_t tmp;
+
+ __asm__ __volatile__("in %%dx, %%ax"
+ : /* output */ "=a" (tmp)
+ : /* input */ "d" (port));
+
+ return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+ uint16_t es;
+
+ __asm__ __volatile__("mov %%es, %[es]"
+ : /* output */ [es]"=rm"(es));
+ return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+ uint16_t cs;
+
+ __asm__ __volatile__("mov %%cs, %[cs]"
+ : /* output */ [cs]"=rm"(cs));
+ return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+ uint16_t ss;
+
+ __asm__ __volatile__("mov %%ss, %[ss]"
+ : /* output */ [ss]"=rm"(ss));
+ return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+ uint16_t ds;
+
+ __asm__ __volatile__("mov %%ds, %[ds]"
+ : /* output */ [ds]"=rm"(ds));
+ return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+ uint16_t fs;
+
+ __asm__ __volatile__("mov %%fs, %[fs]"
+ : /* output */ [fs]"=rm"(fs));
+ return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+ uint16_t gs;
+
+ __asm__ __volatile__("mov %%gs, %[gs]"
+ : /* output */ [gs]"=rm"(gs));
+ return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+ uint16_t tr;
+
+ __asm__ __volatile__("str %[tr]"
+ : /* output */ [tr]"=rm"(tr));
+ return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+ uint64_t cr0;
+
+ __asm__ __volatile__("mov %%cr0, %[cr0]"
+ : /* output */ [cr0]"=r"(cr0));
+ return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+ uint64_t cr3;
+
+ __asm__ __volatile__("mov %%cr3, %[cr3]"
+ : /* output */ [cr3]"=r"(cr3));
+ return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+ uint64_t cr4;
+
+ __asm__ __volatile__("mov %%cr4, %[cr4]"
+ : /* output */ [cr4]"=r"(cr4));
+ return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline void set_idt(const struct desc_ptr *idt_desc)
+{
+ __asm__ __volatile__("lidt %0"::"m"(*idt_desc));
+}
+
+static inline u64 xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ __asm__ __volatile__("xgetbv;"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax | ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+static inline void wrpkru(u32 pkru)
+{
+ /* Note, ECX and EDX are architecturally required to be '0'. */
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (pkru), "c"(0), "d"(0));
+}
+
+static inline struct desc_ptr get_gdt(void)
+{
+ struct desc_ptr gdt;
+ __asm__ __volatile__("sgdt %[gdt]"
+ : /* output */ [gdt]"=m"(gdt));
+ return gdt;
+}
+
+static inline struct desc_ptr get_idt(void)
+{
+ struct desc_ptr idt;
+ __asm__ __volatile__("sidt %[idt]"
+ : /* output */ [idt]"=m"(idt));
+ return idt;
+}
+
+static inline void outl(uint16_t port, uint32_t value)
+{
+ __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
+}
+
+static inline void __cpuid(uint32_t function, uint32_t index,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ *eax = function;
+ *ecx = index;
+
+ asm volatile("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
+}
+
+static inline void cpuid(uint32_t function,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ return __cpuid(function, 0, eax, ebx, ecx, edx);
+}
+
+static inline uint32_t this_cpu_fms(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ return eax;
+}
+
+static inline uint32_t this_cpu_family(void)
+{
+ return x86_family(this_cpu_fms());
+}
+
+static inline uint32_t this_cpu_model(void)
+{
+ return x86_model(this_cpu_fms());
+}
+
+static inline bool this_cpu_vendor_string_is(const char *vendor)
+{
+ const uint32_t *chunk = (const uint32_t *)vendor;
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(0, &eax, &ebx, &ecx, &edx);
+ return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
+}
+
+static inline bool this_cpu_is_intel(void)
+{
+ return this_cpu_vendor_string_is("GenuineIntel");
+}
+
+/*
+ * Exclude early K5 samples with a vendor string of "AMDisbetter!"
+ */
+static inline bool this_cpu_is_amd(void)
+{
+ return this_cpu_vendor_string_is("AuthenticAMD");
+}
+
+static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
+ uint8_t reg, uint8_t lo, uint8_t hi)
+{
+ uint32_t gprs[4];
+
+ __cpuid(function, index,
+ &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
+ &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
+
+ return (gprs[reg] & GENMASK(hi, lo)) >> lo;
+}
+
+static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ return __this_cpu_has(feature.function, feature.index,
+ feature.reg, feature.bit, feature.bit);
+}
+
+static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
+{
+ return __this_cpu_has(property.function, property.index,
+ property.reg, property.lo_bit, property.hi_bit);
+}
+
+static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+ uint32_t max_leaf;
+
+ switch (property.function & 0xc0000000) {
+ case 0:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+ break;
+ case 0x40000000:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+ break;
+ case 0x80000000:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+ break;
+ case 0xc0000000:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+ }
+ return max_leaf >= property.function;
+}
+
+static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+ uint32_t nr_bits;
+
+ if (feature.f.reg == KVM_CPUID_EBX) {
+ nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+ return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
+ }
+
+ GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
+ nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ return nr_bits > feature.f.bit || this_cpu_has(feature.f);
+}
+
+static __always_inline uint64_t this_cpu_supported_xcr0(void)
+{
+ if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+ return 0;
+
+ return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+ ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
+typedef u32 __attribute__((vector_size(16))) sse128_t;
+#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
+#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
+#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; })
+
+static inline void read_sse_reg(int reg, sse128_t *data)
+{
+ switch (reg) {
+ case 0:
+ asm("movdqa %%xmm0, %0" : "=m"(*data));
+ break;
+ case 1:
+ asm("movdqa %%xmm1, %0" : "=m"(*data));
+ break;
+ case 2:
+ asm("movdqa %%xmm2, %0" : "=m"(*data));
+ break;
+ case 3:
+ asm("movdqa %%xmm3, %0" : "=m"(*data));
+ break;
+ case 4:
+ asm("movdqa %%xmm4, %0" : "=m"(*data));
+ break;
+ case 5:
+ asm("movdqa %%xmm5, %0" : "=m"(*data));
+ break;
+ case 6:
+ asm("movdqa %%xmm6, %0" : "=m"(*data));
+ break;
+ case 7:
+ asm("movdqa %%xmm7, %0" : "=m"(*data));
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void write_sse_reg(int reg, const sse128_t *data)
+{
+ switch (reg) {
+ case 0:
+ asm("movdqa %0, %%xmm0" : : "m"(*data));
+ break;
+ case 1:
+ asm("movdqa %0, %%xmm1" : : "m"(*data));
+ break;
+ case 2:
+ asm("movdqa %0, %%xmm2" : : "m"(*data));
+ break;
+ case 3:
+ asm("movdqa %0, %%xmm3" : : "m"(*data));
+ break;
+ case 4:
+ asm("movdqa %0, %%xmm4" : : "m"(*data));
+ break;
+ case 5:
+ asm("movdqa %0, %%xmm5" : : "m"(*data));
+ break;
+ case 6:
+ asm("movdqa %0, %%xmm6" : : "m"(*data));
+ break;
+ case 7:
+ asm("movdqa %0, %%xmm7" : : "m"(*data));
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void cpu_relax(void)
+{
+ asm volatile("rep; nop" ::: "memory");
+}
+
+static inline void udelay(unsigned long usec)
+{
+ uint64_t start, now, cycles;
+
+ GUEST_ASSERT(guest_tsc_khz);
+ cycles = guest_tsc_khz / 1000 * usec;
+
+ /*
+ * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
+ * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
+ */
+ start = rdtsc();
+ do {
+ now = rdtsc();
+ } while (now - start < cycles);
+}
+
+#define ud2() \
+ __asm__ __volatile__( \
+ "ud2\n" \
+ )
+
+#define hlt() \
+ __asm__ __volatile__( \
+ "hlt\n" \
+ )
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
+void kvm_x86_state_cleanup(struct kvm_x86_state *state);
+
+const struct kvm_msr_list *kvm_get_msr_index_list(void);
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
+uint64_t kvm_get_feature_msr(uint64_t msr_index);
+
+static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
+ struct kvm_msrs *msrs)
+{
+ int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
+
+ TEST_ASSERT(r == msrs->nmsrs,
+ "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
+ r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
+{
+ int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
+
+ TEST_ASSERT(r == msrs->nmsrs,
+ "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
+ r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *debugregs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *debugregs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
+}
+static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
+}
+static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
+}
+static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
+ struct kvm_xcrs *xcrs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
+}
+static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
+}
+
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index);
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+
+static inline uint32_t kvm_cpu_fms(void)
+{
+ return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
+}
+
+static inline uint32_t kvm_cpu_family(void)
+{
+ return x86_family(kvm_cpu_fms());
+}
+
+static inline uint32_t kvm_cpu_model(void)
+{
+ return x86_model(kvm_cpu_fms());
+}
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_feature feature);
+
+static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_property property);
+
+static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
+{
+ return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
+}
+
+static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+ uint32_t max_leaf;
+
+ switch (property.function & 0xc0000000) {
+ case 0:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+ break;
+ case 0x40000000:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+ break;
+ case 0x80000000:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+ break;
+ case 0xc0000000:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+ }
+ return max_leaf >= property.function;
+}
+
+static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+ uint32_t nr_bits;
+
+ if (feature.f.reg == KVM_CPUID_EBX) {
+ nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+ return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
+ }
+
+ TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
+ nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
+}
+
+static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
+{
+ if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+ return 0;
+
+ return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+ ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
+static inline size_t kvm_cpuid2_size(int nr_entries)
+{
+ return sizeof(struct kvm_cpuid2) +
+ sizeof(struct kvm_cpuid_entry2) * nr_entries;
+}
+
+/*
+ * Allocate a "struct kvm_cpuid2* instance, with the 0-length arrary of
+ * entries sized to hold @nr_entries. The caller is responsible for freeing
+ * the struct.
+ */
+static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
+{
+ struct kvm_cpuid2 *cpuid;
+
+ cpuid = malloc(kvm_cpuid2_size(nr_entries));
+ TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
+
+ cpuid->nent = nr_entries;
+
+ return cpuid;
+}
+
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
+
+static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
+{
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
+static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+ uint32_t function,
+ uint32_t index)
+{
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");
+
+ vcpu_get_cpuid(vcpu);
+
+ return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
+ function, index);
+}
+
+static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+ uint32_t function)
+{
+ return __vcpu_get_cpuid_entry(vcpu, function, 0);
+}
+
+static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+ r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+ if (r)
+ return r;
+
+ /* On success, refresh the cache to pick up adjustments made by KVM. */
+ vcpu_get_cpuid(vcpu);
+ return 0;
+}
+
+static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+ vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+
+ /* Refresh the cache to pick up adjustments made by KVM. */
+ vcpu_get_cpuid(vcpu);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_property property,
+ uint32_t value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+
+static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ return *((&entry->eax) + feature.reg) & BIT(feature.bit);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature,
+ bool set);
+
+static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
+
+}
+
+static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
+
+/*
+ * Assert on an MSR access(es) and pretty print the MSR name when possible.
+ * Note, the caller provides the stringified name so that the name of macro is
+ * printed, not the value the macro resolves to (due to macro expansion).
+ */
+#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \
+do { \
+ if (__builtin_constant_p(msr)) { \
+ TEST_ASSERT(cond, fmt, str, args); \
+ } else if (!(cond)) { \
+ char buf[16]; \
+ \
+ snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \
+ TEST_ASSERT(cond, fmt, buf, args); \
+ } \
+} while (0)
+
+/*
+ * Returns true if KVM should return the last written value when reading an MSR
+ * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
+ * is changing, etc. This is NOT an exhaustive list! The intent is to filter
+ * out MSRs that are not durable _and_ that a selftest wants to write.
+ */
+static inline bool is_durable_msr(uint32_t msr)
+{
+ return msr != MSR_IA32_TSC;
+}
+
+#define vcpu_set_msr(vcpu, msr, val) \
+do { \
+ uint64_t r, v = val; \
+ \
+ TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \
+ "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \
+ if (!is_durable_msr(msr)) \
+ break; \
+ r = vcpu_get_msr(vcpu, msr); \
+ TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
+} while (0)
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+void kvm_init_vm_address_properties(struct kvm_vm *vm);
+
+struct ex_regs {
+ uint64_t rax, rcx, rdx, rbx;
+ uint64_t rbp, rsi, rdi;
+ uint64_t r8, r9, r10, r11;
+ uint64_t r12, r13, r14, r15;
+ uint64_t vector;
+ uint64_t error_code;
+ uint64_t rip;
+ uint64_t cs;
+ uint64_t rflags;
+};
+
+struct idt_entry {
+ uint16_t offset0;
+ uint16_t selector;
+ uint16_t ist : 3;
+ uint16_t : 5;
+ uint16_t type : 4;
+ uint16_t : 1;
+ uint16_t dpl : 2;
+ uint16_t p : 1;
+ uint16_t offset1;
+ uint32_t offset2; uint32_t reserved;
+};
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *));
+
+/*
+ * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be
+ * used to signal "no expcetion".
+ */
+#define KVM_MAGIC_DE_VECTOR 0xff
+
+/* If a toddler were to say "abracadabra". */
+#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
+
+/*
+ * KVM selftest exception fixup uses registers to coordinate with the exception
+ * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
+ * per-CPU data. Using only registers avoids having to map memory into the
+ * guest, doesn't require a valid, stable GS.base, and reduces the risk of
+ * for recursive faults when accessing memory in the handler. The downside to
+ * using registers is that it restricts what registers can be used by the actual
+ * instruction. But, selftests are 64-bit only, making register* pressure a
+ * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved
+ * by the callee, and except for r11 are not implicit parameters to any
+ * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit
+ * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
+ * is higher priority than testing non-faulting SYSCALL/SYSRET.
+ *
+ * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
+ * is guaranteed to be non-zero on fault.
+ *
+ * REGISTER INPUTS:
+ * r9 = MAGIC
+ * r10 = RIP
+ * r11 = new RIP on fault
+ *
+ * REGISTER OUTPUTS:
+ * r9 = exception vector (non-zero)
+ * r10 = error code
+ */
+#define __KVM_ASM_SAFE(insn, fep) \
+ "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \
+ "lea 1f(%%rip), %%r10\n\t" \
+ "lea 2f(%%rip), %%r11\n\t" \
+ fep "1: " insn "\n\t" \
+ "xor %%r9, %%r9\n\t" \
+ "2:\n\t" \
+ "mov %%r9b, %[vector]\n\t" \
+ "mov %%r10, %[error_code]\n\t"
+
+#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
+#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
+
+#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec)
+#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11"
+
+#define kvm_asm_safe(insn, inputs...) \
+({ \
+ uint64_t ign_error_code; \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define kvm_asm_safe_ec(insn, error_code, inputs...) \
+({ \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define kvm_asm_safe_fep(insn, inputs...) \
+({ \
+ uint64_t ign_error_code; \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE_FEP(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \
+({ \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE_FEP(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
+static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \
+{ \
+ uint64_t error_code; \
+ uint8_t vector; \
+ uint32_t a, d; \
+ \
+ asm volatile(KVM_ASM_SAFE##_FEP(#insn) \
+ : "=a"(a), "=d"(d), \
+ KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
+ : "c"(idx) \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ \
+ *val = (uint64_t)a | ((uint64_t)d << 32); \
+ return vector; \
+}
+
+/*
+ * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
+ * use ECX as in input index, and EDX:EAX as a 64-bit output.
+ */
+#define BUILD_READ_U64_SAFE_HELPERS(insn) \
+ BUILD_READ_U64_SAFE_HELPER(insn, , ) \
+ BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
+
+BUILD_READ_U64_SAFE_HELPERS(rdmsr)
+BUILD_READ_U64_SAFE_HELPERS(rdpmc)
+BUILD_READ_U64_SAFE_HELPERS(xgetbv)
+
+static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
+{
+ return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
+}
+
+static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
+}
+
+bool kvm_is_tdp_enabled(void);
+
+static inline bool get_kvm_intel_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm_intel", param);
+}
+
+static inline bool get_kvm_amd_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm_amd", param);
+}
+
+static inline int get_kvm_intel_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm_intel", param);
+}
+
+static inline int get_kvm_amd_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm_amd", param);
+}
+
+static inline bool kvm_is_pmu_enabled(void)
+{
+ return get_kvm_param_bool("enable_pmu");
+}
+
+static inline bool kvm_is_forced_emulation_enabled(void)
+{
+ return !!get_kvm_param_integer("force_emulation_prefix");
+}
+
+static inline bool kvm_is_unrestricted_guest_enabled(void)
+{
+ return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
+static inline bool kvm_is_ignore_msrs(void)
+{
+ return get_kvm_param_bool("ignore_msrs");
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+ int *level);
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t a3);
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+
+static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
+ uint64_t size, uint64_t flags)
+{
+ return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
+}
+
+static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
+ uint64_t flags)
+{
+ uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
+
+ GUEST_ASSERT(!ret);
+}
+
+/*
+ * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's
+ * intended to be a wake event arrives *after* HLT is executed. Modern CPUs,
+ * except for a few oddballs that KVM is unlikely to run on, block IRQs for one
+ * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may
+ * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too.
+ */
+static inline void safe_halt(void)
+{
+ asm volatile("sti; hlt");
+}
+
+/*
+ * Enable interrupts and ensure that interrupts are evaluated upon return from
+ * this function, i.e. execute a nop to consume the STi interrupt shadow.
+ */
+static inline void sti_nop(void)
+{
+ asm volatile ("sti; nop");
+}
+
+/*
+ * Enable interrupts for one instruction (nop), to allow the CPU to process all
+ * interrupts that are already pending.
+ */
+static inline void sti_nop_cli(void)
+{
+ asm volatile ("sti; nop; cli");
+}
+
+static inline void sti(void)
+{
+ asm volatile("sti");
+}
+
+static inline void cli(void)
+{
+ asm volatile ("cli");
+}
+
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
+
+#define vm_xsave_require_permission(xfeature) \
+ __vm_xsave_require_permission(xfeature, #xfeature)
+
+enum pg_level {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+ PG_LEVEL_512G,
+ PG_LEVEL_256T
+};
+
+#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
+#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
+
+#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
+#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
+#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level);
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM (1UL<<2) /* Emulation */
+#define X86_CR0_TS (1UL<<3) /* Task Switched */
+#define X86_CR0_ET (1UL<<4) /* Extension Type */
+#define X86_CR0_NE (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP (1UL<<16) /* Write Protect */
+#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG (1UL<<31) /* Paging */
+
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+#define PFERR_PK_BIT 5
+#define PFERR_SGX_BIT 15
+#define PFERR_GUEST_FINAL_BIT 32
+#define PFERR_GUEST_PAGE_BIT 33
+#define PFERR_IMPLICIT_ACCESS_BIT 48
+
+#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT)
+#define PFERR_USER_MASK BIT(PFERR_USER_BIT)
+#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT)
+#define PFERR_PK_MASK BIT(PFERR_PK_BIT)
+#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
+#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+
+bool sys_clocksource_is_based_on_tsc(void);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
new file mode 100644
index 000000000000..008b4169f5e2
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/sev.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers used for SEV guests
+ *
+ */
+#ifndef SELFTEST_KVM_SEV_H
+#define SELFTEST_KVM_SEV_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "linux/psp-sev.h"
+
+#include "kvm_util.h"
+#include "svm_util.h"
+#include "processor.h"
+
+enum sev_guest_state {
+ SEV_GUEST_STATE_UNINITIALIZED = 0,
+ SEV_GUEST_STATE_LAUNCH_UPDATE,
+ SEV_GUEST_STATE_LAUNCH_SECRET,
+ SEV_GUEST_STATE_RUNNING,
+};
+
+#define SEV_POLICY_NO_DBG (1UL << 0)
+#define SEV_POLICY_ES (1UL << 2)
+
+#define SNP_POLICY_SMT (1ULL << 16)
+#define SNP_POLICY_RSVD_MBO (1ULL << 17)
+#define SNP_POLICY_DBG (1ULL << 19)
+
+#define GHCB_MSR_TERM_REQ 0x100
+
+static inline bool is_sev_snp_vm(struct kvm_vm *vm)
+{
+ return vm->type == KVM_X86_SNP_VM;
+}
+
+static inline bool is_sev_es_vm(struct kvm_vm *vm)
+{
+ return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM;
+}
+
+static inline bool is_sev_vm(struct kvm_vm *vm)
+{
+ return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM;
+}
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
+void sev_vm_launch_finish(struct kvm_vm *vm);
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy);
+void snp_vm_launch_update(struct kvm_vm *vm);
+void snp_vm_launch_finish(struct kvm_vm *vm);
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+ struct kvm_vcpu **cpu);
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement);
+
+kvm_static_assert(SEV_RET_SUCCESS == 0);
+
+/*
+ * A SEV-SNP VM requires the policy reserved bit to always be set.
+ * The SMT policy bit is also required to be set based on SMT being
+ * available and active on the system.
+ */
+static inline u64 snp_default_policy(void)
+{
+ return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0);
+}
+
+/*
+ * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
+ * instead of a proper struct. The size of the parameter is embedded in the
+ * ioctl number, i.e. is ABI and thus immutable. Hack around the mess by
+ * creating an overlay to pass in an "unsigned long" without a cast (casting
+ * will make the compiler unhappy due to dereferencing an aliased pointer).
+ */
+#define __vm_sev_ioctl(vm, cmd, arg) \
+({ \
+ int r; \
+ \
+ union { \
+ struct kvm_sev_cmd c; \
+ unsigned long raw; \
+ } sev_cmd = { .c = { \
+ .id = (cmd), \
+ .data = (uint64_t)(arg), \
+ .sev_fd = (vm)->arch.sev_fd, \
+ } }; \
+ \
+ r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw); \
+ r ?: sev_cmd.c.error; \
+})
+
+#define vm_sev_ioctl(vm, cmd, arg) \
+({ \
+ int ret = __vm_sev_ioctl(vm, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
+})
+
+void sev_vm_init(struct kvm_vm *vm);
+void sev_es_vm_init(struct kvm_vm *vm);
+void snp_vm_init(struct kvm_vm *vm);
+
+static inline void vmgexit(void)
+{
+ __asm__ __volatile__("rep; vmmcall");
+}
+
+static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ struct kvm_enc_region range = {
+ .addr = region->region.userspace_addr,
+ .size = region->region.memory_size,
+ };
+
+ vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
+}
+
+static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+ uint64_t size)
+{
+ struct kvm_sev_launch_update_data update_data = {
+ .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
+ .len = size,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
+}
+
+static inline void snp_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+ uint64_t hva, uint64_t size, uint8_t type)
+{
+ struct kvm_sev_snp_launch_update update_data = {
+ .uaddr = hva,
+ .gfn_start = gpa >> PAGE_SHIFT,
+ .len = size,
+ .type = type,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data);
+}
+
+#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h
index f4ea2355dbc2..29cffd0a9181 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86/svm.h
@@ -1,10 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm.h
- * This is a copy of arch/x86/include/asm/svm.h
- *
- */
-
#ifndef SELFTEST_KVM_SVM_H
#define SELFTEST_KVM_SVM_H
@@ -58,6 +52,27 @@ enum {
INTERCEPT_RDPRU,
};
+struct hv_vmcb_enlightenments {
+ struct __packed hv_enlightenments_control {
+ u32 nested_flush_hypercall:1;
+ u32 msr_bitmap:1;
+ u32 enlightened_npt_tlb: 1;
+ u32 reserved:29;
+ } __packed hv_enlightenments_control;
+ u32 hv_vp_id;
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
+
+/* Synthetic VM-Exit */
+#define HV_SVM_EXITCODE_ENL 0xf0000000
+#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1)
struct __attribute__ ((__packed__)) vmcb_control_area {
u32 intercept_cr;
@@ -99,7 +114,17 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u8 reserved_6[8]; /* Offset 0xe8 */
u64 avic_logical_id; /* Offset 0xf0 */
u64 avic_physical_id; /* Offset 0xf8 */
- u8 reserved_7[768];
+ u8 reserved_7[8];
+ u64 vmsa_pa; /* Used for an SEV-ES guest */
+ u8 reserved_8[720];
+ /*
+ * Offset 0x3e0, 32 bytes reserved
+ * for use by hypervisor/software.
+ */
+ union {
+ struct hv_vmcb_enlightenments hv_enlightenments;
+ u8 reserved_sw[32];
+ };
};
@@ -211,8 +236,6 @@ struct __attribute__ ((__packed__)) vmcb {
struct vmcb_save_area save;
};
-#define SVM_CPUID_FUNC 0x8000000a
-
#define SVM_VM_CR_SVM_DISABLE 4
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h
new file mode 100644
index 000000000000..b74c6dcddcbd
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/svm_util.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_SVM_UTILS_H
+#define SELFTEST_KVM_SVM_UTILS_H
+
+#include <asm/svm.h>
+
+#include <stdint.h>
+#include "svm.h"
+#include "processor.h"
+
+struct svm_test_data {
+ /* VMCB */
+ struct vmcb *vmcb; /* gva */
+ void *vmcb_hva;
+ uint64_t vmcb_gpa;
+
+ /* host state-save area */
+ struct vmcb_save_area *save_area; /* gva */
+ void *save_area_hva;
+ uint64_t save_area_gpa;
+
+ /* MSR-Bitmap */
+ void *msr; /* gva */
+ void *msr_hva;
+ uint64_t msr_gpa;
+};
+
+static inline void vmmcall(void)
+{
+ /*
+ * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+ * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+ * use of this function is to exit to L1 from L2. Clobber all other
+ * GPRs as L1 doesn't correctly preserve them during vmexits.
+ */
+ __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+#define stgi() \
+ __asm__ __volatile__( \
+ "stgi\n" \
+ )
+
+#define clgi() \
+ __asm__ __volatile__( \
+ "clgi\n" \
+ )
+
+struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+
+int open_sev_dev_path_or_exit(void);
+
+#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h
new file mode 100644
index 000000000000..d3825dcc3cd9
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/ucall.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_IO
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
index 65eb1079a161..96e2b4c630a9 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86/vmx.h
@@ -1,15 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/x86_64/vmx.h
- *
* Copyright (C) 2018, Google LLC.
*/
#ifndef SELFTEST_KVM_VMX_H
#define SELFTEST_KVM_VMX_H
+#include <asm/vmx.h>
+
#include <stdint.h>
#include "processor.h"
+#include "apic.h"
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
@@ -95,56 +96,10 @@
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000
+#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
+
#define EXIT_REASON_FAILED_VMENTRY 0x80000000
-#define EXIT_REASON_EXCEPTION_NMI 0
-#define EXIT_REASON_EXTERNAL_INTERRUPT 1
-#define EXIT_REASON_TRIPLE_FAULT 2
-#define EXIT_REASON_INTERRUPT_WINDOW 7
-#define EXIT_REASON_NMI_WINDOW 8
-#define EXIT_REASON_TASK_SWITCH 9
-#define EXIT_REASON_CPUID 10
-#define EXIT_REASON_HLT 12
-#define EXIT_REASON_INVD 13
-#define EXIT_REASON_INVLPG 14
-#define EXIT_REASON_RDPMC 15
-#define EXIT_REASON_RDTSC 16
-#define EXIT_REASON_VMCALL 18
-#define EXIT_REASON_VMCLEAR 19
-#define EXIT_REASON_VMLAUNCH 20
-#define EXIT_REASON_VMPTRLD 21
-#define EXIT_REASON_VMPTRST 22
-#define EXIT_REASON_VMREAD 23
-#define EXIT_REASON_VMRESUME 24
-#define EXIT_REASON_VMWRITE 25
-#define EXIT_REASON_VMOFF 26
-#define EXIT_REASON_VMON 27
-#define EXIT_REASON_CR_ACCESS 28
-#define EXIT_REASON_DR_ACCESS 29
-#define EXIT_REASON_IO_INSTRUCTION 30
-#define EXIT_REASON_MSR_READ 31
-#define EXIT_REASON_MSR_WRITE 32
-#define EXIT_REASON_INVALID_STATE 33
-#define EXIT_REASON_MWAIT_INSTRUCTION 36
-#define EXIT_REASON_MONITOR_INSTRUCTION 39
-#define EXIT_REASON_PAUSE_INSTRUCTION 40
-#define EXIT_REASON_MCE_DURING_VMENTRY 41
-#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
-#define EXIT_REASON_APIC_ACCESS 44
-#define EXIT_REASON_EOI_INDUCED 45
-#define EXIT_REASON_EPT_VIOLATION 48
-#define EXIT_REASON_EPT_MISCONFIG 49
-#define EXIT_REASON_INVEPT 50
-#define EXIT_REASON_RDTSCP 51
-#define EXIT_REASON_PREEMPTION_TIMER 52
-#define EXIT_REASON_INVVPID 53
-#define EXIT_REASON_WBINVD 54
-#define EXIT_REASON_XSETBV 55
-#define EXIT_REASON_APIC_WRITE 56
-#define EXIT_REASON_INVPCID 58
-#define EXIT_REASON_PML_FULL 62
-#define EXIT_REASON_XSAVES 63
-#define EXIT_REASON_XRSTORS 64
-#define LAST_EXIT_REASON 64
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
@@ -204,6 +159,8 @@ enum vmcs_field {
VMWRITE_BITMAP_HIGH = 0x00002029,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ ENCLS_EXITING_BITMAP = 0x0000202E,
+ ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
TSC_MULTIPLIER = 0x00002032,
TSC_MULTIPLIER_HIGH = 0x00002033,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
@@ -478,11 +435,16 @@ static inline int vmresume(void)
static inline void vmcall(void)
{
- /* Currently, L1 destroys our GPRs during vmexits. */
- __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : :
- "rax", "rbx", "rcx", "rdx",
- "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
- "r13", "r14", "r15");
+ /*
+ * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+ * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+ * use of this function is to exit to L1 from L2. Clobber all other
+ * GPRs as L1 doesn't correctly preserve them during vmexits.
+ */
+ __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
}
static inline int vmread(uint64_t encoding, uint64_t *value)
@@ -558,14 +520,6 @@ struct vmx_pages {
uint64_t vmwrite_gpa;
void *vmwrite;
- void *vp_assist_hva;
- uint64_t vp_assist_gpa;
- void *vp_assist;
-
- void *enlightened_vmcs_hva;
- uint64_t enlightened_vmcs_gpa;
- void *enlightened_vmcs;
-
void *eptp_hva;
uint64_t eptp_gpa;
void *eptp;
@@ -603,19 +557,18 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
-bool nested_vmx_supported(void);
-void nested_vmx_check_supported(void);
+bool ept_1g_pages_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr);
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
+ uint32_t memslot);
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size);
+bool kvm_cpu_has_ept(void);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
deleted file mode 100644
index 0b30b4e15c38..000000000000
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ /dev/null
@@ -1,478 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/processor.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include <assert.h>
-#include <stdint.h>
-
-#include <asm/msr-index.h>
-
-#define X86_EFLAGS_FIXED (1u << 1)
-
-#define X86_CR4_VME (1ul << 0)
-#define X86_CR4_PVI (1ul << 1)
-#define X86_CR4_TSD (1ul << 2)
-#define X86_CR4_DE (1ul << 3)
-#define X86_CR4_PSE (1ul << 4)
-#define X86_CR4_PAE (1ul << 5)
-#define X86_CR4_MCE (1ul << 6)
-#define X86_CR4_PGE (1ul << 7)
-#define X86_CR4_PCE (1ul << 8)
-#define X86_CR4_OSFXSR (1ul << 9)
-#define X86_CR4_OSXMMEXCPT (1ul << 10)
-#define X86_CR4_UMIP (1ul << 11)
-#define X86_CR4_LA57 (1ul << 12)
-#define X86_CR4_VMXE (1ul << 13)
-#define X86_CR4_SMXE (1ul << 14)
-#define X86_CR4_FSGSBASE (1ul << 16)
-#define X86_CR4_PCIDE (1ul << 17)
-#define X86_CR4_OSXSAVE (1ul << 18)
-#define X86_CR4_SMEP (1ul << 20)
-#define X86_CR4_SMAP (1ul << 21)
-#define X86_CR4_PKE (1ul << 22)
-
-/* CPUID.1.ECX */
-#define CPUID_VMX (1ul << 5)
-#define CPUID_SMX (1ul << 6)
-#define CPUID_PCID (1ul << 17)
-#define CPUID_XSAVE (1ul << 26)
-
-/* CPUID.7.EBX */
-#define CPUID_FSGSBASE (1ul << 0)
-#define CPUID_SMEP (1ul << 7)
-#define CPUID_SMAP (1ul << 20)
-
-/* CPUID.7.ECX */
-#define CPUID_UMIP (1ul << 2)
-#define CPUID_PKU (1ul << 3)
-#define CPUID_LA57 (1ul << 16)
-
-#define UNEXPECTED_VECTOR_PORT 0xfff0u
-
-/* General Registers in 64-Bit Mode */
-struct gpr64_regs {
- u64 rax;
- u64 rcx;
- u64 rdx;
- u64 rbx;
- u64 rsp;
- u64 rbp;
- u64 rsi;
- u64 rdi;
- u64 r8;
- u64 r9;
- u64 r10;
- u64 r11;
- u64 r12;
- u64 r13;
- u64 r14;
- u64 r15;
-};
-
-struct desc64 {
- uint16_t limit0;
- uint16_t base0;
- unsigned base1:8, type:4, s:1, dpl:2, p:1;
- unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
- uint32_t base3;
- uint32_t zero1;
-} __attribute__((packed));
-
-struct desc_ptr {
- uint16_t size;
- uint64_t address;
-} __attribute__((packed));
-
-static inline uint64_t get_desc64_base(const struct desc64 *desc)
-{
- return ((uint64_t)desc->base3 << 32) |
- (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
-}
-
-static inline uint64_t rdtsc(void)
-{
- uint32_t eax, edx;
- uint64_t tsc_val;
- /*
- * The lfence is to wait (on Intel CPUs) until all previous
- * instructions have been executed. If software requires RDTSC to be
- * executed prior to execution of any subsequent instruction, it can
- * execute LFENCE immediately after RDTSC
- */
- __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
- tsc_val = ((uint64_t)edx) << 32 | eax;
- return tsc_val;
-}
-
-static inline uint64_t rdtscp(uint32_t *aux)
-{
- uint32_t eax, edx;
-
- __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
- return ((uint64_t)edx) << 32 | eax;
-}
-
-static inline uint64_t rdmsr(uint32_t msr)
-{
- uint32_t a, d;
-
- __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
- return a | ((uint64_t) d << 32);
-}
-
-static inline void wrmsr(uint32_t msr, uint64_t value)
-{
- uint32_t a = value;
- uint32_t d = value >> 32;
-
- __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-
-static inline uint16_t inw(uint16_t port)
-{
- uint16_t tmp;
-
- __asm__ __volatile__("in %%dx, %%ax"
- : /* output */ "=a" (tmp)
- : /* input */ "d" (port));
-
- return tmp;
-}
-
-static inline uint16_t get_es(void)
-{
- uint16_t es;
-
- __asm__ __volatile__("mov %%es, %[es]"
- : /* output */ [es]"=rm"(es));
- return es;
-}
-
-static inline uint16_t get_cs(void)
-{
- uint16_t cs;
-
- __asm__ __volatile__("mov %%cs, %[cs]"
- : /* output */ [cs]"=rm"(cs));
- return cs;
-}
-
-static inline uint16_t get_ss(void)
-{
- uint16_t ss;
-
- __asm__ __volatile__("mov %%ss, %[ss]"
- : /* output */ [ss]"=rm"(ss));
- return ss;
-}
-
-static inline uint16_t get_ds(void)
-{
- uint16_t ds;
-
- __asm__ __volatile__("mov %%ds, %[ds]"
- : /* output */ [ds]"=rm"(ds));
- return ds;
-}
-
-static inline uint16_t get_fs(void)
-{
- uint16_t fs;
-
- __asm__ __volatile__("mov %%fs, %[fs]"
- : /* output */ [fs]"=rm"(fs));
- return fs;
-}
-
-static inline uint16_t get_gs(void)
-{
- uint16_t gs;
-
- __asm__ __volatile__("mov %%gs, %[gs]"
- : /* output */ [gs]"=rm"(gs));
- return gs;
-}
-
-static inline uint16_t get_tr(void)
-{
- uint16_t tr;
-
- __asm__ __volatile__("str %[tr]"
- : /* output */ [tr]"=rm"(tr));
- return tr;
-}
-
-static inline uint64_t get_cr0(void)
-{
- uint64_t cr0;
-
- __asm__ __volatile__("mov %%cr0, %[cr0]"
- : /* output */ [cr0]"=r"(cr0));
- return cr0;
-}
-
-static inline uint64_t get_cr3(void)
-{
- uint64_t cr3;
-
- __asm__ __volatile__("mov %%cr3, %[cr3]"
- : /* output */ [cr3]"=r"(cr3));
- return cr3;
-}
-
-static inline uint64_t get_cr4(void)
-{
- uint64_t cr4;
-
- __asm__ __volatile__("mov %%cr4, %[cr4]"
- : /* output */ [cr4]"=r"(cr4));
- return cr4;
-}
-
-static inline void set_cr4(uint64_t val)
-{
- __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
-}
-
-static inline struct desc_ptr get_gdt(void)
-{
- struct desc_ptr gdt;
- __asm__ __volatile__("sgdt %[gdt]"
- : /* output */ [gdt]"=m"(gdt));
- return gdt;
-}
-
-static inline struct desc_ptr get_idt(void)
-{
- struct desc_ptr idt;
- __asm__ __volatile__("sidt %[idt]"
- : /* output */ [idt]"=m"(idt));
- return idt;
-}
-
-static inline void outl(uint16_t port, uint32_t value)
-{
- __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
-}
-
-static inline void cpuid(uint32_t *eax, uint32_t *ebx,
- uint32_t *ecx, uint32_t *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx)
- : "memory");
-}
-
-#define SET_XMM(__var, __xmm) \
- asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
-
-static inline void set_xmm(int n, unsigned long val)
-{
- switch (n) {
- case 0:
- SET_XMM(val, xmm0);
- break;
- case 1:
- SET_XMM(val, xmm1);
- break;
- case 2:
- SET_XMM(val, xmm2);
- break;
- case 3:
- SET_XMM(val, xmm3);
- break;
- case 4:
- SET_XMM(val, xmm4);
- break;
- case 5:
- SET_XMM(val, xmm5);
- break;
- case 6:
- SET_XMM(val, xmm6);
- break;
- case 7:
- SET_XMM(val, xmm7);
- break;
- }
-}
-
-typedef unsigned long v1di __attribute__ ((vector_size (8)));
-static inline unsigned long get_xmm(int n)
-{
- assert(n >= 0 && n <= 7);
-
- register v1di xmm0 __asm__("%xmm0");
- register v1di xmm1 __asm__("%xmm1");
- register v1di xmm2 __asm__("%xmm2");
- register v1di xmm3 __asm__("%xmm3");
- register v1di xmm4 __asm__("%xmm4");
- register v1di xmm5 __asm__("%xmm5");
- register v1di xmm6 __asm__("%xmm6");
- register v1di xmm7 __asm__("%xmm7");
- switch (n) {
- case 0:
- return (unsigned long)xmm0;
- case 1:
- return (unsigned long)xmm1;
- case 2:
- return (unsigned long)xmm2;
- case 3:
- return (unsigned long)xmm3;
- case 4:
- return (unsigned long)xmm4;
- case 5:
- return (unsigned long)xmm5;
- case 6:
- return (unsigned long)xmm6;
- case 7:
- return (unsigned long)xmm7;
- }
- return 0;
-}
-
-bool is_intel_cpu(void);
-
-struct kvm_x86_state;
-struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_x86_state *state);
-
-struct kvm_msr_list *kvm_get_msr_index_list(void);
-uint64_t kvm_get_feature_msr(uint64_t msr_index);
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-
-struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_cpuid2 *cpuid);
-
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
-
-static inline struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_entry(uint32_t function)
-{
- return kvm_get_supported_cpuid_index(function, 0);
-}
-
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
-int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value);
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value);
-
-uint32_t kvm_get_cpuid_max_basic(void);
-uint32_t kvm_get_cpuid_max_extended(void);
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
-
-struct ex_regs {
- uint64_t rax, rcx, rdx, rbx;
- uint64_t rbp, rsi, rdi;
- uint64_t r8, r9, r10, r11;
- uint64_t r12, r13, r14, r15;
- uint64_t vector;
- uint64_t error_code;
- uint64_t rip;
- uint64_t cs;
- uint64_t rflags;
-};
-
-void vm_init_descriptor_tables(struct kvm_vm *vm);
-void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
-void vm_handle_exception(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *));
-
-/*
- * set_cpuid() - overwrites a matching cpuid entry with the provided value.
- * matches based on ent->function && ent->index. returns true
- * if a match was found and successfully overwritten.
- * @cpuid: the kvm cpuid list to modify.
- * @ent: cpuid entry to insert
- */
-bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);
-
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
- uint64_t a3);
-
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
-void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
-struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
-
-/*
- * Basic CPU control in CR0
- */
-#define X86_CR0_PE (1UL<<0) /* Protection Enable */
-#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
-#define X86_CR0_EM (1UL<<2) /* Emulation */
-#define X86_CR0_TS (1UL<<3) /* Task Switched */
-#define X86_CR0_ET (1UL<<4) /* Extension Type */
-#define X86_CR0_NE (1UL<<5) /* Numeric Error */
-#define X86_CR0_WP (1UL<<16) /* Write Protect */
-#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
-#define X86_CR0_NW (1UL<<29) /* Not Write-through */
-#define X86_CR0_CD (1UL<<30) /* Cache Disable */
-#define X86_CR0_PG (1UL<<31) /* Paging */
-
-#define APIC_DEFAULT_GPA 0xfee00000ULL
-
-/* APIC base address MSR and fields */
-#define MSR_IA32_APICBASE 0x0000001b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_EXTD (1<<10)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define GET_APIC_BASE(x) (((x) >> 12) << 12)
-
-#define APIC_BASE_MSR 0x800
-#define X2APIC_ENABLE (1UL << 10)
-#define APIC_ID 0x20
-#define APIC_LVR 0x30
-#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
-#define APIC_TASKPRI 0x80
-#define APIC_PROCPRI 0xA0
-#define APIC_EOI 0xB0
-#define APIC_SPIV 0xF0
-#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
-#define APIC_SPIV_APIC_ENABLED (1 << 8)
-#define APIC_ICR 0x300
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DEST_PHYSICAL 0x00000
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_FIXED_MASK 0x00700
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-#define SET_APIC_DEST_FIELD(x) ((x) << 24)
-
-/* VMX_EPT_VPID_CAP bits */
-#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
-
-#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
deleted file mode 100644
index b7531c83b8ae..000000000000
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm_utils.h
- * Header for nested SVM testing
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-
-#ifndef SELFTEST_KVM_SVM_UTILS_H
-#define SELFTEST_KVM_SVM_UTILS_H
-
-#include <stdint.h>
-#include "svm.h"
-#include "processor.h"
-
-#define CPUID_SVM_BIT 2
-#define CPUID_SVM BIT_ULL(CPUID_SVM_BIT)
-
-#define SVM_EXIT_VMMCALL 0x081
-
-struct svm_test_data {
- /* VMCB */
- struct vmcb *vmcb; /* gva */
- void *vmcb_hva;
- uint64_t vmcb_gpa;
-
- /* host state-save area */
- struct vmcb_save_area *save_area; /* gva */
- void *save_area_hva;
- uint64_t save_area_gpa;
-};
-
-struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
-void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
-void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
-bool nested_svm_supported(void);
-void nested_svm_check_supported(void);
-
-static inline bool cpu_has_svm(void)
-{
- u32 eax = 0x80000001, ecx;
-
- asm("cpuid" :
- "=a" (eax), "=c" (ecx) : "0" (eax) : "ebx", "edx");
-
- return ecx & CPUID_SVM;
-}
-
-#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c
new file mode 100644
index 000000000000..5d7590d01868
--- /dev/null
+++ b/tools/testing/selftests/kvm/irqfd_test.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+
+static struct kvm_vm *vm1;
+static struct kvm_vm *vm2;
+static int __eventfd;
+static bool done;
+
+/*
+ * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when
+ * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task
+ * GSI base to avoid false failures due to cross-task de-assign, i.e. so that
+ * the secondary doesn't de-assign the primary's eventfd and cause assign to
+ * unexpectedly succeed on the primary.
+ */
+#define GSI_BASE_PRIMARY 0x20
+#define GSI_BASE_SECONDARY 0x30
+
+static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd)
+{
+ int r, i;
+
+ /*
+ * The secondary task can encounter EBADF since the primary can close
+ * the eventfd at any time. And because the primary can recreate the
+ * eventfd, at the safe fd in the file table, the secondary can also
+ * encounter "unexpected" success, e.g. if the close+recreate happens
+ * between the first and second assignments. The secondary's role is
+ * mostly to antagonize KVM, not to detect bugs.
+ */
+ for (i = 0; i < 2; i++) {
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0);
+ TEST_ASSERT(!r || errno == EBUSY || errno == EBADF,
+ "Wanted success, EBUSY, or EBADF, r = %d, errno = %d",
+ r, errno);
+
+ /* De-assign should succeed unless the eventfd was closed. */
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ TEST_ASSERT(!r || errno == EBADF,
+ "De-assign should succeed unless the fd was closed");
+ }
+}
+
+static void *secondary_irqfd_juggler(void *ign)
+{
+ while (!READ_ONCE(done)) {
+ juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd));
+ juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd));
+ }
+
+ return NULL;
+}
+
+static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd)
+{
+ int r1, r2;
+
+ /*
+ * At least one of the assigns should fail. KVM disallows assigning a
+ * single eventfd to multiple GSIs (or VMs), so it's possible that both
+ * assignments can fail, too.
+ */
+ r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0);
+ TEST_ASSERT(!r1 || errno == EBUSY,
+ "Wanted success or EBUSY, r = %d, errno = %d", r1, errno);
+
+ r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0);
+ TEST_ASSERT(r1 || (r2 && errno == EBUSY),
+ "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d",
+ r1, r2, errno);
+
+ /*
+ * De-assign should always succeed, even if the corresponding assign
+ * failed.
+ */
+ kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t racing_thread;
+ struct kvm_vcpu *unused;
+ int r, i;
+
+ TEST_REQUIRE(kvm_arch_has_default_irqchip());
+
+ /*
+ * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also
+ * create an unused vCPU as certain architectures (like arm64) need to
+ * complete IRQ chip initialization after all possible vCPUs for a VM
+ * have been created.
+ */
+ vm1 = vm_create_with_one_vcpu(&unused, NULL);
+ vm2 = vm_create_with_one_vcpu(&unused, NULL);
+
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ kvm_irqfd(vm1, 10, __eventfd, 0);
+
+ r = __kvm_irqfd(vm1, 11, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ r = __kvm_irqfd(vm2, 12, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ /*
+ * De-assign all eventfds, along with multiple eventfds that were never
+ * assigned. KVM's ABI is that de-assign is allowed so long as the
+ * eventfd itself is valid.
+ */
+ kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+
+ close(__eventfd);
+
+ pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2);
+
+ for (i = 0; i < 10000; i++) {
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ juggle_eventfd_primary(vm1, __eventfd);
+ juggle_eventfd_primary(vm2, __eventfd);
+ close(__eventfd);
+ }
+
+ WRITE_ONCE(done, true);
+ pthread_join(racing_thread, NULL);
+}
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
new file mode 100644
index 000000000000..b7dbde9c0843
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kvm_binary_stats_test
+ *
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Test the fd-based interface for KVM statistics.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+#include "kselftest.h"
+
+static void stats_test(int stats_fd)
+{
+ ssize_t ret;
+ int i;
+ size_t size_desc;
+ size_t size_data = 0;
+ struct kvm_stats_header header;
+ char *id;
+ struct kvm_stats_desc *stats_desc;
+ u64 *stats_data;
+ struct kvm_stats_desc *pdesc;
+ u32 type, unit, base;
+
+ /* Read kvm stats header */
+ read_stats_header(stats_fd, &header);
+
+ size_desc = get_stats_descriptor_size(&header);
+
+ /* Read kvm stats id string */
+ id = malloc(header.name_size);
+ TEST_ASSERT(id, "Allocate memory for id string");
+
+ ret = pread(stats_fd, id, header.name_size, sizeof(header));
+ TEST_ASSERT(ret == header.name_size,
+ "Expected header size '%u', read '%lu' bytes",
+ header.name_size, ret);
+
+ /* Check id string, that should start with "kvm" */
+ TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header.name_size,
+ "Invalid KVM stats type, id: %s", id);
+
+ /* Sanity check for other fields in header */
+ if (header.num_desc == 0) {
+ ksft_print_msg("No KVM stats defined!\n");
+ return;
+ }
+ /*
+ * The descriptor and data offsets must be valid, they must not overlap
+ * the header, and the descriptor and data blocks must not overlap each
+ * other. Note, the data block is rechecked after its size is known.
+ */
+ TEST_ASSERT(header.desc_offset && header.desc_offset >= sizeof(header) &&
+ header.data_offset && header.data_offset >= sizeof(header),
+ "Invalid offset fields in header");
+
+ TEST_ASSERT(header.desc_offset > header.data_offset ||
+ (header.desc_offset + size_desc * header.num_desc <= header.data_offset),
+ "Descriptor block is overlapped with data block");
+
+ /* Read kvm stats descriptors */
+ stats_desc = read_stats_descriptors(stats_fd, &header);
+
+ /* Sanity check for fields in descriptors */
+ for (i = 0; i < header.num_desc; ++i) {
+ pdesc = get_stats_descriptor(stats_desc, i, &header);
+ type = pdesc->flags & KVM_STATS_TYPE_MASK;
+ unit = pdesc->flags & KVM_STATS_UNIT_MASK;
+ base = pdesc->flags & KVM_STATS_BASE_MASK;
+
+ /* Check name string */
+ TEST_ASSERT(strlen(pdesc->name) < header.name_size,
+ "KVM stats name (index: %d) too long", i);
+
+ /* Check type,unit,base boundaries */
+ TEST_ASSERT(type <= KVM_STATS_TYPE_MAX,
+ "Unknown KVM stats (%s) type: %u", pdesc->name, type);
+ TEST_ASSERT(unit <= KVM_STATS_UNIT_MAX,
+ "Unknown KVM stats (%s) unit: %u", pdesc->name, unit);
+ TEST_ASSERT(base <= KVM_STATS_BASE_MAX,
+ "Unknown KVM stats (%s) base: %u", pdesc->name, base);
+
+ /*
+ * Check exponent for stats unit
+ * Exponent for counter should be greater than or equal to 0
+ * Exponent for unit bytes should be greater than or equal to 0
+ * Exponent for unit seconds should be less than or equal to 0
+ * Exponent for unit clock cycles should be greater than or
+ * equal to 0
+ * Exponent for unit boolean should be 0
+ */
+ switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
+ case KVM_STATS_UNIT_NONE:
+ case KVM_STATS_UNIT_BYTES:
+ case KVM_STATS_UNIT_CYCLES:
+ TEST_ASSERT(pdesc->exponent >= 0,
+ "Unsupported KVM stats (%s) exponent: %i",
+ pdesc->name, pdesc->exponent);
+ break;
+ case KVM_STATS_UNIT_SECONDS:
+ TEST_ASSERT(pdesc->exponent <= 0,
+ "Unsupported KVM stats (%s) exponent: %i",
+ pdesc->name, pdesc->exponent);
+ break;
+ case KVM_STATS_UNIT_BOOLEAN:
+ TEST_ASSERT(pdesc->exponent == 0,
+ "Unsupported KVM stats (%s) exponent: %d",
+ pdesc->name, pdesc->exponent);
+ break;
+ }
+
+ /* Check size field, which should not be zero */
+ TEST_ASSERT(pdesc->size,
+ "KVM descriptor(%s) with size of 0", pdesc->name);
+ /* Check bucket_size field */
+ switch (pdesc->flags & KVM_STATS_TYPE_MASK) {
+ case KVM_STATS_TYPE_LINEAR_HIST:
+ TEST_ASSERT(pdesc->bucket_size,
+ "Bucket size of Linear Histogram stats (%s) is zero",
+ pdesc->name);
+ break;
+ default:
+ TEST_ASSERT(!pdesc->bucket_size,
+ "Bucket size of stats (%s) is not zero",
+ pdesc->name);
+ }
+ size_data = max(size_data, pdesc->offset + pdesc->size * sizeof(*stats_data));
+ }
+
+ /*
+ * Now that the size of the data block is known, verify the data block
+ * doesn't overlap the descriptor block.
+ */
+ TEST_ASSERT(header.data_offset >= header.desc_offset ||
+ header.data_offset + size_data <= header.desc_offset,
+ "Data block is overlapped with Descriptor block");
+
+ /* Check validity of all stats data size */
+ TEST_ASSERT(size_data >= header.num_desc * sizeof(*stats_data),
+ "Data size is not correct");
+
+ /* Allocate memory for stats data */
+ stats_data = malloc(size_data);
+ TEST_ASSERT(stats_data, "Allocate memory for stats data");
+ /* Read kvm stats data as a bulk */
+ ret = pread(stats_fd, stats_data, size_data, header.data_offset);
+ TEST_ASSERT(ret == size_data, "Read KVM stats data");
+ /* Read kvm stats data one by one */
+ for (i = 0; i < header.num_desc; ++i) {
+ pdesc = get_stats_descriptor(stats_desc, i, &header);
+ read_stat_data(stats_fd, &header, pdesc, stats_data,
+ pdesc->size);
+ }
+
+ free(stats_data);
+ free(stats_desc);
+ free(id);
+
+ close(stats_fd);
+ TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+#define DEFAULT_NUM_VM 4
+#define DEFAULT_NUM_VCPU 4
+
+/*
+ * Usage: kvm_bin_form_stats [#vm] [#vcpu]
+ * The first parameter #vm set the number of VMs being created.
+ * The second parameter #vcpu set the number of VCPUs being created.
+ * By default, DEFAULT_NUM_VM VM and DEFAULT_NUM_VCPU VCPU for the VM would be
+ * created for testing.
+ */
+
+int main(int argc, char *argv[])
+{
+ int vm_stats_fds, *vcpu_stats_fds;
+ int i, j;
+ struct kvm_vcpu **vcpus;
+ struct kvm_vm **vms;
+ int max_vm = DEFAULT_NUM_VM;
+ int max_vcpu = DEFAULT_NUM_VCPU;
+
+ /* Get the number of VMs and VCPUs that would be created for testing. */
+ if (argc > 1) {
+ max_vm = strtol(argv[1], NULL, 0);
+ if (max_vm <= 0)
+ max_vm = DEFAULT_NUM_VM;
+ }
+ if (argc > 2) {
+ max_vcpu = strtol(argv[2], NULL, 0);
+ if (max_vcpu <= 0)
+ max_vcpu = DEFAULT_NUM_VCPU;
+ }
+
+ ksft_print_header();
+
+ /* Check the extension for binary stats */
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_BINARY_STATS_FD));
+
+ ksft_set_plan(max_vm);
+
+ /* Create VMs and VCPUs */
+ vms = malloc(sizeof(vms[0]) * max_vm);
+ TEST_ASSERT(vms, "Allocate memory for storing VM pointers");
+
+ vcpus = malloc(sizeof(struct kvm_vcpu *) * max_vm * max_vcpu);
+ TEST_ASSERT(vcpus, "Allocate memory for storing vCPU pointers");
+
+ /*
+ * Not per-VM as the array is populated, used, and invalidated within a
+ * single for-loop iteration.
+ */
+ vcpu_stats_fds = calloc(max_vm, sizeof(*vcpu_stats_fds));
+ TEST_ASSERT(vcpu_stats_fds, "Allocate memory for VM stats fds");
+
+ for (i = 0; i < max_vm; ++i) {
+ vms[i] = vm_create_barebones();
+ for (j = 0; j < max_vcpu; ++j)
+ vcpus[i * max_vcpu + j] = __vm_vcpu_add(vms[i], j);
+ }
+
+ /*
+ * Check stats read for every VM and vCPU, with a variety of flavors.
+ * Note, stats_test() closes the passed in stats fd.
+ */
+ for (i = 0; i < max_vm; ++i) {
+ /*
+ * Verify that creating multiple userspace references to a
+ * single stats file works and doesn't cause explosions.
+ */
+ vm_stats_fds = vm_get_stats_fd(vms[i]);
+ stats_test(kvm_dup(vm_stats_fds));
+
+ /* Verify userspace can instantiate multiple stats files. */
+ stats_test(vm_get_stats_fd(vms[i]));
+
+ for (j = 0; j < max_vcpu; ++j) {
+ vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
+ stats_test(kvm_dup(vcpu_stats_fds[j]));
+ stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
+ }
+
+ /*
+ * Close the VM fd and redo the stats tests. KVM should gift a
+ * reference (to the VM) to each stats fd, i.e. stats should
+ * still be accessible even after userspace has put its last
+ * _direct_ reference to the VM.
+ */
+ kvm_vm_free(vms[i]);
+
+ stats_test(vm_stats_fds);
+ for (j = 0; j < max_vcpu; ++j)
+ stats_test(vcpu_stats_fds[j]);
+
+ ksft_test_result_pass("vm%i\n", i);
+ }
+
+ free(vms);
+ free(vcpus);
+ free(vcpu_stats_fds);
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 0299cd81b8ba..c5310736ed06 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -6,8 +6,6 @@
*
* Test for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_VCPU_ID.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -27,11 +25,11 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
pr_info("Testing creating %d vCPUs, with IDs %d...%d.\n",
num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ vm = vm_create_barebones();
for (i = first_vcpu_id; i < first_vcpu_id + num_vcpus; i++)
/* This asserts that the vCPU was created. */
- vm_vcpu_add(vm, i);
+ __vm_vcpu_add(vm, i);
kvm_vm_free(vm);
}
@@ -44,6 +42,8 @@ int main(int argc, char *argv[])
pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
+ kvm_set_files_rlimit(kvm_max_vcpus);
+
/*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
* Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID
@@ -53,7 +53,7 @@ int main(int argc, char *argv[])
kvm_max_vcpu_id = kvm_max_vcpus;
TEST_ASSERT(kvm_max_vcpu_id >= kvm_max_vcpus,
- "KVM_MAX_VCPU_ID (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
+ "KVM_MAX_VCPU_IDS (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
kvm_max_vcpu_id, kvm_max_vcpus);
test_vcpu_creation(0, kvm_max_vcpus);
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
new file mode 100644
index 000000000000..dd8b12f626d3
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM page table test
+ *
+ * Copyright (C) 2021, Huawei, Inc.
+ *
+ * Make sure that THP has been enabled or enough HUGETLB pages with specific
+ * page size have been pre-allocated on your system, if you are planning to
+ * use hugepages to back the guest memory for testing.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <pthread.h>
+#include <semaphore.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "guest_modes.h"
+#include "ucall_common.h"
+
+#define TEST_MEM_SLOT_INDEX 1
+
+/* Default size(1GB) of the memory for testing */
+#define DEFAULT_TEST_MEM_SIZE (1 << 30)
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
+
+/* Different guest memory accessing stages */
+enum test_stage {
+ KVM_BEFORE_MAPPINGS,
+ KVM_CREATE_MAPPINGS,
+ KVM_UPDATE_MAPPINGS,
+ KVM_ADJUST_MAPPINGS,
+ NUM_TEST_STAGES,
+};
+
+static const char * const test_stage_string[] = {
+ "KVM_BEFORE_MAPPINGS",
+ "KVM_CREATE_MAPPINGS",
+ "KVM_UPDATE_MAPPINGS",
+ "KVM_ADJUST_MAPPINGS",
+};
+
+struct test_args {
+ struct kvm_vm *vm;
+ uint64_t guest_test_virt_mem;
+ uint64_t host_page_size;
+ uint64_t host_num_pages;
+ uint64_t large_page_size;
+ uint64_t large_num_pages;
+ uint64_t host_pages_per_lpage;
+ enum vm_mem_backing_src_type src_type;
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+};
+
+/*
+ * Guest variables. Use addr_gva2hva() if these variables need
+ * to be changed in host.
+ */
+static enum test_stage guest_test_stage;
+
+/* Host variables */
+static uint32_t nr_vcpus = 1;
+static struct test_args test_args;
+static enum test_stage *current_stage;
+static bool host_quit;
+
+/* Whether the test stage is updated, or completed */
+static sem_t test_stage_updated;
+static sem_t test_stage_completed;
+
+/*
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
+ */
+static uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+static void guest_code(bool do_write)
+{
+ struct test_args *p = &test_args;
+ enum test_stage *current_stage = &guest_test_stage;
+ uint64_t addr;
+ int i, j;
+
+ while (true) {
+ addr = p->guest_test_virt_mem;
+
+ switch (READ_ONCE(*current_stage)) {
+ /*
+ * All vCPU threads will be started in this stage,
+ * where guest code of each vCPU will do nothing.
+ */
+ case KVM_BEFORE_MAPPINGS:
+ break;
+
+ /*
+ * Before dirty logging, vCPUs concurrently access the first
+ * 8 bytes of each page (host page/large page) within the same
+ * memory region with different accessing types (read/write).
+ * Then KVM will create normal page mappings or huge block
+ * mappings for them.
+ */
+ case KVM_CREATE_MAPPINGS:
+ for (i = 0; i < p->large_num_pages; i++) {
+ if (do_write)
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+ else
+ READ_ONCE(*(uint64_t *)addr);
+
+ addr += p->large_page_size;
+ }
+ break;
+
+ /*
+ * During dirty logging, KVM will only update attributes of the
+ * normal page mappings from RO to RW if memory backing src type
+ * is anonymous. In other cases, KVM will split the huge block
+ * mappings into normal page mappings if memory backing src type
+ * is THP or HUGETLB.
+ */
+ case KVM_UPDATE_MAPPINGS:
+ if (p->src_type == VM_MEM_SRC_ANONYMOUS) {
+ for (i = 0; i < p->host_num_pages; i++) {
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+ addr += p->host_page_size;
+ }
+ break;
+ }
+
+ for (i = 0; i < p->large_num_pages; i++) {
+ /*
+ * Write to the first host page in each large
+ * page region, and triger break of large pages.
+ */
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+
+ /*
+ * Access the middle host pages in each large
+ * page region. Since dirty logging is enabled,
+ * this will create new mappings at the smallest
+ * granularity.
+ */
+ addr += p->large_page_size / 2;
+ for (j = 0; j < p->host_pages_per_lpage / 2; j++) {
+ READ_ONCE(*(uint64_t *)addr);
+ addr += p->host_page_size;
+ }
+ }
+ break;
+
+ /*
+ * After dirty logging is stopped, vCPUs concurrently read
+ * from every single host page. Then KVM will coalesce the
+ * split page mappings back to block mappings. And a TLB
+ * conflict abort could occur here if TLB entries of the
+ * page mappings are not fully invalidated.
+ */
+ case KVM_ADJUST_MAPPINGS:
+ for (i = 0; i < p->host_num_pages; i++) {
+ READ_ONCE(*(uint64_t *)addr);
+ addr += p->host_page_size;
+ }
+ break;
+
+ default:
+ GUEST_ASSERT(0);
+ }
+
+ GUEST_SYNC(1);
+ }
+}
+
+static void *vcpu_worker(void *data)
+{
+ struct kvm_vcpu *vcpu = data;
+ bool do_write = !(vcpu->id % 2);
+ struct timespec start;
+ struct timespec ts_diff;
+ enum test_stage stage;
+ int ret;
+
+ vcpu_args_set(vcpu, 1, do_write);
+
+ while (!READ_ONCE(host_quit)) {
+ ret = sem_wait(&test_stage_updated);
+ TEST_ASSERT(ret == 0, "Error in sem_wait");
+
+ if (READ_ONCE(host_quit))
+ return NULL;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ ret = _vcpu_run(vcpu);
+ ts_diff = timespec_elapsed(start);
+
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
+ "Invalid guest sync status: exit_reason=%s",
+ exit_reason_str(vcpu->run->exit_reason));
+
+ pr_debug("Got sync event from vCPU %d\n", vcpu->id);
+ stage = READ_ONCE(*current_stage);
+
+ /*
+ * Here we can know the execution time of every
+ * single vcpu running in different test stages.
+ */
+ pr_debug("vCPU %d has completed stage %s\n"
+ "execution time is: %ld.%.9lds\n\n",
+ vcpu->id, test_stage_string[stage],
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ ret = sem_post(&test_stage_completed);
+ TEST_ASSERT(ret == 0, "Error in sem_post");
+ }
+
+ return NULL;
+}
+
+struct test_params {
+ uint64_t phys_offset;
+ uint64_t test_mem_size;
+ enum vm_mem_backing_src_type src_type;
+};
+
+static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
+{
+ int ret;
+ struct test_params *p = arg;
+ enum vm_mem_backing_src_type src_type = p->src_type;
+ uint64_t large_page_size = get_backing_src_pagesz(src_type);
+ uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;
+ uint64_t host_page_size = getpagesize();
+ uint64_t test_mem_size = p->test_mem_size;
+ uint64_t guest_num_pages;
+ uint64_t alignment;
+ void *host_test_mem;
+ struct kvm_vm *vm;
+
+ /* Align up the test memory size */
+ alignment = max(large_page_size, guest_page_size);
+ test_mem_size = (test_mem_size + alignment - 1) & ~(alignment - 1);
+
+ /* Create a VM with enough guest pages */
+ guest_num_pages = test_mem_size / guest_page_size;
+ vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, guest_num_pages,
+ guest_code, test_args.vcpus);
+
+ /* Align down GPA of the testing memslot */
+ if (!p->phys_offset)
+ guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *
+ guest_page_size;
+ else
+ guest_test_phys_mem = p->phys_offset;
+#ifdef __s390x__
+ alignment = max(0x100000UL, alignment);
+#endif
+ guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
+
+ /* Set up the shared data structure test_args */
+ test_args.vm = vm;
+ test_args.guest_test_virt_mem = guest_test_virt_mem;
+ test_args.host_page_size = host_page_size;
+ test_args.host_num_pages = test_mem_size / host_page_size;
+ test_args.large_page_size = large_page_size;
+ test_args.large_num_pages = test_mem_size / large_page_size;
+ test_args.host_pages_per_lpage = large_page_size / host_page_size;
+ test_args.src_type = src_type;
+
+ /* Add an extra memory slot with specified backing src type */
+ vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem,
+ TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
+
+ /* Do mapping(GVA->GPA) for the testing memory slot */
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
+
+ /* Cache the HVA pointer of the region */
+ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
+
+ /* Export shared structure test_args to guest */
+ sync_global_to_guest(vm, test_args);
+
+ ret = sem_init(&test_stage_updated, 0, 0);
+ TEST_ASSERT(ret == 0, "Error in sem_init");
+
+ ret = sem_init(&test_stage_completed, 0, 0);
+ TEST_ASSERT(ret == 0, "Error in sem_init");
+
+ current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage));
+ *current_stage = NUM_TEST_STAGES;
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+ pr_info("Testing memory backing src type: %s\n",
+ vm_mem_backing_src_alias(src_type)->name);
+ pr_info("Testing memory backing src granularity: 0x%lx\n",
+ large_page_size);
+ pr_info("Testing memory size(aligned): 0x%lx\n", test_mem_size);
+ pr_info("Guest physical test memory offset: 0x%lx\n",
+ guest_test_phys_mem);
+ pr_info("Host virtual test memory offset: 0x%lx\n",
+ (uint64_t)host_test_mem);
+ pr_info("Number of testing vCPUs: %d\n", nr_vcpus);
+
+ return vm;
+}
+
+static void vcpus_complete_new_stage(enum test_stage stage)
+{
+ int ret;
+ int vcpus;
+
+ /* Wake up all the vcpus to run new test stage */
+ for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {
+ ret = sem_post(&test_stage_updated);
+ TEST_ASSERT(ret == 0, "Error in sem_post");
+ }
+ pr_debug("All vcpus have been notified to continue\n");
+
+ /* Wait for all the vcpus to complete new test stage */
+ for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {
+ ret = sem_wait(&test_stage_completed);
+ TEST_ASSERT(ret == 0, "Error in sem_wait");
+
+ pr_debug("%d vcpus have completed stage %s\n",
+ vcpus + 1, test_stage_string[stage]);
+ }
+
+ pr_debug("All vcpus have completed stage %s\n",
+ test_stage_string[stage]);
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ pthread_t *vcpu_threads;
+ struct kvm_vm *vm;
+ struct timespec start;
+ struct timespec ts_diff;
+ int ret, i;
+
+ /* Create VM with vCPUs and make some pre-initialization */
+ vm = pre_init_before_test(mode, arg);
+
+ vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
+ TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+ host_quit = false;
+ *current_stage = KVM_BEFORE_MAPPINGS;
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_create(&vcpu_threads[i], NULL, vcpu_worker,
+ test_args.vcpus[i]);
+
+ vcpus_complete_new_stage(*current_stage);
+ pr_info("Started all vCPUs successfully\n");
+
+ /* Test the stage of KVM creating mappings */
+ *current_stage = KVM_CREATE_MAPPINGS;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ vcpus_complete_new_stage(*current_stage);
+ ts_diff = timespec_elapsed(start);
+
+ pr_info("KVM_CREATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ /* Test the stage of KVM updating mappings */
+ vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
+ KVM_MEM_LOG_DIRTY_PAGES);
+
+ *current_stage = KVM_UPDATE_MAPPINGS;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ vcpus_complete_new_stage(*current_stage);
+ ts_diff = timespec_elapsed(start);
+
+ pr_info("KVM_UPDATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ /* Test the stage of KVM adjusting mappings */
+ vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
+
+ *current_stage = KVM_ADJUST_MAPPINGS;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ vcpus_complete_new_stage(*current_stage);
+ ts_diff = timespec_elapsed(start);
+
+ pr_info("KVM_ADJUST_MAPPINGS: total execution time: %ld.%.9lds\n\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ /* Tell the vcpu thread to quit */
+ host_quit = true;
+ for (i = 0; i < nr_vcpus; i++) {
+ ret = sem_post(&test_stage_updated);
+ TEST_ASSERT(ret == 0, "Error in sem_post");
+ }
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i], NULL);
+
+ ret = sem_destroy(&test_stage_updated);
+ TEST_ASSERT(ret == 0, "Error in sem_destroy");
+
+ ret = sem_destroy(&test_stage_completed);
+ TEST_ASSERT(ret == 0, "Error in sem_destroy");
+
+ free(vcpu_threads);
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-p offset] [-m mode] "
+ "[-b mem-size] [-v vcpus] [-s mem-type]\n", name);
+ puts("");
+ printf(" -p: specify guest physical test memory offset\n"
+ " Warning: a low offset can conflict with the loaded test code.\n");
+ guest_modes_help();
+ printf(" -b: specify size of the memory region for testing. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ printf(" -v: specify the number of vCPUs to run\n"
+ " (default: 1)\n");
+ backing_src_help("-s");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ struct test_params p = {
+ .test_mem_size = DEFAULT_TEST_MEM_SIZE,
+ .src_type = DEFAULT_VM_MEM_SRC,
+ };
+ int opt;
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) {
+ switch (opt) {
+ case 'p':
+ p.phys_offset = strtoull(optarg, NULL, 0);
+ break;
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'b':
+ p.test_mem_size = parse_size(optarg);
+ break;
+ case 'v':
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ TEST_ASSERT(nr_vcpus <= max_vcpus,
+ "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
+ break;
+ case 's':
+ p.src_type = parse_backing_src_type(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ exit(0);
+ }
+ }
+
+ for_each_guest_mode(run_test, &p);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
deleted file mode 100644
index cee92d477dc0..000000000000
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ /dev/null
@@ -1,339 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * AArch64 code
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-
-#include <linux/compiler.h>
-
-#include "kvm_util.h"
-#include "../kvm_util_internal.h"
-#include "processor.h"
-
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
-
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
-{
- return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
-static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
- uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
-
- return (gva >> shift) & mask;
-}
-
-static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
- uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-
- TEST_ASSERT(vm->pgtable_levels == 4,
- "Mode %d does not have 4 page table levels", vm->mode);
-
- return (gva >> shift) & mask;
-}
-
-static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
- uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-
- TEST_ASSERT(vm->pgtable_levels >= 3,
- "Mode %d does not have >= 3 page table levels", vm->mode);
-
- return (gva >> shift) & mask;
-}
-
-static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
- return (gva >> vm->page_shift) & mask;
-}
-
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
-{
- uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
- return entry & mask;
-}
-
-static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
-{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
- return 1 << (vm->va_bits - shift);
-}
-
-static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
-{
- return 1 << (vm->page_shift - 3);
-}
-
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
-{
- if (!vm->pgd_created) {
- vm_paddr_t paddr = vm_phy_pages_alloc(vm,
- page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- vm->pgd = paddr;
- vm->pgd_created = true;
- }
-}
-
-void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot, uint64_t flags)
-{
- uint8_t attr_idx = flags & 7;
- uint64_t *ptep;
-
- TEST_ASSERT((vaddr % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
- (vaddr >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx", vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
-
- switch (vm->pgtable_levels) {
- case 4:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
- /* fall through */
- case 3:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
- /* fall through */
- case 2:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
- break;
- default:
- TEST_FAIL("Page table levels must be 2, 3, or 4");
- }
-
- *ptep = paddr | 3;
- *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
-}
-
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
-{
- uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
-
- _virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
-}
-
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- uint64_t *ptep;
-
- if (!vm->pgd_created)
- goto unmapped_gva;
-
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
- if (!ptep)
- goto unmapped_gva;
-
- switch (vm->pgtable_levels) {
- case 4:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
- if (!ptep)
- goto unmapped_gva;
- /* fall through */
- case 3:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
- if (!ptep)
- goto unmapped_gva;
- /* fall through */
- case 2:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
- if (!ptep)
- goto unmapped_gva;
- break;
- default:
- TEST_FAIL("Page table levels must be 2, 3, or 4");
- }
-
- return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
-
-unmapped_gva:
- TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
- exit(1);
-}
-
-static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
-{
-#ifdef DEBUG
- static const char * const type[] = { "", "pud", "pmd", "pte" };
- uint64_t pte, *ptep;
-
- if (level == 4)
- return;
-
- for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
- ptep = addr_gpa2hva(vm, pte);
- if (!*ptep)
- continue;
- fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
- pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
- }
-#endif
-}
-
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
- int level = 4 - (vm->pgtable_levels - 1);
- uint64_t pgd, *ptep;
-
- if (!vm->pgd_created)
- return;
-
- for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
- ptep = addr_gpa2hva(vm, pgd);
- if (!*ptep)
- continue;
- fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
- pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
- }
-}
-
-void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
-{
- struct kvm_vcpu_init default_init = { .target = -1, };
- uint64_t sctlr_el1, tcr_el1;
-
- if (!init)
- init = &default_init;
-
- if (init->target == -1) {
- struct kvm_vcpu_init preferred;
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
- init->target = preferred.target;
- }
-
- vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, init);
-
- /*
- * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
- * registers, which the variable argument list macros do.
- */
- set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
-
- get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);
- get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
-
- switch (vm->mode) {
- case VM_MODE_P52V48_4K:
- TEST_FAIL("AArch64 does not support 4K sized pages "
- "with 52-bit physical address ranges");
- case VM_MODE_PXXV48_4K:
- TEST_FAIL("AArch64 does not support 4K sized pages "
- "with ANY-bit physical address ranges");
- case VM_MODE_P52V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
- break;
- case VM_MODE_P48V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
- break;
- case VM_MODE_P48V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
- break;
- case VM_MODE_P40V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
- break;
- case VM_MODE_P40V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
- break;
- default:
- TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
- }
-
- sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
- /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
- tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
- tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
-
- set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);
-}
-
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
-{
- uint64_t pstate, pc;
-
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
-
- fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
- indent, "", pstate, pc);
-}
-
-void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_init *init, void *guest_code)
-{
- size_t stack_size = vm->page_size == 4096 ?
- DEFAULT_STACK_PGS * vm->page_size :
- vm->page_size;
- uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
-
- vm_vcpu_add(vm, vcpuid);
- aarch64_vcpu_setup(vm, vcpuid, init);
-
- set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
- set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
-}
-
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
-{
- aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code);
-}
-
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
-{
- va_list ap;
- int i;
-
- TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
- " num: %u\n", num);
-
- va_start(ap, num);
-
- for (i = 0; i < num; i++) {
- set_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[i]),
- va_arg(ap, uint64_t));
- }
-
- va_end(ap);
-}
-
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
-{
-}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
deleted file mode 100644
index 2f37b90ee1a9..000000000000
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ /dev/null
@@ -1,115 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include "kvm_util.h"
-#include "../kvm_util_internal.h"
-
-static vm_vaddr_t *ucall_exit_mmio_addr;
-
-static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
-{
- if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
- return false;
-
- virt_pg_map(vm, gpa, gpa, 0);
-
- ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
- sync_global_to_guest(vm, ucall_exit_mmio_addr);
-
- return true;
-}
-
-void ucall_init(struct kvm_vm *vm, void *arg)
-{
- vm_paddr_t gpa, start, end, step, offset;
- unsigned int bits;
- bool ret;
-
- if (arg) {
- gpa = (vm_paddr_t)arg;
- ret = ucall_mmio_init(vm, gpa);
- TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
- return;
- }
-
- /*
- * Find an address within the allowed physical and virtual address
- * spaces, that does _not_ have a KVM memory region associated with
- * it. Identity mapping an address like this allows the guest to
- * access it, but as KVM doesn't know what to do with it, it
- * will assume it's something userspace handles and exit with
- * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
- * Here we start with a guess that the addresses around 5/8th
- * of the allowed space are unmapped and then work both down and
- * up from there in 1/16th allowed space sized steps.
- *
- * Note, we need to use VA-bits - 1 when calculating the allowed
- * virtual address space for an identity mapping because the upper
- * half of the virtual address space is the two's complement of the
- * lower and won't match physical addresses.
- */
- bits = vm->va_bits - 1;
- bits = vm->pa_bits < bits ? vm->pa_bits : bits;
- end = 1ul << bits;
- start = end * 5 / 8;
- step = end / 16;
- for (offset = 0; offset < end - start; offset += step) {
- if (ucall_mmio_init(vm, start - offset))
- return;
- if (ucall_mmio_init(vm, start + offset))
- return;
- }
- TEST_FAIL("Can't find a ucall mmio address");
-}
-
-void ucall_uninit(struct kvm_vm *vm)
-{
- ucall_exit_mmio_addr = 0;
- sync_global_to_guest(vm, ucall_exit_mmio_addr);
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
-
- if (uc)
- memset(uc, 0, sizeof(*uc));
-
- if (run->exit_reason == KVM_EXIT_MMIO &&
- run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
- vm_vaddr_t gva;
-
- TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
- "Unexpected ucall exit mmio address access");
- memcpy(&gva, run->mmio.data, sizeof(gva));
- memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
-
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
- }
-
- return ucall.cmd;
-}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
new file mode 100644
index 000000000000..b023868fe0b8
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/arm64/gic.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) support
+ */
+
+#include <errno.h>
+#include <linux/bits.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+
+#include <gic.h>
+#include "gic_private.h"
+#include "processor.h"
+#include "spinlock.h"
+
+static const struct gic_common_ops *gic_common_ops;
+static struct spinlock gic_lock;
+
+static void gic_cpu_init(unsigned int cpu)
+{
+ gic_common_ops->gic_cpu_init(cpu);
+}
+
+static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
+{
+ const struct gic_common_ops *gic_ops = NULL;
+
+ spin_lock(&gic_lock);
+
+ /* Distributor initialization is needed only once per VM */
+ if (gic_common_ops) {
+ spin_unlock(&gic_lock);
+ return;
+ }
+
+ if (type == GIC_V3)
+ gic_ops = &gicv3_ops;
+
+ GUEST_ASSERT(gic_ops);
+
+ gic_ops->gic_init(nr_cpus);
+ gic_common_ops = gic_ops;
+
+ /* Make sure that the initialized data is visible to all the vCPUs */
+ dsb(sy);
+
+ spin_unlock(&gic_lock);
+}
+
+void gic_init(enum gic_type type, unsigned int nr_cpus)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+ GUEST_ASSERT(type < GIC_TYPE_MAX);
+ GUEST_ASSERT(nr_cpus);
+
+ gic_dist_init(type, nr_cpus);
+ gic_cpu_init(cpu);
+}
+
+void gic_irq_enable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_enable(intid);
+}
+
+void gic_irq_disable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_disable(intid);
+}
+
+unsigned int gic_get_and_ack_irq(void)
+{
+ uint64_t irqstat;
+ unsigned int intid;
+
+ GUEST_ASSERT(gic_common_ops);
+
+ irqstat = gic_common_ops->gic_read_iar();
+ intid = irqstat & GENMASK(23, 0);
+
+ return intid;
+}
+
+void gic_set_eoi(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_eoir(intid);
+}
+
+void gic_set_dir(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_dir(intid);
+}
+
+void gic_set_eoi_split(bool split)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_eoi_split(split);
+}
+
+void gic_set_priority_mask(uint64_t pmr)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority_mask(pmr);
+}
+
+void gic_set_priority(unsigned int intid, unsigned int prio)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority(intid, prio);
+}
+
+void gic_irq_set_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_active(intid);
+}
+
+void gic_irq_clear_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_active(intid);
+}
+
+bool gic_irq_get_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_active(intid);
+}
+
+void gic_irq_set_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_pending(intid);
+}
+
+void gic_irq_clear_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_pending(intid);
+}
+
+bool gic_irq_get_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_pending(intid);
+}
+
+void gic_irq_set_config(unsigned int intid, bool is_edge)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_config(intid, is_edge);
+}
+
+void gic_irq_set_group(unsigned int intid, bool group)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_group(intid, group);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
new file mode 100644
index 000000000000..b6a7e30c3eb1
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) private defines that's only
+ * shared among the GIC library code.
+ */
+
+#ifndef SELFTEST_KVM_GIC_PRIVATE_H
+#define SELFTEST_KVM_GIC_PRIVATE_H
+
+struct gic_common_ops {
+ void (*gic_init)(unsigned int nr_cpus);
+ void (*gic_cpu_init)(unsigned int cpu);
+ void (*gic_irq_enable)(unsigned int intid);
+ void (*gic_irq_disable)(unsigned int intid);
+ uint64_t (*gic_read_iar)(void);
+ void (*gic_write_eoir)(uint32_t irq);
+ void (*gic_write_dir)(uint32_t irq);
+ void (*gic_set_eoi_split)(bool split);
+ void (*gic_set_priority_mask)(uint64_t mask);
+ void (*gic_set_priority)(uint32_t intid, uint32_t prio);
+ void (*gic_irq_set_active)(uint32_t intid);
+ void (*gic_irq_clear_active)(uint32_t intid);
+ bool (*gic_irq_get_active)(uint32_t intid);
+ void (*gic_irq_set_pending)(uint32_t intid);
+ void (*gic_irq_clear_pending)(uint32_t intid);
+ bool (*gic_irq_get_pending)(uint32_t intid);
+ void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+ void (*gic_irq_set_group)(uint32_t intid, bool group);
+};
+
+extern const struct gic_common_ops gicv3_ops;
+
+#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
new file mode 100644
index 000000000000..50754a27f493
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 support
+ */
+
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_private.h"
+
+#define GICV3_MAX_CPUS 512
+
+#define GICD_INT_DEF_PRI 0xa0
+#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
+ (GICD_INT_DEF_PRI << 16) |\
+ (GICD_INT_DEF_PRI << 8) |\
+ GICD_INT_DEF_PRI)
+
+#define ICC_PMR_DEF_PRIO 0xf0
+
+struct gicv3_data {
+ unsigned int nr_cpus;
+ unsigned int nr_spis;
+};
+
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define DIST_BIT (1U << 31)
+
+enum gicv3_intid_range {
+ SGI_RANGE,
+ PPI_RANGE,
+ SPI_RANGE,
+ INVALID_RANGE,
+};
+
+static struct gicv3_data gicv3_data;
+
+static void gicv3_gicd_wait_for_rwp(void)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static inline volatile void *gicr_base_cpu(uint32_t cpu)
+{
+ /* Align all the redistributors sequentially */
+ return GICR_BASE_GVA + cpu * SZ_64K * 2;
+}
+
+static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+{
+ if (cpu_or_dist & DIST_BIT)
+ gicv3_gicd_wait_for_rwp();
+ else
+ gicv3_gicr_wait_for_rwp(cpu_or_dist);
+}
+
+static enum gicv3_intid_range get_intid_range(unsigned int intid)
+{
+ switch (intid) {
+ case 0 ... 15:
+ return SGI_RANGE;
+ case 16 ... 31:
+ return PPI_RANGE;
+ case 32 ... 1019:
+ return SPI_RANGE;
+ }
+
+ /* We should not be reaching here */
+ GUEST_ASSERT(0);
+
+ return INVALID_RANGE;
+}
+
+static uint64_t gicv3_read_iar(void)
+{
+ uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+
+ dsb(sy);
+ return irqstat;
+}
+
+static void gicv3_write_eoir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
+ isb();
+}
+
+static void gicv3_write_dir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+ isb();
+}
+
+static void gicv3_set_priority_mask(uint64_t mask)
+{
+ write_sysreg_s(mask, SYS_ICC_PMR_EL1);
+}
+
+static void gicv3_set_eoi_split(bool split)
+{
+ uint32_t val;
+
+ /*
+ * All other fields are read-only, so no need to read CTLR first. In
+ * fact, the kernel does the same.
+ */
+ val = split ? (1U << 1) : 0;
+ write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+ isb();
+}
+
+uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+{
+ volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+ : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
+ return readl(base + offset);
+}
+
+void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+{
+ volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+ : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
+ writel(reg_val, base + offset);
+}
+
+uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+{
+ return gicv3_reg_readl(cpu_or_dist, offset) & mask;
+}
+
+void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
+ uint32_t mask, uint32_t reg_val)
+{
+ uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+
+ tmp |= (reg_val & mask);
+ gicv3_reg_writel(cpu_or_dist, offset, tmp);
+}
+
+/*
+ * We use a single offset for the distributor and redistributor maps as they
+ * have the same value in both. The only exceptions are registers that only
+ * exist in one and not the other, like GICR_WAKER that doesn't exist in the
+ * distributor map. Such registers are conveniently marked as reserved in the
+ * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
+ * marked as "Reserved" in the Distributor map.
+ */
+static void gicv3_access_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field,
+ bool write, uint32_t *val)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ enum gicv3_intid_range intid_range = get_intid_range(intid);
+ uint32_t fields_per_reg, index, mask, shift;
+ uint32_t cpu_or_dist;
+
+ GUEST_ASSERT(bits_per_field <= reg_bits);
+ GUEST_ASSERT(!write || *val < (1U << bits_per_field));
+ /*
+ * This function does not support 64 bit accesses. Just asserting here
+ * until we implement readq/writeq.
+ */
+ GUEST_ASSERT(reg_bits == 32);
+
+ fields_per_reg = reg_bits / bits_per_field;
+ index = intid % fields_per_reg;
+ shift = index * bits_per_field;
+ mask = ((1U << bits_per_field) - 1) << shift;
+
+ /* Set offset to the actual register holding intid's config. */
+ offset += (intid / fields_per_reg) * (reg_bits / 8);
+
+ cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
+
+ if (write)
+ gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
+ *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
+}
+
+static void gicv3_write_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+{
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, true, &val);
+}
+
+static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field)
+{
+ uint32_t val;
+
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, false, &val);
+ return val;
+}
+
+static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+{
+ gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
+}
+
+/* Sets the intid to be level-sensitive or edge-triggered. */
+static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+{
+ uint32_t val;
+
+ /* N/A for private interrupts. */
+ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
+ val = is_edge ? 2 : 0;
+ gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
+}
+
+static void gicv3_irq_enable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_disable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_set_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_active(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
+}
+
+static void gicv3_irq_set_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_pending(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
+}
+
+static void gicv3_enable_redist(volatile void *redist_base)
+{
+ uint32_t val = readl(redist_base + GICR_WAKER);
+ unsigned int count = 100000; /* 1s */
+
+ val &= ~GICR_WAKER_ProcessorSleep;
+ writel(val, redist_base + GICR_WAKER);
+
+ /* Wait until the processor is 'active' */
+ while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static void gicv3_set_group(uint32_t intid, bool grp)
+{
+ uint32_t cpu_or_dist;
+ uint32_t val;
+
+ cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid();
+ val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4);
+ if (grp)
+ val |= BIT(intid % 32);
+ else
+ val &= ~BIT(intid % 32);
+ gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val);
+}
+
+static void gicv3_cpu_init(unsigned int cpu)
+{
+ volatile void *sgi_base;
+ unsigned int i;
+ volatile void *redist_base_cpu;
+ u64 typer;
+
+ GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+ redist_base_cpu = gicr_base_cpu(cpu);
+ sgi_base = sgi_base_from_redist(redist_base_cpu);
+
+ /* Verify assumption that GICR_TYPER.Processor_number == cpu */
+ typer = readq_relaxed(redist_base_cpu + GICR_TYPER);
+ GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu);
+
+ gicv3_enable_redist(redist_base_cpu);
+
+ /*
+ * Mark all the SGI and PPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ writel(~0, sgi_base + GICR_IGROUPR0);
+ writel(~0, sgi_base + GICR_ICACTIVER0);
+ writel(~0, sgi_base + GICR_ICENABLER0);
+
+ /* Set a default priority for all the SGIs and PPIs */
+ for (i = 0; i < 32; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ sgi_base + GICR_IPRIORITYR0 + i);
+
+ gicv3_gicr_wait_for_rwp(cpu);
+
+ /* Enable the GIC system register (ICC_*) access */
+ write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
+ SYS_ICC_SRE_EL1);
+
+ /* Set a default priority threshold */
+ write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+
+ /* Disable Group-0 interrupts */
+ write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
+ /* Enable non-secure Group-1 interrupts */
+ write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
+}
+
+static void gicv3_dist_init(void)
+{
+ unsigned int i;
+
+ /* Disable the distributor until we set things up */
+ writel(0, GICD_BASE_GVA + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+
+ /*
+ * Mark all the SPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ for (i = 32; i < gicv3_data.nr_spis; i += 32) {
+ writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8);
+ }
+
+ /* Set a default priority for all the SPIs */
+ for (i = 32; i < gicv3_data.nr_spis; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ GICD_BASE_GVA + GICD_IPRIORITYR + i);
+
+ /* Wait for the settings to sync-in */
+ gicv3_gicd_wait_for_rwp();
+
+ /* Finally, enable the distributor globally with ARE */
+ writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
+ GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+}
+
+static void gicv3_init(unsigned int nr_cpus)
+{
+ GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
+
+ gicv3_data.nr_cpus = nr_cpus;
+ gicv3_data.nr_spis = GICD_TYPER_SPIS(
+ readl(GICD_BASE_GVA + GICD_TYPER));
+ if (gicv3_data.nr_spis > 1020)
+ gicv3_data.nr_spis = 1020;
+
+ /*
+ * Initialize only the distributor for now.
+ * The redistributor and CPU interfaces are initialized
+ * later for every PE.
+ */
+ gicv3_dist_init();
+}
+
+const struct gic_common_ops gicv3_ops = {
+ .gic_init = gicv3_init,
+ .gic_cpu_init = gicv3_cpu_init,
+ .gic_irq_enable = gicv3_irq_enable,
+ .gic_irq_disable = gicv3_irq_disable,
+ .gic_read_iar = gicv3_read_iar,
+ .gic_write_eoir = gicv3_write_eoir,
+ .gic_write_dir = gicv3_write_dir,
+ .gic_set_priority_mask = gicv3_set_priority_mask,
+ .gic_set_eoi_split = gicv3_set_eoi_split,
+ .gic_set_priority = gicv3_set_priority,
+ .gic_irq_set_active = gicv3_irq_set_active,
+ .gic_irq_clear_active = gicv3_irq_clear_active,
+ .gic_irq_get_active = gicv3_irq_get_active,
+ .gic_irq_set_pending = gicv3_irq_set_pending,
+ .gic_irq_clear_pending = gicv3_irq_clear_pending,
+ .gic_irq_get_pending = gicv3_irq_get_pending,
+ .gic_irq_set_config = gicv3_irq_set_config,
+ .gic_irq_set_group = gicv3_set_group,
+};
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+ vm_paddr_t pend_table)
+{
+ volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
+
+ u32 ctlr;
+ u64 val;
+
+ val = (cfg_table |
+ GICR_PROPBASER_InnerShareable |
+ GICR_PROPBASER_RaWaWb |
+ ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK));
+ writeq_relaxed(val, rdist_base + GICR_PROPBASER);
+
+ val = (pend_table |
+ GICR_PENDBASER_InnerShareable |
+ GICR_PENDBASER_RaWaWb);
+ writeq_relaxed(val, rdist_base + GICR_PENDBASER);
+
+ ctlr = readl_relaxed(rdist_base + GICR_CTLR);
+ ctlr |= GICR_CTLR_ENABLE_LPIS;
+ writel_relaxed(ctlr, rdist_base + GICR_CTLR);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
new file mode 100644
index 000000000000..7f9fdcf42ae6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c
+ * over in the kernel tree.
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "processor.h"
+
+#define GITS_COLLECTION_TARGET_SHIFT 16
+
+static u64 its_read_u64(unsigned long offset)
+{
+ return readq_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u64(unsigned long offset, u64 val)
+{
+ writeq_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static u32 its_read_u32(unsigned long offset)
+{
+ return readl_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u32(unsigned long offset, u32 val)
+{
+ writel_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static unsigned long its_find_baser(unsigned int type)
+{
+ int i;
+
+ for (i = 0; i < GITS_BASER_NR_REGS; i++) {
+ u64 baser;
+ unsigned long offset = GITS_BASER + (i * sizeof(baser));
+
+ baser = its_read_u64(offset);
+ if (GITS_BASER_TYPE(baser) == type)
+ return offset;
+ }
+
+ GUEST_FAIL("Couldn't find an ITS BASER of type %u", type);
+ return -1;
+}
+
+static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
+{
+ unsigned long offset = its_find_baser(type);
+ u64 baser;
+
+ baser = ((size / SZ_64K) - 1) |
+ GITS_BASER_PAGE_SIZE_64K |
+ GITS_BASER_InnerShareable |
+ base |
+ GITS_BASER_RaWaWb |
+ GITS_BASER_VALID;
+
+ its_write_u64(offset, baser);
+}
+
+static void its_install_cmdq(vm_paddr_t base, size_t size)
+{
+ u64 cbaser;
+
+ cbaser = ((size / SZ_4K) - 1) |
+ GITS_CBASER_InnerShareable |
+ base |
+ GITS_CBASER_RaWaWb |
+ GITS_CBASER_VALID;
+
+ its_write_u64(GITS_CBASER, cbaser);
+}
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+ vm_paddr_t device_tbl, size_t device_tbl_sz,
+ vm_paddr_t cmdq, size_t cmdq_size)
+{
+ u32 ctlr;
+
+ its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz);
+ its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz);
+ its_install_cmdq(cmdq, cmdq_size);
+
+ ctlr = its_read_u32(GITS_CTLR);
+ ctlr |= GITS_CTLR_ENABLE;
+ its_write_u32(GITS_CTLR, ctlr);
+}
+
+struct its_cmd_block {
+ union {
+ u64 raw_cmd[4];
+ __le64 raw_cmd_le[4];
+ };
+};
+
+static inline void its_fixup_cmd(struct its_cmd_block *cmd)
+{
+ /* Let's fixup BE commands */
+ cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
+ cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
+ cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
+ cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
+}
+
+static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
+{
+ u64 mask = GENMASK_ULL(h, l);
+ *raw_cmd &= ~mask;
+ *raw_cmd |= (val << l) & mask;
+}
+
+static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
+{
+ its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0);
+}
+
+static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
+{
+ its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32);
+}
+
+static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
+{
+ its_mask_encode(&cmd->raw_cmd[1], id, 31, 0);
+}
+
+static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
+{
+ its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32);
+}
+
+static void its_encode_size(struct its_cmd_block *cmd, u8 size)
+{
+ its_mask_encode(&cmd->raw_cmd[1], size, 4, 0);
+}
+
+static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
+{
+ its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
+}
+
+static void its_encode_valid(struct its_cmd_block *cmd, int valid)
+{
+ its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63);
+}
+
+static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
+{
+ its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
+}
+
+static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
+{
+ its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
+}
+
+static u64 procnum_to_rdbase(u32 vcpu_id)
+{
+ return vcpu_id << GITS_COLLECTION_TARGET_SHIFT;
+}
+
+#define GITS_CMDQ_POLL_ITERATIONS 0
+
+static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
+{
+ u64 cwriter = its_read_u64(GITS_CWRITER);
+ struct its_cmd_block *dst = cmdq_base + cwriter;
+ u64 cbaser = its_read_u64(GITS_CBASER);
+ size_t cmdq_size;
+ u64 next;
+ int i;
+
+ cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K;
+
+ its_fixup_cmd(cmd);
+
+ WRITE_ONCE(*dst, *cmd);
+ dsb(ishst);
+ next = (cwriter + sizeof(*cmd)) % cmdq_size;
+ its_write_u64(GITS_CWRITER, next);
+
+ /*
+ * Polling isn't necessary considering KVM's ITS emulation at the time
+ * of writing this, as the CMDQ is processed synchronously after a write
+ * to CWRITER.
+ */
+ for (i = 0; its_read_u64(GITS_CREADR) != next; i++) {
+ __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS,
+ "ITS didn't process command at offset %lu after %d iterations\n",
+ cwriter, i);
+
+ cpu_relax();
+ }
+}
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+ size_t itt_size, bool valid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPD);
+ its_encode_devid(&cmd, device_id);
+ its_encode_size(&cmd, ilog2(itt_size) - 1);
+ its_encode_itt(&cmd, itt_base);
+ its_encode_valid(&cmd, valid);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPC);
+ its_encode_collection(&cmd, collection_id);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
+ its_encode_valid(&cmd, valid);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+ u32 collection_id, u32 intid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPTI);
+ its_encode_devid(&cmd, device_id);
+ its_encode_event_id(&cmd, event_id);
+ its_encode_phys_id(&cmd, intid);
+ its_encode_collection(&cmd, collection_id);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_INVALL);
+ its_encode_collection(&cmd, collection_id);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_SYNC);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
+
+ its_send_cmd(cmdq_base, &cmd);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S
new file mode 100644
index 000000000000..0e443eadfac6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/arm64/handlers.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+ add sp, sp, #-16 * 17
+
+ stp x0, x1, [sp, #16 * 0]
+ stp x2, x3, [sp, #16 * 1]
+ stp x4, x5, [sp, #16 * 2]
+ stp x6, x7, [sp, #16 * 3]
+ stp x8, x9, [sp, #16 * 4]
+ stp x10, x11, [sp, #16 * 5]
+ stp x12, x13, [sp, #16 * 6]
+ stp x14, x15, [sp, #16 * 7]
+ stp x16, x17, [sp, #16 * 8]
+ stp x18, x19, [sp, #16 * 9]
+ stp x20, x21, [sp, #16 * 10]
+ stp x22, x23, [sp, #16 * 11]
+ stp x24, x25, [sp, #16 * 12]
+ stp x26, x27, [sp, #16 * 13]
+ stp x28, x29, [sp, #16 * 14]
+
+ /*
+ * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+ * at it. It will _not_ be used to restore the sp on return from the
+ * exception so handlers can not update it.
+ */
+ add x1, sp, #16 * 17
+ stp x30, x1, [sp, #16 * 15] /* x30, SP */
+
+ mrs x1, elr_el1
+ mrs x2, spsr_el1
+ stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+ ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+ msr elr_el1, x1
+ msr spsr_el1, x2
+
+ /* sp is not restored */
+ ldp x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+ ldp x28, x29, [sp, #16 * 14]
+ ldp x26, x27, [sp, #16 * 13]
+ ldp x24, x25, [sp, #16 * 12]
+ ldp x22, x23, [sp, #16 * 11]
+ ldp x20, x21, [sp, #16 * 10]
+ ldp x18, x19, [sp, #16 * 9]
+ ldp x16, x17, [sp, #16 * 8]
+ ldp x14, x15, [sp, #16 * 7]
+ ldp x12, x13, [sp, #16 * 6]
+ ldp x10, x11, [sp, #16 * 5]
+ ldp x8, x9, [sp, #16 * 4]
+ ldp x6, x7, [sp, #16 * 3]
+ ldp x4, x5, [sp, #16 * 2]
+ ldp x2, x3, [sp, #16 * 1]
+ ldp x0, x1, [sp, #16 * 0]
+
+ add sp, sp, #16 * 17
+
+ eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+ save_registers
+ mov x0, sp
+ mov x1, #vector
+ bl route_exception
+ restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+ b handler_\label
+.popsection
+
+.set vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+ mov x0, #vector
+ mov x1, #0 /* ec */
+ mov x2, #0 /* valid_ec */
+ b kvm_exit_unexpected_exception
+.popsection
+
+.set vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+ HANDLER_INVALID // Synchronous EL1t
+ HANDLER_INVALID // IRQ EL1t
+ HANDLER_INVALID // FIQ EL1t
+ HANDLER_INVALID // Error EL1t
+
+ HANDLER el1h_sync // Synchronous EL1h
+ HANDLER el1h_irq // IRQ EL1h
+ HANDLER el1h_fiq // FIQ EL1h
+ HANDLER el1h_error // Error EL1h
+
+ HANDLER el0_sync_64 // Synchronous 64-bit EL0
+ HANDLER el0_irq_64 // IRQ 64-bit EL0
+ HANDLER el0_fiq_64 // FIQ 64-bit EL0
+ HANDLER el0_error_64 // Error 64-bit EL0
+
+ HANDLER el0_sync_32 // Synchronous 32-bit EL0
+ HANDLER el0_irq_32 // IRQ 32-bit EL0
+ HANDLER el0_fiq_32 // FIQ 32-bit EL0
+ HANDLER el0_error_32 // Error 32-bit EL0
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
new file mode 100644
index 000000000000..d46e4b13b92c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -0,0 +1,732 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AArch64 code
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "guest_modes.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+#include "vgic.h"
+
+#include <linux/bitfield.h>
+#include <linux/sizes.h>
+
+#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
+
+static vm_vaddr_t exception_handlers;
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
+
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
+ uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+ TEST_ASSERT(vm->pgtable_levels == 4,
+ "Mode %d does not have 4 page table levels", vm->mode);
+
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
+ uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+ TEST_ASSERT(vm->pgtable_levels >= 3,
+ "Mode %d does not have >= 3 page table levels", vm->mode);
+
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+ return (gva >> vm->page_shift) & mask;
+}
+
+static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
+{
+ return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
+ (vm->pa_bits > 48 || vm->va_bits > 48);
+}
+
+static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
+{
+ uint64_t pte;
+
+ if (use_lpa2_pte_format(vm)) {
+ pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT;
+ attrs &= ~PTE_ADDR_51_50_LPA2;
+ } else {
+ pte = pa & PTE_ADDR_MASK(vm->page_shift);
+ if (vm->page_shift == 16)
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT;
+ }
+ pte |= attrs;
+
+ return pte;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
+{
+ uint64_t pa;
+
+ if (use_lpa2_pte_format(vm)) {
+ pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50;
+ } else {
+ pa = pte & PTE_ADDR_MASK(vm->page_shift);
+ if (vm->page_shift == 16)
+ pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48;
+ }
+
+ return pa;
+}
+
+static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+{
+ unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ return 1 << (vm->va_bits - shift);
+}
+
+static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
+{
+ return 1 << (vm->page_shift - 3);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+
+ if (vm->pgd_created)
+ return;
+
+ vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->pgd_created = true;
+}
+
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t flags)
+{
+ uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT);
+ uint64_t pg_attr;
+ uint64_t *ptep;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PGD_TYPE_TABLE | PTE_VALID);
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PUD_TYPE_TABLE | PTE_VALID);
+ /* fall through */
+ case 3:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PMD_TYPE_TABLE | PTE_VALID);
+ /* fall through */
+ case 2:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+ break;
+ default:
+ TEST_FAIL("Page table levels must be 2, 3, or 4");
+ }
+
+ pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID;
+ if (!use_lpa2_pte_format(vm))
+ pg_attr |= PTE_SHARED;
+
+ *ptep = addr_pte(vm, paddr, pg_attr);
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint64_t attr_idx = MT_NORMAL;
+
+ _virt_pg_map(vm, vaddr, paddr, attr_idx);
+}
+
+uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ uint64_t *ptep;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ if (level == 0)
+ return ptep;
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ if (level == 1)
+ break;
+ /* fall through */
+ case 3:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ if (level == 2)
+ break;
+ /* fall through */
+ case 2:
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ break;
+ default:
+ TEST_FAIL("Page table levels must be 2, 3, or 4");
+ }
+
+ return ptep;
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+ exit(EXIT_FAILURE);
+}
+
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return virt_get_pte_hva_at_level(vm, gva, 3);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep = virt_get_pte_hva(vm, gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+#ifdef DEBUG
+ static const char * const type[] = { "", "pud", "pmd", "pte" };
+ uint64_t pte, *ptep;
+
+ if (level == 4)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
+ }
+#endif
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level = 4 - (vm->pgtable_levels - 1);
+ uint64_t pgd, *ptep;
+
+ if (!vm->pgd_created)
+ return;
+
+ for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+ ptep = addr_gpa2hva(vm, pgd);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
+ }
+}
+
+bool vm_supports_el2(struct kvm_vm *vm)
+{
+ const char *value = getenv("NV");
+
+ if (value && *value == '0')
+ return false;
+
+ return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic;
+}
+
+void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init)
+{
+ struct kvm_vcpu_init preferred = {};
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
+ if (vm_supports_el2(vm))
+ preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+
+ *init = preferred;
+}
+
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
+{
+ struct kvm_vcpu_init default_init = { .target = -1, };
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
+
+ if (!init) {
+ kvm_get_default_vcpu_target(vm, &default_init);
+ init = &default_init;
+ }
+
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
+ vcpu->init = *init;
+
+ /*
+ * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
+ * registers, which the variable argument list macros do.
+ */
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20);
+
+ sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1));
+ tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1));
+
+ /* Configure base granule size */
+ switch (vm->mode) {
+ case VM_MODE_PXXVYY_4K:
+ TEST_FAIL("AArch64 does not support 4K sized pages "
+ "with ANY-bit physical address ranges");
+ case VM_MODE_P52V48_64K:
+ case VM_MODE_P48V48_64K:
+ case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
+ tcr_el1 |= TCR_TG0_64K;
+ break;
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= TCR_TG0_16K;
+ break;
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
+ tcr_el1 |= TCR_TG0_4K;
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+
+ /* Configure output size */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P52V48_64K:
+ tcr_el1 |= TCR_IPS_52_BITS;
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
+ break;
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P48V48_64K:
+ tcr_el1 |= TCR_IPS_48_BITS;
+ break;
+ case VM_MODE_P40V48_4K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P40V48_64K:
+ tcr_el1 |= TCR_IPS_40_BITS;
+ break;
+ case VM_MODE_P36V48_4K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V48_64K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= TCR_IPS_36_BITS;
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I;
+
+ tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER;
+ tcr_el1 |= TCR_T0SZ(vm->va_bits);
+ if (use_lpa2_pte_format(vm))
+ tcr_el1 |= TCR_DS;
+
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
+
+ if (!vcpu_has_el2(vcpu))
+ return;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2),
+ HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+ uint64_t pstate, pc;
+
+ pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
+ pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+
+ fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
+ indent, "", pstate, pc);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init)
+{
+ size_t stack_size;
+ uint64_t stack_vaddr;
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+ stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ aarch64_vcpu_setup(vcpu, init);
+
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size);
+ return vcpu;
+}
+
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init, void *guest_code)
+{
+ struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
+
+ vcpu_arch_set_entry_point(vcpu, guest_code);
+
+ return vcpu;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ return __aarch64_vcpu_add(vm, vcpu_id, NULL);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ va_list ap;
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ " num: %u", num);
+
+ va_start(ap, num);
+
+ for (i = 0; i < num; i++) {
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
+ va_arg(ap, uint64_t));
+ }
+
+ va_end(ap);
+}
+
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+ ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+ while (1)
+ ;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ if (uc.args[2]) /* valid_ec */ {
+ assert(VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ } else {
+ assert(!VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx)",
+ uc.args[0]);
+ }
+}
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
+{
+ extern char vectors;
+
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+ struct handlers *handlers = (struct handlers *)exception_handlers;
+ bool valid_ec;
+ int ec = 0;
+
+ switch (vector) {
+ case VECTOR_SYNC_CURRENT:
+ case VECTOR_SYNC_LOWER_64:
+ ec = ESR_ELx_EC(read_sysreg(esr_el1));
+ valid_ec = true;
+ break;
+ case VECTOR_IRQ_CURRENT:
+ case VECTOR_IRQ_LOWER_64:
+ case VECTOR_FIQ_CURRENT:
+ case VECTOR_FIQ_LOWER_64:
+ case VECTOR_ERROR_CURRENT:
+ case VECTOR_ERROR_LOWER_64:
+ ec = 0;
+ valid_ec = false;
+ break;
+ default:
+ valid_ec = false;
+ goto unexpected_exception;
+ }
+
+ if (handlers && handlers->exception_handlers[vector][ec])
+ return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+ kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ vm->page_size, MEM_REGION_DATA);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ assert(ec <= ESR_ELx_EC_MAX);
+ handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(!VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return read_sysreg(tpidr_el1);
+}
+
+static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
+ uint32_t not_sup_val, uint32_t ipa52_min_val)
+{
+ if (gran == not_sup_val)
+ return 0;
+ else if (gran >= ipa52_min_val && vm_ipa >= 52)
+ return 52;
+ else
+ return min(vm_ipa, 48U);
+}
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+ uint32_t *ipa16k, uint32_t *ipa64k)
+{
+ struct kvm_vcpu_init preferred_init;
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ uint64_t val;
+ uint32_t gran;
+ struct kvm_one_reg reg = {
+ .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
+ .addr = (uint64_t)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
+ TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
+
+ err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
+ err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
+
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val);
+ *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
+ ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
+
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val);
+ *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
+ ID_AA64MMFR0_EL1_TGRAN64_IMP);
+
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val);
+ *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
+ ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
+
+#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \
+ arg6, res) \
+ asm volatile("mov w0, %w[function_id]\n" \
+ "mov x1, %[arg0]\n" \
+ "mov x2, %[arg1]\n" \
+ "mov x3, %[arg2]\n" \
+ "mov x4, %[arg3]\n" \
+ "mov x5, %[arg4]\n" \
+ "mov x6, %[arg5]\n" \
+ "mov x7, %[arg6]\n" \
+ #insn "#0\n" \
+ "mov %[res0], x0\n" \
+ "mov %[res1], x1\n" \
+ "mov %[res2], x2\n" \
+ "mov %[res3], x3\n" \
+ : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \
+ [res2] "=r"(res->a2), [res3] "=r"(res->a3) \
+ : [function_id] "r"(function_id), [arg0] "r"(arg0), \
+ [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \
+ [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \
+ : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
+
+
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
+
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
+
+void kvm_selftest_arch_init(void)
+{
+ /*
+ * arm64 doesn't have a true default mode, so start by computing the
+ * available IPA space and page sizes early.
+ */
+ guest_modes_append_default();
+}
+
+void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+ /*
+ * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
+ * is [0, 2^(64 - TCR_EL1.T0SZ)).
+ */
+ sparsebit_set_num(vm->vpages_valid, 0,
+ (1ULL << vm->va_bits) >> vm->page_shift);
+}
+
+/* Helper to call wfi instruction. */
+void wfi(void)
+{
+ asm volatile("wfi");
+}
+
+static bool request_mte;
+static bool request_vgic = true;
+
+void test_wants_mte(void)
+{
+ request_mte = true;
+}
+
+void test_disable_default_vgic(void)
+{
+ request_vgic = false;
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+ if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE))
+ vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0);
+
+ if (request_vgic && kvm_supports_vgic_v3()) {
+ vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64);
+ vm->arch.has_gic = true;
+ }
+}
+
+void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
+{
+ if (vm->arch.has_gic)
+ __vgic_v3_init(vm->arch.gic_fd);
+}
+
+void kvm_arch_vm_release(struct kvm_vm *vm)
+{
+ if (vm->arch.has_gic)
+ close(vm->arch.gic_fd);
+}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return request_vgic && kvm_supports_vgic_v3();
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c
new file mode 100644
index 000000000000..a076e780be5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/arm64/spinlock.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 Spinlock support
+ */
+#include <stdint.h>
+
+#include "spinlock.h"
+
+void spin_lock(struct spinlock *lock)
+{
+ int val, res;
+
+ asm volatile(
+ "1: ldaxr %w0, [%2]\n"
+ " cbnz %w0, 1b\n"
+ " mov %w0, #1\n"
+ " stxr %w1, %w0, [%2]\n"
+ " cbnz %w1, 1b\n"
+ : "=&r" (val), "=&r" (res)
+ : "r" (&lock->v)
+ : "memory");
+}
+
+void spin_unlock(struct spinlock *lock)
+{
+ asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c
new file mode 100644
index 000000000000..ddab0ce89d4d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+vm_vaddr_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+ vm->ucall_mmio_addr = mmio_gpa;
+
+ write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+ "Unexpected ucall exit mmio address access");
+ return (void *)(*((uint64_t *)run->mmio.data));
+ }
+
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c
new file mode 100644
index 000000000000..d0f7bd0984b8
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 host support
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/cputype.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+
+bool kvm_supports_vgic_v3(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ kvm_vm_free(vm);
+
+ return !r;
+}
+
+/*
+ * vGIC-v3 default host setup
+ *
+ * Input args:
+ * vm - KVM VM
+ * nr_vcpus - Number of vCPUs supported by this VM
+ *
+ * Output args: None
+ *
+ * Return: GIC file-descriptor or negative error code upon failure
+ *
+ * The function creates a vGIC-v3 device and maps the distributor and
+ * redistributor regions of the guest. Since it depends on the number of
+ * vCPUs for the VM, it must be called after all the vCPUs have been created.
+ */
+int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+{
+ int gic_fd;
+ uint64_t attr;
+ unsigned int nr_gic_pages;
+
+ /* Distributor setup */
+ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (gic_fd < 0)
+ return gic_fd;
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
+
+ attr = GICD_BASE_GPA;
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
+ virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages);
+
+ /* Redistributor setup */
+ attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0);
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
+ KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
+ virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
+
+ return gic_fd;
+}
+
+void __vgic_v3_init(int fd)
+{
+ kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+}
+
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+{
+ unsigned int nr_vcpus_created = 0;
+ struct list_head *iter;
+ int fd;
+
+ TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
+
+ /*
+ * Make sure that the caller is infact calling this
+ * function after all the vCPUs are added.
+ */
+ list_for_each(iter, &vm->vcpus)
+ nr_vcpus_created++;
+ TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+ "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
+ nr_vcpus, nr_vcpus_created);
+
+ fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs);
+ if (fd < 0)
+ return fd;
+
+ __vgic_v3_init(fd);
+ return fd;
+}
+
+/* should only work for level sensitive interrupts */
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ uint64_t attr = 32 * (intid / 32);
+ uint64_t index = intid % 32;
+ uint64_t val;
+ int ret;
+
+ ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
+ if (ret != 0)
+ return ret;
+
+ val |= 1U << index;
+ ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
+ return ret;
+}
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
+}
+
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+
+ TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
+ "doesn't allow injecting SGIs. There's no mask for it.");
+
+ if (INTID_IS_PPI(intid))
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+ else
+ irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ return _kvm_irq_line(vm, irq, level);
+}
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ int ret = _kvm_arm_irq_line(vm, intid, level);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
+}
+
+static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
+ uint64_t reg_off)
+{
+ uint64_t reg = intid / 32;
+ uint64_t index = intid % 32;
+ uint64_t attr = reg_off + reg * 4;
+ uint64_t val;
+ bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
+
+ uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+ : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
+
+ if (intid_is_private) {
+ /* TODO: only vcpu 0 implemented for now. */
+ assert(vcpu->id == 0);
+ attr += SZ_64K;
+ }
+
+ /* Check that the addr part of the attr is within 32 bits. */
+ assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
+
+ /*
+ * All calls will succeed, even with invalid intid's, as long as the
+ * addr part of the attr is within 32 bits (checked above). An invalid
+ * intid will just make the read/writes point to above the intended
+ * register space (i.e., ICPENDR after ISPENDR).
+ */
+ kvm_device_attr_get(gic_fd, group, attr, &val);
+ val |= 1ULL << index;
+ kvm_device_attr_set(gic_fd, group, attr, &val);
+}
+
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
+}
+
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
+}
+
+int vgic_its_setup(struct kvm_vm *vm)
+{
+ int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+ u64 attr;
+
+ attr = GITS_BASE_GPA;
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &attr);
+
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA,
+ vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE));
+
+ return its_fd;
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index 5ebbd0d6b472..b49690658c60 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -4,9 +4,6 @@
*
* Copyright (C) 2018, Google LLC.
*/
-
-#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
-
#include "test_util.h"
#include <execinfo.h>
@@ -22,7 +19,7 @@ static void test_dump_stack(void)
* Build and run this command:
*
* addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
- * grep -v test_dump_stack | cat -n 1>&2
+ * cat -n 1>&2
*
* Note that the spacing is different and there's no newline.
*/
@@ -36,18 +33,24 @@ static void test_dump_stack(void)
n * (((sizeof(void *)) * 2) + 1) +
/* Null terminator: */
1];
- char *c;
+ char *c = cmd;
n = backtrace(stack, n);
- c = &cmd[0];
- c += sprintf(c, "%s", addr2line);
/*
- * Skip the first 3 frames: backtrace, test_dump_stack, and
- * test_assert. We hope that backtrace isn't inlined and the other two
- * we've declared noinline.
+ * Skip the first 2 frames, which should be test_dump_stack() and
+ * test_assert(); both of which are declared noinline. Bail if the
+ * resulting stack trace would be empty. Otherwise, addr2line will block
+ * waiting for addresses to be passed in via stdin.
*/
+ if (n <= 2) {
+ fputs(" (stack trace empty)\n", stderr);
+ return;
+ }
+
+ c += sprintf(c, "%s", addr2line);
for (i = 2; i < n; i++)
c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+
c += sprintf(c, "%s", pipeline);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
@@ -71,9 +74,9 @@ test_assert(bool exp, const char *exp_str,
fprintf(stderr, "==== Test Assertion Failure ====\n"
" %s:%u: %s\n"
- " pid=%d tid=%d - %s\n",
+ " pid=%d tid=%d errno=%d - %s\n",
file, line, exp_str, getpid(), _gettid(),
- strerror(errno));
+ errno, strerror(errno));
test_dump_stack();
if (fmt) {
fputs(" ", stderr);
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index bc75a91e00a6..f34d926d9735 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -11,7 +11,6 @@
#include <linux/elf.h>
#include "kvm_util.h"
-#include "kvm_util_internal.h"
static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
{
@@ -91,6 +90,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
" hdrp->e_shentsize: %x\n"
" expected: %zx",
hdrp->e_shentsize, sizeof(Elf64_Shdr));
+ close(fd);
}
/* VM ELF Load
@@ -111,8 +111,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
* by the image and it needs to have sufficient available physical pages, to
* back the virtual pages used to load the image.
*/
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot)
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
{
off_t offset, offset_rv;
Elf64_Ehdr hdr;
@@ -140,7 +139,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
offset_rv = lseek(fd, offset, SEEK_SET);
TEST_ASSERT(offset_rv == offset,
- "Failed to seek to begining of program header %u,\n"
+ "Failed to seek to beginning of program header %u,\n"
" filename: %s\n"
" rv: %jd errno: %i",
n1, filename, (intmax_t) offset_rv, errno);
@@ -158,14 +157,13 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
"memsize of 0,\n"
" phdr index: %u p_memsz: 0x%" PRIx64,
n1, (uint64_t) phdr.p_memsz);
- vm_vaddr_t seg_vstart = phdr.p_vaddr;
- seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+ vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size);
vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
seg_vend |= vm->page_size - 1;
size_t seg_size = seg_vend - seg_vstart + 1;
- vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
- data_memslot, pgd_memslot);
+ vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart,
+ MEM_REGION_CODE);
TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
"virtual memory for segment at requested min addr,\n"
" segment idx: %u\n"
@@ -186,11 +184,12 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
"Seek to program segment offset failed,\n"
" program header idx: %u errno: %i\n"
" offset_rv: 0x%jx\n"
- " expected: 0x%jx\n",
+ " expected: 0x%jx",
n1, errno, (intmax_t) offset_rv,
(intmax_t) phdr.p_offset);
test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
phdr.p_filesz);
}
}
+ close(fd);
}
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index 25bff307c71f..b04901e55138 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -4,22 +4,81 @@
*/
#include "guest_modes.h"
+#ifdef __aarch64__
+#include "processor.h"
+enum vm_guest_mode vm_mode_default;
+#endif
+
struct guest_mode guest_modes[NUM_VM_MODES];
void guest_modes_append_default(void)
{
- guest_mode_append(VM_MODE_DEFAULT, true, true);
-
-#ifdef __aarch64__
- guest_mode_append(VM_MODE_P40V48_64K, true, true);
+#ifndef __aarch64__
+ guest_mode_append(VM_MODE_DEFAULT, true);
+#else
{
unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
- if (limit >= 52)
- guest_mode_append(VM_MODE_P52V48_64K, true, true);
- if (limit >= 48) {
- guest_mode_append(VM_MODE_P48V48_4K, true, true);
- guest_mode_append(VM_MODE_P48V48_64K, true, true);
+ uint32_t ipa4k, ipa16k, ipa64k;
+ int i;
+
+ aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k);
+
+ guest_mode_append(VM_MODE_P52V48_4K, ipa4k >= 52);
+ guest_mode_append(VM_MODE_P52V48_16K, ipa16k >= 52);
+ guest_mode_append(VM_MODE_P52V48_64K, ipa64k >= 52);
+
+ guest_mode_append(VM_MODE_P48V48_4K, ipa4k >= 48);
+ guest_mode_append(VM_MODE_P48V48_16K, ipa16k >= 48);
+ guest_mode_append(VM_MODE_P48V48_64K, ipa64k >= 48);
+
+ guest_mode_append(VM_MODE_P40V48_4K, ipa4k >= 40);
+ guest_mode_append(VM_MODE_P40V48_16K, ipa16k >= 40);
+ guest_mode_append(VM_MODE_P40V48_64K, ipa64k >= 40);
+
+ guest_mode_append(VM_MODE_P36V48_4K, ipa4k >= 36);
+ guest_mode_append(VM_MODE_P36V48_16K, ipa16k >= 36);
+ guest_mode_append(VM_MODE_P36V48_64K, ipa64k >= 36);
+ guest_mode_append(VM_MODE_P36V47_16K, ipa16k >= 36);
+
+ vm_mode_default = ipa4k >= 40 ? VM_MODE_P40V48_4K : NUM_VM_MODES;
+
+ /*
+ * Pick the first supported IPA size if the default
+ * isn't available.
+ */
+ for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) {
+ if (guest_modes[i].supported && guest_modes[i].enabled)
+ vm_mode_default = i;
}
+
+ TEST_ASSERT(vm_mode_default != NUM_VM_MODES,
+ "No supported mode!");
+ }
+#endif
+#ifdef __s390x__
+ {
+ int kvm_fd, vm_fd;
+ struct kvm_s390_vm_cpu_processor info;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ kvm_device_attr_get(vm_fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info);
+ close(vm_fd);
+ close(kvm_fd);
+ /* Starting with z13 we have 47bits of physical address */
+ if (info.ibc >= 0x30)
+ guest_mode_append(VM_MODE_P47V64_4K, true);
+ }
+#endif
+#ifdef __riscv
+ {
+ unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
+
+ if (sz >= 52)
+ guest_mode_append(VM_MODE_P52V48_4K, true);
+ if (sz >= 48)
+ guest_mode_append(VM_MODE_P48V48_4K, true);
}
#endif
}
@@ -64,7 +123,7 @@ void guest_modes_cmdline(const char *arg)
mode_selected = true;
}
- mode = strtoul(optarg, NULL, 10);
+ mode = atoi_non_negative("Guest mode ID", arg);
TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode);
guest_modes[mode].enabled = true;
}
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
new file mode 100644
index 000000000000..74627514c4d4
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#define APPEND_BUFFER_SAFE(str, end, v) \
+do { \
+ GUEST_ASSERT(str < end); \
+ *str++ = (v); \
+} while (0)
+
+static int isdigit(int ch)
+{
+ return (ch >= '0') && (ch <= '9');
+}
+
+static int skip_atoi(const char **s)
+{
+ int i = 0;
+
+ while (isdigit(**s))
+ i = i * 10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SMALL 32 /* Must be 32 == 0x20 */
+#define SPECIAL 64 /* 0x */
+
+#define __do_div(n, base) \
+({ \
+ int __res; \
+ \
+ __res = ((uint64_t) n) % (uint32_t) base; \
+ n = ((uint64_t) n) / (uint32_t) base; \
+ __res; \
+})
+
+static char *number(char *str, const char *end, long num, int base, int size,
+ int precision, int type)
+{
+ /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ char tmp[66];
+ char c, sign, locase;
+ int i;
+
+ /*
+ * locase = 0 or 0x20. ORing digits or letters with 'locase'
+ * produces same digits or (maybe lowercased) letters
+ */
+ locase = (type & SMALL);
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 16)
+ return NULL;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++] = '0';
+ else
+ while (num != 0)
+ tmp[i++] = (digits[__do_div(num, base)] | locase);
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type & (ZEROPAD + LEFT)))
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ if (sign)
+ APPEND_BUFFER_SAFE(str, end, sign);
+ if (type & SPECIAL) {
+ if (base == 8)
+ APPEND_BUFFER_SAFE(str, end, '0');
+ else if (base == 16) {
+ APPEND_BUFFER_SAFE(str, end, '0');
+ APPEND_BUFFER_SAFE(str, end, 'x');
+ }
+ }
+ if (!(type & LEFT))
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, c);
+ while (i < precision--)
+ APPEND_BUFFER_SAFE(str, end, '0');
+ while (i-- > 0)
+ APPEND_BUFFER_SAFE(str, end, tmp[i]);
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+
+ return str;
+}
+
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args)
+{
+ char *str, *end;
+ const char *s;
+ uint64_t num;
+ int i, base;
+ int len;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /*
+ * min. # of digits for integers; max
+ * number of chars for from string
+ */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+
+ end = buf + n;
+ GUEST_ASSERT(buf < end);
+ GUEST_ASSERT(n > 0);
+
+ for (str = buf; *fmt; ++fmt) {
+ if (*fmt != '%') {
+ APPEND_BUFFER_SAFE(str, end, *fmt);
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-':
+ flags |= LEFT;
+ goto repeat;
+ case '+':
+ flags |= PLUS;
+ goto repeat;
+ case ' ':
+ flags |= SPACE;
+ goto repeat;
+ case '#':
+ flags |= SPECIAL;
+ goto repeat;
+ case '0':
+ flags |= ZEROPAD;
+ goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /*
+ * Play nice with %llu, %llx, etc. KVM selftests only support
+ * 64-bit builds, so just treat %ll* the same as %l*.
+ */
+ if (qualifier == 'l' && *fmt == 'l')
+ ++fmt;
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT))
+ while (--field_width > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ APPEND_BUFFER_SAFE(str, end,
+ (uint8_t)va_arg(args, int));
+ while (--field_width > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT))
+ while (len < field_width--)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ for (i = 0; i < len; ++i)
+ APPEND_BUFFER_SAFE(str, end, *s++);
+ while (len < field_width--)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2 * sizeof(void *);
+ flags |= SPECIAL | SMALL | ZEROPAD;
+ }
+ str = number(str, end,
+ (uint64_t)va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+ case 'n':
+ if (qualifier == 'l') {
+ long *ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else {
+ int *ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ APPEND_BUFFER_SAFE(str, end, '%');
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'x':
+ flags |= SMALL;
+ case 'X':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ APPEND_BUFFER_SAFE(str, end, '%');
+ if (*fmt)
+ APPEND_BUFFER_SAFE(str, end, *fmt);
+ else
+ --fmt;
+ continue;
+ }
+ if (qualifier == 'l')
+ num = va_arg(args, uint64_t);
+ else if (qualifier == 'h') {
+ num = (uint16_t)va_arg(args, int);
+ if (flags & SIGN)
+ num = (int16_t)num;
+ } else if (flags & SIGN)
+ num = va_arg(args, int);
+ else
+ num = va_arg(args, uint32_t);
+ str = number(str, end, num, base, field_width, precision, flags);
+ }
+
+ GUEST_ASSERT(str < end);
+ *str = '\0';
+ return str - buf;
+}
+
+int guest_snprintf(char *buf, int n, const char *fmt, ...)
+{
+ va_list va;
+ int len;
+
+ va_start(va, fmt);
+ len = guest_vsnprintf(buf, n, fmt, va);
+ va_end(va);
+
+ return len;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index d787cb802b4a..8279b6ced8d2 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -4,206 +4,279 @@
*
* Copyright (C) 2018, Google LLC.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
-#include "kvm_util_internal.h"
#include "processor.h"
+#include "ucall_common.h"
#include <assert.h>
+#include <sched.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>
-#define KVM_UTIL_PGS_PER_HUGEPG 512
#define KVM_UTIL_MIN_PFN 2
-/* Aligns x up to the next multiple of size. Size must be a power of 2. */
-static void *align(void *x, size_t size)
+uint32_t guest_random_seed;
+struct guest_random_state guest_rng;
+static uint32_t last_guest_seed;
+
+static size_t vcpu_mmap_sz(void);
+
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help)
+{
+ int fd;
+
+ fd = open(path, flags);
+ if (fd < 0)
+ goto error;
+
+ return fd;
+
+error:
+ if (errno == EACCES || errno == ENOENT)
+ ksft_exit_skip("- Cannot open '%s': %s. %s\n",
+ path, strerror(errno),
+ errno == EACCES ? "Root required?" : enoent_help);
+ TEST_FAIL("Failed to open '%s'", path);
+}
+
+int open_path_or_exit(const char *path, int flags)
{
- size_t mask = size - 1;
- TEST_ASSERT(size != 0 && !(size & (size - 1)),
- "size not a power of 2: %lu", size);
- return (void *) (((size_t) x + mask) & ~mask);
+ return __open_path_or_exit(path, flags, "");
}
/*
- * Capability
+ * Open KVM_DEV_PATH if available, otherwise exit the entire program.
*
* Input Args:
- * cap - Capability
- *
- * Output Args: None
+ * flags - The flags to pass when opening KVM_DEV_PATH.
*
* Return:
- * On success, the Value corresponding to the capability (KVM_CAP_*)
- * specified by the value of cap. On failure a TEST_ASSERT failure
- * is produced.
- *
- * Looks up and returns the value corresponding to the capability
- * (KVM_CAP_*) given by cap.
+ * The opened file descriptor of /dev/kvm.
*/
-int kvm_check_cap(long cap)
+static int _open_kvm_dev_path_or_exit(int flags)
{
- int ret;
- int kvm_fd;
+ return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?");
+}
+
+int open_kvm_dev_path_or_exit(void)
+{
+ return _open_kvm_dev_path_or_exit(O_RDONLY);
+}
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+static ssize_t get_module_param(const char *module_name, const char *param,
+ void *buffer, size_t buffer_size)
+{
+ const int path_size = 128;
+ char path[path_size];
+ ssize_t bytes_read;
+ int fd, r;
- ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
- TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
- " rc: %i errno: %i", ret, errno);
+ /* Verify KVM is loaded, to provide a more helpful SKIP message. */
+ close(open_kvm_dev_path_or_exit());
- close(kvm_fd);
+ r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
+ module_name, param);
+ TEST_ASSERT(r < path_size,
+ "Failed to construct sysfs path in %d bytes.", path_size);
- return ret;
+ fd = open_path_or_exit(path, O_RDONLY);
+
+ bytes_read = read(fd, buffer, buffer_size);
+ TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes",
+ path, bytes_read, buffer_size);
+
+ r = close(fd);
+ TEST_ASSERT(!r, "close(%s) failed", path);
+ return bytes_read;
}
-/* VM Enable Capability
- *
- * Input Args:
- * vm - Virtual Machine
- * cap - Capability
- *
- * Output Args: None
- *
- * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
- *
- * Enables a capability (KVM_CAP_*) on the VM.
- */
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+int kvm_get_module_param_integer(const char *module_name, const char *param)
{
- int ret;
+ /*
+ * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
+ * NUL char, and 1 byte because the kernel sucks and inserts a newline
+ * at the end.
+ */
+ char value[16 + 1 + 1];
+ ssize_t r;
- ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
- TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
- " rc: %i errno: %i", ret, errno);
+ memset(value, '\0', sizeof(value));
- return ret;
+ r = get_module_param(module_name, param, value, sizeof(value));
+ TEST_ASSERT(value[r - 1] == '\n',
+ "Expected trailing newline, got char '%c'", value[r - 1]);
+
+ /*
+ * Squash the newline, otherwise atoi_paranoid() will complain about
+ * trailing non-NUL characters in the string.
+ */
+ value[r - 1] = '\0';
+ return atoi_paranoid(value);
+}
+
+bool kvm_get_module_param_bool(const char *module_name, const char *param)
+{
+ char value;
+ ssize_t r;
+
+ r = get_module_param(module_name, param, &value, sizeof(value));
+ TEST_ASSERT_EQ(r, 1);
+
+ if (value == 'Y')
+ return true;
+ else if (value == 'N')
+ return false;
+
+ TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}
-/* VCPU Enable Capability
+/*
+ * Capability
*
* Input Args:
- * vm - Virtual Machine
- * vcpu_id - VCPU
* cap - Capability
*
* Output Args: None
*
- * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
+ * Return:
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
*
- * Enables a capability (KVM_CAP_*) on the VCPU.
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
*/
-int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_enable_cap *cap)
+unsigned int kvm_check_cap(long cap)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
- int r;
+ int ret;
+ int kvm_fd;
- TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
+ kvm_fd = open_kvm_dev_path_or_exit();
+ ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
+ TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));
- r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
- TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
- " rc: %i, errno: %i", r, errno);
+ close(kvm_fd);
- return r;
+ return (unsigned int)ret;
}
void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
- struct kvm_enable_cap cap = { 0 };
-
- cap.cap = KVM_CAP_DIRTY_LOG_RING;
- cap.args[0] = ring_size;
- vm_enable_cap(vm, &cap);
+ if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
+ else
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
vm->dirty_ring_size = ring_size;
}
-static void vm_open(struct kvm_vm *vm, int perm)
+static void vm_open(struct kvm_vm *vm)
{
- vm->kvm_fd = open(KVM_DEV_PATH, perm);
- if (vm->kvm_fd < 0)
- exit(KSFT_SKIP);
+ vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);
- if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
- print_skip("immediate_exit not available");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));
+
+ vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
+ TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
- vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
- TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
- "rc: %i errno: %i", vm->fd, errno);
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vm->stats.fd = vm_get_stats_fd(vm);
+ else
+ vm->stats.fd = -1;
}
-const char * const vm_guest_mode_string[] = {
- "PA-bits:52, VA-bits:48, 4K pages",
- "PA-bits:52, VA-bits:48, 64K pages",
- "PA-bits:48, VA-bits:48, 4K pages",
- "PA-bits:48, VA-bits:48, 64K pages",
- "PA-bits:40, VA-bits:48, 4K pages",
- "PA-bits:40, VA-bits:48, 64K pages",
- "PA-bits:ANY, VA-bits:48, 4K pages",
-};
-_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
- "Missing new mode strings?");
+const char *vm_guest_mode_string(uint32_t i)
+{
+ static const char * const strings[] = {
+ [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
+ [VM_MODE_P52V48_16K] = "PA-bits:52, VA-bits:48, 16K pages",
+ [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages",
+ [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages",
+ [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages",
+ [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages",
+ [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
+ [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
+ [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
+ [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages",
+ [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
+ [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
+ [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
+ [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
+ [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages",
+ [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
+ };
+ _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
+ "Missing new mode strings?");
+
+ TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
+
+ return strings[i];
+}
const struct vm_guest_mode_params vm_guest_mode_params[] = {
- { 52, 48, 0x1000, 12 },
- { 52, 48, 0x10000, 16 },
- { 48, 48, 0x1000, 12 },
- { 48, 48, 0x10000, 16 },
- { 40, 48, 0x1000, 12 },
- { 40, 48, 0x10000, 16 },
- { 0, 0, 0x1000, 12 },
+ [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
+ [VM_MODE_P52V48_16K] = { 52, 48, 0x4000, 14 },
+ [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
+ [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
+ [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 },
+ [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 },
+ [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
+ [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
+ [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
+ [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 },
+ [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
+ [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
+ [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
+ [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
+ [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 },
+ [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
/*
- * VM Create
- *
- * Input Args:
- * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
- * phy_pages - Physical memory pages
- * perm - permission
- *
- * Output Args: None
+ * Initializes vm->vpages_valid to match the canonical VA space of the
+ * architecture.
*
- * Return:
- * Pointer to opaque structure that describes the created VM.
- *
- * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
- * When phy_pages is non-zero, a memory region of phy_pages physical pages
- * is created and mapped starting at guest physical address 0. The file
- * descriptor to control the created VM is created with the permissions
- * given by perm (e.g. O_RDWR).
+ * The default implementation is valid for architectures which split the
+ * range addressed by a single page table into a low and high region
+ * based on the MSB of the VA. On architectures with this behavior
+ * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1].
*/
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
{
- struct kvm_vm *vm;
+ sparsebit_set_num(vm->vpages_valid,
+ 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+ sparsebit_set_num(vm->vpages_valid,
+ (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+ (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+}
- pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
- vm_guest_mode_string(mode), phy_pages, perm);
+struct kvm_vm *____vm_create(struct vm_shape shape)
+{
+ struct kvm_vm *vm;
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficient Memory");
INIT_LIST_HEAD(&vm->vcpus);
- INIT_LIST_HEAD(&vm->userspace_mem_regions);
+ vm->regions.gpa_tree = RB_ROOT;
+ vm->regions.hva_tree = RB_ROOT;
+ hash_init(vm->regions.slot_hash);
- vm->mode = mode;
- vm->type = 0;
+ vm->mode = shape.mode;
+ vm->type = shape.type;
- vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
- vm->va_bits = vm_guest_mode_params[mode].va_bits;
- vm->page_size = vm_guest_mode_params[mode].page_size;
- vm->page_shift = vm_guest_mode_params[mode].page_shift;
+ vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
+ vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
+ vm->page_size = vm_guest_mode_params[vm->mode].page_size;
+ vm->page_shift = vm_guest_mode_params[vm->mode].page_shift;
/* Setup mode specific traits. */
switch (vm->mode) {
@@ -220,116 +293,256 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm->pgtable_levels = 3;
break;
case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
vm->pgtable_levels = 4;
break;
case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
+ vm->pgtable_levels = 3;
+ break;
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ vm->pgtable_levels = 4;
+ break;
+ case VM_MODE_P47V47_16K:
+ case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
- /*
- * Ignore KVM support for 5-level paging (vm->va_bits == 57),
- * it doesn't take effect unless a CR4.LA57 is set, which it
- * isn't for this VM_MODE.
- */
- TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
- "Linear address width (%d bits) not supported",
- vm->va_bits);
+ kvm_init_vm_address_properties(vm);
+
pr_debug("Guest physical address width detected: %d\n",
vm->pa_bits);
- vm->pgtable_levels = 4;
- vm->va_bits = 48;
+ pr_debug("Guest virtual address width detected: %d\n",
+ vm->va_bits);
+
+ if (vm->va_bits == 57) {
+ vm->pgtable_levels = 5;
+ } else {
+ TEST_ASSERT(vm->va_bits == 48,
+ "Unexpected guest virtual address width: %d",
+ vm->va_bits);
+ vm->pgtable_levels = 4;
+ }
#else
- TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
+ TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
break;
+ case VM_MODE_P47V64_4K:
+ vm->pgtable_levels = 5;
+ break;
+ case VM_MODE_P44V64_4K:
+ vm->pgtable_levels = 5;
+ break;
default:
- TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
+ TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
}
#ifdef __aarch64__
+ TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types");
if (vm->pa_bits != 40)
vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif
- vm_open(vm, perm);
+ vm_open(vm);
/* Limit to VA-bit canonical virtual addresses. */
vm->vpages_valid = sparsebit_alloc();
- sparsebit_set_num(vm->vpages_valid,
- 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
- sparsebit_set_num(vm->vpages_valid,
- (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
- (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+ vm_vaddr_populate_bitmap(vm);
/* Limit physical addresses to PA-bits. */
- vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+ vm->max_gfn = vm_compute_max_gfn(vm);
/* Allocate and setup memory for guest. */
vm->vpages_mapped = sparsebit_alloc();
- if (phy_pages != 0)
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- 0, 0, phy_pages, 0);
return vm;
}
-struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t extra_mem_pages, uint32_t num_percpu_pages,
- void *guest_code, uint32_t vcpuids[])
+static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
+ uint32_t nr_runnable_vcpus,
+ uint64_t extra_mem_pages)
{
- /* The maximum page table size for a memory region will be when the
- * smallest pages are used. Considering each page contains x page
+ uint64_t page_size = vm_guest_mode_params[mode].page_size;
+ uint64_t nr_pages;
+
+ TEST_ASSERT(nr_runnable_vcpus,
+ "Use vm_create_barebones() for VMs that _never_ have vCPUs");
+
+ TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
+ "nr_vcpus = %d too large for host, max-vcpus = %d",
+ nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
+
+ /*
+ * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the
+ * test code and other per-VM assets that will be loaded into memslot0.
+ */
+ nr_pages = 512;
+
+ /* Account for the per-vCPU stacks on behalf of the test. */
+ nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;
+
+ /*
+ * Account for the number of pages needed for the page tables. The
+ * maximum page table size for a memory region will be when the
+ * smallest page size is used. Considering each page contains x page
* table descriptors, the total extra size for page tables (for extra
* N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
* than N/x*2.
*/
- uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
- uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
- uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
- struct kvm_vm *vm;
- int i;
+ nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
- TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
- "nr_vcpus = %d too large for host, max-vcpus = %d",
- nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
+ /* Account for the number of pages needed by ucall. */
+ nr_pages += ucall_nr_pages_required(page_size);
+
+ return vm_adjust_num_guest_pages(mode, nr_pages);
+}
+
+void kvm_set_files_rlimit(uint32_t nr_vcpus)
+{
+ /*
+ * Each vCPU will open two file descriptors: the vCPU itself and the
+ * vCPU's binary stats file descriptor. Add an arbitrary amount of
+ * buffer for all other files a test may open.
+ */
+ int nr_fds_wanted = nr_vcpus * 2 + 100;
+ struct rlimit rl;
- pages = vm_adjust_num_guest_pages(mode, pages);
- vm = vm_create(mode, pages, O_RDWR);
+ /*
+ * Check that we're allowed to open nr_fds_wanted file descriptors and
+ * try raising the limits if needed.
+ */
+ TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+ if (rl.rlim_cur < nr_fds_wanted) {
+ rl.rlim_cur = nr_fds_wanted;
+ if (rl.rlim_max < nr_fds_wanted) {
+ int old_rlim_max = rl.rlim_max;
+
+ rl.rlim_max = nr_fds_wanted;
+ __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0,
+ "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
+ old_rlim_max, nr_fds_wanted);
+ } else {
+ TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+ }
+ }
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+}
+static bool is_guest_memfd_required(struct vm_shape shape)
+{
#ifdef __x86_64__
- vm_create_irqchip(vm);
+ return shape.type == KVM_X86_SNP_VM;
+#else
+ return false;
#endif
+}
+
+struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
+ uint64_t nr_extra_pages)
+{
+ uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus,
+ nr_extra_pages);
+ struct userspace_mem_region *slot0;
+ struct kvm_vm *vm;
+ int i, flags;
- for (i = 0; i < nr_vcpus; ++i) {
- uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
+ kvm_set_files_rlimit(nr_runnable_vcpus);
- vm_vcpu_add_default(vm, vcpuid, guest_code);
+ pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
+ vm_guest_mode_string(shape.mode), shape.type, nr_pages);
-#ifdef __x86_64__
- vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
-#endif
+ vm = ____vm_create(shape);
+
+ /*
+ * Force GUEST_MEMFD for the primary memory region if necessary, e.g.
+ * for CoCo VMs that require GUEST_MEMFD backed private memory.
+ */
+ flags = 0;
+ if (is_guest_memfd_required(shape))
+ flags |= KVM_MEM_GUEST_MEMFD;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags);
+ for (i = 0; i < NR_MEM_REGIONS; i++)
+ vm->memslots[i] = 0;
+
+ kvm_vm_elf_load(vm, program_invocation_name);
+
+ /*
+ * TODO: Add proper defines to protect the library's memslots, and then
+ * carve out memslot1 for the ucall MMIO address. KVM treats writes to
+ * read-only memslots as MMIO, and creating a read-only memslot for the
+ * MMIO region would prevent silently clobbering the MMIO region.
+ */
+ slot0 = memslot2region(vm, 0);
+ ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+ if (guest_random_seed != last_guest_seed) {
+ pr_info("Random seed: 0x%x\n", guest_random_seed);
+ last_guest_seed = guest_random_seed;
}
+ guest_rng = new_guest_random_state(guest_random_seed);
+ sync_global_to_guest(vm, guest_rng);
+
+ kvm_arch_vm_post_create(vm, nr_runnable_vcpus);
return vm;
}
-struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[])
+/*
+ * VM Create with customized parameters
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
+ * nr_vcpus - VCPU count
+ * extra_mem_pages - Non-slot0 physical memory total size
+ * guest_code - Guest entry point
+ * vcpuids - VCPU IDs
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
+ * extra_mem_pages is only used to calculate the maximum page table size,
+ * no real memory allocation for non-slot0 memory in this function.
+ */
+struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
+ uint64_t extra_mem_pages,
+ void *guest_code, struct kvm_vcpu *vcpus[])
{
- return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages,
- num_percpu_pages, guest_code, vcpuids);
+ struct kvm_vm *vm;
+ int i;
+
+ TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");
+
+ vm = __vm_create(shape, nr_vcpus, extra_mem_pages);
+
+ for (i = 0; i < nr_vcpus; ++i)
+ vcpus[i] = vm_vcpu_add(vm, i, guest_code);
+
+ kvm_arch_vm_finalize_vcpus(vm);
+ return vm;
}
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code)
+struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code)
{
- return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
- (uint32_t []){ vcpuid });
+ struct kvm_vcpu *vcpus[1];
+ struct kvm_vm *vm;
+
+ vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus);
+
+ *vcpu = vcpus[0];
+ return vm;
}
/*
@@ -337,7 +550,6 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
*
* Input Args:
* vm - VM that has been released before
- * perm - permission
*
* Output Args: None
*
@@ -345,17 +557,19 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
* global state, such as the irqchip and the memory regions that are mapped
* into the guest.
*/
-void kvm_vm_restart(struct kvm_vm *vmp, int perm)
+void kvm_vm_restart(struct kvm_vm *vmp)
{
+ int ctr;
struct userspace_mem_region *region;
- vm_open(vmp, perm);
+ vm_open(vmp);
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
- list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
- int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
+ int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, &region->region);
+
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
" guest_phys_addr: 0x%llx size: 0x%llx",
@@ -366,32 +580,86 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
}
}
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
+ uint32_t vcpu_id)
{
- struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
- int ret;
+ return __vm_vcpu_add(vm, vcpu_id);
+}
- ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
- TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
- __func__, strerror(-ret));
+struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
+{
+ kvm_vm_restart(vm);
+
+ return vm_vcpu_recreate(vm, 0);
}
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages)
+int __pin_task_to_cpu(pthread_t task, int cpu)
{
- struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
- .first_page = first_page,
- .num_pages = num_pages };
- int ret;
+ cpu_set_t cpuset;
- ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
- TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
- __func__, strerror(-ret));
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset);
}
-uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
{
- return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
+ uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);
+
+ TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
+ "Not allowed to run on pCPU '%d', check cgroups?", pcpu);
+ return pcpu;
+}
+
+void kvm_print_vcpu_pinning_help(void)
+{
+ const char *name = program_invocation_name;
+
+ printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n"
+ " values (target pCPU), one for each vCPU, plus an optional\n"
+ " entry for the main application task (specified via entry\n"
+ " <nr_vcpus + 1>). If used, entries must be provided for all\n"
+ " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
+ " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
+ " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
+ " %s -v 3 -c 22,23,24,50\n\n"
+ " To leave the application task unpinned, drop the final entry:\n\n"
+ " %s -v 3 -c 22,23,24\n\n"
+ " (default: no pinning)\n", name, name);
+}
+
+void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+ int nr_vcpus)
+{
+ cpu_set_t allowed_mask;
+ char *cpu, *cpu_list;
+ char delim[2] = ",";
+ int i, r;
+
+ cpu_list = strdup(pcpus_string);
+ TEST_ASSERT(cpu_list, "strdup() allocation failed.");
+
+ r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_getaffinity() failed");
+
+ cpu = strtok(cpu_list, delim);
+
+ /* 1. Get all pcpus for vcpus. */
+ for (i = 0; i < nr_vcpus; i++) {
+ TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i);
+ vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
+ cpu = strtok(NULL, delim);
+ }
+
+ /* 2. Check if the main worker needs to be pinned. */
+ if (cpu) {
+ pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask));
+ cpu = strtok(NULL, delim);
+ }
+
+ TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
+ free(cpu_list);
}
/*
@@ -416,74 +684,43 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ for (node = vm->regions.gpa_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, gpa_node);
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
if (start <= existing_end && end >= existing_start)
return region;
+
+ if (start < existing_start)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
}
return NULL;
}
-/*
- * KVM Userspace Memory Region Find
- *
- * Input Args:
- * vm - Virtual Machine
- * start - Starting VM physical address
- * end - Ending VM physical address, inclusive.
- *
- * Output Args: None
- *
- * Return:
- * Pointer to overlapping region, NULL if no such region.
- *
- * Public interface to userspace_mem_region_find. Allows tests to look up
- * the memslot datastructure for a given range of guest physical memory.
- */
-struct kvm_userspace_memory_region *
-kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
- uint64_t end)
+static void kvm_stats_release(struct kvm_binary_stats *stats)
{
- struct userspace_mem_region *region;
+ if (stats->fd < 0)
+ return;
- region = userspace_mem_region_find(vm, start, end);
- if (!region)
- return NULL;
+ if (stats->desc) {
+ free(stats->desc);
+ stats->desc = NULL;
+ }
- return &region->region;
+ kvm_close(stats->fd);
+ stats->fd = -1;
}
-/*
- * VCPU Find
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return:
- * Pointer to VCPU structure
- *
- * Locates a vcpu structure that describes the VCPU specified by vcpuid and
- * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
- * for the specified vcpuid.
- */
-struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
+__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu;
-
- list_for_each_entry(vcpu, &vm->vcpus, list) {
- if (vcpu->id == vcpuid)
- return vcpu;
- }
- return NULL;
}
/*
@@ -498,60 +735,57 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
*
* Removes a vCPU from a VM and frees its resources.
*/
-static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
+static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
- int ret;
-
if (vcpu->dirty_gfns) {
- ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
- TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
- "rc: %i errno: %i", ret, errno);
+ kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->state, sizeof(*vcpu->state));
- TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
- "errno: %i", ret, errno);
- close(vcpu->fd);
- TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
- "errno: %i", ret, errno);
+ kvm_munmap(vcpu->run, vcpu_mmap_sz());
+
+ kvm_close(vcpu->fd);
+ kvm_stats_release(&vcpu->stats);
list_del(&vcpu->list);
+
+ vcpu_arch_free(vcpu);
free(vcpu);
}
void kvm_vm_release(struct kvm_vm *vmp)
{
- struct vcpu *vcpu, *tmp;
- int ret;
+ struct kvm_vcpu *vcpu, *tmp;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
vm_vcpu_rm(vmp, vcpu);
- ret = close(vmp->fd);
- TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
- " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+ kvm_close(vmp->fd);
+ kvm_close(vmp->kvm_fd);
- close(vmp->kvm_fd);
- TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
- " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
+ /* Free cached stats metadata and close FD */
+ kvm_stats_release(&vmp->stats);
+
+ kvm_arch_vm_release(vmp);
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
struct userspace_mem_region *region)
{
- int ret;
-
- list_del(&region->list);
-
- region->region.memory_size = 0;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
+ rb_erase(&region->hva_node, &vm->regions.hva_tree);
+ hash_del(&region->slot_node);
sparsebit_free(&region->unused_phy_pages);
- ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
+ sparsebit_free(&region->protected_phy_pages);
+ kvm_munmap(region->mmap_start, region->mmap_size);
+ if (region->fd >= 0) {
+ /* There's an extra map when using shared memory. */
+ kvm_munmap(region->mmap_alias, region->mmap_size);
+ close(region->fd);
+ }
+ if (region->region.guest_memfd >= 0)
+ close(region->region.guest_memfd);
free(region);
}
@@ -561,13 +795,15 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
- struct userspace_mem_region *region, *tmp;
+ int ctr;
+ struct hlist_node *node;
+ struct userspace_mem_region *region;
if (vmp == NULL)
return;
/* Free userspace_mem_regions. */
- list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
+ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
__vm_mem_region_delete(vmp, region);
/* Free sparsebit arrays. */
@@ -580,141 +816,179 @@ void kvm_vm_free(struct kvm_vm *vmp)
free(vmp);
}
-/*
- * Memory Compare, host virtual to guest virtual
- *
- * Input Args:
- * hva - Starting host virtual address
- * vm - Virtual Machine
- * gva - Starting guest virtual address
- * len - number of bytes to compare
- *
- * Output Args: None
- *
- * Input/Output Args: None
- *
- * Return:
- * Returns 0 if the bytes starting at hva for a length of len
- * are equal the guest virtual bytes starting at gva. Returns
- * a value < 0, if bytes at hva are less than those at gva.
- * Otherwise a value > 0 is returned.
- *
- * Compares the bytes starting at the host virtual address hva, for
- * a length of len, to the guest bytes starting at the guest virtual
- * address given by gva.
- */
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+int kvm_memfd_alloc(size_t size, bool hugepages)
{
- size_t amt;
+ int memfd_flags = MFD_CLOEXEC;
+ int fd;
- /*
- * Compare a batch of bytes until either a match is found
- * or all the bytes have been compared.
- */
- for (uintptr_t offset = 0; offset < len; offset += amt) {
- uintptr_t ptr1 = (uintptr_t)hva + offset;
+ if (hugepages)
+ memfd_flags |= MFD_HUGETLB;
- /*
- * Determine host address for guest virtual address
- * at offset.
- */
- uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
+ fd = memfd_create("kvm_selftest", memfd_flags);
+ TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
- /*
- * Determine amount to compare on this pass.
- * Don't allow the comparsion to cross a page boundary.
- */
- amt = len - offset;
- if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
- amt = vm->page_size - (ptr1 % vm->page_size);
- if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
- amt = vm->page_size - (ptr2 % vm->page_size);
+ kvm_ftruncate(fd, size);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
+
+ return fd;
+}
- assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
- assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
+static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
+ struct userspace_mem_region *region)
+{
+ struct rb_node **cur, *parent;
- /*
- * Perform the comparison. If there is a difference
- * return that result to the caller, otherwise need
- * to continue on looking for a mismatch.
- */
- int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
- if (ret != 0)
- return ret;
+ for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
+ struct userspace_mem_region *cregion;
+
+ cregion = container_of(*cur, typeof(*cregion), gpa_node);
+ parent = *cur;
+ if (region->region.guest_phys_addr <
+ cregion->region.guest_phys_addr)
+ cur = &(*cur)->rb_left;
+ else {
+ TEST_ASSERT(region->region.guest_phys_addr !=
+ cregion->region.guest_phys_addr,
+ "Duplicate GPA in region tree");
+
+ cur = &(*cur)->rb_right;
+ }
}
- /*
- * No mismatch found. Let the caller know the two memory
- * areas are equal.
- */
- return 0;
+ rb_link_node(&region->gpa_node, parent, cur);
+ rb_insert_color(&region->gpa_node, gpa_tree);
}
-/*
- * VM Userspace Memory Region Add
- *
- * Input Args:
- * vm - Virtual Machine
- * backing_src - Storage source for this region.
- * NULL to use anonymous memory.
- * guest_paddr - Starting guest physical address
- * slot - KVM region slot
- * npages - Number of physical pages
- * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
- *
- * Output Args: None
- *
- * Return: None
- *
- * Allocates a memory area of the number of pages specified by npages
- * and maps it to the VM specified by vm, at a starting physical address
- * given by guest_paddr. The region is created with a KVM region slot
- * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
- * region is created with the flags given by flags.
- */
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags)
+static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
+ struct userspace_mem_region *region)
+{
+ struct rb_node **cur, *parent;
+
+ for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
+ struct userspace_mem_region *cregion;
+
+ cregion = container_of(*cur, typeof(*cregion), hva_node);
+ parent = *cur;
+ if (region->host_mem < cregion->host_mem)
+ cur = &(*cur)->rb_left;
+ else {
+ TEST_ASSERT(region->host_mem !=
+ cregion->host_mem,
+ "Duplicate HVA in region tree");
+
+ cur = &(*cur)->rb_right;
+ }
+ }
+
+ rb_link_node(&region->hva_node, parent, cur);
+ rb_insert_color(&region->hva_node, hva_tree);
+}
+
+
+int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva)
+{
+ struct kvm_userspace_memory_region region = {
+ .slot = slot,
+ .flags = flags,
+ .guest_phys_addr = gpa,
+ .memory_size = size,
+ .userspace_addr = (uintptr_t)hva,
+ };
+
+ return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
+}
+
+void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva)
+{
+ int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);
+
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
+ errno, strerror(errno));
+}
+
+#define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \
+ __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \
+ "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)")
+
+int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset)
+{
+ struct kvm_userspace_memory_region2 region = {
+ .slot = slot,
+ .flags = flags,
+ .guest_phys_addr = gpa,
+ .memory_size = size,
+ .userspace_addr = (uintptr_t)hva,
+ .guest_memfd = guest_memfd,
+ .guest_memfd_offset = guest_memfd_offset,
+ };
+
+ TEST_REQUIRE_SET_USER_MEMORY_REGION2();
+
+ return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region);
+}
+
+void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset)
+{
+ int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva,
+ guest_memfd, guest_memfd_offset);
+
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)",
+ errno, strerror(errno));
+}
+
+
+/* FIXME: This thing needs to be ripped apart and rewritten. */
+void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd, uint64_t guest_memfd_offset)
{
int ret;
struct userspace_mem_region *region;
- size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+ size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
+ size_t mem_size = npages * vm->page_size;
size_t alignment;
+ TEST_REQUIRE_SET_USER_MEMORY_REGION2();
+
TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
"Number of guest pages is not compatible with the host. "
"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
- TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical "
"address not on a page boundary.\n"
- " guest_paddr: 0x%lx vm->page_size: 0x%x",
- guest_paddr, vm->page_size);
- TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ " gpa: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->page_size);
+ TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1)
<= vm->max_gfn, "Physical range beyond maximum "
"supported physical address,\n"
- " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " gpa: 0x%lx npages: 0x%lx\n"
" vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- guest_paddr, npages, vm->max_gfn, vm->page_size);
+ gpa, npages, vm->max_gfn, vm->page_size);
/*
* Confirm a mem region with an overlapping address doesn't
* already exist.
*/
region = (struct userspace_mem_region *) userspace_mem_region_find(
- vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+ vm, gpa, (gpa + npages * vm->page_size) - 1);
if (region != NULL)
TEST_FAIL("overlapping userspace_mem_region already "
"exists\n"
- " requested guest_paddr: 0x%lx npages: 0x%lx "
- "page_size: 0x%x\n"
- " existing guest_paddr: 0x%lx size: 0x%lx",
- guest_paddr, npages, vm->page_size,
+ " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n"
+ " existing gpa: 0x%lx size: 0x%lx",
+ gpa, npages, vm->page_size,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ slot) {
if (region->region.slot != slot)
continue;
@@ -722,8 +996,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
"already exists.\n"
" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
" existing slot: %u paddr: 0x%lx size: 0x%lx",
- slot, guest_paddr, npages,
- region->region.slot,
+ slot, gpa, npages, region->region.slot,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
}
@@ -731,7 +1004,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
/* Allocate and initialize new mem region structure. */
region = calloc(1, sizeof(*region));
TEST_ASSERT(region != NULL, "Insufficient Memory");
- region->mmap_size = npages * vm->page_size;
+ region->mmap_size = mem_size;
#ifdef __s390x__
/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
@@ -740,62 +1013,112 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
alignment = 1;
#endif
+ /*
+ * When using THP mmap is not guaranteed to returned a hugepage aligned
+ * address so we have to pad the mmap. Padding is not needed for HugeTLB
+ * because mmap will always return an address aligned to the HugeTLB
+ * page size.
+ */
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
- alignment = max(huge_page_size, alignment);
+ alignment = max(backing_src_pagesz, alignment);
+
+ TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz));
/* Add enough memory to align up if necessary */
if (alignment > 1)
region->mmap_size += alignment;
- region->mmap_start = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS
- | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
- -1, 0);
- TEST_ASSERT(region->mmap_start != MAP_FAILED,
- "test_malloc failed, mmap_start: %p errno: %i",
- region->mmap_start, errno);
+ region->fd = -1;
+ if (backing_src_is_shared(src_type))
+ region->fd = kvm_memfd_alloc(region->mmap_size,
+ src_type == VM_MEM_SRC_SHARED_HUGETLB);
- /* Align host address */
- region->host_mem = align(region->mmap_start, alignment);
+ region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
- /* As needed perform madvise */
- if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
- struct stat statbuf;
+ TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
+ region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
+ "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
+ region->mmap_start, backing_src_pagesz);
- ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
- TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
- "stat /sys/kernel/mm/transparent_hugepage");
+ /* Align host address */
+ region->host_mem = align_ptr_up(region->mmap_start, alignment);
- TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP,
- "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel");
+ /* As needed perform madvise */
+ if ((src_type == VM_MEM_SRC_ANONYMOUS ||
+ src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
+ ret = madvise(region->host_mem, mem_size,
+ src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
+ region->host_mem, mem_size,
+ vm_mem_backing_src_alias(src_type)->name);
+ }
- if (ret == 0) {
- ret = madvise(region->host_mem, npages * vm->page_size,
- src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
- TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x",
- region->host_mem, npages * vm->page_size, src_type);
+ region->backing_src_type = src_type;
+
+ if (flags & KVM_MEM_GUEST_MEMFD) {
+ if (guest_memfd < 0) {
+ uint32_t guest_memfd_flags = 0;
+ TEST_ASSERT(!guest_memfd_offset,
+ "Offset must be zero when creating new guest_memfd");
+ guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
+ } else {
+ /*
+ * Install a unique fd for each memslot so that the fd
+ * can be closed when the region is deleted without
+ * needing to track if the fd is owned by the framework
+ * or by the caller.
+ */
+ guest_memfd = kvm_dup(guest_memfd);
}
+
+ region->region.guest_memfd = guest_memfd;
+ region->region.guest_memfd_offset = guest_memfd_offset;
+ } else {
+ region->region.guest_memfd = -1;
}
region->unused_phy_pages = sparsebit_alloc();
- sparsebit_set_num(region->unused_phy_pages,
- guest_paddr >> vm->page_shift, npages);
+ if (vm_arch_has_protected_memory(vm))
+ region->protected_phy_pages = sparsebit_alloc();
+ sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages);
region->region.slot = slot;
region->region.flags = flags;
- region->region.guest_phys_addr = guest_paddr;
+ region->region.guest_phys_addr = gpa;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx",
- ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size);
+ " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d",
+ ret, errno, slot, flags, gpa, region->region.memory_size,
+ region->region.guest_memfd);
+
+ /* Add to quick lookup data structures */
+ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
+ vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
+ hash_add(vm->regions.slot_hash, &region->slot_node, slot);
+
+ /* If shared memory, create an alias. */
+ if (region->fd >= 0) {
+ region->mmap_alias = kvm_mmap(region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
+
+ /* Align host alias address */
+ region->host_alias = align_ptr_up(region->mmap_alias, alignment);
+ }
+}
- /* Add to linked-list of memory regions. */
- list_add(&region->list, &vm->userspace_mem_regions);
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags)
+{
+ vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0);
}
/*
@@ -818,10 +1141,10 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ memslot)
if (region->region.slot == memslot)
return region;
- }
fprintf(stderr, "No mem region with the requested slot found,\n"
" requested slot: %u\n", memslot);
@@ -854,13 +1177,23 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
region->region.flags = flags;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i slot: %u flags: 0x%x",
ret, errno, slot, flags);
}
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
+{
+ struct userspace_mem_region *region = memslot2region(vm, slot);
+ struct kvm_userspace_memory_region2 tmp = region->region;
+
+ tmp.memory_size = 0;
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+}
+
/*
* VM Memory Region Move
*
@@ -884,9 +1217,9 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
region->region.guest_phys_addr = new_gpa;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
- TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n"
"ret: %i errno: %i slot: %u new_gpa: 0x%lx",
ret, errno, slot, new_gpa);
}
@@ -906,85 +1239,105 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
*/
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
- __vm_mem_region_delete(vm, memslot2region(vm, slot));
+ struct userspace_mem_region *region = memslot2region(vm, slot);
+
+ region->region.memory_size = 0;
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+
+ __vm_mem_region_delete(vm, region);
}
-/*
- * VCPU mmap Size
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return:
- * Size of VCPU state
- *
- * Returns the size of the structure pointed to by the return value
- * of vcpu_state().
- */
-static int vcpu_mmap_sz(void)
+void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
+ bool punch_hole)
+{
+ const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0);
+ struct userspace_mem_region *region;
+ uint64_t end = base + size;
+ uint64_t gpa, len;
+ off_t fd_offset;
+ int ret;
+
+ for (gpa = base; gpa < end; gpa += len) {
+ uint64_t offset;
+
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD,
+ "Private memory region not found for GPA 0x%lx", gpa);
+
+ offset = gpa - region->region.guest_phys_addr;
+ fd_offset = region->region.guest_memfd_offset + offset;
+ len = min_t(uint64_t, end - gpa, region->region.memory_size - offset);
+
+ ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
+ TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx",
+ punch_hole ? "punch hole" : "allocate", gpa, len,
+ region->region.guest_memfd, mode, fd_offset);
+ }
+}
+
+/* Returns the size of a vCPU's kvm_run structure. */
+static size_t vcpu_mmap_sz(void)
{
int dev_fd, ret;
- dev_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (dev_fd < 0)
- exit(KSFT_SKIP);
+ dev_fd = open_kvm_dev_path_or_exit();
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
- TEST_ASSERT(ret >= sizeof(struct kvm_run),
- "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
- __func__, ret, errno);
+ TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run),
+ KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));
close(dev_fd);
return ret;
}
+static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_vcpu *vcpu;
+
+ list_for_each_entry(vcpu, &vm->vcpus, list) {
+ if (vcpu->id == vcpu_id)
+ return true;
+ }
+
+ return false;
+}
+
/*
- * VM VCPU Add
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return: None
- *
- * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
- * No additional VCPU setup is done.
+ * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
+ * No additional vCPU setup is done. Returns the vCPU.
*/
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
- struct vcpu *vcpu;
+ struct kvm_vcpu *vcpu;
/* Confirm a vcpu with the specified id doesn't already exist. */
- vcpu = vcpu_find(vm, vcpuid);
- if (vcpu != NULL)
- TEST_FAIL("vcpu with the specified id "
- "already exists,\n"
- " requested vcpuid: %u\n"
- " existing vcpuid: %u state: %p",
- vcpuid, vcpu->id, vcpu->state);
+ TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id);
/* Allocate and initialize new vcpu structure. */
vcpu = calloc(1, sizeof(*vcpu));
TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
- vcpu->id = vcpuid;
- vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
- TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
- vcpu->fd, errno);
-
- TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
- "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
- vcpu_mmap_sz(), sizeof(*vcpu->state));
- vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
- PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
- TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
- "vcpu id: %u errno: %i", vcpuid, errno);
+
+ vcpu->vm = vm;
+ vcpu->id = vcpu_id;
+ vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
+ TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);
+
+ TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
+ "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi",
+ vcpu_mmap_sz(), sizeof(*vcpu->run));
+ vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE,
+ MAP_SHARED, vcpu->fd);
+
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
+ else
+ vcpu->stats.fd = -1;
/* Add to linked-list of VCPUs. */
list_add(&vcpu->list, &vm->vcpus);
+
+ return vcpu;
}
/*
@@ -1007,8 +1360,8 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
* TEST_ASSERT failure occurs for invalid input or no area of at least
* sz unallocated bytes >= vaddr_min is available.
*/
-static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min)
+vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min)
{
uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
@@ -1073,6 +1426,48 @@ va_found:
return pgidx_start * vm->page_size;
}
+static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type,
+ bool protected)
+{
+ uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+
+ virt_pgd_alloc(vm);
+ vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages,
+ KVM_UTIL_MIN_PFN * vm->page_size,
+ vm->memslots[type], protected);
+
+ /*
+ * Find an unused range of virtual page addresses of at least
+ * pages in length.
+ */
+ vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+ /* Map the virtual pages. */
+ for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
+ pages--, vaddr += vm->page_size, paddr += vm->page_size) {
+
+ virt_pg_map(vm, vaddr, paddr);
+ }
+
+ return vaddr_start;
+}
+
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type)
+{
+ return ____vm_vaddr_alloc(vm, sz, vaddr_min, type,
+ vm_arch_has_protected_memory(vm));
+}
+
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type)
+{
+ return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false);
+}
+
/*
* VM Virtual Address Allocate
*
@@ -1080,8 +1475,6 @@ va_found:
* vm - Virtual Machine
* sz - Size in bytes
* vaddr_min - Minimum starting virtual address
- * data_memslot - Memory region slot for data pages
- * pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
@@ -1092,36 +1485,54 @@ va_found:
* given by vm. The allocated bytes are mapped to a virtual address >=
* the address given by vaddr_min. Note that each allocation uses a
* a unique set of pages, with the minimum real allocation being at least
- * a page.
+ * a page. The allocated physical space comes from the TEST_DATA memory region.
*/
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot)
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
- uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
-
- virt_pgd_alloc(vm, pgd_memslot);
-
- /*
- * Find an unused range of virtual page addresses of at least
- * pages in length.
- */
- vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
-
- /* Map the virtual pages. */
- for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
- pages--, vaddr += vm->page_size) {
- vm_paddr_t paddr;
-
- paddr = vm_phy_page_alloc(vm,
- KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
+ return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
+}
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+/*
+ * VM Virtual Address Allocate Pages
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least N system pages worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+{
+ return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
+}
- sparsebit_set(vm->vpages_mapped,
- vaddr >> vm->page_shift);
- }
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
+{
+ return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
+}
- return vaddr_start;
+/*
+ * VM Virtual Address Allocate Page
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least one system page worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+{
+ return vm_vaddr_alloc_pages(vm, 1);
}
/*
@@ -1132,7 +1543,6 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* vaddr - Virtuall address to map
* paddr - VM Physical Address
* npages - The number of pages to map
- * pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
@@ -1142,7 +1552,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* @npages starting at @vaddr to the page range starting at @paddr.
*/
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot)
+ unsigned int npages)
{
size_t page_size = vm->page_size;
size_t size = npages * page_size;
@@ -1151,7 +1561,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ virt_pg_map(vm, vaddr, paddr);
+
vaddr += page_size;
paddr += page_size;
}
@@ -1178,16 +1589,16 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((gpa >= region->region.guest_phys_addr)
- && (gpa <= (region->region.guest_phys_addr
- + region->region.memory_size - 1)))
- return (void *) ((uintptr_t) region->host_mem
- + (gpa - region->region.guest_phys_addr));
+ gpa = vm_untag_gpa(vm, gpa);
+
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region) {
+ TEST_FAIL("No vm physical memory at 0x%lx", gpa);
+ return NULL;
}
- TEST_FAIL("No vm physical memory at 0x%lx", gpa);
- return NULL;
+ return (void *)((uintptr_t)region->host_mem
+ + (gpa - region->region.guest_phys_addr));
}
/*
@@ -1209,15 +1620,22 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
*/
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
+
+ for (node = vm->regions.hva_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, hva_node);
+
+ if (hva >= region->host_mem) {
+ if (hva <= (region->host_mem
+ + region->region.memory_size - 1))
+ return (vm_paddr_t)((uintptr_t)
+ region->region.guest_phys_addr
+ + (hva - (uintptr_t)region->host_mem));
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((hva >= region->host_mem)
- && (hva <= (region->host_mem
- + region->region.memory_size - 1)))
- return (vm_paddr_t) ((uintptr_t)
- region->region.guest_phys_addr
- + (hva - (uintptr_t) region->host_mem));
+ node = node->rb_right;
+ } else
+ node = node->rb_left;
}
TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
@@ -1225,505 +1643,276 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
}
/*
- * VM Create IRQ Chip
+ * Address VM physical to Host Virtual *alias*.
*
* Input Args:
* vm - Virtual Machine
+ * gpa - VM physical address
*
* Output Args: None
*
- * Return: None
- *
- * Creates an interrupt controller chip for the VM specified by vm.
+ * Return:
+ * Equivalent address within the host virtual *alias* area, or NULL
+ * (without failing the test) if the guest memory is not shared (so
+ * no alias exists).
+ *
+ * Create a writable, shared virtual=>physical alias for the specific GPA.
+ * The primary use case is to allow the host selftest to manipulate guest
+ * memory without mapping said memory in the guest's address space. And, for
+ * userfaultfd-based demand paging, to do so without triggering userfaults.
*/
-void vm_create_irqchip(struct kvm_vm *vm)
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
- int ret;
+ struct userspace_mem_region *region;
+ uintptr_t offset;
+
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region)
+ return NULL;
- ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
- TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ if (!region->host_alias)
+ return NULL;
- vm->has_irqchip = true;
+ offset = gpa - region->region.guest_phys_addr;
+ return (void *) ((uintptr_t) region->host_alias + offset);
}
-/*
- * VM VCPU State
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return:
- * Pointer to structure that describes the state of the VCPU.
- *
- * Locates and returns a pointer to a structure that describes the
- * state of the VCPU with the given vcpuid.
- */
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
+/* Create an interrupt controller chip for the specified VM. */
+void vm_create_irqchip(struct kvm_vm *vm)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ int r;
- return vcpu->state;
-}
+ /*
+ * Allocate a fully in-kernel IRQ chip by default, but fall back to a
+ * split model (x86 only) if that fails (KVM x86 allows compiling out
+ * support for KVM_CREATE_IRQCHIP).
+ */
+ r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+ if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
+ vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
+ else
+ TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);
-/*
- * VM VCPU Run
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return: None
- *
- * Switch to executing the code for the VCPU given by vcpuid, within the VM
- * given by vm.
- */
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
-{
- int ret = _vcpu_run(vm, vcpuid);
- TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ vm->has_irqchip = true;
}
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+int _vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int rc;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
do {
- rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+ rc = __vcpu_run(vcpu);
} while (rc == -1 && errno == EINTR);
- assert_on_unhandled_exception(vm, vcpuid);
+ if (!rc)
+ assert_on_unhandled_exception(vcpu);
return rc;
}
-int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
+/*
+ * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
+ * Assert if the KVM returns an error (other than -EINTR).
+ */
+void vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret = _vcpu_run(vcpu);
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- return vcpu->fd;
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
+void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- vcpu->state->immediate_exit = 1;
- ret = ioctl(vcpu->fd, KVM_RUN, NULL);
- vcpu->state->immediate_exit = 0;
+ vcpu->run->immediate_exit = 1;
+ ret = __vcpu_run(vcpu);
+ vcpu->run->immediate_exit = 0;
TEST_ASSERT(ret == -1 && errno == EINTR,
"KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
ret, errno);
}
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
-
- TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
-}
-
-/*
- * VM VCPU Set MP State
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * mp_state - mp_state to be set
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the MP state of the VCPU given by vcpuid, to the state given
- * by mp_state.
- */
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
- TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
-}
-
/*
- * VM VCPU Get Reg List
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * None
- *
- * Return:
- * A pointer to an allocated struct kvm_reg_list
- *
* Get the list of guest registers which are supported for
- * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
+ * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer,
+ * it is the caller's responsibility to free the list.
*/
-struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
int ret;
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
+ ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
+
reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
reg_list->n = reg_list_n.n;
- vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
+ vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
return reg_list;
}
-/*
- * VM VCPU Regs Get
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * regs - current state of VCPU regs
- *
- * Return: None
- *
- * Obtains the current register state for the VCPU specified by vcpuid
- * and stores it at the location given by regs.
- */
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
+void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
- TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
- ret, errno);
-}
+ uint32_t page_size = getpagesize();
+ uint32_t size = vcpu->vm->dirty_ring_size;
-/*
- * VM VCPU Regs Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * regs - Values to set VCPU regs to
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the regs of the VCPU specified by vcpuid to the values
- * given by regs.
- */
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
- TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
- ret, errno);
-}
-
-#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
- TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
- ret, errno);
-}
-
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
- TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
- ret, errno);
-}
-#endif
-
-#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
+ TEST_ASSERT(size > 0, "Should enable dirty ring first");
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ if (!vcpu->dirty_gfns) {
+ void *addr;
- ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
- TEST_ASSERT(ret == 0,
- "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
- ret, errno);
-}
+ addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
+ addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
- ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
- if (!ignore_error) {
- TEST_ASSERT(ret == 0,
- "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
- ret, errno);
+ vcpu->dirty_gfns = addr;
+ vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
}
- return ret;
-}
-#endif
-
-/*
- * VM VCPU System Regs Get
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * sregs - current state of VCPU system regs
- *
- * Return: None
- *
- * Obtains the current system register state for the VCPU specified by
- * vcpuid and stores it at the location given by sregs.
- */
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
- TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
- ret, errno);
+ return vcpu->dirty_gfns;
}
/*
- * VM VCPU System Regs Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * sregs - Values to set VCPU system regs to
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the system regs of the VCPU specified by vcpuid to the values
- * given by sregs.
+ * Device Ioctl
*/
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
-{
- int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
- TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
-}
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
+int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct kvm_device_attr attribute = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ };
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+ return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
- int ret;
+ struct kvm_create_device create_dev = {
+ .type = type,
+ .flags = KVM_CREATE_DEVICE_TEST,
+ };
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
- TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
- int ret;
+ struct kvm_create_device create_dev = {
+ .type = type,
+ .fd = -1,
+ .flags = 0,
+ };
+ int err;
- ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
- TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
+ TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
+ return err ? : create_dev.fd;
}
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
- int ret;
+ struct kvm_device_attr kvmattr = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ .addr = (uintptr_t)val,
+ };
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
- TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
- int ret;
+ struct kvm_device_attr kvmattr = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ .addr = (uintptr_t)val,
+ };
- ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
- TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}
/*
- * VCPU Ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a VCPU fd.
+ * IRQ related functions.
*/
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
- unsigned long cmd, void *arg)
+
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
- int ret;
+ struct kvm_irq_level irq_level = {
+ .irq = irq,
+ .level = level,
+ };
- ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
- TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
+ return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
- unsigned long cmd, void *arg)
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ int ret = _kvm_irq_line(vm, irq, level);
- ret = ioctl(vcpu->fd, cmd, arg);
-
- return ret;
+ TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}
-void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
+struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
- struct vcpu *vcpu;
- uint32_t size = vm->dirty_ring_size;
-
- TEST_ASSERT(size > 0, "Should enable dirty ring first");
-
- vcpu = vcpu_find(vm, vcpuid);
-
- TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
+ struct kvm_irq_routing *routing;
+ size_t size;
- if (!vcpu->dirty_gfns) {
- void *addr;
+ size = sizeof(struct kvm_irq_routing);
+ /* Allocate space for the max number of entries: this wastes 196 KBs. */
+ size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
+ routing = calloc(1, size);
+ assert(routing);
- addr = mmap(NULL, size, PROT_READ,
- MAP_PRIVATE, vcpu->fd,
- vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
- TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
-
- addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
- MAP_PRIVATE, vcpu->fd,
- vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
- TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
+ return routing;
+}
- addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
- MAP_SHARED, vcpu->fd,
- vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
- TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin)
+{
+ int i;
- vcpu->dirty_gfns = addr;
- vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
- }
+ assert(routing);
+ assert(routing->nr < KVM_MAX_IRQ_ROUTES);
- return vcpu->dirty_gfns;
+ i = routing->nr;
+ routing->entries[i].gsi = gsi;
+ routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ routing->entries[i].flags = 0;
+ routing->entries[i].u.irqchip.irqchip = 0;
+ routing->entries[i].u.irqchip.pin = pin;
+ routing->nr++;
}
-/*
- * VM Ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a VM fd.
- */
-void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
int ret;
- ret = ioctl(vm->fd, cmd, arg);
- TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
+ assert(routing);
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ free(routing);
+
+ return ret;
}
-/*
- * KVM system ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a KVM fd.
- */
-void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
int ret;
- ret = ioctl(vm->kvm_fd, cmd, arg);
- TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
-}
-
-int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
-{
- return ioctl(vm->kvm_fd, cmd, arg);
+ ret = _kvm_gsi_routing_write(vm, routing);
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}
/*
@@ -1743,14 +1932,15 @@ int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
*/
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
+ int ctr;
struct userspace_mem_region *region;
- struct vcpu *vcpu;
+ struct kvm_vcpu *vcpu;
fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
@@ -1758,6 +1948,10 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
region->host_mem);
fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
sparsebit_dump(stream, region->unused_phy_pages, 0);
+ if (region->protected_phy_pages) {
+ fprintf(stream, "%*sprotected_phy_pages: ", indent + 2, "");
+ sparsebit_dump(stream, region->protected_phy_pages, 0);
+ }
}
fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
@@ -1769,42 +1963,59 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump(stream, vm, indent + 4);
}
fprintf(stream, "%*sVCPUs:\n", indent, "");
+
list_for_each_entry(vcpu, &vm->vcpus, list)
- vcpu_dump(stream, vm, vcpu->id, indent + 2);
+ vcpu_dump(stream, vcpu, indent + 2);
}
+#define KVM_EXIT_STRING(x) {KVM_EXIT_##x, #x}
+
/* Known KVM exit reasons */
static struct exit_reason {
unsigned int reason;
const char *name;
} exit_reasons_known[] = {
- {KVM_EXIT_UNKNOWN, "UNKNOWN"},
- {KVM_EXIT_EXCEPTION, "EXCEPTION"},
- {KVM_EXIT_IO, "IO"},
- {KVM_EXIT_HYPERCALL, "HYPERCALL"},
- {KVM_EXIT_DEBUG, "DEBUG"},
- {KVM_EXIT_HLT, "HLT"},
- {KVM_EXIT_MMIO, "MMIO"},
- {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
- {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
- {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
- {KVM_EXIT_INTR, "INTR"},
- {KVM_EXIT_SET_TPR, "SET_TPR"},
- {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
- {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
- {KVM_EXIT_S390_RESET, "S390_RESET"},
- {KVM_EXIT_DCR, "DCR"},
- {KVM_EXIT_NMI, "NMI"},
- {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
- {KVM_EXIT_OSI, "OSI"},
- {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
- {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
- {KVM_EXIT_X86_RDMSR, "RDMSR"},
- {KVM_EXIT_X86_WRMSR, "WRMSR"},
- {KVM_EXIT_XEN, "XEN"},
-#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
-#endif
+ KVM_EXIT_STRING(UNKNOWN),
+ KVM_EXIT_STRING(EXCEPTION),
+ KVM_EXIT_STRING(IO),
+ KVM_EXIT_STRING(HYPERCALL),
+ KVM_EXIT_STRING(DEBUG),
+ KVM_EXIT_STRING(HLT),
+ KVM_EXIT_STRING(MMIO),
+ KVM_EXIT_STRING(IRQ_WINDOW_OPEN),
+ KVM_EXIT_STRING(SHUTDOWN),
+ KVM_EXIT_STRING(FAIL_ENTRY),
+ KVM_EXIT_STRING(INTR),
+ KVM_EXIT_STRING(SET_TPR),
+ KVM_EXIT_STRING(TPR_ACCESS),
+ KVM_EXIT_STRING(S390_SIEIC),
+ KVM_EXIT_STRING(S390_RESET),
+ KVM_EXIT_STRING(DCR),
+ KVM_EXIT_STRING(NMI),
+ KVM_EXIT_STRING(INTERNAL_ERROR),
+ KVM_EXIT_STRING(OSI),
+ KVM_EXIT_STRING(PAPR_HCALL),
+ KVM_EXIT_STRING(S390_UCONTROL),
+ KVM_EXIT_STRING(WATCHDOG),
+ KVM_EXIT_STRING(S390_TSCH),
+ KVM_EXIT_STRING(EPR),
+ KVM_EXIT_STRING(SYSTEM_EVENT),
+ KVM_EXIT_STRING(S390_STSI),
+ KVM_EXIT_STRING(IOAPIC_EOI),
+ KVM_EXIT_STRING(HYPERV),
+ KVM_EXIT_STRING(ARM_NISV),
+ KVM_EXIT_STRING(X86_RDMSR),
+ KVM_EXIT_STRING(X86_WRMSR),
+ KVM_EXIT_STRING(DIRTY_RING_FULL),
+ KVM_EXIT_STRING(AP_RESET_HOLD),
+ KVM_EXIT_STRING(X86_BUS_LOCK),
+ KVM_EXIT_STRING(XEN),
+ KVM_EXIT_STRING(RISCV_SBI),
+ KVM_EXIT_STRING(RISCV_CSR),
+ KVM_EXIT_STRING(NOTIFY),
+ KVM_EXIT_STRING(LOONGARCH_IOCSR),
+ KVM_EXIT_STRING(MEMORY_FAULT),
+ KVM_EXIT_STRING(ARM_SEA),
};
/*
@@ -1842,6 +2053,7 @@ const char *exit_reason_str(unsigned int exit_reason)
* num - number of pages
* paddr_min - Physical address minimum
* memslot - Memory region to allocate page from
+ * protected - True if the pages will be used as protected/private memory
*
* Output Args: None
*
@@ -1853,8 +2065,9 @@ const char *exit_reason_str(unsigned int exit_reason)
* and their base address is returned. A TEST_ASSERT failure occurs if
* not enough pages are available at or above paddr_min.
*/
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot)
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot,
+ bool protected)
{
struct userspace_mem_region *region;
sparsebit_idx_t pg, base;
@@ -1867,8 +2080,10 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
paddr_min, vm->page_size);
region = memslot2region(vm, memslot);
- base = pg = paddr_min >> vm->page_shift;
+ TEST_ASSERT(!protected || region->protected_phy_pages,
+ "Region doesn't support protected memory");
+ base = pg = paddr_min >> vm->page_shift;
do {
for (; pg < base + num; ++pg) {
if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
@@ -1887,8 +2102,11 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
abort();
}
- for (pg = base; pg < base + num; ++pg)
+ for (pg = base; pg < base + num; ++pg) {
sparsebit_clear(region->unused_phy_pages, pg);
+ if (protected)
+ sparsebit_set(region->protected_phy_pages, pg);
+ }
return base * vm->page_size;
}
@@ -1899,6 +2117,12 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
+{
+ return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+}
+
/*
* Address Guest Virtual to Host Virtual
*
@@ -1916,60 +2140,9 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}
-/*
- * Is Unrestricted Guest
- *
- * Input Args:
- * vm - Virtual Machine
- *
- * Output Args: None
- *
- * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
- *
- * Check if the unrestricted guest flag is enabled.
- */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
- char val = 'N';
- size_t count;
- FILE *f;
-
- if (vm == NULL) {
- /* Ensure that the KVM vendor-specific module is loaded. */
- f = fopen(KVM_DEV_PATH, "r");
- TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
- errno);
- fclose(f);
- }
-
- f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
- if (f) {
- count = fread(&val, sizeof(char), 1, f);
- TEST_ASSERT(count == 1, "Unable to read from param file.");
- fclose(f);
- }
-
- return val == 'Y';
-}
-
-unsigned int vm_get_page_size(struct kvm_vm *vm)
-{
- return vm->page_size;
-}
-
-unsigned int vm_get_page_shift(struct kvm_vm *vm)
-{
- return vm->page_shift;
-}
-
-unsigned int vm_get_max_gfn(struct kvm_vm *vm)
-{
- return vm->max_gfn;
-}
-
-int vm_get_fd(struct kvm_vm *vm)
+unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
- return vm->fd;
+ return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}
static unsigned int vm_calc_num_pages(unsigned int num_pages,
@@ -2011,3 +2184,169 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
return vm_adjust_num_guest_pages(mode, n);
}
+
+/*
+ * Read binary stats descriptors
+ *
+ * Input Args:
+ * stats_fd - the file descriptor for the binary stats file from which to read
+ * header - the binary stats metadata header corresponding to the given FD
+ *
+ * Output Args: None
+ *
+ * Return:
+ * A pointer to a newly allocated series of stat descriptors.
+ * Caller is responsible for freeing the returned kvm_stats_desc.
+ *
+ * Read the stats descriptors from the binary stats interface.
+ */
+struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
+ struct kvm_stats_header *header)
+{
+ struct kvm_stats_desc *stats_desc;
+ ssize_t desc_size, total_size, ret;
+
+ desc_size = get_stats_descriptor_size(header);
+ total_size = header->num_desc * desc_size;
+
+ stats_desc = calloc(header->num_desc, desc_size);
+ TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+
+ ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
+ TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");
+
+ return stats_desc;
+}
+
+/*
+ * Read stat data for a particular stat
+ *
+ * Input Args:
+ * stats_fd - the file descriptor for the binary stats file from which to read
+ * header - the binary stats metadata header corresponding to the given FD
+ * desc - the binary stat metadata for the particular stat to be read
+ * max_elements - the maximum number of 8-byte values to read into data
+ *
+ * Output Args:
+ * data - the buffer into which stat data should be read
+ *
+ * Read the data values of a specified stat from the binary stats interface.
+ */
+void read_stat_data(int stats_fd, struct kvm_stats_header *header,
+ struct kvm_stats_desc *desc, uint64_t *data,
+ size_t max_elements)
+{
+ size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
+ size_t size = nr_elements * sizeof(*data);
+ ssize_t ret;
+
+ TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
+ TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);
+
+ ret = pread(stats_fd, data, size,
+ header->data_offset + desc->offset);
+
+ TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
+ desc->name, errno, strerror(errno));
+ TEST_ASSERT(ret == size,
+ "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
+ desc->name, size, ret);
+}
+
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements)
+{
+ struct kvm_stats_desc *desc;
+ size_t size_desc;
+ int i;
+
+ if (!stats->desc) {
+ read_stats_header(stats->fd, &stats->header);
+ stats->desc = read_stats_descriptors(stats->fd, &stats->header);
+ }
+
+ size_desc = get_stats_descriptor_size(&stats->header);
+
+ for (i = 0; i < stats->header.num_desc; ++i) {
+ desc = (void *)stats->desc + (i * size_desc);
+
+ if (strcmp(desc->name, name))
+ continue;
+
+ read_stat_data(stats->fd, &stats->header, desc, data, max_elements);
+ return;
+ }
+
+ TEST_FAIL("Unable to find stat '%s'", name);
+}
+
+__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+}
+
+__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
+{
+}
+
+__weak void kvm_arch_vm_release(struct kvm_vm *vm)
+{
+}
+
+__weak void kvm_selftest_arch_init(void)
+{
+}
+
+static void report_unexpected_signal(int signum)
+{
+#define KVM_CASE_SIGNUM(sig) \
+ case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum)
+
+ switch (signum) {
+ KVM_CASE_SIGNUM(SIGBUS);
+ KVM_CASE_SIGNUM(SIGSEGV);
+ KVM_CASE_SIGNUM(SIGILL);
+ KVM_CASE_SIGNUM(SIGFPE);
+ default:
+ TEST_FAIL("Unexpected signal %d\n", signum);
+ }
+}
+
+void __attribute((constructor)) kvm_selftest_init(void)
+{
+ struct sigaction sig_sa = {
+ .sa_handler = report_unexpected_signal,
+ };
+
+ /* Tell stdout not to buffer its content. */
+ setbuf(stdout, NULL);
+
+ sigaction(SIGBUS, &sig_sa, NULL);
+ sigaction(SIGSEGV, &sig_sa, NULL);
+ sigaction(SIGILL, &sig_sa, NULL);
+ sigaction(SIGFPE, &sig_sa, NULL);
+
+ guest_random_seed = last_guest_seed = random();
+ pr_info("Random seed: 0x%x\n", guest_random_seed);
+
+ kvm_selftest_arch_init();
+}
+
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
+{
+ sparsebit_idx_t pg = 0;
+ struct userspace_mem_region *region;
+
+ if (!vm_arch_has_protected_memory(vm))
+ return false;
+
+ region = userspace_mem_region_find(vm, paddr, paddr);
+ TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr);
+
+ pg = paddr >> vm->page_shift;
+ return sparsebit_is_set(region->protected_phy_pages, pg);
+}
+
+__weak bool kvm_arch_has_default_irqchip(void)
+{
+ return false;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
deleted file mode 100644
index 34465dc562d8..000000000000
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/lib/kvm_util_internal.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
-#define SELFTEST_KVM_UTIL_INTERNAL_H
-
-#include "sparsebit.h"
-
-#define KVM_DEV_PATH "/dev/kvm"
-
-struct userspace_mem_region {
- struct kvm_userspace_memory_region region;
- struct sparsebit *unused_phy_pages;
- int fd;
- off_t offset;
- void *host_mem;
- void *mmap_start;
- size_t mmap_size;
- struct list_head list;
-};
-
-struct vcpu {
- struct list_head list;
- uint32_t id;
- int fd;
- struct kvm_run *state;
- struct kvm_dirty_gfn *dirty_gfns;
- uint32_t fetch_index;
- uint32_t dirty_gfns_count;
-};
-
-struct kvm_vm {
- int mode;
- unsigned long type;
- int kvm_fd;
- int fd;
- unsigned int pgtable_levels;
- unsigned int page_size;
- unsigned int page_shift;
- unsigned int pa_bits;
- unsigned int va_bits;
- uint64_t max_gfn;
- struct list_head vcpus;
- struct list_head userspace_mem_regions;
- struct sparsebit *vpages_valid;
- struct sparsebit *vpages_mapped;
- bool has_irqchip;
- bool pgd_created;
- vm_paddr_t pgd;
- vm_vaddr_t gdt;
- vm_vaddr_t tss;
- vm_vaddr_t idt;
- vm_vaddr_t handlers;
- uint32_t dirty_ring_size;
-};
-
-struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
-
-/*
- * Virtual Translation Tables Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps to the FILE stream given by @stream, the contents of all the
- * virtual translation tables for the VM given by @vm.
- */
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-/*
- * Register Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * regs - Registers
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the registers given by @regs, to the FILE stream
- * given by @stream.
- */
-void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
-
-/*
- * System Register Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * sregs - System registers
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the system registers given by @sregs, to the FILE stream
- * given by @stream.
- */
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
-
-struct userspace_mem_region *
-memslot2region(struct kvm_vm *vm, uint32_t memslot);
-
-#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S
new file mode 100644
index 000000000000..3f1e4b67c5ae
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "processor.h"
+
+/* address of refill exception should be 4K aligned */
+.balign 4096
+.global handle_tlb_refill
+handle_tlb_refill:
+ csrwr t0, LOONGARCH_CSR_TLBRSAVE
+ csrrd t0, LOONGARCH_CSR_PGD
+ lddir t0, t0, 3
+ lddir t0, t0, 1
+ ldpte t0, 0
+ ldpte t0, 1
+ tlbfill
+ csrrd t0, LOONGARCH_CSR_TLBRSAVE
+ ertn
+
+ /*
+ * save and restore all gprs except base register,
+ * and default value of base register is sp ($r3).
+ */
+.macro save_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+.macro restore_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+/* address of general exception should be 4K aligned */
+.balign 4096
+.global handle_exception
+handle_exception:
+ csrwr sp, LOONGARCH_CSR_KS0
+ csrrd sp, LOONGARCH_CSR_KS1
+ addi.d sp, sp, -EXREGS_SIZE
+
+ save_gprs sp
+ /* save sp register to stack */
+ csrrd t0, LOONGARCH_CSR_KS0
+ st.d t0, sp, 3 * 8
+
+ csrrd t0, LOONGARCH_CSR_ERA
+ st.d t0, sp, PC_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_ESTAT
+ st.d t0, sp, ESTAT_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_BADV
+ st.d t0, sp, BADV_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_PRMD
+ st.d t0, sp, PRMD_OFFSET_EXREGS
+
+ or a0, sp, zero
+ bl route_exception
+ ld.d t0, sp, PC_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_ERA
+ ld.d t0, sp, PRMD_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_PRMD
+ restore_gprs sp
+ csrrd sp, LOONGARCH_CSR_KS0
+ ertn
diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c
new file mode 100644
index 000000000000..07c103369ddb
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <linux/compiler.h>
+
+#include <asm/kvm.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000
+#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000
+
+static vm_paddr_t invalid_pgtable[4];
+static vm_vaddr_t exception_handlers;
+
+static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ unsigned int shift;
+ uint64_t mask;
+
+ shift = level * (vm->page_shift - 3) + vm->page_shift;
+ mask = (1UL << (vm->page_shift - 3)) - 1;
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return entry & ~((0x1UL << vm->page_shift) - 1);
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return 1 << (vm->page_shift - 3);
+}
+
+static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child)
+{
+ uint64_t *ptep;
+ int i, ptrs_per_pte;
+
+ ptep = addr_gpa2hva(vm, table);
+ ptrs_per_pte = 1 << (vm->page_shift - 3);
+ for (i = 0; i < ptrs_per_pte; i++)
+ WRITE_ONCE(*(ptep + i), child);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ int i;
+ vm_paddr_t child, table;
+
+ if (vm->pgd_created)
+ return;
+
+ child = table = 0;
+ for (i = 0; i < vm->pgtable_levels; i++) {
+ invalid_pgtable[i] = child;
+ table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN,
+ vm->memslots[MEM_REGION_PT]);
+ TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i);
+ virt_set_pgtable(vm, table, child);
+ child = table;
+ }
+ vm->pgd = table;
+ vm->pgd_created = true;
+}
+
+static int virt_pte_none(uint64_t *ptep, int level)
+{
+ return *ptep == invalid_pgtable[level];
+}
+
+static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc)
+{
+ int level;
+ uint64_t *ptep;
+ vm_paddr_t child;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ child = vm->pgd;
+ level = vm->pgtable_levels - 1;
+ while (level > 0) {
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ if (virt_pte_none(ptep, level)) {
+ if (alloc) {
+ child = vm_alloc_page_table(vm);
+ virt_set_pgtable(vm, child, invalid_pgtable[level - 1]);
+ WRITE_ONCE(*ptep, child);
+ } else
+ goto unmapped_gva;
+
+ } else
+ child = pte_addr(vm, *ptep);
+ level--;
+ }
+
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ return ptep;
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+ exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+
+ ptep = virt_populate_pte(vm, gva, 0);
+ TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint32_t prot_bits;
+ uint64_t *ptep;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = virt_populate_pte(vm, vaddr, 1);
+ prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER;
+ WRITE_ONCE(*ptep, paddr | prot_bits);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+ uint64_t pte, *ptep;
+ static const char * const type[] = { "pte", "pmd", "pud", "pgd"};
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (virt_pte_none(ptep, level))
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n",
+ indent, "", type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--);
+ }
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level;
+
+ if (!vm->pgd_created)
+ return;
+
+ level = vm->pgtable_levels - 1;
+ pte_dump(stream, vm, indent, vm->pgd, level);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)",
+ uc.args[0], uc.args[1], uc.args[2]);
+}
+
+void route_exception(struct ex_regs *regs)
+{
+ int vector;
+ unsigned long pc, estat, badv;
+ struct handlers *handlers;
+
+ handlers = (struct handlers *)exception_handlers;
+ vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+ if (handlers && handlers->exception_handlers[vector])
+ return handlers->exception_handlers[vector](regs);
+
+ pc = regs->pc;
+ badv = regs->badv;
+ estat = regs->estat;
+ ucall(UCALL_UNHANDLED, 3, pc, estat, badv);
+ while (1) ;
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ void *addr;
+
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+
+ addr = addr_gva2hva(vm, vm->handlers);
+ memset(addr, 0, vm->page_size);
+ exception_handlers = vm->handlers;
+ sync_global_to_guest(vm, exception_handlers);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return csr_read(LOONGARCH_CSR_CPUID);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ int i;
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ "num: %u\n", num);
+
+ vcpu_regs_get(vcpu, &regs);
+
+ va_start(ap, num);
+ for (i = 0; i < num; i++)
+ regs.gpr[i + 4] = va_arg(ap, uint64_t);
+ va_end(ap);
+
+ vcpu_regs_set(vcpu, &regs);
+}
+
+static void loongarch_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ __vcpu_set_reg(vcpu, id, val);
+}
+
+static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_get_reg(vcpu, csrid, addr);
+}
+
+static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_set_reg(vcpu, csrid, val);
+}
+
+static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ int width;
+ unsigned long val;
+ struct kvm_vm *vm = vcpu->vm;
+
+ switch (vm->mode) {
+ case VM_MODE_P36V47_16K:
+ case VM_MODE_P47V47_16K:
+ break;
+
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ /* kernel mode and page enable mode */
+ val = PLV_KERN | CSR_CRMD_PG;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1);
+
+ /* time count start from 0 */
+ val = 0;
+ loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val);
+
+ width = vm->page_shift - 3;
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ /* pud page shift and width */
+ val = (vm->page_shift + width * 2) << 20 | (width << 25);
+ /* fall throuth */
+ case 3:
+ /* pmd page shift and width */
+ val |= (vm->page_shift + width) << 10 | (width << 15);
+ /* pte page shift and width */
+ val |= vm->page_shift | width << 5;
+ break;
+ default:
+ TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels);
+ }
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val);
+
+ /* PGD page shift and width */
+ val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd);
+
+ /*
+ * Refill exception runs on real mode
+ * Entry address should be physical address
+ */
+ val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val);
+
+ /*
+ * General exception runs on page-enabled mode
+ * Entry address should be virtual address
+ */
+ val = (unsigned long)handle_exception;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val);
+ val &= ~CSR_TLBIDX_SIZEM;
+ val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE);
+
+ /* LOONGARCH_CSR_KS1 is used for exception stack */
+ val = __vm_vaddr_alloc(vm, vm->page_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(val != 0, "No memory for exception stack");
+ val = val + vm->page_size;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val);
+ val &= ~CSR_TLBREHI_PS;
+ val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ size_t stack_size;
+ uint64_t stack_vaddr;
+ struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ stack_size = vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack");
+
+ loongarch_vcpu_setup(vcpu);
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vcpu, &regs);
+ regs.gpr[3] = stack_vaddr + stack_size;
+ vcpu_regs_set(vcpu, &regs);
+
+ return vcpu;
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ /* Setup guest PC register */
+ vcpu_regs_get(vcpu, &regs);
+ regs.pc = (uint64_t)guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
new file mode 100644
index 000000000000..fc6cbb50573f
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ */
+#include "kvm_util.h"
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+vm_vaddr_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+ vm->ucall_mmio_addr = mmio_gpa;
+
+ write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+ "Unexpected ucall exit mmio address access");
+
+ return (void *)(*((uint64_t *)run->mmio.data));
+ }
+
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c
new file mode 100644
index 000000000000..46a14fd63d9e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ */
+
+#include <time.h>
+
+#include "lru_gen_util.h"
+
+/*
+ * Tracks state while we parse memcg lru_gen stats. The file we're parsing is
+ * structured like this (some extra whitespace elided):
+ *
+ * memcg (id) (path)
+ * node (id)
+ * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages)
+ */
+struct memcg_stats_parse_context {
+ bool consumed; /* Whether or not this line was consumed */
+ /* Next parse handler to invoke */
+ void (*next_handler)(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+ int current_node_idx; /* Current index in nodes array */
+ const char *name; /* The name of the memcg we're looking for */
+};
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+
+struct split_iterator {
+ char *str;
+ char *save;
+};
+
+static char *split_next(struct split_iterator *it)
+{
+ char *ret = strtok_r(it->str, " \t\n\r", &it->save);
+
+ it->str = NULL;
+ return ret;
+}
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *memcg_id = split_next(&it);
+ char *memcg_name = split_next(&it);
+ char *end;
+
+ ctx->consumed = true;
+
+ if (!prefix || strcmp("memcg", prefix))
+ return; /* Not a memcg line (maybe empty), skip */
+
+ TEST_ASSERT(memcg_id && memcg_name,
+ "malformed memcg line; no memcg id or memcg_name");
+
+ if (strcmp(memcg_name + 1, ctx->name))
+ return; /* Wrong memcg, skip */
+
+ /* Found it! */
+
+ stats->memcg_id = strtoul(memcg_id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id);
+ if (!stats->memcg_id)
+ return; /* Removed memcg? */
+
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+}
+
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *id = split_next(&it);
+ long found_node_id;
+ char *end;
+
+ ctx->consumed = true;
+ ctx->current_node_idx = -1;
+
+ if (!prefix)
+ return; /* Skip empty lines */
+
+ if (!strcmp("memcg", prefix)) {
+ /* Memcg done, found next one; stop. */
+ ctx->next_handler = NULL;
+ return;
+ } else if (strcmp("node", prefix))
+ TEST_ASSERT(false, "found malformed line after 'memcg ...',"
+ "token: '%s'", prefix);
+
+ /* At this point we know we have a node line. Parse the ID. */
+
+ TEST_ASSERT(id, "malformed node line; no node id");
+
+ found_node_id = strtol(id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed node id '%s'", id);
+
+ ctx->current_node_idx = stats->nr_nodes++;
+ TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES,
+ "memcg has stats for too many nodes, max is %d",
+ MAX_NR_NODES);
+ stats->nodes[ctx->current_node_idx].node = found_node_id;
+
+ ctx->next_handler = memcg_stats_handle_in_node;
+}
+
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ char *my_line = strdup(line);
+ struct split_iterator it = { .str = my_line };
+ char *gen, *age, *nr_anon, *nr_file;
+ struct node_stats *node_stats;
+ struct generation_stats *gen_stats;
+ char *end;
+
+ TEST_ASSERT(it.str, "failed to copy input line");
+
+ gen = split_next(&it);
+
+ if (!gen)
+ goto out_consume; /* Skip empty lines */
+
+ if (!strcmp("memcg", gen) || !strcmp("node", gen)) {
+ /*
+ * Reached next memcg or node section. Don't consume, let the
+ * other handler deal with this.
+ */
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+ goto out;
+ }
+
+ node_stats = &stats->nodes[ctx->current_node_idx];
+ TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS,
+ "found too many generation lines; max is %d",
+ MAX_NR_GENS);
+ gen_stats = &node_stats->gens[node_stats->nr_gens++];
+
+ age = split_next(&it);
+ nr_anon = split_next(&it);
+ nr_file = split_next(&it);
+
+ TEST_ASSERT(age && nr_anon && nr_file,
+ "malformed generation line; not enough tokens");
+
+ gen_stats->gen = (int)strtol(gen, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen);
+
+ gen_stats->age_ms = strtol(age, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age);
+
+ gen_stats->nr_anon = strtol(nr_anon, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'",
+ nr_anon);
+
+ gen_stats->nr_file = strtol(nr_file, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file);
+
+out_consume:
+ ctx->consumed = true;
+out:
+ free(my_line);
+}
+
+static void print_memcg_stats(const struct memcg_stats *stats, const char *name)
+{
+ int node, gen;
+
+ pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id);
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ pr_debug("\tnode %d\n", stats->nodes[node].node);
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ const struct generation_stats *gstats =
+ &stats->nodes[node].gens[gen];
+
+ pr_debug("\t\tgen %d\tage_ms %ld"
+ "\tnr_anon %ld\tnr_file %ld\n",
+ gstats->gen, gstats->age_ms, gstats->nr_anon,
+ gstats->nr_file);
+ }
+ }
+}
+
+/* Re-read lru_gen debugfs information for @memcg into @stats. */
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg)
+{
+ FILE *f;
+ ssize_t read = 0;
+ char *line = NULL;
+ size_t bufsz;
+ struct memcg_stats_parse_context ctx = {
+ .next_handler = memcg_stats_handle_searching,
+ .name = memcg,
+ };
+
+ memset(stats, 0, sizeof(struct memcg_stats));
+
+ f = fopen(LRU_GEN_DEBUGFS, "r");
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+
+ while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) {
+ ctx.consumed = false;
+
+ do {
+ ctx.next_handler(stats, &ctx, line);
+ if (!ctx.next_handler)
+ break;
+ } while (!ctx.consumed);
+ }
+
+ if (read < 0 && !feof(f))
+ TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS);
+
+ TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n"
+ "Did the memcg get created in the proper mount?",
+ memcg);
+ if (line)
+ free(line);
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+
+ print_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find all pages tracked by lru_gen for this memcg in generation @target_gen.
+ *
+ * If @target_gen is negative, look for all generations.
+ */
+long lru_gen_sum_memcg_stats_for_gen(int target_gen,
+ const struct memcg_stats *stats)
+{
+ int node, gen;
+ long total_nr = 0;
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ const struct node_stats *node_stats = &stats->nodes[node];
+
+ for (gen = 0; gen < node_stats->nr_gens; ++gen) {
+ const struct generation_stats *gen_stats =
+ &node_stats->gens[gen];
+
+ if (target_gen >= 0 && gen_stats->gen != target_gen)
+ continue;
+
+ total_nr += gen_stats->nr_anon + gen_stats->nr_file;
+ }
+ }
+
+ return total_nr;
+}
+
+/* Find all pages tracked by lru_gen for this memcg. */
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats)
+{
+ return lru_gen_sum_memcg_stats_for_gen(-1, stats);
+}
+
+/*
+ * If lru_gen aging should force page table scanning.
+ *
+ * If you want to set this to false, you will need to do eviction
+ * before doing extra aging passes.
+ */
+static const bool force_scan = true;
+
+static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen)
+{
+ FILE *f = fopen(LRU_GEN_DEBUGFS, "w");
+ char *command;
+ size_t sz;
+
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+ sz = asprintf(&command, "+ %lu %d %d 1 %d\n",
+ memcg_id, node_id, max_gen, force_scan);
+ TEST_ASSERT(sz > 0, "creating aging command failed");
+
+ pr_debug("Running aging command: %s", command);
+ if (fwrite(command, sizeof(char), sz, f) < sz) {
+ TEST_ASSERT(false, "writing aging command %s to %s failed",
+ command, LRU_GEN_DEBUGFS);
+ }
+
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+}
+
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg)
+{
+ int node, gen;
+
+ pr_debug("lru_gen: invoking aging...\n");
+
+ /* Must read memcg stats to construct the proper aging command. */
+ lru_gen_read_memcg_stats(stats, memcg);
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ int max_gen = 0;
+
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ int this_gen = stats->nodes[node].gens[gen].gen;
+
+ max_gen = max_gen > this_gen ? max_gen : this_gen;
+ }
+
+ run_aging_impl(stats->memcg_id, stats->nodes[node].node,
+ max_gen);
+ }
+
+ /* Re-read so callers get updated information */
+ lru_gen_read_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find which generation contains at least @pages pages, assuming that
+ * such a generation exists.
+ */
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long pages)
+{
+ int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1;
+
+ for (node = 0; node < stats->nr_nodes; ++node)
+ for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens;
+ ++gen_idx) {
+ gen = stats->nodes[node].gens[gen_idx].gen;
+ max_gen = gen > max_gen ? gen : max_gen;
+ min_gen = gen < min_gen ? gen : min_gen;
+ }
+
+ for (gen = min_gen; gen <= max_gen; ++gen)
+ /* See if this generation has enough pages. */
+ if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages)
+ return gen;
+
+ return -1;
+}
+
+bool lru_gen_usable(void)
+{
+ long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK;
+ int lru_gen_fd, lru_gen_debug_fd;
+ char mglru_feature_str[8] = {};
+ long mglru_features;
+
+ lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY);
+ if (lru_gen_fd < 0) {
+ puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH);
+ return false;
+ }
+ if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) {
+ puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH);
+ close(lru_gen_fd);
+ return false;
+ }
+ close(lru_gen_fd);
+
+ mglru_features = strtol(mglru_feature_str, NULL, 16);
+ if ((mglru_features & required_features) != required_features) {
+ printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n",
+ mglru_features, required_features);
+ printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n",
+ required_features);
+ return false;
+ }
+
+ lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR);
+ __TEST_REQUIRE(lru_gen_debug_fd >= 0,
+ "lru_gen: Could not open " LRU_GEN_DEBUGFS ", "
+ "but lru_gen is enabled, so cannot use page_idle.");
+ close(lru_gen_debug_fd);
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
new file mode 100644
index 000000000000..557c0a0a5658
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ */
+#include <inttypes.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "memstress.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+struct memstress_args memstress_args;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+struct vcpu_thread {
+ /* The index of the vCPU. */
+ int vcpu_idx;
+
+ /* The pthread backing the vCPU. */
+ pthread_t thread;
+
+ /* Set to true once the vCPU thread is up and running. */
+ bool running;
+};
+
+/* The vCPU threads involved in this test. */
+static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS];
+
+/* The function run by each vCPU thread, as provided by the test. */
+static void (*vcpu_thread_fn)(struct memstress_vcpu_args *);
+
+/* Set to true once all vCPU threads are up and running. */
+static bool all_vcpu_threads_running;
+
+static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+
+/*
+ * Continuously write to the first 8 bytes of each page in the
+ * specified region.
+ */
+void memstress_guest_code(uint32_t vcpu_idx)
+{
+ struct memstress_args *args = &memstress_args;
+ struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx];
+ struct guest_random_state rand_state;
+ uint64_t gva;
+ uint64_t pages;
+ uint64_t addr;
+ uint64_t page;
+ int i;
+
+ rand_state = new_guest_random_state(guest_random_seed + vcpu_idx);
+
+ gva = vcpu_args->gva;
+ pages = vcpu_args->pages;
+
+ /* Make sure vCPU args data structure is not corrupt. */
+ GUEST_ASSERT(vcpu_args->vcpu_idx == vcpu_idx);
+
+ while (true) {
+ for (i = 0; i < sizeof(memstress_args); i += args->guest_page_size)
+ (void) *((volatile char *)args + i);
+
+ for (i = 0; i < pages; i++) {
+ if (args->random_access)
+ page = guest_random_u32(&rand_state) % pages;
+ else
+ page = i;
+
+ addr = gva + (page * args->guest_page_size);
+
+ if (__guest_random_bool(&rand_state, args->write_percent))
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+ else
+ READ_ONCE(*(uint64_t *)addr);
+ }
+
+ GUEST_SYNC(1);
+ }
+}
+
+void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
+ struct kvm_vcpu *vcpus[],
+ uint64_t vcpu_memory_bytes,
+ bool partition_vcpu_memory_access)
+{
+ struct memstress_args *args = &memstress_args;
+ struct memstress_vcpu_args *vcpu_args;
+ int i;
+
+ for (i = 0; i < nr_vcpus; i++) {
+ vcpu_args = &args->vcpu_args[i];
+
+ vcpu_args->vcpu = vcpus[i];
+ vcpu_args->vcpu_idx = i;
+
+ if (partition_vcpu_memory_access) {
+ vcpu_args->gva = guest_test_virt_mem +
+ (i * vcpu_memory_bytes);
+ vcpu_args->pages = vcpu_memory_bytes /
+ args->guest_page_size;
+ vcpu_args->gpa = args->gpa + (i * vcpu_memory_bytes);
+ } else {
+ vcpu_args->gva = guest_test_virt_mem;
+ vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) /
+ args->guest_page_size;
+ vcpu_args->gpa = args->gpa;
+ }
+
+ vcpu_args_set(vcpus[i], 1, i);
+
+ pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+ i, vcpu_args->gpa, vcpu_args->gpa +
+ (vcpu_args->pages * args->guest_page_size));
+ }
+}
+
+struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
+ uint64_t vcpu_memory_bytes, int slots,
+ enum vm_mem_backing_src_type backing_src,
+ bool partition_vcpu_memory_access)
+{
+ struct memstress_args *args = &memstress_args;
+ struct kvm_vm *vm;
+ uint64_t guest_num_pages, slot0_pages = 0;
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
+ uint64_t region_end_gfn;
+ int i;
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+ /* By default vCPUs will write to memory. */
+ args->write_percent = 100;
+
+ /*
+ * Snapshot the non-huge page size. This is used by the guest code to
+ * access/dirty pages at the logging granularity.
+ */
+ args->guest_page_size = vm_guest_mode_params[mode].page_size;
+
+ guest_num_pages = vm_adjust_num_guest_pages(mode,
+ (nr_vcpus * vcpu_memory_bytes) / args->guest_page_size);
+
+ TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0,
+ "Guest memory size is not host page size aligned.");
+ TEST_ASSERT(vcpu_memory_bytes % args->guest_page_size == 0,
+ "Guest memory size is not guest page size aligned.");
+ TEST_ASSERT(guest_num_pages % slots == 0,
+ "Guest memory cannot be evenly divided into %d slots.",
+ slots);
+
+ /*
+ * If using nested, allocate extra pages for the nested page tables and
+ * in-memory data structures.
+ */
+ if (args->nested)
+ slot0_pages += memstress_nested_pages(nr_vcpus);
+
+ /*
+ * Pass guest_num_pages to populate the page tables for test memory.
+ * The memory is also added to memslot 0, but that's a benign side
+ * effect as KVM allows aliasing HVAs in meslots.
+ */
+ vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus,
+ slot0_pages + guest_num_pages,
+ memstress_guest_code, vcpus);
+
+ args->vm = vm;
+
+ /* Put the test region at the top guest physical memory. */
+ region_end_gfn = vm->max_gfn + 1;
+
+#ifdef __x86_64__
+ /*
+ * When running vCPUs in L2, restrict the test region to 48 bits to
+ * avoid needing 5-level page tables to identity map L2.
+ */
+ if (args->nested)
+ region_end_gfn = min(region_end_gfn, (1UL << 48) / args->guest_page_size);
+#endif
+ /*
+ * If there should be more memory in the guest test region than there
+ * can be pages in the guest, it will definitely cause problems.
+ */
+ TEST_ASSERT(guest_num_pages < region_end_gfn,
+ "Requested more guest memory than address space allows.\n"
+ " guest pages: %" PRIx64 " max gfn: %" PRIx64
+ " nr_vcpus: %d wss: %" PRIx64 "]",
+ guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes);
+
+ args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size;
+ args->gpa = align_down(args->gpa, backing_src_pagesz);
+#ifdef __s390x__
+ /* Align to 1M (segment size) */
+ args->gpa = align_down(args->gpa, 1 << 20);
+#endif
+ args->size = guest_num_pages * args->guest_page_size;
+ pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
+ args->gpa, args->gpa + args->size);
+
+ /* Add extra memory slots for testing */
+ for (i = 0; i < slots; i++) {
+ uint64_t region_pages = guest_num_pages / slots;
+ vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i;
+
+ vm_userspace_mem_region_add(vm, backing_src, region_start,
+ MEMSTRESS_MEM_SLOT_INDEX + i,
+ region_pages, 0);
+ }
+
+ /* Do mapping for the demand paging memory slot */
+ virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages);
+
+ memstress_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes,
+ partition_vcpu_memory_access);
+
+ if (args->nested) {
+ pr_info("Configuring vCPUs to run in L2 (nested).\n");
+ memstress_setup_nested(vm, nr_vcpus, vcpus);
+ }
+
+ /* Export the shared variables to the guest. */
+ sync_global_to_guest(vm, memstress_args);
+
+ return vm;
+}
+
+void memstress_destroy_vm(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
+
+void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
+{
+ memstress_args.write_percent = write_percent;
+ sync_global_to_guest(vm, memstress_args.write_percent);
+}
+
+void memstress_set_random_access(struct kvm_vm *vm, bool random_access)
+{
+ memstress_args.random_access = random_access;
+ sync_global_to_guest(vm, memstress_args.random_access);
+}
+
+uint64_t __weak memstress_nested_pages(int nr_vcpus)
+{
+ return 0;
+}
+
+void __weak memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus)
+{
+ pr_info("%s() not support on this architecture, skipping.\n", __func__);
+ exit(KSFT_SKIP);
+}
+
+static void *vcpu_thread_main(void *data)
+{
+ struct vcpu_thread *vcpu = data;
+ int vcpu_idx = vcpu->vcpu_idx;
+
+ if (memstress_args.pin_vcpus)
+ pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
+
+ WRITE_ONCE(vcpu->running, true);
+
+ /*
+ * Wait for all vCPU threads to be up and running before calling the test-
+ * provided vCPU thread function. This prevents thread creation (which
+ * requires taking the mmap_sem in write mode) from interfering with the
+ * guest faulting in its memory.
+ */
+ while (!READ_ONCE(all_vcpu_threads_running))
+ ;
+
+ vcpu_thread_fn(&memstress_args.vcpu_args[vcpu_idx]);
+
+ return NULL;
+}
+
+void memstress_start_vcpu_threads(int nr_vcpus,
+ void (*vcpu_fn)(struct memstress_vcpu_args *))
+{
+ int i;
+
+ vcpu_thread_fn = vcpu_fn;
+ WRITE_ONCE(all_vcpu_threads_running, false);
+ WRITE_ONCE(memstress_args.stop_vcpus, false);
+
+ for (i = 0; i < nr_vcpus; i++) {
+ struct vcpu_thread *vcpu = &vcpu_threads[i];
+
+ vcpu->vcpu_idx = i;
+ WRITE_ONCE(vcpu->running, false);
+
+ pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
+ }
+
+ for (i = 0; i < nr_vcpus; i++) {
+ while (!READ_ONCE(vcpu_threads[i].running))
+ ;
+ }
+
+ WRITE_ONCE(all_vcpu_threads_running, true);
+}
+
+void memstress_join_vcpu_threads(int nr_vcpus)
+{
+ int i;
+
+ WRITE_ONCE(memstress_args.stop_vcpus, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i].thread, NULL);
+}
+
+static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+ int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+
+ vm_mem_region_set_flags(vm, slot, flags);
+ }
+}
+
+void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, true);
+}
+
+void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, false);
+}
+
+void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+ kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
+ }
+}
+
+void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+ kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
+ }
+}
+
+unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+ unsigned long **bitmaps;
+ int i;
+
+ bitmaps = malloc(slots * sizeof(bitmaps[0]));
+ TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+ for (i = 0; i < slots; i++) {
+ bitmaps[i] = bitmap_zalloc(pages_per_slot);
+ TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+ }
+
+ return bitmaps;
+}
+
+void memstress_free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++)
+ free(bitmaps[i]);
+
+ free(bitmaps);
+}
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
deleted file mode 100644
index 81490b9b4e32..000000000000
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ /dev/null
@@ -1,145 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Google LLC.
- */
-
-#include "kvm_util.h"
-#include "perf_test_util.h"
-#include "processor.h"
-
-struct perf_test_args perf_test_args;
-
-uint64_t guest_test_phys_mem;
-
-/*
- * Guest virtual memory offset of the testing memory slot.
- * Must not conflict with identity mapped test code.
- */
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
-
-/*
- * Continuously write to the first 8 bytes of each page in the
- * specified region.
- */
-static void guest_code(uint32_t vcpu_id)
-{
- struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
- uint64_t gva;
- uint64_t pages;
- int i;
-
- /* Make sure vCPU args data structure is not corrupt. */
- GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
-
- gva = vcpu_args->gva;
- pages = vcpu_args->pages;
-
- while (true) {
- for (i = 0; i < pages; i++) {
- uint64_t addr = gva + (i * perf_test_args.guest_page_size);
-
- if (i % perf_test_args.wr_fract == 0)
- *(uint64_t *)addr = 0x0123456789ABCDEF;
- else
- READ_ONCE(*(uint64_t *)addr);
- }
-
- GUEST_SYNC(1);
- }
-}
-
-struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
- uint64_t vcpu_memory_bytes,
- enum vm_mem_backing_src_type backing_src)
-{
- struct kvm_vm *vm;
- uint64_t guest_num_pages;
-
- pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
- perf_test_args.host_page_size = getpagesize();
- perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size;
-
- guest_num_pages = vm_adjust_num_guest_pages(mode,
- (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size);
-
- TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
- "Guest memory size is not host page size aligned.");
- TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
- "Guest memory size is not guest page size aligned.");
-
- vm = vm_create_with_vcpus(mode, vcpus,
- (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
- 0, guest_code, NULL);
-
- perf_test_args.vm = vm;
-
- /*
- * If there should be more memory in the guest test region than there
- * can be pages in the guest, it will definitely cause problems.
- */
- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
- "Requested more guest memory than address space allows.\n"
- " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
- guest_num_pages, vm_get_max_gfn(vm), vcpus,
- vcpu_memory_bytes);
-
- guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
- perf_test_args.guest_page_size;
- guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
-#ifdef __s390x__
- /* Align to 1M (segment size) */
- guest_test_phys_mem &= ~((1 << 20) - 1);
-#endif
- pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
-
- /* Add an extra memory slot for testing */
- vm_userspace_mem_region_add(vm, backing_src, guest_test_phys_mem,
- PERF_TEST_MEM_SLOT_INDEX,
- guest_num_pages, 0);
-
- /* Do mapping for the demand paging memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
-
- ucall_init(vm, NULL);
-
- return vm;
-}
-
-void perf_test_destroy_vm(struct kvm_vm *vm)
-{
- ucall_uninit(vm);
- kvm_vm_free(vm);
-}
-
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
- uint64_t vcpu_memory_bytes,
- bool partition_vcpu_memory_access)
-{
- vm_paddr_t vcpu_gpa;
- struct perf_test_vcpu_args *vcpu_args;
- int vcpu_id;
-
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
-
- vcpu_args->vcpu_id = vcpu_id;
- if (partition_vcpu_memory_access) {
- vcpu_args->gva = guest_test_virt_mem +
- (vcpu_id * vcpu_memory_bytes);
- vcpu_args->pages = vcpu_memory_bytes /
- perf_test_args.guest_page_size;
- vcpu_gpa = guest_test_phys_mem +
- (vcpu_id * vcpu_memory_bytes);
- } else {
- vcpu_args->gva = guest_test_virt_mem;
- vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
- perf_test_args.guest_page_size;
- vcpu_gpa = guest_test_phys_mem;
- }
-
- pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
- vcpu_id, vcpu_gpa, vcpu_gpa +
- (vcpu_args->pages * perf_test_args.guest_page_size));
- }
-}
diff --git a/tools/testing/selftests/kvm/lib/rbtree.c b/tools/testing/selftests/kvm/lib/rbtree.c
new file mode 100644
index 000000000000..a703f0194ea3
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/rbtree.c
@@ -0,0 +1 @@
+#include "../../../../lib/rbtree.c"
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S
new file mode 100644
index 000000000000..b787b982e922
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Intel Corporation
+ */
+
+#ifndef __ASSEMBLY__
+#define __ASSEMBLY__
+#endif
+
+#include <asm/csr.h>
+
+.macro save_context
+ addi sp, sp, (-8*36)
+ sd x1, 8(sp)
+ sd x2, 16(sp)
+ sd x3, 24(sp)
+ sd x4, 32(sp)
+ sd x5, 40(sp)
+ sd x6, 48(sp)
+ sd x7, 56(sp)
+ sd x8, 64(sp)
+ sd x9, 72(sp)
+ sd x10, 80(sp)
+ sd x11, 88(sp)
+ sd x12, 96(sp)
+ sd x13, 104(sp)
+ sd x14, 112(sp)
+ sd x15, 120(sp)
+ sd x16, 128(sp)
+ sd x17, 136(sp)
+ sd x18, 144(sp)
+ sd x19, 152(sp)
+ sd x20, 160(sp)
+ sd x21, 168(sp)
+ sd x22, 176(sp)
+ sd x23, 184(sp)
+ sd x24, 192(sp)
+ sd x25, 200(sp)
+ sd x26, 208(sp)
+ sd x27, 216(sp)
+ sd x28, 224(sp)
+ sd x29, 232(sp)
+ sd x30, 240(sp)
+ sd x31, 248(sp)
+ csrr s0, CSR_SEPC
+ csrr s1, CSR_SSTATUS
+ csrr s2, CSR_STVAL
+ csrr s3, CSR_SCAUSE
+ sd s0, 0(sp)
+ sd s1, 256(sp)
+ sd s2, 264(sp)
+ sd s3, 272(sp)
+.endm
+
+.macro restore_context
+ ld s3, 272(sp)
+ ld s2, 264(sp)
+ ld s1, 256(sp)
+ ld s0, 0(sp)
+ csrw CSR_SCAUSE, s3
+ csrw CSR_SSTATUS, s1
+ csrw CSR_SEPC, s0
+ ld x31, 248(sp)
+ ld x30, 240(sp)
+ ld x29, 232(sp)
+ ld x28, 224(sp)
+ ld x27, 216(sp)
+ ld x26, 208(sp)
+ ld x25, 200(sp)
+ ld x24, 192(sp)
+ ld x23, 184(sp)
+ ld x22, 176(sp)
+ ld x21, 168(sp)
+ ld x20, 160(sp)
+ ld x19, 152(sp)
+ ld x18, 144(sp)
+ ld x17, 136(sp)
+ ld x16, 128(sp)
+ ld x15, 120(sp)
+ ld x14, 112(sp)
+ ld x13, 104(sp)
+ ld x12, 96(sp)
+ ld x11, 88(sp)
+ ld x10, 80(sp)
+ ld x9, 72(sp)
+ ld x8, 64(sp)
+ ld x7, 56(sp)
+ ld x6, 48(sp)
+ ld x5, 40(sp)
+ ld x4, 32(sp)
+ ld x3, 24(sp)
+ ld x2, 16(sp)
+ ld x1, 8(sp)
+ addi sp, sp, (8*36)
+.endm
+
+.balign 4
+.global exception_vectors
+exception_vectors:
+ save_context
+ move a0, sp
+ call route_exception
+ restore_context
+ sret
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
new file mode 100644
index 000000000000..2eac7d4b59e9
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -0,0 +1,517 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V code
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
+
+static vm_vaddr_t exception_handlers;
+
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
+{
+ unsigned long value = 0;
+ int ret;
+
+ ret = __vcpu_get_reg(vcpu, ext, &value);
+
+ return !ret && !!value;
+}
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
+ PGTBL_PAGE_SIZE_SHIFT;
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return PGTBL_PAGE_SIZE / sizeof(uint64_t);
+}
+
+static uint64_t pte_index_mask[] = {
+ PGTBL_L0_INDEX_MASK,
+ PGTBL_L1_INDEX_MASK,
+ PGTBL_L2_INDEX_MASK,
+ PGTBL_L3_INDEX_MASK,
+};
+
+static uint32_t pte_index_shift[] = {
+ PGTBL_L0_INDEX_SHIFT,
+ PGTBL_L1_INDEX_SHIFT,
+ PGTBL_L2_INDEX_SHIFT,
+ PGTBL_L3_INDEX_SHIFT,
+};
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ TEST_ASSERT(level > -1,
+ "Negative page table level (%d) not possible", level);
+ TEST_ASSERT(level < vm->pgtable_levels,
+ "Invalid page table level (%d)", level);
+
+ return (gva & pte_index_mask[level]) >> pte_index_shift[level];
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
+
+ if (vm->pgd_created)
+ return;
+
+ vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->pgd_created = true;
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint64_t *ptep, next_ppn;
+ int level = vm->pgtable_levels - 1;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
+ if (!*ptep) {
+ next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, vaddr, level) * 8;
+ if (!*ptep && level > 0) {
+ next_ppn = vm_alloc_page_table(vm) >>
+ PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+ }
+
+ paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+ int level = vm->pgtable_levels - 1;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+ }
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d",
+ gva, level);
+ exit(1);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t page, int level)
+{
+#ifdef DEBUG
+ static const char *const type[] = { "pte", "pmd", "pud", "p4d"};
+ uint64_t pte, *ptep;
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "",
+ type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+#endif
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level = vm->pgtable_levels - 1;
+ uint64_t pgd, *ptep;
+
+ if (!vm->pgd_created)
+ return;
+
+ for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
+ ptep = addr_gpa2hva(vm, pgd);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "",
+ pgd, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+}
+
+void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ unsigned long satp;
+
+ /*
+ * The RISC-V Sv48 MMU mode supports 56-bit physical address
+ * for 48-bit virtual address with 4KB last level page size.
+ */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
+ satp |= SATP_MODE_48;
+
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+ struct kvm_riscv_core core;
+
+ core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode));
+ core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc));
+ core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra));
+ core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp));
+ core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp));
+ core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp));
+ core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0));
+ core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1));
+ core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2));
+ core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0));
+ core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1));
+ core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0));
+ core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1));
+ core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2));
+ core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3));
+ core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4));
+ core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5));
+ core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6));
+ core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7));
+ core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2));
+ core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3));
+ core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4));
+ core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5));
+ core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6));
+ core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7));
+ core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8));
+ core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9));
+ core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10));
+ core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11));
+ core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3));
+ core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4));
+ core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5));
+ core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6));
+
+ fprintf(stream,
+ " MODE: 0x%lx\n", core.mode);
+ fprintf(stream,
+ " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n",
+ core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp);
+ fprintf(stream,
+ " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n",
+ core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2);
+ fprintf(stream,
+ " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n",
+ core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1);
+ fprintf(stream,
+ " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n",
+ core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5);
+ fprintf(stream,
+ " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n",
+ core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3);
+ fprintf(stream,
+ " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n",
+ core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7);
+ fprintf(stream,
+ " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n",
+ core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11);
+ fprintf(stream,
+ " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n",
+ core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
+}
+
+static void __aligned(16) guest_unexp_trap(void)
+{
+ sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+ KVM_RISCV_SELFTESTS_SBI_UNEXP,
+ 0, 0, 0, 0, 0, 0);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ int r;
+ size_t stack_size;
+ unsigned long stack_vaddr;
+ unsigned long current_gp = 0;
+ struct kvm_mp_state mps;
+ struct kvm_vcpu *vcpu;
+
+ stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ riscv_vcpu_mmu_setup(vcpu);
+
+ /*
+ * With SBI HSM support in KVM RISC-V, all secondary VCPUs are
+ * powered-off by default so we ensure that all secondary VCPUs
+ * are powered-on using KVM_SET_MP_STATE ioctl().
+ */
+ mps.mp_state = KVM_MP_STATE_RUNNABLE;
+ r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps);
+ TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
+
+ /* Setup global pointer of guest to be same as the host */
+ asm volatile (
+ "add %0, gp, zero" : "=r" (current_gp) : : "memory");
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp);
+
+ /* Setup stack pointer and program counter of guest */
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
+
+ /* Setup sscratch for guest_get_vcpuid() */
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id);
+
+ /* Setup default exception vector of guest */
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap);
+
+ return vcpu;
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ va_list ap;
+ uint64_t id = RISCV_CORE_REG(regs.a0);
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ " num: %u", num);
+
+ va_start(ap, num);
+
+ for (i = 0; i < num; i++) {
+ switch (i) {
+ case 0:
+ id = RISCV_CORE_REG(regs.a0);
+ break;
+ case 1:
+ id = RISCV_CORE_REG(regs.a1);
+ break;
+ case 2:
+ id = RISCV_CORE_REG(regs.a2);
+ break;
+ case 3:
+ id = RISCV_CORE_REG(regs.a3);
+ break;
+ case 4:
+ id = RISCV_CORE_REG(regs.a4);
+ break;
+ case 5:
+ id = RISCV_CORE_REG(regs.a5);
+ break;
+ case 6:
+ id = RISCV_CORE_REG(regs.a6);
+ break;
+ case 7:
+ id = RISCV_CORE_REG(regs.a7);
+ break;
+ }
+ vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t));
+ }
+
+ va_end(ap);
+}
+
+void kvm_exit_unexpected_exception(int vector, int ec)
+{
+ ucall(UCALL_UNHANDLED, 2, vector, ec);
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ }
+}
+
+struct handlers {
+ exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
+};
+
+void route_exception(struct pt_regs *regs)
+{
+ struct handlers *handlers = (struct handlers *)exception_handlers;
+ int vector = 0, ec;
+
+ ec = regs->cause & ~CAUSE_IRQ_FLAG;
+ if (ec >= NR_EXCEPTIONS)
+ goto unexpected_exception;
+
+ /* Use the same handler for all the interrupts */
+ if (regs->cause & CAUSE_IRQ_FLAG) {
+ vector = 1;
+ ec = 0;
+ }
+
+ if (handlers && handlers->exception_handlers[vector][ec])
+ return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+ return kvm_exit_unexpected_exception(vector, ec);
+}
+
+void vcpu_init_vector_tables(struct kvm_vcpu *vcpu)
+{
+ extern char exception_vectors;
+
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors);
+}
+
+void vm_init_vector_tables(struct kvm_vm *vm)
+{
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ vm->page_size, MEM_REGION_DATA);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(vector < NR_EXCEPTIONS);
+ handlers->exception_handlers[0][vector] = handler;
+}
+
+void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ handlers->exception_handlers[1][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return csr_read(CSR_SSCRATCH);
+}
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
+ register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
+ register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
+ register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
+ register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
+ register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
+ register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
+ register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
+ struct sbiret ret;
+
+ asm volatile (
+ "ecall"
+ : "+r" (a0), "+r" (a1)
+ : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
+ : "memory");
+ ret.error = a0;
+ ret.value = a1;
+
+ return ret;
+}
+
+bool guest_sbi_probe_extension(int extid, long *out_val)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid,
+ 0, 0, 0, 0, 0);
+
+ __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED,
+ "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value);
+
+ if (ret.error == SBI_ERR_NOT_SUPPORTED)
+ return false;
+
+ if (out_val)
+ *out_val = ret.value;
+
+ return true;
+}
+
+unsigned long get_host_sbi_spec_version(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0,
+ 0, 0, 0, 0, 0);
+
+ GUEST_ASSERT(!ret.error);
+
+ return ret.value;
+}
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
new file mode 100644
index 000000000000..b5035c63d516
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "sbi.h"
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_RISCV_SBI &&
+ run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) {
+ switch (run->riscv_sbi.function_id) {
+ case KVM_RISCV_SELFTESTS_SBI_UCALL:
+ return (void *)run->riscv_sbi.args[0];
+ case KVM_RISCV_SELFTESTS_SBI_UNEXP:
+ vcpu_dump(stderr, vcpu, 2);
+ TEST_ASSERT(0, "Unexpected trap taken by guest");
+ break;
+ default:
+ break;
+ }
+ }
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
index 86b9e611ad87..2c432fa164f1 100644
--- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
+++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
@@ -8,8 +8,6 @@
#include "test_util.h"
#include "kvm_util.h"
-#define VCPU_ID 6
-
#define ICPT_INSTRUCTION 0x04
#define IPA0_DIAG 0x8300
@@ -27,17 +25,17 @@ static void guest_code(void)
*/
static uint64_t diag318_handler(void)
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
uint64_t reg;
uint64_t diag318_info;
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_run(vm, VCPU_ID);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_run(vcpu);
+ run = vcpu->run;
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "DIAGNOSE 0x0318 instruction was not intercepted");
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
"Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
@@ -62,7 +60,7 @@ uint64_t get_diag318_info(void)
* If KVM does not support diag318, then return 0 to
* ensure tests do not break.
*/
- if (!kvm_check_cap(KVM_CAP_S390_DIAG318)) {
+ if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
if (!printed_skip) {
fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
"Skipping diag318 test.\n");
diff --git a/tools/testing/selftests/kvm/lib/s390/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c
new file mode 100644
index 000000000000..d540812d911a
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390/facility.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * Contains the definition for the global variables to have the test facitlity feature.
+ */
+
+#include "facility.h"
+
+uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+bool stfle_flag;
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 0152f356c099..8ceeb17c819a 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
@@ -7,24 +7,22 @@
#include "processor.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
-
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define PAGES_PER_REGION 4
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
vm_paddr_t paddr;
- TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+ TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
if (vm->pgd_created)
return;
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
vm->pgd = paddr;
@@ -36,12 +34,12 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
* a page table (ri == 4). Returns a suitable region/segment table entry
* which points to the freshly allocated pages.
*/
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
{
uint64_t taddr;
taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
return (taddr & REGION_ENTRY_ORIGIN)
@@ -49,8 +47,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
- uint32_t memslot)
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
int ri, idx;
uint64_t *entry;
@@ -77,24 +74,24 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
- entry[idx] = virt_alloc_region(vm, ri, memslot);
+ entry[idx] = virt_alloc_region(vm, ri);
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
/* Fill in page table entry */
- idx = (gva >> 12) & 0x0ffu; /* page index */
+ idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */
if (!(entry[idx] & PAGE_INVALID))
fprintf(stderr,
"WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
entry[idx] = gpa;
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
int ri, idx;
uint64_t *entry;
- TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+ TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
entry = addr_gpa2hva(vm, vm->pgd);
@@ -106,7 +103,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
- idx = (gva >> 12) & 0x0ffu; /* page index */
+ idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */
TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
"No page mapping for vm virtual address 0x%lx", gva);
@@ -150,7 +147,7 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
}
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
if (!vm->pgd_created)
return;
@@ -158,68 +155,74 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump_region(stream, vm, indent, vm->pgd);
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ vcpu->run->psw_addr = (uintptr_t)guest_code;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
uint64_t stack_vaddr;
struct kvm_regs regs;
struct kvm_sregs sregs;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
- TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+ TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
- stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
- vm_vcpu_add(vm, vcpuid);
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
/* Setup guest registers */
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
- vcpu_sregs_set(vm, vcpuid, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
- run = vcpu_state(vm, vcpuid);
- run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
- run->psw_addr = (uintptr_t)guest_code;
+ return vcpu;
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
struct kvm_regs regs;
int i;
TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
- " num: %u\n",
+ " num: %u",
num);
va_start(ap, num);
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
for (i = 0; i < num; i++)
regs.gprs[i + 2] = va_arg(ap, uint64_t);
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
va_end(ap);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-
- if (!vcpu)
- return;
-
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
- indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
+ indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
}
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+bool kvm_arch_has_default_irqchip(void)
{
+ return true;
}
diff --git a/tools/testing/selftests/kvm/lib/s390/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c
new file mode 100644
index 000000000000..cca98734653d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390/ucall.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
+ run->s390_sieic.icptcode == 4 &&
+ (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */
+ (run->s390_sieic.ipb >> 16) == 0x501) {
+ int reg = run->s390_sieic.ipa & 0xf;
+
+ return (void *)run->s.regs.gprs[reg];
+ }
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
deleted file mode 100644
index 9d3b0f15249a..000000000000
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ /dev/null
@@ -1,59 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2019 Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-void ucall_init(struct kvm_vm *vm, void *arg)
-{
-}
-
-void ucall_uninit(struct kvm_vm *vm)
-{
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
- asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
-
- if (uc)
- memset(uc, 0, sizeof(*uc));
-
- if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
- run->s390_sieic.icptcode == 4 &&
- (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */
- (run->s390_sieic.ipb >> 16) == 0x501) {
- int reg = run->s390_sieic.ipa & 0xf;
-
- memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]),
- sizeof(ucall));
-
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
- }
-
- return ucall.cmd;
-}
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index 031ba3c932ed..a99188f87a38 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -116,7 +116,7 @@
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*
@@ -202,7 +202,7 @@ static sparsebit_num_t node_num_set(struct node *nodep)
/* Returns a pointer to the node that describes the
* lowest bit index.
*/
-static struct node *node_first(struct sparsebit *s)
+static struct node *node_first(const struct sparsebit *s)
{
struct node *nodep;
@@ -216,7 +216,7 @@ static struct node *node_first(struct sparsebit *s)
* lowest bit index > the index of the node pointed to by np.
* Returns NULL if no node with a higher index exists.
*/
-static struct node *node_next(struct sparsebit *s, struct node *np)
+static struct node *node_next(const struct sparsebit *s, struct node *np)
{
struct node *nodep = np;
@@ -244,7 +244,7 @@ static struct node *node_next(struct sparsebit *s, struct node *np)
* highest index < the index of the node pointed to by np.
* Returns NULL if no node with a lower index exists.
*/
-static struct node *node_prev(struct sparsebit *s, struct node *np)
+static struct node *node_prev(const struct sparsebit *s, struct node *np)
{
struct node *nodep = np;
@@ -273,7 +273,7 @@ static struct node *node_prev(struct sparsebit *s, struct node *np)
* subtree and duplicates the bit settings to the newly allocated nodes.
* Returns the newly allocated copy of subtree.
*/
-static struct node *node_copy_subtree(struct node *subtree)
+static struct node *node_copy_subtree(const struct node *subtree)
{
struct node *root;
@@ -307,7 +307,7 @@ static struct node *node_copy_subtree(struct node *subtree)
* index is within the bits described by the mask bits or the number of
* contiguous bits set after the mask. Returns NULL if there is no such node.
*/
-static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+static struct node *node_find(const struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
@@ -393,7 +393,7 @@ static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
}
/* Returns whether all the bits in the sparsebit array are set. */
-bool sparsebit_all_set(struct sparsebit *s)
+bool sparsebit_all_set(const struct sparsebit *s)
{
/*
* If any nodes there must be at least one bit set. Only case
@@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*/
@@ -634,7 +634,6 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
tmp = node_prev(s, nodep);
node_rm(s, nodep);
- nodep = NULL;
nodep = tmp;
reduction_performed = true;
@@ -776,7 +775,7 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
/* Returns whether the bit at the index given by idx, within the
* sparsebit array is set or not.
*/
-bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+bool sparsebit_is_set(const struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
@@ -922,7 +921,7 @@ static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
* used by test cases after they detect an unexpected condition, as a means
* to capture diagnostic information.
*/
-static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+static void sparsebit_dump_internal(FILE *stream, const struct sparsebit *s,
unsigned int indent)
{
/* Dump the contents of s */
@@ -970,7 +969,7 @@ void sparsebit_free(struct sparsebit **sbitp)
* sparsebit_alloc(). It can though already have bits set, which
* if different from src will be cleared.
*/
-void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+void sparsebit_copy(struct sparsebit *d, const struct sparsebit *s)
{
/* First clear any bits already set in the destination */
sparsebit_clear_all(d);
@@ -982,7 +981,7 @@ void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
}
/* Returns whether num consecutive bits starting at idx are all set. */
-bool sparsebit_is_set_num(struct sparsebit *s,
+bool sparsebit_is_set_num(const struct sparsebit *s,
sparsebit_idx_t idx, sparsebit_num_t num)
{
sparsebit_idx_t next_cleared;
@@ -1006,14 +1005,14 @@ bool sparsebit_is_set_num(struct sparsebit *s,
}
/* Returns whether the bit at the index given by idx. */
-bool sparsebit_is_clear(struct sparsebit *s,
+bool sparsebit_is_clear(const struct sparsebit *s,
sparsebit_idx_t idx)
{
return !sparsebit_is_set(s, idx);
}
/* Returns whether num consecutive bits starting at idx are all cleared. */
-bool sparsebit_is_clear_num(struct sparsebit *s,
+bool sparsebit_is_clear_num(const struct sparsebit *s,
sparsebit_idx_t idx, sparsebit_num_t num)
{
sparsebit_idx_t next_set;
@@ -1042,13 +1041,13 @@ bool sparsebit_is_clear_num(struct sparsebit *s,
* value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
* to determine if the sparsebit array has any bits set.
*/
-sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+sparsebit_num_t sparsebit_num_set(const struct sparsebit *s)
{
return s->num_set;
}
/* Returns whether any bit is set in the sparsebit array. */
-bool sparsebit_any_set(struct sparsebit *s)
+bool sparsebit_any_set(const struct sparsebit *s)
{
/*
* Nodes only describe set bits. If any nodes then there
@@ -1071,20 +1070,20 @@ bool sparsebit_any_set(struct sparsebit *s)
}
/* Returns whether all the bits in the sparsebit array are cleared. */
-bool sparsebit_all_clear(struct sparsebit *s)
+bool sparsebit_all_clear(const struct sparsebit *s)
{
return !sparsebit_any_set(s);
}
/* Returns whether all the bits in the sparsebit array are set. */
-bool sparsebit_any_clear(struct sparsebit *s)
+bool sparsebit_any_clear(const struct sparsebit *s)
{
return !sparsebit_all_set(s);
}
/* Returns the index of the first set bit. Abort if no bits are set.
*/
-sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_set(const struct sparsebit *s)
{
struct node *nodep;
@@ -1098,7 +1097,7 @@ sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
/* Returns the index of the first cleared bit. Abort if
* no bits are cleared.
*/
-sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *s)
{
struct node *nodep1, *nodep2;
@@ -1152,7 +1151,7 @@ sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
/* Returns index of next bit set within s after the index given by prev.
* Returns 0 if there are no bits after prev that are set.
*/
-sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set(const struct sparsebit *s,
sparsebit_idx_t prev)
{
sparsebit_idx_t lowest_possible = prev + 1;
@@ -1245,7 +1244,7 @@ sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
/* Returns index of next bit cleared within s after the index given by prev.
* Returns 0 if there are no bits after prev that are cleared.
*/
-sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *s,
sparsebit_idx_t prev)
{
sparsebit_idx_t lowest_possible = prev + 1;
@@ -1301,7 +1300,7 @@ sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
* and returns the index of the first sequence of num consecutively set
* bits. Returns a value of 0 of no such sequence exists.
*/
-sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
sparsebit_idx_t idx;
@@ -1336,7 +1335,7 @@ sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
* and returns the index of the first sequence of num consecutively cleared
* bits. Returns a value of 0 of no such sequence exists.
*/
-sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
sparsebit_idx_t idx;
@@ -1584,7 +1583,7 @@ static size_t display_range(FILE *stream, sparsebit_idx_t low,
* contiguous bits. This is done because '-' is used to specify command-line
* options, and sometimes ranges are specified as command-line arguments.
*/
-void sparsebit_dump(FILE *stream, struct sparsebit *s,
+void sparsebit_dump(FILE *stream, const struct sparsebit *s,
unsigned int indent)
{
size_t current_line_len = 0;
@@ -1682,7 +1681,7 @@ void sparsebit_dump(FILE *stream, struct sparsebit *s,
* s. On error, diagnostic information is printed to stderr and
* abort is called.
*/
-void sparsebit_validate_internal(struct sparsebit *s)
+void sparsebit_validate_internal(const struct sparsebit *s)
{
bool error_detected = false;
struct node *nodep, *prev = NULL;
@@ -1866,7 +1865,7 @@ void sparsebit_validate_internal(struct sparsebit *s)
* of total bits set.
*/
if (s->num_set != total_bits_set) {
- fprintf(stderr, "Number of bits set missmatch,\n"
+ fprintf(stderr, "Number of bits set mismatch,\n"
" s->num_set: 0x%lx total_bits_set: 0x%lx",
s->num_set, total_bits_set);
@@ -1890,7 +1889,6 @@ void sparsebit_validate_internal(struct sparsebit *s)
*/
#include <stdlib.h>
-#include <assert.h>
struct range {
sparsebit_idx_t first, last;
diff --git a/tools/testing/selftests/kvm/lib/string_override.c b/tools/testing/selftests/kvm/lib/string_override.c
new file mode 100644
index 000000000000..5d1c87277c49
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/string_override.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+
+/*
+ * Override the "basic" built-in string helpers so that they can be used in
+ * guest code. KVM selftests don't support dynamic loading in guest code and
+ * will jump into the weeds if the compiler decides to insert an out-of-line
+ * call via the PLT.
+ */
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) {
+ if ((res = *su1 - *su2) != 0)
+ break;
+ }
+ return res;
+}
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+
+void *memset(void *s, int c, size_t count)
+{
+ char *xs = s;
+
+ while (count--)
+ *xs++ = c;
+ return s;
+}
+
+size_t strnlen(const char *s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 906c955384e2..8a1848586a85 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -4,16 +4,44 @@
*
* Copyright (C) 2020, Google LLC.
*/
-
+#include <stdio.h>
+#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <time.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <linux/mman.h>
#include "linux/kernel.h"
#include "test_util.h"
+sigjmp_buf expect_sigbus_jmpbuf;
+
+void __attribute__((used)) expect_sigbus_handler(int signum)
+{
+ siglongjmp(expect_sigbus_jmpbuf, 1);
+}
+
+/*
+ * Random number generator that is usable from guest code. This is the
+ * Park-Miller LCG using standard constants.
+ */
+
+struct guest_random_state new_guest_random_state(uint32_t seed)
+{
+ struct guest_random_state s = {.seed = seed};
+ return s;
+}
+
+uint32_t guest_random_u32(struct guest_random_state *state)
+{
+ state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1);
+ return state->seed;
+}
+
/*
* Parses "[0-9]+[kmgt]?".
*/
@@ -111,30 +139,306 @@ void print_skip(const char *fmt, ...)
puts(", skipping test");
}
-const struct vm_mem_backing_src_alias backing_src_aliases[] = {
- {"anonymous", VM_MEM_SRC_ANONYMOUS,},
- {"anonymous_thp", VM_MEM_SRC_ANONYMOUS_THP,},
- {"anonymous_hugetlb", VM_MEM_SRC_ANONYMOUS_HUGETLB,},
-};
+static bool test_sysfs_path(const char *path)
+{
+ struct stat statbuf;
+ int ret;
+
+ ret = stat(path, &statbuf);
+ TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
+ "Error in stat()ing '%s'", path);
+
+ return ret == 0;
+}
+
+bool thp_configured(void)
+{
+ return test_sysfs_path("/sys/kernel/mm/transparent_hugepage");
+}
+
+static size_t get_sysfs_val(const char *path)
+{
+ size_t size;
+ FILE *f;
+ int ret;
+
+ f = fopen(path, "r");
+ TEST_ASSERT(f, "Error opening '%s'", path);
+
+ ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret > 0, "Error reading '%s'", path);
+
+ /* Re-scan the input stream to verify the entire file was read. */
+ ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret < 1, "Error reading '%s'", path);
+
+ fclose(f);
+ return size;
+}
+
+size_t get_trans_hugepagesz(void)
+{
+ TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
+
+ return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size");
+}
+
+bool is_numa_balancing_enabled(void)
+{
+ if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
+ return false;
+ return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
+}
+
+size_t get_def_hugetlb_pagesz(void)
+{
+ char buf[64];
+ const char *hugepagesize = "Hugepagesize:";
+ const char *hugepages_total = "HugePages_Total:";
+ FILE *f;
+
+ f = fopen("/proc/meminfo", "r");
+ TEST_ASSERT(f != NULL, "Error in opening /proc/meminfo");
+
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+ if (strstr(buf, hugepages_total) == buf) {
+ unsigned long long total = strtoull(buf + strlen(hugepages_total), NULL, 10);
+ if (!total) {
+ fprintf(stderr, "HUGETLB is not enabled in /proc/sys/vm/nr_hugepages\n");
+ exit(KSFT_SKIP);
+ }
+ }
+ if (strstr(buf, hugepagesize) == buf) {
+ fclose(f);
+ return strtoull(buf + strlen(hugepagesize), NULL, 10) << 10;
+ }
+ }
+
+ if (feof(f)) {
+ fprintf(stderr, "HUGETLB is not configured in host kernel");
+ exit(KSFT_SKIP);
+ }
+
+ TEST_FAIL("Error in reading /proc/meminfo");
+}
+
+#define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
+#define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB)
+
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
+{
+ static const struct vm_mem_backing_src_alias aliases[] = {
+ [VM_MEM_SRC_ANONYMOUS] = {
+ .name = "anonymous",
+ .flag = ANON_FLAGS,
+ },
+ [VM_MEM_SRC_ANONYMOUS_THP] = {
+ .name = "anonymous_thp",
+ .flag = ANON_FLAGS,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
+ .name = "anonymous_hugetlb",
+ .flag = ANON_HUGE_FLAGS,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
+ .name = "anonymous_hugetlb_16kb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
+ .name = "anonymous_hugetlb_64kb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
+ .name = "anonymous_hugetlb_512kb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
+ .name = "anonymous_hugetlb_1mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
+ .name = "anonymous_hugetlb_2mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
+ .name = "anonymous_hugetlb_8mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
+ .name = "anonymous_hugetlb_16mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
+ .name = "anonymous_hugetlb_32mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
+ .name = "anonymous_hugetlb_256mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
+ .name = "anonymous_hugetlb_512mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
+ .name = "anonymous_hugetlb_1gb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
+ .name = "anonymous_hugetlb_2gb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
+ .name = "anonymous_hugetlb_16gb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB,
+ },
+ [VM_MEM_SRC_SHMEM] = {
+ .name = "shmem",
+ .flag = MAP_SHARED,
+ },
+ [VM_MEM_SRC_SHARED_HUGETLB] = {
+ .name = "shared_hugetlb",
+ /*
+ * No MAP_HUGETLB, we use MFD_HUGETLB instead. Since
+ * we're using "file backed" memory, we need to specify
+ * this when the FD is created, not when the area is
+ * mapped.
+ */
+ .flag = MAP_SHARED,
+ },
+ };
+ _Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
+ "Missing new backing src types?");
+
+ TEST_ASSERT(i < NUM_SRC_TYPES, "Backing src type ID %d too big", i);
+
+ return &aliases[i];
+}
+
+#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK))
+
+size_t get_backing_src_pagesz(uint32_t i)
+{
+ uint32_t flag = vm_mem_backing_src_alias(i)->flag;
+
+ switch (i) {
+ case VM_MEM_SRC_ANONYMOUS:
+ case VM_MEM_SRC_SHMEM:
+ return getpagesize();
+ case VM_MEM_SRC_ANONYMOUS_THP:
+ return get_trans_hugepagesz();
+ case VM_MEM_SRC_ANONYMOUS_HUGETLB:
+ case VM_MEM_SRC_SHARED_HUGETLB:
+ return get_def_hugetlb_pagesz();
+ default:
+ return MAP_HUGE_PAGE_SIZE(flag);
+ }
+}
-void backing_src_help(void)
+bool is_backing_src_hugetlb(uint32_t i)
+{
+ return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB);
+}
+
+static void print_available_backing_src_types(const char *prefix)
{
int i;
- printf("Available backing src types:\n");
- for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++)
- printf("\t%s\n", backing_src_aliases[i].name);
+ printf("%sAvailable backing src types:\n", prefix);
+
+ for (i = 0; i < NUM_SRC_TYPES; i++)
+ printf("%s %s\n", prefix, vm_mem_backing_src_alias(i)->name);
+}
+
+void backing_src_help(const char *flag)
+{
+ printf(" %s: specify the type of memory that should be used to\n"
+ " back the guest data region. (default: %s)\n",
+ flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
+ print_available_backing_src_types(" ");
}
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
{
int i;
- for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++)
- if (!strcmp(type_name, backing_src_aliases[i].name))
- return backing_src_aliases[i].type;
+ for (i = 0; i < NUM_SRC_TYPES; i++)
+ if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
+ return i;
- backing_src_help();
+ print_available_backing_src_types("");
TEST_FAIL("Unknown backing src type: %s", type_name);
return -1;
}
+
+long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ /* Return MIN_RUN_DELAY_NS upon failure just to be safe */
+ if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) < 2)
+ val[1] = MIN_RUN_DELAY_NS;
+ fclose(fp);
+
+ return val[1];
+}
+
+int atoi_paranoid(const char *num_str)
+{
+ char *end_ptr;
+ long num;
+
+ errno = 0;
+ num = strtol(num_str, &end_ptr, 0);
+ TEST_ASSERT(!errno, "strtol(\"%s\") failed", num_str);
+ TEST_ASSERT(num_str != end_ptr,
+ "strtol(\"%s\") didn't find a valid integer.", num_str);
+ TEST_ASSERT(*end_ptr == '\0',
+ "strtol(\"%s\") failed to parse trailing characters \"%s\".",
+ num_str, end_ptr);
+ TEST_ASSERT(num >= INT_MIN && num <= INT_MAX,
+ "%ld not in range of [%d, %d]", num, INT_MIN, INT_MAX);
+
+ return num;
+}
+
+char *strdup_printf(const char *fmt, ...)
+{
+ va_list ap;
+ char *str;
+
+ va_start(ap, fmt);
+ TEST_ASSERT(vasprintf(&str, fmt, ap) >= 0, "vasprintf() failed");
+ va_end(ap);
+
+ return str;
+}
+
+#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
+
+char *sys_get_cur_clocksource(void)
+{
+ char *clk_name;
+ struct stat st;
+ FILE *fp;
+
+ fp = fopen(CLOCKSOURCE_PATH, "r");
+ TEST_ASSERT(fp, "failed to open clocksource file, errno: %d", errno);
+
+ TEST_ASSERT(!fstat(fileno(fp), &st), "failed to stat clocksource file, errno: %d",
+ errno);
+
+ clk_name = malloc(st.st_size);
+ TEST_ASSERT(clk_name, "failed to allocate buffer to read file");
+
+ TEST_ASSERT(fgets(clk_name, st.st_size, fp), "failed to read clocksource file: %d",
+ ferror(fp));
+
+ fclose(fp);
+
+ return clk_name;
+}
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
new file mode 100644
index 000000000000..42151e571953
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "linux/types.h"
+#include "linux/bitmap.h"
+#include "linux/atomic.h"
+
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+
+#define GUEST_UCALL_FAILED -1
+
+struct ucall_header {
+ DECLARE_BITMAP(in_use, KVM_MAX_VCPUS);
+ struct ucall ucalls[KVM_MAX_VCPUS];
+};
+
+int ucall_nr_pages_required(uint64_t page_size)
+{
+ return align_up(sizeof(struct ucall_header), page_size) / page_size;
+}
+
+/*
+ * ucall_pool holds per-VM values (global data is duplicated by each VM), it
+ * must not be accessed from host code.
+ */
+static struct ucall_header *ucall_pool;
+
+void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ struct ucall_header *hdr;
+ struct ucall *uc;
+ vm_vaddr_t vaddr;
+ int i;
+
+ vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR,
+ MEM_REGION_DATA);
+ hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
+ memset(hdr, 0, sizeof(*hdr));
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ uc = &hdr->ucalls[i];
+ uc->hva = uc;
+ }
+
+ write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr);
+
+ ucall_arch_init(vm, mmio_gpa);
+}
+
+static struct ucall *ucall_alloc(void)
+{
+ struct ucall *uc;
+ int i;
+
+ if (!ucall_pool)
+ goto ucall_failed;
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (!test_and_set_bit(i, ucall_pool->in_use)) {
+ uc = &ucall_pool->ucalls[i];
+ memset(uc->args, 0, sizeof(uc->args));
+ return uc;
+ }
+ }
+
+ucall_failed:
+ /*
+ * If the vCPU cannot grab a ucall structure, make a bare ucall with a
+ * magic value to signal to get_ucall() that things went sideways.
+ * GUEST_ASSERT() depends on ucall_alloc() and so cannot be used here.
+ */
+ ucall_arch_do_ucall(GUEST_UCALL_FAILED);
+ return NULL;
+}
+
+static void ucall_free(struct ucall *uc)
+{
+ /* Beware, here be pointer arithmetic. */
+ clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
+}
+
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+ unsigned int line, const char *fmt, ...)
+{
+ struct ucall *uc;
+ va_list va;
+
+ uc = ucall_alloc();
+ uc->cmd = cmd;
+
+ WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp));
+ WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file));
+ WRITE_ONCE(uc->args[GUEST_LINE], line);
+
+ va_start(va, fmt);
+ guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+void ucall_fmt(uint64_t cmd, const char *fmt, ...)
+{
+ struct ucall *uc;
+ va_list va;
+
+ uc = ucall_alloc();
+ uc->cmd = cmd;
+
+ va_start(va, fmt);
+ guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall *uc;
+ va_list va;
+ int i;
+
+ uc = ucall_alloc();
+
+ WRITE_ONCE(uc->cmd, cmd);
+
+ nargs = min(nargs, UCALL_MAX_ARGS);
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ WRITE_ONCE(uc->args[i], va_arg(va, uint64_t));
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ struct ucall ucall;
+ void *addr;
+
+ if (!uc)
+ uc = &ucall;
+
+ addr = ucall_arch_get_ucall(vcpu);
+ if (addr) {
+ TEST_ASSERT(addr != (void *)GUEST_UCALL_FAILED,
+ "Guest failed to allocate ucall struct");
+
+ memcpy(uc, addr, sizeof(*uc));
+ vcpu_run_complete_io(vcpu);
+ } else {
+ memset(uc, 0, sizeof(*uc));
+ }
+
+ return uc->cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
new file mode 100644
index 000000000000..5bde176cedd5
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM userfaultfd util
+ * Adapted from demand_paging_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019-2022 Google LLC
+ */
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+#include <sys/epoll.h>
+#include <sys/syscall.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "userfaultfd_util.h"
+
+#ifdef __NR_userfaultfd
+
+static void *uffd_handler_thread_fn(void *arg)
+{
+ struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg;
+ int uffd = reader_args->uffd;
+ int64_t pages = 0;
+ struct timespec start;
+ struct timespec ts_diff;
+ struct epoll_event evt;
+ int epollfd;
+
+ epollfd = epoll_create(1);
+ TEST_ASSERT(epollfd >= 0, "Failed to create epollfd.");
+
+ evt.events = EPOLLIN | EPOLLEXCLUSIVE;
+ evt.data.u32 = 0;
+ TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt),
+ "Failed to add uffd to epollfd");
+
+ evt.events = EPOLLIN;
+ evt.data.u32 = 1;
+ TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt),
+ "Failed to add pipe to epollfd");
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ while (1) {
+ struct uffd_msg msg;
+ int r;
+
+ r = epoll_wait(epollfd, &evt, 1, -1);
+ TEST_ASSERT(r == 1,
+ "Unexpected number of events (%d) from epoll, errno = %d",
+ r, errno);
+
+ if (evt.data.u32 == 1) {
+ char tmp_chr;
+
+ TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+ "Reader thread received EPOLLERR or EPOLLHUP on pipe.");
+ r = read(reader_args->pipe, &tmp_chr, 1);
+ TEST_ASSERT(r == 1,
+ "Error reading pipefd in uffd reader thread");
+ break;
+ }
+
+ TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+ "Reader thread received EPOLLERR or EPOLLHUP on uffd.");
+
+ r = read(uffd, &msg, sizeof(msg));
+ if (r == -1) {
+ TEST_ASSERT(errno == EAGAIN,
+ "Error reading from UFFD: errno = %d", errno);
+ continue;
+ }
+
+ TEST_ASSERT(r == sizeof(msg),
+ "Read on uffd returned unexpected number of bytes (%d)", r);
+
+ if (!(msg.event & UFFD_EVENT_PAGEFAULT))
+ continue;
+
+ if (reader_args->delay)
+ usleep(reader_args->delay);
+ r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
+ TEST_ASSERT(r >= 0,
+ "Reader thread handler fn returned negative value %d", r);
+ pages++;
+ }
+
+ ts_diff = timespec_elapsed(start);
+ PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
+ pages, ts_diff.tv_sec, ts_diff.tv_nsec,
+ pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
+
+ return NULL;
+}
+
+struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
+ void *hva, uint64_t len,
+ uint64_t num_readers,
+ uffd_handler_t handler)
+{
+ struct uffd_desc *uffd_desc;
+ bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
+ int uffd;
+ struct uffdio_api uffdio_api;
+ struct uffdio_register uffdio_register;
+ uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
+ int ret, i;
+
+ PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
+ is_minor ? "MINOR" : "MISSING",
+ is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");
+
+ uffd_desc = malloc(sizeof(struct uffd_desc));
+ TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
+
+ uffd_desc->pipefds = calloc(sizeof(int), num_readers);
+ TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes");
+
+ uffd_desc->readers = calloc(sizeof(pthread_t), num_readers);
+ TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads");
+
+ uffd_desc->reader_args = calloc(sizeof(struct uffd_reader_args), num_readers);
+ TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args");
+
+ uffd_desc->num_readers = num_readers;
+
+ /* In order to get minor faults, prefault via the alias. */
+ if (is_minor)
+ expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
+ "ioctl UFFDIO_API failed: %" PRIu64,
+ (uint64_t)uffdio_api.api);
+
+ uffdio_register.range.start = (uint64_t)hva;
+ uffdio_register.range.len = len;
+ uffdio_register.mode = uffd_mode;
+ TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
+ "ioctl UFFDIO_REGISTER failed");
+ TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
+ expected_ioctls, "missing userfaultfd ioctls");
+
+ uffd_desc->uffd = uffd;
+ for (i = 0; i < uffd_desc->num_readers; ++i) {
+ int pipes[2];
+
+ ret = pipe2((int *) &pipes, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p",
+ i, uffd_desc);
+
+ uffd_desc->pipefds[i] = pipes[1];
+
+ uffd_desc->reader_args[i].uffd_mode = uffd_mode;
+ uffd_desc->reader_args[i].uffd = uffd;
+ uffd_desc->reader_args[i].delay = delay;
+ uffd_desc->reader_args[i].handler = handler;
+ uffd_desc->reader_args[i].pipe = pipes[0];
+
+ pthread_create(&uffd_desc->readers[i], NULL, uffd_handler_thread_fn,
+ &uffd_desc->reader_args[i]);
+
+ PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n",
+ i, hva, hva + len);
+ }
+
+ return uffd_desc;
+}
+
+void uffd_stop_demand_paging(struct uffd_desc *uffd)
+{
+ char c = 0;
+ int i;
+
+ for (i = 0; i < uffd->num_readers; ++i)
+ TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1,
+ "Unable to write to pipefd %i for uffd_desc %p", i, uffd);
+
+ for (i = 0; i < uffd->num_readers; ++i)
+ TEST_ASSERT(!pthread_join(uffd->readers[i], NULL),
+ "Pthread_join failed on reader %i for uffd_desc %p", i, uffd);
+
+ close(uffd->uffd);
+
+ for (i = 0; i < uffd->num_readers; ++i) {
+ close(uffd->pipefds[i]);
+ close(uffd->reader_args[i].pipe);
+ }
+
+ free(uffd->pipefds);
+ free(uffd->readers);
+ free(uffd->reader_args);
+ free(uffd);
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/kvm/lib/x86/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c
new file mode 100644
index 000000000000..89153a333e83
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86/apic.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) &
+ ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+ uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+ /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
+ if (val & MSR_IA32_APICBASE_EXTD) {
+ apic_disable();
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+ } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+ wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+ }
+
+ /*
+ * Per SDM: reset value of spurious interrupt vector register has the
+ * APIC software enabled bit=0. It must be enabled in addition to the
+ * enable bit in the MSR.
+ */
+ val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+ xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+ wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+ MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+ x2apic_write_reg(APIC_SPIV,
+ x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S
index aaf7bc7d2ce1..7629819734af 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S
+++ b/tools/testing/selftests/kvm/lib/x86/handlers.S
@@ -54,9 +54,9 @@ idt_handlers:
.align 8
/* Fetch current address and append it to idt_handlers. */
- current_handler = .
+666 :
.pushsection .rodata
-.quad current_handler
+ .quad 666b
.popsection
.if ! \has_error
diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c
new file mode 100644
index 000000000000..15bc8cd583aa
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hyper-V specific functions.
+ *
+ * Copyright (C) 2021, Red Hat Inc.
+ */
+#include <stdint.h>
+#include "processor.h"
+#include "hyperv.h"
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ static struct kvm_cpuid2 *cpuid_full;
+ const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+ int i, nent = 0;
+
+ if (!cpuid_full) {
+ cpuid_sys = kvm_get_supported_cpuid();
+ cpuid_hv = kvm_get_supported_hv_cpuid();
+
+ cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
+ if (!cpuid_full) {
+ perror("malloc");
+ abort();
+ }
+
+ /* Need to skip KVM CPUID leaves 0x400000xx */
+ for (i = 0; i < cpuid_sys->nent; i++) {
+ if (cpuid_sys->entries[i].function >= 0x40000000 &&
+ cpuid_sys->entries[i].function < 0x40000100)
+ continue;
+ cpuid_full->entries[nent] = cpuid_sys->entries[i];
+ nent++;
+ }
+
+ memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
+ cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
+ cpuid_full->nent = nent + cpuid_hv->nent;
+ }
+
+ vcpu_init_cpuid(vcpu, cpuid_full);
+}
+
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+
+ vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ return cpuid;
+}
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID))
+ return false;
+
+ return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature);
+}
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+ vm_vaddr_t *p_hv_pages_gva)
+{
+ vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
+ struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
+
+ /* Setup of a region of guest memory for the VP Assist page. */
+ hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
+ hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
+
+ /* Setup of a region of guest memory for the partition assist page. */
+ hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
+ hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
+
+ /* Setup of a region of guest memory for the enlightened VMCS. */
+ hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
+ hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
+ hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
+
+ *p_hv_pages_gva = hv_pages_gva;
+ return hv;
+}
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+ uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+ HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+ wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+ current_vp_assist = vp_assist;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
new file mode 100644
index 000000000000..0b1f288ad556
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86/memstress.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86-specific extensions to memstress.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "memstress.h"
+#include "processor.h"
+#include "vmx.h"
+
+void memstress_l2_guest_code(uint64_t vcpu_id)
+{
+ memstress_guest_code(vcpu_id);
+ vmcall();
+}
+
+extern char memstress_l2_guest_entry[];
+__asm__(
+"memstress_l2_guest_entry:"
+" mov (%rsp), %rdi;"
+" call memstress_l2_guest_code;"
+" ud2;"
+);
+
+static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+ GUEST_ASSERT(ept_1g_pages_supported());
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+uint64_t memstress_nested_pages(int nr_vcpus)
+{
+ /*
+ * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+ * pages and 4-level paging, plus a few pages per-vCPU for data
+ * structures such as the VMCS.
+ */
+ return 513 + 10 * nr_vcpus;
+}
+
+void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ uint64_t start, end;
+
+ prepare_eptp(vmx, vm);
+
+ /*
+ * Identity map the first 4G and the test region with 1G pages so that
+ * KVM can shadow the EPT12 with the maximum huge page size supported
+ * by the backing source.
+ */
+ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+ start = align_down(memstress_args.gpa, PG_SIZE_1G);
+ end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
+ nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
+{
+ struct vmx_pages *vmx, *vmx0 = NULL;
+ struct kvm_regs regs;
+ vm_vaddr_t vmx_gva;
+ int vcpu_id;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has_ept());
+
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+ if (vcpu_id == 0) {
+ memstress_setup_ept(vmx, vm);
+ vmx0 = vmx;
+ } else {
+ /* Share the same EPT table across all vCPUs. */
+ vmx->eptp = vmx0->eptp;
+ vmx->eptp_hva = vmx0->eptp_hva;
+ vmx->eptp_gpa = vmx0->eptp_gpa;
+ }
+
+ /*
+ * Override the vCPU to run memstress_l1_guest_code() which will
+ * bounce it into L2 before calling memstress_guest_code().
+ */
+ vcpu_regs_get(vcpus[vcpu_id], &regs);
+ regs.rip = (unsigned long) memstress_l1_guest_code;
+ vcpu_regs_set(vcpus[vcpu_id], &regs);
+ vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c
new file mode 100644
index 000000000000..34cb57d1d671
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86/pmu.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#include <stdint.h>
+
+#include <linux/kernel.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "pmu.h"
+
+const uint64_t intel_pmu_arch_events[] = {
+ INTEL_ARCH_CPU_CYCLES,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ INTEL_ARCH_REFERENCE_CYCLES,
+ INTEL_ARCH_LLC_REFERENCES,
+ INTEL_ARCH_LLC_MISSES,
+ INTEL_ARCH_BRANCHES_RETIRED,
+ INTEL_ARCH_BRANCHES_MISPREDICTED,
+ INTEL_ARCH_TOPDOWN_SLOTS,
+ INTEL_ARCH_TOPDOWN_BE_BOUND,
+ INTEL_ARCH_TOPDOWN_BAD_SPEC,
+ INTEL_ARCH_TOPDOWN_FE_BOUND,
+ INTEL_ARCH_TOPDOWN_RETIRING,
+ INTEL_ARCH_LBR_INSERTS,
+};
+kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
+
+const uint64_t amd_pmu_zen_events[] = {
+ AMD_ZEN_CORE_CYCLES,
+ AMD_ZEN_INSTRUCTIONS_RETIRED,
+ AMD_ZEN_BRANCHES_RETIRED,
+ AMD_ZEN_BRANCHES_MISPREDICTED,
+};
+kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
+
+/*
+ * For Intel Atom CPUs, the PMU events "Instruction Retired" or
+ * "Branch Instruction Retired" may be overcounted for some certain
+ * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD
+ * and complex SGX/SMX/CSTATE instructions/flows.
+ *
+ * The detailed information can be found in the errata (section SRF7):
+ * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/
+ *
+ * For the Atom platforms before Sierra Forest (including Sierra Forest),
+ * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would
+ * be overcounted on these certain instructions, but for Clearwater Forest
+ * only "Instruction Retired" event is overcounted on these instructions.
+ */
+static uint64_t get_pmu_errata(void)
+{
+ if (!this_cpu_is_intel())
+ return 0;
+
+ if (this_cpu_family() != 0x6)
+ return 0;
+
+ switch (this_cpu_model()) {
+ case 0xDD: /* Clearwater Forest */
+ return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT);
+ case 0xAF: /* Sierra Forest */
+ case 0x4D: /* Avaton, Rangely */
+ case 0x5F: /* Denverton */
+ case 0x86: /* Jacobsville */
+ return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) |
+ BIT_ULL(BRANCHES_RETIRED_OVERCOUNT);
+ default:
+ return 0;
+ }
+}
+
+uint64_t pmu_errata_mask;
+
+void kvm_init_pmu_errata(void)
+{
+ pmu_errata_mask = get_pmu_errata();
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
new file mode 100644
index 000000000000..36104d27f3d9
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -0,0 +1,1325 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include "linux/bitmap.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "pmu.h"
+#include "processor.h"
+#include "sev.h"
+
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define KERNEL_CS 0x8
+#define KERNEL_DS 0x10
+#define KERNEL_TSS 0x18
+
+vm_vaddr_t exception_handlers;
+bool host_cpu_is_amd;
+bool host_cpu_is_intel;
+bool is_forced_emulation_enabled;
+uint64_t guest_tsc_khz;
+
+const char *ex_str(int vector)
+{
+ switch (vector) {
+#define VEC_STR(v) case v##_VECTOR: return "#" #v
+ case DE_VECTOR: return "no exception";
+ case KVM_MAGIC_DE_VECTOR: return "#DE";
+ VEC_STR(DB);
+ VEC_STR(NMI);
+ VEC_STR(BP);
+ VEC_STR(OF);
+ VEC_STR(BR);
+ VEC_STR(UD);
+ VEC_STR(NM);
+ VEC_STR(DF);
+ VEC_STR(TS);
+ VEC_STR(NP);
+ VEC_STR(SS);
+ VEC_STR(GP);
+ VEC_STR(PF);
+ VEC_STR(MF);
+ VEC_STR(AC);
+ VEC_STR(MC);
+ VEC_STR(XM);
+ VEC_STR(VE);
+ VEC_STR(CP);
+ VEC_STR(HV);
+ VEC_STR(VC);
+ VEC_STR(SX);
+ default: return "#??";
+#undef VEC_STR
+ }
+}
+
+static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
+{
+ fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+ "rcx: 0x%.16llx rdx: 0x%.16llx\n",
+ indent, "",
+ regs->rax, regs->rbx, regs->rcx, regs->rdx);
+ fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+ "rsp: 0x%.16llx rbp: 0x%.16llx\n",
+ indent, "",
+ regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+ fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
+ "r10: 0x%.16llx r11: 0x%.16llx\n",
+ indent, "",
+ regs->r8, regs->r9, regs->r10, regs->r11);
+ fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+ "r14: 0x%.16llx r15: 0x%.16llx\n",
+ indent, "",
+ regs->r12, regs->r13, regs->r14, regs->r15);
+ fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+ indent, "",
+ regs->rip, regs->rflags);
+}
+
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+ "selector: 0x%.4x type: 0x%.2x\n",
+ indent, "", segment->base, segment->limit,
+ segment->selector, segment->type);
+ fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+ "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+ indent, "", segment->present, segment->dpl,
+ segment->db, segment->s, segment->l);
+ fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+ "unusable: 0x%.2x padding: 0x%.2x\n",
+ indent, "", segment->g, segment->avl,
+ segment->unusable, segment->padding);
+}
+
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+ "padding: 0x%.4x 0x%.4x 0x%.4x\n",
+ indent, "", dtable->base, dtable->limit,
+ dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
+{
+ unsigned int i;
+
+ fprintf(stream, "%*scs:\n", indent, "");
+ segment_dump(stream, &sregs->cs, indent + 2);
+ fprintf(stream, "%*sds:\n", indent, "");
+ segment_dump(stream, &sregs->ds, indent + 2);
+ fprintf(stream, "%*ses:\n", indent, "");
+ segment_dump(stream, &sregs->es, indent + 2);
+ fprintf(stream, "%*sfs:\n", indent, "");
+ segment_dump(stream, &sregs->fs, indent + 2);
+ fprintf(stream, "%*sgs:\n", indent, "");
+ segment_dump(stream, &sregs->gs, indent + 2);
+ fprintf(stream, "%*sss:\n", indent, "");
+ segment_dump(stream, &sregs->ss, indent + 2);
+ fprintf(stream, "%*str:\n", indent, "");
+ segment_dump(stream, &sregs->tr, indent + 2);
+ fprintf(stream, "%*sldt:\n", indent, "");
+ segment_dump(stream, &sregs->ldt, indent + 2);
+
+ fprintf(stream, "%*sgdt:\n", indent, "");
+ dtable_dump(stream, &sregs->gdt, indent + 2);
+ fprintf(stream, "%*sidt:\n", indent, "");
+ dtable_dump(stream, &sregs->idt, indent + 2);
+
+ fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+ "cr3: 0x%.16llx cr4: 0x%.16llx\n",
+ indent, "",
+ sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+ fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+ "apic_base: 0x%.16llx\n",
+ indent, "",
+ sregs->cr8, sregs->efer, sregs->apic_base);
+
+ fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+ for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+ fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+ sregs->interrupt_bitmap[i]);
+ }
+}
+
+bool kvm_is_tdp_enabled(void)
+{
+ if (host_cpu_is_intel)
+ return get_kvm_intel_param_bool("ept");
+ else
+ return get_kvm_amd_param_bool("npt");
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
+
+ /* If needed, create the top-level page table. */
+ if (!vm->pgd_created) {
+ vm->pgd = vm_alloc_page_table(vm);
+ vm->pgd_created = true;
+ }
+}
+
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
+ uint64_t vaddr, int level)
+{
+ uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
+ uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
+ int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+
+ TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+ "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
+ level + 1, vaddr);
+
+ return &page_table[index];
+}
+
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t *parent_pte,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
+{
+ uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
+
+ paddr = vm_untag_gpa(vm, paddr);
+
+ if (!(*pte & PTE_PRESENT_MASK)) {
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (current_level == target_level)
+ *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ else
+ *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, vaddr: 0x%lx",
+ current_level, vaddr);
+ TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+ "Cannot create page table at level: %u, vaddr: 0x%lx",
+ current_level, vaddr);
+ }
+ return pte;
+}
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
+{
+ const uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t *pte = &vm->pgd;
+ int current_level;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % pg_size) == 0,
+ "Virtual address not aligned,\n"
+ "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % pg_size) == 0,
+ "Physical address not aligned,\n"
+ " paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+ TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
+ "Unexpected bits in paddr: %lx", paddr);
+
+ /*
+ * Allocate upper level page tables, if not already present. Return
+ * early if a hugepage was created.
+ */
+ for (current_level = vm->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_create_upper_pte(vm, pte, vaddr, paddr,
+ current_level, level);
+ if (*pte & PTE_LARGE_MASK)
+ return;
+ }
+
+ /* Fill in page table entry. */
+ pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
+ TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+ "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+
+ /*
+ * Neither SEV nor TDX supports shared page tables, so only the final
+ * leaf PTE needs manually set the C/S-bit.
+ */
+ if (vm_is_gpa_protected(vm, paddr))
+ *pte |= vm->arch.c_bit;
+ else
+ *pte |= vm->arch.s_bit;
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
+}
+
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level)
+{
+ uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t nr_pages = nr_bytes / pg_size;
+ int i;
+
+ TEST_ASSERT(nr_bytes % pg_size == 0,
+ "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
+ nr_bytes, pg_size);
+
+ for (i = 0; i < nr_pages; i++) {
+ __virt_pg_map(vm, vaddr, paddr, level);
+ sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift,
+ nr_bytes / PAGE_SIZE);
+
+ vaddr += pg_size;
+ paddr += pg_size;
+ }
+}
+
+static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+{
+ if (*pte & PTE_LARGE_MASK) {
+ TEST_ASSERT(*level == PG_LEVEL_NONE ||
+ *level == current_level,
+ "Unexpected hugepage at level %d", current_level);
+ *level = current_level;
+ }
+
+ return *level == current_level;
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+ int *level)
+{
+ int va_width = 12 + (vm->pgtable_levels) * 9;
+ uint64_t *pte = &vm->pgd;
+ int current_level;
+
+ TEST_ASSERT(!vm->arch.is_pt_protected,
+ "Walking page tables of protected guests is impossible");
+
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels,
+ "Invalid PG_LEVEL_* '%d'", *level);
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx",
+ vaddr);
+ /*
+ * Check that the vaddr is a sign-extended va_width value.
+ */
+ TEST_ASSERT(vaddr ==
+ (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))),
+ "Canonical check failed. The virtual address is invalid.");
+
+ for (current_level = vm->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_get_pte(vm, pte, vaddr, current_level);
+ if (vm_is_target_pte(pte, level, current_level))
+ return pte;
+ }
+
+ return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
+}
+
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
+{
+ int level = PG_LEVEL_4K;
+
+ return __vm_get_page_table_entry(vm, vaddr, &level);
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ uint64_t *pml4e, *pml4e_start;
+ uint64_t *pdpe, *pdpe_start;
+ uint64_t *pde, *pde_start;
+ uint64_t *pte, *pte_start;
+
+ if (!vm->pgd_created)
+ return;
+
+ fprintf(stream, "%*s "
+ " no\n", indent, "");
+ fprintf(stream, "%*s index hvaddr gpaddr "
+ "addr w exec dirty\n",
+ indent, "");
+ pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
+ for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+ pml4e = &pml4e_start[n1];
+ if (!(*pml4e & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
+ " %u\n",
+ indent, "",
+ pml4e - pml4e_start, pml4e,
+ addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+ !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
+
+ pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
+ for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+ pdpe = &pdpe_start[n2];
+ if (!(*pdpe & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
+ "%u %u\n",
+ indent, "",
+ pdpe - pdpe_start, pdpe,
+ addr_hva2gpa(vm, pdpe),
+ PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+ !!(*pdpe & PTE_NX_MASK));
+
+ pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
+ for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+ pde = &pde_start[n3];
+ if (!(*pde & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spde 0x%-3zx %p "
+ "0x%-12lx 0x%-10llx %u %u\n",
+ indent, "", pde - pde_start, pde,
+ addr_hva2gpa(vm, pde),
+ PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+ !!(*pde & PTE_NX_MASK));
+
+ pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
+ for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+ pte = &pte_start[n4];
+ if (!(*pte & PTE_PRESENT_MASK))
+ continue;
+ fprintf(stream, "%*spte 0x%-3zx %p "
+ "0x%-12lx 0x%-10llx %u %u "
+ " %u 0x%-10lx\n",
+ indent, "",
+ pte - pte_start, pte,
+ addr_hva2gpa(vm, pte),
+ PTE_GET_PFN(*pte),
+ !!(*pte & PTE_WRITABLE_MASK),
+ !!(*pte & PTE_NX_MASK),
+ !!(*pte & PTE_DIRTY_MASK),
+ ((uint64_t) n1 << 27)
+ | ((uint64_t) n2 << 18)
+ | ((uint64_t) n3 << 9)
+ | ((uint64_t) n4));
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ * segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Sets the segment register pointed to by @segp to an unusable state.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->unusable = true;
+}
+
+static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
+{
+ void *gdt = addr_gva2hva(vm, vm->arch.gdt);
+ struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
+
+ desc->limit0 = segp->limit & 0xFFFF;
+ desc->base0 = segp->base & 0xFFFF;
+ desc->base1 = segp->base >> 16;
+ desc->type = segp->type;
+ desc->s = segp->s;
+ desc->dpl = segp->dpl;
+ desc->p = segp->present;
+ desc->limit1 = segp->limit >> 16;
+ desc->avl = segp->avl;
+ desc->l = segp->l;
+ desc->db = segp->db;
+ desc->g = segp->g;
+ desc->base2 = segp->base >> 24;
+ if (!segp->s)
+ desc->base3 = segp->base >> 32;
+}
+
+static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = KERNEL_CS;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
+ * | kFlagCodeReadable
+ */
+ segp->g = true;
+ segp->l = true;
+ segp->present = 1;
+}
+
+static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = KERNEL_DS;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
+ * | kFlagDataWritable
+ */
+ segp->g = true;
+ segp->present = true;
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ int level = PG_LEVEL_NONE;
+ uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
+
+ TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+ "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
+
+ /*
+ * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
+ * address bits to be zero.
+ */
+ return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
+}
+
+static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->base = base;
+ segp->limit = 0x67;
+ segp->selector = KERNEL_TSS;
+ segp->type = 0xb;
+ segp->present = 1;
+}
+
+static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct kvm_sregs sregs;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
+
+ /* Set mode specific system register values. */
+ vcpu_sregs_get(vcpu, &sregs);
+
+ sregs.idt.base = vm->arch.idt;
+ sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+ sregs.gdt.base = vm->arch.gdt;
+ sregs.gdt.limit = getpagesize() - 1;
+
+ sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+ sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
+ sregs.cr4 |= X86_CR4_OSXSAVE;
+ if (vm->pgtable_levels == 5)
+ sregs.cr4 |= X86_CR4_LA57;
+ sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+ kvm_seg_set_unusable(&sregs.ldt);
+ kvm_seg_set_kernel_code_64bit(&sregs.cs);
+ kvm_seg_set_kernel_data_64bit(&sregs.ds);
+ kvm_seg_set_kernel_data_64bit(&sregs.es);
+ kvm_seg_set_kernel_data_64bit(&sregs.gs);
+ kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
+
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct kvm_xcrs xcrs = {
+ .nr_xcrs = 1,
+ .xcrs[0].xcr = 0,
+ .xcrs[0].value = kvm_cpu_supported_xcr0(),
+ };
+
+ if (!kvm_cpu_has(X86_FEATURE_XSAVE))
+ return;
+
+ vcpu_xcrs_set(vcpu, &xcrs);
+}
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+ int dpl, unsigned short selector)
+{
+ struct idt_entry *base =
+ (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt);
+ struct idt_entry *e = &base[vector];
+
+ memset(e, 0, sizeof(*e));
+ e->offset0 = addr;
+ e->selector = selector;
+ e->ist = 0;
+ e->type = 14;
+ e->dpl = dpl;
+ e->p = 1;
+ e->offset1 = addr >> 16;
+ e->offset2 = addr >> 32;
+}
+
+static bool kvm_fixup_exception(struct ex_regs *regs)
+{
+ if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
+ return false;
+
+ if (regs->vector == DE_VECTOR)
+ regs->vector = KVM_MAGIC_DE_VECTOR;
+
+ regs->rip = regs->r11;
+ regs->r9 = regs->vector;
+ regs->r10 = regs->error_code;
+ return true;
+}
+
+void route_exception(struct ex_regs *regs)
+{
+ typedef void(*handler)(struct ex_regs *);
+ handler *handlers = (handler *)exception_handlers;
+
+ if (handlers && handlers[regs->vector]) {
+ handlers[regs->vector](regs);
+ return;
+ }
+
+ if (kvm_fixup_exception(regs))
+ return;
+
+ GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+ regs->vector, regs->rip);
+}
+
+static void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ extern void *idt_handlers;
+ struct kvm_segment seg;
+ int i;
+
+ vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+
+ /* Handlers have the same address in both address spaces.*/
+ for (i = 0; i < NUM_INTERRUPTS; i++)
+ set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+
+ kvm_seg_set_kernel_code_64bit(&seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+
+ kvm_seg_set_kernel_data_64bit(&seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+
+ kvm_seg_set_tss_64bit(vm->arch.tss, &seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+ handlers[vector] = (vm_vaddr_t)handler;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+ REPORT_GUEST_ASSERT(uc);
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+ int r;
+
+ TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ),
+ "Require KVM_GET_TSC_KHZ to provide udelay() to guest.");
+
+ vm_create_irqchip(vm);
+ vm_init_descriptor_tables(vm);
+
+ sync_global_to_guest(vm, host_cpu_is_intel);
+ sync_global_to_guest(vm, host_cpu_is_amd);
+ sync_global_to_guest(vm, is_forced_emulation_enabled);
+ sync_global_to_guest(vm, pmu_errata_mask);
+
+ if (is_sev_vm(vm)) {
+ struct kvm_sev_init init = { 0 };
+
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+
+ r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency.");
+ guest_tsc_khz = r;
+ sync_global_to_guest(vm, guest_tsc_khz);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip = (unsigned long) guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ vm_vaddr_t stack_vaddr;
+ struct kvm_vcpu *vcpu;
+
+ stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+ DEFAULT_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
+
+ /*
+ * Align stack to match calling sequence requirements in section "The
+ * Stack Frame" of the System V ABI AMD64 Architecture Processor
+ * Supplement, which requires the value (%rsp + 8) to be a multiple of
+ * 16 when control is transferred to the function entry point.
+ *
+ * If this code is ever used to launch a vCPU with 32-bit entry point it
+ * may need to subtract 4 bytes instead of 8 bytes.
+ */
+ TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
+ "__vm_vaddr_alloc() did not provide a page-aligned address");
+ stack_vaddr -= 8;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+ vcpu_init_sregs(vm, vcpu);
+ vcpu_init_xcrs(vm, vcpu);
+
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vcpu, &regs);
+ regs.rflags = regs.rflags | 0x2;
+ regs.rsp = stack_vaddr;
+ vcpu_regs_set(vcpu, &regs);
+
+ /* Setup the MP state */
+ mp_state.mp_state = 0;
+ vcpu_mp_state_set(vcpu, &mp_state);
+
+ /*
+ * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime"
+ * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are
+ * reflected into selftests' vCPU CPUID cache, i.e. so that the cache
+ * is consistent with vCPU state.
+ */
+ vcpu_get_cpuid(vcpu);
+ return vcpu;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+
+ return vcpu;
+}
+
+void vcpu_arch_free(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->cpuid)
+ free(vcpu->cpuid);
+}
+
+/* Do not use kvm_supported_cpuid directly except for validity checks. */
+static void *kvm_supported_cpuid;
+
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+ int kvm_fd;
+
+ if (kvm_supported_cpuid)
+ return kvm_supported_cpuid;
+
+ kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
+ (struct kvm_cpuid2 *)kvm_supported_cpuid);
+
+ close(kvm_fd);
+ return kvm_supported_cpuid;
+}
+
+static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index,
+ uint8_t reg, uint8_t lo, uint8_t hi)
+{
+ const struct kvm_cpuid_entry2 *entry;
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ entry = &cpuid->entries[i];
+
+ /*
+ * The output registers in kvm_cpuid_entry2 are in alphabetical
+ * order, but kvm_x86_cpu_feature matches that mess, so yay
+ * pointer shenanigans!
+ */
+ if (entry->function == function && entry->index == index)
+ return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
+ }
+
+ return 0;
+}
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_feature feature)
+{
+ return __kvm_cpu_has(cpuid, feature.function, feature.index,
+ feature.reg, feature.bit, feature.bit);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_property property)
+{
+ return __kvm_cpu_has(cpuid, property.function, property.index,
+ property.reg, property.lo_bit, property.hi_bit);
+}
+
+uint64_t kvm_get_feature_msr(uint64_t msr_index)
+{
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r, kvm_fd;
+
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
+
+ close(kvm_fd);
+ return buffer.entry.data;
+}
+
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
+{
+ int kvm_fd;
+ u64 bitmask;
+ long rc;
+ struct kvm_device_attr attr = {
+ .group = 0,
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
+ .addr = (unsigned long) &bitmask,
+ };
+
+ TEST_ASSERT(!kvm_supported_cpuid,
+ "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
+
+ TEST_ASSERT(is_power_of_2(xfeature),
+ "Dynamic XFeatures must be enabled one at a time");
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ close(kvm_fd);
+
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+ __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
+
+ TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
+
+ __TEST_REQUIRE(bitmask & xfeature,
+ "Required XSAVE feature '%s' not supported", name);
+
+ TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+ TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+ TEST_ASSERT(bitmask & xfeature,
+ "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
+ name, xfeature, bitmask);
+}
+
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
+{
+ TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
+
+ /* Allow overriding the default CPUID. */
+ if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
+ free(vcpu->cpuid);
+ vcpu->cpuid = NULL;
+ }
+
+ if (!vcpu->cpuid)
+ vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
+
+ memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
+ vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_property property,
+ uint32_t value)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
+
+ (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
+ (&entry->eax)[property.reg] |= value << property.lo_bit;
+
+ vcpu_set_cpuid(vcpu);
+
+ /* Sanity check that @value doesn't exceed the bounds in any way. */
+ TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
+}
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
+{
+ struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
+
+ entry->eax = 0;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature,
+ bool set)
+{
+ struct kvm_cpuid_entry2 *entry;
+ u32 *reg;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ reg = (&entry->eax) + feature.reg;
+
+ if (set)
+ *reg |= BIT(feature.bit);
+ else
+ *reg &= ~BIT(feature.bit);
+
+ vcpu_set_cpuid(vcpu);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
+{
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+
+ vcpu_msrs_get(vcpu, &buffer.header);
+
+ return buffer.entry.data;
+}
+
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
+{
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+
+ memset(&buffer, 0, sizeof(buffer));
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ buffer.entry.data = msr_value;
+
+ return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+ " num: %u",
+ num);
+
+ va_start(ap, num);
+ vcpu_regs_get(vcpu, &regs);
+
+ if (num >= 1)
+ regs.rdi = va_arg(ap, uint64_t);
+
+ if (num >= 2)
+ regs.rsi = va_arg(ap, uint64_t);
+
+ if (num >= 3)
+ regs.rdx = va_arg(ap, uint64_t);
+
+ if (num >= 4)
+ regs.rcx = va_arg(ap, uint64_t);
+
+ if (num >= 5)
+ regs.r8 = va_arg(ap, uint64_t);
+
+ if (num >= 6)
+ regs.r9 = va_arg(ap, uint64_t);
+
+ vcpu_regs_set(vcpu, &regs);
+ va_end(ap);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+
+ fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
+
+ fprintf(stream, "%*sregs:\n", indent + 2, "");
+ vcpu_regs_get(vcpu, &regs);
+ regs_dump(stream, &regs, indent + 4);
+
+ fprintf(stream, "%*ssregs:\n", indent + 2, "");
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs_dump(stream, &sregs, indent + 4);
+}
+
+static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
+{
+ struct kvm_msr_list *list;
+ struct kvm_msr_list nmsrs;
+ int kvm_fd, r;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ nmsrs.nmsrs = 0;
+ if (!feature_msrs)
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+ else
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
+
+ TEST_ASSERT(r == -1 && errno == E2BIG,
+ "Expected -E2BIG, got rc: %i errno: %i (%s)",
+ r, errno, strerror(errno));
+
+ list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
+ TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
+ list->nmsrs = nmsrs.nmsrs;
+
+ if (!feature_msrs)
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ else
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+ close(kvm_fd);
+
+ TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
+ "Number of MSRs in list changed, was %d, now %d",
+ nmsrs.nmsrs, list->nmsrs);
+ return list;
+}
+
+const struct kvm_msr_list *kvm_get_msr_index_list(void)
+{
+ static const struct kvm_msr_list *list;
+
+ if (!list)
+ list = __kvm_get_msr_index_list(false);
+ return list;
+}
+
+
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
+{
+ static const struct kvm_msr_list *list;
+
+ if (!list)
+ list = __kvm_get_msr_index_list(true);
+ return list;
+}
+
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
+{
+ const struct kvm_msr_list *list = kvm_get_msr_index_list();
+ int i;
+
+ for (i = 0; i < list->nmsrs; ++i) {
+ if (list->indices[i] == msr_index)
+ return true;
+ }
+
+ return false;
+}
+
+static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
+ struct kvm_x86_state *state)
+{
+ int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
+
+ if (size) {
+ state->xsave = malloc(size);
+ vcpu_xsave2_get(vcpu, state->xsave);
+ } else {
+ state->xsave = malloc(sizeof(struct kvm_xsave));
+ vcpu_xsave_get(vcpu, state->xsave);
+ }
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
+{
+ const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
+ struct kvm_x86_state *state;
+ int i;
+
+ static int nested_size = -1;
+
+ if (nested_size == -1) {
+ nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
+ TEST_ASSERT(nested_size <= sizeof(state->nested_),
+ "Nested state size too big, %i > %zi",
+ nested_size, sizeof(state->nested_));
+ }
+
+ /*
+ * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
+ * guest state is consistent only after userspace re-enters the
+ * kernel with KVM_RUN. Complete IO prior to migrating state
+ * to a new VM.
+ */
+ vcpu_run_complete_io(vcpu);
+
+ state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
+ TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
+
+ vcpu_events_get(vcpu, &state->events);
+ vcpu_mp_state_get(vcpu, &state->mp_state);
+ vcpu_regs_get(vcpu, &state->regs);
+ vcpu_save_xsave_state(vcpu, state);
+
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_get(vcpu, &state->xcrs);
+
+ vcpu_sregs_get(vcpu, &state->sregs);
+
+ if (nested_size) {
+ state->nested.size = sizeof(state->nested_);
+
+ vcpu_nested_state_get(vcpu, &state->nested);
+ TEST_ASSERT(state->nested.size <= nested_size,
+ "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+ state->nested.size, nested_size);
+ } else {
+ state->nested.size = 0;
+ }
+
+ state->msrs.nmsrs = msr_list->nmsrs;
+ for (i = 0; i < msr_list->nmsrs; i++)
+ state->msrs.entries[i].index = msr_list->indices[i];
+ vcpu_msrs_get(vcpu, &state->msrs);
+
+ vcpu_debugregs_get(vcpu, &state->debugregs);
+
+ return state;
+}
+
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
+{
+ vcpu_sregs_set(vcpu, &state->sregs);
+ vcpu_msrs_set(vcpu, &state->msrs);
+
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_set(vcpu, &state->xcrs);
+
+ vcpu_xsave_set(vcpu, state->xsave);
+ vcpu_events_set(vcpu, &state->events);
+ vcpu_mp_state_set(vcpu, &state->mp_state);
+ vcpu_debugregs_set(vcpu, &state->debugregs);
+ vcpu_regs_set(vcpu, &state->regs);
+
+ if (state->nested.size)
+ vcpu_nested_state_set(vcpu, &state->nested);
+}
+
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
+{
+ free(state->xsave);
+ free(state);
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+ if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
+ *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
+ *va_bits = 32;
+ } else {
+ *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+ *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
+ }
+}
+
+void kvm_init_vm_address_properties(struct kvm_vm *vm)
+{
+ if (is_sev_vm(vm)) {
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
+ vm->gpa_tag_mask = vm->arch.c_bit;
+ } else {
+ vm->arch.sev_fd = -1;
+ }
+}
+
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index)
+{
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index)
+ return &cpuid->entries[i];
+ }
+
+ TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
+
+ return NULL;
+}
+
+#define X86_HYPERCALL(inputs...) \
+({ \
+ uint64_t r; \
+ \
+ asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \
+ "jnz 1f\n\t" \
+ "vmcall\n\t" \
+ "jmp 2f\n\t" \
+ "1: vmmcall\n\t" \
+ "2:" \
+ : "=a"(r) \
+ : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \
+ \
+ r; \
+})
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t a3)
+{
+ return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+}
+
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+ return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
+}
+
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+ GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
+}
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+ const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
+ unsigned long ht_gfn, max_gfn, max_pfn;
+ uint8_t maxphyaddr, guest_maxphyaddr;
+
+ /*
+ * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR
+ * enumerates the max _mappable_ GPA, which can be less than the raw
+ * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
+ * doesn't support 5-level TDP.
+ */
+ guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
+ guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
+ TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
+ "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
+
+ max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
+
+ /* Avoid reserved HyperTransport region on AMD processors. */
+ if (!host_cpu_is_amd)
+ return max_gfn;
+
+ /* On parts with <40 physical address bits, the area is fully hidden */
+ if (vm->pa_bits < 40)
+ return max_gfn;
+
+ /* Before family 17h, the HyperTransport area is just below 1T. */
+ ht_gfn = (1 << 28) - num_ht_pages;
+ if (this_cpu_family() < 0x17)
+ goto done;
+
+ /*
+ * Otherwise it's at the top of the physical address space, possibly
+ * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
+ * the old conservative value if MAXPHYADDR is not enumerated.
+ */
+ if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
+ goto done;
+
+ maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+ max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
+
+ if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
+ max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
+
+ ht_gfn = max_pfn - num_ht_pages;
+done:
+ return min(max_gfn, ht_gfn - 1);
+}
+
+void kvm_selftest_arch_init(void)
+{
+ host_cpu_is_intel = this_cpu_is_intel();
+ host_cpu_is_amd = this_cpu_is_amd();
+ is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+
+ kvm_init_pmu_errata();
+}
+
+bool sys_clocksource_is_based_on_tsc(void)
+{
+ char *clk_name = sys_get_cur_clocksource();
+ bool ret = !strcmp(clk_name, "tsc\n") ||
+ !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
+
+ free(clk_name);
+
+ return ret;
+}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
new file mode 100644
index 000000000000..c3a9838f4806
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86/sev.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "sev.h"
+
+/*
+ * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
+ * -1 would then cause an underflow back to 2**64 - 1. This is expected and
+ * correct.
+ *
+ * If the last range in the sparsebit is [x, y] and we try to iterate,
+ * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
+ * and find the first range, but that's correct because the condition
+ * expression would cause us to quit the loop.
+ */
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region,
+ uint8_t page_type, bool private)
+{
+ const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
+ const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+ const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
+ sparsebit_idx_t i, j;
+
+ if (!sparsebit_any_set(protected_phy_pages))
+ return;
+
+ if (!is_sev_snp_vm(vm))
+ sev_register_encrypted_memory(vm, region);
+
+ sparsebit_for_each_set_range(protected_phy_pages, i, j) {
+ const uint64_t size = (j - i + 1) * vm->page_size;
+ const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
+
+ if (private)
+ vm_mem_set_private(vm, gpa_base + offset, size);
+
+ if (is_sev_snp_vm(vm))
+ snp_launch_update_data(vm, gpa_base + offset,
+ (uint64_t)addr_gpa2hva(vm, gpa_base + offset),
+ size, page_type);
+ else
+ sev_launch_update_data(vm, gpa_base + offset, size);
+
+ }
+}
+
+void sev_vm_init(struct kvm_vm *vm)
+{
+ if (vm->type == KVM_X86_DEFAULT_VM) {
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
+ } else {
+ struct kvm_sev_init init = { 0 };
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+}
+
+void sev_es_vm_init(struct kvm_vm *vm)
+{
+ if (vm->type == KVM_X86_DEFAULT_VM) {
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
+ } else {
+ struct kvm_sev_init init = { 0 };
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+}
+
+void snp_vm_init(struct kvm_vm *vm)
+{
+ struct kvm_sev_init init = { 0 };
+
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+}
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
+{
+ struct kvm_sev_launch_start launch_start = {
+ .policy = policy,
+ };
+ struct userspace_mem_region *region;
+ struct kvm_sev_guest_status status;
+ int ctr;
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+
+ TEST_ASSERT_EQ(status.policy, policy);
+ TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false);
+
+ if (policy & SEV_POLICY_ES)
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
+{
+ struct kvm_sev_launch_measure launch_measure;
+ struct kvm_sev_guest_status guest_status;
+
+ launch_measure.len = 256;
+ launch_measure.uaddr = (__u64)measurement;
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
+ TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
+}
+
+void sev_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_guest_status status;
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+ TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
+ status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
+ "Unexpected guest state: %d", status.state);
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+ TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
+}
+
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy)
+{
+ struct kvm_sev_snp_launch_start launch_start = {
+ .policy = policy,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start);
+}
+
+void snp_vm_launch_update(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ int ctr;
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void snp_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_snp_launch_finish launch_finish = { 0 };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish);
+}
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+ struct kvm_vcpu **cpu)
+{
+ struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = type,
+ };
+ struct kvm_vm *vm;
+ struct kvm_vcpu *cpus[1];
+
+ vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
+ *cpu = cpus[0];
+
+ return vm;
+}
+
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement)
+{
+ if (is_sev_snp_vm(vm)) {
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE));
+
+ snp_vm_launch_start(vm, policy);
+
+ snp_vm_launch_update(vm);
+
+ snp_vm_launch_finish(vm);
+
+ return;
+ }
+
+ sev_vm_launch(vm, policy);
+
+ if (!measurement)
+ measurement = alloca(256);
+
+ sev_vm_launch_measure(vm, measurement);
+
+ sev_vm_launch_finish(vm);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c
index 827fe6028dd4..d239c2097391 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86/svm.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * tools/testing/selftests/kvm/lib/x86_64/svm.c
* Helpers used for nested SVM testing
* Largely inspired from KVM unit test svm.c
*
@@ -9,10 +8,11 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#include "svm_util.h"
+#define SEV_DEV_PATH "/dev/sev"
+
struct gpr64_regs guest_regs;
u64 rflags;
@@ -30,20 +30,22 @@ u64 rflags;
struct svm_test_data *
vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
{
- vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
- svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
- svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->save_area = (void *)vm_vaddr_alloc_page(vm);
svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
+ svm->msr = (void *)vm_vaddr_alloc_page(vm);
+ svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
+ svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
+ memset(svm->msr_hva, 0, getpagesize());
+
*p_svm_gva = svm_gva;
return svm;
}
@@ -95,6 +97,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
(1ULL << INTERCEPT_VMMCALL);
+ ctrl->msrpm_base_pa = svm->msr_gpa;
vmcb->save.rip = (u64)guest_rip;
vmcb->save.rsp = (u64)guest_rsp;
@@ -148,18 +151,13 @@ void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
: "r15", "memory");
}
-bool nested_svm_supported(void)
-{
- struct kvm_cpuid_entry2 *entry =
- kvm_get_supported_cpuid_entry(0x80000001);
-
- return entry->ecx & CPUID_SVM;
-}
-
-void nested_svm_check_supported(void)
+/*
+ * Open SEV_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Return:
+ * The opened file descriptor of /dev/sev.
+ */
+int open_sev_dev_path_or_exit(void)
{
- if (!nested_svm_supported()) {
- print_skip("nested SVM not enabled");
- exit(KSFT_SKIP);
- }
+ return open_path_or_exit(SEV_DEV_PATH, 0);
}
diff --git a/tools/testing/selftests/kvm/lib/x86/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c
new file mode 100644
index 000000000000..1265cecc7dd1
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86/ucall.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ /*
+ * FIXME: Revert this hack (the entire commit that added it) once nVMX
+ * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g.
+ * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+ * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP
+ * in particular is problematic) along with RDX and RDI (which are
+ * inputs), and clobber volatile GPRs. *sigh*
+ */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+ "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+ asm volatile("push %%rbp\n\t"
+ "push %%r15\n\t"
+ "push %%r14\n\t"
+ "push %%r13\n\t"
+ "push %%r12\n\t"
+ "push %%rbx\n\t"
+ "push %%rdx\n\t"
+ "push %%rdi\n\t"
+ "in %[port], %%al\n\t"
+ "pop %%rdi\n\t"
+ "pop %%rdx\n\t"
+ "pop %%rbx\n\t"
+ "pop %%r12\n\t"
+ "pop %%r13\n\t"
+ "pop %%r14\n\t"
+ "pop %%r15\n\t"
+ "pop %%rbp\n\t"
+ : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+ HORRIFIC_L2_UCALL_CLOBBER_HACK);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ return (void *)regs.rdi;
+ }
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index 2448b30e8efa..29b082a58daa 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -1,13 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * tools/testing/selftests/kvm/lib/x86_64/vmx.c
- *
* Copyright (C) 2018, Google LLC.
*/
+#include <asm/msr-index.h>
+
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#include "vmx.h"
@@ -43,21 +42,17 @@ struct eptPageTablePointer {
uint64_t address:40;
uint64_t reserved_63_52:12;
};
-int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
uint16_t evmcs_ver;
- struct kvm_enable_cap enable_evmcs_cap = {
- .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
- .args[0] = (unsigned long)&evmcs_ver
- };
-
- vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, &enable_evmcs_cap);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+ (unsigned long)&evmcs_ver);
/* KVM should return supported EVMCS version range */
TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
(evmcs_ver & 0xff) > 0,
- "Incorrect EVMCS version range: %x:%x\n",
+ "Incorrect EVMCS version range: %x:%x",
evmcs_ver & 0xff, evmcs_ver >> 8);
return evmcs_ver;
@@ -77,55 +72,41 @@ int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
struct vmx_pages *
vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
{
- vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
/* Setup of a region of guest memory for the vmxon region. */
- vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
/* Setup of a region of guest memory for a vmcs. */
- vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
/* Setup of a region of guest memory for the MSR bitmap. */
- vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->msr = (void *)vm_vaddr_alloc_page(vm);
vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
memset(vmx->msr_hva, 0, getpagesize());
/* Setup of a region of guest memory for the shadow VMCS. */
- vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
- vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
memset(vmx->vmread_hva, 0, getpagesize());
- vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
- /* Setup of a region of guest memory for the VP Assist page. */
- vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
- vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
- vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
-
- /* Setup of a region of guest memory for the enlightened VMCS. */
- vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
- vmx->enlightened_vmcs_hva =
- addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
- vmx->enlightened_vmcs_gpa =
- addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs);
-
*p_vmx_gva = vmx_gva;
return vmx;
}
@@ -176,30 +157,32 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
bool load_vmcs(struct vmx_pages *vmx)
{
- if (!enable_evmcs) {
- /* Load a VMCS. */
- *(uint32_t *)(vmx->vmcs) = vmcs_revision();
- if (vmclear(vmx->vmcs_gpa))
- return false;
-
- if (vmptrld(vmx->vmcs_gpa))
- return false;
-
- /* Setup shadow VMCS, do not load it yet. */
- *(uint32_t *)(vmx->shadow_vmcs) =
- vmcs_revision() | 0x80000000ul;
- if (vmclear(vmx->shadow_vmcs_gpa))
- return false;
- } else {
- if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
- vmx->enlightened_vmcs))
- return false;
- current_evmcs->revision_id = EVMCS_VERSION;
- }
+ /* Load a VMCS. */
+ *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+ if (vmclear(vmx->vmcs_gpa))
+ return false;
+
+ if (vmptrld(vmx->vmcs_gpa))
+ return false;
+
+ /* Setup shadow VMCS, do not load it yet. */
+ *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+ if (vmclear(vmx->shadow_vmcs_gpa))
+ return false;
return true;
}
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+ return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+ return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
/*
* Initialize the control fields to the most basic settings possible.
*/
@@ -215,9 +198,9 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
if (vmx->eptp_gpa) {
uint64_t ept_paddr;
struct eptPageTablePointer eptp = {
- .memory_type = VMX_BASIC_MEM_TYPE_WB,
+ .memory_type = X86_MEMTYPE_WB,
.page_walk_length = 3, /* + 1 */
- .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
};
@@ -379,101 +362,93 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-bool nested_vmx_supported(void)
-{
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
-
- return entry->ecx & CPUID_VMX;
-}
-
-void nested_vmx_check_supported(void)
+static void nested_create_pte(struct kvm_vm *vm,
+ struct eptPageTableEntry *pte,
+ uint64_t nested_paddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
{
- if (!nested_vmx_supported()) {
- print_skip("nested VMX not enabled");
- exit(KSFT_SKIP);
+ if (!pte->readable) {
+ pte->writable = true;
+ pte->readable = true;
+ pte->executable = true;
+ pte->page_size = (current_level == target_level);
+ if (pte->page_size)
+ pte->address = paddr >> vm->page_shift;
+ else
+ pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
+ current_level, nested_paddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, nested_paddr: 0x%lx",
+ current_level, nested_paddr);
}
}
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, int target_level)
{
- uint16_t index[4];
- struct eptPageTableEntry *pml4e;
+ const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+ struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+ uint16_t index;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
- TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+ TEST_ASSERT((nested_paddr >> 48) == 0,
+ "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT",
+ nested_paddr);
+ TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx vm->page_size: 0x%x",
- nested_paddr, vm->page_size);
+ " nested_paddr: 0x%lx page_size: 0x%lx",
+ nested_paddr, page_size);
TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % vm->page_size) == 0,
+ TEST_ASSERT((paddr % page_size) == 0,
"Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
+ " paddr: 0x%lx page_size: 0x%lx",
+ paddr, page_size);
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- index[0] = (nested_paddr >> 12) & 0x1ffu;
- index[1] = (nested_paddr >> 21) & 0x1ffu;
- index[2] = (nested_paddr >> 30) & 0x1ffu;
- index[3] = (nested_paddr >> 39) & 0x1ffu;
-
- /* Allocate page directory pointer table if not present. */
- pml4e = vmx->eptp_hva;
- if (!pml4e[index[3]].readable) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].readable = true;
- pml4e[index[3]].executable = true;
- }
+ for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+ index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+ pte = &pt[index];
- /* Allocate page directory table if not present. */
- struct eptPageTableEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].readable) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].readable = true;
- pdpe[index[2]].executable = true;
- }
+ nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
- /* Allocate page table if not present. */
- struct eptPageTableEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].readable) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].readable = true;
- pde[index[1]].executable = true;
- }
+ if (pte->page_size)
+ break;
- /* Fill in page table entry. */
- struct eptPageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].readable = true;
- pte[index[0]].executable = true;
+ pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+ }
/*
* For now mark these as accessed and dirty because the only
* testcase we have needs that. Can be reconsidered later.
*/
- pte[index[0]].accessed = true;
- pte[index[0]].dirty = true;
+ pte->accessed = true;
+ pte->dirty = true;
+
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr)
+{
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
}
/*
@@ -484,7 +459,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* nested_paddr - Nested guest physical address to map
* paddr - VM Physical Address
* size - The size of the range to map
- * eptp_memslot - Memory region slot for new virtual translation tables
+ * level - The level at which to map the range
*
* Output Args: None
*
@@ -493,28 +468,34 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* Within the VM given by vm, creates a nested guest translation for the
* page range starting at nested_paddr to the page range starting at paddr.
*/
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot)
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+ int level)
{
- size_t page_size = vm->page_size;
+ size_t page_size = PG_LEVEL_SIZE(level);
size_t npages = size / page_size;
TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
nested_paddr += page_size;
paddr += page_size;
}
}
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+ __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
/* Prepare an identity extended page table that maps all the
* physical pages in VM.
*/
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot)
+ uint32_t memslot)
{
sparsebit_idx_t i, last;
struct userspace_mem_region *region =
@@ -530,24 +511,41 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
nested_map(vmx, vm,
(uint64_t)i << vm->page_shift,
(uint64_t)i << vm->page_shift,
- 1 << vm->page_shift,
- eptp_memslot);
+ 1 << vm->page_shift);
}
}
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size)
+{
+ __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
+bool kvm_cpu_has_ept(void)
+{
+ uint64_t ctrl;
+
+ ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
+ if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+ return false;
+
+ ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+ return ctrl & SECONDARY_EXEC_ENABLE_EPT;
+}
+
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm)
{
- vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+ vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
{
- vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
deleted file mode 100644
index de0c76177d02..000000000000
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ /dev/null
@@ -1,1370 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/processor.c
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "../kvm_util_internal.h"
-#include "processor.h"
-
-#ifndef NUM_INTERRUPTS
-#define NUM_INTERRUPTS 256
-#endif
-
-#define DEFAULT_CODE_SELECTOR 0x8
-#define DEFAULT_DATA_SELECTOR 0x10
-
-/* Minimum physical address used for virtual translation tables. */
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
-vm_vaddr_t exception_handlers;
-
-/* Virtual translation table structure declarations */
-struct pageMapL4Entry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryPointerEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageTableEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t reserved_07:1;
- uint64_t global:1;
- uint64_t ignored_11_09:3;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-void regs_dump(FILE *stream, struct kvm_regs *regs,
- uint8_t indent)
-{
- fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
- "rcx: 0x%.16llx rdx: 0x%.16llx\n",
- indent, "",
- regs->rax, regs->rbx, regs->rcx, regs->rdx);
- fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
- "rsp: 0x%.16llx rbp: 0x%.16llx\n",
- indent, "",
- regs->rsi, regs->rdi, regs->rsp, regs->rbp);
- fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
- "r10: 0x%.16llx r11: 0x%.16llx\n",
- indent, "",
- regs->r8, regs->r9, regs->r10, regs->r11);
- fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
- "r14: 0x%.16llx r15: 0x%.16llx\n",
- indent, "",
- regs->r12, regs->r13, regs->r14, regs->r15);
- fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
- indent, "",
- regs->rip, regs->rflags);
-}
-
-/*
- * Segment Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * segment - KVM segment
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the KVM segment given by @segment, to the FILE stream
- * given by @stream.
- */
-static void segment_dump(FILE *stream, struct kvm_segment *segment,
- uint8_t indent)
-{
- fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
- "selector: 0x%.4x type: 0x%.2x\n",
- indent, "", segment->base, segment->limit,
- segment->selector, segment->type);
- fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
- "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
- indent, "", segment->present, segment->dpl,
- segment->db, segment->s, segment->l);
- fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
- "unusable: 0x%.2x padding: 0x%.2x\n",
- indent, "", segment->g, segment->avl,
- segment->unusable, segment->padding);
-}
-
-/*
- * dtable Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * dtable - KVM dtable
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
- * given by @stream.
- */
-static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
- uint8_t indent)
-{
- fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
- "padding: 0x%.4x 0x%.4x 0x%.4x\n",
- indent, "", dtable->base, dtable->limit,
- dtable->padding[0], dtable->padding[1], dtable->padding[2]);
-}
-
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
- uint8_t indent)
-{
- unsigned int i;
-
- fprintf(stream, "%*scs:\n", indent, "");
- segment_dump(stream, &sregs->cs, indent + 2);
- fprintf(stream, "%*sds:\n", indent, "");
- segment_dump(stream, &sregs->ds, indent + 2);
- fprintf(stream, "%*ses:\n", indent, "");
- segment_dump(stream, &sregs->es, indent + 2);
- fprintf(stream, "%*sfs:\n", indent, "");
- segment_dump(stream, &sregs->fs, indent + 2);
- fprintf(stream, "%*sgs:\n", indent, "");
- segment_dump(stream, &sregs->gs, indent + 2);
- fprintf(stream, "%*sss:\n", indent, "");
- segment_dump(stream, &sregs->ss, indent + 2);
- fprintf(stream, "%*str:\n", indent, "");
- segment_dump(stream, &sregs->tr, indent + 2);
- fprintf(stream, "%*sldt:\n", indent, "");
- segment_dump(stream, &sregs->ldt, indent + 2);
-
- fprintf(stream, "%*sgdt:\n", indent, "");
- dtable_dump(stream, &sregs->gdt, indent + 2);
- fprintf(stream, "%*sidt:\n", indent, "");
- dtable_dump(stream, &sregs->idt, indent + 2);
-
- fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
- "cr3: 0x%.16llx cr4: 0x%.16llx\n",
- indent, "",
- sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
- fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
- "apic_base: 0x%.16llx\n",
- indent, "",
- sregs->cr8, sregs->efer, sregs->apic_base);
-
- fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
- for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
- fprintf(stream, "%*s%.16llx\n", indent + 2, "",
- sregs->interrupt_bitmap[i]);
- }
-}
-
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
-{
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- /* If needed, create page map l4 table. */
- if (!vm->pgd_created) {
- vm_paddr_t paddr = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- vm->pgd = paddr;
- vm->pgd_created = true;
- }
-}
-
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
-{
- uint16_t index[4];
- struct pageMapL4Entry *pml4e;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((vaddr % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x",
- vaddr, vm->page_size);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
- (vaddr >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx",
- vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- index[0] = (vaddr >> 12) & 0x1ffu;
- index[1] = (vaddr >> 21) & 0x1ffu;
- index[2] = (vaddr >> 30) & 0x1ffu;
- index[3] = (vaddr >> 39) & 0x1ffu;
-
- /* Allocate page directory pointer table if not present. */
- pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].present = true;
- }
-
- /* Allocate page directory table if not present. */
- struct pageDirectoryPointerEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].present) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].present = true;
- }
-
- /* Allocate page table if not present. */
- struct pageDirectoryEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].present) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].present = true;
- }
-
- /* Fill in page table entry. */
- struct pageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].present = 1;
-}
-
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
- struct pageMapL4Entry *pml4e, *pml4e_start;
- struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
- struct pageDirectoryEntry *pde, *pde_start;
- struct pageTableEntry *pte, *pte_start;
-
- if (!vm->pgd_created)
- return;
-
- fprintf(stream, "%*s "
- " no\n", indent, "");
- fprintf(stream, "%*s index hvaddr gpaddr "
- "addr w exec dirty\n",
- indent, "");
- pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
- vm->pgd);
- for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
- pml4e = &pml4e_start[n1];
- if (!pml4e->present)
- continue;
- fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
- " %u\n",
- indent, "",
- pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
- pml4e->writable, pml4e->execute_disable);
-
- pdpe_start = addr_gpa2hva(vm, pml4e->address
- * vm->page_size);
- for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
- pdpe = &pdpe_start[n2];
- if (!pdpe->present)
- continue;
- fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
- "%u %u\n",
- indent, "",
- pdpe - pdpe_start, pdpe,
- addr_hva2gpa(vm, pdpe),
- (uint64_t) pdpe->address, pdpe->writable,
- pdpe->execute_disable);
-
- pde_start = addr_gpa2hva(vm,
- pdpe->address * vm->page_size);
- for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
- pde = &pde_start[n3];
- if (!pde->present)
- continue;
- fprintf(stream, "%*spde 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u\n",
- indent, "", pde - pde_start, pde,
- addr_hva2gpa(vm, pde),
- (uint64_t) pde->address, pde->writable,
- pde->execute_disable);
-
- pte_start = addr_gpa2hva(vm,
- pde->address * vm->page_size);
- for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
- pte = &pte_start[n4];
- if (!pte->present)
- continue;
- fprintf(stream, "%*spte 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u "
- " %u 0x%-10lx\n",
- indent, "",
- pte - pte_start, pte,
- addr_hva2gpa(vm, pte),
- (uint64_t) pte->address,
- pte->writable,
- pte->execute_disable,
- pte->dirty,
- ((uint64_t) n1 << 27)
- | ((uint64_t) n2 << 18)
- | ((uint64_t) n3 << 9)
- | ((uint64_t) n4));
- }
- }
- }
- }
-}
-
-/*
- * Set Unusable Segment
- *
- * Input Args: None
- *
- * Output Args:
- * segp - Pointer to segment register
- *
- * Return: None
- *
- * Sets the segment register pointed to by @segp to an unusable state.
- */
-static void kvm_seg_set_unusable(struct kvm_segment *segp)
-{
- memset(segp, 0, sizeof(*segp));
- segp->unusable = true;
-}
-
-static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
-{
- void *gdt = addr_gva2hva(vm, vm->gdt);
- struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
-
- desc->limit0 = segp->limit & 0xFFFF;
- desc->base0 = segp->base & 0xFFFF;
- desc->base1 = segp->base >> 16;
- desc->type = segp->type;
- desc->s = segp->s;
- desc->dpl = segp->dpl;
- desc->p = segp->present;
- desc->limit1 = segp->limit >> 16;
- desc->avl = segp->avl;
- desc->l = segp->l;
- desc->db = segp->db;
- desc->g = segp->g;
- desc->base2 = segp->base >> 24;
- if (!segp->s)
- desc->base3 = segp->base >> 32;
-}
-
-
-/*
- * Set Long Mode Flat Kernel Code Segment
- *
- * Input Args:
- * vm - VM whose GDT is being filled, or NULL to only write segp
- * selector - selector value
- *
- * Output Args:
- * segp - Pointer to KVM segment
- *
- * Return: None
- *
- * Sets up the KVM segment pointed to by @segp, to be a code segment
- * with the selector value given by @selector.
- */
-static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
- struct kvm_segment *segp)
-{
- memset(segp, 0, sizeof(*segp));
- segp->selector = selector;
- segp->limit = 0xFFFFFFFFu;
- segp->s = 0x1; /* kTypeCodeData */
- segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
- * | kFlagCodeReadable
- */
- segp->g = true;
- segp->l = true;
- segp->present = 1;
- if (vm)
- kvm_seg_fill_gdt_64bit(vm, segp);
-}
-
-/*
- * Set Long Mode Flat Kernel Data Segment
- *
- * Input Args:
- * vm - VM whose GDT is being filled, or NULL to only write segp
- * selector - selector value
- *
- * Output Args:
- * segp - Pointer to KVM segment
- *
- * Return: None
- *
- * Sets up the KVM segment pointed to by @segp, to be a data segment
- * with the selector value given by @selector.
- */
-static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
- struct kvm_segment *segp)
-{
- memset(segp, 0, sizeof(*segp));
- segp->selector = selector;
- segp->limit = 0xFFFFFFFFu;
- segp->s = 0x1; /* kTypeCodeData */
- segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
- * | kFlagDataWritable
- */
- segp->g = true;
- segp->present = true;
- if (vm)
- kvm_seg_fill_gdt_64bit(vm, segp);
-}
-
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- uint16_t index[4];
- struct pageMapL4Entry *pml4e;
- struct pageDirectoryPointerEntry *pdpe;
- struct pageDirectoryEntry *pde;
- struct pageTableEntry *pte;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- index[0] = (gva >> 12) & 0x1ffu;
- index[1] = (gva >> 21) & 0x1ffu;
- index[2] = (gva >> 30) & 0x1ffu;
- index[3] = (gva >> 39) & 0x1ffu;
-
- if (!vm->pgd_created)
- goto unmapped_gva;
- pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present)
- goto unmapped_gva;
-
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].present)
- goto unmapped_gva;
-
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].present)
- goto unmapped_gva;
-
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- if (!pte[index[0]].present)
- goto unmapped_gva;
-
- return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
-
-unmapped_gva:
- TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
- exit(EXIT_FAILURE);
-}
-
-static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
- int pgd_memslot)
-{
- if (!vm->gdt)
- vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
-
- dt->base = vm->gdt;
- dt->limit = getpagesize();
-}
-
-static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
- int selector, int gdt_memslot,
- int pgd_memslot)
-{
- if (!vm->tss)
- vm->tss = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
-
- memset(segp, 0, sizeof(*segp));
- segp->base = vm->tss;
- segp->limit = 0x67;
- segp->selector = selector;
- segp->type = 0xb;
- segp->present = 1;
- kvm_seg_fill_gdt_64bit(vm, segp);
-}
-
-static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
-{
- struct kvm_sregs sregs;
-
- /* Set mode specific system register values. */
- vcpu_sregs_get(vm, vcpuid, &sregs);
-
- sregs.idt.limit = 0;
-
- kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
-
- switch (vm->mode) {
- case VM_MODE_PXXV48_4K:
- sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
- sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
- sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
-
- kvm_seg_set_unusable(&sregs.ldt);
- kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
- kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
- kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
- kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
- break;
-
- default:
- TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
- }
-
- sregs.cr3 = vm->pgd;
- vcpu_sregs_set(vm, vcpuid, &sregs);
-}
-
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
-{
- struct kvm_mp_state mp_state;
- struct kvm_regs regs;
- vm_vaddr_t stack_vaddr;
- stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
-
- /* Create VCPU */
- vm_vcpu_add(vm, vcpuid);
- vcpu_setup(vm, vcpuid, 0, 0);
-
- /* Setup guest general purpose registers */
- vcpu_regs_get(vm, vcpuid, &regs);
- regs.rflags = regs.rflags | 0x2;
- regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
- regs.rip = (unsigned long) guest_code;
- vcpu_regs_set(vm, vcpuid, &regs);
-
- /* Setup the MP state */
- mp_state.mp_state = 0;
- vcpu_set_mp_state(vm, vcpuid, &mp_state);
-}
-
-/*
- * Allocate an instance of struct kvm_cpuid2
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return: A pointer to the allocated struct. The caller is responsible
- * for freeing this struct.
- *
- * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
- * array to be decided at allocation time, allocation is slightly
- * complicated. This function uses a reasonable default length for
- * the array and performs the appropriate allocation.
- */
-static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
-{
- struct kvm_cpuid2 *cpuid;
- int nent = 100;
- size_t size;
-
- size = sizeof(*cpuid);
- size += nent * sizeof(struct kvm_cpuid_entry2);
- cpuid = malloc(size);
- if (!cpuid) {
- perror("malloc");
- abort();
- }
-
- cpuid->nent = nent;
-
- return cpuid;
-}
-
-/*
- * KVM Supported CPUID Get
- *
- * Input Args: None
- *
- * Output Args:
- *
- * Return: The supported KVM CPUID
- *
- * Get the guest CPUID supported by KVM.
- */
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
-{
- static struct kvm_cpuid2 *cpuid;
- int ret;
- int kvm_fd;
-
- if (cpuid)
- return cpuid;
-
- cpuid = allocate_kvm_cpuid2();
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
-
- ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
- TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
- ret, errno);
-
- close(kvm_fd);
- return cpuid;
-}
-
-/*
- * KVM Get MSR
- *
- * Input Args:
- * msr_index - Index of MSR
- *
- * Output Args: None
- *
- * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
- *
- * Get value of MSR for VCPU.
- */
-uint64_t kvm_get_feature_msr(uint64_t msr_index)
-{
- struct {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
- } buffer = {};
- int r, kvm_fd;
-
- buffer.header.nmsrs = 1;
- buffer.entry.index = msr_index;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
-
- r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
- TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
-
- close(kvm_fd);
- return buffer.entry.data;
-}
-
-/*
- * VM VCPU CPUID Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU id
- *
- * Output Args: None
- *
- * Return: KVM CPUID (KVM_GET_CPUID2)
- *
- * Set the VCPU's CPUID.
- */
-struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct kvm_cpuid2 *cpuid;
- int rc, max_ent;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- cpuid = allocate_kvm_cpuid2();
- max_ent = cpuid->nent;
-
- for (cpuid->nent = 1; cpuid->nent <= max_ent; cpuid->nent++) {
- rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid);
- if (!rc)
- break;
-
- TEST_ASSERT(rc == -1 && errno == E2BIG,
- "KVM_GET_CPUID2 should either succeed or give E2BIG: %d %d",
- rc, errno);
- }
-
- TEST_ASSERT(rc == 0, "KVM_GET_CPUID2 failed, rc: %i errno: %i",
- rc, errno);
-
- return cpuid;
-}
-
-
-
-/*
- * Locate a cpuid entry.
- *
- * Input Args:
- * function: The function of the cpuid entry to find.
- * index: The index of the cpuid entry.
- *
- * Output Args: None
- *
- * Return: A pointer to the cpuid entry. Never returns NULL.
- */
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
-{
- struct kvm_cpuid2 *cpuid;
- struct kvm_cpuid_entry2 *entry = NULL;
- int i;
-
- cpuid = kvm_get_supported_cpuid();
- for (i = 0; i < cpuid->nent; i++) {
- if (cpuid->entries[i].function == function &&
- cpuid->entries[i].index == index) {
- entry = &cpuid->entries[i];
- break;
- }
- }
-
- TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
- function, index);
- return entry;
-}
-
-/*
- * VM VCPU CPUID Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU id
- * cpuid - The CPUID values to set.
- *
- * Output Args: None
- *
- * Return: void
- *
- * Set the VCPU's CPUID.
- */
-void vcpu_set_cpuid(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int rc;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
- TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
- rc, errno);
-
-}
-
-/*
- * VCPU Get MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- *
- * Output Args: None
- *
- * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
- *
- * Get value of MSR for VCPU.
- */
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
- } buffer = {};
- int r;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- buffer.header.nmsrs = 1;
- buffer.entry.index = msr_index;
- r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
- TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
-
- return buffer.entry.data;
-}
-
-/*
- * _VCPU Set MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- * msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: The result of KVM_SET_MSRS.
- *
- * Sets the value of an MSR for the given VCPU.
- */
-int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
- } buffer = {};
- int r;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
- memset(&buffer, 0, sizeof(buffer));
- buffer.header.nmsrs = 1;
- buffer.entry.index = msr_index;
- buffer.entry.data = msr_value;
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
- return r;
-}
-
-/*
- * VCPU Set MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- * msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: On success, nothing. On failure a TEST_ASSERT is produced.
- *
- * Set value of MSR for VCPU.
- */
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
-{
- int r;
-
- r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
- TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
-}
-
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
-{
- va_list ap;
- struct kvm_regs regs;
-
- TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
- " num: %u\n",
- num);
-
- va_start(ap, num);
- vcpu_regs_get(vm, vcpuid, &regs);
-
- if (num >= 1)
- regs.rdi = va_arg(ap, uint64_t);
-
- if (num >= 2)
- regs.rsi = va_arg(ap, uint64_t);
-
- if (num >= 3)
- regs.rdx = va_arg(ap, uint64_t);
-
- if (num >= 4)
- regs.rcx = va_arg(ap, uint64_t);
-
- if (num >= 5)
- regs.r8 = va_arg(ap, uint64_t);
-
- if (num >= 6)
- regs.r9 = va_arg(ap, uint64_t);
-
- vcpu_regs_set(vm, vcpuid, &regs);
- va_end(ap);
-}
-
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
-{
- struct kvm_regs regs;
- struct kvm_sregs sregs;
-
- fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
-
- fprintf(stream, "%*sregs:\n", indent + 2, "");
- vcpu_regs_get(vm, vcpuid, &regs);
- regs_dump(stream, &regs, indent + 4);
-
- fprintf(stream, "%*ssregs:\n", indent + 2, "");
- vcpu_sregs_get(vm, vcpuid, &sregs);
- sregs_dump(stream, &sregs, indent + 4);
-}
-
-struct kvm_x86_state {
- struct kvm_vcpu_events events;
- struct kvm_mp_state mp_state;
- struct kvm_regs regs;
- struct kvm_xsave xsave;
- struct kvm_xcrs xcrs;
- struct kvm_sregs sregs;
- struct kvm_debugregs debugregs;
- union {
- struct kvm_nested_state nested;
- char nested_[16384];
- };
- struct kvm_msrs msrs;
-};
-
-static int kvm_get_num_msrs_fd(int kvm_fd)
-{
- struct kvm_msr_list nmsrs;
- int r;
-
- nmsrs.nmsrs = 0;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
- TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
- r);
-
- return nmsrs.nmsrs;
-}
-
-static int kvm_get_num_msrs(struct kvm_vm *vm)
-{
- return kvm_get_num_msrs_fd(vm->kvm_fd);
-}
-
-struct kvm_msr_list *kvm_get_msr_index_list(void)
-{
- struct kvm_msr_list *list;
- int nmsrs, r, kvm_fd;
-
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
-
- nmsrs = kvm_get_num_msrs_fd(kvm_fd);
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- close(kvm_fd);
-
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
-
- return list;
-}
-
-struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct kvm_msr_list *list;
- struct kvm_x86_state *state;
- int nmsrs, r, i;
- static int nested_size = -1;
-
- if (nested_size == -1) {
- nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
- TEST_ASSERT(nested_size <= sizeof(state->nested_),
- "Nested state size too big, %i > %zi",
- nested_size, sizeof(state->nested_));
- }
-
- /*
- * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
- * guest state is consistent only after userspace re-enters the
- * kernel with KVM_RUN. Complete IO prior to migrating state
- * to a new VM.
- */
- vcpu_run_complete_io(vm, vcpuid);
-
- nmsrs = kvm_get_num_msrs(vm);
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
-
- state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
- r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
- r);
-
- if (kvm_check_cap(KVM_CAP_XCRS)) {
- r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
- r);
- }
-
- r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
- r);
-
- if (nested_size) {
- state->nested.size = sizeof(state->nested_);
- r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
- r);
- TEST_ASSERT(state->nested.size <= nested_size,
- "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
- state->nested.size, nested_size);
- } else
- state->nested.size = 0;
-
- state->msrs.nmsrs = nmsrs;
- for (i = 0; i < nmsrs; i++)
- state->msrs.entries[i].index = list->indices[i];
- r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
- TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
- r, r == nmsrs ? -1 : list->indices[r]);
-
- r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
- r);
-
- free(list);
- return state;
-}
-
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int r;
-
- r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
- r);
-
- if (kvm_check_cap(KVM_CAP_XCRS)) {
- r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
- r);
- }
-
- r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
- TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
- r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
-
- r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
- r);
-
- if (state->nested.size) {
- r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
- }
-}
-
-bool is_intel_cpu(void)
-{
- int eax, ebx, ecx, edx;
- const uint32_t *chunk;
- const int leaf = 0;
-
- __asm__ __volatile__(
- "cpuid"
- : /* output */ "=a"(eax), "=b"(ebx),
- "=c"(ecx), "=d"(edx)
- : /* input */ "0"(leaf), "2"(0));
-
- chunk = (const uint32_t *)("GenuineIntel");
- return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
-}
-
-uint32_t kvm_get_cpuid_max_basic(void)
-{
- return kvm_get_supported_cpuid_entry(0)->eax;
-}
-
-uint32_t kvm_get_cpuid_max_extended(void)
-{
- return kvm_get_supported_cpuid_entry(0x80000000)->eax;
-}
-
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
-{
- struct kvm_cpuid_entry2 *entry;
- bool pae;
-
- /* SDM 4.1.4 */
- if (kvm_get_cpuid_max_extended() < 0x80000008) {
- pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
- *pa_bits = pae ? 36 : 32;
- *va_bits = 32;
- } else {
- entry = kvm_get_supported_cpuid_entry(0x80000008);
- *pa_bits = entry->eax & 0xff;
- *va_bits = (entry->eax >> 8) & 0xff;
- }
-}
-
-struct idt_entry {
- uint16_t offset0;
- uint16_t selector;
- uint16_t ist : 3;
- uint16_t : 5;
- uint16_t type : 4;
- uint16_t : 1;
- uint16_t dpl : 2;
- uint16_t p : 1;
- uint16_t offset1;
- uint32_t offset2; uint32_t reserved;
-};
-
-static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
- int dpl, unsigned short selector)
-{
- struct idt_entry *base =
- (struct idt_entry *)addr_gva2hva(vm, vm->idt);
- struct idt_entry *e = &base[vector];
-
- memset(e, 0, sizeof(*e));
- e->offset0 = addr;
- e->selector = selector;
- e->ist = 0;
- e->type = 14;
- e->dpl = dpl;
- e->p = 1;
- e->offset1 = addr >> 16;
- e->offset2 = addr >> 32;
-}
-
-void kvm_exit_unexpected_vector(uint32_t value)
-{
- outl(UNEXPECTED_VECTOR_PORT, value);
-}
-
-void route_exception(struct ex_regs *regs)
-{
- typedef void(*handler)(struct ex_regs *);
- handler *handlers = (handler *)exception_handlers;
-
- if (handlers && handlers[regs->vector]) {
- handlers[regs->vector](regs);
- return;
- }
-
- kvm_exit_unexpected_vector(regs->vector);
-}
-
-void vm_init_descriptor_tables(struct kvm_vm *vm)
-{
- extern void *idt_handlers;
- int i;
-
- vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
- vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
- /* Handlers have the same address in both address spaces.*/
- for (i = 0; i < NUM_INTERRUPTS; i++)
- set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
- DEFAULT_CODE_SELECTOR);
-}
-
-void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
-{
- struct kvm_sregs sregs;
-
- vcpu_sregs_get(vm, vcpuid, &sregs);
- sregs.idt.base = vm->idt;
- sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
- sregs.gdt.base = vm->gdt;
- sregs.gdt.limit = getpagesize() - 1;
- kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
- vcpu_sregs_set(vm, vcpuid, &sregs);
- *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
-}
-
-void vm_handle_exception(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *))
-{
- vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
-
- handlers[vector] = (vm_vaddr_t)handler;
-}
-
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
-{
- if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
- && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
- && vcpu_state(vm, vcpuid)->io.size == 4) {
- /* Grab pointer to io data */
- uint32_t *data = (void *)vcpu_state(vm, vcpuid)
- + vcpu_state(vm, vcpuid)->io.data_offset;
-
- TEST_ASSERT(false,
- "Unexpected vectored event in guest (vector:0x%x)",
- *data);
- }
-}
-
-bool set_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 *ent)
-{
- int i;
-
- for (i = 0; i < cpuid->nent; i++) {
- struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
-
- if (cur->function != ent->function || cur->index != ent->index)
- continue;
-
- memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
- return true;
- }
-
- return false;
-}
-
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
- uint64_t a3)
-{
- uint64_t r;
-
- asm volatile("vmcall"
- : "=a"(r)
- : "b"(a0), "c"(a1), "d"(a2), "S"(a3));
- return r;
-}
-
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
-{
- static struct kvm_cpuid2 *cpuid;
- int ret;
- int kvm_fd;
-
- if (cpuid)
- return cpuid;
-
- cpuid = allocate_kvm_cpuid2();
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
-
- ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
- TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n",
- ret, errno);
-
- close(kvm_fd);
- return cpuid;
-}
-
-void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
-{
- static struct kvm_cpuid2 *cpuid_full;
- struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
- int i, nent = 0;
-
- if (!cpuid_full) {
- cpuid_sys = kvm_get_supported_cpuid();
- cpuid_hv = kvm_get_supported_hv_cpuid();
-
- cpuid_full = malloc(sizeof(*cpuid_full) +
- (cpuid_sys->nent + cpuid_hv->nent) *
- sizeof(struct kvm_cpuid_entry2));
- if (!cpuid_full) {
- perror("malloc");
- abort();
- }
-
- /* Need to skip KVM CPUID leaves 0x400000xx */
- for (i = 0; i < cpuid_sys->nent; i++) {
- if (cpuid_sys->entries[i].function >= 0x40000000 &&
- cpuid_sys->entries[i].function < 0x40000100)
- continue;
- cpuid_full->entries[nent] = cpuid_sys->entries[i];
- nent++;
- }
-
- memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
- cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
- cpuid_full->nent = nent + cpuid_hv->nent;
- }
-
- vcpu_set_cpuid(vm, vcpuid, cpuid_full);
-}
-
-struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
-{
- static struct kvm_cpuid2 *cpuid;
-
- cpuid = allocate_kvm_cpuid2();
-
- vcpu_ioctl(vm, vcpuid, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
- return cpuid;
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
deleted file mode 100644
index a3489973e290..000000000000
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ /dev/null
@@ -1,59 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
-
-void ucall_init(struct kvm_vm *vm, void *arg)
-{
-}
-
-void ucall_uninit(struct kvm_vm *vm)
-{
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- asm volatile("in %[port], %%al"
- : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
-
- if (uc)
- memset(uc, 0, sizeof(*uc));
-
- if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
- struct kvm_regs regs;
-
- vcpu_regs_get(vm, vcpu_id, &regs);
- memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi),
- sizeof(ucall));
-
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
- }
-
- return ucall.cmd;
-}
diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c
new file mode 100644
index 000000000000..355ecac30954
--- /dev/null
+++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates periodic/one-shot constant timer IRQ using
+ * CSR.TCFG and CSR.TVAL registers.
+ */
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+
+static void do_idle(void)
+{
+ unsigned int intid;
+ unsigned long estat;
+
+ __asm__ __volatile__("idle 0" : : : "memory");
+
+ estat = csr_read(LOONGARCH_CSR_ESTAT);
+ intid = !!(estat & BIT(INT_TI));
+
+ /* Make sure pending timer IRQ arrived */
+ GUEST_ASSERT_EQ(intid, 1);
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid;
+ uint32_t cpu = guest_get_vcpuid();
+ uint64_t xcnt, val, cfg, xcnt_diff_us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ intid = !!(regs->estat & BIT(INT_TI));
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, 1);
+
+ cfg = timer_get_cfg();
+ if (cfg & CSR_TCFG_PERIOD) {
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1);
+ if (shared_data->nr_iter == 0)
+ disable_timer();
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ return;
+ }
+
+ /*
+ * On real machine, value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1
+ * On virtual machine, its value counts down from BIT_ULL(48) - 1
+ */
+ val = timer_get_val();
+ xcnt = timer_get_cycles();
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Basic 'timer condition met' check */
+ __GUEST_ASSERT(val > cfg,
+ "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx",
+ val, cfg, xcnt_diff_us);
+
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_test_period_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = test_args.nr_iter;
+ shared_data->xcnt = timer_get_cycles();
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ timer_set_next_cmp_ms(test_args.timer_period_ms, true);
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+ }
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(irq_iter == 0,
+ "irq_iter = 0x%x.\n"
+ " Guest period timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ irq_iter);
+}
+
+static void guest_test_oneshot_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = 0;
+ shared_data->guest_stage = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_test_emulate_timer(uint32_t cpu)
+{
+ uint32_t config_iter;
+ uint64_t xcnt_diff_us, us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+ shared_data->nr_iter = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ do_idle();
+
+ xcnt_diff_us = cycles_to_usec(timer_get_cycles() - shared_data->xcnt);
+ __GUEST_ASSERT(xcnt_diff_us >= us,
+ "xcnt_diff_us = 0x%lx, us = 0x%lx.\n",
+ xcnt_diff_us, us);
+ }
+ local_irq_enable();
+}
+
+static void guest_time_count_test(uint32_t cpu)
+{
+ uint32_t config_iter;
+ unsigned long start, end, prev, us;
+
+ /* Assuming that test case starts to run in 1 second */
+ start = timer_get_cycles();
+ us = msec_to_cycles(1000);
+ __GUEST_ASSERT(start <= us,
+ "start = 0x%lx, us = 0x%lx.\n",
+ start, us);
+
+ us = msec_to_cycles(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ start = timer_get_cycles();
+ end = start + us;
+ /* test time count growing up always */
+ while (start < end) {
+ prev = start;
+ start = timer_get_cycles();
+ __GUEST_ASSERT(prev <= start,
+ "prev = 0x%lx, start = 0x%lx.\n",
+ prev, start);
+ }
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+ /* must run at first */
+ guest_time_count_test(cpu);
+
+ timer_irq_enable();
+ local_irq_enable();
+ guest_test_period_timer(cpu);
+ guest_test_oneshot_timer(cpu);
+ guest_test_emulate_timer(cpu);
+
+ GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 6096bf0a5b34..3cdfa3b19b85 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -6,9 +6,6 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2020, Google, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
@@ -21,10 +18,11 @@
#include <linux/bitops.h>
#include <linux/userfaultfd.h>
-#include "perf_test_util.h"
+#include "memstress.h"
#include "processor.h"
#include "test_util.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#define DUMMY_MEMSLOT_INDEX 7
@@ -34,120 +32,96 @@
static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
-static bool run_vcpus = true;
-
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
- int ret;
- struct perf_test_vcpu_args *vcpu_args =
- (struct perf_test_vcpu_args *)data;
- int vcpu_id = vcpu_args->vcpu_id;
- struct kvm_vm *vm = perf_test_args.vm;
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
struct kvm_run *run;
+ int ret;
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
- run = vcpu_state(vm, vcpu_id);
+ run = vcpu->run;
/* Let the guest access its memory until a stop signal is received */
- while (READ_ONCE(run_vcpus)) {
- ret = _vcpu_run(vm, vcpu_id);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+ while (!READ_ONCE(memstress_args.stop_vcpus)) {
+ ret = _vcpu_run(vcpu);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
- if (get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC)
+ if (get_ucall(vcpu, NULL) == UCALL_SYNC)
continue;
TEST_ASSERT(false,
"Invalid guest sync status: exit_reason=%s\n",
exit_reason_str(run->exit_reason));
}
-
- return NULL;
}
-struct memslot_antagonist_args {
- struct kvm_vm *vm;
- useconds_t delay;
- uint64_t nr_modifications;
-};
-
static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
- uint64_t nr_modifications, uint64_t gpa)
+ uint64_t nr_modifications)
{
+ uint64_t pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size;
+ uint64_t gpa;
int i;
+ /*
+ * Add the dummy memslot just below the memstress memslot, which is
+ * at the top of the guest physical address space.
+ */
+ gpa = memstress_args.gpa - pages * vm->page_size;
+
for (i = 0; i < nr_modifications; i++) {
usleep(delay);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa,
- DUMMY_MEMSLOT_INDEX, 1, 0);
+ DUMMY_MEMSLOT_INDEX, pages, 0);
vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX);
}
}
struct test_params {
- useconds_t memslot_modification_delay;
- uint64_t nr_memslot_modifications;
+ useconds_t delay;
+ uint64_t nr_iterations;
bool partition_vcpu_memory_access;
+ bool disable_slot_zap_quirk;
};
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
- pthread_t *vcpu_threads;
struct kvm_vm *vm;
- int vcpu_id;
-
- vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
- VM_MEM_SRC_ANONYMOUS);
-
- perf_test_args.wr_fract = 1;
-
- vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
- TEST_ASSERT(vcpu_threads, "Memory allocation failed");
- perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
- p->partition_vcpu_memory_access);
+ vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
+ VM_MEM_SRC_ANONYMOUS,
+ p->partition_vcpu_memory_access);
+#ifdef __x86_64__
+ if (p->disable_slot_zap_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
- /* Export the shared variables to the guest */
- sync_global_to_guest(vm, perf_test_args);
+ pr_info("Memslot zap quirk %s\n", p->disable_slot_zap_quirk ?
+ "disabled" : "enabled");
+#endif
pr_info("Finished creating vCPUs\n");
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
- pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
- &perf_test_args.vcpu_args[vcpu_id]);
+ memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
pr_info("Started all vCPUs\n");
- add_remove_memslot(vm, p->memslot_modification_delay,
- p->nr_memslot_modifications,
- guest_test_phys_mem +
- (guest_percpu_mem_size * nr_vcpus) +
- perf_test_args.host_page_size +
- perf_test_args.guest_page_size);
-
- run_vcpus = false;
-
- /* Wait for the vcpu threads to quit */
- for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
- pthread_join(vcpu_threads[vcpu_id], NULL);
+ add_remove_memslot(vm, p->delay, p->nr_iterations);
+ memstress_join_vcpu_threads(nr_vcpus);
pr_info("All vCPU threads joined\n");
- ucall_uninit(vm);
- kvm_vm_free(vm);
-
- free(vcpu_threads);
+ memstress_destroy_vm(vm);
}
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-m mode] [-d delay_usec]\n"
+ printf("usage: %s [-h] [-m mode] [-d delay_usec] [-q]\n"
" [-b memory] [-v vcpus] [-o] [-i iterations]\n", name);
guest_modes_help();
printf(" -d: add a delay between each iteration of adding and\n"
" deleting a memslot in usec.\n");
+ printf(" -q: Disable memslot zap quirk.\n");
printf(" -b: specify the size of the memory region which should be\n"
" accessed by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
@@ -166,30 +140,27 @@ int main(int argc, char *argv[])
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
int opt;
struct test_params p = {
- .memslot_modification_delay = 0,
- .nr_memslot_modifications =
- DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS,
+ .delay = 0,
+ .nr_iterations = DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS,
.partition_vcpu_memory_access = true
};
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:d:qb:v:oi:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
break;
case 'd':
- p.memslot_modification_delay = strtoul(optarg, NULL, 0);
- TEST_ASSERT(p.memslot_modification_delay >= 0,
- "A negative delay is not supported.");
+ p.delay = atoi_non_negative("Delay", optarg);
break;
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
case 'v':
- nr_vcpus = atoi(optarg);
- TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ TEST_ASSERT(nr_vcpus <= max_vcpus,
"Invalid number of vcpus, must be between 1 and %d",
max_vcpus);
break;
@@ -197,8 +168,16 @@ int main(int argc, char *argv[])
p.partition_vcpu_memory_access = false;
break;
case 'i':
- p.nr_memslot_modifications = atoi(optarg);
+ p.nr_iterations = atoi_positive("Number of iterations", optarg);
+ break;
+#ifdef __x86_64__
+ case 'q':
+ p.disable_slot_zap_quirk = true;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
+ KVM_X86_QUIRK_SLOT_ZAP_ALL);
break;
+#endif
case 'h':
default:
help(argv[0]);
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
new file mode 100644
index 000000000000..5087d082c4b0
--- /dev/null
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -0,0 +1,1146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A memslot-related performance benchmark.
+ *
+ * Copyright (C) 2021 Oracle and/or its affiliates.
+ *
+ * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
+ */
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/compiler.h>
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <ucall_common.h>
+
+#define MEM_EXTRA_SIZE SZ_64K
+
+#define MEM_SIZE (SZ_512M + MEM_EXTRA_SIZE)
+#define MEM_GPA SZ_256M
+#define MEM_AUX_GPA MEM_GPA
+#define MEM_SYNC_GPA MEM_AUX_GPA
+#define MEM_TEST_GPA (MEM_AUX_GPA + MEM_EXTRA_SIZE)
+#define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE)
+
+/*
+ * 32 MiB is max size that gets well over 100 iterations on 509 slots.
+ * Considering that each slot needs to have at least one page up to
+ * 8194 slots in use can then be tested (although with slightly
+ * limited resolution).
+ */
+#define MEM_SIZE_MAP (SZ_32M + MEM_EXTRA_SIZE)
+#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE)
+
+/*
+ * 128 MiB is min size that fills 32k slots with at least one page in each
+ * while at the same time gets 100+ iterations in such test
+ *
+ * 2 MiB chunk size like a typical huge page
+ */
+#define MEM_TEST_UNMAP_SIZE SZ_128M
+#define MEM_TEST_UNMAP_CHUNK_SIZE SZ_2M
+
+/*
+ * For the move active test the middle of the test area is placed on
+ * a memslot boundary: half lies in the memslot being moved, half in
+ * other memslot(s).
+ *
+ * We have different number of memory slots, excluding the reserved
+ * memory slot 0, on various architectures and configurations. The
+ * memory size in this test is calculated by picking the maximal
+ * last memory slot's memory size, with alignment to the largest
+ * supported page size (64KB). In this way, the selected memory
+ * size for this test is compatible with test_memslot_move_prepare().
+ *
+ * architecture slots memory-per-slot memory-on-last-slot
+ * --------------------------------------------------------------
+ * x86-4KB 32763 16KB 160KB
+ * arm64-4KB 32766 16KB 112KB
+ * arm64-16KB 32766 16KB 112KB
+ * arm64-64KB 8192 64KB 128KB
+ */
+#define MEM_TEST_MOVE_SIZE (3 * SZ_64K)
+#define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE)
+static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
+ "invalid move test region size");
+
+#define MEM_TEST_VAL_1 0x1122334455667788
+#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
+
+struct vm_data {
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ pthread_t vcpu_thread;
+ uint32_t nslots;
+ uint64_t npages;
+ uint64_t pages_per_slot;
+ void **hva_slots;
+ bool mmio_ok;
+ uint64_t mmio_gpa_min;
+ uint64_t mmio_gpa_max;
+};
+
+struct sync_area {
+ uint32_t guest_page_size;
+ atomic_bool start_flag;
+ atomic_bool exit_flag;
+ atomic_bool sync_flag;
+ void *move_area_ptr;
+};
+
+/*
+ * Technically, we need also for the atomic bool to be address-free, which
+ * is recommended, but not strictly required, by C11 for lockless
+ * implementations.
+ * However, in practice both GCC and Clang fulfill this requirement on
+ * all KVM-supported platforms.
+ */
+static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
+
+static sem_t vcpu_ready;
+
+static bool map_unmap_verify;
+#ifdef __x86_64__
+static bool disable_slot_zap_quirk;
+#endif
+
+static bool verbose;
+#define pr_info_v(...) \
+ do { \
+ if (verbose) \
+ pr_info(__VA_ARGS__); \
+ } while (0)
+
+static void check_mmio_access(struct vm_data *data, struct kvm_run *run)
+{
+ TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit");
+ TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
+ TEST_ASSERT(run->mmio.len == 8,
+ "Unexpected exit mmio size = %u", run->mmio.len);
+ TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min &&
+ run->mmio.phys_addr <= data->mmio_gpa_max,
+ "Unexpected exit mmio address = 0x%llx",
+ run->mmio.phys_addr);
+}
+
+static void *vcpu_worker(void *__data)
+{
+ struct vm_data *data = __data;
+ struct kvm_vcpu *vcpu = data->vcpu;
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == 0,
+ "Unexpected sync ucall, got %lx",
+ (ulong)uc.args[1]);
+ sem_post(&vcpu_ready);
+ continue;
+ case UCALL_NONE:
+ if (run->exit_reason == KVM_EXIT_MMIO)
+ check_mmio_access(data, run);
+ else
+ goto done;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ return NULL;
+}
+
+static void wait_for_vcpu(void)
+{
+ struct timespec ts;
+
+ TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+ "clock_gettime() failed: %d", errno);
+
+ ts.tv_sec += 2;
+ TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+ "sem_timedwait() failed: %d", errno);
+}
+
+static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
+{
+ uint64_t gpage, pgoffs;
+ uint32_t slot, slotoffs;
+ void *base;
+ uint32_t guest_page_size = data->vm->page_size;
+
+ TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
+ TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size,
+ "Too high gpa to translate");
+ gpa -= MEM_GPA;
+
+ gpage = gpa / guest_page_size;
+ pgoffs = gpa % guest_page_size;
+ slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
+ slotoffs = gpage - (slot * data->pages_per_slot);
+
+ if (rempages) {
+ uint64_t slotpages;
+
+ if (slot == data->nslots - 1)
+ slotpages = data->npages - slot * data->pages_per_slot;
+ else
+ slotpages = data->pages_per_slot;
+
+ TEST_ASSERT(!pgoffs,
+ "Asking for remaining pages in slot but gpa not page aligned");
+ *rempages = slotpages - slotoffs;
+ }
+
+ base = data->hva_slots[slot];
+ return (uint8_t *)base + slotoffs * guest_page_size + pgoffs;
+}
+
+static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+
+ TEST_ASSERT(slot < data->nslots, "Too high slot number");
+
+ return MEM_GPA + slot * data->pages_per_slot * guest_page_size;
+}
+
+static struct vm_data *alloc_vm(void)
+{
+ struct vm_data *data;
+
+ data = malloc(sizeof(*data));
+ TEST_ASSERT(data, "malloc(vmdata) failed");
+
+ data->vm = NULL;
+ data->vcpu = NULL;
+ data->hva_slots = NULL;
+
+ return data;
+}
+
+static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size,
+ uint64_t pages_per_slot, uint64_t rempages)
+{
+ if (!pages_per_slot)
+ return false;
+
+ if ((pages_per_slot * guest_page_size) % host_page_size)
+ return false;
+
+ if ((rempages * guest_page_size) % host_page_size)
+ return false;
+
+ return true;
+}
+
+
+static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t mempages, pages_per_slot, rempages;
+ uint64_t slots;
+
+ mempages = data->npages;
+ slots = data->nslots;
+ while (--slots > 1) {
+ pages_per_slot = mempages / slots;
+ if (!pages_per_slot)
+ continue;
+
+ rempages = mempages % pages_per_slot;
+ if (check_slot_pages(host_page_size, guest_page_size,
+ pages_per_slot, rempages))
+ return slots + 1; /* slot 0 is reserved */
+ }
+
+ return 0;
+}
+
+static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
+ void *guest_code, uint64_t mem_size,
+ struct timespec *slot_runtime)
+{
+ uint64_t mempages, rempages;
+ uint64_t guest_addr;
+ uint32_t slot, host_page_size, guest_page_size;
+ struct timespec tstart;
+ struct sync_area *sync;
+
+ host_page_size = getpagesize();
+ guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ mempages = mem_size / guest_page_size;
+
+ data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
+ TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size");
+
+ data->npages = mempages;
+ TEST_ASSERT(data->npages > 1, "Can't test without any memory");
+ data->nslots = nslots;
+ data->pages_per_slot = data->npages / data->nslots;
+ rempages = data->npages % data->nslots;
+ if (!check_slot_pages(host_page_size, guest_page_size,
+ data->pages_per_slot, rempages)) {
+ *maxslots = get_max_slots(data, host_page_size);
+ return false;
+ }
+
+ data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
+ TEST_ASSERT(data->hva_slots, "malloc() fail");
+
+ pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
+ data->nslots, data->pages_per_slot, rempages);
+
+ clock_gettime(CLOCK_MONOTONIC, &tstart);
+ for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
+ uint64_t npages;
+
+ npages = data->pages_per_slot;
+ if (slot == data->nslots)
+ npages += rempages;
+
+ vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
+ guest_addr, slot, npages,
+ 0);
+ guest_addr += npages * guest_page_size;
+ }
+ *slot_runtime = timespec_elapsed(tstart);
+
+ for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
+ uint64_t npages;
+ uint64_t gpa;
+
+ npages = data->pages_per_slot;
+ if (slot == data->nslots)
+ npages += rempages;
+
+ gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
+ TEST_ASSERT(gpa == guest_addr,
+ "vm_phy_pages_alloc() failed");
+
+ data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
+ memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);
+
+ guest_addr += npages * guest_page_size;
+ }
+
+ virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages);
+
+ sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+ sync->guest_page_size = data->vm->page_size;
+ atomic_init(&sync->start_flag, false);
+ atomic_init(&sync->exit_flag, false);
+ atomic_init(&sync->sync_flag, false);
+
+ data->mmio_ok = false;
+
+ return true;
+}
+
+static void launch_vm(struct vm_data *data)
+{
+ pr_info_v("Launching the test VM\n");
+
+ pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);
+
+ /* Ensure the guest thread is spun up. */
+ wait_for_vcpu();
+}
+
+static void free_vm(struct vm_data *data)
+{
+ kvm_vm_free(data->vm);
+ free(data->hva_slots);
+ free(data);
+}
+
+static void wait_guest_exit(struct vm_data *data)
+{
+ pthread_join(data->vcpu_thread, NULL);
+}
+
+static void let_guest_run(struct sync_area *sync)
+{
+ atomic_store_explicit(&sync->start_flag, true, memory_order_release);
+}
+
+static void guest_spin_until_start(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
+ ;
+}
+
+static void make_guest_exit(struct sync_area *sync)
+{
+ atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
+}
+
+static bool _guest_should_exit(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
+}
+
+#define guest_should_exit() unlikely(_guest_should_exit())
+
+/*
+ * noinline so we can easily see how much time the host spends waiting
+ * for the guest.
+ * For the same reason use alarm() instead of polling clock_gettime()
+ * to implement a wait timeout.
+ */
+static noinline void host_perform_sync(struct sync_area *sync)
+{
+ alarm(10);
+
+ atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
+ while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
+ ;
+
+ alarm(0);
+}
+
+static bool guest_perform_sync(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ bool expected;
+
+ do {
+ if (guest_should_exit())
+ return false;
+
+ expected = true;
+ } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
+ &expected, false,
+ memory_order_acq_rel,
+ memory_order_relaxed));
+
+ return true;
+}
+
+static void guest_code_test_memslot_move(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+ uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (!guest_should_exit()) {
+ uintptr_t ptr;
+
+ for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
+ ptr += page_size)
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ /*
+ * No host sync here since the MMIO exits are so expensive
+ * that the host would spend most of its time waiting for
+ * the guest and so instead of measuring memslot move
+ * performance we would measure the performance and
+ * likelihood of MMIO exits
+ */
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_test_memslot_map(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (1) {
+ uintptr_t ptr;
+
+ for (ptr = MEM_TEST_GPA;
+ ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
+ ptr += page_size)
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ if (!guest_perform_sync())
+ break;
+
+ for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
+ ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE;
+ ptr += page_size)
+ *(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+ if (!guest_perform_sync())
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_test_memslot_unmap(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (1) {
+ uintptr_t ptr = MEM_TEST_GPA;
+
+ /*
+ * We can afford to access (map) just a small number of pages
+ * per host sync as otherwise the host will spend
+ * a significant amount of its time waiting for the guest
+ * (instead of doing unmap operations), so this will
+ * effectively turn this test into a map performance test.
+ *
+ * Just access a single page to be on the safe side.
+ */
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ if (!guest_perform_sync())
+ break;
+
+ ptr += MEM_TEST_UNMAP_SIZE / 2;
+ *(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+ if (!guest_perform_sync())
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_test_memslot_rw(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (1) {
+ uintptr_t ptr;
+
+ for (ptr = MEM_TEST_GPA;
+ ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size)
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ if (!guest_perform_sync())
+ break;
+
+ for (ptr = MEM_TEST_GPA + page_size / 2;
+ ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
+ uint64_t val = *(uint64_t *)ptr;
+
+ GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
+ *(uint64_t *)ptr = 0;
+ }
+
+ if (!guest_perform_sync())
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static bool test_memslot_move_prepare(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t *maxslots, bool isactive)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t movesrcgpa, movetestgpa;
+
+#ifdef __x86_64__
+ if (disable_slot_zap_quirk)
+ vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
+#endif
+
+ movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
+
+ if (isactive) {
+ uint64_t lastpages;
+
+ vm_gpa2hva(data, movesrcgpa, &lastpages);
+ if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) {
+ *maxslots = 0;
+ return false;
+ }
+ }
+
+ movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
+ sync->move_area_ptr = (void *)movetestgpa;
+
+ if (isactive) {
+ data->mmio_ok = true;
+ data->mmio_gpa_min = movesrcgpa;
+ data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
+ }
+
+ return true;
+}
+
+static bool test_memslot_move_prepare_active(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t *maxslots)
+{
+ return test_memslot_move_prepare(data, sync, maxslots, true);
+}
+
+static bool test_memslot_move_prepare_inactive(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t *maxslots)
+{
+ return test_memslot_move_prepare(data, sync, maxslots, false);
+}
+
+static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
+{
+ uint64_t movesrcgpa;
+
+ movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
+ vm_mem_region_move(data->vm, data->nslots - 1 + 1,
+ MEM_TEST_MOVE_GPA_DEST);
+ vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
+}
+
+static void test_memslot_do_unmap(struct vm_data *data,
+ uint64_t offsp, uint64_t count)
+{
+ uint64_t gpa, ctr;
+ uint32_t guest_page_size = data->vm->page_size;
+
+ for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) {
+ uint64_t npages;
+ void *hva;
+ int ret;
+
+ hva = vm_gpa2hva(data, gpa, &npages);
+ TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
+ npages = min(npages, count - ctr);
+ ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED);
+ TEST_ASSERT(!ret,
+ "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
+ hva, gpa);
+ ctr += npages;
+ gpa += npages * guest_page_size;
+ }
+ TEST_ASSERT(ctr == count,
+ "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
+}
+
+static void test_memslot_map_unmap_check(struct vm_data *data,
+ uint64_t offsp, uint64_t valexp)
+{
+ uint64_t gpa;
+ uint64_t *val;
+ uint32_t guest_page_size = data->vm->page_size;
+
+ if (!map_unmap_verify)
+ return;
+
+ gpa = MEM_TEST_GPA + offsp * guest_page_size;
+ val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
+ TEST_ASSERT(*val == valexp,
+ "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
+ *val, valexp, gpa);
+ *val = 0;
+}
+
+static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;
+
+ /*
+ * Unmap the second half of the test area while guest writes to (maps)
+ * the first half.
+ */
+ test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2);
+
+ /*
+ * Wait for the guest to finish writing the first half of the test
+ * area, verify the written value on the first and the last page of
+ * this area and then unmap it.
+ * Meanwhile, the guest is writing to (mapping) the second half of
+ * the test area.
+ */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+ test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1);
+ test_memslot_do_unmap(data, 0, guest_pages / 2);
+
+
+ /*
+ * Wait for the guest to finish writing the second half of the test
+ * area and verify the written value on the first and the last page
+ * of this area.
+ * The area will be unmapped at the beginning of the next loop
+ * iteration.
+ * Meanwhile, the guest is writing to (mapping) the first half of
+ * the test area.
+ */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
+ test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2);
+}
+
+static void test_memslot_unmap_loop_common(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t chunk)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
+ uint64_t ctr;
+
+ /*
+ * Wait for the guest to finish mapping page(s) in the first half
+ * of the test area, verify the written value and then perform unmap
+ * of this area.
+ * Meanwhile, the guest is writing to (mapping) page(s) in the second
+ * half of the test area.
+ */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+ for (ctr = 0; ctr < guest_pages / 2; ctr += chunk)
+ test_memslot_do_unmap(data, ctr, chunk);
+
+ /* Likewise, but for the opposite host / guest areas */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
+ for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk)
+ test_memslot_do_unmap(data, ctr, chunk);
+}
+
+static void test_memslot_unmap_loop(struct vm_data *data,
+ struct sync_area *sync)
+{
+ uint32_t host_page_size = getpagesize();
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t guest_chunk_pages = guest_page_size >= host_page_size ?
+ 1 : host_page_size / guest_page_size;
+
+ test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
+}
+
+static void test_memslot_unmap_loop_chunked(struct vm_data *data,
+ struct sync_area *sync)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;
+
+ test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
+}
+
+static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
+{
+ uint64_t gptr;
+ uint32_t guest_page_size = data->vm->page_size;
+
+ for (gptr = MEM_TEST_GPA + guest_page_size / 2;
+ gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size)
+ *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
+
+ host_perform_sync(sync);
+
+ for (gptr = MEM_TEST_GPA;
+ gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) {
+ uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
+ uint64_t val = *vptr;
+
+ TEST_ASSERT(val == MEM_TEST_VAL_1,
+ "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
+ val, gptr);
+ *vptr = 0;
+ }
+
+ host_perform_sync(sync);
+}
+
+struct test_data {
+ const char *name;
+ uint64_t mem_size;
+ void (*guest_code)(void);
+ bool (*prepare)(struct vm_data *data, struct sync_area *sync,
+ uint64_t *maxslots);
+ void (*loop)(struct vm_data *data, struct sync_area *sync);
+};
+
+static bool test_execute(int nslots, uint64_t *maxslots,
+ unsigned int maxtime,
+ const struct test_data *tdata,
+ uint64_t *nloops,
+ struct timespec *slot_runtime,
+ struct timespec *guest_runtime)
+{
+ uint64_t mem_size = tdata->mem_size ? : MEM_SIZE;
+ struct vm_data *data;
+ struct sync_area *sync;
+ struct timespec tstart;
+ bool ret = true;
+
+ data = alloc_vm();
+ if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
+ mem_size, slot_runtime)) {
+ ret = false;
+ goto exit_free;
+ }
+
+ sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+ if (tdata->prepare &&
+ !tdata->prepare(data, sync, maxslots)) {
+ ret = false;
+ goto exit_free;
+ }
+
+ launch_vm(data);
+
+ clock_gettime(CLOCK_MONOTONIC, &tstart);
+ let_guest_run(sync);
+
+ while (1) {
+ *guest_runtime = timespec_elapsed(tstart);
+ if (guest_runtime->tv_sec >= maxtime)
+ break;
+
+ tdata->loop(data, sync);
+
+ (*nloops)++;
+ }
+
+ make_guest_exit(sync);
+ wait_guest_exit(data);
+
+exit_free:
+ free_vm(data);
+
+ return ret;
+}
+
+static const struct test_data tests[] = {
+ {
+ .name = "map",
+ .mem_size = MEM_SIZE_MAP,
+ .guest_code = guest_code_test_memslot_map,
+ .loop = test_memslot_map_loop,
+ },
+ {
+ .name = "unmap",
+ .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
+ .guest_code = guest_code_test_memslot_unmap,
+ .loop = test_memslot_unmap_loop,
+ },
+ {
+ .name = "unmap chunked",
+ .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
+ .guest_code = guest_code_test_memslot_unmap,
+ .loop = test_memslot_unmap_loop_chunked,
+ },
+ {
+ .name = "move active area",
+ .guest_code = guest_code_test_memslot_move,
+ .prepare = test_memslot_move_prepare_active,
+ .loop = test_memslot_move_loop,
+ },
+ {
+ .name = "move inactive area",
+ .guest_code = guest_code_test_memslot_move,
+ .prepare = test_memslot_move_prepare_inactive,
+ .loop = test_memslot_move_loop,
+ },
+ {
+ .name = "RW",
+ .guest_code = guest_code_test_memslot_rw,
+ .loop = test_memslot_rw_loop
+ },
+};
+
+#define NTESTS ARRAY_SIZE(tests)
+
+struct test_args {
+ int tfirst;
+ int tlast;
+ int nslots;
+ int seconds;
+ int runs;
+};
+
+static void help(char *name, struct test_args *targs)
+{
+ int ctr;
+
+ pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
+ name);
+ pr_info(" -h: print this help screen.\n");
+ pr_info(" -v: enable verbose mode (not for benchmarking).\n");
+ pr_info(" -d: enable extra debug checks.\n");
+ pr_info(" -q: Disable memslot zap quirk during memslot move.\n");
+ pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
+ targs->nslots);
+ pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
+ targs->tfirst, NTESTS - 1);
+ pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
+ targs->tlast, NTESTS - 1);
+ pr_info(" -l: specify the test length in seconds (currently: %i)\n",
+ targs->seconds);
+ pr_info(" -r: specify the number of runs per test (currently: %i)\n",
+ targs->runs);
+
+ pr_info("\nAvailable tests:\n");
+ for (ctr = 0; ctr < NTESTS; ctr++)
+ pr_info("%d: %s\n", ctr, tests[ctr].name);
+}
+
+static bool check_memory_sizes(void)
+{
+ uint32_t host_page_size = getpagesize();
+ uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+
+ if (host_page_size > SZ_64K || guest_page_size > SZ_64K) {
+ pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n",
+ host_page_size, guest_page_size);
+ return false;
+ }
+
+ if (MEM_SIZE % guest_page_size ||
+ MEM_TEST_SIZE % guest_page_size) {
+ pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n");
+ return false;
+ }
+
+ if (MEM_SIZE_MAP % guest_page_size ||
+ MEM_TEST_MAP_SIZE % guest_page_size ||
+ (MEM_TEST_MAP_SIZE / guest_page_size) <= 2 ||
+ (MEM_TEST_MAP_SIZE / guest_page_size) % 2) {
+ pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n");
+ return false;
+ }
+
+ if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE ||
+ MEM_TEST_UNMAP_SIZE % guest_page_size ||
+ (MEM_TEST_UNMAP_SIZE / guest_page_size) %
+ (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) {
+ pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool parse_args(int argc, char *argv[],
+ struct test_args *targs)
+{
+ uint32_t max_mem_slots;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) {
+ switch (opt) {
+ case 'h':
+ default:
+ help(argv[0], targs);
+ return false;
+ case 'v':
+ verbose = true;
+ break;
+ case 'd':
+ map_unmap_verify = true;
+ break;
+#ifdef __x86_64__
+ case 'q':
+ disable_slot_zap_quirk = true;
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
+ KVM_X86_QUIRK_SLOT_ZAP_ALL);
+ break;
+#endif
+ case 's':
+ targs->nslots = atoi_paranoid(optarg);
+ if (targs->nslots <= 1 && targs->nslots != -1) {
+ pr_info("Slot count cap must be larger than 1 or -1 for no cap\n");
+ return false;
+ }
+ break;
+ case 'f':
+ targs->tfirst = atoi_non_negative("First test", optarg);
+ break;
+ case 'e':
+ targs->tlast = atoi_non_negative("Last test", optarg);
+ if (targs->tlast >= NTESTS) {
+ pr_info("Last test to run has to be non-negative and less than %zu\n",
+ NTESTS);
+ return false;
+ }
+ break;
+ case 'l':
+ targs->seconds = atoi_non_negative("Test length", optarg);
+ break;
+ case 'r':
+ targs->runs = atoi_positive("Runs per test", optarg);
+ break;
+ }
+ }
+
+ if (optind < argc) {
+ help(argv[0], targs);
+ return false;
+ }
+
+ if (targs->tfirst > targs->tlast) {
+ pr_info("First test to run cannot be greater than the last test to run\n");
+ return false;
+ }
+
+ max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+ if (max_mem_slots <= 1) {
+ pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n");
+ return false;
+ }
+
+ /* Memory slot 0 is reserved */
+ if (targs->nslots == -1)
+ targs->nslots = max_mem_slots - 1;
+ else
+ targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1;
+
+ pr_info_v("Allowed Number of memory slots: %"PRIu32"\n",
+ targs->nslots + 1);
+
+ return true;
+}
+
+struct test_result {
+ struct timespec slot_runtime, guest_runtime, iter_runtime;
+ int64_t slottimens, runtimens;
+ uint64_t nloops;
+};
+
+static bool test_loop(const struct test_data *data,
+ const struct test_args *targs,
+ struct test_result *rbestslottime,
+ struct test_result *rbestruntime)
+{
+ uint64_t maxslots;
+ struct test_result result = {};
+
+ if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
+ &result.nloops,
+ &result.slot_runtime, &result.guest_runtime)) {
+ if (maxslots)
+ pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
+ maxslots);
+ else
+ pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
+
+ return false;
+ }
+
+ pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
+ result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
+ result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
+ if (!result.nloops) {
+ pr_info("No full loops done - too short test time or system too loaded?\n");
+ return true;
+ }
+
+ result.iter_runtime = timespec_div(result.guest_runtime,
+ result.nloops);
+ pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
+ result.nloops,
+ result.iter_runtime.tv_sec,
+ result.iter_runtime.tv_nsec);
+ result.slottimens = timespec_to_ns(result.slot_runtime);
+ result.runtimens = timespec_to_ns(result.iter_runtime);
+
+ /*
+ * Only rank the slot setup time for tests using the whole test memory
+ * area so they are comparable
+ */
+ if (!data->mem_size &&
+ (!rbestslottime->slottimens ||
+ result.slottimens < rbestslottime->slottimens))
+ *rbestslottime = result;
+ if (!rbestruntime->runtimens ||
+ result.runtimens < rbestruntime->runtimens)
+ *rbestruntime = result;
+
+ return true;
+}
+
+int main(int argc, char *argv[])
+{
+ struct test_args targs = {
+ .tfirst = 0,
+ .tlast = NTESTS - 1,
+ .nslots = -1,
+ .seconds = 5,
+ .runs = 1,
+ };
+ struct test_result rbestslottime = {};
+ int tctr;
+
+ if (!check_memory_sizes())
+ return -1;
+
+ if (!parse_args(argc, argv, &targs))
+ return -1;
+
+ for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
+ const struct test_data *data = &tests[tctr];
+ unsigned int runctr;
+ struct test_result rbestruntime = {};
+
+ if (tctr > targs.tfirst)
+ pr_info("\n");
+
+ pr_info("Testing %s performance with %i runs, %d seconds each\n",
+ data->name, targs.runs, targs.seconds);
+
+ for (runctr = 0; runctr < targs.runs; runctr++)
+ if (!test_loop(data, &targs,
+ &rbestslottime, &rbestruntime))
+ break;
+
+ if (rbestruntime.runtimens)
+ pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
+ rbestruntime.iter_runtime.tv_sec,
+ rbestruntime.iter_runtime.tv_nsec,
+ rbestruntime.nloops);
+ }
+
+ if (rbestslottime.slottimens)
+ pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
+ rbestslottime.slot_runtime.tv_sec,
+ rbestslottime.slot_runtime.tv_nsec);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
new file mode 100644
index 000000000000..51c070556f3e
--- /dev/null
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/atomic.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "guest_modes.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+static bool mprotect_ro_done;
+static bool all_vcpus_hit_ro_fault;
+
+static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
+{
+ uint64_t gpa;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+ vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+ GUEST_SYNC(i);
+ }
+
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+ *((volatile uint64_t *)gpa);
+ GUEST_SYNC(2);
+
+ /*
+ * Write to the region while mprotect(PROT_READ) is underway. Keep
+ * looping until the memory is guaranteed to be read-only and a fault
+ * has occurred, otherwise vCPUs may complete their writes and advance
+ * to the next stage prematurely.
+ *
+ * For architectures that support skipping the faulting instruction,
+ * generate the store via inline assembly to ensure the exact length
+ * of the instruction is known and stable (vcpu_arch_put_guest() on
+ * fixed-length architectures should work, but the cost of paranoia
+ * is low in this case). For x86, hand-code the exact opcode so that
+ * there is no room for variability in the generated instruction.
+ */
+ do {
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+#ifdef __x86_64__
+ asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */
+#elif defined(__aarch64__)
+ asm volatile("str %0, [%0]" :: "r" (gpa) : "memory");
+#else
+ vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+#endif
+ } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault));
+
+ /*
+ * Only architectures that write the entire range can explicitly sync,
+ * as other architectures will be stuck on the write fault.
+ */
+#if defined(__x86_64__) || defined(__aarch64__)
+ GUEST_SYNC(3);
+#endif
+
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+ vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+ GUEST_SYNC(4);
+
+ GUEST_ASSERT(0);
+}
+
+struct vcpu_info {
+ struct kvm_vcpu *vcpu;
+ uint64_t start_gpa;
+ uint64_t end_gpa;
+};
+
+static int nr_vcpus;
+static atomic_t rendezvous;
+static atomic_t nr_ro_faults;
+
+static void rendezvous_with_boss(void)
+{
+ int orig = atomic_read(&rendezvous);
+
+ if (orig > 0) {
+ atomic_dec_and_test(&rendezvous);
+ while (atomic_read(&rendezvous) > 0)
+ cpu_relax();
+ } else {
+ atomic_inc(&rendezvous);
+ while (atomic_read(&rendezvous) < 0)
+ cpu_relax();
+ }
+}
+
+static void assert_sync_stage(struct kvm_vcpu *vcpu, int stage)
+{
+ struct ucall uc;
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], stage);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+ vcpu_run(vcpu);
+ assert_sync_stage(vcpu, stage);
+}
+
+static void *vcpu_worker(void *data)
+{
+ struct kvm_sregs __maybe_unused sregs;
+ struct vcpu_info *info = data;
+ struct kvm_vcpu *vcpu = info->vcpu;
+ struct kvm_vm *vm = vcpu->vm;
+ int r;
+
+ vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
+
+ rendezvous_with_boss();
+
+ /* Stage 0, write all of guest memory. */
+ run_vcpu(vcpu, 0);
+ rendezvous_with_boss();
+#ifdef __x86_64__
+ vcpu_sregs_get(vcpu, &sregs);
+ /* Toggle CR0.WP to trigger a MMU context reset. */
+ sregs.cr0 ^= X86_CR0_WP;
+ vcpu_sregs_set(vcpu, &sregs);
+#endif
+ rendezvous_with_boss();
+
+ /* Stage 1, re-write all of guest memory. */
+ run_vcpu(vcpu, 1);
+ rendezvous_with_boss();
+
+ /* Stage 2, read all of guest memory, which is now read-only. */
+ run_vcpu(vcpu, 2);
+
+ /*
+ * Stage 3, write guest memory and verify KVM returns -EFAULT for once
+ * the mprotect(PROT_READ) lands. Only architectures that support
+ * validating *all* of guest memory sync for this stage, as vCPUs will
+ * be stuck on the faulting instruction for other architectures. Go to
+ * stage 3 without a rendezvous
+ */
+ r = _vcpu_run(vcpu);
+ TEST_ASSERT(r == -1 && errno == EFAULT,
+ "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno);
+
+ atomic_inc(&nr_ro_faults);
+ if (atomic_read(&nr_ro_faults) == nr_vcpus) {
+ WRITE_ONCE(all_vcpus_hit_ro_fault, true);
+ sync_global_to_guest(vm, all_vcpus_hit_ro_fault);
+ }
+
+#if defined(__x86_64__) || defined(__aarch64__)
+ /*
+ * Verify *all* writes from the guest hit EFAULT due to the VMA now
+ * being read-only. x86 and arm64 only at this time as skipping the
+ * instruction that hits the EFAULT requires advancing the program
+ * counter, which is arch specific and relies on inline assembly.
+ */
+#ifdef __x86_64__
+ vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS;
+#endif
+ for (;;) {
+ r = _vcpu_run(vcpu);
+ if (!r)
+ break;
+ TEST_ASSERT_EQ(errno, EFAULT);
+#if defined(__x86_64__)
+ WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS);
+ vcpu->run->s.regs.regs.rip += 3;
+#elif defined(__aarch64__)
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc),
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4);
+#endif
+
+ }
+ assert_sync_stage(vcpu, 3);
+#endif /* __x86_64__ || __aarch64__ */
+ rendezvous_with_boss();
+
+ /*
+ * Stage 4. Run to completion, waiting for mprotect(PROT_WRITE) to
+ * make the memory writable again.
+ */
+ do {
+ r = _vcpu_run(vcpu);
+ } while (r && errno == EFAULT);
+ TEST_ASSERT_EQ(r, 0);
+ assert_sync_stage(vcpu, 4);
+ rendezvous_with_boss();
+
+ return NULL;
+}
+
+static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus,
+ uint64_t start_gpa, uint64_t end_gpa)
+{
+ struct vcpu_info *info;
+ uint64_t gpa, nr_bytes;
+ pthread_t *threads;
+ int i;
+
+ threads = malloc(nr_vcpus * sizeof(*threads));
+ TEST_ASSERT(threads, "Failed to allocate vCPU threads");
+
+ info = malloc(nr_vcpus * sizeof(*info));
+ TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges");
+
+ nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) &
+ ~((uint64_t)vm->page_size - 1);
+ TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus);
+
+ for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) {
+ info[i].vcpu = vcpus[i];
+ info[i].start_gpa = gpa;
+ info[i].end_gpa = gpa + nr_bytes;
+ pthread_create(&threads[i], NULL, vcpu_worker, &info[i]);
+ }
+ return threads;
+}
+
+static void rendezvous_with_vcpus(struct timespec *time, const char *name)
+{
+ int i, rendezvoused;
+
+ pr_info("Waiting for vCPUs to finish %s...\n", name);
+
+ rendezvoused = atomic_read(&rendezvous);
+ for (i = 0; abs(rendezvoused) != 1; i++) {
+ usleep(100);
+ if (!(i & 0x3f))
+ pr_info("\r%d vCPUs haven't rendezvoused...",
+ abs(rendezvoused) - 1);
+ rendezvoused = atomic_read(&rendezvous);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, time);
+
+ /* Release the vCPUs after getting the time of the previous action. */
+ pr_info("\rAll vCPUs finished %s, releasing...\n", name);
+ if (rendezvoused > 0)
+ atomic_set(&rendezvous, -nr_vcpus - 1);
+ else
+ atomic_set(&rendezvous, nr_vcpus + 1);
+}
+
+static void calc_default_nr_vcpus(void)
+{
+ cpu_set_t possible_mask;
+ int r;
+
+ r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
+ TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)",
+ errno, strerror(errno));
+
+ nr_vcpus = CPU_COUNT(&possible_mask);
+ TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?");
+ if (nr_vcpus >= 2)
+ nr_vcpus = nr_vcpus * 3/4;
+}
+
+int main(int argc, char *argv[])
+{
+ /*
+ * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back
+ * the guest's code, stack, and page tables. Because selftests creates
+ * an IRQCHIP, a.k.a. a local APIC, KVM creates an internal memslot
+ * just below the 4gb boundary. This test could create memory at
+ * 1gb-3gb,but it's simpler to skip straight to 4gb.
+ */
+ const uint64_t start_gpa = SZ_4G;
+ const int first_slot = 1;
+
+ struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw;
+ uint64_t max_gpa, gpa, slot_size, max_mem, i;
+ int max_slots, slot, opt, fd;
+ bool hugepages = false;
+ struct kvm_vcpu **vcpus;
+ pthread_t *threads;
+ struct kvm_vm *vm;
+ void *mem;
+
+ /*
+ * Default to 2gb so that maxing out systems with MAXPHADDR=46, which
+ * are quite common for x86, requires changing only max_mem (KVM allows
+ * 32k memslots, 32k * 2gb == ~64tb of guest memory).
+ */
+ slot_size = SZ_2G;
+
+ max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+ TEST_ASSERT(max_slots > first_slot, "KVM is broken");
+
+ /* All KVM MMUs should be able to survive a 128gb guest. */
+ max_mem = 128ull * SZ_1G;
+
+ calc_default_nr_vcpus();
+
+ while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) {
+ switch (opt) {
+ case 'c':
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ break;
+ case 'm':
+ max_mem = 1ull * atoi_positive("Memory size", optarg) * SZ_1G;
+ break;
+ case 's':
+ slot_size = 1ull * atoi_positive("Slot size", optarg) * SZ_1G;
+ break;
+ case 'H':
+ hugepages = true;
+ break;
+ case 'h':
+ default:
+ printf("usage: %s [-c nr_vcpus] [-m max_mem_in_gb] [-s slot_size_in_gb] [-H]\n", argv[0]);
+ exit(1);
+ }
+ }
+
+ vcpus = malloc(nr_vcpus * sizeof(*vcpus));
+ TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
+
+ vm = __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus,
+#ifdef __x86_64__
+ max_mem / SZ_1G,
+#else
+ max_mem / vm_guest_mode_params[VM_MODE_DEFAULT].page_size,
+#endif
+ guest_code, vcpus);
+
+ max_gpa = vm->max_gfn << vm->page_shift;
+ TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
+
+ fd = kvm_memfd_alloc(slot_size, hugepages);
+ mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
+
+ /* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */
+ for (i = 0; i < slot_size; i += vm->page_size)
+ ((uint8_t *)mem)[i] = 0xaa;
+
+ gpa = 0;
+ for (slot = first_slot; slot < max_slots; slot++) {
+ gpa = start_gpa + ((slot - first_slot) * slot_size);
+ if (gpa + slot_size > max_gpa)
+ break;
+
+ if ((gpa - start_gpa) >= max_mem)
+ break;
+
+ vm_set_user_memory_region(vm, slot, 0, gpa, slot_size, mem);
+
+#ifdef __x86_64__
+ /* Identity map memory in the guest using 1gb pages. */
+ virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G);
+#else
+ virt_map(vm, gpa, gpa, slot_size >> vm->page_shift);
+#endif
+ }
+
+ atomic_set(&rendezvous, nr_vcpus + 1);
+ threads = spawn_workers(vm, vcpus, start_gpa, gpa);
+
+ free(vcpus);
+ vcpus = NULL;
+
+ pr_info("Running with %lugb of guest memory and %u vCPUs\n",
+ (gpa - start_gpa) / SZ_1G, nr_vcpus);
+
+ rendezvous_with_vcpus(&time_start, "spawning");
+ rendezvous_with_vcpus(&time_run1, "run 1");
+ rendezvous_with_vcpus(&time_reset, "reset");
+ rendezvous_with_vcpus(&time_run2, "run 2");
+
+ mprotect(mem, slot_size, PROT_READ);
+ mprotect_ro_done = true;
+ sync_global_to_guest(vm, mprotect_ro_done);
+
+ rendezvous_with_vcpus(&time_ro, "mprotect RO");
+ mprotect(mem, slot_size, PROT_READ | PROT_WRITE);
+ rendezvous_with_vcpus(&time_rw, "mprotect RW");
+
+ time_rw = timespec_sub(time_rw, time_ro);
+ time_ro = timespec_sub(time_ro, time_run2);
+ time_run2 = timespec_sub(time_run2, time_reset);
+ time_reset = timespec_sub(time_reset, time_run1);
+ time_run1 = timespec_sub(time_run1, time_start);
+
+ pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, "
+ "ro = %ld.%.9lds, rw = %ld.%.9lds\n",
+ time_run1.tv_sec, time_run1.tv_nsec,
+ time_reset.tv_sec, time_reset.tv_nsec,
+ time_run2.tv_sec, time_run2.tv_nsec,
+ time_ro.tv_sec, time_ro.tv_nsec,
+ time_rw.tv_sec, time_rw.tv_nsec);
+
+ /*
+ * Delete even numbered slots (arbitrary) and unmap the first half of
+ * the backing (also arbitrary) to verify KVM correctly drops all
+ * references to the removed regions.
+ */
+ for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
+ vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);
+
+ kvm_munmap(mem, slot_size / 2);
+
+ /* Sanity check that the vCPUs actually ran. */
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(threads[i], NULL);
+
+ /*
+ * Deliberately exit without deleting the remaining memslots or closing
+ * kvm_fd to test cleanup via mmu_notifier.release.
+ */
+}
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
new file mode 100644
index 000000000000..93e603d91311
--- /dev/null
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Intel, Inc
+ *
+ * Author:
+ * Isaku Yamahata <isaku.yamahata at gmail.com>
+ */
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <pthread.h>
+
+/* Arbitrarily chosen values */
+#define TEST_SIZE (SZ_2M + PAGE_SIZE)
+#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE)
+#define TEST_SLOT 10
+
+static void guest_code(uint64_t base_gva)
+{
+ volatile uint64_t val __used;
+ int i;
+
+ for (i = 0; i < TEST_NPAGES; i++) {
+ uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE);
+
+ val = *src;
+ }
+
+ GUEST_DONE();
+}
+
+struct slot_worker_data {
+ struct kvm_vm *vm;
+ u64 gpa;
+ uint32_t flags;
+ bool worker_ready;
+ bool prefault_ready;
+ bool recreate_slot;
+};
+
+static void *delete_slot_worker(void *__data)
+{
+ struct slot_worker_data *data = __data;
+ struct kvm_vm *vm = data->vm;
+
+ WRITE_ONCE(data->worker_ready, true);
+
+ while (!READ_ONCE(data->prefault_ready))
+ cpu_relax();
+
+ vm_mem_region_delete(vm, TEST_SLOT);
+
+ while (!READ_ONCE(data->recreate_slot))
+ cpu_relax();
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa,
+ TEST_SLOT, TEST_NPAGES, data->flags);
+
+ return NULL;
+}
+
+static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
+ u64 size, u64 expected_left, bool private)
+{
+ struct kvm_pre_fault_memory range = {
+ .gpa = base_gpa + offset,
+ .size = size,
+ .flags = 0,
+ };
+ struct slot_worker_data data = {
+ .vm = vcpu->vm,
+ .gpa = base_gpa,
+ .flags = private ? KVM_MEM_GUEST_MEMFD : 0,
+ };
+ bool slot_recreated = false;
+ pthread_t slot_worker;
+ int ret, save_errno;
+ u64 prev;
+
+ /*
+ * Concurrently delete (and recreate) the slot to test KVM's handling
+ * of a racing memslot deletion with prefaulting.
+ */
+ pthread_create(&slot_worker, NULL, delete_slot_worker, &data);
+
+ while (!READ_ONCE(data.worker_ready))
+ cpu_relax();
+
+ WRITE_ONCE(data.prefault_ready, true);
+
+ for (;;) {
+ prev = range.size;
+ ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
+ save_errno = errno;
+ TEST_ASSERT((range.size < prev) ^ (ret < 0),
+ "%sexpecting range.size to change on %s",
+ ret < 0 ? "not " : "",
+ ret < 0 ? "failure" : "success");
+
+ /*
+ * Immediately retry prefaulting if KVM was interrupted by an
+ * unrelated signal/event.
+ */
+ if (ret < 0 && save_errno == EINTR)
+ continue;
+
+ /*
+ * Tell the worker to recreate the slot in order to complete
+ * prefaulting (if prefault didn't already succeed before the
+ * slot was deleted) and/or to prepare for the next testcase.
+ * Wait for the worker to exit so that the next invocation of
+ * prefaulting is guaranteed to complete (assuming no KVM bugs).
+ */
+ if (!slot_recreated) {
+ WRITE_ONCE(data.recreate_slot, true);
+ pthread_join(slot_worker, NULL);
+ slot_recreated = true;
+
+ /*
+ * Retry prefaulting to get a stable result, i.e. to
+ * avoid seeing random EAGAIN failures. Don't retry if
+ * prefaulting already succeeded, as KVM disallows
+ * prefaulting with size=0, i.e. blindly retrying would
+ * result in test failures due to EINVAL. KVM should
+ * always return success if all bytes are prefaulted,
+ * i.e. there is no need to guard against EAGAIN being
+ * returned.
+ */
+ if (range.size)
+ continue;
+ }
+
+ /*
+ * All done if there are no remaining bytes to prefault, or if
+ * prefaulting failed (EINTR was handled above, and EAGAIN due
+ * to prefaulting a memslot that's being actively deleted should
+ * be impossible since the memslot has already been recreated).
+ */
+ if (!range.size || ret < 0)
+ break;
+ }
+
+ TEST_ASSERT(range.size == expected_left,
+ "Completed with %llu bytes left, expected %lu",
+ range.size, expected_left);
+
+ /*
+ * Assert success if prefaulting the entire range should succeed, i.e.
+ * complete with no bytes remaining. Otherwise prefaulting should have
+ * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when
+ * no memslot exists).
+ */
+ if (!expected_left)
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
+ else
+ TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
+ KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
+}
+
+static void __test_pre_fault_memory(unsigned long vm_type, bool private)
+{
+ uint64_t gpa, gva, alignment, guest_page_size;
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = vm_type,
+ };
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code);
+
+ alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
+#ifdef __s390x__
+ alignment = max(0x100000UL, guest_page_size);
+#else
+ alignment = SZ_2M;
+#endif
+ gpa = align_down(gpa, alignment);
+ gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT,
+ TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0);
+ virt_map(vm, gva, gpa, TEST_NPAGES);
+
+ if (private)
+ vm_mem_set_private(vm, gpa, TEST_SIZE);
+
+ pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private);
+ pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
+
+ vcpu_args_set(vcpu, 1, gva);
+ vcpu_run(vcpu);
+
+ run = vcpu->run;
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ break;
+ }
+
+ kvm_vm_free(vm);
+}
+
+static void test_pre_fault_memory(unsigned long vm_type, bool private)
+{
+ if (vm_type && !(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type))) {
+ pr_info("Skipping tests for vm_type 0x%lx\n", vm_type);
+ return;
+ }
+
+ __test_pre_fault_memory(vm_type, private);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY));
+
+ test_pre_fault_memory(0, false);
+#ifdef __x86_64__
+ test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, false);
+ test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, true);
+#endif
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
new file mode 100644
index 000000000000..f962fefc48fa
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the riscv64 sstc timer IRQ functionality
+ *
+ * The test validates the sstc timer IRQs using vstimecmp registers.
+ * It's ported from the aarch64 arch_timer test.
+ *
+ * Copyright (c) 2024, Intel Corporation.
+ */
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+
+static int timer_irq = IRQ_S_TIMER;
+
+static void guest_irq_handler(struct pt_regs *regs)
+{
+ uint64_t xcnt, xcnt_diff_us, cmp;
+ unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ timer_irq_disable();
+
+ xcnt = timer_get_cycles();
+ cmp = timer_get_cmp();
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, timer_irq);
+
+ __GUEST_ASSERT(xcnt >= cmp,
+ "xcnt = 0x%"PRIx64", cmp = 0x%"PRIx64", xcnt_diff_us = 0x%" PRIx64,
+ xcnt, cmp, xcnt_diff_us);
+
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_run(struct test_vcpu_shared_data *shared_data)
+{
+ uint32_t irq_iter, config_iter;
+
+ shared_data->nr_iter = 0;
+ shared_data->guest_stage = 0;
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cycles();
+ timer_irq_enable();
+
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(msecs_to_usecs(test_args.timer_period_ms) +
+ test_args.timer_err_margin_us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ timer_irq_disable();
+ local_irq_enable();
+
+ guest_run(shared_data);
+
+ GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ __TEST_REQUIRE(__vcpu_has_isa_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC),
+ "SSTC not available, skipping test\n");
+
+ vm_init_vector_tables(vm);
+ vm_install_interrupt_handler(vm, guest_irq_handler);
+
+ for (int i = 0; i < nr_vcpus; i++)
+ vcpu_init_vector_tables(vcpus[i]);
+
+ /* Initialize guest timer frequency. */
+ timer_freq = vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency));
+ sync_global_to_guest(vm, timer_freq);
+ pr_debug("timer_freq: %lu\n", timer_freq);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
new file mode 100644
index 000000000000..739d17befb5a
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V KVM ebreak test.
+ *
+ * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd.
+ *
+ */
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#define LABEL_ADDRESS(v) ((uint64_t)&(v))
+
+extern unsigned char sw_bp_1, sw_bp_2;
+static uint64_t sw_bp_addr;
+
+static void guest_code(void)
+{
+ asm volatile(
+ ".option push\n"
+ ".option norvc\n"
+ "sw_bp_1: ebreak\n"
+ "sw_bp_2: ebreak\n"
+ ".option pop\n"
+ );
+ GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2));
+
+ GUEST_DONE();
+}
+
+static void guest_breakpoint_handler(struct pt_regs *regs)
+{
+ WRITE_ONCE(sw_bp_addr, regs->epc);
+ regs->epc += 4;
+}
+
+int main(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint64_t pc;
+ struct kvm_guest_debug debug = {
+ .control = KVM_GUESTDBG_ENABLE,
+ };
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_vector_tables(vm);
+ vcpu_init_vector_tables(vcpu);
+ vm_install_exception_handler(vm, EXC_BREAKPOINT,
+ guest_breakpoint_handler);
+
+ /*
+ * Enable the guest debug.
+ * ebreak should exit to the VMM with KVM_EXIT_DEBUG reason.
+ */
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+
+ pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc));
+ TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1));
+
+ /* skip sw_bp_1 */
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4);
+
+ /*
+ * Disable all debug controls.
+ * Guest should handle the ebreak without exiting to the VMM.
+ */
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
new file mode 100644
index 000000000000..cb54a56990a0
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -0,0 +1,1302 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (c) 2023 Intel Corporation
+ *
+ */
+#include <stdio.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK)
+
+enum {
+ VCPU_FEATURE_ISA_EXT = 0,
+ VCPU_FEATURE_SBI_EXT,
+};
+
+enum {
+ KVM_RISC_V_REG_OFFSET_VSTART = 0,
+ KVM_RISC_V_REG_OFFSET_VL,
+ KVM_RISC_V_REG_OFFSET_VTYPE,
+ KVM_RISC_V_REG_OFFSET_VCSR,
+ KVM_RISC_V_REG_OFFSET_VLENB,
+ KVM_RISC_V_REG_OFFSET_MAX,
+};
+
+static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
+
+bool filter_reg(__u64 reg)
+{
+ switch (reg & ~REG_MASK) {
+ /*
+ * Same set of ISA_EXT registers are not present on all host because
+ * ISA_EXT registers are visible to the KVM user space based on the
+ * ISA extensions available on the host. Also, disabling an ISA
+ * extension using corresponding ISA_EXT register does not affect
+ * the visibility of the ISA_EXT register itself.
+ *
+ * Based on above, we should filter-out all ISA_EXT registers.
+ *
+ * Note: The below list is alphabetically sorted.
+ */
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_A:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_C:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_F:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_H:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_I:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_M:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMNPM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSNPM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADE:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADU:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIFENCEI:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSED:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFWMA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKG:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNED:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSED:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKT:
+ /*
+ * Like ISA_EXT registers, SBI_EXT registers are only visible when the
+ * host supports them and disabling them does not affect the visibility
+ * of the SBI_EXT register itself.
+ */
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
+ return true;
+ /* AIA registers are always available when Ssaia can't be disabled */
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h):
+ return isa_ext_cant_disable[KVM_RISCV_ISA_EXT_SSAIA];
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool check_reject_set(int err)
+{
+ return err == EINVAL;
+}
+
+static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s,
+ uint64_t feature)
+{
+ unsigned long vlenb_reg = 0;
+ int rc;
+ u64 reg, size;
+
+ /* Enable V extension so that we can get the vlenb register */
+ rc = __vcpu_set_reg(vcpu, feature, 1);
+ if (rc)
+ return rc;
+
+ vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]);
+ if (!vlenb_reg) {
+ TEST_FAIL("Can't compute vector register size from zero vlenb\n");
+ return -EPERM;
+ }
+
+ size = __builtin_ctzl(vlenb_reg);
+ size <<= KVM_REG_SIZE_SHIFT;
+
+ for (int i = 0; i < 32; i++) {
+ reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i);
+ s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg;
+ }
+
+ /* We should assert if disabling failed here while enabling succeeded before */
+ vcpu_set_reg(vcpu, feature, 0);
+
+ return 0;
+}
+
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+ unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
+ struct vcpu_reg_sublist *s;
+ uint64_t feature;
+ int rc;
+
+ for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++)
+ __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(i), &isa_ext_state[i]);
+
+ /*
+ * Disable all extensions which were enabled by default
+ * if they were available in the risc-v host.
+ */
+ for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+ rc = __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0);
+ if (rc && isa_ext_state[i])
+ isa_ext_cant_disable[i] = true;
+ }
+
+ for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) {
+ rc = __vcpu_set_reg(vcpu, RISCV_SBI_EXT_REG(i), 0);
+ TEST_ASSERT(!rc || (rc == -1 && errno == ENOENT), "Unexpected error");
+ }
+
+ for_each_sublist(c, s) {
+ if (!s->feature)
+ continue;
+
+ if (s->feature == KVM_RISCV_ISA_EXT_V) {
+ feature = RISCV_ISA_EXT_REG(s->feature);
+ rc = override_vector_reg_size(vcpu, s, feature);
+ if (rc)
+ goto skip;
+ }
+
+ switch (s->feature_type) {
+ case VCPU_FEATURE_ISA_EXT:
+ feature = RISCV_ISA_EXT_REG(s->feature);
+ break;
+ case VCPU_FEATURE_SBI_EXT:
+ feature = RISCV_SBI_EXT_REG(s->feature);
+ break;
+ default:
+ TEST_FAIL("Unknown feature type");
+ }
+
+ /* Try to enable the desired extension */
+ __vcpu_set_reg(vcpu, feature, 1);
+
+skip:
+ /* Double check whether the desired extension was enabled */
+ __TEST_REQUIRE(__vcpu_has_ext(vcpu, feature),
+ "%s not available, skipping tests", s->name);
+ }
+}
+
+static const char *config_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct kvm_riscv_config */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_CONFIG_REG(isa):
+ return "KVM_REG_RISCV_CONFIG_REG(isa)";
+ case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicbop_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+ return "KVM_REG_RISCV_CONFIG_REG(mvendorid)";
+ case KVM_REG_RISCV_CONFIG_REG(marchid):
+ return "KVM_REG_RISCV_CONFIG_REG(marchid)";
+ case KVM_REG_RISCV_CONFIG_REG(mimpid):
+ return "KVM_REG_RISCV_CONFIG_REG(mimpid)";
+ case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+ return "KVM_REG_RISCV_CONFIG_REG(satp_mode)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+static const char *core_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct kvm_riscv_core */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_CORE_REG(regs.pc):
+ return "KVM_REG_RISCV_CORE_REG(regs.pc)";
+ case KVM_REG_RISCV_CORE_REG(regs.ra):
+ return "KVM_REG_RISCV_CORE_REG(regs.ra)";
+ case KVM_REG_RISCV_CORE_REG(regs.sp):
+ return "KVM_REG_RISCV_CORE_REG(regs.sp)";
+ case KVM_REG_RISCV_CORE_REG(regs.gp):
+ return "KVM_REG_RISCV_CORE_REG(regs.gp)";
+ case KVM_REG_RISCV_CORE_REG(regs.tp):
+ return "KVM_REG_RISCV_CORE_REG(regs.tp)";
+ case KVM_REG_RISCV_CORE_REG(regs.t0) ... KVM_REG_RISCV_CORE_REG(regs.t2):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.t0));
+ case KVM_REG_RISCV_CORE_REG(regs.s0) ... KVM_REG_RISCV_CORE_REG(regs.s1):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.s0));
+ case KVM_REG_RISCV_CORE_REG(regs.a0) ... KVM_REG_RISCV_CORE_REG(regs.a7):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.a%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.a0));
+ case KVM_REG_RISCV_CORE_REG(regs.s2) ... KVM_REG_RISCV_CORE_REG(regs.s11):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.s2) + 2);
+ case KVM_REG_RISCV_CORE_REG(regs.t3) ... KVM_REG_RISCV_CORE_REG(regs.t6):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.t3) + 3);
+ case KVM_REG_RISCV_CORE_REG(mode):
+ return "KVM_REG_RISCV_CORE_REG(mode)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+#define RISCV_CSR_GENERAL(csr) \
+ "KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")"
+#define RISCV_CSR_AIA(csr) \
+ "KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")"
+#define RISCV_CSR_SMSTATEEN(csr) \
+ "KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_REG(" #csr ")"
+
+static const char *general_csr_id_to_str(__u64 reg_off)
+{
+ /* reg_off is the offset into struct kvm_riscv_csr */
+ switch (reg_off) {
+ case KVM_REG_RISCV_CSR_REG(sstatus):
+ return RISCV_CSR_GENERAL(sstatus);
+ case KVM_REG_RISCV_CSR_REG(sie):
+ return RISCV_CSR_GENERAL(sie);
+ case KVM_REG_RISCV_CSR_REG(stvec):
+ return RISCV_CSR_GENERAL(stvec);
+ case KVM_REG_RISCV_CSR_REG(sscratch):
+ return RISCV_CSR_GENERAL(sscratch);
+ case KVM_REG_RISCV_CSR_REG(sepc):
+ return RISCV_CSR_GENERAL(sepc);
+ case KVM_REG_RISCV_CSR_REG(scause):
+ return RISCV_CSR_GENERAL(scause);
+ case KVM_REG_RISCV_CSR_REG(stval):
+ return RISCV_CSR_GENERAL(stval);
+ case KVM_REG_RISCV_CSR_REG(sip):
+ return RISCV_CSR_GENERAL(sip);
+ case KVM_REG_RISCV_CSR_REG(satp):
+ return RISCV_CSR_GENERAL(satp);
+ case KVM_REG_RISCV_CSR_REG(scounteren):
+ return RISCV_CSR_GENERAL(scounteren);
+ case KVM_REG_RISCV_CSR_REG(senvcfg):
+ return RISCV_CSR_GENERAL(senvcfg);
+ }
+
+ return strdup_printf("KVM_REG_RISCV_CSR_GENERAL | %lld /* UNKNOWN */", reg_off);
+}
+
+static const char *aia_csr_id_to_str(__u64 reg_off)
+{
+ /* reg_off is the offset into struct kvm_riscv_aia_csr */
+ switch (reg_off) {
+ case KVM_REG_RISCV_CSR_AIA_REG(siselect):
+ return RISCV_CSR_AIA(siselect);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio1):
+ return RISCV_CSR_AIA(iprio1);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio2):
+ return RISCV_CSR_AIA(iprio2);
+ case KVM_REG_RISCV_CSR_AIA_REG(sieh):
+ return RISCV_CSR_AIA(sieh);
+ case KVM_REG_RISCV_CSR_AIA_REG(siph):
+ return RISCV_CSR_AIA(siph);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio1h):
+ return RISCV_CSR_AIA(iprio1h);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio2h):
+ return RISCV_CSR_AIA(iprio2h);
+ }
+
+ return strdup_printf("KVM_REG_RISCV_CSR_AIA | %lld /* UNKNOWN */", reg_off);
+}
+
+static const char *smstateen_csr_id_to_str(__u64 reg_off)
+{
+ /* reg_off is the offset into struct kvm_riscv_smstateen_csr */
+ switch (reg_off) {
+ case KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0):
+ return RISCV_CSR_SMSTATEEN(sstateen0);
+ }
+
+ TEST_FAIL("Unknown smstateen csr reg: 0x%llx", reg_off);
+ return NULL;
+}
+
+static const char *csr_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CSR);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR);
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_CSR_GENERAL:
+ return general_csr_id_to_str(reg_off);
+ case KVM_REG_RISCV_CSR_AIA:
+ return aia_csr_id_to_str(reg_off);
+ case KVM_REG_RISCV_CSR_SMSTATEEN:
+ return smstateen_csr_id_to_str(reg_off);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+static const char *timer_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct kvm_riscv_timer */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_TIMER_REG(frequency):
+ return "KVM_REG_RISCV_TIMER_REG(frequency)";
+ case KVM_REG_RISCV_TIMER_REG(time):
+ return "KVM_REG_RISCV_TIMER_REG(time)";
+ case KVM_REG_RISCV_TIMER_REG(compare):
+ return "KVM_REG_RISCV_TIMER_REG(compare)";
+ case KVM_REG_RISCV_TIMER_REG(state):
+ return "KVM_REG_RISCV_TIMER_REG(state)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+static const char *fp_f_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_f_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_FP_F_REG(f[0]) ...
+ KVM_REG_RISCV_FP_F_REG(f[31]):
+ return strdup_printf("KVM_REG_RISCV_FP_F_REG(f[%lld])", reg_off);
+ case KVM_REG_RISCV_FP_F_REG(fcsr):
+ return "KVM_REG_RISCV_FP_F_REG(fcsr)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+static const char *fp_d_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_d_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_FP_D_REG(f[0]) ...
+ KVM_REG_RISCV_FP_D_REG(f[31]):
+ return strdup_printf("KVM_REG_RISCV_FP_D_REG(f[%lld])", reg_off);
+ case KVM_REG_RISCV_FP_D_REG(fcsr):
+ return "KVM_REG_RISCV_FP_D_REG(fcsr)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+static const char *vector_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_v_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR);
+ int reg_index = 0;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR);
+
+ if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0))
+ reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0);
+ switch (reg_off) {
+ case KVM_REG_RISCV_VECTOR_REG(0) ...
+ KVM_REG_RISCV_VECTOR_REG(31):
+ return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index);
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+#define KVM_ISA_EXT_ARR(ext) \
+[KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext
+
+static const char *isa_ext_single_id_to_str(__u64 reg_off)
+{
+ static const char * const kvm_isa_ext_reg_name[] = {
+ KVM_ISA_EXT_ARR(A),
+ KVM_ISA_EXT_ARR(C),
+ KVM_ISA_EXT_ARR(D),
+ KVM_ISA_EXT_ARR(F),
+ KVM_ISA_EXT_ARR(H),
+ KVM_ISA_EXT_ARR(I),
+ KVM_ISA_EXT_ARR(M),
+ KVM_ISA_EXT_ARR(V),
+ KVM_ISA_EXT_ARR(SMNPM),
+ KVM_ISA_EXT_ARR(SMSTATEEN),
+ KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSCOFPMF),
+ KVM_ISA_EXT_ARR(SSNPM),
+ KVM_ISA_EXT_ARR(SSTC),
+ KVM_ISA_EXT_ARR(SVADE),
+ KVM_ISA_EXT_ARR(SVADU),
+ KVM_ISA_EXT_ARR(SVINVAL),
+ KVM_ISA_EXT_ARR(SVNAPOT),
+ KVM_ISA_EXT_ARR(SVPBMT),
+ KVM_ISA_EXT_ARR(SVVPTC),
+ KVM_ISA_EXT_ARR(ZAAMO),
+ KVM_ISA_EXT_ARR(ZABHA),
+ KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALRSC),
+ KVM_ISA_EXT_ARR(ZAWRS),
+ KVM_ISA_EXT_ARR(ZBA),
+ KVM_ISA_EXT_ARR(ZBB),
+ KVM_ISA_EXT_ARR(ZBC),
+ KVM_ISA_EXT_ARR(ZBKB),
+ KVM_ISA_EXT_ARR(ZBKC),
+ KVM_ISA_EXT_ARR(ZBKX),
+ KVM_ISA_EXT_ARR(ZBS),
+ KVM_ISA_EXT_ARR(ZCA),
+ KVM_ISA_EXT_ARR(ZCB),
+ KVM_ISA_EXT_ARR(ZCD),
+ KVM_ISA_EXT_ARR(ZCF),
+ KVM_ISA_EXT_ARR(ZCMOP),
+ KVM_ISA_EXT_ARR(ZFA),
+ KVM_ISA_EXT_ARR(ZFBFMIN),
+ KVM_ISA_EXT_ARR(ZFH),
+ KVM_ISA_EXT_ARR(ZFHMIN),
+ KVM_ISA_EXT_ARR(ZICBOM),
+ KVM_ISA_EXT_ARR(ZICBOP),
+ KVM_ISA_EXT_ARR(ZICBOZ),
+ KVM_ISA_EXT_ARR(ZICCRSE),
+ KVM_ISA_EXT_ARR(ZICNTR),
+ KVM_ISA_EXT_ARR(ZICOND),
+ KVM_ISA_EXT_ARR(ZICSR),
+ KVM_ISA_EXT_ARR(ZIFENCEI),
+ KVM_ISA_EXT_ARR(ZIHINTNTL),
+ KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+ KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZIMOP),
+ KVM_ISA_EXT_ARR(ZKND),
+ KVM_ISA_EXT_ARR(ZKNE),
+ KVM_ISA_EXT_ARR(ZKNH),
+ KVM_ISA_EXT_ARR(ZKR),
+ KVM_ISA_EXT_ARR(ZKSED),
+ KVM_ISA_EXT_ARR(ZKSH),
+ KVM_ISA_EXT_ARR(ZKT),
+ KVM_ISA_EXT_ARR(ZTSO),
+ KVM_ISA_EXT_ARR(ZVBB),
+ KVM_ISA_EXT_ARR(ZVBC),
+ KVM_ISA_EXT_ARR(ZVFBFMIN),
+ KVM_ISA_EXT_ARR(ZVFBFWMA),
+ KVM_ISA_EXT_ARR(ZVFH),
+ KVM_ISA_EXT_ARR(ZVFHMIN),
+ KVM_ISA_EXT_ARR(ZVKB),
+ KVM_ISA_EXT_ARR(ZVKG),
+ KVM_ISA_EXT_ARR(ZVKNED),
+ KVM_ISA_EXT_ARR(ZVKNHA),
+ KVM_ISA_EXT_ARR(ZVKNHB),
+ KVM_ISA_EXT_ARR(ZVKSED),
+ KVM_ISA_EXT_ARR(ZVKSH),
+ KVM_ISA_EXT_ARR(ZVKT),
+ };
+
+ if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name))
+ return strdup_printf("KVM_REG_RISCV_ISA_SINGLE | %lld /* UNKNOWN */", reg_off);
+
+ return kvm_isa_ext_reg_name[reg_off];
+}
+
+static const char *isa_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off)
+{
+ const char *unknown = "";
+
+ if (reg_off > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+ unknown = " /* UNKNOWN */";
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_ISA_MULTI_EN:
+ return strdup_printf("KVM_REG_RISCV_ISA_MULTI_EN | %lld%s", reg_off, unknown);
+ case KVM_REG_RISCV_ISA_MULTI_DIS:
+ return strdup_printf("KVM_REG_RISCV_ISA_MULTI_DIS | %lld%s", reg_off, unknown);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+static const char *isa_ext_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT);
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_ISA_SINGLE:
+ return isa_ext_single_id_to_str(reg_off);
+ case KVM_REG_RISCV_ISA_MULTI_EN:
+ case KVM_REG_RISCV_ISA_MULTI_DIS:
+ return isa_ext_multi_id_to_str(reg_subtype, reg_off);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+#define KVM_SBI_EXT_ARR(ext) \
+[ext] = "KVM_REG_RISCV_SBI_SINGLE | " #ext
+
+static const char *sbi_ext_single_id_to_str(__u64 reg_off)
+{
+ /* reg_off is KVM_RISCV_SBI_EXT_ID */
+ static const char * const kvm_sbi_ext_reg_name[] = {
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_V01),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_TIME),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_IPI),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_RFENCE),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
+ };
+
+ if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name))
+ return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off);
+
+ return kvm_sbi_ext_reg_name[reg_off];
+}
+
+static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off)
+{
+ const char *unknown = "";
+
+ if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
+ unknown = " /* UNKNOWN */";
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_MULTI_EN:
+ return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld%s", reg_off, unknown);
+ case KVM_REG_RISCV_SBI_MULTI_DIS:
+ return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld%s", reg_off, unknown);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_EXT);
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_SINGLE:
+ return sbi_ext_single_id_to_str(reg_off);
+ case KVM_REG_RISCV_SBI_MULTI_EN:
+ case KVM_REG_RISCV_SBI_MULTI_DIS:
+ return sbi_ext_multi_id_to_str(reg_subtype, reg_off);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+static const char *sbi_sta_id_to_str(__u64 reg_off)
+{
+ switch (reg_off) {
+ case 0: return "KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_lo)";
+ case 1: return "KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi)";
+ }
+ return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off);
+}
+
+static const char *sbi_fwft_id_to_str(__u64 reg_off)
+{
+ switch (reg_off) {
+ case 0: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable)";
+ case 1: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags)";
+ case 2: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value)";
+ case 3: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable)";
+ case 4: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags)";
+ case 5: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value)";
+ }
+ return strdup_printf("KVM_REG_RISCV_SBI_FWFT | %lld /* UNKNOWN */", reg_off);
+}
+
+static const char *sbi_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_STATE);
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_STA:
+ return sbi_sta_id_to_str(reg_off);
+ case KVM_REG_RISCV_SBI_FWFT:
+ return sbi_fwft_id_to_str(reg_off);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+void print_reg(const char *prefix, __u64 id)
+{
+ const char *reg_size = NULL;
+
+ TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_RISCV,
+ "%s: KVM_REG_RISCV missing in reg id: 0x%llx", prefix, id);
+
+ switch (id & KVM_REG_SIZE_MASK) {
+ case KVM_REG_SIZE_U32:
+ reg_size = "KVM_REG_SIZE_U32";
+ break;
+ case KVM_REG_SIZE_U64:
+ reg_size = "KVM_REG_SIZE_U64";
+ break;
+ case KVM_REG_SIZE_U128:
+ reg_size = "KVM_REG_SIZE_U128";
+ break;
+ case KVM_REG_SIZE_U256:
+ reg_size = "KVM_REG_SIZE_U256";
+ break;
+ default:
+ printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n",
+ (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK);
+ return;
+ }
+
+ switch (id & KVM_REG_RISCV_TYPE_MASK) {
+ case KVM_REG_RISCV_CONFIG:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n",
+ reg_size, config_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_CORE:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n",
+ reg_size, core_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_CSR:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CSR | %s,\n",
+ reg_size, csr_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_TIMER:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_TIMER | %s,\n",
+ reg_size, timer_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_FP_F:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_F | %s,\n",
+ reg_size, fp_f_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_FP_D:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
+ reg_size, fp_d_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_VECTOR:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n",
+ reg_size, vector_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_ISA_EXT:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
+ reg_size, isa_ext_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_SBI_EXT:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n",
+ reg_size, sbi_ext_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_SBI_STATE:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_STATE | %s,\n",
+ reg_size, sbi_id_to_str(prefix, id));
+ break;
+ default:
+ printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,\n",
+ reg_size, id & ~REG_MASK);
+ return;
+ }
+}
+
+/*
+ * The current blessed list was primed with the output of kernel version
+ * v6.5-rc3 and then later updated with new registers.
+ */
+static __u64 base_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.gp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.tp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a3),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a4),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a5),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a6),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a7),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s3),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s4),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s5),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s6),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s7),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s8),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s9),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s10),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s11),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t3),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t4),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t5),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t6),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(mode),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sstatus),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sie),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stvec),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sscratch),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sepc),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scause),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stval),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(senvcfg),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+};
+
+/*
+ * The skips_set list registers that should skip set test.
+ * - KVM_REG_RISCV_TIMER_REG(state): set would fail if it was not initialized properly.
+ */
+static __u64 base_skips_set[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+};
+
+static __u64 sbi_base_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR,
+};
+
+static __u64 sbi_sta_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_lo),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi),
+};
+
+static __u64 sbi_fwft_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value),
+};
+
+static __u64 zicbom_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM,
+};
+
+static __u64 zicbop_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP,
+};
+
+static __u64 zicboz_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ,
+};
+
+static __u64 aia_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA,
+};
+
+static __u64 smstateen_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN,
+};
+
+static __u64 fp_f_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[2]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[3]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[4]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[5]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[6]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[7]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[8]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[9]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[10]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[11]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[12]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[13]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[14]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[15]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[16]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[17]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[18]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[19]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[20]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[21]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[22]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[23]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[24]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[25]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[26]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[27]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[28]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[29]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[30]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[31]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(fcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_F,
+};
+
+static __u64 fp_d_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[0]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[1]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[2]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[3]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[4]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[5]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[6]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[7]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[8]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[9]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[10]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[11]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[12]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[13]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[14]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[15]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[16]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[17]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[18]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[19]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[20]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[21]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[22]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[23]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[24]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[25]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[26]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[27]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[28]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[29]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[30]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[31]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(fcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D,
+};
+
+/* Define a default vector registers with length. This will be overwritten at runtime */
+static __u64 vector_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V,
+};
+
+#define SUBLIST_BASE \
+ {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
+ .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
+#define SUBLIST_SBI_BASE \
+ {"sbi-base", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_V01, \
+ .regs = sbi_base_regs, .regs_n = ARRAY_SIZE(sbi_base_regs),}
+#define SUBLIST_SBI_STA \
+ {"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \
+ .regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),}
+#define SUBLIST_SBI_FWFT \
+ {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \
+ .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),}
+#define SUBLIST_ZICBOM \
+ {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),}
+#define SUBLIST_ZICBOP \
+ {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),}
+#define SUBLIST_ZICBOZ \
+ {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),}
+#define SUBLIST_AIA \
+ {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),}
+#define SUBLIST_SMSTATEEN \
+ {"smstateen", .feature = KVM_RISCV_ISA_EXT_SMSTATEEN, .regs = smstateen_regs, .regs_n = ARRAY_SIZE(smstateen_regs),}
+#define SUBLIST_FP_F \
+ {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \
+ .regs_n = ARRAY_SIZE(fp_f_regs),}
+#define SUBLIST_FP_D \
+ {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
+ .regs_n = ARRAY_SIZE(fp_d_regs),}
+
+#define SUBLIST_V \
+ {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),}
+
+#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \
+static __u64 regs_##ext[] = { \
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
+ KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | \
+ KVM_RISCV_ISA_EXT_##extu, \
+}; \
+static struct vcpu_reg_list config_##ext = { \
+ .sublists = { \
+ SUBLIST_BASE, \
+ { \
+ .name = #ext, \
+ .feature = KVM_RISCV_ISA_EXT_##extu, \
+ .regs = regs_##ext, \
+ .regs_n = ARRAY_SIZE(regs_##ext), \
+ }, \
+ {0}, \
+ }, \
+} \
+
+#define KVM_SBI_EXT_SIMPLE_CONFIG(ext, extu) \
+static __u64 regs_sbi_##ext[] = { \
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
+ KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | \
+ KVM_RISCV_SBI_EXT_##extu, \
+}; \
+static struct vcpu_reg_list config_sbi_##ext = { \
+ .sublists = { \
+ SUBLIST_BASE, \
+ { \
+ .name = "sbi-"#ext, \
+ .feature_type = VCPU_FEATURE_SBI_EXT, \
+ .feature = KVM_RISCV_SBI_EXT_##extu, \
+ .regs = regs_sbi_##ext, \
+ .regs_n = ARRAY_SIZE(regs_sbi_##ext), \
+ }, \
+ {0}, \
+ }, \
+} \
+
+#define KVM_ISA_EXT_SUBLIST_CONFIG(ext, extu) \
+static struct vcpu_reg_list config_##ext = { \
+ .sublists = { \
+ SUBLIST_BASE, \
+ SUBLIST_##extu, \
+ {0}, \
+ }, \
+} \
+
+#define KVM_SBI_EXT_SUBLIST_CONFIG(ext, extu) \
+static struct vcpu_reg_list config_sbi_##ext = { \
+ .sublists = { \
+ SUBLIST_BASE, \
+ SUBLIST_SBI_##extu, \
+ {0}, \
+ }, \
+} \
+
+/* Note: The below list is alphabetically sorted. */
+
+KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE);
+KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
+KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
+KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
+KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
+KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY);
+KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT);
+
+KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
+KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
+KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
+KVM_ISA_EXT_SUBLIST_CONFIG(v, V);
+KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
+KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM);
+KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
+KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF);
+KVM_ISA_EXT_SIMPLE_CONFIG(ssnpm, SSNPM);
+KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(svade, SVADE);
+KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU);
+KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
+KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
+KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
+KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO);
+KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
+KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM);
+KVM_ISA_EXT_SUBLIST_CONFIG(zicbop, ZICBOP);
+KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ);
+KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE);
+KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND);
+KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI);
+KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL);
+KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE);
+KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM);
+KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP);
+KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE);
+KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkr, ZKR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zksed, ZKSED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT);
+KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfmin, ZVFBFMIN);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfwma, ZVFBFWMA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkg, ZVKG);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkned, ZVKNED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvknha, ZVKNHA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvknhb, ZVKNHB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvksed, ZVKSED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvksh, ZVKSH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkt, ZVKT);
+
+struct vcpu_reg_list *vcpu_configs[] = {
+ &config_sbi_base,
+ &config_sbi_sta,
+ &config_sbi_pmu,
+ &config_sbi_dbcn,
+ &config_sbi_susp,
+ &config_sbi_mpxy,
+ &config_sbi_fwft,
+ &config_aia,
+ &config_fp_f,
+ &config_fp_d,
+ &config_h,
+ &config_v,
+ &config_smnpm,
+ &config_smstateen,
+ &config_sscofpmf,
+ &config_ssnpm,
+ &config_sstc,
+ &config_svade,
+ &config_svadu,
+ &config_svinval,
+ &config_svnapot,
+ &config_svpbmt,
+ &config_svvptc,
+ &config_zaamo,
+ &config_zabha,
+ &config_zacas,
+ &config_zalrsc,
+ &config_zawrs,
+ &config_zba,
+ &config_zbb,
+ &config_zbc,
+ &config_zbkb,
+ &config_zbkc,
+ &config_zbkx,
+ &config_zbs,
+ &config_zca,
+ &config_zcb,
+ &config_zcd,
+ &config_zcf,
+ &config_zcmop,
+ &config_zfa,
+ &config_zfbfmin,
+ &config_zfh,
+ &config_zfhmin,
+ &config_zicbom,
+ &config_zicbop,
+ &config_zicboz,
+ &config_ziccrse,
+ &config_zicntr,
+ &config_zicond,
+ &config_zicsr,
+ &config_zifencei,
+ &config_zihintntl,
+ &config_zihintpause,
+ &config_zihpm,
+ &config_zimop,
+ &config_zknd,
+ &config_zkne,
+ &config_zknh,
+ &config_zkr,
+ &config_zksed,
+ &config_zksh,
+ &config_zkt,
+ &config_ztso,
+ &config_zvbb,
+ &config_zvbc,
+ &config_zvfbfmin,
+ &config_zvfbfwma,
+ &config_zvfh,
+ &config_zvfhmin,
+ &config_zvkb,
+ &config_zvkg,
+ &config_zvkned,
+ &config_zvknha,
+ &config_zvknhb,
+ &config_zvksed,
+ &config_zvksh,
+ &config_zvkt,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
new file mode 100644
index 000000000000..924a335d2262
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -0,0 +1,731 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sbi_pmu_test.c - Tests the riscv64 SBI PMU functionality.
+ *
+ * Copyright (c) 2024, Rivos Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+#include "sbi.h"
+#include "arch_timer.h"
+#include "ucall_common.h"
+
+/* Maximum counters(firmware + hardware) */
+#define RISCV_MAX_PMU_COUNTERS 64
+union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS];
+
+/* Snapshot shared memory data */
+#define PMU_SNAPSHOT_GPA_BASE BIT(30)
+static void *snapshot_gva;
+static vm_paddr_t snapshot_gpa;
+
+static int vcpu_shared_irq_count;
+static int counter_in_use;
+
+/* Cache the available counters in a bitmask */
+static unsigned long counter_mask_available;
+
+static bool illegal_handler_invoked;
+
+#define SBI_PMU_TEST_BASIC BIT(0)
+#define SBI_PMU_TEST_EVENTS BIT(1)
+#define SBI_PMU_TEST_SNAPSHOT BIT(2)
+#define SBI_PMU_TEST_OVERFLOW BIT(3)
+
+#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5
+struct test_args {
+ int disabled_tests;
+ int overflow_irqnum;
+};
+
+static struct test_args targs;
+
+unsigned long pmu_csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val) {\
+ case __csr_num: \
+ __val = csr_read(__csr_num); \
+ break; }
+#define switchcase_csr_read_2(__csr_num, __val) {\
+ switchcase_csr_read(__csr_num + 0, __val) \
+ switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val) {\
+ switchcase_csr_read_2(__csr_num + 0, __val) \
+ switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val) {\
+ switchcase_csr_read_4(__csr_num + 0, __val) \
+ switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val) {\
+ switchcase_csr_read_8(__csr_num + 0, __val) \
+ switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val) {\
+ switchcase_csr_read_16(__csr_num + 0, __val) \
+ switchcase_csr_read_16(__csr_num + 16, __val)}
+
+ unsigned long ret = 0;
+
+ switch (csr_num) {
+ switchcase_csr_read_32(CSR_CYCLE, ret)
+ default :
+ break;
+ }
+
+ return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+static inline void dummy_func_loop(uint64_t iter)
+{
+ int i = 0;
+
+ while (i < iter) {
+ asm volatile("nop");
+ i++;
+ }
+}
+
+static void start_counter(unsigned long counter, unsigned long start_flags,
+ unsigned long ival)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, counter, 1, start_flags,
+ ival, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "Unable to start counter %ld\n", counter);
+}
+
+/* This should be invoked only for reset counter use case */
+static void stop_reset_counter(unsigned long counter, unsigned long stop_flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1,
+ stop_flags | SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+ __GUEST_ASSERT(ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld\n", counter);
+}
+
+static void stop_counter(unsigned long counter, unsigned long stop_flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
+ 0, 0, 0);
+ __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld error %ld\n", counter, ret.error);
+}
+
+static void guest_illegal_exception_handler(struct pt_regs *regs)
+{
+ unsigned long insn;
+ int opcode, csr_num, funct3;
+
+ __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
+ "Unexpected exception handler %lx\n", regs->cause);
+
+ insn = regs->badaddr;
+ opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT;
+ __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM,
+ "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn);
+
+ csr_num = GET_CSR_NUM(insn);
+ funct3 = GET_RM(insn);
+ /* Validate if it is a CSR read/write operation */
+ __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4),
+ "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n",
+ funct3, csr_num);
+
+ /* Validate if it is a HPMCOUNTER CSR operation */
+ __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31),
+ "Unexpected csr_num 0x%x\n", csr_num);
+
+ illegal_handler_invoked = true;
+ /* skip the trapping instruction */
+ regs->epc += 4;
+}
+
+static void guest_irq_handler(struct pt_regs *regs)
+{
+ unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+ unsigned long overflown_mask;
+
+ /* Validate that we are in the correct irq handler */
+ GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
+
+ /* Stop all counters first to avoid further interrupts */
+ stop_counter(counter_in_use, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF));
+
+ overflown_mask = READ_ONCE(snapshot_data->ctr_overflow_mask);
+ GUEST_ASSERT(overflown_mask & 0x01);
+
+ WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
+}
+
+static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
+ unsigned long cflags,
+ unsigned long event)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask,
+ cflags, event, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "config matching failed %ld\n", ret.error);
+ GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS);
+ GUEST_ASSERT(BIT(ret.value) & counter_mask_available);
+
+ return ret.value;
+}
+
+static unsigned long get_num_counters(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
+
+ __GUEST_ASSERT(ret.error == 0, "Unable to retrieve number of counters from SBI PMU");
+ __GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS,
+ "Invalid number of counters %ld\n", ret.value);
+
+ return ret.value;
+}
+
+static void update_counter_info(int num_counters)
+{
+ int i = 0;
+ struct sbiret ret;
+
+ for (i = 0; i < num_counters; i++) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
+
+ /* There can be gaps in logical counter indicies*/
+ if (ret.error)
+ continue;
+ GUEST_ASSERT_NE(ret.value, 0);
+
+ ctrinfo_arr[i].value = ret.value;
+ counter_mask_available |= BIT(i);
+ }
+
+ GUEST_ASSERT(counter_mask_available > 0);
+}
+
+static unsigned long read_fw_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, idx, 0, 0, 0, 0, 0);
+ GUEST_ASSERT(ret.error == 0);
+ return ret.value;
+}
+
+static unsigned long read_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
+{
+ unsigned long counter_val = 0;
+
+ __GUEST_ASSERT(ctrinfo.type < 2, "Invalid counter type %d", ctrinfo.type);
+
+ if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW)
+ counter_val = pmu_csr_read_num(ctrinfo.csr);
+ else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW)
+ counter_val = read_fw_counter(idx, ctrinfo);
+
+ return counter_val;
+}
+
+static inline void verify_sbi_requirement_assert(void)
+{
+ long out_val = 0;
+ bool probe;
+
+ probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
+
+ if (get_host_sbi_spec_version() < sbi_mk_version(2, 0))
+ __GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot");
+}
+
+static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags)
+{
+ unsigned long lo = (unsigned long)gpa;
+#if __riscv_xlen == 32
+ unsigned long hi = (unsigned long)(gpa >> 32);
+#else
+ unsigned long hi = gpa == -1 ? -1 : 0;
+#endif
+ struct sbiret ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ lo, hi, flags, 0, 0, 0);
+
+ GUEST_ASSERT(ret.value == 0 && ret.error == 0);
+}
+
+static void test_pmu_event(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_pre, counter_value_post;
+ unsigned long counter_init_value = 100;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ /* Do not set the initial value */
+ start_counter(counter, 0, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, 0);
+
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_post > counter_value_pre,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /*
+ * We can't just update the counter without starting it.
+ * Do start/stop twice to simulate that by first initializing to a very
+ * high value and a low value after that.
+ */
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, ULONG_MAX/2);
+ stop_counter(counter, 0);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
+ stop_counter(counter, 0);
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_pre > counter_value_post,
+ "Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /* Now set the initial value and compare */
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
+ dummy_func_loop(10000);
+ stop_counter(counter, 0);
+
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_post > counter_init_value,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_pmu_event_snapshot(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_pre, counter_value_post;
+ unsigned long counter_init_value = 100;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ /* Do not set the initial value */
+ start_counter(counter, 0, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ /* The counter value is updated w.r.t relative index of cbase */
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_post > counter_value_pre,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /*
+ * We can't just update the counter without starting it.
+ * Do start/stop twice to simulate that by first initializing to a very
+ * high value and a low value after that.
+ */
+ WRITE_ONCE(snapshot_data->ctr_values[0], ULONG_MAX/2);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+ counter_value_pre = READ_ONCE(snapshot_data->ctr_values[0]);
+
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_pre > counter_value_post,
+ "Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /* Now set the initial value and compare */
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_post > counter_init_value,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_pmu_event_overflow(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_post;
+ unsigned long counter_init_value = ULONG_MAX - 10000;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_in_use = counter;
+
+ /* The counter value is updated w.r.t relative index of cbase passed to start/stop */
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ dummy_func_loop(10000);
+ udelay(msecs_to_usecs(2000));
+ /* irq handler should have stopped the counter */
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ /* The counter value after stopping should be less the init value due to overflow */
+ __GUEST_ASSERT(counter_value_post < counter_init_value,
+ "counter_value_post %lx counter_init_value %lx for counter\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_invalid_event(void)
+{
+ struct sbiret ret;
+ unsigned long event = 0x1234; /* A random event */
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 0,
+ counter_mask_available, 0, event, 0, 0);
+ GUEST_ASSERT_EQ(ret.error, SBI_ERR_NOT_SUPPORTED);
+}
+
+static void test_pmu_events(void)
+{
+ int num_counters = 0;
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /* Sanity testing for any random invalid event */
+ test_invalid_event();
+
+ /* Only these two events are guaranteed to be present */
+ test_pmu_event(SBI_PMU_HW_CPU_CYCLES);
+ test_pmu_event(SBI_PMU_HW_INSTRUCTIONS);
+
+ GUEST_DONE();
+}
+
+static void test_pmu_basic_sanity(void)
+{
+ long out_val = 0;
+ bool probe;
+ struct sbiret ret;
+ int num_counters = 0, i;
+ union sbi_pmu_ctr_info ctrinfo;
+
+ probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
+
+ num_counters = get_num_counters();
+
+ for (i = 0; i < num_counters; i++) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i,
+ 0, 0, 0, 0, 0);
+
+ /* There can be gaps in logical counter indicies*/
+ if (ret.error)
+ continue;
+ GUEST_ASSERT_NE(ret.value, 0);
+
+ ctrinfo.value = ret.value;
+
+ /**
+ * Accessibility check of hardware and read capability of firmware counters.
+ * The spec doesn't mandate any initial value. No need to check any value.
+ */
+ if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) {
+ pmu_csr_read_num(ctrinfo.csr);
+ GUEST_ASSERT(illegal_handler_invoked);
+ } else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
+ read_fw_counter(i, ctrinfo);
+ }
+ }
+
+ GUEST_DONE();
+}
+
+static void test_pmu_events_snaphost(void)
+{
+ int num_counters = 0;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+ int i;
+
+ /* Verify presence of SBI PMU and minimum requrired SBI version */
+ verify_sbi_requirement_assert();
+
+ snapshot_set_shmem(snapshot_gpa, 0);
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /* Validate shared memory access */
+ GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_overflow_mask), 0);
+ for (i = 0; i < num_counters; i++) {
+ if (counter_mask_available & (BIT(i)))
+ GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_values[i]), 0);
+ }
+ /* Only these two events are guranteed to be present */
+ test_pmu_event_snapshot(SBI_PMU_HW_CPU_CYCLES);
+ test_pmu_event_snapshot(SBI_PMU_HW_INSTRUCTIONS);
+
+ GUEST_DONE();
+}
+
+static void test_pmu_events_overflow(void)
+{
+ int num_counters = 0, i = 0;
+
+ /* Verify presence of SBI PMU and minimum requrired SBI version */
+ verify_sbi_requirement_assert();
+
+ snapshot_set_shmem(snapshot_gpa, 0);
+ csr_set(CSR_IE, BIT(IRQ_PMU_OVF));
+ local_irq_enable();
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /*
+ * Qemu supports overflow for cycle/instruction.
+ * This test may fail on any platform that do not support overflow for these two events.
+ */
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
+
+ vcpu_shared_irq_count = 0;
+
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
+
+ GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ case UCALL_SYNC:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+}
+
+void test_vm_destroy(struct kvm_vm *vm)
+{
+ memset(ctrinfo_arr, 0, sizeof(union sbi_pmu_ctr_info) * RISCV_MAX_PMU_COUNTERS);
+ counter_mask_available = 0;
+ kvm_vm_free(vm);
+}
+
+static void test_vm_basic_test(void *guest_code)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+ vm_init_vector_tables(vm);
+ /* Illegal instruction handler is required to verify read access without configuration */
+ vm_install_exception_handler(vm, EXC_INST_ILLEGAL, guest_illegal_exception_handler);
+
+ vcpu_init_vector_tables(vcpu);
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_events_test(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu = NULL;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ /* PMU Snapshot requires single page only */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, PMU_SNAPSHOT_GPA_BASE, 1, 1, 0);
+ /* PMU_SNAPSHOT_GPA_BASE is identity mapped */
+ virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1);
+
+ snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE);
+ snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva);
+ sync_global_to_guest(vcpu->vm, snapshot_gva);
+ sync_global_to_guest(vcpu->vm, snapshot_gpa);
+}
+
+static void test_vm_events_snapshot_test(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+
+ test_vm_setup_snapshot_mem(vm, vcpu);
+
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_events_overflow(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+
+ __TEST_REQUIRE(__vcpu_has_isa_ext(vcpu, KVM_RISCV_ISA_EXT_SSCOFPMF),
+ "Sscofpmf is not available, skipping overflow test");
+
+ test_vm_setup_snapshot_mem(vm, vcpu);
+ vm_init_vector_tables(vm);
+ vm_install_interrupt_handler(vm, guest_irq_handler);
+
+ vcpu_init_vector_tables(vcpu);
+ /* Initialize guest timer frequency. */
+ timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency));
+
+ /* Export the shared variables to the guest */
+ sync_global_to_guest(vm, timer_freq);
+ sync_global_to_guest(vm, vcpu_shared_irq_count);
+ sync_global_to_guest(vm, targs);
+
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n",
+ name);
+ pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n",
+ SBI_PMU_OVERFLOW_IRQNUM_DEFAULT);
+ pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+ int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT |
+ SBI_PMU_TEST_OVERFLOW;
+ int overflow_interrupts = 0;
+
+ while ((opt = getopt(argc, argv, "ht:n:")) != -1) {
+ switch (opt) {
+ case 't':
+ if (!strncmp("basic", optarg, 5))
+ temp_disabled_tests &= ~SBI_PMU_TEST_BASIC;
+ else if (!strncmp("events", optarg, 6))
+ temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS;
+ else if (!strncmp("snapshot", optarg, 8))
+ temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT;
+ else if (!strncmp("overflow", optarg, 8))
+ temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW;
+ else
+ goto done;
+ targs.disabled_tests = temp_disabled_tests;
+ break;
+ case 'n':
+ overflow_interrupts = atoi_positive("Number of LCOFI", optarg);
+ break;
+ case 'h':
+ default:
+ goto done;
+ }
+ }
+
+ if (overflow_interrupts > 0) {
+ if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) {
+ pr_info("-n option is only available for overflow test\n");
+ goto done;
+ } else {
+ targs.overflow_irqnum = overflow_interrupts;
+ }
+ }
+
+ return true;
+done:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ targs.disabled_tests = 0;
+ targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT;
+
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) {
+ test_vm_basic_test(test_pmu_basic_sanity);
+ pr_info("SBI PMU basic test : PASS\n");
+ }
+
+ if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) {
+ test_vm_events_test(test_pmu_events);
+ pr_info("SBI PMU event verification test : PASS\n");
+ }
+
+ if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+ test_vm_events_snapshot_test(test_pmu_events_snaphost);
+ pr_info("SBI PMU event verification with snapshot test : PASS\n");
+ }
+
+ if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+ test_vm_events_overflow(test_pmu_events_overflow);
+ pr_info("SBI PMU event verification with overflow test : PASS\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
new file mode 100644
index 000000000000..1375fca80bcd
--- /dev/null
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Include rseq.c without _GNU_SOURCE defined, before including any headers, so
+ * that rseq.c is compiled with its configuration, not KVM selftests' config.
+ */
+#undef _GNU_SOURCE
+#include "../rseq/rseq.c"
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <syscall.h>
+#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
+#include <asm/barrier.h>
+#include <linux/atomic.h>
+#include <linux/rseq.h>
+#include <linux/unistd.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall_common.h"
+
+/*
+ * Any bug related to task migration is likely to be timing-dependent; perform
+ * a large number of migrations to reduce the odds of a false negative.
+ */
+#define NR_TASK_MIGRATIONS 100000
+
+static pthread_t migration_thread;
+static cpu_set_t possible_mask;
+static int min_cpu, max_cpu;
+static bool done;
+
+static atomic_t seq_cnt;
+
+static void guest_code(void)
+{
+ for (;;)
+ GUEST_SYNC(0);
+}
+
+static int next_cpu(int cpu)
+{
+ /*
+ * Advance to the next CPU, skipping those that weren't in the original
+ * affinity set. Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
+ * data storage is considered as opaque. Note, if this task is pinned
+ * to a small set of discontigous CPUs, e.g. 2 and 1023, this loop will
+ * burn a lot cycles and the test will take longer than normal to
+ * complete.
+ */
+ do {
+ cpu++;
+ if (cpu > max_cpu) {
+ cpu = min_cpu;
+ TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
+ "Min CPU = %d must always be usable", cpu);
+ break;
+ }
+ } while (!CPU_ISSET(cpu, &possible_mask));
+
+ return cpu;
+}
+
+static void *migration_worker(void *__rseq_tid)
+{
+ pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
+ cpu_set_t allowed_mask;
+ int r, i, cpu;
+
+ CPU_ZERO(&allowed_mask);
+
+ for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
+ CPU_SET(cpu, &allowed_mask);
+
+ /*
+ * Bump the sequence count twice to allow the reader to detect
+ * that a migration may have occurred in between rseq and sched
+ * CPU ID reads. An odd sequence count indicates a migration
+ * is in-progress, while a completely different count indicates
+ * a migration occurred since the count was last read.
+ */
+ atomic_inc(&seq_cnt);
+
+ /*
+ * Ensure the odd count is visible while getcpu() isn't
+ * stable, i.e. while changing affinity is in-progress.
+ */
+ smp_wmb();
+ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
+ errno, strerror(errno));
+ smp_wmb();
+ atomic_inc(&seq_cnt);
+
+ CPU_CLR(cpu, &allowed_mask);
+
+ /*
+ * Wait 1-10us before proceeding to the next iteration and more
+ * specifically, before bumping seq_cnt again. A delay is
+ * needed on three fronts:
+ *
+ * 1. To allow sched_setaffinity() to prompt migration before
+ * ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
+ * (or TIF_NEED_RESCHED, which indirectly leads to handling
+ * NOTIFY_RESUME) is handled in KVM context.
+ *
+ * If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
+ * the guest, the guest will trigger a IO/MMIO exit all the
+ * way to userspace and the TIF flags will be handled by
+ * the generic "exit to userspace" logic, not by KVM. The
+ * exit to userspace is necessary to give the test a chance
+ * to check the rseq CPU ID (see #2).
+ *
+ * Alternatively, guest_code() could include an instruction
+ * to trigger an exit that is handled by KVM, but any such
+ * exit requires architecture specific code.
+ *
+ * 2. To let ioctl(KVM_RUN) make its way back to the test
+ * before the next round of migration. The test's check on
+ * the rseq CPU ID must wait for migration to complete in
+ * order to avoid false positive, thus any kernel rseq bug
+ * will be missed if the next migration starts before the
+ * check completes.
+ *
+ * 3. To ensure the read-side makes efficient forward progress,
+ * e.g. if getcpu() involves a syscall. Stalling the read-side
+ * means the test will spend more time waiting for getcpu()
+ * to stabilize and less time trying to hit the timing-dependent
+ * bug.
+ *
+ * Because any bug in this area is likely to be timing-dependent,
+ * run with a range of delays at 1us intervals from 1us to 10us
+ * as a best effort to avoid tuning the test to the point where
+ * it can hit _only_ the original bug and not detect future
+ * regressions.
+ *
+ * The original bug can reproduce with a delay up to ~500us on
+ * x86-64, but starts to require more iterations to reproduce
+ * as the delay creeps above ~10us, and the average runtime of
+ * each iteration obviously increases as well. Cap the delay
+ * at 10us to keep test runtime reasonable while minimizing
+ * potential coverage loss.
+ *
+ * The lower bound for reproducing the bug is likely below 1us,
+ * e.g. failures occur on x86-64 with nanosleep(0), but at that
+ * point the overhead of the syscall likely dominates the delay.
+ * Use usleep() for simplicity and to avoid unnecessary kernel
+ * dependencies.
+ */
+ usleep((i % 10) + 1);
+ }
+ done = true;
+ return NULL;
+}
+
+static void calc_min_max_cpu(void)
+{
+ int i, cnt, nproc;
+
+ TEST_REQUIRE(CPU_COUNT(&possible_mask) >= 2);
+
+ /*
+ * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
+ * this task is affined to in order to reduce the time spent querying
+ * unusable CPUs, e.g. if this task is pinned to a small percentage of
+ * total CPUs.
+ */
+ nproc = get_nprocs_conf();
+ min_cpu = -1;
+ max_cpu = -1;
+ cnt = 0;
+
+ for (i = 0; i < nproc; i++) {
+ if (!CPU_ISSET(i, &possible_mask))
+ continue;
+ if (min_cpu == -1)
+ min_cpu = i;
+ max_cpu = i;
+ cnt++;
+ }
+
+ __TEST_REQUIRE(cnt >= 2,
+ "Only one usable CPU, task migration not possible");
+}
+
+static void help(const char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-u] [-l latency]\n", name);
+ printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ int r, i, snapshot, opt, fd = -1, latency = -1;
+ bool skip_sanity_check = false;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ u32 cpu, rseq_cpu;
+
+ while ((opt = getopt(argc, argv, "hl:u")) != -1) {
+ switch (opt) {
+ case 'u':
+ skip_sanity_check = true;
+ case 'l':
+ latency = atoi_paranoid(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
+ TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
+ strerror(errno));
+
+ calc_min_max_cpu();
+
+ r = rseq_register_current_thread();
+ TEST_ASSERT(!r, "rseq_register_current_thread failed, errno = %d (%s)",
+ errno, strerror(errno));
+
+ /*
+ * Create and run a dummy VM that immediately exits to userspace via
+ * GUEST_SYNC, while concurrently migrating the process by setting its
+ * CPU affinity.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ pthread_create(&migration_thread, NULL, migration_worker,
+ (void *)(unsigned long)syscall(SYS_gettid));
+
+ if (latency >= 0) {
+ /*
+ * Writes to cpu_dma_latency persist only while the file is
+ * open, i.e. it allows userspace to provide guaranteed latency
+ * while running a workload. Keep the file open until the test
+ * completes, otherwise writing cpu_dma_latency is meaningless.
+ */
+ fd = open("/dev/cpu_dma_latency", O_RDWR);
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd));
+
+ r = write(fd, &latency, 4);
+ TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency");
+ }
+
+ for (i = 0; !done; i++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
+ "Guest failed?");
+
+ /*
+ * Verify rseq's CPU matches sched's CPU. Ensure migration
+ * doesn't occur between getcpu() and reading the rseq cpu_id
+ * by rereading both if the sequence count changes, or if the
+ * count is odd (migration in-progress).
+ */
+ do {
+ /*
+ * Drop bit 0 to force a mismatch if the count is odd,
+ * i.e. if a migration is in-progress.
+ */
+ snapshot = atomic_read(&seq_cnt) & ~1;
+
+ /*
+ * Ensure calling getcpu() and reading rseq.cpu_id complete
+ * in a single "no migration" window, i.e. are not reordered
+ * across the seq_cnt reads.
+ */
+ smp_rmb();
+ r = sys_getcpu(&cpu, NULL);
+ TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)",
+ errno, strerror(errno));
+ rseq_cpu = rseq_current_cpu_raw();
+ smp_rmb();
+ } while (snapshot != atomic_read(&seq_cnt));
+
+ TEST_ASSERT(rseq_cpu == cpu,
+ "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
+ }
+
+ if (fd > 0)
+ close(fd);
+
+ /*
+ * Sanity check that the test was able to enter the guest a reasonable
+ * number of times, e.g. didn't get stalled too often/long waiting for
+ * getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly
+ * conservative ratio on x86-64, which can do _more_ KVM_RUNs than
+ * migrations given the 1us+ delay in the migration task.
+ *
+ * Another reason why it may have small migration:KVM_RUN ratio is that,
+ * on systems with large low power mode wakeup latency, it may happen
+ * quite often that the scheduler is not able to wake up the target CPU
+ * before the vCPU thread is scheduled to another CPU.
+ */
+ TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
+ "Only performed %d KVM_RUNs, task stalled too much?\n\n"
+ " Try disabling deep sleep states to reduce CPU wakeup latency,\n"
+ " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n"
+ " disable this sanity check.", i);
+
+ pthread_join(migration_thread, NULL);
+
+ kvm_vm_free(vm);
+
+ rseq_unregister_current_thread();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
new file mode 100644
index 000000000000..e39a724fe860
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/cmma_test.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x CMMA migration
+ *
+ * Copyright IBM Corp. 2023
+ *
+ * Authors:
+ * Nico Boehr <nrb@linux.ibm.com>
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+#define MAIN_PAGE_COUNT 512
+
+#define TEST_DATA_PAGE_COUNT 512
+#define TEST_DATA_MEMSLOT 1
+#define TEST_DATA_START_GFN PAGE_SIZE
+
+#define TEST_DATA_TWO_PAGE_COUNT 256
+#define TEST_DATA_TWO_MEMSLOT 2
+#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE)
+
+static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
+
+/**
+ * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
+ * so use_cmma goes on and the CMMA related ioctls do something.
+ */
+static void guest_do_one_essa(void)
+{
+ asm volatile(
+ /* load TEST_DATA_START_GFN into r1 */
+ " llilf 1,%[start_gfn]\n"
+ /* calculate the address from the gfn */
+ " sllg 1,1,12(0)\n"
+ /* set the first page in TEST_DATA memslot to STABLE */
+ " .insn rrf,0xb9ab0000,2,1,1,0\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN)
+ : "r1", "r2", "memory", "cc"
+ );
+}
+
+/**
+ * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
+ * state.
+ */
+static void guest_dirty_test_data(void)
+{
+ asm volatile(
+ /* r1 = TEST_DATA_START_GFN */
+ " xgr 1,1\n"
+ " llilf 1,%[start_gfn]\n"
+ /* r5 = TEST_DATA_PAGE_COUNT */
+ " lghi 5,%[page_count]\n"
+ /* r5 += r1 */
+ "2: agfr 5,1\n"
+ /* r2 = r1 << PAGE_SHIFT */
+ "1: sllg 2,1,12(0)\n"
+ /* essa(r4, r2, SET_STABLE) */
+ " .insn rrf,0xb9ab0000,4,2,1,0\n"
+ /* i++ */
+ " agfi 1,1\n"
+ /* if r1 < r5 goto 1 */
+ " cgrjl 1,5,1b\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN),
+ [page_count] "L"(TEST_DATA_PAGE_COUNT)
+ :
+ /* the counter in our loop over the pages */
+ "r1",
+ /* the calculated page physical address */
+ "r2",
+ /* ESSA output register */
+ "r4",
+ /* last page */
+ "r5",
+ "cc", "memory"
+ );
+}
+
+static void create_main_memslot(struct kvm_vm *vm)
+{
+ int i;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
+ /* set the array of memslots to zero like __vm_create does */
+ for (i = 0; i < NR_MEM_REGIONS; i++)
+ vm->memslots[i] = 0;
+}
+
+static void create_test_memslot(struct kvm_vm *vm)
+{
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_START_GFN << vm->page_shift,
+ TEST_DATA_MEMSLOT,
+ TEST_DATA_PAGE_COUNT,
+ 0
+ );
+ vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void create_memslots(struct kvm_vm *vm)
+{
+ /*
+ * Our VM has the following memory layout:
+ * +------+---------------------------+
+ * | GFN | Memslot |
+ * +------+---------------------------+
+ * | 0 | |
+ * | ... | MAIN (Code, Stack, ...) |
+ * | 511 | |
+ * +------+---------------------------+
+ * | 4096 | |
+ * | ... | TEST_DATA |
+ * | 4607 | |
+ * +------+---------------------------+
+ */
+ create_main_memslot(vm);
+ create_test_memslot(vm);
+}
+
+static void finish_vm_setup(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *slot0;
+
+ kvm_vm_elf_load(vm, program_invocation_name);
+
+ slot0 = memslot2region(vm, 0);
+ ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+ kvm_arch_vm_post_create(vm, 0);
+}
+
+static struct kvm_vm *create_vm_two_memslots(void)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_barebones();
+
+ create_memslots(vm);
+
+ finish_vm_setup(vm);
+
+ return vm;
+}
+
+static void enable_cmma(struct kvm_vm *vm)
+{
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
+ TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
+}
+
+static void enable_dirty_tracking(struct kvm_vm *vm)
+{
+ vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
+ vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+static int __enable_migration_mode(struct kvm_vm *vm)
+{
+ return __kvm_device_attr_set(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_START,
+ NULL
+ );
+}
+
+static void enable_migration_mode(struct kvm_vm *vm)
+{
+ int r = __enable_migration_mode(vm);
+
+ TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
+}
+
+static bool is_migration_mode_on(struct kvm_vm *vm)
+{
+ u64 out;
+ int r;
+
+ r = __kvm_device_attr_get(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_STATUS,
+ &out
+ );
+ TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
+ return out;
+}
+
+static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
+{
+ struct kvm_s390_cmma_log args;
+ int rc;
+
+ errno = 0;
+
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = flags,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+
+ *errno_out = errno;
+ return rc;
+}
+
+static void test_get_cmma_basic(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+ int rc, errno_out;
+
+ /* GET_CMMA_BITS without CMMA enabled should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, ENXIO);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ vcpu_run(vcpu);
+
+ /* GET_CMMA_BITS without migration mode and without peeking should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, EINVAL);
+
+ /* GET_CMMA_BITS without migration mode and with peeking should work */
+ rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT_EQ(errno_out, 0);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* GET_CMMA_BITS with invalid flags */
+ rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, EINVAL);
+
+ kvm_vm_free(vm);
+}
+
+static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
+{
+ TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+}
+
+static void test_migration_mode(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+ int rc;
+
+ /* enabling migration mode on a VM without memory should fail */
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ create_memslots(vm);
+ finish_vm_setup(vm);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /* migration mode when memslots have dirty tracking off should fail */
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ /* enable dirty tracking */
+ enable_dirty_tracking(vm);
+
+ /* enabling migration mode should work now */
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /* execute another ESSA instruction to see this goes fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * With migration mode on, create a new memslot with dirty tracking off.
+ * This should turn off migration mode.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_TWO_START_GFN << vm->page_shift,
+ TEST_DATA_TWO_MEMSLOT,
+ TEST_DATA_TWO_PAGE_COUNT,
+ 0
+ );
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "creating memslot without dirty tracking turns off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * Turn on dirty tracking on the new memslot.
+ * It should be possible to turn migration mode back on again.
+ */
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /*
+ * Turn off dirty tracking again, this time with just a flag change.
+ * Again, migration mode should turn off.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "disabling dirty tracking should turn off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+/**
+ * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have
+ * CMMA attributes of all pages in both memslots and nothing more dirty.
+ * This has the useful side effect of ensuring nothing is CMMA dirty after this
+ * function.
+ */
+static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /*
+ * First iteration - everything should be dirty.
+ * Start at the main memslot...
+ */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.start_gfn, 0);
+
+ /* ...and then - after a hole - the TEST_DATA memslot should follow */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = MAIN_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+ TEST_ASSERT_EQ(args.remaining, 0);
+
+ /* ...and nothing else should be there */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ TEST_ASSERT_EQ(args.count, 0);
+ TEST_ASSERT_EQ(args.start_gfn, 0);
+ TEST_ASSERT_EQ(args.remaining, 0);
+}
+
+/**
+ * Given a VM, assert no pages are CMMA dirty.
+ */
+static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /* If we start from GFN 0 again, nothing should be dirty. */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ if (args.count || args.remaining || args.start_gfn)
+ TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
+ args.start_gfn,
+ args.count,
+ args.remaining
+ );
+}
+
+static void test_get_initial_dirty(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Start from the beginning again and make sure nothing else is dirty */
+ assert_no_pages_cmma_dirty(vm);
+
+ kvm_vm_free(vm);
+}
+
+static void query_cmma_range(struct kvm_vm *vm,
+ u64 start_gfn, u64 gfn_count,
+ struct kvm_s390_cmma_log *res_out)
+{
+ *res_out = (struct kvm_s390_cmma_log){
+ .start_gfn = start_gfn,
+ .count = gfn_count,
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
+}
+
+/**
+ * Assert the given cmma_log struct that was executed by query_cmma_range()
+ * indicates the first dirty gfn is at first_dirty_gfn and contains exactly
+ * dirty_gfn_count CMMA values.
+ */
+static void assert_cmma_dirty(u64 first_dirty_gfn,
+ u64 dirty_gfn_count,
+ const struct kvm_s390_cmma_log *res)
+{
+ TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+ TEST_ASSERT_EQ(res->count, dirty_gfn_count);
+ for (size_t i = 0; i < dirty_gfn_count; i++)
+ TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
+ TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+}
+
+static void test_get_skip_holes(void)
+{
+ size_t gfn_offset;
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_s390_cmma_log log;
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
+
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute some essa instructions in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* un-dirty all pages */
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Then, dirty just the TEST_DATA memslot */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+
+ gfn_offset = TEST_DATA_START_GFN;
+ /**
+ * Query CMMA attributes of one page, starting at page 0. Since the
+ * main memslot was not touched by the VM, this should yield the first
+ * page of the TEST_DATA memslot.
+ * The dirty bitmap should now look like this:
+ * 0: not dirty
+ * [0x1, 0x200): dirty
+ */
+ query_cmma_range(vm, 0, 1, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
+ * memslot. This should wrap back to the beginning of the TEST_DATA
+ * memslot, page 1.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /* Skip 32 pages */
+ gfn_offset += 0x20;
+
+ /**
+ * After skipping 32 pages, query the next 32 (0x20) pages.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, gfn_offset, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /**
+ * Query 1 page from the beginning of the TEST_DATA memslot. This should
+ * yield page 0x21.
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * [0x22, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
+ * This should yield pages [0x23, 0x33).
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * 0x22: dirty
+ * [0x23, 0x33): not dirty
+ * [0x33, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x23;
+ query_cmma_range(vm, gfn_offset, 15, &log);
+ assert_cmma_dirty(gfn_offset, 15, &log);
+
+ /**
+ * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
+ * This should yield page [0x22, 0x33)
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x22;
+ query_cmma_range(vm, gfn_offset, 17, &log);
+ assert_cmma_dirty(gfn_offset, 17, &log);
+
+ /**
+ * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
+ * This should yield page 0x40 and nothing more, since there are more
+ * than 16 non-dirty pages after page 0x40.
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x40): dirty
+ * [0x40, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x40;
+ query_cmma_range(vm, gfn_offset, 25, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+
+ /**
+ * Query pages [0x33, 0x40).
+ * The dirty bitmap should now look like this:
+ * [0, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x33;
+ query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
+ assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
+
+ /**
+ * Query the remaining pages [0x61, 0x200).
+ */
+ gfn_offset = TEST_DATA_START_GFN;
+ query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
+
+ assert_no_pages_cmma_dirty(vm);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "migration mode and dirty tracking", test_migration_mode },
+ { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
+ { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty },
+ { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
+};
+
+/**
+ * The kernel may support CMMA, but the machine may not (i.e. if running as
+ * guest-3).
+ *
+ * In this case, the CMMA capabilities are all there, but the CMMA-related
+ * ioctls fail. To find out whether the machine supports CMMA, create a
+ * temporary VM and then query the CMMA feature of the VM.
+ */
+static int machine_has_cmma(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
+ kvm_vm_free(vm);
+
+ return r;
+}
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
+ TEST_REQUIRE(machine_has_cmma());
+
+ ksft_print_header();
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/config b/tools/testing/selftests/kvm/s390/config
new file mode 100644
index 000000000000..23270f2d679f
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/config
@@ -0,0 +1,2 @@
+CONFIG_KVM=y
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
new file mode 100644
index 000000000000..aded795d42be
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those
+ * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction
+ * query data reported via the CPU Model, allowing us to directly compare it with the data
+ * acquired through executing the queries in the test.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include "facility.h"
+
+#include "kvm_util.h"
+
+#define PLO_FUNCTION_MAX 256
+
+/* Query available CPU subfunctions */
+struct kvm_s390_vm_cpu_subfunc cpu_subfunc;
+
+static void get_cpu_machine_subfuntions(struct kvm_vm *vm,
+ struct kvm_s390_vm_cpu_subfunc *cpu_subfunc)
+{
+ int r;
+
+ r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc);
+
+ TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno);
+}
+
+static inline int plo_test_bit(unsigned char nr)
+{
+ unsigned long function = nr | 0x100;
+ int cc;
+
+ asm volatile(" lgr 0,%[function]\n"
+ /* Parameter registers are ignored for "test bit" */
+ " plo 0,0,0,0(0)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc)
+ : [function] "d" (function)
+ : "cc", "0");
+ return cc == 0;
+}
+
+/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */
+static void test_plo_asm_block(u8 (*query)[32])
+{
+ for (int i = 0; i < PLO_FUNCTION_MAX; ++i) {
+ if (plo_test_bit(i))
+ (*query)[i >> 3] |= 0x80 >> (i & 7);
+ }
+}
+
+/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */
+static void test_kmac_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb91e0000,0,2\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */
+static void test_kmc_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92f0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */
+static void test_km_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92e0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */
+static void test_kimd_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb93e0000,0,2\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */
+static void test_klmd_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb93f0000,0,2\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */
+static void test_kmctr_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rrf,0xb92d0000,2,4,6,0\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */
+static void test_kmf_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92a0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */
+static void test_kmo_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92b0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */
+static void test_pcc_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb92c0000,0,0\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */
+static void test_prno_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb93c0000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */
+static void test_kma_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rrf,0xb9290000,2,4,6,0\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */
+static void test_kdsa_asm_block(u8 (*query)[16])
+{
+ asm volatile(" la %%r1,%[query]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,0xb93a0000,0,2\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "r0", "r1");
+}
+
+/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */
+static void test_sortl_asm_block(u8 (*query)[32])
+{
+ asm volatile(" lghi 0,0\n"
+ " la 1,%[query]\n"
+ " .insn rre,0xb9380000,2,4\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "0", "1");
+}
+
+/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */
+static void test_dfltcc_asm_block(u8 (*query)[32])
+{
+ asm volatile(" lghi 0,0\n"
+ " la 1,%[query]\n"
+ " .insn rrf,0xb9390000,2,4,6,0\n"
+ : [query] "=R" (*query)
+ :
+ : "cc", "0", "1");
+}
+
+/*
+ * Testing Perform Function with Concurrent Results (PFCR)
+ * CPU subfunctions's ASM block
+ */
+static void test_pfcr_asm_block(u8 (*query)[16])
+{
+ asm volatile(" lghi 0,0\n"
+ " .insn rsy,0xeb0000000016,0,0,%[query]\n"
+ : [query] "=QS" (*query)
+ :
+ : "cc", "0");
+}
+
+typedef void (*testfunc_t)(u8 (*array)[]);
+
+struct testdef {
+ const char *subfunc_name;
+ u8 *subfunc_array;
+ size_t array_size;
+ testfunc_t test;
+ int facility_bit;
+} testlist[] = {
+ /*
+ * PLO was introduced in the very first 64-bit machine generation.
+ * Hence it is assumed PLO is always installed in Z Arch.
+ */
+ { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 },
+ /* MSA - Facility bit 17 */
+ { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 },
+ { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 },
+ { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 },
+ { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 },
+ { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 },
+ /* MSA - Facility bit 77 */
+ { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 },
+ { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 },
+ { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 },
+ { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 },
+ /* MSA5 - Facility bit 57 */
+ { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 },
+ /* MSA8 - Facility bit 146 */
+ { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 },
+ /* MSA9 - Facility bit 155 */
+ { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 },
+ /* SORTL - Facility bit 150 */
+ { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 },
+ /* DFLTCC - Facility bit 151 */
+ { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 },
+ /* Concurrent-function facility - Facility bit 201 */
+ { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 },
+};
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ int idx;
+
+ ksft_print_header();
+
+ vm = vm_create(1);
+
+ memset(&cpu_subfunc, 0, sizeof(cpu_subfunc));
+ get_cpu_machine_subfuntions(vm, &cpu_subfunc);
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (test_facility(testlist[idx].facility_bit)) {
+ u8 *array = malloc(testlist[idx].array_size);
+
+ testlist[idx].test((u8 (*)[testlist[idx].array_size])array);
+
+ TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array,
+ array, testlist[idx].array_size), 0);
+
+ ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
+ free(array);
+ } else {
+ ksft_test_result_skip("%s feature is not available\n",
+ testlist[idx].subfunc_name);
+ }
+ }
+
+ kvm_vm_free(vm);
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c
new file mode 100644
index 000000000000..ad8095968601
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/debug_test.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[];
+asm("int_handler:\n"
+ "j .\n");
+
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+ size_t new_psw_off, uint64_t *new_psw)
+{
+ struct kvm_guest_debug debug = {};
+ struct kvm_regs regs;
+ struct kvm_vm *vm;
+ char *lowcore;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ lowcore = addr_gpa2hva(vm, 0);
+ new_psw[0] = (*vcpu)->run->psw_mask;
+ new_psw[1] = (uint64_t)int_handler;
+ memcpy(lowcore + new_psw_off, new_psw, 16);
+ vcpu_regs_get(*vcpu, &regs);
+ regs.gprs[2] = -1;
+ vcpu_regs_set(*vcpu, &regs);
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ vcpu_guest_debug_set(*vcpu, &debug);
+ vcpu_run(*vcpu);
+
+ return vm;
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+ ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+ "j .\n");
+
+static void test_step_pgm(void)
+{
+ test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+ "diag %r0,%r0,0\n"
+ "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_PROGRAM_INT,
+ .u.pgm.code = PGM_SPECIFICATION,
+ };
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+ __LC_PGM_NEW_PSW, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
+ vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+ "iske %r2,%r2\n"
+ "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+ test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+ "lctl %c0,%c0,1\n"
+ "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+ test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+ "svc 0\n"
+ "j .\n");
+
+static void test_step_svc(void)
+{
+ test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "single-step pgm", test_step_pgm },
+ { "single-step pgm caused by diag", test_step_pgm_diag },
+ { "single-step pgm caused by iske", test_step_pgm_iske },
+ { "single-step pgm caused by lctl", test_step_pgm_lctl },
+ { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c
new file mode 100644
index 000000000000..4374b4cd2a80
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/memop.c
@@ -0,0 +1,1187 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x KVM_S390_MEM_OP
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include <linux/bits.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+enum mop_target {
+ LOGICAL,
+ SIDA,
+ ABSOLUTE,
+ INVALID,
+};
+
+enum mop_access_mode {
+ READ,
+ WRITE,
+ CMPXCHG,
+};
+
+struct mop_desc {
+ uintptr_t gaddr;
+ uintptr_t gaddr_v;
+ uint64_t set_flags;
+ unsigned int f_check : 1;
+ unsigned int f_inject : 1;
+ unsigned int f_key : 1;
+ unsigned int _gaddr_v : 1;
+ unsigned int _set_flags : 1;
+ unsigned int _sida_offset : 1;
+ unsigned int _ar : 1;
+ uint32_t size;
+ enum mop_target target;
+ enum mop_access_mode mode;
+ void *buf;
+ uint32_t sida_offset;
+ void *old;
+ uint8_t old_value[16];
+ bool *cmpxchg_success;
+ uint8_t ar;
+ uint8_t key;
+};
+
+const uint8_t NO_KEY = 0xff;
+
+static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
+{
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = (uintptr_t)desc->gaddr,
+ .size = desc->size,
+ .buf = ((uintptr_t)desc->buf),
+ .reserved = "ignored_ignored_ignored_ignored"
+ };
+
+ switch (desc->target) {
+ case LOGICAL:
+ if (desc->mode == READ)
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
+ if (desc->mode == WRITE)
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ break;
+ case SIDA:
+ if (desc->mode == READ)
+ ksmo.op = KVM_S390_MEMOP_SIDA_READ;
+ if (desc->mode == WRITE)
+ ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
+ break;
+ case ABSOLUTE:
+ if (desc->mode == READ)
+ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
+ if (desc->mode == WRITE)
+ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
+ if (desc->mode == CMPXCHG) {
+ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG;
+ ksmo.old_addr = (uint64_t)desc->old;
+ memcpy(desc->old_value, desc->old, desc->size);
+ }
+ break;
+ case INVALID:
+ ksmo.op = -1;
+ }
+ if (desc->f_check)
+ ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
+ if (desc->f_inject)
+ ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
+ if (desc->_set_flags)
+ ksmo.flags = desc->set_flags;
+ if (desc->f_key && desc->key != NO_KEY) {
+ ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
+ ksmo.key = desc->key;
+ }
+ if (desc->_ar)
+ ksmo.ar = desc->ar;
+ else
+ ksmo.ar = 0;
+ if (desc->_sida_offset)
+ ksmo.sida_offset = desc->sida_offset;
+
+ return ksmo;
+}
+
+struct test_info {
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+};
+
+#define PRINT_MEMOP false
+static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo)
+{
+ if (!PRINT_MEMOP)
+ return;
+
+ if (!vcpu)
+ printf("vm memop(");
+ else
+ printf("vcpu memop(");
+ switch (ksmo->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ printf("LOGICAL, READ, ");
+ break;
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ printf("LOGICAL, WRITE, ");
+ break;
+ case KVM_S390_MEMOP_SIDA_READ:
+ printf("SIDA, READ, ");
+ break;
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ printf("SIDA, WRITE, ");
+ break;
+ case KVM_S390_MEMOP_ABSOLUTE_READ:
+ printf("ABSOLUTE, READ, ");
+ break;
+ case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+ printf("ABSOLUTE, WRITE, ");
+ break;
+ case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+ printf("ABSOLUTE, CMPXCHG, ");
+ break;
+ }
+ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx",
+ ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
+ ksmo->old_addr);
+ if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
+ printf(", CHECK_ONLY");
+ if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
+ printf(", INJECT_EXCEPTION");
+ if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)
+ printf(", SKEY_PROTECTION");
+ puts(")");
+}
+
+static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+ struct mop_desc *desc)
+{
+ struct kvm_vcpu *vcpu = info.vcpu;
+
+ if (!vcpu)
+ return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
+ else
+ return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
+}
+
+static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+ struct mop_desc *desc)
+{
+ int r;
+
+ r = err_memop_ioctl(info, ksmo, desc);
+ if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) {
+ if (desc->cmpxchg_success) {
+ int diff = memcmp(desc->old_value, desc->old, desc->size);
+ *desc->cmpxchg_success = !diff;
+ }
+ }
+ TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
+}
+
+#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \
+({ \
+ struct test_info __info = (info_p); \
+ struct mop_desc __desc = { \
+ .target = (mop_target_p), \
+ .mode = (access_mode_p), \
+ .buf = (buf_p), \
+ .size = (size_p), \
+ __VA_ARGS__ \
+ }; \
+ struct kvm_s390_mem_op __ksmo; \
+ \
+ if (__desc._gaddr_v) { \
+ if (__desc.target == ABSOLUTE) \
+ __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \
+ else \
+ __desc.gaddr = __desc.gaddr_v; \
+ } \
+ __ksmo = ksmo_from_desc(&__desc); \
+ print_memop(__info.vcpu, &__ksmo); \
+ err##memop_ioctl(__info, &__ksmo, &__desc); \
+})
+
+#define MOP(...) MEMOP(, __VA_ARGS__)
+#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__)
+
+#define GADDR(a) .gaddr = ((uintptr_t)a)
+#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v)
+#define CHECK_ONLY .f_check = 1
+#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f)
+#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
+#define AR(a) ._ar = 1, .ar = (a)
+#define KEY(a) .f_key = 1, .key = (a)
+#define INJECT .f_inject = 1
+#define CMPXCHG_OLD(o) .old = (o)
+#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s)
+
+#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
+
+#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
+
+static uint8_t __aligned(PAGE_SIZE) mem1[65536];
+static uint8_t __aligned(PAGE_SIZE) mem2[65536];
+
+struct test_default {
+ struct kvm_vm *kvm_vm;
+ struct test_info vm;
+ struct test_info vcpu;
+ struct kvm_run *run;
+ int size;
+};
+
+static struct test_default test_default_init(void *guest_code)
+{
+ struct kvm_vcpu *vcpu;
+ struct test_default t;
+
+ t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
+ t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ t.vm = (struct test_info) { t.kvm_vm, NULL };
+ t.vcpu = (struct test_info) { t.kvm_vm, vcpu };
+ t.run = vcpu->run;
+ return t;
+}
+
+enum stage {
+ /* Synced state set by host, e.g. DAT */
+ STAGE_INITED,
+ /* Guest did nothing */
+ STAGE_IDLED,
+ /* Guest set storage keys (specifics up to test case) */
+ STAGE_SKEYS_SET,
+ /* Guest copied memory (locations up to test case) */
+ STAGE_COPIED,
+ /* End of guest code reached */
+ STAGE_DONE,
+};
+
+#define HOST_SYNC(info_p, stage) \
+({ \
+ struct test_info __info = (info_p); \
+ struct kvm_vcpu *__vcpu = __info.vcpu; \
+ struct ucall uc; \
+ int __stage = (stage); \
+ \
+ vcpu_run(__vcpu); \
+ get_ucall(__vcpu, &uc); \
+ if (uc.cmd == UCALL_ABORT) { \
+ REPORT_GUEST_ASSERT(uc); \
+ } \
+ TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ TEST_ASSERT_EQ(uc.args[1], __stage); \
+}) \
+
+static void prepare_mem12(void)
+{
+ int i;
+
+ for (i = 0; i < sizeof(mem1); i++)
+ mem1[i] = rand();
+ memset(mem2, 0xaa, sizeof(mem2));
+}
+
+#define ASSERT_MEM_EQ(p1, p2, size) \
+ TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
+
+static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu,
+ enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+ prepare_mem12();
+ CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size,
+ GADDR_V(mem1), KEY(key));
+ HOST_SYNC(copy_cpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+ GADDR_V(mem2), KEY(key));
+ ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
+static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
+ enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+ prepare_mem12();
+ CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1));
+ HOST_SYNC(copy_cpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+ GADDR_V(mem2), KEY(key));
+ ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
+static void default_cmpxchg(struct test_default *test, uint8_t key)
+{
+ for (int size = 1; size <= 16; size *= 2) {
+ for (int offset = 0; offset < 16; offset += size) {
+ uint8_t __aligned(16) new[16] = {};
+ uint8_t __aligned(16) old[16];
+ bool succ;
+
+ prepare_mem12();
+ default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY);
+
+ memcpy(&old, mem1, 16);
+ MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+ size, GADDR_V(mem1 + offset),
+ CMPXCHG_OLD(old + offset),
+ CMPXCHG_SUCCESS(&succ), KEY(key));
+ HOST_SYNC(test->vcpu, STAGE_COPIED);
+ MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+ TEST_ASSERT(succ, "exchange of values should succeed");
+ memcpy(mem1 + offset, new + offset, size);
+ ASSERT_MEM_EQ(mem1, mem2, 16);
+
+ memcpy(&old, mem1, 16);
+ new[offset]++;
+ old[offset]++;
+ MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+ size, GADDR_V(mem1 + offset),
+ CMPXCHG_OLD(old + offset),
+ CMPXCHG_SUCCESS(&succ), KEY(key));
+ HOST_SYNC(test->vcpu, STAGE_COPIED);
+ MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+ TEST_ASSERT(!succ, "exchange of values should not succeed");
+ ASSERT_MEM_EQ(mem1, mem2, 16);
+ ASSERT_MEM_EQ(&old, mem1, 16);
+ }
+ }
+}
+
+static void guest_copy(void)
+{
+ GUEST_SYNC(STAGE_INITED);
+ memcpy(&mem2, &mem1, sizeof(mem2));
+ GUEST_SYNC(STAGE_COPIED);
+}
+
+static void test_copy(void)
+{
+ struct test_default t = test_default_init(guest_copy);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_access_register(void)
+{
+ struct test_default t = test_default_init(guest_copy);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ prepare_mem12();
+ t.run->psw_mask &= ~(3UL << (63 - 17));
+ t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
+
+ /*
+ * Primary address space gets used if an access register
+ * contains zero. The host makes use of AR[1] so is a good
+ * candidate to ensure the guest AR (of zero) is used.
+ */
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size,
+ GADDR_V(mem1), AR(1));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size,
+ GADDR_V(mem2), AR(1));
+ ASSERT_MEM_EQ(mem1, mem2, t.size);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void set_storage_key_range(void *addr, size_t len, uint8_t key)
+{
+ uintptr_t _addr, abs, i;
+ int not_mapped = 0;
+
+ _addr = (uintptr_t)addr;
+ for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
+ abs = i;
+ asm volatile (
+ "lra %[abs], 0(0,%[abs])\n"
+ " jz 0f\n"
+ " llill %[not_mapped],1\n"
+ " j 1f\n"
+ "0: sske %[key], %[abs]\n"
+ "1:"
+ : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped)
+ : [key] "r" (key)
+ : "cc"
+ );
+ GUEST_ASSERT_EQ(not_mapped, 0);
+ }
+}
+
+static void guest_copy_key(void)
+{
+ set_storage_key_range(mem1, sizeof(mem1), 0x90);
+ set_storage_key_range(mem2, sizeof(mem2), 0x90);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (;;) {
+ memcpy(&mem2, &mem1, sizeof(mem2));
+ GUEST_SYNC(STAGE_COPIED);
+ }
+}
+
+static void test_copy_key(void)
+{
+ struct test_default t = test_default_init(guest_copy_key);
+
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm, no key */
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY);
+
+ /* vm/vcpu, machting key or key 0 */
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0);
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+ /*
+ * There used to be different code paths for key handling depending on
+ * if the region crossed a page boundary.
+ * There currently are not, but the more tests the merrier.
+ */
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0);
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9);
+
+ /* vm/vcpu, mismatching keys on read, but no fetch protection */
+ default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+ default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_cmpxchg_key(void)
+{
+ struct test_default t = test_default_init(guest_copy_key);
+
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ default_cmpxchg(&t, NO_KEY);
+ default_cmpxchg(&t, 0);
+ default_cmpxchg(&t, 9);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static __uint128_t cut_to_size(int size, __uint128_t val)
+{
+ switch (size) {
+ case 1:
+ return (uint8_t)val;
+ case 2:
+ return (uint16_t)val;
+ case 4:
+ return (uint32_t)val;
+ case 8:
+ return (uint64_t)val;
+ case 16:
+ return val;
+ }
+ GUEST_FAIL("Invalid size = %u", size);
+ return 0;
+}
+
+static bool popcount_eq(__uint128_t a, __uint128_t b)
+{
+ unsigned int count_a, count_b;
+
+ count_a = __builtin_popcountl((uint64_t)(a >> 64)) +
+ __builtin_popcountl((uint64_t)a);
+ count_b = __builtin_popcountl((uint64_t)(b >> 64)) +
+ __builtin_popcountl((uint64_t)b);
+ return count_a == count_b;
+}
+
+static __uint128_t rotate(int size, __uint128_t val, int amount)
+{
+ unsigned int bits = size * 8;
+
+ amount = (amount + bits) % bits;
+ val = cut_to_size(size, val);
+ if (!amount)
+ return val;
+ return (val << (bits - amount)) | (val >> amount);
+}
+
+const unsigned int max_block = 16;
+
+static void choose_block(bool guest, int i, int *size, int *offset)
+{
+ unsigned int rand;
+
+ rand = i;
+ if (guest) {
+ rand = rand * 19 + 11;
+ *size = 1 << ((rand % 3) + 2);
+ rand = rand * 19 + 11;
+ *offset = (rand % max_block) & ~(*size - 1);
+ } else {
+ rand = rand * 17 + 5;
+ *size = 1 << (rand % 5);
+ rand = rand * 17 + 5;
+ *offset = (rand % max_block) & ~(*size - 1);
+ }
+}
+
+static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old)
+{
+ unsigned int rand;
+ int amount;
+ bool swap;
+
+ rand = i;
+ rand = rand * 3 + 1;
+ if (guest)
+ rand = rand * 3 + 1;
+ swap = rand % 2 == 0;
+ if (swap) {
+ int i, j;
+ __uint128_t new;
+ uint8_t byte0, byte1;
+
+ rand = rand * 3 + 1;
+ i = rand % size;
+ rand = rand * 3 + 1;
+ j = rand % size;
+ if (i == j)
+ return old;
+ new = rotate(16, old, i * 8);
+ byte0 = new & 0xff;
+ new &= ~0xff;
+ new = rotate(16, new, -i * 8);
+ new = rotate(16, new, j * 8);
+ byte1 = new & 0xff;
+ new = (new & ~0xff) | byte0;
+ new = rotate(16, new, -j * 8);
+ new = rotate(16, new, i * 8);
+ new = new | byte1;
+ new = rotate(16, new, -i * 8);
+ return new;
+ }
+ rand = rand * 3 + 1;
+ amount = rand % (size * 8);
+ return rotate(size, old, amount);
+}
+
+static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new)
+{
+ bool ret;
+
+ switch (size) {
+ case 4: {
+ uint32_t old = *old_addr;
+
+ asm volatile ("cs %[old],%[new],%[address]"
+ : [old] "+d" (old),
+ [address] "+Q" (*(uint32_t *)(target))
+ : [new] "d" ((uint32_t)new)
+ : "cc"
+ );
+ ret = old == (uint32_t)*old_addr;
+ *old_addr = old;
+ return ret;
+ }
+ case 8: {
+ uint64_t old = *old_addr;
+
+ asm volatile ("csg %[old],%[new],%[address]"
+ : [old] "+d" (old),
+ [address] "+Q" (*(uint64_t *)(target))
+ : [new] "d" ((uint64_t)new)
+ : "cc"
+ );
+ ret = old == (uint64_t)*old_addr;
+ *old_addr = old;
+ return ret;
+ }
+ case 16: {
+ __uint128_t old = *old_addr;
+
+ asm volatile ("cdsg %[old],%[new],%[address]"
+ : [old] "+d" (old),
+ [address] "+Q" (*(__uint128_t *)(target))
+ : [new] "d" (new)
+ : "cc"
+ );
+ ret = old == *old_addr;
+ *old_addr = old;
+ return ret;
+ }
+ }
+ GUEST_FAIL("Invalid size = %u", size);
+ return 0;
+}
+
+const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000;
+
+static void guest_cmpxchg_key(void)
+{
+ int size, offset;
+ __uint128_t old, new;
+
+ set_storage_key_range(mem1, max_block, 0x10);
+ set_storage_key_range(mem2, max_block, 0x10);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (int i = 0; i < cmpxchg_iter_outer; i++) {
+ do {
+ old = 1;
+ } while (!_cmpxchg(16, mem1, &old, 0));
+ for (int j = 0; j < cmpxchg_iter_inner; j++) {
+ choose_block(true, i + j, &size, &offset);
+ do {
+ new = permutate_bits(true, i + j, size, old);
+ } while (!_cmpxchg(size, mem2 + offset, &old, new));
+ }
+ }
+
+ GUEST_SYNC(STAGE_DONE);
+}
+
+static void *run_guest(void *data)
+{
+ struct test_info *info = data;
+
+ HOST_SYNC(*info, STAGE_DONE);
+ return NULL;
+}
+
+static char *quad_to_char(__uint128_t *quad, int size)
+{
+ return ((char *)quad) + (sizeof(*quad) - size);
+}
+
+static void test_cmpxchg_key_concurrent(void)
+{
+ struct test_default t = test_default_init(guest_cmpxchg_key);
+ int size, offset;
+ __uint128_t old, new;
+ bool success;
+ pthread_t thread;
+
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+ prepare_mem12();
+ MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2));
+ pthread_create(&thread, NULL, run_guest, &t.vcpu);
+
+ for (int i = 0; i < cmpxchg_iter_outer; i++) {
+ do {
+ old = 0;
+ new = 1;
+ MOP(t.vm, ABSOLUTE, CMPXCHG, &new,
+ sizeof(new), GADDR_V(mem1),
+ CMPXCHG_OLD(&old),
+ CMPXCHG_SUCCESS(&success), KEY(1));
+ } while (!success);
+ for (int j = 0; j < cmpxchg_iter_inner; j++) {
+ choose_block(false, i + j, &size, &offset);
+ do {
+ new = permutate_bits(false, i + j, size, old);
+ MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size),
+ size, GADDR_V(mem2 + offset),
+ CMPXCHG_OLD(quad_to_char(&old, size)),
+ CMPXCHG_SUCCESS(&success), KEY(1));
+ } while (!success);
+ }
+ }
+
+ pthread_join(thread, NULL);
+
+ MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2));
+ TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2),
+ "Must retain number of set bits");
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_copy_key_fetch_prot(void)
+{
+ /*
+ * For some reason combining the first sync with override enablement
+ * results in an exception when calling HOST_SYNC.
+ */
+ GUEST_SYNC(STAGE_INITED);
+ /* Storage protection override applies to both store and fetch. */
+ set_storage_key_range(mem1, sizeof(mem1), 0x98);
+ set_storage_key_range(mem2, sizeof(mem2), 0x98);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (;;) {
+ memcpy(&mem2, &mem1, sizeof(mem2));
+ GUEST_SYNC(STAGE_COPIED);
+ }
+}
+
+static void test_copy_key_storage_prot_override(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys, storage protection override in effect */
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_key_fetch_prot(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm/vcpu, matching key, fetch protection in effect */
+ default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+ default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+#define ERR_PROT_MOP(...) \
+({ \
+ int rv; \
+ \
+ rv = ERR_MOP(__VA_ARGS__); \
+ TEST_ASSERT(rv == 4, "Should result in protection exception"); \
+})
+
+static void guest_error_key(void)
+{
+ GUEST_SYNC(STAGE_INITED);
+ set_storage_key_range(mem1, PAGE_SIZE, 0x18);
+ set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+ GUEST_SYNC(STAGE_IDLED);
+}
+
+static void test_errors_key(void)
+{
+ struct test_default t = test_default_init(guest_error_key);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm/vcpu, mismatching keys, fetch protection in effect */
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg_key(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+ int i;
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ for (i = 1; i <= 16; i *= 2) {
+ __uint128_t old = 0;
+
+ ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2),
+ CMPXCHG_OLD(&old), KEY(2));
+ }
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_termination(void)
+{
+ struct test_default t = test_default_init(guest_error_key);
+ uint64_t prefix;
+ uint64_t teid;
+ uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
+ uint64_t psw[2];
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys after first page */
+ ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT);
+ /*
+ * The memop injected a program exception and the test needs to check the
+ * Translation-Exception Identification (TEID). It is necessary to run
+ * the guest in order to be able to read the TEID from guest memory.
+ * Set the guest program new PSW, so the guest state is not clobbered.
+ */
+ prefix = t.run->s.regs.prefix;
+ psw[0] = t.run->psw_mask;
+ psw[1] = t.run->psw_addr;
+ MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464));
+ HOST_SYNC(t.vcpu, STAGE_IDLED);
+ MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
+ /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
+ TEST_ASSERT_EQ(teid & teid_mask, 0);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_storage_prot_override(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm, mismatching keys, storage protection override not applicable to vm */
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+const uint64_t last_page_addr = -PAGE_SIZE;
+
+static void guest_copy_key_fetch_prot_override(void)
+{
+ int i;
+ char *page_0 = 0;
+
+ GUEST_SYNC(STAGE_INITED);
+ set_storage_key_range(0, PAGE_SIZE, 0x18);
+ set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0);
+ asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc");
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (;;) {
+ for (i = 0; i < PAGE_SIZE; i++)
+ page_0[i] = mem1[i];
+ GUEST_SYNC(STAGE_COPIED);
+ }
+}
+
+static void test_copy_key_fetch_prot_override(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+ vm_vaddr_t guest_0_page, guest_last_page;
+
+ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+ print_skip("did not allocate guest pages at required positions");
+ goto out;
+ }
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys on fetch, fetch protection override applies */
+ prepare_mem12();
+ MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+ ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+ /*
+ * vcpu, mismatching keys on fetch, fetch protection override applies,
+ * wraparound
+ */
+ prepare_mem12();
+ MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048,
+ GADDR_V(guest_last_page), KEY(2));
+ ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+out:
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_not_enabled(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+ vm_vaddr_t guest_0_page, guest_last_page;
+
+ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+ print_skip("did not allocate guest pages at required positions");
+ goto out;
+ }
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys on fetch, fetch protection override not enabled */
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2));
+
+out:
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_enabled(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+ vm_vaddr_t guest_0_page, guest_last_page;
+
+ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+ print_skip("did not allocate guest pages at required positions");
+ goto out;
+ }
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /*
+ * vcpu, mismatching keys on fetch,
+ * fetch protection override does not apply because memory range exceeded
+ */
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
+ GADDR_V(guest_last_page), KEY(2));
+ /* vm, fetch protected override does not apply */
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+
+out:
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_idle(void)
+{
+ GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
+ for (;;)
+ GUEST_SYNC(STAGE_IDLED);
+}
+
+static void _test_errors_common(struct test_info info, enum mop_target target, int size)
+{
+ int rv;
+
+ /* Bad size: */
+ rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
+
+ /* Zero size: */
+ rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
+ "ioctl allows 0 as size");
+
+ /* Bad flags: */
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
+
+ /* Bad guest address: */
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
+ TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY");
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL));
+ TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write");
+
+ /* Bad host address: */
+ rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && errno == EFAULT,
+ "ioctl does not report bad host memory address");
+
+ /* Bad key: */
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
+}
+
+static void test_errors(void)
+{
+ struct test_default t = test_default_init(guest_idle);
+ int rv;
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ _test_errors_common(t.vcpu, LOGICAL, t.size);
+ _test_errors_common(t.vm, ABSOLUTE, t.size);
+
+ /* Bad operation: */
+ rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+ /* virtual addresses are not translated when passing INVALID */
+ rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+
+ /* Bad access register: */
+ t.run->psw_mask &= ~(3UL << (63 - 17));
+ t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
+ HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */
+ rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
+ t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */
+ HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */
+
+ /* Check that the SIDA calls are rejected for non-protected guests */
+ rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl does not reject SIDA_READ in non-protected mode");
+ rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl does not reject SIDA_WRITE in non-protected mode");
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg(void)
+{
+ struct test_default t = test_default_init(guest_idle);
+ __uint128_t old;
+ int rv, i, power = 1;
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ for (i = 0; i < 32; i++) {
+ if (i == power) {
+ power *= 2;
+ continue;
+ }
+ rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1),
+ CMPXCHG_OLD(&old));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl allows bad size for cmpxchg");
+ }
+ for (i = 1; i <= 16; i *= 2) {
+ rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL),
+ CMPXCHG_OLD(&old));
+ TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg");
+ }
+ for (i = 2; i <= 16; i *= 2) {
+ rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1),
+ CMPXCHG_OLD(&old));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl allows bad alignment for cmpxchg");
+ }
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+int main(int argc, char *argv[])
+{
+ int extension_cap, idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
+ extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
+
+ struct testdef {
+ const char *name;
+ void (*test)(void);
+ bool requirements_met;
+ } testlist[] = {
+ {
+ .name = "simple copy",
+ .test = test_copy,
+ .requirements_met = true,
+ },
+ {
+ .name = "generic error checks",
+ .test = test_errors,
+ .requirements_met = true,
+ },
+ {
+ .name = "copy with storage keys",
+ .test = test_copy_key,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "cmpxchg with storage keys",
+ .test = test_cmpxchg_key,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "concurrently cmpxchg with storage keys",
+ .test = test_cmpxchg_key_concurrent,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "copy with key storage protection override",
+ .test = test_copy_key_storage_prot_override,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "copy with key fetch protection",
+ .test = test_copy_key_fetch_prot,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "copy with key fetch protection override",
+ .test = test_copy_key_fetch_prot_override,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "copy with access register mode",
+ .test = test_copy_access_register,
+ .requirements_met = true,
+ },
+ {
+ .name = "error checks with key",
+ .test = test_errors_key,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks for cmpxchg with key",
+ .test = test_errors_cmpxchg_key,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "error checks for cmpxchg",
+ .test = test_errors_cmpxchg,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "termination",
+ .test = test_termination,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks with key storage protection override",
+ .test = test_errors_key_storage_prot_override,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks without key fetch prot override",
+ .test = test_errors_key_fetch_prot_override_not_enabled,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks with key fetch prot override",
+ .test = test_errors_key_fetch_prot_override_enabled,
+ .requirements_met = extension_cap > 0,
+ },
+ };
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (testlist[idx].requirements_met) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ } else {
+ ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n",
+ testlist[idx].name, extension_cap);
+ }
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390/resets.c
index b143db6d8693..b58f75b381e5 100644
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390/resets.c
@@ -12,15 +12,14 @@
#include "test_util.h"
#include "kvm_util.h"
+#include "kselftest.h"
-#define VCPU_ID 3
#define LOCAL_IRQS 32
-struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS];
+#define ARBITRARY_NON_ZERO_VCPU_ID 3
+
+struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
-struct kvm_vm *vm;
-struct kvm_run *run;
-struct kvm_sync_regs *sync_regs;
static uint8_t regs_null[512];
static void guest_code_initial(void)
@@ -58,47 +57,45 @@ static void guest_code_initial(void)
);
}
-static void test_one_reg(uint64_t id, uint64_t value)
+static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
{
- struct kvm_one_reg reg;
uint64_t eval_reg;
- reg.addr = (uintptr_t)&eval_reg;
- reg.id = id;
- vcpu_get_reg(vm, VCPU_ID, &reg);
+ eval_reg = vcpu_get_reg(vcpu, id);
TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
}
-static void assert_noirq(void)
+static void assert_noirq(struct kvm_vcpu *vcpu)
{
struct kvm_s390_irq_state irq_state;
int irqs;
irq_state.len = sizeof(buf);
irq_state.buf = (unsigned long)buf;
- irqs = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_GET_IRQ_STATE, &irq_state);
+ irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state);
/*
* irqs contains the number of retrieved interrupts. Any interrupt
* (notably, the emergency call interrupt we have injected) should
* be cleared by the resets, so this should be 0.
*/
- TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno);
+ TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
TEST_ASSERT(!irqs, "IRQ pending");
}
-static void assert_clear(void)
+static void assert_clear(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
struct kvm_sregs sregs;
struct kvm_regs regs;
struct kvm_fpu fpu;
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
- vcpu_fpu_get(vm, VCPU_ID, &fpu);
+ vcpu_fpu_get(vcpu, &fpu);
TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
/* sync regs */
@@ -112,8 +109,10 @@ static void assert_clear(void)
"vrs0-15 == 0 (sync_regs)");
}
-static void assert_initial_noclear(void)
+static void assert_initial_noclear(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
"gpr0 == 0xffff000000000000 (sync_regs)");
TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
@@ -127,13 +126,14 @@ static void assert_initial_noclear(void)
TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
}
-static void assert_initial(void)
+static void assert_initial(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
struct kvm_sregs sregs;
struct kvm_fpu fpu;
/* KVM_GET_SREGS */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
"cr14 == 0xC2000000 (KVM_GET_SREGS)");
@@ -156,36 +156,38 @@ static void assert_initial(void)
TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
/* kvm_run */
- TEST_ASSERT(run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
- TEST_ASSERT(run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
+ TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
+ TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
- vcpu_fpu_get(vm, VCPU_ID, &fpu);
+ vcpu_fpu_get(vcpu, &fpu);
TEST_ASSERT(!fpu.fpc, "fpc == 0");
- test_one_reg(KVM_REG_S390_GBEA, 1);
- test_one_reg(KVM_REG_S390_PP, 0);
- test_one_reg(KVM_REG_S390_TODPR, 0);
- test_one_reg(KVM_REG_S390_CPU_TIMER, 0);
- test_one_reg(KVM_REG_S390_CLOCK_COMP, 0);
+ test_one_reg(vcpu, KVM_REG_S390_GBEA, 1);
+ test_one_reg(vcpu, KVM_REG_S390_PP, 0);
+ test_one_reg(vcpu, KVM_REG_S390_TODPR, 0);
+ test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0);
+ test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0);
}
-static void assert_normal_noclear(void)
+static void assert_normal_noclear(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 10 (sync_regs)");
TEST_ASSERT(sync_regs->crs[8] == 1, "cr10 == 1 (sync_regs)");
TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
}
-static void assert_normal(void)
+static void assert_normal(struct kvm_vcpu *vcpu)
{
- test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
- TEST_ASSERT(sync_regs->pft == KVM_S390_PFAULT_TOKEN_INVALID,
+ test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
+ TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID,
"pft == 0xff..... (sync_regs)");
- assert_noirq();
+ assert_noirq(vcpu);
}
-static void inject_irq(int cpu_id)
+static void inject_irq(struct kvm_vcpu *vcpu)
{
struct kvm_s390_irq_state irq_state;
struct kvm_s390_irq *irq = &buf[0];
@@ -195,85 +197,117 @@ static void inject_irq(int cpu_id)
irq_state.len = sizeof(struct kvm_s390_irq);
irq_state.buf = (unsigned long)buf;
irq->type = KVM_S390_INT_EMERGENCY;
- irq->u.emerg.code = cpu_id;
- irqs = _vcpu_ioctl(vm, cpu_id, KVM_S390_SET_IRQ_STATE, &irq_state);
- TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno);
+ irq->u.emerg.code = vcpu->id;
+ irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
+ TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
+}
+
+static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+
+ *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial);
+
+ return vm;
}
static void test_normal(void)
{
- pr_info("Testing normal reset\n");
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
- run = vcpu_state(vm, VCPU_ID);
- sync_regs = &run->s.regs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ ksft_print_msg("Testing normal reset\n");
+ vm = create_vm(&vcpu);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- inject_irq(VCPU_ID);
+ inject_irq(vcpu);
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0);
+ vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL);
/* must clears */
- assert_normal();
+ assert_normal(vcpu);
/* must not clears */
- assert_normal_noclear();
- assert_initial_noclear();
+ assert_normal_noclear(vcpu);
+ assert_initial_noclear(vcpu);
kvm_vm_free(vm);
}
static void test_initial(void)
{
- pr_info("Testing initial reset\n");
- vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
- run = vcpu_state(vm, VCPU_ID);
- sync_regs = &run->s.regs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
- vcpu_run(vm, VCPU_ID);
+ ksft_print_msg("Testing initial reset\n");
+ vm = create_vm(&vcpu);
- inject_irq(VCPU_ID);
+ vcpu_run(vcpu);
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0);
+ inject_irq(vcpu);
+
+ vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL);
/* must clears */
- assert_normal();
- assert_initial();
+ assert_normal(vcpu);
+ assert_initial(vcpu);
/* must not clears */
- assert_initial_noclear();
+ assert_initial_noclear(vcpu);
kvm_vm_free(vm);
}
static void test_clear(void)
{
- pr_info("Testing clear reset\n");
- vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
- run = vcpu_state(vm, VCPU_ID);
- sync_regs = &run->s.regs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ ksft_print_msg("Testing clear reset\n");
+ vm = create_vm(&vcpu);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- inject_irq(VCPU_ID);
+ inject_irq(vcpu);
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0);
+ vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL);
/* must clears */
- assert_normal();
- assert_initial();
- assert_clear();
+ assert_normal(vcpu);
+ assert_initial(vcpu);
+ assert_clear(vcpu);
kvm_vm_free(vm);
}
+struct testdef {
+ const char *name;
+ void (*test)(void);
+ bool needs_cap;
+} testlist[] = {
+ { "initial", test_initial, false },
+ { "normal", test_normal, true },
+ { "clear", test_clear, true },
+};
+
int main(int argc, char *argv[])
{
- setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
-
- test_initial();
- if (kvm_check_cap(KVM_CAP_S390_VCPU_RESETS)) {
- test_normal();
- test_clear();
+ bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
+ int idx;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ } else {
+ ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
+ testlist[idx].name);
+ }
}
- return 0;
+
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
new file mode 100644
index 000000000000..bba0d9a6dcc8
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test shared zeropage handling (with/without storage keys)
+ *
+ * Copyright (C) 2024, Red Hat, Inc.
+ */
+#include <sys/mman.h>
+
+#include <linux/fs.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+static void set_storage_key(void *addr, uint8_t skey)
+{
+ asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static void guest_code(void)
+{
+ /* Issue some storage key instruction. */
+ set_storage_key((void *)0, 0x98);
+ GUEST_DONE();
+}
+
+/*
+ * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
+ * Returns < 0 on error or if nothing is mapped.
+ */
+static int maps_shared_zeropage(int pagemap_fd, void *addr)
+{
+ struct page_region region;
+ struct pm_scan_arg arg = {
+ .start = (uintptr_t)addr,
+ .end = (uintptr_t)addr + 4096,
+ .vec = (uintptr_t)&region,
+ .vec_len = 1,
+ .size = sizeof(struct pm_scan_arg),
+ .category_mask = PAGE_IS_PFNZERO,
+ .category_anyof_mask = PAGE_IS_PRESENT,
+ .return_mask = PAGE_IS_PFNZERO,
+ };
+ return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+int main(int argc, char *argv[])
+{
+ char *mem, *page0, *page1, *page2, tmp;
+ const size_t pagesize = getpagesize();
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int pagemap_fd;
+
+ ksft_print_header();
+ ksft_set_plan(3);
+
+ /*
+ * We'll use memory that is not mapped into the VM for simplicity.
+ * Shared zeropages are enabled/disabled per-process.
+ */
+ mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+
+ /* Disable THP. Ignore errors on older kernels. */
+ madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
+
+ page0 = mem;
+ page1 = page0 + pagesize;
+ page2 = page1 + pagesize;
+
+ /* Can we even detect shared zeropages? */
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ TEST_REQUIRE(pagemap_fd >= 0);
+
+ tmp = *page0;
+ asm volatile("" : "+r" (tmp));
+ TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Verify that we get the shared zeropage after VM creation. */
+ tmp = *page1;
+ asm volatile("" : "+r" (tmp));
+ ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
+ "Shared zeropages should be enabled\n");
+
+ /*
+ * Let our VM execute a storage key instruction that should
+ * unshare all shared zeropages.
+ */
+ vcpu_run(vcpu);
+ get_ucall(vcpu, &uc);
+ TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
+
+ /* Verify that we don't have a shared zeropage anymore. */
+ ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
+ "Shared zeropage should be gone\n");
+
+ /* Verify that we don't get any new shared zeropages. */
+ tmp = *page2;
+ asm volatile("" : "+r" (tmp));
+ ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
+ "Shared zeropages should be disabled\n");
+
+ kvm_vm_free(vm);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c
index caf7b8859a94..53def355ccba 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390/sync_regs_test.c
@@ -10,8 +10,6 @@
*
* Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -21,8 +19,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "diag318_test_handler.h"
-
-#define VCPU_ID 5
+#include "kselftest.h"
static void guest_code(void)
{
@@ -40,13 +37,13 @@ static void guest_code(void)
#define REG_COMPARE(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
- " values did not match: 0x%llx, 0x%llx\n", \
+ " values did not match: 0x%llx, 0x%llx", \
left->reg, right->reg)
#define REG_COMPARE32(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
- " values did not match: 0x%x, 0x%x\n", \
+ " values did not match: 0x%x, 0x%x", \
left->reg, right->reg)
@@ -74,78 +71,80 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
#define INVALID_SYNC_FIELD 0x80000000
-int main(int argc, char *argv[])
+void test_read_invalid(struct kvm_vcpu *vcpu)
{
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_regs regs;
- struct kvm_sregs sregs;
- int rv, cap;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
- if (!cap) {
- print_skip("CAP_SYNC_REGS not supported");
- exit(KSFT_SKIP);
- }
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request reading invalid register set from VCPU. */
run->kvm_valid_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
+}
+
+void test_set_invalid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request setting invalid register set into VCPU. */
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
+}
+
+void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ int rv;
/* Request and verify all valid register sets. */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "Unexpected exit reason: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
(run->s390_sieic.ipa >> 8) == 0x83 &&
(run->s390_sieic.ipb >> 16) == 0x501,
- "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n",
+ "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
run->s390_sieic.icptcode, run->s390_sieic.ipa,
run->s390_sieic.ipb);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ int rv;
/* Set and verify various register values */
run->s.regs.gprs[11] = 0xBAD1DEA;
@@ -159,12 +158,9 @@ int main(int argc, char *argv[])
run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
}
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "Unexpected exit reason: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
"r11 sync regs value incorrect 0x%llx.",
run->s.regs.gprs[11]);
@@ -175,11 +171,17 @@ int main(int argc, char *argv[])
"diag318 sync regs value incorrect 0x%llx.",
run->s.regs.diag318);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Clear kvm_dirty_regs bits, verify new s.regs values are
* overwritten with existing guest values.
@@ -188,20 +190,49 @@ int main(int argc, char *argv[])
run->kvm_dirty_regs = 0;
run->s.regs.gprs[11] = 0xDEADBEEF;
run->s.regs.diag318 = 0x4B1D;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "Unexpected exit reason: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
"r11 sync regs value incorrect 0x%llx.",
run->s.regs.gprs[11]);
TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
"diag318 sync regs value incorrect 0x%llx.",
run->s.regs.diag318);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(struct kvm_vcpu *vcpu);
+} testlist[] = {
+ { "read invalid", test_read_invalid },
+ { "set invalid", test_set_invalid },
+ { "request+verify all valid regs", test_req_and_verify_all_valid_regs },
+ { "set+verify various regs", test_set_and_verify_various_reg_values },
+ { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
+};
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+
+ ksft_print_header();
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test(vcpu);
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
kvm_vm_free(vm);
- return 0;
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c
new file mode 100644
index 000000000000..12d5e1cb62e3
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/tprot.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test TEST PROTECTION emulation.
+ *
+ * Copyright IBM Corp. 2021
+ */
+#include <sys/mman.h>
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
+
+static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
+static uint8_t *const page_store_prot = pages[0];
+static uint8_t *const page_fetch_prot = pages[1];
+
+/* Nonzero return value indicates that address not mapped */
+static int set_storage_key(void *addr, uint8_t key)
+{
+ int not_mapped = 0;
+
+ asm volatile (
+ "lra %[addr], 0(0,%[addr])\n"
+ " jz 0f\n"
+ " llill %[not_mapped],1\n"
+ " j 1f\n"
+ "0: sske %[key], %[addr]\n"
+ "1:"
+ : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
+ : [key] "r" (key)
+ : "cc"
+ );
+ return -not_mapped;
+}
+
+enum permission {
+ READ_WRITE = 0,
+ READ = 1,
+ RW_PROTECTED = 2,
+ TRANSL_UNAVAIL = 3,
+};
+
+static enum permission test_protection(void *addr, uint8_t key)
+{
+ uint64_t mask;
+
+ asm volatile (
+ "tprot %[addr], 0(%[key])\n"
+ " ipm %[mask]\n"
+ : [mask] "=r" (mask)
+ : [addr] "Q" (*(char *)addr),
+ [key] "a" (key)
+ : "cc"
+ );
+
+ return (enum permission)(mask >> 28);
+}
+
+enum stage {
+ STAGE_INIT_SIMPLE,
+ TEST_SIMPLE,
+ STAGE_INIT_FETCH_PROT_OVERRIDE,
+ TEST_FETCH_PROT_OVERRIDE,
+ TEST_STORAGE_PROT_OVERRIDE,
+ STAGE_END /* must be the last entry (it's the amount of tests) */
+};
+
+struct test {
+ enum stage stage;
+ void *addr;
+ uint8_t key;
+ enum permission expected;
+} tests[] = {
+ /*
+ * We perform each test in the array by executing TEST PROTECTION on
+ * the specified addr with the specified key and checking if the returned
+ * permissions match the expected value.
+ * Both guest and host cooperate to set up the required test conditions.
+ * A central condition is that the page targeted by addr has to be DAT
+ * protected in the host mappings, in order for KVM to emulate the
+ * TEST PROTECTION instruction.
+ * Since the page tables are shared, the host uses mprotect to achieve
+ * this.
+ *
+ * Test resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted
+ * by SIE, not KVM, but there is no harm in testing them also.
+ * See Enhanced Suppression-on-Protection Facilities in the
+ * Interpretive-Execution Mode
+ */
+ /*
+ * guest: set storage key of page_store_prot to 1
+ * storage key of page_fetch_prot to 9 and enable
+ * protection for it
+ * STAGE_INIT_SIMPLE
+ * host: write protect both via mprotect
+ */
+ /* access key 0 matches any storage key -> RW */
+ { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
+ /* access key matches storage key -> RW */
+ { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
+ /* mismatched keys, but no fetch protection -> RO */
+ { TEST_SIMPLE, page_store_prot, 0x20, READ },
+ /* access key 0 matches any storage key -> RW */
+ { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
+ /* access key matches storage key -> RW */
+ { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
+ /* mismatched keys, fetch protection -> inaccessible */
+ { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED },
+ /* page 0 not mapped yet -> translation not available */
+ { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL },
+ /*
+ * host: try to map page 0
+ * guest: set storage key of page 0 to 9 and enable fetch protection
+ * STAGE_INIT_FETCH_PROT_OVERRIDE
+ * host: write protect page 0
+ * enable fetch protection override
+ */
+ /* mismatched keys, fetch protection, but override applies -> RO */
+ { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
+ /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */
+ { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED },
+ /*
+ * host: enable storage protection override
+ */
+ /* mismatched keys, but override applies (storage key 9) -> RW */
+ { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
+ /* mismatched keys, no fetch protection, override doesn't apply -> RO */
+ { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
+ /* mismatched keys, but override applies (storage key 9) -> RW */
+ { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
+ /* end marker */
+ { STAGE_END, 0, 0, 0 },
+};
+
+static enum stage perform_next_stage(int *i, bool mapped_0)
+{
+ enum stage stage = tests[*i].stage;
+ enum permission result;
+ bool skip;
+
+ for (; tests[*i].stage == stage; (*i)++) {
+ /*
+ * Some fetch protection override tests require that page 0
+ * be mapped, however, when the hosts tries to map that page via
+ * vm_vaddr_alloc, it may happen that some other page gets mapped
+ * instead.
+ * In order to skip these tests we detect this inside the guest
+ */
+ skip = tests[*i].addr < (void *)PAGE_SIZE &&
+ tests[*i].expected != TRANSL_UNAVAIL &&
+ !mapped_0;
+ if (!skip) {
+ result = test_protection(tests[*i].addr, tests[*i].key);
+ __GUEST_ASSERT(result == tests[*i].expected,
+ "Wanted %u, got %u, for i = %u",
+ tests[*i].expected, result, *i);
+ }
+ }
+ return stage;
+}
+
+static void guest_code(void)
+{
+ bool mapped_0;
+ int i = 0;
+
+ GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
+ GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
+ GUEST_SYNC(STAGE_INIT_SIMPLE);
+ GUEST_SYNC(perform_next_stage(&i, false));
+
+ /* Fetch-protection override */
+ mapped_0 = !set_storage_key((void *)0, 0x98);
+ GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
+ GUEST_SYNC(perform_next_stage(&i, mapped_0));
+
+ /* Storage-protection override */
+ GUEST_SYNC(perform_next_stage(&i, mapped_0));
+}
+
+#define HOST_SYNC_NO_TAP(vcpup, stage) \
+({ \
+ struct kvm_vcpu *__vcpu = (vcpup); \
+ struct ucall uc; \
+ int __stage = (stage); \
+ \
+ vcpu_run(__vcpu); \
+ get_ucall(__vcpu, &uc); \
+ if (uc.cmd == UCALL_ABORT) \
+ REPORT_GUEST_ASSERT(uc); \
+ TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ TEST_ASSERT_EQ(uc.args[1], __stage); \
+})
+
+#define HOST_SYNC(vcpu, stage) \
+({ \
+ HOST_SYNC_NO_TAP(vcpu, stage); \
+ ksft_test_result_pass("" #stage "\n"); \
+})
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ vm_vaddr_t guest_0_page;
+
+ ksft_print_header();
+ ksft_set_plan(STAGE_END);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
+ mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
+ HOST_SYNC(vcpu, TEST_SIMPLE);
+
+ guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
+ if (guest_0_page != 0) {
+ /* Use NO_TAP so we don't get a PASS print */
+ HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
+ "Did not allocate page at 0\n");
+ } else {
+ HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ }
+ if (guest_0_page == 0)
+ mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
+ run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
+
+ run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE);
+
+ kvm_vm_free(vm);
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
new file mode 100644
index 000000000000..50bc1c38225a
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -0,0 +1,798 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test code for the s390x kvm ucontrol interface
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ * Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+#include "debug_print.h"
+#include "kselftest_harness.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sie.h"
+
+#include <linux/capability.h>
+#include <linux/sizes.h>
+
+#define PGM_SEGMENT_TRANSLATION 0x10
+
+#define VM_MEM_SIZE (4 * SZ_1M)
+#define VM_MEM_EXT_SIZE (2 * SZ_1M)
+#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M)
+
+/* so directly declare capget to check caps without libcap */
+int capget(cap_user_header_t header, cap_user_data_t data);
+
+/**
+ * In order to create user controlled virtual machines on S390,
+ * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL
+ * as privileged user (SYS_ADMIN).
+ */
+void require_ucontrol_admin(void)
+{
+ struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+ struct __user_cap_header_struct hdr = {
+ .version = _LINUX_CAPABILITY_VERSION_3,
+ };
+ int rc;
+
+ rc = capget(&hdr, data);
+ TEST_ASSERT_EQ(0, rc);
+ TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
+}
+
+/* Test program setting some registers and looping */
+extern char test_gprs_asm[];
+asm("test_gprs_asm:\n"
+ "xgr %r0, %r0\n"
+ "lgfi %r1,1\n"
+ "lgfi %r2,2\n"
+ "lgfi %r3,3\n"
+ "lgfi %r4,4\n"
+ "lgfi %r5,5\n"
+ "lgfi %r6,6\n"
+ "lgfi %r7,7\n"
+ "0:\n"
+ " diag 0,0,0x44\n"
+ " ahi %r0,1\n"
+ " j 0b\n"
+);
+
+/* Test program manipulating memory */
+extern char test_mem_asm[];
+asm("test_mem_asm:\n"
+ "xgr %r0, %r0\n"
+
+ "0:\n"
+ " ahi %r0,1\n"
+ " st %r1,0(%r5,%r6)\n"
+
+ " xgr %r1,%r1\n"
+ " l %r1,0(%r5,%r6)\n"
+ " ahi %r0,1\n"
+ " diag 0,0,0x44\n"
+
+ " j 0b\n"
+);
+
+/* Test program manipulating storage keys */
+extern char test_skey_asm[];
+asm("test_skey_asm:\n"
+ "xgr %r0, %r0\n"
+
+ "0:\n"
+ " ahi %r0,1\n"
+ " st %r1,0(%r5,%r6)\n"
+
+ " sske %r1,%r6\n"
+ " xgr %r1,%r1\n"
+ " iske %r1,%r6\n"
+ " ahi %r0,1\n"
+ " diag 0,0,0x44\n"
+
+ " rrbe %r1,%r6\n"
+ " iske %r1,%r6\n"
+ " ahi %r0,1\n"
+ " diag 0,0,0x44\n"
+
+ " j 0b\n"
+);
+
+FIXTURE(uc_kvm)
+{
+ struct kvm_s390_sie_block *sie_block;
+ struct kvm_run *run;
+ uintptr_t base_gpa;
+ uintptr_t code_gpa;
+ uintptr_t base_hva;
+ uintptr_t code_hva;
+ int kvm_run_size;
+ vm_paddr_t pgd;
+ void *vm_mem;
+ int vcpu_fd;
+ int kvm_fd;
+ int vm_fd;
+};
+
+/**
+ * create VM with single vcpu, map kvm_run and SIE control block for easy access
+ */
+FIXTURE_SETUP(uc_kvm)
+{
+ struct kvm_s390_vm_cpu_processor info;
+ int rc;
+
+ require_ucontrol_admin();
+
+ self->kvm_fd = open_kvm_dev_path_or_exit();
+ self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+ ASSERT_GE(self->vm_fd, 0);
+
+ kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info);
+ TH_LOG("create VM 0x%llx", info.cpuid);
+
+ self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
+ ASSERT_GE(self->vcpu_fd, 0);
+
+ self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+ ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
+ TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
+ self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd);
+ /**
+ * For virtual cpus that have been created with S390 user controlled
+ * virtual machines, the resulting vcpu fd can be memory mapped at page
+ * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
+ * the virtual cpu's hardware control block.
+ */
+ self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd,
+ KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
+
+ TH_LOG("VM created %p %p", self->run, self->sie_block);
+
+ self->base_gpa = 0;
+ self->code_gpa = self->base_gpa + (3 * SZ_1M);
+
+ self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M);
+ ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno);
+ self->base_hva = (uintptr_t)self->vm_mem;
+ self->code_hva = self->base_hva - self->base_gpa + self->code_gpa;
+ struct kvm_s390_ucas_mapping map = {
+ .user_addr = self->base_hva,
+ .vcpu_addr = self->base_gpa,
+ .length = VM_MEM_SIZE,
+ };
+ TH_LOG("ucas map %p %p 0x%llx",
+ (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+ rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
+ ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s",
+ rc, strerror(errno));
+
+ TH_LOG("page in %p", (void *)self->base_gpa);
+ rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa);
+ ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s",
+ (void *)self->base_hva, rc, strerror(errno));
+
+ self->sie_block->cpuflags &= ~CPUSTAT_STOPPED;
+}
+
+FIXTURE_TEARDOWN(uc_kvm)
+{
+ kvm_munmap(self->sie_block, PAGE_SIZE);
+ kvm_munmap(self->run, self->kvm_run_size);
+ close(self->vcpu_fd);
+ close(self->vm_fd);
+ close(self->kvm_fd);
+ free(self->vm_mem);
+}
+
+TEST_F(uc_kvm, uc_sie_assertions)
+{
+ /* assert interception of Code 08 (Program Interruption) is set */
+ EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI);
+}
+
+TEST_F(uc_kvm, uc_attr_mem_limit)
+{
+ u64 limit;
+ struct kvm_device_attr attr = {
+ .group = KVM_S390_VM_MEM_CTRL,
+ .attr = KVM_S390_VM_MEM_LIMIT_SIZE,
+ .addr = (u64)&limit,
+ };
+ int rc;
+
+ rc = ioctl(self->vm_fd, KVM_HAS_DEVICE_ATTR, &attr);
+ EXPECT_EQ(0, rc);
+
+ rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(0, rc);
+ EXPECT_EQ(~0UL, limit);
+
+ /* assert set not supported */
+ rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST_F(uc_kvm, uc_no_dirty_log)
+{
+ struct kvm_dirty_log dlog;
+ int rc;
+
+ rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+/**
+ * Assert HPAGE CAP cannot be enabled on UCONTROL VM
+ */
+TEST(uc_cap_hpage)
+{
+ int rc, kvm_fd, vm_fd, vcpu_fd;
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_S390_HPAGE_1M,
+ };
+
+ require_ucontrol_admin();
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+ ASSERT_GE(vm_fd, 0);
+
+ /* assert hpages are not supported on ucontrol vm */
+ rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M);
+ EXPECT_EQ(0, rc);
+
+ /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */
+ rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* assert HPAGE CAP is rejected after vCPU creation */
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ ASSERT_GE(vcpu_fd, 0);
+ rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+ EXPECT_EQ(-1, rc);
+ EXPECT_EQ(EBUSY, errno);
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
+
+/* calculate host virtual addr from guest physical addr */
+static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa)
+{
+ return (void *)(self->base_hva - self->base_gpa + gpa);
+}
+
+/* map / make additional memory available */
+static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
+{
+ struct kvm_s390_ucas_mapping map = {
+ .user_addr = (u64)gpa2hva(self, vcpu_addr),
+ .vcpu_addr = vcpu_addr,
+ .length = length,
+ };
+ pr_info("ucas map %p %p 0x%llx",
+ (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+ return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
+}
+
+/* unmap previously mapped memory */
+static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
+{
+ struct kvm_s390_ucas_mapping map = {
+ .user_addr = (u64)gpa2hva(self, vcpu_addr),
+ .vcpu_addr = vcpu_addr,
+ .length = length,
+ };
+ pr_info("ucas unmap %p %p 0x%llx",
+ (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+ return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map);
+}
+
+/* handle ucontrol exit by mapping the accessed segment */
+static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_run *run = self->run;
+ u64 seg_addr;
+ int rc;
+
+ TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+ switch (run->s390_ucontrol.pgm_code) {
+ case PGM_SEGMENT_TRANSLATION:
+ seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1);
+ pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n",
+ run->s390_ucontrol.trans_exc_code, seg_addr);
+ /* map / make additional memory available */
+ rc = uc_map_ext(self, seg_addr, SZ_1M);
+ TEST_ASSERT_EQ(0, rc);
+ break;
+ default:
+ TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code);
+ }
+}
+
+/*
+ * Handle the SIEIC exit
+ * * fail on codes not expected in the test cases
+ * Returns if interception is handled / execution can be continued
+ */
+static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+
+ /* disable KSS */
+ sie_block->cpuflags &= ~CPUSTAT_KSS;
+ /* disable skey inst interception */
+ sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+}
+
+/*
+ * Handle the instruction intercept
+ * Returns if interception is handled / execution can be continued
+ */
+static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+ int ilen = insn_length(sie_block->ipa >> 8);
+ struct kvm_run *run = self->run;
+
+ switch (run->s390_sieic.ipa) {
+ case 0xB229: /* ISKE */
+ case 0xB22b: /* SSKE */
+ case 0xB22a: /* RRBE */
+ uc_skey_enable(self);
+
+ /* rewind to reexecute intercepted instruction */
+ run->psw_addr = run->psw_addr - ilen;
+ pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr);
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Handle the SIEIC exit
+ * * fail on codes not expected in the test cases
+ * Returns if interception is handled / execution can be continued
+ */
+static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+ struct kvm_run *run = self->run;
+
+ /* check SIE interception code */
+ pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n",
+ run->s390_sieic.icptcode,
+ run->s390_sieic.ipa,
+ run->s390_sieic.ipb);
+ switch (run->s390_sieic.icptcode) {
+ case ICPT_INST:
+ /* end execution in caller on intercepted instruction */
+ pr_info("sie instruction interception\n");
+ return uc_handle_insn_ic(self);
+ case ICPT_KSS:
+ uc_skey_enable(self);
+ return true;
+ case ICPT_OPEREXC:
+ /* operation exception */
+ TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb);
+ default:
+ TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode);
+ }
+ return true;
+}
+
+/* verify VM state on exit */
+static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_run *run = self->run;
+
+ switch (run->exit_reason) {
+ case KVM_EXIT_S390_UCONTROL:
+ /** check program interruption code
+ * handle page fault --> ucas map
+ */
+ uc_handle_exit_ucontrol(self);
+ break;
+ case KVM_EXIT_S390_SIEIC:
+ return uc_handle_sieic(self);
+ default:
+ pr_info("exit_reason %2d not handled\n", run->exit_reason);
+ }
+ return true;
+}
+
+/* run the VM until interrupted */
+static int uc_run_once(FIXTURE_DATA(uc_kvm) *self)
+{
+ int rc;
+
+ rc = ioctl(self->vcpu_fd, KVM_RUN, NULL);
+ print_run(self->run, self->sie_block);
+ print_regs(self->run);
+ pr_debug("run %d / %d %s\n", rc, errno, strerror(errno));
+ return rc;
+}
+
+static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+
+ /* assert vm was interrupted by diag 0x0044 */
+ TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
+ TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
+ TEST_ASSERT_EQ(0x8300, sie_block->ipa);
+ TEST_ASSERT_EQ(0x440000, sie_block->ipb);
+}
+
+TEST_F(uc_kvm, uc_no_user_region)
+{
+ struct kvm_userspace_memory_region region = {
+ .slot = 1,
+ .guest_phys_addr = self->code_gpa,
+ .memory_size = VM_MEM_EXT_SIZE,
+ .userspace_addr = (uintptr_t)self->code_hva,
+ };
+ struct kvm_userspace_memory_region2 region2 = {
+ .slot = 1,
+ .guest_phys_addr = self->code_gpa,
+ .memory_size = VM_MEM_EXT_SIZE,
+ .userspace_addr = (uintptr_t)self->code_hva,
+ };
+
+ ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &region));
+ ASSERT_TRUE(errno == EEXIST || errno == EINVAL)
+ TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION",
+ strerror(errno), errno);
+
+ ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, &region2));
+ ASSERT_TRUE(errno == EEXIST || errno == EINVAL)
+ TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION2",
+ strerror(errno), errno);
+}
+
+TEST_F(uc_kvm, uc_map_unmap)
+{
+ struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+ struct kvm_run *run = self->run;
+ const u64 disp = 1;
+ int rc;
+
+ /* copy test_mem_asm to code_hva / code_gpa */
+ TH_LOG("copy code %p to vm mapped memory %p / %p",
+ &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa);
+ memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE);
+
+ /* DAT disabled + 64 bit mode */
+ run->psw_mask = 0x0000000180000000ULL;
+ run->psw_addr = self->code_gpa;
+
+ /* set register content for test_mem_asm to access not mapped memory*/
+ sync_regs->gprs[1] = 0x55;
+ sync_regs->gprs[5] = self->base_gpa;
+ sync_regs->gprs[6] = VM_MEM_SIZE + disp;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+ /* run and expect to fail with ucontrol pic segment translation */
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(1, sync_regs->gprs[0]);
+ ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+
+ ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
+ ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code);
+
+ /* fail to map memory with not segment aligned address */
+ rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE);
+ ASSERT_GT(0, rc)
+ TH_LOG("ucas map for non segment address should fail but didn't; "
+ "result %d not expected, %s", rc, strerror(errno));
+
+ /* map / make additional memory available */
+ rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
+ ASSERT_EQ(0, rc)
+ TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno));
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ uc_assert_diag44(self);
+
+ /* assert registers and memory are in expected state */
+ ASSERT_EQ(2, sync_regs->gprs[0]);
+ ASSERT_EQ(0x55, sync_regs->gprs[1]);
+ ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp));
+
+ /* unmap and run loop again */
+ rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
+ ASSERT_EQ(0, rc)
+ TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno));
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(3, sync_regs->gprs[0]);
+ ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+ ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
+ /* handle ucontrol exit and remap memory after previous map and unmap */
+ ASSERT_EQ(true, uc_handle_exit(self));
+}
+
+TEST_F(uc_kvm, uc_gprs)
+{
+ struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+ struct kvm_run *run = self->run;
+ struct kvm_regs regs = {};
+
+ /* Set registers to values that are different from the ones that we expect below */
+ for (int i = 0; i < 8; i++)
+ sync_regs->gprs[i] = 8;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+ /* copy test_gprs_asm to code_hva / code_gpa */
+ TH_LOG("copy code %p to vm mapped memory %p / %p",
+ &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa);
+ memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE);
+
+ /* DAT disabled + 64 bit mode */
+ run->psw_mask = 0x0000000180000000ULL;
+ run->psw_addr = self->code_gpa;
+
+ /* run and expect interception of diag 44 */
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ uc_assert_diag44(self);
+
+ /* Retrieve and check guest register values */
+ ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
+ for (int i = 0; i < 8; i++) {
+ ASSERT_EQ(i, regs.gprs[i]);
+ ASSERT_EQ(i, sync_regs->gprs[i]);
+ }
+
+ /* run and expect interception of diag 44 again */
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ uc_assert_diag44(self);
+
+ /* check continued increment of register 0 value */
+ ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
+ ASSERT_EQ(1, regs.gprs[0]);
+ ASSERT_EQ(1, sync_regs->gprs[0]);
+}
+
+TEST_F(uc_kvm, uc_skey)
+{
+ struct kvm_s390_sie_block *sie_block = self->sie_block;
+ struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+ u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2);
+ struct kvm_run *run = self->run;
+ const u8 skeyvalue = 0x34;
+
+ /* copy test_skey_asm to code_hva / code_gpa */
+ TH_LOG("copy code %p to vm mapped memory %p / %p",
+ &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa);
+ memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE);
+
+ /* set register content for test_skey_asm to access not mapped memory */
+ sync_regs->gprs[1] = skeyvalue;
+ sync_regs->gprs[5] = self->base_gpa;
+ sync_regs->gprs[6] = test_vaddr;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+ /* DAT disabled + 64 bit mode */
+ run->psw_mask = 0x0000000180000000ULL;
+ run->psw_addr = self->code_gpa;
+
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(true, uc_handle_exit(self));
+ ASSERT_EQ(1, sync_regs->gprs[0]);
+
+ /* SSKE + ISKE */
+ sync_regs->gprs[1] = skeyvalue;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+ ASSERT_EQ(0, uc_run_once(self));
+
+ /*
+ * Bail out and skip the test after uc_skey_enable was executed but iske
+ * is still intercepted. Instructions are not handled by the kernel.
+ * Thus there is no need to test this here.
+ */
+ TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS);
+ TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
+ TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
+ TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
+ TEST_REQUIRE(sie_block->ipa != 0xb22b);
+
+ /* SSKE + ISKE contd. */
+ ASSERT_EQ(false, uc_handle_exit(self));
+ ASSERT_EQ(2, sync_regs->gprs[0]);
+ ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
+ uc_assert_diag44(self);
+
+ /* RRBE + ISKE */
+ sync_regs->gprs[1] = skeyvalue;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+ ASSERT_EQ(0, uc_run_once(self));
+ ASSERT_EQ(false, uc_handle_exit(self));
+ ASSERT_EQ(3, sync_regs->gprs[0]);
+ /* assert R reset but rest of skey unchanged */
+ ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
+ ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
+ uc_assert_diag44(self);
+}
+
+static char uc_flic_b[PAGE_SIZE];
+static struct kvm_s390_io_adapter uc_flic_ioa = { .id = 0 };
+static struct kvm_s390_io_adapter_req uc_flic_ioam = { .id = 0 };
+static struct kvm_s390_ais_req uc_flic_asim = { .isc = 0 };
+static struct kvm_s390_ais_all uc_flic_asima = { .simm = 0 };
+static struct uc_flic_attr_test {
+ char *name;
+ struct kvm_device_attr a;
+ int hasrc;
+ int geterrno;
+ int seterrno;
+} uc_flic_attr_tests[] = {
+ {
+ .name = "KVM_DEV_FLIC_GET_ALL_IRQS",
+ .seterrno = EINVAL,
+ .a = {
+ .group = KVM_DEV_FLIC_GET_ALL_IRQS,
+ .addr = (u64)&uc_flic_b,
+ .attr = PAGE_SIZE,
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_ENQUEUE",
+ .geterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_ENQUEUE, },
+ },
+ {
+ .name = "KVM_DEV_FLIC_CLEAR_IRQS",
+ .geterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_CLEAR_IRQS, },
+ },
+ {
+ .name = "KVM_DEV_FLIC_ADAPTER_REGISTER",
+ .geterrno = EINVAL,
+ .a = {
+ .group = KVM_DEV_FLIC_ADAPTER_REGISTER,
+ .addr = (u64)&uc_flic_ioa,
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_ADAPTER_MODIFY",
+ .geterrno = EINVAL,
+ .seterrno = EINVAL,
+ .a = {
+ .group = KVM_DEV_FLIC_ADAPTER_MODIFY,
+ .addr = (u64)&uc_flic_ioam,
+ .attr = sizeof(uc_flic_ioam),
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_CLEAR_IO_IRQ",
+ .geterrno = EINVAL,
+ .seterrno = EINVAL,
+ .a = {
+ .group = KVM_DEV_FLIC_CLEAR_IO_IRQ,
+ .attr = 32,
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_AISM",
+ .geterrno = EINVAL,
+ .seterrno = ENOTSUP,
+ .a = {
+ .group = KVM_DEV_FLIC_AISM,
+ .addr = (u64)&uc_flic_asim,
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_AIRQ_INJECT",
+ .geterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_AIRQ_INJECT, },
+ },
+ {
+ .name = "KVM_DEV_FLIC_AISM_ALL",
+ .geterrno = ENOTSUP,
+ .seterrno = ENOTSUP,
+ .a = {
+ .group = KVM_DEV_FLIC_AISM_ALL,
+ .addr = (u64)&uc_flic_asima,
+ .attr = sizeof(uc_flic_asima),
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_APF_ENABLE",
+ .geterrno = EINVAL,
+ .seterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_APF_ENABLE, },
+ },
+ {
+ .name = "KVM_DEV_FLIC_APF_DISABLE_WAIT",
+ .geterrno = EINVAL,
+ .seterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_APF_DISABLE_WAIT, },
+ },
+};
+
+TEST_F(uc_kvm, uc_flic_attrs)
+{
+ struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC };
+ struct kvm_device_attr attr;
+ u64 value;
+ int rc, i;
+
+ rc = ioctl(self->vm_fd, KVM_CREATE_DEVICE, &cd);
+ ASSERT_EQ(0, rc) TH_LOG("create device failed with err %s (%i)",
+ strerror(errno), errno);
+
+ for (i = 0; i < ARRAY_SIZE(uc_flic_attr_tests); i++) {
+ TH_LOG("test %s", uc_flic_attr_tests[i].name);
+ attr = (struct kvm_device_attr) {
+ .group = uc_flic_attr_tests[i].a.group,
+ .attr = uc_flic_attr_tests[i].a.attr,
+ .addr = uc_flic_attr_tests[i].a.addr,
+ };
+ if (attr.addr == 0)
+ attr.addr = (u64)&value;
+
+ rc = ioctl(cd.fd, KVM_HAS_DEVICE_ATTR, &attr);
+ EXPECT_EQ(uc_flic_attr_tests[i].hasrc, !!rc)
+ TH_LOG("expected dev attr missing %s",
+ uc_flic_attr_tests[i].name);
+
+ rc = ioctl(cd.fd, KVM_GET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(!!uc_flic_attr_tests[i].geterrno, !!rc)
+ TH_LOG("get dev attr rc not expected on %s %s (%i)",
+ uc_flic_attr_tests[i].name,
+ strerror(errno), errno);
+ if (uc_flic_attr_tests[i].geterrno)
+ EXPECT_EQ(uc_flic_attr_tests[i].geterrno, errno)
+ TH_LOG("get dev attr errno not expected on %s %s (%i)",
+ uc_flic_attr_tests[i].name,
+ strerror(errno), errno);
+
+ rc = ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(!!uc_flic_attr_tests[i].seterrno, !!rc)
+ TH_LOG("set sev attr rc not expected on %s %s (%i)",
+ uc_flic_attr_tests[i].name,
+ strerror(errno), errno);
+ if (uc_flic_attr_tests[i].seterrno)
+ EXPECT_EQ(uc_flic_attr_tests[i].seterrno, errno)
+ TH_LOG("set dev attr errno not expected on %s %s (%i)",
+ uc_flic_attr_tests[i].name,
+ strerror(errno), errno);
+ }
+
+ close(cd.fd);
+}
+
+TEST_F(uc_kvm, uc_set_gsi_routing)
+{
+ struct kvm_irq_routing *routing = kvm_gsi_routing_create();
+ struct kvm_irq_routing_entry ue = {
+ .type = KVM_IRQ_ROUTING_S390_ADAPTER,
+ .gsi = 1,
+ .u.adapter = (struct kvm_irq_routing_s390_adapter) {
+ .ind_addr = 0,
+ },
+ };
+ int rc;
+
+ routing->entries[0] = ue;
+ routing->nr = 1;
+ rc = ioctl(self->vm_fd, KVM_SET_GSI_ROUTING, routing);
+ ASSERT_EQ(-1, rc) TH_LOG("err %s (%i)", strerror(errno), errno);
+ ASSERT_EQ(EINVAL, errno) TH_LOG("err %s (%i)", strerror(errno), errno);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c
new file mode 100644
index 000000000000..714906c1d12a
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/user_operexec.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test operation exception forwarding.
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Authors:
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#include "kselftest.h"
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+static void guest_code_instr0(void)
+{
+ asm(".word 0x0000");
+}
+
+static void test_user_instr0(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code_user_operexec(void)
+{
+ asm(".word 0x0807");
+}
+
+static void test_user_operexec(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /*
+ * Since user_operexec is the superset it can be used for the
+ * 0 instruction.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+/* combine user_instr0 and user_operexec */
+static void test_user_operexec_combined(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /* Reverse enablement order */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Run all tests above.
+ *
+ * Enablement after VCPU has been added is automatically tested since
+ * we enable the capability after VCPU creation.
+ */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "instr0", test_user_instr0 },
+ { "operexec", test_user_operexec },
+ { "operexec_combined", test_user_operexec_combined},
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0));
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
deleted file mode 100644
index 9f49ead380ab..000000000000
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ /dev/null
@@ -1,166 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test for s390x KVM_S390_MEM_OP
- *
- * Copyright (C) 2019, Red Hat, Inc.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-
-#define VCPU_ID 1
-
-static uint8_t mem1[65536];
-static uint8_t mem2[65536];
-
-static void guest_code(void)
-{
- int i;
-
- for (;;) {
- for (i = 0; i < sizeof(mem2); i++)
- mem2[i] = mem1[i];
- GUEST_SYNC(0);
- }
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_s390_mem_op ksmo;
- int rv, i, maxsize;
-
- setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
-
- maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP);
- if (!maxsize) {
- print_skip("CAP_S390_MEM_OP not supported");
- exit(KSFT_SKIP);
- }
- if (maxsize > sizeof(mem1))
- maxsize = sizeof(mem1);
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
-
- for (i = 0; i < sizeof(mem1); i++)
- mem1[i] = i * i + i;
-
- /* Set the first array */
- ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1);
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
-
- /* Let the guest code copy the first array to the second */
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "Unexpected exit reason: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- memset(mem2, 0xaa, sizeof(mem2));
-
- /* Get the second array */
- ksmo.gaddr = (uintptr_t)mem2;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
- ksmo.buf = (uintptr_t)mem2;
- ksmo.ar = 0;
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
-
- TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
- "Memory contents do not match!");
-
- /* Check error conditions - first bad size: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = -1;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
- TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
-
- /* Zero size: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = 0;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
- TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
- "ioctl allows 0 as size");
-
- /* Bad flags: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = -1;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
-
- /* Bad operation: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = -1;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
-
- /* Bad guest address: */
- ksmo.gaddr = ~0xfffUL;
- ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
- TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
-
- /* Bad host address: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = 0;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
- TEST_ASSERT(rv == -1 && errno == EFAULT,
- "ioctl does not report bad host memory address");
-
- /* Bad access register: */
- run->psw_mask &= ~(3UL << (63 - 17));
- run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
- vcpu_run(vm, VCPU_ID); /* To sync new state to SIE block */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 17;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
- run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */
- vcpu_run(vm, VCPU_ID); /* Run to sync new state */
-
- kvm_vm_free(vm);
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index f127ed31dba7..7fe427ff9b38 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
@@ -17,12 +16,10 @@
#include <kvm_util.h>
#include <processor.h>
-#define VCPU_ID 0
-
/*
- * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
- * 2MB sized and aligned region so that the initial region corresponds to
- * exactly one large page.
+ * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB
+ * sized and aligned region so that the initial region corresponds to exactly
+ * one large page.
*/
#define MEM_REGION_SIZE 0x200000
@@ -54,8 +51,8 @@ static inline uint64_t guest_spin_on_val(uint64_t spin_val)
static void *vcpu_worker(void *data)
{
- struct kvm_vm *vm = data;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu = data;
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
uint64_t cmd;
@@ -64,13 +61,11 @@ static void *vcpu_worker(void *data)
* which will occur if the guest attempts to access a memslot after it
* has been deleted or while it is being moved .
*/
- run = vcpu_state(vm, VCPU_ID);
-
while (1) {
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
if (run->exit_reason == KVM_EXIT_IO) {
- cmd = get_ucall(vm, VCPU_ID, &uc);
+ cmd = get_ucall(vcpu, &uc);
if (cmd != UCALL_SYNC)
break;
@@ -92,8 +87,7 @@ static void *vcpu_worker(void *data)
}
if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
- TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2]);
+ REPORT_GUEST_ASSERT(uc);
return NULL;
}
@@ -103,23 +97,24 @@ static void wait_for_vcpu(void)
struct timespec ts;
TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
- "clock_gettime() failed: %d\n", errno);
+ "clock_gettime() failed: %d", errno);
ts.tv_sec += 2;
TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
- "sem_timedwait() failed: %d\n", errno);
+ "sem_timedwait() failed: %d", errno);
/* Wait for the vCPU thread to reenter the guest. */
usleep(100000);
}
-static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
+static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread,
+ void *guest_code)
{
struct kvm_vm *vm;
uint64_t *hva;
uint64_t gpa;
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
MEM_REGION_GPA, MEM_REGION_SLOT,
@@ -132,13 +127,13 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
- virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2);
/* Ditto for the host mapping so that both pages can be zeroed. */
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
memset(hva, 0, 2 * 4096);
- pthread_create(vcpu_thread, NULL, vcpu_worker, vm);
+ pthread_create(vcpu_thread, NULL, vcpu_worker, *vcpu);
/* Ensure the guest thread is spun up. */
wait_for_vcpu();
@@ -160,30 +155,37 @@ static void guest_code_move_memory_region(void)
* window where the memslot is invalid is usually quite small.
*/
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+ __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+ "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val);
/* Spin until the misaligning memory region move completes. */
val = guest_spin_on_val(MMIO_VAL);
- GUEST_ASSERT_1(val == 1 || val == 0, val);
+ __GUEST_ASSERT(val == 1 || val == 0,
+ "Expected '0' or '1' (no MMIO), got '%lx'", val);
/* Spin until the memory region starts to get re-aligned. */
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+ __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+ "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val);
/* Spin until the re-aligning memory region move completes. */
val = guest_spin_on_val(MMIO_VAL);
- GUEST_ASSERT_1(val == 1, val);
+ GUEST_ASSERT_EQ(val, 1);
GUEST_DONE();
}
-static void test_move_memory_region(void)
+static void test_move_memory_region(bool disable_slot_zap_quirk)
{
pthread_t vcpu_thread;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t *hva;
- vm = spawn_vm(&vcpu_thread, guest_code_move_memory_region);
+ vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region);
+
+ if (disable_slot_zap_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
@@ -221,21 +223,33 @@ static void test_move_memory_region(void)
static void guest_code_delete_memory_region(void)
{
+ struct desc_ptr idt;
uint64_t val;
+ /*
+ * Clobber the IDT so that a #PF due to the memory region being deleted
+ * escalates to triple-fault shutdown. Because the memory region is
+ * deleted, there will be no valid mappings. As a result, KVM will
+ * repeatedly intercepts the state-2 page fault that occurs when trying
+ * to vector the guest's #PF. I.e. trying to actually handle the #PF
+ * in the guest will never succeed, and so isn't an option.
+ */
+ memset(&idt, 0, sizeof(idt));
+ set_idt(&idt);
+
GUEST_SYNC(0);
/* Spin until the memory region is deleted. */
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == MMIO_VAL, val);
+ GUEST_ASSERT_EQ(val, MMIO_VAL);
/* Spin until the memory region is recreated. */
val = guest_spin_on_val(MMIO_VAL);
- GUEST_ASSERT_1(val == 0, val);
+ GUEST_ASSERT_EQ(val, 0);
/* Spin until the memory region is deleted. */
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == MMIO_VAL, val);
+ GUEST_ASSERT_EQ(val, MMIO_VAL);
asm("1:\n\t"
".pushsection .rodata\n\t"
@@ -252,17 +266,21 @@ static void guest_code_delete_memory_region(void)
"final_rip_end: .quad 1b\n\t"
".popsection");
- GUEST_ASSERT_1(0, 0);
+ GUEST_ASSERT(0);
}
-static void test_delete_memory_region(void)
+static void test_delete_memory_region(bool disable_slot_zap_quirk)
{
pthread_t vcpu_thread;
+ struct kvm_vcpu *vcpu;
struct kvm_regs regs;
struct kvm_run *run;
struct kvm_vm *vm;
- vm = spawn_vm(&vcpu_thread, guest_code_delete_memory_region);
+ vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region);
+
+ if (disable_slot_zap_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
/* Delete the memory region, the guest should not die. */
vm_mem_region_delete(vm, MEM_REGION_SLOT);
@@ -286,13 +304,13 @@ static void test_delete_memory_region(void)
pthread_join(vcpu_thread, NULL);
- run = vcpu_state(vm, VCPU_ID);
+ run = vcpu->run;
TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN ||
run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
"Unexpected exit reason = %d", run->exit_reason);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
/*
* On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already,
@@ -301,7 +319,7 @@ static void test_delete_memory_region(void)
if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
TEST_ASSERT(regs.rip >= final_rip_start &&
regs.rip < final_rip_end,
- "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
+ "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx",
final_rip_start, final_rip_end, regs.rip);
kvm_vm_free(vm);
@@ -309,26 +327,81 @@ static void test_delete_memory_region(void)
static void test_zero_memory_regions(void)
{
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
pr_info("Testing KVM_RUN with zero added memory regions\n");
- vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
-
- TEST_ASSERT(!ioctl(vm_get_fd(vm), KVM_SET_NR_MMU_PAGES, 64),
- "KVM_SET_NR_MMU_PAGES failed, errno = %d\n", errno);
- vcpu_run(vm, VCPU_ID);
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
- run = vcpu_state(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
- "Unexpected exit_reason = %u\n", run->exit_reason);
+ vm_ioctl(vm, KVM_SET_NR_MMU_PAGES, (void *)64ul);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
kvm_vm_free(vm);
}
#endif /* __x86_64__ */
+static void test_invalid_memory_region_flags(void)
+{
+ uint32_t supported_flags = KVM_MEM_LOG_DIRTY_PAGES;
+ const uint32_t v2_only_flags = KVM_MEM_GUEST_MEMFD;
+ struct kvm_vm *vm;
+ int r, i;
+
+#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__
+ supported_flags |= KVM_MEM_READONLY;
+#endif
+
+#ifdef __x86_64__
+ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
+ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
+ else
+#endif
+ vm = vm_create_barebones();
+
+ if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) & KVM_MEMORY_ATTRIBUTE_PRIVATE)
+ supported_flags |= KVM_MEM_GUEST_MEMFD;
+
+ for (i = 0; i < 32; i++) {
+ if ((supported_flags & BIT(i)) && !(v2_only_flags & BIT(i)))
+ continue;
+
+ r = __vm_set_user_memory_region(vm, 0, BIT(i),
+ 0, MEM_REGION_SIZE, NULL);
+
+ TEST_ASSERT(r && errno == EINVAL,
+ "KVM_SET_USER_MEMORY_REGION should have failed on v2 only flag 0x%lx", BIT(i));
+
+ if (supported_flags & BIT(i))
+ continue;
+
+ r = __vm_set_user_memory_region2(vm, 0, BIT(i),
+ 0, MEM_REGION_SIZE, NULL, 0, 0);
+ TEST_ASSERT(r && errno == EINVAL,
+ "KVM_SET_USER_MEMORY_REGION2 should have failed on unsupported flag 0x%lx", BIT(i));
+ }
+
+ if (supported_flags & KVM_MEM_GUEST_MEMFD) {
+ int guest_memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0);
+
+ r = __vm_set_user_memory_region2(vm, 0,
+ KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_GUEST_MEMFD,
+ 0, MEM_REGION_SIZE, NULL, guest_memfd, 0);
+ TEST_ASSERT(r && errno == EINVAL,
+ "KVM_SET_USER_MEMORY_REGION2 should have failed, dirty logging private memory is unsupported");
+
+ r = __vm_set_user_memory_region2(vm, 0,
+ KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD,
+ 0, MEM_REGION_SIZE, NULL, guest_memfd, 0);
+ TEST_ASSERT(r && errno == EINVAL,
+ "KVM_SET_USER_MEMORY_REGION2 should have failed, read-only GUEST_MEMFD memslots are unsupported");
+
+ close(guest_memfd);
+ }
+}
+
/*
* Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any
* tentative to add further slots should fail.
@@ -339,76 +412,243 @@ static void test_add_max_memory_regions(void)
struct kvm_vm *vm;
uint32_t max_mem_slots;
uint32_t slot;
- uint64_t guest_addr = 0x0;
- uint64_t mem_reg_npages;
- void *mem;
+ void *mem, *mem_aligned, *mem_extra;
+ size_t alignment;
+
+#ifdef __s390x__
+ /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
+ alignment = 0x100000;
+#else
+ alignment = 1;
+#endif
max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
TEST_ASSERT(max_mem_slots > 0,
"KVM_CAP_NR_MEMSLOTS should be greater than 0");
pr_info("Allowed number of memory slots: %i\n", max_mem_slots);
- vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
-
- mem_reg_npages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, MEM_REGION_SIZE);
+ vm = vm_create_barebones();
/* Check it can be added memory slots up to the maximum allowed */
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
- for (slot = 0; slot < max_mem_slots; slot++) {
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_addr, slot, mem_reg_npages,
- 0);
- guest_addr += MEM_REGION_SIZE;
- }
+
+
+ mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1);
+ mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
+
+ for (slot = 0; slot < max_mem_slots; slot++)
+ vm_set_user_memory_region(vm, slot, 0,
+ ((uint64_t)slot * MEM_REGION_SIZE),
+ MEM_REGION_SIZE,
+ mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);
/* Check it cannot be added memory slots beyond the limit */
- mem = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+ mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
- ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION,
- &(struct kvm_userspace_memory_region) {slot, 0, guest_addr,
- MEM_REGION_SIZE, (uint64_t) mem});
+ ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
+ (uint64_t)max_mem_slots * MEM_REGION_SIZE,
+ MEM_REGION_SIZE, mem_extra);
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Adding one more memory slot should fail with EINVAL");
- munmap(mem, MEM_REGION_SIZE);
+ kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
+ kvm_munmap(mem_extra, MEM_REGION_SIZE);
+ kvm_vm_free(vm);
+}
+
+
+#ifdef __x86_64__
+static void test_invalid_guest_memfd(struct kvm_vm *vm, int memfd,
+ size_t offset, const char *msg)
+{
+ int r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, MEM_REGION_SIZE,
+ 0, memfd, offset);
+ TEST_ASSERT(r == -1 && errno == EINVAL, "%s", msg);
+}
+
+static void test_add_private_memory_region(void)
+{
+ struct kvm_vm *vm, *vm2;
+ int memfd, i;
+
+ pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n");
+
+ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
+
+ test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail");
+ test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail");
+
+ memfd = kvm_memfd_alloc(MEM_REGION_SIZE, false);
+ test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail");
+ close(memfd);
+
+ vm2 = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
+ memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0);
+ test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail");
+
+ vm_set_user_memory_region2(vm2, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0);
+ close(memfd);
+ kvm_vm_free(vm2);
+
+ memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0);
+ for (i = 1; i < PAGE_SIZE; i++)
+ test_invalid_guest_memfd(vm, memfd, i, "Unaligned offset should fail");
+
+ vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0);
+ close(memfd);
+
+ kvm_vm_free(vm);
+}
+
+static void test_add_overlapping_private_memory_regions(void)
+{
+ struct kvm_vm *vm;
+ int memfd;
+ int r;
+
+ pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n");
+
+ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
+
+ memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0);
+
+ vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, MEM_REGION_SIZE * 2, 0, memfd, 0);
+
+ vm_set_user_memory_region2(vm, MEM_REGION_SLOT + 1, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA * 2, MEM_REGION_SIZE * 2,
+ 0, memfd, MEM_REGION_SIZE * 2);
+
+ /*
+ * Delete the first memslot, and then attempt to recreate it except
+ * with a "bad" offset that results in overlap in the guest_memfd().
+ */
+ vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, 0, NULL, -1, 0);
+
+ /* Overlap the front half of the other slot. */
+ r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA * 2 - MEM_REGION_SIZE,
+ MEM_REGION_SIZE * 2,
+ 0, memfd, 0);
+ TEST_ASSERT(r == -1 && errno == EEXIST, "%s",
+ "Overlapping guest_memfd() bindings should fail with EEXIST");
+
+ /* And now the back half of the other slot. */
+ r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA * 2 + MEM_REGION_SIZE,
+ MEM_REGION_SIZE * 2,
+ 0, memfd, 0);
+ TEST_ASSERT(r == -1 && errno == EEXIST, "%s",
+ "Overlapping guest_memfd() bindings should fail with EEXIST");
+
+ close(memfd);
+ kvm_vm_free(vm);
+}
+
+static void guest_code_mmio_during_vectoring(void)
+{
+ const struct desc_ptr idt_desc = {
+ .address = MEM_REGION_GPA,
+ .size = 0xFFF,
+ };
+
+ set_idt(&idt_desc);
+
+ /* Generate a #GP by dereferencing a non-canonical address */
+ *((uint8_t *)NONCANONICAL) = 0x1;
+
+ GUEST_ASSERT(0);
+}
+
+/*
+ * This test points the IDT descriptor base to an MMIO address. It should cause
+ * a KVM internal error when an event occurs in the guest.
+ */
+static void test_mmio_during_vectoring(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ u64 expected_gpa;
+
+ pr_info("Testing MMIO during vectoring error handling\n");
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring);
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1);
+
+ run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV,
+ "Unexpected suberror = %d", vcpu->run->internal.suberror);
+ TEST_ASSERT(run->internal.ndata != 4, "Unexpected internal error data array size = %d",
+ run->internal.ndata);
+
+ /* The reported GPA should be IDT base + offset of the GP vector */
+ expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry);
+
+ TEST_ASSERT(run->internal.data[3] == expected_gpa,
+ "Unexpected GPA = %llx (expected %lx)",
+ vcpu->run->internal.data[3], expected_gpa);
+
kvm_vm_free(vm);
}
+#endif
int main(int argc, char *argv[])
{
#ifdef __x86_64__
int i, loops;
-#endif
+ int j, disable_slot_zap_quirk = 0;
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
-#ifdef __x86_64__
+ if (kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_SLOT_ZAP_ALL)
+ disable_slot_zap_quirk = 1;
/*
* FIXME: the zero-memslot test fails on aarch64 and s390x because
* KVM_RUN fails with ENOEXEC or EFAULT.
*/
test_zero_memory_regions();
+ test_mmio_during_vectoring();
#endif
+ test_invalid_memory_region_flags();
+
test_add_max_memory_regions();
#ifdef __x86_64__
+ if (kvm_has_cap(KVM_CAP_GUEST_MEMFD) &&
+ (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))) {
+ test_add_private_memory_region();
+ test_add_overlapping_private_memory_regions();
+ } else {
+ pr_info("Skipping tests for KVM_MEM_GUEST_MEMFD memory regions\n");
+ }
+
if (argc > 1)
- loops = atoi(argv[1]);
+ loops = atoi_positive("Number of iterations", argv[1]);
else
loops = 10;
- pr_info("Testing MOVE of in-use region, %d loops\n", loops);
- for (i = 0; i < loops; i++)
- test_move_memory_region();
+ for (j = 0; j <= disable_slot_zap_quirk; j++) {
+ pr_info("Testing MOVE of in-use region, %d loops, slot zap quirk %s\n",
+ loops, j ? "disabled" : "enabled");
+ for (i = 0; i < loops; i++)
+ test_move_memory_region(!!j);
- pr_info("Testing DELETE of in-use region, %d loops\n", loops);
- for (i = 0; i < loops; i++)
- test_delete_memory_region();
+ pr_info("Testing DELETE of in-use region, %d loops, slot zap quirk %s\n",
+ loops, j ? "disabled" : "enabled");
+ for (i = 0; i < loops; i++)
+ test_delete_memory_region(!!j);
+ }
#endif
return 0;
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index fcc840088c91..8edc1fca345b 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -4,23 +4,25 @@
*
* Copyright (C) 2020, Red Hat, Inc.
*/
-#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>
#include <linux/kernel.h>
-#include <sys/syscall.h>
#include <asm/kvm.h>
+#ifdef __riscv
+#include "sbi.h"
+#else
#include <asm/kvm_para.h>
+#endif
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
#define NR_VCPUS 4
#define ST_GPA_BASE (1 << 30)
-#define MIN_RUN_DELAY_NS 200000UL
static void *st_gva[NR_VCPUS];
static uint64_t guest_stolen_time[NR_VCPUS];
@@ -33,8 +35,8 @@ static uint64_t guest_stolen_time[NR_VCPUS];
static void check_status(struct kvm_steal_time *st)
{
GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
- GUEST_ASSERT(READ_ONCE(st->flags) == 0);
- GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0);
}
static void guest_code(int cpu)
@@ -42,7 +44,7 @@ static void guest_code(int cpu)
struct kvm_steal_time *st = st_gva[cpu];
uint32_t version;
- GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+ GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
memset(st, 0, sizeof(*st));
GUEST_SYNC(0);
@@ -60,49 +62,41 @@ static void guest_code(int cpu)
GUEST_DONE();
}
-static void steal_time_init(struct kvm_vm *vm)
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
{
- int i;
-
- if (!(kvm_get_supported_cpuid_entry(KVM_CPUID_FEATURES)->eax &
- KVM_FEATURE_STEAL_TIME)) {
- print_skip("steal-time not supported");
- exit(KSFT_SKIP);
- }
-
- for (i = 0; i < NR_VCPUS; ++i) {
- int ret;
+ return kvm_cpu_has(X86_FEATURE_KVM_STEAL_TIME);
+}
- vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid());
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ int ret;
- /* ST_GPA_BASE is identity mapped */
- st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
- sync_global_to_guest(vm, st_gva[i]);
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vcpu->vm, st_gva[i]);
- ret = _vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
- TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+ ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
+ (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
+ TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
- vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
- }
+ vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
}
-static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
- struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
- int i;
-
- pr_info("VCPU%d:\n", vcpuid);
- pr_info(" steal: %lld\n", st->steal);
- pr_info(" version: %d\n", st->version);
- pr_info(" flags: %d\n", st->flags);
- pr_info(" preempted: %d\n", st->preempted);
- pr_info(" u8_pad: ");
- for (i = 0; i < 3; ++i)
- pr_info("%d", st->u8_pad[i]);
- pr_info("\n pad: ");
- for (i = 0; i < 11; ++i)
- pr_info("%d", st->pad[i]);
- pr_info("\n");
+ struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
+
+ ksft_print_msg("VCPU%d:\n", vcpu_idx);
+ ksft_print_msg(" steal: %lld\n", st->steal);
+ ksft_print_msg(" version: %d\n", st->version);
+ ksft_print_msg(" flags: %d\n", st->flags);
+ ksft_print_msg(" preempted: %d\n", st->preempted);
+ ksft_print_msg(" u8_pad: %d %d %d\n",
+ st->u8_pad[0], st->u8_pad[1], st->u8_pad[2]);
+ ksft_print_msg(" pad: %d %d %d %d %d %d %d %d %d %d %d\n",
+ st->pad[0], st->pad[1], st->pad[2], st->pad[3],
+ st->pad[4], st->pad[5], st->pad[6], st->pad[7],
+ st->pad[8], st->pad[9], st->pad[10]);
}
#elif defined(__aarch64__)
@@ -120,25 +114,18 @@ struct st_time {
uint64_t st_time;
};
-static int64_t smccc(uint32_t func, uint32_t arg)
+static int64_t smccc(uint32_t func, uint64_t arg)
{
- unsigned long ret;
+ struct arm_smccc_res res;
- asm volatile(
- "mov x0, %1\n"
- "mov x1, %2\n"
- "hvc #0\n"
- "mov %0, x0\n"
- : "=r" (ret) : "r" (func), "r" (arg) :
- "x0", "x1", "x2", "x3");
-
- return ret;
+ do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res);
+ return res.a0;
}
static void check_status(struct st_time *st)
{
- GUEST_ASSERT(READ_ONCE(st->rev) == 0);
- GUEST_ASSERT(READ_ONCE(st->attr) == 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->rev), 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->attr), 0);
}
static void guest_code(int cpu)
@@ -147,15 +134,15 @@ static void guest_code(int cpu)
int64_t status;
status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
- GUEST_ASSERT(status == 0);
+ GUEST_ASSERT_EQ(status, 0);
status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES);
- GUEST_ASSERT(status == 0);
+ GUEST_ASSERT_EQ(status, 0);
status = smccc(PV_TIME_FEATURES, PV_TIME_ST);
- GUEST_ASSERT(status == 0);
+ GUEST_ASSERT_EQ(status, 0);
status = smccc(PV_TIME_ST, 0);
- GUEST_ASSERT(status != -1);
- GUEST_ASSERT(status == (ulong)st_gva[cpu]);
+ GUEST_ASSERT_NE(status, -1);
+ GUEST_ASSERT_EQ(status, (ulong)st_gva[cpu]);
st = (struct st_time *)status;
GUEST_SYNC(0);
@@ -169,70 +156,153 @@ static void guest_code(int cpu)
GUEST_DONE();
}
-static void steal_time_init(struct kvm_vm *vm)
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
{
struct kvm_device_attr dev = {
.group = KVM_ARM_VCPU_PVTIME_CTRL,
.attr = KVM_ARM_VCPU_PVTIME_IPA,
};
- int i, ret;
-
- ret = _vcpu_ioctl(vm, 0, KVM_HAS_DEVICE_ATTR, &dev);
- if (ret != 0 && errno == ENXIO) {
- print_skip("steal-time not supported");
- exit(KSFT_SKIP);
- }
- for (i = 0; i < NR_VCPUS; ++i) {
- uint64_t st_ipa;
+ return !__vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
+}
- vcpu_ioctl(vm, i, KVM_HAS_DEVICE_ATTR, &dev);
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t st_ipa;
+ int ret;
- dev.addr = (uint64_t)&st_ipa;
+ struct kvm_device_attr dev = {
+ .group = KVM_ARM_VCPU_PVTIME_CTRL,
+ .attr = KVM_ARM_VCPU_PVTIME_IPA,
+ .addr = (uint64_t)&st_ipa,
+ };
- /* ST_GPA_BASE is identity mapped */
- st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
- sync_global_to_guest(vm, st_gva[i]);
+ vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
- st_ipa = (ulong)st_gva[i] | 1;
- ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vm, st_gva[i]);
- st_ipa = (ulong)st_gva[i];
- vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+ st_ipa = (ulong)st_gva[i] | 1;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
- ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+ st_ipa = (ulong)st_gva[i];
+ vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
- }
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
}
-static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
- struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+ struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
- pr_info("VCPU%d:\n", vcpuid);
- pr_info(" rev: %d\n", st->rev);
- pr_info(" attr: %d\n", st->attr);
- pr_info(" st_time: %ld\n", st->st_time);
+ ksft_print_msg("VCPU%d:\n", vcpu_idx);
+ ksft_print_msg(" rev: %d\n", st->rev);
+ ksft_print_msg(" attr: %d\n", st->attr);
+ ksft_print_msg(" st_time: %ld\n", st->st_time);
}
+#elif defined(__riscv)
+
+/* SBI STA shmem must have 64-byte alignment */
+#define STEAL_TIME_SIZE ((sizeof(struct sta_struct) + 63) & ~63)
+
+static vm_paddr_t st_gpa[NR_VCPUS];
+
+struct sta_struct {
+ uint32_t sequence;
+ uint32_t flags;
+ uint64_t steal;
+ uint8_t preempted;
+ uint8_t pad[47];
+} __packed;
+
+static void sta_set_shmem(vm_paddr_t gpa, unsigned long flags)
+{
+ unsigned long lo = (unsigned long)gpa;
+#if __riscv_xlen == 32
+ unsigned long hi = (unsigned long)(gpa >> 32);
+#else
+ unsigned long hi = gpa == -1 ? -1 : 0;
#endif
+ struct sbiret ret = sbi_ecall(SBI_EXT_STA, 0, lo, hi, flags, 0, 0, 0);
-static long get_run_delay(void)
+ GUEST_ASSERT(ret.value == 0 && ret.error == 0);
+}
+
+static void check_status(struct sta_struct *st)
{
- char path[64];
- long val[2];
- FILE *fp;
+ GUEST_ASSERT(!(READ_ONCE(st->sequence) & 1));
+ GUEST_ASSERT(READ_ONCE(st->flags) == 0);
+ GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+}
- sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
- fp = fopen(path, "r");
- fscanf(fp, "%ld %ld ", &val[0], &val[1]);
- fclose(fp);
+static void guest_code(int cpu)
+{
+ struct sta_struct *st = st_gva[cpu];
+ uint32_t sequence;
+ long out_val = 0;
+ bool probe;
+
+ probe = guest_sbi_probe_extension(SBI_EXT_STA, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
+
+ sta_set_shmem(st_gpa[cpu], 0);
+ GUEST_SYNC(0);
+
+ check_status(st);
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ sequence = READ_ONCE(st->sequence);
+ check_status(st);
+ GUEST_SYNC(1);
+
+ check_status(st);
+ GUEST_ASSERT(sequence < READ_ONCE(st->sequence));
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ check_status(st);
+ GUEST_DONE();
+}
+
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
+{
+ uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA);
+ unsigned long enabled = vcpu_get_reg(vcpu, id);
+
+ TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result");
- return val[1];
+ return enabled;
}
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ st_gpa[i] = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)st_gva[i]);
+ sync_global_to_guest(vcpu->vm, st_gva[i]);
+ sync_global_to_guest(vcpu->vm, st_gpa[i]);
+}
+
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
+{
+ struct sta_struct *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
+ int i;
+
+ pr_info("VCPU%d:\n", vcpu_idx);
+ pr_info(" sequence: %d\n", st->sequence);
+ pr_info(" flags: %d\n", st->flags);
+ pr_info(" steal: %"PRIu64"\n", st->steal);
+ pr_info(" preempted: %d\n", st->preempted);
+ pr_info(" pad: ");
+ for (i = 0; i < 47; ++i)
+ pr_info("%d", st->pad[i]);
+ pr_info("\n");
+}
+
+#endif
+
static void *do_steal_time(void *arg)
{
struct timespec ts, stop;
@@ -249,29 +319,27 @@ static void *do_steal_time(void *arg)
return NULL;
}
-static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+static void run_vcpu(struct kvm_vcpu *vcpu)
{
struct ucall uc;
- vcpu_args_set(vm, vcpuid, 1, vcpuid);
+ vcpu_run(vcpu);
- vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
-
- switch (get_ucall(vm, vcpuid, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
case UCALL_DONE:
break;
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ exit_reason_str(vcpu->run->exit_reason));
}
}
int main(int ac, char **av)
{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
struct kvm_vm *vm;
pthread_attr_t attr;
pthread_t thread;
@@ -291,26 +359,27 @@ int main(int ac, char **av)
pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
- /* Create a one VCPU guest and an identity mapped memslot for the steal time structure */
- vm = vm_create_default(0, 0, guest_code);
+ /* Create a VM and an identity mapped memslot for the steal time structure */
+ vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
- virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0);
- ucall_init(vm, NULL);
-
- /* Add the rest of the VCPUs */
- for (i = 1; i < NR_VCPUS; ++i)
- vm_vcpu_add_default(vm, i, guest_code);
+ virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
- steal_time_init(vm);
+ ksft_print_header();
+ TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
+ ksft_set_plan(NR_VCPUS);
/* Run test on each VCPU */
for (i = 0; i < NR_VCPUS; ++i) {
+ steal_time_init(vcpus[i], i);
+
+ vcpu_args_set(vcpus[i], 1, i);
+
/* First VCPU run initializes steal-time */
- run_vcpu(vm, i);
+ run_vcpu(vcpus[i]);
/* Second VCPU run, expect guest stolen time to be <= run_delay */
- run_vcpu(vm, i);
+ run_vcpu(vcpus[i]);
sync_global_from_guest(vm, guest_stolen_time[i]);
stolen_time = guest_stolen_time[i];
run_delay = get_run_delay();
@@ -322,7 +391,7 @@ int main(int ac, char **av)
run_delay = get_run_delay();
pthread_create(&thread, &attr, do_steal_time, NULL);
do
- pthread_yield();
+ sched_yield();
while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
pthread_join(thread, NULL);
run_delay = get_run_delay() - run_delay;
@@ -331,7 +400,7 @@ int main(int ac, char **av)
MIN_RUN_DELAY_NS, run_delay);
/* Run VCPU again to confirm stolen time is consistent with run_delay */
- run_vcpu(vm, i);
+ run_vcpu(vcpus[i]);
sync_global_from_guest(vm, guest_stolen_time[i]);
stolen_time = guest_stolen_time[i] - stolen_time;
TEST_ASSERT(stolen_time >= run_delay,
@@ -339,14 +408,15 @@ int main(int ac, char **av)
run_delay, stolen_time);
if (verbose) {
- pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i,
- guest_stolen_time[i], stolen_time);
- if (stolen_time == run_delay)
- pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)");
- pr_info("\n");
+ ksft_print_msg("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld%s\n",
+ i, guest_stolen_time[i], stolen_time,
+ stolen_time == run_delay ?
+ " (BONUS: guest test-stolen-time even exactly matches test-run_delay)" : "");
steal_time_dump(vm, i);
}
+ ksft_test_result_pass("vcpu%d\n", i);
}
- return 0;
+ /* Print results and exit() accordingly */
+ ksft_finished();
}
diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c
new file mode 100644
index 000000000000..513d421a9bff
--- /dev/null
+++ b/tools/testing/selftests/kvm/system_counter_offset_test.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the system counter from userspace
+ */
+#include <asm/kvm_para.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#ifdef __x86_64__
+
+struct test_case {
+ uint64_t tsc_offset;
+};
+
+static struct test_case test_cases[] = {
+ { 0 },
+ { 180 * NSEC_PER_SEC },
+ { -180 * NSEC_PER_SEC },
+};
+
+static void check_preconditions(struct kvm_vcpu *vcpu)
+{
+ __TEST_REQUIRE(!__vcpu_has_device_attr(vcpu, KVM_VCPU_TSC_CTRL,
+ KVM_VCPU_TSC_OFFSET),
+ "KVM_VCPU_TSC_OFFSET not supported; skipping test");
+}
+
+static void setup_system_counter(struct kvm_vcpu *vcpu, struct test_case *test)
+{
+ vcpu_device_attr_set(vcpu, KVM_VCPU_TSC_CTRL, KVM_VCPU_TSC_OFFSET,
+ &test->tsc_offset);
+}
+
+static uint64_t guest_read_system_counter(struct test_case *test)
+{
+ return rdtsc();
+}
+
+static uint64_t host_read_guest_system_counter(struct test_case *test)
+{
+ return rdtsc() + test->tsc_offset;
+}
+
+#else /* __x86_64__ */
+
+#error test not implemented for this architecture!
+
+#endif
+
+#define GUEST_SYNC_CLOCK(__stage, __val) \
+ GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ struct test_case *test = &test_cases[i];
+
+ GUEST_SYNC_CLOCK(i, guest_read_system_counter(test));
+ }
+}
+
+static void handle_sync(struct ucall *uc, uint64_t start, uint64_t end)
+{
+ uint64_t obs = uc->args[2];
+
+ TEST_ASSERT(start <= obs && obs <= end,
+ "unexpected system counter value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+ obs, start, end);
+
+ pr_info("system counter value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+ obs, start, end);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+ REPORT_GUEST_ASSERT(*uc);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ uint64_t start, end;
+ struct ucall uc;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ struct test_case *test = &test_cases[i];
+
+ setup_system_counter(vcpu, test);
+ start = host_read_guest_system_counter(test);
+ vcpu_run(vcpu);
+ end = host_read_guest_system_counter(test);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(&uc, start, end);
+ break;
+ case UCALL_ABORT:
+ handle_abort(&uc);
+ return;
+ default:
+ TEST_ASSERT(0, "unhandled ucall %ld",
+ get_ucall(vcpu, &uc));
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ check_preconditions(vcpu);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
new file mode 100644
index 000000000000..f4ce5a185a7d
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/amx_test.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define NUM_TILES 8
+#define TILE_SIZE 1024
+#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Tile configuration associated: */
+#define PALETTE_TABLE_INDEX 1
+#define MAX_TILES 16
+#define RESERVED_BYTES 14
+
+#define XSAVE_HDR_OFFSET 512
+
+struct tile_config {
+ u8 palette_id;
+ u8 start_row;
+ u8 reserved[RESERVED_BYTES];
+ u16 colsb[MAX_TILES];
+ u8 rows[MAX_TILES];
+};
+
+struct tile_data {
+ u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+ u16 bytes_per_tile;
+ u16 bytes_per_row;
+ u16 max_names;
+ u16 max_rows;
+ u32 xsave_offset;
+ u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
+static inline void __ldtilecfg(void *cfg)
+{
+ asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+ : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
+ asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+ : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xsavec (%%rdi)"
+ : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static void check_xtile_info(void)
+{
+ GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
+ GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
+
+ xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
+ GUEST_ASSERT(xtile.xsave_offset == 2816);
+ xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
+ GUEST_ASSERT(xtile.xsave_size == 8192);
+ GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
+ GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
+ PALETTE_TABLE_INDEX);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
+ xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
+ GUEST_ASSERT(xtile.max_names == 8);
+ xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
+ GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+ xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
+ GUEST_ASSERT(xtile.bytes_per_row == 64);
+ xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
+ GUEST_ASSERT(xtile.max_rows == 16);
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+ int i;
+
+ /* Only palette id 1 */
+ cfg->palette_id = 1;
+ for (i = 0; i < xtile.max_names; i++) {
+ cfg->colsb[i] = xtile.bytes_per_row;
+ cfg->rows[i] = xtile.max_rows;
+ }
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+ struct tile_data *tiledata,
+ struct xstate *xstate)
+{
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
+ this_cpu_has(X86_FEATURE_OSXSAVE));
+ check_xtile_info();
+ GUEST_SYNC(1);
+
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(2);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ GUEST_SYNC(3);
+ /* Check save/restore when trap to userspace */
+ __tileloadd(tiledata);
+ GUEST_SYNC(4);
+ __tilerelease();
+ GUEST_SYNC(5);
+ /*
+ * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
+ * the xcomp_bv.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+ /*
+ * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
+ * remains the same even when amx tiledata is disabled by IA32_XFD.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
+
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ /* Trigger #NM exception */
+ __tileloadd(tiledata);
+ GUEST_SYNC(10);
+
+ GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+ /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
+ GUEST_SYNC(7);
+ GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ /* Clear xfd_err */
+ wrmsr(MSR_IA32_XFD_ERR, 0);
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_x86_state *state;
+ int xsave_restore_size;
+ vm_vaddr_t amx_cfg, tiledata, xstate;
+ struct ucall uc;
+ u32 amx_offset;
+ int ret;
+
+ /*
+ * Note, all off-by-default features must be enabled before anything
+ * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
+ */
+ vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
+ "KVM should enumerate max XSAVE size when XSAVE is supported");
+ xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
+
+ vcpu_regs_get(vcpu, &regs1);
+
+ /* Register #NM handler */
+ vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+ /* amx cfg for guest_code */
+ amx_cfg = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+ /* amx tiledata for guest_code */
+ tiledata = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+ /* XSAVE state for guest_code */
+ xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ switch (uc.args[1]) {
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+ break;
+ case 4:
+ case 10:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+ /* Compacted mode, get amx offset by xsave area
+ * size subtract 8K amx size.
+ */
+ amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+ state = vcpu_save_state(vcpu);
+ void *amx_start = (void *)state->xsave + amx_offset;
+ void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+ /* Only check TMM0 register, 1 tile */
+ ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+ TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+ kvm_x86_state_cleanup(state);
+ break;
+ case 9:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ fprintf(stderr, "UCALL_DONE\n");
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
new file mode 100644
index 000000000000..8b15a13df939
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for KVM_X86_DISABLE_EXITS_APERFMPERF
+ *
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test the ability to disable VM-exits for rdmsr of IA32_APERF and
+ * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs
+ * return the host's values.
+ *
+ * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <asm/msr-index.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define NUM_ITERATIONS 10000
+
+static int open_dev_msr(int cpu)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
+ return open_path_or_exit(path, O_RDONLY);
+}
+
+static uint64_t read_dev_msr(int msr_fd, uint32_t msr)
+{
+ uint64_t data;
+ ssize_t rc;
+
+ rc = pread(msr_fd, &data, sizeof(data), msr);
+ TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr);
+
+ return data;
+}
+
+static void guest_read_aperf_mperf(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_ITERATIONS; i++)
+ GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF));
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_read_aperf_mperf();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set
+ * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel.
+ */
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *nested_test_data)
+{
+ guest_read_aperf_mperf();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(nested_test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(nested_test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+static void guest_no_aperfmperf(void)
+{
+ uint64_t msr_val;
+ uint8_t vector;
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ uint64_t host_aperf_before, host_mperf_before;
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int msr_fd, cpu, i;
+
+ /* Sanity check that APERF/MPERF are unsupported by default. */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+ kvm_vm_free(vm);
+
+ cpu = pin_self_to_any_cpu();
+
+ msr_fd = open_dev_msr(cpu);
+
+ /*
+ * This test requires a non-standard VM initialization, because
+ * KVM_ENABLE_CAP cannot be used on a VM file descriptor after
+ * a VCPU has been created.
+ */
+ vm = vm_create(1);
+
+ TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) &
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (!has_nested)
+ nested_test_data_gva = NONCANONICAL;
+ else if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) {
+ uint64_t host_aperf_after, host_mperf_after;
+ uint64_t guest_aperf, guest_mperf;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ guest_aperf = uc.args[0];
+ guest_mperf = uc.args[1];
+
+ host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ TEST_ASSERT(host_aperf_before < guest_aperf,
+ "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_aperf_before, guest_aperf);
+ TEST_ASSERT(guest_aperf < host_aperf_after,
+ "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_aperf, host_aperf_after);
+ TEST_ASSERT(host_mperf_before < guest_mperf,
+ "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_mperf_before, guest_mperf);
+ TEST_ASSERT(guest_mperf < host_mperf_after,
+ "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_mperf, host_mperf_after);
+
+ host_aperf_before = host_aperf_after;
+ host_mperf_before = host_mperf_after;
+
+ break;
+ }
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE\n");
+done:
+ kvm_vm_free(vm);
+ close(msr_fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
new file mode 100644
index 000000000000..f8916bb34405
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * Verify KVM correctly emulates the APIC bus frequency when the VMM configures
+ * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. Start the APIC timer by
+ * programming TMICT (timer initial count) to the largest value possible (so
+ * that the timer will not expire during the test). Then, after an arbitrary
+ * amount of time has elapsed, verify TMCCT (timer current count) is within 1%
+ * of the expected value based on the time elapsed, the APIC bus frequency, and
+ * the programmed TDCR (timer divide configuration register).
+ */
+
+#include "apic.h"
+#include "test_util.h"
+
+/*
+ * Possible TDCR values with matching divide count. Used to modify APIC
+ * timer frequency.
+ */
+static const struct {
+ const uint32_t tdcr;
+ const uint32_t divide_count;
+} tdcrs[] = {
+ {0x0, 2},
+ {0x1, 4},
+ {0x2, 8},
+ {0x3, 16},
+ {0x8, 32},
+ {0x9, 64},
+ {0xa, 128},
+ {0xb, 1},
+};
+
+static bool is_x2apic;
+
+static void apic_enable(void)
+{
+ if (is_x2apic)
+ x2apic_enable();
+ else
+ xapic_enable();
+}
+
+static uint32_t apic_read_reg(unsigned int reg)
+{
+ return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
+}
+
+static void apic_write_reg(unsigned int reg, uint32_t val)
+{
+ if (is_x2apic)
+ x2apic_write_reg(reg, val);
+ else
+ xapic_write_reg(reg, val);
+}
+
+static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
+{
+ uint64_t tsc_hz = guest_tsc_khz * 1000;
+ const uint32_t tmict = ~0u;
+ uint64_t tsc0, tsc1, freq;
+ uint32_t tmcct;
+ int i;
+
+ apic_enable();
+
+ /*
+ * Setup one-shot timer. The vector does not matter because the
+ * interrupt should not fire.
+ */
+ apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED);
+
+ for (i = 0; i < ARRAY_SIZE(tdcrs); i++) {
+ apic_write_reg(APIC_TDCR, tdcrs[i].tdcr);
+ apic_write_reg(APIC_TMICT, tmict);
+
+ tsc0 = rdtsc();
+ udelay(delay_ms * 1000);
+ tmcct = apic_read_reg(APIC_TMCCT);
+ tsc1 = rdtsc();
+
+ /*
+ * Stop the timer _after_ reading the current, final count, as
+ * writing the initial counter also modifies the current count.
+ */
+ apic_write_reg(APIC_TMICT, 0);
+
+ freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0);
+ /* Check if measured frequency is within 5% of configured frequency. */
+ __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100,
+ "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu",
+ freq, apic_hz * 95 / 100, apic_hz * 105 / 100,
+ apic_hz, tdcrs[i].divide_count, tsc_hz);
+ }
+
+ GUEST_DONE();
+}
+
+static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
+{
+ bool done = false;
+ struct ucall uc;
+
+ while (!done) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+ }
+}
+
+static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
+ bool x2apic)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int ret;
+
+ is_x2apic = x2apic;
+
+ vm = vm_create(1);
+
+ sync_global_to_guest(vm, is_x2apic);
+
+ vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+ NSEC_PER_SEC / apic_hz);
+
+ vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
+ vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
+
+ ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+ NSEC_PER_SEC / apic_hz);
+ TEST_ASSERT(ret < 0 && errno == EINVAL,
+ "Setting of APIC bus frequency after vCPU is created should fail.");
+
+ if (!is_x2apic)
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ test_apic_bus_clock(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name);
+ puts("");
+ printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n");
+ printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ /*
+ * Arbitrarilty default to 25MHz for the APIC bus frequency, which is
+ * different enough from the default 1GHz to be interesting.
+ */
+ uint64_t apic_hz = 25 * 1000 * 1000;
+ uint64_t delay_ms = 100;
+ int opt;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
+
+ while ((opt = getopt(argc, argv, "d:f:h")) != -1) {
+ switch (opt) {
+ case 'f':
+ apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000;
+ break;
+ case 'd':
+ delay_ms = atoi_positive("Delay in milliseconds", optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ exit(KSFT_SKIP);
+ }
+ }
+
+ run_apic_bus_clock_test(apic_hz, delay_ms, false);
+ run_apic_bus_clock_test(apic_hz, delay_ms, true);
+}
diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
new file mode 100644
index 000000000000..7b3fda6842bc
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/cpuid_test.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat Inc.
+ *
+ * Generic tests for KVM CPUID set/get ioctls
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct cpuid_mask {
+ union {
+ struct {
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ };
+ u32 regs[4];
+ };
+};
+
+static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
+{
+ int i;
+ u32 eax, ebx, ecx, edx;
+
+ for (i = 0; i < guest_cpuid->nent; i++) {
+ __cpuid(guest_cpuid->entries[i].function,
+ guest_cpuid->entries[i].index,
+ &eax, &ebx, &ecx, &edx);
+
+ GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+ GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+ GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+ GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
+ }
+
+}
+
+static void guest_main(struct kvm_cpuid2 *guest_cpuid)
+{
+ GUEST_SYNC(1);
+
+ test_guest_cpuids(guest_cpuid);
+
+ GUEST_SYNC(2);
+
+ GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
+
+ GUEST_DONE();
+}
+
+static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry)
+{
+ struct cpuid_mask mask;
+
+ memset(&mask, 0xff, sizeof(mask));
+
+ switch (entry->function) {
+ case 0x1:
+ mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit);
+ break;
+ case 0x7:
+ mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit);
+ break;
+ case 0xd:
+ /*
+ * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current
+ * XCR0 and IA32_XSS MSR values.
+ */
+ if (entry->index < 2)
+ mask.ebx = 0;
+ break;
+ }
+ return mask;
+}
+
+static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
+ const struct kvm_cpuid2 *cpuid2)
+{
+ const struct kvm_cpuid_entry2 *e1, *e2;
+ int i;
+
+ TEST_ASSERT(cpuid1->nent == cpuid2->nent,
+ "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
+
+ for (i = 0; i < cpuid1->nent; i++) {
+ struct cpuid_mask mask;
+
+ e1 = &cpuid1->entries[i];
+ e2 = &cpuid2->entries[i];
+
+ TEST_ASSERT(e1->function == e2->function &&
+ e1->index == e2->index && e1->flags == e2->flags,
+ "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x",
+ i, e1->function, e1->index, e1->flags,
+ e2->function, e2->index, e2->flags);
+
+ /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */
+ mask = get_const_cpuid_mask(e1);
+
+ TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) &&
+ (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) &&
+ (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) &&
+ (e1->edx & mask.edx) == (e2->edx & mask.edx),
+ "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+ e1->function, e1->index,
+ e1->eax & mask.eax, e1->ebx & mask.ebx,
+ e1->ecx & mask.ecx, e1->edx & mask.edx,
+ e2->eax & mask.eax, e2->ebx & mask.ebx,
+ e2->ecx & mask.ecx, e2->edx & mask.edx);
+ }
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+}
+
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+{
+ int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
+ vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
+ struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
+
+ memcpy(guest_cpuids, cpuid, size);
+
+ *p_gva = gva;
+ return guest_cpuids;
+}
+
+static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *ent;
+ int rc;
+ u32 eax, ebx, x;
+
+ /* Setting unmodified CPUID is allowed */
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+ /* Changing CPU features is forbidden */
+ ent = vcpu_get_cpuid_entry(vcpu, 0x7);
+ ebx = ent->ebx;
+ ent->ebx--;
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(rc, "Changing CPU features should fail");
+ ent->ebx = ebx;
+
+ /* Changing MAXPHYADDR is forbidden */
+ ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
+ eax = ent->eax;
+ x = eax & 0xff;
+ ent->eax = (eax & ~0xffu) | (x - 1);
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+ ent->eax = eax;
+}
+
+static void test_get_cpuid2(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
+ int i, r;
+
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
+ "KVM didn't update nent on success, wanted %u, got %u",
+ vcpu->cpuid->nent, cpuid->nent);
+
+ for (i = 0; i < vcpu->cpuid->nent; i++) {
+ cpuid->nent = i;
+ r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
+ TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
+ }
+ free(cpuid);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t cpuid_gva;
+ struct kvm_vm *vm;
+ int stage;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
+
+ vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
+
+ vcpu_args_set(vcpu, 1, cpuid_gva);
+
+ for (stage = 0; stage < 3; stage++)
+ run_vcpu(vcpu, stage);
+
+ set_cpuid_after_run(vcpu);
+
+ test_get_cpuid2(vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c
new file mode 100644
index 000000000000..28cc66454601
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ * Wei Huang <wei@redhat.com>
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MAGIC_HYPERCALL_PORT 0x80
+
+static void guest_code(void)
+{
+ u32 regs[4] = {
+ [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function,
+ [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index,
+ };
+
+ /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */
+ GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE);
+
+ /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ /*
+ * Notify hypervisor to clear CR4.0SXSAVE, do CPUID and save output,
+ * and then restore CR4. Do this all in assembly to ensure no AVX
+ * instructions are executed while OSXSAVE=0.
+ */
+ asm volatile (
+ "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t"
+ "cpuid\n\t"
+ "mov %%rdi, %%cr4\n\t"
+ : "+a" (regs[KVM_CPUID_EAX]),
+ "=b" (regs[KVM_CPUID_EBX]),
+ "+c" (regs[KVM_CPUID_ECX]),
+ "=d" (regs[KVM_CPUID_EDX])
+ : "D" (get_cr4())
+ );
+
+ /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */
+ GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit)));
+
+ /* Verify restoring CR4 also restored OSXSAVE in CPUID. */
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_sregs sregs;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ while (1) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT &&
+ vcpu->run->io.direction == KVM_EXIT_IO_OUT) {
+ /* emulate hypervisor clearing CR4.OSXSAVE */
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.cr4 &= ~X86_CR4_OSXSAVE;
+ vcpu_sregs_set(vcpu, &sregs);
+ continue;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c
index 6097a8283377..2d814c1d1dc4 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86/debug_regs.c
@@ -8,12 +8,13 @@
#include <string.h>
#include "kvm_util.h"
#include "processor.h"
-
-#define VCPU_ID 0
+#include "apic.h"
#define DR6_BD (1 << 13)
#define DR7_GD (1 << 13)
+#define IRQ_VECTOR 0xAA
+
/* For testing data access debug BP */
uint32_t guest_value;
@@ -21,6 +22,11 @@ extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
static void guest_code(void)
{
+ /* Create a pending interrupt on current vCPU */
+ x2apic_enable();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+ APIC_DM_FIXED | IRQ_VECTOR);
+
/*
* Software BP tests.
*
@@ -38,12 +44,22 @@ static void guest_code(void)
"mov %%rax,%0;\n\t write_data:"
: "=m" (guest_value) : : "rax");
- /* Single step test, covers 2 basic instructions and 2 emulated */
+ /*
+ * Single step test, covers 2 basic instructions and 2 emulated
+ *
+ * Enable interrupts during the single stepping to see that pending
+ * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ.
+ *
+ * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler
+ * exits to userspace due to single-step being enabled.
+ */
asm volatile("ss_start: "
+ "sti\n\t"
"xor %%eax,%%eax\n\t"
"cpuid\n\t"
- "movl $0x1a0,%%ecx\n\t"
- "rdmsr\n\t"
+ "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t"
+ "wrmsr\n\t"
+ "cli\n\t"
: : : "eax", "ebx", "ecx", "edx");
/* DR6.BD test */
@@ -51,63 +67,63 @@ static void guest_code(void)
GUEST_DONE();
}
-#define CLEAR_DEBUG() memset(&debug, 0, sizeof(debug))
-#define APPLY_DEBUG() vcpu_set_guest_debug(vm, VCPU_ID, &debug)
#define CAST_TO_RIP(v) ((unsigned long long)&(v))
-#define SET_RIP(v) do { \
- vcpu_regs_get(vm, VCPU_ID, &regs); \
- regs.rip = (v); \
- vcpu_regs_set(vm, VCPU_ID, &regs); \
- } while (0)
-#define MOVE_RIP(v) SET_RIP(regs.rip + (v));
+
+static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
+{
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip += insn_len;
+ vcpu_regs_set(vcpu, &regs);
+}
int main(void)
{
struct kvm_guest_debug debug;
unsigned long long target_dr6, target_rip;
- struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
uint64_t cmd;
int i;
/* Instruction lengths starting at ss_start */
- int ss_size[4] = {
+ int ss_size[6] = {
+ 1, /* sti*/
2, /* xor */
2, /* cpuid */
5, /* mov */
2, /* rdmsr */
+ 1, /* cli */
};
- if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
- print_skip("KVM_CAP_SET_GUEST_DEBUG not supported");
- return 0;
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
/* Test software BPs - int3 */
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == BP_VECTOR &&
run->debug.arch.pc == CAST_TO_RIP(sw_bp),
"INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
run->exit_reason, run->debug.arch.exception,
run->debug.arch.pc, CAST_TO_RIP(sw_bp));
- MOVE_RIP(1);
+ vcpu_skip_insn(vcpu, 1);
/* Test instruction HW BP over DR[0-3] */
for (i = 0; i < 4; i++) {
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | (1UL << i);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -120,17 +136,17 @@ int main(void)
run->debug.arch.dr6, target_dr6);
}
/* Skip "nop" */
- MOVE_RIP(1);
+ vcpu_skip_insn(vcpu, 1);
/* Test data access HW BP over DR[0-3] */
for (i = 0; i < 4; i++) {
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
(0x000d0000UL << (4*i));
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | (1UL << i);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -142,22 +158,22 @@ int main(void)
run->debug.arch.pc, CAST_TO_RIP(write_data),
run->debug.arch.dr6, target_dr6);
/* Rollback the 4-bytes "mov" */
- MOVE_RIP(-7);
+ vcpu_skip_insn(vcpu, -7);
}
/* Skip the 4-bytes "mov" */
- MOVE_RIP(7);
+ vcpu_skip_insn(vcpu, 7);
/* Test single step */
target_rip = CAST_TO_RIP(ss_start);
target_dr6 = 0xffff4ff0ULL;
- vcpu_regs_get(vm, VCPU_ID, &regs);
- for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
+ for (i = 0; i < ARRAY_SIZE(ss_size); i++) {
target_rip += ss_size[i];
- CLEAR_DEBUG();
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ memset(&debug, 0, sizeof(debug));
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+ KVM_GUESTDBG_BLOCKIRQ;
debug.arch.debugreg[7] = 0x00000400;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
run->debug.arch.pc == target_rip &&
@@ -170,11 +186,11 @@ int main(void)
}
/* Finally test global disable */
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[7] = 0x400 | DR7_GD;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | DR6_BD;
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -187,12 +203,12 @@ int main(void)
target_dr6);
/* Disable all debug controls, run to the end */
- CLEAR_DEBUG();
- APPLY_DEBUG();
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO");
- cmd = get_ucall(vm, VCPU_ID, &uc);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ cmd = get_ucall(vcpu, &uc);
TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
new file mode 100644
index 000000000000..b0d2b04a7ff2
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty logging page splitting test
+ *
+ * Based on dirty_log_perf.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2023, Google, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+#include "ucall_common.h"
+
+#define VCPUS 2
+#define SLOTS 2
+#define ITERATIONS 2
+
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
+
+static u64 dirty_log_manual_caps;
+static bool host_quit;
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+struct kvm_page_stats {
+ uint64_t pages_4k;
+ uint64_t pages_2m;
+ uint64_t pages_1g;
+ uint64_t hugepages;
+};
+
+static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
+{
+ stats->pages_4k = vm_get_stat(vm, pages_4k);
+ stats->pages_2m = vm_get_stat(vm, pages_2m);
+ stats->pages_1g = vm_get_stat(vm, pages_1g);
+ stats->hugepages = stats->pages_2m + stats->pages_1g;
+
+ pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
+ stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
+ stats->hugepages);
+}
+
+static void run_vcpu_iteration(struct kvm_vm *vm)
+{
+ int i;
+
+ iteration++;
+ for (i = 0; i < VCPUS; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
+ iteration)
+ ;
+ }
+}
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+
+ while (!READ_ONCE(host_quit)) {
+ int current_iteration = READ_ONCE(iteration);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+
+ /* Wait for the start of the next iteration to be signaled. */
+ while (current_iteration == READ_ONCE(iteration) &&
+ READ_ONCE(iteration) >= 0 &&
+ !READ_ONCE(host_quit))
+ ;
+ }
+}
+
+static void run_test(enum vm_guest_mode mode, void *unused)
+{
+ struct kvm_vm *vm;
+ unsigned long **bitmaps;
+ uint64_t guest_num_pages;
+ uint64_t host_num_pages;
+ uint64_t pages_per_slot;
+ int i;
+ struct kvm_page_stats stats_populated;
+ struct kvm_page_stats stats_dirty_logging_enabled;
+ struct kvm_page_stats stats_dirty_pass[ITERATIONS];
+ struct kvm_page_stats stats_clear_pass[ITERATIONS];
+ struct kvm_page_stats stats_dirty_logging_disabled;
+ struct kvm_page_stats stats_repopulated;
+
+ vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
+ SLOTS, backing_src, false);
+
+ guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+ host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+ pages_per_slot = host_num_pages / SLOTS;
+ TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
+ TEST_ASSERT(!(host_num_pages % 512),
+ "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
+
+ bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
+
+ if (dirty_log_manual_caps)
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+ dirty_log_manual_caps);
+
+ /* Start the iterations */
+ iteration = -1;
+ host_quit = false;
+
+ for (i = 0; i < VCPUS; i++)
+ vcpu_last_completed_iteration[i] = -1;
+
+ memstress_start_vcpu_threads(VCPUS, vcpu_worker);
+
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_populated, "populating memory");
+
+ /* Enable dirty logging */
+ memstress_enable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
+
+ while (iteration < ITERATIONS) {
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_dirty_pass[iteration - 1],
+ "dirtying memory");
+
+ memstress_get_dirty_log(vm, bitmaps, SLOTS);
+
+ if (dirty_log_manual_caps) {
+ memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
+
+ get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
+ }
+ }
+
+ /* Disable dirty logging */
+ memstress_disable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
+
+ /* Run vCPUs again to fault pages back in. */
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_repopulated, "repopulating memory");
+
+ /*
+ * Tell the vCPU threads to quit. No need to manually check that vCPUs
+ * have stopped running after disabling dirty logging, the join will
+ * wait for them to exit.
+ */
+ host_quit = true;
+ memstress_join_vcpu_threads(VCPUS);
+
+ memstress_free_bitmaps(bitmaps, SLOTS);
+ memstress_destroy_vm(vm);
+
+ TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
+ stats_populated.pages_1g * 512 * 512), host_num_pages);
+
+ /*
+ * Check that all huge pages were split. Since large pages can only
+ * exist in the data slot, and the vCPUs should have dirtied all pages
+ * in the data slot, there should be no huge pages left after splitting.
+ * Splitting happens at dirty log enable time without
+ * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
+ * with that capability.
+ */
+ if (dirty_log_manual_caps) {
+ TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+ TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_clear_pass[0].pages_4k);
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+ } else {
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+ TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_dirty_logging_enabled.pages_4k);
+ }
+
+ /*
+ * Once dirty logging is disabled and the vCPUs have touched all their
+ * memory again, the hugepage counts should be the same as they were
+ * right after initial population of memory.
+ */
+ TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+ TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
+ name);
+ puts("");
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ backing_src_help("-s");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ int opt;
+
+ TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
+ TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
+
+ while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
+ switch (opt) {
+ case 'b':
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 'h':
+ help(argv[0]);
+ exit(0);
+ case 's':
+ backing_src = parse_backing_src_type(optarg);
+ break;
+ default:
+ help(argv[0]);
+ exit(1);
+ }
+ }
+
+ if (!is_backing_src_hugetlb(backing_src)) {
+ pr_info("This test will only work reliably with HugeTLB memory. "
+ "It can work with THP, but that is best effort.\n");
+ }
+
+ guest_modes_append_default();
+
+ dirty_log_manual_caps = 0;
+ for_each_guest_mode(run_test, NULL);
+
+ dirty_log_manual_caps =
+ kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+
+ if (dirty_log_manual_caps) {
+ dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
+ for_each_guest_mode(run_test, NULL);
+ } else {
+ pr_info("Skipping testing with MANUAL_PROTECT as it is not supported");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c
new file mode 100644
index 000000000000..81055476d394
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ *
+ * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
+ */
+#include "flds_emulation.h"
+#include "test_util.h"
+#include "ucall_common.h"
+
+#define MMIO_GPA 0x700000000
+#define MMIO_GVA MMIO_GPA
+
+static void guest_code(void)
+{
+ /* Execute flds with an MMIO address to force KVM to emulate it. */
+ flds(MMIO_GVA);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+ virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
+
+ vcpu_run(vcpu);
+ handle_flds_emulation_failure_exit(vcpu);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c
new file mode 100644
index 000000000000..8926cfe0e209
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fastops_test.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Execute a fastop() instruction, with or without forced emulation. BT bit 0
+ * to set RFLAGS.CF based on whether or not the input is even or odd, so that
+ * instructions like ADC and SBB are deterministic.
+ */
+#define fastop(__insn) \
+ "bt $0, %[bt_val]\n\t" \
+ __insn "\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t"
+
+#define flags_constraint(flags_val) [flags]"=r"(flags_val)
+#define bt_constraint(__bt_val) [bt_val]"rm"((uint32_t)__bt_val)
+
+#define guest_execute_fastop_1(FEP, insn, __val, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %[val]") \
+ : [val]"+r"(__val), flags_constraint(__flags) \
+ : bt_constraint(__val) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_1(insn, type_t, __val) \
+({ \
+ type_t val = __val, ex_val = __val, input = __val; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_1("", insn, ex_val, ex_flags); \
+ guest_execute_fastop_1(KVM_FEP, insn, val, flags); \
+ \
+ __GUEST_ASSERT(val == ex_val, \
+ "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \
+ (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, flags); \
+})
+
+#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %[input], %[output]") \
+ : [output]"+r"(__output), flags_constraint(__flags) \
+ : [input]"r"(__input), bt_constraint(__output) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_2(insn, type_t, __val1, __val2) \
+({ \
+ type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \
+ guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, (uint64_t)input, \
+ (uint64_t)input2, (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \
+})
+
+#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %%cl, %[output]") \
+ : [output]"+r"(__output), flags_constraint(__flags) \
+ : "c"(__shift), bt_constraint(__output) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \
+({ \
+ type_t output = __val2, ex_output = __val2, input = __val2; \
+ uint8_t shift = __val1; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \
+ guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, shift, (uint64_t)input, \
+ (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ ex_flags, insn, shift, (uint64_t)input, flags); \
+})
+
+#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \
+({ \
+ uint64_t ign_error_code; \
+ uint8_t vector; \
+ \
+ __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \
+ : "+a"(__a), "+d"(__d), flags_constraint(__flags), \
+ KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : [denom]"rm"(__rm), bt_constraint(__rm) \
+ : "cc", "memory", KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define guest_test_fastop_div(insn, type_t, __val1, __val2) \
+({ \
+ type_t _a = __val1, _d = __val1, rm = __val2; \
+ type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \
+ uint64_t flags, ex_flags; \
+ uint8_t v, ex_v; \
+ \
+ ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \
+ v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \
+ \
+ GUEST_ASSERT_EQ(v, ex_v); \
+ __GUEST_ASSERT(v == ex_v, \
+ "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \
+ ex_v, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, v); \
+ __GUEST_ASSERT(a == ex_a && d == ex_d, \
+ "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\
+ (uint64_t)ex_a, (uint64_t)ex_d, insn, (uint64_t)_a, \
+ (uint64_t)_d, (uint64_t)rm, (uint64_t)a, (uint64_t)d); \
+ __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \
+ "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, flags);\
+})
+
+static const uint64_t vals[] = {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 0x5555555555555555,
+ 0xaaaaaaaaaaaaaaaa,
+ 0xfefefefefefefefe,
+ 0xffffffffffffffff,
+};
+
+#define guest_test_fastops(type_t, suffix) \
+do { \
+ int i, j; \
+ \
+ for (i = 0; i < ARRAY_SIZE(vals); i++) { \
+ guest_test_fastop_1("dec" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("inc" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("neg" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("not" suffix, type_t, vals[i]); \
+ \
+ for (j = 0; j < ARRAY_SIZE(vals); j++) { \
+ guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \
+if (sizeof(type_t) != 1) { \
+ guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \
+} \
+ guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]); \
+ } \
+ } \
+} while (0)
+
+static void guest_code(void)
+{
+ guest_test_fastops(uint8_t, "b");
+ guest_test_fastops(uint16_t, "w");
+ guest_test_fastops(uint32_t, "l");
+ guest_test_fastops(uint64_t, "q");
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
new file mode 100644
index 000000000000..a72f13ae2edb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+static bool is_kvm_controlled_msr(uint32_t msr)
+{
+ return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
+}
+
+/*
+ * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
+ * MSR, and doesn't allow setting the hidden version.
+ */
+static bool is_hidden_vmx_msr(uint32_t msr)
+{
+ switch (msr) {
+ case MSR_IA32_VMX_PINBASED_CTLS:
+ case MSR_IA32_VMX_PROCBASED_CTLS:
+ case MSR_IA32_VMX_EXIT_CTLS:
+ case MSR_IA32_VMX_ENTRY_CTLS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_quirked_msr(uint32_t msr)
+{
+ return msr != MSR_AMD64_DE_CFG;
+}
+
+static void test_feature_msr(uint32_t msr)
+{
+ const uint64_t supported_mask = kvm_get_feature_msr(msr);
+ uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /*
+ * Don't bother testing KVM-controlled MSRs beyond verifying that the
+ * MSR can be read from userspace. Any value is effectively legal, as
+ * KVM is bound by x86 architecture, not by ABI.
+ */
+ if (is_kvm_controlled_msr(msr))
+ return;
+
+ /*
+ * More goofy behavior. KVM reports the host CPU's actual revision ID,
+ * but initializes the vCPU's revision ID to an arbitrary value.
+ */
+ if (msr == MSR_IA32_UCODE_REV)
+ reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065;
+
+ /*
+ * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the
+ * full set of features supported by KVM. For non-quirked MSRs, and
+ * when the quirk is disabled, KVM must zero-initialize the MSR and let
+ * userspace do the configuration.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value,
+ "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx",
+ reset_value, is_quirked_msr(msr) ? "" : "non-", msr,
+ vcpu_get_msr(vcpu, msr));
+ if (!is_hidden_vmx_msr(msr))
+ vcpu_set_msr(vcpu, msr, supported_mask);
+ kvm_vm_free(vm);
+
+ if (is_hidden_vmx_msr(msr))
+ return;
+
+ if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) ||
+ !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
+ return;
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS);
+
+ vcpu = vm_vcpu_add(vm, 0, NULL);
+ TEST_ASSERT(!vcpu_get_msr(vcpu, msr),
+ "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx",
+ msr, vcpu_get_msr(vcpu, msr));
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ const struct kvm_msr_list *feature_list;
+ int i;
+
+ /*
+ * Skip the entire test if MSR_FEATURES isn't supported, other tests
+ * will cover the "regular" list of MSRs, the coverage here is purely
+ * opportunistic and not interesting on its own.
+ */
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
+
+ (void)kvm_get_msr_index_list();
+
+ feature_list = kvm_get_feature_msr_index_list();
+ for (i = 0; i < feature_list->nmsrs; i++)
+ test_feature_msr(feature_list->indices[i]);
+}
diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
new file mode 100644
index 000000000000..762628f7d4ba
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+#include <stdint.h>
+
+#include "kvm_test_harness.h"
+#include "apic.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* VMCALL and VMMCALL are both 3-byte opcodes. */
+#define HYPERCALL_INSN_SIZE 3
+
+static bool quirk_disabled;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ regs->rax = -EFAULT;
+ regs->rip += HYPERCALL_INSN_SIZE;
+}
+
+static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 };
+static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
+
+extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
+static uint64_t do_sched_yield(uint8_t apic_id)
+{
+ uint64_t ret;
+
+ asm volatile("hypercall_insn:\n\t"
+ ".byte 0xcc,0xcc,0xcc\n\t"
+ : "=a"(ret)
+ : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+ : "memory");
+
+ return ret;
+}
+
+static void guest_main(void)
+{
+ const uint8_t *native_hypercall_insn;
+ const uint8_t *other_hypercall_insn;
+ uint64_t ret;
+
+ if (host_cpu_is_intel) {
+ native_hypercall_insn = vmx_vmcall;
+ other_hypercall_insn = svm_vmmcall;
+ } else if (host_cpu_is_amd) {
+ native_hypercall_insn = svm_vmmcall;
+ other_hypercall_insn = vmx_vmcall;
+ } else {
+ GUEST_ASSERT(0);
+ /* unreachable */
+ return;
+ }
+
+ memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
+
+ ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
+
+ /*
+ * If the quirk is disabled, verify that guest_ud_handler() "returned"
+ * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is
+ * enabled, verify that the hypercall succeeded and that KVM patched in
+ * the "right" hypercall.
+ */
+ if (quirk_disabled) {
+ GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+ GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ } else {
+ GUEST_ASSERT(!ret);
+ GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ }
+
+ GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
+ uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
+ }
+}
+
+static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
+{
+ struct kvm_vm *vm = vcpu->vm;
+
+ vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
+
+ if (disable_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+ KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+ quirk_disabled = disable_quirk;
+ sync_global_to_guest(vm, quirk_disabled);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ enter_guest(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
+{
+ test_fix_hypercall(vcpu, false);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
+{
+ test_fix_hypercall(vcpu, true);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h
new file mode 100644
index 000000000000..37b1a9f52864
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/flds_emulation.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_FLDS_EMULATION_H
+#define SELFTEST_KVM_FLDS_EMULATION_H
+
+#include "kvm_util.h"
+
+#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
+
+/*
+ * flds is an instruction that the KVM instruction emulator is known not to
+ * support. This can be used in guest code along with a mechanism to force
+ * KVM to emulate the instruction (e.g. by providing an MMIO address) to
+ * exercise emulation failures.
+ */
+static inline void flds(uint64_t address)
+{
+ __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
+}
+
+static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+ uint8_t *insn_bytes;
+ uint64_t flags;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Unexpected suberror: %u",
+ run->emulation_failure.suberror);
+
+ flags = run->emulation_failure.flags;
+ TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
+ flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
+ "run->emulation_failure is missing instruction bytes");
+
+ TEST_ASSERT(run->emulation_failure.insn_size >= 2,
+ "Expected a 2-byte opcode for 'flds', got %d bytes",
+ run->emulation_failure.insn_size);
+
+ insn_bytes = run->emulation_failure.insn_bytes;
+ TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
+ "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
+ insn_bytes[0], insn_bytes[1]);
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip += 2;
+ vcpu_regs_set(vcpu, &regs);
+}
+
+#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
new file mode 100644
index 000000000000..10b1b0ba374e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
+{
+ const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+ const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
+ const uint64_t legal = ignored | valid;
+ uint64_t val = BIT_ULL(bit);
+ uint64_t actual;
+ int r;
+
+ r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
+ TEST_ASSERT(val & ~legal ? !r : r == 1,
+ "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
+ val, val & ~legal ? "fail" : "succeed");
+
+ actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
+ TEST_ASSERT(actual == (val & valid),
+ "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
+ bit, actual, (val & valid));
+
+ vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ unsigned int bit;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ for (bit = 0; bit < BITS_PER_LONG; bit++)
+ test_hwcr_bit(vcpu, bit);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c
new file mode 100644
index 000000000000..e058bc676cd6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+} __packed;
+
+/* Simplified mul_u64_u64_shr() */
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } rm, rn, rh, a0, b0;
+ u64 c;
+
+ a0.ll = a;
+ b0.ll = b;
+
+ rm.ll = (u64)a0.l.low * b0.l.high;
+ rn.ll = (u64)a0.l.high * b0.l.low;
+ rh.ll = (u64)a0.l.high * b0.l.high;
+
+ rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+ rh.l.high = (c >> 32) + rh.l.high;
+
+ return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+ /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+ return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+ u64 r1, r2, t1, t2;
+
+ /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+ t1 = get_tscpage_ts(tsc_page);
+ r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+ /* 10 ms tolerance */
+ GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+ nop_loop();
+
+ t2 = get_tscpage_ts(tsc_page);
+ r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+ u64 tsc_scale, tsc_offset;
+
+ /* Set Guest OS id to enable Hyper-V emulation */
+ GUEST_SYNC(1);
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ GUEST_SYNC(2);
+
+ check_tsc_msr_rdtsc();
+
+ GUEST_SYNC(3);
+
+ /* Set up TSC page is disabled state, check that it's clean */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+ GUEST_SYNC(4);
+
+ /* Set up TSC page is enabled state */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+ GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+ GUEST_SYNC(5);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ GUEST_SYNC(6);
+
+ tsc_offset = tsc_page->tsc_offset;
+ /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
+ GUEST_SYNC(7);
+ /* Sanity check TSC page timestamp, it should be close to 0 */
+ GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+ GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+ nop_loop();
+
+ /*
+ * Enable Re-enlightenment and check that TSC page stays constant across
+ * KVM_SET_CLOCK.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+ tsc_offset = tsc_page->tsc_offset;
+ tsc_scale = tsc_page->tsc_scale;
+ GUEST_SYNC(8);
+ GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+ GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+ GUEST_SYNC(9);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ /*
+ * Disable re-enlightenment and TSC page, check that KVM doesn't update
+ * it anymore.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+ memset(tsc_page, 0, sizeof(*tsc_page));
+
+ GUEST_SYNC(10);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+ GUEST_DONE();
+}
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+ /* For increased accuracy, take mean rdtsc() before and afrer ioctl */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+ "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+ (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ vm_vaddr_t tsc_page_gva;
+ int stage;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ vcpu_set_hv_cpuid(vcpu);
+
+ tsc_page_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
+ TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+ "TSC page has to be page aligned");
+ vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+ host_check_tsc_msr_rdtsc(vcpu);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ /* Keep in sync with guest_main() */
+ TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
+ stage);
+ goto out;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ /* Reset kvmclock triggering TSC page update */
+ if (stage == 7 || stage == 8 || stage == 10) {
+ struct kvm_clock_data clock = {0};
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+ }
+ }
+
+out:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
new file mode 100644
index 000000000000..3c21af811d8f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_HYPERV_CPUID
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+static void guest_code(void)
+{
+}
+
+static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
+{
+ const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
+ const struct kvm_cpuid2 *hv_cpuid_entries;
+ int i;
+ int nent_expected = 10;
+ u32 test_val;
+
+ if (vcpu)
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+ else
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+
+ TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
+ "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+ " (returned %d)",
+ nent_expected, hv_cpuid_entries->nent);
+
+ for (i = 0; i < hv_cpuid_entries->nent; i++) {
+ const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+
+ TEST_ASSERT((entry->function >= 0x40000000) &&
+ (entry->function <= 0x40000082),
+ "function %x is out of supported range",
+ entry->function);
+
+ TEST_ASSERT(entry->index == 0,
+ ".index field should be zero");
+
+ TEST_ASSERT(entry->flags == 0,
+ ".flags field should be zero");
+
+ TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
+ !entry->padding[2], "padding should be zero");
+
+ switch (entry->function) {
+ case 0x40000000:
+ test_val = 0x40000082;
+
+ TEST_ASSERT(entry->eax == test_val,
+ "Wrong max leaf report in 0x40000000.EAX: %x"
+ " (evmcs=%d)",
+ entry->eax, evmcs_expected
+ );
+ break;
+ case 0x40000003:
+ TEST_ASSERT(has_irqchip || !(entry->edx & BIT(19)),
+ "\"Direct\" Synthetic Timers should require in-kernel APIC");
+ break;
+ case 0x40000004:
+ test_val = entry->eax & (1UL << 18);
+
+ TEST_ASSERT(!!test_val == !is_smt_possible(),
+ "NoNonArchitecturalCoreSharing bit"
+ " doesn't reflect SMT setting");
+
+ TEST_ASSERT(has_irqchip || !(entry->eax & BIT(10)),
+ "Cluster IPI (i.e. SEND_IPI) should require in-kernel APIC");
+ break;
+ case 0x4000000A:
+ TEST_ASSERT(entry->eax & (1UL << 19),
+ "Enlightened MSR-Bitmap should always be supported"
+ " 0x40000000.EAX: %x", entry->eax);
+ if (evmcs_expected)
+ TEST_ASSERT((entry->eax & 0xffff) == 0x101,
+ "Supported Enlightened VMCS version range is supposed to be 1:1"
+ " 0x40000000.EAX: %x", entry->eax);
+
+ break;
+ default:
+ break;
+
+ }
+ /*
+ * If needed for debug:
+ * fprintf(stdout,
+ * "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n",
+ * entry->function, entry->eax, entry->ebx, entry->ecx,
+ * entry->edx);
+ */
+ }
+
+ /*
+ * Note, the CPUID array returned by the system-scoped helper is a one-
+ * time allocation, i.e. must not be freed.
+ */
+ if (vcpu)
+ free((void *)hv_cpuid_entries);
+}
+
+static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ static struct kvm_cpuid2 cpuid = {.nent = 0};
+ int ret;
+
+ if (vcpu)
+ ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+ else
+ ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+
+ TEST_ASSERT(ret == -1 && errno == E2BIG,
+ "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+ " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+ /* Test the vCPU ioctl without an in-kernel local APIC. */
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
+ test_hv_cpuid(vcpu, false);
+ kvm_vm_free(vm);
+
+ /* Test vCPU ioctl version */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ test_hv_cpuid_e2big(vm, vcpu);
+ test_hv_cpuid(vcpu, false);
+
+ if (!kvm_cpu_has(X86_FEATURE_VMX) ||
+ !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+ print_skip("Enlightened VMCS is unsupported");
+ goto do_sys;
+ }
+ vcpu_enable_evmcs(vcpu);
+ test_hv_cpuid(vcpu, true);
+
+do_sys:
+ /* Test system ioctl version */
+ if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+ print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+ goto out;
+ }
+
+ test_hv_cpuid_e2big(vm, NULL);
+ test_hv_cpuid(NULL, kvm_cpu_has(X86_FEATURE_VMX));
+
+out:
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
new file mode 100644
index 000000000000..74cf19661309
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "hyperv.h"
+#include "vmx.h"
+
+static int ud_count;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ ud_count++;
+ regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+ /* Currently, L1 doesn't preserve GPRs during vmexits. */
+ __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+ "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+/* Exit to L1 from L2 with RDMSR instruction */
+void l2_guest_code(void)
+{
+ u64 unused;
+
+ GUEST_SYNC(7);
+
+ GUEST_SYNC(8);
+
+ /* Forced exit to L1 upon restore */
+ GUEST_SYNC(9);
+
+ vmcall();
+
+ /* MSR-Bitmap tests */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+ vmcall();
+ rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+ /* L2 TLB flush tests */
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
+ rdmsr_from_l2(MSR_FS_BASE);
+ /*
+ * Note: hypercall status (RAX) is not preserved correctly by L1 after
+ * synthetic vmexit, use unchecked version.
+ */
+ __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
+ &unused);
+
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
+
+void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
+ vm_vaddr_t hv_hcall_page_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
+
+ x2apic_enable();
+
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+ evmcs_enable();
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_SYNC(3);
+ GUEST_ASSERT(load_evmcs(hv_pages));
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ GUEST_SYNC(4);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(5);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+ current_evmcs->revision_id = -1u;
+ GUEST_ASSERT(vmlaunch());
+ current_evmcs->revision_id = EVMCS_VERSION;
+ GUEST_SYNC(6);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+ PIN_BASED_NMI_EXITING);
+
+ /* L2 TLB flush setup */
+ current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
+ current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+ current_evmcs->hv_vm_id = 1;
+ current_evmcs->hv_vp_id = 1;
+ current_vp_assist->nested_control.features.directhypercall = 1;
+ *(u32 *)(hv_pages->partition_assist) = 0;
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+ GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ /*
+ * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is
+ * up-to-date (RIP points where it should and not at the beginning
+ * of l2_guest_code(). GUEST_SYNC(9) checkes that.
+ */
+ GUEST_ASSERT(!vmresume());
+
+ GUEST_SYNC(10);
+
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ current_evmcs->guest_rip += 3; /* vmcall */
+
+ /* Intercept RDMSR 0xc0000100 */
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_USE_MSR_BITMAPS);
+ __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /* Enable enlightened MSR bitmap */
+ current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+ __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+ /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+ current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ GUEST_ASSERT(!vmresume());
+ /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ current_evmcs->guest_rip += 3; /* vmcall */
+
+ /* Now tell KVM we've changed MSR-Bitmap */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /*
+ * L2 TLB flush test. First VMCALL should be handled directly by L0,
+ * no VMCALL exit expected.
+ */
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+ /* Enable synthetic vmexit */
+ *(u32 *)(hv_pages->partition_assist) = 1;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_SYNC(11);
+
+ /* Try enlightened vmptrld with an incorrect GPA */
+ evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(ud_count == 1);
+ GUEST_DONE();
+}
+
+void inject_nmi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ events.nmi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+ vcpu_events_set(vcpu, &events);
+}
+
+static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
+ struct kvm_vcpu *vcpu)
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_x86_state *state;
+
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
+ vm_vaddr_t hcall_page;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+ TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
+
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+ vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+
+ pr_info("Running L1 which uses EVMCS to run L2\n");
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ vcpu = save_restore_vm(vm, vcpu);
+
+ /* Force immediate L2->L1 exit before resuming */
+ if (stage == 8) {
+ pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+ inject_nmi(vcpu);
+ }
+
+ /*
+ * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+ * restored VM (before the first KVM_RUN) to check that
+ * KVM_STATE_NESTED_EVMCS is not lost.
+ */
+ if (stage == 9) {
+ pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+ vcpu = save_restore_vm(vm, vcpu);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
new file mode 100644
index 000000000000..949e08e98f31
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
+ * exit to userspace and receive result in guest.
+ *
+ * Negative tests are present in hyperv_features.c
+ *
+ * Copyright 2022 Google LLC
+ * Author: Vipin Sharma <vipinsh@google.com>
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/* Any value is fine */
+#define EXT_CAPABILITIES 0xbull
+
+static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
+ vm_vaddr_t out_pg_gva)
+{
+ uint64_t *output_gva;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
+
+ output_gva = (uint64_t *)out_pg_gva;
+
+ hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
+
+ /* TLFS states output will be a uint64_t value */
+ GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ vm_vaddr_t hcall_out_page;
+ vm_vaddr_t hcall_in_page;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ uint64_t *outval;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+ /* Verify if extended hypercalls are supported */
+ if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
+ HV_ENABLE_EXTENDED_HYPERCALLS)) {
+ print_skip("Extended calls not supported by the kernel");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+ vcpu_set_hv_cpuid(vcpu);
+
+ /* Hypercall input */
+ hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
+
+ /* Hypercall output */
+ hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
+
+ vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
+ addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
+ *outval = EXT_CAPABILITIES;
+ run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
new file mode 100644
index 000000000000..130b9ce7e5dd
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/*
+ * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf
+ * but to activate the feature it is sufficient to set it to a non-zero
+ * value. Use BIT(0) for that.
+ */
+#define HV_PV_SPINLOCKS_TEST \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
+
+struct msr_data {
+ uint32_t idx;
+ bool fault_expected;
+ bool write;
+ u64 write_val;
+};
+
+struct hcall_data {
+ uint64_t control;
+ uint64_t expect;
+ bool ud_expected;
+};
+
+static bool is_write_only_msr(uint32_t msr)
+{
+ return msr == HV_X64_MSR_EOI;
+}
+
+static void guest_msr(struct msr_data *msr)
+{
+ uint8_t vector = 0;
+ uint64_t msr_val = 0;
+
+ GUEST_ASSERT(msr->idx);
+
+ if (msr->write)
+ vector = wrmsr_safe(msr->idx, msr->write_val);
+
+ if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
+ vector = rdmsr_safe(msr->idx, &msr_val);
+
+ if (msr->fault_expected)
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on %sMSR(0x%x), got %s",
+ msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
+ else
+ __GUEST_ASSERT(!vector,
+ "Expected success on %sMSR(0x%x), got %s",
+ msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
+
+ if (vector || is_write_only_msr(msr->idx))
+ goto done;
+
+ if (msr->write)
+ __GUEST_ASSERT(!vector,
+ "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
+ msr->idx, msr->write_val, msr_val);
+
+ /* Invariant TSC bit appears when TSC invariant control MSR is written to */
+ if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
+ if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
+ else
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
+ !!(msr_val & HV_INVARIANT_TSC_EXPOSED));
+ }
+
+done:
+ GUEST_DONE();
+}
+
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+ u64 res, input, output;
+ uint8_t vector;
+
+ GUEST_ASSERT_NE(hcall->control, 0);
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
+ if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+ input = pgs_gpa;
+ output = pgs_gpa + PAGE_SIZE;
+ } else {
+ input = output = 0;
+ }
+
+ vector = __hyperv_hypercall(hcall->control, input, output, &res);
+ if (hcall->ud_expected) {
+ __GUEST_ASSERT(vector == UD_VECTOR,
+ "Expected #UD for control '%lu', got %s",
+ hcall->control, ex_str(vector));
+ } else {
+ __GUEST_ASSERT(!vector,
+ "Expected no exception for control '%lu', got %s",
+ hcall->control, ex_str(vector));
+ GUEST_ASSERT_EQ(res, hcall->expect);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Enable all supported Hyper-V features, then clear the leafs holding
+ * the features that will be tested one by one.
+ */
+ vcpu_set_hv_cpuid(vcpu);
+
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+}
+
+static void guest_test_msrs_access(void)
+{
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage = 0;
+ vm_vaddr_t msr_gva;
+ struct msr_data *msr;
+ bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
+
+ while (true) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
+
+ msr_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+ msr = addr_gva2hva(vm, msr_gva);
+
+ vcpu_args_set(vcpu, 1, msr_gva);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
+
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
+
+ /* TODO: Make this entire test easier to maintain. */
+ if (stage >= 21)
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
+
+ switch (stage) {
+ case 0:
+ /*
+ * Only available when Hyper-V identification is set
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 1:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 2:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+ /*
+ * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+ * HV_X64_MSR_HYPERCALL available.
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = true;
+ msr->write_val = HYPERV_LINUX_OS_ID;
+ msr->fault_expected = false;
+ break;
+ case 3:
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 4:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+
+ case 5:
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 6:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 7:
+ /* Read only */
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 8:
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 9:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 10:
+ /* Read only */
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 11:
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 12:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 13:
+ /* Read only */
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 14:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 15:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 16:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = true;
+ /*
+ * TODO: the test only writes '0' to HV_X64_MSR_RESET
+ * at the moment, writing some other value there will
+ * trigger real vCPU reset and the code is not prepared
+ * to handle it yet.
+ */
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 17:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 18:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 19:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 20:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 21:
+ /*
+ * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+ * capability enabled and guest visible CPUID bit unset.
+ */
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 22:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 23:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 24:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 25:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 26:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+ case 27:
+ /* Direct mode test */
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 1 << 12;
+ msr->fault_expected = true;
+ break;
+ case 28:
+ vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 1 << 12;
+ msr->fault_expected = false;
+ break;
+
+ case 29:
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 30:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ case 31:
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 32:
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 33:
+ /* Read only */
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 34:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 35:
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 36:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+ case 37:
+ /* Can only write '0' */
+ msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 38:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 39:
+ vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 40:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ case 41:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 42:
+ vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
+ vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 43:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 44:
+ /* MSR is not available when CPUID feature bit is unset */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 45:
+ /* MSR is vailable when CPUID feature bit is set */
+ if (!has_invtsc)
+ goto next_stage;
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 46:
+ /* Writing bits other than 0 is forbidden */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 0xdeadbeef;
+ msr->fault_expected = true;
+ break;
+ case 47:
+ /* Setting bit 0 enables the feature */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ default:
+ kvm_vm_free(vm);
+ return;
+ }
+
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+ pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+ msr->idx, msr->write ? "write" : "read");
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ return;
+ }
+
+next_stage:
+ stage++;
+ kvm_vm_free(vm);
+ }
+}
+
+static void guest_test_hcalls_access(void)
+{
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage = 0;
+ vm_vaddr_t hcall_page, hcall_params;
+ struct hcall_data *hcall;
+
+ while (true) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ hcall_params = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+ hcall = addr_gva2hva(vm, hcall_params);
+
+ vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
+
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
+
+ switch (stage) {
+ case 0:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+ hcall->control = 0xbeef;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+
+ case 1:
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 2:
+ vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 3:
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 4:
+ vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 5:
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ case 6:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 7:
+ vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_OPERATION_DENIED;
+ break;
+
+ case 8:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 9:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 10:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 11:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 12:
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 13:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ case 14:
+ /* Nothing in 'sparse banks' -> success */
+ hcall->control = HVCALL_SEND_IPI_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 15:
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 16:
+ vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 17:
+ /* XMM fast hypercall */
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = true;
+ break;
+ case 18:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = false;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 19:
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 20:
+ vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
+ hcall->expect = HV_STATUS_INVALID_PARAMETER;
+ break;
+ case 21:
+ kvm_vm_free(vm);
+ return;
+ }
+
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+ pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ return;
+ }
+
+ stage++;
+ kvm_vm_free(vm);
+ }
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID));
+
+ pr_info("Testing access to Hyper-V specific MSRs\n");
+ guest_test_msrs_access();
+
+ pr_info("Testing access to Hyper-V hypercalls\n");
+ guest_test_hcalls_access();
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
new file mode 100644
index 000000000000..ca61836c4e32
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define RECEIVER_VCPU_ID_1 2
+#define RECEIVER_VCPU_ID_2 65
+
+#define IPI_VECTOR 0xfe
+
+static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[2];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+ u32 vector;
+ u32 reserved;
+ u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
+ u32 vector;
+ u32 reserved;
+ struct hv_vpset vp_set;
+};
+
+static inline void hv_init(vm_vaddr_t pgs_gpa)
+{
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+}
+
+static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+ u32 vcpu_id;
+
+ x2apic_enable();
+ hv_init(pgs_gpa);
+
+ vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+ /* Signal sender vCPU we're ready */
+ ipis_rcvd[vcpu_id] = (u64)-1;
+
+ for (;;) {
+ safe_halt();
+ cli();
+ }
+}
+
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+ u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+ ipis_rcvd[vcpu_id]++;
+ wrmsr(HV_X64_MSR_EOI, 1);
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+}
+
+static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+ struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
+ struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
+ int stage = 1, ipis_expected[2] = {0};
+
+ hv_init(pgs_gpa);
+ GUEST_SYNC(stage++);
+
+ /* Wait for receiver vCPUs to come up */
+ while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
+ nop_loop();
+ ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
+
+ /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+ ipi->vector = IPI_VECTOR;
+ ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
+ hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+ hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
+ IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+ memset(hcall_page, 0, PAGE_SIZE);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 0;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+ memset(hcall_page, 0, PAGE_SIZE);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 1;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
+ memset(hcall_page, 0, PAGE_SIZE);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+ ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
+ memset(hcall_page, 0, PAGE_SIZE);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /*
+ * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
+ */
+ ipi_ex->vp_set.valid_bank_mask = 0;
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
+ IPI_VECTOR, HV_GENERIC_SET_ALL);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+ int old, r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ vcpu_run(vcpu);
+
+ TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu[3];
+ vm_vaddr_t hcall_page;
+ pthread_t threads[2];
+ int stage = 1, r;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI));
+
+ vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+
+ vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
+ vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
+ vcpu_set_hv_cpuid(vcpu[1]);
+
+ vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
+ vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
+ vcpu_set_hv_cpuid(vcpu[2]);
+
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+ vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_hv_cpuid(vcpu[0]);
+
+ r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+ TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+ TEST_ASSERT(!r, "pthread_create failed errno=%d", errno);
+
+ while (true) {
+ vcpu_run(vcpu[0]);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu[0], &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)",
+ uc.args[1], stage);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ stage++;
+ }
+
+done:
+ cancel_join_vcpu_thread(threads[0], vcpu[1]);
+ cancel_join_vcpu_thread(threads[1], vcpu[2]);
+ kvm_vm_free(vm);
+
+ return r;
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
new file mode 100644
index 000000000000..0ddb63229bcb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Tests for Hyper-V extensions to SVM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "hyperv.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/* Exit to L1 from L2 with RDMSR instruction */
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+ /* Currently, L1 doesn't preserve GPRs during vmexits. */
+ __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+ "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+void l2_guest_code(void)
+{
+ u64 unused;
+
+ GUEST_SYNC(3);
+ /* Exit to L1 */
+ vmmcall();
+
+ /* MSR-Bitmap tests */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+ vmmcall();
+ rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+ GUEST_SYNC(5);
+
+ /* L2 TLB flush tests */
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ rdmsr_from_l2(MSR_FS_BASE);
+ /*
+ * Note: hypercall status (RAX) is not preserved correctly by L1 after
+ * synthetic vmexit, use unchecked version.
+ */
+ __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS, &unused);
+
+ /* Done, exit to L1 and never come back. */
+ vmmcall();
+}
+
+static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
+ struct hyperv_test_pages *hv_pages,
+ vm_vaddr_t pgs_gpa)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+ struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
+
+ GUEST_SYNC(1);
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+
+ GUEST_ASSERT(svm->vmcb_gpa);
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* L2 TLB flush setup */
+ hve->partition_assist_page = hv_pages->partition_assist_gpa;
+ hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+ hve->hv_vm_id = 1;
+ hve->hv_vp_id = 1;
+ current_vp_assist->nested_control.features.directhypercall = 1;
+ *(u32 *)(hv_pages->partition_assist) = 0;
+
+ GUEST_SYNC(2);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(4);
+ vmcb->save.rip += 3;
+
+ /* Intercept RDMSR 0xc0000100 */
+ vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
+ __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+ /* Enable enlightened MSR bitmap */
+ hve->hv_enlightenments_control.msr_bitmap = 1;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+ /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+ __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
+ /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+ vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
+ run_guest(vmcb, svm->vmcb_gpa);
+ /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ vmcb->save.rip += 3; /* vmcall */
+
+ /* Now tell KVM we've changed MSR-Bitmap */
+ vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+
+ /*
+ * L2 TLB flush test. First VMCALL should be handled directly by L0,
+ * no VMCALL exit expected.
+ */
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+ /* Enable synthetic vmexit */
+ *(u32 *)(hv_pages->partition_assist) = 1;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
+ GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(6);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
+ vm_vaddr_t hcall_page;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+
+ hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
+
+ vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
new file mode 100644
index 000000000000..a3b7ce155981
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+#include <asm/barrier.h>
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define WORKER_VCPU_ID_1 2
+#define WORKER_VCPU_ID_2 65
+
+#define NTRY 100
+#define NTEST_PAGES 2
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+ u64 address_space;
+ u64 flags;
+ u64 processor_mask;
+ u64 gva_list[];
+} __packed;
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+ u64 address_space;
+ u64 flags;
+ struct hv_vpset hv_vp_set;
+ u64 gva_list[];
+} __packed;
+
+/*
+ * Pass the following info to 'workers' and 'sender'
+ * - Hypercall page's GVA
+ * - Hypercall page's GPA
+ * - Test pages GVA
+ * - GVAs of the test pages' PTEs
+ */
+struct test_data {
+ vm_vaddr_t hcall_gva;
+ vm_paddr_t hcall_gpa;
+ vm_vaddr_t test_pages;
+ vm_vaddr_t test_pages_pte[NTEST_PAGES];
+};
+
+/* 'Worker' vCPU code checking the contents of the test page */
+static void worker_guest_code(vm_vaddr_t test_data)
+{
+ struct test_data *data = (struct test_data *)test_data;
+ u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+ void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
+ u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
+ u64 expected, val;
+
+ x2apic_enable();
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+
+ for (;;) {
+ cpu_relax();
+
+ expected = READ_ONCE(*this_cpu);
+
+ /*
+ * Make sure the value in the test page is read after reading
+ * the expectation for the first time. Pairs with wmb() in
+ * prepare_to_test().
+ */
+ rmb();
+
+ val = READ_ONCE(*(u64 *)data->test_pages);
+
+ /*
+ * Make sure the value in the test page is read after before
+ * reading the expectation for the second time. Pairs with wmb()
+ * post_test().
+ */
+ rmb();
+
+ /*
+ * '0' indicates the sender is between iterations, wait until
+ * the sender is ready for this vCPU to start checking again.
+ */
+ if (!expected)
+ continue;
+
+ /*
+ * Re-read the per-vCPU byte to ensure the sender didn't move
+ * onto a new iteration.
+ */
+ if (expected != READ_ONCE(*this_cpu))
+ continue;
+
+ GUEST_ASSERT(val == expected);
+ }
+}
+
+/*
+ * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
+ * test page. '0' means don't check.
+ */
+static void set_expected_val(void *addr, u64 val, int vcpu_id)
+{
+ void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
+
+ *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
+}
+
+/*
+ * Update PTEs swapping two test pages.
+ * TODO: use swap()/xchg() when these are provided.
+ */
+static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
+{
+ uint64_t tmp = *(uint64_t *)pte_gva1;
+
+ *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
+ *(uint64_t *)pte_gva2 = tmp;
+}
+
+/*
+ * TODO: replace the silly NOP loop with a proper udelay() implementation.
+ */
+static inline void do_delay(void)
+{
+ int i;
+
+ for (i = 0; i < 1000000; i++)
+ asm volatile("nop");
+}
+
+/*
+ * Prepare to test: 'disable' workers by setting the expectation to '0',
+ * clear hypercall input page and then swap two test pages.
+ */
+static inline void prepare_to_test(struct test_data *data)
+{
+ /* Clear hypercall input page */
+ memset((void *)data->hcall_gva, 0, PAGE_SIZE);
+
+ /* 'Disable' workers */
+ set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
+ set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
+
+ /* Make sure workers are 'disabled' before we swap PTEs. */
+ wmb();
+
+ /* Make sure workers have enough time to notice */
+ do_delay();
+
+ /* Swap test page mappings */
+ swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
+}
+
+/*
+ * Finalize the test: check hypercall resule set the expected val for
+ * 'worker' CPUs and give them some time to test.
+ */
+static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
+{
+ /* Make sure we change the expectation after swapping PTEs */
+ wmb();
+
+ /* Set the expectation for workers, '0' means don't test */
+ set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
+ set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
+
+ /* Make sure workers have enough time to test */
+ do_delay();
+}
+
+#define TESTVAL1 0x0101010101010101
+#define TESTVAL2 0x0202020202020202
+
+/* Main vCPU doing the test */
+static void sender_guest_code(vm_vaddr_t test_data)
+{
+ struct test_data *data = (struct test_data *)test_data;
+ struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
+ struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
+ vm_paddr_t hcall_gpa = data->hcall_gpa;
+ int i, stage = 1;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
+
+ /* "Slow" hypercalls */
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+ hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS;
+ flush->processor_mask = 0;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+ hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS;
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [1] */
+ flush_ex->gva_list[1] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_1 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [2] */
+ flush_ex->gva_list[2] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush_ex->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ /* "Fast" hypercalls */
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [1] */
+ flush_ex->gva_list[1] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_1 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 :
+ TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [2] */
+ flush_ex->gva_list[2] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT,
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush_ex->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
+ }
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu[3];
+ pthread_t threads[2];
+ vm_vaddr_t test_data_page, gva;
+ vm_paddr_t gpa;
+ uint64_t *pte;
+ struct test_data *data;
+ struct ucall uc;
+ int stage = 1, r, i;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH));
+
+ vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+ /* Test data page */
+ test_data_page = vm_vaddr_alloc_page(vm);
+ data = (struct test_data *)addr_gva2hva(vm, test_data_page);
+
+ /* Hypercall input/output */
+ data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
+ data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
+ memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
+
+ /*
+ * Test pages: the first one is filled with '0x01's, the second with '0x02's
+ * and the test will swap their mappings. The third page keeps the indication
+ * about the current state of mappings.
+ */
+ data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
+ for (i = 0; i < NTEST_PAGES; i++)
+ memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
+ (u8)(i + 1), PAGE_SIZE);
+ set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
+ set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
+
+ /*
+ * Get PTE pointers for test pages and map them inside the guest.
+ * Use separate page for each PTE for simplicity.
+ */
+ gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
+ for (i = 0; i < NTEST_PAGES; i++) {
+ pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+ gpa = addr_hva2gpa(vm, pte);
+ virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK);
+ data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
+ }
+
+ /*
+ * Sender vCPU which performs the test: swaps test pages, sets expectation
+ * for 'workers' and issues TLB flush hypercalls.
+ */
+ vcpu_args_set(vcpu[0], 1, test_data_page);
+ vcpu_set_hv_cpuid(vcpu[0]);
+
+ /* Create worker vCPUs which check the contents of the test pages */
+ vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
+ vcpu_args_set(vcpu[1], 1, test_data_page);
+ vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
+ vcpu_set_hv_cpuid(vcpu[1]);
+
+ vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
+ vcpu_args_set(vcpu[2], 1, test_data_page);
+ vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
+ vcpu_set_hv_cpuid(vcpu[2]);
+
+ r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+ TEST_ASSERT(!r, "pthread_create() failed");
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+ TEST_ASSERT(!r, "pthread_create() failed");
+
+ while (true) {
+ vcpu_run(vcpu[0]);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu[0], &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ stage++;
+ }
+
+done:
+ cancel_join_vcpu_thread(threads[0], vcpu[1]);
+ cancel_join_vcpu_thread(threads[1], vcpu[2]);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
new file mode 100644
index 000000000000..d88500c118eb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ */
+#include <linux/atomic.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+#include "test_util.h"
+
+#define NR_BUS_LOCKS_PER_LEVEL 100
+#define CACHE_LINE_SIZE 64
+
+/*
+ * To generate a bus lock, carve out a buffer that precisely occupies two cache
+ * lines and perform an atomic access that splits the two lines.
+ */
+static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE);
+static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)];
+
+static void guest_generate_buslocks(void)
+{
+ for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++)
+ atomic_inc(val);
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_generate_buslocks();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *test_data)
+{
+ guest_generate_buslocks();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ int i, bus_locks = 0;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT));
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ run = vcpu->run;
+
+ for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto done;
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK);
+
+ /*
+ * Verify the counter is actually getting incremented, e.g. that
+ * KVM isn't skipping the instruction. On Intel, the exit is
+ * trap-like, i.e. the counter should already have been
+ * incremented. On AMD, it's fault-like, i.e. the counter will
+ * be incremented when the guest re-executes the instruction.
+ */
+ sync_global_from_guest(vm, *val);
+ TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel);
+
+ bus_locks++;
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE, took %u bus lock exits\n", bus_locks);
+done:
+ TEST_ASSERT_EQ(i, bus_locks);
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
new file mode 100644
index 000000000000..5bc12222d87a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the KVM clock from userspace
+ */
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct test_case {
+ uint64_t kvmclock_base;
+ int64_t realtime_offset;
+};
+
+static struct test_case test_cases[] = {
+ { .kvmclock_base = 0 },
+ { .kvmclock_base = 180 * NSEC_PER_SEC },
+ { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
+ { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
+};
+
+#define GUEST_SYNC_CLOCK(__stage, __val) \
+ GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+{
+ int i;
+
+ wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+ GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
+}
+
+#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+
+static inline void assert_flags(struct kvm_clock_data *data)
+{
+ TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
+ "unexpected clock data flags: %x (want set: %x)",
+ data->flags, EXPECTED_FLAGS);
+}
+
+static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
+ struct kvm_clock_data *end)
+{
+ uint64_t obs, exp_lo, exp_hi;
+
+ obs = uc->args[2];
+ exp_lo = start->clock;
+ exp_hi = end->clock;
+
+ assert_flags(start);
+ assert_flags(end);
+
+ TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
+ "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+ obs, exp_lo, exp_hi);
+
+ pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+ obs, exp_lo, exp_hi);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+ REPORT_GUEST_ASSERT(*uc);
+}
+
+static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
+{
+ struct kvm_clock_data data;
+
+ memset(&data, 0, sizeof(data));
+
+ data.clock = test_case->kvmclock_base;
+ if (test_case->realtime_offset) {
+ struct timespec ts;
+ int r;
+
+ data.flags |= KVM_CLOCK_REALTIME;
+ do {
+ r = clock_gettime(CLOCK_REALTIME, &ts);
+ if (!r)
+ break;
+ } while (errno == EINTR);
+
+ TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
+
+ data.realtime = ts.tv_sec * NSEC_PER_SEC;
+ data.realtime += ts.tv_nsec;
+ data.realtime += test_case->realtime_offset;
+ }
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &data);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_clock_data start, end;
+ struct kvm_vm *vm = vcpu->vm;
+ struct ucall uc;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ setup_clock(vm, &test_cases[i]);
+
+ vm_ioctl(vm, KVM_GET_CLOCK, &start);
+
+ vcpu_run(vcpu);
+ vm_ioctl(vm, KVM_GET_CLOCK, &end);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(&uc, &start, &end);
+ break;
+ case UCALL_ABORT:
+ handle_abort(&uc);
+ return;
+ default:
+ TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t pvti_gva;
+ vm_paddr_t pvti_gpa;
+ struct kvm_vm *vm;
+ int flags;
+
+ flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
+ TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
+
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+ pvti_gpa = addr_gva2gpa(vm, pvti_gva);
+ vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
new file mode 100644
index 000000000000..1b805cbdb47b
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct msr_data {
+ uint32_t idx;
+ const char *name;
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+ TEST_MSR(MSR_KVM_SYSTEM_TIME),
+ TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+ TEST_MSR(MSR_KVM_WALL_CLOCK),
+ TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+ TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+ TEST_MSR(MSR_KVM_STEAL_TIME),
+ TEST_MSR(MSR_KVM_PV_EOI_EN),
+ TEST_MSR(MSR_KVM_POLL_CONTROL),
+ TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+ TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+ uint64_t ignored;
+ uint8_t vector;
+
+ PR_MSR(msr);
+
+ vector = rdmsr_safe(msr->idx, &ignored);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+ vector = wrmsr_safe(msr->idx, 0);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+}
+
+struct hcall_data {
+ uint64_t nr;
+ const char *name;
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+ TEST_HCALL(KVM_HC_KICK_CPU),
+ TEST_HCALL(KVM_HC_SEND_IPI),
+ TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+ uint64_t r;
+
+ PR_HCALL(hc);
+ r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+ GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
+ test_msr(&msrs_to_test[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
+ test_hcall(&hcalls_to_test[i]);
+ }
+
+ GUEST_DONE();
+}
+
+static void pr_msr(struct ucall *uc)
+{
+ struct msr_data *msr = (struct msr_data *)uc->args[0];
+
+ pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+ struct hcall_data *hc = (struct hcall_data *)uc->args[0];
+
+ pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PR_MSR:
+ pr_msr(&uc);
+ break;
+ case UCALL_PR_HCALL:
+ pr_hcall(&uc);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+ }
+}
+
+static void test_pv_unhalt(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_cpuid_entry2 *ent;
+ u32 kvm_sig_old;
+ int r;
+
+ if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT))
+ return;
+
+ pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+
+ /* KVM_PV_UNHALT test */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+
+ TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
+
+ /* Verify KVM disallows disabling exits after vCPU creation. */
+ r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ TEST_ASSERT(r && errno == EINVAL,
+ "Disabling exits after vCPU creation didn't fail as expected");
+
+ kvm_vm_free(vm);
+
+ /* Verify that KVM clear PV_UNHALT from guest CPUID. */
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+
+ vcpu = vm_vcpu_add(vm, 0, NULL);
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "vCPU created with PV_UNHALT set by default");
+
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
+
+ /*
+ * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT
+ * when KVM PV is not present, and DOES clear PV_UNHALT when switching
+ * back to the correct signature..
+ */
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ kvm_sig_old = ent->ebx;
+ ent->ebx = 0xdeadbeef;
+ vcpu_set_cpuid(vcpu);
+
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+ TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "PV_UNHALT cleared when using bogus KVM PV signature");
+
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ ent->ebx = kvm_sig_old;
+ vcpu_set_cpuid(vcpu);
+
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
+
+ /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
+
+ vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+
+ test_pv_unhalt();
+}
diff --git a/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c
new file mode 100644
index 000000000000..7e2bfb3c3f3b
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * maximum APIC ID capability tests
+ *
+ * Copyright (C) 2022, Intel, Inc.
+ *
+ * Tests for getting/setting maximum APIC ID capability
+ */
+
+#include "kvm_util.h"
+
+#define MAX_VCPU_ID 2
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
+ ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
+
+ /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */
+ if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID);
+
+ /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail");
+ }
+
+ /* Set KVM_CAP_MAX_VCPU_ID */
+ vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID again */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
+
+ /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
+ TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
+
+ /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32));
+ TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail");
+
+ /* Create vCPU with id within bounds */
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0);
+ TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed");
+
+ close(ret);
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
new file mode 100644
index 000000000000..e45c028d2a7e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "kselftest.h"
+
+#define CPUID_MWAIT (1u << 3)
+
+enum monitor_mwait_testcases {
+ MWAIT_QUIRK_DISABLED = BIT(0),
+ MISC_ENABLES_QUIRK_DISABLED = BIT(1),
+ MWAIT_DISABLED = BIT(2),
+ CPUID_DISABLED = BIT(3),
+ TEST_MAX = CPUID_DISABLED * 2 - 1,
+};
+
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \
+do { \
+ bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \
+ ((testcase) & MWAIT_DISABLED); \
+ \
+ if (fault_wanted) \
+ __GUEST_ASSERT((vector) == UD_VECTOR, \
+ "Expected #UD on " insn " for testcase '0x%x', got %s", \
+ testcase, ex_str(vector)); \
+ else \
+ __GUEST_ASSERT(!(vector), \
+ "Expected success on " insn " for testcase '0x%x', got %s", \
+ testcase, ex_str(vector)); \
+} while (0)
+
+static void guest_monitor_wait(void *arg)
+{
+ int testcase = (int) (long) arg;
+ u8 vector;
+
+ u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT;
+ if (!(testcase & MWAIT_DISABLED))
+ val |= MSR_IA32_MISC_ENABLE_MWAIT;
+ wrmsr(MSR_IA32_MISC_ENABLE, val);
+
+ __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED),
+ "Expected CPUID.MWAIT %s\n",
+ (testcase & MWAIT_DISABLED) ? "cleared" : "set");
+
+ /*
+ * Arbitrarily MONITOR this function, SVM performs fault checks before
+ * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
+ */
+ vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
+
+ vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ uint64_t disabled_quirks;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int testcase;
+ char test[80];
+
+ TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+
+ ksft_print_header();
+ ksft_set_plan(12);
+ for (testcase = 0; testcase <= TEST_MAX; testcase++) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait);
+ vcpu_args_set(vcpu, 1, (void *)(long)testcase);
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ strcpy(test, "MWAIT can fault");
+ } else {
+ strcpy(test, "MWAIT never faults");
+ }
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ strcat(test, ", MISC_ENABLE updates CPUID");
+ } else {
+ strcat(test, ", no CPUID updates");
+ }
+
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) &&
+ (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED)))
+ continue;
+
+ if (testcase & CPUID_DISABLED) {
+ strcat(test, ", CPUID clear");
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ } else {
+ strcat(test, ", CPUID set");
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ }
+
+ if (testcase & MWAIT_DISABLED)
+ strcat(test, ", MWAIT disabled");
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ /* Detected in vcpu_run */
+ break;
+ case UCALL_DONE:
+ ksft_test_result_pass("%s\n", test);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+ kvm_vm_free(vm);
+ }
+ ksft_finished();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c
new file mode 100644
index 000000000000..40d918aedce6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/msrs_test.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/msr-index.h>
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */
+#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR
+
+struct kvm_msr {
+ const struct kvm_x86_cpu_feature feature;
+ const struct kvm_x86_cpu_feature feature2;
+ const char *name;
+ const u64 reset_val;
+ const u64 write_val;
+ const u64 rsvd_val;
+ const u32 index;
+ const bool is_kvm_defined;
+};
+
+#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm) \
+{ \
+ .index = msr, \
+ .name = str, \
+ .write_val = val, \
+ .rsvd_val = rsvd, \
+ .reset_val = reset, \
+ .feature = X86_FEATURE_ ##feat, \
+ .feature2 = X86_FEATURE_ ##f2, \
+ .is_kvm_defined = is_kvm, \
+}
+
+#define __MSR_TEST(msr, str, val, rsvd, reset, feat) \
+ ____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false)
+
+#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, reset, feat)
+
+#define MSR_TEST(msr, val, rsvd, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, 0, feat)
+
+#define MSR_TEST2(msr, val, rsvd, feat, f2) \
+ ____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false)
+
+/*
+ * Note, use a page aligned value for the canonical value so that the value
+ * is compatible with MSRs that use bits 11:0 for things other than addresses.
+ */
+static const u64 canonical_val = 0x123456789000ull;
+
+/*
+ * Arbitrary value with bits set in every byte, but not all bits set. This is
+ * also a non-canonical value, but that's coincidental (any 64-bit value with
+ * an alternating 0s/1s pattern will be non-canonical).
+ */
+static const u64 u64_val = 0xaaaa5555aaaa5555ull;
+
+#define MSR_TEST_CANONICAL(msr, feat) \
+ __MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat)
+
+#define MSR_TEST_KVM(msr, val, rsvd, feat) \
+ ____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true)
+
+/*
+ * The main struct must be scoped to a function due to the use of structures to
+ * define features. For the global structure, allocate enough space for the
+ * foreseeable future without getting too ridiculous, to minimize maintenance
+ * costs (bumping the array size every time an MSR is added is really annoying).
+ */
+static struct kvm_msr msrs[128];
+static int idx;
+
+static bool ignore_unsupported_msrs;
+
+static u64 fixup_rdmsr_val(u32 msr, u64 want)
+{
+ /*
+ * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support. KVM
+ * is supposed to emulate that behavior based on guest vendor model
+ * (which is the same as the host vendor model for this test).
+ */
+ if (!host_cpu_is_amd)
+ return want;
+
+ switch (msr) {
+ case MSR_IA32_SYSENTER_ESP:
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_TSC_AUX:
+ return want & GENMASK_ULL(31, 0);
+ default:
+ return want;
+ }
+}
+
+static void __rdmsr(u32 msr, u64 want)
+{
+ u64 val;
+ u8 vec;
+
+ vec = rdmsr_safe(msr, &val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr);
+
+ __GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx",
+ want, msr, val);
+}
+
+static void __wrmsr(u32 msr, u64 val)
+{
+ u8 vec;
+
+ vec = wrmsr_safe(msr, val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)",
+ ex_str(vec), msr, val);
+ __rdmsr(msr, fixup_rdmsr_val(msr, val));
+}
+
+static void guest_test_supported_msr(const struct kvm_msr *msr)
+{
+ __rdmsr(msr->index, msr->reset_val);
+ __wrmsr(msr->index, msr->write_val);
+ GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val));
+
+ __rdmsr(msr->index, msr->reset_val);
+}
+
+static void guest_test_unsupported_msr(const struct kvm_msr *msr)
+{
+ u64 val;
+ u8 vec;
+
+ /*
+ * KVM's ABI with respect to ignore_msrs is a mess and largely beyond
+ * repair, just skip the unsupported MSR tests.
+ */
+ if (ignore_unsupported_msrs)
+ goto skip_wrmsr_gp;
+
+ /*
+ * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are
+ * writable only if their associated feature is supported. Skip the
+ * RDMSR #GP test if the secondary feature is supported, but perform
+ * the WRMSR #GP test as the to-be-written value is tied to the primary
+ * feature. For all other MSRs, simply do nothing.
+ */
+ if (this_cpu_has(msr->feature2)) {
+ if (msr->index != MSR_IA32_U_CET &&
+ msr->index != MSR_IA32_S_CET)
+ goto skip_wrmsr_gp;
+
+ goto skip_rdmsr_gp;
+ }
+
+ vec = rdmsr_safe(msr->index, &val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s",
+ msr->index, ex_str(vec));
+
+skip_rdmsr_gp:
+ vec = wrmsr_safe(msr->index, msr->write_val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->write_val, ex_str(vec));
+
+skip_wrmsr_gp:
+ GUEST_SYNC(0);
+}
+
+void guest_test_reserved_val(const struct kvm_msr *msr)
+{
+ /* Skip reserved value checks as well, ignore_msrs is trully a mess. */
+ if (ignore_unsupported_msrs)
+ return;
+
+ /*
+ * If the CPU will truncate the written value (e.g. SYSENTER on AMD),
+ * expect success and a truncated value, not #GP.
+ */
+ if (!this_cpu_has(msr->feature) ||
+ msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) {
+ u8 vec = wrmsr_safe(msr->index, msr->rsvd_val);
+
+ __GUEST_ASSERT(vec == GP_VECTOR,
+ "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->rsvd_val, ex_str(vec));
+ } else {
+ __wrmsr(msr->index, msr->rsvd_val);
+ __wrmsr(msr->index, msr->reset_val);
+ }
+}
+
+static void guest_main(void)
+{
+ for (;;) {
+ const struct kvm_msr *msr = &msrs[READ_ONCE(idx)];
+
+ if (this_cpu_has(msr->feature))
+ guest_test_supported_msr(msr);
+ else
+ guest_test_unsupported_msr(msr);
+
+ if (msr->rsvd_val)
+ guest_test_reserved_val(msr);
+
+ GUEST_SYNC(msr->reset_val);
+ }
+}
+
+static bool has_one_reg;
+static bool use_one_reg;
+
+#define KVM_X86_MAX_NR_REGS 1
+
+static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg)
+{
+ struct {
+ struct kvm_reg_list list;
+ u64 regs[KVM_X86_MAX_NR_REGS];
+ } regs = {};
+ int r, i;
+
+ /*
+ * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported
+ * regs, then the vCPU obviously doesn't support the reg.
+ */
+ r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ if (!r)
+ return false;
+
+ TEST_ASSERT_EQ(errno, E2BIG);
+
+ /*
+ * KVM x86 is expected to support enumerating a relative small number
+ * of regs. The majority of registers supported by KVM_{G,S}ET_ONE_REG
+ * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST. For
+ * simplicity, hardcode the maximum number of regs and manually update
+ * the test as necessary.
+ */
+ TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS,
+ "KVM reports %llu regs, test expects at most %u regs, stale test?",
+ regs.list.n, KVM_X86_MAX_NR_REGS);
+
+ vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ for (i = 0; i < regs.list.n; i++) {
+ if (regs.regs[i] == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static void host_test_kvm_reg(struct kvm_vcpu *vcpu)
+{
+ bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature);
+ u64 reset_val = msrs[idx].reset_val;
+ u64 write_val = msrs[idx].write_val;
+ u64 rsvd_val = msrs[idx].rsvd_val;
+ u32 reg = msrs[idx].index;
+ u64 val;
+ int r;
+
+ if (!use_one_reg)
+ return;
+
+ TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg);
+
+ if (!has_reg) {
+ r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val);
+ TEST_ASSERT(r && errno == EINVAL,
+ "Expected failure on get_reg(0x%x)", reg);
+ rsvd_val = 0;
+ goto out;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, reg, val);
+
+ vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val);
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ write_val, reg, val);
+
+out:
+ r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val);
+ TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val);
+}
+
+static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val)
+{
+ u64 reset_val = msrs[idx].reset_val;
+ u32 msr = msrs[idx].index;
+ u64 val;
+
+ if (!kvm_cpu_has(msrs[idx].feature))
+ return;
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ guest_val, msr, val);
+
+ if (use_one_reg)
+ vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val);
+ else
+ vcpu_set_msr(vcpu, msr, reset_val);
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ reset_val, msr, val);
+
+ if (!has_one_reg)
+ return;
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, msr, val);
+}
+
+static void do_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ for (;;) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ host_test_msr(vcpu, uc.args[1]);
+ return;
+ case UCALL_PRINTF:
+ pr_info("%s", uc.buffer);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_DONE:
+ TEST_FAIL("Unexpected UCALL_DONE");
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS)
+{
+ int i;
+
+ for (i = 0; i < NR_VCPUS; i++)
+ do_vcpu_run(vcpus[i]);
+}
+
+#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
+
+static void test_msrs(void)
+{
+ const struct kvm_msr __msrs[] = {
+ MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE,
+ MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING,
+ MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE),
+ MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE),
+
+ /*
+ * TSC_AUX is supported if RDTSCP *or* RDPID is supported. Add
+ * entries for each features so that TSC_AUX doesn't exists for
+ * the "unsupported" vCPU, and obviously to test both cases.
+ */
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID),
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP),
+
+ MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE),
+ /*
+ * SYSENTER_{ESP,EIP} are technically non-canonical on Intel,
+ * but KVM doesn't emulate that behavior on emulated writes,
+ * i.e. this test will observe different behavior if the MSR
+ * writes are handed by hardware vs. KVM. KVM's behavior is
+ * intended (though far from ideal), so don't bother testing
+ * non-canonical values.
+ */
+ MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE),
+ MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE),
+
+ MSR_TEST_CANONICAL(MSR_FS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_LSTAR, LM),
+ MSR_TEST_CANONICAL(MSR_CSTAR, LM),
+ MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM),
+
+ MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK),
+
+ MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK),
+ };
+
+ const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE;
+ const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM;
+
+ /*
+ * Create three vCPUs, but run them on the same task, to validate KVM's
+ * context switching of MSR state. Don't pin the task to a pCPU to
+ * also validate KVM's handling of cross-pCPU migration. Use the full
+ * set of features for the first two vCPUs, but clear all features in
+ * third vCPU in order to test both positive and negative paths.
+ */
+ const int NR_VCPUS = 3;
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct kvm_vm *vm;
+ int i;
+
+ kvm_static_assert(sizeof(__msrs) <= sizeof(msrs));
+ kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs));
+ memcpy(msrs, __msrs, sizeof(__msrs));
+
+ ignore_unsupported_msrs = kvm_is_ignore_msrs();
+
+ vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus);
+
+ sync_global_to_guest(vm, msrs);
+ sync_global_to_guest(vm, ignore_unsupported_msrs);
+
+ /*
+ * Clear features in the "unsupported features" vCPU. This needs to be
+ * done before the first vCPU run as KVM's ABI is that guest CPUID is
+ * immutable once the vCPU has been run.
+ */
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ /*
+ * Don't clear LM; selftests are 64-bit only, and KVM doesn't
+ * honor LM=0 for MSRs that are supposed to exist if and only
+ * if the vCPU is a 64-bit model. Ditto for NONE; clearing a
+ * fake feature flag will result in false failures.
+ */
+ if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) &&
+ memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none)))
+ vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature);
+ }
+
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ struct kvm_msr *msr = &msrs[idx];
+
+ if (msr->is_kvm_defined) {
+ for (i = 0; i < NR_VCPUS; i++)
+ host_test_kvm_reg(vcpus[i]);
+ continue;
+ }
+
+ /*
+ * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST
+ * are consistent with respect to MSRs whose existence is
+ * enumerated via CPUID. Skip the check for FS/GS.base MSRs,
+ * as they aren't reported in the save/restore list since their
+ * state is managed via SREGS.
+ */
+ TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE ||
+ kvm_msr_is_in_save_restore_list(msr->index) ==
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)),
+ "%s %s in save/restore list, but %s according to CPUID", msr->name,
+ kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't",
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ?
+ "supported" : "unsupported");
+
+ sync_global_to_guest(vm, idx);
+
+ vcpus_run(vcpus, NR_VCPUS);
+ vcpus_run(vcpus, NR_VCPUS);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG);
+
+ test_msrs();
+
+ if (has_one_reg) {
+ use_one_reg = true;
+ test_msrs();
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
new file mode 100644
index 000000000000..f001cb836bfa
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Verify that nothing bad happens if a KVM user exits with open
+ * file descriptors while executing a nested guest.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+enum {
+ PORT_L0_EXIT = 0x2000,
+};
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ /* Exit to L0 */
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (PORT_L0_EXIT) : "rax");
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(0);
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Prepare the VMCB for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t guest_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (run->io.port == PORT_L0_EXIT)
+ break;
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
new file mode 100644
index 000000000000..abc824dba04f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+enum {
+ SVM_F,
+ VMX_F,
+ NR_VIRTUALIZATION_FLAVORS,
+};
+
+struct emulated_instruction {
+ const char name[32];
+ uint8_t opcode[15];
+ uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS];
+};
+
+static struct emulated_instruction instructions[] = {
+ {
+ .name = "pause",
+ .opcode = { 0xf3, 0x90 },
+ .exit_reason = { SVM_EXIT_PAUSE,
+ EXIT_REASON_PAUSE_INSTRUCTION, }
+ },
+ {
+ .name = "hlt",
+ .opcode = { 0xf4 },
+ .exit_reason = { SVM_EXIT_HLT,
+ EXIT_REASON_HLT, }
+ },
+};
+
+static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */
+static uint8_t l2_guest_code[sizeof(kvm_fep) + 15];
+static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)];
+
+static uint32_t get_instruction_length(struct emulated_instruction *insn)
+{
+ uint32_t i;
+
+ for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++)
+ ;
+
+ return i;
+}
+
+static void guest_code(void *test_data)
+{
+ int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F;
+ int i;
+
+ memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep));
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, NULL, NULL);
+ vmcb->save.idtr.limit = 0;
+ vmcb->save.rip = (u64)l2_guest_code;
+
+ vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) |
+ BIT_ULL(INTERCEPT_PAUSE) |
+ BIT_ULL(INTERCEPT_HLT);
+ vmcb->control.intercept_exceptions = 0;
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(test_data));
+ GUEST_ASSERT(load_vmcs(test_data));
+
+ prepare_vmcs(test_data, NULL, NULL);
+ GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0));
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0));
+
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_PAUSE_EXITING |
+ CPU_BASED_HLT_EXITING);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(instructions); i++) {
+ struct emulated_instruction *insn = &instructions[i];
+ uint32_t insn_len = get_instruction_length(insn);
+ uint32_t exit_insn_len;
+ u32 exit_reason;
+
+ /*
+ * Copy the target instruction to the L2 code stream, and fill
+ * the remaining bytes with INT3s so that a missed intercept
+ * results in a consistent failure mode (SHUTDOWN).
+ */
+ memcpy(l2_instruction, insn->opcode, insn_len);
+ memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len);
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ exit_reason = vmcb->control.exit_code;
+ exit_insn_len = vmcb->control.next_rip - vmcb->save.rip;
+ GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction);
+ } else {
+ GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0);
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
+ GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction);
+ }
+
+ __GUEST_ASSERT(exit_reason == insn->exit_reason[f],
+ "Wanted exit_reason '0x%x' for '%s', got '0x%x'",
+ insn->exit_reason[f], insn->name, exit_reason);
+
+ __GUEST_ASSERT(exit_insn_len == insn_len,
+ "Wanted insn_len '%u' for '%s', got '%u'",
+ insn_len, insn->name, exit_insn_len);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
new file mode 100644
index 000000000000..3641a42934ac
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/*
+ * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
+ * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
+ */
+#define FAKE_TRIPLE_FAULT_VECTOR 0xaa
+
+/* Arbitrary 32-bit error code injected by this test. */
+#define SS_ERROR_CODE 0xdeadbeef
+
+/*
+ * Bit '0' is set on Intel if the exception occurs while delivering a previous
+ * event/exception. AMD's wording is ambiguous, but presumably the bit is set
+ * if the exception occurs while delivering an external event, e.g. NMI or INTR,
+ * but not for exceptions that occur when delivering other exceptions or
+ * software interrupts.
+ *
+ * Note, Intel's name for it, "External event", is misleading and much more
+ * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
+ */
+#define ERROR_CODE_EXT_FLAG BIT(0)
+
+/*
+ * Bit '1' is set if the fault occurred when looking up a descriptor in the
+ * IDT, which is the case here as the IDT is empty/NULL.
+ */
+#define ERROR_CODE_IDT_FLAG BIT(1)
+
+/*
+ * The #GP that occurs when vectoring #SS should show the index into the IDT
+ * for #SS, plus have the "IDT flag" set.
+ */
+#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
+#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
+
+/*
+ * Intel and AMD both shove '0' into the error code on #DF, regardless of what
+ * led to the double fault.
+ */
+#define DF_ERROR_CODE 0
+
+#define INTERCEPT_SS (BIT_ULL(SS_VECTOR))
+#define INTERCEPT_SS_DF (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
+#define INTERCEPT_SS_GP_DF (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
+
+static void l2_ss_pending_test(void)
+{
+ GUEST_SYNC(SS_VECTOR);
+}
+
+static void l2_ss_injected_gp_test(void)
+{
+ GUEST_SYNC(GP_VECTOR);
+}
+
+static void l2_ss_injected_df_test(void)
+{
+ GUEST_SYNC(DF_VECTOR);
+}
+
+static void l2_ss_injected_tf_test(void)
+{
+ GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
+}
+
+static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
+ uint32_t error_code)
+{
+ struct vmcb *vmcb = svm->vmcb;
+ struct vmcb_control_area *ctrl = &vmcb->control;
+
+ vmcb->save.rip = (u64)l2_code;
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
+ GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+ GUEST_ASSERT(!ctrl->int_state);
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ struct vmcb_control_area *ctrl = &svm->vmcb->control;
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ svm->vmcb->save.idtr.limit = 0;
+ ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
+ svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
+ svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_DF;
+ svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS;
+ svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
+
+ GUEST_DONE();
+}
+
+static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+{
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
+
+ GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+ GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+ GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO));
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
+
+ /*
+ * VMX disallows injecting an exception with error_code[31:16] != 0,
+ * and hardware will never generate a VM-Exit with bits 31:16 set.
+ * KVM should likewise truncate the "bad" userspace value.
+ */
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
+ vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
+ vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
+ vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
+ vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
+
+ GUEST_DONE();
+}
+
+static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
+{
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else
+ l1_vmx_code(test_data);
+}
+
+static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
+{
+ struct ucall uc;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(vector == uc.args[1],
+ "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(vector == -1,
+ "Expected L2 to ask for %d, L2 says it's done", vector);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
+ }
+}
+
+static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ TEST_ASSERT(!events.exception.pending,
+ "Vector %d unexpectedlt pending", events.exception.nr);
+ TEST_ASSERT(!events.exception.injected,
+ "Vector %d unexpectedly injected", events.exception.nr);
+
+ events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
+ events.exception.pending = !inject;
+ events.exception.injected = inject;
+ events.exception.nr = SS_VECTOR;
+ events.exception.has_error_code = true;
+ events.exception.error_code = SS_ERROR_CODE;
+ vcpu_events_set(vcpu, &events);
+}
+
+/*
+ * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
+ * when an exception is being queued for L2. Specifically, verify that KVM
+ * honors L1 exception intercept controls when a #SS is pending/injected,
+ * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
+ * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
+ */
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu_events events;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ /* Run L1 => L2. L2 should sync and request #SS. */
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, SS_VECTOR);
+
+ /* Pend #SS and request immediate exit. #SS should still be pending. */
+ queue_ss_exception(vcpu, false);
+ vcpu->run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+ /* Verify the pending events comes back out the same as it went in. */
+ vcpu_events_get(vcpu, &events);
+ TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+ KVM_VCPUEVENT_VALID_PAYLOAD);
+ TEST_ASSERT_EQ(events.exception.pending, true);
+ TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+ TEST_ASSERT_EQ(events.exception.has_error_code, true);
+ TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+
+ /*
+ * Run for real with the pending #SS, L1 should get a VM-Exit due to
+ * #SS interception and re-enter L2 to request #GP (via injected #SS).
+ */
+ vcpu->run->immediate_exit = false;
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, GP_VECTOR);
+
+ /*
+ * Inject #SS, the #SS should bypass interception and cause #GP, which
+ * L1 should intercept before KVM morphs it to #DF. L1 should then
+ * disable #GP interception and run L2 to request #DF (via #SS => #GP).
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, DF_VECTOR);
+
+ /*
+ * Inject #SS, the #SS should bypass interception and cause #GP, which
+ * L1 is no longer interception, and so should see a #DF VM-Exit. L1
+ * should then signal that is done.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
+
+ /*
+ * Inject #SS yet again. L1 is not intercepting #GP or #DF, and so
+ * should see nested TRIPLE_FAULT / SHUTDOWN.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, -1);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
new file mode 100644
index 000000000000..a6b6da9cf7fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * This test verifies that L1 fails to enter L2 with an invalid CR3, and
+ * succeeds otherwise.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ vmcall();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = svm->vmcb->save.cr3;
+ svm->vmcb->save.cr3 = -1ull;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR);
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ svm->vmcb->save.cr3 = save_cr3;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ vmwrite(GUEST_CR3, save_cr3);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t guest_gva = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
index 7e33a350b053..2839f650e5c9 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_tsc_adjust_test
- *
* Copyright (C) 2018, Google LLC.
*
* IA32_TSC_ADJUST test
@@ -22,6 +20,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -32,12 +31,11 @@
#define MSR_IA32_TSC_ADJUST 0x3b
#endif
-#define PAGE_SIZE 4096
-#define VCPU_ID 5
-
#define TSC_ADJUST_VALUE (1ll << 32)
#define TSC_OFFSET_VALUE -(1ll << 48)
+#define L2_GUEST_STACK_SIZE 64
+
enum {
PORT_ABORT = 0x1000,
PORT_REPORT,
@@ -52,11 +50,6 @@ enum {
NUM_VMX_PAGES,
};
-struct kvm_single_msr {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
-} __attribute__((packed));
-
/* The virtual machine object. */
static struct kvm_vm *vm;
@@ -80,42 +73,47 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_guest_code(void *data)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
- uintptr_t save_cr3;
+ /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */
GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
- vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
- /* Jump into L2. First, test failure to load guest CR3. */
- save_cr3 = vmreadz(GUEST_CR3);
- vmwrite(GUEST_CR3, -1ull);
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
- (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
- check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- vmwrite(GUEST_CR3, save_cr3);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ /*
+ * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not
+ * intercepting the write so it should update L1's TSC_ADJUST.
+ */
+ if (this_cpu_has(X86_FEATURE_VMX)) {
+ struct vmx_pages *vmx_pages = data;
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ } else {
+ struct svm_test_data *svm = data;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ }
check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
GUEST_DONE();
}
@@ -127,29 +125,29 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t nested_gva;
+ struct kvm_vcpu *vcpu;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
report(uc.args[1]);
@@ -161,7 +159,7 @@ int main(int argc, char *argv[])
}
}
- kvm_vm_free(vm);
done:
+ kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
new file mode 100644
index 000000000000..4260c9e4f489
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+ uint64_t tolerance, thresh_low, thresh_high;
+
+ tolerance = expected / 100;
+ thresh_low = expected - tolerance;
+ thresh_high = expected + tolerance;
+
+ TEST_ASSERT(thresh_low < actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+ TEST_ASSERT(thresh_high > actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+ uint64_t tsc_start, tsc_end, tsc_freq;
+
+ /*
+ * Reading the TSC twice with about a second's difference should give
+ * us an approximation of the TSC frequency from the guest's
+ * perspective. Now, this won't be completely accurate, but it should
+ * be good enough for the purposes of this test.
+ */
+ tsc_start = rdmsr(MSR_IA32_TSC);
+ GUEST_SLEEP(1);
+ tsc_end = rdmsr(MSR_IA32_TSC);
+
+ tsc_freq = tsc_end - tsc_start;
+
+ GUEST_CHECK(level, tsc_freq);
+}
+
+static void l2_guest_code(void)
+{
+ check_tsc_freq(UCHECK_L2);
+
+ /* exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC scaling for L2 */
+ wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32);
+
+ /* launch L2 */
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* prepare the VMCS for L2 execution */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC offsetting and TSC scaling for L2 */
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_TSC_SCALING;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+ vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+ vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+ vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+ /* launch L2 */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t guest_gva = 0;
+
+ uint64_t tsc_start, tsc_end;
+ uint64_t tsc_khz;
+ uint64_t l1_scale_factor;
+ uint64_t l0_tsc_freq = 0;
+ uint64_t l1_tsc_freq = 0;
+ uint64_t l2_tsc_freq = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ /*
+ * We set L1's scale factor to be a random number from 2 to 10.
+ * Ideally we would do the same for L2's factor but that one is
+ * referenced by both main() and l1_guest_code() and using a global
+ * variable does not work.
+ */
+ srand(time(NULL));
+ l1_scale_factor = (rand() % 9) + 2;
+ printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+ printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+ tsc_start = rdtsc();
+ sleep(1);
+ tsc_end = rdtsc();
+
+ l0_tsc_freq = tsc_end - tsc_start;
+ printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
+
+ tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+ /* scale down L1's TSC frequency */
+ vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ switch (uc.args[0]) {
+ case USLEEP:
+ sleep(uc.args[1]);
+ break;
+ case UCHECK_L1:
+ l1_tsc_freq = uc.args[1];
+ printf("L1's TSC frequency is around: %"PRIu64
+ "\n", l1_tsc_freq);
+
+ compare_tsc_freq(l1_tsc_freq,
+ l0_tsc_freq / l1_scale_factor);
+ break;
+ case UCHECK_L2:
+ l2_tsc_freq = uc.args[1];
+ printf("L2's TSC frequency is around: %"PRIu64
+ "\n", l2_tsc_freq);
+
+ compare_tsc_freq(l2_tsc_freq,
+ l1_tsc_freq * L2_SCALE_FACTOR);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
new file mode 100644
index 000000000000..c0d84827f736
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Usage: to be run via nx_huge_page_test.sh, which does the necessary
+ * environment setup and teardown
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdint.h>
+#include <time.h>
+
+#include <test_util.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HPAGE_SLOT 10
+#define HPAGE_GPA (4UL << 30) /* 4G prevents collision w/ slot 0 */
+#define HPAGE_GVA HPAGE_GPA /* GVA is arbitrary, so use GPA. */
+#define PAGES_PER_2MB_HUGE_PAGE 512
+#define HPAGE_SLOT_NPAGES (3 * PAGES_PER_2MB_HUGE_PAGE)
+
+/*
+ * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
+ * being run without it.
+ */
+#define MAGIC_TOKEN 887563923
+
+/*
+ * x86 opcode for the return instruction. Used to call into, and then
+ * immediately return from, memory backed with hugepages.
+ */
+#define RETURN_OPCODE 0xC3
+
+/* Call the specified memory address. */
+static void guest_do_CALL(uint64_t target)
+{
+ ((void (*)(void)) target)();
+}
+
+/*
+ * Exit the VM after each memory access so that the userspace component of the
+ * test can make assertions about the pages backing the VM.
+ *
+ * See the below for an explanation of how each access should affect the
+ * backing mappings.
+ */
+void guest_code(void)
+{
+ uint64_t hpage_1 = HPAGE_GVA;
+ uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+ uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(1);
+
+ READ_ONCE(*(uint64_t *)hpage_2);
+ GUEST_SYNC(2);
+
+ guest_do_CALL(hpage_1);
+ GUEST_SYNC(3);
+
+ guest_do_CALL(hpage_3);
+ GUEST_SYNC(4);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(5);
+
+ READ_ONCE(*(uint64_t *)hpage_3);
+ GUEST_SYNC(6);
+}
+
+static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
+{
+ int actual_pages_2m;
+
+ actual_pages_2m = vm_get_stat(vm, pages_2m);
+
+ TEST_ASSERT(actual_pages_2m == expected_pages_2m,
+ "Unexpected 2m page count. Expected %d, got %d",
+ expected_pages_2m, actual_pages_2m);
+}
+
+static void check_split_count(struct kvm_vm *vm, int expected_splits)
+{
+ int actual_splits;
+
+ actual_splits = vm_get_stat(vm, nx_lpage_splits);
+
+ TEST_ASSERT(actual_splits == expected_splits,
+ "Unexpected NX huge page split count. Expected %d, got %d",
+ expected_splits, actual_splits);
+}
+
+static void wait_for_reclaim(int reclaim_period_ms)
+{
+ long reclaim_wait_ms;
+ struct timespec ts;
+
+ reclaim_wait_ms = reclaim_period_ms * 5;
+ ts.tv_sec = reclaim_wait_ms / 1000;
+ ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
+ nanosleep(&ts, NULL);
+}
+
+void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
+ bool reboot_permissions)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t nr_bytes;
+ void *hva;
+ int r;
+
+ vm = vm_create(1);
+
+ if (disable_nx_huge_pages) {
+ r = __vm_disable_nx_huge_pages(vm);
+ if (reboot_permissions) {
+ TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
+ } else {
+ TEST_ASSERT(r == -1 && errno == EPERM,
+ "This process should not have permission to disable NX huge pages");
+ return;
+ }
+ }
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
+ HPAGE_GPA, HPAGE_SLOT,
+ HPAGE_SLOT_NPAGES, 0);
+
+ nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
+
+ /*
+ * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
+ * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
+ * whenever KVM is shadowing the guest page tables).
+ *
+ * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
+ * pages irrespective of the guest page size, so map with 4KiB pages
+ * to test that that is the case.
+ */
+ if (kvm_is_tdp_enabled())
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
+ else
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
+
+ hva = addr_gpa2hva(vm, HPAGE_GPA);
+ memset(hva, RETURN_OPCODE, nr_bytes);
+
+ check_2m_page_count(vm, 0);
+ check_split_count(vm, 0);
+
+ /*
+ * The guest code will first read from the first hugepage, resulting
+ * in a huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 1);
+ check_split_count(vm, 0);
+
+ /*
+ * Then the guest code will read from the second hugepage, resulting
+ * in another huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 2);
+ check_split_count(vm, 0);
+
+ /*
+ * Next, the guest will execute from the first huge page, causing it
+ * to be remapped at 4k.
+ *
+ * If NX huge pages are disabled, this should have no effect.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
+
+ /*
+ * Executing from the third huge page (previously unaccessed) will
+ * cause part to be mapped at 4k.
+ *
+ * If NX huge pages are disabled, it should be mapped at 2M.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Reading from the first huge page again should have no effect. */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Give recovery thread time to run. */
+ wait_for_reclaim(reclaim_period_ms);
+
+ /*
+ * Now that the reclaimer has run, all the split pages should be gone.
+ *
+ * If NX huge pages are disabled, the relaimer will not run, so
+ * nothing should change from here on.
+ */
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, 0);
+
+ /*
+ * The 4k mapping on hpage 3 should have been removed, so check that
+ * reading from it causes a huge page mapping to be installed.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
+ check_split_count(vm, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-p period_ms] [-t token]\n", name);
+ puts("");
+ printf(" -p: The NX reclaim period in milliseconds.\n");
+ printf(" -t: The magic token to indicate environment setup is done.\n");
+ printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ int reclaim_period_ms = 0, token = 0, opt;
+ bool reboot_permissions = false;
+
+ while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
+ switch (opt) {
+ case 'p':
+ reclaim_period_ms = atoi_positive("Reclaim period", optarg);
+ break;
+ case 't':
+ token = atoi_paranoid(optarg);
+ break;
+ case 'r':
+ reboot_permissions = true;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
+
+ __TEST_REQUIRE(token == MAGIC_TOKEN,
+ "This test must be run with the magic token via '-t %d'.\n"
+ "Running via nx_huge_pages_test.sh, which also handles "
+ "environment setup, is strongly recommended.", MAGIC_TOKEN);
+
+ run_test(reclaim_period_ms, false, reboot_permissions);
+ run_test(reclaim_period_ms, true, reboot_permissions);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh
new file mode 100755
index 000000000000..caad084b8bfd
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only */
+#
+# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
+# Makes use of root privileges to set up huge pages and KVM module parameters.
+#
+# Copyright (C) 2022, Google LLC.
+
+set -e
+
+NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
+NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
+NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
+HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+
+# If we're already root, the host might not have sudo.
+if [ $(whoami) == "root" ]; then
+ function do_sudo () {
+ "$@"
+ }
+else
+ function do_sudo () {
+ sudo "$@"
+ }
+fi
+
+set +e
+
+function sudo_echo () {
+ echo "$1" | do_sudo tee -a "$2" > /dev/null
+}
+
+NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
+
+sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
+
+(
+ set -e
+
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+ sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+ sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+ # Test with reboot permissions
+ if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
+ echo Running test with CAP_SYS_BOOT enabled
+ $NXECUTABLE -t 887563923 -p 100 -r
+ test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
+ else
+ echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
+ fi
+
+ # Test without reboot permissions
+ if [ $(whoami) != "root" ] ; then
+ echo Running test with CAP_SYS_BOOT disabled
+ $NXECUTABLE -t 887563923 -p 100
+ else
+ echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
+ fi
+)
+RET=$?
+
+sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+exit $RET
diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c
new file mode 100644
index 000000000000..9cbf283ebc55
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/platform_info_test.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
+static void guest_code(void)
+{
+ uint64_t msr_platform_info;
+ uint8_t vector;
+
+ GUEST_SYNC(true);
+ msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+ GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+
+ GUEST_SYNC(false);
+ vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t msr_platform_info;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
+ vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
+ msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+ break;
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
new file mode 100644
index 000000000000..3eaa216b96c0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -0,0 +1,697 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#include <x86intrin.h>
+
+#include "pmu.h"
+#include "processor.h"
+
+/* Number of iterations of the loop for the guest measurement payload. */
+#define NUM_LOOPS 10
+
+/* Each iteration of the loop retires one branch instruction. */
+#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
+
+/*
+ * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
+ * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
+ */
+#define NUM_INSNS_PER_LOOP 6
+
+/*
+ * Number of "extra" instructions that will be counted, i.e. the number of
+ * instructions that are needed to set up the loop and then disable the
+ * counter. 2 MOV, 2 XOR, 1 WRMSR.
+ */
+#define NUM_EXTRA_INSNS 5
+
+/* Total number of instructions retired within the measured section. */
+#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+
+/* Track which architectural events are supported by hardware. */
+static uint32_t hardware_pmu_arch_events;
+
+static uint8_t kvm_pmu_version;
+static bool kvm_has_perf_caps;
+
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
+struct kvm_intel_pmu_event {
+ struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
+};
+
+/*
+ * Wrap the array to appease the compiler, as the macros used to construct each
+ * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
+ * compiler often thinks the feature definitions aren't compile-time constants.
+ */
+static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
+{
+ const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
+ [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same as the
+ * general purpose architectural event. The fixed counter explicitly
+ * counts at the same frequency as the TSC, whereas the GP event counts
+ * at a fixed, but uarch specific, frequency. Bundle them here for
+ * simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
+ };
+
+ kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
+
+ return __intel_event_to_feature[idx];
+}
+
+static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ void *guest_code,
+ uint8_t pmu_version,
+ uint64_t perf_capabilities)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ sync_global_to_guest(vm, kvm_pmu_version);
+ sync_global_to_guest(vm, hardware_pmu_arch_events);
+
+ /*
+ * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
+ * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
+ */
+ if (kvm_has_perf_caps)
+ vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+ vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
+ return vm;
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ pr_info("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static uint8_t guest_get_pmu_version(void)
+{
+ /*
+ * Return the effective PMU version, i.e. the minimum between what KVM
+ * supports and what is enumerated to the guest. The host deliberately
+ * advertises a PMU version to the guest beyond what is actually
+ * supported by KVM to verify KVM doesn't freak out and do something
+ * bizarre with an architecturally valid, but unsupported, version.
+ */
+ return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+}
+
+/*
+ * If an architectural event is supported and guaranteed to generate at least
+ * one "hit, assert that its count is non-zero. If an event isn't supported or
+ * the test can't guarantee the associated action will occur, then all bets are
+ * off regarding the count, i.e. no checks can be done.
+ *
+ * Sanity check that in all cases, the event doesn't count when it's disabled,
+ * and that KVM correctly emulates the write of an arbitrary value.
+ */
+static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
+{
+ uint64_t count;
+
+ count = _rdpmc(pmc);
+ if (!(hardware_pmu_arch_events & BIT(idx)))
+ goto sanity_checks;
+
+ switch (idx) {
+ case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
+ /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+ if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
+ GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ else
+ GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+ break;
+ case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
+ /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+ if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
+ GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
+ else
+ GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
+ break;
+ case INTEL_ARCH_LLC_REFERENCES_INDEX:
+ case INTEL_ARCH_LLC_MISSES_INDEX:
+ if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+ !this_cpu_has(X86_FEATURE_CLFLUSH))
+ break;
+ fallthrough;
+ case INTEL_ARCH_CPU_CYCLES_INDEX:
+ case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+ case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
+ case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
+ GUEST_ASSERT_NE(count, 0);
+ break;
+ case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
+ case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
+ __GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
+ "Expected top-down slots >= %u, got count = %lu",
+ NUM_INSNS_RETIRED, count);
+ break;
+ default:
+ break;
+ }
+
+sanity_checks:
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+ GUEST_ASSERT_EQ(_rdpmc(pmc), count);
+
+ wrmsr(pmc_msr, 0xdead);
+ GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
+}
+
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence. Note, ECX
+ * doesn't need to be clobbered as the input value, @pmc_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and
+ * misses, i.e. to allow testing that those events actually count.
+ *
+ * If forced emulation is enabled (and specified), force emulation on a subset
+ * of the measured code to verify that KVM correctly emulates instructions and
+ * branches retired events in conjunction with hardware also counting said
+ * events.
+ */
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \
+do { \
+ __asm__ __volatile__("wrmsr\n\t" \
+ " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
+ "1:\n\t" \
+ FEP "enter $0, $0\n\t" \
+ clflush "\n\t" \
+ "mfence\n\t" \
+ "mov %[m], %%eax\n\t" \
+ FEP "leave\n\t" \
+ FEP "loop 1b\n\t" \
+ FEP "mov %%edi, %%ecx\n\t" \
+ FEP "xor %%eax, %%eax\n\t" \
+ FEP "xor %%edx, %%edx\n\t" \
+ "wrmsr\n\t" \
+ :: "a"((uint32_t)_value), "d"(_value >> 32), \
+ "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \
+ ); \
+} while (0)
+
+#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+do { \
+ wrmsr(_pmc_msr, 0); \
+ \
+ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
+ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
+ else \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
+ \
+ guest_assert_event_count(_idx, _pmc, _pmc_msr); \
+} while (0)
+
+static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
+ uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+{
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+
+ if (is_forced_emulation_enabled)
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
+}
+
+static void guest_test_arch_event(uint8_t idx)
+{
+ uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ uint32_t pmu_version = guest_get_pmu_version();
+ /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
+ bool guest_has_perf_global_ctrl = pmu_version >= 2;
+ struct kvm_x86_pmu_feature gp_event, fixed_event;
+ uint32_t base_pmc_msr;
+ unsigned int i;
+
+ /* The host side shouldn't invoke this without a guest PMU. */
+ GUEST_ASSERT(pmu_version);
+
+ if (this_cpu_has(X86_FEATURE_PDCM) &&
+ rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+ base_pmc_msr = MSR_IA32_PMC0;
+ else
+ base_pmc_msr = MSR_IA32_PERFCTR0;
+
+ gp_event = intel_event_to_feature(idx).gp_event;
+ GUEST_ASSERT_EQ(idx, gp_event.f.bit);
+
+ GUEST_ASSERT(nr_gp_counters);
+
+ for (i = 0; i < nr_gp_counters; i++) {
+ uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+ ARCH_PERFMON_EVENTSEL_ENABLE |
+ intel_pmu_arch_events[idx];
+
+ wrmsr(MSR_P6_EVNTSEL0 + i, 0);
+ if (guest_has_perf_global_ctrl)
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
+
+ __guest_test_arch_event(idx, i, base_pmc_msr + i,
+ MSR_P6_EVNTSEL0 + i, eventsel);
+ }
+
+ if (!guest_has_perf_global_ctrl)
+ return;
+
+ fixed_event = intel_event_to_feature(idx).fixed_event;
+ if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+ return;
+
+ i = fixed_event.f.bit;
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+
+ __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
+ MSR_CORE_PERF_FIXED_CTR0 + i,
+ MSR_CORE_PERF_GLOBAL_CTRL,
+ FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+}
+
+static void guest_test_arch_events(void)
+{
+ uint8_t i;
+
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
+ guest_test_arch_event(i);
+
+ GUEST_DONE();
+}
+
+static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t length, uint32_t unavailable_mask)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Testing arch events requires a vPMU (there are no negative tests). */
+ if (!pmu_version)
+ return;
+
+ unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
+ X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
+ length);
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
+ unavailable_mask);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs
+ * that aren't defined counter MSRs *probably* don't exist, but there's no
+ * guarantee that currently undefined MSR indices won't be used for something
+ * other than PMCs in the future.
+ */
+#define MAX_NR_GP_COUNTERS 8
+#define MAX_NR_FIXED_COUNTERS 3
+
+#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
+__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
+ "Expected %s on " #insn "(0x%x), got %s", \
+ expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \
+
+#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
+ __GUEST_ASSERT(val == expected, \
+ "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
+ msr, expected, val);
+
+static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
+ uint64_t expected_val)
+{
+ uint8_t vector;
+ uint64_t val;
+
+ vector = rdpmc_safe(rdpmc_idx, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+ if (expect_success)
+ GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+
+ if (!is_forced_emulation_enabled)
+ return;
+
+ vector = rdpmc_safe_fep(rdpmc_idx, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+ if (expect_success)
+ GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+}
+
+static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
+ uint8_t nr_counters, uint32_t or_mask)
+{
+ const bool pmu_has_fast_mode = !guest_get_pmu_version();
+ uint8_t i;
+
+ for (i = 0; i < nr_possible_counters; i++) {
+ /*
+ * TODO: Test a value that validates full-width writes and the
+ * width of the counters.
+ */
+ const uint64_t test_val = 0xffff;
+ const uint32_t msr = base_msr + i;
+
+ /*
+ * Fixed counters are supported if the counter is less than the
+ * number of enumerated contiguous counters *or* the counter is
+ * explicitly enumerated in the supported counters mask.
+ */
+ const bool expect_success = i < nr_counters || (or_mask & BIT(i));
+
+ /*
+ * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
+ * unsupported, i.e. doesn't #GP and reads back '0'.
+ */
+ const uint64_t expected_val = expect_success ? test_val : 0;
+ const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
+ msr != MSR_P6_PERFCTR1;
+ uint32_t rdpmc_idx;
+ uint8_t vector;
+ uint64_t val;
+
+ vector = wrmsr_safe(msr, test_val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+
+ vector = rdmsr_safe(msr, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
+
+ /* On #GP, the result of RDMSR is undefined. */
+ if (!expect_gp)
+ GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
+
+ /*
+ * Redo the read tests with RDPMC, which has different indexing
+ * semantics and additional capabilities.
+ */
+ rdpmc_idx = i;
+ if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
+ rdpmc_idx |= INTEL_RDPMC_FIXED;
+
+ guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
+
+ /*
+ * KVM doesn't support non-architectural PMUs, i.e. it should
+ * impossible to have fast mode RDPMC. Verify that attempting
+ * to use fast RDPMC always #GPs.
+ */
+ GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
+ rdpmc_idx |= INTEL_RDPMC_FAST;
+ guest_test_rdpmc(rdpmc_idx, false, -1ull);
+
+ vector = wrmsr_safe(msr, 0);
+ GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+ }
+}
+
+static void guest_test_gp_counters(void)
+{
+ uint8_t pmu_version = guest_get_pmu_version();
+ uint8_t nr_gp_counters = 0;
+ uint32_t base_msr;
+
+ if (pmu_version)
+ nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+
+ /*
+ * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
+ * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
+ * of GP counters. If there are no GP counters, require KVM to leave
+ * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but
+ * follow the spirit of the architecture and only globally enable GP
+ * counters, of which there are none.
+ */
+ if (pmu_version > 1) {
+ uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
+
+ if (nr_gp_counters)
+ GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
+ else
+ GUEST_ASSERT_EQ(global_ctrl, 0);
+ }
+
+ if (this_cpu_has(X86_FEATURE_PDCM) &&
+ rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+ base_msr = MSR_IA32_PMC0;
+ else
+ base_msr = MSR_IA32_PERFCTR0;
+
+ guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
+ GUEST_DONE();
+}
+
+static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t nr_gp_counters)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
+ nr_gp_counters);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_test_fixed_counters(void)
+{
+ uint64_t supported_bitmask = 0;
+ uint8_t nr_fixed_counters = 0;
+ uint8_t i;
+
+ /* Fixed counters require Architectural vPMU Version 2+. */
+ if (guest_get_pmu_version() >= 2)
+ nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+
+ /*
+ * The supported bitmask for fixed counters was introduced in PMU
+ * version 5.
+ */
+ if (guest_get_pmu_version() >= 5)
+ supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
+
+ guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
+ nr_fixed_counters, supported_bitmask);
+
+ for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
+ uint8_t vector;
+ uint64_t val;
+
+ if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
+ vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
+ FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
+
+ vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
+ FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
+ continue;
+ }
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
+
+ GUEST_ASSERT_NE(val, 0);
+ }
+ GUEST_DONE();
+}
+
+static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t nr_fixed_counters,
+ uint32_t supported_bitmask)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
+ supported_bitmask);
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
+ nr_fixed_counters);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void test_intel_counters(void)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ unsigned int i;
+ uint8_t v, j;
+ uint32_t k;
+
+ const uint64_t perf_caps[] = {
+ 0,
+ PMU_CAP_FW_WRITES,
+ };
+
+ /*
+ * To keep the total runtime reasonable, test only a handful of select,
+ * semi-arbitrary values for the mask of unavailable PMU events. Test
+ * 0 (all events available) and all ones (no events available) as well
+ * as alternating bit sequencues, e.g. to detect if KVM is checking the
+ * wrong bit(s).
+ */
+ const uint32_t unavailable_masks[] = {
+ 0x0,
+ 0xffffffffu,
+ 0xaaaaaaaau,
+ 0x55555555u,
+ 0xf0f0f0f0u,
+ 0x0f0f0f0fu,
+ 0xa0a0a0a0u,
+ 0x0a0a0a0au,
+ 0x50505050u,
+ 0x05050505u,
+ };
+
+ /*
+ * Test up to PMU v5, which is the current maximum version defined by
+ * Intel, i.e. is the last version that is guaranteed to be backwards
+ * compatible with KVM's existing behavior.
+ */
+ uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+
+ /*
+ * Detect the existence of events that aren't supported by selftests.
+ * This will (obviously) fail any time hardware adds support for a new
+ * event, but it's worth paying that price to keep the test fresh.
+ */
+ TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
+ "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
+ this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
+ this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+
+ /*
+ * Iterate over known arch events irrespective of KVM/hardware support
+ * to verify that KVM doesn't reject programming of events just because
+ * the *architectural* encoding is unsupported. Track which events are
+ * supported in hardware; the guest side will validate supported events
+ * count correctly, even if *enumeration* of the event is unsupported
+ * by KVM and/or isn't exposed to the guest.
+ */
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
+ if (this_pmu_has(intel_event_to_feature(i).gp_event))
+ hardware_pmu_arch_events |= BIT(i);
+ }
+
+ for (v = 0; v <= max_pmu_version; v++) {
+ for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
+ if (!kvm_has_perf_caps && perf_caps[i])
+ continue;
+
+ pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+
+ /*
+ * Test single bits for all PMU version and lengths up
+ * the number of events +1 (to verify KVM doesn't do
+ * weird things if the guest length is greater than the
+ * host length). Explicitly test a mask of '0' and all
+ * ones i.e. all events being available and unavailable.
+ */
+ for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
+ for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++)
+ test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
+ }
+
+ pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ for (j = 0; j <= nr_gp_counters; j++)
+ test_gp_counters(v, perf_caps[i], j);
+
+ pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ for (j = 0; j <= nr_fixed_counters; j++) {
+ for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
+ test_fixed_counters(v, perf_caps[i], j, k);
+ }
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+
+ TEST_REQUIRE(host_cpu_is_intel);
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
+
+ test_intel_counters();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
new file mode 100644
index 000000000000..1c5b7611db24
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
@@ -0,0 +1,878 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+#include "kvm_util.h"
+#include "pmu.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define NUM_BRANCHES 42
+#define MAX_TEST_EVENTS 10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
+
+struct __kvm_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+};
+
+/*
+ * This event list comprises Intel's known architectural events, plus AMD's
+ * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the
+ * same encoding for Instructions Retired.
+ */
+kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+static const struct __kvm_pmu_event_filter base_event_filter = {
+ .nevents = ARRAY_SIZE(base_event_filter.events),
+ .events = {
+ INTEL_ARCH_CPU_CYCLES,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ INTEL_ARCH_REFERENCE_CYCLES,
+ INTEL_ARCH_LLC_REFERENCES,
+ INTEL_ARCH_LLC_MISSES,
+ INTEL_ARCH_BRANCHES_RETIRED,
+ INTEL_ARCH_BRANCHES_MISPREDICTED,
+ INTEL_ARCH_TOPDOWN_SLOTS,
+ AMD_ZEN_BRANCHES_RETIRED,
+ },
+};
+
+struct {
+ uint64_t loads;
+ uint64_t stores;
+ uint64_t loads_stores;
+ uint64_t branches_retired;
+ uint64_t instructions_retired;
+} pmc_results;
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(-EFAULT);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(0) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+ uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(-EIO);
+
+ v ^= bits_to_flip;
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(-EIO);
+}
+
+static void run_and_measure_loop(uint32_t msr_base)
+{
+ const uint64_t branches_retired = rdmsr(msr_base + 0);
+ const uint64_t insn_retired = rdmsr(msr_base + 1);
+
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+
+ pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
+ pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
+}
+
+static void intel_guest_code(void)
+{
+ check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ check_msr(MSR_P6_EVNTSEL0, 0xffff);
+ check_msr(MSR_IA32_PMC0, 0xffff);
+ GUEST_SYNC(0);
+
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
+ wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+
+ run_and_measure_loop(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
+ }
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+ check_msr(MSR_K7_EVNTSEL0, 0xffff);
+ check_msr(MSR_K7_PERFCTR0, 0xffff);
+ GUEST_SYNC(0);
+
+ for (;;) {
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
+ wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+ run_and_measure_loop(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
+ }
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ get_ucall(vcpu, &uc);
+ TEST_ASSERT(uc.cmd == UCALL_SYNC,
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+ return uc.args[1];
+}
+
+static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
+{
+ uint64_t r;
+
+ memset(&pmc_results, 0, sizeof(pmc_results));
+ sync_global_to_guest(vcpu->vm, pmc_results);
+
+ r = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
+
+ sync_global_from_guest(vcpu->vm, pmc_results);
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
+{
+ uint64_t r;
+
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+ r = run_vcpu_to_sync(vcpu);
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
+
+ return !r;
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
+{
+ bool found = false;
+ int i;
+
+ for (i = 0; i < f->nevents; i++) {
+ if (found)
+ f->events[i - 1] = f->events[i];
+ else
+ found = f->events[i] == event;
+ }
+ if (found)
+ f->nevents--;
+}
+
+#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \
+ br >= NUM_BRANCHES : br == NUM_BRANCHES; \
+ \
+ if (br && !br_matched) \
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
+ __func__, br, NUM_BRANCHES); \
+ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
+ __func__, br); \
+ TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \
+ __func__, ir); \
+} while (0)
+
+#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ \
+ TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \
+ __func__, br); \
+ TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \
+ __func__, ir); \
+} while (0)
+
+static void test_without_filter(struct kvm_vcpu *vcpu)
+{
+ run_vcpu_and_sync_pmc_results(vcpu);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_with_filter(struct kvm_vcpu *vcpu,
+ struct __kvm_pmu_event_filter *__f)
+{
+ struct kvm_pmu_event_filter *f = (void *)__f;
+
+ vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+ run_vcpu_and_sync_pmc_results(vcpu);
+}
+
+static void test_amd_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = {
+ .action = KVM_PMU_EVENT_DENY,
+ .nevents = 1,
+ .events = {
+ RAW_EVENT(0x1C2, 0),
+ },
+ };
+
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_DENY;
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_allow_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_ALLOW;
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_DENY;
+
+ remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+ remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_ALLOW;
+
+ remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+ remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+/*
+ * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
+ *
+ * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs.
+ */
+static void test_pmu_config_disable(void (*guest_code)(void))
+{
+ struct kvm_vcpu *vcpu;
+ int r;
+ struct kvm_vm *vm;
+
+ r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
+ if (!(r & KVM_PMU_CAP_DISABLE))
+ return;
+
+ vm = vm_create(1);
+
+ vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+ TEST_ASSERT(!sanity_check_pmu(vcpu),
+ "Guest should not be able to use disabled PMU.");
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * On Intel, check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, and support for counting the number of branch
+ * instructions retired.
+ */
+static bool use_intel_pmu(void)
+{
+ return host_cpu_is_intel &&
+ kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
+ kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
+ kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
+}
+
+/*
+ * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding
+ * 0xc2,0 for Branch Instructions Retired.
+ */
+static bool use_amd_pmu(void)
+{
+ return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
+}
+
+/*
+ * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
+ * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
+ * supported on Intel Xeon processors:
+ * - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
+ */
+#define MEM_INST_RETIRED 0xD0
+#define MEM_INST_RETIRED_LOAD RAW_EVENT(MEM_INST_RETIRED, 0x81)
+#define MEM_INST_RETIRED_STORE RAW_EVENT(MEM_INST_RETIRED, 0x82)
+#define MEM_INST_RETIRED_LOAD_STORE RAW_EVENT(MEM_INST_RETIRED, 0x83)
+
+static bool supports_event_mem_inst_retired(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ if (x86_family(eax) == 0x6) {
+ switch (x86_model(eax)) {
+ /* Sapphire Rapids */
+ case 0x8F:
+ /* Ice Lake */
+ case 0x6A:
+ /* Skylake */
+ /* Cascade Lake */
+ case 0x55:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * "LS Dispatch", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+#define LS_DISPATCH 0x29
+#define LS_DISPATCH_LOAD RAW_EVENT(LS_DISPATCH, BIT(0))
+#define LS_DISPATCH_STORE RAW_EVENT(LS_DISPATCH, BIT(1))
+#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
+
+#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
+ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
+#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
+ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
+
+static void masked_events_guest_test(uint32_t msr_base)
+{
+ /*
+ * The actual value of the counters don't determine the outcome of
+ * the test. Only that they are zero or non-zero.
+ */
+ const uint64_t loads = rdmsr(msr_base + 0);
+ const uint64_t stores = rdmsr(msr_base + 1);
+ const uint64_t loads_stores = rdmsr(msr_base + 2);
+ int val;
+
+
+ __asm__ __volatile__("movl $0, %[v];"
+ "movl %[v], %%eax;"
+ "incl %[v];"
+ : [v]"+m"(val) :: "eax");
+
+ pmc_results.loads = rdmsr(msr_base + 0) - loads;
+ pmc_results.stores = rdmsr(msr_base + 1) - stores;
+ pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
+}
+
+static void intel_masked_events_guest_code(void)
+{
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
+ wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
+ wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);
+
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
+
+ masked_events_guest_test(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
+ }
+}
+
+static void amd_masked_events_guest_code(void)
+{
+ for (;;) {
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL1, 0);
+ wrmsr(MSR_K7_EVNTSEL2, 0);
+
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
+ wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
+ wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
+
+ masked_events_guest_test(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
+ }
+}
+
+static void run_masked_events_test(struct kvm_vcpu *vcpu,
+ const uint64_t masked_events[],
+ const int nmasked_events)
+{
+ struct __kvm_pmu_event_filter f = {
+ .nevents = nmasked_events,
+ .action = KVM_PMU_EVENT_ALLOW,
+ .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ };
+
+ memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+ test_with_filter(vcpu, &f);
+}
+
+#define ALLOW_LOADS BIT(0)
+#define ALLOW_STORES BIT(1)
+#define ALLOW_LOADS_STORES BIT(2)
+
+struct masked_events_test {
+ uint64_t intel_events[MAX_TEST_EVENTS];
+ uint64_t intel_event_end;
+ uint64_t amd_events[MAX_TEST_EVENTS];
+ uint64_t amd_event_end;
+ const char *msg;
+ uint32_t flags;
+};
+
+/*
+ * These are the test cases for the masked events tests.
+ *
+ * For each test, the guest enables 3 PMU counters (loads, stores,
+ * loads + stores). The filter is then set in KVM with the masked events
+ * provided. The test then verifies that the counters agree with which
+ * ones should be counting and which ones should be filtered.
+ */
+const struct masked_events_test test_cases[] = {
+ {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+ },
+ .msg = "Only allow loads.",
+ .flags = ALLOW_LOADS,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+ },
+ .msg = "Only allow stores.",
+ .flags = ALLOW_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
+ },
+ .msg = "Only allow loads + stores.",
+ .flags = ALLOW_LOADS_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
+ },
+ .msg = "Only allow loads and stores.",
+ .flags = ALLOW_LOADS | ALLOW_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+ },
+ .msg = "Only allow loads and loads + stores.",
+ .flags = ALLOW_LOADS | ALLOW_LOADS_STORES
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+ },
+ .msg = "Only allow stores and loads + stores.",
+ .flags = ALLOW_STORES | ALLOW_LOADS_STORES
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ },
+ .msg = "Only allow loads, stores, and loads + stores.",
+ .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
+ },
+};
+
+static int append_test_events(const struct masked_events_test *test,
+ uint64_t *events, int nevents)
+{
+ const uint64_t *evts;
+ int i;
+
+ evts = use_intel_pmu() ? test->intel_events : test->amd_events;
+ for (i = 0; i < MAX_TEST_EVENTS; i++) {
+ if (evts[i] == 0)
+ break;
+
+ events[nevents + i] = evts[i];
+ }
+
+ return nevents + i;
+}
+
+static bool bool_eq(bool a, bool b)
+{
+ return a == b;
+}
+
+static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
+ int nevents)
+{
+ int ntests = ARRAY_SIZE(test_cases);
+ int i, n;
+
+ for (i = 0; i < ntests; i++) {
+ const struct masked_events_test *test = &test_cases[i];
+
+ /* Do any test case events overflow MAX_TEST_EVENTS? */
+ assert(test->intel_event_end == 0);
+ assert(test->amd_event_end == 0);
+
+ n = append_test_events(test, events, nevents);
+
+ run_masked_events_test(vcpu, events, n);
+
+ TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
+ bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
+ bool_eq(pmc_results.loads_stores,
+ test->flags & ALLOW_LOADS_STORES),
+ "%s loads: %lu, stores: %lu, loads + stores: %lu",
+ test->msg, pmc_results.loads, pmc_results.stores,
+ pmc_results.loads_stores);
+ }
+}
+
+static void add_dummy_events(uint64_t *events, int nevents)
+{
+ int i;
+
+ for (i = 0; i < nevents; i++) {
+ int event_select = i % 0xFF;
+ bool exclude = ((i % 4) == 0);
+
+ if (event_select == MEM_INST_RETIRED ||
+ event_select == LS_DISPATCH)
+ event_select++;
+
+ events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
+ 0, exclude);
+ }
+}
+
+static void test_masked_events(struct kvm_vcpu *vcpu)
+{
+ int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
+ uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+
+ /* Run the test cases against a sparse PMU event filter. */
+ run_masked_events_tests(vcpu, events, 0);
+
+ /* Run the test cases against a dense PMU event filter. */
+ add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
+ run_masked_events_tests(vcpu, events, nevents);
+}
+
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+ struct __kvm_pmu_event_filter *__f)
+{
+ struct kvm_pmu_event_filter *f = (void *)__f;
+
+ return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
+
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+ uint32_t flags, uint32_t action)
+{
+ struct __kvm_pmu_event_filter f = {
+ .nevents = 1,
+ .flags = flags,
+ .action = action,
+ .events = {
+ event,
+ },
+ };
+
+ return set_pmu_event_filter(vcpu, &f);
+}
+
+static void test_filter_ioctl(struct kvm_vcpu *vcpu)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct __kvm_pmu_event_filter f;
+ uint64_t e = ~0ul;
+ int r;
+
+ /*
+ * Unfortunately having invalid bits set in event data is expected to
+ * pass when flags == 0 (bits other than eventsel+umask).
+ */
+ r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
+
+ e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+ f = base_event_filter;
+ f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+ f = base_event_filter;
+ f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+ f = base_event_filter;
+ f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+ f = base_event_filter;
+ f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t idx)
+{
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
+
+ /* Only OS_EN bit is enabled for fixed counter[idx]. */
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
+ }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+ uint32_t action, uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = {
+ .action = action,
+ .fixed_counter_bitmap = bitmap,
+ };
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+ uint32_t action,
+ uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = action;
+ f.fixed_counter_bitmap = bitmap;
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+ uint8_t nr_fixed_counters)
+{
+ unsigned int i;
+ uint32_t bitmap;
+ uint64_t count;
+
+ TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+ "Invalid nr_fixed_counters");
+
+ /*
+ * Check the fixed performance counter can count normally when KVM
+ * userspace doesn't set any pmu filter.
+ */
+ count = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(count, "Unexpected count value: %ld", count);
+
+ for (i = 0; i < BIT(nr_fixed_counters); i++) {
+ bitmap = BIT(i);
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+ /*
+ * Check that fixed_counter_bitmap has higher priority than
+ * events[] when both are set.
+ */
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+ }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint8_t idx;
+
+ /*
+ * Check that pmu_event_filter works as expected when it's applied to
+ * fixed performance counters.
+ */
+ for (idx = 0; idx < nr_fixed_counters; idx++) {
+ vm = vm_create_with_one_vcpu(&vcpu,
+ intel_run_fixed_counter_guest_code);
+ vcpu_args_set(vcpu, 1, idx);
+ __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+ kvm_vm_free(vm);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ void (*guest_code)(void);
+ struct kvm_vcpu *vcpu, *vcpu2 = NULL;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
+
+ TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
+ guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ TEST_REQUIRE(sanity_check_pmu(vcpu));
+
+ if (use_amd_pmu())
+ test_amd_deny_list(vcpu);
+
+ test_without_filter(vcpu);
+ test_member_deny_list(vcpu);
+ test_member_allow_list(vcpu);
+ test_not_member_deny_list(vcpu);
+ test_not_member_allow_list(vcpu);
+
+ if (use_intel_pmu() &&
+ supports_event_mem_inst_retired() &&
+ kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
+ vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
+ else if (use_amd_pmu())
+ vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);
+
+ if (vcpu2)
+ test_masked_events(vcpu2);
+ test_filter_ioctl(vcpu);
+
+ kvm_vm_free(vm);
+
+ test_pmu_config_disable(guest_code);
+ test_fixed_counter_bitmap();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
new file mode 100644
index 000000000000..1969f4ab9b28
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/memfd.h>
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define BASE_DATA_SLOT 10
+#define BASE_DATA_GPA ((uint64_t)(1ull << 32))
+#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE))
+
+/* Horrific macro so that the line info is captured accurately :-( */
+#define memcmp_g(gpa, pattern, size) \
+do { \
+ uint8_t *mem = (uint8_t *)gpa; \
+ size_t i; \
+ \
+ for (i = 0; i < size; i++) \
+ __GUEST_ASSERT(mem[i] == pattern, \
+ "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \
+ pattern, i, gpa + i, mem[i]); \
+} while (0)
+
+static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ TEST_ASSERT(mem[i] == pattern,
+ "Host expected 0x%x at gpa 0x%lx, got 0x%x",
+ pattern, gpa + i, mem[i]);
+}
+
+/*
+ * Run memory conversion tests with explicit conversion:
+ * Execute KVM hypercall to map/unmap gpa range which will cause userspace exit
+ * to back/unback private memory. Subsequent accesses by guest to the gpa range
+ * will not cause exit to userspace.
+ *
+ * Test memory conversion scenarios with following steps:
+ * 1) Access private memory using private access and verify that memory contents
+ * are not visible to userspace.
+ * 2) Convert memory to shared using explicit conversions and ensure that
+ * userspace is able to access the shared regions.
+ * 3) Convert memory back to private using explicit conversions and ensure that
+ * userspace is again not able to access converted private regions.
+ */
+
+#define GUEST_STAGE(o, s) { .offset = o, .size = s }
+
+enum ucall_syncs {
+ SYNC_SHARED,
+ SYNC_PRIVATE,
+};
+
+static void guest_sync_shared(uint64_t gpa, uint64_t size,
+ uint8_t current_pattern, uint8_t new_pattern)
+{
+ GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
+}
+
+static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
+{
+ GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
+}
+
+/* Arbitrary values, KVM doesn't care about the attribute flags. */
+#define MAP_GPA_SET_ATTRIBUTES BIT(0)
+#define MAP_GPA_SHARED BIT(1)
+#define MAP_GPA_DO_FALLOCATE BIT(2)
+
+static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
+ bool do_fallocate)
+{
+ uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
+
+ if (map_shared)
+ flags |= MAP_GPA_SHARED;
+ if (do_fallocate)
+ flags |= MAP_GPA_DO_FALLOCATE;
+ kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+ guest_map_mem(gpa, size, true, do_fallocate);
+}
+
+static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+ guest_map_mem(gpa, size, false, do_fallocate);
+}
+
+struct {
+ uint64_t offset;
+ uint64_t size;
+} static const test_ranges[] = {
+ GUEST_STAGE(0, PAGE_SIZE),
+ GUEST_STAGE(0, SZ_2M),
+ GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
+ GUEST_STAGE(PAGE_SIZE, SZ_2M),
+ GUEST_STAGE(SZ_2M, PAGE_SIZE),
+};
+
+static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
+{
+ const uint8_t def_p = 0xaa;
+ const uint8_t init_p = 0xcc;
+ uint64_t j;
+ int i;
+
+ /* Memory should be shared by default. */
+ memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
+ memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
+ guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
+
+ memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+
+ for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+ uint64_t gpa = base_gpa + test_ranges[i].offset;
+ uint64_t size = test_ranges[i].size;
+ uint8_t p1 = 0x11;
+ uint8_t p2 = 0x22;
+ uint8_t p3 = 0x33;
+ uint8_t p4 = 0x44;
+
+ /*
+ * Set the test region to pattern one to differentiate it from
+ * the data range as a whole (contains the initial pattern).
+ */
+ memset((void *)gpa, p1, size);
+
+ /*
+ * Convert to private, set and verify the private data, and
+ * then verify that the rest of the data (map shared) still
+ * holds the initial pattern, and that the host always sees the
+ * shared memory (initial pattern). Unlike shared memory,
+ * punching a hole in private memory is destructive, i.e.
+ * previous values aren't guaranteed to be preserved.
+ */
+ guest_map_private(gpa, size, do_fallocate);
+
+ if (size > PAGE_SIZE) {
+ memset((void *)gpa, p2, PAGE_SIZE);
+ goto skip;
+ }
+
+ memset((void *)gpa, p2, size);
+ guest_sync_private(gpa, size, p1);
+
+ /*
+ * Verify that the private memory was set to pattern two, and
+ * that shared memory still holds the initial pattern.
+ */
+ memcmp_g(gpa, p2, size);
+ if (gpa > base_gpa)
+ memcmp_g(base_gpa, init_p, gpa - base_gpa);
+ if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
+ memcmp_g(gpa + size, init_p,
+ (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
+
+ /*
+ * Convert odd-number page frames back to shared to verify KVM
+ * also correctly handles holes in private ranges.
+ */
+ for (j = 0; j < size; j += PAGE_SIZE) {
+ if ((j >> PAGE_SHIFT) & 1) {
+ guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
+ guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
+
+ memcmp_g(gpa + j, p3, PAGE_SIZE);
+ } else {
+ guest_sync_private(gpa + j, PAGE_SIZE, p1);
+ }
+ }
+
+skip:
+ /*
+ * Convert the entire region back to shared, explicitly write
+ * pattern three to fill in the even-number frames before
+ * asking the host to verify (and write pattern four).
+ */
+ guest_map_shared(gpa, size, do_fallocate);
+ memset((void *)gpa, p3, size);
+ guest_sync_shared(gpa, size, p3, p4);
+ memcmp_g(gpa, p4, size);
+
+ /* Reset the shared memory back to the initial pattern. */
+ memset((void *)gpa, init_p, size);
+
+ /*
+ * Free (via PUNCH_HOLE) *all* private memory so that the next
+ * iteration starts from a clean slate, e.g. with respect to
+ * whether or not there are pages/folios in guest_mem.
+ */
+ guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
+ }
+}
+
+static void guest_punch_hole(uint64_t gpa, uint64_t size)
+{
+ /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
+ uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
+
+ kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+/*
+ * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
+ * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
+ * (subsequent fault) should zero memory.
+ */
+static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
+{
+ const uint8_t init_p = 0xcc;
+ int i;
+
+ /*
+ * Convert the entire range to private, this testcase is all about
+ * punching holes in guest_memfd, i.e. shared mappings aren't needed.
+ */
+ guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
+
+ for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+ uint64_t gpa = base_gpa + test_ranges[i].offset;
+ uint64_t size = test_ranges[i].size;
+
+ /*
+ * Free all memory before each iteration, even for the !precise
+ * case where the memory will be faulted back in. Freeing and
+ * reallocating should obviously work, and freeing all memory
+ * minimizes the probability of cross-testcase influence.
+ */
+ guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
+
+ /* Fault-in and initialize memory, and verify the pattern. */
+ if (precise) {
+ memset((void *)gpa, init_p, size);
+ memcmp_g(gpa, init_p, size);
+ } else {
+ memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
+ memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+ }
+
+ /*
+ * Punch a hole at the target range and verify that reads from
+ * the guest succeed and return zeroes.
+ */
+ guest_punch_hole(gpa, size);
+ memcmp_g(gpa, 0, size);
+ }
+}
+
+static void guest_code(uint64_t base_gpa)
+{
+ /*
+ * Run the conversion test twice, with and without doing fallocate() on
+ * the guest_memfd backing when converting between shared and private.
+ */
+ guest_test_explicit_conversion(base_gpa, false);
+ guest_test_explicit_conversion(base_gpa, true);
+
+ /*
+ * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
+ * faulted in, once with only the target range faulted in.
+ */
+ guest_test_punch_hole(base_gpa, false);
+ guest_test_punch_hole(base_gpa, true);
+ GUEST_DONE();
+}
+
+static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ uint64_t gpa = run->hypercall.args[0];
+ uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
+ bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
+ bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
+ bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
+ struct kvm_vm *vm = vcpu->vm;
+
+ TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
+ "Wanted MAP_GPA_RANGE (%u), got '%llu'",
+ KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
+
+ if (do_fallocate)
+ vm_guest_mem_fallocate(vm, gpa, size, map_shared);
+
+ if (set_attributes)
+ vm_set_memory_attributes(vm, gpa, size,
+ map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
+ run->hypercall.ret = 0;
+}
+
+static bool run_vcpus;
+
+static void *__test_mem_conversions(void *__vcpu)
+{
+ struct kvm_vcpu *vcpu = __vcpu;
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vm *vm = vcpu->vm;
+ struct ucall uc;
+
+ while (!READ_ONCE(run_vcpus))
+ ;
+
+ for ( ;; ) {
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_HYPERCALL) {
+ handle_exit_hypercall(vcpu);
+ continue;
+ }
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC: {
+ uint64_t gpa = uc.args[1];
+ size_t size = uc.args[2];
+ size_t i;
+
+ TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
+ uc.args[0] == SYNC_PRIVATE,
+ "Unknown sync command '%ld'", uc.args[0]);
+
+ for (i = 0; i < size; i += vm->page_size) {
+ size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
+ uint8_t *hva = addr_gpa2hva(vm, gpa + i);
+
+ /* In all cases, the host should observe the shared data. */
+ memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
+
+ /* For shared, write the new pattern to guest memory. */
+ if (uc.args[0] == SYNC_SHARED)
+ memset(hva, uc.args[4], nr_bytes);
+ }
+ break;
+ }
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+}
+
+static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
+ uint32_t nr_memslots)
+{
+ /*
+ * Allocate enough memory so that each vCPU's chunk of memory can be
+ * naturally aligned with respect to the size of the backing store.
+ */
+ const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
+ const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
+ const size_t memfd_size = per_cpu_size * nr_vcpus;
+ const size_t slot_size = memfd_size / nr_memslots;
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ pthread_t threads[KVM_MAX_VCPUS];
+ struct kvm_vm *vm;
+ int memfd, i;
+
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+ };
+
+ TEST_ASSERT(slot_size * nr_memslots == memfd_size,
+ "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
+ memfd_size, nr_memslots);
+ vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
+
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
+
+ memfd = vm_create_guest_memfd(vm, memfd_size, 0);
+
+ for (i = 0; i < nr_memslots; i++)
+ vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
+ BASE_DATA_SLOT + i, slot_size / vm->page_size,
+ KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
+
+ for (i = 0; i < nr_vcpus; i++) {
+ uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;
+
+ vcpu_args_set(vcpus[i], 1, gpa);
+
+ /*
+ * Map only what is needed so that an out-of-bounds access
+ * results #PF => SHUTDOWN instead of data corruption.
+ */
+ virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
+
+ pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
+ }
+
+ WRITE_ONCE(run_vcpus, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(threads[i], NULL);
+
+ kvm_vm_free(vm);
+
+ /*
+ * Allocate and free memory from the guest_memfd after closing the VM
+ * fd. The guest_memfd is gifted a reference to its owning VM, i.e.
+ * should prevent the VM from being fully destroyed until the last
+ * reference to the guest_memfd is also put.
+ */
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
+
+ close(memfd);
+}
+
+static void usage(const char *cmd)
+{
+ puts("");
+ printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
+ puts("");
+ backing_src_help("-s");
+ puts("");
+ puts(" -n: specify the number of vcpus (default: 1)");
+ puts("");
+ puts(" -m: specify the number of memslots (default: 1)");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
+ uint32_t nr_memslots = 1;
+ uint32_t nr_vcpus = 1;
+ int opt;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+ while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
+ switch (opt) {
+ case 's':
+ src_type = parse_backing_src_type(optarg);
+ break;
+ case 'n':
+ nr_vcpus = atoi_positive("nr_vcpus", optarg);
+ break;
+ case 'm':
+ nr_memslots = atoi_positive("nr_memslots", optarg);
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ exit(0);
+ }
+ }
+
+ test_mem_conversions(src_type, nr_vcpus, nr_memslots);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
new file mode 100644
index 000000000000..13e72fcec8dd
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <linux/kvm.h>
+#include <pthread.h>
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* Arbitrarily selected to avoid overlaps with anything else */
+#define EXITS_TEST_GVA 0xc0000000
+#define EXITS_TEST_GPA EXITS_TEST_GVA
+#define EXITS_TEST_NPAGES 1
+#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
+#define EXITS_TEST_SLOT 10
+
+static uint64_t guest_repeatedly_read(void)
+{
+ volatile uint64_t value;
+
+ while (true)
+ value = *((uint64_t *) EXITS_TEST_GVA);
+
+ return value;
+}
+
+static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ r = _vcpu_run(vcpu);
+ if (r) {
+ TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
+ TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
+ }
+ return vcpu->run->exit_reason;
+}
+
+const struct vm_shape protected_vm_shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+};
+
+static void test_private_access_memslot_deleted(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ pthread_t vm_thread;
+ void *thread_return;
+ uint32_t exit_reason;
+
+ vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+ guest_repeatedly_read);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ EXITS_TEST_GPA, EXITS_TEST_SLOT,
+ EXITS_TEST_NPAGES,
+ KVM_MEM_GUEST_MEMFD);
+
+ virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+ /* Request to access page privately */
+ vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+ pthread_create(&vm_thread, NULL,
+ (void *(*)(void *))run_vcpu_get_exit_reason,
+ (void *)vcpu);
+
+ vm_mem_region_delete(vm, EXITS_TEST_SLOT);
+
+ pthread_join(vm_thread, &thread_return);
+ exit_reason = (uint32_t)(uint64_t)thread_return;
+
+ TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+ kvm_vm_free(vm);
+}
+
+static void test_private_access_memslot_not_private(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint32_t exit_reason;
+
+ vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+ guest_repeatedly_read);
+
+ /* Add a non-private memslot (flags = 0) */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ EXITS_TEST_GPA, EXITS_TEST_SLOT,
+ EXITS_TEST_NPAGES, 0);
+
+ virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+ /* Request to access page privately */
+ vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+ exit_reason = run_vcpu_get_exit_reason(vcpu);
+
+ TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+ test_private_access_memslot_deleted();
+ test_private_access_memslot_not_private();
+}
diff --git a/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c
new file mode 100644
index 000000000000..cbc92a862ea9
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test edge cases and race conditions in kvm_recalculate_apic_map().
+ */
+
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <time.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "apic.h"
+
+#define TIMEOUT 5 /* seconds */
+
+#define LAPIC_DISABLED 0
+#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
+#define MAX_XAPIC_ID 0xff
+
+static void *race(void *arg)
+{
+ struct kvm_lapic_state lapic = {};
+ struct kvm_vcpu *vcpu = arg;
+
+ while (1) {
+ /* Trigger kvm_recalculate_apic_map(). */
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ struct kvm_vcpu *vcpuN;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ time_t t;
+ int i;
+
+ kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
+
+ /*
+ * Create the max number of vCPUs supported by selftests so that KVM
+ * has decent amount of work to do when recalculating the map, i.e. to
+ * make the problematic window large enough to hit.
+ */
+ vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
+
+ /*
+ * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
+ * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
+ */
+ for (i = 0; i < KVM_MAX_VCPUS; i++)
+ vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
+
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+
+ vcpuN = vcpus[KVM_MAX_VCPUS - 1];
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
+ }
+
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
new file mode 100644
index 000000000000..49913784bc82
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+static void guest_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT_NE(get_bsp_flag(), 0);
+
+ GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT_EQ(get_bsp_flag(), 0);
+
+ GUEST_DONE();
+}
+
+static void test_set_invalid_bsp(struct kvm_vm *vm)
+{
+ unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+ int r;
+
+ if (max_vcpu_id) {
+ r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1));
+ TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail");
+ }
+
+ r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32));
+ TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail");
+}
+
+static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
+{
+ int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
+ (void *)(unsigned long)vcpu->id);
+
+ TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ int stage;
+
+ for (stage = 0; stage < 2; stage++) {
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ test_set_bsp_busy(vcpu, "while running vm");
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(stage == 1,
+ "Expected GUEST_DONE in stage 2, got stage %d",
+ stage);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+ }
+}
+
+static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+ struct kvm_vcpu *vcpus[])
+{
+ struct kvm_vm *vm;
+ uint32_t i;
+
+ vm = vm_create(nr_vcpus);
+
+ test_set_invalid_bsp(vm);
+
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
+
+ for (i = 0; i < nr_vcpus; i++)
+ vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
+ guest_not_bsp_vcpu);
+ return vm;
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu_id)
+{
+ struct kvm_vcpu *vcpus[2];
+ struct kvm_vm *vm;
+
+ vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
+
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
+
+ kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+ struct kvm_vcpu *vcpus[2];
+ struct kvm_vm *vm;
+
+ vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
+
+ test_set_bsp_busy(vcpus[1], "after adding vcpu");
+
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
+
+ test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
+
+ run_vm_bsp(0);
+ run_vm_bsp(1);
+ run_vm_bsp(0);
+
+ check_set_bsp_busy();
+}
diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c
new file mode 100644
index 000000000000..f4095a3d1278
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \
+do { \
+ struct kvm_sregs new; \
+ int rc; \
+ \
+ /* Skip the sub-test, the feature/bit is supported. */ \
+ if (orig.cr & bit) \
+ break; \
+ \
+ memcpy(&new, &orig, sizeof(sregs)); \
+ new.cr |= bit; \
+ \
+ rc = _vcpu_sregs_set(vcpu, &new); \
+ TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \
+ \
+ /* Sanity check that KVM didn't change anything. */ \
+ vcpu_sregs_get(vcpu, &new); \
+ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
+} while (0)
+
+#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
+ X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
+ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
+ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+
+static uint64_t calc_supported_cr4_feature_bits(void)
+{
+ uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4;
+
+ if (kvm_cpu_has(X86_FEATURE_UMIP))
+ cr4 |= X86_CR4_UMIP;
+ if (kvm_cpu_has(X86_FEATURE_LA57))
+ cr4 |= X86_CR4_LA57;
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ cr4 |= X86_CR4_VMXE;
+ if (kvm_cpu_has(X86_FEATURE_SMX))
+ cr4 |= X86_CR4_SMXE;
+ if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
+ cr4 |= X86_CR4_FSGSBASE;
+ if (kvm_cpu_has(X86_FEATURE_PCID))
+ cr4 |= X86_CR4_PCIDE;
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
+ cr4 |= X86_CR4_OSXSAVE;
+ if (kvm_cpu_has(X86_FEATURE_SMEP))
+ cr4 |= X86_CR4_SMEP;
+ if (kvm_cpu_has(X86_FEATURE_SMAP))
+ cr4 |= X86_CR4_SMAP;
+ if (kvm_cpu_has(X86_FEATURE_PKU))
+ cr4 |= X86_CR4_PKE;
+
+ return cr4;
+}
+
+static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4)
+{
+ struct kvm_sregs sregs;
+ int rc, i;
+
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+ sregs.cr4 |= cr4;
+ rc = _vcpu_sregs_set(vcpu, &sregs);
+ TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+ TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) ==
+ (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)),
+ "KVM didn't %s OSXSAVE in CPUID as expected",
+ (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear");
+
+ TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) ==
+ (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)),
+ "KVM didn't %s OSPKE in CPUID as expected",
+ (sregs.cr4 & X86_CR4_PKE) ? "set" : "clear");
+
+ vcpu_sregs_get(vcpu, &sregs);
+ TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+ sregs.cr4, cr4);
+
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
+
+ for (i = 32; i < 64; i++)
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
+
+ /* NW without CD is illegal, as is PG without PE. */
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ /*
+ * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+ * use it to verify KVM enforces guest CPUID even if *userspace* never
+ * sets CPUID.
+ */
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
+ test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4);
+ kvm_vm_free(vm);
+
+ /* Create a "real" VM with a fully populated guest CPUID and verify
+ * APIC_BASE and all supported CR4 can be set.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.apic_base = 1 << 10;
+ rc = _vcpu_sregs_set(vcpu, &sregs);
+ TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+ sregs.apic_base);
+ sregs.apic_base = 1 << 11;
+ rc = _vcpu_sregs_set(vcpu, &sregs);
+ TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+ sregs.apic_base);
+
+ test_cr_bits(vcpu, calc_supported_cr4_feature_bits());
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
new file mode 100644
index 000000000000..b238615196ad
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+#define SVM_SEV_FEAT_DEBUG_SWAP 32u
+
+/*
+ * Some features may have hidden dependencies, or may only work
+ * for certain VM types. Err on the side of safety and don't
+ * expect that all supported features can be passed one by one
+ * to KVM_SEV_INIT2.
+ *
+ * (Well, right now there's only one...)
+ */
+#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP
+
+int kvm_fd;
+u64 supported_vmsa_features;
+bool have_sev_es;
+bool have_snp;
+
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+ struct kvm_sev_cmd cmd = {
+ .id = cmd_id,
+ .data = (uint64_t)data,
+ .sev_fd = open_sev_dev_path_or_exit(),
+ };
+ int ret;
+
+ ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
+ TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS,
+ "%d failed: fw error: %d\n",
+ cmd_id, cmd.error);
+
+ return ret;
+}
+
+static void test_init2(unsigned long vm_type, struct kvm_sev_init *init)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones_type(vm_type);
+ ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+ TEST_ASSERT(ret == 0,
+ "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d",
+ ret, errno);
+ kvm_vm_free(vm);
+}
+
+static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones_type(vm_type);
+ ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "KVM_SEV_INIT2 should fail, %s.",
+ msg);
+ kvm_vm_free(vm);
+}
+
+void test_vm_types(void)
+{
+ test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){});
+
+ /*
+ * TODO: check that unsupported types cannot be created. Probably
+ * a separate selftest.
+ */
+ if (have_sev_es)
+ test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+
+ if (have_snp)
+ test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){});
+
+ test_init2_invalid(0, &(struct kvm_sev_init){},
+ "VM type is KVM_X86_DEFAULT_VM");
+ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
+ test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){},
+ "VM type is KVM_X86_SW_PROTECTED_VM");
+}
+
+void test_flags(uint32_t vm_type)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ test_init2_invalid(vm_type,
+ &(struct kvm_sev_init){ .flags = BIT(i) },
+ "invalid flag");
+}
+
+void test_features(uint32_t vm_type, uint64_t supported_features)
+{
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (!(supported_features & BIT_ULL(i)))
+ test_init2_invalid(vm_type,
+ &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
+ "unknown feature");
+ else if (KNOWN_FEATURES & BIT_ULL(i))
+ test_init2(vm_type,
+ &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int kvm_fd = open_kvm_dev_path_or_exit();
+ bool have_sev;
+
+ TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV,
+ KVM_X86_SEV_VMSA_FEATURES) == 0);
+ kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV,
+ KVM_X86_SEV_VMSA_FEATURES,
+ &supported_vmsa_features);
+
+ have_sev = kvm_cpu_has(X86_FEATURE_SEV);
+ TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)),
+ "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+ kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
+ have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+ TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
+ "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+ kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+
+ have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP);
+ TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)),
+ "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not",
+ kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM);
+
+ test_vm_types();
+
+ test_flags(KVM_X86_SEV_VM);
+ if (have_sev_es)
+ test_flags(KVM_X86_SEV_ES_VM);
+ if (have_snp)
+ test_flags(KVM_X86_SNP_VM);
+
+ test_features(KVM_X86_SEV_VM, 0);
+ if (have_sev_es)
+ test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+ if (have_snp)
+ test_features(KVM_X86_SNP_VM, supported_vmsa_features);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
new file mode 100644
index 000000000000..0a6dfba3905b
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+#include "kselftest.h"
+
+#define NR_MIGRATE_TEST_VCPUS 4
+#define NR_MIGRATE_TEST_VMS 3
+#define NR_LOCK_TESTING_THREADS 3
+#define NR_LOCK_TESTING_ITERATIONS 10000
+
+bool have_sev_es;
+
+static struct kvm_vm *sev_vm_create(bool es)
+{
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_barebones();
+ if (!es)
+ sev_vm_init(vm);
+ else
+ sev_es_vm_init(vm);
+
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(vm, i);
+
+ sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
+
+ if (es)
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+ return vm;
+}
+
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
+{
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_barebones();
+ if (!with_vcpus)
+ return vm;
+
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(vm, i);
+
+ return vm;
+}
+
+static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ int ret;
+
+ ret = __sev_migrate_from(dst, src);
+ TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void test_sev_migrate_from(bool es)
+{
+ struct kvm_vm *src_vm;
+ struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
+ int i, ret;
+
+ src_vm = sev_vm_create(es);
+ for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+ dst_vms[i] = aux_vm_create(true);
+
+ /* Initial migration from the src to the first dst. */
+ sev_migrate_from(dst_vms[0], src_vm);
+
+ for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
+ sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
+
+ /* Migrate the guest back to the original VM. */
+ ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
+ TEST_ASSERT(ret == -1 && errno == EIO,
+ "VM that was migrated from should be dead. ret %d, errno: %d", ret,
+ errno);
+
+ kvm_vm_free(src_vm);
+ for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+ kvm_vm_free(dst_vms[i]);
+}
+
+struct locking_thread_input {
+ struct kvm_vm *vm;
+ struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
+};
+
+static void *locking_test_thread(void *arg)
+{
+ int i, j;
+ struct locking_thread_input *input = (struct locking_thread_input *)arg;
+
+ for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
+ j = i % NR_LOCK_TESTING_THREADS;
+ __sev_migrate_from(input->vm, input->source_vms[j]);
+ }
+
+ return NULL;
+}
+
+static void test_sev_migrate_locking(void)
+{
+ struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
+ pthread_t pt[NR_LOCK_TESTING_THREADS];
+ int i;
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
+ input[i].vm = sev_vm_create(/* es= */ false);
+ input[0].source_vms[i] = input[i].vm;
+ }
+ for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
+ memcpy(input[i].source_vms, input[0].source_vms,
+ sizeof(input[i].source_vms));
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ pthread_join(pt[i], NULL);
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ kvm_vm_free(input[i].vm);
+}
+
+static void test_sev_migrate_parameters(void)
+{
+ struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
+ *sev_es_vm_no_vmsa;
+ int ret;
+
+ vm_no_vcpu = vm_create_barebones();
+ vm_no_sev = aux_vm_create(true);
+ ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Migrations require SEV enabled. ret %d, errno: %d", ret,
+ errno);
+
+ if (!have_sev_es)
+ goto out;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ sev_es_vm = sev_vm_create(/* es= */ true);
+ sev_es_vm_no_vmsa = vm_create_barebones();
+ sev_es_vm_init(sev_es_vm_no_vmsa);
+ __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+
+ ret = __sev_migrate_from(sev_vm, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(sev_es_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
+ ret, errno);
+
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(sev_es_vm);
+ kvm_vm_free(sev_es_vm_no_vmsa);
+out:
+ kvm_vm_free(vm_no_vcpu);
+ kvm_vm_free(vm_no_sev);
+}
+
+static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ int ret;
+
+ ret = __sev_mirror_create(dst, src);
+ TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
+{
+ struct kvm_sev_guest_status status;
+ int cmd_id;
+
+ for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+ int ret;
+
+ /*
+ * These commands are allowed for mirror VMs, all others are
+ * not.
+ */
+ switch (cmd_id) {
+ case KVM_SEV_LAUNCH_UPDATE_VMSA:
+ case KVM_SEV_GUEST_STATUS:
+ case KVM_SEV_DBG_DECRYPT:
+ case KVM_SEV_DBG_ENCRYPT:
+ continue;
+ default:
+ break;
+ }
+
+ /*
+ * These commands should be disallowed before the data
+ * parameter is examined so NULL is OK here.
+ */
+ ret = __vm_sev_ioctl(vm, cmd_id, NULL);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able call command: %d. ret: %d, errno: %d",
+ cmd_id, ret, errno);
+ }
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+}
+
+static void test_sev_mirror(bool es)
+{
+ struct kvm_vm *src_vm, *dst_vm;
+ int i;
+
+ src_vm = sev_vm_create(es);
+ dst_vm = aux_vm_create(false);
+
+ sev_mirror_create(dst_vm, src_vm);
+
+ /* Check that we can complete creation of the mirror VM. */
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(dst_vm, i);
+
+ if (es)
+ vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+ verify_mirror_allowed_cmds(dst_vm);
+
+ kvm_vm_free(src_vm);
+ kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+ struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+ int ret;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ vm_with_vcpu = aux_vm_create(true);
+ vm_no_vcpu = aux_vm_create(false);
+
+ ret = __sev_mirror_create(sev_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able copy context to self. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Copy context requires SEV enabled. ret %d, errno: %d", ret,
+ errno);
+
+ ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d",
+ ret, errno);
+
+ if (!have_sev_es)
+ goto out;
+
+ sev_es_vm = sev_vm_create(/* es= */ true);
+ ret = __sev_mirror_create(sev_vm, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_mirror_create(sev_es_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ kvm_vm_free(sev_es_vm);
+
+out:
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(vm_with_vcpu);
+ kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+ struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
+ *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ dst_vm = aux_vm_create(true);
+ dst2_vm = aux_vm_create(true);
+ dst3_vm = aux_vm_create(true);
+ mirror_vm = aux_vm_create(false);
+ dst_mirror_vm = aux_vm_create(false);
+ dst2_mirror_vm = aux_vm_create(false);
+ dst3_mirror_vm = aux_vm_create(false);
+
+ sev_mirror_create(mirror_vm, sev_vm);
+
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
+
+ sev_migrate_from(dst2_vm, dst_vm);
+ sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
+
+ sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
+ sev_migrate_from(dst3_vm, dst2_vm);
+
+ kvm_vm_free(dst_vm);
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(dst2_vm);
+ kvm_vm_free(dst3_vm);
+ kvm_vm_free(mirror_vm);
+ kvm_vm_free(dst_mirror_vm);
+ kvm_vm_free(dst2_mirror_vm);
+ kvm_vm_free(dst3_mirror_vm);
+
+ /*
+ * Run similar test be destroy mirrors before mirrored VMs to ensure
+ * destruction is done safely.
+ */
+ sev_vm = sev_vm_create(/* es= */ false);
+ dst_vm = aux_vm_create(true);
+ mirror_vm = aux_vm_create(false);
+ dst_mirror_vm = aux_vm_create(false);
+
+ sev_mirror_create(mirror_vm, sev_vm);
+
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
+
+ kvm_vm_free(mirror_vm);
+ kvm_vm_free(dst_mirror_vm);
+ kvm_vm_free(dst_vm);
+ kvm_vm_free(sev_vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+ if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+ test_sev_migrate_from(/* es= */ false);
+ if (have_sev_es)
+ test_sev_migrate_from(/* es= */ true);
+ test_sev_migrate_locking();
+ test_sev_migrate_parameters();
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+ test_sev_move_copy();
+ }
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+ test_sev_mirror(/* es= */ false);
+ if (have_sev_es)
+ test_sev_mirror(/* es= */ true);
+ test_sev_mirror_parameters();
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
new file mode 100644
index 000000000000..86ad1c7d068f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <math.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "linux/psp-sev.h"
+#include "sev.h"
+
+
+#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+
+static void guest_snp_code(void)
+{
+ uint64_t sev_msr = rdmsr(MSR_AMD64_SEV);
+
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED);
+
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ vmgexit();
+}
+
+static void guest_sev_es_code(void)
+{
+ /* TODO: Check CPUID after GHCB-based hypercall support is added. */
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+
+ /*
+ * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply
+ * force "termination" to signal "done" via the GHCB MSR protocol.
+ */
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ vmgexit();
+}
+
+static void guest_sev_code(void)
+{
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+
+ GUEST_DONE();
+}
+
+/* Stash state passed via VMSA before any compiled code runs. */
+extern void guest_code_xsave(void);
+asm("guest_code_xsave:\n"
+ "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n"
+ "xor %edx, %edx\n"
+ "xsave (%rdi)\n"
+ "jmp guest_sev_es_code");
+
+static void compare_xsave(u8 *from_host, u8 *from_guest)
+{
+ int i;
+ bool bad = false;
+ for (i = 0; i < 4095; i++) {
+ if (from_host[i] != from_guest[i]) {
+ printf("mismatch at %u | %02hhx %02hhx\n",
+ i, from_host[i], from_guest[i]);
+ bad = true;
+ }
+ }
+
+ if (bad)
+ abort();
+}
+
+static void test_sync_vmsa(uint32_t type, uint64_t policy)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t gva;
+ void *hva;
+
+ double x87val = M_PI;
+ struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
+
+ vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu);
+ gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
+ MEM_REGION_TEST_DATA);
+ hva = addr_gva2hva(vm, gva);
+
+ vcpu_args_set(vcpu, 1, gva);
+
+ asm("fninit\n"
+ "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n"
+ "fldl %3\n"
+ "xsave (%2)\n"
+ "fstp %%st\n"
+ : "=m"(xsave)
+ : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val)
+ : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
+ vcpu_xsave_set(vcpu, &xsave);
+
+ vm_sev_launch(vm, policy, NULL);
+
+ /* This page is shared, so make it decrypted. */
+ memset(hva, 0, PAGE_SIZE);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+ "Wanted SYSTEM_EVENT, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+ TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+ TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+
+ compare_xsave((u8 *)&xsave, (u8 *)hva);
+
+ kvm_vm_free(vm);
+}
+
+static void test_sev(void *guest_code, uint32_t type, uint64_t policy)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
+
+ /* TODO: Validate the measurement is as expected. */
+ vm_sev_launch(vm, policy, NULL);
+
+ for (;;) {
+ vcpu_run(vcpu);
+
+ if (is_sev_es_vm(vm)) {
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+ "Wanted SYSTEM_EVENT, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+ TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+ TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+ break;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+static void guest_shutdown_code(void)
+{
+ struct desc_ptr idt;
+
+ /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
+ memset(&idt, 0, sizeof(idt));
+ set_idt(&idt);
+
+ __asm__ __volatile__("ud2");
+}
+
+static void test_sev_shutdown(uint32_t type, uint64_t policy)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
+
+ vm_sev_launch(vm, policy, NULL);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Wanted SHUTDOWN, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+
+ kvm_vm_free(vm);
+}
+
+static void test_sev_smoke(void *guest, uint32_t type, uint64_t policy)
+{
+ const u64 xf_mask = XFEATURE_MASK_X87_AVX;
+
+ if (type == KVM_X86_SNP_VM)
+ test_sev(guest, type, policy | SNP_POLICY_DBG);
+ else
+ test_sev(guest, type, policy | SEV_POLICY_NO_DBG);
+ test_sev(guest, type, policy);
+
+ if (type == KVM_X86_SEV_VM)
+ return;
+
+ test_sev_shutdown(type, policy);
+
+ if (kvm_has_cap(KVM_CAP_XCRS) &&
+ (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+ test_sync_vmsa(type, policy);
+ if (type == KVM_X86_SNP_VM)
+ test_sync_vmsa(type, policy | SNP_POLICY_DBG);
+ else
+ test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_ES))
+ test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_SNP))
+ test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy());
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
new file mode 100644
index 000000000000..fabeeaddfb3a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Test that KVM emulates instructions in response to EPT violations when
+ * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
+ */
+#include "flds_emulation.h"
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA 0x0000123456789000
+#define MEM_REGION_GPA 0x0000000700000000
+#define MEM_REGION_SLOT 10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(bool tdp_enabled)
+{
+ uint64_t error_code;
+ uint64_t vector;
+
+ vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
+
+ /*
+ * When TDP is enabled, flds will trigger an emulation failure, exit to
+ * userspace, and then the selftest host "VMM" skips the instruction.
+ *
+ * When TDP is disabled, no instruction emulation is required so flds
+ * should generate #PF(RSVD).
+ */
+ if (tdp_enabled) {
+ GUEST_ASSERT(!vector);
+ } else {
+ GUEST_ASSERT_EQ(vector, PF_VECTOR);
+ GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ uint64_t *pte;
+ uint64_t *hva;
+ uint64_t gpa;
+ int rc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
+
+ rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+ TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+ vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / PAGE_SIZE, 0);
+ gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+ MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
+ virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, PAGE_SIZE);
+
+ pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
+ *pte |= BIT_ULL(MAXPHYADDR);
+
+ vcpu_run(vcpu);
+
+ /*
+ * When TDP is enabled, KVM must emulate in response the guest physical
+ * address that is illegal from the guest's perspective, but is legal
+ * from hardware's perspeective. This should result in an emulation
+ * failure exit to userspace since KVM doesn't support emulating flds.
+ */
+ if (kvm_is_tdp_enabled()) {
+ handle_flds_emulation_failure_exit(vcpu);
+ vcpu_run(vcpu);
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c
index 613c42c5a9b8..55c88d664a94 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86/smm_test.c
@@ -4,7 +4,6 @@
*
* Tests for SMM.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -19,10 +18,6 @@
#include "vmx.h"
#include "svm_util.h"
-#define VCPU_ID 1
-
-#define PAGE_SIZE 4096
-
#define SMRAM_SIZE 65536
#define SMRAM_MEMSLOT ((1 << 16) | 1)
#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
@@ -53,15 +48,28 @@ static inline void sync_with_host(uint64_t phase)
: "+a" (phase));
}
-void self_smi(void)
+static void self_smi(void)
{
- wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
- APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
}
-void guest_code(void *arg)
+static void l2_guest_code(void)
{
+ sync_with_host(8);
+
+ sync_with_host(10);
+
+ vmcall();
+}
+
+static void guest_code(void *arg)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+ struct svm_test_data *svm = arg;
+ struct vmx_pages *vmx_pages = arg;
sync_with_host(1);
@@ -74,35 +82,63 @@ void guest_code(void *arg)
sync_with_host(4);
if (arg) {
- if (cpu_has_svm())
- generic_svm_setup(arg, NULL, NULL);
- else
- GUEST_ASSERT(prepare_for_vmx_operation(arg));
+ if (this_cpu_has(X86_FEATURE_SVM)) {
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ }
sync_with_host(5);
self_smi();
sync_with_host(7);
+
+ if (this_cpu_has(X86_FEATURE_SVM)) {
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ } else {
+ vmlaunch();
+ vmresume();
+ }
+
+ /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+ sync_with_host(12);
}
sync_with_host(DONE);
}
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ events.smi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+ vcpu_events_set(vcpu, &events);
+}
+
int main(int argc, char *argv[])
{
vm_vaddr_t nested_gva = 0;
+ struct kvm_vcpu *vcpu;
struct kvm_regs regs;
struct kvm_vm *vm;
- struct kvm_run *run;
struct kvm_x86_state *state;
int stage, stage_reported;
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
- run = vcpu_state(vm, VCPU_ID);
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
SMRAM_MEMSLOT, SMRAM_PAGES, 0);
@@ -113,29 +149,26 @@ int main(int argc, char *argv[])
memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
sizeof(smi_handler));
- vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
+ vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- if (nested_svm_supported())
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
vcpu_alloc_svm(vm, &nested_gva);
- else if (nested_vmx_supported())
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
vcpu_alloc_vmx(vm, &nested_gva);
}
if (!nested_gva)
pr_info("will skip SMM test with VMX enabled\n");
- vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
memset(&regs, 0, sizeof(regs));
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
stage_reported = regs.rax & 0xff;
@@ -147,14 +180,28 @@ int main(int argc, char *argv[])
"Unexpected stage: #%x, got %x",
stage, stage_reported);
- state = vcpu_save_state(vm, VCPU_ID);
+ /*
+ * Enter SMM during L2 execution and check that we correctly
+ * return from it. Do not perform save/restore while in SMM yet.
+ */
+ if (stage == 8) {
+ inject_smi(vcpu);
+ continue;
+ }
+
+ /*
+ * Perform save/restore while the guest is in SMM triggered
+ * during L2 execution.
+ */
+ if (stage == 10)
+ inject_smi(vcpu);
+
+ state = vcpu_save_state(vcpu);
kvm_vm_release(vm);
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
+
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
index 32854c1462ad..f2c7a1c297e3 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86/state_test.c
@@ -6,7 +6,6 @@
*
* Tests for vCPU state save/restore, including nested guest state.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -20,7 +19,6 @@
#include "vmx.h"
#include "svm_util.h"
-#define VCPU_ID 5
#define L2_GUEST_STACK_SIZE 256
void svm_l2_guest_code(void)
@@ -140,10 +138,82 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
static void __attribute__((__flatten__)) guest_code(void *arg)
{
GUEST_SYNC(1);
+
+ if (this_cpu_has(X86_FEATURE_XSAVE)) {
+ uint64_t supported_xcr0 = this_cpu_supported_xcr0();
+ uint8_t buffer[PAGE_SIZE];
+
+ memset(buffer, 0xcc, sizeof(buffer));
+
+ /*
+ * Modify state for all supported xfeatures to take them out of
+ * their "init" state, i.e. to make them show up in XSTATE_BV.
+ *
+ * Note off-by-default features, e.g. AMX, are out of scope for
+ * this particular testcase as they have a different ABI.
+ */
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
+ asm volatile ("fincstp");
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
+ asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_YMM)
+ asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_AVX512) {
+ asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
+ asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
+ asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
+ }
+
+ if (this_cpu_has(X86_FEATURE_MPX)) {
+ uint64_t bounds[2] = { 10, 0xffffffffull };
+ uint64_t output[2] = { };
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
+
+ /*
+ * Don't bother trying to get BNDCSR into the INUSE
+ * state. MSR_IA32_BNDCFGS doesn't count as it isn't
+ * managed via XSAVE/XRSTOR, and BNDCFGU can only be
+ * modified by XRSTOR. Stuffing XSTATE_BV in the host
+ * is simpler than doing XRSTOR here in the guest.
+ *
+ * However, temporarily enable MPX in BNDCFGS so that
+ * BNDMOV actually loads BND1. If MPX isn't *fully*
+ * enabled, all MPX instructions are treated as NOPs.
+ *
+ * Hand encode "bndmov (%rax),%bnd1" as support for MPX
+ * mnemonics/registers has been removed from gcc and
+ * clang (and was never fully supported by clang).
+ */
+ wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
+ asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
+ /*
+ * Hand encode "bndmov %bnd1, (%rax)" to sanity check
+ * that BND1 actually got loaded.
+ */
+ asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
+ wrmsr(MSR_IA32_BNDCFGS, 0);
+
+ GUEST_ASSERT_EQ(bounds[0], output[0]);
+ GUEST_ASSERT_EQ(bounds[1], output[1]);
+ }
+ if (this_cpu_has(X86_FEATURE_PKU)) {
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
+ set_cr4(get_cr4() | X86_CR4_PKE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
+
+ wrpkru(-1u);
+ }
+ }
+
GUEST_SYNC(2);
if (arg) {
- if (cpu_has_svm())
+ if (this_cpu_has(X86_FEATURE_SVM))
svm_l1_guest_code(arg);
else
vmx_l1_guest_code(arg);
@@ -154,44 +224,40 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
int main(int argc, char *argv[])
{
+ uint64_t *xstate_bv, saved_xstate_bv;
vm_vaddr_t nested_gva = 0;
-
+ struct kvm_cpuid2 empty_cpuid = {};
struct kvm_regs regs1, regs2;
+ struct kvm_vcpu *vcpu, *vcpuN;
struct kvm_vm *vm;
- struct kvm_run *run;
struct kvm_x86_state *state;
struct ucall uc;
int stage;
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- if (nested_svm_supported())
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
vcpu_alloc_svm(vm, &nested_gva);
- else if (nested_vmx_supported())
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
vcpu_alloc_vmx(vm, &nested_gva);
}
if (!nested_gva)
pr_info("will skip nested state checks\n");
- vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
@@ -206,22 +272,47 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+
+ /*
+ * Restore XSAVE state in a dummy vCPU, first without doing
+ * KVM_SET_CPUID2, and then with an empty guest CPUID. Except
+ * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
+ * allow KVM_SET_XSAVE regardless of guest CPUID. Manually
+ * load only XSAVE state, MSRs in particular have a much more
+ * convoluted ABI.
+ *
+ * Load two versions of XSAVE state: one with the actual guest
+ * XSAVE state, and one with all supported features forced "on"
+ * in xstate_bv, e.g. to ensure that KVM allows loading all
+ * supported features, even if something goes awry in saving
+ * the original snapshot.
+ */
+ xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+ saved_xstate_bv = *xstate_bv;
+
+ vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = kvm_cpu_supported_xcr0();
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ vcpu_init_cpuid(vcpuN, &empty_cpuid);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = saved_xstate_bv;
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
new file mode 100644
index 000000000000..917b6066cfc1
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+ vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+ x2apic_write_reg(APIC_EOI, 0x00);
+ intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
+ * and since L1 didn't enable virtual interrupt masking,
+ * L2 should receive it and not L1.
+ *
+ * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
+ * so it should also receive it after the following 'sti'.
+ */
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+ sti_nop();
+
+ GUEST_ASSERT(vintr_irq_called);
+ GUEST_ASSERT(intr_irq_called);
+
+ __asm__ __volatile__(
+ "vmcall\n"
+ );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* No virtual interrupt masking */
+ vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ /* No intercepts for real and virtual interrupts */
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
+
+ /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+ vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+ vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
new file mode 100644
index 000000000000..00135cbba35e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_nested_shutdown_test
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ __asm__ __volatile__("ud2");
+}
+
+static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ idt[6].p = 0; // #UD is intercepted but its injection will cause #NP
+ idt[11].p = 0; // #NP is not intercepted and will cause another
+ // #NP that will be converted to #DF
+ idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here */
+ GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
new file mode 100644
index 000000000000..7b6481d6c0d3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Oracle and/or its affiliates.
+ *
+ * Based on:
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+#include <stdatomic.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+
+#define INT_NR 0x20
+
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static unsigned int bp_fired;
+static void guest_bp_handler(struct ex_regs *regs)
+{
+ bp_fired++;
+}
+
+static unsigned int int_fired;
+static void l2_guest_code_int(void);
+
+static void guest_int_handler(struct ex_regs *regs)
+{
+ int_fired++;
+ GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
+}
+
+static void l2_guest_code_int(void)
+{
+ GUEST_ASSERT_EQ(int_fired, 1);
+
+ /*
+ * Same as the vmmcall() function, but with a ud2 sneaked after the
+ * vmmcall. The caller injects an exception with the return address
+ * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
+ * use vmmcall() directly.
+ */
+ __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+
+ GUEST_ASSERT_EQ(bp_fired, 1);
+ hlt();
+}
+
+static atomic_int nmi_stage;
+#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
+#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+ nmi_stage_inc();
+
+ if (nmi_stage_get() == 1) {
+ vmmcall();
+ GUEST_FAIL("Unexpected resume after VMMCALL");
+ } else {
+ GUEST_ASSERT_EQ(nmi_stage_get(), 3);
+ GUEST_DONE();
+ }
+}
+
+static void l2_guest_code_nmi(void)
+{
+ ud2();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ if (is_nmi)
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm,
+ is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
+ vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
+
+ if (is_nmi) {
+ vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+ } else {
+ vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
+ /* The return address pushed on stack */
+ vmcb->control.next_rip = vmcb->save.rip;
+ }
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+ "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ if (is_nmi) {
+ clgi();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
+
+ GUEST_ASSERT_EQ(nmi_stage_get(), 1);
+ nmi_stage_inc();
+
+ stgi();
+ /* self-NMI happens here */
+ while (true)
+ cpu_relax();
+ }
+
+ /* Skip over VMMCALL */
+ vmcb->save.rip += 3;
+
+ /* Switch to alternate IDT to cause intervening NPF again */
+ vmcb->save.idtr.base = idt_alt;
+ vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
+
+ vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+ /* The return address pushed on stack, skip over UD2 */
+ vmcb->control.next_rip = vmcb->save.rip + 2;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+ "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ GUEST_DONE();
+}
+
+static void run_test(bool is_nmi)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t svm_gva;
+ vm_vaddr_t idt_alt_vm;
+ struct kvm_guest_debug debug;
+
+ pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+ vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
+ vm_install_exception_handler(vm, INT_NR, guest_int_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ if (!is_nmi) {
+ void *idt, *idt_alt;
+
+ idt_alt_vm = vm_vaddr_alloc_page(vm);
+ idt_alt = addr_gva2hva(vm, idt_alt_vm);
+ idt = addr_gva2hva(vm, vm->arch.idt);
+ memcpy(idt_alt, idt, getpagesize());
+ } else {
+ idt_alt_vm = 0;
+ }
+ vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ struct ucall uc;
+
+ alarm(2);
+ vcpu_run(vcpu);
+ alarm(0);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
+ "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
+
+ atomic_init(&nmi_stage, 0);
+
+ run_test(false);
+ run_test(true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
index be2ca157485b..8a62cca28cfb 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
@@ -12,10 +12,6 @@
#include "processor.h"
#include "svm_util.h"
-#define VCPU_ID 5
-
-static struct kvm_vm *vm;
-
static void l2_guest_code(struct svm_test_data *svm)
{
__asm__ __volatile__("vmcall");
@@ -39,28 +35,26 @@ static void l1_guest_code(struct svm_test_data *svm)
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
- nested_svm_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
diff --git a/tools/testing/selftests/kvm/x86/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c
new file mode 100644
index 000000000000..8fa3948b0170
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, updates to/from values
+ * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include "kvm_test_harness.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+struct ucall uc_none = {
+ .cmd = UCALL_NONE,
+};
+
+/*
+ * ucall is embedded here to protect against compiler reshuffling registers
+ * before calling a function. In this test we only need to get KVM_EXIT_IO
+ * vmexit and preserve RBX, no additional information is needed.
+ */
+void guest_code(void)
+{
+ asm volatile("1: in %[port], %%al\n"
+ "add $0x1, %%rbx\n"
+ "jmp 1b"
+ : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+ : "rax", "rbx");
+}
+
+KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%llx, 0x%llx", \
+ left->reg, right->reg)
+ REG_COMPARE(rax);
+ REG_COMPARE(rbx);
+ REG_COMPARE(rcx);
+ REG_COMPARE(rdx);
+ REG_COMPARE(rsi);
+ REG_COMPARE(rdi);
+ REG_COMPARE(rsp);
+ REG_COMPARE(rbp);
+ REG_COMPARE(r8);
+ REG_COMPARE(r9);
+ REG_COMPARE(r10);
+ REG_COMPARE(r11);
+ REG_COMPARE(r12);
+ REG_COMPARE(r13);
+ REG_COMPARE(r14);
+ REG_COMPARE(r15);
+ REG_COMPARE(rip);
+ REG_COMPARE(rflags);
+#undef REG_COMPARE
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+ struct kvm_vcpu_events *right)
+{
+}
+
+#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
+
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.injected, 1);
+ WRITE_ONCE(events->exception.pending, 1);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events. KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
+ WRITE_ONCE(events->exception.pending, 1);
+ WRITE_ONCE(events->exception.nr, 255);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU heavily relies on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ __u64 *cr4 = &run->s.regs.sregs.cr4;
+ __u64 pae_enabled = *cr4;
+ __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+ WRITE_ONCE(*cr4, pae_enabled);
+ asm volatile(".rept 512\n\t"
+ "nop\n\t"
+ ".endr");
+ WRITE_ONCE(*cr4, pae_disabled);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
+{
+ const time_t TIMEOUT = 2; /* seconds, roughly */
+ struct kvm_x86_state *state;
+ struct kvm_translation tr;
+ struct kvm_run *run;
+ pthread_t thread;
+ time_t t;
+
+ run = vcpu->run;
+
+ run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+ vcpu_run(vcpu);
+ run->kvm_valid_regs = 0;
+
+ /* Save state *before* spawning the thread that mucks with vCPU state. */
+ state = vcpu_save_state(vcpu);
+
+ /*
+ * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
+ * should already be set in guest state.
+ */
+ TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+ (run->s.regs.sregs.efer & EFER_LME),
+ "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+ !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+ !!(run->s.regs.sregs.efer & EFER_LME));
+
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ /*
+ * Reload known good state if the vCPU triple faults, e.g. due
+ * to the unhandled #GPs being injected. VMX preserves state
+ * on shutdown, but SVM synthesizes an INIT as the VMCB state
+ * is architecturally undefined on triple fault.
+ */
+ if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+ vcpu_load_state(vcpu, state);
+
+ if (racer == race_sregs_cr4) {
+ tr = (struct kvm_translation) { .linear_address = 0 };
+ __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+ }
+ }
+
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+ kvm_x86_state_cleanup(state);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
+
+ /* Request reading invalid register set from VCPU. */
+ run->kvm_valid_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_valid_regs = 0;
+
+ run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_valid_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
+
+ /* Request setting invalid register set into VCPU. */
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_dirty_regs = 0;
+
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+ rv);
+ run->kvm_dirty_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_events events;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+
+ /* Request and verify all valid register sets. */
+ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ vcpu_regs_get(vcpu, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vcpu, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_events events;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ /* Set and verify various register values. */
+ run->s.regs.regs.rbx = 0xBAD1DEA;
+ run->s.regs.sregs.apic_base = 1 << 11;
+ /* TODO run->s.regs.events.XYZ = ABC; */
+
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
+ "rbx sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.rbx);
+ TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+ "apic_base sync regs value incorrect 0x%llx.",
+ run->s.regs.sregs.apic_base);
+
+ vcpu_regs_get(vcpu, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vcpu, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+
+ /* Clear kvm_dirty_regs bits, verify new s.regs values are
+ * overwritten with existing guest values.
+ */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.rbx = 0xDEADBEEF;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
+ "rbx sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ /* Clear kvm_valid_regs bits and kvm_dirty_bits.
+ * Verify s.regs values are not overwritten with existing guest values
+ * and that guest values are not overwritten with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.rbx = 0xAAAA;
+ vcpu_regs_get(vcpu, &regs);
+ regs.rbx = 0xBAC0;
+ vcpu_regs_set(vcpu, &regs);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
+ "rbx sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.rbx);
+ vcpu_regs_get(vcpu, &regs);
+ TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
+ "rbx guest value incorrect 0x%llx.",
+ regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+ * with existing guest values but that guest values are overwritten
+ * with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = TEST_SYNC_FIELDS;
+ run->s.regs.regs.rbx = 0xBBBB;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
+ "rbx sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.rbx);
+ vcpu_regs_get(vcpu, &regs);
+ TEST_ASSERT(regs.rbx == 0xBBBB + 1,
+ "rbx guest value incorrect 0x%llx.",
+ regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
+{
+ race_sync_regs(vcpu, race_sregs_cr4);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
+{
+ race_sync_regs(vcpu, race_events_exc);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
+{
+ race_sync_regs(vcpu, race_events_inj_pen);
+}
+
+int main(int argc, char *argv[])
+{
+ int cap;
+
+ cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+ TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+ TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
new file mode 100644
index 000000000000..56306a19144a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+#define L2_GUEST_STACK_SIZE 64
+unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+void l1_guest_code_vmx(struct vmx_pages *vmx)
+{
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ prepare_vmcs(vmx, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+ /* L2 should triple fault after a triple fault event injected. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+void l1_guest_code_svm(struct svm_test_data *svm)
+{
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* don't intercept shutdown to test the case of SVM allowing to do so */
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here, L1 should crash */
+ GUEST_ASSERT(0);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vcpu_events events;
+ struct ucall uc;
+
+ bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
+ bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
+
+ TEST_REQUIRE(has_vmx || has_svm);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
+
+
+ if (has_vmx) {
+ vm_vaddr_t vmx_pages_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ } else {
+ vm_vaddr_t svm_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+ }
+
+ vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
+ run = vcpu->run;
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+ vcpu_events_get(vcpu, &events);
+ events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+ events.triple_fault.pending = true;
+ vcpu_events_set(vcpu, &events);
+ run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+ vcpu_events_get(vcpu, &events);
+ TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
+ "Triple fault event invalid");
+ TEST_ASSERT(events.triple_fault.pending,
+ "No triple fault pending");
+ vcpu_run(vcpu);
+
+
+ if (has_svm) {
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+ } else {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
index e357d8e222d4..12b0964f4f13 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
@@ -9,23 +9,12 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 0
-
#define UNITY (1ull << 30)
#define HOST_ADJUST (UNITY * 64)
#define GUEST_STEP (UNITY * 4)
#define ROUND(x) ((x + UNITY / 2) & -UNITY)
#define rounded_rdmsr(x) ROUND(rdmsr(x))
-#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x))
-
-#define GUEST_ASSERT_EQ(a, b) do { \
- __typeof(a) _a = (a); \
- __typeof(b) _b = (b); \
- if (_a != _b) \
- ucall(UCALL_ABORT, 4, \
- "Failed guest assert: " \
- #a " == " #b, __LINE__, _a, _b); \
- } while(0)
+#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vcpu, x))
static void guest_code(void)
{
@@ -75,93 +64,98 @@ static void guest_code(void)
GUEST_DONE();
}
-static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
{
struct ucall uc;
- vcpu_args_set(vm, vcpuid, 1, vcpuid);
+ vcpu_run(vcpu);
- vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
-
- switch (get_ucall(vm, vcpuid, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
- stage + 1, (ulong)uc.args[1]);
+ if (!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1)
+ ksft_test_result_pass("stage %d passed\n", stage + 1);
+ else
+ ksft_test_result_fail(
+ "stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
return;
case UCALL_DONE:
+ ksft_test_result_pass("stage %d passed\n", stage + 1);
return;
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%ld\n" \
- "\tvalues: %#lx, %#lx", (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ exit_reason_str(vcpu->run->exit_reason));
}
}
int main(void)
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t val;
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
val = 0;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/* Guest: writes to MSR_IA32_TSC affect both MSRs. */
- run_vcpu(vm, VCPU_ID, 1);
+ run_vcpu(vcpu, 1);
val = 1ull * GUEST_STEP;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
- run_vcpu(vm, VCPU_ID, 2);
+ run_vcpu(vcpu, 2);
val = 2ull * GUEST_STEP;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/*
* Host: writes to MSR_IA32_TSC set the host-side offset
* and therefore do not change MSR_IA32_TSC_ADJUST.
*/
- vcpu_set_msr(vm, 0, MSR_IA32_TSC, HOST_ADJUST + val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
- run_vcpu(vm, VCPU_ID, 3);
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ run_vcpu(vcpu, 3);
/* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */
- vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, UNITY * 123456);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- ASSERT_EQ(vcpu_get_msr(vm, 0, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
/* Restore previous value. */
- vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/*
* Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
* host-side offset and affect both MSRs.
*/
- run_vcpu(vm, VCPU_ID, 4);
+ run_vcpu(vcpu, 4);
val = 3ull * GUEST_STEP;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/*
* Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
* offset is now visible in MSR_IA32_TSC_ADJUST.
*/
- run_vcpu(vm, VCPU_ID, 5);
+ run_vcpu(vcpu, 5);
val = 4ull * GUEST_STEP;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
kvm_vm_free(vm);
- return 0;
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
new file mode 100644
index 000000000000..59c7304f805e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#define NR_TEST_VCPUS 20
+
+static struct kvm_vm *vm;
+pthread_spinlock_t create_lock;
+
+#define TEST_TSC_KHZ 2345678UL
+#define TEST_TSC_OFFSET 200000000
+
+uint64_t tsc_sync;
+static void guest_code(void)
+{
+ uint64_t start_tsc, local_tsc, tmp;
+
+ start_tsc = rdtsc();
+ do {
+ tmp = READ_ONCE(tsc_sync);
+ local_tsc = rdtsc();
+ WRITE_ONCE(tsc_sync, local_tsc);
+ if (unlikely(local_tsc < tmp))
+ GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
+
+ } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
+
+ GUEST_DONE();
+}
+
+
+static void *run_vcpu(void *_cpu_nr)
+{
+ unsigned long vcpu_id = (unsigned long)_cpu_nr;
+ unsigned long failures = 0;
+ static bool first_cpu_done;
+ struct kvm_vcpu *vcpu;
+
+ /* The kernel is fine, but vm_vcpu_add() needs locking */
+ pthread_spin_lock(&create_lock);
+
+ vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
+
+ if (!first_cpu_done) {
+ first_cpu_done = true;
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+ }
+
+ pthread_spin_unlock(&create_lock);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ goto out;
+
+ case UCALL_SYNC:
+ printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
+ uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+ failures++;
+ break;
+
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+ out:
+ return (void *)failures;
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
+
+ vm = vm_create(NR_TEST_VCPUS);
+ vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
+
+ pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
+ pthread_t cpu_threads[NR_TEST_VCPUS];
+ unsigned long cpu;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
+ pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
+
+ unsigned long failures = 0;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
+ void *this_cpu_failures;
+ pthread_join(cpu_threads[cpu], &this_cpu_failures);
+ failures += (unsigned long)this_cpu_failures;
+ }
+
+ TEST_ASSERT(!failures, "TSC sync failed");
+ pthread_spin_destroy(&create_lock);
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
new file mode 100644
index 000000000000..1e5e564523b3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucna_injection_test
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that user space can inject UnCorrectable No Action required (UCNA)
+ * memory errors to the guest.
+ *
+ * The test starts one vCPU with the MCG_CMCI_P enabled. It verifies that
+ * proper UCNA errors can be injected to a vCPU with MCG_CMCI_P and
+ * corresponding per-bank control register (MCI_CTL2) bit enabled.
+ * The test also checks that the UCNA errors get recorded in the
+ * Machine Check bank registers no matter the error signal interrupts get
+ * delivered into the guest or not.
+ *
+ */
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "mce.h"
+#include "processor.h"
+#include "test_util.h"
+#include "apic.h"
+
+#define SYNC_FIRST_UCNA 9
+#define SYNC_SECOND_UCNA 10
+#define SYNC_GP 11
+#define FIRST_UCNA_ADDR 0xdeadbeef
+#define SECOND_UCNA_ADDR 0xcafeb0ba
+
+/*
+ * Vector for the CMCI interrupt.
+ * Value is arbitrary. Any value in 0x20-0xFF should work:
+ * https://wiki.osdev.org/Interrupt_Vector_Table
+ */
+#define CMCI_VECTOR 0xa9
+
+#define UCNA_BANK 0x7 // IMC0 bank
+
+#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
+
+static uint64_t supported_mcg_caps;
+
+/*
+ * Record states about the injected UCNA.
+ * The variables started with the 'i_' prefixes are recorded in interrupt
+ * handler. Variables without the 'i_' prefixes are recorded in guest main
+ * execution thread.
+ */
+static volatile uint64_t i_ucna_rcvd;
+static volatile uint64_t i_ucna_addr;
+static volatile uint64_t ucna_addr;
+static volatile uint64_t ucna_addr2;
+
+struct thread_params {
+ struct kvm_vcpu *vcpu;
+ uint64_t *p_i_ucna_rcvd;
+ uint64_t *p_i_ucna_addr;
+ uint64_t *p_ucna_addr;
+ uint64_t *p_ucna_addr2;
+};
+
+static void verify_apic_base_addr(void)
+{
+ uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+ uint64_t base = GET_APIC_BASE(msr);
+
+ GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void ucna_injection_guest_code(void)
+{
+ uint64_t ctl2;
+ verify_apic_base_addr();
+ xapic_enable();
+
+ /* Sets up the interrupt vector and enables per-bank CMCI sigaling. */
+ xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ /* Enables interrupt in guest. */
+ sti();
+
+ /* Let user space inject the first UCNA */
+ GUEST_SYNC(SYNC_FIRST_UCNA);
+
+ ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+
+ /* Disables the per-bank CMCI signaling. */
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
+
+ /* Let the user space inject the second UCNA */
+ GUEST_SYNC(SYNC_SECOND_UCNA);
+
+ ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ GUEST_DONE();
+}
+
+static void cmci_disabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ GUEST_DONE();
+}
+
+static void cmci_enabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
+
+ GUEST_DONE();
+}
+
+static void guest_cmci_handler(struct ex_regs *regs)
+{
+ i_ucna_rcvd++;
+ i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ xapic_write_reg(APIC_EOI, 0);
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(SYNC_GP);
+}
+
+static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
+ printf("vCPU received GP in guest.\n");
+}
+
+static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) {
+ /*
+ * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
+ * the IA32_MCi_STATUS register.
+ * MSCOD=1 (BIT[16] - MscodDataRdErr).
+ * MCACOD=0x0090 (Memory controller error format, channel 0)
+ */
+ uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+ MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
+ struct kvm_x86_mce mce = {};
+ mce.status = status;
+ mce.mcg_status = 0;
+ /*
+ * MCM_ADDR_PHYS indicates the reported address is a physical address.
+ * Lowest 6 bits is the recoverable address LSB, i.e., the injected MCE
+ * is at 4KB granularity.
+ */
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.addr = addr;
+ mce.bank = UCNA_BANK;
+
+ vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
+}
+
+static void *run_ucna_injection(void *arg)
+{
+ struct thread_params *params = (struct thread_params *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(r == 0,
+ "pthread_setcanceltype failed with errno=%d",
+ r);
+
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
+
+ printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
+
+ printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
+ TEST_ASSERT(false, "vCPU assertion failure: %s.",
+ (const char *)uc.args[0]);
+ }
+
+ return NULL;
+}
+
+static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ params->vcpu = vcpu;
+ params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
+ params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
+ params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
+ params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+
+ run_ucna_injection(params);
+
+ TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only first UCNA get signaled.");
+ TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
+ "Only first UCNA reported addr get recorded via interrupt.");
+ TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
+ "First injected UCNAs should get exposed via registers.");
+ TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
+ "Second injected UCNAs should get exposed via registers.");
+
+ printf("Test successful.\n"
+ "UCNA CMCI interrupts received: %ld\n"
+ "Last UCNA address received via CMCI: %lx\n"
+ "First UCNA address in vCPU thread: %lx\n"
+ "Second UCNA address in vCPU thread: %lx\n",
+ *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
+ *params->p_ucna_addr, *params->p_ucna_addr2);
+}
+
+static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
+{
+ uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+ if (enable_cmci_p)
+ mcg_caps |= MCG_CMCI_P;
+
+ mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
+ vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
+}
+
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+ bool enable_cmci_p, void *guest_code)
+{
+ struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
+ setup_mce_cap(vcpu, enable_cmci_p);
+ return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+ struct thread_params params;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *ucna_vcpu;
+ struct kvm_vcpu *cmcidis_vcpu;
+ struct kvm_vcpu *cmci_vcpu;
+
+ kvm_check_cap(KVM_CAP_MCE);
+
+ vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
+
+ kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
+ &supported_mcg_caps);
+
+ if (!(supported_mcg_caps & MCG_CMCI_P)) {
+ print_skip("MCG_CMCI_P is not supported");
+ exit(KSFT_SKIP);
+ }
+
+ ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
+ cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
+ cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
+
+ vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ test_ucna_injection(ucna_vcpu, &params);
+ run_vcpu_expect_gp(cmcidis_vcpu);
+ run_vcpu_expect_gp(cmci_vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
new file mode 100644
index 000000000000..be7d72f3c029
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+{
+ unsigned long end;
+
+ if (count == 2)
+ end = (unsigned long)buffer + 1;
+ else
+ end = (unsigned long)buffer + 8192;
+
+ asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
+ GUEST_ASSERT_EQ(count, 0);
+ GUEST_ASSERT_EQ((unsigned long)buffer, end);
+}
+
+static void guest_code(void)
+{
+ uint8_t buffer[8192];
+ int i;
+
+ /*
+ * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to
+ * test that KVM doesn't explode when userspace modifies the "count" on
+ * a userspace I/O exit. KVM isn't required to play nice with the I/O
+ * itself as KVM doesn't support manipulating the count, it just needs
+ * to not explode or overflow a buffer.
+ */
+ guest_ins_port80(buffer, 2);
+ guest_ins_port80(buffer, 3);
+
+ /* Verify KVM fills the buffer correctly when not stuffing RCX. */
+ memset(buffer, 0, sizeof(buffer));
+ guest_ins_port80(buffer, 8192);
+ for (i = 0; i < 8192; i++)
+ __GUEST_ASSERT(buffer[i] == 0xaa,
+ "Expected '0xaa', got '0x%x' at buffer[%u]",
+ buffer[i], i);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_regs regs;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ memset(&regs, 0, sizeof(regs));
+
+ while (1) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (get_ucall(vcpu, &uc))
+ break;
+
+ TEST_ASSERT(run->io.port == 0x80,
+ "Expected I/O at port 0x80, got port 0x%x", run->io.port);
+
+ /*
+ * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
+ * Note, this abuses KVM's batching of rep string I/O to avoid
+ * getting stuck in an infinite loop. That behavior isn't in
+ * scope from a testing perspective as it's not ABI in any way,
+ * i.e. it really is abusing internal KVM knowledge.
+ */
+ vcpu_regs_get(vcpu, &regs);
+ if (regs.rcx == 2)
+ regs.rcx = 1;
+ if (regs.rcx == 3)
+ regs.rcx = 8192;
+ memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE);
+ vcpu_regs_set(vcpu, &regs);
+ }
+
+ switch (uc.cmd) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
index 72c0d0797522..8463a9956410 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
@@ -4,20 +4,13 @@
*
* Tests for exiting into userspace on registered MSRs
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
+#include "kvm_test_harness.h"
#include "test_util.h"
#include "kvm_util.h"
#include "vmx.h"
-/* Forced emulation prefix, used to invoke the emulator unconditionally. */
-#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
-#define KVM_FEP_LENGTH 5
-static int fep_available = 1;
-
-#define VCPU_ID 1
#define MSR_NON_EXISTENT 0x474f4f00
static u64 deny_bits = 0;
@@ -261,14 +254,7 @@ static void guest_code_filter_allow(void)
GUEST_ASSERT(data == 2);
GUEST_ASSERT(guest_exception_count == 0);
- /*
- * Test to see if the instruction emulator is available (ie: the module
- * parameter 'kvm.force_emulation_prefix=1' is set). This instruction
- * will #UD if it isn't available.
- */
- __asm__ __volatile__(KVM_FEP "nop");
-
- if (fep_available) {
+ if (is_forced_emulation_enabled) {
/* Let userspace know we aren't done. */
GUEST_SYNC(0);
@@ -357,6 +343,12 @@ static void guest_code_permission_bitmap(void)
data = test_rdmsr(MSR_GS_BASE);
GUEST_ASSERT(data == MSR_GS_BASE);
+ /* Access the MSRs again to ensure KVM has disabled interception.*/
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data != MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data != MSR_GS_BASE);
+
GUEST_DONE();
}
@@ -389,42 +381,23 @@ static void guest_fep_gp_handler(struct ex_regs *regs)
&em_wrmsr_start, &em_wrmsr_end);
}
-static void guest_ud_handler(struct ex_regs *regs)
-{
- fep_available = 0;
- regs->rip += KVM_FEP_LENGTH;
-}
-
-static void run_guest(struct kvm_vm *vm)
-{
- int rc;
-
- rc = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
-}
-
-static void check_for_guest_assert(struct kvm_vm *vm)
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- if (run->exit_reason == KVM_EXIT_IO &&
- get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) {
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+ get_ucall(vcpu, &uc) == UCALL_ABORT) {
+ REPORT_GUEST_ASSERT(uc);
}
}
-static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
TEST_ASSERT(run->msr.index == msr_index,
"Unexpected msr (0x%04x), expected 0x%04x",
run->msr.index, msr_index);
@@ -450,16 +423,13 @@ static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
}
}
-static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
TEST_ASSERT(run->msr.index == msr_index,
"Unexpected msr (0x%04x), expected 0x%04x",
run->msr.index, msr_index);
@@ -481,43 +451,35 @@ static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
}
}
-static void process_ucall_done(struct kvm_vm *vm)
+static void process_ucall_done(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s)",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
"Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
uc.cmd, UCALL_DONE);
}
-static uint64_t process_ucall(struct kvm_vm *vm)
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc = {};
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s)",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
break;
case UCALL_ABORT:
- check_for_guest_assert(vm);
+ check_for_guest_assert(vcpu);
break;
case UCALL_DONE:
- process_ucall_done(vm);
+ process_ucall_done(vcpu);
break;
default:
TEST_ASSERT(false, "Unexpected ucall");
@@ -526,106 +488,100 @@ static uint64_t process_ucall(struct kvm_vm *vm)
return uc.cmd;
}
-static void run_guest_then_process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
{
- run_guest(vm);
- process_rdmsr(vm, msr_index);
+ vcpu_run(vcpu);
+ process_rdmsr(vcpu, msr_index);
}
-static void run_guest_then_process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
{
- run_guest(vm);
- process_wrmsr(vm, msr_index);
+ vcpu_run(vcpu);
+ process_wrmsr(vcpu, msr_index);
}
-static uint64_t run_guest_then_process_ucall(struct kvm_vm *vm)
+static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
{
- run_guest(vm);
- return process_ucall(vm);
+ vcpu_run(vcpu);
+ return process_ucall(vcpu);
}
-static void run_guest_then_process_ucall_done(struct kvm_vm *vm)
+static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
{
- run_guest(vm);
- process_ucall_done(vm);
+ vcpu_run(vcpu);
+ process_ucall_done(vcpu);
}
-static void test_msr_filter_allow(void) {
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_X86_USER_SPACE_MSR,
- .args[0] = KVM_MSR_EXIT_REASON_FILTER,
- };
- struct kvm_vm *vm;
- int rc;
+KVM_ONE_VCPU_TEST_SUITE(user_msr);
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code_filter_allow);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t cmd;
+ int rc;
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
-
- vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
/* Process guest code userspace exits. */
- run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
- run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
- run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
- vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
- run_guest(vm);
- vm_handle_exception(vm, UD_VECTOR, NULL);
+ vcpu_run(vcpu);
+ cmd = process_ucall(vcpu);
- if (process_ucall(vm) != UCALL_DONE) {
- vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+ if (is_forced_emulation_enabled) {
+ TEST_ASSERT_EQ(cmd, UCALL_SYNC);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
/* Process emulated rdmsr and wrmsr instructions. */
- run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
- run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
- run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
- run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
- run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
/* Confirm the guest completed without issues. */
- run_guest_then_process_ucall_done(vm);
+ run_guest_then_process_ucall_done(vcpu);
} else {
+ TEST_ASSERT_EQ(cmd, UCALL_DONE);
printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
}
-
- kvm_vm_free(vm);
}
-static int handle_ucall(struct kvm_vm *vm)
+static int handle_ucall(struct kvm_vcpu *vcpu)
{
struct ucall uc;
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("Guest assertion not met");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_SYNC:
- vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+ vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
break;
case UCALL_DONE:
return 1;
@@ -673,25 +629,17 @@ static void handle_wrmsr(struct kvm_run *run)
}
}
-static void test_msr_filter_deny(void) {
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_X86_USER_SPACE_MSR,
- .args[0] = KVM_MSR_EXIT_REASON_INVAL |
- KVM_MSR_EXIT_REASON_UNKNOWN |
- KVM_MSR_EXIT_REASON_FILTER,
- };
- struct kvm_vm *vm;
- struct kvm_run *run;
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_run *run = vcpu->run;
int rc;
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code_filter_deny);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
-
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER);
rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
@@ -700,9 +648,7 @@ static void test_msr_filter_deny(void) {
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
while (1) {
- rc = _vcpu_run(vm, VCPU_ID);
-
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+ vcpu_run(vcpu);
switch (run->exit_reason) {
case KVM_EXIT_X86_RDMSR:
@@ -712,7 +658,7 @@ static void test_msr_filter_deny(void) {
handle_wrmsr(run);
break;
case KVM_EXIT_IO:
- if (handle_ucall(vm))
+ if (handle_ucall(vcpu))
goto done;
break;
}
@@ -722,49 +668,110 @@ static void test_msr_filter_deny(void) {
done:
TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
-
- kvm_vm_free(vm);
}
-static void test_msr_permission_bitmap(void) {
- struct kvm_enable_cap cap = {
- .cap = KVM_CAP_X86_USER_SPACE_MSR,
- .args[0] = KVM_MSR_EXIT_REASON_FILTER,
- };
- struct kvm_vm *vm;
+KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
+{
+ struct kvm_vm *vm = vcpu->vm;
int rc;
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code_permission_bitmap);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- vm_enable_cap(vm, &cap);
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
- run_guest_then_process_rdmsr(vm, MSR_FS_BASE);
- TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC, "Expected ucall state to be UCALL_SYNC.");
+ run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
+ TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
+ "Expected ucall state to be UCALL_SYNC.");
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
- run_guest_then_process_rdmsr(vm, MSR_GS_BASE);
- run_guest_then_process_ucall_done(vm);
+ run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
- kvm_vm_free(vm);
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
+ run_guest_then_process_ucall_done(vcpu);
}
-int main(int argc, char *argv[])
+#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \
+({ \
+ int r = __vm_ioctl(vm, cmd, arg); \
+ \
+ if (flag & valid_mask) \
+ TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \
+ else \
+ TEST_ASSERT(r == -1 && errno == EINVAL, \
+ "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \
+ #cmd, flag, r, errno, strerror(errno)); \
+})
+
+static void run_user_space_msr_flag_test(struct kvm_vm *vm)
{
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
+ struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
+ int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
+ int rc;
+ int i;
- test_msr_filter_allow();
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
- test_msr_filter_deny();
+ for (i = 0; i < nflags; i++) {
+ cap.args[0] = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
+ BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
+ }
+}
- test_msr_permission_bitmap();
+static void run_msr_filter_flag_test(struct kvm_vm *vm)
+{
+ u64 deny_bits = 0;
+ struct kvm_msr_filter filter = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = 0,
+ .bitmap = (uint8_t *)&deny_bits,
+ },
+ },
+ };
+ int nflags;
+ int rc;
+ int i;
- return 0;
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ nflags = sizeof(filter.flags) * BITS_PER_BYTE;
+ for (i = 0; i < nflags; i++) {
+ filter.flags = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+ BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
+ }
+
+ filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
+ nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
+ for (i = 0; i < nflags; i++) {
+ filter.ranges[0].flags = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+ BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
+ }
+}
+
+/* Test that attempts to write to the unused bits in a flag fails. */
+KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
+{
+ struct kvm_vm *vm = vcpu->vm;
+
+ /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
+ run_user_space_msr_flag_test(vm);
+
+ /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
+ run_msr_filter_flag_test(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness_run(argc, argv);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
index d14888b34adb..a81a24761aac 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
@@ -28,11 +28,6 @@
#include "kselftest.h"
-#define VCPU_ID 0
-
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
static void l2_guest_code(void)
{
/* Exit to L1 */
@@ -77,54 +72,42 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
int main(int argc, char *argv[])
{
unsigned long apic_access_addr = ~0ul;
- unsigned int paddr_width;
- unsigned int vaddr_width;
vm_vaddr_t vmx_pages_gva;
unsigned long high_gpa;
struct vmx_pages *vmx;
bool done = false;
- nested_vmx_check_supported();
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- kvm_get_cpu_address_width(&paddr_width, &vaddr_width);
- high_gpa = (1ul << paddr_width) - getpagesize();
- if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) {
- print_skip("No unbacked physical page available");
- exit(KSFT_SKIP);
- }
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ high_gpa = (vm->max_gfn - 1) << vm->page_shift;
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- prepare_virtualize_apic_accesses(vmx, vm, 0);
- vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa);
+ prepare_virtualize_apic_accesses(vmx, vm);
+ vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
while (!done) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
if (apic_access_addr == high_gpa) {
- TEST_ASSERT(run->exit_reason ==
- KVM_EXIT_INTERNAL_ERROR,
- "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
TEST_ASSERT(run->internal.suberror ==
KVM_INTERNAL_ERROR_EMULATION,
- "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n",
+ "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
run->internal.suberror);
break;
}
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
apic_access_addr = uc.args[1];
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
index 537de1068554..98cb6bdab3e6 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
@@ -4,9 +4,6 @@
*
* Copyright (C) 2018, Red Hat, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <linux/bitmap.h>
@@ -17,8 +14,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 1
-
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
#define TEST_MEM_PAGES 3
@@ -30,16 +25,16 @@
#define NESTED_TEST_MEM1 0xc0001000
#define NESTED_TEST_MEM2 0xc0002000
-static void l2_guest_code(void)
+static void l2_guest_code(u64 *a, u64 *b)
{
- *(volatile uint64_t *)NESTED_TEST_MEM1;
- *(volatile uint64_t *)NESTED_TEST_MEM1 = 1;
+ READ_ONCE(*a);
+ WRITE_ONCE(*a, 1);
GUEST_SYNC(true);
GUEST_SYNC(false);
- *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+ WRITE_ONCE(*b, 1);
GUEST_SYNC(true);
- *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+ WRITE_ONCE(*b, 1);
GUEST_SYNC(true);
GUEST_SYNC(false);
@@ -47,17 +42,33 @@ static void l2_guest_code(void)
vmcall();
}
+static void l2_guest_code_ept_enabled(void)
+{
+ l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
+}
+
+static void l2_guest_code_ept_disabled(void)
+{
+ /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
+ l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
+}
+
void l1_guest_code(struct vmx_pages *vmx)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
GUEST_ASSERT(vmx->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx));
GUEST_ASSERT(load_vmcs(vmx));
- prepare_vmcs(vmx, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ if (vmx->eptp_gpa)
+ l2_rip = l2_guest_code_ept_enabled;
+ else
+ l2_rip = l2_guest_code_ept_disabled;
+
+ prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
GUEST_SYNC(false);
GUEST_ASSERT(!vmlaunch());
@@ -66,25 +77,24 @@ void l1_guest_code(struct vmx_pages *vmx)
GUEST_DONE();
}
-int main(int argc, char *argv[])
+static void test_vmx_dirty_log(bool enable_ept)
{
vm_vaddr_t vmx_pages_gva = 0;
struct vmx_pages *vmx;
unsigned long *bmap;
uint64_t *host_test_mem;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- struct kvm_run *run;
struct ucall uc;
bool done = false;
- nested_vmx_check_supported();
+ pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
- run = vcpu_state(vm, VCPU_ID);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -97,7 +107,7 @@ int main(int argc, char *argv[])
* Add an identity map for GVA range [0xc0000000, 0xc0002000). This
* affects both L1 and L2. However...
*/
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
+ virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
/*
* ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
@@ -105,27 +115,28 @@ int main(int argc, char *argv[])
*
* Note that prepare_eptp should be called only L1's GPA map is done,
* meaning after the last call to virt_map.
+ *
+ * When EPT is disabled, the L2 guest code will still access the same L1
+ * GPAs as the EPT enabled case.
*/
- prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+ if (enable_ept) {
+ prepare_eptp(vmx, vm);
+ nested_map_memslot(vmx, vm, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE);
+ }
- bmap = bitmap_alloc(TEST_MEM_PAGES);
+ bmap = bitmap_zalloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
while (!done) {
- memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
/*
@@ -134,17 +145,17 @@ int main(int argc, char *argv[])
*/
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
if (uc.args[1]) {
- TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n");
- TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n");
+ TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
+ TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
} else {
- TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n");
+ TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
}
- TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n");
- TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n");
+ TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+ TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
break;
case UCALL_DONE:
done = true;
@@ -154,3 +165,15 @@ int main(int argc, char *argv[])
}
}
}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ test_vmx_dirty_log(/*enable_ept=*/false);
+
+ if (kvm_cpu_has_ept())
+ test_vmx_dirty_log(/*enable_ept=*/true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
new file mode 100644
index 000000000000..2cae86d9d5e2
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ /* Loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+ asm volatile("ud2");
+}
+
+static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Expected emulation failure, got %d",
+ run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Always run twice to verify KVM handles the case where _KVM_ queues
+ * an exception with invalid state and then exits to userspace, i.e.
+ * that KVM doesn't explode if userspace ignores the initial error.
+ */
+ __run_vcpu_with_invalid_state(vcpu);
+ __run_vcpu_with_invalid_state(vcpu);
+}
+
+static void set_timer(void)
+{
+ struct itimerval timer;
+
+ timer.it_value.tv_sec = 0;
+ timer.it_value.tv_usec = 200;
+ timer.it_interval = timer.it_value;
+ TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
+static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
+{
+ static struct kvm_sregs sregs;
+
+ if (!sregs.cr0)
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.tr.unusable = !!set;
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+ set_or_clear_invalid_guest_state(vcpu, true);
+}
+
+static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+ set_or_clear_invalid_guest_state(vcpu, false);
+}
+
+static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
+{
+ static struct kvm_vcpu *vcpu = NULL;
+
+ if (__vcpu)
+ vcpu = __vcpu;
+ return vcpu;
+}
+
+static void sigalrm_handler(int sig)
+{
+ struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
+ struct kvm_vcpu_events events;
+
+ TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+ vcpu_events_get(vcpu, &events);
+
+ /*
+ * If an exception is pending, attempt KVM_RUN with invalid guest,
+ * otherwise rearm the timer and keep doing so until the timer fires
+ * between KVM queueing an exception and re-entering the guest.
+ */
+ if (events.exception.pending) {
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
+ } else {
+ set_timer();
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(host_cpu_is_intel);
+ TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled());
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ get_set_sigalrm_vcpu(vcpu);
+
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+ /*
+ * Stuff invalid guest state for L2 by making TR unusuable. The next
+ * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+ * emulating invalid guest state for L2.
+ */
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
+
+ /*
+ * Verify KVM also handles the case where userspace gains control while
+ * an exception is pending and stuffs invalid state. Run with valid
+ * guest state and a timer firing every 200us, and attempt to enter the
+ * guest with invalid state when the handler interrupts KVM with an
+ * exception pending.
+ */
+ clear_invalid_guest_state(vcpu);
+ TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+ "Failed to register SIGALRM handler, errno = %d (%s)",
+ errno, strerror(errno));
+
+ set_timer();
+ run_vcpu_with_invalid_state(vcpu);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
new file mode 100644
index 000000000000..a100ee5f0009
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ /*
+ * Generate an exit to L0 userspace, i.e. main(), via I/O to an
+ * arbitrary port.
+ */
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * L2 must be run without unrestricted guest, verify that the selftests
+ * library hasn't enabled it. Because KVM selftests jump directly to
+ * 64-bit mode, unrestricted guest support isn't required.
+ */
+ GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
+ !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
+
+ GUEST_ASSERT(!vmlaunch());
+
+ /* L2 should triple fault after main() stuffs invalid guest state. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ vcpu_run(vcpu);
+
+ run = vcpu->run;
+
+ /*
+ * The first exit to L0 userspace should be an I/O access from L2.
+ * Running L1 should launch L2 without triggering an exit to userspace.
+ */
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+
+ /*
+ * Stuff invalid guest state for L2 by making TR unusuable. The next
+ * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+ * emulating invalid guest state for L2.
+ */
+ memset(&sregs, 0, sizeof(sregs));
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.tr.unusable = 1;
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
new file mode 100644
index 000000000000..90720b6205f4
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX control MSR test
+ *
+ * Copyright (C) 2022 Google LLC.
+ *
+ * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
+ * that KVM will set owned bits where appropriate, and will not if
+ * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
+ */
+#include <linux/bitmap.h>
+#include "kvm_util.h"
+#include "vmx.h"
+
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask &= val;
+
+ for_each_set_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask = ~mask | val;
+
+ for_each_clear_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
+ vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
+}
+
+static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
+{
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
+ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
+ BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
+ BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
+
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
+}
+
+static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
+ uint64_t msr_bit,
+ struct kvm_x86_cpu_feature feature)
+{
+ uint64_t val;
+
+ vcpu_clear_cpuid_feature(vcpu, feature);
+
+ val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
+
+ if (!kvm_cpu_has(feature))
+ return;
+
+ vcpu_set_cpuid_feature(vcpu, feature);
+}
+
+static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
+{
+ uint64_t supported_bits = FEAT_CTL_LOCKED |
+ FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+ FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
+ FEAT_CTL_SGX_LC_ENABLED |
+ FEAT_CTL_SGX_ENABLED |
+ FEAT_CTL_LMCE_ENABLED;
+ int bit, r;
+
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
+
+ for_each_clear_bit(bit, &supported_bits, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
+ TEST_ASSERT(r == 0,
+ "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ /* No need to actually do KVM_RUN, thus no guest code. */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ vmx_save_restore_msrs_test(vcpu);
+ ia32_feature_control_msr_test(vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
new file mode 100644
index 000000000000..cf1d2d1f2a8f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test KVM's ability to save and restore nested state when the L1 guest
+ * is using 5-level paging and the L2 guest is using 4-level paging.
+ *
+ * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86:
+ * model canonical checks more precisely").
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define LA57_GS_BASE 0xff2bc0311fb00000ull
+
+static void l2_guest_code(void)
+{
+ /*
+ * Sync with L0 to trigger save/restore. After
+ * resuming, execute VMCALL to exit back to L1.
+ */
+ GUEST_SYNC(1);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 guest_cr4;
+ vm_paddr_t pml5_pa, pml4_pa;
+ u64 *pml5;
+ u64 exit_reason;
+
+ /* Set GS_BASE to a value that is only canonical with LA57. */
+ wrmsr(MSR_GS_BASE, LA57_GS_BASE);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE);
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Set up L2 with a 4-level page table by pointing its CR3 to
+ * L1's first PML4 table and clearing CR4.LA57. This creates
+ * the CR4.LA57 mismatch that exercises the bug.
+ */
+ pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK;
+ pml5 = (u64 *)pml5_pa;
+ pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK;
+ vmwrite(GUEST_CR3, pml4_pa);
+
+ guest_cr4 = vmreadz(GUEST_CR4);
+ guest_cr4 &= ~X86_CR4_LA57;
+ vmwrite(GUEST_CR4, guest_cr4);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ l1_guest_code(vmx_pages);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /*
+ * L1 needs to read its own PML5 table to set up L2. Identity map
+ * the PML5 table to facilitate this.
+ */
+ virt_map(vm, vm->pgd, vm->pgd, 1);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(uc.args[1] == stage,
+ "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]);
+ if (stage == 1) {
+ pr_info("L2 is active; performing save/restore.\n");
+ state = vcpu_save_state(vcpu);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
new file mode 100644
index 000000000000..7ff6f62e20a3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for VMX-pmu perf capability msr
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test to check the effect of various CPUID settings on
+ * MSR_IA32_PERF_CAPABILITIES MSR, and check that what
+ * we write with KVM_SET_MSR is _not_ modified by the guest
+ * and check it can be retrieved with KVM_GET_MSR, also test
+ * the invalid LBR formats are rejected.
+ */
+#include <sys/ioctl.h>
+
+#include <linux/bitmap.h>
+
+#include "kvm_test_harness.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+static union perf_capabilities {
+ struct {
+ u64 lbr_format:6;
+ u64 pebs_trap:1;
+ u64 pebs_arch_reg:1;
+ u64 pebs_format:4;
+ u64 smm_freeze:1;
+ u64 full_width_write:1;
+ u64 pebs_baseline:1;
+ u64 perf_metrics:1;
+ u64 pebs_output_pt_available:1;
+ u64 pebs_timing_info:1;
+ };
+ u64 capabilities;
+} host_cap;
+
+/*
+ * The LBR format and most PEBS features are immutable, all other features are
+ * fungible (if supported by the host and KVM).
+ */
+static const union perf_capabilities immutable_caps = {
+ .lbr_format = -1,
+ .pebs_trap = 1,
+ .pebs_arch_reg = 1,
+ .pebs_format = -1,
+ .pebs_baseline = 1,
+ .pebs_timing_info = 1,
+};
+
+static const union perf_capabilities format_caps = {
+ .lbr_format = -1,
+ .pebs_format = -1,
+};
+
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+ uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for value '0x%lx', got %s",
+ val, ex_str(vector));
+}
+
+static void guest_code(uint64_t current_val)
+{
+ int i;
+
+ guest_test_perf_capabilities_gp(current_val);
+ guest_test_perf_capabilities_gp(0);
+
+ for (i = 0; i < 64; i++)
+ guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
+
+ GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
+
+/*
+ * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
+ * written, that the guest always sees the userspace controlled value, and that
+ * PERF_CAPABILITIES is immutable after KVM_RUN.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
+{
+ struct ucall uc;
+ int r, i;
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ vcpu_args_set(vcpu, 1, host_cap.capabilities);
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+ host_cap.capabilities);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
+
+ for (i = 0; i < 64; i++) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(i));
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail",
+ host_cap.capabilities ^ BIT_ULL(i));
+ }
+}
+
+/*
+ * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
+ * enabled, as well as '0' (to disable all features).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
+{
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
+{
+ const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+ int bit;
+
+ for_each_set_bit(bit, &fungible_caps, 64) {
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities & ~BIT_ULL(bit));
+ }
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+/*
+ * Verify KVM rejects attempts to set unsupported and/or immutable features in
+ * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated
+ * separately as they are multi-bit values, e.g. toggling or setting a single
+ * bit can generate a false positive without dedicated safeguards.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
+{
+ const uint64_t reserved_caps = (~host_cap.capabilities |
+ immutable_caps.capabilities) &
+ ~format_caps.capabilities;
+ union perf_capabilities val = host_cap;
+ int r, bit;
+
+ for_each_set_bit(bit, &reserved_caps, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(bit));
+ TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
+ host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
+ BIT_ULL(bit), bit);
+ }
+
+ /*
+ * KVM only supports the host's native LBR format, as well as '0' (to
+ * disable LBR support). Verify KVM rejects all other LBR formats.
+ */
+ for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
+ if (val.lbr_format == host_cap.lbr_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
+ val.lbr_format, host_cap.lbr_format);
+ }
+
+ /* Ditto for the PEBS format. */
+ for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
+ if (val.pebs_format == host_cap.pebs_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
+ val.pebs_format, host_cap.pebs_format);
+ }
+}
+
+/*
+ * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
+ * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of
+ * LBR_TOS as those bits are writable across all uarch implementations (arch
+ * LBRs will need to poke a different MSR).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
+{
+ int r;
+
+ if (!host_cap.lbr_format)
+ return;
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+
+ vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
+
+ r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+ TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
+{
+ uint64_t val;
+ int i, r;
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+ TEST_ASSERT_EQ(val, host_cap.capabilities);
+
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM);
+
+ val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+ TEST_ASSERT_EQ(val, 0);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+
+ for (i = 0; i < 64; i++) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i));
+ TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM",
+ i, BIT_ULL(i));
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+
+ TEST_ASSERT(host_cap.full_width_write,
+ "Full-width writes should always be supported");
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
index a07480aed397..00dd2ac07a61 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
@@ -9,7 +9,6 @@
* value instead of partially decayed timer value
*
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -22,7 +21,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 5
#define PREEMPTION_TIMER_VALUE 100000000ull
#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull
@@ -158,7 +156,7 @@ int main(int argc, char *argv[])
struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
struct kvm_x86_state *state;
struct ucall uc;
int stage;
@@ -167,33 +165,25 @@ int main(int argc, char *argv[])
* AMD currently does not implement any VMX features, so for now we
* just early out.
*/
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- print_skip("KVM_CAP_NESTED_STATE not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
@@ -232,22 +222,19 @@ int main(int argc, char *argv[])
stage, uc.args[4], uc.args[5]);
}
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
index 5827b9bae468..67a62a5a8895 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
@@ -23,38 +23,37 @@
* changes this should be updated.
*/
#define VMCS12_REVISION 0x11e57ed0
-#define VCPU_ID 5
bool have_evmcs;
-void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state)
+void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
{
- vcpu_nested_state_set(vm, VCPU_ID, state, false);
+ vcpu_nested_state_set(vcpu, state);
}
-void test_nested_state_expect_errno(struct kvm_vm *vm,
+void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state,
int expected_errno)
{
int rv;
- rv = vcpu_nested_state_set(vm, VCPU_ID, state, true);
+ rv = __vcpu_nested_state_set(vcpu, state);
TEST_ASSERT(rv == -1 && errno == expected_errno,
"Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
strerror(expected_errno), expected_errno, rv, strerror(errno),
errno);
}
-void test_nested_state_expect_einval(struct kvm_vm *vm,
+void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state)
{
- test_nested_state_expect_errno(vm, state, EINVAL);
+ test_nested_state_expect_errno(vcpu, state, EINVAL);
}
-void test_nested_state_expect_efault(struct kvm_vm *vm,
+void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state)
{
- test_nested_state_expect_errno(vm, state, EFAULT);
+ test_nested_state_expect_errno(vcpu, state, EFAULT);
}
void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
@@ -86,7 +85,7 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size)
set_revision_id_for_vmcs12(state, VMCS12_REVISION);
}
-void test_vmx_nested_state(struct kvm_vm *vm)
+void test_vmx_nested_state(struct kvm_vcpu *vcpu)
{
/* Add a page for VMCS12. */
const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
@@ -96,14 +95,14 @@ void test_vmx_nested_state(struct kvm_vm *vm)
/* The format must be set to 0. 0 for VMX, 1 for SVM. */
set_default_vmx_state(state, state_sz);
state->format = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* We cannot virtualize anything if the guest does not have VMX
* enabled.
*/
set_default_vmx_state(state, state_sz);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* We cannot virtualize anything if the guest does not have VMX
@@ -112,55 +111,59 @@ void test_vmx_nested_state(struct kvm_vm *vm)
*/
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
state->hdr.vmx.vmcs12_pa = -1ull;
state->flags = KVM_STATE_NESTED_EVMCS;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
state->flags = 0;
- test_nested_state(vm, state);
+ test_nested_state(vcpu, state);
/* Enable VMX in the guest CPUID. */
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
/*
* Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
- * setting the nested state but flags other than eVMCS must be clear.
- * The eVMCS flag can be set if the enlightened VMCS capability has
- * been enabled.
+ * setting the nested state. When the eVMCS flag is not set, the
+ * expected return value is '0'.
*/
set_default_vmx_state(state, state_sz);
+ state->flags = 0;
state->hdr.vmx.vmxon_pa = -1ull;
state->hdr.vmx.vmcs12_pa = -1ull;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state(vcpu, state);
- state->flags &= KVM_STATE_NESTED_EVMCS;
+ /*
+ * When eVMCS is supported, the eVMCS flag can only be set if the
+ * enlightened VMCS capability has been enabled.
+ */
if (have_evmcs) {
- test_nested_state_expect_einval(vm, state);
- vcpu_enable_evmcs(vm, VCPU_ID);
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ test_nested_state_expect_einval(vcpu, state);
+ vcpu_enable_evmcs(vcpu);
+ test_nested_state(vcpu, state);
}
- test_nested_state(vm, state);
/* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
state->hdr.vmx.smm.flags = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* Invalid flags are rejected. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.flags = ~0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
state->flags = 0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* It is invalid to have vmxon_pa set to a non-page aligned address. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
@@ -170,7 +173,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->flags = KVM_STATE_NESTED_GUEST_MODE |
KVM_STATE_NESTED_RUN_PENDING;
state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* It is invalid to have any of the SMM flags set besides:
@@ -180,13 +183,13 @@ void test_vmx_nested_state(struct kvm_vm *vm)
set_default_vmx_state(state, state_sz);
state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
KVM_STATE_NESTED_SMM_VMXON);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* Outside SMM, SMM flags must be zero. */
set_default_vmx_state(state, state_sz);
state->flags = 0;
state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* Size must be large enough to fit kvm_nested_state and vmcs12
@@ -195,13 +198,13 @@ void test_vmx_nested_state(struct kvm_vm *vm)
set_default_vmx_state(state, state_sz);
state->size = sizeof(*state);
state->flags = 0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
set_default_vmx_state(state, state_sz);
state->size = sizeof(*state);
state->flags = 0;
state->hdr.vmx.vmcs12_pa = -1;
- test_nested_state(vm, state);
+ test_nested_state(vcpu, state);
/*
* KVM_SET_NESTED_STATE succeeds with invalid VMCS
@@ -209,7 +212,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
*/
set_default_vmx_state(state, state_sz);
state->flags = 0;
- test_nested_state(vm, state);
+ test_nested_state(vcpu, state);
/* Invalid flags are rejected, even if no VMCS loaded. */
set_default_vmx_state(state, state_sz);
@@ -217,13 +220,13 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->flags = 0;
state->hdr.vmx.vmcs12_pa = -1;
state->hdr.vmx.flags = ~0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* vmxon_pa cannot be the same address as vmcs_pa. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = 0;
state->hdr.vmx.vmcs12_pa = 0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* Test that if we leave nesting the state reflects that when we get
@@ -233,8 +236,8 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->hdr.vmx.vmxon_pa = -1ull;
state->hdr.vmx.vmcs12_pa = -1ull;
state->flags = 0;
- test_nested_state(vm, state);
- vcpu_nested_state_get(vm, VCPU_ID, state);
+ test_nested_state(vcpu, state);
+ vcpu_nested_state_get(vcpu, state);
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
"Size must be between %ld and %d. The size returned was %d.",
sizeof(*state), state_sz, state->size);
@@ -244,54 +247,36 @@ void test_vmx_nested_state(struct kvm_vm *vm)
free(state);
}
-void disable_vmx(struct kvm_vm *vm)
-{
- struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid();
- int i;
-
- for (i = 0; i < cpuid->nent; ++i)
- if (cpuid->entries[i].function == 1 &&
- cpuid->entries[i].index == 0)
- break;
- TEST_ASSERT(i != cpuid->nent, "CPUID function 1 not found");
-
- cpuid->entries[i].ecx &= ~CPUID_VMX;
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
- cpuid->entries[i].ecx |= CPUID_VMX;
-}
-
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_nested_state state;
+ struct kvm_vcpu *vcpu;
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
- if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- print_skip("KVM_CAP_NESTED_STATE not available");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
/*
* AMD currently does not implement set_nested_state, so for now we
* just early out.
*/
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, 0);
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
/*
* First run tests with VMX disabled to check error handling.
*/
- disable_vmx(vm);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
/* Passing a NULL kvm_nested_state causes a EFAULT. */
- test_nested_state_expect_efault(vm, NULL);
+ test_nested_state_expect_efault(vcpu, NULL);
/* 'size' cannot be smaller than sizeof(kvm_nested_state). */
set_default_state(&state);
state.size = 0;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
/*
* Setting the flags 0xf fails the flags check. The only flags that
@@ -302,7 +287,7 @@ int main(int argc, char *argv[])
*/
set_default_state(&state);
state.flags = 0xf;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
/*
* If KVM_STATE_NESTED_RUN_PENDING is set then
@@ -310,9 +295,9 @@ int main(int argc, char *argv[])
*/
set_default_state(&state);
state.flags = KVM_STATE_NESTED_RUN_PENDING;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
- test_vmx_nested_state(vm);
+ test_vmx_nested_state(vcpu);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index 2f964cdc273c..ae4a4b6c05ca 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -19,8 +19,6 @@
* Migration is a command line option. When used on non-numa machines will
* exit with error. Test is still usefull on non-numa for testing IPIs.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
@@ -39,11 +37,6 @@
/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000
-#define HALTER_VCPU_ID 0
-#define SENDER_VCPU_ID 1
-
-volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA;
-
/*
* Vector for IPI from sender vCPU to halting vCPU.
* Value is arbitrary and was chosen for the alternating bit pattern. Any
@@ -81,50 +74,10 @@ struct test_data_page {
struct thread_params {
struct test_data_page *data;
- struct kvm_vm *vm;
- uint32_t vcpu_id;
+ struct kvm_vcpu *vcpu;
uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};
-uint32_t read_apic_reg(uint reg)
-{
- return apic_base[reg >> 2];
-}
-
-void write_apic_reg(uint reg, uint32_t val)
-{
- apic_base[reg >> 2] = val;
-}
-
-void disable_apic(void)
-{
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) &
- ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
-}
-
-void enable_xapic(void)
-{
- uint64_t val = rdmsr(MSR_IA32_APICBASE);
-
- /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
- if (val & MSR_IA32_APICBASE_EXTD) {
- disable_apic();
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
- } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
- wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
- }
-
- /*
- * Per SDM: reset value of spurious interrupt vector register has the
- * APIC software enabled bit=0. It must be enabled in addition to the
- * enable bit in the MSR.
- */
- val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
- write_apic_reg(APIC_SPIV, val);
-}
-
void verify_apic_base_addr(void)
{
uint64_t msr = rdmsr(MSR_IA32_APICBASE);
@@ -136,10 +89,10 @@ void verify_apic_base_addr(void)
static void halter_guest_code(struct test_data_page *data)
{
verify_apic_base_addr();
- enable_xapic();
+ xapic_enable();
- data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID));
- data->halter_lvr = read_apic_reg(APIC_LVR);
+ data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+ data->halter_lvr = xapic_read_reg(APIC_LVR);
/*
* Loop forever HLTing and recording halts & wakes. Disable interrupts
@@ -150,10 +103,11 @@ static void halter_guest_code(struct test_data_page *data)
* TPR and PPR for diagnostic purposes in case the test fails.
*/
for (;;) {
- data->halter_tpr = read_apic_reg(APIC_TASKPRI);
- data->halter_ppr = read_apic_reg(APIC_PROCPRI);
+ data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+ data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
- asm volatile("sti; hlt; cli");
+ safe_halt();
+ cli();
data->wake_count++;
}
}
@@ -166,7 +120,7 @@ static void halter_guest_code(struct test_data_page *data)
static void guest_ipi_handler(struct ex_regs *regs)
{
ipis_rcvd++;
- write_apic_reg(APIC_EOI, 77);
+ xapic_write_reg(APIC_EOI, 77);
}
static void sender_guest_code(struct test_data_page *data)
@@ -179,7 +133,7 @@ static void sender_guest_code(struct test_data_page *data)
uint64_t tsc_start;
verify_apic_base_addr();
- enable_xapic();
+ xapic_enable();
/*
* Init interrupt command register for sending IPIs
@@ -206,8 +160,8 @@ static void sender_guest_code(struct test_data_page *data)
* First IPI can be sent unconditionally because halter vCPU
* starts earlier.
*/
- write_apic_reg(APIC_ICR2, icr2_val);
- write_apic_reg(APIC_ICR, icr_val);
+ xapic_write_reg(APIC_ICR2, icr2_val);
+ xapic_write_reg(APIC_ICR, icr_val);
data->ipis_sent++;
/*
@@ -239,33 +193,30 @@ static void sender_guest_code(struct test_data_page *data)
static void *vcpu_thread(void *arg)
{
struct thread_params *params = (struct thread_params *)arg;
+ struct kvm_vcpu *vcpu = params->vcpu;
struct ucall uc;
int old;
int r;
- unsigned int exit_reason;
r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
TEST_ASSERT(r == 0,
"pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
- params->vcpu_id, r);
+ vcpu->id, r);
- fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id);
- vcpu_run(params->vm, params->vcpu_id);
- exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;
+ fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
+ vcpu_run(vcpu);
- TEST_ASSERT(exit_reason == KVM_EXIT_IO,
- "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
- params->vcpu_id, exit_reason, exit_reason_str(exit_reason));
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) {
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
TEST_ASSERT(false,
"vCPU %u exited with error: %s.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
"Halter TPR=%#x PPR=%#x LVR=%#x\n"
"Migrations attempted: %lu\n"
- "Migrations completed: %lu\n",
- params->vcpu_id, (const char *)uc.args[0],
+ "Migrations completed: %lu",
+ vcpu->id, (const char *)uc.args[0],
params->data->ipis_sent, params->data->hlt_count,
params->data->wake_count,
*params->pipis_rcvd, params->data->halter_tpr,
@@ -277,7 +228,7 @@ static void *vcpu_thread(void *arg)
return NULL;
}
-static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
{
void *retval;
int r;
@@ -285,12 +236,12 @@ static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
r = pthread_cancel(thread);
TEST_ASSERT(r == 0,
"pthread_cancel on vcpu_id=%d failed with errno=%d",
- vcpu_id, r);
+ vcpu->id, r);
r = pthread_join(thread, &retval);
TEST_ASSERT(r == 0,
"pthread_join on vcpu_id=%d failed with errno=%d",
- vcpu_id, r);
+ vcpu->id, r);
TEST_ASSERT(retval == PTHREAD_CANCELED,
"expected retval=%p, got %p", PTHREAD_CANCELED,
retval);
@@ -305,7 +256,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
int nodes = 0;
time_t start_time, last_update, now;
time_t interval_secs = 1;
- int i, r;
+ int i;
int from, to;
unsigned long bit;
uint64_t hlt_count;
@@ -316,9 +267,8 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
delay_usecs);
/* Get set of first 64 numa nodes available */
- r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
0, MPOL_F_MEMS_ALLOWED);
- TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
"(each 1-bit indicates node is present): %#lx\n",
@@ -336,7 +286,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
}
TEST_ASSERT(nodes > 1,
- "Did not find at least 2 numa nodes. Can't do migration\n");
+ "Did not find at least 2 numa nodes. Can't do migration");
fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
@@ -395,7 +345,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
wake_count != data->wake_count,
"IPI, HLT and wake count have not increased "
"in the last %lu seconds. "
- "HLTer is likely hung.\n", interval_secs);
+ "HLTer is likely hung.", interval_secs);
ipis_sent = data->ipis_sent;
hlt_count = data->hlt_count;
@@ -429,7 +379,7 @@ void get_cmdline_args(int argc, char *argv[], int *run_secs,
"-m adds calls to migrate_pages while vCPUs are running."
" Default is no migrations.\n"
"-d <delay microseconds> - delay between migrate_pages() calls."
- " Default is %d microseconds.\n",
+ " Default is %d microseconds.",
DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
}
}
@@ -456,34 +406,28 @@ int main(int argc, char *argv[])
if (delay_usecs <= 0)
delay_usecs = DEFAULT_DELAY_USECS;
- vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code);
- params[0].vm = vm;
- params[1].vm = vm;
+ vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
- vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
- vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
+ params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
- test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0);
- data =
- (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
+ test_data_page_vaddr = vm_vaddr_alloc_page(vm);
+ data = addr_gva2hva(vm, test_data_page_vaddr);
memset(data, 0, sizeof(*data));
params[0].data = data;
params[1].data = data;
- vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr);
- vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr);
+ vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
+ vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
params[0].pipis_rcvd = pipis_rcvd;
params[1].pipis_rcvd = pipis_rcvd;
/* Start halter vCPU thread and wait for it to execute first HLT. */
- params[0].vcpu_id = HALTER_VCPU_ID;
r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
TEST_ASSERT(r == 0,
"pthread_create halter failed errno=%d", errno);
@@ -503,7 +447,6 @@ int main(int argc, char *argv[])
"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
data->halter_apic_id, wait_secs);
- params[1].vcpu_id = SENDER_VCPU_ID;
r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
@@ -519,8 +462,21 @@ int main(int argc, char *argv[])
/*
* Cancel threads and wait for them to stop.
*/
- cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID);
- cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID);
+ cancel_join_vcpu_thread(threads[0], params[0].vcpu);
+ cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+
+ /*
+ * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT,
+ * then the number of HLT exits may be less than the number of HLTs
+ * that were executed, as Idle HLT elides the exit if the vCPU has an
+ * unmasked, pending IRQ (or NMI).
+ */
+ if (this_cpu_has(X86_FEATURE_IDLE_HLT))
+ TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
+ "HLT insns = %lu, HLT exits = %lu",
+ data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+ else
+ TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
fprintf(stderr,
"Test successful after running for %d seconds.\n"
diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
new file mode 100644
index 000000000000..3b4814c55722
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+struct xapic_vcpu {
+ struct kvm_vcpu *vcpu;
+ bool is_x2apic;
+ bool has_xavic_errata;
+};
+
+static void xapic_guest_code(void)
+{
+ cli();
+
+ xapic_enable();
+
+ while (1) {
+ uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
+ (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
+
+ xapic_write_reg(APIC_ICR2, val >> 32);
+ xapic_write_reg(APIC_ICR, val);
+ GUEST_SYNC(val);
+ }
+}
+
+#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \
+ GENMASK_ULL(17, 16) | \
+ GENMASK_ULL(13, 13))
+
+static void x2apic_guest_code(void)
+{
+ cli();
+
+ x2apic_enable();
+
+ do {
+ uint64_t val = x2apic_read_reg(APIC_IRR) |
+ x2apic_read_reg(APIC_IRR + 0x10) << 32;
+
+ if (val & X2APIC_RSVD_BITS_MASK) {
+ x2apic_write_reg_fault(APIC_ICR, val);
+ } else {
+ x2apic_write_reg(APIC_ICR, val);
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val);
+ }
+ GUEST_SYNC(val);
+ } while (1);
+}
+
+static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+ struct kvm_vcpu *vcpu = x->vcpu;
+ struct kvm_lapic_state xapic;
+ struct ucall uc;
+ uint64_t icr;
+
+ /*
+ * Tell the guest what ICR value to write. Use the IRR to pass info,
+ * all bits are valid and should not be modified by KVM (ignoring the
+ * fact that vectors 0-15 are technically illegal).
+ */
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ *((u32 *)&xapic.regs[APIC_IRR]) = val;
+ *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], val);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
+ (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
+ if (!x->is_x2apic) {
+ if (!x->has_xavic_errata)
+ val &= (-1u | (0xffull << (32 + 24)));
+ } else if (val & X2APIC_RSVD_BITS_MASK) {
+ return;
+ }
+
+ if (x->has_xavic_errata)
+ TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+ else
+ TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+}
+
+static void __test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+ /*
+ * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
+ * it is as _must_ be zero. Intel simply ignores the bit. Don't test
+ * the BUSY bit for x2APIC, as there is no single correct behavior.
+ */
+ if (!x->is_x2apic)
+ ____test_icr(x, val | APIC_ICR_BUSY);
+
+ ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
+}
+
+static void test_icr(struct xapic_vcpu *x)
+{
+ struct kvm_vcpu *vcpu = x->vcpu;
+ uint64_t icr, i, j;
+
+ icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
+ for (i = 0; i <= 0xff; i++)
+ __test_icr(x, icr | i);
+
+ icr = APIC_INT_ASSERT | APIC_DM_FIXED;
+ for (i = 0; i <= 0xff; i++)
+ __test_icr(x, icr | i);
+
+ /*
+ * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use
+ * vector 0xff.
+ */
+ icr = APIC_INT_ASSERT | 0xff;
+ for (i = 0; i < 0xff; i++) {
+ if (i == vcpu->id)
+ continue;
+ for (j = 0; j < 8; j++)
+ __test_icr(x, i << (32 + 24) | icr | (j << 8));
+ }
+
+ /* And again with a shorthand destination for all types of IPIs. */
+ icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
+ for (i = 0; i < 8; i++)
+ __test_icr(x, icr | (i << 8));
+
+ /* And a few garbage value, just make sure it's an IRQ (blocked). */
+ __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
+}
+
+static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
+{
+ uint32_t apic_id, expected;
+ struct kvm_lapic_state xapic;
+
+ vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+ expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24;
+ apic_id = *((u32 *)&xapic.regs[APIC_ID]);
+
+ TEST_ASSERT(apic_id == expected,
+ "APIC_ID not set back to %s format; wanted = %x, got = %x",
+ (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC",
+ expected, apic_id);
+}
+
+/*
+ * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace
+ * stuffs MSR_IA32_APICBASE. Setting the APIC_ID when x2APIC is enabled and
+ * when the APIC transitions for DISABLED to ENABLED is architectural behavior
+ * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI since
+ * attempted to transition from x2APIC to xAPIC without disabling the APIC is
+ * architecturally disallowed.
+ */
+static void test_apic_id(void)
+{
+ const uint32_t NR_VCPUS = 3;
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ uint64_t apic_base;
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
+ vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);
+
+ for (i = 0; i < NR_VCPUS; i++) {
+ apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE);
+
+ TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
+ "APIC not in ENABLED state at vCPU RESET");
+ TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
+ "APIC not in xAPIC mode at vCPU RESET");
+
+ __test_apic_id(vcpus[i], apic_base);
+ __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE);
+ __test_apic_id(vcpus[i], apic_base);
+ }
+
+ kvm_vm_free(vm);
+}
+
+static void test_x2apic_id(void)
+{
+ struct kvm_lapic_state lapic = {};
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+
+ /*
+ * Try stuffing a modified x2APIC ID, KVM should ignore the value and
+ * always return the vCPU's default/readonly x2APIC ID.
+ */
+ for (i = 0; i <= 0xff; i++) {
+ *(u32 *)(lapic.regs + APIC_ID) = i << 24;
+ *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED;
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
+ TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24,
+ "x2APIC ID should be fully readonly");
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct xapic_vcpu x = {
+ .vcpu = NULL,
+ .is_x2apic = true,
+ };
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
+ test_icr(&x);
+ kvm_vm_free(vm);
+
+ /*
+ * Use a second VM for the xAPIC test so that x2APIC can be hidden from
+ * the guest in order to test AVIC. KVM disallows changing CPUID after
+ * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+ */
+ vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
+ x.is_x2apic = false;
+
+ /*
+ * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit),
+ * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel
+ * drops writes, AMD does not). Account for the errata when checking
+ * that KVM reads back what was written.
+ */
+ x.has_xavic_errata = host_cpu_is_amd &&
+ get_kvm_amd_param_bool("avic");
+
+ vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ test_icr(&x);
+ kvm_vm_free(vm);
+
+ test_apic_id();
+ test_x2apic_id();
+}
diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
new file mode 100644
index 000000000000..d038c1571729
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * XCR0 cpuid test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Assert that architectural dependency rules are satisfied, e.g. that AVX is
+ * supported if and only if SSE is supported.
+ */
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \
+do { \
+ uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
+ \
+ __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \
+ __supported == ((xfeatures) | (dependencies)), \
+ "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \
+ __supported, (xfeatures), (dependencies)); \
+} while (0)
+
+/*
+ * Assert that KVM reports a sane, usable as-is XCR0. Architecturally, a CPU
+ * isn't strictly required to _support_ all XFeatures related to a feature, but
+ * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
+ * disabled coherently. E.g. a CPU can technically enumerate supported for
+ * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
+ * XTILE_DATA will #GP.
+ */
+#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \
+do { \
+ uint64_t __supported = (supported_xcr0) & (xfeatures); \
+ \
+ __GUEST_ASSERT(!__supported || __supported == (xfeatures), \
+ "supported = 0x%lx, xfeatures = 0x%llx", \
+ __supported, (xfeatures)); \
+} while (0)
+
+static void guest_code(void)
+{
+ uint64_t initial_xcr0;
+ uint64_t supported_xcr0;
+ int i, vector;
+
+ set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+
+ initial_xcr0 = xgetbv(0);
+ supported_xcr0 = this_cpu_supported_xcr0();
+
+ GUEST_ASSERT(initial_xcr0 == supported_xcr0);
+
+ /* Check AVX */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_YMM,
+ XFEATURE_MASK_SSE);
+
+ /* Check MPX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+ /* Check AVX-512 */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_AVX512,
+ XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_AVX512);
+
+ /* Check AMX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_XTILE);
+
+ vector = xsetbv_safe(0, XFEATURE_MASK_FP);
+ __GUEST_ASSERT(!vector,
+ "Expected success on XSETBV(FP), got %s",
+ ex_str(vector));
+
+ vector = xsetbv_safe(0, supported_xcr0);
+ __GUEST_ASSERT(!vector,
+ "Expected success on XSETBV(0x%lx), got %s",
+ supported_xcr0, ex_str(vector));
+
+ for (i = 0; i < 64; i++) {
+ if (supported_xcr0 & BIT_ULL(i))
+ continue;
+
+ vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s",
+ BIT_ULL(i), supported_xcr0, ex_str(vector));
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
new file mode 100644
index 000000000000..23909b501ac2
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -0,0 +1,1145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include <sys/eventfd.h>
+
+#define SHINFO_REGION_GVA 0xc0000000ULL
+#define SHINFO_REGION_GPA 0xc0000000ULL
+#define SHINFO_REGION_SLOT 10
+
+#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT 11
+
+#define DUMMY_REGION_GPA_2 (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT_2 12
+
+#define SHINFO_ADDR (SHINFO_REGION_GPA)
+#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
+#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define SHINFO_VADDR (SHINFO_REGION_GVA)
+#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define EVTCHN_VECTOR 0x10
+
+#define EVTCHN_TEST1 15
+#define EVTCHN_TEST2 66
+#define EVTCHN_TIMER 13
+
+enum {
+ TEST_INJECT_VECTOR = 0,
+ TEST_RUNSTATE_runnable,
+ TEST_RUNSTATE_blocked,
+ TEST_RUNSTATE_offline,
+ TEST_RUNSTATE_ADJUST,
+ TEST_RUNSTATE_DATA,
+ TEST_STEAL_TIME,
+ TEST_EVTCHN_MASKED,
+ TEST_EVTCHN_UNMASKED,
+ TEST_EVTCHN_SLOWPATH,
+ TEST_EVTCHN_SEND_IOCTL,
+ TEST_EVTCHN_HCALL,
+ TEST_EVTCHN_HCALL_SLOWPATH,
+ TEST_EVTCHN_HCALL_EVENTFD,
+ TEST_TIMER_SETUP,
+ TEST_TIMER_WAIT,
+ TEST_TIMER_RESTORE,
+ TEST_POLL_READY,
+ TEST_POLL_TIMEOUT,
+ TEST_POLL_MASKED,
+ TEST_POLL_WAKE,
+ SET_VCPU_INFO,
+ TEST_TIMER_PAST,
+ TEST_LOCKING_SEND_RACE,
+ TEST_LOCKING_POLL_RACE,
+ TEST_LOCKING_POLL_TIMEOUT,
+ TEST_DONE,
+
+ TEST_GUEST_SAW_IRQ,
+};
+
+#define XEN_HYPERCALL_MSR 0x40000000
+
+#define MIN_STEAL_TIME 50000
+
+#define SHINFO_RACE_TIMEOUT 2 /* seconds */
+
+#define __HYPERVISOR_set_timer_op 15
+#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_event_channel_op 32
+
+#define SCHEDOP_poll 3
+
+#define EVTCHNOP_send 4
+
+#define EVTCHNSTAT_interdomain 2
+
+struct evtchn_send {
+ u32 port;
+};
+
+struct sched_poll {
+ u32 *ports;
+ unsigned int nr_ports;
+ u64 timeout;
+};
+
+struct pvclock_vcpu_time_info {
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+ u32 version;
+ u32 sec;
+ u32 nsec;
+} __attribute__((__packed__));
+
+struct vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[5]; /* Extra field for overrun check */
+};
+
+struct compat_vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[5];
+} __attribute__((__packed__));
+
+struct arch_vcpu_info {
+ unsigned long cr2;
+ unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ unsigned long evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
+struct shared_info {
+ struct vcpu_info vcpu_info[32];
+ unsigned long evtchn_pending[64];
+ unsigned long evtchn_mask[64];
+ struct pvclock_wall_clock wc;
+ uint32_t wc_sec_hi;
+ /* arch_shared_info here */
+};
+
+#define RUNSTATE_running 0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked 2
+#define RUNSTATE_offline 3
+
+static const char *runstate_names[] = {
+ "running",
+ "runnable",
+ "blocked",
+ "offline"
+};
+
+struct {
+ struct kvm_irq_routing info;
+ struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+
+static volatile bool guest_saw_irq;
+
+static void evtchn_handler(struct ex_regs *regs)
+{
+ struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
+
+ vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
+ vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
+ guest_saw_irq = true;
+
+ GUEST_SYNC(TEST_GUEST_SAW_IRQ);
+}
+
+static void guest_wait_for_irq(void)
+{
+ while (!guest_saw_irq)
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+ guest_saw_irq = false;
+}
+
+static void guest_code(void)
+{
+ struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+ int i;
+
+ sti_nop();
+
+ /* Trigger an interrupt injection */
+ GUEST_SYNC(TEST_INJECT_VECTOR);
+
+ guest_wait_for_irq();
+
+ /* Test having the host set runstates manually */
+ GUEST_SYNC(TEST_RUNSTATE_runnable);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(TEST_RUNSTATE_blocked);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(TEST_RUNSTATE_offline);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ /* Test runstate time adjust */
+ GUEST_SYNC(TEST_RUNSTATE_ADJUST);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+ /* Test runstate time set */
+ GUEST_SYNC(TEST_RUNSTATE_DATA);
+ GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+ /* sched_yield() should result in some 'runnable' time */
+ GUEST_SYNC(TEST_STEAL_TIME);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
+ /* Attempt to deliver a *masked* interrupt */
+ GUEST_SYNC(TEST_EVTCHN_MASKED);
+
+ /* Wait until we see the bit set */
+ struct shared_info *si = (void *)SHINFO_VADDR;
+ while (!si->evtchn_pending[0])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Now deliver an *unmasked* interrupt */
+ GUEST_SYNC(TEST_EVTCHN_UNMASKED);
+
+ guest_wait_for_irq();
+
+ /* Change memslots and deliver an interrupt */
+ GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
+
+ guest_wait_for_irq();
+
+ /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
+ GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL);
+
+ /* Our turn. Deliver event channel (to ourselves) with
+ * EVTCHNOP_send hypercall. */
+ struct evtchn_send s = { .port = 127 };
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
+
+ /*
+ * Same again, but this time the host has messed with memslots so it
+ * should take the slow path in kvm_xen_set_evtchn().
+ */
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
+
+ /* Deliver "outbound" event channel to an eventfd which
+ * happens to be one of our own irqfds. */
+ s.port = 197;
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_SETUP);
+
+ /* Set a timer 100ms in the future. */
+ xen_hypercall(__HYPERVISOR_set_timer_op,
+ rs->state_entry_time + 100000000, NULL);
+
+ GUEST_SYNC(TEST_TIMER_WAIT);
+
+ /* Now wait for the timer */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_RESTORE);
+
+ /* The host has 'restored' the timer. Just wait for it. */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_POLL_READY);
+
+ /* Poll for an event channel port which is already set */
+ u32 ports[1] = { EVTCHN_TIMER };
+ struct sched_poll p = {
+ .ports = ports,
+ .nr_ports = 1,
+ .timeout = 0,
+ };
+
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_TIMEOUT);
+
+ /* Poll for an unset port and wait for the timeout. */
+ p.timeout = 100000000;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_MASKED);
+
+ /* A timer will wake the masked port we're waiting on, while we poll */
+ p.timeout = 0;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_WAKE);
+
+ /* Set the vcpu_info to point at exactly the place it already is to
+ * make sure the attribute is functional. */
+ GUEST_SYNC(SET_VCPU_INFO);
+
+ /* A timer wake an *unmasked* port which should wake us with an
+ * actual interrupt, while we're polling on a different port. */
+ ports[0]++;
+ p.timeout = 0;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_PAST);
+
+ /* Timer should have fired already */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_LOCKING_SEND_RACE);
+ /* Racing host ioctls */
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_LOCKING_POLL_RACE);
+ /* Racing vmcall against host ioctl */
+
+ ports[0] = 0;
+
+ p = (struct sched_poll) {
+ .ports = ports,
+ .nr_ports = 1,
+ .timeout = 0
+ };
+
+wait_for_timer:
+ /*
+ * Poll for a timer wake event while the worker thread is mucking with
+ * the shared info. KVM XEN drops timer IRQs if the shared info is
+ * invalid when the timer expires. Arbitrarily poll 100 times before
+ * giving up and asking the VMM to re-arm the timer. 100 polls should
+ * consume enough time to beat on KVM without taking too long if the
+ * timer IRQ is dropped due to an invalid event channel.
+ */
+ for (i = 0; i < 100 && !guest_saw_irq; i++)
+ __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ /*
+ * Re-send the timer IRQ if it was (likely) dropped due to the timer
+ * expiring while the event channel was invalid.
+ */
+ if (!guest_saw_irq) {
+ GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
+ goto wait_for_timer;
+ }
+ guest_saw_irq = false;
+
+ GUEST_SYNC(TEST_DONE);
+}
+
+static struct shared_info *shinfo;
+static struct vcpu_info *vinfo;
+static struct kvm_vcpu *vcpu;
+
+static void handle_alrm(int sig)
+{
+ if (vinfo)
+ printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
+ vcpu_dump(stdout, vcpu, 0);
+ TEST_FAIL("IRQ delivery timed out");
+}
+
+static void *juggle_shinfo_state(void *arg)
+{
+ struct kvm_vm *vm = (struct kvm_vm *)arg;
+
+ struct kvm_xen_hvm_attr cache_activate_gfn = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
+ };
+
+ struct kvm_xen_hvm_attr cache_deactivate_gfn = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.gfn = KVM_XEN_INVALID_GFN
+ };
+
+ struct kvm_xen_hvm_attr cache_activate_hva = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+ .u.shared_info.hva = (unsigned long)shinfo
+ };
+
+ struct kvm_xen_hvm_attr cache_deactivate_hva = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.hva = 0
+ };
+
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+
+ for (;;) {
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
+ pthread_testcancel();
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
+
+ if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
+ pthread_testcancel();
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
+ }
+ }
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_xen_hvm_attr evt_reset;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ bool verbose;
+ int ret;
+
+ verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+ !strncmp(argv[1], "--verbose", 10));
+
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
+
+ bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+ bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
+ bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
+ bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
+ bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Map a region for the shared_info page */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
+
+ shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+
+ int zero_fd = open("/dev/zero", O_RDONLY);
+ TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
+ struct kvm_xen_hvm_config hvmc = {
+ .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+ .msr = XEN_HYPERCALL_MSR,
+ };
+
+ /* Let the kernel know that we *will* use it for sending all
+ * event channels, which lets it intercept SCHEDOP_poll */
+ if (do_evtchn_tests)
+ hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
+ vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+ struct kvm_xen_hvm_attr lm = {
+ .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
+ .u.long_mode = 1,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ if (do_runstate_flag) {
+ struct kvm_xen_hvm_attr ruf = {
+ .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
+ .u.runstate_update_flag = 1,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
+
+ ruf.u.runstate_update_flag = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
+ TEST_ASSERT(ruf.u.runstate_update_flag == 1,
+ "Failed to read back RUNSTATE_UPDATE_FLAG attr");
+ }
+
+ struct kvm_xen_hvm_attr ha = {};
+
+ if (has_shinfo_hva) {
+ ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
+ ha.u.shared_info.hva = (unsigned long)shinfo;
+ } else {
+ ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
+ ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
+ }
+
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+
+ /*
+ * Test what happens when the HVA of the shinfo page is remapped after
+ * the kernel has a reference to it. But make sure we copy the clock
+ * info over since that's only set at setup time, and we test it later.
+ */
+ struct pvclock_wall_clock wc_copy = shinfo->wc;
+ void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+ TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+ shinfo->wc = wc_copy;
+
+ struct kvm_xen_vcpu_attr vi = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+ .u.gpa = VCPU_INFO_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
+
+ struct kvm_xen_vcpu_attr pvclock = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
+ .u.gpa = PVTIME_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+
+ struct kvm_xen_hvm_attr vec = {
+ .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+ .u.vector = EVTCHN_VECTOR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+ vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
+ if (do_runstate_tests) {
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = RUNSTATE_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+ }
+
+ int irq_fd[2] = { -1, -1 };
+
+ if (do_eventfd_tests) {
+ irq_fd[0] = kvm_new_eventfd();
+ irq_fd[1] = kvm_new_eventfd();
+
+ irq_routes.info.nr = 2;
+
+ irq_routes.entries[0].gsi = 32;
+ irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
+ irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
+ irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ irq_routes.entries[1].gsi = 33;
+ irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
+ irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
+ irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
+
+ kvm_assign_irqfd(vm, 32, irq_fd[0]);
+ kvm_assign_irqfd(vm, 33, irq_fd[1]);
+
+ struct sigaction sa = { };
+ sa.sa_handler = handle_alrm;
+ sigaction(SIGALRM, &sa, NULL);
+ }
+
+ struct kvm_xen_vcpu_attr tmr = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+ .u.timer.port = EVTCHN_TIMER,
+ .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ .u.timer.expires_ns = 0
+ };
+
+ if (do_evtchn_tests) {
+ struct kvm_xen_hvm_attr inj = {
+ .type = KVM_XEN_ATTR_TYPE_EVTCHN,
+ .u.evtchn.send_port = 127,
+ .u.evtchn.type = EVTCHNSTAT_interdomain,
+ .u.evtchn.flags = 0,
+ .u.evtchn.deliver.port.port = EVTCHN_TEST1,
+ .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
+ .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ /* Test migration to a different vCPU */
+ inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
+ inj.u.evtchn.deliver.port.vcpu = vcpu->id;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ inj.u.evtchn.send_port = 197;
+ inj.u.evtchn.deliver.eventfd.port = 0;
+ inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
+ inj.u.evtchn.flags = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ }
+ vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+ vinfo->evtchn_upcall_pending = 0;
+
+ struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+ rs->state = 0x5a;
+
+ bool evtchn_irq_expected = false;
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC: {
+ struct kvm_xen_vcpu_attr rst;
+ long rundelay;
+
+ if (do_runstate_tests)
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+
+ switch (uc.args[1]) {
+ case TEST_INJECT_VECTOR:
+ if (verbose)
+ printf("Delivering evtchn upcall\n");
+ evtchn_irq_expected = true;
+ vinfo->evtchn_upcall_pending = 1;
+ break;
+
+ case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
+ TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+ if (!do_runstate_tests)
+ goto done;
+ if (verbose)
+ printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+ rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
+ TEST_RUNSTATE_runnable;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_RUNSTATE_ADJUST:
+ if (verbose)
+ printf("Testing RUNSTATE_ADJUST\n");
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = (uint64_t)-1;
+ rst.u.runstate.time_blocked =
+ 0x5a - rs->time[RUNSTATE_blocked];
+ rst.u.runstate.time_offline =
+ 0x6b6b - rs->time[RUNSTATE_offline];
+ rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+ rst.u.runstate.time_offline;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_RUNSTATE_DATA:
+ if (verbose)
+ printf("Testing RUNSTATE_DATA\n");
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = RUNSTATE_running;
+ rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+ rst.u.runstate.time_blocked = 0x6b6b;
+ rst.u.runstate.time_offline = 0x5a;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_STEAL_TIME:
+ if (verbose)
+ printf("Testing steal time\n");
+ /* Yield until scheduler delay exceeds target */
+ rundelay = get_run_delay() + MIN_STEAL_TIME;
+ do {
+ sched_yield();
+ } while (get_run_delay() < rundelay);
+ break;
+
+ case TEST_EVTCHN_MASKED:
+ if (!do_eventfd_tests)
+ goto done;
+ if (verbose)
+ printf("Testing masked event channel\n");
+ shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
+ eventfd_write(irq_fd[0], 1UL);
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_UNMASKED:
+ if (verbose)
+ printf("Testing unmasked event channel\n");
+ /* Unmask that, but deliver the other one */
+ shinfo->evtchn_pending[0] = 0;
+ shinfo->evtchn_mask[0] = 0;
+ eventfd_write(irq_fd[1], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_SLOWPATH:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+ if (verbose)
+ printf("Testing event channel after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+ eventfd_write(irq_fd[0], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_SEND_IOCTL:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ if (!do_evtchn_tests)
+ goto done;
+
+ shinfo->evtchn_pending[0] = 0;
+ if (verbose)
+ printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
+
+ struct kvm_irq_routing_xen_evtchn e;
+ e.port = EVTCHN_TEST2;
+ e.vcpu = vcpu->id;
+ e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send direct to evtchn\n");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL_SLOWPATH:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL_EVENTFD:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send to eventfd\n");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_TIMER_SETUP:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+
+ if (verbose)
+ printf("Testing guest oneshot timer\n");
+ break;
+
+ case TEST_TIMER_WAIT:
+ memset(&tmr, 0, sizeof(tmr));
+ tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
+ "Timer port not returned");
+ TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ "Timer priority not returned");
+ TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
+ "Timer expiry not returned");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_TIMER_RESTORE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing restored oneshot timer\n");
+
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_POLL_READY:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ if (verbose)
+ printf("Testing SCHEDOP_poll with already pending event\n");
+ shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
+ alarm(1);
+ break;
+
+ case TEST_POLL_TIMEOUT:
+ if (verbose)
+ printf("Testing SCHEDOP_poll timeout\n");
+ shinfo->evtchn_pending[0] = 0;
+ alarm(1);
+ break;
+
+ case TEST_POLL_MASKED:
+ if (verbose)
+ printf("Testing SCHEDOP_poll wake on masked event\n");
+
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ alarm(1);
+ break;
+
+ case TEST_POLL_WAKE:
+ shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
+ if (verbose)
+ printf("Testing SCHEDOP_poll wake on unmasked event\n");
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+
+ /* Read it back and check the pending time is reported correctly */
+ tmr.u.timer.expires_ns = 0;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
+ "Timer not reported pending");
+ alarm(1);
+ break;
+
+ case SET_VCPU_INFO:
+ if (has_shinfo_hva) {
+ struct kvm_xen_vcpu_attr vih = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
+ .u.hva = (unsigned long)vinfo
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
+ }
+ break;
+
+ case TEST_TIMER_PAST:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ /* Read timer and check it is no longer pending */
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
+
+ shinfo->evtchn_pending[0] = 0;
+ if (verbose)
+ printf("Testing timer in the past\n");
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ alarm(1);
+ break;
+
+ case TEST_LOCKING_SEND_RACE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ alarm(0);
+
+ if (verbose)
+ printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
+
+ ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
+ TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
+
+ struct kvm_irq_routing_xen_evtchn uxe = {
+ .port = 1,
+ .vcpu = vcpu->id,
+ .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
+ };
+
+ evtchn_irq_expected = true;
+ for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
+ __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
+ break;
+
+ case TEST_LOCKING_POLL_RACE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ if (verbose)
+ printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
+
+ shinfo->evtchn_pending[0] = 1;
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time +
+ SHINFO_RACE_TIMEOUT * 1000000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ break;
+
+ case TEST_LOCKING_POLL_TIMEOUT:
+ /*
+ * Optional and possibly repeated sync point.
+ * Injecting the timer IRQ may fail if the
+ * shinfo is invalid when the timer expires.
+ * If the timer has expired but the IRQ hasn't
+ * been delivered, rearm the timer and retry.
+ */
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+
+ /* Resume the guest if the timer is still pending. */
+ if (tmr.u.timer.expires_ns)
+ break;
+
+ /* All done if the IRQ was delivered. */
+ if (!evtchn_irq_expected)
+ break;
+
+ tmr.u.timer.expires_ns = rs->state_entry_time +
+ SHINFO_RACE_TIMEOUT * 1000000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ break;
+ case TEST_DONE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ ret = pthread_cancel(thread);
+ TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
+
+ ret = pthread_join(thread, 0);
+ TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
+ goto done;
+
+ case TEST_GUEST_SAW_IRQ:
+ TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+ evtchn_irq_expected = false;
+ break;
+ }
+ break;
+ }
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ done:
+ evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
+ evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
+
+ alarm(0);
+
+ /*
+ * Just a *really* basic check that things are being put in the
+ * right place. The actual calculations are much the same for
+ * Xen as they are for the KVM variants, so no need to check.
+ */
+ struct pvclock_wall_clock *wc;
+ struct pvclock_vcpu_time_info *ti, *ti2;
+ struct kvm_clock_data kcdata;
+ long long delta;
+
+ wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
+ ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
+ ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+
+ if (verbose) {
+ printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
+ printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+ ti->tsc_shift, ti->flags);
+ printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+ ti2->tsc_shift, ti2->flags);
+ }
+
+ TEST_ASSERT(wc->version && !(wc->version & 1),
+ "Bad wallclock version %x", wc->version);
+
+ vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);
+
+ if (kcdata.flags & KVM_CLOCK_REALTIME) {
+ if (verbose) {
+ printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
+ kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
+ printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
+ kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
+ }
+
+ delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);
+
+ /*
+ * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but
+ * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s
+ * delta as testing clock accuracy is not the goal here. The test just needs to
+ * check that the value in shinfo is somewhat sane.
+ */
+ TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
+ "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld",
+ wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
+ (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
+ } else {
+ pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
+ }
+
+ TEST_ASSERT(ti->version && !(ti->version & 1),
+ "Bad time_info version %x", ti->version);
+ TEST_ASSERT(ti2->version && !(ti2->version & 1),
+ "Bad time_info version %x", ti->version);
+
+ if (do_runstate_tests) {
+ /*
+ * Fetch runstate and check sanity. Strictly speaking in the
+ * general case we might not expect the numbers to be identical
+ * but in this case we know we aren't running the vCPU any more.
+ */
+ struct kvm_xen_vcpu_attr rst = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+ if (verbose) {
+ printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
+ rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+ rs->state, rs->state_entry_time);
+ for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+ printf("State %s: %" PRIu64 " ns\n",
+ runstate_names[i], rs->time[i]);
+ }
+ }
+
+ /*
+ * Exercise runstate info at all points across the page boundary, in
+ * 32-bit and 64-bit mode. In particular, test the case where it is
+ * configured in 32-bit mode and then switched to 64-bit mode while
+ * active, which takes it onto the second page.
+ */
+ unsigned long runstate_addr;
+ struct compat_vcpu_runstate_info *crs;
+ for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
+ runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
+
+ rs = addr_gpa2hva(vm, runstate_addr);
+ crs = (void *)rs;
+
+ memset(rs, 0xa5, sizeof(*rs));
+
+ /* Set to compatibility mode */
+ lm.u.long_mode = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ /* Set runstate to new address (kernel will write it) */
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = runstate_addr,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+
+ if (verbose)
+ printf("Compatibility runstate at %08lx\n", runstate_addr);
+
+ TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+ "Structure overrun");
+ TEST_ASSERT(crs->state_entry_time == crs->time[0] +
+ crs->time[1] + crs->time[2] + crs->time[3],
+ "runstate times don't add up");
+
+
+ /* Now switch to 64-bit mode */
+ lm.u.long_mode = 1;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ memset(rs, 0xa5, sizeof(*rs));
+
+ /* Don't change the address, just trigger a write */
+ struct kvm_xen_vcpu_attr adj = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
+ .u.runstate.state = (uint64_t)-1
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
+
+ if (verbose)
+ printf("64-bit runstate at %08lx\n", runstate_addr);
+
+ TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+ "Structure overrun");
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+ }
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
index 8389e0bfd711..2585087cdf5c 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
@@ -10,14 +10,10 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
-
-#define VCPU_ID 5
+#include "hyperv.h"
#define HCALL_REGION_GPA 0xc0000000ULL
#define HCALL_REGION_SLOT 10
-#define PAGE_SIZE 4096
-
-static struct kvm_vm *vm;
#define INPUTVALUE 17
#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
@@ -85,14 +81,15 @@ static void guest_code(void)
int main(int argc, char *argv[])
{
- if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
- KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) ) {
- print_skip("KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL not available");
- exit(KSFT_SKIP);
- }
+ unsigned int xen_caps;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
- vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
@@ -103,37 +100,34 @@ int main(int argc, char *argv[])
/* Map a region for the hypercall pages */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
- virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0);
+ virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
if (run->exit_reason == KVM_EXIT_XEN) {
- ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
- ASSERT_EQ(run->xen.u.hcall.cpl, 0);
- ASSERT_EQ(run->xen.u.hcall.longmode, 1);
- ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
- ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
- ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
- ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
- ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
- ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
- ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+ TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+ TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+ TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+ TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
run->xen.u.hcall.result = RETVALUE;
continue;
}
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
diff --git a/tools/testing/selftests/kvm/x86/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c
new file mode 100644
index 000000000000..f331a4e9bae3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019, Google LLC.
+ *
+ * Tests for the IA32_XSS MSR.
+ */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MSR_BITS 64
+
+int main(int argc, char *argv[])
+{
+ bool xss_in_msr_list;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint64_t xss_val;
+ int i, r;
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
+
+ xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
+ TEST_ASSERT(xss_val == 0,
+ "MSR_IA32_XSS should be initialized to zero");
+
+ vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
+
+ /*
+ * At present, KVM only supports a guest IA32_XSS value of 0. Verify
+ * that trying to set the guest IA32_XSS to an unsupported value fails.
+ * Also, in the future when a non-zero value succeeds check that
+ * IA32_XSS is in the list of MSRs to save/restore.
+ */
+ xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
+ for (i = 0; i < MSR_BITS; ++i) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
+
+ /*
+ * Setting a list of MSRs returns the entry that "faulted", or
+ * the last entry +1 if all MSRs were successfully written.
+ */
+ TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+ TEST_ASSERT(r != 1 || xss_in_msr_list,
+ "IA32_XSS was able to be set, but was not in save/restore list");
+ }
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
deleted file mode 100644
index f40fd097cb35..000000000000
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ /dev/null
@@ -1,116 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CR4 and CPUID sync test
- *
- * Copyright 2018, Red Hat, Inc. and/or its affiliates.
- *
- * Author:
- * Wei Huang <wei@redhat.com>
- */
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define X86_FEATURE_XSAVE (1<<26)
-#define X86_FEATURE_OSXSAVE (1<<27)
-#define VCPU_ID 1
-
-static inline bool cr4_cpuid_is_sync(void)
-{
- int func, subfunc;
- uint32_t eax, ebx, ecx, edx;
- uint64_t cr4;
-
- func = 0x1;
- subfunc = 0x0;
- __asm__ __volatile__("cpuid"
- : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
- : "a"(func), "c"(subfunc));
-
- cr4 = get_cr4();
-
- return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
-}
-
-static void guest_code(void)
-{
- uint64_t cr4;
-
- /* turn on CR4.OSXSAVE */
- cr4 = get_cr4();
- cr4 |= X86_CR4_OSXSAVE;
- set_cr4(cr4);
-
- /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
- GUEST_ASSERT(cr4_cpuid_is_sync());
-
- /* notify hypervisor to change CR4 */
- GUEST_SYNC(0);
-
- /* check again */
- GUEST_ASSERT(cr4_cpuid_is_sync());
-
- GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_run *run;
- struct kvm_vm *vm;
- struct kvm_sregs sregs;
- struct kvm_cpuid_entry2 *entry;
- struct ucall uc;
- int rc;
-
- entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & X86_FEATURE_XSAVE)) {
- print_skip("XSAVE feature not supported");
- return 0;
- }
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
-
- while (1) {
- rc = _vcpu_run(vm, VCPU_ID);
-
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_SYNC:
- /* emulate hypervisor clearing CR4.OSXSAVE */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
- sregs.cr4 &= ~X86_CR4_OSXSAVE;
- vcpu_sregs_set(vm, VCPU_ID, &sregs);
- break;
- case UCALL_ABORT:
- TEST_FAIL("Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-
- kvm_vm_free(vm);
-
-done:
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
deleted file mode 100644
index ca22ee6d19cb..000000000000
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ /dev/null
@@ -1,165 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for Enlightened VMCS, including nested guest state.
- */
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-
-#include "vmx.h"
-
-#define VCPU_ID 5
-
-void l2_guest_code(void)
-{
- GUEST_SYNC(7);
-
- GUEST_SYNC(8);
-
- /* Done, exit to L1 and never come back. */
- vmcall();
-}
-
-void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
-
- GUEST_ASSERT(vmx_pages->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_SYNC(3);
- GUEST_ASSERT(load_vmcs(vmx_pages));
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-
- GUEST_SYNC(4);
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(5);
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
- current_evmcs->revision_id = -1u;
- GUEST_ASSERT(vmlaunch());
- current_evmcs->revision_id = EVMCS_VERSION;
- GUEST_SYNC(6);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
- GUEST_SYNC(9);
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_SYNC(10);
-}
-
-void guest_code(struct vmx_pages *vmx_pages)
-{
- GUEST_SYNC(1);
- GUEST_SYNC(2);
-
- if (vmx_pages)
- l1_guest_code(vmx_pages);
-
- GUEST_DONE();
-
- /* Try enlightened vmptrld with an incorrect GPA */
- evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
- GUEST_ASSERT(vmlaunch());
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t vmx_pages_gva = 0;
-
- struct kvm_regs regs1, regs2;
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_x86_state *state;
- struct ucall uc;
- int stage;
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- if (!nested_vmx_supported() ||
- !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
- !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- exit(KSFT_SKIP);
- }
-
- vcpu_set_hv_cpuid(vm, VCPU_ID);
- vcpu_enable_evmcs(vm, VCPU_ID);
-
- run = vcpu_state(vm, VCPU_ID);
-
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-
- for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto part1_done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- /* UCALL_SYNC is handled here. */
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
- stage, (ulong)uc.args[1]);
-
- state = vcpu_save_state(vm, VCPU_ID);
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
- vcpu_enable_evmcs(vm, VCPU_ID);
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
-
- memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
- TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
- }
-
-part1_done:
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
- "Unexpected successful VMEnter with invalid eVMCS pointer!");
-
- kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
deleted file mode 100644
index 9b78e8889638..000000000000
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat Inc.
- *
- * Generic tests for KVM CPUID set/get ioctls
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#define VCPU_ID 0
-
-/* CPUIDs known to differ */
-struct {
- u32 function;
- u32 index;
-} mangled_cpuids[] = {
- {.function = 0xd, .index = 0},
-};
-
-static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
-{
- int i;
- u32 eax, ebx, ecx, edx;
-
- for (i = 0; i < guest_cpuid->nent; i++) {
- eax = guest_cpuid->entries[i].function;
- ecx = guest_cpuid->entries[i].index;
-
- cpuid(&eax, &ebx, &ecx, &edx);
-
- GUEST_ASSERT(eax == guest_cpuid->entries[i].eax &&
- ebx == guest_cpuid->entries[i].ebx &&
- ecx == guest_cpuid->entries[i].ecx &&
- edx == guest_cpuid->entries[i].edx);
- }
-
-}
-
-static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid)
-{
- u32 eax = 0x40000000, ebx, ecx = 0, edx;
-
- cpuid(&eax, &ebx, &ecx, &edx);
-
- GUEST_ASSERT(eax == 0x40000001);
-}
-
-static void guest_main(struct kvm_cpuid2 *guest_cpuid)
-{
- GUEST_SYNC(1);
-
- test_guest_cpuids(guest_cpuid);
-
- GUEST_SYNC(2);
-
- test_cpuid_40000000(guest_cpuid);
-
- GUEST_DONE();
-}
-
-static bool is_cpuid_mangled(struct kvm_cpuid_entry2 *entrie)
-{
- int i;
-
- for (i = 0; i < sizeof(mangled_cpuids); i++) {
- if (mangled_cpuids[i].function == entrie->function &&
- mangled_cpuids[i].index == entrie->index)
- return true;
- }
-
- return false;
-}
-
-static void check_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *entrie)
-{
- int i;
-
- for (i = 0; i < cpuid->nent; i++) {
- if (cpuid->entries[i].function == entrie->function &&
- cpuid->entries[i].index == entrie->index) {
- if (is_cpuid_mangled(entrie))
- return;
-
- TEST_ASSERT(cpuid->entries[i].eax == entrie->eax &&
- cpuid->entries[i].ebx == entrie->ebx &&
- cpuid->entries[i].ecx == entrie->ecx &&
- cpuid->entries[i].edx == entrie->edx,
- "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
- entrie->function, entrie->index,
- cpuid->entries[i].eax, cpuid->entries[i].ebx,
- cpuid->entries[i].ecx, cpuid->entries[i].edx,
- entrie->eax, entrie->ebx, entrie->ecx, entrie->edx);
- return;
- }
- }
-
- TEST_ASSERT(false, "CPUID 0x%x.%x not found", entrie->function, entrie->index);
-}
-
-static void compare_cpuids(struct kvm_cpuid2 *cpuid1, struct kvm_cpuid2 *cpuid2)
-{
- int i;
-
- for (i = 0; i < cpuid1->nent; i++)
- check_cpuid(cpuid2, &cpuid1->entries[i]);
-
- for (i = 0; i < cpuid2->nent; i++)
- check_cpuid(cpuid1, &cpuid2->entries[i]);
-}
-
-static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
-{
- struct ucall uc;
-
- _vcpu_run(vm, vcpuid);
-
- switch (get_ucall(vm, vcpuid, &uc)) {
- case UCALL_SYNC:
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage + 1,
- "Stage %d: Unexpected register values vmexit, got %lx",
- stage + 1, (ulong)uc.args[1]);
- return;
- case UCALL_DONE:
- return;
- case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2], uc.args[3]);
- default:
- TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
- }
-}
-
-struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
-{
- int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
- vm_vaddr_t gva = vm_vaddr_alloc(vm, size,
- getpagesize(), 0, 0);
- struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
-
- memcpy(guest_cpuids, cpuid, size);
-
- *p_gva = gva;
- return guest_cpuids;
-}
-
-int main(void)
-{
- struct kvm_cpuid2 *supp_cpuid, *cpuid2;
- vm_vaddr_t cpuid_gva;
- struct kvm_vm *vm;
- int stage;
-
- vm = vm_create_default(VCPU_ID, 0, guest_main);
-
- supp_cpuid = kvm_get_supported_cpuid();
- cpuid2 = vcpu_get_cpuid(vm, VCPU_ID);
-
- compare_cpuids(supp_cpuid, cpuid2);
-
- vcpu_alloc_cpuid(vm, &cpuid_gva, cpuid2);
-
- vcpu_args_set(vm, VCPU_ID, 1, cpuid_gva);
-
- for (stage = 0; stage < 3; stage++)
- run_vcpu(vm, VCPU_ID, stage);
-
- kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
deleted file mode 100644
index 7e2d2d17d2ed..000000000000
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ /dev/null
@@ -1,176 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_CAP_HYPERV_CPUID
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- */
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#define VCPU_ID 0
-
-static void guest_code(void)
-{
-}
-
-static bool smt_possible(void)
-{
- char buf[16];
- FILE *f;
- bool res = true;
-
- f = fopen("/sys/devices/system/cpu/smt/control", "r");
- if (f) {
- if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
- if (!strncmp(buf, "forceoff", 8) ||
- !strncmp(buf, "notsupported", 12))
- res = false;
- }
- fclose(f);
- }
-
- return res;
-}
-
-static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
- bool evmcs_expected)
-{
- int i;
- int nent = 9;
- u32 test_val;
-
- if (evmcs_expected)
- nent += 1; /* 0x4000000A */
-
- TEST_ASSERT(hv_cpuid_entries->nent == nent,
- "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
- " with evmcs=%d (returned %d)",
- nent, evmcs_expected, hv_cpuid_entries->nent);
-
- for (i = 0; i < hv_cpuid_entries->nent; i++) {
- struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
-
- TEST_ASSERT((entry->function >= 0x40000000) &&
- (entry->function <= 0x40000082),
- "function %x is our of supported range",
- entry->function);
-
- TEST_ASSERT(evmcs_expected || (entry->function != 0x4000000A),
- "0x4000000A leaf should not be reported");
-
- TEST_ASSERT(entry->index == 0,
- ".index field should be zero");
-
- TEST_ASSERT(entry->flags == 0,
- ".flags field should be zero");
-
- TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
- !entry->padding[2], "padding should be zero");
-
- switch (entry->function) {
- case 0x40000000:
- test_val = 0x40000082;
-
- TEST_ASSERT(entry->eax == test_val,
- "Wrong max leaf report in 0x40000000.EAX: %x"
- " (evmcs=%d)",
- entry->eax, evmcs_expected
- );
- break;
- case 0x40000004:
- test_val = entry->eax & (1UL << 18);
-
- TEST_ASSERT(!!test_val == !smt_possible(),
- "NoNonArchitecturalCoreSharing bit"
- " doesn't reflect SMT setting");
- break;
- }
-
- /*
- * If needed for debug:
- * fprintf(stdout,
- * "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n",
- * entry->function, entry->eax, entry->ebx, entry->ecx,
- * entry->edx);
- */
- }
-
-}
-
-void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system)
-{
- static struct kvm_cpuid2 cpuid = {.nent = 0};
- int ret;
-
- if (!system)
- ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
- else
- ret = _kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
-
- TEST_ASSERT(ret == -1 && errno == E2BIG,
- "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
- " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- struct kvm_cpuid2 *hv_cpuid_entries;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- if (!kvm_check_cap(KVM_CAP_HYPERV_CPUID)) {
- print_skip("KVM_CAP_HYPERV_CPUID not supported");
- exit(KSFT_SKIP);
- }
-
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- /* Test vCPU ioctl version */
- test_hv_cpuid_e2big(vm, false);
-
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID);
- test_hv_cpuid(hv_cpuid_entries, false);
- free(hv_cpuid_entries);
-
- if (!nested_vmx_supported() ||
- !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- goto do_sys;
- }
- vcpu_enable_evmcs(vm, VCPU_ID);
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID);
- test_hv_cpuid(hv_cpuid_entries, true);
- free(hv_cpuid_entries);
-
-do_sys:
- /* Test system ioctl version */
- if (!kvm_check_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
- print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
- goto out;
- }
-
- test_hv_cpuid_e2big(vm, true);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid();
- test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported());
-
-out:
- kvm_vm_free(vm);
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
deleted file mode 100644
index 732b244d6956..000000000000
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ /dev/null
@@ -1,234 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for KVM paravirtual feature disablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-extern unsigned char rdmsr_start;
-extern unsigned char rdmsr_end;
-
-static u64 do_rdmsr(u32 idx)
-{
- u32 lo, hi;
-
- asm volatile("rdmsr_start: rdmsr;"
- "rdmsr_end:"
- : "=a"(lo), "=c"(hi)
- : "c"(idx));
-
- return (((u64) hi) << 32) | lo;
-}
-
-extern unsigned char wrmsr_start;
-extern unsigned char wrmsr_end;
-
-static void do_wrmsr(u32 idx, u64 val)
-{
- u32 lo, hi;
-
- lo = val;
- hi = val >> 32;
-
- asm volatile("wrmsr_start: wrmsr;"
- "wrmsr_end:"
- : : "a"(lo), "c"(idx), "d"(hi));
-}
-
-static int nr_gp;
-
-static void guest_gp_handler(struct ex_regs *regs)
-{
- unsigned char *rip = (unsigned char *)regs->rip;
- bool r, w;
-
- r = rip == &rdmsr_start;
- w = rip == &wrmsr_start;
- GUEST_ASSERT(r || w);
-
- nr_gp++;
-
- if (r)
- regs->rip = (uint64_t)&rdmsr_end;
- else
- regs->rip = (uint64_t)&wrmsr_end;
-}
-
-struct msr_data {
- uint32_t idx;
- const char *name;
-};
-
-#define TEST_MSR(msr) { .idx = msr, .name = #msr }
-#define UCALL_PR_MSR 0xdeadbeef
-#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
-
-/*
- * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
- * written, as the KVM_CPUID_FEATURES leaf is cleared.
- */
-static struct msr_data msrs_to_test[] = {
- TEST_MSR(MSR_KVM_SYSTEM_TIME),
- TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
- TEST_MSR(MSR_KVM_WALL_CLOCK),
- TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
- TEST_MSR(MSR_KVM_ASYNC_PF_EN),
- TEST_MSR(MSR_KVM_STEAL_TIME),
- TEST_MSR(MSR_KVM_PV_EOI_EN),
- TEST_MSR(MSR_KVM_POLL_CONTROL),
- TEST_MSR(MSR_KVM_ASYNC_PF_INT),
- TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
-};
-
-static void test_msr(struct msr_data *msr)
-{
- PR_MSR(msr);
- do_rdmsr(msr->idx);
- GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
-
- nr_gp = 0;
- do_wrmsr(msr->idx, 0);
- GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
- nr_gp = 0;
-}
-
-struct hcall_data {
- uint64_t nr;
- const char *name;
-};
-
-#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
-#define UCALL_PR_HCALL 0xdeadc0de
-#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
-
-/*
- * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
- * features have been cleared in KVM_CPUID_FEATURES.
- */
-static struct hcall_data hcalls_to_test[] = {
- TEST_HCALL(KVM_HC_KICK_CPU),
- TEST_HCALL(KVM_HC_SEND_IPI),
- TEST_HCALL(KVM_HC_SCHED_YIELD),
-};
-
-static void test_hcall(struct hcall_data *hc)
-{
- uint64_t r;
-
- PR_HCALL(hc);
- r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
- GUEST_ASSERT(r == -KVM_ENOSYS);
-}
-
-static void guest_main(void)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
- test_msr(&msrs_to_test[i]);
- }
-
- for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
- test_hcall(&hcalls_to_test[i]);
- }
-
- GUEST_DONE();
-}
-
-static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid)
-{
- struct kvm_cpuid_entry2 ent = {0};
-
- ent.function = KVM_CPUID_FEATURES;
- TEST_ASSERT(set_cpuid(cpuid, &ent),
- "failed to clear KVM_CPUID_FEATURES leaf");
-}
-
-static void pr_msr(struct ucall *uc)
-{
- struct msr_data *msr = (struct msr_data *)uc->args[0];
-
- pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
-}
-
-static void pr_hcall(struct ucall *uc)
-{
- struct hcall_data *hc = (struct hcall_data *)uc->args[0];
-
- pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
-}
-
-static void handle_abort(struct ucall *uc)
-{
- TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
- __FILE__, uc->args[1]);
-}
-
-#define VCPU_ID 0
-
-static void enter_guest(struct kvm_vm *vm)
-{
- struct kvm_run *run;
- struct ucall uc;
- int r;
-
- run = vcpu_state(vm, VCPU_ID);
-
- while (true) {
- r = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "unexpected exit reason: %u (%s)",
- run->exit_reason, exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_PR_MSR:
- pr_msr(&uc);
- break;
- case UCALL_PR_HCALL:
- pr_hcall(&uc);
- break;
- case UCALL_ABORT:
- handle_abort(&uc);
- return;
- case UCALL_DONE:
- return;
- }
- }
-}
-
-int main(void)
-{
- struct kvm_enable_cap cap = {0};
- struct kvm_cpuid2 *best;
- struct kvm_vm *vm;
-
- if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
- print_skip("KVM_CAP_ENFORCE_PV_FEATURE_CPUID not supported");
- exit(KSFT_SKIP);
- }
-
- vm = vm_create_default(VCPU_ID, 0, guest_main);
-
- cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID;
- cap.args[0] = 1;
- vcpu_enable_cap(vm, VCPU_ID, &cap);
-
- best = kvm_get_supported_cpuid();
- clear_kvm_cpuid_features(best);
- vcpu_set_cpuid(vm, VCPU_ID, best);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
-
- enter_guest(vm);
- kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
deleted file mode 100644
index e6480fd5c4bd..000000000000
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * mmio_warning_test
- *
- * Copyright (C) 2019, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Test that we don't get a kernel warning when we call KVM_RUN after a
- * triple fault occurs. To get the triple fault to occur we call KVM_RUN
- * on a VCPU that hasn't been properly setup.
- *
- */
-
-#define _GNU_SOURCE
-#include <fcntl.h>
-#include <kvm_util.h>
-#include <linux/kvm.h>
-#include <processor.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <test_util.h>
-#include <unistd.h>
-
-#define NTHREAD 4
-#define NPROCESS 5
-
-struct thread_context {
- int kvmcpu;
- struct kvm_run *run;
-};
-
-void *thr(void *arg)
-{
- struct thread_context *tc = (struct thread_context *)arg;
- int res;
- int kvmcpu = tc->kvmcpu;
- struct kvm_run *run = tc->run;
-
- res = ioctl(kvmcpu, KVM_RUN, 0);
- pr_info("ret1=%d exit_reason=%d suberror=%d\n",
- res, run->exit_reason, run->internal.suberror);
-
- return 0;
-}
-
-void test(void)
-{
- int i, kvm, kvmvm, kvmcpu;
- pthread_t th[NTHREAD];
- struct kvm_run *run;
- struct thread_context tc;
-
- kvm = open("/dev/kvm", O_RDWR);
- TEST_ASSERT(kvm != -1, "failed to open /dev/kvm");
- kvmvm = ioctl(kvm, KVM_CREATE_VM, 0);
- TEST_ASSERT(kvmvm != -1, "KVM_CREATE_VM failed");
- kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0);
- TEST_ASSERT(kvmcpu != -1, "KVM_CREATE_VCPU failed");
- run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED,
- kvmcpu, 0);
- tc.kvmcpu = kvmcpu;
- tc.run = run;
- srand(getpid());
- for (i = 0; i < NTHREAD; i++) {
- pthread_create(&th[i], NULL, thr, (void *)(uintptr_t)&tc);
- usleep(rand() % 10000);
- }
- for (i = 0; i < NTHREAD; i++)
- pthread_join(th[i], NULL);
-}
-
-int get_warnings_count(void)
-{
- int warnings;
- FILE *f;
-
- f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
- fscanf(f, "%d", &warnings);
- fclose(f);
-
- return warnings;
-}
-
-int main(void)
-{
- int warnings_before, warnings_after;
-
- if (!is_intel_cpu()) {
- print_skip("Must be run on an Intel CPU");
- exit(KSFT_SKIP);
- }
-
- if (vm_is_unrestricted_guest(NULL)) {
- print_skip("Unrestricted guest must be disabled");
- exit(KSFT_SKIP);
- }
-
- warnings_before = get_warnings_count();
-
- for (int i = 0; i < NPROCESS; ++i) {
- int status;
- int pid = fork();
-
- if (pid < 0)
- exit(1);
- if (pid == 0) {
- test();
- exit(0);
- }
- while (waitpid(pid, &status, __WALL) != pid)
- ;
- }
-
- warnings_after = get_warnings_count();
- TEST_ASSERT(warnings_before == warnings_after,
- "Warnings found in kernel. Run 'dmesg' to inspect them.");
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
deleted file mode 100644
index 1e89688cbbbf..000000000000
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ /dev/null
@@ -1,107 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
- *
- * Copyright (C) 2018, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Verifies expected behavior of controlling guest access to
- * MSR_PLATFORM_INFO.
- */
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#define VCPU_ID 0
-#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
-
-static void guest_code(void)
-{
- uint64_t msr_platform_info;
-
- for (;;) {
- msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
- GUEST_SYNC(msr_platform_info);
- asm volatile ("inc %r11");
- }
-}
-
-static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
-{
- struct kvm_enable_cap cap = {};
-
- cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
- cap.flags = 0;
- cap.args[0] = (int)enable;
- vm_enable_cap(vm, &cap);
-}
-
-static void test_msr_platform_info_enabled(struct kvm_vm *vm)
-{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
- struct ucall uc;
-
- set_msr_platform_info_enabled(vm, true);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
- get_ucall(vm, VCPU_ID, &uc);
- TEST_ASSERT(uc.cmd == UCALL_SYNC,
- "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
- TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
- MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
- "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
- MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-}
-
-static void test_msr_platform_info_disabled(struct kvm_vm *vm)
-{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-
- set_msr_platform_info_enabled(vm, false);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
- "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- int rv;
- uint64_t msr_platform_info;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
- if (!rv) {
- print_skip("KVM_CAP_MSR_PLATFORM_INFO not supported");
- exit(KSFT_SKIP);
- }
-
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
- vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
- msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
- test_msr_platform_info_enabled(vm);
- test_msr_platform_info_disabled(vm);
- vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
-
- kvm_vm_free(vm);
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
deleted file mode 100644
index 318be0bf77ab..000000000000
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ /dev/null
@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM_SET_SREGS tests
- *
- * Copyright (C) 2018, Google LLC.
- *
- * This is a regression test for the bug fixed by the following commit:
- * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
- *
- * That bug allowed a user-mode program that called the KVM_SET_SREGS
- * ioctl to put a VCPU's local APIC into an invalid state.
- */
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define VCPU_ID 5
-
-static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig,
- uint64_t feature_bit)
-{
- struct kvm_sregs sregs;
- int rc;
-
- /* Skip the sub-test, the feature is supported. */
- if (orig->cr4 & feature_bit)
- return;
-
- memcpy(&sregs, orig, sizeof(sregs));
- sregs.cr4 |= feature_bit;
-
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
- TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);
-
- /* Sanity check that KVM didn't change anything. */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
- TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
-}
-
-static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm)
-{
- struct kvm_cpuid_entry2 *cpuid_1, *cpuid_7;
- uint64_t cr4;
-
- cpuid_1 = kvm_get_supported_cpuid_entry(1);
- cpuid_7 = kvm_get_supported_cpuid_entry(7);
-
- cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
- X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
- X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
- if (cpuid_7->ecx & CPUID_UMIP)
- cr4 |= X86_CR4_UMIP;
- if (cpuid_7->ecx & CPUID_LA57)
- cr4 |= X86_CR4_LA57;
- if (cpuid_1->ecx & CPUID_VMX)
- cr4 |= X86_CR4_VMXE;
- if (cpuid_1->ecx & CPUID_SMX)
- cr4 |= X86_CR4_SMXE;
- if (cpuid_7->ebx & CPUID_FSGSBASE)
- cr4 |= X86_CR4_FSGSBASE;
- if (cpuid_1->ecx & CPUID_PCID)
- cr4 |= X86_CR4_PCIDE;
- if (cpuid_1->ecx & CPUID_XSAVE)
- cr4 |= X86_CR4_OSXSAVE;
- if (cpuid_7->ebx & CPUID_SMEP)
- cr4 |= X86_CR4_SMEP;
- if (cpuid_7->ebx & CPUID_SMAP)
- cr4 |= X86_CR4_SMAP;
- if (cpuid_7->ecx & CPUID_PKU)
- cr4 |= X86_CR4_PKE;
-
- return cr4;
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_sregs sregs;
- struct kvm_vm *vm;
- uint64_t cr4;
- int rc;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- /*
- * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
- * use it to verify all supported CR4 bits can be set prior to defining
- * the vCPU model, i.e. without doing KVM_SET_CPUID2.
- */
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
-
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
-
- sregs.cr4 |= calc_cr4_feature_bits(vm);
- cr4 = sregs.cr4;
-
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
- TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
-
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
- TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
- sregs.cr4, cr4);
-
- /* Verify all unsupported features are rejected by KVM. */
- test_cr4_feature_bit(vm, &sregs, X86_CR4_UMIP);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_LA57);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_VMXE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_SMXE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_FSGSBASE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_PCIDE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_OSXSAVE);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_SMEP);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_SMAP);
- test_cr4_feature_bit(vm, &sregs, X86_CR4_PKE);
- kvm_vm_free(vm);
-
- /* Create a "real" VM and verify APIC_BASE can be set. */
- vm = vm_create_default(VCPU_ID, 0, NULL);
-
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
- sregs.apic_base = 1 << 10;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
- TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
- sregs.apic_base);
- sregs.apic_base = 1 << 11;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
- TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
- sregs.apic_base);
-
- kvm_vm_free(vm);
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
deleted file mode 100644
index d672f0a473f8..000000000000
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ /dev/null
@@ -1,243 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for x86 KVM_CAP_SYNC_REGS
- *
- * Copyright (C) 2018, Google LLC.
- *
- * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
- * including requesting an invalid register set, updates to/from values
- * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
- */
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#define VCPU_ID 5
-
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
-
-/*
- * ucall is embedded here to protect against compiler reshuffling registers
- * before calling a function. In this test we only need to get KVM_EXIT_IO
- * vmexit and preserve RBX, no additional information is needed.
- */
-void guest_code(void)
-{
- asm volatile("1: in %[port], %%al\n"
- "add $0x1, %%rbx\n"
- "jmp 1b"
- : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
-}
-
-static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
-{
-#define REG_COMPARE(reg) \
- TEST_ASSERT(left->reg == right->reg, \
- "Register " #reg \
- " values did not match: 0x%llx, 0x%llx\n", \
- left->reg, right->reg)
- REG_COMPARE(rax);
- REG_COMPARE(rbx);
- REG_COMPARE(rcx);
- REG_COMPARE(rdx);
- REG_COMPARE(rsi);
- REG_COMPARE(rdi);
- REG_COMPARE(rsp);
- REG_COMPARE(rbp);
- REG_COMPARE(r8);
- REG_COMPARE(r9);
- REG_COMPARE(r10);
- REG_COMPARE(r11);
- REG_COMPARE(r12);
- REG_COMPARE(r13);
- REG_COMPARE(r14);
- REG_COMPARE(r15);
- REG_COMPARE(rip);
- REG_COMPARE(rflags);
-#undef REG_COMPARE
-}
-
-static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
-{
-}
-
-static void compare_vcpu_events(struct kvm_vcpu_events *left,
- struct kvm_vcpu_events *right)
-{
-}
-
-#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
-#define INVALID_SYNC_FIELD 0x80000000
-
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_regs regs;
- struct kvm_sregs sregs;
- struct kvm_vcpu_events events;
- int rv, cap;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
- if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
- print_skip("KVM_CAP_SYNC_REGS not supported");
- exit(KSFT_SKIP);
- }
- if ((cap & INVALID_SYNC_FIELD) != 0) {
- print_skip("The \"invalid\" field is not invalid");
- exit(KSFT_SKIP);
- }
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- run = vcpu_state(vm, VCPU_ID);
-
- /* Request reading invalid register set from VCPU. */
- run->kvm_valid_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
- rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
-
- run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
- rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
-
- /* Request setting invalid register set into VCPU. */
- run->kvm_dirty_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
- rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
-
- run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
- rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
-
- /* Request and verify all valid register sets. */
- /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- vcpu_regs_get(vm, VCPU_ID, &regs);
- compare_regs(&regs, &run->s.regs.regs);
-
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
- compare_sregs(&sregs, &run->s.regs.sregs);
-
- vcpu_events_get(vm, VCPU_ID, &events);
- compare_vcpu_events(&events, &run->s.regs.events);
-
- /* Set and verify various register values. */
- run->s.regs.regs.rbx = 0xBAD1DEA;
- run->s.regs.sregs.apic_base = 1 << 11;
- /* TODO run->s.regs.events.XYZ = ABC; */
-
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
- TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
- "rbx sync regs value incorrect 0x%llx.",
- run->s.regs.regs.rbx);
- TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
- "apic_base sync regs value incorrect 0x%llx.",
- run->s.regs.sregs.apic_base);
-
- vcpu_regs_get(vm, VCPU_ID, &regs);
- compare_regs(&regs, &run->s.regs.regs);
-
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
- compare_sregs(&sregs, &run->s.regs.sregs);
-
- vcpu_events_get(vm, VCPU_ID, &events);
- compare_vcpu_events(&events, &run->s.regs.events);
-
- /* Clear kvm_dirty_regs bits, verify new s.regs values are
- * overwritten with existing guest values.
- */
- run->kvm_valid_regs = TEST_SYNC_FIELDS;
- run->kvm_dirty_regs = 0;
- run->s.regs.regs.rbx = 0xDEADBEEF;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
- TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
- "rbx sync regs value incorrect 0x%llx.",
- run->s.regs.regs.rbx);
-
- /* Clear kvm_valid_regs bits and kvm_dirty_bits.
- * Verify s.regs values are not overwritten with existing guest values
- * and that guest values are not overwritten with kvm_sync_regs values.
- */
- run->kvm_valid_regs = 0;
- run->kvm_dirty_regs = 0;
- run->s.regs.regs.rbx = 0xAAAA;
- regs.rbx = 0xBAC0;
- vcpu_regs_set(vm, VCPU_ID, &regs);
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
- TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
- "rbx sync regs value incorrect 0x%llx.",
- run->s.regs.regs.rbx);
- vcpu_regs_get(vm, VCPU_ID, &regs);
- TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
- "rbx guest value incorrect 0x%llx.",
- regs.rbx);
-
- /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
- * with existing guest values but that guest values are overwritten
- * with kvm_sync_regs values.
- */
- run->kvm_valid_regs = 0;
- run->kvm_dirty_regs = TEST_SYNC_FIELDS;
- run->s.regs.regs.rbx = 0xBBBB;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
- TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
- "rbx sync regs value incorrect 0x%llx.",
- run->s.regs.regs.rbx);
- vcpu_regs_get(vm, VCPU_ID, &regs);
- TEST_ASSERT(regs.rbx == 0xBBBB + 1,
- "rbx guest value incorrect 0x%llx.",
- regs.rbx);
-
- kvm_vm_free(vm);
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
deleted file mode 100644
index 2835a17f1b7a..000000000000
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ /dev/null
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_close_while_nested
- *
- * Copyright (C) 2019, Red Hat, Inc.
- *
- * Verify that nothing bad happens if a KVM user exits with open
- * file descriptors while executing a nested guest.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#define VCPU_ID 5
-
-enum {
- PORT_L0_EXIT = 0x2000,
-};
-
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
-static void l2_guest_code(void)
-{
- /* Exit to L0 */
- asm volatile("inb %%dx, %%al"
- : : [port] "d" (PORT_L0_EXIT) : "rax");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(0);
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t vmx_pages_gva;
-
- nested_vmx_check_supported();
-
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-
- for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
- struct ucall uc;
-
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- if (run->io.port == PORT_L0_EXIT)
- break;
-
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
- /* NOT REACHED */
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
deleted file mode 100644
index 23051d84b907..000000000000
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
+++ /dev/null
@@ -1,131 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * VMX-pmu related msrs test
- *
- * Copyright (C) 2021 Intel Corporation
- *
- * Test to check the effect of various CPUID settings
- * on the MSR_IA32_PERF_CAPABILITIES MSR, and check that
- * whatever we write with KVM_SET_MSR is _not_ modified
- * in the guest and test it can be retrieved with KVM_GET_MSR.
- *
- * Test to check that invalid LBR formats are rejected.
- */
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <sys/ioctl.h>
-
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define VCPU_ID 0
-
-#define X86_FEATURE_PDCM (1<<15)
-#define PMU_CAP_FW_WRITES (1ULL << 13)
-#define PMU_CAP_LBR_FMT 0x3f
-
-union cpuid10_eax {
- struct {
- unsigned int version_id:8;
- unsigned int num_counters:8;
- unsigned int bit_width:8;
- unsigned int mask_length:8;
- } split;
- unsigned int full;
-};
-
-union perf_capabilities {
- struct {
- u64 lbr_format:6;
- u64 pebs_trap:1;
- u64 pebs_arch_reg:1;
- u64 pebs_format:4;
- u64 smm_freeze:1;
- u64 full_width_write:1;
- u64 pebs_baseline:1;
- u64 perf_metrics:1;
- u64 pebs_output_pt_available:1;
- u64 anythread_deprecated:1;
- };
- u64 capabilities;
-};
-
-static void guest_code(void)
-{
- wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_cpuid2 *cpuid;
- struct kvm_cpuid_entry2 *entry_1_0;
- struct kvm_cpuid_entry2 *entry_a_0;
- bool pdcm_supported = false;
- struct kvm_vm *vm;
- int ret;
- union cpuid10_eax eax;
- union perf_capabilities host_cap;
-
- host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
- host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- cpuid = kvm_get_supported_cpuid();
-
- if (kvm_get_cpuid_max_basic() >= 0xa) {
- entry_1_0 = kvm_get_supported_cpuid_index(1, 0);
- entry_a_0 = kvm_get_supported_cpuid_index(0xa, 0);
- pdcm_supported = entry_1_0 && !!(entry_1_0->ecx & X86_FEATURE_PDCM);
- eax.full = entry_a_0->eax;
- }
- if (!pdcm_supported) {
- print_skip("MSR_IA32_PERF_CAPABILITIES is not supported by the vCPU");
- exit(KSFT_SKIP);
- }
- if (!eax.split.version_id) {
- print_skip("PMU is not supported by the vCPU");
- exit(KSFT_SKIP);
- }
-
- /* testcase 1, set capabilities when we have PDCM bit */
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
- vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
-
- /* check capabilities can be retrieved with KVM_GET_MSR */
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
-
- /* check whatever we write with KVM_SET_MSR is _not_ modified */
- vcpu_run(vm, VCPU_ID);
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
-
- /* testcase 2, check valid LBR formats are accepted */
- vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
-
- vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
-
- /* testcase 3, check invalid LBR format is rejected */
- ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
- TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
-
- /* testcase 4, set capabilities when we don't have PDCM bit */
- entry_1_0->ecx &= ~X86_FEATURE_PDCM;
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
- ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
- TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
-
- /* testcase 5, set capabilities when we don't have PMU version bits */
- entry_1_0->ecx |= X86_FEATURE_PDCM;
- eax.split.version_id = 0;
- entry_1_0->ecx = eax.full;
- vcpu_set_cpuid(vm, VCPU_ID, cpuid);
- ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
- TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
-
- vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
- ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
-
- kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
deleted file mode 100644
index 9246ea310587..000000000000
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ /dev/null
@@ -1,167 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_vmcall_test
- *
- * Copyright © 2021 Amazon.com, Inc. or its affiliates.
- *
- * Xen shared_info / pvclock testing
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <stdint.h>
-#include <time.h>
-
-#define VCPU_ID 5
-
-#define SHINFO_REGION_GPA 0xc0000000ULL
-#define SHINFO_REGION_SLOT 10
-#define PAGE_SIZE 4096
-
-#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
-
-static struct kvm_vm *vm;
-
-#define XEN_HYPERCALL_MSR 0x40000000
-
-struct pvclock_vcpu_time_info {
- u32 version;
- u32 pad0;
- u64 tsc_timestamp;
- u64 system_time;
- u32 tsc_to_system_mul;
- s8 tsc_shift;
- u8 flags;
- u8 pad[2];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct pvclock_wall_clock {
- u32 version;
- u32 sec;
- u32 nsec;
-} __attribute__((__packed__));
-
-static void guest_code(void)
-{
- GUEST_DONE();
-}
-
-static int cmp_timespec(struct timespec *a, struct timespec *b)
-{
- if (a->tv_sec > b->tv_sec)
- return 1;
- else if (a->tv_sec < b->tv_sec)
- return -1;
- else if (a->tv_nsec > b->tv_nsec)
- return 1;
- else if (a->tv_nsec < b->tv_nsec)
- return -1;
- else
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- struct timespec min_ts, max_ts, vm_ts;
-
- if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
- KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
- print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
- exit(KSFT_SKIP);
- }
-
- clock_gettime(CLOCK_REALTIME, &min_ts);
-
- vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
- /* Map a region for the shared_info page */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
-
- struct kvm_xen_hvm_config hvmc = {
- .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
- .msr = XEN_HYPERCALL_MSR,
- };
- vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
-
- struct kvm_xen_hvm_attr lm = {
- .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
- .u.long_mode = 1,
- };
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
- struct kvm_xen_hvm_attr ha = {
- .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
- .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
- };
- vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
-
- struct kvm_xen_vcpu_attr vi = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
- .u.gpa = SHINFO_REGION_GPA + 0x40,
- };
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);
-
- struct kvm_xen_vcpu_attr pvclock = {
- .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
- .u.gpa = PVTIME_ADDR,
- };
- vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
-
- for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
- struct ucall uc;
-
- vcpu_run(vm, VCPU_ID);
-
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
- }
- }
-
- done:
- clock_gettime(CLOCK_REALTIME, &max_ts);
-
- /*
- * Just a *really* basic check that things are being put in the
- * right place. The actual calculations are much the same for
- * Xen as they are for the KVM variants, so no need to check.
- */
- struct pvclock_wall_clock *wc;
- struct pvclock_vcpu_time_info *ti, *ti2;
-
- wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
- ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
- ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
-
- vm_ts.tv_sec = wc->sec;
- vm_ts.tv_nsec = wc->nsec;
- TEST_ASSERT(wc->version && !(wc->version & 1),
- "Bad wallclock version %x", wc->version);
- TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
- TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
-
- TEST_ASSERT(ti->version && !(ti->version & 1),
- "Bad time_info version %x", ti->version);
- TEST_ASSERT(ti2->version && !(ti2->version & 1),
- "Bad time_info version %x", ti->version);
-
- kvm_vm_free(vm);
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
deleted file mode 100644
index 3529376747c2..000000000000
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ /dev/null
@@ -1,76 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2019, Google LLC.
- *
- * Tests for the IA32_XSS MSR.
- */
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define VCPU_ID 1
-#define MSR_BITS 64
-
-#define X86_FEATURE_XSAVES (1<<3)
-
-bool is_supported_msr(u32 msr_index)
-{
- struct kvm_msr_list *list;
- bool found = false;
- int i;
-
- list = kvm_get_msr_index_list();
- for (i = 0; i < list->nmsrs; ++i) {
- if (list->indices[i] == msr_index) {
- found = true;
- break;
- }
- }
-
- free(list);
- return found;
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_cpuid_entry2 *entry;
- bool xss_supported = false;
- struct kvm_vm *vm;
- uint64_t xss_val;
- int i, r;
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, 0);
-
- if (kvm_get_cpuid_max_basic() >= 0xd) {
- entry = kvm_get_supported_cpuid_index(0xd, 1);
- xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
- }
- if (!xss_supported) {
- print_skip("IA32_XSS is not supported by the vCPU");
- exit(KSFT_SKIP);
- }
-
- xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS);
- TEST_ASSERT(xss_val == 0,
- "MSR_IA32_XSS should be initialized to zero\n");
-
- vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val);
- /*
- * At present, KVM only supports a guest IA32_XSS value of 0. Verify
- * that trying to set the guest IA32_XSS to an unsupported value fails.
- * Also, in the future when a non-zero value succeeds check that
- * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST.
- */
- for (i = 0; i < MSR_BITS; ++i) {
- r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i);
- TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS),
- "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n");
- }
-
- kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
new file mode 100644
index 000000000000..a820329cae0d
--- /dev/null
+++ b/tools/testing/selftests/landlock/.gitignore
@@ -0,0 +1,5 @@
+/*_test
+/sandbox-and-launch
+/true
+/wait-pipe
+/wait-pipe-sandbox
diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
new file mode 100644
index 000000000000..044b83bde16e
--- /dev/null
+++ b/tools/testing/selftests/landlock/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# First run: make -C ../../../.. headers_install
+
+CFLAGS += -Wall -O2 $(KHDR_INCLUDES)
+
+LOCAL_HDRS += $(wildcard *.h)
+
+src_test := $(wildcard *_test.c)
+
+TEST_GEN_PROGS := $(src_test:.c=)
+
+TEST_GEN_PROGS_EXTENDED := \
+ true \
+ sandbox-and-launch \
+ wait-pipe \
+ wait-pipe-sandbox
+
+# Short targets:
+$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread
+$(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static
+
+include ../lib.mk
+
+# Targets with $(OUTPUT)/ prefix:
+$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread
+$(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
new file mode 100644
index 000000000000..44eb433e9666
--- /dev/null
+++ b/tools/testing/selftests/landlock/audit.h
@@ -0,0 +1,478 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock audit helpers
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/audit.h>
+#include <linux/limits.h>
+#include <linux/netlink.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+#define REGEX_LANDLOCK_PREFIX "^audit([0-9.:]\\+): domain=\\([0-9a-f]\\+\\)"
+
+struct audit_filter {
+ __u32 record_type;
+ size_t exe_len;
+ char exe[PATH_MAX];
+};
+
+struct audit_message {
+ struct nlmsghdr header;
+ union {
+ struct audit_status status;
+ struct audit_features features;
+ struct audit_rule_data rule;
+ struct nlmsgerr err;
+ char data[PATH_MAX + 200];
+ };
+};
+
+static const struct timeval audit_tv_dom_drop = {
+ /*
+ * Because domain deallocation is tied to asynchronous credential
+ * freeing, receiving such event may take some time. In practice,
+ * on a small VM, it should not exceed 100k usec, but let's wait up
+ * to 1 second to be safe.
+ */
+ .tv_sec = 1,
+};
+
+static const struct timeval audit_tv_default = {
+ .tv_usec = 1,
+};
+
+static int audit_send(const int fd, const struct audit_message *const msg)
+{
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ };
+ int ret;
+
+ do {
+ ret = sendto(fd, msg, msg->header.nlmsg_len, 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0)
+ return -errno;
+
+ if (ret != msg->header.nlmsg_len)
+ return -E2BIG;
+
+ return 0;
+}
+
+static int audit_recv(const int fd, struct audit_message *msg)
+{
+ struct sockaddr_nl addr;
+ socklen_t addrlen = sizeof(addr);
+ struct audit_message msg_tmp;
+ int err;
+
+ if (!msg)
+ msg = &msg_tmp;
+
+ do {
+ err = recvfrom(fd, msg, sizeof(*msg), 0,
+ (struct sockaddr *)&addr, &addrlen);
+ } while (err < 0 && errno == EINTR);
+
+ if (err < 0)
+ return -errno;
+
+ if (addrlen != sizeof(addr) || addr.nl_pid != 0)
+ return -EINVAL;
+
+ /* Checks Netlink error or end of messages. */
+ if (msg->header.nlmsg_type == NLMSG_ERROR)
+ return msg->err.error;
+
+ return 0;
+}
+
+static int audit_request(const int fd,
+ const struct audit_message *const request,
+ struct audit_message *reply)
+{
+ struct audit_message msg_tmp;
+ bool first_reply = true;
+ int err;
+
+ err = audit_send(fd, request);
+ if (err)
+ return err;
+
+ if (!reply)
+ reply = &msg_tmp;
+
+ do {
+ if (first_reply)
+ first_reply = false;
+ else
+ reply = &msg_tmp;
+
+ err = audit_recv(fd, reply);
+ if (err)
+ return err;
+ } while (reply->header.nlmsg_type != NLMSG_ERROR &&
+ reply->err.msg.nlmsg_type != request->header.nlmsg_type);
+
+ return reply->err.error;
+}
+
+static int audit_filter_exe(const int audit_fd,
+ const struct audit_filter *const filter,
+ const __u16 type)
+{
+ struct audit_message msg = {
+ .header = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.rule)) +
+ NLMSG_ALIGN(filter->exe_len),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ },
+ .rule = {
+ .flags = AUDIT_FILTER_EXCLUDE,
+ .action = AUDIT_NEVER,
+ .field_count = 1,
+ .fields[0] = filter->record_type,
+ .fieldflags[0] = AUDIT_NOT_EQUAL,
+ .values[0] = filter->exe_len,
+ .buflen = filter->exe_len,
+ }
+ };
+
+ if (filter->record_type != AUDIT_EXE)
+ return -EINVAL;
+
+ memcpy(msg.rule.buf, filter->exe, filter->exe_len);
+ return audit_request(audit_fd, &msg, NULL);
+}
+
+static int audit_filter_drop(const int audit_fd, const __u16 type)
+{
+ struct audit_message msg = {
+ .header = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.rule)),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ },
+ .rule = {
+ .flags = AUDIT_FILTER_EXCLUDE,
+ .action = AUDIT_NEVER,
+ .field_count = 1,
+ .fields[0] = AUDIT_MSGTYPE,
+ .fieldflags[0] = AUDIT_NOT_EQUAL,
+ .values[0] = AUDIT_LANDLOCK_DOMAIN,
+ }
+ };
+
+ return audit_request(audit_fd, &msg, NULL);
+}
+
+static int audit_set_status(int fd, __u32 key, __u32 val)
+{
+ const struct audit_message msg = {
+ .header = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.status)),
+ .nlmsg_type = AUDIT_SET,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ },
+ .status = {
+ .mask = key,
+ .enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+ .pid = key == AUDIT_STATUS_PID ? val : 0,
+ }
+ };
+
+ return audit_request(fd, &msg, NULL);
+}
+
+/* Returns a pointer to the last filled character of @dst, which is `\0`. */
+static __maybe_unused char *regex_escape(const char *const src, char *dst,
+ size_t dst_size)
+{
+ char *d = dst;
+
+ for (const char *s = src; *s; s++) {
+ switch (*s) {
+ case '$':
+ case '*':
+ case '.':
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ if (d >= dst + dst_size - 2)
+ return (char *)-ENOMEM;
+
+ *d++ = '\\';
+ *d++ = *s;
+ break;
+ default:
+ if (d >= dst + dst_size - 1)
+ return (char *)-ENOMEM;
+
+ *d++ = *s;
+ }
+ }
+ if (d >= dst + dst_size - 1)
+ return (char *)-ENOMEM;
+
+ *d = '\0';
+ return d;
+}
+
+/*
+ * @domain_id: The domain ID extracted from the audit message (if the first part
+ * of @pattern is REGEX_LANDLOCK_PREFIX). It is set to 0 if the domain ID is
+ * not found.
+ */
+static int audit_match_record(int audit_fd, const __u16 type,
+ const char *const pattern, __u64 *domain_id)
+{
+ struct audit_message msg;
+ int ret, err = 0;
+ bool matches_record = !type;
+ regmatch_t matches[2];
+ regex_t regex;
+
+ ret = regcomp(&regex, pattern, 0);
+ if (ret)
+ return -EINVAL;
+
+ do {
+ memset(&msg, 0, sizeof(msg));
+ err = audit_recv(audit_fd, &msg);
+ if (err)
+ goto out;
+
+ if (msg.header.nlmsg_type == type)
+ matches_record = true;
+ } while (!matches_record);
+
+ ret = regexec(&regex, msg.data, ARRAY_SIZE(matches), matches, 0);
+ if (ret) {
+ printf("DATA: %s\n", msg.data);
+ printf("ERROR: no match for pattern: %s\n", pattern);
+ err = -ENOENT;
+ }
+
+ if (domain_id) {
+ *domain_id = 0;
+ if (matches[1].rm_so != -1) {
+ int match_len = matches[1].rm_eo - matches[1].rm_so;
+ /* The maximal characters of a 2^64 hexadecimal number is 17. */
+ char dom_id[18];
+
+ if (match_len > 0 && match_len < sizeof(dom_id)) {
+ memcpy(dom_id, msg.data + matches[1].rm_so,
+ match_len);
+ dom_id[match_len] = '\0';
+ if (domain_id)
+ *domain_id = strtoull(dom_id, NULL, 16);
+ }
+ }
+ }
+
+out:
+ regfree(&regex);
+ return err;
+}
+
+static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid,
+ __u64 *domain_id)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " status=allocated mode=enforcing pid=%d uid=[0-9]\\+"
+ " exe=\"[^\"]\\+\" comm=\".*_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, pid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
+ domain_id);
+}
+
+static int __maybe_unused matches_log_domain_deallocated(
+ int audit_fd, unsigned int num_denials, __u64 *domain_id)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " status=deallocated denials=%u$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len = snprintf(log_match, sizeof(log_match), log_template,
+ num_denials);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
+ domain_id);
+}
+
+struct audit_records {
+ size_t access;
+ size_t domain;
+};
+
+static int audit_count_records(int audit_fd, struct audit_records *records)
+{
+ struct audit_message msg;
+ int err;
+
+ records->access = 0;
+ records->domain = 0;
+
+ do {
+ memset(&msg, 0, sizeof(msg));
+ err = audit_recv(audit_fd, &msg);
+ if (err) {
+ if (err == -EAGAIN)
+ return 0;
+ else
+ return err;
+ }
+
+ switch (msg.header.nlmsg_type) {
+ case AUDIT_LANDLOCK_ACCESS:
+ records->access++;
+ break;
+ case AUDIT_LANDLOCK_DOMAIN:
+ records->domain++;
+ break;
+ }
+ } while (true);
+
+ return 0;
+}
+
+static int audit_init(void)
+{
+ int fd, err;
+
+ fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+ if (fd < 0)
+ return -errno;
+
+ err = audit_set_status(fd, AUDIT_STATUS_ENABLED, 1);
+ if (err)
+ return err;
+
+ err = audit_set_status(fd, AUDIT_STATUS_PID, getpid());
+ if (err)
+ return err;
+
+ /* Sets a timeout for negative tests. */
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default,
+ sizeof(audit_tv_default));
+ if (err)
+ return -errno;
+
+ return fd;
+}
+
+static int audit_init_filter_exe(struct audit_filter *filter, const char *path)
+{
+ char *absolute_path = NULL;
+
+ /* It is assume that there is not already filtering rules. */
+ filter->record_type = AUDIT_EXE;
+ if (!path) {
+ int ret = readlink("/proc/self/exe", filter->exe,
+ sizeof(filter->exe) - 1);
+ if (ret < 0)
+ return -errno;
+
+ filter->exe_len = ret;
+ return 0;
+ }
+
+ absolute_path = realpath(path, NULL);
+ if (!absolute_path)
+ return -errno;
+
+ /* No need for the terminating NULL byte. */
+ filter->exe_len = strlen(absolute_path);
+ if (filter->exe_len > sizeof(filter->exe))
+ return -E2BIG;
+
+ memcpy(filter->exe, absolute_path, filter->exe_len);
+ free(absolute_path);
+ return 0;
+}
+
+static int audit_cleanup(int audit_fd, struct audit_filter *filter)
+{
+ struct audit_filter new_filter;
+
+ if (audit_fd < 0 || !filter) {
+ int err;
+
+ /*
+ * Simulates audit_init_with_exe_filter() when called from
+ * FIXTURE_TEARDOWN_PARENT().
+ */
+ audit_fd = audit_init();
+ if (audit_fd < 0)
+ return audit_fd;
+
+ filter = &new_filter;
+ err = audit_init_filter_exe(filter, NULL);
+ if (err)
+ return err;
+ }
+
+ /* Filters might not be in place. */
+ audit_filter_exe(audit_fd, filter, AUDIT_DEL_RULE);
+ audit_filter_drop(audit_fd, AUDIT_DEL_RULE);
+
+ /*
+ * Because audit_cleanup() might not be called by the test auditd
+ * process, it might not be possible to explicitly set it. Anyway,
+ * AUDIT_STATUS_ENABLED will implicitly be set to 0 when the auditd
+ * process will exit.
+ */
+ return close(audit_fd);
+}
+
+static int audit_init_with_exe_filter(struct audit_filter *filter)
+{
+ int fd, err;
+
+ fd = audit_init();
+ if (fd < 0)
+ return fd;
+
+ err = audit_init_filter_exe(filter, NULL);
+ if (err)
+ return err;
+
+ err = audit_filter_exe(fd, filter, AUDIT_ADD_RULE);
+ if (err)
+ return err;
+
+ return fd;
+}
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
new file mode 100644
index 000000000000..46d02d49835a
--- /dev/null
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Audit
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/landlock.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "audit.h"
+#include "common.h"
+
+static int matches_log_signal(struct __test_metadata *const _metadata,
+ int audit_fd, const pid_t opid, __u64 *domain_id)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=scope\\.signal opid=%d ocomm=\"audit_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, opid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ domain_id);
+}
+
+FIXTURE(audit)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd)
+ {
+ const char *error_msg;
+
+ /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */
+ if (self->audit_fd == -EEXIST)
+ error_msg = "socket already in use (e.g. auditd)";
+ else
+ error_msg = strerror(-self->audit_fd);
+ TH_LOG("Failed to initialize audit: %s", error_msg);
+ }
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+FIXTURE_TEARDOWN(audit)
+{
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+TEST_F(audit, layers)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int status, ruleset_fd, i;
+ __u64(*domain_stack)[16];
+ __u64 prev_dom = 3;
+ pid_t child;
+
+ domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, domain_stack);
+ memset(domain_stack, 0, sizeof(*domain_stack));
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) {
+ __u64 denial_dom = 1;
+ __u64 allocated_dom = 2;
+
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+
+ /* Creates a denial to get the domain ID. */
+ EXPECT_EQ(-1, kill(getppid(), 0));
+ EXPECT_EQ(EPERM, errno);
+ EXPECT_EQ(0,
+ matches_log_signal(_metadata, self->audit_fd,
+ getppid(), &denial_dom));
+ EXPECT_EQ(0, matches_log_domain_allocated(
+ self->audit_fd, getpid(),
+ &allocated_dom));
+ EXPECT_NE(denial_dom, 1);
+ EXPECT_NE(denial_dom, 0);
+ EXPECT_EQ(denial_dom, allocated_dom);
+
+ /* Checks that the new domain is younger than the previous one. */
+ EXPECT_GT(allocated_dom, prev_dom);
+ prev_dom = allocated_dom;
+ (*domain_stack)[i] = allocated_dom;
+ }
+
+ /* Checks that we reached the maximum number of layers. */
+ EXPECT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(E2BIG, errno);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0,
+ audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ /* Purges log from deallocated domains. */
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
+ for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) {
+ __u64 deallocated_dom = 2;
+
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
+ &deallocated_dom));
+ EXPECT_EQ((*domain_stack)[i], deallocated_dom)
+ {
+ TH_LOG("Failed to match domain %llx (#%d)",
+ (*domain_stack)[i], i);
+ }
+ }
+ EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack)));
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default, sizeof(audit_tv_default)));
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+struct thread_data {
+ pid_t parent_pid;
+ int ruleset_fd, pipe_child, pipe_parent;
+};
+
+static void *thread_audit_test(void *arg)
+{
+ const struct thread_data *data = (struct thread_data *)arg;
+ uintptr_t err = 0;
+ char buffer;
+
+ /* TGID and TID are different for a second thread. */
+ if (getpid() == gettid()) {
+ err = 1;
+ goto out;
+ }
+
+ if (landlock_restrict_self(data->ruleset_fd, 0)) {
+ err = 2;
+ goto out;
+ }
+
+ if (close(data->ruleset_fd)) {
+ err = 3;
+ goto out;
+ }
+
+ /* Creates a denial to get the domain ID. */
+ if (kill(data->parent_pid, 0) != -1) {
+ err = 4;
+ goto out;
+ }
+
+ if (EPERM != errno) {
+ err = 5;
+ goto out;
+ }
+
+ /* Signals the parent to read denial logs. */
+ if (write(data->pipe_child, ".", 1) != 1) {
+ err = 6;
+ goto out;
+ }
+
+ /* Waits for the parent to update audit filters. */
+ if (read(data->pipe_parent, &buffer, 1) != 1) {
+ err = 7;
+ goto out;
+ }
+
+out:
+ close(data->pipe_child);
+ close(data->pipe_parent);
+ return (void *)err;
+}
+
+/* Checks that the PID tied to a domain is not a TID but the TGID. */
+TEST_F(audit, thread)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ __u64 denial_dom = 1;
+ __u64 allocated_dom = 2;
+ __u64 deallocated_dom = 3;
+ pthread_t thread;
+ int pipe_child[2], pipe_parent[2];
+ char buffer;
+ struct thread_data child_data;
+
+ child_data.parent_pid = getppid();
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ child_data.pipe_child = pipe_child[1];
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ child_data.pipe_parent = pipe_parent[0];
+ child_data.ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, child_data.ruleset_fd);
+
+ /* TGID and TID are the same for the initial thread . */
+ EXPECT_EQ(getpid(), gettid());
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test,
+ &child_data));
+
+ /* Waits for the child to generate a denial. */
+ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ /* Matches the signal log to get the domain ID. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ child_data.parent_pid, &denial_dom));
+ EXPECT_NE(denial_dom, 1);
+ EXPECT_NE(denial_dom, 0);
+
+ EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(),
+ &allocated_dom));
+ EXPECT_EQ(denial_dom, allocated_dom);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ /* Signals the thread to exit, which will generate a domain deallocation. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, pthread_join(thread, NULL));
+
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
+ &deallocated_dom));
+ EXPECT_EQ(denial_dom, deallocated_dom);
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default, sizeof(audit_tv_default)));
+}
+
+FIXTURE(audit_flags)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+ __u64 *domain_id;
+};
+
+FIXTURE_VARIANT(audit_flags)
+{
+ const int restrict_flags;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, default) {
+ /* clang-format on */
+ .restrict_flags = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, same_exec_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, subdomains_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, cross_exec_on) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+};
+
+FIXTURE_SETUP(audit_flags)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd)
+ {
+ const char *error_msg;
+
+ /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */
+ if (self->audit_fd == -EEXIST)
+ error_msg = "socket already in use (e.g. auditd)";
+ else
+ error_msg = strerror(-self->audit_fd);
+ TH_LOG("Failed to initialize audit: %s", error_msg);
+ }
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ self->domain_id = mmap(NULL, sizeof(*self->domain_id),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, self->domain_id);
+ /* Domain IDs are greater or equal to 2^32. */
+ *self->domain_id = 1;
+}
+
+FIXTURE_TEARDOWN(audit_flags)
+{
+ EXPECT_EQ(0, munmap(self->domain_id, sizeof(*self->domain_id)));
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+TEST_F(audit_flags, signal)
+{
+ int status;
+ pid_t child;
+ struct audit_records records;
+ __u64 deallocated_dom = 2;
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int ruleset_fd;
+
+ /* Add filesystem restrictions. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd,
+ variant->restrict_flags));
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* First signal checks to test log entries. */
+ EXPECT_EQ(-1, kill(getppid(), 0));
+ EXPECT_EQ(EPERM, errno);
+
+ if (variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ EXPECT_EQ(-EAGAIN, matches_log_signal(
+ _metadata, self->audit_fd,
+ getppid(), self->domain_id));
+ EXPECT_EQ(*self->domain_id, 1);
+ } else {
+ __u64 allocated_dom = 3;
+
+ EXPECT_EQ(0, matches_log_signal(
+ _metadata, self->audit_fd,
+ getppid(), self->domain_id));
+
+ /* Checks domain information records. */
+ EXPECT_EQ(0, matches_log_domain_allocated(
+ self->audit_fd, getpid(),
+ &allocated_dom));
+ EXPECT_NE(*self->domain_id, 1);
+ EXPECT_NE(*self->domain_id, 0);
+ EXPECT_EQ(*self->domain_id, allocated_dom);
+ }
+
+ /* Second signal checks to test audit_count_records(). */
+ EXPECT_EQ(-1, kill(getppid(), 0));
+ EXPECT_EQ(EPERM, errno);
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ if (variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ EXPECT_EQ(0, records.access);
+ } else {
+ EXPECT_EQ(1, records.access);
+ }
+ EXPECT_EQ(0, records.domain);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0,
+ audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ if (variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ EXPECT_EQ(-EAGAIN,
+ matches_log_domain_deallocated(self->audit_fd, 0,
+ &deallocated_dom));
+ EXPECT_EQ(deallocated_dom, 2);
+ } else {
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop,
+ sizeof(audit_tv_dom_drop)));
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 2,
+ &deallocated_dom));
+ EXPECT_NE(deallocated_dom, 2);
+ EXPECT_NE(deallocated_dom, 0);
+ EXPECT_EQ(deallocated_dom, *self->domain_id);
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default,
+ sizeof(audit_tv_default)));
+ }
+}
+
+static int matches_log_fs_read_root(int audit_fd)
+{
+ return audit_match_record(
+ audit_fd, AUDIT_LANDLOCK_ACCESS,
+ REGEX_LANDLOCK_PREFIX
+ " blockers=fs\\.read_dir path=\"/\" dev=\"[^\"]\\+\" ino=[0-9]\\+$",
+ NULL);
+}
+
+FIXTURE(audit_exec)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_VARIANT(audit_exec)
+{
+ const int restrict_flags;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, default) {
+ /* clang-format on */
+ .restrict_flags = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, same_exec_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, subdomains_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, cross_exec_on) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, subdomains_off_and_cross_exec_on) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+};
+
+FIXTURE_SETUP(audit_exec)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ self->audit_fd = audit_init();
+ EXPECT_LE(0, self->audit_fd)
+ {
+ const char *error_msg;
+
+ /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */
+ if (self->audit_fd == -EEXIST)
+ error_msg = "socket already in use (e.g. auditd)";
+ else
+ error_msg = strerror(-self->audit_fd);
+ TH_LOG("Failed to initialize audit: %s", error_msg);
+ }
+
+ /* Applies test filter for the bin_wait_pipe_sandbox program. */
+ EXPECT_EQ(0, audit_init_filter_exe(&self->audit_filter,
+ bin_wait_pipe_sandbox));
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_ADD_RULE));
+
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+FIXTURE_TEARDOWN(audit_exec)
+{
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, close(self->audit_fd));
+}
+
+TEST_F(audit_exec, signal_and_open)
+{
+ struct audit_records records;
+ int pipe_child[2], pipe_parent[2];
+ char buf_parent;
+ pid_t child;
+ int status;
+
+ ASSERT_EQ(0, pipe2(pipe_child, 0));
+ ASSERT_EQ(0, pipe2(pipe_parent, 0));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const struct landlock_ruleset_attr layer1 = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ char pipe_child_str[12], pipe_parent_str[12];
+ char *const argv[] = { (char *)bin_wait_pipe_sandbox,
+ pipe_child_str, pipe_parent_str, NULL };
+ int ruleset_fd;
+
+ /* Passes the pipe FDs to the executed binary. */
+ EXPECT_EQ(0, close(pipe_child[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ snprintf(pipe_child_str, sizeof(pipe_child_str), "%d",
+ pipe_child[1]);
+ snprintf(pipe_parent_str, sizeof(pipe_parent_str), "%d",
+ pipe_parent[0]);
+
+ ruleset_fd =
+ landlock_create_ruleset(&layer1, sizeof(layer1), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create a ruleset");
+ _exit(1);
+ }
+ prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ if (landlock_restrict_self(ruleset_fd,
+ variant->restrict_flags)) {
+ perror("Failed to restrict self");
+ _exit(1);
+ }
+ close(ruleset_fd);
+
+ ASSERT_EQ(0, execve(argv[0], argv, NULL))
+ {
+ TH_LOG("Failed to execute \"%s\": %s", argv[0],
+ strerror(errno));
+ };
+ _exit(1);
+ return;
+ }
+
+ EXPECT_EQ(0, close(pipe_child[1]));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ /* Waits for the child. */
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that there was no denial until now. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ /*
+ * Wait for the child to do a first denied action by layer1 and
+ * sandbox itself with layer2.
+ */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that the audit record only matches the child. */
+ if (variant->restrict_flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON) {
+ /* Matches the current domain. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+ }
+
+ /* Checks that we didn't miss anything. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ /*
+ * Wait for the child to do a second denied action by layer1 and
+ * layer2, and sandbox itself with layer3.
+ */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that the audit record only matches the child. */
+ if (variant->restrict_flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON) {
+ /* Matches the current domain. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+ }
+
+ if (!(variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+ /* Matches the child domain. */
+ EXPECT_EQ(0, matches_log_fs_read_root(self->audit_fd));
+ }
+
+ /* Checks that we didn't miss anything. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ /* Waits for the child to terminate. */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(0, WEXITSTATUS(status));
+
+ /* Tests that the audit record only matches the child. */
+ if (!(variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+ /*
+ * Matches the child domains, which tests that the
+ * llcred->domain_exec bitmask is correctly updated with a new
+ * domain.
+ */
+ EXPECT_EQ(0, matches_log_fs_read_root(self->audit_fd));
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+ }
+
+ /* Checks that we didn't miss anything. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
new file mode 100644
index 000000000000..7b69002239d7
--- /dev/null
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Common user space base
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/keyctl.h>
+#include <linux/landlock.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "common.h"
+
+#ifndef O_PATH
+#define O_PATH 010000000
+#endif
+
+TEST(inconsistent_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ char *const buf = malloc(page_size + 1);
+ struct landlock_ruleset_attr *const ruleset_attr = (void *)buf;
+
+ ASSERT_NE(NULL, buf);
+
+ /* Checks copy_from_user(). */
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 0, 0));
+ /* The size if less than sizeof(struct landlock_attr_enforce). */
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 7, 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0));
+ /* The size if less than sizeof(struct landlock_attr_enforce). */
+ ASSERT_EQ(EFAULT, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(
+ NULL, sizeof(struct landlock_ruleset_attr), 0));
+ ASSERT_EQ(EFAULT, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+ /* Checks minimal valid attribute size. */
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 8, 0));
+ ASSERT_EQ(ENOMSG, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(
+ ruleset_attr,
+ sizeof(struct landlock_ruleset_attr), 0));
+ ASSERT_EQ(ENOMSG, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
+ ASSERT_EQ(ENOMSG, errno);
+
+ /* Checks non-zero value. */
+ buf[page_size - 2] = '.';
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+ free(buf);
+}
+
+TEST(abi_version)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ ASSERT_EQ(7, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION));
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
+ LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1,
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION |
+ 1 << 31));
+ ASSERT_EQ(EINVAL, errno);
+}
+
+/*
+ * Old source trees might not have the set of Kselftest fixes related to kernel
+ * UAPI headers.
+ */
+#ifndef LANDLOCK_CREATE_RULESET_ERRATA
+#define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1)
+#endif
+
+TEST(errata)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ int errata;
+
+ errata = landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_ERRATA);
+ /* The errata bitmask will not be backported to tests. */
+ ASSERT_LE(0, errata);
+ TH_LOG("errata: 0x%x", errata);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1,
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(
+ NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION |
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_ERRATA |
+ 1 << 31));
+ ASSERT_EQ(EINVAL, errno);
+}
+
+/* Tests ordering of syscall argument checks. */
+TEST(create_ruleset_checks_ordering)
+{
+ const int last_flag = LANDLOCK_CREATE_RULESET_ERRATA;
+ const int invalid_flag = last_flag << 1;
+ int ruleset_fd;
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+
+ /* Checks priority for invalid flags. */
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
+ invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1,
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+ invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks too big ruleset_attr size. */
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, -1, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+ /* Checks too small ruleset_attr size. */
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, 0));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 1, 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks valid call. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+/* Tests ordering of syscall argument checks. */
+TEST(add_rule_checks_ordering)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+ .parent_fd = -1,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Checks invalid flags. */
+ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 1));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks invalid ruleset FD. */
+ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 0));
+ ASSERT_EQ(EBADF, errno);
+
+ /* Checks invalid rule type. */
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, 0, NULL, 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks invalid rule attr. */
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ NULL, 0));
+ ASSERT_EQ(EFAULT, errno);
+
+ /* Checks invalid path_beneath.parent_fd. */
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(EBADF, errno);
+
+ /* Checks valid call. */
+ path_beneath_attr.parent_fd =
+ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_attr.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+/* Tests ordering of syscall argument and permission checks. */
+TEST(restrict_self_checks_ordering)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+ .parent_fd = -1,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+ path_beneath_attr.parent_fd =
+ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_attr.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+
+ /* Checks unprivileged enforcement without no_new_privs. */
+ drop_caps(_metadata);
+ ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(EPERM, errno);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ /* Checks invalid flags. */
+ ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks invalid ruleset FD. */
+ ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
+ ASSERT_EQ(EBADF, errno);
+
+ /* Checks valid call. */
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST(restrict_self_fd)
+{
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(-1, landlock_restrict_self(fd, 0));
+ EXPECT_EQ(EBADFD, errno);
+}
+
+TEST(restrict_self_fd_flags)
+{
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ /*
+ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF accepts -1 but not any file
+ * descriptor.
+ */
+ EXPECT_EQ(-1, landlock_restrict_self(
+ fd, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADFD, errno);
+}
+
+TEST(restrict_self_flags)
+{
+ const __u32 last_flag = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF;
+
+ /* Tests invalid flag combinations. */
+
+ EXPECT_EQ(-1, landlock_restrict_self(-1, last_flag << 1));
+ EXPECT_EQ(EINVAL, errno);
+
+ EXPECT_EQ(-1, landlock_restrict_self(-1, -1));
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Tests valid flag combinations. */
+
+ EXPECT_EQ(-1, landlock_restrict_self(-1, 0));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1,
+ landlock_restrict_self(
+ -1,
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1,
+ landlock_restrict_self(
+ -1,
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1,
+ landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON));
+ EXPECT_EQ(EBADF, errno);
+
+ /* Tests with an invalid ruleset_fd. */
+
+ EXPECT_EQ(-1, landlock_restrict_self(
+ -2, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(0, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+}
+
+TEST(ruleset_fd_io)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ int ruleset_fd;
+ char buf;
+
+ drop_caps(_metadata);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(-1, write(ruleset_fd, ".", 1));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, read(ruleset_fd, &buf, 1));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+/* Tests enforcement of a ruleset FD transferred through a UNIX socket. */
+TEST(ruleset_fd_transfer)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ int ruleset_fd_tx, dir_fd;
+ int socket_fds[2];
+ pid_t child;
+ int status;
+
+ drop_caps(_metadata);
+
+ /* Creates a test ruleset with a simple rule. */
+ ruleset_fd_tx =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd_tx);
+ path_beneath_attr.parent_fd =
+ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_attr.parent_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+
+ /* Sends the ruleset FD over a socketpair and then close it. */
+ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
+ socket_fds));
+ ASSERT_EQ(0, send_fd(socket_fds[0], ruleset_fd_tx));
+ ASSERT_EQ(0, close(socket_fds[0]));
+ ASSERT_EQ(0, close(ruleset_fd_tx));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const int ruleset_fd_rx = recv_fd(socket_fds[1]);
+
+ ASSERT_LE(0, ruleset_fd_rx);
+ ASSERT_EQ(0, close(socket_fds[1]));
+
+ /* Enforces the received ruleset on the child. */
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd_rx, 0));
+ ASSERT_EQ(0, close(ruleset_fd_rx));
+
+ /* Checks that the ruleset enforcement. */
+ ASSERT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ ASSERT_EQ(EACCES, errno);
+ dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, dir_fd);
+ ASSERT_EQ(0, close(dir_fd));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(0, close(socket_fds[1]));
+
+ /* Checks that the parent is unrestricted. */
+ dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, dir_fd);
+ ASSERT_EQ(0, close(dir_fd));
+ dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, dir_fd);
+ ASSERT_EQ(0, close(dir_fd));
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+}
+
+TEST(cred_transfer)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ int ruleset_fd, dir_fd;
+ pid_t child;
+ int status;
+
+ drop_caps(_metadata);
+
+ dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ EXPECT_LE(0, dir_fd);
+ EXPECT_EQ(0, close(dir_fd));
+
+ /* Denies opening directories. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+
+ /* Needed for KEYCTL_SESSION_TO_PARENT permission checks */
+ EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, NULL, 0,
+ 0, 0))
+ {
+ TH_LOG("Failed to join session keyring: %s", strerror(errno));
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ /* Checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+
+ /*
+ * KEYCTL_SESSION_TO_PARENT is a no-op unless we have a
+ * different session keyring in the child, so make that happen.
+ */
+ EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING,
+ NULL, 0, 0, 0));
+
+ /*
+ * KEYCTL_SESSION_TO_PARENT installs credentials on the parent
+ * that never go through the cred_prepare hook, this path uses
+ * cred_transfer instead.
+ */
+ EXPECT_EQ(0, syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT, 0,
+ 0, 0, 0));
+
+ /* Re-checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /* Re-checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
new file mode 100644
index 000000000000..230b75f6015b
--- /dev/null
+++ b/tools/testing/selftests/landlock/common.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock test helpers
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ * Copyright © 2021 Microsoft Corporation
+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <linux/securebits.h>
+#include <sys/capability.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "kselftest_harness.h"
+#include "wrappers.h"
+
+#define TMP_DIR "tmp"
+
+/* TEST_F_FORK() should not be used for new tests. */
+#define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name)
+
+static const char bin_sandbox_and_launch[] = "./sandbox-and-launch";
+static const char bin_wait_pipe[] = "./wait-pipe";
+static const char bin_wait_pipe_sandbox[] = "./wait-pipe-sandbox";
+
+static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
+{
+ cap_t cap_p;
+ /* Only these three capabilities are useful for the tests. */
+ const cap_value_t caps[] = {
+ /* clang-format off */
+ CAP_AUDIT_CONTROL,
+ CAP_DAC_OVERRIDE,
+ CAP_MKNOD,
+ CAP_NET_ADMIN,
+ CAP_NET_BIND_SERVICE,
+ CAP_SETUID,
+ CAP_SYS_ADMIN,
+ CAP_SYS_CHROOT,
+ /* clang-format on */
+ };
+ const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;
+
+ if ((cap_get_secbits() & noroot) != noroot)
+ EXPECT_EQ(0, cap_set_secbits(noroot));
+
+ cap_p = cap_get_proc();
+ EXPECT_NE(NULL, cap_p);
+ EXPECT_NE(-1, cap_clear(cap_p));
+ if (!drop_all) {
+ EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED,
+ ARRAY_SIZE(caps), caps, CAP_SET));
+ }
+
+ /* Automatically resets ambient capabilities. */
+ EXPECT_NE(-1, cap_set_proc(cap_p))
+ {
+ TH_LOG("Failed to set capabilities: %s", strerror(errno));
+ }
+ EXPECT_NE(-1, cap_free(cap_p));
+
+ /* Quickly checks that ambient capabilities are cleared. */
+ EXPECT_NE(-1, cap_get_ambient(caps[0]));
+}
+
+/* We cannot put such helpers in a library because of kselftest_harness.h . */
+static void __maybe_unused disable_caps(struct __test_metadata *const _metadata)
+{
+ _init_caps(_metadata, false);
+}
+
+static void __maybe_unused drop_caps(struct __test_metadata *const _metadata)
+{
+ _init_caps(_metadata, true);
+}
+
+static void _change_cap(struct __test_metadata *const _metadata,
+ const cap_flag_t flag, const cap_value_t cap,
+ const cap_flag_value_t value)
+{
+ cap_t cap_p;
+
+ cap_p = cap_get_proc();
+ EXPECT_NE(NULL, cap_p);
+ EXPECT_NE(-1, cap_set_flag(cap_p, flag, 1, &cap, value));
+ EXPECT_NE(-1, cap_set_proc(cap_p))
+ {
+ TH_LOG("Failed to set capability %d: %s", cap, strerror(errno));
+ }
+ EXPECT_NE(-1, cap_free(cap_p));
+}
+
+static void __maybe_unused set_cap(struct __test_metadata *const _metadata,
+ const cap_value_t cap)
+{
+ _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_SET);
+}
+
+static void __maybe_unused clear_cap(struct __test_metadata *const _metadata,
+ const cap_value_t cap)
+{
+ _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_CLEAR);
+}
+
+static void __maybe_unused
+set_ambient_cap(struct __test_metadata *const _metadata, const cap_value_t cap)
+{
+ _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_SET);
+
+ EXPECT_NE(-1, cap_set_ambient(cap, CAP_SET))
+ {
+ TH_LOG("Failed to set ambient capability %d: %s", cap,
+ strerror(errno));
+ }
+}
+
+static void __maybe_unused clear_ambient_cap(
+ struct __test_metadata *const _metadata, const cap_value_t cap)
+{
+ EXPECT_EQ(1, cap_get_ambient(cap));
+ _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_CLEAR);
+ EXPECT_EQ(0, cap_get_ambient(cap));
+}
+
+/* Receives an FD from a UNIX socket. Returns the received FD, or -errno. */
+static int __maybe_unused recv_fd(int usock)
+{
+ int fd_rx;
+ union {
+ /* Aligned ancillary data buffer. */
+ char buf[CMSG_SPACE(sizeof(fd_rx))];
+ struct cmsghdr _align;
+ } cmsg_rx = {};
+ char data = '\0';
+ struct iovec io = {
+ .iov_base = &data,
+ .iov_len = sizeof(data),
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = &cmsg_rx.buf,
+ .msg_controllen = sizeof(cmsg_rx.buf),
+ };
+ struct cmsghdr *cmsg;
+ int res;
+
+ res = recvmsg(usock, &msg, MSG_CMSG_CLOEXEC);
+ if (res < 0)
+ return -errno;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(fd_rx)))
+ return -EIO;
+
+ memcpy(&fd_rx, CMSG_DATA(cmsg), sizeof(fd_rx));
+ return fd_rx;
+}
+
+/* Sends an FD on a UNIX socket. Returns 0 on success or -errno. */
+static int __maybe_unused send_fd(int usock, int fd_tx)
+{
+ union {
+ /* Aligned ancillary data buffer. */
+ char buf[CMSG_SPACE(sizeof(fd_tx))];
+ struct cmsghdr _align;
+ } cmsg_tx = {};
+ char data_tx = '.';
+ struct iovec io = {
+ .iov_base = &data_tx,
+ .iov_len = sizeof(data_tx),
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = &cmsg_tx.buf,
+ .msg_controllen = sizeof(cmsg_tx.buf),
+ };
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fd_tx));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), &fd_tx, sizeof(fd_tx));
+
+ if (sendmsg(usock, &msg, 0) < 0)
+ return -errno;
+ return 0;
+}
+
+static void __maybe_unused
+enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd)
+{
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
+ {
+ TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
+ }
+}
+
+static void __maybe_unused
+drop_access_rights(struct __test_metadata *const _metadata,
+ const struct landlock_ruleset_attr *const ruleset_attr)
+{
+ int ruleset_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(ruleset_attr, sizeof(*ruleset_attr), 0);
+ EXPECT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+struct protocol_variant {
+ int domain;
+ int type;
+ int protocol;
+};
+
+struct service_fixture {
+ struct protocol_variant protocol;
+ /* port is also stored in ipv4_addr.sin_port or ipv6_addr.sin6_port */
+ unsigned short port;
+ union {
+ struct sockaddr_in ipv4_addr;
+ struct sockaddr_in6 ipv6_addr;
+ struct {
+ struct sockaddr_un unix_addr;
+ socklen_t unix_addr_len;
+ };
+ };
+};
+
+static void __maybe_unused set_unix_address(struct service_fixture *const srv,
+ const unsigned short index)
+{
+ srv->unix_addr.sun_family = AF_UNIX;
+ sprintf(srv->unix_addr.sun_path,
+ "_selftests-landlock-abstract-unix-tid%d-index%d", sys_gettid(),
+ index);
+ srv->unix_addr_len = SUN_LEN(&srv->unix_addr);
+ srv->unix_addr.sun_path[0] = '\0';
+}
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
new file mode 100644
index 000000000000..8fe9b461b1fd
--- /dev/null
+++ b/tools/testing/selftests/landlock/config
@@ -0,0 +1,19 @@
+CONFIG_AF_UNIX_OOB=y
+CONFIG_AUDIT=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_INET=y
+CONFIG_IPV6=y
+CONFIG_KEYS=y
+CONFIG_MPTCP=y
+CONFIG_MPTCP_IPV6=y
+CONFIG_NET=y
+CONFIG_NET_NS=y
+CONFIG_OVERLAY_FS=y
+CONFIG_PROC_FS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_LANDLOCK=y
+CONFIG_SHMEM=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
diff --git a/tools/testing/selftests/landlock/config.um b/tools/testing/selftests/landlock/config.um
new file mode 100644
index 000000000000..40937c0395d6
--- /dev/null
+++ b/tools/testing/selftests/landlock/config.um
@@ -0,0 +1 @@
+CONFIG_HOSTFS=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
new file mode 100644
index 000000000000..eee814e09dd7
--- /dev/null
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -0,0 +1,7650 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Filesystem
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2020 ANSSI
+ * Copyright © 2020-2022 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <asm/termbits.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <linux/fiemap.h>
+#include <linux/landlock.h>
+#include <linux/magic.h>
+#include <sched.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/un.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+/*
+ * Intentionally included last to work around header conflict.
+ * See https://sourceware.org/glibc/wiki/Synchronizing_Headers.
+ */
+#include <linux/fs.h>
+#include <linux/mount.h>
+
+/* Defines AT_EXECVE_CHECK without type conflicts. */
+#define _ASM_GENERIC_FCNTL_H
+#include <linux/fcntl.h>
+
+#include "audit.h"
+#include "common.h"
+
+#ifndef renameat2
+int renameat2(int olddirfd, const char *oldpath, int newdirfd,
+ const char *newpath, unsigned int flags)
+{
+ return syscall(__NR_renameat2, olddirfd, oldpath, newdirfd, newpath,
+ flags);
+}
+#endif
+
+#ifndef open_tree
+int open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+#endif
+
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+ char *const envp[], int flags)
+{
+ return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
+#ifndef RENAME_EXCHANGE
+#define RENAME_EXCHANGE (1 << 1)
+#endif
+
+static const char bin_true[] = "./true";
+
+/* Paths (sibling number and depth) */
+static const char dir_s1d1[] = TMP_DIR "/s1d1";
+static const char file1_s1d1[] = TMP_DIR "/s1d1/f1";
+static const char file2_s1d1[] = TMP_DIR "/s1d1/f2";
+static const char dir_s1d2[] = TMP_DIR "/s1d1/s1d2";
+static const char file1_s1d2[] = TMP_DIR "/s1d1/s1d2/f1";
+static const char file2_s1d2[] = TMP_DIR "/s1d1/s1d2/f2";
+static const char dir_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3";
+static const char file1_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f1";
+static const char file2_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f2";
+
+static const char dir_s2d1[] = TMP_DIR "/s2d1";
+static const char file1_s2d1[] = TMP_DIR "/s2d1/f1";
+static const char dir_s2d2[] = TMP_DIR "/s2d1/s2d2";
+static const char file1_s2d2[] = TMP_DIR "/s2d1/s2d2/f1";
+static const char dir_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3";
+static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1";
+static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2";
+
+static const char dir_s3d1[] = TMP_DIR "/s3d1";
+static const char file1_s3d1[] = TMP_DIR "/s3d1/f1";
+/* dir_s3d2 is a mount point. */
+static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2";
+static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
+static const char file1_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3/f1";
+static const char dir_s3d4[] = TMP_DIR "/s3d1/s3d2/s3d4";
+static const char file1_s3d4[] = TMP_DIR "/s3d1/s3d2/s3d4/f1";
+
+/*
+ * layout1 hierarchy:
+ *
+ * tmp
+ * ├── s1d1
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d2
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d3
+ * │   ├── f1
+ * │   └── f2
+ * ├── s2d1
+ * │   ├── f1
+ * │   └── s2d2
+ * │   ├── f1
+ * │   └── s2d3
+ * │   ├── f1
+ * │   └── f2
+ * └── s3d1
+ *    ├── f1
+ * └── s3d2 [mount point]
+ *    ├── s3d3
+ *    │ └── f1
+ *    └── s3d4
+ *    └── f1
+ */
+
+static bool fgrep(FILE *const inf, const char *const str)
+{
+ char line[32];
+ const int slen = strlen(str);
+
+ while (!feof(inf)) {
+ if (!fgets(line, sizeof(line), inf))
+ break;
+ if (strncmp(line, str, slen))
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+static bool supports_filesystem(const char *const filesystem)
+{
+ char str[32];
+ int len;
+ bool res = true;
+ FILE *const inf = fopen("/proc/filesystems", "r");
+
+ /*
+ * Consider that the filesystem is supported if we cannot get the
+ * supported ones.
+ */
+ if (!inf)
+ return true;
+
+ /* filesystem can be null for bind mounts. */
+ if (!filesystem)
+ goto out;
+
+ len = snprintf(str, sizeof(str), "nodev\t%s\n", filesystem);
+ if (len >= sizeof(str))
+ /* Ignores too-long filesystem names. */
+ goto out;
+
+ res = fgrep(inf, str);
+
+out:
+ fclose(inf);
+ return res;
+}
+
+static bool cwd_matches_fs(unsigned int fs_magic)
+{
+ struct statfs statfs_buf;
+
+ if (!fs_magic)
+ return true;
+
+ if (statfs(".", &statfs_buf))
+ return true;
+
+ return statfs_buf.f_type == fs_magic;
+}
+
+static void mkdir_parents(struct __test_metadata *const _metadata,
+ const char *const path)
+{
+ char *walker;
+ const char *parent;
+ int i, err;
+
+ ASSERT_NE(path[0], '\0');
+ walker = strdup(path);
+ ASSERT_NE(NULL, walker);
+ parent = walker;
+ for (i = 1; walker[i]; i++) {
+ if (walker[i] != '/')
+ continue;
+ walker[i] = '\0';
+ err = mkdir(parent, 0700);
+ ASSERT_FALSE(err && errno != EEXIST)
+ {
+ TH_LOG("Failed to create directory \"%s\": %s", parent,
+ strerror(errno));
+ }
+ walker[i] = '/';
+ }
+ free(walker);
+}
+
+static void create_directory(struct __test_metadata *const _metadata,
+ const char *const path)
+{
+ mkdir_parents(_metadata, path);
+ ASSERT_EQ(0, mkdir(path, 0700))
+ {
+ TH_LOG("Failed to create directory \"%s\": %s", path,
+ strerror(errno));
+ }
+}
+
+static void create_file(struct __test_metadata *const _metadata,
+ const char *const path)
+{
+ mkdir_parents(_metadata, path);
+ ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0))
+ {
+ TH_LOG("Failed to create file \"%s\": %s", path,
+ strerror(errno));
+ }
+}
+
+static int remove_path(const char *const path)
+{
+ char *walker;
+ int i, ret, err = 0;
+
+ walker = strdup(path);
+ if (!walker) {
+ err = ENOMEM;
+ goto out;
+ }
+ if (unlink(path) && rmdir(path)) {
+ if (errno != ENOENT && errno != ENOTDIR)
+ err = errno;
+ goto out;
+ }
+ for (i = strlen(walker); i > 0; i--) {
+ if (walker[i] != '/')
+ continue;
+ walker[i] = '\0';
+ ret = rmdir(walker);
+ if (ret) {
+ if (errno != ENOTEMPTY && errno != EBUSY)
+ err = errno;
+ goto out;
+ }
+ if (strcmp(walker, TMP_DIR) == 0)
+ goto out;
+ }
+
+out:
+ free(walker);
+ return err;
+}
+
+struct mnt_opt {
+ const char *const source;
+ const char *const type;
+ const unsigned long flags;
+ const char *const data;
+};
+
+#define MNT_TMP_DATA "size=4m,mode=700"
+
+static const struct mnt_opt mnt_tmp = {
+ .type = "tmpfs",
+ .data = MNT_TMP_DATA,
+};
+
+static int mount_opt(const struct mnt_opt *const mnt, const char *const target)
+{
+ return mount(mnt->source ?: mnt->type, target, mnt->type, mnt->flags,
+ mnt->data);
+}
+
+static void prepare_layout_opt(struct __test_metadata *const _metadata,
+ const struct mnt_opt *const mnt)
+{
+ disable_caps(_metadata);
+ umask(0077);
+ create_directory(_metadata, TMP_DIR);
+
+ /*
+ * Do not pollute the rest of the system: creates a private mount point
+ * for tests relying on pivot_root(2) and move_mount(2).
+ */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, unshare(CLONE_NEWNS | CLONE_NEWCGROUP));
+ ASSERT_EQ(0, mount_opt(mnt, TMP_DIR))
+ {
+ TH_LOG("Failed to mount the %s filesystem: %s", mnt->type,
+ strerror(errno));
+ /*
+ * FIXTURE_TEARDOWN() is not called when FIXTURE_SETUP()
+ * failed, so we need to explicitly do a minimal cleanup to
+ * avoid cascading errors with other tests that don't depend on
+ * the same filesystem.
+ */
+ remove_path(TMP_DIR);
+ }
+ ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+static void prepare_layout(struct __test_metadata *const _metadata)
+{
+ prepare_layout_opt(_metadata, &mnt_tmp);
+}
+
+static void cleanup_layout(struct __test_metadata *const _metadata)
+{
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ if (umount(TMP_DIR)) {
+ /*
+ * According to the test environment, the mount point of the
+ * current directory may be shared or not, which changes the
+ * visibility of the nested TMP_DIR mount point for the test's
+ * parent process doing this cleanup.
+ */
+ ASSERT_EQ(EINVAL, errno);
+ }
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, remove_path(TMP_DIR));
+}
+
+/* clang-format off */
+FIXTURE(layout0) {};
+/* clang-format on */
+
+FIXTURE_SETUP(layout0)
+{
+ prepare_layout(_metadata);
+}
+
+FIXTURE_TEARDOWN_PARENT(layout0)
+{
+ cleanup_layout(_metadata);
+}
+
+static void create_layout1(struct __test_metadata *const _metadata)
+{
+ create_file(_metadata, file1_s1d1);
+ create_file(_metadata, file1_s1d2);
+ create_file(_metadata, file1_s1d3);
+ create_file(_metadata, file2_s1d1);
+ create_file(_metadata, file2_s1d2);
+ create_file(_metadata, file2_s1d3);
+
+ create_file(_metadata, file1_s2d1);
+ create_file(_metadata, file1_s2d2);
+ create_file(_metadata, file1_s2d3);
+ create_file(_metadata, file2_s2d3);
+
+ create_file(_metadata, file1_s3d1);
+ create_directory(_metadata, dir_s3d2);
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ create_file(_metadata, file1_s3d3);
+ create_file(_metadata, file1_s3d4);
+}
+
+static void remove_layout1(struct __test_metadata *const _metadata)
+{
+ EXPECT_EQ(0, remove_path(file2_s1d3));
+ EXPECT_EQ(0, remove_path(file2_s1d2));
+ EXPECT_EQ(0, remove_path(file2_s1d1));
+ EXPECT_EQ(0, remove_path(file1_s1d3));
+ EXPECT_EQ(0, remove_path(file1_s1d2));
+ EXPECT_EQ(0, remove_path(file1_s1d1));
+ EXPECT_EQ(0, remove_path(dir_s1d3));
+
+ EXPECT_EQ(0, remove_path(file2_s2d3));
+ EXPECT_EQ(0, remove_path(file1_s2d3));
+ EXPECT_EQ(0, remove_path(file1_s2d2));
+ EXPECT_EQ(0, remove_path(file1_s2d1));
+ EXPECT_EQ(0, remove_path(dir_s2d2));
+
+ EXPECT_EQ(0, remove_path(file1_s3d1));
+ EXPECT_EQ(0, remove_path(file1_s3d3));
+ EXPECT_EQ(0, remove_path(file1_s3d4));
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ umount(dir_s3d2);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, remove_path(dir_s3d2));
+}
+
+/* clang-format off */
+FIXTURE(layout1) {};
+/* clang-format on */
+
+FIXTURE_SETUP(layout1)
+{
+ prepare_layout(_metadata);
+
+ create_layout1(_metadata);
+}
+
+FIXTURE_TEARDOWN_PARENT(layout1)
+{
+ remove_layout1(_metadata);
+
+ cleanup_layout(_metadata);
+}
+
+/*
+ * This helper enables to use the ASSERT_* macros and print the line number
+ * pointing to the test caller.
+ */
+static int test_open_rel(const int dirfd, const char *const path,
+ const int flags)
+{
+ int fd;
+
+ /* Works with file and directories. */
+ fd = openat(dirfd, path, flags | O_CLOEXEC);
+ if (fd < 0)
+ return errno;
+ /*
+ * Mixing error codes from close(2) and open(2) should not lead to any
+ * (access type) confusion for this test.
+ */
+ if (close(fd) != 0)
+ return errno;
+ return 0;
+}
+
+static int test_open(const char *const path, const int flags)
+{
+ return test_open_rel(AT_FDCWD, path, flags);
+}
+
+TEST_F_FORK(layout1, no_restriction)
+{
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, inval)
+{
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ .parent_fd = -1,
+ };
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ };
+ int ruleset_fd;
+
+ path_beneath.parent_fd =
+ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */
+ ASSERT_EQ(EBADF, errno);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ /* Returns EBADFD because ruleset_fd is not a valid ruleset. */
+ ASSERT_EQ(EBADFD, errno);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Gets a real ruleset. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Tests without O_PATH. */
+ path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Tests with a ruleset FD. */
+ path_beneath.parent_fd = ruleset_fd;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EBADFD, errno);
+
+ /* Checks unhandled allowed_access. */
+ path_beneath.parent_fd =
+ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ /* Test with legitimate values. */
+ path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE;
+
+ /* Tests with denied-by-default access right. */
+ path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_REFER;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_REFER;
+
+ /* Test with unknown (64-bits) value. */
+ path_beneath.allowed_access |= (1ULL << 60);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~(1ULL << 60);
+
+ /* Test with no access. */
+ path_beneath.allowed_access = 0;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(ENOMSG, errno);
+ path_beneath.allowed_access &= ~(1ULL << 60);
+
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Enforces the ruleset. */
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+/* clang-format off */
+
+#define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+ LANDLOCK_ACCESS_FS_TRUNCATE | \
+ LANDLOCK_ACCESS_FS_IOCTL_DEV)
+
+#define ACCESS_LAST LANDLOCK_ACCESS_FS_IOCTL_DEV
+
+#define ACCESS_ALL ( \
+ ACCESS_FILE | \
+ LANDLOCK_ACCESS_FS_READ_DIR | \
+ LANDLOCK_ACCESS_FS_REMOVE_DIR | \
+ LANDLOCK_ACCESS_FS_REMOVE_FILE | \
+ LANDLOCK_ACCESS_FS_MAKE_CHAR | \
+ LANDLOCK_ACCESS_FS_MAKE_DIR | \
+ LANDLOCK_ACCESS_FS_MAKE_REG | \
+ LANDLOCK_ACCESS_FS_MAKE_SOCK | \
+ LANDLOCK_ACCESS_FS_MAKE_FIFO | \
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+ LANDLOCK_ACCESS_FS_MAKE_SYM | \
+ LANDLOCK_ACCESS_FS_REFER)
+
+/* clang-format on */
+
+TEST_F_FORK(layout1, file_and_dir_access_rights)
+{
+ __u64 access;
+ int err;
+ struct landlock_path_beneath_attr path_beneath_file = {},
+ path_beneath_dir = {};
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Tests access rights for files. */
+ path_beneath_file.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_file.parent_fd);
+
+ /* Tests access rights for directories. */
+ path_beneath_dir.parent_fd =
+ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_dir.parent_fd);
+
+ for (access = 1; access <= ACCESS_LAST; access <<= 1) {
+ path_beneath_dir.allowed_access = access;
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_dir, 0));
+
+ path_beneath_file.allowed_access = access;
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_file, 0);
+ if (access & ACCESS_FILE) {
+ ASSERT_EQ(0, err);
+ } else {
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(EINVAL, errno);
+ }
+ }
+ ASSERT_EQ(0, close(path_beneath_file.parent_fd));
+ ASSERT_EQ(0, close(path_beneath_dir.parent_fd));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout0, ruleset_with_unknown_access)
+{
+ __u64 access_mask;
+
+ for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
+ access_mask >>= 1) {
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = access_mask,
+ };
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0));
+ ASSERT_EQ(EINVAL, errno);
+ }
+}
+
+TEST_F_FORK(layout0, rule_with_unknown_access)
+{
+ __u64 access;
+ struct landlock_path_beneath_attr path_beneath = {};
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ path_beneath.parent_fd =
+ open(TMP_DIR, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ for (access = 1ULL << 63; access != ACCESS_LAST; access >>= 1) {
+ path_beneath.allowed_access = access;
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ EXPECT_EQ(EINVAL, errno);
+ }
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, rule_with_unhandled_access)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ struct landlock_path_beneath_attr path_beneath = {};
+ int ruleset_fd;
+ __u64 access;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ for (access = 1; access > 0; access <<= 1) {
+ int err;
+
+ path_beneath.allowed_access = access;
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0);
+ if (access == ruleset_attr.handled_access_fs) {
+ EXPECT_EQ(0, err);
+ } else {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EINVAL, errno);
+ }
+ }
+
+ EXPECT_EQ(0, close(path_beneath.parent_fd));
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+static void add_path_beneath(struct __test_metadata *const _metadata,
+ const int ruleset_fd, const __u64 allowed_access,
+ const char *const path)
+{
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = allowed_access,
+ };
+
+ path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd)
+ {
+ TH_LOG("Failed to open directory \"%s\": %s", path,
+ strerror(errno));
+ }
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0))
+ {
+ TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
+ strerror(errno));
+ }
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+}
+
+struct rule {
+ const char *path;
+ __u64 access;
+};
+
+/* clang-format off */
+
+#define ACCESS_RO ( \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+ LANDLOCK_ACCESS_FS_READ_DIR)
+
+#define ACCESS_RW ( \
+ ACCESS_RO | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE)
+
+/* clang-format on */
+
+static int create_ruleset(struct __test_metadata *const _metadata,
+ const __u64 handled_access_fs,
+ const struct rule rules[])
+{
+ int ruleset_fd, i;
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = handled_access_fs,
+ };
+
+ ASSERT_NE(NULL, rules)
+ {
+ TH_LOG("No rule list");
+ }
+ ASSERT_NE(NULL, rules[0].path)
+ {
+ TH_LOG("Empty rule list");
+ }
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+
+ for (i = 0; rules[i].path; i++) {
+ if (!rules[i].access)
+ continue;
+
+ add_path_beneath(_metadata, ruleset_fd, rules[i].access,
+ rules[i].path);
+ }
+ return ruleset_fd;
+}
+
+TEST_F_FORK(layout0, proc_nsfs)
+{
+ const struct rule rules[] = {
+ {
+ .path = "/dev/null",
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ struct landlock_path_beneath_attr path_beneath;
+ const int ruleset_fd = create_ruleset(
+ _metadata, rules[0].access | LANDLOCK_ACCESS_FS_READ_DIR,
+ rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/dev", O_RDONLY));
+ ASSERT_EQ(0, test_open("/dev/null", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/dev/full", O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open("/proc", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/proc/self", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/proc/self/ns", O_RDONLY));
+ /*
+ * Because nsfs is an internal filesystem, /proc/self/ns/mnt is a
+ * disconnected path. Such path cannot be identified and must then be
+ * allowed.
+ */
+ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
+
+ /*
+ * Checks that it is not possible to add nsfs-like filesystem
+ * references to a ruleset.
+ */
+ path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EBADFD, errno);
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+}
+
+TEST_F_FORK(layout0, unpriv)
+{
+ const struct rule rules[] = {
+ {
+ .path = TMP_DIR,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ drop_caps(_metadata);
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(EPERM, errno);
+
+ /* enforce_ruleset() calls prctl(no_new_privs). */
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, effective_access)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = file1_s2d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+ char buf;
+ int reg_fd;
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Tests on a directory (with or without O_PATH). */
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(0, test_open("/", O_RDONLY | O_PATH));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_PATH));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY | O_PATH));
+
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ /* Tests on a file (with or without O_PATH). */
+ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_PATH));
+
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+
+ /* Checks effective read and write actions. */
+ reg_fd = open(file1_s2d2, O_RDWR | O_CLOEXEC);
+ ASSERT_LE(0, reg_fd);
+ ASSERT_EQ(1, write(reg_fd, ".", 1));
+ ASSERT_LE(0, lseek(reg_fd, 0, SEEK_SET));
+ ASSERT_EQ(1, read(reg_fd, &buf, 1));
+ ASSERT_EQ('.', buf);
+ ASSERT_EQ(0, close(reg_fd));
+
+ /* Just in case, double-checks effective actions. */
+ reg_fd = open(file1_s2d2, O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, reg_fd);
+ ASSERT_EQ(-1, write(reg_fd, &buf, 1));
+ ASSERT_EQ(EBADF, errno);
+ ASSERT_EQ(0, close(reg_fd));
+}
+
+TEST_F_FORK(layout1, unhandled_access)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ /* Here, we only handle read accesses, not write accesses. */
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Because the policy does not handle LANDLOCK_ACCESS_FS_WRITE_FILE,
+ * opening for write-only should be allowed, but not read-write.
+ */
+ ASSERT_EQ(0, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+
+ ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
+}
+
+TEST_F_FORK(layout1, ruleset_overlap)
+{
+ const struct rule rules[] = {
+ /* These rules should be ORed among them. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+}
+
+TEST_F_FORK(layout1, layer_rule_unions)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer2[] = {
+ /* Doesn't change anything from layer1. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer3[] = {
+ /* Only allows write (but not read) to dir_s1d3. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy with layer1. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy with layer1. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy with layer1. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Doesn't change anything from layer1. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy with layer2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy with layer2. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy with layer2. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Only allows write (but not read) to dir_s1d3. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy with layer3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy with layer3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy with layer3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ /* dir_s1d3 should now deny READ_FILE and WRITE_FILE (O_RDWR). */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+}
+
+TEST_F_FORK(layout1, non_overlapping_accesses)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const struct rule layer2[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Unchanged accesses for file creation. */
+ ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0));
+
+ /* Checks file removing. */
+ ASSERT_EQ(-1, unlink(file1_s1d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s1d3));
+}
+
+TEST_F_FORK(layout1, interleaved_masked_accesses)
+{
+ /*
+ * Checks overly restrictive rules:
+ * layer 1: allows R s1d1/s1d2/s1d3/file1
+ * layer 2: allows RW s1d1/s1d2/s1d3
+ * allows W s1d1/s1d2
+ * denies R s1d1/s1d2
+ * layer 3: allows R s1d1
+ * layer 4: allows R s1d1/s1d2
+ * denies W s1d1/s1d2
+ * layer 5: allows R s1d1/s1d2
+ * layer 6: allows X ----
+ * layer 7: allows W s1d1/s1d2
+ * denies R s1d1/s1d2
+ */
+ const struct rule layer1_read[] = {
+ /* Allows read access to file1_s1d3 with the first layer. */
+ {
+ .path = file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ /* First rule with write restrictions. */
+ const struct rule layer2_read_write[] = {
+ /* Start by granting read-write access via its parent directory... */
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ /* ...but also denies read access via its grandparent directory. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer3_read[] = {
+ /* Allows read access via its great-grandparent directory. */
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct rule layer4_read_write[] = {
+ /*
+ * Try to confuse the deny access by denying write (but not
+ * read) access via its grandparent directory.
+ */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct rule layer5_read[] = {
+ /*
+ * Try to override layer2's deny read access by explicitly
+ * allowing read access via file1_s1d3's grandparent.
+ */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct rule layer6_execute[] = {
+ /*
+ * Restricts an unrelated file hierarchy with a new access
+ * (non-overlapping) type.
+ */
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+ };
+ const struct rule layer7_read_write[] = {
+ /*
+ * Finally, denies read access to file1_s1d3 via its
+ * grandparent.
+ */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer1_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that read access is granted for file1_s1d3 with layer 1. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ layer2_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that previous access rights are unchanged with layer 2. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer3_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that previous access rights are unchanged with layer 3. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ /* This time, denies write access for the file hierarchy. */
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ layer4_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Checks that the only change with layer 4 is that write access is
+ * denied.
+ */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer5_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that previous access rights are unchanged with layer 5. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE,
+ layer6_execute);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that previous access rights are unchanged with layer 6. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ layer7_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks read access is now denied with layer 7. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, inherit_subset)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Write access is forbidden. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* Write access is forbidden. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /* Readdir access is allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /*
+ * Tests shared rule extension: the following rules should not grant
+ * any new access, only remove some. Once enforced, these rules are
+ * ANDed with the previous ones.
+ */
+ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
+ dir_s1d2);
+ /*
+ * According to ruleset_fd, dir_s1d2 should now have the
+ * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE
+ * access rights (even if this directory is opened a second time).
+ * However, when enforcing this updated ruleset, the ruleset tied to
+ * the current process (i.e. its domain) will still only have the
+ * dir_s1d2 with LANDLOCK_ACCESS_FS_READ_FILE and
+ * LANDLOCK_ACCESS_FS_READ_DIR accesses, but
+ * LANDLOCK_ACCESS_FS_WRITE_FILE must not be allowed because it would
+ * be a privilege escalation.
+ */
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* Same tests and results as above. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /*
+ * Try to get more privileges by adding new access rights to the parent
+ * directory: dir_s1d1.
+ */
+ add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* Same tests and results as above. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /*
+ * Now, dir_s1d3 get a new rule tied to it, only allowing
+ * LANDLOCK_ACCESS_FS_WRITE_FILE. The (kernel internal) difference is
+ * that there was no rule tied to it before.
+ */
+ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
+ dir_s1d3);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Same tests and results as above, except for open(dir_s1d3) which is
+ * now denied because the new rule mask the rule previously inherited
+ * from dir_s1d2.
+ */
+
+ /* Same tests and results as above. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /*
+ * Readdir of dir_s1d3 is still allowed because of the OR policy inside
+ * the same layer.
+ */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+}
+
+TEST_F_FORK(layout1, inherit_superset)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d3,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* Readdir access is denied for dir_s1d2. */
+ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+ /* Readdir access is allowed for dir_s1d3. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+ /* File access is allowed for file1_s1d3. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */
+ add_path_beneath(_metadata, ruleset_fd,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ dir_s1d2);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Readdir access is still denied for dir_s1d2. */
+ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+ /* Readdir access is still allowed for dir_s1d3. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+ /* File access is still allowed for file1_s1d3. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout0, max_layers)
+{
+ int i, err;
+ const struct rule rules[] = {
+ {
+ .path = TMP_DIR,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ for (i = 0; i < 16; i++)
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ for (i = 0; i < 2; i++) {
+ err = landlock_restrict_self(ruleset_fd, 0);
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(E2BIG, errno);
+ }
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, empty_or_same_ruleset)
+{
+ struct landlock_ruleset_attr ruleset_attr = {};
+ int ruleset_fd;
+
+ /* Tests empty handled_access_fs. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(-1, ruleset_fd);
+ ASSERT_EQ(ENOMSG, errno);
+
+ /* Enforces policy which deny read access to all files. */
+ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE;
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ /* Nests a policy which deny read access to all directories. */
+ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR;
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+
+ /* Enforces a second time with the same ruleset. */
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, rule_on_mountpoint)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ /* dir_s3d2 is a mount point. */
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open(dir_s3d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, rule_over_mountpoint)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ /* dir_s3d2 is a mount point. */
+ .path = dir_s3d1,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
+}
+
+/*
+ * This test verifies that we can apply a landlock rule on the root directory
+ * (which might require special handling).
+ */
+TEST_F_FORK(layout1, rule_over_root_allow_then_deny)
+{
+ struct rule rules[] = {
+ {
+ .path = "/",
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks allowed access. */
+ ASSERT_EQ(0, test_open("/", O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ rules[0].access = LANDLOCK_ACCESS_FS_READ_FILE;
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks denied access (on a directory). */
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, rule_over_root_deny)
+{
+ const struct rule rules[] = {
+ {
+ .path = "/",
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks denied access (on a directory). */
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, rule_inside_mount_ns)
+{
+ const struct rule rules[] = {
+ {
+ .path = "s3d3",
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3))
+ {
+ TH_LOG("Failed to pivot root: %s", strerror(errno));
+ };
+ ASSERT_EQ(0, chdir("/"));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open("s3d3", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+}
+
+TEST_F_FORK(layout1, mount_and_pivot)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, move_mount)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+ dir_s1d2, 0))
+ {
+ TH_LOG("Failed to move mount: %s", strerror(errno));
+ }
+
+ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+ dir_s3d2, 0));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+ dir_s1d2, 0));
+ ASSERT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, topology_changes_with_net_only)
+{
+ const struct landlock_ruleset_attr ruleset_net = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ int ruleset_fd;
+
+ /* Add network restrictions. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_net, sizeof(ruleset_net), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Mount, remount, move_mount, umount, and pivot_root checks. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s1d2));
+ ASSERT_EQ(0, mount(NULL, dir_s1d2, NULL, MS_PRIVATE | MS_REC, NULL));
+ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+ dir_s2d2, 0));
+ ASSERT_EQ(0, umount(dir_s2d2));
+ ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(0, chdir("/"));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, topology_changes_with_net_and_fs)
+{
+ const struct landlock_ruleset_attr ruleset_net_fs = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ int ruleset_fd;
+
+ /* Add network and filesystem restrictions. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_net_fs,
+ sizeof(ruleset_net_fs), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Mount, remount, move_mount, umount, and pivot_root checks. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(-1, mount_opt(&mnt_tmp, dir_s1d2));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_PRIVATE | MS_REC, NULL));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+ dir_s2d2, 0));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, umount(dir_s3d2));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, release_inodes)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = dir_s3d3,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ /* Unmount a file hierarchy while it is being used by a ruleset. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, umount(dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY));
+ /* This dir_s3d3 would not be allowed and does not exist anyway. */
+ ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY));
+}
+
+/*
+ * This test checks that a rule on a directory used as a mount point does not
+ * grant access to the mount covering it. It is a generalization of the bind
+ * mount case in layout3_fs.hostfs.release_inodes that tests hidden mount points.
+ */
+TEST_F_FORK(layout1, covered_rule)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s3d2,
+ .access = LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ /* Unmount to simplify FIXTURE_TEARDOWN. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, umount(dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* Creates a ruleset with the future hidden directory. */
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Covers with a new mount point. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that access to the new mount point is denied. */
+ ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY));
+}
+
+enum relative_access {
+ REL_OPEN,
+ REL_CHDIR,
+ REL_CHROOT_ONLY,
+ REL_CHROOT_CHDIR,
+};
+
+static void test_relative_path(struct __test_metadata *const _metadata,
+ const enum relative_access rel)
+{
+ /*
+ * Common layer to check that chroot doesn't ignore it (i.e. a chroot
+ * is not a disconnected root directory).
+ */
+ const struct rule layer1_base[] = {
+ {
+ .path = TMP_DIR,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const struct rule layer2_subs[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = dir_s2d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ int dirfd, ruleset_fd;
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_subs);
+
+ ASSERT_LE(0, ruleset_fd);
+ switch (rel) {
+ case REL_OPEN:
+ case REL_CHDIR:
+ break;
+ case REL_CHROOT_ONLY:
+ ASSERT_EQ(0, chdir(dir_s2d2));
+ break;
+ case REL_CHROOT_CHDIR:
+ ASSERT_EQ(0, chdir(dir_s1d2));
+ break;
+ default:
+ ASSERT_TRUE(false);
+ return;
+ }
+
+ set_cap(_metadata, CAP_SYS_CHROOT);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ switch (rel) {
+ case REL_OPEN:
+ dirfd = open(dir_s1d2, O_DIRECTORY);
+ ASSERT_LE(0, dirfd);
+ break;
+ case REL_CHDIR:
+ ASSERT_EQ(0, chdir(dir_s1d2));
+ dirfd = AT_FDCWD;
+ break;
+ case REL_CHROOT_ONLY:
+ /* Do chroot into dir_s1d2 (relative to dir_s2d2). */
+ ASSERT_EQ(0, chroot("../../s1d1/s1d2"))
+ {
+ TH_LOG("Failed to chroot: %s", strerror(errno));
+ }
+ dirfd = AT_FDCWD;
+ break;
+ case REL_CHROOT_CHDIR:
+ /* Do chroot into dir_s1d2. */
+ ASSERT_EQ(0, chroot("."))
+ {
+ TH_LOG("Failed to chroot: %s", strerror(errno));
+ }
+ dirfd = AT_FDCWD;
+ break;
+ }
+
+ ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES,
+ test_open_rel(dirfd, "..", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY));
+
+ if (rel == REL_CHROOT_ONLY) {
+ /* The current directory is dir_s2d2. */
+ ASSERT_EQ(0, test_open_rel(dirfd, "./s2d3", O_RDONLY));
+ } else {
+ /* The current directory is dir_s1d2. */
+ ASSERT_EQ(0, test_open_rel(dirfd, "./s1d3", O_RDONLY));
+ }
+
+ if (rel == REL_CHROOT_ONLY || rel == REL_CHROOT_CHDIR) {
+ /* Checks the root dir_s1d2. */
+ ASSERT_EQ(0, test_open_rel(dirfd, "/..", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "/", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "/f1", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "/s1d3", O_RDONLY));
+ }
+
+ if (rel != REL_CHROOT_CHDIR) {
+ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3",
+ O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3",
+ O_RDONLY));
+ }
+
+ if (rel == REL_OPEN)
+ ASSERT_EQ(0, close(dirfd));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, relative_open)
+{
+ test_relative_path(_metadata, REL_OPEN);
+}
+
+TEST_F_FORK(layout1, relative_chdir)
+{
+ test_relative_path(_metadata, REL_CHDIR);
+}
+
+TEST_F_FORK(layout1, relative_chroot_only)
+{
+ test_relative_path(_metadata, REL_CHROOT_ONLY);
+}
+
+TEST_F_FORK(layout1, relative_chroot_chdir)
+{
+ test_relative_path(_metadata, REL_CHROOT_CHDIR);
+}
+
+static void copy_file(struct __test_metadata *const _metadata,
+ const char *const src_path, const char *const dst_path)
+{
+ int dst_fd, src_fd;
+ struct stat statbuf;
+
+ dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC);
+ ASSERT_LE(0, dst_fd)
+ {
+ TH_LOG("Failed to open \"%s\": %s", dst_path, strerror(errno));
+ }
+ src_fd = open(src_path, O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, src_fd)
+ {
+ TH_LOG("Failed to open \"%s\": %s", src_path, strerror(errno));
+ }
+ ASSERT_EQ(0, fstat(src_fd, &statbuf));
+ ASSERT_EQ(statbuf.st_size,
+ sendfile(dst_fd, src_fd, 0, statbuf.st_size));
+ ASSERT_EQ(0, close(src_fd));
+ ASSERT_EQ(0, close(dst_fd));
+}
+
+static void test_execute(struct __test_metadata *const _metadata, const int err,
+ const char *const path)
+{
+ int status;
+ char *const argv[] = { (char *)path, NULL };
+ const pid_t child = fork();
+
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL))
+ {
+ TH_LOG("Failed to execute \"%s\": %s", path,
+ strerror(errno));
+ };
+ ASSERT_EQ(err, errno);
+ _exit(__test_passed(_metadata) ? 2 : 1);
+ return;
+ }
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status))
+ {
+ TH_LOG("Unexpected return code for \"%s\"", path);
+ };
+}
+
+static void test_check_exec(struct __test_metadata *const _metadata,
+ const int err, const char *const path)
+{
+ int ret;
+ char *const argv[] = { (char *)path, NULL };
+
+ ret = sys_execveat(AT_FDCWD, path, argv, NULL,
+ AT_EMPTY_PATH | AT_EXECVE_CHECK);
+ if (err) {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(errno, err);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+}
+
+TEST_F_FORK(layout1, execute)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ copy_file(_metadata, bin_true, file1_s1d1);
+ copy_file(_metadata, bin_true, file1_s1d2);
+ copy_file(_metadata, bin_true, file1_s1d3);
+
+ /* Checks before file1_s1d1 being denied. */
+ test_execute(_metadata, 0, file1_s1d1);
+ test_check_exec(_metadata, 0, file1_s1d1);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ test_execute(_metadata, EACCES, file1_s1d1);
+ test_check_exec(_metadata, EACCES, file1_s1d1);
+
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ test_execute(_metadata, 0, file1_s1d2);
+ test_check_exec(_metadata, 0, file1_s1d2);
+
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ test_execute(_metadata, 0, file1_s1d3);
+ test_check_exec(_metadata, 0, file1_s1d3);
+}
+
+TEST_F_FORK(layout1, umount_sandboxer)
+{
+ int pipe_child[2], pipe_parent[2];
+ char buf_parent;
+ pid_t child;
+ int status;
+
+ copy_file(_metadata, bin_sandbox_and_launch, file1_s3d3);
+ ASSERT_EQ(0, pipe2(pipe_child, 0));
+ ASSERT_EQ(0, pipe2(pipe_parent, 0));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ char pipe_child_str[12], pipe_parent_str[12];
+ char *const argv[] = { (char *)file1_s3d3,
+ (char *)bin_wait_pipe, pipe_child_str,
+ pipe_parent_str, NULL };
+
+ /* Passes the pipe FDs to the executed binary and its child. */
+ EXPECT_EQ(0, close(pipe_child[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ snprintf(pipe_child_str, sizeof(pipe_child_str), "%d",
+ pipe_child[1]);
+ snprintf(pipe_parent_str, sizeof(pipe_parent_str), "%d",
+ pipe_parent[0]);
+
+ /*
+ * We need bin_sandbox_and_launch (copied inside the mount as
+ * file1_s3d3) to execute bin_wait_pipe (outside the mount) to
+ * make sure the mount point will not be EBUSY because of
+ * file1_s3d3 being in use. This avoids a potential race
+ * condition between the following read() and umount() calls.
+ */
+ ASSERT_EQ(0, execve(argv[0], argv, NULL))
+ {
+ TH_LOG("Failed to execute \"%s\": %s", argv[0],
+ strerror(errno));
+ };
+ _exit(1);
+ return;
+ }
+
+ EXPECT_EQ(0, close(pipe_child[1]));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ /* Waits for the child to sandbox itself. */
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that the sandboxer is tied to its mount point. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(-1, umount(dir_s3d2));
+ EXPECT_EQ(EBUSY, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* Signals the child to launch a grandchild. */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ /* Waits for the grandchild. */
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that the domain's sandboxer is not tied to its mount point. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, umount(dir_s3d2))
+ {
+ TH_LOG("Failed to umount \"%s\": %s", dir_s3d2,
+ strerror(errno));
+ };
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* Signals the grandchild to terminate. */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST_F_FORK(layout1, link)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const struct rule layer2[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies linking because of reparenting. */
+ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
+ ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
+
+ /* Prepares for next unlinks. */
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that linkind doesn't require the ability to delete a file. */
+ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
+}
+
+static int test_rename(const char *const oldpath, const char *const newpath)
+{
+ if (rename(oldpath, newpath))
+ return errno;
+ return 0;
+}
+
+static int test_exchange(const char *const oldpath, const char *const newpath)
+{
+ if (renameat2(AT_FDCWD, oldpath, AT_FDCWD, newpath, RENAME_EXCHANGE))
+ return errno;
+ return 0;
+}
+
+static int test_renameat(int olddirfd, const char *oldpath, int newdirfd,
+ const char *newpath)
+{
+ if (renameat2(olddirfd, oldpath, newdirfd, newpath, 0))
+ return errno;
+ return 0;
+}
+
+static int test_exchangeat(int olddirfd, const char *oldpath, int newdirfd,
+ const char *newpath)
+{
+ if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_EXCHANGE))
+ return errno;
+ return 0;
+}
+
+TEST_F_FORK(layout1, rename_file)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Tries to replace a file, from a directory that allows file removal,
+ * but to a different directory (which also allows file removal).
+ */
+ ASSERT_EQ(-1, rename(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+
+ /*
+ * Tries to replace a file, from a directory that denies file removal,
+ * to a different directory (which allows file removal).
+ */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file1_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Exchanges files and directories that partially allow removal. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks that file1_s2d1 cannot be removed (instead of ENOTDIR). */
+ ASSERT_EQ(-1, rename(dir_s2d2, file1_s2d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks that file1_s1d1 cannot be removed (instead of EISDIR). */
+ ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Renames files with different parents. */
+ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Exchanges and renames files with same parent. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3));
+
+ /* Exchanges files and directories with same parent, twice. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+}
+
+TEST_F_FORK(layout1, rename_dir)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Empties dir_s1d3 to allow renaming. */
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Exchanges and renames directory to a different parent. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s2d3, dir_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+
+ /*
+ * Exchanges directory to the same parent, which doesn't allow
+ * directory removal.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks that dir_s1d2 cannot be removed (instead of ENOTDIR). */
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks that dir_s1d2 cannot be removed (instead of EISDIR). */
+ ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * Exchanges and renames directory to the same parent, which allows
+ * directory removal.
+ */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, unlink(dir_s1d3));
+ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+ ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3));
+ ASSERT_EQ(0, rmdir(dir_s1d3));
+}
+
+TEST_F_FORK(layout1, reparent_refer)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {},
+ };
+ int ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving should only be allowed when the source and the destination
+ * parent directory have REFER.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d3));
+ ASSERT_EQ(ENOTEMPTY, errno);
+ ASSERT_EQ(0, unlink(file1_s2d3));
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s2d3));
+}
+
+/* Checks renames beneath dir_s1d1. */
+static void refer_denied_by_default(struct __test_metadata *const _metadata,
+ const struct rule layer1[],
+ const int layer1_err,
+ const struct rule layer2[])
+{
+ int ruleset_fd;
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * If the first layer handles LANDLOCK_ACCESS_FS_REFER (according to
+ * layer1_err), then it allows some different-parent renames and links.
+ */
+ ASSERT_EQ(layer1_err, test_rename(file1_s1d1, file1_s1d2));
+ if (layer1_err == 0)
+ ASSERT_EQ(layer1_err, test_rename(file1_s1d2, file1_s1d1));
+ ASSERT_EQ(layer1_err, test_exchange(file2_s1d1, file2_s1d2));
+ ASSERT_EQ(layer1_err, test_exchange(file2_s1d2, file2_s1d1));
+
+ ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Now, either the first or the second layer does not handle
+ * LANDLOCK_ACCESS_FS_REFER, which means that any different-parent
+ * renames and links are denied, thus making the layer handling
+ * LANDLOCK_ACCESS_FS_REFER null and void.
+ */
+ ASSERT_EQ(EXDEV, test_rename(file1_s1d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, test_exchange(file2_s1d1, file2_s1d2));
+ ASSERT_EQ(EXDEV, test_exchange(file2_s1d2, file2_s1d1));
+}
+
+const struct rule layer_dir_s1d1_refer[] = {
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {},
+};
+
+const struct rule layer_dir_s1d1_execute[] = {
+ {
+ /* Matches a parent directory. */
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+};
+
+const struct rule layer_dir_s2d1_execute[] = {
+ {
+ /* Does not match a parent directory. */
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+};
+
+/*
+ * Tests precedence over renames: denied by default for different parent
+ * directories, *with* a rule matching a parent directory, but not directly
+ * denying access (with MAKE_REG nor REMOVE).
+ */
+TEST_F_FORK(layout1, refer_denied_by_default1)
+{
+ refer_denied_by_default(_metadata, layer_dir_s1d1_refer, 0,
+ layer_dir_s1d1_execute);
+}
+
+/*
+ * Same test but this time turning around the ABI version order: the first
+ * layer does not handle LANDLOCK_ACCESS_FS_REFER.
+ */
+TEST_F_FORK(layout1, refer_denied_by_default2)
+{
+ refer_denied_by_default(_metadata, layer_dir_s1d1_execute, EXDEV,
+ layer_dir_s1d1_refer);
+}
+
+/*
+ * Tests precedence over renames: denied by default for different parent
+ * directories, *without* a rule matching a parent directory, but not directly
+ * denying access (with MAKE_REG nor REMOVE).
+ */
+TEST_F_FORK(layout1, refer_denied_by_default3)
+{
+ refer_denied_by_default(_metadata, layer_dir_s1d1_refer, 0,
+ layer_dir_s2d1_execute);
+}
+
+/*
+ * Same test but this time turning around the ABI version order: the first
+ * layer does not handle LANDLOCK_ACCESS_FS_REFER.
+ */
+TEST_F_FORK(layout1, refer_denied_by_default4)
+{
+ refer_denied_by_default(_metadata, layer_dir_s2d1_execute, EXDEV,
+ layer_dir_s1d1_refer);
+}
+
+/*
+ * Tests walking through a denied root mount.
+ */
+TEST_F_FORK(layout1, refer_mount_root_deny)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_DIR,
+ };
+ int root_fd, ruleset_fd;
+
+ /* Creates a mount object from a non-mount point. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ root_fd =
+ open_tree(AT_FDCWD, dir_s1d1,
+ AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_LE(0, root_fd);
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Link denied by Landlock: EACCES. */
+ EXPECT_EQ(-1, linkat(root_fd, ".", root_fd, "does_not_exist", 0));
+ EXPECT_EQ(EACCES, errno);
+
+ /* renameat2() always returns EBUSY. */
+ EXPECT_EQ(-1, renameat2(root_fd, ".", root_fd, "does_not_exist", 0));
+ EXPECT_EQ(EBUSY, errno);
+
+ EXPECT_EQ(0, close(root_fd));
+}
+
+TEST_F_FORK(layout1, refer_part_mount_tree_is_allowed)
+{
+ const struct rule layer1[] = {
+ {
+ /* Parent mount point. */
+ .path = dir_s3d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ /*
+ * Removing the source file is allowed because its
+ * access rights are already a superset of the
+ * destination.
+ */
+ .path = dir_s3d4,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ ASSERT_EQ(0, unlink(file1_s3d3));
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, rename(file1_s3d4, file1_s3d3));
+}
+
+TEST_F_FORK(layout1, reparent_link)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ /* Denies linking because of missing MAKE_REG. */
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Denies linking because of missing source and destination REFER. */
+ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ /* Denies linking because of missing source REFER. */
+ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Denies linking because of missing MAKE_REG. */
+ ASSERT_EQ(-1, link(file1_s2d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Denies linking because of missing destination REFER. */
+ ASSERT_EQ(-1, link(file1_s2d2, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Allows linking because of REFER and MAKE_REG. */
+ ASSERT_EQ(0, link(file1_s2d2, file1_s1d3));
+ ASSERT_EQ(0, unlink(file1_s2d2));
+ /* Reverse linking denied because of missing MAKE_REG. */
+ ASSERT_EQ(-1, link(file1_s1d3, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s2d3));
+ /* Checks reverse linking. */
+ ASSERT_EQ(0, link(file1_s1d3, file1_s2d3));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ /*
+ * This is OK for a file link, but it should not be allowed for a
+ * directory rename (because of the superset of access rights.
+ */
+ ASSERT_EQ(0, link(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
+ ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
+}
+
+TEST_F_FORK(layout1, reparent_rename)
+{
+ /* Same rules as for reparent_link. */
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ /* Denies renaming because of missing MAKE_REG. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file2_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Even denies same file exchange. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file2_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies renaming because of missing source and destination REFER. */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Denies renaming because of missing MAKE_REG, source and destination
+ * REFER.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies renaming because of missing source REFER. */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ /* Denies renaming because of missing MAKE_REG. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies renaming because of missing MAKE_REG. */
+ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Denies renaming because of missing destination REFER*/
+ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Denies exchange because of one missing MAKE_REG. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, file2_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Allows renaming because of REFER and MAKE_REG. */
+ ASSERT_EQ(0, rename(file1_s2d2, file1_s1d3));
+
+ /* Reverse renaming denied because of missing MAKE_REG. */
+ ASSERT_EQ(-1, rename(file1_s1d3, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s2d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ /* Tests reverse renaming. */
+ ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ /*
+ * This is OK for a file rename, but it should not be allowed for a
+ * directory rename (because of the superset of access rights).
+ */
+ ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ /*
+ * Tests superset restrictions applied to directories. Not only the
+ * dir_s2d3's parent (dir_s2d2) should be taken into account but also
+ * access rights tied to dir_s2d3. dir_s2d2 is missing one access right
+ * compared to dir_s1d3/file1_s1d3 (MAKE_REG) but it is provided
+ * directly by the moved dir_s2d3.
+ */
+ ASSERT_EQ(0, rename(dir_s2d3, file1_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, dir_s2d3));
+ /*
+ * The first rename is allowed but not the exchange because dir_s1d3's
+ * parent (dir_s1d2) doesn't have REFER.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(file1_s2d3, dir_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(file2_s1d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Renaming in the same directory is always allowed. */
+ ASSERT_EQ(0, rename(file2_s1d2, file1_s1d2));
+ ASSERT_EQ(0, rename(file2_s1d3, file1_s1d3));
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ /* Denies because of missing source MAKE_REG and destination REFER. */
+ ASSERT_EQ(-1, rename(dir_s2d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ /* Denies because of missing source MAKE_REG and REFER. */
+ ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+}
+
+static void
+reparent_exdev_layers_enforce1(struct __test_metadata *const _metadata)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ /* Interesting for the layer2 tests. */
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+static void
+reparent_exdev_layers_enforce2(struct __test_metadata *const _metadata)
+{
+ const struct rule layer2[] = {
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
+ },
+ {},
+ };
+ /*
+ * Same checks as before but with a second layer and a new MAKE_DIR
+ * rule (and no explicit handling of REFER).
+ */
+ const int ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_DIR, layer2);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename1)
+{
+ ASSERT_EQ(0, unlink(file1_s2d2));
+ ASSERT_EQ(0, unlink(file1_s2d3));
+
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d2 is allowed by Landlock
+ * because it doesn't inherit new access rights.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+ ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d3 is allowed, even if it
+ * gets a new inherited access rights (MAKE_REG), because MAKE_REG is
+ * already allowed for dir_s1d3.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d3));
+ ASSERT_EQ(0, rename(file1_s2d3, dir_s1d3));
+
+ /*
+ * However, moving the file1_s1d3 file below dir_s2d3 is allowed
+ * because it cannot inherit MAKE_REG right (which is dedicated to
+ * directories).
+ */
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d2 is now denied because
+ * MAKE_DIR is not tied to dir_s2d2.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d3 is forbidden because it
+ * would grants MAKE_REG and MAKE_DIR rights to it.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /*
+ * Moving the file2_s1d3 file below dir_s2d3 is denied because the
+ * second layer does not handle REFER, which is always denied by
+ * default.
+ */
+ ASSERT_EQ(-1, rename(file2_s1d3, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename2)
+{
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /* Checks EACCES predominance over EXDEV. */
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /* Modify layout! */
+ ASSERT_EQ(0, rename(file1_s1d2, file1_s2d3));
+
+ /* Without REFER source. */
+ ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /* Checks EACCES predominance over EXDEV. */
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks with actual file2_s1d2. */
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Modifying the layout is now denied because the second layer does not
+ * handle REFER, which is always denied by default.
+ */
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Without REFER source, EACCES wins over EXDEV. */
+ ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange1)
+{
+ const char *const dir_file1_s1d2 = file1_s1d2, *const dir_file2_s2d3 =
+ file2_s2d3;
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, mkdir(file1_s1d2, 0700));
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /* Error predominance with file exchange: returns EXDEV and EACCES. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * Checks with directories which creation could be allowed, but denied
+ * because of access rights that would be inherited.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+ dir_file2_s2d3, RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+ dir_file1_s1d2, RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks with same access rights. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+
+ /* Checks with different (child-only) access rights. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+
+ /*
+ * Checks that exchange between file and directory are consistent.
+ *
+ * Moving a file (file1_s2d2) to a directory which only grants more
+ * directory-related access rights is allowed, and at the same time
+ * moving a directory (dir_file2_s2d3) to another directory which
+ * grants less access rights is allowed too.
+ *
+ * See layout1.reparent_exdev_layers_exchange3 for inverted arguments.
+ */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ /*
+ * However, moving back the directory is denied because it would get
+ * more access rights than the current state and because file creation
+ * is forbidden (in dir_s2d2).
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /* Error predominance with file exchange: returns EXDEV and EACCES. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Checks with directories which creation is now denied. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+ dir_file2_s2d3, RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+ dir_file1_s1d2, RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Checks with different (child-only) access rights. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ /* Denied because of MAKE_DIR. */
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Checks with different (child-only) access rights. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+ RENAME_EXCHANGE));
+ /* Denied because of MAKE_DIR. */
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* See layout1.reparent_exdev_layers_exchange2 for complement. */
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange2)
+{
+ const char *const dir_file2_s2d3 = file2_s2d3;
+
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+ reparent_exdev_layers_enforce1(_metadata);
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /* Checks that exchange between file and directory are consistent. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange3)
+{
+ const char *const dir_file2_s2d3 = file2_s2d3;
+
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /*
+ * Checks that exchange between file and directory are consistent,
+ * including with inverted arguments (see
+ * layout1.reparent_exdev_layers_exchange1).
+ */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_remove)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Access denied because of wrong/swapped remove file/dir. */
+ ASSERT_EQ(-1, rename(file1_s1d1, dir_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Access allowed thanks to the matching rights. */
+ ASSERT_EQ(-1, rename(file1_s2d1, dir_s1d2));
+ ASSERT_EQ(EISDIR, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d1));
+ ASSERT_EQ(ENOTDIR, errno);
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+ ASSERT_EQ(ENOTDIR, errno);
+ ASSERT_EQ(0, unlink(file1_s2d1));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d1));
+
+ /* Effectively removes a file and a directory by exchanging them. */
+ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_dom_superset)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = file1_s1d2,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO,
+ layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving file1_s1d2 beneath dir_s2d3 would grant it the READ_FILE
+ * access right.
+ */
+ ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving file1_s1d2 should be allowed even if dir_s2d2 grants a
+ * superset of access rights compared to dir_s1d2, because file1_s1d2
+ * already has these access rights anyway.
+ */
+ ASSERT_EQ(0, rename(file1_s1d2, file1_s2d2));
+ ASSERT_EQ(0, rename(file1_s2d2, file1_s1d2));
+
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving dir_s1d3 beneath dir_s2d3 would grant it the MAKE_FIFO access
+ * right.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving dir_s1d3 should be allowed even if dir_s2d2 grants a superset
+ * of access rights compared to dir_s1d2, because dir_s1d3 already has
+ * these access rights anyway.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+ ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+ /*
+ * Moving file1_s2d3 beneath dir_s1d2 is allowed, but moving it back
+ * will be denied because the new inherited access rights from dir_s1d2
+ * will be less than the destination (original) dir_s2d3. This is a
+ * sinkhole scenario where we cannot move back files or directories.
+ */
+ ASSERT_EQ(0, rename(file1_s2d3, file2_s1d2));
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ /*
+ * Checks similar directory one-way move: dir_s2d3 loses EXECUTE and
+ * MAKE_SOCK which were inherited from dir_s1d3.
+ */
+ ASSERT_EQ(0, rename(dir_s2d3, file2_s1d2));
+ ASSERT_EQ(-1, rename(file2_s1d2, dir_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+}
+
+TEST_F_FORK(layout1, remove_dir)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, rmdir(dir_s1d3));
+ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+ ASSERT_EQ(0, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR));
+
+ /* dir_s1d2 itself cannot be removed. */
+ ASSERT_EQ(-1, rmdir(dir_s1d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d2, AT_REMOVEDIR));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rmdir(dir_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d1, AT_REMOVEDIR));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, remove_file)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, unlink(file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, unlinkat(AT_FDCWD, file1_s1d1, 0));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0));
+}
+
+static void test_make_file(struct __test_metadata *const _metadata,
+ const __u64 access, const mode_t mode,
+ const dev_t dev)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = access,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file2_s1d1));
+ ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev))
+ {
+ TH_LOG("Failed to make file \"%s\": %s", file2_s1d1,
+ strerror(errno));
+ };
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d2));
+
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, mknod(file1_s1d1, mode | 0400, dev));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+
+ ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev))
+ {
+ TH_LOG("Failed to make file \"%s\": %s", file1_s1d2,
+ strerror(errno));
+ };
+ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
+
+ ASSERT_EQ(0, mknod(file1_s1d3, mode | 0400, dev));
+ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
+}
+
+TEST_F_FORK(layout1, make_char)
+{
+ /* Creates a /dev/null device. */
+ set_cap(_metadata, CAP_MKNOD);
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR,
+ makedev(1, 3));
+}
+
+TEST_F_FORK(layout1, make_block)
+{
+ /* Creates a /dev/loop0 device. */
+ set_cap(_metadata, CAP_MKNOD);
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK,
+ makedev(7, 0));
+}
+
+TEST_F_FORK(layout1, make_reg_1)
+{
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, S_IFREG, 0);
+}
+
+TEST_F_FORK(layout1, make_reg_2)
+{
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, 0, 0);
+}
+
+TEST_F_FORK(layout1, make_sock)
+{
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_SOCK, S_IFSOCK, 0);
+}
+
+TEST_F_FORK(layout1, make_fifo)
+{
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_FIFO, S_IFIFO, 0);
+}
+
+TEST_F_FORK(layout1, make_sym)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_SYM,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file2_s1d1));
+ ASSERT_EQ(0, symlink("none", file2_s1d1));
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d2));
+
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, symlink("none", file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+
+ ASSERT_EQ(0, symlink("none", file1_s1d2));
+ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
+
+ ASSERT_EQ(0, symlink("none", file1_s1d3));
+ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
+}
+
+TEST_F_FORK(layout1, make_dir)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Uses file_* as directory names. */
+ ASSERT_EQ(-1, mkdir(file1_s1d1, 0700));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, mkdir(file1_s1d2, 0700));
+ ASSERT_EQ(0, mkdir(file1_s1d3, 0700));
+}
+
+static int open_proc_fd(struct __test_metadata *const _metadata, const int fd,
+ const int open_flags)
+{
+ static const char path_template[] = "/proc/self/fd/%d";
+ char procfd_path[sizeof(path_template) + 10];
+ const int procfd_path_size =
+ snprintf(procfd_path, sizeof(procfd_path), path_template, fd);
+
+ ASSERT_LT(procfd_path_size, sizeof(procfd_path));
+ return open(procfd_path, open_flags);
+}
+
+TEST_F_FORK(layout1, proc_unlinked_file)
+{
+ const struct rule rules[] = {
+ {
+ .path = file1_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ int reg_fd, proc_fd;
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_WRITE_FILE,
+ rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ reg_fd = open(file1_s1d2, O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, reg_fd);
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ proc_fd = open_proc_fd(_metadata, reg_fd, O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, proc_fd);
+ ASSERT_EQ(0, close(proc_fd));
+
+ proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC);
+ ASSERT_EQ(-1, proc_fd)
+ {
+ TH_LOG("Successfully opened /proc/self/fd/%d: %s", reg_fd,
+ strerror(errno));
+ }
+ ASSERT_EQ(EACCES, errno);
+
+ ASSERT_EQ(0, close(reg_fd));
+}
+
+TEST_F_FORK(layout1, proc_pipe)
+{
+ int proc_fd;
+ int pipe_fds[2];
+ char buf = '\0';
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ /* Limits read and write access to files tied to the filesystem. */
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks enforcement for normal files. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+
+ /* Checks access to pipes through FD. */
+ ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC));
+ ASSERT_EQ(1, write(pipe_fds[1], ".", 1))
+ {
+ TH_LOG("Failed to write in pipe: %s", strerror(errno));
+ }
+ ASSERT_EQ(1, read(pipe_fds[0], &buf, 1));
+ ASSERT_EQ('.', buf);
+
+ /* Checks write access to pipe through /proc/self/fd . */
+ proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC);
+ ASSERT_LE(0, proc_fd);
+ ASSERT_EQ(1, write(proc_fd, ".", 1))
+ {
+ TH_LOG("Failed to write through /proc/self/fd/%d: %s",
+ pipe_fds[1], strerror(errno));
+ }
+ ASSERT_EQ(0, close(proc_fd));
+
+ /* Checks read access to pipe through /proc/self/fd . */
+ proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, proc_fd);
+ buf = '\0';
+ ASSERT_EQ(1, read(proc_fd, &buf, 1))
+ {
+ TH_LOG("Failed to read through /proc/self/fd/%d: %s",
+ pipe_fds[1], strerror(errno));
+ }
+ ASSERT_EQ(0, close(proc_fd));
+
+ ASSERT_EQ(0, close(pipe_fds[0]));
+ ASSERT_EQ(0, close(pipe_fds[1]));
+}
+
+/* Invokes truncate(2) and returns its errno or 0. */
+static int test_truncate(const char *const path)
+{
+ if (truncate(path, 10) < 0)
+ return errno;
+ return 0;
+}
+
+/*
+ * Invokes creat(2) and returns its errno or 0.
+ * Closes the opened file descriptor on success.
+ */
+static int test_creat(const char *const path)
+{
+ int fd = creat(path, 0600);
+
+ if (fd < 0)
+ return errno;
+
+ /*
+ * Mixing error codes from close(2) and creat(2) should not lead to any
+ * (access type) confusion for this test.
+ */
+ if (close(fd) < 0)
+ return errno;
+ return 0;
+}
+
+/*
+ * Exercises file truncation when it's not restricted,
+ * as it was the case before LANDLOCK_ACCESS_FS_TRUNCATE existed.
+ */
+TEST_F_FORK(layout1, truncate_unhandled)
+{
+ const char *const file_r = file1_s1d1;
+ const char *const file_w = file2_s1d1;
+ const char *const file_none = file1_s1d2;
+ const struct rule rules[] = {
+ {
+ .path = file_r,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = file_w,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ /* Implicitly: No rights for file_none. */
+ {},
+ };
+
+ const __u64 handled = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE;
+ int ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, handled, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Checks read right: truncate and open with O_TRUNC work, unless the
+ * file is attempted to be opened for writing.
+ */
+ EXPECT_EQ(0, test_truncate(file_r));
+ EXPECT_EQ(0, test_open(file_r, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_r, O_WRONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_creat(file_r));
+
+ /*
+ * Checks write right: truncate and open with O_TRUNC work, unless the
+ * file is attempted to be opened for reading.
+ */
+ EXPECT_EQ(0, test_truncate(file_w));
+ EXPECT_EQ(EACCES, test_open(file_w, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(0, test_open(file_w, O_WRONLY | O_TRUNC));
+ EXPECT_EQ(0, test_creat(file_w));
+
+ /*
+ * Checks "no rights" case: truncate works but all open attempts fail,
+ * including creat.
+ */
+ EXPECT_EQ(0, test_truncate(file_none));
+ EXPECT_EQ(EACCES, test_open(file_none, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_none, O_WRONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_creat(file_none));
+}
+
+TEST_F_FORK(layout1, truncate)
+{
+ const char *const file_rwt = file1_s1d1;
+ const char *const file_rw = file2_s1d1;
+ const char *const file_rt = file1_s1d2;
+ const char *const file_t = file2_s1d2;
+ const char *const file_none = file1_s1d3;
+ const char *const dir_t = dir_s2d1;
+ const char *const file_in_dir_t = file1_s2d1;
+ const char *const dir_w = dir_s3d1;
+ const char *const file_in_dir_w = file1_s3d1;
+ const struct rule rules[] = {
+ {
+ .path = file_rwt,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE |
+ LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ {
+ .path = file_rw,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = file_rt,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ {
+ .path = file_t,
+ .access = LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ /* Implicitly: No access rights for file_none. */
+ {
+ .path = dir_t,
+ .access = LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ {
+ .path = dir_w,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const __u64 handled = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE |
+ LANDLOCK_ACCESS_FS_TRUNCATE;
+ int ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, handled, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks read, write and truncate rights: truncation works. */
+ EXPECT_EQ(0, test_truncate(file_rwt));
+ EXPECT_EQ(0, test_open(file_rwt, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(0, test_open(file_rwt, O_WRONLY | O_TRUNC));
+
+ /* Checks read and write rights: no truncate variant works. */
+ EXPECT_EQ(EACCES, test_truncate(file_rw));
+ EXPECT_EQ(EACCES, test_open(file_rw, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_rw, O_WRONLY | O_TRUNC));
+
+ /*
+ * Checks read and truncate rights: truncation works.
+ *
+ * Note: Files can get truncated using open() even with O_RDONLY.
+ */
+ EXPECT_EQ(0, test_truncate(file_rt));
+ EXPECT_EQ(0, test_open(file_rt, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_rt, O_WRONLY | O_TRUNC));
+
+ /* Checks truncate right: truncate works, but can't open file. */
+ EXPECT_EQ(0, test_truncate(file_t));
+ EXPECT_EQ(EACCES, test_open(file_t, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_t, O_WRONLY | O_TRUNC));
+
+ /* Checks "no rights" case: No form of truncation works. */
+ EXPECT_EQ(EACCES, test_truncate(file_none));
+ EXPECT_EQ(EACCES, test_open(file_none, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_none, O_WRONLY | O_TRUNC));
+
+ /*
+ * Checks truncate right on directory: truncate works on contained
+ * files.
+ */
+ EXPECT_EQ(0, test_truncate(file_in_dir_t));
+ EXPECT_EQ(EACCES, test_open(file_in_dir_t, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_in_dir_t, O_WRONLY | O_TRUNC));
+
+ /*
+ * Checks creat in dir_w: This requires the truncate right when
+ * overwriting an existing file, but does not require it when the file
+ * is new.
+ */
+ EXPECT_EQ(EACCES, test_creat(file_in_dir_w));
+
+ ASSERT_EQ(0, unlink(file_in_dir_w));
+ EXPECT_EQ(0, test_creat(file_in_dir_w));
+}
+
+/* Invokes ftruncate(2) and returns its errno or 0. */
+static int test_ftruncate(int fd)
+{
+ if (ftruncate(fd, 10) < 0)
+ return errno;
+ return 0;
+}
+
+TEST_F_FORK(layout1, ftruncate)
+{
+ /*
+ * This test opens a new file descriptor at different stages of
+ * Landlock restriction:
+ *
+ * without restriction: ftruncate works
+ * something else but truncate restricted: ftruncate works
+ * truncate restricted and permitted: ftruncate works
+ * truncate restricted and not permitted: ftruncate fails
+ *
+ * Whether this works or not is expected to depend on the time when the
+ * FD was opened, not to depend on the time when ftruncate() was
+ * called.
+ */
+ const char *const path = file1_s1d1;
+ const __u64 handled1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE;
+ const struct rule layer1[] = {
+ {
+ .path = path,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const __u64 handled2 = LANDLOCK_ACCESS_FS_TRUNCATE;
+ const struct rule layer2[] = {
+ {
+ .path = path,
+ .access = LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ {},
+ };
+ const __u64 handled3 = LANDLOCK_ACCESS_FS_TRUNCATE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE;
+ const struct rule layer3[] = {
+ {
+ .path = path,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int fd_layer0, fd_layer1, fd_layer2, fd_layer3, ruleset_fd;
+
+ fd_layer0 = open(path, O_WRONLY);
+ EXPECT_EQ(0, test_ftruncate(fd_layer0));
+
+ ruleset_fd = create_ruleset(_metadata, handled1, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd_layer1 = open(path, O_WRONLY);
+ EXPECT_EQ(0, test_ftruncate(fd_layer0));
+ EXPECT_EQ(0, test_ftruncate(fd_layer1));
+
+ ruleset_fd = create_ruleset(_metadata, handled2, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd_layer2 = open(path, O_WRONLY);
+ EXPECT_EQ(0, test_ftruncate(fd_layer0));
+ EXPECT_EQ(0, test_ftruncate(fd_layer1));
+ EXPECT_EQ(0, test_ftruncate(fd_layer2));
+
+ ruleset_fd = create_ruleset(_metadata, handled3, layer3);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd_layer3 = open(path, O_WRONLY);
+ EXPECT_EQ(0, test_ftruncate(fd_layer0));
+ EXPECT_EQ(0, test_ftruncate(fd_layer1));
+ EXPECT_EQ(0, test_ftruncate(fd_layer2));
+ EXPECT_EQ(EACCES, test_ftruncate(fd_layer3));
+
+ ASSERT_EQ(0, close(fd_layer0));
+ ASSERT_EQ(0, close(fd_layer1));
+ ASSERT_EQ(0, close(fd_layer2));
+ ASSERT_EQ(0, close(fd_layer3));
+}
+
+/* clang-format off */
+FIXTURE(ftruncate) {};
+/* clang-format on */
+
+FIXTURE_SETUP(ftruncate)
+{
+ prepare_layout(_metadata);
+ create_file(_metadata, file1_s1d1);
+}
+
+FIXTURE_TEARDOWN_PARENT(ftruncate)
+{
+ EXPECT_EQ(0, remove_path(file1_s1d1));
+ cleanup_layout(_metadata);
+}
+
+FIXTURE_VARIANT(ftruncate)
+{
+ const __u64 handled;
+ const __u64 allowed;
+ const int expected_open_result;
+ const int expected_ftruncate_result;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, w_w) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ .expected_open_result = 0,
+ .expected_ftruncate_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, t_t) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_TRUNCATE,
+ .allowed = LANDLOCK_ACCESS_FS_TRUNCATE,
+ .expected_open_result = 0,
+ .expected_ftruncate_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, wt_w) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE,
+ .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ .expected_open_result = 0,
+ .expected_ftruncate_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, wt_wt) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE,
+ .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE,
+ .expected_open_result = 0,
+ .expected_ftruncate_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, wt_t) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE,
+ .allowed = LANDLOCK_ACCESS_FS_TRUNCATE,
+ .expected_open_result = EACCES,
+};
+
+TEST_F_FORK(ftruncate, open_and_ftruncate)
+{
+ const char *const path = file1_s1d1;
+ const struct rule rules[] = {
+ {
+ .path = path,
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd = open(path, O_WRONLY);
+ EXPECT_EQ(variant->expected_open_result, (fd < 0 ? errno : 0));
+ if (fd >= 0) {
+ EXPECT_EQ(variant->expected_ftruncate_result,
+ test_ftruncate(fd));
+ ASSERT_EQ(0, close(fd));
+ }
+}
+
+TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes)
+{
+ int child, fd, status;
+ int socket_fds[2];
+
+ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
+ socket_fds));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ /*
+ * Enables Landlock in the child process, open a file descriptor
+ * where truncation is forbidden and send it to the
+ * non-landlocked parent process.
+ */
+ const char *const path = file1_s1d1;
+ const struct rule rules[] = {
+ {
+ .path = path,
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int fd, ruleset_fd;
+
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd = open(path, O_WRONLY);
+ ASSERT_EQ(variant->expected_open_result, (fd < 0 ? errno : 0));
+
+ if (fd >= 0) {
+ ASSERT_EQ(0, send_fd(socket_fds[0], fd));
+ ASSERT_EQ(0, close(fd));
+ }
+
+ ASSERT_EQ(0, close(socket_fds[0]));
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ if (variant->expected_open_result == 0) {
+ fd = recv_fd(socket_fds[1]);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(variant->expected_ftruncate_result,
+ test_ftruncate(fd));
+ ASSERT_EQ(0, close(fd));
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ ASSERT_EQ(0, close(socket_fds[0]));
+ ASSERT_EQ(0, close(socket_fds[1]));
+}
+
+/* Invokes the FS_IOC_GETFLAGS IOCTL and returns its errno or 0. */
+static int test_fs_ioc_getflags_ioctl(int fd)
+{
+ uint32_t flags;
+
+ if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
+ return errno;
+ return 0;
+}
+
+TEST(memfd_ftruncate_and_ioctl)
+{
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ int ruleset_fd, fd, i;
+
+ /*
+ * We exercise the same test both with and without Landlock enabled, to
+ * ensure that it behaves the same in both cases.
+ */
+ for (i = 0; i < 2; i++) {
+ /* Creates a new memfd. */
+ fd = memfd_create("name", MFD_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ /*
+ * Checks that operations associated with the opened file
+ * (ftruncate, ioctl) are permitted on file descriptors that are
+ * created in ways other than open(2).
+ */
+ EXPECT_EQ(0, test_ftruncate(fd));
+ EXPECT_EQ(0, test_fs_ioc_getflags_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+ }
+}
+
+static int test_fionread_ioctl(int fd)
+{
+ size_t sz = 0;
+
+ if (ioctl(fd, FIONREAD, &sz) < 0 && errno == EACCES)
+ return errno;
+ return 0;
+}
+
+TEST_F_FORK(layout1, o_path_ftruncate_and_ioctl)
+{
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ int ruleset_fd, fd;
+
+ /*
+ * Checks that for files opened with O_PATH, both ioctl(2) and
+ * ftruncate(2) yield EBADF, as it is documented in open(2) for the
+ * O_PATH flag.
+ */
+ fd = open(dir_s1d1, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(EBADF, test_ftruncate(fd));
+ EXPECT_EQ(EBADF, test_fs_ioc_getflags_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Checks that after enabling Landlock,
+ * - the file can still be opened with O_PATH
+ * - both ioctl and truncate still yield EBADF (not EACCES).
+ */
+ fd = open(dir_s1d1, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(EBADF, test_ftruncate(fd));
+ EXPECT_EQ(EBADF, test_fs_ioc_getflags_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+}
+
+/*
+ * ioctl_error - generically call the given ioctl with a pointer to a
+ * sufficiently large zeroed-out memory region.
+ *
+ * Returns the IOCTLs error, or 0.
+ */
+static int ioctl_error(struct __test_metadata *const _metadata, int fd,
+ unsigned int cmd)
+{
+ char buf[128]; /* sufficiently large */
+ int res, stdinbak_fd;
+
+ /*
+ * Depending on the IOCTL command, parts of the zeroed-out buffer might
+ * be interpreted as file descriptor numbers. We do not want to
+ * accidentally operate on file descriptor 0 (stdin), so we temporarily
+ * move stdin to a different FD and close FD 0 for the IOCTL call.
+ */
+ stdinbak_fd = dup(0);
+ ASSERT_LT(0, stdinbak_fd);
+ ASSERT_EQ(0, close(0));
+
+ /* Invokes the IOCTL with a zeroed-out buffer. */
+ bzero(&buf, sizeof(buf));
+ res = ioctl(fd, cmd, &buf);
+
+ /* Restores the old FD 0 and closes the backup FD. */
+ ASSERT_EQ(0, dup2(stdinbak_fd, 0));
+ ASSERT_EQ(0, close(stdinbak_fd));
+
+ if (res < 0)
+ return errno;
+
+ return 0;
+}
+
+/* Define some linux/falloc.h IOCTL commands which are not available in uapi headers. */
+struct space_resv {
+ __s16 l_type;
+ __s16 l_whence;
+ __s64 l_start;
+ __s64 l_len; /* len == 0 means until end of file */
+ __s32 l_sysid;
+ __u32 l_pid;
+ __s32 l_pad[4]; /* reserved area */
+};
+
+#define FS_IOC_RESVSP _IOW('X', 40, struct space_resv)
+#define FS_IOC_UNRESVSP _IOW('X', 41, struct space_resv)
+#define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv)
+#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv)
+#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv)
+
+/*
+ * Tests a series of blanket-permitted and denied IOCTLs.
+ */
+TEST_F_FORK(layout1, blanket_permitted_ioctls)
+{
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ };
+ int ruleset_fd, fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd = open("/dev/null", O_RDWR | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ /*
+ * Checks permitted commands.
+ * These ones may return errors, but should not be blocked by Landlock.
+ */
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOCLEX));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIONCLEX));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIONBIO));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOASYNC));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOQSIZE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIFREEZE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FITHAW));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_FIEMAP));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIGETBSZ));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FICLONE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FICLONERANGE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIDEDUPERANGE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFSUUID));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFSSYSFSPATH));
+
+ /*
+ * Checks blocked commands.
+ * A call to a blocked IOCTL command always returns EACCES.
+ */
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIONREAD));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFLAGS));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_SETFLAGS));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_FSGETXATTR));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_FSSETXATTR));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIBMAP));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_RESVSP));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_RESVSP64));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_UNRESVSP));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_UNRESVSP64));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_ZERO_RANGE));
+
+ /* Default case is also blocked. */
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, 0xc00ffeee));
+
+ ASSERT_EQ(0, close(fd));
+}
+
+/*
+ * Named pipes are not governed by the LANDLOCK_ACCESS_FS_IOCTL_DEV right,
+ * because they are not character or block devices.
+ */
+TEST_F_FORK(layout1, named_pipe_ioctl)
+{
+ pid_t child_pid;
+ int fd, ruleset_fd;
+ const char *const path = file1_s1d1;
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ };
+
+ ASSERT_EQ(0, unlink(path));
+ ASSERT_EQ(0, mkfifo(path, 0600));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* The child process opens the pipe for writing. */
+ child_pid = fork();
+ ASSERT_NE(-1, child_pid);
+ if (child_pid == 0) {
+ fd = open(path, O_WRONLY);
+ close(fd);
+ exit(0);
+ }
+
+ fd = open(path, O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ /* FIONREAD is implemented by pipefifo_fops. */
+ EXPECT_EQ(0, test_fionread_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+ ASSERT_EQ(0, unlink(path));
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, NULL, 0));
+}
+
+/* For named UNIX domain sockets, no IOCTL restrictions apply. */
+TEST_F_FORK(layout1, named_unix_domain_socket_ioctl)
+{
+ const char *const path = file1_s1d1;
+ int srv_fd, cli_fd, ruleset_fd;
+ socklen_t size;
+ struct sockaddr_un srv_un, cli_un;
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ };
+
+ /* Sets up a server */
+ srv_un.sun_family = AF_UNIX;
+ strncpy(srv_un.sun_path, path, sizeof(srv_un.sun_path));
+
+ ASSERT_EQ(0, unlink(path));
+ srv_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, srv_fd);
+
+ size = offsetof(struct sockaddr_un, sun_path) + strlen(srv_un.sun_path);
+ ASSERT_EQ(0, bind(srv_fd, (struct sockaddr *)&srv_un, size));
+ ASSERT_EQ(0, listen(srv_fd, 10 /* qlen */));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Sets up a client connection to it */
+ cli_un.sun_family = AF_UNIX;
+ cli_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, cli_fd);
+
+ size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path);
+ ASSERT_EQ(0, bind(cli_fd, (struct sockaddr *)&cli_un, size));
+
+ bzero(&cli_un, sizeof(cli_un));
+ cli_un.sun_family = AF_UNIX;
+ strncpy(cli_un.sun_path, path, sizeof(cli_un.sun_path));
+ size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path);
+
+ ASSERT_EQ(0, connect(cli_fd, (struct sockaddr *)&cli_un, size));
+
+ /* FIONREAD and other IOCTLs should not be forbidden. */
+ EXPECT_EQ(0, test_fionread_ioctl(cli_fd));
+
+ ASSERT_EQ(0, close(cli_fd));
+}
+
+/* clang-format off */
+FIXTURE(ioctl) {};
+
+FIXTURE_SETUP(ioctl) {};
+
+FIXTURE_TEARDOWN(ioctl) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(ioctl)
+{
+ const __u64 handled;
+ const __u64 allowed;
+ const mode_t open_mode;
+ /*
+ * FIONREAD is used as a characteristic device-specific IOCTL command.
+ * It is implemented in fs/ioctl.c for regular files,
+ * but we do not blanket-permit it for devices.
+ */
+ const int expected_fionread_result;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ioctl, handled_i_allowed_none) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .allowed = 0,
+ .open_mode = O_RDWR,
+ .expected_fionread_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ioctl, handled_i_allowed_i) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .allowed = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .open_mode = O_RDWR,
+ .expected_fionread_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ioctl, unhandled) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_EXECUTE,
+ .allowed = LANDLOCK_ACCESS_FS_EXECUTE,
+ .open_mode = O_RDWR,
+ .expected_fionread_result = 0,
+};
+
+TEST_F_FORK(ioctl, handle_dir_access_file)
+{
+ const int flag = 0;
+ const struct rule rules[] = {
+ {
+ .path = "/dev",
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int file_fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ file_fd = open("/dev/zero", variant->open_mode);
+ ASSERT_LE(0, file_fd);
+
+ /* Checks that IOCTL commands return the expected errors. */
+ EXPECT_EQ(variant->expected_fionread_result,
+ test_fionread_ioctl(file_fd));
+
+ /* Checks that unrestrictable commands are unrestricted. */
+ EXPECT_EQ(0, ioctl(file_fd, FIOCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONBIO, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIOASYNC, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIGETBSZ, &flag));
+
+ ASSERT_EQ(0, close(file_fd));
+}
+
+TEST_F_FORK(ioctl, handle_dir_access_dir)
+{
+ const int flag = 0;
+ const struct rule rules[] = {
+ {
+ .path = "/dev",
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int dir_fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Ignore variant->open_mode for this test, as we intend to open a
+ * directory. If the directory can not be opened, the variant is
+ * infeasible to test with an opened directory.
+ */
+ dir_fd = open("/dev", O_RDONLY);
+ if (dir_fd < 0)
+ return;
+
+ /*
+ * Checks that IOCTL commands return the expected errors.
+ * We do not use the expected values from the fixture here.
+ *
+ * When using IOCTL on a directory, no Landlock restrictions apply.
+ */
+ EXPECT_EQ(0, test_fionread_ioctl(dir_fd));
+
+ /* Checks that unrestrictable commands are unrestricted. */
+ EXPECT_EQ(0, ioctl(dir_fd, FIOCLEX));
+ EXPECT_EQ(0, ioctl(dir_fd, FIONCLEX));
+ EXPECT_EQ(0, ioctl(dir_fd, FIONBIO, &flag));
+ EXPECT_EQ(0, ioctl(dir_fd, FIOASYNC, &flag));
+ EXPECT_EQ(0, ioctl(dir_fd, FIGETBSZ, &flag));
+
+ ASSERT_EQ(0, close(dir_fd));
+}
+
+TEST_F_FORK(ioctl, handle_file_access_file)
+{
+ const int flag = 0;
+ const struct rule rules[] = {
+ {
+ .path = "/dev/zero",
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int file_fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ file_fd = open("/dev/zero", variant->open_mode);
+ ASSERT_LE(0, file_fd)
+ {
+ TH_LOG("Failed to open /dev/zero: %s", strerror(errno));
+ }
+
+ /* Checks that IOCTL commands return the expected errors. */
+ EXPECT_EQ(variant->expected_fionread_result,
+ test_fionread_ioctl(file_fd));
+
+ /* Checks that unrestrictable commands are unrestricted. */
+ EXPECT_EQ(0, ioctl(file_fd, FIOCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONBIO, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIOASYNC, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIGETBSZ, &flag));
+
+ ASSERT_EQ(0, close(file_fd));
+}
+
+/* clang-format off */
+FIXTURE(layout1_bind) {};
+/* clang-format on */
+
+static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3";
+static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1";
+
+/* Move targets for disconnected path tests. */
+static const char dir_s4d1[] = TMP_DIR "/s4d1";
+static const char file1_s4d1[] = TMP_DIR "/s4d1/f1";
+static const char file2_s4d1[] = TMP_DIR "/s4d1/f2";
+static const char dir_s4d2[] = TMP_DIR "/s4d1/s4d2";
+static const char file1_s4d2[] = TMP_DIR "/s4d1/s4d2/f1";
+static const char file1_name[] = "f1";
+static const char file2_name[] = "f2";
+
+FIXTURE_SETUP(layout1_bind)
+{
+ prepare_layout(_metadata);
+
+ create_layout1(_metadata);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount(dir_s1d2, dir_s2d2, NULL, MS_BIND, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+FIXTURE_TEARDOWN_PARENT(layout1_bind)
+{
+ /* umount(dir_s2d2)) is handled by namespace lifetime. */
+
+ remove_path(file1_s4d1);
+ remove_path(file2_s4d1);
+
+ remove_layout1(_metadata);
+
+ cleanup_layout(_metadata);
+}
+
+/*
+ * layout1_bind hierarchy:
+ *
+ * tmp
+ * ├── s1d1
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d2
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d3 [disconnected by path_disconnected]
+ * │   ├── f1
+ * │   └── f2
+ * ├── s2d1
+ * │   ├── f1
+ * │   └── s2d2 [bind mount from s1d2]
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d3
+ * │   ├── f1
+ * │   └── f2
+ * ├── s3d1
+ * │   └── s3d2
+ * │   └── s3d3
+ * └── s4d1 [renamed from s1d3 by path_disconnected]
+ *    ├── f1
+ *    ├── f2
+ * └── s4d2
+ * └── f1
+ */
+
+TEST_F_FORK(layout1_bind, no_restriction)
+{
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+ ASSERT_EQ(ENOENT, test_open(dir_s2d3, O_RDONLY));
+ ASSERT_EQ(ENOENT, test_open(file1_s2d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
+}
+
+TEST_F_FORK(layout1_bind, same_content_same_file)
+{
+ /*
+ * Sets access right on parent directories of both source and
+ * destination mount points.
+ */
+ const struct rule layer1_parent[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = dir_s2d1,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ /*
+ * Sets access rights on the same bind-mounted directories. The result
+ * should be ACCESS_RW for both directories, but not both hierarchies
+ * because of the first layer.
+ */
+ const struct rule layer2_mount_point[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ /* Only allow read-access to the s1d3 hierarchies. */
+ const struct rule layer3_source[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ /* Removes all access rights. */
+ const struct rule layer4_destination[] = {
+ {
+ .path = bind_file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ /* Sets rules for the parent directories. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_parent);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks source hierarchy. */
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* Checks destination hierarchy. */
+ ASSERT_EQ(0, test_open(file1_s2d1, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
+
+ /* Sets rules for the mount points. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_mount_point);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks source hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* Checks destination hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s2d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s2d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
+ ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /* Sets a (shared) rule only on the source. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_source);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks source hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /* Checks destination hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s2d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s2d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /* Sets a (shared) rule only on the destination. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_destination);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks source hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+
+ /* Checks destination hierarchy. */
+ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
+}
+
+TEST_F_FORK(layout1_bind, reparent_cross_mount)
+{
+ const struct rule layer1[] = {
+ {
+ /* dir_s2d1 is beneath the dir_s2d2 mount point. */
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = bind_dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_EXECUTE, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks basic denied move. */
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks real cross-mount move (Landlock is not involved). */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks move that will give more accesses. */
+ ASSERT_EQ(-1, rename(file1_s2d2, bind_file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks legitimate downgrade move. */
+ ASSERT_EQ(0, rename(bind_file1_s1d3, file1_s2d2));
+}
+
+/*
+ * Make sure access to file through a disconnected path works as expected.
+ * This test moves s1d3 to s4d1.
+ */
+TEST_F_FORK(layout1_bind, path_disconnected)
+{
+ const struct rule layer1_allow_all[] = {
+ {
+ .path = TMP_DIR,
+ .access = ACCESS_ALL,
+ },
+ {},
+ };
+ const struct rule layer2_allow_just_f1[] = {
+ {
+ .path = file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct rule layer3_only_s1d2[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+
+ /* Landlock should not deny access just because it is disconnected. */
+ int ruleset_fd_l1 =
+ create_ruleset(_metadata, ACCESS_ALL, layer1_allow_all);
+
+ /* Creates the new ruleset now before we move the dir containing the file. */
+ int ruleset_fd_l2 =
+ create_ruleset(_metadata, ACCESS_RW, layer2_allow_just_f1);
+ int ruleset_fd_l3 =
+ create_ruleset(_metadata, ACCESS_RW, layer3_only_s1d2);
+ int bind_s1d3_fd;
+
+ ASSERT_LE(0, ruleset_fd_l1);
+ ASSERT_LE(0, ruleset_fd_l2);
+ ASSERT_LE(0, ruleset_fd_l3);
+
+ enforce_ruleset(_metadata, ruleset_fd_l1);
+ EXPECT_EQ(0, close(ruleset_fd_l1));
+
+ bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, bind_s1d3_fd);
+
+ /* Tests access is possible before we move. */
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, "..", O_RDONLY | O_DIRECTORY));
+
+ /* Makes it disconnected. */
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s4d1))
+ {
+ TH_LOG("Failed to rename %s to %s: %s", dir_s1d3, dir_s4d1,
+ strerror(errno));
+ }
+
+ /* Tests that access is still possible. */
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+
+ /*
+ * Tests that ".." is not possible (not because of Landlock, but just
+ * because it's disconnected).
+ */
+ EXPECT_EQ(ENOENT,
+ test_open_rel(bind_s1d3_fd, "..", O_RDONLY | O_DIRECTORY));
+
+ /* This should still work with a narrower rule. */
+ enforce_ruleset(_metadata, ruleset_fd_l2);
+ EXPECT_EQ(0, close(ruleset_fd_l2));
+
+ EXPECT_EQ(0, test_open(file1_s4d1, O_RDONLY));
+ /*
+ * Accessing a file through a disconnected file descriptor can still be
+ * allowed by a rule tied to this file, even if it is no longer visible in
+ * its mount point.
+ */
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(EACCES, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+
+ enforce_ruleset(_metadata, ruleset_fd_l3);
+ EXPECT_EQ(0, close(ruleset_fd_l3));
+
+ EXPECT_EQ(EACCES, test_open(file1_s4d1, O_RDONLY));
+ /*
+ * Accessing a file through a disconnected file descriptor can still be
+ * allowed by a rule tied to the original mount point, even if it is no
+ * longer visible in its mount point.
+ */
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(EACCES, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+}
+
+/*
+ * Test that renameat with disconnected paths works under Landlock. This test
+ * moves s1d3 to s4d2, so that we can have a rule allowing refers on the move
+ * target's immediate parent.
+ */
+TEST_F_FORK(layout1_bind, path_disconnected_rename)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = dir_s4d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {}
+ };
+
+ /* This layer only handles LANDLOCK_ACCESS_FS_READ_FILE. */
+ const struct rule layer2_only_s1d2[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ int ruleset_fd_l1, ruleset_fd_l2;
+ pid_t child_pid;
+ int bind_s1d3_fd, status;
+
+ ASSERT_EQ(0, mkdir(dir_s4d1, 0755))
+ {
+ TH_LOG("Failed to create %s: %s", dir_s4d1, strerror(errno));
+ }
+ ruleset_fd_l1 = create_ruleset(_metadata, ACCESS_ALL, layer1);
+ ruleset_fd_l2 = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer2_only_s1d2);
+ ASSERT_LE(0, ruleset_fd_l1);
+ ASSERT_LE(0, ruleset_fd_l2);
+
+ enforce_ruleset(_metadata, ruleset_fd_l1);
+ EXPECT_EQ(0, close(ruleset_fd_l1));
+
+ bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, bind_s1d3_fd);
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+
+ /* Tests ENOENT priority over EACCES for disconnected directory. */
+ EXPECT_EQ(EACCES, test_open_rel(bind_s1d3_fd, "..", O_DIRECTORY));
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s4d2))
+ {
+ TH_LOG("Failed to rename %s to %s: %s", dir_s1d3, dir_s4d2,
+ strerror(errno));
+ }
+ EXPECT_EQ(ENOENT, test_open_rel(bind_s1d3_fd, "..", O_DIRECTORY));
+
+ /*
+ * The file is no longer under s1d2 but we should still be able to access it
+ * with layer 2 because its mount point is evaluated as the first valid
+ * directory because it was initially a parent. Do a fork to test this so
+ * we don't prevent ourselves from renaming it back later.
+ */
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ enforce_ruleset(_metadata, ruleset_fd_l2);
+ EXPECT_EQ(0, close(ruleset_fd_l2));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(EACCES, test_open(file1_s4d2, O_RDONLY));
+
+ /*
+ * Tests that access widening checks indeed prevents us from renaming it
+ * back.
+ */
+ EXPECT_EQ(-1, rename(dir_s4d2, dir_s1d3));
+ EXPECT_EQ(EXDEV, errno);
+
+ /*
+ * Including through the now disconnected fd (but it should return
+ * EXDEV).
+ */
+ EXPECT_EQ(-1, renameat(bind_s1d3_fd, file1_name, AT_FDCWD,
+ file1_s2d2));
+ EXPECT_EQ(EXDEV, errno);
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ EXPECT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ ASSERT_EQ(0, rename(dir_s4d2, dir_s1d3))
+ {
+ TH_LOG("Failed to rename %s back to %s: %s", dir_s4d1, dir_s1d3,
+ strerror(errno));
+ }
+
+ /* Now checks that we can access it under l2. */
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ enforce_ruleset(_metadata, ruleset_fd_l2);
+ EXPECT_EQ(0, close(ruleset_fd_l2));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ EXPECT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /*
+ * Also test that we can rename via a disconnected path. We move the
+ * dir back to the disconnected place first, then we rename file1 to
+ * file2 through our dir fd.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s4d2))
+ {
+ TH_LOG("Failed to rename %s to %s: %s", dir_s1d3, dir_s4d2,
+ strerror(errno));
+ }
+ ASSERT_EQ(0,
+ renameat(bind_s1d3_fd, file1_name, bind_s1d3_fd, file2_name))
+ {
+ TH_LOG("Failed to rename %s to %s within disconnected %s: %s",
+ file1_name, file2_name, bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+ ASSERT_EQ(0, renameat(bind_s1d3_fd, file2_name, AT_FDCWD, file1_s2d2))
+ {
+ TH_LOG("Failed to rename %s to %s through disconnected %s: %s",
+ file2_name, file1_s2d2, bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+ EXPECT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+
+ /* Move it back using the disconnected path as the target. */
+ ASSERT_EQ(0, renameat(AT_FDCWD, file1_s2d2, bind_s1d3_fd, file1_name))
+ {
+ TH_LOG("Failed to rename %s to %s through disconnected %s: %s",
+ file1_s1d2, file1_name, bind_dir_s1d3, strerror(errno));
+ }
+
+ /* Now make it connected again. */
+ ASSERT_EQ(0, rename(dir_s4d2, dir_s1d3))
+ {
+ TH_LOG("Failed to rename %s back to %s: %s", dir_s4d2, dir_s1d3,
+ strerror(errno));
+ }
+
+ /* Checks again that we can access it under l2. */
+ enforce_ruleset(_metadata, ruleset_fd_l2);
+ EXPECT_EQ(0, close(ruleset_fd_l2));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+}
+
+/*
+ * Test that linkat(2) with disconnected paths works under Landlock. This
+ * test moves s1d3 to s4d1.
+ */
+TEST_F_FORK(layout1_bind, path_disconnected_link)
+{
+ /* Ruleset to be applied after renaming s1d3 to s4d1. */
+ const struct rule layer1[] = {
+ {
+ .path = dir_s4d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {}
+ };
+ int ruleset_fd, bind_s1d3_fd;
+
+ /* Removes unneeded files created by layout1, otherwise it will EEXIST. */
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, bind_s1d3_fd);
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+
+ /* Disconnects bind_s1d3_fd. */
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s4d1))
+ {
+ TH_LOG("Failed to rename %s to %s: %s", dir_s1d3, dir_s4d1,
+ strerror(errno));
+ }
+
+ /* Need this later to test different parent link. */
+ ASSERT_EQ(0, mkdir(dir_s4d2, 0755))
+ {
+ TH_LOG("Failed to create %s: %s", dir_s4d2, strerror(errno));
+ }
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_ALL, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* From disconnected to connected. */
+ ASSERT_EQ(0, linkat(bind_s1d3_fd, file1_name, AT_FDCWD, file1_s2d2, 0))
+ {
+ TH_LOG("Failed to link %s to %s via disconnected %s: %s",
+ file1_name, file1_s2d2, bind_dir_s1d3, strerror(errno));
+ }
+
+ /* Tests that we can access via the new link... */
+ EXPECT_EQ(0, test_open(file1_s2d2, O_RDONLY))
+ {
+ TH_LOG("Failed to open newly linked %s: %s", file1_s2d2,
+ strerror(errno));
+ }
+
+ /* ...as well as the old one. */
+ EXPECT_EQ(0, test_open(file1_s4d1, O_RDONLY))
+ {
+ TH_LOG("Failed to open original %s: %s", file1_s4d1,
+ strerror(errno));
+ }
+
+ /* From connected to disconnected. */
+ ASSERT_EQ(0, unlink(file1_s4d1));
+ ASSERT_EQ(0, linkat(AT_FDCWD, file1_s2d2, bind_s1d3_fd, file2_name, 0))
+ {
+ TH_LOG("Failed to link %s to %s via disconnected %s: %s",
+ file1_s2d2, file2_name, bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open(file2_s4d1, O_RDONLY));
+ ASSERT_EQ(0, unlink(file1_s2d2));
+
+ /* From disconnected to disconnected (same parent). */
+ ASSERT_EQ(0,
+ linkat(bind_s1d3_fd, file2_name, bind_s1d3_fd, file1_name, 0))
+ {
+ TH_LOG("Failed to link %s to %s within disconnected %s: %s",
+ file2_name, file1_name, bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open(file1_s4d1, O_RDONLY))
+ {
+ TH_LOG("Failed to open newly linked %s: %s", file1_s4d1,
+ strerror(errno));
+ }
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY))
+ {
+ TH_LOG("Failed to open %s through newly created link under disconnected path: %s",
+ file1_name, strerror(errno));
+ }
+ ASSERT_EQ(0, unlink(file2_s4d1));
+
+ /* From disconnected to disconnected (different parent). */
+ ASSERT_EQ(0,
+ linkat(bind_s1d3_fd, file1_name, bind_s1d3_fd, "s4d2/f1", 0))
+ {
+ TH_LOG("Failed to link %s to %s within disconnected %s: %s",
+ file1_name, "s4d2/f1", bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open(file1_s4d2, O_RDONLY))
+ {
+ TH_LOG("Failed to open %s after link: %s", file1_s4d2,
+ strerror(errno));
+ }
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, "s4d2/f1", O_RDONLY))
+ {
+ TH_LOG("Failed to open %s through disconnected path after link: %s",
+ "s4d2/f1", strerror(errno));
+ }
+}
+
+/*
+ * layout4_disconnected_leafs with bind mount and renames:
+ *
+ * tmp
+ * ├── s1d1
+ * │   └── s1d2 [source of the bind mount]
+ * │ ├── s1d31
+ * │   │ └── s1d41 [now renamed beneath s3d1]
+ * │ │ ├── f1
+ * │ │ └── f2
+ * │   └── s1d32
+ * │ └── s1d42 [now renamed beneath s4d1]
+ * │ ├── f3
+ * │ └── f4
+ * ├── s2d1
+ * │   └── s2d2 [bind mount of s1d2]
+ * │ ├── s1d31
+ * │   │ └── s1d41 [opened FD, now renamed beneath s3d1]
+ * │ │ ├── f1
+ * │ │ └── f2
+ * │   └── s1d32
+ * │ └── s1d42 [opened FD, now renamed beneath s4d1]
+ * │ ├── f3
+ * │ └── f4
+ * ├── s3d1
+ * │  └── s1d41 [renamed here]
+ * │ ├── f1
+ * │ └── f2
+ * └── s4d1
+ * └── s1d42 [renamed here]
+ * ├── f3
+ * └── f4
+ */
+/* clang-format off */
+FIXTURE(layout4_disconnected_leafs) {
+ int s2d2_fd;
+};
+/* clang-format on */
+
+FIXTURE_SETUP(layout4_disconnected_leafs)
+{
+ prepare_layout(_metadata);
+
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d31/s1d41/f1");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d31/s1d41/f2");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d32/s1d42/f3");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d32/s1d42/f4");
+ create_directory(_metadata, TMP_DIR "/s2d1/s2d2");
+ create_directory(_metadata, TMP_DIR "/s3d1");
+ create_directory(_metadata, TMP_DIR "/s4d1");
+
+ self->s2d2_fd =
+ open(TMP_DIR "/s2d1/s2d2", O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, self->s2d2_fd);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount(TMP_DIR "/s1d1/s1d2", TMP_DIR "/s2d1/s2d2", NULL,
+ MS_BIND, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+FIXTURE_TEARDOWN_PARENT(layout4_disconnected_leafs)
+{
+ /* umount(TMP_DIR "/s2d1") is handled by namespace lifetime. */
+
+ /* Removes files after renames. */
+ remove_path(TMP_DIR "/s3d1/s1d41/f1");
+ remove_path(TMP_DIR "/s3d1/s1d41/f2");
+ remove_path(TMP_DIR "/s4d1/s1d42/f1");
+ remove_path(TMP_DIR "/s4d1/s1d42/f3");
+ remove_path(TMP_DIR "/s4d1/s1d42/f4");
+ remove_path(TMP_DIR "/s4d1/s1d42/f5");
+
+ cleanup_layout(_metadata);
+}
+
+FIXTURE_VARIANT(layout4_disconnected_leafs)
+{
+ /*
+ * Parent of the bind mount source. It should always be ignored when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s1d1;
+ /*
+ * Source of bind mount (to s2d2). It should always be enforced when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s1d2;
+ /*
+ * Original parent of s1d41. It should always be ignored when testing
+ * against files under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_s1d31;
+ /*
+ * Original parent of s1d42. It should always be ignored when testing
+ * against files under the s1d42 disconnected directory.
+ */
+ const __u64 allowed_s1d32;
+ /*
+ * Opened and disconnected source directory. It should always be enforced
+ * when testing against files under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_s1d41;
+ /*
+ * Opened and disconnected source directory. It should always be enforced
+ * when testing against files under the s1d42 disconnected directory.
+ */
+ const __u64 allowed_s1d42;
+ /*
+ * File in the s1d41 disconnected directory. It should always be enforced
+ * when testing against itself under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_f1;
+ /*
+ * File in the s1d41 disconnected directory. It should always be enforced
+ * when testing against itself under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_f2;
+ /*
+ * File in the s1d42 disconnected directory. It should always be enforced
+ * when testing against itself under the s1d42 disconnected directory.
+ */
+ const __u64 allowed_f3;
+ /*
+ * Parent of the bind mount destination. It should always be enforced when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s2d1;
+ /*
+ * Directory covered by the bind mount. It should always be ignored when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s2d2;
+ /*
+ * New parent of the renamed s1d41. It should always be ignored when
+ * testing against files under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_s3d1;
+ /*
+ * New parent of the renamed s1d42. It should always be ignored when
+ * testing against files under the s1d42 disconnected directory.
+ */
+ const __u64 allowed_s4d1;
+
+ /* Expected result of the call to open([fd:s1d41]/f1, O_RDONLY). */
+ const int expected_read_result;
+ /* Expected result of the call to renameat([fd:s1d41]/f1, [fd:s1d42]/f1). */
+ const int expected_rename_result;
+ /*
+ * Expected result of the call to renameat([fd:s1d41]/f2, [fd:s1d42]/f3,
+ * RENAME_EXCHANGE).
+ */
+ const int expected_exchange_result;
+ /* Expected result of the call to renameat([fd:s1d42]/f4, [fd:s1d42]/f5). */
+ const int expected_same_dir_rename_result;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d1_mount_src_parent) {
+ /* clang-format on */
+ .allowed_s1d1 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d2_mount_src_refer) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d2_mount_src_create) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d2_mount_src_rename) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d31_s1d32_old_parent) {
+ /* clang-format on */
+ .allowed_s1d31 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s1d32 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_refer) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_create) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_rename_even) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* The destination directory has more access right. */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_rename_more) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ /* Access denied. */
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* The destination directory has less access right. */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_rename_less) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ /* Access allowed. */
+ .expected_rename_result = 0,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s2d1_mount_dst_parent_create) {
+ /* clang-format on */
+ .allowed_s2d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s2d1_mount_dst_parent_refer) {
+ /* clang-format on */
+ .allowed_s2d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s2d1_mount_dst_parent_mini) {
+ /* clang-format on */
+ .allowed_s2d1 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s2d2_covered_by_mount) {
+ /* clang-format on */
+ .allowed_s2d2 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* Tests collect_domain_accesses(). */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s3d1_s4d1_new_parent_refer) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s3d1_s4d1_new_parent_create) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs,
+ s3d1_s4d1_disconnected_rename_even){
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* The destination directory has more access right. */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s3d1_s4d1_disconnected_rename_more) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ /* Access denied. */
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* The destination directory has less access right. */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s3d1_s4d1_disconnected_rename_less) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ /* Access allowed. */
+ .expected_rename_result = 0,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, f1_f2_f3) {
+ /* clang-format on */
+ .allowed_f1 = LANDLOCK_ACCESS_FS_READ_FILE,
+ .allowed_f2 = LANDLOCK_ACCESS_FS_READ_FILE,
+ .allowed_f3 = LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+TEST_F_FORK(layout4_disconnected_leafs, read_rename_exchange)
+{
+ const __u64 handled_access =
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_MAKE_REG;
+ const struct rule rules[] = {
+ {
+ .path = TMP_DIR "/s1d1",
+ .access = variant->allowed_s1d1,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2",
+ .access = variant->allowed_s1d2,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d31",
+ .access = variant->allowed_s1d31,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d32",
+ .access = variant->allowed_s1d32,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d31/s1d41",
+ .access = variant->allowed_s1d41,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d32/s1d42",
+ .access = variant->allowed_s1d42,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d31/s1d41/f1",
+ .access = variant->allowed_f1,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d31/s1d41/f2",
+ .access = variant->allowed_f2,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d32/s1d42/f3",
+ .access = variant->allowed_f3,
+ },
+ {
+ .path = TMP_DIR "/s2d1",
+ .access = variant->allowed_s2d1,
+ },
+ /* s2d2_fd */
+ {
+ .path = TMP_DIR "/s3d1",
+ .access = variant->allowed_s3d1,
+ },
+ {
+ .path = TMP_DIR "/s4d1",
+ .access = variant->allowed_s4d1,
+ },
+ {},
+ };
+ int ruleset_fd, s1d41_bind_fd, s1d42_bind_fd;
+
+ ruleset_fd = create_ruleset(_metadata, handled_access, rules);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Adds rule for the covered directory. */
+ if (variant->allowed_s2d2) {
+ ASSERT_EQ(0, landlock_add_rule(
+ ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &(struct landlock_path_beneath_attr){
+ .parent_fd = self->s2d2_fd,
+ .allowed_access =
+ variant->allowed_s2d2,
+ },
+ 0));
+ }
+ EXPECT_EQ(0, close(self->s2d2_fd));
+
+ s1d41_bind_fd = open(TMP_DIR "/s2d1/s2d2/s1d31/s1d41",
+ O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, s1d41_bind_fd);
+ s1d42_bind_fd = open(TMP_DIR "/s2d1/s2d2/s1d32/s1d42",
+ O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, s1d42_bind_fd);
+
+ /* Disconnects and checks source and destination directories. */
+ EXPECT_EQ(0, test_open_rel(s1d41_bind_fd, "..", O_DIRECTORY));
+ EXPECT_EQ(0, test_open_rel(s1d42_bind_fd, "..", O_DIRECTORY));
+ /* Renames to make it accessible through s3d1/s1d41 */
+ ASSERT_EQ(0, test_renameat(AT_FDCWD, TMP_DIR "/s1d1/s1d2/s1d31/s1d41",
+ AT_FDCWD, TMP_DIR "/s3d1/s1d41"));
+ /* Renames to make it accessible through s4d1/s1d42 */
+ ASSERT_EQ(0, test_renameat(AT_FDCWD, TMP_DIR "/s1d1/s1d2/s1d32/s1d42",
+ AT_FDCWD, TMP_DIR "/s4d1/s1d42"));
+ EXPECT_EQ(ENOENT, test_open_rel(s1d41_bind_fd, "..", O_DIRECTORY));
+ EXPECT_EQ(ENOENT, test_open_rel(s1d42_bind_fd, "..", O_DIRECTORY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ EXPECT_EQ(variant->expected_read_result,
+ test_open_rel(s1d41_bind_fd, "f1", O_RDONLY));
+
+ EXPECT_EQ(variant->expected_rename_result,
+ test_renameat(s1d41_bind_fd, "f1", s1d42_bind_fd, "f1"));
+ EXPECT_EQ(variant->expected_exchange_result,
+ test_exchangeat(s1d41_bind_fd, "f2", s1d42_bind_fd, "f3"));
+
+ EXPECT_EQ(variant->expected_same_dir_rename_result,
+ test_renameat(s1d42_bind_fd, "f4", s1d42_bind_fd, "f5"));
+}
+
+/*
+ * layout5_disconnected_branch before rename:
+ *
+ * tmp
+ * ├── s1d1
+ * │   └── s1d2 [source of the first bind mount]
+ * │   └── s1d3
+ * │   ├── s1d41
+ * │   │   ├── f1
+ * │   │   └── f2
+ * │   └── s1d42
+ * │   ├── f3
+ * │   └── f4
+ * ├── s2d1
+ * │   └── s2d2 [source of the second bind mount]
+ * │   └── s2d3
+ * │   └── s2d4 [first s1d2 bind mount]
+ * │   └── s1d3
+ * │   ├── s1d41
+ * │   │   ├── f1
+ * │   │   └── f2
+ * │   └── s1d42
+ * │   ├── f3
+ * │   └── f4
+ * ├── s3d1
+ * │   └── s3d2 [second s2d2 bind mount]
+ * │   └── s2d3
+ * │   └── s2d4 [first s1d2 bind mount]
+ * │   └── s1d3
+ * │   ├── s1d41
+ * │   │   ├── f1
+ * │   │   └── f2
+ * │   └── s1d42
+ * │   ├── f3
+ * │   └── f4
+ * └── s4d1
+ *
+ * After rename:
+ *
+ * tmp
+ * ├── s1d1
+ * │   └── s1d2 [source of the first bind mount]
+ * │   └── s1d3
+ * │   ├── s1d41
+ * │   │   ├── f1
+ * │   │   └── f2
+ * │   └── s1d42
+ * │   ├── f3
+ * │   └── f4
+ * ├── s2d1
+ * │   └── s2d2 [source of the second bind mount]
+ * ├── s3d1
+ * │   └── s3d2 [second s2d2 bind mount]
+ * └── s4d1
+ * └── s2d3 [renamed here]
+ * └── s2d4 [first s1d2 bind mount]
+ * └── s1d3
+ * ├── s1d41
+ * │   ├── f1
+ * │   └── f2
+ * └── s1d42
+ * ├── f3
+ * └── f4
+ *
+ * Decision path for access from the s3d1/s3d2/s2d3/s2d4/s1d3 file descriptor:
+ * 1. first bind mount: s1d3 -> s1d2
+ * 2. second bind mount: s2d3
+ * 3. tmp mount: s4d1 -> tmp [disconnected branch]
+ * 4. second bind mount: s2d2
+ * 5. tmp mount: s3d1 -> tmp
+ * 6. parent mounts: [...] -> /
+ *
+ * The s4d1 directory is evaluated even if it is not in the s2d2 mount.
+ */
+
+/* clang-format off */
+FIXTURE(layout5_disconnected_branch) {
+ int s2d4_fd, s3d2_fd;
+};
+/* clang-format on */
+
+FIXTURE_SETUP(layout5_disconnected_branch)
+{
+ prepare_layout(_metadata);
+
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d3/s1d41/f1");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d3/s1d41/f2");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f3");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f4");
+ create_directory(_metadata, TMP_DIR "/s2d1/s2d2/s2d3/s2d4");
+ create_directory(_metadata, TMP_DIR "/s3d1/s3d2");
+ create_directory(_metadata, TMP_DIR "/s4d1");
+
+ self->s2d4_fd = open(TMP_DIR "/s2d1/s2d2/s2d3/s2d4",
+ O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, self->s2d4_fd);
+
+ self->s3d2_fd =
+ open(TMP_DIR "/s3d1/s3d2", O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, self->s3d2_fd);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount(TMP_DIR "/s1d1/s1d2", TMP_DIR "/s2d1/s2d2/s2d3/s2d4",
+ NULL, MS_BIND, NULL));
+ ASSERT_EQ(0, mount(TMP_DIR "/s2d1/s2d2", TMP_DIR "/s3d1/s3d2", NULL,
+ MS_BIND | MS_REC, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+FIXTURE_TEARDOWN_PARENT(layout5_disconnected_branch)
+{
+ /* Bind mounts are handled by namespace lifetime. */
+
+ /* Removes files after renames. */
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d41/f1");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d41/f2");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f1");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f3");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f4");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f5");
+
+ cleanup_layout(_metadata);
+}
+
+FIXTURE_VARIANT(layout5_disconnected_branch)
+{
+ /*
+ * Parent of all files. It should always be enforced when testing against
+ * files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_base;
+ /*
+ * Parent of the first bind mount source. It should always be ignored when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s1d1;
+ const __u64 allowed_s1d2;
+ const __u64 allowed_s1d3;
+ const __u64 allowed_s2d1;
+ const __u64 allowed_s2d2;
+ const __u64 allowed_s2d3;
+ const __u64 allowed_s2d4;
+ const __u64 allowed_s3d1;
+ const __u64 allowed_s3d2;
+ const __u64 allowed_s4d1;
+
+ /* Expected result of the call to open([fd:s1d3]/s1d41/f1, O_RDONLY). */
+ const int expected_read_result;
+ /*
+ * Expected result of the call to renameat([fd:s1d3]/s1d41/f1,
+ * [fd:s1d3]/s1d42/f1).
+ */
+ const int expected_rename_result;
+ /*
+ * Expected result of the call to renameat([fd:s1d3]/s1d41/f2,
+ * [fd:s1d3]/s1d42/f3, RENAME_EXCHANGE).
+ */
+ const int expected_exchange_result;
+ /*
+ * Expected result of the call to renameat([fd:s1d3]/s1d42/f4,
+ * [fd:s1d3]/s1d42/f5).
+ */
+ const int expected_same_dir_rename_result;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d1_mount1_src_parent) {
+ /* clang-format on */
+ .allowed_s1d1 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d2_mount1_src_refer) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d2_mount1_src_create) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d2_mount1_src_rename) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d3_fd_refer) {
+ /* clang-format on */
+ .allowed_s1d3 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d3_fd_create) {
+ /* clang-format on */
+ .allowed_s1d3 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d3_fd_rename) {
+ /* clang-format on */
+ .allowed_s1d3 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d3_fd_full) {
+ /* clang-format on */
+ .allowed_s1d3 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d1_mount2_src_parent) {
+ /* clang-format on */
+ .allowed_s2d1 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d2_mount2_src_refer) {
+ /* clang-format on */
+ .allowed_s2d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d2_mount2_src_create) {
+ /* clang-format on */
+ .allowed_s2d2 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d2_mount2_src_rename) {
+ /* clang-format on */
+ .allowed_s2d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d3_mount1_dst_parent_refer) {
+ /* clang-format on */
+ .allowed_s2d3 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d3_mount1_dst_parent_create) {
+ /* clang-format on */
+ .allowed_s2d3 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d3_mount1_dst_parent_rename) {
+ /* clang-format on */
+ .allowed_s2d3 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d4_mount1_dst) {
+ /* clang-format on */
+ .allowed_s2d4 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s3d1_mount2_dst_parent_refer) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s3d1_mount2_dst_parent_create) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s3d1_mount2_dst_parent_rename) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s3d2_mount1_dst) {
+ /* clang-format on */
+ .allowed_s3d2 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s4d1_rename_parent_refer) {
+ /* clang-format on */
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s4d1_rename_parent_create) {
+ /* clang-format on */
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s4d1_rename_parent_rename) {
+ /* clang-format on */
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+TEST_F_FORK(layout5_disconnected_branch, read_rename_exchange)
+{
+ const __u64 handled_access =
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_MAKE_REG;
+ const struct rule rules[] = {
+ {
+ .path = TMP_DIR "/s1d1",
+ .access = variant->allowed_s1d1,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2",
+ .access = variant->allowed_s1d2,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d3",
+ .access = variant->allowed_s1d3,
+ },
+ {
+ .path = TMP_DIR "/s2d1",
+ .access = variant->allowed_s2d1,
+ },
+ {
+ .path = TMP_DIR "/s2d1/s2d2",
+ .access = variant->allowed_s2d2,
+ },
+ {
+ .path = TMP_DIR "/s2d1/s2d2/s2d3",
+ .access = variant->allowed_s2d3,
+ },
+ /* s2d4_fd */
+ {
+ .path = TMP_DIR "/s3d1",
+ .access = variant->allowed_s3d1,
+ },
+ /* s3d2_fd */
+ {
+ .path = TMP_DIR "/s4d1",
+ .access = variant->allowed_s4d1,
+ },
+ {},
+ };
+ int ruleset_fd, s1d3_bind_fd;
+
+ ruleset_fd = create_ruleset(_metadata, handled_access, rules);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Adds rules for the covered directories. */
+ if (variant->allowed_s2d4) {
+ ASSERT_EQ(0, landlock_add_rule(
+ ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &(struct landlock_path_beneath_attr){
+ .parent_fd = self->s2d4_fd,
+ .allowed_access =
+ variant->allowed_s2d4,
+ },
+ 0));
+ }
+ EXPECT_EQ(0, close(self->s2d4_fd));
+
+ if (variant->allowed_s3d2) {
+ ASSERT_EQ(0, landlock_add_rule(
+ ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &(struct landlock_path_beneath_attr){
+ .parent_fd = self->s3d2_fd,
+ .allowed_access =
+ variant->allowed_s3d2,
+ },
+ 0));
+ }
+ EXPECT_EQ(0, close(self->s3d2_fd));
+
+ s1d3_bind_fd = open(TMP_DIR "/s3d1/s3d2/s2d3/s2d4/s1d3",
+ O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, s1d3_bind_fd);
+
+ /* Disconnects and checks source and destination directories. */
+ EXPECT_EQ(0, test_open_rel(s1d3_bind_fd, "..", O_DIRECTORY));
+ EXPECT_EQ(0, test_open_rel(s1d3_bind_fd, "../..", O_DIRECTORY));
+ /* Renames to make it accessible through s3d1/s1d41 */
+ ASSERT_EQ(0, test_renameat(AT_FDCWD, TMP_DIR "/s2d1/s2d2/s2d3",
+ AT_FDCWD, TMP_DIR "/s4d1/s2d3"));
+ EXPECT_EQ(0, test_open_rel(s1d3_bind_fd, "..", O_DIRECTORY));
+ EXPECT_EQ(ENOENT, test_open_rel(s1d3_bind_fd, "../..", O_DIRECTORY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ EXPECT_EQ(variant->expected_read_result,
+ test_open_rel(s1d3_bind_fd, "s1d41/f1", O_RDONLY));
+
+ EXPECT_EQ(variant->expected_rename_result,
+ test_renameat(s1d3_bind_fd, "s1d41/f1", s1d3_bind_fd,
+ "s1d42/f1"));
+ EXPECT_EQ(variant->expected_exchange_result,
+ test_exchangeat(s1d3_bind_fd, "s1d41/f2", s1d3_bind_fd,
+ "s1d42/f3"));
+
+ EXPECT_EQ(variant->expected_same_dir_rename_result,
+ test_renameat(s1d3_bind_fd, "s1d42/f4", s1d3_bind_fd,
+ "s1d42/f5"));
+}
+
+#define LOWER_BASE TMP_DIR "/lower"
+#define LOWER_DATA LOWER_BASE "/data"
+static const char lower_fl1[] = LOWER_DATA "/fl1";
+static const char lower_dl1[] = LOWER_DATA "/dl1";
+static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2";
+static const char lower_fo1[] = LOWER_DATA "/fo1";
+static const char lower_do1[] = LOWER_DATA "/do1";
+static const char lower_do1_fo2[] = LOWER_DATA "/do1/fo2";
+static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3";
+
+static const char (*lower_base_files[])[] = {
+ &lower_fl1,
+ &lower_fo1,
+ NULL,
+};
+static const char (*lower_base_directories[])[] = {
+ &lower_dl1,
+ &lower_do1,
+ NULL,
+};
+static const char (*lower_sub_files[])[] = {
+ &lower_dl1_fl2,
+ &lower_do1_fo2,
+ &lower_do1_fl3,
+ NULL,
+};
+
+#define UPPER_BASE TMP_DIR "/upper"
+#define UPPER_DATA UPPER_BASE "/data"
+#define UPPER_WORK UPPER_BASE "/work"
+static const char upper_fu1[] = UPPER_DATA "/fu1";
+static const char upper_du1[] = UPPER_DATA "/du1";
+static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2";
+static const char upper_fo1[] = UPPER_DATA "/fo1";
+static const char upper_do1[] = UPPER_DATA "/do1";
+static const char upper_do1_fo2[] = UPPER_DATA "/do1/fo2";
+static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3";
+
+static const char (*upper_base_files[])[] = {
+ &upper_fu1,
+ &upper_fo1,
+ NULL,
+};
+static const char (*upper_base_directories[])[] = {
+ &upper_du1,
+ &upper_do1,
+ NULL,
+};
+static const char (*upper_sub_files[])[] = {
+ &upper_du1_fu2,
+ &upper_do1_fo2,
+ &upper_do1_fu3,
+ NULL,
+};
+
+#define MERGE_BASE TMP_DIR "/merge"
+#define MERGE_DATA MERGE_BASE "/data"
+static const char merge_fl1[] = MERGE_DATA "/fl1";
+static const char merge_dl1[] = MERGE_DATA "/dl1";
+static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2";
+static const char merge_fu1[] = MERGE_DATA "/fu1";
+static const char merge_du1[] = MERGE_DATA "/du1";
+static const char merge_du1_fu2[] = MERGE_DATA "/du1/fu2";
+static const char merge_fo1[] = MERGE_DATA "/fo1";
+static const char merge_do1[] = MERGE_DATA "/do1";
+static const char merge_do1_fo2[] = MERGE_DATA "/do1/fo2";
+static const char merge_do1_fl3[] = MERGE_DATA "/do1/fl3";
+static const char merge_do1_fu3[] = MERGE_DATA "/do1/fu3";
+
+static const char (*merge_base_files[])[] = {
+ &merge_fl1,
+ &merge_fu1,
+ &merge_fo1,
+ NULL,
+};
+static const char (*merge_base_directories[])[] = {
+ &merge_dl1,
+ &merge_du1,
+ &merge_do1,
+ NULL,
+};
+static const char (*merge_sub_files[])[] = {
+ &merge_dl1_fl2, &merge_du1_fu2, &merge_do1_fo2,
+ &merge_do1_fl3, &merge_do1_fu3, NULL,
+};
+
+/*
+ * layout2_overlay hierarchy:
+ *
+ * tmp
+ * ├── lower
+ * │   └── data
+ * │   ├── dl1
+ * │   │   └── fl2
+ * │   ├── do1
+ * │   │   ├── fl3
+ * │   │   └── fo2
+ * │   ├── fl1
+ * │   └── fo1
+ * ├── merge
+ * │   └── data
+ * │   ├── dl1
+ * │   │   └── fl2
+ * │   ├── do1
+ * │   │   ├── fl3
+ * │   │   ├── fo2
+ * │   │   └── fu3
+ * │   ├── du1
+ * │   │   └── fu2
+ * │   ├── fl1
+ * │   ├── fo1
+ * │   └── fu1
+ * └── upper
+ * ├── data
+ * │   ├── do1
+ * │   │   ├── fo2
+ * │   │   └── fu3
+ * │   ├── du1
+ * │   │   └── fu2
+ * │   ├── fo1
+ * │   └── fu1
+ * └── work
+ * └── work
+ */
+
+FIXTURE(layout2_overlay)
+{
+ bool skip_test;
+};
+
+FIXTURE_SETUP(layout2_overlay)
+{
+ if (!supports_filesystem("overlay")) {
+ self->skip_test = true;
+ SKIP(return, "overlayfs is not supported (setup)");
+ }
+
+ prepare_layout(_metadata);
+
+ create_directory(_metadata, LOWER_BASE);
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ /* Creates tmpfs mount points to get deterministic overlayfs. */
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, LOWER_BASE));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ create_file(_metadata, lower_fl1);
+ create_file(_metadata, lower_dl1_fl2);
+ create_file(_metadata, lower_fo1);
+ create_file(_metadata, lower_do1_fo2);
+ create_file(_metadata, lower_do1_fl3);
+
+ create_directory(_metadata, UPPER_BASE);
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, UPPER_BASE));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ create_file(_metadata, upper_fu1);
+ create_file(_metadata, upper_du1_fu2);
+ create_file(_metadata, upper_fo1);
+ create_file(_metadata, upper_do1_fo2);
+ create_file(_metadata, upper_do1_fu3);
+ ASSERT_EQ(0, mkdir(UPPER_WORK, 0700));
+
+ create_directory(_metadata, MERGE_DATA);
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0,
+ "lowerdir=" LOWER_DATA ",upperdir=" UPPER_DATA
+ ",workdir=" UPPER_WORK));
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+FIXTURE_TEARDOWN_PARENT(layout2_overlay)
+{
+ if (self->skip_test)
+ SKIP(return, "overlayfs is not supported (teardown)");
+
+ EXPECT_EQ(0, remove_path(lower_do1_fl3));
+ EXPECT_EQ(0, remove_path(lower_dl1_fl2));
+ EXPECT_EQ(0, remove_path(lower_fl1));
+ EXPECT_EQ(0, remove_path(lower_do1_fo2));
+ EXPECT_EQ(0, remove_path(lower_fo1));
+
+ /* umount(LOWER_BASE)) is handled by namespace lifetime. */
+ EXPECT_EQ(0, remove_path(LOWER_BASE));
+
+ EXPECT_EQ(0, remove_path(upper_do1_fu3));
+ EXPECT_EQ(0, remove_path(upper_du1_fu2));
+ EXPECT_EQ(0, remove_path(upper_fu1));
+ EXPECT_EQ(0, remove_path(upper_do1_fo2));
+ EXPECT_EQ(0, remove_path(upper_fo1));
+ EXPECT_EQ(0, remove_path(UPPER_WORK "/work"));
+
+ /* umount(UPPER_BASE)) is handled by namespace lifetime. */
+ EXPECT_EQ(0, remove_path(UPPER_BASE));
+
+ /* umount(MERGE_DATA)) is handled by namespace lifetime. */
+ EXPECT_EQ(0, remove_path(MERGE_DATA));
+
+ cleanup_layout(_metadata);
+}
+
+TEST_F_FORK(layout2_overlay, no_restriction)
+{
+ if (self->skip_test)
+ SKIP(return, "overlayfs is not supported (test)");
+
+ ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_fo1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_do1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_do1_fo2, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_do1_fl3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(upper_fu1, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_du1, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_du1_fu2, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_fo1, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_do1, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_do1_fo2, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_do1_fu3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(merge_fl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_dl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_dl1_fl2, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_fu1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_du1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_du1_fu2, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_fo1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_do1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_do1_fo2, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_do1_fl3, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY));
+}
+
+#define for_each_path(path_list, path_entry, i) \
+ for (i = 0, path_entry = *path_list[i]; path_list[i]; \
+ path_entry = *path_list[++i])
+
+TEST_F_FORK(layout2_overlay, same_content_different_file)
+{
+ /* Sets access right on parent directories of both layers. */
+ const struct rule layer1_base[] = {
+ {
+ .path = LOWER_BASE,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = UPPER_BASE,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = MERGE_BASE,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ const struct rule layer2_data[] = {
+ {
+ .path = LOWER_DATA,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = UPPER_DATA,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = MERGE_DATA,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ /* Sets access right on directories inside both layers. */
+ const struct rule layer3_subdirs[] = {
+ {
+ .path = lower_dl1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = lower_do1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_du1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_do1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = merge_dl1,
+ .access = ACCESS_RW,
+ },
+ {
+ .path = merge_du1,
+ .access = ACCESS_RW,
+ },
+ {
+ .path = merge_do1,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ /* Tighten access rights to the files. */
+ const struct rule layer4_files[] = {
+ {
+ .path = lower_dl1_fl2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = lower_do1_fo2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = lower_do1_fl3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_du1_fu2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_do1_fo2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_do1_fu3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = merge_dl1_fl2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_du1_fu2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_do1_fo2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_do1_fl3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_do1_fu3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer5_merge_only[] = {
+ {
+ .path = MERGE_DATA,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd;
+ size_t i;
+ const char *path_entry;
+
+ if (self->skip_test)
+ SKIP(return, "overlayfs is not supported (test)");
+
+ /* Sets rules on base directories (i.e. outside overlay scope). */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks lower layer. */
+ for_each_path(lower_base_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ for_each_path(lower_base_directories, path_entry, i) {
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(lower_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ /* Checks upper layer. */
+ for_each_path(upper_base_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ for_each_path(upper_base_directories, path_entry, i) {
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(upper_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ /*
+ * Checks that access rights are independent from the lower and upper
+ * layers: write access to upper files viewed through the merge point
+ * is still allowed, and write access to lower file viewed (and copied)
+ * through the merge point is still allowed.
+ */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+
+ /* Sets rules on data directories (i.e. inside overlay scope). */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_data);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks merge. */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+
+ /* Same checks with tighter rules. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_subdirs);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks changes for lower layer. */
+ for_each_path(lower_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
+ }
+ /* Checks changes for upper layer. */
+ for_each_path(upper_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
+ }
+ /* Checks all merge accesses. */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+
+ /* Sets rules directly on overlayed files. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_files);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks unchanged accesses on lower layer. */
+ for_each_path(lower_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ /* Checks unchanged accesses on upper layer. */
+ for_each_path(upper_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ /* Checks all merge accesses. */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+
+ /* Only allowes access to the merge hierarchy. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer5_merge_only);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks new accesses on lower layer. */
+ for_each_path(lower_sub_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
+ }
+ /* Checks new accesses on upper layer. */
+ for_each_path(upper_sub_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
+ }
+ /* Checks all merge accesses. */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+}
+
+FIXTURE(layout3_fs)
+{
+ bool has_created_dir;
+ bool has_created_file;
+ bool skip_test;
+};
+
+FIXTURE_VARIANT(layout3_fs)
+{
+ const struct mnt_opt mnt;
+ const char *const file_path;
+ unsigned int cwd_fs_magic;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout3_fs, tmpfs) {
+ /* clang-format on */
+ .mnt = {
+ .type = "tmpfs",
+ .data = MNT_TMP_DATA,
+ },
+ .file_path = file1_s1d1,
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, ramfs) {
+ .mnt = {
+ .type = "ramfs",
+ .data = "mode=700",
+ },
+ .file_path = TMP_DIR "/dir/file",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, cgroup2) {
+ .mnt = {
+ .type = "cgroup2",
+ },
+ .file_path = TMP_DIR "/test/cgroup.procs",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, proc) {
+ .mnt = {
+ .type = "proc",
+ },
+ .file_path = TMP_DIR "/self/status",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, sysfs) {
+ .mnt = {
+ .type = "sysfs",
+ },
+ .file_path = TMP_DIR "/kernel/notes",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, hostfs) {
+ .mnt = {
+ .source = TMP_DIR,
+ .flags = MS_BIND,
+ },
+ .file_path = TMP_DIR "/dir/file",
+ .cwd_fs_magic = HOSTFS_SUPER_MAGIC,
+};
+
+static char *dirname_alloc(const char *path)
+{
+ char *dup;
+
+ if (!path)
+ return NULL;
+
+ dup = strdup(path);
+ if (!dup)
+ return NULL;
+
+ return dirname(dup);
+}
+
+FIXTURE_SETUP(layout3_fs)
+{
+ struct stat statbuf;
+ char *dir_path = dirname_alloc(variant->file_path);
+
+ if (!supports_filesystem(variant->mnt.type) ||
+ !cwd_matches_fs(variant->cwd_fs_magic)) {
+ self->skip_test = true;
+ SKIP(return, "this filesystem is not supported (setup)");
+ }
+
+ prepare_layout_opt(_metadata, &variant->mnt);
+
+ /* Creates directory when required. */
+ if (stat(dir_path, &statbuf)) {
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ EXPECT_EQ(0, mkdir(dir_path, 0700))
+ {
+ TH_LOG("Failed to create directory \"%s\": %s",
+ dir_path, strerror(errno));
+ }
+ self->has_created_dir = true;
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+
+ /* Creates file when required. */
+ if (stat(variant->file_path, &statbuf)) {
+ int fd;
+
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ fd = creat(variant->file_path, 0600);
+ EXPECT_LE(0, fd)
+ {
+ TH_LOG("Failed to create file \"%s\": %s",
+ variant->file_path, strerror(errno));
+ }
+ EXPECT_EQ(0, close(fd));
+ self->has_created_file = true;
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+
+ free(dir_path);
+}
+
+FIXTURE_TEARDOWN_PARENT(layout3_fs)
+{
+ if (self->skip_test)
+ SKIP(return, "this filesystem is not supported (teardown)");
+
+ if (self->has_created_file) {
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ /*
+ * Don't check for error because the file might already
+ * have been removed (cf. release_inode test).
+ */
+ unlink(variant->file_path);
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+
+ if (self->has_created_dir) {
+ char *dir_path = dirname_alloc(variant->file_path);
+
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ /*
+ * Don't check for error because the directory might already
+ * have been removed (cf. release_inode test).
+ */
+ rmdir(dir_path);
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ free(dir_path);
+ }
+
+ cleanup_layout(_metadata);
+}
+
+static void layer3_fs_tag_inode(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(layout3_fs) * self,
+ const FIXTURE_VARIANT(layout3_fs) * variant,
+ const char *const rule_path)
+{
+ const struct rule layer1_allow_read_file[] = {
+ {
+ .path = rule_path,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct landlock_ruleset_attr layer2_deny_everything_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ const char *const dev_null_path = "/dev/null";
+ int ruleset_fd;
+
+ if (self->skip_test)
+ SKIP(return, "this filesystem is not supported (test)");
+
+ /* Checks without Landlock. */
+ EXPECT_EQ(0, test_open(dev_null_path, O_RDONLY | O_CLOEXEC));
+ EXPECT_EQ(0, test_open(variant->file_path, O_RDONLY | O_CLOEXEC));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer1_allow_read_file);
+ EXPECT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ EXPECT_EQ(EACCES, test_open(dev_null_path, O_RDONLY | O_CLOEXEC));
+ EXPECT_EQ(0, test_open(variant->file_path, O_RDONLY | O_CLOEXEC));
+
+ /* Forbids directory reading. */
+ ruleset_fd =
+ landlock_create_ruleset(&layer2_deny_everything_attr,
+ sizeof(layer2_deny_everything_attr), 0);
+ EXPECT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Checks with Landlock and forbidden access. */
+ EXPECT_EQ(EACCES, test_open(dev_null_path, O_RDONLY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, test_open(variant->file_path, O_RDONLY | O_CLOEXEC));
+}
+
+/* Matrix of tests to check file hierarchy evaluation. */
+
+TEST_F_FORK(layout3_fs, tag_inode_dir_parent)
+{
+ /* The current directory must not be the root for this test. */
+ layer3_fs_tag_inode(_metadata, self, variant, ".");
+}
+
+TEST_F_FORK(layout3_fs, tag_inode_dir_mnt)
+{
+ layer3_fs_tag_inode(_metadata, self, variant, TMP_DIR);
+}
+
+TEST_F_FORK(layout3_fs, tag_inode_dir_child)
+{
+ char *dir_path = dirname_alloc(variant->file_path);
+
+ layer3_fs_tag_inode(_metadata, self, variant, dir_path);
+ free(dir_path);
+}
+
+TEST_F_FORK(layout3_fs, tag_inode_file)
+{
+ layer3_fs_tag_inode(_metadata, self, variant, variant->file_path);
+}
+
+/* Light version of layout1.release_inodes */
+TEST_F_FORK(layout3_fs, release_inodes)
+{
+ const struct rule layer1[] = {
+ {
+ .path = TMP_DIR,
+ .access = LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ if (self->skip_test)
+ SKIP(return, "this filesystem is not supported (test)");
+
+ /* Clean up for the teardown to not fail. */
+ if (self->has_created_file)
+ EXPECT_EQ(0, remove_path(variant->file_path));
+
+ if (self->has_created_dir) {
+ char *dir_path = dirname_alloc(variant->file_path);
+
+ /* Don't check for error because of cgroup specificities. */
+ remove_path(dir_path);
+ free(dir_path);
+ }
+
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Unmount the filesystem while it is being used by a ruleset. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, umount(TMP_DIR));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* Replaces with a new mount point to simplify FIXTURE_TEARDOWN. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, TMP_DIR));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that access to the new mount point is denied. */
+ ASSERT_EQ(EACCES, test_open(TMP_DIR, O_RDONLY));
+}
+
+static int matches_log_fs_extra(struct __test_metadata *const _metadata,
+ int audit_fd, const char *const blockers,
+ const char *const path, const char *const extra)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=fs\\.%s path=\"%s\" dev=\"[^\"]\\+\" ino=[0-9]\\+$";
+ char *absolute_path = NULL;
+ size_t log_match_remaining = sizeof(log_template) + strlen(blockers) +
+ PATH_MAX * 2 +
+ (extra ? strlen(extra) : 0) + 1;
+ char log_match[log_match_remaining];
+ char *log_match_cursor = log_match;
+ size_t chunk_len;
+
+ chunk_len = snprintf(log_match_cursor, log_match_remaining,
+ REGEX_LANDLOCK_PREFIX " blockers=%s path=\"",
+ blockers);
+ if (chunk_len < 0 || chunk_len >= log_match_remaining)
+ return -E2BIG;
+
+ /*
+ * It is assume that absolute_path does not contain control characters nor
+ * spaces, see audit_string_contains_control().
+ */
+ absolute_path = realpath(path, NULL);
+ if (!absolute_path)
+ return -errno;
+
+ log_match_remaining -= chunk_len;
+ log_match_cursor += chunk_len;
+ log_match_cursor = regex_escape(absolute_path, log_match_cursor,
+ log_match_remaining);
+ free(absolute_path);
+ if (log_match_cursor < 0)
+ return (long long)log_match_cursor;
+
+ log_match_remaining -= log_match_cursor - log_match;
+ chunk_len = snprintf(log_match_cursor, log_match_remaining,
+ "\" dev=\"[^\"]\\+\" ino=[0-9]\\+%s$",
+ extra ?: "");
+ if (chunk_len < 0 || chunk_len >= log_match_remaining)
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+static int matches_log_fs(struct __test_metadata *const _metadata, int audit_fd,
+ const char *const blockers, const char *const path)
+{
+ return matches_log_fs_extra(_metadata, audit_fd, blockers, path, NULL);
+}
+
+FIXTURE(audit_layout1)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit_layout1)
+{
+ prepare_layout(_metadata);
+
+ create_layout1(_metadata);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ disable_caps(_metadata);
+}
+
+FIXTURE_TEARDOWN_PARENT(audit_layout1)
+{
+ remove_layout1(_metadata);
+
+ cleanup_layout(_metadata);
+
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+}
+
+TEST_F(audit_layout1, execute_make)
+{
+ struct audit_records records;
+
+ copy_file(_metadata, bin_true, file1_s1d1);
+ test_execute(_metadata, 0, file1_s1d1);
+ test_check_exec(_metadata, 0, file1_s1d1);
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ });
+
+ test_execute(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.execute",
+ file1_s1d1));
+ test_check_exec(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.execute",
+ file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+/*
+ * Using a set of handled/denied access rights make it possible to check that
+ * only the blocked ones are logged.
+ */
+
+/* clang-format off */
+static const __u64 access_fs_16 =
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_CHAR |
+ LANDLOCK_ACCESS_FS_MAKE_DIR |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO |
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK |
+ LANDLOCK_ACCESS_FS_MAKE_SYM |
+ LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_TRUNCATE |
+ LANDLOCK_ACCESS_FS_IOCTL_DEV;
+/* clang-format on */
+
+TEST_F(audit_layout1, execute_read)
+{
+ struct audit_records records;
+
+ copy_file(_metadata, bin_true, file1_s1d1);
+ test_execute(_metadata, 0, file1_s1d1);
+ test_check_exec(_metadata, 0, file1_s1d1);
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ /*
+ * The only difference with the previous audit_layout1.execute_read test is
+ * the extra ",fs\\.read_file" blocked by the executable file.
+ */
+ test_execute(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.execute,fs\\.read_file", file1_s1d1));
+ test_check_exec(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.execute,fs\\.read_file", file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, write_file)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.write_file", file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, read_file)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.read_file",
+ file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, read_dir)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_open(dir_s1d1, O_DIRECTORY));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.read_dir",
+ dir_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, remove_dir)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+ EXPECT_EQ(0, unlink(file2_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, rmdir(dir_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_dir", dir_s1d2));
+
+ EXPECT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_dir", dir_s1d2));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, remove_file)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, unlink(file1_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file", dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_char)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFCHR | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_char",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_dir)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mkdir(file1_s1d3, 0755));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_dir",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_reg)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFREG | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_reg",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_sock)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFSOCK | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_sock",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_fifo)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFIFO | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_fifo",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_block)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFBLK | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.make_block", dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_sym)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, symlink("target", file1_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_sym",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, refer_handled)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_REFER,
+ });
+
+ EXPECT_EQ(-1, link(file1_s1d1, file1_s1d3));
+ EXPECT_EQ(EXDEV, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+ EXPECT_EQ(0,
+ matches_log_domain_allocated(self->audit_fd, getpid(), NULL));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, refer_make)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REFER,
+ });
+
+ EXPECT_EQ(-1, link(file1_s1d1, file1_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.make_reg,fs\\.refer", dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, refer_rename)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_rename(file1_s1d2, file1_s2d3));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.refer", dir_s1d2));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s2d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, refer_exchange)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ /*
+ * The only difference with the previous audit_layout1.refer_rename test is
+ * the extra ",fs\\.make_reg" blocked by the source directory.
+ */
+ EXPECT_EQ(EACCES, test_exchange(file1_s1d2, file1_s2d3));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s1d2));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s2d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+/*
+ * This test checks that the audit record is correctly generated when the
+ * operation is only partially denied. This is the case for rename(2) when the
+ * source file is allowed to be referenced but the destination directory is not.
+ *
+ * This is also a regression test for commit d617f0d72d80 ("landlock: Optimize
+ * file path walks and prepare for audit support") and commit 058518c20920
+ * ("landlock: Align partial refer access checks with final ones").
+ */
+TEST_F(audit_layout1, refer_rename_half)
+{
+ struct audit_records records;
+ const struct rule layer1[] = {
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {},
+ };
+ int ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Only half of the request is denied. */
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, truncate)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, truncate(file1_s1d3, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.truncate",
+ file1_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, ioctl_dev)
+{
+ struct audit_records records;
+ int fd;
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ access_fs_16 &
+ ~LANDLOCK_ACCESS_FS_READ_FILE,
+ });
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIONREAD));
+ EXPECT_EQ(0, matches_log_fs_extra(_metadata, self->audit_fd,
+ "fs\\.ioctl_dev", "/dev/null",
+ " ioctlcmd=0x541b"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, mount)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ });
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
+ EXPECT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.change_topology", dir_s3d2));
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
new file mode 100644
index 000000000000..2a45208551e6
--- /dev/null
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -0,0 +1,2003 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock tests - Network
+ *
+ * Copyright © 2022-2023 Huawei Tech. Co., Ltd.
+ * Copyright © 2023 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/in.h>
+#include <sched.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/un.h>
+
+#include "audit.h"
+#include "common.h"
+
+const short sock_port_start = (1 << 10);
+
+static const char loopback_ipv4[] = "127.0.0.1";
+static const char loopback_ipv6[] = "::1";
+
+/* Number pending connections queue to be hold. */
+const short backlog = 10;
+
+enum sandbox_type {
+ NO_SANDBOX,
+ /* This may be used to test rules that allow *and* deny accesses. */
+ TCP_SANDBOX,
+};
+
+static int set_service(struct service_fixture *const srv,
+ const struct protocol_variant prot,
+ const unsigned short index)
+{
+ memset(srv, 0, sizeof(*srv));
+
+ /*
+ * Copies all protocol properties in case of the variant only contains
+ * a subset of them.
+ */
+ srv->protocol = prot;
+
+ /* Checks for port overflow. */
+ if (index > 2)
+ return 1;
+ srv->port = sock_port_start << (2 * index);
+
+ switch (prot.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ srv->ipv4_addr.sin_family = prot.domain;
+ srv->ipv4_addr.sin_port = htons(srv->port);
+ srv->ipv4_addr.sin_addr.s_addr = inet_addr(loopback_ipv4);
+ return 0;
+
+ case AF_INET6:
+ srv->ipv6_addr.sin6_family = prot.domain;
+ srv->ipv6_addr.sin6_port = htons(srv->port);
+ inet_pton(AF_INET6, loopback_ipv6, &srv->ipv6_addr.sin6_addr);
+ return 0;
+
+ case AF_UNIX:
+ set_unix_address(srv, index);
+ return 0;
+ }
+ return 1;
+}
+
+static void setup_loopback(struct __test_metadata *const _metadata)
+{
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, unshare(CLONE_NEWNET));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ set_ambient_cap(_metadata, CAP_NET_ADMIN);
+ ASSERT_EQ(0, system("ip link set dev lo up"));
+ clear_ambient_cap(_metadata, CAP_NET_ADMIN);
+}
+
+static bool prot_is_tcp(const struct protocol_variant *const prot)
+{
+ return (prot->domain == AF_INET || prot->domain == AF_INET6) &&
+ prot->type == SOCK_STREAM &&
+ (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP);
+}
+
+static bool is_restricted(const struct protocol_variant *const prot,
+ const enum sandbox_type sandbox)
+{
+ if (sandbox == TCP_SANDBOX)
+ return prot_is_tcp(prot);
+ return false;
+}
+
+static int socket_variant(const struct service_fixture *const srv)
+{
+ int ret;
+
+ ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
+ srv->protocol.protocol);
+ if (ret < 0)
+ return -errno;
+ return ret;
+}
+
+#ifndef SIN6_LEN_RFC2133
+#define SIN6_LEN_RFC2133 24
+#endif
+
+static socklen_t get_addrlen(const struct service_fixture *const srv,
+ const bool minimal)
+{
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ return sizeof(srv->ipv4_addr);
+
+ case AF_INET6:
+ if (minimal)
+ return SIN6_LEN_RFC2133;
+ return sizeof(srv->ipv6_addr);
+
+ case AF_UNIX:
+ if (minimal)
+ return sizeof(srv->unix_addr) -
+ sizeof(srv->unix_addr.sun_path);
+ return srv->unix_addr_len;
+
+ default:
+ return 0;
+ }
+}
+
+static void set_port(struct service_fixture *const srv, uint16_t port)
+{
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ srv->ipv4_addr.sin_port = htons(port);
+ return;
+
+ case AF_INET6:
+ srv->ipv6_addr.sin6_port = htons(port);
+ return;
+
+ default:
+ return;
+ }
+}
+
+static uint16_t get_binded_port(int socket_fd,
+ const struct protocol_variant *const prot)
+{
+ struct sockaddr_in ipv4_addr;
+ struct sockaddr_in6 ipv6_addr;
+ socklen_t ipv4_addr_len, ipv6_addr_len;
+
+ /* Gets binded port. */
+ switch (prot->domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ ipv4_addr_len = sizeof(ipv4_addr);
+ getsockname(socket_fd, &ipv4_addr, &ipv4_addr_len);
+ return ntohs(ipv4_addr.sin_port);
+
+ case AF_INET6:
+ ipv6_addr_len = sizeof(ipv6_addr);
+ getsockname(socket_fd, &ipv6_addr, &ipv6_addr_len);
+ return ntohs(ipv6_addr.sin6_port);
+
+ default:
+ return 0;
+ }
+}
+
+static int bind_variant_addrlen(const int sock_fd,
+ const struct service_fixture *const srv,
+ const socklen_t addrlen)
+{
+ int ret;
+
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ ret = bind(sock_fd, &srv->ipv4_addr, addrlen);
+ break;
+
+ case AF_INET6:
+ ret = bind(sock_fd, &srv->ipv6_addr, addrlen);
+ break;
+
+ case AF_UNIX:
+ ret = bind(sock_fd, &srv->unix_addr, addrlen);
+ break;
+
+ default:
+ errno = EAFNOSUPPORT;
+ return -errno;
+ }
+
+ if (ret < 0)
+ return -errno;
+ return ret;
+}
+
+static int bind_variant(const int sock_fd,
+ const struct service_fixture *const srv)
+{
+ return bind_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
+}
+
+static int connect_variant_addrlen(const int sock_fd,
+ const struct service_fixture *const srv,
+ const socklen_t addrlen)
+{
+ int ret;
+
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ ret = connect(sock_fd, &srv->ipv4_addr, addrlen);
+ break;
+
+ case AF_INET6:
+ ret = connect(sock_fd, &srv->ipv6_addr, addrlen);
+ break;
+
+ case AF_UNIX:
+ ret = connect(sock_fd, &srv->unix_addr, addrlen);
+ break;
+
+ default:
+ errno = -EAFNOSUPPORT;
+ return -errno;
+ }
+
+ if (ret < 0)
+ return -errno;
+ return ret;
+}
+
+static int connect_variant(const int sock_fd,
+ const struct service_fixture *const srv)
+{
+ return connect_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
+}
+
+FIXTURE(protocol)
+{
+ struct service_fixture srv0, srv1, srv2, unspec_any0, unspec_srv0;
+};
+
+FIXTURE_VARIANT(protocol)
+{
+ const enum sandbox_type sandbox;
+ const struct protocol_variant prot;
+};
+
+FIXTURE_SETUP(protocol)
+{
+ const struct protocol_variant prot_unspec = {
+ .domain = AF_UNSPEC,
+ .type = SOCK_STREAM,
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, variant->prot, 1));
+ ASSERT_EQ(0, set_service(&self->srv2, variant->prot, 2));
+
+ ASSERT_EQ(0, set_service(&self->unspec_srv0, prot_unspec, 0));
+
+ ASSERT_EQ(0, set_service(&self->unspec_any0, prot_unspec, 0));
+ self->unspec_any0.ipv4_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(protocol)
+{
+}
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_udp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_udp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_stream) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_udp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_udp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_stream) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_datagram) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_DGRAM,
+ },
+};
+
+static void test_bind_and_connect(struct __test_metadata *const _metadata,
+ const struct service_fixture *const srv,
+ const bool deny_bind, const bool deny_connect)
+{
+ char buf = '\0';
+ int inval_fd, bind_fd, client_fd, status, ret;
+ pid_t child;
+
+ /* Starts invalid addrlen tests with bind. */
+ inval_fd = socket_variant(srv);
+ ASSERT_LE(0, inval_fd)
+ {
+ TH_LOG("Failed to create socket: %s", strerror(errno));
+ }
+
+ /* Tries to bind with zero as addrlen. */
+ EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv, 0));
+
+ /* Tries to bind with too small addrlen. */
+ EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv,
+ get_addrlen(srv, true) - 1));
+
+ /* Tries to bind with minimal addrlen. */
+ ret = bind_variant_addrlen(inval_fd, srv, get_addrlen(srv, true));
+ if (deny_bind) {
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret)
+ {
+ TH_LOG("Failed to bind to socket: %s", strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(inval_fd));
+
+ /* Starts invalid addrlen tests with connect. */
+ inval_fd = socket_variant(srv);
+ ASSERT_LE(0, inval_fd);
+
+ /* Tries to connect with zero as addrlen. */
+ EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv, 0));
+
+ /* Tries to connect with too small addrlen. */
+ EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv,
+ get_addrlen(srv, true) - 1));
+
+ /* Tries to connect with minimal addrlen. */
+ ret = connect_variant_addrlen(inval_fd, srv, get_addrlen(srv, true));
+ if (srv->protocol.domain == AF_UNIX) {
+ EXPECT_EQ(-EINVAL, ret);
+ } else if (deny_connect) {
+ EXPECT_EQ(-EACCES, ret);
+ } else if (srv->protocol.type == SOCK_STREAM) {
+ /* No listening server, whatever the value of deny_bind. */
+ EXPECT_EQ(-ECONNREFUSED, ret);
+ } else {
+ EXPECT_EQ(0, ret)
+ {
+ TH_LOG("Failed to connect to socket: %s",
+ strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(inval_fd));
+
+ /* Starts connection tests. */
+ bind_fd = socket_variant(srv);
+ ASSERT_LE(0, bind_fd);
+
+ ret = bind_variant(bind_fd, srv);
+ if (deny_bind) {
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+
+ /* Creates a listening socket. */
+ if (srv->protocol.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int connect_fd, ret;
+
+ /* Closes listening socket for the child. */
+ EXPECT_EQ(0, close(bind_fd));
+
+ /* Starts connection tests. */
+ connect_fd = socket_variant(srv);
+ ASSERT_LE(0, connect_fd);
+ ret = connect_variant(connect_fd, srv);
+ if (deny_connect) {
+ EXPECT_EQ(-EACCES, ret);
+ } else if (deny_bind) {
+ /* No listening server. */
+ EXPECT_EQ(-ECONNREFUSED, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, write(connect_fd, ".", 1));
+ }
+
+ EXPECT_EQ(0, close(connect_fd));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ /* Accepts connection from the child. */
+ client_fd = bind_fd;
+ if (!deny_bind && !deny_connect) {
+ if (srv->protocol.type == SOCK_STREAM) {
+ client_fd = accept(bind_fd, NULL, 0);
+ ASSERT_LE(0, client_fd);
+ }
+
+ EXPECT_EQ(1, read(client_fd, &buf, 1));
+ EXPECT_EQ('.', buf);
+ }
+
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /* Closes connection, if any. */
+ if (client_fd != bind_fd)
+ EXPECT_LE(0, close(client_fd));
+
+ /* Closes listening socket. */
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(protocol, bind)
+{
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr tcp_connect_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv1.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect and bind for the first port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+
+ /* Allows connect and denies bind for the second port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_connect_p1, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /* Binds a socket to the first port. */
+ test_bind_and_connect(_metadata, &self->srv0, false, false);
+
+ /* Binds a socket to the second port. */
+ test_bind_and_connect(_metadata, &self->srv1,
+ is_restricted(&variant->prot, variant->sandbox),
+ false);
+
+ /* Binds a socket to the third port. */
+ test_bind_and_connect(_metadata, &self->srv2,
+ is_restricted(&variant->prot, variant->sandbox),
+ is_restricted(&variant->prot, variant->sandbox));
+}
+
+TEST_F(protocol, connect)
+{
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr tcp_bind_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv1.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect and bind for the first port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+
+ /* Allows bind and denies connect for the second port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_p1, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ test_bind_and_connect(_metadata, &self->srv0, false, false);
+
+ test_bind_and_connect(_metadata, &self->srv1, false,
+ is_restricted(&variant->prot, variant->sandbox));
+
+ test_bind_and_connect(_metadata, &self->srv2,
+ is_restricted(&variant->prot, variant->sandbox),
+ is_restricted(&variant->prot, variant->sandbox));
+}
+
+TEST_F(protocol, bind_unspec)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int bind_fd, ret;
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows bind. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ /* Allowed bind on AF_UNSPEC/INADDR_ANY. */
+ ret = bind_variant(bind_fd, &self->unspec_any0);
+ if (variant->prot.domain == AF_INET) {
+ EXPECT_EQ(0, ret)
+ {
+ TH_LOG("Failed to bind to unspec/any socket: %s",
+ strerror(errno));
+ }
+ } else {
+ EXPECT_EQ(-EINVAL, ret);
+ }
+ EXPECT_EQ(0, close(bind_fd));
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Denies bind. */
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ /* Denied bind on AF_UNSPEC/INADDR_ANY. */
+ ret = bind_variant(bind_fd, &self->unspec_any0);
+ if (variant->prot.domain == AF_INET) {
+ if (is_restricted(&variant->prot, variant->sandbox)) {
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+ } else {
+ EXPECT_EQ(-EINVAL, ret);
+ }
+ EXPECT_EQ(0, close(bind_fd));
+
+ /* Checks bind with AF_UNSPEC and the loopback address. */
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+ ret = bind_variant(bind_fd, &self->unspec_srv0);
+ if (variant->prot.domain == AF_INET) {
+ EXPECT_EQ(-EAFNOSUPPORT, ret);
+ } else {
+ EXPECT_EQ(-EINVAL, ret)
+ {
+ TH_LOG("Wrong bind error: %s", strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(protocol, connect_unspec)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ int bind_fd, client_fd, status;
+ pid_t child;
+
+ /* Specific connection tests. */
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+ EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0));
+ if (self->srv0.protocol.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int connect_fd, ret;
+
+ /* Closes listening socket for the child. */
+ EXPECT_EQ(0, close(bind_fd));
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+ EXPECT_EQ(0, connect_variant(connect_fd, &self->srv0));
+
+ /* Tries to connect again, or set peer. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ if (self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EISCONN, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &tcp_connect, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /* Disconnects already connected socket, or set peer. */
+ ret = connect_variant(connect_fd, &self->unspec_any0);
+ if (self->srv0.protocol.domain == AF_UNIX &&
+ self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EINVAL, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ /* Tries to reconnect, or set peer. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ if (self->srv0.protocol.domain == AF_UNIX &&
+ self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EISCONN, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Denies connect. */
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ ret = connect_variant(connect_fd, &self->unspec_any0);
+ if (self->srv0.protocol.domain == AF_UNIX &&
+ self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EINVAL, ret);
+ } else {
+ /* Always allowed to disconnect. */
+ EXPECT_EQ(0, ret);
+ }
+
+ EXPECT_EQ(0, close(connect_fd));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ client_fd = bind_fd;
+ if (self->srv0.protocol.type == SOCK_STREAM) {
+ client_fd = accept(bind_fd, NULL, 0);
+ ASSERT_LE(0, client_fd);
+ }
+
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /* Closes connection, if any. */
+ if (client_fd != bind_fd)
+ EXPECT_LE(0, close(client_fd));
+
+ /* Closes listening socket. */
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+FIXTURE(ipv4)
+{
+ struct service_fixture srv0, srv1;
+};
+
+FIXTURE_VARIANT(ipv4)
+{
+ const enum sandbox_type sandbox;
+ const int type;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_tcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .type = SOCK_STREAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_tcp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .type = SOCK_STREAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_udp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .type = SOCK_DGRAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_udp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_SETUP(ipv4)
+{
+ const struct protocol_variant prot = {
+ .domain = AF_INET,
+ .type = variant->type,
+ };
+
+ disable_caps(_metadata);
+
+ set_service(&self->srv0, prot, 0);
+ set_service(&self->srv1, prot, 1);
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(ipv4)
+{
+}
+
+TEST_F(ipv4, from_unix_to_inet)
+{
+ int unix_stream_fd, unix_dgram_fd;
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd;
+
+ /* Denies connect and bind to check errno value. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect and bind for srv0. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ unix_stream_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, unix_stream_fd);
+
+ unix_dgram_fd = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, unix_dgram_fd);
+
+ /* Checks unix stream bind and connect for srv0. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv0));
+ EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv0));
+
+ /* Checks unix stream bind and connect for srv1. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv1))
+ {
+ TH_LOG("Wrong bind error: %s", strerror(errno));
+ }
+ EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv1));
+
+ /* Checks unix datagram bind and connect for srv0. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv0));
+ EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv0));
+
+ /* Checks unix datagram bind and connect for srv1. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv1));
+ EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv1));
+}
+
+FIXTURE(tcp_layers)
+{
+ struct service_fixture srv0, srv1;
+};
+
+FIXTURE_VARIANT(tcp_layers)
+{
+ const size_t num_layers;
+ const int domain;
+};
+
+FIXTURE_SETUP(tcp_layers)
+{
+ const struct protocol_variant prot = {
+ .domain = variant->domain,
+ .type = SOCK_STREAM,
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, prot, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, prot, 1));
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(tcp_layers)
+{
+}
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 1,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 2,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 3,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 1,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 2,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 3,
+};
+
+TEST_F(tcp_layers, ruleset_overlap)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+
+ if (variant->num_layers >= 1) {
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows bind. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+ /* Also allows bind, but allows connect too. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 2) {
+ int ruleset_fd;
+
+ /* Creates another ruleset layer. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Only allows bind. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 3) {
+ int ruleset_fd;
+
+ /* Creates another ruleset layer. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Try to allow bind and connect. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /*
+ * Forbids to connect to the socket because only one ruleset layer
+ * allows connect.
+ */
+ test_bind_and_connect(_metadata, &self->srv0, false,
+ variant->num_layers >= 2);
+}
+
+TEST_F(tcp_layers, ruleset_expand)
+{
+ if (variant->num_layers >= 1) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ /* Allows bind for srv0. */
+ const struct landlock_net_port_attr bind_srv0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &bind_srv0, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 2) {
+ /* Expands network mask with connect action. */
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ /* Allows bind for srv0 and connect to srv0. */
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ /* Try to allow bind for srv1. */
+ const struct landlock_net_port_attr tcp_bind_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv1.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_p1, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 3) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ /* Allows connect to srv0, without bind rule. */
+ const struct landlock_net_port_attr tcp_bind_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_p0, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ test_bind_and_connect(_metadata, &self->srv0, false,
+ variant->num_layers >= 3);
+
+ test_bind_and_connect(_metadata, &self->srv1, variant->num_layers >= 1,
+ variant->num_layers >= 2);
+}
+
+/* clang-format off */
+FIXTURE(mini) {};
+/* clang-format on */
+
+FIXTURE_SETUP(mini)
+{
+ disable_caps(_metadata);
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(mini)
+{
+}
+
+/* clang-format off */
+
+#define ACCESS_LAST LANDLOCK_ACCESS_NET_CONNECT_TCP
+
+#define ACCESS_ALL ( \
+ LANDLOCK_ACCESS_NET_BIND_TCP | \
+ LANDLOCK_ACCESS_NET_CONNECT_TCP)
+
+/* clang-format on */
+
+TEST_F(mini, network_access_rights)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = ACCESS_ALL,
+ };
+ struct landlock_net_port_attr net_port = {
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+ __u64 access;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ for (access = 1; access <= ACCESS_LAST; access <<= 1) {
+ net_port.allowed_access = access;
+ EXPECT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &net_port, 0))
+ {
+ TH_LOG("Failed to add rule with access 0x%llx: %s",
+ access, strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+/* Checks invalid attribute, out of landlock network access range. */
+TEST_F(mini, ruleset_with_unknown_access)
+{
+ __u64 access_mask;
+
+ for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
+ access_mask >>= 1) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = access_mask,
+ };
+
+ EXPECT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0));
+ EXPECT_EQ(EINVAL, errno);
+ }
+}
+
+TEST_F(mini, rule_with_unknown_access)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = ACCESS_ALL,
+ };
+ struct landlock_net_port_attr net_port = {
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+ __u64 access;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ for (access = 1ULL << 63; access != ACCESS_LAST; access >>= 1) {
+ net_port.allowed_access = access;
+ EXPECT_EQ(-1,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &net_port, 0));
+ EXPECT_EQ(EINVAL, errno);
+ }
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F(mini, rule_with_unhandled_access)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ struct landlock_net_port_attr net_port = {
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+ __u64 access;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ for (access = 1; access > 0; access <<= 1) {
+ int err;
+
+ net_port.allowed_access = access;
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &net_port, 0);
+ if (access == ruleset_attr.handled_access_net) {
+ EXPECT_EQ(0, err);
+ } else {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EINVAL, errno);
+ }
+ }
+
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F(mini, inval)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP
+ };
+ const struct landlock_net_port_attr tcp_bind_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = sock_port_start,
+ };
+ const struct landlock_net_port_attr tcp_denied = {
+ .allowed_access = 0,
+ .port = sock_port_start,
+ };
+ const struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Checks unhandled allowed_access. */
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Checks zero access value. */
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_denied, 0));
+ EXPECT_EQ(ENOMSG, errno);
+
+ /* Adds with legitimate values. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+}
+
+TEST_F(mini, tcp_port_overflow)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr port_max_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT16_MAX,
+ };
+ const struct landlock_net_port_attr port_max_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = UINT16_MAX,
+ };
+ const struct landlock_net_port_attr port_overflow1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT16_MAX + 1,
+ };
+ const struct landlock_net_port_attr port_overflow2 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT16_MAX + 2,
+ };
+ const struct landlock_net_port_attr port_overflow3 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT32_MAX + 1UL,
+ };
+ const struct landlock_net_port_attr port_overflow4 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT32_MAX + 2UL,
+ };
+ const struct protocol_variant ipv4_tcp = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ };
+ struct service_fixture srv_denied, srv_max_allowed;
+ int ruleset_fd;
+
+ ASSERT_EQ(0, set_service(&srv_denied, ipv4_tcp, 0));
+
+ /* Be careful to avoid port inconsistencies. */
+ srv_max_allowed = srv_denied;
+ srv_max_allowed.port = port_max_bind.port;
+ srv_max_allowed.ipv4_addr.sin_port = htons(port_max_bind.port);
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_max_bind, 0));
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow1, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow2, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow3, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Interleaves with invalid rule additions. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_max_connect, 0));
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow4, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ test_bind_and_connect(_metadata, &srv_denied, true, true);
+ test_bind_and_connect(_metadata, &srv_max_allowed, false, false);
+}
+
+FIXTURE(ipv4_tcp)
+{
+ struct service_fixture srv0, srv1;
+};
+
+FIXTURE_SETUP(ipv4_tcp)
+{
+ const struct protocol_variant ipv4_tcp = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, ipv4_tcp, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, ipv4_tcp, 1));
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(ipv4_tcp)
+{
+}
+
+TEST_F(ipv4_tcp, port_endianness)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr bind_host_endian_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ /* Host port format. */
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr connect_big_endian_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ /* Big endian port format. */
+ .port = htons(self->srv0.port),
+ };
+ const struct landlock_net_port_attr bind_connect_host_endian_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ /* Host port format. */
+ .port = self->srv1.port,
+ };
+ const unsigned int one = 1;
+ const char little_endian = *(const char *)&one;
+ int ruleset_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &bind_host_endian_p0, 0));
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &connect_big_endian_p0, 0));
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &bind_connect_host_endian_p1, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* No restriction for big endinan CPU. */
+ test_bind_and_connect(_metadata, &self->srv0, false, little_endian);
+
+ /* No restriction for any CPU. */
+ test_bind_and_connect(_metadata, &self->srv1, false, false);
+}
+
+TEST_F(ipv4_tcp, with_fs)
+{
+ const struct landlock_ruleset_attr ruleset_attr_fs_net = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
+ .parent_fd = -1,
+ };
+ struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd, bind_fd, dir_fd;
+
+ /* Creates ruleset both for filesystem and network access. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr_fs_net,
+ sizeof(ruleset_attr_fs_net), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Adds a filesystem rule. */
+ path_beneath.parent_fd = open("/dev", O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ EXPECT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Adds a network rule. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Tests file access. */
+ dir_fd = open("/dev", O_RDONLY);
+ EXPECT_LE(0, dir_fd);
+ EXPECT_EQ(0, close(dir_fd));
+
+ dir_fd = open("/", O_RDONLY);
+ EXPECT_EQ(-1, dir_fd);
+ EXPECT_EQ(EACCES, errno);
+
+ /* Tests port binding. */
+ bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, bind_fd);
+ EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0));
+ EXPECT_EQ(0, close(bind_fd));
+
+ bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, bind_fd);
+ EXPECT_EQ(-EACCES, bind_variant(bind_fd, &self->srv1));
+}
+
+FIXTURE(port_specific)
+{
+ struct service_fixture srv0;
+};
+
+FIXTURE_VARIANT(port_specific)
+{
+ const enum sandbox_type sandbox;
+ const struct protocol_variant prot;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv4) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv4) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv6) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv6) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+FIXTURE_SETUP(port_specific)
+{
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(port_specific)
+{
+}
+
+TEST_F(port_specific, bind_connect_zero)
+{
+ int bind_fd, connect_fd, ret;
+ uint16_t port;
+
+ /* Adds a rule layer with bind and connect actions. */
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_zero = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = 0,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Checks zero port value on bind and connect actions. */
+ EXPECT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_zero, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+
+ /* Sets address port to 0 for both protocol families. */
+ set_port(&self->srv0, 0);
+ /*
+ * Binds on port 0, which selects a random port within
+ * ip_local_port_range.
+ */
+ ret = bind_variant(bind_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ /* Connects on port 0. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ EXPECT_EQ(-ECONNREFUSED, ret);
+
+ /* Sets binded port for both protocol families. */
+ port = get_binded_port(bind_fd, &variant->prot);
+ EXPECT_NE(0, port);
+ set_port(&self->srv0, port);
+ /* Connects on the binded port. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ if (is_restricted(&variant->prot, variant->sandbox)) {
+ /* Denied by Landlock. */
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ EXPECT_EQ(0, close(connect_fd));
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(port_specific, bind_connect_1023)
+{
+ int bind_fd, connect_fd, ret;
+
+ /* Adds a rule layer with bind and connect actions. */
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP
+ };
+ /* A rule with port value less than 1024. */
+ const struct landlock_net_port_attr tcp_bind_connect_low_range = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = 1023,
+ };
+ /* A rule with 1024 port. */
+ const struct landlock_net_port_attr tcp_bind_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = 1024,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_low_range, 0));
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+
+ /* Sets address port to 1023 for both protocol families. */
+ set_port(&self->srv0, 1023);
+ /* Binds on port 1023. */
+ ret = bind_variant(bind_fd, &self->srv0);
+ /* Denied by the system. */
+ EXPECT_EQ(-EACCES, ret);
+
+ /* Binds on port 1023. */
+ set_cap(_metadata, CAP_NET_BIND_SERVICE);
+ ret = bind_variant(bind_fd, &self->srv0);
+ clear_cap(_metadata, CAP_NET_BIND_SERVICE);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ /* Connects on the binded port 1023. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(0, close(connect_fd));
+ EXPECT_EQ(0, close(bind_fd));
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+
+ /* Sets address port to 1024 for both protocol families. */
+ set_port(&self->srv0, 1024);
+ /* Binds on port 1024. */
+ ret = bind_variant(bind_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ /* Connects on the binded port 1024. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(0, close(connect_fd));
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+static int matches_log_tcp(const int audit_fd, const char *const blockers,
+ const char *const dir_addr, const char *const addr,
+ const char *const dir_port)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=%s %s=%s %s=1024$";
+ /*
+ * Max strlen(blockers): 16
+ * Max strlen(dir_addr): 5
+ * Max strlen(addr): 12
+ * Max strlen(dir_port): 4
+ */
+ char log_match[sizeof(log_template) + 37];
+ int log_match_len;
+
+ log_match_len = snprintf(log_match, sizeof(log_match), log_template,
+ blockers, dir_addr, addr, dir_port);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+FIXTURE(audit)
+{
+ struct service_fixture srv0;
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_VARIANT(audit)
+{
+ const char *const addr;
+ const struct protocol_variant prot;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv4) {
+ /* clang-format on */
+ .addr = "127\\.0\\.0\\.1",
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv6) {
+ /* clang-format on */
+ .addr = "::1",
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+FIXTURE_SETUP(audit)
+{
+ ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ setup_loopback(_metadata);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ disable_caps(_metadata);
+};
+
+FIXTURE_TEARDOWN(audit)
+{
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+TEST_F(audit, bind)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ struct audit_records records;
+ int ruleset_fd, sock_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, bind_variant(sock_fd, &self->srv0));
+ EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.bind_tcp", "saddr",
+ variant->addr, "src"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
+TEST_F(audit, connect)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ struct audit_records records;
+ int ruleset_fd, sock_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv0));
+ EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.connect_tcp",
+ "daddr", variant->addr, "dest"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
new file mode 100644
index 000000000000..4e356334ecb7
--- /dev/null
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -0,0 +1,577 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Ptrace
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <signal.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "audit.h"
+#include "common.h"
+
+/* Copied from security/yama/yama_lsm.c */
+#define YAMA_SCOPE_DISABLED 0
+#define YAMA_SCOPE_RELATIONAL 1
+
+static void create_domain(struct __test_metadata *const _metadata)
+{
+ int ruleset_fd;
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK,
+ };
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ EXPECT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+static int test_ptrace_read(const pid_t pid)
+{
+ static const char path_template[] = "/proc/%d/environ";
+ char procenv_path[sizeof(path_template) + 10];
+ int procenv_path_size, fd;
+
+ procenv_path_size = snprintf(procenv_path, sizeof(procenv_path),
+ path_template, pid);
+ if (procenv_path_size >= sizeof(procenv_path))
+ return E2BIG;
+
+ fd = open(procenv_path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return errno;
+ /*
+ * Mixing error codes from close(2) and open(2) should not lead to any
+ * (access type) confusion for this test.
+ */
+ if (close(fd) != 0)
+ return errno;
+ return 0;
+}
+
+static int get_yama_ptrace_scope(void)
+{
+ int ret;
+ char buf[2] = {};
+ const int fd = open("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY);
+
+ if (fd < 0)
+ return 0;
+
+ if (read(fd, buf, 1) < 0) {
+ close(fd);
+ return -1;
+ }
+
+ ret = atoi(buf);
+ close(fd);
+ return ret;
+}
+
+/* clang-format off */
+FIXTURE(hierarchy) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(hierarchy)
+{
+ const bool domain_both;
+ const bool domain_parent;
+ const bool domain_child;
+};
+
+/*
+ * Test multiple tracing combinations between a parent process P1 and a child
+ * process P2.
+ *
+ * Yama's scoped ptrace is presumed disabled. If enabled, this optional
+ * restriction is enforced in addition to any Landlock check, which means that
+ * all P2 requests to trace P1 would be denied.
+ */
+
+/*
+ * No domain
+ *
+ * P1-. P1 -> P2 : allow
+ * \ P2 -> P1 : allow
+ * 'P2
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = false,
+};
+
+/*
+ * Child domain
+ *
+ * P1--. P1 -> P2 : allow
+ * \ P2 -> P1 : deny
+ * .'-----.
+ * | P2 |
+ * '------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = true,
+};
+
+/*
+ * Parent domain
+ * .------.
+ * | P1 --. P1 -> P2 : deny
+ * '------' \ P2 -> P1 : allow
+ * '
+ * P2
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = false,
+};
+
+/*
+ * Parent + child domain (siblings)
+ * .------.
+ * | P1 ---. P1 -> P2 : deny
+ * '------' \ P2 -> P1 : deny
+ * .---'--.
+ * | P2 |
+ * '------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = true,
+};
+
+/*
+ * Same domain (inherited)
+ * .-------------.
+ * | P1----. | P1 -> P2 : allow
+ * | \ | P2 -> P1 : allow
+ * | ' |
+ * | P2 |
+ * '-------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = false,
+};
+
+/*
+ * Inherited + child domain
+ * .-----------------.
+ * | P1----. | P1 -> P2 : allow
+ * | \ | P2 -> P1 : deny
+ * | .-'----. |
+ * | | P2 | |
+ * | '------' |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = true,
+};
+
+/*
+ * Inherited + parent domain
+ * .-----------------.
+ * |.------. | P1 -> P2 : deny
+ * || P1 ----. | P2 -> P1 : allow
+ * |'------' \ |
+ * | ' |
+ * | P2 |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = false,
+};
+
+/*
+ * Inherited + parent and child domain (siblings)
+ * .-----------------.
+ * | .------. | P1 -> P2 : deny
+ * | | P1 . | P2 -> P1 : deny
+ * | '------'\ |
+ * | \ |
+ * | .--'---. |
+ * | | P2 | |
+ * | '------' |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = true,
+};
+
+FIXTURE_SETUP(hierarchy)
+{
+}
+
+FIXTURE_TEARDOWN(hierarchy)
+{
+}
+
+/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
+TEST_F(hierarchy, trace)
+{
+ pid_t child, parent;
+ int status, err_proc_read;
+ int pipe_child[2], pipe_parent[2];
+ int yama_ptrace_scope;
+ char buf_parent;
+ long ret;
+ bool can_read_child, can_trace_child, can_read_parent, can_trace_parent;
+
+ yama_ptrace_scope = get_yama_ptrace_scope();
+ ASSERT_LE(0, yama_ptrace_scope);
+
+ if (yama_ptrace_scope > YAMA_SCOPE_DISABLED)
+ TH_LOG("Incomplete tests due to Yama restrictions (scope %d)",
+ yama_ptrace_scope);
+
+ /*
+ * can_read_child is true if a parent process can read its child
+ * process, which is only the case when the parent process is not
+ * isolated from the child with a dedicated Landlock domain.
+ */
+ can_read_child = !variant->domain_parent;
+
+ /*
+ * can_trace_child is true if a parent process can trace its child
+ * process. This depends on two conditions:
+ * - The parent process is not isolated from the child with a dedicated
+ * Landlock domain.
+ * - Yama allows tracing children (up to YAMA_SCOPE_RELATIONAL).
+ */
+ can_trace_child = can_read_child &&
+ yama_ptrace_scope <= YAMA_SCOPE_RELATIONAL;
+
+ /*
+ * can_read_parent is true if a child process can read its parent
+ * process, which is only the case when the child process is not
+ * isolated from the parent with a dedicated Landlock domain.
+ */
+ can_read_parent = !variant->domain_child;
+
+ /*
+ * can_trace_parent is true if a child process can trace its parent
+ * process. This depends on two conditions:
+ * - The child process is not isolated from the parent with a dedicated
+ * Landlock domain.
+ * - Yama is disabled (YAMA_SCOPE_DISABLED).
+ */
+ can_trace_parent = can_read_parent &&
+ yama_ptrace_scope <= YAMA_SCOPE_DISABLED;
+
+ /*
+ * Removes all effective and permitted capabilities to not interfere
+ * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS.
+ */
+ drop_caps(_metadata);
+
+ parent = getpid();
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ if (variant->domain_both) {
+ create_domain(_metadata);
+ if (!__test_passed(_metadata))
+ /* Aborts before forking. */
+ return;
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ char buf_child;
+
+ ASSERT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, close(pipe_child[0]));
+ if (variant->domain_child)
+ create_domain(_metadata);
+
+ /* Waits for the parent to be in a domain, if any. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+
+ /* Tests PTRACE_MODE_READ on the parent. */
+ err_proc_read = test_ptrace_read(parent);
+ if (can_read_parent) {
+ EXPECT_EQ(0, err_proc_read);
+ } else {
+ EXPECT_EQ(EACCES, err_proc_read);
+ }
+
+ /* Tests PTRACE_ATTACH on the parent. */
+ ret = ptrace(PTRACE_ATTACH, parent, NULL, 0);
+ if (can_trace_parent) {
+ EXPECT_EQ(0, ret);
+ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ }
+ if (ret == 0) {
+ ASSERT_EQ(parent, waitpid(parent, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_DETACH, parent, NULL, 0));
+ }
+
+ /* Tests child PTRACE_TRACEME. */
+ ret = ptrace(PTRACE_TRACEME);
+ if (can_trace_child) {
+ EXPECT_EQ(0, ret);
+ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ }
+
+ /*
+ * Signals that the PTRACE_ATTACH test is done and the
+ * PTRACE_TRACEME test is ongoing.
+ */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ if (can_trace_child) {
+ ASSERT_EQ(0, raise(SIGSTOP));
+ }
+
+ /* Waits for the parent PTRACE_ATTACH test. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(0, close(pipe_child[1]));
+ ASSERT_EQ(0, close(pipe_parent[0]));
+ if (variant->domain_parent)
+ create_domain(_metadata);
+
+ /* Signals that the parent is in a domain, if any. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ /*
+ * Waits for the child to test PTRACE_ATTACH on the parent and start
+ * testing PTRACE_TRACEME.
+ */
+ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests child PTRACE_TRACEME. */
+ if (can_trace_child) {
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
+ } else {
+ /* The child should not be traced by the parent. */
+ EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0));
+ EXPECT_EQ(ESRCH, errno);
+ }
+
+ /* Tests PTRACE_MODE_READ on the child. */
+ err_proc_read = test_ptrace_read(child);
+ if (can_read_child) {
+ EXPECT_EQ(0, err_proc_read);
+ } else {
+ EXPECT_EQ(EACCES, err_proc_read);
+ }
+
+ /* Tests PTRACE_ATTACH on the child. */
+ ret = ptrace(PTRACE_ATTACH, child, NULL, 0);
+ if (can_trace_child) {
+ EXPECT_EQ(0, ret);
+ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ }
+
+ if (ret == 0) {
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
+ }
+
+ /* Signals that the parent PTRACE_ATTACH test is done. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+static int matches_log_ptrace(struct __test_metadata *const _metadata,
+ int audit_fd, const pid_t opid)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=ptrace opid=%d ocomm=\"ptrace_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, opid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+FIXTURE(audit)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+FIXTURE_TEARDOWN_PARENT(audit)
+{
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+}
+
+/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
+TEST_F(audit, trace)
+{
+ pid_t child;
+ int status;
+ int pipe_child[2], pipe_parent[2];
+ int yama_ptrace_scope;
+ char buf_parent;
+ struct audit_records records;
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ yama_ptrace_scope = get_yama_ptrace_scope();
+ ASSERT_LE(0, yama_ptrace_scope);
+
+ if (yama_ptrace_scope > YAMA_SCOPE_DISABLED)
+ TH_LOG("Incomplete tests due to Yama restrictions (scope %d)",
+ yama_ptrace_scope);
+
+ /*
+ * Removes all effective and permitted capabilities to not interfere
+ * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS.
+ */
+ drop_caps(_metadata);
+
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ char buf_child;
+
+ ASSERT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, close(pipe_child[0]));
+
+ /* Waits for the parent to be in a domain, if any. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+
+ /* Tests child PTRACE_TRACEME. */
+ EXPECT_EQ(-1, ptrace(PTRACE_TRACEME));
+ EXPECT_EQ(EPERM, errno);
+ /* We should see the child process. */
+ EXPECT_EQ(0, matches_log_ptrace(_metadata, self->audit_fd,
+ getpid()));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ /* Checks for a domain creation. */
+ EXPECT_EQ(1, records.domain);
+
+ /*
+ * Signals that the PTRACE_ATTACH test is done and the
+ * PTRACE_TRACEME test is ongoing.
+ */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ /* Waits for the parent PTRACE_ATTACH test. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(0, close(pipe_child[1]));
+ ASSERT_EQ(0, close(pipe_parent[0]));
+ create_domain(_metadata);
+
+ /* Signals that the parent is in a domain. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ /*
+ * Waits for the child to test PTRACE_ATTACH on the parent and start
+ * testing PTRACE_TRACEME.
+ */
+ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* The child should not be traced by the parent. */
+ EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0));
+ EXPECT_EQ(ESRCH, errno);
+
+ /* Tests PTRACE_ATTACH on the child. */
+ EXPECT_EQ(-1, ptrace(PTRACE_ATTACH, child, NULL, 0));
+ EXPECT_EQ(EPERM, errno);
+ EXPECT_EQ(0, matches_log_ptrace(_metadata, self->audit_fd, child));
+
+ /* Signals that the parent PTRACE_ATTACH test is done. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/sandbox-and-launch.c b/tools/testing/selftests/landlock/sandbox-and-launch.c
new file mode 100644
index 000000000000..3e32e1a51ac5
--- /dev/null
+++ b/tools/testing/selftests/landlock/sandbox-and-launch.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sandbox itself and execute another program (in a different mount point).
+ *
+ * Used by layout1.umount_sandboxer from fs_test.c
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "wrappers.h"
+
+int main(int argc, char *argv[])
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int pipe_child, pipe_parent, ruleset_fd;
+ char buf;
+
+ /*
+ * The first argument must be the file descriptor number of a pipe.
+ * The second argument must be the program to execute.
+ */
+ if (argc != 4) {
+ fprintf(stderr, "Wrong number of arguments (not three)\n");
+ return 1;
+ }
+
+ pipe_child = atoi(argv[2]);
+ pipe_parent = atoi(argv[3]);
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create ruleset");
+ return 1;
+ }
+
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ perror("Failed to call prctl()");
+ return 1;
+ }
+
+ if (landlock_restrict_self(ruleset_fd, 0)) {
+ perror("Failed to restrict self");
+ return 1;
+ }
+
+ if (close(ruleset_fd)) {
+ perror("Failed to close ruleset");
+ return 1;
+ }
+
+ /* Signals that we are sandboxed. */
+ errno = 0;
+ if (write(pipe_child, ".", 1) != 1) {
+ perror("Failed to write to the second argument");
+ return 1;
+ }
+
+ /* Waits for the parent to try to umount. */
+ if (read(pipe_parent, &buf, 1) != 1) {
+ perror("Failed to write to the third argument");
+ return 1;
+ }
+
+ /* Shifts arguments. */
+ argv[0] = argv[1];
+ argv[1] = argv[2];
+ argv[2] = argv[3];
+ argv[3] = NULL;
+ execve(argv[0], argv, NULL);
+ perror("Failed to execute the provided binary");
+ return 1;
+}
diff --git a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
new file mode 100644
index 000000000000..6825082c079c
--- /dev/null
+++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
@@ -0,0 +1,1152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Abstract UNIX socket
+ *
+ * Copyright © 2024 Tahera Fahimi <fahimitahera@gmail.com>
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <sched.h>
+#include <signal.h>
+#include <stddef.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "audit.h"
+#include "common.h"
+#include "scoped_common.h"
+
+/* Number of pending connections queue to be hold. */
+const short backlog = 10;
+
+static void create_fs_domain(struct __test_metadata *const _metadata)
+{
+ int ruleset_fd;
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ EXPECT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+FIXTURE(scoped_domains)
+{
+ struct service_fixture stream_address, dgram_address;
+};
+
+#include "scoped_base_variants.h"
+
+FIXTURE_SETUP(scoped_domains)
+{
+ drop_caps(_metadata);
+
+ memset(&self->stream_address, 0, sizeof(self->stream_address));
+ memset(&self->dgram_address, 0, sizeof(self->dgram_address));
+ set_unix_address(&self->stream_address, 0);
+ set_unix_address(&self->dgram_address, 1);
+}
+
+FIXTURE_TEARDOWN(scoped_domains)
+{
+}
+
+/*
+ * Test unix_stream_connect() and unix_may_send() for a child connecting to its
+ * parent, when they have scoped domain or no domain.
+ */
+TEST_F(scoped_domains, connect_to_parent)
+{
+ pid_t child;
+ bool can_connect_to_parent;
+ int status;
+ int pipe_parent[2];
+ int stream_server, dgram_server;
+
+ /*
+ * can_connect_to_parent is true if a child process can connect to its
+ * parent process. This depends on the child process not being isolated
+ * from the parent with a dedicated Landlock domain.
+ */
+ can_connect_to_parent = !variant->domain_child;
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ if (variant->domain_both) {
+ create_scoped_domain(_metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+ if (!__test_passed(_metadata))
+ return;
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int err;
+ int stream_client, dgram_client;
+ char buf_child;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ if (variant->domain_child)
+ create_scoped_domain(
+ _metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ stream_client = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_client);
+ dgram_client = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_client);
+
+ /* Waits for the server. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+
+ err = connect(stream_client, &self->stream_address.unix_addr,
+ self->stream_address.unix_addr_len);
+ if (can_connect_to_parent) {
+ EXPECT_EQ(0, err);
+ } else {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EPERM, errno);
+ }
+ EXPECT_EQ(0, close(stream_client));
+
+ err = connect(dgram_client, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len);
+ if (can_connect_to_parent) {
+ EXPECT_EQ(0, err);
+ } else {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EPERM, errno);
+ }
+ EXPECT_EQ(0, close(dgram_client));
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_parent[0]));
+ if (variant->domain_parent)
+ create_scoped_domain(_metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ stream_server = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_server);
+ dgram_server = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_server);
+ ASSERT_EQ(0, bind(stream_server, &self->stream_address.unix_addr,
+ self->stream_address.unix_addr_len));
+ ASSERT_EQ(0, bind(dgram_server, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len));
+ ASSERT_EQ(0, listen(stream_server, backlog));
+
+ /* Signals to child that the parent is listening. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(0, close(stream_server));
+ EXPECT_EQ(0, close(dgram_server));
+
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+/*
+ * Test unix_stream_connect() and unix_may_send() for a parent connecting to
+ * its child, when they have scoped domain or no domain.
+ */
+TEST_F(scoped_domains, connect_to_child)
+{
+ pid_t child;
+ bool can_connect_to_child;
+ int err_stream, err_dgram, errno_stream, errno_dgram, status;
+ int pipe_child[2], pipe_parent[2];
+ char buf;
+ int stream_client, dgram_client;
+
+ /*
+ * can_connect_to_child is true if a parent process can connect to its
+ * child process. The parent process is not isolated from the child
+ * with a dedicated Landlock domain.
+ */
+ can_connect_to_child = !variant->domain_parent;
+
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ if (variant->domain_both) {
+ create_scoped_domain(_metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+ if (!__test_passed(_metadata))
+ return;
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int stream_server, dgram_server;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ EXPECT_EQ(0, close(pipe_child[0]));
+ if (variant->domain_child)
+ create_scoped_domain(
+ _metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ /* Waits for the parent to be in a domain, if any. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+
+ stream_server = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_server);
+ dgram_server = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_server);
+ ASSERT_EQ(0,
+ bind(stream_server, &self->stream_address.unix_addr,
+ self->stream_address.unix_addr_len));
+ ASSERT_EQ(0, bind(dgram_server, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len));
+ ASSERT_EQ(0, listen(stream_server, backlog));
+
+ /* Signals to the parent that child is listening. */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ /* Waits to connect. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+ EXPECT_EQ(0, close(stream_server));
+ EXPECT_EQ(0, close(dgram_server));
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_child[1]));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ if (variant->domain_parent)
+ create_scoped_domain(_metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ /* Signals that the parent is in a domain, if any. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ stream_client = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_client);
+ dgram_client = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_client);
+
+ /* Waits for the child to listen */
+ ASSERT_EQ(1, read(pipe_child[0], &buf, 1));
+ err_stream = connect(stream_client, &self->stream_address.unix_addr,
+ self->stream_address.unix_addr_len);
+ errno_stream = errno;
+ err_dgram = connect(dgram_client, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len);
+ errno_dgram = errno;
+ if (can_connect_to_child) {
+ EXPECT_EQ(0, err_stream);
+ EXPECT_EQ(0, err_dgram);
+ } else {
+ EXPECT_EQ(-1, err_stream);
+ EXPECT_EQ(-1, err_dgram);
+ EXPECT_EQ(EPERM, errno_stream);
+ EXPECT_EQ(EPERM, errno_dgram);
+ }
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(stream_client));
+ EXPECT_EQ(0, close(dgram_client));
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+FIXTURE(scoped_audit)
+{
+ struct service_fixture dgram_address;
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(scoped_audit)
+{
+ disable_caps(_metadata);
+
+ memset(&self->dgram_address, 0, sizeof(self->dgram_address));
+ set_unix_address(&self->dgram_address, 1);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ drop_caps(_metadata);
+}
+
+FIXTURE_TEARDOWN_PARENT(scoped_audit)
+{
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+}
+
+/* python -c 'print(b"\0selftests-landlock-abstract-unix-".hex().upper())' */
+#define ABSTRACT_SOCKET_PATH_PREFIX \
+ "0073656C6674657374732D6C616E646C6F636B2D61627374726163742D756E69782D"
+
+/*
+ * Simpler version of scoped_domains.connect_to_child, but with audit tests.
+ */
+TEST_F(scoped_audit, connect_to_child)
+{
+ pid_t child;
+ int err_dgram, status;
+ int pipe_child[2], pipe_parent[2];
+ char buf;
+ int dgram_client;
+ struct audit_records records;
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int dgram_server;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ /* Waits for the parent to be in a domain. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+
+ dgram_server = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_server);
+ ASSERT_EQ(0, bind(dgram_server, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len));
+
+ /* Signals to the parent that child is listening. */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ /* Waits to connect. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+ EXPECT_EQ(0, close(dgram_server));
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_child[1]));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ /* Signals that the parent is in a domain, if any. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ dgram_client = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_client);
+
+ /* Waits for the child to listen */
+ ASSERT_EQ(1, read(pipe_child[0], &buf, 1));
+ err_dgram = connect(dgram_client, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len);
+ EXPECT_EQ(-1, err_dgram);
+ EXPECT_EQ(EPERM, errno);
+
+ EXPECT_EQ(
+ 0,
+ audit_match_record(
+ self->audit_fd, AUDIT_LANDLOCK_ACCESS,
+ REGEX_LANDLOCK_PREFIX
+ " blockers=scope\\.abstract_unix_socket path=" ABSTRACT_SOCKET_PATH_PREFIX
+ "[0-9A-F]\\+$",
+ NULL));
+
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(dgram_client));
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+FIXTURE(scoped_vs_unscoped)
+{
+ struct service_fixture parent_stream_address, parent_dgram_address,
+ child_stream_address, child_dgram_address;
+};
+
+#include "scoped_multiple_domain_variants.h"
+
+FIXTURE_SETUP(scoped_vs_unscoped)
+{
+ drop_caps(_metadata);
+
+ memset(&self->parent_stream_address, 0,
+ sizeof(self->parent_stream_address));
+ set_unix_address(&self->parent_stream_address, 0);
+ memset(&self->parent_dgram_address, 0,
+ sizeof(self->parent_dgram_address));
+ set_unix_address(&self->parent_dgram_address, 1);
+ memset(&self->child_stream_address, 0,
+ sizeof(self->child_stream_address));
+ set_unix_address(&self->child_stream_address, 2);
+ memset(&self->child_dgram_address, 0,
+ sizeof(self->child_dgram_address));
+ set_unix_address(&self->child_dgram_address, 3);
+}
+
+FIXTURE_TEARDOWN(scoped_vs_unscoped)
+{
+}
+
+/*
+ * Test unix_stream_connect and unix_may_send for parent, child and
+ * grand child processes when they can have scoped or non-scoped domains.
+ */
+TEST_F(scoped_vs_unscoped, unix_scoping)
+{
+ pid_t child;
+ int status;
+ bool can_connect_to_parent, can_connect_to_child;
+ int pipe_parent[2];
+ int stream_server_parent, dgram_server_parent;
+
+ can_connect_to_child = (variant->domain_grand_child != SCOPE_SANDBOX);
+ can_connect_to_parent = (can_connect_to_child &&
+ (variant->domain_children != SCOPE_SANDBOX));
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+
+ if (variant->domain_all == OTHER_SANDBOX)
+ create_fs_domain(_metadata);
+ else if (variant->domain_all == SCOPE_SANDBOX)
+ create_scoped_domain(_metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int stream_server_child, dgram_server_child;
+ int pipe_child[2];
+ pid_t grand_child;
+
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+
+ if (variant->domain_children == OTHER_SANDBOX)
+ create_fs_domain(_metadata);
+ else if (variant->domain_children == SCOPE_SANDBOX)
+ create_scoped_domain(
+ _metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ grand_child = fork();
+ ASSERT_LE(0, grand_child);
+ if (grand_child == 0) {
+ char buf;
+ int stream_err, dgram_err, stream_errno, dgram_errno;
+ int stream_client, dgram_client;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ EXPECT_EQ(0, close(pipe_child[1]));
+
+ if (variant->domain_grand_child == OTHER_SANDBOX)
+ create_fs_domain(_metadata);
+ else if (variant->domain_grand_child == SCOPE_SANDBOX)
+ create_scoped_domain(
+ _metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ stream_client = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_client);
+ dgram_client = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_client);
+
+ ASSERT_EQ(1, read(pipe_child[0], &buf, 1));
+ stream_err = connect(
+ stream_client,
+ &self->child_stream_address.unix_addr,
+ self->child_stream_address.unix_addr_len);
+ stream_errno = errno;
+ dgram_err = connect(
+ dgram_client,
+ &self->child_dgram_address.unix_addr,
+ self->child_dgram_address.unix_addr_len);
+ dgram_errno = errno;
+ if (can_connect_to_child) {
+ EXPECT_EQ(0, stream_err);
+ EXPECT_EQ(0, dgram_err);
+ } else {
+ EXPECT_EQ(-1, stream_err);
+ EXPECT_EQ(-1, dgram_err);
+ EXPECT_EQ(EPERM, stream_errno);
+ EXPECT_EQ(EPERM, dgram_errno);
+ }
+
+ EXPECT_EQ(0, close(stream_client));
+ stream_client = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_client);
+ /* Datagram sockets can "reconnect". */
+
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+ stream_err = connect(
+ stream_client,
+ &self->parent_stream_address.unix_addr,
+ self->parent_stream_address.unix_addr_len);
+ stream_errno = errno;
+ dgram_err = connect(
+ dgram_client,
+ &self->parent_dgram_address.unix_addr,
+ self->parent_dgram_address.unix_addr_len);
+ dgram_errno = errno;
+ if (can_connect_to_parent) {
+ EXPECT_EQ(0, stream_err);
+ EXPECT_EQ(0, dgram_err);
+ } else {
+ EXPECT_EQ(-1, stream_err);
+ EXPECT_EQ(-1, dgram_err);
+ EXPECT_EQ(EPERM, stream_errno);
+ EXPECT_EQ(EPERM, dgram_errno);
+ }
+ EXPECT_EQ(0, close(stream_client));
+ EXPECT_EQ(0, close(dgram_client));
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_child[0]));
+ if (variant->domain_child == OTHER_SANDBOX)
+ create_fs_domain(_metadata);
+ else if (variant->domain_child == SCOPE_SANDBOX)
+ create_scoped_domain(
+ _metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ stream_server_child = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_server_child);
+ dgram_server_child = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_server_child);
+
+ ASSERT_EQ(0, bind(stream_server_child,
+ &self->child_stream_address.unix_addr,
+ self->child_stream_address.unix_addr_len));
+ ASSERT_EQ(0, bind(dgram_server_child,
+ &self->child_dgram_address.unix_addr,
+ self->child_dgram_address.unix_addr_len));
+ ASSERT_EQ(0, listen(stream_server_child, backlog));
+
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+ ASSERT_EQ(grand_child, waitpid(grand_child, &status, 0));
+ EXPECT_EQ(0, close(stream_server_child))
+ EXPECT_EQ(0, close(dgram_server_child));
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ if (variant->domain_parent == OTHER_SANDBOX)
+ create_fs_domain(_metadata);
+ else if (variant->domain_parent == SCOPE_SANDBOX)
+ create_scoped_domain(_metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ stream_server_parent = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_server_parent);
+ dgram_server_parent = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_server_parent);
+ ASSERT_EQ(0, bind(stream_server_parent,
+ &self->parent_stream_address.unix_addr,
+ self->parent_stream_address.unix_addr_len));
+ ASSERT_EQ(0, bind(dgram_server_parent,
+ &self->parent_dgram_address.unix_addr,
+ self->parent_dgram_address.unix_addr_len));
+
+ ASSERT_EQ(0, listen(stream_server_parent, backlog));
+
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(0, close(stream_server_parent));
+ EXPECT_EQ(0, close(dgram_server_parent));
+
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+FIXTURE(outside_socket)
+{
+ struct service_fixture address, transit_address;
+};
+
+FIXTURE_VARIANT(outside_socket)
+{
+ const bool child_socket;
+ const int type;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(outside_socket, allow_dgram_child) {
+ /* clang-format on */
+ .child_socket = true,
+ .type = SOCK_DGRAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(outside_socket, deny_dgram_server) {
+ /* clang-format on */
+ .child_socket = false,
+ .type = SOCK_DGRAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(outside_socket, allow_stream_child) {
+ /* clang-format on */
+ .child_socket = true,
+ .type = SOCK_STREAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(outside_socket, deny_stream_server) {
+ /* clang-format on */
+ .child_socket = false,
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_SETUP(outside_socket)
+{
+ drop_caps(_metadata);
+
+ memset(&self->transit_address, 0, sizeof(self->transit_address));
+ set_unix_address(&self->transit_address, 0);
+ memset(&self->address, 0, sizeof(self->address));
+ set_unix_address(&self->address, 1);
+}
+
+FIXTURE_TEARDOWN(outside_socket)
+{
+}
+
+/*
+ * Test unix_stream_connect and unix_may_send for parent and child processes
+ * when connecting socket has different domain than the process using it.
+ */
+TEST_F(outside_socket, socket_with_different_domain)
+{
+ pid_t child;
+ int err, status;
+ int pipe_child[2], pipe_parent[2];
+ char buf_parent;
+ int server_socket;
+
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int client_socket;
+ char buf_child;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ /* Client always has a domain. */
+ create_scoped_domain(_metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ if (variant->child_socket) {
+ int data_socket, passed_socket, stream_server;
+
+ passed_socket = socket(AF_UNIX, variant->type, 0);
+ ASSERT_LE(0, passed_socket);
+ stream_server = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_server);
+ ASSERT_EQ(0, bind(stream_server,
+ &self->transit_address.unix_addr,
+ self->transit_address.unix_addr_len));
+ ASSERT_EQ(0, listen(stream_server, backlog));
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+ data_socket = accept(stream_server, NULL, NULL);
+ ASSERT_LE(0, data_socket);
+ ASSERT_EQ(0, send_fd(data_socket, passed_socket));
+ EXPECT_EQ(0, close(passed_socket));
+ EXPECT_EQ(0, close(stream_server));
+ }
+
+ client_socket = socket(AF_UNIX, variant->type, 0);
+ ASSERT_LE(0, client_socket);
+
+ /* Waits for parent signal for connection. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+ err = connect(client_socket, &self->address.unix_addr,
+ self->address.unix_addr_len);
+ if (variant->child_socket) {
+ EXPECT_EQ(0, err);
+ } else {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EPERM, errno);
+ }
+ EXPECT_EQ(0, close(client_socket));
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_child[1]));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ if (variant->child_socket) {
+ int client_child = socket(AF_UNIX, SOCK_STREAM, 0);
+
+ ASSERT_LE(0, client_child);
+ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+ ASSERT_EQ(0, connect(client_child,
+ &self->transit_address.unix_addr,
+ self->transit_address.unix_addr_len));
+ server_socket = recv_fd(client_child);
+ EXPECT_EQ(0, close(client_child));
+ } else {
+ server_socket = socket(AF_UNIX, variant->type, 0);
+ }
+ ASSERT_LE(0, server_socket);
+
+ /* Server always has a domain. */
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ ASSERT_EQ(0, bind(server_socket, &self->address.unix_addr,
+ self->address.unix_addr_len));
+ if (variant->type == SOCK_STREAM)
+ ASSERT_EQ(0, listen(server_socket, backlog));
+
+ /* Signals to child that the parent is listening. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(0, close(server_socket));
+
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+static const char stream_path[] = TMP_DIR "/stream.sock";
+static const char dgram_path[] = TMP_DIR "/dgram.sock";
+
+/* clang-format off */
+FIXTURE(various_address_sockets) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(various_address_sockets)
+{
+ const int domain;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(various_address_sockets, pathname_socket_scoped_domain) {
+ /* clang-format on */
+ .domain = SCOPE_SANDBOX,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(various_address_sockets, pathname_socket_other_domain) {
+ /* clang-format on */
+ .domain = OTHER_SANDBOX,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(various_address_sockets, pathname_socket_no_domain) {
+ /* clang-format on */
+ .domain = NO_SANDBOX,
+};
+
+FIXTURE_SETUP(various_address_sockets)
+{
+ drop_caps(_metadata);
+
+ umask(0077);
+ ASSERT_EQ(0, mkdir(TMP_DIR, 0700));
+}
+
+FIXTURE_TEARDOWN(various_address_sockets)
+{
+ EXPECT_EQ(0, unlink(stream_path));
+ EXPECT_EQ(0, unlink(dgram_path));
+ EXPECT_EQ(0, rmdir(TMP_DIR));
+}
+
+TEST_F(various_address_sockets, scoped_pathname_sockets)
+{
+ socklen_t size_stream, size_dgram;
+ pid_t child;
+ int status;
+ char buf_child, buf_parent;
+ int pipe_parent[2];
+ int unnamed_sockets[2];
+ int stream_pathname_socket, dgram_pathname_socket,
+ stream_abstract_socket, dgram_abstract_socket, data_socket;
+ struct service_fixture stream_abstract_addr, dgram_abstract_addr;
+ struct sockaddr_un stream_pathname_addr = {
+ .sun_family = AF_UNIX,
+ };
+ struct sockaddr_un dgram_pathname_addr = {
+ .sun_family = AF_UNIX,
+ };
+
+ /* Pathname address. */
+ snprintf(stream_pathname_addr.sun_path,
+ sizeof(stream_pathname_addr.sun_path), "%s", stream_path);
+ size_stream = offsetof(struct sockaddr_un, sun_path) +
+ strlen(stream_pathname_addr.sun_path);
+ snprintf(dgram_pathname_addr.sun_path,
+ sizeof(dgram_pathname_addr.sun_path), "%s", dgram_path);
+ size_dgram = offsetof(struct sockaddr_un, sun_path) +
+ strlen(dgram_pathname_addr.sun_path);
+
+ /* Abstract address. */
+ memset(&stream_abstract_addr, 0, sizeof(stream_abstract_addr));
+ set_unix_address(&stream_abstract_addr, 0);
+ memset(&dgram_abstract_addr, 0, sizeof(dgram_abstract_addr));
+ set_unix_address(&dgram_abstract_addr, 1);
+
+ /* Unnamed address for datagram socket. */
+ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_DGRAM, 0, unnamed_sockets));
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int err;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ EXPECT_EQ(0, close(unnamed_sockets[1]));
+
+ if (variant->domain == SCOPE_SANDBOX)
+ create_scoped_domain(
+ _metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+ else if (variant->domain == OTHER_SANDBOX)
+ create_fs_domain(_metadata);
+
+ /* Waits for parent to listen. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ /* Checks that we can send data through a datagram socket. */
+ ASSERT_EQ(1, write(unnamed_sockets[0], "a", 1));
+ EXPECT_EQ(0, close(unnamed_sockets[0]));
+
+ /* Connects with pathname sockets. */
+ stream_pathname_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_pathname_socket);
+ ASSERT_EQ(0, connect(stream_pathname_socket,
+ &stream_pathname_addr, size_stream));
+ ASSERT_EQ(1, write(stream_pathname_socket, "b", 1));
+ EXPECT_EQ(0, close(stream_pathname_socket));
+
+ /* Sends without connection. */
+ dgram_pathname_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_pathname_socket);
+ err = sendto(dgram_pathname_socket, "c", 1, 0,
+ &dgram_pathname_addr, size_dgram);
+ EXPECT_EQ(1, err);
+
+ /* Sends with connection. */
+ ASSERT_EQ(0, connect(dgram_pathname_socket,
+ &dgram_pathname_addr, size_dgram));
+ ASSERT_EQ(1, write(dgram_pathname_socket, "d", 1));
+ EXPECT_EQ(0, close(dgram_pathname_socket));
+
+ /* Connects with abstract sockets. */
+ stream_abstract_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_abstract_socket);
+ err = connect(stream_abstract_socket,
+ &stream_abstract_addr.unix_addr,
+ stream_abstract_addr.unix_addr_len);
+ if (variant->domain == SCOPE_SANDBOX) {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EPERM, errno);
+ } else {
+ EXPECT_EQ(0, err);
+ ASSERT_EQ(1, write(stream_abstract_socket, "e", 1));
+ }
+ EXPECT_EQ(0, close(stream_abstract_socket));
+
+ /* Sends without connection. */
+ dgram_abstract_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_abstract_socket);
+ err = sendto(dgram_abstract_socket, "f", 1, 0,
+ &dgram_abstract_addr.unix_addr,
+ dgram_abstract_addr.unix_addr_len);
+ if (variant->domain == SCOPE_SANDBOX) {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EPERM, errno);
+ } else {
+ EXPECT_EQ(1, err);
+ }
+
+ /* Sends with connection. */
+ err = connect(dgram_abstract_socket,
+ &dgram_abstract_addr.unix_addr,
+ dgram_abstract_addr.unix_addr_len);
+ if (variant->domain == SCOPE_SANDBOX) {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EPERM, errno);
+ } else {
+ EXPECT_EQ(0, err);
+ ASSERT_EQ(1, write(dgram_abstract_socket, "g", 1));
+ }
+ EXPECT_EQ(0, close(dgram_abstract_socket));
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_parent[0]));
+ EXPECT_EQ(0, close(unnamed_sockets[0]));
+
+ /* Sets up pathname servers. */
+ stream_pathname_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_pathname_socket);
+ ASSERT_EQ(0, bind(stream_pathname_socket, &stream_pathname_addr,
+ size_stream));
+ ASSERT_EQ(0, listen(stream_pathname_socket, backlog));
+
+ dgram_pathname_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_pathname_socket);
+ ASSERT_EQ(0, bind(dgram_pathname_socket, &dgram_pathname_addr,
+ size_dgram));
+
+ /* Sets up abstract servers. */
+ stream_abstract_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, stream_abstract_socket);
+ ASSERT_EQ(0,
+ bind(stream_abstract_socket, &stream_abstract_addr.unix_addr,
+ stream_abstract_addr.unix_addr_len));
+
+ dgram_abstract_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_abstract_socket);
+ ASSERT_EQ(0, bind(dgram_abstract_socket, &dgram_abstract_addr.unix_addr,
+ dgram_abstract_addr.unix_addr_len));
+ ASSERT_EQ(0, listen(stream_abstract_socket, backlog));
+
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+
+ /* Reads from unnamed socket. */
+ ASSERT_EQ(1, read(unnamed_sockets[1], &buf_parent, sizeof(buf_parent)));
+ ASSERT_EQ('a', buf_parent);
+ EXPECT_LE(0, close(unnamed_sockets[1]));
+
+ /* Reads from pathname sockets. */
+ data_socket = accept(stream_pathname_socket, NULL, NULL);
+ ASSERT_LE(0, data_socket);
+ ASSERT_EQ(1, read(data_socket, &buf_parent, sizeof(buf_parent)));
+ ASSERT_EQ('b', buf_parent);
+ EXPECT_EQ(0, close(data_socket));
+ EXPECT_EQ(0, close(stream_pathname_socket));
+
+ ASSERT_EQ(1,
+ read(dgram_pathname_socket, &buf_parent, sizeof(buf_parent)));
+ ASSERT_EQ('c', buf_parent);
+ ASSERT_EQ(1,
+ read(dgram_pathname_socket, &buf_parent, sizeof(buf_parent)));
+ ASSERT_EQ('d', buf_parent);
+ EXPECT_EQ(0, close(dgram_pathname_socket));
+
+ if (variant->domain != SCOPE_SANDBOX) {
+ /* Reads from abstract sockets if allowed to send. */
+ data_socket = accept(stream_abstract_socket, NULL, NULL);
+ ASSERT_LE(0, data_socket);
+ ASSERT_EQ(1,
+ read(data_socket, &buf_parent, sizeof(buf_parent)));
+ ASSERT_EQ('e', buf_parent);
+ EXPECT_EQ(0, close(data_socket));
+
+ ASSERT_EQ(1, read(dgram_abstract_socket, &buf_parent,
+ sizeof(buf_parent)));
+ ASSERT_EQ('f', buf_parent);
+ ASSERT_EQ(1, read(dgram_abstract_socket, &buf_parent,
+ sizeof(buf_parent)));
+ ASSERT_EQ('g', buf_parent);
+ }
+
+ /* Waits for all abstract socket tests. */
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(0, close(stream_abstract_socket));
+ EXPECT_EQ(0, close(dgram_abstract_socket));
+
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+TEST(datagram_sockets)
+{
+ struct service_fixture connected_addr, non_connected_addr;
+ int server_conn_socket, server_unconn_socket;
+ int pipe_parent[2], pipe_child[2];
+ int status;
+ char buf;
+ pid_t child;
+
+ drop_caps(_metadata);
+ memset(&connected_addr, 0, sizeof(connected_addr));
+ set_unix_address(&connected_addr, 0);
+ memset(&non_connected_addr, 0, sizeof(non_connected_addr));
+ set_unix_address(&non_connected_addr, 1);
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int client_conn_socket, client_unconn_socket;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ client_conn_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ client_unconn_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, client_conn_socket);
+ ASSERT_LE(0, client_unconn_socket);
+
+ /* Waits for parent to listen. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+ ASSERT_EQ(0,
+ connect(client_conn_socket, &connected_addr.unix_addr,
+ connected_addr.unix_addr_len));
+
+ /*
+ * Both connected and non-connected sockets can send data when
+ * the domain is not scoped.
+ */
+ ASSERT_EQ(1, send(client_conn_socket, ".", 1, 0));
+ ASSERT_EQ(1, sendto(client_unconn_socket, ".", 1, 0,
+ &non_connected_addr.unix_addr,
+ non_connected_addr.unix_addr_len));
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ /* Scopes the domain. */
+ create_scoped_domain(_metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ /*
+ * Connected socket sends data to the receiver, but the
+ * non-connected socket must fail to send data.
+ */
+ ASSERT_EQ(1, send(client_conn_socket, ".", 1, 0));
+ ASSERT_EQ(-1, sendto(client_unconn_socket, ".", 1, 0,
+ &non_connected_addr.unix_addr,
+ non_connected_addr.unix_addr_len));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ EXPECT_EQ(0, close(client_conn_socket));
+ EXPECT_EQ(0, close(client_unconn_socket));
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_parent[0]));
+ EXPECT_EQ(0, close(pipe_child[1]));
+
+ server_conn_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ server_unconn_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, server_conn_socket);
+ ASSERT_LE(0, server_unconn_socket);
+
+ ASSERT_EQ(0, bind(server_conn_socket, &connected_addr.unix_addr,
+ connected_addr.unix_addr_len));
+ ASSERT_EQ(0, bind(server_unconn_socket, &non_connected_addr.unix_addr,
+ non_connected_addr.unix_addr_len));
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ /* Waits for child to test. */
+ ASSERT_EQ(1, read(pipe_child[0], &buf, 1));
+ ASSERT_EQ(1, recv(server_conn_socket, &buf, 1, 0));
+ ASSERT_EQ(1, recv(server_unconn_socket, &buf, 1, 0));
+
+ /*
+ * Connected datagram socket will receive data, but
+ * non-connected datagram socket does not receive data.
+ */
+ ASSERT_EQ(1, read(pipe_child[0], &buf, 1));
+ ASSERT_EQ(1, recv(server_conn_socket, &buf, 1, 0));
+
+ /* Waits for all tests to finish. */
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(0, close(server_conn_socket));
+ EXPECT_EQ(0, close(server_unconn_socket));
+
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+TEST(self_connect)
+{
+ struct service_fixture connected_addr, non_connected_addr;
+ int connected_socket, non_connected_socket, status;
+ pid_t child;
+
+ drop_caps(_metadata);
+ memset(&connected_addr, 0, sizeof(connected_addr));
+ set_unix_address(&connected_addr, 0);
+ memset(&non_connected_addr, 0, sizeof(non_connected_addr));
+ set_unix_address(&non_connected_addr, 1);
+
+ connected_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ non_connected_socket = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, connected_socket);
+ ASSERT_LE(0, non_connected_socket);
+
+ ASSERT_EQ(0, bind(connected_socket, &connected_addr.unix_addr,
+ connected_addr.unix_addr_len));
+ ASSERT_EQ(0, bind(non_connected_socket, &non_connected_addr.unix_addr,
+ non_connected_addr.unix_addr_len));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ /* Child's domain is scoped. */
+ create_scoped_domain(_metadata,
+ LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ /*
+ * The child inherits the sockets, and cannot connect or
+ * send data to them.
+ */
+ ASSERT_EQ(-1,
+ connect(connected_socket, &connected_addr.unix_addr,
+ connected_addr.unix_addr_len));
+ ASSERT_EQ(EPERM, errno);
+
+ ASSERT_EQ(-1, sendto(connected_socket, ".", 1, 0,
+ &connected_addr.unix_addr,
+ connected_addr.unix_addr_len));
+ ASSERT_EQ(EPERM, errno);
+
+ ASSERT_EQ(-1, sendto(non_connected_socket, ".", 1, 0,
+ &non_connected_addr.unix_addr,
+ non_connected_addr.unix_addr_len));
+ ASSERT_EQ(EPERM, errno);
+
+ EXPECT_EQ(0, close(connected_socket));
+ EXPECT_EQ(0, close(non_connected_socket));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ /* Waits for all tests to finish. */
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(0, close(connected_socket));
+ EXPECT_EQ(0, close(non_connected_socket));
+
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/scoped_base_variants.h b/tools/testing/selftests/landlock/scoped_base_variants.h
new file mode 100644
index 000000000000..d3b1fa8a584e
--- /dev/null
+++ b/tools/testing/selftests/landlock/scoped_base_variants.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock scoped_domains variants
+ *
+ * See the hierarchy variants from ptrace_test.c
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ * Copyright © 2024 Tahera Fahimi <fahimitahera@gmail.com>
+ */
+
+/* clang-format on */
+FIXTURE_VARIANT(scoped_domains)
+{
+ bool domain_both;
+ bool domain_parent;
+ bool domain_child;
+};
+
+/*
+ * No domain
+ *
+ * P1-. P1 -> P2 : allow
+ * \ P2 -> P1 : allow
+ * 'P2
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, without_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = false,
+};
+
+/*
+ * Child domain
+ *
+ * P1--. P1 -> P2 : allow
+ * \ P2 -> P1 : deny
+ * .'-----.
+ * | P2 |
+ * '------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, child_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = true,
+};
+
+/*
+ * Parent domain
+ * .------.
+ * | P1 --. P1 -> P2 : deny
+ * '------' \ P2 -> P1 : allow
+ * '
+ * P2
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, parent_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = false,
+};
+
+/*
+ * Parent + child domain (siblings)
+ * .------.
+ * | P1 ---. P1 -> P2 : deny
+ * '------' \ P2 -> P1 : deny
+ * .---'--.
+ * | P2 |
+ * '------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, sibling_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = true,
+};
+
+/*
+ * Same domain (inherited)
+ * .-------------.
+ * | P1----. | P1 -> P2 : allow
+ * | \ | P2 -> P1 : allow
+ * | ' |
+ * | P2 |
+ * '-------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, inherited_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = false,
+};
+
+/*
+ * Inherited + child domain
+ * .-----------------.
+ * | P1----. | P1 -> P2 : allow
+ * | \ | P2 -> P1 : deny
+ * | .-'----. |
+ * | | P2 | |
+ * | '------' |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, nested_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = true,
+};
+
+/*
+ * Inherited + parent domain
+ * .-----------------.
+ * |.------. | P1 -> P2 : deny
+ * || P1 ----. | P2 -> P1 : allow
+ * |'------' \ |
+ * | ' |
+ * | P2 |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, nested_and_parent_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = false,
+};
+
+/*
+ * Inherited + parent and child domain (siblings)
+ * .-----------------.
+ * | .------. | P1 -> P2 : deny
+ * | | P1 . | P2 -> P1 : deny
+ * | '------'\ |
+ * | \ |
+ * | .--'---. |
+ * | | P2 | |
+ * | '------' |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_domains, forked_domains) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = true,
+};
diff --git a/tools/testing/selftests/landlock/scoped_common.h b/tools/testing/selftests/landlock/scoped_common.h
new file mode 100644
index 000000000000..a9a912d30c4d
--- /dev/null
+++ b/tools/testing/selftests/landlock/scoped_common.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock scope test helpers
+ *
+ * Copyright © 2024 Tahera Fahimi <fahimitahera@gmail.com>
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+
+static void create_scoped_domain(struct __test_metadata *const _metadata,
+ const __u16 scope)
+{
+ int ruleset_fd;
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = scope,
+ };
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+}
diff --git a/tools/testing/selftests/landlock/scoped_multiple_domain_variants.h b/tools/testing/selftests/landlock/scoped_multiple_domain_variants.h
new file mode 100644
index 000000000000..bcd9a83805d0
--- /dev/null
+++ b/tools/testing/selftests/landlock/scoped_multiple_domain_variants.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock variants for three processes with various domains.
+ *
+ * Copyright © 2024 Tahera Fahimi <fahimitahera@gmail.com>
+ */
+
+enum sandbox_type {
+ NO_SANDBOX,
+ SCOPE_SANDBOX,
+ /* Any other type of sandboxing domain */
+ OTHER_SANDBOX,
+};
+
+/* clang-format on */
+FIXTURE_VARIANT(scoped_vs_unscoped)
+{
+ const int domain_all;
+ const int domain_parent;
+ const int domain_children;
+ const int domain_child;
+ const int domain_grand_child;
+};
+
+/*
+ * .-----------------.
+ * | ####### | P3 -> P2 : allow
+ * | P1----# P2 # | P3 -> P1 : deny
+ * | # | # |
+ * | # P3 # |
+ * | ####### |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_vs_unscoped, deny_scoped) {
+ .domain_all = OTHER_SANDBOX,
+ .domain_parent = NO_SANDBOX,
+ .domain_children = SCOPE_SANDBOX,
+ .domain_child = NO_SANDBOX,
+ .domain_grand_child = NO_SANDBOX,
+ /* clang-format on */
+};
+
+/*
+ * ###################
+ * # ####### # P3 -> P2 : allow
+ * # P1----# P2 # # P3 -> P1 : deny
+ * # # | # #
+ * # # P3 # #
+ * # ####### #
+ * ###################
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_vs_unscoped, all_scoped) {
+ .domain_all = SCOPE_SANDBOX,
+ .domain_parent = NO_SANDBOX,
+ .domain_children = SCOPE_SANDBOX,
+ .domain_child = NO_SANDBOX,
+ .domain_grand_child = NO_SANDBOX,
+ /* clang-format on */
+};
+
+/*
+ * .-----------------.
+ * | .-----. | P3 -> P2 : allow
+ * | P1----| P2 | | P3 -> P1 : allow
+ * | | | |
+ * | | P3 | |
+ * | '-----' |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_vs_unscoped, allow_with_other_domain) {
+ .domain_all = OTHER_SANDBOX,
+ .domain_parent = NO_SANDBOX,
+ .domain_children = OTHER_SANDBOX,
+ .domain_child = NO_SANDBOX,
+ .domain_grand_child = NO_SANDBOX,
+ /* clang-format on */
+};
+
+/*
+ * .----. ###### P3 -> P2 : allow
+ * | P1 |----# P2 # P3 -> P1 : allow
+ * '----' ######
+ * |
+ * P3
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_vs_unscoped, allow_with_one_domain) {
+ .domain_all = NO_SANDBOX,
+ .domain_parent = OTHER_SANDBOX,
+ .domain_children = NO_SANDBOX,
+ .domain_child = SCOPE_SANDBOX,
+ .domain_grand_child = NO_SANDBOX,
+ /* clang-format on */
+};
+
+/*
+ * ###### .-----. P3 -> P2 : allow
+ * # P1 #----| P2 | P3 -> P1 : allow
+ * ###### '-----'
+ * |
+ * P3
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_vs_unscoped, allow_with_grand_parent_scoped) {
+ .domain_all = NO_SANDBOX,
+ .domain_parent = SCOPE_SANDBOX,
+ .domain_children = NO_SANDBOX,
+ .domain_child = OTHER_SANDBOX,
+ .domain_grand_child = NO_SANDBOX,
+ /* clang-format on */
+};
+
+/*
+ * ###### ###### P3 -> P2 : allow
+ * # P1 #----# P2 # P3 -> P1 : allow
+ * ###### ######
+ * |
+ * .----.
+ * | P3 |
+ * '----'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_vs_unscoped, allow_with_parents_domain) {
+ .domain_all = NO_SANDBOX,
+ .domain_parent = SCOPE_SANDBOX,
+ .domain_children = NO_SANDBOX,
+ .domain_child = SCOPE_SANDBOX,
+ .domain_grand_child = NO_SANDBOX,
+ /* clang-format on */
+};
+
+/*
+ * ###### P3 -> P2 : deny
+ * # P1 #----P2 P3 -> P1 : deny
+ * ###### |
+ * |
+ * ######
+ * # P3 #
+ * ######
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoped_vs_unscoped, deny_with_self_and_grandparent_domain) {
+ .domain_all = NO_SANDBOX,
+ .domain_parent = SCOPE_SANDBOX,
+ .domain_children = NO_SANDBOX,
+ .domain_child = NO_SANDBOX,
+ .domain_grand_child = SCOPE_SANDBOX,
+ /* clang-format on */
+};
diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c
new file mode 100644
index 000000000000..d8bf33417619
--- /dev/null
+++ b/tools/testing/selftests/landlock/scoped_signal_test.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Signal Scoping
+ *
+ * Copyright © 2024 Tahera Fahimi <fahimitahera@gmail.com>
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "scoped_common.h"
+
+/* This variable is used for handling several signals. */
+static volatile sig_atomic_t is_signaled;
+
+/* clang-format off */
+FIXTURE(scoping_signals) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(scoping_signals)
+{
+ int sig;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoping_signals, sigtrap) {
+ /* clang-format on */
+ .sig = SIGTRAP,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoping_signals, sigurg) {
+ /* clang-format on */
+ .sig = SIGURG,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoping_signals, sighup) {
+ /* clang-format on */
+ .sig = SIGHUP,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(scoping_signals, sigtstp) {
+ /* clang-format on */
+ .sig = SIGTSTP,
+};
+
+FIXTURE_SETUP(scoping_signals)
+{
+ drop_caps(_metadata);
+
+ is_signaled = 0;
+}
+
+FIXTURE_TEARDOWN(scoping_signals)
+{
+}
+
+static void scope_signal_handler(int sig, siginfo_t *info, void *ucontext)
+{
+ if (sig == SIGTRAP || sig == SIGURG || sig == SIGHUP || sig == SIGTSTP)
+ is_signaled = 1;
+}
+
+/*
+ * In this test, a child process sends a signal to parent before and
+ * after getting scoped.
+ */
+TEST_F(scoping_signals, send_sig_to_parent)
+{
+ int pipe_parent[2];
+ int status;
+ pid_t child;
+ pid_t parent = getpid();
+ struct sigaction action = {
+ .sa_sigaction = scope_signal_handler,
+ .sa_flags = SA_SIGINFO,
+
+ };
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ ASSERT_LE(0, sigaction(variant->sig, &action, NULL));
+
+ /* The process should not have already been signaled. */
+ EXPECT_EQ(0, is_signaled);
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ char buf_child;
+ int err;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+
+ /*
+ * The child process can send signal to parent when
+ * domain is not scoped.
+ */
+ err = kill(parent, variant->sig);
+ ASSERT_EQ(0, err);
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ /*
+ * The child process cannot send signal to the parent
+ * anymore.
+ */
+ err = kill(parent, variant->sig);
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(EPERM, errno);
+
+ /*
+ * No matter of the domain, a process should be able to
+ * send a signal to itself.
+ */
+ ASSERT_EQ(0, is_signaled);
+ ASSERT_EQ(0, raise(variant->sig));
+ ASSERT_EQ(1, is_signaled);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ /* Waits for a first signal to be received, without race condition. */
+ while (!is_signaled && !usleep(1))
+ ;
+ ASSERT_EQ(1, is_signaled);
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ is_signaled = 0;
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ EXPECT_EQ(0, is_signaled);
+}
+
+/* clang-format off */
+FIXTURE(scoped_domains) {};
+/* clang-format on */
+
+#include "scoped_base_variants.h"
+
+FIXTURE_SETUP(scoped_domains)
+{
+ drop_caps(_metadata);
+}
+
+FIXTURE_TEARDOWN(scoped_domains)
+{
+}
+
+/*
+ * This test ensures that a scoped process cannot send signal out of
+ * scoped domain.
+ */
+TEST_F(scoped_domains, check_access_signal)
+{
+ pid_t child;
+ pid_t parent = getpid();
+ int status;
+ bool can_signal_child, can_signal_parent;
+ int pipe_parent[2], pipe_child[2];
+ char buf_parent;
+ int err;
+
+ can_signal_parent = !variant->domain_child;
+ can_signal_child = !variant->domain_parent;
+
+ if (variant->domain_both)
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ char buf_child;
+
+ EXPECT_EQ(0, close(pipe_child[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+
+ if (variant->domain_child)
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_child[1]));
+
+ /* Waits for the parent to send signals. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ err = kill(parent, 0);
+ if (can_signal_parent) {
+ ASSERT_EQ(0, err);
+ } else {
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(EPERM, errno);
+ }
+ /*
+ * No matter of the domain, a process should be able to
+ * send a signal to itself.
+ */
+ ASSERT_EQ(0, raise(0));
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_parent[0]));
+ EXPECT_EQ(0, close(pipe_child[1]));
+
+ if (variant->domain_parent)
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ err = kill(child, 0);
+ if (can_signal_child) {
+ ASSERT_EQ(0, err);
+ } else {
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(EPERM, errno);
+ }
+ ASSERT_EQ(0, raise(0));
+
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+enum thread_return {
+ THREAD_INVALID = 0,
+ THREAD_SUCCESS = 1,
+ THREAD_ERROR = 2,
+ THREAD_TEST_FAILED = 3,
+};
+
+static void *thread_sync(void *arg)
+{
+ const int pipe_read = *(int *)arg;
+ char buf;
+
+ if (read(pipe_read, &buf, 1) != 1)
+ return (void *)THREAD_ERROR;
+
+ return (void *)THREAD_SUCCESS;
+}
+
+TEST(signal_scoping_thread_before)
+{
+ pthread_t no_sandbox_thread;
+ enum thread_return ret = THREAD_INVALID;
+ int thread_pipe[2];
+
+ drop_caps(_metadata);
+ ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC));
+
+ ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_sync,
+ &thread_pipe[0]));
+
+ /* Enforces restriction after creating the thread. */
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ EXPECT_EQ(0, pthread_kill(no_sandbox_thread, 0));
+ EXPECT_EQ(1, write(thread_pipe[1], ".", 1));
+
+ EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret));
+ EXPECT_EQ(THREAD_SUCCESS, ret);
+
+ EXPECT_EQ(0, close(thread_pipe[0]));
+ EXPECT_EQ(0, close(thread_pipe[1]));
+}
+
+TEST(signal_scoping_thread_after)
+{
+ pthread_t scoped_thread;
+ enum thread_return ret = THREAD_INVALID;
+ int thread_pipe[2];
+
+ drop_caps(_metadata);
+ ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC));
+
+ /* Enforces restriction before creating the thread. */
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_sync,
+ &thread_pipe[0]));
+
+ EXPECT_EQ(0, pthread_kill(scoped_thread, 0));
+ EXPECT_EQ(1, write(thread_pipe[1], ".", 1));
+
+ EXPECT_EQ(0, pthread_join(scoped_thread, (void **)&ret));
+ EXPECT_EQ(THREAD_SUCCESS, ret);
+
+ EXPECT_EQ(0, close(thread_pipe[0]));
+ EXPECT_EQ(0, close(thread_pipe[1]));
+}
+
+struct thread_setuid_args {
+ int pipe_read, new_uid;
+};
+
+void *thread_setuid(void *ptr)
+{
+ const struct thread_setuid_args *arg = ptr;
+ char buf;
+
+ if (read(arg->pipe_read, &buf, 1) != 1)
+ return (void *)THREAD_ERROR;
+
+ /* libc's setuid() should update all thread's credentials. */
+ if (getuid() != arg->new_uid)
+ return (void *)THREAD_TEST_FAILED;
+
+ return (void *)THREAD_SUCCESS;
+}
+
+TEST(signal_scoping_thread_setuid)
+{
+ struct thread_setuid_args arg;
+ pthread_t no_sandbox_thread;
+ enum thread_return ret = THREAD_INVALID;
+ int pipe_parent[2];
+ int prev_uid;
+
+ disable_caps(_metadata);
+
+ /* This test does not need to be run as root. */
+ prev_uid = getuid();
+ arg.new_uid = prev_uid + 1;
+ EXPECT_LT(0, arg.new_uid);
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ arg.pipe_read = pipe_parent[0];
+
+ /* Capabilities must be set before creating a new thread. */
+ set_cap(_metadata, CAP_SETUID);
+ ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_setuid,
+ &arg));
+
+ /* Enforces restriction after creating the thread. */
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ EXPECT_NE(arg.new_uid, getuid());
+ EXPECT_EQ(0, setuid(arg.new_uid));
+ EXPECT_EQ(arg.new_uid, getuid());
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret));
+ EXPECT_EQ(THREAD_SUCCESS, ret);
+
+ clear_cap(_metadata, CAP_SETUID);
+ EXPECT_EQ(0, close(pipe_parent[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+}
+
+const short backlog = 10;
+
+static volatile sig_atomic_t signal_received;
+
+static void handle_sigurg(int sig)
+{
+ if (sig == SIGURG)
+ signal_received = 1;
+ else
+ signal_received = -1;
+}
+
+static int setup_signal_handler(int signal)
+{
+ struct sigaction sa = {
+ .sa_handler = handle_sigurg,
+ };
+
+ if (sigemptyset(&sa.sa_mask))
+ return -1;
+
+ sa.sa_flags = SA_SIGINFO | SA_RESTART;
+ return sigaction(SIGURG, &sa, NULL);
+}
+
+/* clang-format off */
+FIXTURE(fown) {};
+/* clang-format on */
+
+enum fown_sandbox {
+ SANDBOX_NONE,
+ SANDBOX_BEFORE_FORK,
+ SANDBOX_BEFORE_SETOWN,
+ SANDBOX_AFTER_SETOWN,
+};
+
+FIXTURE_VARIANT(fown)
+{
+ const enum fown_sandbox sandbox_setown;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(fown, no_sandbox) {
+ /* clang-format on */
+ .sandbox_setown = SANDBOX_NONE,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(fown, sandbox_before_fork) {
+ /* clang-format on */
+ .sandbox_setown = SANDBOX_BEFORE_FORK,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(fown, sandbox_before_setown) {
+ /* clang-format on */
+ .sandbox_setown = SANDBOX_BEFORE_SETOWN,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(fown, sandbox_after_setown) {
+ /* clang-format on */
+ .sandbox_setown = SANDBOX_AFTER_SETOWN,
+};
+
+FIXTURE_SETUP(fown)
+{
+ drop_caps(_metadata);
+}
+
+FIXTURE_TEARDOWN(fown)
+{
+}
+
+/*
+ * Sending an out of bound message will trigger the SIGURG signal
+ * through file_send_sigiotask.
+ */
+TEST_F(fown, sigurg_socket)
+{
+ int server_socket, recv_socket;
+ struct service_fixture server_address;
+ char buffer_parent;
+ int status;
+ int pipe_parent[2], pipe_child[2];
+ pid_t child;
+
+ memset(&server_address, 0, sizeof(server_address));
+ set_unix_address(&server_address, 0);
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+
+ if (variant->sandbox_setown == SANDBOX_BEFORE_FORK)
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int client_socket;
+ char buffer_child;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ ASSERT_EQ(0, setup_signal_handler(SIGURG));
+ client_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, client_socket);
+
+ /* Waits for the parent to listen. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buffer_child, 1));
+ ASSERT_EQ(0, connect(client_socket, &server_address.unix_addr,
+ server_address.unix_addr_len));
+
+ /*
+ * Waits for the parent to accept the connection, sandbox
+ * itself, and call fcntl(2).
+ */
+ ASSERT_EQ(1, read(pipe_parent[0], &buffer_child, 1));
+ /* May signal itself. */
+ ASSERT_EQ(1, send(client_socket, ".", 1, MSG_OOB));
+ EXPECT_EQ(0, close(client_socket));
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_child[1]));
+
+ /* Waits for the message to be received. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buffer_child, 1));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ if (variant->sandbox_setown == SANDBOX_BEFORE_SETOWN) {
+ ASSERT_EQ(0, signal_received);
+ } else {
+ /*
+ * A signal is only received if fcntl(F_SETOWN) was
+ * called before any sandboxing or if the signal
+ * receiver is in the same domain.
+ */
+ ASSERT_EQ(1, signal_received);
+ }
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_parent[0]));
+ EXPECT_EQ(0, close(pipe_child[1]));
+
+ server_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, server_socket);
+ ASSERT_EQ(0, bind(server_socket, &server_address.unix_addr,
+ server_address.unix_addr_len));
+ ASSERT_EQ(0, listen(server_socket, backlog));
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ recv_socket = accept(server_socket, NULL, NULL);
+ ASSERT_LE(0, recv_socket);
+
+ if (variant->sandbox_setown == SANDBOX_BEFORE_SETOWN)
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ /*
+ * Sets the child to receive SIGURG for MSG_OOB. This uncommon use is
+ * a valid attack scenario which also simplifies this test.
+ */
+ ASSERT_EQ(0, fcntl(recv_socket, F_SETOWN, child));
+
+ if (variant->sandbox_setown == SANDBOX_AFTER_SETOWN)
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ /* Waits for the child to send MSG_OOB. */
+ ASSERT_EQ(1, read(pipe_child[0], &buffer_parent, 1));
+ EXPECT_EQ(0, close(pipe_child[0]));
+ ASSERT_EQ(1, recv(recv_socket, &buffer_parent, 1, MSG_OOB));
+ EXPECT_EQ(0, close(recv_socket));
+ EXPECT_EQ(0, close(server_socket));
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/scoped_test.c b/tools/testing/selftests/landlock/scoped_test.c
new file mode 100644
index 000000000000..b90f76ed0d9c
--- /dev/null
+++ b/tools/testing/selftests/landlock/scoped_test.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Common scope restriction
+ *
+ * Copyright © 2024 Tahera Fahimi <fahimitahera@gmail.com>
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/landlock.h>
+#include <sys/prctl.h>
+
+#include "common.h"
+
+#define ACCESS_LAST LANDLOCK_SCOPE_SIGNAL
+
+TEST(ruleset_with_unknown_scope)
+{
+ __u64 scoped_mask;
+
+ for (scoped_mask = 1ULL << 63; scoped_mask != ACCESS_LAST;
+ scoped_mask >>= 1) {
+ struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = scoped_mask,
+ };
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0));
+ ASSERT_EQ(EINVAL, errno);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/true.c b/tools/testing/selftests/landlock/true.c
new file mode 100644
index 000000000000..3f9ccbf52783
--- /dev/null
+++ b/tools/testing/selftests/landlock/true.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+int main(void)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/landlock/wait-pipe-sandbox.c b/tools/testing/selftests/landlock/wait-pipe-sandbox.c
new file mode 100644
index 000000000000..87dbc9164430
--- /dev/null
+++ b/tools/testing/selftests/landlock/wait-pipe-sandbox.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Write in a pipe, wait, sandbox itself, test sandboxing, and wait again.
+ *
+ * Used by audit_exec.flags from audit_test.c
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/prctl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "wrappers.h"
+
+static int sync_with(int pipe_child, int pipe_parent)
+{
+ char buf;
+
+ /* Signals that we are waiting. */
+ if (write(pipe_child, ".", 1) != 1) {
+ perror("Failed to write to first argument");
+ return 1;
+ }
+
+ /* Waits for the parent do its test. */
+ if (read(pipe_parent, &buf, 1) != 1) {
+ perror("Failed to write to the second argument");
+ return 1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ const struct landlock_ruleset_attr layer2 = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ const struct landlock_ruleset_attr layer3 = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int err, pipe_child, pipe_parent, ruleset_fd;
+
+ /* The first argument must be the file descriptor number of a pipe. */
+ if (argc != 3) {
+ fprintf(stderr, "Wrong number of arguments (not two)\n");
+ return 1;
+ }
+
+ pipe_child = atoi(argv[1]);
+ pipe_parent = atoi(argv[2]);
+ /* PR_SET_NO_NEW_PRIVS already set by parent. */
+
+ /* First step to test parent's layer1. */
+ err = sync_with(pipe_child, pipe_parent);
+ if (err)
+ return err;
+
+ /* Tries to send a signal, denied by layer1. */
+ if (!kill(getppid(), 0)) {
+ fprintf(stderr, "Successfully sent a signal to the parent");
+ return 1;
+ }
+
+ /* Second step to test parent's layer1 and our layer2. */
+ err = sync_with(pipe_child, pipe_parent);
+ if (err)
+ return err;
+
+ ruleset_fd = landlock_create_ruleset(&layer2, sizeof(layer2), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create the layer2 ruleset");
+ return 1;
+ }
+
+ if (landlock_restrict_self(ruleset_fd, 0)) {
+ perror("Failed to restrict self");
+ return 1;
+ }
+ close(ruleset_fd);
+
+ /* Tries to send a signal, denied by layer1. */
+ if (!kill(getppid(), 0)) {
+ fprintf(stderr, "Successfully sent a signal to the parent");
+ return 1;
+ }
+
+ /* Tries to open ., denied by layer2. */
+ if (open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC) >= 0) {
+ fprintf(stderr, "Successfully opened /");
+ return 1;
+ }
+
+ /* Third step to test our layer2 and layer3. */
+ err = sync_with(pipe_child, pipe_parent);
+ if (err)
+ return err;
+
+ ruleset_fd = landlock_create_ruleset(&layer3, sizeof(layer3), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create the layer3 ruleset");
+ return 1;
+ }
+
+ if (landlock_restrict_self(ruleset_fd, 0)) {
+ perror("Failed to restrict self");
+ return 1;
+ }
+ close(ruleset_fd);
+
+ /* Tries to open ., denied by layer2. */
+ if (open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC) >= 0) {
+ fprintf(stderr, "Successfully opened /");
+ return 1;
+ }
+
+ /* Tries to send a signal, denied by layer3. */
+ if (!kill(getppid(), 0)) {
+ fprintf(stderr, "Successfully sent a signal to the parent");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/landlock/wait-pipe.c b/tools/testing/selftests/landlock/wait-pipe.c
new file mode 100644
index 000000000000..0dbcd260a0fa
--- /dev/null
+++ b/tools/testing/selftests/landlock/wait-pipe.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Write in a pipe and wait.
+ *
+ * Used by layout1.umount_sandboxer from fs_test.c
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[])
+{
+ int pipe_child, pipe_parent;
+ char buf;
+
+ /* The first argument must be the file descriptor number of a pipe. */
+ if (argc != 3) {
+ fprintf(stderr, "Wrong number of arguments (not two)\n");
+ return 1;
+ }
+
+ pipe_child = atoi(argv[1]);
+ pipe_parent = atoi(argv[2]);
+
+ /* Signals that we are waiting. */
+ if (write(pipe_child, ".", 1) != 1) {
+ perror("Failed to write to first argument");
+ return 1;
+ }
+
+ /* Waits for the parent do its test. */
+ if (read(pipe_parent, &buf, 1) != 1) {
+ perror("Failed to write to the second argument");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/landlock/wrappers.h b/tools/testing/selftests/landlock/wrappers.h
new file mode 100644
index 000000000000..65548323e45d
--- /dev/null
+++ b/tools/testing/selftests/landlock/wrappers.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Syscall wrappers
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ * Copyright © 2021-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <linux/landlock.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef landlock_create_ruleset
+static inline int
+landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
+ const size_t size, const __u32 flags)
+{
+ return syscall(__NR_landlock_create_ruleset, attr, size, flags);
+}
+#endif
+
+#ifndef landlock_add_rule
+static inline int landlock_add_rule(const int ruleset_fd,
+ const enum landlock_rule_type rule_type,
+ const void *const rule_attr,
+ const __u32 flags)
+{
+ return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
+ flags);
+}
+#endif
+
+#ifndef landlock_restrict_self
+static inline int landlock_restrict_self(const int ruleset_fd,
+ const __u32 flags)
+{
+ return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
+}
+#endif
+
+static inline pid_t sys_gettid(void)
+{
+ return syscall(__NR_gettid);
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index a5ce26d548e4..f02cc8a2e4ae 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -1,6 +1,55 @@
# This mimics the top-level Makefile. We do it explicitly here so that this
# Makefile can operate with or without the kbuild infrastructure.
+ifneq ($(LLVM),)
+ifneq ($(filter %/,$(LLVM)),)
+LLVM_PREFIX := $(LLVM)
+else ifneq ($(filter -%,$(LLVM)),)
+LLVM_SUFFIX := $(LLVM)
+endif
+
+CLANG := $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
+
+CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi
+CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu
+CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl
+CLANG_TARGET_FLAGS_i386 := i386-linux-gnu
+CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu
+CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu
+CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu
+CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu
+CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu
+CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu
+CLANG_TARGET_FLAGS_x86_64 := x86_64-linux-gnu
+
+# Default to host architecture if ARCH is not explicitly given.
+ifeq ($(ARCH),)
+CLANG_TARGET_FLAGS := $(shell $(CLANG) -print-target-triple)
+else
+CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH))
+endif
+
+ifeq ($(CROSS_COMPILE),)
+ifeq ($(CLANG_TARGET_FLAGS),)
+$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk)
+else
+CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS)
+endif # CLANG_TARGET_FLAGS
+else
+CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif # CROSS_COMPILE
+
+# gcc defaults to silence (off) for the following warnings, but clang defaults
+# to the opposite. The warnings are not useful for the kernel itself, which is
+# why they have remained disabled in gcc for the main kernel build. And it is
+# only due to including kernel data structures in the selftests, that we get the
+# warnings from clang. Therefore, disable the warnings for clang builds.
+CFLAGS += -Wno-address-of-packed-member
+CFLAGS += -Wno-gnu-variable-sized-type-not-at-end
+
+CC := $(CLANG) $(CLANG_FLAGS) -fintegrated-as
+else
CC := $(CROSS_COMPILE)gcc
+endif # LLVM
ifeq (0,$(MAKELEVEL))
ifeq ($(OUTPUT),)
@@ -9,6 +58,34 @@ ifeq (0,$(MAKELEVEL))
endif
endif
selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
+top_srcdir = $(selfdir)/../../..
+
+# msg: emit succinct information message describing current building step
+# $1 - generic step name (e.g., CC, LINK, etc);
+# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
+# $3 - target (assumed to be file); only file name will be emitted;
+# $4 - optional extra arg, emitted as-is, if provided.
+ifeq ($(V),1)
+Q =
+msg =
+else
+Q = @
+msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
+MAKEFLAGS += --no-print-directory
+endif
+
+ifeq ($(KHDR_INCLUDES),)
+KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include
+endif
+
+# In order to use newer items that haven't yet been added to the user's system
+# header files, add $(TOOLS_INCLUDES) to the compiler invocation in each
+# each selftest.
+# You may need to add files to that location, or to refresh an existing file. In
+# order to do that, run "make headers" from $(top_srcdir), then copy the
+# header file that you want from $(top_srcdir)/usr/include/... , to the matching
+# subdir in $(TOOLS_INCLUDE).
+TOOLS_INCLUDES := -isystem $(top_srcdir)/tools/include/uapi
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
@@ -20,44 +97,8 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
-ifdef KSFT_KHDR_INSTALL
-top_srcdir ?= ../../../..
-include $(top_srcdir)/scripts/subarch.include
-ARCH ?= $(SUBARCH)
-
-# set default goal to all, so make without a target runs all, even when
-# all isn't the first target in the file.
-.DEFAULT_GOAL := all
-
-# Invoke headers install with --no-builtin-rules to avoid circular
-# dependency in "make kselftest" case. In this case, second level
-# make inherits builtin-rules which will use the rule generate
-# Makefile.o and runs into
-# "Circular Makefile.o <- prepare dependency dropped."
-# and headers_install fails and test compile fails.
-# O= KBUILD_OUTPUT cases don't run into this error, since main Makefile
-# invokes them as sub-makes and --no-builtin-rules is not necessary,
-# but doesn't cause any failures. Keep it simple and use the same
-# flags in both cases.
-# Note that the support to install headers from lib.mk is necessary
-# when test Makefile is run directly with "make -C".
-# When local build is done, headers are installed in the default
-# INSTALL_HDR_PATH usr/include.
-.PHONY: khdr
-khdr:
-ifndef KSFT_KHDR_INSTALL_DONE
-ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
- $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
-else
- $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \
- ARCH=$(ARCH) -C $(top_srcdir) headers_install
-endif
-endif
-
-all: khdr $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-else
-all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-endif
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) \
+ $(if $(TEST_GEN_MODS_DIR),gen_mods_dir)
define RUN_TESTS
BASE_DIR="$(selfdir)"; \
@@ -68,13 +109,32 @@ define RUN_TESTS
run_many $(1)
endef
+define INSTALL_INCLUDES
+ $(if $(TEST_INCLUDES), \
+ relative_files=""; \
+ for entry in $(TEST_INCLUDES); do \
+ entry_dir=$$(readlink -e "$$(dirname "$$entry")"); \
+ entry_name=$$(basename "$$entry"); \
+ relative_dir=$${entry_dir#"$$SRC_PATH"/}; \
+ if [ "$$relative_dir" = "$$entry_dir" ]; then \
+ echo "Error: TEST_INCLUDES entry \"$$entry\" not located inside selftests directory ($$SRC_PATH)" >&2; \
+ exit 1; \
+ fi; \
+ relative_files="$$relative_files $$relative_dir/$$entry_name"; \
+ done; \
+ cd $(SRC_PATH) && rsync -aR $$relative_files $(OBJ_PATH)/ \
+ )
+endef
+
run_tests: all
ifdef building_out_of_srctree
- @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \
- rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \
+ @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)$(TEST_GEN_MODS_DIR)" != "X" ]; then \
+ rsync -aq --copy-unsafe-links $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(TEST_GEN_MODS_DIR) $(OUTPUT); \
fi
+ @$(INSTALL_INCLUDES)
@if [ "X$(TEST_PROGS)" != "X" ]; then \
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) ; \
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \
+ $(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \
else \
$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \
fi
@@ -82,9 +142,20 @@ else
@$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
endif
+gen_mods_dir:
+ $(Q)$(MAKE) -C $(TEST_GEN_MODS_DIR)
+
+clean_mods_dir:
+ $(Q)$(MAKE) -C $(TEST_GEN_MODS_DIR) clean
+
define INSTALL_SINGLE_RULE
$(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
- $(if $(INSTALL_LIST),rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+ $(if $(INSTALL_LIST),rsync -a --copy-unsafe-links $(INSTALL_LIST) $(INSTALL_PATH)/)
+endef
+
+define INSTALL_MODS_RULE
+ $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)/$(INSTALL_LIST))
+ $(if $(INSTALL_LIST),rsync -a --copy-unsafe-links $(INSTALL_LIST)/*.ko $(INSTALL_PATH)/$(INSTALL_LIST))
endef
define INSTALL_RULE
@@ -95,11 +166,14 @@ define INSTALL_RULE
$(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE)
$(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
$(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(notdir $(TEST_GEN_MODS_DIR))) $(INSTALL_MODS_RULE)
+ $(eval INSTALL_LIST = $(wildcard config settings)) $(INSTALL_SINGLE_RULE)
endef
install: all
ifdef INSTALL_PATH
$(INSTALL_RULE)
+ $(INSTALL_INCLUDES)
else
$(error Error: set INSTALL_PATH to use install)
endif
@@ -119,9 +193,20 @@ define CLEAN
$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
endef
-clean:
+clean: $(if $(TEST_GEN_MODS_DIR),clean_mods_dir)
$(CLEAN)
+# Build with _GNU_SOURCE by default
+CFLAGS += -D_GNU_SOURCE=
+
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -I${top_srcdir}/tools/testing/selftests
+
+# Enables to extend CFLAGS and LDFLAGS from command line, e.g.
+# make USERCFLAGS=-Werror USERLDFLAGS=-static
+CFLAGS += $(USERCFLAGS)
+LDFLAGS += $(USERLDFLAGS)
+
# When make O= with kselftest target from main level
# the following aren't defined.
#
@@ -134,9 +219,10 @@ endif
# Selftest makefiles can override those targets by setting
# OVERRIDE_TARGETS = 1.
ifeq ($(OVERRIDE_TARGETS),)
-LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
+LOCAL_HDRS += $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
$(OUTPUT)/%:%.c $(LOCAL_HDRS)
- $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
+ $(call msg,CC,,$@)
+ $(Q)$(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
$(OUTPUT)/%.o:%.S
$(COMPILE.S) $^ -o $@
@@ -145,4 +231,10 @@ $(OUTPUT)/%:%.S
$(LINK.S) $^ $(LDLIBS) -o $@
endif
-.PHONY: run_tests all clean install emit_tests
+# Extract the expected header directory
+khdr_output := $(patsubst %/usr/include,%,$(filter %/usr/include,$(KHDR_INCLUDES)))
+
+headers:
+ $(Q)$(MAKE) -f $(top_srcdir)/Makefile -C $(khdr_output) headers
+
+.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir headers
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index a105f094676e..f876bf4744e1 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,6 +4,5 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh
-
+TEST_PROGS := bitmap.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index b80ee3f6e265..377b3699ff31 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,5 +1,3 @@
-CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
-CONFIG_TEST_STRSCPY=m
CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
deleted file mode 100755
index 370b79a9cb2e..000000000000
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Checks fast/slow prime_number generation for inconsistencies
-$(dirname $0)/../kselftest/module.sh "prime numbers" prime_numbers selftest=65536
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
deleted file mode 100755
index 05f4544e87f9..000000000000
--- a/tools/testing/selftests/lib/printf.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Tests the printf infrastructure using test_printf kernel module.
-$(dirname $0)/../kselftest/module.sh "printf" test_printf
diff --git a/tools/testing/selftests/lib/strscpy.sh b/tools/testing/selftests/lib/strscpy.sh
deleted file mode 100755
index be60ef6e1a7f..000000000000
--- a/tools/testing/selftests/lib/strscpy.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0+
-$(dirname $0)/../kselftest/module.sh "strscpy*" test_strscpy
diff --git a/tools/testing/selftests/livepatch/.gitignore b/tools/testing/selftests/livepatch/.gitignore
new file mode 100644
index 000000000000..f1e9c2a20e99
--- /dev/null
+++ b/tools/testing/selftests/livepatch/.gitignore
@@ -0,0 +1 @@
+test_klp-call_getpid
diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile
index 1acc9e1fa3fb..a080eb54a215 100644
--- a/tools/testing/selftests/livepatch/Makefile
+++ b/tools/testing/selftests/livepatch/Makefile
@@ -1,12 +1,17 @@
# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_FILES := test_klp-call_getpid
+TEST_GEN_MODS_DIR := test_modules
TEST_PROGS_EXTENDED := functions.sh
TEST_PROGS := \
test-livepatch.sh \
test-callbacks.sh \
test-shadow-vars.sh \
test-state.sh \
- test-ftrace.sh
+ test-ftrace.sh \
+ test-sysfs.sh \
+ test-syscall.sh \
+ test-kprobe.sh
TEST_FILES := settings
diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README
index 0942dd5826f8..d2035dd64a2b 100644
--- a/tools/testing/selftests/livepatch/README
+++ b/tools/testing/selftests/livepatch/README
@@ -13,23 +13,36 @@ the message buffer for only the duration of each individual test.)
Config
------
-Set these config options and their prerequisites:
+Set CONFIG_LIVEPATCH=y option and it's prerequisites.
-CONFIG_LIVEPATCH=y
-CONFIG_TEST_LIVEPATCH=m
+Building the tests
+------------------
+
+To only build the tests without running them, run:
+
+ % make -C tools/testing/selftests/livepatch
+
+The command above will compile all test modules and test programs, making them
+ready to be packaged if so desired.
Running the tests
-----------------
-Test kernel modules are built as part of lib/ (make modules) and need to
-be installed (make modules_install) as the test scripts will modprobe
-them.
+Test kernel modules are built before running the livepatch selftests. The
+modules are located under test_modules directory, and are built as out-of-tree
+modules. This is specially useful since the same sources can be built and
+tested on systems with different kABI, ensuring they the tests are backwards
+compatible. The modules will be loaded by the test scripts using insmod.
To run the livepatch selftests, from the top of the kernel source tree:
% make -C tools/testing/selftests TARGETS=livepatch run_tests
+or
+
+ % make kselftest TARGETS=livepatch
+
Adding tests
------------
diff --git a/tools/testing/selftests/livepatch/config b/tools/testing/selftests/livepatch/config
index ad23100cb27c..e88bf518a23a 100644
--- a/tools/testing/selftests/livepatch/config
+++ b/tools/testing/selftests/livepatch/config
@@ -1,3 +1,2 @@
CONFIG_LIVEPATCH=y
CONFIG_DYNAMIC_DEBUG=y
-CONFIG_TEST_LIVEPATCH=m
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 846c7ed71556..8ec0cb64ad94 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -6,6 +6,15 @@
MAX_RETRIES=600
RETRY_INTERVAL=".1" # seconds
+SYSFS_KERNEL_DIR="/sys/kernel"
+SYSFS_KLP_DIR="$SYSFS_KERNEL_DIR/livepatch"
+SYSFS_DEBUG_DIR="$SYSFS_KERNEL_DIR/debug"
+SYSFS_KPROBES_DIR="$SYSFS_DEBUG_DIR/kprobes"
+if [[ -e /sys/kernel/tracing/trace ]]; then
+ SYSFS_TRACING_DIR="$SYSFS_KERNEL_DIR/tracing"
+else
+ SYSFS_TRACING_DIR="$SYSFS_DEBUG_DIR/tracing"
+fi
# Kselftest framework requirement - SKIP code is 4
ksft_skip=4
@@ -33,6 +42,18 @@ function is_root() {
fi
}
+# Check if we can compile the modules before loading them
+function has_kdir() {
+ if [ -z "$KDIR" ]; then
+ KDIR="/lib/modules/$(uname -r)/build"
+ fi
+
+ if [ ! -d "$KDIR" ]; then
+ echo "skip all tests: KDIR ($KDIR) not available to compile modules."
+ exit $ksft_skip
+ fi
+}
+
# die(msg) - game over, man
# msg - dying words
function die() {
@@ -41,48 +62,70 @@ function die() {
exit 1
}
-# save existing dmesg so we can detect new content
-function save_dmesg() {
- SAVED_DMESG=$(mktemp --tmpdir -t klp-dmesg-XXXXXX)
- dmesg > "$SAVED_DMESG"
-}
-
-# cleanup temporary dmesg file from save_dmesg()
-function cleanup_dmesg_file() {
- rm -f "$SAVED_DMESG"
-}
-
function push_config() {
- DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \
+ DYNAMIC_DEBUG=$(grep '^kernel/livepatch' "$SYSFS_DEBUG_DIR/dynamic_debug/control" | \
awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}')
FTRACE_ENABLED=$(sysctl --values kernel.ftrace_enabled)
+ KPROBE_ENABLED=$(cat "$SYSFS_KPROBES_DIR/enabled")
+ TRACING_ON=$(cat "$SYSFS_TRACING_DIR/tracing_on")
+ CURRENT_TRACER=$(cat "$SYSFS_TRACING_DIR/current_tracer")
+ FTRACE_FILTER=$(cat "$SYSFS_TRACING_DIR/set_ftrace_filter")
}
function pop_config() {
if [[ -n "$DYNAMIC_DEBUG" ]]; then
- echo -n "$DYNAMIC_DEBUG" > /sys/kernel/debug/dynamic_debug/control
+ echo -n "$DYNAMIC_DEBUG" > "$SYSFS_DEBUG_DIR/dynamic_debug/control"
fi
if [[ -n "$FTRACE_ENABLED" ]]; then
sysctl kernel.ftrace_enabled="$FTRACE_ENABLED" &> /dev/null
fi
+ if [[ -n "$KPROBE_ENABLED" ]]; then
+ echo "$KPROBE_ENABLED" > "$SYSFS_KPROBES_DIR/enabled"
+ fi
+ if [[ -n "$TRACING_ON" ]]; then
+ echo "$TRACING_ON" > "$SYSFS_TRACING_DIR/tracing_on"
+ fi
+ if [[ -n "$CURRENT_TRACER" ]]; then
+ echo "$CURRENT_TRACER" > "$SYSFS_TRACING_DIR/current_tracer"
+ fi
+ if [[ -n "$FTRACE_FILTER" ]]; then
+ echo "$FTRACE_FILTER" \
+ | sed -e "/#### all functions enabled ####/d" \
+ > "$SYSFS_TRACING_DIR/set_ftrace_filter"
+ fi
}
function set_dynamic_debug() {
- cat <<-EOF > /sys/kernel/debug/dynamic_debug/control
+ cat <<-EOF > "$SYSFS_DEBUG_DIR/dynamic_debug/control"
file kernel/livepatch/* +p
func klp_try_switch_task -p
EOF
}
function set_ftrace_enabled() {
- result=$(sysctl -q kernel.ftrace_enabled="$1" 2>&1 && \
- sysctl kernel.ftrace_enabled 2>&1)
- echo "livepatch: $result" > /dev/kmsg
+ local can_fail=0
+ if [[ "$1" == "--fail" ]] ; then
+ can_fail=1
+ shift
+ fi
+
+ local err=$(sysctl -q kernel.ftrace_enabled="$1" 2>&1)
+ local result=$(sysctl --values kernel.ftrace_enabled)
+
+ if [[ "$result" != "$1" ]] ; then
+ if [[ $can_fail -eq 1 ]] ; then
+ echo "livepatch: $err" | sed 's#/proc/sys/kernel/#kernel.#' > /dev/kmsg
+ return
+ fi
+
+ skip "failed to set kernel.ftrace_enabled = $1"
+ fi
+
+ echo "livepatch: kernel.ftrace_enabled = $result" > /dev/kmsg
}
function cleanup() {
pop_config
- cleanup_dmesg_file
}
# setup_config - save the current config and set a script exit trap that
@@ -91,6 +134,7 @@ function cleanup() {
# the ftrace_enabled sysctl.
function setup_config() {
is_root
+ has_kdir
push_config
set_dynamic_debug
set_ftrace_enabled 1
@@ -110,16 +154,14 @@ function loop_until() {
done
}
-function assert_mod() {
- local mod="$1"
-
- modprobe --dry-run "$mod" &>/dev/null
-}
-
function is_livepatch_mod() {
local mod="$1"
- if [[ $(modinfo "$mod" | awk '/^livepatch:/{print $NF}') == "Y" ]]; then
+ if [[ ! -f "test_modules/$mod.ko" ]]; then
+ die "Can't find \"test_modules/$mod.ko\", try \"make\""
+ fi
+
+ if [[ $(modinfo "test_modules/$mod.ko" | awk '/^livepatch:/{print $NF}') == "Y" ]]; then
return 0
fi
@@ -129,9 +171,9 @@ function is_livepatch_mod() {
function __load_mod() {
local mod="$1"; shift
- local msg="% modprobe $mod $*"
+ local msg="% insmod test_modules/$mod.ko $*"
log "${msg%% }"
- ret=$(modprobe "$mod" "$@" 2>&1)
+ ret=$(insmod "test_modules/$mod.ko" "$@" 2>&1)
if [[ "$ret" != "" ]]; then
die "$ret"
fi
@@ -144,13 +186,10 @@ function __load_mod() {
# load_mod(modname, params) - load a kernel module
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_mod() {
local mod="$1"; shift
- assert_mod "$mod" ||
- skip "unable to load module ${mod}, verify CONFIG_TEST_LIVEPATCH=m and run self-tests as root"
-
is_livepatch_mod "$mod" &&
die "use load_lp() to load the livepatch module $mod"
@@ -160,45 +199,42 @@ function load_mod() {
# load_lp_nowait(modname, params) - load a kernel module with a livepatch
# but do not wait on until the transition finishes
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_lp_nowait() {
local mod="$1"; shift
- assert_mod "$mod" ||
- skip "unable to load module ${mod}, verify CONFIG_TEST_LIVEPATCH=m and run self-tests as root"
-
is_livepatch_mod "$mod" ||
die "module $mod is not a livepatch"
__load_mod "$mod" "$@"
# Wait for livepatch in sysfs ...
- loop_until '[[ -e "/sys/kernel/livepatch/$mod" ]]' ||
+ loop_until '[[ -e "$SYSFS_KLP_DIR/$mod" ]]' ||
die "failed to load module $mod (sysfs)"
}
# load_lp(modname, params) - load a kernel module with a livepatch
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_lp() {
local mod="$1"; shift
load_lp_nowait "$mod" "$@"
# Wait until the transition finishes ...
- loop_until 'grep -q '^0$' /sys/kernel/livepatch/$mod/transition' ||
+ loop_until 'grep -q '^0$' $SYSFS_KLP_DIR/$mod/transition' ||
die "failed to complete transition"
}
# load_failing_mod(modname, params) - load a kernel module, expect to fail
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_failing_mod() {
local mod="$1"; shift
- local msg="% modprobe $mod $*"
+ local msg="% insmod test_modules/$mod.ko $*"
log "${msg%% }"
- ret=$(modprobe "$mod" "$@" 2>&1)
+ ret=$(insmod "test_modules/$mod.ko" "$@" 2>&1)
if [[ "$ret" == "" ]]; then
die "$mod unexpectedly loaded"
fi
@@ -236,12 +272,12 @@ function unload_lp() {
function disable_lp() {
local mod="$1"
- log "% echo 0 > /sys/kernel/livepatch/$mod/enabled"
- echo 0 > /sys/kernel/livepatch/"$mod"/enabled
+ log "% echo 0 > $SYSFS_KLP_DIR/$mod/enabled"
+ echo 0 > "$SYSFS_KLP_DIR/$mod/enabled"
# Wait until the transition finishes and the livepatch gets
# removed from sysfs...
- loop_until '[[ ! -e "/sys/kernel/livepatch/$mod" ]]' ||
+ loop_until '[[ ! -e "$SYSFS_KLP_DIR/$mod" ]]' ||
die "failed to disable livepatch $mod"
}
@@ -263,7 +299,15 @@ function set_pre_patch_ret {
function start_test {
local test="$1"
- save_dmesg
+ # Dump something unique into the dmesg log, then stash the entry
+ # in LAST_DMESG. The check_result() function will use it to
+ # find new kernel messages since the test started.
+ local last_dmesg_msg="livepatch kselftest timestamp: $(date --rfc-3339=ns)"
+ log "$last_dmesg_msg"
+ loop_until 'dmesg | grep -q "$last_dmesg_msg"' ||
+ die "buffer busy? can't find canary dmesg message: $last_dmesg_msg"
+ LAST_DMESG=$(dmesg | grep "$last_dmesg_msg")
+
echo -n "TEST: $test ... "
log "===== TEST: $test ====="
}
@@ -274,21 +318,90 @@ function check_result {
local expect="$*"
local result
- # Note: when comparing dmesg output, the kernel log timestamps
- # help differentiate repeated testing runs. Remove them with a
- # post-comparison sed filter.
-
- result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \
+ # Test results include any new dmesg entry since LAST_DMESG, then:
+ # - include lines matching keywords
+ # - exclude lines matching keywords
+ # - filter out dmesg timestamp prefixes
+ result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \
grep -e 'livepatch:' -e 'test_klp' | \
grep -v '\(tainting\|taints\) kernel' | \
- sed 's/^\[[ 0-9.]*\] //')
+ sed 's/^\[[ 0-9.]*\] //' | \
+ sed 's/^\[[ ]*[CT][0-9]*\] //')
if [[ "$expect" == "$result" ]] ; then
echo "ok"
+ elif [[ "$result" == "" ]] ; then
+ echo -e "not ok\n\nbuffer overrun? can't find canary dmesg entry: $LAST_DMESG\n"
+ die "livepatch kselftest(s) failed"
else
echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n"
die "livepatch kselftest(s) failed"
fi
+}
+
+# check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs
+# path permissions
+# modname - livepatch module creating the sysfs interface
+# rel_path - relative path of the sysfs interface
+# expected_rights - expected access rights
+function check_sysfs_rights() {
+ local mod="$1"; shift
+ local rel_path="$1"; shift
+ local expected_rights="$1"; shift
+
+ local path="$SYSFS_KLP_DIR/$mod/$rel_path"
+ local rights=$(/bin/stat --format '%A' "$path")
+ if test "$rights" != "$expected_rights" ; then
+ die "Unexpected access rights of $path: $expected_rights vs. $rights"
+ fi
+}
+
+# check_sysfs_value(modname, rel_path, expected_value) - check sysfs value
+# modname - livepatch module creating the sysfs interface
+# rel_path - relative path of the sysfs interface
+# expected_value - expected value read from the file
+function check_sysfs_value() {
+ local mod="$1"; shift
+ local rel_path="$1"; shift
+ local expected_value="$1"; shift
+
+ local path="$SYSFS_KLP_DIR/$mod/$rel_path"
+ local value=`cat $path`
+ if test "$value" != "$expected_value" ; then
+ die "Unexpected value in $path: $expected_value vs. $value"
+ fi
+}
+
+# cleanup_tracing() - stop and clean up function tracing
+function cleanup_tracing() {
+ echo 0 > "$SYSFS_TRACING_DIR/tracing_on"
+ echo "" > "$SYSFS_TRACING_DIR/set_ftrace_filter"
+ echo "nop" > "$SYSFS_TRACING_DIR/current_tracer"
+ echo "" > "$SYSFS_TRACING_DIR/trace"
+}
+
+# trace_function(function) - start tracing of a function
+# function - to be traced function
+function trace_function() {
+ local function="$1"; shift
+
+ cleanup_tracing
+
+ echo "function" > "$SYSFS_TRACING_DIR/current_tracer"
+ echo "$function" > "$SYSFS_TRACING_DIR/set_ftrace_filter"
+ echo 1 > "$SYSFS_TRACING_DIR/tracing_on"
+}
+
+# check_traced_functions(functions...) - check whether each function appeared in the trace log
+# functions - list of functions to be checked
+function check_traced_functions() {
+ local function
+
+ for function in "$@"; do
+ if ! grep -Fwq "$function" "$SYSFS_TRACING_DIR/trace" ; then
+ die "Function ($function) did not appear in the trace"
+ fi
+ done
- cleanup_dmesg_file
+ cleanup_tracing
}
diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh
index 90b26dbb2626..2a03deb26a12 100755
--- a/tools/testing/selftests/livepatch/test-callbacks.sh
+++ b/tools/testing/selftests/livepatch/test-callbacks.sh
@@ -34,9 +34,9 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET
-check_result "% modprobe $MOD_TARGET
+check_result "% insmod test_modules/$MOD_TARGET.ko
$MOD_TARGET: ${MOD_TARGET}_init
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -46,7 +46,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state
livepatch: '$MOD_LIVEPATCH': patching complete
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state
@@ -81,7 +81,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -89,12 +89,12 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_TARGET: ${MOD_TARGET}_init
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state
@@ -129,9 +129,9 @@ unload_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_TARGET
+check_result "% insmod test_modules/$MOD_TARGET.ko
$MOD_TARGET: ${MOD_TARGET}_init
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -146,7 +146,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit
$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away
livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET'
$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
@@ -177,7 +177,7 @@ unload_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -185,7 +185,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
@@ -195,7 +195,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit
$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away
livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET'
$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
@@ -219,7 +219,7 @@ load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -227,7 +227,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
@@ -254,18 +254,18 @@ load_mod $MOD_TARGET
load_failing_mod $MOD_LIVEPATCH pre_patch_ret=-19
unload_mod $MOD_TARGET
-check_result "% modprobe $MOD_TARGET
+check_result "% insmod test_modules/$MOD_TARGET.ko
$MOD_TARGET: ${MOD_TARGET}_init
-% modprobe $MOD_LIVEPATCH pre_patch_ret=-19
+% insmod test_modules/$MOD_LIVEPATCH.ko pre_patch_ret=-19
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
-test_klp_callbacks_demo: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH: pre_patch_callback: vmlinux
livepatch: pre-patch callback failed for object 'vmlinux'
livepatch: failed to enable patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
livepatch: '$MOD_LIVEPATCH': completing unpatching transition
livepatch: '$MOD_LIVEPATCH': unpatching complete
-modprobe: ERROR: could not insert '$MOD_LIVEPATCH': No such device
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: No such device
% rmmod $MOD_TARGET
$MOD_TARGET: ${MOD_TARGET}_exit"
@@ -295,7 +295,7 @@ load_failing_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -304,13 +304,13 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
% echo -19 > /sys/module/$MOD_LIVEPATCH/parameters/pre_patch_ret
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
livepatch: pre-patch callback failed for object '$MOD_TARGET'
livepatch: patch '$MOD_LIVEPATCH' failed for module '$MOD_TARGET', refusing to load module '$MOD_TARGET'
-modprobe: ERROR: could not insert '$MOD_TARGET': No such device
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+insmod: ERROR: could not insert module test_modules/$MOD_TARGET.ko: No such device
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
@@ -340,11 +340,11 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY block_transition=N
+check_result "% insmod test_modules/$MOD_TARGET_BUSY.ko block_transition=N
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
$MOD_TARGET_BUSY: busymod_work_func enter
$MOD_TARGET_BUSY: busymod_work_func exit
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -354,7 +354,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
@@ -364,7 +364,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit
$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away
livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET'
$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
$MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state
@@ -412,7 +412,7 @@ load_lp_nowait $MOD_LIVEPATCH
# Wait until the livepatch reports in-transition state, i.e. that it's
# stalled on $MOD_TARGET_BUSY::busymod_work_func()
-loop_until 'grep -q '^1$' /sys/kernel/livepatch/$MOD_LIVEPATCH/transition' ||
+loop_until 'grep -q '^1$' $SYSFS_KLP_DIR/$MOD_LIVEPATCH/transition' ||
die "failed to stall transition"
load_mod $MOD_TARGET
@@ -421,16 +421,16 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY block_transition=Y
+check_result "% insmod test_modules/$MOD_TARGET_BUSY.ko block_transition=Y
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
$MOD_TARGET_BUSY: busymod_work_func enter
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state
livepatch: '$MOD_LIVEPATCH': starting patching transition
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_TARGET: ${MOD_TARGET}_init
@@ -438,7 +438,7 @@ $MOD_TARGET: ${MOD_TARGET}_init
$MOD_TARGET: ${MOD_TARGET}_exit
livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET'
$MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': reversing transition from patching to unpatching
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
livepatch: '$MOD_LIVEPATCH': completing unpatching transition
@@ -467,7 +467,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -475,7 +475,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_LIVEPATCH2
+% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -483,14 +483,14 @@ livepatch: '$MOD_LIVEPATCH2': starting patching transition
livepatch: '$MOD_LIVEPATCH2': completing patching transition
$MOD_LIVEPATCH2: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH2': patching complete
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled
livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH2': starting unpatching transition
livepatch: '$MOD_LIVEPATCH2': completing unpatching transition
$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH2': unpatching complete
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
@@ -523,7 +523,7 @@ disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -531,7 +531,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_LIVEPATCH2 replace=1
+% insmod test_modules/$MOD_LIVEPATCH2.ko replace=1
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -539,7 +539,7 @@ livepatch: '$MOD_LIVEPATCH2': starting patching transition
livepatch: '$MOD_LIVEPATCH2': completing patching transition
$MOD_LIVEPATCH2: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH2': patching complete
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled
livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH2': starting unpatching transition
diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh
index 552e165512f4..094176f1a46a 100755
--- a/tools/testing/selftests/livepatch/test-ftrace.sh
+++ b/tools/testing/selftests/livepatch/test-ftrace.sh
@@ -25,7 +25,8 @@ if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]]
die "livepatch kselftest(s) failed"
fi
-set_ftrace_enabled 0
+# Check that ftrace could not get disabled when a livepatch is enabled
+set_ftrace_enabled --fail 0
if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then
echo -e "FAIL\n\n"
die "livepatch kselftest(s) failed"
@@ -34,7 +35,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
check_result "livepatch: kernel.ftrace_enabled = 0
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: failed to register ftrace handler for function 'cmdline_proc_show' (-16)
@@ -43,16 +44,16 @@ livepatch: failed to enable patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
livepatch: '$MOD_LIVEPATCH': completing unpatching transition
livepatch: '$MOD_LIVEPATCH': unpatching complete
-modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Device or resource busy
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Device or resource busy
livepatch: kernel.ftrace_enabled = 1
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
livepatch: '$MOD_LIVEPATCH': patching complete
livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
livepatch: '$MOD_LIVEPATCH': completing unpatching transition
@@ -60,4 +61,38 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
+# - verify livepatch can load
+# - check if traces have a patched function
+# - reset trace and unload livepatch
+
+start_test "trace livepatched function and check that the live patch remains in effect"
+
+FUNCTION_NAME="livepatch_cmdline_proc_show"
+
+load_lp $MOD_LIVEPATCH
+trace_function "$FUNCTION_NAME"
+
+if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]] ; then
+ log "livepatch: ok"
+fi
+
+check_traced_functions "$FUNCTION_NAME"
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+livepatch: ok
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
exit 0
diff --git a/tools/testing/selftests/livepatch/test-kprobe.sh b/tools/testing/selftests/livepatch/test-kprobe.sh
new file mode 100755
index 000000000000..b67dfad03d97
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test-kprobe.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2024 SUSE
+# Author: Michael Vetter <mvetter@suse.com>
+
+. $(dirname $0)/functions.sh
+
+grep -q kprobe_ftrace_ops /proc/kallsyms || skip "test-kprobe requires CONFIG_KPROBES_ON_FTRACE"
+
+MOD_LIVEPATCH=test_klp_livepatch
+MOD_KPROBE=test_klp_kprobe
+
+setup_config
+
+# Kprobe a function and verify that we can't livepatch that same function
+# when it uses a post_handler since only one IPMODIFY maybe be registered
+# to any given function at a time.
+
+start_test "livepatch interaction with kprobed function with post_handler"
+
+echo 1 > "$SYSFS_KPROBES_DIR/enabled"
+
+load_mod $MOD_KPROBE has_post_handler=true
+load_failing_mod $MOD_LIVEPATCH
+unload_mod $MOD_KPROBE
+
+check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=true
+% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: failed to register ftrace handler for function 'cmdline_proc_show' (-16)
+livepatch: failed to patch object 'vmlinux'
+livepatch: failed to enable patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Device or resource busy
+% rmmod test_klp_kprobe"
+
+start_test "livepatch interaction with kprobed function without post_handler"
+
+load_mod $MOD_KPROBE has_post_handler=false
+load_lp $MOD_LIVEPATCH
+
+unload_mod $MOD_KPROBE
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=false
+% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% rmmod test_klp_kprobe
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
+exit 0
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index 5fe79ac34be1..6673023d2b66 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -4,7 +4,9 @@
. $(dirname $0)/functions.sh
-MOD_LIVEPATCH=test_klp_livepatch
+MOD_LIVEPATCH1=test_klp_livepatch
+MOD_LIVEPATCH2=test_klp_syscall
+MOD_LIVEPATCH3=test_klp_callbacks_demo
MOD_REPLACE=test_klp_atomic_replace
setup_config
@@ -16,33 +18,33 @@ setup_config
start_test "basic function patching"
-load_lp $MOD_LIVEPATCH
+load_lp $MOD_LIVEPATCH1
-if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then
+if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH1: this has been live patched" ]] ; then
echo -e "FAIL\n\n"
die "livepatch kselftest(s) failed"
fi
-disable_lp $MOD_LIVEPATCH
-unload_lp $MOD_LIVEPATCH
+disable_lp $MOD_LIVEPATCH1
+unload_lp $MOD_LIVEPATCH1
-if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]] ; then
+if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH1: this has been live patched" ]] ; then
echo -e "FAIL\n\n"
die "livepatch kselftest(s) failed"
fi
-check_result "% modprobe $MOD_LIVEPATCH
-livepatch: enabling patch '$MOD_LIVEPATCH'
-livepatch: '$MOD_LIVEPATCH': initializing patching transition
-livepatch: '$MOD_LIVEPATCH': starting patching transition
-livepatch: '$MOD_LIVEPATCH': completing patching transition
-livepatch: '$MOD_LIVEPATCH': patching complete
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
-livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
-livepatch: '$MOD_LIVEPATCH': starting unpatching transition
-livepatch: '$MOD_LIVEPATCH': completing unpatching transition
-livepatch: '$MOD_LIVEPATCH': unpatching complete
-% rmmod $MOD_LIVEPATCH"
+check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko
+livepatch: enabling patch '$MOD_LIVEPATCH1'
+livepatch: '$MOD_LIVEPATCH1': initializing patching transition
+livepatch: '$MOD_LIVEPATCH1': starting patching transition
+livepatch: '$MOD_LIVEPATCH1': completing patching transition
+livepatch: '$MOD_LIVEPATCH1': patching complete
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH1/enabled
+livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH1': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH1': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH1': unpatching complete
+% rmmod $MOD_LIVEPATCH1"
# - load a livepatch that modifies the output from /proc/cmdline and
@@ -53,7 +55,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
start_test "multiple livepatches"
-load_lp $MOD_LIVEPATCH
+load_lp $MOD_LIVEPATCH1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
@@ -69,62 +71,81 @@ unload_lp $MOD_REPLACE
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-disable_lp $MOD_LIVEPATCH
-unload_lp $MOD_LIVEPATCH
+disable_lp $MOD_LIVEPATCH1
+unload_lp $MOD_LIVEPATCH1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-check_result "% modprobe $MOD_LIVEPATCH
-livepatch: enabling patch '$MOD_LIVEPATCH'
-livepatch: '$MOD_LIVEPATCH': initializing patching transition
-livepatch: '$MOD_LIVEPATCH': starting patching transition
-livepatch: '$MOD_LIVEPATCH': completing patching transition
-livepatch: '$MOD_LIVEPATCH': patching complete
-$MOD_LIVEPATCH: this has been live patched
-% modprobe $MOD_REPLACE replace=0
+check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko
+livepatch: enabling patch '$MOD_LIVEPATCH1'
+livepatch: '$MOD_LIVEPATCH1': initializing patching transition
+livepatch: '$MOD_LIVEPATCH1': starting patching transition
+livepatch: '$MOD_LIVEPATCH1': completing patching transition
+livepatch: '$MOD_LIVEPATCH1': patching complete
+$MOD_LIVEPATCH1: this has been live patched
+% insmod test_modules/$MOD_REPLACE.ko replace=0
livepatch: enabling patch '$MOD_REPLACE'
livepatch: '$MOD_REPLACE': initializing patching transition
livepatch: '$MOD_REPLACE': starting patching transition
livepatch: '$MOD_REPLACE': completing patching transition
livepatch: '$MOD_REPLACE': patching complete
-$MOD_LIVEPATCH: this has been live patched
+$MOD_LIVEPATCH1: this has been live patched
$MOD_REPLACE: this has been live patched
-% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_REPLACE/enabled
livepatch: '$MOD_REPLACE': initializing unpatching transition
livepatch: '$MOD_REPLACE': starting unpatching transition
livepatch: '$MOD_REPLACE': completing unpatching transition
livepatch: '$MOD_REPLACE': unpatching complete
% rmmod $MOD_REPLACE
-$MOD_LIVEPATCH: this has been live patched
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
-livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
-livepatch: '$MOD_LIVEPATCH': starting unpatching transition
-livepatch: '$MOD_LIVEPATCH': completing unpatching transition
-livepatch: '$MOD_LIVEPATCH': unpatching complete
-% rmmod $MOD_LIVEPATCH"
+$MOD_LIVEPATCH1: this has been live patched
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH1/enabled
+livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH1': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH1': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH1': unpatching complete
+% rmmod $MOD_LIVEPATCH1"
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
-# - load an atomic replace livepatch and verify that only the second is active
-# - remove the first livepatch and verify that the atomic replace livepatch
-# is still active
+# - load two additional livepatches and check the number of livepatch modules
+# applied
+# - load an atomic replace livepatch and check that the other three modules were
+# disabled
+# - remove all livepatches besides the atomic replace one and verify that the
+# atomic replace livepatch is still active
# - remove the atomic replace livepatch and verify that none are active
start_test "atomic replace livepatch"
-load_lp $MOD_LIVEPATCH
+load_lp $MOD_LIVEPATCH1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
+for mod in $MOD_LIVEPATCH2 $MOD_LIVEPATCH3; do
+ load_lp "$mod"
+done
+
+mods=($SYSFS_KLP_DIR/*)
+nmods=${#mods[@]}
+if [ "$nmods" -ne 3 ]; then
+ die "Expecting three modules listed, found $nmods"
+fi
+
load_lp $MOD_REPLACE replace=1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-unload_lp $MOD_LIVEPATCH
+loop_until 'mods=($SYSFS_KLP_DIR/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' ||
+ die "Expecting only one moduled listed, found $nmods"
+
+# These modules were disabled by the atomic replace
+for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do
+ unload_lp "$mod"
+done
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
@@ -135,23 +156,39 @@ unload_lp $MOD_REPLACE
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-check_result "% modprobe $MOD_LIVEPATCH
-livepatch: enabling patch '$MOD_LIVEPATCH'
-livepatch: '$MOD_LIVEPATCH': initializing patching transition
-livepatch: '$MOD_LIVEPATCH': starting patching transition
-livepatch: '$MOD_LIVEPATCH': completing patching transition
-livepatch: '$MOD_LIVEPATCH': patching complete
-$MOD_LIVEPATCH: this has been live patched
-% modprobe $MOD_REPLACE replace=1
+check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko
+livepatch: enabling patch '$MOD_LIVEPATCH1'
+livepatch: '$MOD_LIVEPATCH1': initializing patching transition
+livepatch: '$MOD_LIVEPATCH1': starting patching transition
+livepatch: '$MOD_LIVEPATCH1': completing patching transition
+livepatch: '$MOD_LIVEPATCH1': patching complete
+$MOD_LIVEPATCH1: this has been live patched
+% insmod test_modules/$MOD_LIVEPATCH2.ko
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% insmod test_modules/$MOD_LIVEPATCH3.ko
+livepatch: enabling patch '$MOD_LIVEPATCH3'
+livepatch: '$MOD_LIVEPATCH3': initializing patching transition
+$MOD_LIVEPATCH3: pre_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH3': starting patching transition
+livepatch: '$MOD_LIVEPATCH3': completing patching transition
+$MOD_LIVEPATCH3: post_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH3': patching complete
+% insmod test_modules/$MOD_REPLACE.ko replace=1
livepatch: enabling patch '$MOD_REPLACE'
livepatch: '$MOD_REPLACE': initializing patching transition
livepatch: '$MOD_REPLACE': starting patching transition
livepatch: '$MOD_REPLACE': completing patching transition
livepatch: '$MOD_REPLACE': patching complete
$MOD_REPLACE: this has been live patched
-% rmmod $MOD_LIVEPATCH
+% rmmod $MOD_LIVEPATCH3
+% rmmod $MOD_LIVEPATCH2
+% rmmod $MOD_LIVEPATCH1
$MOD_REPLACE: this has been live patched
-% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_REPLACE/enabled
livepatch: '$MOD_REPLACE': initializing unpatching transition
livepatch: '$MOD_REPLACE': starting unpatching transition
livepatch: '$MOD_REPLACE': completing unpatching transition
diff --git a/tools/testing/selftests/livepatch/test-shadow-vars.sh b/tools/testing/selftests/livepatch/test-shadow-vars.sh
index e04cb354f56b..1218c155bffe 100755
--- a/tools/testing/selftests/livepatch/test-shadow-vars.sh
+++ b/tools/testing/selftests/livepatch/test-shadow-vars.sh
@@ -16,7 +16,7 @@ start_test "basic shadow variable API"
load_mod $MOD_TEST
unload_mod $MOD_TEST
-check_result "% modprobe $MOD_TEST
+check_result "% insmod test_modules/$MOD_TEST.ko
$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
$MOD_TEST: shadow_ctor: PTR3 -> PTR2
diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh
index 38656721c958..04b66380f8a0 100755
--- a/tools/testing/selftests/livepatch/test-state.sh
+++ b/tools/testing/selftests/livepatch/test-state.sh
@@ -19,7 +19,7 @@ load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -29,7 +29,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
$MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH': patching complete
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
$MOD_LIVEPATCH: restore_console_loglevel: restoring console_loglevel
@@ -51,7 +51,7 @@ unload_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -61,7 +61,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
$MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_LIVEPATCH2
+% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -72,7 +72,7 @@ $MOD_LIVEPATCH2: post_patch_callback: vmlinux
$MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change
livepatch: '$MOD_LIVEPATCH2': patching complete
% rmmod $MOD_LIVEPATCH
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled
livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
$MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel
@@ -96,7 +96,7 @@ disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH3
-check_result "% modprobe $MOD_LIVEPATCH2
+check_result "% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -106,7 +106,7 @@ livepatch: '$MOD_LIVEPATCH2': completing patching transition
$MOD_LIVEPATCH2: post_patch_callback: vmlinux
$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH2': patching complete
-% modprobe $MOD_LIVEPATCH3
+% insmod test_modules/$MOD_LIVEPATCH3.ko
livepatch: enabling patch '$MOD_LIVEPATCH3'
livepatch: '$MOD_LIVEPATCH3': initializing patching transition
$MOD_LIVEPATCH3: pre_patch_callback: vmlinux
@@ -117,7 +117,7 @@ $MOD_LIVEPATCH3: post_patch_callback: vmlinux
$MOD_LIVEPATCH3: fix_console_loglevel: taking over the console_loglevel change
livepatch: '$MOD_LIVEPATCH3': patching complete
% rmmod $MOD_LIVEPATCH2
-% modprobe $MOD_LIVEPATCH2
+% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -127,7 +127,7 @@ livepatch: '$MOD_LIVEPATCH2': completing patching transition
$MOD_LIVEPATCH2: post_patch_callback: vmlinux
$MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change
livepatch: '$MOD_LIVEPATCH2': patching complete
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled
livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
$MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel
@@ -149,7 +149,7 @@ load_failing_mod $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
-check_result "% modprobe $MOD_LIVEPATCH2
+check_result "% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -159,10 +159,10 @@ livepatch: '$MOD_LIVEPATCH2': completing patching transition
$MOD_LIVEPATCH2: post_patch_callback: vmlinux
$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH2': patching complete
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: Livepatch patch ($MOD_LIVEPATCH) is not compatible with the already installed livepatches.
-modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Invalid argument
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Invalid parameters
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled
livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
$MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel
diff --git a/tools/testing/selftests/livepatch/test-syscall.sh b/tools/testing/selftests/livepatch/test-syscall.sh
new file mode 100755
index 000000000000..5f9344277b62
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test-syscall.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2023 SUSE
+# Author: Marcos Paulo de Souza <mpdesouza@suse.com>
+
+. $(dirname $0)/functions.sh
+
+MOD_SYSCALL=test_klp_syscall
+
+setup_config
+
+# - Start _NRPROC processes calling getpid and load a livepatch to patch the
+# getpid syscall. Check if all the processes transitioned to the livepatched
+# state.
+
+start_test "patch getpid syscall while being heavily hammered"
+
+NPROC=$(getconf _NPROCESSORS_ONLN)
+MAXPROC=128
+
+for i in $(seq 1 $(($NPROC < $MAXPROC ? $NPROC : $MAXPROC))); do
+ ./test_klp-call_getpid &
+ pids[$i]="$!"
+done
+
+pid_list=$(echo ${pids[@]} | tr ' ' ',')
+load_lp $MOD_SYSCALL klp_pids=$pid_list
+
+# wait for all tasks to transition to patched state
+loop_until 'grep -q '^0$' $SYSFS_KERNEL_DIR/$MOD_SYSCALL/npids'
+
+pending_pids=$(cat $SYSFS_KERNEL_DIR/$MOD_SYSCALL/npids)
+log "$MOD_SYSCALL: Remaining not livepatched processes: $pending_pids"
+
+for pid in ${pids[@]}; do
+ kill $pid || true
+done
+
+disable_lp $MOD_SYSCALL
+unload_lp $MOD_SYSCALL
+
+check_result "% insmod test_modules/$MOD_SYSCALL.ko klp_pids=$pid_list
+livepatch: enabling patch '$MOD_SYSCALL'
+livepatch: '$MOD_SYSCALL': initializing patching transition
+livepatch: '$MOD_SYSCALL': starting patching transition
+livepatch: '$MOD_SYSCALL': completing patching transition
+livepatch: '$MOD_SYSCALL': patching complete
+$MOD_SYSCALL: Remaining not livepatched processes: 0
+% echo 0 > $SYSFS_KLP_DIR/$MOD_SYSCALL/enabled
+livepatch: '$MOD_SYSCALL': initializing unpatching transition
+livepatch: '$MOD_SYSCALL': starting unpatching transition
+livepatch: '$MOD_SYSCALL': completing unpatching transition
+livepatch: '$MOD_SYSCALL': unpatching complete
+% rmmod $MOD_SYSCALL"
+
+exit 0
diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh
new file mode 100755
index 000000000000..58fe1d96997c
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test-sysfs.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2022 Song Liu <song@kernel.org>
+
+. $(dirname $0)/functions.sh
+
+MOD_LIVEPATCH=test_klp_livepatch
+MOD_LIVEPATCH2=test_klp_callbacks_demo
+MOD_LIVEPATCH3=test_klp_syscall
+
+setup_config
+
+# - load a livepatch and verifies the sysfs entries work as expected
+
+start_test "sysfs test"
+
+load_lp $MOD_LIVEPATCH
+
+check_sysfs_rights "$MOD_LIVEPATCH" "" "drwxr-xr-x"
+check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1"
+check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------"
+check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
+check_sysfs_rights "$MOD_LIVEPATCH" "stack_order" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
+check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "transition" "0"
+check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "vmlinux/patched" "1"
+
+disable_lp $MOD_LIVEPATCH
+
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
+start_test "sysfs test object/patched"
+
+MOD_LIVEPATCH=test_klp_callbacks_demo
+MOD_TARGET=test_klp_callbacks_mod
+load_lp $MOD_LIVEPATCH
+
+# check the "patch" file changes as target module loads/unloads
+check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "0"
+load_mod $MOD_TARGET
+check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "1"
+unload_mod $MOD_TARGET
+check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "0"
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/test_klp_callbacks_demo.ko
+livepatch: enabling patch 'test_klp_callbacks_demo'
+livepatch: 'test_klp_callbacks_demo': initializing patching transition
+test_klp_callbacks_demo: pre_patch_callback: vmlinux
+livepatch: 'test_klp_callbacks_demo': starting patching transition
+livepatch: 'test_klp_callbacks_demo': completing patching transition
+test_klp_callbacks_demo: post_patch_callback: vmlinux
+livepatch: 'test_klp_callbacks_demo': patching complete
+% insmod test_modules/test_klp_callbacks_mod.ko
+livepatch: applying patch 'test_klp_callbacks_demo' to loading module 'test_klp_callbacks_mod'
+test_klp_callbacks_demo: pre_patch_callback: test_klp_callbacks_mod -> [MODULE_STATE_COMING] Full formed, running module_init
+test_klp_callbacks_demo: post_patch_callback: test_klp_callbacks_mod -> [MODULE_STATE_COMING] Full formed, running module_init
+test_klp_callbacks_mod: test_klp_callbacks_mod_init
+% rmmod test_klp_callbacks_mod
+test_klp_callbacks_mod: test_klp_callbacks_mod_exit
+test_klp_callbacks_demo: pre_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away
+livepatch: reverting patch 'test_klp_callbacks_demo' on unloading module 'test_klp_callbacks_mod'
+test_klp_callbacks_demo: post_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away
+% echo 0 > $SYSFS_KLP_DIR/test_klp_callbacks_demo/enabled
+livepatch: 'test_klp_callbacks_demo': initializing unpatching transition
+test_klp_callbacks_demo: pre_unpatch_callback: vmlinux
+livepatch: 'test_klp_callbacks_demo': starting unpatching transition
+livepatch: 'test_klp_callbacks_demo': completing unpatching transition
+test_klp_callbacks_demo: post_unpatch_callback: vmlinux
+livepatch: 'test_klp_callbacks_demo': unpatching complete
+% rmmod test_klp_callbacks_demo"
+
+start_test "sysfs test replace enabled"
+
+MOD_LIVEPATCH=test_klp_atomic_replace
+load_lp $MOD_LIVEPATCH replace=1
+
+check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "replace" "1"
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=1
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
+start_test "sysfs test replace disabled"
+
+load_lp $MOD_LIVEPATCH replace=0
+
+check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "replace" "0"
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=0
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
+start_test "sysfs test stack_order value"
+
+load_lp $MOD_LIVEPATCH
+
+check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
+
+load_lp $MOD_LIVEPATCH2
+
+check_sysfs_value "$MOD_LIVEPATCH2" "stack_order" "2"
+
+load_lp $MOD_LIVEPATCH3
+
+check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "3"
+
+disable_lp $MOD_LIVEPATCH2
+unload_lp $MOD_LIVEPATCH2
+
+check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
+check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "2"
+
+disable_lp $MOD_LIVEPATCH3
+unload_lp $MOD_LIVEPATCH3
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% insmod test_modules/$MOD_LIVEPATCH2.ko
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+$MOD_LIVEPATCH2: post_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% insmod test_modules/$MOD_LIVEPATCH3.ko
+livepatch: enabling patch '$MOD_LIVEPATCH3'
+livepatch: '$MOD_LIVEPATCH3': initializing patching transition
+livepatch: '$MOD_LIVEPATCH3': starting patching transition
+livepatch: '$MOD_LIVEPATCH3': completing patching transition
+livepatch: '$MOD_LIVEPATCH3': patching complete
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled
+livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
+$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH2': completing unpatching transition
+$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': unpatching complete
+% rmmod $MOD_LIVEPATCH2
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH3/enabled
+livepatch: '$MOD_LIVEPATCH3': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH3': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH3': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH3': unpatching complete
+% rmmod $MOD_LIVEPATCH3
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
+exit 0
diff --git a/tools/testing/selftests/livepatch/test_klp-call_getpid.c b/tools/testing/selftests/livepatch/test_klp-call_getpid.c
new file mode 100644
index 000000000000..ce321a2d7308
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_klp-call_getpid.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 SUSE
+ * Authors: Libor Pechacek <lpechacek@suse.cz>
+ * Marcos Paulo de Souza <mpdesouza@suse.com>
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <signal.h>
+
+static int stop;
+static int sig_int;
+
+void hup_handler(int signum)
+{
+ stop = 1;
+}
+
+void int_handler(int signum)
+{
+ stop = 1;
+ sig_int = 1;
+}
+
+int main(int argc, char *argv[])
+{
+ long count = 0;
+
+ signal(SIGHUP, &hup_handler);
+ signal(SIGINT, &int_handler);
+
+ while (!stop) {
+ (void)syscall(SYS_getpid);
+ count++;
+ }
+
+ if (sig_int)
+ printf("%ld iterations done\n", count);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/livepatch/test_modules/Makefile b/tools/testing/selftests/livepatch/test_modules/Makefile
new file mode 100644
index 000000000000..939230e571f5
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/Makefile
@@ -0,0 +1,27 @@
+TESTMODS_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+obj-m += test_klp_atomic_replace.o \
+ test_klp_callbacks_busy.o \
+ test_klp_callbacks_demo.o \
+ test_klp_callbacks_demo2.o \
+ test_klp_callbacks_mod.o \
+ test_klp_kprobe.o \
+ test_klp_livepatch.o \
+ test_klp_shadow_vars.o \
+ test_klp_state.o \
+ test_klp_state2.o \
+ test_klp_state3.o \
+ test_klp_syscall.o
+
+# Ensure that KDIR exists, otherwise skip the compilation
+modules:
+ifneq ("$(wildcard $(KDIR))", "")
+ $(Q)$(MAKE) -C $(KDIR) modules KBUILD_EXTMOD=$(TESTMODS_DIR)
+endif
+
+# Ensure that KDIR exists, otherwise skip the clean target
+clean:
+ifneq ("$(wildcard $(KDIR))", "")
+ $(Q)$(MAKE) -C $(KDIR) clean KBUILD_EXTMOD=$(TESTMODS_DIR)
+endif
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c b/tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c
new file mode 100644
index 000000000000..5af7093ca00c
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/livepatch.h>
+
+static int replace;
+module_param(replace, int, 0644);
+MODULE_PARM_DESC(replace, "replace (default=0)");
+
+#include <linux/seq_file.h>
+static int livepatch_meminfo_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%s: %s\n", THIS_MODULE->name,
+ "this has been live patched");
+ return 0;
+}
+
+static struct klp_func funcs[] = {
+ {
+ .old_name = "meminfo_proc_show",
+ .new_func = livepatch_meminfo_proc_show,
+ }, {}
+};
+
+static struct klp_object objs[] = {
+ {
+ /* name being NULL means vmlinux */
+ .funcs = funcs,
+ }, {}
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+ /* set .replace in the init function below for demo purposes */
+};
+
+static int test_klp_atomic_replace_init(void)
+{
+ patch.replace = replace;
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_atomic_replace_exit(void)
+{
+}
+
+module_init(test_klp_atomic_replace_init);
+module_exit(test_klp_atomic_replace_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: atomic replace");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c
new file mode 100644
index 000000000000..133929e0ce8f
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+
+/* load/run-time control from sysfs writer */
+static bool block_transition;
+module_param(block_transition, bool, 0644);
+MODULE_PARM_DESC(block_transition, "block_transition (default=false)");
+
+static void busymod_work_func(struct work_struct *work);
+static DECLARE_WORK(work, busymod_work_func);
+static DECLARE_COMPLETION(busymod_work_started);
+
+static void busymod_work_func(struct work_struct *work)
+{
+ pr_info("%s enter\n", __func__);
+ complete(&busymod_work_started);
+
+ while (READ_ONCE(block_transition)) {
+ /*
+ * Busy-wait until the sysfs writer has acknowledged a
+ * blocked transition and clears the flag.
+ */
+ msleep(20);
+ }
+
+ pr_info("%s exit\n", __func__);
+}
+
+static int test_klp_callbacks_busy_init(void)
+{
+ pr_info("%s\n", __func__);
+ schedule_work(&work);
+
+ /*
+ * To synchronize kernel messages, hold the init function from
+ * exiting until the work function's entry message has printed.
+ */
+ wait_for_completion(&busymod_work_started);
+
+ if (!block_transition) {
+ /*
+ * Serialize output: print all messages from the work
+ * function before returning from init().
+ */
+ flush_work(&work);
+ }
+
+ return 0;
+}
+
+static void test_klp_callbacks_busy_exit(void)
+{
+ WRITE_ONCE(block_transition, false);
+ flush_work(&work);
+ pr_info("%s\n", __func__);
+}
+
+module_init(test_klp_callbacks_busy_init);
+module_exit(test_klp_callbacks_busy_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: busy target module");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c
new file mode 100644
index 000000000000..3fd8fe1cd1cc
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/livepatch.h>
+
+static int pre_patch_ret;
+module_param(pre_patch_ret, int, 0644);
+MODULE_PARM_DESC(pre_patch_ret, "pre_patch_ret (default=0)");
+
+static const char *const module_state[] = {
+ [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state",
+ [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init",
+ [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away",
+ [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up",
+};
+
+static void callback_info(const char *callback, struct klp_object *obj)
+{
+ if (obj->mod)
+ pr_info("%s: %s -> %s\n", callback, obj->mod->name,
+ module_state[obj->mod->state]);
+ else
+ pr_info("%s: vmlinux\n", callback);
+}
+
+/* Executed on object patching (ie, patch enablement) */
+static int pre_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ return pre_patch_ret;
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void pre_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+static void patched_work_func(struct work_struct *work)
+{
+ pr_info("%s\n", __func__);
+}
+
+static struct klp_func no_funcs[] = {
+ {}
+};
+
+static struct klp_func busymod_funcs[] = {
+ {
+ .old_name = "busymod_work_func",
+ .new_func = patched_work_func,
+ }, {}
+};
+
+static struct klp_object objs[] = {
+ {
+ .name = NULL, /* vmlinux */
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, {
+ .name = "test_klp_callbacks_mod",
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, {
+ .name = "test_klp_callbacks_busy",
+ .funcs = busymod_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+};
+
+static int test_klp_callbacks_demo_init(void)
+{
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_callbacks_demo_exit(void)
+{
+}
+
+module_init(test_klp_callbacks_demo_init);
+module_exit(test_klp_callbacks_demo_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: livepatch demo");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c
new file mode 100644
index 000000000000..5417573e80af
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/livepatch.h>
+
+static int replace;
+module_param(replace, int, 0644);
+MODULE_PARM_DESC(replace, "replace (default=0)");
+
+static const char *const module_state[] = {
+ [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state",
+ [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init",
+ [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away",
+ [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up",
+};
+
+static void callback_info(const char *callback, struct klp_object *obj)
+{
+ if (obj->mod)
+ pr_info("%s: %s -> %s\n", callback, obj->mod->name,
+ module_state[obj->mod->state]);
+ else
+ pr_info("%s: vmlinux\n", callback);
+}
+
+/* Executed on object patching (ie, patch enablement) */
+static int pre_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ return 0;
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void pre_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+static struct klp_func no_funcs[] = {
+ { }
+};
+
+static struct klp_object objs[] = {
+ {
+ .name = NULL, /* vmlinux */
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+ /* set .replace in the init function below for demo purposes */
+};
+
+static int test_klp_callbacks_demo2_init(void)
+{
+ patch.replace = replace;
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_callbacks_demo2_exit(void)
+{
+}
+
+module_init(test_klp_callbacks_demo2_init);
+module_exit(test_klp_callbacks_demo2_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: livepatch demo2");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c
new file mode 100644
index 000000000000..8fbe645b1c2c
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+static int test_klp_callbacks_mod_init(void)
+{
+ pr_info("%s\n", __func__);
+ return 0;
+}
+
+static void test_klp_callbacks_mod_exit(void)
+{
+ pr_info("%s\n", __func__);
+}
+
+module_init(test_klp_callbacks_mod_init);
+module_exit(test_klp_callbacks_mod_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: target module");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c b/tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c
new file mode 100644
index 000000000000..67a8d29012f6
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2024 Marcos Paulo de Souza <mpdesouza@suse.com>
+// Copyright (C) 2024 Michael Vetter <mvetter@suse.com>
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+
+static bool has_post_handler = true;
+module_param(has_post_handler, bool, 0444);
+
+static void __kprobes post_handler(struct kprobe *p, struct pt_regs *regs,
+ unsigned long flags)
+{
+}
+
+static struct kprobe kp = {
+ .symbol_name = "cmdline_proc_show",
+};
+
+static int __init kprobe_init(void)
+{
+ if (has_post_handler)
+ kp.post_handler = post_handler;
+
+ return register_kprobe(&kp);
+}
+
+static void __exit kprobe_exit(void)
+{
+ unregister_kprobe(&kp);
+}
+
+module_init(kprobe_init)
+module_exit(kprobe_exit)
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Vetter <mvetter@suse.com>");
+MODULE_DESCRIPTION("Livepatch test: kprobe function");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c b/tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c
new file mode 100644
index 000000000000..aff08199de71
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/livepatch.h>
+
+#include <linux/seq_file.h>
+static int livepatch_cmdline_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%s: %s\n", THIS_MODULE->name,
+ "this has been live patched");
+ return 0;
+}
+
+static struct klp_func funcs[] = {
+ {
+ .old_name = "cmdline_proc_show",
+ .new_func = livepatch_cmdline_proc_show,
+ }, { }
+};
+
+static struct klp_object objs[] = {
+ {
+ /* name being NULL means vmlinux */
+ .funcs = funcs,
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+};
+
+static int test_klp_livepatch_init(void)
+{
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_livepatch_exit(void)
+{
+}
+
+module_init(test_klp_livepatch_init);
+module_exit(test_klp_livepatch_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Seth Jennings <sjenning@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: livepatch module");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c b/tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c
new file mode 100644
index 000000000000..b99116490858
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/livepatch.h>
+#include <linux/slab.h>
+
+/*
+ * Keep a small list of pointers so that we can print address-agnostic
+ * pointer values. Use a rolling integer count to differentiate the values.
+ * Ironically we could have used the shadow variable API to do this, but
+ * let's not lean too heavily on the very code we're testing.
+ */
+static LIST_HEAD(ptr_list);
+struct shadow_ptr {
+ void *ptr;
+ int id;
+ struct list_head list;
+};
+
+static void free_ptr_list(void)
+{
+ struct shadow_ptr *sp, *tmp_sp;
+
+ list_for_each_entry_safe(sp, tmp_sp, &ptr_list, list) {
+ list_del(&sp->list);
+ kfree(sp);
+ }
+}
+
+static int ptr_id(void *ptr)
+{
+ struct shadow_ptr *sp;
+ static int count;
+
+ list_for_each_entry(sp, &ptr_list, list) {
+ if (sp->ptr == ptr)
+ return sp->id;
+ }
+
+ sp = kmalloc(sizeof(*sp), GFP_ATOMIC);
+ if (!sp)
+ return -ENOMEM;
+ sp->ptr = ptr;
+ sp->id = count++;
+
+ list_add(&sp->list, &ptr_list);
+
+ return sp->id;
+}
+
+/*
+ * Shadow variable wrapper functions that echo the function and arguments
+ * to the kernel log for testing verification. Don't display raw pointers,
+ * but use the ptr_id() value instead.
+ */
+static void *shadow_get(void *obj, unsigned long id)
+{
+ int **sv;
+
+ sv = klp_shadow_get(obj, id);
+ pr_info("klp_%s(obj=PTR%d, id=0x%lx) = PTR%d\n",
+ __func__, ptr_id(obj), id, ptr_id(sv));
+
+ return sv;
+}
+
+static void *shadow_alloc(void *obj, unsigned long id, size_t size,
+ gfp_t gfp_flags, klp_shadow_ctor_t ctor,
+ void *ctor_data)
+{
+ int **var = ctor_data;
+ int **sv;
+
+ sv = klp_shadow_alloc(obj, id, size, gfp_flags, ctor, var);
+ pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n",
+ __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor),
+ ptr_id(*var), ptr_id(sv));
+
+ return sv;
+}
+
+static void *shadow_get_or_alloc(void *obj, unsigned long id, size_t size,
+ gfp_t gfp_flags, klp_shadow_ctor_t ctor,
+ void *ctor_data)
+{
+ int **var = ctor_data;
+ int **sv;
+
+ sv = klp_shadow_get_or_alloc(obj, id, size, gfp_flags, ctor, var);
+ pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n",
+ __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor),
+ ptr_id(*var), ptr_id(sv));
+
+ return sv;
+}
+
+static void shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor)
+{
+ klp_shadow_free(obj, id, dtor);
+ pr_info("klp_%s(obj=PTR%d, id=0x%lx, dtor=PTR%d)\n",
+ __func__, ptr_id(obj), id, ptr_id(dtor));
+}
+
+static void shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor)
+{
+ klp_shadow_free_all(id, dtor);
+ pr_info("klp_%s(id=0x%lx, dtor=PTR%d)\n", __func__, id, ptr_id(dtor));
+}
+
+
+/* Shadow variable constructor - remember simple pointer data */
+static int shadow_ctor(void *obj, void *shadow_data, void *ctor_data)
+{
+ int **sv = shadow_data;
+ int **var = ctor_data;
+
+ if (!var)
+ return -EINVAL;
+
+ *sv = *var;
+ pr_info("%s: PTR%d -> PTR%d\n", __func__, ptr_id(sv), ptr_id(*var));
+
+ return 0;
+}
+
+/*
+ * With more than one item to free in the list, order is not determined and
+ * shadow_dtor will not be passed to shadow_free_all() which would make the
+ * test fail. (see pass 6)
+ */
+static void shadow_dtor(void *obj, void *shadow_data)
+{
+ int **sv = shadow_data;
+
+ pr_info("%s(obj=PTR%d, shadow_data=PTR%d)\n",
+ __func__, ptr_id(obj), ptr_id(sv));
+}
+
+/* number of objects we simulate that need shadow vars */
+#define NUM_OBJS 3
+
+/* dynamically created obj fields have the following shadow var id values */
+#define SV_ID1 0x1234
+#define SV_ID2 0x1235
+
+/*
+ * The main test case adds/removes new fields (shadow var) to each of these
+ * test structure instances. The last group of fields in the struct represent
+ * the idea that shadow variables may be added and removed to and from the
+ * struct during execution.
+ */
+struct test_object {
+ /* add anything here below and avoid to define an empty struct */
+ struct shadow_ptr sp;
+
+ /* these represent shadow vars added and removed with SV_ID{1,2} */
+ /* char nfield1; */
+ /* int nfield2; */
+};
+
+static int test_klp_shadow_vars_init(void)
+{
+ struct test_object objs[NUM_OBJS];
+ char nfields1[NUM_OBJS], *pnfields1[NUM_OBJS], **sv1[NUM_OBJS];
+ char *pndup[NUM_OBJS];
+ int nfields2[NUM_OBJS], *pnfields2[NUM_OBJS], **sv2[NUM_OBJS];
+ void **sv;
+ int ret;
+ int i;
+
+ ptr_id(NULL);
+
+ /*
+ * With an empty shadow variable hash table, expect not to find
+ * any matches.
+ */
+ sv = shadow_get(&objs[0], SV_ID1);
+ if (!sv)
+ pr_info(" got expected NULL result\n");
+
+ /* pass 1: init & alloc a char+int pair of svars for each objs */
+ for (i = 0; i < NUM_OBJS; i++) {
+ pnfields1[i] = &nfields1[i];
+ ptr_id(pnfields1[i]);
+
+ if (i % 2) {
+ sv1[i] = shadow_alloc(&objs[i], SV_ID1,
+ sizeof(pnfields1[i]), GFP_KERNEL,
+ shadow_ctor, &pnfields1[i]);
+ } else {
+ sv1[i] = shadow_get_or_alloc(&objs[i], SV_ID1,
+ sizeof(pnfields1[i]), GFP_KERNEL,
+ shadow_ctor, &pnfields1[i]);
+ }
+ if (!sv1[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pnfields2[i] = &nfields2[i];
+ ptr_id(pnfields2[i]);
+ sv2[i] = shadow_alloc(&objs[i], SV_ID2, sizeof(pnfields2[i]),
+ GFP_KERNEL, shadow_ctor, &pnfields2[i]);
+ if (!sv2[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* pass 2: verify we find allocated svars and where they point to */
+ for (i = 0; i < NUM_OBJS; i++) {
+ /* check the "char" svar for all objects */
+ sv = shadow_get(&objs[i], SV_ID1);
+ if (!sv) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if ((char **)sv == sv1[i] && *sv1[i] == pnfields1[i])
+ pr_info(" got expected PTR%d -> PTR%d result\n",
+ ptr_id(sv1[i]), ptr_id(*sv1[i]));
+
+ /* check the "int" svar for all objects */
+ sv = shadow_get(&objs[i], SV_ID2);
+ if (!sv) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if ((int **)sv == sv2[i] && *sv2[i] == pnfields2[i])
+ pr_info(" got expected PTR%d -> PTR%d result\n",
+ ptr_id(sv2[i]), ptr_id(*sv2[i]));
+ }
+
+ /* pass 3: verify that 'get_or_alloc' returns already allocated svars */
+ for (i = 0; i < NUM_OBJS; i++) {
+ pndup[i] = &nfields1[i];
+ ptr_id(pndup[i]);
+
+ sv = shadow_get_or_alloc(&objs[i], SV_ID1, sizeof(pndup[i]),
+ GFP_KERNEL, shadow_ctor, &pndup[i]);
+ if (!sv) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if ((char **)sv == sv1[i] && *sv1[i] == pnfields1[i])
+ pr_info(" got expected PTR%d -> PTR%d result\n",
+ ptr_id(sv1[i]), ptr_id(*sv1[i]));
+ }
+
+ /* pass 4: free <objs[*], SV_ID1> pairs of svars, verify removal */
+ for (i = 0; i < NUM_OBJS; i++) {
+ shadow_free(&objs[i], SV_ID1, shadow_dtor); /* 'char' pairs */
+ sv = shadow_get(&objs[i], SV_ID1);
+ if (!sv)
+ pr_info(" got expected NULL result\n");
+ }
+
+ /* pass 5: check we still find <objs[*], SV_ID2> svar pairs */
+ for (i = 0; i < NUM_OBJS; i++) {
+ sv = shadow_get(&objs[i], SV_ID2); /* 'int' pairs */
+ if (!sv) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if ((int **)sv == sv2[i] && *sv2[i] == pnfields2[i])
+ pr_info(" got expected PTR%d -> PTR%d result\n",
+ ptr_id(sv2[i]), ptr_id(*sv2[i]));
+ }
+
+ /* pass 6: free all the <objs[*], SV_ID2> svar pairs too. */
+ shadow_free_all(SV_ID2, NULL); /* 'int' pairs */
+ for (i = 0; i < NUM_OBJS; i++) {
+ sv = shadow_get(&objs[i], SV_ID2);
+ if (!sv)
+ pr_info(" got expected NULL result\n");
+ }
+
+ free_ptr_list();
+
+ return 0;
+out:
+ shadow_free_all(SV_ID1, NULL); /* 'char' pairs */
+ shadow_free_all(SV_ID2, NULL); /* 'int' pairs */
+ free_ptr_list();
+
+ return ret;
+}
+
+static void test_klp_shadow_vars_exit(void)
+{
+}
+
+module_init(test_klp_shadow_vars_init);
+module_exit(test_klp_shadow_vars_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: shadow variables");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_state.c b/tools/testing/selftests/livepatch/test_modules/test_klp_state.c
new file mode 100644
index 000000000000..57a4253acb01
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_state.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 SUSE
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/livepatch.h>
+
+#define CONSOLE_LOGLEVEL_STATE 1
+/* Version 1 does not support migration. */
+#define CONSOLE_LOGLEVEL_STATE_VERSION 1
+
+static const char *const module_state[] = {
+ [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state",
+ [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init",
+ [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away",
+ [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up",
+};
+
+static void callback_info(const char *callback, struct klp_object *obj)
+{
+ if (obj->mod)
+ pr_info("%s: %s -> %s\n", callback, obj->mod->name,
+ module_state[obj->mod->state]);
+ else
+ pr_info("%s: vmlinux\n", callback);
+}
+
+static struct klp_patch patch;
+
+static int allocate_loglevel_state(void)
+{
+ struct klp_state *loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return -EINVAL;
+
+ loglevel_state->data = kzalloc(sizeof(console_loglevel), GFP_KERNEL);
+ if (!loglevel_state->data)
+ return -ENOMEM;
+
+ pr_info("%s: allocating space to store console_loglevel\n",
+ __func__);
+ return 0;
+}
+
+static void fix_console_loglevel(void)
+{
+ struct klp_state *loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: fixing console_loglevel\n", __func__);
+ *(int *)loglevel_state->data = console_loglevel;
+ console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
+}
+
+static void restore_console_loglevel(void)
+{
+ struct klp_state *loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: restoring console_loglevel\n", __func__);
+ console_loglevel = *(int *)loglevel_state->data;
+}
+
+static void free_loglevel_state(void)
+{
+ struct klp_state *loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: freeing space for the stored console_loglevel\n",
+ __func__);
+ kfree(loglevel_state->data);
+}
+
+/* Executed on object patching (ie, patch enablement) */
+static int pre_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ return allocate_loglevel_state();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ fix_console_loglevel();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void pre_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ restore_console_loglevel();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ free_loglevel_state();
+}
+
+static struct klp_func no_funcs[] = {
+ {}
+};
+
+static struct klp_object objs[] = {
+ {
+ .name = NULL, /* vmlinux */
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, { }
+};
+
+static struct klp_state states[] = {
+ {
+ .id = CONSOLE_LOGLEVEL_STATE,
+ .version = CONSOLE_LOGLEVEL_STATE_VERSION,
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+ .states = states,
+ .replace = true,
+};
+
+static int test_klp_callbacks_demo_init(void)
+{
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_callbacks_demo_exit(void)
+{
+}
+
+module_init(test_klp_callbacks_demo_init);
+module_exit(test_klp_callbacks_demo_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Petr Mladek <pmladek@suse.com>");
+MODULE_DESCRIPTION("Livepatch test: system state modification");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_state2.c b/tools/testing/selftests/livepatch/test_modules/test_klp_state2.c
new file mode 100644
index 000000000000..c978ea4d5e67
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_state2.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 SUSE
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/livepatch.h>
+
+#define CONSOLE_LOGLEVEL_STATE 1
+/* Version 2 supports migration. */
+#define CONSOLE_LOGLEVEL_STATE_VERSION 2
+
+static const char *const module_state[] = {
+ [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state",
+ [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init",
+ [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away",
+ [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up",
+};
+
+static void callback_info(const char *callback, struct klp_object *obj)
+{
+ if (obj->mod)
+ pr_info("%s: %s -> %s\n", callback, obj->mod->name,
+ module_state[obj->mod->state]);
+ else
+ pr_info("%s: vmlinux\n", callback);
+}
+
+static struct klp_patch patch;
+
+static int allocate_loglevel_state(void)
+{
+ struct klp_state *loglevel_state, *prev_loglevel_state;
+
+ prev_loglevel_state = klp_get_prev_state(CONSOLE_LOGLEVEL_STATE);
+ if (prev_loglevel_state) {
+ pr_info("%s: space to store console_loglevel already allocated\n",
+ __func__);
+ return 0;
+ }
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return -EINVAL;
+
+ loglevel_state->data = kzalloc(sizeof(console_loglevel), GFP_KERNEL);
+ if (!loglevel_state->data)
+ return -ENOMEM;
+
+ pr_info("%s: allocating space to store console_loglevel\n",
+ __func__);
+ return 0;
+}
+
+static void fix_console_loglevel(void)
+{
+ struct klp_state *loglevel_state, *prev_loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ prev_loglevel_state = klp_get_prev_state(CONSOLE_LOGLEVEL_STATE);
+ if (prev_loglevel_state) {
+ pr_info("%s: taking over the console_loglevel change\n",
+ __func__);
+ loglevel_state->data = prev_loglevel_state->data;
+ return;
+ }
+
+ pr_info("%s: fixing console_loglevel\n", __func__);
+ *(int *)loglevel_state->data = console_loglevel;
+ console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
+}
+
+static void restore_console_loglevel(void)
+{
+ struct klp_state *loglevel_state, *prev_loglevel_state;
+
+ prev_loglevel_state = klp_get_prev_state(CONSOLE_LOGLEVEL_STATE);
+ if (prev_loglevel_state) {
+ pr_info("%s: passing the console_loglevel change back to the old livepatch\n",
+ __func__);
+ return;
+ }
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: restoring console_loglevel\n", __func__);
+ console_loglevel = *(int *)loglevel_state->data;
+}
+
+static void free_loglevel_state(void)
+{
+ struct klp_state *loglevel_state, *prev_loglevel_state;
+
+ prev_loglevel_state = klp_get_prev_state(CONSOLE_LOGLEVEL_STATE);
+ if (prev_loglevel_state) {
+ pr_info("%s: keeping space to store console_loglevel\n",
+ __func__);
+ return;
+ }
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: freeing space for the stored console_loglevel\n",
+ __func__);
+ kfree(loglevel_state->data);
+}
+
+/* Executed on object patching (ie, patch enablement) */
+static int pre_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ return allocate_loglevel_state();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ fix_console_loglevel();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void pre_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ restore_console_loglevel();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ free_loglevel_state();
+}
+
+static struct klp_func no_funcs[] = {
+ {}
+};
+
+static struct klp_object objs[] = {
+ {
+ .name = NULL, /* vmlinux */
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, { }
+};
+
+static struct klp_state states[] = {
+ {
+ .id = CONSOLE_LOGLEVEL_STATE,
+ .version = CONSOLE_LOGLEVEL_STATE_VERSION,
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+ .states = states,
+ .replace = true,
+};
+
+static int test_klp_callbacks_demo_init(void)
+{
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_callbacks_demo_exit(void)
+{
+}
+
+module_init(test_klp_callbacks_demo_init);
+module_exit(test_klp_callbacks_demo_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Petr Mladek <pmladek@suse.com>");
+MODULE_DESCRIPTION("Livepatch test: system state modification");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_state3.c b/tools/testing/selftests/livepatch/test_modules/test_klp_state3.c
new file mode 100644
index 000000000000..9226579d10c5
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_state3.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 SUSE
+
+/* The console loglevel fix is the same in the next cumulative patch. */
+#include "test_klp_state2.c"
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c
new file mode 100644
index 000000000000..dd802783ea84
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017-2023 SUSE
+ * Authors: Libor Pechacek <lpechacek@suse.cz>
+ * Nicolai Stange <nstange@suse.de>
+ * Marcos Paulo de Souza <mpdesouza@suse.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/livepatch.h>
+
+#if defined(__x86_64__)
+#define FN_PREFIX __x64_
+#elif defined(__s390x__)
+#define FN_PREFIX __s390x_
+#elif defined(__aarch64__)
+#define FN_PREFIX __arm64_
+#else
+/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER */
+#define FN_PREFIX
+#endif
+
+/* Protects klp_pids */
+static DEFINE_MUTEX(kpid_mutex);
+
+static unsigned int npids, npids_pending;
+static int klp_pids[NR_CPUS];
+module_param_array(klp_pids, int, &npids_pending, 0);
+MODULE_PARM_DESC(klp_pids, "Array of pids to be transitioned to livepatched state.");
+
+static ssize_t npids_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", npids_pending);
+}
+
+static struct kobj_attribute klp_attr = __ATTR_RO(npids);
+static struct kobject *klp_kobj;
+
+static asmlinkage long lp_sys_getpid(void)
+{
+ int i;
+
+ mutex_lock(&kpid_mutex);
+ if (npids_pending > 0) {
+ for (i = 0; i < npids; i++) {
+ if (current->pid == klp_pids[i]) {
+ klp_pids[i] = 0;
+ npids_pending--;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&kpid_mutex);
+
+ return task_tgid_vnr(current);
+}
+
+static struct klp_func vmlinux_funcs[] = {
+ {
+ .old_name = __stringify(FN_PREFIX) "sys_getpid",
+ .new_func = lp_sys_getpid,
+ }, {}
+};
+
+static struct klp_object objs[] = {
+ {
+ /* name being NULL means vmlinux */
+ .funcs = vmlinux_funcs,
+ }, {}
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+};
+
+static int livepatch_init(void)
+{
+ int ret;
+
+ klp_kobj = kobject_create_and_add("test_klp_syscall", kernel_kobj);
+ if (!klp_kobj)
+ return -ENOMEM;
+
+ ret = sysfs_create_file(klp_kobj, &klp_attr.attr);
+ if (ret) {
+ kobject_put(klp_kobj);
+ return ret;
+ }
+
+ /*
+ * Save the number pids to transition to livepatched state before the
+ * number of pending pids is decremented.
+ */
+ npids = npids_pending;
+
+ return klp_enable_patch(&patch);
+}
+
+static void livepatch_exit(void)
+{
+ kobject_put(klp_kobj);
+}
+
+module_init(livepatch_init);
+module_exit(livepatch_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Libor Pechacek <lpechacek@suse.cz>");
+MODULE_AUTHOR("Nicolai Stange <nstange@suse.de>");
+MODULE_AUTHOR("Marcos Paulo de Souza <mpdesouza@suse.com>");
+MODULE_DESCRIPTION("Livepatch test: syscall transition");
diff --git a/tools/testing/selftests/liveupdate/.gitignore b/tools/testing/selftests/liveupdate/.gitignore
new file mode 100644
index 000000000000..661827083ab6
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/.gitignore
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*
+!/**/
+!*.c
+!*.h
+!*.sh
+!.gitignore
+!config
+!Makefile
diff --git a/tools/testing/selftests/liveupdate/Makefile b/tools/testing/selftests/liveupdate/Makefile
new file mode 100644
index 000000000000..080754787ede
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+LIB_C += luo_test_utils.c
+
+TEST_GEN_PROGS += liveupdate
+
+TEST_GEN_PROGS_EXTENDED += luo_kexec_simple
+TEST_GEN_PROGS_EXTENDED += luo_multi_session
+
+TEST_FILES += do_kexec.sh
+
+include ../lib.mk
+
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -MD
+
+LIB_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIB_C))
+TEST_O := $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_O += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
+
+TEST_DEP_FILES := $(patsubst %.o, %.d, $(LIB_O))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(TEST_O))
+-include $(TEST_DEP_FILES)
+
+$(LIB_O): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/%: %.o $(LIB_O)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIB_O) $(LDLIBS) -o $@
+
+EXTRA_CLEAN += $(LIB_O)
+EXTRA_CLEAN += $(TEST_O)
+EXTRA_CLEAN += $(TEST_DEP_FILES)
diff --git a/tools/testing/selftests/liveupdate/config b/tools/testing/selftests/liveupdate/config
new file mode 100644
index 000000000000..91d03f9a6a39
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/config
@@ -0,0 +1,11 @@
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_HANDOVER=y
+CONFIG_KEXEC_HANDOVER_ENABLE_DEFAULT=y
+CONFIG_KEXEC_HANDOVER_DEBUGFS=y
+CONFIG_KEXEC_HANDOVER_DEBUG=y
+CONFIG_LIVEUPDATE=y
+CONFIG_LIVEUPDATE_TEST=y
+CONFIG_MEMFD_CREATE=y
+CONFIG_TMPFS=y
+CONFIG_SHMEM=y
diff --git a/tools/testing/selftests/liveupdate/do_kexec.sh b/tools/testing/selftests/liveupdate/do_kexec.sh
new file mode 100755
index 000000000000..3c7c6cafbef8
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/do_kexec.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+set -e
+
+# Use $KERNEL and $INITRAMFS to pass custom Kernel and optional initramfs
+
+KERNEL="${KERNEL:-/boot/bzImage}"
+set -- -l -s --reuse-cmdline "$KERNEL"
+
+INITRAMFS="${INITRAMFS:-/boot/initramfs}"
+if [ -f "$INITRAMFS" ]; then
+ set -- "$@" --initrd="$INITRAMFS"
+fi
+
+kexec "$@"
+kexec -e
diff --git a/tools/testing/selftests/liveupdate/liveupdate.c b/tools/testing/selftests/liveupdate/liveupdate.c
new file mode 100644
index 000000000000..c2878e3d5ef9
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/liveupdate.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+/*
+ * Selftests for the Live Update Orchestrator.
+ * This test suite verifies the functionality and behavior of the
+ * /dev/liveupdate character device and its session management capabilities.
+ *
+ * Tests include:
+ * - Device access: basic open/close, and enforcement of exclusive access.
+ * - Session management: creation of unique sessions, and duplicate name detection.
+ * - Resource preservation: successfully preserving individual and multiple memfds,
+ * verifying contents remain accessible.
+ * - Complex multi-session scenarios involving mixed empty and populated files.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <linux/liveupdate.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define LIVEUPDATE_DEV "/dev/liveupdate"
+
+FIXTURE(liveupdate_device) {
+ int fd1;
+ int fd2;
+};
+
+FIXTURE_SETUP(liveupdate_device)
+{
+ self->fd1 = -1;
+ self->fd2 = -1;
+}
+
+FIXTURE_TEARDOWN(liveupdate_device)
+{
+ if (self->fd1 >= 0)
+ close(self->fd1);
+ if (self->fd2 >= 0)
+ close(self->fd2);
+}
+
+/*
+ * Test Case: Basic Open and Close
+ *
+ * Verifies that the /dev/liveupdate device can be opened and subsequently
+ * closed without errors. Skips if the device does not exist.
+ */
+TEST_F(liveupdate_device, basic_open_close)
+{
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist.", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+ ASSERT_EQ(close(self->fd1), 0);
+ self->fd1 = -1;
+}
+
+/*
+ * Test Case: Exclusive Open Enforcement
+ *
+ * Verifies that the /dev/liveupdate device can only be opened by one process
+ * at a time. It checks that a second attempt to open the device fails with
+ * the EBUSY error code.
+ */
+TEST_F(liveupdate_device, exclusive_open)
+{
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist.", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+ self->fd2 = open(LIVEUPDATE_DEV, O_RDWR);
+ EXPECT_LT(self->fd2, 0);
+ EXPECT_EQ(errno, EBUSY);
+}
+
+/* Helper function to create a LUO session via ioctl. */
+static int create_session(int lu_fd, const char *name)
+{
+ struct liveupdate_ioctl_create_session args = {};
+
+ args.size = sizeof(args);
+ strncpy((char *)args.name, name, sizeof(args.name) - 1);
+
+ if (ioctl(lu_fd, LIVEUPDATE_IOCTL_CREATE_SESSION, &args))
+ return -errno;
+
+ return args.fd;
+}
+
+/*
+ * Test Case: Create Duplicate Session
+ *
+ * Verifies that attempting to create two sessions with the same name fails
+ * on the second attempt with EEXIST.
+ */
+TEST_F(liveupdate_device, create_duplicate_session)
+{
+ int session_fd1, session_fd2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd1 = create_session(self->fd1, "duplicate-session-test");
+ ASSERT_GE(session_fd1, 0);
+
+ session_fd2 = create_session(self->fd1, "duplicate-session-test");
+ EXPECT_LT(session_fd2, 0);
+ EXPECT_EQ(-session_fd2, EEXIST);
+
+ ASSERT_EQ(close(session_fd1), 0);
+}
+
+/*
+ * Test Case: Create Distinct Sessions
+ *
+ * Verifies that creating two sessions with different names succeeds.
+ */
+TEST_F(liveupdate_device, create_distinct_sessions)
+{
+ int session_fd1, session_fd2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd1 = create_session(self->fd1, "distinct-session-1");
+ ASSERT_GE(session_fd1, 0);
+
+ session_fd2 = create_session(self->fd1, "distinct-session-2");
+ ASSERT_GE(session_fd2, 0);
+
+ ASSERT_EQ(close(session_fd1), 0);
+ ASSERT_EQ(close(session_fd2), 0);
+}
+
+static int preserve_fd(int session_fd, int fd_to_preserve, __u64 token)
+{
+ struct liveupdate_session_preserve_fd args = {};
+
+ args.size = sizeof(args);
+ args.fd = fd_to_preserve;
+ args.token = token;
+
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_PRESERVE_FD, &args))
+ return -errno;
+
+ return 0;
+}
+
+/*
+ * Test Case: Preserve MemFD
+ *
+ * Verifies that a valid memfd can be successfully preserved in a session and
+ * that its contents remain intact after the preservation call.
+ */
+TEST_F(liveupdate_device, preserve_memfd)
+{
+ const char *test_str = "hello liveupdate";
+ char read_buf[64] = {};
+ int session_fd, mem_fd;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "preserve-memfd-test");
+ ASSERT_GE(session_fd, 0);
+
+ mem_fd = memfd_create("test-memfd", 0);
+ ASSERT_GE(mem_fd, 0);
+
+ ASSERT_EQ(write(mem_fd, test_str, strlen(test_str)), strlen(test_str));
+ ASSERT_EQ(preserve_fd(session_fd, mem_fd, 0x1234), 0);
+ ASSERT_EQ(close(session_fd), 0);
+
+ ASSERT_EQ(lseek(mem_fd, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd, read_buf, sizeof(read_buf)), strlen(test_str));
+ ASSERT_STREQ(read_buf, test_str);
+ ASSERT_EQ(close(mem_fd), 0);
+}
+
+/*
+ * Test Case: Preserve Multiple MemFDs
+ *
+ * Verifies that multiple memfds can be preserved in a single session,
+ * each with a unique token, and that their contents remain distinct and
+ * correct after preservation.
+ */
+TEST_F(liveupdate_device, preserve_multiple_memfds)
+{
+ const char *test_str1 = "data for memfd one";
+ const char *test_str2 = "data for memfd two";
+ char read_buf[64] = {};
+ int session_fd, mem_fd1, mem_fd2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "preserve-multi-memfd-test");
+ ASSERT_GE(session_fd, 0);
+
+ mem_fd1 = memfd_create("test-memfd-1", 0);
+ ASSERT_GE(mem_fd1, 0);
+ mem_fd2 = memfd_create("test-memfd-2", 0);
+ ASSERT_GE(mem_fd2, 0);
+
+ ASSERT_EQ(write(mem_fd1, test_str1, strlen(test_str1)), strlen(test_str1));
+ ASSERT_EQ(write(mem_fd2, test_str2, strlen(test_str2)), strlen(test_str2));
+
+ ASSERT_EQ(preserve_fd(session_fd, mem_fd1, 0xAAAA), 0);
+ ASSERT_EQ(preserve_fd(session_fd, mem_fd2, 0xBBBB), 0);
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ASSERT_EQ(lseek(mem_fd1, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd1, read_buf, sizeof(read_buf)), strlen(test_str1));
+ ASSERT_STREQ(read_buf, test_str1);
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ASSERT_EQ(lseek(mem_fd2, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd2, read_buf, sizeof(read_buf)), strlen(test_str2));
+ ASSERT_STREQ(read_buf, test_str2);
+
+ ASSERT_EQ(close(mem_fd1), 0);
+ ASSERT_EQ(close(mem_fd2), 0);
+ ASSERT_EQ(close(session_fd), 0);
+}
+
+/*
+ * Test Case: Preserve Complex Scenario
+ *
+ * Verifies a more complex scenario with multiple sessions and a mix of empty
+ * and non-empty memfds distributed across them.
+ */
+TEST_F(liveupdate_device, preserve_complex_scenario)
+{
+ const char *data1 = "data for session 1";
+ const char *data2 = "data for session 2";
+ char read_buf[64] = {};
+ int session_fd1, session_fd2;
+ int mem_fd_data1, mem_fd_empty1, mem_fd_data2, mem_fd_empty2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd1 = create_session(self->fd1, "complex-session-1");
+ ASSERT_GE(session_fd1, 0);
+ session_fd2 = create_session(self->fd1, "complex-session-2");
+ ASSERT_GE(session_fd2, 0);
+
+ mem_fd_data1 = memfd_create("data1", 0);
+ ASSERT_GE(mem_fd_data1, 0);
+ ASSERT_EQ(write(mem_fd_data1, data1, strlen(data1)), strlen(data1));
+
+ mem_fd_empty1 = memfd_create("empty1", 0);
+ ASSERT_GE(mem_fd_empty1, 0);
+
+ mem_fd_data2 = memfd_create("data2", 0);
+ ASSERT_GE(mem_fd_data2, 0);
+ ASSERT_EQ(write(mem_fd_data2, data2, strlen(data2)), strlen(data2));
+
+ mem_fd_empty2 = memfd_create("empty2", 0);
+ ASSERT_GE(mem_fd_empty2, 0);
+
+ ASSERT_EQ(preserve_fd(session_fd1, mem_fd_data1, 0x1111), 0);
+ ASSERT_EQ(preserve_fd(session_fd1, mem_fd_empty1, 0x2222), 0);
+ ASSERT_EQ(preserve_fd(session_fd2, mem_fd_data2, 0x3333), 0);
+ ASSERT_EQ(preserve_fd(session_fd2, mem_fd_empty2, 0x4444), 0);
+
+ ASSERT_EQ(lseek(mem_fd_data1, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_data1, read_buf, sizeof(read_buf)), strlen(data1));
+ ASSERT_STREQ(read_buf, data1);
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ASSERT_EQ(lseek(mem_fd_data2, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_data2, read_buf, sizeof(read_buf)), strlen(data2));
+ ASSERT_STREQ(read_buf, data2);
+
+ ASSERT_EQ(lseek(mem_fd_empty1, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_empty1, read_buf, sizeof(read_buf)), 0);
+
+ ASSERT_EQ(lseek(mem_fd_empty2, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_empty2, read_buf, sizeof(read_buf)), 0);
+
+ ASSERT_EQ(close(mem_fd_data1), 0);
+ ASSERT_EQ(close(mem_fd_empty1), 0);
+ ASSERT_EQ(close(mem_fd_data2), 0);
+ ASSERT_EQ(close(mem_fd_empty2), 0);
+ ASSERT_EQ(close(session_fd1), 0);
+ ASSERT_EQ(close(session_fd2), 0);
+}
+
+/*
+ * Test Case: Preserve Unsupported File Descriptor
+ *
+ * Verifies that attempting to preserve a file descriptor that does not have
+ * a registered Live Update handler fails gracefully.
+ * Uses /dev/null as a representative of a file type (character device)
+ * that is not supported by the orchestrator.
+ */
+TEST_F(liveupdate_device, preserve_unsupported_fd)
+{
+ int session_fd, unsupported_fd;
+ int ret;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "unsupported-fd-test");
+ ASSERT_GE(session_fd, 0);
+
+ unsupported_fd = open("/dev/null", O_RDWR);
+ ASSERT_GE(unsupported_fd, 0);
+
+ ret = preserve_fd(session_fd, unsupported_fd, 0xDEAD);
+ EXPECT_EQ(ret, -ENOENT);
+
+ ASSERT_EQ(close(unsupported_fd), 0);
+ ASSERT_EQ(close(session_fd), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/liveupdate/luo_kexec_simple.c b/tools/testing/selftests/liveupdate/luo_kexec_simple.c
new file mode 100644
index 000000000000..d7ac1f3dc4cb
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_kexec_simple.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * A simple selftest to validate the end-to-end lifecycle of a LUO session
+ * across a single kexec reboot.
+ */
+
+#include "luo_test_utils.h"
+
+#define TEST_SESSION_NAME "test-session"
+#define TEST_MEMFD_TOKEN 0x1A
+#define TEST_MEMFD_DATA "hello kexec world"
+
+/* Constants for the state-tracking mechanism, specific to this test file. */
+#define STATE_SESSION_NAME "kexec_simple_state"
+#define STATE_MEMFD_TOKEN 999
+
+/* Stage 1: Executed before the kexec reboot. */
+static void run_stage_1(int luo_fd)
+{
+ int session_fd;
+
+ ksft_print_msg("[STAGE 1] Starting pre-kexec setup...\n");
+
+ ksft_print_msg("[STAGE 1] Creating state file for next stage (2)...\n");
+ create_state_file(luo_fd, STATE_SESSION_NAME, STATE_MEMFD_TOKEN, 2);
+
+ ksft_print_msg("[STAGE 1] Creating session '%s' and preserving memfd...\n",
+ TEST_SESSION_NAME);
+ session_fd = luo_create_session(luo_fd, TEST_SESSION_NAME);
+ if (session_fd < 0)
+ fail_exit("luo_create_session for '%s'", TEST_SESSION_NAME);
+
+ if (create_and_preserve_memfd(session_fd, TEST_MEMFD_TOKEN,
+ TEST_MEMFD_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ TEST_MEMFD_TOKEN);
+ }
+
+ close(luo_fd);
+ daemonize_and_wait();
+}
+
+/* Stage 2: Executed after the kexec reboot. */
+static void run_stage_2(int luo_fd, int state_session_fd)
+{
+ int session_fd, mfd, stage;
+
+ ksft_print_msg("[STAGE 2] Starting post-kexec verification...\n");
+
+ restore_and_read_stage(state_session_fd, STATE_MEMFD_TOKEN, &stage);
+ if (stage != 2)
+ fail_exit("Expected stage 2, but state file contains %d", stage);
+
+ ksft_print_msg("[STAGE 2] Retrieving session '%s'...\n", TEST_SESSION_NAME);
+ session_fd = luo_retrieve_session(luo_fd, TEST_SESSION_NAME);
+ if (session_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", TEST_SESSION_NAME);
+
+ ksft_print_msg("[STAGE 2] Restoring and verifying memfd (token %#x)...\n",
+ TEST_MEMFD_TOKEN);
+ mfd = restore_and_verify_memfd(session_fd, TEST_MEMFD_TOKEN,
+ TEST_MEMFD_DATA);
+ if (mfd < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", TEST_MEMFD_TOKEN);
+ close(mfd);
+
+ ksft_print_msg("[STAGE 2] Test data verified successfully.\n");
+ ksft_print_msg("[STAGE 2] Finalizing test session...\n");
+ if (luo_session_finish(session_fd) < 0)
+ fail_exit("luo_session_finish for test session");
+ close(session_fd);
+
+ ksft_print_msg("[STAGE 2] Finalizing state session...\n");
+ if (luo_session_finish(state_session_fd) < 0)
+ fail_exit("luo_session_finish for state session");
+ close(state_session_fd);
+
+ ksft_print_msg("\n--- SIMPLE KEXEC TEST PASSED ---\n");
+}
+
+int main(int argc, char *argv[])
+{
+ return luo_test(argc, argv, STATE_SESSION_NAME,
+ run_stage_1, run_stage_2);
+}
diff --git a/tools/testing/selftests/liveupdate/luo_multi_session.c b/tools/testing/selftests/liveupdate/luo_multi_session.c
new file mode 100644
index 000000000000..0ee2d795beef
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_multi_session.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * A selftest to validate the end-to-end lifecycle of multiple LUO sessions
+ * across a kexec reboot, including empty sessions and sessions with multiple
+ * files.
+ */
+
+#include "luo_test_utils.h"
+
+#define SESSION_EMPTY_1 "multi-test-empty-1"
+#define SESSION_EMPTY_2 "multi-test-empty-2"
+#define SESSION_FILES_1 "multi-test-files-1"
+#define SESSION_FILES_2 "multi-test-files-2"
+
+#define MFD1_TOKEN 0x1001
+#define MFD2_TOKEN 0x2002
+#define MFD3_TOKEN 0x3003
+
+#define MFD1_DATA "Data for session files 1"
+#define MFD2_DATA "First file for session files 2"
+#define MFD3_DATA "Second file for session files 2"
+
+#define STATE_SESSION_NAME "kexec_multi_state"
+#define STATE_MEMFD_TOKEN 998
+
+/* Stage 1: Executed before the kexec reboot. */
+static void run_stage_1(int luo_fd)
+{
+ int s_empty1_fd, s_empty2_fd, s_files1_fd, s_files2_fd;
+
+ ksft_print_msg("[STAGE 1] Starting pre-kexec setup for multi-session test...\n");
+
+ ksft_print_msg("[STAGE 1] Creating state file for next stage (2)...\n");
+ create_state_file(luo_fd, STATE_SESSION_NAME, STATE_MEMFD_TOKEN, 2);
+
+ ksft_print_msg("[STAGE 1] Creating empty sessions '%s' and '%s'...\n",
+ SESSION_EMPTY_1, SESSION_EMPTY_2);
+ s_empty1_fd = luo_create_session(luo_fd, SESSION_EMPTY_1);
+ if (s_empty1_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_EMPTY_1);
+
+ s_empty2_fd = luo_create_session(luo_fd, SESSION_EMPTY_2);
+ if (s_empty2_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_EMPTY_2);
+
+ ksft_print_msg("[STAGE 1] Creating session '%s' with one memfd...\n",
+ SESSION_FILES_1);
+
+ s_files1_fd = luo_create_session(luo_fd, SESSION_FILES_1);
+ if (s_files1_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_FILES_1);
+ if (create_and_preserve_memfd(s_files1_fd, MFD1_TOKEN, MFD1_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ MFD1_TOKEN);
+ }
+
+ ksft_print_msg("[STAGE 1] Creating session '%s' with two memfds...\n",
+ SESSION_FILES_2);
+
+ s_files2_fd = luo_create_session(luo_fd, SESSION_FILES_2);
+ if (s_files2_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_FILES_2);
+ if (create_and_preserve_memfd(s_files2_fd, MFD2_TOKEN, MFD2_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ MFD2_TOKEN);
+ }
+ if (create_and_preserve_memfd(s_files2_fd, MFD3_TOKEN, MFD3_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ MFD3_TOKEN);
+ }
+
+ close(luo_fd);
+ daemonize_and_wait();
+}
+
+/* Stage 2: Executed after the kexec reboot. */
+static void run_stage_2(int luo_fd, int state_session_fd)
+{
+ int s_empty1_fd, s_empty2_fd, s_files1_fd, s_files2_fd;
+ int mfd1, mfd2, mfd3, stage;
+
+ ksft_print_msg("[STAGE 2] Starting post-kexec verification...\n");
+
+ restore_and_read_stage(state_session_fd, STATE_MEMFD_TOKEN, &stage);
+ if (stage != 2) {
+ fail_exit("Expected stage 2, but state file contains %d",
+ stage);
+ }
+
+ ksft_print_msg("[STAGE 2] Retrieving all sessions...\n");
+ s_empty1_fd = luo_retrieve_session(luo_fd, SESSION_EMPTY_1);
+ if (s_empty1_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_EMPTY_1);
+
+ s_empty2_fd = luo_retrieve_session(luo_fd, SESSION_EMPTY_2);
+ if (s_empty2_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_EMPTY_2);
+
+ s_files1_fd = luo_retrieve_session(luo_fd, SESSION_FILES_1);
+ if (s_files1_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_FILES_1);
+
+ s_files2_fd = luo_retrieve_session(luo_fd, SESSION_FILES_2);
+ if (s_files2_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_FILES_2);
+
+ ksft_print_msg("[STAGE 2] Verifying contents of session '%s'...\n",
+ SESSION_FILES_1);
+ mfd1 = restore_and_verify_memfd(s_files1_fd, MFD1_TOKEN, MFD1_DATA);
+ if (mfd1 < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", MFD1_TOKEN);
+ close(mfd1);
+
+ ksft_print_msg("[STAGE 2] Verifying contents of session '%s'...\n",
+ SESSION_FILES_2);
+
+ mfd2 = restore_and_verify_memfd(s_files2_fd, MFD2_TOKEN, MFD2_DATA);
+ if (mfd2 < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", MFD2_TOKEN);
+ close(mfd2);
+
+ mfd3 = restore_and_verify_memfd(s_files2_fd, MFD3_TOKEN, MFD3_DATA);
+ if (mfd3 < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", MFD3_TOKEN);
+ close(mfd3);
+
+ ksft_print_msg("[STAGE 2] Test data verified successfully.\n");
+
+ ksft_print_msg("[STAGE 2] Finalizing all test sessions...\n");
+ if (luo_session_finish(s_empty1_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_EMPTY_1);
+ close(s_empty1_fd);
+
+ if (luo_session_finish(s_empty2_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_EMPTY_2);
+ close(s_empty2_fd);
+
+ if (luo_session_finish(s_files1_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_FILES_1);
+ close(s_files1_fd);
+
+ if (luo_session_finish(s_files2_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_FILES_2);
+ close(s_files2_fd);
+
+ ksft_print_msg("[STAGE 2] Finalizing state session...\n");
+ if (luo_session_finish(state_session_fd) < 0)
+ fail_exit("luo_session_finish for state session");
+ close(state_session_fd);
+
+ ksft_print_msg("\n--- MULTI-SESSION KEXEC TEST PASSED ---\n");
+}
+
+int main(int argc, char *argv[])
+{
+ return luo_test(argc, argv, STATE_SESSION_NAME,
+ run_stage_1, run_stage_2);
+}
diff --git a/tools/testing/selftests/liveupdate/luo_test_utils.c b/tools/testing/selftests/liveupdate/luo_test_utils.c
new file mode 100644
index 000000000000..3c8721c505df
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_test_utils.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <stdarg.h>
+
+#include "luo_test_utils.h"
+
+int luo_open_device(void)
+{
+ return open(LUO_DEVICE, O_RDWR);
+}
+
+int luo_create_session(int luo_fd, const char *name)
+{
+ struct liveupdate_ioctl_create_session arg = { .size = sizeof(arg) };
+
+ snprintf((char *)arg.name, LIVEUPDATE_SESSION_NAME_LENGTH, "%.*s",
+ LIVEUPDATE_SESSION_NAME_LENGTH - 1, name);
+
+ if (ioctl(luo_fd, LIVEUPDATE_IOCTL_CREATE_SESSION, &arg) < 0)
+ return -errno;
+
+ return arg.fd;
+}
+
+int luo_retrieve_session(int luo_fd, const char *name)
+{
+ struct liveupdate_ioctl_retrieve_session arg = { .size = sizeof(arg) };
+
+ snprintf((char *)arg.name, LIVEUPDATE_SESSION_NAME_LENGTH, "%.*s",
+ LIVEUPDATE_SESSION_NAME_LENGTH - 1, name);
+
+ if (ioctl(luo_fd, LIVEUPDATE_IOCTL_RETRIEVE_SESSION, &arg) < 0)
+ return -errno;
+
+ return arg.fd;
+}
+
+int create_and_preserve_memfd(int session_fd, int token, const char *data)
+{
+ struct liveupdate_session_preserve_fd arg = { .size = sizeof(arg) };
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ void *map = MAP_FAILED;
+ int mfd = -1, ret = -1;
+
+ mfd = memfd_create("test_mfd", 0);
+ if (mfd < 0)
+ return -errno;
+
+ if (ftruncate(mfd, page_size) != 0)
+ goto out;
+
+ map = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, mfd, 0);
+ if (map == MAP_FAILED)
+ goto out;
+
+ snprintf(map, page_size, "%s", data);
+ munmap(map, page_size);
+
+ arg.fd = mfd;
+ arg.token = token;
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_PRESERVE_FD, &arg) < 0)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret != 0 && errno != 0)
+ ret = -errno;
+ if (mfd >= 0)
+ close(mfd);
+ return ret;
+}
+
+int restore_and_verify_memfd(int session_fd, int token,
+ const char *expected_data)
+{
+ struct liveupdate_session_retrieve_fd arg = { .size = sizeof(arg) };
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ void *map = MAP_FAILED;
+ int mfd = -1, ret = -1;
+
+ arg.token = token;
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_RETRIEVE_FD, &arg) < 0)
+ return -errno;
+ mfd = arg.fd;
+
+ map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, mfd, 0);
+ if (map == MAP_FAILED)
+ goto out;
+
+ if (expected_data && strcmp(expected_data, map) != 0) {
+ ksft_print_msg("Data mismatch! Expected '%s', Got '%s'\n",
+ expected_data, (char *)map);
+ ret = -EINVAL;
+ goto out_munmap;
+ }
+
+ ret = mfd;
+out_munmap:
+ munmap(map, page_size);
+out:
+ if (ret < 0 && errno != 0)
+ ret = -errno;
+ if (ret < 0 && mfd >= 0)
+ close(mfd);
+ return ret;
+}
+
+int luo_session_finish(int session_fd)
+{
+ struct liveupdate_session_finish arg = { .size = sizeof(arg) };
+
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_FINISH, &arg) < 0)
+ return -errno;
+
+ return 0;
+}
+
+void create_state_file(int luo_fd, const char *session_name, int token,
+ int next_stage)
+{
+ char buf[32];
+ int state_session_fd;
+
+ state_session_fd = luo_create_session(luo_fd, session_name);
+ if (state_session_fd < 0)
+ fail_exit("luo_create_session for state tracking");
+
+ snprintf(buf, sizeof(buf), "%d", next_stage);
+ if (create_and_preserve_memfd(state_session_fd, token, buf) < 0)
+ fail_exit("create_and_preserve_memfd for state tracking");
+
+ /*
+ * DO NOT close session FD, otherwise it is going to be unpreserved
+ */
+}
+
+void restore_and_read_stage(int state_session_fd, int token, int *stage)
+{
+ char buf[32] = {0};
+ int mfd;
+
+ mfd = restore_and_verify_memfd(state_session_fd, token, NULL);
+ if (mfd < 0)
+ fail_exit("failed to restore state memfd");
+
+ if (read(mfd, buf, sizeof(buf) - 1) < 0)
+ fail_exit("failed to read state mfd");
+
+ *stage = atoi(buf);
+
+ close(mfd);
+}
+
+void daemonize_and_wait(void)
+{
+ pid_t pid;
+
+ ksft_print_msg("[STAGE 1] Forking persistent child to hold sessions...\n");
+
+ pid = fork();
+ if (pid < 0)
+ fail_exit("fork failed");
+
+ if (pid > 0) {
+ ksft_print_msg("[STAGE 1] Child PID: %d. Resources are pinned.\n", pid);
+ ksft_print_msg("[STAGE 1] You may now perform kexec reboot.\n");
+ exit(EXIT_SUCCESS);
+ }
+
+ /* Detach from terminal so closing the window doesn't kill us */
+ if (setsid() < 0)
+ fail_exit("setsid failed");
+
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+
+ /* Change dir to root to avoid locking filesystems */
+ if (chdir("/") < 0)
+ exit(EXIT_FAILURE);
+
+ while (1)
+ sleep(60);
+}
+
+static int parse_stage_args(int argc, char *argv[])
+{
+ static struct option long_options[] = {
+ {"stage", required_argument, 0, 's'},
+ {0, 0, 0, 0}
+ };
+ int option_index = 0;
+ int stage = 1;
+ int opt;
+
+ optind = 1;
+ while ((opt = getopt_long(argc, argv, "s:", long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 's':
+ stage = atoi(optarg);
+ if (stage != 1 && stage != 2)
+ fail_exit("Invalid stage argument");
+ break;
+ default:
+ fail_exit("Unknown argument");
+ }
+ }
+ return stage;
+}
+
+int luo_test(int argc, char *argv[],
+ const char *state_session_name,
+ luo_test_stage1_fn stage1,
+ luo_test_stage2_fn stage2)
+{
+ int target_stage = parse_stage_args(argc, argv);
+ int luo_fd = luo_open_device();
+ int state_session_fd;
+ int detected_stage;
+
+ if (luo_fd < 0) {
+ ksft_exit_skip("Failed to open %s. Is the luo module loaded?\n",
+ LUO_DEVICE);
+ }
+
+ state_session_fd = luo_retrieve_session(luo_fd, state_session_name);
+ if (state_session_fd == -ENOENT)
+ detected_stage = 1;
+ else if (state_session_fd >= 0)
+ detected_stage = 2;
+ else
+ fail_exit("Failed to check for state session");
+
+ if (target_stage != detected_stage) {
+ ksft_exit_fail_msg("Stage mismatch Requested --stage %d, but system is in stage %d.\n"
+ "(State session %s: %s)\n",
+ target_stage, detected_stage, state_session_name,
+ (detected_stage == 2) ? "EXISTS" : "MISSING");
+ }
+
+ if (target_stage == 1)
+ stage1(luo_fd);
+ else
+ stage2(luo_fd, state_session_fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/liveupdate/luo_test_utils.h b/tools/testing/selftests/liveupdate/luo_test_utils.h
new file mode 100644
index 000000000000..90099bf49577
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_test_utils.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * Utility functions for LUO kselftests.
+ */
+
+#ifndef LUO_TEST_UTILS_H
+#define LUO_TEST_UTILS_H
+
+#include <errno.h>
+#include <string.h>
+#include <linux/liveupdate.h>
+#include "../kselftest.h"
+
+#define LUO_DEVICE "/dev/liveupdate"
+
+#define fail_exit(fmt, ...) \
+ ksft_exit_fail_msg("[%s:%d] " fmt " (errno: %s)\n", \
+ __func__, __LINE__, ##__VA_ARGS__, strerror(errno))
+
+int luo_open_device(void);
+int luo_create_session(int luo_fd, const char *name);
+int luo_retrieve_session(int luo_fd, const char *name);
+int luo_session_finish(int session_fd);
+
+int create_and_preserve_memfd(int session_fd, int token, const char *data);
+int restore_and_verify_memfd(int session_fd, int token, const char *expected_data);
+
+void create_state_file(int luo_fd, const char *session_name, int token,
+ int next_stage);
+void restore_and_read_stage(int state_session_fd, int token, int *stage);
+
+void daemonize_and_wait(void);
+
+typedef void (*luo_test_stage1_fn)(int luo_fd);
+typedef void (*luo_test_stage2_fn)(int luo_fd, int state_session_fd);
+
+int luo_test(int argc, char *argv[], const char *state_session_name,
+ luo_test_stage1_fn stage1, luo_test_stage2_fn stage2);
+
+#endif /* LUO_TEST_UTILS_H */
diff --git a/tools/testing/selftests/lkdtm/.gitignore b/tools/testing/selftests/lkdtm/.gitignore
index f26212605b6b..d4b0be857deb 100644
--- a/tools/testing/selftests/lkdtm/.gitignore
+++ b/tools/testing/selftests/lkdtm/.gitignore
@@ -1,2 +1,3 @@
*.sh
!run.sh
+!stack-entropy.sh
diff --git a/tools/testing/selftests/lkdtm/Makefile b/tools/testing/selftests/lkdtm/Makefile
index 1bcc9ee990eb..c71109ceeb2d 100644
--- a/tools/testing/selftests/lkdtm/Makefile
+++ b/tools/testing/selftests/lkdtm/Makefile
@@ -5,6 +5,7 @@ include ../lib.mk
# NOTE: $(OUTPUT) won't get default value if used before lib.mk
TEST_FILES := tests.txt
+TEST_PROGS := stack-entropy.sh
TEST_GEN_PROGS = $(patsubst %,$(OUTPUT)/%.sh,$(shell awk '{print $$1}' tests.txt | sed -e 's/\#//'))
all: $(TEST_GEN_PROGS)
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index d874990e442b..bd09fdaf53e0 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -1 +1,14 @@
CONFIG_LKDTM=y
+CONFIG_DEBUG_LIST=y
+CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_KSTACK_ERASE=y
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
+CONFIG_INIT_ON_FREE_DEFAULT_ON=y
+CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+CONFIG_UBSAN=y
+CONFIG_UBSAN_BOUNDS=y
+CONFIG_STACKPROTECTOR_STRONG=y
+CONFIG_SLUB_DEBUG=y
+CONFIG_SLUB_DEBUG_ON=y
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index bb7a1775307b..95e904959207 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -56,8 +56,14 @@ if echo "$test" | grep -q '^#' ; then
fi
# If no expected output given, assume an Oops with back trace is success.
+repeat=1
if [ -z "$expect" ]; then
expect="call trace:"
+else
+ if echo "$expect" | grep -q '^repeat:' ; then
+ repeat=$(echo "$expect" | cut -d' ' -f1 | cut -d: -f2)
+ expect=$(echo "$expect" | cut -d' ' -f2-)
+ fi
fi
# Prepare log for report checking
@@ -76,10 +82,16 @@ fi
# Save existing dmesg so we can detect new content below
dmesg > "$DMESG"
-# Most shells yell about signals and we're expecting the "cat" process
-# to usually be killed by the kernel. So we have to run it in a sub-shell
-# and silence errors.
-($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
+# Since the kernel is likely killing the process writing to the trigger
+# file, it must not be the script's shell itself. i.e. we cannot do:
+# echo "$test" >"$TRIGGER"
+# Instead, use "cat" to take the signal. Since the shell will yell about
+# the signal that killed the subprocess, we must ignore the failure and
+# continue. However we don't silence stderr since there might be other
+# useful details reported there in the case of other unexpected conditions.
+for i in $(seq 1 $repeat); do
+ echo "$test" | cat >"$TRIGGER" || true
+done
# Record and dump the results
dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true
diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh
new file mode 100755
index 000000000000..14fedeef762e
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/stack-entropy.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test.
+set -e
+samples="${1:-1000}"
+TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
+KSELFTEST_SKIP_TEST=4
+
+# Verify we have LKDTM available in the kernel.
+if [ ! -r $TRIGGER ] ; then
+ /sbin/modprobe -q lkdtm || true
+ if [ ! -r $TRIGGER ] ; then
+ echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)"
+ else
+ echo "Cannot write $TRIGGER (need to run as root?)"
+ fi
+ # Skip this test
+ exit $KSELFTEST_SKIP_TEST
+fi
+
+# Capture dmesg continuously since it may fill up depending on sample size.
+log=$(mktemp -t stack-entropy-XXXXXX)
+dmesg --follow >"$log" & pid=$!
+report=-1
+for i in $(seq 1 $samples); do
+ echo "REPORT_STACK" > $TRIGGER
+ if [ -t 1 ]; then
+ percent=$(( 100 * $i / $samples ))
+ if [ "$percent" -ne "$report" ]; then
+ /bin/echo -en "$percent%\r"
+ report="$percent"
+ fi
+ fi
+done
+kill "$pid"
+
+# Count unique offsets since last run.
+seen=$(tac "$log" | grep -m1 -B"$samples"0 'Starting stack offset' | \
+ grep 'Stack offset' | awk '{print $NF}' | sort | uniq -c | wc -l)
+bits=$(echo "obase=2; $seen" | bc | wc -L)
+echo "Bits of stack entropy: $bits"
+rm -f "$log"
+
+# We would expect any functional stack randomization to be at least 5 bits.
+if [ "$bits" -lt 5 ]; then
+ echo "Stack entropy is low! Booted without 'randomize_kstack_offset=y'?"
+ exit 1
+else
+ exit 0
+fi
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 11ef159be0fd..cff124c1eddd 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -1,4 +1,5 @@
#PANIC
+#PANIC_STOP_IRQOFF Crashes entire system
BUG kernel BUG at
WARNING WARNING:
WARNING_MESSAGE message trigger
@@ -7,24 +8,30 @@ EXCEPTION
#EXHAUST_STACK Corrupts memory on failure
#CORRUPT_STACK Crashes entire system on success
#CORRUPT_STACK_STRONG Crashes entire system on success
+ARRAY_BOUNDS call trace:|UBSAN: array-index-out-of-bounds
CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption
STACK_GUARD_PAGE_LEADING
STACK_GUARD_PAGE_TRAILING
-UNSET_SMEP CR4 bits went missing
+REPORT_STACK_CANARY repeat:2 ok: stack canaries differ
+UNSET_SMEP pinned CR4 bits changed:
DOUBLE_FAULT
CORRUPT_PAC
UNALIGNED_LOAD_STORE_WRITE
-#OVERWRITE_ALLOCATION Corrupts memory on failure
+SLAB_LINEAR_OVERFLOW
+VMALLOC_LINEAR_OVERFLOW
#WRITE_AFTER_FREE Corrupts memory on failure
-READ_AFTER_FREE
+READ_AFTER_FREE call trace:|Memory correctly poisoned
#WRITE_BUDDY_AFTER_FREE Corrupts memory on failure
-READ_BUDDY_AFTER_FREE
+READ_BUDDY_AFTER_FREE call trace:|Memory correctly poisoned
+SLAB_INIT_ON_ALLOC Memory appears initialized
+BUDDY_INIT_ON_ALLOC Memory appears initialized
SLAB_FREE_DOUBLE
SLAB_FREE_CROSS
SLAB_FREE_PAGE
#SOFTLOCKUP Hangs the system
#HARDLOCKUP Hangs the system
+#SMP_CALL_LOCKUP Hangs the system
#SPINLOCKUP Hangs the system
#HUNG_TASK Hangs the system
EXEC_DATA
@@ -39,6 +46,7 @@ ACCESS_NULL
WRITE_RO
WRITE_RO_AFTER_INIT
WRITE_KERN
+WRITE_OPD
REFCOUNT_INC_OVERFLOW
REFCOUNT_ADD_OVERFLOW
REFCOUNT_INC_NOT_ZERO_OVERFLOW
@@ -58,15 +66,20 @@ REFCOUNT_DEC_AND_TEST_SATURATED Saturation detected: still saturated
REFCOUNT_SUB_AND_TEST_SATURATED Saturation detected: still saturated
#REFCOUNT_TIMING timing only
#ATOMIC_TIMING timing only
-USERCOPY_HEAP_SIZE_TO
-USERCOPY_HEAP_SIZE_FROM
-USERCOPY_HEAP_WHITELIST_TO
-USERCOPY_HEAP_WHITELIST_FROM
+USERCOPY_SLAB_SIZE_TO
+USERCOPY_SLAB_SIZE_FROM
+USERCOPY_SLAB_WHITELIST_TO
+USERCOPY_SLAB_WHITELIST_FROM
USERCOPY_STACK_FRAME_TO
USERCOPY_STACK_FRAME_FROM
USERCOPY_STACK_BEYOND
USERCOPY_KERNEL
STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
CFI_FORWARD_PROTO
-FORTIFIED_STRSCPY
+CFI_BACKWARD call trace:|ok: control flow unchanged
+FORTIFY_STRSCPY detected buffer overflow
+FORTIFY_STR_OBJECT detected buffer overflow
+FORTIFY_STR_MEMBER detected buffer overflow
+FORTIFY_MEM_OBJECT detected buffer overflow
+FORTIFY_MEM_MEMBER detected field-spanning write
PPC_SLB_MULTIHIT Recovered
diff --git a/tools/testing/selftests/lsm/.gitignore b/tools/testing/selftests/lsm/.gitignore
new file mode 100644
index 000000000000..bd68f6c3fd07
--- /dev/null
+++ b/tools/testing/selftests/lsm/.gitignore
@@ -0,0 +1 @@
+/*_test
diff --git a/tools/testing/selftests/lsm/Makefile b/tools/testing/selftests/lsm/Makefile
new file mode 100644
index 000000000000..3f80c0bc093d
--- /dev/null
+++ b/tools/testing/selftests/lsm/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# First run: make -C ../../../.. headers_install
+
+CFLAGS += -Wall -O2 $(KHDR_INCLUDES)
+LOCAL_HDRS += common.h
+
+TEST_GEN_PROGS := lsm_get_self_attr_test lsm_list_modules_test \
+ lsm_set_self_attr_test
+
+include ../lib.mk
+
+$(OUTPUT)/lsm_get_self_attr_test: lsm_get_self_attr_test.c common.c
+$(OUTPUT)/lsm_set_self_attr_test: lsm_set_self_attr_test.c common.c
+$(OUTPUT)/lsm_list_modules_test: lsm_list_modules_test.c common.c
+
+EXTRA_CLEAN = $(OUTPUT)/common.o
diff --git a/tools/testing/selftests/lsm/common.c b/tools/testing/selftests/lsm/common.c
new file mode 100644
index 000000000000..9ad258912646
--- /dev/null
+++ b/tools/testing/selftests/lsm/common.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Security Module infrastructure tests
+ *
+ * Copyright © 2023 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#define _GNU_SOURCE
+#include <linux/lsm.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "common.h"
+
+#define PROCATTR "/proc/self/attr/"
+
+int read_proc_attr(const char *attr, char *value, size_t size)
+{
+ int fd;
+ int len;
+ char *path;
+
+ len = strlen(PROCATTR) + strlen(attr) + 1;
+ path = calloc(len, 1);
+ if (path == NULL)
+ return -1;
+ sprintf(path, "%s%s", PROCATTR, attr);
+
+ fd = open(path, O_RDONLY);
+ free(path);
+
+ if (fd < 0)
+ return -1;
+ len = read(fd, value, size);
+
+ close(fd);
+
+ /* Ensure value is terminated */
+ if (len <= 0 || len == size)
+ return -1;
+ value[len] = '\0';
+
+ path = strchr(value, '\n');
+ if (path)
+ *path = '\0';
+
+ return 0;
+}
+
+int read_sysfs_lsms(char *lsms, size_t size)
+{
+ FILE *fp;
+ size_t red;
+
+ fp = fopen("/sys/kernel/security/lsm", "r");
+ if (fp == NULL)
+ return -1;
+ red = fread(lsms, 1, size, fp);
+ fclose(fp);
+
+ if (red <= 0 || red == size)
+ return -1;
+ lsms[red] = '\0';
+ return 0;
+}
+
+int attr_lsm_count(void)
+{
+ char *names = calloc(sysconf(_SC_PAGESIZE), 1);
+ int count = 0;
+
+ if (!names)
+ return 0;
+
+ if (read_sysfs_lsms(names, sysconf(_SC_PAGESIZE)))
+ return 0;
+
+ if (strstr(names, "selinux"))
+ count++;
+ if (strstr(names, "smack"))
+ count++;
+ if (strstr(names, "apparmor"))
+ count++;
+
+ return count;
+}
diff --git a/tools/testing/selftests/lsm/common.h b/tools/testing/selftests/lsm/common.h
new file mode 100644
index 000000000000..06d12110d241
--- /dev/null
+++ b/tools/testing/selftests/lsm/common.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module infrastructure tests
+ *
+ * Copyright © 2023 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#ifndef lsm_get_self_attr
+static inline int lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx,
+ __u32 *size, __u32 flags)
+{
+ return syscall(__NR_lsm_get_self_attr, attr, ctx, size, flags);
+}
+#endif
+
+#ifndef lsm_set_self_attr
+static inline int lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx,
+ __u32 size, __u32 flags)
+{
+ return syscall(__NR_lsm_set_self_attr, attr, ctx, size, flags);
+}
+#endif
+
+#ifndef lsm_list_modules
+static inline int lsm_list_modules(__u64 *ids, __u32 *size, __u32 flags)
+{
+ return syscall(__NR_lsm_list_modules, ids, size, flags);
+}
+#endif
+
+extern int read_proc_attr(const char *attr, char *value, size_t size);
+extern int read_sysfs_lsms(char *lsms, size_t size);
+int attr_lsm_count(void);
diff --git a/tools/testing/selftests/lsm/config b/tools/testing/selftests/lsm/config
new file mode 100644
index 000000000000..1c0c4c020f9c
--- /dev/null
+++ b/tools/testing/selftests/lsm/config
@@ -0,0 +1,3 @@
+CONFIG_SYSFS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
diff --git a/tools/testing/selftests/lsm/lsm_get_self_attr_test.c b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
new file mode 100644
index 000000000000..60caf8528f81
--- /dev/null
+++ b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Security Module infrastructure tests
+ * Tests for the lsm_get_self_attr system call
+ *
+ * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#define _GNU_SOURCE
+#include <linux/lsm.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "kselftest_harness.h"
+#include "common.h"
+
+static struct lsm_ctx *next_ctx(struct lsm_ctx *ctxp)
+{
+ void *vp;
+
+ vp = (void *)ctxp + sizeof(*ctxp) + ctxp->ctx_len;
+ return (struct lsm_ctx *)vp;
+}
+
+TEST(size_null_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+
+ ASSERT_NE(NULL, ctx);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, NULL, 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ free(ctx);
+}
+
+TEST(ctx_null_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u32 size = page_size;
+ int rc;
+
+ rc = lsm_get_self_attr(LSM_ATTR_CURRENT, NULL, &size, 0);
+
+ if (attr_lsm_count()) {
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(1, size);
+ } else {
+ ASSERT_EQ(-1, rc);
+ }
+}
+
+TEST(size_too_small_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u32 size = 1;
+
+ ASSERT_NE(NULL, ctx);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0));
+ if (attr_lsm_count()) {
+ ASSERT_EQ(E2BIG, errno);
+ } else {
+ ASSERT_EQ(EOPNOTSUPP, errno);
+ }
+ ASSERT_NE(1, size);
+
+ free(ctx);
+}
+
+TEST(flags_zero_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ __u32 size;
+ int lsmcount;
+ int i;
+
+ ASSERT_NE(NULL, ctx);
+ errno = 0;
+ size = page_size;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
+ LSM_FLAG_SINGLE));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(page_size, size);
+
+ lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0);
+ ASSERT_LE(1, lsmcount);
+ ASSERT_NE(NULL, syscall_lsms);
+
+ for (i = 0; i < lsmcount; i++) {
+ errno = 0;
+ size = page_size;
+ ctx->id = syscall_lsms[i];
+
+ if (syscall_lsms[i] == LSM_ID_SELINUX ||
+ syscall_lsms[i] == LSM_ID_SMACK ||
+ syscall_lsms[i] == LSM_ID_APPARMOR) {
+ ASSERT_EQ(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx,
+ &size, LSM_FLAG_SINGLE));
+ } else {
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx,
+ &size,
+ LSM_FLAG_SINGLE));
+ }
+ }
+
+ free(ctx);
+}
+
+TEST(flags_overset_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u32 size;
+
+ ASSERT_NE(NULL, ctx);
+
+ errno = 0;
+ size = page_size;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx,
+ &size, 0));
+ ASSERT_EQ(EOPNOTSUPP, errno);
+
+ errno = 0;
+ size = page_size;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
+ LSM_FLAG_SINGLE |
+ (LSM_FLAG_SINGLE << 1)));
+ ASSERT_EQ(EINVAL, errno);
+
+ free(ctx);
+}
+
+TEST(basic_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u32 size = page_size;
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ struct lsm_ctx *tctx = NULL;
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ char *attr = calloc(page_size, 1);
+ int cnt_current = 0;
+ int cnt_exec = 0;
+ int cnt_fscreate = 0;
+ int cnt_keycreate = 0;
+ int cnt_prev = 0;
+ int cnt_sockcreate = 0;
+ int lsmcount;
+ int count;
+ int i;
+
+ ASSERT_NE(NULL, ctx);
+ ASSERT_NE(NULL, syscall_lsms);
+
+ lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0);
+ ASSERT_LE(1, lsmcount);
+
+ for (i = 0; i < lsmcount; i++) {
+ switch (syscall_lsms[i]) {
+ case LSM_ID_SELINUX:
+ cnt_current++;
+ cnt_exec++;
+ cnt_fscreate++;
+ cnt_keycreate++;
+ cnt_prev++;
+ cnt_sockcreate++;
+ break;
+ case LSM_ID_SMACK:
+ cnt_current++;
+ break;
+ case LSM_ID_APPARMOR:
+ cnt_current++;
+ cnt_exec++;
+ cnt_prev++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (cnt_current) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0);
+ ASSERT_EQ(cnt_current, count);
+ tctx = ctx;
+ ASSERT_EQ(0, read_proc_attr("current", attr, page_size));
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ if (cnt_exec) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_EXEC, ctx, &size, 0);
+ ASSERT_GE(cnt_exec, count);
+ if (count > 0) {
+ tctx = ctx;
+ if (read_proc_attr("exec", attr, page_size) == 0)
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ }
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ if (cnt_fscreate) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_FSCREATE, ctx, &size, 0);
+ ASSERT_GE(cnt_fscreate, count);
+ if (count > 0) {
+ tctx = ctx;
+ if (read_proc_attr("fscreate", attr, page_size) == 0)
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ }
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ if (cnt_keycreate) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_KEYCREATE, ctx, &size, 0);
+ ASSERT_GE(cnt_keycreate, count);
+ if (count > 0) {
+ tctx = ctx;
+ if (read_proc_attr("keycreate", attr, page_size) == 0)
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ }
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ if (cnt_prev) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_PREV, ctx, &size, 0);
+ ASSERT_GE(cnt_prev, count);
+ if (count > 0) {
+ tctx = ctx;
+ ASSERT_EQ(0, read_proc_attr("prev", attr, page_size));
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ }
+ if (cnt_sockcreate) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_SOCKCREATE, ctx, &size, 0);
+ ASSERT_GE(cnt_sockcreate, count);
+ if (count > 0) {
+ tctx = ctx;
+ if (read_proc_attr("sockcreate", attr, page_size) == 0)
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ }
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+
+ free(ctx);
+ free(attr);
+ free(syscall_lsms);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c
new file mode 100644
index 000000000000..54d59044ace1
--- /dev/null
+++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Security Module infrastructure tests
+ * Tests for the lsm_list_modules system call
+ *
+ * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#define _GNU_SOURCE
+#include <linux/lsm.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "kselftest_harness.h"
+#include "common.h"
+
+TEST(size_null_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u64 *syscall_lsms = calloc(page_size, 1);
+
+ ASSERT_NE(NULL, syscall_lsms);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, NULL, 0));
+ ASSERT_EQ(EFAULT, errno);
+
+ free(syscall_lsms);
+}
+
+TEST(ids_null_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u32 size = page_size;
+
+ errno = 0;
+ ASSERT_EQ(-1, lsm_list_modules(NULL, &size, 0));
+ ASSERT_EQ(EFAULT, errno);
+ ASSERT_NE(1, size);
+}
+
+TEST(size_too_small_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ __u32 size = 1;
+
+ ASSERT_NE(NULL, syscall_lsms);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 0));
+ ASSERT_EQ(E2BIG, errno);
+ ASSERT_NE(1, size);
+
+ free(syscall_lsms);
+}
+
+TEST(flags_set_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ __u32 size = page_size;
+
+ ASSERT_NE(NULL, syscall_lsms);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 7));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(page_size, size);
+
+ free(syscall_lsms);
+}
+
+TEST(correct_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u32 size = page_size;
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ char *sysfs_lsms = calloc(page_size, 1);
+ char *name;
+ char *cp;
+ int count;
+ int i;
+
+ ASSERT_NE(NULL, sysfs_lsms);
+ ASSERT_NE(NULL, syscall_lsms);
+ ASSERT_EQ(0, read_sysfs_lsms(sysfs_lsms, page_size));
+
+ count = lsm_list_modules(syscall_lsms, &size, 0);
+ ASSERT_LE(1, count);
+ cp = sysfs_lsms;
+ for (i = 0; i < count; i++) {
+ switch (syscall_lsms[i]) {
+ case LSM_ID_CAPABILITY:
+ name = "capability";
+ break;
+ case LSM_ID_SELINUX:
+ name = "selinux";
+ break;
+ case LSM_ID_SMACK:
+ name = "smack";
+ break;
+ case LSM_ID_TOMOYO:
+ name = "tomoyo";
+ break;
+ case LSM_ID_APPARMOR:
+ name = "apparmor";
+ break;
+ case LSM_ID_YAMA:
+ name = "yama";
+ break;
+ case LSM_ID_LOADPIN:
+ name = "loadpin";
+ break;
+ case LSM_ID_SAFESETID:
+ name = "safesetid";
+ break;
+ case LSM_ID_LOCKDOWN:
+ name = "lockdown";
+ break;
+ case LSM_ID_BPF:
+ name = "bpf";
+ break;
+ case LSM_ID_LANDLOCK:
+ name = "landlock";
+ break;
+ case LSM_ID_IMA:
+ name = "ima";
+ break;
+ case LSM_ID_EVM:
+ name = "evm";
+ break;
+ case LSM_ID_IPE:
+ name = "ipe";
+ break;
+ default:
+ name = "INVALID";
+ break;
+ }
+ ASSERT_EQ(0, strncmp(cp, name, strlen(name)));
+ cp += strlen(name) + 1;
+ }
+
+ free(sysfs_lsms);
+ free(syscall_lsms);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
new file mode 100644
index 000000000000..dcb6f8aa772e
--- /dev/null
+++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Security Module infrastructure tests
+ * Tests for the lsm_set_self_attr system call
+ *
+ * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#define _GNU_SOURCE
+#include <linux/lsm.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "kselftest_harness.h"
+#include "common.h"
+
+TEST(ctx_null_lsm_set_self_attr)
+{
+ ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, NULL,
+ sizeof(struct lsm_ctx), 0));
+}
+
+TEST(size_too_small_lsm_set_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u32 size = page_size;
+
+ ASSERT_NE(NULL, ctx);
+ if (attr_lsm_count()) {
+ ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
+ 0));
+ }
+ ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, 1, 0));
+
+ free(ctx);
+}
+
+TEST(flags_zero_lsm_set_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u32 size = page_size;
+
+ ASSERT_NE(NULL, ctx);
+ if (attr_lsm_count()) {
+ ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
+ 0));
+ }
+ ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, size, 1));
+
+ free(ctx);
+}
+
+TEST(flags_overset_lsm_set_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u32 size = page_size;
+
+ ASSERT_NE(NULL, ctx);
+ if (attr_lsm_count()) {
+ ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
+ 0));
+ }
+ ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx,
+ size, 0));
+
+ free(ctx);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
index 60826d7d37d4..471d83e61d95 100644
--- a/tools/testing/selftests/media_tests/Makefile
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
#
-CFLAGS += -I../ -I../../../../usr/include/
+CFLAGS += -I../ $(KHDR_INCLUDES)
TEST_GEN_PROGS := media_device_test media_device_open video_device_test
include ../lib.mk
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
index 93183a37b133..4396bf2273a4 100644
--- a/tools/testing/selftests/media_tests/media_device_open.c
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -34,7 +34,7 @@
#include <sys/stat.h>
#include <linux/media.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
index 4b9953359e40..6e4a8090a0eb 100644
--- a/tools/testing/selftests/media_tests/media_device_test.c
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -39,7 +39,7 @@
#include <time.h>
#include <linux/media.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/media_tests/regression_test.txt b/tools/testing/selftests/media_tests/regression_test.txt
index 2627367681f7..9d0fcd98c085 100644
--- a/tools/testing/selftests/media_tests/regression_test.txt
+++ b/tools/testing/selftests/media_tests/regression_test.txt
@@ -1,5 +1,5 @@
Testing for regressions in Media Controller API register, ioctl, syscall,
-and unregister paths. There have a few problems that result in user-after
+and unregister paths. There have a few problems that result in use-after
free on media_device, media_devnode, and cdev pointers when the driver is
unbound while ioctl is in progress.
@@ -15,11 +15,11 @@ Build media_device_test
cd tools/testing/selftests/media_tests
make
-Regressions test for cdev user-after free error on /dev/mediaX when driver
+Regressions test for cdev use-after-free error on /dev/mediaX when driver
is unbound:
Start media_device_test to regression test media devnode dynamic alloc
-and cdev user-after-free fixes. This opens media dev files and sits in
+and cdev use-after-free fixes. This opens media dev files and sits in
a loop running media ioctl MEDIA_IOC_DEVICE_INFO command once every 10
seconds. The idea is when device file goes away, media devnode and cdev
should stick around until this test exits.
@@ -40,4 +40,4 @@ keep ioctls going while bind/unbind runs.
Copy bind_unbind_sample.txt and make changes to specify the driver name
and number to run bind and unbind. Start the bind_unbind.sh
-Run dmesg looking for any user-after free errors or mutex lock errors.
+Run dmesg looking for any use-after-free errors or mutex lock errors.
diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c
index 0f6aef2e2593..2c44e115f2f0 100644
--- a/tools/testing/selftests/media_tests/video_device_test.c
+++ b/tools/testing/selftests/media_tests/video_device_test.c
@@ -37,45 +37,58 @@
#include <time.h>
#include <linux/videodev2.h>
-int main(int argc, char **argv)
+#define PRIORITY_MAX 4
+
+int priority_test(int fd)
{
- int opt;
- char video_dev[256];
- int count;
- struct v4l2_tuner vtuner;
- struct v4l2_capability vcap;
+ /* This test will try to update the priority associated with a file descriptor */
+
+ enum v4l2_priority old_priority, new_priority, priority_to_compare;
int ret;
- int fd;
+ int result = 0;
- if (argc < 2) {
- printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
- exit(-1);
+ ret = ioctl(fd, VIDIOC_G_PRIORITY, &old_priority);
+ if (ret < 0) {
+ printf("Failed to get priority: %s\n", strerror(errno));
+ return -1;
+ }
+ new_priority = (old_priority + 1) % PRIORITY_MAX;
+ ret = ioctl(fd, VIDIOC_S_PRIORITY, &new_priority);
+ if (ret < 0) {
+ printf("Failed to set priority: %s\n", strerror(errno));
+ return -1;
+ }
+ ret = ioctl(fd, VIDIOC_G_PRIORITY, &priority_to_compare);
+ if (ret < 0) {
+ printf("Failed to get new priority: %s\n", strerror(errno));
+ result = -1;
+ goto cleanup;
+ }
+ if (priority_to_compare != new_priority) {
+ printf("Priority wasn't set - test failed\n");
+ result = -1;
}
- /* Process arguments */
- while ((opt = getopt(argc, argv, "d:")) != -1) {
- switch (opt) {
- case 'd':
- strncpy(video_dev, optarg, sizeof(video_dev) - 1);
- video_dev[sizeof(video_dev)-1] = '\0';
- break;
- default:
- printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
- exit(-1);
- }
+cleanup:
+ ret = ioctl(fd, VIDIOC_S_PRIORITY, &old_priority);
+ if (ret < 0) {
+ printf("Failed to restore priority: %s\n", strerror(errno));
+ return -1;
}
+ return result;
+}
+
+int loop_test(int fd)
+{
+ int count;
+ struct v4l2_tuner vtuner;
+ struct v4l2_capability vcap;
+ int ret;
/* Generate random number of interations */
srand((unsigned int) time(NULL));
count = rand();
- /* Open Video device and keep it open */
- fd = open(video_dev, O_RDWR);
- if (fd == -1) {
- printf("Video Device open errno %s\n", strerror(errno));
- exit(-1);
- }
-
printf("\nNote:\n"
"While test is running, remove the device or unbind\n"
"driver and ensure there are no use after free errors\n"
@@ -98,4 +111,46 @@ int main(int argc, char **argv)
sleep(10);
count--;
}
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char video_dev[256];
+ int fd;
+ int test_result;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(video_dev, optarg, sizeof(video_dev) - 1);
+ video_dev[sizeof(video_dev)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ /* Open Video device and keep it open */
+ fd = open(video_dev, O_RDWR);
+ if (fd == -1) {
+ printf("Video Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ test_result = priority_test(fd);
+ if (!test_result)
+ printf("Priority test - PASSED\n");
+ else
+ printf("Priority test - FAILED\n");
+
+ loop_test(fd);
}
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
index 34d1c81a2324..fc840e06ff56 100644
--- a/tools/testing/selftests/membarrier/Makefile
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g -I../../../../usr/include/
+CFLAGS += -g $(KHDR_INCLUDES)
LDLIBS += -lpthread
TEST_GEN_PROGS := membarrier_test_single_thread \
diff --git a/tools/testing/selftests/membarrier/membarrier_test_impl.h b/tools/testing/selftests/membarrier/membarrier_test_impl.h
index 186be69f0a59..f6d7c44b2288 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_impl.h
+++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h
@@ -7,13 +7,40 @@
#include <string.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
+
+static int registrations;
static int sys_membarrier(int cmd, int flags)
{
return syscall(__NR_membarrier, cmd, flags);
}
+static int test_membarrier_get_registrations(int cmd)
+{
+ int ret, flags = 0;
+ const char *test_name =
+ "sys membarrier MEMBARRIER_CMD_GET_REGISTRATIONS";
+
+ registrations |= cmd;
+
+ ret = sys_membarrier(MEMBARRIER_CMD_GET_REGISTRATIONS, 0);
+ if (ret < 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ } else if (ret != registrations) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, ret = %d, registrations = %d\n",
+ test_name, flags, ret, registrations);
+ }
+ ksft_test_result_pass(
+ "%s test: flags = %d, ret = %d, registrations = %d\n",
+ test_name, flags, ret, registrations);
+
+ return 0;
+}
+
static int test_membarrier_cmd_fail(void)
{
int cmd = -1, flags = 0;
@@ -113,6 +140,8 @@ static int test_membarrier_register_private_expedited_success(void)
ksft_test_result_pass(
"%s test: flags = %d\n",
test_name, flags);
+
+ test_membarrier_get_registrations(cmd);
return 0;
}
@@ -170,6 +199,8 @@ static int test_membarrier_register_private_expedited_sync_core_success(void)
ksft_test_result_pass(
"%s test: flags = %d\n",
test_name, flags);
+
+ test_membarrier_get_registrations(cmd);
return 0;
}
@@ -204,6 +235,8 @@ static int test_membarrier_register_global_expedited_success(void)
ksft_test_result_pass(
"%s test: flags = %d\n",
test_name, flags);
+
+ test_membarrier_get_registrations(cmd);
return 0;
}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
index ac5613e5b0eb..4e14dba81234 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
@@ -62,12 +62,12 @@ static int test_mt_membarrier(void)
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(13);
+ ksft_set_plan(16);
test_membarrier_query();
/* Multi-threaded */
test_mt_membarrier();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
index c1c963902854..fa3f1d6c37a0 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
@@ -12,7 +12,9 @@
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(13);
+ ksft_set_plan(18);
+
+ test_membarrier_get_registrations(/*cmd=*/0);
test_membarrier_query();
@@ -20,5 +22,7 @@ int main(int argc, char **argv)
test_membarrier_success();
- return ksft_exit_pass();
+ test_membarrier_get_registrations(/*cmd=*/0);
+
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 4da8b565fa32..163b6f68631c 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -D_FILE_OFFSET_BITS=64
-CFLAGS += -I../../../../include/uapi/
-CFLAGS += -I../../../../include/
-CFLAGS += -I../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := memfd_test
TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index be675002f918..dbc171a3806d 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -22,6 +22,7 @@
#include <linux/falloc.h>
#include <fcntl.h>
#include <linux/memfd.h>
+#include <linux/types.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
@@ -305,7 +306,7 @@ int main(int argc, char **argv)
* then the kernel did a page-replacement or canceled the read() (or
* whatever magic it did..). In that case, the memfd object is still
* all zero.
- * In case the memfd-object was *not* sealed, the read() was successfull
+ * In case the memfd-object was *not* sealed, the read() was successful
* and the memfd object must *not* be all zero.
* Note that in real scenarios, there might be a mixture of both, but
* in this test-cases, we have explicit 200ms delays which should be
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 74baab83fec3..5b993924cc3f 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -9,6 +9,7 @@
#include <fcntl.h>
#include <linux/memfd.h>
#include <sched.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
@@ -18,6 +19,7 @@
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <ctype.h>
#include "common.h"
@@ -28,12 +30,46 @@
#define MFD_DEF_SIZE 8192
#define STACK_SIZE 65536
+#define F_SEAL_EXEC 0x0020
+
+#define F_WX_SEALS (F_SEAL_SHRINK | \
+ F_SEAL_GROW | \
+ F_SEAL_WRITE | \
+ F_SEAL_FUTURE_WRITE | \
+ F_SEAL_EXEC)
+
+#define MFD_NOEXEC_SEAL 0x0008U
+
/*
* Default is not to test hugetlbfs
*/
static size_t mfd_def_size = MFD_DEF_SIZE;
static const char *memfd_str = MEMFD_STR;
+static ssize_t fd2name(int fd, char *buf, size_t bufsize)
+{
+ char buf1[PATH_MAX];
+ int size;
+ ssize_t nbytes;
+
+ size = snprintf(buf1, PATH_MAX, "/proc/self/fd/%d", fd);
+ if (size < 0) {
+ printf("snprintf(%d) failed on %m\n", fd);
+ abort();
+ }
+
+ /*
+ * reserver one byte for string termination.
+ */
+ nbytes = readlink(buf1, buf, bufsize-1);
+ if (nbytes == -1) {
+ printf("readlink(%s) failed %m\n", buf1);
+ abort();
+ }
+ buf[nbytes] = '\0';
+ return nbytes;
+}
+
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
int r, fd;
@@ -54,9 +90,67 @@ static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
return fd;
}
+static void sysctl_assert_write(const char *val)
+{
+ int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
+
+ if (fd < 0) {
+ printf("open sysctl failed: %m\n");
+ abort();
+ }
+
+ if (write(fd, val, strlen(val)) < 0) {
+ printf("write sysctl %s failed: %m\n", val);
+ abort();
+ }
+}
+
+static void sysctl_fail_write(const char *val)
+{
+ int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
+
+ if (fd < 0) {
+ printf("open sysctl failed: %m\n");
+ abort();
+ }
+
+ if (write(fd, val, strlen(val)) >= 0) {
+ printf("write sysctl %s succeeded, but failure expected\n",
+ val);
+ abort();
+ }
+}
+
+static void sysctl_assert_equal(const char *val)
+{
+ char *p, buf[128] = {};
+ int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
+
+ if (fd < 0) {
+ printf("open sysctl failed: %m\n");
+ abort();
+ }
+
+ if (read(fd, buf, sizeof(buf)) < 0) {
+ printf("read sysctl failed: %m\n");
+ abort();
+ }
+
+ /* Strip trailing whitespace. */
+ p = buf;
+ while (!isspace(*p))
+ p++;
+ *p = '\0';
+
+ if (strcmp(buf, val) != 0) {
+ printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
+ abort();
+ }
+}
+
static int mfd_assert_reopen_fd(int fd_in)
{
- int r, fd;
+ int fd;
char path[100];
sprintf(path, "/proc/self/fd/%d", fd_in);
@@ -77,7 +171,7 @@ static void mfd_fail_new(const char *name, unsigned int flags)
r = sys_memfd_create(name, flags);
if (r >= 0) {
printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
- name, flags);
+ name ? name : "NULL", flags);
close(r);
abort();
}
@@ -98,11 +192,13 @@ static unsigned int mfd_assert_get_seals(int fd)
static void mfd_assert_has_seals(int fd, unsigned int seals)
{
+ char buf[PATH_MAX];
unsigned int s;
+ fd2name(fd, buf, PATH_MAX);
s = mfd_assert_get_seals(fd);
if (s != seals) {
- printf("%u != %u = GET_SEALS(%d)\n", seals, s, fd);
+ printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
abort();
}
}
@@ -186,6 +282,24 @@ static void *mfd_assert_mmap_shared(int fd)
return p;
}
+static void *mfd_assert_mmap_read_shared(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
static void *mfd_assert_mmap_private(int fd)
{
void *p;
@@ -455,6 +569,7 @@ static void mfd_fail_write(int fd)
printf("mmap()+mprotect() didn't fail as expected\n");
abort();
}
+ munmap(p, mfd_def_size);
}
/* verify PUNCH_HOLE fails */
@@ -593,6 +708,61 @@ static void mfd_fail_grow_write(int fd)
}
}
+static void mfd_assert_mode(int fd, int mode)
+{
+ struct stat st;
+ char buf[PATH_MAX];
+
+ fd2name(fd, buf, PATH_MAX);
+
+ if (fstat(fd, &st) < 0) {
+ printf("fstat(%s) failed: %m\n", buf);
+ abort();
+ }
+
+ if ((st.st_mode & 07777) != mode) {
+ printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
+ buf, (int)st.st_mode & 07777, mode);
+ abort();
+ }
+}
+
+static void mfd_assert_chmod(int fd, int mode)
+{
+ char buf[PATH_MAX];
+
+ fd2name(fd, buf, PATH_MAX);
+
+ if (fchmod(fd, mode) < 0) {
+ printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
+ abort();
+ }
+
+ mfd_assert_mode(fd, mode);
+}
+
+static void mfd_fail_chmod(int fd, int mode)
+{
+ struct stat st;
+ char buf[PATH_MAX];
+
+ fd2name(fd, buf, PATH_MAX);
+
+ if (fstat(fd, &st) < 0) {
+ printf("fstat(%s) failed: %m\n", buf);
+ abort();
+ }
+
+ if (fchmod(fd, mode) == 0) {
+ printf("fchmod(%s, 0%04o) didn't fail as expected\n",
+ buf, mode);
+ abort();
+ }
+
+ /* verify that file mode bits did not change */
+ mfd_assert_mode(fd, st.st_mode & 07777);
+}
+
static int idle_thread_fn(void *arg)
{
sigset_t set;
@@ -606,7 +776,7 @@ static int idle_thread_fn(void *arg)
return 0;
}
-static pid_t spawn_idle_thread(unsigned int flags)
+static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
{
uint8_t *stack;
pid_t pid;
@@ -617,10 +787,7 @@ static pid_t spawn_idle_thread(unsigned int flags)
abort();
}
- pid = clone(idle_thread_fn,
- stack + STACK_SIZE,
- SIGCHLD | flags,
- NULL);
+ pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
if (pid < 0) {
printf("clone() failed: %m\n");
abort();
@@ -629,6 +796,33 @@ static pid_t spawn_idle_thread(unsigned int flags)
return pid;
}
+static void join_thread(pid_t pid)
+{
+ int wstatus;
+
+ if (waitpid(pid, &wstatus, 0) < 0) {
+ printf("newpid thread: waitpid() failed: %m\n");
+ abort();
+ }
+
+ if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
+ printf("newpid thread: exited with non-zero error code %d\n",
+ WEXITSTATUS(wstatus));
+ abort();
+ }
+
+ if (WIFSIGNALED(wstatus)) {
+ printf("newpid thread: killed by signal %d\n",
+ WTERMSIG(wstatus));
+ abort();
+ }
+}
+
+static pid_t spawn_idle_thread(unsigned int flags)
+{
+ return spawn_thread(flags, idle_thread_fn, NULL);
+}
+
static void join_idle_thread(pid_t pid)
{
kill(pid, SIGTERM);
@@ -670,6 +864,9 @@ static void test_create(void)
mfd_fail_new("", ~0);
mfd_fail_new("", 0x80000000U);
+ /* verify EXEC and NOEXEC_SEAL can't both be set */
+ mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
+
/* verify MFD_CLOEXEC is allowed */
fd = mfd_assert_new("", 0, MFD_CLOEXEC);
close(fd);
@@ -801,6 +998,30 @@ static void test_seal_future_write(void)
close(fd);
}
+static void test_seal_write_map_read_shared(void)
+{
+ int fd;
+ void *p;
+
+ printf("%s SEAL-WRITE-MAP-READ\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_write_map_read",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+
+ p = mfd_assert_mmap_read_shared(fd);
+
+ mfd_assert_read(fd);
+ mfd_assert_read_shared(fd);
+ mfd_fail_write(fd);
+
+ munmap(p, mfd_def_size);
+ close(fd);
+}
+
/*
* Test SEAL_SHRINK
* Test whether SEAL_SHRINK actually prevents shrinking
@@ -880,6 +1101,357 @@ static void test_seal_resize(void)
}
/*
+ * Test SEAL_EXEC
+ * Test fd is created with exec and allow sealing.
+ * chmod() cannot change x bits after sealing.
+ */
+static void test_exec_seal(void)
+{
+ int fd;
+
+ printf("%s SEAL-EXEC\n", memfd_str);
+
+ printf("%s Apply SEAL_EXEC\n", memfd_str);
+ fd = mfd_assert_new("kern_memfd_seal_exec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
+
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_chmod(fd, 0644);
+
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_EXEC);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+
+ mfd_assert_chmod(fd, 0600);
+ mfd_fail_chmod(fd, 0777);
+ mfd_fail_chmod(fd, 0670);
+ mfd_fail_chmod(fd, 0605);
+ mfd_fail_chmod(fd, 0700);
+ mfd_fail_chmod(fd, 0100);
+ mfd_assert_chmod(fd, 0666);
+ mfd_assert_write(fd);
+ close(fd);
+
+ printf("%s Apply ALL_SEALS\n", memfd_str);
+ fd = mfd_assert_new("kern_memfd_seal_exec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
+
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_chmod(fd, 0700);
+
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_EXEC);
+ mfd_assert_has_seals(fd, F_WX_SEALS);
+
+ mfd_fail_chmod(fd, 0711);
+ mfd_fail_chmod(fd, 0600);
+ mfd_fail_write(fd);
+ close(fd);
+}
+
+/*
+ * Test EXEC_NO_SEAL
+ * Test fd is created with exec and not allow sealing.
+ */
+static void test_exec_no_seal(void)
+{
+ int fd;
+
+ printf("%s EXEC_NO_SEAL\n", memfd_str);
+
+ /* Create with EXEC but without ALLOW_SEALING */
+ fd = mfd_assert_new("kern_memfd_exec_no_sealing",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_EXEC);
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+ mfd_assert_chmod(fd, 0666);
+ close(fd);
+}
+
+/*
+ * Test memfd_create with MFD_NOEXEC flag
+ */
+static void test_noexec_seal(void)
+{
+ int fd;
+
+ printf("%s NOEXEC_SEAL\n", memfd_str);
+
+ /* Create with NOEXEC and ALLOW_SEALING */
+ fd = mfd_assert_new("kern_memfd_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ /* Create with NOEXEC but without ALLOW_SEALING */
+ fd = mfd_assert_new("kern_memfd_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+}
+
+static void test_sysctl_sysctl0(void)
+{
+ int fd;
+
+ sysctl_assert_equal("0");
+
+ fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_chmod(fd, 0644);
+ close(fd);
+}
+
+static void test_sysctl_set_sysctl0(void)
+{
+ sysctl_assert_write("0");
+ test_sysctl_sysctl0();
+}
+
+static void test_sysctl_sysctl1(void)
+{
+ int fd;
+
+ sysctl_assert_equal("1");
+
+ fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_chmod(fd, 0644);
+ close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+}
+
+static void test_sysctl_set_sysctl1(void)
+{
+ sysctl_assert_write("1");
+ test_sysctl_sysctl1();
+}
+
+static void test_sysctl_sysctl2(void)
+{
+ int fd;
+
+ sysctl_assert_equal("2");
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ mfd_fail_new("kern_memfd_sysctl_2_exec",
+ MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+}
+
+static void test_sysctl_set_sysctl2(void)
+{
+ sysctl_assert_write("2");
+ test_sysctl_sysctl2();
+}
+
+static int sysctl_simple_child(void *arg)
+{
+ printf("%s sysctl 0\n", memfd_str);
+ test_sysctl_set_sysctl0();
+
+ printf("%s sysctl 1\n", memfd_str);
+ test_sysctl_set_sysctl1();
+
+ printf("%s sysctl 0\n", memfd_str);
+ test_sysctl_set_sysctl0();
+
+ printf("%s sysctl 2\n", memfd_str);
+ test_sysctl_set_sysctl2();
+
+ printf("%s sysctl 1\n", memfd_str);
+ test_sysctl_set_sysctl1();
+
+ printf("%s sysctl 0\n", memfd_str);
+ test_sysctl_set_sysctl0();
+
+ return 0;
+}
+
+/*
+ * Test sysctl
+ * A very basic test to make sure the core sysctl semantics work.
+ */
+static void test_sysctl_simple(void)
+{
+ int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
+
+ join_thread(pid);
+}
+
+static int sysctl_nested(void *arg)
+{
+ void (*fn)(void) = arg;
+
+ fn();
+ return 0;
+}
+
+static int sysctl_nested_wait(void *arg)
+{
+ /* Wait for a SIGCONT. */
+ kill(getpid(), SIGSTOP);
+ return sysctl_nested(arg);
+}
+
+static void test_sysctl_sysctl1_failset(void)
+{
+ sysctl_fail_write("0");
+ test_sysctl_sysctl1();
+}
+
+static void test_sysctl_sysctl2_failset(void)
+{
+ sysctl_fail_write("1");
+ test_sysctl_sysctl2();
+
+ sysctl_fail_write("0");
+ test_sysctl_sysctl2();
+}
+
+static int sysctl_nested_child(void *arg)
+{
+ int pid;
+
+ printf("%s nested sysctl 0\n", memfd_str);
+ sysctl_assert_write("0");
+ /* A further nested pidns works the same. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
+ join_thread(pid);
+
+ printf("%s nested sysctl 1\n", memfd_str);
+ sysctl_assert_write("1");
+ /* Child inherits our setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
+ join_thread(pid);
+ /* Child cannot raise the setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
+ test_sysctl_sysctl1_failset);
+ join_thread(pid);
+ /* Child can lower the setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
+ test_sysctl_set_sysctl2);
+ join_thread(pid);
+ /* Child lowering the setting has no effect on our setting. */
+ test_sysctl_sysctl1();
+
+ printf("%s nested sysctl 2\n", memfd_str);
+ sysctl_assert_write("2");
+ /* Child inherits our setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
+ join_thread(pid);
+ /* Child cannot raise the setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
+ test_sysctl_sysctl2_failset);
+ join_thread(pid);
+
+ /* Verify that the rules are actually inherited after fork. */
+ printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
+ sysctl_assert_write("0");
+
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl1_failset);
+ sysctl_assert_write("1");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
+ sysctl_assert_write("0");
+
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl2_failset);
+ sysctl_assert_write("2");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ /*
+ * Verify that the current effective setting is saved on fork, meaning
+ * that the parent lowering the sysctl doesn't affect already-forked
+ * children.
+ */
+ printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
+ sysctl_assert_write("2");
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl2);
+ sysctl_assert_write("1");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
+ sysctl_assert_write("2");
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl2);
+ sysctl_assert_write("0");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
+ sysctl_assert_write("1");
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl1);
+ sysctl_assert_write("0");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ return 0;
+}
+
+/*
+ * Test sysctl with nested pid namespaces
+ * Make sure that the sysctl nesting semantics work correctly.
+ */
+static void test_sysctl_nested(void)
+{
+ int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
+
+ join_thread(pid);
+}
+
+/*
* Test sharing via dup()
* Test that seals are shared between dupped FDs and they're all equal.
*/
@@ -999,7 +1571,7 @@ static void test_share_open(char *banner, char *b_suffix)
/*
* Test sharing via fork()
- * Test whether seal-modifications work as expected with forked childs.
+ * Test whether seal-modifications work as expected with forked children.
*/
static void test_share_fork(char *banner, char *b_suffix)
{
@@ -1028,6 +1600,11 @@ static void test_share_fork(char *banner, char *b_suffix)
close(fd);
}
+static bool pid_ns_supported(void)
+{
+ return access("/proc/self/ns/pid", F_OK) == 0;
+}
+
int main(int argc, char **argv)
{
pid_t pid;
@@ -1052,13 +1629,24 @@ int main(int argc, char **argv)
test_create();
test_basic();
+ test_exec_seal();
+ test_exec_no_seal();
+ test_noexec_seal();
test_seal_write();
test_seal_future_write();
+ test_seal_write_map_read_shared();
test_seal_shrink();
test_seal_grow();
test_seal_resize();
+ if (pid_ns_supported()) {
+ test_sysctl_simple();
+ test_sysctl_nested();
+ } else {
+ printf("PID namespaces are not supported; skipping sysctl tests\n");
+ }
+
test_share_dup("SHARE-DUP", "");
test_share_mmap("SHARE-MMAP", "");
test_share_open("SHARE-OPEN", "");
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
index a7e8cd5bb265..1eef042a31e1 100644
--- a/tools/testing/selftests/memory-hotplug/config
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -1,5 +1,4 @@
CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTPLUG_SPARSE=y
CONFIG_NOTIFIER_ERROR_INJECTION=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_MEMORY_HOTREMOVE=y
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
index b37585e6aa38..611be86eaf3d 100755
--- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -134,6 +134,15 @@ offline_memory_expect_fail()
return 0
}
+online_all_offline_memory()
+{
+ for memory in `hotpluggable_offline_memory`; do
+ if ! online_memory_expect_success $memory; then
+ retval=1
+ fi
+ done
+}
+
error=-12
priority=0
# Run with default of ratio=2 for Kselftest run
@@ -197,8 +206,11 @@ echo -e "\t trying to offline $target out of $hotpluggable_num memory block(s):"
for memory in `hotpluggable_online_memory`; do
if [ "$target" -gt 0 ]; then
echo "online->offline memory$memory"
- if offline_memory_expect_success $memory; then
+ if offline_memory_expect_success $memory &>/dev/null; then
target=$(($target - 1))
+ echo "-> Success"
+ else
+ echo "-> Failure"
fi
fi
done
@@ -257,7 +269,7 @@ prerequisite_extra
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
for memory in `hotpluggable_online_memory`; do
if [ $((RANDOM % 100)) -lt $ratio ]; then
- offline_memory_expect_success $memory
+ offline_memory_expect_success $memory &>/dev/null
fi
done
@@ -266,26 +278,35 @@ done
#
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
for memory in `hotpluggable_offline_memory`; do
- online_memory_expect_fail $memory
+ if ! online_memory_expect_fail $memory; then
+ retval=1
+ fi
done
#
# Online all hot-pluggable memory
#
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
-for memory in `hotpluggable_offline_memory`; do
- online_memory_expect_success $memory
-done
+online_all_offline_memory
#
# Test memory hot-remove error handling (online => offline)
#
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
for memory in `hotpluggable_online_memory`; do
- offline_memory_expect_fail $memory
+ if [ $((RANDOM % 100)) -lt $ratio ]; then
+ if ! offline_memory_expect_fail $memory; then
+ retval=1
+ fi
+ fi
done
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
/sbin/modprobe -q -r memory-notifier-error-inject
+#
+# Restore memory before exit
+#
+online_all_offline_memory
+
exit $retval
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index 5a1e85ff5d32..cdd022c1c497 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -14,10 +14,9 @@
#include <sys/mman.h>
#include <string.h>
#include <fcntl.h>
-#include <string.h>
-#include "../kselftest.h"
-#include "../kselftest_harness.h"
+#include "kselftest.h"
+#include "kselftest_harness.h"
/* Default test file size: 4MB */
#define MB (1UL << 20)
@@ -151,8 +150,8 @@ TEST(check_huge_pages)
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
if (addr == MAP_FAILED) {
- if (errno == ENOMEM)
- SKIP(return, "No huge pages available.");
+ if (errno == ENOMEM || errno == EINVAL)
+ SKIP(return, "No huge pages available or CONFIG_HUGETLB_PAGE disabled.");
else
TH_LOG("mmap error: %s", strerror(errno));
}
@@ -208,15 +207,21 @@ TEST(check_file_mmap)
errno = 0;
fd = open(".", O_TMPFILE | O_RDWR, 0600);
- ASSERT_NE(-1, fd) {
- TH_LOG("Can't create temporary file: %s",
- strerror(errno));
+ if (fd < 0) {
+ ASSERT_EQ(errno, EOPNOTSUPP) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ SKIP(goto out_free, "O_TMPFILE not supported by filesystem.");
}
errno = 0;
retval = fallocate(fd, 0, 0, FILE_SIZE);
- ASSERT_EQ(0, retval) {
- TH_LOG("Error allocating space for the temporary file: %s",
- strerror(errno));
+ if (retval) {
+ ASSERT_EQ(errno, EOPNOTSUPP) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+ SKIP(goto out_close, "fallocate not supported by filesystem.");
}
/*
@@ -256,9 +261,6 @@ TEST(check_file_mmap)
TH_LOG("No read-ahead pages found in memory");
}
- EXPECT_LT(i, vec_size) {
- TH_LOG("Read-ahead pages reached the end of the file");
- }
/*
* End of the readahead window. The rest of the pages shouldn't
* be in memory.
@@ -272,15 +274,16 @@ TEST(check_file_mmap)
}
munmap(addr, FILE_SIZE);
+out_close:
close(fd);
+out_free:
free(vec);
}
/*
* Test mincore() behavior on a page backed by a tmpfs file. This test
- * performs the same steps as the previous one. However, we don't expect
- * any readahead in this case.
+ * performs the same steps as the previous one.
*/
TEST(check_tmpfs_mmap)
{
@@ -291,7 +294,6 @@ TEST(check_tmpfs_mmap)
int page_size;
int fd;
int i;
- int ra_pages = 0;
page_size = sysconf(_SC_PAGESIZE);
vec_size = FILE_SIZE / page_size;
@@ -334,8 +336,7 @@ TEST(check_tmpfs_mmap)
}
/*
- * Touch a page in the middle of the mapping. We expect only
- * that page to be fetched into memory.
+ * Touch a page in the middle of the mapping.
*/
addr[FILE_SIZE / 2] = 1;
retval = mincore(addr, FILE_SIZE, vec);
@@ -344,15 +345,6 @@ TEST(check_tmpfs_mmap)
TH_LOG("Page not found in memory after use");
}
- i = FILE_SIZE / 2 / page_size + 1;
- while (i < vec_size && vec[i]) {
- ra_pages++;
- i++;
- }
- ASSERT_EQ(ra_pages, 0) {
- TH_LOG("Read-ahead pages found in memory");
- }
-
munmap(addr, FILE_SIZE);
close(fd);
free(vec);
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
new file mode 100644
index 000000000000..c2a8586e51a1
--- /dev/null
+++ b/tools/testing/selftests/mm/.gitignore
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0-only
+cow
+hugepage-mmap
+hugepage-mremap
+hugepage-shm
+hugepage-vmemmap
+hugetlb-madvise
+hugetlb-read-hwpoison
+hugetlb-soft-offline
+khugepaged
+map_hugetlb
+map_populate
+thuge-gen
+compaction_test
+migration
+mlock2-tests
+mrelease_test
+mremap_dontunmap
+mremap_test
+on-fault-limit
+transhuge-stress
+pagemap_ioctl
+pfnmap
+process_madv
+*.tmp*
+protection_keys
+protection_keys_32
+protection_keys_64
+madv_populate
+uffd-stress
+uffd-unit-tests
+uffd-wp-mremap
+mlock-intersect-test
+mlock-random-test
+virtual_address_range
+gup_test
+va_128TBswitch
+map_fixed_noreplace
+write_to_hugetlbfs
+hmm-tests
+memfd_secret
+soft-dirty
+split_huge_page_test
+ksm_tests
+local_config.h
+local_config.mk
+ksm_functional_tests
+mdwe_test
+gup_longterm
+mkdirty
+va_high_addr_switch
+hugetlb_fault_after_madv
+hugetlb_madv_vs_map
+mseal_test
+droppable
+hugetlb_dio
+pkey_sighandler_tests_32
+pkey_sighandler_tests_64
+guard-regions
+merge
+prctl_thp_disable
+rmap
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
new file mode 100644
index 000000000000..eaf9312097f7
--- /dev/null
+++ b/tools/testing/selftests/mm/Makefile
@@ -0,0 +1,257 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mm selftests
+
+LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h
+LOCAL_HDRS += $(selfdir)/mm/mseal_helpers.h
+
+include local_config.mk
+
+ifeq ($(ARCH),)
+
+ifeq ($(CROSS_COMPILE),)
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+else
+uname_M := $(shell echo $(CROSS_COMPILE) | grep -o '^[a-z0-9]\+')
+endif
+ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/powerpc/')
+endif
+
+# Without this, failed build products remain, with up-to-date timestamps,
+# thus tricking Make (and you!) into believing that All Is Well, in subsequent
+# make invocations:
+.DELETE_ON_ERROR:
+
+# Avoid accidental wrong builds, due to built-in rules working just a little
+# bit too well--but not quite as well as required for our situation here.
+#
+# In other words, "make $SOME_TEST" is supposed to fail to build at all,
+# because this Makefile only supports either "make" (all), or "make /full/path".
+# However, the built-in rules, if not suppressed, will pick up CFLAGS and the
+# initial LDLIBS (but not the target-specific LDLIBS, because those are only
+# set for the full path target!). This causes it to get pretty far into building
+# things despite using incorrect values such as an *occasionally* incomplete
+# LDLIBS.
+MAKEFLAGS += --no-builtin-rules
+
+CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+CFLAGS += -Wunreachable-code
+LDLIBS = -lrt -lpthread -lm
+
+# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is
+# automatically enabled at -O1 or above. This triggers various unused-result
+# warnings where functions such as read() or write() are called and their
+# return value is not checked. Disable _FORTIFY_SOURCE to silence those
+# warnings.
+CFLAGS += -U_FORTIFY_SOURCE
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+ifneq (,$(wildcard $(KDIR)/Module.symvers))
+ifneq (,$(wildcard $(KDIR)/include/linux/page_frag_cache.h))
+TEST_GEN_MODS_DIR := page_frag
+else
+PAGE_FRAG_WARNING = "missing page_frag_cache.h, please use a newer kernel"
+endif
+else
+PAGE_FRAG_WARNING = "missing Module.symvers, please have the kernel built first"
+endif
+
+TEST_GEN_FILES = cow
+TEST_GEN_FILES += compaction_test
+TEST_GEN_FILES += gup_longterm
+TEST_GEN_FILES += gup_test
+TEST_GEN_FILES += hmm-tests
+TEST_GEN_FILES += hugetlb-madvise
+TEST_GEN_FILES += hugetlb-read-hwpoison
+TEST_GEN_FILES += hugetlb-soft-offline
+TEST_GEN_FILES += hugepage-mmap
+TEST_GEN_FILES += hugepage-mremap
+TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += hugepage-vmemmap
+TEST_GEN_FILES += khugepaged
+TEST_GEN_FILES += madv_populate
+TEST_GEN_FILES += map_fixed_noreplace
+TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_populate
+ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64))
+TEST_GEN_FILES += memfd_secret
+endif
+TEST_GEN_FILES += migration
+TEST_GEN_FILES += mkdirty
+TEST_GEN_FILES += mlock-random-test
+TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += mrelease_test
+TEST_GEN_FILES += mremap_dontunmap
+TEST_GEN_FILES += mremap_test
+TEST_GEN_FILES += mseal_test
+TEST_GEN_FILES += on-fault-limit
+TEST_GEN_FILES += pagemap_ioctl
+TEST_GEN_FILES += pfnmap
+TEST_GEN_FILES += process_madv
+TEST_GEN_FILES += prctl_thp_disable
+TEST_GEN_FILES += thuge-gen
+TEST_GEN_FILES += transhuge-stress
+TEST_GEN_FILES += uffd-stress
+TEST_GEN_FILES += uffd-unit-tests
+TEST_GEN_FILES += uffd-wp-mremap
+TEST_GEN_FILES += split_huge_page_test
+TEST_GEN_FILES += ksm_tests
+TEST_GEN_FILES += ksm_functional_tests
+TEST_GEN_FILES += mdwe_test
+TEST_GEN_FILES += hugetlb_fault_after_madv
+TEST_GEN_FILES += hugetlb_madv_vs_map
+TEST_GEN_FILES += hugetlb_dio
+TEST_GEN_FILES += droppable
+TEST_GEN_FILES += guard-regions
+TEST_GEN_FILES += merge
+TEST_GEN_FILES += rmap
+
+ifneq ($(ARCH),arm64)
+TEST_GEN_FILES += soft-dirty
+endif
+
+ifeq ($(ARCH),x86_64)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
+
+VMTARGETS := protection_keys
+VMTARGETS += pkey_sighandler_tests
+BINARIES_32 := $(VMTARGETS:%=%_32)
+BINARIES_64 := $(VMTARGETS:%=%_64)
+
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
+
+ifeq ($(CAN_BUILD_I386),1)
+TEST_GEN_FILES += $(BINARIES_32)
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+TEST_GEN_FILES += $(BINARIES_64)
+endif
+
+else ifeq ($(ARCH),arm64)
+TEST_GEN_FILES += protection_keys
+TEST_GEN_FILES += pkey_sighandler_tests
+else ifeq ($(ARCH),powerpc)
+TEST_GEN_FILES += protection_keys
+endif
+
+ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390))
+TEST_GEN_FILES += va_high_addr_switch
+ifneq ($(ARCH),riscv64)
+TEST_GEN_FILES += virtual_address_range
+endif
+TEST_GEN_FILES += write_to_hugetlbfs
+endif
+
+TEST_PROGS := run_vmtests.sh
+
+TEST_FILES := test_vmalloc.sh
+TEST_FILES += test_hmm.sh
+TEST_FILES += va_high_addr_switch.sh
+TEST_FILES += charge_reserved_hugetlb.sh
+TEST_FILES += hugetlb_reparenting_test.sh
+TEST_FILES += test_page_frag.sh
+
+# required by charge_reserved_hugetlb.sh
+TEST_FILES += write_hugetlb_memory.sh
+
+include ../lib.mk
+
+$(TEST_GEN_PROGS): vm_util.c thp_settings.c
+$(TEST_GEN_FILES): vm_util.c thp_settings.c
+
+$(OUTPUT)/uffd-stress: uffd-common.c
+$(OUTPUT)/uffd-unit-tests: uffd-common.c
+$(OUTPUT)/uffd-wp-mremap: uffd-common.c
+$(OUTPUT)/protection_keys: pkey_util.c
+$(OUTPUT)/pkey_sighandler_tests: pkey_util.c
+
+ifeq ($(ARCH),x86_64)
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
+$(BINARIES_32) $(BINARIES_64): pkey_util.c
+
+define gen-target-rule-32
+$(1) $(1)_32: $(OUTPUT)/$(1)_32
+.PHONY: $(1) $(1)_32
+endef
+
+define gen-target-rule-64
+$(1) $(1)_64: $(OUTPUT)/$(1)_64
+.PHONY: $(1) $(1)_64
+endef
+
+ifeq ($(CAN_BUILD_I386),1)
+$(BINARIES_32): CFLAGS += -m32 -mxsave
+$(BINARIES_32): LDLIBS += -lrt -ldl -lm
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+$(BINARIES_64): CFLAGS += -m64 -mxsave
+$(BINARIES_64): LDLIBS += -lrt -ldl
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
+endif
+
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+ @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \
+ echo "environment. This will reduce test coverage of 64-bit" 2>&1; \
+ echo "kernels. If you are using a Debian-like distribution," 2>&1; \
+ echo "try:"; 2>&1; \
+ echo ""; \
+ echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+ echo ""; \
+ echo "If you are using a Fedora-like distribution, try:"; \
+ echo ""; \
+ echo " yum install glibc-devel.*i686"; \
+ exit 0;
+endif
+endif
+
+# IOURING_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
+$(OUTPUT)/cow: LDLIBS += $(IOURING_EXTRA_LIBS)
+
+$(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS)
+
+$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
+
+$(OUTPUT)/ksm_tests: LDLIBS += -lnuma
+
+$(OUTPUT)/migration: LDLIBS += -lnuma
+
+$(OUTPUT)/rmap: LDLIBS += -lnuma
+
+local_config.mk local_config.h: check_config.sh
+ /bin/sh ./check_config.sh $(CC)
+
+EXTRA_CLEAN += local_config.mk local_config.h
+
+ifeq ($(IOURING_EXTRA_LIBS),)
+all: warn_missing_liburing
+
+warn_missing_liburing:
+ @echo ; \
+ echo "Warning: missing liburing support. Some tests will be skipped." ; \
+ echo
+endif
+
+ifneq ($(PAGE_FRAG_WARNING),)
+all: warn_missing_page_frag
+
+warn_missing_page_frag:
+ @echo ; \
+ echo "Warning: $(PAGE_FRAG_WARNING). page_frag test will be skipped." ; \
+ echo
+endif
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index 18d33684faad..e1fe16bcbbe8 100644..100755
--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -1,13 +1,18 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
set -e
if [[ $(id -u) -ne 0 ]]; then
echo "This test must be run as root. Skipping..."
- exit 0
+ exit $ksft_skip
fi
+nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+
fault_limit_file=limit_in_bytes
reservation_limit_file=rsvd.limit_in_bytes
fault_usage_file=usage_in_bytes
@@ -21,19 +26,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then
reservation_usage_file=rsvd.current
fi
-cgroup_path=/dev/cgroup/memory
-if [[ ! -e $cgroup_path ]]; then
- mkdir -p $cgroup_path
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=$(mktemp -d)
mount -t cgroup2 none $cgroup_path
- else
+ do_umount=1
+ fi
+ echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
+else
+ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=$(mktemp -d)
mount -t cgroup memory,hugetlb $cgroup_path
+ do_umount=1
fi
fi
-
-if [[ $cgroup2 ]]; then
- echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control
-fi
+export cgroup_path
function cleanup() {
if [[ $cgroup2 ]]; then
@@ -105,7 +114,7 @@ function setup_cgroup() {
function wait_for_hugetlb_memory_to_get_depleted() {
local cgroup="$1"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get depleted.
while [ $(cat $path) != 0 ]; do
echo Waiting for hugetlb memory to get depleted.
@@ -118,7 +127,7 @@ function wait_for_hugetlb_memory_to_get_reserved() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory reservation to reach size $size.
@@ -131,7 +140,7 @@ function wait_for_hugetlb_memory_to_get_written() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory to reach size $size.
@@ -245,7 +254,7 @@ function cleanup_hugetlb_memory() {
local cgroup="$1"
if [[ "$(pgrep -f write_to_hugetlbfs)" != "" ]]; then
echo killing write_to_hugetlbfs
- killall -2 write_to_hugetlbfs
+ killall -2 --wait write_to_hugetlbfs
wait_for_hugetlb_memory_to_get_depleted $cgroup
fi
set -e
@@ -571,5 +580,9 @@ for populate in "" "-o"; do
done # populate
done # method
-umount $cgroup_path
-rmdir $cgroup_path
+if [[ $do_umount ]]; then
+ umount $cgroup_path
+ rmdir $cgroup_path
+fi
+
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/mm/check_config.sh
index 079c8a40b85d..3954f4746161 100644..100755
--- a/tools/testing/selftests/vm/check_config.sh
+++ b/tools/testing/selftests/mm/check_config.sh
@@ -7,25 +7,25 @@
OUTPUT_H_FILE=local_config.h
OUTPUT_MKFILE=local_config.mk
-# libhugetlbfs
tmpname=$(mktemp)
tmpfile_c=${tmpname}.c
tmpfile_o=${tmpname}.o
+# liburing
echo "#include <sys/types.h>" > $tmpfile_c
-echo "#include <hugetlbfs.h>" >> $tmpfile_c
+echo "#include <liburing.h>" >> $tmpfile_c
echo "int func(void) { return 0; }" >> $tmpfile_c
CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"}
$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
if [ -f $tmpfile_o ]; then
- echo "#define LOCAL_CONFIG_HAVE_LIBHUGETLBFS 1" > $OUTPUT_H_FILE
- echo "HMM_EXTRA_LIBS = -lhugetlbfs" > $OUTPUT_MKFILE
+ echo "#define LOCAL_CONFIG_HAVE_LIBURING 1" > $OUTPUT_H_FILE
+ echo "IOURING_EXTRA_LIBS = -luring" > $OUTPUT_MKFILE
else
- echo "// No libhugetlbfs support found" > $OUTPUT_H_FILE
- echo "# No libhugetlbfs support found, so:" > $OUTPUT_MKFILE
- echo "HMM_EXTRA_LIBS = " >> $OUTPUT_MKFILE
+ echo "// No liburing support found" > $OUTPUT_H_FILE
+ echo "# No liburing support found, so:" > $OUTPUT_MKFILE
+ echo "IOURING_EXTRA_LIBS = " >> $OUTPUT_MKFILE
fi
rm ${tmpname}.*
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
new file mode 100644
index 000000000000..30209c40b697
--- /dev/null
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * A test for the patch "Allow compaction of unevictable pages".
+ * With this patch we should be able to allocate at least 1/4
+ * of RAM in huge pages. Without the patch much less is
+ * allocated.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "kselftest.h"
+
+#define MAP_SIZE_MB 100
+#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024)
+
+struct map_list {
+ void *map;
+ struct map_list *next;
+};
+
+int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
+{
+ char buffer[256] = {0};
+ char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'";
+ FILE *cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno));
+ return -1;
+ }
+
+ pclose(cmdfile);
+
+ *memfree = atoll(buffer);
+ cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'";
+ cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno));
+ return -1;
+ }
+
+ pclose(cmdfile);
+ *hugepagesize = atoll(buffer);
+
+ return 0;
+}
+
+int prereq(void)
+{
+ char allowed;
+ int fd;
+
+ fd = open("/proc/sys/vm/compact_unevictable_allowed",
+ O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ ksft_print_msg("Failed to open /proc/sys/vm/compact_unevictable_allowed: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ if (read(fd, &allowed, sizeof(char)) != sizeof(char)) {
+ ksft_print_msg("Failed to read from /proc/sys/vm/compact_unevictable_allowed: %s\n",
+ strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+ if (allowed == '1')
+ return 0;
+
+ ksft_print_msg("Compaction isn't allowed\n");
+ return -1;
+}
+
+int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
+ unsigned long initial_nr_hugepages)
+{
+ unsigned long nr_hugepages_ul;
+ int fd, ret = -1;
+ int compaction_index = 0;
+ char nr_hugepages[20] = {0};
+ char init_nr_hugepages[24] = {0};
+ char target_nr_hugepages[24] = {0};
+ int slen;
+
+ snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
+ "%lu", initial_nr_hugepages);
+
+ /* We want to test with 80% of available memory. Else, OOM killer comes
+ in to play */
+ mem_free = mem_free * 0.8;
+
+ fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ * Request huge pages for about half of the free memory. The Kernel
+ * will allocate as much as it can, and we expect it will get at least 1/3
+ */
+ nr_hugepages_ul = mem_free / hugepage_size / 2;
+ snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
+ "%lu", nr_hugepages_ul);
+
+ slen = strlen(target_nr_hugepages);
+ if (write(fd, target_nr_hugepages, slen) != slen) {
+ ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
+ nr_hugepages_ul, strerror(errno));
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
+ ksft_print_msg("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ goto close_fd;
+ }
+
+ /* We should have been able to request at least 1/3 rd of the memory in
+ huge pages */
+ nr_hugepages_ul = strtoul(nr_hugepages, NULL, 10);
+ if (!nr_hugepages_ul) {
+ ksft_print_msg("ERROR: No memory is available as huge pages\n");
+ goto close_fd;
+ }
+ compaction_index = mem_free/(nr_hugepages_ul * hugepage_size);
+
+ lseek(fd, 0, SEEK_SET);
+
+ if (write(fd, init_nr_hugepages, strlen(init_nr_hugepages))
+ != strlen(init_nr_hugepages)) {
+ ksft_print_msg("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ goto close_fd;
+ }
+
+ ksft_print_msg("Number of huge pages allocated = %lu\n",
+ nr_hugepages_ul);
+
+ if (compaction_index > 3) {
+ ksft_print_msg("ERROR: Less than 1/%d of memory is available\n"
+ "as huge pages\n", compaction_index);
+ goto close_fd;
+ }
+
+ ret = 0;
+
+ close_fd:
+ close(fd);
+ out:
+ ksft_test_result(ret == 0, "check_compaction\n");
+ return ret;
+}
+
+int set_zero_hugepages(unsigned long *initial_nr_hugepages)
+{
+ int fd, ret = -1;
+ char nr_hugepages[20] = {0};
+
+ fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ goto out;
+ }
+ if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
+ ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ /* Start with the initial condition of 0 huge pages */
+ if (write(fd, "0", sizeof(char)) != sizeof(char)) {
+ ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ goto close_fd;
+ }
+
+ *initial_nr_hugepages = strtoul(nr_hugepages, NULL, 10);
+ ret = 0;
+
+ close_fd:
+ close(fd);
+
+ out:
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit lim;
+ struct map_list *list = NULL, *entry;
+ size_t page_size, i;
+ void *map = NULL;
+ unsigned long mem_free = 0;
+ unsigned long hugepage_size = 0;
+ long mem_fragmentable_MB = 0;
+ unsigned long initial_nr_hugepages;
+
+ ksft_print_header();
+
+ if (prereq() || geteuid())
+ ksft_exit_skip("Prerequisites unsatisfied\n");
+
+ ksft_set_plan(1);
+
+ /* Start the test without hugepages reducing mem_free */
+ if (set_zero_hugepages(&initial_nr_hugepages))
+ ksft_exit_fail();
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ if (setrlimit(RLIMIT_MEMLOCK, &lim))
+ ksft_exit_fail_msg("Failed to set rlimit: %s\n", strerror(errno));
+
+ page_size = getpagesize();
+
+ if (read_memory_info(&mem_free, &hugepage_size) != 0)
+ ksft_exit_fail_msg("Failed to get meminfo\n");
+
+ mem_fragmentable_MB = mem_free * 0.8 / 1024;
+
+ while (mem_fragmentable_MB > 0) {
+ map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
+ if (map == MAP_FAILED)
+ break;
+
+ entry = malloc(sizeof(struct map_list));
+ if (!entry) {
+ munmap(map, MAP_SIZE);
+ break;
+ }
+ entry->map = map;
+ entry->next = list;
+ list = entry;
+
+ /* Write something (in this case the address of the map) to
+ * ensure that KSM can't merge the mapped pages
+ */
+ for (i = 0; i < MAP_SIZE; i += page_size)
+ *(unsigned long *)(map + i) = (unsigned long)map + i;
+
+ mem_fragmentable_MB -= MAP_SIZE_MB;
+ }
+
+ for (entry = list; entry != NULL; entry = entry->next) {
+ munmap(entry->map, MAP_SIZE);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+
+ if (check_compaction(mem_free, hugepage_size,
+ initial_nr_hugepages) == 0)
+ ksft_exit_pass();
+
+ ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config
new file mode 100644
index 000000000000..deba93379c80
--- /dev/null
+++ b/tools/testing/selftests/mm/config
@@ -0,0 +1,13 @@
+CONFIG_SYSVIPC=y
+CONFIG_USERFAULTFD=y
+CONFIG_PTE_MARKER_UFFD_WP=y
+CONFIG_TEST_VMALLOC=m
+CONFIG_DEVICE_PRIVATE=y
+CONFIG_TEST_HMM=m
+CONFIG_GUP_TEST=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_ANON_VMA_NAME=y
+CONFIG_FTRACE=y
+CONFIG_PROFILING=y
+CONFIG_UPROBES=y
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
new file mode 100644
index 000000000000..accfd198dbda
--- /dev/null
+++ b/tools/testing/selftests/mm/cow.c
@@ -0,0 +1,1897 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * COW (Copy On Write) tests.
+ *
+ * Copyright 2022, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <linux/memfd.h>
+
+#include "local_config.h"
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+#include <liburing.h>
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+#include "../../../../mm/gup_test.h"
+#include "kselftest.h"
+#include "vm_util.h"
+#include "thp_settings.h"
+
+static size_t pagesize;
+static int pagemap_fd;
+static size_t pmdsize;
+static int nr_thpsizes;
+static size_t thpsizes[20];
+static int nr_hugetlbsizes;
+static size_t hugetlbsizes[10];
+static int gup_fd;
+static bool has_huge_zeropage;
+
+static int detect_thp_sizes(size_t sizes[], int max)
+{
+ int count = 0;
+ unsigned long orders;
+ size_t kb;
+ int i;
+
+ /* thp not supported at all. */
+ if (!pmdsize)
+ return 0;
+
+ orders = 1UL << sz2ord(pmdsize, pagesize);
+ orders |= thp_supported_orders();
+
+ for (i = 0; orders && count < max; i++) {
+ if (!(orders & (1UL << i)))
+ continue;
+ orders &= ~(1UL << i);
+ kb = (pagesize >> 10) << i;
+ sizes[count++] = kb * 1024;
+ ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
+ }
+
+ return count;
+}
+
+static bool range_is_swapped(void *addr, size_t size)
+{
+ for (; size; addr += pagesize, size -= pagesize)
+ if (!pagemap_is_swapped(pagemap_fd, addr))
+ return false;
+ return true;
+}
+
+struct comm_pipes {
+ int child_ready[2];
+ int parent_ready[2];
+};
+
+static int setup_comm_pipes(struct comm_pipes *comm_pipes)
+{
+ if (pipe(comm_pipes->child_ready) < 0) {
+ ksft_perror("pipe() failed");
+ return -errno;
+ }
+ if (pipe(comm_pipes->parent_ready) < 0) {
+ ksft_perror("pipe() failed");
+ close(comm_pipes->child_ready[0]);
+ close(comm_pipes->child_ready[1]);
+ return -errno;
+ }
+
+ return 0;
+}
+
+static void close_comm_pipes(struct comm_pipes *comm_pipes)
+{
+ close(comm_pipes->child_ready[0]);
+ close(comm_pipes->child_ready[1]);
+ close(comm_pipes->parent_ready[0]);
+ close(comm_pipes->parent_ready[1]);
+}
+
+static int child_memcmp_fn(char *mem, size_t size,
+ struct comm_pipes *comm_pipes)
+{
+ char *old = malloc(size);
+ char buf;
+
+ /* Backup the original content. */
+ memcpy(old, mem, size);
+
+ /* Wait until the parent modified the page. */
+ write(comm_pipes->child_ready[1], "0", 1);
+ while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
+ ;
+
+ /* See if we still read the old values. */
+ return memcmp(old, mem, size);
+}
+
+static int child_vmsplice_memcmp_fn(char *mem, size_t size,
+ struct comm_pipes *comm_pipes)
+{
+ struct iovec iov = {
+ .iov_base = mem,
+ .iov_len = size,
+ };
+ ssize_t cur, total, transferred;
+ char *old, *new;
+ int fds[2];
+ char buf;
+
+ old = malloc(size);
+ new = malloc(size);
+
+ /* Backup the original content. */
+ memcpy(old, mem, size);
+
+ if (pipe(fds) < 0)
+ return -errno;
+
+ /* Trigger a read-only pin. */
+ transferred = vmsplice(fds[1], &iov, 1, 0);
+ if (transferred < 0)
+ return -errno;
+ if (transferred == 0)
+ return -EINVAL;
+
+ /* Unmap it from our page tables. */
+ if (munmap(mem, size) < 0)
+ return -errno;
+
+ /* Wait until the parent modified it. */
+ write(comm_pipes->child_ready[1], "0", 1);
+ while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
+ ;
+
+ /* See if we still read the old values via the pipe. */
+ for (total = 0; total < transferred; total += cur) {
+ cur = read(fds[0], new + total, transferred - total);
+ if (cur < 0)
+ return -errno;
+ }
+
+ return memcmp(old, new, transferred);
+}
+
+typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
+
+static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
+ child_fn fn, bool xfail)
+{
+ struct comm_pipes comm_pipes;
+ char buf;
+ int ret;
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+
+ ret = fork();
+ if (ret < 0) {
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ } else if (!ret) {
+ exit(fn(mem, size, &comm_pipes));
+ }
+
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+
+ if (do_mprotect) {
+ /*
+ * mprotect() optimizations might try avoiding
+ * write-faults by directly mapping pages writable.
+ */
+ ret = mprotect(mem, size, PROT_READ);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+
+ ret = mprotect(mem, size, PROT_READ|PROT_WRITE);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+ }
+
+ /* Modify the page. */
+ memset(mem, 0xff, size);
+ write(comm_pipes.parent_ready[1], "0", 1);
+
+ wait(&ret);
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ else
+ ret = -EINVAL;
+
+ if (!ret) {
+ log_test_result(KSFT_PASS);
+ } else if (xfail) {
+ /*
+ * With hugetlb, some vmsplice() tests are currently expected to
+ * fail because (a) harder to fix and (b) nobody really cares.
+ * Flag them as expected failure for now.
+ */
+ ksft_print_msg("Leak from parent into child\n");
+ log_test_result(KSFT_XFAIL);
+ } else {
+ ksft_print_msg("Leak from parent into child\n");
+ log_test_result(KSFT_FAIL);
+ }
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+}
+
+static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb)
+{
+ do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false);
+}
+
+static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb)
+{
+ do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false);
+}
+
+static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb)
+{
+ do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn,
+ is_hugetlb);
+}
+
+static void test_vmsplice_in_child_mprotect(char *mem, size_t size,
+ bool is_hugetlb)
+{
+ do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn,
+ is_hugetlb);
+}
+
+static void do_test_vmsplice_in_parent(char *mem, size_t size,
+ bool before_fork, bool xfail)
+{
+ struct iovec iov = {
+ .iov_base = mem,
+ .iov_len = size,
+ };
+ ssize_t cur, total, transferred = 0;
+ struct comm_pipes comm_pipes;
+ char *old, *new;
+ int ret, fds[2];
+ char buf;
+
+ old = malloc(size);
+ new = malloc(size);
+
+ memcpy(old, mem, size);
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ log_test_result(KSFT_FAIL);
+ goto free;
+ }
+
+ if (pipe(fds) < 0) {
+ ksft_perror("pipe() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ }
+
+ if (before_fork) {
+ transferred = vmsplice(fds[1], &iov, 1, 0);
+ if (transferred <= 0) {
+ ksft_perror("vmsplice() failed\n");
+ log_test_result(KSFT_FAIL);
+ goto close_pipe;
+ }
+ }
+
+ ret = fork();
+ if (ret < 0) {
+ ksft_perror("fork() failed\n");
+ log_test_result(KSFT_FAIL);
+ goto close_pipe;
+ } else if (!ret) {
+ write(comm_pipes.child_ready[1], "0", 1);
+ while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
+ ;
+ /* Modify page content in the child. */
+ memset(mem, 0xff, size);
+ exit(0);
+ }
+
+ if (!before_fork) {
+ transferred = vmsplice(fds[1], &iov, 1, 0);
+ if (transferred <= 0) {
+ ksft_perror("vmsplice() failed");
+ log_test_result(KSFT_FAIL);
+ wait(&ret);
+ goto close_pipe;
+ }
+ }
+
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+ if (munmap(mem, size) < 0) {
+ ksft_perror("munmap() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_pipe;
+ }
+ write(comm_pipes.parent_ready[1], "0", 1);
+
+ /* Wait until the child is done writing. */
+ wait(&ret);
+ if (!WIFEXITED(ret)) {
+ ksft_perror("wait() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_pipe;
+ }
+
+ /* See if we still read the old values. */
+ for (total = 0; total < transferred; total += cur) {
+ cur = read(fds[0], new + total, transferred - total);
+ if (cur < 0) {
+ ksft_perror("read() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_pipe;
+ }
+ }
+
+ if (!memcmp(old, new, transferred)) {
+ log_test_result(KSFT_PASS);
+ } else if (xfail) {
+ /*
+ * With hugetlb, some vmsplice() tests are currently expected to
+ * fail because (a) harder to fix and (b) nobody really cares.
+ * Flag them as expected failure for now.
+ */
+ ksft_print_msg("Leak from child into parent\n");
+ log_test_result(KSFT_XFAIL);
+ } else {
+ ksft_print_msg("Leak from child into parent\n");
+ log_test_result(KSFT_FAIL);
+ }
+close_pipe:
+ close(fds[0]);
+ close(fds[1]);
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+free:
+ free(old);
+ free(new);
+}
+
+static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb)
+{
+ do_test_vmsplice_in_parent(mem, size, true, is_hugetlb);
+}
+
+static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb)
+{
+ do_test_vmsplice_in_parent(mem, size, false, is_hugetlb);
+}
+
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+static void do_test_iouring(char *mem, size_t size, bool use_fork)
+{
+ struct comm_pipes comm_pipes;
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ struct io_uring ring;
+ ssize_t cur, total;
+ struct iovec iov;
+ char *buf, *tmp;
+ int ret, fd;
+ FILE *file;
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+
+ file = tmpfile();
+ if (!file) {
+ ksft_perror("tmpfile() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ }
+ fd = fileno(file);
+ assert(fd);
+
+ tmp = malloc(size);
+ if (!tmp) {
+ ksft_print_msg("malloc() failed\n");
+ log_test_result(KSFT_FAIL);
+ goto close_file;
+ }
+
+ /* Skip on errors, as we might just lack kernel support. */
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret < 0) {
+ ksft_print_msg("io_uring_queue_init() failed\n");
+ log_test_result(KSFT_SKIP);
+ goto free_tmp;
+ }
+
+ /*
+ * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
+ * | FOLL_LONGTERM the range.
+ *
+ * Skip on errors, as we might just lack kernel support or might not
+ * have sufficient MEMLOCK permissions.
+ */
+ iov.iov_base = mem;
+ iov.iov_len = size;
+ ret = io_uring_register_buffers(&ring, &iov, 1);
+ if (ret) {
+ ksft_print_msg("io_uring_register_buffers() failed\n");
+ log_test_result(KSFT_SKIP);
+ goto queue_exit;
+ }
+
+ if (use_fork) {
+ /*
+ * fork() and keep the child alive until we're done. Note that
+ * we expect the pinned page to not get shared with the child.
+ */
+ ret = fork();
+ if (ret < 0) {
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
+ goto unregister_buffers;
+ } else if (!ret) {
+ write(comm_pipes.child_ready[1], "0", 1);
+ while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
+ ;
+ exit(0);
+ }
+
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+ } else {
+ /*
+ * Map the page R/O into the page table. Enable softdirty
+ * tracking to stop the page from getting mapped R/W immediately
+ * again by mprotect() optimizations. Note that we don't have an
+ * easy way to test if that worked (the pagemap does not export
+ * if the page is mapped R/O vs. R/W).
+ */
+ ret = mprotect(mem, size, PROT_READ);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto unregister_buffers;
+ }
+
+ clear_softdirty();
+ ret = mprotect(mem, size, PROT_READ | PROT_WRITE);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto unregister_buffers;
+ }
+ }
+
+ /*
+ * Modify the page and write page content as observed by the fixed
+ * buffer pin to the file so we can verify it.
+ */
+ memset(mem, 0xff, size);
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ ksft_print_msg("io_uring_get_sqe() failed\n");
+ log_test_result(KSFT_FAIL);
+ goto quit_child;
+ }
+ io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
+
+ ret = io_uring_submit(&ring);
+ if (ret < 0) {
+ ksft_print_msg("io_uring_submit() failed\n");
+ log_test_result(KSFT_FAIL);
+ goto quit_child;
+ }
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0) {
+ ksft_print_msg("io_uring_wait_cqe() failed\n");
+ log_test_result(KSFT_FAIL);
+ goto quit_child;
+ }
+
+ if (cqe->res != size) {
+ ksft_print_msg("write_fixed failed\n");
+ log_test_result(KSFT_FAIL);
+ goto quit_child;
+ }
+ io_uring_cqe_seen(&ring, cqe);
+
+ /* Read back the file content to the temporary buffer. */
+ total = 0;
+ while (total < size) {
+ cur = pread(fd, tmp + total, size - total, total);
+ if (cur < 0) {
+ ksft_perror("pread() failed\n");
+ log_test_result(KSFT_FAIL);
+ goto quit_child;
+ }
+ total += cur;
+ }
+
+ /* Finally, check if we read what we expected. */
+ if (!memcmp(mem, tmp, size)) {
+ log_test_result(KSFT_PASS);
+ } else {
+ ksft_print_msg("Longtom R/W pin is not reliable\n");
+ log_test_result(KSFT_FAIL);
+ }
+
+quit_child:
+ if (use_fork) {
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ }
+unregister_buffers:
+ io_uring_unregister_buffers(&ring);
+queue_exit:
+ io_uring_queue_exit(&ring);
+free_tmp:
+ free(tmp);
+close_file:
+ fclose(file);
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+}
+
+static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb)
+{
+ do_test_iouring(mem, size, false);
+}
+
+static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb)
+{
+ do_test_iouring(mem, size, true);
+}
+
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+enum ro_pin_test {
+ RO_PIN_TEST,
+ RO_PIN_TEST_SHARED,
+ RO_PIN_TEST_PREVIOUSLY_SHARED,
+ RO_PIN_TEST_RO_EXCLUSIVE,
+};
+
+static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
+ bool fast)
+{
+ struct pin_longterm_test args;
+ struct comm_pipes comm_pipes;
+ char *tmp, buf;
+ __u64 tmp_val;
+ int ret;
+
+ if (gup_fd < 0) {
+ ksft_print_msg("gup_test not available\n");
+ log_test_result(KSFT_SKIP);
+ return;
+ }
+
+ tmp = malloc(size);
+ if (!tmp) {
+ ksft_perror("malloc() failed\n");
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ log_test_result(KSFT_FAIL);
+ goto free_tmp;
+ }
+
+ switch (test) {
+ case RO_PIN_TEST:
+ break;
+ case RO_PIN_TEST_SHARED:
+ case RO_PIN_TEST_PREVIOUSLY_SHARED:
+ /*
+ * Share the pages with our child. As the pages are not pinned,
+ * this should just work.
+ */
+ ret = fork();
+ if (ret < 0) {
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ } else if (!ret) {
+ write(comm_pipes.child_ready[1], "0", 1);
+ while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
+ ;
+ exit(0);
+ }
+
+ /* Wait until our child is ready. */
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+
+ if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
+ /*
+ * Tell the child to quit now and wait until it quit.
+ * The pages should now be mapped R/O into our page
+ * tables, but they are no longer shared.
+ */
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ if (!WIFEXITED(ret))
+ ksft_print_msg("[INFO] wait() failed\n");
+ }
+ break;
+ case RO_PIN_TEST_RO_EXCLUSIVE:
+ /*
+ * Map the page R/O into the page table. Enable softdirty
+ * tracking to stop the page from getting mapped R/W immediately
+ * again by mprotect() optimizations. Note that we don't have an
+ * easy way to test if that worked (the pagemap does not export
+ * if the page is mapped R/O vs. R/W).
+ */
+ ret = mprotect(mem, size, PROT_READ);
+ clear_softdirty();
+ ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ }
+ break;
+ default:
+ assert(false);
+ }
+
+ /* Take a R/O pin. This should trigger unsharing. */
+ args.addr = (__u64)(uintptr_t)mem;
+ args.size = size;
+ args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
+ ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
+ if (ret) {
+ if (errno == EINVAL)
+ ret = KSFT_SKIP;
+ else
+ ret = KSFT_FAIL;
+ ksft_perror("PIN_LONGTERM_TEST_START failed");
+ log_test_result(ret);
+ goto wait;
+ }
+
+ /* Modify the page. */
+ memset(mem, 0xff, size);
+
+ /*
+ * Read back the content via the pin to the temporary buffer and
+ * test if we observed the modification.
+ */
+ tmp_val = (__u64)(uintptr_t)tmp;
+ ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
+ if (ret) {
+ ksft_perror("PIN_LONGTERM_TEST_READ failed");
+ log_test_result(KSFT_FAIL);
+ } else {
+ if (!memcmp(mem, tmp, size)) {
+ log_test_result(KSFT_PASS);
+ } else {
+ ksft_print_msg("Longterm R/O pin is not reliable\n");
+ log_test_result(KSFT_FAIL);
+ }
+ }
+
+ ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
+ if (ret)
+ ksft_perror("PIN_LONGTERM_TEST_STOP failed");
+wait:
+ switch (test) {
+ case RO_PIN_TEST_SHARED:
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ if (!WIFEXITED(ret))
+ ksft_perror("wait() failed");
+ break;
+ default:
+ break;
+ }
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+free_tmp:
+ free(tmp);
+}
+
+static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
+}
+
+static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
+}
+
+static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size,
+ bool is_hugetlb)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
+}
+
+static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size,
+ bool is_hugetlb)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
+}
+
+static void test_ro_pin_on_ro_exclusive(char *mem, size_t size,
+ bool is_hugetlb)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
+}
+
+static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size,
+ bool is_hugetlb)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
+}
+
+typedef void (*test_fn)(char *mem, size_t size, bool hugetlb);
+
+static void do_run_with_base_page(test_fn fn, bool swapout)
+{
+ char *mem;
+ int ret;
+
+ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+
+ ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
+ /* Ignore if not around on a kernel. */
+ if (ret && errno != EINVAL) {
+ ksft_perror("MADV_NOHUGEPAGE failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+
+ /* Populate a base page. */
+ memset(mem, 1, pagesize);
+
+ if (swapout) {
+ madvise(mem, pagesize, MADV_PAGEOUT);
+ if (!pagemap_is_swapped(pagemap_fd, mem)) {
+ ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
+ log_test_result(KSFT_SKIP);
+ goto munmap;
+ }
+ }
+
+ fn(mem, pagesize, false);
+munmap:
+ munmap(mem, pagesize);
+}
+
+static void run_with_base_page(test_fn fn, const char *desc)
+{
+ log_test_start("%s ... with base page", desc);
+ do_run_with_base_page(fn, false);
+}
+
+static void run_with_base_page_swap(test_fn fn, const char *desc)
+{
+ log_test_start("%s ... with swapped out base page", desc);
+ do_run_with_base_page(fn, true);
+}
+
+enum thp_run {
+ THP_RUN_PMD,
+ THP_RUN_PMD_SWAPOUT,
+ THP_RUN_PTE,
+ THP_RUN_PTE_SWAPOUT,
+ THP_RUN_SINGLE_PTE,
+ THP_RUN_SINGLE_PTE_SWAPOUT,
+ THP_RUN_PARTIAL_MREMAP,
+ THP_RUN_PARTIAL_SHARED,
+};
+
+static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
+{
+ char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
+ size_t size, mmap_size, mremap_size;
+ int ret;
+
+ /* For alignment purposes, we need twice the thp size. */
+ mmap_size = 2 * thpsize;
+ mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mmap_mem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+
+ /* We need a THP-aligned memory area. */
+ mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
+
+ ret = madvise(mem, thpsize, MADV_HUGEPAGE);
+ if (ret) {
+ ksft_perror("MADV_HUGEPAGE failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+
+ /*
+ * Try to populate a THP. Touch the first sub-page and test if
+ * we get the last sub-page populated automatically.
+ */
+ mem[0] = 1;
+ if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
+ ksft_print_msg("Did not get a THP populated\n");
+ log_test_result(KSFT_SKIP);
+ goto munmap;
+ }
+ memset(mem, 1, thpsize);
+
+ size = thpsize;
+ switch (thp_run) {
+ case THP_RUN_PMD:
+ case THP_RUN_PMD_SWAPOUT:
+ assert(thpsize == pmdsize);
+ break;
+ case THP_RUN_PTE:
+ case THP_RUN_PTE_SWAPOUT:
+ /*
+ * Trigger PTE-mapping the THP by temporarily mapping a single
+ * subpage R/O. This is a noop if the THP is not pmdsize (and
+ * therefore already PTE-mapped).
+ */
+ ret = mprotect(mem + pagesize, pagesize, PROT_READ);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+ ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+ break;
+ case THP_RUN_SINGLE_PTE:
+ case THP_RUN_SINGLE_PTE_SWAPOUT:
+ /*
+ * Discard all but a single subpage of that PTE-mapped THP. What
+ * remains is a single PTE mapping a single subpage.
+ */
+ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
+ if (ret) {
+ ksft_perror("MADV_DONTNEED failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+ size = pagesize;
+ break;
+ case THP_RUN_PARTIAL_MREMAP:
+ /*
+ * Remap half of the THP. We need some new memory location
+ * for that.
+ */
+ mremap_size = thpsize / 2;
+ mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mremap_mem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+ tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
+ if (tmp != mremap_mem) {
+ ksft_perror("mremap() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+ size = mremap_size;
+ break;
+ case THP_RUN_PARTIAL_SHARED:
+ /*
+ * Share the first page of the THP with a child and quit the
+ * child. This will result in some parts of the THP never
+ * have been shared.
+ */
+ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
+ if (ret) {
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+ ret = fork();
+ if (ret < 0) {
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ } else if (!ret) {
+ exit(0);
+ }
+ wait(&ret);
+ /* Allow for sharing all pages again. */
+ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
+ if (ret) {
+ ksft_perror("MADV_DOFORK failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+ break;
+ default:
+ assert(false);
+ }
+
+ switch (thp_run) {
+ case THP_RUN_PMD_SWAPOUT:
+ case THP_RUN_PTE_SWAPOUT:
+ case THP_RUN_SINGLE_PTE_SWAPOUT:
+ madvise(mem, size, MADV_PAGEOUT);
+ if (!range_is_swapped(mem, size)) {
+ ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
+ log_test_result(KSFT_SKIP);
+ goto munmap;
+ }
+ break;
+ default:
+ break;
+ }
+
+ fn(mem, size, false);
+munmap:
+ munmap(mmap_mem, mmap_size);
+ if (mremap_mem != MAP_FAILED)
+ munmap(mremap_mem, mremap_size);
+}
+
+static void run_with_thp(test_fn fn, const char *desc, size_t size)
+{
+ log_test_start("%s ... with THP (%zu kB)",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PMD, size);
+}
+
+static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
+{
+ log_test_start("%s ... with swapped-out THP (%zu kB)",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
+}
+
+static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
+{
+ log_test_start("%s ... with PTE-mapped THP (%zu kB)",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PTE, size);
+}
+
+static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
+{
+ log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
+}
+
+static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
+{
+ log_test_start("%s ... with single PTE of THP (%zu kB)",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
+}
+
+static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
+{
+ log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
+}
+
+static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
+{
+ log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
+}
+
+static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
+{
+ log_test_start("%s ... with partially shared THP (%zu kB)",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
+}
+
+static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
+{
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
+ char *mem, *dummy;
+
+ log_test_start("%s ... with hugetlb (%zu kB)", desc,
+ hugetlbsize / 1024);
+
+ flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;
+
+ mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
+ return;
+ }
+
+ /* Populate an huge page. */
+ memset(mem, 1, hugetlbsize);
+
+ /*
+ * We need a total of two hugetlb pages to handle COW/unsharing
+ * properly, otherwise we might get zapped by a SIGBUS.
+ */
+ dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
+ if (dummy == MAP_FAILED) {
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
+ goto munmap;
+ }
+ munmap(dummy, hugetlbsize);
+
+ fn(mem, hugetlbsize, true);
+munmap:
+ munmap(mem, hugetlbsize);
+}
+
+struct test_case {
+ const char *desc;
+ test_fn fn;
+};
+
+/*
+ * Test cases that are specific to anonymous pages: pages in private mappings
+ * that may get shared via COW during fork().
+ */
+static const struct test_case anon_test_cases[] = {
+ /*
+ * Basic COW tests for fork() without any GUP. If we miss to break COW,
+ * either the child can observe modifications by the parent or the
+ * other way around.
+ */
+ {
+ "Basic COW after fork()",
+ test_cow_in_parent,
+ },
+ /*
+ * Basic test, but do an additional mprotect(PROT_READ)+
+ * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
+ */
+ {
+ "Basic COW after fork() with mprotect() optimization",
+ test_cow_in_parent_mprotect,
+ },
+ /*
+ * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
+ * we miss to break COW, the child observes modifications by the parent.
+ * This is CVE-2020-29374 reported by Jann Horn.
+ */
+ {
+ "vmsplice() + unmap in child",
+ test_vmsplice_in_child,
+ },
+ /*
+ * vmsplice() test, but do an additional mprotect(PROT_READ)+
+ * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
+ */
+ {
+ "vmsplice() + unmap in child with mprotect() optimization",
+ test_vmsplice_in_child_mprotect,
+ },
+ /*
+ * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
+ * fork(); modify in the child. If we miss to break COW, the parent
+ * observes modifications by the child.
+ */
+ {
+ "vmsplice() before fork(), unmap in parent after fork()",
+ test_vmsplice_before_fork,
+ },
+ /*
+ * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
+ * child. If we miss to break COW, the parent observes modifications by
+ * the child.
+ */
+ {
+ "vmsplice() + unmap in parent after fork()",
+ test_vmsplice_after_fork,
+ },
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ /*
+ * Take a R/W longterm pin and then map the page R/O into the page
+ * table to trigger a write fault on next access. When modifying the
+ * page, the page content must be visible via the pin.
+ */
+ {
+ "R/O-mapping a page registered as iouring fixed buffer",
+ test_iouring_ro,
+ },
+ /*
+ * Take a R/W longterm pin and then fork() a child. When modifying the
+ * page, the page content must be visible via the pin. We expect the
+ * pinned page to not get shared with the child.
+ */
+ {
+ "fork() with an iouring fixed buffer",
+ test_iouring_fork,
+ },
+
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+ /*
+ * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
+ * When modifying the page via the page table, the page content change
+ * must be visible via the pin.
+ */
+ {
+ "R/O GUP pin on R/O-mapped shared page",
+ test_ro_pin_on_shared,
+ },
+ /* Same as above, but using GUP-fast. */
+ {
+ "R/O GUP-fast pin on R/O-mapped shared page",
+ test_ro_fast_pin_on_shared,
+ },
+ /*
+ * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
+ * was previously shared. When modifying the page via the page table,
+ * the page content change must be visible via the pin.
+ */
+ {
+ "R/O GUP pin on R/O-mapped previously-shared page",
+ test_ro_pin_on_ro_previously_shared,
+ },
+ /* Same as above, but using GUP-fast. */
+ {
+ "R/O GUP-fast pin on R/O-mapped previously-shared page",
+ test_ro_fast_pin_on_ro_previously_shared,
+ },
+ /*
+ * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
+ * When modifying the page via the page table, the page content change
+ * must be visible via the pin.
+ */
+ {
+ "R/O GUP pin on R/O-mapped exclusive page",
+ test_ro_pin_on_ro_exclusive,
+ },
+ /* Same as above, but using GUP-fast. */
+ {
+ "R/O GUP-fast pin on R/O-mapped exclusive page",
+ test_ro_fast_pin_on_ro_exclusive,
+ },
+};
+
+static void run_anon_test_case(struct test_case const *test_case)
+{
+ int i;
+
+ run_with_base_page(test_case->fn, test_case->desc);
+ run_with_base_page_swap(test_case->fn, test_case->desc);
+ for (i = 0; i < nr_thpsizes; i++) {
+ size_t size = thpsizes[i];
+ struct thp_settings settings = *thp_current_settings();
+
+ settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER;
+ settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS;
+ thp_push_settings(&settings);
+
+ if (size == pmdsize) {
+ run_with_thp(test_case->fn, test_case->desc, size);
+ run_with_thp_swap(test_case->fn, test_case->desc, size);
+ }
+
+ run_with_pte_mapped_thp(test_case->fn, test_case->desc, size);
+ run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size);
+ run_with_single_pte_of_thp(test_case->fn, test_case->desc, size);
+ run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size);
+ run_with_partial_mremap_thp(test_case->fn, test_case->desc, size);
+ run_with_partial_shared_thp(test_case->fn, test_case->desc, size);
+
+ thp_pop_settings();
+ }
+ for (i = 0; i < nr_hugetlbsizes; i++)
+ run_with_hugetlb(test_case->fn, test_case->desc,
+ hugetlbsizes[i]);
+}
+
+static void run_anon_test_cases(void)
+{
+ int i;
+
+ ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");
+
+ for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
+ run_anon_test_case(&anon_test_cases[i]);
+}
+
+static int tests_per_anon_test_case(void)
+{
+ int tests = 2 + nr_hugetlbsizes;
+
+ tests += 6 * nr_thpsizes;
+ if (pmdsize)
+ tests += 2;
+ return tests;
+}
+
+enum anon_thp_collapse_test {
+ ANON_THP_COLLAPSE_UNSHARED,
+ ANON_THP_COLLAPSE_FULLY_SHARED,
+ ANON_THP_COLLAPSE_LOWER_SHARED,
+ ANON_THP_COLLAPSE_UPPER_SHARED,
+};
+
+static void do_test_anon_thp_collapse(char *mem, size_t size,
+ enum anon_thp_collapse_test test)
+{
+ struct comm_pipes comm_pipes;
+ char buf;
+ int ret;
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+
+ /*
+ * Trigger PTE-mapping the THP by temporarily mapping a single subpage
+ * R/O, such that we can try collapsing it later.
+ */
+ ret = mprotect(mem + pagesize, pagesize, PROT_READ);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ }
+ ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ }
+
+ switch (test) {
+ case ANON_THP_COLLAPSE_UNSHARED:
+ /* Collapse before actually COW-sharing the page. */
+ ret = madvise(mem, size, MADV_COLLAPSE);
+ if (ret) {
+ ksft_perror("MADV_COLLAPSE failed");
+ log_test_result(KSFT_SKIP);
+ goto close_comm_pipes;
+ }
+ break;
+ case ANON_THP_COLLAPSE_FULLY_SHARED:
+ /* COW-share the full PTE-mapped THP. */
+ break;
+ case ANON_THP_COLLAPSE_LOWER_SHARED:
+ /* Don't COW-share the upper part of the THP. */
+ ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
+ if (ret) {
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ }
+ break;
+ case ANON_THP_COLLAPSE_UPPER_SHARED:
+ /* Don't COW-share the lower part of the THP. */
+ ret = madvise(mem, size / 2, MADV_DONTFORK);
+ if (ret) {
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ }
+ break;
+ default:
+ assert(false);
+ }
+
+ ret = fork();
+ if (ret < 0) {
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
+ goto close_comm_pipes;
+ } else if (!ret) {
+ switch (test) {
+ case ANON_THP_COLLAPSE_UNSHARED:
+ case ANON_THP_COLLAPSE_FULLY_SHARED:
+ exit(child_memcmp_fn(mem, size, &comm_pipes));
+ break;
+ case ANON_THP_COLLAPSE_LOWER_SHARED:
+ exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
+ break;
+ case ANON_THP_COLLAPSE_UPPER_SHARED:
+ exit(child_memcmp_fn(mem + size / 2, size / 2,
+ &comm_pipes));
+ break;
+ default:
+ assert(false);
+ }
+ }
+
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+
+ switch (test) {
+ case ANON_THP_COLLAPSE_UNSHARED:
+ break;
+ case ANON_THP_COLLAPSE_UPPER_SHARED:
+ case ANON_THP_COLLAPSE_LOWER_SHARED:
+ /*
+ * Revert MADV_DONTFORK such that we merge the VMAs and are
+ * able to actually collapse.
+ */
+ ret = madvise(mem, size, MADV_DOFORK);
+ if (ret) {
+ ksft_perror("MADV_DOFORK failed");
+ log_test_result(KSFT_FAIL);
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+ /* FALLTHROUGH */
+ case ANON_THP_COLLAPSE_FULLY_SHARED:
+ /* Collapse before anyone modified the COW-shared page. */
+ ret = madvise(mem, size, MADV_COLLAPSE);
+ if (ret) {
+ ksft_perror("MADV_COLLAPSE failed");
+ log_test_result(KSFT_SKIP);
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+ break;
+ default:
+ assert(false);
+ }
+
+ /* Modify the page. */
+ memset(mem, 0xff, size);
+ write(comm_pipes.parent_ready[1], "0", 1);
+
+ wait(&ret);
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ else
+ ret = -EINVAL;
+
+ if (!ret) {
+ log_test_result(KSFT_PASS);
+ } else {
+ ksft_print_msg("Leak from parent into child\n");
+ log_test_result(KSFT_FAIL);
+ }
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+}
+
+static void test_anon_thp_collapse_unshared(char *mem, size_t size,
+ bool is_hugetlb)
+{
+ assert(!is_hugetlb);
+ do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
+}
+
+static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
+ bool is_hugetlb)
+{
+ assert(!is_hugetlb);
+ do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
+}
+
+static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
+ bool is_hugetlb)
+{
+ assert(!is_hugetlb);
+ do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
+}
+
+static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
+ bool is_hugetlb)
+{
+ assert(!is_hugetlb);
+ do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
+}
+
+/*
+ * Test cases that are specific to anonymous THP: pages in private mappings
+ * that may get shared via COW during fork().
+ */
+static const struct test_case anon_thp_test_cases[] = {
+ /*
+ * Basic COW test for fork() without any GUP when collapsing a THP
+ * before fork().
+ *
+ * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
+ * collapse") might easily get COW handling wrong when not collapsing
+ * exclusivity information properly.
+ */
+ {
+ "Basic COW after fork() when collapsing before fork()",
+ test_anon_thp_collapse_unshared,
+ },
+ /* Basic COW test, but collapse after COW-sharing a full THP. */
+ {
+ "Basic COW after fork() when collapsing after fork() (fully shared)",
+ test_anon_thp_collapse_fully_shared,
+ },
+ /*
+ * Basic COW test, but collapse after COW-sharing the lower half of a
+ * THP.
+ */
+ {
+ "Basic COW after fork() when collapsing after fork() (lower shared)",
+ test_anon_thp_collapse_lower_shared,
+ },
+ /*
+ * Basic COW test, but collapse after COW-sharing the upper half of a
+ * THP.
+ */
+ {
+ "Basic COW after fork() when collapsing after fork() (upper shared)",
+ test_anon_thp_collapse_upper_shared,
+ },
+};
+
+static void run_anon_thp_test_cases(void)
+{
+ int i;
+
+ if (!pmdsize)
+ return;
+
+ ksft_print_msg("[INFO] Anonymous THP tests\n");
+
+ for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
+ struct test_case const *test_case = &anon_thp_test_cases[i];
+
+ log_test_start("%s", test_case->desc);
+ do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
+ }
+}
+
+static int tests_per_anon_thp_test_case(void)
+{
+ return pmdsize ? 1 : 0;
+}
+
+typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
+
+static void test_cow(char *mem, const char *smem, size_t size)
+{
+ char *old = malloc(size);
+
+ /* Backup the original content. */
+ memcpy(old, smem, size);
+
+ /* Modify the page. */
+ memset(mem, 0xff, size);
+
+ /* See if we still read the old values via the other mapping. */
+ if (!memcmp(smem, old, size)) {
+ log_test_result(KSFT_PASS);
+ } else {
+ ksft_print_msg("Other mapping modified\n");
+ log_test_result(KSFT_FAIL);
+ }
+ free(old);
+}
+
+static void test_ro_pin(char *mem, const char *smem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST, false);
+}
+
+static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST, true);
+}
+
+static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
+{
+ char *mem, *smem;
+
+ log_test_start("%s ... with shared zeropage", desc);
+
+ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+
+ smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (smem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+
+ /* Read from the page to populate the shared zeropage. */
+ FORCE_READ(*mem);
+ FORCE_READ(*smem);
+
+ fn(mem, smem, pagesize);
+munmap:
+ munmap(mem, pagesize);
+ if (smem != MAP_FAILED)
+ munmap(smem, pagesize);
+}
+
+static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
+{
+ char *mem, *smem, *mmap_mem, *mmap_smem;
+ size_t mmap_size;
+ int ret;
+
+ log_test_start("%s ... with huge zeropage", desc);
+
+ if (!has_huge_zeropage) {
+ ksft_print_msg("Huge zeropage not enabled\n");
+ log_test_result(KSFT_SKIP);
+ return;
+ }
+
+ /* For alignment purposes, we need twice the thp size. */
+ mmap_size = 2 * pmdsize;
+ mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mmap_mem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+ mmap_smem = mmap(NULL, mmap_size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mmap_smem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+
+ /* We need a THP-aligned memory area. */
+ mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
+ smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
+
+ ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
+ if (ret) {
+ ksft_perror("madvise()");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+ ret = madvise(smem, pmdsize, MADV_HUGEPAGE);
+ if (ret) {
+ ksft_perror("madvise()");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+
+ /*
+ * Read from the memory to populate the huge shared zeropage. Read from
+ * the first sub-page and test if we get another sub-page populated
+ * automatically.
+ */
+ FORCE_READ(mem);
+ FORCE_READ(smem);
+ if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
+ !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
+ ksft_test_result_skip("Did not get THPs populated\n");
+ goto munmap;
+ }
+
+ fn(mem, smem, pmdsize);
+munmap:
+ munmap(mmap_mem, mmap_size);
+ if (mmap_smem != MAP_FAILED)
+ munmap(mmap_smem, mmap_size);
+}
+
+static void run_with_memfd(non_anon_test_fn fn, const char *desc)
+{
+ char *mem, *smem;
+ int fd;
+
+ log_test_start("%s ... with memfd", desc);
+
+ fd = memfd_create("test", 0);
+ if (fd < 0) {
+ ksft_perror("memfd_create() failed");
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+
+ /* File consists of a single page filled with zeroes. */
+ if (fallocate(fd, 0, 0, pagesize)) {
+ ksft_perror("fallocate() failed");
+ log_test_result(KSFT_FAIL);
+ goto close;
+ }
+
+ /* Create a private mapping of the memfd. */
+ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (mem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ goto close;
+ }
+ smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
+ if (smem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+
+ /* Fault the page in. */
+ FORCE_READ(mem);
+ FORCE_READ(smem);
+
+ fn(mem, smem, pagesize);
+munmap:
+ munmap(mem, pagesize);
+ if (smem != MAP_FAILED)
+ munmap(smem, pagesize);
+close:
+ close(fd);
+}
+
+static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
+{
+ char *mem, *smem;
+ FILE *file;
+ int fd;
+
+ log_test_start("%s ... with tmpfile", desc);
+
+ file = tmpfile();
+ if (!file) {
+ ksft_perror("tmpfile() failed");
+ log_test_result(KSFT_FAIL);
+ return;
+ }
+
+ fd = fileno(file);
+ if (fd < 0) {
+ ksft_perror("fileno() failed");
+ log_test_result(KSFT_SKIP);
+ return;
+ }
+
+ /* File consists of a single page filled with zeroes. */
+ if (fallocate(fd, 0, 0, pagesize)) {
+ ksft_perror("fallocate() failed");
+ log_test_result(KSFT_FAIL);
+ goto close;
+ }
+
+ /* Create a private mapping of the memfd. */
+ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (mem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ goto close;
+ }
+ smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
+ if (smem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+
+ /* Fault the page in. */
+ FORCE_READ(mem);
+ FORCE_READ(smem);
+
+ fn(mem, smem, pagesize);
+munmap:
+ munmap(mem, pagesize);
+ if (smem != MAP_FAILED)
+ munmap(smem, pagesize);
+close:
+ fclose(file);
+}
+
+static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
+ size_t hugetlbsize)
+{
+ int flags = MFD_HUGETLB;
+ char *mem, *smem;
+ int fd;
+
+ log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
+ hugetlbsize / 1024);
+
+ flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
+
+ fd = memfd_create("test", flags);
+ if (fd < 0) {
+ ksft_perror("memfd_create() failed");
+ log_test_result(KSFT_SKIP);
+ return;
+ }
+
+ /* File consists of a single page filled with zeroes. */
+ if (fallocate(fd, 0, 0, hugetlbsize)) {
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
+ goto close;
+ }
+
+ /* Create a private mapping of the memfd. */
+ mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
+ 0);
+ if (mem == MAP_FAILED) {
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
+ goto close;
+ }
+ smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
+ if (smem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+
+ /* Fault the page in. */
+ FORCE_READ(mem);
+ FORCE_READ(smem);
+
+ fn(mem, smem, hugetlbsize);
+munmap:
+ munmap(mem, hugetlbsize);
+ if (smem != MAP_FAILED)
+ munmap(smem, hugetlbsize);
+close:
+ close(fd);
+}
+
+struct non_anon_test_case {
+ const char *desc;
+ non_anon_test_fn fn;
+};
+
+/*
+ * Test cases that target any pages in private mappings that are not anonymous:
+ * pages that may get shared via COW ndependent of fork(). This includes
+ * the shared zeropage(s), pagecache pages, ...
+ */
+static const struct non_anon_test_case non_anon_test_cases[] = {
+ /*
+ * Basic COW test without any GUP. If we miss to break COW, changes are
+ * visible via other private/shared mappings.
+ */
+ {
+ "Basic COW",
+ test_cow,
+ },
+ /*
+ * Take a R/O longterm pin. When modifying the page via the page table,
+ * the page content change must be visible via the pin.
+ */
+ {
+ "R/O longterm GUP pin",
+ test_ro_pin,
+ },
+ /* Same as above, but using GUP-fast. */
+ {
+ "R/O longterm GUP-fast pin",
+ test_ro_fast_pin,
+ },
+};
+
+static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
+{
+ int i;
+
+ run_with_zeropage(test_case->fn, test_case->desc);
+ run_with_memfd(test_case->fn, test_case->desc);
+ run_with_tmpfile(test_case->fn, test_case->desc);
+ if (pmdsize)
+ run_with_huge_zeropage(test_case->fn, test_case->desc);
+ for (i = 0; i < nr_hugetlbsizes; i++)
+ run_with_memfd_hugetlb(test_case->fn, test_case->desc,
+ hugetlbsizes[i]);
+}
+
+static void run_non_anon_test_cases(void)
+{
+ int i;
+
+ ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");
+
+ for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
+ run_non_anon_test_case(&non_anon_test_cases[i]);
+}
+
+static int tests_per_non_anon_test_case(void)
+{
+ int tests = 3 + nr_hugetlbsizes;
+
+ if (pmdsize)
+ tests += 1;
+ return tests;
+}
+
+int main(int argc, char **argv)
+{
+ struct thp_settings default_settings;
+
+ ksft_print_header();
+
+ pagesize = getpagesize();
+ pmdsize = read_pmd_pagesize();
+ if (pmdsize) {
+ /* Only if THP is supported. */
+ thp_read_settings(&default_settings);
+ default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT;
+ thp_save_settings();
+ thp_push_settings(&default_settings);
+
+ ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
+ pmdsize / 1024);
+ nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
+ }
+ nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+ ARRAY_SIZE(hugetlbsizes));
+ has_huge_zeropage = detect_huge_zeropage();
+
+ ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
+ ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
+ ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());
+
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+
+ run_anon_test_cases();
+ run_anon_thp_test_cases();
+ run_non_anon_test_cases();
+
+ if (pmdsize) {
+ /* Only if THP is supported. */
+ thp_restore_settings();
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/droppable.c b/tools/testing/selftests/mm/droppable.c
new file mode 100644
index 000000000000..44940f75c461
--- /dev/null
+++ b/tools/testing/selftests/mm/droppable.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+
+#include "kselftest.h"
+
+int main(int argc, char *argv[])
+{
+ size_t alloc_size = 134217728;
+ size_t page_size = getpagesize();
+ void *alloc;
+ pid_t child;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ alloc = mmap(0, alloc_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_DROPPABLE, -1, 0);
+ assert(alloc != MAP_FAILED);
+ memset(alloc, 'A', alloc_size);
+ for (size_t i = 0; i < alloc_size; i += page_size)
+ assert(*(uint8_t *)(alloc + i));
+
+ child = fork();
+ assert(child >= 0);
+ if (!child) {
+ for (;;)
+ *(char *)malloc(page_size) = 'B';
+ }
+
+ for (bool done = false; !done;) {
+ for (size_t i = 0; i < alloc_size; i += page_size) {
+ if (!*(uint8_t *)(alloc + i)) {
+ done = true;
+ break;
+ }
+ }
+ }
+ kill(child, SIGTERM);
+
+ ksft_test_result_pass("MAP_DROPPABLE: PASS\n");
+ exit(KSFT_PASS);
+}
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
new file mode 100644
index 000000000000..dbd21d66d383
--- /dev/null
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -0,0 +1,2326 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include "kselftest_harness.h"
+#include <asm-generic/mman.h> /* Force the import of the tools version. */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <linux/userfaultfd.h>
+#include <linux/fs.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include "vm_util.h"
+
+#include "../pidfd/pidfd.h"
+
+/*
+ * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1:
+ *
+ * "If the signal occurs other than as the result of calling the abort or raise
+ * function, the behavior is undefined if the signal handler refers to any
+ * object with static storage duration other than by assigning a value to an
+ * object declared as volatile sig_atomic_t"
+ */
+static volatile sig_atomic_t signal_jump_set;
+static sigjmp_buf signal_jmp_buf;
+
+/*
+ * How is the test backing the mapping being tested?
+ */
+enum backing_type {
+ ANON_BACKED,
+ SHMEM_BACKED,
+ LOCAL_FILE_BACKED,
+};
+
+FIXTURE(guard_regions)
+{
+ unsigned long page_size;
+ char path[PATH_MAX];
+ int fd;
+};
+
+FIXTURE_VARIANT(guard_regions)
+{
+ enum backing_type backing;
+};
+
+FIXTURE_VARIANT_ADD(guard_regions, anon)
+{
+ .backing = ANON_BACKED,
+};
+
+FIXTURE_VARIANT_ADD(guard_regions, shmem)
+{
+ .backing = SHMEM_BACKED,
+};
+
+FIXTURE_VARIANT_ADD(guard_regions, file)
+{
+ .backing = LOCAL_FILE_BACKED,
+};
+
+static bool is_anon_backed(const FIXTURE_VARIANT(guard_regions) * variant)
+{
+ switch (variant->backing) {
+ case ANON_BACKED:
+ case SHMEM_BACKED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void *mmap_(FIXTURE_DATA(guard_regions) * self,
+ const FIXTURE_VARIANT(guard_regions) * variant,
+ void *addr, size_t length, int prot, int extra_flags,
+ off_t offset)
+{
+ int fd;
+ int flags = extra_flags;
+
+ switch (variant->backing) {
+ case ANON_BACKED:
+ flags |= MAP_PRIVATE | MAP_ANON;
+ fd = -1;
+ offset = 0;
+ break;
+ case SHMEM_BACKED:
+ case LOCAL_FILE_BACKED:
+ flags |= MAP_SHARED;
+ fd = self->fd;
+ break;
+ default:
+ ksft_exit_fail();
+ break;
+ }
+
+ return mmap(addr, length, prot, flags, fd, offset);
+}
+
+static int userfaultfd(int flags)
+{
+ return syscall(SYS_userfaultfd, flags);
+}
+
+static void handle_fatal(int c)
+{
+ if (!signal_jump_set)
+ return;
+
+ siglongjmp(signal_jmp_buf, c);
+}
+
+static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
+ size_t n, int advice, unsigned int flags)
+{
+ return syscall(__NR_process_madvise, pidfd, iovec, n, advice, flags);
+}
+
+/*
+ * Enable our signal catcher and try to read/write the specified buffer. The
+ * return value indicates whether the read/write succeeds without a fatal
+ * signal.
+ */
+static bool try_access_buf(char *ptr, bool write)
+{
+ bool failed;
+
+ /* Tell signal handler to jump back here on fatal signal. */
+ signal_jump_set = true;
+ /* If a fatal signal arose, we will jump back here and failed is set. */
+ failed = sigsetjmp(signal_jmp_buf, 0) != 0;
+
+ if (!failed) {
+ if (write)
+ *ptr = 'x';
+ else
+ FORCE_READ(*ptr);
+ }
+
+ signal_jump_set = false;
+ return !failed;
+}
+
+/* Try and read from a buffer, return true if no fatal signal. */
+static bool try_read_buf(char *ptr)
+{
+ return try_access_buf(ptr, false);
+}
+
+/* Try and write to a buffer, return true if no fatal signal. */
+static bool try_write_buf(char *ptr)
+{
+ return try_access_buf(ptr, true);
+}
+
+/*
+ * Try and BOTH read from AND write to a buffer, return true if BOTH operations
+ * succeed.
+ */
+static bool try_read_write_buf(char *ptr)
+{
+ return try_read_buf(ptr) && try_write_buf(ptr);
+}
+
+static void setup_sighandler(void)
+{
+ struct sigaction act = {
+ .sa_handler = &handle_fatal,
+ .sa_flags = SA_NODEFER,
+ };
+
+ sigemptyset(&act.sa_mask);
+ if (sigaction(SIGSEGV, &act, NULL))
+ ksft_exit_fail_perror("sigaction");
+}
+
+static void teardown_sighandler(void)
+{
+ struct sigaction act = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_NODEFER,
+ };
+
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGSEGV, &act, NULL);
+}
+
+static int open_file(const char *prefix, char *path)
+{
+ int fd;
+
+ snprintf(path, PATH_MAX, "%sguard_regions_test_file_XXXXXX", prefix);
+ fd = mkstemp(path);
+ if (fd < 0)
+ ksft_exit_fail_perror("mkstemp");
+
+ return fd;
+}
+
+/* Establish a varying pattern in a buffer. */
+static void set_pattern(char *ptr, size_t num_pages, size_t page_size)
+{
+ size_t i;
+
+ for (i = 0; i < num_pages; i++) {
+ char *ptr2 = &ptr[i * page_size];
+
+ memset(ptr2, 'a' + (i % 26), page_size);
+ }
+}
+
+/*
+ * Check that a buffer contains the pattern set by set_pattern(), starting at a
+ * page offset of pgoff within the buffer.
+ */
+static bool check_pattern_offset(char *ptr, size_t num_pages, size_t page_size,
+ size_t pgoff)
+{
+ size_t i;
+
+ for (i = 0; i < num_pages * page_size; i++) {
+ size_t offset = pgoff * page_size + i;
+ char actual = ptr[offset];
+ char expected = 'a' + ((offset / page_size) % 26);
+
+ if (actual != expected)
+ return false;
+ }
+
+ return true;
+}
+
+/* Check that a buffer contains the pattern set by set_pattern(). */
+static bool check_pattern(char *ptr, size_t num_pages, size_t page_size)
+{
+ return check_pattern_offset(ptr, num_pages, page_size, 0);
+}
+
+/* Determine if a buffer contains only repetitions of a specified char. */
+static bool is_buf_eq(char *buf, size_t size, char chr)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (buf[i] != chr)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Some file systems have issues with merging due to changing merge-sensitive
+ * parameters in the .mmap callback, and prior to .mmap_prepare being
+ * implemented everywhere this will now result in an unexpected failure to
+ * merge (e.g. - overlayfs).
+ *
+ * Perform a simple test to see if the local file system suffers from this, if
+ * it does then we can skip test logic that assumes local file system merging is
+ * sane.
+ */
+static bool local_fs_has_sane_mmap(FIXTURE_DATA(guard_regions) * self,
+ const FIXTURE_VARIANT(guard_regions) * variant)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr2;
+ struct procmap_fd procmap;
+
+ if (variant->backing != LOCAL_FILE_BACKED)
+ return true;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ if (ptr == MAP_FAILED)
+ return false;
+ /* Unmap the middle. */
+ munmap(&ptr[5 * page_size], page_size);
+
+ /* Map again. */
+ ptr2 = mmap_(self, variant, &ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED, 5 * page_size);
+
+ if (ptr2 == MAP_FAILED)
+ return false;
+
+ /* Now make sure they all merged. */
+ if (open_self_procmap(&procmap) != 0)
+ return false;
+ if (!find_vma_procmap(&procmap, ptr))
+ return false;
+ if (procmap.query.vma_start != (unsigned long)ptr)
+ return false;
+ if (procmap.query.vma_end != (unsigned long)ptr + 10 * page_size)
+ return false;
+ close_procmap(&procmap);
+
+ return true;
+}
+
+FIXTURE_SETUP(guard_regions)
+{
+ self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
+ setup_sighandler();
+
+ switch (variant->backing) {
+ case ANON_BACKED:
+ return;
+ case LOCAL_FILE_BACKED:
+ self->fd = open_file("", self->path);
+ break;
+ case SHMEM_BACKED:
+ self->fd = memfd_create(self->path, 0);
+ break;
+ }
+
+ /* We truncate file to at least 100 pages, tests can modify as needed. */
+ ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0);
+};
+
+FIXTURE_TEARDOWN_PARENT(guard_regions)
+{
+ teardown_sighandler();
+
+ if (variant->backing == ANON_BACKED)
+ return;
+
+ if (self->fd >= 0)
+ close(self->fd);
+
+ if (self->path[0] != '\0')
+ unlink(self->path);
+}
+
+TEST_F(guard_regions, basic)
+{
+ const unsigned long NUM_PAGES = 10;
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, NUM_PAGES * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Trivially assert we can touch the first page. */
+ ASSERT_TRUE(try_read_write_buf(ptr));
+
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Establish that 1st page SIGSEGV's. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+
+ /* Ensure we can touch everything else.*/
+ for (i = 1; i < NUM_PAGES; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Establish a guard page at the end of the mapping. */
+ ASSERT_EQ(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /* Check that both guard pages result in SIGSEGV. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size]));
+
+ /* Remove the first guard page. */
+ ASSERT_FALSE(madvise(ptr, page_size, MADV_GUARD_REMOVE));
+
+ /* Make sure we can touch it. */
+ ASSERT_TRUE(try_read_write_buf(ptr));
+
+ /* Remove the last guard page. */
+ ASSERT_FALSE(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size,
+ MADV_GUARD_REMOVE));
+
+ /* Make sure we can touch it. */
+ ASSERT_TRUE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size]));
+
+ /*
+ * Test setting a _range_ of pages, namely the first 3. The first of
+ * these be faulted in, so this also tests that we can install guard
+ * pages over backed pages.
+ */
+ ASSERT_EQ(madvise(ptr, 3 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure they are all guard pages. */
+ for (i = 0; i < 3; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Make sure the rest are not. */
+ for (i = 3; i < NUM_PAGES; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Remove guard pages. */
+ ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now make sure we can touch everything. */
+ for (i = 0; i < NUM_PAGES; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /*
+ * Now remove all guard pages, make sure we don't remove existing
+ * entries.
+ */
+ ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0);
+
+ for (i = 0; i < NUM_PAGES * page_size; i += page_size) {
+ char chr = ptr[i];
+
+ ASSERT_EQ(chr, 'x');
+ }
+
+ ASSERT_EQ(munmap(ptr, NUM_PAGES * page_size), 0);
+}
+
+/* Assert that operations applied across multiple VMAs work as expected. */
+TEST_F(guard_regions, multi_vma)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3;
+ int i;
+
+ /* Reserve a 100 page region over which we can install VMAs. */
+ ptr_region = mmap_(self, variant, NULL, 100 * page_size,
+ PROT_NONE, 0, 0);
+ ASSERT_NE(ptr_region, MAP_FAILED);
+
+ /* Place a VMA of 10 pages size at the start of the region. */
+ ptr1 = mmap_(self, variant, ptr_region, 10 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr1, MAP_FAILED);
+
+ /* Place a VMA of 5 pages size 50 pages into the region. */
+ ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Place a VMA of 20 pages size at the end of the region. */
+ ptr3 = mmap_(self, variant, &ptr_region[80 * page_size], 20 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ /* Unmap gaps. */
+ ASSERT_EQ(munmap(&ptr_region[10 * page_size], 40 * page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[55 * page_size], 25 * page_size), 0);
+
+ /*
+ * We end up with VMAs like this:
+ *
+ * 0 10 .. 50 55 .. 80 100
+ * [---] [---] [---]
+ */
+
+ /*
+ * Now mark the whole range as guard pages and make sure all VMAs are as
+ * such.
+ */
+
+ /*
+ * madvise() is certifiable and lets you perform operations over gaps,
+ * everything works, but it indicates an error and errno is set to
+ * -ENOMEM. Also if anything runs out of memory it is set to
+ * -ENOMEM. You are meant to guess which is which.
+ */
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), -1);
+ ASSERT_EQ(errno, ENOMEM);
+
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr1[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 5; i++) {
+ char *curr = &ptr2[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 20; i++) {
+ char *curr = &ptr3[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now remove guar pages over range and assert the opposite. */
+
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), -1);
+ ASSERT_EQ(errno, ENOMEM);
+
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr1[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 5; i++) {
+ char *curr = &ptr2[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 20; i++) {
+ char *curr = &ptr3[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Now map incompatible VMAs in the gaps. */
+ ptr = mmap_(self, variant, &ptr_region[10 * page_size], 40 * page_size,
+ PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ ptr = mmap_(self, variant, &ptr_region[55 * page_size], 25 * page_size,
+ PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * We end up with VMAs like this:
+ *
+ * 0 10 .. 50 55 .. 80 100
+ * [---][xxxx][---][xxxx][---]
+ *
+ * Where 'x' signifies VMAs that cannot be merged with those adjacent to
+ * them.
+ */
+
+ /* Multiple VMAs adjacent to one another should result in no error. */
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), 0);
+ for (i = 0; i < 100; i++) {
+ char *curr = &ptr_region[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), 0);
+ for (i = 0; i < 100; i++) {
+ char *curr = &ptr_region[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr_region, 100 * page_size), 0);
+}
+
+/*
+ * Assert that batched operations performed using process_madvise() work as
+ * expected.
+ */
+TEST_F(guard_regions, process_madvise)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr_region, *ptr1, *ptr2, *ptr3;
+ ssize_t count;
+ struct iovec vec[6];
+
+ /* Reserve region to map over. */
+ ptr_region = mmap_(self, variant, NULL, 100 * page_size,
+ PROT_NONE, 0, 0);
+ ASSERT_NE(ptr_region, MAP_FAILED);
+
+ /*
+ * 10 pages offset 1 page into reserve region. We MAP_POPULATE so we
+ * overwrite existing entries and test this code path against
+ * overwriting existing entries.
+ */
+ ptr1 = mmap_(self, variant, &ptr_region[page_size], 10 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED | MAP_POPULATE, 0);
+ ASSERT_NE(ptr1, MAP_FAILED);
+ /* We want guard markers at start/end of each VMA. */
+ vec[0].iov_base = ptr1;
+ vec[0].iov_len = page_size;
+ vec[1].iov_base = &ptr1[9 * page_size];
+ vec[1].iov_len = page_size;
+
+ /* 5 pages offset 50 pages into reserve region. */
+ ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+ vec[2].iov_base = ptr2;
+ vec[2].iov_len = page_size;
+ vec[3].iov_base = &ptr2[4 * page_size];
+ vec[3].iov_len = page_size;
+
+ /* 20 pages offset 79 pages into reserve region. */
+ ptr3 = mmap_(self, variant, &ptr_region[79 * page_size], 20 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr3, MAP_FAILED);
+ vec[4].iov_base = ptr3;
+ vec[4].iov_len = page_size;
+ vec[5].iov_base = &ptr3[19 * page_size];
+ vec[5].iov_len = page_size;
+
+ /* Free surrounding VMAs. */
+ ASSERT_EQ(munmap(ptr_region, page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[11 * page_size], 39 * page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[55 * page_size], 24 * page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0);
+
+ /* Now guard in one step. */
+ count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_INSTALL, 0);
+
+ /* OK we don't have permission to do this, skip. */
+ if (count == -1 && errno == EPERM)
+ SKIP(return, "No process_madvise() permissions, try running as root.\n");
+
+ /* Returns the number of bytes advised. */
+ ASSERT_EQ(count, 6 * page_size);
+
+ /* Now make sure the guarding was applied. */
+
+ ASSERT_FALSE(try_read_write_buf(ptr1));
+ ASSERT_FALSE(try_read_write_buf(&ptr1[9 * page_size]));
+
+ ASSERT_FALSE(try_read_write_buf(ptr2));
+ ASSERT_FALSE(try_read_write_buf(&ptr2[4 * page_size]));
+
+ ASSERT_FALSE(try_read_write_buf(ptr3));
+ ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size]));
+
+ /* Now do the same with unguard... */
+ count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_REMOVE, 0);
+
+ /* ...and everything should now succeed. */
+
+ ASSERT_TRUE(try_read_write_buf(ptr1));
+ ASSERT_TRUE(try_read_write_buf(&ptr1[9 * page_size]));
+
+ ASSERT_TRUE(try_read_write_buf(ptr2));
+ ASSERT_TRUE(try_read_write_buf(&ptr2[4 * page_size]));
+
+ ASSERT_TRUE(try_read_write_buf(ptr3));
+ ASSERT_TRUE(try_read_write_buf(&ptr3[19 * page_size]));
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr1, 10 * page_size), 0);
+ ASSERT_EQ(munmap(ptr2, 5 * page_size), 0);
+ ASSERT_EQ(munmap(ptr3, 20 * page_size), 0);
+}
+
+/* Assert that unmapping ranges does not leave guard markers behind. */
+TEST_F(guard_regions, munmap)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new1, *ptr_new2;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard first and last pages. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[9 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Assert that they are guarded. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[9 * page_size]));
+
+ /* Unmap them. */
+ ASSERT_EQ(munmap(ptr, page_size), 0);
+ ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0);
+
+ /* Map over them.*/
+ ptr_new1 = mmap_(self, variant, ptr, page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED, 0);
+ ASSERT_NE(ptr_new1, MAP_FAILED);
+ ptr_new2 = mmap_(self, variant, &ptr[9 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr_new2, MAP_FAILED);
+
+ /* Assert that they are now not guarded. */
+ ASSERT_TRUE(try_read_write_buf(ptr_new1));
+ ASSERT_TRUE(try_read_write_buf(ptr_new2));
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Assert that mprotect() operations have no bearing on guard markers. */
+TEST_F(guard_regions, mprotect)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard the middle of the range. */
+ ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /* Assert that it is indeed guarded. */
+ ASSERT_FALSE(try_read_write_buf(&ptr[5 * page_size]));
+ ASSERT_FALSE(try_read_write_buf(&ptr[6 * page_size]));
+
+ /* Now make these pages read-only. */
+ ASSERT_EQ(mprotect(&ptr[5 * page_size], 2 * page_size, PROT_READ), 0);
+
+ /* Make sure the range is still guarded. */
+ ASSERT_FALSE(try_read_buf(&ptr[5 * page_size]));
+ ASSERT_FALSE(try_read_buf(&ptr[6 * page_size]));
+
+ /* Make sure we can guard again without issue.*/
+ ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the range is, yet again, still guarded. */
+ ASSERT_FALSE(try_read_buf(&ptr[5 * page_size]));
+ ASSERT_FALSE(try_read_buf(&ptr[6 * page_size]));
+
+ /* Now unguard the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Make sure the whole range is readable. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Split and merge VMAs and make sure guard pages still behave. */
+TEST_F(guard_regions, split_merge)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the whole range is guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now unmap some pages in the range so we split. */
+ ASSERT_EQ(munmap(&ptr[2 * page_size], page_size), 0);
+ ASSERT_EQ(munmap(&ptr[5 * page_size], page_size), 0);
+ ASSERT_EQ(munmap(&ptr[8 * page_size], page_size), 0);
+
+ /* Make sure the remaining ranges are guarded post-split. */
+ for (i = 0; i < 2; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ for (i = 2; i < 5; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ for (i = 6; i < 8; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ for (i = 9; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now map them again - the unmap will have cleared the guards. */
+ ptr_new = mmap_(self, variant, &ptr[2 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+ ptr_new = mmap_(self, variant, &ptr[5 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+ ptr_new = mmap_(self, variant, &ptr[8 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+
+ /* Now make sure guard pages are established. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+ bool expect_true = i == 2 || i == 5 || i == 8;
+
+ ASSERT_TRUE(expect_true ? result : !result);
+ }
+
+ /* Now guard everything again. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the whole range is guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now split the range into three. */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Make sure the whole range is guarded for read. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_buf(curr));
+ }
+
+ /* Now reset protection bits so we merge the whole thing. */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE), 0);
+
+ /* Make sure the whole range is still guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Split range into 3 again... */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* ...and unguard the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Make sure the whole range is remedied for read. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(curr));
+ }
+
+ /* Merge them again. */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE), 0);
+
+ /* Now ensure the merged range is remedied for read/write. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Assert that MADV_DONTNEED does not remove guard markers. */
+TEST_F(guard_regions, dontneed)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Back the whole range. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ *curr = 'y';
+ }
+
+ /* Guard every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *curr = &ptr[i * page_size];
+ int res = madvise(curr, page_size, MADV_GUARD_INSTALL);
+
+ ASSERT_EQ(res, 0);
+ }
+
+ /* Indicate that we don't need any of the range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_DONTNEED), 0);
+
+ /* Check to ensure guard markers are still in place. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_buf(curr);
+
+ if (i % 2 == 0) {
+ ASSERT_FALSE(result);
+ } else {
+ ASSERT_TRUE(result);
+ switch (variant->backing) {
+ case ANON_BACKED:
+ /* If anon, then we get a zero page. */
+ ASSERT_EQ(*curr, '\0');
+ break;
+ default:
+ /* Otherwise, we get the file data. */
+ ASSERT_EQ(*curr, 'y');
+ break;
+ }
+ }
+
+ /* Now write... */
+ result = try_write_buf(&ptr[i * page_size]);
+
+ /* ...and make sure same result. */
+ ASSERT_TRUE(i % 2 != 0 ? result : !result);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Assert that mlock()'ed pages work correctly with guard markers. */
+TEST_F(guard_regions, mlock)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Populate. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ *curr = 'y';
+ }
+
+ /* Lock. */
+ ASSERT_EQ(mlock(ptr, 10 * page_size), 0);
+
+ /* Now try to guard, should fail with EINVAL. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ /* OK unlock. */
+ ASSERT_EQ(munlock(ptr, 10 * page_size), 0);
+
+ /* Guard first half of range, should now succeed. */
+ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure guard works. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ if (i < 5) {
+ ASSERT_FALSE(result);
+ } else {
+ ASSERT_TRUE(result);
+ ASSERT_EQ(*curr, 'x');
+ }
+ }
+
+ /*
+ * Now lock the latter part of the range. We can't lock the guard pages,
+ * as this would result in the pages being populated and the guarding
+ * would cause this to error out.
+ */
+ ASSERT_EQ(mlock(&ptr[5 * page_size], 5 * page_size), 0);
+
+ /*
+ * Now remove guard pages, we permit mlock()'d ranges to have guard
+ * pages removed as it is a non-destructive operation.
+ */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now check that no guard pages remain. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that moving, extending and shrinking memory via mremap() retains
+ * guard markers where possible.
+ *
+ * - Moving a mapping alone should retain markers as they are.
+ */
+TEST_F(guard_regions, mremap_move)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new;
+
+ /* Map 5 pages. */
+ ptr = mmap_(self, variant, NULL, 5 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Place guard markers at both ends of the 5 page span. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the guard pages are in effect. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Map a new region we will move this range into. Doing this ensures
+ * that we have reserved a range to map into.
+ */
+ ptr_new = mmap_(self, variant, NULL, 5 * page_size, PROT_NONE, 0, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+
+ ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new), ptr_new);
+
+ /* Make sure the guard markers are retained. */
+ ASSERT_FALSE(try_read_write_buf(ptr_new));
+ ASSERT_FALSE(try_read_write_buf(&ptr_new[4 * page_size]));
+
+ /*
+ * Clean up - we only need reference the new pointer as we overwrote the
+ * PROT_NONE range and moved the existing one.
+ */
+ munmap(ptr_new, 5 * page_size);
+}
+
+/*
+ * Assert that moving, extending and shrinking memory via mremap() retains
+ * guard markers where possible.
+ *
+ * Expanding should retain guard pages, only now in different position. The user
+ * will have to remove guard pages manually to fix up (they'd have to do the
+ * same if it were a PROT_NONE mapping).
+ */
+TEST_F(guard_regions, mremap_expand)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new;
+
+ /* Map 10 pages... */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /* ...But unmap the last 5 so we can ensure we can expand into them. */
+ ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0);
+
+ /* Place guard markers at both ends of the 5 page span. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the guarding is in effect. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Now expand to 10 pages. */
+ ptr = mremap(ptr, 5 * page_size, 10 * page_size, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Make sure the guard markers are retained in their original positions.
+ */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Reserve a region which we can move to and expand into. */
+ ptr_new = mmap_(self, variant, NULL, 20 * page_size, PROT_NONE, 0, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+
+ /* Now move and expand into it. */
+ ptr = mremap(ptr, 10 * page_size, 20 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new);
+ ASSERT_EQ(ptr, ptr_new);
+
+ /*
+ * Again, make sure the guard markers are retained in their original positions.
+ */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /*
+ * A real user would have to remove guard markers, but would reasonably
+ * expect all characteristics of the mapping to be retained, including
+ * guard markers.
+ */
+
+ /* Cleanup. */
+ munmap(ptr, 20 * page_size);
+}
+/*
+ * Assert that moving, extending and shrinking memory via mremap() retains
+ * guard markers where possible.
+ *
+ * Shrinking will result in markers that are shrunk over being removed. Again,
+ * if the user were using a PROT_NONE mapping they'd have to manually fix this
+ * up also so this is OK.
+ */
+TEST_F(guard_regions, mremap_shrink)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ /* Map 5 pages. */
+ ptr = mmap_(self, variant, NULL, 5 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Place guard markers at both ends of the 5 page span. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the guarding is in effect. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Now shrink to 3 pages. */
+ ptr = mremap(ptr, 5 * page_size, 3 * page_size, MREMAP_MAYMOVE);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* We expect the guard marker at the start to be retained... */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+
+ /* ...But remaining pages will not have guard markers. */
+ for (i = 1; i < 3; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /*
+ * As with expansion, a real user would have to remove guard pages and
+ * fixup. But you'd have to do similar manual things with PROT_NONE
+ * mappings too.
+ */
+
+ /*
+ * If we expand back to the original size, the end marker will, of
+ * course, no longer be present.
+ */
+ ptr = mremap(ptr, 3 * page_size, 5 * page_size, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Again, we expect the guard marker at the start to be retained... */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+
+ /* ...But remaining pages will not have guard markers. */
+ for (i = 1; i < 5; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ munmap(ptr, 5 * page_size);
+}
+
+/*
+ * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set
+ * retain guard pages.
+ */
+TEST_F(guard_regions, fork)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ pid_t pid;
+ int i;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Establish guard pages in the first 5 pages. */
+ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid) {
+ /* This is the child process now. */
+
+ /* Assert that the guarding is in effect. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ ASSERT_TRUE(i >= 5 ? result : !result);
+ }
+
+ /* Now unguard the range.*/
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ exit(0);
+ }
+
+ /* Parent process. */
+
+ /* Parent simply waits on child. */
+ waitpid(pid, NULL, 0);
+
+ /* Child unguard does not impact parent page table state. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ ASSERT_TRUE(i >= 5 ? result : !result);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert expected behaviour after we fork populated ranges of anonymous memory
+ * and then guard and unguard the range.
+ */
+TEST_F(guard_regions, fork_cow)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ pid_t pid;
+ int i;
+
+ if (variant->backing != ANON_BACKED)
+ SKIP(return, "CoW only supported on anon mappings");
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Populate range. */
+ for (i = 0; i < 10 * page_size; i++) {
+ char chr = 'a' + (i % 26);
+
+ ptr[i] = chr;
+ }
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid) {
+ /* This is the child process now. */
+
+ /* Ensure the range is as expected. */
+ for (i = 0; i < 10 * page_size; i++) {
+ char expected = 'a' + (i % 26);
+ char actual = ptr[i];
+
+ ASSERT_EQ(actual, expected);
+ }
+
+ /* Establish guard pages across the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+ /* Remove it. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /*
+ * By removing the guard pages, the page tables will be
+ * cleared. Assert that we are looking at the zero page now.
+ */
+ for (i = 0; i < 10 * page_size; i++) {
+ char actual = ptr[i];
+
+ ASSERT_EQ(actual, '\0');
+ }
+
+ exit(0);
+ }
+
+ /* Parent process. */
+
+ /* Parent simply waits on child. */
+ waitpid(pid, NULL, 0);
+
+ /* Ensure the range is unchanged in parent anon range. */
+ for (i = 0; i < 10 * page_size; i++) {
+ char expected = 'a' + (i % 26);
+ char actual = ptr[i];
+
+ ASSERT_EQ(actual, expected);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that forking a process with VMAs that do have VM_WIPEONFORK set
+ * behave as expected.
+ */
+TEST_F(guard_regions, fork_wipeonfork)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ pid_t pid;
+ int i;
+
+ if (variant->backing != ANON_BACKED)
+ SKIP(return, "Wipe on fork only supported on anon mappings");
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Mark wipe on fork. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_WIPEONFORK), 0);
+
+ /* Guard the first 5 pages. */
+ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid) {
+ /* This is the child process now. */
+
+ /* Guard will have been wiped. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ exit(0);
+ }
+
+ /* Parent process. */
+
+ waitpid(pid, NULL, 0);
+
+ /* Guard markers should be in effect.*/
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ ASSERT_TRUE(i >= 5 ? result : !result);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that MADV_FREE retains guard entries as expected. */
+TEST_F(guard_regions, lazyfree)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing != ANON_BACKED)
+ SKIP(return, "MADV_FREE only supported on anon mappings");
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Ensure guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Lazyfree range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_FREE), 0);
+
+ /* This should leave the guard markers in place. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */
+TEST_F(guard_regions, populate)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Populate read should error out... */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_READ), -1);
+ ASSERT_EQ(errno, EFAULT);
+
+ /* ...as should populate write. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_WRITE), -1);
+ ASSERT_EQ(errno, EFAULT);
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */
+TEST_F(guard_regions, cold_pageout)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Ensured guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now mark cold. This should have no impact on guard markers. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_COLD), 0);
+
+ /* Should remain guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* OK, now page out. This should equally, have no effect on markers. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
+
+ /* Should remain guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that guard pages do not break userfaultd. */
+TEST_F(guard_regions, uffd)
+{
+ const unsigned long page_size = self->page_size;
+ int uffd;
+ char *ptr;
+ int i;
+ struct uffdio_api api = {
+ .api = UFFD_API,
+ .features = 0,
+ };
+ struct uffdio_register reg;
+ struct uffdio_range range;
+
+ if (!is_anon_backed(variant))
+ SKIP(return, "uffd only works on anon backing");
+
+ /* Set up uffd. */
+ uffd = userfaultfd(0);
+ if (uffd == -1) {
+ switch (errno) {
+ case EPERM:
+ SKIP(return, "No userfaultfd permissions, try running as root.");
+ break;
+ case ENOSYS:
+ SKIP(return, "userfaultfd is not supported/not enabled.");
+ break;
+ default:
+ ksft_exit_fail_msg("userfaultfd failed with %s\n",
+ strerror(errno));
+ break;
+ }
+ }
+
+ ASSERT_NE(uffd, -1);
+
+ ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0);
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Register the range with uffd. */
+ range.start = (unsigned long)ptr;
+ range.len = 10 * page_size;
+ reg.range = range;
+ reg.mode = UFFDIO_REGISTER_MODE_MISSING;
+ ASSERT_EQ(ioctl(uffd, UFFDIO_REGISTER, &reg), 0);
+
+ /* Guard the range. This should not trigger the uffd. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* The guarding should behave as usual with no uffd intervention. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(ioctl(uffd, UFFDIO_UNREGISTER, &range), 0);
+ close(uffd);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Mark a region within a file-backed mapping using MADV_SEQUENTIAL so we
+ * aggressively read-ahead, then install guard regions and assert that it
+ * behaves correctly.
+ *
+ * We page out using MADV_PAGEOUT before checking guard regions so we drop page
+ * cache folios, meaning we maximise the possibility of some broken readahead.
+ */
+TEST_F(guard_regions, madvise_sequential)
+{
+ char *ptr;
+ int i;
+ const unsigned long page_size = self->page_size;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "MADV_SEQUENTIAL meaningful only for file-backed");
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Establish a pattern of data in the file. */
+ set_pattern(ptr, 10, page_size);
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ /* Mark it as being accessed sequentially. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_SEQUENTIAL), 0);
+
+ /* Mark every other page a guard page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr2 = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr2, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now page it out. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
+
+ /* Now make sure pages are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *chrp = &ptr[i * page_size];
+
+ if (i % 2 == 0) {
+ bool result = try_read_write_buf(chrp);
+
+ ASSERT_FALSE(result);
+ } else {
+ ASSERT_EQ(*chrp, 'a' + i);
+ }
+ }
+
+ /* Now remove guard pages. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now make sure all data is as expected. */
+ if (!check_pattern(ptr, 10, page_size))
+ ASSERT_TRUE(false);
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Check that file-backed mappings implement guard regions with MAP_PRIVATE
+ * correctly.
+ */
+TEST_F(guard_regions, map_private)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr_shared, *ptr_private;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "MAP_PRIVATE test specific to file-backed");
+
+ ptr_shared = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr_shared, MAP_FAILED);
+
+ /* Manually mmap(), do not use mmap_() wrapper so we can force MAP_PRIVATE. */
+ ptr_private = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->fd, 0);
+ ASSERT_NE(ptr_private, MAP_FAILED);
+
+ /* Set pattern in shared mapping. */
+ set_pattern(ptr_shared, 10, page_size);
+
+ /* Install guard regions in every other page in the shared mapping. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr = &ptr_shared[i * page_size];
+
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ for (i = 0; i < 10; i++) {
+ /* Every even shared page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0);
+ /* Private mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size]));
+ }
+
+ /* Install guard regions in every other page in the private mapping. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr = &ptr_private[i * page_size];
+
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ for (i = 0; i < 10; i++) {
+ /* Every even shared page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0);
+ /* Every odd private page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0);
+ }
+
+ /* Remove guard regions from shared mapping. */
+ ASSERT_EQ(madvise(ptr_shared, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ for (i = 0; i < 10; i++) {
+ /* Shared mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size]));
+ /* Every even private page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0);
+ }
+
+ /* Remove guard regions from private mapping. */
+ ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ for (i = 0; i < 10; i++) {
+ /* Shared mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size]));
+ /* Private mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size]));
+ }
+
+ /* Ensure patterns are intact. */
+ ASSERT_TRUE(check_pattern(ptr_shared, 10, page_size));
+ ASSERT_TRUE(check_pattern(ptr_private, 10, page_size));
+
+ /* Now write out every other page to MAP_PRIVATE. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr = &ptr_private[i * page_size];
+
+ memset(ptr, 'a' + i, page_size);
+ }
+
+ /*
+ * At this point the mapping is:
+ *
+ * 0123456789
+ * SPSPSPSPSP
+ *
+ * Where S = shared, P = private mappings.
+ */
+
+ /* Now mark the beginning of the mapping guarded. */
+ ASSERT_EQ(madvise(ptr_private, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /*
+ * This renders the mapping:
+ *
+ * 0123456789
+ * xxxxxPSPSP
+ */
+
+ for (i = 0; i < 10; i++) {
+ char *ptr = &ptr_private[i * page_size];
+
+ /* Ensure guard regions as expected. */
+ ASSERT_EQ(try_read_buf(ptr), i >= 5);
+ /* The shared mapping should always succeed. */
+ ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size]));
+ }
+
+ /* Remove the guard regions altogether. */
+ ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /*
+ *
+ * We now expect the mapping to be:
+ *
+ * 0123456789
+ * SSSSSPSPSP
+ *
+ * As we removed guard regions, the private pages from the first 5 will
+ * have been zapped, so on fault will reestablish the shared mapping.
+ */
+
+ for (i = 0; i < 10; i++) {
+ char *ptr = &ptr_private[i * page_size];
+
+ /*
+ * Assert that shared mappings in the MAP_PRIVATE mapping match
+ * the shared mapping.
+ */
+ if (i < 5 || i % 2 == 0) {
+ char *ptr_s = &ptr_shared[i * page_size];
+
+ ASSERT_EQ(memcmp(ptr, ptr_s, page_size), 0);
+ continue;
+ }
+
+ /* Everything else is a private mapping. */
+ ASSERT_TRUE(is_buf_eq(ptr, page_size, 'a' + i));
+ }
+
+ ASSERT_EQ(munmap(ptr_shared, 10 * page_size), 0);
+ ASSERT_EQ(munmap(ptr_private, 10 * page_size), 0);
+}
+
+/* Test that guard regions established over a read-only mapping function correctly. */
+TEST_F(guard_regions, readonly_file)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing != LOCAL_FILE_BACKED)
+ SKIP(return, "Read-only test specific to file-backed");
+
+ /* Map shared so we can populate with pattern, populate it, unmap. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ set_pattern(ptr, 10, page_size);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+ /* Close the fd so we can re-open read-only. */
+ ASSERT_EQ(close(self->fd), 0);
+
+ /* Re-open read-only. */
+ self->fd = open(self->path, O_RDONLY);
+ ASSERT_NE(self->fd, -1);
+ /* Re-map read-only. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Mark every other page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_pg = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_pg, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Assert that the guard regions are in place.*/
+ for (i = 0; i < 10; i++) {
+ char *ptr_pg = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_pg), i % 2 != 0);
+ }
+
+ /* Remove guard regions. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Ensure the data is as expected. */
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, fault_around)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "Fault-around test specific to file-backed");
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Establish a pattern in the backing file. */
+ set_pattern(ptr, 10, page_size);
+
+ /*
+ * Now drop it from the page cache so we get major faults when next we
+ * map it.
+ */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
+
+ /* Unmap and remap 'to be sure'. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now fault in every odd page. This should trigger fault-around. */
+ for (i = 1; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(ptr_p));
+ }
+
+ /* Finally, ensure that guard regions are intact as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0);
+ }
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, truncation)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "Truncation test specific to file-backed");
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Establish a pattern in the backing file, just so there is data
+ * there.
+ */
+ set_pattern(ptr, 10, page_size);
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now truncate to actually used size (initialised to 100). */
+ ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0);
+
+ /* Here the guard regions will remain intact. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now truncate to half the size, then truncate again to the full size. */
+ ASSERT_EQ(ftruncate(self->fd, 5 * page_size), 0);
+ ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0);
+
+ /* Again, guard pages will remain intact. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0);
+ }
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, hole_punch)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "Truncation test specific to file-backed");
+
+ /* Establish pattern in mapping. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ set_pattern(ptr, 10, page_size);
+
+ /* Install a guard region in the middle of the mapping. */
+ ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /*
+ * The buffer will now be:
+ *
+ * 0123456789
+ * ***xxxx***
+ *
+ * Where * is data and x is the guard region.
+ */
+
+ /* Ensure established. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7);
+ }
+
+ /* Now hole punch the guarded region. */
+ ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size,
+ MADV_REMOVE), 0);
+
+ /* Ensure guard regions remain. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7);
+ }
+
+ /* Now remove guard region throughout. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Check that the pattern exists in non-hole punched region. */
+ ASSERT_TRUE(check_pattern(ptr, 3, page_size));
+ /* Check that hole punched region is zeroed. */
+ ASSERT_TRUE(is_buf_eq(&ptr[3 * page_size], 4 * page_size, '\0'));
+ /* Check that the pattern exists in the remainder of the file. */
+ ASSERT_TRUE(check_pattern_offset(ptr, 3, page_size, 7));
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Ensure that a memfd works correctly with guard regions, that we can write
+ * seal it then open the mapping read-only and still establish guard regions
+ * within, remove those guard regions and have everything work correctly.
+ */
+TEST_F(guard_regions, memfd_write_seal)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing != SHMEM_BACKED)
+ SKIP(return, "memfd write seal test specific to shmem");
+
+ /* OK, we need a memfd, so close existing one. */
+ ASSERT_EQ(close(self->fd), 0);
+
+ /* Create and truncate memfd. */
+ self->fd = memfd_create("guard_regions_memfd_seals_test",
+ MFD_ALLOW_SEALING);
+ ASSERT_NE(self->fd, -1);
+ ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0);
+
+ /* Map, set pattern, unmap. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ set_pattern(ptr, 10, page_size);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+
+ /* Write-seal the memfd. */
+ ASSERT_EQ(fcntl(self->fd, F_ADD_SEALS, F_SEAL_WRITE), 0);
+
+ /* Now map the memfd readonly. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Ensure pattern is as expected. */
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now remove guard regions. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Ensure pattern is as expected. */
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ /* Ensure write seal intact. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_write_buf(ptr_p));
+ }
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+
+/*
+ * Since we are now permitted to establish guard regions in read-only anonymous
+ * mappings, for the sake of thoroughness, though it probably has no practical
+ * use, test that guard regions function with a mapping to the anonymous zero
+ * page.
+ */
+TEST_F(guard_regions, anon_zeropage)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (!is_anon_backed(variant))
+ SKIP(return, "anon zero page test specific to anon/shmem");
+
+ /* Obtain a read-only i.e. anon zero page mapping. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now remove all guard regions. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(ptr_p));
+ }
+
+ /* Ensure zero page...*/
+ ASSERT_TRUE(is_buf_eq(ptr, 10 * page_size, '\0'));
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that /proc/$pid/pagemap correctly identifies guard region ranges.
+ */
+TEST_F(guard_regions, pagemap)
+{
+ const unsigned long page_size = self->page_size;
+ int proc_fd;
+ char *ptr;
+ int i;
+
+ proc_fd = open("/proc/self/pagemap", O_RDONLY);
+ ASSERT_NE(proc_fd, -1);
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Read from pagemap, and assert no guard regions are detected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+ unsigned long entry = pagemap_get_entry(proc_fd, ptr_p);
+ unsigned long masked = entry & PM_GUARD_REGION;
+
+ ASSERT_EQ(masked, 0);
+ }
+
+ /* Install a guard region in every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Re-read from pagemap, and assert guard regions are detected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+ unsigned long entry = pagemap_get_entry(proc_fd, ptr_p);
+ unsigned long masked = entry & PM_GUARD_REGION;
+
+ ASSERT_EQ(masked, i % 2 == 0 ? PM_GUARD_REGION : 0);
+ }
+
+ ASSERT_EQ(close(proc_fd), 0);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that PAGEMAP_SCAN correctly reports guard region ranges.
+ */
+TEST_F(guard_regions, pagemap_scan)
+{
+ const unsigned long page_size = self->page_size;
+ struct page_region pm_regs[10];
+ struct pm_scan_arg pm_scan_args = {
+ .size = sizeof(struct pm_scan_arg),
+ .category_anyof_mask = PAGE_IS_GUARD,
+ .return_mask = PAGE_IS_GUARD,
+ .vec = (long)&pm_regs,
+ .vec_len = ARRAY_SIZE(pm_regs),
+ };
+ int proc_fd, i;
+ char *ptr;
+
+ proc_fd = open("/proc/self/pagemap", O_RDONLY);
+ ASSERT_NE(proc_fd, -1);
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ pm_scan_args.start = (long)ptr;
+ pm_scan_args.end = (long)ptr + 10 * page_size;
+ ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 0);
+ ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size);
+
+ /* Install a guard region in every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(syscall(__NR_madvise, ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /*
+ * Assert ioctl() returns the count of located regions, where each
+ * region spans every other page within the range of 10 pages.
+ */
+ ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 5);
+ ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size);
+
+ /* Re-read from pagemap, and assert guard regions are detected. */
+ for (i = 0; i < 5; i++) {
+ long ptr_p = (long)&ptr[2 * i * page_size];
+
+ ASSERT_EQ(pm_regs[i].start, ptr_p);
+ ASSERT_EQ(pm_regs[i].end, ptr_p + page_size);
+ ASSERT_EQ(pm_regs[i].categories, PAGE_IS_GUARD);
+ }
+
+ ASSERT_EQ(close(proc_fd), 0);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, collapse)
+{
+ const unsigned long page_size = self->page_size;
+ const unsigned long size = 2 * HPAGE_SIZE;
+ const unsigned long num_pages = size / page_size;
+ char *ptr;
+ int i;
+
+ /* Need file to be correct size for tests for non-anon. */
+ if (variant->backing != ANON_BACKED)
+ ASSERT_EQ(ftruncate(self->fd, size), 0);
+
+ /*
+ * We must close and re-open local-file backed as read-only for
+ * CONFIG_READ_ONLY_THP_FOR_FS to work.
+ */
+ if (variant->backing == LOCAL_FILE_BACKED) {
+ ASSERT_EQ(close(self->fd), 0);
+
+ self->fd = open(self->path, O_RDONLY);
+ ASSERT_GE(self->fd, 0);
+ }
+
+ ptr = mmap_(self, variant, NULL, size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Prevent being faulted-in as huge. */
+ ASSERT_EQ(madvise(ptr, size, MADV_NOHUGEPAGE), 0);
+ /* Fault in. */
+ ASSERT_EQ(madvise(ptr, size, MADV_POPULATE_READ), 0);
+
+ /* Install guard regions in ever other page. */
+ for (i = 0; i < num_pages; i += 2) {
+ char *ptr_page = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_page, page_size, MADV_GUARD_INSTALL), 0);
+ /* Accesses should now fail. */
+ ASSERT_FALSE(try_read_buf(ptr_page));
+ }
+
+ /* Allow huge page throughout region. */
+ ASSERT_EQ(madvise(ptr, size, MADV_HUGEPAGE), 0);
+
+ /*
+ * Now collapse the entire region. This should fail in all cases.
+ *
+ * The madvise() call will also fail if CONFIG_READ_ONLY_THP_FOR_FS is
+ * not set for the local file case, but we can't differentiate whether
+ * this occurred or if the collapse was rightly rejected.
+ */
+ EXPECT_NE(madvise(ptr, size, MADV_COLLAPSE), 0);
+
+ /*
+ * If we introduce a bug that causes the collapse to succeed, gather
+ * data on whether guard regions are at least preserved. The test will
+ * fail at this point in any case.
+ */
+ for (i = 0; i < num_pages; i += 2) {
+ char *ptr_page = &ptr[i * page_size];
+
+ /* Accesses should still fail. */
+ ASSERT_FALSE(try_read_buf(ptr_page));
+ }
+}
+
+TEST_F(guard_regions, smaps)
+{
+ const unsigned long page_size = self->page_size;
+ struct procmap_fd procmap;
+ char *ptr, *ptr2;
+ int i;
+
+ /* Map a region. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* We shouldn't yet see a guard flag. */
+ ASSERT_FALSE(check_vmflag_guard(ptr));
+
+ /* Install a single guard region. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Now we should see a guard flag. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+
+ /*
+ * Removing the guard region should not change things because we simply
+ * cannot accurately track whether a given VMA has had all of its guard
+ * regions removed.
+ */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_REMOVE), 0);
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+
+ /* Install guard regions throughout. */
+ for (i = 0; i < 10; i++) {
+ ASSERT_EQ(madvise(&ptr[i * page_size], page_size, MADV_GUARD_INSTALL), 0);
+ /* We should always see the guard region flag. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+ }
+
+ /* Split into two VMAs. */
+ ASSERT_EQ(munmap(&ptr[4 * page_size], page_size), 0);
+
+ /* Both VMAs should have the guard flag set. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+ ASSERT_TRUE(check_vmflag_guard(&ptr[5 * page_size]));
+
+ /*
+ * If the local file system is unable to merge VMAs due to having
+ * unusual characteristics, there is no point in asserting merge
+ * behaviour.
+ */
+ if (!local_fs_has_sane_mmap(self, variant)) {
+ TH_LOG("local filesystem does not support sane merging skipping merge test");
+ return;
+ }
+
+ /* Map a fresh VMA between the two split VMAs. */
+ ptr2 = mmap_(self, variant, &ptr[4 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 4 * page_size);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Check the procmap to ensure that this VMA merged with the adjacent
+ * two. The guard region flag is 'sticky' so should not preclude
+ * merging.
+ */
+ ASSERT_EQ(open_self_procmap(&procmap), 0);
+ ASSERT_TRUE(find_vma_procmap(&procmap, ptr));
+ ASSERT_EQ(procmap.query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap.query.vma_end, (unsigned long)ptr + 10 * page_size);
+ ASSERT_EQ(close_procmap(&procmap), 0);
+ /* And, of course, this VMA should have the guard flag set. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
new file mode 100644
index 000000000000..6279893a0adc
--- /dev/null
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * GUP long-term page pinning tests.
+ *
+ * Copyright 2023, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#include <linux/memfd.h>
+
+#include "local_config.h"
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+#include <liburing.h>
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+#include "../../../../mm/gup_test.h"
+#include "kselftest.h"
+#include "vm_util.h"
+
+static size_t pagesize;
+static int nr_hugetlbsizes;
+static size_t hugetlbsizes[10];
+static int gup_fd;
+
+static __fsword_t get_fs_type(int fd)
+{
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = fstatfs(fd, &fs);
+ } while (ret && errno == EINTR);
+
+ return ret ? 0 : fs.f_type;
+}
+
+static bool fs_is_unknown(__fsword_t fs_type)
+{
+ /*
+ * We only support some filesystems in our tests when dealing with
+ * R/W long-term pinning. For these filesystems, we can be fairly sure
+ * whether they support it or not.
+ */
+ switch (fs_type) {
+ case TMPFS_MAGIC:
+ case HUGETLBFS_MAGIC:
+ case BTRFS_SUPER_MAGIC:
+ case EXT4_SUPER_MAGIC:
+ case XFS_SUPER_MAGIC:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type)
+{
+ assert(!fs_is_unknown(fs_type));
+ switch (fs_type) {
+ case TMPFS_MAGIC:
+ case HUGETLBFS_MAGIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+enum test_type {
+ TEST_TYPE_RO,
+ TEST_TYPE_RO_FAST,
+ TEST_TYPE_RW,
+ TEST_TYPE_RW_FAST,
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ TEST_TYPE_IOURING,
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+};
+
+static void do_test(int fd, size_t size, enum test_type type, bool shared)
+{
+ __fsword_t fs_type = get_fs_type(fd);
+ bool should_work;
+ char *mem;
+ int result = KSFT_PASS;
+ int ret;
+
+ if (fd < 0) {
+ result = KSFT_FAIL;
+ goto report;
+ }
+
+ if (ftruncate(fd, size)) {
+ if (errno == ENOENT) {
+ skip_test_dodgy_fs("ftruncate()");
+ } else {
+ ksft_print_msg("ftruncate() failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
+ goto report;
+ }
+ return;
+ }
+
+ if (fallocate(fd, 0, 0, size)) {
+ /*
+ * Some filesystems (eg, NFSv3) don't support
+ * fallocate(), report this as a skip rather than a
+ * test failure.
+ */
+ if (errno == EOPNOTSUPP) {
+ ksft_print_msg("fallocate() not supported by filesystem\n");
+ result = KSFT_SKIP;
+ } else if (size == pagesize) {
+ ksft_print_msg("fallocate() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ ksft_print_msg("need more free huge pages\n");
+ result = KSFT_SKIP;
+ }
+ goto report;
+ }
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
+ if (mem == MAP_FAILED) {
+ if (size == pagesize || shared) {
+ ksft_print_msg("mmap() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ ksft_print_msg("need more free huge pages\n");
+ result = KSFT_SKIP;
+ }
+ goto report;
+ }
+
+ /* Fault in the page such that GUP-fast can pin it directly. */
+ memset(mem, 0, size);
+
+ switch (type) {
+ case TEST_TYPE_RO:
+ case TEST_TYPE_RO_FAST:
+ /*
+ * Cover more cases regarding unsharing decisions when
+ * long-term R/O pinning by mapping the page R/O.
+ */
+ ret = mprotect(mem, size, PROT_READ);
+ if (ret) {
+ ksft_print_msg("mprotect() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
+ goto munmap;
+ }
+ /* FALLTHROUGH */
+ case TEST_TYPE_RW:
+ case TEST_TYPE_RW_FAST: {
+ struct pin_longterm_test args;
+ const bool fast = type == TEST_TYPE_RO_FAST ||
+ type == TEST_TYPE_RW_FAST;
+ const bool rw = type == TEST_TYPE_RW ||
+ type == TEST_TYPE_RW_FAST;
+
+ if (gup_fd < 0) {
+ ksft_print_msg("gup_test not available\n");
+ result = KSFT_SKIP;
+ break;
+ }
+
+ if (rw && shared && fs_is_unknown(fs_type)) {
+ ksft_print_msg("Unknown filesystem\n");
+ result = KSFT_SKIP;
+ return;
+ }
+ /*
+ * R/O pinning or pinning in a private mapping is always
+ * expected to work. Otherwise, we expect long-term R/W pinning
+ * to only succeed for special filesystems.
+ */
+ should_work = !shared || !rw ||
+ fs_supports_writable_longterm_pinning(fs_type);
+
+ args.addr = (__u64)(uintptr_t)mem;
+ args.size = size;
+ args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
+ args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
+ ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
+ if (ret && errno == EINVAL) {
+ ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+ result = KSFT_SKIP;
+ break;
+ } else if (ret && errno == EFAULT) {
+ if (should_work)
+ result = KSFT_FAIL;
+ else
+ result = KSFT_PASS;
+ break;
+ } else if (ret) {
+ ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
+ break;
+ }
+
+ if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP))
+ ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed (%s)\n",
+ strerror(errno));
+
+ /*
+ * TODO: if the kernel ever supports long-term R/W pinning on
+ * some previously unsupported filesystems, we might want to
+ * perform some additional tests for possible data corruptions.
+ */
+ if (should_work)
+ result = KSFT_PASS;
+ else
+ result = KSFT_FAIL;
+ break;
+ }
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ case TEST_TYPE_IOURING: {
+ struct io_uring ring;
+ struct iovec iov;
+
+ /* io_uring always pins pages writable. */
+ if (shared && fs_is_unknown(fs_type)) {
+ ksft_print_msg("Unknown filesystem\n");
+ result = KSFT_SKIP;
+ goto report;
+ }
+ should_work = !shared ||
+ fs_supports_writable_longterm_pinning(fs_type);
+
+ /* Skip on errors, as we might just lack kernel support. */
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret < 0) {
+ ksft_print_msg("io_uring_queue_init() failed (%s)\n",
+ strerror(-ret));
+ result = KSFT_SKIP;
+ break;
+ }
+ /*
+ * Register the range as a fixed buffer. This will FOLL_WRITE |
+ * FOLL_PIN | FOLL_LONGTERM the range.
+ */
+ iov.iov_base = mem;
+ iov.iov_len = size;
+ ret = io_uring_register_buffers(&ring, &iov, 1);
+ /* Only new kernels return EFAULT. */
+ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
+ errno == EFAULT)) {
+ if (should_work) {
+ ksft_print_msg("Should have failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ result = KSFT_PASS;
+ }
+ } else if (ret) {
+ /*
+ * We might just lack support or have insufficient
+ * MEMLOCK limits.
+ */
+ ksft_print_msg("io_uring_register_buffers() failed (%s)\n",
+ strerror(-ret));
+ result = KSFT_SKIP;
+ } else {
+ if (should_work) {
+ result = KSFT_PASS;
+ } else {
+ ksft_print_msg("Should have worked\n");
+ result = KSFT_FAIL;
+ }
+ io_uring_unregister_buffers(&ring);
+ }
+
+ io_uring_queue_exit(&ring);
+ break;
+ }
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+ default:
+ assert(false);
+ }
+
+munmap:
+ munmap(mem, size);
+report:
+ log_test_result(result);
+}
+
+typedef void (*test_fn)(int fd, size_t size);
+
+static void run_with_memfd(test_fn fn, const char *desc)
+{
+ int fd;
+
+ log_test_start("%s ... with memfd", desc);
+
+ fd = memfd_create("test", 0);
+ if (fd < 0) {
+ ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
+ log_test_result(KSFT_SKIP);
+ return;
+ }
+
+ fn(fd, pagesize);
+ close(fd);
+}
+
+static void run_with_tmpfile(test_fn fn, const char *desc)
+{
+ FILE *file;
+ int fd;
+
+ log_test_start("%s ... with tmpfile", desc);
+
+ file = tmpfile();
+ if (!file) {
+ ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno));
+ fd = -1;
+ } else {
+ fd = fileno(file);
+ if (fd < 0) {
+ ksft_print_msg("fileno() failed (%s)\n", strerror(errno));
+ }
+ }
+
+ fn(fd, pagesize);
+
+ if (file)
+ fclose(file);
+}
+
+static void run_with_local_tmpfile(test_fn fn, const char *desc)
+{
+ char filename[] = __FILE__"_tmpfile_XXXXXX";
+ int fd;
+
+ log_test_start("%s ... with local tmpfile", desc);
+
+ fd = mkstemp(filename);
+ if (fd < 0)
+ ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno));
+
+ if (unlink(filename)) {
+ ksft_print_msg("unlink() failed (%s)\n", strerror(errno));
+ close(fd);
+ fd = -1;
+ }
+
+ fn(fd, pagesize);
+
+ if (fd >= 0)
+ close(fd);
+}
+
+static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
+ size_t hugetlbsize)
+{
+ int flags = MFD_HUGETLB;
+ int fd;
+
+ log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
+ hugetlbsize / 1024);
+
+ flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
+
+ fd = memfd_create("test", flags);
+ if (fd < 0) {
+ ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
+ log_test_result(KSFT_SKIP);
+ return;
+ }
+
+ fn(fd, hugetlbsize);
+ close(fd);
+}
+
+struct test_case {
+ const char *desc;
+ test_fn fn;
+};
+
+static void test_shared_rw_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW, true);
+}
+
+static void test_shared_rw_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW_FAST, true);
+}
+
+static void test_shared_ro_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO, true);
+}
+
+static void test_shared_ro_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO_FAST, true);
+}
+
+static void test_private_rw_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW, false);
+}
+
+static void test_private_rw_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW_FAST, false);
+}
+
+static void test_private_ro_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO, false);
+}
+
+static void test_private_ro_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO_FAST, false);
+}
+
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+static void test_shared_iouring(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_IOURING, true);
+}
+
+static void test_private_iouring(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_IOURING, false);
+}
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+static const struct test_case test_cases[] = {
+ {
+ "R/W longterm GUP pin in MAP_SHARED file mapping",
+ test_shared_rw_pin,
+ },
+ {
+ "R/W longterm GUP-fast pin in MAP_SHARED file mapping",
+ test_shared_rw_fast_pin,
+ },
+ {
+ "R/O longterm GUP pin in MAP_SHARED file mapping",
+ test_shared_ro_pin,
+ },
+ {
+ "R/O longterm GUP-fast pin in MAP_SHARED file mapping",
+ test_shared_ro_fast_pin,
+ },
+ {
+ "R/W longterm GUP pin in MAP_PRIVATE file mapping",
+ test_private_rw_pin,
+ },
+ {
+ "R/W longterm GUP-fast pin in MAP_PRIVATE file mapping",
+ test_private_rw_fast_pin,
+ },
+ {
+ "R/O longterm GUP pin in MAP_PRIVATE file mapping",
+ test_private_ro_pin,
+ },
+ {
+ "R/O longterm GUP-fast pin in MAP_PRIVATE file mapping",
+ test_private_ro_fast_pin,
+ },
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ {
+ "io_uring fixed buffer with MAP_SHARED file mapping",
+ test_shared_iouring,
+ },
+ {
+ "io_uring fixed buffer with MAP_PRIVATE file mapping",
+ test_private_iouring,
+ },
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+};
+
+static void run_test_case(struct test_case const *test_case)
+{
+ int i;
+
+ run_with_memfd(test_case->fn, test_case->desc);
+ run_with_tmpfile(test_case->fn, test_case->desc);
+ run_with_local_tmpfile(test_case->fn, test_case->desc);
+ for (i = 0; i < nr_hugetlbsizes; i++)
+ run_with_memfd_hugetlb(test_case->fn, test_case->desc,
+ hugetlbsizes[i]);
+}
+
+static int tests_per_test_case(void)
+{
+ return 3 + nr_hugetlbsizes;
+}
+
+int main(int argc, char **argv)
+{
+ int i;
+
+ pagesize = getpagesize();
+ nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+ ARRAY_SIZE(hugetlbsizes));
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case());
+
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+ run_test_case(&test_cases[i]);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
new file mode 100644
index 000000000000..fb8f9ae49efa
--- /dev/null
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -0,0 +1,260 @@
+#define __SANE_USERSPACE_TYPES__ // Use ll64
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <pthread.h>
+#include <assert.h>
+#include <mm/gup_test.h>
+#include "kselftest.h"
+#include "vm_util.h"
+
+#define MB (1UL << 20)
+
+/* Just the flags we need, copied from the kernel internals. */
+#define FOLL_WRITE 0x01 /* check pte is writable */
+
+#define GUP_TEST_FILE "/sys/kernel/debug/gup_test"
+
+static unsigned long cmd = GUP_FAST_BENCHMARK;
+static int gup_fd, repeats = 1;
+static unsigned long size = 128 * MB;
+/* Serialize prints */
+static pthread_mutex_t print_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static char *cmd_to_str(unsigned long cmd)
+{
+ switch (cmd) {
+ case GUP_FAST_BENCHMARK:
+ return "GUP_FAST_BENCHMARK";
+ case PIN_FAST_BENCHMARK:
+ return "PIN_FAST_BENCHMARK";
+ case PIN_LONGTERM_BENCHMARK:
+ return "PIN_LONGTERM_BENCHMARK";
+ case GUP_BASIC_TEST:
+ return "GUP_BASIC_TEST";
+ case PIN_BASIC_TEST:
+ return "PIN_BASIC_TEST";
+ case DUMP_USER_PAGES_TEST:
+ return "DUMP_USER_PAGES_TEST";
+ }
+ return "Unknown command";
+}
+
+void *gup_thread(void *data)
+{
+ struct gup_test gup = *(struct gup_test *)data;
+ int i, status;
+
+ /* Only report timing information on the *_BENCHMARK commands: */
+ if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
+ (cmd == PIN_LONGTERM_BENCHMARK)) {
+ for (i = 0; i < repeats; i++) {
+ gup.size = size;
+ status = ioctl(gup_fd, cmd, &gup);
+ if (status)
+ break;
+
+ pthread_mutex_lock(&print_mutex);
+ ksft_print_msg("%s: Time: get:%lld put:%lld us",
+ cmd_to_str(cmd), gup.get_delta_usec,
+ gup.put_delta_usec);
+ if (gup.size != size)
+ ksft_print_msg(", truncated (size: %lld)", gup.size);
+ ksft_print_msg("\n");
+ pthread_mutex_unlock(&print_mutex);
+ }
+ } else {
+ gup.size = size;
+ status = ioctl(gup_fd, cmd, &gup);
+ if (status)
+ goto return_;
+
+ pthread_mutex_lock(&print_mutex);
+ ksft_print_msg("%s: done\n", cmd_to_str(cmd));
+ if (gup.size != size)
+ ksft_print_msg("Truncated (size: %lld)\n", gup.size);
+ pthread_mutex_unlock(&print_mutex);
+ }
+
+return_:
+ ksft_test_result(!status, "ioctl status %d\n", status);
+ return NULL;
+}
+
+int main(int argc, char **argv)
+{
+ struct gup_test gup = { 0 };
+ int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
+ int flags = MAP_PRIVATE;
+ char *file = "/dev/zero";
+ pthread_t *tid;
+ char *p;
+
+ while ((opt = getopt(argc, argv, "m:r:n:F:f:abcj:tTLUuwWSHpz")) != -1) {
+ switch (opt) {
+ case 'a':
+ cmd = PIN_FAST_BENCHMARK;
+ break;
+ case 'b':
+ cmd = PIN_BASIC_TEST;
+ break;
+ case 'L':
+ cmd = PIN_LONGTERM_BENCHMARK;
+ break;
+ case 'c':
+ cmd = DUMP_USER_PAGES_TEST;
+ /*
+ * Dump page 0 (index 1). May be overridden later, by
+ * user's non-option arguments.
+ *
+ * .which_pages is zero-based, so that zero can mean "do
+ * nothing".
+ */
+ gup.which_pages[0] = 1;
+ break;
+ case 'p':
+ /* works only with DUMP_USER_PAGES_TEST */
+ gup.test_flags |= GUP_TEST_FLAG_DUMP_PAGES_USE_PIN;
+ break;
+ case 'F':
+ /* strtol, so you can pass flags in hex form */
+ gup.gup_flags = strtol(optarg, 0, 0);
+ break;
+ case 'j':
+ nthreads = atoi(optarg);
+ break;
+ case 'm':
+ size = atoi(optarg) * MB;
+ break;
+ case 'r':
+ repeats = atoi(optarg);
+ break;
+ case 'n':
+ nr_pages = atoi(optarg);
+ if (nr_pages < 0)
+ nr_pages = size / psize();
+ break;
+ case 't':
+ thp = 1;
+ break;
+ case 'T':
+ thp = 0;
+ break;
+ case 'U':
+ cmd = GUP_BASIC_TEST;
+ break;
+ case 'u':
+ cmd = GUP_FAST_BENCHMARK;
+ break;
+ case 'w':
+ write = 1;
+ break;
+ case 'W':
+ write = 0;
+ break;
+ case 'f':
+ file = optarg;
+ break;
+ case 'S':
+ flags &= ~MAP_PRIVATE;
+ flags |= MAP_SHARED;
+ break;
+ case 'H':
+ flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
+ break;
+ default:
+ ksft_exit_fail_msg("Wrong argument\n");
+ }
+ }
+
+ if (optind < argc) {
+ int extra_arg_count = 0;
+ /*
+ * For example:
+ *
+ * ./gup_test -c 0 1 0x1001
+ *
+ * ...to dump pages 0, 1, and 4097
+ */
+
+ while ((optind < argc) &&
+ (extra_arg_count < GUP_TEST_MAX_PAGES_TO_DUMP)) {
+ /*
+ * Do the 1-based indexing here, so that the user can
+ * use normal 0-based indexing on the command line.
+ */
+ long page_index = strtol(argv[optind], 0, 0) + 1;
+
+ gup.which_pages[extra_arg_count] = page_index;
+ extra_arg_count++;
+ optind++;
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(nthreads);
+
+ filed = open(file, O_RDWR|O_CREAT, 0664);
+ if (filed < 0)
+ ksft_exit_fail_msg("Unable to open %s: %s\n", file, strerror(errno));
+
+ gup.nr_pages_per_call = nr_pages;
+ if (write)
+ gup.gup_flags |= FOLL_WRITE;
+
+ gup_fd = open(GUP_TEST_FILE, O_RDWR);
+ if (gup_fd == -1) {
+ switch (errno) {
+ case EACCES:
+ if (getuid())
+ ksft_print_msg("Please run this test as root\n");
+ break;
+ case ENOENT:
+ if (opendir("/sys/kernel/debug") == NULL)
+ ksft_print_msg("mount debugfs at /sys/kernel/debug\n");
+ ksft_print_msg("check if CONFIG_GUP_TEST is enabled in kernel config\n");
+ break;
+ default:
+ ksft_print_msg("failed to open %s: %s\n", GUP_TEST_FILE, strerror(errno));
+ break;
+ }
+ ksft_test_result_skip("Please run this test as root\n");
+ ksft_exit_pass();
+ }
+
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
+ if (p == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
+ gup.addr = (unsigned long)p;
+
+ if (thp == 1)
+ madvise(p, size, MADV_HUGEPAGE);
+ else if (thp == 0)
+ madvise(p, size, MADV_NOHUGEPAGE);
+
+ /* Fault them in here, from user space. */
+ for (; (unsigned long)p < gup.addr + size; p += psize())
+ p[0] = 0;
+
+ tid = malloc(sizeof(pthread_t) * nthreads);
+ assert(tid);
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_create(&tid[i], NULL, gup_thread, &gup);
+ assert(ret == 0);
+ }
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(tid[i], NULL);
+ assert(ret == 0);
+ }
+
+ free(tid);
+
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
new file mode 100644
index 000000000000..e8328c89d855
--- /dev/null
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -0,0 +1,2855 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HMM stands for Heterogeneous Memory Management, it is a helper layer inside
+ * the linux kernel to help device drivers mirror a process address space in
+ * the device. This allows the device to use the same address space which
+ * makes communication and data exchange a lot easier.
+ *
+ * This framework's sole purpose is to exercise various code paths inside
+ * the kernel to make sure that HMM performs as expected and to flush out any
+ * bugs.
+ */
+
+#include "kselftest_harness.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <strings.h>
+#include <time.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+
+/*
+ * This is a private UAPI to the kernel test module so it isn't exported
+ * in the usual include/uapi/... directory.
+ */
+#include <lib/test_hmm_uapi.h>
+#include <mm/gup_test.h>
+
+struct hmm_buffer {
+ void *ptr;
+ void *mirror;
+ unsigned long size;
+ int fd;
+ uint64_t cpages;
+ uint64_t faults;
+};
+
+enum {
+ HMM_PRIVATE_DEVICE_ONE,
+ HMM_PRIVATE_DEVICE_TWO,
+ HMM_COHERENCE_DEVICE_ONE,
+ HMM_COHERENCE_DEVICE_TWO,
+};
+
+#define ONEKB (1 << 10)
+#define ONEMEG (1 << 20)
+#define TWOMEG (1 << 21)
+#define HMM_BUFFER_SIZE (1024 << 12)
+#define HMM_PATH_MAX 64
+#define NTIMES 10
+
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+/* Just the flags we need, copied from mm.h: */
+
+#ifndef FOLL_WRITE
+#define FOLL_WRITE 0x01 /* check pte is writable */
+#endif
+
+#ifndef FOLL_LONGTERM
+#define FOLL_LONGTERM 0x100 /* mapping lifetime is indefinite */
+#endif
+FIXTURE(hmm)
+{
+ int fd;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+
+FIXTURE_VARIANT(hmm)
+{
+ int device_number;
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_private)
+{
+ .device_number = HMM_PRIVATE_DEVICE_ONE,
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent)
+{
+ .device_number = HMM_COHERENCE_DEVICE_ONE,
+};
+
+FIXTURE(hmm2)
+{
+ int fd0;
+ int fd1;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+
+FIXTURE_VARIANT(hmm2)
+{
+ int device_number0;
+ int device_number1;
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private)
+{
+ .device_number0 = HMM_PRIVATE_DEVICE_ONE,
+ .device_number1 = HMM_PRIVATE_DEVICE_TWO,
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent)
+{
+ .device_number0 = HMM_COHERENCE_DEVICE_ONE,
+ .device_number1 = HMM_COHERENCE_DEVICE_TWO,
+};
+
+static int hmm_open(int unit)
+{
+ char pathname[HMM_PATH_MAX];
+ int fd;
+
+ snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit);
+ fd = open(pathname, O_RDWR, 0);
+ if (fd < 0)
+ fprintf(stderr, "could not open hmm dmirror driver (%s)\n",
+ pathname);
+ return fd;
+}
+
+static bool hmm_is_coherent_type(int dev_num)
+{
+ return (dev_num >= HMM_COHERENCE_DEVICE_ONE);
+}
+
+FIXTURE_SETUP(hmm)
+{
+ self->page_size = sysconf(_SC_PAGE_SIZE);
+ self->page_shift = ffs(self->page_size) - 1;
+
+ self->fd = hmm_open(variant->device_number);
+ if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
+ SKIP(return, "DEVICE_COHERENT not available");
+ ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_SETUP(hmm2)
+{
+ self->page_size = sysconf(_SC_PAGE_SIZE);
+ self->page_shift = ffs(self->page_size) - 1;
+
+ self->fd0 = hmm_open(variant->device_number0);
+ if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
+ SKIP(return, "DEVICE_COHERENT not available");
+ ASSERT_GE(self->fd0, 0);
+ self->fd1 = hmm_open(variant->device_number1);
+ ASSERT_GE(self->fd1, 0);
+}
+
+FIXTURE_TEARDOWN(hmm)
+{
+ int ret = close(self->fd);
+
+ ASSERT_EQ(ret, 0);
+ self->fd = -1;
+}
+
+FIXTURE_TEARDOWN(hmm2)
+{
+ int ret = close(self->fd0);
+
+ ASSERT_EQ(ret, 0);
+ self->fd0 = -1;
+
+ ret = close(self->fd1);
+ ASSERT_EQ(ret, 0);
+ self->fd1 = -1;
+}
+
+static int hmm_dmirror_cmd(int fd,
+ unsigned long request,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ struct hmm_dmirror_cmd cmd;
+ int ret;
+
+ /* Simulate a device reading system memory. */
+ cmd.addr = (__u64)buffer->ptr;
+ cmd.ptr = (__u64)buffer->mirror;
+ cmd.npages = npages;
+
+ for (;;) {
+ ret = ioctl(fd, request, &cmd);
+ if (ret == 0)
+ break;
+ if (errno == EINTR)
+ continue;
+ return -errno;
+ }
+ buffer->cpages = cmd.cpages;
+ buffer->faults = cmd.faults;
+
+ return 0;
+}
+
+static void hmm_buffer_free(struct hmm_buffer *buffer)
+{
+ if (buffer == NULL)
+ return;
+
+ if (buffer->ptr) {
+ munmap(buffer->ptr, buffer->size);
+ buffer->ptr = NULL;
+ }
+ free(buffer->mirror);
+ free(buffer);
+}
+
+/*
+ * Create a temporary file that will be deleted on close.
+ */
+static int hmm_create_file(unsigned long size)
+{
+ char path[HMM_PATH_MAX];
+ int fd;
+
+ strcpy(path, "/tmp");
+ fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600);
+ if (fd >= 0) {
+ int r;
+
+ do {
+ r = ftruncate(fd, size);
+ } while (r == -1 && errno == EINTR);
+ if (!r)
+ return fd;
+ close(fd);
+ }
+ return -1;
+}
+
+/*
+ * Return a random unsigned number.
+ */
+static unsigned int hmm_random(void)
+{
+ static int fd = -1;
+ unsigned int r;
+
+ if (fd < 0) {
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "%s:%d failed to open /dev/urandom\n",
+ __FILE__, __LINE__);
+ return ~0U;
+ }
+ }
+ read(fd, &r, sizeof(r));
+ return r;
+}
+
+static void hmm_nanosleep(unsigned int n)
+{
+ struct timespec t;
+
+ t.tv_sec = 0;
+ t.tv_nsec = n;
+ nanosleep(&t, NULL);
+}
+
+static int hmm_migrate_sys_to_dev(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
+}
+
+static int hmm_migrate_dev_to_sys(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
+}
+
+/*
+ * Simple NULL test of device open/close.
+ */
+TEST_F(hmm, open_close)
+{
+}
+
+/*
+ * Read private anonymous memory.
+ */
+TEST_F(hmm, anon_read)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int val;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /*
+ * Initialize buffer in system memory but leave the first two pages
+ * zero (pte_none and pfn_zero).
+ */
+ i = 2 * self->page_size / sizeof(*ptr);
+ for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Set buffer permission to read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Populate the CPU page table with a special zero page. */
+ val = *(int *)(buffer->ptr + self->page_size);
+ ASSERT_EQ(val, 0);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ ptr = buffer->mirror;
+ for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+ for (; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Read private anonymous memory which has been protected with
+ * mprotect() PROT_NONE.
+ */
+TEST_F(hmm, anon_read_prot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize mirror buffer so we can verify it isn't written. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ /* Protect buffer from reading. */
+ ret = mprotect(buffer->ptr, size, PROT_NONE);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, -EFAULT);
+
+ /* Allow CPU to read the buffer so we can check it. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory.
+ */
+TEST_F(hmm, anon_write)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory which has been protected with
+ * mprotect() PROT_READ.
+ */
+TEST_F(hmm, anon_write_prot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device reading a zero page of memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, -EPERM);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Now allow writing and see that the zero page is replaced. */
+ ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Check that a device writing an anonymous private mapping
+ * will copy-on-write if a child process inherits the mapping.
+ *
+ * Also verifies after fork() memory the device can be read by child.
+ */
+TEST_F(hmm, anon_write_child)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ pid_t pid;
+ int child_fd;
+ int ret, use_thp, migrate;
+
+ for (migrate = 0; migrate < 2; ++migrate) {
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ npages = ALIGN(use_thp ? TWOMEG : HMM_BUFFER_SIZE,
+ self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size * 2;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size * 2,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ old_ptr = buffer->ptr;
+ if (use_thp) {
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+ }
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ if (migrate) {
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ }
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did not change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ continue;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ if (!migrate) {
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ }
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ if (!migrate) {
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ }
+
+ close(child_fd);
+ exit(0);
+ }
+ }
+}
+
+/*
+ * Check that a device writing an anonymous shared mapping
+ * will not copy-on-write if a child process inherits the mapping.
+ */
+TEST_F(hmm, anon_write_child_shared)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ pid_t pid;
+ int child_fd;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ return;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ close(child_fd);
+ exit(0);
+}
+
+/*
+ * Write private anonymous huge page.
+ */
+TEST_F(hmm, anon_write_huge)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = 2 * TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ size = TWOMEG;
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Read numeric data from raw and tagged kernel status files. Used to read
+ * /proc and /sys data (without a tag) and from /proc/meminfo (with a tag).
+ */
+static long file_read_ulong(char *file, const char *tag)
+{
+ int fd;
+ char buf[2048];
+ int len;
+ char *p, *q;
+ long val;
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ /* Error opening the file */
+ return -1;
+ }
+
+ len = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (len < 0) {
+ /* Error in reading the file */
+ return -1;
+ }
+ if (len == sizeof(buf)) {
+ /* Error file is too large */
+ return -1;
+ }
+ buf[len] = '\0';
+
+ /* Search for a tag if provided */
+ if (tag) {
+ p = strstr(buf, tag);
+ if (!p)
+ return -1; /* looks like the line we want isn't there */
+ p += strlen(tag);
+ } else
+ p = buf;
+
+ val = strtol(p, &q, 0);
+ if (*q != ' ') {
+ /* Error parsing the file */
+ return -1;
+ }
+
+ return val;
+}
+
+/*
+ * Write huge TLBFS page.
+ */
+TEST_F(hmm, anon_write_hugetlbfs)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long default_hsize;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:");
+ if (default_hsize < 0 || default_hsize*1024 < default_hsize)
+ SKIP(return, "Huge page size could not be determined");
+ default_hsize = default_hsize*1024; /* KB to B */
+
+ size = ALIGN(TWOMEG, default_hsize);
+ npages = size >> self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (buffer->ptr == MAP_FAILED) {
+ free(buffer);
+ SKIP(return, "Huge page could not be allocated");
+ }
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ munmap(buffer->ptr, buffer->size);
+ buffer->ptr = NULL;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Read mmap'ed file memory.
+ */
+TEST_F(hmm, file_read)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int fd;
+ ssize_t len;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ fd = hmm_create_file(size);
+ ASSERT_GE(fd, 0);
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = fd;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Write initial contents of the file. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+ len = pwrite(fd, buffer->mirror, size, 0);
+ ASSERT_EQ(len, size);
+ memset(buffer->mirror, 0, size);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ,
+ MAP_SHARED,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write mmap'ed file memory.
+ */
+TEST_F(hmm, file_write)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int fd;
+ ssize_t len;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ fd = hmm_create_file(size);
+ ASSERT_GE(fd, 0);
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = fd;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Check that the device also wrote the file. */
+ len = pread(fd, buffer->mirror, size, 0);
+ ASSERT_EQ(len, size);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory.
+ */
+TEST_F(hmm, migrate)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory and fault some of it back
+ * to system memory, then try migrating the resulting mix of system and device
+ * private memory to the device.
+ */
+TEST_F(hmm, migrate_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault half the pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Migrate memory to the device again. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_F(hmm, migrate_release)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Release device memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous shared memory to device private memory.
+ */
+TEST_F(hmm, migrate_shared)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, -ENOENT);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Try to migrate various memory types to device private memory.
+ */
+TEST_F(hmm2, migrate_mixed)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *p;
+ int ret;
+ int val;
+
+ npages = 6;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+ p = buffer->ptr;
+
+ /* Migrating a protected area should be an error. */
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
+ ASSERT_EQ(ret, -EINVAL);
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+ /* We expect an error if the vma doesn't cover the range. */
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3);
+ ASSERT_EQ(ret, -EINVAL);
+
+ /* Page 2 will be a read-only zero page. */
+ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 2 * self->page_size);
+ val = *ptr + 3;
+ ASSERT_EQ(val, 3);
+
+ /* Page 3 will be read-only. */
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 3 * self->page_size);
+ *ptr = val;
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Page 4-5 will be read-write. */
+ ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 4 * self->page_size);
+ *ptr = val;
+ ptr = (int *)(buffer->ptr + 5 * self->page_size);
+ *ptr = val;
+
+ /* Now try to migrate pages 2-5 to device 1. */
+ buffer->ptr = p + 2 * self->page_size;
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 4);
+
+ /* Page 5 won't be migrated to device 0 because it's on device 1. */
+ buffer->ptr = p + 5 * self->page_size;
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
+ ASSERT_EQ(ret, -ENOENT);
+ buffer->ptr = p;
+
+ buffer->ptr = p;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device memory and back to system memory
+ * multiple times. In case of private zone configuration, this is done
+ * through fault pages accessed by CPU. In case of coherent zone configuration,
+ * the pages from the device should be explicitly migrated back to system memory.
+ * The reason is Coherent device zone has coherent access by CPU, therefore
+ * it will not generate any page fault.
+ */
+TEST_F(hmm, migrate_multiple)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ unsigned long c;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; c++) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Migrate back to system memory and check them. */
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ }
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+ }
+}
+
+/*
+ * Read anonymous memory multiple times.
+ */
+TEST_F(hmm, anon_read_multiple)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ unsigned long c;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; c++) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i + c;
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+ npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i + c);
+
+ hmm_buffer_free(buffer);
+ }
+}
+
+void *unmap_buffer(void *p)
+{
+ struct hmm_buffer *buffer = p;
+
+ /* Delay for a bit and then unmap buffer while it is being read. */
+ hmm_nanosleep(hmm_random() % 32000);
+ munmap(buffer->ptr + buffer->size / 2, buffer->size / 2);
+ buffer->ptr = NULL;
+
+ return NULL;
+}
+
+/*
+ * Try reading anonymous memory while it is being unmapped.
+ */
+TEST_F(hmm, anon_teardown)
+{
+ unsigned long npages;
+ unsigned long size;
+ unsigned long c;
+ void *ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; ++c) {
+ pthread_t thread;
+ struct hmm_buffer *buffer;
+ unsigned long i;
+ int *ptr;
+ int rc;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i + c;
+
+ rc = pthread_create(&thread, NULL, unmap_buffer, buffer);
+ ASSERT_EQ(rc, 0);
+
+ /* Simulate a device reading system memory. */
+ rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+ npages);
+ if (rc == 0) {
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror;
+ i < size / sizeof(*ptr);
+ ++i)
+ ASSERT_EQ(ptr[i], i + c);
+ }
+
+ pthread_join(thread, &ret);
+ hmm_buffer_free(buffer);
+ }
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm2, snapshot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *p;
+ unsigned char *m;
+ int ret;
+ int val;
+
+ npages = 7;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+ p = buffer->ptr;
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+ /* Page 2 will be read-only zero page. */
+ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 2 * self->page_size);
+ val = *ptr + 3;
+ ASSERT_EQ(val, 3);
+
+ /* Page 3 will be read-only. */
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 3 * self->page_size);
+ *ptr = val;
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Page 4-6 will be read-write. */
+ ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 4 * self->page_size);
+ *ptr = val;
+
+ /* Page 5 will be migrated to device 0. */
+ buffer->ptr = p + 5 * self->page_size;
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+
+ /* Page 6 will be migrated to device 1. */
+ buffer->ptr = p + 6 * self->page_size;
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ buffer->ptr = p;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR);
+ ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR);
+ ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
+ ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
+ ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
+ if (!hmm_is_coherent_type(variant->device_number0)) {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+ } else {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE |
+ HMM_DMIRROR_PROT_WRITE);
+ }
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
+ * should be mapped by a large page table entry.
+ */
+TEST_F(hmm, compound)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long default_hsize;
+ int *ptr;
+ unsigned char *m;
+ int ret;
+ unsigned long i;
+
+ /* Skip test if we can't allocate a hugetlbfs page. */
+
+ default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:");
+ if (default_hsize < 0 || default_hsize*1024 < default_hsize)
+ SKIP(return, "Huge page size could not be determined");
+ default_hsize = default_hsize*1024; /* KB to B */
+
+ size = ALIGN(TWOMEG, default_hsize);
+ npages = size >> self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (buffer->ptr == MAP_FAILED) {
+ free(buffer);
+ return;
+ }
+
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Initialize the pages the device will snapshot in buffer->ptr. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ for (i = 0; i < npages; ++i)
+ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE |
+ HMM_DMIRROR_PROT_PMD);
+
+ /* Make the region read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ for (i = 0; i < npages; ++i)
+ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ |
+ HMM_DMIRROR_PROT_PMD);
+
+ munmap(buffer->ptr, buffer->size);
+ buffer->ptr = NULL;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test two devices reading the same memory (double mapped).
+ */
+TEST_F(hmm2, double_map)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = 6;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Make region read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate device 0 reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Simulate device 1 reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Migrate pages to device 1 and try to read from device 0. */
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what device 0 read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Basic check of exclusive faulting.
+ */
+TEST_F(hmm, exclusive)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i]++, i);
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i+1);
+
+ /* Check atomic access revoked */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_F(hmm, exclusive_mprotect)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, -EPERM);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Check copy-on-write works.
+ */
+TEST_F(hmm, exclusive_cow)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ fork();
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i]++, i);
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i+1);
+
+ hmm_buffer_free(buffer);
+}
+
+static int gup_test_exec(int gup_fd, unsigned long addr, int cmd,
+ int npages, int size, int flags)
+{
+ struct gup_test gup = {
+ .nr_pages_per_call = npages,
+ .addr = addr,
+ .gup_flags = FOLL_WRITE | flags,
+ .size = size,
+ };
+
+ if (ioctl(gup_fd, cmd, &gup)) {
+ perror("ioctl on error\n");
+ return errno;
+ }
+
+ return 0;
+}
+
+/*
+ * Test get user device pages through gup_test. Setting PIN_LONGTERM flag.
+ * This should trigger a migration back to system memory for both, private
+ * and coherent type pages.
+ * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added
+ * to your configuration before you run it.
+ */
+TEST_F(hmm, hmm_gup_test)
+{
+ struct hmm_buffer *buffer;
+ int gup_fd;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ unsigned char *m;
+
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (gup_fd == -1)
+ SKIP(return, "Skipping test, could not find gup_test driver");
+
+ npages = 4;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr,
+ GUP_BASIC_TEST, 1, self->page_size, 0), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 1 * self->page_size,
+ GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 2 * self->page_size,
+ PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 3 * self->page_size,
+ PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0);
+
+ /* Take snapshot to CPU pagetables */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ m = buffer->mirror;
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]);
+ } else {
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]);
+ }
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]);
+ /*
+ * Check again the content on the pages. Make sure there's no
+ * corrupted data.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ close(gup_fd);
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test copy-on-write in device pages.
+ * In case of writing to COW private page(s), a page fault will migrate pages
+ * back to system memory first. Then, these pages will be duplicated. In case
+ * of COW device coherent type, pages are duplicated directly from device
+ * memory.
+ */
+TEST_F(hmm, hmm_cow_in_device)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ unsigned char *m;
+ pid_t pid;
+ int status;
+
+ npages = 4;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (!pid) {
+ /* Child process waits for SIGTERM from the parent. */
+ while (1) {
+ }
+ /* Should not reach this */
+ }
+ /* Parent process writes to COW pages(s) and gets a
+ * new copy in system. In case of device private pages,
+ * this write causes a migration to system mem first.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Terminate child and wait */
+ EXPECT_EQ(0, kill(pid, SIGTERM));
+ EXPECT_EQ(pid, waitpid(pid, &status, 0));
+ EXPECT_NE(0, WIFSIGNALED(status));
+ EXPECT_EQ(SIGTERM, WTERMSIG(status));
+
+ /* Take snapshot to CPU pagetables */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ m = buffer->mirror;
+ for (i = 0; i < npages; i++)
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge empty page.
+ */
+TEST_F(hmm, migrate_anon_huge_empty)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge zero page.
+ */
+TEST_F(hmm, migrate_anon_huge_zero)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+ int val;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize a read-only zero huge page. */
+ val = *(int *)buffer->ptr;
+ ASSERT_EQ(val, 0);
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) {
+ ASSERT_EQ(ptr[i], 0);
+ /* If it asserts once, it probably will 500,000 times */
+ if (ptr[i] != 0)
+ break;
+ }
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge page and free.
+ */
+TEST_F(hmm, migrate_anon_huge_free)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Try freeing it. */
+ ret = madvise(map, size, MADV_FREE);
+ ASSERT_EQ(ret, 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge page and fault back to sysmem.
+ */
+TEST_F(hmm, migrate_anon_huge_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate memory and fault back to sysmem after partially unmapping.
+ */
+TEST_F(hmm, migrate_partial_unmap_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size = TWOMEG;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret, j, use_thp;
+ int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ if (use_thp)
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ else
+ ret = madvise(map, size, MADV_NOHUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ munmap(buffer->ptr + offsets[j], ONEMEG);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ if (i * sizeof(int) < offsets[j] ||
+ i * sizeof(int) >= offsets[j] + ONEMEG)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ }
+ }
+}
+
+TEST_F(hmm, migrate_remap_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size = TWOMEG;
+ unsigned long i;
+ void *old_ptr, *new_ptr = NULL;
+ void *map;
+ int *ptr;
+ int ret, j, use_thp, dont_unmap, before;
+ int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ for (before = 0; before < 2; ++before) {
+ for (dont_unmap = 0; dont_unmap < 2; ++dont_unmap) {
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
+ int flags = MREMAP_MAYMOVE | MREMAP_FIXED;
+
+ if (dont_unmap)
+ flags |= MREMAP_DONTUNMAP;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 8 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, buffer->size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ if (use_thp)
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ else
+ ret = madvise(map, size, MADV_NOHUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ munmap(map + size, size * 2);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr;
+ i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ if (before) {
+ new_ptr = mremap((void *)map, size, size, flags,
+ map + size + offsets[j]);
+ ASSERT_NE(new_ptr, MAP_FAILED);
+ buffer->ptr = new_ptr;
+ }
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror;
+ i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ if (!before) {
+ new_ptr = mremap((void *)map, size, size, flags,
+ map + size + offsets[j]);
+ ASSERT_NE(new_ptr, MAP_FAILED);
+ buffer->ptr = new_ptr;
+ }
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr;
+ i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ munmap(new_ptr, size);
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Migrate private anonymous huge page with allocation errors.
+ */
+TEST_F(hmm, migrate_anon_huge_err)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(2 * size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, 2 * size);
+
+ old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) {
+ ASSERT_EQ(ptr[i], i);
+ if (ptr[i] != i)
+ break;
+ }
+
+ /* Try faulting back a single (PAGE_SIZE) page. */
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 2048);
+
+ /* unmap and remap the region to reset things. */
+ ret = munmap(old_ptr, 2 * size);
+ ASSERT_EQ(ret, 0);
+ old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate THP to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /*
+ * Force an allocation error when faulting back a THP resident in the
+ * device.
+ */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+
+ ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 2048);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge zero page with allocation errors.
+ */
+TEST_F(hmm, migrate_anon_huge_zero_err)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(2 * size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, 2 * size);
+
+ old_ptr = mmap(NULL, 2 * size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Try faulting back a single (PAGE_SIZE) page. */
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 0);
+
+ /* unmap and remap the region to reset things. */
+ ret = munmap(old_ptr, 2 * size);
+ ASSERT_EQ(ret, 0);
+ old_ptr = mmap(NULL, 2 * size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory (zero THP page). */
+ ret = ptr[0];
+ ASSERT_EQ(ret, 0);
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Fault the device memory back and check it. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+struct benchmark_results {
+ double sys_to_dev_time;
+ double dev_to_sys_time;
+ double throughput_s2d;
+ double throughput_d2s;
+};
+
+static double get_time_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000.0) + (tv.tv_usec / 1000.0);
+}
+
+static inline struct hmm_buffer *hmm_buffer_alloc(unsigned long size)
+{
+ struct hmm_buffer *buffer;
+
+ buffer = malloc(sizeof(*buffer));
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ memset(buffer->mirror, 0xFF, size);
+ return buffer;
+}
+
+static void print_benchmark_results(const char *test_name, size_t buffer_size,
+ struct benchmark_results *thp,
+ struct benchmark_results *regular)
+{
+ double s2d_improvement = ((regular->sys_to_dev_time - thp->sys_to_dev_time) /
+ regular->sys_to_dev_time) * 100.0;
+ double d2s_improvement = ((regular->dev_to_sys_time - thp->dev_to_sys_time) /
+ regular->dev_to_sys_time) * 100.0;
+ double throughput_s2d_improvement = ((thp->throughput_s2d - regular->throughput_s2d) /
+ regular->throughput_s2d) * 100.0;
+ double throughput_d2s_improvement = ((thp->throughput_d2s - regular->throughput_d2s) /
+ regular->throughput_d2s) * 100.0;
+
+ printf("\n=== %s (%.1f MB) ===\n", test_name, buffer_size / (1024.0 * 1024.0));
+ printf(" | With THP | Without THP | Improvement\n");
+ printf("---------------------------------------------------------------------\n");
+ printf("Sys->Dev Migration | %.3f ms | %.3f ms | %.1f%%\n",
+ thp->sys_to_dev_time, regular->sys_to_dev_time, s2d_improvement);
+ printf("Dev->Sys Migration | %.3f ms | %.3f ms | %.1f%%\n",
+ thp->dev_to_sys_time, regular->dev_to_sys_time, d2s_improvement);
+ printf("S->D Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n",
+ thp->throughput_s2d, regular->throughput_s2d, throughput_s2d_improvement);
+ printf("D->S Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n",
+ thp->throughput_d2s, regular->throughput_d2s, throughput_d2s_improvement);
+}
+
+/*
+ * Run a single migration benchmark
+ * fd: file descriptor for hmm device
+ * use_thp: whether to use THP
+ * buffer_size: size of buffer to allocate
+ * iterations: number of iterations
+ * results: where to store results
+ */
+static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_size,
+ int iterations, struct benchmark_results *results)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages = buffer_size / sysconf(_SC_PAGESIZE);
+ double start, end;
+ double s2d_total = 0, d2s_total = 0;
+ int ret, i;
+ int *ptr;
+
+ buffer = hmm_buffer_alloc(buffer_size);
+
+ /* Map memory */
+ buffer->ptr = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (!buffer->ptr)
+ return -1;
+
+ /* Apply THP hint if requested */
+ if (use_thp)
+ ret = madvise(buffer->ptr, buffer_size, MADV_HUGEPAGE);
+ else
+ ret = madvise(buffer->ptr, buffer_size, MADV_NOHUGEPAGE);
+
+ if (ret)
+ return ret;
+
+ /* Initialize memory to make sure pages are allocated */
+ ptr = (int *)buffer->ptr;
+ for (i = 0; i < buffer_size / sizeof(int); i++)
+ ptr[i] = i & 0xFF;
+
+ /* Warmup iteration */
+ ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ /* Benchmark iterations */
+ for (i = 0; i < iterations; i++) {
+ /* System to device migration */
+ start = get_time_ms();
+
+ ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ end = get_time_ms();
+ s2d_total += (end - start);
+
+ /* Device to system migration */
+ start = get_time_ms();
+
+ ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ end = get_time_ms();
+ d2s_total += (end - start);
+ }
+
+ /* Calculate average times and throughput */
+ results->sys_to_dev_time = s2d_total / iterations;
+ results->dev_to_sys_time = d2s_total / iterations;
+ results->throughput_s2d = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
+ (results->sys_to_dev_time / 1000.0);
+ results->throughput_d2s = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
+ (results->dev_to_sys_time / 1000.0);
+
+ /* Cleanup */
+ hmm_buffer_free(buffer);
+ return 0;
+}
+
+/*
+ * Benchmark THP migration with different buffer sizes
+ */
+TEST_F_TIMEOUT(hmm, benchmark_thp_migration, 120)
+{
+ struct benchmark_results thp_results, regular_results;
+ size_t thp_size = 2 * 1024 * 1024; /* 2MB - typical THP size */
+ int iterations = 5;
+
+ printf("\nHMM THP Migration Benchmark\n");
+ printf("---------------------------\n");
+ printf("System page size: %ld bytes\n", sysconf(_SC_PAGESIZE));
+
+ /* Test different buffer sizes */
+ size_t test_sizes[] = {
+ thp_size / 4, /* 512KB - smaller than THP */
+ thp_size / 2, /* 1MB - half THP */
+ thp_size, /* 2MB - single THP */
+ thp_size * 2, /* 4MB - two THPs */
+ thp_size * 4, /* 8MB - four THPs */
+ thp_size * 8, /* 16MB - eight THPs */
+ thp_size * 128, /* 256MB - one twenty eight THPs */
+ };
+
+ static const char *const test_names[] = {
+ "Small Buffer (512KB)",
+ "Half THP Size (1MB)",
+ "Single THP Size (2MB)",
+ "Two THP Size (4MB)",
+ "Four THP Size (8MB)",
+ "Eight THP Size (16MB)",
+ "One twenty eight THP Size (256MB)"
+ };
+
+ int num_tests = ARRAY_SIZE(test_sizes);
+
+ /* Run all tests */
+ for (int i = 0; i < num_tests; i++) {
+ /* Test with THP */
+ ASSERT_EQ(run_migration_benchmark(self->fd, 1, test_sizes[i],
+ iterations, &thp_results), 0);
+
+ /* Test without THP */
+ ASSERT_EQ(run_migration_benchmark(self->fd, 0, test_sizes[i],
+ iterations, &regular_results), 0);
+
+ /* Print results */
+ print_benchmark_results(test_names[i], test_sizes[i],
+ &thp_results, &regular_results);
+ }
+}
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c
index 93f9e7b81331..d543419de040 100644
--- a/tools/testing/selftests/vm/hugepage-mmap.c
+++ b/tools/testing/selftests/mm/hugepage-mmap.c
@@ -8,37 +8,21 @@
* like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
* example, the app is requesting memory of size 256MB that is backed by
* huge pages.
- *
- * For the ia64 architecture, the Linux kernel reserves Region number 4 for
- * huge pages. That means that if one requires a fixed address, a huge page
- * aligned address starting with 0x800000... will be required. If a fixed
- * address is not required, the kernel will select an address in the proper
- * range.
- * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
*/
-
+#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
+#include "kselftest.h"
-#define FILE_NAME "huge/hugepagefile"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
-/* Only ia64 requires this */
-#ifdef __ia64__
-#define ADDR (void *)(0x8000000000000000UL)
-#define FLAGS (MAP_SHARED | MAP_FIXED)
-#else
-#define ADDR (void *)(0x0UL)
-#define FLAGS (MAP_SHARED)
-#endif
-
static void check_bytes(char *addr)
{
- printf("First hex is %x\n", *((unsigned int *)addr));
+ ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
}
static void write_bytes(char *addr)
@@ -56,7 +40,7 @@ static int read_bytes(char *addr)
check_bytes(addr);
for (i = 0; i < LENGTH; i++)
if (*(addr + i) != (char)i) {
- printf("Mismatch at %lu\n", i);
+ ksft_print_msg("Error: Mismatch at %lu\n", i);
return 1;
}
return 0;
@@ -67,27 +51,28 @@ int main(void)
void *addr;
int fd, ret;
- fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
- if (fd < 0) {
- perror("Open failed");
- exit(1);
- }
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ fd = memfd_create("hugepage-mmap", MFD_HUGETLB);
+ if (fd < 0)
+ ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno));
- addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
+ addr = mmap(NULL, LENGTH, PROTECTION, MAP_SHARED, fd, 0);
if (addr == MAP_FAILED) {
- perror("mmap");
- unlink(FILE_NAME);
- exit(1);
+ close(fd);
+ ksft_exit_fail_msg("mmap(): %s\n", strerror(errno));
}
- printf("Returned address is %p\n", addr);
+ ksft_print_msg("Returned address is %p\n", addr);
check_bytes(addr);
write_bytes(addr);
ret = read_bytes(addr);
munmap(addr, LENGTH);
close(fd);
- unlink(FILE_NAME);
- return ret;
+ ksft_test_result(!ret, "Read same data\n");
+
+ ksft_exit(!ret);
}
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
new file mode 100644
index 000000000000..b8f7d92e5a35
--- /dev/null
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-mremap:
+ *
+ * Example of remapping huge page memory in a user application using the
+ * mremap system call. The path to a file in a hugetlbfs filesystem must
+ * be passed as the last argument to this test. The amount of memory used
+ * by this test in MBs can optionally be passed as an argument. If no memory
+ * amount is passed, the default amount is 10MB.
+ *
+ * To make sure the test triggers pmd sharing and goes through the 'unshare'
+ * path in the mremap code use 1GB (1024) or more.
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <fcntl.h> /* Definition of O_* constants */
+#include <sys/syscall.h> /* Definition of SYS_* constants */
+#include <linux/userfaultfd.h>
+#include <sys/ioctl.h>
+#include <string.h>
+#include <stdbool.h>
+#include "kselftest.h"
+#include "vm_util.h"
+
+#define DEFAULT_LENGTH_MB 10UL
+#define MB_TO_BYTES(x) (x * 1024 * 1024)
+
+#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
+#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
+
+static void check_bytes(char *addr)
+{
+ ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr, size_t len)
+{
+ unsigned long i;
+
+ for (i = 0; i < len; i++)
+ *(addr + i) = (char)i;
+}
+
+static int read_bytes(char *addr, size_t len)
+{
+ unsigned long i;
+
+ check_bytes(addr);
+ for (i = 0; i < len; i++)
+ if (*(addr + i) != (char)i) {
+ ksft_print_msg("Mismatch at %lu\n", i);
+ return 1;
+ }
+ return 0;
+}
+
+static void register_region_with_uffd(char *addr, size_t len)
+{
+ long uffd; /* userfaultfd file descriptor */
+ struct uffdio_api uffdio_api;
+
+ /* Create and enable userfaultfd object. */
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd == -1) {
+ switch (errno) {
+ case EPERM:
+ ksft_exit_skip("Insufficient permissions, try running as root.\n");
+ break;
+ case ENOSYS:
+ ksft_exit_skip("userfaultfd is not supported/not enabled.\n");
+ break;
+ default:
+ ksft_exit_fail_msg("userfaultfd failed with %s\n", strerror(errno));
+ break;
+ }
+ }
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
+ ksft_exit_fail_msg("ioctl-UFFDIO_API: %s\n", strerror(errno));
+
+ /* Create a private anonymous mapping. The memory will be
+ * demand-zero paged--that is, not yet allocated. When we
+ * actually touch the memory, it will be allocated via
+ * the userfaultfd.
+ */
+
+ addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
+
+ ksft_print_msg("Address returned by mmap() = %p\n", addr);
+
+ /* Register the memory range of the mapping we just created for
+ * handling by the userfaultfd object. In mode, we request to track
+ * missing pages (i.e., pages that have not yet been faulted in).
+ */
+ if (uffd_register(uffd, addr, len, true, false, false))
+ ksft_exit_fail_msg("ioctl-UFFDIO_REGISTER: %s\n", strerror(errno));
+}
+
+int main(int argc, char *argv[])
+{
+ size_t length = 0;
+ int ret = 0, fd;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ if (argc >= 2 && !strcmp(argv[1], "-h"))
+ ksft_exit_fail_msg("Usage: %s [length_in_MB]\n", argv[0]);
+
+ /* Read memory length as the first arg if valid, otherwise fallback to
+ * the default length.
+ */
+ if (argc >= 2)
+ length = (size_t)atoi(argv[1]);
+ else
+ length = DEFAULT_LENGTH_MB;
+
+ length = MB_TO_BYTES(length);
+ fd = memfd_create(argv[0], MFD_HUGETLB);
+ if (fd < 0)
+ ksft_exit_fail_msg("Open failed: %s\n", strerror(errno));
+
+ /* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
+ unsigned long suggested_addr = 0x7eaa40000000;
+ void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
+ MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
+ ksft_print_msg("Map haddr: Returned address is %p\n", haddr);
+ if (haddr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap1: %s\n", strerror(errno));
+
+ /* mmap again to a dummy address to hopefully trigger pmd sharing. */
+ suggested_addr = 0x7daa40000000;
+ void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
+ MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
+ ksft_print_msg("Map daddr: Returned address is %p\n", daddr);
+ if (daddr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap3: %s\n", strerror(errno));
+
+ suggested_addr = 0x7faa40000000;
+ void *vaddr =
+ mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
+ ksft_print_msg("Map vaddr: Returned address is %p\n", vaddr);
+ if (vaddr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap2: %s\n", strerror(errno));
+
+ register_region_with_uffd(haddr, length);
+
+ void *addr = mremap(haddr, length, length,
+ MREMAP_MAYMOVE | MREMAP_FIXED, vaddr);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mremap: %s\n", strerror(errno));
+
+ ksft_print_msg("Mremap: Returned address is %p\n", addr);
+ check_bytes(addr);
+ write_bytes(addr, length);
+ ret = read_bytes(addr, length);
+
+ munmap(addr, length);
+
+ addr = mremap(addr, length, length, 0);
+ if (addr != MAP_FAILED)
+ ksft_exit_fail_msg("mremap: Expected failure, but call succeeded\n");
+
+ close(fd);
+
+ ksft_test_result(!ret, "Read same data\n");
+ ksft_exit(!ret);
+}
diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/mm/hugepage-shm.c
index e2527f32005b..ef06260802b5 100644
--- a/tools/testing/selftests/vm/hugepage-shm.c
+++ b/tools/testing/selftests/mm/hugepage-shm.c
@@ -8,13 +8,6 @@
* SHM_HUGETLB in the shmget system call to inform the kernel that it is
* requesting huge pages.
*
- * For the ia64 architecture, the Linux kernel reserves Region number 4 for
- * huge pages. That means that if one requires a fixed address, a huge page
- * aligned address starting with 0x800000... will be required. If a fixed
- * address is not required, the kernel will select an address in the proper
- * range.
- * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
- *
* Note: The default shared memory limit is quite low on many kernels,
* you may need to increase it via:
*
@@ -35,23 +28,10 @@
#include <sys/shm.h>
#include <sys/mman.h>
-#ifndef SHM_HUGETLB
-#define SHM_HUGETLB 04000
-#endif
-
#define LENGTH (256UL*1024*1024)
#define dprintf(x) printf(x)
-/* Only ia64 requires this */
-#ifdef __ia64__
-#define ADDR (void *)(0x8000000000000000UL)
-#define SHMAT_FLAGS (SHM_RND)
-#else
-#define ADDR (void *)(0x0UL)
-#define SHMAT_FLAGS (0)
-#endif
-
int main(void)
{
int shmid;
@@ -65,7 +45,7 @@ int main(void)
}
printf("shmid: 0x%x\n", shmid);
- shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS);
+ shmaddr = shmat(shmid, NULL, 0);
if (shmaddr == (char *)-1) {
perror("Shared memory attach failure");
shmctl(shmid, IPC_RMID, NULL);
diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c
new file mode 100644
index 000000000000..df366a4d1b92
--- /dev/null
+++ b/tools/testing/selftests/mm/hugepage-vmemmap.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test case of using hugepage memory in a user application using the
+ * mmap system call with MAP_HUGETLB flag. Before running this program
+ * make sure the administrator has allocated enough default sized huge
+ * pages to cover the 2 MB allocation.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "vm_util.h"
+
+#define PAGE_COMPOUND_HEAD (1UL << 15)
+#define PAGE_COMPOUND_TAIL (1UL << 16)
+#define PAGE_HUGE (1UL << 17)
+
+#define HEAD_PAGE_FLAGS (PAGE_COMPOUND_HEAD | PAGE_HUGE)
+#define TAIL_PAGE_FLAGS (PAGE_COMPOUND_TAIL | PAGE_HUGE)
+
+#define PM_PFRAME_BITS 55
+#define PM_PFRAME_MASK ~((1UL << PM_PFRAME_BITS) - 1)
+
+static size_t pagesize;
+static size_t maplength;
+
+static void write_bytes(char *addr, size_t length)
+{
+ unsigned long i;
+
+ for (i = 0; i < length; i++)
+ *(addr + i) = (char)i;
+}
+
+static unsigned long virt_to_pfn(void *addr)
+{
+ int fd;
+ unsigned long pagemap;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ return -1UL;
+
+ lseek(fd, (unsigned long)addr / pagesize * sizeof(pagemap), SEEK_SET);
+ read(fd, &pagemap, sizeof(pagemap));
+ close(fd);
+
+ return pagemap & ~PM_PFRAME_MASK;
+}
+
+static int check_page_flags(unsigned long pfn)
+{
+ int fd, i;
+ unsigned long pageflags;
+
+ fd = open("/proc/kpageflags", O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ lseek(fd, pfn * sizeof(pageflags), SEEK_SET);
+
+ read(fd, &pageflags, sizeof(pageflags));
+ if ((pageflags & HEAD_PAGE_FLAGS) != HEAD_PAGE_FLAGS) {
+ close(fd);
+ printf("Head page flags (%lx) is invalid\n", pageflags);
+ return -1;
+ }
+
+ /*
+ * pages other than the first page must be tail and shouldn't be head;
+ * this also verifies kernel has correctly set the fake page_head to tail
+ * while hugetlb_free_vmemmap is enabled.
+ */
+ for (i = 1; i < maplength / pagesize; i++) {
+ read(fd, &pageflags, sizeof(pageflags));
+ if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS ||
+ (pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) {
+ close(fd);
+ printf("Tail page flags (%lx) is invalid\n", pageflags);
+ return -1;
+ }
+ }
+
+ close(fd);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ void *addr;
+ unsigned long pfn;
+
+ pagesize = psize();
+ maplength = default_huge_page_size();
+ if (!maplength) {
+ printf("Unable to determine huge page size\n");
+ exit(1);
+ }
+
+ addr = mmap(NULL, maplength, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* Trigger allocation of HugeTLB page. */
+ write_bytes(addr, maplength);
+
+ pfn = virt_to_pfn(addr);
+ if (pfn == -1UL) {
+ munmap(addr, maplength);
+ perror("virt_to_pfn");
+ exit(1);
+ }
+
+ printf("Returned address is %p whose pfn is %lx\n", addr, pfn);
+
+ if (check_page_flags(pfn) < 0) {
+ munmap(addr, maplength);
+ perror("check_page_flags");
+ exit(1);
+ }
+
+ /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+ if (munmap(addr, maplength)) {
+ perror("munmap");
+ exit(1);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
new file mode 100644
index 000000000000..05d9d2805ae4
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-madvise:
+ *
+ * Basic functional testing of madvise MADV_DONTNEED and MADV_REMOVE
+ * on hugetlb mappings.
+ *
+ * Before running this test, make sure the administrator has pre-allocated
+ * at least MIN_FREE_PAGES hugetlb pages and they are free. In addition,
+ * the test takes an argument that is the path to a file in a hugetlbfs
+ * filesystem. Therefore, a hugetlbfs filesystem must be mounted on some
+ * directory.
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "vm_util.h"
+#include "kselftest.h"
+
+#define MIN_FREE_PAGES 20
+#define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */
+
+#define validate_free_pages(exp_free) \
+ do { \
+ int fhp = get_free_hugepages(); \
+ if (fhp != (exp_free)) { \
+ printf("Unexpected number of free huge " \
+ "pages line %d\n", __LINE__); \
+ exit(1); \
+ } \
+ } while (0)
+
+unsigned long huge_page_size;
+unsigned long base_page_size;
+
+void write_fault_pages(void *addr, unsigned long nr_pages)
+{
+ unsigned long i;
+
+ for (i = 0; i < nr_pages; i++)
+ *((unsigned long *)(addr + (i * huge_page_size))) = i;
+}
+
+void read_fault_pages(void *addr, unsigned long nr_pages)
+{
+ unsigned long i;
+
+ for (i = 0; i < nr_pages; i++) {
+ unsigned long *addr2 =
+ ((unsigned long *)(addr + (i * huge_page_size)));
+ /* Prevent the compiler from optimizing out the entire loop: */
+ FORCE_READ(*addr2);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long free_hugepages;
+ void *addr, *addr2;
+ int fd;
+ int ret;
+
+ huge_page_size = default_huge_page_size();
+ if (!huge_page_size) {
+ printf("Unable to determine huge page size, exiting!\n");
+ exit(1);
+ }
+ base_page_size = sysconf(_SC_PAGE_SIZE);
+ if (!huge_page_size) {
+ printf("Unable to determine base page size, exiting!\n");
+ exit(1);
+ }
+
+ free_hugepages = get_free_hugepages();
+ if (free_hugepages < MIN_FREE_PAGES) {
+ printf("Not enough free huge pages to test, exiting!\n");
+ exit(KSFT_SKIP);
+ }
+
+ fd = memfd_create(argv[0], MFD_HUGETLB);
+ if (fd < 0) {
+ perror("memfd_create() failed");
+ exit(1);
+ }
+
+ /*
+ * Test validity of MADV_DONTNEED addr and length arguments. mmap
+ * size is NR_HUGE_PAGES + 2. One page at the beginning and end of
+ * the mapping will be unmapped so we KNOW there is nothing mapped
+ * there.
+ */
+ addr = mmap(NULL, (NR_HUGE_PAGES + 2) * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ if (munmap(addr, huge_page_size) ||
+ munmap(addr + (NR_HUGE_PAGES + 1) * huge_page_size,
+ huge_page_size)) {
+ perror("munmap");
+ exit(1);
+ }
+ addr = addr + huge_page_size;
+
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* addr before mapping should fail */
+ ret = madvise(addr - base_page_size, NR_HUGE_PAGES * huge_page_size,
+ MADV_DONTNEED);
+ if (!ret) {
+ printf("Unexpected success of madvise call with invalid addr line %d\n",
+ __LINE__);
+ exit(1);
+ }
+
+ /* addr + length after mapping should fail */
+ ret = madvise(addr, (NR_HUGE_PAGES * huge_page_size) + base_page_size,
+ MADV_DONTNEED);
+ if (!ret) {
+ printf("Unexpected success of madvise call with invalid length line %d\n",
+ __LINE__);
+ exit(1);
+ }
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test alignment of MADV_DONTNEED addr and length arguments
+ */
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* addr is not huge page size aligned and should fail */
+ ret = madvise(addr + base_page_size,
+ NR_HUGE_PAGES * huge_page_size - base_page_size,
+ MADV_DONTNEED);
+ if (!ret) {
+ printf("Unexpected success of madvise call with unaligned start address %d\n",
+ __LINE__);
+ exit(1);
+ }
+
+ /* addr + length should be aligned down to huge page size */
+ if (madvise(addr,
+ ((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
+ MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+
+ /* should free all but last page in mapping */
+ validate_free_pages(free_hugepages - 1);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+ validate_free_pages(free_hugepages);
+
+ /*
+ * Test MADV_DONTNEED on anonymous private mapping
+ */
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+
+ /* should free all pages in mapping */
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_DONTNEED on private mapping of hugetlb file
+ */
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* read should not consume any pages */
+ read_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* madvise should not free any pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* writes should allocate private pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /* madvise should free private pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* writes should allocate private pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /*
+ * The fallocate below certainly should free the pages associated
+ * with the file. However, pages in the private mapping are also
+ * freed. This is not the 'correct' behavior, but is expected
+ * because this is how it has worked since the initial hugetlb
+ * implementation.
+ */
+ if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_DONTNEED on shared mapping of hugetlb file
+ */
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* write should not consume any pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* madvise should not free any pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /*
+ * Test MADV_REMOVE on shared mapping of hugetlb file
+ *
+ * madvise is same as hole punch and should free all pages.
+ */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages);
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_REMOVE on shared and private mapping of hugetlb file
+ */
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* shared write should not consume any additional pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr2 = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0);
+ if (addr2 == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* private read should not consume any pages */
+ read_fault_pages(addr2, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* private write should consume additional pages */
+ write_fault_pages(addr2, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /* madvise of shared mapping should not free any pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /* madvise of private mapping should free private pages */
+ if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* private write should consume additional pages again */
+ write_fault_pages(addr2, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /*
+ * madvise should free both file and private pages although this is
+ * not correct. private pages should not be freed, but this is
+ * expected. See comment associated with FALLOC_FL_PUNCH_HOLE call.
+ */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+ (void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
new file mode 100644
index 000000000000..46230462ad48
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <linux/magic.h>
+#include <sys/mman.h>
+#include <sys/statfs.h>
+#include <errno.h>
+#include <stdbool.h>
+
+#include "kselftest.h"
+
+#define PREFIX " ... "
+#define ERROR_PREFIX " !!! "
+
+#define MAX_WRITE_READ_CHUNK_SIZE (getpagesize() * 16)
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+enum test_status {
+ TEST_PASSED = 0,
+ TEST_FAILED = 1,
+ TEST_SKIPPED = 2,
+};
+
+static char *status_to_str(enum test_status status)
+{
+ switch (status) {
+ case TEST_PASSED:
+ return "TEST_PASSED";
+ case TEST_FAILED:
+ return "TEST_FAILED";
+ case TEST_SKIPPED:
+ return "TEST_SKIPPED";
+ default:
+ return "TEST_???";
+ }
+}
+
+static int setup_filemap(char *filemap, size_t len, size_t wr_chunk_size)
+{
+ char iter = 0;
+
+ for (size_t offset = 0; offset < len;
+ offset += wr_chunk_size) {
+ iter++;
+ memset(filemap + offset, iter, wr_chunk_size);
+ }
+
+ return 0;
+}
+
+static bool verify_chunk(char *buf, size_t len, char val)
+{
+ size_t i;
+
+ for (i = 0; i < len; ++i) {
+ if (buf[i] != val) {
+ printf(PREFIX ERROR_PREFIX "check fail: buf[%lu] = %u != %u\n",
+ i, buf[i], val);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool seek_read_hugepage_filemap(int fd, size_t len, size_t wr_chunk_size,
+ off_t offset, size_t expected)
+{
+ char buf[MAX_WRITE_READ_CHUNK_SIZE];
+ ssize_t ret_count = 0;
+ ssize_t total_ret_count = 0;
+ char val = offset / wr_chunk_size + offset % wr_chunk_size;
+
+ printf(PREFIX PREFIX "init val=%u with offset=0x%lx\n", val, offset);
+ printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n",
+ expected);
+ if (lseek(fd, offset, SEEK_SET) < 0) {
+ perror(PREFIX ERROR_PREFIX "seek failed");
+ return false;
+ }
+
+ while (offset + total_ret_count < len) {
+ ret_count = read(fd, buf, wr_chunk_size);
+ if (ret_count == 0) {
+ printf(PREFIX PREFIX "read reach end of the file\n");
+ break;
+ } else if (ret_count < 0) {
+ perror(PREFIX ERROR_PREFIX "read failed");
+ break;
+ }
+ ++val;
+ if (!verify_chunk(buf, ret_count, val))
+ return false;
+
+ total_ret_count += ret_count;
+ }
+ printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n",
+ total_ret_count);
+
+ return total_ret_count == expected;
+}
+
+static bool read_hugepage_filemap(int fd, size_t len,
+ size_t wr_chunk_size, size_t expected)
+{
+ char buf[MAX_WRITE_READ_CHUNK_SIZE];
+ ssize_t ret_count = 0;
+ ssize_t total_ret_count = 0;
+ char val = 0;
+
+ printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n",
+ expected);
+ while (total_ret_count < len) {
+ ret_count = read(fd, buf, wr_chunk_size);
+ if (ret_count == 0) {
+ printf(PREFIX PREFIX "read reach end of the file\n");
+ break;
+ } else if (ret_count < 0) {
+ perror(PREFIX ERROR_PREFIX "read failed");
+ break;
+ }
+ ++val;
+ if (!verify_chunk(buf, ret_count, val))
+ return false;
+
+ total_ret_count += ret_count;
+ }
+ printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n",
+ total_ret_count);
+
+ return total_ret_count == expected;
+}
+
+static enum test_status
+test_hugetlb_read(int fd, size_t len, size_t wr_chunk_size)
+{
+ enum test_status status = TEST_SKIPPED;
+ char *filemap = NULL;
+
+ if (ftruncate(fd, len) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate failed");
+ return status;
+ }
+
+ filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (filemap == MAP_FAILED) {
+ perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed");
+ goto done;
+ }
+
+ setup_filemap(filemap, len, wr_chunk_size);
+ status = TEST_FAILED;
+
+ if (read_hugepage_filemap(fd, len, wr_chunk_size, len))
+ status = TEST_PASSED;
+
+ munmap(filemap, len);
+done:
+ if (ftruncate(fd, 0) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed");
+ status = TEST_FAILED;
+ }
+
+ return status;
+}
+
+static enum test_status
+test_hugetlb_read_hwpoison(int fd, size_t len, size_t wr_chunk_size,
+ bool skip_hwpoison_page)
+{
+ enum test_status status = TEST_SKIPPED;
+ char *filemap = NULL;
+ char *hwp_addr = NULL;
+ const unsigned long pagesize = getpagesize();
+
+ if (ftruncate(fd, len) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate failed");
+ return status;
+ }
+
+ filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (filemap == MAP_FAILED) {
+ perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed");
+ goto done;
+ }
+
+ setup_filemap(filemap, len, wr_chunk_size);
+ status = TEST_FAILED;
+
+ /*
+ * Poisoned hugetlb page layout (assume hugepagesize=2MB):
+ * |<---------------------- 1MB ---------------------->|
+ * |<---- healthy page ---->|<---- HWPOISON page ----->|
+ * |<------------------- (1MB - 8KB) ----------------->|
+ */
+ hwp_addr = filemap + len / 2 + pagesize;
+ if (madvise(hwp_addr, pagesize, MADV_HWPOISON) < 0) {
+ perror(PREFIX ERROR_PREFIX "MADV_HWPOISON failed");
+ goto unmap;
+ }
+
+ if (!skip_hwpoison_page) {
+ /*
+ * Userspace should be able to read (1MB + 1 page) from
+ * the beginning of the HWPOISONed hugepage.
+ */
+ if (read_hugepage_filemap(fd, len, wr_chunk_size,
+ len / 2 + pagesize))
+ status = TEST_PASSED;
+ } else {
+ /*
+ * Userspace should be able to read (1MB - 2 pages) from
+ * HWPOISONed hugepage.
+ */
+ if (seek_read_hugepage_filemap(fd, len, wr_chunk_size,
+ len / 2 + MAX(2 * pagesize, wr_chunk_size),
+ len / 2 - MAX(2 * pagesize, wr_chunk_size)))
+ status = TEST_PASSED;
+ }
+
+unmap:
+ munmap(filemap, len);
+done:
+ if (ftruncate(fd, 0) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed");
+ status = TEST_FAILED;
+ }
+
+ return status;
+}
+
+static int create_hugetlbfs_file(struct statfs *file_stat)
+{
+ int fd;
+
+ fd = memfd_create("hugetlb_tmp", MFD_HUGETLB);
+ if (fd < 0) {
+ perror(PREFIX ERROR_PREFIX "could not open hugetlbfs file");
+ return -1;
+ }
+
+ memset(file_stat, 0, sizeof(*file_stat));
+ if (fstatfs(fd, file_stat)) {
+ perror(PREFIX ERROR_PREFIX "fstatfs failed");
+ goto close;
+ }
+ if (file_stat->f_type != HUGETLBFS_MAGIC) {
+ printf(PREFIX ERROR_PREFIX "not hugetlbfs file\n");
+ goto close;
+ }
+
+ return fd;
+close:
+ close(fd);
+ return -1;
+}
+
+int main(void)
+{
+ int fd;
+ struct statfs file_stat;
+ enum test_status status;
+ /* Test read() in different granularity. */
+ size_t wr_chunk_sizes[] = {
+ getpagesize() / 2, getpagesize(),
+ getpagesize() * 2, getpagesize() * 4
+ };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(wr_chunk_sizes); ++i) {
+ printf("Write/read chunk size=0x%lx\n",
+ wr_chunk_sizes[i]);
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ goto create_failure;
+ printf(PREFIX "HugeTLB read regression test...\n");
+ status = test_hugetlb_read(fd, file_stat.f_bsize,
+ wr_chunk_sizes[i]);
+ printf(PREFIX "HugeTLB read regression test...%s\n",
+ status_to_str(status));
+ close(fd);
+ if (status == TEST_FAILED)
+ return -1;
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ goto create_failure;
+ printf(PREFIX "HugeTLB read HWPOISON test...\n");
+ status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize,
+ wr_chunk_sizes[i], false);
+ printf(PREFIX "HugeTLB read HWPOISON test...%s\n",
+ status_to_str(status));
+ close(fd);
+ if (status == TEST_FAILED)
+ return -1;
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ goto create_failure;
+ printf(PREFIX "HugeTLB seek then read HWPOISON test...\n");
+ status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize,
+ wr_chunk_sizes[i], true);
+ printf(PREFIX "HugeTLB seek then read HWPOISON test...%s\n",
+ status_to_str(status));
+ close(fd);
+ if (status == TEST_FAILED)
+ return -1;
+ }
+
+ return 0;
+
+create_failure:
+ printf(ERROR_PREFIX "Abort test: failed to create hugetlbfs file\n");
+ return -1;
+}
diff --git a/tools/testing/selftests/mm/hugetlb-soft-offline.c b/tools/testing/selftests/mm/hugetlb-soft-offline.c
new file mode 100644
index 000000000000..a8bc02688085
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb-soft-offline.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test soft offline behavior for HugeTLB pages:
+ * - if enable_soft_offline = 0, hugepages should stay intact and soft
+ * offlining failed with EOPNOTSUPP.
+ * - if enable_soft_offline = 1, a hugepage should be dissolved and
+ * nr_hugepages/free_hugepages should be reduced by 1.
+ *
+ * Before running, make sure more than 2 hugepages of default_hugepagesz
+ * are allocated. For example, if /proc/meminfo/Hugepagesize is 2048kB:
+ * echo 8 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <linux/magic.h>
+#include <linux/memfd.h>
+#include <sys/mman.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+
+#include "kselftest.h"
+
+#ifndef MADV_SOFT_OFFLINE
+#define MADV_SOFT_OFFLINE 101
+#endif
+
+#define EPREFIX " !!! "
+
+static int do_soft_offline(int fd, size_t len, int expect_errno)
+{
+ char *filemap = NULL;
+ char *hwp_addr = NULL;
+ const unsigned long pagesize = getpagesize();
+ int ret = 0;
+
+ if (ftruncate(fd, len) < 0) {
+ ksft_perror(EPREFIX "ftruncate to len failed");
+ return -1;
+ }
+
+ filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (filemap == MAP_FAILED) {
+ ksft_perror(EPREFIX "mmap failed");
+ ret = -1;
+ goto untruncate;
+ }
+
+ memset(filemap, 0xab, len);
+ ksft_print_msg("Allocated %#lx bytes of hugetlb pages\n", len);
+
+ hwp_addr = filemap + len / 2;
+ ret = madvise(hwp_addr, pagesize, MADV_SOFT_OFFLINE);
+ ksft_print_msg("MADV_SOFT_OFFLINE %p ret=%d, errno=%d\n",
+ hwp_addr, ret, errno);
+ if (ret != 0)
+ ksft_perror(EPREFIX "madvise failed");
+
+ if (errno == expect_errno)
+ ret = 0;
+ else {
+ ksft_print_msg("MADV_SOFT_OFFLINE should ret %d\n",
+ expect_errno);
+ ret = -1;
+ }
+
+ munmap(filemap, len);
+untruncate:
+ if (ftruncate(fd, 0) < 0)
+ ksft_perror(EPREFIX "ftruncate back to 0 failed");
+
+ return ret;
+}
+
+static int set_enable_soft_offline(int value)
+{
+ char cmd[256] = {0};
+ FILE *cmdfile = NULL;
+
+ if (value != 0 && value != 1)
+ return -EINVAL;
+
+ sprintf(cmd, "echo %d > /proc/sys/vm/enable_soft_offline", value);
+ cmdfile = popen(cmd, "r");
+
+ if (cmdfile)
+ ksft_print_msg("enable_soft_offline => %d\n", value);
+ else {
+ ksft_perror(EPREFIX "failed to set enable_soft_offline");
+ return errno;
+ }
+
+ pclose(cmdfile);
+ return 0;
+}
+
+static int read_nr_hugepages(unsigned long hugepage_size,
+ unsigned long *nr_hugepages)
+{
+ char buffer[256] = {0};
+ char cmd[256] = {0};
+
+ sprintf(cmd, "cat /sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages",
+ hugepage_size);
+ FILE *cmdfile = popen(cmd, "r");
+
+ if (cmdfile == NULL) {
+ ksft_perror(EPREFIX "failed to popen nr_hugepages");
+ return -1;
+ }
+
+ if (!fgets(buffer, sizeof(buffer), cmdfile)) {
+ ksft_perror(EPREFIX "failed to read nr_hugepages");
+ pclose(cmdfile);
+ return -1;
+ }
+
+ *nr_hugepages = atoll(buffer);
+ pclose(cmdfile);
+ return 0;
+}
+
+static int create_hugetlbfs_file(struct statfs *file_stat)
+{
+ int fd;
+
+ fd = memfd_create("hugetlb_tmp", MFD_HUGETLB);
+ if (fd < 0) {
+ ksft_perror(EPREFIX "could not open hugetlbfs file");
+ return -1;
+ }
+
+ memset(file_stat, 0, sizeof(*file_stat));
+ if (fstatfs(fd, file_stat)) {
+ ksft_perror(EPREFIX "fstatfs failed");
+ goto close;
+ }
+ if (file_stat->f_type != HUGETLBFS_MAGIC) {
+ ksft_print_msg(EPREFIX "not hugetlbfs file\n");
+ goto close;
+ }
+
+ return fd;
+close:
+ close(fd);
+ return -1;
+}
+
+static void test_soft_offline_common(int enable_soft_offline)
+{
+ int fd;
+ int expect_errno = enable_soft_offline ? 0 : EOPNOTSUPP;
+ struct statfs file_stat;
+ unsigned long hugepagesize_kb = 0;
+ unsigned long nr_hugepages_before = 0;
+ unsigned long nr_hugepages_after = 0;
+ int ret;
+
+ ksft_print_msg("Test soft-offline when enabled_soft_offline=%d\n",
+ enable_soft_offline);
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ ksft_exit_fail_msg("Failed to create hugetlbfs file\n");
+
+ hugepagesize_kb = file_stat.f_bsize / 1024;
+ ksft_print_msg("Hugepagesize is %ldkB\n", hugepagesize_kb);
+
+ if (set_enable_soft_offline(enable_soft_offline) != 0) {
+ close(fd);
+ ksft_exit_fail_msg("Failed to set enable_soft_offline\n");
+ }
+
+ if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_before) != 0) {
+ close(fd);
+ ksft_exit_fail_msg("Failed to read nr_hugepages\n");
+ }
+
+ ksft_print_msg("Before MADV_SOFT_OFFLINE nr_hugepages=%ld\n",
+ nr_hugepages_before);
+
+ ret = do_soft_offline(fd, 2 * file_stat.f_bsize, expect_errno);
+
+ if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_after) != 0) {
+ close(fd);
+ ksft_exit_fail_msg("Failed to read nr_hugepages\n");
+ }
+
+ ksft_print_msg("After MADV_SOFT_OFFLINE nr_hugepages=%ld\n",
+ nr_hugepages_after);
+
+ // No need for the hugetlbfs file from now on.
+ close(fd);
+
+ if (enable_soft_offline) {
+ if (nr_hugepages_before != nr_hugepages_after + 1) {
+ ksft_test_result_fail("MADV_SOFT_OFFLINE should reduced 1 hugepage\n");
+ return;
+ }
+ } else {
+ if (nr_hugepages_before != nr_hugepages_after) {
+ ksft_test_result_fail("MADV_SOFT_OFFLINE reduced %lu hugepages\n",
+ nr_hugepages_before - nr_hugepages_after);
+ return;
+ }
+ }
+
+ ksft_test_result(ret == 0,
+ "Test soft-offline when enabled_soft_offline=%d\n",
+ enable_soft_offline);
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(2);
+
+ test_soft_offline_common(1);
+ test_soft_offline_common(0);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c
new file mode 100644
index 000000000000..9ac62eb4c97d
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb_dio.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program tests for hugepage leaks after DIO writes to a file using a
+ * hugepage as the user buffer. During DIO, the user buffer is pinned and
+ * should be properly unpinned upon completion. This patch verifies that the
+ * kernel correctly unpins the buffer at DIO completion for both aligned and
+ * unaligned user buffer offsets (w.r.t page boundary), ensuring the hugepage
+ * is freed upon unmapping.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/mman.h>
+#include "vm_util.h"
+#include "kselftest.h"
+
+void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off)
+{
+ int fd;
+ char *buffer = NULL;
+ char *orig_buffer = NULL;
+ size_t h_pagesize = 0;
+ size_t writesize;
+ int free_hpage_b = 0;
+ int free_hpage_a = 0;
+ const int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
+ const int mmap_prot = PROT_READ | PROT_WRITE;
+
+ writesize = end_off - start_off;
+
+ /* Get the default huge page size */
+ h_pagesize = default_huge_page_size();
+ if (!h_pagesize)
+ ksft_exit_fail_msg("Unable to determine huge page size\n");
+
+ /* Open the file to DIO */
+ fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664);
+ if (fd < 0)
+ ksft_exit_fail_perror("Error opening file\n");
+
+ /* Get the free huge pages before allocation */
+ free_hpage_b = get_free_hugepages();
+ if (free_hpage_b == 0) {
+ close(fd);
+ ksft_exit_skip("No free hugepage, exiting!\n");
+ }
+
+ /* Allocate a hugetlb page */
+ orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0);
+ if (orig_buffer == MAP_FAILED) {
+ close(fd);
+ ksft_exit_fail_perror("Error mapping memory\n");
+ }
+ buffer = orig_buffer;
+ buffer += start_off;
+
+ memset(buffer, 'A', writesize);
+
+ /* Write the buffer to the file */
+ if (write(fd, buffer, writesize) != (writesize)) {
+ munmap(orig_buffer, h_pagesize);
+ close(fd);
+ ksft_exit_fail_perror("Error writing to file\n");
+ }
+
+ /* unmap the huge page */
+ munmap(orig_buffer, h_pagesize);
+ close(fd);
+
+ /* Get the free huge pages after unmap*/
+ free_hpage_a = get_free_hugepages();
+
+ ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
+ ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
+
+ /*
+ * If the no. of free hugepages before allocation and after unmap does
+ * not match - that means there could still be a page which is pinned.
+ */
+ ksft_test_result(free_hpage_a == free_hpage_b,
+ "free huge pages from %u-%u\n", start_off, end_off);
+}
+
+int main(void)
+{
+ size_t pagesize = 0;
+ int fd;
+
+ ksft_print_header();
+
+ /* Open the file to DIO */
+ fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664);
+ if (fd < 0)
+ ksft_exit_skip("Unable to allocate file: %s\n", strerror(errno));
+ close(fd);
+
+ /* Check if huge pages are free */
+ if (!get_free_hugepages())
+ ksft_exit_skip("No free hugepage, exiting\n");
+
+ ksft_set_plan(4);
+
+ /* Get base page size */
+ pagesize = psize();
+
+ /* start and end is aligned to pagesize */
+ run_dio_using_hugetlb(0, (pagesize * 3));
+
+ /* start is aligned but end is not aligned */
+ run_dio_using_hugetlb(0, (pagesize * 3) - (pagesize / 2));
+
+ /* start is unaligned and end is aligned */
+ run_dio_using_hugetlb(pagesize / 2, (pagesize * 3));
+
+ /* both start and end are unaligned */
+ run_dio_using_hugetlb(pagesize / 2, (pagesize * 3) + (pagesize / 2));
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
new file mode 100644
index 000000000000..b4b257775b74
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "vm_util.h"
+#include "kselftest.h"
+
+#define INLOOP_ITER 100
+
+static char *huge_ptr;
+static size_t huge_page_size;
+
+static sigjmp_buf sigbuf;
+static bool sigbus_triggered;
+
+static void signal_handler(int signal)
+{
+ if (signal == SIGBUS) {
+ sigbus_triggered = true;
+ siglongjmp(sigbuf, 1);
+ }
+}
+
+/* Touch the memory while it is being madvised() */
+void *touch(void *unused)
+{
+ char *ptr = (char *)huge_ptr;
+
+ if (sigsetjmp(sigbuf, 1))
+ return NULL;
+
+ for (int i = 0; i < INLOOP_ITER; i++)
+ ptr[0] = '.';
+
+ return NULL;
+}
+
+void *madv(void *unused)
+{
+ usleep(rand() % 10);
+
+ for (int i = 0; i < INLOOP_ITER; i++)
+ madvise(huge_ptr, huge_page_size, MADV_DONTNEED);
+
+ return NULL;
+}
+
+int main(void)
+{
+ unsigned long free_hugepages;
+ pthread_t thread1, thread2;
+ /*
+ * On kernel 6.4, we are able to reproduce the problem with ~1000
+ * interactions
+ */
+ int max = 10000;
+ int err;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ srand(getpid());
+
+ if (signal(SIGBUS, signal_handler) == SIG_ERR)
+ ksft_exit_skip("Could not register signal handler.");
+
+ huge_page_size = default_huge_page_size();
+ if (!huge_page_size)
+ ksft_exit_skip("Could not detect default hugetlb page size.");
+
+ ksft_print_msg("[INFO] detected default hugetlb page size: %zu KiB\n",
+ huge_page_size / 1024);
+
+ free_hugepages = get_free_hugepages();
+ if (free_hugepages != 1) {
+ ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
+ free_hugepages);
+ }
+
+ while (max--) {
+ huge_ptr = mmap(NULL, huge_page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+
+ if ((unsigned long)huge_ptr == -1)
+ ksft_exit_skip("Failed to allocated huge page\n");
+
+ pthread_create(&thread1, NULL, madv, NULL);
+ pthread_create(&thread2, NULL, touch, NULL);
+
+ pthread_join(thread1, NULL);
+ pthread_join(thread2, NULL);
+ munmap(huge_ptr, huge_page_size);
+ }
+
+ ksft_test_result(!sigbus_triggered, "SIGBUS behavior\n");
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
new file mode 100644
index 000000000000..efd774b41389
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test case that must run on a system with one and only one huge page available.
+ * # echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+ *
+ * During setup, the test allocates the only available page, and starts three threads:
+ * - thread1:
+ * * madvise(MADV_DONTNEED) on the allocated huge page
+ * - thread 2:
+ * * Write to the allocated huge page
+ * - thread 3:
+ * * Try to allocated an extra huge page (which must not available)
+ *
+ * The test fails if thread3 is able to allocate a page.
+ *
+ * Touching the first page after thread3's allocation will raise a SIGBUS
+ *
+ * Author: Breno Leitao <leitao@debian.org>
+ */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vm_util.h"
+#include "kselftest.h"
+
+#define INLOOP_ITER 100
+
+size_t mmap_size;
+char *huge_ptr;
+
+/* Touch the memory while it is being madvised() */
+void *touch(void *unused)
+{
+ for (int i = 0; i < INLOOP_ITER; i++)
+ huge_ptr[0] = '.';
+
+ return NULL;
+}
+
+void *madv(void *unused)
+{
+ for (int i = 0; i < INLOOP_ITER; i++)
+ madvise(huge_ptr, mmap_size, MADV_DONTNEED);
+
+ return NULL;
+}
+
+/*
+ * We got here, and there must be no huge page available for mapping
+ * The other hugepage should be flipping from used <-> reserved, because
+ * of madvise(DONTNEED).
+ */
+void *map_extra(void *unused)
+{
+ void *ptr;
+
+ for (int i = 0; i < INLOOP_ITER; i++) {
+ ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+
+ if ((long)ptr != -1) {
+ /* Touching the other page now will cause a SIGBUG
+ * huge_ptr[0] = '1';
+ */
+ return ptr;
+ }
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ pthread_t thread1, thread2, thread3;
+ unsigned long free_hugepages;
+ void *ret;
+
+ /*
+ * On kernel 6.7, we are able to reproduce the problem with ~10
+ * interactions
+ */
+ int max = 10;
+
+ free_hugepages = get_free_hugepages();
+
+ if (free_hugepages != 1) {
+ ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
+ free_hugepages);
+ }
+
+ mmap_size = default_huge_page_size();
+
+ while (max--) {
+ huge_ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+
+ if ((unsigned long)huge_ptr == -1) {
+ ksft_test_result_fail("Failed to allocate huge page\n");
+ return KSFT_FAIL;
+ }
+
+ pthread_create(&thread1, NULL, madv, NULL);
+ pthread_create(&thread2, NULL, touch, NULL);
+ pthread_create(&thread3, NULL, map_extra, NULL);
+
+ pthread_join(thread1, NULL);
+ pthread_join(thread2, NULL);
+ pthread_join(thread3, &ret);
+
+ if (ret) {
+ ksft_test_result_fail("Unexpected huge page allocation\n");
+ return KSFT_FAIL;
+ }
+
+ /* Unmap and restart */
+ munmap(huge_ptr, mmap_size);
+ }
+
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index d11d1febccc3..0dd31892ff67 100644..100755
--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -1,13 +1,17 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
set -e
if [[ $(id -u) -ne 0 ]]; then
echo "This test must be run as root. Skipping..."
- exit 0
+ exit $ksft_skip
fi
+nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
usage_file=usage_in_bytes
if [[ "$1" == "-cgroup-v2" ]]; then
@@ -15,19 +19,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then
usage_file=current
fi
-CGROUP_ROOT='/dev/cgroup/memory'
-MNT='/mnt/huge/'
-if [[ ! -e $CGROUP_ROOT ]]; then
- mkdir -p $CGROUP_ROOT
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=$(mktemp -d)
mount -t cgroup2 none $CGROUP_ROOT
- sleep 1
- echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
- else
+ do_umount=1
+ fi
+ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+else
+ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $CGROUP_ROOT
+ do_umount=1
fi
fi
+MNT='/mnt/huge'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
@@ -47,10 +56,45 @@ function cleanup() {
rmdir "$CGROUP_ROOT"/a/b 2>/dev/null
rmdir "$CGROUP_ROOT"/a 2>/dev/null
rmdir "$CGROUP_ROOT"/test1 2>/dev/null
- echo 0 >/proc/sys/vm/nr_hugepages
+ echo $nr_hugepgs >/proc/sys/vm/nr_hugepages
set -e
}
+function assert_with_retry() {
+ local actual_path="$1"
+ local expected="$2"
+ local tolerance=$((7 * 1024 * 1024))
+ local timeout=20
+ local interval=1
+ local start_time
+ local now
+ local elapsed
+ local actual
+
+ start_time=$(date +%s)
+
+ while true; do
+ actual="$(cat "$actual_path")"
+
+ if [[ $actual -ge $(($expected - $tolerance)) ]] &&
+ [[ $actual -le $(($expected + $tolerance)) ]]; then
+ return 0
+ fi
+
+ now=$(date +%s)
+ elapsed=$((now - start_time))
+
+ if [[ $elapsed -ge $timeout ]]; then
+ echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
+ echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
+ cleanup
+ exit 1
+ fi
+
+ sleep $interval
+ done
+}
+
function assert_state() {
local expected_a="$1"
local expected_a_hugetlb="$2"
@@ -61,58 +105,13 @@ function assert_state() {
expected_b="$3"
expected_b_hugetlb="$4"
fi
- local tolerance=$((5 * 1024 * 1024))
-
- local actual_a
- actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
- if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
- [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
- echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
- echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
-
- local actual_a_hugetlb
- actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
- [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
- echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
- echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
- if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
- return
- fi
-
- local actual_b
- actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
- if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
- [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
- echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
- echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
-
- local actual_b_hugetlb
- actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
- [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
- echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
- echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
+ assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
+ assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
- cleanup
- exit 1
+ if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
+ assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
+ assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
fi
}
@@ -166,7 +165,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
cleanup
echo
-echo
echo Test charge, rmdir, uncharge
setup
echo mkdir
@@ -186,7 +184,6 @@ cleanup
echo done
echo
-echo
if [[ ! $cgroup2 ]]; then
echo "Test parent and child hugetlb usage"
setup
@@ -203,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
assert_state 0 $(($size * 2)) 0 $size
rmdir "$CGROUP_ROOT"/a/b
- sleep 5
echo Assert memory reparent correctly.
assert_state 0 $(($size * 2))
@@ -216,7 +212,6 @@ if [[ ! $cgroup2 ]]; then
fi
echo
-echo
echo "Test child only hugetlb usage"
echo setup
setup
@@ -240,5 +235,9 @@ cleanup
echo ALL PASS
-umount $CGROUP_ROOT
-rm -rf $CGROUP_ROOT
+if [[ $do_umount ]]; then
+ umount $CGROUP_ROOT
+ rm -rf $CGROUP_ROOT
+fi
+
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c
new file mode 100644
index 000000000000..3fe7ef04ac62
--- /dev/null
+++ b/tools/testing/selftests/mm/khugepaged.c
@@ -0,0 +1,1290 @@
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <dirent.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+
+#include "linux/magic.h"
+
+#include "vm_util.h"
+#include "thp_settings.h"
+
+#define BASE_ADDR ((void *)(1UL << 30))
+static unsigned long hpage_pmd_size;
+static unsigned long page_size;
+static int hpage_pmd_nr;
+static int anon_order;
+
+#define PID_SMAPS "/proc/self/smaps"
+#define TEST_FILE "collapse_test_file"
+
+#define MAX_LINE_LENGTH 500
+
+enum vma_type {
+ VMA_ANON,
+ VMA_FILE,
+ VMA_SHMEM,
+};
+
+struct mem_ops {
+ void *(*setup_area)(int nr_hpages);
+ void (*cleanup_area)(void *p, unsigned long size);
+ void (*fault)(void *p, unsigned long start, unsigned long end);
+ bool (*check_huge)(void *addr, int nr_hpages);
+ const char *name;
+};
+
+static struct mem_ops *file_ops;
+static struct mem_ops *anon_ops;
+static struct mem_ops *shmem_ops;
+
+struct collapse_context {
+ void (*collapse)(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect);
+ bool enforce_pte_scan_limits;
+ const char *name;
+};
+
+static struct collapse_context *khugepaged_context;
+static struct collapse_context *madvise_context;
+
+struct file_info {
+ const char *dir;
+ char path[PATH_MAX];
+ enum vma_type type;
+ int fd;
+ char dev_queue_read_ahead_path[PATH_MAX];
+};
+
+static struct file_info finfo;
+static bool skip_settings_restore;
+static int exit_status;
+
+static void success(const char *msg)
+{
+ printf(" \e[32m%s\e[0m\n", msg);
+}
+
+static void fail(const char *msg)
+{
+ printf(" \e[31m%s\e[0m\n", msg);
+ exit_status++;
+}
+
+static void skip(const char *msg)
+{
+ printf(" \e[33m%s\e[0m\n", msg);
+}
+
+static void restore_settings_atexit(void)
+{
+ if (skip_settings_restore)
+ return;
+
+ printf("Restore THP and khugepaged settings...");
+ thp_restore_settings();
+ success("OK");
+
+ skip_settings_restore = true;
+}
+
+static void restore_settings(int sig)
+{
+ /* exit() will invoke the restore_settings_atexit handler. */
+ exit(sig ? EXIT_FAILURE : exit_status);
+}
+
+static void save_settings(void)
+{
+ printf("Save THP and khugepaged settings...");
+ if (file_ops && finfo.type == VMA_FILE)
+ thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path);
+ thp_save_settings();
+
+ success("OK");
+
+ atexit(restore_settings_atexit);
+ signal(SIGTERM, restore_settings);
+ signal(SIGINT, restore_settings);
+ signal(SIGHUP, restore_settings);
+ signal(SIGQUIT, restore_settings);
+}
+
+static void get_finfo(const char *dir)
+{
+ struct stat path_stat;
+ struct statfs fs;
+ char buf[1 << 10];
+ char path[PATH_MAX];
+ char *str, *end;
+
+ finfo.dir = dir;
+ stat(finfo.dir, &path_stat);
+ if (!S_ISDIR(path_stat.st_mode)) {
+ printf("%s: Not a directory (%s)\n", __func__, finfo.dir);
+ exit(EXIT_FAILURE);
+ }
+ if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE,
+ finfo.dir) >= sizeof(finfo.path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ if (statfs(finfo.dir, &fs)) {
+ perror("statfs()");
+ exit(EXIT_FAILURE);
+ }
+ finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE;
+ if (finfo.type == VMA_SHMEM)
+ return;
+
+ /* Find owning device's queue/read_ahead_kb control */
+ if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent",
+ major(path_stat.st_dev), minor(path_stat.st_dev))
+ >= sizeof(path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ if (read_file(path, buf, sizeof(buf)) < 0) {
+ perror("read_file(read_num)");
+ exit(EXIT_FAILURE);
+ }
+ if (strstr(buf, "DEVTYPE=disk")) {
+ /* Found it */
+ if (snprintf(finfo.dev_queue_read_ahead_path,
+ sizeof(finfo.dev_queue_read_ahead_path),
+ "/sys/dev/block/%d:%d/queue/read_ahead_kb",
+ major(path_stat.st_dev), minor(path_stat.st_dev))
+ >= sizeof(finfo.dev_queue_read_ahead_path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+ if (!strstr(buf, "DEVTYPE=partition")) {
+ printf("%s: Unknown device type: %s\n", __func__, path);
+ exit(EXIT_FAILURE);
+ }
+ /*
+ * Partition of block device - need to find actual device.
+ * Using naming convention that devnameN is partition of
+ * device devname.
+ */
+ str = strstr(buf, "DEVNAME=");
+ if (!str) {
+ printf("%s: Could not read: %s", __func__, path);
+ exit(EXIT_FAILURE);
+ }
+ str += 8;
+ end = str;
+ while (*end) {
+ if (isdigit(*end)) {
+ *end = '\0';
+ if (snprintf(finfo.dev_queue_read_ahead_path,
+ sizeof(finfo.dev_queue_read_ahead_path),
+ "/sys/block/%s/queue/read_ahead_kb",
+ str) >= sizeof(finfo.dev_queue_read_ahead_path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+ ++end;
+ }
+ printf("%s: Could not read: %s\n", __func__, path);
+ exit(EXIT_FAILURE);
+}
+
+static bool check_swap(void *addr, unsigned long size)
+{
+ bool swap = false;
+ int ret;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+
+ fp = fopen(PID_SMAPS, "r");
+ if (!fp) {
+ printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+ exit(EXIT_FAILURE);
+ }
+ if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
+ goto err_out;
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
+ size >> 10);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ /*
+ * Fetch the Swap: in the same block and check whether it got
+ * the expected number of hugeepages next.
+ */
+ if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer)))
+ goto err_out;
+
+ if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
+ goto err_out;
+
+ swap = true;
+err_out:
+ fclose(fp);
+ return swap;
+}
+
+static void *alloc_mapping(int nr)
+{
+ void *p;
+
+ p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p != BASE_ADDR) {
+ printf("Failed to allocate VMA at %p\n", BASE_ADDR);
+ exit(EXIT_FAILURE);
+ }
+
+ return p;
+}
+
+static void fill_memory(int *p, unsigned long start, unsigned long end)
+{
+ int i;
+
+ for (i = start / page_size; i < end / page_size; i++)
+ p[i * page_size / sizeof(*p)] = i + 0xdead0000;
+}
+
+/*
+ * MADV_COLLAPSE is a best-effort request and may fail if an internal
+ * resource is temporarily unavailable, in which case it will set errno to
+ * EAGAIN. In such a case, immediately reattempt the operation one more
+ * time.
+ */
+static int madvise_collapse_retry(void *p, unsigned long size)
+{
+ bool retry = true;
+ int ret;
+
+retry:
+ ret = madvise(p, size, MADV_COLLAPSE);
+ if (ret && errno == EAGAIN && retry) {
+ retry = false;
+ goto retry;
+ }
+ return ret;
+}
+
+/*
+ * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with
+ * validate_memory()'able contents.
+ */
+static void *alloc_hpage(struct mem_ops *ops)
+{
+ void *p = ops->setup_area(1);
+
+ ops->fault(p, 0, hpage_pmd_size);
+
+ /*
+ * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE.
+ * The latter is ineligible for collapse by MADV_COLLAPSE
+ * while the former might cause MADV_COLLAPSE to race with
+ * khugepaged on low-load system (like a test machine), which
+ * would cause MADV_COLLAPSE to fail with EAGAIN.
+ */
+ printf("Allocate huge page...");
+ if (madvise_collapse_retry(p, hpage_pmd_size)) {
+ perror("madvise(MADV_COLLAPSE)");
+ exit(EXIT_FAILURE);
+ }
+ if (!ops->check_huge(p, 1)) {
+ perror("madvise(MADV_COLLAPSE)");
+ exit(EXIT_FAILURE);
+ }
+ if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) {
+ perror("madvise(MADV_HUGEPAGE)");
+ exit(EXIT_FAILURE);
+ }
+ success("OK");
+ return p;
+}
+
+static void validate_memory(int *p, unsigned long start, unsigned long end)
+{
+ int i;
+
+ for (i = start / page_size; i < end / page_size; i++) {
+ if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
+ printf("Page %d is corrupted: %#x\n",
+ i, p[i * page_size / sizeof(*p)]);
+ exit(EXIT_FAILURE);
+ }
+ }
+}
+
+static void *anon_setup_area(int nr_hpages)
+{
+ return alloc_mapping(nr_hpages);
+}
+
+static void anon_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+}
+
+static void anon_fault(void *p, unsigned long start, unsigned long end)
+{
+ fill_memory(p, start, end);
+}
+
+static bool anon_check_huge(void *addr, int nr_hpages)
+{
+ return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
+}
+
+static void *file_setup_area(int nr_hpages)
+{
+ int fd;
+ void *p;
+ unsigned long size;
+
+ unlink(finfo.path); /* Cleanup from previous failed tests */
+ printf("Creating %s for collapse%s...", finfo.path,
+ finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
+ fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
+ 777);
+ if (fd < 0) {
+ perror("open()");
+ exit(EXIT_FAILURE);
+ }
+
+ size = nr_hpages * hpage_pmd_size;
+ p = alloc_mapping(nr_hpages);
+ fill_memory(p, 0, size);
+ write(fd, p, size);
+ close(fd);
+ munmap(p, size);
+ success("OK");
+
+ printf("Opening %s read only for collapse...", finfo.path);
+ finfo.fd = open(finfo.path, O_RDONLY, 777);
+ if (finfo.fd < 0) {
+ perror("open()");
+ exit(EXIT_FAILURE);
+ }
+ p = mmap(BASE_ADDR, size, PROT_READ,
+ MAP_PRIVATE, finfo.fd, 0);
+ if (p == MAP_FAILED || p != BASE_ADDR) {
+ perror("mmap()");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Drop page cache */
+ write_file("/proc/sys/vm/drop_caches", "3", 2);
+ success("OK");
+ return p;
+}
+
+static void file_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+ close(finfo.fd);
+ unlink(finfo.path);
+}
+
+static void file_fault(void *p, unsigned long start, unsigned long end)
+{
+ if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) {
+ perror("madvise(MADV_POPULATE_READ");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static bool file_check_huge(void *addr, int nr_hpages)
+{
+ switch (finfo.type) {
+ case VMA_FILE:
+ return check_huge_file(addr, nr_hpages, hpage_pmd_size);
+ case VMA_SHMEM:
+ return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
+ default:
+ exit(EXIT_FAILURE);
+ return false;
+ }
+}
+
+static void *shmem_setup_area(int nr_hpages)
+{
+ void *p;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0);
+ if (finfo.fd < 0) {
+ perror("memfd_create()");
+ exit(EXIT_FAILURE);
+ }
+ if (ftruncate(finfo.fd, size)) {
+ perror("ftruncate()");
+ exit(EXIT_FAILURE);
+ }
+ p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd,
+ 0);
+ if (p != BASE_ADDR) {
+ perror("mmap()");
+ exit(EXIT_FAILURE);
+ }
+ return p;
+}
+
+static void shmem_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+ close(finfo.fd);
+}
+
+static bool shmem_check_huge(void *addr, int nr_hpages)
+{
+ return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
+}
+
+static struct mem_ops __anon_ops = {
+ .setup_area = &anon_setup_area,
+ .cleanup_area = &anon_cleanup_area,
+ .fault = &anon_fault,
+ .check_huge = &anon_check_huge,
+ .name = "anon",
+};
+
+static struct mem_ops __file_ops = {
+ .setup_area = &file_setup_area,
+ .cleanup_area = &file_cleanup_area,
+ .fault = &file_fault,
+ .check_huge = &file_check_huge,
+ .name = "file",
+};
+
+static struct mem_ops __shmem_ops = {
+ .setup_area = &shmem_setup_area,
+ .cleanup_area = &shmem_cleanup_area,
+ .fault = &anon_fault,
+ .check_huge = &shmem_check_huge,
+ .name = "shmem",
+};
+
+static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ int ret;
+ struct thp_settings settings = *thp_current_settings();
+
+ printf("%s...", msg);
+
+ /*
+ * Prevent khugepaged interference and tests that MADV_COLLAPSE
+ * ignores /sys/kernel/mm/transparent_hugepage/enabled
+ */
+ settings.thp_enabled = THP_NEVER;
+ settings.shmem_enabled = SHMEM_NEVER;
+ thp_push_settings(&settings);
+
+ /* Clear VM_NOHUGEPAGE */
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
+ ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size);
+ if (((bool)ret) == expect)
+ fail("Fail: Bad return value");
+ else if (!ops->check_huge(p, expect ? nr_hpages : 0))
+ fail("Fail: check_huge()");
+ else
+ success("OK");
+
+ thp_pop_settings();
+}
+
+static void madvise_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ /* Sanity check */
+ if (!ops->check_huge(p, 0)) {
+ printf("Unexpected huge page\n");
+ exit(EXIT_FAILURE);
+ }
+ __madvise_collapse(msg, p, nr_hpages, ops, expect);
+}
+
+#define TICK 500000
+static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops)
+{
+ int full_scans;
+ int timeout = 6; /* 3 seconds */
+
+ /* Sanity check */
+ if (!ops->check_huge(p, 0)) {
+ printf("Unexpected huge page\n");
+ exit(EXIT_FAILURE);
+ }
+
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
+
+ /* Wait until the second full_scan completed */
+ full_scans = thp_read_num("khugepaged/full_scans") + 2;
+
+ printf("%s...", msg);
+ while (timeout--) {
+ if (ops->check_huge(p, nr_hpages))
+ break;
+ if (thp_read_num("khugepaged/full_scans") >= full_scans)
+ break;
+ printf(".");
+ usleep(TICK);
+ }
+
+ return timeout == -1;
+}
+
+static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ if (wait_for_scan(msg, p, nr_hpages, ops)) {
+ if (expect)
+ fail("Timeout");
+ else
+ success("OK");
+ return;
+ }
+
+ /*
+ * For file and shmem memory, khugepaged only retracts pte entries after
+ * putting the new hugepage in the page cache. The hugepage must be
+ * subsequently refaulted to install the pmd mapping for the mm.
+ */
+ if (ops != &__anon_ops)
+ ops->fault(p, 0, nr_hpages * hpage_pmd_size);
+
+ if (ops->check_huge(p, expect ? nr_hpages : 0))
+ success("OK");
+ else
+ fail("Fail");
+}
+
+static struct collapse_context __khugepaged_context = {
+ .collapse = &khugepaged_collapse,
+ .enforce_pte_scan_limits = true,
+ .name = "khugepaged",
+};
+
+static struct collapse_context __madvise_context = {
+ .collapse = &madvise_collapse,
+ .enforce_pte_scan_limits = false,
+ .name = "madvise",
+};
+
+static bool is_tmpfs(struct mem_ops *ops)
+{
+ return ops == &__file_ops && finfo.type == VMA_SHMEM;
+}
+
+static bool is_anon(struct mem_ops *ops)
+{
+ return ops == &__anon_ops;
+}
+
+static void alloc_at_fault(void)
+{
+ struct thp_settings settings = *thp_current_settings();
+ char *p;
+
+ settings.thp_enabled = THP_ALWAYS;
+ thp_push_settings(&settings);
+
+ p = alloc_mapping(1);
+ *p = 1;
+ printf("Allocate huge page on fault...");
+ if (check_huge_anon(p, 1, hpage_pmd_size))
+ success("OK");
+ else
+ fail("Fail");
+
+ thp_pop_settings();
+
+ madvise(p, page_size, MADV_DONTNEED);
+ printf("Split huge PMD on MADV_DONTNEED...");
+ if (check_huge_anon(p, 0, hpage_pmd_size))
+ success("OK");
+ else
+ fail("Fail");
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+ int nr_hpages = 4;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ p = ops->setup_area(nr_hpages);
+ ops->fault(p, 0, size);
+ c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages,
+ ops, true);
+ validate_memory(p, 0, size);
+ ops->cleanup_area(p, size);
+}
+
+static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = ops->setup_area(1);
+ c->collapse("Do not collapse empty PTE table", p, 1, ops, false);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, page_size);
+ c->collapse("Collapse PTE table with single PTE entry present", p,
+ 1, ops, true);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
+{
+ int max_ptes_none = hpage_pmd_nr / 2;
+ struct thp_settings settings = *thp_current_settings();
+ void *p;
+ int fault_nr_pages = is_anon(ops) ? 1 << anon_order : 1;
+
+ settings.khugepaged.max_ptes_none = max_ptes_none;
+ thp_push_settings(&settings);
+
+ p = ops->setup_area(1);
+
+ if (is_tmpfs(ops)) {
+ /* shmem pages always in the page cache */
+ printf("tmpfs...");
+ skip("Skip");
+ goto skip;
+ }
+
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size);
+ c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
+ ops, !c->enforce_pte_scan_limits);
+ validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size);
+
+ if (c->enforce_pte_scan_limits) {
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+ c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
+ true);
+ validate_memory(p, 0,
+ (hpage_pmd_nr - max_ptes_none) * page_size);
+ }
+skip:
+ ops->cleanup_area(p, hpage_pmd_size);
+ thp_pop_settings();
+}
+
+static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
+
+ printf("Swapout one page...");
+ if (madvise(p, page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ c->collapse("Collapse with swapping in single PTE entry", p, 1, ops,
+ true);
+ validate_memory(p, 0, hpage_pmd_size);
+out:
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
+{
+ int max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap");
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
+
+ printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
+ if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops,
+ !c->enforce_pte_scan_limits);
+ validate_memory(p, 0, hpage_pmd_size);
+
+ if (c->enforce_pte_scan_limits) {
+ ops->fault(p, 0, hpage_pmd_size);
+ printf("Swapout %d of %d pages...", max_ptes_swap,
+ hpage_pmd_nr);
+ if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, max_ptes_swap * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ c->collapse("Collapse with max_ptes_swap pages swapped out", p,
+ 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+ }
+out:
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = alloc_hpage(ops);
+
+ if (is_tmpfs(ops)) {
+ /* MADV_DONTNEED won't evict tmpfs pages */
+ printf("tmpfs...");
+ skip("Skip");
+ goto skip;
+ }
+
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ printf("Split huge page leaving single PTE mapping compound page...");
+ madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse PTE table with single PTE mapping compound page",
+ p, 1, ops, true);
+ validate_memory(p, 0, page_size);
+skip:
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = alloc_hpage(ops);
+ printf("Split huge page leaving single PTE page table full of compound pages...");
+ madvise(p, page_size, MADV_NOHUGEPAGE);
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse PTE table full of compound pages", p, 1, ops,
+ true);
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+ int i;
+
+ p = ops->setup_area(1);
+ for (i = 0; i < hpage_pmd_nr; i++) {
+ printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
+ i + 1, hpage_pmd_nr);
+
+ madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
+ ops->fault(BASE_ADDR, 0, hpage_pmd_size);
+ if (!ops->check_huge(BASE_ADDR, 1)) {
+ printf("Failed to allocate huge page\n");
+ exit(EXIT_FAILURE);
+ }
+ madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+ p = mremap(BASE_ADDR - i * page_size,
+ i * page_size + hpage_pmd_size,
+ (i + 1) * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED,
+ BASE_ADDR + 2 * hpage_pmd_size);
+ if (p == MAP_FAILED) {
+ perror("mremap+unmap");
+ exit(EXIT_FAILURE);
+ }
+
+ p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
+ (i + 1) * page_size,
+ (i + 1) * page_size + hpage_pmd_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED,
+ BASE_ADDR - (i + 1) * page_size);
+ if (p == MAP_FAILED) {
+ perror("mremap+alloc");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ ops->cleanup_area(BASE_ADDR, hpage_pmd_size);
+ ops->fault(p, 0, hpage_pmd_size);
+ if (!ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse PTE table full of different compound pages", p, 1,
+ ops, true);
+
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
+{
+ int wstatus;
+ void *p;
+
+ p = ops->setup_area(1);
+
+ printf("Allocate small page...");
+ ops->fault(p, 0, page_size);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Share small page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ ops->fault(p, page_size, 2 * page_size);
+ c->collapse("Collapse PTE table with single page shared with parent process",
+ p, 1, ops, true);
+
+ validate_memory(p, 0, page_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has small page...");
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, page_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops)
+{
+ int wstatus;
+ void *p;
+
+ p = alloc_hpage(ops);
+ printf("Share huge page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Split huge page PMD in child process...");
+ madvise(p, page_size, MADV_NOHUGEPAGE);
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+ ops->fault(p, 0, page_size);
+
+ thp_write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
+ c->collapse("Collapse PTE table full of compound pages in child",
+ p, 1, ops, true);
+ thp_write_num("khugepaged/max_ptes_shared",
+ thp_current_settings()->khugepaged.max_ptes_shared);
+
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has huge page...");
+ if (ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
+{
+ int max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared");
+ int wstatus;
+ void *p;
+
+ p = alloc_hpage(ops);
+ printf("Share huge page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Maybe collapse with max_ptes_shared exceeded", p,
+ 1, ops, !c->enforce_pte_scan_limits);
+
+ if (c->enforce_pte_scan_limits) {
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) *
+ page_size);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse with max_ptes_shared PTEs shared",
+ p, 1, ops, true);
+ }
+
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has huge page...");
+ if (ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void madvise_collapse_existing_thps(struct collapse_context *c,
+ struct mem_ops *ops)
+{
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
+ c->collapse("Collapse fully populated PTE table...", p, 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+
+ /* c->collapse() will find a hugepage and complain - call directly. */
+ __madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+/*
+ * Test race with khugepaged where page tables have been retracted and
+ * pmd cleared.
+ */
+static void madvise_retracted_page_tables(struct collapse_context *c,
+ struct mem_ops *ops)
+{
+ void *p;
+ int nr_hpages = 1;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ p = ops->setup_area(nr_hpages);
+ ops->fault(p, 0, size);
+
+ /* Let khugepaged collapse and leave pmd cleared */
+ if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages,
+ ops)) {
+ fail("Timeout");
+ return;
+ }
+ success("OK");
+ c->collapse("Install huge PMD from page cache", p, nr_hpages, ops,
+ true);
+ validate_memory(p, 0, size);
+ ops->cleanup_area(p, size);
+}
+
+static void usage(void)
+{
+ fprintf(stderr, "\nUsage: ./khugepaged [OPTIONS] <test type> [dir]\n\n");
+ fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n");
+ fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n");
+ fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n");
+ fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n");
+ fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n");
+ fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n");
+ fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n");
+ fprintf(stderr, "\tmounted with huge=advise option for khugepaged tests to work\n");
+ fprintf(stderr, "\n\tSupported Options:\n");
+ fprintf(stderr, "\t\t-h: This help message.\n");
+ fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n");
+ fprintf(stderr, "\t\t Defaults to 0. Use this size for anon or shmem allocations.\n");
+ exit(1);
+}
+
+static void parse_test_type(int argc, char **argv)
+{
+ int opt;
+ char *buf;
+ const char *token;
+
+ while ((opt = getopt(argc, argv, "s:h")) != -1) {
+ switch (opt) {
+ case 's':
+ anon_order = atoi(optarg);
+ break;
+ case 'h':
+ default:
+ usage();
+ }
+ }
+
+ argv += optind;
+ argc -= optind;
+
+ if (argc == 0) {
+ /* Backwards compatibility */
+ khugepaged_context = &__khugepaged_context;
+ madvise_context = &__madvise_context;
+ anon_ops = &__anon_ops;
+ return;
+ }
+
+ buf = strdup(argv[0]);
+ token = strsep(&buf, ":");
+
+ if (!strcmp(token, "all")) {
+ khugepaged_context = &__khugepaged_context;
+ madvise_context = &__madvise_context;
+ } else if (!strcmp(token, "khugepaged")) {
+ khugepaged_context = &__khugepaged_context;
+ } else if (!strcmp(token, "madvise")) {
+ madvise_context = &__madvise_context;
+ } else {
+ usage();
+ }
+
+ if (!buf)
+ usage();
+
+ if (!strcmp(buf, "all")) {
+ file_ops = &__file_ops;
+ anon_ops = &__anon_ops;
+ shmem_ops = &__shmem_ops;
+ } else if (!strcmp(buf, "anon")) {
+ anon_ops = &__anon_ops;
+ } else if (!strcmp(buf, "file")) {
+ file_ops = &__file_ops;
+ } else if (!strcmp(buf, "shmem")) {
+ shmem_ops = &__shmem_ops;
+ } else {
+ usage();
+ }
+
+ if (!file_ops)
+ return;
+
+ if (argc != 2)
+ usage();
+
+ get_finfo(argv[1]);
+}
+
+int main(int argc, char **argv)
+{
+ int hpage_pmd_order;
+ struct thp_settings default_settings = {
+ .thp_enabled = THP_MADVISE,
+ .thp_defrag = THP_DEFRAG_ALWAYS,
+ .shmem_enabled = SHMEM_ADVISE,
+ .use_zero_page = 0,
+ .khugepaged = {
+ .defrag = 1,
+ .alloc_sleep_millisecs = 10,
+ .scan_sleep_millisecs = 10,
+ },
+ /*
+ * When testing file-backed memory, the collapse path
+ * looks at how many pages are found in the page cache, not
+ * what pages are mapped. Disable read ahead optimization so
+ * pages don't find their way into the page cache unless
+ * we mem_ops->fault() them in.
+ */
+ .read_ahead_kb = 0,
+ };
+
+ if (!thp_is_enabled()) {
+ printf("Transparent Hugepages not available\n");
+ return KSFT_SKIP;
+ }
+
+ parse_test_type(argc, argv);
+
+ setbuf(stdout, NULL);
+
+ page_size = getpagesize();
+ hpage_pmd_size = read_pmd_pagesize();
+ if (!hpage_pmd_size) {
+ printf("Reading PMD pagesize failed");
+ exit(EXIT_FAILURE);
+ }
+ hpage_pmd_nr = hpage_pmd_size / page_size;
+ hpage_pmd_order = __builtin_ctz(hpage_pmd_nr);
+
+ default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
+ default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
+ default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
+ default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
+ default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT;
+ default_settings.hugepages[anon_order].enabled = THP_ALWAYS;
+ default_settings.shmem_hugepages[hpage_pmd_order].enabled = SHMEM_INHERIT;
+ default_settings.shmem_hugepages[anon_order].enabled = SHMEM_ALWAYS;
+
+ save_settings();
+ thp_push_settings(&default_settings);
+
+ alloc_at_fault();
+
+#define TEST(t, c, o) do { \
+ if (c && o) { \
+ printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \
+ t(c, o); \
+ } \
+ } while (0)
+
+ TEST(collapse_full, khugepaged_context, anon_ops);
+ TEST(collapse_full, khugepaged_context, file_ops);
+ TEST(collapse_full, khugepaged_context, shmem_ops);
+ TEST(collapse_full, madvise_context, anon_ops);
+ TEST(collapse_full, madvise_context, file_ops);
+ TEST(collapse_full, madvise_context, shmem_ops);
+
+ TEST(collapse_empty, khugepaged_context, anon_ops);
+ TEST(collapse_empty, madvise_context, anon_ops);
+
+ TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
+ TEST(collapse_single_pte_entry, madvise_context, anon_ops);
+ TEST(collapse_single_pte_entry, madvise_context, file_ops);
+ TEST(collapse_single_pte_entry, madvise_context, shmem_ops);
+
+ TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_none, khugepaged_context, file_ops);
+ TEST(collapse_max_ptes_none, madvise_context, anon_ops);
+ TEST(collapse_max_ptes_none, madvise_context, file_ops);
+
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, file_ops);
+
+ TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, file_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
+ TEST(collapse_full_of_compound, madvise_context, anon_ops);
+ TEST(collapse_full_of_compound, madvise_context, file_ops);
+ TEST(collapse_full_of_compound, madvise_context, shmem_ops);
+
+ TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
+ TEST(collapse_compound_extreme, madvise_context, anon_ops);
+
+ TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops);
+ TEST(collapse_swapin_single_pte, madvise_context, anon_ops);
+
+ TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_swap, madvise_context, anon_ops);
+
+ TEST(collapse_fork, khugepaged_context, anon_ops);
+ TEST(collapse_fork, madvise_context, anon_ops);
+
+ TEST(collapse_fork_compound, khugepaged_context, anon_ops);
+ TEST(collapse_fork_compound, madvise_context, anon_ops);
+
+ TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_shared, madvise_context, anon_ops);
+
+ TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, file_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);
+
+ TEST(madvise_retracted_page_tables, madvise_context, file_ops);
+ TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);
+
+ restore_settings(0);
+}
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
new file mode 100644
index 000000000000..8d874c4754f3
--- /dev/null
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -0,0 +1,759 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KSM functional tests
+ *
+ * Copyright 2022, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <linux/userfaultfd.h>
+
+#include "kselftest.h"
+#include "vm_util.h"
+
+#define KiB 1024u
+#define MiB (1024 * KiB)
+#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child"
+
+#define MAP_MERGE_FAIL ((void *)-1)
+#define MAP_MERGE_SKIP ((void *)-2)
+
+enum ksm_merge_mode {
+ KSM_MERGE_PRCTL,
+ KSM_MERGE_MADVISE,
+ KSM_MERGE_NONE, /* PRCTL already set */
+};
+
+static int mem_fd;
+static int pages_to_scan_fd;
+static int sleep_millisecs_fd;
+static int pagemap_fd;
+static size_t pagesize;
+
+static void init_global_file_handles(void);
+
+static bool range_maps_duplicates(char *addr, unsigned long size)
+{
+ unsigned long offs_a, offs_b, pfn_a, pfn_b;
+
+ /*
+ * There is no easy way to check if there are KSM pages mapped into
+ * this range. We only check that the range does not map the same PFN
+ * twice by comparing each pair of mapped pages.
+ */
+ for (offs_a = 0; offs_a < size; offs_a += pagesize) {
+ pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a);
+ /* Page not present or PFN not exposed by the kernel. */
+ if (pfn_a == -1ul || !pfn_a)
+ continue;
+
+ for (offs_b = offs_a + pagesize; offs_b < size;
+ offs_b += pagesize) {
+ pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b);
+ if (pfn_b == -1ul || !pfn_b)
+ continue;
+ if (pfn_a == pfn_b)
+ return true;
+ }
+ }
+ return false;
+}
+
+static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
+ enum ksm_merge_mode mode)
+{
+ char *map;
+ char *err_map = MAP_MERGE_FAIL;
+ int ret;
+
+ /* Stabilize accounting by disabling KSM completely. */
+ if (ksm_stop() < 0) {
+ ksft_print_msg("Disabling (unmerging) KSM failed\n");
+ return err_map;
+ }
+
+ if (ksm_get_self_merging_pages() > 0) {
+ ksft_print_msg("Still pages merged\n");
+ return err_map;
+ }
+
+ map = mmap(NULL, size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (map == MAP_FAILED) {
+ ksft_print_msg("mmap() failed\n");
+ return err_map;
+ }
+
+ /* Don't use THP. Ignore if THP are not around on a kernel. */
+ if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) {
+ ksft_print_msg("MADV_NOHUGEPAGE failed\n");
+ goto unmap;
+ }
+
+ /* Make sure each page contains the same values to merge them. */
+ memset(map, val, size);
+
+ if (mprotect(map, size, prot)) {
+ ksft_print_msg("mprotect() failed\n");
+ err_map = MAP_MERGE_SKIP;
+ goto unmap;
+ }
+
+ switch (mode) {
+ case KSM_MERGE_PRCTL:
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL) {
+ ksft_print_msg("PR_SET_MEMORY_MERGE not supported\n");
+ err_map = MAP_MERGE_SKIP;
+ goto unmap;
+ } else if (ret) {
+ ksft_print_msg("PR_SET_MEMORY_MERGE=1 failed\n");
+ goto unmap;
+ }
+ break;
+ case KSM_MERGE_MADVISE:
+ if (madvise(map, size, MADV_MERGEABLE)) {
+ ksft_print_msg("MADV_MERGEABLE failed\n");
+ goto unmap;
+ }
+ break;
+ case KSM_MERGE_NONE:
+ break;
+ }
+
+ /* Run KSM to trigger merging and wait. */
+ if (ksm_start() < 0) {
+ ksft_print_msg("Running KSM failed\n");
+ goto unmap;
+ }
+
+ /*
+ * Check if anything was merged at all. Ignore the zero page that is
+ * accounted differently (depending on kernel support).
+ */
+ if (val && !ksm_get_self_merging_pages()) {
+ ksft_print_msg("No pages got merged\n");
+ goto unmap;
+ }
+
+ return map;
+unmap:
+ munmap(map, size);
+ return err_map;
+}
+
+static char *mmap_and_merge_range(char val, unsigned long size, int prot,
+ enum ksm_merge_mode mode)
+{
+ char *map;
+ char *ret = MAP_FAILED;
+
+ map = __mmap_and_merge_range(val, size, prot, mode);
+ if (map == MAP_MERGE_FAIL)
+ ksft_test_result_fail("Merging memory failed");
+ else if (map == MAP_MERGE_SKIP)
+ ksft_test_result_skip("Merging memory skipped");
+ else
+ ret = map;
+
+ return ret;
+}
+
+static void test_unmerge(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
+ if (map == MAP_FAILED)
+ return;
+
+ if (madvise(map, size, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+unmap:
+ ksm_stop();
+ munmap(map, size);
+}
+
+static void test_unmerge_zero_pages(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+ unsigned int offs;
+ unsigned long pages_expected;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ if (ksm_get_self_zero_pages() < 0) {
+ ksft_test_result_skip("accessing \"/proc/self/ksm_stat\" failed\n");
+ return;
+ }
+
+ if (ksm_use_zero_pages() < 0) {
+ ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
+ return;
+ }
+
+ /* Let KSM deduplicate zero pages. */
+ map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
+ if (map == MAP_FAILED)
+ return;
+
+ /* Check if ksm_zero_pages is updated correctly after KSM merging */
+ pages_expected = size / pagesize;
+ if (pages_expected != ksm_get_self_zero_pages()) {
+ ksft_test_result_fail("'ksm_zero_pages' updated after merging\n");
+ goto unmap;
+ }
+
+ /* Try to unmerge half of the region */
+ if (madvise(map, size / 2, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ /* Check if ksm_zero_pages is updated correctly after unmerging */
+ pages_expected /= 2;
+ if (pages_expected != ksm_get_self_zero_pages()) {
+ ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n");
+ goto unmap;
+ }
+
+ /* Trigger unmerging of the other half by writing to the pages. */
+ for (offs = size / 2; offs < size; offs += pagesize)
+ *((unsigned int *)&map[offs]) = offs;
+
+ /* Now we should have no zeropages remaining. */
+ if (ksm_get_self_zero_pages()) {
+ ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n");
+ goto unmap;
+ }
+
+ /* Check if ksm zero pages are really unmerged */
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "KSM zero pages were unmerged\n");
+unmap:
+ ksm_stop();
+ munmap(map, size);
+}
+
+static void test_unmerge_discarded(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
+ if (map == MAP_FAILED)
+ return;
+
+ /* Discard half of all mapped pages so we have pte_none() entries. */
+ if (madvise(map, size / 2, MADV_DONTNEED)) {
+ ksft_test_result_fail("MADV_DONTNEED failed\n");
+ goto unmap;
+ }
+
+ if (madvise(map, size, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+unmap:
+ ksm_stop();
+ munmap(map, size);
+}
+
+#ifdef __NR_userfaultfd
+static void test_unmerge_uffd_wp(void)
+{
+ struct uffdio_writeprotect uffd_writeprotect;
+ const unsigned int size = 2 * MiB;
+ struct uffdio_api uffdio_api;
+ char *map;
+ int uffd;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
+ if (map == MAP_FAILED)
+ return;
+
+ /* See if UFFD is around. */
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ ksft_test_result_skip("__NR_userfaultfd failed\n");
+ goto unmap;
+ }
+
+ /* See if UFFD-WP is around. */
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
+ if (errno == EINVAL)
+ ksft_test_result_skip("The API version requested is not supported\n");
+ else
+ ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno));
+
+ goto close_uffd;
+ }
+ if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
+ ksft_test_result_skip("UFFD_FEATURE_PAGEFAULT_FLAG_WP not available\n");
+ goto close_uffd;
+ }
+
+ /*
+ * UFFDIO_API must only be called once to enable features.
+ * So we close the old userfaultfd and create a new one to
+ * actually enable UFFD_FEATURE_PAGEFAULT_FLAG_WP.
+ */
+ close(uffd);
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ ksft_test_result_fail("__NR_userfaultfd failed\n");
+ goto unmap;
+ }
+
+ /* Now, enable it ("two-step handshake") */
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
+ ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno));
+ goto close_uffd;
+ }
+
+ /* Register UFFD-WP, no need for an actual handler. */
+ if (uffd_register(uffd, map, size, false, true, false)) {
+ ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n");
+ goto close_uffd;
+ }
+
+ /* Write-protect the range using UFFD-WP. */
+ uffd_writeprotect.range.start = (unsigned long) map;
+ uffd_writeprotect.range.len = size;
+ uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+ if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
+ ksft_test_result_fail("UFFDIO_WRITEPROTECT failed\n");
+ goto close_uffd;
+ }
+
+ if (madvise(map, size, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto close_uffd;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+close_uffd:
+ close(uffd);
+unmap:
+ ksm_stop();
+ munmap(map, size);
+}
+#endif
+
+/* Verify that KSM can be enabled / queried with prctl. */
+static void test_prctl(void)
+{
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL) {
+ ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ return;
+ } else if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ return;
+ }
+
+ ret = prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0);
+ if (ret < 0) {
+ ksft_test_result_fail("PR_GET_MEMORY_MERGE failed\n");
+ return;
+ } else if (ret != 1) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 not effective\n");
+ return;
+ }
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
+ if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+ return;
+ }
+
+ ret = prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0);
+ if (ret < 0) {
+ ksft_test_result_fail("PR_GET_MEMORY_MERGE failed\n");
+ return;
+ } else if (ret != 0) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 not effective\n");
+ return;
+ }
+
+ ksft_test_result_pass("Setting/clearing PR_SET_MEMORY_MERGE works\n");
+}
+
+static int test_child_ksm(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ /* Test if KSM is enabled for the process. */
+ if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1)
+ return 1;
+
+ /* Test if merge could really happen. */
+ map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE);
+ if (map == MAP_MERGE_FAIL)
+ return 2;
+ else if (map == MAP_MERGE_SKIP)
+ return 3;
+
+ ksm_stop();
+ munmap(map, size);
+ return 0;
+}
+
+static void test_child_ksm_err(int status)
+{
+ if (status == 1)
+ ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
+ else if (status == 2)
+ ksft_test_result_fail("Merge in child failed\n");
+ else if (status == 3)
+ ksft_test_result_skip("Merge in child skipped\n");
+ else if (status == 4)
+ ksft_test_result_fail("Binary not found\n");
+}
+
+/* Verify that prctl ksm flag is inherited. */
+static void test_prctl_fork(void)
+{
+ int ret, status;
+ pid_t child_pid;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL) {
+ ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ return;
+ } else if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ return;
+ }
+
+ child_pid = fork();
+ if (!child_pid) {
+ init_global_file_handles();
+ exit(test_child_ksm());
+ } else if (child_pid < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ return;
+ }
+
+ if (waitpid(child_pid, &status, 0) < 0) {
+ ksft_test_result_fail("waitpid() failed\n");
+ return;
+ }
+
+ status = WEXITSTATUS(status);
+ if (status) {
+ test_child_ksm_err(status);
+ return;
+ }
+
+ if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+ return;
+ }
+
+ ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
+}
+
+static int start_ksmd_and_set_frequency(char *pages_to_scan, char *sleep_ms)
+{
+ int ksm_fd;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ if (write(ksm_fd, "1", 1) != 1)
+ return -errno;
+
+ if (write(pages_to_scan_fd, pages_to_scan, strlen(pages_to_scan)) <= 0)
+ return -errno;
+
+ if (write(sleep_millisecs_fd, sleep_ms, strlen(sleep_ms)) <= 0)
+ return -errno;
+
+ return 0;
+}
+
+static int stop_ksmd_and_restore_frequency(void)
+{
+ int ksm_fd;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ if (write(ksm_fd, "2", 1) != 1)
+ return -errno;
+
+ if (write(pages_to_scan_fd, "100", 3) <= 0)
+ return -errno;
+
+ if (write(sleep_millisecs_fd, "20", 2) <= 0)
+ return -errno;
+
+ return 0;
+}
+
+static void test_prctl_fork_exec(void)
+{
+ int ret, status;
+ pid_t child_pid;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ if (start_ksmd_and_set_frequency("2000", "0"))
+ ksft_test_result_fail("set ksmd's scanning frequency failed\n");
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL) {
+ ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ return;
+ } else if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ return;
+ }
+
+ child_pid = fork();
+ if (child_pid == -1) {
+ ksft_test_result_skip("fork() failed\n");
+ return;
+ } else if (child_pid == 0) {
+ char *prg_name = "./ksm_functional_tests";
+ char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME, NULL };
+
+ execv(prg_name, argv_for_program);
+ exit(4);
+ }
+
+ if (waitpid(child_pid, &status, 0) > 0) {
+ if (WIFEXITED(status)) {
+ status = WEXITSTATUS(status);
+ if (status) {
+ test_child_ksm_err(status);
+ return;
+ }
+ } else {
+ ksft_test_result_fail("program didn't terminate normally\n");
+ return;
+ }
+ } else {
+ ksft_test_result_fail("waitpid() failed\n");
+ return;
+ }
+
+ if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+ return;
+ }
+
+ if (stop_ksmd_and_restore_frequency()) {
+ ksft_test_result_fail("restore ksmd frequency failed\n");
+ return;
+ }
+
+ ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
+}
+
+static void test_prctl_unmerge(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_PRCTL);
+ if (map == MAP_FAILED)
+ return;
+
+ if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+ goto unmap;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+unmap:
+ ksm_stop();
+ munmap(map, size);
+}
+
+static void test_prot_none(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+ int i;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0x11, size, PROT_NONE, KSM_MERGE_MADVISE);
+ if (map == MAP_FAILED)
+ goto unmap;
+
+ /* Store a unique value in each page on one half using ptrace */
+ for (i = 0; i < size / 2; i += pagesize) {
+ lseek(mem_fd, (uintptr_t) map + i, SEEK_SET);
+ if (write(mem_fd, &i, sizeof(i)) != sizeof(i)) {
+ ksft_test_result_fail("ptrace write failed\n");
+ goto unmap;
+ }
+ }
+
+ /* Trigger unsharing on the other half. */
+ if (madvise(map + size / 2, size / 2, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+unmap:
+ ksm_stop();
+ munmap(map, size);
+}
+
+static void test_fork_ksm_merging_page_count(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+ pid_t child_pid;
+ int status;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
+ if (map == MAP_FAILED)
+ return;
+
+ child_pid = fork();
+ if (!child_pid) {
+ init_global_file_handles();
+ exit(ksm_get_self_merging_pages());
+ } else if (child_pid < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ goto unmap;
+ }
+
+ if (waitpid(child_pid, &status, 0) < 0) {
+ ksft_test_result_fail("waitpid() failed\n");
+ goto unmap;
+ }
+
+ status = WEXITSTATUS(status);
+ if (status) {
+ ksft_test_result_fail("ksm_merging_page in child: %d\n", status);
+ goto unmap;
+ }
+
+ ksft_test_result_pass("ksm_merging_pages is not inherited after fork\n");
+
+unmap:
+ ksm_stop();
+ munmap(map, size);
+}
+
+static void init_global_file_handles(void)
+{
+ mem_fd = open("/proc/self/mem", O_RDWR);
+ if (mem_fd < 0)
+ ksft_exit_fail_msg("opening /proc/self/mem failed\n");
+ if (ksm_stop() < 0)
+ ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/run\") failed\n");
+ if (ksm_get_full_scans() < 0)
+ ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/full_scans\") failed\n");
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
+ if (ksm_get_self_merging_pages() < 0)
+ ksft_exit_skip("accessing \"/proc/self/ksm_merging_pages\") failed\n");
+
+ pages_to_scan_fd = open("/sys/kernel/mm/ksm/pages_to_scan", O_RDWR);
+ if (pages_to_scan_fd < 0)
+ ksft_exit_fail_msg("opening /sys/kernel/mm/ksm/pages_to_scan failed\n");
+ sleep_millisecs_fd = open("/sys/kernel/mm/ksm/sleep_millisecs", O_RDWR);
+ if (sleep_millisecs_fd < 0)
+ ksft_exit_fail_msg("opening /sys/kernel/mm/ksm/sleep_millisecs failed\n");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int tests = 9;
+ int err;
+
+ if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
+ init_global_file_handles();
+ exit(test_child_ksm());
+ }
+
+#ifdef __NR_userfaultfd
+ tests++;
+#endif
+
+ ksft_print_header();
+ ksft_set_plan(tests);
+
+ pagesize = getpagesize();
+
+ init_global_file_handles();
+
+ test_unmerge();
+ test_unmerge_zero_pages();
+ test_unmerge_discarded();
+#ifdef __NR_userfaultfd
+ test_unmerge_uffd_wp();
+#endif
+
+ test_prot_none();
+
+ test_prctl();
+ test_prctl_fork();
+ test_prctl_fork_exec();
+ test_prctl_unmerge();
+ test_fork_ksm_merging_page_count();
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
new file mode 100644
index 000000000000..a0b48b839d54
--- /dev/null
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -0,0 +1,926 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <time.h>
+#include <string.h>
+#include <numa.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <err.h>
+
+#include "kselftest.h"
+#include <include/vdso/time64.h>
+#include "vm_util.h"
+#include "thp_settings.h"
+
+#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
+#define KSM_FP(s) (KSM_SYSFS_PATH s)
+#define KSM_SCAN_LIMIT_SEC_DEFAULT 120
+#define KSM_PAGE_COUNT_DEFAULT 10l
+#define KSM_PROT_STR_DEFAULT "rw"
+#define KSM_USE_ZERO_PAGES_DEFAULT false
+#define KSM_MERGE_ACROSS_NODES_DEFAULT true
+#define KSM_MERGE_TYPE_DEFAULT 0
+#define MB (1ul << 20)
+
+struct ksm_sysfs {
+ unsigned long max_page_sharing;
+ unsigned long merge_across_nodes;
+ unsigned long pages_to_scan;
+ unsigned long run;
+ unsigned long sleep_millisecs;
+ unsigned long stable_node_chains_prune_millisecs;
+ unsigned long use_zero_pages;
+};
+
+enum ksm_merge_type {
+ KSM_MERGE_MADVISE,
+ KSM_MERGE_PRCTL,
+ KSM_MERGE_LAST = KSM_MERGE_PRCTL
+};
+
+enum ksm_test_name {
+ CHECK_KSM_MERGE,
+ CHECK_KSM_UNMERGE,
+ CHECK_KSM_GET_MERGE_TYPE,
+ CHECK_KSM_ZERO_PAGE_MERGE,
+ CHECK_KSM_NUMA_MERGE,
+ KSM_MERGE_TIME,
+ KSM_MERGE_TIME_HUGE_PAGES,
+ KSM_UNMERGE_TIME,
+ KSM_COW_TIME
+};
+
+int debug;
+
+static int ksm_write_sysfs(const char *file_path, unsigned long val)
+{
+ return write_sysfs(file_path, val);
+}
+
+static int ksm_read_sysfs(const char *file_path, unsigned long *val)
+{
+ return read_sysfs(file_path, val);
+}
+
+static void ksm_print_sysfs(void)
+{
+ unsigned long max_page_sharing, pages_sharing, pages_shared;
+ unsigned long full_scans, pages_unshared, pages_volatile;
+ unsigned long stable_node_chains, stable_node_dups;
+ long general_profit;
+
+ if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) ||
+ ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) ||
+ ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing) ||
+ ksm_read_sysfs(KSM_FP("full_scans"), &full_scans) ||
+ ksm_read_sysfs(KSM_FP("pages_unshared"), &pages_unshared) ||
+ ksm_read_sysfs(KSM_FP("pages_volatile"), &pages_volatile) ||
+ ksm_read_sysfs(KSM_FP("stable_node_chains"), &stable_node_chains) ||
+ ksm_read_sysfs(KSM_FP("stable_node_dups"), &stable_node_dups) ||
+ ksm_read_sysfs(KSM_FP("general_profit"), (unsigned long *)&general_profit))
+ return;
+
+ printf("pages_shared : %lu\n", pages_shared);
+ printf("pages_sharing : %lu\n", pages_sharing);
+ printf("max_page_sharing : %lu\n", max_page_sharing);
+ printf("full_scans : %lu\n", full_scans);
+ printf("pages_unshared : %lu\n", pages_unshared);
+ printf("pages_volatile : %lu\n", pages_volatile);
+ printf("stable_node_chains: %lu\n", stable_node_chains);
+ printf("stable_node_dups : %lu\n", stable_node_dups);
+ printf("general_profit : %ld\n", general_profit);
+}
+
+static void ksm_print_procfs(void)
+{
+ const char *file_name = "/proc/self/ksm_stat";
+ char buffer[512];
+ FILE *f = fopen(file_name, "r");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_name);
+ perror("fopen");
+ return;
+ }
+
+ while (fgets(buffer, sizeof(buffer), f))
+ printf("%s", buffer);
+
+ fclose(f);
+}
+
+static int str_to_prot(char *prot_str)
+{
+ int prot = 0;
+
+ if ((strchr(prot_str, 'r')) != NULL)
+ prot |= PROT_READ;
+ if ((strchr(prot_str, 'w')) != NULL)
+ prot |= PROT_WRITE;
+ if ((strchr(prot_str, 'x')) != NULL)
+ prot |= PROT_EXEC;
+
+ return prot;
+}
+
+static void print_help(void)
+{
+ printf("usage: ksm_tests [-h] <test type> [-a prot] [-p page_count] [-l timeout]\n"
+ "[-z use_zero_pages] [-m merge_across_nodes] [-s size]\n");
+
+ printf("Supported <test type>:\n"
+ " -M (page merging)\n"
+ " -Z (zero pages merging)\n"
+ " -N (merging of pages in different NUMA nodes)\n"
+ " -U (page unmerging)\n"
+ " -P evaluate merging time and speed.\n"
+ " For this test, the size of duplicated memory area (in MiB)\n"
+ " must be provided using -s option\n"
+ " -H evaluate merging time and speed of area allocated mostly with huge pages\n"
+ " For this test, the size of duplicated memory area (in MiB)\n"
+ " must be provided using -s option\n"
+ " -D evaluate unmerging time and speed when disabling KSM.\n"
+ " For this test, the size of duplicated memory area (in MiB)\n"
+ " must be provided using -s option\n"
+ " -C evaluate the time required to break COW of merged pages.\n\n");
+
+ printf(" -a: specify the access protections of pages.\n"
+ " <prot> must be of the form [rwx].\n"
+ " Default: %s\n", KSM_PROT_STR_DEFAULT);
+ printf(" -p: specify the number of pages to test.\n"
+ " Default: %ld\n", KSM_PAGE_COUNT_DEFAULT);
+ printf(" -l: limit the maximum running time (in seconds) for a test.\n"
+ " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT);
+ printf(" -z: change use_zero_pages tunable\n"
+ " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT);
+ printf(" -m: change merge_across_nodes tunable\n"
+ " Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT);
+ printf(" -d: turn debugging output on\n");
+ printf(" -s: the size of duplicated memory area (in MiB)\n");
+ printf(" -t: KSM merge type\n"
+ " Default: 0\n"
+ " 0: madvise merging\n"
+ " 1: prctl merging\n");
+
+ exit(0);
+}
+
+static void *allocate_memory(void *ptr, int prot, int mapping, char data, size_t map_size)
+{
+ void *map_ptr = mmap(ptr, map_size, PROT_WRITE, mapping, -1, 0);
+
+ if (!map_ptr) {
+ perror("mmap");
+ return NULL;
+ }
+ memset(map_ptr, data, map_size);
+ if (mprotect(map_ptr, map_size, prot)) {
+ perror("mprotect");
+ munmap(map_ptr, map_size);
+ return NULL;
+ }
+
+ return map_ptr;
+}
+
+static int ksm_do_scan(int scan_count, struct timespec start_time, int timeout)
+{
+ struct timespec cur_time;
+ unsigned long cur_scan, init_scan;
+
+ if (ksm_read_sysfs(KSM_FP("full_scans"), &init_scan))
+ return 1;
+ cur_scan = init_scan;
+
+ while (cur_scan < init_scan + scan_count) {
+ if (ksm_read_sysfs(KSM_FP("full_scans"), &cur_scan))
+ return 1;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time)) {
+ perror("clock_gettime");
+ return 1;
+ }
+ if ((cur_time.tv_sec - start_time.tv_sec) > timeout) {
+ printf("Scan time limit exceeded\n");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int ksm_merge_pages(int merge_type, void *addr, size_t size,
+ struct timespec start_time, int timeout)
+{
+ if (merge_type == KSM_MERGE_MADVISE) {
+ if (madvise(addr, size, MADV_MERGEABLE)) {
+ perror("madvise");
+ return 1;
+ }
+ } else if (merge_type == KSM_MERGE_PRCTL) {
+ if (prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0)) {
+ perror("prctl");
+ return 1;
+ }
+ }
+
+ if (ksm_write_sysfs(KSM_FP("run"), 1))
+ return 1;
+
+ /* Since merging occurs only after 2 scans, make sure to get at least 2 full scans */
+ if (ksm_do_scan(2, start_time, timeout))
+ return 1;
+
+ return 0;
+}
+
+static int ksm_unmerge_pages(void *addr, size_t size,
+ struct timespec start_time, int timeout)
+{
+ if (madvise(addr, size, MADV_UNMERGEABLE)) {
+ perror("madvise");
+ return 1;
+ }
+ return 0;
+}
+
+static bool assert_ksm_pages_count(long dupl_page_count)
+{
+ unsigned long max_page_sharing, pages_sharing, pages_shared;
+
+ if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) ||
+ ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) ||
+ ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing))
+ return false;
+
+ if (debug) {
+ ksm_print_sysfs();
+ ksm_print_procfs();
+ }
+
+ /*
+ * Since there must be at least 2 pages for merging and 1 page can be
+ * shared with the limited number of pages (max_page_sharing), sometimes
+ * there are 'leftover' pages that cannot be merged. For example, if there
+ * are 11 pages and max_page_sharing = 10, then only 10 pages will be
+ * merged and the 11th page won't be affected. As a result, when the number
+ * of duplicate pages is divided by max_page_sharing and the remainder is 1,
+ * pages_shared and pages_sharing values will be equal between dupl_page_count
+ * and dupl_page_count - 1.
+ */
+ if (dupl_page_count % max_page_sharing == 1 || dupl_page_count % max_page_sharing == 0) {
+ if (pages_shared == dupl_page_count / max_page_sharing &&
+ pages_sharing == pages_shared * (max_page_sharing - 1))
+ return true;
+ } else {
+ if (pages_shared == (dupl_page_count / max_page_sharing + 1) &&
+ pages_sharing == dupl_page_count - pages_shared)
+ return true;
+ }
+
+ return false;
+}
+
+static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
+{
+ if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) ||
+ numa_available() ? 0 :
+ ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
+ ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) ||
+ ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) ||
+ ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) ||
+ ksm_read_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+ &ksm_sysfs->stable_node_chains_prune_millisecs) ||
+ ksm_read_sysfs(KSM_FP("use_zero_pages"), &ksm_sysfs->use_zero_pages))
+ return 1;
+
+ return 0;
+}
+
+static int ksm_restore(struct ksm_sysfs *ksm_sysfs)
+{
+ if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) ||
+ numa_available() ? 0 :
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
+ ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) ||
+ ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) ||
+ ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) ||
+ ksm_write_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+ ksm_sysfs->stable_node_chains_prune_millisecs) ||
+ ksm_write_sysfs(KSM_FP("use_zero_pages"), ksm_sysfs->use_zero_pages))
+ return 1;
+
+ return 0;
+}
+
+static int check_ksm_merge(int merge_type, int mapping, int prot,
+ long page_count, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ /* fill pages with the same data and merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(merge_type, map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /* verify that the right number of pages are merged */
+ if (assert_ksm_pages_count(page_count)) {
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ if (merge_type == KSM_MERGE_PRCTL)
+ prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
+ return KSFT_PASS;
+ }
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_unmerge(int merge_type, int mapping, int prot, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+ int page_count = 2;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ /* fill pages with the same data and merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(merge_type, map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /* change 1 byte in each of the 2 pages -- KSM must automatically unmerge them */
+ memset(map_ptr, '-', 1);
+ memset(map_ptr + page_size, '+', 1);
+
+ /* get at least 1 scan, so KSM can detect that the pages were modified */
+ if (ksm_do_scan(1, start_time, timeout))
+ goto err_out;
+
+ /* check that unmerging was successful and 0 pages are currently merged */
+ if (assert_ksm_pages_count(0)) {
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+ }
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_zero_page_merge(int merge_type, int mapping, int prot, long page_count,
+ int timeout, bool use_zero_pages, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ if (ksm_write_sysfs(KSM_FP("use_zero_pages"), use_zero_pages))
+ return KSFT_FAIL;
+
+ /* fill pages with zero and try to merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, 0, page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(merge_type, map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /*
+ * verify that the right number of pages are merged:
+ * 1) if use_zero_pages is set to 1, empty pages are merged
+ * with the kernel zero page instead of with each other;
+ * 2) if use_zero_pages is set to 0, empty pages are not treated specially
+ * and merged as usual.
+ */
+ if (use_zero_pages && !assert_ksm_pages_count(0))
+ goto err_out;
+ else if (!use_zero_pages && !assert_ksm_pages_count(page_count))
+ goto err_out;
+
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int get_next_mem_node(int node)
+{
+
+ long node_size;
+ int mem_node = 0;
+ int i, max_node = numa_max_node();
+
+ for (i = node + 1; i <= max_node + node; i++) {
+ mem_node = i % (max_node + 1);
+ node_size = numa_node_size(mem_node, NULL);
+ if (node_size > 0)
+ break;
+ }
+ return mem_node;
+}
+
+static int get_first_mem_node(void)
+{
+ return get_next_mem_node(numa_max_node());
+}
+
+static int check_ksm_numa_merge(int merge_type, int mapping, int prot, int timeout,
+ bool merge_across_nodes, size_t page_size)
+{
+ void *numa1_map_ptr, *numa2_map_ptr;
+ struct timespec start_time;
+ int page_count = 2;
+ int first_node;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ if (numa_available() < 0) {
+ perror("NUMA support not enabled");
+ return KSFT_SKIP;
+ }
+ if (numa_num_configured_nodes() <= 1) {
+ printf("At least 2 NUMA nodes must be available\n");
+ return KSFT_SKIP;
+ }
+ if (ksm_write_sysfs(KSM_FP("merge_across_nodes"), merge_across_nodes))
+ return KSFT_FAIL;
+
+ /* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */
+ first_node = get_first_mem_node();
+ numa1_map_ptr = numa_alloc_onnode(page_size, first_node);
+ numa2_map_ptr = numa_alloc_onnode(page_size, get_next_mem_node(first_node));
+ if (!numa1_map_ptr || !numa2_map_ptr) {
+ perror("numa_alloc_onnode");
+ return KSFT_FAIL;
+ }
+
+ memset(numa1_map_ptr, '*', page_size);
+ memset(numa2_map_ptr, '*', page_size);
+
+ /* try to merge the pages */
+ if (ksm_merge_pages(merge_type, numa1_map_ptr, page_size, start_time, timeout) ||
+ ksm_merge_pages(merge_type, numa2_map_ptr, page_size, start_time, timeout))
+ goto err_out;
+
+ /*
+ * verify that the right number of pages are merged:
+ * 1) if merge_across_nodes was enabled, 2 duplicate pages will be merged;
+ * 2) if merge_across_nodes = 0, there must be 0 merged pages, since there is
+ * only 1 unique page in each node and they can't be shared.
+ */
+ if (merge_across_nodes && !assert_ksm_pages_count(page_count))
+ goto err_out;
+ else if (!merge_across_nodes && !assert_ksm_pages_count(0))
+ goto err_out;
+
+ numa_free(numa1_map_ptr, page_size);
+ numa_free(numa2_map_ptr, page_size);
+ printf("OK\n");
+ return KSFT_PASS;
+
+err_out:
+ numa_free(numa1_map_ptr, page_size);
+ numa_free(numa2_map_ptr, page_size);
+ printf("Not OK\n");
+ return KSFT_FAIL;
+}
+
+static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot,
+ int timeout, size_t map_size)
+{
+ void *map_ptr, *map_ptr_orig;
+ struct timespec start_time, end_time;
+ unsigned long scan_time_ns;
+ int pagemap_fd, n_normal_pages, n_huge_pages;
+
+ if (!thp_is_enabled()) {
+ printf("Transparent Hugepages not available\n");
+ return KSFT_SKIP;
+ }
+
+ map_size *= MB;
+ size_t len = map_size;
+
+ len -= len % HPAGE_SIZE;
+ map_ptr_orig = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
+ map_ptr = map_ptr_orig + HPAGE_SIZE - (uintptr_t)map_ptr_orig % HPAGE_SIZE;
+
+ if (map_ptr_orig == MAP_FAILED)
+ err(2, "initial mmap");
+
+ if (madvise(map_ptr, len, MADV_HUGEPAGE))
+ err(2, "MADV_HUGEPAGE");
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ err(2, "open pagemap");
+
+ n_normal_pages = 0;
+ n_huge_pages = 0;
+ for (void *p = map_ptr; p < map_ptr + len; p += HPAGE_SIZE) {
+ if (allocate_transhuge(p, pagemap_fd) < 0)
+ n_normal_pages++;
+ else
+ n_huge_pages++;
+ }
+ printf("Number of normal pages: %d\n", n_normal_pages);
+ printf("Number of huge pages: %d\n", n_huge_pages);
+
+ memset(map_ptr, '*', len);
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_merge_pages(merge_type, map_ptr, map_size, start_time, timeout))
+ goto err_out;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n", map_size / MB);
+ printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ scan_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ((double)scan_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr_orig, len + HPAGE_SIZE);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr_orig, len + HPAGE_SIZE);
+ return KSFT_FAIL;
+}
+
+static int ksm_merge_time(int merge_type, int mapping, int prot, int timeout, size_t map_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long scan_time_ns;
+
+ map_size *= MB;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_merge_pages(merge_type, map_ptr, map_size, start_time, timeout))
+ goto err_out;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n", map_size / MB);
+ printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ scan_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ((double)scan_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, map_size);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, map_size);
+ return KSFT_FAIL;
+}
+
+static int ksm_unmerge_time(int merge_type, int mapping, int prot, int timeout, size_t map_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long scan_time_ns;
+
+ map_size *= MB;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
+ if (!map_ptr)
+ return KSFT_FAIL;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_merge_pages(merge_type, map_ptr, map_size, start_time, timeout))
+ goto err_out;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_unmerge_pages(map_ptr, map_size, start_time, timeout))
+ goto err_out;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n", map_size / MB);
+ printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ scan_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ((double)scan_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, map_size);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, map_size);
+ return KSFT_FAIL;
+}
+
+static int ksm_cow_time(int merge_type, int mapping, int prot, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long cow_time_ns;
+
+ /* page_count must be less than 2*page_size */
+ size_t page_count = 4000;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+ for (size_t i = 0; i < page_count - 1; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n\n", (page_size * page_count) / MB);
+ printf("Not merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ /* Create 2000 pairs of duplicate pages */
+ for (size_t i = 0; i < page_count - 1; i = i + 2) {
+ memset(map_ptr + page_size * i, '+', i / 2 + 1);
+ memset(map_ptr + page_size * (i + 1), '+', i / 2 + 1);
+ }
+ if (ksm_merge_pages(merge_type, map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ for (size_t i = 0; i < page_count - 1; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0, opt;
+ int prot = 0;
+ int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT;
+ int merge_type = KSM_MERGE_TYPE_DEFAULT;
+ long page_count = KSM_PAGE_COUNT_DEFAULT;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+ struct ksm_sysfs ksm_sysfs_old;
+ int test_name = CHECK_KSM_MERGE;
+ bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT;
+ bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
+ long size_MB = 0;
+
+ while ((opt = getopt(argc, argv, "dha:p:l:z:m:s:t:MUZNPCHD")) != -1) {
+ switch (opt) {
+ case 'a':
+ prot = str_to_prot(optarg);
+ break;
+ case 'p':
+ page_count = atol(optarg);
+ if (page_count <= 0) {
+ printf("The number of pages must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 'l':
+ ksm_scan_limit_sec = atoi(optarg);
+ if (ksm_scan_limit_sec <= 0) {
+ printf("Timeout value must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 'h':
+ print_help();
+ break;
+ case 'z':
+ if (strcmp(optarg, "0") == 0)
+ use_zero_pages = 0;
+ else
+ use_zero_pages = 1;
+ break;
+ case 'm':
+ if (strcmp(optarg, "0") == 0)
+ merge_across_nodes = 0;
+ else
+ merge_across_nodes = 1;
+ break;
+ case 'd':
+ debug = 1;
+ break;
+ case 's':
+ size_MB = atoi(optarg);
+ if (size_MB <= 0) {
+ printf("Size must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 't':
+ {
+ int tmp = atoi(optarg);
+
+ if (tmp < 0 || tmp > KSM_MERGE_LAST) {
+ printf("Invalid merge type\n");
+ return KSFT_FAIL;
+ }
+ merge_type = tmp;
+ }
+ break;
+ case 'M':
+ break;
+ case 'U':
+ test_name = CHECK_KSM_UNMERGE;
+ break;
+ case 'Z':
+ test_name = CHECK_KSM_ZERO_PAGE_MERGE;
+ break;
+ case 'N':
+ test_name = CHECK_KSM_NUMA_MERGE;
+ break;
+ case 'P':
+ test_name = KSM_MERGE_TIME;
+ break;
+ case 'H':
+ test_name = KSM_MERGE_TIME_HUGE_PAGES;
+ break;
+ case 'D':
+ test_name = KSM_UNMERGE_TIME;
+ break;
+ case 'C':
+ test_name = KSM_COW_TIME;
+ break;
+ default:
+ return KSFT_FAIL;
+ }
+ }
+
+ if (prot == 0)
+ prot = str_to_prot(KSM_PROT_STR_DEFAULT);
+
+ if (access(KSM_SYSFS_PATH, F_OK)) {
+ printf("Config KSM not enabled\n");
+ return KSFT_SKIP;
+ }
+
+ if (ksm_save_def(&ksm_sysfs_old)) {
+ printf("Cannot save default tunables\n");
+ return KSFT_FAIL;
+ }
+
+ if (ksm_write_sysfs(KSM_FP("run"), 2) ||
+ ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
+ numa_available() ? 0 :
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
+ ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
+ return KSFT_FAIL;
+
+ switch (test_name) {
+ case CHECK_KSM_MERGE:
+ ret = check_ksm_merge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
+ ksm_scan_limit_sec, page_size);
+ break;
+ case CHECK_KSM_UNMERGE:
+ ret = check_ksm_unmerge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, page_size);
+ break;
+ case CHECK_KSM_ZERO_PAGE_MERGE:
+ ret = check_ksm_zero_page_merge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ page_count, ksm_scan_limit_sec, use_zero_pages,
+ page_size);
+ break;
+ case CHECK_KSM_NUMA_MERGE:
+ ret = check_ksm_numa_merge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, merge_across_nodes, page_size);
+ break;
+ case KSM_MERGE_TIME:
+ if (size_MB == 0) {
+ printf("Option '-s' is required.\n");
+ return KSFT_FAIL;
+ }
+ ret = ksm_merge_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, size_MB);
+ break;
+ case KSM_MERGE_TIME_HUGE_PAGES:
+ if (size_MB == 0) {
+ printf("Option '-s' is required.\n");
+ return KSFT_FAIL;
+ }
+ ret = ksm_merge_hugepages_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, size_MB);
+ break;
+ case KSM_UNMERGE_TIME:
+ if (size_MB == 0) {
+ printf("Option '-s' is required.\n");
+ return KSFT_FAIL;
+ }
+ ret = ksm_unmerge_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, size_MB);
+ break;
+ case KSM_COW_TIME:
+ ret = ksm_cow_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, page_size);
+ break;
+ }
+
+ if (ksm_restore(&ksm_sysfs_old)) {
+ printf("Cannot restore default tunables\n");
+ return KSFT_FAIL;
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
new file mode 100644
index 000000000000..88050e0f829a
--- /dev/null
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+ *
+ * Copyright 2021, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "vm_util.h"
+
+/*
+ * For now, we're using 2 MiB of private anonymous memory for all tests.
+ */
+#define SIZE (2 * 1024 * 1024)
+
+static size_t pagesize;
+
+static void sense_support(void)
+{
+ char *addr;
+ int ret;
+
+ addr = mmap(0, pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (!addr)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, pagesize, MADV_POPULATE_READ);
+ if (ret)
+ ksft_exit_skip("MADV_POPULATE_READ is not available\n");
+
+ ret = madvise(addr, pagesize, MADV_POPULATE_WRITE);
+ if (ret)
+ ksft_exit_skip("MADV_POPULATE_WRITE is not available\n");
+
+ munmap(addr, pagesize);
+}
+
+static void test_prot_read(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ with PROT_READ\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == EINVAL,
+ "MADV_POPULATE_WRITE with PROT_READ\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_prot_write(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == EINVAL,
+ "MADV_POPULATE_READ with PROT_WRITE\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE with PROT_WRITE\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_holes(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ret = munmap(addr + pagesize, pagesize);
+ if (ret)
+ ksft_exit_fail_msg("munmap failed\n");
+
+ /* Hole in the middle */
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes in the middle\n");
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes in the middle\n");
+
+ /* Hole at end */
+ ret = madvise(addr, 2 * pagesize, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes at the end\n");
+ ret = madvise(addr, 2 * pagesize, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes at the end\n");
+
+ /* Hole at beginning */
+ ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes at the beginning\n");
+ ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes at the beginning\n");
+
+ munmap(addr, SIZE);
+}
+
+static bool range_is_populated(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (!pagemap_is_populated(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static bool range_is_not_populated(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (pagemap_is_populated(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static void test_populate_read(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ksft_test_result(range_is_not_populated(addr, SIZE),
+ "read range initially not populated\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(range_is_populated(addr, SIZE),
+ "read range is populated\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_populate_write(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ksft_test_result(range_is_not_populated(addr, SIZE),
+ "write range initially not populated\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(range_is_populated(addr, SIZE),
+ "write range is populated\n");
+
+ munmap(addr, SIZE);
+}
+
+static bool range_is_softdirty(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (!pagemap_is_softdirty(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static bool range_is_not_softdirty(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (pagemap_is_softdirty(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static void test_softdirty(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* Clear any softdirty bits. */
+ clear_softdirty();
+ ksft_test_result(range_is_not_softdirty(addr, SIZE),
+ "cleared range is not softdirty\n");
+
+ /* Populating READ should set softdirty. */
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n");
+ ksft_test_result(range_is_not_softdirty(addr, SIZE),
+ "range is not softdirty after MADV_POPULATE_READ\n");
+
+ /* Populating WRITE should set softdirty. */
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n");
+ ksft_test_result(range_is_softdirty(addr, SIZE),
+ "range is softdirty after MADV_POPULATE_WRITE \n");
+
+ munmap(addr, SIZE);
+}
+
+int main(int argc, char **argv)
+{
+ int nr_tests = 16;
+ int err;
+
+ pagesize = getpagesize();
+
+ if (softdirty_supported())
+ nr_tests += 5;
+
+ ksft_print_header();
+ ksft_set_plan(nr_tests);
+
+ sense_support();
+ test_prot_read();
+ test_prot_write();
+ test_holes();
+ test_populate_read();
+ test_populate_write();
+ if (softdirty_supported())
+ test_softdirty();
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
new file mode 100644
index 000000000000..11241edde7fe
--- /dev/null
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test that MAP_FIXED_NOREPLACE works.
+ *
+ * Copyright 2018, Jann Horn <jannh@google.com>
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#include <sys/mman.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "kselftest.h"
+
+static void dump_maps(void)
+{
+ char cmd[32];
+
+ snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid());
+ system(cmd);
+}
+
+static unsigned long find_base_addr(unsigned long size)
+{
+ void *addr;
+ unsigned long flags;
+
+ flags = MAP_PRIVATE | MAP_ANONYMOUS;
+ addr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("Error: couldn't map the space we need for the test\n");
+
+ if (munmap(addr, size) != 0)
+ ksft_exit_fail_msg("Error: munmap failed\n");
+
+ return (unsigned long)addr;
+}
+
+int main(void)
+{
+ unsigned long base_addr;
+ unsigned long flags, addr, size, page_size;
+ char *p;
+
+ ksft_print_header();
+ ksft_set_plan(9);
+
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ /* let's find a base addr that is free before we start the tests */
+ size = 5 * page_size;
+ base_addr = find_base_addr(size);
+
+ flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
+
+ /* Check we can map all the areas we need below */
+ addr = base_addr;
+ size = 5 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ if (p == MAP_FAILED) {
+ dump_maps();
+ ksft_exit_fail_msg("Error: couldn't map the space we need for the test\n");
+ }
+ if (munmap((void *)addr, 5 * page_size) != 0) {
+ dump_maps();
+ ksft_exit_fail_msg("Error: munmap failed!?\n");
+ }
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
+
+ addr = base_addr + page_size;
+ size = 3 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ if (p == MAP_FAILED) {
+ dump_maps();
+ ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n");
+ }
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n");
+
+ /*
+ * Exact same mapping again:
+ * base | free | new
+ * +1 | mapped | new
+ * +2 | mapped | new
+ * +3 | mapped | new
+ * +4 | free | new
+ */
+ addr = base_addr;
+ size = 5 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ if (p != MAP_FAILED) {
+ dump_maps();
+ ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");
+ }
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("Second mmap() 5*PAGE_SIZE at base\n");
+
+ /*
+ * Second mapping contained within first:
+ *
+ * base | free |
+ * +1 | mapped |
+ * +2 | mapped | new
+ * +3 | mapped |
+ * +4 | free |
+ */
+ addr = base_addr + (2 * page_size);
+ size = page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ if (p != MAP_FAILED) {
+ dump_maps();
+ ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n");
+ }
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n");
+
+ /*
+ * Overlap end of existing mapping:
+ * base | free |
+ * +1 | mapped |
+ * +2 | mapped |
+ * +3 | mapped | new
+ * +4 | free | new
+ */
+ addr = base_addr + (3 * page_size);
+ size = 2 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ if (p != MAP_FAILED) {
+ dump_maps();
+ ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n");
+ }
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n");
+
+ /*
+ * Overlap start of existing mapping:
+ * base | free | new
+ * +1 | mapped | new
+ * +2 | mapped |
+ * +3 | mapped |
+ * +4 | free |
+ */
+ addr = base_addr;
+ size = 2 * page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ if (p != MAP_FAILED) {
+ dump_maps();
+ ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n");
+ }
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n");
+
+ /*
+ * Adjacent to start of existing mapping:
+ * base | free | new
+ * +1 | mapped |
+ * +2 | mapped |
+ * +3 | mapped |
+ * +4 | free |
+ */
+ addr = base_addr;
+ size = page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ if (p == MAP_FAILED) {
+ dump_maps();
+ ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n");
+ }
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() PAGE_SIZE at base\n");
+
+ /*
+ * Adjacent to end of existing mapping:
+ * base | free |
+ * +1 | mapped |
+ * +2 | mapped |
+ * +3 | mapped |
+ * +4 | free | new
+ */
+ addr = base_addr + (4 * page_size);
+ size = page_size;
+ p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+ if (p == MAP_FAILED) {
+ dump_maps();
+ ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n");
+ }
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n");
+
+ addr = base_addr;
+ size = 5 * page_size;
+ if (munmap((void *)addr, size) != 0) {
+ dump_maps();
+ ksft_exit_fail_msg("Error: munmap failed!?\n");
+ }
+ ksft_test_result_pass("Base Address unmap() successful\n");
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c
new file mode 100644
index 000000000000..aa409107611b
--- /dev/null
+++ b/tools/testing/selftests/mm/map_hugetlb.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Example of using hugepage memory in a user application using the mmap
+ * system call with MAP_HUGETLB flag. Before running this program make
+ * sure the administrator has allocated enough default sized huge pages
+ * to cover the 256 MB allocation.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "vm_util.h"
+#include "kselftest.h"
+
+#define LENGTH (256UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+static void check_bytes(char *addr)
+{
+ ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr, size_t length)
+{
+ unsigned long i;
+
+ for (i = 0; i < length; i++)
+ *(addr + i) = (char)i;
+}
+
+static void read_bytes(char *addr, size_t length)
+{
+ unsigned long i;
+
+ check_bytes(addr);
+ for (i = 0; i < length; i++)
+ if (*(addr + i) != (char)i)
+ ksft_exit_fail_msg("Mismatch at %lu\n", i);
+
+ ksft_test_result_pass("Read correct data\n");
+}
+
+int main(int argc, char **argv)
+{
+ void *addr;
+ size_t hugepage_size;
+ size_t length = LENGTH;
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
+ int shift = 0;
+
+ hugepage_size = default_huge_page_size();
+ /* munmap with fail if the length is not page aligned */
+ if (hugepage_size > length)
+ length = hugepage_size;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ if (argc > 1)
+ length = atol(argv[1]) << 20;
+ if (argc > 2) {
+ shift = atoi(argv[2]);
+ if (shift)
+ flags |= (shift & MAP_HUGE_MASK) << MAP_HUGE_SHIFT;
+ }
+
+ if (shift)
+ ksft_print_msg("%u kB hugepages\n", 1 << (shift - 10));
+ else
+ ksft_print_msg("Default size hugepages\n");
+ ksft_print_msg("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
+
+ addr = mmap(NULL, length, PROTECTION, flags, -1, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
+
+ ksft_print_msg("Returned address is %p\n", addr);
+ check_bytes(addr);
+ write_bytes(addr, length);
+ read_bytes(addr, length);
+
+ /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+ if (munmap(addr, length))
+ ksft_exit_fail_msg("munmap: %s\n", strerror(errno));
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/vm/map_populate.c b/tools/testing/selftests/mm/map_populate.c
index 6b8aeaa0bf7a..712327f4e932 100644
--- a/tools/testing/selftests/vm/map_populate.c
+++ b/tools/testing/selftests/mm/map_populate.c
@@ -16,21 +16,23 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include "kselftest.h"
+
+#include "vm_util.h"
-#ifndef MMAP_SZ
#define MMAP_SZ 4096
-#endif
-
-#define BUG_ON(condition, description) \
- do { \
- if (condition) { \
- fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \
- __LINE__, (description), strerror(errno)); \
- exit(1); \
- } \
+
+#define BUG_ON(condition, description) \
+ do { \
+ if (condition) \
+ ksft_exit_fail_msg("[FAIL]\t%s:%d\t%s:%s\n", \
+ __func__, __LINE__, (description), \
+ strerror(errno)); \
} while (0)
-static int parent_f(int sock, unsigned long *smap, int child)
+#define TESTS_IN_CHILD 2
+
+static void parent_f(int sock, unsigned long *smap, int child)
{
int status, ret;
@@ -45,9 +47,10 @@ static int parent_f(int sock, unsigned long *smap, int child)
BUG_ON(ret <= 0, "write(sock)");
waitpid(child, &status, 0);
- BUG_ON(!WIFEXITED(status), "child in unexpected state");
- return WEXITSTATUS(status);
+ /* The ksft macros don't keep counters between processes */
+ ksft_cnt.ksft_pass = WEXITSTATUS(status);
+ ksft_cnt.ksft_fail = TESTS_IN_CHILD - WEXITSTATUS(status);
}
static int child_f(int sock, unsigned long *smap, int fd)
@@ -66,10 +69,11 @@ static int child_f(int sock, unsigned long *smap, int fd)
ret = read(sock, &buf, sizeof(int));
BUG_ON(ret <= 0, "read(sock)");
- BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page");
- BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted");
+ ksft_test_result(*smap != 0x22222BAD, "MAP_POPULATE COW private page\n");
+ ksft_test_result(*smap == 0xdeadbabe, "The mapping state\n");
- return 0;
+ /* The ksft macros don't keep counters between processes */
+ return ksft_cnt.ksft_pass;
}
int main(int argc, char **argv)
@@ -78,10 +82,16 @@ int main(int argc, char **argv)
FILE *ftmp;
unsigned long *smap;
+ ksft_print_header();
+ ksft_set_plan(TESTS_IN_CHILD);
+
ftmp = tmpfile();
- BUG_ON(ftmp == 0, "tmpfile()");
+ BUG_ON(!ftmp, "tmpfile()");
ret = ftruncate(fileno(ftmp), MMAP_SZ);
+ if (ret < 0 && errno == ENOENT) {
+ skip_test_dodgy_fs("ftruncate()");
+ }
BUG_ON(ret, "ftruncate()");
smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
@@ -103,7 +113,9 @@ int main(int argc, char **argv)
ret = close(sock[0]);
BUG_ON(ret, "close()");
- return parent_f(sock[1], smap, child);
+ parent_f(sock[1], smap, child);
+
+ ksft_finished();
}
ret = close(sock[1]);
diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c
new file mode 100644
index 000000000000..647779653da0
--- /dev/null
+++ b/tools/testing/selftests/mm/mdwe_test.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef __aarch64__
+#include <asm/hwcap.h>
+#endif
+
+#include <linux/mman.h>
+#include <linux/prctl.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "kselftest_harness.h"
+
+#ifndef __aarch64__
+# define PROT_BTI 0
+#endif
+
+TEST(prctl_flags)
+{
+ EXPECT_LT(prctl(PR_SET_MDWE, PR_MDWE_NO_INHERIT, 0L, 0L, 7L), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ EXPECT_LT(prctl(PR_SET_MDWE, 7L, 0L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_SET_MDWE, 0L, 7L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 7L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 0L, 7L), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ EXPECT_LT(prctl(PR_GET_MDWE, 7L, 0L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_GET_MDWE, 0L, 7L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 7L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 0L, 7L), 0);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+FIXTURE(consecutive_prctl_flags) {};
+FIXTURE_SETUP(consecutive_prctl_flags) {}
+FIXTURE_TEARDOWN(consecutive_prctl_flags) {}
+
+FIXTURE_VARIANT(consecutive_prctl_flags)
+{
+ unsigned long first_flags;
+ unsigned long second_flags;
+ bool should_work;
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_no_flags)
+{
+ .first_flags = 0,
+ .second_flags = 0,
+ .should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_exec_gain)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_both_flags)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .second_flags = 0,
+ .should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe_no_inherit)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .second_flags = 0,
+ .should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_no_inherit)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_enable_no_inherit)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .should_work = false,
+};
+
+TEST_F(consecutive_prctl_flags, two_prctls)
+{
+ int ret;
+
+ EXPECT_EQ(prctl(PR_SET_MDWE, variant->first_flags, 0L, 0L, 0L), 0);
+
+ ret = prctl(PR_SET_MDWE, variant->second_flags, 0L, 0L, 0L);
+ if (variant->should_work) {
+ EXPECT_EQ(ret, 0);
+
+ ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L);
+ ASSERT_EQ(ret, variant->second_flags);
+ } else {
+ EXPECT_NE(ret, 0);
+ ASSERT_EQ(errno, EPERM);
+ }
+}
+
+FIXTURE(mdwe)
+{
+ void *p;
+ int flags;
+ size_t size;
+ pid_t pid;
+};
+
+FIXTURE_VARIANT(mdwe)
+{
+ bool enabled;
+ bool forked;
+ bool inherit;
+};
+
+FIXTURE_VARIANT_ADD(mdwe, stock)
+{
+ .enabled = false,
+ .forked = false,
+ .inherit = false,
+};
+
+FIXTURE_VARIANT_ADD(mdwe, enabled)
+{
+ .enabled = true,
+ .forked = false,
+ .inherit = true,
+};
+
+FIXTURE_VARIANT_ADD(mdwe, inherited)
+{
+ .enabled = true,
+ .forked = true,
+ .inherit = true,
+};
+
+FIXTURE_VARIANT_ADD(mdwe, not_inherited)
+{
+ .enabled = true,
+ .forked = true,
+ .inherit = false,
+};
+
+static bool executable_map_should_fail(const FIXTURE_VARIANT(mdwe) *variant)
+{
+ return variant->enabled && (!variant->forked || variant->inherit);
+}
+
+FIXTURE_SETUP(mdwe)
+{
+ unsigned long mdwe_flags;
+ int ret, status;
+
+ self->p = NULL;
+ self->flags = MAP_SHARED | MAP_ANONYMOUS;
+ self->size = getpagesize();
+
+ if (!variant->enabled)
+ return;
+
+ mdwe_flags = PR_MDWE_REFUSE_EXEC_GAIN;
+ if (!variant->inherit)
+ mdwe_flags |= PR_MDWE_NO_INHERIT;
+
+ ret = prctl(PR_SET_MDWE, mdwe_flags, 0L, 0L, 0L);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("PR_SET_MDWE failed or unsupported");
+ }
+
+ ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L);
+ ASSERT_EQ(ret, mdwe_flags);
+
+ if (variant->forked) {
+ self->pid = fork();
+ ASSERT_GE(self->pid, 0) {
+ TH_LOG("fork failed\n");
+ }
+
+ if (self->pid > 0) {
+ ret = waitpid(self->pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ exit(WEXITSTATUS(status));
+ }
+ }
+}
+
+FIXTURE_TEARDOWN(mdwe)
+{
+ if (self->p && self->p != MAP_FAILED)
+ munmap(self->p, self->size);
+}
+
+TEST_F(mdwe, mmap_READ_EXEC)
+{
+ self->p = mmap(NULL, self->size, PROT_READ | PROT_EXEC, self->flags, 0, 0);
+ EXPECT_NE(self->p, MAP_FAILED);
+}
+
+TEST_F(mdwe, mmap_WRITE_EXEC)
+{
+ self->p = mmap(NULL, self->size, PROT_WRITE | PROT_EXEC, self->flags, 0, 0);
+ if (executable_map_should_fail(variant)) {
+ EXPECT_EQ(self->p, MAP_FAILED);
+ } else {
+ EXPECT_NE(self->p, MAP_FAILED);
+ }
+}
+
+TEST_F(mdwe, mprotect_stay_EXEC)
+{
+ int ret;
+
+ self->p = mmap(NULL, self->size, PROT_READ | PROT_EXEC, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ ret = mprotect(self->p, self->size, PROT_READ | PROT_EXEC);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_F(mdwe, mprotect_add_EXEC)
+{
+ int ret;
+
+ self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ ret = mprotect(self->p, self->size, PROT_READ | PROT_EXEC);
+ if (executable_map_should_fail(variant)) {
+ EXPECT_LT(ret, 0);
+ } else {
+ EXPECT_EQ(ret, 0);
+ }
+}
+
+TEST_F(mdwe, mprotect_WRITE_EXEC)
+{
+ int ret;
+
+ self->p = mmap(NULL, self->size, PROT_WRITE, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ ret = mprotect(self->p, self->size, PROT_WRITE | PROT_EXEC);
+ if (executable_map_should_fail(variant)) {
+ EXPECT_LT(ret, 0);
+ } else {
+ EXPECT_EQ(ret, 0);
+ }
+}
+
+TEST_F(mdwe, mmap_FIXED)
+{
+ void *p;
+
+ self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ /* MAP_FIXED unmaps the existing page before mapping which is allowed */
+ p = mmap(self->p, self->size, PROT_READ | PROT_EXEC,
+ self->flags | MAP_FIXED, 0, 0);
+ EXPECT_EQ(p, self->p);
+}
+
+TEST_F(mdwe, arm64_BTI)
+{
+ int ret;
+
+#ifdef __aarch64__
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_BTI))
+#endif
+ SKIP(return, "HWCAP2_BTI not supported");
+
+ self->p = mmap(NULL, self->size, PROT_EXEC, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ ret = mprotect(self->p, self->size, PROT_EXEC | PROT_BTI);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
new file mode 100644
index 000000000000..aac4f795c327
--- /dev/null
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2021
+ *
+ * Author: Mike Rapoport <rppt@linux.ibm.com>
+ */
+
+#define _GNU_SOURCE
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/resource.h>
+#include <sys/capability.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include "kselftest.h"
+
+#define fail(fmt, ...) ksft_test_result_fail(fmt, ##__VA_ARGS__)
+#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
+#define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__)
+
+#ifdef __NR_memfd_secret
+
+#define PATTERN 0x55
+
+static const int prot = PROT_READ | PROT_WRITE;
+static const int mode = MAP_SHARED;
+
+static unsigned long page_size;
+static unsigned long mlock_limit_cur;
+static unsigned long mlock_limit_max;
+
+static int memfd_secret(unsigned int flags)
+{
+ return syscall(__NR_memfd_secret, flags);
+}
+
+static void test_file_apis(int fd)
+{
+ char buf[64];
+
+ if ((read(fd, buf, sizeof(buf)) >= 0) ||
+ (write(fd, buf, sizeof(buf)) >= 0) ||
+ (pread(fd, buf, sizeof(buf), 0) >= 0) ||
+ (pwrite(fd, buf, sizeof(buf), 0) >= 0))
+ fail("unexpected file IO\n");
+ else
+ pass("file IO is blocked as expected\n");
+}
+
+static void test_mlock_limit(int fd)
+{
+ size_t len;
+ char *mem;
+
+ len = mlock_limit_cur;
+ if (len % page_size != 0)
+ len = (len/page_size) * page_size;
+
+ mem = mmap(NULL, len, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("unable to mmap secret memory\n");
+ return;
+ }
+ munmap(mem, len);
+
+ len = mlock_limit_max * 2;
+ mem = mmap(NULL, len, prot, mode, fd, 0);
+ if (mem != MAP_FAILED) {
+ fail("unexpected mlock limit violation\n");
+ munmap(mem, len);
+ return;
+ }
+
+ pass("mlock limit is respected\n");
+}
+
+static void test_vmsplice(int fd, const char *desc)
+{
+ ssize_t transferred;
+ struct iovec iov;
+ int pipefd[2];
+ char *mem;
+
+ if (pipe(pipefd)) {
+ fail("pipe failed: %s\n", strerror(errno));
+ return;
+ }
+
+ mem = mmap(NULL, page_size, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("Unable to mmap secret memory\n");
+ goto close_pipe;
+ }
+
+ /*
+ * vmsplice() may use GUP-fast, which must also fail. Prefault the
+ * page table, so GUP-fast could find it.
+ */
+ memset(mem, PATTERN, page_size);
+
+ iov.iov_base = mem;
+ iov.iov_len = page_size;
+ transferred = vmsplice(pipefd[1], &iov, 1, 0);
+
+ if (transferred < 0 && errno == EFAULT)
+ pass("vmsplice is blocked as expected with %s\n", desc);
+ else
+ fail("vmsplice: unexpected memory access with %s\n", desc);
+
+ munmap(mem, page_size);
+close_pipe:
+ close(pipefd[0]);
+ close(pipefd[1]);
+}
+
+static void try_process_vm_read(int fd, int pipefd[2])
+{
+ struct iovec liov, riov;
+ char buf[64];
+ char *mem;
+
+ if (read(pipefd[0], &mem, sizeof(mem)) < 0) {
+ fail("pipe write: %s\n", strerror(errno));
+ exit(KSFT_FAIL);
+ }
+
+ liov.iov_len = riov.iov_len = sizeof(buf);
+ liov.iov_base = buf;
+ riov.iov_base = mem;
+
+ if (process_vm_readv(getppid(), &liov, 1, &riov, 1, 0) < 0) {
+ if (errno == ENOSYS)
+ exit(KSFT_SKIP);
+ exit(KSFT_PASS);
+ }
+
+ exit(KSFT_FAIL);
+}
+
+static void try_ptrace(int fd, int pipefd[2])
+{
+ pid_t ppid = getppid();
+ int status;
+ char *mem;
+ long ret;
+
+ if (read(pipefd[0], &mem, sizeof(mem)) < 0) {
+ perror("pipe write");
+ exit(KSFT_FAIL);
+ }
+
+ ret = ptrace(PTRACE_ATTACH, ppid, 0, 0);
+ if (ret) {
+ perror("ptrace_attach");
+ exit(KSFT_FAIL);
+ }
+
+ ret = waitpid(ppid, &status, WUNTRACED);
+ if ((ret != ppid) || !(WIFSTOPPED(status))) {
+ fprintf(stderr, "weird waitppid result %ld stat %x\n",
+ ret, status);
+ exit(KSFT_FAIL);
+ }
+
+ if (ptrace(PTRACE_PEEKDATA, ppid, mem, 0))
+ exit(KSFT_PASS);
+
+ exit(KSFT_FAIL);
+}
+
+static void check_child_status(pid_t pid, const char *name)
+{
+ int status;
+
+ waitpid(pid, &status, 0);
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == KSFT_SKIP) {
+ skip("%s is not supported\n", name);
+ return;
+ }
+
+ if ((WIFEXITED(status) && WEXITSTATUS(status) == KSFT_PASS) ||
+ WIFSIGNALED(status)) {
+ pass("%s is blocked as expected\n", name);
+ return;
+ }
+
+ fail("%s: unexpected memory access\n", name);
+}
+
+static void test_remote_access(int fd, const char *name,
+ void (*func)(int fd, int pipefd[2]))
+{
+ int pipefd[2];
+ pid_t pid;
+ char *mem;
+
+ if (pipe(pipefd)) {
+ fail("pipe failed: %s\n", strerror(errno));
+ return;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ fail("fork failed: %s\n", strerror(errno));
+ return;
+ }
+
+ if (pid == 0) {
+ func(fd, pipefd);
+ return;
+ }
+
+ mem = mmap(NULL, page_size, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("Unable to mmap secret memory\n");
+ return;
+ }
+
+ memset(mem, PATTERN, page_size);
+
+ if (write(pipefd[1], &mem, sizeof(mem)) < 0) {
+ fail("pipe write: %s\n", strerror(errno));
+ return;
+ }
+
+ check_child_status(pid, name);
+}
+
+static void test_process_vm_read(int fd)
+{
+ test_remote_access(fd, "process_vm_read", try_process_vm_read);
+}
+
+static void test_ptrace(int fd)
+{
+ test_remote_access(fd, "ptrace", try_ptrace);
+}
+
+static int set_cap_limits(rlim_t max)
+{
+ struct rlimit new;
+ cap_t cap = cap_init();
+
+ new.rlim_cur = max;
+ new.rlim_max = max;
+ if (setrlimit(RLIMIT_MEMLOCK, &new)) {
+ perror("setrlimit() returns error");
+ return -1;
+ }
+
+ /* drop capabilities including CAP_IPC_LOCK */
+ if (cap_set_proc(cap)) {
+ perror("cap_set_proc() returns error");
+ return -2;
+ }
+
+ return 0;
+}
+
+static void prepare(void)
+{
+ struct rlimit rlim;
+
+ page_size = sysconf(_SC_PAGE_SIZE);
+ if (!page_size)
+ ksft_exit_fail_msg("Failed to get page size %s\n",
+ strerror(errno));
+
+ if (getrlimit(RLIMIT_MEMLOCK, &rlim))
+ ksft_exit_fail_msg("Unable to detect mlock limit: %s\n",
+ strerror(errno));
+
+ mlock_limit_cur = rlim.rlim_cur;
+ mlock_limit_max = rlim.rlim_max;
+
+ printf("page_size: %ld, mlock.soft: %ld, mlock.hard: %ld\n",
+ page_size, mlock_limit_cur, mlock_limit_max);
+
+ if (page_size > mlock_limit_cur)
+ mlock_limit_cur = page_size;
+ if (page_size > mlock_limit_max)
+ mlock_limit_max = page_size;
+
+ if (set_cap_limits(mlock_limit_max))
+ ksft_exit_fail_msg("Unable to set mlock limit: %s\n",
+ strerror(errno));
+}
+
+#define NUM_TESTS 6
+
+int main(int argc, char *argv[])
+{
+ int fd;
+
+ prepare();
+
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ fd = memfd_secret(0);
+ if (fd < 0) {
+ if (errno == ENOSYS)
+ ksft_exit_skip("memfd_secret is not supported\n");
+ else
+ ksft_exit_fail_msg("memfd_secret failed: %s\n",
+ strerror(errno));
+ }
+ if (ftruncate(fd, page_size))
+ ksft_exit_fail_msg("ftruncate failed: %s\n", strerror(errno));
+
+ test_mlock_limit(fd);
+ test_file_apis(fd);
+ /*
+ * We have to run the first vmsplice test before any secretmem page was
+ * allocated for this fd.
+ */
+ test_vmsplice(fd, "fresh page");
+ test_vmsplice(fd, "existing page");
+ test_process_vm_read(fd);
+ test_ptrace(fd);
+
+ close(fd);
+
+ ksft_finished();
+}
+
+#else /* __NR_memfd_secret */
+
+int main(int argc, char *argv[])
+{
+ printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c
new file mode 100644
index 000000000000..363c1033cc7d
--- /dev/null
+++ b/tools/testing/selftests/mm/merge.c
@@ -0,0 +1,1174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include "kselftest_harness.h"
+#include <linux/prctl.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <linux/perf_event.h>
+#include "vm_util.h"
+#include <linux/mman.h>
+
+FIXTURE(merge)
+{
+ unsigned int page_size;
+ char *carveout;
+ struct procmap_fd procmap;
+};
+
+FIXTURE_SETUP(merge)
+{
+ self->page_size = psize();
+ /* Carve out PROT_NONE region to map over. */
+ self->carveout = mmap(NULL, 30 * self->page_size, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(self->carveout, MAP_FAILED);
+ /* Setup PROCMAP_QUERY interface. */
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+}
+
+FIXTURE_TEARDOWN(merge)
+{
+ ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0);
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+ /*
+ * Clear unconditionally, as some tests set this. It is no issue if this
+ * fails (KSM may be disabled for instance).
+ */
+ prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
+}
+
+TEST_F(merge, mprotect_unfaulted_left)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * Map 10 pages of R/W memory within. MAP_NORESERVE so we don't hit
+ * merge failure due to lack of VM_ACCOUNT flag by mistake.
+ *
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the first 5 pages read-only, splitting the VMA:
+ *
+ * RO RW
+ * |-----------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the last 5 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RO RW
+ * |-----------|-----------|
+ * | unfaulted | faulted |
+ * |-----------|-----------|
+ */
+ ptr[5 * page_size] = 'x';
+ /*
+ * Now mprotect() the RW region read-only, we should merge (though for
+ * ~15 years we did not! :):
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+}
+
+TEST_F(merge, mprotect_unfaulted_right)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the last 5 pages read-only, splitting the VMA:
+ *
+ * RW RO
+ * |-----------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the first 5 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RW RO
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ */
+ ptr[0] = 'x';
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+}
+
+TEST_F(merge, mprotect_unfaulted_both)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the first and last 3 pages read-only, splitting the VMA:
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | unfaulted | unfaulted | unfaulted |
+ * |-----------|-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the middle 3 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | unfaulted | faulted | unfaulted |
+ * |-----------|-----------|-----------|
+ */
+ ptr[3 * page_size] = 'x';
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
+}
+
+TEST_F(merge, mprotect_faulted_left_unfaulted_right)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the last 3 pages read-only, splitting the VMA:
+ *
+ * RW RO
+ * |-----------------------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------------------|-----------|
+ */
+ ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the first 6 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RW RO
+ * |-----------------------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------------------|-----------|
+ */
+ ptr[0] = 'x';
+ /*
+ * Now make the first 3 pages read-only, splitting the VMA:
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | faulted | faulted | unfaulted |
+ * |-----------|-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
+}
+
+TEST_F(merge, mprotect_unfaulted_left_faulted_right)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the first 3 pages read-only, splitting the VMA:
+ *
+ * RO RW
+ * |-----------|-----------------------|
+ * | unfaulted | unfaulted |
+ * |-----------|-----------------------|
+ */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the last 6 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RO RW
+ * |-----------|-----------------------|
+ * | unfaulted | faulted |
+ * |-----------|-----------------------|
+ */
+ ptr[3 * page_size] = 'x';
+ /*
+ * Now make the last 3 pages read-only, splitting the VMA:
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | unfaulted | faulted | faulted |
+ * |-----------|-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
+}
+
+TEST_F(merge, forked_target_vma)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ pid_t pid;
+ char *ptr, *ptr2;
+ int i;
+
+ /*
+ * |-----------|
+ * | unfaulted |
+ * |-----------|
+ */
+ ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Fault in process.
+ *
+ * |-----------|
+ * | faulted |
+ * |-----------|
+ */
+ ptr[0] = 'x';
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+
+ if (pid != 0) {
+ wait(NULL);
+ return;
+ }
+
+ /* Child process below: */
+
+ /* Reopen for child. */
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+
+ /* unCOWing everything does not cause the AVC to go away. */
+ for (i = 0; i < 5 * page_size; i += page_size)
+ ptr[i] = 'x';
+
+ /*
+ * Map in adjacent VMA in child.
+ *
+ * forked
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ */
+ ptr2 = mmap(&ptr[5 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Make sure not merged. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 5 * page_size);
+}
+
+TEST_F(merge, forked_source_vma)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ pid_t pid;
+ char *ptr, *ptr2;
+ int i;
+
+ /*
+ * |-----------|------------|
+ * | unfaulted | <unmapped> |
+ * |-----------|------------|
+ */
+ ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Fault in process.
+ *
+ * |-----------|------------|
+ * | faulted | <unmapped> |
+ * |-----------|------------|
+ */
+ ptr[0] = 'x';
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+
+ if (pid != 0) {
+ wait(NULL);
+ return;
+ }
+
+ /* Child process below: */
+
+ /* Reopen for child. */
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+
+ /* unCOWing everything does not cause the AVC to go away. */
+ for (i = 0; i < 5 * page_size; i += page_size)
+ ptr[i] = 'x';
+
+ /*
+ * Map in adjacent VMA in child, ptr2 after ptr, but incompatible.
+ *
+ * forked RW RWX
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ */
+ ptr2 = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Make sure not merged. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
+
+ /*
+ * Now mprotect forked region to RWX so it becomes the source for the
+ * merge to unfaulted region:
+ *
+ * forked RWX RWX
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ *
+ * This should NOT result in a merge, as ptr was forked.
+ */
+ ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC), 0);
+ /* Again, make sure not merged. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
+}
+
+TEST_F(merge, handle_uprobe_upon_merged_vma)
+{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ unsigned int page_size = self->page_size;
+ const char *probe_file = "./foo";
+ char *carveout = self->carveout;
+ struct perf_event_attr attr;
+ unsigned long type;
+ void *ptr1, *ptr2;
+ int fd;
+
+ fd = open(probe_file, O_RDWR|O_CREAT, 0600);
+ ASSERT_GE(fd, 0);
+
+ ASSERT_EQ(ftruncate(fd, page_size), 0);
+ if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) {
+ SKIP(goto out, "Failed to read uprobe sysfs file, skipping");
+ }
+
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
+ attr.type = type;
+ attr.config1 = (__u64)(long)probe_file;
+ attr.config2 = 0x0;
+
+ ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
+
+ ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC,
+ MAP_PRIVATE | MAP_FIXED, fd, 0);
+ ASSERT_NE(ptr1, MAP_FAILED);
+
+ ptr2 = mremap(ptr1, page_size, 2 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_NE(mremap(ptr2, page_size, page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);
+
+out:
+ close(fd);
+ remove(probe_file);
+}
+
+TEST_F(merge, ksm_merge)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr, *ptr2;
+ int err;
+
+ /*
+ * Map two R/W immediately adjacent to one another, they should
+ * trivially merge:
+ *
+ * |-----------|-----------|
+ * | R/W | R/W |
+ * |-----------|-----------|
+ * ptr ptr2
+ */
+
+ ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ ptr2 = mmap(&carveout[2 * page_size], page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size);
+
+ /* Unmap the second half of this merged VMA. */
+ ASSERT_EQ(munmap(ptr2, page_size), 0);
+
+ /* OK, now enable global KSM merge. We clear this on test teardown. */
+ err = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (err == -1) {
+ int errnum = errno;
+
+ /* Only non-failure case... */
+ ASSERT_EQ(errnum, EINVAL);
+ /* ...but indicates we should skip. */
+ SKIP(return, "KSM memory merging not supported, skipping.");
+ }
+
+ /*
+ * Now map a VMA adjacent to the existing that was just made
+ * VM_MERGEABLE, this should merge as well.
+ */
+ ptr2 = mmap(&carveout[2 * page_size], page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size);
+
+ /* Now this VMA altogether. */
+ ASSERT_EQ(munmap(ptr, 2 * page_size), 0);
+
+ /* Try the same operation as before, asserting this also merges fine. */
+ ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ ptr2 = mmap(&carveout[2 * page_size], page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size);
+}
+
+TEST_F(merge, mremap_unfaulted_to_faulted)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr, *ptr2;
+
+ /*
+ * Map two distinct areas:
+ *
+ * |-----------| |-----------|
+ * | unfaulted | | unfaulted |
+ * |-----------| |-----------|
+ * ptr ptr2
+ */
+ ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Offset ptr2 further away. */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Fault in ptr:
+ * \
+ * |-----------| / |-----------|
+ * | faulted | \ | unfaulted |
+ * |-----------| / |-----------|
+ * ptr \ ptr2
+ */
+ ptr[0] = 'x';
+
+ /*
+ * Now move ptr2 adjacent to ptr:
+ *
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ *
+ * It should merge:
+ *
+ * |----------------------|
+ * | faulted |
+ * |----------------------|
+ * ptr
+ */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+}
+
+TEST_F(merge, mremap_unfaulted_behind_faulted)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr, *ptr2;
+
+ /*
+ * Map two distinct areas:
+ *
+ * |-----------| |-----------|
+ * | unfaulted | | unfaulted |
+ * |-----------| |-----------|
+ * ptr ptr2
+ */
+ ptr = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ ptr2 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Offset ptr2 further away. */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Fault in ptr:
+ * \
+ * |-----------| / |-----------|
+ * | faulted | \ | unfaulted |
+ * |-----------| / |-----------|
+ * ptr \ ptr2
+ */
+ ptr[0] = 'x';
+
+ /*
+ * Now move ptr2 adjacent, but behind, ptr:
+ *
+ * |-----------|-----------|
+ * | unfaulted | faulted |
+ * |-----------|-----------|
+ * ptr2 ptr
+ *
+ * It should merge:
+ *
+ * |----------------------|
+ * | faulted |
+ * |----------------------|
+ * ptr2
+ */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size);
+}
+
+TEST_F(merge, mremap_unfaulted_between_faulted)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr, *ptr2, *ptr3;
+
+ /*
+ * Map three distinct areas:
+ *
+ * |-----------| |-----------| |-----------|
+ * | unfaulted | | unfaulted | | unfaulted |
+ * |-----------| |-----------| |-----------|
+ * ptr ptr2 ptr3
+ */
+ ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+ ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ /* Offset ptr3 further away. */
+ ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ /* Offset ptr2 further away. */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Fault in ptr, ptr3:
+ * \ \
+ * |-----------| / |-----------| / |-----------|
+ * | faulted | \ | unfaulted | \ | faulted |
+ * |-----------| / |-----------| / |-----------|
+ * ptr \ ptr2 \ ptr3
+ */
+ ptr[0] = 'x';
+ ptr3[0] = 'x';
+
+ /*
+ * Move ptr3 back into place, leaving a place for ptr2:
+ * \
+ * |-----------| |-----------| / |-----------|
+ * | faulted | | faulted | \ | unfaulted |
+ * |-----------| |-----------| / |-----------|
+ * ptr ptr3 \ ptr2
+ */
+ ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ /*
+ * Finally, move ptr2 into place:
+ *
+ * |-----------|-----------|-----------|
+ * | faulted | unfaulted | faulted |
+ * |-----------|-----------|-----------|
+ * ptr ptr2 ptr3
+ *
+ * It should merge, but only ptr, ptr2:
+ *
+ * |-----------------------|-----------|
+ * | faulted | unfaulted |
+ * |-----------------------|-----------|
+ */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr3));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr3);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr3 + 5 * page_size);
+}
+
+TEST_F(merge, mremap_unfaulted_between_faulted_unfaulted)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr, *ptr2, *ptr3;
+
+ /*
+ * Map three distinct areas:
+ *
+ * |-----------| |-----------| |-----------|
+ * | unfaulted | | unfaulted | | unfaulted |
+ * |-----------| |-----------| |-----------|
+ * ptr ptr2 ptr3
+ */
+ ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+ ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ /* Offset ptr3 further away. */
+ ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+
+ /* Offset ptr2 further away. */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Fault in ptr:
+ * \ \
+ * |-----------| / |-----------| / |-----------|
+ * | faulted | \ | unfaulted | \ | unfaulted |
+ * |-----------| / |-----------| / |-----------|
+ * ptr \ ptr2 \ ptr3
+ */
+ ptr[0] = 'x';
+
+ /*
+ * Move ptr3 back into place, leaving a place for ptr2:
+ * \
+ * |-----------| |-----------| / |-----------|
+ * | faulted | | unfaulted | \ | unfaulted |
+ * |-----------| |-----------| / |-----------|
+ * ptr ptr3 \ ptr2
+ */
+ ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ /*
+ * Finally, move ptr2 into place:
+ *
+ * |-----------|-----------|-----------|
+ * | faulted | unfaulted | unfaulted |
+ * |-----------|-----------|-----------|
+ * ptr ptr2 ptr3
+ *
+ * It should merge:
+ *
+ * |-----------------------------------|
+ * | faulted |
+ * |-----------------------------------|
+ */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size);
+}
+
+TEST_F(merge, mremap_unfaulted_between_correctly_placed_faulted)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr, *ptr2;
+
+ /*
+ * Map one larger area:
+ *
+ * |-----------------------------------|
+ * | unfaulted |
+ * |-----------------------------------|
+ */
+ ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Fault in ptr:
+ *
+ * |-----------------------------------|
+ * | faulted |
+ * |-----------------------------------|
+ */
+ ptr[0] = 'x';
+
+ /*
+ * Unmap middle:
+ *
+ * |-----------| |-----------|
+ * | faulted | | faulted |
+ * |-----------| |-----------|
+ *
+ * Now the faulted areas are compatible with each other (anon_vma the
+ * same, vma->vm_pgoff equal to virtual page offset).
+ */
+ ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0);
+
+ /*
+ * Map a new area, ptr2:
+ * \
+ * |-----------| |-----------| / |-----------|
+ * | faulted | | faulted | \ | unfaulted |
+ * |-----------| |-----------| / |-----------|
+ * ptr \ ptr2
+ */
+ ptr2 = mmap(&carveout[20 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Finally, move ptr2 into place:
+ *
+ * |-----------|-----------|-----------|
+ * | faulted | unfaulted | faulted |
+ * |-----------|-----------|-----------|
+ * ptr ptr2 ptr3
+ *
+ * It should merge:
+ *
+ * |-----------------------------------|
+ * | faulted |
+ * |-----------------------------------|
+ */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size);
+}
+
+TEST_F(merge, mremap_correct_placed_faulted)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr, *ptr2, *ptr3;
+
+ /*
+ * Map one larger area:
+ *
+ * |-----------------------------------|
+ * | unfaulted |
+ * |-----------------------------------|
+ */
+ ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Fault in ptr:
+ *
+ * |-----------------------------------|
+ * | faulted |
+ * |-----------------------------------|
+ */
+ ptr[0] = 'x';
+
+ /*
+ * Offset the final and middle 5 pages further away:
+ * \ \
+ * |-----------| / |-----------| / |-----------|
+ * | faulted | \ | faulted | \ | faulted |
+ * |-----------| / |-----------| / |-----------|
+ * ptr \ ptr2 \ ptr3
+ */
+ ptr3 = &ptr[10 * page_size];
+ ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000);
+ ASSERT_NE(ptr3, MAP_FAILED);
+ ptr2 = &ptr[5 * page_size];
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Move ptr2 into its correct place:
+ * \
+ * |-----------|-----------| / |-----------|
+ * | faulted | faulted | \ | faulted |
+ * |-----------|-----------| / |-----------|
+ * ptr ptr2 \ ptr3
+ *
+ * It should merge:
+ * \
+ * |-----------------------| / |-----------|
+ * | faulted | \ | faulted |
+ * |-----------------------| / |-----------|
+ * ptr \ ptr3
+ */
+
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+
+ /*
+ * Now move ptr out of place:
+ * \ \
+ * |-----------| / |-----------| / |-----------|
+ * | faulted | \ | faulted | \ | faulted |
+ * |-----------| / |-----------| / |-----------|
+ * ptr2 \ ptr \ ptr3
+ */
+ ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Now move ptr back into place:
+ * \
+ * |-----------|-----------| / |-----------|
+ * | faulted | faulted | \ | faulted |
+ * |-----------|-----------| / |-----------|
+ * ptr ptr2 \ ptr3
+ *
+ * It should merge:
+ * \
+ * |-----------------------| / |-----------|
+ * | faulted | \ | faulted |
+ * |-----------------------| / |-----------|
+ * ptr \ ptr3
+ */
+ ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+
+ /*
+ * Now move ptr out of place again:
+ * \ \
+ * |-----------| / |-----------| / |-----------|
+ * | faulted | \ | faulted | \ | faulted |
+ * |-----------| / |-----------| / |-----------|
+ * ptr2 \ ptr \ ptr3
+ */
+ ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Now move ptr3 back into place:
+ * \
+ * |-----------|-----------| / |-----------|
+ * | faulted | faulted | \ | faulted |
+ * |-----------|-----------| / |-----------|
+ * ptr2 ptr3 \ ptr
+ *
+ * It should merge:
+ * \
+ * |-----------------------| / |-----------|
+ * | faulted | \ | faulted |
+ * |-----------------------| / |-----------|
+ * ptr2 \ ptr
+ */
+ ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr2[5 * page_size]);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size);
+
+ /*
+ * Now move ptr back into place:
+ *
+ * |-----------|-----------------------|
+ * | faulted | faulted |
+ * |-----------|-----------------------|
+ * ptr ptr2
+ *
+ * It should merge:
+ *
+ * |-----------------------------------|
+ * | faulted |
+ * |-----------------------------------|
+ * ptr
+ */
+ ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size);
+
+ /*
+ * Now move ptr2 out of the way:
+ * \
+ * |-----------| |-----------| / |-----------|
+ * | faulted | | faulted | \ | faulted |
+ * |-----------| |-----------| / |-----------|
+ * ptr ptr3 \ ptr2
+ */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Now move it back:
+ *
+ * |-----------|-----------|-----------|
+ * | faulted | faulted | faulted |
+ * |-----------|-----------|-----------|
+ * ptr ptr2 ptr3
+ *
+ * It should merge:
+ *
+ * |-----------------------------------|
+ * | faulted |
+ * |-----------------------------------|
+ * ptr
+ */
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size);
+
+ /*
+ * Move ptr3 out of place:
+ * \
+ * |-----------------------| / |-----------|
+ * | faulted | \ | faulted |
+ * |-----------------------| / |-----------|
+ * ptr \ ptr3
+ */
+ ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 1000);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ /*
+ * Now move it back:
+ *
+ * |-----------|-----------|-----------|
+ * | faulted | faulted | faulted |
+ * |-----------|-----------|-----------|
+ * ptr ptr2 ptr3
+ *
+ * It should merge:
+ *
+ * |-----------------------------------|
+ * | faulted |
+ * |-----------------------------------|
+ * ptr
+ */
+ ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
new file mode 100644
index 000000000000..ee24b88c2b24
--- /dev/null
+++ b/tools/testing/selftests/mm/migration.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The main purpose of the tests here is to exercise the migration entry code
+ * paths in the kernel.
+ */
+
+#include "kselftest_harness.h"
+#include "thp_settings.h"
+
+#include <strings.h>
+#include <pthread.h>
+#include <numa.h>
+#include <numaif.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <time.h>
+#include "vm_util.h"
+
+#define TWOMEG (2<<20)
+#define RUNTIME (20)
+#define MAX_RETRIES 100
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+
+FIXTURE(migration)
+{
+ pthread_t *threads;
+ pid_t *pids;
+ int nthreads;
+ int n1;
+ int n2;
+};
+
+FIXTURE_SETUP(migration)
+{
+ int n;
+
+ ASSERT_EQ(numa_available(), 0);
+ self->nthreads = numa_num_task_cpus() - 1;
+ self->n1 = -1;
+ self->n2 = -1;
+
+ for (n = 0; n < numa_max_possible_node(); n++)
+ if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) {
+ if (self->n1 == -1) {
+ self->n1 = n;
+ } else {
+ self->n2 = n;
+ break;
+ }
+ }
+
+ self->threads = malloc(self->nthreads * sizeof(*self->threads));
+ ASSERT_NE(self->threads, NULL);
+ self->pids = malloc(self->nthreads * sizeof(*self->pids));
+ ASSERT_NE(self->pids, NULL);
+};
+
+FIXTURE_TEARDOWN(migration)
+{
+ free(self->threads);
+ free(self->pids);
+}
+
+int migrate(uint64_t *ptr, int n1, int n2)
+{
+ int ret, tmp;
+ int status = 0;
+ struct timespec ts1, ts2;
+ int failures = 0;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &ts1))
+ return -1;
+
+ while (1) {
+ if (clock_gettime(CLOCK_MONOTONIC, &ts2))
+ return -1;
+
+ if (ts2.tv_sec - ts1.tv_sec >= RUNTIME)
+ return 0;
+
+ ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
+ MPOL_MF_MOVE_ALL);
+ if (ret) {
+ if (ret > 0) {
+ /* Migration is best effort; try again */
+ if (++failures < MAX_RETRIES)
+ continue;
+ printf("Didn't migrate %d pages\n", ret);
+ }
+ else
+ perror("Couldn't migrate pages");
+ return -2;
+ }
+ failures = 0;
+ tmp = n2;
+ n2 = n1;
+ n1 = tmp;
+ }
+
+ return 0;
+}
+
+void *access_mem(void *ptr)
+{
+ while (1) {
+ pthread_testcancel();
+ /* Force a read from the memory pointed to by ptr. This ensures
+ * the memory access actually happens and prevents the compiler
+ * from optimizing away this entire loop.
+ */
+ FORCE_READ(*(uint64_t *)ptr);
+ }
+
+ return NULL;
+}
+
+/*
+ * Basic migration entry testing. One thread will move pages back and forth
+ * between nodes whilst other threads try and access them triggering the
+ * migration entry wait paths in the kernel.
+ */
+TEST_F_TIMEOUT(migration, private_anon, 2*RUNTIME)
+{
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++)
+ if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+ perror("Couldn't create thread");
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+/*
+ * Same as the previous test but with shared memory.
+ */
+TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME)
+{
+ pid_t pid;
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++) {
+ pid = fork();
+ if (!pid) {
+ prctl(PR_SET_PDEATHSIG, SIGHUP);
+ /* Parent may have died before prctl so check now. */
+ if (getppid() == 1)
+ kill(getpid(), SIGHUP);
+ access_mem(ptr);
+ } else {
+ self->pids[i] = pid;
+ }
+ }
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
+/*
+ * Tests the pmd migration entry paths.
+ */
+TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME)
+{
+ uint64_t *ptr;
+ int i;
+
+ if (!thp_is_enabled())
+ SKIP(return, "Transparent Hugepages not available");
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, 2*TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
+ ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++)
+ if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+ perror("Couldn't create thread");
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+/*
+ * migration test with shared anon THP page
+ */
+
+TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME)
+{
+ pid_t pid;
+ uint64_t *ptr;
+ int i;
+
+ if (!thp_is_enabled())
+ SKIP(return, "Transparent Hugepages not available");
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, 2 * TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
+ ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++) {
+ pid = fork();
+ if (!pid) {
+ prctl(PR_SET_PDEATHSIG, SIGHUP);
+ /* Parent may have died before prctl so check now. */
+ if (getppid() == 1)
+ kill(getpid(), SIGHUP);
+ access_mem(ptr);
+ } else {
+ self->pids[i] = pid;
+ }
+ }
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
+/*
+ * migration test with private anon hugetlb page
+ */
+TEST_F_TIMEOUT(migration, private_anon_htlb, 2*RUNTIME)
+{
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++)
+ if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+ perror("Couldn't create thread");
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+/*
+ * migration test with shared anon hugetlb page
+ */
+TEST_F_TIMEOUT(migration, shared_anon_htlb, 2*RUNTIME)
+{
+ pid_t pid;
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++) {
+ pid = fork();
+ if (!pid) {
+ prctl(PR_SET_PDEATHSIG, SIGHUP);
+ /* Parent may have died before prctl so check now. */
+ if (getppid() == 1)
+ kill(getpid(), SIGHUP);
+ access_mem(ptr);
+ } else {
+ self->pids[i] = pid;
+ }
+ }
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
new file mode 100644
index 000000000000..68dd447a5454
--- /dev/null
+++ b/tools/testing/selftests/mm/mkdirty.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test handling of code that might set PTE/PMD dirty in read-only VMAs.
+ * Setting a PTE/PMD dirty must not accidentally set the PTE/PMD writable.
+ *
+ * Copyright 2023, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#include <fcntl.h>
+#include <signal.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <setjmp.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+#include <linux/mempolicy.h>
+
+#include "kselftest.h"
+#include "vm_util.h"
+
+static size_t pagesize;
+static size_t thpsize;
+static int mem_fd;
+static int pagemap_fd;
+static sigjmp_buf env;
+
+static void signal_handler(int sig)
+{
+ if (sig == SIGSEGV)
+ siglongjmp(env, 1);
+ siglongjmp(env, 2);
+}
+
+static void do_test_write_sigsegv(char *mem)
+{
+ char orig = *mem;
+ int ret;
+
+ if (signal(SIGSEGV, signal_handler) == SIG_ERR) {
+ ksft_test_result_fail("signal() failed\n");
+ return;
+ }
+
+ ret = sigsetjmp(env, 1);
+ if (!ret)
+ *mem = orig + 1;
+
+ if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
+ ksft_test_result_fail("signal() failed\n");
+
+ ksft_test_result(ret == 1 && *mem == orig,
+ "SIGSEGV generated, page not modified\n");
+}
+
+static char *mmap_thp_range(int prot, char **_mmap_mem, size_t *_mmap_size)
+{
+ const size_t mmap_size = 2 * thpsize;
+ char *mem, *mmap_mem;
+
+ mmap_mem = mmap(NULL, mmap_size, prot, MAP_PRIVATE|MAP_ANON,
+ -1, 0);
+ if (mmap_mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return MAP_FAILED;
+ }
+ mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
+
+ if (madvise(mem, thpsize, MADV_HUGEPAGE)) {
+ ksft_test_result_skip("MADV_HUGEPAGE failed\n");
+ munmap(mmap_mem, mmap_size);
+ return MAP_FAILED;
+ }
+
+ *_mmap_mem = mmap_mem;
+ *_mmap_size = mmap_size;
+ return mem;
+}
+
+static void test_ptrace_write(void)
+{
+ char data = 1;
+ char *mem;
+ int ret;
+
+ ksft_print_msg("[INFO] PTRACE write access\n");
+
+ mem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return;
+ }
+
+ /* Fault in the shared zeropage. */
+ if (*mem != 0) {
+ ksft_test_result_fail("Memory not zero\n");
+ goto munmap;
+ }
+
+ /*
+ * Unshare the page (populating a fresh anon page that might be set
+ * dirty in the PTE) in the read-only VMA using ptrace (FOLL_FORCE).
+ */
+ lseek(mem_fd, (uintptr_t) mem, SEEK_SET);
+ ret = write(mem_fd, &data, 1);
+ if (ret != 1 || *mem != data) {
+ ksft_test_result_fail("write() failed\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mem, pagesize);
+}
+
+static void test_ptrace_write_thp(void)
+{
+ char *mem, *mmap_mem;
+ size_t mmap_size;
+ char data = 1;
+ int ret;
+
+ ksft_print_msg("[INFO] PTRACE write access to THP\n");
+
+ mem = mmap_thp_range(PROT_READ, &mmap_mem, &mmap_size);
+ if (mem == MAP_FAILED)
+ return;
+
+ /*
+ * Write to the first subpage in the read-only VMA using
+ * ptrace(FOLL_FORCE), eventually placing a fresh THP that is marked
+ * dirty in the PMD.
+ */
+ lseek(mem_fd, (uintptr_t) mem, SEEK_SET);
+ ret = write(mem_fd, &data, 1);
+ if (ret != 1 || *mem != data) {
+ ksft_test_result_fail("write() failed\n");
+ goto munmap;
+ }
+
+ /* MM populated a THP if we got the last subpage populated as well. */
+ if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
+ ksft_test_result_skip("Did not get a THP populated\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mmap_mem, mmap_size);
+}
+
+static void test_page_migration(void)
+{
+ char *mem;
+
+ ksft_print_msg("[INFO] Page migration\n");
+
+ mem = mmap(NULL, pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON,
+ -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return;
+ }
+
+ /* Populate a fresh page and dirty it. */
+ memset(mem, 1, pagesize);
+ if (mprotect(mem, pagesize, PROT_READ)) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+
+ /* Trigger page migration. Might not be available or fail. */
+ if (syscall(__NR_mbind, mem, pagesize, MPOL_LOCAL, NULL, 0x7fful,
+ MPOL_MF_MOVE)) {
+ ksft_test_result_skip("mbind() failed\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mem, pagesize);
+}
+
+static void test_page_migration_thp(void)
+{
+ char *mem, *mmap_mem;
+ size_t mmap_size;
+
+ ksft_print_msg("[INFO] Page migration of THP\n");
+
+ mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
+ if (mem == MAP_FAILED)
+ return;
+
+ /*
+ * Write to the first page, which might populate a fresh anon THP
+ * and dirty it.
+ */
+ memset(mem, 1, pagesize);
+ if (mprotect(mem, thpsize, PROT_READ)) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+
+ /* MM populated a THP if we got the last subpage populated as well. */
+ if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
+ ksft_test_result_skip("Did not get a THP populated\n");
+ goto munmap;
+ }
+
+ /* Trigger page migration. Might not be available or fail. */
+ if (syscall(__NR_mbind, mem, thpsize, MPOL_LOCAL, NULL, 0x7fful,
+ MPOL_MF_MOVE)) {
+ ksft_test_result_skip("mbind() failed\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mmap_mem, mmap_size);
+}
+
+static void test_pte_mapped_thp(void)
+{
+ char *mem, *mmap_mem;
+ size_t mmap_size;
+
+ ksft_print_msg("[INFO] PTE-mapping a THP\n");
+
+ mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
+ if (mem == MAP_FAILED)
+ return;
+
+ /*
+ * Write to the first page, which might populate a fresh anon THP
+ * and dirty it.
+ */
+ memset(mem, 1, pagesize);
+ if (mprotect(mem, thpsize, PROT_READ)) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+
+ /* MM populated a THP if we got the last subpage populated as well. */
+ if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
+ ksft_test_result_skip("Did not get a THP populated\n");
+ goto munmap;
+ }
+
+ /* Trigger PTE-mapping the THP by mprotect'ing the last subpage. */
+ if (mprotect(mem + thpsize - pagesize, pagesize,
+ PROT_READ|PROT_WRITE)) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mmap_mem, mmap_size);
+}
+
+#ifdef __NR_userfaultfd
+static void test_uffdio_copy(void)
+{
+ struct uffdio_register uffdio_register;
+ struct uffdio_copy uffdio_copy;
+ struct uffdio_api uffdio_api;
+ char *dst, *src;
+ int uffd;
+
+ ksft_print_msg("[INFO] UFFDIO_COPY\n");
+
+ src = malloc(pagesize);
+ memset(src, 1, pagesize);
+ dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (dst == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ free(src);
+ return;
+ }
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ ksft_test_result_skip("__NR_userfaultfd failed\n");
+ goto munmap;
+ }
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
+ ksft_test_result_fail("UFFDIO_API failed\n");
+ goto close_uffd;
+ }
+
+ uffdio_register.range.start = (unsigned long) dst;
+ uffdio_register.range.len = pagesize;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ ksft_test_result_fail("UFFDIO_REGISTER failed\n");
+ goto close_uffd;
+ }
+
+ /* Place a page in a read-only VMA, which might set the PTE dirty. */
+ uffdio_copy.dst = (unsigned long) dst;
+ uffdio_copy.src = (unsigned long) src;
+ uffdio_copy.len = pagesize;
+ uffdio_copy.mode = 0;
+ if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) {
+ ksft_test_result_fail("UFFDIO_COPY failed\n");
+ goto close_uffd;
+ }
+
+ do_test_write_sigsegv(dst);
+close_uffd:
+ close(uffd);
+munmap:
+ munmap(dst, pagesize);
+ free(src);
+}
+#endif /* __NR_userfaultfd */
+
+int main(void)
+{
+ int err, tests = 2;
+
+ pagesize = getpagesize();
+ thpsize = read_pmd_pagesize();
+ if (thpsize) {
+ ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
+ thpsize / 1024);
+ tests += 3;
+ }
+#ifdef __NR_userfaultfd
+ tests += 1;
+#endif /* __NR_userfaultfd */
+
+ ksft_print_header();
+ ksft_set_plan(tests);
+
+ mem_fd = open("/proc/self/mem", O_RDWR);
+ if (mem_fd < 0)
+ ksft_exit_fail_msg("opening /proc/self/mem failed\n");
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("opening /proc/self/pagemap failed\n");
+
+ /*
+ * On some ptrace(FOLL_FORCE) write access via /proc/self/mem in
+ * read-only VMAs, the kernel may set the PTE/PMD dirty.
+ */
+ test_ptrace_write();
+ if (thpsize)
+ test_ptrace_write_thp();
+ /*
+ * On page migration, the kernel may set the PTE/PMD dirty when
+ * remapping the page.
+ */
+ test_page_migration();
+ if (thpsize)
+ test_page_migration_thp();
+ /* PTE-mapping a THP might propagate the dirty PMD bit to the PTEs. */
+ if (thpsize)
+ test_pte_mapped_thp();
+ /* Placing a fresh page via userfaultfd may set the PTE dirty. */
+#ifdef __NR_userfaultfd
+ test_uffdio_copy();
+#endif /* __NR_userfaultfd */
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c
index ff4d72eb74b9..9d349c151360 100644
--- a/tools/testing/selftests/vm/mlock-random-test.c
+++ b/tools/testing/selftests/mm/mlock-random-test.c
@@ -7,11 +7,13 @@
#include <sys/resource.h>
#include <sys/capability.h>
#include <sys/mman.h>
+#include <linux/mman.h>
#include <fcntl.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <time.h>
+#include "kselftest.h"
#include "mlock2.h"
#define CHUNK_UNIT (128 * 1024)
@@ -30,14 +32,14 @@ int set_cap_limits(rlim_t max)
new.rlim_cur = max;
new.rlim_max = max;
if (setrlimit(RLIMIT_MEMLOCK, &new)) {
- perror("setrlimit() returns error\n");
+ ksft_perror("setrlimit() returns error\n");
return -1;
}
/* drop capabilities including CAP_IPC_LOCK */
if (cap_set_proc(cap)) {
- perror("cap_set_proc() returns error\n");
- return -2;
+ ksft_perror("cap_set_proc() returns error\n");
+ return -1;
}
return 0;
@@ -51,27 +53,24 @@ int get_proc_locked_vm_size(void)
unsigned long lock_size = 0;
f = fopen("/proc/self/status", "r");
- if (!f) {
- perror("fopen");
- return -1;
- }
+ if (!f)
+ ksft_exit_fail_msg("fopen: %s\n", strerror(errno));
while (fgets(line, 1024, f)) {
if (strstr(line, "VmLck")) {
ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size);
if (ret <= 0) {
- printf("sscanf() on VmLck error: %s: %d\n",
- line, ret);
fclose(f);
- return -1;
+ ksft_exit_fail_msg("sscanf() on VmLck error: %s: %d\n",
+ line, ret);
}
fclose(f);
return (int)(lock_size << 10);
}
}
- perror("cann't parse VmLck in /proc/self/status\n");
fclose(f);
+ ksft_exit_fail_msg("cannot parse VmLck in /proc/self/status: %s\n", strerror(errno));
return -1;
}
@@ -90,10 +89,8 @@ int get_proc_page_size(unsigned long addr)
size_t size;
smaps = seek_to_smaps_entry(addr);
- if (!smaps) {
- printf("Unable to parse /proc/self/smaps\n");
- return 0;
- }
+ if (!smaps)
+ ksft_exit_fail_msg("Unable to parse /proc/self/smaps\n");
while (getline(&line, &size, smaps) > 0) {
if (!strstr(line, "MMUPageSize")) {
@@ -104,12 +101,9 @@ int get_proc_page_size(unsigned long addr)
}
/* found the MMUPageSize of this section */
- if (sscanf(line, "MMUPageSize: %8lu kB",
- &mmupage_size) < 1) {
- printf("Unable to parse smaps entry for Size:%s\n",
- line);
- break;
- }
+ if (sscanf(line, "MMUPageSize: %8lu kB", &mmupage_size) < 1)
+ ksft_exit_fail_msg("Unable to parse smaps entry for Size:%s\n",
+ line);
}
free(line);
@@ -135,7 +129,7 @@ int get_proc_page_size(unsigned long addr)
* return value: 0 - success
* else: failure
*/
-int test_mlock_within_limit(char *p, int alloc_size)
+static void test_mlock_within_limit(char *p, int alloc_size)
{
int i;
int ret = 0;
@@ -144,11 +138,9 @@ int test_mlock_within_limit(char *p, int alloc_size)
int page_size = 0;
getrlimit(RLIMIT_MEMLOCK, &cur);
- if (cur.rlim_cur < alloc_size) {
- printf("alloc_size[%d] < %u rlimit,lead to mlock failure\n",
- alloc_size, (unsigned int)cur.rlim_cur);
- return -1;
- }
+ if (cur.rlim_cur < alloc_size)
+ ksft_exit_fail_msg("alloc_size[%d] < %u rlimit,lead to mlock failure\n",
+ alloc_size, (unsigned int)cur.rlim_cur);
srand(time(NULL));
for (i = 0; i < TEST_LOOP; i++) {
@@ -168,13 +160,11 @@ int test_mlock_within_limit(char *p, int alloc_size)
ret = mlock2_(p + start_offset, lock_size,
MLOCK_ONFAULT);
- if (ret) {
- printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
- is_mlock ? "mlock" : "mlock2",
- p, alloc_size,
- p + start_offset, lock_size);
- return ret;
- }
+ if (ret)
+ ksft_exit_fail_msg("%s() failure (%s) at |%p(%d)| mlock:|%p(%d)|\n",
+ is_mlock ? "mlock" : "mlock2",
+ strerror(errno), p, alloc_size,
+ p + start_offset, lock_size);
}
/*
@@ -182,18 +172,12 @@ int test_mlock_within_limit(char *p, int alloc_size)
*/
locked_vm_size = get_proc_locked_vm_size();
page_size = get_proc_page_size((unsigned long)p);
- if (page_size == 0) {
- printf("cannot get proc MMUPageSize\n");
- return -1;
- }
- if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) {
- printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n",
- locked_vm_size, alloc_size);
- return -1;
- }
+ if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size)
+ ksft_exit_fail_msg("%s left VmLck:%d on %d chunk\n",
+ __func__, locked_vm_size, alloc_size);
- return 0;
+ ksft_test_result_pass("%s\n", __func__);
}
@@ -212,7 +196,7 @@ int test_mlock_within_limit(char *p, int alloc_size)
* return value: 0 - success
* else: failure
*/
-int test_mlock_outof_limit(char *p, int alloc_size)
+static void test_mlock_outof_limit(char *p, int alloc_size)
{
int i;
int ret = 0;
@@ -220,11 +204,9 @@ int test_mlock_outof_limit(char *p, int alloc_size)
struct rlimit cur;
getrlimit(RLIMIT_MEMLOCK, &cur);
- if (cur.rlim_cur >= alloc_size) {
- printf("alloc_size[%d] >%u rlimit, violates test condition\n",
- alloc_size, (unsigned int)cur.rlim_cur);
- return -1;
- }
+ if (cur.rlim_cur >= alloc_size)
+ ksft_exit_fail_msg("alloc_size[%d] >%u rlimit, violates test condition\n",
+ alloc_size, (unsigned int)cur.rlim_cur);
old_locked_vm_size = get_proc_locked_vm_size();
srand(time(NULL));
@@ -239,56 +221,47 @@ int test_mlock_outof_limit(char *p, int alloc_size)
else
ret = mlock2_(p + start_offset, lock_size,
MLOCK_ONFAULT);
- if (ret == 0) {
- printf("%s() succeeds? on %p(%d) mlock%p(%d)\n",
- is_mlock ? "mlock" : "mlock2",
- p, alloc_size,
- p + start_offset, lock_size);
- return -1;
- }
+ if (ret == 0)
+ ksft_exit_fail_msg("%s() succeeds? on %p(%d) mlock%p(%d)\n",
+ is_mlock ? "mlock" : "mlock2",
+ p, alloc_size, p + start_offset, lock_size);
}
locked_vm_size = get_proc_locked_vm_size();
- if (locked_vm_size != old_locked_vm_size) {
- printf("tests leads to new mlocked page: old[%d], new[%d]\n",
- old_locked_vm_size,
- locked_vm_size);
- return -1;
- }
+ if (locked_vm_size != old_locked_vm_size)
+ ksft_exit_fail_msg("tests leads to new mlocked page: old[%d], new[%d]\n",
+ old_locked_vm_size,
+ locked_vm_size);
- return 0;
+ ksft_test_result_pass("%s\n", __func__);
}
int main(int argc, char **argv)
{
char *p = NULL;
- int ret = 0;
+
+ ksft_print_header();
if (set_cap_limits(MLOCK_RLIMIT_SIZE))
- return -1;
+ ksft_finished();
+
+ ksft_set_plan(2);
p = malloc(MLOCK_WITHIN_LIMIT_SIZE);
- if (p == NULL) {
- perror("malloc() failure\n");
- return -1;
- }
- ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
- if (ret)
- return ret;
+ if (p == NULL)
+ ksft_exit_fail_msg("malloc() failure: %s\n", strerror(errno));
+
+ test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
munlock(p, MLOCK_WITHIN_LIMIT_SIZE);
free(p);
-
p = malloc(MLOCK_OUTOF_LIMIT_SIZE);
- if (p == NULL) {
- perror("malloc() failure\n");
- return -1;
- }
- ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
- if (ret)
- return ret;
+ if (p == NULL)
+ ksft_exit_fail_msg("malloc() failure: %s\n", strerror(errno));
+
+ test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
munlock(p, MLOCK_OUTOF_LIMIT_SIZE);
free(p);
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index 11b2301f3aa3..b474f2b20def 100644
--- a/tools/testing/selftests/vm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -7,10 +7,9 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <stdbool.h>
+#include "kselftest.h"
#include "mlock2.h"
-#include "../kselftest.h"
-
struct vm_boundaries {
unsigned long start;
unsigned long end;
@@ -21,8 +20,6 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
FILE *file;
int ret = 1;
char line[1024] = {0};
- char *end_addr;
- char *stop;
unsigned long start;
unsigned long end;
@@ -38,22 +35,10 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
memset(area, 0, sizeof(struct vm_boundaries));
while(fgets(line, 1024, file)) {
- end_addr = strchr(line, '-');
- if (!end_addr) {
- printf("cannot parse /proc/self/maps\n");
- goto out;
- }
- *end_addr = '\0';
- end_addr++;
- stop = strchr(end_addr, ' ');
- if (!stop) {
- printf("cannot parse /proc/self/maps\n");
+ if (sscanf(line, "%lx-%lx", &start, &end) != 2) {
+ ksft_print_msg("cannot parse /proc/self/maps\n");
goto out;
}
- stop = '\0';
-
- sscanf(line, "%lx", &start);
- sscanf(end_addr, "%lx", &end);
if (start <= addr && end > addr) {
area->start = start;
@@ -79,7 +64,7 @@ static bool is_vmflag_set(unsigned long addr, const char *vmflag)
smaps = seek_to_smaps_entry(addr);
if (!smaps) {
- printf("Unable to parse /proc/self/smaps\n");
+ ksft_print_msg("Unable to parse /proc/self/smaps\n");
goto out;
}
@@ -116,7 +101,7 @@ static unsigned long get_value_for_name(unsigned long addr, const char *name)
smaps = seek_to_smaps_entry(addr);
if (!smaps) {
- printf("Unable to parse /proc/self/smaps\n");
+ ksft_print_msg("Unable to parse /proc/self/smaps\n");
goto out;
}
@@ -130,7 +115,7 @@ static unsigned long get_value_for_name(unsigned long addr, const char *name)
value_ptr = line + strlen(name);
if (sscanf(value_ptr, "%lu kB", &value) < 1) {
- printf("Unable to parse smaps entry for Size\n");
+ ksft_print_msg("Unable to parse smaps entry for Size\n");
goto out;
}
break;
@@ -181,57 +166,45 @@ static int lock_check(unsigned long addr)
static int unlock_lock_check(char *map)
{
if (is_vmflag_set((unsigned long)map, LOCKED)) {
- printf("VMA flag %s is present on page 1 after unlock\n", LOCKED);
+ ksft_print_msg("VMA flag %s is present on page 1 after unlock\n", LOCKED);
return 1;
}
return 0;
}
-static int test_mlock_lock()
+static void test_mlock_lock(void)
{
char *map;
- int ret = 1;
unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (map == MAP_FAILED) {
- perror("test_mlock_locked mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
if (mlock2_(map, 2 * page_size, 0)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("mlock2(0)");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlock2(0): %s\n", strerror(errno));
}
- if (!lock_check((unsigned long)map))
- goto unmap;
+ ksft_test_result(lock_check((unsigned long)map), "%s: Locked\n", __func__);
/* Now unlock and recheck attributes */
if (munlock(map, 2 * page_size)) {
- perror("munlock()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
}
- ret = unlock_lock_check(map);
-
-unmap:
+ ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__);
munmap(map, 2 * page_size);
-out:
- return ret;
}
static int onfault_check(char *map)
{
*map = 'a';
if (!is_vma_lock_on_fault((unsigned long)map)) {
- printf("VMA is not marked for lock on fault\n");
+ ksft_print_msg("VMA is not marked for lock on fault\n");
return 1;
}
@@ -244,172 +217,131 @@ static int unlock_onfault_check(char *map)
if (is_vma_lock_on_fault((unsigned long)map) ||
is_vma_lock_on_fault((unsigned long)map + page_size)) {
- printf("VMA is still lock on fault after unlock\n");
+ ksft_print_msg("VMA is still lock on fault after unlock\n");
return 1;
}
return 0;
}
-static int test_mlock_onfault()
+static void test_mlock_onfault(void)
{
char *map;
- int ret = 1;
unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (map == MAP_FAILED) {
- perror("test_mlock_locked mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("mlock2(MLOCK_ONFAULT)");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlock2(MLOCK_ONFAULT): %s\n", strerror(errno));
}
- if (onfault_check(map))
- goto unmap;
+ ksft_test_result(!onfault_check(map), "%s: VMA marked for lock on fault\n", __func__);
/* Now unlock and recheck attributes */
if (munlock(map, 2 * page_size)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("munlock()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
}
- ret = unlock_onfault_check(map);
-unmap:
+ ksft_test_result(!unlock_onfault_check(map), "VMA open lock after fault\n");
munmap(map, 2 * page_size);
-out:
- return ret;
}
-static int test_lock_onfault_of_present()
+static void test_lock_onfault_of_present(void)
{
char *map;
- int ret = 1;
unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (map == MAP_FAILED) {
- perror("test_mlock_locked mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
*map = 'a';
if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("mlock2(MLOCK_ONFAULT)");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_test_result_fail("mlock2(MLOCK_ONFAULT) error: %s", strerror(errno));
}
- if (!is_vma_lock_on_fault((unsigned long)map) ||
- !is_vma_lock_on_fault((unsigned long)map + page_size)) {
- printf("VMA with present pages is not marked lock on fault\n");
- goto unmap;
- }
- ret = 0;
-unmap:
+ ksft_test_result(is_vma_lock_on_fault((unsigned long)map) ||
+ is_vma_lock_on_fault((unsigned long)map + page_size),
+ "VMA with present pages is not marked lock on fault\n");
munmap(map, 2 * page_size);
-out:
- return ret;
}
-static int test_munlockall()
+static void test_munlockall0(void)
{
char *map;
- int ret = 1;
unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-
- if (map == MAP_FAILED) {
- perror("test_munlockall mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s\n", strerror(errno));
if (mlockall(MCL_CURRENT)) {
- perror("mlockall(MCL_CURRENT)");
- goto out;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlockall(MCL_CURRENT): %s\n", strerror(errno));
}
- if (!lock_check((unsigned long)map))
- goto unmap;
+ ksft_test_result(lock_check((unsigned long)map), "%s: Locked memory area\n", __func__);
if (munlockall()) {
- perror("munlockall()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlockall(): %s\n", strerror(errno));
}
- if (unlock_lock_check(map))
- goto unmap;
-
+ ksft_test_result(!unlock_lock_check(map), "%s: No locked memory\n", __func__);
munmap(map, 2 * page_size);
+}
+
+static void test_munlockall1(void)
+{
+ char *map;
+ unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-
- if (map == MAP_FAILED) {
- perror("test_munlockall second mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
if (mlockall(MCL_CURRENT | MCL_ONFAULT)) {
- perror("mlockall(MCL_CURRENT | MCL_ONFAULT)");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlockall(MCL_CURRENT | MCL_ONFAULT): %s\n", strerror(errno));
}
- if (onfault_check(map))
- goto unmap;
+ ksft_test_result(!onfault_check(map), "%s: VMA marked for lock on fault\n", __func__);
if (munlockall()) {
- perror("munlockall()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlockall(): %s\n", strerror(errno));
}
- if (unlock_onfault_check(map))
- goto unmap;
+ ksft_test_result(!unlock_onfault_check(map), "%s: Unlocked\n", __func__);
if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
- perror("mlockall(MCL_CURRENT | MCL_FUTURE)");
- goto out;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlockall(MCL_CURRENT | MCL_FUTURE): %s\n", strerror(errno));
}
- if (!lock_check((unsigned long)map))
- goto unmap;
+ ksft_test_result(lock_check((unsigned long)map), "%s: Locked\n", __func__);
if (munlockall()) {
- perror("munlockall()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlockall() %s\n", strerror(errno));
}
- ret = unlock_lock_check(map);
-
-unmap:
+ ksft_test_result(!unlock_lock_check(map), "%s: No locked memory\n", __func__);
munmap(map, 2 * page_size);
-out:
- munlockall();
- return ret;
}
-static int test_vma_management(bool call_mlock)
+static void test_vma_management(bool call_mlock)
{
- int ret = 1;
void *map;
unsigned long page_size = getpagesize();
struct vm_boundaries page1;
@@ -418,25 +350,19 @@ static int test_vma_management(bool call_mlock)
map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (map == MAP_FAILED) {
- perror("mmap()");
- return ret;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("mlock(ONFAULT)\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("mlock error: %s", strerror(errno));
}
if (get_vm_area((unsigned long)map, &page1) ||
get_vm_area((unsigned long)map + page_size, &page2) ||
get_vm_area((unsigned long)map + page_size * 2, &page3)) {
- printf("couldn't find mapping in /proc/self/maps\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("couldn't find mapping in /proc/self/maps");
}
/*
@@ -445,76 +371,86 @@ static int test_vma_management(bool call_mlock)
* not a failure)
*/
if (page1.start != page2.start || page2.start != page3.start) {
- printf("VMAs are not merged to start, aborting test\n");
- ret = 0;
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("VMAs are not merged to start, aborting test");
}
if (munlock(map + page_size, page_size)) {
- perror("munlock()");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("munlock(): %s", strerror(errno));
}
if (get_vm_area((unsigned long)map, &page1) ||
get_vm_area((unsigned long)map + page_size, &page2) ||
get_vm_area((unsigned long)map + page_size * 2, &page3)) {
- printf("couldn't find mapping in /proc/self/maps\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("couldn't find mapping in /proc/self/maps");
}
/* All three VMAs should be different */
if (page1.start == page2.start || page2.start == page3.start) {
- printf("failed to split VMA for munlock\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("failed to split VMA for munlock");
}
/* Now unlock the first and third page and check the VMAs again */
if (munlock(map, page_size * 3)) {
- perror("munlock()");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("munlock(): %s", strerror(errno));
}
if (get_vm_area((unsigned long)map, &page1) ||
get_vm_area((unsigned long)map + page_size, &page2) ||
get_vm_area((unsigned long)map + page_size * 2, &page3)) {
- printf("couldn't find mapping in /proc/self/maps\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("couldn't find mapping in /proc/self/maps");
}
/* Now all three VMAs should be the same */
if (page1.start != page2.start || page2.start != page3.start) {
- printf("failed to merge VMAs after munlock\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("failed to merge VMAs after munlock");
}
- ret = 0;
-out:
+ ksft_test_result_pass("%s call_mlock %d\n", __func__, call_mlock);
munmap(map, 3 * page_size);
- return ret;
}
-static int test_mlockall(int (test_function)(bool call_mlock))
+static void test_mlockall(void)
{
- int ret = 1;
+ if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE))
+ ksft_exit_fail_msg("mlockall failed: %s\n", strerror(errno));
- if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) {
- perror("mlockall");
- return ret;
- }
-
- ret = test_function(false);
+ test_vma_management(false);
munlockall();
- return ret;
}
int main(int argc, char **argv)
{
- int ret = 0;
- ret += test_mlock_lock();
- ret += test_mlock_onfault();
- ret += test_munlockall();
- ret += test_lock_onfault_of_present();
- ret += test_vma_management(true);
- ret += test_mlockall(test_vma_management);
- return ret;
+ int ret, size = 3 * getpagesize();
+ void *map;
+
+ ksft_print_header();
+
+ map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
+
+ ret = mlock2_(map, size, MLOCK_ONFAULT);
+ if (ret && errno == ENOSYS)
+ ksft_finished();
+
+ munmap(map, size);
+
+ ksft_set_plan(13);
+
+ test_mlock_lock();
+ test_mlock_onfault();
+ test_munlockall0();
+ test_munlockall1();
+ test_lock_onfault_of_present();
+ test_vma_management(true);
+ test_mlockall();
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/vm/mlock2.h b/tools/testing/selftests/mm/mlock2.h
index 2a6e76c226bc..81e77fa41901 100644
--- a/tools/testing/selftests/vm/mlock2.h
+++ b/tools/testing/selftests/mm/mlock2.h
@@ -4,22 +4,15 @@
#include <stdio.h>
#include <stdlib.h>
-#ifndef MLOCK_ONFAULT
-#define MLOCK_ONFAULT 1
-#endif
-
-#ifndef MCL_ONFAULT
-#define MCL_ONFAULT (MCL_FUTURE << 1)
-#endif
-
static int mlock2_(void *start, size_t len, int flags)
{
-#ifdef __NR_mlock2
- return syscall(__NR_mlock2, start, len, flags);
-#else
- errno = ENOSYS;
- return -1;
-#endif
+ int ret = syscall(__NR_mlock2, start, len, flags);
+
+ if (ret) {
+ errno = ret;
+ return -1;
+ }
+ return 0;
}
static FILE *seek_to_smaps_entry(unsigned long addr)
@@ -35,10 +28,8 @@ static FILE *seek_to_smaps_entry(unsigned long addr)
char path[BUFSIZ];
file = fopen("/proc/self/smaps", "r");
- if (!file) {
- perror("fopen smaps");
- _exit(1);
- }
+ if (!file)
+ ksft_exit_fail_msg("fopen smaps: %s\n", strerror(errno));
while (getline(&line, &size, file) > 0) {
if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n",
diff --git a/tools/testing/selftests/mm/mrelease_test.c b/tools/testing/selftests/mm/mrelease_test.c
new file mode 100644
index 000000000000..64e8d00ae944
--- /dev/null
+++ b/tools/testing/selftests/mm/mrelease_test.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2022 Google LLC
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <asm-generic/unistd.h>
+#include "vm_util.h"
+#include "kselftest.h"
+
+#define MB(x) (x << 20)
+#define MAX_SIZE_MB 1024
+
+static int alloc_noexit(unsigned long nr_pages, int pipefd)
+{
+ int ppid = getppid();
+ int timeout = 10; /* 10sec timeout to get killed */
+ unsigned long i;
+ char *buf;
+
+ buf = (char *)mmap(NULL, nr_pages * psize(), PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, 0, 0);
+ if (buf == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed, halting the test: %s\n", strerror(errno));
+
+ for (i = 0; i < nr_pages; i++)
+ *((unsigned long *)(buf + (i * psize()))) = i;
+
+ /* Signal the parent that the child is ready */
+ if (write(pipefd, "", 1) < 0)
+ ksft_exit_fail_msg("write: %s\n", strerror(errno));
+
+ /* Wait to be killed (when reparenting happens) */
+ while (getppid() == ppid && timeout > 0) {
+ sleep(1);
+ timeout--;
+ }
+
+ munmap(buf, nr_pages * psize());
+
+ return (timeout > 0) ? KSFT_PASS : KSFT_FAIL;
+}
+
+/* The process_mrelease calls in this test are expected to fail */
+static void run_negative_tests(int pidfd)
+{
+ /* Test invalid flags. Expect to fail with EINVAL error code. */
+ if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) ||
+ errno != EINVAL) {
+ ksft_exit_fail_msg("process_mrelease with wrong flags: %s\n", strerror(errno));
+ }
+ /*
+ * Test reaping while process is alive with no pending SIGKILL.
+ * Expect to fail with EINVAL error code.
+ */
+ if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL)
+ ksft_exit_fail_msg("process_mrelease on a live process: %s\n", strerror(errno));
+}
+
+static int child_main(int pipefd[], size_t size)
+{
+ int res;
+
+ /* Allocate and fault-in memory and wait to be killed */
+ close(pipefd[0]);
+ res = alloc_noexit(MB(size) / psize(), pipefd[1]);
+ close(pipefd[1]);
+ return res;
+}
+
+int main(void)
+{
+ int pipefd[2], pidfd;
+ bool success, retry;
+ size_t size;
+ pid_t pid;
+ char byte;
+ int res;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ /* Test a wrong pidfd */
+ if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) {
+ if (errno == ENOSYS) {
+ ksft_test_result_skip("process_mrelease not implemented\n");
+ ksft_finished();
+ } else {
+ ksft_exit_fail_msg("process_mrelease with wrong pidfd: %s",
+ strerror(errno));
+ }
+ }
+
+ /* Start the test with 1MB child memory allocation */
+ size = 1;
+retry:
+ /*
+ * Pipe for the child to signal when it's done allocating
+ * memory
+ */
+ if (pipe(pipefd))
+ ksft_exit_fail_msg("pipe: %s\n", strerror(errno));
+
+ pid = fork();
+ if (pid < 0) {
+ close(pipefd[0]);
+ close(pipefd[1]);
+ ksft_exit_fail_msg("fork: %s\n", strerror(errno));
+ }
+
+ if (pid == 0) {
+ /* Child main routine */
+ res = child_main(pipefd, size);
+ exit(res);
+ }
+
+ /*
+ * Parent main routine:
+ * Wait for the child to finish allocations, then kill and reap
+ */
+ close(pipefd[1]);
+ /* Block until the child is ready */
+ res = read(pipefd[0], &byte, 1);
+ close(pipefd[0]);
+ if (res < 0) {
+ if (!kill(pid, SIGKILL))
+ waitpid(pid, NULL, 0);
+ ksft_exit_fail_msg("read: %s\n", strerror(errno));
+ }
+
+ pidfd = syscall(__NR_pidfd_open, pid, 0);
+ if (pidfd < 0) {
+ if (!kill(pid, SIGKILL))
+ waitpid(pid, NULL, 0);
+ ksft_exit_fail_msg("pidfd_open: %s\n", strerror(errno));
+ }
+
+ /* Run negative tests which require a live child */
+ run_negative_tests(pidfd);
+
+ if (kill(pid, SIGKILL))
+ ksft_exit_fail_msg("kill: %s\n", strerror(errno));
+
+ success = (syscall(__NR_process_mrelease, pidfd, 0) == 0);
+ if (!success) {
+ /*
+ * If we failed to reap because the child exited too soon,
+ * before we could call process_mrelease. Double child's memory
+ * which causes it to spend more time on cleanup and increases
+ * our chances of reaping its memory before it exits.
+ * Retry until we succeed or reach MAX_SIZE_MB.
+ */
+ if (errno == ESRCH) {
+ retry = (size <= MAX_SIZE_MB);
+ } else {
+ waitpid(pid, NULL, 0);
+ ksft_exit_fail_msg("process_mrelease: %s\n", strerror(errno));
+ }
+ }
+
+ /* Cleanup to prevent zombies */
+ if (waitpid(pid, NULL, 0) < 0)
+ ksft_exit_fail_msg("waitpid: %s\n", strerror(errno));
+
+ close(pidfd);
+
+ if (!success) {
+ if (retry) {
+ size *= 2;
+ goto retry;
+ }
+ ksft_exit_fail_msg("All process_mrelease attempts failed!\n");
+ }
+
+ ksft_test_result_pass("Success reaping a child with %zuMB of memory allocations\n",
+ size);
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c
index 3a7b5ef0b0c6..a4f75d836733 100644
--- a/tools/testing/selftests/vm/mremap_dontunmap.c
+++ b/tools/testing/selftests/mm/mremap_dontunmap.c
@@ -7,17 +7,14 @@
*/
#define _GNU_SOURCE
#include <sys/mman.h>
+#include <linux/mman.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include "../kselftest.h"
-
-#ifndef MREMAP_DONTUNMAP
-#define MREMAP_DONTUNMAP 4
-#endif
+#include "kselftest.h"
unsigned long page_size;
char *page_buffer;
@@ -30,14 +27,14 @@ static void dump_maps(void)
system(cmd);
}
-#define BUG_ON(condition, description) \
- do { \
- if (condition) { \
- fprintf(stderr, "[FAIL]\t%s():%d\t%s:%s\n", __func__, \
- __LINE__, (description), strerror(errno)); \
- dump_maps(); \
- exit(1); \
- } \
+#define BUG_ON(condition, description) \
+ do { \
+ if (condition) { \
+ dump_maps(); \
+ ksft_exit_fail_msg("[FAIL]\t%s:%d\t%s:%s\n", \
+ __func__, __LINE__, (description), \
+ strerror(errno)); \
+ } \
} while (0)
// Try a simple operation for to "test" for kernel support this prevents
@@ -125,6 +122,59 @@ static void mremap_dontunmap_simple()
"unable to unmap destination mapping");
BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
"unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+// This test validates that MREMAP_DONTUNMAP on a shared mapping works as expected.
+static void mremap_dontunmap_simple_shmem()
+{
+ unsigned long num_pages = 5;
+
+ int mem_fd = memfd_create("memfd", MFD_CLOEXEC);
+ BUG_ON(mem_fd < 0, "memfd_create");
+
+ BUG_ON(ftruncate(mem_fd, num_pages * page_size) < 0,
+ "ftruncate");
+
+ void *source_mapping =
+ mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_FILE | MAP_SHARED, mem_fd, 0);
+ BUG_ON(source_mapping == MAP_FAILED, "mmap");
+
+ BUG_ON(close(mem_fd) < 0, "close");
+
+ memset(source_mapping, 'a', num_pages * page_size);
+
+ // Try to just move the whole mapping anywhere (not fixed).
+ void *dest_mapping =
+ mremap(source_mapping, num_pages * page_size, num_pages * page_size,
+ MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL);
+ if (dest_mapping == MAP_FAILED && errno == EINVAL) {
+ // Old kernel which doesn't support MREMAP_DONTUNMAP on shmem.
+ BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+ "unable to unmap source mapping");
+ return;
+ }
+
+ BUG_ON(dest_mapping == MAP_FAILED, "mremap");
+
+ // Validate that the pages have been moved, we know they were moved if
+ // the dest_mapping contains a's.
+ BUG_ON(check_region_contains_byte
+ (dest_mapping, num_pages * page_size, 'a') != 0,
+ "pages did not migrate");
+
+ // Because the region is backed by shmem, we will actually see the same
+ // memory at the source location still.
+ BUG_ON(check_region_contains_byte
+ (source_mapping, num_pages * page_size, 'a') != 0,
+ "source should have no ptes");
+
+ BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1,
+ "unable to unmap destination mapping");
+ BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+ "unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
}
// This test validates MREMAP_DONTUNMAP will move page tables to a specific
@@ -171,6 +221,7 @@ static void mremap_dontunmap_simple_fixed()
"unable to unmap destination mapping");
BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
"unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
}
// This test validates that we can MREMAP_DONTUNMAP for a portion of an
@@ -221,6 +272,7 @@ static void mremap_dontunmap_partial_mapping()
"unable to unmap destination mapping");
BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
"unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
}
// This test validates that we can remap over only a portion of a mapping.
@@ -280,19 +332,24 @@ static void mremap_dontunmap_partial_mapping_overwrite(void)
"unable to unmap destination mapping");
BUG_ON(munmap(source_mapping, 5 * page_size) == -1,
"unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
}
int main(void)
{
+ ksft_print_header();
+
page_size = sysconf(_SC_PAGE_SIZE);
// test for kernel support for MREMAP_DONTUNMAP skipping the test if
// not.
if (kernel_support_for_mremap_dontunmap() != 0) {
- printf("No kernel support for MREMAP_DONTUNMAP\n");
- return KSFT_SKIP;
+ ksft_print_msg("No kernel support for MREMAP_DONTUNMAP\n");
+ ksft_finished();
}
+ ksft_set_plan(5);
+
// Keep a page sized buffer around for when we need it.
page_buffer =
mmap(NULL, page_size, PROT_READ | PROT_WRITE,
@@ -300,6 +357,7 @@ int main(void)
BUG_ON(page_buffer == MAP_FAILED, "unable to mmap a page.");
mremap_dontunmap_simple();
+ mremap_dontunmap_simple_shmem();
mremap_dontunmap_simple_fixed();
mremap_dontunmap_partial_mapping();
mremap_dontunmap_partial_mapping_overwrite();
@@ -307,6 +365,5 @@ int main(void)
BUG_ON(munmap(page_buffer, page_size) == -1,
"unable to unmap page buffer");
- printf("OK\n");
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
new file mode 100644
index 000000000000..308576437228
--- /dev/null
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -0,0 +1,1485 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020 Google LLC
+ */
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/userfaultfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <syscall.h>
+#include <time.h>
+#include <stdbool.h>
+
+#include "kselftest.h"
+
+#define EXPECT_SUCCESS 0
+#define EXPECT_FAILURE 1
+#define NON_OVERLAPPING 0
+#define OVERLAPPING 1
+#define NS_PER_SEC 1000000000ULL
+#define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */
+#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
+
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
+#endif
+#define SIZE_MB(m) ((size_t)m * (1024 * 1024))
+#define SIZE_KB(k) ((size_t)k * 1024)
+
+struct config {
+ unsigned long long src_alignment;
+ unsigned long long dest_alignment;
+ unsigned long long region_size;
+ int overlapping;
+ unsigned int dest_preamble_size;
+};
+
+struct test {
+ const char *name;
+ struct config config;
+ int expect_failure;
+};
+
+enum {
+ _1KB = 1ULL << 10, /* 1KB -> not page aligned */
+ _4KB = 4ULL << 10,
+ _8KB = 8ULL << 10,
+ _1MB = 1ULL << 20,
+ _2MB = 2ULL << 20,
+ _4MB = 4ULL << 20,
+ _5MB = 5ULL << 20,
+ _1GB = 1ULL << 30,
+ _2GB = 2ULL << 30,
+ PMD = _2MB,
+ PUD = _1GB,
+};
+
+#define PTE page_size
+
+#define MAKE_TEST(source_align, destination_align, size, \
+ overlaps, should_fail, test_name) \
+(struct test){ \
+ .name = test_name, \
+ .config = { \
+ .src_alignment = source_align, \
+ .dest_alignment = destination_align, \
+ .region_size = size, \
+ .overlapping = overlaps, \
+ }, \
+ .expect_failure = should_fail \
+}
+
+/* compute square root using binary search */
+static unsigned long get_sqrt(unsigned long val)
+{
+ unsigned long low = 1;
+
+ /* assuming rand_size is less than 1TB */
+ unsigned long high = (1UL << 20);
+
+ while (low <= high) {
+ unsigned long mid = low + (high - low) / 2;
+ unsigned long temp = mid * mid;
+
+ if (temp == val)
+ return mid;
+ if (temp < val)
+ low = mid + 1;
+ high = mid - 1;
+ }
+ return low;
+}
+
+/*
+ * Returns false if the requested remap region overlaps with an
+ * existing mapping (e.g text, stack) else returns true.
+ */
+static bool is_remap_region_valid(void *addr, unsigned long long size)
+{
+ void *remap_addr = NULL;
+ bool ret = true;
+
+ /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
+ remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+
+ if (remap_addr == MAP_FAILED) {
+ if (errno == EEXIST)
+ ret = false;
+ } else {
+ munmap(remap_addr, size);
+ }
+
+ return ret;
+}
+
+/* Returns mmap_min_addr sysctl tunable from procfs */
+static unsigned long long get_mmap_min_addr(void)
+{
+ FILE *fp;
+ int n_matched;
+ static unsigned long long addr;
+
+ if (addr)
+ return addr;
+
+ fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
+ if (fp == NULL) {
+ ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ exit(KSFT_SKIP);
+ }
+
+ n_matched = fscanf(fp, "%llu", &addr);
+ if (n_matched != 1) {
+ ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ fclose(fp);
+ exit(KSFT_SKIP);
+ }
+
+ fclose(fp);
+ return addr;
+}
+
+/*
+ * Using /proc/self/maps, assert that the specified address range is contained
+ * within a single mapping.
+ */
+static bool is_range_mapped(FILE *maps_fp, unsigned long start,
+ unsigned long end)
+{
+ char *line = NULL;
+ size_t len = 0;
+ bool success = false;
+ unsigned long first_val, second_val;
+
+ rewind(maps_fp);
+
+ while (getline(&line, &len, maps_fp) != -1) {
+ if (sscanf(line, "%lx-%lx", &first_val, &second_val) != 2) {
+ ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
+ break;
+ }
+
+ if (first_val <= start && second_val >= end) {
+ success = true;
+ fflush(maps_fp);
+ break;
+ }
+ }
+
+ return success;
+}
+
+/* Check if [ptr, ptr + size) mapped in /proc/self/maps. */
+static bool is_ptr_mapped(FILE *maps_fp, void *ptr, unsigned long size)
+{
+ unsigned long start = (unsigned long)ptr;
+ unsigned long end = start + size;
+
+ return is_range_mapped(maps_fp, start, end);
+}
+
+/*
+ * Returns the start address of the mapping on success, else returns
+ * NULL on failure.
+ */
+static void *get_source_mapping(struct config c)
+{
+ unsigned long long addr = 0ULL;
+ void *src_addr = NULL;
+ unsigned long long mmap_min_addr;
+
+ mmap_min_addr = get_mmap_min_addr();
+ /*
+ * For some tests, we need to not have any mappings below the
+ * source mapping. Add some headroom to mmap_min_addr for this.
+ */
+ mmap_min_addr += 10 * _4MB;
+
+retry:
+ addr += c.src_alignment;
+ if (addr < mmap_min_addr)
+ goto retry;
+
+ src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+ if (src_addr == MAP_FAILED) {
+ if (errno == EPERM || errno == EEXIST)
+ goto retry;
+ goto error;
+ }
+ /*
+ * Check that the address is aligned to the specified alignment.
+ * Addresses which have alignments that are multiples of that
+ * specified are not considered valid. For instance, 1GB address is
+ * 2MB-aligned, however it will not be considered valid for a
+ * requested alignment of 2MB. This is done to reduce coincidental
+ * alignment in the tests.
+ */
+ if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
+ !((unsigned long long) src_addr & c.src_alignment)) {
+ munmap(src_addr, c.region_size);
+ goto retry;
+ }
+
+ if (!src_addr)
+ goto error;
+
+ return src_addr;
+error:
+ ksft_print_msg("Failed to map source region: %s\n",
+ strerror(errno));
+ return NULL;
+}
+
+/*
+ * This test validates that merge is called when expanding a mapping.
+ * Mapping containing three pages is created, middle page is unmapped
+ * and then the mapping containing the first page is expanded so that
+ * it fills the created hole. The two parts should merge creating
+ * single mapping with three pages.
+ */
+static void mremap_expand_merge(FILE *maps_fp, unsigned long page_size)
+{
+ char *test_name = "mremap expand merge";
+ bool success = false;
+ char *remap, *start;
+
+ start = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (start == MAP_FAILED) {
+ ksft_print_msg("mmap failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ munmap(start + page_size, page_size);
+ remap = mremap(start, page_size, 2 * page_size, 0);
+ if (remap == MAP_FAILED) {
+ ksft_print_msg("mremap failed: %s\n", strerror(errno));
+ munmap(start, page_size);
+ munmap(start + 2 * page_size, page_size);
+ goto out;
+ }
+
+ success = is_range_mapped(maps_fp, (unsigned long)start,
+ (unsigned long)(start + 3 * page_size));
+ munmap(start, 3 * page_size);
+
+out:
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+
+/*
+ * Similar to mremap_expand_merge() except instead of removing the middle page,
+ * we remove the last then attempt to remap offset from the second page. This
+ * should result in the mapping being restored to its former state.
+ */
+static void mremap_expand_merge_offset(FILE *maps_fp, unsigned long page_size)
+{
+
+ char *test_name = "mremap expand merge offset";
+ bool success = false;
+ char *remap, *start;
+
+ start = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (start == MAP_FAILED) {
+ ksft_print_msg("mmap failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ /* Unmap final page to ensure we have space to expand. */
+ munmap(start + 2 * page_size, page_size);
+ remap = mremap(start + page_size, page_size, 2 * page_size, 0);
+ if (remap == MAP_FAILED) {
+ ksft_print_msg("mremap failed: %s\n", strerror(errno));
+ munmap(start, 2 * page_size);
+ goto out;
+ }
+
+ success = is_range_mapped(maps_fp, (unsigned long)start,
+ (unsigned long)(start + 3 * page_size));
+ munmap(start, 3 * page_size);
+
+out:
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+
+/*
+ * Verify that an mremap within a range does not cause corruption
+ * of unrelated part of range.
+ *
+ * Consider the following range which is 2MB aligned and is
+ * a part of a larger 20MB range which is not shown. Each
+ * character is 256KB below making the source and destination
+ * 2MB each. The lower case letters are moved (s to d) and the
+ * upper case letters are not moved. The below test verifies
+ * that the upper case S letters are not corrupted by the
+ * adjacent mremap.
+ *
+ * |DDDDddddSSSSssss|
+ */
+static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr)
+{
+ char *test_name = "mremap mremap move within range";
+ void *src, *dest;
+ unsigned int i, success = 1;
+
+ size_t size = SIZE_MB(20);
+ void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ success = 0;
+ goto out;
+ }
+ memset(ptr, 0, size);
+
+ src = ptr + SIZE_MB(6);
+ src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1));
+
+ /* Set byte pattern for source block. */
+ memcpy(src, rand_addr, SIZE_MB(2));
+
+ dest = src - SIZE_MB(2);
+
+ void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1),
+ MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1));
+ if (new_ptr == MAP_FAILED) {
+ perror("mremap");
+ success = 0;
+ goto out;
+ }
+
+ /* Verify byte pattern after remapping */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(1); i++) {
+ char c = (char) rand();
+
+ if (((char *)src)[i] != c) {
+ ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n",
+ i);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ ((char *) src)[i] & 0xff);
+ success = 0;
+ }
+ }
+
+out:
+ if (munmap(ptr, size) == -1)
+ perror("munmap");
+
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+
+static bool is_multiple_vma_range_ok(unsigned int pattern_seed,
+ char *ptr, unsigned long page_size)
+{
+ int i;
+
+ srand(pattern_seed);
+ for (i = 0; i <= 10; i += 2) {
+ int j;
+ char *buf = &ptr[i * page_size];
+ size_t size = i == 4 ? 2 * page_size : page_size;
+
+ for (j = 0; j < size; j++) {
+ char chr = rand();
+
+ if (chr != buf[j]) {
+ ksft_print_msg("page %d offset %d corrupted, expected %d got %d\n",
+ i, j, chr, buf[j]);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static void mremap_move_multiple_vmas(unsigned int pattern_seed,
+ unsigned long page_size,
+ bool dont_unmap)
+{
+ int mremap_flags = MREMAP_FIXED | MREMAP_MAYMOVE;
+ char *test_name = "mremap move multiple vmas";
+ const size_t size = 11 * page_size;
+ bool success = true;
+ char *ptr, *tgt_ptr;
+ int i;
+
+ if (dont_unmap)
+ mremap_flags |= MREMAP_DONTUNMAP;
+
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out;
+ }
+
+ tgt_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (tgt_ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out;
+ }
+ if (munmap(tgt_ptr, 2 * size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Unmap so we end up with:
+ *
+ * 0 2 4 5 6 8 10 offset in buffer
+ * |*| |*| |*****| |*| |*|
+ * |*| |*| |*****| |*| |*|
+ * 0 1 2 3 4 5 6 pattern offset
+ */
+ for (i = 1; i < 10; i += 2) {
+ if (i == 5)
+ continue;
+
+ if (munmap(&ptr[i * page_size], page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ }
+
+ srand(pattern_seed);
+
+ /* Set up random patterns. */
+ for (i = 0; i <= 10; i += 2) {
+ int j;
+ size_t size = i == 4 ? 2 * page_size : page_size;
+ char *buf = &ptr[i * page_size];
+
+ for (j = 0; j < size; j++)
+ buf[j] = rand();
+ }
+
+ /* First, just move the whole thing. */
+ if (mremap(ptr, size, size, mremap_flags, tgt_ptr) == MAP_FAILED) {
+ perror("mremap");
+ success = false;
+ goto out_unmap;
+ }
+ /* Check move was ok. */
+ if (!is_multiple_vma_range_ok(pattern_seed, tgt_ptr, page_size)) {
+ success = false;
+ goto out_unmap;
+ }
+
+ /* Move next to itself. */
+ if (mremap(tgt_ptr, size, size, mremap_flags,
+ &tgt_ptr[size]) == MAP_FAILED) {
+ perror("mremap");
+ success = false;
+ goto out_unmap;
+ }
+ /* Check that the move is ok. */
+ if (!is_multiple_vma_range_ok(pattern_seed, &tgt_ptr[size], page_size)) {
+ success = false;
+ goto out_unmap;
+ }
+
+ /* Map a range to overwrite. */
+ if (mmap(tgt_ptr, size, PROT_NONE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED) {
+ perror("mmap tgt");
+ success = false;
+ goto out_unmap;
+ }
+ /* Move and overwrite. */
+ if (mremap(&tgt_ptr[size], size, size,
+ mremap_flags, tgt_ptr) == MAP_FAILED) {
+ perror("mremap");
+ success = false;
+ goto out_unmap;
+ }
+ /* Check that the move is ok. */
+ if (!is_multiple_vma_range_ok(pattern_seed, tgt_ptr, page_size)) {
+ success = false;
+ goto out_unmap;
+ }
+
+out_unmap:
+ if (munmap(tgt_ptr, 2 * size))
+ perror("munmap tgt");
+ if (munmap(ptr, size))
+ perror("munmap src");
+
+out:
+ if (success)
+ ksft_test_result_pass("%s%s\n", test_name,
+ dont_unmap ? " [dontunnmap]" : "");
+ else
+ ksft_test_result_fail("%s%s\n", test_name,
+ dont_unmap ? " [dontunnmap]" : "");
+}
+
+static void mremap_shrink_multiple_vmas(unsigned long page_size,
+ bool inplace)
+{
+ char *test_name = "mremap shrink multiple vmas";
+ const size_t size = 10 * page_size;
+ bool success = true;
+ char *ptr, *tgt_ptr;
+ void *res;
+ int i;
+
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out;
+ }
+
+ tgt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (tgt_ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out;
+ }
+ if (munmap(tgt_ptr, size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Unmap so we end up with:
+ *
+ * 0 2 4 6 8 10 offset in buffer
+ * |*| |*| |*| |*| |*| |*|
+ * |*| |*| |*| |*| |*| |*|
+ */
+ for (i = 1; i < 10; i += 2) {
+ if (munmap(&ptr[i * page_size], page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ }
+
+ /*
+ * Shrink in-place across multiple VMAs and gaps so we end up with:
+ *
+ * 0
+ * |*|
+ * |*|
+ */
+ if (inplace)
+ res = mremap(ptr, size, page_size, 0);
+ else
+ res = mremap(ptr, size, page_size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ tgt_ptr);
+
+ if (res == MAP_FAILED) {
+ perror("mremap");
+ success = false;
+ goto out_unmap;
+ }
+
+out_unmap:
+ if (munmap(tgt_ptr, size))
+ perror("munmap tgt");
+ if (munmap(ptr, size))
+ perror("munmap src");
+out:
+ if (success)
+ ksft_test_result_pass("%s%s\n", test_name,
+ inplace ? " [inplace]" : "");
+ else
+ ksft_test_result_fail("%s%s\n", test_name,
+ inplace ? " [inplace]" : "");
+}
+
+static void mremap_move_multiple_vmas_split(unsigned int pattern_seed,
+ unsigned long page_size,
+ bool dont_unmap)
+{
+ char *test_name = "mremap move multiple vmas split";
+ int mremap_flags = MREMAP_FIXED | MREMAP_MAYMOVE;
+ const size_t size = 10 * page_size;
+ bool success = true;
+ char *ptr, *tgt_ptr;
+ int i;
+
+ if (dont_unmap)
+ mremap_flags |= MREMAP_DONTUNMAP;
+
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out;
+ }
+
+ tgt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (tgt_ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out;
+ }
+ if (munmap(tgt_ptr, size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Unmap so we end up with:
+ *
+ * 0 1 2 3 4 5 6 7 8 9 10 offset in buffer
+ * |**********| |*******|
+ * |**********| |*******|
+ * 0 1 2 3 4 5 6 7 8 9 pattern offset
+ */
+ if (munmap(&ptr[5 * page_size], page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+
+ /* Set up random patterns. */
+ srand(pattern_seed);
+ for (i = 0; i < 10; i++) {
+ int j;
+ char *buf = &ptr[i * page_size];
+
+ if (i == 5)
+ continue;
+
+ for (j = 0; j < page_size; j++)
+ buf[j] = rand();
+ }
+
+ /*
+ * Move the below:
+ *
+ * <------------->
+ * 0 1 2 3 4 5 6 7 8 9 10 offset in buffer
+ * |**********| |*******|
+ * |**********| |*******|
+ * 0 1 2 3 4 5 6 7 8 9 pattern offset
+ *
+ * Into:
+ *
+ * 0 1 2 3 4 5 6 7 offset in buffer
+ * |*****| |*****|
+ * |*****| |*****|
+ * 2 3 4 5 6 7 pattern offset
+ */
+ if (mremap(&ptr[2 * page_size], size - 3 * page_size, size - 3 * page_size,
+ mremap_flags, tgt_ptr) == MAP_FAILED) {
+ perror("mremap");
+ success = false;
+ goto out_unmap;
+ }
+
+ /* Offset into random pattern. */
+ srand(pattern_seed);
+ for (i = 0; i < 2 * page_size; i++)
+ rand();
+
+ /* Check pattern. */
+ for (i = 0; i < 7; i++) {
+ int j;
+ char *buf = &tgt_ptr[i * page_size];
+
+ if (i == 3)
+ continue;
+
+ for (j = 0; j < page_size; j++) {
+ char chr = rand();
+
+ if (chr != buf[j]) {
+ ksft_print_msg("page %d offset %d corrupted, expected %d got %d\n",
+ i, j, chr, buf[j]);
+ goto out_unmap;
+ }
+ }
+ }
+
+out_unmap:
+ if (munmap(tgt_ptr, size))
+ perror("munmap tgt");
+ if (munmap(ptr, size))
+ perror("munmap src");
+out:
+ if (success)
+ ksft_test_result_pass("%s%s\n", test_name,
+ dont_unmap ? " [dontunnmap]" : "");
+ else
+ ksft_test_result_fail("%s%s\n", test_name,
+ dont_unmap ? " [dontunnmap]" : "");
+}
+
+#ifdef __NR_userfaultfd
+static void mremap_move_multi_invalid_vmas(FILE *maps_fp,
+ unsigned long page_size)
+{
+ char *test_name = "mremap move multiple invalid vmas";
+ const size_t size = 10 * page_size;
+ bool success = true;
+ char *ptr, *tgt_ptr;
+ int uffd, err, i;
+ void *res;
+ struct uffdio_api api = {
+ .api = UFFD_API,
+ .features = UFFD_EVENT_PAGEFAULT,
+ };
+
+ uffd = syscall(__NR_userfaultfd, O_NONBLOCK);
+ if (uffd == -1) {
+ err = errno;
+ perror("userfaultfd");
+ if (err == EPERM) {
+ ksft_test_result_skip("%s - missing uffd", test_name);
+ return;
+ }
+ success = false;
+ goto out;
+ }
+ if (ioctl(uffd, UFFDIO_API, &api)) {
+ perror("ioctl UFFDIO_API");
+ success = false;
+ goto out_close_uffd;
+ }
+
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out_close_uffd;
+ }
+
+ tgt_ptr = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (tgt_ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out_close_uffd;
+ }
+ if (munmap(tgt_ptr, size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Unmap so we end up with:
+ *
+ * 0 2 4 6 8 10 offset in buffer
+ * |*| |*| |*| |*| |*|
+ * |*| |*| |*| |*| |*|
+ *
+ * Additionally, register each with UFFD.
+ */
+ for (i = 0; i < 10; i += 2) {
+ void *unmap_ptr = &ptr[(i + 1) * page_size];
+ unsigned long start = (unsigned long)&ptr[i * page_size];
+ struct uffdio_register reg = {
+ .range = {
+ .start = start,
+ .len = page_size,
+ },
+ .mode = UFFDIO_REGISTER_MODE_MISSING,
+ };
+
+ if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1) {
+ perror("ioctl UFFDIO_REGISTER");
+ success = false;
+ goto out_unmap;
+ }
+ if (munmap(unmap_ptr, page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ }
+
+ /*
+ * Now try to move the entire range which is invalid for multi VMA move.
+ *
+ * This will fail, and no VMA should be moved, as we check this ahead of
+ * time.
+ */
+ res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr);
+ err = errno;
+ if (res != MAP_FAILED) {
+ fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n");
+ success = false;
+ goto out_unmap;
+ }
+ if (err != EFAULT) {
+ errno = err;
+ perror("mremap() unexpected error");
+ success = false;
+ goto out_unmap;
+ }
+ if (is_ptr_mapped(maps_fp, tgt_ptr, page_size)) {
+ fprintf(stderr,
+ "Invalid uffd-armed VMA at start of multi range moved\n");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Now try to move a single VMA, this should succeed as not multi VMA
+ * move.
+ */
+ res = mremap(ptr, page_size, page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr);
+ if (res == MAP_FAILED) {
+ perror("mremap single invalid-multi VMA");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Unmap the VMA, and remap a non-uffd registered (therefore, multi VMA
+ * move valid) VMA at the start of ptr range.
+ */
+ if (munmap(tgt_ptr, page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ res = mmap(ptr, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ if (res == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Now try to move the entire range, we should succeed in moving the
+ * first VMA, but no others, and report a failure.
+ */
+ res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr);
+ err = errno;
+ if (res != MAP_FAILED) {
+ fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n");
+ success = false;
+ goto out_unmap;
+ }
+ if (err != EFAULT) {
+ errno = err;
+ perror("mremap() unexpected error");
+ success = false;
+ goto out_unmap;
+ }
+ if (!is_ptr_mapped(maps_fp, tgt_ptr, page_size)) {
+ fprintf(stderr, "Valid VMA not moved\n");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Unmap the VMA, and map valid VMA at start of ptr range, and replace
+ * all existing multi-move invalid VMAs, except the last, with valid
+ * multi-move VMAs.
+ */
+ if (munmap(tgt_ptr, page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ if (munmap(ptr, size - 2 * page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ for (i = 0; i < 8; i += 2) {
+ res = mmap(&ptr[i * page_size], page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ if (res == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out_unmap;
+ }
+ }
+
+ /*
+ * Now try to move the entire range, we should succeed in moving all but
+ * the last VMA, and report a failure.
+ */
+ res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr);
+ err = errno;
+ if (res != MAP_FAILED) {
+ fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n");
+ success = false;
+ goto out_unmap;
+ }
+ if (err != EFAULT) {
+ errno = err;
+ perror("mremap() unexpected error");
+ success = false;
+ goto out_unmap;
+ }
+
+ for (i = 0; i < 10; i += 2) {
+ bool is_mapped = is_ptr_mapped(maps_fp,
+ &tgt_ptr[i * page_size], page_size);
+
+ if (i < 8 && !is_mapped) {
+ fprintf(stderr, "Valid VMA not moved at %d\n", i);
+ success = false;
+ goto out_unmap;
+ } else if (i == 8 && is_mapped) {
+ fprintf(stderr, "Invalid VMA moved at %d\n", i);
+ success = false;
+ goto out_unmap;
+ }
+ }
+
+out_unmap:
+ if (munmap(tgt_ptr, size))
+ perror("munmap tgt");
+ if (munmap(ptr, size))
+ perror("munmap src");
+out_close_uffd:
+ close(uffd);
+out:
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+#else
+static void mremap_move_multi_invalid_vmas(FILE *maps_fp, unsigned long page_size)
+{
+ char *test_name = "mremap move multiple invalid vmas";
+
+ ksft_test_result_skip("%s - missing uffd", test_name);
+}
+#endif /* __NR_userfaultfd */
+
+/* Returns the time taken for the remap on success else returns -1. */
+static long long remap_region(struct config c, unsigned int threshold_mb,
+ char *rand_addr)
+{
+ void *addr, *tmp_addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL;
+ unsigned long long t, d;
+ struct timespec t_start = {0, 0}, t_end = {0, 0};
+ long long start_ns, end_ns, align_mask, ret, offset;
+ unsigned long long threshold;
+ unsigned long num_chunks;
+
+ if (threshold_mb == VALIDATION_NO_THRESHOLD)
+ threshold = c.region_size;
+ else
+ threshold = MIN(threshold_mb * _1MB, c.region_size);
+
+ src_addr = get_source_mapping(c);
+ if (!src_addr) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Set byte pattern for source block. */
+ memcpy(src_addr, rand_addr, threshold);
+
+ /* Mask to zero out lower bits of address for alignment */
+ align_mask = ~(c.dest_alignment - 1);
+ /* Offset of destination address from the end of the source region */
+ offset = (c.overlapping) ? -c.dest_alignment : c.dest_alignment;
+ addr = (void *) (((unsigned long long) src_addr + c.region_size
+ + offset) & align_mask);
+
+ /* Remap after the destination block preamble. */
+ addr += c.dest_preamble_size;
+
+ /* See comment in get_source_mapping() */
+ if (!((unsigned long long) addr & c.dest_alignment))
+ addr = (void *) ((unsigned long long) addr | c.dest_alignment);
+
+ /* Don't destroy existing mappings unless expected to overlap */
+ while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
+ /* Check for unsigned overflow */
+ tmp_addr = addr + c.dest_alignment;
+ if (tmp_addr < addr) {
+ ksft_print_msg("Couldn't find a valid region to remap to\n");
+ ret = -1;
+ goto clean_up_src;
+ }
+ addr += c.dest_alignment;
+ }
+
+ if (c.dest_preamble_size) {
+ dest_preamble_addr = mmap((void *) addr - c.dest_preamble_size, c.dest_preamble_size,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+ if (dest_preamble_addr == MAP_FAILED) {
+ ksft_print_msg("Failed to map dest preamble region: %s\n",
+ strerror(errno));
+ ret = -1;
+ goto clean_up_src;
+ }
+
+ /* Set byte pattern for the dest preamble block. */
+ memcpy(dest_preamble_addr, rand_addr, c.dest_preamble_size);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &t_start);
+ dest_addr = mremap(src_addr, c.region_size, c.region_size,
+ MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+ clock_gettime(CLOCK_MONOTONIC, &t_end);
+
+ if (dest_addr == MAP_FAILED) {
+ ksft_print_msg("mremap failed: %s\n", strerror(errno));
+ ret = -1;
+ goto clean_up_dest_preamble;
+ }
+
+ /*
+ * Verify byte pattern after remapping. Employ an algorithm with a
+ * square root time complexity in threshold: divide the range into
+ * chunks, if memcmp() returns non-zero, only then perform an
+ * iteration in that chunk to find the mismatch index.
+ */
+ num_chunks = get_sqrt(threshold);
+ for (unsigned long i = 0; i < num_chunks; ++i) {
+ size_t chunk_size = threshold / num_chunks;
+ unsigned long shift = i * chunk_size;
+
+ if (!memcmp(dest_addr + shift, rand_addr + shift, chunk_size))
+ continue;
+
+ /* brute force iteration only over mismatch segment */
+ for (t = shift; t < shift + chunk_size; ++t) {
+ if (((char *) dest_addr)[t] != rand_addr[t]) {
+ ksft_print_msg("Data after remap doesn't match at offset %llu\n",
+ t);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
+ ((char *) dest_addr)[t] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+ }
+
+ /*
+ * if threshold is not divisible by num_chunks, then check the
+ * last chunk
+ */
+ for (t = num_chunks * (threshold / num_chunks); t < threshold; ++t) {
+ if (((char *) dest_addr)[t] != rand_addr[t]) {
+ ksft_print_msg("Data after remap doesn't match at offset %llu\n",
+ t);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
+ ((char *) dest_addr)[t] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+
+ /* Verify the dest preamble byte pattern after remapping */
+ if (!c.dest_preamble_size)
+ goto no_preamble;
+
+ num_chunks = get_sqrt(c.dest_preamble_size);
+
+ for (unsigned long i = 0; i < num_chunks; ++i) {
+ size_t chunk_size = c.dest_preamble_size / num_chunks;
+ unsigned long shift = i * chunk_size;
+
+ if (!memcmp(dest_preamble_addr + shift, rand_addr + shift,
+ chunk_size))
+ continue;
+
+ /* brute force iteration only over mismatched segment */
+ for (d = shift; d < shift + chunk_size; ++d) {
+ if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
+ d);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
+ ((char *) dest_preamble_addr)[d] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+ }
+
+ for (d = num_chunks * (c.dest_preamble_size / num_chunks); d < c.dest_preamble_size; ++d) {
+ if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
+ d);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
+ ((char *) dest_preamble_addr)[d] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+
+no_preamble:
+ start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
+ end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
+ ret = end_ns - start_ns;
+
+/*
+ * Since the destination address is specified using MREMAP_FIXED, subsequent
+ * mremap will unmap any previous mapping at the address range specified by
+ * dest_addr and region_size. This significantly affects the remap time of
+ * subsequent tests. So we clean up mappings after each test.
+ */
+clean_up_dest:
+ munmap(dest_addr, c.region_size);
+clean_up_dest_preamble:
+ if (c.dest_preamble_size && dest_preamble_addr)
+ munmap(dest_preamble_addr, c.dest_preamble_size);
+clean_up_src:
+ munmap(src_addr, c.region_size);
+out:
+ return ret;
+}
+
+/*
+ * Verify that an mremap aligning down does not destroy
+ * the beginning of the mapping just because the aligned
+ * down address landed on a mapping that maybe does not exist.
+ */
+static void mremap_move_1mb_from_start(unsigned int pattern_seed,
+ char *rand_addr)
+{
+ char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src";
+ void *src = NULL, *dest = NULL;
+ unsigned int i, success = 1;
+
+ /* Config to reuse get_source_mapping() to do an aligned mmap. */
+ struct config c = {
+ .src_alignment = SIZE_MB(1) + SIZE_KB(256),
+ .region_size = SIZE_MB(6)
+ };
+
+ src = get_source_mapping(c);
+ if (!src) {
+ success = 0;
+ goto out;
+ }
+
+ c.src_alignment = SIZE_MB(1) + SIZE_KB(256);
+ dest = get_source_mapping(c);
+ if (!dest) {
+ success = 0;
+ goto out;
+ }
+
+ /* Set byte pattern for source block. */
+ memcpy(src, rand_addr, SIZE_MB(2));
+
+ /*
+ * Unmap the beginning of dest so that the aligned address
+ * falls on no mapping.
+ */
+ munmap(dest, SIZE_MB(1));
+
+ void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1),
+ MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1));
+ if (new_ptr == MAP_FAILED) {
+ perror("mremap");
+ success = 0;
+ goto out;
+ }
+
+ /* Verify byte pattern after remapping */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(1); i++) {
+ char c = (char) rand();
+
+ if (((char *)src)[i] != c) {
+ ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n",
+ i);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ ((char *) src)[i] & 0xff);
+ success = 0;
+ }
+ }
+
+out:
+ if (src && munmap(src, c.region_size) == -1)
+ perror("munmap src");
+
+ if (dest && munmap(dest, c.region_size) == -1)
+ perror("munmap dest");
+
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+
+static void run_mremap_test_case(struct test test_case, int *failures,
+ unsigned int threshold_mb,
+ char *rand_addr)
+{
+ long long remap_time = remap_region(test_case.config, threshold_mb,
+ rand_addr);
+
+ if (remap_time < 0) {
+ if (test_case.expect_failure)
+ ksft_test_result_xfail("%s\n\tExpected mremap failure\n",
+ test_case.name);
+ else {
+ ksft_test_result_fail("%s\n", test_case.name);
+ *failures += 1;
+ }
+ } else {
+ /*
+ * Comparing mremap time is only applicable if entire region
+ * was faulted in.
+ */
+ if (threshold_mb == VALIDATION_NO_THRESHOLD ||
+ test_case.config.region_size <= threshold_mb * _1MB)
+ ksft_test_result_pass("%s\n\tmremap time: %12lldns\n",
+ test_case.name, remap_time);
+ else
+ ksft_test_result_pass("%s\n", test_case.name);
+ }
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr,
+ "Usage: %s [[-t <threshold_mb>] [-p <pattern_seed>]]\n"
+ "-t\t only validate threshold_mb of the remapped region\n"
+ " \t if 0 is supplied no threshold is used; all tests\n"
+ " \t are run and remapped regions validated fully.\n"
+ " \t The default threshold used is 4MB.\n"
+ "-p\t provide a seed to generate the random pattern for\n"
+ " \t validating the remapped region.\n", cmd);
+}
+
+static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
+ unsigned int *pattern_seed)
+{
+ const char *optstr = "t:p:";
+ int opt;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+ switch (opt) {
+ case 't':
+ *threshold_mb = atoi(optarg);
+ break;
+ case 'p':
+ *pattern_seed = atoi(optarg);
+ break;
+ default:
+ usage(argv[0]);
+ return -1;
+ }
+ }
+
+ if (optind < argc) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ return 0;
+}
+
+#define MAX_TEST 15
+#define MAX_PERF_TEST 3
+int main(int argc, char **argv)
+{
+ int failures = 0;
+ unsigned int i;
+ int run_perf_tests;
+ unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
+
+ /* hard-coded test configs */
+ size_t max_test_variable_region_size = _2GB;
+ size_t max_test_constant_region_size = _2MB;
+ size_t dest_preamble_size = 10 * _4MB;
+
+ unsigned int pattern_seed;
+ char *rand_addr;
+ size_t rand_size;
+ int num_expand_tests = 2;
+ int num_misc_tests = 9;
+ struct test test_cases[MAX_TEST] = {};
+ struct test perf_test_cases[MAX_PERF_TEST];
+ int page_size;
+ time_t t;
+ FILE *maps_fp;
+
+ pattern_seed = (unsigned int) time(&t);
+
+ if (parse_args(argc, argv, &threshold_mb, &pattern_seed) < 0)
+ exit(EXIT_FAILURE);
+
+ ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
+ threshold_mb, pattern_seed);
+
+ /*
+ * set preallocated random array according to test configs; see the
+ * functions for the logic of setting the size
+ */
+ if (!threshold_mb)
+ rand_size = MAX(max_test_variable_region_size,
+ max_test_constant_region_size);
+ else
+ rand_size = MAX(MIN(threshold_mb * _1MB,
+ max_test_variable_region_size),
+ max_test_constant_region_size);
+ rand_size = MAX(dest_preamble_size, rand_size);
+
+ rand_addr = (char *)mmap(NULL, rand_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (rand_addr == MAP_FAILED) {
+ perror("mmap");
+ ksft_exit_fail_msg("cannot mmap rand_addr\n");
+ }
+
+ /* fill stream of random bytes */
+ srand(pattern_seed);
+ for (unsigned long i = 0; i < rand_size; ++i)
+ rand_addr[i] = (char) rand();
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Expected mremap failures */
+ test_cases[0] = MAKE_TEST(page_size, page_size, page_size,
+ OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Source and Destination Regions Overlapping");
+
+ test_cases[1] = MAKE_TEST(page_size, page_size/4, page_size,
+ NON_OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Destination Address Misaligned (1KB-aligned)");
+ test_cases[2] = MAKE_TEST(page_size/4, page_size, page_size,
+ NON_OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Source Address Misaligned (1KB-aligned)");
+
+ /* Src addr PTE aligned */
+ test_cases[3] = MAKE_TEST(PTE, PTE, PTE * 2,
+ NON_OVERLAPPING, EXPECT_SUCCESS,
+ "8KB mremap - Source PTE-aligned, Destination PTE-aligned");
+
+ /* Src addr 1MB aligned */
+ test_cases[4] = MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2MB mremap - Source 1MB-aligned, Destination PTE-aligned");
+ test_cases[5] = MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
+
+ /* Src addr PMD aligned */
+ test_cases[6] = MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination PTE-aligned");
+ test_cases[7] = MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination 1MB-aligned");
+ test_cases[8] = MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination PMD-aligned");
+
+ /* Src addr PUD aligned */
+ test_cases[9] = MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PTE-aligned");
+ test_cases[10] = MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination 1MB-aligned");
+ test_cases[11] = MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PMD-aligned");
+ test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PUD-aligned");
+
+ /* Src and Dest addr 1MB aligned. 5MB mremap. */
+ test_cases[13] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "5MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
+
+ /* Src and Dest addr 1MB aligned. 5MB mremap. */
+ test_cases[14] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "5MB mremap - Source 1MB-aligned, Dest 1MB-aligned with 40MB Preamble");
+ test_cases[14].config.dest_preamble_size = 10 * _4MB;
+
+ perf_test_cases[0] = MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PTE-aligned, Destination PTE-aligned");
+ /*
+ * mremap 1GB region - Page table level aligned time
+ * comparison.
+ */
+ perf_test_cases[1] = MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PMD-aligned, Destination PMD-aligned");
+ perf_test_cases[2] = MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PUD-aligned, Destination PUD-aligned");
+
+ run_perf_tests = (threshold_mb == VALIDATION_NO_THRESHOLD) ||
+ (threshold_mb * _1MB >= _1GB);
+
+ ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
+ ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests + num_misc_tests);
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+ run_mremap_test_case(test_cases[i], &failures, threshold_mb,
+ rand_addr);
+
+ maps_fp = fopen("/proc/self/maps", "r");
+
+ if (maps_fp == NULL) {
+ munmap(rand_addr, rand_size);
+ ksft_exit_fail_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
+ }
+
+ mremap_expand_merge(maps_fp, page_size);
+ mremap_expand_merge_offset(maps_fp, page_size);
+
+ mremap_move_within_range(pattern_seed, rand_addr);
+ mremap_move_1mb_from_start(pattern_seed, rand_addr);
+ mremap_shrink_multiple_vmas(page_size, /* inplace= */true);
+ mremap_shrink_multiple_vmas(page_size, /* inplace= */false);
+ mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ false);
+ mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true);
+ mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false);
+ mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true);
+ mremap_move_multi_invalid_vmas(maps_fp, page_size);
+
+ fclose(maps_fp);
+
+ if (run_perf_tests) {
+ ksft_print_msg("\n%s\n",
+ "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
+ for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
+ run_mremap_test_case(perf_test_cases[i], &failures,
+ threshold_mb,
+ rand_addr);
+ }
+
+ munmap(rand_addr, rand_size);
+
+ if (failures > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/mseal_helpers.h b/tools/testing/selftests/mm/mseal_helpers.h
new file mode 100644
index 000000000000..0cfce31c76d2
--- /dev/null
+++ b/tools/testing/selftests/mm/mseal_helpers.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define FAIL_TEST_IF_FALSE(test_passed) \
+ do { \
+ if (!(test_passed)) { \
+ ksft_test_result_fail("%s: line:%d\n", \
+ __func__, __LINE__); \
+ return; \
+ } \
+ } while (0)
+
+#define SKIP_TEST_IF_FALSE(test_passed) \
+ do { \
+ if (!(test_passed)) { \
+ ksft_test_result_skip("%s: line:%d\n", \
+ __func__, __LINE__); \
+ return; \
+ } \
+ } while (0)
+
+#define REPORT_TEST_PASS() ksft_test_result_pass("%s\n", __func__)
+
+#ifndef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE 0x2
+#endif
+
+#ifndef PKEY_BITS_PER_PKEY
+#define PKEY_BITS_PER_PKEY 2
+#endif
+
+#ifndef PKEY_MASK
+#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+#endif
+
+#ifndef u64
+#define u64 unsigned long long
+#endif
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
new file mode 100644
index 000000000000..faad4833366a
--- /dev/null
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -0,0 +1,1989 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <asm-generic/unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "kselftest.h"
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+#include "mseal_helpers.h"
+
+static unsigned long get_vma_size(void *addr, int *prot)
+{
+ FILE *maps;
+ char line[256];
+ int size = 0;
+ uintptr_t addr_start, addr_end;
+ char protstr[5];
+ *prot = 0;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps)
+ return 0;
+
+ while (fgets(line, sizeof(line), maps)) {
+ if (sscanf(line, "%lx-%lx %4s", &addr_start, &addr_end, protstr) == 3) {
+ if (addr_start == (uintptr_t) addr) {
+ size = addr_end - addr_start;
+ if (protstr[0] == 'r')
+ *prot |= 0x4;
+ if (protstr[1] == 'w')
+ *prot |= 0x2;
+ if (protstr[2] == 'x')
+ *prot |= 0x1;
+ break;
+ }
+ }
+ }
+ fclose(maps);
+ return size;
+}
+
+/*
+ * define sys_xyx to call syscall directly.
+ */
+static int sys_mseal(void *start, size_t len)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mseal, start, len, 0);
+ return sret;
+}
+
+static int sys_mprotect(void *ptr, size_t size, unsigned long prot)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mprotect, ptr, size, prot);
+ return sret;
+}
+
+static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
+ return sret;
+}
+
+static int sys_munmap(void *ptr, size_t size)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_munmap, ptr, size);
+ return sret;
+}
+
+static int sys_madvise(void *start, size_t len, int types)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_madvise, start, len, types);
+ return sret;
+}
+
+static void *sys_mremap(void *addr, size_t old_len, size_t new_len,
+ unsigned long flags, void *new_addr)
+{
+ void *sret;
+
+ errno = 0;
+ sret = (void *) syscall(__NR_mremap, addr, old_len, new_len, flags, new_addr);
+ return sret;
+}
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(__NR_pkey_alloc, flags, init_val);
+
+ return ret;
+}
+
+static unsigned int __read_pkey_reg(void)
+{
+ unsigned int pkey_reg = 0;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkey_reg = eax;
+#endif
+ return pkey_reg;
+}
+
+static void __write_pkey_reg(u64 pkey_reg)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ unsigned int eax = pkey_reg;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+#endif
+}
+
+static unsigned long pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
+static u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+ unsigned long shift = pkey_bit_position(pkey);
+
+ /* mask out bits from pkey in old value */
+ reg &= ~((u64)PKEY_MASK << shift);
+ /* OR in new bits for pkey */
+ reg |= (flags & PKEY_MASK) << shift;
+ return reg;
+}
+
+static void set_pkey(int pkey, unsigned long pkey_value)
+{
+ u64 new_pkey_reg;
+
+ new_pkey_reg = set_pkey_bits(__read_pkey_reg(), pkey, pkey_value);
+ __write_pkey_reg(new_pkey_reg);
+}
+
+static void setup_single_address(int size, void **ptrOut)
+{
+ void *ptr;
+
+ ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ *ptrOut = ptr;
+}
+
+static void setup_single_address_rw(int size, void **ptrOut)
+{
+ void *ptr;
+ unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE;
+
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
+ *ptrOut = ptr;
+}
+
+static int clean_single_address(void *ptr, int size)
+{
+ int ret;
+ ret = munmap(ptr, size);
+ return ret;
+}
+
+static int seal_single_address(void *ptr, int size)
+{
+ int ret;
+ ret = sys_mseal(ptr, size);
+ return ret;
+}
+
+bool seal_support(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+
+ ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (ptr == (void *) -1)
+ return false;
+
+ ret = sys_mseal(ptr, page_size);
+ if (ret < 0)
+ return false;
+
+ return true;
+}
+
+bool pkey_supported(void)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ int pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
+
+ if (pkey > 0)
+ return true;
+#endif
+ return false;
+}
+
+static void test_seal_addseal(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_unmapped_start(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* munmap 2 pages from ptr. */
+ ret = sys_munmap(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail because 2 pages from ptr are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail because 2 pages from ptr are unmapped. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ ret = sys_mseal(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_unmapped_middle(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* munmap 2 pages from ptr + page. */
+ ret = sys_munmap(ptr + page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail, since middle 2 pages are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* we still can add seal to the first page and last page*/
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mseal(ptr + 3 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_unmapped_end(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap last 2 pages. */
+ ret = sys_munmap(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail since last 2 pages are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* The first 2 pages is not sealed, and can add seals */
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_multiple_vmas(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split the vma into 3. */
+ ret = sys_mprotect(ptr + page_size, 2 * page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will get applied to all 4 pages - 3 VMAs. */
+ ret = sys_mprotect(ptr, size, PROT_READ);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use mprotect to split the vma into 3. */
+ ret = sys_mprotect(ptr + page_size, 2 * page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mseal get applied to all 4 pages - 3 VMAs. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_split_start(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split at middle */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first page, this will split the VMA */
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* add seal to the remain 3 pages */
+ ret = sys_mseal(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_split_end(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split at middle */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the last page */
+ ret = sys_mseal(ptr + 3 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* Adding seals to the first 3 pages */
+ ret = sys_mseal(ptr, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_invalid_input(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(8 * page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ ret = clean_single_address(ptr + 4 * page_size, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* invalid flag */
+ ret = syscall(__NR_mseal, ptr, size, 0x20);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* unaligned address */
+ ret = sys_mseal(ptr + 1, 2 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* length too big */
+ ret = sys_mseal(ptr, 5 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* length overflow */
+ ret = sys_mseal(ptr, UINT64_MAX/page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* start is not in a valid VMA */
+ ret = sys_mseal(ptr - page_size, 5 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_zero_length(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mprotect(ptr, 0, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal 0 length will be OK, same as mprotect */
+ ret = sys_mseal(ptr, 0);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* verify the 4 pages are not sealed by previous call. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_zero_address(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ /* use mmap to change protection. */
+ ptr = mmap(0, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ FAIL_TEST_IF_FALSE(ptr == 0);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == 4 * page_size);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* verify the 4 pages are sealed by previous call. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_twice(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* apply the same seal will be OK. idempotent. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_start_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* the first page is sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* pages after the first page is not sealed. */
+ ret = sys_mprotect(ptr + page_size, page_size * 3,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_end_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* first page is not sealed */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* last 3 page are sealed */
+ ret = sys_mprotect(ptr + page_size, page_size * 3,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_unalign_len(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 2 - 1);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 2 pages are sealed. */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 2, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_unalign_len_variant_2(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 2 + 1);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 3 pages are sealed. */
+ ret = sys_mprotect(ptr, page_size * 3, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 3, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_two_vma(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 4);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 2, page_size * 2,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_two_vma_with_split(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split as two vma. */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mseal can apply across 2 vma, also split them. */
+ if (seal) {
+ ret = seal_single_address(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* the first page is not sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the second page is sealed. */
+ ret = sys_mprotect(ptr + page_size, page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the third page is sealed. */
+ ret = sys_mprotect(ptr + 2 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the fouth page is not sealed. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_partial_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* seal one page. */
+ if (seal) {
+ ret = seal_single_address(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mprotect first 2 page will fail, since the first page are sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_partial_mprotect_tail(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 2 * page_size;
+ int ret;
+ int prot;
+
+ /*
+ * Check if a partial mseal (that results in two vmas) works correctly.
+ * It might mprotect the first, but it'll never touch the second (msealed) vma.
+ */
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_mprotect(ptr, size, PROT_EXEC);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ FAIL_TEST_IF_FALSE(get_vma_size(ptr + page_size, &prot) > 0);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ }
+
+ REPORT_TEST_PASS();
+}
+
+
+static void test_seal_mprotect_two_vma_with_gap(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use munmap to free two pages in the middle */
+ ret = sys_munmap(ptr + page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail, because there is a gap in the address. */
+ /* notes, internally mprotect still updated the first page. */
+ ret = sys_mprotect(ptr, 4 * page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, 4 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* the first page is not sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ /* the last page is not sealed. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_split(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal all 4 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mprotect is sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+
+ ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_merge(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split one page. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal first two pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 2 pages are sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* last 2 pages are not sealed. */
+ ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_munmap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 4 pages are sealed. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+/*
+ * allocate 4 pages,
+ * use mprotect to split it as two VMAs
+ * seal the whole range
+ * munmap will fail on both
+ */
+static void test_seal_munmap_two_vma(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_munmap(ptr, page_size * 2);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+/*
+ * allocate a VMA with 4 pages.
+ * munmap the middle 2 pages.
+ * seal the whole 4 pages, will fail.
+ * munmap the first page will be OK.
+ * munmap the last page will be OK.
+ */
+static void test_seal_munmap_vma_with_gap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ /* can't have gap in the middle. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ }
+
+ ret = sys_munmap(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr + page_size * 2, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_munmap_partial_across_vmas(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 2 * page_size;
+ int ret;
+ int prot;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ FAIL_TEST_IF_FALSE(get_vma_size(ptr + page_size, &prot) > 0);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_munmap_start_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap the first page. */
+ ret = sys_munmap(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the last 3 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* unmap from the first page. */
+ ret = sys_munmap(ptr, size);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size * 3);
+ } else {
+ /* note: this will be OK, even the first page is */
+ /* already unmapped. */
+ FAIL_TEST_IF_FALSE(!ret);
+
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_munmap_end_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap last page. */
+ ret = sys_munmap(ptr + page_size * 3, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first 3 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* unmap all pages. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_munmap_middle_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap 2 pages in the middle. */
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first page. */
+ if (seal) {
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* munmap all 4 pages. */
+ ret = sys_munmap(ptr, size);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+
+ size = get_vma_size(ptr + page_size * 3, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ } else {
+ FAIL_TEST_IF_FALSE(!ret);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+
+ size = get_vma_size(ptr + page_size * 3, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_shrink(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* shrink from 4 pages to 2 pages. */
+ ret2 = sys_mremap(ptr, size, 2 * page_size, 0, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == (void *) MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != (void *) MAP_FAILED);
+
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_expand(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ /* ummap last 2 pages. */
+ ret = sys_munmap(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* expand from 2 page to 4 pages. */
+ ret2 = sys_mremap(ptr, 2 * page_size, 4 * page_size, 0, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move(bool seal)
+{
+ void *ptr, *newPtr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newPtr);
+ FAIL_TEST_IF_FALSE(newPtr != (void *)-1);
+ ret = clean_single_address(newPtr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* move from ptr to fixed address. */
+ ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mmap_overwrite_prot(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to change protection. */
+ ret2 = mmap(ptr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mmap_expand(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 12 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ /* ummap last 4 pages. */
+ ret = sys_munmap(ptr + 8 * page_size, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, 8 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to expand. */
+ ret2 = mmap(ptr, size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mmap_shrink(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 12 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to shrink. */
+ ret2 = mmap(ptr, 8 * page_size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_shrink_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move and shrink to fixed address */
+ ret2 = sys_mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_expand_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(newAddr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move and expand to fixed address */
+ ret2 = sys_mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(newAddr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move to fixed address */
+ ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move_fixed_zero(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /*
+ * MREMAP_FIXED can move the mapping to zero address
+ */
+ ret2 = sys_mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 == 0);
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move_dontunmap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move, and don't unmap src addr. */
+ ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ /* kernel will allocate a new address */
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move_dontunmap_anyaddr(bool seal)
+{
+ void *ptr, *ptr2;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /*
+ * The new address is any address that not allocated.
+ * use allocate/free to similate that.
+ */
+ setup_single_address(size, &ptr2);
+ FAIL_TEST_IF_FALSE(ptr2 != (void *)-1);
+ ret = sys_munmap(ptr2, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /*
+ * remap to any address.
+ */
+ ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
+ (void *) ptr2);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ /* remap success and return ptr2 */
+ FAIL_TEST_IF_FALSE(ret2 == ptr2);
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_merge_and_split(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size;
+ int ret;
+ int prot;
+
+ /* (24 RO) */
+ setup_single_address(24 * page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect(NONE) to set out boundary */
+ /* (1 NONE) (22 RO) (1 NONE) */
+ ret = sys_mprotect(ptr, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(!ret);
+ ret = sys_mprotect(ptr + 23 * page_size, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 22 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 4);
+
+ /* use mseal to split from beginning */
+ /* (1 NONE) (1 RO_SEAL) (21 RO) (1 NONE) */
+ ret = sys_mseal(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 2 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 21 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* use mseal to split from the end. */
+ /* (1 NONE) (1 RO_SEAL) (20 RO) (1 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 22 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 22 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 2 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 20 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge with prev. */
+ /* (1 NONE) (2 RO_SEAL) (19 RO) (1 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 2 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge with after. */
+ /* (1 NONE) (2 RO_SEAL) (18 RO) (2 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 21 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 21 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 2 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* split and merge from prev */
+ /* (1 NONE) (3 RO_SEAL) (17 RO) (2 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 1 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 3 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ ret = sys_munmap(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ ret = sys_mprotect(ptr + 2 * page_size, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* split and merge from next */
+ /* (1 NONE) (3 RO_SEAL) (16 RO) (3 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 20 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 20 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 3 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge from middle of prev and middle of next. */
+ /* (1 NONE) (22 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, 20 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 22 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon_on_rw(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address_rw(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't take effect on RW memory. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* base seal still apply. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon_on_pkey(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int pkey;
+
+ SKIP_TEST_IF_FALSE(pkey_supported());
+
+ setup_single_address_rw(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
+ FAIL_TEST_IF_FALSE(pkey > 0);
+
+ ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't take effect if PKRU allow write. */
+ set_pkey(pkey, PKEY_UNRESTRICTED);
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* sealing will take effect if PKRU deny write. */
+ set_pkey(pkey, PKEY_DISABLE_WRITE);
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* base seal still apply. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon_on_filebacked(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int fd;
+ unsigned long mapflags = MAP_PRIVATE;
+
+ fd = memfd_create("test", 0);
+ FAIL_TEST_IF_FALSE(fd > 0);
+
+ ret = fallocate(fd, 0, 0, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0);
+ FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't apply for file backed mapping. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+ close(fd);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon_on_shared(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED;
+
+ ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't apply for shared mapping. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_across_vmas(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 2 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+
+static void test_seal_madvise_nodiscard(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /*
+ * Test a random madvise flag like MADV_RANDOM that does not touch page
+ * contents (and thus should work for msealed VMAs). RANDOM also happens to
+ * share bits with other discard-ish flags like REMOVE.
+ */
+ ret = sys_madvise(ptr, size, MADV_RANDOM);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+int main(void)
+{
+ bool test_seal = seal_support();
+
+ ksft_print_header();
+
+ if (!test_seal)
+ ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n");
+
+ if (!pkey_supported())
+ ksft_print_msg("PKEY not supported\n");
+
+ ksft_set_plan(88);
+
+ test_seal_addseal();
+ test_seal_unmapped_start();
+ test_seal_unmapped_middle();
+ test_seal_unmapped_end();
+ test_seal_multiple_vmas();
+ test_seal_split_start();
+ test_seal_split_end();
+ test_seal_invalid_input();
+ test_seal_zero_length();
+ test_seal_twice();
+
+ test_seal_mprotect(false);
+ test_seal_mprotect(true);
+
+ test_seal_start_mprotect(false);
+ test_seal_start_mprotect(true);
+
+ test_seal_end_mprotect(false);
+ test_seal_end_mprotect(true);
+
+ test_seal_mprotect_unalign_len(false);
+ test_seal_mprotect_unalign_len(true);
+
+ test_seal_mprotect_unalign_len_variant_2(false);
+ test_seal_mprotect_unalign_len_variant_2(true);
+
+ test_seal_mprotect_two_vma(false);
+ test_seal_mprotect_two_vma(true);
+
+ test_seal_mprotect_two_vma_with_split(false);
+ test_seal_mprotect_two_vma_with_split(true);
+
+ test_seal_mprotect_partial_mprotect(false);
+ test_seal_mprotect_partial_mprotect(true);
+
+ test_seal_mprotect_two_vma_with_gap();
+ test_seal_mprotect_two_vma_with_gap();
+
+ test_seal_mprotect_merge(false);
+ test_seal_mprotect_merge(true);
+
+ test_seal_mprotect_split(false);
+ test_seal_mprotect_split(true);
+
+ test_seal_mprotect_partial_mprotect_tail(false);
+ test_seal_mprotect_partial_mprotect_tail(true);
+
+ test_seal_munmap(false);
+ test_seal_munmap(true);
+ test_seal_munmap_two_vma(false);
+ test_seal_munmap_two_vma(true);
+ test_seal_munmap_vma_with_gap(false);
+ test_seal_munmap_vma_with_gap(true);
+ test_seal_munmap_partial_across_vmas(false);
+ test_seal_munmap_partial_across_vmas(true);
+
+ test_munmap_start_freed(false);
+ test_munmap_start_freed(true);
+ test_munmap_middle_freed(false);
+ test_munmap_middle_freed(true);
+ test_munmap_end_freed(false);
+ test_munmap_end_freed(true);
+
+ test_seal_mremap_shrink(false);
+ test_seal_mremap_shrink(true);
+ test_seal_mremap_expand(false);
+ test_seal_mremap_expand(true);
+ test_seal_mremap_move(false);
+ test_seal_mremap_move(true);
+
+ test_seal_mremap_shrink_fixed(false);
+ test_seal_mremap_shrink_fixed(true);
+ test_seal_mremap_expand_fixed(false);
+ test_seal_mremap_expand_fixed(true);
+ test_seal_mremap_move_fixed(false);
+ test_seal_mremap_move_fixed(true);
+ test_seal_mremap_move_dontunmap(false);
+ test_seal_mremap_move_dontunmap(true);
+ test_seal_mremap_move_fixed_zero(false);
+ test_seal_mremap_move_fixed_zero(true);
+ test_seal_mremap_move_dontunmap_anyaddr(false);
+ test_seal_mremap_move_dontunmap_anyaddr(true);
+ test_seal_madvise_nodiscard(false);
+ test_seal_madvise_nodiscard(true);
+ test_seal_discard_ro_anon(false);
+ test_seal_discard_ro_anon(true);
+ test_seal_discard_across_vmas(false);
+ test_seal_discard_across_vmas(true);
+ test_seal_discard_ro_anon_on_rw(false);
+ test_seal_discard_ro_anon_on_rw(true);
+ test_seal_discard_ro_anon_on_shared(false);
+ test_seal_discard_ro_anon_on_shared(true);
+ test_seal_discard_ro_anon_on_filebacked(false);
+ test_seal_discard_ro_anon_on_filebacked(true);
+ test_seal_mmap_overwrite_prot(false);
+ test_seal_mmap_overwrite_prot(true);
+ test_seal_mmap_expand(false);
+ test_seal_mmap_expand(true);
+ test_seal_mmap_shrink(false);
+ test_seal_mmap_shrink(true);
+
+ test_seal_merge_and_split();
+ test_seal_zero_address();
+
+ test_seal_discard_ro_anon_on_pkey(false);
+ test_seal_discard_ro_anon_on_pkey(true);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/on-fault-limit.c b/tools/testing/selftests/mm/on-fault-limit.c
new file mode 100644
index 000000000000..fc4117453c84
--- /dev/null
+++ b/tools/testing/selftests/mm/on-fault-limit.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/mman.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include "kselftest.h"
+
+static void test_limit(void)
+{
+ struct rlimit lims;
+ void *map;
+
+ if (getrlimit(RLIMIT_MEMLOCK, &lims))
+ ksft_exit_fail_msg("getrlimit: %s\n", strerror(errno));
+
+ if (mlockall(MCL_ONFAULT | MCL_FUTURE))
+ ksft_exit_fail_msg("mlockall: %s\n", strerror(errno));
+
+ map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
+
+ ksft_test_result(map == MAP_FAILED, "The map failed respecting mlock limits\n");
+
+ if (map != MAP_FAILED)
+ munmap(map, 2 * lims.rlim_max);
+ munlockall();
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ if (!getuid())
+ ksft_test_result_skip("The test must be run from a normal user\n");
+ else
+ test_limit();
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/page_frag/Makefile b/tools/testing/selftests/mm/page_frag/Makefile
new file mode 100644
index 000000000000..8c8bb39ffa28
--- /dev/null
+++ b/tools/testing/selftests/mm/page_frag/Makefile
@@ -0,0 +1,18 @@
+PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = page_frag_test.ko
+
+obj-m += page_frag_test.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean
diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c
new file mode 100644
index 000000000000..e806c1866e36
--- /dev/null
+++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test module for page_frag cache
+ *
+ * Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com>
+ */
+
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/completion.h>
+#include <linux/ptr_ring.h>
+#include <linux/kthread.h>
+#include <linux/page_frag_cache.h>
+
+#define TEST_FAILED_PREFIX "page_frag_test failed: "
+
+static struct ptr_ring ptr_ring;
+static int nr_objs = 512;
+static atomic_t nthreads;
+static struct completion wait;
+static struct page_frag_cache test_nc;
+static int test_popped;
+static int test_pushed;
+static bool force_exit;
+
+static int nr_test = 2000000;
+module_param(nr_test, int, 0);
+MODULE_PARM_DESC(nr_test, "number of iterations to test");
+
+static bool test_align;
+module_param(test_align, bool, 0);
+MODULE_PARM_DESC(test_align, "use align API for testing");
+
+static int test_alloc_len = 2048;
+module_param(test_alloc_len, int, 0);
+MODULE_PARM_DESC(test_alloc_len, "alloc len for testing");
+
+static int test_push_cpu;
+module_param(test_push_cpu, int, 0);
+MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment");
+
+static int test_pop_cpu;
+module_param(test_pop_cpu, int, 0);
+MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment");
+
+static int page_frag_pop_thread(void *arg)
+{
+ struct ptr_ring *ring = arg;
+
+ pr_info("page_frag pop test thread begins on cpu %d\n",
+ smp_processor_id());
+
+ while (test_popped < nr_test) {
+ void *obj = __ptr_ring_consume(ring);
+
+ if (obj) {
+ test_popped++;
+ page_frag_free(obj);
+ } else {
+ if (force_exit)
+ break;
+
+ cond_resched();
+ }
+ }
+
+ if (atomic_dec_and_test(&nthreads))
+ complete(&wait);
+
+ pr_info("page_frag pop test thread exits on cpu %d\n",
+ smp_processor_id());
+
+ return 0;
+}
+
+static int page_frag_push_thread(void *arg)
+{
+ struct ptr_ring *ring = arg;
+
+ pr_info("page_frag push test thread begins on cpu %d\n",
+ smp_processor_id());
+
+ while (test_pushed < nr_test && !force_exit) {
+ void *va;
+ int ret;
+
+ if (test_align) {
+ va = page_frag_alloc_align(&test_nc, test_alloc_len,
+ GFP_KERNEL, SMP_CACHE_BYTES);
+
+ if ((unsigned long)va & (SMP_CACHE_BYTES - 1)) {
+ force_exit = true;
+ WARN_ONCE(true, TEST_FAILED_PREFIX "unaligned va returned\n");
+ }
+ } else {
+ va = page_frag_alloc(&test_nc, test_alloc_len, GFP_KERNEL);
+ }
+
+ if (!va)
+ continue;
+
+ ret = __ptr_ring_produce(ring, va);
+ if (ret) {
+ page_frag_free(va);
+ cond_resched();
+ } else {
+ test_pushed++;
+ }
+ }
+
+ pr_info("page_frag push test thread exits on cpu %d\n",
+ smp_processor_id());
+
+ if (atomic_dec_and_test(&nthreads))
+ complete(&wait);
+
+ return 0;
+}
+
+static int __init page_frag_test_init(void)
+{
+ struct task_struct *tsk_push, *tsk_pop;
+ int last_pushed = 0, last_popped = 0;
+ ktime_t start;
+ u64 duration;
+ int ret;
+
+ page_frag_cache_init(&test_nc);
+ atomic_set(&nthreads, 2);
+ init_completion(&wait);
+
+ if (test_alloc_len > PAGE_SIZE || test_alloc_len <= 0 ||
+ !cpu_active(test_push_cpu) || !cpu_active(test_pop_cpu))
+ return -EINVAL;
+
+ ret = ptr_ring_init(&ptr_ring, nr_objs, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ tsk_push = kthread_create_on_cpu(page_frag_push_thread, &ptr_ring,
+ test_push_cpu, "page_frag_push");
+ if (IS_ERR(tsk_push))
+ return PTR_ERR(tsk_push);
+
+ tsk_pop = kthread_create_on_cpu(page_frag_pop_thread, &ptr_ring,
+ test_pop_cpu, "page_frag_pop");
+ if (IS_ERR(tsk_pop)) {
+ kthread_stop(tsk_push);
+ return PTR_ERR(tsk_pop);
+ }
+
+ start = ktime_get();
+ wake_up_process(tsk_push);
+ wake_up_process(tsk_pop);
+
+ pr_info("waiting for test to complete\n");
+
+ while (!wait_for_completion_timeout(&wait, msecs_to_jiffies(10000))) {
+ /* exit if there is no progress for push or pop size */
+ if (last_pushed == test_pushed || last_popped == test_popped) {
+ WARN_ONCE(true, TEST_FAILED_PREFIX "no progress\n");
+ force_exit = true;
+ continue;
+ }
+
+ last_pushed = test_pushed;
+ last_popped = test_popped;
+ pr_info("page_frag_test progress: pushed = %d, popped = %d\n",
+ test_pushed, test_popped);
+ }
+
+ if (force_exit) {
+ pr_err(TEST_FAILED_PREFIX "exit with error\n");
+ goto out;
+ }
+
+ duration = (u64)ktime_us_delta(ktime_get(), start);
+ pr_info("%d of iterations for %s testing took: %lluus\n", nr_test,
+ test_align ? "aligned" : "non-aligned", duration);
+
+out:
+ ptr_ring_cleanup(&ptr_ring, NULL);
+ page_frag_cache_drain(&test_nc);
+
+ return -EAGAIN;
+}
+
+static void __exit page_frag_test_exit(void)
+{
+}
+
+module_init(page_frag_test_init);
+module_exit(page_frag_test_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yunsheng Lin <linyunsheng@huawei.com>");
+MODULE_DESCRIPTION("Test module for page_frag");
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
new file mode 100644
index 000000000000..2cb5441f29c7
--- /dev/null
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -0,0 +1,1738 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <malloc.h>
+#include "vm_util.h"
+#include "kselftest.h"
+#include <linux/types.h>
+#include <linux/memfd.h>
+#include <linux/userfaultfd.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <math.h>
+#include <asm/unistd.h>
+#include <pthread.h>
+#include <sys/resource.h>
+#include <assert.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+
+#define PAGEMAP_BITS_ALL (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
+ PAGE_IS_FILE | PAGE_IS_PRESENT | \
+ PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
+ PAGE_IS_HUGE)
+#define PAGEMAP_NON_WRITTEN_BITS (PAGE_IS_WPALLOWED | PAGE_IS_FILE | \
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED | \
+ PAGE_IS_PFNZERO | PAGE_IS_HUGE)
+
+#define TEST_ITERATIONS 100
+#define PAGEMAP "/proc/self/pagemap"
+int pagemap_fd;
+int uffd;
+size_t page_size;
+size_t hpage_size;
+const char *progname;
+
+#define LEN(region) ((region.end - region.start)/page_size)
+
+static long pagemap_ioctl(void *start, int len, void *vec, int vec_len, int flag,
+ int max_pages, long required_mask, long anyof_mask, long excluded_mask,
+ long return_mask)
+{
+ struct pm_scan_arg arg;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + len);
+ arg.vec = (uintptr_t)vec;
+ arg.vec_len = vec_len;
+ arg.flags = flag;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = max_pages;
+ arg.category_mask = required_mask;
+ arg.category_anyof_mask = anyof_mask;
+ arg.category_inverted = excluded_mask;
+ arg.return_mask = return_mask;
+
+ return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+static long pagemap_ioc(void *start, int len, void *vec, int vec_len, int flag,
+ int max_pages, long required_mask, long anyof_mask, long excluded_mask,
+ long return_mask, long *walk_end)
+{
+ struct pm_scan_arg arg;
+ int ret;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + len);
+ arg.vec = (uintptr_t)vec;
+ arg.vec_len = vec_len;
+ arg.flags = flag;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = max_pages;
+ arg.category_mask = required_mask;
+ arg.category_anyof_mask = anyof_mask;
+ arg.category_inverted = excluded_mask;
+ arg.return_mask = return_mask;
+
+ ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+
+ if (walk_end)
+ *walk_end = arg.walk_end;
+
+ return ret;
+}
+
+
+int init_uffd(void)
+{
+ struct uffdio_api uffdio_api;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
+ if (uffd == -1)
+ return uffd;
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+ return -1;
+
+ if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) ||
+ !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) ||
+ !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) ||
+ !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
+ return -1;
+
+ return 0;
+}
+
+int wp_init(void *lpBaseAddress, long dwRegionSize)
+{
+ struct uffdio_register uffdio_register;
+ struct uffdio_writeprotect wp;
+
+ uffdio_register.range.start = (unsigned long)lpBaseAddress;
+ uffdio_register.range.len = dwRegionSize;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ ksft_exit_fail_msg("ioctl(UFFDIO_REGISTER) %d %s\n", errno, strerror(errno));
+
+ if (!(uffdio_register.ioctls & UFFDIO_WRITEPROTECT))
+ ksft_exit_fail_msg("ioctl set is incorrect\n");
+
+ wp.range.start = (unsigned long)lpBaseAddress;
+ wp.range.len = dwRegionSize;
+ wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+
+ if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp))
+ ksft_exit_fail_msg("ioctl(UFFDIO_WRITEPROTECT)\n");
+
+ return 0;
+}
+
+int wp_free(void *lpBaseAddress, long dwRegionSize)
+{
+ struct uffdio_register uffdio_register;
+
+ uffdio_register.range.start = (unsigned long)lpBaseAddress;
+ uffdio_register.range.len = dwRegionSize;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range))
+ ksft_exit_fail_msg("ioctl unregister failure\n");
+ return 0;
+}
+
+int wp_addr_range(void *lpBaseAddress, int dwRegionSize)
+{
+ if (pagemap_ioctl(lpBaseAddress, dwRegionSize, NULL, 0,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", 1, errno, strerror(errno));
+
+ return 0;
+}
+
+void *gethugetlb_mem(int size, int *shmid)
+{
+ char *mem;
+
+ if (shmid) {
+ *shmid = shmget(2, size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+ if (*shmid < 0)
+ return NULL;
+
+ mem = shmat(*shmid, 0, 0);
+ if (mem == (char *)-1) {
+ shmctl(*shmid, IPC_RMID, NULL);
+ ksft_exit_fail_msg("Shared memory attach failure\n");
+ }
+ } else {
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_HUGETLB | MAP_PRIVATE, -1, 0);
+ if (mem == MAP_FAILED)
+ return NULL;
+ }
+
+ return mem;
+}
+
+int userfaultfd_tests(void)
+{
+ long mem_size, vec_size, written, num_pages = 16;
+ char *mem, *vec;
+
+ mem_size = num_pages * page_size;
+ mem = mmap(NULL, mem_size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+
+ /* Change protection of pages differently */
+ mprotect(mem, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 1 * mem_size/8, mem_size/8, PROT_READ);
+ mprotect(mem + 2 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 3 * mem_size/8, mem_size/8, PROT_READ);
+ mprotect(mem + 4 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 5 * mem_size/8, mem_size/8, PROT_NONE);
+ mprotect(mem + 6 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 7 * mem_size/8, mem_size/8, PROT_READ);
+
+ wp_addr_range(mem + (mem_size/16), mem_size - 2 * (mem_size/8));
+ wp_addr_range(mem, mem_size);
+
+ vec_size = mem_size/page_size;
+ vec = calloc(vec_size, sizeof(struct page_region));
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+ free(vec);
+ return 0;
+}
+
+int get_reads(struct page_region *vec, int vec_size)
+{
+ int i, sum = 0;
+
+ for (i = 0; i < vec_size; i++)
+ sum += LEN(vec[i]);
+
+ return sum;
+}
+
+int sanity_tests_sd(void)
+{
+ unsigned long long mem_size, vec_size, i, total_pages = 0;
+ long ret, ret2, ret3;
+ int num_pages = 1000;
+ int total_writes, total_reads, reads, count;
+ struct page_region *vec, *vec2;
+ char *mem, *m[2];
+ long walk_end;
+
+ vec_size = num_pages/2;
+ mem_size = num_pages * page_size;
+
+ vec = calloc(vec_size, sizeof(struct page_region));
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ vec2 = calloc(vec_size, sizeof(struct page_region));
+ if (!vec2)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ /* 1. wrong operation */
+ ksft_test_result(pagemap_ioctl(mem, 0, vec, vec_size, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+ "%s Zero range size is valid\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, NULL, vec_size, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) < 0,
+ "%s output buffer must be specified with size\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+ "%s output buffer can be 0\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, 0, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+ "%s output buffer can be 0\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, -1,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0,
+ "%s wrong flag specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC | 0xFF,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0,
+ "%s flag has extra bits specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+ 0, 0, 0, 0, PAGE_IS_WRITTEN) >= 0,
+ "%s no selection mask is specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, PAGE_IS_WRITTEN, 0, 0) == 0,
+ "%s no return mask is specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, 0x1000) < 0,
+ "%s wrong return mask specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, 0xFFF, PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN) < 0,
+ "%s mixture of correct and wrong flag\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, 0, 0, PAGEMAP_BITS_ALL, PAGE_IS_WRITTEN) >= 0,
+ "%s PAGEMAP_BITS_ALL can be specified with PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n",
+ __func__);
+
+ /* 2. Clear area with larger vec size */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ ksft_test_result(ret >= 0, "%s Clear area with larger vec size\n", __func__);
+
+ /* 3. Repeated pattern of written and non-written pages */
+ for (i = 0; i < mem_size; i += 2 * page_size)
+ mem[i]++;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0,
+ 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result((unsigned long long)ret == mem_size/(page_size * 2),
+ "%s Repeated pattern of written and non-written pages\n", __func__);
+
+ /* 4. Repeated pattern of written and non-written pages in parts */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno));
+
+ ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret3 < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret3, errno, strerror(errno));
+
+ ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2,
+ "%s Repeated pattern of written and non-written pages in parts %ld %ld %ld\n",
+ __func__, ret, ret3, ret2);
+
+ /* 5. Repeated pattern of written and non-written pages max_pages */
+ for (i = 0; i < mem_size; i += 2 * page_size)
+ mem[i]++;
+ mem[(mem_size/page_size - 1) * page_size]++;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno));
+
+ ksft_test_result(ret == num_pages/2 && ret2 == 1,
+ "%s Repeated pattern of written and non-written pages max_pages\n",
+ __func__);
+
+ /* 6. only get 2 dirty pages and clear them as well */
+ vec_size = mem_size/page_size;
+ memset(mem, -1, mem_size);
+
+ /* get and clear second and third pages */
+ ret = pagemap_ioctl(mem + page_size, 2 * page_size, vec, 1,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == 2 &&
+ vec[0].start == (uintptr_t)(mem + page_size) &&
+ ret2 == 2 && LEN(vec2[0]) == 1 && vec2[0].start == (uintptr_t)mem &&
+ LEN(vec2[1]) == vec_size - 3 &&
+ vec2[1].start == (uintptr_t)(mem + 3 * page_size),
+ "%s only get 2 written pages and clear them as well\n", __func__);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 7. Two regions */
+ m[0] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (m[0] == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ m[1] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (m[1] == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(m[0], mem_size);
+ wp_init(m[1], mem_size);
+ wp_addr_range(m[0], mem_size);
+ wp_addr_range(m[1], mem_size);
+
+ memset(m[0], 'a', mem_size);
+ memset(m[1], 'b', mem_size);
+
+ wp_addr_range(m[0], mem_size);
+
+ ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size,
+ "%s Two regions\n", __func__);
+
+ wp_free(m[0], mem_size);
+ wp_free(m[1], mem_size);
+ munmap(m[0], mem_size);
+ munmap(m[1], mem_size);
+
+ free(vec);
+ free(vec2);
+
+ /* 8. Smaller vec */
+ mem_size = 1050 * page_size;
+ vec_size = mem_size/(page_size*2);
+
+ vec = calloc(vec_size, sizeof(struct page_region));
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ for (i = 0; i < mem_size/page_size; i += 2)
+ mem[i * page_size]++;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ total_pages += ret;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ total_pages += ret;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ total_pages += ret;
+
+ ksft_test_result(total_pages == mem_size/(page_size*2), "%s Smaller max_pages\n", __func__);
+
+ free(vec);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+ total_pages = 0;
+
+ /* 9. Smaller vec */
+ mem_size = 10000 * page_size;
+ vec_size = 50;
+
+ vec = calloc(vec_size, sizeof(struct page_region));
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ for (count = 0; count < TEST_ITERATIONS; count++) {
+ total_writes = total_reads = 0;
+ walk_end = (long)mem;
+
+ for (i = 0; i < mem_size; i += page_size) {
+ if (rand() % 2) {
+ mem[i]++;
+ total_writes++;
+ }
+ }
+
+ while (total_reads < total_writes) {
+ ret = pagemap_ioc((void *)walk_end, mem_size-(walk_end - (long)mem), vec,
+ vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ if ((unsigned long)ret > vec_size)
+ break;
+
+ reads = get_reads(vec, ret);
+ total_reads += reads;
+ }
+
+ if (total_reads != total_writes)
+ break;
+ }
+
+ ksft_test_result(count == TEST_ITERATIONS, "Smaller vec\n");
+
+ free(vec);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 10. Walk_end tester */
+ vec_size = 1000;
+ mem_size = vec_size * page_size;
+
+ vec = calloc(vec_size, sizeof(struct page_region));
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ memset(mem, 0, mem_size);
+
+ ret = pagemap_ioc(mem, 0, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 0 && walk_end == (long)mem,
+ "Walk_end: Same start and end address\n");
+
+ ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 0 && walk_end == (long)mem,
+ "Walk_end: Same start and end with WP\n");
+
+ ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 0 && walk_end == (long)mem,
+ "Walk_end: Same start and end with 0 output buffer\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: Big vec\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: vec of minimum length\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2),
+ "Walk_end: Half max pages\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size),
+ "Walk_end: 1 max page\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: max pages\n");
+
+ wp_addr_range(mem, mem_size);
+ for (i = 0; i < mem_size; i += 2 * page_size)
+ mem[i]++;
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_end sparse: Big vec\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+ "Walk_end sparse: vec of minimum length\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+ "Walk_end sparse: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_end sparse: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_end sparse: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_endsparse : Half max pages\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+ "Walk_end: 1 max page\n");
+
+ free(vec);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ return 0;
+}
+
+int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip)
+{
+ unsigned long long vec_size;
+ int written;
+ struct page_region *vec, *vec2;
+
+ if (skip) {
+ ksft_test_result_skip("%s all new pages must not be written (dirty)\n", prefix);
+ ksft_test_result_skip("%s all pages must be written (dirty)\n", prefix);
+ ksft_test_result_skip("%s all pages dirty other than first and the last one\n",
+ prefix);
+ ksft_test_result_skip("%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix);
+ ksft_test_result_skip("%s only middle page dirty\n", prefix);
+ ksft_test_result_skip("%s only two middle pages dirty\n", prefix);
+ return 0;
+ }
+
+ vec_size = mem_size/page_size;
+ vec = calloc(vec_size, sizeof(struct page_region));
+ vec2 = calloc(vec_size, sizeof(struct page_region));
+
+ /* 1. all new pages must be not be written (dirty) */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", prefix);
+
+ /* 2. all pages must be written */
+ memset(mem, -1, mem_size);
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && LEN(vec[0]) == mem_size/page_size,
+ "%s all pages must be written (dirty)\n", prefix);
+
+ /* 3. all pages dirty other than first and the last one */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ memset(mem + page_size, 0, mem_size - (2 * page_size));
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && LEN(vec[0]) >= vec_size - 2 && LEN(vec[0]) <= vec_size,
+ "%s all pages dirty other than first and the last one\n", prefix);
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 0,
+ "%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix);
+
+ /* 4. only middle page dirty */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ mem[vec_size/2 * page_size]++;
+
+ written = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && LEN(vec[0]) >= 1,
+ "%s only middle page dirty\n", prefix);
+
+ /* 5. only two middle pages dirty and walk over only middle pages */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ mem[vec_size/2 * page_size]++;
+ mem[(vec_size/2 + 1) * page_size]++;
+
+ written = pagemap_ioctl(&mem[vec_size/2 * page_size], 2 * page_size, vec, 1, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && vec[0].start == (uintptr_t)(&mem[vec_size/2 * page_size])
+ && LEN(vec[0]) == 2,
+ "%s only two middle pages dirty\n", prefix);
+
+ free(vec);
+ free(vec2);
+ return 0;
+}
+
+void *gethugepage(int map_size)
+{
+ int ret;
+ char *map;
+
+ map = memalign(hpage_size, map_size);
+ if (!map)
+ ksft_exit_fail_msg("memalign failed %d %s\n", errno, strerror(errno));
+
+ ret = madvise(map, map_size, MADV_HUGEPAGE);
+ if (ret)
+ return NULL;
+
+ memset(map, 0, map_size);
+
+ return map;
+}
+
+int hpage_unit_tests(void)
+{
+ char *map;
+ int ret, ret2;
+ size_t num_pages = 10;
+ unsigned long long map_size = hpage_size * num_pages;
+ unsigned long long vec_size = map_size/page_size;
+ struct page_region *vec, *vec2;
+
+ vec = calloc(vec_size, sizeof(struct page_region));
+ vec2 = calloc(vec_size, sizeof(struct page_region));
+ if (!vec || !vec2)
+ ksft_exit_fail_msg("malloc failed\n");
+
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ /* 1. all new huge page must not be written (dirty) */
+ ret = pagemap_ioctl(map, map_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 0, "%s all new huge page must not be written (dirty)\n",
+ __func__);
+
+ /* 2. all the huge page must not be written */
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 0, "%s all the huge page must not be written\n", __func__);
+
+ /* 3. all the huge page must be written and clear dirty as well */
+ memset(map, -1, map_size);
+ ret = pagemap_ioctl(map, map_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && vec[0].start == (uintptr_t)map &&
+ LEN(vec[0]) == vec_size && vec[0].categories == PAGE_IS_WRITTEN,
+ "%s all the huge page must be written and clear\n", __func__);
+
+ /* 4. only middle page written */
+ wp_free(map, map_size);
+ free(map);
+ map = gethugepage(map_size);
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+ map[vec_size/2 * page_size]++;
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) > 0,
+ "%s only middle page written\n", __func__);
+
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s all new huge page must be written\n", __func__);
+ ksft_test_result_skip("%s all the huge page must not be written\n", __func__);
+ ksft_test_result_skip("%s all the huge page must be written and clear\n", __func__);
+ ksft_test_result_skip("%s only middle page written\n", __func__);
+ }
+
+ /* 5. clear first half of huge page */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, 0, map_size);
+
+ wp_addr_range(map, map_size/2);
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 &&
+ vec[0].start == (uintptr_t)(map + map_size/2),
+ "%s clear first half of huge page\n", __func__);
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s clear first half of huge page\n", __func__);
+ }
+
+ /* 6. clear first half of huge page with limited buffer */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, 0, map_size);
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 &&
+ vec[0].start == (uintptr_t)(map + map_size/2),
+ "%s clear first half of huge page with limited buffer\n",
+ __func__);
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s clear first half of huge page with limited buffer\n",
+ __func__);
+ }
+
+ /* 7. clear second half of huge page */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, -1, map_size);
+
+ ret = pagemap_ioctl(map + map_size/2, map_size/2, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size/2,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2,
+ "%s clear second half huge page\n", __func__);
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s clear second half huge page\n", __func__);
+ }
+
+ /* 8. get half huge page */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, -1, map_size);
+ usleep(100);
+
+ ret = pagemap_ioctl(map, map_size, vec, 1,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ hpage_size/(2*page_size), PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == hpage_size/(2*page_size),
+ "%s get half huge page\n", __func__);
+
+ ret2 = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+ ksft_test_result(ret2 == 1 && LEN(vec[0]) == (map_size - hpage_size/2)/page_size,
+ "%s get half huge page\n", __func__);
+
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s get half huge page\n", __func__);
+ ksft_test_result_skip("%s get half huge page\n", __func__);
+ }
+
+ free(vec);
+ free(vec2);
+ return 0;
+}
+
+int unmapped_region_tests(void)
+{
+ void *start = (void *)0x10000000;
+ int written, len = 0x00040000;
+ long vec_size = len / page_size;
+ struct page_region *vec = calloc(vec_size, sizeof(struct page_region));
+
+ /* 1. Get written pages */
+ written = pagemap_ioctl(start, len, vec, vec_size, 0, 0,
+ PAGEMAP_NON_WRITTEN_BITS, 0, 0, PAGEMAP_NON_WRITTEN_BITS);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written >= 0, "%s Get status of pages\n", __func__);
+
+ free(vec);
+ return 0;
+}
+
+static void test_simple(void)
+{
+ int i;
+ char *map;
+ struct page_region vec;
+
+ map = aligned_alloc(page_size, page_size);
+ if (!map)
+ ksft_exit_fail_msg("aligned_alloc failed\n");
+
+ wp_init(map, page_size);
+ wp_addr_range(map, page_size);
+
+ for (i = 0 ; i < TEST_ITERATIONS; i++) {
+ if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 1) {
+ ksft_print_msg("written bit was 1, but should be 0 (i=%d)\n", i);
+ break;
+ }
+
+ wp_addr_range(map, page_size);
+ /* Write something to the page to get the written bit enabled on the page */
+ map[0]++;
+
+ if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0) {
+ ksft_print_msg("written bit was 0, but should be 1 (i=%d)\n", i);
+ break;
+ }
+
+ wp_addr_range(map, page_size);
+ }
+ wp_free(map, page_size);
+ free(map);
+
+ ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__);
+}
+
+int sanity_tests(void)
+{
+ unsigned long long mem_size, vec_size;
+ long ret, fd, i, buf_size;
+ struct page_region *vec;
+ char *mem, *fmem;
+ struct stat sbuf;
+ char *tmp_buf;
+
+ /* 1. wrong operation */
+ mem_size = 10 * page_size;
+ vec_size = mem_size / page_size;
+
+ vec = calloc(vec_size, sizeof(struct page_region));
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED || vec == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0,
+ "%s WP op can be specified with !PAGE_IS_WRITTEN\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0,
+ "%s required_mask specified\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL) >= 0,
+ "%s anyof_mask specified\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ 0, 0, PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL) >= 0,
+ "%s excluded_mask specified\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL, 0,
+ PAGEMAP_BITS_ALL) >= 0,
+ "%s required_mask and anyof_mask specified\n", __func__);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 2. Get sd and present pages with anyof_mask */
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ memset(mem, 0, mem_size);
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+ (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+ "%s Get sd and present pages with anyof_mask\n", __func__);
+
+ /* 3. Get sd and present pages with required_mask */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+ (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+ "%s Get all the pages with required_mask\n", __func__);
+
+ /* 4. Get sd and present pages with required_mask and anyof_mask */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, PAGE_IS_PRESENT, 0, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+ (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+ "%s Get sd and present pages with required_mask and anyof_mask\n",
+ __func__);
+
+ /* 5. Don't get sd pages */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret == 0, "%s Don't get sd pages\n", __func__);
+
+ /* 6. Don't get present pages */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGE_IS_PRESENT, 0, PAGE_IS_PRESENT, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret == 0, "%s Don't get present pages\n", __func__);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 8. Find written present pages with return mask */
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ memset(mem, 0, mem_size);
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ 0, PAGEMAP_BITS_ALL, 0, PAGE_IS_WRITTEN);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ vec[0].categories == PAGE_IS_WRITTEN,
+ "%s Find written present pages with return mask\n", __func__);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 9. Memory mapped file */
+ fd = open(progname, O_RDONLY);
+ if (fd < 0)
+ ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
+
+ ret = stat(progname, &sbuf);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+
+ fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ tmp_buf = malloc(sbuf.st_size);
+ memcpy(tmp_buf, fmem, sbuf.st_size);
+
+ ret = pagemap_ioctl(fmem, sbuf.st_size, vec, vec_size, 0, 0,
+ 0, PAGEMAP_NON_WRITTEN_BITS, 0, PAGEMAP_NON_WRITTEN_BITS);
+
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem &&
+ LEN(vec[0]) == ceilf((float)sbuf.st_size/page_size) &&
+ (vec[0].categories & PAGE_IS_FILE),
+ "%s Memory mapped file\n", __func__);
+
+ munmap(fmem, sbuf.st_size);
+ close(fd);
+
+ /* 10. Create and read/write to a memory mapped file */
+ buf_size = page_size * 10;
+
+ fd = open(__FILE__".tmp2", O_RDWR | O_CREAT, 0666);
+ if (fd < 0)
+ ksft_exit_fail_msg("Read/write to memory: %s\n",
+ strerror(errno));
+
+ for (i = 0; i < buf_size; i++)
+ if (write(fd, "c", 1) < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+ fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ wp_init(fmem, buf_size);
+ wp_addr_range(fmem, buf_size);
+
+ for (i = 0; i < buf_size; i++)
+ fmem[i] = 'z';
+
+ msync(fmem, buf_size, MS_SYNC);
+
+ ret = pagemap_ioctl(fmem, buf_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_FILE, 0,
+ PAGEMAP_BITS_ALL);
+
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem &&
+ LEN(vec[0]) == (buf_size/page_size) &&
+ (vec[0].categories & PAGE_IS_WRITTEN),
+ "%s Read/write to memory\n", __func__);
+
+ wp_free(fmem, buf_size);
+ munmap(fmem, buf_size);
+ close(fd);
+
+ free(vec);
+ return 0;
+}
+
+int mprotect_tests(void)
+{
+ int ret;
+ char *mem, *mem2;
+ struct page_region vec;
+ int pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+
+ if (pagemap_fd < 0) {
+ fprintf(stderr, "open() failed\n");
+ exit(1);
+ }
+
+ /* 1. Map two pages */
+ mem = mmap(0, 2 * page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, 2 * page_size);
+ wp_addr_range(mem, 2 * page_size);
+
+ /* Populate both pages. */
+ memset(mem, 1, 2 * page_size);
+
+ ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == 2, "%s Both pages written\n", __func__);
+
+ /* 2. Start tracking */
+ wp_addr_range(mem, 2 * page_size);
+
+ ksft_test_result(pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0,
+ "%s Both pages are not written (dirty)\n", __func__);
+
+ /* 3. Remap the second page */
+ mem2 = mmap(mem + page_size, page_size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0);
+ if (mem2 == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem2, page_size);
+ wp_addr_range(mem2, page_size);
+
+ /* Protect + unprotect. */
+ mprotect(mem, page_size, PROT_NONE);
+ mprotect(mem, 2 * page_size, PROT_READ);
+ mprotect(mem, 2 * page_size, PROT_READ|PROT_WRITE);
+
+ /* Modify both pages. */
+ memset(mem, 2, 2 * page_size);
+
+ /* Protect + unprotect. */
+ mprotect(mem, page_size, PROT_NONE);
+ mprotect(mem, page_size, PROT_READ);
+ mprotect(mem, page_size, PROT_READ|PROT_WRITE);
+
+ ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == 2,
+ "%s Both pages written after remap and mprotect\n", __func__);
+
+ /* 4. Clear and make the pages written */
+ wp_addr_range(mem, 2 * page_size);
+
+ memset(mem, 'A', 2 * page_size);
+
+ ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == 2,
+ "%s Clear and make the pages written\n", __func__);
+
+ wp_free(mem, 2 * page_size);
+ munmap(mem, 2 * page_size);
+ return 0;
+}
+
+/* transact test */
+static const unsigned int nthreads = 6, pages_per_thread = 32, access_per_thread = 8;
+static pthread_barrier_t start_barrier, end_barrier;
+static unsigned int extra_thread_faults;
+static unsigned int iter_count = 1000;
+static volatile int finish;
+
+static ssize_t get_dirty_pages_reset(char *mem, unsigned int count,
+ int reset, int page_size)
+{
+ struct pm_scan_arg arg = {0};
+ struct page_region rgns[256];
+ unsigned long long i, j;
+ long ret;
+ int cnt;
+
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.start = (uintptr_t)mem;
+ arg.max_pages = count;
+ arg.end = (uintptr_t)(mem + count * page_size);
+ arg.vec = (uintptr_t)rgns;
+ arg.vec_len = sizeof(rgns) / sizeof(*rgns);
+ if (reset)
+ arg.flags |= PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC;
+ arg.category_mask = PAGE_IS_WRITTEN;
+ arg.return_mask = PAGE_IS_WRITTEN;
+
+ ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+ if (ret < 0)
+ ksft_exit_fail_msg("ioctl failed\n");
+
+ cnt = 0;
+ for (i = 0; i < (unsigned long)ret; ++i) {
+ if (rgns[i].categories != PAGE_IS_WRITTEN)
+ ksft_exit_fail_msg("wrong flags\n");
+
+ for (j = 0; j < LEN(rgns[i]); ++j)
+ cnt++;
+ }
+
+ return cnt;
+}
+
+void *thread_proc(void *mem)
+{
+ int *m = mem;
+ long curr_faults, faults;
+ struct rusage r;
+ unsigned int i;
+ int ret;
+
+ if (getrusage(RUSAGE_THREAD, &r))
+ ksft_exit_fail_msg("getrusage\n");
+
+ curr_faults = r.ru_minflt;
+
+ while (!finish) {
+ ret = pthread_barrier_wait(&start_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ for (i = 0; i < access_per_thread; ++i)
+ __atomic_add_fetch(m + i * (0x1000 / sizeof(*m)), 1, __ATOMIC_SEQ_CST);
+
+ ret = pthread_barrier_wait(&end_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ if (getrusage(RUSAGE_THREAD, &r))
+ ksft_exit_fail_msg("getrusage\n");
+
+ faults = r.ru_minflt - curr_faults;
+ if (faults < access_per_thread)
+ ksft_exit_fail_msg("faults < access_per_thread");
+
+ __atomic_add_fetch(&extra_thread_faults, faults - access_per_thread,
+ __ATOMIC_SEQ_CST);
+ curr_faults = r.ru_minflt;
+ }
+
+ return NULL;
+}
+
+static void transact_test(int page_size)
+{
+ unsigned int i, count, extra_pages;
+ unsigned int c;
+ pthread_t th;
+ char *mem;
+ int ret;
+
+ if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1))
+ ksft_exit_fail_msg("pthread_barrier_init\n");
+
+ if (pthread_barrier_init(&end_barrier, NULL, nthreads + 1))
+ ksft_exit_fail_msg("pthread_barrier_init\n");
+
+ mem = mmap(NULL, 0x1000 * nthreads * pages_per_thread, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("Error mmap %s.\n", strerror(errno));
+
+ wp_init(mem, 0x1000 * nthreads * pages_per_thread);
+ wp_addr_range(mem, 0x1000 * nthreads * pages_per_thread);
+
+ memset(mem, 0, 0x1000 * nthreads * pages_per_thread);
+
+ count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+ ksft_test_result(count > 0, "%s count %u\n", __func__, count);
+ count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+ ksft_test_result(count == 0, "%s count %u\n", __func__, count);
+
+ finish = 0;
+ for (i = 0; i < nthreads; ++i)
+ pthread_create(&th, NULL, thread_proc, mem + 0x1000 * i * pages_per_thread);
+
+ extra_pages = 0;
+ for (i = 0; i < iter_count; ++i) {
+ count = 0;
+
+ ret = pthread_barrier_wait(&start_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1,
+ page_size);
+
+ ret = pthread_barrier_wait(&end_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ if (count > nthreads * access_per_thread)
+ ksft_exit_fail_msg("Too big count %u expected %u, iter %u\n",
+ count, nthreads * access_per_thread, i);
+
+ c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+ count += c;
+
+ if (c > nthreads * access_per_thread) {
+ ksft_test_result_fail(" %s count > nthreads\n", __func__);
+ return;
+ }
+
+ if (count != nthreads * access_per_thread) {
+ /*
+ * The purpose of the test is to make sure that no page updates are lost
+ * when the page updates and read-resetting soft dirty flags are performed
+ * in parallel. However, it is possible that the application will get the
+ * soft dirty flags twice on the two consecutive read-resets. This seems
+ * unavoidable as soft dirty flag is handled in software through page faults
+ * in kernel. While the updating the flags is supposed to be synchronized
+ * between page fault handling and read-reset, it is possible that
+ * read-reset happens after page fault PTE update but before the application
+ * re-executes write instruction. So read-reset gets the flag, clears write
+ * access and application gets page fault again for the same write.
+ */
+ if (count < nthreads * access_per_thread) {
+ ksft_test_result_fail("Lost update, iter %u, %u vs %u.\n", i, count,
+ nthreads * access_per_thread);
+ return;
+ }
+
+ extra_pages += count - nthreads * access_per_thread;
+ }
+ }
+
+ pthread_barrier_wait(&start_barrier);
+ finish = 1;
+ pthread_barrier_wait(&end_barrier);
+
+ ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %u.\n", __func__,
+ extra_pages,
+ 100.0 * extra_pages / (iter_count * nthreads * access_per_thread),
+ extra_thread_faults);
+}
+
+void zeropfn_tests(void)
+{
+ unsigned long long mem_size;
+ struct page_region vec;
+ int i, ret;
+ char *mmap_mem, *mem;
+
+ /* Test with normal memory */
+ mem_size = 10 * page_size;
+ mem = mmap(NULL, mem_size, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ /* Touch each page to ensure it's mapped */
+ for (i = 0; i < mem_size; i += page_size)
+ (void)((volatile char *)mem)[i];
+
+ ret = pagemap_ioctl(mem, mem_size, &vec, 1, 0,
+ (mem_size / page_size), PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == (mem_size / page_size),
+ "%s all pages must have PFNZERO set\n", __func__);
+
+ munmap(mem, mem_size);
+
+ /* Test with huge page if user_zero_page is set to 1 */
+ if (!detect_huge_zeropage()) {
+ ksft_test_result_skip("%s use_zero_page not supported or set to 1\n", __func__);
+ return;
+ }
+
+ mem_size = 2 * hpage_size;
+ mmap_mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mmap_mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ /* We need a THP-aligned memory area. */
+ mem = (char *)(((uintptr_t)mmap_mem + hpage_size) & ~(hpage_size - 1));
+
+ ret = madvise(mem, hpage_size, MADV_HUGEPAGE);
+ if (!ret) {
+ FORCE_READ(*mem);
+
+ ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0,
+ 0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == (hpage_size / page_size),
+ "%s all huge pages must have PFNZERO set\n", __func__);
+ } else {
+ ksft_test_result_skip("%s huge page not supported\n", __func__);
+ }
+
+ munmap(mmap_mem, mem_size);
+}
+
+int main(int __attribute__((unused)) argc, char *argv[])
+{
+ int shmid, buf_size, fd, i, ret;
+ unsigned long long mem_size;
+ char *mem, *map, *fmem;
+ struct stat sbuf;
+
+ progname = argv[0];
+
+ ksft_print_header();
+
+ if (init_uffd())
+ ksft_exit_pass();
+
+ ksft_set_plan(117);
+
+ page_size = getpagesize();
+ hpage_size = read_pmd_pagesize();
+
+ pagemap_fd = open(PAGEMAP, O_RDONLY);
+ if (pagemap_fd < 0)
+ return -EINVAL;
+
+ /* 1. Sanity testing */
+ sanity_tests_sd();
+
+ /* 2. Normal page testing */
+ mem_size = 10 * page_size;
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Page testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 3. Large page testing */
+ mem_size = 512 * 10 * page_size;
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Large Page testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 4. Huge page testing */
+ map = gethugepage(hpage_size);
+ if (map) {
+ wp_init(map, hpage_size);
+ wp_addr_range(map, hpage_size);
+ base_tests("Huge page testing:", map, hpage_size, 0);
+ wp_free(map, hpage_size);
+ free(map);
+ } else {
+ base_tests("Huge page testing:", NULL, 0, 1);
+ }
+
+ /* 5. SHM Hugetlb page testing */
+ mem_size = 2*1024*1024;
+ mem = gethugetlb_mem(mem_size, &shmid);
+ if (mem) {
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Hugetlb shmem testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ shmctl(shmid, IPC_RMID, NULL);
+ } else {
+ base_tests("Hugetlb shmem testing:", NULL, 0, 1);
+ }
+
+ /* 6. Hugetlb page testing */
+ mem = gethugetlb_mem(mem_size, NULL);
+ if (mem) {
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Hugetlb mem testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ } else {
+ base_tests("Hugetlb mem testing:", NULL, 0, 1);
+ }
+
+ /* 7. File Hugetlb testing */
+ mem_size = 2*1024*1024;
+ fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL);
+ if (fd < 0)
+ ksft_exit_fail_msg("uffd-test creation failed %d %s\n", errno, strerror(errno));
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (mem != MAP_FAILED) {
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Hugetlb shmem testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ shmctl(shmid, IPC_RMID, NULL);
+ } else {
+ base_tests("Hugetlb shmem testing:", NULL, 0, 1);
+ }
+ close(fd);
+
+ /* 8. File memory testing */
+ buf_size = page_size * 10;
+
+ fd = open(__FILE__".tmp0", O_RDWR | O_CREAT, 0777);
+ if (fd < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n",
+ strerror(errno));
+
+ for (i = 0; i < buf_size; i++)
+ if (write(fd, "c", 1) < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+ ret = stat(__FILE__".tmp0", &sbuf);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ wp_init(fmem, sbuf.st_size);
+ wp_addr_range(fmem, sbuf.st_size);
+
+ base_tests("File memory testing:", fmem, sbuf.st_size, 0);
+
+ wp_free(fmem, sbuf.st_size);
+ munmap(fmem, sbuf.st_size);
+ close(fd);
+
+ /* 9. File memory testing */
+ buf_size = page_size * 10;
+
+ fd = memfd_create(__FILE__".tmp00", MFD_NOEXEC_SEAL);
+ if (fd < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n",
+ strerror(errno));
+
+ if (ftruncate(fd, buf_size))
+ ksft_exit_fail_msg("Error ftruncate\n");
+
+ for (i = 0; i < buf_size; i++)
+ if (write(fd, "c", 1) < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+ fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ wp_init(fmem, buf_size);
+ wp_addr_range(fmem, buf_size);
+
+ base_tests("File anonymous memory testing:", fmem, buf_size, 0);
+
+ wp_free(fmem, buf_size);
+ munmap(fmem, buf_size);
+ close(fd);
+
+ /* 10. Huge page tests */
+ hpage_unit_tests();
+
+ /* 11. Iterative test */
+ test_simple();
+
+ /* 12. Mprotect test */
+ mprotect_tests();
+
+ /* 13. Transact test */
+ transact_test(page_size);
+
+ /* 14. Sanity testing */
+ sanity_tests();
+
+ /*15. Unmapped address test */
+ unmapped_region_tests();
+
+ /* 16. Userfaultfd tests */
+ userfaultfd_tests();
+
+ /* 17. ZEROPFN tests */
+ zeropfn_tests();
+
+ close(pagemap_fd);
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c
new file mode 100644
index 000000000000..f546dfb10cae
--- /dev/null
+++ b/tools/testing/selftests/mm/pfnmap.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Basic VM_PFNMAP tests relying on mmap() of input file provided.
+ * Use '/dev/mem' as default.
+ *
+ * Copyright 2025, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "kselftest_harness.h"
+#include "vm_util.h"
+
+static sigjmp_buf sigjmp_buf_env;
+static char *file = "/dev/mem";
+
+static void signal_handler(int sig)
+{
+ siglongjmp(sigjmp_buf_env, -EFAULT);
+}
+
+static int test_read_access(char *addr, size_t size, size_t pagesize)
+{
+ size_t offs;
+ int ret;
+
+ if (signal(SIGSEGV, signal_handler) == SIG_ERR)
+ return -EINVAL;
+
+ ret = sigsetjmp(sigjmp_buf_env, 1);
+ if (!ret) {
+ for (offs = 0; offs < size; offs += pagesize)
+ /* Force a read that the compiler cannot optimize out. */
+ *((volatile char *)(addr + offs));
+ }
+ if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
+ return -EINVAL;
+
+ return ret;
+}
+
+static int find_ram_target(off_t *offset,
+ unsigned long long pagesize)
+{
+ unsigned long long start, end;
+ char line[80], *end_ptr;
+ FILE *file;
+
+ /* Search /proc/iomem for the first suitable "System RAM" range. */
+ file = fopen("/proc/iomem", "r");
+ if (!file)
+ return -errno;
+
+ while (fgets(line, sizeof(line), file)) {
+ /* Ignore any child nodes. */
+ if (!isalnum(line[0]))
+ continue;
+
+ if (!strstr(line, "System RAM\n"))
+ continue;
+
+ start = strtoull(line, &end_ptr, 16);
+ /* Skip over the "-" */
+ end_ptr++;
+ /* Make end "exclusive". */
+ end = strtoull(end_ptr, NULL, 16) + 1;
+
+ /* Actual addresses are not exported */
+ if (!start && !end)
+ break;
+
+ /* We need full pages. */
+ start = (start + pagesize - 1) & ~(pagesize - 1);
+ end &= ~(pagesize - 1);
+
+ if (start != (off_t)start)
+ break;
+
+ /* We need two pages. */
+ if (end > start + 2 * pagesize) {
+ fclose(file);
+ *offset = start;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+FIXTURE(pfnmap)
+{
+ off_t offset;
+ size_t pagesize;
+ int dev_mem_fd;
+ char *addr1;
+ size_t size1;
+ char *addr2;
+ size_t size2;
+};
+
+FIXTURE_SETUP(pfnmap)
+{
+ self->pagesize = getpagesize();
+
+ if (strncmp(file, "/dev/mem", strlen("/dev/mem")) == 0) {
+ /* We'll require two physical pages throughout our tests ... */
+ if (find_ram_target(&self->offset, self->pagesize))
+ SKIP(return,
+ "Cannot find ram target in '/proc/iomem'\n");
+ } else {
+ self->offset = 0;
+ }
+
+ self->dev_mem_fd = open(file, O_RDONLY);
+ if (self->dev_mem_fd < 0)
+ SKIP(return, "Cannot open '%s'\n", file);
+
+ self->size1 = self->pagesize * 2;
+ self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
+ self->dev_mem_fd, self->offset);
+ if (self->addr1 == MAP_FAILED)
+ SKIP(return, "Cannot mmap '%s'\n", file);
+
+ if (!check_vmflag_pfnmap(self->addr1))
+ SKIP(return, "Invalid file: '%s'. Not pfnmap'ed\n", file);
+
+ /* ... and want to be able to read from them. */
+ if (test_read_access(self->addr1, self->size1, self->pagesize))
+ SKIP(return, "Cannot read-access mmap'ed '%s'\n", file);
+
+ self->size2 = 0;
+ self->addr2 = MAP_FAILED;
+}
+
+FIXTURE_TEARDOWN(pfnmap)
+{
+ if (self->addr2 != MAP_FAILED)
+ munmap(self->addr2, self->size2);
+ if (self->addr1 != MAP_FAILED)
+ munmap(self->addr1, self->size1);
+ if (self->dev_mem_fd >= 0)
+ close(self->dev_mem_fd);
+}
+
+TEST_F(pfnmap, madvise_disallowed)
+{
+ int advices[] = {
+ MADV_DONTNEED,
+ MADV_DONTNEED_LOCKED,
+ MADV_FREE,
+ MADV_WIPEONFORK,
+ MADV_COLD,
+ MADV_PAGEOUT,
+ MADV_POPULATE_READ,
+ MADV_POPULATE_WRITE,
+ };
+ int i;
+
+ /* All these advices must be rejected. */
+ for (i = 0; i < ARRAY_SIZE(advices); i++) {
+ EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(pfnmap, munmap_split)
+{
+ /*
+ * Unmap the first page. This munmap() call is not really expected to
+ * fail, but we might be able to trigger other internal issues.
+ */
+ ASSERT_EQ(munmap(self->addr1, self->pagesize), 0);
+
+ /*
+ * Remap the first page while the second page is still mapped. This
+ * makes sure that any PAT tracking on x86 will allow for mmap()'ing
+ * a page again while some parts of the first mmap() are still
+ * around.
+ */
+ self->size2 = self->pagesize;
+ self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED,
+ self->dev_mem_fd, self->offset);
+ ASSERT_NE(self->addr2, MAP_FAILED);
+}
+
+TEST_F(pfnmap, mremap_fixed)
+{
+ char *ret;
+
+ /* Reserve a destination area. */
+ self->size2 = self->size1;
+ self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE,
+ -1, 0);
+ ASSERT_NE(self->addr2, MAP_FAILED);
+
+ /* mremap() over our destination. */
+ ret = mremap(self->addr1, self->size1, self->size2,
+ MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2);
+ ASSERT_NE(ret, MAP_FAILED);
+}
+
+TEST_F(pfnmap, mremap_shrink)
+{
+ char *ret;
+
+ /* Shrinking is expected to work. */
+ ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0);
+ ASSERT_NE(ret, MAP_FAILED);
+}
+
+TEST_F(pfnmap, mremap_expand)
+{
+ /*
+ * Growing is not expected to work, and getting it right would
+ * be challenging. So this test primarily serves as an early warning
+ * that something that probably should never work suddenly works.
+ */
+ self->size2 = self->size1 + self->pagesize;
+ self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE);
+ ASSERT_EQ(self->addr2, MAP_FAILED);
+}
+
+TEST_F(pfnmap, fork)
+{
+ pid_t pid;
+ int ret;
+
+ /* fork() a child and test if the child can access the pages. */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (!pid) {
+ EXPECT_EQ(test_read_access(self->addr1, self->size1,
+ self->pagesize), 0);
+ exit(0);
+ }
+
+ wait(&ret);
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ else
+ ret = -EINVAL;
+ ASSERT_EQ(ret, 0);
+}
+
+int main(int argc, char **argv)
+{
+ for (int i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "--") == 0) {
+ if (i + 1 < argc && strlen(argv[i + 1]) > 0)
+ file = argv[i + 1];
+ return test_harness_run(i, argv);
+ }
+ }
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h
new file mode 100644
index 000000000000..8e9685e03c44
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey-arm64.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#ifndef _PKEYS_ARM64_H
+#define _PKEYS_ARM64_H
+
+#include "vm_util.h"
+/* for signal frame parsing */
+#include "../arm64/signal/testcases/testcases.h"
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 288
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 289
+# define SYS_pkey_free 290
+#endif
+#define MCONTEXT_IP(mc) mc.pc
+#define MCONTEXT_TRAPNO(mc) -1
+
+#define PKEY_MASK 0xf
+
+#define POE_NONE 0x0
+#define POE_X 0x2
+#define POE_RX 0x3
+#define POE_RWX 0x7
+
+#define NR_PKEYS 8
+#define NR_RESERVED_PKEYS 1 /* pkey-0 */
+
+#define PKEY_REG_ALLOW_ALL 0x77777777
+#define PKEY_REG_ALLOW_NONE 0x0
+
+#define PKEY_BITS_PER_PKEY 4
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#undef HPAGE_SIZE
+#define HPAGE_SIZE default_huge_page_size()
+
+/* 4-byte instructions * 16384 = 64K page */
+#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+
+static inline u64 __read_pkey_reg(void)
+{
+ u64 pkey_reg = 0;
+
+ // POR_EL0
+ asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (pkey_reg));
+
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+ u64 por = pkey_reg;
+
+ dprintf4("%s() changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+
+ // POR_EL0
+ asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (por) :);
+
+ dprintf4("%s() pkey register after changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+}
+
+static inline int cpu_has_pkeys(void)
+{
+ /* No simple way to determine this */
+ return 1;
+}
+
+static inline u32 pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+ return NR_RESERVED_PKEYS;
+}
+
+static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+}
+
+static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+ return PTR_ERR_ENOTSUP;
+}
+
+#define set_pkey_bits set_pkey_bits
+static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+ u32 shift = pkey_bit_position(pkey);
+ u64 new_val = POE_RWX;
+
+ /* mask out bits from pkey in old value */
+ reg &= ~((u64)PKEY_MASK << shift);
+
+ if (flags & PKEY_DISABLE_ACCESS)
+ new_val = POE_X;
+ else if (flags & PKEY_DISABLE_WRITE)
+ new_val = POE_RX;
+
+ /* OR in new bits for pkey */
+ reg |= new_val << shift;
+
+ return reg;
+}
+
+#define get_pkey_bits get_pkey_bits
+static inline u64 get_pkey_bits(u64 reg, int pkey)
+{
+ u32 shift = pkey_bit_position(pkey);
+ /*
+ * shift down the relevant bits to the lowest four, then
+ * mask off all the other higher bits
+ */
+ u32 perm = (reg >> shift) & PKEY_MASK;
+
+ if (perm == POE_X)
+ return PKEY_DISABLE_ACCESS;
+ if (perm == POE_RX)
+ return PKEY_DISABLE_WRITE;
+ return 0;
+}
+
+static inline void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey)
+{
+ struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt);
+ struct poe_context *poe_ctx =
+ (struct poe_context *) get_header(ctx, POE_MAGIC,
+ sizeof(uctxt->uc_mcontext), NULL);
+ if (poe_ctx)
+ poe_ctx->por_el0 = pkey;
+}
+
+#endif /* _PKEYS_ARM64_H */
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 622a85848f61..7c29f075e40b 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -13,26 +13,29 @@
#include <ucontext.h>
#include <sys/mman.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+
+#include "kselftest.h"
+
/* Define some kernel-like types */
-#define u8 __u8
-#define u16 __u16
-#define u32 __u32
-#define u64 __u64
+typedef __u8 u8;
+typedef __u16 u16;
+typedef __u32 u32;
+typedef __u64 u64;
#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 0
#endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
extern int test_nr;
extern int iteration_nr;
#ifdef __GNUC__
-__attribute__((format(printf, 1, 2)))
+__printf(1, 2)
#endif
static inline void sigsafe_printf(const char *format, ...)
{
@@ -77,10 +80,19 @@ extern void abort_hooks(void);
} \
} while (0)
-__attribute__((noinline)) int read_ptr(int *ptr);
-void expected_pkey_fault(int pkey);
+#define barrier() __asm__ __volatile__("": : :"memory")
+#ifndef noinline
+# define noinline __attribute__((noinline))
+#endif
+
int sys_pkey_alloc(unsigned long flags, unsigned long init_val);
int sys_pkey_free(unsigned long pkey);
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey);
+
+/* For functions called from protection_keys.c only */
+noinline int read_ptr(int *ptr);
+void expected_pkey_fault(int pkey);
int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
unsigned long pkey);
void record_pkey_malloc(void *ptr, long size, int prot);
@@ -89,12 +101,24 @@ void record_pkey_malloc(void *ptr, long size, int prot);
#include "pkey-x86.h"
#elif defined(__powerpc64__) /* arch */
#include "pkey-powerpc.h"
+#elif defined(__aarch64__) /* arch */
+#include "pkey-arm64.h"
#else /* arch */
#error Architecture not supported
#endif /* arch */
+#ifndef PKEY_MASK
#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+#endif
+/*
+ * FIXME: Remove once the generic PKEY_UNRESTRICTED definition is merged.
+ */
+#ifndef PKEY_UNRESTRICTED
+#define PKEY_UNRESTRICTED 0x0
+#endif
+
+#ifndef set_pkey_bits
static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
{
u32 shift = pkey_bit_position(pkey);
@@ -104,7 +128,9 @@ static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
reg |= (flags & PKEY_MASK) << shift;
return reg;
}
+#endif
+#ifndef get_pkey_bits
static inline u64 get_pkey_bits(u64 reg, int pkey)
{
u32 shift = pkey_bit_position(pkey);
@@ -114,6 +140,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey)
*/
return ((reg >> shift) & PKEY_MASK);
}
+#endif
extern u64 shadow_pkey_reg;
@@ -143,39 +170,6 @@ static inline void write_pkey_reg(u64 pkey_reg)
pkey_reg, __read_pkey_reg());
}
-/*
- * These are technically racy. since something could
- * change PKEY register between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
- u64 pkey_reg = read_pkey_reg();
- int bit = pkey * 2;
-
- if (do_allow)
- pkey_reg &= (1<<bit);
- else
- pkey_reg |= (1<<bit);
-
- dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
- write_pkey_reg(pkey_reg);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
- u64 pkey_reg = read_pkey_reg();
- int bit = pkey * 2 + 1;
-
- if (do_allow_write)
- pkey_reg &= (1<<bit);
- else
- pkey_reg |= (1<<bit);
-
- write_pkey_reg(pkey_reg);
- dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
-}
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) \
@@ -197,7 +191,7 @@ static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si)
static inline int kernel_has_pkeys(void)
{
/* try allocating a key and see if it succeeds */
- int ret = sys_pkey_alloc(0, 0);
+ int ret = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
if (ret <= 0) {
return 0;
}
diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index 1ebb586b2fbc..17bf2d1b0192 100644
--- a/tools/testing/selftests/vm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -3,15 +3,17 @@
#ifndef _PKEYS_POWERPC_H
#define _PKEYS_POWERPC_H
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 386
-#endif
+#include <sys/stat.h>
+
#ifndef SYS_pkey_alloc
# define SYS_pkey_alloc 384
# define SYS_pkey_free 385
#endif
#define REG_IP_IDX PT_NIP
+#define MCONTEXT_IP(mc) mc.gp_regs[REG_IP_IDX]
+#define MCONTEXT_TRAPNO(mc) mc.gp_regs[REG_TRAPNO]
#define REG_TRAPNO PT_TRAP
+#define MCONTEXT_FPREGS
#define gregs gp_regs
#define fpregs fp_regs
#define si_pkey_offset 0x20
@@ -91,7 +93,7 @@ static inline int get_arch_reserved_keys(void)
return NR_RESERVED_PKEYS_64K_3KEYS;
}
-void expect_fault_on_read_execonly_key(void *p1, int pkey)
+static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
{
/*
* powerpc does not allow userspace to change permissions of exec-only
@@ -102,10 +104,20 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey)
return;
}
+#define REPEAT_8(s) s s s s s s s s
+#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \
+ REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s)
+#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \
+ REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s)
+#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \
+ REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s)
+#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \
+ REPEAT_4096(s) REPEAT_4096(s)
+
/* 4-byte instructions * 16384 = 64K page */
-#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+#define __page_o_noops() asm(REPEAT_16384("nop\n"))
-void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
{
void *ptr;
int ret;
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index 3be20f5d5275..f7ecd335df1e 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -5,34 +5,20 @@
#ifdef __i386__
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
-
#define REG_IP_IDX REG_EIP
#define si_pkey_offset 0x14
#else
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
-
#define REG_IP_IDX REG_RIP
#define si_pkey_offset 0x20
#endif
+#define MCONTEXT_IP(mc) mc.gregs[REG_IP_IDX]
+#define MCONTEXT_TRAPNO(mc) mc.gregs[REG_TRAPNO]
+#define MCONTEXT_FPREGS
+
#ifndef PKEY_DISABLE_ACCESS
# define PKEY_DISABLE_ACCESS 0x1
#endif
@@ -48,6 +34,8 @@
#define PAGE_SIZE 4096
#define MB (1<<20)
+#define PKEY_REG_ALLOW_NONE 0x55555555
+
static inline void __page_o_noops(void)
{
/* 8-bytes of instruction * 512 bytes = 1 page */
@@ -80,19 +68,6 @@ static inline void __write_pkey_reg(u64 pkey_reg)
assert(pkey_reg == __read_pkey_reg());
}
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
@@ -104,9 +79,7 @@ static inline int cpu_has_pkeys(void)
unsigned int ecx;
unsigned int edx;
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(0x7, 0x0, eax, ebx, ecx, edx);
if (!(ecx & X86_FEATURE_PKU)) {
dprintf2("cpu does not have PKU\n");
@@ -119,6 +92,18 @@ static inline int cpu_has_pkeys(void)
return 1;
}
+static inline int cpu_max_xsave_size(void)
+{
+ unsigned long XSTATE_CPUID = 0xd;
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx);
+ return ecx;
+}
+
static inline u32 pkey_bit_position(int pkey)
{
return pkey * PKEY_BITS_PER_PKEY;
@@ -126,24 +111,23 @@ static inline u32 pkey_bit_position(int pkey)
#define XSTATE_PKEY_BIT (9)
#define XSTATE_PKEY 0x200
+#define XSTATE_BV_OFFSET 512
-int pkey_reg_xstate_offset(void)
+static inline int pkey_reg_xstate_offset(void)
{
unsigned int eax;
unsigned int ebx;
unsigned int ecx;
unsigned int edx;
int xstate_offset;
- int xstate_size;
+ int xstate_size = 0;
unsigned long XSTATE_CPUID = 0xd;
int leaf;
/* assume that XSTATE_PKEY is set in XCR0 */
leaf = XSTATE_PKEY_BIT;
{
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(XSTATE_CPUID, leaf, eax, ebx, ecx, edx);
if (leaf == XSTATE_PKEY_BIT) {
xstate_offset = ebx;
@@ -164,7 +148,7 @@ static inline int get_arch_reserved_keys(void)
return NR_RESERVED_PKEYS;
}
-void expect_fault_on_read_execonly_key(void *p1, int pkey)
+static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
{
int ptr_contents;
@@ -173,7 +157,7 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey)
expected_pkey_fault(pkey);
}
-void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
{
return PTR_ERR_ENOTSUP;
}
diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c
new file mode 100644
index 000000000000..302fef54049c
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c
@@ -0,0 +1,546 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
+ *
+ * The testcases in this file exercise various flows related to signal handling,
+ * using an alternate signal stack, with the default pkey (pkey 0) disabled.
+ *
+ * Compile with:
+ * gcc -mxsave -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm
+ * gcc -mxsave -m32 -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm
+ */
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+#include <linux/mman.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <limits.h>
+
+#include "pkey-helpers.h"
+
+#define STACK_SIZE PTHREAD_STACK_MIN
+
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static siginfo_t siginfo = {0};
+
+/*
+ * We need to use inline assembly instead of glibc's syscall because glibc's
+ * syscall will attempt to access the PLT in order to call a library function
+ * which is protected by MPK 0 which we don't have access to.
+ */
+static __always_inline
+long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6)
+{
+ unsigned long ret;
+#ifdef __x86_64__
+ register long r10 asm("r10") = a4;
+ register long r8 asm("r8") = a5;
+ register long r9 asm("r9") = a6;
+ asm volatile ("syscall"
+ : "=a"(ret)
+ : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10), "r"(r8), "r"(r9)
+ : "rcx", "r11", "memory");
+#elif defined __i386__
+ asm volatile ("int $0x80"
+ : "=a"(ret)
+ : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5)
+ : "memory");
+#elif defined __aarch64__
+ register long x0 asm("x0") = a1;
+ register long x1 asm("x1") = a2;
+ register long x2 asm("x2") = a3;
+ register long x3 asm("x3") = a4;
+ register long x4 asm("x4") = a5;
+ register long x5 asm("x5") = a6;
+ register long x8 asm("x8") = n;
+ asm volatile ("svc #0"
+ : "=r"(x0)
+ : "r"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(x8)
+ : "memory");
+ ret = x0;
+#else
+# error syscall_raw() not implemented
+#endif
+ return ret;
+}
+
+static inline long clone_raw(unsigned long flags, void *stack,
+ int *parent_tid, int *child_tid)
+{
+ long a1 = flags;
+ long a2 = (long)stack;
+ long a3 = (long)parent_tid;
+#if defined(__x86_64__) || defined(__i386)
+ long a4 = (long)child_tid;
+ long a5 = 0;
+#elif defined(__aarch64__)
+ long a4 = 0;
+ long a5 = (long)child_tid;
+#else
+# error clone_raw() not implemented
+#endif
+
+ return syscall_raw(SYS_clone, a1, a2, a3, a4, a5, 0);
+}
+
+/*
+ * Returns the most restrictive pkey register value that can be used by the
+ * tests.
+ */
+static inline u64 pkey_reg_restrictive_default(void)
+{
+ /*
+ * Disallow everything except execution on pkey 0, so that each caller
+ * doesn't need to enable it explicitly (the selftest code runs with
+ * its code mapped with pkey 0).
+ */
+ return set_pkey_bits(PKEY_REG_ALLOW_NONE, 0, PKEY_DISABLE_ACCESS);
+}
+
+static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext)
+{
+ pthread_mutex_lock(&mutex);
+
+ memcpy(&siginfo, info, sizeof(siginfo_t));
+
+ pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
+
+ syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+}
+
+static void sigusr1_handler(int signo, siginfo_t *info, void *ucontext)
+{
+ pthread_mutex_lock(&mutex);
+
+ memcpy(&siginfo, info, sizeof(siginfo_t));
+
+ pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
+}
+
+static void sigusr2_handler(int signo, siginfo_t *info, void *ucontext)
+{
+ /*
+ * pkru should be the init_pkru value which enabled MPK 0 so
+ * we can use library functions.
+ */
+ printf("%s invoked.\n", __func__);
+}
+
+static void raise_sigusr2(void)
+{
+ pid_t tid = 0;
+
+ tid = syscall_raw(SYS_gettid, 0, 0, 0, 0, 0, 0);
+
+ syscall_raw(SYS_tkill, tid, SIGUSR2, 0, 0, 0, 0);
+
+ /*
+ * We should return from the signal handler here and be able to
+ * return to the interrupted thread.
+ */
+}
+
+static void *thread_segv_with_pkey0_disabled(void *ptr)
+{
+ /* Disable MPK 0 (and all others too) */
+ __write_pkey_reg(pkey_reg_restrictive_default());
+
+ /* Segfault (with SEGV_MAPERR) */
+ *(volatile int *)NULL = 1;
+ return NULL;
+}
+
+static void *thread_segv_pkuerr_stack(void *ptr)
+{
+ /* Disable MPK 0 (and all others too) */
+ __write_pkey_reg(pkey_reg_restrictive_default());
+
+ /* After we disable MPK 0, we can't access the stack to return */
+ return NULL;
+}
+
+static void *thread_segv_maperr_ptr(void *ptr)
+{
+ stack_t *stack = ptr;
+ u64 pkey_reg;
+
+ /*
+ * Setup alternate signal stack, which should be pkey_mprotect()ed by
+ * MPK 0. The thread's stack cannot be used for signals because it is
+ * not accessible by the default init_pkru value of 0x55555554.
+ */
+ syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
+
+ /* Disable MPK 0. Only MPK 1 is enabled. */
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED);
+ __write_pkey_reg(pkey_reg);
+
+ /* Segfault */
+ *(volatile int *)NULL = 1;
+ syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+ return NULL;
+}
+
+/*
+ * Verify that the sigsegv handler is invoked when pkey 0 is disabled.
+ * Note that the new thread stack and the alternate signal stack is
+ * protected by MPK 0.
+ */
+static void test_sigsegv_handler_with_pkey0_disabled(void)
+{
+ struct sigaction sa;
+ pthread_attr_t attr;
+ pthread_t thr;
+
+ sa.sa_flags = SA_SIGINFO;
+
+ sa.sa_sigaction = sigsegv_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&siginfo, 0, sizeof(siginfo));
+
+ pthread_attr_init(&attr);
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+ pthread_create(&thr, &attr, thread_segv_with_pkey0_disabled, NULL);
+
+ pthread_mutex_lock(&mutex);
+ while (siginfo.si_signo == 0)
+ pthread_cond_wait(&cond, &mutex);
+ pthread_mutex_unlock(&mutex);
+
+ ksft_test_result(siginfo.si_signo == SIGSEGV &&
+ siginfo.si_code == SEGV_MAPERR &&
+ siginfo.si_addr == NULL,
+ "%s\n", __func__);
+}
+
+/*
+ * Verify that the sigsegv handler is invoked when pkey 0 is disabled.
+ * Note that the new thread stack and the alternate signal stack is
+ * protected by MPK 0, which renders them inaccessible when MPK 0
+ * is disabled. So just the return from the thread should cause a
+ * segfault with SEGV_PKUERR.
+ */
+static void test_sigsegv_handler_cannot_access_stack(void)
+{
+ struct sigaction sa;
+ pthread_attr_t attr;
+ pthread_t thr;
+
+ sa.sa_flags = SA_SIGINFO;
+
+ sa.sa_sigaction = sigsegv_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&siginfo, 0, sizeof(siginfo));
+
+ pthread_attr_init(&attr);
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+ pthread_create(&thr, &attr, thread_segv_pkuerr_stack, NULL);
+
+ pthread_mutex_lock(&mutex);
+ while (siginfo.si_signo == 0)
+ pthread_cond_wait(&cond, &mutex);
+ pthread_mutex_unlock(&mutex);
+
+ ksft_test_result(siginfo.si_signo == SIGSEGV &&
+ siginfo.si_code == SEGV_PKUERR,
+ "%s\n", __func__);
+}
+
+/*
+ * Verify that the sigsegv handler that uses an alternate signal stack
+ * is correctly invoked for a thread which uses a non-zero MPK to protect
+ * its own stack, and disables all other MPKs (including 0).
+ */
+static void test_sigsegv_handler_with_different_pkey_for_stack(void)
+{
+ struct sigaction sa;
+ static stack_t sigstack;
+ void *stack;
+ int pkey;
+ int parent_pid = 0;
+ int child_pid = 0;
+ u64 pkey_reg;
+
+ sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+
+ sa.sa_sigaction = sigsegv_handler;
+
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ assert(stack != MAP_FAILED);
+
+ /* Allow access to MPK 0 and MPK 1 */
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED);
+ pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED);
+ __write_pkey_reg(pkey_reg);
+
+ /* Protect the new stack with MPK 1 */
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
+ sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+
+ /* Set up alternate signal stack that will use the default MPK */
+ sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ sigstack.ss_flags = 0;
+ sigstack.ss_size = STACK_SIZE;
+
+ memset(&siginfo, 0, sizeof(siginfo));
+
+ /* Use clone to avoid newer glibcs using rseq on new threads */
+ long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES |
+ CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+ CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+ CLONE_DETACHED,
+ stack + STACK_SIZE,
+ &parent_pid,
+ &child_pid);
+
+ if (ret < 0) {
+ errno = -ret;
+ perror("clone");
+ } else if (ret == 0) {
+ thread_segv_maperr_ptr(&sigstack);
+ syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+ }
+
+ pthread_mutex_lock(&mutex);
+ while (siginfo.si_signo == 0)
+ pthread_cond_wait(&cond, &mutex);
+ pthread_mutex_unlock(&mutex);
+
+ ksft_test_result(siginfo.si_signo == SIGSEGV &&
+ siginfo.si_code == SEGV_MAPERR &&
+ siginfo.si_addr == NULL,
+ "%s\n", __func__);
+}
+
+/*
+ * Verify that the PKRU value set by the application is correctly
+ * restored upon return from signal handling.
+ */
+static void test_pkru_preserved_after_sigusr1(void)
+{
+ struct sigaction sa;
+ u64 pkey_reg;
+
+ /* Allow access to MPK 0 and an arbitrary set of keys */
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED);
+ pkey_reg = set_pkey_bits(pkey_reg, 3, PKEY_UNRESTRICTED);
+ pkey_reg = set_pkey_bits(pkey_reg, 7, PKEY_UNRESTRICTED);
+
+ sa.sa_flags = SA_SIGINFO;
+
+ sa.sa_sigaction = sigusr1_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&siginfo, 0, sizeof(siginfo));
+
+ __write_pkey_reg(pkey_reg);
+
+ raise(SIGUSR1);
+
+ pthread_mutex_lock(&mutex);
+ while (siginfo.si_signo == 0)
+ pthread_cond_wait(&cond, &mutex);
+ pthread_mutex_unlock(&mutex);
+
+ /* Ensure the pkru value is the same after returning from signal. */
+ ksft_test_result(pkey_reg == __read_pkey_reg() &&
+ siginfo.si_signo == SIGUSR1,
+ "%s\n", __func__);
+}
+
+static noinline void *thread_sigusr2_self(void *ptr)
+{
+ /*
+ * A const char array like "Resuming after SIGUSR2" won't be stored on
+ * the stack and the code could access it via an offset from the program
+ * counter. This makes sure it's on the function's stack frame.
+ */
+ char str[] = {'R', 'e', 's', 'u', 'm', 'i', 'n', 'g', ' ',
+ 'a', 'f', 't', 'e', 'r', ' ',
+ 'S', 'I', 'G', 'U', 'S', 'R', '2',
+ '.', '.', '.', '\n', '\0'};
+ stack_t *stack = ptr;
+ u64 pkey_reg;
+
+ /*
+ * Setup alternate signal stack, which should be pkey_mprotect()ed by
+ * MPK 0. The thread's stack cannot be used for signals because it is
+ * not accessible by the default init_pkru value of 0x55555554.
+ */
+ syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
+
+ /* Disable MPK 0. Only MPK 2 is enabled. */
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED);
+ __write_pkey_reg(pkey_reg);
+
+ raise_sigusr2();
+
+ /* Do something, to show the thread resumed execution after the signal */
+ syscall_raw(SYS_write, 1, (long) str, sizeof(str) - 1, 0, 0, 0);
+
+ /*
+ * We can't return to test_pkru_sigreturn because it
+ * will attempt to use a %rbp value which is on the stack
+ * of the main thread.
+ */
+ syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+ return NULL;
+}
+
+/*
+ * Verify that sigreturn is able to restore altstack even if the thread had
+ * disabled pkey 0.
+ */
+static void test_pkru_sigreturn(void)
+{
+ struct sigaction sa = {0};
+ static stack_t sigstack;
+ void *stack;
+ int pkey;
+ int parent_pid = 0;
+ int child_pid = 0;
+ u64 pkey_reg;
+
+ sa.sa_handler = SIG_DFL;
+ sa.sa_flags = 0;
+ sigemptyset(&sa.sa_mask);
+
+ /*
+ * For this testcase, we do not want to handle SIGSEGV. Reset handler
+ * to default so that the application can crash if it receives SIGSEGV.
+ */
+ if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+ sa.sa_sigaction = sigusr2_handler;
+ sigemptyset(&sa.sa_mask);
+
+ if (sigaction(SIGUSR2, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ assert(stack != MAP_FAILED);
+
+ /*
+ * Allow access to MPK 0 and MPK 2. The child thread (to be created
+ * later in this flow) will have its stack protected by MPK 2, whereas
+ * the current thread's stack is protected by the default MPK 0. Hence
+ * both need to be enabled.
+ */
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED);
+ pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED);
+ __write_pkey_reg(pkey_reg);
+
+ /* Protect the stack with MPK 2 */
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
+ sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+
+ /* Set up alternate signal stack that will use the default MPK */
+ sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ sigstack.ss_flags = 0;
+ sigstack.ss_size = STACK_SIZE;
+
+ /* Use clone to avoid newer glibcs using rseq on new threads */
+ long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES |
+ CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+ CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+ CLONE_DETACHED,
+ stack + STACK_SIZE,
+ &parent_pid,
+ &child_pid);
+
+ if (ret < 0) {
+ errno = -ret;
+ perror("clone");
+ } else if (ret == 0) {
+ thread_sigusr2_self(&sigstack);
+ syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
+ }
+
+ child_pid = ret;
+ /* Check that thread exited */
+ do {
+ sched_yield();
+ ret = syscall_raw(SYS_tkill, child_pid, 0, 0, 0, 0, 0);
+ } while (ret != -ESRCH && ret != -EINVAL);
+
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void (*pkey_tests[])(void) = {
+ test_sigsegv_handler_with_pkey0_disabled,
+ test_sigsegv_handler_cannot_access_stack,
+ test_sigsegv_handler_with_different_pkey_for_stack,
+ test_pkru_preserved_after_sigusr1,
+ test_pkru_sigreturn
+};
+
+int main(int argc, char *argv[])
+{
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(pkey_tests));
+
+ if (!is_pkeys_supported())
+ ksft_exit_skip("pkeys not supported\n");
+
+ for (i = 0; i < ARRAY_SIZE(pkey_tests); i++)
+ (*pkey_tests[i])();
+
+ ksft_finished();
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c
new file mode 100644
index 000000000000..255b332f7a08
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey_util.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define __SANE_USERSPACE_TYPES__
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "pkey-helpers.h"
+
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(SYS_pkey_alloc, flags, init_val);
+ dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
+ __func__, flags, init_val, ret, errno);
+ return ret;
+}
+
+int sys_pkey_free(unsigned long pkey)
+{
+ int ret = syscall(SYS_pkey_free, pkey);
+ dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
+ return ret;
+}
+
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
+ ptr, size, orig_prot, pkey);
+
+ errno = 0;
+ sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
+ if (errno) {
+ dprintf2("SYS_mprotect_key sret: %d\n", sret);
+ dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
+ dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
+ if (DEBUG_LEVEL >= 2)
+ perror("SYS_mprotect_pkey");
+ }
+ return sret;
+}
diff --git a/tools/testing/selftests/mm/prctl_thp_disable.c b/tools/testing/selftests/mm/prctl_thp_disable.c
new file mode 100644
index 000000000000..ca27200596a4
--- /dev/null
+++ b/tools/testing/selftests/mm/prctl_thp_disable.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic tests for PR_GET/SET_THP_DISABLE prctl calls
+ *
+ * Author(s): Usama Arif <usamaarif642@gmail.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#include "kselftest_harness.h"
+#include "thp_settings.h"
+#include "vm_util.h"
+
+#ifndef PR_THP_DISABLE_EXCEPT_ADVISED
+#define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1)
+#endif
+
+enum thp_collapse_type {
+ THP_COLLAPSE_NONE,
+ THP_COLLAPSE_MADV_NOHUGEPAGE,
+ THP_COLLAPSE_MADV_HUGEPAGE, /* MADV_HUGEPAGE before access */
+ THP_COLLAPSE_MADV_COLLAPSE, /* MADV_COLLAPSE after access */
+};
+
+/*
+ * Function to mmap a buffer, fault it in, madvise it appropriately (before
+ * page fault for MADV_HUGE, and after for MADV_COLLAPSE), and check if the
+ * mmap region is huge.
+ * Returns:
+ * 0 if test doesn't give hugepage
+ * 1 if test gives a hugepage
+ * -errno if mmap fails
+ */
+static int test_mmap_thp(enum thp_collapse_type madvise_buf, size_t pmdsize)
+{
+ char *mem, *mmap_mem;
+ size_t mmap_size;
+ int ret;
+
+ /* For alignment purposes, we need twice the THP size. */
+ mmap_size = 2 * pmdsize;
+ mmap_mem = (char *)mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mmap_mem == MAP_FAILED)
+ return -errno;
+
+ /* We need a THP-aligned memory area. */
+ mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
+
+ if (madvise_buf == THP_COLLAPSE_MADV_HUGEPAGE)
+ madvise(mem, pmdsize, MADV_HUGEPAGE);
+ else if (madvise_buf == THP_COLLAPSE_MADV_NOHUGEPAGE)
+ madvise(mem, pmdsize, MADV_NOHUGEPAGE);
+
+ /* Ensure memory is allocated */
+ memset(mem, 1, pmdsize);
+
+ if (madvise_buf == THP_COLLAPSE_MADV_COLLAPSE)
+ madvise(mem, pmdsize, MADV_COLLAPSE);
+
+ /* HACK: make sure we have a separate VMA that we can check reliably. */
+ mprotect(mem, pmdsize, PROT_READ);
+
+ ret = check_huge_anon(mem, 1, pmdsize);
+ munmap(mmap_mem, mmap_size);
+ return ret;
+}
+
+static void prctl_thp_disable_completely_test(struct __test_metadata *const _metadata,
+ size_t pmdsize,
+ enum thp_enabled thp_policy)
+{
+ ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 1);
+
+ /* tests after prctl overrides global policy */
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), 0);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 0);
+
+ /* Reset to global policy */
+ ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0);
+
+ /* tests after prctl is cleared, and only global policy is effective */
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize),
+ thp_policy == THP_ALWAYS ? 1 : 0);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize),
+ thp_policy == THP_NEVER ? 0 : 1);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1);
+}
+
+FIXTURE(prctl_thp_disable_completely)
+{
+ struct thp_settings settings;
+ size_t pmdsize;
+};
+
+FIXTURE_VARIANT(prctl_thp_disable_completely)
+{
+ enum thp_enabled thp_policy;
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, never)
+{
+ .thp_policy = THP_NEVER,
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, madvise)
+{
+ .thp_policy = THP_MADVISE,
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, always)
+{
+ .thp_policy = THP_ALWAYS,
+};
+
+FIXTURE_SETUP(prctl_thp_disable_completely)
+{
+ if (!thp_available())
+ SKIP(return, "Transparent Hugepages not available\n");
+
+ self->pmdsize = read_pmd_pagesize();
+ if (!self->pmdsize)
+ SKIP(return, "Unable to read PMD size\n");
+
+ if (prctl(PR_SET_THP_DISABLE, 1, NULL, NULL, NULL))
+ SKIP(return, "Unable to disable THPs completely for the process\n");
+
+ thp_save_settings();
+ thp_read_settings(&self->settings);
+ self->settings.thp_enabled = variant->thp_policy;
+ self->settings.hugepages[sz2ord(self->pmdsize, getpagesize())].enabled = THP_INHERIT;
+ thp_write_settings(&self->settings);
+}
+
+FIXTURE_TEARDOWN(prctl_thp_disable_completely)
+{
+ thp_restore_settings();
+}
+
+TEST_F(prctl_thp_disable_completely, nofork)
+{
+ prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy);
+}
+
+TEST_F(prctl_thp_disable_completely, fork)
+{
+ int ret = 0;
+ pid_t pid;
+
+ /* Make sure prctl changes are carried across fork */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (!pid) {
+ prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy);
+ return;
+ }
+
+ wait(&ret);
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ else
+ ret = -EINVAL;
+ ASSERT_EQ(ret, 0);
+}
+
+static void prctl_thp_disable_except_madvise_test(struct __test_metadata *const _metadata,
+ size_t pmdsize,
+ enum thp_enabled thp_policy)
+{
+ ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 3);
+
+ /* tests after prctl overrides global policy */
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize),
+ thp_policy == THP_NEVER ? 0 : 1);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1);
+
+ /* Reset to global policy */
+ ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0);
+
+ /* tests after prctl is cleared, and only global policy is effective */
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize),
+ thp_policy == THP_ALWAYS ? 1 : 0);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize),
+ thp_policy == THP_NEVER ? 0 : 1);
+
+ ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1);
+}
+
+FIXTURE(prctl_thp_disable_except_madvise)
+{
+ struct thp_settings settings;
+ size_t pmdsize;
+};
+
+FIXTURE_VARIANT(prctl_thp_disable_except_madvise)
+{
+ enum thp_enabled thp_policy;
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, never)
+{
+ .thp_policy = THP_NEVER,
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, madvise)
+{
+ .thp_policy = THP_MADVISE,
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, always)
+{
+ .thp_policy = THP_ALWAYS,
+};
+
+FIXTURE_SETUP(prctl_thp_disable_except_madvise)
+{
+ if (!thp_available())
+ SKIP(return, "Transparent Hugepages not available\n");
+
+ self->pmdsize = read_pmd_pagesize();
+ if (!self->pmdsize)
+ SKIP(return, "Unable to read PMD size\n");
+
+ if (prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED, NULL, NULL))
+ SKIP(return, "Unable to set PR_THP_DISABLE_EXCEPT_ADVISED\n");
+
+ thp_save_settings();
+ thp_read_settings(&self->settings);
+ self->settings.thp_enabled = variant->thp_policy;
+ self->settings.hugepages[sz2ord(self->pmdsize, getpagesize())].enabled = THP_INHERIT;
+ thp_write_settings(&self->settings);
+}
+
+FIXTURE_TEARDOWN(prctl_thp_disable_except_madvise)
+{
+ thp_restore_settings();
+}
+
+TEST_F(prctl_thp_disable_except_madvise, nofork)
+{
+ prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize, variant->thp_policy);
+}
+
+TEST_F(prctl_thp_disable_except_madvise, fork)
+{
+ int ret = 0;
+ pid_t pid;
+
+ /* Make sure prctl changes are carried across fork */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (!pid) {
+ prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize,
+ variant->thp_policy);
+ return;
+ }
+
+ wait(&ret);
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ else
+ ret = -EINVAL;
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/process_madv.c b/tools/testing/selftests/mm/process_madv.c
new file mode 100644
index 000000000000..cd4610baf5d7
--- /dev/null
+++ b/tools/testing/selftests/mm/process_madv.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include "kselftest_harness.h"
+#include <errno.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <sched.h>
+#include "vm_util.h"
+
+#include "../pidfd/pidfd.h"
+
+FIXTURE(process_madvise)
+{
+ unsigned long page_size;
+ pid_t child_pid;
+ int remote_pidfd;
+ int pidfd;
+};
+
+FIXTURE_SETUP(process_madvise)
+{
+ self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
+ self->pidfd = PIDFD_SELF;
+ self->remote_pidfd = -1;
+ self->child_pid = -1;
+};
+
+FIXTURE_TEARDOWN_PARENT(process_madvise)
+{
+ /* This teardown is guaranteed to run, even if tests SKIP or ASSERT */
+ if (self->child_pid > 0) {
+ kill(self->child_pid, SIGKILL);
+ waitpid(self->child_pid, NULL, 0);
+ }
+
+ if (self->remote_pidfd >= 0)
+ close(self->remote_pidfd);
+}
+
+static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
+ size_t vlen, int advice, unsigned int flags)
+{
+ return syscall(__NR_process_madvise, pidfd, iovec, vlen, advice, flags);
+}
+
+/*
+ * This test uses PIDFD_SELF to target the current process. The main
+ * goal is to verify the basic behavior of process_madvise() with
+ * a vector of non-contiguous memory ranges, not its cross-process
+ * capabilities.
+ */
+TEST_F(process_madvise, basic)
+{
+ const unsigned long pagesize = self->page_size;
+ const int madvise_pages = 4;
+ struct iovec vec[madvise_pages];
+ int pidfd = self->pidfd;
+ ssize_t ret;
+ char *map;
+
+ /*
+ * Create a single large mapping. We will pick pages from this
+ * mapping to advise on. This ensures we test non-contiguous iovecs.
+ */
+ map = mmap(NULL, pagesize * 10, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (map == MAP_FAILED)
+ SKIP(return, "mmap failed, not enough memory.\n");
+
+ /* Fill the entire region with a known pattern. */
+ memset(map, 'A', pagesize * 10);
+
+ /*
+ * Setup the iovec to point to 4 non-contiguous pages
+ * within the mapping.
+ */
+ vec[0].iov_base = &map[0 * pagesize];
+ vec[0].iov_len = pagesize;
+ vec[1].iov_base = &map[3 * pagesize];
+ vec[1].iov_len = pagesize;
+ vec[2].iov_base = &map[5 * pagesize];
+ vec[2].iov_len = pagesize;
+ vec[3].iov_base = &map[8 * pagesize];
+ vec[3].iov_len = pagesize;
+
+ ret = sys_process_madvise(pidfd, vec, madvise_pages, MADV_DONTNEED, 0);
+ if (ret == -1 && errno == EPERM)
+ SKIP(return,
+ "process_madvise() unsupported or permission denied, try running as root.\n");
+ else if (errno == EINVAL)
+ SKIP(return,
+ "process_madvise() unsupported or parameter invalid, please check arguments.\n");
+
+ /* The call should succeed and report the total bytes processed. */
+ ASSERT_EQ(ret, madvise_pages * pagesize);
+
+ /* Check that advised pages are now zero. */
+ for (int i = 0; i < madvise_pages; i++) {
+ char *advised_page = (char *)vec[i].iov_base;
+
+ /* Content must be 0, not 'A'. */
+ ASSERT_EQ(*advised_page, '\0');
+ }
+
+ /* Check that an un-advised page in between is still 'A'. */
+ char *unadvised_page = &map[1 * pagesize];
+
+ for (int i = 0; i < pagesize; i++)
+ ASSERT_EQ(unadvised_page[i], 'A');
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(map, pagesize * 10), 0);
+}
+
+/*
+ * This test deterministically validates process_madvise() with MADV_COLLAPSE
+ * on a remote process, other advices are difficult to verify reliably.
+ *
+ * The test verifies that a memory region in a child process,
+ * focus on process_madv remote result, only check addresses and lengths.
+ * The correctness of the MADV_COLLAPSE can be found in the relevant test examples in khugepaged.
+ */
+TEST_F(process_madvise, remote_collapse)
+{
+ const unsigned long pagesize = self->page_size;
+ long huge_page_size;
+ int pipe_info[2];
+ ssize_t ret;
+ struct iovec vec;
+
+ struct child_info {
+ pid_t pid;
+ void *map_addr;
+ } info;
+
+ huge_page_size = read_pmd_pagesize();
+ if (huge_page_size <= 0)
+ SKIP(return, "Could not determine a valid huge page size.\n");
+
+ ASSERT_EQ(pipe(pipe_info), 0);
+
+ self->child_pid = fork();
+ ASSERT_NE(self->child_pid, -1);
+
+ if (self->child_pid == 0) {
+ char *map;
+ size_t map_size = 2 * huge_page_size;
+
+ close(pipe_info[0]);
+
+ map = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(map, MAP_FAILED);
+
+ /* Fault in as small pages */
+ for (size_t i = 0; i < map_size; i += pagesize)
+ map[i] = 'A';
+
+ /* Send info and pause */
+ info.pid = getpid();
+ info.map_addr = map;
+ ret = write(pipe_info[1], &info, sizeof(info));
+ ASSERT_EQ(ret, sizeof(info));
+ close(pipe_info[1]);
+
+ pause();
+ exit(0);
+ }
+
+ close(pipe_info[1]);
+
+ /* Receive child info */
+ ret = read(pipe_info[0], &info, sizeof(info));
+ if (ret <= 0) {
+ waitpid(self->child_pid, NULL, 0);
+ SKIP(return, "Failed to read child info from pipe.\n");
+ }
+ ASSERT_EQ(ret, sizeof(info));
+ close(pipe_info[0]);
+ self->child_pid = info.pid;
+
+ self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0);
+ ASSERT_GE(self->remote_pidfd, 0);
+
+ vec.iov_base = info.map_addr;
+ vec.iov_len = huge_page_size;
+
+ ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_COLLAPSE,
+ 0);
+ if (ret == -1) {
+ if (errno == EINVAL)
+ SKIP(return, "PROCESS_MADV_ADVISE is not supported.\n");
+ else if (errno == EPERM)
+ SKIP(return,
+ "No process_madvise() permissions, try running as root.\n");
+ return;
+ }
+
+ ASSERT_EQ(ret, huge_page_size);
+}
+
+/*
+ * Test process_madvise() with a pidfd for a process that has already
+ * exited to ensure correct error handling.
+ */
+TEST_F(process_madvise, exited_process_pidfd)
+{
+ const unsigned long pagesize = self->page_size;
+ struct iovec vec;
+ char *map;
+ ssize_t ret;
+
+ map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
+ 0);
+ if (map == MAP_FAILED)
+ SKIP(return, "mmap failed, not enough memory.\n");
+
+ vec.iov_base = map;
+ vec.iov_len = pagesize;
+
+ /*
+ * Using a pidfd for a process that has already exited should fail
+ * with ESRCH.
+ */
+ self->child_pid = fork();
+ ASSERT_NE(self->child_pid, -1);
+
+ if (self->child_pid == 0)
+ exit(0);
+
+ self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0);
+ ASSERT_GE(self->remote_pidfd, 0);
+
+ /* Wait for the child to ensure it has terminated. */
+ waitpid(self->child_pid, NULL, 0);
+
+ ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_DONTNEED,
+ 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ESRCH);
+}
+
+/*
+ * Test process_madvise() with bad pidfds to ensure correct error
+ * handling.
+ */
+TEST_F(process_madvise, bad_pidfd)
+{
+ const unsigned long pagesize = self->page_size;
+ struct iovec vec;
+ char *map;
+ ssize_t ret;
+
+ map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
+ 0);
+ if (map == MAP_FAILED)
+ SKIP(return, "mmap failed, not enough memory.\n");
+
+ vec.iov_base = map;
+ vec.iov_len = pagesize;
+
+ /* Using an invalid fd number (-1) should fail with EBADF. */
+ ret = sys_process_madvise(-1, &vec, 1, MADV_DONTNEED, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EBADF);
+
+ /*
+ * Using a valid fd that is not a pidfd (e.g. stdin) should fail
+ * with EBADF.
+ */
+ ret = sys_process_madvise(STDIN_FILENO, &vec, 1, MADV_DONTNEED, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EBADF);
+}
+
+/*
+ * Test that process_madvise() rejects vlen > UIO_MAXIOV.
+ * The kernel should return -EINVAL when the number of iovecs exceeds 1024.
+ */
+TEST_F(process_madvise, invalid_vlen)
+{
+ const unsigned long pagesize = self->page_size;
+ int pidfd = self->pidfd;
+ struct iovec vec;
+ char *map;
+ ssize_t ret;
+
+ map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
+ 0);
+ if (map == MAP_FAILED)
+ SKIP(return, "mmap failed, not enough memory.\n");
+
+ vec.iov_base = map;
+ vec.iov_len = pagesize;
+
+ ret = sys_process_madvise(pidfd, &vec, 1025, MADV_DONTNEED, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(map, pagesize), 0);
+}
+
+/*
+ * Test process_madvise() with an invalid flag value. Currently, only a flag
+ * value of 0 is supported. This test is reserved for the future, e.g., if
+ * synchronous flags are added.
+ */
+TEST_F(process_madvise, flag)
+{
+ const unsigned long pagesize = self->page_size;
+ unsigned int invalid_flag;
+ int pidfd = self->pidfd;
+ struct iovec vec;
+ char *map;
+ ssize_t ret;
+
+ map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
+ 0);
+ if (map == MAP_FAILED)
+ SKIP(return, "mmap failed, not enough memory.\n");
+
+ vec.iov_base = map;
+ vec.iov_len = pagesize;
+
+ invalid_flag = 0x80000000;
+
+ ret = sys_process_madvise(pidfd, &vec, 1, MADV_DONTNEED, invalid_flag);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(map, pagesize), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index fdbb602ecf32..2085982dba69 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -18,12 +18,13 @@
* do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
*
* Compile like this:
- * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
*/
#define _GNU_SOURCE
#define __SANE_USERSPACE_TYPES__
#include <errno.h>
+#include <linux/elf.h>
#include <linux/futex.h>
#include <time.h>
#include <sys/time.h>
@@ -52,9 +53,15 @@ int test_nr;
u64 shadow_pkey_reg;
int dprint_in_signal;
-char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-void cat_into_file(char *str, char *file)
+noinline int read_ptr(int *ptr)
+{
+ /* Keep GCC from optimizing this away somehow */
+ barrier();
+ return *ptr;
+}
+
+static void cat_into_file(char *str, char *file)
{
int fd = open(file, O_RDWR);
int ret;
@@ -81,7 +88,7 @@ void cat_into_file(char *str, char *file)
#if CONTROL_TRACING > 0
static int warned_tracing;
-int tracing_root_ok(void)
+static int tracing_root_ok(void)
{
if (geteuid() != 0) {
if (!warned_tracing)
@@ -94,10 +101,10 @@ int tracing_root_ok(void)
}
#endif
-void tracing_on(void)
+static void tracing_on(void)
{
#if CONTROL_TRACING > 0
-#define TRACEDIR "/sys/kernel/debug/tracing"
+#define TRACEDIR "/sys/kernel/tracing"
char pidstr[32];
if (!tracing_root_ok())
@@ -118,12 +125,12 @@ void tracing_on(void)
#endif
}
-void tracing_off(void)
+static void tracing_off(void)
{
#if CONTROL_TRACING > 0
if (!tracing_root_ok())
return;
- cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
+ cat_into_file("0", "/sys/kernel/tracing/tracing_on");
#endif
}
@@ -146,13 +153,13 @@ void abort_hooks(void)
* will then fault, which makes sure that the fault code handles
* execute-only memory properly.
*/
-#ifdef __powerpc64__
+#if defined(__powerpc64__) || defined(__aarch64__)
/* This way, both 4K and 64K alignment are maintained */
__attribute__((__aligned__(65536)))
#else
__attribute__((__aligned__(PAGE_SIZE)))
#endif
-void lots_o_noops_around_write(int *write_to_me)
+static void lots_o_noops_around_write(int *write_to_me)
{
dprintf3("running %s()\n", __func__);
__page_o_noops();
@@ -163,7 +170,7 @@ void lots_o_noops_around_write(int *write_to_me)
dprintf3("%s() done\n", __func__);
}
-void dump_mem(void *dumpme, int len_bytes)
+static void dump_mem(void *dumpme, int len_bytes)
{
char *c = (void *)dumpme;
int i;
@@ -206,12 +213,11 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
return 0;
}
-void pkey_disable_set(int pkey, int flags)
+static void pkey_disable_set(int pkey, int flags)
{
unsigned long syscall_flags = 0;
int ret;
int pkey_rights;
- u64 orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__,
pkey, flags);
@@ -241,18 +247,15 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
__func__, pkey, read_pkey_reg());
- if (flags)
- pkey_assert(read_pkey_reg() >= orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
pkey, flags);
}
-void pkey_disable_clear(int pkey, int flags)
+static void pkey_disable_clear(int pkey, int flags)
{
unsigned long syscall_flags = 0;
int ret;
int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u64 orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -272,36 +275,25 @@ void pkey_disable_clear(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
pkey, read_pkey_reg());
- if (flags)
- assert(read_pkey_reg() <= orig_pkey_reg);
}
-void pkey_write_allow(int pkey)
+__maybe_unused static void pkey_write_allow(int pkey)
{
pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
}
-void pkey_write_deny(int pkey)
+__maybe_unused static void pkey_write_deny(int pkey)
{
pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
}
-void pkey_access_allow(int pkey)
+__maybe_unused static void pkey_access_allow(int pkey)
{
pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
}
-void pkey_access_deny(int pkey)
+__maybe_unused static void pkey_access_deny(int pkey)
{
pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
}
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR 3
-#endif
-
-#ifndef SEGV_PKUERR
-# define SEGV_PKUERR 4
-#endif
-
static char *si_code_str(int si_code)
{
if (si_code == SEGV_MAPERR)
@@ -315,14 +307,16 @@ static char *si_code_str(int si_code)
return "UNKNOWN";
}
-int pkey_faults;
-int last_si_pkey = -1;
-void signal_handler(int signum, siginfo_t *si, void *vucontext)
+static int pkey_faults;
+static int last_si_pkey = -1;
+static void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
ucontext_t *uctxt = vucontext;
int trapno;
unsigned long ip;
+#ifdef MCONTEXT_FPREGS
char *fpregs;
+#endif
#if defined(__i386__) || defined(__x86_64__) /* arch */
u32 *pkey_reg_ptr;
int pkey_reg_offset;
@@ -336,9 +330,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
__func__, __LINE__,
__read_pkey_reg(), shadow_pkey_reg);
- trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
- ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+ trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext);
+ ip = MCONTEXT_IP(uctxt->uc_mcontext);
+#ifdef MCONTEXT_FPREGS
fpregs = (char *) uctxt->uc_mcontext.fpregs;
+#endif
dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
__func__, trapno, ip, si_code_str(si->si_code),
@@ -367,7 +363,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
#endif /* arch */
dprintf1("siginfo: %p\n", si);
+#ifdef MCONTEXT_FPREGS
dprintf1(" fpregs: %p\n", fpregs);
+#endif
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
@@ -397,26 +395,22 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
#elif defined(__powerpc64__) /* arch */
/* restore access and let the faulting instruction continue */
pkey_access_allow(siginfo_pkey);
+#elif defined(__aarch64__)
+ aarch64_write_signal_pkey(uctxt, PKEY_REG_ALLOW_ALL);
#endif /* arch */
pkey_faults++;
dprintf1("<<<<==================================================\n");
dprint_in_signal = 0;
}
-int wait_all_children(void)
-{
- int status;
- return waitpid(-1, &status, 0);
-}
-
-void sig_chld(int x)
+static void sig_chld(int x)
{
dprint_in_signal = 1;
dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
dprint_in_signal = 0;
}
-void setup_sigsegv_handler(void)
+static void setup_sigsegv_handler(void)
{
int r, rs;
struct sigaction newact;
@@ -442,13 +436,13 @@ void setup_sigsegv_handler(void)
pkey_assert(r == 0);
}
-void setup_handlers(void)
+static void setup_handlers(void)
{
signal(SIGCHLD, &sig_chld);
setup_sigsegv_handler();
}
-pid_t fork_lazy_child(void)
+static pid_t fork_lazy_child(void)
{
pid_t forkret;
@@ -466,38 +460,10 @@ pid_t fork_lazy_child(void)
return forkret;
}
-int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int sret;
-
- dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
- ptr, size, orig_prot, pkey);
-
- errno = 0;
- sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
- if (errno) {
- dprintf2("SYS_mprotect_key sret: %d\n", sret);
- dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
- dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
- if (DEBUG_LEVEL >= 2)
- perror("SYS_mprotect_pkey");
- }
- return sret;
-}
-
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
-{
- int ret = syscall(SYS_pkey_alloc, flags, init_val);
- dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
- __func__, flags, init_val, ret, errno);
- return ret;
-}
-
-int alloc_pkey(void)
+static int alloc_pkey(void)
{
int ret;
- unsigned long init_val = 0x0;
+ unsigned long init_val = PKEY_UNRESTRICTED;
dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
__func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
@@ -510,7 +476,7 @@ int alloc_pkey(void)
" shadow: 0x%016llx\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
- if (ret) {
+ if (ret > 0) {
/* clear both the bits: */
shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
~PKEY_MASK);
@@ -540,19 +506,12 @@ int alloc_pkey(void)
return ret;
}
-int sys_pkey_free(unsigned long pkey)
-{
- int ret = syscall(SYS_pkey_free, pkey);
- dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
- return ret;
-}
-
/*
* I had a bug where pkey bits could be set by mprotect() but
* not cleared. This ensures we get lots of random bit sets
* and clears on the vma and pte pkey bits.
*/
-int alloc_random_pkey(void)
+static int alloc_random_pkey(void)
{
int max_nr_pkey_allocs;
int ret;
@@ -561,7 +520,6 @@ int alloc_random_pkey(void)
int nr_alloced = 0;
int random_index;
memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
- srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */
max_nr_pkey_allocs = NR_PKEYS;
@@ -599,13 +557,11 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
int nr_iterations = random() % 100;
int ret;
- while (0) {
+ while (nr_iterations-- >= 0) {
int rpkey = alloc_random_pkey();
ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
ptr, size, orig_prot, pkey, ret);
- if (nr_iterations-- < 0)
- break;
dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
" shadow: 0x%016llx\n",
@@ -636,7 +592,7 @@ struct pkey_malloc_record {
};
struct pkey_malloc_record *pkey_malloc_records;
struct pkey_malloc_record *pkey_last_malloc_record;
-long nr_pkey_malloc_records;
+static long nr_pkey_malloc_records;
void record_pkey_malloc(void *ptr, long size, int prot)
{
long i;
@@ -674,7 +630,7 @@ void record_pkey_malloc(void *ptr, long size, int prot)
nr_pkey_malloc_records++;
}
-void free_pkey_malloc(void *ptr)
+static void free_pkey_malloc(void *ptr)
{
long i;
int ret;
@@ -701,8 +657,7 @@ void free_pkey_malloc(void *ptr)
pkey_assert(false);
}
-
-void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
+static void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
{
void *ptr;
int ret;
@@ -722,7 +677,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
return ptr;
}
-void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
+static void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
{
int ret;
void *ptr;
@@ -752,10 +707,10 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
return ptr;
}
-int hugetlb_setup_ok;
+static int hugetlb_setup_ok;
#define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages"
#define GET_NR_HUGE_PAGES 10
-void setup_hugetlbfs(void)
+static void setup_hugetlbfs(void)
{
int err;
int fd;
@@ -803,7 +758,7 @@ void setup_hugetlbfs(void)
hugetlb_setup_ok = 1;
}
-void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
+static void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
{
void *ptr;
int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
@@ -824,42 +779,15 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
return ptr;
}
-void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
-{
- void *ptr;
- int fd;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- fd = open("/dax/foo", O_RDWR);
- pkey_assert(fd >= 0);
-
- ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
- pkey_assert(ptr != (void *)-1);
-
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
- close(fd);
- return ptr;
-}
-
-void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
+static void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
malloc_pkey_with_mprotect,
malloc_pkey_with_mprotect_subpage,
malloc_pkey_anon_huge,
malloc_pkey_hugetlb
-/* can not do direct with the pkey_mprotect() API:
- malloc_pkey_mmap_direct,
- malloc_pkey_mmap_dax,
-*/
};
-void *malloc_pkey(long size, int prot, u16 pkey)
+static void *malloc_pkey(long size, int prot, u16 pkey)
{
void *ret;
static int malloc_type;
@@ -889,7 +817,7 @@ void *malloc_pkey(long size, int prot, u16 pkey)
return ret;
}
-int last_pkey_faults;
+static int last_pkey_faults;
#define UNKNOWN_PKEY -2
void expected_pkey_fault(int pkey)
{
@@ -911,7 +839,9 @@ void expected_pkey_fault(int pkey)
* test program continue. We now have to restore it.
*/
if (__read_pkey_reg() != 0)
-#else /* arch */
+#elif defined(__aarch64__)
+ if (__read_pkey_reg() != PKEY_REG_ALLOW_ALL)
+#else
if (__read_pkey_reg() != shadow_pkey_reg)
#endif /* arch */
pkey_assert(0);
@@ -929,9 +859,9 @@ void expected_pkey_fault(int pkey)
pkey_assert(last_pkey_faults == pkey_faults); \
} while (0)
-int test_fds[10] = { -1 };
-int nr_test_fds;
-void __save_test_fd(int fd)
+static int test_fds[10] = { -1 };
+static int nr_test_fds;
+static void __save_test_fd(int fd)
{
pkey_assert(fd >= 0);
pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
@@ -939,14 +869,14 @@ void __save_test_fd(int fd)
nr_test_fds++;
}
-int get_test_read_fd(void)
+static int get_test_read_fd(void)
{
int test_fd = open("/etc/passwd", O_RDONLY);
__save_test_fd(test_fd);
return test_fd;
}
-void close_test_fds(void)
+static void close_test_fds(void)
{
int i;
@@ -959,17 +889,7 @@ void close_test_fds(void)
nr_test_fds = 0;
}
-#define barrier() __asm__ __volatile__("": : :"memory")
-__attribute__((noinline)) int read_ptr(int *ptr)
-{
- /*
- * Keep GCC from optimizing this away somehow
- */
- barrier();
- return *ptr;
-}
-
-void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
+static void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
{
int i, err;
int max_nr_pkey_allocs;
@@ -1021,7 +941,7 @@ void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
pkey_assert(!err);
}
-void test_read_of_write_disabled_region(int *ptr, u16 pkey)
+static void test_read_of_write_disabled_region(int *ptr, u16 pkey)
{
int ptr_contents;
@@ -1031,7 +951,7 @@ void test_read_of_write_disabled_region(int *ptr, u16 pkey)
dprintf1("*ptr: %d\n", ptr_contents);
dprintf1("\n");
}
-void test_read_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_read_of_access_disabled_region(int *ptr, u16 pkey)
{
int ptr_contents;
@@ -1043,7 +963,7 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey)
expected_pkey_fault(pkey);
}
-void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
+static void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
u16 pkey)
{
int ptr_contents;
@@ -1060,7 +980,7 @@ void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
expected_pkey_fault(pkey);
}
-void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
+static void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
u16 pkey)
{
*ptr = __LINE__;
@@ -1071,14 +991,14 @@ void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
expected_pkey_fault(pkey);
}
-void test_write_of_write_disabled_region(int *ptr, u16 pkey)
+static void test_write_of_write_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
pkey_write_deny(pkey);
*ptr = __LINE__;
expected_pkey_fault(pkey);
}
-void test_write_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_write_of_access_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
pkey_access_deny(pkey);
@@ -1086,7 +1006,7 @@ void test_write_of_access_disabled_region(int *ptr, u16 pkey)
expected_pkey_fault(pkey);
}
-void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
+static void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
u16 pkey)
{
*ptr = __LINE__;
@@ -1097,7 +1017,7 @@ void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
expected_pkey_fault(pkey);
}
-void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
{
int ret;
int test_fd = get_test_read_fd();
@@ -1109,7 +1029,8 @@ void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
dprintf1("read ret: %d\n", ret);
pkey_assert(ret);
}
-void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
+
+static void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
{
int ret;
int test_fd = get_test_read_fd();
@@ -1122,7 +1043,7 @@ void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
pkey_assert(ret);
}
-void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
{
int pipe_ret, vmsplice_ret;
struct iovec iov;
@@ -1144,7 +1065,7 @@ void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
close(pipe_fds[1]);
}
-void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
+static void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
{
int ignored = 0xdada;
int futex_ret;
@@ -1162,7 +1083,7 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
}
/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
+static void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
{
int err;
int i;
@@ -1185,7 +1106,7 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
}
/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
+static void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
{
int err;
int bad_pkey = NR_PKEYS+99;
@@ -1195,7 +1116,7 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
pkey_assert(err);
}
-void become_child(void)
+static void become_child(void)
{
pid_t forkret;
@@ -1211,7 +1132,7 @@ void become_child(void)
}
/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
+static void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
{
int err;
int allocated_pkeys[NR_PKEYS] = {0};
@@ -1278,12 +1199,84 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
}
}
+static void arch_force_pkey_reg_init(void)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ u64 *buf;
+
+ /*
+ * All keys should be allocated and set to allow reads and
+ * writes, so the register should be all 0. If not, just
+ * skip the test.
+ */
+ if (read_pkey_reg())
+ return;
+
+ /*
+ * Just allocate an absurd about of memory rather than
+ * doing the XSAVE size enumeration dance.
+ */
+ buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+
+ /* These __builtins require compiling with -mxsave */
+
+ /* XSAVE to build a valid buffer: */
+ __builtin_ia32_xsave(buf, XSTATE_PKEY);
+ /* Clear XSTATE_BV[PKRU]: */
+ buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY;
+ /* XRSTOR will likely get PKRU back to the init state: */
+ __builtin_ia32_xrstor(buf, XSTATE_PKEY);
+
+ munmap(buf, 1*MB);
+#endif
+}
+
+
+/*
+ * This is mostly useless on ppc for now. But it will not
+ * hurt anything and should give some better coverage as
+ * a long-running test that continually checks the pkey
+ * register.
+ */
+static void test_pkey_init_state(int *ptr, u16 pkey)
+{
+ int err;
+ int allocated_pkeys[NR_PKEYS] = {0};
+ int nr_allocated_pkeys = 0;
+ int i;
+
+ for (i = 0; i < NR_PKEYS; i++) {
+ int new_pkey = alloc_pkey();
+
+ if (new_pkey < 0)
+ continue;
+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+ }
+
+ dprintf3("%s()::%d\n", __func__, __LINE__);
+
+ arch_force_pkey_reg_init();
+
+ /*
+ * Loop for a bit, hoping to get exercise the kernel
+ * context switch code.
+ */
+ for (i = 0; i < 1000000; i++)
+ read_pkey_reg();
+
+ for (i = 0; i < nr_allocated_pkeys; i++) {
+ err = sys_pkey_free(allocated_pkeys[i]);
+ pkey_assert(!err);
+ read_pkey_reg(); /* for shadow checking */
+ }
+}
+
/*
* pkey 0 is special. It is allocated by default, so you do not
* have to call pkey_alloc() to use it first. Make sure that it
* is usable.
*/
-void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+static void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
{
long size;
int prot;
@@ -1307,9 +1300,9 @@ void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
mprotect_pkey(ptr, size, prot, pkey);
}
-void test_ptrace_of_child(int *ptr, u16 pkey)
+static void test_ptrace_of_child(int *ptr, u16 pkey)
{
- __attribute__((__unused__)) int peek_result;
+ __always_unused int peek_result;
pid_t child_pid;
void *ignored = 0;
long ret;
@@ -1383,7 +1376,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
free(plain_ptr_unaligned);
}
-void *get_pointer_to_instructions(void)
+static void *get_pointer_to_instructions(void)
{
void *p1;
@@ -1404,7 +1397,7 @@ void *get_pointer_to_instructions(void)
return p1;
}
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+static void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
{
void *p1;
int scratch;
@@ -1429,9 +1422,14 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
lots_o_noops_around_write(&scratch);
do_not_expect_pkey_fault("executing on PROT_EXEC memory");
expect_fault_on_read_execonly_key(p1, pkey);
+
+ // Reset back to PROT_EXEC | PROT_READ for architectures that support
+ // non-PKEY execute-only permissions.
+ ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey);
+ pkey_assert(!ret);
}
-void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+static void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
{
void *p1;
int scratch;
@@ -1449,6 +1447,13 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
+ /*
+ * Reset the shadow, assuming that the above mprotect()
+ * correctly changed PKRU, but to an unknown value since
+ * the actual allocated pkey is unknown.
+ */
+ shadow_pkey_reg = __read_pkey_reg();
+
dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
@@ -1472,7 +1477,208 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
}
-void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+#if defined(__i386__) || defined(__x86_64__)
+static void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+{
+ u32 new_pkru;
+ pid_t child;
+ int status, ret;
+ int pkey_offset = pkey_reg_xstate_offset();
+ size_t xsave_size = cpu_max_xsave_size();
+ void *xsave;
+ u32 *pkey_register;
+ u64 *xstate_bv;
+ struct iovec iov;
+
+ new_pkru = ~read_pkey_reg();
+ /* Don't make PROT_EXEC mappings inaccessible */
+ new_pkru &= ~3;
+
+ child = fork();
+ pkey_assert(child >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), child);
+ if (!child) {
+ ptrace(PTRACE_TRACEME, 0, 0, 0);
+ /* Stop and allow the tracer to modify PKRU directly */
+ raise(SIGSTOP);
+
+ /*
+ * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+ * checking
+ */
+ if (__read_pkey_reg() != new_pkru)
+ exit(1);
+
+ /* Stop and allow the tracer to clear XSTATE_BV for PKRU */
+ raise(SIGSTOP);
+
+ if (__read_pkey_reg() != 0)
+ exit(1);
+
+ /* Stop and allow the tracer to examine PKRU */
+ raise(SIGSTOP);
+
+ exit(0);
+ }
+
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ xsave = (void *)malloc(xsave_size);
+ pkey_assert(xsave > 0);
+
+ /* Modify the PKRU register directly */
+ iov.iov_base = xsave;
+ iov.iov_len = xsave_size;
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+
+ pkey_register = (u32 *)(xsave + pkey_offset);
+ pkey_assert(*pkey_register == read_pkey_reg());
+
+ *pkey_register = new_pkru;
+
+ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+
+ /* Test that the modification is visible in ptrace before any execution */
+ memset(xsave, 0xCC, xsave_size);
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(*pkey_register == new_pkru);
+
+ /* Execute the tracee */
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+
+ /* Test that the tracee saw the PKRU value change */
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ /* Test that the modification is visible in ptrace after execution */
+ memset(xsave, 0xCC, xsave_size);
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(*pkey_register == new_pkru);
+
+ /* Clear the PKRU bit from XSTATE_BV */
+ xstate_bv = (u64 *)(xsave + 512);
+ *xstate_bv &= ~(1 << 9);
+
+ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+
+ /* Test that the modification is visible in ptrace before any execution */
+ memset(xsave, 0xCC, xsave_size);
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(*pkey_register == 0);
+
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+
+ /* Test that the tracee saw the PKRU value go to 0 */
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ /* Test that the modification is visible in ptrace after execution */
+ memset(xsave, 0xCC, xsave_size);
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(*pkey_register == 0);
+
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFEXITED(status));
+ pkey_assert(WEXITSTATUS(status) == 0);
+ free(xsave);
+}
+#endif
+
+#if defined(__aarch64__)
+static void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+{
+ pid_t child;
+ int status, ret;
+ struct iovec iov;
+ u64 trace_pkey;
+ /* Just a random pkey value.. */
+ u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) |
+ (POE_NONE << PKEY_BITS_PER_PKEY) |
+ POE_RWX;
+
+ child = fork();
+ pkey_assert(child >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), child);
+ if (!child) {
+ ptrace(PTRACE_TRACEME, 0, 0, 0);
+
+ /* Stop and allow the tracer to modify PKRU directly */
+ raise(SIGSTOP);
+
+ /*
+ * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+ * checking
+ */
+ if (__read_pkey_reg() != new_pkey)
+ exit(1);
+
+ raise(SIGSTOP);
+
+ exit(0);
+ }
+
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ iov.iov_base = &trace_pkey;
+ iov.iov_len = 8;
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(trace_pkey == read_pkey_reg());
+
+ trace_pkey = new_pkey;
+
+ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov);
+ pkey_assert(ret == 0);
+
+ /* Test that the modification is visible in ptrace before any execution */
+ memset(&trace_pkey, 0, sizeof(trace_pkey));
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(trace_pkey == new_pkey);
+
+ /* Execute the tracee */
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+
+ /* Test that the tracee saw the PKRU value change */
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ /* Test that the modification is visible in ptrace after execution */
+ memset(&trace_pkey, 0, sizeof(trace_pkey));
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(trace_pkey == new_pkey);
+
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFEXITED(status));
+ pkey_assert(WEXITSTATUS(status) == 0);
+}
+#endif
+
+static void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
{
int size = PAGE_SIZE;
int sret;
@@ -1482,11 +1688,11 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
return;
}
- sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
+ sret = syscall(__NR_pkey_mprotect, ptr, size, PROT_READ, pkey);
pkey_assert(sret < 0);
}
-void (*pkey_tests[])(int *ptr, u16 pkey) = {
+static void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_read_of_write_disabled_region,
test_read_of_access_disabled_region,
test_read_of_access_disabled_region_with_page_already_mapped,
@@ -1502,13 +1708,17 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_implicit_mprotect_exec_only_memory,
test_mprotect_with_pkey_0,
test_ptrace_of_child,
+ test_pkey_init_state,
test_pkey_syscalls_on_non_allocated_pkey,
test_pkey_syscalls_bad_args,
test_pkey_alloc_exhaust,
test_pkey_alloc_free_attach_pkey0,
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+ test_ptrace_modifies_pkru,
+#endif
};
-void run_tests_once(void)
+static void run_tests_once(void)
{
int *ptr;
int prot = PROT_READ|PROT_WRITE;
@@ -1542,7 +1752,7 @@ void run_tests_once(void)
iteration_nr++;
}
-void pkey_setup_shadow(void)
+static void pkey_setup_shadow(void)
{
shadow_pkey_reg = __read_pkey_reg();
}
@@ -1552,6 +1762,8 @@ int main(void)
int nr_iterations = 22;
int pkeys_supported = is_pkeys_supported();
+ srand((unsigned int)time(NULL));
+
setup_handlers();
printf("has pkeys: %d\n", pkeys_supported);
diff --git a/tools/testing/selftests/mm/rmap.c b/tools/testing/selftests/mm/rmap.c
new file mode 100644
index 000000000000..53f2058b0ef2
--- /dev/null
+++ b/tools/testing/selftests/mm/rmap.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RMAP functional tests
+ *
+ * Author(s): Wei Yang <richard.weiyang@gmail.com>
+ */
+
+#include "kselftest_harness.h"
+#include <strings.h>
+#include <pthread.h>
+#include <numa.h>
+#include <numaif.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <time.h>
+#include <sys/sem.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "vm_util.h"
+
+#define TOTAL_LEVEL 5
+#define MAX_CHILDREN 3
+
+#define FAIL_ON_CHECK (1 << 0)
+#define FAIL_ON_WORK (1 << 1)
+
+struct sembuf sem_wait = {0, -1, 0};
+struct sembuf sem_signal = {0, 1, 0};
+
+enum backend_type {
+ ANON,
+ SHM,
+ NORM_FILE,
+};
+
+#define PREFIX "kst_rmap"
+#define MAX_FILENAME_LEN 256
+const char *suffixes[] = {
+ "",
+ "_shm",
+ "_file",
+};
+
+struct global_data;
+typedef int (*work_fn)(struct global_data *data);
+typedef int (*check_fn)(struct global_data *data);
+typedef void (*prepare_fn)(struct global_data *data);
+
+struct global_data {
+ int worker_level;
+
+ int semid;
+ int pipefd[2];
+
+ unsigned int mapsize;
+ unsigned int rand_seed;
+ char *region;
+
+ prepare_fn do_prepare;
+ work_fn do_work;
+ check_fn do_check;
+
+ enum backend_type backend;
+ char filename[MAX_FILENAME_LEN];
+
+ unsigned long *expected_pfn;
+};
+
+/*
+ * Create a process tree with TOTAL_LEVEL height and at most MAX_CHILDREN
+ * children for each.
+ *
+ * It will randomly select one process as 'worker' process which will
+ * 'do_work' until all processes are created. And all other processes will
+ * wait until 'worker' finish its work.
+ */
+void propagate_children(struct __test_metadata *_metadata, struct global_data *data)
+{
+ pid_t root_pid, pid;
+ unsigned int num_child;
+ int status;
+ int ret = 0;
+ int curr_child, worker_child;
+ int curr_level = 1;
+ bool is_worker = true;
+
+ root_pid = getpid();
+repeat:
+ num_child = rand_r(&data->rand_seed) % MAX_CHILDREN + 1;
+ worker_child = is_worker ? rand_r(&data->rand_seed) % num_child : -1;
+
+ for (curr_child = 0; curr_child < num_child; curr_child++) {
+ pid = fork();
+
+ if (pid < 0) {
+ perror("Error: fork\n");
+ } else if (pid == 0) {
+ curr_level++;
+
+ if (curr_child != worker_child)
+ is_worker = false;
+
+ if (curr_level == TOTAL_LEVEL)
+ break;
+
+ data->rand_seed += curr_child;
+ goto repeat;
+ }
+ }
+
+ if (data->do_prepare)
+ data->do_prepare(data);
+
+ close(data->pipefd[1]);
+
+ if (is_worker && curr_level == data->worker_level) {
+ /* This is the worker process, first wait last process created */
+ char buf;
+
+ while (read(data->pipefd[0], &buf, 1) > 0)
+ ;
+
+ if (data->do_work)
+ ret = data->do_work(data);
+
+ /* Kick others */
+ semctl(data->semid, 0, IPC_RMID);
+ } else {
+ /* Wait worker finish */
+ semop(data->semid, &sem_wait, 1);
+ if (data->do_check)
+ ret = data->do_check(data);
+ }
+
+ /* Wait all child to quit */
+ while (wait(&status) > 0) {
+ if (WIFEXITED(status))
+ ret |= WEXITSTATUS(status);
+ }
+
+ if (getpid() == root_pid) {
+ if (ret & FAIL_ON_WORK)
+ SKIP(return, "Failed in worker");
+
+ ASSERT_EQ(ret, 0);
+ } else {
+ exit(ret);
+ }
+}
+
+FIXTURE(migrate)
+{
+ struct global_data data;
+};
+
+FIXTURE_SETUP(migrate)
+{
+ struct global_data *data = &self->data;
+
+ if (numa_available() < 0)
+ SKIP(return, "NUMA not available");
+ if (numa_bitmask_weight(numa_all_nodes_ptr) <= 1)
+ SKIP(return, "Not enough NUMA nodes available");
+
+ data->mapsize = getpagesize();
+
+ data->expected_pfn = mmap(0, sizeof(unsigned long),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(data->expected_pfn, MAP_FAILED);
+
+ /* Prepare semaphore */
+ data->semid = semget(IPC_PRIVATE, 1, 0666 | IPC_CREAT);
+ ASSERT_NE(data->semid, -1);
+ ASSERT_NE(semctl(data->semid, 0, SETVAL, 0), -1);
+
+ /* Prepare pipe */
+ ASSERT_NE(pipe(data->pipefd), -1);
+
+ data->rand_seed = time(NULL);
+ srand(data->rand_seed);
+
+ data->worker_level = rand() % TOTAL_LEVEL + 1;
+
+ data->do_prepare = NULL;
+ data->do_work = NULL;
+ data->do_check = NULL;
+
+ data->backend = ANON;
+};
+
+FIXTURE_TEARDOWN(migrate)
+{
+ struct global_data *data = &self->data;
+
+ if (data->region != MAP_FAILED)
+ munmap(data->region, data->mapsize);
+ data->region = MAP_FAILED;
+ if (data->expected_pfn != MAP_FAILED)
+ munmap(data->expected_pfn, sizeof(unsigned long));
+ data->expected_pfn = MAP_FAILED;
+ semctl(data->semid, 0, IPC_RMID);
+ data->semid = -1;
+
+ close(data->pipefd[0]);
+
+ switch (data->backend) {
+ case ANON:
+ break;
+ case SHM:
+ shm_unlink(data->filename);
+ break;
+ case NORM_FILE:
+ unlink(data->filename);
+ break;
+ }
+}
+
+void access_region(struct global_data *data)
+{
+ /*
+ * Force read "region" to make sure page fault in.
+ */
+ FORCE_READ(*data->region);
+}
+
+int try_to_move_page(char *region)
+{
+ int ret;
+ int node;
+ int status = 0;
+ int failures = 0;
+
+ ret = move_pages(0, 1, (void **)&region, NULL, &status, MPOL_MF_MOVE_ALL);
+ if (ret != 0) {
+ perror("Failed to get original numa");
+ return FAIL_ON_WORK;
+ }
+
+ /* Pick up a different target node */
+ for (node = 0; node <= numa_max_node(); node++) {
+ if (numa_bitmask_isbitset(numa_all_nodes_ptr, node) && node != status)
+ break;
+ }
+
+ if (node > numa_max_node()) {
+ ksft_print_msg("Couldn't find available numa node for testing\n");
+ return FAIL_ON_WORK;
+ }
+
+ while (1) {
+ ret = move_pages(0, 1, (void **)&region, &node, &status, MPOL_MF_MOVE_ALL);
+
+ /* migrate successfully */
+ if (!ret)
+ break;
+
+ /* error happened */
+ if (ret < 0) {
+ ksft_perror("Failed to move pages");
+ return FAIL_ON_WORK;
+ }
+
+ /* migration is best effort; try again */
+ if (++failures >= 100)
+ return FAIL_ON_WORK;
+ }
+
+ return 0;
+}
+
+int move_region(struct global_data *data)
+{
+ int ret;
+ int pagemap_fd;
+
+ ret = try_to_move_page(data->region);
+ if (ret != 0)
+ return ret;
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd == -1)
+ return FAIL_ON_WORK;
+ *data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region);
+
+ return 0;
+}
+
+int has_same_pfn(struct global_data *data)
+{
+ unsigned long pfn;
+ int pagemap_fd;
+
+ if (data->region == MAP_FAILED)
+ return 0;
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd == -1)
+ return FAIL_ON_CHECK;
+
+ pfn = pagemap_get_pfn(pagemap_fd, data->region);
+ if (pfn != *data->expected_pfn)
+ return FAIL_ON_CHECK;
+
+ return 0;
+}
+
+TEST_F(migrate, anon)
+{
+ struct global_data *data = &self->data;
+
+ /* Map an area and fault in */
+ data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(data->region, MAP_FAILED);
+ memset(data->region, 0xcf, data->mapsize);
+
+ data->do_prepare = access_region;
+ data->do_work = move_region;
+ data->do_check = has_same_pfn;
+
+ propagate_children(_metadata, data);
+}
+
+TEST_F(migrate, shm)
+{
+ int shm_fd;
+ struct global_data *data = &self->data;
+
+ snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[SHM]);
+ shm_fd = shm_open(data->filename, O_CREAT | O_RDWR, 0666);
+ ASSERT_NE(shm_fd, -1);
+ ftruncate(shm_fd, data->mapsize);
+ data->backend = SHM;
+
+ /* Map a shared area and fault in */
+ data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE,
+ MAP_SHARED, shm_fd, 0);
+ ASSERT_NE(data->region, MAP_FAILED);
+ memset(data->region, 0xcf, data->mapsize);
+ close(shm_fd);
+
+ data->do_prepare = access_region;
+ data->do_work = move_region;
+ data->do_check = has_same_pfn;
+
+ propagate_children(_metadata, data);
+}
+
+TEST_F(migrate, file)
+{
+ int fd;
+ struct global_data *data = &self->data;
+
+ snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[NORM_FILE]);
+ fd = open(data->filename, O_CREAT | O_RDWR | O_EXCL, 0666);
+ ASSERT_NE(fd, -1);
+ ftruncate(fd, data->mapsize);
+ data->backend = NORM_FILE;
+
+ /* Map a shared area and fault in */
+ data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ ASSERT_NE(data->region, MAP_FAILED);
+ memset(data->region, 0xcf, data->mapsize);
+ close(fd);
+
+ data->do_prepare = access_region;
+ data->do_work = move_region;
+ data->do_check = has_same_pfn;
+
+ propagate_children(_metadata, data);
+}
+
+void prepare_local_region(struct global_data *data)
+{
+ /* Allocate range and set the same data */
+ data->region = mmap(NULL, data->mapsize, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (data->region == MAP_FAILED)
+ return;
+
+ memset(data->region, 0xcf, data->mapsize);
+}
+
+int merge_and_migrate(struct global_data *data)
+{
+ int pagemap_fd;
+ int ret = 0;
+
+ if (data->region == MAP_FAILED)
+ return FAIL_ON_WORK;
+
+ if (ksm_start() < 0)
+ return FAIL_ON_WORK;
+
+ ret = try_to_move_page(data->region);
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd == -1)
+ return FAIL_ON_WORK;
+ *data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region);
+
+ return ret;
+}
+
+TEST_F(migrate, ksm)
+{
+ int ret;
+ struct global_data *data = &self->data;
+
+ if (ksm_stop() < 0)
+ SKIP(return, "accessing \"/sys/kernel/mm/ksm/run\") failed");
+ if (ksm_get_full_scans() < 0)
+ SKIP(return, "accessing \"/sys/kernel/mm/ksm/full_scan\") failed");
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL)
+ SKIP(return, "PR_SET_MEMORY_MERGE not supported");
+ else if (ret)
+ ksft_exit_fail_perror("PR_SET_MEMORY_MERGE=1 failed");
+
+ data->do_prepare = prepare_local_region;
+ data->do_work = merge_and_migrate;
+ data->do_check = has_same_pfn;
+
+ propagate_children(_metadata, data);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
new file mode 100755
index 000000000000..d9173f2312b7
--- /dev/null
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -0,0 +1,553 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Please run as root
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+count_total=0
+count_pass=0
+count_fail=0
+count_skip=0
+exitcode=0
+
+usage() {
+ cat <<EOF
+usage: ${BASH_SOURCE[0]:-$0} [ options ]
+
+ -a: run all tests, including extra ones (other than destructive ones)
+ -t: specify specific categories to tests to run
+ -h: display this message
+ -n: disable TAP output
+ -d: run destructive tests
+
+The default behavior is to run required tests only. If -a is specified,
+will run all tests.
+
+Alternatively, specific groups tests can be run by passing a string
+to the -t argument containing one or more of the following categories
+separated by spaces:
+- mmap
+ tests for mmap(2)
+- gup_test
+ tests for gup
+- userfaultfd
+ tests for userfaultfd(2)
+- compaction
+ a test for the patch "Allow compaction of unevictable pages"
+- mlock
+ tests for mlock(2)
+- mremap
+ tests for mremap(2)
+- hugevm
+ tests for very large virtual address space
+- vmalloc
+ vmalloc smoke tests
+- hmm
+ hmm smoke tests
+- madv_guard
+ test madvise(2) MADV_GUARD_INSTALL and MADV_GUARD_REMOVE options
+- madv_populate
+ test memadvise(2) MADV_POPULATE_{READ,WRITE} options
+- memfd_secret
+ test memfd_secret(2)
+- process_mrelease
+ test process_mrelease(2)
+- ksm
+ ksm tests that do not require >=2 NUMA nodes
+- ksm_numa
+ ksm tests that require >=2 NUMA nodes
+- pkey
+ memory protection key tests
+- soft_dirty
+ test soft dirty page bit semantics
+- pagemap
+ test pagemap_scan IOCTL
+- pfnmap
+ tests for VM_PFNMAP handling
+- process_madv
+ test for process_madv
+- cow
+ test copy-on-write semantics
+- thp
+ test transparent huge pages
+- hugetlb
+ test hugetlbfs huge pages
+- migration
+ invoke move_pages(2) to exercise the migration entry code
+ paths in the kernel
+- mkdirty
+ test handling of code that might set PTE/PMD dirty in
+ read-only VMAs
+- mdwe
+ test prctl(PR_SET_MDWE, ...)
+- page_frag
+ test handling of page fragment allocation and freeing
+- vma_merge
+ test VMA merge cases behave as expected
+- rmap
+ test rmap behaves as expected
+
+example: ./run_vmtests.sh -t "hmm mmap ksm"
+EOF
+ exit 0
+}
+
+RUN_ALL=false
+RUN_DESTRUCTIVE=false
+TAP_PREFIX="# "
+
+while getopts "aht:n" OPT; do
+ case ${OPT} in
+ "a") RUN_ALL=true ;;
+ "h") usage ;;
+ "t") VM_SELFTEST_ITEMS=${OPTARG} ;;
+ "n") TAP_PREFIX= ;;
+ "d") RUN_DESTRUCTIVE=true ;;
+ esac
+done
+shift $((OPTIND -1))
+
+# default behavior: run all tests
+VM_SELFTEST_ITEMS=${VM_SELFTEST_ITEMS:-default}
+
+test_selected() {
+ if [ "$VM_SELFTEST_ITEMS" == "default" ]; then
+ # If no VM_SELFTEST_ITEMS are specified, run all tests
+ return 0
+ fi
+ # If test selected argument is one of the test items
+ if [[ " ${VM_SELFTEST_ITEMS[*]} " =~ " ${1} " ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+run_gup_matrix() {
+ # -t: thp=on, -T: thp=off, -H: hugetlb=on
+ local hugetlb_mb=$(( needmem_KB / 1024 ))
+
+ for huge in -t -T "-H -m $hugetlb_mb"; do
+ # -u: gup-fast, -U: gup-basic, -a: pin-fast, -b: pin-basic, -L: pin-longterm
+ for test_cmd in -u -U -a -b -L; do
+ # -w: write=1, -W: write=0
+ for write in -w -W; do
+ # -S: shared
+ for share in -S " "; do
+ # -n: How many pages to fetch together? 512 is special
+ # because it's default thp size (or 2M on x86), 123 to
+ # just test partial gup when hit a huge in whatever form
+ for num in "-n 1" "-n 512" "-n 123" "-n -1"; do
+ CATEGORY="gup_test" run_test ./gup_test \
+ $huge $test_cmd $write $share $num
+ done
+ done
+ done
+ done
+ done
+}
+
+# get huge pagesize and freepages from /proc/meminfo
+while read -r name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs="$size"
+ fi
+ if [ "$name" = "Hugepagesize:" ]; then
+ hpgsize_KB="$size"
+ fi
+done < /proc/meminfo
+
+# Simple hugetlbfs tests have a hardcoded minimum requirement of
+# huge pages totaling 256MB (262144KB) in size. The userfaultfd
+# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take
+# both of these requirements into account and attempt to increase
+# number of huge pages available.
+nr_cpus=$(nproc)
+uffd_min_KB=$((hpgsize_KB * nr_cpus * 2))
+hugetlb_min_KB=$((256 * 1024))
+if [[ $uffd_min_KB -gt $hugetlb_min_KB ]]; then
+ needmem_KB=$uffd_min_KB
+else
+ needmem_KB=$hugetlb_min_KB
+fi
+
+# set proper nr_hugepages
+if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
+ orig_nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+ needpgs=$((needmem_KB / hpgsize_KB))
+ tries=2
+ while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do
+ lackpgs=$((needpgs - freepgs))
+ echo 3 > /proc/sys/vm/drop_caches
+ if ! echo $((lackpgs + orig_nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then
+ echo "Please run this test as root"
+ exit $ksft_skip
+ fi
+ while read -r name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+ done < /proc/meminfo
+ tries=$((tries - 1))
+ done
+ nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+ if [ "$freepgs" -lt "$needpgs" ]; then
+ printf "Not enough huge pages available (%d < %d)\n" \
+ "$freepgs" "$needpgs"
+ fi
+ HAVE_HUGEPAGES=1
+else
+ echo "no hugetlbfs support in kernel?"
+ HAVE_HUGEPAGES=0
+fi
+
+# filter 64bit architectures
+ARCH64STR="arm64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sparc64 x86_64"
+if [ -z "$ARCH" ]; then
+ ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/')
+fi
+VADDR64=0
+echo "$ARCH64STR" | grep "$ARCH" &>/dev/null && VADDR64=1
+
+tap_prefix() {
+ sed -e "s/^/${TAP_PREFIX}/"
+}
+
+tap_output() {
+ if [[ ! -z "$TAP_PREFIX" ]]; then
+ read str
+ echo $str
+ fi
+}
+
+pretty_name() {
+ echo "$*" | sed -e 's/^\(bash \)\?\.\///'
+}
+
+# Usage: run_test [test binary] [arbitrary test arguments...]
+run_test() {
+ if test_selected ${CATEGORY}; then
+ local skip=0
+
+ # On memory constrainted systems some tests can fail to allocate hugepages.
+ # perform some cleanup before the test for a higher success rate.
+ if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then
+ if [ "${HAVE_HUGEPAGES}" = "1" ]; then
+ echo 3 > /proc/sys/vm/drop_caches
+ sleep 2
+ echo 1 > /proc/sys/vm/compact_memory
+ sleep 2
+ else
+ echo "hugepages not supported" | tap_prefix
+ skip=1
+ fi
+ fi
+
+ local test=$(pretty_name "$*")
+ local title="running $*"
+ local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
+ printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix
+
+ if [ "${skip}" != "1" ]; then
+ ("$@" 2>&1) | tap_prefix
+ local ret=${PIPESTATUS[0]}
+ else
+ local ret=$ksft_skip
+ fi
+ count_total=$(( count_total + 1 ))
+ if [ $ret -eq 0 ]; then
+ count_pass=$(( count_pass + 1 ))
+ echo "[PASS]" | tap_prefix
+ echo "ok ${count_total} ${test}" | tap_output
+ elif [ $ret -eq $ksft_skip ]; then
+ count_skip=$(( count_skip + 1 ))
+ echo "[SKIP]" | tap_prefix
+ echo "ok ${count_total} ${test} # SKIP" | tap_output
+ exitcode=$ksft_skip
+ else
+ count_fail=$(( count_fail + 1 ))
+ echo "[FAIL]" | tap_prefix
+ echo "not ok ${count_total} ${test} # exit=$ret" | tap_output
+ exitcode=1
+ fi
+ fi # test_selected
+}
+
+echo "TAP version 13" | tap_output
+
+CATEGORY="hugetlb" run_test ./hugepage-mmap
+
+shmmax=$(cat /proc/sys/kernel/shmmax)
+shmall=$(cat /proc/sys/kernel/shmall)
+echo 268435456 > /proc/sys/kernel/shmmax
+echo 4194304 > /proc/sys/kernel/shmall
+CATEGORY="hugetlb" run_test ./hugepage-shm
+echo "$shmmax" > /proc/sys/kernel/shmmax
+echo "$shmall" > /proc/sys/kernel/shmall
+
+CATEGORY="hugetlb" run_test ./map_hugetlb
+CATEGORY="hugetlb" run_test ./hugepage-mremap
+CATEGORY="hugetlb" run_test ./hugepage-vmemmap
+CATEGORY="hugetlb" run_test ./hugetlb-madvise
+CATEGORY="hugetlb" run_test ./hugetlb_dio
+
+if [ "${HAVE_HUGEPAGES}" = "1" ]; then
+ nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
+ # For this test, we need one and just one huge page
+ echo 1 > /proc/sys/vm/nr_hugepages
+ CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+ CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map
+ # Restore the previous number of huge pages, since further tests rely on it
+ echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
+fi
+
+if test_selected "hugetlb"; then
+ echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix
+ echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" | tap_prefix
+ echo " hugetlb regression testing." | tap_prefix
+fi
+
+CATEGORY="mmap" run_test ./map_fixed_noreplace
+
+if $RUN_ALL; then
+ run_gup_matrix
+else
+ # get_user_pages_fast() benchmark
+ CATEGORY="gup_test" run_test ./gup_test -u -n 1
+ CATEGORY="gup_test" run_test ./gup_test -u -n -1
+ # pin_user_pages_fast() benchmark
+ CATEGORY="gup_test" run_test ./gup_test -a -n 1
+ CATEGORY="gup_test" run_test ./gup_test -a -n -1
+fi
+# Dump pages 0, 19, and 4096, using pin_user_pages:
+CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000
+CATEGORY="gup_test" run_test ./gup_longterm
+
+CATEGORY="userfaultfd" run_test ./uffd-unit-tests
+uffd_stress_bin=./uffd-stress
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
+# Hugetlb tests require source and destination huge pages. Pass in almost half
+# the size of the free pages we have, which is used for *each*. An adjustment
+# of (nr_parallel - 1) is done (see nr_parallel in uffd-stress.c) to have some
+# extra hugepages - this is done to prevent the test from failing by racily
+# reserving more hugepages than strictly required.
+# uffd-stress expects a region expressed in MiB, so we adjust
+# half_ufd_size_MB accordingly.
+adjustment=$(( (31 < (nr_cpus - 1)) ? 31 : (nr_cpus - 1) ))
+half_ufd_size_MB=$((((freepgs - adjustment) * hpgsize_KB) / 1024 / 2))
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16
+# uffd-wp-mremap requires at least one page of each size.
+have_all_size_hugepgs=true
+declare -A nr_size_hugepgs
+for f in /sys/kernel/mm/hugepages/**/nr_hugepages; do
+ old=$(cat $f)
+ nr_size_hugepgs["$f"]="$old"
+ if [ "$old" == 0 ]; then
+ echo 1 > "$f"
+ fi
+ if [ $(cat "$f") == 0 ]; then
+ have_all_size_hugepgs=false
+ break
+ fi
+done
+if $have_all_size_hugepgs; then
+ CATEGORY="userfaultfd" run_test ./uffd-wp-mremap
+else
+ echo "# SKIP ./uffd-wp-mremap"
+fi
+
+#cleanup
+for f in "${!nr_size_hugepgs[@]}"; do
+ echo "${nr_size_hugepgs["$f"]}" > "$f"
+done
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
+
+CATEGORY="compaction" run_test ./compaction_test
+
+if command -v sudo &> /dev/null && sudo -u nobody ls ./on-fault-limit >/dev/null;
+then
+ CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit
+else
+ echo "# SKIP ./on-fault-limit"
+fi
+
+CATEGORY="mmap" run_test ./map_populate
+
+CATEGORY="mlock" run_test ./mlock-random-test
+
+CATEGORY="mlock" run_test ./mlock2-tests
+
+CATEGORY="process_mrelease" run_test ./mrelease_test
+
+CATEGORY="mremap" run_test ./mremap_test
+
+CATEGORY="hugetlb" run_test ./thuge-gen
+CATEGORY="hugetlb" run_test ./charge_reserved_hugetlb.sh -cgroup-v2
+CATEGORY="hugetlb" run_test ./hugetlb_reparenting_test.sh -cgroup-v2
+if $RUN_DESTRUCTIVE; then
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
+enable_soft_offline=$(cat /proc/sys/vm/enable_soft_offline)
+echo 8 > /proc/sys/vm/nr_hugepages
+CATEGORY="hugetlb" run_test ./hugetlb-soft-offline
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
+echo "$enable_soft_offline" > /proc/sys/vm/enable_soft_offline
+CATEGORY="hugetlb" run_test ./hugetlb-read-hwpoison
+fi
+
+if [ $VADDR64 -ne 0 ]; then
+
+ # set overcommit_policy as OVERCOMMIT_ALWAYS so that kernel
+ # allows high virtual address allocation requests independent
+ # of platform's physical memory.
+
+ if [ -x ./virtual_address_range ]; then
+ prev_policy=$(cat /proc/sys/vm/overcommit_memory)
+ echo 1 > /proc/sys/vm/overcommit_memory
+ CATEGORY="hugevm" run_test ./virtual_address_range
+ echo $prev_policy > /proc/sys/vm/overcommit_memory
+ fi
+
+ # va high address boundary switch test
+ ARCH_ARM64="arm64"
+ prev_nr_hugepages=$(cat /proc/sys/vm/nr_hugepages)
+ if [ "$ARCH" == "$ARCH_ARM64" ]; then
+ echo 6 > /proc/sys/vm/nr_hugepages
+ fi
+ CATEGORY="hugevm" run_test bash ./va_high_addr_switch.sh
+ if [ "$ARCH" == "$ARCH_ARM64" ]; then
+ echo $prev_nr_hugepages > /proc/sys/vm/nr_hugepages
+ fi
+fi # VADDR64
+
+# vmalloc stability smoke test
+CATEGORY="vmalloc" run_test bash ./test_vmalloc.sh smoke
+
+CATEGORY="mremap" run_test ./mremap_dontunmap
+
+CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
+
+# MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests
+CATEGORY="madv_guard" run_test ./guard-regions
+
+# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+CATEGORY="madv_populate" run_test ./madv_populate
+
+# PROCESS_MADV test
+CATEGORY="process_madv" run_test ./process_madv
+
+CATEGORY="vma_merge" run_test ./merge
+
+if [ -x ./memfd_secret ]
+then
+if [ -f /proc/sys/kernel/yama/ptrace_scope ]; then
+ (echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
+fi
+CATEGORY="memfd_secret" run_test ./memfd_secret
+fi
+
+# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
+if [ "${HAVE_HUGEPAGES}" = "1" ]; then
+ CATEGORY="ksm" run_test ./ksm_tests -H -s 100
+fi
+# KSM KSM_MERGE_TIME test with size of 100
+CATEGORY="ksm" run_test ./ksm_tests -P -s 100
+# KSM MADV_MERGEABLE test with 10 identical pages
+CATEGORY="ksm" run_test ./ksm_tests -M -p 10
+# KSM unmerge test
+CATEGORY="ksm" run_test ./ksm_tests -U
+# KSM test with 10 zero pages and use_zero_pages = 0
+CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 0
+# KSM test with 10 zero pages and use_zero_pages = 1
+CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 1
+CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 0
+CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0
+
+CATEGORY="ksm" run_test ./ksm_functional_tests
+
+# protection_keys tests
+nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+if [ -x ./protection_keys_32 ]
+then
+ CATEGORY="pkey" run_test ./protection_keys_32
+fi
+
+if [ -x ./protection_keys_64 ]
+then
+ CATEGORY="pkey" run_test ./protection_keys_64
+fi
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
+
+if [ -x ./soft-dirty ]
+then
+ CATEGORY="soft_dirty" run_test ./soft-dirty
+fi
+
+CATEGORY="pagemap" run_test ./pagemap_ioctl
+
+CATEGORY="pfnmap" run_test ./pfnmap
+
+# COW tests
+CATEGORY="cow" run_test ./cow
+
+CATEGORY="thp" run_test ./khugepaged
+
+CATEGORY="thp" run_test ./khugepaged -s 2
+
+CATEGORY="thp" run_test ./khugepaged all:shmem
+
+CATEGORY="thp" run_test ./khugepaged -s 4 all:shmem
+
+CATEGORY="thp" run_test ./transhuge-stress -d 20
+
+# Try to create XFS if not provided
+if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then
+ if [ "${HAVE_HUGEPAGES}" = "1" ]; then
+ if test_selected "thp"; then
+ if grep xfs /proc/filesystems &>/dev/null; then
+ XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX)
+ SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX)
+ truncate -s 314572800 ${XFS_IMG}
+ mkfs.xfs -q ${XFS_IMG}
+ mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+ MOUNTED_XFS=1
+ fi
+ fi
+ fi
+fi
+
+CATEGORY="thp" run_test ./split_huge_page_test ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+
+if [ -n "${MOUNTED_XFS}" ]; then
+ umount ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+ rmdir ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+ rm -f ${XFS_IMG}
+fi
+
+CATEGORY="migration" run_test ./migration
+
+CATEGORY="mkdirty" run_test ./mkdirty
+
+CATEGORY="mdwe" run_test ./mdwe_test
+
+CATEGORY="page_frag" run_test ./test_page_frag.sh smoke
+
+CATEGORY="page_frag" run_test ./test_page_frag.sh aligned
+
+CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
+
+CATEGORY="rmap" run_test ./rmap
+
+if [ "${HAVE_HUGEPAGES}" = 1 ]; then
+ echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages
+fi
+
+echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix
+echo "1..${count_total}" | tap_output
+
+exit $exitcode
diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings
new file mode 100644
index 000000000000..e2206265f67c
--- /dev/null
+++ b/tools/testing/selftests/mm/settings
@@ -0,0 +1 @@
+timeout=900
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
new file mode 100644
index 000000000000..59c0dbe99a9b
--- /dev/null
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "vm_util.h"
+#include "thp_settings.h"
+
+#define PAGEMAP_FILE_PATH "/proc/self/pagemap"
+#define TEST_ITERATIONS 10000
+
+static void test_simple(int pagemap_fd, int pagesize)
+{
+ int i;
+ char *map;
+
+ map = aligned_alloc(pagesize, pagesize);
+ if (!map)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ clear_softdirty();
+
+ for (i = 0 ; i < TEST_ITERATIONS; i++) {
+ if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+ ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+ break;
+ }
+
+ clear_softdirty();
+ // Write something to the page to get the dirty bit enabled on the page
+ map[0]++;
+
+ if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+ ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+ break;
+ }
+
+ clear_softdirty();
+ }
+ free(map);
+
+ ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__);
+}
+
+static void test_vma_reuse(int pagemap_fd, int pagesize)
+{
+ char *map, *map2;
+
+ map = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed");
+
+ // The kernel always marks new regions as soft dirty
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s dirty bit of allocated page\n", __func__);
+
+ clear_softdirty();
+ munmap(map, pagesize);
+
+ map2 = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed");
+
+ // Dirty bit is set for new regions even if they are reused
+ if (map == map2)
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+ "Test %s dirty bit of reused address page\n", __func__);
+ else
+ ksft_test_result_skip("Test %s dirty bit of reused address page\n", __func__);
+
+ munmap(map2, pagesize);
+}
+
+static void test_hugepage(int pagemap_fd, int pagesize)
+{
+ char *map;
+ int i, ret;
+
+ if (!thp_is_enabled()) {
+ ksft_test_result_skip("Transparent Hugepages not available\n");
+ return;
+ }
+
+ size_t hpage_len = read_pmd_pagesize();
+ if (!hpage_len)
+ ksft_exit_fail_msg("Reading PMD pagesize failed");
+
+ map = memalign(hpage_len, hpage_len);
+ if (!map)
+ ksft_exit_fail_msg("memalign failed\n");
+
+ ret = madvise(map, hpage_len, MADV_HUGEPAGE);
+ if (ret)
+ ksft_exit_fail_msg("madvise failed %d\n", ret);
+
+ for (i = 0; i < hpage_len; i++)
+ map[i] = (char)i;
+
+ if (check_huge_anon(map, 1, hpage_len)) {
+ ksft_test_result_pass("Test %s huge page allocation\n", __func__);
+
+ clear_softdirty();
+ for (i = 0 ; i < TEST_ITERATIONS ; i++) {
+ if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+ ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+ break;
+ }
+
+ clear_softdirty();
+ // Write something to the page to get the dirty bit enabled on the page
+ map[0]++;
+
+ if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+ ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+ break;
+ }
+ clear_softdirty();
+ }
+
+ ksft_test_result(i == TEST_ITERATIONS, "Test %s huge page dirty bit\n", __func__);
+ } else {
+ // hugepage allocation failed. skip these tests
+ ksft_test_result_skip("Test %s huge page allocation\n", __func__);
+ ksft_test_result_skip("Test %s huge page dirty bit\n", __func__);
+ }
+ free(map);
+}
+
+static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
+{
+ const char *type[] = {"file", "anon"};
+ const char *fname = "./soft-dirty-test-file";
+ int test_fd = 0;
+ char *map;
+
+ if (anon) {
+ map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (!map)
+ ksft_exit_fail_msg("anon mmap failed\n");
+ } else {
+ test_fd = open(fname, O_RDWR | O_CREAT, 0664);
+ if (test_fd < 0) {
+ ksft_test_result_skip("Test %s open() file failed\n", __func__);
+ return;
+ }
+ unlink(fname);
+ ftruncate(test_fd, pagesize);
+ map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
+ MAP_SHARED, test_fd, 0);
+ if (!map)
+ ksft_exit_fail_msg("file mmap failed\n");
+ }
+
+ *map = 1;
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-%s dirty bit of new written page\n",
+ __func__, type[anon]);
+ clear_softdirty();
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after clear_refs\n",
+ __func__, type[anon]);
+ mprotect(map, pagesize, PROT_READ);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after marking RO\n",
+ __func__, type[anon]);
+ mprotect(map, pagesize, PROT_READ|PROT_WRITE);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after marking RW\n",
+ __func__, type[anon]);
+ *map = 2;
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-%s soft-dirty after rewritten\n",
+ __func__, type[anon]);
+
+ munmap(map, pagesize);
+
+ if (!anon)
+ close(test_fd);
+}
+
+static void test_merge(int pagemap_fd, int pagesize)
+{
+ char *reserved, *map, *map2;
+
+ /*
+ * Reserve space for tests:
+ *
+ * ---padding to ---
+ * | avoid adj. |
+ * v merge v
+ * |---|---|---|---|---|
+ * | | 1 | 2 | 3 | |
+ * |---|---|---|---|---|
+ */
+ reserved = mmap(NULL, 5 * pagesize, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ if (reserved == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ munmap(reserved, 4 * pagesize);
+
+ /*
+ * Establish initial VMA:
+ *
+ * S/D
+ * |---|---|---|---|---|
+ * | | 1 | | | |
+ * |---|---|---|---|---|
+ */
+ map = mmap(&reserved[pagesize], pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* This will clear VM_SOFTDIRTY too. */
+ clear_softdirty();
+
+ /*
+ * Now place a new mapping which will be marked VM_SOFTDIRTY. Away from
+ * map:
+ *
+ * - S/D
+ * |---|---|---|---|---|
+ * | | 1 | | 2 | |
+ * |---|---|---|---|---|
+ */
+ map2 = mmap(&reserved[3 * pagesize], pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /*
+ * Now remap it immediately adjacent to map, if the merge correctly
+ * propagates VM_SOFTDIRTY, we should then observe the VMA as a whole
+ * being marked soft-dirty:
+ *
+ * merge
+ * S/D
+ * |---|-------|---|---|
+ * | | 1 | | |
+ * |---|-------|---|---|
+ */
+ map2 = mremap(map2, pagesize, pagesize, MREMAP_FIXED | MREMAP_MAYMOVE,
+ &reserved[2 * pagesize]);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mremap failed\n");
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-anon soft-dirty after remap merge 1st pg\n",
+ __func__);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+ "Test %s-anon soft-dirty after remap merge 2nd pg\n",
+ __func__);
+
+ munmap(map, 2 * pagesize);
+
+ /*
+ * Now establish another VMA:
+ *
+ * S/D
+ * |---|---|---|---|---|
+ * | | 1 | | | |
+ * |---|---|---|---|---|
+ */
+ map = mmap(&reserved[pagesize], pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* Clear VM_SOFTDIRTY... */
+ clear_softdirty();
+ /* ...and establish incompatible adjacent VMA:
+ *
+ * - S/D
+ * |---|---|---|---|---|
+ * | | 1 | 2 | | |
+ * |---|---|---|---|---|
+ */
+ map2 = mmap(&reserved[2 * pagesize], pagesize,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /*
+ * Now mprotect() VMA 1 so it's compatible with 2 and therefore merges:
+ *
+ * merge
+ * S/D
+ * |---|-------|---|---|
+ * | | 1 | | |
+ * |---|-------|---|---|
+ */
+ if (mprotect(map, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC))
+ ksft_exit_fail_msg("mprotect failed\n");
+
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-anon soft-dirty after mprotect merge 1st pg\n",
+ __func__);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+ "Test %s-anon soft-dirty after mprotect merge 2nd pg\n",
+ __func__);
+
+ munmap(map, 2 * pagesize);
+}
+
+static void test_mprotect_anon(int pagemap_fd, int pagesize)
+{
+ test_mprotect(pagemap_fd, pagesize, true);
+}
+
+static void test_mprotect_file(int pagemap_fd, int pagesize)
+{
+ test_mprotect(pagemap_fd, pagesize, false);
+}
+
+int main(int argc, char **argv)
+{
+ int pagemap_fd;
+ int pagesize;
+
+ ksft_print_header();
+
+ if (!softdirty_supported())
+ ksft_exit_skip("soft-dirty is not support\n");
+
+ ksft_set_plan(19);
+ pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH);
+
+ pagesize = getpagesize();
+
+ test_simple(pagemap_fd, pagesize);
+ test_vma_reuse(pagemap_fd, pagesize);
+ test_hugepage(pagemap_fd, pagesize);
+ test_mprotect_anon(pagemap_fd, pagesize);
+ test_mprotect_file(pagemap_fd, pagesize);
+ test_merge(pagemap_fd, pagesize);
+
+ close(pagemap_fd);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
new file mode 100644
index 000000000000..40799f3f0213
--- /dev/null
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -0,0 +1,837 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual
+ * address range in a process via <debugfs>/split_huge_pages interface.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <malloc.h>
+#include <stdbool.h>
+#include <time.h>
+#include "vm_util.h"
+#include "kselftest.h"
+
+uint64_t pagesize;
+unsigned int pageshift;
+uint64_t pmd_pagesize;
+unsigned int pmd_order;
+int *expected_orders;
+
+#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
+#define SMAP_PATH "/proc/self/smaps"
+#define INPUT_MAX 80
+
+#define PID_FMT "%d,0x%lx,0x%lx,%d"
+#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
+#define PATH_FMT "%s,0x%lx,0x%lx,%d"
+
+const char *pagemap_proc = "/proc/self/pagemap";
+const char *kpageflags_proc = "/proc/kpageflags";
+int pagemap_fd;
+int kpageflags_fd;
+
+static bool is_backed_by_folio(char *vaddr, int order, int pagemap_fd,
+ int kpageflags_fd)
+{
+ const uint64_t folio_head_flags = KPF_THP | KPF_COMPOUND_HEAD;
+ const uint64_t folio_tail_flags = KPF_THP | KPF_COMPOUND_TAIL;
+ const unsigned long nr_pages = 1UL << order;
+ unsigned long pfn_head;
+ uint64_t pfn_flags;
+ unsigned long pfn;
+ unsigned long i;
+
+ pfn = pagemap_get_pfn(pagemap_fd, vaddr);
+
+ /* non present page */
+ if (pfn == -1UL)
+ return false;
+
+ if (pageflags_get(pfn, kpageflags_fd, &pfn_flags))
+ goto fail;
+
+ /* check for order-0 pages */
+ if (!order) {
+ if (pfn_flags & (folio_head_flags | folio_tail_flags))
+ return false;
+ return true;
+ }
+
+ /* non THP folio */
+ if (!(pfn_flags & KPF_THP))
+ return false;
+
+ pfn_head = pfn & ~(nr_pages - 1);
+
+ if (pageflags_get(pfn_head, kpageflags_fd, &pfn_flags))
+ goto fail;
+
+ /* head PFN has no compound_head flag set */
+ if ((pfn_flags & folio_head_flags) != folio_head_flags)
+ return false;
+
+ /* check all tail PFN flags */
+ for (i = 1; i < nr_pages; i++) {
+ if (pageflags_get(pfn_head + i, kpageflags_fd, &pfn_flags))
+ goto fail;
+ if ((pfn_flags & folio_tail_flags) != folio_tail_flags)
+ return false;
+ }
+
+ /*
+ * check the PFN after this folio, but if its flags cannot be obtained,
+ * assume this folio has the expected order
+ */
+ if (pageflags_get(pfn_head + nr_pages, kpageflags_fd, &pfn_flags))
+ return true;
+
+ /* If we find another tail page, then the folio is larger. */
+ return (pfn_flags & folio_tail_flags) != folio_tail_flags;
+fail:
+ ksft_exit_fail_msg("Failed to get folio info\n");
+ return false;
+}
+
+static int vaddr_pageflags_get(char *vaddr, int pagemap_fd, int kpageflags_fd,
+ uint64_t *flags)
+{
+ unsigned long pfn;
+
+ pfn = pagemap_get_pfn(pagemap_fd, vaddr);
+
+ /* non-present PFN */
+ if (pfn == -1UL)
+ return 1;
+
+ if (pageflags_get(pfn, kpageflags_fd, flags))
+ return -1;
+
+ return 0;
+}
+
+/*
+ * gather_after_split_folio_orders - scan through [vaddr_start, len) and record
+ * folio orders
+ *
+ * @vaddr_start: start vaddr
+ * @len: range length
+ * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
+ * @kpageflags_fd: file descriptor to /proc/kpageflags
+ * @orders: output folio order array
+ * @nr_orders: folio order array size
+ *
+ * gather_after_split_folio_orders() scan through [vaddr_start, len) and check
+ * all folios within the range and record their orders. All order-0 pages will
+ * be recorded. Non-present vaddr is skipped.
+ *
+ * NOTE: the function is used to check folio orders after a split is performed,
+ * so it assumes [vaddr_start, len) fully maps to after-split folios within that
+ * range.
+ *
+ * Return: 0 - no error, -1 - unhandled cases
+ */
+static int gather_after_split_folio_orders(char *vaddr_start, size_t len,
+ int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders)
+{
+ uint64_t page_flags = 0;
+ int cur_order = -1;
+ char *vaddr;
+
+ if (pagemap_fd == -1 || kpageflags_fd == -1)
+ return -1;
+ if (!orders)
+ return -1;
+ if (nr_orders <= 0)
+ return -1;
+
+ for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
+ char *next_folio_vaddr;
+ int status;
+
+ status = vaddr_pageflags_get(vaddr, pagemap_fd, kpageflags_fd,
+ &page_flags);
+ if (status < 0)
+ return -1;
+
+ /* skip non present vaddr */
+ if (status == 1) {
+ vaddr += psize();
+ continue;
+ }
+
+ /* all order-0 pages with possible false postive (non folio) */
+ if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
+ orders[0]++;
+ vaddr += psize();
+ continue;
+ }
+
+ /* skip non thp compound pages */
+ if (!(page_flags & KPF_THP)) {
+ vaddr += psize();
+ continue;
+ }
+
+ /* vpn points to part of a THP at this point */
+ if (page_flags & KPF_COMPOUND_HEAD)
+ cur_order = 1;
+ else {
+ vaddr += psize();
+ continue;
+ }
+
+ next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
+
+ if (next_folio_vaddr >= vaddr_start + len)
+ break;
+
+ while ((status = vaddr_pageflags_get(next_folio_vaddr,
+ pagemap_fd, kpageflags_fd,
+ &page_flags)) >= 0) {
+ /*
+ * non present vaddr, next compound head page, or
+ * order-0 page
+ */
+ if (status == 1 ||
+ (page_flags & KPF_COMPOUND_HEAD) ||
+ !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
+ if (cur_order < nr_orders) {
+ orders[cur_order]++;
+ cur_order = -1;
+ vaddr = next_folio_vaddr;
+ }
+ break;
+ }
+
+ cur_order++;
+ next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
+ }
+
+ if (status < 0)
+ return status;
+ }
+ if (cur_order > 0 && cur_order < nr_orders)
+ orders[cur_order]++;
+ return 0;
+}
+
+static int check_after_split_folio_orders(char *vaddr_start, size_t len,
+ int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders)
+{
+ int *vaddr_orders;
+ int status;
+ int i;
+
+ vaddr_orders = (int *)malloc(sizeof(int) * nr_orders);
+
+ if (!vaddr_orders)
+ ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders");
+
+ memset(vaddr_orders, 0, sizeof(int) * nr_orders);
+ status = gather_after_split_folio_orders(vaddr_start, len, pagemap_fd,
+ kpageflags_fd, vaddr_orders, nr_orders);
+ if (status)
+ ksft_exit_fail_msg("gather folio info failed\n");
+
+ for (i = 0; i < nr_orders; i++)
+ if (vaddr_orders[i] != orders[i]) {
+ ksft_print_msg("order %d: expected: %d got %d\n", i,
+ orders[i], vaddr_orders[i]);
+ status = -1;
+ }
+
+ free(vaddr_orders);
+ return status;
+}
+
+static void write_file(const char *path, const char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numwritten;
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno));
+
+ numwritten = write(fd, buf, buflen - 1);
+ close(fd);
+ if (numwritten < 1)
+ ksft_exit_fail_msg("Write failed\n");
+}
+
+static void write_debugfs(const char *fmt, ...)
+{
+ char input[INPUT_MAX];
+ int ret;
+ va_list argp;
+
+ va_start(argp, fmt);
+ ret = vsnprintf(input, INPUT_MAX, fmt, argp);
+ va_end(argp);
+
+ if (ret >= INPUT_MAX)
+ ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__);
+
+ write_file(SPLIT_DEBUGFS, input, ret + 1);
+}
+
+static char *allocate_zero_filled_hugepage(size_t len)
+{
+ char *result;
+ size_t i;
+
+ result = memalign(pmd_pagesize, len);
+ if (!result) {
+ printf("Fail to allocate memory\n");
+ exit(EXIT_FAILURE);
+ }
+
+ madvise(result, len, MADV_HUGEPAGE);
+
+ for (i = 0; i < len; i++)
+ result[i] = (char)0;
+
+ return result;
+}
+
+static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len)
+{
+ unsigned long rss_anon_before, rss_anon_after;
+ size_t i;
+
+ if (!check_huge_anon(one_page, nr_hpages, pmd_pagesize))
+ ksft_exit_fail_msg("No THP is allocated\n");
+
+ rss_anon_before = rss_anon();
+ if (!rss_anon_before)
+ ksft_exit_fail_msg("No RssAnon is allocated before split\n");
+
+ /* split all THPs */
+ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
+ (uint64_t)one_page + len, 0);
+
+ for (i = 0; i < len; i++)
+ if (one_page[i] != (char)0)
+ ksft_exit_fail_msg("%ld byte corrupted\n", i);
+
+ if (!check_huge_anon(one_page, 0, pmd_pagesize))
+ ksft_exit_fail_msg("Still AnonHugePages not split\n");
+
+ rss_anon_after = rss_anon();
+ if (rss_anon_after >= rss_anon_before)
+ ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n",
+ rss_anon_before, rss_anon_after);
+}
+
+static void split_pmd_zero_pages(void)
+{
+ char *one_page;
+ int nr_hpages = 4;
+ size_t len = nr_hpages * pmd_pagesize;
+
+ one_page = allocate_zero_filled_hugepage(len);
+ verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len);
+ ksft_test_result_pass("Split zero filled huge pages successful\n");
+ free(one_page);
+}
+
+static void split_pmd_thp_to_order(int order)
+{
+ char *one_page;
+ size_t len = 4 * pmd_pagesize;
+ size_t i;
+
+ one_page = memalign(pmd_pagesize, len);
+ if (!one_page)
+ ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
+
+ madvise(one_page, len, MADV_HUGEPAGE);
+
+ for (i = 0; i < len; i++)
+ one_page[i] = (char)i;
+
+ if (!check_huge_anon(one_page, 4, pmd_pagesize))
+ ksft_exit_fail_msg("No THP is allocated\n");
+
+ /* split all THPs */
+ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
+ (uint64_t)one_page + len, order);
+
+ for (i = 0; i < len; i++)
+ if (one_page[i] != (char)i)
+ ksft_exit_fail_msg("%ld byte corrupted\n", i);
+
+ memset(expected_orders, 0, sizeof(int) * (pmd_order + 1));
+ expected_orders[order] = 4 << (pmd_order - order);
+
+ if (check_after_split_folio_orders(one_page, len, pagemap_fd,
+ kpageflags_fd, expected_orders,
+ (pmd_order + 1)))
+ ksft_exit_fail_msg("Unexpected THP split\n");
+
+ if (!check_huge_anon(one_page, 0, pmd_pagesize))
+ ksft_exit_fail_msg("Still AnonHugePages not split\n");
+
+ ksft_test_result_pass("Split huge pages to order %d successful\n", order);
+ free(one_page);
+}
+
+static void split_pte_mapped_thp(void)
+{
+ const size_t nr_thps = 4;
+ const size_t thp_area_size = nr_thps * pmd_pagesize;
+ const size_t page_area_size = nr_thps * pagesize;
+ char *thp_area, *tmp, *page_area = MAP_FAILED;
+ size_t i;
+
+ thp_area = mmap((void *)(1UL << 30), thp_area_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (thp_area == MAP_FAILED) {
+ ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno));
+ return;
+ }
+
+ madvise(thp_area, thp_area_size, MADV_HUGEPAGE);
+
+ for (i = 0; i < thp_area_size; i++)
+ thp_area[i] = (char)i;
+
+ if (!check_huge_anon(thp_area, nr_thps, pmd_pagesize)) {
+ ksft_test_result_skip("Not all THPs allocated\n");
+ goto out;
+ }
+
+ /*
+ * To challenge spitting code, we will mremap a single page of each
+ * THP (page[i] of thp[i]) in the thp_area into page_area. This will
+ * replace the PMD mappings in the thp_area by PTE mappings first,
+ * but leaving the THP unsplit, to then create a page-sized hole in
+ * the thp_area.
+ * We will then manually trigger splitting of all THPs through the
+ * single mremap'ed pages of each THP in the page_area.
+ */
+ page_area = mmap(NULL, page_area_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (page_area == MAP_FAILED) {
+ ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno));
+ goto out;
+ }
+
+ for (i = 0; i < nr_thps; i++) {
+ tmp = mremap(thp_area + pmd_pagesize * i + pagesize * i,
+ pagesize, pagesize, MREMAP_MAYMOVE|MREMAP_FIXED,
+ page_area + pagesize * i);
+ if (tmp != MAP_FAILED)
+ continue;
+ ksft_test_result_fail("mremap failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ /*
+ * Verify that our THPs were not split yet. Note that
+ * check_huge_anon() cannot be used as it checks for PMD mappings.
+ */
+ for (i = 0; i < nr_thps; i++) {
+ if (is_backed_by_folio(page_area + i * pagesize, pmd_order,
+ pagemap_fd, kpageflags_fd))
+ continue;
+ ksft_test_result_fail("THP %zu missing after mremap\n", i);
+ goto out;
+ }
+
+ /* Split all THPs through the remapped pages. */
+ write_debugfs(PID_FMT, getpid(), (uint64_t)page_area,
+ (uint64_t)page_area + page_area_size, 0);
+
+ /* Corruption during mremap or split? */
+ for (i = 0; i < page_area_size; i++) {
+ if (page_area[i] == (char)i)
+ continue;
+ ksft_test_result_fail("%zu byte corrupted\n", i);
+ goto out;
+ }
+
+ /* Split failed? */
+ for (i = 0; i < nr_thps; i++) {
+ if (is_backed_by_folio(page_area + i * pagesize, 0,
+ pagemap_fd, kpageflags_fd))
+ continue;
+ ksft_test_result_fail("THP %zu not split\n", i);
+ }
+
+ ksft_test_result_pass("Split PTE-mapped huge pages successful\n");
+out:
+ munmap(thp_area, thp_area_size);
+ if (page_area != MAP_FAILED)
+ munmap(page_area, page_area_size);
+}
+
+static void split_file_backed_thp(int order)
+{
+ int status;
+ int fd;
+ char tmpfs_template[] = "/tmp/thp_split_XXXXXX";
+ const char *tmpfs_loc = mkdtemp(tmpfs_template);
+ char testfile[INPUT_MAX];
+ ssize_t num_written, num_read;
+ char *file_buf1, *file_buf2;
+ uint64_t pgoff_start = 0, pgoff_end = 1024;
+ int i;
+
+ ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n");
+
+ file_buf1 = (char *)malloc(pmd_pagesize);
+ file_buf2 = (char *)malloc(pmd_pagesize);
+
+ if (!file_buf1 || !file_buf2) {
+ ksft_print_msg("cannot allocate file buffers\n");
+ goto out;
+ }
+
+ for (i = 0; i < pmd_pagesize; i++)
+ file_buf1[i] = (char)i;
+ memset(file_buf2, 0, pmd_pagesize);
+
+ status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
+
+ if (status)
+ ksft_exit_fail_msg("Unable to create a tmpfs for testing\n");
+
+ status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
+ if (status >= INPUT_MAX) {
+ ksft_print_msg("Fail to create file-backed THP split testing file\n");
+ goto cleanup;
+ }
+
+ fd = open(testfile, O_CREAT|O_RDWR, 0664);
+ if (fd == -1) {
+ ksft_perror("Cannot open testing file");
+ goto cleanup;
+ }
+
+ /* write pmd size data to the file, so a file-backed THP can be allocated */
+ num_written = write(fd, file_buf1, pmd_pagesize);
+
+ if (num_written == -1 || num_written != pmd_pagesize) {
+ ksft_perror("Failed to write data to testing file");
+ goto close_file;
+ }
+
+ /* split the file-backed THP */
+ write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order);
+
+ /* check file content after split */
+ status = lseek(fd, 0, SEEK_SET);
+ if (status == -1) {
+ ksft_perror("Cannot lseek file");
+ goto close_file;
+ }
+
+ num_read = read(fd, file_buf2, num_written);
+ if (num_read == -1 || num_read != num_written) {
+ ksft_perror("Cannot read file content back");
+ goto close_file;
+ }
+
+ if (strncmp(file_buf1, file_buf2, pmd_pagesize) != 0) {
+ ksft_print_msg("File content changed\n");
+ goto close_file;
+ }
+
+ close(fd);
+ status = unlink(testfile);
+ if (status) {
+ ksft_perror("Cannot remove testing file");
+ goto cleanup;
+ }
+
+ status = umount(tmpfs_loc);
+ if (status) {
+ rmdir(tmpfs_loc);
+ ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc);
+ }
+
+ status = rmdir(tmpfs_loc);
+ if (status)
+ ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno));
+
+ ksft_print_msg("Please check dmesg for more information\n");
+ ksft_test_result_pass("File-backed THP split to order %d test done\n", order);
+ return;
+
+close_file:
+ close(fd);
+cleanup:
+ umount(tmpfs_loc);
+ rmdir(tmpfs_loc);
+out:
+ ksft_exit_fail_msg("Error occurred\n");
+}
+
+static bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
+ const char **thp_fs_loc)
+{
+ if (xfs_path) {
+ *thp_fs_loc = xfs_path;
+ return false;
+ }
+
+ *thp_fs_loc = mkdtemp(thp_fs_template);
+
+ if (!*thp_fs_loc)
+ ksft_exit_fail_msg("cannot create temp folder\n");
+
+ return true;
+}
+
+static void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
+{
+ int status;
+
+ if (!created_tmp)
+ return;
+
+ status = rmdir(thp_fs_loc);
+ if (status)
+ ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
+ strerror(errno));
+}
+
+static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size,
+ int *fd, char **addr)
+{
+ size_t i;
+ unsigned char buf[1024];
+
+ srand(time(NULL));
+
+ *fd = open(testfile, O_CREAT | O_RDWR, 0664);
+ if (*fd == -1)
+ ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
+
+ assert(fd_size % sizeof(buf) == 0);
+ for (i = 0; i < sizeof(buf); i++)
+ buf[i] = (unsigned char)i;
+ for (i = 0; i < fd_size; i += sizeof(buf))
+ write(*fd, buf, sizeof(buf));
+
+ close(*fd);
+ sync();
+ *fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+ if (*fd == -1) {
+ ksft_perror("open drop_caches");
+ goto err_out_unlink;
+ }
+ if (write(*fd, "3", 1) != 1) {
+ ksft_perror("write to drop_caches");
+ goto err_out_unlink;
+ }
+ close(*fd);
+
+ *fd = open(testfile, O_RDWR);
+ if (*fd == -1) {
+ ksft_perror("Failed to open testfile\n");
+ goto err_out_unlink;
+ }
+
+ *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
+ if (*addr == (char *)-1) {
+ ksft_perror("cannot mmap");
+ goto err_out_close;
+ }
+ madvise(*addr, fd_size, MADV_HUGEPAGE);
+
+ for (size_t i = 0; i < fd_size; i++) {
+ char *addr2 = *addr + i;
+
+ FORCE_READ(*addr2);
+ }
+
+ if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
+ ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
+ munmap(*addr, fd_size);
+ close(*fd);
+ unlink(testfile);
+ ksft_test_result_skip("Pagecache folio split skipped\n");
+ return -2;
+ }
+ return 0;
+err_out_close:
+ close(*fd);
+err_out_unlink:
+ unlink(testfile);
+ ksft_exit_fail_msg("Failed to create large pagecache folios\n");
+ return -1;
+}
+
+static void split_thp_in_pagecache_to_order_at(size_t fd_size,
+ const char *fs_loc, int order, int offset)
+{
+ int fd;
+ char *split_addr;
+ char *addr;
+ size_t i;
+ char testfile[INPUT_MAX];
+ int err = 0;
+
+ err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc);
+
+ if (err < 0)
+ ksft_exit_fail_msg("cannot generate right test file name\n");
+
+ err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+ if (err)
+ return;
+
+ err = 0;
+
+ memset(expected_orders, 0, sizeof(int) * (pmd_order + 1));
+ /*
+ * use [split_addr, split_addr + pagesize) range to split THPs, since
+ * the debugfs function always split a range with pagesize step and
+ * providing a full [addr, addr + fd_size) range can trigger multiple
+ * splits, complicating after-split result checking.
+ */
+ if (offset == -1) {
+ for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize)
+ write_debugfs(PID_FMT, getpid(), (uint64_t)split_addr,
+ (uint64_t)split_addr + pagesize, order);
+
+ expected_orders[order] = fd_size / (pagesize << order);
+ } else {
+ int times = fd_size / pmd_pagesize;
+
+ for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize)
+ write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)split_addr,
+ (uint64_t)split_addr + pagesize, order, offset);
+
+ for (i = order + 1; i < pmd_order; i++)
+ expected_orders[i] = times;
+ expected_orders[order] = 2 * times;
+ }
+
+ for (i = 0; i < fd_size; i++)
+ if (*(addr + i) != (char)i) {
+ ksft_print_msg("%lu byte corrupted in the file\n", i);
+ err = EXIT_FAILURE;
+ goto out;
+ }
+
+ if (check_after_split_folio_orders(addr, fd_size, pagemap_fd,
+ kpageflags_fd, expected_orders,
+ (pmd_order + 1))) {
+ ksft_print_msg("Unexpected THP split\n");
+ err = 1;
+ goto out;
+ }
+
+ if (!check_huge_file(addr, 0, pmd_pagesize)) {
+ ksft_print_msg("Still FilePmdMapped not split\n");
+ err = EXIT_FAILURE;
+ goto out;
+ }
+
+out:
+ munmap(addr, fd_size);
+ close(fd);
+ unlink(testfile);
+ if (offset == -1) {
+ if (err)
+ ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order);
+ ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order);
+ } else {
+ if (err)
+ ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset);
+ ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int i;
+ size_t fd_size;
+ char *optional_xfs_path = NULL;
+ char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
+ const char *fs_loc;
+ bool created_tmp;
+ int offset;
+ unsigned int nr_pages;
+ unsigned int tests;
+
+ ksft_print_header();
+
+ if (geteuid() != 0) {
+ ksft_print_msg("Please run the benchmark as root\n");
+ ksft_finished();
+ }
+
+ if (argc > 1)
+ optional_xfs_path = argv[1];
+
+ pagesize = getpagesize();
+ pageshift = ffs(pagesize) - 1;
+ pmd_pagesize = read_pmd_pagesize();
+ if (!pmd_pagesize)
+ ksft_exit_fail_msg("Reading PMD pagesize failed\n");
+
+ nr_pages = pmd_pagesize / pagesize;
+ pmd_order = sz2ord(pmd_pagesize, pagesize);
+
+ expected_orders = (int *)malloc(sizeof(int) * (pmd_order + 1));
+ if (!expected_orders)
+ ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
+
+ tests = 2 + (pmd_order - 1) + (2 * pmd_order) + (pmd_order - 1) * 4 + 2;
+ ksft_set_plan(tests);
+
+ pagemap_fd = open(pagemap_proc, O_RDONLY);
+ if (pagemap_fd == -1)
+ ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno));
+
+ kpageflags_fd = open(kpageflags_proc, O_RDONLY);
+ if (kpageflags_fd == -1)
+ ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno));
+
+ fd_size = 2 * pmd_pagesize;
+
+ split_pmd_zero_pages();
+
+ for (i = 0; i < pmd_order; i++)
+ if (i != 1)
+ split_pmd_thp_to_order(i);
+
+ split_pte_mapped_thp();
+ for (i = 0; i < pmd_order; i++)
+ split_file_backed_thp(i);
+
+ created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template,
+ &fs_loc);
+ for (i = pmd_order - 1; i >= 0; i--)
+ split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1);
+
+ for (i = 0; i < pmd_order; i++)
+ for (offset = 0;
+ offset < nr_pages;
+ offset += MAX(nr_pages / 4, 1 << i))
+ split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset);
+ cleanup_thp_fs(fs_loc, created_tmp);
+
+ close(pagemap_fd);
+ close(kpageflags_fd);
+ free(expected_orders);
+
+ ksft_finished();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/mm/test_hmm.sh
index 0647b525a625..46e19b5d648d 100755
--- a/tools/testing/selftests/vm/test_hmm.sh
+++ b/tools/testing/selftests/mm/test_hmm.sh
@@ -40,25 +40,30 @@ check_test_requirements()
load_driver()
{
- modprobe $DRIVER > /dev/null 2>&1
- if [ $? == 0 ]; then
- major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
- mknod /dev/hmm_dmirror0 c $major 0
- mknod /dev/hmm_dmirror1 c $major 1
+ if [ $# -eq 0 ]; then
+ modprobe $DRIVER > /dev/null 2>&1
+ else
+ if [ $# -eq 2 ]; then
+ modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2
+ > /dev/null 2>&1
+ else
+ echo "Missing module parameters. Make sure pass"\
+ "spm_addr_dev0 and spm_addr_dev1"
+ usage
+ fi
fi
}
unload_driver()
{
modprobe -r $DRIVER > /dev/null 2>&1
- rm -f /dev/hmm_dmirror?
}
run_smoke()
{
echo "Running smoke test. Note, this test provides basic coverage."
- load_driver
+ load_driver $1 $2
$(dirname "${BASH_SOURCE[0]}")/hmm-tests
unload_driver
}
@@ -75,6 +80,9 @@ usage()
echo "# Smoke testing"
echo "./${TEST_NAME}.sh smoke"
echo
+ echo "# Smoke testing with SPM enabled"
+ echo "./${TEST_NAME}.sh smoke <spm_addr_dev0> <spm_addr_dev1>"
+ echo
exit 0
}
@@ -84,7 +92,7 @@ function run_test()
usage
else
if [ "$1" = "smoke" ]; then
- run_smoke
+ run_smoke $2 $3
else
usage
fi
diff --git a/tools/testing/selftests/mm/test_page_frag.sh b/tools/testing/selftests/mm/test_page_frag.sh
new file mode 100755
index 000000000000..f55b105084cf
--- /dev/null
+++ b/tools/testing/selftests/mm/test_page_frag.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com>
+# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
+#
+# This is a test script for the kernel test driver to test the
+# correctness and performance of page_frag's implementation.
+# Therefore it is just a kernel module loader. You can specify
+# and pass different parameters in order to:
+# a) analyse performance of page fragment allocations;
+# b) stressing and stability check of page_frag subsystem.
+
+DRIVER="./page_frag/page_frag_test.ko"
+CPU_LIST=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
+TEST_CPU_0=$(echo $CPU_LIST | awk '{print $1}')
+
+if [ $(echo $CPU_LIST | wc -w) -gt 1 ]; then
+ TEST_CPU_1=$(echo $CPU_LIST | awk '{print $2}')
+ NR_TEST=100000000
+else
+ TEST_CPU_1=$TEST_CPU_0
+ NR_TEST=1000000
+fi
+
+# 1 if fails
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_test_failed_prefix() {
+ if dmesg | grep -q 'page_frag_test failed:';then
+ echo "page_frag_test failed, please check dmesg"
+ exit $exitcode
+ fi
+}
+
+#
+# Static templates for testing of page_frag APIs.
+# Also it is possible to pass any supported parameters manually.
+#
+SMOKE_PARAM="test_push_cpu=$TEST_CPU_0 test_pop_cpu=$TEST_CPU_1"
+NONALIGNED_PARAM="$SMOKE_PARAM test_alloc_len=75 nr_test=$NR_TEST"
+ALIGNED_PARAM="$NONALIGNED_PARAM test_align=1"
+
+check_test_requirements()
+{
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "$0: Must be run as root"
+ exit $ksft_skip
+ fi
+
+ if ! which insmod > /dev/null 2>&1; then
+ echo "$0: You need insmod installed"
+ exit $ksft_skip
+ fi
+
+ if [ ! -f $DRIVER ]; then
+ echo "$0: You need to compile page_frag_test module"
+ exit $ksft_skip
+ fi
+}
+
+run_nonaligned_check()
+{
+ echo "Run performance tests to evaluate how fast nonaligned alloc API is."
+
+ insmod $DRIVER $NONALIGNED_PARAM > /dev/null 2>&1
+}
+
+run_aligned_check()
+{
+ echo "Run performance tests to evaluate how fast aligned alloc API is."
+
+ insmod $DRIVER $ALIGNED_PARAM > /dev/null 2>&1
+}
+
+run_smoke_check()
+{
+ echo "Run smoke test."
+
+ insmod $DRIVER $SMOKE_PARAM > /dev/null 2>&1
+}
+
+usage()
+{
+ echo -n "Usage: $0 [ aligned ] | [ nonaligned ] | | [ smoke ] | "
+ echo "manual parameters"
+ echo
+ echo "Valid tests and parameters:"
+ echo
+ modinfo $DRIVER
+ echo
+ echo "Example usage:"
+ echo
+ echo "# Shows help message"
+ echo "$0"
+ echo
+ echo "# Smoke testing"
+ echo "$0 smoke"
+ echo
+ echo "# Performance testing for nonaligned alloc API"
+ echo "$0 nonaligned"
+ echo
+ echo "# Performance testing for aligned alloc API"
+ echo "$0 aligned"
+ echo
+ exit 0
+}
+
+function validate_passed_args()
+{
+ VALID_ARGS=`modinfo $DRIVER | awk '/parm:/ {print $2}' | sed 's/:.*//'`
+
+ #
+ # Something has been passed, check it.
+ #
+ for passed_arg in $@; do
+ key=${passed_arg//=*/}
+ valid=0
+
+ for valid_arg in $VALID_ARGS; do
+ if [[ $key = $valid_arg ]]; then
+ valid=1
+ break
+ fi
+ done
+
+ if [[ $valid -ne 1 ]]; then
+ echo "Error: key is not correct: ${key}"
+ exit $exitcode
+ fi
+ done
+}
+
+function run_manual_check()
+{
+ #
+ # Validate passed parameters. If there is wrong one,
+ # the script exists and does not execute further.
+ #
+ validate_passed_args $@
+
+ echo "Run the test with following parameters: $@"
+ insmod $DRIVER $@ > /dev/null 2>&1
+}
+
+function run_test()
+{
+ if [ $# -eq 0 ]; then
+ usage
+ else
+ if [[ "$1" = "smoke" ]]; then
+ run_smoke_check
+ elif [[ "$1" = "nonaligned" ]]; then
+ run_nonaligned_check
+ elif [[ "$1" = "aligned" ]]; then
+ run_aligned_check
+ else
+ run_manual_check $@
+ fi
+ fi
+
+ check_test_failed_prefix
+
+ echo "Done."
+ echo "Check the kernel ring buffer to see the summary."
+}
+
+check_test_requirements
+run_test $@
+
+exit 0
diff --git a/tools/testing/selftests/vm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh
index 06d2bb109f06..d39096723fca 100755
--- a/tools/testing/selftests/vm/test_vmalloc.sh
+++ b/tools/testing/selftests/mm/test_vmalloc.sh
@@ -11,6 +11,7 @@
TEST_NAME="vmalloc"
DRIVER="test_${TEST_NAME}"
+NUM_CPUS=`grep -c ^processor /proc/cpuinfo`
# 1 if fails
exitcode=1
@@ -22,9 +23,9 @@ ksft_skip=4
# Static templates for performance, stressing and smoke tests.
# Also it is possible to pass any supported parameters manualy.
#
-PERF_PARAM="single_cpu_test=1 sequential_test_order=1 test_repeat_count=3"
-SMOKE_PARAM="single_cpu_test=1 test_loop_count=10000 test_repeat_count=10"
-STRESS_PARAM="test_repeat_count=20"
+PERF_PARAM="sequential_test_order=1 test_repeat_count=3"
+SMOKE_PARAM="test_loop_count=10000 test_repeat_count=10"
+STRESS_PARAM="nr_threads=$NUM_CPUS test_repeat_count=20"
check_test_requirements()
{
@@ -46,20 +47,20 @@ check_test_requirements()
fi
}
-run_perfformance_check()
+run_performance_check()
{
echo "Run performance tests to evaluate how fast vmalloc allocation is."
echo "It runs all test cases on one single CPU with sequential order."
modprobe $DRIVER $PERF_PARAM > /dev/null 2>&1
echo "Done."
- echo "Ccheck the kernel message buffer to see the summary."
+ echo "Check the kernel message buffer to see the summary."
}
run_stability_check()
{
- echo "Run stability tests. In order to stress vmalloc subsystem we run"
- echo "all available test cases on all available CPUs simultaneously."
+ echo "Run stability tests. In order to stress vmalloc subsystem all"
+ echo "available test cases are run by NUM_CPUS workers simultaneously."
echo "It will take time, so be patient."
modprobe $DRIVER $STRESS_PARAM > /dev/null 2>&1
@@ -92,17 +93,17 @@ usage()
echo "# Shows help message"
echo "./${DRIVER}.sh"
echo
- echo "# Runs 1 test(id_1), repeats it 5 times on all online CPUs"
- echo "./${DRIVER}.sh run_test_mask=1 test_repeat_count=5"
+ echo "# Runs 1 test(id_1), repeats it 5 times by NUM_CPUS workers"
+ echo "./${DRIVER}.sh nr_threads=$NUM_CPUS run_test_mask=1 test_repeat_count=5"
echo
echo -n "# Runs 4 tests(id_1|id_2|id_4|id_16) on one CPU with "
echo "sequential order"
- echo -n "./${DRIVER}.sh single_cpu_test=1 sequential_test_order=1 "
+ echo -n "./${DRIVER}.sh sequential_test_order=1 "
echo "run_test_mask=23"
echo
- echo -n "# Runs all tests on all online CPUs, shuffled order, repeats "
+ echo -n "# Runs all tests by NUM_CPUS workers, shuffled order, repeats "
echo "20 times"
- echo "./${DRIVER}.sh test_repeat_count=20"
+ echo "./${DRIVER}.sh nr_threads=$NUM_CPUS test_repeat_count=20"
echo
echo "# Performance analysis"
echo "./${DRIVER}.sh performance"
@@ -159,7 +160,7 @@ function run_test()
usage
else
if [[ "$1" = "performance" ]]; then
- run_perfformance_check
+ run_performance_check
elif [[ "$1" = "stress" ]]; then
run_stability_check
elif [[ "$1" = "smoke" ]]; then
diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c
new file mode 100644
index 000000000000..574bd0f8ae48
--- /dev/null
+++ b/tools/testing/selftests/mm/thp_settings.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "thp_settings.h"
+
+#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
+#define MAX_SETTINGS_DEPTH 4
+static struct thp_settings settings_stack[MAX_SETTINGS_DEPTH];
+static int settings_index;
+static struct thp_settings saved_settings;
+static char dev_queue_read_ahead_path[PATH_MAX];
+
+static const char * const thp_enabled_strings[] = {
+ "never",
+ "always",
+ "inherit",
+ "madvise",
+ NULL
+};
+
+static const char * const thp_defrag_strings[] = {
+ "always",
+ "defer",
+ "defer+madvise",
+ "madvise",
+ "never",
+ NULL
+};
+
+static const char * const shmem_enabled_strings[] = {
+ "never",
+ "always",
+ "within_size",
+ "advise",
+ "inherit",
+ "deny",
+ "force",
+ NULL
+};
+
+int read_file(const char *path, char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numread;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+int write_file(const char *path, const char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numwritten;
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1) {
+ printf("open(%s)\n", path);
+ exit(EXIT_FAILURE);
+ return 0;
+ }
+
+ numwritten = write(fd, buf, buflen - 1);
+ close(fd);
+ if (numwritten < 1) {
+ printf("write(%s)\n", buf);
+ exit(EXIT_FAILURE);
+ return 0;
+ }
+
+ return (unsigned int) numwritten;
+}
+
+unsigned long read_num(const char *path)
+{
+ char buf[21];
+
+ if (read_file(path, buf, sizeof(buf)) < 0) {
+ perror("read_file()");
+ exit(EXIT_FAILURE);
+ }
+
+ return strtoul(buf, NULL, 10);
+}
+
+void write_num(const char *path, unsigned long num)
+{
+ char buf[21];
+
+ sprintf(buf, "%ld", num);
+ if (!write_file(path, buf, strlen(buf) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+int thp_read_string(const char *name, const char * const strings[])
+{
+ char path[PATH_MAX];
+ char buf[256];
+ char *c;
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!read_file(path, buf, sizeof(buf))) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+
+ c = strchr(buf, '[');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ c++;
+ memmove(buf, c, sizeof(buf) - (c - buf));
+
+ c = strchr(buf, ']');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ *c = '\0';
+
+ ret = 0;
+ while (strings[ret]) {
+ if (!strcmp(strings[ret], buf))
+ return ret;
+ ret++;
+ }
+
+ printf("Failed to parse %s\n", name);
+ exit(EXIT_FAILURE);
+}
+
+void thp_write_string(const char *name, const char *val)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!write_file(path, val, strlen(val) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+unsigned long thp_read_num(const char *name)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return read_num(path);
+}
+
+void thp_write_num(const char *name, unsigned long num)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ write_num(path, num);
+}
+
+void thp_read_settings(struct thp_settings *settings)
+{
+ unsigned long orders = thp_supported_orders();
+ unsigned long shmem_orders = thp_shmem_supported_orders();
+ char path[PATH_MAX];
+ int i;
+
+ *settings = (struct thp_settings) {
+ .thp_enabled = thp_read_string("enabled", thp_enabled_strings),
+ .thp_defrag = thp_read_string("defrag", thp_defrag_strings),
+ .shmem_enabled =
+ thp_read_string("shmem_enabled", shmem_enabled_strings),
+ .use_zero_page = thp_read_num("use_zero_page"),
+ };
+ settings->khugepaged = (struct khugepaged_settings) {
+ .defrag = thp_read_num("khugepaged/defrag"),
+ .alloc_sleep_millisecs =
+ thp_read_num("khugepaged/alloc_sleep_millisecs"),
+ .scan_sleep_millisecs =
+ thp_read_num("khugepaged/scan_sleep_millisecs"),
+ .max_ptes_none = thp_read_num("khugepaged/max_ptes_none"),
+ .max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"),
+ .max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"),
+ .pages_to_scan = thp_read_num("khugepaged/pages_to_scan"),
+ };
+ if (dev_queue_read_ahead_path[0])
+ settings->read_ahead_kb = read_num(dev_queue_read_ahead_path);
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ if (!((1 << i) & orders)) {
+ settings->hugepages[i].enabled = THP_NEVER;
+ continue;
+ }
+ snprintf(path, PATH_MAX, "hugepages-%ukB/enabled",
+ (getpagesize() >> 10) << i);
+ settings->hugepages[i].enabled =
+ thp_read_string(path, thp_enabled_strings);
+ }
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ if (!((1 << i) & shmem_orders)) {
+ settings->shmem_hugepages[i].enabled = SHMEM_NEVER;
+ continue;
+ }
+ snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled",
+ (getpagesize() >> 10) << i);
+ settings->shmem_hugepages[i].enabled =
+ thp_read_string(path, shmem_enabled_strings);
+ }
+}
+
+void thp_write_settings(struct thp_settings *settings)
+{
+ struct khugepaged_settings *khugepaged = &settings->khugepaged;
+ unsigned long orders = thp_supported_orders();
+ unsigned long shmem_orders = thp_shmem_supported_orders();
+ char path[PATH_MAX];
+ int enabled;
+ int i;
+
+ thp_write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
+ thp_write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
+ thp_write_string("shmem_enabled",
+ shmem_enabled_strings[settings->shmem_enabled]);
+ thp_write_num("use_zero_page", settings->use_zero_page);
+
+ thp_write_num("khugepaged/defrag", khugepaged->defrag);
+ thp_write_num("khugepaged/alloc_sleep_millisecs",
+ khugepaged->alloc_sleep_millisecs);
+ thp_write_num("khugepaged/scan_sleep_millisecs",
+ khugepaged->scan_sleep_millisecs);
+ thp_write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
+ thp_write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
+ thp_write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
+ thp_write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
+
+ if (dev_queue_read_ahead_path[0])
+ write_num(dev_queue_read_ahead_path, settings->read_ahead_kb);
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ if (!((1 << i) & orders))
+ continue;
+ snprintf(path, PATH_MAX, "hugepages-%ukB/enabled",
+ (getpagesize() >> 10) << i);
+ enabled = settings->hugepages[i].enabled;
+ thp_write_string(path, thp_enabled_strings[enabled]);
+ }
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ if (!((1 << i) & shmem_orders))
+ continue;
+ snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled",
+ (getpagesize() >> 10) << i);
+ enabled = settings->shmem_hugepages[i].enabled;
+ thp_write_string(path, shmem_enabled_strings[enabled]);
+ }
+}
+
+struct thp_settings *thp_current_settings(void)
+{
+ if (!settings_index) {
+ printf("Fail: No settings set");
+ exit(EXIT_FAILURE);
+ }
+ return settings_stack + settings_index - 1;
+}
+
+void thp_push_settings(struct thp_settings *settings)
+{
+ if (settings_index >= MAX_SETTINGS_DEPTH) {
+ printf("Fail: Settings stack exceeded");
+ exit(EXIT_FAILURE);
+ }
+ settings_stack[settings_index++] = *settings;
+ thp_write_settings(thp_current_settings());
+}
+
+void thp_pop_settings(void)
+{
+ if (settings_index <= 0) {
+ printf("Fail: Settings stack empty");
+ exit(EXIT_FAILURE);
+ }
+ --settings_index;
+ thp_write_settings(thp_current_settings());
+}
+
+void thp_restore_settings(void)
+{
+ thp_write_settings(&saved_settings);
+}
+
+void thp_save_settings(void)
+{
+ thp_read_settings(&saved_settings);
+}
+
+void thp_set_read_ahead_path(char *path)
+{
+ if (!path) {
+ dev_queue_read_ahead_path[0] = '\0';
+ return;
+ }
+
+ strncpy(dev_queue_read_ahead_path, path,
+ sizeof(dev_queue_read_ahead_path));
+ dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0';
+}
+
+static unsigned long __thp_supported_orders(bool is_shmem)
+{
+ unsigned long orders = 0;
+ char path[PATH_MAX];
+ char buf[256];
+ int ret, i;
+ char anon_dir[] = "enabled";
+ char shmem_dir[] = "shmem_enabled";
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/%s",
+ (getpagesize() >> 10) << i, is_shmem ? shmem_dir : anon_dir);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = read_file(path, buf, sizeof(buf));
+ if (ret)
+ orders |= 1UL << i;
+ }
+
+ return orders;
+}
+
+unsigned long thp_supported_orders(void)
+{
+ return __thp_supported_orders(false);
+}
+
+unsigned long thp_shmem_supported_orders(void)
+{
+ return __thp_supported_orders(true);
+}
+
+bool thp_available(void)
+{
+ if (access(THP_SYSFS, F_OK) != 0)
+ return false;
+ return true;
+}
+
+bool thp_is_enabled(void)
+{
+ if (!thp_available())
+ return false;
+
+ int mode = thp_read_string("enabled", thp_enabled_strings);
+
+ /* THP is considered enabled if it's either "always" or "madvise" */
+ return mode == 1 || mode == 3;
+}
diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h
new file mode 100644
index 000000000000..76eeb712e5f1
--- /dev/null
+++ b/tools/testing/selftests/mm/thp_settings.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __THP_SETTINGS_H__
+#define __THP_SETTINGS_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+enum thp_enabled {
+ THP_NEVER,
+ THP_ALWAYS,
+ THP_INHERIT,
+ THP_MADVISE,
+};
+
+enum thp_defrag {
+ THP_DEFRAG_ALWAYS,
+ THP_DEFRAG_DEFER,
+ THP_DEFRAG_DEFER_MADVISE,
+ THP_DEFRAG_MADVISE,
+ THP_DEFRAG_NEVER,
+};
+
+enum shmem_enabled {
+ SHMEM_NEVER,
+ SHMEM_ALWAYS,
+ SHMEM_WITHIN_SIZE,
+ SHMEM_ADVISE,
+ SHMEM_INHERIT,
+ SHMEM_DENY,
+ SHMEM_FORCE,
+};
+
+#define NR_ORDERS 20
+
+struct hugepages_settings {
+ enum thp_enabled enabled;
+};
+
+struct khugepaged_settings {
+ bool defrag;
+ unsigned int alloc_sleep_millisecs;
+ unsigned int scan_sleep_millisecs;
+ unsigned int max_ptes_none;
+ unsigned int max_ptes_swap;
+ unsigned int max_ptes_shared;
+ unsigned long pages_to_scan;
+};
+
+struct shmem_hugepages_settings {
+ enum shmem_enabled enabled;
+};
+
+struct thp_settings {
+ enum thp_enabled thp_enabled;
+ enum thp_defrag thp_defrag;
+ enum shmem_enabled shmem_enabled;
+ bool use_zero_page;
+ struct khugepaged_settings khugepaged;
+ unsigned long read_ahead_kb;
+ struct hugepages_settings hugepages[NR_ORDERS];
+ struct shmem_hugepages_settings shmem_hugepages[NR_ORDERS];
+};
+
+int read_file(const char *path, char *buf, size_t buflen);
+int write_file(const char *path, const char *buf, size_t buflen);
+unsigned long read_num(const char *path);
+void write_num(const char *path, unsigned long num);
+
+int thp_read_string(const char *name, const char * const strings[]);
+void thp_write_string(const char *name, const char *val);
+unsigned long thp_read_num(const char *name);
+void thp_write_num(const char *name, unsigned long num);
+
+void thp_write_settings(struct thp_settings *settings);
+void thp_read_settings(struct thp_settings *settings);
+struct thp_settings *thp_current_settings(void);
+void thp_push_settings(struct thp_settings *settings);
+void thp_pop_settings(void);
+void thp_restore_settings(void);
+void thp_save_settings(void);
+
+void thp_set_read_ahead_path(char *path);
+unsigned long thp_supported_orders(void);
+unsigned long thp_shmem_supported_orders(void);
+
+bool thp_available(void);
+bool thp_is_enabled(void);
+
+#endif /* __THP_SETTINGS_H__ */
diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index 361ef7192cc6..77813d34dcc2 100644
--- a/tools/testing/selftests/vm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -3,7 +3,8 @@
Before running this huge pages for each huge page size must have been
reserved.
- For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used.
+ For large pages beyond MAX_PAGE_ORDER (like 1GB on x86) boot options must
+ be used. 1GB wouldn't be tested if it isn't available.
Also shmmax must be increased.
And you need to run as root to work around some weird permissions in shm.
And nothing using huge pages should run in parallel.
@@ -12,8 +13,9 @@
sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m
(warning this will remove all if someone else uses them) */
-#define _GNU_SOURCE 1
+#define _GNU_SOURCE
#include <sys/mman.h>
+#include <linux/mman.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/ipc.h>
@@ -24,29 +26,30 @@
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
+#include "vm_util.h"
+#include "kselftest.h"
-#define err(x) perror(x), exit(1)
-
-#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
-#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
-#define MAP_HUGE_SHIFT 26
-#define MAP_HUGE_MASK 0x3f
#if !defined(MAP_HUGETLB)
#define MAP_HUGETLB 0x40000
#endif
#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */
+#ifndef SHM_HUGE_SHIFT
#define SHM_HUGE_SHIFT 26
+#endif
+#ifndef SHM_HUGE_MASK
#define SHM_HUGE_MASK 0x3f
+#endif
+#ifndef SHM_HUGE_2MB
#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
+#endif
+#ifndef SHM_HUGE_1GB
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
+#endif
#define NUM_PAGESIZES 5
-
#define NUM_PAGES 4
-#define Dprintf(fmt...) // printf(fmt)
-
unsigned long page_sizes[NUM_PAGESIZES];
int num_page_sizes;
@@ -58,46 +61,15 @@ int ilog2(unsigned long v)
return l;
}
-void find_pagesizes(void)
-{
- glob_t g;
- int i;
- glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
- assert(g.gl_pathc <= NUM_PAGESIZES);
- for (i = 0; i < g.gl_pathc; i++) {
- sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
- &page_sizes[i]);
- page_sizes[i] <<= 10;
- printf("Found %luMB\n", page_sizes[i] >> 20);
- }
- num_page_sizes = g.gl_pathc;
- globfree(&g);
-}
-
-unsigned long default_huge_page_size(void)
-{
- unsigned long hps = 0;
- char *line = NULL;
- size_t linelen = 0;
- FILE *f = fopen("/proc/meminfo", "r");
- if (!f)
- return 0;
- while (getline(&line, &linelen, f) > 0) {
- if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
- hps <<= 10;
- break;
- }
- }
- free(line);
- return hps;
-}
-
void show(unsigned long ps)
{
char buf[100];
+
if (ps == getpagesize())
return;
- printf("%luMB: ", ps >> 20);
+
+ ksft_print_msg("%luMB: ", ps >> 20);
+
fflush(stdout);
snprintf(buf, sizeof buf,
"cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
@@ -105,153 +77,158 @@ void show(unsigned long ps)
system(buf);
}
-unsigned long read_sysfs(int warn, char *fmt, ...)
+unsigned long read_free(unsigned long ps)
{
- char *line = NULL;
- size_t linelen = 0;
- char buf[100];
- FILE *f;
- va_list ap;
unsigned long val = 0;
+ char buf[100];
- va_start(ap, fmt);
- vsnprintf(buf, sizeof buf, fmt, ap);
- va_end(ap);
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+ ps >> 10);
+ if (read_sysfs(buf, &val) && ps != getpagesize())
+ ksft_print_msg("missing %s\n", buf);
- f = fopen(buf, "r");
- if (!f) {
- if (warn)
- printf("missing %s\n", buf);
- return 0;
- }
- if (getline(&line, &linelen, f) > 0) {
- sscanf(line, "%lu", &val);
- }
- fclose(f);
- free(line);
return val;
}
-unsigned long read_free(unsigned long ps)
-{
- return read_sysfs(ps != getpagesize(),
- "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
- ps >> 10);
-}
-
void test_mmap(unsigned long size, unsigned flags)
{
char *map;
unsigned long before, after;
- int err;
before = read_free(size);
map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
- if (map == (char *)-1) err("mmap");
memset(map, 0xff, size*NUM_PAGES);
after = read_free(size);
- Dprintf("before %lu after %lu diff %ld size %lu\n",
- before, after, before - after, size);
- assert(size == getpagesize() || (before - after) == NUM_PAGES);
+
show(size);
- err = munmap(map, size);
- assert(!err);
+ ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
+ "%s mmap %lu %x\n", __func__, size, flags);
+
+ if (munmap(map, size * NUM_PAGES))
+ ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno));
}
void test_shmget(unsigned long size, unsigned flags)
{
int id;
unsigned long before, after;
- int err;
+ struct shm_info i;
+ char *map;
before = read_free(size);
id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags);
- if (id < 0) err("shmget");
-
- struct shm_info i;
- if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl");
- Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss);
+ if (id < 0) {
+ if (errno == EPERM) {
+ ksft_test_result_skip("shmget requires root privileges: %s\n",
+ strerror(errno));
+ return;
+ }
+ ksft_exit_fail_msg("shmget: %s\n", strerror(errno));
+ }
+ if (shmctl(id, SHM_INFO, (void *)&i) < 0)
+ ksft_exit_fail_msg("shmctl: %s\n", strerror(errno));
- Dprintf("id %d\n", id);
- char *map = shmat(id, NULL, 0600);
- if (map == (char*)-1) err("shmat");
+ map = shmat(id, NULL, 0600);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("shmat: %s\n", strerror(errno));
shmctl(id, IPC_RMID, NULL);
memset(map, 0xff, size*NUM_PAGES);
after = read_free(size);
- Dprintf("before %lu after %lu diff %ld size %lu\n",
- before, after, before - after, size);
- assert(size == getpagesize() || (before - after) == NUM_PAGES);
show(size);
- err = shmdt(map);
- assert(!err);
+ ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
+ "%s: mmap %lu %x\n", __func__, size, flags);
+ if (shmdt(map))
+ ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno));
}
-void sanity_checks(void)
+void find_pagesizes(void)
{
- int i;
unsigned long largest = getpagesize();
+ unsigned long shmmax_val = 0;
+ int i;
+ glob_t g;
- for (i = 0; i < num_page_sizes; i++) {
- if (page_sizes[i] > largest)
+ glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
+ assert(g.gl_pathc <= NUM_PAGESIZES);
+ for (i = 0; (i < g.gl_pathc) && (num_page_sizes < NUM_PAGESIZES); i++) {
+ sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
+ &page_sizes[num_page_sizes]);
+ page_sizes[num_page_sizes] <<= 10;
+ ksft_print_msg("Found %luMB\n", page_sizes[i] >> 20);
+
+ if (page_sizes[num_page_sizes] > largest)
largest = page_sizes[i];
- if (read_free(page_sizes[i]) < NUM_PAGES) {
- printf("Not enough huge pages for page size %lu MB, need %u\n",
- page_sizes[i] >> 20,
- NUM_PAGES);
- exit(0);
- }
+ if (read_free(page_sizes[num_page_sizes]) >= NUM_PAGES)
+ num_page_sizes++;
+ else
+ ksft_print_msg("SKIP for size %lu MB as not enough huge pages, need %u\n",
+ page_sizes[num_page_sizes] >> 20, NUM_PAGES);
}
+ globfree(&g);
- if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) {
- printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
- exit(0);
+ read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val);
+ if (shmmax_val < NUM_PAGES * largest) {
+ ksft_print_msg("WARNING: shmmax is too small to run this test.\n");
+ ksft_print_msg("Please run the following command to increase shmmax:\n");
+ ksft_print_msg("echo %lu > /proc/sys/kernel/shmmax\n", largest * NUM_PAGES);
+ ksft_exit_skip("Test skipped due to insufficient shmmax value.\n");
}
#if defined(__x86_64__)
if (largest != 1U<<30) {
- printf("No GB pages available on x86-64\n"
- "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
- exit(0);
+ ksft_exit_skip("No GB pages available on x86-64\n"
+ "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
}
#endif
}
int main(void)
{
- int i;
unsigned default_hps = default_huge_page_size();
+ int i;
+
+ ksft_print_header();
find_pagesizes();
- sanity_checks();
+ if (!num_page_sizes)
+ ksft_finished();
+
+ ksft_set_plan(2 * num_page_sizes + 3);
for (i = 0; i < num_page_sizes; i++) {
unsigned long ps = page_sizes[i];
int arg = ilog2(ps) << MAP_HUGE_SHIFT;
- printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
+
+ ksft_print_msg("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
test_mmap(ps, MAP_HUGETLB | arg);
}
- printf("Testing default huge mmap\n");
- test_mmap(default_hps, SHM_HUGETLB);
- puts("Testing non-huge shmget");
+ ksft_print_msg("Testing default huge mmap\n");
+ test_mmap(default_hps, MAP_HUGETLB);
+
+ ksft_print_msg("Testing non-huge shmget\n");
test_shmget(getpagesize(), 0);
for (i = 0; i < num_page_sizes; i++) {
unsigned long ps = page_sizes[i];
int arg = ilog2(ps) << SHM_HUGE_SHIFT;
- printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
+ ksft_print_msg("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
test_shmget(ps, SHM_HUGETLB | arg);
}
- puts("default huge shmget");
+
+ ksft_print_msg("default huge shmget\n");
test_shmget(default_hps, SHM_HUGETLB);
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/transhuge-stress.c b/tools/testing/selftests/mm/transhuge-stress.c
new file mode 100644
index 000000000000..bcad47c09518
--- /dev/null
+++ b/tools/testing/selftests/mm/transhuge-stress.c
@@ -0,0 +1,138 @@
+/*
+ * Stress test for transparent huge pages, memory compaction and migration.
+ *
+ * Authors: Konstantin Khlebnikov <koct9i@gmail.com>
+ *
+ * This is free and unencumbered software released into the public domain.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <err.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include "vm_util.h"
+#include "kselftest.h"
+
+int backing_fd = -1;
+int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
+#define PROT_RW (PROT_READ | PROT_WRITE)
+
+int main(int argc, char **argv)
+{
+ size_t ram, len;
+ void *ptr, *p;
+ struct timespec start, a, b;
+ int i = 0;
+ char *name = NULL;
+ double s;
+ uint8_t *map;
+ size_t map_len;
+ int pagemap_fd;
+ int duration = 0;
+
+ ksft_print_header();
+
+ ram = sysconf(_SC_PHYS_PAGES);
+ if (ram > SIZE_MAX / psize() / 4)
+ ram = SIZE_MAX / 4;
+ else
+ ram *= psize();
+ len = ram;
+
+ while (++i < argc) {
+ if (!strcmp(argv[i], "-h"))
+ ksft_exit_fail_msg("usage: %s [-f <filename>] [-d <duration>] [size in MiB]\n",
+ argv[0]);
+ else if (!strcmp(argv[i], "-f"))
+ name = argv[++i];
+ else if (!strcmp(argv[i], "-d"))
+ duration = atoi(argv[++i]);
+ else
+ len = atoll(argv[i]) << 20;
+ }
+
+ ksft_set_plan(1);
+
+ if (name) {
+ backing_fd = open(name, O_RDWR);
+ if (backing_fd == -1)
+ ksft_exit_fail_msg("open %s\n", name);
+ mmap_flags = MAP_SHARED;
+ }
+
+ warnx("allocate %zd transhuge pages, using %zd MiB virtual memory"
+ " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20,
+ ram >> (20 + HPAGE_SHIFT - pshift() - 1));
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("open pagemap\n");
+
+ len -= len % HPAGE_SIZE;
+ ptr = mmap(NULL, len + HPAGE_SIZE, PROT_RW, mmap_flags, backing_fd, 0);
+ if (ptr == MAP_FAILED)
+ ksft_exit_fail_msg("initial mmap");
+ ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
+
+ if (madvise(ptr, len, MADV_HUGEPAGE))
+ ksft_exit_fail_msg("MADV_HUGEPAGE");
+
+ map_len = ram >> (HPAGE_SHIFT - 1);
+ map = malloc(map_len);
+ if (!map)
+ ksft_exit_fail_msg("map malloc\n");
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ while (1) {
+ int nr_succeed = 0, nr_failed = 0, nr_pages = 0;
+
+ memset(map, 0, map_len);
+
+ clock_gettime(CLOCK_MONOTONIC, &a);
+ for (p = ptr; p < ptr + len; p += HPAGE_SIZE) {
+ int64_t pfn;
+
+ pfn = allocate_transhuge(p, pagemap_fd);
+
+ if (pfn < 0) {
+ nr_failed++;
+ } else {
+ size_t idx = pfn >> (HPAGE_SHIFT - pshift());
+
+ nr_succeed++;
+ if (idx >= map_len) {
+ map = realloc(map, idx + 1);
+ if (!map)
+ ksft_exit_fail_msg("map realloc\n");
+ memset(map + map_len, 0, idx + 1 - map_len);
+ map_len = idx + 1;
+ }
+ if (!map[idx])
+ nr_pages++;
+ map[idx] = 1;
+ }
+
+ /* split transhuge page, keep last page */
+ if (madvise(p, HPAGE_SIZE - psize(), MADV_DONTNEED))
+ ksft_exit_fail_msg("MADV_DONTNEED");
+ }
+ clock_gettime(CLOCK_MONOTONIC, &b);
+ s = b.tv_sec - a.tv_sec + (b.tv_nsec - a.tv_nsec) / 1000000000.;
+
+ ksft_print_msg("%.3f s/loop, %.3f ms/page, %10.3f MiB/s\t"
+ "%4d succeed, %4d failed, %4d different pages\n",
+ s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20),
+ nr_succeed, nr_failed, nr_pages);
+
+ if (duration > 0 && b.tv_sec - start.tv_sec >= duration) {
+ ksft_test_result_pass("Completed\n");
+ ksft_finished();
+ }
+ }
+}
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
new file mode 100644
index 000000000000..edd02328f77b
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -0,0 +1,745 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Userfaultfd tests util functions
+ *
+ * Copyright (C) 2015-2023 Red Hat, Inc.
+ */
+
+#include "uffd-common.h"
+
+uffd_test_ops_t *uffd_test_ops;
+uffd_test_case_ops_t *uffd_test_case_ops;
+
+
+/* pthread_mutex_t starts at page offset 0 */
+pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts)
+{
+ return (pthread_mutex_t *) (area + nr * gopts->page_size);
+}
+
+/*
+ * count is placed in the page after pthread_mutex_t naturally aligned
+ * to avoid non alignment faults on non-x86 archs.
+ */
+volatile unsigned long long *area_count(char *area, unsigned long nr,
+ uffd_global_test_opts_t *gopts)
+{
+ return (volatile unsigned long long *)
+ ((unsigned long)(area + nr * gopts->page_size +
+ sizeof(pthread_mutex_t) + sizeof(unsigned long long) - 1) &
+ ~(unsigned long)(sizeof(unsigned long long) - 1));
+}
+
+static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
+{
+ unsigned int memfd_flags = 0;
+ int mem_fd;
+
+ if (hugetlb)
+ memfd_flags = MFD_HUGETLB;
+ mem_fd = memfd_create("uffd-test", memfd_flags);
+ if (mem_fd < 0)
+ err("memfd_create");
+ if (ftruncate(mem_fd, mem_size))
+ err("ftruncate");
+ if (fallocate(mem_fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
+ mem_size))
+ err("fallocate");
+
+ return mem_fd;
+}
+
+static void anon_release_pages(uffd_global_test_opts_t *gopts, char *rel_area)
+{
+ if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+}
+
+static int anon_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src)
+{
+ *alloc_area = mmap(NULL, gopts->nr_pages * gopts->page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (*alloc_area == MAP_FAILED) {
+ *alloc_area = NULL;
+ return -errno;
+ }
+ return 0;
+}
+
+static void noop_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start,
+ size_t len, unsigned long offset)
+{
+}
+
+static void hugetlb_release_pages(uffd_global_test_opts_t *gopts, char *rel_area)
+{
+ if (!gopts->map_shared) {
+ if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+ } else {
+ if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE))
+ err("madvise(MADV_REMOVE) failed");
+ }
+}
+
+static int hugetlb_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src)
+{
+ off_t size = gopts->nr_pages * gopts->page_size;
+ off_t offset = is_src ? 0 : size;
+ void *area_alias = NULL;
+ char **alloc_area_alias;
+ int mem_fd = uffd_mem_fd_create(size * 2, true);
+
+ *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ (gopts->map_shared ? MAP_SHARED : MAP_PRIVATE) |
+ (is_src ? 0 : MAP_NORESERVE),
+ mem_fd, offset);
+ if (*alloc_area == MAP_FAILED) {
+ *alloc_area = NULL;
+ return -errno;
+ }
+
+ if (gopts->map_shared) {
+ area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, mem_fd, offset);
+ if (area_alias == MAP_FAILED)
+ return -errno;
+ }
+
+ if (is_src) {
+ alloc_area_alias = &gopts->area_src_alias;
+ } else {
+ alloc_area_alias = &gopts->area_dst_alias;
+ }
+ if (area_alias)
+ *alloc_area_alias = area_alias;
+
+ close(mem_fd);
+ return 0;
+}
+
+static void hugetlb_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start,
+ size_t len, unsigned long offset)
+{
+ if (!gopts->map_shared)
+ return;
+
+ *start = (unsigned long) gopts->area_dst_alias + offset;
+}
+
+static void shmem_release_pages(uffd_global_test_opts_t *gopts, char *rel_area)
+{
+ if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE))
+ err("madvise(MADV_REMOVE) failed");
+}
+
+static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src)
+{
+ void *area_alias = NULL;
+ size_t bytes = gopts->nr_pages * gopts->page_size, hpage_size = read_pmd_pagesize();
+ unsigned long offset = is_src ? 0 : bytes;
+ char *p = NULL, *p_alias = NULL;
+ int mem_fd = uffd_mem_fd_create(bytes * 2, false);
+ size_t region_size = bytes * 2 + hpage_size;
+
+ void *reserve = mmap(NULL, region_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (reserve == MAP_FAILED) {
+ close(mem_fd);
+ return -errno;
+ }
+
+ p = reserve;
+ p_alias = p;
+ p_alias += bytes;
+ p_alias += hpage_size; /* Prevent src/dst VMA merge */
+
+ *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
+ mem_fd, offset);
+ if (*alloc_area == MAP_FAILED) {
+ *alloc_area = NULL;
+ munmap(reserve, region_size);
+ close(mem_fd);
+ return -errno;
+ }
+ if (*alloc_area != p)
+ err("mmap of memfd failed at %p", p);
+
+ area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
+ mem_fd, offset);
+ if (area_alias == MAP_FAILED) {
+ *alloc_area = NULL;
+ munmap(reserve, region_size);
+ close(mem_fd);
+ return -errno;
+ }
+ if (area_alias != p_alias)
+ err("mmap of anonymous memory failed at %p", p_alias);
+
+ if (is_src)
+ gopts->area_src_alias = area_alias;
+ else
+ gopts->area_dst_alias = area_alias;
+
+ close(mem_fd);
+ return 0;
+}
+
+static void shmem_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start,
+ size_t len, unsigned long offset)
+{
+ *start = (unsigned long)gopts->area_dst_alias + offset;
+}
+
+static void shmem_check_pmd_mapping(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages)
+{
+ if (!check_huge_shmem(gopts->area_dst_alias, expect_nr_hpages,
+ read_pmd_pagesize()))
+ err("Did not find expected %d number of hugepages",
+ expect_nr_hpages);
+}
+
+struct uffd_test_ops anon_uffd_test_ops = {
+ .allocate_area = anon_allocate_area,
+ .release_pages = anon_release_pages,
+ .alias_mapping = noop_alias_mapping,
+ .check_pmd_mapping = NULL,
+};
+
+struct uffd_test_ops shmem_uffd_test_ops = {
+ .allocate_area = shmem_allocate_area,
+ .release_pages = shmem_release_pages,
+ .alias_mapping = shmem_alias_mapping,
+ .check_pmd_mapping = shmem_check_pmd_mapping,
+};
+
+struct uffd_test_ops hugetlb_uffd_test_ops = {
+ .allocate_area = hugetlb_allocate_area,
+ .release_pages = hugetlb_release_pages,
+ .alias_mapping = hugetlb_alias_mapping,
+ .check_pmd_mapping = NULL,
+};
+
+void uffd_stats_report(struct uffd_args *args, int n_cpus)
+{
+ int i;
+ unsigned long long miss_total = 0, wp_total = 0, minor_total = 0;
+
+ for (i = 0; i < n_cpus; i++) {
+ miss_total += args[i].missing_faults;
+ wp_total += args[i].wp_faults;
+ minor_total += args[i].minor_faults;
+ }
+
+ printf("userfaults: ");
+ if (miss_total) {
+ printf("%llu missing (", miss_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", args[i].missing_faults);
+ printf("\b) ");
+ }
+ if (wp_total) {
+ printf("%llu wp (", wp_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", args[i].wp_faults);
+ printf("\b) ");
+ }
+ if (minor_total) {
+ printf("%llu minor (", minor_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", args[i].minor_faults);
+ printf("\b)");
+ }
+ printf("\n");
+}
+
+int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features)
+{
+ struct uffdio_api uffdio_api;
+
+ gopts->uffd = uffd_open(UFFD_FLAGS);
+ if (gopts->uffd < 0)
+ return -1;
+ gopts->uffd_flags = fcntl(gopts->uffd, F_GETFD, NULL);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = *features;
+ if (ioctl(gopts->uffd, UFFDIO_API, &uffdio_api))
+ /* Probably lack of CAP_PTRACE? */
+ return -1;
+ if (uffdio_api.api != UFFD_API)
+ err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api);
+
+ *features = uffdio_api.features;
+ return 0;
+}
+
+static inline void munmap_area(uffd_global_test_opts_t *gopts, void **area)
+{
+ if (*area)
+ if (munmap(*area, gopts->nr_pages * gopts->page_size))
+ err("munmap");
+
+ *area = NULL;
+}
+
+void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts)
+{
+ size_t i;
+
+ if (gopts->pipefd) {
+ for (i = 0; i < gopts->nr_parallel * 2; ++i) {
+ if (close(gopts->pipefd[i]))
+ err("close pipefd");
+ }
+ free(gopts->pipefd);
+ gopts->pipefd = NULL;
+ }
+
+ if (gopts->count_verify) {
+ free(gopts->count_verify);
+ gopts->count_verify = NULL;
+ }
+
+ if (gopts->uffd != -1) {
+ if (close(gopts->uffd))
+ err("close uffd");
+ gopts->uffd = -1;
+ }
+
+ munmap_area(gopts, (void **)&gopts->area_src);
+ munmap_area(gopts, (void **)&gopts->area_src_alias);
+ munmap_area(gopts, (void **)&gopts->area_dst);
+ munmap_area(gopts, (void **)&gopts->area_dst_alias);
+ munmap_area(gopts, (void **)&gopts->area_remap);
+}
+
+int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg)
+{
+ unsigned long nr, cpu;
+ int ret;
+
+ gopts->area_src_alias = NULL;
+ gopts->area_dst_alias = NULL;
+ gopts->area_remap = NULL;
+
+ if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) {
+ ret = uffd_test_case_ops->pre_alloc(gopts, errmsg);
+ if (ret)
+ return ret;
+ }
+
+ ret = uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_src, true);
+ ret |= uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_dst, false);
+ if (ret) {
+ if (errmsg)
+ *errmsg = "memory allocation failed";
+ return ret;
+ }
+
+ if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) {
+ ret = uffd_test_case_ops->post_alloc(gopts, errmsg);
+ if (ret)
+ return ret;
+ }
+
+ ret = userfaultfd_open(gopts, &features);
+ if (ret) {
+ if (errmsg)
+ *errmsg = "possible lack of privilege";
+ return ret;
+ }
+
+ gopts->count_verify = malloc(gopts->nr_pages * sizeof(unsigned long long));
+ if (!gopts->count_verify)
+ err("count_verify");
+
+ for (nr = 0; nr < gopts->nr_pages; nr++) {
+ *area_mutex(gopts->area_src, nr, gopts) =
+ (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
+ gopts->count_verify[nr] = *area_count(gopts->area_src, nr, gopts) = 1;
+ /*
+ * In the transition between 255 to 256, powerpc will
+ * read out of order in my_bcmp and see both bytes as
+ * zero, so leave a placeholder below always non-zero
+ * after the count, to avoid my_bcmp to trigger false
+ * positives.
+ */
+ *(area_count(gopts->area_src, nr, gopts) + 1) = 1;
+ }
+
+ /*
+ * After initialization of area_src, we must explicitly release pages
+ * for area_dst to make sure it's fully empty. Otherwise we could have
+ * some area_dst pages be erroneously initialized with zero pages,
+ * hence we could hit memory corruption later in the test.
+ *
+ * One example is when THP is globally enabled, above allocate_area()
+ * calls could have the two areas merged into a single VMA (as they
+ * will have the same VMA flags so they're mergeable). When we
+ * initialize the area_src above, it's possible that some part of
+ * area_dst could have been faulted in via one huge THP that will be
+ * shared between area_src and area_dst. It could cause some of the
+ * area_dst won't be trapped by missing userfaults.
+ *
+ * This release_pages() will guarantee even if that happened, we'll
+ * proactively split the thp and drop any accidentally initialized
+ * pages within area_dst.
+ */
+ uffd_test_ops->release_pages(gopts, gopts->area_dst);
+
+ gopts->pipefd = malloc(sizeof(int) * gopts->nr_parallel * 2);
+ if (!gopts->pipefd)
+ err("pipefd");
+ for (cpu = 0; cpu < gopts->nr_parallel; cpu++)
+ if (pipe2(&gopts->pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK))
+ err("pipe");
+
+ return 0;
+}
+
+void wp_range(int ufd, __u64 start, __u64 len, bool wp)
+{
+ struct uffdio_writeprotect prms;
+
+ /* Write protection page faults */
+ prms.range.start = start;
+ prms.range.len = len;
+ /* Undo write-protect, do wakeup after that */
+ prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
+
+ if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
+ err("clear WP failed: address=0x%"PRIx64, (uint64_t)start);
+}
+
+static void continue_range(int ufd, __u64 start, __u64 len, bool wp)
+{
+ struct uffdio_continue req;
+ int ret;
+
+ req.range.start = start;
+ req.range.len = len;
+ req.mode = 0;
+ if (wp)
+ req.mode |= UFFDIO_CONTINUE_MODE_WP;
+
+ if (ioctl(ufd, UFFDIO_CONTINUE, &req))
+ err("UFFDIO_CONTINUE failed for address 0x%" PRIx64,
+ (uint64_t)start);
+
+ /*
+ * Error handling within the kernel for continue is subtly different
+ * from copy or zeropage, so it may be a source of bugs. Trigger an
+ * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG.
+ */
+ req.mapped = 0;
+ ret = ioctl(ufd, UFFDIO_CONTINUE, &req);
+ if (ret >= 0 || req.mapped != -EEXIST)
+ err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64,
+ ret, (int64_t) req.mapped);
+}
+
+int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg)
+{
+ int ret = read(gopts->uffd, msg, sizeof(*msg));
+
+ if (ret != sizeof(*msg)) {
+ if (ret < 0) {
+ if (errno == EAGAIN || errno == EINTR)
+ return 1;
+ err("blocking read error");
+ } else {
+ err("short read");
+ }
+ }
+
+ return 0;
+}
+
+void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ unsigned long offset;
+
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
+
+ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
+ /* Write protect page faults */
+ wp_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, false);
+ args->wp_faults++;
+ } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) {
+ uint8_t *area;
+ int b;
+
+ /*
+ * Minor page faults
+ *
+ * To prove we can modify the original range for testing
+ * purposes, we're going to bit flip this range before
+ * continuing.
+ *
+ * Note that this requires all minor page fault tests operate on
+ * area_dst (non-UFFD-registered) and area_dst_alias
+ * (UFFD-registered).
+ */
+
+ area = (uint8_t *)(gopts->area_dst +
+ ((char *)msg->arg.pagefault.address -
+ gopts->area_dst_alias));
+ for (b = 0; b < gopts->page_size; ++b)
+ area[b] = ~area[b];
+ continue_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size,
+ args->apply_wp);
+ args->minor_faults++;
+ } else {
+ /*
+ * Missing page faults.
+ *
+ * Here we force a write check for each of the missing mode
+ * faults. It's guaranteed because the only threads that
+ * will trigger uffd faults are the locking threads, and
+ * their first instruction to touch the missing page will
+ * always be pthread_mutex_lock().
+ *
+ * Note that here we relied on an NPTL glibc impl detail to
+ * always read the lock type at the entry of the lock op
+ * (pthread_mutex_t.__data.__type, offset 0x10) before
+ * doing any locking operations to guarantee that. It's
+ * actually not good to rely on this impl detail because
+ * logically a pthread-compatible lib can implement the
+ * locks without types and we can fail when linking with
+ * them. However since we used to find bugs with this
+ * strict check we still keep it around. Hopefully this
+ * could be a good hint when it fails again. If one day
+ * it'll break on some other impl of glibc we'll revisit.
+ */
+ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ err("unexpected write fault");
+
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst;
+ offset &= ~(gopts->page_size-1);
+
+ if (copy_page(gopts, offset, args->apply_wp))
+ args->missing_faults++;
+ }
+}
+
+void *uffd_poll_thread(void *arg)
+{
+ struct uffd_args *args = (struct uffd_args *)arg;
+ uffd_global_test_opts_t *gopts = args->gopts;
+ unsigned long cpu = args->cpu;
+ struct pollfd pollfd[2];
+ struct uffd_msg msg;
+ struct uffdio_register uffd_reg;
+ int ret;
+ char tmp_chr;
+
+ if (!args->handle_fault)
+ args->handle_fault = uffd_handle_page_fault;
+
+ pollfd[0].fd = gopts->uffd;
+ pollfd[0].events = POLLIN;
+ pollfd[1].fd = gopts->pipefd[cpu*2];
+ pollfd[1].events = POLLIN;
+
+ gopts->ready_for_fork = true;
+
+ for (;;) {
+ ret = poll(pollfd, 2, -1);
+ if (ret <= 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
+ err("poll error: %d", ret);
+ }
+ if (pollfd[1].revents) {
+ if (!(pollfd[1].revents & POLLIN))
+ err("pollfd[1].revents %d", pollfd[1].revents);
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
+ err("read pipefd error");
+ break;
+ }
+ if (!(pollfd[0].revents & POLLIN))
+ err("pollfd[0].revents %d", pollfd[0].revents);
+ if (uffd_read_msg(gopts, &msg))
+ continue;
+ switch (msg.event) {
+ default:
+ err("unexpected msg event %u\n", msg.event);
+ break;
+ case UFFD_EVENT_PAGEFAULT:
+ args->handle_fault(gopts, &msg, args);
+ break;
+ case UFFD_EVENT_FORK:
+ close(gopts->uffd);
+ gopts->uffd = msg.arg.fork.ufd;
+ pollfd[0].fd = gopts->uffd;
+ break;
+ case UFFD_EVENT_REMOVE:
+ uffd_reg.range.start = msg.arg.remove.start;
+ uffd_reg.range.len = msg.arg.remove.end -
+ msg.arg.remove.start;
+ if (ioctl(gopts->uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
+ err("remove failure");
+ break;
+ case UFFD_EVENT_REMAP:
+ gopts->area_remap = gopts->area_dst; /* save for later unmap */
+ gopts->area_dst = (char *)(unsigned long)msg.arg.remap.to;
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+static void retry_copy_page(uffd_global_test_opts_t *gopts, struct uffdio_copy *uffdio_copy,
+ unsigned long offset)
+{
+ uffd_test_ops->alias_mapping(gopts,
+ &uffdio_copy->dst,
+ uffdio_copy->len,
+ offset);
+ if (ioctl(gopts->uffd, UFFDIO_COPY, uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy->copy != -EEXIST)
+ err("UFFDIO_COPY retry error: %"PRId64,
+ (int64_t)uffdio_copy->copy);
+ } else {
+ err("UFFDIO_COPY retry unexpected: %"PRId64,
+ (int64_t)uffdio_copy->copy);
+ }
+}
+
+static void wake_range(int ufd, unsigned long addr, unsigned long len)
+{
+ struct uffdio_range uffdio_wake;
+
+ uffdio_wake.start = addr;
+ uffdio_wake.len = len;
+
+ if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake))
+ fprintf(stderr, "error waking %lu\n",
+ addr), exit(1);
+}
+
+int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp)
+{
+ struct uffdio_copy uffdio_copy;
+
+ if (offset >= gopts->nr_pages * gopts->page_size)
+ err("unexpected offset %lu\n", offset);
+ uffdio_copy.dst = (unsigned long) gopts->area_dst + offset;
+ uffdio_copy.src = (unsigned long) gopts->area_src + offset;
+ uffdio_copy.len = gopts->page_size;
+ if (wp)
+ uffdio_copy.mode = UFFDIO_COPY_MODE_WP;
+ else
+ uffdio_copy.mode = 0;
+ uffdio_copy.copy = 0;
+ if (ioctl(gopts->uffd, UFFDIO_COPY, &uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy.copy != -EEXIST)
+ err("UFFDIO_COPY error: %"PRId64,
+ (int64_t)uffdio_copy.copy);
+ wake_range(gopts->uffd, uffdio_copy.dst, gopts->page_size);
+ } else if (uffdio_copy.copy != gopts->page_size) {
+ err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy);
+ } else {
+ if (gopts->test_uffdio_copy_eexist && retry) {
+ gopts->test_uffdio_copy_eexist = false;
+ retry_copy_page(gopts, &uffdio_copy, offset);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp)
+{
+ return __copy_page(gopts, offset, false, wp);
+}
+
+int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len)
+{
+ struct uffdio_move uffdio_move;
+
+ if (offset + len > gopts->nr_pages * gopts->page_size)
+ err("unexpected offset %lu and length %lu\n", offset, len);
+ uffdio_move.dst = (unsigned long) gopts->area_dst + offset;
+ uffdio_move.src = (unsigned long) gopts->area_src + offset;
+ uffdio_move.len = len;
+ uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES;
+ uffdio_move.move = 0;
+ if (ioctl(gopts->uffd, UFFDIO_MOVE, &uffdio_move)) {
+ /* real retval in uffdio_move.move */
+ if (uffdio_move.move != -EEXIST)
+ err("UFFDIO_MOVE error: %"PRId64,
+ (int64_t)uffdio_move.move);
+ wake_range(gopts->uffd, uffdio_move.dst, len);
+ } else if (uffdio_move.move != len) {
+ err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move);
+ } else
+ return 1;
+ return 0;
+}
+
+int uffd_open_dev(unsigned int flags)
+{
+ int fd, uffd;
+
+ fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+ uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags);
+ close(fd);
+
+ return uffd;
+}
+
+int uffd_open_sys(unsigned int flags)
+{
+#ifdef __NR_userfaultfd
+ return syscall(__NR_userfaultfd, flags);
+#else
+ return -1;
+#endif
+}
+
+int uffd_open(unsigned int flags)
+{
+ int uffd = uffd_open_sys(flags);
+
+ if (uffd < 0)
+ uffd = uffd_open_dev(flags);
+
+ return uffd;
+}
+
+int uffd_get_features(uint64_t *features)
+{
+ struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 };
+ /*
+ * This should by default work in most kernels; the feature list
+ * will be the same no matter what we pass in here.
+ */
+ int fd = uffd_open(UFFD_USER_MODE_ONLY);
+
+ if (fd < 0)
+ /* Maybe the kernel is older than user-only mode? */
+ fd = uffd_open(0);
+
+ if (fd < 0)
+ return fd;
+
+ if (ioctl(fd, UFFDIO_API, &uffdio_api)) {
+ close(fd);
+ return -errno;
+ }
+
+ *features = uffdio_api.features;
+ close(fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
new file mode 100644
index 000000000000..844a85ab31eb
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Userfaultfd tests common header
+ *
+ * Copyright (C) 2015-2023 Red Hat, Inc.
+ */
+#ifndef __UFFD_COMMON_H__
+#define __UFFD_COMMON_H__
+
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <signal.h>
+#include <poll.h>
+#include <string.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <sys/random.h>
+#include <stdatomic.h>
+
+#include "kselftest.h"
+#include "vm_util.h"
+
+#define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)
+
+#define _err(fmt, ...) \
+ do { \
+ int ret = errno; \
+ fprintf(stderr, "ERROR: " fmt, ##__VA_ARGS__); \
+ fprintf(stderr, " (errno=%d, @%s:%d)\n", \
+ ret, __FILE__, __LINE__); \
+ } while (0)
+
+#define errexit(exitcode, fmt, ...) \
+ do { \
+ _err(fmt, ##__VA_ARGS__); \
+ exit(exitcode); \
+ } while (0)
+
+#define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__)
+
+struct uffd_global_test_opts {
+ unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size;
+ char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
+ int uffd, uffd_flags, finished, *pipefd, test_type;
+ bool map_shared;
+ bool test_uffdio_wp;
+ unsigned long long *count_verify;
+ volatile bool test_uffdio_copy_eexist;
+ atomic_bool ready_for_fork;
+};
+typedef struct uffd_global_test_opts uffd_global_test_opts_t;
+
+/* Userfaultfd test statistics */
+struct uffd_args {
+ int cpu;
+ /* Whether apply wr-protects when installing pages */
+ bool apply_wp;
+ unsigned long missing_faults;
+ unsigned long wp_faults;
+ unsigned long minor_faults;
+ struct uffd_global_test_opts *gopts;
+
+ /* A custom fault handler; defaults to uffd_handle_page_fault. */
+ void (*handle_fault)(struct uffd_global_test_opts *gopts,
+ struct uffd_msg *msg,
+ struct uffd_args *args);
+};
+
+struct uffd_test_ops {
+ int (*allocate_area)(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src);
+ void (*release_pages)(uffd_global_test_opts_t *gopts, char *rel_area);
+ void (*alias_mapping)(uffd_global_test_opts_t *gopts,
+ __u64 *start,
+ size_t len,
+ unsigned long offset);
+ void (*check_pmd_mapping)(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages);
+};
+typedef struct uffd_test_ops uffd_test_ops_t;
+
+struct uffd_test_case_ops {
+ int (*pre_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg);
+ int (*post_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg);
+};
+typedef struct uffd_test_case_ops uffd_test_case_ops_t;
+
+extern uffd_global_test_opts_t *uffd_gtest_opts;
+extern uffd_test_ops_t anon_uffd_test_ops;
+extern uffd_test_ops_t shmem_uffd_test_ops;
+extern uffd_test_ops_t hugetlb_uffd_test_ops;
+extern uffd_test_ops_t *uffd_test_ops;
+extern uffd_test_case_ops_t *uffd_test_case_ops;
+
+pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts);
+volatile unsigned long long *area_count(char *area,
+ unsigned long nr,
+ uffd_global_test_opts_t *gopts);
+
+void uffd_stats_report(struct uffd_args *args, int n_cpus);
+int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg);
+void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts);
+int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features);
+int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg);
+void wp_range(int ufd, __u64 start, __u64 len, bool wp);
+void uffd_handle_page_fault(uffd_global_test_opts_t *gopts,
+ struct uffd_msg *msg,
+ struct uffd_args *args);
+int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp);
+int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp);
+int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len);
+void *uffd_poll_thread(void *arg);
+
+int uffd_open_dev(unsigned int flags);
+int uffd_open_sys(unsigned int flags);
+int uffd_open(unsigned int flags);
+int uffd_get_features(uint64_t *features);
+
+#define TEST_ANON 1
+#define TEST_HUGETLB 2
+#define TEST_SHMEM 3
+
+#endif
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
new file mode 100644
index 000000000000..700fbaa18d44
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -0,0 +1,521 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Stress userfaultfd syscall.
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This test allocates two virtual areas and bounces the physical
+ * memory across the two virtual areas (from area_src to area_dst)
+ * using userfaultfd.
+ *
+ * There are three threads running per CPU:
+ *
+ * 1) one per-CPU thread takes a per-page pthread_mutex in a random
+ * page of the area_dst (while the physical page may still be in
+ * area_src), and increments a per-page counter in the same page,
+ * and checks its value against a verification region.
+ *
+ * 2) another per-CPU thread handles the userfaults generated by
+ * thread 1 above. userfaultfd blocking reads or poll() modes are
+ * exercised interleaved.
+ *
+ * 3) one last per-CPU thread transfers the memory in the background
+ * at maximum bandwidth (if not already transferred by thread
+ * 2). Each cpu thread takes cares of transferring a portion of the
+ * area.
+ *
+ * When all threads of type 3 completed the transfer, one bounce is
+ * complete. area_src and area_dst are then swapped. All threads are
+ * respawned and so the bounce is immediately restarted in the
+ * opposite direction.
+ *
+ * per-CPU threads 1 by triggering userfaults inside
+ * pthread_mutex_lock will also verify the atomicity of the memory
+ * transfer (UFFDIO_COPY).
+ */
+
+#include "uffd-common.h"
+
+uint64_t features;
+#ifdef __NR_userfaultfd
+
+#define BOUNCE_RANDOM (1<<0)
+#define BOUNCE_RACINGFAULTS (1<<1)
+#define BOUNCE_VERIFY (1<<2)
+#define BOUNCE_POLL (1<<3)
+static int bounces;
+/* defined globally for this particular test as the sigalrm handler
+ * depends on test_uffdio_*_eexist.
+ * XXX: define gopts in main() when we figure out a way to deal with
+ * test_uffdio_*_eexist.
+ */
+static uffd_global_test_opts_t *gopts;
+
+/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
+#define ALARM_INTERVAL_SECS 10
+static char *zeropage;
+pthread_attr_t attr;
+
+#define swap(a, b) \
+ do { __auto_type __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+const char *examples =
+ "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
+ "./uffd-stress anon 100 99999\n\n"
+ "# Run share memory test on 1GiB region with 99 bounces:\n"
+ "./uffd-stress shmem 1000 99\n\n"
+ "# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
+ "./uffd-stress hugetlb 256 50\n\n"
+ "# Run the same hugetlb test but using private file:\n"
+ "./uffd-stress hugetlb-private 256 50\n\n"
+ "# 10MiB-~6GiB 999 bounces anonymous test, "
+ "continue forever unless an error triggers\n"
+ "while ./uffd-stress anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
+
+static void usage(void)
+{
+ fprintf(stderr, "\nUsage: ./uffd-stress <test type> <MiB> <bounces>\n\n");
+ fprintf(stderr, "Supported <test type>: anon, hugetlb, "
+ "hugetlb-private, shmem, shmem-private\n\n");
+ fprintf(stderr, "Examples:\n\n");
+ fprintf(stderr, "%s", examples);
+ exit(1);
+}
+
+static void uffd_stats_reset(uffd_global_test_opts_t *gopts, struct uffd_args *args,
+ unsigned long n_cpus)
+{
+ int i;
+
+ for (i = 0; i < n_cpus; i++) {
+ args[i].cpu = i;
+ args[i].apply_wp = gopts->test_uffdio_wp;
+ args[i].missing_faults = 0;
+ args[i].wp_faults = 0;
+ args[i].minor_faults = 0;
+ args[i].gopts = gopts;
+ }
+}
+
+static void *locking_thread(void *arg)
+{
+ struct uffd_args *args = (struct uffd_args *) arg;
+ uffd_global_test_opts_t *gopts = args->gopts;
+ unsigned long cpu = (unsigned long) args->cpu;
+ unsigned long page_nr;
+ unsigned long long count;
+
+ if (!(bounces & BOUNCE_RANDOM)) {
+ page_nr = -bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ page_nr += cpu * gopts->nr_pages_per_cpu;
+ }
+
+ while (!gopts->finished) {
+ if (bounces & BOUNCE_RANDOM) {
+ if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr))
+ err("getrandom failed");
+ } else
+ page_nr += 1;
+ page_nr %= gopts->nr_pages;
+ pthread_mutex_lock(area_mutex(gopts->area_dst, page_nr, gopts));
+ count = *area_count(gopts->area_dst, page_nr, gopts);
+ if (count != gopts->count_verify[page_nr])
+ err("page_nr %lu memory corruption %llu %llu",
+ page_nr, count, gopts->count_verify[page_nr]);
+ count++;
+ *area_count(gopts->area_dst, page_nr, gopts) = gopts->count_verify[page_nr] = count;
+ pthread_mutex_unlock(area_mutex(gopts->area_dst, page_nr, gopts));
+ }
+
+ return NULL;
+}
+
+static int copy_page_retry(uffd_global_test_opts_t *gopts, unsigned long offset)
+{
+ return __copy_page(gopts, offset, true, gopts->test_uffdio_wp);
+}
+
+pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void *uffd_read_thread(void *arg)
+{
+ struct uffd_args *args = (struct uffd_args *)arg;
+ uffd_global_test_opts_t *gopts = args->gopts;
+ struct uffd_msg msg;
+
+ pthread_mutex_unlock(&uffd_read_mutex);
+ /* from here cancellation is ok */
+
+ for (;;) {
+ if (uffd_read_msg(gopts, &msg))
+ continue;
+ uffd_handle_page_fault(gopts, &msg, args);
+ }
+
+ return NULL;
+}
+
+static void *background_thread(void *arg)
+{
+ struct uffd_args *args = (struct uffd_args *) arg;
+ uffd_global_test_opts_t *gopts = args->gopts;
+ unsigned long cpu = (unsigned long) args->cpu;
+ unsigned long page_nr, start_nr, mid_nr, end_nr;
+
+ start_nr = cpu * gopts->nr_pages_per_cpu;
+ end_nr = (cpu+1) * gopts->nr_pages_per_cpu;
+ mid_nr = (start_nr + end_nr) / 2;
+
+ /* Copy the first half of the pages */
+ for (page_nr = start_nr; page_nr < mid_nr; page_nr++)
+ copy_page_retry(gopts, page_nr * gopts->page_size);
+
+ /*
+ * If we need to test uffd-wp, set it up now. Then we'll have
+ * at least the first half of the pages mapped already which
+ * can be write-protected for testing
+ */
+ if (gopts->test_uffdio_wp)
+ wp_range(gopts->uffd, (unsigned long)gopts->area_dst + start_nr * gopts->page_size,
+ gopts->nr_pages_per_cpu * gopts->page_size, true);
+
+ /*
+ * Continue the 2nd half of the page copying, handling write
+ * protection faults if any
+ */
+ for (page_nr = mid_nr; page_nr < end_nr; page_nr++)
+ copy_page_retry(gopts, page_nr * gopts->page_size);
+
+ return NULL;
+}
+
+static int stress(struct uffd_args *args)
+{
+ unsigned long cpu;
+ uffd_global_test_opts_t *gopts = args->gopts;
+ pthread_t locking_threads[gopts->nr_parallel];
+ pthread_t uffd_threads[gopts->nr_parallel];
+ pthread_t background_threads[gopts->nr_parallel];
+
+ gopts->finished = 0;
+ for (cpu = 0; cpu < gopts->nr_parallel; cpu++) {
+ if (pthread_create(&locking_threads[cpu], &attr,
+ locking_thread, (void *)&args[cpu]))
+ return 1;
+ if (bounces & BOUNCE_POLL) {
+ if (pthread_create(&uffd_threads[cpu],
+ &attr,
+ uffd_poll_thread,
+ (void *) &args[cpu]))
+ err("uffd_poll_thread create");
+ } else {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_read_thread,
+ (void *)&args[cpu]))
+ return 1;
+ pthread_mutex_lock(&uffd_read_mutex);
+ }
+ if (pthread_create(&background_threads[cpu], &attr,
+ background_thread, (void *)&args[cpu]))
+ return 1;
+ }
+ for (cpu = 0; cpu < gopts->nr_parallel; cpu++)
+ if (pthread_join(background_threads[cpu], NULL))
+ return 1;
+
+ /*
+ * Be strict and immediately zap area_src, the whole area has
+ * been transferred already by the background treads. The
+ * area_src could then be faulted in a racy way by still
+ * running uffdio_threads reading zeropages after we zapped
+ * area_src (but they're guaranteed to get -EEXIST from
+ * UFFDIO_COPY without writing zero pages into area_dst
+ * because the background threads already completed).
+ */
+ uffd_test_ops->release_pages(gopts, gopts->area_src);
+
+ gopts->finished = 1;
+ for (cpu = 0; cpu < gopts->nr_parallel; cpu++)
+ if (pthread_join(locking_threads[cpu], NULL))
+ return 1;
+
+ for (cpu = 0; cpu < gopts->nr_parallel; cpu++) {
+ char c = '\0';
+ if (bounces & BOUNCE_POLL) {
+ if (write(gopts->pipefd[cpu*2+1], &c, 1) != 1)
+ err("pipefd write error");
+ if (pthread_join(uffd_threads[cpu],
+ (void *)&args[cpu]))
+ return 1;
+ } else {
+ if (pthread_cancel(uffd_threads[cpu]))
+ return 1;
+ if (pthread_join(uffd_threads[cpu], NULL))
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int userfaultfd_stress(uffd_global_test_opts_t *gopts)
+{
+ void *area;
+ unsigned long nr;
+ struct uffd_args args[gopts->nr_parallel];
+ uint64_t mem_size = gopts->nr_pages * gopts->page_size;
+ int flags = 0;
+
+ memset(args, 0, sizeof(struct uffd_args) * gopts->nr_parallel);
+
+ if (features & UFFD_FEATURE_WP_UNPOPULATED && gopts->test_type == TEST_ANON)
+ flags = UFFD_FEATURE_WP_UNPOPULATED;
+
+ if (uffd_test_ctx_init(gopts, flags, NULL))
+ err("context init failed");
+
+ if (posix_memalign(&area, gopts->page_size, gopts->page_size))
+ err("out of memory");
+ zeropage = area;
+ bzero(zeropage, gopts->page_size);
+
+ pthread_mutex_lock(&uffd_read_mutex);
+
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 16*1024*1024);
+
+ while (bounces--) {
+ printf("bounces: %d, mode:", bounces);
+ if (bounces & BOUNCE_RANDOM)
+ printf(" rnd");
+ if (bounces & BOUNCE_RACINGFAULTS)
+ printf(" racing");
+ if (bounces & BOUNCE_VERIFY)
+ printf(" ver");
+ if (bounces & BOUNCE_POLL)
+ printf(" poll");
+ else
+ printf(" read");
+ printf(", ");
+ fflush(stdout);
+
+ if (bounces & BOUNCE_POLL)
+ fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);
+ else
+ fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags & ~O_NONBLOCK);
+
+ /* register */
+ if (uffd_register(gopts->uffd, gopts->area_dst, mem_size,
+ true, gopts->test_uffdio_wp, false))
+ err("register failure");
+
+ if (gopts->area_dst_alias) {
+ if (uffd_register(gopts->uffd, gopts->area_dst_alias, mem_size,
+ true, gopts->test_uffdio_wp, false))
+ err("register failure alias");
+ }
+
+ /*
+ * The madvise done previously isn't enough: some
+ * uffd_thread could have read userfaults (one of
+ * those already resolved by the background thread)
+ * and it may be in the process of calling
+ * UFFDIO_COPY. UFFDIO_COPY will read the zapped
+ * area_src and it would map a zero page in it (of
+ * course such a UFFDIO_COPY is perfectly safe as it'd
+ * return -EEXIST). The problem comes at the next
+ * bounce though: that racing UFFDIO_COPY would
+ * generate zeropages in the area_src, so invalidating
+ * the previous MADV_DONTNEED. Without this additional
+ * MADV_DONTNEED those zeropages leftovers in the
+ * area_src would lead to -EEXIST failure during the
+ * next bounce, effectively leaving a zeropage in the
+ * area_dst.
+ *
+ * Try to comment this out madvise to see the memory
+ * corruption being caught pretty quick.
+ *
+ * khugepaged is also inhibited to collapse THP after
+ * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
+ * required to MADV_DONTNEED here.
+ */
+ uffd_test_ops->release_pages(gopts, gopts->area_dst);
+
+ uffd_stats_reset(gopts, args, gopts->nr_parallel);
+
+ /* bounce pass */
+ if (stress(args)) {
+ uffd_test_ctx_clear(gopts);
+ return 1;
+ }
+
+ /* Clear all the write protections if there is any */
+ if (gopts->test_uffdio_wp)
+ wp_range(gopts->uffd, (unsigned long)gopts->area_dst,
+ gopts->nr_pages * gopts->page_size, false);
+
+ /* unregister */
+ if (uffd_unregister(gopts->uffd, gopts->area_dst, mem_size))
+ err("unregister failure");
+ if (gopts->area_dst_alias) {
+ if (uffd_unregister(gopts->uffd, gopts->area_dst_alias, mem_size))
+ err("unregister failure alias");
+ }
+
+ /* verification */
+ if (bounces & BOUNCE_VERIFY)
+ for (nr = 0; nr < gopts->nr_pages; nr++)
+ if (*area_count(gopts->area_dst, nr, gopts) !=
+ gopts->count_verify[nr])
+ err("error area_count %llu %llu %lu\n",
+ *area_count(gopts->area_src, nr, gopts),
+ gopts->count_verify[nr], nr);
+
+ /* prepare next bounce */
+ swap(gopts->area_src, gopts->area_dst);
+
+ swap(gopts->area_src_alias, gopts->area_dst_alias);
+
+ uffd_stats_report(args, gopts->nr_parallel);
+ }
+ uffd_test_ctx_clear(gopts);
+
+ return 0;
+}
+
+static void set_test_type(uffd_global_test_opts_t *gopts, const char *type)
+{
+ if (!strcmp(type, "anon")) {
+ gopts->test_type = TEST_ANON;
+ uffd_test_ops = &anon_uffd_test_ops;
+ } else if (!strcmp(type, "hugetlb")) {
+ gopts->test_type = TEST_HUGETLB;
+ uffd_test_ops = &hugetlb_uffd_test_ops;
+ gopts->map_shared = true;
+ } else if (!strcmp(type, "hugetlb-private")) {
+ gopts->test_type = TEST_HUGETLB;
+ uffd_test_ops = &hugetlb_uffd_test_ops;
+ } else if (!strcmp(type, "shmem")) {
+ gopts->map_shared = true;
+ gopts->test_type = TEST_SHMEM;
+ uffd_test_ops = &shmem_uffd_test_ops;
+ } else if (!strcmp(type, "shmem-private")) {
+ gopts->test_type = TEST_SHMEM;
+ uffd_test_ops = &shmem_uffd_test_ops;
+ }
+}
+
+static void parse_test_type_arg(uffd_global_test_opts_t *gopts, const char *raw_type)
+{
+ set_test_type(gopts, raw_type);
+
+ if (!gopts->test_type)
+ err("failed to parse test type argument: '%s'", raw_type);
+
+ if (gopts->test_type == TEST_HUGETLB)
+ gopts->page_size = default_huge_page_size();
+ else
+ gopts->page_size = sysconf(_SC_PAGE_SIZE);
+
+ if (!gopts->page_size)
+ err("Unable to determine page size");
+ if ((unsigned long) area_count(NULL, 0, gopts) + sizeof(unsigned long long) * 2
+ > gopts->page_size)
+ err("Impossible to run this test");
+
+ /*
+ * Whether we can test certain features depends not just on test type,
+ * but also on whether or not this particular kernel supports the
+ * feature.
+ */
+
+ if (uffd_get_features(&features) && errno == ENOENT)
+ ksft_exit_skip("failed to get available features (%d)\n", errno);
+
+ gopts->test_uffdio_wp = gopts->test_uffdio_wp &&
+ (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP);
+
+ if (gopts->test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
+ gopts->test_uffdio_wp = false;
+
+ close(gopts->uffd);
+ gopts->uffd = -1;
+}
+
+static void sigalrm(int sig)
+{
+ if (sig != SIGALRM)
+ abort();
+ gopts->test_uffdio_copy_eexist = true;
+ alarm(ALARM_INTERVAL_SECS);
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long nr_cpus;
+ size_t bytes;
+
+ gopts = (uffd_global_test_opts_t *) malloc(sizeof(uffd_global_test_opts_t));
+
+ if (argc < 4)
+ usage();
+
+ if (signal(SIGALRM, sigalrm) == SIG_ERR)
+ err("failed to arm SIGALRM");
+ alarm(ALARM_INTERVAL_SECS);
+
+ parse_test_type_arg(gopts, argv[1]);
+ bytes = atol(argv[2]) * 1024 * 1024;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (nr_cpus > 32) {
+ /* Don't let calculation below go to zero. */
+ ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n",
+ nr_cpus);
+ gopts->nr_parallel = 32;
+ } else {
+ gopts->nr_parallel = nr_cpus;
+ }
+
+ /*
+ * src and dst each require bytes / page_size number of hugepages.
+ * Ensure nr_parallel - 1 hugepages on top of that to account
+ * for racy extra reservation of hugepages.
+ */
+ if (gopts->test_type == TEST_HUGETLB &&
+ get_free_hugepages() < 2 * (bytes / gopts->page_size) + gopts->nr_parallel - 1) {
+ printf("skip: Skipping userfaultfd... not enough hugepages\n");
+ return KSFT_SKIP;
+ }
+
+ gopts->nr_pages_per_cpu = bytes / gopts->page_size / gopts->nr_parallel;
+ if (!gopts->nr_pages_per_cpu) {
+ _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)",
+ bytes, gopts->page_size, gopts->nr_parallel);
+ usage();
+ }
+
+ bounces = atoi(argv[3]);
+ if (bounces <= 0) {
+ _err("invalid bounces");
+ usage();
+ }
+ gopts->nr_pages = gopts->nr_pages_per_cpu * gopts->nr_parallel;
+
+ printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+ gopts->nr_pages, gopts->nr_pages_per_cpu);
+ return userfaultfd_stress(gopts);
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
new file mode 100644
index 000000000000..f4807242c5b2
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -0,0 +1,1813 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Userfaultfd unit tests.
+ *
+ * Copyright (C) 2015-2023 Red Hat, Inc.
+ */
+
+#include "uffd-common.h"
+
+#include "../../../../mm/gup_test.h"
+
+#ifdef __NR_userfaultfd
+
+/* The unit test doesn't need a large or random size, make it 32MB for now */
+#define UFFD_TEST_MEM_SIZE (32UL << 20)
+
+#define MEM_ANON BIT_ULL(0)
+#define MEM_SHMEM BIT_ULL(1)
+#define MEM_SHMEM_PRIVATE BIT_ULL(2)
+#define MEM_HUGETLB BIT_ULL(3)
+#define MEM_HUGETLB_PRIVATE BIT_ULL(4)
+
+#define MEM_ALL (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \
+ MEM_HUGETLB | MEM_HUGETLB_PRIVATE)
+
+#define ALIGN_UP(x, align_to) \
+ ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1)))
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+struct mem_type {
+ const char *name;
+ unsigned int mem_flag;
+ uffd_test_ops_t *mem_ops;
+ bool shared;
+};
+typedef struct mem_type mem_type_t;
+
+mem_type_t mem_types[] = {
+ {
+ .name = "anon",
+ .mem_flag = MEM_ANON,
+ .mem_ops = &anon_uffd_test_ops,
+ .shared = false,
+ },
+ {
+ .name = "shmem",
+ .mem_flag = MEM_SHMEM,
+ .mem_ops = &shmem_uffd_test_ops,
+ .shared = true,
+ },
+ {
+ .name = "shmem-private",
+ .mem_flag = MEM_SHMEM_PRIVATE,
+ .mem_ops = &shmem_uffd_test_ops,
+ .shared = false,
+ },
+ {
+ .name = "hugetlb",
+ .mem_flag = MEM_HUGETLB,
+ .mem_ops = &hugetlb_uffd_test_ops,
+ .shared = true,
+ },
+ {
+ .name = "hugetlb-private",
+ .mem_flag = MEM_HUGETLB_PRIVATE,
+ .mem_ops = &hugetlb_uffd_test_ops,
+ .shared = false,
+ },
+};
+
+/* Arguments to be passed over to each uffd unit test */
+struct uffd_test_args {
+ mem_type_t *mem_type;
+};
+typedef struct uffd_test_args uffd_test_args_t;
+
+/* Returns: UFFD_TEST_* */
+typedef void (*uffd_test_fn)(uffd_global_test_opts_t *, uffd_test_args_t *);
+
+typedef struct {
+ const char *name;
+ uffd_test_fn uffd_fn;
+ unsigned int mem_targets;
+ uint64_t uffd_feature_required;
+ uffd_test_case_ops_t *test_case_ops;
+} uffd_test_case_t;
+
+static void uffd_test_report(void)
+{
+ printf("Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n",
+ ksft_get_pass_cnt(),
+ ksft_get_xskip_cnt(),
+ ksft_get_fail_cnt(),
+ ksft_test_num());
+}
+
+static void uffd_test_pass(void)
+{
+ printf("done\n");
+ ksft_inc_pass_cnt();
+}
+
+#define uffd_test_start(...) do { \
+ printf("Testing "); \
+ printf(__VA_ARGS__); \
+ printf("... "); \
+ fflush(stdout); \
+ } while (0)
+
+#define uffd_test_fail(...) do { \
+ printf("failed [reason: "); \
+ printf(__VA_ARGS__); \
+ printf("]\n"); \
+ ksft_inc_fail_cnt(); \
+ } while (0)
+
+static void uffd_test_skip(const char *message)
+{
+ printf("skipped [reason: %s]\n", message);
+ ksft_inc_xskip_cnt();
+}
+
+/*
+ * Returns 1 if specific userfaultfd supported, 0 otherwise. Note, we'll
+ * return 1 even if some test failed as long as uffd supported, because in
+ * that case we still want to proceed with the rest uffd unit tests.
+ */
+static int test_uffd_api(bool use_dev)
+{
+ struct uffdio_api uffdio_api;
+ int uffd;
+
+ uffd_test_start("UFFDIO_API (with %s)",
+ use_dev ? "/dev/userfaultfd" : "syscall");
+
+ if (use_dev)
+ uffd = uffd_open_dev(UFFD_FLAGS);
+ else
+ uffd = uffd_open_sys(UFFD_FLAGS);
+ if (uffd < 0) {
+ uffd_test_skip("cannot open userfaultfd handle");
+ return 0;
+ }
+
+ /* Test wrong UFFD_API */
+ uffdio_api.api = 0xab;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
+ uffd_test_fail("UFFDIO_API should fail with wrong api but didn't");
+ goto out;
+ }
+
+ /* Test wrong feature bit */
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = BIT_ULL(63);
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
+ uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't");
+ goto out;
+ }
+
+ /* Test normal UFFDIO_API */
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
+ uffd_test_fail("UFFDIO_API should succeed but failed");
+ goto out;
+ }
+
+ /* Test double requests of UFFDIO_API with a random feature set */
+ uffdio_api.features = BIT_ULL(0);
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
+ uffd_test_fail("UFFDIO_API should reject initialized uffd");
+ goto out;
+ }
+
+ uffd_test_pass();
+out:
+ close(uffd);
+ /* We have a valid uffd handle */
+ return 1;
+}
+
+
+static bool uffd_feature_supported(uffd_test_case_t *test)
+{
+ uint64_t features;
+
+ if (uffd_get_features(&features))
+ return false;
+
+ return (features & test->uffd_feature_required) ==
+ test->uffd_feature_required;
+}
+
+static int pagemap_open(void)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+
+ if (fd < 0)
+ err("open pagemap");
+
+ return fd;
+}
+
+/* This macro let __LINE__ works in err() */
+#define pagemap_check_wp(value, wp) do { \
+ if (!!(value & PM_UFFD_WP) != wp) \
+ err("pagemap uffd-wp bit error: 0x%"PRIx64, value); \
+ } while (0)
+
+typedef struct {
+ uffd_global_test_opts_t *gopts;
+ int child_uffd;
+} fork_event_args;
+
+static void *fork_event_consumer(void *data)
+{
+ fork_event_args *args = data;
+ struct uffd_msg msg = { 0 };
+
+ args->gopts->ready_for_fork = true;
+
+ /* Read until a full msg received */
+ while (uffd_read_msg(args->gopts, &msg));
+
+ if (msg.event != UFFD_EVENT_FORK)
+ err("wrong message: %u\n", msg.event);
+
+ /* Just to be properly freed later */
+ args->child_uffd = msg.arg.fork.ufd;
+ return NULL;
+}
+
+typedef struct {
+ int gup_fd;
+ bool pinned;
+} pin_args;
+
+/*
+ * Returns 0 if succeed, <0 for errors. pin_pages() needs to be paired
+ * with unpin_pages(). Currently it needs to be RO longterm pin to satisfy
+ * all needs of the test cases (e.g., trigger unshare, trigger fork() early
+ * CoW, etc.).
+ */
+static int pin_pages(pin_args *args, void *buffer, size_t size)
+{
+ struct pin_longterm_test test = {
+ .addr = (uintptr_t)buffer,
+ .size = size,
+ /* Read-only pins */
+ .flags = 0,
+ };
+
+ if (args->pinned)
+ err("already pinned");
+
+ args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (args->gup_fd < 0)
+ return -errno;
+
+ if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) {
+ /* Even if gup_test existed, can be an old gup_test / kernel */
+ close(args->gup_fd);
+ return -errno;
+ }
+ args->pinned = true;
+ return 0;
+}
+
+static void unpin_pages(pin_args *args)
+{
+ if (!args->pinned)
+ err("unpin without pin first");
+ if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_STOP))
+ err("PIN_LONGTERM_TEST_STOP");
+ close(args->gup_fd);
+ args->pinned = false;
+}
+
+static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bool test_pin)
+{
+ fork_event_args args = { .gopts = gopts, .child_uffd = -1 };
+ pthread_t thread;
+ pid_t child;
+ uint64_t value;
+ int fd, result;
+
+ /* Prepare a thread to resolve EVENT_FORK */
+ if (with_event) {
+ gopts->ready_for_fork = false;
+ if (pthread_create(&thread, NULL, fork_event_consumer, &args))
+ err("pthread_create()");
+ while (!gopts->ready_for_fork)
+ ; /* Wait for the poll_thread to start executing before forking */
+ }
+
+ child = fork();
+ if (!child) {
+ /* Open the pagemap fd of the child itself */
+ pin_args args = {};
+
+ fd = pagemap_open();
+
+ if (test_pin && pin_pages(&args, gopts->area_dst, gopts->page_size))
+ /*
+ * Normally when reach here we have pinned in
+ * previous tests, so shouldn't fail anymore
+ */
+ err("pin page failed in child");
+
+ value = pagemap_get_entry(fd, gopts->area_dst);
+ /*
+ * After fork(), we should handle uffd-wp bit differently:
+ *
+ * (1) when with EVENT_FORK, it should persist
+ * (2) when without EVENT_FORK, it should be dropped
+ */
+ pagemap_check_wp(value, with_event);
+ if (test_pin)
+ unpin_pages(&args);
+ /* Succeed */
+ exit(0);
+ }
+ waitpid(child, &result, 0);
+
+ if (with_event) {
+ if (pthread_join(thread, NULL))
+ err("pthread_join()");
+ if (args.child_uffd < 0)
+ err("Didn't receive child uffd");
+ close(args.child_uffd);
+ }
+
+ return result;
+}
+
+static void uffd_wp_unpopulated_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uint64_t value;
+ int pagemap_fd;
+
+ if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
+ false, true, false))
+ err("register failed");
+
+ pagemap_fd = pagemap_open();
+
+ /* Test applying pte marker to anon unpopulated */
+ wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
+ value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
+ pagemap_check_wp(value, true);
+
+ /* Test unprotect on anon pte marker */
+ wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false);
+ value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
+ pagemap_check_wp(value, false);
+
+ /* Test zap on anon marker */
+ wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
+ if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+ value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
+ pagemap_check_wp(value, false);
+
+ /* Test fault in after marker removed */
+ *gopts->area_dst = 1;
+ value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
+ pagemap_check_wp(value, false);
+ /* Drop it to make pte none again */
+ if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+
+ /* Test read-zero-page upon pte marker */
+ wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
+ *(volatile char *)gopts->area_dst;
+ /* Drop it to make pte none again */
+ if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+
+ uffd_test_pass();
+}
+
+static void uffd_wp_fork_test_common(uffd_global_test_opts_t *gopts, uffd_test_args_t *args,
+ bool with_event)
+{
+ int pagemap_fd;
+ uint64_t value;
+
+ if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
+ false, true, false))
+ err("register failed");
+
+ pagemap_fd = pagemap_open();
+
+ /* Touch the page */
+ *gopts->area_dst = 1;
+ wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
+ value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
+ pagemap_check_wp(value, true);
+ if (pagemap_test_fork(gopts, with_event, false)) {
+ uffd_test_fail("Detected %s uffd-wp bit in child in present pte",
+ with_event ? "missing" : "stall");
+ goto out;
+ }
+
+ /*
+ * This is an attempt for zapping the pgtable so as to test the
+ * markers.
+ *
+ * For private mappings, PAGEOUT will only work on exclusive ptes
+ * (PM_MMAP_EXCLUSIVE) which we should satisfy.
+ *
+ * For shared, PAGEOUT may not work. Use DONTNEED instead which
+ * plays a similar role of zapping (rather than freeing the page)
+ * to expose pte markers.
+ */
+ if (args->mem_type->shared) {
+ if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED))
+ err("MADV_DONTNEED");
+ } else {
+ /*
+ * NOTE: ignore retval because private-hugetlb doesn't yet
+ * support swapping, so it could fail.
+ */
+ madvise(gopts->area_dst, gopts->page_size, MADV_PAGEOUT);
+ }
+
+ /* Uffd-wp should persist even swapped out */
+ value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
+ pagemap_check_wp(value, true);
+ if (pagemap_test_fork(gopts, with_event, false)) {
+ uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte",
+ with_event ? "missing" : "stall");
+ goto out;
+ }
+
+ /* Unprotect; this tests swap pte modifications */
+ wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false);
+ value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
+ pagemap_check_wp(value, false);
+
+ /* Fault in the page from disk */
+ *gopts->area_dst = 2;
+ value = pagemap_get_entry(pagemap_fd, gopts->area_dst);
+ pagemap_check_wp(value, false);
+ uffd_test_pass();
+out:
+ if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size))
+ err("unregister failed");
+ close(pagemap_fd);
+}
+
+static void uffd_wp_fork_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_wp_fork_test_common(gopts, args, false);
+}
+
+static void uffd_wp_fork_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_wp_fork_test_common(gopts, args, true);
+}
+
+static void uffd_wp_fork_pin_test_common(uffd_global_test_opts_t *gopts,
+ uffd_test_args_t *args,
+ bool with_event)
+{
+ int pagemap_fd;
+ pin_args pin_args = {};
+
+ if (uffd_register(gopts->uffd, gopts->area_dst, gopts->page_size, false, true, false))
+ err("register failed");
+
+ pagemap_fd = pagemap_open();
+
+ /* Touch the page */
+ *gopts->area_dst = 1;
+ wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true);
+
+ /*
+ * 1. First pin, then fork(). This tests fork() special path when
+ * doing early CoW if the page is private.
+ */
+ if (pin_pages(&pin_args, gopts->area_dst, gopts->page_size)) {
+ uffd_test_skip("Possibly CONFIG_GUP_TEST missing "
+ "or unprivileged");
+ close(pagemap_fd);
+ uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size);
+ return;
+ }
+
+ if (pagemap_test_fork(gopts, with_event, false)) {
+ uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()",
+ with_event ? "missing" : "stall");
+ unpin_pages(&pin_args);
+ goto out;
+ }
+
+ unpin_pages(&pin_args);
+
+ /*
+ * 2. First fork(), then pin (in the child, where test_pin==true).
+ * This tests COR, aka, page unsharing on private memories.
+ */
+ if (pagemap_test_fork(gopts, with_event, true)) {
+ uffd_test_fail("Detected %s uffd-wp bit when RO pin",
+ with_event ? "missing" : "stall");
+ goto out;
+ }
+ uffd_test_pass();
+out:
+ if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size))
+ err("register failed");
+ close(pagemap_fd);
+}
+
+static void uffd_wp_fork_pin_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_wp_fork_pin_test_common(gopts, args, false);
+}
+
+static void uffd_wp_fork_pin_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_wp_fork_pin_test_common(gopts, args, true);
+}
+
+static void check_memory_contents(uffd_global_test_opts_t *gopts, char *p)
+{
+ unsigned long i, j;
+ uint8_t expected_byte;
+
+ for (i = 0; i < gopts->nr_pages; ++i) {
+ expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
+ for (j = 0; j < gopts->page_size; j++) {
+ uint8_t v = *(uint8_t *)(p + (i * gopts->page_size) + j);
+ if (v != expected_byte)
+ err("unexpected page contents");
+ }
+ }
+}
+
+static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_collapse, bool test_wp)
+{
+ unsigned long p;
+ pthread_t uffd_mon;
+ char c = '\0';
+ struct uffd_args args = { 0 };
+ args.gopts = gopts;
+
+ /*
+ * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing
+ * both do not make much sense.
+ */
+ assert(!(test_collapse && test_wp));
+
+ if (uffd_register(gopts->uffd, gopts->area_dst_alias, gopts->nr_pages * gopts->page_size,
+ /* NOTE! MADV_COLLAPSE may not work with uffd-wp */
+ false, test_wp, true))
+ err("register failure");
+
+ /*
+ * After registering with UFFD, populate the non-UFFD-registered side of
+ * the shared mapping. This should *not* trigger any UFFD minor faults.
+ */
+ for (p = 0; p < gopts->nr_pages; ++p)
+ memset(gopts->area_dst + (p * gopts->page_size), p % ((uint8_t)-1),
+ gopts->page_size);
+
+ args.apply_wp = test_wp;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ /*
+ * Read each of the pages back using the UFFD-registered mapping. We
+ * expect that the first time we touch a page, it will result in a minor
+ * fault. uffd_poll_thread will resolve the fault by bit-flipping the
+ * page's contents, and then issuing a CONTINUE ioctl.
+ */
+ check_memory_contents(gopts, gopts->area_dst_alias);
+
+ if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("join() failed");
+
+ if (test_collapse) {
+ if (madvise(gopts->area_dst_alias, gopts->nr_pages * gopts->page_size,
+ MADV_COLLAPSE)) {
+ /* It's fine to fail for this one... */
+ uffd_test_skip("MADV_COLLAPSE failed");
+ return;
+ }
+
+ uffd_test_ops->check_pmd_mapping(gopts,
+ gopts->area_dst,
+ gopts->nr_pages * gopts->page_size /
+ read_pmd_pagesize());
+ /*
+ * This won't cause uffd-fault - it purely just makes sure there
+ * was no corruption.
+ */
+ check_memory_contents(gopts, gopts->area_dst_alias);
+ }
+
+ if (args.missing_faults != 0 || args.minor_faults != gopts->nr_pages)
+ uffd_test_fail("stats check error");
+ else
+ uffd_test_pass();
+}
+
+void uffd_minor_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_minor_test_common(gopts, false, false);
+}
+
+void uffd_minor_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_minor_test_common(gopts, false, true);
+}
+
+void uffd_minor_collapse_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_minor_test_common(gopts, true, false);
+}
+
+static sigjmp_buf jbuf, *sigbuf;
+
+static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
+{
+ if (sig == SIGBUS) {
+ if (sigbuf)
+ siglongjmp(*sigbuf, 1);
+ abort();
+ }
+}
+
+/*
+ * For non-cooperative userfaultfd test we fork() a process that will
+ * generate pagefaults, will mremap the area monitored by the
+ * userfaultfd and at last this process will release the monitored
+ * area.
+ * For the anonymous and shared memory the area is divided into two
+ * parts, the first part is accessed before mremap, and the second
+ * part is accessed after mremap. Since hugetlbfs does not support
+ * mremap, the entire monitored area is accessed in a single pass for
+ * HUGETLB_TEST.
+ * The release of the pages currently generates event for shmem and
+ * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
+ * for hugetlb.
+ * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
+ * monitored area, generate pagefaults and test that signal is delivered.
+ * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
+ * test robustness use case - we release monitored area, fork a process
+ * that will generate pagefaults and verify signal is generated.
+ * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
+ * feature. Using monitor thread, verify no userfault events are generated.
+ */
+static int faulting_process(uffd_global_test_opts_t *gopts, int signal_test, bool wp)
+{
+ unsigned long nr, i;
+ unsigned long long count;
+ unsigned long split_nr_pages;
+ unsigned long lastnr;
+ struct sigaction act;
+ volatile unsigned long signalled = 0;
+
+ split_nr_pages = (gopts->nr_pages + 1) / 2;
+
+ if (signal_test) {
+ sigbuf = &jbuf;
+ memset(&act, 0, sizeof(act));
+ act.sa_sigaction = sighndl;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGBUS, &act, 0))
+ err("sigaction");
+ lastnr = (unsigned long)-1;
+ }
+
+ for (nr = 0; nr < split_nr_pages; nr++) {
+ volatile int steps = 1;
+ unsigned long offset = nr * gopts->page_size;
+
+ if (signal_test) {
+ if (sigsetjmp(*sigbuf, 1) != 0) {
+ if (steps == 1 && nr == lastnr)
+ err("Signal repeated");
+
+ lastnr = nr;
+ if (signal_test == 1) {
+ if (steps == 1) {
+ /* This is a MISSING request */
+ steps++;
+ if (copy_page(gopts, offset, wp))
+ signalled++;
+ } else {
+ /* This is a WP request */
+ assert(steps == 2);
+ wp_range(gopts->uffd,
+ (__u64)gopts->area_dst +
+ offset,
+ gopts->page_size, false);
+ }
+ } else {
+ signalled++;
+ continue;
+ }
+ }
+ }
+
+ count = *area_count(gopts->area_dst, nr, gopts);
+ if (count != gopts->count_verify[nr])
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, gopts->count_verify[nr]);
+ /*
+ * Trigger write protection if there is by writing
+ * the same value back.
+ */
+ *area_count(gopts->area_dst, nr, gopts) = count;
+ }
+
+ if (signal_test)
+ return signalled != split_nr_pages;
+
+ gopts->area_dst = mremap(gopts->area_dst, gopts->nr_pages * gopts->page_size,
+ gopts->nr_pages * gopts->page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED,
+ gopts->area_src);
+ if (gopts->area_dst == MAP_FAILED)
+ err("mremap");
+ /* Reset area_src since we just clobbered it */
+ gopts->area_src = NULL;
+
+ for (; nr < gopts->nr_pages; nr++) {
+ count = *area_count(gopts->area_dst, nr, gopts);
+ if (count != gopts->count_verify[nr]) {
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, gopts->count_verify[nr]);
+ }
+ /*
+ * Trigger write protection if there is by writing
+ * the same value back.
+ */
+ *area_count(gopts->area_dst, nr, gopts) = count;
+ }
+
+ uffd_test_ops->release_pages(gopts, gopts->area_dst);
+
+ for (nr = 0; nr < gopts->nr_pages; nr++)
+ for (i = 0; i < gopts->page_size; i++)
+ if (*(gopts->area_dst + nr * gopts->page_size + i) != 0)
+ err("page %lu offset %lu is not zero", nr, i);
+
+ return 0;
+}
+
+static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp)
+{
+ unsigned long userfaults;
+ pthread_t uffd_mon;
+ pid_t pid;
+ int err;
+ char c = '\0';
+ struct uffd_args args = { 0 };
+ args.gopts = gopts;
+
+ gopts->ready_for_fork = false;
+
+ fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);
+
+ if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
+ true, wp, false))
+ err("register failure");
+
+ if (faulting_process(gopts, 1, wp))
+ err("faulting process failed");
+
+ uffd_test_ops->release_pages(gopts, gopts->area_dst);
+
+ args.apply_wp = wp;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ while (!gopts->ready_for_fork)
+ ; /* Wait for the poll_thread to start executing before forking */
+
+ pid = fork();
+ if (pid < 0)
+ err("fork");
+
+ if (!pid)
+ exit(faulting_process(gopts, 2, wp));
+
+ waitpid(pid, &err, 0);
+ if (err)
+ err("faulting process failed");
+ if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, (void **)&userfaults))
+ err("pthread_join()");
+
+ if (userfaults)
+ uffd_test_fail("Signal test failed, userfaults: %ld", userfaults);
+ else
+ uffd_test_pass();
+}
+
+static void uffd_sigbus_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_sigbus_test_common(gopts, false);
+}
+
+static void uffd_sigbus_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_sigbus_test_common(gopts, true);
+}
+
+static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp)
+{
+ pthread_t uffd_mon;
+ pid_t pid;
+ int err;
+ char c = '\0';
+ struct uffd_args args = { 0 };
+ args.gopts = gopts;
+
+ gopts->ready_for_fork = false;
+
+ fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);
+ if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
+ true, wp, false))
+ err("register failure");
+
+ args.apply_wp = wp;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ while (!gopts->ready_for_fork)
+ ; /* Wait for the poll_thread to start executing before forking */
+
+ pid = fork();
+ if (pid < 0)
+ err("fork");
+
+ if (!pid)
+ exit(faulting_process(gopts, 0, wp));
+
+ waitpid(pid, &err, 0);
+ if (err)
+ err("faulting process failed");
+ if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("pthread_join()");
+
+ if (args.missing_faults != gopts->nr_pages)
+ uffd_test_fail("Fault counts wrong");
+ else
+ uffd_test_pass();
+}
+
+static void uffd_events_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_events_test_common(gopts, false);
+}
+
+static void uffd_events_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ uffd_events_test_common(gopts, true);
+}
+
+static void retry_uffdio_zeropage(uffd_global_test_opts_t *gopts,
+ struct uffdio_zeropage *uffdio_zeropage)
+{
+ uffd_test_ops->alias_mapping(gopts, &uffdio_zeropage->range.start,
+ uffdio_zeropage->range.len,
+ 0);
+ if (ioctl(gopts->uffd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
+ if (uffdio_zeropage->zeropage != -EEXIST)
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
+ (int64_t)uffdio_zeropage->zeropage);
+ } else {
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
+ (int64_t)uffdio_zeropage->zeropage);
+ }
+}
+
+static bool do_uffdio_zeropage(uffd_global_test_opts_t *gopts, bool has_zeropage)
+{
+ struct uffdio_zeropage uffdio_zeropage = { 0 };
+ int ret;
+ __s64 res;
+
+ uffdio_zeropage.range.start = (unsigned long) gopts->area_dst;
+ uffdio_zeropage.range.len = gopts->page_size;
+ uffdio_zeropage.mode = 0;
+ ret = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
+ res = uffdio_zeropage.zeropage;
+ if (ret) {
+ /* real retval in ufdio_zeropage.zeropage */
+ if (has_zeropage)
+ err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
+ else if (res != -EINVAL)
+ err("UFFDIO_ZEROPAGE not -EINVAL");
+ } else if (has_zeropage) {
+ if (res != gopts->page_size)
+ err("UFFDIO_ZEROPAGE unexpected size");
+ else
+ retry_uffdio_zeropage(gopts, &uffdio_zeropage);
+ return true;
+ } else
+ err("UFFDIO_ZEROPAGE succeeded");
+
+ return false;
+}
+
+/*
+ * Registers a range with MISSING mode only for zeropage test. Return true
+ * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register()
+ * because we want to detect .ioctls along the way.
+ */
+static bool
+uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len)
+{
+ uint64_t ioctls = 0;
+
+ if (uffd_register_with_ioctls(uffd, addr, len, true,
+ false, false, &ioctls))
+ err("zeropage register fail");
+
+ return ioctls & (1 << _UFFDIO_ZEROPAGE);
+}
+
+/* exercise UFFDIO_ZEROPAGE */
+static void uffd_zeropage_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ bool has_zeropage;
+ int i;
+
+ has_zeropage = uffd_register_detect_zeropage(gopts->uffd,
+ gopts->area_dst,
+ gopts->page_size);
+ if (gopts->area_dst_alias)
+ /* Ignore the retval; we already have it */
+ uffd_register_detect_zeropage(gopts->uffd, gopts->area_dst_alias, gopts->page_size);
+
+ if (do_uffdio_zeropage(gopts, has_zeropage))
+ for (i = 0; i < gopts->page_size; i++)
+ if (gopts->area_dst[i] != 0)
+ err("data non-zero at offset %d\n", i);
+
+ if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size))
+ err("unregister");
+
+ if (gopts->area_dst_alias && uffd_unregister(gopts->uffd,
+ gopts->area_dst_alias,
+ gopts->page_size))
+ err("unregister");
+
+ uffd_test_pass();
+}
+
+static void uffd_register_poison(int uffd, void *addr, uint64_t len)
+{
+ uint64_t ioctls = 0;
+ uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON);
+
+ if (uffd_register_with_ioctls(uffd, addr, len, true,
+ false, false, &ioctls))
+ err("poison register fail");
+
+ if ((ioctls & expected) != expected)
+ err("registered area doesn't support COPY and POISON ioctls");
+}
+
+static void do_uffdio_poison(uffd_global_test_opts_t *gopts, unsigned long offset)
+{
+ struct uffdio_poison uffdio_poison = { 0 };
+ int ret;
+ __s64 res;
+
+ uffdio_poison.range.start = (unsigned long) gopts->area_dst + offset;
+ uffdio_poison.range.len = gopts->page_size;
+ uffdio_poison.mode = 0;
+ ret = ioctl(gopts->uffd, UFFDIO_POISON, &uffdio_poison);
+ res = uffdio_poison.updated;
+
+ if (ret)
+ err("UFFDIO_POISON error: %"PRId64, (int64_t)res);
+ else if (res != gopts->page_size)
+ err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res);
+}
+
+static void uffd_poison_handle_fault(uffd_global_test_opts_t *gopts,
+ struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ unsigned long offset;
+
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
+
+ if (msg->arg.pagefault.flags &
+ (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR))
+ err("unexpected fault type %llu", msg->arg.pagefault.flags);
+
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst;
+ offset &= ~(gopts->page_size-1);
+
+ /* Odd pages -> copy zeroed page; even pages -> poison. */
+ if (offset & gopts->page_size)
+ copy_page(gopts, offset, false);
+ else
+ do_uffdio_poison(gopts, offset);
+}
+
+/* Make sure to cover odd/even, and minimum duplications */
+#define UFFD_POISON_TEST_NPAGES 4
+
+static void uffd_poison_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
+{
+ pthread_t uffd_mon;
+ char c;
+ struct uffd_args args = { 0 };
+ struct sigaction act = { 0 };
+ unsigned long nr_sigbus = 0;
+ unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES;
+
+ if (gopts->nr_pages < poison_pages) {
+ uffd_test_skip("Too less pages for POISON test");
+ return;
+ }
+
+ args.gopts = gopts;
+
+ fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK);
+
+ uffd_register_poison(gopts->uffd, gopts->area_dst, poison_pages * gopts->page_size);
+ memset(gopts->area_src, 0, poison_pages * gopts->page_size);
+
+ args.handle_fault = uffd_poison_handle_fault;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ sigbuf = &jbuf;
+ act.sa_sigaction = sighndl;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGBUS, &act, 0))
+ err("sigaction");
+
+ for (nr = 0; nr < poison_pages; ++nr) {
+ unsigned long offset = nr * gopts->page_size;
+ const char *bytes = (const char *) gopts->area_dst + offset;
+ const char *i;
+
+ if (sigsetjmp(*sigbuf, 1)) {
+ /*
+ * Access below triggered a SIGBUS, which was caught by
+ * sighndl, which then jumped here. Count this SIGBUS,
+ * and move on to next page.
+ */
+ ++nr_sigbus;
+ continue;
+ }
+
+ for (i = bytes; i < bytes + gopts->page_size; ++i) {
+ if (*i)
+ err("nonzero byte in area_dst (%p) at %p: %u",
+ gopts->area_dst, i, *i);
+ }
+ }
+
+ if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("pthread_join()");
+
+ if (nr_sigbus != poison_pages / 2)
+ err("expected to receive %lu SIGBUS, actually received %lu",
+ poison_pages / 2, nr_sigbus);
+
+ uffd_test_pass();
+}
+
+static void
+uffd_move_handle_fault_common(uffd_global_test_opts_t *gopts,
+ struct uffd_msg *msg,
+ struct uffd_args *args,
+ unsigned long len)
+{
+ unsigned long offset;
+
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
+
+ if (msg->arg.pagefault.flags &
+ (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE))
+ err("unexpected fault type %llu", msg->arg.pagefault.flags);
+
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst;
+ offset &= ~(len-1);
+
+ if (move_page(gopts, offset, len))
+ args->missing_faults++;
+}
+
+static void uffd_move_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ uffd_move_handle_fault_common(gopts, msg, args, gopts->page_size);
+}
+
+static void uffd_move_pmd_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ uffd_move_handle_fault_common(gopts, msg, args, read_pmd_pagesize());
+}
+
+static void
+uffd_move_test_common(uffd_global_test_opts_t *gopts,
+ uffd_test_args_t *targs,
+ unsigned long chunk_size,
+ void (*handle_fault)(struct uffd_global_test_opts *gopts,
+ struct uffd_msg *msg, struct uffd_args *args)
+)
+{
+ unsigned long nr;
+ pthread_t uffd_mon;
+ char c = '\0';
+ unsigned long long count;
+ struct uffd_args args = { 0 };
+ char *orig_area_src = NULL, *orig_area_dst = NULL;
+ unsigned long step_size, step_count;
+ unsigned long src_offs = 0;
+ unsigned long dst_offs = 0;
+
+ args.gopts = gopts;
+
+ /* Prevent source pages from being mapped more than once */
+ if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_DONTFORK))
+ err("madvise(MADV_DONTFORK) failure");
+
+ if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
+ true, false, false))
+ err("register failure");
+
+ args.handle_fault = handle_fault;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ step_size = chunk_size / gopts->page_size;
+ step_count = gopts->nr_pages / step_size;
+
+ if (chunk_size > gopts->page_size) {
+ char *aligned_src = ALIGN_UP(gopts->area_src, chunk_size);
+ char *aligned_dst = ALIGN_UP(gopts->area_dst, chunk_size);
+
+ if (aligned_src != gopts->area_src || aligned_dst != gopts->area_dst) {
+ src_offs = (aligned_src - gopts->area_src) / gopts->page_size;
+ dst_offs = (aligned_dst - gopts->area_dst) / gopts->page_size;
+ step_count--;
+ }
+ orig_area_src = gopts->area_src;
+ orig_area_dst = gopts->area_dst;
+ gopts->area_src = aligned_src;
+ gopts->area_dst = aligned_dst;
+ }
+
+ /*
+ * Read each of the pages back using the UFFD-registered mapping. We
+ * expect that the first time we touch a page, it will result in a missing
+ * fault. uffd_poll_thread will resolve the fault by moving source
+ * page to destination.
+ */
+ for (nr = 0; nr < step_count * step_size; nr += step_size) {
+ unsigned long i;
+
+ /* Check area_src content */
+ for (i = 0; i < step_size; i++) {
+ count = *area_count(gopts->area_src, nr + i, gopts);
+ if (count != gopts->count_verify[src_offs + nr + i])
+ err("nr %lu source memory invalid %llu %llu\n",
+ nr + i, count, gopts->count_verify[src_offs + nr + i]);
+ }
+
+ /* Faulting into area_dst should move the page or the huge page */
+ for (i = 0; i < step_size; i++) {
+ count = *area_count(gopts->area_dst, nr + i, gopts);
+ if (count != gopts->count_verify[dst_offs + nr + i])
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, gopts->count_verify[dst_offs + nr + i]);
+ }
+
+ /* Re-check area_src content which should be empty */
+ for (i = 0; i < step_size; i++) {
+ count = *area_count(gopts->area_src, nr + i, gopts);
+ if (count != 0)
+ err("nr %lu move failed %llu %llu\n",
+ nr, count, gopts->count_verify[src_offs + nr + i]);
+ }
+ }
+ if (chunk_size > gopts->page_size) {
+ gopts->area_src = orig_area_src;
+ gopts->area_dst = orig_area_dst;
+ }
+
+ if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("join() failed");
+
+ if (args.missing_faults != step_count || args.minor_faults != 0)
+ uffd_test_fail("stats check error");
+ else
+ uffd_test_pass();
+}
+
+static void uffd_move_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
+{
+ uffd_move_test_common(gopts, targs, gopts->page_size, uffd_move_handle_fault);
+}
+
+static void uffd_move_pmd_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
+{
+ if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE))
+ err("madvise(MADV_HUGEPAGE) failure");
+ uffd_move_test_common(gopts, targs, read_pmd_pagesize(),
+ uffd_move_pmd_handle_fault);
+}
+
+static void uffd_move_pmd_split_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
+{
+ if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE))
+ err("madvise(MADV_NOHUGEPAGE) failure");
+ uffd_move_test_common(gopts, targs, read_pmd_pagesize(),
+ uffd_move_pmd_handle_fault);
+}
+
+static bool
+uffdio_verify_results(const char *name, int ret, int error, long result)
+{
+ /*
+ * Should always return -1 with errno=EAGAIN, with corresponding
+ * result field updated in ioctl() args to be -EAGAIN too
+ * (e.g. copy.copy field for UFFDIO_COPY).
+ */
+ if (ret != -1) {
+ uffd_test_fail("%s should have returned -1", name);
+ return false;
+ }
+
+ if (error != EAGAIN) {
+ uffd_test_fail("%s should have errno==EAGAIN", name);
+ return false;
+ }
+
+ if (result != -EAGAIN) {
+ uffd_test_fail("%s should have been updated for -EAGAIN",
+ name);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * This defines a function to test one ioctl. Note that here "field" can
+ * be 1 or anything not -EAGAIN. With that initial value set, we can
+ * verify later that it should be updated by kernel (when -EAGAIN
+ * returned), by checking whether it is also updated to -EAGAIN.
+ */
+#define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field) \
+ static bool uffdio_mmap_changing_test_##name(int fd) \
+ { \
+ int ret; \
+ struct uffdio_##name args = { \
+ .field = 1, \
+ }; \
+ ret = ioctl(fd, ioctl_name, &args); \
+ return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \
+ }
+
+DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage)
+DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy)
+DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move)
+DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated)
+DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped)
+
+typedef enum {
+ /* We actually do not care about any state except UNINTERRUPTIBLE.. */
+ THR_STATE_UNKNOWN = 0,
+ THR_STATE_UNINTERRUPTIBLE,
+} thread_state;
+
+typedef struct {
+ uffd_global_test_opts_t *gopts;
+ volatile pid_t *pid;
+} mmap_changing_thread_args;
+
+static void sleep_short(void)
+{
+ usleep(1000);
+}
+
+static thread_state thread_state_get(pid_t tid)
+{
+ const char *header = "State:\t";
+ char tmp[256], *p, c;
+ FILE *fp;
+
+ snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid);
+ fp = fopen(tmp, "r");
+
+ if (!fp)
+ return THR_STATE_UNKNOWN;
+
+ while (fgets(tmp, sizeof(tmp), fp)) {
+ p = strstr(tmp, header);
+ if (p) {
+ /* For example, "State:\tD (disk sleep)" */
+ c = *(p + sizeof(header) - 1);
+ return c == 'D' ?
+ THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN;
+ }
+ }
+
+ return THR_STATE_UNKNOWN;
+}
+
+static void thread_state_until(pid_t tid, thread_state state)
+{
+ thread_state s;
+
+ do {
+ s = thread_state_get(tid);
+ sleep_short();
+ } while (s != state);
+}
+
+static void *uffd_mmap_changing_thread(void *opaque)
+{
+ mmap_changing_thread_args *args = opaque;
+ uffd_global_test_opts_t *gopts = args->gopts;
+ volatile pid_t *pid = args->pid;
+ int ret;
+
+ /* Unfortunately, it's only fetch-able from the thread itself.. */
+ assert(*pid == 0);
+ *pid = syscall(SYS_gettid);
+
+ /* Inject an event, this will hang solid until the event read */
+ ret = madvise(gopts->area_dst, gopts->page_size, MADV_REMOVE);
+ if (ret)
+ err("madvise(MADV_REMOVE) failed");
+
+ return NULL;
+}
+
+static void uffd_consume_message(uffd_global_test_opts_t *gopts)
+{
+ struct uffd_msg msg = { 0 };
+
+ while (uffd_read_msg(gopts, &msg));
+}
+
+static void uffd_mmap_changing_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs)
+{
+ /*
+ * This stores the real PID (which can be different from how tid is
+ * defined..) for the child thread, 0 means not initialized.
+ */
+ pid_t pid = 0;
+ pthread_t tid;
+ int ret;
+ mmap_changing_thread_args args = { gopts, &pid };
+
+ if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size,
+ true, false, false))
+ err("uffd_register() failed");
+
+ /* Create a thread to generate the racy event */
+ ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &args);
+ if (ret)
+ err("pthread_create() failed");
+
+ /*
+ * Wait until the thread setup the pid. Use volatile to make sure
+ * it reads from RAM not regs.
+ */
+ while (!(volatile pid_t)pid)
+ sleep_short();
+
+ /* Wait until the thread hangs at REMOVE event */
+ thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE);
+
+ if (!uffdio_mmap_changing_test_copy(gopts->uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_zeropage(gopts->uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_move(gopts->uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_poison(gopts->uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_continue(gopts->uffd))
+ return;
+
+ /*
+ * All succeeded above! Recycle everything. Start by reading the
+ * event so as to kick the thread roll again..
+ */
+ uffd_consume_message(gopts);
+
+ ret = pthread_join(tid, NULL);
+ assert(ret == 0);
+
+ uffd_test_pass();
+}
+
+static int prevent_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg)
+{
+ /* This should be done before source area is populated */
+ if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) {
+ /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */
+ if (errno != EINVAL) {
+ if (errmsg)
+ *errmsg = "madvise(MADV_NOHUGEPAGE) failed";
+ return -errno;
+ }
+ }
+ return 0;
+}
+
+static int request_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg)
+{
+ /* This should be done before source area is populated */
+ if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) {
+ if (errmsg) {
+ *errmsg = (errno == EINVAL) ?
+ "CONFIG_TRANSPARENT_HUGEPAGE is not set" :
+ "madvise(MADV_HUGEPAGE) failed";
+ }
+ return -errno;
+ }
+ return 0;
+}
+
+struct uffd_test_case_ops uffd_move_test_case_ops = {
+ .post_alloc = prevent_hugepages,
+};
+
+struct uffd_test_case_ops uffd_move_test_pmd_case_ops = {
+ .post_alloc = request_hugepages,
+};
+
+/*
+ * Test the returned uffdio_register.ioctls with different register modes.
+ * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
+ */
+static void
+do_register_ioctls_test(uffd_global_test_opts_t *gopts,
+ uffd_test_args_t *args,
+ bool miss,
+ bool wp,
+ bool minor)
+{
+ uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE);
+ mem_type_t *mem_type = args->mem_type;
+ int ret;
+
+ ret = uffd_register_with_ioctls(gopts->uffd, gopts->area_dst, gopts->page_size,
+ miss, wp, minor, &ioctls);
+
+ /*
+ * Handle special cases of UFFDIO_REGISTER here where it should
+ * just fail with -EINVAL first..
+ *
+ * Case 1: register MINOR on anon
+ * Case 2: register with no mode selected
+ */
+ if ((minor && (mem_type->mem_flag == MEM_ANON)) ||
+ (!miss && !wp && !minor)) {
+ if (ret != -EINVAL)
+ err("register (miss=%d, wp=%d, minor=%d) failed "
+ "with wrong errno=%d", miss, wp, minor, ret);
+ return;
+ }
+
+ /* UFFDIO_REGISTER should succeed, then check ioctls returned */
+ if (miss)
+ expected |= BIT_ULL(_UFFDIO_COPY);
+ if (wp)
+ expected |= BIT_ULL(_UFFDIO_WRITEPROTECT);
+ if (minor)
+ expected |= BIT_ULL(_UFFDIO_CONTINUE);
+
+ if ((ioctls & expected) != expected)
+ err("unexpected uffdio_register.ioctls "
+ "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", "
+ "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls);
+
+ if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size))
+ err("unregister");
+}
+
+static void uffd_register_ioctls_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args)
+{
+ int miss, wp, minor;
+
+ for (miss = 0; miss <= 1; miss++)
+ for (wp = 0; wp <= 1; wp++)
+ for (minor = 0; minor <= 1; minor++)
+ do_register_ioctls_test(gopts, args, miss, wp, minor);
+
+ uffd_test_pass();
+}
+
+uffd_test_case_t uffd_tests[] = {
+ {
+ /* Test returned uffdio_register.ioctls. */
+ .name = "register-ioctls",
+ .uffd_fn = uffd_register_ioctls_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS |
+ UFFD_FEATURE_MISSING_SHMEM |
+ UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
+ UFFD_FEATURE_MINOR_HUGETLBFS |
+ UFFD_FEATURE_MINOR_SHMEM,
+ },
+ {
+ .name = "zeropage",
+ .uffd_fn = uffd_zeropage_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = 0,
+ },
+ {
+ .name = "move",
+ .uffd_fn = uffd_move_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required = UFFD_FEATURE_MOVE,
+ .test_case_ops = &uffd_move_test_case_ops,
+ },
+ {
+ .name = "move-pmd",
+ .uffd_fn = uffd_move_pmd_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required = UFFD_FEATURE_MOVE,
+ .test_case_ops = &uffd_move_test_pmd_case_ops,
+ },
+ {
+ .name = "move-pmd-split",
+ .uffd_fn = uffd_move_pmd_split_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required = UFFD_FEATURE_MOVE,
+ .test_case_ops = &uffd_move_test_pmd_case_ops,
+ },
+ {
+ .name = "wp-fork",
+ .uffd_fn = uffd_wp_fork_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
+ },
+ {
+ .name = "wp-fork-with-event",
+ .uffd_fn = uffd_wp_fork_with_event_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
+ /* when set, child process should inherit uffd-wp bits */
+ UFFD_FEATURE_EVENT_FORK,
+ },
+ {
+ .name = "wp-fork-pin",
+ .uffd_fn = uffd_wp_fork_pin_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
+ },
+ {
+ .name = "wp-fork-pin-with-event",
+ .uffd_fn = uffd_wp_fork_pin_with_event_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
+ /* when set, child process should inherit uffd-wp bits */
+ UFFD_FEATURE_EVENT_FORK,
+ },
+ {
+ .name = "wp-unpopulated",
+ .uffd_fn = uffd_wp_unpopulated_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required =
+ UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED,
+ },
+ {
+ .name = "minor",
+ .uffd_fn = uffd_minor_test,
+ .mem_targets = MEM_SHMEM | MEM_HUGETLB,
+ .uffd_feature_required =
+ UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM,
+ },
+ {
+ .name = "minor-wp",
+ .uffd_fn = uffd_minor_wp_test,
+ .mem_targets = MEM_SHMEM | MEM_HUGETLB,
+ .uffd_feature_required =
+ UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
+ UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ /*
+ * HACK: here we leveraged WP_UNPOPULATED to detect whether
+ * minor mode supports wr-protect. There's no feature flag
+ * for it so this is the best we can test against.
+ */
+ UFFD_FEATURE_WP_UNPOPULATED,
+ },
+ {
+ .name = "minor-collapse",
+ .uffd_fn = uffd_minor_collapse_test,
+ /* MADV_COLLAPSE only works with shmem */
+ .mem_targets = MEM_SHMEM,
+ /* We can't test MADV_COLLAPSE, so try our luck */
+ .uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM,
+ },
+ {
+ .name = "sigbus",
+ .uffd_fn = uffd_sigbus_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_SIGBUS |
+ UFFD_FEATURE_EVENT_FORK,
+ },
+ {
+ .name = "sigbus-wp",
+ .uffd_fn = uffd_sigbus_wp_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_SIGBUS |
+ UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
+ },
+ {
+ .name = "events",
+ .uffd_fn = uffd_events_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
+ UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE,
+ },
+ {
+ .name = "events-wp",
+ .uffd_fn = uffd_events_wp_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
+ UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE |
+ UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
+ },
+ {
+ .name = "poison",
+ .uffd_fn = uffd_poison_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_POISON,
+ },
+ {
+ .name = "mmap-changing",
+ .uffd_fn = uffd_mmap_changing_test,
+ /*
+ * There's no point running this test over all mem types as
+ * they share the same code paths.
+ *
+ * Choose shmem for simplicity, because (1) shmem supports
+ * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is
+ * almost always available (unlike hugetlb). Here we
+ * abused SHMEM for UFFDIO_MOVE, but the test we want to
+ * cover doesn't yet need the correct memory type..
+ */
+ .mem_targets = MEM_SHMEM,
+ /*
+ * Any UFFD_FEATURE_EVENT_* should work to trigger the
+ * race logically, but choose the simplest (REMOVE).
+ *
+ * Meanwhile, since we'll cover quite a few new ioctl()s
+ * (CONTINUE, POISON, MOVE), skip this test for old kernels
+ * by choosing all of them.
+ */
+ .uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE |
+ UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON |
+ UFFD_FEATURE_MINOR_SHMEM,
+ },
+};
+
+static void usage(const char *prog)
+{
+ printf("usage: %s [-f TESTNAME]\n", prog);
+ puts("");
+ puts(" -f: test name to filter (e.g., event)");
+ puts(" -h: show the help msg");
+ puts(" -l: list tests only");
+ puts("");
+ exit(KSFT_FAIL);
+}
+
+int main(int argc, char *argv[])
+{
+ int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t);
+ int n_mems = sizeof(mem_types) / sizeof(mem_type_t);
+ const char *test_filter = NULL;
+ bool list_only = false;
+ uffd_test_case_t *test;
+ mem_type_t *mem_type;
+ uffd_test_args_t args;
+ const char *errmsg;
+ int has_uffd, opt;
+ int i, j;
+
+ while ((opt = getopt(argc, argv, "f:hl")) != -1) {
+ switch (opt) {
+ case 'f':
+ test_filter = optarg;
+ break;
+ case 'l':
+ list_only = true;
+ break;
+ case 'h':
+ default:
+ /* Unknown */
+ usage(argv[0]);
+ break;
+ }
+ }
+
+ if (!test_filter && !list_only) {
+ has_uffd = test_uffd_api(false);
+ has_uffd |= test_uffd_api(true);
+
+ if (!has_uffd) {
+ printf("Userfaultfd not supported or unprivileged, skip all tests\n");
+ exit(KSFT_SKIP);
+ }
+ }
+
+ for (i = 0; i < n_tests; i++) {
+ test = &uffd_tests[i];
+ if (test_filter && !strstr(test->name, test_filter))
+ continue;
+ if (list_only) {
+ printf("%s\n", test->name);
+ continue;
+ }
+ for (j = 0; j < n_mems; j++) {
+ mem_type = &mem_types[j];
+
+ /* Initialize global test options */
+ uffd_global_test_opts_t gopts = { 0 };
+
+ gopts.map_shared = mem_type->shared;
+ uffd_test_ops = mem_type->mem_ops;
+ uffd_test_case_ops = test->test_case_ops;
+
+ if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) {
+ gopts.page_size = default_huge_page_size();
+ if (gopts.page_size == 0) {
+ uffd_test_skip("huge page size is 0, feature missing?");
+ continue;
+ }
+ } else {
+ gopts.page_size = psize();
+ }
+
+ /* Ensure we have at least 2 pages */
+ gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2)
+ / gopts.page_size;
+
+ gopts.nr_parallel = 1;
+
+ /* Initialize test arguments */
+ args.mem_type = mem_type;
+
+ if (!(test->mem_targets & mem_type->mem_flag))
+ continue;
+
+ uffd_test_start("%s on %s", test->name, mem_type->name);
+ if (!uffd_feature_supported(test)) {
+ uffd_test_skip("feature missing");
+ continue;
+ }
+ if (uffd_test_ctx_init(&gopts, test->uffd_feature_required, &errmsg)) {
+ uffd_test_skip(errmsg);
+ continue;
+ }
+ test->uffd_fn(&gopts, &args);
+ uffd_test_ctx_clear(&gopts);
+ }
+ }
+
+ if (!list_only)
+ uffd_test_report();
+
+ return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c
new file mode 100644
index 000000000000..17186d4a4147
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-wp-mremap.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include "kselftest.h"
+#include "thp_settings.h"
+#include "uffd-common.h"
+
+static int pagemap_fd;
+static size_t pagesize;
+static int nr_pagesizes = 1;
+static int nr_thpsizes;
+static size_t thpsizes[20];
+static int nr_hugetlbsizes;
+static size_t hugetlbsizes[10];
+
+static int detect_thp_sizes(size_t sizes[], int max)
+{
+ int count = 0;
+ unsigned long orders;
+ size_t kb;
+ int i;
+
+ /* thp not supported at all. */
+ if (!read_pmd_pagesize())
+ return 0;
+
+ orders = thp_supported_orders();
+
+ for (i = 0; orders && count < max; i++) {
+ if (!(orders & (1UL << i)))
+ continue;
+ orders &= ~(1UL << i);
+ kb = (pagesize >> 10) << i;
+ sizes[count++] = kb * 1024;
+ ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
+ }
+
+ return count;
+}
+
+static void *mmap_aligned(size_t size, int prot, int flags)
+{
+ size_t mmap_size = size * 2;
+ char *mmap_mem, *mem;
+
+ mmap_mem = mmap(NULL, mmap_size, prot, flags, -1, 0);
+ if (mmap_mem == MAP_FAILED)
+ return mmap_mem;
+
+ mem = (char *)(((uintptr_t)mmap_mem + size - 1) & ~(size - 1));
+ munmap(mmap_mem, mem - mmap_mem);
+ munmap(mem + size, mmap_mem + mmap_size - mem - size);
+
+ return mem;
+}
+
+static void *alloc_one_folio(size_t size, bool private, bool hugetlb)
+{
+ bool thp = !hugetlb && size > pagesize;
+ int flags = MAP_ANONYMOUS;
+ int prot = PROT_READ | PROT_WRITE;
+ char *mem, *addr;
+
+ assert((size & (size - 1)) == 0);
+
+ if (private)
+ flags |= MAP_PRIVATE;
+ else
+ flags |= MAP_SHARED;
+
+ /*
+ * For THP, we must explicitly enable the THP size, allocate twice the
+ * required space then manually align.
+ */
+ if (thp) {
+ struct thp_settings settings = *thp_current_settings();
+
+ if (private)
+ settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS;
+ else
+ settings.shmem_hugepages[sz2ord(size, pagesize)].enabled = SHMEM_ALWAYS;
+
+ thp_push_settings(&settings);
+
+ mem = mmap_aligned(size, prot, flags);
+ } else {
+ if (hugetlb) {
+ flags |= MAP_HUGETLB;
+ flags |= __builtin_ctzll(size) << MAP_HUGE_SHIFT;
+ }
+
+ mem = mmap(NULL, size, prot, flags, -1, 0);
+ }
+
+ if (mem == MAP_FAILED) {
+ mem = NULL;
+ goto out;
+ }
+
+ assert(((uintptr_t)mem & (size - 1)) == 0);
+
+ /*
+ * Populate the folio by writing the first byte and check that all pages
+ * are populated. Finally set the whole thing to non-zero data to avoid
+ * kernel from mapping it back to the zero page.
+ */
+ mem[0] = 1;
+ for (addr = mem; addr < mem + size; addr += pagesize) {
+ if (!pagemap_is_populated(pagemap_fd, addr)) {
+ munmap(mem, size);
+ mem = NULL;
+ goto out;
+ }
+ }
+ memset(mem, 1, size);
+out:
+ if (thp)
+ thp_pop_settings();
+
+ return mem;
+}
+
+static bool check_uffd_wp_state(void *mem, size_t size, bool expect)
+{
+ uint64_t pte;
+ void *addr;
+
+ for (addr = mem; addr < mem + size; addr += pagesize) {
+ pte = pagemap_get_entry(pagemap_fd, addr);
+ if (!!(pte & PM_UFFD_WP) != expect) {
+ ksft_test_result_fail("uffd-wp not %s for pte %lu!\n",
+ expect ? "set" : "clear",
+ (addr - mem) / pagesize);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool range_is_swapped(void *addr, size_t size)
+{
+ for (; size; addr += pagesize, size -= pagesize)
+ if (!pagemap_is_swapped(pagemap_fd, addr))
+ return false;
+ return true;
+}
+
+static void test_one_folio(uffd_global_test_opts_t *gopts, size_t size, bool private,
+ bool swapout, bool hugetlb)
+{
+ struct uffdio_writeprotect wp_prms;
+ uint64_t features = 0;
+ void *addr = NULL;
+ void *mem = NULL;
+
+ assert(!(hugetlb && swapout));
+
+ ksft_print_msg("[RUN] %s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n",
+ __func__,
+ size,
+ private ? "true" : "false",
+ swapout ? "true" : "false",
+ hugetlb ? "true" : "false");
+
+ /* Allocate a folio of required size and type. */
+ mem = alloc_one_folio(size, private, hugetlb);
+ if (!mem) {
+ ksft_test_result_fail("alloc_one_folio() failed\n");
+ goto out;
+ }
+
+ /* Register range for uffd-wp. */
+ if (userfaultfd_open(gopts, &features)) {
+ if (errno == ENOENT)
+ ksft_test_result_skip("userfaultfd not available\n");
+ else
+ ksft_test_result_fail("userfaultfd_open() failed\n");
+ goto out;
+ }
+ if (uffd_register(gopts->uffd, mem, size, false, true, false)) {
+ ksft_test_result_fail("uffd_register() failed\n");
+ goto out;
+ }
+ wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+ wp_prms.range.start = (uintptr_t)mem;
+ wp_prms.range.len = size;
+ if (ioctl(gopts->uffd, UFFDIO_WRITEPROTECT, &wp_prms)) {
+ ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n");
+ goto out;
+ }
+
+ if (swapout) {
+ madvise(mem, size, MADV_PAGEOUT);
+ if (!range_is_swapped(mem, size)) {
+ ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ goto out;
+ }
+ }
+
+ /* Check that uffd-wp is set for all PTEs in range. */
+ if (!check_uffd_wp_state(mem, size, true))
+ goto out;
+
+ /*
+ * Move the mapping to a new, aligned location. Since
+ * UFFD_FEATURE_EVENT_REMAP is not set, we expect the uffd-wp bit for
+ * each PTE to be cleared in the new mapping.
+ */
+ addr = mmap_aligned(size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS);
+ if (addr == MAP_FAILED) {
+ ksft_test_result_fail("mmap_aligned() failed\n");
+ goto out;
+ }
+ if (mremap(mem, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, addr) == MAP_FAILED) {
+ ksft_test_result_fail("mremap() failed\n");
+ munmap(addr, size);
+ goto out;
+ }
+ mem = addr;
+
+ /* Check that uffd-wp is cleared for all PTEs in range. */
+ if (!check_uffd_wp_state(mem, size, false))
+ goto out;
+
+ ksft_test_result_pass("%s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n",
+ __func__,
+ size,
+ private ? "true" : "false",
+ swapout ? "true" : "false",
+ hugetlb ? "true" : "false");
+out:
+ if (mem)
+ munmap(mem, size);
+ if (gopts->uffd >= 0) {
+ close(gopts->uffd);
+ gopts->uffd = -1;
+ }
+}
+
+struct testcase {
+ size_t *sizes;
+ int *nr_sizes;
+ bool private;
+ bool swapout;
+ bool hugetlb;
+};
+
+static const struct testcase testcases[] = {
+ /* base pages. */
+ {
+ .sizes = &pagesize,
+ .nr_sizes = &nr_pagesizes,
+ .private = false,
+ .swapout = false,
+ .hugetlb = false,
+ },
+ {
+ .sizes = &pagesize,
+ .nr_sizes = &nr_pagesizes,
+ .private = true,
+ .swapout = false,
+ .hugetlb = false,
+ },
+ {
+ .sizes = &pagesize,
+ .nr_sizes = &nr_pagesizes,
+ .private = false,
+ .swapout = true,
+ .hugetlb = false,
+ },
+ {
+ .sizes = &pagesize,
+ .nr_sizes = &nr_pagesizes,
+ .private = true,
+ .swapout = true,
+ .hugetlb = false,
+ },
+
+ /* thp. */
+ {
+ .sizes = thpsizes,
+ .nr_sizes = &nr_thpsizes,
+ .private = false,
+ .swapout = false,
+ .hugetlb = false,
+ },
+ {
+ .sizes = thpsizes,
+ .nr_sizes = &nr_thpsizes,
+ .private = true,
+ .swapout = false,
+ .hugetlb = false,
+ },
+ {
+ .sizes = thpsizes,
+ .nr_sizes = &nr_thpsizes,
+ .private = false,
+ .swapout = true,
+ .hugetlb = false,
+ },
+ {
+ .sizes = thpsizes,
+ .nr_sizes = &nr_thpsizes,
+ .private = true,
+ .swapout = true,
+ .hugetlb = false,
+ },
+
+ /* hugetlb. */
+ {
+ .sizes = hugetlbsizes,
+ .nr_sizes = &nr_hugetlbsizes,
+ .private = false,
+ .swapout = false,
+ .hugetlb = true,
+ },
+ {
+ .sizes = hugetlbsizes,
+ .nr_sizes = &nr_hugetlbsizes,
+ .private = true,
+ .swapout = false,
+ .hugetlb = true,
+ },
+};
+
+int main(int argc, char **argv)
+{
+ uffd_global_test_opts_t gopts = { 0 };
+ struct thp_settings settings;
+ int i, j, plan = 0;
+
+ pagesize = getpagesize();
+ nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
+ nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+ ARRAY_SIZE(hugetlbsizes));
+
+ /* If THP is supported, save THP settings and initially disable THP. */
+ if (nr_thpsizes) {
+ thp_save_settings();
+ thp_read_settings(&settings);
+ for (i = 0; i < NR_ORDERS; i++) {
+ settings.hugepages[i].enabled = THP_NEVER;
+ settings.shmem_hugepages[i].enabled = SHMEM_NEVER;
+ }
+ thp_push_settings(&settings);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++)
+ plan += *testcases[i].nr_sizes;
+ ksft_set_plan(plan);
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+ const struct testcase *tc = &testcases[i];
+
+ for (j = 0; j < *tc->nr_sizes; j++)
+ test_one_folio(&gopts, tc->sizes[j], tc->private,
+ tc->swapout, tc->hugetlb);
+ }
+
+ /* If THP is supported, restore original THP settings. */
+ if (nr_thpsizes)
+ thp_restore_settings();
+
+ i = ksft_get_fail_cnt();
+ if (i)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ i, ksft_test_num());
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c
new file mode 100644
index 000000000000..02f290a69132
--- /dev/null
+++ b/tools/testing/selftests/mm/va_high_addr_switch.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Authors: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+ * Authors: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ */
+
+#include <stdio.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include "vm_util.h"
+#include "kselftest.h"
+
+/*
+ * The hint addr value is used to allocate addresses
+ * beyond the high address switch boundary.
+ */
+
+#define ADDR_MARK_128TB (1UL << 47)
+#define ADDR_MARK_256TB (1UL << 48)
+
+#define HIGH_ADDR_128TB (1UL << 48)
+#define HIGH_ADDR_256TB (1UL << 49)
+
+struct testcase {
+ void *addr;
+ unsigned long size;
+ unsigned long flags;
+ const char *msg;
+ unsigned int low_addr_required:1;
+ unsigned int keep_mapped:1;
+};
+
+static struct testcase *testcases;
+static struct testcase *hugetlb_testcases;
+static int sz_testcases, sz_hugetlb_testcases;
+static unsigned long switch_hint;
+
+/* Initialize testcases inside a function to compute parameters at runtime */
+void testcases_init(void)
+{
+ unsigned long pagesize = getpagesize();
+ unsigned long hugepagesize = default_huge_page_size();
+ unsigned long low_addr = (1UL << 30);
+ unsigned long addr_switch_hint = ADDR_MARK_128TB;
+ unsigned long high_addr = HIGH_ADDR_128TB;
+
+#ifdef __aarch64__
+
+ /* Post LPA2, the lower userspace VA on a 16K pagesize is 47 bits. */
+ if (pagesize != (16UL << 10)) {
+ addr_switch_hint = ADDR_MARK_256TB;
+ high_addr = HIGH_ADDR_256TB;
+ }
+#endif
+
+ struct testcase t[] = {
+ {
+ /*
+ * If stack is moved, we could possibly allocate
+ * this at the requested address.
+ */
+ .addr = ((void *)(addr_switch_hint - pagesize)),
+ .size = pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize, pagesize)",
+ .low_addr_required = 1,
+ },
+ {
+ /*
+ * Unless MAP_FIXED is specified, allocation based on hint
+ * addr is never at requested address or above it, which is
+ * beyond high address switch boundary in this case. Instead,
+ * a suitable allocation is found in lower address space.
+ */
+ .addr = ((void *)(addr_switch_hint - pagesize)),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize, (2 * pagesize))",
+ .low_addr_required = 1,
+ },
+ {
+ /*
+ * Exact mapping at high address switch boundary, should
+ * be obtained even without MAP_FIXED as area is free.
+ */
+ .addr = ((void *)(addr_switch_hint)),
+ .size = pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint, pagesize)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(addr_switch_hint),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)",
+ },
+ {
+ .addr = NULL,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)low_addr,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(low_addr)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(high_addr)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(high_addr) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(high_addr, MAP_FIXED)",
+ },
+ {
+ .addr = (void *) -1,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *) -1,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1) again",
+ },
+ {
+ .addr = ((void *)(addr_switch_hint - pagesize)),
+ .size = pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize, pagesize)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)(addr_switch_hint - pagesize),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize, 2 * pagesize)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(addr_switch_hint - pagesize / 2),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize/2 , 2 * pagesize)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = ((void *)(addr_switch_hint)),
+ .size = pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint, pagesize)",
+ },
+ {
+ .addr = (void *)(addr_switch_hint),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)",
+ },
+ };
+
+ struct testcase ht[] = {
+ {
+ .addr = NULL,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)low_addr,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(low_addr, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(high_addr, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(high_addr, MAP_HUGETLB) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(high_addr, MAP_FIXED | MAP_HUGETLB)",
+ },
+ {
+ .addr = (void *) -1,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *) -1,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB) again",
+ },
+ {
+ .addr = (void *)(addr_switch_hint - hugepagesize),
+ .size = 2 * hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - hugepagesize, 2*hugepagesize, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(addr_switch_hint),
+ .size = 2 * hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(addr_switch_hint , 2*hugepagesize, MAP_FIXED | MAP_HUGETLB)",
+ },
+ };
+
+ testcases = malloc(sizeof(t));
+ hugetlb_testcases = malloc(sizeof(ht));
+
+ /* Copy into global arrays */
+ memcpy(testcases, t, sizeof(t));
+ memcpy(hugetlb_testcases, ht, sizeof(ht));
+
+ sz_testcases = ARRAY_SIZE(t);
+ sz_hugetlb_testcases = ARRAY_SIZE(ht);
+ switch_hint = addr_switch_hint;
+}
+
+static int run_test(struct testcase *test, int count)
+{
+ void *p;
+ int i, ret = KSFT_PASS;
+
+ for (i = 0; i < count; i++) {
+ struct testcase *t = test + i;
+
+ p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0);
+
+ printf("%s: %p - ", t->msg, p);
+
+ if (p == MAP_FAILED) {
+ printf("FAILED\n");
+ ret = KSFT_FAIL;
+ continue;
+ }
+
+ if (t->low_addr_required && p >= (void *)(switch_hint)) {
+ printf("FAILED\n");
+ ret = KSFT_FAIL;
+ } else {
+ /*
+ * Do a dereference of the address returned so that we catch
+ * bugs in page fault handling
+ */
+ memset(p, 0, t->size);
+ printf("OK\n");
+ }
+ if (!t->keep_mapped)
+ munmap(p, t->size);
+ }
+
+ return ret;
+}
+
+#ifdef __aarch64__
+/* Check if userspace VA > 48 bits */
+static int high_address_present(void)
+{
+ void *ptr = mmap((void *)(1UL << 50), 1, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ if (ptr == MAP_FAILED)
+ return 0;
+
+ munmap(ptr, 1);
+ return 1;
+}
+#endif
+
+static int supported_arch(void)
+{
+#if defined(__powerpc64__)
+ return 1;
+#elif defined(__x86_64__)
+ return 1;
+#elif defined(__aarch64__)
+ return high_address_present();
+#else
+ return 0;
+#endif
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+
+ if (!supported_arch())
+ return KSFT_SKIP;
+
+ testcases_init();
+
+ ret = run_test(testcases, sz_testcases);
+ if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
+ ret = run_test(hugetlb_testcases, sz_hugetlb_testcases);
+ return ret;
+}
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
new file mode 100755
index 000000000000..a7d4b02b21dd
--- /dev/null
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Adam Sindelar (Meta) <adam@wowsignal.io>
+#
+# This is a test for mmap behavior with 5-level paging. This script wraps the
+# real test to check that the kernel is configured to support at least 5
+# pagetable levels.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+orig_nr_hugepages=0
+
+skip()
+{
+ echo "$1"
+ exit $ksft_skip
+}
+
+check_supported_x86_64()
+{
+ local config="/proc/config.gz"
+ [[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
+ [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot"
+
+ # gzip -dcfq automatically handles both compressed and plaintext input.
+ # See man 1 gzip under '-f'.
+ local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
+
+ local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;}
+ else {print 1}; exit}' /proc/cpuinfo 2>/dev/null)
+
+ if [[ "${pg_table_levels}" -lt 5 ]]; then
+ skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
+ elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then
+ skip "$0: CPU does not have the necessary la57 flag to support page table level 5"
+ fi
+}
+
+check_supported_ppc64()
+{
+ local config="/proc/config.gz"
+ [[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
+ [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot"
+
+ local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
+ if [[ "${pg_table_levels}" -lt 5 ]]; then
+ skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
+ fi
+
+ local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}')
+ if [[ "$mmu_support" != "radix" ]]; then
+ skip "$0: System does not use Radix MMU, required for 5-level paging"
+ fi
+
+ local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo)
+ if [[ "${hugepages_total}" -eq 0 ]]; then
+ skip "$0: HugePages are not enabled, required for some tests"
+ fi
+}
+
+check_test_requirements()
+{
+ # The test supports x86_64 and powerpc64. We currently have no useful
+ # eligibility check for powerpc64, and the test itself will reject other
+ # architectures.
+ case `uname -m` in
+ "x86_64")
+ check_supported_x86_64
+ ;;
+ "ppc64le"|"ppc64")
+ check_supported_ppc64
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+}
+
+save_nr_hugepages()
+{
+ orig_nr_hugepages=$(cat /proc/sys/vm/nr_hugepages)
+}
+
+restore_nr_hugepages()
+{
+ echo "$orig_nr_hugepages" > /proc/sys/vm/nr_hugepages
+}
+
+setup_nr_hugepages()
+{
+ local needpgs=$1
+ while read -r name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs="$size"
+ break
+ fi
+ done < /proc/meminfo
+ if [ "$freepgs" -ge "$needpgs" ]; then
+ return
+ fi
+ local hpgs=$((orig_nr_hugepages + needpgs))
+ echo $hpgs > /proc/sys/vm/nr_hugepages
+
+ local nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+ if [ "$nr_hugepgs" != "$hpgs" ]; then
+ restore_nr_hugepages
+ skip "$0: no enough hugepages for testing"
+ fi
+}
+
+check_test_requirements
+save_nr_hugepages
+# 4 keep_mapped pages, and one for tmp usage
+setup_nr_hugepages 5
+./va_high_addr_switch --run-hugetlb
+restore_nr_hugepages
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
new file mode 100644
index 000000000000..4f0923825ed7
--- /dev/null
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2017, Anshuman Khandual, IBM Corp.
+ *
+ * Works on architectures which support 128TB virtual
+ * address range and beyond.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <fcntl.h>
+
+#include "vm_util.h"
+#include "kselftest.h"
+
+/*
+ * Maximum address range mapped with a single mmap()
+ * call is little bit more than 1GB. Hence 1GB is
+ * chosen as the single chunk size for address space
+ * mapping.
+ */
+
+#define SZ_1GB (1024 * 1024 * 1024UL)
+#define SZ_1TB (1024 * 1024 * 1024 * 1024UL)
+
+#define MAP_CHUNK_SIZE SZ_1GB
+
+/*
+ * Address space till 128TB is mapped without any hint
+ * and is enabled by default. Address space beyond 128TB
+ * till 512TB is obtained by passing hint address as the
+ * first argument into mmap() system call.
+ *
+ * The process heap address space is divided into two
+ * different areas one below 128TB and one above 128TB
+ * till it reaches 512TB. One with size 128TB and the
+ * other being 384TB.
+ *
+ * On Arm64 the address space is 256TB and support for
+ * high mappings up to 4PB virtual address space has
+ * been added.
+ *
+ * On PowerPC64, the address space up to 128TB can be
+ * mapped without a hint. Addresses beyond 128TB, up to
+ * 4PB, can be mapped with a hint.
+ *
+ */
+
+#define NR_CHUNKS_128TB ((128 * SZ_1TB) / MAP_CHUNK_SIZE) /* Number of chunks for 128TB */
+#define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL)
+#define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL)
+#define NR_CHUNKS_3840TB (NR_CHUNKS_128TB * 30UL)
+#define NR_CHUNKS_3968TB (NR_CHUNKS_128TB * 31UL)
+
+#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */
+#define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */
+
+#ifdef __aarch64__
+#define HIGH_ADDR_MARK ADDR_MARK_256TB
+#define HIGH_ADDR_SHIFT 49
+#define NR_CHUNKS_LOW NR_CHUNKS_256TB
+#define NR_CHUNKS_HIGH NR_CHUNKS_3840TB
+#elif defined(__PPC64__)
+#define HIGH_ADDR_MARK ADDR_MARK_128TB
+#define HIGH_ADDR_SHIFT 48
+#define NR_CHUNKS_LOW NR_CHUNKS_128TB
+#define NR_CHUNKS_HIGH NR_CHUNKS_3968TB
+#else
+#define HIGH_ADDR_MARK ADDR_MARK_128TB
+#define HIGH_ADDR_SHIFT 48
+#define NR_CHUNKS_LOW NR_CHUNKS_128TB
+#define NR_CHUNKS_HIGH NR_CHUNKS_384TB
+#endif
+
+static char *hint_addr(void)
+{
+ int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT);
+
+ return (char *) (1UL << bits);
+}
+
+static void validate_addr(char *ptr, int high_addr)
+{
+ unsigned long addr = (unsigned long) ptr;
+
+ if (high_addr) {
+ if (addr < HIGH_ADDR_MARK)
+ ksft_exit_fail_msg("Bad address %lx\n", addr);
+ return;
+ }
+
+ if (addr > HIGH_ADDR_MARK)
+ ksft_exit_fail_msg("Bad address %lx\n", addr);
+}
+
+static void mark_range(char *ptr, size_t size)
+{
+ if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, size, "virtual_address_range") == -1) {
+ if (errno == EINVAL) {
+ /* Depends on CONFIG_ANON_VMA_NAME */
+ ksft_test_result_skip("prctl(PR_SET_VMA_ANON_NAME) not supported\n");
+ ksft_finished();
+ } else {
+ ksft_exit_fail_perror("prctl(PR_SET_VMA_ANON_NAME) failed\n");
+ }
+ }
+}
+
+static int is_marked_vma(const char *vma_name)
+{
+ return vma_name && !strcmp(vma_name, "[anon:virtual_address_range]\n");
+}
+
+static int validate_lower_address_hint(void)
+{
+ char *ptr;
+
+ ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ |
+ PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (ptr == MAP_FAILED)
+ return 0;
+
+ return 1;
+}
+
+static int validate_complete_va_space(void)
+{
+ unsigned long start_addr, end_addr, prev_end_addr;
+ char line[400];
+ char prot[6];
+ FILE *file;
+ int fd;
+
+ fd = open("va_dump", O_CREAT | O_WRONLY, 0600);
+ unlink("va_dump");
+ if (fd < 0) {
+ ksft_test_result_skip("cannot create or open dump file\n");
+ ksft_finished();
+ }
+
+ file = fopen("/proc/self/maps", "r");
+ if (file == NULL)
+ ksft_exit_fail_msg("cannot open /proc/self/maps\n");
+
+ prev_end_addr = 0;
+ while (fgets(line, sizeof(line), file)) {
+ const char *vma_name = NULL;
+ int vma_name_start = 0;
+ unsigned long hop;
+
+ if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n",
+ &start_addr, &end_addr, prot, &vma_name_start) != 3)
+ ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
+
+ if (vma_name_start)
+ vma_name = line + vma_name_start;
+
+ /* end of userspace mappings; ignore vsyscall mapping */
+ if (start_addr & (1UL << 63))
+ return 0;
+
+ /* /proc/self/maps must have gaps less than MAP_CHUNK_SIZE */
+ if (start_addr - prev_end_addr >= MAP_CHUNK_SIZE)
+ return 1;
+
+ prev_end_addr = end_addr;
+
+ if (prot[0] != 'r')
+ continue;
+
+ if (check_vmflag_io((void *)start_addr))
+ continue;
+
+ /*
+ * Confirm whether MAP_CHUNK_SIZE chunk can be found or not.
+ * If write succeeds, no need to check MAP_CHUNK_SIZE - 1
+ * addresses after that. If the address was not held by this
+ * process, write would fail with errno set to EFAULT.
+ * Anyways, if write returns anything apart from 1, exit the
+ * program since that would mean a bug in /proc/self/maps.
+ */
+ hop = 0;
+ while (start_addr + hop < end_addr) {
+ if (write(fd, (void *)(start_addr + hop), 1) != 1)
+ return 1;
+ lseek(fd, 0, SEEK_SET);
+
+ if (is_marked_vma(vma_name))
+ munmap((char *)(start_addr + hop), MAP_CHUNK_SIZE);
+
+ hop += MAP_CHUNK_SIZE;
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ char *ptr[NR_CHUNKS_LOW];
+ char **hptr;
+ char *hint;
+ unsigned long i, lchunks, hchunks;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ for (i = 0; i < NR_CHUNKS_LOW; i++) {
+ ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (ptr[i] == MAP_FAILED) {
+ if (validate_lower_address_hint())
+ ksft_exit_fail_msg("mmap unexpectedly succeeded with hint\n");
+ break;
+ }
+
+ mark_range(ptr[i], MAP_CHUNK_SIZE);
+ validate_addr(ptr[i], 0);
+ }
+ lchunks = i;
+ hptr = (char **) calloc(NR_CHUNKS_HIGH, sizeof(char *));
+ if (hptr == NULL) {
+ ksft_test_result_skip("Memory constraint not fulfilled\n");
+ ksft_finished();
+ }
+
+ for (i = 0; i < NR_CHUNKS_HIGH; i++) {
+ hint = hint_addr();
+ hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (hptr[i] == MAP_FAILED)
+ break;
+
+ mark_range(hptr[i], MAP_CHUNK_SIZE);
+ validate_addr(hptr[i], 1);
+ }
+ hchunks = i;
+ if (validate_complete_va_space()) {
+ ksft_test_result_fail("BUG in mmap() or /proc/self/maps\n");
+ ksft_finished();
+ }
+
+ for (i = 0; i < lchunks; i++)
+ munmap(ptr[i], MAP_CHUNK_SIZE);
+
+ for (i = 0; i < hchunks; i++)
+ munmap(hptr[i], MAP_CHUNK_SIZE);
+
+ free(hptr);
+
+ ksft_test_result_pass("Test\n");
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
new file mode 100644
index 000000000000..d954bf91afd5
--- /dev/null
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+#include <linux/fs.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include "kselftest.h"
+#include "vm_util.h"
+
+#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+#define SMAP_FILE_PATH "/proc/self/smaps"
+#define STATUS_FILE_PATH "/proc/self/status"
+#define MAX_LINE_LENGTH 500
+
+unsigned int __page_size;
+unsigned int __page_shift;
+
+uint64_t pagemap_get_entry(int fd, char *start)
+{
+ const unsigned long pfn = (unsigned long)start / getpagesize();
+ uint64_t entry;
+ int ret;
+
+ ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
+ if (ret != sizeof(entry))
+ ksft_exit_fail_msg("reading pagemap failed\n");
+ return entry;
+}
+
+static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r)
+{
+ struct pm_scan_arg arg;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + psize());
+ arg.vec = (uintptr_t)r;
+ arg.vec_len = 1;
+ arg.flags = 0;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = 0;
+ arg.category_inverted = 0;
+ arg.category_mask = 0;
+ arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
+ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
+ arg.return_mask = arg.category_anyof_mask;
+
+ return ioctl(fd, PAGEMAP_SCAN, &arg);
+}
+
+static uint64_t pagemap_scan_get_categories(int fd, char *start)
+{
+ struct page_region r;
+ long ret;
+
+ ret = __pagemap_scan_get_categories(fd, start, &r);
+ if (ret < 0)
+ ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno));
+ if (ret == 0)
+ return 0;
+ return r.categories;
+}
+
+/* `start` is any valid address. */
+static bool pagemap_scan_supported(int fd, char *start)
+{
+ static int supported = -1;
+ int ret;
+
+ if (supported != -1)
+ return supported;
+
+ /* Provide an invalid address in order to trigger EFAULT. */
+ ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL);
+ if (ret == 0)
+ ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n");
+
+ supported = errno == EFAULT;
+
+ return supported;
+}
+
+static bool page_entry_is(int fd, char *start, char *desc,
+ uint64_t pagemap_flags, uint64_t pagescan_flags)
+{
+ bool m = pagemap_get_entry(fd, start) & pagemap_flags;
+
+ if (pagemap_scan_supported(fd, start)) {
+ bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags;
+
+ if (m == s)
+ return m;
+
+ ksft_exit_fail_msg(
+ "read and ioctl return unmatched results for %s: %d %d", desc, m, s);
+ }
+ return m;
+}
+
+bool pagemap_is_softdirty(int fd, char *start)
+{
+ return page_entry_is(fd, start, "soft-dirty",
+ PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY);
+}
+
+bool pagemap_is_swapped(int fd, char *start)
+{
+ return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED);
+}
+
+bool pagemap_is_populated(int fd, char *start)
+{
+ return page_entry_is(fd, start, "populated",
+ PM_PRESENT | PM_SWAP,
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED);
+}
+
+unsigned long pagemap_get_pfn(int fd, char *start)
+{
+ uint64_t entry = pagemap_get_entry(fd, start);
+
+ /* If present (63th bit), PFN is at bit 0 -- 54. */
+ if (entry & PM_PRESENT)
+ return entry & 0x007fffffffffffffull;
+ return -1ul;
+}
+
+void clear_softdirty(void)
+{
+ int ret;
+ const char *ctrl = "4";
+ int fd = open("/proc/self/clear_refs", O_WRONLY);
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening clear_refs failed\n");
+ ret = write(fd, ctrl, strlen(ctrl));
+ close(fd);
+ if (ret != (signed int)strlen(ctrl))
+ ksft_exit_fail_msg("writing clear_refs failed\n");
+}
+
+bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
+{
+ while (fgets(buf, len, fp)) {
+ if (!strncmp(buf, pattern, strlen(pattern)))
+ return true;
+ }
+ return false;
+}
+
+uint64_t read_pmd_pagesize(void)
+{
+ int fd;
+ char buf[20];
+ ssize_t num_read;
+
+ fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ num_read = read(fd, buf, 19);
+ if (num_read < 1) {
+ close(fd);
+ return 0;
+ }
+ buf[num_read] = '\0';
+ close(fd);
+
+ return strtoul(buf, NULL, 10);
+}
+
+unsigned long rss_anon(void)
+{
+ unsigned long rss_anon = 0;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+
+ fp = fopen(STATUS_FILE_PATH, "r");
+ if (!fp)
+ ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, STATUS_FILE_PATH);
+
+ if (!check_for_pattern(fp, "RssAnon:", buffer, sizeof(buffer)))
+ goto err_out;
+
+ if (sscanf(buffer, "RssAnon:%10lu kB", &rss_anon) != 1)
+ ksft_exit_fail_msg("Reading status error\n");
+
+err_out:
+ fclose(fp);
+ return rss_anon;
+}
+
+char *__get_smap_entry(void *addr, const char *pattern, char *buf, size_t len)
+{
+ int ret;
+ FILE *fp;
+ char *entry = NULL;
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH)
+ ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
+
+ fp = fopen(SMAP_FILE_PATH, "r");
+ if (!fp)
+ ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
+
+ if (!check_for_pattern(fp, addr_pattern, buf, len))
+ goto err_out;
+
+ /* Fetch the pattern in the same block */
+ if (!check_for_pattern(fp, pattern, buf, len))
+ goto err_out;
+
+ /* Trim trailing newline */
+ entry = strchr(buf, '\n');
+ if (entry)
+ *entry = '\0';
+
+ entry = buf + strlen(pattern);
+
+err_out:
+ fclose(fp);
+ return entry;
+}
+
+bool __check_huge(void *addr, char *pattern, int nr_hpages,
+ uint64_t hpage_size)
+{
+ char buffer[MAX_LINE_LENGTH];
+ uint64_t thp = -1;
+ char *entry;
+
+ entry = __get_smap_entry(addr, pattern, buffer, sizeof(buffer));
+ if (!entry)
+ goto err_out;
+
+ if (sscanf(entry, "%9" SCNu64 " kB", &thp) != 1)
+ ksft_exit_fail_msg("Reading smap error\n");
+
+err_out:
+ return thp == (nr_hpages * (hpage_size >> 10));
+}
+
+bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size);
+}
+
+bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size);
+}
+
+bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size);
+}
+
+int64_t allocate_transhuge(void *ptr, int pagemap_fd)
+{
+ uint64_t ent[2];
+
+ /* drop pmd */
+ if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANONYMOUS |
+ MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
+ ksft_exit_fail_msg("mmap transhuge\n");
+
+ if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
+ ksft_exit_fail_msg("MADV_HUGEPAGE\n");
+
+ /* allocate transparent huge page */
+ *(volatile void **)ptr = ptr;
+
+ if (pread(pagemap_fd, ent, sizeof(ent),
+ (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent))
+ ksft_exit_fail_msg("read pagemap\n");
+
+ if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
+ PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
+ !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1)))
+ return PAGEMAP_PFN(ent[0]);
+
+ return -1;
+}
+
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
+int detect_hugetlb_page_sizes(size_t sizes[], int max)
+{
+ DIR *dir = opendir("/sys/kernel/mm/hugepages/");
+ int count = 0;
+
+ if (!dir)
+ return 0;
+
+ while (count < max) {
+ struct dirent *entry = readdir(dir);
+ size_t kb;
+
+ if (!entry)
+ break;
+ if (entry->d_type != DT_DIR)
+ continue;
+ if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
+ continue;
+ sizes[count++] = kb * 1024;
+ ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
+ kb);
+ }
+ closedir(dir);
+ return count;
+}
+
+int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags)
+{
+ size_t count;
+
+ count = pread(kpageflags_fd, flags, sizeof(*flags),
+ pfn * sizeof(*flags));
+
+ if (count != sizeof(*flags))
+ return -1;
+
+ return 0;
+}
+
+/* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
+int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
+ bool miss, bool wp, bool minor, uint64_t *ioctls)
+{
+ struct uffdio_register uffdio_register = { 0 };
+ uint64_t mode = 0;
+ int ret = 0;
+
+ if (miss)
+ mode |= UFFDIO_REGISTER_MODE_MISSING;
+ if (wp)
+ mode |= UFFDIO_REGISTER_MODE_WP;
+ if (minor)
+ mode |= UFFDIO_REGISTER_MODE_MINOR;
+
+ uffdio_register.range.start = (unsigned long)addr;
+ uffdio_register.range.len = len;
+ uffdio_register.mode = mode;
+
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
+ ret = -errno;
+ else if (ioctls)
+ *ioctls = uffdio_register.ioctls;
+
+ return ret;
+}
+
+int uffd_register(int uffd, void *addr, uint64_t len,
+ bool miss, bool wp, bool minor)
+{
+ return uffd_register_with_ioctls(uffd, addr, len,
+ miss, wp, minor, NULL);
+}
+
+int uffd_unregister(int uffd, void *addr, uint64_t len)
+{
+ struct uffdio_range range = { .start = (uintptr_t)addr, .len = len };
+ int ret = 0;
+
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1)
+ ret = -errno;
+
+ return ret;
+}
+
+unsigned long get_free_hugepages(void)
+{
+ unsigned long fhp = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return fhp;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1)
+ break;
+ }
+
+ free(line);
+ fclose(f);
+ return fhp;
+}
+
+static bool check_vmflag(void *addr, const char *flag)
+{
+ char buffer[MAX_LINE_LENGTH];
+ const char *flags;
+ size_t flaglen;
+
+ flags = __get_smap_entry(addr, "VmFlags:", buffer, sizeof(buffer));
+ if (!flags)
+ ksft_exit_fail_msg("%s: No VmFlags for %p\n", __func__, addr);
+
+ while (true) {
+ flags += strspn(flags, " ");
+
+ flaglen = strcspn(flags, " ");
+ if (!flaglen)
+ return false;
+
+ if (flaglen == strlen(flag) && !memcmp(flags, flag, flaglen))
+ return true;
+
+ flags += flaglen;
+ }
+}
+
+bool check_vmflag_io(void *addr)
+{
+ return check_vmflag(addr, "io");
+}
+
+bool check_vmflag_pfnmap(void *addr)
+{
+ return check_vmflag(addr, "pf");
+}
+
+bool check_vmflag_guard(void *addr)
+{
+ return check_vmflag(addr, "gu");
+}
+
+bool softdirty_supported(void)
+{
+ char *addr;
+ bool supported = false;
+ const size_t pagesize = getpagesize();
+
+ /* New mappings are expected to be marked with VM_SOFTDIRTY (sd). */
+ addr = mmap(0, pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (!addr)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ supported = check_vmflag(addr, "sd");
+ munmap(addr, pagesize);
+ return supported;
+}
+
+/*
+ * Open an fd at /proc/$pid/maps and configure procmap_out ready for
+ * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise.
+ */
+int open_procmap(pid_t pid, struct procmap_fd *procmap_out)
+{
+ char path[256];
+ int ret = 0;
+
+ memset(procmap_out, '\0', sizeof(*procmap_out));
+ sprintf(path, "/proc/%d/maps", pid);
+ procmap_out->query.size = sizeof(procmap_out->query);
+ procmap_out->fd = open(path, O_RDONLY);
+ if (procmap_out->fd < 0)
+ ret = -errno;
+
+ return ret;
+}
+
+/* Perform PROCMAP_QUERY. Returns 0 on success, or an error code otherwise. */
+int query_procmap(struct procmap_fd *procmap)
+{
+ int ret = 0;
+
+ if (ioctl(procmap->fd, PROCMAP_QUERY, &procmap->query) == -1)
+ ret = -errno;
+
+ return ret;
+}
+
+/*
+ * Try to find the VMA at specified address, returns true if found, false if not
+ * found, and the test is failed if any other error occurs.
+ *
+ * On success, procmap->query is populated with the results.
+ */
+bool find_vma_procmap(struct procmap_fd *procmap, void *address)
+{
+ int err;
+
+ procmap->query.query_flags = 0;
+ procmap->query.query_addr = (unsigned long)address;
+ err = query_procmap(procmap);
+ if (!err)
+ return true;
+
+ if (err != -ENOENT)
+ ksft_exit_fail_msg("%s: Error %d on ioctl(PROCMAP_QUERY)\n",
+ __func__, err);
+ return false;
+}
+
+/*
+ * Close fd used by PROCMAP_QUERY mechanism. Returns 0 on success, or an error
+ * code otherwise.
+ */
+int close_procmap(struct procmap_fd *procmap)
+{
+ return close(procmap->fd);
+}
+
+int write_sysfs(const char *file_path, unsigned long val)
+{
+ FILE *f = fopen(file_path, "w");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fprintf(f, "%lu", val) < 0) {
+ perror("fprintf");
+ fclose(f);
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+int read_sysfs(const char *file_path, unsigned long *val)
+{
+ FILE *f = fopen(file_path, "r");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fscanf(f, "%lu", val) != 1) {
+ perror("fscanf");
+ fclose(f);
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+void *sys_mremap(void *old_address, unsigned long old_size,
+ unsigned long new_size, int flags, void *new_address)
+{
+ return (void *)syscall(__NR_mremap, (unsigned long)old_address,
+ old_size, new_size, flags,
+ (unsigned long)new_address);
+}
+
+bool detect_huge_zeropage(void)
+{
+ int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
+ O_RDONLY);
+ bool enabled = 0;
+ char buf[15];
+ int ret;
+
+ if (fd < 0)
+ return 0;
+
+ ret = pread(fd, buf, sizeof(buf), 0);
+ if (ret > 0 && ret < sizeof(buf)) {
+ buf[ret] = 0;
+
+ if (strtoul(buf, NULL, 10) == 1)
+ enabled = 1;
+ }
+
+ close(fd);
+ return enabled;
+}
+
+long ksm_get_self_zero_pages(void)
+{
+ int proc_self_ksm_stat_fd;
+ char buf[200];
+ char *substr_ksm_zero;
+ size_t value_pos;
+ ssize_t read_size;
+
+ proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
+ if (proc_self_ksm_stat_fd < 0)
+ return -errno;
+
+ read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0);
+ close(proc_self_ksm_stat_fd);
+ if (read_size < 0)
+ return -errno;
+
+ buf[read_size] = 0;
+
+ substr_ksm_zero = strstr(buf, "ksm_zero_pages");
+ if (!substr_ksm_zero)
+ return 0;
+
+ value_pos = strcspn(substr_ksm_zero, "0123456789");
+ return strtol(substr_ksm_zero + value_pos, NULL, 10);
+}
+
+long ksm_get_self_merging_pages(void)
+{
+ int proc_self_ksm_merging_pages_fd;
+ char buf[10];
+ ssize_t ret;
+
+ proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
+ O_RDONLY);
+ if (proc_self_ksm_merging_pages_fd < 0)
+ return -errno;
+
+ ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0);
+ close(proc_self_ksm_merging_pages_fd);
+ if (ret <= 0)
+ return -errno;
+ buf[ret] = 0;
+
+ return strtol(buf, NULL, 10);
+}
+
+long ksm_get_full_scans(void)
+{
+ int ksm_full_scans_fd;
+ char buf[10];
+ ssize_t ret;
+
+ ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
+ if (ksm_full_scans_fd < 0)
+ return -errno;
+
+ ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0);
+ close(ksm_full_scans_fd);
+ if (ret <= 0)
+ return -errno;
+ buf[ret] = 0;
+
+ return strtol(buf, NULL, 10);
+}
+
+int ksm_use_zero_pages(void)
+{
+ int ksm_use_zero_pages_fd;
+ ssize_t ret;
+
+ ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+ if (ksm_use_zero_pages_fd < 0)
+ return -errno;
+
+ ret = write(ksm_use_zero_pages_fd, "1", 1);
+ close(ksm_use_zero_pages_fd);
+ return ret == 1 ? 0 : -errno;
+}
+
+int ksm_start(void)
+{
+ int ksm_fd;
+ ssize_t ret;
+ long start_scans, end_scans;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ /* Wait for two full scans such that any possible merging happened. */
+ start_scans = ksm_get_full_scans();
+ if (start_scans < 0) {
+ close(ksm_fd);
+ return start_scans;
+ }
+ ret = write(ksm_fd, "1", 1);
+ close(ksm_fd);
+ if (ret != 1)
+ return -errno;
+ do {
+ end_scans = ksm_get_full_scans();
+ if (end_scans < 0)
+ return end_scans;
+ } while (end_scans < start_scans + 2);
+
+ return 0;
+}
+
+int ksm_stop(void)
+{
+ int ksm_fd;
+ ssize_t ret;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ ret = write(ksm_fd, "2", 1);
+ close(ksm_fd);
+ return ret == 1 ? 0 : -errno;
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
new file mode 100644
index 000000000000..6ad32b1830f1
--- /dev/null
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stdarg.h>
+#include <strings.h> /* ffsl() */
+#include <unistd.h> /* _SC_PAGESIZE */
+#include "kselftest.h"
+#include <linux/fs.h>
+
+#define BIT_ULL(nr) (1ULL << (nr))
+#define PM_SOFT_DIRTY BIT_ULL(55)
+#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_UFFD_WP BIT_ULL(57)
+#define PM_GUARD_REGION BIT_ULL(58)
+#define PM_FILE BIT_ULL(61)
+#define PM_SWAP BIT_ULL(62)
+#define PM_PRESENT BIT_ULL(63)
+
+#define KPF_COMPOUND_HEAD BIT_ULL(15)
+#define KPF_COMPOUND_TAIL BIT_ULL(16)
+#define KPF_THP BIT_ULL(22)
+/*
+ * Ignore the checkpatch warning, we must read from x but don't want to do
+ * anything with it in order to trigger a read page fault. We therefore must use
+ * volatile to stop the compiler from optimising this away.
+ */
+#define FORCE_READ(x) (*(const volatile typeof(x) *)&(x))
+
+extern unsigned int __page_size;
+extern unsigned int __page_shift;
+
+/*
+ * Represents an open fd and PROCMAP_QUERY state for binary (via ioctl)
+ * /proc/$pid/[s]maps lookup.
+ */
+struct procmap_fd {
+ int fd;
+ struct procmap_query query;
+};
+
+static inline unsigned int psize(void)
+{
+ if (!__page_size)
+ __page_size = sysconf(_SC_PAGESIZE);
+ return __page_size;
+}
+
+static inline unsigned int pshift(void)
+{
+ if (!__page_shift)
+ __page_shift = (ffsl(psize()) - 1);
+ return __page_shift;
+}
+
+bool detect_huge_zeropage(void);
+
+/*
+ * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with
+ * ENOENT on unlinked files. See
+ * https://gitlab.com/qemu-project/qemu/-/issues/103 for some info about such
+ * bugs. There are rumours of NFS implementations with similar bugs.
+ *
+ * Ideally, tests should just detect filesystems known to have such issues and
+ * bail early. But 9pfs has the additional "feature" that it causes fstatfs to
+ * pass through the f_type field from the host filesystem. To avoid having to
+ * scrape /proc/mounts or some other hackery, tests can call this function when
+ * it seems such a bug might have been encountered.
+ */
+static inline void skip_test_dodgy_fs(const char *op_name)
+{
+ ksft_test_result_skip("%s failed with ENOENT. Filesystem might be buggy (9pfs?)\n", op_name);
+}
+
+uint64_t pagemap_get_entry(int fd, char *start);
+bool pagemap_is_softdirty(int fd, char *start);
+bool pagemap_is_swapped(int fd, char *start);
+bool pagemap_is_populated(int fd, char *start);
+unsigned long pagemap_get_pfn(int fd, char *start);
+void clear_softdirty(void);
+bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len);
+uint64_t read_pmd_pagesize(void);
+unsigned long rss_anon(void);
+bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size);
+bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size);
+bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
+int64_t allocate_transhuge(void *ptr, int pagemap_fd);
+unsigned long default_huge_page_size(void);
+int detect_hugetlb_page_sizes(size_t sizes[], int max);
+int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags);
+
+int uffd_register(int uffd, void *addr, uint64_t len,
+ bool miss, bool wp, bool minor);
+int uffd_unregister(int uffd, void *addr, uint64_t len);
+int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
+ bool miss, bool wp, bool minor, uint64_t *ioctls);
+unsigned long get_free_hugepages(void);
+bool check_vmflag_io(void *addr);
+bool check_vmflag_pfnmap(void *addr);
+bool check_vmflag_guard(void *addr);
+int open_procmap(pid_t pid, struct procmap_fd *procmap_out);
+int query_procmap(struct procmap_fd *procmap);
+bool find_vma_procmap(struct procmap_fd *procmap, void *address);
+int close_procmap(struct procmap_fd *procmap);
+int write_sysfs(const char *file_path, unsigned long val);
+int read_sysfs(const char *file_path, unsigned long *val);
+bool softdirty_supported(void);
+
+static inline int open_self_procmap(struct procmap_fd *procmap_out)
+{
+ pid_t pid = getpid();
+
+ return open_procmap(pid, procmap_out);
+}
+
+/* These helpers need to be inline to match the kselftest.h idiom. */
+static char test_name[1024];
+
+static inline void log_test_start(const char *name, ...)
+{
+ va_list args;
+ va_start(args, name);
+
+ vsnprintf(test_name, sizeof(test_name), name, args);
+ ksft_print_msg("[RUN] %s\n", test_name);
+
+ va_end(args);
+}
+
+static inline void log_test_result(int result)
+{
+ ksft_test_result_report(result, "%s\n", test_name);
+}
+
+static inline int sz2ord(size_t size, size_t pagesize)
+{
+ return __builtin_ctzll(size / pagesize);
+}
+
+void *sys_mremap(void *old_address, unsigned long old_size,
+ unsigned long new_size, int flags, void *new_address);
+
+long ksm_get_self_zero_pages(void);
+long ksm_get_self_merging_pages(void);
+long ksm_get_full_scans(void);
+int ksm_use_zero_pages(void);
+int ksm_start(void);
+int ksm_stop(void);
+
+/*
+ * On ppc64 this will only work with radix 2M hugepage size
+ */
+#define HPAGE_SHIFT 21
+#define HPAGE_SIZE (1 << HPAGE_SHIFT)
+
+#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
+#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh
index d3d0d108924d..3d2d2eb9d6ff 100644..100755
--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
set -e
@@ -14,7 +14,7 @@ want_sleep=$8
reserve=$9
echo "Putting task in cgroup '$cgroup'"
-echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs
+echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs
echo "Method is $method"
diff --git a/tools/testing/selftests/vm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c
index 6a2caba19ee1..34c91f7e6128 100644
--- a/tools/testing/selftests/vm/write_to_hugetlbfs.c
+++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c
@@ -28,7 +28,7 @@ enum method {
/* Global variables. */
static const char *self;
-static char *shmaddr;
+static int *shmaddr;
static int shmid;
/*
@@ -47,15 +47,17 @@ void sig_handler(int signo)
{
printf("Received %d.\n", signo);
if (signo == SIGINT) {
- printf("Deleting the memory\n");
- if (shmdt((const void *)shmaddr) != 0) {
- perror("Detach failure");
+ if (shmaddr) {
+ printf("Deleting the memory\n");
+ if (shmdt((const void *)shmaddr) != 0) {
+ perror("Detach failure");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(4);
+ }
+
shmctl(shmid, IPC_RMID, NULL);
- exit(4);
+ printf("Done deleting the memory\n");
}
-
- shmctl(shmid, IPC_RMID, NULL);
- printf("Done deleting the memory\n");
}
exit(2);
}
@@ -87,7 +89,7 @@ int main(int argc, char **argv)
size = atoi(optarg);
break;
case 'p':
- strncpy(path, optarg, sizeof(path));
+ strncpy(path, optarg, sizeof(path) - 1);
break;
case 'm':
if (atoi(optarg) >= MAX_METHOD) {
@@ -211,7 +213,8 @@ int main(int argc, char **argv)
shmctl(shmid, IPC_RMID, NULL);
exit(2);
}
- printf("shmaddr: %p\n", ptr);
+ shmaddr = ptr;
+ printf("shmaddr: %p\n", shmaddr);
break;
default:
diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/module/Makefile
index 640a40f9b72b..6132d7ddb08b 100644
--- a/tools/testing/selftests/user/Makefile
+++ b/tools/testing/selftests/module/Makefile
@@ -1,9 +1,12 @@
# SPDX-License-Identifier: GPL-2.0-only
-# Makefile for user memory selftests
+# Makefile for module loading selftests
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := test_user_copy.sh
+TEST_PROGS := find_symbol.sh
include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/module/config b/tools/testing/selftests/module/config
new file mode 100644
index 000000000000..b0c206b1ad47
--- /dev/null
+++ b/tools/testing/selftests/module/config
@@ -0,0 +1,3 @@
+CONFIG_TEST_RUNTIME=y
+CONFIG_TEST_RUNTIME_MODULE=y
+CONFIG_TEST_KALLSYMS=m
diff --git a/tools/testing/selftests/module/find_symbol.sh b/tools/testing/selftests/module/find_symbol.sh
new file mode 100755
index 000000000000..2c56805c9b6e
--- /dev/null
+++ b/tools/testing/selftests/module/find_symbol.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
+# Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org>
+#
+# This is a stress test script for kallsyms through find_symbol()
+
+set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+test_reqs()
+{
+ if ! which modprobe 2> /dev/null > /dev/null; then
+ echo "$0: You need modprobe installed" >&2
+ exit $ksft_skip
+ fi
+
+ if ! which kmod 2> /dev/null > /dev/null; then
+ echo "$0: You need kmod installed" >&2
+ exit $ksft_skip
+ fi
+
+ if ! which perf 2> /dev/null > /dev/null; then
+ echo "$0: You need perf installed" >&2
+ exit $ksft_skip
+ fi
+
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+}
+
+load_mod()
+{
+ local STATS="-e duration_time"
+ STATS="$STATS -e user_time"
+ STATS="$STATS -e system_time"
+ STATS="$STATS -e page-faults"
+ local MOD=$1
+
+ local ARCH="$(uname -m)"
+ case "${ARCH}" in
+ x86_64)
+ perf stat $STATS $MODPROBE $MOD
+ ;;
+ *)
+ time $MODPROBE $MOD
+ exit 1
+ ;;
+ esac
+}
+
+remove_all()
+{
+ $MODPROBE -r test_kallsyms_b
+ for i in a b c d; do
+ $MODPROBE -r test_kallsyms_$i
+ done
+}
+test_reqs
+
+MODPROBE=$(</proc/sys/kernel/modprobe)
+
+remove_all
+load_mod test_kallsyms_b
+remove_all
+
+# Now pollute the namespace
+$MODPROBE test_kallsyms_c
+load_mod test_kallsyms_b
+
+# Now pollute the namespace with twice the number of symbols than the last time
+remove_all
+$MODPROBE test_kallsyms_c
+$MODPROBE test_kallsyms_d
+load_mod test_kallsyms_b
+
+exit 0
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
index 584dc6bc3b06..d2917054fe3a 100644
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -204,7 +204,7 @@ bool test_unpriv_remount(const char *fstype, const char *mount_options,
if (!WIFEXITED(status)) {
die("child did not terminate cleanly\n");
}
- return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
}
create_and_enter_userns();
@@ -282,7 +282,7 @@ static bool test_priv_mount_unpriv_remount(void)
if (!WIFEXITED(status)) {
die("child did not terminate cleanly\n");
}
- return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
}
orig_mnt_flags = read_mnt_flags(orig_path);
diff --git a/tools/testing/selftests/mount_setattr/.gitignore b/tools/testing/selftests/mount_setattr/.gitignore
new file mode 100644
index 000000000000..5f74d8488472
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/.gitignore
@@ -0,0 +1 @@
+mount_setattr_test
diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile
new file mode 100644
index 000000000000..4d4f810cdf2c
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2 -pthread
+
+LOCAL_HDRS += ../filesystems/wrappers.h
+
+TEST_GEN_PROGS := mount_setattr_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mount_setattr/config b/tools/testing/selftests/mount_setattr/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
new file mode 100644
index 000000000000..7aec3ae82a44
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -0,0 +1,2140 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/sysinfo.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <linux/mount.h>
+
+#include "../filesystems/wrappers.h"
+#include "kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_RELATIME
+#define MS_RELATIME (1 << 21)
+#endif
+
+#ifndef MS_STRICTATIME
+#define MS_STRICTATIME (1 << 24)
+#endif
+
+#ifndef MOUNT_ATTR_RDONLY
+#define MOUNT_ATTR_RDONLY 0x00000001
+#endif
+
+#ifndef MOUNT_ATTR_NOSUID
+#define MOUNT_ATTR_NOSUID 0x00000002
+#endif
+
+#ifndef MOUNT_ATTR_NOEXEC
+#define MOUNT_ATTR_NOEXEC 0x00000008
+#endif
+
+#ifndef MOUNT_ATTR_NODIRATIME
+#define MOUNT_ATTR_NODIRATIME 0x00000080
+#endif
+
+#ifndef MOUNT_ATTR__ATIME
+#define MOUNT_ATTR__ATIME 0x00000070
+#endif
+
+#ifndef MOUNT_ATTR_RELATIME
+#define MOUNT_ATTR_RELATIME 0x00000000
+#endif
+
+#ifndef MOUNT_ATTR_NOATIME
+#define MOUNT_ATTR_NOATIME 0x00000010
+#endif
+
+#ifndef MOUNT_ATTR_STRICTATIME
+#define MOUNT_ATTR_STRICTATIME 0x00000020
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#define DEFAULT_THREADS 4
+#define ptr_to_int(p) ((int)((intptr_t)(p)))
+#define int_to_ptr(u) ((void *)((intptr_t)(u)))
+
+#ifndef __NR_mount_setattr
+ #if defined __alpha__
+ #define __NR_mount_setattr 552
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_mount_setattr (442 + 4000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_mount_setattr (442 + 6000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_mount_setattr (442 + 5000)
+ #endif
+ #elif defined __ia64__
+ #define __NR_mount_setattr (442 + 1024)
+ #else
+ #define __NR_mount_setattr 442
+ #endif
+#endif
+
+#ifndef __NR_open_tree_attr
+ #if defined __alpha__
+ #define __NR_open_tree_attr 577
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree_attr (467 + 4000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree_attr (467 + 6000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree_attr (467 + 5000)
+ #endif
+ #elif defined __ia64__
+ #define __NR_open_tree_attr (467 + 1024)
+ #else
+ #define __NR_open_tree_attr 467
+ #endif
+#endif
+
+#ifndef MOUNT_ATTR_IDMAP
+#define MOUNT_ATTR_IDMAP 0x00100000
+#endif
+
+#ifndef MOUNT_ATTR_NOSYMFOLLOW
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
+#endif
+
+static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
+ struct mount_attr *attr, size_t size)
+{
+ return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
+}
+
+static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags,
+ struct mount_attr *attr, size_t size)
+{
+ return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size);
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+ if (create_and_enter_userns())
+ return -1;
+
+ if (unshare(CLONE_NEWNS))
+ return -1;
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ return -1;
+
+ return 0;
+}
+
+#ifndef ST_NOSYMFOLLOW
+#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
+#endif
+
+static int read_mnt_flags(const char *path)
+{
+ int ret;
+ struct statvfs stat;
+ unsigned int mnt_flags;
+
+ ret = statvfs(path, &stat);
+ if (ret != 0)
+ return -EINVAL;
+
+ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
+ ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
+ ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
+ return -EINVAL;
+
+ mnt_flags = 0;
+ if (stat.f_flag & ST_RDONLY)
+ mnt_flags |= MS_RDONLY;
+ if (stat.f_flag & ST_NOSUID)
+ mnt_flags |= MS_NOSUID;
+ if (stat.f_flag & ST_NODEV)
+ mnt_flags |= MS_NODEV;
+ if (stat.f_flag & ST_NOEXEC)
+ mnt_flags |= MS_NOEXEC;
+ if (stat.f_flag & ST_NOATIME)
+ mnt_flags |= MS_NOATIME;
+ if (stat.f_flag & ST_NODIRATIME)
+ mnt_flags |= MS_NODIRATIME;
+ if (stat.f_flag & ST_RELATIME)
+ mnt_flags |= MS_RELATIME;
+ if (stat.f_flag & ST_SYNCHRONOUS)
+ mnt_flags |= MS_SYNCHRONOUS;
+ if (stat.f_flag & ST_MANDLOCK)
+ mnt_flags |= ST_MANDLOCK;
+ if (stat.f_flag & ST_NOSYMFOLLOW)
+ mnt_flags |= ST_NOSYMFOLLOW;
+
+ return mnt_flags;
+}
+
+static char *get_field(char *src, int nfields)
+{
+ int i;
+ char *p = src;
+
+ for (i = 0; i < nfields; i++) {
+ while (*p && *p != ' ' && *p != '\t')
+ p++;
+
+ if (!*p)
+ break;
+
+ p++;
+ }
+
+ return p;
+}
+
+static void null_endofword(char *word)
+{
+ while (*word && *word != ' ' && *word != '\t')
+ word++;
+ *word = '\0';
+}
+
+static bool is_shared_mount(const char *path)
+{
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f = NULL;
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return false;
+
+ while (getline(&line, &len, f) != -1) {
+ char *opts, *target;
+
+ target = get_field(line, 4);
+ if (!target)
+ continue;
+
+ opts = get_field(target, 2);
+ if (!opts)
+ continue;
+
+ null_endofword(target);
+
+ if (strcmp(target, path) != 0)
+ continue;
+
+ null_endofword(opts);
+ if (strstr(opts, "shared:"))
+ return true;
+ }
+
+ free(line);
+ fclose(f);
+
+ return false;
+}
+
+static void *mount_setattr_thread(void *data)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
+ .attr_clr = 0,
+ .propagation = MS_SHARED,
+ };
+
+ if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
+ pthread_exit(int_to_ptr(-1));
+
+ pthread_exit(int_to_ptr(0));
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+static bool mount_setattr_supported(void)
+{
+ int ret;
+
+ ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
+ if (ret < 0 && errno == ENOSYS)
+ return false;
+
+ return true;
+}
+
+FIXTURE(mount_setattr) {
+};
+
+#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
+#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
+
+FIXTURE_SETUP(mount_setattr)
+{
+ int fd = -EBADF;
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+ (void)umount2("/mnt", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+
+ ASSERT_EQ(mkdir("/tmp/target1", 0777), 0);
+
+ ASSERT_EQ(mkdir("/tmp/target2", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+ ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+ MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+ MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+ fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
+ ASSERT_EQ(close(fd), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr)
+{
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ (void)umount2("/mnt/A", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+}
+
+TEST_F(mount_setattr, invalid_attributes)
+{
+ struct mount_attr invalid_attr = {
+ .attr_set = (1U << 31),
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_set = 0;
+ invalid_attr.attr_clr = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_clr = 0;
+ invalid_attr.propagation = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_set = (1U << 31);
+ invalid_attr.attr_clr = (1U << 31);
+ invalid_attr.propagation = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+}
+
+TEST_F(mount_setattr, extensibility)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ char *s = "dummy";
+ struct mount_attr invalid_attr = {};
+ struct mount_attr_large {
+ struct mount_attr attr1;
+ struct mount_attr attr2;
+ struct mount_attr attr3;
+ } large_attr = {};
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
+ sizeof(invalid_attr)), 0);
+ ASSERT_EQ(errno, EFAULT);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
+ sizeof(invalid_attr)), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr) / 2), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr) / 2), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ large_attr.attr3.attr_set = 0;
+ large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, basic)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOEXEC;
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, old_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, old_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, old_flags);
+}
+
+TEST_F(mount_setattr, basic_recursive)
+{
+ int fd;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOEXEC;
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_clr = MOUNT_ATTR_RDONLY;
+ attr.propagation = MS_SHARED;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_RDONLY;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+ fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+ ASSERT_GE(fd, 0);
+
+ /*
+ * We're holding a fd open for writing so this needs to fail somewhere
+ * in the middle and the mount options need to be unchanged.
+ */
+ attr.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST_F(mount_setattr, mount_has_writers)
+{
+ int fd, dfd;
+ unsigned int old_flags = 0, new_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ .propagation = MS_SHARED,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+ ASSERT_GE(fd, 0);
+
+ /*
+ * We're holding a fd open to a mount somwhere in the middle so this
+ * needs to fail somewhere in the middle. After this the mount options
+ * need to be unchanged.
+ */
+ ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
+
+ dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(dfd, 0);
+ EXPECT_EQ(fsync(dfd), 0);
+ EXPECT_EQ(close(dfd), 0);
+
+ EXPECT_EQ(fsync(fd), 0);
+ EXPECT_EQ(close(fd), 0);
+
+ /* All writers are gone so this should succeed. */
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+}
+
+TEST_F(mount_setattr, mixed_mount_options)
+{
+ unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
+ .attr_set = MOUNT_ATTR_RELATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags1 = read_mnt_flags("/mnt/B");
+ ASSERT_GT(old_flags1, 0);
+
+ old_flags2 = read_mnt_flags("/mnt/B/BB");
+ ASSERT_GT(old_flags2, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags2;
+ expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ expected_flags = old_flags2;
+ expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, time_changes)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+ attr.attr_clr = MOUNT_ATTR__ATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = 0;
+ attr.attr_clr = MOUNT_ATTR_STRICTATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_clr = MOUNT_ATTR_NOATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
+ attr.attr_clr = MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_NOATIME;
+ expected_flags |= MS_NODIRATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_NOATIME;
+ attr.attr_set |= MOUNT_ATTR_RELATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_RELATIME;
+ attr.attr_set |= MOUNT_ATTR_STRICTATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
+ attr.attr_set |= MOUNT_ATTR_NOATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags |= MS_NOATIME;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_clr = MOUNT_ATTR_NODIRATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_NODIRATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, multi_threaded)
+{
+ int i, j, nthreads, ret = 0;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ pthread_attr_t pattr;
+ pthread_t threads[DEFAULT_THREADS];
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ /* Try to change mount options from multiple threads. */
+ nthreads = get_nprocs_conf();
+ if (nthreads > DEFAULT_THREADS)
+ nthreads = DEFAULT_THREADS;
+
+ pthread_attr_init(&pattr);
+ for (i = 0; i < nthreads; i++)
+ ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
+
+ for (j = 0; j < i; j++) {
+ void *retptr = NULL;
+
+ EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
+
+ ret += ptr_to_int(retptr);
+ EXPECT_EQ(ret, 0);
+ }
+ pthread_attr_destroy(&pattr);
+
+ ASSERT_EQ(ret, 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOSUID;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+}
+
+TEST_F(mount_setattr, wrong_user_namespace)
+{
+ int ret;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ EXPECT_EQ(create_and_enter_userns(), 0);
+ ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EPERM);
+}
+
+TEST_F(mount_setattr, wrong_mount_namespace)
+{
+ int fd, ret;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+FIXTURE(mount_setattr_idmapped) {
+};
+
+FIXTURE_SETUP(mount_setattr_idmapped)
+{
+ int img_fd = -EBADF;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
+
+ (void)umount2("/mnt", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+ ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
+ ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+ ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
+ ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=2m,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+ ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+ MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+ MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
+ ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
+ img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
+ ASSERT_GE(img_fd, 0);
+ ASSERT_EQ(ftruncate(img_fd, 2147483648 /* 2 GB */), 0);
+ ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
+ ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
+ ASSERT_EQ(close(img_fd), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr_idmapped)
+{
+ (void)umount2("/mnt/A", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+}
+
+/**
+ * Validate that negative fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_negative)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ .userns_fd = -EBADF,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with negative fd");
+ }
+}
+
+/**
+ * Validate that excessively large fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_large)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ .userns_fd = INT64_MAX,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with too large fd value");
+ }
+}
+
+/**
+ * Validate that closed fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_closed)
+{
+ int fd;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_GE(close(fd), 0);
+
+ attr.userns_fd = fd;
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with closed fd");
+ }
+}
+
+/**
+ * Validate that the initial user namespace is rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(errno, EPERM);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
+ unsigned long range)
+{
+ char map[100], procfile[256];
+
+ snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid);
+ snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+ if (write_file(procfile, map, strlen(map)))
+ return -1;
+
+
+ snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid);
+ snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+ if (write_file(procfile, map, strlen(map)))
+ return -1;
+
+ return 0;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int get_userns_fd_cb(void *data)
+{
+ return kill(getpid(), SIGSTOP);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+ int ret;
+ pid_t pid;
+ char path[256];
+
+ pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
+ if (pid < 0)
+ return -errno;
+
+ ret = map_ids(pid, nsid, hostid, range);
+ if (ret < 0)
+ return ret;
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ ret = open(path, O_RDONLY | O_CLOEXEC);
+ kill(pid, SIGKILL);
+ wait_for_pid(pid);
+ return ret;
+}
+
+/**
+ * Validate that an attached mount in our mount namespace cannot be idmapped.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ * namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ /*
+ * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
+ * to bypass this mount_setattr() restriction.
+ */
+ ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that idmapping a mount is rejected if the mount's mount namespace
+ * and our mount namespace don't match.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ * namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC);
+ ASSERT_GE(open_tree_fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
+ sizeof(attr)), 0);
+ /*
+ * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
+ * to bypass this mount_setattr() restriction.
+ */
+ ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that an attached mount in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ /* Changing mount properties on a detached mount. */
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that a detached mount not in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ /* Changing mount properties on a detached mount. */
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static bool expected_uid_gid(int dfd, const char *path, int flags,
+ uid_t expected_uid, gid_t expected_gid)
+{
+ int ret;
+ struct stat st;
+
+ ret = fstatat(dfd, path, &st, flags);
+ if (ret < 0)
+ return false;
+
+ return st.st_uid == expected_uid && st.st_gid == expected_gid;
+}
+
+/**
+ * Validate that currently changing the idmapping of an idmapped mount fails.
+ */
+TEST_F(mount_setattr_idmapped, change_idmapping)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0));
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+
+ EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0));
+ EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
+
+ /* Change idmapping on a detached mount that is already idmapped. */
+ attr.userns_fd = get_userns_fd(0, 20000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ /*
+ * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
+ * to bypass this mount_setattr() restriction.
+ */
+ EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000));
+ EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
+
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/A", "ramfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+ ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
+ AT_RECURSIVE |
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
+
+ (void)umount2("/mnt/A", MNT_DETACH);
+}
+
+TEST_F(mount_setattr, mount_attr_nosymfollow)
+{
+ int fd;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= ST_NOSYMFOLLOW;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, ELOOP);
+
+ attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
+ attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~ST_NOSYMFOLLOW;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+}
+
+TEST_F(mount_setattr, open_tree_detached)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ /*
+ * /AA testing tmpfs
+ * `-/AA/B testing tmpfs
+ * `-/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_subdir, "B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_subdir, "B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(move_mount(fd_tree_subdir, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ /*
+ * /tmp/target1 testing tmpfs
+ * `-/tmp/target1/B testing tmpfs
+ * `-/tmp/target1/B/BB testing tmpfs
+ */
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target2", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ /*
+ * /tmp/target2 testing tmpfs
+ * |-/tmp/target2/A testing tmpfs
+ * | `-/tmp/target2/A/AA testing tmpfs
+ * | `-/tmp/target2/A/AA/B testing tmpfs
+ * | `-/tmp/target2/A/AA/B/BB testing tmpfs
+ * `-/tmp/target2/B testing ramfs
+ */
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ /*
+ * The origin mount namespace of the anonymous mount namespace
+ * of @fd_tree_base doesn't match the caller's mount namespace
+ * anymore so creation of another detached mounts must fail.
+ */
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_LT(fd_tree_subdir, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail2)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ EXPECT_EQ(create_and_enter_userns(), 0);
+
+ /*
+ * The caller entered a new user namespace. They will have
+ * CAP_SYS_ADMIN in this user namespace. However, they're still
+ * located in a mount namespace that is owned by an ancestor
+ * user namespace in which they hold no privilege. Creating a
+ * detached mount must thus fail.
+ */
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_LT(fd_tree_subdir, 0);
+ ASSERT_EQ(errno, EPERM);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail3)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ EXPECT_EQ(prepare_unpriv_mountns(), 0);
+
+ /*
+ * The caller entered a new mount namespace. They will have
+ * CAP_SYS_ADMIN in the owning user namespace of their mount
+ * namespace.
+ *
+ * However, the origin mount namespace of the anonymous mount
+ * namespace of @fd_tree_base doesn't match the caller's mount
+ * namespace anymore so creation of another detached mounts must
+ * fail.
+ */
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_LT(fd_tree_subdir, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_F(mount_setattr, open_tree_subfolder)
+{
+ int fd_context, fd_tmpfs, fd_tree;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+
+ EXPECT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "subdir", 0755), 0);
+
+ fd_tree = sys_open_tree(fd_tmpfs, "subdir",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree, 0);
+
+ EXPECT_EQ(close(fd_tmpfs), 0);
+
+ ASSERT_EQ(mkdirat(-EBADF, "/mnt/open_tree_subfolder", 0755), 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tree, "", -EBADF, "/mnt/open_tree_subfolder", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ EXPECT_EQ(close(fd_tree), 0);
+
+ ASSERT_EQ(umount2("/mnt/open_tree_subfolder", 0), 0);
+
+ EXPECT_EQ(rmdir("/mnt/open_tree_subfolder"), 0);
+}
+
+TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_then_close)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /*
+ * /mnt testing tmpfs
+ * `-/mnt testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_and_attach)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+ __u64 mnt_id = 0;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /*
+ * /mnt testing tmpfs
+ * `-/mnt testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
+ mnt_id = stx.stx_mnt_id;
+
+ ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+ ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, STATX_MNT_ID_UNIQUE, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
+ ASSERT_EQ(stx.stx_mnt_id, mnt_id);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, move_mount_detached_fail)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+
+ /* Attach the mount to the caller's mount namespace. */
+ ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(-EBADF, "/tmp/B",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ ASSERT_EQ(statx(fd_tree_subdir, "BB", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /* Not allowed to move an attached mount to a detached mount. */
+ ASSERT_NE(move_mount(fd_tree_base, "", fd_tree_subdir, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, attach_detached_mount_then_umount_then_close)
+{
+ int fd_tree = -EBADF;
+ struct statx stx;
+
+ fd_tree = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree, 0);
+
+ ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx), 0);
+ /* We copied with AT_RECURSIVE so /mnt/A must be a mountpoint. */
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /* Attach the mount to the caller's mount namespace. */
+ ASSERT_EQ(move_mount(fd_tree, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(umount2("/tmp/target1", MNT_DETACH), 0);
+
+ /*
+ * This tests whether dissolve_on_fput() handles a NULL mount
+ * namespace correctly, i.e., that it doesn't splat.
+ */
+ EXPECT_EQ(close(fd_tree), 0);
+}
+
+TEST_F(mount_setattr, mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)
+{
+ int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+ /*
+ * |-/mnt/A testing tmpfs
+ * `-/mnt/A/AA testing tmpfs
+ * `-/mnt/A/AA/B testing tmpfs
+ * `-/mnt/A/AA/B/BB testing tmpfs
+ */
+ fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree1, 0);
+
+ /*
+ * `-/mnt/B testing ramfs
+ */
+ fd_tree2 = sys_open_tree(-EBADF, "/mnt/B",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree2, 0);
+
+ /*
+ * Move the source detached mount tree to the target detached
+ * mount tree. This will move all the mounts in the source mount
+ * tree from the source anonymous mount namespace to the target
+ * anonymous mount namespace.
+ *
+ * The source detached mount tree and the target detached mount
+ * tree now both refer to the same anonymous mount namespace.
+ *
+ * |-"" testing ramfs
+ * `-"" testing tmpfs
+ * `-""/AA testing tmpfs
+ * `-""/AA/B testing tmpfs
+ * `-""/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree1, "", fd_tree2, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+ /*
+ * The source detached mount tree @fd_tree1 is now an attached
+ * mount, i.e., it has a parent. Specifically, it now has the
+ * root mount of the mount tree of @fd_tree2 as its parent.
+ *
+ * That means we are no longer allowed to attach it as we only
+ * allow attaching the root of an anonymous mount tree, not
+ * random bits and pieces. Verify that the kernel enforces this.
+ */
+ ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ /*
+ * Closing the source detached mount tree must not unmount and
+ * free the shared anonymous mount namespace. The kernel will
+ * quickly yell at us because the anonymous mount namespace
+ * won't be empty when it's freed.
+ */
+ EXPECT_EQ(close(fd_tree1), 0);
+
+ /*
+ * Attach the mount tree to a non-anonymous mount namespace.
+ * This can only succeed if closing fd_tree1 had proper
+ * semantics and didn't cause the anonymous mount namespace to
+ * be freed. If it did this will trigger a UAF which will be
+ * visible on any KASAN enabled kernel.
+ *
+ * |-/tmp/target1 testing ramfs
+ * `-/tmp/target1 testing tmpfs
+ * `-/tmp/target1/AA testing tmpfs
+ * `-/tmp/target1/AA/B testing tmpfs
+ * `-/tmp/target1/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ EXPECT_EQ(close(fd_tree2), 0);
+}
+
+TEST_F(mount_setattr, two_detached_mounts_referring_to_same_anonymous_mount_namespace)
+{
+ int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+ /*
+ * Copy the following mount tree:
+ *
+ * |-/mnt/A testing tmpfs
+ * `-/mnt/A/AA testing tmpfs
+ * `-/mnt/A/AA/B testing tmpfs
+ * `-/mnt/A/AA/B/BB testing tmpfs
+ */
+ fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree1, 0);
+
+ /*
+ * Create an O_PATH file descriptors with a separate struct file
+ * that refers to the same detached mount tree as @fd_tree1
+ */
+ fd_tree2 = sys_open_tree(fd_tree1, "",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tree2, 0);
+
+ /*
+ * Copy the following mount tree:
+ *
+ * |-/tmp/target1 testing tmpfs
+ * `-/tmp/target1/AA testing tmpfs
+ * `-/tmp/target1/AA/B testing tmpfs
+ * `-/tmp/target1/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ /*
+ * This must fail as this would mean adding the same mount tree
+ * into the same mount tree.
+ */
+ ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+}
+
+TEST_F(mount_setattr, two_detached_subtrees_of_same_anonymous_mount_namespace)
+{
+ int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+ /*
+ * Copy the following mount tree:
+ *
+ * |-/mnt/A testing tmpfs
+ * `-/mnt/A/AA testing tmpfs
+ * `-/mnt/A/AA/B testing tmpfs
+ * `-/mnt/A/AA/B/BB testing tmpfs
+ */
+ fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree1, 0);
+
+ /*
+ * Create an O_PATH file descriptors with a separate struct file that
+ * refers to a subtree of the same detached mount tree as @fd_tree1
+ */
+ fd_tree2 = sys_open_tree(fd_tree1, "AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tree2, 0);
+
+ /*
+ * This must fail as it is only possible to attach the root of a
+ * detached mount tree.
+ */
+ ASSERT_NE(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+}
+
+TEST_F(mount_setattr, detached_tree_propagation)
+{
+ int fd_tree = -EBADF;
+ struct statx stx1, stx2, stx3, stx4;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(mount(NULL, "/mnt", NULL, MS_REC | MS_SHARED, NULL), 0);
+
+ /*
+ * Copy the following mount tree:
+ *
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ fd_tree = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree, 0);
+
+ ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx1), 0);
+ ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx2), 0);
+
+ /*
+ * Copying the mount namespace like done above doesn't alter the
+ * mounts in any way so the filesystem mounted on /mnt must be
+ * identical even though the mounts will differ. Use the device
+ * information to verify that. Note that tmpfs will have a 0
+ * major number so comparing the major number is misleading.
+ */
+ ASSERT_EQ(stx1.stx_dev_minor, stx2.stx_dev_minor);
+
+ /* Mount a tmpfs filesystem over /mnt/A. */
+ ASSERT_EQ(mount(NULL, "/mnt/A", "tmpfs", 0, NULL), 0);
+
+
+ ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx3), 0);
+ ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx4), 0);
+
+ /*
+ * A new filesystem has been mounted on top of /mnt/A which
+ * means that the device information will be different for any
+ * statx() that was taken from /mnt/A before the mount compared
+ * to one after the mount.
+ */
+ ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
+ ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor);
+
+ EXPECT_EQ(close(fd_tree), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/move_mount_set_group/.gitignore b/tools/testing/selftests/move_mount_set_group/.gitignore
new file mode 100644
index 000000000000..f5e339268720
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/.gitignore
@@ -0,0 +1 @@
+move_mount_set_group_test
diff --git a/tools/testing/selftests/move_mount_set_group/Makefile b/tools/testing/selftests/move_mount_set_group/Makefile
new file mode 100644
index 000000000000..94235846b6f9
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2
+
+TEST_GEN_FILES += move_mount_set_group_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/move_mount_set_group/config b/tools/testing/selftests/move_mount_set_group/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
new file mode 100644
index 000000000000..12434415ec36
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <sys/syscall.h>
+
+#include "kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
+
+#ifndef MOVE_MOUNT_SET_GROUP
+#define MOVE_MOUNT_SET_GROUP 0x00000100
+#endif
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
+#endif
+
+#ifndef MOVE_MOUNT_T_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040
+#endif
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+ if (create_and_enter_userns())
+ return -1;
+
+ if (unshare(CLONE_NEWNS))
+ return -1;
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ return -1;
+
+ return 0;
+}
+
+static char *get_field(char *src, int nfields)
+{
+ int i;
+ char *p = src;
+
+ for (i = 0; i < nfields; i++) {
+ while (*p && *p != ' ' && *p != '\t')
+ p++;
+
+ if (!*p)
+ break;
+
+ p++;
+ }
+
+ return p;
+}
+
+static void null_endofword(char *word)
+{
+ while (*word && *word != ' ' && *word != '\t')
+ word++;
+ *word = '\0';
+}
+
+static bool is_shared_mount(const char *path)
+{
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f = NULL;
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return false;
+
+ while (getline(&line, &len, f) != -1) {
+ char *opts, *target;
+
+ target = get_field(line, 4);
+ if (!target)
+ continue;
+
+ opts = get_field(target, 2);
+ if (!opts)
+ continue;
+
+ null_endofword(target);
+
+ if (strcmp(target, path) != 0)
+ continue;
+
+ null_endofword(opts);
+ if (strstr(opts, "shared:"))
+ return true;
+ }
+
+ free(line);
+ fclose(f);
+
+ return false;
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+#define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from"
+#define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to"
+
+static bool move_mount_set_group_supported(void)
+{
+ int ret;
+
+ if (mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(NULL, "/tmp", NULL, MS_PRIVATE, 0))
+ return -1;
+
+ if (mkdir(SET_GROUP_FROM, 0777))
+ return -1;
+
+ if (mkdir(SET_GROUP_TO, 0777))
+ return -1;
+
+ if (mount("testing", SET_GROUP_FROM, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(SET_GROUP_FROM, SET_GROUP_TO, NULL, MS_BIND, NULL))
+ return -1;
+
+ if (mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0))
+ return -1;
+
+ ret = syscall(__NR_move_mount, AT_FDCWD, SET_GROUP_FROM,
+ AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);
+ umount2("/tmp", MNT_DETACH);
+
+ return ret >= 0;
+}
+
+FIXTURE(move_mount_set_group) {
+};
+
+#define SET_GROUP_A "/tmp/A"
+
+FIXTURE_SETUP(move_mount_set_group)
+{
+ bool ret;
+
+ ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir(SET_GROUP_A, 0777), 0);
+
+ ASSERT_EQ(mount("testing", SET_GROUP_A, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+}
+
+FIXTURE_TEARDOWN(move_mount_set_group)
+{
+ bool ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+struct child_args {
+ int unsfd;
+ int mntnsfd;
+ bool shared;
+ int mntfd;
+};
+
+static int get_nestedns_mount_cb(void *data)
+{
+ struct child_args *ca = (struct child_args *)data;
+ int ret;
+
+ ret = prepare_unpriv_mountns();
+ if (ret)
+ return 1;
+
+ if (ca->shared) {
+ ret = mount(NULL, SET_GROUP_A, NULL, MS_SHARED, 0);
+ if (ret)
+ return 1;
+ }
+
+ ret = open("/proc/self/ns/user", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->unsfd = ret;
+
+ ret = open("/proc/self/ns/mnt", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntnsfd = ret;
+
+ ret = open(SET_GROUP_A, O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntfd = ret;
+
+ return 0;
+}
+
+TEST_F(move_mount_set_group, complex_sharing_copying)
+{
+ struct child_args ca_from = {
+ .shared = true,
+ };
+ struct child_args ca_to = {
+ .shared = false,
+ };
+ pid_t pid;
+ bool ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_from, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_to, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ ASSERT_EQ(syscall(__NR_move_mount, ca_from.mntfd, "",
+ ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP
+ | MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH),
+ 0);
+
+ ASSERT_EQ(setns(ca_to.mntnsfd, CLONE_NEWNS), 0);
+ ASSERT_EQ(is_shared_mount(SET_GROUP_A), 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c
index 9403ac01ba11..b16029c40c0f 100644
--- a/tools/testing/selftests/mqueue/mq_open_tests.c
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@@ -33,7 +33,7 @@
#include <mqueue.h>
#include <error.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static char *usage =
"Usage:\n"
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index b019e0b8221c..303c46eebd94 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -35,11 +35,12 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h>
+#include <sys/param.h>
#include <mqueue.h>
#include <popt.h>
#include <error.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static char *usage =
"Usage:\n"
@@ -73,7 +74,6 @@ static char *usage =
char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
-#define min(a, b) ((a) < (b) ? (a) : (b))
#define MAX_CPUS 64
char *cpu_option_string;
int cpus_to_pin[MAX_CPUS];
@@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no)
if (in_shutdown++)
return;
+ /* Free the cpu_set allocated using CPU_ALLOC in main function */
+ CPU_FREE(cpu_set);
+
for (i = 0; i < num_cpus_to_pin; i++)
if (cpu_threads[i]) {
pthread_kill(cpu_threads[i], SIGUSR1);
@@ -320,7 +323,8 @@ void *fake_cont_thread(void *arg)
void *cont_thread(void *arg)
{
char buff[MSG_SIZE];
- int i, priority;
+ int i;
+ unsigned int priority;
for (i = 0; i < num_cpus_to_pin; i++)
if (cpu_threads[i] == pthread_self())
@@ -422,7 +426,8 @@ struct test test2[] = {
void *perf_test_thread(void *arg)
{
char buff[MSG_SIZE];
- int prio_out, prio_in;
+ int prio_out;
+ unsigned int prio_in;
int i;
clockid_t clock;
pthread_t *t;
@@ -551,7 +556,13 @@ int main(int argc, char *argv[])
perror("sysconf(_SC_NPROCESSORS_ONLN)");
exit(1);
}
- cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
+
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
+ "require root in order to modify\nsystem settings. "
+ "Exiting.\n");
+
+ cpus_online = MIN(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
cpu_set = CPU_ALLOC(cpus_online);
if (cpu_set == NULL) {
perror("CPU_ALLOC()");
@@ -589,7 +600,7 @@ int main(int argc, char *argv[])
cpu_set)) {
fprintf(stderr, "Any given CPU may "
"only be given once.\n");
- exit(1);
+ goto err_code;
} else
CPU_SET_S(cpus_to_pin[cpu],
cpu_set_size, cpu_set);
@@ -607,7 +618,7 @@ int main(int argc, char *argv[])
queue_path = malloc(strlen(option) + 2);
if (!queue_path) {
perror("malloc()");
- exit(1);
+ goto err_code;
}
queue_path[0] = '/';
queue_path[1] = 0;
@@ -622,17 +633,12 @@ int main(int argc, char *argv[])
fprintf(stderr, "Must pass at least one CPU to continuous "
"mode.\n");
poptPrintUsage(popt_context, stderr, 0);
- exit(1);
+ goto err_code;
} else if (!continuous_mode) {
num_cpus_to_pin = 1;
cpus_to_pin[0] = cpus_online - 1;
}
- if (getuid() != 0)
- ksft_exit_skip("Not running as root, but almost all tests "
- "require root in order to modify\nsystem settings. "
- "Exiting.\n");
-
max_msgs = fopen(MAX_MSGS, "r+");
max_msgsize = fopen(MAX_MSGSIZE, "r+");
if (!max_msgs)
@@ -740,4 +746,9 @@ int main(int argc, char *argv[])
sleep(1);
}
shutdown(0, "", 0);
+
+err_code:
+ CPU_FREE(cpu_set);
+ exit(1);
+
}
diff --git a/tools/testing/selftests/mqueue/setting b/tools/testing/selftests/mqueue/setting
new file mode 100644
index 000000000000..a953c96aa16e
--- /dev/null
+++ b/tools/testing/selftests/mqueue/setting
@@ -0,0 +1 @@
+timeout=180
diff --git a/tools/testing/selftests/mseal_system_mappings/.gitignore b/tools/testing/selftests/mseal_system_mappings/.gitignore
new file mode 100644
index 000000000000..319c497a595e
--- /dev/null
+++ b/tools/testing/selftests/mseal_system_mappings/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+sysmap_is_sealed
diff --git a/tools/testing/selftests/mseal_system_mappings/Makefile b/tools/testing/selftests/mseal_system_mappings/Makefile
new file mode 100644
index 000000000000..2b4504e2f52f
--- /dev/null
+++ b/tools/testing/selftests/mseal_system_mappings/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS := sysmap_is_sealed
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mseal_system_mappings/config b/tools/testing/selftests/mseal_system_mappings/config
new file mode 100644
index 000000000000..675cb9f37b86
--- /dev/null
+++ b/tools/testing/selftests/mseal_system_mappings/config
@@ -0,0 +1 @@
+CONFIG_MSEAL_SYSTEM_MAPPINGS=y
diff --git a/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c
new file mode 100644
index 000000000000..cb0ca6ed7ebe
--- /dev/null
+++ b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * test system mappings are sealed when
+ * KCONFIG_MSEAL_SYSTEM_MAPPINGS=y
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include "kselftest.h"
+#include "kselftest_harness.h"
+
+#define VMFLAGS "VmFlags:"
+#define MSEAL_FLAGS "sl"
+#define MAX_LINE_LEN 512
+
+bool has_mapping(char *name, FILE *maps)
+{
+ char line[MAX_LINE_LEN];
+
+ while (fgets(line, sizeof(line), maps)) {
+ if (strstr(line, name))
+ return true;
+ }
+
+ return false;
+}
+
+bool mapping_is_sealed(char *name, FILE *maps)
+{
+ char line[MAX_LINE_LEN];
+
+ while (fgets(line, sizeof(line), maps)) {
+ if (!strncmp(line, VMFLAGS, strlen(VMFLAGS))) {
+ if (strstr(line, MSEAL_FLAGS))
+ return true;
+
+ return false;
+ }
+ }
+
+ return false;
+}
+
+FIXTURE(basic) {
+ FILE *maps;
+};
+
+FIXTURE_SETUP(basic)
+{
+ self->maps = fopen("/proc/self/smaps", "r");
+ if (!self->maps)
+ SKIP(return, "Could not open /proc/self/smap, errno=%d",
+ errno);
+};
+
+FIXTURE_TEARDOWN(basic)
+{
+ if (self->maps)
+ fclose(self->maps);
+};
+
+FIXTURE_VARIANT(basic)
+{
+ char *name;
+ bool sealed;
+};
+
+FIXTURE_VARIANT_ADD(basic, vdso) {
+ .name = "[vdso]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, vvar) {
+ .name = "[vvar]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, vvar_vclock) {
+ .name = "[vvar_vclock]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, sigpage) {
+ .name = "[sigpage]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, vectors) {
+ .name = "[vectors]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, uprobes) {
+ .name = "[uprobes]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, stack) {
+ .name = "[stack]",
+ .sealed = false,
+};
+
+TEST_F(basic, check_sealed)
+{
+ if (!has_mapping(variant->name, self->maps)) {
+ SKIP(return, "could not find the mapping, %s",
+ variant->name);
+ }
+
+ EXPECT_EQ(variant->sealed,
+ mapping_is_sealed(variant->name, self->maps));
+};
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/.gitignore b/tools/testing/selftests/namespaces/.gitignore
new file mode 100644
index 000000000000..0989e80da457
--- /dev/null
+++ b/tools/testing/selftests/namespaces/.gitignore
@@ -0,0 +1,12 @@
+nsid_test
+file_handle_test
+init_ino_test
+ns_active_ref_test
+listns_test
+listns_permissions_test
+listns_efault_test
+siocgskns_test
+cred_change_test
+stress_test
+listns_pagination_bug
+regression_pidfd_setns_test
diff --git a/tools/testing/selftests/namespaces/Makefile b/tools/testing/selftests/namespaces/Makefile
new file mode 100644
index 000000000000..fbb821652c17
--- /dev/null
+++ b/tools/testing/selftests/namespaces/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
+
+TEST_GEN_PROGS := nsid_test \
+ file_handle_test \
+ init_ino_test \
+ ns_active_ref_test \
+ listns_test \
+ listns_permissions_test \
+ listns_efault_test \
+ siocgskns_test \
+ cred_change_test \
+ stress_test \
+ listns_pagination_bug \
+ regression_pidfd_setns_test
+
+include ../lib.mk
+
+$(OUTPUT)/ns_active_ref_test: ../filesystems/utils.c
+$(OUTPUT)/listns_test: ../filesystems/utils.c
+$(OUTPUT)/listns_permissions_test: ../filesystems/utils.c
+$(OUTPUT)/listns_efault_test: ../filesystems/utils.c
+$(OUTPUT)/siocgskns_test: ../filesystems/utils.c
+$(OUTPUT)/cred_change_test: ../filesystems/utils.c
+$(OUTPUT)/stress_test: ../filesystems/utils.c
+$(OUTPUT)/listns_pagination_bug: ../filesystems/utils.c
+$(OUTPUT)/regression_pidfd_setns_test: ../filesystems/utils.c
+
diff --git a/tools/testing/selftests/namespaces/config b/tools/testing/selftests/namespaces/config
new file mode 100644
index 000000000000..d09836260262
--- /dev/null
+++ b/tools/testing/selftests/namespaces/config
@@ -0,0 +1,7 @@
+CONFIG_UTS_NS=y
+CONFIG_TIME_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+CONFIG_CGROUPS=y
diff --git a/tools/testing/selftests/namespaces/cred_change_test.c b/tools/testing/selftests/namespaces/cred_change_test.c
new file mode 100644
index 000000000000..7b4f5ad3f725
--- /dev/null
+++ b/tools/testing/selftests/namespaces/cred_change_test.c
@@ -0,0 +1,814 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/nsfs.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Test credential changes and their impact on namespace active references.
+ */
+
+/*
+ * Test setuid() in a user namespace properly swaps active references.
+ * Create a user namespace with multiple UIDs mapped, then setuid() between them.
+ * Verify that the user namespace remains active throughout.
+ */
+TEST(setuid_preserves_active_refs)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int setuid_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace with multiple UIDs mapped (0-9) */
+ userns_fd = get_userns_fd(0, orig_uid, 10);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send namespace ID to parent */
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /*
+ * Perform multiple setuid() calls.
+ * Each setuid() triggers commit_creds() which should properly
+ * swap active references via switch_cred_namespaces().
+ */
+ for (setuid_count = 0; setuid_count < 50; setuid_count++) {
+ uid_t target_uid = (setuid_count % 10);
+ if (setuid(target_uid) < 0) {
+ if (errno != EPERM) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ }
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ TH_LOG("Child user namespace ID: %llu", (unsigned long long)userns_id);
+
+ /* Verify namespace is active while child is running */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+ ASSERT_TRUE(found);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive after child exits */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ found = false;
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("setuid() correctly preserved active references (no leak)");
+}
+
+/*
+ * Test setgid() in a user namespace properly handles active references.
+ */
+TEST(setgid_preserves_active_refs)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int setgid_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace with multiple GIDs mapped */
+ userns_fd = get_userns_fd(0, orig_uid, 10);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /* Perform multiple setgid() calls */
+ for (setgid_count = 0; setgid_count < 50; setgid_count++) {
+ gid_t target_gid = (setgid_count % 10);
+ if (setgid(target_gid) < 0) {
+ if (errno != EPERM) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ }
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("setgid() correctly preserved active references (no leak)");
+}
+
+/*
+ * Test setresuid() which changes real, effective, and saved UIDs.
+ * This should properly swap active references via commit_creds().
+ */
+TEST(setresuid_preserves_active_refs)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int setres_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace */
+ userns_fd = get_userns_fd(0, orig_uid, 10);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /* Perform multiple setresuid() calls */
+ for (setres_count = 0; setres_count < 30; setres_count++) {
+ uid_t uid1 = (setres_count % 5);
+ uid_t uid2 = ((setres_count + 1) % 5);
+ uid_t uid3 = ((setres_count + 2) % 5);
+
+ if (setresuid(uid1, uid2, uid3) < 0) {
+ if (errno != EPERM) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ }
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("setresuid() correctly preserved active references (no leak)");
+}
+
+/*
+ * Test credential changes across multiple user namespaces.
+ * Create nested user namespaces and verify active reference tracking.
+ */
+TEST(cred_change_nested_userns)
+{
+ pid_t pid;
+ int status;
+ __u64 parent_userns_id, child_userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found_parent = false, found_child = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 parent_id, child_id;
+ uid_t orig_uid = getuid();
+
+ close(pipefd[0]);
+
+ /* Create first user namespace */
+ userns_fd = get_userns_fd(0, orig_uid, 1);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get first namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &parent_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create nested user namespace */
+ userns_fd = get_userns_fd(0, 0, 1);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get nested namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send both IDs to parent */
+ write(pipefd[1], &parent_id, sizeof(parent_id));
+ write(pipefd[1], &child_id, sizeof(child_id));
+
+ /* Perform some credential changes in nested namespace */
+ setuid(0);
+ setgid(0);
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ /* Read both namespace IDs */
+ if (read(pipefd[0], &parent_userns_id, sizeof(parent_userns_id)) != sizeof(parent_userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get parent namespace ID");
+ }
+
+ if (read(pipefd[0], &child_userns_id, sizeof(child_userns_id)) != sizeof(child_userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get child namespace ID");
+ }
+ close(pipefd[0]);
+
+ TH_LOG("Parent userns: %llu, Child userns: %llu",
+ (unsigned long long)parent_userns_id,
+ (unsigned long long)child_userns_id);
+
+ /* Verify both namespaces are active */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == parent_userns_id)
+ found_parent = true;
+ if (ns_ids[i] == child_userns_id)
+ found_child = true;
+ }
+
+ ASSERT_TRUE(found_parent);
+ ASSERT_TRUE(found_child);
+
+ /* Wait for child */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify both namespaces become inactive */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ found_parent = false;
+ found_child = false;
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == parent_userns_id)
+ found_parent = true;
+ if (ns_ids[i] == child_userns_id)
+ found_child = true;
+ }
+
+ ASSERT_FALSE(found_parent);
+ ASSERT_FALSE(found_child);
+ TH_LOG("Nested user namespace credential changes preserved active refs (no leak)");
+}
+
+/*
+ * Test rapid credential changes don't cause refcount imbalances.
+ * This stress-tests the switch_cred_namespaces() logic.
+ */
+TEST(rapid_cred_changes_no_leak)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int change_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace with wider range of UIDs/GIDs */
+ userns_fd = get_userns_fd(0, orig_uid, 100);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /*
+ * Perform many rapid credential changes.
+ * Mix setuid, setgid, setreuid, setregid, setresuid, setresgid.
+ */
+ for (change_count = 0; change_count < 200; change_count++) {
+ switch (change_count % 6) {
+ case 0:
+ setuid(change_count % 50);
+ break;
+ case 1:
+ setgid(change_count % 50);
+ break;
+ case 2:
+ setreuid(change_count % 50, (change_count + 1) % 50);
+ break;
+ case 3:
+ setregid(change_count % 50, (change_count + 1) % 50);
+ break;
+ case 4:
+ setresuid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
+ break;
+ case 5:
+ setresgid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
+ break;
+ }
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ TH_LOG("Testing with user namespace ID: %llu", (unsigned long long)userns_id);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive (no leaked active refs) */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("200 rapid credential changes completed with no active ref leak");
+}
+
+/*
+ * Test setfsuid/setfsgid which change filesystem UID/GID.
+ * These also trigger credential changes but may have different code paths.
+ */
+TEST(setfsuid_preserves_active_refs)
+{
+ pid_t pid;
+ int status;
+ __u64 userns_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ ssize_t ret;
+ int i;
+ bool found = false;
+ int pipefd[2];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ int fd, userns_fd;
+ __u64 child_userns_id;
+ uid_t orig_uid = getuid();
+ int change_count;
+
+ close(pipefd[0]);
+
+ /* Create new user namespace */
+ userns_fd = get_userns_fd(0, orig_uid, 10);
+ if (userns_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+ close(userns_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(userns_fd);
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+ /* Perform multiple setfsuid/setfsgid calls */
+ for (change_count = 0; change_count < 50; change_count++) {
+ setfsuid(change_count % 10);
+ setfsgid(change_count % 10);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+ close(pipefd[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace ID from child");
+ }
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Verify namespace becomes inactive */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ ASSERT_GE(ret, 0);
+ }
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == userns_id) {
+ found = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found);
+ TH_LOG("setfsuid/setfsgid correctly preserved active references (no leak)");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/file_handle_test.c b/tools/testing/selftests/namespaces/file_handle_test.c
new file mode 100644
index 000000000000..064b41ad96b2
--- /dev/null
+++ b/tools/testing/selftests/namespaces/file_handle_test.c
@@ -0,0 +1,1429 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include "kselftest_harness.h"
+
+#ifndef FD_NSFS_ROOT
+#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */
+#endif
+
+TEST(nsfs_net_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open a namespace file descriptor */
+ ns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT as unprivileged user */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ if (fd < 0 && errno == EPERM) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "Permission denied for unprivileged user (expected)");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_uts_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open UTS namespace file descriptor */
+ ns_fd = open("/proc/self/ns/uts", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_ipc_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open IPC namespace file descriptor */
+ ns_fd = open("/proc/self/ns/ipc", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_pid_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open PID namespace file descriptor */
+ ns_fd = open("/proc/self/ns/pid", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_mnt_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open mount namespace file descriptor */
+ ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_user_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open user namespace file descriptor */
+ ns_fd = open("/proc/self/ns/user", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_cgroup_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open cgroup namespace file descriptor */
+ ns_fd = open("/proc/self/ns/cgroup", O_RDONLY);
+ if (ns_fd < 0) {
+ SKIP(free(handle); return, "cgroup namespace not available");
+ }
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_time_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open time namespace file descriptor */
+ ns_fd = open("/proc/self/ns/time", O_RDONLY);
+ if (ns_fd < 0) {
+ SKIP(free(handle); return, "time namespace not available");
+ }
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_user_net_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current network namespace */
+ ns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create network namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's network namespace handle from new user+net namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new network namespace");
+ }
+
+ /* Should fail with permission denied since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_uts_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current UTS namespace */
+ ns_fd = open("/proc/self/ns/uts", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new UTS namespace */
+ ret = unshare(CLONE_NEWUTS);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create UTS namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's UTS namespace handle from new user+uts namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new UTS namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_ipc_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current IPC namespace */
+ ns_fd = open("/proc/self/ns/ipc", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new IPC namespace */
+ ret = unshare(CLONE_NEWIPC);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create IPC namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's IPC namespace handle from new user+ipc namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new IPC namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_mnt_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current mount namespace */
+ ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new mount namespace */
+ ret = unshare(CLONE_NEWNS);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create mount namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's mount namespace handle from new user+mnt namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new mount namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_cgroup_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current cgroup namespace */
+ ns_fd = open("/proc/self/ns/cgroup", O_RDONLY);
+ if (ns_fd < 0) {
+ SKIP(free(handle); close(pipefd[0]); close(pipefd[1]);
+ return, "cgroup namespace not available");
+ }
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new cgroup namespace */
+ ret = unshare(CLONE_NEWCGROUP);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create cgroup namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's cgroup namespace handle from new user+cgroup namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new cgroup namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_pid_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current PID namespace */
+ ns_fd = open("/proc/self/ns/pid", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new PID namespace - requires fork to take effect */
+ ret = unshare(CLONE_NEWPID);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create PID namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Fork again for PID namespace to take effect */
+ pid_t child_pid = fork();
+ if (child_pid < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to fork in PID namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ if (child_pid == 0) {
+ /* Grandchild in new PID namespace */
+ /* Try to open parent's PID namespace handle from new user+pid namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S",
+ 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Wait for grandchild */
+ waitpid(child_pid, NULL, 0);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new PID namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_time_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current time namespace */
+ ns_fd = open("/proc/self/ns/time", O_RDONLY);
+ if (ns_fd < 0) {
+ SKIP(free(handle); close(pipefd[0]); close(pipefd[1]);
+ return, "time namespace not available");
+ }
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new time namespace - requires fork to take effect */
+ ret = unshare(CLONE_NEWTIME);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create time namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Fork again for time namespace to take effect */
+ pid_t child_pid = fork();
+ if (child_pid < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to fork in time namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ if (child_pid == 0) {
+ /* Grandchild in new time namespace */
+ /* Try to open parent's time namespace handle from new user+time namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S",
+ 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Wait for grandchild */
+ waitpid(child_pid, NULL, 0);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new time namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_open_flags)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open a namespace file descriptor */
+ ns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Test invalid flags that should fail */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_WRONLY);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EPERM);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDWR);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EPERM);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_TRUNC);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EPERM);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_DIRECT);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_TMPFILE);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_DIRECTORY);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, ENOTDIR);
+
+ close(ns_fd);
+ free(handle);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/init_ino_test.c b/tools/testing/selftests/namespaces/init_ino_test.c
new file mode 100644
index 000000000000..e4394a2fa0a9
--- /dev/null
+++ b/tools/testing/selftests/namespaces/init_ino_test.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/nsfs.h>
+
+#include "kselftest_harness.h"
+
+struct ns_info {
+ const char *name;
+ const char *proc_path;
+ unsigned int expected_ino;
+};
+
+static struct ns_info namespaces[] = {
+ { "ipc", "/proc/1/ns/ipc", IPC_NS_INIT_INO },
+ { "uts", "/proc/1/ns/uts", UTS_NS_INIT_INO },
+ { "user", "/proc/1/ns/user", USER_NS_INIT_INO },
+ { "pid", "/proc/1/ns/pid", PID_NS_INIT_INO },
+ { "cgroup", "/proc/1/ns/cgroup", CGROUP_NS_INIT_INO },
+ { "time", "/proc/1/ns/time", TIME_NS_INIT_INO },
+ { "net", "/proc/1/ns/net", NET_NS_INIT_INO },
+ { "mnt", "/proc/1/ns/mnt", MNT_NS_INIT_INO },
+};
+
+TEST(init_namespace_inodes)
+{
+ struct stat st;
+
+ for (int i = 0; i < sizeof(namespaces) / sizeof(namespaces[0]); i++) {
+ int ret = stat(namespaces[i].proc_path, &st);
+
+ /* Some namespaces might not be available (e.g., time namespace on older kernels) */
+ if (ret < 0) {
+ if (errno == ENOENT) {
+ ksft_test_result_skip("%s namespace not available\n",
+ namespaces[i].name);
+ continue;
+ }
+ ASSERT_GE(ret, 0)
+ TH_LOG("Failed to stat %s: %s",
+ namespaces[i].proc_path, strerror(errno));
+ }
+
+ ASSERT_EQ(st.st_ino, namespaces[i].expected_ino)
+ TH_LOG("Namespace %s has inode 0x%lx, expected 0x%x",
+ namespaces[i].name, st.st_ino, namespaces[i].expected_ino);
+
+ ksft_print_msg("Namespace %s: inode 0x%lx matches expected 0x%x\n",
+ namespaces[i].name, st.st_ino, namespaces[i].expected_ino);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_efault_test.c b/tools/testing/selftests/namespaces/listns_efault_test.c
new file mode 100644
index 000000000000..c7ed4023d7a8
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_efault_test.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "../pidfd/pidfd.h"
+#include "wrappers.h"
+
+/*
+ * Test listns() error handling with invalid buffer addresses.
+ *
+ * When the buffer pointer is invalid (e.g., crossing page boundaries
+ * into unmapped memory), listns() returns EINVAL.
+ *
+ * This test also creates mount namespaces that get destroyed during
+ * iteration, testing that namespace cleanup happens outside the RCU
+ * read lock.
+ */
+TEST(listns_partial_fault_with_ns_cleanup)
+{
+ void *map;
+ __u64 *ns_ids;
+ ssize_t ret;
+ long page_size;
+ pid_t pid, iter_pid;
+ int pidfds[5];
+ int sv[5][2];
+ int iter_pidfd;
+ int i, status;
+ char c;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ ASSERT_GT(page_size, 0);
+
+ /*
+ * Map two pages:
+ * - First page: readable and writable
+ * - Second page: will be unmapped to trigger EFAULT
+ */
+ map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(map, MAP_FAILED);
+
+ /* Unmap the second page */
+ ret = munmap((char *)map + page_size, page_size);
+ ASSERT_EQ(ret, 0);
+
+ /*
+ * Position the buffer pointer so there's room for exactly one u64
+ * before the page boundary. The second u64 would fall into the
+ * unmapped page.
+ */
+ ns_ids = ((__u64 *)((char *)map + page_size)) - 1;
+
+ /*
+ * Create a separate process to run listns() in a loop concurrently
+ * with namespace creation and destruction.
+ */
+ iter_pid = create_child(&iter_pidfd, 0);
+ ASSERT_NE(iter_pid, -1);
+
+ if (iter_pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0, /* All types */
+ .spare2 = 0,
+ .user_ns_id = 0, /* Global listing */
+ };
+ int iter_ret;
+
+ /*
+ * Loop calling listns() until killed.
+ * The kernel should:
+ * 1. Successfully write the first namespace ID (within valid page)
+ * 2. Fail with EFAULT when trying to write the second ID (unmapped page)
+ * 3. Handle concurrent namespace destruction without deadlock
+ */
+ while (1) {
+ iter_ret = sys_listns(&req, ns_ids, 2, 0);
+
+ if (iter_ret == -1 && errno == ENOSYS)
+ _exit(PIDFD_SKIP);
+ }
+ }
+
+ /* Small delay to let iterator start looping */
+ usleep(50000);
+
+ /*
+ * Create several child processes, each in its own mount namespace.
+ * These will be destroyed while the iterator is running listns().
+ */
+ for (i = 0; i < 5; i++) {
+ /* Create socketpair for synchronization */
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+ pid = create_child(&pidfds[i], CLONE_NEWNS);
+ ASSERT_NE(pid, -1);
+
+ if (pid == 0) {
+ close(sv[i][0]); /* Close parent end */
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ _exit(1);
+
+ /* Child: create a couple of tmpfs mounts */
+ if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+ if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+
+ if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+ if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+
+ /* Signal parent that setup is complete */
+ if (write_nointr(sv[i][1], "R", 1) != 1)
+ _exit(1);
+
+ /* Wait for parent to signal us to exit */
+ if (read_nointr(sv[i][1], &c, 1) != 1)
+ _exit(1);
+
+ close(sv[i][1]);
+ _exit(0);
+ }
+
+ close(sv[i][1]); /* Close child end */
+ }
+
+ /* Wait for all children to finish setup */
+ for (i = 0; i < 5; i++) {
+ ret = read_nointr(sv[i][0], &c, 1);
+ ASSERT_EQ(ret, 1);
+ ASSERT_EQ(c, 'R');
+ }
+
+ /*
+ * Signal children to exit. This will destroy their mount namespaces
+ * while listns() is iterating the namespace tree.
+ * This tests that cleanup happens outside the RCU read lock.
+ */
+ for (i = 0; i < 5; i++)
+ write_nointr(sv[i][0], "X", 1);
+
+ /* Wait for all mount namespace children to exit and cleanup */
+ for (i = 0; i < 5; i++) {
+ waitpid(-1, NULL, 0);
+ close(sv[i][0]);
+ close(pidfds[i]);
+ }
+
+ /* Kill iterator and wait for it */
+ sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+ ret = waitpid(iter_pid, &status, 0);
+ ASSERT_EQ(ret, iter_pid);
+ close(iter_pidfd);
+
+ /* Should have been killed */
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+
+ /* Clean up */
+ munmap(map, page_size);
+}
+
+/*
+ * Test listns() error handling when the entire buffer is invalid.
+ * This is a sanity check that basic invalid pointer detection works.
+ */
+TEST(listns_complete_fault)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 *ns_ids;
+ ssize_t ret;
+
+ /* Use a clearly invalid pointer */
+ ns_ids = (__u64 *)0xdeadbeef;
+
+ ret = sys_listns(&req, ns_ids, 10, 0);
+
+ if (ret == -1 && errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+
+ /* Should fail with EFAULT */
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EFAULT);
+}
+
+/*
+ * Test listns() error handling when the buffer is NULL.
+ */
+TEST(listns_null_buffer)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ ssize_t ret;
+
+ /* NULL buffer with non-zero count should fail */
+ ret = sys_listns(&req, NULL, 10, 0);
+
+ if (ret == -1 && errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+
+ /* Should fail with EFAULT */
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EFAULT);
+}
+
+/*
+ * Test listns() with a buffer that becomes invalid mid-iteration
+ * (after several successful writes), combined with mount namespace
+ * destruction to test RCU cleanup logic.
+ */
+TEST(listns_late_fault_with_ns_cleanup)
+{
+ void *map;
+ __u64 *ns_ids;
+ ssize_t ret;
+ long page_size;
+ pid_t pid, iter_pid;
+ int pidfds[10];
+ int sv[10][2];
+ int iter_pidfd;
+ int i, status;
+ char c;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ ASSERT_GT(page_size, 0);
+
+ /* Map two pages */
+ map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(map, MAP_FAILED);
+
+ /* Unmap the second page */
+ ret = munmap((char *)map + page_size, page_size);
+ ASSERT_EQ(ret, 0);
+
+ /*
+ * Position buffer so we can write several u64s successfully
+ * before hitting the page boundary.
+ */
+ ns_ids = ((__u64 *)((char *)map + page_size)) - 5;
+
+ /*
+ * Create a separate process to run listns() concurrently.
+ */
+ iter_pid = create_child(&iter_pidfd, 0);
+ ASSERT_NE(iter_pid, -1);
+
+ if (iter_pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ int iter_ret;
+
+ /*
+ * Loop calling listns() until killed.
+ * Request 10 namespace IDs while namespaces are being destroyed.
+ * This tests:
+ * 1. EFAULT handling when buffer becomes invalid
+ * 2. Namespace cleanup outside RCU read lock during iteration
+ */
+ while (1) {
+ iter_ret = sys_listns(&req, ns_ids, 10, 0);
+
+ if (iter_ret == -1 && errno == ENOSYS)
+ _exit(PIDFD_SKIP);
+ }
+ }
+
+ /* Small delay to let iterator start looping */
+ usleep(50000);
+
+ /*
+ * Create more children with mount namespaces to increase the
+ * likelihood that namespace cleanup happens during iteration.
+ */
+ for (i = 0; i < 10; i++) {
+ /* Create socketpair for synchronization */
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+ pid = create_child(&pidfds[i], CLONE_NEWNS);
+ ASSERT_NE(pid, -1);
+
+ if (pid == 0) {
+ close(sv[i][0]); /* Close parent end */
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ _exit(1);
+
+ /* Child: create tmpfs mounts */
+ if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+ if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+
+ if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+ if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+
+ /* Signal parent that setup is complete */
+ if (write_nointr(sv[i][1], "R", 1) != 1)
+ _exit(1);
+
+ /* Wait for parent to signal us to exit */
+ if (read_nointr(sv[i][1], &c, 1) != 1)
+ _exit(1);
+
+ close(sv[i][1]);
+ _exit(0);
+ }
+
+ close(sv[i][1]); /* Close child end */
+ }
+
+ /* Wait for all children to finish setup */
+ for (i = 0; i < 10; i++) {
+ ret = read_nointr(sv[i][0], &c, 1);
+ ASSERT_EQ(ret, 1);
+ ASSERT_EQ(c, 'R');
+ }
+
+ /* Kill half the children */
+ for (i = 0; i < 5; i++)
+ write_nointr(sv[i][0], "X", 1);
+
+ /* Small delay to let some exit */
+ usleep(10000);
+
+ /* Kill remaining children */
+ for (i = 5; i < 10; i++)
+ write_nointr(sv[i][0], "X", 1);
+
+ /* Wait for all children and cleanup */
+ for (i = 0; i < 10; i++) {
+ waitpid(-1, NULL, 0);
+ close(sv[i][0]);
+ close(pidfds[i]);
+ }
+
+ /* Kill iterator and wait for it */
+ sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+ ret = waitpid(iter_pid, &status, 0);
+ ASSERT_EQ(ret, iter_pid);
+ close(iter_pidfd);
+
+ /* Should have been killed */
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+
+ /* Clean up */
+ munmap(map, page_size);
+}
+
+/*
+ * Test specifically focused on mount namespace cleanup during EFAULT.
+ * Filter for mount namespaces only.
+ */
+TEST(listns_mnt_ns_cleanup_on_fault)
+{
+ void *map;
+ __u64 *ns_ids;
+ ssize_t ret;
+ long page_size;
+ pid_t pid, iter_pid;
+ int pidfds[8];
+ int sv[8][2];
+ int iter_pidfd;
+ int i, status;
+ char c;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ ASSERT_GT(page_size, 0);
+
+ /* Set up partial fault buffer */
+ map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(map, MAP_FAILED);
+
+ ret = munmap((char *)map + page_size, page_size);
+ ASSERT_EQ(ret, 0);
+
+ /* Position for 3 successful writes, then fault */
+ ns_ids = ((__u64 *)((char *)map + page_size)) - 3;
+
+ /*
+ * Create a separate process to run listns() concurrently.
+ */
+ iter_pid = create_child(&iter_pidfd, 0);
+ ASSERT_NE(iter_pid, -1);
+
+ if (iter_pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNS, /* Only mount namespaces */
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ int iter_ret;
+
+ /*
+ * Loop calling listns() until killed.
+ * Call listns() to race with namespace destruction.
+ */
+ while (1) {
+ iter_ret = sys_listns(&req, ns_ids, 10, 0);
+
+ if (iter_ret == -1 && errno == ENOSYS)
+ _exit(PIDFD_SKIP);
+ }
+ }
+
+ /* Small delay to let iterator start looping */
+ usleep(50000);
+
+ /* Create children with mount namespaces */
+ for (i = 0; i < 8; i++) {
+ /* Create socketpair for synchronization */
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+ pid = create_child(&pidfds[i], CLONE_NEWNS);
+ ASSERT_NE(pid, -1);
+
+ if (pid == 0) {
+ close(sv[i][0]); /* Close parent end */
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ _exit(1);
+
+ /* Do some mount operations to make cleanup more interesting */
+ if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+ if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
+ _exit(1);
+
+ if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+ if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
+ _exit(1);
+
+ /* Signal parent that setup is complete */
+ if (write_nointr(sv[i][1], "R", 1) != 1)
+ _exit(1);
+
+ /* Wait for parent to signal us to exit */
+ if (read_nointr(sv[i][1], &c, 1) != 1)
+ _exit(1);
+
+ close(sv[i][1]);
+ _exit(0);
+ }
+
+ close(sv[i][1]); /* Close child end */
+ }
+
+ /* Wait for all children to finish setup */
+ for (i = 0; i < 8; i++) {
+ ret = read_nointr(sv[i][0], &c, 1);
+ ASSERT_EQ(ret, 1);
+ ASSERT_EQ(c, 'R');
+ }
+
+ /* Kill children to trigger namespace destruction during iteration */
+ for (i = 0; i < 8; i++)
+ write_nointr(sv[i][0], "X", 1);
+
+ /* Wait for children and cleanup */
+ for (i = 0; i < 8; i++) {
+ waitpid(-1, NULL, 0);
+ close(sv[i][0]);
+ close(pidfds[i]);
+ }
+
+ /* Kill iterator and wait for it */
+ sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+ ret = waitpid(iter_pid, &status, 0);
+ ASSERT_EQ(ret, iter_pid);
+ close(iter_pidfd);
+
+ /* Should have been killed */
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+
+ munmap(map, page_size);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_pagination_bug.c b/tools/testing/selftests/namespaces/listns_pagination_bug.c
new file mode 100644
index 000000000000..da7d33f96397
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_pagination_bug.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Minimal test case to reproduce KASAN out-of-bounds in listns pagination.
+ *
+ * The bug occurs when:
+ * 1. Filtering by a specific namespace type (e.g., CLONE_NEWUSER)
+ * 2. Using pagination (req.ns_id != 0)
+ * 3. The lookup_ns_id_at() call in do_listns() passes ns_type=0 instead of
+ * the filtered type, causing it to search the unified tree and potentially
+ * return a namespace of the wrong type.
+ */
+TEST(pagination_with_type_filter)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER, /* Filter by user namespace */
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ pid_t pids[10];
+ int num_children = 10;
+ int i;
+ int sv[2];
+ __u64 first_batch[3];
+ ssize_t ret;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ /* Create children with user namespaces */
+ for (i = 0; i < num_children; i++) {
+ pids[i] = fork();
+ ASSERT_GE(pids[i], 0);
+
+ if (pids[i] == 0) {
+ char c;
+ close(sv[0]);
+
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Signal parent we're ready */
+ if (write(sv[1], &c, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Wait for parent signal to exit */
+ if (read(sv[1], &c, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ close(sv[1]);
+ exit(0);
+ }
+ }
+
+ close(sv[1]);
+
+ /* Wait for all children to signal ready */
+ for (i = 0; i < num_children; i++) {
+ char c;
+ if (read(sv[0], &c, 1) != 1) {
+ close(sv[0]);
+ for (int j = 0; j < num_children; j++)
+ kill(pids[j], SIGKILL);
+ for (int j = 0; j < num_children; j++)
+ waitpid(pids[j], NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ }
+
+ /* First batch - this should work */
+ ret = sys_listns(&req, first_batch, 3, 0);
+ if (ret < 0) {
+ if (errno == ENOSYS) {
+ close(sv[0]);
+ for (i = 0; i < num_children; i++)
+ kill(pids[i], SIGKILL);
+ for (i = 0; i < num_children; i++)
+ waitpid(pids[i], NULL, 0);
+ SKIP(return, "listns() not supported");
+ }
+ ASSERT_GE(ret, 0);
+ }
+
+ TH_LOG("First batch returned %zd entries", ret);
+
+ if (ret == 3) {
+ __u64 second_batch[3];
+
+ /* Second batch - pagination triggers the bug */
+ req.ns_id = first_batch[2]; /* Continue from last ID */
+ ret = sys_listns(&req, second_batch, 3, 0);
+
+ TH_LOG("Second batch returned %zd entries", ret);
+ ASSERT_GE(ret, 0);
+ }
+
+ /* Signal all children to exit */
+ for (i = 0; i < num_children; i++) {
+ char c = 'X';
+ if (write(sv[0], &c, 1) != 1) {
+ close(sv[0]);
+ for (int j = i; j < num_children; j++)
+ kill(pids[j], SIGKILL);
+ for (int j = 0; j < num_children; j++)
+ waitpid(pids[j], NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ }
+
+ close(sv[0]);
+
+ /* Cleanup */
+ for (i = 0; i < num_children; i++) {
+ int status;
+ waitpid(pids[i], &status, 0);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_permissions_test.c b/tools/testing/selftests/namespaces/listns_permissions_test.c
new file mode 100644
index 000000000000..82d818751a5f
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_permissions_test.c
@@ -0,0 +1,759 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/capability.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Test that unprivileged users can only see namespaces they're currently in.
+ * Create a namespace, drop privileges, verify we can only see our own namespaces.
+ */
+TEST(listns_unprivileged_current_only)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool found_ours;
+ int unexpected_count;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 our_netns_id;
+ bool found_ours;
+ int unexpected_count;
+
+ close(pipefd[0]);
+
+ /* Create user namespace to be unprivileged */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Create a network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get our network namespace ID */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &our_netns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Now we're unprivileged - list all network namespaces */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* We should only see our own network namespace */
+ found_ours = false;
+ unexpected_count = 0;
+
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == our_netns_id) {
+ found_ours = true;
+ } else {
+ /* This is either init_net (which we can see) or unexpected */
+ unexpected_count++;
+ }
+ }
+
+ /* Send results to parent */
+ write(pipefd[1], &found_ours, sizeof(found_ours));
+ write(pipefd[1], &unexpected_count, sizeof(unexpected_count));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ found_ours = false;
+ unexpected_count = 0;
+ read(pipefd[0], &found_ours, sizeof(found_ours));
+ read(pipefd[0], &unexpected_count, sizeof(unexpected_count));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Child should have seen its own namespace */
+ ASSERT_TRUE(found_ours);
+
+ TH_LOG("Unprivileged child saw its own namespace, plus %d others (likely init_net)",
+ unexpected_count);
+}
+
+/*
+ * Test that users with CAP_SYS_ADMIN in a user namespace can see
+ * all namespaces owned by that user namespace.
+ */
+TEST(listns_cap_sys_admin_in_userns)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0, /* All types */
+ .spare2 = 0,
+ .user_ns_id = 0, /* Will be set to our created user namespace */
+ };
+ __u64 ns_ids[100];
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool success;
+ ssize_t count;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 userns_id;
+ ssize_t ret;
+ int min_expected;
+ bool success;
+
+ close(pipefd[0]);
+
+ /* Create user namespace - we'll have CAP_SYS_ADMIN in it */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get the user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create several namespaces owned by this user namespace */
+ unshare(CLONE_NEWNET);
+ unshare(CLONE_NEWUTS);
+ unshare(CLONE_NEWIPC);
+
+ /* List namespaces owned by our user namespace */
+ req.user_ns_id = userns_id;
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /*
+ * We have CAP_SYS_ADMIN in this user namespace,
+ * so we should see all namespaces owned by it.
+ * That includes: net, uts, ipc, and the user namespace itself.
+ */
+ min_expected = 4;
+ success = (ret >= min_expected);
+
+ write(pipefd[1], &success, sizeof(success));
+ write(pipefd[1], &ret, sizeof(ret));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ success = false;
+ count = 0;
+ read(pipefd[0], &success, sizeof(success));
+ read(pipefd[0], &count, sizeof(count));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(success);
+ TH_LOG("User with CAP_SYS_ADMIN saw %zd namespaces owned by their user namespace",
+ count);
+}
+
+/*
+ * Test that users cannot see namespaces from unrelated user namespaces.
+ * Create two sibling user namespaces, verify they can't see each other's
+ * owned namespaces.
+ */
+TEST(listns_cannot_see_sibling_userns_namespaces)
+{
+ int pipefd[2];
+ pid_t pid1, pid2;
+ int status;
+ __u64 netns_a_id;
+ int pipefd2[2];
+ bool found_sibling_netns;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Fork first child - creates user namespace A */
+ pid1 = fork();
+ ASSERT_GE(pid1, 0);
+
+ if (pid1 == 0) {
+ int fd;
+ __u64 netns_a_id;
+ char buf;
+
+ close(pipefd[0]);
+
+ /* Create user namespace A */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Create network namespace owned by user namespace A */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get network namespace ID */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &netns_a_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send namespace ID to parent */
+ write(pipefd[1], &netns_a_id, sizeof(netns_a_id));
+
+ /* Keep alive for sibling to check */
+ read(pipefd[1], &buf, 1);
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent reads namespace A ID */
+ close(pipefd[1]);
+ netns_a_id = 0;
+ read(pipefd[0], &netns_a_id, sizeof(netns_a_id));
+
+ TH_LOG("User namespace A created network namespace with ID %llu",
+ (unsigned long long)netns_a_id);
+
+ /* Fork second child - creates user namespace B */
+ ASSERT_EQ(pipe(pipefd2), 0);
+
+ pid2 = fork();
+ ASSERT_GE(pid2, 0);
+
+ if (pid2 == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool found_sibling_netns;
+
+ close(pipefd[0]);
+ close(pipefd2[0]);
+
+ /* Create user namespace B (sibling to A) */
+ if (setup_userns() < 0) {
+ close(pipefd2[1]);
+ exit(1);
+ }
+
+ /* Try to list all network namespaces */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ found_sibling_netns = false;
+ if (ret > 0) {
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_a_id) {
+ found_sibling_netns = true;
+ break;
+ }
+ }
+ }
+
+ /* We should NOT see the sibling's network namespace */
+ write(pipefd2[1], &found_sibling_netns, sizeof(found_sibling_netns));
+ close(pipefd2[1]);
+ exit(0);
+ }
+
+ /* Parent reads result from second child */
+ close(pipefd2[1]);
+ found_sibling_netns = false;
+ read(pipefd2[0], &found_sibling_netns, sizeof(found_sibling_netns));
+ close(pipefd2[0]);
+
+ /* Signal first child to exit */
+ close(pipefd[0]);
+
+ /* Wait for both children */
+ waitpid(pid2, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ waitpid(pid1, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /* Second child should NOT have seen first child's namespace */
+ ASSERT_FALSE(found_sibling_netns);
+ TH_LOG("User namespace B correctly could not see sibling namespace A's network namespace");
+}
+
+/*
+ * Test permission checking with LISTNS_CURRENT_USER.
+ * Verify that listing with LISTNS_CURRENT_USER respects permissions.
+ */
+TEST(listns_current_user_permissions)
+{
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool success;
+ ssize_t count;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = LISTNS_CURRENT_USER,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool success;
+
+ close(pipefd[0]);
+
+ /* Create user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Create some namespaces owned by this user namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (unshare(CLONE_NEWUTS) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* List with LISTNS_CURRENT_USER - should see our owned namespaces */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ success = (ret >= 3); /* At least user, net, uts */
+ write(pipefd[1], &success, sizeof(success));
+ write(pipefd[1], &ret, sizeof(ret));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ success = false;
+ count = 0;
+ read(pipefd[0], &success, sizeof(success));
+ read(pipefd[0], &count, sizeof(count));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(success);
+ TH_LOG("LISTNS_CURRENT_USER returned %zd namespaces", count);
+}
+
+/*
+ * Test that CAP_SYS_ADMIN in parent user namespace allows seeing
+ * child user namespace's owned namespaces.
+ */
+TEST(listns_parent_userns_cap_sys_admin)
+{
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool found_child_userns;
+ ssize_t count;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 parent_userns_id;
+ __u64 child_userns_id;
+ struct ns_id_req req;
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool found_child_userns;
+
+ close(pipefd[0]);
+
+ /* Create parent user namespace - we have CAP_SYS_ADMIN in it */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get parent user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &parent_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create child user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get child user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create namespaces owned by child user namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* List namespaces owned by parent user namespace */
+ req.size = sizeof(req);
+ req.spare = 0;
+ req.ns_id = 0;
+ req.ns_type = 0;
+ req.spare2 = 0;
+ req.user_ns_id = parent_userns_id;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ /* Should see child user namespace in the list */
+ found_child_userns = false;
+ if (ret > 0) {
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == child_userns_id) {
+ found_child_userns = true;
+ break;
+ }
+ }
+ }
+
+ write(pipefd[1], &found_child_userns, sizeof(found_child_userns));
+ write(pipefd[1], &ret, sizeof(ret));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ found_child_userns = false;
+ count = 0;
+ read(pipefd[0], &found_child_userns, sizeof(found_child_userns));
+ read(pipefd[0], &count, sizeof(count));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(found_child_userns);
+ TH_LOG("Process with CAP_SYS_ADMIN in parent user namespace saw child user namespace (total: %zd)",
+ count);
+}
+
+/*
+ * Test that we can see user namespaces we have CAP_SYS_ADMIN inside of.
+ * This is different from seeing namespaces owned by a user namespace.
+ */
+TEST(listns_cap_sys_admin_inside_userns)
+{
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool found_ours;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 our_userns_id;
+ struct ns_id_req req;
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool found_ours;
+
+ close(pipefd[0]);
+
+ /* Create user namespace - we have CAP_SYS_ADMIN inside it */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get our user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &our_userns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* List all user namespaces globally */
+ req.size = sizeof(req);
+ req.spare = 0;
+ req.ns_id = 0;
+ req.ns_type = CLONE_NEWUSER;
+ req.spare2 = 0;
+ req.user_ns_id = 0;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ /* We should be able to see our own user namespace */
+ found_ours = false;
+ if (ret > 0) {
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == our_userns_id) {
+ found_ours = true;
+ break;
+ }
+ }
+ }
+
+ write(pipefd[1], &found_ours, sizeof(found_ours));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ found_ours = false;
+ read(pipefd[0], &found_ours, sizeof(found_ours));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(found_ours);
+ TH_LOG("Process can see user namespace it has CAP_SYS_ADMIN inside of");
+}
+
+/*
+ * Test that dropping CAP_SYS_ADMIN restricts what we can see.
+ */
+TEST(listns_drop_cap_sys_admin)
+{
+ cap_t caps;
+ cap_value_t cap_list[1] = { CAP_SYS_ADMIN };
+
+ /* This test needs to start with CAP_SYS_ADMIN */
+ caps = cap_get_proc();
+ if (!caps) {
+ SKIP(return, "Cannot get capabilities");
+ }
+
+ cap_flag_value_t cap_val;
+ if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &cap_val) < 0) {
+ cap_free(caps);
+ SKIP(return, "Cannot check CAP_SYS_ADMIN");
+ }
+
+ if (cap_val != CAP_SET) {
+ cap_free(caps);
+ SKIP(return, "Test needs CAP_SYS_ADMIN to start");
+ }
+ cap_free(caps);
+
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ bool correct;
+ ssize_t count_before, count_after;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = LISTNS_CURRENT_USER,
+ };
+ __u64 ns_ids_before[100];
+ ssize_t count_before;
+ __u64 ns_ids_after[100];
+ ssize_t count_after;
+ bool correct;
+
+ close(pipefd[0]);
+
+ /* Create user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Count namespaces with CAP_SYS_ADMIN */
+ count_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+
+ /* Drop CAP_SYS_ADMIN */
+ caps = cap_get_proc();
+ if (caps) {
+ cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR);
+ cap_set_flag(caps, CAP_PERMITTED, 1, cap_list, CAP_CLEAR);
+ cap_set_proc(caps);
+ cap_free(caps);
+ }
+
+ /* Ensure we can't regain the capability */
+ prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+
+ /* Count namespaces without CAP_SYS_ADMIN */
+ count_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+
+ /* Without CAP_SYS_ADMIN, we should see same or fewer namespaces */
+ correct = (count_after <= count_before);
+
+ write(pipefd[1], &correct, sizeof(correct));
+ write(pipefd[1], &count_before, sizeof(count_before));
+ write(pipefd[1], &count_after, sizeof(count_after));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+
+ correct = false;
+ count_before = 0;
+ count_after = 0;
+ read(pipefd[0], &correct, sizeof(correct));
+ read(pipefd[0], &count_before, sizeof(count_before));
+ read(pipefd[0], &count_after, sizeof(count_after));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_TRUE(correct);
+ TH_LOG("With CAP_SYS_ADMIN: %zd namespaces, without: %zd namespaces",
+ count_before, count_after);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_test.c b/tools/testing/selftests/namespaces/listns_test.c
new file mode 100644
index 000000000000..8a95789d6a87
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_test.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Test basic listns() functionality with the unified namespace tree.
+ * List all active namespaces globally.
+ */
+TEST(listns_basic_unified)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0, /* All types */
+ .spare2 = 0,
+ .user_ns_id = 0, /* Global listing */
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+
+ /* Should find at least the initial namespaces */
+ ASSERT_GT(ret, 0);
+ TH_LOG("Found %zd active namespaces", ret);
+
+ /* Verify all returned IDs are non-zero */
+ for (ssize_t i = 0; i < ret; i++) {
+ ASSERT_NE(ns_ids[i], 0);
+ TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+ }
+}
+
+/*
+ * Test listns() with type filtering.
+ * List only network namespaces.
+ */
+TEST(listns_filter_by_type)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET, /* Only network namespaces */
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret, 0);
+
+ /* Should find at least init_net */
+ ASSERT_GT(ret, 0);
+ TH_LOG("Found %zd active network namespaces", ret);
+
+ /* Verify we can open each namespace and it's actually a network namespace */
+ for (ssize_t i = 0; i < ret && i < 5; i++) {
+ struct nsfs_file_handle nsfh = {
+ .ns_id = ns_ids[i],
+ .ns_type = CLONE_NEWNET,
+ .ns_inum = 0,
+ };
+ struct file_handle *fh;
+ int fd;
+
+ fh = (struct file_handle *)malloc(sizeof(*fh) + sizeof(nsfh));
+ ASSERT_NE(fh, NULL);
+ fh->handle_bytes = sizeof(nsfh);
+ fh->handle_type = 0;
+ memcpy(fh->f_handle, &nsfh, sizeof(nsfh));
+
+ fd = open_by_handle_at(-10003, fh, O_RDONLY);
+ free(fh);
+
+ if (fd >= 0) {
+ int ns_type;
+ /* Verify it's a network namespace via ioctl */
+ ns_type = ioctl(fd, NS_GET_NSTYPE);
+ if (ns_type >= 0) {
+ ASSERT_EQ(ns_type, CLONE_NEWNET);
+ }
+ close(fd);
+ }
+ }
+}
+
+/*
+ * Test listns() pagination.
+ * List namespaces in batches.
+ */
+TEST(listns_pagination)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 batch1[2], batch2[2];
+ ssize_t ret1, ret2;
+
+ /* Get first batch */
+ ret1 = sys_listns(&req, batch1, ARRAY_SIZE(batch1), 0);
+ if (ret1 < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret1, 0);
+
+ if (ret1 == 0)
+ SKIP(return, "No namespaces found");
+
+ TH_LOG("First batch: %zd namespaces", ret1);
+
+ /* Get second batch using last ID from first batch */
+ if (ret1 == ARRAY_SIZE(batch1)) {
+ req.ns_id = batch1[ret1 - 1];
+ ret2 = sys_listns(&req, batch2, ARRAY_SIZE(batch2), 0);
+ ASSERT_GE(ret2, 0);
+
+ TH_LOG("Second batch: %zd namespaces (after ns_id=%llu)",
+ ret2, (unsigned long long)req.ns_id);
+
+ /* If we got more results, verify IDs are monotonically increasing */
+ if (ret2 > 0) {
+ ASSERT_GT(batch2[0], batch1[ret1 - 1]);
+ TH_LOG("Pagination working: %llu > %llu",
+ (unsigned long long)batch2[0],
+ (unsigned long long)batch1[ret1 - 1]);
+ }
+ } else {
+ TH_LOG("All namespaces fit in first batch");
+ }
+}
+
+/*
+ * Test listns() with LISTNS_CURRENT_USER.
+ * List namespaces owned by current user namespace.
+ */
+TEST(listns_current_user)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = LISTNS_CURRENT_USER,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret, 0);
+
+ /* Should find at least the initial namespaces if we're in init_user_ns */
+ TH_LOG("Found %zd namespaces owned by current user namespace", ret);
+
+ for (ssize_t i = 0; i < ret; i++)
+ TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+}
+
+/*
+ * Test that listns() only returns active namespaces.
+ * Create a namespace, let it become inactive, verify it's not listed.
+ */
+TEST(listns_only_active)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids_before[100], ns_ids_after[100];
+ ssize_t ret_before, ret_after;
+ int pipefd[2];
+ pid_t pid;
+ __u64 new_ns_id = 0;
+ int status;
+
+ /* Get initial list */
+ ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+ if (ret_before < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret_before, 0);
+
+ TH_LOG("Before: %zd active network namespaces", ret_before);
+
+ /* Create a new namespace in a child process and get its ID */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 ns_id;
+
+ close(pipefd[0]);
+
+ /* Create new network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get its ID */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &ns_id) < 0) {
+ close(fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send ID to parent */
+ write(pipefd[1], &ns_id, sizeof(ns_id));
+ close(pipefd[1]);
+
+ /* Keep namespace active briefly */
+ usleep(100000);
+ exit(0);
+ }
+
+ /* Parent reads the new namespace ID */
+ {
+ int bytes;
+
+ close(pipefd[1]);
+ bytes = read(pipefd[0], &new_ns_id, sizeof(new_ns_id));
+ close(pipefd[0]);
+
+ if (bytes == sizeof(new_ns_id)) {
+ __u64 ns_ids_during[100];
+ int ret_during;
+
+ TH_LOG("Child created namespace with ID %llu", (unsigned long long)new_ns_id);
+
+ /* List namespaces while child is still alive - should see new one */
+ ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0);
+ ASSERT_GE(ret_during, 0);
+ TH_LOG("During: %d active network namespaces", ret_during);
+
+ /* Should have more namespaces than before */
+ ASSERT_GE(ret_during, ret_before);
+ }
+ }
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+
+ /* Give time for namespace to become inactive */
+ usleep(100000);
+
+ /* List namespaces after child exits - should not see new one */
+ ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+ ASSERT_GE(ret_after, 0);
+ TH_LOG("After: %zd active network namespaces", ret_after);
+
+ /* Verify the new namespace ID is not in the after list */
+ if (new_ns_id != 0) {
+ bool found = false;
+
+ for (ssize_t i = 0; i < ret_after; i++) {
+ if (ns_ids_after[i] == new_ns_id) {
+ found = true;
+ break;
+ }
+ }
+ ASSERT_FALSE(found);
+ }
+}
+
+/*
+ * Test listns() with specific user namespace ID.
+ * Create a user namespace and list namespaces it owns.
+ */
+TEST(listns_specific_userns)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0, /* Will be filled with created userns ID */
+ };
+ __u64 ns_ids[100];
+ int sv[2];
+ pid_t pid;
+ int status;
+ __u64 user_ns_id = 0;
+ int bytes;
+ ssize_t ret;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ __u64 ns_id;
+ char buf;
+
+ close(sv[0]);
+
+ /* Create new user namespace */
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Get user namespace ID */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &ns_id) < 0) {
+ close(fd);
+ close(sv[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send ID to parent */
+ if (write(sv[1], &ns_id, sizeof(ns_id)) != sizeof(ns_id)) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Create some namespaces owned by this user namespace */
+ unshare(CLONE_NEWNET);
+ unshare(CLONE_NEWUTS);
+
+ /* Wait for parent signal */
+ if (read(sv[1], &buf, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+ close(sv[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(sv[1]);
+ bytes = read(sv[0], &user_ns_id, sizeof(user_ns_id));
+
+ if (bytes != sizeof(user_ns_id)) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get user namespace ID from child");
+ }
+
+ TH_LOG("Child created user namespace with ID %llu", (unsigned long long)user_ns_id);
+
+ /* List namespaces owned by this user namespace */
+ req.user_ns_id = user_ns_id;
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ if (ret < 0) {
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ if (errno == ENOSYS) {
+ SKIP(return, "listns() not supported");
+ }
+ ASSERT_GE(ret, 0);
+ }
+
+ TH_LOG("Found %zd namespaces owned by user namespace %llu", ret,
+ (unsigned long long)user_ns_id);
+
+ /* Should find at least the network and UTS namespaces we created */
+ if (ret > 0) {
+ for (ssize_t i = 0; i < ret && i < 10; i++)
+ TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+ }
+
+ /* Signal child to exit */
+ if (write(sv[0], "X", 1) != 1) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ close(sv[0]);
+ waitpid(pid, &status, 0);
+}
+
+/*
+ * Test listns() with multiple namespace types filter.
+ */
+TEST(listns_multiple_types)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET | CLONE_NEWUTS, /* Network and UTS */
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[100];
+ ssize_t ret;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(false);
+ }
+ ASSERT_GE(ret, 0);
+
+ TH_LOG("Found %zd active network/UTS namespaces", ret);
+
+ for (ssize_t i = 0; i < ret; i++)
+ TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+}
+
+/*
+ * Test that hierarchical active reference propagation keeps parent
+ * user namespaces visible in listns().
+ */
+TEST(listns_hierarchical_visibility)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 parent_ns_id = 0, child_ns_id = 0;
+ int sv[2];
+ pid_t pid;
+ int status;
+ int bytes;
+ __u64 ns_ids[100];
+ ssize_t ret;
+ bool found_parent, found_child;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int fd;
+ char buf;
+
+ close(sv[0]);
+
+ /* Create parent user namespace */
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &parent_ns_id) < 0) {
+ close(fd);
+ close(sv[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Create child user namespace */
+ if (setup_userns() < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(fd, NS_GET_ID, &child_ns_id) < 0) {
+ close(fd);
+ close(sv[1]);
+ exit(1);
+ }
+ close(fd);
+
+ /* Send both IDs to parent */
+ if (write(sv[1], &parent_ns_id, sizeof(parent_ns_id)) != sizeof(parent_ns_id)) {
+ close(sv[1]);
+ exit(1);
+ }
+ if (write(sv[1], &child_ns_id, sizeof(child_ns_id)) != sizeof(child_ns_id)) {
+ close(sv[1]);
+ exit(1);
+ }
+
+ /* Wait for parent signal */
+ if (read(sv[1], &buf, 1) != 1) {
+ close(sv[1]);
+ exit(1);
+ }
+ close(sv[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(sv[1]);
+
+ /* Read both namespace IDs */
+ bytes = read(sv[0], &parent_ns_id, sizeof(parent_ns_id));
+ bytes += read(sv[0], &child_ns_id, sizeof(child_ns_id));
+
+ if (bytes != (int)(2 * sizeof(__u64))) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to get namespace IDs from child");
+ }
+
+ TH_LOG("Parent user namespace ID: %llu", (unsigned long long)parent_ns_id);
+ TH_LOG("Child user namespace ID: %llu", (unsigned long long)child_ns_id);
+
+ /* List all user namespaces */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+ if (ret < 0 && errno == ENOSYS) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "listns() not supported");
+ }
+
+ ASSERT_GE(ret, 0);
+ TH_LOG("Found %zd active user namespaces", ret);
+
+ /* Both parent and child should be visible (active due to child process) */
+ found_parent = false;
+ found_child = false;
+ for (ssize_t i = 0; i < ret; i++) {
+ if (ns_ids[i] == parent_ns_id)
+ found_parent = true;
+ if (ns_ids[i] == child_ns_id)
+ found_child = true;
+ }
+
+ TH_LOG("Parent namespace %s, child namespace %s",
+ found_parent ? "found" : "NOT FOUND",
+ found_child ? "found" : "NOT FOUND");
+
+ ASSERT_TRUE(found_child);
+ /* With hierarchical propagation, parent should also be active */
+ ASSERT_TRUE(found_parent);
+
+ /* Signal child to exit */
+ if (write(sv[0], "X", 1) != 1) {
+ close(sv[0]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ ASSERT_TRUE(false);
+ }
+ close(sv[0]);
+ waitpid(pid, &status, 0);
+}
+
+/*
+ * Test error cases for listns().
+ */
+TEST(listns_error_cases)
+{
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = 0,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[10];
+ int ret;
+
+ /* Test with invalid flags */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0xFFFF);
+ if (errno == ENOSYS) {
+ /* listns() not supported, skip this check */
+ } else {
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EINVAL);
+ }
+
+ /* Test with NULL ns_ids array */
+ ret = sys_listns(&req, NULL, 10, 0);
+ ASSERT_LT(ret, 0);
+
+ /* Test with invalid spare field */
+ req.spare = 1;
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (errno == ENOSYS) {
+ /* listns() not supported, skip this check */
+ } else {
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EINVAL);
+ }
+ req.spare = 0;
+
+ /* Test with huge nr_ns_ids */
+ ret = sys_listns(&req, ns_ids, 2000000, 0);
+ if (errno == ENOSYS) {
+ /* listns() not supported, skip this check */
+ } else {
+ ASSERT_LT(ret, 0);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/ns_active_ref_test.c b/tools/testing/selftests/namespaces/ns_active_ref_test.c
new file mode 100644
index 000000000000..093268f0efaa
--- /dev/null
+++ b/tools/testing/selftests/namespaces/ns_active_ref_test.c
@@ -0,0 +1,2672 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <pthread.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+#ifndef FD_NSFS_ROOT
+#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */
+#endif
+
+#ifndef FILEID_NSFS
+#define FILEID_NSFS 0xf1
+#endif
+
+/*
+ * Test that initial namespaces can be reopened via file handle.
+ * Initial namespaces should have active ref count of 1 from boot.
+ */
+TEST(init_ns_always_active)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd1, fd2;
+ struct stat st1, st2;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open initial network namespace */
+ fd1 = open("/proc/1/ns/net", O_RDONLY);
+ ASSERT_GE(fd1, 0);
+
+ /* Get file handle for initial namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd1, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(fd1);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+
+ /* Close the namespace fd */
+ close(fd1);
+
+ /* Try to reopen via file handle - should succeed since init ns is always active */
+ fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd2 < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle);
+ return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd2, 0);
+
+ /* Verify we opened the same namespace */
+ fd1 = open("/proc/1/ns/net", O_RDONLY);
+ ASSERT_GE(fd1, 0);
+ ASSERT_EQ(fstat(fd1, &st1), 0);
+ ASSERT_EQ(fstat(fd2, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+ close(fd1);
+ close(fd2);
+ free(handle);
+}
+
+/*
+ * Test namespace lifecycle: create a namespace in a child process,
+ * get a file handle while it's active, then try to reopen after
+ * the process exits (namespace becomes inactive).
+ */
+TEST(ns_inactive_after_exit)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ /* Create pipe for passing file handle from child */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Open our new namespace */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Get file handle for the namespace */
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Send handle to parent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+
+ /* Exit - namespace should become inactive */
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ /* Read file handle from child */
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ /* Try to reopen namespace - should fail with ENOENT since it's inactive */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd, 0);
+ /* Should fail with ENOENT (namespace inactive) or ESTALE */
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that a namespace remains active while a process is using it,
+ * even after the creating process exits.
+ */
+TEST(ns_active_with_multiple_processes)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ int syncpipe[2];
+ pid_t pid1, pid2;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+ char sync_byte;
+
+ /* Create pipes for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(pipe(syncpipe), 0);
+
+ pid1 = fork();
+ ASSERT_GE(pid1, 0);
+
+ if (pid1 == 0) {
+ /* First child - creates namespace */
+ close(pipefd[0]);
+ close(syncpipe[1]);
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ /* Open and get handle */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ /* Send handle to parent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+
+ /* Wait for signal before exiting */
+ read(syncpipe[0], &sync_byte, 1);
+ close(syncpipe[0]);
+ exit(0);
+ }
+
+ /* Parent reads handle */
+ close(pipefd[1]);
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+ ASSERT_GT(ret, 0);
+
+ handle = (struct file_handle *)buf;
+
+ /* Create second child that will keep namespace active */
+ pid2 = fork();
+ ASSERT_GE(pid2, 0);
+
+ if (pid2 == 0) {
+ /* Second child - reopens the namespace */
+ close(syncpipe[0]);
+ close(syncpipe[1]);
+
+ /* Open the namespace via handle */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0) {
+ exit(1);
+ }
+
+ /* Join the namespace */
+ ret = setns(fd, CLONE_NEWNET);
+ close(fd);
+ if (ret < 0) {
+ exit(1);
+ }
+
+ /* Sleep to keep namespace active */
+ sleep(1);
+ exit(0);
+ }
+
+ /* Let second child enter the namespace */
+ usleep(100000); /* 100ms */
+
+ /* Signal first child to exit */
+ close(syncpipe[0]);
+ sync_byte = 'X';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+
+ /* Wait for first child */
+ waitpid(pid1, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /* Namespace should still be active because second child is using it */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(fd, 0);
+ close(fd);
+
+ /* Wait for second child */
+ waitpid(pid2, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+}
+
+/*
+ * Test user namespace active ref tracking via credential lifecycle
+ */
+TEST(userns_active_ref_lifecycle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new user namespace */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Set up uid/gid mappings */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd >= 0 && gid_map_fd >= 0 && setgroups_fd >= 0) {
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+ }
+
+ /* Get file handle */
+ fd = open("/proc/self/ns/user", O_RDONLY);
+ if (fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Send handle to parent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ /* Namespace should be inactive after all tasks exit */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd, 0);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test PID namespace active ref tracking
+ */
+TEST(pidns_active_ref_lifecycle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new PID namespace */
+ ret = unshare(CLONE_NEWPID);
+ if (ret < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ /* Fork to actually enter the PID namespace */
+ pid_t child = fork();
+ if (child < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ if (child == 0) {
+ /* Grandchild - in new PID namespace */
+ fd = open("/proc/self/ns/pid", O_RDONLY);
+ if (fd < 0) {
+ exit(1);
+ }
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ exit(1);
+ }
+
+ /* Send handle to grandparent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Wait for grandchild */
+ waitpid(child, NULL, 0);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ /* Namespace should be inactive after all processes exit */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd, 0);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that an open file descriptor keeps a namespace active.
+ * Even after the creating process exits, the namespace should remain
+ * active as long as an fd is held open.
+ */
+TEST(ns_fd_keeps_active)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int nsfd;
+ int pipe_child_ready[2];
+ int pipe_parent_ready[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+ char sync_byte;
+ char proc_path[64];
+
+ ASSERT_EQ(pipe(pipe_child_ready), 0);
+ ASSERT_EQ(pipe(pipe_parent_ready), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipe_child_ready[0]);
+ close(pipe_parent_ready[1]);
+
+ TH_LOG("Child: creating new network namespace");
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ TH_LOG("Child: unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+ close(pipe_child_ready[1]);
+ close(pipe_parent_ready[0]);
+ exit(1);
+ }
+
+ TH_LOG("Child: network namespace created successfully");
+
+ /* Get file handle for the namespace */
+ nsfd = open("/proc/self/ns/net", O_RDONLY);
+ if (nsfd < 0) {
+ TH_LOG("Child: failed to open /proc/self/ns/net: %s", strerror(errno));
+ close(pipe_child_ready[1]);
+ close(pipe_parent_ready[0]);
+ exit(1);
+ }
+
+ TH_LOG("Child: opened namespace fd %d", nsfd);
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(nsfd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(nsfd);
+
+ if (ret < 0) {
+ TH_LOG("Child: name_to_handle_at failed: %s", strerror(errno));
+ close(pipe_child_ready[1]);
+ close(pipe_parent_ready[0]);
+ exit(1);
+ }
+
+ TH_LOG("Child: got file handle (bytes=%u)", handle->handle_bytes);
+
+ /* Send file handle to parent */
+ ret = write(pipe_child_ready[1], buf, sizeof(*handle) + handle->handle_bytes);
+ TH_LOG("Child: sent %d bytes of file handle to parent", ret);
+ close(pipe_child_ready[1]);
+
+ /* Wait for parent to open the fd */
+ TH_LOG("Child: waiting for parent to open fd");
+ ret = read(pipe_parent_ready[0], &sync_byte, 1);
+ close(pipe_parent_ready[0]);
+
+ TH_LOG("Child: parent signaled (read %d bytes), exiting now", ret);
+ /* Exit - namespace should stay active because parent holds fd */
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipe_child_ready[1]);
+ close(pipe_parent_ready[0]);
+
+ TH_LOG("Parent: reading file handle from child");
+
+ /* Read file handle from child */
+ ret = read(pipe_child_ready[0], buf, sizeof(buf));
+ close(pipe_child_ready[0]);
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ TH_LOG("Parent: received %d bytes, handle size=%u", ret, handle->handle_bytes);
+
+ /* Open the child's namespace while it's still alive */
+ snprintf(proc_path, sizeof(proc_path), "/proc/%d/ns/net", pid);
+ TH_LOG("Parent: opening child's namespace at %s", proc_path);
+ nsfd = open(proc_path, O_RDONLY);
+ if (nsfd < 0) {
+ TH_LOG("Parent: failed to open %s: %s", proc_path, strerror(errno));
+ close(pipe_parent_ready[1]);
+ kill(pid, SIGKILL);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open child's namespace");
+ }
+
+ TH_LOG("Parent: opened child's namespace, got fd %d", nsfd);
+
+ /* Signal child that we have the fd */
+ sync_byte = 'G';
+ write(pipe_parent_ready[1], &sync_byte, 1);
+ close(pipe_parent_ready[1]);
+ TH_LOG("Parent: signaled child that we have the fd");
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ TH_LOG("Child exited, parent holds fd %d to namespace", nsfd);
+
+ /*
+ * Namespace should still be ACTIVE because we hold an fd.
+ * We should be able to reopen it via file handle.
+ */
+ TH_LOG("Attempting to reopen namespace via file handle (should succeed - fd held)");
+ int fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(fd2, 0);
+
+ TH_LOG("Successfully reopened namespace via file handle, got fd %d", fd2);
+
+ /* Verify it's the same namespace */
+ struct stat st1, st2;
+ ASSERT_EQ(fstat(nsfd, &st1), 0);
+ ASSERT_EQ(fstat(fd2, &st2), 0);
+ TH_LOG("Namespace inodes: nsfd=%lu, fd2=%lu", st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ close(fd2);
+
+ /* Now close the fd - namespace should become inactive */
+ TH_LOG("Closing fd %d - namespace should become inactive", nsfd);
+ close(nsfd);
+
+ /* Now reopening should fail - namespace is inactive */
+ TH_LOG("Attempting to reopen namespace via file handle (should fail - inactive)");
+ fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd2, 0);
+ /* Should fail with ENOENT (inactive) or ESTALE (gone) */
+ TH_LOG("Reopen failed as expected: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test hierarchical active reference propagation.
+ * When a child namespace is active, its owning user namespace should also
+ * be active automatically due to hierarchical active reference propagation.
+ * This ensures parents are always reachable when children are active.
+ */
+TEST(ns_parent_always_reachable)
+{
+ struct file_handle *parent_handle, *child_handle;
+ int ret;
+ int child_nsfd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 parent_id, child_id;
+ char parent_buf[sizeof(*parent_handle) + MAX_HANDLE_SZ];
+ char child_buf[sizeof(*child_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ TH_LOG("Child: creating parent user namespace and setting up mappings");
+
+ /* Create parent user namespace with mappings */
+ ret = setup_userns();
+ if (ret < 0) {
+ TH_LOG("Child: setup_userns() for parent failed: %s", strerror(errno));
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ TH_LOG("Child: parent user namespace created, now uid=%d gid=%d", getuid(), getgid());
+
+ /* Get namespace ID for parent user namespace */
+ int parent_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (parent_fd < 0) {
+ TH_LOG("Child: failed to open parent /proc/self/ns/user: %s", strerror(errno));
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ TH_LOG("Child: opened parent userns fd %d", parent_fd);
+
+ if (ioctl(parent_fd, NS_GET_ID, &parent_id) < 0) {
+ TH_LOG("Child: NS_GET_ID for parent failed: %s", strerror(errno));
+ close(parent_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(parent_fd);
+
+ TH_LOG("Child: got parent namespace ID %llu", (unsigned long long)parent_id);
+
+ /* Create child user namespace within parent */
+ TH_LOG("Child: creating nested child user namespace");
+ ret = setup_userns();
+ if (ret < 0) {
+ TH_LOG("Child: setup_userns() for child failed: %s", strerror(errno));
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ TH_LOG("Child: nested child user namespace created, uid=%d gid=%d", getuid(), getgid());
+
+ /* Get namespace ID for child user namespace */
+ int child_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (child_fd < 0) {
+ TH_LOG("Child: failed to open child /proc/self/ns/user: %s", strerror(errno));
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ TH_LOG("Child: opened child userns fd %d", child_fd);
+
+ if (ioctl(child_fd, NS_GET_ID, &child_id) < 0) {
+ TH_LOG("Child: NS_GET_ID for child failed: %s", strerror(errno));
+ close(child_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(child_fd);
+
+ TH_LOG("Child: got child namespace ID %llu", (unsigned long long)child_id);
+
+ /* Send both namespace IDs to parent */
+ TH_LOG("Child: sending both namespace IDs to parent");
+ write(pipefd[1], &parent_id, sizeof(parent_id));
+ write(pipefd[1], &child_id, sizeof(child_id));
+ close(pipefd[1]);
+
+ TH_LOG("Child: exiting - parent userns should become inactive");
+ /* Exit - parent user namespace should become inactive */
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ TH_LOG("Parent: reading both namespace IDs from child");
+
+ /* Read both namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &parent_id, sizeof(parent_id));
+ if (ret != sizeof(parent_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read parent namespace ID from child");
+ }
+
+ ret = read(pipefd[0], &child_id, sizeof(child_id));
+ close(pipefd[0]);
+ if (ret != sizeof(child_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read child namespace ID from child");
+ }
+
+ TH_LOG("Parent: received parent_id=%llu, child_id=%llu",
+ (unsigned long long)parent_id, (unsigned long long)child_id);
+
+ /* Construct file handles from namespace IDs */
+ parent_handle = (struct file_handle *)parent_buf;
+ parent_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ parent_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *parent_fh = (struct nsfs_file_handle *)parent_handle->f_handle;
+ parent_fh->ns_id = parent_id;
+ parent_fh->ns_type = 0;
+ parent_fh->ns_inum = 0;
+
+ child_handle = (struct file_handle *)child_buf;
+ child_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ child_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *child_fh = (struct nsfs_file_handle *)child_handle->f_handle;
+ child_fh->ns_id = child_id;
+ child_fh->ns_type = 0;
+ child_fh->ns_inum = 0;
+
+ TH_LOG("Parent: opening child namespace BEFORE child exits");
+
+ /* Open child namespace while child is still alive to keep it active */
+ child_nsfd = open_by_handle_at(FD_NSFS_ROOT, child_handle, O_RDONLY);
+ if (child_nsfd < 0) {
+ TH_LOG("Failed to open child namespace: %s (errno=%d)", strerror(errno), errno);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open child namespace");
+ }
+
+ TH_LOG("Opened child namespace fd %d", child_nsfd);
+
+ /* Now wait for child to exit */
+ TH_LOG("Parent: waiting for child to exit");
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ TH_LOG("Child process exited, parent holds fd to child namespace");
+
+ /*
+ * With hierarchical active reference propagation:
+ * Since the child namespace is active (parent process holds fd),
+ * the parent user namespace should ALSO be active automatically.
+ * This is because when we took an active reference on the child,
+ * it propagated up to the owning user namespace.
+ */
+ TH_LOG("Attempting to reopen parent namespace (should SUCCEED - hierarchical propagation)");
+ int parent_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_GE(parent_fd, 0);
+
+ TH_LOG("SUCCESS: Parent namespace is active (fd=%d) due to active child", parent_fd);
+
+ /* Verify we can also get parent via NS_GET_USERNS */
+ TH_LOG("Verifying NS_GET_USERNS also works");
+ int parent_fd2 = ioctl(child_nsfd, NS_GET_USERNS);
+ if (parent_fd2 < 0) {
+ close(parent_fd);
+ close(child_nsfd);
+ TH_LOG("NS_GET_USERNS failed: %s (errno=%d)", strerror(errno), errno);
+ SKIP(return, "NS_GET_USERNS not supported or failed");
+ }
+
+ TH_LOG("NS_GET_USERNS succeeded, got parent fd %d", parent_fd2);
+
+ /* Verify both methods give us the same namespace */
+ struct stat st1, st2;
+ ASSERT_EQ(fstat(parent_fd, &st1), 0);
+ ASSERT_EQ(fstat(parent_fd2, &st2), 0);
+ TH_LOG("Parent namespace inodes: parent_fd=%lu, parent_fd2=%lu", st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+ /*
+ * Close child fd - parent should remain active because we still
+ * hold direct references to it (parent_fd and parent_fd2).
+ */
+ TH_LOG("Closing child fd - parent should remain active (direct refs held)");
+ close(child_nsfd);
+
+ /* Parent should still be openable */
+ TH_LOG("Verifying parent still active via file handle");
+ int parent_fd3 = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_GE(parent_fd3, 0);
+ close(parent_fd3);
+
+ TH_LOG("Closing all fds to parent namespace");
+ close(parent_fd);
+ close(parent_fd2);
+
+ /* Both should now be inactive */
+ TH_LOG("Attempting to reopen parent (should fail - inactive, no refs)");
+ parent_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_LT(parent_fd, 0);
+ TH_LOG("Parent inactive as expected: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that bind mounts keep namespaces in the tree even when inactive
+ */
+TEST(ns_bind_mount_keeps_in_tree)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int pipefd[2];
+ pid_t pid;
+ int status;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+ char tmpfile[] = "/tmp/ns-test-XXXXXX";
+ int tmpfd;
+
+ /* Create temporary file for bind mount */
+ tmpfd = mkstemp(tmpfile);
+ if (tmpfd < 0) {
+ SKIP(return, "Cannot create temporary file");
+ }
+ close(tmpfd);
+
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Unshare mount namespace and make mounts private to avoid propagation */
+ ret = unshare(CLONE_NEWNS);
+ if (ret < 0) {
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+ ret = mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL);
+ if (ret < 0) {
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ /* Bind mount the namespace */
+ ret = mount("/proc/self/ns/net", tmpfile, NULL, MS_BIND, NULL);
+ if (ret < 0) {
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ /* Get file handle */
+ fd = open("/proc/self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ umount(tmpfile);
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ close(fd);
+
+ if (ret < 0) {
+ umount(tmpfile);
+ close(pipefd[1]);
+ unlink(tmpfile);
+ exit(1);
+ }
+
+ /* Send handle to parent */
+ write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(pipefd[1]);
+ ret = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_GT(ret, 0);
+ handle = (struct file_handle *)buf;
+
+ /*
+ * Namespace should be inactive but still in tree due to bind mount.
+ * Reopening should fail with ENOENT (inactive) not ESTALE (not in tree).
+ */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(fd, 0);
+ /* Should be ENOENT (inactive) since bind mount keeps it in tree */
+ if (errno != ENOENT && errno != ESTALE) {
+ TH_LOG("Unexpected error: %d", errno);
+ }
+
+ /* Cleanup */
+ umount(tmpfile);
+ unlink(tmpfile);
+}
+
+/*
+ * Test multi-level hierarchy (3+ levels deep).
+ * Grandparent → Parent → Child
+ * When child is active, both parent AND grandparent should be active.
+ */
+TEST(ns_multilevel_hierarchy)
+{
+ struct file_handle *gp_handle, *p_handle, *c_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 gp_id, p_id, c_id;
+ char gp_buf[sizeof(*gp_handle) + MAX_HANDLE_SZ];
+ char p_buf[sizeof(*p_handle) + MAX_HANDLE_SZ];
+ char c_buf[sizeof(*c_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create grandparent user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int gp_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (gp_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(gp_fd, NS_GET_ID, &gp_id) < 0) {
+ close(gp_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(gp_fd);
+
+ /* Create parent user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int p_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (p_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
+ close(p_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(p_fd);
+
+ /* Create child user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int c_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (c_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(c_fd, NS_GET_ID, &c_id) < 0) {
+ close(c_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(c_fd);
+
+ /* Send all three namespace IDs */
+ write(pipefd[1], &gp_id, sizeof(gp_id));
+ write(pipefd[1], &p_id, sizeof(p_id));
+ write(pipefd[1], &c_id, sizeof(c_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &gp_id, sizeof(gp_id));
+ if (ret != sizeof(gp_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read grandparent namespace ID from child");
+ }
+
+ ret = read(pipefd[0], &p_id, sizeof(p_id));
+ if (ret != sizeof(p_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read parent namespace ID from child");
+ }
+
+ ret = read(pipefd[0], &c_id, sizeof(c_id));
+ close(pipefd[0]);
+ if (ret != sizeof(c_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read child namespace ID from child");
+ }
+
+ /* Construct file handles from namespace IDs */
+ gp_handle = (struct file_handle *)gp_buf;
+ gp_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ gp_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *gp_fh = (struct nsfs_file_handle *)gp_handle->f_handle;
+ gp_fh->ns_id = gp_id;
+ gp_fh->ns_type = 0;
+ gp_fh->ns_inum = 0;
+
+ p_handle = (struct file_handle *)p_buf;
+ p_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ p_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)p_handle->f_handle;
+ p_fh->ns_id = p_id;
+ p_fh->ns_type = 0;
+ p_fh->ns_inum = 0;
+
+ c_handle = (struct file_handle *)c_buf;
+ c_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ c_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *c_fh = (struct nsfs_file_handle *)c_handle->f_handle;
+ c_fh->ns_id = c_id;
+ c_fh->ns_type = 0;
+ c_fh->ns_inum = 0;
+
+ /* Open child before process exits */
+ int c_fd = open_by_handle_at(FD_NSFS_ROOT, c_handle, O_RDONLY);
+ if (c_fd < 0) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open child namespace");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /*
+ * With 3-level hierarchy and child active:
+ * - Child is active (we hold fd)
+ * - Parent should be active (propagated from child)
+ * - Grandparent should be active (propagated from parent)
+ */
+ TH_LOG("Testing parent active when child is active");
+ int p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+
+ TH_LOG("Testing grandparent active when child is active");
+ int gp_fd = open_by_handle_at(FD_NSFS_ROOT, gp_handle, O_RDONLY);
+ ASSERT_GE(gp_fd, 0);
+
+ close(c_fd);
+ close(p_fd);
+ close(gp_fd);
+}
+
+/*
+ * Test multiple children sharing same parent.
+ * Parent should stay active as long as ANY child is active.
+ */
+TEST(ns_multiple_children_same_parent)
+{
+ struct file_handle *p_handle, *c1_handle, *c2_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 p_id, c1_id, c2_id;
+ char p_buf[sizeof(*p_handle) + MAX_HANDLE_SZ];
+ char c1_buf[sizeof(*c1_handle) + MAX_HANDLE_SZ];
+ char c2_buf[sizeof(*c2_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create parent user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int p_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (p_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
+ close(p_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(p_fd);
+
+ /* Create first child user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int c1_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (c1_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(c1_fd, NS_GET_ID, &c1_id) < 0) {
+ close(c1_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(c1_fd);
+
+ /* Return to parent user namespace and create second child */
+ /* We can't actually do this easily, so let's create a sibling namespace
+ * by creating a network namespace instead */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int c2_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (c2_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(c2_fd, NS_GET_ID, &c2_id) < 0) {
+ close(c2_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(c2_fd);
+
+ /* Send all namespace IDs */
+ write(pipefd[1], &p_id, sizeof(p_id));
+ write(pipefd[1], &c1_id, sizeof(c1_id));
+ write(pipefd[1], &c2_id, sizeof(c2_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &p_id, sizeof(p_id));
+ if (ret != sizeof(p_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read parent namespace ID");
+ }
+
+ ret = read(pipefd[0], &c1_id, sizeof(c1_id));
+ if (ret != sizeof(c1_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read first child namespace ID");
+ }
+
+ ret = read(pipefd[0], &c2_id, sizeof(c2_id));
+ close(pipefd[0]);
+ if (ret != sizeof(c2_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read second child namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ p_handle = (struct file_handle *)p_buf;
+ p_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ p_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)p_handle->f_handle;
+ p_fh->ns_id = p_id;
+ p_fh->ns_type = 0;
+ p_fh->ns_inum = 0;
+
+ c1_handle = (struct file_handle *)c1_buf;
+ c1_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ c1_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *c1_fh = (struct nsfs_file_handle *)c1_handle->f_handle;
+ c1_fh->ns_id = c1_id;
+ c1_fh->ns_type = 0;
+ c1_fh->ns_inum = 0;
+
+ c2_handle = (struct file_handle *)c2_buf;
+ c2_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ c2_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *c2_fh = (struct nsfs_file_handle *)c2_handle->f_handle;
+ c2_fh->ns_id = c2_id;
+ c2_fh->ns_type = 0;
+ c2_fh->ns_inum = 0;
+
+ /* Open both children before process exits */
+ int c1_fd = open_by_handle_at(FD_NSFS_ROOT, c1_handle, O_RDONLY);
+ int c2_fd = open_by_handle_at(FD_NSFS_ROOT, c2_handle, O_RDONLY);
+
+ if (c1_fd < 0 || c2_fd < 0) {
+ if (c1_fd >= 0) close(c1_fd);
+ if (c2_fd >= 0) close(c2_fd);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open child namespaces");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Parent should be active (both children active) */
+ TH_LOG("Both children active - parent should be active");
+ int p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+ close(p_fd);
+
+ /* Close first child - parent should STILL be active */
+ TH_LOG("Closing first child - parent should still be active");
+ close(c1_fd);
+ p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+ close(p_fd);
+
+ /* Close second child - NOW parent should become inactive */
+ TH_LOG("Closing second child - parent should become inactive");
+ close(c2_fd);
+ p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+ ASSERT_LT(p_fd, 0);
+}
+
+/*
+ * Test that different namespace types with same owner all contribute
+ * active references to the owning user namespace.
+ */
+TEST(ns_different_types_same_owner)
+{
+ struct file_handle *u_handle, *n_handle, *ut_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 u_id, n_id, ut_id;
+ char u_buf[sizeof(*u_handle) + MAX_HANDLE_SZ];
+ char n_buf[sizeof(*n_handle) + MAX_HANDLE_SZ];
+ char ut_buf[sizeof(*ut_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int u_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (u_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(u_fd, NS_GET_ID, &u_id) < 0) {
+ close(u_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(u_fd);
+
+ /* Create network namespace (owned by user namespace) */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int n_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (n_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(n_fd, NS_GET_ID, &n_id) < 0) {
+ close(n_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(n_fd);
+
+ /* Create UTS namespace (also owned by user namespace) */
+ if (unshare(CLONE_NEWUTS) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ut_fd = open("/proc/self/ns/uts", O_RDONLY);
+ if (ut_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ut_fd, NS_GET_ID, &ut_id) < 0) {
+ close(ut_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ut_fd);
+
+ /* Send all namespace IDs */
+ write(pipefd[1], &u_id, sizeof(u_id));
+ write(pipefd[1], &n_id, sizeof(n_id));
+ write(pipefd[1], &ut_id, sizeof(ut_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &u_id, sizeof(u_id));
+ if (ret != sizeof(u_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user namespace ID");
+ }
+
+ ret = read(pipefd[0], &n_id, sizeof(n_id));
+ if (ret != sizeof(n_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read network namespace ID");
+ }
+
+ ret = read(pipefd[0], &ut_id, sizeof(ut_id));
+ close(pipefd[0]);
+ if (ret != sizeof(ut_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read UTS namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ u_handle = (struct file_handle *)u_buf;
+ u_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ u_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *u_fh = (struct nsfs_file_handle *)u_handle->f_handle;
+ u_fh->ns_id = u_id;
+ u_fh->ns_type = 0;
+ u_fh->ns_inum = 0;
+
+ n_handle = (struct file_handle *)n_buf;
+ n_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ n_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *n_fh = (struct nsfs_file_handle *)n_handle->f_handle;
+ n_fh->ns_id = n_id;
+ n_fh->ns_type = 0;
+ n_fh->ns_inum = 0;
+
+ ut_handle = (struct file_handle *)ut_buf;
+ ut_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ut_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ut_fh = (struct nsfs_file_handle *)ut_handle->f_handle;
+ ut_fh->ns_id = ut_id;
+ ut_fh->ns_type = 0;
+ ut_fh->ns_inum = 0;
+
+ /* Open both non-user namespaces before process exits */
+ int n_fd = open_by_handle_at(FD_NSFS_ROOT, n_handle, O_RDONLY);
+ int ut_fd = open_by_handle_at(FD_NSFS_ROOT, ut_handle, O_RDONLY);
+
+ if (n_fd < 0 || ut_fd < 0) {
+ if (n_fd >= 0) close(n_fd);
+ if (ut_fd >= 0) close(ut_fd);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open namespaces");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /*
+ * Both network and UTS namespaces are active.
+ * User namespace should be active (gets 2 active refs).
+ */
+ TH_LOG("Both net and uts active - user namespace should be active");
+ int u_fd = open_by_handle_at(FD_NSFS_ROOT, u_handle, O_RDONLY);
+ ASSERT_GE(u_fd, 0);
+ close(u_fd);
+
+ /* Close network namespace - user namespace should STILL be active */
+ TH_LOG("Closing network ns - user ns should still be active (uts still active)");
+ close(n_fd);
+ u_fd = open_by_handle_at(FD_NSFS_ROOT, u_handle, O_RDONLY);
+ ASSERT_GE(u_fd, 0);
+ close(u_fd);
+
+ /* Close UTS namespace - user namespace should become inactive */
+ TH_LOG("Closing uts ns - user ns should become inactive");
+ close(ut_fd);
+ u_fd = open_by_handle_at(FD_NSFS_ROOT, u_handle, O_RDONLY);
+ ASSERT_LT(u_fd, 0);
+}
+
+/*
+ * Test hierarchical propagation with deep namespace hierarchy.
+ * Create: init_user_ns -> user_A -> user_B -> net_ns
+ * When net_ns is active, both user_A and user_B should be active.
+ * This verifies the conditional recursion in __ns_ref_active_put() works.
+ */
+TEST(ns_deep_hierarchy_propagation)
+{
+ struct file_handle *ua_handle, *ub_handle, *net_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 ua_id, ub_id, net_id;
+ char ua_buf[sizeof(*ua_handle) + MAX_HANDLE_SZ];
+ char ub_buf[sizeof(*ub_handle) + MAX_HANDLE_SZ];
+ char net_buf[sizeof(*net_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create user_A -> user_B -> net hierarchy */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ua_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (ua_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ua_fd, NS_GET_ID, &ua_id) < 0) {
+ close(ua_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ua_fd);
+
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ub_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (ub_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ub_fd, NS_GET_ID, &ub_id) < 0) {
+ close(ub_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ub_fd);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (net_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(net_fd, NS_GET_ID, &net_id) < 0) {
+ close(net_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(net_fd);
+
+ /* Send all three namespace IDs */
+ write(pipefd[1], &ua_id, sizeof(ua_id));
+ write(pipefd[1], &ub_id, sizeof(ub_id));
+ write(pipefd[1], &net_id, sizeof(net_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &ua_id, sizeof(ua_id));
+ if (ret != sizeof(ua_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user_A namespace ID");
+ }
+
+ ret = read(pipefd[0], &ub_id, sizeof(ub_id));
+ if (ret != sizeof(ub_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user_B namespace ID");
+ }
+
+ ret = read(pipefd[0], &net_id, sizeof(net_id));
+ close(pipefd[0]);
+ if (ret != sizeof(net_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read network namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ ua_handle = (struct file_handle *)ua_buf;
+ ua_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ua_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ua_fh = (struct nsfs_file_handle *)ua_handle->f_handle;
+ ua_fh->ns_id = ua_id;
+ ua_fh->ns_type = 0;
+ ua_fh->ns_inum = 0;
+
+ ub_handle = (struct file_handle *)ub_buf;
+ ub_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ub_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ub_fh = (struct nsfs_file_handle *)ub_handle->f_handle;
+ ub_fh->ns_id = ub_id;
+ ub_fh->ns_type = 0;
+ ub_fh->ns_inum = 0;
+
+ net_handle = (struct file_handle *)net_buf;
+ net_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *net_fh = (struct nsfs_file_handle *)net_handle->f_handle;
+ net_fh->ns_id = net_id;
+ net_fh->ns_type = 0;
+ net_fh->ns_inum = 0;
+
+ /* Open net_ns before child exits to keep it active */
+ int net_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+ if (net_fd < 0) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open network namespace");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* With net_ns active, both user_A and user_B should be active */
+ TH_LOG("Testing user_B active (net_ns active causes propagation)");
+ int ub_fd = open_by_handle_at(FD_NSFS_ROOT, ub_handle, O_RDONLY);
+ ASSERT_GE(ub_fd, 0);
+
+ TH_LOG("Testing user_A active (propagated through user_B)");
+ int ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_GE(ua_fd, 0);
+
+ /* Close net_ns - user_B should stay active (we hold direct ref) */
+ TH_LOG("Closing net_ns, user_B should remain active (direct ref held)");
+ close(net_fd);
+ int ub_fd2 = open_by_handle_at(FD_NSFS_ROOT, ub_handle, O_RDONLY);
+ ASSERT_GE(ub_fd2, 0);
+ close(ub_fd2);
+
+ /* Close user_B - user_A should stay active (we hold direct ref) */
+ TH_LOG("Closing user_B, user_A should remain active (direct ref held)");
+ close(ub_fd);
+ int ua_fd2 = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_GE(ua_fd2, 0);
+ close(ua_fd2);
+
+ /* Close user_A - everything should become inactive */
+ TH_LOG("Closing user_A, all should become inactive");
+ close(ua_fd);
+
+ /* All should now be inactive */
+ ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_LT(ua_fd, 0);
+}
+
+/*
+ * Test that parent stays active as long as ANY child is active.
+ * Create parent user namespace with two child net namespaces.
+ * Parent should remain active until BOTH children are inactive.
+ */
+TEST(ns_parent_multiple_children_refcount)
+{
+ struct file_handle *parent_handle, *net1_handle, *net2_handle;
+ int ret, pipefd[2], syncpipe[2];
+ pid_t pid;
+ int status;
+ __u64 p_id, n1_id, n2_id;
+ char p_buf[sizeof(*parent_handle) + MAX_HANDLE_SZ];
+ char n1_buf[sizeof(*net1_handle) + MAX_HANDLE_SZ];
+ char n2_buf[sizeof(*net2_handle) + MAX_HANDLE_SZ];
+ char sync_byte;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(pipe(syncpipe), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+ close(syncpipe[1]);
+
+ /* Create parent user namespace */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int p_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (p_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
+ close(p_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(p_fd);
+
+ /* Create first network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ int n1_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (n1_fd < 0) {
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+ if (ioctl(n1_fd, NS_GET_ID, &n1_id) < 0) {
+ close(n1_fd);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+ /* Keep n1_fd open so first namespace stays active */
+
+ /* Create second network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(n1_fd);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ int n2_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (n2_fd < 0) {
+ close(n1_fd);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+ if (ioctl(n2_fd, NS_GET_ID, &n2_id) < 0) {
+ close(n1_fd);
+ close(n2_fd);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+ /* Keep both n1_fd and n2_fd open */
+
+ /* Send all namespace IDs */
+ write(pipefd[1], &p_id, sizeof(p_id));
+ write(pipefd[1], &n1_id, sizeof(n1_id));
+ write(pipefd[1], &n2_id, sizeof(n2_id));
+ close(pipefd[1]);
+
+ /* Wait for parent to signal before exiting */
+ read(syncpipe[0], &sync_byte, 1);
+ close(syncpipe[0]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+ close(syncpipe[0]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &p_id, sizeof(p_id));
+ if (ret != sizeof(p_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read parent namespace ID");
+ }
+
+ ret = read(pipefd[0], &n1_id, sizeof(n1_id));
+ if (ret != sizeof(n1_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read first network namespace ID");
+ }
+
+ ret = read(pipefd[0], &n2_id, sizeof(n2_id));
+ close(pipefd[0]);
+ if (ret != sizeof(n2_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read second network namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ parent_handle = (struct file_handle *)p_buf;
+ parent_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ parent_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)parent_handle->f_handle;
+ p_fh->ns_id = p_id;
+ p_fh->ns_type = 0;
+ p_fh->ns_inum = 0;
+
+ net1_handle = (struct file_handle *)n1_buf;
+ net1_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net1_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *n1_fh = (struct nsfs_file_handle *)net1_handle->f_handle;
+ n1_fh->ns_id = n1_id;
+ n1_fh->ns_type = 0;
+ n1_fh->ns_inum = 0;
+
+ net2_handle = (struct file_handle *)n2_buf;
+ net2_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net2_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *n2_fh = (struct nsfs_file_handle *)net2_handle->f_handle;
+ n2_fh->ns_id = n2_id;
+ n2_fh->ns_type = 0;
+ n2_fh->ns_inum = 0;
+
+ /* Open both net namespaces while child is still alive */
+ int n1_fd = open_by_handle_at(FD_NSFS_ROOT, net1_handle, O_RDONLY);
+ int n2_fd = open_by_handle_at(FD_NSFS_ROOT, net2_handle, O_RDONLY);
+ if (n1_fd < 0 || n2_fd < 0) {
+ if (n1_fd >= 0) close(n1_fd);
+ if (n2_fd >= 0) close(n2_fd);
+ sync_byte = 'G';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open net namespaces");
+ }
+
+ /* Signal child that we have opened the namespaces */
+ sync_byte = 'G';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Parent should be active (has 2 active children) */
+ TH_LOG("Both net namespaces active - parent should be active");
+ int p_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+ close(p_fd);
+
+ /* Close first net namespace - parent should STILL be active */
+ TH_LOG("Closing first net ns - parent should still be active");
+ close(n1_fd);
+ p_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_GE(p_fd, 0);
+ close(p_fd);
+
+ /* Close second net namespace - parent should become inactive */
+ TH_LOG("Closing second net ns - parent should become inactive");
+ close(n2_fd);
+ p_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+ ASSERT_LT(p_fd, 0);
+}
+
+/*
+ * Test that user namespace as a child also propagates correctly.
+ * Create user_A -> user_B, verify when user_B is active that user_A
+ * is also active. This is different from non-user namespace children.
+ */
+TEST(ns_userns_child_propagation)
+{
+ struct file_handle *ua_handle, *ub_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 ua_id, ub_id;
+ char ua_buf[sizeof(*ua_handle) + MAX_HANDLE_SZ];
+ char ub_buf[sizeof(*ub_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ /* Create user_A */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ua_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (ua_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ua_fd, NS_GET_ID, &ua_id) < 0) {
+ close(ua_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ua_fd);
+
+ /* Create user_B (child of user_A) */
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ub_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (ub_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ub_fd, NS_GET_ID, &ub_id) < 0) {
+ close(ub_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ub_fd);
+
+ /* Send both namespace IDs */
+ write(pipefd[1], &ua_id, sizeof(ua_id));
+ write(pipefd[1], &ub_id, sizeof(ub_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read both namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &ua_id, sizeof(ua_id));
+ if (ret != sizeof(ua_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user_A namespace ID");
+ }
+
+ ret = read(pipefd[0], &ub_id, sizeof(ub_id));
+ close(pipefd[0]);
+ if (ret != sizeof(ub_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user_B namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ ua_handle = (struct file_handle *)ua_buf;
+ ua_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ua_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ua_fh = (struct nsfs_file_handle *)ua_handle->f_handle;
+ ua_fh->ns_id = ua_id;
+ ua_fh->ns_type = 0;
+ ua_fh->ns_inum = 0;
+
+ ub_handle = (struct file_handle *)ub_buf;
+ ub_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ ub_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ub_fh = (struct nsfs_file_handle *)ub_handle->f_handle;
+ ub_fh->ns_id = ub_id;
+ ub_fh->ns_type = 0;
+ ub_fh->ns_inum = 0;
+
+ /* Open user_B before child exits */
+ int ub_fd = open_by_handle_at(FD_NSFS_ROOT, ub_handle, O_RDONLY);
+ if (ub_fd < 0) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open user_B");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* With user_B active, user_A should also be active */
+ TH_LOG("Testing user_A active when child user_B is active");
+ int ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_GE(ua_fd, 0);
+
+ /* Close user_B */
+ TH_LOG("Closing user_B");
+ close(ub_fd);
+
+ /* user_A should remain active (we hold direct ref) */
+ int ua_fd2 = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_GE(ua_fd2, 0);
+ close(ua_fd2);
+
+ /* Close user_A - should become inactive */
+ TH_LOG("Closing user_A - should become inactive");
+ close(ua_fd);
+
+ ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+ ASSERT_LT(ua_fd, 0);
+}
+
+/*
+ * Test different namespace types (net, uts, ipc) all contributing
+ * active references to the same owning user namespace.
+ */
+TEST(ns_mixed_types_same_owner)
+{
+ struct file_handle *user_handle, *net_handle, *uts_handle;
+ int ret, pipefd[2];
+ pid_t pid;
+ int status;
+ __u64 u_id, n_id, ut_id;
+ char u_buf[sizeof(*user_handle) + MAX_HANDLE_SZ];
+ char n_buf[sizeof(*net_handle) + MAX_HANDLE_SZ];
+ char ut_buf[sizeof(*uts_handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(pipefd[0]);
+
+ if (setup_userns() < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int u_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (u_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(u_fd, NS_GET_ID, &u_id) < 0) {
+ close(u_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(u_fd);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int n_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (n_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(n_fd, NS_GET_ID, &n_id) < 0) {
+ close(n_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(n_fd);
+
+ if (unshare(CLONE_NEWUTS) < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+
+ int ut_fd = open("/proc/self/ns/uts", O_RDONLY);
+ if (ut_fd < 0) {
+ close(pipefd[1]);
+ exit(1);
+ }
+ if (ioctl(ut_fd, NS_GET_ID, &ut_id) < 0) {
+ close(ut_fd);
+ close(pipefd[1]);
+ exit(1);
+ }
+ close(ut_fd);
+
+ /* Send all namespace IDs */
+ write(pipefd[1], &u_id, sizeof(u_id));
+ write(pipefd[1], &n_id, sizeof(n_id));
+ write(pipefd[1], &ut_id, sizeof(ut_id));
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ close(pipefd[1]);
+
+ /* Read all three namespace IDs - fixed size, no parsing needed */
+ ret = read(pipefd[0], &u_id, sizeof(u_id));
+ if (ret != sizeof(u_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user namespace ID");
+ }
+
+ ret = read(pipefd[0], &n_id, sizeof(n_id));
+ if (ret != sizeof(n_id)) {
+ close(pipefd[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read network namespace ID");
+ }
+
+ ret = read(pipefd[0], &ut_id, sizeof(ut_id));
+ close(pipefd[0]);
+ if (ret != sizeof(ut_id)) {
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read UTS namespace ID");
+ }
+
+ /* Construct file handles from namespace IDs */
+ user_handle = (struct file_handle *)u_buf;
+ user_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ user_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *u_fh = (struct nsfs_file_handle *)user_handle->f_handle;
+ u_fh->ns_id = u_id;
+ u_fh->ns_type = 0;
+ u_fh->ns_inum = 0;
+
+ net_handle = (struct file_handle *)n_buf;
+ net_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *n_fh = (struct nsfs_file_handle *)net_handle->f_handle;
+ n_fh->ns_id = n_id;
+ n_fh->ns_type = 0;
+ n_fh->ns_inum = 0;
+
+ uts_handle = (struct file_handle *)ut_buf;
+ uts_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ uts_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *ut_fh = (struct nsfs_file_handle *)uts_handle->f_handle;
+ ut_fh->ns_id = ut_id;
+ ut_fh->ns_type = 0;
+ ut_fh->ns_inum = 0;
+
+ /* Open both non-user namespaces */
+ int n_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+ int ut_fd = open_by_handle_at(FD_NSFS_ROOT, uts_handle, O_RDONLY);
+ if (n_fd < 0 || ut_fd < 0) {
+ if (n_fd >= 0) close(n_fd);
+ if (ut_fd >= 0) close(ut_fd);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to open namespaces");
+ }
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* User namespace should be active (2 active children) */
+ TH_LOG("Both net and uts active - user ns should be active");
+ int u_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_GE(u_fd, 0);
+ close(u_fd);
+
+ /* Close net - user ns should STILL be active (uts still active) */
+ TH_LOG("Closing net - user ns should still be active");
+ close(n_fd);
+ u_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_GE(u_fd, 0);
+ close(u_fd);
+
+ /* Close uts - user ns should become inactive */
+ TH_LOG("Closing uts - user ns should become inactive");
+ close(ut_fd);
+ u_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_LT(u_fd, 0);
+}
+
+/* Thread test helpers and structures */
+struct thread_ns_info {
+ __u64 ns_id;
+ int pipefd;
+ int syncfd_read;
+ int syncfd_write;
+ int exit_code;
+};
+
+static void *thread_create_namespace(void *arg)
+{
+ struct thread_ns_info *info = (struct thread_ns_info *)arg;
+ int ret;
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ info->exit_code = 1;
+ return NULL;
+ }
+
+ /* Get namespace ID */
+ int fd = open("/proc/thread-self/ns/net", O_RDONLY);
+ if (fd < 0) {
+ info->exit_code = 2;
+ return NULL;
+ }
+
+ ret = ioctl(fd, NS_GET_ID, &info->ns_id);
+ close(fd);
+ if (ret < 0) {
+ info->exit_code = 3;
+ return NULL;
+ }
+
+ /* Send namespace ID to main thread */
+ if (write(info->pipefd, &info->ns_id, sizeof(info->ns_id)) != sizeof(info->ns_id)) {
+ info->exit_code = 4;
+ return NULL;
+ }
+
+ /* Wait for signal to exit */
+ char sync_byte;
+ if (read(info->syncfd_read, &sync_byte, 1) != 1) {
+ info->exit_code = 5;
+ return NULL;
+ }
+
+ info->exit_code = 0;
+ return NULL;
+}
+
+/*
+ * Test that namespace becomes inactive after thread exits.
+ * This verifies active reference counting works with threads, not just processes.
+ */
+TEST(thread_ns_inactive_after_exit)
+{
+ pthread_t thread;
+ struct thread_ns_info info;
+ struct file_handle *handle;
+ int pipefd[2];
+ int syncpipe[2];
+ int ret;
+ char sync_byte;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(pipe(syncpipe), 0);
+
+ info.pipefd = pipefd[1];
+ info.syncfd_read = syncpipe[0];
+ info.syncfd_write = -1;
+ info.exit_code = -1;
+
+ /* Create thread that will create a namespace */
+ ret = pthread_create(&thread, NULL, thread_create_namespace, &info);
+ ASSERT_EQ(ret, 0);
+
+ /* Read namespace ID from thread */
+ __u64 ns_id;
+ ret = read(pipefd[0], &ns_id, sizeof(ns_id));
+ if (ret != sizeof(ns_id)) {
+ sync_byte = 'X';
+ write(syncpipe[1], &sync_byte, 1);
+ pthread_join(thread, NULL);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ close(syncpipe[1]);
+ SKIP(return, "Failed to read namespace ID from thread");
+ }
+
+ TH_LOG("Thread created namespace with ID %llu", (unsigned long long)ns_id);
+
+ /* Construct file handle */
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *fh = (struct nsfs_file_handle *)handle->f_handle;
+ fh->ns_id = ns_id;
+ fh->ns_type = 0;
+ fh->ns_inum = 0;
+
+ /* Namespace should be active while thread is alive */
+ TH_LOG("Attempting to open namespace while thread is alive (should succeed)");
+ int nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(nsfd, 0);
+ close(nsfd);
+
+ /* Signal thread to exit */
+ TH_LOG("Signaling thread to exit");
+ sync_byte = 'X';
+ ASSERT_EQ(write(syncpipe[1], &sync_byte, 1), 1);
+ close(syncpipe[1]);
+
+ /* Wait for thread to exit */
+ ASSERT_EQ(pthread_join(thread, NULL), 0);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+
+ if (info.exit_code != 0)
+ SKIP(return, "Thread failed to create namespace");
+
+ TH_LOG("Thread exited, namespace should be inactive");
+
+ /* Namespace should now be inactive */
+ nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(nsfd, 0);
+ /* Should fail with ENOENT (inactive) or ESTALE (gone) */
+ TH_LOG("Namespace inactive as expected: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that a namespace remains active while a thread holds an fd to it.
+ * Even after the thread exits, the namespace should remain active as long as
+ * another thread holds a file descriptor to it.
+ */
+TEST(thread_ns_fd_keeps_active)
+{
+ pthread_t thread;
+ struct thread_ns_info info;
+ struct file_handle *handle;
+ int pipefd[2];
+ int syncpipe[2];
+ int ret;
+ char sync_byte;
+ char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(pipe(syncpipe), 0);
+
+ info.pipefd = pipefd[1];
+ info.syncfd_read = syncpipe[0];
+ info.syncfd_write = -1;
+ info.exit_code = -1;
+
+ /* Create thread that will create a namespace */
+ ret = pthread_create(&thread, NULL, thread_create_namespace, &info);
+ ASSERT_EQ(ret, 0);
+
+ /* Read namespace ID from thread */
+ __u64 ns_id;
+ ret = read(pipefd[0], &ns_id, sizeof(ns_id));
+ if (ret != sizeof(ns_id)) {
+ sync_byte = 'X';
+ write(syncpipe[1], &sync_byte, 1);
+ pthread_join(thread, NULL);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+ close(syncpipe[1]);
+ SKIP(return, "Failed to read namespace ID from thread");
+ }
+
+ TH_LOG("Thread created namespace with ID %llu", (unsigned long long)ns_id);
+
+ /* Construct file handle */
+ handle = (struct file_handle *)buf;
+ handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *fh = (struct nsfs_file_handle *)handle->f_handle;
+ fh->ns_id = ns_id;
+ fh->ns_type = 0;
+ fh->ns_inum = 0;
+
+ /* Open namespace while thread is alive */
+ TH_LOG("Opening namespace while thread is alive");
+ int nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(nsfd, 0);
+
+ /* Signal thread to exit */
+ TH_LOG("Signaling thread to exit");
+ sync_byte = 'X';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+
+ /* Wait for thread to exit */
+ pthread_join(thread, NULL);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(syncpipe[0]);
+
+ if (info.exit_code != 0) {
+ close(nsfd);
+ SKIP(return, "Thread failed to create namespace");
+ }
+
+ TH_LOG("Thread exited, but main thread holds fd - namespace should remain active");
+
+ /* Namespace should still be active because we hold an fd */
+ int nsfd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_GE(nsfd2, 0);
+
+ /* Verify it's the same namespace */
+ struct stat st1, st2;
+ ASSERT_EQ(fstat(nsfd, &st1), 0);
+ ASSERT_EQ(fstat(nsfd2, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ close(nsfd2);
+
+ TH_LOG("Closing fd - namespace should become inactive");
+ close(nsfd);
+
+ /* Now namespace should be inactive */
+ nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ ASSERT_LT(nsfd, 0);
+ /* Should fail with ENOENT (inactive) or ESTALE (gone) */
+ TH_LOG("Namespace inactive as expected: %s (errno=%d)", strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/* Structure for thread data in subprocess */
+struct thread_sleep_data {
+ int syncfd_read;
+};
+
+static void *thread_sleep_and_wait(void *arg)
+{
+ struct thread_sleep_data *data = (struct thread_sleep_data *)arg;
+ char sync_byte;
+
+ /* Wait for signal to exit - read will unblock when pipe is closed */
+ (void)read(data->syncfd_read, &sync_byte, 1);
+ return NULL;
+}
+
+/*
+ * Test that namespaces become inactive after subprocess with multiple threads exits.
+ * Create a subprocess that unshares user and network namespaces, then creates two
+ * threads that share those namespaces. Verify that after all threads and subprocess
+ * exit, the namespaces are no longer listed by listns() and cannot be opened by
+ * open_by_handle_at().
+ */
+TEST(thread_subprocess_ns_inactive_after_all_exit)
+{
+ int pipefd[2];
+ int sv[2];
+ pid_t pid;
+ int status;
+ __u64 user_id, net_id;
+ struct file_handle *user_handle, *net_handle;
+ char user_buf[sizeof(*user_handle) + MAX_HANDLE_SZ];
+ char net_buf[sizeof(*net_handle) + MAX_HANDLE_SZ];
+ char sync_byte;
+ int ret;
+
+ ASSERT_EQ(pipe(pipefd), 0);
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+ close(sv[0]);
+
+ /* Create user namespace with mappings */
+ if (setup_userns() < 0) {
+ fprintf(stderr, "Child: setup_userns() failed: %s\n", strerror(errno));
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+ fprintf(stderr, "Child: setup_userns() succeeded\n");
+
+ /* Get user namespace ID */
+ int user_fd = open("/proc/self/ns/user", O_RDONLY);
+ if (user_fd < 0) {
+ fprintf(stderr, "Child: open(/proc/self/ns/user) failed: %s\n", strerror(errno));
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(user_fd, NS_GET_ID, &user_id) < 0) {
+ fprintf(stderr, "Child: ioctl(NS_GET_ID) for user ns failed: %s\n", strerror(errno));
+ close(user_fd);
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+ close(user_fd);
+ fprintf(stderr, "Child: user ns ID = %llu\n", (unsigned long long)user_id);
+
+ /* Unshare network namespace */
+ if (unshare(CLONE_NEWNET) < 0) {
+ fprintf(stderr, "Child: unshare(CLONE_NEWNET) failed: %s\n", strerror(errno));
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+ fprintf(stderr, "Child: unshare(CLONE_NEWNET) succeeded\n");
+
+ /* Get network namespace ID */
+ int net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (net_fd < 0) {
+ fprintf(stderr, "Child: open(/proc/self/ns/net) failed: %s\n", strerror(errno));
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+
+ if (ioctl(net_fd, NS_GET_ID, &net_id) < 0) {
+ fprintf(stderr, "Child: ioctl(NS_GET_ID) for net ns failed: %s\n", strerror(errno));
+ close(net_fd);
+ close(pipefd[1]);
+ close(sv[1]);
+ exit(1);
+ }
+ close(net_fd);
+ fprintf(stderr, "Child: net ns ID = %llu\n", (unsigned long long)net_id);
+
+ /* Send namespace IDs to parent */
+ if (write(pipefd[1], &user_id, sizeof(user_id)) != sizeof(user_id)) {
+ fprintf(stderr, "Child: write(user_id) failed: %s\n", strerror(errno));
+ exit(1);
+ }
+ if (write(pipefd[1], &net_id, sizeof(net_id)) != sizeof(net_id)) {
+ fprintf(stderr, "Child: write(net_id) failed: %s\n", strerror(errno));
+ exit(1);
+ }
+ close(pipefd[1]);
+ fprintf(stderr, "Child: sent namespace IDs to parent\n");
+
+ /* Create two threads that share the namespaces */
+ pthread_t thread1, thread2;
+ struct thread_sleep_data data;
+ data.syncfd_read = sv[1];
+
+ int ret_thread = pthread_create(&thread1, NULL, thread_sleep_and_wait, &data);
+ if (ret_thread != 0) {
+ fprintf(stderr, "Child: pthread_create(thread1) failed: %s\n", strerror(ret_thread));
+ close(sv[1]);
+ exit(1);
+ }
+ fprintf(stderr, "Child: created thread1\n");
+
+ ret_thread = pthread_create(&thread2, NULL, thread_sleep_and_wait, &data);
+ if (ret_thread != 0) {
+ fprintf(stderr, "Child: pthread_create(thread2) failed: %s\n", strerror(ret_thread));
+ close(sv[1]);
+ pthread_cancel(thread1);
+ exit(1);
+ }
+ fprintf(stderr, "Child: created thread2\n");
+
+ /* Wait for threads to complete - they will unblock when parent writes */
+ fprintf(stderr, "Child: waiting for threads to exit\n");
+ pthread_join(thread1, NULL);
+ fprintf(stderr, "Child: thread1 exited\n");
+ pthread_join(thread2, NULL);
+ fprintf(stderr, "Child: thread2 exited\n");
+
+ close(sv[1]);
+
+ /* Exit - namespaces should become inactive */
+ fprintf(stderr, "Child: all threads joined, exiting with success\n");
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ close(sv[1]);
+
+ TH_LOG("Parent: waiting to read namespace IDs from child");
+
+ /* Read namespace IDs from child */
+ ret = read(pipefd[0], &user_id, sizeof(user_id));
+ if (ret != sizeof(user_id)) {
+ TH_LOG("Parent: failed to read user_id, ret=%d, errno=%s", ret, strerror(errno));
+ close(pipefd[0]);
+ sync_byte = 'X';
+ (void)write(sv[0], &sync_byte, 1);
+ close(sv[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read user namespace ID from child");
+ }
+
+ ret = read(pipefd[0], &net_id, sizeof(net_id));
+ close(pipefd[0]);
+ if (ret != sizeof(net_id)) {
+ TH_LOG("Parent: failed to read net_id, ret=%d, errno=%s", ret, strerror(errno));
+ sync_byte = 'X';
+ (void)write(sv[0], &sync_byte, 1);
+ close(sv[0]);
+ waitpid(pid, NULL, 0);
+ SKIP(return, "Failed to read network namespace ID from child");
+ }
+
+ TH_LOG("Child created user ns %llu and net ns %llu with 2 threads",
+ (unsigned long long)user_id, (unsigned long long)net_id);
+
+ /* Construct file handles */
+ user_handle = (struct file_handle *)user_buf;
+ user_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ user_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *user_fh = (struct nsfs_file_handle *)user_handle->f_handle;
+ user_fh->ns_id = user_id;
+ user_fh->ns_type = 0;
+ user_fh->ns_inum = 0;
+
+ net_handle = (struct file_handle *)net_buf;
+ net_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ net_handle->handle_type = FILEID_NSFS;
+ struct nsfs_file_handle *net_fh = (struct nsfs_file_handle *)net_handle->f_handle;
+ net_fh->ns_id = net_id;
+ net_fh->ns_type = 0;
+ net_fh->ns_inum = 0;
+
+ /* Verify namespaces are active while subprocess and threads are alive */
+ TH_LOG("Verifying namespaces are active while subprocess with threads is running");
+ int user_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_GE(user_fd, 0);
+
+ int net_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+ ASSERT_GE(net_fd, 0);
+
+ close(user_fd);
+ close(net_fd);
+
+ /* Also verify they appear in listns() */
+ TH_LOG("Verifying namespaces appear in listns() while active");
+ struct ns_id_req req = {
+ .size = sizeof(struct ns_id_req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWUSER,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ int nr_ids = sys_listns(&req, ns_ids, 256, 0);
+ if (nr_ids < 0) {
+ TH_LOG("listns() not available, skipping listns verification");
+ } else {
+ /* Check if user_id is in the list */
+ int found_user = 0;
+ for (int i = 0; i < nr_ids; i++) {
+ if (ns_ids[i] == user_id) {
+ found_user = 1;
+ break;
+ }
+ }
+ ASSERT_TRUE(found_user);
+ TH_LOG("User namespace found in listns() as expected");
+
+ /* Check network namespace */
+ req.ns_type = CLONE_NEWNET;
+ nr_ids = sys_listns(&req, ns_ids, 256, 0);
+ if (nr_ids >= 0) {
+ int found_net = 0;
+ for (int i = 0; i < nr_ids; i++) {
+ if (ns_ids[i] == net_id) {
+ found_net = 1;
+ break;
+ }
+ }
+ ASSERT_TRUE(found_net);
+ TH_LOG("Network namespace found in listns() as expected");
+ }
+ }
+
+ /* Signal threads to exit */
+ TH_LOG("Signaling threads to exit");
+ sync_byte = 'X';
+ /* Write two bytes - one for each thread */
+ ASSERT_EQ(write(sv[0], &sync_byte, 1), 1);
+ ASSERT_EQ(write(sv[0], &sync_byte, 1), 1);
+ close(sv[0]);
+
+ /* Wait for child process to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ if (WEXITSTATUS(status) != 0) {
+ TH_LOG("Child process failed with exit code %d", WEXITSTATUS(status));
+ SKIP(return, "Child process failed");
+ }
+
+ TH_LOG("Subprocess and all threads have exited successfully");
+
+ /* Verify namespaces are now inactive - open_by_handle_at should fail */
+ TH_LOG("Verifying namespaces are inactive after subprocess and threads exit");
+ user_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+ ASSERT_LT(user_fd, 0);
+ TH_LOG("User namespace inactive as expected: %s (errno=%d)",
+ strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+
+ net_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+ ASSERT_LT(net_fd, 0);
+ TH_LOG("Network namespace inactive as expected: %s (errno=%d)",
+ strerror(errno), errno);
+ ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+
+ /* Verify namespaces do NOT appear in listns() */
+ TH_LOG("Verifying namespaces do NOT appear in listns() when inactive");
+ memset(&req, 0, sizeof(req));
+ req.size = sizeof(struct ns_id_req);
+ req.ns_type = CLONE_NEWUSER;
+ nr_ids = sys_listns(&req, ns_ids, 256, 0);
+ if (nr_ids >= 0) {
+ int found_user = 0;
+ for (int i = 0; i < nr_ids; i++) {
+ if (ns_ids[i] == user_id) {
+ found_user = 1;
+ break;
+ }
+ }
+ ASSERT_FALSE(found_user);
+ TH_LOG("User namespace correctly not listed in listns()");
+
+ /* Check network namespace */
+ req.ns_type = CLONE_NEWNET;
+ nr_ids = sys_listns(&req, ns_ids, 256, 0);
+ if (nr_ids >= 0) {
+ int found_net = 0;
+ for (int i = 0; i < nr_ids; i++) {
+ if (ns_ids[i] == net_id) {
+ found_net = 1;
+ break;
+ }
+ }
+ ASSERT_FALSE(found_net);
+ TH_LOG("Network namespace correctly not listed in listns()");
+ }
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c
new file mode 100644
index 000000000000..b4a14c6693a5
--- /dev/null
+++ b/tools/testing/selftests/namespaces/nsid_test.c
@@ -0,0 +1,981 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <poll.h>
+#include <sys/epoll.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/nsfs.h>
+#include "kselftest_harness.h"
+
+/* Fixture for tests that create child processes */
+FIXTURE(nsid) {
+ pid_t child_pid;
+};
+
+FIXTURE_SETUP(nsid) {
+ self->child_pid = 0;
+}
+
+FIXTURE_TEARDOWN(nsid) {
+ /* Clean up any child process that may still be running */
+ if (self->child_pid > 0) {
+ kill(self->child_pid, SIGKILL);
+ waitpid(self->child_pid, NULL, 0);
+ }
+}
+
+TEST(nsid_mntns_basic)
+{
+ __u64 mnt_ns_id = 0;
+ int fd_mntns;
+ int ret;
+
+ /* Open the current mount namespace */
+ fd_mntns = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(fd_mntns, 0);
+
+ /* Get the mount namespace ID */
+ ret = ioctl(fd_mntns, NS_GET_MNTNS_ID, &mnt_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(mnt_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 mnt_ns_id2 = 0;
+ ret = ioctl(fd_mntns, NS_GET_ID, &mnt_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(mnt_ns_id, mnt_ns_id2);
+
+ close(fd_mntns);
+}
+
+TEST_F(nsid, mntns_separate)
+{
+ __u64 parent_mnt_ns_id = 0;
+ __u64 child_mnt_ns_id = 0;
+ int fd_parent_mntns, fd_child_mntns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's mount namespace ID */
+ fd_parent_mntns = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(fd_parent_mntns, 0);
+ ret = ioctl(fd_parent_mntns, NS_GET_ID, &parent_mnt_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_mnt_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new mount namespace */
+ ret = unshare(CLONE_NEWNS);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ close(fd_parent_mntns);
+ SKIP(return, "No permission to create mount namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's mount namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid);
+ fd_child_mntns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_mntns, 0);
+
+ /* Get child's mount namespace ID */
+ ret = ioctl(fd_child_mntns, NS_GET_ID, &child_mnt_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_mnt_ns_id, 0);
+
+ /* Parent and child should have different mount namespace IDs */
+ ASSERT_NE(parent_mnt_ns_id, child_mnt_ns_id);
+
+ close(fd_parent_mntns);
+ close(fd_child_mntns);
+}
+
+TEST(nsid_cgroupns_basic)
+{
+ __u64 cgroup_ns_id = 0;
+ int fd_cgroupns;
+ int ret;
+
+ /* Open the current cgroup namespace */
+ fd_cgroupns = open("/proc/self/ns/cgroup", O_RDONLY);
+ ASSERT_GE(fd_cgroupns, 0);
+
+ /* Get the cgroup namespace ID */
+ ret = ioctl(fd_cgroupns, NS_GET_ID, &cgroup_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(cgroup_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 cgroup_ns_id2 = 0;
+ ret = ioctl(fd_cgroupns, NS_GET_ID, &cgroup_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(cgroup_ns_id, cgroup_ns_id2);
+
+ close(fd_cgroupns);
+}
+
+TEST_F(nsid, cgroupns_separate)
+{
+ __u64 parent_cgroup_ns_id = 0;
+ __u64 child_cgroup_ns_id = 0;
+ int fd_parent_cgroupns, fd_child_cgroupns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's cgroup namespace ID */
+ fd_parent_cgroupns = open("/proc/self/ns/cgroup", O_RDONLY);
+ ASSERT_GE(fd_parent_cgroupns, 0);
+ ret = ioctl(fd_parent_cgroupns, NS_GET_ID, &parent_cgroup_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_cgroup_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new cgroup namespace */
+ ret = unshare(CLONE_NEWCGROUP);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ close(fd_parent_cgroupns);
+ SKIP(return, "No permission to create cgroup namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's cgroup namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/cgroup", pid);
+ fd_child_cgroupns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_cgroupns, 0);
+
+ /* Get child's cgroup namespace ID */
+ ret = ioctl(fd_child_cgroupns, NS_GET_ID, &child_cgroup_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_cgroup_ns_id, 0);
+
+ /* Parent and child should have different cgroup namespace IDs */
+ ASSERT_NE(parent_cgroup_ns_id, child_cgroup_ns_id);
+
+ close(fd_parent_cgroupns);
+ close(fd_child_cgroupns);
+}
+
+TEST(nsid_ipcns_basic)
+{
+ __u64 ipc_ns_id = 0;
+ int fd_ipcns;
+ int ret;
+
+ /* Open the current IPC namespace */
+ fd_ipcns = open("/proc/self/ns/ipc", O_RDONLY);
+ ASSERT_GE(fd_ipcns, 0);
+
+ /* Get the IPC namespace ID */
+ ret = ioctl(fd_ipcns, NS_GET_ID, &ipc_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(ipc_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 ipc_ns_id2 = 0;
+ ret = ioctl(fd_ipcns, NS_GET_ID, &ipc_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(ipc_ns_id, ipc_ns_id2);
+
+ close(fd_ipcns);
+}
+
+TEST_F(nsid, ipcns_separate)
+{
+ __u64 parent_ipc_ns_id = 0;
+ __u64 child_ipc_ns_id = 0;
+ int fd_parent_ipcns, fd_child_ipcns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's IPC namespace ID */
+ fd_parent_ipcns = open("/proc/self/ns/ipc", O_RDONLY);
+ ASSERT_GE(fd_parent_ipcns, 0);
+ ret = ioctl(fd_parent_ipcns, NS_GET_ID, &parent_ipc_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_ipc_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new IPC namespace */
+ ret = unshare(CLONE_NEWIPC);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ close(fd_parent_ipcns);
+ SKIP(return, "No permission to create IPC namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's IPC namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/ipc", pid);
+ fd_child_ipcns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_ipcns, 0);
+
+ /* Get child's IPC namespace ID */
+ ret = ioctl(fd_child_ipcns, NS_GET_ID, &child_ipc_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_ipc_ns_id, 0);
+
+ /* Parent and child should have different IPC namespace IDs */
+ ASSERT_NE(parent_ipc_ns_id, child_ipc_ns_id);
+
+ close(fd_parent_ipcns);
+ close(fd_child_ipcns);
+}
+
+TEST(nsid_utsns_basic)
+{
+ __u64 uts_ns_id = 0;
+ int fd_utsns;
+ int ret;
+
+ /* Open the current UTS namespace */
+ fd_utsns = open("/proc/self/ns/uts", O_RDONLY);
+ ASSERT_GE(fd_utsns, 0);
+
+ /* Get the UTS namespace ID */
+ ret = ioctl(fd_utsns, NS_GET_ID, &uts_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(uts_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 uts_ns_id2 = 0;
+ ret = ioctl(fd_utsns, NS_GET_ID, &uts_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(uts_ns_id, uts_ns_id2);
+
+ close(fd_utsns);
+}
+
+TEST_F(nsid, utsns_separate)
+{
+ __u64 parent_uts_ns_id = 0;
+ __u64 child_uts_ns_id = 0;
+ int fd_parent_utsns, fd_child_utsns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's UTS namespace ID */
+ fd_parent_utsns = open("/proc/self/ns/uts", O_RDONLY);
+ ASSERT_GE(fd_parent_utsns, 0);
+ ret = ioctl(fd_parent_utsns, NS_GET_ID, &parent_uts_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_uts_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new UTS namespace */
+ ret = unshare(CLONE_NEWUTS);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ close(fd_parent_utsns);
+ SKIP(return, "No permission to create UTS namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's UTS namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid);
+ fd_child_utsns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_utsns, 0);
+
+ /* Get child's UTS namespace ID */
+ ret = ioctl(fd_child_utsns, NS_GET_ID, &child_uts_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_uts_ns_id, 0);
+
+ /* Parent and child should have different UTS namespace IDs */
+ ASSERT_NE(parent_uts_ns_id, child_uts_ns_id);
+
+ close(fd_parent_utsns);
+ close(fd_child_utsns);
+}
+
+TEST(nsid_userns_basic)
+{
+ __u64 user_ns_id = 0;
+ int fd_userns;
+ int ret;
+
+ /* Open the current user namespace */
+ fd_userns = open("/proc/self/ns/user", O_RDONLY);
+ ASSERT_GE(fd_userns, 0);
+
+ /* Get the user namespace ID */
+ ret = ioctl(fd_userns, NS_GET_ID, &user_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(user_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 user_ns_id2 = 0;
+ ret = ioctl(fd_userns, NS_GET_ID, &user_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(user_ns_id, user_ns_id2);
+
+ close(fd_userns);
+}
+
+TEST_F(nsid, userns_separate)
+{
+ __u64 parent_user_ns_id = 0;
+ __u64 child_user_ns_id = 0;
+ int fd_parent_userns, fd_child_userns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's user namespace ID */
+ fd_parent_userns = open("/proc/self/ns/user", O_RDONLY);
+ ASSERT_GE(fd_parent_userns, 0);
+ ret = ioctl(fd_parent_userns, NS_GET_ID, &parent_user_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_user_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new user namespace */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ close(fd_parent_userns);
+ SKIP(return, "No permission to create user namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's user namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ fd_child_userns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_userns, 0);
+
+ /* Get child's user namespace ID */
+ ret = ioctl(fd_child_userns, NS_GET_ID, &child_user_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_user_ns_id, 0);
+
+ /* Parent and child should have different user namespace IDs */
+ ASSERT_NE(parent_user_ns_id, child_user_ns_id);
+
+ close(fd_parent_userns);
+ close(fd_child_userns);
+}
+
+TEST(nsid_timens_basic)
+{
+ __u64 time_ns_id = 0;
+ int fd_timens;
+ int ret;
+
+ /* Open the current time namespace */
+ fd_timens = open("/proc/self/ns/time", O_RDONLY);
+ if (fd_timens < 0) {
+ SKIP(return, "Time namespaces not supported");
+ }
+
+ /* Get the time namespace ID */
+ ret = ioctl(fd_timens, NS_GET_ID, &time_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(time_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 time_ns_id2 = 0;
+ ret = ioctl(fd_timens, NS_GET_ID, &time_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(time_ns_id, time_ns_id2);
+
+ close(fd_timens);
+}
+
+TEST_F(nsid, timens_separate)
+{
+ __u64 parent_time_ns_id = 0;
+ __u64 child_time_ns_id = 0;
+ int fd_parent_timens, fd_child_timens;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Open the current time namespace */
+ fd_parent_timens = open("/proc/self/ns/time", O_RDONLY);
+ if (fd_parent_timens < 0) {
+ SKIP(return, "Time namespaces not supported");
+ }
+
+ /* Get parent's time namespace ID */
+ ret = ioctl(fd_parent_timens, NS_GET_ID, &parent_time_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_time_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new time namespace */
+ ret = unshare(CLONE_NEWTIME);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES || errno == EINVAL) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Fork a grandchild to actually enter the new namespace */
+ pid_t grandchild = fork();
+ if (grandchild == 0) {
+ /* Grandchild is in the new namespace */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+ pause();
+ _exit(0);
+ } else if (grandchild > 0) {
+ /* Child writes grandchild PID and waits */
+ write(pipefd[1], "Y", 1);
+ write(pipefd[1], &grandchild, sizeof(grandchild));
+ close(pipefd[1]);
+ pause(); /* Keep the parent alive to maintain the grandchild */
+ _exit(0);
+ } else {
+ _exit(1);
+ }
+ }
+
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ close(fd_parent_timens);
+ close(pipefd[0]);
+ SKIP(return, "Cannot create time namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ pid_t grandchild_pid;
+ ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid));
+ close(pipefd[0]);
+
+ /* Open grandchild's time namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/time", grandchild_pid);
+ fd_child_timens = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_timens, 0);
+
+ /* Get child's time namespace ID */
+ ret = ioctl(fd_child_timens, NS_GET_ID, &child_time_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_time_ns_id, 0);
+
+ /* Parent and child should have different time namespace IDs */
+ ASSERT_NE(parent_time_ns_id, child_time_ns_id);
+
+ close(fd_parent_timens);
+ close(fd_child_timens);
+}
+
+TEST(nsid_pidns_basic)
+{
+ __u64 pid_ns_id = 0;
+ int fd_pidns;
+ int ret;
+
+ /* Open the current PID namespace */
+ fd_pidns = open("/proc/self/ns/pid", O_RDONLY);
+ ASSERT_GE(fd_pidns, 0);
+
+ /* Get the PID namespace ID */
+ ret = ioctl(fd_pidns, NS_GET_ID, &pid_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(pid_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 pid_ns_id2 = 0;
+ ret = ioctl(fd_pidns, NS_GET_ID, &pid_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(pid_ns_id, pid_ns_id2);
+
+ close(fd_pidns);
+}
+
+TEST_F(nsid, pidns_separate)
+{
+ __u64 parent_pid_ns_id = 0;
+ __u64 child_pid_ns_id = 0;
+ int fd_parent_pidns, fd_child_pidns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's PID namespace ID */
+ fd_parent_pidns = open("/proc/self/ns/pid", O_RDONLY);
+ ASSERT_GE(fd_parent_pidns, 0);
+ ret = ioctl(fd_parent_pidns, NS_GET_ID, &parent_pid_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_pid_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new PID namespace */
+ ret = unshare(CLONE_NEWPID);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Fork a grandchild to actually enter the new namespace */
+ pid_t grandchild = fork();
+ if (grandchild == 0) {
+ /* Grandchild is in the new namespace */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+ pause();
+ _exit(0);
+ } else if (grandchild > 0) {
+ /* Child writes grandchild PID and waits */
+ write(pipefd[1], "Y", 1);
+ write(pipefd[1], &grandchild, sizeof(grandchild));
+ close(pipefd[1]);
+ pause(); /* Keep the parent alive to maintain the grandchild */
+ _exit(0);
+ } else {
+ _exit(1);
+ }
+ }
+
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ close(fd_parent_pidns);
+ close(pipefd[0]);
+ SKIP(return, "No permission to create PID namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ pid_t grandchild_pid;
+ ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid));
+ close(pipefd[0]);
+
+ /* Open grandchild's PID namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/pid", grandchild_pid);
+ fd_child_pidns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_pidns, 0);
+
+ /* Get child's PID namespace ID */
+ ret = ioctl(fd_child_pidns, NS_GET_ID, &child_pid_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_pid_ns_id, 0);
+
+ /* Parent and child should have different PID namespace IDs */
+ ASSERT_NE(parent_pid_ns_id, child_pid_ns_id);
+
+ close(fd_parent_pidns);
+ close(fd_child_pidns);
+}
+
+TEST(nsid_netns_basic)
+{
+ __u64 net_ns_id = 0;
+ __u64 netns_cookie = 0;
+ int fd_netns;
+ int sock;
+ socklen_t optlen;
+ int ret;
+
+ /* Open the current network namespace */
+ fd_netns = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(fd_netns, 0);
+
+ /* Get the network namespace ID via ioctl */
+ ret = ioctl(fd_netns, NS_GET_ID, &net_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(net_ns_id, 0);
+
+ /* Create a socket to get the SO_NETNS_COOKIE */
+ sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(sock, 0);
+
+ /* Get the network namespace cookie via socket option */
+ optlen = sizeof(netns_cookie);
+ ret = getsockopt(sock, SOL_SOCKET, SO_NETNS_COOKIE, &netns_cookie, &optlen);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(optlen, sizeof(netns_cookie));
+
+ /* The namespace ID and cookie should be identical */
+ ASSERT_EQ(net_ns_id, netns_cookie);
+
+ /* Verify we can get the same ID again */
+ __u64 net_ns_id2 = 0;
+ ret = ioctl(fd_netns, NS_GET_ID, &net_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(net_ns_id, net_ns_id2);
+
+ close(sock);
+ close(fd_netns);
+}
+
+TEST_F(nsid, netns_separate)
+{
+ __u64 parent_net_ns_id = 0;
+ __u64 parent_netns_cookie = 0;
+ __u64 child_net_ns_id = 0;
+ __u64 child_netns_cookie = 0;
+ int fd_parent_netns, fd_child_netns;
+ int parent_sock, child_sock;
+ socklen_t optlen;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's network namespace ID */
+ fd_parent_netns = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(fd_parent_netns, 0);
+ ret = ioctl(fd_parent_netns, NS_GET_ID, &parent_net_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_net_ns_id, 0);
+
+ /* Get parent's network namespace cookie */
+ parent_sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(parent_sock, 0);
+ optlen = sizeof(parent_netns_cookie);
+ ret = getsockopt(parent_sock, SOL_SOCKET, SO_NETNS_COOKIE, &parent_netns_cookie, &optlen);
+ ASSERT_EQ(ret, 0);
+
+ /* Verify parent's ID and cookie match */
+ ASSERT_EQ(parent_net_ns_id, parent_netns_cookie);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ close(fd_parent_netns);
+ close(parent_sock);
+ SKIP(return, "No permission to create network namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's network namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/net", pid);
+ fd_child_netns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_netns, 0);
+
+ /* Get child's network namespace ID */
+ ret = ioctl(fd_child_netns, NS_GET_ID, &child_net_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_net_ns_id, 0);
+
+ /* Create socket in child's namespace to get cookie */
+ ret = setns(fd_child_netns, CLONE_NEWNET);
+ if (ret == 0) {
+ child_sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(child_sock, 0);
+
+ optlen = sizeof(child_netns_cookie);
+ ret = getsockopt(child_sock, SOL_SOCKET, SO_NETNS_COOKIE, &child_netns_cookie, &optlen);
+ ASSERT_EQ(ret, 0);
+
+ /* Verify child's ID and cookie match */
+ ASSERT_EQ(child_net_ns_id, child_netns_cookie);
+
+ close(child_sock);
+
+ /* Return to parent namespace */
+ setns(fd_parent_netns, CLONE_NEWNET);
+ }
+
+ /* Parent and child should have different network namespace IDs */
+ ASSERT_NE(parent_net_ns_id, child_net_ns_id);
+ if (child_netns_cookie != 0) {
+ ASSERT_NE(parent_netns_cookie, child_netns_cookie);
+ }
+
+ close(fd_parent_netns);
+ close(fd_child_netns);
+ close(parent_sock);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c b/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c
new file mode 100644
index 000000000000..753fd29dffd8
--- /dev/null
+++ b/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "../pidfd/pidfd.h"
+#include "../kselftest_harness.h"
+
+/*
+ * Regression tests for the setns(pidfd) active reference counting bug.
+ *
+ * These tests are based on the reproducers that triggered the race condition
+ * fixed by commit 1c465d0518dc ("ns: handle setns(pidfd, ...) cleanly").
+ *
+ * The bug: When using setns() with a pidfd, if the target task exits between
+ * prepare_nsset() and commit_nsset(), the namespaces would become inactive.
+ * Then ns_ref_active_get() would increment from 0 without properly resurrecting
+ * the owner chain, causing active reference count underflows.
+ */
+
+/*
+ * Simple pidfd setns test using create_child()+unshare().
+ *
+ * Without the fix, this would trigger active refcount warnings when the
+ * parent exits after doing setns(pidfd) on a child that has already exited.
+ */
+TEST(simple_pidfd_setns)
+{
+ pid_t child_pid;
+ int pidfd = -1;
+ int ret;
+ int sv[2];
+ char c;
+
+ /* Ignore SIGCHLD for autoreap */
+ ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+ /* Create a child process without namespaces initially */
+ child_pid = create_child(&pidfd, 0);
+ ASSERT_GE(child_pid, 0);
+
+ if (child_pid == 0) {
+ close(sv[0]);
+
+ if (unshare(CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUSER) < 0) {
+ close(sv[1]);
+ _exit(1);
+ }
+
+ /* Signal parent that namespaces are ready */
+ if (write_nointr(sv[1], "1", 1) < 0) {
+ close(sv[1]);
+ _exit(1);
+ }
+
+ close(sv[1]);
+ _exit(0);
+ }
+ ASSERT_GE(pidfd, 0);
+ EXPECT_EQ(close(sv[1]), 0);
+
+ ret = read_nointr(sv[0], &c, 1);
+ ASSERT_EQ(ret, 1);
+ EXPECT_EQ(close(sv[0]), 0);
+
+ /* Set to child's namespaces via pidfd */
+ ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
+ TH_LOG("setns() returned %d", ret);
+ close(pidfd);
+}
+
+/*
+ * Simple pidfd setns test using create_child().
+ *
+ * This variation uses create_child() with namespace flags directly.
+ * Namespaces are created immediately at clone time.
+ */
+TEST(simple_pidfd_setns_clone)
+{
+ pid_t child_pid;
+ int pidfd = -1;
+ int ret;
+
+ /* Ignore SIGCHLD for autoreap */
+ ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
+
+ /* Create a child process with new namespaces using create_child() */
+ child_pid = create_child(&pidfd, CLONE_NEWUSER | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET);
+ ASSERT_GE(child_pid, 0);
+
+ if (child_pid == 0) {
+ /* Child: sleep for a while so parent can setns to us */
+ sleep(2);
+ _exit(0);
+ }
+
+ /* Parent: pidfd was already created by create_child() */
+ ASSERT_GE(pidfd, 0);
+
+ /* Set to child's namespaces via pidfd */
+ ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
+ close(pidfd);
+ TH_LOG("setns() returned %d", ret);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/siocgskns_test.c b/tools/testing/selftests/namespaces/siocgskns_test.c
new file mode 100644
index 000000000000..ba689a22d82f
--- /dev/null
+++ b/tools/testing/selftests/namespaces/siocgskns_test.c
@@ -0,0 +1,1824 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/if.h>
+#include <linux/sockios.h>
+#include <linux/nsfs.h>
+#include <arpa/inet.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+#ifndef SIOCGSKNS
+#define SIOCGSKNS 0x894C
+#endif
+
+#ifndef FD_NSFS_ROOT
+#define FD_NSFS_ROOT -10003
+#endif
+
+#ifndef FILEID_NSFS
+#define FILEID_NSFS 0xf1
+#endif
+
+/*
+ * Test basic SIOCGSKNS functionality.
+ * Create a socket and verify SIOCGSKNS returns the correct network namespace.
+ */
+TEST(siocgskns_basic)
+{
+ int sock_fd, netns_fd, current_netns_fd;
+ struct stat st1, st2;
+
+ /* Create a TCP socket */
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(sock_fd, 0);
+
+ /* Use SIOCGSKNS to get network namespace */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ /* Get current network namespace */
+ current_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(current_netns_fd, 0);
+
+ /* Verify they match */
+ ASSERT_EQ(fstat(netns_fd, &st1), 0);
+ ASSERT_EQ(fstat(current_netns_fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+ close(sock_fd);
+ close(netns_fd);
+ close(current_netns_fd);
+}
+
+/*
+ * Test that socket file descriptors keep network namespaces active.
+ * Create a network namespace, create a socket in it, then exit the namespace.
+ * The namespace should remain active while the socket FD is held.
+ */
+TEST(siocgskns_keeps_netns_active)
+{
+ int sock_fd, netns_fd, test_fd;
+ int ipc_sockets[2];
+ pid_t pid;
+ int status;
+ struct stat st;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child: create new netns and socket */
+ close(ipc_sockets[0]);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Create a socket in the new network namespace */
+ sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock_fd < 0) {
+ TH_LOG("socket() failed: %s", strerror(errno));
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Send socket FD to parent via SCM_RIGHTS */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(0);
+ }
+
+ /* Parent: receive socket FD */
+ close(ipc_sockets[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+ ASSERT_EQ(n, 1);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, NULL);
+ ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+ memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Get network namespace from socket */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ ASSERT_EQ(fstat(netns_fd, &st), 0);
+
+ /*
+ * Namespace should still be active because socket FD keeps it alive.
+ * Try to access it via /proc/self/fd/<fd>.
+ */
+ char path[64];
+ snprintf(path, sizeof(path), "/proc/self/fd/%d", netns_fd);
+ test_fd = open(path, O_RDONLY);
+ ASSERT_GE(test_fd, 0);
+ close(test_fd);
+ close(netns_fd);
+
+ /* Close socket - namespace should become inactive */
+ close(sock_fd);
+
+ /* Try SIOCGSKNS again - should fail since socket is closed */
+ ASSERT_LT(ioctl(sock_fd, SIOCGSKNS), 0);
+}
+
+/*
+ * Test SIOCGSKNS with different socket types (TCP, UDP, RAW).
+ */
+TEST(siocgskns_socket_types)
+{
+ int sock_tcp, sock_udp, sock_raw;
+ int netns_tcp, netns_udp, netns_raw;
+ struct stat st_tcp, st_udp, st_raw;
+
+ /* TCP socket */
+ sock_tcp = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(sock_tcp, 0);
+
+ /* UDP socket */
+ sock_udp = socket(AF_INET, SOCK_DGRAM, 0);
+ ASSERT_GE(sock_udp, 0);
+
+ /* RAW socket (may require privileges) */
+ sock_raw = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
+ if (sock_raw < 0 && (errno == EPERM || errno == EACCES)) {
+ sock_raw = -1; /* Skip raw socket test */
+ }
+
+ /* Test SIOCGSKNS on TCP */
+ netns_tcp = ioctl(sock_tcp, SIOCGSKNS);
+ if (netns_tcp < 0) {
+ close(sock_tcp);
+ close(sock_udp);
+ if (sock_raw >= 0) close(sock_raw);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_tcp, 0);
+ }
+
+ /* Test SIOCGSKNS on UDP */
+ netns_udp = ioctl(sock_udp, SIOCGSKNS);
+ ASSERT_GE(netns_udp, 0);
+
+ /* Test SIOCGSKNS on RAW (if available) */
+ if (sock_raw >= 0) {
+ netns_raw = ioctl(sock_raw, SIOCGSKNS);
+ ASSERT_GE(netns_raw, 0);
+ }
+
+ /* Verify all return the same network namespace */
+ ASSERT_EQ(fstat(netns_tcp, &st_tcp), 0);
+ ASSERT_EQ(fstat(netns_udp, &st_udp), 0);
+ ASSERT_EQ(st_tcp.st_ino, st_udp.st_ino);
+
+ if (sock_raw >= 0) {
+ ASSERT_EQ(fstat(netns_raw, &st_raw), 0);
+ ASSERT_EQ(st_tcp.st_ino, st_raw.st_ino);
+ close(netns_raw);
+ close(sock_raw);
+ }
+
+ close(netns_tcp);
+ close(netns_udp);
+ close(sock_tcp);
+ close(sock_udp);
+}
+
+/*
+ * Test SIOCGSKNS across setns.
+ * Create a socket in netns A, switch to netns B, verify SIOCGSKNS still
+ * returns netns A.
+ */
+TEST(siocgskns_across_setns)
+{
+ int sock_fd, netns_a_fd, netns_b_fd, result_fd;
+ struct stat st_a;
+
+ /* Get current netns (A) */
+ netns_a_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(netns_a_fd, 0);
+ ASSERT_EQ(fstat(netns_a_fd, &st_a), 0);
+
+ /* Create socket in netns A */
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(sock_fd, 0);
+
+ /* Create new netns (B) */
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+
+ netns_b_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(netns_b_fd, 0);
+
+ /* Get netns from socket created in A */
+ result_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (result_fd < 0) {
+ close(sock_fd);
+ setns(netns_a_fd, CLONE_NEWNET);
+ close(netns_a_fd);
+ close(netns_b_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(result_fd, 0);
+ }
+
+ /* Verify it still points to netns A */
+ struct stat st_result_stat;
+ ASSERT_EQ(fstat(result_fd, &st_result_stat), 0);
+ ASSERT_EQ(st_a.st_ino, st_result_stat.st_ino);
+
+ close(result_fd);
+ close(sock_fd);
+ close(netns_b_fd);
+
+ /* Restore original netns */
+ ASSERT_EQ(setns(netns_a_fd, CLONE_NEWNET), 0);
+ close(netns_a_fd);
+}
+
+/*
+ * Test SIOCGSKNS fails on non-socket file descriptors.
+ */
+TEST(siocgskns_non_socket)
+{
+ int fd;
+ int pipefd[2];
+
+ /* Test on regular file */
+ fd = open("/dev/null", O_RDONLY);
+ ASSERT_GE(fd, 0);
+
+ ASSERT_LT(ioctl(fd, SIOCGSKNS), 0);
+ ASSERT_TRUE(errno == ENOTTY || errno == EINVAL);
+ close(fd);
+
+ /* Test on pipe */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ ASSERT_LT(ioctl(pipefd[0], SIOCGSKNS), 0);
+ ASSERT_TRUE(errno == ENOTTY || errno == EINVAL);
+
+ close(pipefd[0]);
+ close(pipefd[1]);
+}
+
+/*
+ * Test multiple sockets keep the same network namespace active.
+ * Create multiple sockets, verify closing some doesn't affect others.
+ */
+TEST(siocgskns_multiple_sockets)
+{
+ int socks[5];
+ int netns_fds[5];
+ int i;
+ struct stat st;
+ ino_t netns_ino;
+
+ /* Create new network namespace */
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+
+ /* Create multiple sockets */
+ for (i = 0; i < 5; i++) {
+ socks[i] = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(socks[i], 0);
+ }
+
+ /* Get netns from all sockets */
+ for (i = 0; i < 5; i++) {
+ netns_fds[i] = ioctl(socks[i], SIOCGSKNS);
+ if (netns_fds[i] < 0) {
+ int j;
+ for (j = 0; j <= i; j++) {
+ close(socks[j]);
+ if (j < i && netns_fds[j] >= 0)
+ close(netns_fds[j]);
+ }
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fds[i], 0);
+ }
+ }
+
+ /* Verify all point to same netns */
+ ASSERT_EQ(fstat(netns_fds[0], &st), 0);
+ netns_ino = st.st_ino;
+
+ for (i = 1; i < 5; i++) {
+ ASSERT_EQ(fstat(netns_fds[i], &st), 0);
+ ASSERT_EQ(st.st_ino, netns_ino);
+ }
+
+ /* Close some sockets */
+ for (i = 0; i < 3; i++) {
+ close(socks[i]);
+ }
+
+ /* Remaining netns FDs should still be valid */
+ for (i = 3; i < 5; i++) {
+ char path[64];
+ snprintf(path, sizeof(path), "/proc/self/fd/%d", netns_fds[i]);
+ int test_fd = open(path, O_RDONLY);
+ ASSERT_GE(test_fd, 0);
+ close(test_fd);
+ }
+
+ /* Cleanup */
+ for (i = 0; i < 5; i++) {
+ if (i >= 3)
+ close(socks[i]);
+ close(netns_fds[i]);
+ }
+}
+
+/*
+ * Test socket keeps netns active after creating process exits.
+ * Verify that as long as the socket FD exists, the namespace remains active.
+ */
+TEST(siocgskns_netns_lifecycle)
+{
+ int sock_fd, netns_fd;
+ int ipc_sockets[2];
+ int syncpipe[2];
+ pid_t pid;
+ int status;
+ char sync_byte;
+ struct stat st;
+ ino_t netns_ino;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ ASSERT_EQ(pipe(syncpipe), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child */
+ close(ipc_sockets[0]);
+ close(syncpipe[1]);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(ipc_sockets[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock_fd < 0) {
+ close(ipc_sockets[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ /* Send socket to parent */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ close(syncpipe[0]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_sockets[1]);
+
+ /* Wait for parent signal */
+ read(syncpipe[0], &sync_byte, 1);
+ close(syncpipe[0]);
+ exit(0);
+ }
+
+ /* Parent */
+ close(ipc_sockets[1]);
+ close(syncpipe[0]);
+
+ /* Receive socket FD */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+ ASSERT_EQ(n, 1);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, NULL);
+ memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Get netns from socket while child is alive */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ sync_byte = 'G';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+ close(sock_fd);
+ waitpid(pid, NULL, 0);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+ ASSERT_EQ(fstat(netns_fd, &st), 0);
+ netns_ino = st.st_ino;
+
+ /* Signal child to exit */
+ sync_byte = 'G';
+ write(syncpipe[1], &sync_byte, 1);
+ close(syncpipe[1]);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+
+ /*
+ * Socket FD should still keep namespace active even after
+ * the creating process exited.
+ */
+ int test_fd = ioctl(sock_fd, SIOCGSKNS);
+ ASSERT_GE(test_fd, 0);
+
+ struct stat st_test;
+ ASSERT_EQ(fstat(test_fd, &st_test), 0);
+ ASSERT_EQ(st_test.st_ino, netns_ino);
+
+ close(test_fd);
+ close(netns_fd);
+
+ /* Close socket - namespace should become inactive */
+ close(sock_fd);
+}
+
+/*
+ * Test IPv6 sockets also work with SIOCGSKNS.
+ */
+TEST(siocgskns_ipv6)
+{
+ int sock_fd, netns_fd, current_netns_fd;
+ struct stat st1, st2;
+
+ /* Create an IPv6 TCP socket */
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ ASSERT_GE(sock_fd, 0);
+
+ /* Use SIOCGSKNS */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ /* Verify it matches current namespace */
+ current_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(current_netns_fd, 0);
+
+ ASSERT_EQ(fstat(netns_fd, &st1), 0);
+ ASSERT_EQ(fstat(current_netns_fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+ close(sock_fd);
+ close(netns_fd);
+ close(current_netns_fd);
+}
+
+/*
+ * Test that socket-kept netns appears in listns() output.
+ * Verify that a network namespace kept alive by a socket FD appears in
+ * listns() output even after the creating process exits, and that it
+ * disappears when the socket is closed.
+ */
+TEST(siocgskns_listns_visibility)
+{
+ int sock_fd, netns_fd, owner_fd;
+ int ipc_sockets[2];
+ pid_t pid;
+ int status;
+ __u64 netns_id, owner_id;
+ struct ns_id_req req = {
+ .size = sizeof(req),
+ .spare = 0,
+ .ns_id = 0,
+ .ns_type = CLONE_NEWNET,
+ .spare2 = 0,
+ .user_ns_id = 0,
+ };
+ __u64 ns_ids[256];
+ int ret, i;
+ bool found_netns = false;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child: create new netns and socket */
+ close(ipc_sockets[0]);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock_fd < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Send socket FD to parent via SCM_RIGHTS */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(0);
+ }
+
+ /* Parent: receive socket FD */
+ close(ipc_sockets[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+ ASSERT_EQ(n, 1);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, NULL);
+ memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Get network namespace from socket */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ /* Get namespace ID */
+ ret = ioctl(netns_fd, NS_GET_ID, &netns_id);
+ if (ret < 0) {
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "NS_GET_ID not supported");
+ ASSERT_EQ(ret, 0);
+ }
+
+ /* Get owner user namespace */
+ owner_fd = ioctl(netns_fd, NS_GET_USERNS);
+ if (owner_fd < 0) {
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "NS_GET_USERNS not supported");
+ ASSERT_GE(owner_fd, 0);
+ }
+
+ /* Get owner namespace ID */
+ ret = ioctl(owner_fd, NS_GET_ID, &owner_id);
+ if (ret < 0) {
+ close(owner_fd);
+ close(sock_fd);
+ close(netns_fd);
+ ASSERT_EQ(ret, 0);
+ }
+ close(owner_fd);
+
+ /* Namespace should appear in listns() output */
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ if (ret < 0) {
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOSYS)
+ SKIP(return, "listns() not supported");
+ TH_LOG("listns failed: %s", strerror(errno));
+ ASSERT_GE(ret, 0);
+ }
+
+ /* Search for our network namespace in the list */
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_id) {
+ found_netns = true;
+ break;
+ }
+ }
+
+ ASSERT_TRUE(found_netns);
+ TH_LOG("Found netns %llu in listns() output (kept alive by socket)", netns_id);
+
+ /* Now verify with owner filtering */
+ req.user_ns_id = owner_id;
+ found_netns = false;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_id) {
+ found_netns = true;
+ break;
+ }
+ }
+
+ ASSERT_TRUE(found_netns);
+ TH_LOG("Found netns %llu owned by userns %llu", netns_id, owner_id);
+
+ /* Close socket - namespace should become inactive and disappear from listns() */
+ close(sock_fd);
+ close(netns_fd);
+
+ /* Verify it's no longer in listns() output */
+ req.user_ns_id = 0;
+ found_netns = false;
+
+ ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+ ASSERT_GE(ret, 0);
+
+ for (i = 0; i < ret; i++) {
+ if (ns_ids[i] == netns_id) {
+ found_netns = true;
+ break;
+ }
+ }
+
+ ASSERT_FALSE(found_netns);
+ TH_LOG("Netns %llu correctly disappeared from listns() after socket closed", netns_id);
+}
+
+/*
+ * Test that socket-kept netns can be reopened via file handle.
+ * Verify that a network namespace kept alive by a socket FD can be
+ * reopened using file handles even after the creating process exits.
+ */
+TEST(siocgskns_file_handle)
+{
+ int sock_fd, netns_fd, reopened_fd;
+ int ipc_sockets[2];
+ pid_t pid;
+ int status;
+ struct stat st1, st2;
+ ino_t netns_ino;
+ __u64 netns_id;
+ struct file_handle *handle;
+ struct nsfs_file_handle *nsfs_fh;
+ int ret;
+
+ /* Allocate file_handle structure for nsfs */
+ handle = malloc(sizeof(struct file_handle) + sizeof(struct nsfs_file_handle));
+ ASSERT_NE(handle, NULL);
+ handle->handle_bytes = sizeof(struct nsfs_file_handle);
+ handle->handle_type = FILEID_NSFS;
+
+ EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child: create new netns and socket */
+ close(ipc_sockets[0]);
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock_fd < 0) {
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ /* Send socket FD to parent via SCM_RIGHTS */
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1] = {'X'};
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+ if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(1);
+ }
+
+ close(sock_fd);
+ close(ipc_sockets[1]);
+ exit(0);
+ }
+
+ /* Parent: receive socket FD */
+ close(ipc_sockets[1]);
+
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ char buf[1];
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+ iov.iov_base = buf;
+ iov.iov_len = 1;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+ close(ipc_sockets[0]);
+ ASSERT_EQ(n, 1);
+
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, NULL);
+ memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+ /* Wait for child to exit */
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ /* Get network namespace from socket */
+ netns_fd = ioctl(sock_fd, SIOCGSKNS);
+ if (netns_fd < 0) {
+ free(handle);
+ close(sock_fd);
+ if (errno == ENOTTY || errno == EINVAL)
+ SKIP(return, "SIOCGSKNS not supported");
+ ASSERT_GE(netns_fd, 0);
+ }
+
+ ASSERT_EQ(fstat(netns_fd, &st1), 0);
+ netns_ino = st1.st_ino;
+
+ /* Get namespace ID */
+ ret = ioctl(netns_fd, NS_GET_ID, &netns_id);
+ if (ret < 0) {
+ free(handle);
+ close(sock_fd);
+ close(netns_fd);
+ if (errno == ENOTTY || errno == EINVAL)